From ca9bc12b90fbc4e2b1f81360f63842c9da54bb3c Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruen@linbit.com>
Date: Tue, 11 Jan 2011 13:47:24 +0100
Subject: drbd: Get rid of BE_DRBD_MAGIC and BE_DRBD_MAGIC_BIG

Converting the constants happens at compile time.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 include/linux/drbd.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/drbd.h b/include/linux/drbd.h
index 9e5f5607eba3..d28202811672 100644
--- a/include/linux/drbd.h
+++ b/include/linux/drbd.h
@@ -334,9 +334,7 @@ enum drbd_timeout_flag {
 #define UUID_JUST_CREATED ((__u64)4)
 
 #define DRBD_MAGIC 0x83740267
-#define BE_DRBD_MAGIC __constant_cpu_to_be32(DRBD_MAGIC)
 #define DRBD_MAGIC_BIG 0x835a
-#define BE_DRBD_MAGIC_BIG __constant_cpu_to_be16(DRBD_MAGIC_BIG)
 
 /* these are of type "int" */
 #define DRBD_MD_INDEX_INTERNAL -1
-- 
cgit v1.2.3


From 9749f30f1a387070e6e8351f35aeb829eacc3ab6 Mon Sep 17 00:00:00 2001
From: Philipp Reisner <philipp.reisner@linbit.com>
Date: Wed, 20 Jul 2011 14:59:37 +0200
Subject: idr: idr_for_each_entry() macro

Inspired by the list_for_each_entry() macro
---
 include/linux/idr.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/idr.h b/include/linux/idr.h
index 255491cf522e..52a9da295296 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -152,4 +152,15 @@ void ida_simple_remove(struct ida *ida, unsigned int id);
 
 void __init idr_init_cache(void);
 
+/**
+ * idr_for_each_entry - iterate over an idr's elements of a given type
+ * @idp:     idr handle
+ * @entry:   the type * to use as cursor
+ * @id:      id entry's key
+ */
+#define idr_for_each_entry(idp, entry, id)				\
+	for (id = 0, entry = (typeof(entry))idr_get_next((idp), &(id)); \
+	     entry != NULL;                                             \
+	     ++id, entry = (typeof(entry))idr_get_next((idp), &(id)))
+
 #endif /* __IDR_H__ */
-- 
cgit v1.2.3


From fd340c12c98b57ec0751ebb317057eee41be0c3d Mon Sep 17 00:00:00 2001
From: Philipp Reisner <philipp.reisner@linbit.com>
Date: Wed, 19 Jan 2011 16:57:39 +0100
Subject: drbd: Use new header layout

The new header layout will only be used if the peer supports
it of course.

For the first packet and the handshake packet the old (h80)
layout is used for compatibility reasons.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_int.h      |  1 -
 drivers/block/drbd/drbd_main.c     | 82 +++++++++++++++++---------------------
 drivers/block/drbd/drbd_receiver.c |  7 +++-
 include/linux/drbd.h               |  2 +-
 4 files changed, 42 insertions(+), 50 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index dc669dfe5b0d..4de43481bcb9 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -345,7 +345,6 @@ struct p_header95 {
 	u16	  magic;	/* use DRBD_MAGIC_BIG here */
 	u16	  command;
 	u32	  length;	/* Use only 24 bits of that. Ignore the highest 8 bit. */
-	u8	  payload[0];
 } __packed;
 
 struct p_header {
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 55ce48e24b8e..f8cb15c84ed8 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -1820,12 +1820,36 @@ void drbd_thread_current_set_cpu(struct drbd_conf *mdev)
 }
 #endif
 
+static void prepare_header80(struct drbd_conf *mdev, struct p_header80 *h,
+			     enum drbd_packets cmd, int size)
+{
+	h->magic   = cpu_to_be32(DRBD_MAGIC);
+	h->command = cpu_to_be16(cmd);
+	h->length  = cpu_to_be16(size);
+}
+
+static void prepare_header95(struct drbd_conf *mdev, struct p_header95 *h,
+			     enum drbd_packets cmd, int size)
+{
+	h->magic   = cpu_to_be16(DRBD_MAGIC_BIG);
+	h->command = cpu_to_be16(cmd);
+	h->length  = cpu_to_be32(size);
+}
+
+static void prepare_header(struct drbd_conf *mdev, struct p_header *h,
+			   enum drbd_packets cmd, int size)
+{
+	if (mdev->tconn->agreed_pro_version >= 100 || size > DRBD_MAX_SIZE_H80_PACKET)
+		prepare_header95(mdev, &h->h95, cmd, size);
+	else
+		prepare_header80(mdev, &h->h80, cmd, size);
+}
+
 /* the appropriate socket mutex must be held already */
 int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock,
-			  enum drbd_packets cmd, struct p_header *hg,
+			  enum drbd_packets cmd, struct p_header *h,
 			  size_t size, unsigned msg_flags)
 {
-	struct p_header80 *h = (struct p_header80 *)hg;
 	int sent, ok;
 
 	if (!expect(h))
@@ -1833,9 +1857,7 @@ int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock,
 	if (!expect(size))
 		return false;
 
-	h->magic   = cpu_to_be32(DRBD_MAGIC);
-	h->command = cpu_to_be16(cmd);
-	h->length  = cpu_to_be16(size-sizeof(struct p_header80));
+	prepare_header(mdev, h, cmd, size - sizeof(struct p_header));
 
 	sent = drbd_send(mdev, sock, h, size, msg_flags);
 
@@ -1878,12 +1900,10 @@ int drbd_send_cmd(struct drbd_conf *mdev, int use_data_socket,
 int drbd_send_cmd2(struct drbd_conf *mdev, enum drbd_packets cmd, char *data,
 		   size_t size)
 {
-	struct p_header80 h;
+	struct p_header h;
 	int ok;
 
-	h.magic   = cpu_to_be32(DRBD_MAGIC);
-	h.command = cpu_to_be16(cmd);
-	h.length  = cpu_to_be16(size);
+	prepare_header(mdev, &h, cmd, size);
 
 	if (!drbd_get_data_sock(mdev))
 		return 0;
@@ -2456,14 +2476,11 @@ int drbd_send_drequest_csum(struct drbd_conf *mdev,
 	int ok;
 	struct p_block_req p;
 
+	prepare_header(mdev, &p.head, cmd, sizeof(p) - sizeof(struct p_header) + digest_size);
 	p.sector   = cpu_to_be64(sector);
 	p.block_id = ID_SYNCER /* unused */;
 	p.blksize  = cpu_to_be32(size);
 
-	p.head.h80.magic   = cpu_to_be32(DRBD_MAGIC);
-	p.head.h80.command = cpu_to_be16(cmd);
-	p.head.h80.length  = cpu_to_be16(sizeof(p) - sizeof(struct p_header80) + digest_size);
-
 	mutex_lock(&mdev->tconn->data.mutex);
 
 	ok = (sizeof(p) == drbd_send(mdev, mdev->tconn->data.socket, &p, sizeof(p), 0));
@@ -2663,22 +2680,10 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req)
 	dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_w_tfm) ?
 		crypto_hash_digestsize(mdev->tconn->integrity_w_tfm) : 0;
 
-	if (req->i.size <= DRBD_MAX_SIZE_H80_PACKET) {
-		p.head.h80.magic   = cpu_to_be32(DRBD_MAGIC);
-		p.head.h80.command = cpu_to_be16(P_DATA);
-		p.head.h80.length  =
-			cpu_to_be16(sizeof(p) - sizeof(struct p_header) + dgs + req->i.size);
-	} else {
-		p.head.h95.magic   = cpu_to_be16(DRBD_MAGIC_BIG);
-		p.head.h95.command = cpu_to_be16(P_DATA);
-		p.head.h95.length  =
-			cpu_to_be32(sizeof(p) - sizeof(struct p_header) + dgs + req->i.size);
-	}
-
+	prepare_header(mdev, &p.head, P_DATA, sizeof(p) - sizeof(struct p_header) + dgs + req->i.size);
 	p.sector   = cpu_to_be64(req->i.sector);
 	p.block_id = (unsigned long)req;
-	p.seq_num  = cpu_to_be32(req->seq_num =
-				 atomic_add_return(1, &mdev->packet_seq));
+	p.seq_num  = cpu_to_be32(req->seq_num = atomic_add_return(1, &mdev->packet_seq));
 
 	dp_flags = bio_flags_to_wire(mdev, req->master_bio->bi_rw);
 
@@ -2748,18 +2753,7 @@ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd,
 	dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_w_tfm) ?
 		crypto_hash_digestsize(mdev->tconn->integrity_w_tfm) : 0;
 
-	if (e->i.size <= DRBD_MAX_SIZE_H80_PACKET) {
-		p.head.h80.magic   = cpu_to_be32(DRBD_MAGIC);
-		p.head.h80.command = cpu_to_be16(cmd);
-		p.head.h80.length  =
-			cpu_to_be16(sizeof(p) - sizeof(struct p_header80) + dgs + e->i.size);
-	} else {
-		p.head.h95.magic   = cpu_to_be16(DRBD_MAGIC_BIG);
-		p.head.h95.command = cpu_to_be16(cmd);
-		p.head.h95.length  =
-			cpu_to_be32(sizeof(p) - sizeof(struct p_header80) + dgs + e->i.size);
-	}
-
+	prepare_header(mdev, &p.head, cmd, sizeof(p) - sizeof(struct p_header80) + dgs + e->i.size);
 	p.sector   = cpu_to_be64(e->i.sector);
 	p.block_id = e->block_id;
 	/* p.seq_num  = 0;    No sequence numbers here.. */
@@ -3028,7 +3022,7 @@ void drbd_init_set_defaults(struct drbd_conf *mdev)
 	drbd_thread_init(mdev, &mdev->tconn->worker, drbd_worker);
 	drbd_thread_init(mdev, &mdev->tconn->asender, drbd_asender);
 
-	mdev->tconn->agreed_pro_version = PRO_VERSION_MAX;
+	/* mdev->tconn->agreed_pro_version gets initialized in drbd_connect() */
 	mdev->write_ordering = WO_bdev_flush;
 	mdev->resync_wenr = LC_FREE;
 	mdev->peer_max_bio_size = DRBD_MAX_BIO_SIZE_SAFE;
@@ -3506,12 +3500,8 @@ int __init drbd_init(void)
 {
 	int err;
 
-	if (sizeof(struct p_handshake) != 80) {
-		printk(KERN_ERR
-		       "drbd: never change the size or layout "
-		       "of the HandShake packet.\n");
-		return -EINVAL;
-	}
+	BUILD_BUG_ON(sizeof(struct p_header80) != sizeof(struct p_header95));
+	BUILD_BUG_ON(sizeof(struct p_handshake) != 80);
 
 	if (minor_count < DRBD_MINOR_COUNT_MIN || minor_count > DRBD_MINOR_COUNT_MAX) {
 		printk(KERN_ERR
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 9393fe482efc..8f5a241fe20a 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -761,6 +761,9 @@ static int drbd_connect(struct drbd_conf *mdev)
 		return -2;
 
 	clear_bit(DISCARD_CONCURRENT, &mdev->flags);
+	mdev->tconn->agreed_pro_version = 99;
+	/* agreed_pro_version must be smaller than 100 so we send the old
+	   header (h80) in the first packet and in the handshake packet. */
 
 	sock  = NULL;
 	msock = NULL;
@@ -935,12 +938,12 @@ static int drbd_recv_header(struct drbd_conf *mdev, enum drbd_packets *cmd, unsi
 		return false;
 	}
 
-	if (likely(h->h80.magic == cpu_to_be32(DRBD_MAGIC))) {
+	if (h->h80.magic == cpu_to_be32(DRBD_MAGIC)) {
 		*cmd = be16_to_cpu(h->h80.command);
 		*packet_size = be16_to_cpu(h->h80.length);
 	} else if (h->h95.magic == cpu_to_be16(DRBD_MAGIC_BIG)) {
 		*cmd = be16_to_cpu(h->h95.command);
-		*packet_size = be32_to_cpu(h->h95.length);
+		*packet_size = be32_to_cpu(h->h95.length) & 0x00ffffff;
 	} else {
 		dev_err(DEV, "magic?? on data m: 0x%08x c: %d l: %d\n",
 		    be32_to_cpu(h->h80.magic),
diff --git a/include/linux/drbd.h b/include/linux/drbd.h
index d28202811672..35fc08a0a552 100644
--- a/include/linux/drbd.h
+++ b/include/linux/drbd.h
@@ -56,7 +56,7 @@ extern const char *drbd_buildtag(void);
 #define REL_VERSION "8.3.11"
 #define API_VERSION 88
 #define PRO_VERSION_MIN 86
-#define PRO_VERSION_MAX 96
+#define PRO_VERSION_MAX 100
 
 
 enum drbd_io_error_p {
-- 
cgit v1.2.3


From 4738fa16907a933d72bbcae1b8922dc9330fde92 Mon Sep 17 00:00:00 2001
From: Lars Ellenberg <lars.ellenberg@linbit.com>
Date: Mon, 21 Feb 2011 13:20:55 +0100
Subject: drbd: use clear_bit_unlock() where appropriate

Some open-coded clear_bit(); smp_mb__after_clear_bit();
should in fact have been smp_mb__before_clear_bit(); clear_bit();

Instead, use clear_bit_unlock() to annotate the intention,
and have it do the right thing.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_bitmap.c |  3 +--
 drivers/block/drbd/drbd_main.c   |  3 +--
 include/linux/lru_cache.h        |  3 +--
 lib/lru_cache.c                  | 10 ++++------
 4 files changed, 7 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index e8d652f197c3..4be737055718 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -219,8 +219,7 @@ static void bm_page_unlock_io(struct drbd_conf *mdev, int page_nr)
 {
 	struct drbd_bitmap *b = mdev->bitmap;
 	void *addr = &page_private(b->bm_pages[page_nr]);
-	clear_bit(BM_PAGE_IO_LOCK, addr);
-	smp_mb__after_clear_bit();
+	clear_bit_unlock(BM_PAGE_IO_LOCK, addr);
 	wake_up(&mdev->bitmap->bm_io_wait);
 }
 
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 592f0c949fd0..c77e51a40926 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -2818,8 +2818,7 @@ static int w_bitmap_io(struct drbd_work *w, int unused)
 		put_ldev(mdev);
 	}
 
-	clear_bit(BITMAP_IO, &mdev->flags);
-	smp_mb__after_clear_bit();
+	clear_bit_unlock(BITMAP_IO, &mdev->flags);
 	wake_up(&mdev->misc_wait);
 
 	if (work->done)
diff --git a/include/linux/lru_cache.h b/include/linux/lru_cache.h
index 7a71ffad037c..4cceafb0732d 100644
--- a/include/linux/lru_cache.h
+++ b/include/linux/lru_cache.h
@@ -275,8 +275,7 @@ static inline int lc_try_lock(struct lru_cache *lc)
  */
 static inline void lc_unlock(struct lru_cache *lc)
 {
-	clear_bit(__LC_DIRTY, &lc->flags);
-	smp_mb__after_clear_bit();
+	clear_bit_unlock(__LC_DIRTY, &lc->flags);
 }
 
 static inline int lc_is_used(struct lru_cache *lc, unsigned int enr)
diff --git a/lib/lru_cache.c b/lib/lru_cache.c
index a07e7268d7ed..9f353f7f41ca 100644
--- a/lib/lru_cache.c
+++ b/lib/lru_cache.c
@@ -44,8 +44,8 @@ MODULE_LICENSE("GPL");
 } while (0)
 
 #define RETURN(x...)     do { \
-	clear_bit(__LC_PARANOIA, &lc->flags); \
-	smp_mb__after_clear_bit(); return x ; } while (0)
+	clear_bit_unlock(__LC_PARANOIA, &lc->flags); \
+	return x ; } while (0)
 
 /* BUG() if e is not one of the elements tracked by lc */
 #define PARANOIA_LC_ELEMENT(lc, e) do {	\
@@ -438,8 +438,7 @@ void lc_changed(struct lru_cache *lc, struct lc_element *e)
 	hlist_add_head(&e->colision, lc_hash_slot(lc, lc->new_number));
 	lc->changing_element = NULL;
 	lc->new_number = LC_FREE;
-	clear_bit(__LC_DIRTY, &lc->flags);
-	smp_mb__after_clear_bit();
+	clear_bit_unlock(__LC_DIRTY, &lc->flags);
 	RETURN();
 }
 
@@ -463,8 +462,7 @@ unsigned int lc_put(struct lru_cache *lc, struct lc_element *e)
 		/* move it to the front of LRU. */
 		list_move(&e->list, &lc->lru);
 		lc->used--;
-		clear_bit(__LC_STARVING, &lc->flags);
-		smp_mb__after_clear_bit();
+		clear_bit_unlock(__LC_STARVING, &lc->flags);
 	}
 	RETURN(e->refcnt);
 }
-- 
cgit v1.2.3


From 46a15bc3ec425b546d140581c28192ab7877ddc4 Mon Sep 17 00:00:00 2001
From: Lars Ellenberg <lars.ellenberg@linbit.com>
Date: Mon, 21 Feb 2011 13:21:01 +0100
Subject: lru_cache: allow multiple changes per transaction

Allow multiple changes to the active set of elements in lru_cache.
The only current user of lru_cache, drbd, is driving this generalisation.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_actlog.c |  50 ++------
 drivers/block/drbd/drbd_nl.c     |   4 +-
 include/linux/lru_cache.h        |  68 +++++++----
 lib/lru_cache.c                  | 243 +++++++++++++++++++++++++++------------
 4 files changed, 225 insertions(+), 140 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index 1ce3de6eed1b..44097c87fed7 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -175,7 +175,6 @@ static struct lc_element *_al_get(struct drbd_conf *mdev, unsigned int enr)
 {
 	struct lc_element *al_ext;
 	struct lc_element *tmp;
-	unsigned long     al_flags = 0;
 	int wake;
 
 	spin_lock_irq(&mdev->al_lock);
@@ -190,19 +189,8 @@ static struct lc_element *_al_get(struct drbd_conf *mdev, unsigned int enr)
 			return NULL;
 		}
 	}
-	al_ext   = lc_get(mdev->act_log, enr);
-	al_flags = mdev->act_log->flags;
+	al_ext = lc_get(mdev->act_log, enr);
 	spin_unlock_irq(&mdev->al_lock);
-
-	/*
-	if (!al_ext) {
-		if (al_flags & LC_STARVING)
-			dev_warn(DEV, "Have to wait for LRU element (AL too small?)\n");
-		if (al_flags & LC_DIRTY)
-			dev_warn(DEV, "Ongoing AL update (AL device too slow?)\n");
-	}
-	*/
-
 	return al_ext;
 }
 
@@ -235,7 +223,7 @@ void drbd_al_begin_io(struct drbd_conf *mdev, sector_t sector)
 		mdev->al_writ_cnt++;
 
 		spin_lock_irq(&mdev->al_lock);
-		lc_changed(mdev->act_log, al_ext);
+		lc_committed(mdev->act_log);
 		spin_unlock_irq(&mdev->al_lock);
 		wake_up(&mdev->al_wait);
 	}
@@ -601,7 +589,7 @@ void drbd_al_shrink(struct drbd_conf *mdev)
 	struct lc_element *al_ext;
 	int i;
 
-	D_ASSERT(test_bit(__LC_DIRTY, &mdev->act_log->flags));
+	D_ASSERT(test_bit(__LC_LOCKED, &mdev->act_log->flags));
 
 	for (i = 0; i < mdev->act_log->nr_elements; i++) {
 		al_ext = lc_element_by_index(mdev->act_log, i);
@@ -708,7 +696,9 @@ static void drbd_try_clear_on_disk_bm(struct drbd_conf *mdev, sector_t sector,
 			}
 			ext->rs_left = rs_left;
 			ext->rs_failed = success ? 0 : count;
-			lc_changed(mdev->resync, &ext->lce);
+			/* we don't keep a persistent log of the resync lru,
+			 * we can commit any change right away. */
+			lc_committed(mdev->resync);
 		}
 		lc_put(mdev->resync, &ext->lce);
 		/* no race, we are within the al_lock! */
@@ -892,7 +882,7 @@ struct bm_extent *_bme_get(struct drbd_conf *mdev, unsigned int enr)
 		if (bm_ext->lce.lc_number != enr) {
 			bm_ext->rs_left = drbd_bm_e_weight(mdev, enr);
 			bm_ext->rs_failed = 0;
-			lc_changed(mdev->resync, &bm_ext->lce);
+			lc_committed(mdev->resync);
 			wakeup = 1;
 		}
 		if (bm_ext->lce.refcnt == 1)
@@ -908,7 +898,7 @@ struct bm_extent *_bme_get(struct drbd_conf *mdev, unsigned int enr)
 		if (rs_flags & LC_STARVING)
 			dev_warn(DEV, "Have to wait for element"
 			     " (resync LRU too small?)\n");
-		BUG_ON(rs_flags & LC_DIRTY);
+		BUG_ON(rs_flags & LC_LOCKED);
 	}
 
 	return bm_ext;
@@ -916,26 +906,12 @@ struct bm_extent *_bme_get(struct drbd_conf *mdev, unsigned int enr)
 
 static int _is_in_al(struct drbd_conf *mdev, unsigned int enr)
 {
-	struct lc_element *al_ext;
-	int rv = 0;
+	int rv;
 
 	spin_lock_irq(&mdev->al_lock);
-	if (unlikely(enr == mdev->act_log->new_number))
-		rv = 1;
-	else {
-		al_ext = lc_find(mdev->act_log, enr);
-		if (al_ext) {
-			if (al_ext->refcnt)
-				rv = 1;
-		}
-	}
+	rv = lc_is_used(mdev->act_log, enr);
 	spin_unlock_irq(&mdev->al_lock);
 
-	/*
-	if (unlikely(rv)) {
-		dev_info(DEV, "Delaying sync read until app's write is done\n");
-	}
-	*/
 	return rv;
 }
 
@@ -1065,13 +1041,13 @@ int drbd_try_rs_begin_io(struct drbd_conf *mdev, sector_t sector)
 			if (rs_flags & LC_STARVING)
 				dev_warn(DEV, "Have to wait for element"
 				     " (resync LRU too small?)\n");
-			BUG_ON(rs_flags & LC_DIRTY);
+			BUG_ON(rs_flags & LC_LOCKED);
 			goto try_again;
 		}
 		if (bm_ext->lce.lc_number != enr) {
 			bm_ext->rs_left = drbd_bm_e_weight(mdev, enr);
 			bm_ext->rs_failed = 0;
-			lc_changed(mdev->resync, &bm_ext->lce);
+			lc_committed(mdev->resync);
 			wake_up(&mdev->al_wait);
 			D_ASSERT(test_bit(BME_LOCKED, &bm_ext->flags) == 0);
 		}
@@ -1082,8 +1058,6 @@ int drbd_try_rs_begin_io(struct drbd_conf *mdev, sector_t sector)
 	}
 check_al:
 	for (i = 0; i < AL_EXT_PER_BM_SECT; i++) {
-		if (unlikely(al_enr+i == mdev->act_log->new_number))
-			goto try_again;
 		if (lc_is_used(mdev->act_log, al_enr+i))
 			goto try_again;
 	}
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index ae8f42e38e4f..0a92f5226c2a 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -760,7 +760,7 @@ static int drbd_check_al_size(struct drbd_conf *mdev)
 
 	in_use = 0;
 	t = mdev->act_log;
-	n = lc_create("act_log", drbd_al_ext_cache,
+	n = lc_create("act_log", drbd_al_ext_cache, 1,
 		mdev->sync_conf.al_extents, sizeof(struct lc_element), 0);
 
 	if (n == NULL) {
@@ -1016,7 +1016,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
 	}
 
 	resync_lru = lc_create("resync", drbd_bm_ext_cache,
-			61, sizeof(struct bm_extent),
+			1, 61, sizeof(struct bm_extent),
 			offsetof(struct bm_extent, lce));
 	if (!resync_lru) {
 		retcode = ERR_NOMEM;
diff --git a/include/linux/lru_cache.h b/include/linux/lru_cache.h
index 4cceafb0732d..cbafae40c649 100644
--- a/include/linux/lru_cache.h
+++ b/include/linux/lru_cache.h
@@ -166,9 +166,11 @@ struct lc_element {
 	/* if we want to track a larger set of objects,
 	 * it needs to become arch independend u64 */
 	unsigned lc_number;
-
 	/* special label when on free list */
 #define LC_FREE (~0U)
+
+	/* for pending changes */
+	unsigned lc_new_number;
 };
 
 struct lru_cache {
@@ -176,6 +178,7 @@ struct lru_cache {
 	struct list_head lru;
 	struct list_head free;
 	struct list_head in_use;
+	struct list_head to_be_changed;
 
 	/* the pre-created kmem cache to allocate the objects from */
 	struct kmem_cache *lc_cache;
@@ -186,7 +189,7 @@ struct lru_cache {
 	size_t element_off;
 
 	/* number of elements (indices) */
-	unsigned int  nr_elements;
+	unsigned int nr_elements;
 	/* Arbitrary limit on maximum tracked objects. Practical limit is much
 	 * lower due to allocation failures, probably. For typical use cases,
 	 * nr_elements should be a few thousand at most.
@@ -194,18 +197,19 @@ struct lru_cache {
 	 * 8 high bits of .lc_index to be overloaded with flags in the future. */
 #define LC_MAX_ACTIVE	(1<<24)
 
+	/* allow to accumulate a few (index:label) changes,
+	 * but no more than max_pending_changes */
+	unsigned int max_pending_changes;
+	/* number of elements currently on to_be_changed list */
+	unsigned int pending_changes;
+
 	/* statistics */
-	unsigned used; /* number of lelements currently on in_use list */
-	unsigned long hits, misses, starving, dirty, changed;
+	unsigned used; /* number of elements currently on in_use list */
+	unsigned long hits, misses, starving, locked, changed;
 
 	/* see below: flag-bits for lru_cache */
 	unsigned long flags;
 
-	/* when changing the label of an index element */
-	unsigned int  new_number;
-
-	/* for paranoia when changing the label of an index element */
-	struct lc_element *changing_element;
 
 	void  *lc_private;
 	const char *name;
@@ -221,10 +225,15 @@ enum {
 	/* debugging aid, to catch concurrent access early.
 	 * user needs to guarantee exclusive access by proper locking! */
 	__LC_PARANOIA,
-	/* if we need to change the set, but currently there is a changing
-	 * transaction pending, we are "dirty", and must deferr further
-	 * changing requests */
+
+	/* annotate that the set is "dirty", possibly accumulating further
+	 * changes, until a transaction is finally triggered */
 	__LC_DIRTY,
+
+	/* Locked, no further changes allowed.
+	 * Also used to serialize changing transactions. */
+	__LC_LOCKED,
+
 	/* if we need to change the set, but currently there is no free nor
 	 * unused element available, we are "starving", and must not give out
 	 * further references, to guarantee that eventually some refcnt will
@@ -236,9 +245,11 @@ enum {
 };
 #define LC_PARANOIA (1<<__LC_PARANOIA)
 #define LC_DIRTY    (1<<__LC_DIRTY)
+#define LC_LOCKED   (1<<__LC_LOCKED)
 #define LC_STARVING (1<<__LC_STARVING)
 
 extern struct lru_cache *lc_create(const char *name, struct kmem_cache *cache,
+		unsigned max_pending_changes,
 		unsigned e_count, size_t e_size, size_t e_off);
 extern void lc_reset(struct lru_cache *lc);
 extern void lc_destroy(struct lru_cache *lc);
@@ -249,7 +260,7 @@ extern struct lc_element *lc_try_get(struct lru_cache *lc, unsigned int enr);
 extern struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr);
 extern struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr);
 extern unsigned int lc_put(struct lru_cache *lc, struct lc_element *e);
-extern void lc_changed(struct lru_cache *lc, struct lc_element *e);
+extern void lc_committed(struct lru_cache *lc);
 
 struct seq_file;
 extern size_t lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc);
@@ -258,31 +269,40 @@ extern void lc_seq_dump_details(struct seq_file *seq, struct lru_cache *lc, char
 				void (*detail) (struct seq_file *, struct lc_element *));
 
 /**
- * lc_try_lock - can be used to stop lc_get() from changing the tracked set
+ * lc_try_lock_for_transaction - can be used to stop lc_get() from changing the tracked set
  * @lc: the lru cache to operate on
  *
- * Note that the reference counts and order on the active and lru lists may
- * still change.  Returns true if we acquired the lock.
+ * Allows (expects) the set to be "dirty".  Note that the reference counts and
+ * order on the active and lru lists may still change.  Used to serialize
+ * changing transactions.  Returns true if we aquired the lock.
  */
-static inline int lc_try_lock(struct lru_cache *lc)
+static inline int lc_try_lock_for_transaction(struct lru_cache *lc)
 {
-	return !test_and_set_bit(__LC_DIRTY, &lc->flags);
+	return !test_and_set_bit(__LC_LOCKED, &lc->flags);
 }
 
+/**
+ * lc_try_lock - variant to stop lc_get() from changing the tracked set
+ * @lc: the lru cache to operate on
+ *
+ * Note that the reference counts and order on the active and lru lists may
+ * still change.  Only works on a "clean" set.  Returns true if we aquired the
+ * lock, which means there are no pending changes, and any further attempt to
+ * change the set will not succeed until the next lc_unlock().
+ */
+extern int lc_try_lock(struct lru_cache *lc);
+
 /**
  * lc_unlock - unlock @lc, allow lc_get() to change the set again
  * @lc: the lru cache to operate on
  */
 static inline void lc_unlock(struct lru_cache *lc)
 {
-	clear_bit_unlock(__LC_DIRTY, &lc->flags);
+	clear_bit(__LC_DIRTY, &lc->flags);
+	clear_bit_unlock(__LC_LOCKED, &lc->flags);
 }
 
-static inline int lc_is_used(struct lru_cache *lc, unsigned int enr)
-{
-	struct lc_element *e = lc_find(lc, enr);
-	return e && e->refcnt;
-}
+extern bool lc_is_used(struct lru_cache *lc, unsigned int enr);
 
 #define lc_entry(ptr, type, member) \
 	container_of(ptr, type, member)
diff --git a/lib/lru_cache.c b/lib/lru_cache.c
index 17621684758a..d71d89498943 100644
--- a/lib/lru_cache.c
+++ b/lib/lru_cache.c
@@ -55,9 +55,40 @@ MODULE_LICENSE("GPL");
 	BUG_ON(i >= lc_->nr_elements);	\
 	BUG_ON(lc_->lc_element[i] != e_); } while (0)
 
+
+/* We need to atomically
+ *  - try to grab the lock (set LC_LOCKED)
+ *  - only if there is no pending transaction
+ *    (neither LC_DIRTY nor LC_STARVING is set)
+ * Because of PARANOIA_ENTRY() above abusing lc->flags as well,
+ * it is not sufficient to just say
+ *	return 0 == cmpxchg(&lc->flags, 0, LC_LOCKED);
+ */
+int lc_try_lock(struct lru_cache *lc)
+{
+	unsigned long val;
+	do {
+		val = cmpxchg(&lc->flags, 0, LC_LOCKED);
+	} while (unlikely (val == LC_PARANOIA));
+	/* Spin until no-one is inside a PARANOIA_ENTRY()/RETURN() section. */
+	return 0 == val;
+#if 0
+	/* Alternative approach, spin in case someone enters or leaves a
+	 * PARANOIA_ENTRY()/RETURN() section. */
+	unsigned long old, new, val;
+	do {
+		old = lc->flags & LC_PARANOIA;
+		new = old | LC_LOCKED;
+		val = cmpxchg(&lc->flags, old, new);
+	} while (unlikely (val == (old ^ LC_PARANOIA)));
+	return old == val;
+#endif
+}
+
 /**
  * lc_create - prepares to track objects in an active set
  * @name: descriptive name only used in lc_seq_printf_stats and lc_seq_dump_details
+ * @max_pending_changes: maximum changes to accumulate until a transaction is required
  * @e_count: number of elements allowed to be active simultaneously
  * @e_size: size of the tracked objects
  * @e_off: offset to the &struct lc_element member in a tracked object
@@ -66,6 +97,7 @@ MODULE_LICENSE("GPL");
  * or NULL on (allocation) failure.
  */
 struct lru_cache *lc_create(const char *name, struct kmem_cache *cache,
+		unsigned max_pending_changes,
 		unsigned e_count, size_t e_size, size_t e_off)
 {
 	struct hlist_head *slot = NULL;
@@ -98,12 +130,13 @@ struct lru_cache *lc_create(const char *name, struct kmem_cache *cache,
 	INIT_LIST_HEAD(&lc->in_use);
 	INIT_LIST_HEAD(&lc->lru);
 	INIT_LIST_HEAD(&lc->free);
+	INIT_LIST_HEAD(&lc->to_be_changed);
 
 	lc->name = name;
 	lc->element_size = e_size;
 	lc->element_off = e_off;
 	lc->nr_elements = e_count;
-	lc->new_number = LC_FREE;
+	lc->max_pending_changes = max_pending_changes;
 	lc->lc_cache = cache;
 	lc->lc_element = element;
 	lc->lc_slot = slot;
@@ -117,6 +150,7 @@ struct lru_cache *lc_create(const char *name, struct kmem_cache *cache,
 		e = p + e_off;
 		e->lc_index = i;
 		e->lc_number = LC_FREE;
+		e->lc_new_number = LC_FREE;
 		list_add(&e->list, &lc->free);
 		element[i] = e;
 	}
@@ -175,15 +209,15 @@ void lc_reset(struct lru_cache *lc)
 	INIT_LIST_HEAD(&lc->in_use);
 	INIT_LIST_HEAD(&lc->lru);
 	INIT_LIST_HEAD(&lc->free);
+	INIT_LIST_HEAD(&lc->to_be_changed);
 	lc->used = 0;
 	lc->hits = 0;
 	lc->misses = 0;
 	lc->starving = 0;
-	lc->dirty = 0;
+	lc->locked = 0;
 	lc->changed = 0;
+	lc->pending_changes = 0;
 	lc->flags = 0;
-	lc->changing_element = NULL;
-	lc->new_number = LC_FREE;
 	memset(lc->lc_slot, 0, sizeof(struct hlist_head) * lc->nr_elements);
 
 	for (i = 0; i < lc->nr_elements; i++) {
@@ -194,6 +228,7 @@ void lc_reset(struct lru_cache *lc)
 		/* re-init it */
 		e->lc_index = i;
 		e->lc_number = LC_FREE;
+		e->lc_new_number = LC_FREE;
 		list_add(&e->list, &lc->free);
 	}
 }
@@ -208,14 +243,14 @@ size_t lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc)
 	/* NOTE:
 	 * total calls to lc_get are
 	 * (starving + hits + misses)
-	 * misses include "dirty" count (update from an other thread in
+	 * misses include "locked" count (update from an other thread in
 	 * progress) and "changed", when this in fact lead to an successful
 	 * update of the cache.
 	 */
 	return seq_printf(seq, "\t%s: used:%u/%u "
-		"hits:%lu misses:%lu starving:%lu dirty:%lu changed:%lu\n",
+		"hits:%lu misses:%lu starving:%lu locked:%lu changed:%lu\n",
 		lc->name, lc->used, lc->nr_elements,
-		lc->hits, lc->misses, lc->starving, lc->dirty, lc->changed);
+		lc->hits, lc->misses, lc->starving, lc->locked, lc->changed);
 }
 
 static struct hlist_head *lc_hash_slot(struct lru_cache *lc, unsigned int enr)
@@ -224,16 +259,8 @@ static struct hlist_head *lc_hash_slot(struct lru_cache *lc, unsigned int enr)
 }
 
 
-/**
- * lc_find - find element by label, if present in the hash table
- * @lc: The lru_cache object
- * @enr: element number
- *
- * Returns the pointer to an element, if the element with the requested
- * "label" or element number is present in the hash table,
- * or NULL if not found. Does not change the refcnt.
- */
-struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr)
+static struct lc_element *__lc_find(struct lru_cache *lc, unsigned int enr,
+		bool include_changing)
 {
 	struct hlist_node *n;
 	struct lc_element *e;
@@ -241,29 +268,48 @@ struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr)
 	BUG_ON(!lc);
 	BUG_ON(!lc->nr_elements);
 	hlist_for_each_entry(e, n, lc_hash_slot(lc, enr), colision) {
-		if (e->lc_number == enr)
+		/* "about to be changed" elements, pending transaction commit,
+		 * are hashed by their "new number". "Normal" elements have
+		 * lc_number == lc_new_number. */
+		if (e->lc_new_number != enr)
+			continue;
+		if (e->lc_new_number == e->lc_number || include_changing)
 			return e;
+		break;
 	}
 	return NULL;
 }
 
-/* returned element will be "recycled" immediately */
-static struct lc_element *lc_evict(struct lru_cache *lc)
+/**
+ * lc_find - find element by label, if present in the hash table
+ * @lc: The lru_cache object
+ * @enr: element number
+ *
+ * Returns the pointer to an element, if the element with the requested
+ * "label" or element number is present in the hash table,
+ * or NULL if not found. Does not change the refcnt.
+ * Ignores elements that are "about to be used", i.e. not yet in the active
+ * set, but still pending transaction commit.
+ */
+struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr)
 {
-	struct list_head  *n;
-	struct lc_element *e;
-
-	if (list_empty(&lc->lru))
-		return NULL;
-
-	n = lc->lru.prev;
-	e = list_entry(n, struct lc_element, list);
-
-	PARANOIA_LC_ELEMENT(lc, e);
+	return __lc_find(lc, enr, 0);
+}
 
-	list_del(&e->list);
-	hlist_del(&e->colision);
-	return e;
+/**
+ * lc_is_used - find element by label
+ * @lc: The lru_cache object
+ * @enr: element number
+ *
+ * Returns true, if the element with the requested "label" or element number is
+ * present in the hash table, and is used (refcnt > 0).
+ * Also finds elements that are not _currently_ used but only "about to be
+ * used", i.e. on the "to_be_changed" list, pending transaction commit.
+ */
+bool lc_is_used(struct lru_cache *lc, unsigned int enr)
+{
+	struct lc_element *e = __lc_find(lc, enr, 1);
+	return e && e->refcnt;
 }
 
 /**
@@ -280,22 +326,34 @@ void lc_del(struct lru_cache *lc, struct lc_element *e)
 	PARANOIA_LC_ELEMENT(lc, e);
 	BUG_ON(e->refcnt);
 
-	e->lc_number = LC_FREE;
+	e->lc_number = e->lc_new_number = LC_FREE;
 	hlist_del_init(&e->colision);
 	list_move(&e->list, &lc->free);
 	RETURN();
 }
 
-static struct lc_element *lc_get_unused_element(struct lru_cache *lc)
+static struct lc_element *lc_prepare_for_change(struct lru_cache *lc, unsigned new_number)
 {
 	struct list_head *n;
+	struct lc_element *e;
+
+	if (!list_empty(&lc->free))
+		n = lc->free.next;
+	else if (!list_empty(&lc->lru))
+		n = lc->lru.prev;
+	else
+		return NULL;
+
+	e = list_entry(n, struct lc_element, list);
+	PARANOIA_LC_ELEMENT(lc, e);
 
-	if (list_empty(&lc->free))
-		return lc_evict(lc);
+	e->lc_new_number = new_number;
+	if (!hlist_unhashed(&e->colision))
+		__hlist_del(&e->colision);
+	hlist_add_head(&e->colision, lc_hash_slot(lc, new_number));
+	list_move(&e->list, &lc->to_be_changed);
 
-	n = lc->free.next;
-	list_del(n);
-	return list_entry(n, struct lc_element, list);
+	return e;
 }
 
 static int lc_unused_element_available(struct lru_cache *lc)
@@ -318,8 +376,12 @@ static struct lc_element *__lc_get(struct lru_cache *lc, unsigned int enr, bool
 		RETURN(NULL);
 	}
 
-	e = lc_find(lc, enr);
-	if (e) {
+	e = __lc_find(lc, enr, 1);
+	/* if lc_new_number != lc_number,
+	 * this enr is currently being pulled in already,
+	 * and will be available once the pending transaction
+	 * has been committed. */
+	if (e && e->lc_new_number == e->lc_number) {
 		++lc->hits;
 		if (e->refcnt++ == 0)
 			lc->used++;
@@ -331,6 +393,24 @@ static struct lc_element *__lc_get(struct lru_cache *lc, unsigned int enr, bool
 	if (!may_change)
 		RETURN(NULL);
 
+	/* It has been found above, but on the "to_be_changed" list, not yet
+	 * committed.  Don't pull it in twice, wait for the transaction, then
+	 * try again */
+	if (e)
+		RETURN(NULL);
+
+	/* To avoid races with lc_try_lock(), first, mark us dirty
+	 * (using test_and_set_bit, as it implies memory barriers), ... */
+	test_and_set_bit(__LC_DIRTY, &lc->flags);
+
+	/* ... only then check if it is locked anyways. If lc_unlock clears
+	 * the dirty bit again, that's not a problem, we will come here again.
+	 */
+	if (test_bit(__LC_LOCKED, &lc->flags)) {
+		++lc->locked;
+		RETURN(NULL);
+	}
+
 	/* In case there is nothing available and we can not kick out
 	 * the LRU element, we have to wait ...
 	 */
@@ -339,24 +419,19 @@ static struct lc_element *__lc_get(struct lru_cache *lc, unsigned int enr, bool
 		RETURN(NULL);
 	}
 
-	/* it was not present in the active set.
-	 * we are going to recycle an unused (or even "free") element.
-	 * user may need to commit a transaction to record that change.
-	 * we serialize on flags & LC_DIRTY */
-	if (test_and_set_bit(__LC_DIRTY, &lc->flags)) {
-		++lc->dirty;
+	/* It was not present in the active set.  We are going to recycle an
+	 * unused (or even "free") element, but we won't accumulate more than
+	 * max_pending_changes changes.  */
+	if (lc->pending_changes >= lc->max_pending_changes)
 		RETURN(NULL);
-	}
 
-	e = lc_get_unused_element(lc);
+	e = lc_prepare_for_change(lc, enr);
 	BUG_ON(!e);
 
 	clear_bit(__LC_STARVING, &lc->flags);
 	BUG_ON(++e->refcnt != 1);
 	lc->used++;
-
-	lc->changing_element = e;
-	lc->new_number = enr;
+	lc->pending_changes++;
 
 	RETURN(e);
 }
@@ -388,12 +463,15 @@ static struct lc_element *__lc_get(struct lru_cache *lc, unsigned int enr, bool
  *  pointer to an UNUSED element with some different element number,
  *          where that different number may also be %LC_FREE.
  *
- *          In this case, the cache is marked %LC_DIRTY (blocking further changes),
- *          and the returned element pointer is removed from the lru list and
- *          hash collision chains.  The user now should do whatever housekeeping
- *          is necessary.
- *          Then he must call lc_changed(lc,element_pointer), to finish
- *          the change.
+ *          In this case, the cache is marked %LC_DIRTY,
+ *          so lc_try_lock() will no longer succeed.
+ *          The returned element pointer is moved to the "to_be_changed" list,
+ *          and registered with the new element number on the hash collision chains,
+ *          so it is possible to pick it up from lc_is_used().
+ *          Up to "max_pending_changes" (see lc_create()) can be accumulated.
+ *          The user now should do whatever housekeeping is necessary,
+ *          typically serialize on lc_try_lock_for_transaction(), then call
+ *          lc_committed(lc) and lc_unlock(), to finish the change.
  *
  * NOTE: The user needs to check the lc_number on EACH use, so he recognizes
  *       any cache set change.
@@ -425,22 +503,25 @@ struct lc_element *lc_try_get(struct lru_cache *lc, unsigned int enr)
 }
 
 /**
- * lc_changed - tell @lc that the change has been recorded
+ * lc_committed - tell @lc that pending changes have been recorded
  * @lc: the lru cache to operate on
- * @e: the element pending label change
+ *
+ * User is expected to serialize on explicit lc_try_lock_for_transaction()
+ * before the transaction is started, and later needs to lc_unlock() explicitly
+ * as well.
  */
-void lc_changed(struct lru_cache *lc, struct lc_element *e)
+void lc_committed(struct lru_cache *lc)
 {
+	struct lc_element *e, *tmp;
+
 	PARANOIA_ENTRY();
-	BUG_ON(e != lc->changing_element);
-	PARANOIA_LC_ELEMENT(lc, e);
-	++lc->changed;
-	e->lc_number = lc->new_number;
-	list_add(&e->list, &lc->in_use);
-	hlist_add_head(&e->colision, lc_hash_slot(lc, lc->new_number));
-	lc->changing_element = NULL;
-	lc->new_number = LC_FREE;
-	clear_bit_unlock(__LC_DIRTY, &lc->flags);
+	list_for_each_entry_safe(e, tmp, &lc->to_be_changed, list) {
+		/* count number of changes, not number of transactions */
+		++lc->changed;
+		e->lc_number = e->lc_new_number;
+		list_move(&e->list, &lc->in_use);
+	}
+	lc->pending_changes = 0;
 	RETURN();
 }
 
@@ -459,7 +540,7 @@ unsigned int lc_put(struct lru_cache *lc, struct lc_element *e)
 	PARANOIA_ENTRY();
 	PARANOIA_LC_ELEMENT(lc, e);
 	BUG_ON(e->refcnt == 0);
-	BUG_ON(e == lc->changing_element);
+	BUG_ON(e->lc_number != e->lc_new_number);
 	if (--e->refcnt == 0) {
 		/* move it to the front of LRU. */
 		list_move(&e->list, &lc->lru);
@@ -504,16 +585,24 @@ unsigned int lc_index_of(struct lru_cache *lc, struct lc_element *e)
 void lc_set(struct lru_cache *lc, unsigned int enr, int index)
 {
 	struct lc_element *e;
+	struct list_head *lh;
 
 	if (index < 0 || index >= lc->nr_elements)
 		return;
 
 	e = lc_element_by_index(lc, index);
-	e->lc_number = enr;
+	BUG_ON(e->lc_number != e->lc_new_number);
+	BUG_ON(e->refcnt != 0);
 
+	e->lc_number = e->lc_new_number = enr;
 	hlist_del_init(&e->colision);
-	hlist_add_head(&e->colision, lc_hash_slot(lc, enr));
-	list_move(&e->list, e->refcnt ? &lc->in_use : &lc->lru);
+	if (enr == LC_FREE)
+		lh = &lc->free;
+	else {
+		hlist_add_head(&e->colision, lc_hash_slot(lc, enr));
+		lh = &lc->lru;
+	}
+	list_move(&e->list, lh);
 }
 
 /**
@@ -553,8 +642,10 @@ EXPORT_SYMBOL(lc_try_get);
 EXPORT_SYMBOL(lc_find);
 EXPORT_SYMBOL(lc_get);
 EXPORT_SYMBOL(lc_put);
-EXPORT_SYMBOL(lc_changed);
+EXPORT_SYMBOL(lc_committed);
 EXPORT_SYMBOL(lc_element_by_index);
 EXPORT_SYMBOL(lc_index_of);
 EXPORT_SYMBOL(lc_seq_printf_stats);
 EXPORT_SYMBOL(lc_seq_dump_details);
+EXPORT_SYMBOL(lc_try_lock);
+EXPORT_SYMBOL(lc_is_used);
-- 
cgit v1.2.3


From 7ad651b52218eea3f9280dbb353dfe0c42742d85 Mon Sep 17 00:00:00 2001
From: Lars Ellenberg <lars.ellenberg@linbit.com>
Date: Mon, 21 Feb 2011 13:21:03 +0100
Subject: drbd: new on-disk activity log transaction format

Use a new on-disk transaction format for the activity log, which allows
for multiple changes to the active set per transaction.

Using 4k transaction blocks, we can now get rid of the work-around code
to deal with devices not supporting 512 byte logical block size.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_actlog.c | 409 ++++++++++++++++++++++++---------------
 drivers/block/drbd/drbd_int.h    |  44 +++--
 drivers/block/drbd/drbd_main.c   |   4 -
 drivers/block/drbd/drbd_nl.c     |  42 +---
 include/linux/drbd.h             |   4 +
 include/linux/drbd_limits.h      |   8 +-
 6 files changed, 302 insertions(+), 209 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index 44097c87fed7..ea3895de4e6d 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -24,21 +24,67 @@
  */
 
 #include <linux/slab.h>
+#include <linux/crc32c.h>
 #include <linux/drbd.h>
+#include <linux/drbd_limits.h>
+#include <linux/dynamic_debug.h>
 #include "drbd_int.h"
 #include "drbd_wrappers.h"
 
-/* We maintain a trivial checksum in our on disk activity log.
- * With that we can ensure correct operation even when the storage
- * device might do a partial (last) sector write while losing power.
- */
-struct __packed al_transaction {
-	u32       magic;
-	u32       tr_number;
-	struct __packed {
-		u32 pos;
-		u32 extent; } updates[1 + AL_EXTENTS_PT];
-	u32       xor_sum;
+/* all fields on disc in big endian */
+struct __packed al_transaction_on_disk {
+	/* don't we all like magic */
+	__be32	magic;
+
+	/* to identify the most recent transaction block
+	 * in the on disk ring buffer */
+	__be32	tr_number;
+
+	/* checksum on the full 4k block, with this field set to 0. */
+	__be32	crc32c;
+
+	/* type of transaction, special transaction types like:
+	 * purge-all, set-all-idle, set-all-active, ... to-be-defined */
+	__be16	transaction_type;
+
+	/* we currently allow only a few thousand extents,
+	 * so 16bit will be enough for the slot number. */
+
+	/* how many updates in this transaction */
+	__be16	n_updates;
+
+	/* maximum slot number, "al-extents" in drbd.conf speak.
+	 * Having this in each transaction should make reconfiguration
+	 * of that parameter easier. */
+	__be16	context_size;
+
+	/* slot number the context starts with */
+	__be16	context_start_slot_nr;
+
+	/* Some reserved bytes.  Expected usage is a 64bit counter of
+	 * sectors-written since device creation, and other data generation tag
+	 * supporting usage */
+	__be32	__reserved[4];
+
+	/* --- 36 byte used --- */
+
+	/* Reserve space for up to AL_UPDATES_PER_TRANSACTION changes
+	 * in one transaction, then use the remaining byte in the 4k block for
+	 * context information.  "Flexible" number of updates per transaction
+	 * does not help, as we have to account for the case when all update
+	 * slots are used anyways, so it would only complicate code without
+	 * additional benefit.
+	 */
+	__be16	update_slot_nr[AL_UPDATES_PER_TRANSACTION];
+
+	/* but the extent number is 32bit, which at an extent size of 4 MiB
+	 * allows to cover device sizes of up to 2**54 Byte (16 PiB) */
+	__be32	update_extent_nr[AL_UPDATES_PER_TRANSACTION];
+
+	/* --- 420 bytes used (36 + 64*6) --- */
+
+	/* 4096 - 420 = 3676 = 919 * 4 */
+	__be32	context[AL_CONTEXT_PER_TRANSACTION];
 };
 
 struct update_odbm_work {
@@ -48,11 +94,8 @@ struct update_odbm_work {
 
 struct update_al_work {
 	struct drbd_work w;
-	struct lc_element *al_ext;
 	struct completion event;
-	unsigned int enr;
-	/* if old_enr != LC_FREE, write corresponding bitmap sector, too */
-	unsigned int old_enr;
+	int err;
 };
 
 struct drbd_atodb_wait {
@@ -107,67 +150,30 @@ static int _drbd_md_sync_page_io(struct drbd_conf *mdev,
 int drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev,
 			 sector_t sector, int rw)
 {
-	int logical_block_size, mask, ok;
-	int offset = 0;
+	int ok;
 	struct page *iop = mdev->md_io_page;
 
 	D_ASSERT(mutex_is_locked(&mdev->md_io_mutex));
 
 	BUG_ON(!bdev->md_bdev);
 
-	logical_block_size = bdev_logical_block_size(bdev->md_bdev);
-	if (logical_block_size == 0)
-		logical_block_size = MD_SECTOR_SIZE;
-
-	/* in case logical_block_size != 512 [ s390 only? ] */
-	if (logical_block_size != MD_SECTOR_SIZE) {
-		mask = (logical_block_size / MD_SECTOR_SIZE) - 1;
-		D_ASSERT(mask == 1 || mask == 3 || mask == 7);
-		D_ASSERT(logical_block_size == (mask+1) * MD_SECTOR_SIZE);
-		offset = sector & mask;
-		sector = sector & ~mask;
-		iop = mdev->md_io_tmpp;
-
-		if (rw & WRITE) {
-			/* these are GFP_KERNEL pages, pre-allocated
-			 * on device initialization */
-			void *p = page_address(mdev->md_io_page);
-			void *hp = page_address(mdev->md_io_tmpp);
-
-			ok = _drbd_md_sync_page_io(mdev, bdev, iop, sector,
-					READ, logical_block_size);
-
-			if (unlikely(!ok)) {
-				dev_err(DEV, "drbd_md_sync_page_io(,%llus,"
-				    "READ [logical_block_size!=512]) failed!\n",
-				    (unsigned long long)sector);
-				return 0;
-			}
-
-			memcpy(hp + offset*MD_SECTOR_SIZE, p, MD_SECTOR_SIZE);
-		}
-	}
+	dev_dbg(DEV, "meta_data io: %s [%d]:%s(,%llus,%s)\n",
+	     current->comm, current->pid, __func__,
+	     (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ");
 
 	if (sector < drbd_md_first_sector(bdev) ||
-	    sector > drbd_md_last_sector(bdev))
+	    sector + 7 > drbd_md_last_sector(bdev))
 		dev_alert(DEV, "%s [%d]:%s(,%llus,%s) out of range md access!\n",
 		     current->comm, current->pid, __func__,
 		     (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ");
 
-	ok = _drbd_md_sync_page_io(mdev, bdev, iop, sector, rw, logical_block_size);
+	ok = _drbd_md_sync_page_io(mdev, bdev, iop, sector, rw, MD_BLOCK_SIZE);
 	if (unlikely(!ok)) {
 		dev_err(DEV, "drbd_md_sync_page_io(,%llus,%s) failed!\n",
 		    (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ");
 		return 0;
 	}
 
-	if (logical_block_size != MD_SECTOR_SIZE && !(rw & WRITE)) {
-		void *p = page_address(mdev->md_io_page);
-		void *hp = page_address(mdev->md_io_tmpp);
-
-		memcpy(p, hp + offset*MD_SECTOR_SIZE, MD_SECTOR_SIZE);
-	}
-
 	return ok;
 }
 
@@ -211,20 +217,34 @@ void drbd_al_begin_io(struct drbd_conf *mdev, sector_t sector)
 		 * current->bio_tail list now.
 		 * we have to delegate updates to the activity log
 		 * to the worker thread. */
-		init_completion(&al_work.event);
-		al_work.al_ext = al_ext;
-		al_work.enr = enr;
-		al_work.old_enr = al_ext->lc_number;
-		al_work.w.cb = w_al_write_transaction;
-		al_work.w.mdev = mdev;
-		drbd_queue_work_front(&mdev->tconn->data.work, &al_work.w);
-		wait_for_completion(&al_work.event);
 
-		mdev->al_writ_cnt++;
+		/* Serialize multiple transactions.
+		 * This uses test_and_set_bit, memory barrier is implicit.
+		 * Optimization potential:
+		 * first check for transaction number > old transaction number,
+		 * so not all waiters have to lock/unlock.  */
+		wait_event(mdev->al_wait, lc_try_lock_for_transaction(mdev->act_log));
 
-		spin_lock_irq(&mdev->al_lock);
-		lc_committed(mdev->act_log);
-		spin_unlock_irq(&mdev->al_lock);
+		/* Double check: it may have been committed by someone else,
+		 * while we have been waiting for the lock. */
+		if (al_ext->lc_number != enr) {
+			init_completion(&al_work.event);
+			al_work.w.cb = w_al_write_transaction;
+			al_work.w.mdev = mdev;
+			drbd_queue_work_front(&mdev->tconn->data.work, &al_work.w);
+			wait_for_completion(&al_work.event);
+
+			mdev->al_writ_cnt++;
+
+			spin_lock_irq(&mdev->al_lock);
+			/* FIXME
+			if (al_work.err)
+				we need an "lc_cancel" here;
+			*/
+			lc_committed(mdev->act_log);
+			spin_unlock_irq(&mdev->al_lock);
+		}
+		lc_unlock(mdev->act_log);
 		wake_up(&mdev->al_wait);
 	}
 }
@@ -283,95 +303,118 @@ w_al_write_transaction(struct drbd_work *w, int unused)
 {
 	struct update_al_work *aw = container_of(w, struct update_al_work, w);
 	struct drbd_conf *mdev = w->mdev;
-	struct lc_element *updated = aw->al_ext;
-	const unsigned int new_enr = aw->enr;
-	const unsigned int evicted = aw->old_enr;
-	struct al_transaction *buffer;
+	struct al_transaction_on_disk *buffer;
+	struct lc_element *e;
 	sector_t sector;
-	int i, n, mx;
-	unsigned int extent_nr;
-	u32 xor_sum = 0;
+	int i, mx;
+	unsigned extent_nr;
+	unsigned crc = 0;
 
 	if (!get_ldev(mdev)) {
-		dev_err(DEV,
-			"disk is %s, cannot start al transaction (-%d +%d)\n",
-			drbd_disk_str(mdev->state.disk), evicted, new_enr);
+		dev_err(DEV, "disk is %s, cannot start al transaction\n",
+			drbd_disk_str(mdev->state.disk));
+		aw->err = -EIO;
 		complete(&((struct update_al_work *)w)->event);
 		return 1;
 	}
-	/* do we have to do a bitmap write, first?
-	 * TODO reduce maximum latency:
-	 * submit both bios, then wait for both,
-	 * instead of doing two synchronous sector writes.
-	 * For now, we must not write the transaction,
-	 * if we cannot write out the bitmap of the evicted extent. */
-	if (mdev->state.conn < C_CONNECTED && evicted != LC_FREE)
-		drbd_bm_write_page(mdev, al_extent_to_bm_page(evicted));
 
 	/* The bitmap write may have failed, causing a state change. */
 	if (mdev->state.disk < D_INCONSISTENT) {
 		dev_err(DEV,
-			"disk is %s, cannot write al transaction (-%d +%d)\n",
-			drbd_disk_str(mdev->state.disk), evicted, new_enr);
+			"disk is %s, cannot write al transaction\n",
+			drbd_disk_str(mdev->state.disk));
+		aw->err = -EIO;
 		complete(&((struct update_al_work *)w)->event);
 		put_ldev(mdev);
 		return 1;
 	}
 
 	mutex_lock(&mdev->md_io_mutex); /* protects md_io_buffer, al_tr_cycle, ... */
-	buffer = (struct al_transaction *)page_address(mdev->md_io_page);
+	buffer = page_address(mdev->md_io_page);
 
-	buffer->magic = __constant_cpu_to_be32(DRBD_MAGIC);
+	memset(buffer, 0, sizeof(*buffer));
+	buffer->magic = cpu_to_be32(DRBD_AL_MAGIC);
 	buffer->tr_number = cpu_to_be32(mdev->al_tr_number);
 
-	n = lc_index_of(mdev->act_log, updated);
+	i = 0;
+
+	/* Even though no one can start to change this list
+	 * once we set the LC_LOCKED -- from drbd_al_begin_io(),
+	 * lc_try_lock_for_transaction() --, someone may still
+	 * be in the process of changing it. */
+	spin_lock_irq(&mdev->al_lock);
+	list_for_each_entry(e, &mdev->act_log->to_be_changed, list) {
+		if (i == AL_UPDATES_PER_TRANSACTION) {
+			i++;
+			break;
+		}
+		buffer->update_slot_nr[i] = cpu_to_be16(e->lc_index);
+		buffer->update_extent_nr[i] = cpu_to_be32(e->lc_new_number);
+		if (e->lc_number != LC_FREE)
+			drbd_bm_mark_for_writeout(mdev,
+					al_extent_to_bm_page(e->lc_number));
+		i++;
+	}
+	spin_unlock_irq(&mdev->al_lock);
+	BUG_ON(i > AL_UPDATES_PER_TRANSACTION);
 
-	buffer->updates[0].pos = cpu_to_be32(n);
-	buffer->updates[0].extent = cpu_to_be32(new_enr);
+	buffer->n_updates = cpu_to_be16(i);
+	for ( ; i < AL_UPDATES_PER_TRANSACTION; i++) {
+		buffer->update_slot_nr[i] = cpu_to_be16(-1);
+		buffer->update_extent_nr[i] = cpu_to_be32(LC_FREE);
+	}
 
-	xor_sum ^= new_enr;
+	buffer->context_size = cpu_to_be16(mdev->act_log->nr_elements);
+	buffer->context_start_slot_nr = cpu_to_be16(mdev->al_tr_cycle);
 
-	mx = min_t(int, AL_EXTENTS_PT,
+	mx = min_t(int, AL_CONTEXT_PER_TRANSACTION,
 		   mdev->act_log->nr_elements - mdev->al_tr_cycle);
 	for (i = 0; i < mx; i++) {
 		unsigned idx = mdev->al_tr_cycle + i;
 		extent_nr = lc_element_by_index(mdev->act_log, idx)->lc_number;
-		buffer->updates[i+1].pos = cpu_to_be32(idx);
-		buffer->updates[i+1].extent = cpu_to_be32(extent_nr);
-		xor_sum ^= extent_nr;
+		buffer->context[i] = cpu_to_be32(extent_nr);
 	}
-	for (; i < AL_EXTENTS_PT; i++) {
-		buffer->updates[i+1].pos = __constant_cpu_to_be32(-1);
-		buffer->updates[i+1].extent = __constant_cpu_to_be32(LC_FREE);
-		xor_sum ^= LC_FREE;
-	}
-	mdev->al_tr_cycle += AL_EXTENTS_PT;
+	for (; i < AL_CONTEXT_PER_TRANSACTION; i++)
+		buffer->context[i] = cpu_to_be32(LC_FREE);
+
+	mdev->al_tr_cycle += AL_CONTEXT_PER_TRANSACTION;
 	if (mdev->al_tr_cycle >= mdev->act_log->nr_elements)
 		mdev->al_tr_cycle = 0;
 
-	buffer->xor_sum = cpu_to_be32(xor_sum);
-
 	sector =  mdev->ldev->md.md_offset
-		+ mdev->ldev->md.al_offset + mdev->al_tr_pos;
-
-	if (!drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE))
-		drbd_chk_io_error(mdev, 1, true);
+		+ mdev->ldev->md.al_offset
+		+ mdev->al_tr_pos * (MD_BLOCK_SIZE>>9);
 
-	if (++mdev->al_tr_pos >
-	    div_ceil(mdev->act_log->nr_elements, AL_EXTENTS_PT))
-		mdev->al_tr_pos = 0;
+	crc = crc32c(0, buffer, 4096);
+	buffer->crc32c = cpu_to_be32(crc);
 
-	D_ASSERT(mdev->al_tr_pos < MD_AL_MAX_SIZE);
-	mdev->al_tr_number++;
+	if (drbd_bm_write_hinted(mdev))
+		aw->err = -EIO;
+		/* drbd_chk_io_error done already */
+	else if (!drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) {
+		aw->err = -EIO;
+		drbd_chk_io_error(mdev, 1, true);
+	} else {
+		/* advance ringbuffer position and transaction counter */
+		mdev->al_tr_pos = (mdev->al_tr_pos + 1) % (MD_AL_SECTORS*512/MD_BLOCK_SIZE);
+		mdev->al_tr_number++;
+	}
 
 	mutex_unlock(&mdev->md_io_mutex);
-
 	complete(&((struct update_al_work *)w)->event);
 	put_ldev(mdev);
 
 	return 1;
 }
 
+/* FIXME
+ * reading of the activity log,
+ * and potentially dirtying of the affected bitmap regions,
+ * should be done from userland only.
+ * DRBD would simply always attach with an empty activity log,
+ * and refuse to attach to something that looks like a crashed primary.
+ */
+
 /**
  * drbd_al_read_tr() - Read a single transaction from the on disk activity log
  * @mdev:	DRBD device.
@@ -383,27 +426,39 @@ w_al_write_transaction(struct drbd_work *w, int unused)
  */
 static int drbd_al_read_tr(struct drbd_conf *mdev,
 			   struct drbd_backing_dev *bdev,
-			   struct al_transaction *b,
 			   int index)
 {
+	struct al_transaction_on_disk *b = page_address(mdev->md_io_page);
 	sector_t sector;
-	int rv, i;
-	u32 xor_sum = 0;
+	u32 crc;
 
-	sector = bdev->md.md_offset + bdev->md.al_offset + index;
+	sector =  bdev->md.md_offset
+		+ bdev->md.al_offset
+		+ index * (MD_BLOCK_SIZE>>9);
 
 	/* Dont process error normally,
 	 * as this is done before disk is attached! */
 	if (!drbd_md_sync_page_io(mdev, bdev, sector, READ))
 		return -1;
 
-	rv = (b->magic == cpu_to_be32(DRBD_MAGIC));
+	if (!expect(b->magic == cpu_to_be32(DRBD_AL_MAGIC)))
+		return 0;
+
+	if (!expect(be16_to_cpu(b->n_updates) <= AL_UPDATES_PER_TRANSACTION))
+		return 0;
 
-	for (i = 0; i < AL_EXTENTS_PT + 1; i++)
-		xor_sum ^= be32_to_cpu(b->updates[i].extent);
-	rv &= (xor_sum == be32_to_cpu(b->xor_sum));
+	if (!expect(be16_to_cpu(b->context_size) <= DRBD_AL_EXTENTS_MAX))
+		return 0;
 
-	return rv;
+	if (!expect(be16_to_cpu(b->context_start_slot_nr) < DRBD_AL_EXTENTS_MAX))
+		return 0;
+
+	crc = be32_to_cpu(b->crc32c);
+	b->crc32c = 0;
+	if (!expect(crc == crc32c(0, b, 4096)))
+		return 0;
+
+	return 1;
 }
 
 /**
@@ -415,7 +470,7 @@ static int drbd_al_read_tr(struct drbd_conf *mdev,
  */
 int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
 {
-	struct al_transaction *buffer;
+	struct al_transaction_on_disk *b;
 	int i;
 	int rv;
 	int mx;
@@ -428,25 +483,36 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
 	u32 to_tnr = 0;
 	u32 cnr;
 
-	mx = div_ceil(mdev->act_log->nr_elements, AL_EXTENTS_PT);
+	/* Note that this is expected to be called with a newly created,
+	 * clean and all unused activity log of the "expected size".
+	 */
 
 	/* lock out all other meta data io for now,
 	 * and make sure the page is mapped.
 	 */
 	mutex_lock(&mdev->md_io_mutex);
-	buffer = page_address(mdev->md_io_page);
+	b = page_address(mdev->md_io_page);
+
+	/* Always use the full ringbuffer space for now.
+	 * possible optimization: read in all of it,
+	 * then scan the in-memory pages. */
+
+	mx = (MD_AL_SECTORS*512/MD_BLOCK_SIZE);
 
 	/* Find the valid transaction in the log */
-	for (i = 0; i <= mx; i++) {
-		rv = drbd_al_read_tr(mdev, bdev, buffer, i);
+	for (i = 0; i < mx; i++) {
+		rv = drbd_al_read_tr(mdev, bdev, i);
+		/* invalid data in that block */
 		if (rv == 0)
 			continue;
+
+		/* IO error */
 		if (rv == -1) {
 			mutex_unlock(&mdev->md_io_mutex);
 			return 0;
 		}
-		cnr = be32_to_cpu(buffer->tr_number);
 
+		cnr = be32_to_cpu(b->tr_number);
 		if (++found_valid == 1) {
 			from = i;
 			to = i;
@@ -454,8 +520,11 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
 			to_tnr = cnr;
 			continue;
 		}
+
+		D_ASSERT(cnr != to_tnr);
+		D_ASSERT(cnr != from_tnr);
 		if ((int)cnr - (int)from_tnr < 0) {
-			D_ASSERT(from_tnr - cnr + i - from == mx+1);
+			D_ASSERT(from_tnr - cnr + i - from == mx);
 			from = i;
 			from_tnr = cnr;
 		}
@@ -476,11 +545,10 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
 	 * dev_info(DEV, "Reading from %d to %d.\n",from,to); */
 	i = from;
 	while (1) {
-		int j, pos;
-		unsigned int extent_nr;
-		unsigned int trn;
+		struct lc_element *e;
+		unsigned j, n, slot, extent_nr;
 
-		rv = drbd_al_read_tr(mdev, bdev, buffer, i);
+		rv = drbd_al_read_tr(mdev, bdev, i);
 		if (!expect(rv != 0))
 			goto cancel;
 		if (rv == -1) {
@@ -488,23 +556,55 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
 			return 0;
 		}
 
-		trn = be32_to_cpu(buffer->tr_number);
+		/* deal with different transaction types.
+		 * not yet implemented */
+		if (!expect(b->transaction_type == 0))
+			goto cancel;
 
-		spin_lock_irq(&mdev->al_lock);
+		/* on the fly re-create/resize activity log?
+		 * will be a special transaction type flag. */
+		if (!expect(be16_to_cpu(b->context_size) == mdev->act_log->nr_elements))
+			goto cancel;
+		if (!expect(be16_to_cpu(b->context_start_slot_nr) < mdev->act_log->nr_elements))
+			goto cancel;
 
-		/* This loop runs backwards because in the cyclic
-		   elements there might be an old version of the
-		   updated element (in slot 0). So the element in slot 0
-		   can overwrite old versions. */
-		for (j = AL_EXTENTS_PT; j >= 0; j--) {
-			pos = be32_to_cpu(buffer->updates[j].pos);
-			extent_nr = be32_to_cpu(buffer->updates[j].extent);
+		/* We are the only user of the activity log right now,
+		 * don't actually need to take that lock. */
+		spin_lock_irq(&mdev->al_lock);
 
-			if (extent_nr == LC_FREE)
-				continue;
+		/* first, apply the context, ... */
+		for (j = 0, slot = be16_to_cpu(b->context_start_slot_nr);
+		     j < AL_CONTEXT_PER_TRANSACTION &&
+		     slot < mdev->act_log->nr_elements; j++, slot++) {
+			extent_nr = be32_to_cpu(b->context[j]);
+			e = lc_element_by_index(mdev->act_log, slot);
+			if (e->lc_number != extent_nr) {
+				if (extent_nr != LC_FREE)
+					active_extents++;
+				else
+					active_extents--;
+			}
+			lc_set(mdev->act_log, extent_nr, slot);
+		}
 
-			lc_set(mdev->act_log, extent_nr, pos);
-			active_extents++;
+		/* ... then apply the updates,
+		 * which override the context information.
+		 * drbd_al_read_tr already did the rangecheck
+		 * on n <= AL_UPDATES_PER_TRANSACTION */
+		n = be16_to_cpu(b->n_updates);
+		for (j = 0; j < n; j++) {
+			slot = be16_to_cpu(b->update_slot_nr[j]);
+			extent_nr = be32_to_cpu(b->update_extent_nr[j]);
+			if (!expect(slot < mdev->act_log->nr_elements))
+				break;
+			e = lc_element_by_index(mdev->act_log, slot);
+			if (e->lc_number != extent_nr) {
+				if (extent_nr != LC_FREE)
+					active_extents++;
+				else
+					active_extents--;
+			}
+			lc_set(mdev->act_log, extent_nr, slot);
 		}
 		spin_unlock_irq(&mdev->al_lock);
 
@@ -514,15 +614,12 @@ cancel:
 		if (i == to)
 			break;
 		i++;
-		if (i > mx)
+		if (i >= mx)
 			i = 0;
 	}
 
 	mdev->al_tr_number = to_tnr+1;
-	mdev->al_tr_pos = to;
-	if (++mdev->al_tr_pos >
-	    div_ceil(mdev->act_log->nr_elements, AL_EXTENTS_PT))
-		mdev->al_tr_pos = 0;
+	mdev->al_tr_pos = (to + 1) % (MD_AL_SECTORS*512/MD_BLOCK_SIZE);
 
 	/* ok, we are done with it */
 	mutex_unlock(&mdev->md_io_mutex);
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index edfdeb62c18f..3213808a898a 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -1069,7 +1069,6 @@ struct drbd_conf {
 	atomic_t pp_in_use_by_net;	/* sendpage()d, still referenced by tcp */
 	wait_queue_head_t ee_wait;
 	struct page *md_io_page;	/* one page buffer for md_io */
-	struct page *md_io_tmpp;	/* for logical_block_size != 512 */
 	struct mutex md_io_mutex;	/* protects the md_io_buffer */
 	spinlock_t al_lock;
 	wait_queue_head_t al_wait;
@@ -1259,22 +1258,39 @@ extern void drbd_ldev_destroy(struct drbd_conf *mdev);
    * either at the end of the backing device
    * or on a separate meta data device. */
 
-#define MD_RESERVED_SECT (128LU << 11)  /* 128 MB, unit sectors */
 /* The following numbers are sectors */
-#define MD_AL_OFFSET 8	    /* 8 Sectors after start of meta area */
-#define MD_AL_MAX_SIZE 64   /* = 32 kb LOG  ~ 3776 extents ~ 14 GB Storage */
-/* Allows up to about 3.8TB */
-#define MD_BM_OFFSET (MD_AL_OFFSET + MD_AL_MAX_SIZE)
-
-/* Since the smalles IO unit is usually 512 byte */
-#define MD_SECTOR_SHIFT	 9
-#define MD_SECTOR_SIZE	 (1<<MD_SECTOR_SHIFT)
-
-/* activity log */
-#define AL_EXTENTS_PT ((MD_SECTOR_SIZE-12)/8-1) /* 61 ; Extents per 512B sector */
-#define AL_EXTENT_SHIFT 22		 /* One extent represents 4M Storage */
+/* Allows up to about 3.8TB, so if you want more,
+ * you need to use the "flexible" meta data format. */
+#define MD_RESERVED_SECT (128LU << 11)  /* 128 MB, unit sectors */
+#define MD_AL_OFFSET	8    /* 8 Sectors after start of meta area */
+#define MD_AL_SECTORS	64   /* = 32 kB on disk activity log ring buffer */
+#define MD_BM_OFFSET (MD_AL_OFFSET + MD_AL_SECTORS)
+
+/* we do all meta data IO in 4k blocks */
+#define MD_BLOCK_SHIFT	12
+#define MD_BLOCK_SIZE	(1<<MD_BLOCK_SHIFT)
+
+/* One activity log extent represents 4M of storage */
+#define AL_EXTENT_SHIFT 22
 #define AL_EXTENT_SIZE (1<<AL_EXTENT_SHIFT)
 
+/* We could make these currently hardcoded constants configurable
+ * variables at create-md time (or even re-configurable at runtime?).
+ * Which will require some more changes to the DRBD "super block"
+ * and attach code.
+ *
+ * updates per transaction:
+ *   This many changes to the active set can be logged with one transaction.
+ *   This number is arbitrary.
+ * context per transaction:
+ *   This many context extent numbers are logged with each transaction.
+ *   This number is resulting from the transaction block size (4k), the layout
+ *   of the transaction header, and the number of updates per transaction.
+ *   See drbd_actlog.c:struct al_transaction_on_disk
+ * */
+#define AL_UPDATES_PER_TRANSACTION	 64	// arbitrary
+#define AL_CONTEXT_PER_TRANSACTION	919	// (4096 - 36 - 6*64)/4
+
 #if BITS_PER_LONG == 32
 #define LN2_BPL 5
 #define cpu_to_lel(A) cpu_to_le32(A)
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index c77e51a40926..efedfbc06198 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -2841,10 +2841,6 @@ void drbd_ldev_destroy(struct drbd_conf *mdev)
 		drbd_free_bc(mdev->ldev);
 		mdev->ldev = NULL;);
 
-	if (mdev->md_io_tmpp) {
-		__free_page(mdev->md_io_tmpp);
-		mdev->md_io_tmpp = NULL;
-	}
 	clear_bit(GO_DISKLESS, &mdev->flags);
 }
 
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 0a92f5226c2a..90d731723205 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -527,7 +527,7 @@ static void drbd_md_set_sector_offsets(struct drbd_conf *mdev,
 	case DRBD_MD_INDEX_FLEX_INT:
 		bdev->md.md_offset = drbd_md_ss__(mdev, bdev);
 		/* al size is still fixed */
-		bdev->md.al_offset = -MD_AL_MAX_SIZE;
+		bdev->md.al_offset = -MD_AL_SECTORS;
 		/* we need (slightly less than) ~ this much bitmap sectors: */
 		md_size_sect = drbd_get_capacity(bdev->backing_bdev);
 		md_size_sect = ALIGN(md_size_sect, BM_SECT_PER_EXT);
@@ -751,8 +751,8 @@ static int drbd_check_al_size(struct drbd_conf *mdev)
 	unsigned int in_use;
 	int i;
 
-	if (!expect(mdev->sync_conf.al_extents >= 7))
-		mdev->sync_conf.al_extents = 127;
+	if (!expect(mdev->sync_conf.al_extents >= DRBD_AL_EXTENTS_MIN))
+		mdev->sync_conf.al_extents = DRBD_AL_EXTENTS_MIN;
 
 	if (mdev->act_log &&
 	    mdev->act_log->nr_elements == mdev->sync_conf.al_extents)
@@ -760,7 +760,7 @@ static int drbd_check_al_size(struct drbd_conf *mdev)
 
 	in_use = 0;
 	t = mdev->act_log;
-	n = lc_create("act_log", drbd_al_ext_cache, 1,
+	n = lc_create("act_log", drbd_al_ext_cache, AL_UPDATES_PER_TRANSACTION,
 		mdev->sync_conf.al_extents, sizeof(struct lc_element), 0);
 
 	if (n == NULL) {
@@ -932,7 +932,6 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
 	union drbd_state ns, os;
 	enum drbd_state_rv rv;
 	int cp_discovered = 0;
-	int logical_block_size;
 
 	drbd_reconfig_start(mdev);
 
@@ -1087,25 +1086,6 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
 
 	drbd_md_set_sector_offsets(mdev, nbc);
 
-	/* allocate a second IO page if logical_block_size != 512 */
-	logical_block_size = bdev_logical_block_size(nbc->md_bdev);
-	if (logical_block_size == 0)
-		logical_block_size = MD_SECTOR_SIZE;
-
-	if (logical_block_size != MD_SECTOR_SIZE) {
-		if (!mdev->md_io_tmpp) {
-			struct page *page = alloc_page(GFP_NOIO);
-			if (!page)
-				goto force_diskless_dec;
-
-			dev_warn(DEV, "Meta data's bdev logical_block_size = %d != %d\n",
-			     logical_block_size, MD_SECTOR_SIZE);
-			dev_warn(DEV, "Workaround engaged (has performance impact).\n");
-
-			mdev->md_io_tmpp = page;
-		}
-	}
-
 	if (!mdev->bitmap) {
 		if (drbd_bm_init(mdev)) {
 			retcode = ERR_NOMEM;
@@ -1804,14 +1784,12 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n
 
 	if (!expect(sc.rate >= 1))
 		sc.rate = 1;
-	if (!expect(sc.al_extents >= 7))
-		sc.al_extents = 127; /* arbitrary minimum */
-#define AL_MAX ((MD_AL_MAX_SIZE-1) * AL_EXTENTS_PT)
-	if (sc.al_extents > AL_MAX) {
-		dev_err(DEV, "sc.al_extents > %d\n", AL_MAX);
-		sc.al_extents = AL_MAX;
-	}
-#undef AL_MAX
+
+	/* clip to allowed range */
+	if (!expect(sc.al_extents >= DRBD_AL_EXTENTS_MIN))
+		sc.al_extents = DRBD_AL_EXTENTS_MIN;
+	if (!expect(sc.al_extents <= DRBD_AL_EXTENTS_MAX))
+		sc.al_extents = DRBD_AL_EXTENTS_MAX;
 
 	/* to avoid spurious errors when configuring minors before configuring
 	 * the minors they depend on: if necessary, first create the minor we
diff --git a/include/linux/drbd.h b/include/linux/drbd.h
index 35fc08a0a552..70a688b92c1b 100644
--- a/include/linux/drbd.h
+++ b/include/linux/drbd.h
@@ -336,6 +336,10 @@ enum drbd_timeout_flag {
 #define DRBD_MAGIC 0x83740267
 #define DRBD_MAGIC_BIG 0x835a
 
+/* how I came up with this magic?
+ * base64 decode "actlog==" ;) */
+#define DRBD_AL_MAGIC 0x69cb65a2
+
 /* these are of type "int" */
 #define DRBD_MD_INDEX_INTERNAL -1
 #define DRBD_MD_INDEX_FLEX_EXT -2
diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h
index 447c36752385..75f05af33725 100644
--- a/include/linux/drbd_limits.h
+++ b/include/linux/drbd_limits.h
@@ -102,10 +102,12 @@
 #define DRBD_RATE_DEF 250  /* kb/second */
 
   /* less than 7 would hit performance unnecessarily.
-   * 3833 is the largest prime that still does fit
-   * into 64 sectors of activity log */
+   * 919 slots context information per transaction,
+   * 32k activity log, 4k transaction size,
+   * one transaction in flight:
+   * 919 * 7 = 6433 */
 #define DRBD_AL_EXTENTS_MIN  7
-#define DRBD_AL_EXTENTS_MAX  3833
+#define DRBD_AL_EXTENTS_MAX  6433
 #define DRBD_AL_EXTENTS_DEF  127
 
 #define DRBD_AFTER_MIN  -1
-- 
cgit v1.2.3


From 1aba4d7fcfabe999e0c99683b394aa76d5c42842 Mon Sep 17 00:00:00 2001
From: Philipp Reisner <philipp.reisner@linbit.com>
Date: Mon, 21 Feb 2011 15:38:08 +0100
Subject: drbd: Preparing the connector interface to operator on connections

Up to now it only operated on minor numbers. Now it can work also
on named connections.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_int.h  |  1 +
 drivers/block/drbd/drbd_main.c | 15 +++++++
 drivers/block/drbd/drbd_nl.c   | 96 +++++++++++++++++++++++++++---------------
 include/linux/drbd.h           | 19 +++++++--
 4 files changed, 94 insertions(+), 37 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 48367e53a7a5..033af1995867 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -1479,6 +1479,7 @@ extern void drbd_free_mdev(struct drbd_conf *mdev);
 
 struct drbd_tconn *drbd_new_tconn(char *name);
 extern void drbd_free_tconn(struct drbd_tconn *tconn);
+struct drbd_tconn *conn_by_name(const char *name);
 
 extern int proc_details;
 
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index cbec5ff2cc74..4761426f9ad7 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -2196,6 +2196,21 @@ static void drbd_init_workqueue(struct drbd_work_queue* wq)
 	INIT_LIST_HEAD(&wq->q);
 }
 
+struct drbd_tconn *conn_by_name(const char *name)
+{
+	struct drbd_tconn *tconn;
+
+	write_lock_irq(&global_state_lock);
+	list_for_each_entry(tconn, &drbd_tconns, all_tconn) {
+		if (!strcmp(tconn->name, name))
+			goto found;
+	}
+	tconn = NULL;
+found:
+	write_unlock_irq(&global_state_lock);
+	return tconn;
+}
+
 struct drbd_tconn *drbd_new_tconn(char *name)
 {
 	struct drbd_tconn *tconn;
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index b141f891f643..27a43d138f6b 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -2184,42 +2184,57 @@ out:
 	return 0;
 }
 
+enum cn_handler_type {
+	CHT_MINOR,
+	CHT_CONN,
+	CHT_CTOR,
+	/* CHT_RES, later */
+};
+
 struct cn_handler_struct {
-	int (*function)(struct drbd_conf *,
-			 struct drbd_nl_cfg_req *,
-			 struct drbd_nl_cfg_reply *);
+	enum cn_handler_type type;
+	union {
+		int (*minor_based)(struct drbd_conf *,
+				   struct drbd_nl_cfg_req *,
+				   struct drbd_nl_cfg_reply *);
+		int (*conn_based)(struct drbd_tconn *,
+				  struct drbd_nl_cfg_req *,
+				  struct drbd_nl_cfg_reply *);
+		int (*constructor)(struct drbd_nl_cfg_req *,
+				   struct drbd_nl_cfg_reply *);
+	};
 	int reply_body_size;
 };
 
 static struct cn_handler_struct cnd_table[] = {
-	[ P_primary ]		= { &drbd_nl_primary,		0 },
-	[ P_secondary ]		= { &drbd_nl_secondary,		0 },
-	[ P_disk_conf ]		= { &drbd_nl_disk_conf,		0 },
-	[ P_detach ]		= { &drbd_nl_detach,		0 },
-	[ P_net_conf ]		= { &drbd_nl_net_conf,		0 },
-	[ P_disconnect ]	= { &drbd_nl_disconnect,	0 },
-	[ P_resize ]		= { &drbd_nl_resize,		0 },
-	[ P_syncer_conf ]	= { &drbd_nl_syncer_conf,	0 },
-	[ P_invalidate ]	= { &drbd_nl_invalidate,	0 },
-	[ P_invalidate_peer ]	= { &drbd_nl_invalidate_peer,	0 },
-	[ P_pause_sync ]	= { &drbd_nl_pause_sync,	0 },
-	[ P_resume_sync ]	= { &drbd_nl_resume_sync,	0 },
-	[ P_suspend_io ]	= { &drbd_nl_suspend_io,	0 },
-	[ P_resume_io ]		= { &drbd_nl_resume_io,		0 },
-	[ P_outdate ]		= { &drbd_nl_outdate,		0 },
-	[ P_get_config ]	= { &drbd_nl_get_config,
+	[ P_primary ]		= { CHT_MINOR, { &drbd_nl_primary },	0 },
+	[ P_secondary ]		= { CHT_MINOR, { &drbd_nl_secondary },	0 },
+	[ P_disk_conf ]		= { CHT_MINOR, { &drbd_nl_disk_conf },	0 },
+	[ P_detach ]		= { CHT_MINOR, { &drbd_nl_detach },	0 },
+	[ P_net_conf ]		= { CHT_MINOR, { &drbd_nl_net_conf },	0 },
+	[ P_disconnect ]	= { CHT_MINOR, { &drbd_nl_disconnect },	0 },
+	[ P_resize ]		= { CHT_MINOR, { &drbd_nl_resize },	0 },
+	[ P_syncer_conf ]	= { CHT_MINOR, { &drbd_nl_syncer_conf },0 },
+	[ P_invalidate ]	= { CHT_MINOR, { &drbd_nl_invalidate },	0 },
+	[ P_invalidate_peer ]	= { CHT_MINOR, { &drbd_nl_invalidate_peer },0 },
+	[ P_pause_sync ]	= { CHT_MINOR, { &drbd_nl_pause_sync },	0 },
+	[ P_resume_sync ]	= { CHT_MINOR, { &drbd_nl_resume_sync },0 },
+	[ P_suspend_io ]	= { CHT_MINOR, { &drbd_nl_suspend_io },	0 },
+	[ P_resume_io ]		= { CHT_MINOR, { &drbd_nl_resume_io },	0 },
+	[ P_outdate ]		= { CHT_MINOR, { &drbd_nl_outdate },	0 },
+	[ P_get_config ]	= { CHT_MINOR, { &drbd_nl_get_config },
 				    sizeof(struct syncer_conf_tag_len_struct) +
 				    sizeof(struct disk_conf_tag_len_struct) +
 				    sizeof(struct net_conf_tag_len_struct) },
-	[ P_get_state ]		= { &drbd_nl_get_state,
+	[ P_get_state ]		= { CHT_MINOR, { &drbd_nl_get_state },
 				    sizeof(struct get_state_tag_len_struct) +
 				    sizeof(struct sync_progress_tag_len_struct)	},
-	[ P_get_uuids ]		= { &drbd_nl_get_uuids,
+	[ P_get_uuids ]		= { CHT_MINOR, { &drbd_nl_get_uuids },
 				    sizeof(struct get_uuids_tag_len_struct) },
-	[ P_get_timeout_flag ]	= { &drbd_nl_get_timeout_flag,
+	[ P_get_timeout_flag ]	= { CHT_MINOR, { &drbd_nl_get_timeout_flag },
 				    sizeof(struct get_timeout_flag_tag_len_struct)},
-	[ P_start_ov ]		= { &drbd_nl_start_ov,		0 },
-	[ P_new_c_uuid ]	= { &drbd_nl_new_c_uuid,	0 },
+	[ P_start_ov ]		= { CHT_MINOR, { &drbd_nl_start_ov },	0 },
+	[ P_new_c_uuid ]	= { CHT_MINOR, { &drbd_nl_new_c_uuid },	0 },
 };
 
 static void drbd_connector_callback(struct cn_msg *req, struct netlink_skb_parms *nsp)
@@ -2229,6 +2244,7 @@ static void drbd_connector_callback(struct cn_msg *req, struct netlink_skb_parms
 	struct cn_msg *cn_reply;
 	struct drbd_nl_cfg_reply *reply;
 	struct drbd_conf *mdev;
+	struct drbd_tconn *tconn;
 	int retcode, rr;
 	int reply_size = sizeof(struct cn_msg)
 		+ sizeof(struct drbd_nl_cfg_reply)
@@ -2244,13 +2260,6 @@ static void drbd_connector_callback(struct cn_msg *req, struct netlink_skb_parms
 		goto fail;
 	}
 
-	mdev = ensure_mdev(nlp->drbd_minor,
-			(nlp->flags & DRBD_NL_CREATE_DEVICE));
-	if (!mdev) {
-		retcode = ERR_MINOR_INVALID;
-		goto fail;
-	}
-
 	if (nlp->packet_type >= P_nl_after_last_packet ||
 	    nlp->packet_type == P_return_code_only) {
 		retcode = ERR_PACKET_NR;
@@ -2260,7 +2269,7 @@ static void drbd_connector_callback(struct cn_msg *req, struct netlink_skb_parms
 	cm = cnd_table + nlp->packet_type;
 
 	/* This may happen if packet number is 0: */
-	if (cm->function == NULL) {
+	if (cm->minor_based == NULL) {
 		retcode = ERR_PACKET_NR;
 		goto fail;
 	}
@@ -2281,7 +2290,28 @@ static void drbd_connector_callback(struct cn_msg *req, struct netlink_skb_parms
 	reply->ret_code = NO_ERROR; /* Might by modified by cm->function. */
 	/* reply->tag_list; might be modified by cm->function. */
 
-	rr = cm->function(mdev, nlp, reply);
+	retcode = ERR_MINOR_INVALID;
+	rr = 0;
+	switch (cm->type) {
+	case CHT_MINOR:
+		mdev = minor_to_mdev(nlp->drbd_minor);
+		if (!mdev)
+			goto fail;
+		rr = cm->minor_based(mdev, nlp, reply);
+		break;
+	case CHT_CONN:
+		tconn = conn_by_name(nlp->obj_name);
+		if (!tconn) {
+			retcode = ERR_CONN_NOT_KNOWN;
+			goto fail;
+		}
+		rr = cm->conn_based(tconn, nlp, reply);
+		break;
+	case CHT_CTOR:
+		rr = cm->constructor(nlp, reply);
+		break;
+	/* case CHT_RES: */
+	}
 
 	cn_reply->id = req->id;
 	cn_reply->seq = req->seq;
diff --git a/include/linux/drbd.h b/include/linux/drbd.h
index 70a688b92c1b..7683b4ab6583 100644
--- a/include/linux/drbd.h
+++ b/include/linux/drbd.h
@@ -155,6 +155,7 @@ enum drbd_ret_code {
 	ERR_CONG_NOT_PROTO_A	= 155,
 	ERR_PIC_AFTER_DEP	= 156,
 	ERR_PIC_PEER_DEP	= 157,
+	ERR_CONN_NOT_KNOWN      = 158,
 
 	/* insert new ones above this line */
 	AFTER_LAST_ERR_CODE
@@ -347,8 +348,11 @@ enum drbd_timeout_flag {
 
 /* Start of the new netlink/connector stuff */
 
-#define DRBD_NL_CREATE_DEVICE 0x01
-#define DRBD_NL_SET_DEFAULTS  0x02
+enum drbd_ncr_flags {
+	DRBD_NL_CREATE_DEVICE = 0x01,
+	DRBD_NL_SET_DEFAULTS =  0x02,
+};
+#define DRBD_NL_OBJ_NAME_LEN 32
 
 
 /* For searching a vacant cn_idx value */
@@ -356,8 +360,15 @@ enum drbd_timeout_flag {
 
 struct drbd_nl_cfg_req {
 	int packet_type;
-	unsigned int drbd_minor;
-	int flags;
+	union {
+		struct {
+			unsigned int drbd_minor;
+			enum drbd_ncr_flags flags;
+		};
+		struct {
+			char obj_name[DRBD_NL_OBJ_NAME_LEN];
+		};
+	};
 	unsigned short tag_list[];
 };
 
-- 
cgit v1.2.3


From 774b305518a68a50df4f479bcf79da2add724e6e Mon Sep 17 00:00:00 2001
From: Philipp Reisner <philipp.reisner@linbit.com>
Date: Tue, 22 Feb 2011 02:07:03 -0500
Subject: drbd: Implemented new commands to create/delete connections/minors

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_int.h  |   4 +-
 drivers/block/drbd/drbd_main.c |  68 ++++++++++++++------------
 drivers/block/drbd/drbd_nl.c   | 106 +++++++++++++++++++++++++----------------
 include/linux/drbd.h           |   3 ++
 include/linux/drbd_nl.h        |  12 +++++
 5 files changed, 120 insertions(+), 73 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index a27e2a4e038d..535d503886d8 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -1258,7 +1258,6 @@ extern int drbd_bmio_clear_n_write(struct drbd_conf *mdev);
 extern void drbd_go_diskless(struct drbd_conf *mdev);
 extern void drbd_ldev_destroy(struct drbd_conf *mdev);
 
-
 /* Meta data layout
    We reserve a 128MB Block (4k aligned)
    * either at the end of the backing device
@@ -1476,8 +1475,9 @@ extern wait_queue_head_t drbd_pp_wait;
 extern rwlock_t global_state_lock;
 
 extern int conn_lowest_minor(struct drbd_tconn *tconn);
-extern struct drbd_conf *drbd_new_device(unsigned int minor);
+enum drbd_ret_code conn_new_minor(struct drbd_tconn *tconn, unsigned int minor, int vnr);
 extern void drbd_free_mdev(struct drbd_conf *mdev);
+extern void drbd_delete_device(unsigned int minor);
 
 struct drbd_tconn *drbd_new_tconn(char *name);
 extern void drbd_free_tconn(struct drbd_tconn *tconn);
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 2bfd63058f40..ec7d0d98657c 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -614,13 +614,16 @@ char *drbd_task_to_thread_name(struct drbd_tconn *tconn, struct task_struct *tas
 	return thi ? thi->name : task->comm;
 }
 
-#ifdef CONFIG_SMP
 int conn_lowest_minor(struct drbd_tconn *tconn)
 {
 	int minor = 0;
-	idr_get_next(&tconn->volumes, &minor);
+
+	if (!idr_get_next(&tconn->volumes, &minor))
+		return -1;
 	return minor;
 }
+
+#ifdef CONFIG_SMP
 /**
  * drbd_calc_cpu_mask() - Generate CPU masks, spread over all CPUs
  * @mdev:	DRBD device.
@@ -2078,15 +2081,16 @@ static void drbd_release_ee_lists(struct drbd_conf *mdev)
 		dev_err(DEV, "%d EEs in net list found!\n", rr);
 }
 
-/* caution. no locking.
- * currently only used from module cleanup code. */
-static void drbd_delete_device(unsigned int minor)
+/* caution. no locking. */
+void drbd_delete_device(unsigned int minor)
 {
 	struct drbd_conf *mdev = minor_to_mdev(minor);
 
 	if (!mdev)
 		return;
 
+	idr_remove(&mdev->tconn->volumes, minor);
+
 	/* paranoia asserts */
 	D_ASSERT(mdev->open_cnt == 0);
 	D_ASSERT(list_empty(&mdev->tconn->data.work.q));
@@ -2101,7 +2105,6 @@ static void drbd_delete_device(unsigned int minor)
 		bdput(mdev->this_bdev);
 
 	drbd_free_resources(mdev);
-	drbd_free_tconn(mdev->tconn);
 
 	drbd_release_ee_lists(mdev);
 
@@ -2223,6 +2226,9 @@ struct drbd_tconn *drbd_new_tconn(char *name)
 	if (!tconn->name)
 		goto fail;
 
+	if (!zalloc_cpumask_var(&tconn->cpu_mask, GFP_KERNEL))
+		goto fail;
+
 	if (!tl_init(tconn))
 		goto fail;
 
@@ -2252,6 +2258,7 @@ struct drbd_tconn *drbd_new_tconn(char *name)
 
 fail:
 	tl_cleanup(tconn);
+	free_cpumask_var(tconn->cpu_mask);
 	kfree(tconn->name);
 	kfree(tconn);
 
@@ -2265,6 +2272,7 @@ void drbd_free_tconn(struct drbd_tconn *tconn)
 	write_unlock_irq(&global_state_lock);
 	idr_destroy(&tconn->volumes);
 
+	free_cpumask_var(tconn->cpu_mask);
 	kfree(tconn->name);
 	kfree(tconn->int_dig_out);
 	kfree(tconn->int_dig_in);
@@ -2272,32 +2280,31 @@ void drbd_free_tconn(struct drbd_tconn *tconn)
 	kfree(tconn);
 }
 
-struct drbd_conf *drbd_new_device(unsigned int minor)
+enum drbd_ret_code conn_new_minor(struct drbd_tconn *tconn, unsigned int minor, int vnr)
 {
 	struct drbd_conf *mdev;
 	struct gendisk *disk;
 	struct request_queue *q;
-	char conn_name[9]; /* drbd1234N */
-	int vnr;
+	int vnr_got = vnr;
+
+	mdev = minor_to_mdev(minor);
+	if (mdev)
+		return ERR_MINOR_EXISTS;
 
 	/* GFP_KERNEL, we are outside of all write-out paths */
 	mdev = kzalloc(sizeof(struct drbd_conf), GFP_KERNEL);
 	if (!mdev)
-		return NULL;
-	sprintf(conn_name, "drbd%d", minor);
-	mdev->tconn = drbd_new_tconn(conn_name);
-	if (!mdev->tconn)
-		goto out_no_tconn;
-	if (!idr_pre_get(&mdev->tconn->volumes, GFP_KERNEL))
-		goto out_no_cpumask;
-	if (idr_get_new(&mdev->tconn->volumes, mdev, &vnr))
-		goto out_no_cpumask;
-	if (vnr != 0) {
-		dev_err(DEV, "vnr = %d\n", vnr);
-		goto out_no_cpumask;
-	}
-	if (!zalloc_cpumask_var(&mdev->tconn->cpu_mask, GFP_KERNEL))
-		goto out_no_cpumask;
+		return ERR_NOMEM;
+
+	mdev->tconn = tconn;
+	if (!idr_pre_get(&tconn->volumes, GFP_KERNEL))
+		goto out_no_idr;
+	if (idr_get_new(&tconn->volumes, mdev, &vnr_got))
+		goto out_no_idr;
+	if (vnr_got != vnr) {
+		dev_err(DEV, "vnr_got (%d) != vnr (%d)\n", vnr_got, vnr);
+		goto out_no_q;
+	}
 
 	mdev->minor = minor;
 
@@ -2354,7 +2361,10 @@ struct drbd_conf *drbd_new_device(unsigned int minor)
 	INIT_LIST_HEAD(&mdev->current_epoch->list);
 	mdev->epochs = 1;
 
-	return mdev;
+	minor_table[minor] = mdev;
+	add_disk(disk);
+
+	return NO_ERROR;
 
 /* out_whatever_else:
 	kfree(mdev->current_epoch); */
@@ -2367,12 +2377,10 @@ out_no_io_page:
 out_no_disk:
 	blk_cleanup_queue(q);
 out_no_q:
-	free_cpumask_var(mdev->tconn->cpu_mask);
-out_no_cpumask:
-	drbd_free_tconn(mdev->tconn);
-out_no_tconn:
+	idr_remove(&tconn->volumes, vnr_got);
+out_no_idr:
 	kfree(mdev);
-	return NULL;
+	return ERR_NOMEM;
 }
 
 /* counterpart of drbd_new_device.
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 455a51dd364d..f2739fd188a0 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -443,40 +443,6 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force)
 	return rv;
 }
 
-static struct drbd_conf *ensure_mdev(int minor, int create)
-{
-	struct drbd_conf *mdev;
-
-	if (minor >= minor_count)
-		return NULL;
-
-	mdev = minor_to_mdev(minor);
-
-	if (!mdev && create) {
-		struct gendisk *disk = NULL;
-		mdev = drbd_new_device(minor);
-
-		spin_lock_irq(&drbd_pp_lock);
-		if (minor_table[minor] == NULL) {
-			minor_table[minor] = mdev;
-			disk = mdev->vdisk;
-			mdev = NULL;
-		} /* else: we lost the race */
-		spin_unlock_irq(&drbd_pp_lock);
-
-		if (disk) /* we won the race above */
-			/* in case we ever add a drbd_delete_device(),
-			 * don't forget the del_gendisk! */
-			add_disk(disk);
-		else /* we lost the race above */
-			drbd_free_mdev(mdev);
-
-		mdev = minor_to_mdev(minor);
-	}
-
-	return mdev;
-}
-
 static int drbd_nl_primary(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
 			   struct drbd_nl_cfg_reply *reply)
 {
@@ -1789,12 +1755,6 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n
 	if (!expect(sc.al_extents <= DRBD_AL_EXTENTS_MAX))
 		sc.al_extents = DRBD_AL_EXTENTS_MAX;
 
-	/* to avoid spurious errors when configuring minors before configuring
-	 * the minors they depend on: if necessary, first create the minor we
-	 * depend on */
-	if (sc.after >= 0)
-		ensure_mdev(sc.after, 1);
-
 	/* most sanity checks done, try to assign the new sync-after
 	 * dependency.  need to hold the global lock in there,
 	 * to avoid a race in the dependency loop check. */
@@ -2184,13 +2144,73 @@ out:
 	return 0;
 }
 
+static int drbd_nl_new_conn(struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply)
+{
+	struct new_connection args;
+
+	if (!new_connection_from_tags(nlp->tag_list, &args)) {
+		reply->ret_code = ERR_MANDATORY_TAG;
+		return 0;
+	}
+
+	reply->ret_code = NO_ERROR;
+	if (!drbd_new_tconn(args.name))
+		reply->ret_code = ERR_NOMEM;
+
+	return 0;
+}
+
+static int drbd_nl_new_minor(struct drbd_tconn *tconn,
+		      struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply)
+{
+	struct new_minor args;
+
+	args.vol_nr = 0;
+	args.minor = 0;
+
+	if (!new_minor_from_tags(nlp->tag_list, &args)) {
+		reply->ret_code = ERR_MANDATORY_TAG;
+		return 0;
+	}
+
+	reply->ret_code = conn_new_minor(tconn, args.minor, args.vol_nr);
+
+	return 0;
+}
+
+static int drbd_nl_del_minor(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
+			     struct drbd_nl_cfg_reply *reply)
+{
+	if (mdev->state.disk == D_DISKLESS &&
+	    mdev->state.conn == C_STANDALONE &&
+	    mdev->state.role == R_SECONDARY) {
+		drbd_delete_device(mdev_to_minor(mdev));
+		reply->ret_code = NO_ERROR;
+	} else {
+		reply->ret_code = ERR_MINOR_CONFIGURED;
+	}
+	return 0;
+}
+
+static int drbd_nl_del_conn(struct drbd_tconn *tconn,
+			    struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply)
+{
+	if (conn_lowest_minor(tconn) < 0) {
+		drbd_free_tconn(tconn);
+		reply->ret_code = NO_ERROR;
+	} else {
+		reply->ret_code = ERR_CONN_IN_USE;
+	}
+
+	return 0;
+}
+
 enum cn_handler_type {
 	CHT_MINOR,
 	CHT_CONN,
 	CHT_CTOR,
 	/* CHT_RES, later */
 };
-
 struct cn_handler_struct {
 	enum cn_handler_type type;
 	union {
@@ -2235,6 +2255,10 @@ static struct cn_handler_struct cnd_table[] = {
 				    sizeof(struct get_timeout_flag_tag_len_struct)},
 	[ P_start_ov ]		= { CHT_MINOR, { &drbd_nl_start_ov },	0 },
 	[ P_new_c_uuid ]	= { CHT_MINOR, { &drbd_nl_new_c_uuid },	0 },
+	[ P_new_connection ]	= { CHT_CTOR,  { .constructor = &drbd_nl_new_conn }, 0 },
+	[ P_new_minor ]		= { CHT_CONN,  { .conn_based = &drbd_nl_new_minor }, 0 },
+	[ P_del_minor ]		= { CHT_MINOR, { &drbd_nl_del_minor },	0 },
+	[ P_del_connection ]    = { CHT_CONN,  { .conn_based = &drbd_nl_del_conn }, 0 },
 };
 
 static void drbd_connector_callback(struct cn_msg *req, struct netlink_skb_parms *nsp)
diff --git a/include/linux/drbd.h b/include/linux/drbd.h
index 7683b4ab6583..e192167e6145 100644
--- a/include/linux/drbd.h
+++ b/include/linux/drbd.h
@@ -156,6 +156,9 @@ enum drbd_ret_code {
 	ERR_PIC_AFTER_DEP	= 156,
 	ERR_PIC_PEER_DEP	= 157,
 	ERR_CONN_NOT_KNOWN      = 158,
+	ERR_CONN_IN_USE         = 159,
+	ERR_MINOR_CONFIGURED    = 160,
+	ERR_MINOR_EXISTS	= 161,
 
 	/* insert new ones above this line */
 	AFTER_LAST_ERR_CODE
diff --git a/include/linux/drbd_nl.h b/include/linux/drbd_nl.h
index ab6159e4fcf0..1216c7a432c5 100644
--- a/include/linux/drbd_nl.h
+++ b/include/linux/drbd_nl.h
@@ -152,6 +152,18 @@ NL_PACKET(new_c_uuid, 26,
 NL_RESPONSE(return_code_only, 27)
 #endif
 
+NL_PACKET(new_connection, 28, /* CHT_CTOR */
+	NL_STRING(	85,	T_MANDATORY,	name, DRBD_NL_OBJ_NAME_LEN)
+)
+
+NL_PACKET(new_minor, 29, /* CHT_CONN */
+	NL_INTEGER(	86,	T_MANDATORY,	minor)
+	NL_INTEGER(	87,	T_MANDATORY,	vol_nr)
+)
+
+NL_PACKET(del_minor, 30, ) /* CHT_MINOR */
+NL_PACKET(del_connection, 31, ) /* CHT_CONN */
+
 #undef NL_PACKET
 #undef NL_INTEGER
 #undef NL_INT64
-- 
cgit v1.2.3


From a5df0e199cf6b31400fa86f6c3f73fa6e127e9ed Mon Sep 17 00:00:00 2001
From: Lars Ellenberg <lars.ellenberg@linbit.com>
Date: Wed, 23 Feb 2011 12:51:43 +0100
Subject: drbd: default to detach on-io-error

Old default behaviour was "pass-on",
which is not useful in production at all.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 include/linux/drbd_limits.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h
index 75f05af33725..22920a8af4e2 100644
--- a/include/linux/drbd_limits.h
+++ b/include/linux/drbd_limits.h
@@ -125,7 +125,7 @@
 #define DRBD_DISK_SIZE_SECT_MAX  (1 * (2LLU << 40))
 #define DRBD_DISK_SIZE_SECT_DEF  0 /* = disabled = no user size... */
 
-#define DRBD_ON_IO_ERROR_DEF EP_PASS_ON
+#define DRBD_ON_IO_ERROR_DEF EP_DETACH
 #define DRBD_FENCING_DEF FP_DONT_CARE
 #define DRBD_AFTER_SB_0P_DEF ASB_DISCONNECT
 #define DRBD_AFTER_SB_1P_DEF ASB_DISCONNECT
-- 
cgit v1.2.3


From ec2c35ac1ea288f5c931e32452ecea50068e8450 Mon Sep 17 00:00:00 2001
From: Lars Ellenberg <lars.ellenberg@linbit.com>
Date: Mon, 7 Mar 2011 10:20:08 +0100
Subject: drbd: prepare the transition from connector to genetlink

This adds the new API header and helper files.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 include/linux/drbd_genl.h         | 349 +++++++++++++++++++++++++++++++
 include/linux/drbd_genl_api.h     |  55 +++++
 include/linux/genl_magic_func.h   | 417 ++++++++++++++++++++++++++++++++++++++
 include/linux/genl_magic_struct.h | 260 ++++++++++++++++++++++++
 4 files changed, 1081 insertions(+)
 create mode 100644 include/linux/drbd_genl.h
 create mode 100644 include/linux/drbd_genl_api.h
 create mode 100644 include/linux/genl_magic_func.h
 create mode 100644 include/linux/genl_magic_struct.h

(limited to 'include/linux')

diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h
new file mode 100644
index 000000000000..84e16848f7a1
--- /dev/null
+++ b/include/linux/drbd_genl.h
@@ -0,0 +1,349 @@
+/*
+ * General overview:
+ * full generic netlink message:
+ * |nlmsghdr|genlmsghdr|<payload>
+ *
+ * payload:
+ * |optional fixed size family header|<sequence of netlink attributes>
+ *
+ * sequence of netlink attributes:
+ * I chose to have all "top level" attributes NLA_NESTED,
+ * corresponding to some real struct.
+ * So we have a sequence of |tla, len|<nested nla sequence>
+ *
+ * nested nla sequence:
+ * may be empty, or contain a sequence of netlink attributes
+ * representing the struct fields.
+ *
+ * The tag number of any field (regardless of containing struct)
+ * will be available as T_ ## field_name,
+ * so you cannot have the same field name in two differnt structs.
+ *
+ * The tag numbers themselves are per struct, though,
+ * so should always begin at 1 (not 0, that is the special "NLA_UNSPEC" type,
+ * which we won't use here).
+ * The tag numbers are used as index in the respective nla_policy array.
+ *
+ * GENL_struct(tag_name, tag_number, struct name, struct fields) - struct and policy
+ *	genl_magic_struct.h
+ *		generates the struct declaration,
+ *		generates an entry in the tla enum,
+ *	genl_magic_func.h
+ *		generates an entry in the static tla policy
+ *		with .type = NLA_NESTED
+ *		generates the static <struct_name>_nl_policy definition,
+ *		and static conversion functions
+ *
+ *	genl_magic_func.h
+ *
+ * GENL_mc_group(group)
+ *	genl_magic_struct.h
+ *		does nothing
+ *	genl_magic_func.h
+ *		defines and registers the mcast group,
+ *		and provides a send helper
+ *
+ * GENL_notification(op_name, op_num, mcast_group, tla list)
+ *	These are notifications to userspace.
+ *
+ *	genl_magic_struct.h
+ *		generates an entry in the genl_ops enum,
+ *	genl_magic_func.h
+ *		does nothing
+ *
+ *	mcast group: the name of the mcast group this notification should be
+ *	expected on
+ *	tla list: the list of expected top level attributes,
+ *	for documentation and sanity checking.
+ *
+ * GENL_op(op_name, op_num, flags and handler, tla list) - "genl operations"
+ *	These are requests from userspace.
+ *
+ *	_op and _notification share the same "number space",
+ *	op_nr will be assigned to "genlmsghdr->cmd"
+ *
+ *	genl_magic_struct.h
+ *		generates an entry in the genl_ops enum,
+ *	genl_magic_func.h
+ *		generates an entry in the static genl_ops array,
+ *		and static register/unregister functions to
+ *		genl_register_family_with_ops().
+ *
+ *	flags and handler:
+ *		GENL_op_init( .doit = x, .dumpit = y, .flags = something)
+ *		GENL_doit(x) => .dumpit = NULL, .flags = GENL_ADMIN_PERM
+ *	tla list: the list of expected top level attributes,
+ *	for documentation and sanity checking.
+ */
+
+/*
+ * STRUCTS
+ */
+
+/* this is sent kernel -> userland on various error conditions, and contains
+ * informational textual info, which is supposedly human readable.
+ * The computer relevant return code is in the drbd_genlmsghdr.
+ */
+GENL_struct(DRBD_NLA_CFG_REPLY, 1, drbd_cfg_reply,
+		/* "arbitrary" size strings, nla_policy.len = 0 */
+	__str_field(1, GENLA_F_MANDATORY,	info_text, 0)
+)
+
+/* Configuration requests typically need a context to operate on.
+ * Possible keys are device minor (fits in the drbd_genlmsghdr),
+ * the replication link (aka connection) name,
+ * and/or the replication group (aka resource) name,
+ * and the volume id within the resource. */
+GENL_struct(DRBD_NLA_CFG_CONTEXT, 2, drbd_cfg_context,
+		/* currently only 256 volumes per group,
+		 * but maybe we still change that */
+	__u32_field(1, GENLA_F_MANDATORY,	ctx_volume)
+	__str_field(2, GENLA_F_MANDATORY,	ctx_conn_name, 128)
+)
+
+GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf,
+	__u64_field(1, GENLA_F_MANDATORY,	disk_size)
+	__str_field(2, GENLA_F_REQUIRED,	backing_dev,	128)
+	__str_field(3, GENLA_F_REQUIRED,	meta_dev,	128)
+	__u32_field(4, GENLA_F_REQUIRED,	meta_dev_idx)
+	__u32_field(5, GENLA_F_MANDATORY,	max_bio_bvecs)
+	__u32_field(6, GENLA_F_MANDATORY,	on_io_error)
+	__u32_field(7, GENLA_F_MANDATORY,	fencing)
+	__flg_field(8, GENLA_F_MANDATORY,	no_disk_barrier)
+	__flg_field(9, GENLA_F_MANDATORY,	no_disk_flush)
+	__flg_field(10, GENLA_F_MANDATORY,	no_disk_drain)
+	__flg_field(11, GENLA_F_MANDATORY,	no_md_flush)
+	__flg_field(12, GENLA_F_MANDATORY,	use_bmbv)
+)
+
+GENL_struct(DRBD_NLA_SYNCER_CONF, 4, syncer_conf,
+	__u32_field(1,	GENLA_F_MANDATORY,	rate)
+	__u32_field(2,	GENLA_F_MANDATORY,	after)
+	__u32_field(3,	GENLA_F_MANDATORY,	al_extents)
+	__str_field(4,	GENLA_F_MANDATORY,	cpu_mask,       32)
+	__str_field(5,	GENLA_F_MANDATORY,	verify_alg,     SHARED_SECRET_MAX)
+	__str_field(6,	GENLA_F_MANDATORY,	csums_alg,	SHARED_SECRET_MAX)
+	__flg_field(7,	GENLA_F_MANDATORY,	use_rle)
+	__u32_field(8,	GENLA_F_MANDATORY,	on_no_data)
+	__u32_field(9,	GENLA_F_MANDATORY,	c_plan_ahead)
+	__u32_field(10,	GENLA_F_MANDATORY,	c_delay_target)
+	__u32_field(11,	GENLA_F_MANDATORY,	c_fill_target)
+	__u32_field(12,	GENLA_F_MANDATORY,	c_max_rate)
+	__u32_field(13,	GENLA_F_MANDATORY,	c_min_rate)
+)
+
+GENL_struct(DRBD_NLA_NET_CONF, 5, net_conf,
+	__str_field(1,	GENLA_F_MANDATORY | GENLA_F_SENSITIVE,
+						shared_secret,	SHARED_SECRET_MAX)
+	__str_field(2,	GENLA_F_MANDATORY,	cram_hmac_alg,	SHARED_SECRET_MAX)
+	__str_field(3,	GENLA_F_MANDATORY,	integrity_alg,	SHARED_SECRET_MAX)
+	__str_field(4,	GENLA_F_REQUIRED,	my_addr,	128)
+	__str_field(5,	GENLA_F_REQUIRED,	peer_addr,	128)
+	__u32_field(6,	GENLA_F_REQUIRED,	wire_protocol)
+	__u32_field(7,	GENLA_F_MANDATORY,	try_connect_int)
+	__u32_field(8,	GENLA_F_MANDATORY,	timeout)
+	__u32_field(9,	GENLA_F_MANDATORY,	ping_int)
+	__u32_field(10,	GENLA_F_MANDATORY,	ping_timeo)
+	__u32_field(11,	GENLA_F_MANDATORY,	sndbuf_size)
+	__u32_field(12,	GENLA_F_MANDATORY,	rcvbuf_size)
+	__u32_field(13,	GENLA_F_MANDATORY,	ko_count)
+	__u32_field(14,	GENLA_F_MANDATORY,	max_buffers)
+	__u32_field(15,	GENLA_F_MANDATORY,	max_epoch_size)
+	__u32_field(16,	GENLA_F_MANDATORY,	unplug_watermark)
+	__u32_field(17,	GENLA_F_MANDATORY,	after_sb_0p)
+	__u32_field(18,	GENLA_F_MANDATORY,	after_sb_1p)
+	__u32_field(19,	GENLA_F_MANDATORY,	after_sb_2p)
+	__u32_field(20,	GENLA_F_MANDATORY,	rr_conflict)
+	__u32_field(21,	GENLA_F_MANDATORY,	on_congestion)
+	__u32_field(22,	GENLA_F_MANDATORY,	cong_fill)
+	__u32_field(23,	GENLA_F_MANDATORY,	cong_extents)
+	__flg_field(24, GENLA_F_MANDATORY,	two_primaries)
+	__flg_field(25, GENLA_F_MANDATORY,	want_lose)
+	__flg_field(26, GENLA_F_MANDATORY,	no_cork)
+	__flg_field(27, GENLA_F_MANDATORY,	always_asbp)
+	__flg_field(28, GENLA_F_MANDATORY,	dry_run)
+)
+
+GENL_struct(DRBD_NLA_SET_ROLE_PARMS, 6, set_role_parms,
+	__flg_field(1, GENLA_F_MANDATORY,	assume_uptodate)
+)
+
+GENL_struct(DRBD_NLA_RESIZE_PARMS, 7, resize_parms,
+	__u64_field(1, GENLA_F_MANDATORY,	resize_size)
+	__flg_field(2, GENLA_F_MANDATORY,	resize_force)
+	__flg_field(3, GENLA_F_MANDATORY,	no_resync)
+)
+
+GENL_struct(DRBD_NLA_STATE_INFO, 8, state_info,
+	/* the reason of the broadcast,
+	 * if this is an event triggered broadcast. */
+	__u32_field(1, GENLA_F_MANDATORY,	sib_reason)
+	__u32_field(2, GENLA_F_REQUIRED,	current_state)
+	__u64_field(3, GENLA_F_MANDATORY,	capacity)
+	__u64_field(4, GENLA_F_MANDATORY,	ed_uuid)
+
+	/* These are for broadcast from after state change work.
+	 * prev_state and new_state are from the moment the state change took
+	 * place, new_state is not neccessarily the same as current_state,
+	 * there may have been more state changes since.  Which will be
+	 * broadcasted soon, in their respective after state change work.  */
+	__u32_field(5, GENLA_F_MANDATORY,	prev_state)
+	__u32_field(6, GENLA_F_MANDATORY,	new_state)
+
+	/* if we have a local disk: */
+	__bin_field(7, GENLA_F_MANDATORY,	uuids, (UI_SIZE*sizeof(__u64)))
+	__u32_field(8, GENLA_F_MANDATORY,	disk_flags)
+	__u64_field(9, GENLA_F_MANDATORY,	bits_total)
+	__u64_field(10, GENLA_F_MANDATORY,	bits_oos)
+	/* and in case resync or online verify is active */
+	__u64_field(11, GENLA_F_MANDATORY,	bits_rs_total)
+	__u64_field(12, GENLA_F_MANDATORY,	bits_rs_failed)
+
+	/* for pre and post notifications of helper execution */
+	__str_field(13, GENLA_F_MANDATORY,	helper, 32)
+	__u32_field(14, GENLA_F_MANDATORY,	helper_exit_code)
+)
+
+GENL_struct(DRBD_NLA_START_OV_PARMS, 9, start_ov_parms,
+	__u64_field(1, GENLA_F_MANDATORY,	ov_start_sector)
+)
+
+GENL_struct(DRBD_NLA_NEW_C_UUID_PARMS, 10, new_c_uuid_parms,
+	__flg_field(1, GENLA_F_MANDATORY, clear_bm)
+)
+
+GENL_struct(DRBD_NLA_TIMEOUT_PARMS, 11, timeout_parms,
+	__u32_field(1,	GENLA_F_REQUIRED,	timeout_type)
+)
+
+GENL_struct(DRBD_NLA_DISCONNECT_PARMS, 12, disconnect_parms,
+	__flg_field(1, GENLA_F_MANDATORY,	force_disconnect)
+)
+
+/*
+ * Notifications and commands (genlmsghdr->cmd)
+ */
+GENL_mc_group(events)
+
+	/* kernel -> userspace announcement of changes */
+GENL_notification(
+	DRBD_EVENT, 1, events,
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)
+	GENL_tla_expected(DRBD_NLA_STATE_INFO, GENLA_F_REQUIRED)
+	GENL_tla_expected(DRBD_NLA_NET_CONF, GENLA_F_MANDATORY)
+	GENL_tla_expected(DRBD_NLA_DISK_CONF, GENLA_F_MANDATORY)
+	GENL_tla_expected(DRBD_NLA_SYNCER_CONF, GENLA_F_MANDATORY)
+)
+
+	/* query kernel for specific or all info */
+GENL_op(
+	DRBD_ADM_GET_STATUS, 2,
+	GENL_op_init(
+		.doit = drbd_adm_get_status,
+		.dumpit = drbd_adm_get_status_all,
+		/* anyone may ask for the status,
+		 * it is broadcasted anyways */
+	),
+	/* To select the object .doit.
+	 * Or a subset of objects in .dumpit. */
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_MANDATORY)
+)
+
+#if 0
+	/* TO BE DONE */
+	/* create or destroy resources, aka replication groups */
+GENL_op(DRBD_ADM_CREATE_RESOURCE, 3, GENL_doit(drbd_adm_create_resource),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED))
+GENL_op(DRBD_ADM_DELETE_RESOURCE, 4, GENL_doit(drbd_adm_delete_resource),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED))
+#endif
+
+	/* add DRBD minor devices as volumes to resources */
+GENL_op(DRBD_ADM_ADD_MINOR, 5, GENL_doit(drbd_adm_add_minor),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED))
+GENL_op(DRBD_ADM_DEL_MINOR, 6, GENL_doit(drbd_adm_delete_minor),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED))
+
+	/* add or delete replication links to resources */
+GENL_op(DRBD_ADM_ADD_LINK, 7, GENL_doit(drbd_adm_create_connection),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED))
+GENL_op(DRBD_ADM_DEL_LINK, 8, GENL_doit(drbd_adm_delete_connection),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED))
+
+	/* operates on replication links */
+GENL_op(DRBD_ADM_SYNCER, 9,
+	GENL_doit(drbd_adm_syncer),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)
+	GENL_tla_expected(DRBD_NLA_SYNCER_CONF, GENLA_F_MANDATORY)
+)
+
+GENL_op(
+	DRBD_ADM_CONNECT, 10,
+	GENL_doit(drbd_adm_connect),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)
+	GENL_tla_expected(DRBD_NLA_NET_CONF, GENLA_F_REQUIRED)
+)
+
+GENL_op(DRBD_ADM_DISCONNECT, 11, GENL_doit(drbd_adm_disconnect),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED))
+
+	/* operates on minors */
+GENL_op(DRBD_ADM_ATTACH, 12,
+	GENL_doit(drbd_adm_attach),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)
+	GENL_tla_expected(DRBD_NLA_DISK_CONF, GENLA_F_REQUIRED)
+)
+
+GENL_op(
+	DRBD_ADM_RESIZE, 13,
+	GENL_doit(drbd_adm_resize),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)
+	GENL_tla_expected(DRBD_NLA_RESIZE_PARMS, GENLA_F_MANDATORY)
+)
+
+	/* operates on all volumes within a resource */
+GENL_op(
+	DRBD_ADM_PRIMARY, 14,
+	GENL_doit(drbd_adm_set_role),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)
+	GENL_tla_expected(DRBD_NLA_SET_ROLE_PARMS, GENLA_F_REQUIRED)
+)
+
+GENL_op(
+	DRBD_ADM_SECONDARY, 15,
+	GENL_doit(drbd_adm_set_role),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)
+	GENL_tla_expected(DRBD_NLA_SET_ROLE_PARMS, GENLA_F_REQUIRED)
+)
+
+GENL_op(
+	DRBD_ADM_NEW_C_UUID, 16,
+	GENL_doit(drbd_adm_new_c_uuid),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)
+	GENL_tla_expected(DRBD_NLA_NEW_C_UUID_PARMS, GENLA_F_MANDATORY)
+)
+
+GENL_op(
+	DRBD_ADM_START_OV, 17,
+	GENL_doit(drbd_adm_start_ov),
+	GENL_tla_expected(DRBD_NLA_START_OV_PARMS, GENLA_F_MANDATORY)
+)
+
+GENL_op(DRBD_ADM_DETACH,	18, GENL_doit(drbd_adm_detach),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED))
+GENL_op(DRBD_ADM_INVALIDATE,	19, GENL_doit(drbd_adm_invalidate),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED))
+GENL_op(DRBD_ADM_INVAL_PEER,	20, GENL_doit(drbd_adm_invalidate_peer),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED))
+GENL_op(DRBD_ADM_PAUSE_SYNC,	21, GENL_doit(drbd_adm_pause_sync),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED))
+GENL_op(DRBD_ADM_RESUME_SYNC,	22, GENL_doit(drbd_adm_resume_sync),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED))
+GENL_op(DRBD_ADM_SUSPEND_IO,	23, GENL_doit(drbd_adm_suspend_io),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED))
+GENL_op(DRBD_ADM_RESUME_IO,	24, GENL_doit(drbd_adm_resume_io),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED))
+GENL_op(DRBD_ADM_OUTDATE,	25, GENL_doit(drbd_adm_outdate),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED))
+GENL_op(DRBD_ADM_GET_TIMEOUT_TYPE, 26, GENL_doit(drbd_adm_get_timeout_type),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED))
diff --git a/include/linux/drbd_genl_api.h b/include/linux/drbd_genl_api.h
new file mode 100644
index 000000000000..9ef50d51e34e
--- /dev/null
+++ b/include/linux/drbd_genl_api.h
@@ -0,0 +1,55 @@
+#ifndef DRBD_GENL_STRUCT_H
+#define DRBD_GENL_STRUCT_H
+
+/**
+ * struct drbd_genlmsghdr - DRBD specific header used in NETLINK_GENERIC requests
+ * @minor:
+ *     For admin requests (user -> kernel): which minor device to operate on.
+ *     For (unicast) replies or informational (broadcast) messages
+ *     (kernel -> user): which minor device the information is about.
+ *     If we do not operate on minors, but on connections or resources,
+ *     the minor value shall be (~0), and the attribute DRBD_NLA_CFG_CONTEXT
+ *     is used instead.
+ * @flags: possible operation modifiers (relevant only for user->kernel):
+ *     DRBD_GENL_F_SET_DEFAULTS
+ * @volume:
+ *     When creating a new minor (adding it to a resource), the resource needs
+ *     to know which volume number within the resource this is supposed to be.
+ *     The volume number corresponds to the same volume number on the remote side,
+ *     whereas the minor number on the remote side may be different
+ *     (union with flags).
+ * @ret_code: kernel->userland unicast cfg reply return code (union with flags);
+ */
+struct drbd_genlmsghdr {
+	__u32 minor;
+	union {
+	__u32 flags;
+	__s32 ret_code;
+	};
+};
+
+/* To be used in drbd_genlmsghdr.flags */
+enum {
+	DRBD_GENL_F_SET_DEFAULTS = 1,
+};
+
+enum drbd_state_info_bcast_reason {
+	SIB_GET_STATUS_REPLY = 1,
+	SIB_STATE_CHANGE = 2,
+	SIB_HELPER_PRE = 3,
+	SIB_HELPER_POST = 4,
+	SIB_SYNC_PROGRESS = 5,
+};
+
+/* hack around predefined gcc/cpp "linux=1",
+ * we cannot possibly include <1/drbd_genl.h> */
+#undef linux
+
+#include <linux/drbd.h>
+#define GENL_MAGIC_VERSION	API_VERSION
+#define GENL_MAGIC_FAMILY	drbd
+#define GENL_MAGIC_FAMILY_HDRSZ	sizeof(struct drbd_genlmsghdr)
+#define GENL_MAGIC_INCLUDE_FILE <linux/drbd_genl.h>
+#include <linux/genl_magic_struct.h>
+
+#endif
diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h
new file mode 100644
index 000000000000..8a86f659d363
--- /dev/null
+++ b/include/linux/genl_magic_func.h
@@ -0,0 +1,417 @@
+#ifndef GENL_MAGIC_FUNC_H
+#define GENL_MAGIC_FUNC_H
+
+#include <linux/genl_magic_struct.h>
+
+/*
+ * Extension of genl attribute validation policies			{{{1
+ *									{{{2
+ */
+
+/**
+ * nla_is_required - return true if this attribute is required
+ * @nla: netlink attribute
+ */
+static inline int nla_is_required(const struct nlattr *nla)
+{
+        return nla->nla_type & GENLA_F_REQUIRED;
+}
+
+/**
+ * nla_is_mandatory - return true if understanding this attribute is mandatory
+ * @nla: netlink attribute
+ * Note: REQUIRED attributes are implicitly MANDATORY as well
+ */
+static inline int nla_is_mandatory(const struct nlattr *nla)
+{
+        return nla->nla_type & (GENLA_F_MANDATORY | GENLA_F_REQUIRED);
+}
+
+/* Functionality to be integrated into nla_parse(), and validate_nla(),
+ * respectively.
+ *
+ * Enforcing the "mandatory" bit is done here,
+ * by rejecting unknown mandatory attributes.
+ *
+ * Part of enforcing the "required" flag would mean to embed it into
+ * nla_policy.type, and extending validate_nla(), which currently does
+ * BUG_ON(pt->type > NLA_TYPE_MAX); we have to work on existing kernels,
+ * so we cannot do that.  Thats why enforcing "required" is done in the
+ * generated assignment functions below. */
+static int nla_check_unknown(int maxtype, struct nlattr *head, int len)
+{
+	struct nlattr *nla;
+	int rem;
+        nla_for_each_attr(nla, head, len, rem) {
+		__u16 type = nla_type(nla);
+		if (type > maxtype && nla_is_mandatory(nla))
+			return -EOPNOTSUPP;
+	}
+	return 0;
+}
+
+/*
+ * Magic: declare tla policy						{{{1
+ * Magic: declare nested policies
+ *									{{{2
+ */
+#undef GENL_mc_group
+#define GENL_mc_group(group)
+
+#undef GENL_notification
+#define GENL_notification(op_name, op_num, mcast_group, tla_list)
+
+#undef GENL_op
+#define GENL_op(op_name, op_num, handler, tla_list)
+
+#undef GENL_struct
+#define GENL_struct(tag_name, tag_number, s_name, s_fields)		\
+	[tag_name] = { .type = NLA_NESTED },
+
+static struct nla_policy CONCAT_(GENL_MAGIC_FAMILY, _tla_nl_policy)[] = {
+#include GENL_MAGIC_INCLUDE_FILE
+};
+
+#undef GENL_struct
+#define GENL_struct(tag_name, tag_number, s_name, s_fields)		\
+static struct nla_policy s_name ## _nl_policy[] __read_mostly =		\
+{ s_fields };
+
+#undef __field
+#define __field(attr_nr, attr_flag, name, nla_type, _type, __get, __put) \
+	[__nla_type(attr_nr)] = { .type = nla_type },
+
+#undef __array
+#define __array(attr_nr, attr_flag, name, nla_type, _type, maxlen,	\
+		__get, __put)						\
+	[__nla_type(attr_nr)] = { .type = nla_type,			\
+		.len = maxlen - (nla_type == NLA_NUL_STRING) },
+
+#include GENL_MAGIC_INCLUDE_FILE
+
+#ifndef __KERNEL__
+#ifndef pr_info
+#define pr_info(args...)	fprintf(stderr, args);
+#endif
+#endif
+
+#if 1
+static void dprint_field(const char *dir, int nla_type,
+		const char *name, void *valp)
+{
+	__u64 val = valp ? *(__u32 *)valp : 1;
+	switch (nla_type) {
+	case NLA_U8:  val = (__u8)val;
+	case NLA_U16: val = (__u16)val;
+	case NLA_U32: val = (__u32)val;
+		pr_info("%s attr %s: %d 0x%08x\n", dir,
+			name, (int)val, (unsigned)val);
+		break;
+	case NLA_U64:
+		val = *(__u64*)valp;
+		pr_info("%s attr %s: %lld 0x%08llx\n", dir,
+			name, (long long)val, (unsigned long long)val);
+		break;
+	case NLA_FLAG:
+		if (val)
+			pr_info("%s attr %s: set\n", dir, name);
+		break;
+	}
+}
+
+static void dprint_array(const char *dir, int nla_type,
+		const char *name, const char *val, unsigned len)
+{
+	switch (nla_type) {
+	case NLA_NUL_STRING:
+		if (len && val[len-1] == '\0')
+			len--;
+		pr_info("%s attr %s: [len:%u] '%s'\n", dir, name, len, val);
+		break;
+	default:
+		/* we can always show 4 byte,
+		 * thats what nlattr are aligned to. */
+		pr_info("%s attr %s: [len:%u] %02x%02x%02x%02x ...\n",
+			dir, name, len, val[0], val[1], val[2], val[3]);
+	}
+}
+
+#define DPRINT_TLA(a, op, b) pr_info("%s %s %s\n", a, op, b);
+
+/* Name is a member field name of the struct s.
+ * If s is NULL (only parsing, no copy requested in *_from_attrs()),
+ * nla is supposed to point to the attribute containing the information
+ * corresponding to that struct member. */
+#define DPRINT_FIELD(dir, nla_type, name, s, nla)			\
+	do {								\
+		if (s)							\
+			dprint_field(dir, nla_type, #name, &s->name);	\
+		else if (nla)						\
+			dprint_field(dir, nla_type, #name,		\
+				(nla_type == NLA_FLAG) ? NULL		\
+						: nla_data(nla));	\
+	} while (0)
+
+#define	DPRINT_ARRAY(dir, nla_type, name, s, nla)			\
+	do {								\
+		if (s)							\
+			dprint_array(dir, nla_type, #name,		\
+					s->name, s->name ## _len);	\
+		else if (nla)						\
+			dprint_array(dir, nla_type, #name,		\
+					nla_data(nla), nla_len(nla));	\
+	} while (0)
+#else
+#define DPRINT_TLA(a, op, b) do {} while (0)
+#define DPRINT_FIELD(dir, nla_type, name, s, nla) do {} while (0)
+#define	DPRINT_ARRAY(dir, nla_type, name, s, nla) do {} while (0)
+#endif
+
+/*
+ * Magic: provide conversion functions					{{{1
+ * populate struct from attribute table:
+ *									{{{2
+ */
+
+/* processing of generic netlink messages is serialized.
+ * use one static buffer for parsing of nested attributes */
+static struct nlattr *nested_attr_tb[128];
+
+#ifndef BUILD_BUG_ON
+/* Force a compilation error if condition is true */
+#define BUILD_BUG_ON(condition) ((void)BUILD_BUG_ON_ZERO(condition))
+/* Force a compilation error if condition is true, but also produce a
+   result (of value 0 and type size_t), so the expression can be used
+   e.g. in a structure initializer (or where-ever else comma expressions
+   aren't permitted). */
+#define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:-!!(e); }))
+#define BUILD_BUG_ON_NULL(e) ((void *)sizeof(struct { int:-!!(e); }))
+#endif
+
+#undef GENL_struct
+#define GENL_struct(tag_name, tag_number, s_name, s_fields)		\
+	/* static, potentially unused */				\
+int s_name ## _from_attrs(struct s_name *s, struct nlattr *tb[])	\
+{									\
+	const int maxtype = ARRAY_SIZE(s_name ## _nl_policy)-1;		\
+	struct nlattr *tla = tb[tag_number];				\
+	struct nlattr **ntb = nested_attr_tb;				\
+	struct nlattr *nla;						\
+	int err;							\
+	BUILD_BUG_ON(ARRAY_SIZE(s_name ## _nl_policy) > ARRAY_SIZE(nested_attr_tb));	\
+	if (!tla)							\
+		return -ENOMSG;						\
+	DPRINT_TLA(#s_name, "<=-", #tag_name);				\
+	err = nla_parse_nested(ntb, maxtype, tla, s_name ## _nl_policy); \
+	if (err)							\
+		return err;						\
+	err = nla_check_unknown(maxtype, nla_data(tla), nla_len(tla));	\
+	if (err)							\
+	      return err;						\
+									\
+	s_fields							\
+	return 0;							\
+}
+
+#undef __field
+#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put)	\
+		nla = ntb[__nla_type(attr_nr)];				\
+		if (nla) {						\
+			if (s)						\
+				s->name = __get(nla);			\
+			DPRINT_FIELD("<<", nla_type, name, s, nla);	\
+		} else if ((attr_flag) & GENLA_F_REQUIRED) {		\
+			pr_info("<< missing attr: %s\n", #name);	\
+			return -ENOMSG;					\
+		}
+
+/* validate_nla() already checked nla_len <= maxlen appropriately. */
+#undef __array
+#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, __get, __put) \
+		nla = ntb[__nla_type(attr_nr)];				\
+		if (nla) {						\
+			if (s)						\
+				s->name ## _len =			\
+					__get(s->name, nla, maxlen);	\
+			DPRINT_ARRAY("<<", nla_type, name, s, nla);	\
+		} else if ((attr_flag) & GENLA_F_REQUIRED) {		\
+			pr_info("<< missing attr: %s\n", #name);	\
+			return -ENOMSG;					\
+		}							\
+
+#include GENL_MAGIC_INCLUDE_FILE
+
+#undef GENL_struct
+#define GENL_struct(tag_name, tag_number, s_name, s_fields)
+
+/*
+ * Magic: define op number to op name mapping				{{{1
+ *									{{{2
+ */
+const char *CONCAT_(GENL_MAGIC_FAMILY, _genl_cmd_to_str)(__u8 cmd)
+{
+	switch (cmd) {
+#undef GENL_op
+#define GENL_op(op_name, op_num, handler, tla_list)		\
+	case op_num: return #op_name;
+#include GENL_MAGIC_INCLUDE_FILE
+	default:
+		     return "unknown";
+	}
+}
+
+#ifdef __KERNEL__
+#include <linux/stringify.h>
+/*
+ * Magic: define genl_ops						{{{1
+ *									{{{2
+ */
+
+#undef GENL_op
+#define GENL_op(op_name, op_num, handler, tla_list)		\
+{								\
+	handler							\
+	.cmd = op_name,						\
+	.policy	= CONCAT_(GENL_MAGIC_FAMILY, _tla_nl_policy),	\
+},
+
+#define ZZZ_genl_ops		CONCAT_(GENL_MAGIC_FAMILY, _genl_ops)
+static struct genl_ops ZZZ_genl_ops[] __read_mostly = {
+#include GENL_MAGIC_INCLUDE_FILE
+};
+
+#undef GENL_op
+#define GENL_op(op_name, op_num, handler, tla_list)
+
+/*
+ * Define the genl_family, multicast groups,				{{{1
+ * and provide register/unregister functions.
+ *									{{{2
+ */
+#define ZZZ_genl_family		CONCAT_(GENL_MAGIC_FAMILY, _genl_family)
+static struct genl_family ZZZ_genl_family __read_mostly = {
+	.id = GENL_ID_GENERATE,
+	.name = __stringify(GENL_MAGIC_FAMILY),
+	.version = GENL_MAGIC_VERSION,
+#ifdef GENL_MAGIC_FAMILY_HDRSZ
+	.hdrsize = NLA_ALIGN(GENL_MAGIC_FAMILY_HDRSZ),
+#endif
+	.maxattr = ARRAY_SIZE(drbd_tla_nl_policy)-1,
+};
+
+/*
+ * Magic: define multicast groups
+ * Magic: define multicast group registration helper
+ */
+#undef GENL_mc_group
+#define GENL_mc_group(group)						\
+static struct genl_multicast_group					\
+CONCAT_(GENL_MAGIC_FAMILY, _mcg_ ## group) __read_mostly = {		\
+	.name = #group,							\
+};									\
+static int CONCAT_(GENL_MAGIC_FAMILY, _genl_multicast_ ## group)(	\
+	struct sk_buff *skb, gfp_t flags)				\
+{									\
+	unsigned int group_id =						\
+		CONCAT_(GENL_MAGIC_FAMILY, _mcg_ ## group).id;	\
+	if (!group_id)							\
+		return -EINVAL;						\
+	return genlmsg_multicast(skb, 0, group_id, flags);		\
+}
+
+#include GENL_MAGIC_INCLUDE_FILE
+
+int CONCAT_(GENL_MAGIC_FAMILY, _genl_register)(void)
+{
+	int err = genl_register_family_with_ops(&ZZZ_genl_family,
+		ZZZ_genl_ops, ARRAY_SIZE(ZZZ_genl_ops));
+	if (err)
+		return err;
+#undef GENL_mc_group
+#define GENL_mc_group(group)						\
+	err = genl_register_mc_group(&ZZZ_genl_family,			\
+		&CONCAT_(GENL_MAGIC_FAMILY, _mcg_ ## group));		\
+	if (err)							\
+		goto fail;						\
+	else								\
+		pr_info("%s: mcg %s: %u\n", #group,			\
+			__stringify(GENL_MAGIC_FAMILY),			\
+			CONCAT_(GENL_MAGIC_FAMILY, _mcg_ ## group).id);
+
+#include GENL_MAGIC_INCLUDE_FILE
+
+#undef GENL_mc_group
+#define GENL_mc_group(group)
+	return 0;
+fail:
+	genl_unregister_family(&ZZZ_genl_family);
+	return err;
+}
+
+void CONCAT_(GENL_MAGIC_FAMILY, _genl_unregister)(void)
+{
+	genl_unregister_family(&ZZZ_genl_family);
+}
+
+/*
+ * Magic: provide conversion functions					{{{1
+ * populate skb from struct.
+ *									{{{2
+ */
+
+#undef GENL_op
+#define GENL_op(op_name, op_num, handler, tla_list)
+
+#undef GENL_struct
+#define GENL_struct(tag_name, tag_number, s_name, s_fields)		\
+static int s_name ## _to_skb(struct sk_buff *skb, struct s_name *s,	\
+		const bool exclude_sensitive)				\
+{									\
+	struct nlattr *tla = nla_nest_start(skb, tag_number);		\
+	if (!tla)							\
+		goto nla_put_failure;					\
+	DPRINT_TLA(#s_name, "-=>", #tag_name);				\
+	s_fields							\
+	nla_nest_end(skb, tla);						\
+	return 0;							\
+									\
+nla_put_failure:							\
+	if (tla)							\
+		nla_nest_cancel(skb, tla);				\
+        return -EMSGSIZE;						\
+}									\
+static inline int s_name ## _to_priv_skb(struct sk_buff *skb,		\
+		struct s_name *s)					\
+{									\
+	return s_name ## _to_skb(skb, s, 0);				\
+}									\
+static inline int s_name ## _to_unpriv_skb(struct sk_buff *skb,		\
+		struct s_name *s)					\
+{									\
+	return s_name ## _to_skb(skb, s, 1);				\
+}
+
+
+#undef __field
+#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put)	\
+	if (!exclude_sensitive || !((attr_flag) & GENLA_F_SENSITIVE)) {	\
+		DPRINT_FIELD(">>", nla_type, name, s, NULL);		\
+		__put(skb, attr_nr, s->name);				\
+	}
+
+#undef __array
+#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, __get, __put) \
+	if (!exclude_sensitive || !((attr_flag) & GENLA_F_SENSITIVE)) {	\
+		DPRINT_ARRAY(">>",nla_type, name, s, NULL);		\
+		__put(skb, attr_nr, min_t(int, maxlen,			\
+			s->name ## _len + (nla_type == NLA_NUL_STRING)),\
+						s->name);		\
+	}
+
+#include GENL_MAGIC_INCLUDE_FILE
+
+#endif /* __KERNEL__ */
+
+/* }}}1 */
+#endif /* GENL_MAGIC_FUNC_H */
+/* vim: set foldmethod=marker foldlevel=1 nofoldenable : */
diff --git a/include/linux/genl_magic_struct.h b/include/linux/genl_magic_struct.h
new file mode 100644
index 000000000000..745ebfd6c7e5
--- /dev/null
+++ b/include/linux/genl_magic_struct.h
@@ -0,0 +1,260 @@
+#ifndef GENL_MAGIC_STRUCT_H
+#define GENL_MAGIC_STRUCT_H
+
+#ifndef GENL_MAGIC_FAMILY
+# error "you need to define GENL_MAGIC_FAMILY before inclusion"
+#endif
+
+#ifndef GENL_MAGIC_VERSION
+# error "you need to define GENL_MAGIC_VERSION before inclusion"
+#endif
+
+#ifndef GENL_MAGIC_INCLUDE_FILE
+# error "you need to define GENL_MAGIC_INCLUDE_FILE before inclusion"
+#endif
+
+#include <linux/genetlink.h>
+#include <linux/types.h>
+
+#define CONCAT__(a,b)	a ## b
+#define CONCAT_(a,b)	CONCAT__(a,b)
+
+extern int CONCAT_(GENL_MAGIC_FAMILY, _genl_register)(void);
+extern void CONCAT_(GENL_MAGIC_FAMILY, _genl_unregister)(void);
+
+/*
+ * Extension of genl attribute validation policies			{{{2
+ */
+
+/**
+ * GENLA_F_FLAGS - policy type flags to ease compatible ABI evolvement
+ *
+ * @GENLA_F_REQUIRED: attribute has to be present, or message is considered invalid.
+ * Adding new REQUIRED attributes breaks ABI compatibility, so don't do that.
+ *
+ * @GENLA_F_MANDATORY: if present, receiver _must_ understand it.
+ * Without this, unknown attributes (> maxtype) are _silently_ ignored
+ * by validate_nla().
+ *
+ * To be used for API extensions, so older kernel can reject requests for not
+ * yet implemented features, if newer userland tries to use them even though
+ * the genl_family version clearly indicates they are not available.
+ *
+ * @GENLA_F_MAY_IGNORE: To clearly document the fact, for good measure.
+ * To be used for API extensions for things that have sane defaults,
+ * so newer userland can still talk to older kernel, knowing it will
+ * silently ignore these attributes if not yet known.
+ *
+ * NOTE: These flags overload
+ *   NLA_F_NESTED		(1 << 15)
+ *   NLA_F_NET_BYTEORDER	(1 << 14)
+ * from linux/netlink.h, which are not useful for validate_nla():
+ * NET_BYTEORDER is not used anywhere, and NESTED would be specified by setting
+ * .type = NLA_NESTED in the appropriate policy.
+ *
+ * See also: nla_type()
+ */
+enum {
+	GENLA_F_MAY_IGNORE	= 0,
+	GENLA_F_MANDATORY	= 1 << 14,
+	GENLA_F_REQUIRED	= 1 << 15,
+
+	/* This will not be present in the __u16 .nla_type, but can be
+	 * triggered on in <struct>_to_skb, to exclude "sensitive"
+	 * information from broadcasts, or on unpriviledged get requests.
+	 * This is useful because genetlink multicast groups can be listened in
+	 * on by anyone.  */
+	GENLA_F_SENSITIVE	= 1 << 16,
+};
+
+#define __nla_type(x)	((__u16)((__u16)(x) & (__u16)NLA_TYPE_MASK))
+
+/*									}}}1
+ * MAGIC
+ * multi-include macro expansion magic starts here
+ */
+
+/* MAGIC helpers							{{{2 */
+
+/* possible field types */
+#define __flg_field(attr_nr, attr_flag, name) \
+	__field(attr_nr, attr_flag, name, NLA_FLAG, char, \
+			nla_get_flag, __nla_put_flag)
+#define __u8_field(attr_nr, attr_flag, name)	\
+	__field(attr_nr, attr_flag, name, NLA_U8, unsigned char, \
+			nla_get_u8, NLA_PUT_U8)
+#define __u16_field(attr_nr, attr_flag, name)	\
+	__field(attr_nr, attr_flag, name, NLA_U16, __u16, \
+			nla_get_u16, NLA_PUT_U16)
+#define __u32_field(attr_nr, attr_flag, name)	\
+	__field(attr_nr, attr_flag, name, NLA_U32, __u32, \
+			nla_get_u32, NLA_PUT_U32)
+#define __u64_field(attr_nr, attr_flag, name)	\
+	__field(attr_nr, attr_flag, name, NLA_U64, __u64, \
+			nla_get_u64, NLA_PUT_U64)
+#define __str_field(attr_nr, attr_flag, name, maxlen) \
+	__array(attr_nr, attr_flag, name, NLA_NUL_STRING, char, maxlen, \
+			nla_strlcpy, NLA_PUT)
+#define __bin_field(attr_nr, attr_flag, name, maxlen) \
+	__array(attr_nr, attr_flag, name, NLA_BINARY, char, maxlen, \
+			nla_memcpy, NLA_PUT)
+
+#define __nla_put_flag(skb, attrtype, value)		\
+	do {						\
+		if (value)				\
+			NLA_PUT_FLAG(skb, attrtype);	\
+	} while (0)
+
+#define GENL_op_init(args...)	args
+#define GENL_doit(handler)		\
+	.doit = handler,		\
+	.flags = GENL_ADMIN_PERM,
+#define GENL_dumpit(handler)		\
+	.dumpit = handler,		\
+	.flags = GENL_ADMIN_PERM,
+
+/*									}}}1
+ * Magic: define the enum symbols for genl_ops
+ * Magic: define the enum symbols for top level attributes
+ * Magic: define the enum symbols for nested attributes
+ *									{{{2
+ */
+
+#undef GENL_struct
+#define GENL_struct(tag_name, tag_number, s_name, s_fields)
+
+#undef GENL_mc_group
+#define GENL_mc_group(group)
+
+#undef GENL_notification
+#define GENL_notification(op_name, op_num, mcast_group, tla_list)	\
+	op_name = op_num,
+
+#undef GENL_op
+#define GENL_op(op_name, op_num, handler, tla_list)			\
+	op_name = op_num,
+
+enum {
+#include GENL_MAGIC_INCLUDE_FILE
+};
+
+#undef GENL_notification
+#define GENL_notification(op_name, op_num, mcast_group, tla_list)
+
+#undef GENL_op
+#define GENL_op(op_name, op_num, handler, attr_list)
+
+#undef GENL_struct
+#define GENL_struct(tag_name, tag_number, s_name, s_fields) \
+		tag_name = tag_number,
+
+enum {
+#include GENL_MAGIC_INCLUDE_FILE
+};
+
+#undef GENL_struct
+#define GENL_struct(tag_name, tag_number, s_name, s_fields)	\
+enum {								\
+	s_fields						\
+};
+
+#undef __field
+#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put)	\
+	T_ ## name = (__u16)(attr_nr | attr_flag),
+
+#undef __array
+#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, __get, __put) \
+	T_ ## name = (__u16)(attr_nr | attr_flag),
+
+#include GENL_MAGIC_INCLUDE_FILE
+
+/*									}}}1
+ * Magic: compile time assert unique numbers for operations
+ * Magic: -"- unique numbers for top level attributes
+ * Magic: -"- unique numbers for nested attributes
+ *									{{{2
+ */
+
+#undef GENL_struct
+#define GENL_struct(tag_name, tag_number, s_name, s_fields)
+
+#undef GENL_op
+#define GENL_op(op_name, op_num, handler, attr_list)	\
+	case op_name:
+
+#undef GENL_notification
+#define GENL_notification(op_name, op_num, mcast_group, tla_list)	\
+	case op_name:
+
+static inline void ct_assert_unique_operations(void)
+{
+	switch (0) {
+#include GENL_MAGIC_INCLUDE_FILE
+		;
+	}
+}
+
+#undef GENL_op
+#define GENL_op(op_name, op_num, handler, attr_list)
+
+#undef GENL_notification
+#define GENL_notification(op_name, op_num, mcast_group, tla_list)
+
+#undef GENL_struct
+#define GENL_struct(tag_name, tag_number, s_name, s_fields)		\
+		case tag_number:
+
+static inline void ct_assert_unique_top_level_attributes(void)
+{
+	switch (0) {
+#include GENL_MAGIC_INCLUDE_FILE
+		;
+	}
+}
+
+#undef GENL_struct
+#define GENL_struct(tag_name, tag_number, s_name, s_fields)		\
+static inline void ct_assert_unique_ ## s_name ## _attributes(void)	\
+{									\
+	switch (0) {							\
+		s_fields						\
+			;						\
+	}								\
+}
+
+#undef __field
+#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put)	\
+	case attr_nr:
+
+#undef __array
+#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, __get, __put) \
+	case attr_nr:
+
+#include GENL_MAGIC_INCLUDE_FILE
+
+/*									}}}1
+ * Magic: declare structs
+ * struct <name> {
+ *	fields
+ * };
+ *									{{{2
+ */
+
+#undef GENL_struct
+#define GENL_struct(tag_name, tag_number, s_name, s_fields)		\
+struct s_name { s_fields };
+
+#undef __field
+#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put) \
+	type name;
+
+#undef __array
+#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, __get, __put) \
+	type name[maxlen];	\
+	__u32 name ## _len;
+
+#include GENL_MAGIC_INCLUDE_FILE
+
+/* }}}1 */
+#endif /* GENL_MAGIC_STRUCT_H */
+/* vim: set foldmethod=marker nofoldenable : */
-- 
cgit v1.2.3


From d4f65b5d2497b2fd9c45f06b71deb4ab084a5b66 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 13 Sep 2012 13:06:29 +0100
Subject: KEYS: Add payload preparsing opportunity prior to key instantiate or
 update

Give the key type the opportunity to preparse the payload prior to the
instantiation and update routines being called.  This is done with the
provision of two new key type operations:

	int (*preparse)(struct key_preparsed_payload *prep);
	void (*free_preparse)(struct key_preparsed_payload *prep);

If the first operation is present, then it is called before key creation (in
the add/update case) or before the key semaphore is taken (in the update and
instantiate cases).  The second operation is called to clean up if the first
was called.

preparse() is given the opportunity to fill in the following structure:

	struct key_preparsed_payload {
		char		*description;
		void		*type_data[2];
		void		*payload;
		const void	*data;
		size_t		datalen;
		size_t		quotalen;
	};

Before the preparser is called, the first three fields will have been cleared,
the payload pointer and size will be stored in data and datalen and the default
quota size from the key_type struct will be stored into quotalen.

The preparser may parse the payload in any way it likes and may store data in
the type_data[] and payload fields for use by the instantiate() and update()
ops.

The preparser may also propose a description for the key by attaching it as a
string to the description field.  This can be used by passing a NULL or ""
description to the add_key() system call or the key_create_or_update()
function.  This cannot work with request_key() as that required the description
to tell the upcall about the key to be created.

This, for example permits keys that store PGP public keys to generate their own
name from the user ID and public key fingerprint in the key.

The instantiate() and update() operations are then modified to look like this:

	int (*instantiate)(struct key *key, struct key_preparsed_payload *prep);
	int (*update)(struct key *key, struct key_preparsed_payload *prep);

and the new payload data is passed in *prep, whether or not it was preparsed.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 Documentation/security/keys.txt          |  50 +++++++++++++-
 fs/cifs/cifs_spnego.c                    |   6 +-
 fs/cifs/cifsacl.c                        |   8 +--
 include/keys/user-type.h                 |   6 +-
 include/linux/key-type.h                 |  35 +++++++++-
 net/ceph/crypto.c                        |   9 +--
 net/dns_resolver/dns_key.c               |   6 +-
 net/rxrpc/ar-key.c                       |  40 +++++------
 security/keys/encrypted-keys/encrypted.c |  16 +++--
 security/keys/key.c                      | 114 ++++++++++++++++++++++---------
 security/keys/keyctl.c                   |  18 +++--
 security/keys/keyring.c                  |   6 +-
 security/keys/request_key_auth.c         |   8 +--
 security/keys/trusted.c                  |  16 +++--
 security/keys/user_defined.c             |  14 ++--
 15 files changed, 250 insertions(+), 102 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/security/keys.txt b/Documentation/security/keys.txt
index aa0dbd74b71b..7d9ca92022d8 100644
--- a/Documentation/security/keys.txt
+++ b/Documentation/security/keys.txt
@@ -412,6 +412,10 @@ The main syscalls are:
      to the keyring. In this case, an error will be generated if the process
      does not have permission to write to the keyring.
 
+     If the key type supports it, if the description is NULL or an empty
+     string, the key type will try and generate a description from the content
+     of the payload.
+
      The payload is optional, and the pointer can be NULL if not required by
      the type. The payload is plen in size, and plen can be zero for an empty
      payload.
@@ -1114,12 +1118,53 @@ The structure has a number of fields, some of which are mandatory:
      it should return 0.
 
 
- (*) int (*instantiate)(struct key *key, const void *data, size_t datalen);
+ (*) int (*preparse)(struct key_preparsed_payload *prep);
+
+     This optional method permits the key type to attempt to parse payload
+     before a key is created (add key) or the key semaphore is taken (update or
+     instantiate key).  The structure pointed to by prep looks like:
+
+	struct key_preparsed_payload {
+		char		*description;
+		void		*type_data[2];
+		void		*payload;
+		const void	*data;
+		size_t		datalen;
+		size_t		quotalen;
+	};
+
+     Before calling the method, the caller will fill in data and datalen with
+     the payload blob parameters; quotalen will be filled in with the default
+     quota size from the key type and the rest will be cleared.
+
+     If a description can be proposed from the payload contents, that should be
+     attached as a string to the description field.  This will be used for the
+     key description if the caller of add_key() passes NULL or "".
+
+     The method can attach anything it likes to type_data[] and payload.  These
+     are merely passed along to the instantiate() or update() operations.
+
+     The method should return 0 if success ful or a negative error code
+     otherwise.
+
+     
+ (*) void (*free_preparse)(struct key_preparsed_payload *prep);
+
+     This method is only required if the preparse() method is provided,
+     otherwise it is unused.  It cleans up anything attached to the
+     description, type_data and payload fields of the key_preparsed_payload
+     struct as filled in by the preparse() method.
+
+
+ (*) int (*instantiate)(struct key *key, struct key_preparsed_payload *prep);
 
      This method is called to attach a payload to a key during construction.
      The payload attached need not bear any relation to the data passed to this
      function.
 
+     The prep->data and prep->datalen fields will define the original payload
+     blob.  If preparse() was supplied then other fields may be filled in also.
+
      If the amount of data attached to the key differs from the size in
      keytype->def_datalen, then key_payload_reserve() should be called.
 
@@ -1135,6 +1180,9 @@ The structure has a number of fields, some of which are mandatory:
      If this type of key can be updated, then this method should be provided.
      It is called to update a key's payload from the blob of data provided.
 
+     The prep->data and prep->datalen fields will define the original payload
+     blob.  If preparse() was supplied then other fields may be filled in also.
+
      key_payload_reserve() should be called if the data length might change
      before any changes are actually made. Note that if this succeeds, the type
      is committed to changing the key because it's already been altered, so all
diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c
index e622863b292f..086f381d6489 100644
--- a/fs/cifs/cifs_spnego.c
+++ b/fs/cifs/cifs_spnego.c
@@ -31,18 +31,18 @@
 
 /* create a new cifs key */
 static int
-cifs_spnego_key_instantiate(struct key *key, const void *data, size_t datalen)
+cifs_spnego_key_instantiate(struct key *key, struct key_preparsed_payload *prep)
 {
 	char *payload;
 	int ret;
 
 	ret = -ENOMEM;
-	payload = kmalloc(datalen, GFP_KERNEL);
+	payload = kmalloc(prep->datalen, GFP_KERNEL);
 	if (!payload)
 		goto error;
 
 	/* attach the data */
-	memcpy(payload, data, datalen);
+	memcpy(payload, prep->data, prep->datalen);
 	key->payload.data = payload;
 	ret = 0;
 
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index 05f4dc263a23..f3c60e264ca8 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -167,17 +167,17 @@ static struct shrinker cifs_shrinker = {
 };
 
 static int
-cifs_idmap_key_instantiate(struct key *key, const void *data, size_t datalen)
+cifs_idmap_key_instantiate(struct key *key, struct key_preparsed_payload *prep)
 {
 	char *payload;
 
-	payload = kmalloc(datalen, GFP_KERNEL);
+	payload = kmalloc(prep->datalen, GFP_KERNEL);
 	if (!payload)
 		return -ENOMEM;
 
-	memcpy(payload, data, datalen);
+	memcpy(payload, prep->data, prep->datalen);
 	key->payload.data = payload;
-	key->datalen = datalen;
+	key->datalen = prep->datalen;
 	return 0;
 }
 
diff --git a/include/keys/user-type.h b/include/keys/user-type.h
index bc9ec1d7698c..5e452c84f1e6 100644
--- a/include/keys/user-type.h
+++ b/include/keys/user-type.h
@@ -35,8 +35,10 @@ struct user_key_payload {
 extern struct key_type key_type_user;
 extern struct key_type key_type_logon;
 
-extern int user_instantiate(struct key *key, const void *data, size_t datalen);
-extern int user_update(struct key *key, const void *data, size_t datalen);
+struct key_preparsed_payload;
+
+extern int user_instantiate(struct key *key, struct key_preparsed_payload *prep);
+extern int user_update(struct key *key, struct key_preparsed_payload *prep);
 extern int user_match(const struct key *key, const void *criterion);
 extern void user_revoke(struct key *key);
 extern void user_destroy(struct key *key);
diff --git a/include/linux/key-type.h b/include/linux/key-type.h
index f0c651cda7b0..518a53afb9ea 100644
--- a/include/linux/key-type.h
+++ b/include/linux/key-type.h
@@ -26,6 +26,27 @@ struct key_construction {
 	struct key	*authkey;/* authorisation for key being constructed */
 };
 
+/*
+ * Pre-parsed payload, used by key add, update and instantiate.
+ *
+ * This struct will be cleared and data and datalen will be set with the data
+ * and length parameters from the caller and quotalen will be set from
+ * def_datalen from the key type.  Then if the preparse() op is provided by the
+ * key type, that will be called.  Then the struct will be passed to the
+ * instantiate() or the update() op.
+ *
+ * If the preparse() op is given, the free_preparse() op will be called to
+ * clear the contents.
+ */
+struct key_preparsed_payload {
+	char		*description;	/* Proposed key description (or NULL) */
+	void		*type_data[2];	/* Private key-type data */
+	void		*payload;	/* Proposed payload */
+	const void	*data;		/* Raw data */
+	size_t		datalen;	/* Raw datalen */
+	size_t		quotalen;	/* Quota length for proposed payload */
+};
+
 typedef int (*request_key_actor_t)(struct key_construction *key,
 				   const char *op, void *aux);
 
@@ -45,18 +66,28 @@ struct key_type {
 	/* vet a description */
 	int (*vet_description)(const char *description);
 
+	/* Preparse the data blob from userspace that is to be the payload,
+	 * generating a proposed description and payload that will be handed to
+	 * the instantiate() and update() ops.
+	 */
+	int (*preparse)(struct key_preparsed_payload *prep);
+
+	/* Free a preparse data structure.
+	 */
+	void (*free_preparse)(struct key_preparsed_payload *prep);
+
 	/* instantiate a key of this type
 	 * - this method should call key_payload_reserve() to determine if the
 	 *   user's quota will hold the payload
 	 */
-	int (*instantiate)(struct key *key, const void *data, size_t datalen);
+	int (*instantiate)(struct key *key, struct key_preparsed_payload *prep);
 
 	/* update a key of this type (optional)
 	 * - this method should call key_payload_reserve() to recalculate the
 	 *   quota consumption
 	 * - the key must be locked against read when modifying
 	 */
-	int (*update)(struct key *key, const void *data, size_t datalen);
+	int (*update)(struct key *key, struct key_preparsed_payload *prep);
 
 	/* match a key against a description */
 	int (*match)(const struct key *key, const void *desc);
diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c
index 9da7fdd3cd8a..af14cb425164 100644
--- a/net/ceph/crypto.c
+++ b/net/ceph/crypto.c
@@ -423,14 +423,15 @@ int ceph_encrypt2(struct ceph_crypto_key *secret, void *dst, size_t *dst_len,
 	}
 }
 
-int ceph_key_instantiate(struct key *key, const void *data, size_t datalen)
+int ceph_key_instantiate(struct key *key, struct key_preparsed_payload *prep)
 {
 	struct ceph_crypto_key *ckey;
+	size_t datalen = prep->datalen;
 	int ret;
 	void *p;
 
 	ret = -EINVAL;
-	if (datalen <= 0 || datalen > 32767 || !data)
+	if (datalen <= 0 || datalen > 32767 || !prep->data)
 		goto err;
 
 	ret = key_payload_reserve(key, datalen);
@@ -443,8 +444,8 @@ int ceph_key_instantiate(struct key *key, const void *data, size_t datalen)
 		goto err;
 
 	/* TODO ceph_crypto_key_decode should really take const input */
-	p = (void *)data;
-	ret = ceph_crypto_key_decode(ckey, &p, (char*)data+datalen);
+	p = (void *)prep->data;
+	ret = ceph_crypto_key_decode(ckey, &p, (char*)prep->data+datalen);
 	if (ret < 0)
 		goto err_ckey;
 
diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c
index d9507dd05818..859ab8b6ec34 100644
--- a/net/dns_resolver/dns_key.c
+++ b/net/dns_resolver/dns_key.c
@@ -59,13 +59,13 @@ const struct cred *dns_resolver_cache;
  *        "ip1,ip2,...#foo=bar"
  */
 static int
-dns_resolver_instantiate(struct key *key, const void *_data, size_t datalen)
+dns_resolver_instantiate(struct key *key, struct key_preparsed_payload *prep)
 {
 	struct user_key_payload *upayload;
 	unsigned long derrno;
 	int ret;
-	size_t result_len = 0;
-	const char *data = _data, *end, *opt;
+	size_t datalen = prep->datalen, result_len = 0;
+	const char *data = prep->data, *end, *opt;
 
 	kenter("%%%d,%s,'%*.*s',%zu",
 	       key->serial, key->description,
diff --git a/net/rxrpc/ar-key.c b/net/rxrpc/ar-key.c
index 8b1f9f49960f..106c5a6b1ab2 100644
--- a/net/rxrpc/ar-key.c
+++ b/net/rxrpc/ar-key.c
@@ -26,8 +26,8 @@
 #include "ar-internal.h"
 
 static int rxrpc_vet_description_s(const char *);
-static int rxrpc_instantiate(struct key *, const void *, size_t);
-static int rxrpc_instantiate_s(struct key *, const void *, size_t);
+static int rxrpc_instantiate(struct key *, struct key_preparsed_payload *);
+static int rxrpc_instantiate_s(struct key *, struct key_preparsed_payload *);
 static void rxrpc_destroy(struct key *);
 static void rxrpc_destroy_s(struct key *);
 static void rxrpc_describe(const struct key *, struct seq_file *);
@@ -678,7 +678,7 @@ error:
  *
  * if no data is provided, then a no-security key is made
  */
-static int rxrpc_instantiate(struct key *key, const void *data, size_t datalen)
+static int rxrpc_instantiate(struct key *key, struct key_preparsed_payload *prep)
 {
 	const struct rxrpc_key_data_v1 *v1;
 	struct rxrpc_key_token *token, **pp;
@@ -686,26 +686,26 @@ static int rxrpc_instantiate(struct key *key, const void *data, size_t datalen)
 	u32 kver;
 	int ret;
 
-	_enter("{%x},,%zu", key_serial(key), datalen);
+	_enter("{%x},,%zu", key_serial(key), prep->datalen);
 
 	/* handle a no-security key */
-	if (!data && datalen == 0)
+	if (!prep->data && prep->datalen == 0)
 		return 0;
 
 	/* determine if the XDR payload format is being used */
-	if (datalen > 7 * 4) {
-		ret = rxrpc_instantiate_xdr(key, data, datalen);
+	if (prep->datalen > 7 * 4) {
+		ret = rxrpc_instantiate_xdr(key, prep->data, prep->datalen);
 		if (ret != -EPROTO)
 			return ret;
 	}
 
 	/* get the key interface version number */
 	ret = -EINVAL;
-	if (datalen <= 4 || !data)
+	if (prep->datalen <= 4 || !prep->data)
 		goto error;
-	memcpy(&kver, data, sizeof(kver));
-	data += sizeof(kver);
-	datalen -= sizeof(kver);
+	memcpy(&kver, prep->data, sizeof(kver));
+	prep->data += sizeof(kver);
+	prep->datalen -= sizeof(kver);
 
 	_debug("KEY I/F VERSION: %u", kver);
 
@@ -715,11 +715,11 @@ static int rxrpc_instantiate(struct key *key, const void *data, size_t datalen)
 
 	/* deal with a version 1 key */
 	ret = -EINVAL;
-	if (datalen < sizeof(*v1))
+	if (prep->datalen < sizeof(*v1))
 		goto error;
 
-	v1 = data;
-	if (datalen != sizeof(*v1) + v1->ticket_length)
+	v1 = prep->data;
+	if (prep->datalen != sizeof(*v1) + v1->ticket_length)
 		goto error;
 
 	_debug("SCIX: %u", v1->security_index);
@@ -784,17 +784,17 @@ error:
  * instantiate a server secret key
  * data should be a pointer to the 8-byte secret key
  */
-static int rxrpc_instantiate_s(struct key *key, const void *data,
-			       size_t datalen)
+static int rxrpc_instantiate_s(struct key *key,
+			       struct key_preparsed_payload *prep)
 {
 	struct crypto_blkcipher *ci;
 
-	_enter("{%x},,%zu", key_serial(key), datalen);
+	_enter("{%x},,%zu", key_serial(key), prep->datalen);
 
-	if (datalen != 8)
+	if (prep->datalen != 8)
 		return -EINVAL;
 
-	memcpy(&key->type_data, data, 8);
+	memcpy(&key->type_data, prep->data, 8);
 
 	ci = crypto_alloc_blkcipher("pcbc(des)", 0, CRYPTO_ALG_ASYNC);
 	if (IS_ERR(ci)) {
@@ -802,7 +802,7 @@ static int rxrpc_instantiate_s(struct key *key, const void *data,
 		return PTR_ERR(ci);
 	}
 
-	if (crypto_blkcipher_setkey(ci, data, 8) < 0)
+	if (crypto_blkcipher_setkey(ci, prep->data, 8) < 0)
 		BUG();
 
 	key->payload.data = ci;
diff --git a/security/keys/encrypted-keys/encrypted.c b/security/keys/encrypted-keys/encrypted.c
index 2d1bb8af7696..9e1e005c7596 100644
--- a/security/keys/encrypted-keys/encrypted.c
+++ b/security/keys/encrypted-keys/encrypted.c
@@ -773,8 +773,8 @@ static int encrypted_init(struct encrypted_key_payload *epayload,
  *
  * On success, return 0. Otherwise return errno.
  */
-static int encrypted_instantiate(struct key *key, const void *data,
-				 size_t datalen)
+static int encrypted_instantiate(struct key *key,
+				 struct key_preparsed_payload *prep)
 {
 	struct encrypted_key_payload *epayload = NULL;
 	char *datablob = NULL;
@@ -782,16 +782,17 @@ static int encrypted_instantiate(struct key *key, const void *data,
 	char *master_desc = NULL;
 	char *decrypted_datalen = NULL;
 	char *hex_encoded_iv = NULL;
+	size_t datalen = prep->datalen;
 	int ret;
 
-	if (datalen <= 0 || datalen > 32767 || !data)
+	if (datalen <= 0 || datalen > 32767 || !prep->data)
 		return -EINVAL;
 
 	datablob = kmalloc(datalen + 1, GFP_KERNEL);
 	if (!datablob)
 		return -ENOMEM;
 	datablob[datalen] = 0;
-	memcpy(datablob, data, datalen);
+	memcpy(datablob, prep->data, datalen);
 	ret = datablob_parse(datablob, &format, &master_desc,
 			     &decrypted_datalen, &hex_encoded_iv);
 	if (ret < 0)
@@ -834,16 +835,17 @@ static void encrypted_rcu_free(struct rcu_head *rcu)
  *
  * On success, return 0. Otherwise return errno.
  */
-static int encrypted_update(struct key *key, const void *data, size_t datalen)
+static int encrypted_update(struct key *key, struct key_preparsed_payload *prep)
 {
 	struct encrypted_key_payload *epayload = key->payload.data;
 	struct encrypted_key_payload *new_epayload;
 	char *buf;
 	char *new_master_desc = NULL;
 	const char *format = NULL;
+	size_t datalen = prep->datalen;
 	int ret = 0;
 
-	if (datalen <= 0 || datalen > 32767 || !data)
+	if (datalen <= 0 || datalen > 32767 || !prep->data)
 		return -EINVAL;
 
 	buf = kmalloc(datalen + 1, GFP_KERNEL);
@@ -851,7 +853,7 @@ static int encrypted_update(struct key *key, const void *data, size_t datalen)
 		return -ENOMEM;
 
 	buf[datalen] = 0;
-	memcpy(buf, data, datalen);
+	memcpy(buf, prep->data, datalen);
 	ret = datablob_parse(buf, &format, &new_master_desc, NULL, NULL);
 	if (ret < 0)
 		goto out;
diff --git a/security/keys/key.c b/security/keys/key.c
index 50d96d4e06f2..1d039af99f50 100644
--- a/security/keys/key.c
+++ b/security/keys/key.c
@@ -412,8 +412,7 @@ EXPORT_SYMBOL(key_payload_reserve);
  * key_construction_mutex.
  */
 static int __key_instantiate_and_link(struct key *key,
-				      const void *data,
-				      size_t datalen,
+				      struct key_preparsed_payload *prep,
 				      struct key *keyring,
 				      struct key *authkey,
 				      unsigned long *_prealloc)
@@ -431,7 +430,7 @@ static int __key_instantiate_and_link(struct key *key,
 	/* can't instantiate twice */
 	if (!test_bit(KEY_FLAG_INSTANTIATED, &key->flags)) {
 		/* instantiate the key */
-		ret = key->type->instantiate(key, data, datalen);
+		ret = key->type->instantiate(key, prep);
 
 		if (ret == 0) {
 			/* mark the key as being instantiated */
@@ -482,22 +481,37 @@ int key_instantiate_and_link(struct key *key,
 			     struct key *keyring,
 			     struct key *authkey)
 {
+	struct key_preparsed_payload prep;
 	unsigned long prealloc;
 	int ret;
 
+	memset(&prep, 0, sizeof(prep));
+	prep.data = data;
+	prep.datalen = datalen;
+	prep.quotalen = key->type->def_datalen;
+	if (key->type->preparse) {
+		ret = key->type->preparse(&prep);
+		if (ret < 0)
+			goto error;
+	}
+
 	if (keyring) {
 		ret = __key_link_begin(keyring, key->type, key->description,
 				       &prealloc);
 		if (ret < 0)
-			return ret;
+			goto error_free_preparse;
 	}
 
-	ret = __key_instantiate_and_link(key, data, datalen, keyring, authkey,
+	ret = __key_instantiate_and_link(key, &prep, keyring, authkey,
 					 &prealloc);
 
 	if (keyring)
 		__key_link_end(keyring, key->type, prealloc);
 
+error_free_preparse:
+	if (key->type->preparse)
+		key->type->free_preparse(&prep);
+error:
 	return ret;
 }
 
@@ -706,7 +720,7 @@ void key_type_put(struct key_type *ktype)
  * if we get an error.
  */
 static inline key_ref_t __key_update(key_ref_t key_ref,
-				     const void *payload, size_t plen)
+				     struct key_preparsed_payload *prep)
 {
 	struct key *key = key_ref_to_ptr(key_ref);
 	int ret;
@@ -722,7 +736,7 @@ static inline key_ref_t __key_update(key_ref_t key_ref,
 
 	down_write(&key->sem);
 
-	ret = key->type->update(key, payload, plen);
+	ret = key->type->update(key, prep);
 	if (ret == 0)
 		/* updating a negative key instantiates it */
 		clear_bit(KEY_FLAG_NEGATIVE, &key->flags);
@@ -774,6 +788,7 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref,
 			       unsigned long flags)
 {
 	unsigned long prealloc;
+	struct key_preparsed_payload prep;
 	const struct cred *cred = current_cred();
 	struct key_type *ktype;
 	struct key *keyring, *key = NULL;
@@ -789,8 +804,9 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref,
 	}
 
 	key_ref = ERR_PTR(-EINVAL);
-	if (!ktype->match || !ktype->instantiate)
-		goto error_2;
+	if (!ktype->match || !ktype->instantiate ||
+	    (!description && !ktype->preparse))
+		goto error_put_type;
 
 	keyring = key_ref_to_ptr(keyring_ref);
 
@@ -798,18 +814,37 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref,
 
 	key_ref = ERR_PTR(-ENOTDIR);
 	if (keyring->type != &key_type_keyring)
-		goto error_2;
+		goto error_put_type;
+
+	memset(&prep, 0, sizeof(prep));
+	prep.data = payload;
+	prep.datalen = plen;
+	prep.quotalen = ktype->def_datalen;
+	if (ktype->preparse) {
+		ret = ktype->preparse(&prep);
+		if (ret < 0) {
+			key_ref = ERR_PTR(ret);
+			goto error_put_type;
+		}
+		if (!description)
+			description = prep.description;
+		key_ref = ERR_PTR(-EINVAL);
+		if (!description)
+			goto error_free_prep;
+	}
 
 	ret = __key_link_begin(keyring, ktype, description, &prealloc);
-	if (ret < 0)
-		goto error_2;
+	if (ret < 0) {
+		key_ref = ERR_PTR(ret);
+		goto error_free_prep;
+	}
 
 	/* if we're going to allocate a new key, we're going to have
 	 * to modify the keyring */
 	ret = key_permission(keyring_ref, KEY_WRITE);
 	if (ret < 0) {
 		key_ref = ERR_PTR(ret);
-		goto error_3;
+		goto error_link_end;
 	}
 
 	/* if it's possible to update this type of key, search for an existing
@@ -840,25 +875,27 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref,
 			perm, flags);
 	if (IS_ERR(key)) {
 		key_ref = ERR_CAST(key);
-		goto error_3;
+		goto error_link_end;
 	}
 
 	/* instantiate it and link it into the target keyring */
-	ret = __key_instantiate_and_link(key, payload, plen, keyring, NULL,
-					 &prealloc);
+	ret = __key_instantiate_and_link(key, &prep, keyring, NULL, &prealloc);
 	if (ret < 0) {
 		key_put(key);
 		key_ref = ERR_PTR(ret);
-		goto error_3;
+		goto error_link_end;
 	}
 
 	key_ref = make_key_ref(key, is_key_possessed(keyring_ref));
 
- error_3:
+error_link_end:
 	__key_link_end(keyring, ktype, prealloc);
- error_2:
+error_free_prep:
+	if (ktype->preparse)
+		ktype->free_preparse(&prep);
+error_put_type:
 	key_type_put(ktype);
- error:
+error:
 	return key_ref;
 
  found_matching_key:
@@ -866,10 +903,9 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref,
 	 * - we can drop the locks first as we have the key pinned
 	 */
 	__key_link_end(keyring, ktype, prealloc);
-	key_type_put(ktype);
 
-	key_ref = __key_update(key_ref, payload, plen);
-	goto error;
+	key_ref = __key_update(key_ref, &prep);
+	goto error_free_prep;
 }
 EXPORT_SYMBOL(key_create_or_update);
 
@@ -888,6 +924,7 @@ EXPORT_SYMBOL(key_create_or_update);
  */
 int key_update(key_ref_t key_ref, const void *payload, size_t plen)
 {
+	struct key_preparsed_payload prep;
 	struct key *key = key_ref_to_ptr(key_ref);
 	int ret;
 
@@ -900,18 +937,31 @@ int key_update(key_ref_t key_ref, const void *payload, size_t plen)
 
 	/* attempt to update it if supported */
 	ret = -EOPNOTSUPP;
-	if (key->type->update) {
-		down_write(&key->sem);
-
-		ret = key->type->update(key, payload, plen);
-		if (ret == 0)
-			/* updating a negative key instantiates it */
-			clear_bit(KEY_FLAG_NEGATIVE, &key->flags);
+	if (!key->type->update)
+		goto error;
 
-		up_write(&key->sem);
+	memset(&prep, 0, sizeof(prep));
+	prep.data = payload;
+	prep.datalen = plen;
+	prep.quotalen = key->type->def_datalen;
+	if (key->type->preparse) {
+		ret = key->type->preparse(&prep);
+		if (ret < 0)
+			goto error;
 	}
 
- error:
+	down_write(&key->sem);
+
+	ret = key->type->update(key, &prep);
+	if (ret == 0)
+		/* updating a negative key instantiates it */
+		clear_bit(KEY_FLAG_NEGATIVE, &key->flags);
+
+	up_write(&key->sem);
+
+	if (key->type->preparse)
+		key->type->free_preparse(&prep);
+error:
 	return ret;
 }
 EXPORT_SYMBOL(key_update);
diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c
index 3364fbf46807..505d40be196c 100644
--- a/security/keys/keyctl.c
+++ b/security/keys/keyctl.c
@@ -46,6 +46,9 @@ static int key_get_type_from_user(char *type,
  * Extract the description of a new key from userspace and either add it as a
  * new key to the specified keyring or update a matching key in that keyring.
  *
+ * If the description is NULL or an empty string, the key type is asked to
+ * generate one from the payload.
+ *
  * The keyring must be writable so that we can attach the key to it.
  *
  * If successful, the new key's serial number is returned, otherwise an error
@@ -72,10 +75,17 @@ SYSCALL_DEFINE5(add_key, const char __user *, _type,
 	if (ret < 0)
 		goto error;
 
-	description = strndup_user(_description, PAGE_SIZE);
-	if (IS_ERR(description)) {
-		ret = PTR_ERR(description);
-		goto error;
+	description = NULL;
+	if (_description) {
+		description = strndup_user(_description, PAGE_SIZE);
+		if (IS_ERR(description)) {
+			ret = PTR_ERR(description);
+			goto error;
+		}
+		if (!*description) {
+			kfree(description);
+			description = NULL;
+		}
 	}
 
 	/* pull the payload in if one was supplied */
diff --git a/security/keys/keyring.c b/security/keys/keyring.c
index 81e7852d281d..f04d8cf81f3c 100644
--- a/security/keys/keyring.c
+++ b/security/keys/keyring.c
@@ -66,7 +66,7 @@ static inline unsigned keyring_hash(const char *desc)
  * operations.
  */
 static int keyring_instantiate(struct key *keyring,
-			       const void *data, size_t datalen);
+			       struct key_preparsed_payload *prep);
 static int keyring_match(const struct key *keyring, const void *criterion);
 static void keyring_revoke(struct key *keyring);
 static void keyring_destroy(struct key *keyring);
@@ -121,12 +121,12 @@ static void keyring_publish_name(struct key *keyring)
  * Returns 0 on success, -EINVAL if given any data.
  */
 static int keyring_instantiate(struct key *keyring,
-			       const void *data, size_t datalen)
+			       struct key_preparsed_payload *prep)
 {
 	int ret;
 
 	ret = -EINVAL;
-	if (datalen == 0) {
+	if (prep->datalen == 0) {
 		/* make the keyring available by name if it has one */
 		keyring_publish_name(keyring);
 		ret = 0;
diff --git a/security/keys/request_key_auth.c b/security/keys/request_key_auth.c
index 60d4e3f5e4bb..85730d5a5a59 100644
--- a/security/keys/request_key_auth.c
+++ b/security/keys/request_key_auth.c
@@ -19,7 +19,8 @@
 #include <asm/uaccess.h>
 #include "internal.h"
 
-static int request_key_auth_instantiate(struct key *, const void *, size_t);
+static int request_key_auth_instantiate(struct key *,
+					struct key_preparsed_payload *);
 static void request_key_auth_describe(const struct key *, struct seq_file *);
 static void request_key_auth_revoke(struct key *);
 static void request_key_auth_destroy(struct key *);
@@ -42,10 +43,9 @@ struct key_type key_type_request_key_auth = {
  * Instantiate a request-key authorisation key.
  */
 static int request_key_auth_instantiate(struct key *key,
-					const void *data,
-					size_t datalen)
+					struct key_preparsed_payload *prep)
 {
-	key->payload.data = (struct request_key_auth *) data;
+	key->payload.data = (struct request_key_auth *)prep->data;
 	return 0;
 }
 
diff --git a/security/keys/trusted.c b/security/keys/trusted.c
index 2d5d041f2049..42036c7a0856 100644
--- a/security/keys/trusted.c
+++ b/security/keys/trusted.c
@@ -927,22 +927,23 @@ static struct trusted_key_payload *trusted_payload_alloc(struct key *key)
  *
  * On success, return 0. Otherwise return errno.
  */
-static int trusted_instantiate(struct key *key, const void *data,
-			       size_t datalen)
+static int trusted_instantiate(struct key *key,
+			       struct key_preparsed_payload *prep)
 {
 	struct trusted_key_payload *payload = NULL;
 	struct trusted_key_options *options = NULL;
+	size_t datalen = prep->datalen;
 	char *datablob;
 	int ret = 0;
 	int key_cmd;
 
-	if (datalen <= 0 || datalen > 32767 || !data)
+	if (datalen <= 0 || datalen > 32767 || !prep->data)
 		return -EINVAL;
 
 	datablob = kmalloc(datalen + 1, GFP_KERNEL);
 	if (!datablob)
 		return -ENOMEM;
-	memcpy(datablob, data, datalen);
+	memcpy(datablob, prep->data, datalen);
 	datablob[datalen] = '\0';
 
 	options = trusted_options_alloc();
@@ -1011,17 +1012,18 @@ static void trusted_rcu_free(struct rcu_head *rcu)
 /*
  * trusted_update - reseal an existing key with new PCR values
  */
-static int trusted_update(struct key *key, const void *data, size_t datalen)
+static int trusted_update(struct key *key, struct key_preparsed_payload *prep)
 {
 	struct trusted_key_payload *p = key->payload.data;
 	struct trusted_key_payload *new_p;
 	struct trusted_key_options *new_o;
+	size_t datalen = prep->datalen;
 	char *datablob;
 	int ret = 0;
 
 	if (!p->migratable)
 		return -EPERM;
-	if (datalen <= 0 || datalen > 32767 || !data)
+	if (datalen <= 0 || datalen > 32767 || !prep->data)
 		return -EINVAL;
 
 	datablob = kmalloc(datalen + 1, GFP_KERNEL);
@@ -1038,7 +1040,7 @@ static int trusted_update(struct key *key, const void *data, size_t datalen)
 		goto out;
 	}
 
-	memcpy(datablob, data, datalen);
+	memcpy(datablob, prep->data, datalen);
 	datablob[datalen] = '\0';
 	ret = datablob_parse(datablob, new_p, new_o);
 	if (ret != Opt_update) {
diff --git a/security/keys/user_defined.c b/security/keys/user_defined.c
index c7660a25a3e4..55dc88939185 100644
--- a/security/keys/user_defined.c
+++ b/security/keys/user_defined.c
@@ -58,13 +58,14 @@ EXPORT_SYMBOL_GPL(key_type_logon);
 /*
  * instantiate a user defined key
  */
-int user_instantiate(struct key *key, const void *data, size_t datalen)
+int user_instantiate(struct key *key, struct key_preparsed_payload *prep)
 {
 	struct user_key_payload *upayload;
+	size_t datalen = prep->datalen;
 	int ret;
 
 	ret = -EINVAL;
-	if (datalen <= 0 || datalen > 32767 || !data)
+	if (datalen <= 0 || datalen > 32767 || !prep->data)
 		goto error;
 
 	ret = key_payload_reserve(key, datalen);
@@ -78,7 +79,7 @@ int user_instantiate(struct key *key, const void *data, size_t datalen)
 
 	/* attach the data */
 	upayload->datalen = datalen;
-	memcpy(upayload->data, data, datalen);
+	memcpy(upayload->data, prep->data, datalen);
 	rcu_assign_keypointer(key, upayload);
 	ret = 0;
 
@@ -92,13 +93,14 @@ EXPORT_SYMBOL_GPL(user_instantiate);
  * update a user defined key
  * - the key's semaphore is write-locked
  */
-int user_update(struct key *key, const void *data, size_t datalen)
+int user_update(struct key *key, struct key_preparsed_payload *prep)
 {
 	struct user_key_payload *upayload, *zap;
+	size_t datalen = prep->datalen;
 	int ret;
 
 	ret = -EINVAL;
-	if (datalen <= 0 || datalen > 32767 || !data)
+	if (datalen <= 0 || datalen > 32767 || !prep->data)
 		goto error;
 
 	/* construct a replacement payload */
@@ -108,7 +110,7 @@ int user_update(struct key *key, const void *data, size_t datalen)
 		goto error;
 
 	upayload->datalen = datalen;
-	memcpy(upayload->data, data, datalen);
+	memcpy(upayload->data, prep->data, datalen);
 
 	/* check the quota and attach the new data */
 	zap = upayload;
-- 
cgit v1.2.3


From e6459606b04e6385ccd3c2060fc10f78a92c7700 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@linux.intel.com>
Date: Mon, 1 Oct 2012 00:23:52 +0200
Subject: lib: Add early cpio decoder

Add a simple cpio decoder without library dependencies for the purpose
of extracting components from the initramfs blob for early kernel
uses.  Intended consumers so far are microcode and ACPI override.

Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Link: http://lkml.kernel.org/r/1349043837-22659-2-git-send-email-trenn@suse.de
Cc: Len Brown <lenb@kernel.org>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Thomas Renninger <trenn@suse.de>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 include/linux/earlycpio.h |  17 ++++++
 lib/Makefile              |   2 +-
 lib/earlycpio.c           | 145 ++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 163 insertions(+), 1 deletion(-)
 create mode 100644 include/linux/earlycpio.h
 create mode 100644 lib/earlycpio.c

(limited to 'include/linux')

diff --git a/include/linux/earlycpio.h b/include/linux/earlycpio.h
new file mode 100644
index 000000000000..111f46d83d00
--- /dev/null
+++ b/include/linux/earlycpio.h
@@ -0,0 +1,17 @@
+#ifndef _LINUX_EARLYCPIO_H
+#define _LINUX_EARLYCPIO_H
+
+#include <linux/types.h>
+
+#define MAX_CPIO_FILE_NAME 18
+
+struct cpio_data {
+	void *data;
+	size_t size;
+	char name[MAX_CPIO_FILE_NAME];
+};
+
+struct cpio_data find_cpio_data(const char *path, void *data, size_t len,
+				long *offset);
+
+#endif /* _LINUX_EARLYCPIO_H */
diff --git a/lib/Makefile b/lib/Makefile
index 42d283edc4d3..0924041b6959 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -12,7 +12,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \
 	 idr.o int_sqrt.o extable.o prio_tree.o \
 	 sha1.o md5.o irq_regs.o reciprocal_div.o argv_split.o \
 	 proportions.o flex_proportions.o prio_heap.o ratelimit.o show_mem.o \
-	 is_single_threaded.o plist.o decompress.o
+	 is_single_threaded.o plist.o decompress.o earlycpio.o
 
 lib-$(CONFIG_MMU) += ioremap.o
 lib-$(CONFIG_SMP) += cpumask.o
diff --git a/lib/earlycpio.c b/lib/earlycpio.c
new file mode 100644
index 000000000000..8078ef49cb79
--- /dev/null
+++ b/lib/earlycpio.c
@@ -0,0 +1,145 @@
+/* ----------------------------------------------------------------------- *
+ *
+ *   Copyright 2012 Intel Corporation; author H. Peter Anvin
+ *
+ *   This file is part of the Linux kernel, and is made available
+ *   under the terms of the GNU General Public License version 2, as
+ *   published by the Free Software Foundation.
+ *
+ *   This program is distributed in the hope it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *   General Public License for more details.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * earlycpio.c
+ *
+ * Find a specific cpio member; must precede any compressed content.
+ * This is used to locate data items in the initramfs used by the
+ * kernel itself during early boot (before the main initramfs is
+ * decompressed.)  It is the responsibility of the initramfs creator
+ * to ensure that these items are uncompressed at the head of the
+ * blob.  Depending on the boot loader or package tool that may be a
+ * separate file or part of the same file.
+ */
+
+#include <linux/earlycpio.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+
+enum cpio_fields {
+	C_MAGIC,
+	C_INO,
+	C_MODE,
+	C_UID,
+	C_GID,
+	C_NLINK,
+	C_MTIME,
+	C_FILESIZE,
+	C_MAJ,
+	C_MIN,
+	C_RMAJ,
+	C_RMIN,
+	C_NAMESIZE,
+	C_CHKSUM,
+	C_NFIELDS
+};
+
+/**
+ * cpio_data find_cpio_data - Search for files in an uncompressed cpio
+ * @path:   The directory to search for, including a slash at the end
+ * @data:   Pointer to the the cpio archive or a header inside
+ * @len:    Remaining length of the cpio based on data pointer
+ * @offset: When a matching file is found, this is the offset to the
+ *          beginning of the cpio. It can be used to iterate through
+ *          the cpio to find all files inside of a directory path
+ *
+ * @return: struct cpio_data containing the address, length and
+ *          filename (with the directory path cut off) of the found file.
+ *          If you search for a filename and not for files in a directory,
+ *          pass the absolute path of the filename in the cpio and make sure
+ *          the match returned an empty filename string.
+ */
+
+struct cpio_data __cpuinit find_cpio_data(const char *path, void *data,
+					  size_t len,  long *offset)
+{
+	const size_t cpio_header_len = 8*C_NFIELDS - 2;
+	struct cpio_data cd = { NULL, 0, "" };
+	const char *p, *dptr, *nptr;
+	unsigned int ch[C_NFIELDS], *chp, v;
+	unsigned char c, x;
+	size_t mypathsize = strlen(path);
+	int i, j;
+
+	p = data;
+
+	while (len > cpio_header_len) {
+		if (!*p) {
+			/* All cpio headers need to be 4-byte aligned */
+			p += 4;
+			len -= 4;
+			continue;
+		}
+
+		j = 6;		/* The magic field is only 6 characters */
+		chp = ch;
+		for (i = C_NFIELDS; i; i--) {
+			v = 0;
+			while (j--) {
+				v <<= 4;
+				c = *p++;
+
+				x = c - '0';
+				if (x < 10) {
+					v += x;
+					continue;
+				}
+
+				x = (c | 0x20) - 'a';
+				if (x < 6) {
+					v += x + 10;
+					continue;
+				}
+
+				goto quit; /* Invalid hexadecimal */
+			}
+			*chp++ = v;
+			j = 8;	/* All other fields are 8 characters */
+		}
+
+		if ((ch[C_MAGIC] - 0x070701) > 1)
+			goto quit; /* Invalid magic */
+
+		len -= cpio_header_len;
+
+		dptr = PTR_ALIGN(p + ch[C_NAMESIZE], 4);
+		nptr = PTR_ALIGN(dptr + ch[C_FILESIZE], 4);
+
+		if (nptr > p + len || dptr < p || nptr < dptr)
+			goto quit; /* Buffer overrun */
+
+		if ((ch[C_MODE] & 0170000) == 0100000 &&
+		    ch[C_NAMESIZE] >= mypathsize &&
+		    !memcmp(p, path, mypathsize)) {
+			*offset = (long)nptr - (long)data;
+			if (ch[C_NAMESIZE] - mypathsize >= MAX_CPIO_FILE_NAME) {
+				pr_warn(
+				"File %s exceeding MAX_CPIO_FILE_NAME [%d]\n",
+				p, MAX_CPIO_FILE_NAME);
+			}
+			strlcpy(cd.name, p + mypathsize, MAX_CPIO_FILE_NAME);
+
+			cd.data = (void *)dptr;
+			cd.size = ch[C_FILESIZE];
+			return cd; /* Found it! */
+		}
+		len -= (nptr - p);
+		p = nptr;
+	}
+
+quit:
+	return cd;
+}
-- 
cgit v1.2.3


From 8e30524dcc0d0ac1a18a5cee482b9d9cde3cb332 Mon Sep 17 00:00:00 2001
From: Thomas Renninger <trenn@suse.de>
Date: Mon, 1 Oct 2012 00:23:53 +0200
Subject: x86, acpi: Introduce x86 arch specific arch_reserve_mem_area() for
 e820 handling

This is needed for ACPI table overriding via initrd. Beside reserving
memblocks, X86 also requires to flag the memory area to E820_RESERVED or
E820_ACPI in the e820 mappings to be able to io(re)map it later.

Signed-off-by: Thomas Renninger <trenn@suse.de>
Link: http://lkml.kernel.org/r/1349043837-22659-3-git-send-email-trenn@suse.de
Cc: Len Brown <lenb@kernel.org>
Cc: Robert Moore <robert.moore@intel.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Eric Piel <eric.piel@tremplin-utc.net>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/kernel/acpi/boot.c | 6 ++++++
 include/linux/acpi.h        | 8 ++++++++
 2 files changed, 14 insertions(+)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index b2297e58c6ed..6b75777c0a8d 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -1700,3 +1700,9 @@ int __acpi_release_global_lock(unsigned int *lock)
 	} while (unlikely (val != old));
 	return old & 0x1;
 }
+
+void __init arch_reserve_mem_area(acpi_physical_address addr, size_t size)
+{
+	e820_add_region(addr, size, E820_ACPI);
+	update_e820();
+}
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 4f2a76224509..946fd1ea79ff 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -426,6 +426,14 @@ void acpi_os_set_prepare_sleep(int (*func)(u8 sleep_state,
 
 acpi_status acpi_os_prepare_sleep(u8 sleep_state,
 				  u32 pm1a_control, u32 pm1b_control);
+#if CONFIG_X86
+void arch_reserve_mem_area(acpi_physical_address addr, size_t size);
+#else
+static inline void arch_reserve_mem_area(acpi_physical_address addr,
+					  size_t size)
+{
+}
+#endif /* CONFIG_X86 */
 #else
 #define acpi_os_set_prepare_sleep(func, pm1a_ctrl, pm1b_ctrl) do { } while (0)
 #endif
-- 
cgit v1.2.3


From 53aac44c904abbad9f474f652f099de13b5c3563 Mon Sep 17 00:00:00 2001
From: Thomas Renninger <trenn@suse.de>
Date: Mon, 1 Oct 2012 00:23:54 +0200
Subject: ACPI: Store valid ACPI tables passed via early initrd in reserved
 memblock areas

A later patch will compare them with ACPI tables that get loaded at boot or
runtime and if criteria match, a stored one is loaded.

Signed-off-by: Thomas Renninger <trenn@suse.de>
Link: http://lkml.kernel.org/r/1349043837-22659-4-git-send-email-trenn@suse.de
Cc: Len Brown <lenb@kernel.org>
Cc: Robert Moore <robert.moore@intel.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Eric Piel <eric.piel@tremplin-utc.net>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/kernel/setup.c |   2 +
 drivers/acpi/Kconfig    |   9 ++++
 drivers/acpi/osl.c      | 122 ++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/acpi.h    |   8 ++++
 4 files changed, 141 insertions(+)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index f4b9b80e1b95..764e543b2297 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -941,6 +941,8 @@ void __init setup_arch(char **cmdline_p)
 
 	reserve_initrd();
 
+	acpi_initrd_override((void *)initrd_start, initrd_end - initrd_start);
+
 	reserve_crashkernel();
 
 	vsmp_init();
diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
index 80998958cf45..8cf719547b51 100644
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -261,6 +261,15 @@ config ACPI_CUSTOM_DSDT
 	bool
 	default ACPI_CUSTOM_DSDT_FILE != ""
 
+config ACPI_INITRD_TABLE_OVERRIDE
+	bool "ACPI tables can be passed via uncompressed cpio in initrd"
+	default n
+	help
+	  This option provides functionality to override arbitrary ACPI tables
+	  via initrd. No functional change if no ACPI tables are passed via
+	  initrd, therefore it's safe to say Y.
+	  See Documentation/acpi/initrd_table_override.txt for details
+
 config ACPI_BLACKLIST_YEAR
 	int "Disable ACPI for systems before Jan 1st this year" if X86_32
 	default 0
diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c
index 9eaf708f5885..b20b07903218 100644
--- a/drivers/acpi/osl.c
+++ b/drivers/acpi/osl.c
@@ -534,6 +534,128 @@ acpi_os_predefined_override(const struct acpi_predefined_names *init_val,
 	return AE_OK;
 }
 
+#ifdef CONFIG_ACPI_INITRD_TABLE_OVERRIDE
+#include <linux/earlycpio.h>
+#include <linux/memblock.h>
+
+static u64 acpi_tables_addr;
+static int all_tables_size;
+
+/* Copied from acpica/tbutils.c:acpi_tb_checksum() */
+u8 __init acpi_table_checksum(u8 *buffer, u32 length)
+{
+	u8 sum = 0;
+	u8 *end = buffer + length;
+
+	while (buffer < end)
+		sum = (u8) (sum + *(buffer++));
+	return sum;
+}
+
+/* All but ACPI_SIG_RSDP and ACPI_SIG_FACS: */
+static const char * const table_sigs[] = {
+	ACPI_SIG_BERT, ACPI_SIG_CPEP, ACPI_SIG_ECDT, ACPI_SIG_EINJ,
+	ACPI_SIG_ERST, ACPI_SIG_HEST, ACPI_SIG_MADT, ACPI_SIG_MSCT,
+	ACPI_SIG_SBST, ACPI_SIG_SLIT, ACPI_SIG_SRAT, ACPI_SIG_ASF,
+	ACPI_SIG_BOOT, ACPI_SIG_DBGP, ACPI_SIG_DMAR, ACPI_SIG_HPET,
+	ACPI_SIG_IBFT, ACPI_SIG_IVRS, ACPI_SIG_MCFG, ACPI_SIG_MCHI,
+	ACPI_SIG_SLIC, ACPI_SIG_SPCR, ACPI_SIG_SPMI, ACPI_SIG_TCPA,
+	ACPI_SIG_UEFI, ACPI_SIG_WAET, ACPI_SIG_WDAT, ACPI_SIG_WDDT,
+	ACPI_SIG_WDRT, ACPI_SIG_DSDT, ACPI_SIG_FADT, ACPI_SIG_PSDT,
+	ACPI_SIG_RSDT, ACPI_SIG_XSDT, ACPI_SIG_SSDT, NULL };
+
+/* Non-fatal errors: Affected tables/files are ignored */
+#define INVALID_TABLE(x, path, name)					\
+	{ pr_err("ACPI OVERRIDE: " x " [%s%s]\n", path, name); continue; }
+
+#define ACPI_HEADER_SIZE sizeof(struct acpi_table_header)
+
+/* Must not increase 10 or needs code modification below */
+#define ACPI_OVERRIDE_TABLES 10
+
+void __init acpi_initrd_override(void *data, size_t size)
+{
+	int sig, no, table_nr = 0, total_offset = 0;
+	long offset = 0;
+	struct acpi_table_header *table;
+	char cpio_path[32] = "kernel/firmware/acpi/";
+	struct cpio_data file;
+	struct cpio_data early_initrd_files[ACPI_OVERRIDE_TABLES];
+	char *p;
+
+	if (data == NULL || size == 0)
+		return;
+
+	for (no = 0; no < ACPI_OVERRIDE_TABLES; no++) {
+		file = find_cpio_data(cpio_path, data, size, &offset);
+		if (!file.data)
+			break;
+
+		data += offset;
+		size -= offset;
+
+		if (file.size < sizeof(struct acpi_table_header))
+			INVALID_TABLE("Table smaller than ACPI header",
+				      cpio_path, file.name);
+
+		table = file.data;
+
+		for (sig = 0; table_sigs[sig]; sig++)
+			if (!memcmp(table->signature, table_sigs[sig], 4))
+				break;
+
+		if (!table_sigs[sig])
+			INVALID_TABLE("Unknown signature",
+				      cpio_path, file.name);
+		if (file.size != table->length)
+			INVALID_TABLE("File length does not match table length",
+				      cpio_path, file.name);
+		if (acpi_table_checksum(file.data, table->length))
+			INVALID_TABLE("Bad table checksum",
+				      cpio_path, file.name);
+
+		pr_info("%4.4s ACPI table found in initrd [%s%s][0x%x]\n",
+			table->signature, cpio_path, file.name, table->length);
+
+		all_tables_size += table->length;
+		early_initrd_files[table_nr].data = file.data;
+		early_initrd_files[table_nr].size = file.size;
+		table_nr++;
+	}
+	if (table_nr == 0)
+		return;
+
+	acpi_tables_addr =
+		memblock_find_in_range(0, max_low_pfn_mapped << PAGE_SHIFT,
+				       all_tables_size, PAGE_SIZE);
+	if (!acpi_tables_addr) {
+		WARN_ON(1);
+		return;
+	}
+	/*
+	 * Only calling e820_add_reserve does not work and the
+	 * tables are invalid (memory got used) later.
+	 * memblock_reserve works as expected and the tables won't get modified.
+	 * But it's not enough on X86 because ioremap will
+	 * complain later (used by acpi_os_map_memory) that the pages
+	 * that should get mapped are not marked "reserved".
+	 * Both memblock_reserve and e820_add_region (via arch_reserve_mem_area)
+	 * works fine.
+	 */
+	memblock_reserve(acpi_tables_addr, acpi_tables_addr + all_tables_size);
+	arch_reserve_mem_area(acpi_tables_addr, all_tables_size);
+
+	p = early_ioremap(acpi_tables_addr, all_tables_size);
+
+	for (no = 0; no < table_nr; no++) {
+		memcpy(p + total_offset, early_initrd_files[no].data,
+		       early_initrd_files[no].size);
+		total_offset += early_initrd_files[no].size;
+	}
+	early_iounmap(p, all_tables_size);
+}
+#endif /* CONFIG_ACPI_INITRD_TABLE_OVERRIDE */
+
 acpi_status
 acpi_os_table_override(struct acpi_table_header * existing_table,
 		       struct acpi_table_header ** new_table)
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 946fd1ea79ff..d14081c10c17 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -76,6 +76,14 @@ typedef int (*acpi_table_handler) (struct acpi_table_header *table);
 
 typedef int (*acpi_table_entry_handler) (struct acpi_subtable_header *header, const unsigned long end);
 
+#ifdef CONFIG_ACPI_INITRD_TABLE_OVERRIDE
+void acpi_initrd_override(void *data, size_t size);
+#else
+static inline void acpi_initrd_override(void *data, size_t size)
+{
+}
+#endif
+
 char * __acpi_map_table (unsigned long phys_addr, unsigned long size);
 void __acpi_unmap_table(char *map, unsigned long size);
 int early_acpi_boot_init(void);
-- 
cgit v1.2.3


From 3a50597de8635cd05133bd12c95681c82fe7b878 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 2 Oct 2012 19:24:29 +0100
Subject: KEYS: Make the session and process keyrings per-thread

Make the session keyring per-thread rather than per-process, but still
inherited from the parent thread to solve a problem with PAM and gdm.

The problem is that join_session_keyring() will reject attempts to change the
session keyring of a multithreaded program but gdm is now multithreaded before
it gets to the point of starting PAM and running pam_keyinit to create the
session keyring.  See:

	https://bugs.freedesktop.org/show_bug.cgi?id=49211

The reason that join_session_keyring() will only change the session keyring
under a single-threaded environment is that it's hard to alter the other
thread's credentials to effect the change in a multi-threaded program.  The
problems are such as:

 (1) How to prevent two threads both running join_session_keyring() from
     racing.

 (2) Another thread's credentials may not be modified directly by this process.

 (3) The number of threads is uncertain whilst we're not holding the
     appropriate spinlock, making preallocation slightly tricky.

 (4) We could use TIF_NOTIFY_RESUME and key_replace_session_keyring() to get
     another thread to replace its keyring, but that means preallocating for
     each thread.

A reasonable way around this is to make the session keyring per-thread rather
than per-process and just document that if you want a common session keyring,
you must get it before you spawn any threads - which is the current situation
anyway.

Whilst we're at it, we can the process keyring behave in the same way.  This
means we can clean up some of the ickyness in the creds code.

Basically, after this patch, the session, process and thread keyrings are about
inheritance rules only and not about sharing changes of keyring.

Reported-by: Mantas M. <grawity@gmail.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Tested-by: Ray Strode <rstrode@redhat.com>
---
 include/linux/cred.h         |  17 +-----
 kernel/cred.c                | 127 +++++--------------------------------------
 security/keys/keyctl.c       |  11 ++--
 security/keys/process_keys.c |  66 ++++++++--------------
 security/keys/request_key.c  |  10 ++--
 5 files changed, 50 insertions(+), 181 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cred.h b/include/linux/cred.h
index ebbed2ce6637..0142aacb70b7 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -76,21 +76,6 @@ extern int groups_search(const struct group_info *, kgid_t);
 extern int in_group_p(kgid_t);
 extern int in_egroup_p(kgid_t);
 
-/*
- * The common credentials for a thread group
- * - shared by CLONE_THREAD
- */
-#ifdef CONFIG_KEYS
-struct thread_group_cred {
-	atomic_t	usage;
-	pid_t		tgid;			/* thread group process ID */
-	spinlock_t	lock;
-	struct key __rcu *session_keyring;	/* keyring inherited over fork */
-	struct key	*process_keyring;	/* keyring private to this process */
-	struct rcu_head	rcu;			/* RCU deletion hook */
-};
-#endif
-
 /*
  * The security context of a task
  *
@@ -139,6 +124,8 @@ struct cred {
 #ifdef CONFIG_KEYS
 	unsigned char	jit_keyring;	/* default keyring to attach requested
 					 * keys to */
+	struct key __rcu *session_keyring; /* keyring inherited over fork */
+	struct key	*process_keyring; /* keyring private to this process */
 	struct key	*thread_keyring; /* keyring private to this thread */
 	struct key	*request_key_auth; /* assumed request_key authority */
 	struct thread_group_cred *tgcred; /* thread-group shared credentials */
diff --git a/kernel/cred.c b/kernel/cred.c
index de728ac50d82..3f7ad1ec2ae4 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -29,17 +29,6 @@
 
 static struct kmem_cache *cred_jar;
 
-/*
- * The common credentials for the initial task's thread group
- */
-#ifdef CONFIG_KEYS
-static struct thread_group_cred init_tgcred = {
-	.usage	= ATOMIC_INIT(2),
-	.tgid	= 0,
-	.lock	= __SPIN_LOCK_UNLOCKED(init_cred.tgcred.lock),
-};
-#endif
-
 /*
  * The initial credentials for the initial task
  */
@@ -65,9 +54,6 @@ struct cred init_cred = {
 	.user			= INIT_USER,
 	.user_ns		= &init_user_ns,
 	.group_info		= &init_groups,
-#ifdef CONFIG_KEYS
-	.tgcred			= &init_tgcred,
-#endif
 };
 
 static inline void set_cred_subscribers(struct cred *cred, int n)
@@ -95,36 +81,6 @@ static inline void alter_cred_subscribers(const struct cred *_cred, int n)
 #endif
 }
 
-/*
- * Dispose of the shared task group credentials
- */
-#ifdef CONFIG_KEYS
-static void release_tgcred_rcu(struct rcu_head *rcu)
-{
-	struct thread_group_cred *tgcred =
-		container_of(rcu, struct thread_group_cred, rcu);
-
-	BUG_ON(atomic_read(&tgcred->usage) != 0);
-
-	key_put(tgcred->session_keyring);
-	key_put(tgcred->process_keyring);
-	kfree(tgcred);
-}
-#endif
-
-/*
- * Release a set of thread group credentials.
- */
-static void release_tgcred(struct cred *cred)
-{
-#ifdef CONFIG_KEYS
-	struct thread_group_cred *tgcred = cred->tgcred;
-
-	if (atomic_dec_and_test(&tgcred->usage))
-		call_rcu(&tgcred->rcu, release_tgcred_rcu);
-#endif
-}
-
 /*
  * The RCU callback to actually dispose of a set of credentials
  */
@@ -150,9 +106,10 @@ static void put_cred_rcu(struct rcu_head *rcu)
 #endif
 
 	security_cred_free(cred);
+	key_put(cred->session_keyring);
+	key_put(cred->process_keyring);
 	key_put(cred->thread_keyring);
 	key_put(cred->request_key_auth);
-	release_tgcred(cred);
 	if (cred->group_info)
 		put_group_info(cred->group_info);
 	free_uid(cred->user);
@@ -246,15 +203,6 @@ struct cred *cred_alloc_blank(void)
 	if (!new)
 		return NULL;
 
-#ifdef CONFIG_KEYS
-	new->tgcred = kzalloc(sizeof(*new->tgcred), GFP_KERNEL);
-	if (!new->tgcred) {
-		kmem_cache_free(cred_jar, new);
-		return NULL;
-	}
-	atomic_set(&new->tgcred->usage, 1);
-#endif
-
 	atomic_set(&new->usage, 1);
 #ifdef CONFIG_DEBUG_CREDENTIALS
 	new->magic = CRED_MAGIC;
@@ -308,9 +256,10 @@ struct cred *prepare_creds(void)
 	get_user_ns(new->user_ns);
 
 #ifdef CONFIG_KEYS
+	key_get(new->session_keyring);
+	key_get(new->process_keyring);
 	key_get(new->thread_keyring);
 	key_get(new->request_key_auth);
-	atomic_inc(&new->tgcred->usage);
 #endif
 
 #ifdef CONFIG_SECURITY
@@ -334,39 +283,20 @@ EXPORT_SYMBOL(prepare_creds);
  */
 struct cred *prepare_exec_creds(void)
 {
-	struct thread_group_cred *tgcred = NULL;
 	struct cred *new;
 
-#ifdef CONFIG_KEYS
-	tgcred = kmalloc(sizeof(*tgcred), GFP_KERNEL);
-	if (!tgcred)
-		return NULL;
-#endif
-
 	new = prepare_creds();
-	if (!new) {
-		kfree(tgcred);
+	if (!new)
 		return new;
-	}
 
 #ifdef CONFIG_KEYS
 	/* newly exec'd tasks don't get a thread keyring */
 	key_put(new->thread_keyring);
 	new->thread_keyring = NULL;
 
-	/* create a new per-thread-group creds for all this set of threads to
-	 * share */
-	memcpy(tgcred, new->tgcred, sizeof(struct thread_group_cred));
-
-	atomic_set(&tgcred->usage, 1);
-	spin_lock_init(&tgcred->lock);
-
 	/* inherit the session keyring; new process keyring */
-	key_get(tgcred->session_keyring);
-	tgcred->process_keyring = NULL;
-
-	release_tgcred(new);
-	new->tgcred = tgcred;
+	key_put(new->process_keyring);
+	new->process_keyring = NULL;
 #endif
 
 	return new;
@@ -383,9 +313,6 @@ struct cred *prepare_exec_creds(void)
  */
 int copy_creds(struct task_struct *p, unsigned long clone_flags)
 {
-#ifdef CONFIG_KEYS
-	struct thread_group_cred *tgcred;
-#endif
 	struct cred *new;
 	int ret;
 
@@ -425,22 +352,12 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags)
 			install_thread_keyring_to_cred(new);
 	}
 
-	/* we share the process and session keyrings between all the threads in
-	 * a process - this is slightly icky as we violate COW credentials a
-	 * bit */
+	/* The process keyring is only shared between the threads in a process;
+	 * anything outside of those threads doesn't inherit.
+	 */
 	if (!(clone_flags & CLONE_THREAD)) {
-		tgcred = kmalloc(sizeof(*tgcred), GFP_KERNEL);
-		if (!tgcred) {
-			ret = -ENOMEM;
-			goto error_put;
-		}
-		atomic_set(&tgcred->usage, 1);
-		spin_lock_init(&tgcred->lock);
-		tgcred->process_keyring = NULL;
-		tgcred->session_keyring = key_get(new->tgcred->session_keyring);
-
-		release_tgcred(new);
-		new->tgcred = tgcred;
+		key_put(new->process_keyring);
+		new->process_keyring = NULL;
 	}
 #endif
 
@@ -643,9 +560,6 @@ void __init cred_init(void)
  */
 struct cred *prepare_kernel_cred(struct task_struct *daemon)
 {
-#ifdef CONFIG_KEYS
-	struct thread_group_cred *tgcred;
-#endif
 	const struct cred *old;
 	struct cred *new;
 
@@ -653,14 +567,6 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon)
 	if (!new)
 		return NULL;
 
-#ifdef CONFIG_KEYS
-	tgcred = kmalloc(sizeof(*tgcred), GFP_KERNEL);
-	if (!tgcred) {
-		kmem_cache_free(cred_jar, new);
-		return NULL;
-	}
-#endif
-
 	kdebug("prepare_kernel_cred() alloc %p", new);
 
 	if (daemon)
@@ -678,13 +584,10 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon)
 	get_group_info(new->group_info);
 
 #ifdef CONFIG_KEYS
-	atomic_set(&tgcred->usage, 1);
-	spin_lock_init(&tgcred->lock);
-	tgcred->process_keyring = NULL;
-	tgcred->session_keyring = NULL;
-	new->tgcred = tgcred;
-	new->request_key_auth = NULL;
+	new->session_keyring = NULL;
+	new->process_keyring = NULL;
 	new->thread_keyring = NULL;
+	new->request_key_auth = NULL;
 	new->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING;
 #endif
 
diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c
index a0d373f76815..65b38417c211 100644
--- a/security/keys/keyctl.c
+++ b/security/keys/keyctl.c
@@ -1475,7 +1475,8 @@ long keyctl_session_to_parent(void)
 		goto error_keyring;
 	newwork = &cred->rcu;
 
-	cred->tgcred->session_keyring = key_ref_to_ptr(keyring_r);
+	cred->session_keyring = key_ref_to_ptr(keyring_r);
+	keyring_r = NULL;
 	init_task_work(newwork, key_change_session_keyring);
 
 	me = current;
@@ -1500,7 +1501,7 @@ long keyctl_session_to_parent(void)
 	mycred = current_cred();
 	pcred = __task_cred(parent);
 	if (mycred == pcred ||
-	    mycred->tgcred->session_keyring == pcred->tgcred->session_keyring) {
+	    mycred->session_keyring == pcred->session_keyring) {
 		ret = 0;
 		goto unlock;
 	}
@@ -1516,9 +1517,9 @@ long keyctl_session_to_parent(void)
 		goto unlock;
 
 	/* the keyrings must have the same UID */
-	if ((pcred->tgcred->session_keyring &&
-	     pcred->tgcred->session_keyring->uid != mycred->euid) ||
-	    mycred->tgcred->session_keyring->uid != mycred->euid)
+	if ((pcred->session_keyring &&
+	     pcred->session_keyring->uid != mycred->euid) ||
+	    mycred->session_keyring->uid != mycred->euid)
 		goto unlock;
 
 	/* cancel an already pending keyring replacement */
diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c
index 178b8c3b130a..9de5dc598276 100644
--- a/security/keys/process_keys.c
+++ b/security/keys/process_keys.c
@@ -169,9 +169,8 @@ static int install_thread_keyring(void)
 int install_process_keyring_to_cred(struct cred *new)
 {
 	struct key *keyring;
-	int ret;
 
-	if (new->tgcred->process_keyring)
+	if (new->process_keyring)
 		return -EEXIST;
 
 	keyring = keyring_alloc("_pid", new->uid, new->gid,
@@ -179,17 +178,8 @@ int install_process_keyring_to_cred(struct cred *new)
 	if (IS_ERR(keyring))
 		return PTR_ERR(keyring);
 
-	spin_lock_irq(&new->tgcred->lock);
-	if (!new->tgcred->process_keyring) {
-		new->tgcred->process_keyring = keyring;
-		keyring = NULL;
-		ret = 0;
-	} else {
-		ret = -EEXIST;
-	}
-	spin_unlock_irq(&new->tgcred->lock);
-	key_put(keyring);
-	return ret;
+	new->process_keyring = keyring;
+	return 0;
 }
 
 /*
@@ -230,7 +220,7 @@ int install_session_keyring_to_cred(struct cred *cred, struct key *keyring)
 	/* create an empty session keyring */
 	if (!keyring) {
 		flags = KEY_ALLOC_QUOTA_OVERRUN;
-		if (cred->tgcred->session_keyring)
+		if (cred->session_keyring)
 			flags = KEY_ALLOC_IN_QUOTA;
 
 		keyring = keyring_alloc("_ses", cred->uid, cred->gid,
@@ -242,17 +232,11 @@ int install_session_keyring_to_cred(struct cred *cred, struct key *keyring)
 	}
 
 	/* install the keyring */
-	spin_lock_irq(&cred->tgcred->lock);
-	old = cred->tgcred->session_keyring;
-	rcu_assign_pointer(cred->tgcred->session_keyring, keyring);
-	spin_unlock_irq(&cred->tgcred->lock);
-
-	/* we're using RCU on the pointer, but there's no point synchronising
-	 * on it if it didn't previously point to anything */
-	if (old) {
-		synchronize_rcu();
+	old = cred->session_keyring;
+	rcu_assign_pointer(cred->session_keyring, keyring);
+
+	if (old)
 		key_put(old);
-	}
 
 	return 0;
 }
@@ -367,9 +351,9 @@ key_ref_t search_my_process_keyrings(struct key_type *type,
 	}
 
 	/* search the process keyring second */
-	if (cred->tgcred->process_keyring) {
+	if (cred->process_keyring) {
 		key_ref = keyring_search_aux(
-			make_key_ref(cred->tgcred->process_keyring, 1),
+			make_key_ref(cred->process_keyring, 1),
 			cred, type, description, match, no_state_check);
 		if (!IS_ERR(key_ref))
 			goto found;
@@ -388,12 +372,10 @@ key_ref_t search_my_process_keyrings(struct key_type *type,
 	}
 
 	/* search the session keyring */
-	if (cred->tgcred->session_keyring) {
+	if (cred->session_keyring) {
 		rcu_read_lock();
 		key_ref = keyring_search_aux(
-			make_key_ref(rcu_dereference(
-					     cred->tgcred->session_keyring),
-				     1),
+			make_key_ref(rcu_dereference(cred->session_keyring), 1),
 			cred, type, description, match, no_state_check);
 		rcu_read_unlock();
 
@@ -563,7 +545,7 @@ try_again:
 		break;
 
 	case KEY_SPEC_PROCESS_KEYRING:
-		if (!cred->tgcred->process_keyring) {
+		if (!cred->process_keyring) {
 			if (!(lflags & KEY_LOOKUP_CREATE))
 				goto error;
 
@@ -575,13 +557,13 @@ try_again:
 			goto reget_creds;
 		}
 
-		key = cred->tgcred->process_keyring;
+		key = cred->process_keyring;
 		atomic_inc(&key->usage);
 		key_ref = make_key_ref(key, 1);
 		break;
 
 	case KEY_SPEC_SESSION_KEYRING:
-		if (!cred->tgcred->session_keyring) {
+		if (!cred->session_keyring) {
 			/* always install a session keyring upon access if one
 			 * doesn't exist yet */
 			ret = install_user_keyrings();
@@ -596,7 +578,7 @@ try_again:
 			if (ret < 0)
 				goto error;
 			goto reget_creds;
-		} else if (cred->tgcred->session_keyring ==
+		} else if (cred->session_keyring ==
 			   cred->user->session_keyring &&
 			   lflags & KEY_LOOKUP_CREATE) {
 			ret = join_session_keyring(NULL);
@@ -606,7 +588,7 @@ try_again:
 		}
 
 		rcu_read_lock();
-		key = rcu_dereference(cred->tgcred->session_keyring);
+		key = rcu_dereference(cred->session_keyring);
 		atomic_inc(&key->usage);
 		rcu_read_unlock();
 		key_ref = make_key_ref(key, 1);
@@ -766,12 +748,6 @@ long join_session_keyring(const char *name)
 	struct key *keyring;
 	long ret, serial;
 
-	/* only permit this if there's a single thread in the thread group -
-	 * this avoids us having to adjust the creds on all threads and risking
-	 * ENOMEM */
-	if (!current_is_single_threaded())
-		return -EMLINK;
-
 	new = prepare_creds();
 	if (!new)
 		return -ENOMEM;
@@ -783,7 +759,7 @@ long join_session_keyring(const char *name)
 		if (ret < 0)
 			goto error;
 
-		serial = new->tgcred->session_keyring->serial;
+		serial = new->session_keyring->serial;
 		ret = commit_creds(new);
 		if (ret == 0)
 			ret = serial;
@@ -806,6 +782,9 @@ long join_session_keyring(const char *name)
 	} else if (IS_ERR(keyring)) {
 		ret = PTR_ERR(keyring);
 		goto error2;
+	} else if (keyring == new->session_keyring) {
+		ret = 0;
+		goto error2;
 	}
 
 	/* we've got a keyring - now to install it */
@@ -862,8 +841,7 @@ void key_change_session_keyring(struct callback_head *twork)
 
 	new->jit_keyring	= old->jit_keyring;
 	new->thread_keyring	= key_get(old->thread_keyring);
-	new->tgcred->tgid	= old->tgcred->tgid;
-	new->tgcred->process_keyring = key_get(old->tgcred->process_keyring);
+	new->process_keyring	= key_get(old->process_keyring);
 
 	security_transfer_creds(new, old);
 
diff --git a/security/keys/request_key.c b/security/keys/request_key.c
index 000e75017520..275c4f9e4b8c 100644
--- a/security/keys/request_key.c
+++ b/security/keys/request_key.c
@@ -150,12 +150,12 @@ static int call_sbin_request_key(struct key_construction *cons,
 		cred->thread_keyring ? cred->thread_keyring->serial : 0);
 
 	prkey = 0;
-	if (cred->tgcred->process_keyring)
-		prkey = cred->tgcred->process_keyring->serial;
+	if (cred->process_keyring)
+		prkey = cred->process_keyring->serial;
 	sprintf(keyring_str[1], "%d", prkey);
 
 	rcu_read_lock();
-	session = rcu_dereference(cred->tgcred->session_keyring);
+	session = rcu_dereference(cred->session_keyring);
 	if (!session)
 		session = cred->user->session_keyring;
 	sskey = session->serial;
@@ -297,14 +297,14 @@ static void construct_get_dest_keyring(struct key **_dest_keyring)
 				break;
 
 		case KEY_REQKEY_DEFL_PROCESS_KEYRING:
-			dest_keyring = key_get(cred->tgcred->process_keyring);
+			dest_keyring = key_get(cred->process_keyring);
 			if (dest_keyring)
 				break;
 
 		case KEY_REQKEY_DEFL_SESSION_KEYRING:
 			rcu_read_lock();
 			dest_keyring = key_get(
-				rcu_dereference(cred->tgcred->session_keyring));
+				rcu_dereference(cred->session_keyring));
 			rcu_read_unlock();
 
 			if (dest_keyring)
-- 
cgit v1.2.3


From 96b5c8fea6c0861621051290d705ec2e971963f1 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 2 Oct 2012 19:24:56 +0100
Subject: KEYS: Reduce initial permissions on keys

Reduce the initial permissions on new keys to grant the possessor everything,
view permission only to the user (so the keys can be seen in /proc/keys) and
nothing else.

This gives the creator a chance to adjust the permissions mask before other
processes can access the new key or create a link to it.

To aid with this, keyring_alloc() now takes a permission argument rather than
setting the permissions itself.

The following permissions are now set:

 (1) The user and user-session keyrings grant the user that owns them full
     permissions and grant a possessor everything bar SETATTR.

 (2) The process and thread keyrings grant the possessor full permissions but
     only grant the user VIEW.  This permits the user to see them in
     /proc/keys, but not to do anything with them.

 (3) Anonymous session keyrings grant the possessor full permissions, but only
     grant the user VIEW and READ.  This means that the user can see them in
     /proc/keys and can list them, but nothing else.  Possibly READ shouldn't
     be provided either.

 (4) Named session keyrings grant everything an anonymous session keyring does,
     plus they grant the user LINK permission.  The whole point of named
     session keyrings is that others can also subscribe to them.  Possibly this
     should be a separate permission to LINK.

 (5) The temporary session keyring created by call_sbin_request_key() gets the
     same permissions as an anonymous session keyring.

 (6) Keys created by add_key() get VIEW, SEARCH, LINK and SETATTR for the
     possessor, plus READ and/or WRITE if the key type supports them.  The used
     only gets VIEW now.

 (7) Keys created by request_key() now get the same as those created by
     add_key().

Reported-by: Lennart Poettering <lennart@poettering.net>
Reported-by: Stef Walter <stefw@redhat.com>
Signed-off-by: David Howells <dhowells@redhat.com>
---
 include/linux/key.h          |  1 +
 security/keys/key.c          |  6 +++---
 security/keys/keyring.c      |  9 +++------
 security/keys/process_keys.c | 26 +++++++++++++++++---------
 security/keys/request_key.c  | 11 ++++++++++-
 5 files changed, 34 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/key.h b/include/linux/key.h
index cef3b315ba7c..890699815212 100644
--- a/include/linux/key.h
+++ b/include/linux/key.h
@@ -264,6 +264,7 @@ extern int key_unlink(struct key *keyring,
 
 extern struct key *keyring_alloc(const char *description, uid_t uid, gid_t gid,
 				 const struct cred *cred,
+				 key_perm_t perm,
 				 unsigned long flags,
 				 struct key *dest);
 
diff --git a/security/keys/key.c b/security/keys/key.c
index 50d96d4e06f2..bebeca3a78e4 100644
--- a/security/keys/key.c
+++ b/security/keys/key.c
@@ -826,13 +826,13 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref,
 	/* if the client doesn't provide, decide on the permissions we want */
 	if (perm == KEY_PERM_UNDEF) {
 		perm = KEY_POS_VIEW | KEY_POS_SEARCH | KEY_POS_LINK | KEY_POS_SETATTR;
-		perm |= KEY_USR_VIEW | KEY_USR_SEARCH | KEY_USR_LINK | KEY_USR_SETATTR;
+		perm |= KEY_USR_VIEW;
 
 		if (ktype->read)
-			perm |= KEY_POS_READ | KEY_USR_READ;
+			perm |= KEY_POS_READ;
 
 		if (ktype == &key_type_keyring || ktype->update)
-			perm |= KEY_USR_WRITE;
+			perm |= KEY_POS_WRITE;
 	}
 
 	/* allocate a new key */
diff --git a/security/keys/keyring.c b/security/keys/keyring.c
index 81e7852d281d..cf704a92083f 100644
--- a/security/keys/keyring.c
+++ b/security/keys/keyring.c
@@ -257,17 +257,14 @@ error:
  * Allocate a keyring and link into the destination keyring.
  */
 struct key *keyring_alloc(const char *description, uid_t uid, gid_t gid,
-			  const struct cred *cred, unsigned long flags,
-			  struct key *dest)
+			  const struct cred *cred, key_perm_t perm,
+			  unsigned long flags, struct key *dest)
 {
 	struct key *keyring;
 	int ret;
 
 	keyring = key_alloc(&key_type_keyring, description,
-			    uid, gid, cred,
-			    (KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_ALL,
-			    flags);
-
+			    uid, gid, cred, perm, flags);
 	if (!IS_ERR(keyring)) {
 		ret = key_instantiate_and_link(keyring, NULL, 0, dest, NULL);
 		if (ret < 0) {
diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c
index 9de5dc598276..b58d93892740 100644
--- a/security/keys/process_keys.c
+++ b/security/keys/process_keys.c
@@ -46,9 +46,11 @@ int install_user_keyrings(void)
 	struct user_struct *user;
 	const struct cred *cred;
 	struct key *uid_keyring, *session_keyring;
+	key_perm_t user_keyring_perm;
 	char buf[20];
 	int ret;
 
+	user_keyring_perm = (KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_ALL;
 	cred = current_cred();
 	user = cred->user;
 
@@ -72,8 +74,8 @@ int install_user_keyrings(void)
 		uid_keyring = find_keyring_by_name(buf, true);
 		if (IS_ERR(uid_keyring)) {
 			uid_keyring = keyring_alloc(buf, user->uid, (gid_t) -1,
-						    cred, KEY_ALLOC_IN_QUOTA,
-						    NULL);
+						    cred, user_keyring_perm,
+						    KEY_ALLOC_IN_QUOTA, NULL);
 			if (IS_ERR(uid_keyring)) {
 				ret = PTR_ERR(uid_keyring);
 				goto error;
@@ -88,7 +90,8 @@ int install_user_keyrings(void)
 		if (IS_ERR(session_keyring)) {
 			session_keyring =
 				keyring_alloc(buf, user->uid, (gid_t) -1,
-					      cred, KEY_ALLOC_IN_QUOTA, NULL);
+					      cred, user_keyring_perm,
+					      KEY_ALLOC_IN_QUOTA, NULL);
 			if (IS_ERR(session_keyring)) {
 				ret = PTR_ERR(session_keyring);
 				goto error_release;
@@ -129,6 +132,7 @@ int install_thread_keyring_to_cred(struct cred *new)
 	struct key *keyring;
 
 	keyring = keyring_alloc("_tid", new->uid, new->gid, new,
+				KEY_POS_ALL | KEY_USR_VIEW,
 				KEY_ALLOC_QUOTA_OVERRUN, NULL);
 	if (IS_ERR(keyring))
 		return PTR_ERR(keyring);
@@ -173,8 +177,9 @@ int install_process_keyring_to_cred(struct cred *new)
 	if (new->process_keyring)
 		return -EEXIST;
 
-	keyring = keyring_alloc("_pid", new->uid, new->gid,
-				new, KEY_ALLOC_QUOTA_OVERRUN, NULL);
+	keyring = keyring_alloc("_pid", new->uid, new->gid, new,
+				KEY_POS_ALL | KEY_USR_VIEW,
+				KEY_ALLOC_QUOTA_OVERRUN, NULL);
 	if (IS_ERR(keyring))
 		return PTR_ERR(keyring);
 
@@ -223,8 +228,9 @@ int install_session_keyring_to_cred(struct cred *cred, struct key *keyring)
 		if (cred->session_keyring)
 			flags = KEY_ALLOC_IN_QUOTA;
 
-		keyring = keyring_alloc("_ses", cred->uid, cred->gid,
-					cred, flags, NULL);
+		keyring = keyring_alloc("_ses", cred->uid, cred->gid, cred,
+					KEY_POS_ALL | KEY_USR_VIEW | KEY_USR_READ,
+					flags, NULL);
 		if (IS_ERR(keyring))
 			return PTR_ERR(keyring);
 	} else {
@@ -773,8 +779,10 @@ long join_session_keyring(const char *name)
 	keyring = find_keyring_by_name(name, false);
 	if (PTR_ERR(keyring) == -ENOKEY) {
 		/* not found - try and create a new one */
-		keyring = keyring_alloc(name, old->uid, old->gid, old,
-					KEY_ALLOC_IN_QUOTA, NULL);
+		keyring = keyring_alloc(
+			name, old->uid, old->gid, old,
+			KEY_POS_ALL | KEY_USR_VIEW | KEY_USR_READ | KEY_USR_LINK,
+			KEY_ALLOC_IN_QUOTA, NULL);
 		if (IS_ERR(keyring)) {
 			ret = PTR_ERR(keyring);
 			goto error2;
diff --git a/security/keys/request_key.c b/security/keys/request_key.c
index 275c4f9e4b8c..0ae3a2202771 100644
--- a/security/keys/request_key.c
+++ b/security/keys/request_key.c
@@ -126,6 +126,7 @@ static int call_sbin_request_key(struct key_construction *cons,
 
 	cred = get_current_cred();
 	keyring = keyring_alloc(desc, cred->fsuid, cred->fsgid, cred,
+				KEY_POS_ALL | KEY_USR_VIEW | KEY_USR_READ,
 				KEY_ALLOC_QUOTA_OVERRUN, NULL);
 	put_cred(cred);
 	if (IS_ERR(keyring)) {
@@ -347,6 +348,7 @@ static int construct_alloc_key(struct key_type *type,
 	const struct cred *cred = current_cred();
 	unsigned long prealloc;
 	struct key *key;
+	key_perm_t perm;
 	key_ref_t key_ref;
 	int ret;
 
@@ -355,8 +357,15 @@ static int construct_alloc_key(struct key_type *type,
 	*_key = NULL;
 	mutex_lock(&user->cons_lock);
 
+	perm = KEY_POS_VIEW | KEY_POS_SEARCH | KEY_POS_LINK | KEY_POS_SETATTR;
+	perm |= KEY_USR_VIEW;
+	if (type->read)
+		perm |= KEY_POS_READ;
+	if (type == &key_type_keyring || type->update)
+		perm |= KEY_POS_WRITE;
+
 	key = key_alloc(type, description, cred->fsuid, cred->fsgid, cred,
-			KEY_POS_ALL, flags);
+			perm, flags);
 	if (IS_ERR(key))
 		goto alloc_failed;
 
-- 
cgit v1.2.3


From 385ddeac7ed99cf7dc62d76274d55fbd7cae1b5a Mon Sep 17 00:00:00 2001
From: "Luck, Tony" <tony.luck@intel.com>
Date: Fri, 5 Oct 2012 15:05:34 -0700
Subject: X86 ACPI: Use #ifdef not #if for CONFIG_X86 check

Fix a build warning on ia64:

include/linux/acpi.h:437:5: warning: "CONFIG_X86" is not defined

Signed-off-by: Tony Luck <tony.luck@intel.com>
Link: http://lkml.kernel.org/r/506f59ae9600b36a4@agluck-desktop.sc.intel.com
Cc: Len Brown <lenb@kernel.org>
Cc: Thomas Renninger <trenn@suse.de>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 include/linux/acpi.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index d14081c10c17..49fe586e3b1e 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -434,7 +434,7 @@ void acpi_os_set_prepare_sleep(int (*func)(u8 sleep_state,
 
 acpi_status acpi_os_prepare_sleep(u8 sleep_state,
 				  u32 pm1a_control, u32 pm1b_control);
-#if CONFIG_X86
+#ifdef CONFIG_X86
 void arch_reserve_mem_area(acpi_physical_address addr, size_t size);
 #else
 static inline void arch_reserve_mem_area(acpi_physical_address addr,
-- 
cgit v1.2.3


From 0384e90b853357d24935c65ba0e1bdd27faa6e58 Mon Sep 17 00:00:00 2001
From: Daniel Mack <zonque@gmail.com>
Date: Sun, 7 Oct 2012 18:19:44 +0200
Subject: spi/mcspi: allow configuration of pin directions

Allow D0 to be an input and D1 to be an output, configurable via
platform data and a new DT property.

Based on a patch from  Matus Ujhelyi <matus.ujhelyi@streamunlimited.com>

Signed-off-by: Daniel Mack <zonque@gmail.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 Documentation/devicetree/bindings/spi/omap-spi.txt |  4 +++-
 drivers/spi/spi-omap2-mcspi.c                      | 25 ++++++++++++++++------
 include/linux/platform_data/spi-omap2-mcspi.h      |  4 ++++
 3 files changed, 25 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/spi/omap-spi.txt b/Documentation/devicetree/bindings/spi/omap-spi.txt
index 81df374adbb9..2ef0a6b85653 100644
--- a/Documentation/devicetree/bindings/spi/omap-spi.txt
+++ b/Documentation/devicetree/bindings/spi/omap-spi.txt
@@ -6,7 +6,9 @@ Required properties:
   - "ti,omap4-spi" for OMAP4+.
 - ti,spi-num-cs : Number of chipselect supported  by the instance.
 - ti,hwmods: Name of the hwmod associated to the McSPI
-
+- ti,pindir-d0-in-d1-out: Select the D0 pin as input and D1 as
+			  output. The default is D0 as output and
+			  D1 as input.
 
 Example:
 
diff --git a/drivers/spi/spi-omap2-mcspi.c b/drivers/spi/spi-omap2-mcspi.c
index 3542fdc664b1..51046332677c 100644
--- a/drivers/spi/spi-omap2-mcspi.c
+++ b/drivers/spi/spi-omap2-mcspi.c
@@ -130,6 +130,7 @@ struct omap2_mcspi {
 	struct omap2_mcspi_dma	*dma_channels;
 	struct device		*dev;
 	struct omap2_mcspi_regs ctx;
+	unsigned int		pin_dir:1;
 };
 
 struct omap2_mcspi_cs {
@@ -765,8 +766,15 @@ static int omap2_mcspi_setup_transfer(struct spi_device *spi,
 	/* standard 4-wire master mode:  SCK, MOSI/out, MISO/in, nCS
 	 * REVISIT: this controller could support SPI_3WIRE mode.
 	 */
-	l &= ~(OMAP2_MCSPI_CHCONF_IS|OMAP2_MCSPI_CHCONF_DPE1);
-	l |= OMAP2_MCSPI_CHCONF_DPE0;
+	if (mcspi->pin_dir == MCSPI_PINDIR_D0_OUT_D1_IN) {
+		l &= ~OMAP2_MCSPI_CHCONF_IS;
+		l &= ~OMAP2_MCSPI_CHCONF_DPE1;
+		l |= OMAP2_MCSPI_CHCONF_DPE0;
+	} else {
+		l |= OMAP2_MCSPI_CHCONF_IS;
+		l |= OMAP2_MCSPI_CHCONF_DPE1;
+		l &= ~OMAP2_MCSPI_CHCONF_DPE0;
+	}
 
 	/* wordlength */
 	l &= ~OMAP2_MCSPI_CHCONF_WL_MASK;
@@ -1167,6 +1175,11 @@ static int __devinit omap2_mcspi_probe(struct platform_device *pdev)
 	master->cleanup = omap2_mcspi_cleanup;
 	master->dev.of_node = node;
 
+	dev_set_drvdata(&pdev->dev, master);
+
+	mcspi = spi_master_get_devdata(master);
+	mcspi->master = master;
+
 	match = of_match_device(omap_mcspi_of_match, &pdev->dev);
 	if (match) {
 		u32 num_cs = 1; /* default number of chipselect */
@@ -1175,19 +1188,17 @@ static int __devinit omap2_mcspi_probe(struct platform_device *pdev)
 		of_property_read_u32(node, "ti,spi-num-cs", &num_cs);
 		master->num_chipselect = num_cs;
 		master->bus_num = bus_num++;
+		if (of_get_property(node, "ti,pindir-d0-in-d1-out", NULL))
+			mcspi->pin_dir = MCSPI_PINDIR_D0_IN_D1_OUT;
 	} else {
 		pdata = pdev->dev.platform_data;
 		master->num_chipselect = pdata->num_cs;
 		if (pdev->id != -1)
 			master->bus_num = pdev->id;
+		mcspi->pin_dir = pdata->pin_dir;
 	}
 	regs_offset = pdata->regs_offset;
 
-	dev_set_drvdata(&pdev->dev, master);
-
-	mcspi = spi_master_get_devdata(master);
-	mcspi->master = master;
-
 	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	if (r == NULL) {
 		status = -ENODEV;
diff --git a/include/linux/platform_data/spi-omap2-mcspi.h b/include/linux/platform_data/spi-omap2-mcspi.h
index a357eb26bd25..ce70f7b5a8e1 100644
--- a/include/linux/platform_data/spi-omap2-mcspi.h
+++ b/include/linux/platform_data/spi-omap2-mcspi.h
@@ -7,9 +7,13 @@
 
 #define OMAP4_MCSPI_REG_OFFSET 0x100
 
+#define MCSPI_PINDIR_D0_OUT_D1_IN	0
+#define MCSPI_PINDIR_D0_IN_D1_OUT	1
+
 struct omap2_mcspi_platform_config {
 	unsigned short	num_cs;
 	unsigned int regs_offset;
+	unsigned int pin_dir:1;
 };
 
 struct omap2_mcspi_dev_attr {
-- 
cgit v1.2.3


From b2409b65d9401f7e3c572fc59db6a920fc7a163e Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@redhat.com>
Date: Fri, 19 Oct 2012 07:29:17 -0300
Subject: [media] remove include/linux/dvb/dmx.h

The only reason for this header is to make sure that include
linux/time.h were added before uapi/*/dmx.h. Just push down the
time.h header on the few places where this is used, and drop
this new header.

Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/dvb-core/dmxdev.h    |  1 +
 drivers/media/firewire/firedtv.h   |  1 +
 drivers/media/pci/ttpci/av7110.h   |  1 +
 drivers/media/usb/tlg2300/pd-dvb.c |  1 +
 include/linux/dvb/dmx.h            | 29 -----------------------------
 5 files changed, 4 insertions(+), 29 deletions(-)
 delete mode 100644 include/linux/dvb/dmx.h

(limited to 'include/linux')

diff --git a/drivers/media/dvb-core/dmxdev.h b/drivers/media/dvb-core/dmxdev.h
index 02ebe28f830d..48c6cf92ab99 100644
--- a/drivers/media/dvb-core/dmxdev.h
+++ b/drivers/media/dvb-core/dmxdev.h
@@ -26,6 +26,7 @@
 #include <linux/types.h>
 #include <linux/spinlock.h>
 #include <linux/kernel.h>
+#include <linux/time.h>
 #include <linux/timer.h>
 #include <linux/wait.h>
 #include <linux/fs.h>
diff --git a/drivers/media/firewire/firedtv.h b/drivers/media/firewire/firedtv.h
index 4fdcd8cb7530..c2ba085e0d20 100644
--- a/drivers/media/firewire/firedtv.h
+++ b/drivers/media/firewire/firedtv.h
@@ -13,6 +13,7 @@
 #ifndef _FIREDTV_H
 #define _FIREDTV_H
 
+#include <linux/time.h>
 #include <linux/dvb/dmx.h>
 #include <linux/dvb/frontend.h>
 #include <linux/list.h>
diff --git a/drivers/media/pci/ttpci/av7110.h b/drivers/media/pci/ttpci/av7110.h
index 88b3b2d6cc0e..a378662b1dcf 100644
--- a/drivers/media/pci/ttpci/av7110.h
+++ b/drivers/media/pci/ttpci/av7110.h
@@ -6,6 +6,7 @@
 #include <linux/netdevice.h>
 #include <linux/i2c.h>
 #include <linux/input.h>
+#include <linux/time.h>
 
 #include <linux/dvb/video.h>
 #include <linux/dvb/audio.h>
diff --git a/drivers/media/usb/tlg2300/pd-dvb.c b/drivers/media/usb/tlg2300/pd-dvb.c
index 30fcb117e898..ca4994a5190c 100644
--- a/drivers/media/usb/tlg2300/pd-dvb.c
+++ b/drivers/media/usb/tlg2300/pd-dvb.c
@@ -1,6 +1,7 @@
 #include "pd-common.h"
 #include <linux/kernel.h>
 #include <linux/usb.h>
+#include <linux/time.h>
 #include <linux/dvb/dmx.h>
 #include <linux/delay.h>
 #include <linux/gfp.h>
diff --git a/include/linux/dvb/dmx.h b/include/linux/dvb/dmx.h
deleted file mode 100644
index 0be6d8f2b52b..000000000000
--- a/include/linux/dvb/dmx.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * dmx.h
- *
- * Copyright (C) 2000 Marcus Metzler <marcus@convergence.de>
- *                  & Ralph  Metzler <ralph@convergence.de>
- *                    for convergence integrated media GmbH
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
- *
- */
-#ifndef _DVBDMX_H_
-#define _DVBDMX_H_
-
-#include <linux/time.h>
-#include <uapi/linux/dvb/dmx.h>
-
-#endif /*_DVBDMX_H_*/
-- 
cgit v1.2.3


From 74df06daf632ce2d321d01cb046004768352efc4 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@redhat.com>
Date: Fri, 19 Oct 2012 07:41:50 -0300
Subject: [media] Remove include/linux/dvb/ stuff

The only file left there is include/linux/dvb/video.h. The
only function for it is to include linux/compiler.h, but this
is already indirectly included. So, get rid of it.

Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 include/linux/dvb/Kbuild  |  0
 include/linux/dvb/video.h | 29 -----------------------------
 2 files changed, 29 deletions(-)
 delete mode 100644 include/linux/dvb/Kbuild
 delete mode 100644 include/linux/dvb/video.h

(limited to 'include/linux')

diff --git a/include/linux/dvb/Kbuild b/include/linux/dvb/Kbuild
deleted file mode 100644
index e69de29bb2d1..000000000000
diff --git a/include/linux/dvb/video.h b/include/linux/dvb/video.h
deleted file mode 100644
index 85c20d925696..000000000000
--- a/include/linux/dvb/video.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * video.h
- *
- * Copyright (C) 2000 Marcus Metzler <marcus@convergence.de>
- *                  & Ralph  Metzler <ralph@convergence.de>
- *                    for convergence integrated media GmbH
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
- *
- */
-#ifndef _DVBVIDEO_H_
-#define _DVBVIDEO_H_
-
-#include <linux/compiler.h>
-#include <uapi/linux/dvb/video.h>
-
-#endif /*_DVBVIDEO_H_*/
-- 
cgit v1.2.3


From 7e05484f02e1ea05a3aae0724d4df1e8a5a1920f Mon Sep 17 00:00:00 2001
From: Bryan Venteicher <bryan.venteicher@gmail.com>
Date: Tue, 16 Oct 2012 23:55:54 +1030
Subject: virtio-scsi: Add real 2-clause BSD license to header

This is analogous to commit a1b383870a made by Rusty Russell to all
the VirtIO headers at the time. This eases the use of the header as
is by other OSes.

Signed-off-by: Bryan Venteicher <bryanv@daemoninthecloset.org>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 include/linux/virtio_scsi.h | 28 ++++++++++++++++++++++++++--
 1 file changed, 26 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/virtio_scsi.h b/include/linux/virtio_scsi.h
index d6b4440387b7..4195b97a3def 100644
--- a/include/linux/virtio_scsi.h
+++ b/include/linux/virtio_scsi.h
@@ -1,7 +1,31 @@
+/*
+ * This header is BSD licensed so anyone can use the definitions to implement
+ * compatible drivers/servers.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
 #ifndef _LINUX_VIRTIO_SCSI_H
 #define _LINUX_VIRTIO_SCSI_H
-/* This header is BSD licensed so anyone can use the definitions to implement
- * compatible drivers/servers. */
 
 #define VIRTIO_SCSI_CDB_SIZE   32
 #define VIRTIO_SCSI_SENSE_SIZE 96
-- 
cgit v1.2.3


From 1b4f59e356cc94929305bd107b7f38eec62715ad Mon Sep 17 00:00:00 2001
From: Glauber Costa <glommer@parallels.com>
Date: Mon, 22 Oct 2012 18:05:36 +0400
Subject: slub: Commonize slab_cache field in struct page

Right now, slab and slub have fields in struct page to derive which
cache a page belongs to, but they do it slightly differently.

slab uses a field called slab_cache, that lives in the third double
word. slub, uses a field called "slab", living outside of the
doublewords area.

Ideally, we could use the same field for this. Since slub heavily makes
use of the doubleword region, there isn't really much room to move
slub's slab_cache field around. Since slab does not have such strict
placement restrictions, we can move it outside the doubleword area.

The naming used by slab, "slab_cache", is less confusing, and it is
preferred over slub's generic "slab".

Signed-off-by: Glauber Costa <glommer@parallels.com>
Acked-by: Christoph Lameter <cl@linux.com>
CC: David Rientjes <rientjes@google.com>
Signed-off-by: Pekka Enberg <penberg@kernel.org>
---
 include/linux/mm_types.h |  7 ++-----
 mm/slub.c                | 24 ++++++++++++------------
 2 files changed, 14 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 31f8a3af7d94..2fef4e720e79 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -128,10 +128,7 @@ struct page {
 		};
 
 		struct list_head list;	/* slobs list of pages */
-		struct {		/* slab fields */
-			struct kmem_cache *slab_cache;
-			struct slab *slab_page;
-		};
+		struct slab *slab_page; /* slab fields */
 	};
 
 	/* Remainder is not double word aligned */
@@ -146,7 +143,7 @@ struct page {
 #if USE_SPLIT_PTLOCKS
 		spinlock_t ptl;
 #endif
-		struct kmem_cache *slab;	/* SLUB: Pointer to slab */
+		struct kmem_cache *slab_cache;	/* SL[AU]B: Pointer to slab */
 		struct page *first_page;	/* Compound tail pages */
 	};
 
diff --git a/mm/slub.c b/mm/slub.c
index 16274b273c61..35483e0ab6bc 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1092,11 +1092,11 @@ static noinline struct kmem_cache_node *free_debug_processing(
 	if (!check_object(s, page, object, SLUB_RED_ACTIVE))
 		goto out;
 
-	if (unlikely(s != page->slab)) {
+	if (unlikely(s != page->slab_cache)) {
 		if (!PageSlab(page)) {
 			slab_err(s, page, "Attempt to free object(0x%p) "
 				"outside of slab", object);
-		} else if (!page->slab) {
+		} else if (!page->slab_cache) {
 			printk(KERN_ERR
 				"SLUB <none>: no slab for object 0x%p.\n",
 						object);
@@ -1357,7 +1357,7 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
 		goto out;
 
 	inc_slabs_node(s, page_to_nid(page), page->objects);
-	page->slab = s;
+	page->slab_cache = s;
 	__SetPageSlab(page);
 	if (page->pfmemalloc)
 		SetPageSlabPfmemalloc(page);
@@ -1424,7 +1424,7 @@ static void rcu_free_slab(struct rcu_head *h)
 	else
 		page = container_of((struct list_head *)h, struct page, lru);
 
-	__free_slab(page->slab, page);
+	__free_slab(page->slab_cache, page);
 }
 
 static void free_slab(struct kmem_cache *s, struct page *page)
@@ -2617,9 +2617,9 @@ void kmem_cache_free(struct kmem_cache *s, void *x)
 
 	page = virt_to_head_page(x);
 
-	if (kmem_cache_debug(s) && page->slab != s) {
+	if (kmem_cache_debug(s) && page->slab_cache != s) {
 		pr_err("kmem_cache_free: Wrong slab cache. %s but object"
-			" is from  %s\n", page->slab->name, s->name);
+			" is from  %s\n", page->slab_cache->name, s->name);
 		WARN_ON_ONCE(1);
 		return;
 	}
@@ -3418,7 +3418,7 @@ size_t ksize(const void *object)
 		return PAGE_SIZE << compound_order(page);
 	}
 
-	return slab_ksize(page->slab);
+	return slab_ksize(page->slab_cache);
 }
 EXPORT_SYMBOL(ksize);
 
@@ -3443,8 +3443,8 @@ bool verify_mem_not_deleted(const void *x)
 	}
 
 	slab_lock(page);
-	if (on_freelist(page->slab, page, object)) {
-		object_err(page->slab, page, object, "Object is on free-list");
+	if (on_freelist(page->slab_cache, page, object)) {
+		object_err(page->slab_cache, page, object, "Object is on free-list");
 		rv = false;
 	} else {
 		rv = true;
@@ -3475,7 +3475,7 @@ void kfree(const void *x)
 		__free_pages(page, compound_order(page));
 		return;
 	}
-	slab_free(page->slab, page, object, _RET_IP_);
+	slab_free(page->slab_cache, page, object, _RET_IP_);
 }
 EXPORT_SYMBOL(kfree);
 
@@ -3686,11 +3686,11 @@ static void __init kmem_cache_bootstrap_fixup(struct kmem_cache *s)
 
 		if (n) {
 			list_for_each_entry(p, &n->partial, lru)
-				p->slab = s;
+				p->slab_cache = s;
 
 #ifdef CONFIG_SLUB_DEBUG
 			list_for_each_entry(p, &n->full, lru)
-				p->slab = s;
+				p->slab_cache = s;
 #endif
 		}
 	}
-- 
cgit v1.2.3


From 6c9c6d6301287e369a754d628230fa6e50cdb74b Mon Sep 17 00:00:00 2001
From: Shuah Khan <shuah.khan@hp.com>
Date: Mon, 8 Oct 2012 11:08:06 -0600
Subject: dma-debug: New interfaces to debug dma mapping errors

Add dma-debug interface debug_dma_mapping_error() to debug
drivers that fail to check dma mapping errors on addresses
returned by dma_map_single() and dma_map_page() interfaces.
This interface clears a flag set by debug_dma_map_page() to
indicate that dma_mapping_error() has been called by the
driver. When driver does unmap, debug_dma_unmap() checks the
flag and if this flag is still set, prints warning message
that includes call trace that leads up to the unmap. This
interface can be called from dma_mapping_error() routines to
enable dma mapping error check debugging.

Tested: Intel iommu and swiotlb (iommu=soft) on x86-64 with
        CONFIG_DMA_API_DEBUG enabled and disabled.

Signed-off-by: Shuah Khan <shuah.khan@hp.com>
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 Documentation/DMA-API.txt          | 12 +++++++
 arch/x86/include/asm/dma-mapping.h |  1 +
 include/linux/dma-debug.h          |  7 ++++
 lib/dma-debug.c                    | 71 +++++++++++++++++++++++++++++++++++---
 4 files changed, 87 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/DMA-API.txt b/Documentation/DMA-API.txt
index 66bd97a95f10..78a6c569d204 100644
--- a/Documentation/DMA-API.txt
+++ b/Documentation/DMA-API.txt
@@ -678,3 +678,15 @@ out of dma_debug_entries. These entries are preallocated at boot. The number
 of preallocated entries is defined per architecture. If it is too low for you
 boot with 'dma_debug_entries=<your_desired_number>' to overwrite the
 architectural default.
+
+void debug_dmap_mapping_error(struct device *dev, dma_addr_t dma_addr);
+
+dma-debug interface debug_dma_mapping_error() to debug drivers that fail
+to check dma mapping errors on addresses returned by dma_map_single() and
+dma_map_page() interfaces. This interface clears a flag set by
+debug_dma_map_page() to indicate that dma_mapping_error() has been called by
+the driver. When driver does unmap, debug_dma_unmap() checks the flag and if
+this flag is still set, prints warning message that includes call trace that
+leads up to the unmap. This interface can be called from dma_mapping_error()
+routines to enable dma mapping error check debugging.
+
diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h
index f7b4c7903e7e..808dae63eeea 100644
--- a/arch/x86/include/asm/dma-mapping.h
+++ b/arch/x86/include/asm/dma-mapping.h
@@ -47,6 +47,7 @@ static inline struct dma_map_ops *get_dma_ops(struct device *dev)
 static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
 {
 	struct dma_map_ops *ops = get_dma_ops(dev);
+	debug_dma_mapping_error(dev, dma_addr);
 	if (ops->mapping_error)
 		return ops->mapping_error(dev, dma_addr);
 
diff --git a/include/linux/dma-debug.h b/include/linux/dma-debug.h
index 171ad8aedc83..fc0e34ce038f 100644
--- a/include/linux/dma-debug.h
+++ b/include/linux/dma-debug.h
@@ -39,6 +39,8 @@ extern void debug_dma_map_page(struct device *dev, struct page *page,
 			       int direction, dma_addr_t dma_addr,
 			       bool map_single);
 
+extern void debug_dma_mapping_error(struct device *dev, dma_addr_t dma_addr);
+
 extern void debug_dma_unmap_page(struct device *dev, dma_addr_t addr,
 				 size_t size, int direction, bool map_single);
 
@@ -105,6 +107,11 @@ static inline void debug_dma_map_page(struct device *dev, struct page *page,
 {
 }
 
+static inline void debug_dma_mapping_error(struct device *dev,
+					  dma_addr_t dma_addr)
+{
+}
+
 static inline void debug_dma_unmap_page(struct device *dev, dma_addr_t addr,
 					size_t size, int direction,
 					bool map_single)
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index d84beb994f36..59f4a1a8187d 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -45,6 +45,12 @@ enum {
 	dma_debug_coherent,
 };
 
+enum map_err_types {
+	MAP_ERR_CHECK_NOT_APPLICABLE,
+	MAP_ERR_NOT_CHECKED,
+	MAP_ERR_CHECKED,
+};
+
 #define DMA_DEBUG_STACKTRACE_ENTRIES 5
 
 struct dma_debug_entry {
@@ -57,6 +63,7 @@ struct dma_debug_entry {
 	int              direction;
 	int		 sg_call_ents;
 	int		 sg_mapped_ents;
+	enum map_err_types  map_err_type;
 #ifdef CONFIG_STACKTRACE
 	struct		 stack_trace stacktrace;
 	unsigned long	 st_entries[DMA_DEBUG_STACKTRACE_ENTRIES];
@@ -114,6 +121,12 @@ static struct device_driver *current_driver                    __read_mostly;
 
 static DEFINE_RWLOCK(driver_name_lock);
 
+static const char *const maperr2str[] = {
+	[MAP_ERR_CHECK_NOT_APPLICABLE] = "dma map error check not applicable",
+	[MAP_ERR_NOT_CHECKED] = "dma map error not checked",
+	[MAP_ERR_CHECKED] = "dma map error checked",
+};
+
 static const char *type2name[4] = { "single", "page",
 				    "scather-gather", "coherent" };
 
@@ -376,11 +389,12 @@ void debug_dma_dump_mappings(struct device *dev)
 		list_for_each_entry(entry, &bucket->list, list) {
 			if (!dev || dev == entry->dev) {
 				dev_info(entry->dev,
-					 "%s idx %d P=%Lx D=%Lx L=%Lx %s\n",
+					 "%s idx %d P=%Lx D=%Lx L=%Lx %s %s\n",
 					 type2name[entry->type], idx,
 					 (unsigned long long)entry->paddr,
 					 entry->dev_addr, entry->size,
-					 dir2name[entry->direction]);
+					 dir2name[entry->direction],
+					 maperr2str[entry->map_err_type]);
 			}
 		}
 
@@ -838,13 +852,28 @@ static __init int dma_debug_entries_cmdline(char *str)
 __setup("dma_debug=", dma_debug_cmdline);
 __setup("dma_debug_entries=", dma_debug_entries_cmdline);
 
+/* Calling dma_mapping_error() from dma-debug api will result in calling
+   debug_dma_mapping_error() - need internal mapping error routine to
+   avoid debug checks */
+#ifndef DMA_ERROR_CODE
+#define DMA_ERROR_CODE 0
+#endif
+static inline int has_mapping_error(struct device *dev, dma_addr_t dma_addr)
+{
+	const struct dma_map_ops *ops = get_dma_ops(dev);
+	if (ops->mapping_error)
+		return ops->mapping_error(dev, dma_addr);
+
+	return (dma_addr == DMA_ERROR_CODE);
+}
+
 static void check_unmap(struct dma_debug_entry *ref)
 {
 	struct dma_debug_entry *entry;
 	struct hash_bucket *bucket;
 	unsigned long flags;
 
-	if (dma_mapping_error(ref->dev, ref->dev_addr)) {
+	if (unlikely(has_mapping_error(ref->dev, ref->dev_addr))) {
 		err_printk(ref->dev, NULL, "DMA-API: device driver tries "
 			   "to free an invalid DMA memory address\n");
 		return;
@@ -910,6 +939,15 @@ static void check_unmap(struct dma_debug_entry *ref)
 			   dir2name[ref->direction]);
 	}
 
+	if (entry->map_err_type == MAP_ERR_NOT_CHECKED) {
+		err_printk(ref->dev, entry,
+			   "DMA-API: device driver failed to check map error"
+			   "[device address=0x%016llx] [size=%llu bytes] "
+			   "[mapped as %s]",
+			   ref->dev_addr, ref->size,
+			   type2name[entry->type]);
+	}
+
 	hash_bucket_del(entry);
 	dma_entry_free(entry);
 
@@ -1017,7 +1055,7 @@ void debug_dma_map_page(struct device *dev, struct page *page, size_t offset,
 	if (unlikely(global_disable))
 		return;
 
-	if (unlikely(dma_mapping_error(dev, dma_addr)))
+	if (unlikely(has_mapping_error(dev, dma_addr)))
 		return;
 
 	entry = dma_entry_alloc();
@@ -1030,6 +1068,7 @@ void debug_dma_map_page(struct device *dev, struct page *page, size_t offset,
 	entry->dev_addr  = dma_addr;
 	entry->size      = size;
 	entry->direction = direction;
+	entry->map_err_type = MAP_ERR_NOT_CHECKED;
 
 	if (map_single)
 		entry->type = dma_debug_single;
@@ -1045,6 +1084,30 @@ void debug_dma_map_page(struct device *dev, struct page *page, size_t offset,
 }
 EXPORT_SYMBOL(debug_dma_map_page);
 
+void debug_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
+{
+	struct dma_debug_entry ref;
+	struct dma_debug_entry *entry;
+	struct hash_bucket *bucket;
+	unsigned long flags;
+
+	if (unlikely(global_disable))
+		return;
+
+	ref.dev = dev;
+	ref.dev_addr = dma_addr;
+	bucket = get_hash_bucket(&ref, &flags);
+	entry = bucket_find_exact(bucket, &ref);
+
+	if (!entry)
+		goto out;
+
+	entry->map_err_type = MAP_ERR_CHECKED;
+out:
+	put_hash_bucket(bucket, &flags);
+}
+EXPORT_SYMBOL(debug_dma_mapping_error);
+
 void debug_dma_unmap_page(struct device *dev, dma_addr_t addr,
 			  size_t size, int direction, bool map_single)
 {
-- 
cgit v1.2.3


From 91872392f08486f692887d2f06a333f512648f22 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@alien8.de>
Date: Tue, 9 Oct 2012 19:52:05 +0200
Subject: drivers/base: Add a DEVICE_BOOL_ATTR macro

... which, analogous to DEVICE_INT_ATTR provides functionality to
set/clear bools. Its purpose is to be used where values need to be used
as booleans in configuration context.

Next patch uses this.

Signed-off-by: Borislav Petkov <bp@alien8.de>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Acked-by: Tony Luck <tony.luck@intel.com>
---
 drivers/base/core.c    | 21 +++++++++++++++++++++
 include/linux/device.h |  7 +++++++
 2 files changed, 28 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/base/core.c b/drivers/base/core.c
index abea76c36a4b..c8ae1f6b01b9 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -171,6 +171,27 @@ ssize_t device_show_int(struct device *dev,
 }
 EXPORT_SYMBOL_GPL(device_show_int);
 
+ssize_t device_store_bool(struct device *dev, struct device_attribute *attr,
+			  const char *buf, size_t size)
+{
+	struct dev_ext_attribute *ea = to_ext_attr(attr);
+
+	if (strtobool(buf, ea->var) < 0)
+		return -EINVAL;
+
+	return size;
+}
+EXPORT_SYMBOL_GPL(device_store_bool);
+
+ssize_t device_show_bool(struct device *dev, struct device_attribute *attr,
+			 char *buf)
+{
+	struct dev_ext_attribute *ea = to_ext_attr(attr);
+
+	return snprintf(buf, PAGE_SIZE, "%d\n", *(bool *)(ea->var));
+}
+EXPORT_SYMBOL_GPL(device_show_bool);
+
 /**
  *	device_release - free device structure.
  *	@kobj:	device's kobject.
diff --git a/include/linux/device.h b/include/linux/device.h
index 86ef6ab553b1..50c85a26b003 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -496,6 +496,10 @@ ssize_t device_show_int(struct device *dev, struct device_attribute *attr,
 			char *buf);
 ssize_t device_store_int(struct device *dev, struct device_attribute *attr,
 			 const char *buf, size_t count);
+ssize_t device_show_bool(struct device *dev, struct device_attribute *attr,
+			char *buf);
+ssize_t device_store_bool(struct device *dev, struct device_attribute *attr,
+			 const char *buf, size_t count);
 
 #define DEVICE_ATTR(_name, _mode, _show, _store) \
 	struct device_attribute dev_attr_##_name = __ATTR(_name, _mode, _show, _store)
@@ -505,6 +509,9 @@ ssize_t device_store_int(struct device *dev, struct device_attribute *attr,
 #define DEVICE_INT_ATTR(_name, _mode, _var) \
 	struct dev_ext_attribute dev_attr_##_name = \
 		{ __ATTR(_name, _mode, device_show_int, device_store_int), &(_var) }
+#define DEVICE_BOOL_ATTR(_name, _mode, _var) \
+	struct dev_ext_attribute dev_attr_##_name = \
+		{ __ATTR(_name, _mode, device_show_bool, device_store_bool), &(_var) }
 #define DEVICE_ATTR_IGNORE_LOCKDEP(_name, _mode, _show, _store) \
 	struct device_attribute dev_attr_##_name =		\
 		__ATTR_IGNORE_LOCKDEP(_name, _mode, _show, _store)
-- 
cgit v1.2.3


From 02b91b55260f7a1bdc8da25866cf27f726f5788f Mon Sep 17 00:00:00 2001
From: Lars Ellenberg <lars.ellenberg@linbit.com>
Date: Thu, 28 Jun 2012 18:26:52 +0200
Subject: drbd: introduce stop-sector to online verify

We now can schedule only a specific range of sectors for online verify,
or interrupt a running verify without interrupting the connection.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/drbd/drbd_int.h      |  1 +
 drivers/block/drbd/drbd_main.c     | 17 +++++++++++++----
 drivers/block/drbd/drbd_nl.c       | 11 +++++++----
 drivers/block/drbd/drbd_proc.c     | 12 +++++++++---
 drivers/block/drbd/drbd_receiver.c |  8 ++++++++
 drivers/block/drbd/drbd_worker.c   | 33 +++++++++++++++++++++++++++------
 include/linux/drbd.h               |  2 +-
 include/linux/drbd_nl.h            |  1 +
 8 files changed, 67 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 9a6d3a4a739a..3cce7357402b 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -1052,6 +1052,7 @@ struct drbd_conf {
 
 	/* where does the admin want us to start? (sector) */
 	sector_t ov_start_sector;
+	sector_t ov_stop_sector;
 	/* where are we now? (sector) */
 	sector_t ov_position;
 	/* Start sector of out of sync range (to merge printk reporting). */
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index dfa08b7411c0..df9965d820c9 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -1231,13 +1231,15 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns,
 	wake_up(&mdev->misc_wait);
 	wake_up(&mdev->state_wait);
 
-	/* aborted verify run. log the last position */
+	/* Aborted verify run, or we reached the stop sector.
+	 * Log the last position, unless end-of-device. */
 	if ((os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) &&
-	    ns.conn < C_CONNECTED) {
+	    ns.conn <= C_CONNECTED) {
 		mdev->ov_start_sector =
 			BM_BIT_TO_SECT(drbd_bm_bits(mdev) - mdev->ov_left);
-		dev_info(DEV, "Online Verify reached sector %llu\n",
-			(unsigned long long)mdev->ov_start_sector);
+		if (mdev->ov_left)
+			dev_info(DEV, "Online Verify reached sector %llu\n",
+				(unsigned long long)mdev->ov_start_sector);
 	}
 
 	if ((os.conn == C_PAUSED_SYNC_T || os.conn == C_PAUSED_SYNC_S) &&
@@ -1703,6 +1705,13 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
 	if (os.disk < D_UP_TO_DATE && os.conn >= C_SYNC_SOURCE && ns.conn == C_CONNECTED)
 		drbd_send_state(mdev, ns);
 
+	/* Verify finished, or reached stop sector.  Peer did not know about
+	 * the stop sector, and we may even have changed the stop sector during
+	 * verify to interrupt/stop early.  Send the new state. */
+	if (os.conn == C_VERIFY_S && ns.conn == C_CONNECTED
+	&& mdev->agreed_pro_version >= 97)
+		drbd_send_state(mdev, ns);
+
 	/* Wake up role changes, that were delayed because of connection establishing */
 	if (os.conn == C_WF_REPORT_PARAMS && ns.conn != C_WF_REPORT_PARAMS) {
 		clear_bit(STATE_SENT, &mdev->flags);
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index ab660556a001..e2d368f1747e 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -2211,8 +2211,10 @@ static int drbd_nl_start_ov(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
 				    struct drbd_nl_cfg_reply *reply)
 {
 	/* default to resume from last known position, if possible */
-	struct start_ov args =
-		{ .start_sector = mdev->ov_start_sector };
+	struct start_ov args = {
+		.start_sector = mdev->ov_start_sector,
+		.stop_sector = ULLONG_MAX,
+	};
 
 	if (!start_ov_from_tags(mdev, nlp->tag_list, &args)) {
 		reply->ret_code = ERR_MANDATORY_TAG;
@@ -2224,8 +2226,9 @@ static int drbd_nl_start_ov(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
 	drbd_suspend_io(mdev);
 	wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
 
-	/* w_make_ov_request expects position to be aligned */
-	mdev->ov_start_sector = args.start_sector & ~BM_SECT_PER_BIT;
+	/* w_make_ov_request expects start position to be aligned */
+	mdev->ov_start_sector = args.start_sector & ~(BM_SECT_PER_BIT-1);
+	mdev->ov_stop_sector = args.stop_sector;
 	reply->ret_code = drbd_request_state(mdev,NS(conn,C_VERIFY_S));
 	drbd_resume_io(mdev);
 	return 0;
diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c
index 5496104f90b9..a5a453b4355f 100644
--- a/drivers/block/drbd/drbd_proc.c
+++ b/drivers/block/drbd/drbd_proc.c
@@ -167,18 +167,24 @@ static void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq)
 		 * we convert to sectors in the display below. */
 		unsigned long bm_bits = drbd_bm_bits(mdev);
 		unsigned long bit_pos;
+		unsigned long long stop_sector = 0;
 		if (mdev->state.conn == C_VERIFY_S ||
-		    mdev->state.conn == C_VERIFY_T)
+		    mdev->state.conn == C_VERIFY_T) {
 			bit_pos = bm_bits - mdev->ov_left;
-		else
+			if (mdev->agreed_pro_version >= 97)
+				stop_sector = mdev->ov_stop_sector;
+		} else
 			bit_pos = mdev->bm_resync_fo;
 		/* Total sectors may be slightly off for oddly
 		 * sized devices. So what. */
 		seq_printf(seq,
-			"\t%3d%% sector pos: %llu/%llu\n",
+			"\t%3d%% sector pos: %llu/%llu",
 			(int)(bit_pos / (bm_bits/100+1)),
 			(unsigned long long)bit_pos * BM_SECT_PER_BIT,
 			(unsigned long long)bm_bits * BM_SECT_PER_BIT);
+		if (stop_sector != 0 && stop_sector != ULLONG_MAX)
+			seq_printf(seq, " stop sector: %llu", stop_sector);
+		seq_printf(seq, "\n");
 	}
 }
 
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 434adf75259a..280735da1963 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -3255,6 +3255,14 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
 		}
 	}
 
+	/* explicit verify finished notification, stop sector reached. */
+	if (os.conn == C_VERIFY_T && os.disk == D_UP_TO_DATE &&
+	    peer_state.conn == C_CONNECTED && real_peer_disk == D_UP_TO_DATE) {
+		ov_oos_print(mdev);
+		drbd_resync_finished(mdev);
+		return true;
+	}
+
 	/* peer says his disk is inconsistent, while we think it is uptodate,
 	 * and this happens while the peer still thinks we have a sync going on,
 	 * but we think we are already done with the sync.
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index 6bce2cc179d4..1352455dd7dd 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -691,6 +691,7 @@ static int w_make_ov_request(struct drbd_conf *mdev, struct drbd_work *w, int ca
 	int number, i, size;
 	sector_t sector;
 	const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
+	bool stop_sector_reached = false;
 
 	if (unlikely(cancel))
 		return 1;
@@ -699,9 +700,17 @@ static int w_make_ov_request(struct drbd_conf *mdev, struct drbd_work *w, int ca
 
 	sector = mdev->ov_position;
 	for (i = 0; i < number; i++) {
-		if (sector >= capacity) {
+		if (sector >= capacity)
 			return 1;
-		}
+
+		/* We check for "finished" only in the reply path:
+		 * w_e_end_ov_reply().
+		 * We need to send at least one request out. */
+		stop_sector_reached = i > 0
+			&& mdev->agreed_pro_version >= 97
+			&& sector >= mdev->ov_stop_sector;
+		if (stop_sector_reached)
+			break;
 
 		size = BM_BLOCK_SIZE;
 
@@ -725,7 +734,8 @@ static int w_make_ov_request(struct drbd_conf *mdev, struct drbd_work *w, int ca
 
  requeue:
 	mdev->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
-	mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME);
+	if (i == 0 || !stop_sector_reached)
+		mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME);
 	return 1;
 }
 
@@ -808,7 +818,12 @@ int drbd_resync_finished(struct drbd_conf *mdev)
 	dt = (jiffies - mdev->rs_start - mdev->rs_paused) / HZ;
 	if (dt <= 0)
 		dt = 1;
+	
 	db = mdev->rs_total;
+	/* adjust for verify start and stop sectors, respective reached position */
+	if (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T)
+		db -= mdev->ov_left;
+
 	dbdt = Bit2KB(db/dt);
 	mdev->rs_paused /= HZ;
 
@@ -831,7 +846,7 @@ int drbd_resync_finished(struct drbd_conf *mdev)
 	ns.conn = C_CONNECTED;
 
 	dev_info(DEV, "%s done (total %lu sec; paused %lu sec; %lu K/sec)\n",
-	     verify_done ? "Online verify " : "Resync",
+	     verify_done ? "Online verify" : "Resync",
 	     dt + mdev->rs_paused, mdev->rs_paused, dbdt);
 
 	n_oos = drbd_bm_total_weight(mdev);
@@ -912,7 +927,9 @@ out:
 	mdev->rs_total  = 0;
 	mdev->rs_failed = 0;
 	mdev->rs_paused = 0;
-	if (verify_done)
+
+	/* reset start sector, if we reached end of device */
+	if (verify_done && mdev->ov_left == 0)
 		mdev->ov_start_sector = 0;
 
 	drbd_md_sync(mdev);
@@ -1158,6 +1175,7 @@ int w_e_end_ov_reply(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
 	unsigned int size = e->size;
 	int digest_size;
 	int ok, eq = 0;
+	bool stop_sector_reached = false;
 
 	if (unlikely(cancel)) {
 		drbd_free_ee(mdev, e);
@@ -1208,7 +1226,10 @@ int w_e_end_ov_reply(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
 	if ((mdev->ov_left & 0x200) == 0x200)
 		drbd_advance_rs_marks(mdev, mdev->ov_left);
 
-	if (mdev->ov_left == 0) {
+	stop_sector_reached = mdev->agreed_pro_version >= 97 &&
+		(sector + (size>>9)) >= mdev->ov_stop_sector;
+
+	if (mdev->ov_left == 0 || stop_sector_reached) {
 		ov_oos_print(mdev);
 		drbd_resync_finished(mdev);
 	}
diff --git a/include/linux/drbd.h b/include/linux/drbd.h
index 47e3d4850584..4a7eccbd1292 100644
--- a/include/linux/drbd.h
+++ b/include/linux/drbd.h
@@ -56,7 +56,7 @@ extern const char *drbd_buildtag(void);
 #define REL_VERSION "8.3.13"
 #define API_VERSION 88
 #define PRO_VERSION_MIN 86
-#define PRO_VERSION_MAX 96
+#define PRO_VERSION_MAX 97
 
 
 enum drbd_io_error_p {
diff --git a/include/linux/drbd_nl.h b/include/linux/drbd_nl.h
index a8706f08ab36..f6a576df19e0 100644
--- a/include/linux/drbd_nl.h
+++ b/include/linux/drbd_nl.h
@@ -145,6 +145,7 @@ NL_PACKET(dump_ee, 24,
 
 NL_PACKET(start_ov, 25,
 	NL_INT64(	66,	T_MAY_IGNORE,	start_sector)
+	NL_INT64(	90,	T_MANDATORY,	stop_sector)
 )
 
 NL_PACKET(new_c_uuid, 26,
-- 
cgit v1.2.3


From ccae7868b0c5697508a541c531cf96b361d62c1c Mon Sep 17 00:00:00 2001
From: Lars Ellenberg <lars.ellenberg@linbit.com>
Date: Wed, 26 Sep 2012 14:07:04 +0200
Subject: drbd: log request sector offset and size for IO errors

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/drbd/drbd_req.c | 19 ++++++++++++++++++-
 include/linux/drbd.h          |  2 +-
 2 files changed, 19 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index d9e5962a9a8c..135ea76ed502 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -387,6 +387,20 @@ out_conflict:
 	return 1;
 }
 
+static void drbd_report_io_error(struct drbd_conf *mdev, struct drbd_request *req)
+{
+        char b[BDEVNAME_SIZE];
+
+	if (__ratelimit(&drbd_ratelimit_state))
+		return;
+
+	dev_warn(DEV, "local %s IO error sector %llu+%u on %s\n",
+			(req->rq_state & RQ_WRITE) ? "WRITE" : "READ",
+			(unsigned long long)req->sector,
+			req->size >> 9,
+			bdevname(mdev->ldev->backing_bdev, b));
+}
+
 /* obviously this could be coded as many single functions
  * instead of one huge switch,
  * or by putting the code directly in the respective locations
@@ -455,6 +469,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
 		req->rq_state |= RQ_LOCAL_COMPLETED;
 		req->rq_state &= ~RQ_LOCAL_PENDING;
 
+		drbd_report_io_error(mdev, req);
 		__drbd_chk_io_error(mdev, DRBD_WRITE_ERROR);
 		_req_may_be_done_not_susp(req, m);
 		break;
@@ -477,6 +492,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
 			break;
 		}
 
+		drbd_report_io_error(mdev, req);
 		__drbd_chk_io_error(mdev, DRBD_READ_ERROR);
 
 	goto_queue_for_net_read:
@@ -900,7 +916,8 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio, uns
 
 	if (!(local || remote) && !is_susp(mdev->state)) {
 		if (__ratelimit(&drbd_ratelimit_state))
-			dev_err(DEV, "IO ERROR: neither local nor remote disk\n");
+			dev_err(DEV, "IO ERROR: neither local nor remote data, sector %llu+%u\n",
+					(unsigned long long)req->sector, req->size >> 9);
 		goto fail_free_complete;
 	}
 
diff --git a/include/linux/drbd.h b/include/linux/drbd.h
index 4a7eccbd1292..94f58a102bbb 100644
--- a/include/linux/drbd.h
+++ b/include/linux/drbd.h
@@ -53,7 +53,7 @@
 
 
 extern const char *drbd_buildtag(void);
-#define REL_VERSION "8.3.13"
+#define REL_VERSION "8.3.14"
 #define API_VERSION 88
 #define PRO_VERSION_MIN 86
 #define PRO_VERSION_MAX 97
-- 
cgit v1.2.3


From 5d9db883761ad1bc2245fd3018715549b974203d Mon Sep 17 00:00:00 2001
From: Matthew Garrett <mjg@redhat.com>
Date: Fri, 5 Oct 2012 13:54:56 +0800
Subject: efi: Add support for a UEFI variable filesystem

The existing EFI variables code only supports variables of up to 1024
bytes. This limitation existed in version 0.99 of the EFI specification,
but was removed before any full releases. Since variables can now be
larger than a single page, sysfs isn't the best interface for this. So,
instead, let's add a filesystem. Variables can be read, written and
created, with the first 4 bytes of each variable representing its UEFI
attributes. The create() method doesn't actually commit to flash since
zero-length variables can't exist per-spec.

Updates from Jeremy Kerr <jeremy.kerr@canonical.com>.

Signed-off-by: Matthew Garrett <mjg@redhat.com>
Signed-off-by: Jeremy Kerr <jeremy.kerr@canonical.com>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
---
 drivers/firmware/efivars.c | 384 ++++++++++++++++++++++++++++++++++++++++++++-
 include/linux/efi.h        |   5 +
 2 files changed, 383 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/firmware/efivars.c b/drivers/firmware/efivars.c
index d10c9873dd9a..b605c4849772 100644
--- a/drivers/firmware/efivars.c
+++ b/drivers/firmware/efivars.c
@@ -80,6 +80,10 @@
 #include <linux/slab.h>
 #include <linux/pstore.h>
 
+#include <linux/fs.h>
+#include <linux/ramfs.h>
+#include <linux/pagemap.h>
+
 #include <asm/uaccess.h>
 
 #define EFIVARS_VERSION "0.08"
@@ -91,6 +95,7 @@ MODULE_LICENSE("GPL");
 MODULE_VERSION(EFIVARS_VERSION);
 
 #define DUMP_NAME_LEN 52
+#define GUID_LEN 37
 
 /*
  * The maximum size of VariableName + Data = 1024
@@ -108,7 +113,6 @@ struct efi_variable {
 	__u32         Attributes;
 } __attribute__((packed));
 
-
 struct efivar_entry {
 	struct efivars *efivars;
 	struct efi_variable var;
@@ -122,6 +126,9 @@ struct efivar_attribute {
 	ssize_t (*store)(struct efivar_entry *entry, const char *buf, size_t count);
 };
 
+static struct efivars __efivars;
+static struct efivar_operations ops;
+
 #define PSTORE_EFI_ATTRIBUTES \
 	(EFI_VARIABLE_NON_VOLATILE | \
 	 EFI_VARIABLE_BOOTSERVICE_ACCESS | \
@@ -629,14 +636,380 @@ static struct kobj_type efivar_ktype = {
 	.default_attrs = def_attrs,
 };
 
-static struct pstore_info efi_pstore_info;
-
 static inline void
 efivar_unregister(struct efivar_entry *var)
 {
 	kobject_put(&var->kobj);
 }
 
+static int efivarfs_file_open(struct inode *inode, struct file *file)
+{
+	file->private_data = inode->i_private;
+	return 0;
+}
+
+static ssize_t efivarfs_file_write(struct file *file,
+		const char __user *userbuf, size_t count, loff_t *ppos)
+{
+	struct efivar_entry *var = file->private_data;
+	struct efivars *efivars;
+	efi_status_t status;
+	void *data;
+	u32 attributes;
+	struct inode *inode = file->f_mapping->host;
+	int datasize = count - sizeof(attributes);
+
+	if (count < sizeof(attributes))
+		return -EINVAL;
+
+	data = kmalloc(datasize, GFP_KERNEL);
+
+	if (!data)
+		return -ENOMEM;
+
+	efivars = var->efivars;
+
+	if (copy_from_user(&attributes, userbuf, sizeof(attributes))) {
+		count = -EFAULT;
+		goto out;
+	}
+
+	if (attributes & ~(EFI_VARIABLE_MASK)) {
+		count = -EINVAL;
+		goto out;
+	}
+
+	if (copy_from_user(data, userbuf + sizeof(attributes), datasize)) {
+		count = -EFAULT;
+		goto out;
+	}
+
+	if (validate_var(&var->var, data, datasize) == false) {
+		count = -EINVAL;
+		goto out;
+	}
+
+	status = efivars->ops->set_variable(var->var.VariableName,
+					    &var->var.VendorGuid,
+					    attributes, datasize,
+					    data);
+
+	switch (status) {
+	case EFI_SUCCESS:
+		mutex_lock(&inode->i_mutex);
+		i_size_write(inode, count);
+		mutex_unlock(&inode->i_mutex);
+		break;
+	case EFI_INVALID_PARAMETER:
+		count = -EINVAL;
+		break;
+	case EFI_OUT_OF_RESOURCES:
+		count = -ENOSPC;
+		break;
+	case EFI_DEVICE_ERROR:
+		count = -EIO;
+		break;
+	case EFI_WRITE_PROTECTED:
+		count = -EROFS;
+		break;
+	case EFI_SECURITY_VIOLATION:
+		count = -EACCES;
+		break;
+	case EFI_NOT_FOUND:
+		count = -ENOENT;
+		break;
+	default:
+		count = -EINVAL;
+		break;
+	}
+out:
+	kfree(data);
+
+	return count;
+}
+
+static ssize_t efivarfs_file_read(struct file *file, char __user *userbuf,
+		size_t count, loff_t *ppos)
+{
+	struct efivar_entry *var = file->private_data;
+	struct efivars *efivars = var->efivars;
+	efi_status_t status;
+	unsigned long datasize = 0;
+	u32 attributes;
+	void *data;
+	ssize_t size;
+
+	status = efivars->ops->get_variable(var->var.VariableName,
+					    &var->var.VendorGuid,
+					    &attributes, &datasize, NULL);
+
+	if (status != EFI_BUFFER_TOO_SMALL)
+		return 0;
+
+	data = kmalloc(datasize + 4, GFP_KERNEL);
+
+	if (!data)
+		return 0;
+
+	status = efivars->ops->get_variable(var->var.VariableName,
+					    &var->var.VendorGuid,
+					    &attributes, &datasize,
+					    (data + 4));
+
+	if (status != EFI_SUCCESS)
+		return 0;
+
+	memcpy(data, &attributes, 4);
+	size = simple_read_from_buffer(userbuf, count, ppos,
+					data, datasize + 4);
+	kfree(data);
+
+	return size;
+}
+
+static void efivarfs_evict_inode(struct inode *inode)
+{
+	clear_inode(inode);
+}
+
+static const struct super_operations efivarfs_ops = {
+	.statfs = simple_statfs,
+	.drop_inode = generic_delete_inode,
+	.evict_inode = efivarfs_evict_inode,
+	.show_options = generic_show_options,
+};
+
+static struct super_block *efivarfs_sb;
+
+static const struct inode_operations efivarfs_dir_inode_operations;
+
+static const struct file_operations efivarfs_file_operations = {
+	.open	= efivarfs_file_open,
+	.read	= efivarfs_file_read,
+	.write	= efivarfs_file_write,
+	.llseek	= no_llseek,
+};
+
+static struct inode *efivarfs_get_inode(struct super_block *sb,
+				const struct inode *dir, int mode, dev_t dev)
+{
+	struct inode *inode = new_inode(sb);
+
+	if (inode) {
+		inode->i_ino = get_next_ino();
+		inode->i_uid = inode->i_gid = 0;
+		inode->i_mode = mode;
+		inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+		switch (mode & S_IFMT) {
+		case S_IFREG:
+			inode->i_fop = &efivarfs_file_operations;
+			break;
+		case S_IFDIR:
+			inode->i_op = &efivarfs_dir_inode_operations;
+			inode->i_fop = &simple_dir_operations;
+			inc_nlink(inode);
+			break;
+		}
+	}
+	return inode;
+}
+
+static void efivarfs_hex_to_guid(const char *str, efi_guid_t *guid)
+{
+	guid->b[0] = hex_to_bin(str[6]) << 4 | hex_to_bin(str[7]);
+	guid->b[1] = hex_to_bin(str[4]) << 4 | hex_to_bin(str[5]);
+	guid->b[2] = hex_to_bin(str[2]) << 4 | hex_to_bin(str[3]);
+	guid->b[3] = hex_to_bin(str[0]) << 4 | hex_to_bin(str[1]);
+	guid->b[4] = hex_to_bin(str[11]) << 4 | hex_to_bin(str[12]);
+	guid->b[5] = hex_to_bin(str[9]) << 4 | hex_to_bin(str[10]);
+	guid->b[6] = hex_to_bin(str[16]) << 4 | hex_to_bin(str[17]);
+	guid->b[7] = hex_to_bin(str[14]) << 4 | hex_to_bin(str[15]);
+	guid->b[8] = hex_to_bin(str[19]) << 4 | hex_to_bin(str[20]);
+	guid->b[9] = hex_to_bin(str[21]) << 4 | hex_to_bin(str[22]);
+	guid->b[10] = hex_to_bin(str[24]) << 4 | hex_to_bin(str[25]);
+	guid->b[11] = hex_to_bin(str[26]) << 4 | hex_to_bin(str[27]);
+	guid->b[12] = hex_to_bin(str[28]) << 4 | hex_to_bin(str[29]);
+	guid->b[13] = hex_to_bin(str[30]) << 4 | hex_to_bin(str[31]);
+	guid->b[14] = hex_to_bin(str[32]) << 4 | hex_to_bin(str[33]);
+	guid->b[15] = hex_to_bin(str[34]) << 4 | hex_to_bin(str[35]);
+}
+
+static int efivarfs_create(struct inode *dir, struct dentry *dentry,
+			  umode_t mode, bool excl)
+{
+	struct inode *inode = efivarfs_get_inode(dir->i_sb, dir, mode, 0);
+	struct efivars *efivars = &__efivars;
+	struct efivar_entry *var;
+	int namelen, i = 0, err = 0;
+
+	if (dentry->d_name.len < 38)
+		return -EINVAL;
+
+	if (!inode)
+		return -ENOSPC;
+
+	var = kzalloc(sizeof(struct efivar_entry), GFP_KERNEL);
+
+	if (!var)
+		return -ENOMEM;
+
+	namelen = dentry->d_name.len - GUID_LEN;
+
+	efivarfs_hex_to_guid(dentry->d_name.name + namelen + 1,
+			&var->var.VendorGuid);
+
+	for (i = 0; i < namelen; i++)
+		var->var.VariableName[i] = dentry->d_name.name[i];
+
+	var->var.VariableName[i] = '\0';
+
+	inode->i_private = var;
+	var->efivars = efivars;
+	var->kobj.kset = efivars->kset;
+
+	err = kobject_init_and_add(&var->kobj, &efivar_ktype, NULL, "%s",
+			     dentry->d_name.name);
+	if (err)
+		goto out;
+
+	kobject_uevent(&var->kobj, KOBJ_ADD);
+	spin_lock(&efivars->lock);
+	list_add(&var->list, &efivars->list);
+	spin_unlock(&efivars->lock);
+	d_instantiate(dentry, inode);
+	dget(dentry);
+out:
+	if (err)
+		kfree(var);
+	return err;
+}
+
+static int efivarfs_unlink(struct inode *dir, struct dentry *dentry)
+{
+	struct efivar_entry *var = dentry->d_inode->i_private;
+	struct efivars *efivars = var->efivars;
+	efi_status_t status;
+
+	spin_lock(&efivars->lock);
+
+	status = efivars->ops->set_variable(var->var.VariableName,
+					    &var->var.VendorGuid,
+					    0, 0, NULL);
+
+	if (status == EFI_SUCCESS || status == EFI_NOT_FOUND) {
+		list_del(&var->list);
+		spin_unlock(&efivars->lock);
+		efivar_unregister(var);
+		drop_nlink(dir);
+		dput(dentry);
+		return 0;
+	}
+
+	spin_unlock(&efivars->lock);
+	return -EINVAL;
+};
+
+int efivarfs_fill_super(struct super_block *sb, void *data, int silent)
+{
+	struct inode *inode = NULL;
+	struct dentry *root;
+	struct efivar_entry *entry, *n;
+	struct efivars *efivars = &__efivars;
+	int err;
+
+	efivarfs_sb = sb;
+
+	sb->s_maxbytes          = MAX_LFS_FILESIZE;
+	sb->s_blocksize         = PAGE_CACHE_SIZE;
+	sb->s_blocksize_bits    = PAGE_CACHE_SHIFT;
+	sb->s_magic             = PSTOREFS_MAGIC;
+	sb->s_op                = &efivarfs_ops;
+	sb->s_time_gran         = 1;
+
+	inode = efivarfs_get_inode(sb, NULL, S_IFDIR | 0755, 0);
+	if (!inode) {
+		err = -ENOMEM;
+		goto fail;
+	}
+	inode->i_op = &efivarfs_dir_inode_operations;
+
+	root = d_make_root(inode);
+	sb->s_root = root;
+	if (!root) {
+		err = -ENOMEM;
+		goto fail;
+	}
+
+	list_for_each_entry_safe(entry, n, &efivars->list, list) {
+		struct inode *inode;
+		struct dentry *dentry, *root = efivarfs_sb->s_root;
+		char *name;
+		unsigned long size = 0;
+		int len, i;
+
+		len = utf16_strlen(entry->var.VariableName);
+
+		/* GUID plus trailing NULL */
+		name = kmalloc(len + 38, GFP_ATOMIC);
+
+		for (i = 0; i < len; i++)
+			name[i] = entry->var.VariableName[i] & 0xFF;
+
+		name[len] = '-';
+
+		efi_guid_unparse(&entry->var.VendorGuid, name + len + 1);
+
+		name[len+GUID_LEN] = '\0';
+
+		inode = efivarfs_get_inode(efivarfs_sb, root->d_inode,
+					  S_IFREG | 0644, 0);
+		dentry = d_alloc_name(root, name);
+
+		efivars->ops->get_variable(entry->var.VariableName,
+					   &entry->var.VendorGuid,
+					   &entry->var.Attributes,
+					   &size,
+					   NULL);
+
+		mutex_lock(&inode->i_mutex);
+		inode->i_private = entry;
+		i_size_write(inode, size+4);
+		mutex_unlock(&inode->i_mutex);
+		d_add(dentry, inode);
+	}
+
+	return 0;
+fail:
+	iput(inode);
+	return err;
+}
+
+static struct dentry *efivarfs_mount(struct file_system_type *fs_type,
+				    int flags, const char *dev_name, void *data)
+{
+	return mount_single(fs_type, flags, data, efivarfs_fill_super);
+}
+
+static void efivarfs_kill_sb(struct super_block *sb)
+{
+	kill_litter_super(sb);
+	efivarfs_sb = NULL;
+}
+
+static struct file_system_type efivarfs_type = {
+	.name    = "efivarfs",
+	.mount   = efivarfs_mount,
+	.kill_sb = efivarfs_kill_sb,
+};
+
+static const struct inode_operations efivarfs_dir_inode_operations = {
+	.lookup = simple_lookup,
+	.unlink = efivarfs_unlink,
+	.create = efivarfs_create,
+};
+
+static struct pstore_info efi_pstore_info;
+
 #ifdef CONFIG_PSTORE
 
 static int efi_pstore_open(struct pstore_info *psi)
@@ -1198,6 +1571,8 @@ int register_efivars(struct efivars *efivars,
 		pstore_register(&efivars->efi_pstore_info);
 	}
 
+	register_filesystem(&efivarfs_type);
+
 out:
 	kfree(variable_name);
 
@@ -1205,9 +1580,6 @@ out:
 }
 EXPORT_SYMBOL_GPL(register_efivars);
 
-static struct efivars __efivars;
-static struct efivar_operations ops;
-
 /*
  * For now we register the efi subsystem with the firmware subsystem
  * and the vars subsystem with the efi subsystem.  In the future, it
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 8670eb1eb8cd..b2af1571592b 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -29,7 +29,12 @@
 #define EFI_UNSUPPORTED		( 3 | (1UL << (BITS_PER_LONG-1)))
 #define EFI_BAD_BUFFER_SIZE     ( 4 | (1UL << (BITS_PER_LONG-1)))
 #define EFI_BUFFER_TOO_SMALL	( 5 | (1UL << (BITS_PER_LONG-1)))
+#define EFI_NOT_READY		( 6 | (1UL << (BITS_PER_LONG-1)))
+#define EFI_DEVICE_ERROR	( 7 | (1UL << (BITS_PER_LONG-1)))
+#define EFI_WRITE_PROTECTED	( 8 | (1UL << (BITS_PER_LONG-1)))
+#define EFI_OUT_OF_RESOURCES	( 9 | (1UL << (BITS_PER_LONG-1)))
 #define EFI_NOT_FOUND		(14 | (1UL << (BITS_PER_LONG-1)))
+#define EFI_SECURITY_VIOLATION	(26 | (1UL << (BITS_PER_LONG-1)))
 
 typedef unsigned long efi_status_t;
 typedef u8 efi_bool_t;
-- 
cgit v1.2.3


From 605e70c7aa1b7b0d554baf945630c1d606bbfbc3 Mon Sep 17 00:00:00 2001
From: "Lee, Chun-Yi" <joeyli.kernel@gmail.com>
Date: Fri, 5 Oct 2012 13:54:56 +0800
Subject: efi: add efivars kobject to efi sysfs folder

UEFI variable filesystem need a new mount point, so this patch add
efivars kobject to efi_kobj for create a /sys/firmware/efi/efivars
folder.

Cc: Matthew Garrett <mjg@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Lee, Chun-Yi <jlee@suse.com>
Signed-off-by: Jeremy Kerr <jeremy.kerr@canonical.com>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
---
 drivers/firmware/efivars.c | 9 +++++++++
 include/linux/efi.h        | 1 +
 2 files changed, 10 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/firmware/efivars.c b/drivers/firmware/efivars.c
index d7658b4a5010..6793965b7c8b 100644
--- a/drivers/firmware/efivars.c
+++ b/drivers/firmware/efivars.c
@@ -1527,6 +1527,7 @@ void unregister_efivars(struct efivars *efivars)
 		sysfs_remove_bin_file(&efivars->kset->kobj, efivars->del_var);
 	kfree(efivars->new_var);
 	kfree(efivars->del_var);
+	kobject_put(efivars->kobject);
 	kset_unregister(efivars->kset);
 }
 EXPORT_SYMBOL_GPL(unregister_efivars);
@@ -1558,6 +1559,14 @@ int register_efivars(struct efivars *efivars,
 		goto out;
 	}
 
+	efivars->kobject = kobject_create_and_add("efivars", parent_kobj);
+	if (!efivars->kobject) {
+		pr_err("efivars: Subsystem registration failed.\n");
+		error = -ENOMEM;
+		kset_unregister(efivars->kset);
+		goto out;
+	}
+
 	/*
 	 * Per EFI spec, the maximum storage allocated for both
 	 * the variable name and variable data is 1024 bytes.
diff --git a/include/linux/efi.h b/include/linux/efi.h
index b2af1571592b..337aefbfb003 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -662,6 +662,7 @@ struct efivars {
 	spinlock_t lock;
 	struct list_head list;
 	struct kset *kset;
+	struct kobject *kobject;
 	struct bin_attribute *new_var, *del_var;
 	const struct efivar_operations *ops;
 	struct efivar_entry *walk_entry;
-- 
cgit v1.2.3


From bd52276fa1d420c3a504b76ffaaa1642cc79d4c4 Mon Sep 17 00:00:00 2001
From: Jan Beulich <JBeulich@suse.com>
Date: Fri, 25 May 2012 16:20:31 +0100
Subject: x86-64/efi: Use EFI to deal with platform wall clock (again)

Other than ix86, x86-64 on EFI so far didn't set the
{g,s}et_wallclock accessors to the EFI routines, thus
incorrectly using raw RTC accesses instead.

Simply removing the #ifdef around the respective code isn't
enough, however: While so far early get-time calls were done in
physical mode, this doesn't work properly for x86-64, as virtual
addresses would still need to be set up for all runtime regions
(which wasn't the case on the system I have access to), so
instead the patch moves the call to efi_enter_virtual_mode()
ahead (which in turn allows to drop all code related to calling
efi-get-time in physical mode).

Additionally the earlier calling of efi_set_executable()
requires the CPA code to cope, i.e. during early boot it must be
avoided to call cpa_flush_array(), as the first thing this
function does is a BUG_ON(irqs_disabled()).

Also make the two EFI functions in question here static -
they're not being referenced elsewhere.

History:

    This commit was originally merged as bacef661acdb ("x86-64/efi:
    Use EFI to deal with platform wall clock") but it resulted in some
    ASUS machines no longer booting due to a firmware bug, and so was
    reverted in f026cfa82f62. A pre-emptive fix for the buggy ASUS
    firmware was merged in 03a1c254975e ("x86, efi: 1:1 pagetable
    mapping for virtual EFI calls") so now this patch can be
    reapplied.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Tested-by: Matt Fleming <matt.fleming@intel.com>
Acked-by: Matthew Garrett <mjg@redhat.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Matt Fleming <matt.fleming@intel.com> [added commit history]
---
 arch/x86/mm/pageattr.c      | 10 ++++++----
 arch/x86/platform/efi/efi.c | 30 ++++--------------------------
 include/linux/efi.h         |  2 --
 init/main.c                 |  8 ++++----
 4 files changed, 14 insertions(+), 36 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index a718e0d23503..931930a96160 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -919,11 +919,13 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
 
 	/*
 	 * On success we use clflush, when the CPU supports it to
-	 * avoid the wbindv. If the CPU does not support it and in the
-	 * error case we fall back to cpa_flush_all (which uses
-	 * wbindv):
+	 * avoid the wbindv. If the CPU does not support it, in the
+	 * error case, and during early boot (for EFI) we fall back
+	 * to cpa_flush_all (which uses wbinvd):
 	 */
-	if (!ret && cpu_has_clflush) {
+	if (early_boot_irqs_disabled)
+		__cpa_flush_all((void *)(long)cache);
+	else if (!ret && cpu_has_clflush) {
 		if (cpa.flags & (CPA_PAGES_ARRAY | CPA_ARRAY)) {
 			cpa_flush_array(addr, numpages, cache,
 					cpa.flags, pages);
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index aded2a91162a..757834434e59 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -235,22 +235,7 @@ static efi_status_t __init phys_efi_set_virtual_address_map(
 	return status;
 }
 
-static efi_status_t __init phys_efi_get_time(efi_time_t *tm,
-					     efi_time_cap_t *tc)
-{
-	unsigned long flags;
-	efi_status_t status;
-
-	spin_lock_irqsave(&rtc_lock, flags);
-	efi_call_phys_prelog();
-	status = efi_call_phys2(efi_phys.get_time, virt_to_phys(tm),
-				virt_to_phys(tc));
-	efi_call_phys_epilog();
-	spin_unlock_irqrestore(&rtc_lock, flags);
-	return status;
-}
-
-int efi_set_rtc_mmss(unsigned long nowtime)
+static int efi_set_rtc_mmss(unsigned long nowtime)
 {
 	int real_seconds, real_minutes;
 	efi_status_t 	status;
@@ -279,7 +264,7 @@ int efi_set_rtc_mmss(unsigned long nowtime)
 	return 0;
 }
 
-unsigned long efi_get_time(void)
+static unsigned long efi_get_time(void)
 {
 	efi_status_t status;
 	efi_time_t eft;
@@ -635,18 +620,13 @@ static int __init efi_runtime_init(void)
 	}
 	/*
 	 * We will only need *early* access to the following
-	 * two EFI runtime services before set_virtual_address_map
+	 * EFI runtime service before set_virtual_address_map
 	 * is invoked.
 	 */
-	efi_phys.get_time = (efi_get_time_t *)runtime->get_time;
 	efi_phys.set_virtual_address_map =
 		(efi_set_virtual_address_map_t *)
 		runtime->set_virtual_address_map;
-	/*
-	 * Make efi_get_time can be called before entering
-	 * virtual mode.
-	 */
-	efi.get_time = phys_efi_get_time;
+
 	early_iounmap(runtime, sizeof(efi_runtime_services_t));
 
 	return 0;
@@ -734,12 +714,10 @@ void __init efi_init(void)
 		efi_enabled = 0;
 		return;
 	}
-#ifdef CONFIG_X86_32
 	if (efi_native) {
 		x86_platform.get_wallclock = efi_get_time;
 		x86_platform.set_wallclock = efi_set_rtc_mmss;
 	}
-#endif
 
 #if EFI_DEBUG
 	print_efi_memmap();
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 337aefbfb003..5e2308d9c6be 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -516,8 +516,6 @@ extern u64 efi_mem_attribute (unsigned long phys_addr, unsigned long size);
 extern int __init efi_uart_console_only (void);
 extern void efi_initialize_iomem_resources(struct resource *code_resource,
 		struct resource *data_resource, struct resource *bss_resource);
-extern unsigned long efi_get_time(void);
-extern int efi_set_rtc_mmss(unsigned long nowtime);
 extern void efi_reserve_boot_services(void);
 extern struct efi_memory_map memmap;
 
diff --git a/init/main.c b/init/main.c
index 9cf77ab138a6..ae70b647b4d9 100644
--- a/init/main.c
+++ b/init/main.c
@@ -461,6 +461,10 @@ static void __init mm_init(void)
 	percpu_init_late();
 	pgtable_cache_init();
 	vmalloc_init();
+#ifdef CONFIG_X86
+	if (efi_enabled)
+		efi_enter_virtual_mode();
+#endif
 }
 
 asmlinkage void __init start_kernel(void)
@@ -601,10 +605,6 @@ asmlinkage void __init start_kernel(void)
 	calibrate_delay();
 	pidmap_init();
 	anon_vma_init();
-#ifdef CONFIG_X86
-	if (efi_enabled)
-		efi_enter_virtual_mode();
-#endif
 	thread_info_cache_init();
 	cred_init();
 	fork_init(totalram_pages);
-- 
cgit v1.2.3


From e05ed4d1fad9e730995abb08cb9bc3bffac5018b Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Mon, 15 Oct 2012 10:19:39 -0700
Subject: swiotlb: Return physical addresses when calling
 swiotlb_tbl_map_single

This change makes it so that swiotlb_tbl_map_single will return a physical
address instead of a virtual address when called.  The advantage to this once
again is that we are avoiding a number of virt_to_phys and phys_to_virt
translations by working with everything as a physical address.

One change I had to make in order to support using physical addresses is that
I could no longer trust 0 to be a invalid physical address on all platforms.
So instead I made it so that ~0 is returned on error.  This should never be a
valid return value as it implies that only one byte would be available for
use.

In order to clarify things since we now have 2 physical addresses in use
inside of swiotlb_tbl_map_single I am renaming phys to orig_addr, and
dma_addr to tlb_addr.  This way is should be clear that orig_addr is
contained within io_orig_addr and tlb_addr is an address within the
io_tlb_addr buffer.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 drivers/xen/swiotlb-xen.c | 22 ++++++-------
 include/linux/swiotlb.h   | 11 +++++--
 lib/swiotlb.c             | 78 ++++++++++++++++++++++++-----------------------
 3 files changed, 59 insertions(+), 52 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index 58db6df866ef..8a6035aa69c9 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -338,9 +338,8 @@ dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page,
 				enum dma_data_direction dir,
 				struct dma_attrs *attrs)
 {
-	phys_addr_t phys = page_to_phys(page) + offset;
+	phys_addr_t map, phys = page_to_phys(page) + offset;
 	dma_addr_t dev_addr = xen_phys_to_bus(phys);
-	void *map;
 
 	BUG_ON(dir == DMA_NONE);
 	/*
@@ -356,16 +355,16 @@ dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page,
 	 * Oh well, have to allocate and map a bounce buffer.
 	 */
 	map = swiotlb_tbl_map_single(dev, start_dma_addr, phys, size, dir);
-	if (!map)
+	if (map == SWIOTLB_MAP_ERROR)
 		return DMA_ERROR_CODE;
 
-	dev_addr = xen_virt_to_bus(map);
+	dev_addr = xen_phys_to_bus(map);
 
 	/*
 	 * Ensure that the address returned is DMA'ble
 	 */
 	if (!dma_capable(dev, dev_addr, size)) {
-		swiotlb_tbl_unmap_single(dev, map, size, dir);
+		swiotlb_tbl_unmap_single(dev, phys_to_virt(map), size, dir);
 		dev_addr = 0;
 	}
 	return dev_addr;
@@ -494,11 +493,12 @@ xen_swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
 		if (swiotlb_force ||
 		    !dma_capable(hwdev, dev_addr, sg->length) ||
 		    range_straddles_page_boundary(paddr, sg->length)) {
-			void *map = swiotlb_tbl_map_single(hwdev,
-							   start_dma_addr,
-							   sg_phys(sg),
-							   sg->length, dir);
-			if (!map) {
+			phys_addr_t map = swiotlb_tbl_map_single(hwdev,
+								 start_dma_addr,
+								 sg_phys(sg),
+								 sg->length,
+								 dir);
+			if (map == SWIOTLB_MAP_ERROR) {
 				/* Don't panic here, we expect map_sg users
 				   to do proper error handling. */
 				xen_swiotlb_unmap_sg_attrs(hwdev, sgl, i, dir,
@@ -506,7 +506,7 @@ xen_swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
 				sgl[0].dma_length = 0;
 				return DMA_ERROR_CODE;
 			}
-			sg->dma_address = xen_virt_to_bus(map);
+			sg->dma_address = xen_phys_to_bus(map);
 		} else
 			sg->dma_address = dev_addr;
 		sg->dma_length = sg->length;
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 8d08b3ed406d..1995f3e04fed 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -34,9 +34,14 @@ enum dma_sync_target {
 	SYNC_FOR_CPU = 0,
 	SYNC_FOR_DEVICE = 1,
 };
-extern void *swiotlb_tbl_map_single(struct device *hwdev, dma_addr_t tbl_dma_addr,
-				    phys_addr_t phys, size_t size,
-				    enum dma_data_direction dir);
+
+/* define the last possible byte of physical address space as a mapping error */
+#define SWIOTLB_MAP_ERROR (~(phys_addr_t)0x0)
+
+extern phys_addr_t swiotlb_tbl_map_single(struct device *hwdev,
+					  dma_addr_t tbl_dma_addr,
+					  phys_addr_t phys, size_t size,
+					  enum dma_data_direction dir);
 
 extern void swiotlb_tbl_unmap_single(struct device *hwdev, char *dma_addr,
 				     size_t size, enum dma_data_direction dir);
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index f8c0d4e1d1d3..3adc148bb8d8 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -393,12 +393,13 @@ void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size,
 }
 EXPORT_SYMBOL_GPL(swiotlb_bounce);
 
-void *swiotlb_tbl_map_single(struct device *hwdev, dma_addr_t tbl_dma_addr,
-			     phys_addr_t phys, size_t size,
-			     enum dma_data_direction dir)
+phys_addr_t swiotlb_tbl_map_single(struct device *hwdev,
+				   dma_addr_t tbl_dma_addr,
+				   phys_addr_t orig_addr, size_t size,
+				   enum dma_data_direction dir)
 {
 	unsigned long flags;
-	char *dma_addr;
+	phys_addr_t tlb_addr;
 	unsigned int nslots, stride, index, wrap;
 	int i;
 	unsigned long mask;
@@ -462,7 +463,7 @@ void *swiotlb_tbl_map_single(struct device *hwdev, dma_addr_t tbl_dma_addr,
 				io_tlb_list[i] = 0;
 			for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE - 1) && io_tlb_list[i]; i--)
 				io_tlb_list[i] = ++count;
-			dma_addr = (char *)phys_to_virt(io_tlb_start) + (index << IO_TLB_SHIFT);
+			tlb_addr = io_tlb_start + (index << IO_TLB_SHIFT);
 
 			/*
 			 * Update the indices to avoid searching in the next
@@ -480,7 +481,7 @@ void *swiotlb_tbl_map_single(struct device *hwdev, dma_addr_t tbl_dma_addr,
 
 not_found:
 	spin_unlock_irqrestore(&io_tlb_lock, flags);
-	return NULL;
+	return SWIOTLB_MAP_ERROR;
 found:
 	spin_unlock_irqrestore(&io_tlb_lock, flags);
 
@@ -490,11 +491,12 @@ found:
 	 * needed.
 	 */
 	for (i = 0; i < nslots; i++)
-		io_tlb_orig_addr[index+i] = phys + (i << IO_TLB_SHIFT);
+		io_tlb_orig_addr[index+i] = orig_addr + (i << IO_TLB_SHIFT);
 	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)
-		swiotlb_bounce(phys, dma_addr, size, DMA_TO_DEVICE);
+		swiotlb_bounce(orig_addr, phys_to_virt(tlb_addr), size,
+			       DMA_TO_DEVICE);
 
-	return dma_addr;
+	return tlb_addr;
 }
 EXPORT_SYMBOL_GPL(swiotlb_tbl_map_single);
 
@@ -502,9 +504,8 @@ EXPORT_SYMBOL_GPL(swiotlb_tbl_map_single);
  * Allocates bounce buffer and returns its kernel virtual address.
  */
 
-static void *
-map_single(struct device *hwdev, phys_addr_t phys, size_t size,
-	   enum dma_data_direction dir)
+phys_addr_t map_single(struct device *hwdev, phys_addr_t phys, size_t size,
+		       enum dma_data_direction dir)
 {
 	dma_addr_t start_dma_addr = phys_to_dma(hwdev, io_tlb_start);
 
@@ -598,12 +599,15 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size,
 		dma_mask = hwdev->coherent_dma_mask;
 
 	ret = (void *)__get_free_pages(flags, order);
-	if (ret && swiotlb_virt_to_bus(hwdev, ret) + size - 1 > dma_mask) {
-		/*
-		 * The allocated memory isn't reachable by the device.
-		 */
-		free_pages((unsigned long) ret, order);
-		ret = NULL;
+	if (ret) {
+		dev_addr = swiotlb_virt_to_bus(hwdev, ret);
+		if (dev_addr + size - 1 > dma_mask) {
+			/*
+			 * The allocated memory isn't reachable by the device.
+			 */
+			free_pages((unsigned long) ret, order);
+			ret = NULL;
+		}
 	}
 	if (!ret) {
 		/*
@@ -611,13 +615,13 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size,
 		 * GFP_DMA memory; fall back on map_single(), which
 		 * will grab memory from the lowest available address range.
 		 */
-		ret = map_single(hwdev, 0, size, DMA_FROM_DEVICE);
-		if (!ret)
+		phys_addr_t paddr = map_single(hwdev, 0, size, DMA_FROM_DEVICE);
+		if (paddr == SWIOTLB_MAP_ERROR)
 			return NULL;
-	}
 
-	memset(ret, 0, size);
-	dev_addr = swiotlb_virt_to_bus(hwdev, ret);
+		ret = phys_to_virt(paddr);
+		dev_addr = phys_to_dma(hwdev, paddr);
+	}
 
 	/* Confirm address can be DMA'd by device */
 	if (dev_addr + size - 1 > dma_mask) {
@@ -629,7 +633,10 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size,
 		swiotlb_tbl_unmap_single(hwdev, ret, size, DMA_TO_DEVICE);
 		return NULL;
 	}
+
 	*dma_handle = dev_addr;
+	memset(ret, 0, size);
+
 	return ret;
 }
 EXPORT_SYMBOL(swiotlb_alloc_coherent);
@@ -686,9 +693,8 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
 			    enum dma_data_direction dir,
 			    struct dma_attrs *attrs)
 {
-	phys_addr_t phys = page_to_phys(page) + offset;
+	phys_addr_t map, phys = page_to_phys(page) + offset;
 	dma_addr_t dev_addr = phys_to_dma(dev, phys);
-	void *map;
 
 	BUG_ON(dir == DMA_NONE);
 	/*
@@ -699,22 +705,18 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
 	if (dma_capable(dev, dev_addr, size) && !swiotlb_force)
 		return dev_addr;
 
-	/*
-	 * Oh well, have to allocate and map a bounce buffer.
-	 */
+	/* Oh well, have to allocate and map a bounce buffer. */
 	map = map_single(dev, phys, size, dir);
-	if (!map) {
+	if (map == SWIOTLB_MAP_ERROR) {
 		swiotlb_full(dev, size, dir, 1);
 		return phys_to_dma(dev, io_tlb_overflow_buffer);
 	}
 
-	dev_addr = swiotlb_virt_to_bus(dev, map);
+	dev_addr = phys_to_dma(dev, map);
 
-	/*
-	 * Ensure that the address returned is DMA'ble
-	 */
+	/* Ensure that the address returned is DMA'ble */
 	if (!dma_capable(dev, dev_addr, size)) {
-		swiotlb_tbl_unmap_single(dev, map, size, dir);
+		swiotlb_tbl_unmap_single(dev, phys_to_virt(map), size, dir);
 		return phys_to_dma(dev, io_tlb_overflow_buffer);
 	}
 
@@ -840,9 +842,9 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems,
 
 		if (swiotlb_force ||
 		    !dma_capable(hwdev, dev_addr, sg->length)) {
-			void *map = map_single(hwdev, sg_phys(sg),
-					       sg->length, dir);
-			if (!map) {
+			phys_addr_t map = map_single(hwdev, sg_phys(sg),
+						     sg->length, dir);
+			if (map == SWIOTLB_MAP_ERROR) {
 				/* Don't panic here, we expect map_sg users
 				   to do proper error handling. */
 				swiotlb_full(hwdev, sg->length, dir, 0);
@@ -851,7 +853,7 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems,
 				sgl[0].dma_length = 0;
 				return 0;
 			}
-			sg->dma_address = swiotlb_virt_to_bus(hwdev, map);
+			sg->dma_address = phys_to_dma(hwdev, map);
 		} else
 			sg->dma_address = dev_addr;
 		sg->dma_length = sg->length;
-- 
cgit v1.2.3


From 61ca08c3220032dd88815b3465d56cb779258168 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Mon, 15 Oct 2012 10:19:44 -0700
Subject: swiotlb: Use physical addresses for swiotlb_tbl_unmap_single

This change makes it so that the unmap functionality also uses physical
addresses.  This helps to further reduce the use of virt_to_phys and
phys_to_virt functions.

In order to clarify things since we now have 2 physical addresses in use
inside of swiotlb_tbl_unmap_single I am renaming phys to orig_addr, and
dma_addr to tlb_addr.  This way is should be clear that orig_addr is
contained within io_orig_addr and tlb_addr is an address within the
io_tlb_addr buffer.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 drivers/xen/swiotlb-xen.c |  4 ++--
 include/linux/swiotlb.h   |  3 ++-
 lib/swiotlb.c             | 37 +++++++++++++++++++------------------
 3 files changed, 23 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index 8a6035aa69c9..4cedc284b5df 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -364,7 +364,7 @@ dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page,
 	 * Ensure that the address returned is DMA'ble
 	 */
 	if (!dma_capable(dev, dev_addr, size)) {
-		swiotlb_tbl_unmap_single(dev, phys_to_virt(map), size, dir);
+		swiotlb_tbl_unmap_single(dev, map, size, dir);
 		dev_addr = 0;
 	}
 	return dev_addr;
@@ -388,7 +388,7 @@ static void xen_unmap_single(struct device *hwdev, dma_addr_t dev_addr,
 
 	/* NOTE: We use dev_addr here, not paddr! */
 	if (is_xen_swiotlb_buffer(dev_addr)) {
-		swiotlb_tbl_unmap_single(hwdev, phys_to_virt(paddr), size, dir);
+		swiotlb_tbl_unmap_single(hwdev, paddr, size, dir);
 		return;
 	}
 
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 1995f3e04fed..291643c6b88b 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -43,7 +43,8 @@ extern phys_addr_t swiotlb_tbl_map_single(struct device *hwdev,
 					  phys_addr_t phys, size_t size,
 					  enum dma_data_direction dir);
 
-extern void swiotlb_tbl_unmap_single(struct device *hwdev, char *dma_addr,
+extern void swiotlb_tbl_unmap_single(struct device *hwdev,
+				     phys_addr_t tlb_addr,
 				     size_t size, enum dma_data_direction dir);
 
 extern void swiotlb_tbl_sync_single(struct device *hwdev, char *dma_addr,
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 3adc148bb8d8..d7701dcf407f 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -515,20 +515,20 @@ phys_addr_t map_single(struct device *hwdev, phys_addr_t phys, size_t size,
 /*
  * dma_addr is the kernel virtual address of the bounce buffer to unmap.
  */
-void
-swiotlb_tbl_unmap_single(struct device *hwdev, char *dma_addr, size_t size,
-			enum dma_data_direction dir)
+void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr,
+			      size_t size, enum dma_data_direction dir)
 {
 	unsigned long flags;
 	int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
-	int index = (dma_addr - (char *)phys_to_virt(io_tlb_start)) >> IO_TLB_SHIFT;
-	phys_addr_t phys = io_tlb_orig_addr[index];
+	int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT;
+	phys_addr_t orig_addr = io_tlb_orig_addr[index];
 
 	/*
 	 * First, sync the memory before unmapping the entry
 	 */
 	if (phys && ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL)))
-		swiotlb_bounce(phys, dma_addr, size, DMA_FROM_DEVICE);
+		swiotlb_bounce(orig_addr, phys_to_virt(tlb_addr),
+			       size, DMA_FROM_DEVICE);
 
 	/*
 	 * Return the buffer to the free list by setting the corresponding
@@ -621,17 +621,18 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size,
 
 		ret = phys_to_virt(paddr);
 		dev_addr = phys_to_dma(hwdev, paddr);
-	}
 
-	/* Confirm address can be DMA'd by device */
-	if (dev_addr + size - 1 > dma_mask) {
-		printk("hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016Lx\n",
-		       (unsigned long long)dma_mask,
-		       (unsigned long long)dev_addr);
+		/* Confirm address can be DMA'd by device */
+		if (dev_addr + size - 1 > dma_mask) {
+			printk("hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016Lx\n",
+			       (unsigned long long)dma_mask,
+			       (unsigned long long)dev_addr);
 
-		/* DMA_TO_DEVICE to avoid memcpy in unmap_single */
-		swiotlb_tbl_unmap_single(hwdev, ret, size, DMA_TO_DEVICE);
-		return NULL;
+			/* DMA_TO_DEVICE to avoid memcpy in unmap_single */
+			swiotlb_tbl_unmap_single(hwdev, paddr,
+						 size, DMA_TO_DEVICE);
+			return NULL;
+		}
 	}
 
 	*dma_handle = dev_addr;
@@ -652,7 +653,7 @@ swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr,
 		free_pages((unsigned long)vaddr, get_order(size));
 	else
 		/* DMA_TO_DEVICE to avoid memcpy in swiotlb_tbl_unmap_single */
-		swiotlb_tbl_unmap_single(hwdev, vaddr, size, DMA_TO_DEVICE);
+		swiotlb_tbl_unmap_single(hwdev, paddr, size, DMA_TO_DEVICE);
 }
 EXPORT_SYMBOL(swiotlb_free_coherent);
 
@@ -716,7 +717,7 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
 
 	/* Ensure that the address returned is DMA'ble */
 	if (!dma_capable(dev, dev_addr, size)) {
-		swiotlb_tbl_unmap_single(dev, phys_to_virt(map), size, dir);
+		swiotlb_tbl_unmap_single(dev, map, size, dir);
 		return phys_to_dma(dev, io_tlb_overflow_buffer);
 	}
 
@@ -740,7 +741,7 @@ static void unmap_single(struct device *hwdev, dma_addr_t dev_addr,
 	BUG_ON(dir == DMA_NONE);
 
 	if (is_swiotlb_buffer(paddr)) {
-		swiotlb_tbl_unmap_single(hwdev, phys_to_virt(paddr), size, dir);
+		swiotlb_tbl_unmap_single(hwdev, paddr, size, dir);
 		return;
 	}
 
-- 
cgit v1.2.3


From fbfda893eb570bbe9e9ad9128b6e9cf2a1e48c87 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Mon, 15 Oct 2012 10:19:49 -0700
Subject: swiotlb: Use physical addresses instead of virtual in
 swiotlb_tbl_sync_single

This change makes it so that the sync functionality also uses physical
addresses.  This helps to further reduce the use of virt_to_phys and
phys_to_virt functions.

In order to clarify things since we now have 2 physical addresses in use
inside of swiotlb_tbl_sync_single I am renaming phys to orig_addr, and
dma_addr to tlb_addr.  This way is should be clear that orig_addr is
contained within io_orig_addr and tlb_addr is an address within the
io_tlb_addr buffer.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 drivers/xen/swiotlb-xen.c |  3 +--
 include/linux/swiotlb.h   |  3 ++-
 lib/swiotlb.c             | 22 +++++++++++-----------
 3 files changed, 14 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index 4cedc284b5df..af47e7594460 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -433,8 +433,7 @@ xen_swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr,
 
 	/* NOTE: We use dev_addr here, not paddr! */
 	if (is_xen_swiotlb_buffer(dev_addr)) {
-		swiotlb_tbl_sync_single(hwdev, phys_to_virt(paddr), size, dir,
-				       target);
+		swiotlb_tbl_sync_single(hwdev, paddr, size, dir, target);
 		return;
 	}
 
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 291643c6b88b..e0ac98fd81a9 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -47,7 +47,8 @@ extern void swiotlb_tbl_unmap_single(struct device *hwdev,
 				     phys_addr_t tlb_addr,
 				     size_t size, enum dma_data_direction dir);
 
-extern void swiotlb_tbl_sync_single(struct device *hwdev, char *dma_addr,
+extern void swiotlb_tbl_sync_single(struct device *hwdev,
+				    phys_addr_t tlb_addr,
 				    size_t size, enum dma_data_direction dir,
 				    enum dma_sync_target target);
 
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index d7701dcf407f..16a548dc91ac 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -557,26 +557,27 @@ void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr,
 }
 EXPORT_SYMBOL_GPL(swiotlb_tbl_unmap_single);
 
-void
-swiotlb_tbl_sync_single(struct device *hwdev, char *dma_addr, size_t size,
-			enum dma_data_direction dir,
-			enum dma_sync_target target)
+void swiotlb_tbl_sync_single(struct device *hwdev, phys_addr_t tlb_addr,
+			     size_t size, enum dma_data_direction dir,
+			     enum dma_sync_target target)
 {
-	int index = (dma_addr - (char *)phys_to_virt(io_tlb_start)) >> IO_TLB_SHIFT;
-	phys_addr_t phys = io_tlb_orig_addr[index];
+	int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT;
+	phys_addr_t orig_addr = io_tlb_orig_addr[index];
 
-	phys += ((unsigned long)dma_addr & ((1 << IO_TLB_SHIFT) - 1));
+	orig_addr += (unsigned long)tlb_addr & ((1 << IO_TLB_SHIFT) - 1);
 
 	switch (target) {
 	case SYNC_FOR_CPU:
 		if (likely(dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL))
-			swiotlb_bounce(phys, dma_addr, size, DMA_FROM_DEVICE);
+			swiotlb_bounce(orig_addr, phys_to_virt(tlb_addr),
+				       size, DMA_FROM_DEVICE);
 		else
 			BUG_ON(dir != DMA_TO_DEVICE);
 		break;
 	case SYNC_FOR_DEVICE:
 		if (likely(dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
-			swiotlb_bounce(phys, dma_addr, size, DMA_TO_DEVICE);
+			swiotlb_bounce(orig_addr, phys_to_virt(tlb_addr),
+				       size, DMA_TO_DEVICE);
 		else
 			BUG_ON(dir != DMA_FROM_DEVICE);
 		break;
@@ -785,8 +786,7 @@ swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr,
 	BUG_ON(dir == DMA_NONE);
 
 	if (is_swiotlb_buffer(paddr)) {
-		swiotlb_tbl_sync_single(hwdev, phys_to_virt(paddr), size, dir,
-				       target);
+		swiotlb_tbl_sync_single(hwdev, paddr, size, dir, target);
 		return;
 	}
 
-- 
cgit v1.2.3


From af51a9f1848ff50079a10def56a2c064f326af22 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Mon, 15 Oct 2012 10:19:55 -0700
Subject: swiotlb: Do not export swiotlb_bounce since there are no external
 consumers

Currently swiotlb is the only consumer for swiotlb_bounce.  Since that is the
case it doesn't make much sense to be exporting it so make it a static
function only.

In addition we can save a few more lines of code by making it so that it
accepts the DMA address as a physical address instead of a virtual one.  This
is the last piece in essentially pushing all of the DMA address values to use
physical addresses in swiotlb.

In order to clarify things since we now have 2 physical addresses in use
inside of swiotlb_bounce I am renaming phys to orig_addr, and dma_addr to
tlb_addr.  This way is should be clear that orig_addr is contained within
io_orig_addr and tlb_addr is an address within the io_tlb_addr buffer.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 include/linux/swiotlb.h |  3 ---
 lib/swiotlb.c           | 35 ++++++++++++++++-------------------
 2 files changed, 16 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index e0ac98fd81a9..071d62c214a6 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -53,9 +53,6 @@ extern void swiotlb_tbl_sync_single(struct device *hwdev,
 				    enum dma_sync_target target);
 
 /* Accessory functions. */
-extern void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size,
-			   enum dma_data_direction dir);
-
 extern void
 *swiotlb_alloc_coherent(struct device *hwdev, size_t size,
 			dma_addr_t *dma_handle, gfp_t flags);
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 16a548dc91ac..196b06984dec 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -355,14 +355,15 @@ static int is_swiotlb_buffer(phys_addr_t paddr)
 /*
  * Bounce: copy the swiotlb buffer back to the original dma location
  */
-void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size,
-		    enum dma_data_direction dir)
+static void swiotlb_bounce(phys_addr_t orig_addr, phys_addr_t tlb_addr,
+			   size_t size, enum dma_data_direction dir)
 {
-	unsigned long pfn = PFN_DOWN(phys);
+	unsigned long pfn = PFN_DOWN(orig_addr);
+	unsigned char *vaddr = phys_to_virt(tlb_addr);
 
 	if (PageHighMem(pfn_to_page(pfn))) {
 		/* The buffer does not have a mapping.  Map it in and copy */
-		unsigned int offset = phys & ~PAGE_MASK;
+		unsigned int offset = orig_addr & ~PAGE_MASK;
 		char *buffer;
 		unsigned int sz = 0;
 		unsigned long flags;
@@ -373,25 +374,23 @@ void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size,
 			local_irq_save(flags);
 			buffer = kmap_atomic(pfn_to_page(pfn));
 			if (dir == DMA_TO_DEVICE)
-				memcpy(dma_addr, buffer + offset, sz);
+				memcpy(vaddr, buffer + offset, sz);
 			else
-				memcpy(buffer + offset, dma_addr, sz);
+				memcpy(buffer + offset, vaddr, sz);
 			kunmap_atomic(buffer);
 			local_irq_restore(flags);
 
 			size -= sz;
 			pfn++;
-			dma_addr += sz;
+			vaddr += sz;
 			offset = 0;
 		}
+	} else if (dir == DMA_TO_DEVICE) {
+		memcpy(vaddr, phys_to_virt(orig_addr), size);
 	} else {
-		if (dir == DMA_TO_DEVICE)
-			memcpy(dma_addr, phys_to_virt(phys), size);
-		else
-			memcpy(phys_to_virt(phys), dma_addr, size);
+		memcpy(phys_to_virt(orig_addr), vaddr, size);
 	}
 }
-EXPORT_SYMBOL_GPL(swiotlb_bounce);
 
 phys_addr_t swiotlb_tbl_map_single(struct device *hwdev,
 				   dma_addr_t tbl_dma_addr,
@@ -493,8 +492,7 @@ found:
 	for (i = 0; i < nslots; i++)
 		io_tlb_orig_addr[index+i] = orig_addr + (i << IO_TLB_SHIFT);
 	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)
-		swiotlb_bounce(orig_addr, phys_to_virt(tlb_addr), size,
-			       DMA_TO_DEVICE);
+		swiotlb_bounce(orig_addr, tlb_addr, size, DMA_TO_DEVICE);
 
 	return tlb_addr;
 }
@@ -526,9 +524,8 @@ void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr,
 	/*
 	 * First, sync the memory before unmapping the entry
 	 */
-	if (phys && ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL)))
-		swiotlb_bounce(orig_addr, phys_to_virt(tlb_addr),
-			       size, DMA_FROM_DEVICE);
+	if (orig_addr && ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL)))
+		swiotlb_bounce(orig_addr, tlb_addr, size, DMA_FROM_DEVICE);
 
 	/*
 	 * Return the buffer to the free list by setting the corresponding
@@ -569,14 +566,14 @@ void swiotlb_tbl_sync_single(struct device *hwdev, phys_addr_t tlb_addr,
 	switch (target) {
 	case SYNC_FOR_CPU:
 		if (likely(dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL))
-			swiotlb_bounce(orig_addr, phys_to_virt(tlb_addr),
+			swiotlb_bounce(orig_addr, tlb_addr,
 				       size, DMA_FROM_DEVICE);
 		else
 			BUG_ON(dir != DMA_TO_DEVICE);
 		break;
 	case SYNC_FOR_DEVICE:
 		if (likely(dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
-			swiotlb_bounce(orig_addr, phys_to_virt(tlb_addr),
+			swiotlb_bounce(orig_addr, tlb_addr,
 				       size, DMA_TO_DEVICE);
 		else
 			BUG_ON(dir != DMA_FROM_DEVICE);
-- 
cgit v1.2.3


From dc4dc36056392c0b0b1ca9e81bebff964b9297e0 Mon Sep 17 00:00:00 2001
From: Laxman Dewangan <ldewangan@nvidia.com>
Date: Tue, 30 Oct 2012 12:34:05 +0530
Subject: spi: tegra: add spi driver for SLINK controller

Tegra20/Tegra30 supports the spi interface through its SLINK
controller. Add spi driver for SLINK controller.

Signed-off-by: Laxman Dewangan <ldewangan@nvidia.com>
Reviewed-by: Stephen Warren <swarren@nvidia.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 .../bindings/spi/nvidia,tegra20-slink.txt          |   26 +
 drivers/spi/Kconfig                                |    6 +
 drivers/spi/Makefile                               |    2 +-
 drivers/spi/spi-tegra20-slink.c                    | 1359 ++++++++++++++++++++
 include/linux/spi/spi-tegra.h                      |   40 +
 5 files changed, 1432 insertions(+), 1 deletion(-)
 create mode 100644 Documentation/devicetree/bindings/spi/nvidia,tegra20-slink.txt
 create mode 100644 drivers/spi/spi-tegra20-slink.c
 create mode 100644 include/linux/spi/spi-tegra.h

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/spi/nvidia,tegra20-slink.txt b/Documentation/devicetree/bindings/spi/nvidia,tegra20-slink.txt
new file mode 100644
index 000000000000..f5b1ad1a1ec3
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/nvidia,tegra20-slink.txt
@@ -0,0 +1,26 @@
+NVIDIA Tegra20/Tegra30 SLINK controller.
+
+Required properties:
+- compatible : should be "nvidia,tegra20-slink", "nvidia,tegra30-slink".
+- reg: Should contain SLINK registers location and length.
+- interrupts: Should contain SLINK interrupts.
+- nvidia,dma-request-selector : The Tegra DMA controller's phandle and
+  request selector for this SLINK controller.
+
+Recommended properties:
+- spi-max-frequency: Definition as per
+                     Documentation/devicetree/bindings/spi/spi-bus.txt
+
+Example:
+
+slink@7000d600 {
+	compatible = "nvidia,tegra20-slink";
+	reg = <0x7000d600 0x200>;
+	interrupts = <0 82 0x04>;
+	nvidia,dma-request-selector = <&apbdma 16>;
+	spi-max-frequency = <25000000>;
+	#address-cells = <1>;
+	#size-cells = <0>;
+	status = "disabled";
+};
+
diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
index 1acae359cabe..25290d9780b2 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig
@@ -385,6 +385,12 @@ config SPI_MXS
 	help
 	  SPI driver for Freescale MXS devices.
 
+config SPI_TEGRA20_SLINK
+	tristate "Nvidia Tegra20/Tegra30 SLINK Controller"
+	depends on ARCH_TEGRA && TEGRA20_APB_DMA
+	help
+	  SPI driver for Nvidia Tegra20/Tegra30 SLINK Controller interface.
+
 config SPI_TI_SSP
 	tristate "TI Sequencer Serial Port - SPI Support"
 	depends on MFD_TI_SSP
diff --git a/drivers/spi/Makefile b/drivers/spi/Makefile
index c48df47e4b0f..f87c0f142e5a 100644
--- a/drivers/spi/Makefile
+++ b/drivers/spi/Makefile
@@ -60,10 +60,10 @@ obj-$(CONFIG_SPI_SH_MSIOF)		+= spi-sh-msiof.o
 obj-$(CONFIG_SPI_SH_SCI)		+= spi-sh-sci.o
 obj-$(CONFIG_SPI_SIRF)		+= spi-sirf.o
 obj-$(CONFIG_SPI_STMP3XXX)		+= spi-stmp.o
+obj-$(CONFIG_SPI_TEGRA20_SLINK)		+= spi-tegra20-slink.o
 obj-$(CONFIG_SPI_TI_SSP)		+= spi-ti-ssp.o
 obj-$(CONFIG_SPI_TLE62X0)		+= spi-tle62x0.o
 obj-$(CONFIG_SPI_TOPCLIFF_PCH)		+= spi-topcliff-pch.o
 obj-$(CONFIG_SPI_TXX9)			+= spi-txx9.o
 obj-$(CONFIG_SPI_XCOMM)		+= spi-xcomm.o
 obj-$(CONFIG_SPI_XILINX)		+= spi-xilinx.o
-
diff --git a/drivers/spi/spi-tegra20-slink.c b/drivers/spi/spi-tegra20-slink.c
new file mode 100644
index 000000000000..b8985be81d96
--- /dev/null
+++ b/drivers/spi/spi-tegra20-slink.c
@@ -0,0 +1,1359 @@
+/*
+ * SPI driver for Nvidia's Tegra20/Tegra30 SLINK Controller.
+ *
+ * Copyright (c) 2012, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/clk.h>
+#include <linux/completion.h>
+#include <linux/delay.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmapool.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/kthread.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/spi/spi.h>
+#include <linux/spi/spi-tegra.h>
+#include <mach/clk.h>
+
+#define SLINK_COMMAND			0x000
+#define SLINK_BIT_LENGTH(x)		(((x) & 0x1f) << 0)
+#define SLINK_WORD_SIZE(x)		(((x) & 0x1f) << 5)
+#define SLINK_BOTH_EN			(1 << 10)
+#define SLINK_CS_SW			(1 << 11)
+#define SLINK_CS_VALUE			(1 << 12)
+#define SLINK_CS_POLARITY		(1 << 13)
+#define SLINK_IDLE_SDA_DRIVE_LOW	(0 << 16)
+#define SLINK_IDLE_SDA_DRIVE_HIGH	(1 << 16)
+#define SLINK_IDLE_SDA_PULL_LOW		(2 << 16)
+#define SLINK_IDLE_SDA_PULL_HIGH	(3 << 16)
+#define SLINK_IDLE_SDA_MASK		(3 << 16)
+#define SLINK_CS_POLARITY1		(1 << 20)
+#define SLINK_CK_SDA			(1 << 21)
+#define SLINK_CS_POLARITY2		(1 << 22)
+#define SLINK_CS_POLARITY3		(1 << 23)
+#define SLINK_IDLE_SCLK_DRIVE_LOW	(0 << 24)
+#define SLINK_IDLE_SCLK_DRIVE_HIGH	(1 << 24)
+#define SLINK_IDLE_SCLK_PULL_LOW	(2 << 24)
+#define SLINK_IDLE_SCLK_PULL_HIGH	(3 << 24)
+#define SLINK_IDLE_SCLK_MASK		(3 << 24)
+#define SLINK_M_S			(1 << 28)
+#define SLINK_WAIT			(1 << 29)
+#define SLINK_GO			(1 << 30)
+#define SLINK_ENB			(1 << 31)
+
+#define SLINK_MODES			(SLINK_IDLE_SCLK_MASK | SLINK_CK_SDA)
+
+#define SLINK_COMMAND2			0x004
+#define SLINK_LSBFE			(1 << 0)
+#define SLINK_SSOE			(1 << 1)
+#define SLINK_SPIE			(1 << 4)
+#define SLINK_BIDIROE			(1 << 6)
+#define SLINK_MODFEN			(1 << 7)
+#define SLINK_INT_SIZE(x)		(((x) & 0x1f) << 8)
+#define SLINK_CS_ACTIVE_BETWEEN		(1 << 17)
+#define SLINK_SS_EN_CS(x)		(((x) & 0x3) << 18)
+#define SLINK_SS_SETUP(x)		(((x) & 0x3) << 20)
+#define SLINK_FIFO_REFILLS_0		(0 << 22)
+#define SLINK_FIFO_REFILLS_1		(1 << 22)
+#define SLINK_FIFO_REFILLS_2		(2 << 22)
+#define SLINK_FIFO_REFILLS_3		(3 << 22)
+#define SLINK_FIFO_REFILLS_MASK		(3 << 22)
+#define SLINK_WAIT_PACK_INT(x)		(((x) & 0x7) << 26)
+#define SLINK_SPC0			(1 << 29)
+#define SLINK_TXEN			(1 << 30)
+#define SLINK_RXEN			(1 << 31)
+
+#define SLINK_STATUS			0x008
+#define SLINK_COUNT(val)		(((val) >> 0) & 0x1f)
+#define SLINK_WORD(val)			(((val) >> 5) & 0x1f)
+#define SLINK_BLK_CNT(val)		(((val) >> 0) & 0xffff)
+#define SLINK_MODF			(1 << 16)
+#define SLINK_RX_UNF			(1 << 18)
+#define SLINK_TX_OVF			(1 << 19)
+#define SLINK_TX_FULL			(1 << 20)
+#define SLINK_TX_EMPTY			(1 << 21)
+#define SLINK_RX_FULL			(1 << 22)
+#define SLINK_RX_EMPTY			(1 << 23)
+#define SLINK_TX_UNF			(1 << 24)
+#define SLINK_RX_OVF			(1 << 25)
+#define SLINK_TX_FLUSH			(1 << 26)
+#define SLINK_RX_FLUSH			(1 << 27)
+#define SLINK_SCLK			(1 << 28)
+#define SLINK_ERR			(1 << 29)
+#define SLINK_RDY			(1 << 30)
+#define SLINK_BSY			(1 << 31)
+#define SLINK_FIFO_ERROR		(SLINK_TX_OVF | SLINK_RX_UNF |	\
+					SLINK_TX_UNF | SLINK_RX_OVF)
+
+#define SLINK_FIFO_EMPTY		(SLINK_TX_EMPTY | SLINK_RX_EMPTY)
+
+#define SLINK_MAS_DATA			0x010
+#define SLINK_SLAVE_DATA		0x014
+
+#define SLINK_DMA_CTL			0x018
+#define SLINK_DMA_BLOCK_SIZE(x)		(((x) & 0xffff) << 0)
+#define SLINK_TX_TRIG_1			(0 << 16)
+#define SLINK_TX_TRIG_4			(1 << 16)
+#define SLINK_TX_TRIG_8			(2 << 16)
+#define SLINK_TX_TRIG_16		(3 << 16)
+#define SLINK_TX_TRIG_MASK		(3 << 16)
+#define SLINK_RX_TRIG_1			(0 << 18)
+#define SLINK_RX_TRIG_4			(1 << 18)
+#define SLINK_RX_TRIG_8			(2 << 18)
+#define SLINK_RX_TRIG_16		(3 << 18)
+#define SLINK_RX_TRIG_MASK		(3 << 18)
+#define SLINK_PACKED			(1 << 20)
+#define SLINK_PACK_SIZE_4		(0 << 21)
+#define SLINK_PACK_SIZE_8		(1 << 21)
+#define SLINK_PACK_SIZE_16		(2 << 21)
+#define SLINK_PACK_SIZE_32		(3 << 21)
+#define SLINK_PACK_SIZE_MASK		(3 << 21)
+#define SLINK_IE_TXC			(1 << 26)
+#define SLINK_IE_RXC			(1 << 27)
+#define SLINK_DMA_EN			(1 << 31)
+
+#define SLINK_STATUS2			0x01c
+#define SLINK_TX_FIFO_EMPTY_COUNT(val)	(((val) & 0x3f) >> 0)
+#define SLINK_RX_FIFO_FULL_COUNT(val)	(((val) & 0x3f0000) >> 16)
+#define SLINK_SS_HOLD_TIME(val)		(((val) & 0xF) << 6)
+
+#define SLINK_TX_FIFO			0x100
+#define SLINK_RX_FIFO			0x180
+
+#define DATA_DIR_TX			(1 << 0)
+#define DATA_DIR_RX			(1 << 1)
+
+#define SLINK_DMA_TIMEOUT		(msecs_to_jiffies(1000))
+
+#define DEFAULT_SPI_DMA_BUF_LEN		(16*1024)
+#define TX_FIFO_EMPTY_COUNT_MAX		SLINK_TX_FIFO_EMPTY_COUNT(0x20)
+#define RX_FIFO_FULL_COUNT_ZERO		SLINK_RX_FIFO_FULL_COUNT(0)
+
+#define SLINK_STATUS2_RESET \
+	(TX_FIFO_EMPTY_COUNT_MAX | RX_FIFO_FULL_COUNT_ZERO << 16)
+
+#define MAX_CHIP_SELECT			4
+#define SLINK_FIFO_DEPTH		32
+
+struct tegra_slink_chip_data {
+	bool cs_hold_time;
+};
+
+struct tegra_slink_data {
+	struct device				*dev;
+	struct spi_master			*master;
+	const struct tegra_slink_chip_data	*chip_data;
+	spinlock_t				lock;
+
+	struct clk				*clk;
+	void __iomem				*base;
+	phys_addr_t				phys;
+	unsigned				irq;
+	int					dma_req_sel;
+	u32					spi_max_frequency;
+	u32					cur_speed;
+
+	struct spi_device			*cur_spi;
+	unsigned				cur_pos;
+	unsigned				cur_len;
+	unsigned				words_per_32bit;
+	unsigned				bytes_per_word;
+	unsigned				curr_dma_words;
+	unsigned				cur_direction;
+
+	unsigned				cur_rx_pos;
+	unsigned				cur_tx_pos;
+
+	unsigned				dma_buf_size;
+	unsigned				max_buf_size;
+	bool					is_curr_dma_xfer;
+	bool					is_hw_based_cs;
+
+	struct completion			rx_dma_complete;
+	struct completion			tx_dma_complete;
+
+	u32					tx_status;
+	u32					rx_status;
+	u32					status_reg;
+	bool					is_packed;
+	unsigned long				packed_size;
+
+	u32					command_reg;
+	u32					command2_reg;
+	u32					dma_control_reg;
+	u32					def_command_reg;
+	u32					def_command2_reg;
+
+	struct completion			xfer_completion;
+	struct spi_transfer			*curr_xfer;
+	struct dma_chan				*rx_dma_chan;
+	u32					*rx_dma_buf;
+	dma_addr_t				rx_dma_phys;
+	struct dma_async_tx_descriptor		*rx_dma_desc;
+
+	struct dma_chan				*tx_dma_chan;
+	u32					*tx_dma_buf;
+	dma_addr_t				tx_dma_phys;
+	struct dma_async_tx_descriptor		*tx_dma_desc;
+};
+
+static int tegra_slink_runtime_suspend(struct device *dev);
+static int tegra_slink_runtime_resume(struct device *dev);
+
+static inline unsigned long tegra_slink_readl(struct tegra_slink_data *tspi,
+		unsigned long reg)
+{
+	return readl(tspi->base + reg);
+}
+
+static inline void tegra_slink_writel(struct tegra_slink_data *tspi,
+		unsigned long val, unsigned long reg)
+{
+	writel(val, tspi->base + reg);
+
+	/* Read back register to make sure that register writes completed */
+	if (reg != SLINK_TX_FIFO)
+		readl(tspi->base + SLINK_MAS_DATA);
+}
+
+static void tegra_slink_clear_status(struct tegra_slink_data *tspi)
+{
+	unsigned long val;
+	unsigned long val_write = 0;
+
+	val = tegra_slink_readl(tspi, SLINK_STATUS);
+
+	/* Write 1 to clear status register */
+	val_write = SLINK_RDY | SLINK_FIFO_ERROR;
+	tegra_slink_writel(tspi, val_write, SLINK_STATUS);
+}
+
+static unsigned long tegra_slink_get_packed_size(struct tegra_slink_data *tspi,
+				  struct spi_transfer *t)
+{
+	unsigned long val;
+
+	switch (tspi->bytes_per_word) {
+	case 0:
+		val = SLINK_PACK_SIZE_4;
+		break;
+	case 1:
+		val = SLINK_PACK_SIZE_8;
+		break;
+	case 2:
+		val = SLINK_PACK_SIZE_16;
+		break;
+	case 4:
+		val = SLINK_PACK_SIZE_32;
+		break;
+	default:
+		val = 0;
+	}
+	return val;
+}
+
+static unsigned tegra_slink_calculate_curr_xfer_param(
+	struct spi_device *spi, struct tegra_slink_data *tspi,
+	struct spi_transfer *t)
+{
+	unsigned remain_len = t->len - tspi->cur_pos;
+	unsigned max_word;
+	unsigned bits_per_word ;
+	unsigned max_len;
+	unsigned total_fifo_words;
+
+	bits_per_word = t->bits_per_word ? t->bits_per_word :
+						spi->bits_per_word;
+	tspi->bytes_per_word = (bits_per_word - 1) / 8 + 1;
+
+	if (bits_per_word == 8 || bits_per_word == 16) {
+		tspi->is_packed = 1;
+		tspi->words_per_32bit = 32/bits_per_word;
+	} else {
+		tspi->is_packed = 0;
+		tspi->words_per_32bit = 1;
+	}
+	tspi->packed_size = tegra_slink_get_packed_size(tspi, t);
+
+	if (tspi->is_packed) {
+		max_len = min(remain_len, tspi->max_buf_size);
+		tspi->curr_dma_words = max_len/tspi->bytes_per_word;
+		total_fifo_words = max_len/4;
+	} else {
+		max_word = (remain_len - 1) / tspi->bytes_per_word + 1;
+		max_word = min(max_word, tspi->max_buf_size/4);
+		tspi->curr_dma_words = max_word;
+		total_fifo_words = max_word;
+	}
+	return total_fifo_words;
+}
+
+static unsigned tegra_slink_fill_tx_fifo_from_client_txbuf(
+	struct tegra_slink_data *tspi, struct spi_transfer *t)
+{
+	unsigned nbytes;
+	unsigned tx_empty_count;
+	unsigned long fifo_status;
+	unsigned max_n_32bit;
+	unsigned i, count;
+	unsigned long x;
+	unsigned int written_words;
+	unsigned fifo_words_left;
+	u8 *tx_buf = (u8 *)t->tx_buf + tspi->cur_tx_pos;
+
+	fifo_status = tegra_slink_readl(tspi, SLINK_STATUS2);
+	tx_empty_count = SLINK_TX_FIFO_EMPTY_COUNT(fifo_status);
+
+	if (tspi->is_packed) {
+		fifo_words_left = tx_empty_count * tspi->words_per_32bit;
+		written_words = min(fifo_words_left, tspi->curr_dma_words);
+		nbytes = written_words * tspi->bytes_per_word;
+		max_n_32bit = DIV_ROUND_UP(nbytes, 4);
+		for (count = 0; count < max_n_32bit; count++) {
+			x = 0;
+			for (i = 0; (i < 4) && nbytes; i++, nbytes--)
+				x |= (*tx_buf++) << (i*8);
+			tegra_slink_writel(tspi, x, SLINK_TX_FIFO);
+		}
+	} else {
+		max_n_32bit = min(tspi->curr_dma_words,  tx_empty_count);
+		written_words = max_n_32bit;
+		nbytes = written_words * tspi->bytes_per_word;
+		for (count = 0; count < max_n_32bit; count++) {
+			x = 0;
+			for (i = 0; nbytes && (i < tspi->bytes_per_word);
+							i++, nbytes--)
+				x |= ((*tx_buf++) << i*8);
+			tegra_slink_writel(tspi, x, SLINK_TX_FIFO);
+		}
+	}
+	tspi->cur_tx_pos += written_words * tspi->bytes_per_word;
+	return written_words;
+}
+
+static unsigned int tegra_slink_read_rx_fifo_to_client_rxbuf(
+		struct tegra_slink_data *tspi, struct spi_transfer *t)
+{
+	unsigned rx_full_count;
+	unsigned long fifo_status;
+	unsigned i, count;
+	unsigned long x;
+	unsigned int read_words = 0;
+	unsigned len;
+	u8 *rx_buf = (u8 *)t->rx_buf + tspi->cur_rx_pos;
+
+	fifo_status = tegra_slink_readl(tspi, SLINK_STATUS2);
+	rx_full_count = SLINK_RX_FIFO_FULL_COUNT(fifo_status);
+	if (tspi->is_packed) {
+		len = tspi->curr_dma_words * tspi->bytes_per_word;
+		for (count = 0; count < rx_full_count; count++) {
+			x = tegra_slink_readl(tspi, SLINK_RX_FIFO);
+			for (i = 0; len && (i < 4); i++, len--)
+				*rx_buf++ = (x >> i*8) & 0xFF;
+		}
+		tspi->cur_rx_pos += tspi->curr_dma_words * tspi->bytes_per_word;
+		read_words += tspi->curr_dma_words;
+	} else {
+		unsigned int bits_per_word;
+
+		bits_per_word = t->bits_per_word ? t->bits_per_word :
+						tspi->cur_spi->bits_per_word;
+		for (count = 0; count < rx_full_count; count++) {
+			x = tegra_slink_readl(tspi, SLINK_RX_FIFO);
+			for (i = 0; (i < tspi->bytes_per_word); i++)
+				*rx_buf++ = (x >> (i*8)) & 0xFF;
+		}
+		tspi->cur_rx_pos += rx_full_count * tspi->bytes_per_word;
+		read_words += rx_full_count;
+	}
+	return read_words;
+}
+
+static void tegra_slink_copy_client_txbuf_to_spi_txbuf(
+		struct tegra_slink_data *tspi, struct spi_transfer *t)
+{
+	unsigned len;
+
+	/* Make the dma buffer to read by cpu */
+	dma_sync_single_for_cpu(tspi->dev, tspi->tx_dma_phys,
+				tspi->dma_buf_size, DMA_TO_DEVICE);
+
+	if (tspi->is_packed) {
+		len = tspi->curr_dma_words * tspi->bytes_per_word;
+		memcpy(tspi->tx_dma_buf, t->tx_buf + tspi->cur_pos, len);
+	} else {
+		unsigned int i;
+		unsigned int count;
+		u8 *tx_buf = (u8 *)t->tx_buf + tspi->cur_tx_pos;
+		unsigned consume = tspi->curr_dma_words * tspi->bytes_per_word;
+		unsigned int x;
+
+		for (count = 0; count < tspi->curr_dma_words; count++) {
+			x = 0;
+			for (i = 0; consume && (i < tspi->bytes_per_word);
+							i++, consume--)
+				x |= ((*tx_buf++) << i * 8);
+			tspi->tx_dma_buf[count] = x;
+		}
+	}
+	tspi->cur_tx_pos += tspi->curr_dma_words * tspi->bytes_per_word;
+
+	/* Make the dma buffer to read by dma */
+	dma_sync_single_for_device(tspi->dev, tspi->tx_dma_phys,
+				tspi->dma_buf_size, DMA_TO_DEVICE);
+}
+
+static void tegra_slink_copy_spi_rxbuf_to_client_rxbuf(
+		struct tegra_slink_data *tspi, struct spi_transfer *t)
+{
+	unsigned len;
+
+	/* Make the dma buffer to read by cpu */
+	dma_sync_single_for_cpu(tspi->dev, tspi->rx_dma_phys,
+		tspi->dma_buf_size, DMA_FROM_DEVICE);
+
+	if (tspi->is_packed) {
+		len = tspi->curr_dma_words * tspi->bytes_per_word;
+		memcpy(t->rx_buf + tspi->cur_rx_pos, tspi->rx_dma_buf, len);
+	} else {
+		unsigned int i;
+		unsigned int count;
+		unsigned char *rx_buf = t->rx_buf + tspi->cur_rx_pos;
+		unsigned int x;
+		unsigned int rx_mask, bits_per_word;
+
+		bits_per_word = t->bits_per_word ? t->bits_per_word :
+						tspi->cur_spi->bits_per_word;
+		rx_mask = (1 << bits_per_word) - 1;
+		for (count = 0; count < tspi->curr_dma_words; count++) {
+			x = tspi->rx_dma_buf[count];
+			x &= rx_mask;
+			for (i = 0; (i < tspi->bytes_per_word); i++)
+				*rx_buf++ = (x >> (i*8)) & 0xFF;
+		}
+	}
+	tspi->cur_rx_pos += tspi->curr_dma_words * tspi->bytes_per_word;
+
+	/* Make the dma buffer to read by dma */
+	dma_sync_single_for_device(tspi->dev, tspi->rx_dma_phys,
+		tspi->dma_buf_size, DMA_FROM_DEVICE);
+}
+
+static void tegra_slink_dma_complete(void *args)
+{
+	struct completion *dma_complete = args;
+
+	complete(dma_complete);
+}
+
+static int tegra_slink_start_tx_dma(struct tegra_slink_data *tspi, int len)
+{
+	INIT_COMPLETION(tspi->tx_dma_complete);
+	tspi->tx_dma_desc = dmaengine_prep_slave_single(tspi->tx_dma_chan,
+				tspi->tx_dma_phys, len, DMA_MEM_TO_DEV,
+				DMA_PREP_INTERRUPT |  DMA_CTRL_ACK);
+	if (!tspi->tx_dma_desc) {
+		dev_err(tspi->dev, "Not able to get desc for Tx\n");
+		return -EIO;
+	}
+
+	tspi->tx_dma_desc->callback = tegra_slink_dma_complete;
+	tspi->tx_dma_desc->callback_param = &tspi->tx_dma_complete;
+
+	dmaengine_submit(tspi->tx_dma_desc);
+	dma_async_issue_pending(tspi->tx_dma_chan);
+	return 0;
+}
+
+static int tegra_slink_start_rx_dma(struct tegra_slink_data *tspi, int len)
+{
+	INIT_COMPLETION(tspi->rx_dma_complete);
+	tspi->rx_dma_desc = dmaengine_prep_slave_single(tspi->rx_dma_chan,
+				tspi->rx_dma_phys, len, DMA_DEV_TO_MEM,
+				DMA_PREP_INTERRUPT |  DMA_CTRL_ACK);
+	if (!tspi->rx_dma_desc) {
+		dev_err(tspi->dev, "Not able to get desc for Rx\n");
+		return -EIO;
+	}
+
+	tspi->rx_dma_desc->callback = tegra_slink_dma_complete;
+	tspi->rx_dma_desc->callback_param = &tspi->rx_dma_complete;
+
+	dmaengine_submit(tspi->rx_dma_desc);
+	dma_async_issue_pending(tspi->rx_dma_chan);
+	return 0;
+}
+
+static int tegra_slink_start_dma_based_transfer(
+		struct tegra_slink_data *tspi, struct spi_transfer *t)
+{
+	unsigned long val;
+	unsigned long test_val;
+	unsigned int len;
+	int ret = 0;
+	unsigned long status;
+
+	/* Make sure that Rx and Tx fifo are empty */
+	status = tegra_slink_readl(tspi, SLINK_STATUS);
+	if ((status & SLINK_FIFO_EMPTY) != SLINK_FIFO_EMPTY) {
+		dev_err(tspi->dev,
+			"Rx/Tx fifo are not empty status 0x%08lx\n", status);
+		return -EIO;
+	}
+
+	val = SLINK_DMA_BLOCK_SIZE(tspi->curr_dma_words - 1);
+	val |= tspi->packed_size;
+	if (tspi->is_packed)
+		len = DIV_ROUND_UP(tspi->curr_dma_words * tspi->bytes_per_word,
+					4) * 4;
+	else
+		len = tspi->curr_dma_words * 4;
+
+	/* Set attention level based on length of transfer */
+	if (len & 0xF)
+		val |= SLINK_TX_TRIG_1 | SLINK_RX_TRIG_1;
+	else if (((len) >> 4) & 0x1)
+		val |= SLINK_TX_TRIG_4 | SLINK_RX_TRIG_4;
+	else
+		val |= SLINK_TX_TRIG_8 | SLINK_RX_TRIG_8;
+
+	if (tspi->cur_direction & DATA_DIR_TX)
+		val |= SLINK_IE_TXC;
+
+	if (tspi->cur_direction & DATA_DIR_RX)
+		val |= SLINK_IE_RXC;
+
+	tegra_slink_writel(tspi, val, SLINK_DMA_CTL);
+	tspi->dma_control_reg = val;
+
+	if (tspi->cur_direction & DATA_DIR_TX) {
+		tegra_slink_copy_client_txbuf_to_spi_txbuf(tspi, t);
+		wmb();
+		ret = tegra_slink_start_tx_dma(tspi, len);
+		if (ret < 0) {
+			dev_err(tspi->dev,
+				"Starting tx dma failed, err %d\n", ret);
+			return ret;
+		}
+
+		/* Wait for tx fifo to be fill before starting slink */
+		test_val = tegra_slink_readl(tspi, SLINK_STATUS);
+		while (!(test_val & SLINK_TX_FULL))
+			test_val = tegra_slink_readl(tspi, SLINK_STATUS);
+	}
+
+	if (tspi->cur_direction & DATA_DIR_RX) {
+		/* Make the dma buffer to read by dma */
+		dma_sync_single_for_device(tspi->dev, tspi->rx_dma_phys,
+				tspi->dma_buf_size, DMA_FROM_DEVICE);
+
+		ret = tegra_slink_start_rx_dma(tspi, len);
+		if (ret < 0) {
+			dev_err(tspi->dev,
+				"Starting rx dma failed, err %d\n", ret);
+			if (tspi->cur_direction & DATA_DIR_TX)
+				dmaengine_terminate_all(tspi->tx_dma_chan);
+			return ret;
+		}
+	}
+	tspi->is_curr_dma_xfer = true;
+	if (tspi->is_packed) {
+		val |= SLINK_PACKED;
+		tegra_slink_writel(tspi, val, SLINK_DMA_CTL);
+		/* HW need small delay after settign Packed mode */
+		udelay(1);
+	}
+	tspi->dma_control_reg = val;
+
+	val |= SLINK_DMA_EN;
+	tegra_slink_writel(tspi, val, SLINK_DMA_CTL);
+	return ret;
+}
+
+static int tegra_slink_start_cpu_based_transfer(
+		struct tegra_slink_data *tspi, struct spi_transfer *t)
+{
+	unsigned long val;
+	unsigned cur_words;
+
+	val = tspi->packed_size;
+	if (tspi->cur_direction & DATA_DIR_TX)
+		val |= SLINK_IE_TXC;
+
+	if (tspi->cur_direction & DATA_DIR_RX)
+		val |= SLINK_IE_RXC;
+
+	tegra_slink_writel(tspi, val, SLINK_DMA_CTL);
+	tspi->dma_control_reg = val;
+
+	if (tspi->cur_direction & DATA_DIR_TX)
+		cur_words = tegra_slink_fill_tx_fifo_from_client_txbuf(tspi, t);
+	else
+		cur_words = tspi->curr_dma_words;
+	val |= SLINK_DMA_BLOCK_SIZE(cur_words - 1);
+	tegra_slink_writel(tspi, val, SLINK_DMA_CTL);
+	tspi->dma_control_reg = val;
+
+	tspi->is_curr_dma_xfer = false;
+	if (tspi->is_packed) {
+		val |= SLINK_PACKED;
+		tegra_slink_writel(tspi, val, SLINK_DMA_CTL);
+		udelay(1);
+		wmb();
+	}
+	tspi->dma_control_reg = val;
+	val |= SLINK_DMA_EN;
+	tegra_slink_writel(tspi, val, SLINK_DMA_CTL);
+	return 0;
+}
+
+static int tegra_slink_init_dma_param(struct tegra_slink_data *tspi,
+			bool dma_to_memory)
+{
+	struct dma_chan *dma_chan;
+	u32 *dma_buf;
+	dma_addr_t dma_phys;
+	int ret;
+	struct dma_slave_config dma_sconfig;
+	dma_cap_mask_t mask;
+
+	dma_cap_zero(mask);
+	dma_cap_set(DMA_SLAVE, mask);
+	dma_chan = dma_request_channel(mask, NULL, NULL);
+	if (!dma_chan) {
+		dev_err(tspi->dev,
+			"Dma channel is not available, will try later\n");
+		return -EPROBE_DEFER;
+	}
+
+	dma_buf = dma_alloc_coherent(tspi->dev, tspi->dma_buf_size,
+				&dma_phys, GFP_KERNEL);
+	if (!dma_buf) {
+		dev_err(tspi->dev, " Not able to allocate the dma buffer\n");
+		dma_release_channel(dma_chan);
+		return -ENOMEM;
+	}
+
+	dma_sconfig.slave_id = tspi->dma_req_sel;
+	if (dma_to_memory) {
+		dma_sconfig.src_addr = tspi->phys + SLINK_RX_FIFO;
+		dma_sconfig.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+		dma_sconfig.src_maxburst = 0;
+	} else {
+		dma_sconfig.dst_addr = tspi->phys + SLINK_TX_FIFO;
+		dma_sconfig.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+		dma_sconfig.dst_maxburst = 0;
+	}
+
+	ret = dmaengine_slave_config(dma_chan, &dma_sconfig);
+	if (ret)
+		goto scrub;
+	if (dma_to_memory) {
+		tspi->rx_dma_chan = dma_chan;
+		tspi->rx_dma_buf = dma_buf;
+		tspi->rx_dma_phys = dma_phys;
+	} else {
+		tspi->tx_dma_chan = dma_chan;
+		tspi->tx_dma_buf = dma_buf;
+		tspi->tx_dma_phys = dma_phys;
+	}
+	return 0;
+
+scrub:
+	dma_free_coherent(tspi->dev, tspi->dma_buf_size, dma_buf, dma_phys);
+	dma_release_channel(dma_chan);
+	return ret;
+}
+
+static void tegra_slink_deinit_dma_param(struct tegra_slink_data *tspi,
+	bool dma_to_memory)
+{
+	u32 *dma_buf;
+	dma_addr_t dma_phys;
+	struct dma_chan *dma_chan;
+
+	if (dma_to_memory) {
+		dma_buf = tspi->rx_dma_buf;
+		dma_chan = tspi->rx_dma_chan;
+		dma_phys = tspi->rx_dma_phys;
+		tspi->rx_dma_chan = NULL;
+		tspi->rx_dma_buf = NULL;
+	} else {
+		dma_buf = tspi->tx_dma_buf;
+		dma_chan = tspi->tx_dma_chan;
+		dma_phys = tspi->tx_dma_phys;
+		tspi->tx_dma_buf = NULL;
+		tspi->tx_dma_chan = NULL;
+	}
+	if (!dma_chan)
+		return;
+
+	dma_free_coherent(tspi->dev, tspi->dma_buf_size, dma_buf, dma_phys);
+	dma_release_channel(dma_chan);
+}
+
+static int tegra_slink_start_transfer_one(struct spi_device *spi,
+		struct spi_transfer *t, bool is_first_of_msg,
+		bool is_single_xfer)
+{
+	struct tegra_slink_data *tspi = spi_master_get_devdata(spi->master);
+	u32 speed;
+	u8 bits_per_word;
+	unsigned total_fifo_words;
+	int ret;
+	struct tegra_spi_device_controller_data *cdata = spi->controller_data;
+	unsigned long command;
+	unsigned long command2;
+
+	bits_per_word = t->bits_per_word ? t->bits_per_word :
+					spi->bits_per_word;
+	speed = t->speed_hz ? t->speed_hz : spi->max_speed_hz;
+	if (!speed)
+		speed = tspi->spi_max_frequency;
+	if (speed != tspi->cur_speed) {
+		clk_set_rate(tspi->clk, speed * 4);
+		tspi->cur_speed = speed;
+	}
+
+	tspi->cur_spi = spi;
+	tspi->cur_pos = 0;
+	tspi->cur_rx_pos = 0;
+	tspi->cur_tx_pos = 0;
+	tspi->curr_xfer = t;
+	total_fifo_words = tegra_slink_calculate_curr_xfer_param(spi, tspi, t);
+
+	if (is_first_of_msg) {
+		tegra_slink_clear_status(tspi);
+
+		command = tspi->def_command_reg;
+		command |= SLINK_BIT_LENGTH(bits_per_word - 1);
+
+		command2 = tspi->def_command2_reg;
+		command2 |= SLINK_SS_EN_CS(spi->chip_select);
+
+		/* possibly use the hw based chip select */
+		tspi->is_hw_based_cs = false;
+		if (cdata && cdata->is_hw_based_cs && is_single_xfer &&
+			((tspi->curr_dma_words * tspi->bytes_per_word) ==
+						(t->len - tspi->cur_pos))) {
+			int setup_count;
+			int sts2;
+
+			setup_count = cdata->cs_setup_clk_count >> 1;
+			setup_count = max(setup_count, 3);
+			command2 |= SLINK_SS_SETUP(setup_count);
+			if (tspi->chip_data->cs_hold_time) {
+				int hold_count;
+
+				hold_count = cdata->cs_hold_clk_count;
+				hold_count = max(hold_count, 0xF);
+				sts2 = tegra_slink_readl(tspi, SLINK_STATUS2);
+				sts2 &= ~SLINK_SS_HOLD_TIME(0xF);
+				sts2 |= SLINK_SS_HOLD_TIME(hold_count);
+				tegra_slink_writel(tspi, sts2, SLINK_STATUS2);
+			}
+			tspi->is_hw_based_cs = true;
+		}
+
+		if (tspi->is_hw_based_cs)
+			command &= ~SLINK_CS_SW;
+		else
+			command |= SLINK_CS_SW | SLINK_CS_VALUE;
+
+		command &= ~SLINK_MODES;
+		if (spi->mode & SPI_CPHA)
+			command |= SLINK_CK_SDA;
+
+		if (spi->mode & SPI_CPOL)
+			command |= SLINK_IDLE_SCLK_DRIVE_HIGH;
+		else
+			command |= SLINK_IDLE_SCLK_DRIVE_LOW;
+	} else {
+		command = tspi->command_reg;
+		command &= ~SLINK_BIT_LENGTH(~0);
+		command |= SLINK_BIT_LENGTH(bits_per_word - 1);
+
+		command2 = tspi->command2_reg;
+		command2 &= ~(SLINK_RXEN | SLINK_TXEN);
+	}
+
+	tegra_slink_writel(tspi, command, SLINK_COMMAND);
+	tspi->command_reg = command;
+
+	tspi->cur_direction = 0;
+	if (t->rx_buf) {
+		command2 |= SLINK_RXEN;
+		tspi->cur_direction |= DATA_DIR_RX;
+	}
+	if (t->tx_buf) {
+		command2 |= SLINK_TXEN;
+		tspi->cur_direction |= DATA_DIR_TX;
+	}
+	tegra_slink_writel(tspi, command2, SLINK_COMMAND2);
+	tspi->command2_reg = command2;
+
+	if (total_fifo_words > SLINK_FIFO_DEPTH)
+		ret = tegra_slink_start_dma_based_transfer(tspi, t);
+	else
+		ret = tegra_slink_start_cpu_based_transfer(tspi, t);
+	return ret;
+}
+
+static int tegra_slink_setup(struct spi_device *spi)
+{
+	struct tegra_slink_data *tspi = spi_master_get_devdata(spi->master);
+	unsigned long val;
+	unsigned long flags;
+	int ret;
+	unsigned int cs_pol_bit[MAX_CHIP_SELECT] = {
+			SLINK_CS_POLARITY,
+			SLINK_CS_POLARITY1,
+			SLINK_CS_POLARITY2,
+			SLINK_CS_POLARITY3,
+	};
+
+	dev_dbg(&spi->dev, "setup %d bpw, %scpol, %scpha, %dHz\n",
+		spi->bits_per_word,
+		spi->mode & SPI_CPOL ? "" : "~",
+		spi->mode & SPI_CPHA ? "" : "~",
+		spi->max_speed_hz);
+
+	BUG_ON(spi->chip_select >= MAX_CHIP_SELECT);
+
+	ret = pm_runtime_get_sync(tspi->dev);
+	if (ret < 0) {
+		dev_err(tspi->dev, "pm runtime failed, e = %d\n", ret);
+		return ret;
+	}
+
+	spin_lock_irqsave(&tspi->lock, flags);
+	val = tspi->def_command_reg;
+	if (spi->mode & SPI_CS_HIGH)
+		val |= cs_pol_bit[spi->chip_select];
+	else
+		val &= ~cs_pol_bit[spi->chip_select];
+	tspi->def_command_reg = val;
+	tegra_slink_writel(tspi, tspi->def_command_reg, SLINK_COMMAND);
+	spin_unlock_irqrestore(&tspi->lock, flags);
+
+	pm_runtime_put(tspi->dev);
+	return 0;
+}
+
+static int tegra_slink_prepare_transfer(struct spi_master *master)
+{
+	struct tegra_slink_data *tspi = spi_master_get_devdata(master);
+
+	return pm_runtime_get_sync(tspi->dev);
+}
+
+static int tegra_slink_unprepare_transfer(struct spi_master *master)
+{
+	struct tegra_slink_data *tspi = spi_master_get_devdata(master);
+
+	pm_runtime_put(tspi->dev);
+	return 0;
+}
+
+static int tegra_slink_transfer_one_message(struct spi_master *master,
+			struct spi_message *msg)
+{
+	bool is_first_msg = true;
+	int single_xfer;
+	struct tegra_slink_data *tspi = spi_master_get_devdata(master);
+	struct spi_transfer *xfer;
+	struct spi_device *spi = msg->spi;
+	int ret;
+
+	msg->status = 0;
+	msg->actual_length = 0;
+	single_xfer = list_is_singular(&msg->transfers);
+	list_for_each_entry(xfer, &msg->transfers, transfer_list) {
+		INIT_COMPLETION(tspi->xfer_completion);
+		ret = tegra_slink_start_transfer_one(spi, xfer,
+					is_first_msg, single_xfer);
+		if (ret < 0) {
+			dev_err(tspi->dev,
+				"spi can not start transfer, err %d\n", ret);
+			goto exit;
+		}
+		is_first_msg = false;
+		ret = wait_for_completion_timeout(&tspi->xfer_completion,
+						SLINK_DMA_TIMEOUT);
+		if (WARN_ON(ret == 0)) {
+			dev_err(tspi->dev,
+				"spi trasfer timeout, err %d\n", ret);
+			ret = -EIO;
+			goto exit;
+		}
+
+		if (tspi->tx_status ||  tspi->rx_status) {
+			dev_err(tspi->dev, "Error in Transfer\n");
+			ret = -EIO;
+			goto exit;
+		}
+		msg->actual_length += xfer->len;
+		if (xfer->cs_change && xfer->delay_usecs) {
+			tegra_slink_writel(tspi, tspi->def_command_reg,
+					SLINK_COMMAND);
+			udelay(xfer->delay_usecs);
+		}
+	}
+	ret = 0;
+exit:
+	tegra_slink_writel(tspi, tspi->def_command_reg, SLINK_COMMAND);
+	tegra_slink_writel(tspi, tspi->def_command2_reg, SLINK_COMMAND2);
+	msg->status = ret;
+	spi_finalize_current_message(master);
+	return ret;
+}
+
+static irqreturn_t handle_cpu_based_xfer(struct tegra_slink_data *tspi)
+{
+	struct spi_transfer *t = tspi->curr_xfer;
+	unsigned long flags;
+
+	spin_lock_irqsave(&tspi->lock, flags);
+	if (tspi->tx_status ||  tspi->rx_status ||
+				(tspi->status_reg & SLINK_BSY)) {
+		dev_err(tspi->dev,
+			"CpuXfer ERROR bit set 0x%x\n", tspi->status_reg);
+		dev_err(tspi->dev,
+			"CpuXfer 0x%08x:0x%08x:0x%08x\n", tspi->command_reg,
+				tspi->command2_reg, tspi->dma_control_reg);
+		tegra_periph_reset_assert(tspi->clk);
+		udelay(2);
+		tegra_periph_reset_deassert(tspi->clk);
+		complete(&tspi->xfer_completion);
+		goto exit;
+	}
+
+	if (tspi->cur_direction & DATA_DIR_RX)
+		tegra_slink_read_rx_fifo_to_client_rxbuf(tspi, t);
+
+	if (tspi->cur_direction & DATA_DIR_TX)
+		tspi->cur_pos = tspi->cur_tx_pos;
+	else
+		tspi->cur_pos = tspi->cur_rx_pos;
+
+	if (tspi->cur_pos == t->len) {
+		complete(&tspi->xfer_completion);
+		goto exit;
+	}
+
+	tegra_slink_calculate_curr_xfer_param(tspi->cur_spi, tspi, t);
+	tegra_slink_start_cpu_based_transfer(tspi, t);
+exit:
+	spin_unlock_irqrestore(&tspi->lock, flags);
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t handle_dma_based_xfer(struct tegra_slink_data *tspi)
+{
+	struct spi_transfer *t = tspi->curr_xfer;
+	long wait_status;
+	int err = 0;
+	unsigned total_fifo_words;
+	unsigned long flags;
+
+	/* Abort dmas if any error */
+	if (tspi->cur_direction & DATA_DIR_TX) {
+		if (tspi->tx_status) {
+			dmaengine_terminate_all(tspi->tx_dma_chan);
+			err += 1;
+		} else {
+			wait_status = wait_for_completion_interruptible_timeout(
+				&tspi->tx_dma_complete, SLINK_DMA_TIMEOUT);
+			if (wait_status <= 0) {
+				dmaengine_terminate_all(tspi->tx_dma_chan);
+				dev_err(tspi->dev, "TxDma Xfer failed\n");
+				err += 1;
+			}
+		}
+	}
+
+	if (tspi->cur_direction & DATA_DIR_RX) {
+		if (tspi->rx_status) {
+			dmaengine_terminate_all(tspi->rx_dma_chan);
+			err += 2;
+		} else {
+			wait_status = wait_for_completion_interruptible_timeout(
+				&tspi->rx_dma_complete, SLINK_DMA_TIMEOUT);
+			if (wait_status <= 0) {
+				dmaengine_terminate_all(tspi->rx_dma_chan);
+				dev_err(tspi->dev, "RxDma Xfer failed\n");
+				err += 2;
+			}
+		}
+	}
+
+	spin_lock_irqsave(&tspi->lock, flags);
+	if (err) {
+		dev_err(tspi->dev,
+			"DmaXfer: ERROR bit set 0x%x\n", tspi->status_reg);
+		dev_err(tspi->dev,
+			"DmaXfer 0x%08x:0x%08x:0x%08x\n", tspi->command_reg,
+				tspi->command2_reg, tspi->dma_control_reg);
+		tegra_periph_reset_assert(tspi->clk);
+		udelay(2);
+		tegra_periph_reset_deassert(tspi->clk);
+		complete(&tspi->xfer_completion);
+		spin_unlock_irqrestore(&tspi->lock, flags);
+		return IRQ_HANDLED;
+	}
+
+	if (tspi->cur_direction & DATA_DIR_RX)
+		tegra_slink_copy_spi_rxbuf_to_client_rxbuf(tspi, t);
+
+	if (tspi->cur_direction & DATA_DIR_TX)
+		tspi->cur_pos = tspi->cur_tx_pos;
+	else
+		tspi->cur_pos = tspi->cur_rx_pos;
+
+	if (tspi->cur_pos == t->len) {
+		complete(&tspi->xfer_completion);
+		goto exit;
+	}
+
+	/* Continue transfer in current message */
+	total_fifo_words = tegra_slink_calculate_curr_xfer_param(tspi->cur_spi,
+							tspi, t);
+	if (total_fifo_words > SLINK_FIFO_DEPTH)
+		err = tegra_slink_start_dma_based_transfer(tspi, t);
+	else
+		err = tegra_slink_start_cpu_based_transfer(tspi, t);
+
+exit:
+	spin_unlock_irqrestore(&tspi->lock, flags);
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t tegra_slink_isr_thread(int irq, void *context_data)
+{
+	struct tegra_slink_data *tspi = context_data;
+
+	if (!tspi->is_curr_dma_xfer)
+		return handle_cpu_based_xfer(tspi);
+	return handle_dma_based_xfer(tspi);
+}
+
+static irqreturn_t tegra_slink_isr(int irq, void *context_data)
+{
+	struct tegra_slink_data *tspi = context_data;
+
+	tspi->status_reg = tegra_slink_readl(tspi, SLINK_STATUS);
+	if (tspi->cur_direction & DATA_DIR_TX)
+		tspi->tx_status = tspi->status_reg &
+					(SLINK_TX_OVF | SLINK_TX_UNF);
+
+	if (tspi->cur_direction & DATA_DIR_RX)
+		tspi->rx_status = tspi->status_reg &
+					(SLINK_RX_OVF | SLINK_RX_UNF);
+	tegra_slink_clear_status(tspi);
+
+	return IRQ_WAKE_THREAD;
+}
+
+static struct tegra_spi_platform_data *tegra_slink_parse_dt(
+		struct platform_device *pdev)
+{
+	struct tegra_spi_platform_data *pdata;
+	const unsigned int *prop;
+	struct device_node *np = pdev->dev.of_node;
+	u32 of_dma[2];
+
+	pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
+	if (!pdata) {
+		dev_err(&pdev->dev, "Memory alloc for pdata failed\n");
+		return NULL;
+	}
+
+	if (of_property_read_u32_array(np, "nvidia,dma-request-selector",
+				of_dma, 2) >= 0)
+		pdata->dma_req_sel = of_dma[1];
+
+	prop = of_get_property(np, "spi-max-frequency", NULL);
+	if (prop)
+		pdata->spi_max_frequency = be32_to_cpup(prop);
+
+	return pdata;
+}
+
+const struct tegra_slink_chip_data tegra30_spi_cdata = {
+	.cs_hold_time = true,
+};
+
+const struct tegra_slink_chip_data tegra20_spi_cdata = {
+	.cs_hold_time = false,
+};
+
+static struct of_device_id tegra_slink_of_match[] __devinitconst = {
+	{ .compatible = "nvidia,tegra20-slink", .data = &tegra20_spi_cdata, },
+	{ .compatible = "nvidia,tegra30-slink", .data = &tegra30_spi_cdata, },
+	{}
+};
+MODULE_DEVICE_TABLE(of, tegra_slink_of_match);
+
+static int __devinit tegra_slink_probe(struct platform_device *pdev)
+{
+	struct spi_master	*master;
+	struct tegra_slink_data	*tspi;
+	struct resource		*r;
+	struct tegra_spi_platform_data *pdata = pdev->dev.platform_data;
+	int ret, spi_irq;
+	const struct tegra_slink_chip_data *cdata = NULL;
+	const struct of_device_id *match;
+
+	match = of_match_device(of_match_ptr(tegra_slink_of_match), &pdev->dev);
+	if (!match) {
+		dev_err(&pdev->dev, "Error: No device match found\n");
+		return -ENODEV;
+	}
+	cdata = match->data;
+	if (!pdata && pdev->dev.of_node)
+		pdata = tegra_slink_parse_dt(pdev);
+
+	if (!pdata) {
+		dev_err(&pdev->dev, "No platform data, exiting\n");
+		return -ENODEV;
+	}
+
+	if (!pdata->spi_max_frequency)
+		pdata->spi_max_frequency = 25000000; /* 25MHz */
+
+	master = spi_alloc_master(&pdev->dev, sizeof(*tspi));
+	if (!master) {
+		dev_err(&pdev->dev, "master allocation failed\n");
+		return -ENOMEM;
+	}
+
+	/* the spi->mode bits understood by this driver: */
+	master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH;
+	master->setup = tegra_slink_setup;
+	master->prepare_transfer_hardware = tegra_slink_prepare_transfer;
+	master->transfer_one_message = tegra_slink_transfer_one_message;
+	master->unprepare_transfer_hardware = tegra_slink_unprepare_transfer;
+	master->num_chipselect = MAX_CHIP_SELECT;
+	master->bus_num = -1;
+
+	dev_set_drvdata(&pdev->dev, master);
+	tspi = spi_master_get_devdata(master);
+	tspi->master = master;
+	tspi->dma_req_sel = pdata->dma_req_sel;
+	tspi->dev = &pdev->dev;
+	tspi->chip_data = cdata;
+	spin_lock_init(&tspi->lock);
+
+	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!r) {
+		dev_err(&pdev->dev, "No IO memory resource\n");
+		ret = -ENODEV;
+		goto exit_free_master;
+	}
+	tspi->phys = r->start;
+	tspi->base = devm_request_and_ioremap(&pdev->dev, r);
+	if (!tspi->base) {
+		dev_err(&pdev->dev,
+			"Cannot request memregion/iomap dma address\n");
+		ret = -EADDRNOTAVAIL;
+		goto exit_free_master;
+	}
+
+	spi_irq = platform_get_irq(pdev, 0);
+	tspi->irq = spi_irq;
+	ret = request_threaded_irq(tspi->irq, tegra_slink_isr,
+			tegra_slink_isr_thread, IRQF_ONESHOT,
+			dev_name(&pdev->dev), tspi);
+	if (ret < 0) {
+		dev_err(&pdev->dev, "Failed to register ISR for IRQ %d\n",
+					tspi->irq);
+		goto exit_free_master;
+	}
+
+	tspi->clk = devm_clk_get(&pdev->dev, "slink");
+	if (IS_ERR(tspi->clk)) {
+		dev_err(&pdev->dev, "can not get clock\n");
+		ret = PTR_ERR(tspi->clk);
+		goto exit_free_irq;
+	}
+
+	tspi->max_buf_size = SLINK_FIFO_DEPTH << 2;
+	tspi->dma_buf_size = DEFAULT_SPI_DMA_BUF_LEN;
+	tspi->spi_max_frequency = pdata->spi_max_frequency;
+
+	if (pdata->dma_req_sel) {
+		ret = tegra_slink_init_dma_param(tspi, true);
+		if (ret < 0) {
+			dev_err(&pdev->dev, "RxDma Init failed, err %d\n", ret);
+			goto exit_free_irq;
+		}
+
+		ret = tegra_slink_init_dma_param(tspi, false);
+		if (ret < 0) {
+			dev_err(&pdev->dev, "TxDma Init failed, err %d\n", ret);
+			goto exit_rx_dma_free;
+		}
+		tspi->max_buf_size = tspi->dma_buf_size;
+		init_completion(&tspi->tx_dma_complete);
+		init_completion(&tspi->rx_dma_complete);
+	}
+
+	init_completion(&tspi->xfer_completion);
+
+	pm_runtime_enable(&pdev->dev);
+	if (!pm_runtime_enabled(&pdev->dev)) {
+		ret = tegra_slink_runtime_resume(&pdev->dev);
+		if (ret)
+			goto exit_pm_disable;
+	}
+
+	ret = pm_runtime_get_sync(&pdev->dev);
+	if (ret < 0) {
+		dev_err(&pdev->dev, "pm runtime get failed, e = %d\n", ret);
+		goto exit_pm_disable;
+	}
+	tspi->def_command_reg  = SLINK_M_S;
+	tspi->def_command2_reg = SLINK_CS_ACTIVE_BETWEEN;
+	tegra_slink_writel(tspi, tspi->def_command_reg, SLINK_COMMAND);
+	tegra_slink_writel(tspi, tspi->def_command2_reg, SLINK_COMMAND2);
+	pm_runtime_put(&pdev->dev);
+
+	master->dev.of_node = pdev->dev.of_node;
+	ret = spi_register_master(master);
+	if (ret < 0) {
+		dev_err(&pdev->dev, "can not register to master err %d\n", ret);
+		goto exit_pm_disable;
+	}
+	return ret;
+
+exit_pm_disable:
+	pm_runtime_disable(&pdev->dev);
+	if (!pm_runtime_status_suspended(&pdev->dev))
+		tegra_slink_runtime_suspend(&pdev->dev);
+	tegra_slink_deinit_dma_param(tspi, false);
+exit_rx_dma_free:
+	tegra_slink_deinit_dma_param(tspi, true);
+exit_free_irq:
+	free_irq(spi_irq, tspi);
+exit_free_master:
+	spi_master_put(master);
+	return ret;
+}
+
+static int __devexit tegra_slink_remove(struct platform_device *pdev)
+{
+	struct spi_master *master = dev_get_drvdata(&pdev->dev);
+	struct tegra_slink_data	*tspi = spi_master_get_devdata(master);
+
+	free_irq(tspi->irq, tspi);
+	spi_unregister_master(master);
+
+	if (tspi->tx_dma_chan)
+		tegra_slink_deinit_dma_param(tspi, false);
+
+	if (tspi->rx_dma_chan)
+		tegra_slink_deinit_dma_param(tspi, true);
+
+	pm_runtime_disable(&pdev->dev);
+	if (!pm_runtime_status_suspended(&pdev->dev))
+		tegra_slink_runtime_suspend(&pdev->dev);
+
+	return 0;
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int tegra_slink_suspend(struct device *dev)
+{
+	struct spi_master *master = dev_get_drvdata(dev);
+
+	return spi_master_suspend(master);
+}
+
+static int tegra_slink_resume(struct device *dev)
+{
+	struct spi_master *master = dev_get_drvdata(dev);
+	struct tegra_slink_data *tspi = spi_master_get_devdata(master);
+	int ret;
+
+	ret = pm_runtime_get_sync(dev);
+	if (ret < 0) {
+		dev_err(dev, "pm runtime failed, e = %d\n", ret);
+		return ret;
+	}
+	tegra_slink_writel(tspi, tspi->command_reg, SLINK_COMMAND);
+	tegra_slink_writel(tspi, tspi->command2_reg, SLINK_COMMAND2);
+	pm_runtime_put(dev);
+
+	return spi_master_resume(master);
+}
+#endif
+
+static int tegra_slink_runtime_suspend(struct device *dev)
+{
+	struct spi_master *master = dev_get_drvdata(dev);
+	struct tegra_slink_data *tspi = spi_master_get_devdata(master);
+
+	/* Flush all write which are in PPSB queue by reading back */
+	tegra_slink_readl(tspi, SLINK_MAS_DATA);
+
+	clk_disable_unprepare(tspi->clk);
+	return 0;
+}
+
+static int tegra_slink_runtime_resume(struct device *dev)
+{
+	struct spi_master *master = dev_get_drvdata(dev);
+	struct tegra_slink_data *tspi = spi_master_get_devdata(master);
+	int ret;
+
+	ret = clk_prepare_enable(tspi->clk);
+	if (ret < 0) {
+		dev_err(tspi->dev, "clk_prepare failed: %d\n", ret);
+		return ret;
+	}
+	return 0;
+}
+
+static const struct dev_pm_ops slink_pm_ops = {
+	SET_RUNTIME_PM_OPS(tegra_slink_runtime_suspend,
+		tegra_slink_runtime_resume, NULL)
+	SET_SYSTEM_SLEEP_PM_OPS(tegra_slink_suspend, tegra_slink_resume)
+};
+static struct platform_driver tegra_slink_driver = {
+	.driver = {
+		.name		= "spi-tegra-slink",
+		.owner		= THIS_MODULE,
+		.pm		= &slink_pm_ops,
+		.of_match_table	= of_match_ptr(tegra_slink_of_match),
+	},
+	.probe =	tegra_slink_probe,
+	.remove =	__devexit_p(tegra_slink_remove),
+};
+module_platform_driver(tegra_slink_driver);
+
+MODULE_ALIAS("platform:spi-tegra-slink");
+MODULE_DESCRIPTION("NVIDIA Tegra20/Tegra30 SLINK Controller Driver");
+MODULE_AUTHOR("Laxman Dewangan <ldewangan@nvidia.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/spi/spi-tegra.h b/include/linux/spi/spi-tegra.h
new file mode 100644
index 000000000000..786932c62edb
--- /dev/null
+++ b/include/linux/spi/spi-tegra.h
@@ -0,0 +1,40 @@
+/*
+ * spi-tegra.h: SPI interface for Nvidia Tegra20 SLINK controller.
+ *
+ * Copyright (C) 2011 NVIDIA Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef _LINUX_SPI_TEGRA_H
+#define _LINUX_SPI_TEGRA_H
+
+struct tegra_spi_platform_data {
+	int dma_req_sel;
+	unsigned int spi_max_frequency;
+};
+
+/*
+ * Controller data from device to pass some info like
+ * hw based chip select can be used or not and if yes
+ * then CS hold and setup time.
+ */
+struct tegra_spi_device_controller_data {
+	bool is_hw_based_cs;
+	int cs_setup_clk_count;
+	int cs_hold_clk_count;
+};
+
+#endif /* _LINUX_SPI_TEGRA_H */
-- 
cgit v1.2.3


From 242860a47a75b933a79a30f6a40bf4858f4a3ecc Mon Sep 17 00:00:00 2001
From: Ezequiel Garcia <elezegarcia@gmail.com>
Date: Fri, 19 Oct 2012 09:33:12 -0300
Subject: mm/sl[aou]b: Move common kmem_cache_size() to slab.h

This function is identically defined in all three allocators
and it's trivial to move it to slab.h

Since now it's static, inline, header-defined function
this patch also drops the EXPORT_SYMBOL tag.

Cc: Pekka Enberg <penberg@kernel.org>
Cc: Matt Mackall <mpm@selenic.com>
Acked-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Ezequiel Garcia <elezegarcia@gmail.com>
Signed-off-by: Pekka Enberg <penberg@kernel.org>
---
 include/linux/slab.h | 9 ++++++++-
 mm/slab.c            | 6 ------
 mm/slob.c            | 6 ------
 mm/slub.c            | 9 ---------
 4 files changed, 8 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/slab.h b/include/linux/slab.h
index 83d1a1454b7e..743a10415122 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -128,7 +128,6 @@ struct kmem_cache *kmem_cache_create(const char *, size_t, size_t,
 void kmem_cache_destroy(struct kmem_cache *);
 int kmem_cache_shrink(struct kmem_cache *);
 void kmem_cache_free(struct kmem_cache *, void *);
-unsigned int kmem_cache_size(struct kmem_cache *);
 
 /*
  * Please use this macro to create slab caches. Simply specify the
@@ -388,6 +387,14 @@ static inline void *kzalloc_node(size_t size, gfp_t flags, int node)
 	return kmalloc_node(size, flags | __GFP_ZERO, node);
 }
 
+/*
+ * Determine the size of a slab object
+ */
+static inline unsigned int kmem_cache_size(struct kmem_cache *s)
+{
+	return s->object_size;
+}
+
 void __init kmem_cache_init_late(void);
 
 #endif	/* _LINUX_SLAB_H */
diff --git a/mm/slab.c b/mm/slab.c
index 6d5c83c6ddd5..1f7fd5f51f87 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3969,12 +3969,6 @@ void kfree(const void *objp)
 }
 EXPORT_SYMBOL(kfree);
 
-unsigned int kmem_cache_size(struct kmem_cache *cachep)
-{
-	return cachep->object_size;
-}
-EXPORT_SYMBOL(kmem_cache_size);
-
 /*
  * This initializes kmem_list3 or resizes various caches for all nodes.
  */
diff --git a/mm/slob.c b/mm/slob.c
index 287a88aa4a61..fffbc820774d 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -604,12 +604,6 @@ void kmem_cache_free(struct kmem_cache *c, void *b)
 }
 EXPORT_SYMBOL(kmem_cache_free);
 
-unsigned int kmem_cache_size(struct kmem_cache *c)
-{
-	return c->object_size;
-}
-EXPORT_SYMBOL(kmem_cache_size);
-
 int __kmem_cache_shutdown(struct kmem_cache *c)
 {
 	/* No way to check for remaining objects */
diff --git a/mm/slub.c b/mm/slub.c
index 35483e0ab6bc..deee7c754a7d 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3121,15 +3121,6 @@ error:
 	return -EINVAL;
 }
 
-/*
- * Determine the size of a slab object
- */
-unsigned int kmem_cache_size(struct kmem_cache *s)
-{
-	return s->object_size;
-}
-EXPORT_SYMBOL(kmem_cache_size);
-
 static void list_slab_objects(struct kmem_cache *s, struct page *page,
 							const char *text)
 {
-- 
cgit v1.2.3


From 86b00e0da6be7bbc16412f126c5b548ac5d91d50 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@inktank.com>
Date: Thu, 25 Oct 2012 23:34:42 -0500
Subject: rbd: get parent spec for version 2 images

Add support for getting the the information identifying the parent
image for rbd images that have them.  The child image holds a
reference to its parent image specification structure.  Create a new
entry "parent" in /sys/bus/rbd/image/N/ to report the identifying
information for the parent image, if any.

Signed-off-by: Alex Elder <elder@inktank.com>
Reviewed-by: Josh Durgin <josh.durgin@inktank.com>
---
 Documentation/ABI/testing/sysfs-bus-rbd |   4 +
 drivers/block/rbd.c                     | 131 ++++++++++++++++++++++++++++++++
 include/linux/ceph/rados.h              |   2 +
 3 files changed, 137 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/ABI/testing/sysfs-bus-rbd b/Documentation/ABI/testing/sysfs-bus-rbd
index 1cf2adf46b11..cd9213ccf3dc 100644
--- a/Documentation/ABI/testing/sysfs-bus-rbd
+++ b/Documentation/ABI/testing/sysfs-bus-rbd
@@ -70,6 +70,10 @@ snap_*
 
 	A directory per each snapshot
 
+parent
+
+	Information identifying the pool, image, and snapshot id for
+	the parent image in a layered rbd image (format 2 only).
 
 Entries under /sys/bus/rbd/devices/<dev-id>/snap_<snap-name>
 -------------------------------------------------------------
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 28052ff679ca..bce1fcfb5185 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -217,6 +217,9 @@ struct rbd_device {
 	struct ceph_osd_event   *watch_event;
 	struct ceph_osd_request *watch_request;
 
+	struct rbd_spec		*parent_spec;
+	u64			parent_overlap;
+
 	/* protects updating the header */
 	struct rw_semaphore     header_rwsem;
 
@@ -2009,6 +2012,49 @@ static ssize_t rbd_snap_show(struct device *dev,
 	return sprintf(buf, "%s\n", rbd_dev->spec->snap_name);
 }
 
+/*
+ * For an rbd v2 image, shows the pool id, image id, and snapshot id
+ * for the parent image.  If there is no parent, simply shows
+ * "(no parent image)".
+ */
+static ssize_t rbd_parent_show(struct device *dev,
+			     struct device_attribute *attr,
+			     char *buf)
+{
+	struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
+	struct rbd_spec *spec = rbd_dev->parent_spec;
+	int count;
+	char *bufp = buf;
+
+	if (!spec)
+		return sprintf(buf, "(no parent image)\n");
+
+	count = sprintf(bufp, "pool_id %llu\npool_name %s\n",
+			(unsigned long long) spec->pool_id, spec->pool_name);
+	if (count < 0)
+		return count;
+	bufp += count;
+
+	count = sprintf(bufp, "image_id %s\nimage_name %s\n", spec->image_id,
+			spec->image_name ? spec->image_name : "(unknown)");
+	if (count < 0)
+		return count;
+	bufp += count;
+
+	count = sprintf(bufp, "snap_id %llu\nsnap_name %s\n",
+			(unsigned long long) spec->snap_id, spec->snap_name);
+	if (count < 0)
+		return count;
+	bufp += count;
+
+	count = sprintf(bufp, "overlap %llu\n", rbd_dev->parent_overlap);
+	if (count < 0)
+		return count;
+	bufp += count;
+
+	return (ssize_t) (bufp - buf);
+}
+
 static ssize_t rbd_image_refresh(struct device *dev,
 				 struct device_attribute *attr,
 				 const char *buf,
@@ -2032,6 +2078,7 @@ static DEVICE_ATTR(name, S_IRUGO, rbd_name_show, NULL);
 static DEVICE_ATTR(image_id, S_IRUGO, rbd_image_id_show, NULL);
 static DEVICE_ATTR(refresh, S_IWUSR, NULL, rbd_image_refresh);
 static DEVICE_ATTR(current_snap, S_IRUGO, rbd_snap_show, NULL);
+static DEVICE_ATTR(parent, S_IRUGO, rbd_parent_show, NULL);
 
 static struct attribute *rbd_attrs[] = {
 	&dev_attr_size.attr,
@@ -2043,6 +2090,7 @@ static struct attribute *rbd_attrs[] = {
 	&dev_attr_name.attr,
 	&dev_attr_image_id.attr,
 	&dev_attr_current_snap.attr,
+	&dev_attr_parent.attr,
 	&dev_attr_refresh.attr,
 	NULL
 };
@@ -2192,6 +2240,7 @@ struct rbd_device *rbd_dev_create(struct rbd_client *rbdc,
 
 static void rbd_dev_destroy(struct rbd_device *rbd_dev)
 {
+	rbd_spec_put(rbd_dev->parent_spec);
 	kfree(rbd_dev->header_name);
 	rbd_put_client(rbd_dev->rbd_client);
 	rbd_spec_put(rbd_dev->spec);
@@ -2400,6 +2449,71 @@ static int rbd_dev_v2_features(struct rbd_device *rbd_dev)
 						&rbd_dev->header.features);
 }
 
+static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev)
+{
+	struct rbd_spec *parent_spec;
+	size_t size;
+	void *reply_buf = NULL;
+	__le64 snapid;
+	void *p;
+	void *end;
+	char *image_id;
+	u64 overlap;
+	size_t len = 0;
+	int ret;
+
+	parent_spec = rbd_spec_alloc();
+	if (!parent_spec)
+		return -ENOMEM;
+
+	size = sizeof (__le64) +				/* pool_id */
+		sizeof (__le32) + RBD_IMAGE_ID_LEN_MAX +	/* image_id */
+		sizeof (__le64) +				/* snap_id */
+		sizeof (__le64);				/* overlap */
+	reply_buf = kmalloc(size, GFP_KERNEL);
+	if (!reply_buf) {
+		ret = -ENOMEM;
+		goto out_err;
+	}
+
+	snapid = cpu_to_le64(CEPH_NOSNAP);
+	ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name,
+				"rbd", "get_parent",
+				(char *) &snapid, sizeof (snapid),
+				(char *) reply_buf, size,
+				CEPH_OSD_FLAG_READ, NULL);
+	dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret);
+	if (ret < 0)
+		goto out_err;
+
+	ret = -ERANGE;
+	p = reply_buf;
+	end = (char *) reply_buf + size;
+	ceph_decode_64_safe(&p, end, parent_spec->pool_id, out_err);
+	if (parent_spec->pool_id == CEPH_NOPOOL)
+		goto out;	/* No parent?  No problem. */
+
+	image_id = ceph_extract_encoded_string(&p, end, &len, GFP_KERNEL);
+	if (IS_ERR(image_id)) {
+		ret = PTR_ERR(image_id);
+		goto out_err;
+	}
+	parent_spec->image_id = image_id;
+	ceph_decode_64_safe(&p, end, parent_spec->snap_id, out_err);
+	ceph_decode_64_safe(&p, end, overlap, out_err);
+
+	rbd_dev->parent_overlap = overlap;
+	rbd_dev->parent_spec = parent_spec;
+	parent_spec = NULL;	/* rbd_dev now owns this */
+out:
+	ret = 0;
+out_err:
+	kfree(reply_buf);
+	rbd_spec_put(parent_spec);
+
+	return ret;
+}
+
 static int rbd_dev_v2_snap_context(struct rbd_device *rbd_dev, u64 *ver)
 {
 	size_t size;
@@ -3154,6 +3268,12 @@ static int rbd_dev_v1_probe(struct rbd_device *rbd_dev)
 	ret = rbd_read_header(rbd_dev, &rbd_dev->header);
 	if (ret < 0)
 		goto out_err;
+
+	/* Version 1 images have no parent (no layering) */
+
+	rbd_dev->parent_spec = NULL;
+	rbd_dev->parent_overlap = 0;
+
 	rbd_dev->image_format = 1;
 
 	dout("discovered version 1 image, header name is %s\n",
@@ -3205,6 +3325,14 @@ static int rbd_dev_v2_probe(struct rbd_device *rbd_dev)
 	if (ret < 0)
 		goto out_err;
 
+	/* If the image supports layering, get the parent info */
+
+	if (rbd_dev->header.features & RBD_FEATURE_LAYERING) {
+		ret = rbd_dev_v2_parent_info(rbd_dev);
+		if (ret < 0)
+			goto out_err;
+	}
+
 	/* crypto and compression type aren't (yet) supported for v2 images */
 
 	rbd_dev->header.crypt_type = 0;
@@ -3224,6 +3352,9 @@ static int rbd_dev_v2_probe(struct rbd_device *rbd_dev)
 
 	return 0;
 out_err:
+	rbd_dev->parent_overlap = 0;
+	rbd_spec_put(rbd_dev->parent_spec);
+	rbd_dev->parent_spec = NULL;
 	kfree(rbd_dev->header_name);
 	rbd_dev->header_name = NULL;
 	kfree(rbd_dev->header.object_prefix);
diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h
index 0a99099801a4..15077db662ed 100644
--- a/include/linux/ceph/rados.h
+++ b/include/linux/ceph/rados.h
@@ -87,6 +87,8 @@ struct ceph_pg {
  *
  *  lpgp_num -- as above.
  */
+#define CEPH_NOPOOL  ((__u64) (-1))  /* pool id not defined */
+
 #define CEPH_PG_TYPE_REP     1
 #define CEPH_PG_TYPE_RAID4   2
 #define CEPH_PG_POOL_VERSION 2
-- 
cgit v1.2.3


From 72afc71ffca0f444ee0e1ef8c7e34ab209bb48b3 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@inktank.com>
Date: Tue, 30 Oct 2012 19:40:33 -0500
Subject: libceph: define ceph_pg_pool_name_by_id()

Define and export function ceph_pg_pool_name_by_id() to supply
the name of a pg pool whose id is given.  This will be used by
the next patch.

Signed-off-by: Alex Elder <elder@inktank.com>
Reviewed-by: Josh Durgin <josh.durgin@inktank.com>
---
 include/linux/ceph/osdmap.h |  1 +
 net/ceph/osdmap.c           | 16 ++++++++++++++++
 2 files changed, 17 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h
index e88a620b9f8a..5ea57ba69320 100644
--- a/include/linux/ceph/osdmap.h
+++ b/include/linux/ceph/osdmap.h
@@ -123,6 +123,7 @@ extern int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
 extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap,
 				struct ceph_pg pgid);
 
+extern const char *ceph_pg_pool_name_by_id(struct ceph_osdmap *map, u64 id);
 extern int ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name);
 
 #endif
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index f552aa48fd9e..de73214b5d26 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -469,6 +469,22 @@ static struct ceph_pg_pool_info *__lookup_pg_pool(struct rb_root *root, int id)
 	return NULL;
 }
 
+const char *ceph_pg_pool_name_by_id(struct ceph_osdmap *map, u64 id)
+{
+	struct ceph_pg_pool_info *pi;
+
+	if (id == CEPH_NOPOOL)
+		return NULL;
+
+	if (WARN_ON_ONCE(id > (u64) INT_MAX))
+		return NULL;
+
+	pi = __lookup_pg_pool(&map->pg_pools, (int) id);
+
+	return pi ? pi->name : NULL;
+}
+EXPORT_SYMBOL(ceph_pg_pool_name_by_id);
+
 int ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name)
 {
 	struct rb_node *rbp;
-- 
cgit v1.2.3


From 3b98c0c2093d1f92e5b7394ae0b13d142e7ef880 Mon Sep 17 00:00:00 2001
From: Lars Ellenberg <lars.ellenberg@linbit.com>
Date: Mon, 7 Mar 2011 12:49:34 +0100
Subject: drbd: switch configuration interface from connector to genetlink

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_actlog.c |    3 +-
 drivers/block/drbd/drbd_int.h    |   36 +-
 drivers/block/drbd/drbd_main.c   |   27 +-
 drivers/block/drbd/drbd_nl.c     | 1536 +++++++++++++++++++-------------------
 drivers/block/drbd/drbd_state.c  |    7 +-
 include/linux/drbd.h             |   35 +-
 include/linux/genl_magic_func.h  |    2 +-
 7 files changed, 806 insertions(+), 840 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index 7cd78617669b..c1a90616776b 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -702,6 +702,7 @@ static int w_update_odbm(struct drbd_work *w, int unused)
 {
 	struct update_odbm_work *udw = container_of(w, struct update_odbm_work, w);
 	struct drbd_conf *mdev = w->mdev;
+	struct sib_info sib = { .sib_reason = SIB_SYNC_PROGRESS, };
 
 	if (!get_ldev(mdev)) {
 		if (__ratelimit(&drbd_ratelimit_state))
@@ -725,7 +726,7 @@ static int w_update_odbm(struct drbd_work *w, int unused)
 			break;
 		}
 	}
-	drbd_bcast_sync_progress(mdev);
+	drbd_bcast_event(mdev, &sib);
 
 	return 1;
 }
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index e68758344647..429fd8da6b71 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -44,6 +44,7 @@
 #include <net/tcp.h>
 #include <linux/lru_cache.h>
 #include <linux/prefetch.h>
+#include <linux/drbd_genl_api.h>
 #include <linux/drbd.h>
 #include "drbd_state.h"
 
@@ -65,7 +66,6 @@
 extern unsigned int minor_count;
 extern int disable_sendpage;
 extern int allow_oos;
-extern unsigned int cn_idx;
 
 #ifdef CONFIG_DRBD_FAULT_INJECTION
 extern int enable_faults;
@@ -865,14 +865,6 @@ struct drbd_md {
 	 */
 };
 
-/* for sync_conf and other types... */
-#define NL_PACKET(name, number, fields) struct name { fields };
-#define NL_INTEGER(pn,pr,member) int member;
-#define NL_INT64(pn,pr,member) __u64 member;
-#define NL_BIT(pn,pr,member)   unsigned member:1;
-#define NL_STRING(pn,pr,member,len) unsigned char member[len]; int member ## _len;
-#include "linux/drbd_nl.h"
-
 struct drbd_backing_dev {
 	struct block_device *backing_bdev;
 	struct block_device *md_bdev;
@@ -1502,7 +1494,7 @@ enum drbd_ret_code conn_new_minor(struct drbd_tconn *tconn, unsigned int minor,
 extern void drbd_free_mdev(struct drbd_conf *mdev);
 extern void drbd_delete_device(unsigned int minor);
 
-struct drbd_tconn *drbd_new_tconn(char *name);
+struct drbd_tconn *drbd_new_tconn(const char *name);
 extern void drbd_free_tconn(struct drbd_tconn *tconn);
 struct drbd_tconn *conn_by_name(const char *name);
 
@@ -1679,16 +1671,22 @@ extern int __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector,
 extern void drbd_al_apply_to_bm(struct drbd_conf *mdev);
 extern void drbd_al_shrink(struct drbd_conf *mdev);
 
-
 /* drbd_nl.c */
-
-void drbd_nl_cleanup(void);
-int __init drbd_nl_init(void);
-void drbd_bcast_state(struct drbd_conf *mdev, union drbd_state);
-void drbd_bcast_sync_progress(struct drbd_conf *mdev);
-void drbd_bcast_ee(struct drbd_conf *, const char *, const int, const char *,
-		   const char *, const struct drbd_peer_request *);
-
+/* state info broadcast */
+struct sib_info {
+	enum drbd_state_info_bcast_reason sib_reason;
+	union {
+		struct {
+			char *helper_name;
+			unsigned helper_exit_code;
+		};
+		struct {
+			union drbd_state os;
+			union drbd_state ns;
+		};
+	};
+};
+void drbd_bcast_event(struct drbd_conf *mdev, const struct sib_info *sib);
 
 /*
  * inline helper functions
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 9f6db5947c65..9697ab872098 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -86,7 +86,6 @@ MODULE_PARM_DESC(allow_oos, "DONT USE!");
 module_param(minor_count, uint, 0444);
 module_param(disable_sendpage, bool, 0644);
 module_param(allow_oos, bool, 0);
-module_param(cn_idx, uint, 0444);
 module_param(proc_details, int, 0644);
 
 #ifdef CONFIG_DRBD_FAULT_INJECTION
@@ -108,7 +107,6 @@ module_param(fault_devs, int, 0644);
 unsigned int minor_count = DRBD_MINOR_COUNT_DEF;
 int disable_sendpage;
 int allow_oos;
-unsigned int cn_idx = CN_IDX_DRBD;
 int proc_details;       /* Detail level in proc drbd*/
 
 /* Module parameter for setting the user mode helper program
@@ -2175,7 +2173,7 @@ static void drbd_cleanup(void)
 	if (drbd_proc)
 		remove_proc_entry("drbd", NULL);
 
-	drbd_nl_cleanup();
+	drbd_genl_unregister();
 
 	idr_for_each_entry(&minors, mdev, i)
 		drbd_delete_device(i);
@@ -2237,6 +2235,9 @@ struct drbd_tconn *conn_by_name(const char *name)
 {
 	struct drbd_tconn *tconn;
 
+	if (!name || !name[0])
+		return NULL;
+
 	write_lock_irq(&global_state_lock);
 	list_for_each_entry(tconn, &drbd_tconns, all_tconn) {
 		if (!strcmp(tconn->name, name))
@@ -2248,7 +2249,7 @@ found:
 	return tconn;
 }
 
-struct drbd_tconn *drbd_new_tconn(char *name)
+struct drbd_tconn *drbd_new_tconn(const char *name)
 {
 	struct drbd_tconn *tconn;
 
@@ -2333,6 +2334,7 @@ enum drbd_ret_code conn_new_minor(struct drbd_tconn *tconn, unsigned int minor,
 
 	mdev->tconn = tconn;
 	mdev->minor = minor;
+	mdev->vnr = vnr;
 
 	drbd_init_set_defaults(mdev);
 
@@ -2461,10 +2463,6 @@ int __init drbd_init(void)
 #endif
 	}
 
-	err = drbd_nl_init();
-	if (err)
-		return err;
-
 	err = register_blkdev(DRBD_MAJOR, "drbd");
 	if (err) {
 		printk(KERN_ERR
@@ -2473,6 +2471,13 @@ int __init drbd_init(void)
 		return err;
 	}
 
+	err = drbd_genl_register();
+	if (err) {
+		printk(KERN_ERR "drbd: unable to register generic netlink family\n");
+		goto fail;
+	}
+
+
 	register_reboot_notifier(&drbd_notifier);
 
 	/*
@@ -2487,12 +2492,12 @@ int __init drbd_init(void)
 
 	err = drbd_create_mempools();
 	if (err)
-		goto Enomem;
+		goto fail;
 
 	drbd_proc = proc_create_data("drbd", S_IFREG | S_IRUGO , NULL, &drbd_proc_fops, NULL);
 	if (!drbd_proc)	{
 		printk(KERN_ERR "drbd: unable to register proc file\n");
-		goto Enomem;
+		goto fail;
 	}
 
 	rwlock_init(&global_state_lock);
@@ -2507,7 +2512,7 @@ int __init drbd_init(void)
 
 	return 0; /* Success! */
 
-Enomem:
+fail:
 	drbd_cleanup();
 	if (err == -ENOMEM)
 		/* currently always the case */
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index f2739fd188a0..f9be14248e33 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -29,110 +29,225 @@
 #include <linux/fs.h>
 #include <linux/file.h>
 #include <linux/slab.h>
-#include <linux/connector.h>
 #include <linux/blkpg.h>
 #include <linux/cpumask.h>
 #include "drbd_int.h"
 #include "drbd_req.h"
 #include "drbd_wrappers.h"
 #include <asm/unaligned.h>
-#include <linux/drbd_tag_magic.h>
 #include <linux/drbd_limits.h>
-#include <linux/compiler.h>
 #include <linux/kthread.h>
 
-static unsigned short *tl_add_blob(unsigned short *, enum drbd_tags, const void *, int);
-static unsigned short *tl_add_str(unsigned short *, enum drbd_tags, const char *);
-static unsigned short *tl_add_int(unsigned short *, enum drbd_tags, const void *);
-
-/* see get_sb_bdev and bd_claim */
+#include <net/genetlink.h>
+
+/* .doit */
+// int drbd_adm_create_resource(struct sk_buff *skb, struct genl_info *info);
+// int drbd_adm_delete_resource(struct sk_buff *skb, struct genl_info *info);
+
+int drbd_adm_add_minor(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_delete_minor(struct sk_buff *skb, struct genl_info *info);
+
+int drbd_adm_create_connection(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_delete_connection(struct sk_buff *skb, struct genl_info *info);
+
+int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_pause_sync(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_resume_sync(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_suspend_io(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_resume_io(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_outdate(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_syncer(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_get_status(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_get_timeout_type(struct sk_buff *skb, struct genl_info *info);
+/* .dumpit */
+int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb);
+
+#include <linux/drbd_genl_api.h>
+#include <linux/genl_magic_func.h>
+
+/* used blkdev_get_by_path, to claim our meta data device(s) */
 static char *drbd_m_holder = "Hands off! this is DRBD's meta data device.";
 
-/* Generate the tag_list to struct functions */
-#define NL_PACKET(name, number, fields) \
-static int name ## _from_tags( \
-	unsigned short *tags, struct name *arg) __attribute__ ((unused)); \
-static int name ## _from_tags( \
-	unsigned short *tags, struct name *arg) \
-{ \
-	int tag; \
-	int dlen; \
-	\
-	while ((tag = get_unaligned(tags++)) != TT_END) {	\
-		dlen = get_unaligned(tags++);			\
-		switch (tag_number(tag)) { \
-		fields \
-		default: \
-			if (tag & T_MANDATORY) { \
-				printk(KERN_ERR "drbd: Unknown tag: %d\n", tag_number(tag)); \
-				return 0; \
-			} \
-		} \
-		tags = (unsigned short *)((char *)tags + dlen); \
-	} \
-	return 1; \
+/* Configuration is strictly serialized, because generic netlink message
+ * processing is strictly serialized by the genl_lock().
+ * Which means we can use one static global drbd_config_context struct.
+ */
+static struct drbd_config_context {
+	/* assigned from drbd_genlmsghdr */
+	unsigned int minor;
+	/* assigned from request attributes, if present */
+	unsigned int volume;
+#define VOLUME_UNSPECIFIED		(-1U)
+	/* pointer into the request skb,
+	 * limited lifetime! */
+	char *conn_name;
+
+	/* reply buffer */
+	struct sk_buff *reply_skb;
+	/* pointer into reply buffer */
+	struct drbd_genlmsghdr *reply_dh;
+	/* resolved from attributes, if possible */
+	struct drbd_conf *mdev;
+	struct drbd_tconn *tconn;
+} adm_ctx;
+
+static void drbd_adm_send_reply(struct sk_buff *skb, struct genl_info *info)
+{
+	genlmsg_end(skb, genlmsg_data(nlmsg_data(nlmsg_hdr(skb))));
+	if (genlmsg_reply(skb, info))
+		printk(KERN_ERR "drbd: error sending genl reply\n");
 }
-#define NL_INTEGER(pn, pr, member) \
-	case pn: /* D_ASSERT( tag_type(tag) == TT_INTEGER ); */ \
-		arg->member = get_unaligned((int *)(tags));	\
-		break;
-#define NL_INT64(pn, pr, member) \
-	case pn: /* D_ASSERT( tag_type(tag) == TT_INT64 ); */ \
-		arg->member = get_unaligned((u64 *)(tags));	\
-		break;
-#define NL_BIT(pn, pr, member) \
-	case pn: /* D_ASSERT( tag_type(tag) == TT_BIT ); */ \
-		arg->member = *(char *)(tags) ? 1 : 0; \
-		break;
-#define NL_STRING(pn, pr, member, len) \
-	case pn: /* D_ASSERT( tag_type(tag) == TT_STRING ); */ \
-		if (dlen > len) { \
-			printk(KERN_ERR "drbd: arg too long: %s (%u wanted, max len: %u bytes)\n", \
-				#member, dlen, (unsigned int)len); \
-			return 0; \
-		} \
-		 arg->member ## _len = dlen; \
-		 memcpy(arg->member, tags, min_t(size_t, dlen, len)); \
-		 break;
-#include "linux/drbd_nl.h"
-
-/* Generate the struct to tag_list functions */
-#define NL_PACKET(name, number, fields) \
-static unsigned short* \
-name ## _to_tags( \
-	struct name *arg, unsigned short *tags) __attribute__ ((unused)); \
-static unsigned short* \
-name ## _to_tags( \
-	struct name *arg, unsigned short *tags) \
-{ \
-	fields \
-	return tags; \
+
+/* Used on a fresh "drbd_adm_prepare"d reply_skb, this cannot fail: The only
+ * reason it could fail was no space in skb, and there are 4k available. */
+static int drbd_msg_put_info(const char *info)
+{
+	struct sk_buff *skb = adm_ctx.reply_skb;
+	struct nlattr *nla;
+	int err = -EMSGSIZE;
+
+	if (!info || !info[0])
+		return 0;
+
+	nla = nla_nest_start(skb, DRBD_NLA_CFG_REPLY);
+	if (!nla)
+		return err;
+
+	err = nla_put_string(skb, T_info_text, info);
+	if (err) {
+		nla_nest_cancel(skb, nla);
+		return err;
+	} else
+		nla_nest_end(skb, nla);
+	return 0;
 }
 
-#define NL_INTEGER(pn, pr, member) \
-	put_unaligned(pn | pr | TT_INTEGER, tags++);	\
-	put_unaligned(sizeof(int), tags++);		\
-	put_unaligned(arg->member, (int *)tags);	\
-	tags = (unsigned short *)((char *)tags+sizeof(int));
-#define NL_INT64(pn, pr, member) \
-	put_unaligned(pn | pr | TT_INT64, tags++);	\
-	put_unaligned(sizeof(u64), tags++);		\
-	put_unaligned(arg->member, (u64 *)tags);	\
-	tags = (unsigned short *)((char *)tags+sizeof(u64));
-#define NL_BIT(pn, pr, member) \
-	put_unaligned(pn | pr | TT_BIT, tags++);	\
-	put_unaligned(sizeof(char), tags++);		\
-	*(char *)tags = arg->member; \
-	tags = (unsigned short *)((char *)tags+sizeof(char));
-#define NL_STRING(pn, pr, member, len) \
-	put_unaligned(pn | pr | TT_STRING, tags++);	\
-	put_unaligned(arg->member ## _len, tags++);	\
-	memcpy(tags, arg->member, arg->member ## _len); \
-	tags = (unsigned short *)((char *)tags + arg->member ## _len);
-#include "linux/drbd_nl.h"
-
-void drbd_bcast_ev_helper(struct drbd_conf *mdev, char *helper_name);
-void drbd_nl_send_reply(struct cn_msg *, int);
+/* This would be a good candidate for a "pre_doit" hook,
+ * and per-family private info->pointers.
+ * But we need to stay compatible with older kernels.
+ * If it returns successfully, adm_ctx members are valid.
+ */
+#define DRBD_ADM_NEED_MINOR	1
+#define DRBD_ADM_NEED_CONN	2
+static int drbd_adm_prepare(struct sk_buff *skb, struct genl_info *info,
+		unsigned flags)
+{
+	struct drbd_genlmsghdr *d_in = info->userhdr;
+	const u8 cmd = info->genlhdr->cmd;
+	int err;
+
+	memset(&adm_ctx, 0, sizeof(adm_ctx));
+
+	/* genl_rcv_msg only checks for CAP_NET_ADMIN on "GENL_ADMIN_PERM" :( */
+	if (cmd != DRBD_ADM_GET_STATUS
+	&& security_netlink_recv(skb, CAP_SYS_ADMIN))
+	       return -EPERM;
+
+	adm_ctx.reply_skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!adm_ctx.reply_skb)
+		goto fail;
+
+	adm_ctx.reply_dh = genlmsg_put_reply(adm_ctx.reply_skb,
+					info, &drbd_genl_family, 0, cmd);
+	/* put of a few bytes into a fresh skb of >= 4k will always succeed.
+	 * but anyways */
+	if (!adm_ctx.reply_dh)
+		goto fail;
+
+	adm_ctx.reply_dh->minor = d_in->minor;
+	adm_ctx.reply_dh->ret_code = NO_ERROR;
+
+	if (info->attrs[DRBD_NLA_CFG_CONTEXT]) {
+		struct nlattr *nla;
+		/* parse and validate only */
+		err = drbd_cfg_context_from_attrs(NULL, info->attrs);
+		if (err)
+			goto fail;
+
+		/* It was present, and valid,
+		 * copy it over to the reply skb. */
+		err = nla_put_nohdr(adm_ctx.reply_skb,
+				info->attrs[DRBD_NLA_CFG_CONTEXT]->nla_len,
+				info->attrs[DRBD_NLA_CFG_CONTEXT]);
+		if (err)
+			goto fail;
+
+		/* and assign stuff to the global adm_ctx */
+		nla = nested_attr_tb[__nla_type(T_ctx_volume)];
+		adm_ctx.volume = nla ? nla_get_u32(nla) : VOLUME_UNSPECIFIED;
+		nla = nested_attr_tb[__nla_type(T_ctx_conn_name)];
+		if (nla)
+			adm_ctx.conn_name = nla_data(nla);
+	} else
+		adm_ctx.volume = VOLUME_UNSPECIFIED;
+
+	adm_ctx.minor = d_in->minor;
+	adm_ctx.mdev = minor_to_mdev(d_in->minor);
+	adm_ctx.tconn = conn_by_name(adm_ctx.conn_name);
+
+	if (!adm_ctx.mdev && (flags & DRBD_ADM_NEED_MINOR)) {
+		drbd_msg_put_info("unknown minor");
+		return ERR_MINOR_INVALID;
+	}
+	if (!adm_ctx.tconn && (flags & DRBD_ADM_NEED_CONN)) {
+		drbd_msg_put_info("unknown connection");
+		return ERR_INVALID_REQUEST;
+	}
+
+	/* some more paranoia, if the request was over-determined */
+	if (adm_ctx.mdev &&
+	    adm_ctx.volume != VOLUME_UNSPECIFIED &&
+	    adm_ctx.volume != adm_ctx.mdev->vnr) {
+		pr_warning("request: minor=%u, volume=%u; but that minor is volume %u in %s\n",
+				adm_ctx.minor, adm_ctx.volume,
+				adm_ctx.mdev->vnr, adm_ctx.mdev->tconn->name);
+		drbd_msg_put_info("over-determined configuration context mismatch");
+		return ERR_INVALID_REQUEST;
+	}
+	if (adm_ctx.mdev && adm_ctx.tconn &&
+	    adm_ctx.mdev->tconn != adm_ctx.tconn) {
+		pr_warning("request: minor=%u, conn=%s; but that minor belongs to connection %s\n",
+				adm_ctx.minor, adm_ctx.conn_name, adm_ctx.mdev->tconn->name);
+		drbd_msg_put_info("over-determined configuration context mismatch");
+		return ERR_INVALID_REQUEST;
+	}
+	return NO_ERROR;
+
+fail:
+	nlmsg_free(adm_ctx.reply_skb);
+	adm_ctx.reply_skb = NULL;
+	return -ENOMEM;
+}
+
+static int drbd_adm_finish(struct genl_info *info, int retcode)
+{
+	struct nlattr *nla;
+	const char *conn_name = NULL;
+
+	if (!adm_ctx.reply_skb)
+		return -ENOMEM;
+
+	adm_ctx.reply_dh->ret_code = retcode;
+
+	nla = info->attrs[DRBD_NLA_CFG_CONTEXT];
+	if (nla) {
+		nla = nla_find_nested(nla, __nla_type(T_ctx_conn_name));
+		if (nla)
+			conn_name = nla_data(nla);
+	}
+
+	drbd_adm_send_reply(adm_ctx.reply_skb, info);
+	return 0;
+}
 
 int drbd_khelper(struct drbd_conf *mdev, char *cmd)
 {
@@ -142,9 +257,9 @@ int drbd_khelper(struct drbd_conf *mdev, char *cmd)
 			NULL, /* Will be set to address family */
 			NULL, /* Will be set to address */
 			NULL };
-
 	char mb[12], af[20], ad[60], *afs;
 	char *argv[] = {usermode_helper, cmd, mb, NULL };
+	struct sib_info sib;
 	int ret;
 
 	snprintf(mb, 12, "minor-%d", mdev_to_minor(mdev));
@@ -177,8 +292,9 @@ int drbd_khelper(struct drbd_conf *mdev, char *cmd)
 	drbd_md_sync(mdev);
 
 	dev_info(DEV, "helper command: %s %s %s\n", usermode_helper, cmd, mb);
-
-	drbd_bcast_ev_helper(mdev, cmd);
+	sib.sib_reason = SIB_HELPER_PRE;
+	sib.helper_name = cmd;
+	drbd_bcast_event(mdev, &sib);
 	ret = call_usermodehelper(usermode_helper, argv, envp, 1);
 	if (ret)
 		dev_warn(DEV, "helper command: %s %s %s exit code %u (0x%x)\n",
@@ -188,6 +304,9 @@ int drbd_khelper(struct drbd_conf *mdev, char *cmd)
 		dev_info(DEV, "helper command: %s %s %s exit code %u (0x%x)\n",
 				usermode_helper, cmd, mb,
 				(ret >> 8) & 0xff, ret);
+	sib.sib_reason = SIB_HELPER_POST;
+	sib.helper_exit_code = ret;
+	drbd_bcast_event(mdev, &sib);
 
 	if (ret < 0) /* Ignore any ERRNOs we got. */
 		ret = 0;
@@ -362,7 +481,7 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force)
 		}
 
 		if (rv == SS_NOTHING_TO_DO)
-			goto fail;
+			goto out;
 		if (rv == SS_PRIMARY_NOP && mask.pdsk == 0) {
 			nps = drbd_try_outdate_peer(mdev);
 
@@ -388,13 +507,13 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force)
 			rv = _drbd_request_state(mdev, mask, val,
 						CS_VERBOSE + CS_WAIT_COMPLETE);
 			if (rv < SS_SUCCESS)
-				goto fail;
+				goto out;
 		}
 		break;
 	}
 
 	if (rv < SS_SUCCESS)
-		goto fail;
+		goto out;
 
 	if (forced)
 		dev_warn(DEV, "Forced to consider local data as UpToDate!\n");
@@ -438,33 +557,46 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force)
 	drbd_md_sync(mdev);
 
 	kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE);
- fail:
+out:
 	mutex_unlock(mdev->state_mutex);
 	return rv;
 }
 
-static int drbd_nl_primary(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
-			   struct drbd_nl_cfg_reply *reply)
+static const char *from_attrs_err_to_txt(int err)
 {
-	struct primary primary_args;
-
-	memset(&primary_args, 0, sizeof(struct primary));
-	if (!primary_from_tags(nlp->tag_list, &primary_args)) {
-		reply->ret_code = ERR_MANDATORY_TAG;
-		return 0;
-	}
-
-	reply->ret_code =
-		drbd_set_role(mdev, R_PRIMARY, primary_args.primary_force);
-
-	return 0;
+	return	err == -ENOMSG ? "required attribute missing" :
+		err == -EOPNOTSUPP ? "unknown mandatory attribute" :
+		"invalid attribute value";
 }
 
-static int drbd_nl_secondary(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
-			     struct drbd_nl_cfg_reply *reply)
+int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info)
 {
-	reply->ret_code = drbd_set_role(mdev, R_SECONDARY, 0);
+	struct set_role_parms parms;
+	int err;
+	enum drbd_ret_code retcode;
 
+	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
+	if (!adm_ctx.reply_skb)
+		return retcode;
+	if (retcode != NO_ERROR)
+		goto out;
+
+	memset(&parms, 0, sizeof(parms));
+	if (info->attrs[DRBD_NLA_SET_ROLE_PARMS]) {
+		err = set_role_parms_from_attrs(&parms, info->attrs);
+		if (err) {
+			retcode = ERR_MANDATORY_TAG;
+			drbd_msg_put_info(from_attrs_err_to_txt(err));
+			goto out;
+		}
+	}
+
+	if (info->genlhdr->cmd == DRBD_ADM_PRIMARY)
+		retcode = drbd_set_role(adm_ctx.mdev, R_PRIMARY, parms.assume_uptodate);
+	else
+		retcode = drbd_set_role(adm_ctx.mdev, R_SECONDARY, 0);
+out:
+	drbd_adm_finish(info, retcode);
 	return 0;
 }
 
@@ -541,6 +673,12 @@ char *ppsize(char *buf, unsigned long long size)
  *  R_PRIMARY D_INCONSISTENT, and C_SYNC_TARGET:
  *  peer may not initiate a resize.
  */
+/* Note these are not to be confused with
+ * drbd_adm_suspend_io/drbd_adm_resume_io,
+ * which are (sub) state changes triggered by admin (drbdsetup),
+ * and can be long lived.
+ * This changes an mdev->flag, is triggered by drbd internals,
+ * and should be short-lived. */
 void drbd_suspend_io(struct drbd_conf *mdev)
 {
 	set_bit(SUSPEND_IO, &mdev->flags);
@@ -881,11 +1019,10 @@ static void drbd_suspend_al(struct drbd_conf *mdev)
 		dev_info(DEV, "Suspended AL updates\n");
 }
 
-/* does always return 0;
- * interesting return code is in reply->ret_code */
-static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
-			     struct drbd_nl_cfg_reply *reply)
+int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 {
+	struct drbd_conf *mdev;
+	int err;
 	enum drbd_ret_code retcode;
 	enum determine_dev_size dd;
 	sector_t max_possible_sectors;
@@ -897,6 +1034,13 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
 	enum drbd_state_rv rv;
 	int cp_discovered = 0;
 
+	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
+	if (!adm_ctx.reply_skb)
+		return retcode;
+	if (retcode != NO_ERROR)
+		goto fail;
+
+	mdev = adm_ctx.mdev;
 	conn_reconfig_start(mdev->tconn);
 
 	/* if you want to reconfigure, please tear down first */
@@ -910,7 +1054,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
 	 * to realize a "hot spare" feature (not that I'd recommend that) */
 	wait_event(mdev->misc_wait, !atomic_read(&mdev->local_cnt));
 
-	/* allocation not in the IO path, cqueue thread context */
+	/* allocation not in the IO path, drbdsetup context */
 	nbc = kzalloc(sizeof(struct drbd_backing_dev), GFP_KERNEL);
 	if (!nbc) {
 		retcode = ERR_NOMEM;
@@ -922,12 +1066,14 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
 	nbc->dc.fencing       = DRBD_FENCING_DEF;
 	nbc->dc.max_bio_bvecs = DRBD_MAX_BIO_BVECS_DEF;
 
-	if (!disk_conf_from_tags(nlp->tag_list, &nbc->dc)) {
+	err = disk_conf_from_attrs(&nbc->dc, info->attrs);
+	if (err) {
 		retcode = ERR_MANDATORY_TAG;
+		drbd_msg_put_info(from_attrs_err_to_txt(err));
 		goto fail;
 	}
 
-	if (nbc->dc.meta_dev_idx < DRBD_MD_INDEX_FLEX_INT) {
+	if ((int)nbc->dc.meta_dev_idx < DRBD_MD_INDEX_FLEX_INT) {
 		retcode = ERR_MD_IDX_INVALID;
 		goto fail;
 	}
@@ -961,7 +1107,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
 	 */
 	bdev = blkdev_get_by_path(nbc->dc.meta_dev,
 				  FMODE_READ | FMODE_WRITE | FMODE_EXCL,
-				  (nbc->dc.meta_dev_idx < 0) ?
+				  ((int)nbc->dc.meta_dev_idx < 0) ?
 				  (void *)mdev : (void *)drbd_m_holder);
 	if (IS_ERR(bdev)) {
 		dev_err(DEV, "open(\"%s\") failed with %ld\n", nbc->dc.meta_dev,
@@ -997,7 +1143,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
 		goto fail;
 	}
 
-	if (nbc->dc.meta_dev_idx < 0) {
+	if ((int)nbc->dc.meta_dev_idx < 0) {
 		max_possible_sectors = DRBD_MAX_SECTORS_FLEX;
 		/* at least one MB, otherwise it does not make sense */
 		min_md_device_sectors = (2<<10);
@@ -1028,7 +1174,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
 		dev_warn(DEV, "==> truncating very big lower level device "
 			"to currently maximum possible %llu sectors <==\n",
 			(unsigned long long) max_possible_sectors);
-		if (nbc->dc.meta_dev_idx >= 0)
+		if ((int)nbc->dc.meta_dev_idx >= 0)
 			dev_warn(DEV, "==>> using internal or flexible "
 				      "meta data may help <<==\n");
 	}
@@ -1242,8 +1388,8 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
 
 	kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE);
 	put_ldev(mdev);
-	reply->ret_code = retcode;
 	conn_reconfig_done(mdev->tconn);
+	drbd_adm_finish(info, retcode);
 	return 0;
 
  force_diskless_dec:
@@ -1251,6 +1397,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
  force_diskless:
 	drbd_force_state(mdev, NS(disk, D_FAILED));
 	drbd_md_sync(mdev);
+	conn_reconfig_done(mdev->tconn);
  fail:
 	if (nbc) {
 		if (nbc->backing_bdev)
@@ -1263,8 +1410,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
 	}
 	lc_destroy(resync_lru);
 
-	reply->ret_code = retcode;
-	conn_reconfig_done(mdev->tconn);
+	drbd_adm_finish(info, retcode);
 	return 0;
 }
 
@@ -1273,42 +1419,54 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
  * Then we transition to D_DISKLESS, and wait for put_ldev() to return all
  * internal references as well.
  * Only then we have finally detached. */
-static int drbd_nl_detach(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
-			  struct drbd_nl_cfg_reply *reply)
+int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info)
 {
+	struct drbd_conf *mdev;
 	enum drbd_ret_code retcode;
-	int ret;
+
+	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
+	if (!adm_ctx.reply_skb)
+		return retcode;
+	if (retcode != NO_ERROR)
+		goto out;
+
+	mdev = adm_ctx.mdev;
 	drbd_suspend_io(mdev); /* so no-one is stuck in drbd_al_begin_io */
-	retcode = drbd_request_state(mdev, NS(disk, D_FAILED));
-	/* D_FAILED will transition to DISKLESS. */
-	ret = wait_event_interruptible(mdev->misc_wait,
-			mdev->state.disk != D_FAILED);
+	retcode = drbd_request_state(mdev, NS(disk, D_DISKLESS));
+	wait_event(mdev->misc_wait,
+			mdev->state.disk != D_DISKLESS ||
+			!atomic_read(&mdev->local_cnt));
 	drbd_resume_io(mdev);
-	if ((int)retcode == (int)SS_IS_DISKLESS)
-		retcode = SS_NOTHING_TO_DO;
-	if (ret)
-		retcode = ERR_INTR;
-	reply->ret_code = retcode;
+out:
+	drbd_adm_finish(info, retcode);
 	return 0;
 }
 
-static int drbd_nl_net_conf(struct drbd_tconn *tconn, struct drbd_nl_cfg_req *nlp,
-			    struct drbd_nl_cfg_reply *reply)
+int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info)
 {
-	int i;
-	enum drbd_ret_code retcode;
+	char hmac_name[CRYPTO_MAX_ALG_NAME];
+	struct drbd_conf *mdev;
 	struct net_conf *new_conf = NULL;
 	struct crypto_hash *tfm = NULL;
 	struct crypto_hash *integrity_w_tfm = NULL;
 	struct crypto_hash *integrity_r_tfm = NULL;
-	struct drbd_conf *mdev;
-	char hmac_name[CRYPTO_MAX_ALG_NAME];
 	void *int_dig_out = NULL;
 	void *int_dig_in = NULL;
 	void *int_dig_vv = NULL;
 	struct drbd_tconn *oconn;
+	struct drbd_tconn *tconn;
 	struct sockaddr *new_my_addr, *new_peer_addr, *taken_addr;
+	enum drbd_ret_code retcode;
+	int i;
+	int err;
 
+	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONN);
+	if (!adm_ctx.reply_skb)
+		return retcode;
+	if (retcode != NO_ERROR)
+		goto out;
+
+	tconn = adm_ctx.tconn;
 	conn_reconfig_start(tconn);
 
 	if (tconn->cstate > C_STANDALONE) {
@@ -1343,8 +1501,10 @@ static int drbd_nl_net_conf(struct drbd_tconn *tconn, struct drbd_nl_cfg_req *nl
 	new_conf->on_congestion    = DRBD_ON_CONGESTION_DEF;
 	new_conf->cong_extents     = DRBD_CONG_EXTENTS_DEF;
 
-	if (!net_conf_from_tags(nlp->tag_list, new_conf)) {
+	err = net_conf_from_attrs(new_conf, info->attrs);
+	if (err) {
 		retcode = ERR_MANDATORY_TAG;
+		drbd_msg_put_info(from_attrs_err_to_txt(err));
 		goto fail;
 	}
 
@@ -1495,8 +1655,8 @@ static int drbd_nl_net_conf(struct drbd_tconn *tconn, struct drbd_nl_cfg_req *nl
 		mdev->recv_cnt = 0;
 		kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE);
 	}
-	reply->ret_code = retcode;
 	conn_reconfig_done(tconn);
+	drbd_adm_finish(info, retcode);
 	return 0;
 
 fail:
@@ -1508,24 +1668,37 @@ fail:
 	crypto_free_hash(integrity_r_tfm);
 	kfree(new_conf);
 
-	reply->ret_code = retcode;
 	conn_reconfig_done(tconn);
+out:
+	drbd_adm_finish(info, retcode);
 	return 0;
 }
 
-static int drbd_nl_disconnect(struct drbd_tconn *tconn, struct drbd_nl_cfg_req *nlp,
-			      struct drbd_nl_cfg_reply *reply)
+int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info)
 {
-	int retcode;
-	struct disconnect dc;
+	struct disconnect_parms parms;
+	struct drbd_tconn *tconn;
+	enum drbd_ret_code retcode;
+	int err;
 
-	memset(&dc, 0, sizeof(struct disconnect));
-	if (!disconnect_from_tags(nlp->tag_list, &dc)) {
-		retcode = ERR_MANDATORY_TAG;
+	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONN);
+	if (!adm_ctx.reply_skb)
+		return retcode;
+	if (retcode != NO_ERROR)
 		goto fail;
+
+	tconn = adm_ctx.tconn;
+	memset(&parms, 0, sizeof(parms));
+	if (info->attrs[DRBD_NLA_DISCONNECT_PARMS]) {
+		err = disconnect_parms_from_attrs(&parms, info->attrs);
+		if (err) {
+			retcode = ERR_MANDATORY_TAG;
+			drbd_msg_put_info(from_attrs_err_to_txt(err));
+			goto fail;
+		}
 	}
 
-	if (dc.force) {
+	if (parms.force_disconnect) {
 		spin_lock_irq(&tconn->req_lock);
 		if (tconn->cstate >= C_WF_CONNECTION)
 			_conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
@@ -1567,7 +1740,7 @@ static int drbd_nl_disconnect(struct drbd_tconn *tconn, struct drbd_nl_cfg_req *
  done:
 	retcode = NO_ERROR;
  fail:
-	reply->ret_code = retcode;
+	drbd_adm_finish(info, retcode);
 	return 0;
 }
 
@@ -1587,20 +1760,32 @@ void resync_after_online_grow(struct drbd_conf *mdev)
 		_drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE + CS_SERIALIZE);
 }
 
-static int drbd_nl_resize(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
-			  struct drbd_nl_cfg_reply *reply)
+int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
 {
-	struct resize rs;
-	int retcode = NO_ERROR;
+	struct resize_parms rs;
+	struct drbd_conf *mdev;
+	enum drbd_ret_code retcode;
 	enum determine_dev_size dd;
 	enum dds_flags ddsf;
+	int err;
 
-	memset(&rs, 0, sizeof(struct resize));
-	if (!resize_from_tags(nlp->tag_list, &rs)) {
-		retcode = ERR_MANDATORY_TAG;
+	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
+	if (!adm_ctx.reply_skb)
+		return retcode;
+	if (retcode != NO_ERROR)
 		goto fail;
+
+	memset(&rs, 0, sizeof(struct resize_parms));
+	if (info->attrs[DRBD_NLA_RESIZE_PARMS]) {
+		err = resize_parms_from_attrs(&rs, info->attrs);
+		if (err) {
+			retcode = ERR_MANDATORY_TAG;
+			drbd_msg_put_info(from_attrs_err_to_txt(err));
+			goto fail;
+		}
 	}
 
+	mdev = adm_ctx.mdev;
 	if (mdev->state.conn > C_CONNECTED) {
 		retcode = ERR_RESIZE_RESYNC;
 		goto fail;
@@ -1644,14 +1829,14 @@ static int drbd_nl_resize(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
 	}
 
  fail:
-	reply->ret_code = retcode;
+	drbd_adm_finish(info, retcode);
 	return 0;
 }
 
-static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
-			       struct drbd_nl_cfg_reply *reply)
+int drbd_adm_syncer(struct sk_buff *skb, struct genl_info *info)
 {
-	int retcode = NO_ERROR;
+	struct drbd_conf *mdev;
+	enum drbd_ret_code retcode;
 	int err;
 	int ovr; /* online verify running */
 	int rsr; /* re-sync running */
@@ -1662,12 +1847,21 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n
 	int *rs_plan_s = NULL;
 	int fifo_size;
 
+	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
+	if (!adm_ctx.reply_skb)
+		return retcode;
+	if (retcode != NO_ERROR)
+		goto fail;
+	mdev = adm_ctx.mdev;
+
 	if (!zalloc_cpumask_var(&new_cpu_mask, GFP_KERNEL)) {
 		retcode = ERR_NOMEM;
+		drbd_msg_put_info("unable to allocate cpumask");
 		goto fail;
 	}
 
-	if (nlp->flags & DRBD_NL_SET_DEFAULTS) {
+	if (((struct drbd_genlmsghdr*)info->userhdr)->flags
+			& DRBD_GENL_F_SET_DEFAULTS) {
 		memset(&sc, 0, sizeof(struct syncer_conf));
 		sc.rate       = DRBD_RATE_DEF;
 		sc.after      = DRBD_AFTER_DEF;
@@ -1681,8 +1875,10 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n
 	} else
 		memcpy(&sc, &mdev->sync_conf, sizeof(struct syncer_conf));
 
-	if (!syncer_conf_from_tags(nlp->tag_list, &sc)) {
+	err = syncer_conf_from_attrs(&sc, info->attrs);
+	if (err) {
 		retcode = ERR_MANDATORY_TAG;
+		drbd_msg_put_info(from_attrs_err_to_txt(err));
 		goto fail;
 	}
 
@@ -1832,14 +2028,23 @@ fail:
 	free_cpumask_var(new_cpu_mask);
 	crypto_free_hash(csums_tfm);
 	crypto_free_hash(verify_tfm);
-	reply->ret_code = retcode;
+
+	drbd_adm_finish(info, retcode);
 	return 0;
 }
 
-static int drbd_nl_invalidate(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
-			      struct drbd_nl_cfg_reply *reply)
+int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info)
 {
-	int retcode;
+	struct drbd_conf *mdev;
+	int retcode; /* enum drbd_ret_code rsp. enum drbd_state_rv */
+
+	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
+	if (!adm_ctx.reply_skb)
+		return retcode;
+	if (retcode != NO_ERROR)
+		goto out;
+
+	mdev = adm_ctx.mdev;
 
 	/* If there is still bitmap IO pending, probably because of a previous
 	 * resync just being finished, wait for it before requesting a new resync. */
@@ -1862,7 +2067,8 @@ static int drbd_nl_invalidate(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl
 		retcode = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T));
 	}
 
-	reply->ret_code = retcode;
+out:
+	drbd_adm_finish(info, retcode);
 	return 0;
 }
 
@@ -1875,56 +2081,58 @@ static int drbd_bmio_set_susp_al(struct drbd_conf *mdev)
 	return rv;
 }
 
-static int drbd_nl_invalidate_peer(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
-				   struct drbd_nl_cfg_reply *reply)
+static int drbd_adm_simple_request_state(struct sk_buff *skb, struct genl_info *info,
+		union drbd_state mask, union drbd_state val)
 {
-	int retcode;
-
-	/* If there is still bitmap IO pending, probably because of a previous
-	 * resync just being finished, wait for it before requesting a new resync. */
-	wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
+	enum drbd_ret_code retcode;
 
-	retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S), CS_ORDERED);
-
-	if (retcode < SS_SUCCESS) {
-		if (retcode == SS_NEED_CONNECTION && mdev->state.role == R_PRIMARY) {
-			/* The peer will get a resync upon connect anyways. Just make that
-			   into a full resync. */
-			retcode = drbd_request_state(mdev, NS(pdsk, D_INCONSISTENT));
-			if (retcode >= SS_SUCCESS) {
-				if (drbd_bitmap_io(mdev, &drbd_bmio_set_susp_al,
-					"set_n_write from invalidate_peer",
-					BM_LOCKED_SET_ALLOWED))
-					retcode = ERR_IO_MD_DISK;
-			}
-		} else
-			retcode = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S));
-	}
+	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
+	if (!adm_ctx.reply_skb)
+		return retcode;
+	if (retcode != NO_ERROR)
+		goto out;
 
-	reply->ret_code = retcode;
+	retcode = drbd_request_state(adm_ctx.mdev, mask, val);
+out:
+	drbd_adm_finish(info, retcode);
 	return 0;
 }
 
-static int drbd_nl_pause_sync(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
-			      struct drbd_nl_cfg_reply *reply)
+int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info)
 {
-	int retcode = NO_ERROR;
+	return drbd_adm_simple_request_state(skb, info, NS(conn, C_STARTING_SYNC_S));
+}
 
-	if (drbd_request_state(mdev, NS(user_isp, 1)) == SS_NOTHING_TO_DO)
-		retcode = ERR_PAUSE_IS_SET;
+int drbd_adm_pause_sync(struct sk_buff *skb, struct genl_info *info)
+{
+	enum drbd_ret_code retcode;
+
+	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
+	if (!adm_ctx.reply_skb)
+		return retcode;
+	if (retcode != NO_ERROR)
+		goto out;
 
-	reply->ret_code = retcode;
+	if (drbd_request_state(adm_ctx.mdev, NS(user_isp, 1)) == SS_NOTHING_TO_DO)
+		retcode = ERR_PAUSE_IS_SET;
+out:
+	drbd_adm_finish(info, retcode);
 	return 0;
 }
 
-static int drbd_nl_resume_sync(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
-			       struct drbd_nl_cfg_reply *reply)
+int drbd_adm_resume_sync(struct sk_buff *skb, struct genl_info *info)
 {
-	int retcode = NO_ERROR;
 	union drbd_state s;
+	enum drbd_ret_code retcode;
+
+	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
+	if (!adm_ctx.reply_skb)
+		return retcode;
+	if (retcode != NO_ERROR)
+		goto out;
 
-	if (drbd_request_state(mdev, NS(user_isp, 0)) == SS_NOTHING_TO_DO) {
-		s = mdev->state;
+	if (drbd_request_state(adm_ctx.mdev, NS(user_isp, 0)) == SS_NOTHING_TO_DO) {
+		s = adm_ctx.mdev->state;
 		if (s.conn == C_PAUSED_SYNC_S || s.conn == C_PAUSED_SYNC_T) {
 			retcode = s.aftr_isp ? ERR_PIC_AFTER_DEP :
 				  s.peer_isp ? ERR_PIC_PEER_DEP : ERR_PAUSE_IS_CLEAR;
@@ -1933,28 +2141,35 @@ static int drbd_nl_resume_sync(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n
 		}
 	}
 
-	reply->ret_code = retcode;
+out:
+	drbd_adm_finish(info, retcode);
 	return 0;
 }
 
-static int drbd_nl_suspend_io(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
-			      struct drbd_nl_cfg_reply *reply)
+int drbd_adm_suspend_io(struct sk_buff *skb, struct genl_info *info)
 {
-	reply->ret_code = drbd_request_state(mdev, NS(susp, 1));
-
-	return 0;
+	return drbd_adm_simple_request_state(skb, info, NS(susp, 1));
 }
 
-static int drbd_nl_resume_io(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
-			     struct drbd_nl_cfg_reply *reply)
+int drbd_adm_resume_io(struct sk_buff *skb, struct genl_info *info)
 {
+	struct drbd_conf *mdev;
+	int retcode; /* enum drbd_ret_code rsp. enum drbd_state_rv */
+
+	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
+	if (!adm_ctx.reply_skb)
+		return retcode;
+	if (retcode != NO_ERROR)
+		goto out;
+
+	mdev = adm_ctx.mdev;
 	if (test_bit(NEW_CUR_UUID, &mdev->flags)) {
 		drbd_uuid_new_current(mdev);
 		clear_bit(NEW_CUR_UUID, &mdev->flags);
 	}
 	drbd_suspend_io(mdev);
-	reply->ret_code = drbd_request_state(mdev, NS3(susp, 0, susp_nod, 0, susp_fen, 0));
-	if (reply->ret_code == SS_SUCCESS) {
+	retcode = drbd_request_state(mdev, NS3(susp, 0, susp_nod, 0, susp_fen, 0));
+	if (retcode == SS_SUCCESS) {
 		if (mdev->state.conn < C_CONNECTED)
 			tl_clear(mdev->tconn);
 		if (mdev->state.disk == D_DISKLESS || mdev->state.disk == D_FAILED)
@@ -1962,138 +2177,259 @@ static int drbd_nl_resume_io(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
 	}
 	drbd_resume_io(mdev);
 
+out:
+	drbd_adm_finish(info, retcode);
 	return 0;
 }
 
-static int drbd_nl_outdate(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
-			   struct drbd_nl_cfg_reply *reply)
+int drbd_adm_outdate(struct sk_buff *skb, struct genl_info *info)
 {
-	reply->ret_code = drbd_request_state(mdev, NS(disk, D_OUTDATED));
-	return 0;
+	return drbd_adm_simple_request_state(skb, info, NS(disk, D_OUTDATED));
 }
 
-static int drbd_nl_get_config(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
-			   struct drbd_nl_cfg_reply *reply)
+int nla_put_status_info(struct sk_buff *skb, struct drbd_conf *mdev,
+		const struct sib_info *sib)
 {
-	unsigned short *tl;
-
-	tl = reply->tag_list;
-
-	if (get_ldev(mdev)) {
-		tl = disk_conf_to_tags(&mdev->ldev->dc, tl);
-		put_ldev(mdev);
+	struct state_info *si = NULL; /* for sizeof(si->member); */
+	struct nlattr *nla;
+	int got_ldev;
+	int got_net;
+	int err = 0;
+	int exclude_sensitive;
+
+	/* If sib != NULL, this is drbd_bcast_event, which anyone can listen
+	 * to.  So we better exclude_sensitive information.
+	 *
+	 * If sib == NULL, this is drbd_adm_get_status, executed synchronously
+	 * in the context of the requesting user process. Exclude sensitive
+	 * information, unless current has superuser.
+	 *
+	 * NOTE: for drbd_adm_get_status_all(), this is a netlink dump, and
+	 * relies on the current implementation of netlink_dump(), which
+	 * executes the dump callback successively from netlink_recvmsg(),
+	 * always in the context of the receiving process */
+	exclude_sensitive = sib || !capable(CAP_SYS_ADMIN);
+
+	got_ldev = get_ldev(mdev);
+	got_net = get_net_conf(mdev->tconn);
+
+	/* We need to add connection name and volume number information still.
+	 * Minor number is in drbd_genlmsghdr. */
+	nla = nla_nest_start(skb, DRBD_NLA_CFG_CONTEXT);
+	if (!nla)
+		goto nla_put_failure;
+	NLA_PUT_U32(skb, T_ctx_volume, mdev->vnr);
+	NLA_PUT_STRING(skb, T_ctx_conn_name, mdev->tconn->name);
+	nla_nest_end(skb, nla);
+
+	if (got_ldev)
+		if (disk_conf_to_skb(skb, &mdev->ldev->dc, exclude_sensitive))
+			goto nla_put_failure;
+	if (got_net)
+		if (net_conf_to_skb(skb, mdev->tconn->net_conf, exclude_sensitive))
+			goto nla_put_failure;
+
+	if (syncer_conf_to_skb(skb, &mdev->sync_conf, exclude_sensitive))
+			goto nla_put_failure;
+
+	nla = nla_nest_start(skb, DRBD_NLA_STATE_INFO);
+	if (!nla)
+		goto nla_put_failure;
+	NLA_PUT_U32(skb, T_sib_reason, sib ? sib->sib_reason : SIB_GET_STATUS_REPLY);
+	NLA_PUT_U32(skb, T_current_state, mdev->state.i);
+	NLA_PUT_U64(skb, T_ed_uuid, mdev->ed_uuid);
+	NLA_PUT_U64(skb, T_capacity, drbd_get_capacity(mdev->this_bdev));
+
+	if (got_ldev) {
+		NLA_PUT_U32(skb, T_disk_flags, mdev->ldev->md.flags);
+		NLA_PUT(skb, T_uuids, sizeof(si->uuids), mdev->ldev->md.uuid);
+		NLA_PUT_U64(skb, T_bits_total, drbd_bm_bits(mdev));
+		NLA_PUT_U64(skb, T_bits_oos, drbd_bm_total_weight(mdev));
+		if (C_SYNC_SOURCE <= mdev->state.conn &&
+		    C_PAUSED_SYNC_T >= mdev->state.conn) {
+			NLA_PUT_U64(skb, T_bits_rs_total, mdev->rs_total);
+			NLA_PUT_U64(skb, T_bits_rs_failed, mdev->rs_failed);
+		}
 	}
 
-	if (get_net_conf(mdev->tconn)) {
-		tl = net_conf_to_tags(mdev->tconn->net_conf, tl);
-		put_net_conf(mdev->tconn);
+	if (sib) {
+		switch(sib->sib_reason) {
+		case SIB_SYNC_PROGRESS:
+		case SIB_GET_STATUS_REPLY:
+			break;
+		case SIB_STATE_CHANGE:
+			NLA_PUT_U32(skb, T_prev_state, sib->os.i);
+			NLA_PUT_U32(skb, T_new_state, sib->ns.i);
+			break;
+		case SIB_HELPER_POST:
+			NLA_PUT_U32(skb,
+				T_helper_exit_code, sib->helper_exit_code);
+			/* fall through */
+		case SIB_HELPER_PRE:
+			NLA_PUT_STRING(skb, T_helper, sib->helper_name);
+			break;
+		}
 	}
-	tl = syncer_conf_to_tags(&mdev->sync_conf, tl);
-
-	put_unaligned(TT_END, tl++); /* Close the tag list */
+	nla_nest_end(skb, nla);
 
-	return (int)((char *)tl - (char *)reply->tag_list);
+	if (0)
+nla_put_failure:
+		err = -EMSGSIZE;
+	if (got_ldev)
+		put_ldev(mdev);
+	if (got_net)
+		put_net_conf(mdev->tconn);
+	return err;
 }
 
-static int drbd_nl_get_state(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
-			     struct drbd_nl_cfg_reply *reply)
+int drbd_adm_get_status(struct sk_buff *skb, struct genl_info *info)
 {
-	unsigned short *tl = reply->tag_list;
-	union drbd_state s = mdev->state;
-	unsigned long rs_left;
-	unsigned int res;
+	enum drbd_ret_code retcode;
+	int err;
 
-	tl = get_state_to_tags((struct get_state *)&s, tl);
+	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
+	if (!adm_ctx.reply_skb)
+		return retcode;
+	if (retcode != NO_ERROR)
+		goto out;
 
-	/* no local ref, no bitmap, no syncer progress. */
-	if (s.conn >= C_SYNC_SOURCE && s.conn <= C_PAUSED_SYNC_T) {
-		if (get_ldev(mdev)) {
-			drbd_get_syncer_progress(mdev, &rs_left, &res);
-			tl = tl_add_int(tl, T_sync_progress, &res);
-			put_ldev(mdev);
-		}
+	err = nla_put_status_info(adm_ctx.reply_skb, adm_ctx.mdev, NULL);
+	if (err) {
+		nlmsg_free(adm_ctx.reply_skb);
+		return err;
 	}
-	put_unaligned(TT_END, tl++); /* Close the tag list */
-
-	return (int)((char *)tl - (char *)reply->tag_list);
+out:
+	drbd_adm_finish(info, retcode);
+	return 0;
 }
 
-static int drbd_nl_get_uuids(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
-			     struct drbd_nl_cfg_reply *reply)
+int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb)
 {
-	unsigned short *tl;
-
-	tl = reply->tag_list;
+	struct drbd_conf *mdev;
+	struct drbd_genlmsghdr *dh;
+	int minor = cb->args[0];
+
+	/* Open coded deferred single idr_for_each_entry iteration.
+	 * This may miss entries inserted after this dump started,
+	 * or entries deleted before they are reached.
+	 * But we need to make sure the mdev won't disappear while
+	 * we are looking at it. */
+
+	rcu_read_lock();
+	mdev = idr_get_next(&minors, &minor);
+	if (mdev) {
+		dh = genlmsg_put(skb, NETLINK_CB(cb->skb).pid,
+				cb->nlh->nlmsg_seq, &drbd_genl_family,
+				NLM_F_MULTI, DRBD_ADM_GET_STATUS);
+		if (!dh)
+			goto errout;
+
+		D_ASSERT(mdev->minor == minor);
+
+		dh->minor = minor;
+		dh->ret_code = NO_ERROR;
+
+		if (nla_put_status_info(skb, mdev, NULL)) {
+			genlmsg_cancel(skb, dh);
+			goto errout;
+		}
+		genlmsg_end(skb, dh);
+        }
 
-	if (get_ldev(mdev)) {
-		tl = tl_add_blob(tl, T_uuids, mdev->ldev->md.uuid, UI_SIZE*sizeof(u64));
-		tl = tl_add_int(tl, T_uuids_flags, &mdev->ldev->md.flags);
-		put_ldev(mdev);
-	}
-	put_unaligned(TT_END, tl++); /* Close the tag list */
+errout:
+	rcu_read_unlock();
+	/* where to start idr_get_next with the next iteration */
+        cb->args[0] = minor+1;
 
-	return (int)((char *)tl - (char *)reply->tag_list);
+	/* No more minors found: empty skb. Which will terminate the dump. */
+        return skb->len;
 }
 
-/**
- * drbd_nl_get_timeout_flag() - Used by drbdsetup to find out which timeout value to use
- * @mdev:	DRBD device.
- * @nlp:	Netlink/connector packet from drbdsetup
- * @reply:	Reply packet for drbdsetup
- */
-static int drbd_nl_get_timeout_flag(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
-				    struct drbd_nl_cfg_reply *reply)
+int drbd_adm_get_timeout_type(struct sk_buff *skb, struct genl_info *info)
 {
-	unsigned short *tl;
-	char rv;
-
-	tl = reply->tag_list;
+	enum drbd_ret_code retcode;
+	struct timeout_parms tp;
+	int err;
 
-	rv = mdev->state.pdsk == D_OUTDATED        ? UT_PEER_OUTDATED :
-	  test_bit(USE_DEGR_WFC_T, &mdev->flags) ? UT_DEGRADED : UT_DEFAULT;
+	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
+	if (!adm_ctx.reply_skb)
+		return retcode;
+	if (retcode != NO_ERROR)
+		goto out;
 
-	tl = tl_add_blob(tl, T_use_degraded, &rv, sizeof(rv));
-	put_unaligned(TT_END, tl++); /* Close the tag list */
+	tp.timeout_type =
+		adm_ctx.mdev->state.pdsk == D_OUTDATED ? UT_PEER_OUTDATED :
+		test_bit(USE_DEGR_WFC_T, &adm_ctx.mdev->flags) ? UT_DEGRADED :
+		UT_DEFAULT;
 
-	return (int)((char *)tl - (char *)reply->tag_list);
+	err = timeout_parms_to_priv_skb(adm_ctx.reply_skb, &tp);
+	if (err) {
+		nlmsg_free(adm_ctx.reply_skb);
+		return err;
+	}
+out:
+	drbd_adm_finish(info, retcode);
+	return 0;
 }
 
-static int drbd_nl_start_ov(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
-				    struct drbd_nl_cfg_reply *reply)
+int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info)
 {
-	/* default to resume from last known position, if possible */
-	struct start_ov args =
-		{ .start_sector = mdev->ov_start_sector };
+	struct drbd_conf *mdev;
+	enum drbd_ret_code retcode;
 
-	if (!start_ov_from_tags(nlp->tag_list, &args)) {
-		reply->ret_code = ERR_MANDATORY_TAG;
-		return 0;
-	}
+	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
+	if (!adm_ctx.reply_skb)
+		return retcode;
+	if (retcode != NO_ERROR)
+		goto out;
 
+	mdev = adm_ctx.mdev;
+	if (info->attrs[DRBD_NLA_START_OV_PARMS]) {
+		/* resume from last known position, if possible */
+		struct start_ov_parms parms =
+			{ .ov_start_sector = mdev->ov_start_sector };
+		int err = start_ov_parms_from_attrs(&parms, info->attrs);
+		if (err) {
+			retcode = ERR_MANDATORY_TAG;
+			drbd_msg_put_info(from_attrs_err_to_txt(err));
+			goto out;
+		}
+		/* w_make_ov_request expects position to be aligned */
+		mdev->ov_start_sector = parms.ov_start_sector & ~BM_SECT_PER_BIT;
+	}
 	/* If there is still bitmap IO pending, e.g. previous resync or verify
 	 * just being finished, wait for it before requesting a new resync. */
 	wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
-
-	/* w_make_ov_request expects position to be aligned */
-	mdev->ov_start_sector = args.start_sector & ~BM_SECT_PER_BIT;
-	reply->ret_code = drbd_request_state(mdev,NS(conn,C_VERIFY_S));
+	retcode = drbd_request_state(mdev,NS(conn,C_VERIFY_S));
+out:
+	drbd_adm_finish(info, retcode);
 	return 0;
 }
 
 
-static int drbd_nl_new_c_uuid(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
-			      struct drbd_nl_cfg_reply *reply)
+int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info)
 {
-	int retcode = NO_ERROR;
+	struct drbd_conf *mdev;
+	enum drbd_ret_code retcode;
 	int skip_initial_sync = 0;
 	int err;
+	struct new_c_uuid_parms args;
 
-	struct new_c_uuid args;
+	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
+	if (!adm_ctx.reply_skb)
+		return retcode;
+	if (retcode != NO_ERROR)
+		goto out_nolock;
 
-	memset(&args, 0, sizeof(struct new_c_uuid));
-	if (!new_c_uuid_from_tags(nlp->tag_list, &args)) {
-		reply->ret_code = ERR_MANDATORY_TAG;
-		return 0;
+	mdev = adm_ctx.mdev;
+	memset(&args, 0, sizeof(args));
+	if (info->attrs[DRBD_NLA_NEW_C_UUID_PARMS]) {
+		err = new_c_uuid_parms_from_attrs(&args, info->attrs);
+		if (err) {
+			retcode = ERR_MANDATORY_TAG;
+			drbd_msg_put_info(from_attrs_err_to_txt(err));
+			goto out_nolock;
+		}
 	}
 
 	mutex_lock(mdev->state_mutex); /* Protects us against serialized state changes. */
@@ -2139,510 +2475,164 @@ out_dec:
 	put_ldev(mdev);
 out:
 	mutex_unlock(mdev->state_mutex);
-
-	reply->ret_code = retcode;
-	return 0;
-}
-
-static int drbd_nl_new_conn(struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply)
-{
-	struct new_connection args;
-
-	if (!new_connection_from_tags(nlp->tag_list, &args)) {
-		reply->ret_code = ERR_MANDATORY_TAG;
-		return 0;
-	}
-
-	reply->ret_code = NO_ERROR;
-	if (!drbd_new_tconn(args.name))
-		reply->ret_code = ERR_NOMEM;
-
-	return 0;
-}
-
-static int drbd_nl_new_minor(struct drbd_tconn *tconn,
-		      struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply)
-{
-	struct new_minor args;
-
-	args.vol_nr = 0;
-	args.minor = 0;
-
-	if (!new_minor_from_tags(nlp->tag_list, &args)) {
-		reply->ret_code = ERR_MANDATORY_TAG;
-		return 0;
-	}
-
-	reply->ret_code = conn_new_minor(tconn, args.minor, args.vol_nr);
-
+out_nolock:
+	drbd_adm_finish(info, retcode);
 	return 0;
 }
 
-static int drbd_nl_del_minor(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
-			     struct drbd_nl_cfg_reply *reply)
+static enum drbd_ret_code
+drbd_check_conn_name(const char *name)
 {
-	if (mdev->state.disk == D_DISKLESS &&
-	    mdev->state.conn == C_STANDALONE &&
-	    mdev->state.role == R_SECONDARY) {
-		drbd_delete_device(mdev_to_minor(mdev));
-		reply->ret_code = NO_ERROR;
-	} else {
-		reply->ret_code = ERR_MINOR_CONFIGURED;
+	if (!name || !name[0]) {
+		drbd_msg_put_info("connection name missing");
+		return ERR_MANDATORY_TAG;
 	}
-	return 0;
-}
-
-static int drbd_nl_del_conn(struct drbd_tconn *tconn,
-			    struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply)
-{
-	if (conn_lowest_minor(tconn) < 0) {
-		drbd_free_tconn(tconn);
-		reply->ret_code = NO_ERROR;
-	} else {
-		reply->ret_code = ERR_CONN_IN_USE;
+	/* if we want to use these in sysfs/configfs/debugfs some day,
+	 * we must not allow slashes */
+	if (strchr(name, '/')) {
+		drbd_msg_put_info("invalid connection name");
+		return ERR_INVALID_REQUEST;
 	}
-
-	return 0;
+	return NO_ERROR;
 }
 
-enum cn_handler_type {
-	CHT_MINOR,
-	CHT_CONN,
-	CHT_CTOR,
-	/* CHT_RES, later */
-};
-struct cn_handler_struct {
-	enum cn_handler_type type;
-	union {
-		int (*minor_based)(struct drbd_conf *,
-				   struct drbd_nl_cfg_req *,
-				   struct drbd_nl_cfg_reply *);
-		int (*conn_based)(struct drbd_tconn *,
-				  struct drbd_nl_cfg_req *,
-				  struct drbd_nl_cfg_reply *);
-		int (*constructor)(struct drbd_nl_cfg_req *,
-				   struct drbd_nl_cfg_reply *);
-	};
-	int reply_body_size;
-};
-
-static struct cn_handler_struct cnd_table[] = {
-	[ P_primary ]		= { CHT_MINOR, { &drbd_nl_primary },	0 },
-	[ P_secondary ]		= { CHT_MINOR, { &drbd_nl_secondary },	0 },
-	[ P_disk_conf ]		= { CHT_MINOR, { &drbd_nl_disk_conf },	0 },
-	[ P_detach ]		= { CHT_MINOR, { &drbd_nl_detach },	0 },
-	[ P_net_conf ]		= { CHT_CONN,  { .conn_based = &drbd_nl_net_conf },	0 },
-	[ P_disconnect ]	= { CHT_CONN,  { .conn_based = &drbd_nl_disconnect },	0 },
-	[ P_resize ]		= { CHT_MINOR, { &drbd_nl_resize },	0 },
-	[ P_syncer_conf ]	= { CHT_MINOR, { &drbd_nl_syncer_conf },0 },
-	[ P_invalidate ]	= { CHT_MINOR, { &drbd_nl_invalidate },	0 },
-	[ P_invalidate_peer ]	= { CHT_MINOR, { &drbd_nl_invalidate_peer },0 },
-	[ P_pause_sync ]	= { CHT_MINOR, { &drbd_nl_pause_sync },	0 },
-	[ P_resume_sync ]	= { CHT_MINOR, { &drbd_nl_resume_sync },0 },
-	[ P_suspend_io ]	= { CHT_MINOR, { &drbd_nl_suspend_io },	0 },
-	[ P_resume_io ]		= { CHT_MINOR, { &drbd_nl_resume_io },	0 },
-	[ P_outdate ]		= { CHT_MINOR, { &drbd_nl_outdate },	0 },
-	[ P_get_config ]	= { CHT_MINOR, { &drbd_nl_get_config },
-				    sizeof(struct syncer_conf_tag_len_struct) +
-				    sizeof(struct disk_conf_tag_len_struct) +
-				    sizeof(struct net_conf_tag_len_struct) },
-	[ P_get_state ]		= { CHT_MINOR, { &drbd_nl_get_state },
-				    sizeof(struct get_state_tag_len_struct) +
-				    sizeof(struct sync_progress_tag_len_struct)	},
-	[ P_get_uuids ]		= { CHT_MINOR, { &drbd_nl_get_uuids },
-				    sizeof(struct get_uuids_tag_len_struct) },
-	[ P_get_timeout_flag ]	= { CHT_MINOR, { &drbd_nl_get_timeout_flag },
-				    sizeof(struct get_timeout_flag_tag_len_struct)},
-	[ P_start_ov ]		= { CHT_MINOR, { &drbd_nl_start_ov },	0 },
-	[ P_new_c_uuid ]	= { CHT_MINOR, { &drbd_nl_new_c_uuid },	0 },
-	[ P_new_connection ]	= { CHT_CTOR,  { .constructor = &drbd_nl_new_conn }, 0 },
-	[ P_new_minor ]		= { CHT_CONN,  { .conn_based = &drbd_nl_new_minor }, 0 },
-	[ P_del_minor ]		= { CHT_MINOR, { &drbd_nl_del_minor },	0 },
-	[ P_del_connection ]    = { CHT_CONN,  { .conn_based = &drbd_nl_del_conn }, 0 },
-};
-
-static void drbd_connector_callback(struct cn_msg *req, struct netlink_skb_parms *nsp)
+int drbd_adm_create_connection(struct sk_buff *skb, struct genl_info *info)
 {
-	struct drbd_nl_cfg_req *nlp = (struct drbd_nl_cfg_req *)req->data;
-	struct cn_handler_struct *cm;
-	struct cn_msg *cn_reply;
-	struct drbd_nl_cfg_reply *reply;
-	struct drbd_conf *mdev;
-	struct drbd_tconn *tconn;
-	int retcode, rr;
-	int reply_size = sizeof(struct cn_msg)
-		+ sizeof(struct drbd_nl_cfg_reply)
-		+ sizeof(short int);
-
-	if (!try_module_get(THIS_MODULE)) {
-		printk(KERN_ERR "drbd: try_module_get() failed!\n");
-		return;
-	}
-
-	if (!cap_raised(current_cap(), CAP_SYS_ADMIN)) {
-		retcode = ERR_PERM;
-		goto fail;
-	}
+	enum drbd_ret_code retcode;
 
-	if (nlp->packet_type >= P_nl_after_last_packet ||
-	    nlp->packet_type == P_return_code_only) {
-		retcode = ERR_PACKET_NR;
-		goto fail;
-	}
+	retcode = drbd_adm_prepare(skb, info, 0);
+	if (!adm_ctx.reply_skb)
+		return retcode;
+	if (retcode != NO_ERROR)
+		goto out;
 
-	cm = cnd_table + nlp->packet_type;
+	retcode = drbd_check_conn_name(adm_ctx.conn_name);
+	if (retcode != NO_ERROR)
+		goto out;
 
-	/* This may happen if packet number is 0: */
-	if (cm->minor_based == NULL) {
-		retcode = ERR_PACKET_NR;
-		goto fail;
+	if (adm_ctx.tconn) {
+		retcode = ERR_INVALID_REQUEST;
+		drbd_msg_put_info("connection exists");
+		goto out;
 	}
 
-	reply_size += cm->reply_body_size;
-
-	/* allocation not in the IO path, cqueue thread context */
-	cn_reply = kzalloc(reply_size, GFP_KERNEL);
-	if (!cn_reply) {
+	if (!drbd_new_tconn(adm_ctx.conn_name))
 		retcode = ERR_NOMEM;
-		goto fail;
-	}
-	reply = (struct drbd_nl_cfg_reply *) cn_reply->data;
-
-	reply->packet_type =
-		cm->reply_body_size ? nlp->packet_type : P_return_code_only;
-	reply->minor = nlp->drbd_minor;
-	reply->ret_code = NO_ERROR; /* Might by modified by cm->function. */
-	/* reply->tag_list; might be modified by cm->function. */
-
-	retcode = ERR_MINOR_INVALID;
-	rr = 0;
-	switch (cm->type) {
-	case CHT_MINOR:
-		mdev = minor_to_mdev(nlp->drbd_minor);
-		if (!mdev)
-			goto fail;
-		rr = cm->minor_based(mdev, nlp, reply);
-		break;
-	case CHT_CONN:
-		tconn = conn_by_name(nlp->obj_name);
-		if (!tconn) {
-			retcode = ERR_CONN_NOT_KNOWN;
-			goto fail;
-		}
-		rr = cm->conn_based(tconn, nlp, reply);
-		break;
-	case CHT_CTOR:
-		rr = cm->constructor(nlp, reply);
-		break;
-	/* case CHT_RES: */
-	}
-
-	cn_reply->id = req->id;
-	cn_reply->seq = req->seq;
-	cn_reply->ack = req->ack  + 1;
-	cn_reply->len = sizeof(struct drbd_nl_cfg_reply) + rr;
-	cn_reply->flags = 0;
-
-	rr = cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_KERNEL);
-	if (rr && rr != -ESRCH)
-		printk(KERN_INFO "drbd: cn_netlink_send()=%d\n", rr);
-
-	kfree(cn_reply);
-	module_put(THIS_MODULE);
-	return;
- fail:
-	drbd_nl_send_reply(req, retcode);
-	module_put(THIS_MODULE);
-}
-
-static atomic_t drbd_nl_seq = ATOMIC_INIT(2); /* two. */
-
-static unsigned short *
-__tl_add_blob(unsigned short *tl, enum drbd_tags tag, const void *data,
-	unsigned short len, int nul_terminated)
-{
-	unsigned short l = tag_descriptions[tag_number(tag)].max_len;
-	len = (len < l) ? len :  l;
-	put_unaligned(tag, tl++);
-	put_unaligned(len, tl++);
-	memcpy(tl, data, len);
-	tl = (unsigned short*)((char*)tl + len);
-	if (nul_terminated)
-		*((char*)tl - 1) = 0;
-	return tl;
-}
-
-static unsigned short *
-tl_add_blob(unsigned short *tl, enum drbd_tags tag, const void *data, int len)
-{
-	return __tl_add_blob(tl, tag, data, len, 0);
-}
-
-static unsigned short *
-tl_add_str(unsigned short *tl, enum drbd_tags tag, const char *str)
-{
-	return __tl_add_blob(tl, tag, str, strlen(str)+1, 0);
-}
-
-static unsigned short *
-tl_add_int(unsigned short *tl, enum drbd_tags tag, const void *val)
-{
-	put_unaligned(tag, tl++);
-	switch(tag_type(tag)) {
-	case TT_INTEGER:
-		put_unaligned(sizeof(int), tl++);
-		put_unaligned(*(int *)val, (int *)tl);
-		tl = (unsigned short*)((char*)tl+sizeof(int));
-		break;
-	case TT_INT64:
-		put_unaligned(sizeof(u64), tl++);
-		put_unaligned(*(u64 *)val, (u64 *)tl);
-		tl = (unsigned short*)((char*)tl+sizeof(u64));
-		break;
-	default:
-		/* someone did something stupid. */
-		;
-	}
-	return tl;
-}
-
-void drbd_bcast_state(struct drbd_conf *mdev, union drbd_state state)
-{
-	char buffer[sizeof(struct cn_msg)+
-		    sizeof(struct drbd_nl_cfg_reply)+
-		    sizeof(struct get_state_tag_len_struct)+
-		    sizeof(short int)];
-	struct cn_msg *cn_reply = (struct cn_msg *) buffer;
-	struct drbd_nl_cfg_reply *reply =
-		(struct drbd_nl_cfg_reply *)cn_reply->data;
-	unsigned short *tl = reply->tag_list;
-
-	/* dev_warn(DEV, "drbd_bcast_state() got called\n"); */
-
-	tl = get_state_to_tags((struct get_state *)&state, tl);
-
-	put_unaligned(TT_END, tl++); /* Close the tag list */
-
-	cn_reply->id.idx = CN_IDX_DRBD;
-	cn_reply->id.val = CN_VAL_DRBD;
-
-	cn_reply->seq = atomic_inc_return(&drbd_nl_seq);
-	cn_reply->ack = 0; /* not used here. */
-	cn_reply->len = sizeof(struct drbd_nl_cfg_reply) +
-		(int)((char *)tl - (char *)reply->tag_list);
-	cn_reply->flags = 0;
-
-	reply->packet_type = P_get_state;
-	reply->minor = mdev_to_minor(mdev);
-	reply->ret_code = NO_ERROR;
-
-	cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO);
-}
-
-void drbd_bcast_ev_helper(struct drbd_conf *mdev, char *helper_name)
-{
-	char buffer[sizeof(struct cn_msg)+
-		    sizeof(struct drbd_nl_cfg_reply)+
-		    sizeof(struct call_helper_tag_len_struct)+
-		    sizeof(short int)];
-	struct cn_msg *cn_reply = (struct cn_msg *) buffer;
-	struct drbd_nl_cfg_reply *reply =
-		(struct drbd_nl_cfg_reply *)cn_reply->data;
-	unsigned short *tl = reply->tag_list;
-
-	/* dev_warn(DEV, "drbd_bcast_state() got called\n"); */
-
-	tl = tl_add_str(tl, T_helper, helper_name);
-	put_unaligned(TT_END, tl++); /* Close the tag list */
-
-	cn_reply->id.idx = CN_IDX_DRBD;
-	cn_reply->id.val = CN_VAL_DRBD;
-
-	cn_reply->seq = atomic_inc_return(&drbd_nl_seq);
-	cn_reply->ack = 0; /* not used here. */
-	cn_reply->len = sizeof(struct drbd_nl_cfg_reply) +
-		(int)((char *)tl - (char *)reply->tag_list);
-	cn_reply->flags = 0;
-
-	reply->packet_type = P_call_helper;
-	reply->minor = mdev_to_minor(mdev);
-	reply->ret_code = NO_ERROR;
-
-	cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO);
+out:
+	drbd_adm_finish(info, retcode);
+	return 0;
 }
 
-void drbd_bcast_ee(struct drbd_conf *mdev, const char *reason, const int dgs,
-		   const char *seen_hash, const char *calc_hash,
-			   const struct drbd_peer_request *peer_req)
+int drbd_adm_add_minor(struct sk_buff *skb, struct genl_info *info)
 {
-	struct cn_msg *cn_reply;
-	struct drbd_nl_cfg_reply *reply;
-	unsigned short *tl;
-	struct page *page;
-	unsigned len;
+	struct drbd_genlmsghdr *dh = info->userhdr;
+	enum drbd_ret_code retcode;
 
-	if (!peer_req)
-		return;
-	if (!reason || !reason[0])
-		return;
+	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONN);
+	if (!adm_ctx.reply_skb)
+		return retcode;
+	if (retcode != NO_ERROR)
+		goto out;
 
-	/* apparently we have to memcpy twice, first to prepare the data for the
-	 * struct cn_msg, then within cn_netlink_send from the cn_msg to the
-	 * netlink skb. */
-	/* receiver thread context, which is not in the writeout path (of this node),
-	 * but may be in the writeout path of the _other_ node.
-	 * GFP_NOIO to avoid potential "distributed deadlock". */
-	cn_reply = kzalloc(
-		sizeof(struct cn_msg)+
-		sizeof(struct drbd_nl_cfg_reply)+
-		sizeof(struct dump_ee_tag_len_struct)+
-		sizeof(short int),
-		GFP_NOIO);
-
-	if (!cn_reply) {
-		dev_err(DEV, "could not kmalloc buffer for drbd_bcast_ee, "
-			     "sector %llu, size %u\n",
-			(unsigned long long)peer_req->i.sector,
-			peer_req->i.size);
-		return;
+	/* FIXME drop minor_count parameter, limit to MINORMASK */
+	if (dh->minor >= minor_count) {
+		drbd_msg_put_info("requested minor out of range");
+		retcode = ERR_INVALID_REQUEST;
+		goto out;
 	}
-
-	reply = (struct drbd_nl_cfg_reply*)cn_reply->data;
-	tl = reply->tag_list;
-
-	tl = tl_add_str(tl, T_dump_ee_reason, reason);
-	tl = tl_add_blob(tl, T_seen_digest, seen_hash, dgs);
-	tl = tl_add_blob(tl, T_calc_digest, calc_hash, dgs);
-	tl = tl_add_int(tl, T_ee_sector, &peer_req->i.sector);
-	tl = tl_add_int(tl, T_ee_block_id, &peer_req->block_id);
-
-	/* dump the first 32k */
-	len = min_t(unsigned, peer_req->i.size, 32 << 10);
-	put_unaligned(T_ee_data, tl++);
-	put_unaligned(len, tl++);
-
-	page = peer_req->pages;
-	page_chain_for_each(page) {
-		void *d = kmap_atomic(page, KM_USER0);
-		unsigned l = min_t(unsigned, len, PAGE_SIZE);
-		memcpy(tl, d, l);
-		kunmap_atomic(d, KM_USER0);
-		tl = (unsigned short*)((char*)tl + l);
-		len -= l;
-		if (len == 0)
-			break;
+	/* FIXME we need a define here */
+	if (adm_ctx.volume >= 256) {
+		drbd_msg_put_info("requested volume id out of range");
+		retcode = ERR_INVALID_REQUEST;
+		goto out;
 	}
-	put_unaligned(TT_END, tl++); /* Close the tag list */
-
-	cn_reply->id.idx = CN_IDX_DRBD;
-	cn_reply->id.val = CN_VAL_DRBD;
-
-	cn_reply->seq = atomic_inc_return(&drbd_nl_seq);
-	cn_reply->ack = 0; // not used here.
-	cn_reply->len = sizeof(struct drbd_nl_cfg_reply) +
-		(int)((char*)tl - (char*)reply->tag_list);
-	cn_reply->flags = 0;
 
-	reply->packet_type = P_dump_ee;
-	reply->minor = mdev_to_minor(mdev);
-	reply->ret_code = NO_ERROR;
-
-	cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO);
-	kfree(cn_reply);
+	retcode = conn_new_minor(adm_ctx.tconn, dh->minor, adm_ctx.volume);
+out:
+	drbd_adm_finish(info, retcode);
+	return 0;
 }
 
-void drbd_bcast_sync_progress(struct drbd_conf *mdev)
+int drbd_adm_delete_minor(struct sk_buff *skb, struct genl_info *info)
 {
-	char buffer[sizeof(struct cn_msg)+
-		    sizeof(struct drbd_nl_cfg_reply)+
-		    sizeof(struct sync_progress_tag_len_struct)+
-		    sizeof(short int)];
-	struct cn_msg *cn_reply = (struct cn_msg *) buffer;
-	struct drbd_nl_cfg_reply *reply =
-		(struct drbd_nl_cfg_reply *)cn_reply->data;
-	unsigned short *tl = reply->tag_list;
-	unsigned long rs_left;
-	unsigned int res;
-
-	/* no local ref, no bitmap, no syncer progress, no broadcast. */
-	if (!get_ldev(mdev))
-		return;
-	drbd_get_syncer_progress(mdev, &rs_left, &res);
-	put_ldev(mdev);
-
-	tl = tl_add_int(tl, T_sync_progress, &res);
-	put_unaligned(TT_END, tl++); /* Close the tag list */
-
-	cn_reply->id.idx = CN_IDX_DRBD;
-	cn_reply->id.val = CN_VAL_DRBD;
-
-	cn_reply->seq = atomic_inc_return(&drbd_nl_seq);
-	cn_reply->ack = 0; /* not used here. */
-	cn_reply->len = sizeof(struct drbd_nl_cfg_reply) +
-		(int)((char *)tl - (char *)reply->tag_list);
-	cn_reply->flags = 0;
+	struct drbd_conf *mdev;
+	enum drbd_ret_code retcode;
 
-	reply->packet_type = P_sync_progress;
-	reply->minor = mdev_to_minor(mdev);
-	reply->ret_code = NO_ERROR;
+	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
+	if (!adm_ctx.reply_skb)
+		return retcode;
+	if (retcode != NO_ERROR)
+		goto out;
 
-	cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO);
+	mdev = adm_ctx.mdev;
+	if (mdev->state.disk == D_DISKLESS &&
+	    mdev->state.conn == C_STANDALONE &&
+	    mdev->state.role == R_SECONDARY) {
+		drbd_delete_device(mdev_to_minor(mdev));
+		retcode = NO_ERROR;
+	} else
+		retcode = ERR_MINOR_CONFIGURED;
+out:
+	drbd_adm_finish(info, retcode);
+	return 0;
 }
 
-int __init drbd_nl_init(void)
+int drbd_adm_delete_connection(struct sk_buff *skb, struct genl_info *info)
 {
-	static struct cb_id cn_id_drbd;
-	int err, try=10;
-
-	cn_id_drbd.val = CN_VAL_DRBD;
-	do {
-		cn_id_drbd.idx = cn_idx;
-		err = cn_add_callback(&cn_id_drbd, "cn_drbd", &drbd_connector_callback);
-		if (!err)
-			break;
-		cn_idx = (cn_idx + CN_IDX_STEP);
-	} while (try--);
+	enum drbd_ret_code retcode;
 
-	if (err) {
-		printk(KERN_ERR "drbd: cn_drbd failed to register\n");
-		return err;
+	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONN);
+	if (!adm_ctx.reply_skb)
+		return retcode;
+	if (retcode != NO_ERROR)
+		goto out;
+
+	if (conn_lowest_minor(adm_ctx.tconn) < 0) {
+		drbd_free_tconn(adm_ctx.tconn);
+		retcode = NO_ERROR;
+	} else {
+		retcode = ERR_CONN_IN_USE;
 	}
 
+out:
+	drbd_adm_finish(info, retcode);
 	return 0;
 }
 
-void drbd_nl_cleanup(void)
+void drbd_bcast_event(struct drbd_conf *mdev, const struct sib_info *sib)
 {
-	static struct cb_id cn_id_drbd;
-
-	cn_id_drbd.idx = cn_idx;
-	cn_id_drbd.val = CN_VAL_DRBD;
+	static atomic_t drbd_genl_seq = ATOMIC_INIT(2); /* two. */
+	struct sk_buff *msg;
+	struct drbd_genlmsghdr *d_out;
+	unsigned seq;
+	int err = -ENOMEM;
+
+	seq = atomic_inc_return(&drbd_genl_seq);
+	msg = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO);
+	if (!msg)
+		goto failed;
+
+	err = -EMSGSIZE;
+	d_out = genlmsg_put(msg, 0, seq, &drbd_genl_family, 0, DRBD_EVENT);
+	if (!d_out) /* cannot happen, but anyways. */
+		goto nla_put_failure;
+	d_out->minor = mdev_to_minor(mdev);
+	d_out->ret_code = 0;
+
+	if (nla_put_status_info(msg, mdev, sib))
+		goto nla_put_failure;
+	genlmsg_end(msg, d_out);
+	err = drbd_genl_multicast_events(msg, 0);
+	/* msg has been consumed or freed in netlink_broadcast() */
+	if (err && err != -ESRCH)
+		goto failed;
 
-	cn_del_callback(&cn_id_drbd);
-}
+	return;
 
-void drbd_nl_send_reply(struct cn_msg *req, int ret_code)
-{
-	char buffer[sizeof(struct cn_msg)+sizeof(struct drbd_nl_cfg_reply)];
-	struct cn_msg *cn_reply = (struct cn_msg *) buffer;
-	struct drbd_nl_cfg_reply *reply =
-		(struct drbd_nl_cfg_reply *)cn_reply->data;
-	int rr;
-
-	memset(buffer, 0, sizeof(buffer));
-	cn_reply->id = req->id;
-
-	cn_reply->seq = req->seq;
-	cn_reply->ack = req->ack  + 1;
-	cn_reply->len = sizeof(struct drbd_nl_cfg_reply);
-	cn_reply->flags = 0;
-
-	reply->packet_type = P_return_code_only;
-	reply->minor = ((struct drbd_nl_cfg_req *)req->data)->drbd_minor;
-	reply->ret_code = ret_code;
-
-	rr = cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO);
-	if (rr && rr != -ESRCH)
-		printk(KERN_INFO "drbd: cn_netlink_send()=%d\n", rr);
+nla_put_failure:
+	nlmsg_free(msg);
+failed:
+	dev_err(DEV, "Error %d while broadcasting event. "
+			"Event seq:%u sib_reason:%u\n",
+			err, seq, sib->sib_reason);
 }
-
diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c
index ffee90d6d374..a280bc238acd 100644
--- a/drivers/block/drbd/drbd_state.c
+++ b/drivers/block/drbd/drbd_state.c
@@ -970,6 +970,11 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
 	enum drbd_fencing_p fp;
 	enum drbd_req_event what = NOTHING;
 	union drbd_state nsm = (union drbd_state){ .i = -1 };
+	struct sib_info sib;
+
+	sib.sib_reason = SIB_STATE_CHANGE;
+	sib.os = os;
+	sib.ns = ns;
 
 	if (os.conn != C_CONNECTED && ns.conn == C_CONNECTED) {
 		clear_bit(CRASHED_PRIMARY, &mdev->flags);
@@ -984,7 +989,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
 	}
 
 	/* Inform userspace about the change... */
-	drbd_bcast_state(mdev, ns);
+	drbd_bcast_event(mdev, &sib);
 
 	if (!(os.role == R_PRIMARY && os.disk < D_UP_TO_DATE && os.pdsk < D_UP_TO_DATE) &&
 	    (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE))
diff --git a/include/linux/drbd.h b/include/linux/drbd.h
index e192167e6145..d28fdd8fcd49 100644
--- a/include/linux/drbd.h
+++ b/include/linux/drbd.h
@@ -51,7 +51,6 @@
 
 #endif
 
-
 extern const char *drbd_buildtag(void);
 #define REL_VERSION "8.3.11"
 #define API_VERSION 88
@@ -159,6 +158,7 @@ enum drbd_ret_code {
 	ERR_CONN_IN_USE         = 159,
 	ERR_MINOR_CONFIGURED    = 160,
 	ERR_MINOR_EXISTS	= 161,
+	ERR_INVALID_REQUEST	= 162,
 
 	/* insert new ones above this line */
 	AFTER_LAST_ERR_CODE
@@ -349,37 +349,4 @@ enum drbd_timeout_flag {
 #define DRBD_MD_INDEX_FLEX_EXT -2
 #define DRBD_MD_INDEX_FLEX_INT -3
 
-/* Start of the new netlink/connector stuff */
-
-enum drbd_ncr_flags {
-	DRBD_NL_CREATE_DEVICE = 0x01,
-	DRBD_NL_SET_DEFAULTS =  0x02,
-};
-#define DRBD_NL_OBJ_NAME_LEN 32
-
-
-/* For searching a vacant cn_idx value */
-#define CN_IDX_STEP			6977
-
-struct drbd_nl_cfg_req {
-	int packet_type;
-	union {
-		struct {
-			unsigned int drbd_minor;
-			enum drbd_ncr_flags flags;
-		};
-		struct {
-			char obj_name[DRBD_NL_OBJ_NAME_LEN];
-		};
-	};
-	unsigned short tag_list[];
-};
-
-struct drbd_nl_cfg_reply {
-	int packet_type;
-	unsigned int minor;
-	int ret_code; /* enum ret_code or set_st_err_t */
-	unsigned short tag_list[]; /* only used with get_* calls */
-};
-
 #endif
diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h
index 8a86f659d363..c8c67239f616 100644
--- a/include/linux/genl_magic_func.h
+++ b/include/linux/genl_magic_func.h
@@ -95,7 +95,7 @@ static struct nla_policy s_name ## _nl_policy[] __read_mostly =		\
 #endif
 #endif
 
-#if 1
+#ifdef GENL_MAGIC_DEBUG
 static void dprint_field(const char *dir, int nla_type,
 		const char *name, void *valp)
 {
-- 
cgit v1.2.3


From 73d901b74f1070c8a664349b564ba6f8bc8ab283 Mon Sep 17 00:00:00 2001
From: Lars Ellenberg <lars.ellenberg@linbit.com>
Date: Mon, 7 Mar 2011 10:38:56 +0100
Subject: drbd: remove now unused connector related files

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 include/linux/drbd_nl.h        | 172 -----------------------------------------
 include/linux/drbd_tag_magic.h |  84 --------------------
 2 files changed, 256 deletions(-)
 delete mode 100644 include/linux/drbd_nl.h
 delete mode 100644 include/linux/drbd_tag_magic.h

(limited to 'include/linux')

diff --git a/include/linux/drbd_nl.h b/include/linux/drbd_nl.h
deleted file mode 100644
index 1216c7a432c5..000000000000
--- a/include/linux/drbd_nl.h
+++ /dev/null
@@ -1,172 +0,0 @@
-/*
-   PAKET( name,
-	  TYPE ( pn, pr, member )
-	  ...
-   )
-
-   You may never reissue one of the pn arguments
-*/
-
-#if !defined(NL_PACKET) || !defined(NL_STRING) || !defined(NL_INTEGER) || !defined(NL_BIT) || !defined(NL_INT64)
-#error "The macros NL_PACKET, NL_STRING, NL_INTEGER, NL_INT64 and NL_BIT needs to be defined"
-#endif
-
-NL_PACKET(primary, 1,
-       NL_BIT(		1,	T_MAY_IGNORE,	primary_force)
-)
-
-NL_PACKET(secondary, 2, )
-
-NL_PACKET(disk_conf, 3,
-	NL_INT64(	2,	T_MAY_IGNORE,	disk_size)
-	NL_STRING(	3,	T_MANDATORY,	backing_dev,	128)
-	NL_STRING(	4,	T_MANDATORY,	meta_dev,	128)
-	NL_INTEGER(	5,	T_MANDATORY,	meta_dev_idx)
-	NL_INTEGER(	6,	T_MAY_IGNORE,	on_io_error)
-	NL_INTEGER(	7,	T_MAY_IGNORE,	fencing)
-	NL_BIT(		37,	T_MAY_IGNORE,	use_bmbv)
-	NL_BIT(		53,	T_MAY_IGNORE,	no_disk_flush)
-	NL_BIT(		54,	T_MAY_IGNORE,	no_md_flush)
-	  /*  55 max_bio_size was available in 8.2.6rc2 */
-	NL_INTEGER(	56,	T_MAY_IGNORE,	max_bio_bvecs)
-	NL_BIT(		57,	T_MAY_IGNORE,	no_disk_barrier)
-	NL_BIT(		58,	T_MAY_IGNORE,	no_disk_drain)
-)
-
-NL_PACKET(detach, 4, )
-
-NL_PACKET(net_conf, 5,
-	NL_STRING(	8,	T_MANDATORY,	my_addr,	128)
-	NL_STRING(	9,	T_MANDATORY,	peer_addr,	128)
-	NL_STRING(	10,	T_MAY_IGNORE,	shared_secret,	SHARED_SECRET_MAX)
-	NL_STRING(	11,	T_MAY_IGNORE,	cram_hmac_alg,	SHARED_SECRET_MAX)
-	NL_STRING(	44,	T_MAY_IGNORE,	integrity_alg,	SHARED_SECRET_MAX)
-	NL_INTEGER(	14,	T_MAY_IGNORE,	timeout)
-	NL_INTEGER(	15,	T_MANDATORY,	wire_protocol)
-	NL_INTEGER(	16,	T_MAY_IGNORE,	try_connect_int)
-	NL_INTEGER(	17,	T_MAY_IGNORE,	ping_int)
-	NL_INTEGER(	18,	T_MAY_IGNORE,	max_epoch_size)
-	NL_INTEGER(	19,	T_MAY_IGNORE,	max_buffers)
-	NL_INTEGER(	20,	T_MAY_IGNORE,	unplug_watermark)
-	NL_INTEGER(	21,	T_MAY_IGNORE,	sndbuf_size)
-	NL_INTEGER(	22,	T_MAY_IGNORE,	ko_count)
-	NL_INTEGER(	24,	T_MAY_IGNORE,	after_sb_0p)
-	NL_INTEGER(	25,	T_MAY_IGNORE,	after_sb_1p)
-	NL_INTEGER(	26,	T_MAY_IGNORE,	after_sb_2p)
-	NL_INTEGER(	39,	T_MAY_IGNORE,	rr_conflict)
-	NL_INTEGER(	40,	T_MAY_IGNORE,	ping_timeo)
-	NL_INTEGER(	67,	T_MAY_IGNORE,	rcvbuf_size)
-	NL_INTEGER(	81,	T_MAY_IGNORE,	on_congestion)
-	NL_INTEGER(	82,	T_MAY_IGNORE,	cong_fill)
-	NL_INTEGER(	83,	T_MAY_IGNORE,	cong_extents)
-	  /* 59 addr_family was available in GIT, never released */
-	NL_BIT(		60,	T_MANDATORY,	mind_af)
-	NL_BIT(		27,	T_MAY_IGNORE,	want_lose)
-	NL_BIT(		28,	T_MAY_IGNORE,	two_primaries)
-	NL_BIT(		41,	T_MAY_IGNORE,	always_asbp)
-	NL_BIT(		61,	T_MAY_IGNORE,	no_cork)
-	NL_BIT(		62,	T_MANDATORY,	auto_sndbuf_size)
-	NL_BIT(		70,	T_MANDATORY,	dry_run)
-)
-
-NL_PACKET(disconnect, 6,
-	NL_BIT(		84,	T_MAY_IGNORE,	force)
-)
-
-NL_PACKET(resize, 7,
-	NL_INT64(		29,	T_MAY_IGNORE,	resize_size)
-	NL_BIT(			68,	T_MAY_IGNORE,	resize_force)
-	NL_BIT(			69,	T_MANDATORY,	no_resync)
-)
-
-NL_PACKET(syncer_conf, 8,
-	NL_INTEGER(	30,	T_MAY_IGNORE,	rate)
-	NL_INTEGER(	31,	T_MAY_IGNORE,	after)
-	NL_INTEGER(	32,	T_MAY_IGNORE,	al_extents)
-/*	NL_INTEGER(     71,	T_MAY_IGNORE,	dp_volume)
- *	NL_INTEGER(     72,	T_MAY_IGNORE,	dp_interval)
- *	NL_INTEGER(     73,	T_MAY_IGNORE,	throttle_th)
- *	NL_INTEGER(     74,	T_MAY_IGNORE,	hold_off_th)
- * feature will be reimplemented differently with 8.3.9 */
-	NL_STRING(      52,     T_MAY_IGNORE,   verify_alg,     SHARED_SECRET_MAX)
-	NL_STRING(      51,     T_MAY_IGNORE,   cpu_mask,       32)
-	NL_STRING(	64,	T_MAY_IGNORE,	csums_alg,	SHARED_SECRET_MAX)
-	NL_BIT(         65,     T_MAY_IGNORE,   use_rle)
-	NL_INTEGER(	75,	T_MAY_IGNORE,	on_no_data)
-	NL_INTEGER(	76,	T_MAY_IGNORE,	c_plan_ahead)
-	NL_INTEGER(     77,	T_MAY_IGNORE,	c_delay_target)
-	NL_INTEGER(     78,	T_MAY_IGNORE,	c_fill_target)
-	NL_INTEGER(     79,	T_MAY_IGNORE,	c_max_rate)
-	NL_INTEGER(     80,	T_MAY_IGNORE,	c_min_rate)
-)
-
-NL_PACKET(invalidate, 9, )
-NL_PACKET(invalidate_peer, 10, )
-NL_PACKET(pause_sync, 11, )
-NL_PACKET(resume_sync, 12, )
-NL_PACKET(suspend_io, 13, )
-NL_PACKET(resume_io, 14, )
-NL_PACKET(outdate, 15, )
-NL_PACKET(get_config, 16, )
-NL_PACKET(get_state, 17,
-	NL_INTEGER(	33,	T_MAY_IGNORE,	state_i)
-)
-
-NL_PACKET(get_uuids, 18,
-	NL_STRING(	34,	T_MAY_IGNORE,	uuids,	(UI_SIZE*sizeof(__u64)))
-	NL_INTEGER(	35,	T_MAY_IGNORE,	uuids_flags)
-)
-
-NL_PACKET(get_timeout_flag, 19,
-	NL_BIT(		36,	T_MAY_IGNORE,	use_degraded)
-)
-
-NL_PACKET(call_helper, 20,
-	NL_STRING(	38,	T_MAY_IGNORE,	helper,		32)
-)
-
-/* Tag nr 42 already allocated in drbd-8.1 development. */
-
-NL_PACKET(sync_progress, 23,
-	NL_INTEGER(	43,	T_MAY_IGNORE,	sync_progress)
-)
-
-NL_PACKET(dump_ee, 24,
-	NL_STRING(	45,	T_MAY_IGNORE,	dump_ee_reason, 32)
-	NL_STRING(	46,	T_MAY_IGNORE,	seen_digest, SHARED_SECRET_MAX)
-	NL_STRING(	47,	T_MAY_IGNORE,	calc_digest, SHARED_SECRET_MAX)
-	NL_INT64(	48,	T_MAY_IGNORE,	ee_sector)
-	NL_INT64(	49,	T_MAY_IGNORE,	ee_block_id)
-	NL_STRING(	50,	T_MAY_IGNORE,	ee_data,	32 << 10)
-)
-
-NL_PACKET(start_ov, 25,
-	NL_INT64(	66,	T_MAY_IGNORE,	start_sector)
-)
-
-NL_PACKET(new_c_uuid, 26,
-       NL_BIT(		63,	T_MANDATORY,	clear_bm)
-)
-
-#ifdef NL_RESPONSE
-NL_RESPONSE(return_code_only, 27)
-#endif
-
-NL_PACKET(new_connection, 28, /* CHT_CTOR */
-	NL_STRING(	85,	T_MANDATORY,	name, DRBD_NL_OBJ_NAME_LEN)
-)
-
-NL_PACKET(new_minor, 29, /* CHT_CONN */
-	NL_INTEGER(	86,	T_MANDATORY,	minor)
-	NL_INTEGER(	87,	T_MANDATORY,	vol_nr)
-)
-
-NL_PACKET(del_minor, 30, ) /* CHT_MINOR */
-NL_PACKET(del_connection, 31, ) /* CHT_CONN */
-
-#undef NL_PACKET
-#undef NL_INTEGER
-#undef NL_INT64
-#undef NL_BIT
-#undef NL_STRING
-#undef NL_RESPONSE
diff --git a/include/linux/drbd_tag_magic.h b/include/linux/drbd_tag_magic.h
deleted file mode 100644
index 069543190516..000000000000
--- a/include/linux/drbd_tag_magic.h
+++ /dev/null
@@ -1,84 +0,0 @@
-#ifndef DRBD_TAG_MAGIC_H
-#define DRBD_TAG_MAGIC_H
-
-#define TT_END     0
-#define TT_REMOVED 0xE000
-
-/* declare packet_type enums */
-enum packet_types {
-#define NL_PACKET(name, number, fields) P_ ## name = number,
-#define NL_RESPONSE(name, number) P_ ## name = number,
-#define NL_INTEGER(pn, pr, member)
-#define NL_INT64(pn, pr, member)
-#define NL_BIT(pn, pr, member)
-#define NL_STRING(pn, pr, member, len)
-#include "drbd_nl.h"
-	P_nl_after_last_packet,
-};
-
-/* These struct are used to deduce the size of the tag lists: */
-#define NL_PACKET(name, number, fields)	\
-	struct name ## _tag_len_struct { fields };
-#define NL_INTEGER(pn, pr, member)		\
-	int member; int tag_and_len ## member;
-#define NL_INT64(pn, pr, member)		\
-	__u64 member; int tag_and_len ## member;
-#define NL_BIT(pn, pr, member)		\
-	unsigned char member:1; int tag_and_len ## member;
-#define NL_STRING(pn, pr, member, len)	\
-	unsigned char member[len]; int member ## _len; \
-	int tag_and_len ## member;
-#include "linux/drbd_nl.h"
-
-/* declare tag-list-sizes */
-static const int tag_list_sizes[] = {
-#define NL_PACKET(name, number, fields) 2 fields ,
-#define NL_INTEGER(pn, pr, member)      + 4 + 4
-#define NL_INT64(pn, pr, member)        + 4 + 8
-#define NL_BIT(pn, pr, member)          + 4 + 1
-#define NL_STRING(pn, pr, member, len)  + 4 + (len)
-#include "drbd_nl.h"
-};
-
-/* The two highest bits are used for the tag type */
-#define TT_MASK      0xC000
-#define TT_INTEGER   0x0000
-#define TT_INT64     0x4000
-#define TT_BIT       0x8000
-#define TT_STRING    0xC000
-/* The next bit indicates if processing of the tag is mandatory */
-#define T_MANDATORY  0x2000
-#define T_MAY_IGNORE 0x0000
-#define TN_MASK      0x1fff
-/* The remaining 13 bits are used to enumerate the tags */
-
-#define tag_type(T)   ((T) & TT_MASK)
-#define tag_number(T) ((T) & TN_MASK)
-
-/* declare tag enums */
-#define NL_PACKET(name, number, fields) fields
-enum drbd_tags {
-#define NL_INTEGER(pn, pr, member)     T_ ## member = pn | TT_INTEGER | pr ,
-#define NL_INT64(pn, pr, member)       T_ ## member = pn | TT_INT64   | pr ,
-#define NL_BIT(pn, pr, member)         T_ ## member = pn | TT_BIT     | pr ,
-#define NL_STRING(pn, pr, member, len) T_ ## member = pn | TT_STRING  | pr ,
-#include "drbd_nl.h"
-};
-
-struct tag {
-	const char *name;
-	int type_n_flags;
-	int max_len;
-};
-
-/* declare tag names */
-#define NL_PACKET(name, number, fields) fields
-static const struct tag tag_descriptions[] = {
-#define NL_INTEGER(pn, pr, member)     [ pn ] = { #member, TT_INTEGER | pr, sizeof(int)   },
-#define NL_INT64(pn, pr, member)       [ pn ] = { #member, TT_INT64   | pr, sizeof(__u64) },
-#define NL_BIT(pn, pr, member)         [ pn ] = { #member, TT_BIT     | pr, sizeof(int)   },
-#define NL_STRING(pn, pr, member, len) [ pn ] = { #member, TT_STRING  | pr, (len)         },
-#include "drbd_nl.h"
-};
-
-#endif
-- 
cgit v1.2.3


From 85f75dd7630436b0aa46a6393099c0f23121f5f0 Mon Sep 17 00:00:00 2001
From: Lars Ellenberg <lars.ellenberg@linbit.com>
Date: Tue, 15 Mar 2011 16:26:37 +0100
Subject: drbd: introduce in-kernel "down" command

This greatly simplifies deconfiguration of whole resources.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_main.c |   2 -
 drivers/block/drbd/drbd_nl.c   | 203 ++++++++++++++++++++++++++++++-----------
 include/linux/drbd_genl.h      |   2 +
 3 files changed, 154 insertions(+), 53 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 113c7b465384..40b7b93def75 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -2303,9 +2303,7 @@ fail:
 
 void drbd_free_tconn(struct drbd_tconn *tconn)
 {
-	mutex_lock(&drbd_cfg_mutex);
 	list_del(&tconn->all_tconn);
-	mutex_unlock(&drbd_cfg_mutex);
 	idr_destroy(&tconn->volumes);
 
 	free_cpumask_var(tconn->cpu_mask);
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index f965dfe4b5ff..d952e877f8d5 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -49,6 +49,7 @@ int drbd_adm_delete_minor(struct sk_buff *skb, struct genl_info *info);
 
 int drbd_adm_create_connection(struct sk_buff *skb, struct genl_info *info);
 int drbd_adm_delete_connection(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_down(struct sk_buff *skb, struct genl_info *info);
 
 int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info);
 int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info);
@@ -1416,6 +1417,18 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 	return 0;
 }
 
+static int adm_detach(struct drbd_conf *mdev)
+{
+	enum drbd_ret_code retcode;
+	drbd_suspend_io(mdev); /* so no-one is stuck in drbd_al_begin_io */
+	retcode = drbd_request_state(mdev, NS(disk, D_DISKLESS));
+	wait_event(mdev->misc_wait,
+			mdev->state.disk != D_DISKLESS ||
+			!atomic_read(&mdev->local_cnt));
+	drbd_resume_io(mdev);
+	return retcode;
+}
+
 /* Detaching the disk is a process in multiple stages.  First we need to lock
  * out application IO, in-flight IO, IO stuck in drbd_al_begin_io.
  * Then we transition to D_DISKLESS, and wait for put_ldev() to return all
@@ -1423,7 +1436,6 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
  * Only then we have finally detached. */
 int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info)
 {
-	struct drbd_conf *mdev;
 	enum drbd_ret_code retcode;
 
 	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
@@ -1432,13 +1444,7 @@ int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info)
 	if (retcode != NO_ERROR)
 		goto out;
 
-	mdev = adm_ctx.mdev;
-	drbd_suspend_io(mdev); /* so no-one is stuck in drbd_al_begin_io */
-	retcode = drbd_request_state(mdev, NS(disk, D_DISKLESS));
-	wait_event(mdev->misc_wait,
-			mdev->state.disk != D_DISKLESS ||
-			!atomic_read(&mdev->local_cnt));
-	drbd_resume_io(mdev);
+	retcode = adm_detach(adm_ctx.mdev);
 out:
 	drbd_adm_finish(info, retcode);
 	return 0;
@@ -1680,10 +1686,49 @@ out:
 	return 0;
 }
 
+static enum drbd_state_rv conn_try_disconnect(struct drbd_tconn *tconn, bool force)
+{
+	enum drbd_state_rv rv;
+	if (force) {
+		spin_lock_irq(&tconn->req_lock);
+		if (tconn->cstate >= C_WF_CONNECTION)
+			_conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
+		spin_unlock_irq(&tconn->req_lock);
+		return SS_SUCCESS;
+	}
+
+	rv = conn_request_state(tconn, NS(conn, C_DISCONNECTING), 0);
+
+	switch (rv) {
+	case SS_NOTHING_TO_DO:
+	case SS_ALREADY_STANDALONE:
+		return SS_SUCCESS;
+	case SS_PRIMARY_NOP:
+		/* Our state checking code wants to see the peer outdated. */
+		rv = conn_request_state(tconn, NS2(conn, C_DISCONNECTING,
+							pdsk, D_OUTDATED), CS_VERBOSE);
+		break;
+	case SS_CW_FAILED_BY_PEER:
+		/* The peer probably wants to see us outdated. */
+		rv = conn_request_state(tconn, NS2(conn, C_DISCONNECTING,
+							disk, D_OUTDATED), 0);
+		if (rv == SS_IS_DISKLESS || rv == SS_LOWER_THAN_OUTDATED) {
+			conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
+			rv = SS_SUCCESS;
+		}
+		break;
+	default:;
+		/* no special handling necessary */
+	}
+
+	return rv;
+}
+
 int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info)
 {
 	struct disconnect_parms parms;
 	struct drbd_tconn *tconn;
+	enum drbd_state_rv rv;
 	enum drbd_ret_code retcode;
 	int err;
 
@@ -1704,35 +1749,8 @@ int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info)
 		}
 	}
 
-	if (parms.force_disconnect) {
-		spin_lock_irq(&tconn->req_lock);
-		if (tconn->cstate >= C_WF_CONNECTION)
-			_conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
-		spin_unlock_irq(&tconn->req_lock);
-		goto done;
-	}
-
-	retcode = conn_request_state(tconn, NS(conn, C_DISCONNECTING), 0);
-
-	if (retcode == SS_NOTHING_TO_DO)
-		goto done;
-	else if (retcode == SS_ALREADY_STANDALONE)
-		goto done;
-	else if (retcode == SS_PRIMARY_NOP) {
-		/* Our state checking code wants to see the peer outdated. */
-		retcode = conn_request_state(tconn, NS2(conn, C_DISCONNECTING,
-							pdsk, D_OUTDATED), CS_VERBOSE);
-	} else if (retcode == SS_CW_FAILED_BY_PEER) {
-		/* The peer probably wants to see us outdated. */
-		retcode = conn_request_state(tconn, NS2(conn, C_DISCONNECTING,
-							disk, D_OUTDATED), 0);
-		if (retcode == SS_IS_DISKLESS || retcode == SS_LOWER_THAN_OUTDATED) {
-			conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
-			retcode = SS_SUCCESS;
-		}
-	}
-
-	if (retcode < SS_SUCCESS)
+	rv = conn_try_disconnect(tconn, parms.force_disconnect);
+	if (rv < SS_SUCCESS)
 		goto fail;
 
 	if (wait_event_interruptible(tconn->ping_wait,
@@ -1743,7 +1761,6 @@ int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info)
 		goto fail;
 	}
 
- done:
 	retcode = NO_ERROR;
  fail:
 	drbd_adm_finish(info, retcode);
@@ -2644,9 +2661,21 @@ out:
 	return 0;
 }
 
+static enum drbd_ret_code adm_delete_minor(struct drbd_conf *mdev)
+{
+	if (mdev->state.disk == D_DISKLESS &&
+	    /* no need to be mdev->state.conn == C_STANDALONE &&
+	     * we may want to delete a minor from a live replication group.
+	     */
+	    mdev->state.role == R_SECONDARY) {
+		drbd_delete_device(mdev_to_minor(mdev));
+		return NO_ERROR;
+	} else
+		return ERR_MINOR_CONFIGURED;
+}
+
 int drbd_adm_delete_minor(struct sk_buff *skb, struct genl_info *info)
 {
-	struct drbd_conf *mdev;
 	enum drbd_ret_code retcode;
 
 	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
@@ -2655,19 +2684,89 @@ int drbd_adm_delete_minor(struct sk_buff *skb, struct genl_info *info)
 	if (retcode != NO_ERROR)
 		goto out;
 
-	mdev = adm_ctx.mdev;
-	if (mdev->state.disk == D_DISKLESS &&
-	    /* no need to be mdev->state.conn == C_STANDALONE &&
-	     * we may want to delete a minor from a live replication group.
-	     */
-	    mdev->state.role == R_SECONDARY) {
-		drbd_delete_device(mdev_to_minor(mdev));
-		retcode = NO_ERROR;
-		/* if this was the last volume of this connection,
-		 * this will terminate all threads */
+	mutex_lock(&drbd_cfg_mutex);
+	retcode = adm_delete_minor(adm_ctx.mdev);
+	mutex_unlock(&drbd_cfg_mutex);
+	/* if this was the last volume of this connection,
+	 * this will terminate all threads */
+	if (retcode == NO_ERROR)
 		conn_reconfig_done(adm_ctx.tconn);
-	} else
-		retcode = ERR_MINOR_CONFIGURED;
+out:
+	drbd_adm_finish(info, retcode);
+	return 0;
+}
+
+int drbd_adm_down(struct sk_buff *skb, struct genl_info *info)
+{
+	enum drbd_ret_code retcode;
+	enum drbd_state_rv rv;
+	struct drbd_conf *mdev;
+	unsigned i;
+
+	retcode = drbd_adm_prepare(skb, info, 0);
+	if (!adm_ctx.reply_skb)
+		return retcode;
+	if (retcode != NO_ERROR)
+		goto out;
+
+	if (!adm_ctx.tconn) {
+		retcode = ERR_CONN_NOT_KNOWN;
+		goto out;
+	}
+
+	mutex_lock(&drbd_cfg_mutex);
+	/* demote */
+	idr_for_each_entry(&adm_ctx.tconn->volumes, mdev, i) {
+		retcode = drbd_set_role(mdev, R_SECONDARY, 0);
+		if (retcode < SS_SUCCESS) {
+			drbd_msg_put_info("failed to demote");
+			goto out_unlock;
+		}
+	}
+
+	/* disconnect */
+	rv = conn_try_disconnect(adm_ctx.tconn, 0);
+	if (rv < SS_SUCCESS) {
+		retcode = rv; /* enum type mismatch! */
+		drbd_msg_put_info("failed to disconnect");
+		goto out_unlock;
+	}
+
+	/* detach */
+	idr_for_each_entry(&adm_ctx.tconn->volumes, mdev, i) {
+		rv = adm_detach(mdev);
+		if (rv < SS_SUCCESS) {
+			retcode = rv; /* enum type mismatch! */
+			drbd_msg_put_info("failed to detach");
+			goto out_unlock;
+		}
+	}
+
+	/* delete volumes */
+	idr_for_each_entry(&adm_ctx.tconn->volumes, mdev, i) {
+		retcode = adm_delete_minor(mdev);
+		if (retcode != NO_ERROR) {
+			/* "can not happen" */
+			drbd_msg_put_info("failed to delete volume");
+			goto out_unlock;
+		}
+	}
+
+	/* stop all threads */
+	conn_reconfig_done(adm_ctx.tconn);
+
+	/* delete connection */
+	if (conn_lowest_minor(adm_ctx.tconn) < 0) {
+		drbd_free_tconn(adm_ctx.tconn);
+		retcode = NO_ERROR;
+	} else {
+		/* "can not happen" */
+		retcode = ERR_CONN_IN_USE;
+		drbd_msg_put_info("failed to delete connection");
+		goto out_unlock;
+	}
+out_unlock:
+	mutex_unlock(&drbd_cfg_mutex);
 out:
 	drbd_adm_finish(info, retcode);
 	return 0;
@@ -2683,12 +2782,14 @@ int drbd_adm_delete_connection(struct sk_buff *skb, struct genl_info *info)
 	if (retcode != NO_ERROR)
 		goto out;
 
+	mutex_lock(&drbd_cfg_mutex);
 	if (conn_lowest_minor(adm_ctx.tconn) < 0) {
 		drbd_free_tconn(adm_ctx.tconn);
 		retcode = NO_ERROR;
 	} else {
 		retcode = ERR_CONN_IN_USE;
 	}
+	mutex_unlock(&drbd_cfg_mutex);
 
 out:
 	drbd_adm_finish(info, retcode);
diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h
index 84e16848f7a1..a07d69279b1a 100644
--- a/include/linux/drbd_genl.h
+++ b/include/linux/drbd_genl.h
@@ -347,3 +347,5 @@ GENL_op(DRBD_ADM_OUTDATE,	25, GENL_doit(drbd_adm_outdate),
 	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED))
 GENL_op(DRBD_ADM_GET_TIMEOUT_TYPE, 26, GENL_doit(drbd_adm_get_timeout_type),
 	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED))
+GENL_op(DRBD_ADM_DOWN,		27, GENL_doit(drbd_adm_down),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED))
-- 
cgit v1.2.3


From 047e95e259e81d7b97eca10cda0aa93082531ac1 Mon Sep 17 00:00:00 2001
From: Philipp Reisner <philipp.reisner@linbit.com>
Date: Wed, 16 Mar 2011 14:43:36 +0100
Subject: drbd: Allow volumes to become primary only on one side

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_state.c   | 21 ++++++++++++++++++---
 drivers/block/drbd/drbd_strings.c |  1 +
 include/linux/drbd.h              |  3 ++-
 3 files changed, 21 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c
index 886b996ec7b3..11685658659e 100644
--- a/drivers/block/drbd/drbd_state.c
+++ b/drivers/block/drbd/drbd_state.c
@@ -329,6 +329,18 @@ static void print_state_change(struct drbd_conf *mdev, union drbd_state os, unio
 		dev_info(DEV, "%s\n", pb);
 }
 
+static bool vol_has_primary_peer(struct drbd_tconn *tconn)
+{
+	struct drbd_conf *mdev;
+	int minor;
+
+	idr_for_each_entry(&tconn->volumes, mdev, minor) {
+		if (mdev->state.peer == R_PRIMARY)
+			return true;
+	}
+	return false;
+}
+
 /**
  * is_valid_state() - Returns an SS_ error code if ns is not valid
  * @mdev:	DRBD device.
@@ -349,9 +361,12 @@ is_valid_state(struct drbd_conf *mdev, union drbd_state ns)
 	}
 
 	if (get_net_conf(mdev->tconn)) {
-		if (!mdev->tconn->net_conf->two_primaries &&
-		    ns.role == R_PRIMARY && ns.peer == R_PRIMARY)
-			rv = SS_TWO_PRIMARIES;
+		if (!mdev->tconn->net_conf->two_primaries && ns.role == R_PRIMARY) {
+			if (ns.peer == R_PRIMARY)
+				rv = SS_TWO_PRIMARIES;
+			else if (vol_has_primary_peer(mdev->tconn))
+				rv = SS_O_VOL_PEER_PRI;
+			}
 		put_net_conf(mdev->tconn);
 	}
 
diff --git a/drivers/block/drbd/drbd_strings.c b/drivers/block/drbd/drbd_strings.c
index c44a2a602772..9a664bd27404 100644
--- a/drivers/block/drbd/drbd_strings.c
+++ b/drivers/block/drbd/drbd_strings.c
@@ -89,6 +89,7 @@ static const char *drbd_state_sw_errors[] = {
 	[-SS_LOWER_THAN_OUTDATED] = "Disk state is lower than outdated",
 	[-SS_IN_TRANSIENT_STATE] = "In transient state, retry after next state change",
 	[-SS_CONCURRENT_ST_CHG] = "Concurrent state changes detected and aborted",
+	[-SS_O_VOL_PEER_PRI] = "Other vol primary on peer not allowed by config",
 };
 
 const char *drbd_conn_str(enum drbd_conns s)
diff --git a/include/linux/drbd.h b/include/linux/drbd.h
index d28fdd8fcd49..9cdb888607ae 100644
--- a/include/linux/drbd.h
+++ b/include/linux/drbd.h
@@ -300,7 +300,8 @@ enum drbd_state_rv {
 	SS_NOT_SUPPORTED = -17,      /* drbd-8.2 only */
 	SS_IN_TRANSIENT_STATE = -18,  /* Retry after the next state change */
 	SS_CONCURRENT_ST_CHG = -19,   /* Concurrent cluster side state change! */
-	SS_AFTER_LAST_ERROR = -20,    /* Keep this at bottom */
+	SS_O_VOL_PEER_PRI = -20,
+	SS_AFTER_LAST_ERROR = -21,    /* Keep this at bottom */
 };
 
 /* from drbd_strings.c */
-- 
cgit v1.2.3


From d1fb57435c108b8dd66d7f47b4c60c1798dcae4c Mon Sep 17 00:00:00 2001
From: "Patil, Rachna" <rachna@ti.com>
Date: Tue, 16 Oct 2012 12:55:39 +0530
Subject: input: TSC: ti_tscadc: Add Step configuration as platform data

There are 16 programmable Step Configuration
registers which are used by the sequencer.
Program the Steps in order to configure a channel
input to be sampled. If the same step is applied
several times, the coordinate values read are more
accurate.
Hence we provide the user an option of how many steps
should be configured.

For ex: If this value is assigned as 4, This means that
4 steps are applied to read x co-ordinate and 4 steps to read
y co-ordinate. Furtheron the interrupt handler already
holds code to use delta filter and report the best value
out of these values to the input sub-system.

Signed-off-by: Patil, Rachna <rachna@ti.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/input/touchscreen/ti_tscadc.c | 25 +++++++++++++------------
 include/linux/input/ti_tscadc.h       |  6 ++++++
 2 files changed, 19 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/input/touchscreen/ti_tscadc.c b/drivers/input/touchscreen/ti_tscadc.c
index d198cab61a69..c1bd8e5c132c 100644
--- a/drivers/input/touchscreen/ti_tscadc.c
+++ b/drivers/input/touchscreen/ti_tscadc.c
@@ -41,10 +41,6 @@
 #define REG_CHARGEDELAY		0x060
 #define REG_STEPCONFIG(n)	(0x64 + ((n - 1) * 8))
 #define REG_STEPDELAY(n)	(0x68 + ((n - 1) * 8))
-#define REG_STEPCONFIG13	0x0C4
-#define REG_STEPDELAY13		0x0C8
-#define REG_STEPCONFIG14	0x0CC
-#define REG_STEPDELAY14		0x0D0
 #define REG_FIFO0CNT		0xE4
 #define REG_FIFO1THR		0xF4
 #define REG_FIFO0		0x100
@@ -134,6 +130,7 @@ struct tscadc {
 	unsigned int		wires;
 	unsigned int		x_plate_resistance;
 	bool			pen_down;
+	int			steps_to_configure;
 };
 
 static unsigned int tscadc_readl(struct tscadc *ts, unsigned int reg)
@@ -150,9 +147,10 @@ static void tscadc_writel(struct tscadc *tsc, unsigned int reg,
 static void tscadc_step_config(struct tscadc *ts_dev)
 {
 	unsigned int	config;
-	int i;
+	int i, total_steps;
 
 	/* Configure the Step registers */
+	total_steps = 2 * ts_dev->steps_to_configure;
 
 	config = STEPCONFIG_MODE_HWSYNC |
 			STEPCONFIG_AVG_16 | STEPCONFIG_XPP;
@@ -170,7 +168,7 @@ static void tscadc_step_config(struct tscadc *ts_dev)
 		break;
 	}
 
-	for (i = 1; i < 7; i++) {
+	for (i = 1; i <= ts_dev->steps_to_configure; i++) {
 		tscadc_writel(ts_dev, REG_STEPCONFIG(i), config);
 		tscadc_writel(ts_dev, REG_STEPDELAY(i), STEPCONFIG_OPENDLY);
 	}
@@ -192,7 +190,7 @@ static void tscadc_step_config(struct tscadc *ts_dev)
 		break;
 	}
 
-	for (i = 7; i < 13; i++) {
+	for (i = (ts_dev->steps_to_configure + 1); i <= total_steps; i++) {
 		tscadc_writel(ts_dev, REG_STEPCONFIG(i), config);
 		tscadc_writel(ts_dev, REG_STEPDELAY(i), STEPCONFIG_OPENDLY);
 	}
@@ -211,12 +209,14 @@ static void tscadc_step_config(struct tscadc *ts_dev)
 	config = STEPCONFIG_MODE_HWSYNC |
 			STEPCONFIG_AVG_16 | STEPCONFIG_YPP |
 			STEPCONFIG_XNN | STEPCONFIG_INM_ADCREFM;
-	tscadc_writel(ts_dev, REG_STEPCONFIG13, config);
-	tscadc_writel(ts_dev, REG_STEPDELAY13, STEPCONFIG_OPENDLY);
+	tscadc_writel(ts_dev, REG_STEPCONFIG(total_steps + 1), config);
+	tscadc_writel(ts_dev, REG_STEPDELAY(total_steps + 1),
+			STEPCONFIG_OPENDLY);
 
 	config |= STEPCONFIG_INP_AN3 | STEPCONFIG_FIFO1;
-	tscadc_writel(ts_dev, REG_STEPCONFIG14, config);
-	tscadc_writel(ts_dev, REG_STEPDELAY14, STEPCONFIG_OPENDLY);
+	tscadc_writel(ts_dev, REG_STEPCONFIG(total_steps + 2), config);
+	tscadc_writel(ts_dev, REG_STEPDELAY(total_steps + 2),
+			STEPCONFIG_OPENDLY);
 
 	tscadc_writel(ts_dev, REG_SE, STPENB_STEPENB);
 }
@@ -379,6 +379,7 @@ static int __devinit tscadc_probe(struct platform_device *pdev)
 	ts_dev->irq = irq;
 	ts_dev->wires = pdata->wires;
 	ts_dev->x_plate_resistance = pdata->x_plate_resistance;
+	ts_dev->steps_to_configure = pdata->steps_to_configure;
 
 	res = request_mem_region(res->start, resource_size(res), pdev->name);
 	if (!res) {
@@ -447,7 +448,7 @@ static int __devinit tscadc_probe(struct platform_device *pdev)
 	tscadc_idle_config(ts_dev);
 	tscadc_writel(ts_dev, REG_IRQENABLE, IRQENB_FIFO1THRES);
 	tscadc_step_config(ts_dev);
-	tscadc_writel(ts_dev, REG_FIFO1THR, 6);
+	tscadc_writel(ts_dev, REG_FIFO1THR, ts_dev->steps_to_configure);
 
 	ctrl |= CNTRLREG_TSCSSENB;
 	tscadc_writel(ts_dev, REG_CTRL, ctrl);
diff --git a/include/linux/input/ti_tscadc.h b/include/linux/input/ti_tscadc.h
index b10a527a92a4..ad442a3eb68b 100644
--- a/include/linux/input/ti_tscadc.h
+++ b/include/linux/input/ti_tscadc.h
@@ -7,11 +7,17 @@
  *			i.e. 4/5/8 wire touchscreen support
  *			on the platform.
  * @x_plate_resistance:	X plate resistance.
+ * @steps_to_configure:	The sequencer supports a total of
+ *			16 programmable steps.
+ *			A step configured to read a single
+ *			co-ordinate value, can be applied
+ *			more number of times for better results.
  */
 
 struct tsc_data {
 	int wires;
 	int x_plate_resistance;
+	int steps_to_configure;
 };
 
 #endif
-- 
cgit v1.2.3


From 55c04de5176ea3eac6fdc469a6a063c5cb91ed7c Mon Sep 17 00:00:00 2001
From: "Patil, Rachna" <rachna@ti.com>
Date: Tue, 16 Oct 2012 12:55:42 +0530
Subject: input: TSC: ti_tscadc: Rename the existing touchscreen driver

Make way for addition of MFD driver.
The existing touchsreen driver is a MFD client.
For better readability we rename the file to
indicate its functionality as only touchscreen.

Signed-off-by: Patil, Rachna <rachna@ti.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/input/touchscreen/Kconfig         |   4 +-
 drivers/input/touchscreen/Makefile        |   2 +-
 drivers/input/touchscreen/ti_am335x_tsc.c | 522 ++++++++++++++++++++++++++++++
 drivers/input/touchscreen/ti_tscadc.c     | 522 ------------------------------
 include/linux/input/ti_am335x_tsc.h       |  23 ++
 include/linux/input/ti_tscadc.h           |  23 --
 6 files changed, 548 insertions(+), 548 deletions(-)
 create mode 100644 drivers/input/touchscreen/ti_am335x_tsc.c
 delete mode 100644 drivers/input/touchscreen/ti_tscadc.c
 create mode 100644 include/linux/input/ti_am335x_tsc.h
 delete mode 100644 include/linux/input/ti_tscadc.h

(limited to 'include/linux')

diff --git a/drivers/input/touchscreen/Kconfig b/drivers/input/touchscreen/Kconfig
index f7668b24c378..d31dc5faeaaf 100644
--- a/drivers/input/touchscreen/Kconfig
+++ b/drivers/input/touchscreen/Kconfig
@@ -529,7 +529,7 @@ config TOUCHSCREEN_TOUCHWIN
 	  To compile this driver as a module, choose M here: the
 	  module will be called touchwin.
 
-config TOUCHSCREEN_TI_TSCADC
+config TOUCHSCREEN_TI_AM335X_TSC
 	tristate "TI Touchscreen Interface"
 	depends on ARCH_OMAP2PLUS
 	help
@@ -539,7 +539,7 @@ config TOUCHSCREEN_TI_TSCADC
 	  If unsure, say N.
 
 	  To compile this driver as a module, choose M here: the
-	  module will be called ti_tscadc.
+	  module will be called ti_am335x_tsc.
 
 config TOUCHSCREEN_ATMEL_TSADCC
 	tristate "Atmel Touchscreen Interface"
diff --git a/drivers/input/touchscreen/Makefile b/drivers/input/touchscreen/Makefile
index 178eb128d90f..7c4c78ebe49e 100644
--- a/drivers/input/touchscreen/Makefile
+++ b/drivers/input/touchscreen/Makefile
@@ -52,7 +52,7 @@ obj-$(CONFIG_TOUCHSCREEN_PIXCIR)	+= pixcir_i2c_ts.o
 obj-$(CONFIG_TOUCHSCREEN_S3C2410)	+= s3c2410_ts.o
 obj-$(CONFIG_TOUCHSCREEN_ST1232)	+= st1232.o
 obj-$(CONFIG_TOUCHSCREEN_STMPE)		+= stmpe-ts.o
-obj-$(CONFIG_TOUCHSCREEN_TI_TSCADC)	+= ti_tscadc.o
+obj-$(CONFIG_TOUCHSCREEN_TI_AM335X_TSC)	+= ti_am335x_tsc.o
 obj-$(CONFIG_TOUCHSCREEN_TNETV107X)	+= tnetv107x-ts.o
 obj-$(CONFIG_TOUCHSCREEN_TOUCHIT213)	+= touchit213.o
 obj-$(CONFIG_TOUCHSCREEN_TOUCHRIGHT)	+= touchright.o
diff --git a/drivers/input/touchscreen/ti_am335x_tsc.c b/drivers/input/touchscreen/ti_am335x_tsc.c
new file mode 100644
index 000000000000..462950acb97b
--- /dev/null
+++ b/drivers/input/touchscreen/ti_am335x_tsc.c
@@ -0,0 +1,522 @@
+/*
+ * TI Touch Screen driver
+ *
+ * Copyright (C) 2011 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation version 2.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/input.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/clk.h>
+#include <linux/platform_device.h>
+#include <linux/io.h>
+#include <linux/input/ti_am335x_tsc.h>
+#include <linux/delay.h>
+
+#define REG_RAWIRQSTATUS	0x024
+#define REG_IRQSTATUS		0x028
+#define REG_IRQENABLE		0x02C
+#define REG_IRQWAKEUP		0x034
+#define REG_CTRL		0x040
+#define REG_ADCFSM		0x044
+#define REG_CLKDIV		0x04C
+#define REG_SE			0x054
+#define REG_IDLECONFIG		0x058
+#define REG_CHARGECONFIG	0x05C
+#define REG_CHARGEDELAY		0x060
+#define REG_STEPCONFIG(n)	(0x64 + ((n - 1) * 8))
+#define REG_STEPDELAY(n)	(0x68 + ((n - 1) * 8))
+#define REG_FIFO0CNT		0xE4
+#define REG_FIFO0THR		0xE8
+#define REG_FIFO1THR		0xF4
+#define REG_FIFO0		0x100
+#define REG_FIFO1		0x200
+
+/*	Register Bitfields	*/
+#define IRQWKUP_ENB		BIT(0)
+
+/* Step Enable */
+#define STEPENB_MASK		(0x1FFFF << 0)
+#define STEPENB(val)		(val << 0)
+#define STPENB_STEPENB		STEPENB(0x7FFF)
+
+/* IRQ enable */
+#define IRQENB_FIFO0THRES	BIT(2)
+#define IRQENB_FIFO1THRES	BIT(5)
+#define IRQENB_PENUP		BIT(9)
+
+/* Step Configuration */
+#define STEPCONFIG_MODE_MASK	(3 << 0)
+#define STEPCONFIG_MODE(val)	(val << 0)
+#define STEPCONFIG_MODE_HWSYNC	STEPCONFIG_MODE(2)
+#define STEPCONFIG_AVG_MASK	(7 << 2)
+#define STEPCONFIG_AVG(val)	(val << 2)
+#define STEPCONFIG_AVG_16	STEPCONFIG_AVG(4)
+#define STEPCONFIG_XPP		BIT(5)
+#define STEPCONFIG_XNN		BIT(6)
+#define STEPCONFIG_YPP		BIT(7)
+#define STEPCONFIG_YNN		BIT(8)
+#define STEPCONFIG_XNP		BIT(9)
+#define STEPCONFIG_YPN		BIT(10)
+#define STEPCONFIG_INM_MASK	(0xF << 15)
+#define STEPCONFIG_INM(val)	(val << 15)
+#define STEPCONFIG_INM_ADCREFM	STEPCONFIG_INM(8)
+#define STEPCONFIG_INP_MASK	(0xF << 19)
+#define STEPCONFIG_INP(val)	(val << 19)
+#define STEPCONFIG_INP_AN2	STEPCONFIG_INP(2)
+#define STEPCONFIG_INP_AN3	STEPCONFIG_INP(3)
+#define STEPCONFIG_INP_AN4	STEPCONFIG_INP(4)
+#define STEPCONFIG_INP_ADCREFM	STEPCONFIG_INP(8)
+#define STEPCONFIG_FIFO1	BIT(26)
+
+/* Delay register */
+#define STEPDELAY_OPEN_MASK	(0x3FFFF << 0)
+#define STEPDELAY_OPEN(val)	(val << 0)
+#define STEPCONFIG_OPENDLY	STEPDELAY_OPEN(0x098)
+
+/* Charge Config */
+#define STEPCHARGE_RFP_MASK	(7 << 12)
+#define STEPCHARGE_RFP(val)	(val << 12)
+#define STEPCHARGE_RFP_XPUL	STEPCHARGE_RFP(1)
+#define STEPCHARGE_INM_MASK	(0xF << 15)
+#define STEPCHARGE_INM(val)	(val << 15)
+#define STEPCHARGE_INM_AN1	STEPCHARGE_INM(1)
+#define STEPCHARGE_INP_MASK	(0xF << 19)
+#define STEPCHARGE_INP(val)	(val << 19)
+#define STEPCHARGE_INP_AN1	STEPCHARGE_INP(1)
+#define STEPCHARGE_RFM_MASK	(3 << 23)
+#define STEPCHARGE_RFM(val)	(val << 23)
+#define STEPCHARGE_RFM_XNUR	STEPCHARGE_RFM(1)
+
+/* Charge delay */
+#define CHARGEDLY_OPEN_MASK	(0x3FFFF << 0)
+#define CHARGEDLY_OPEN(val)	(val << 0)
+#define CHARGEDLY_OPENDLY	CHARGEDLY_OPEN(1)
+
+/* Control register */
+#define CNTRLREG_TSCSSENB	BIT(0)
+#define CNTRLREG_STEPID		BIT(1)
+#define CNTRLREG_STEPCONFIGWRT	BIT(2)
+#define CNTRLREG_AFE_CTRL_MASK	(3 << 5)
+#define CNTRLREG_AFE_CTRL(val)	(val << 5)
+#define CNTRLREG_4WIRE		CNTRLREG_AFE_CTRL(1)
+#define CNTRLREG_5WIRE		CNTRLREG_AFE_CTRL(2)
+#define CNTRLREG_8WIRE		CNTRLREG_AFE_CTRL(3)
+#define CNTRLREG_TSCENB		BIT(7)
+
+#define ADCFSM_STEPID		0x10
+#define SEQ_SETTLE		275
+#define ADC_CLK			3000000
+#define MAX_12BIT		((1 << 12) - 1)
+
+struct titsc {
+	struct input_dev	*input;
+	struct clk		*tsc_ick;
+	void __iomem		*tsc_base;
+	unsigned int		irq;
+	unsigned int		wires;
+	unsigned int		x_plate_resistance;
+	bool			pen_down;
+	int			steps_to_configure;
+};
+
+static unsigned int titsc_readl(struct titsc *ts, unsigned int reg)
+{
+	return readl(ts->tsc_base + reg);
+}
+
+static void titsc_writel(struct titsc *tsc, unsigned int reg,
+					unsigned int val)
+{
+	writel(val, tsc->tsc_base + reg);
+}
+
+static void titsc_step_config(struct titsc *ts_dev)
+{
+	unsigned int	config;
+	int i, total_steps;
+
+	/* Configure the Step registers */
+	total_steps = 2 * ts_dev->steps_to_configure;
+
+	config = STEPCONFIG_MODE_HWSYNC |
+			STEPCONFIG_AVG_16 | STEPCONFIG_XPP;
+	switch (ts_dev->wires) {
+	case 4:
+		config |= STEPCONFIG_INP_AN2 | STEPCONFIG_XNN;
+		break;
+	case 5:
+		config |= STEPCONFIG_YNN |
+				STEPCONFIG_INP_AN4 | STEPCONFIG_XNN |
+				STEPCONFIG_YPP;
+		break;
+	case 8:
+		config |= STEPCONFIG_INP_AN2 | STEPCONFIG_XNN;
+		break;
+	}
+
+	for (i = 1; i <= ts_dev->steps_to_configure; i++) {
+		titsc_writel(ts_dev, REG_STEPCONFIG(i), config);
+		titsc_writel(ts_dev, REG_STEPDELAY(i), STEPCONFIG_OPENDLY);
+	}
+
+	config = 0;
+	config = STEPCONFIG_MODE_HWSYNC |
+			STEPCONFIG_AVG_16 | STEPCONFIG_YNN |
+			STEPCONFIG_INM_ADCREFM | STEPCONFIG_FIFO1;
+	switch (ts_dev->wires) {
+	case 4:
+		config |= STEPCONFIG_YPP;
+		break;
+	case 5:
+		config |= STEPCONFIG_XPP | STEPCONFIG_INP_AN4 |
+				STEPCONFIG_XNP | STEPCONFIG_YPN;
+		break;
+	case 8:
+		config |= STEPCONFIG_YPP;
+		break;
+	}
+
+	for (i = (ts_dev->steps_to_configure + 1); i <= total_steps; i++) {
+		titsc_writel(ts_dev, REG_STEPCONFIG(i), config);
+		titsc_writel(ts_dev, REG_STEPDELAY(i), STEPCONFIG_OPENDLY);
+	}
+
+	config = 0;
+	/* Charge step configuration */
+	config = STEPCONFIG_XPP | STEPCONFIG_YNN |
+			STEPCHARGE_RFP_XPUL | STEPCHARGE_RFM_XNUR |
+			STEPCHARGE_INM_AN1 | STEPCHARGE_INP_AN1;
+
+	titsc_writel(ts_dev, REG_CHARGECONFIG, config);
+	titsc_writel(ts_dev, REG_CHARGEDELAY, CHARGEDLY_OPENDLY);
+
+	config = 0;
+	/* Configure to calculate pressure */
+	config = STEPCONFIG_MODE_HWSYNC |
+			STEPCONFIG_AVG_16 | STEPCONFIG_YPP |
+			STEPCONFIG_XNN | STEPCONFIG_INM_ADCREFM;
+	titsc_writel(ts_dev, REG_STEPCONFIG(total_steps + 1), config);
+	titsc_writel(ts_dev, REG_STEPDELAY(total_steps + 1),
+			STEPCONFIG_OPENDLY);
+
+	config |= STEPCONFIG_INP_AN3 | STEPCONFIG_FIFO1;
+	titsc_writel(ts_dev, REG_STEPCONFIG(total_steps + 2), config);
+	titsc_writel(ts_dev, REG_STEPDELAY(total_steps + 2),
+			STEPCONFIG_OPENDLY);
+
+	titsc_writel(ts_dev, REG_SE, STPENB_STEPENB);
+}
+
+static void titsc_idle_config(struct titsc *ts_config)
+{
+	unsigned int idleconfig;
+
+	idleconfig = STEPCONFIG_YNN |
+			STEPCONFIG_INM_ADCREFM |
+			STEPCONFIG_YPN | STEPCONFIG_INP_ADCREFM;
+	titsc_writel(ts_config, REG_IDLECONFIG, idleconfig);
+}
+
+static void titsc_read_coordinates(struct titsc *ts_dev,
+				    unsigned int *x, unsigned int *y)
+{
+	unsigned int fifocount = titsc_readl(ts_dev, REG_FIFO0CNT);
+	unsigned int prev_val_x = ~0, prev_val_y = ~0;
+	unsigned int prev_diff_x = ~0, prev_diff_y = ~0;
+	unsigned int read, diff;
+	unsigned int i;
+
+	/*
+	 * Delta filter is used to remove large variations in sampled
+	 * values from ADC. The filter tries to predict where the next
+	 * coordinate could be. This is done by taking a previous
+	 * coordinate and subtracting it form current one. Further the
+	 * algorithm compares the difference with that of a present value,
+	 * if true the value is reported to the sub system.
+	 */
+	for (i = 0; i < fifocount - 1; i++) {
+		read = titsc_readl(ts_dev, REG_FIFO0) & 0xfff;
+		diff = abs(read - prev_val_x);
+		if (diff < prev_diff_x) {
+			prev_diff_x = diff;
+			*x = read;
+		}
+		prev_val_x = read;
+
+		read = titsc_readl(ts_dev, REG_FIFO1) & 0xfff;
+		diff = abs(read - prev_val_y);
+		if (diff < prev_diff_y) {
+			prev_diff_y = diff;
+			*y = read;
+		}
+		prev_val_y = read;
+	}
+}
+
+static irqreturn_t titsc_irq(int irq, void *dev)
+{
+	struct titsc *ts_dev = dev;
+	struct input_dev *input_dev = ts_dev->input;
+	unsigned int status, irqclr = 0;
+	unsigned int x = 0, y = 0;
+	unsigned int z1, z2, z;
+	unsigned int fsm;
+
+	status = titsc_readl(ts_dev, REG_IRQSTATUS);
+	if (status & IRQENB_FIFO0THRES) {
+		titsc_read_coordinates(ts_dev, &x, &y);
+
+		z1 = titsc_readl(ts_dev, REG_FIFO0) & 0xfff;
+		z2 = titsc_readl(ts_dev, REG_FIFO1) & 0xfff;
+
+		if (ts_dev->pen_down && z1 != 0 && z2 != 0) {
+			/*
+			 * Calculate pressure using formula
+			 * Resistance(touch) = x plate resistance *
+			 * x postion/4096 * ((z2 / z1) - 1)
+			 */
+			z = z2 - z1;
+			z *= x;
+			z *= ts_dev->x_plate_resistance;
+			z /= z1;
+			z = (z + 2047) >> 12;
+
+			if (z <= MAX_12BIT) {
+				input_report_abs(input_dev, ABS_X, x);
+				input_report_abs(input_dev, ABS_Y, y);
+				input_report_abs(input_dev, ABS_PRESSURE, z);
+				input_report_key(input_dev, BTN_TOUCH, 1);
+				input_sync(input_dev);
+			}
+		}
+		irqclr |= IRQENB_FIFO0THRES;
+	}
+
+	/*
+	 * Time for sequencer to settle, to read
+	 * correct state of the sequencer.
+	 */
+	udelay(SEQ_SETTLE);
+
+	status = titsc_readl(ts_dev, REG_RAWIRQSTATUS);
+	if (status & IRQENB_PENUP) {
+		/* Pen up event */
+		fsm = titsc_readl(ts_dev, REG_ADCFSM);
+		if (fsm == ADCFSM_STEPID) {
+			ts_dev->pen_down = false;
+			input_report_key(input_dev, BTN_TOUCH, 0);
+			input_report_abs(input_dev, ABS_PRESSURE, 0);
+			input_sync(input_dev);
+		} else {
+			ts_dev->pen_down = true;
+		}
+		irqclr |= IRQENB_PENUP;
+	}
+
+	titsc_writel(ts_dev, REG_IRQSTATUS, irqclr);
+
+	titsc_writel(ts_dev, REG_SE, STPENB_STEPENB);
+	return IRQ_HANDLED;
+}
+
+/*
+ * The functions for inserting/removing driver as a module.
+ */
+
+static int __devinit titsc_probe(struct platform_device *pdev)
+{
+	const struct tsc_data *pdata = pdev->dev.platform_data;
+	struct resource *res;
+	struct titsc *ts_dev;
+	struct input_dev *input_dev;
+	struct clk *clk;
+	int err;
+	int clk_value, ctrl, irq;
+
+	if (!pdata) {
+		dev_err(&pdev->dev, "missing platform data.\n");
+		return -EINVAL;
+	}
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res) {
+		dev_err(&pdev->dev, "no memory resource defined.\n");
+		return -EINVAL;
+	}
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0) {
+		dev_err(&pdev->dev, "no irq ID is specified.\n");
+		return -EINVAL;
+	}
+
+	/* Allocate memory for device */
+	ts_dev = kzalloc(sizeof(struct titsc), GFP_KERNEL);
+	input_dev = input_allocate_device();
+	if (!ts_dev || !input_dev) {
+		dev_err(&pdev->dev, "failed to allocate memory.\n");
+		err = -ENOMEM;
+		goto err_free_mem;
+	}
+
+	ts_dev->input = input_dev;
+	ts_dev->irq = irq;
+	ts_dev->wires = pdata->wires;
+	ts_dev->x_plate_resistance = pdata->x_plate_resistance;
+	ts_dev->steps_to_configure = pdata->steps_to_configure;
+
+	res = request_mem_region(res->start, resource_size(res), pdev->name);
+	if (!res) {
+		dev_err(&pdev->dev, "failed to reserve registers.\n");
+		err = -EBUSY;
+		goto err_free_mem;
+	}
+
+	ts_dev->tsc_base = ioremap(res->start, resource_size(res));
+	if (!ts_dev->tsc_base) {
+		dev_err(&pdev->dev, "failed to map registers.\n");
+		err = -ENOMEM;
+		goto err_release_mem_region;
+	}
+
+	err = request_irq(ts_dev->irq, titsc_irq,
+			  0, pdev->dev.driver->name, ts_dev);
+	if (err) {
+		dev_err(&pdev->dev, "failed to allocate irq.\n");
+		goto err_unmap_regs;
+	}
+
+	ts_dev->tsc_ick = clk_get(&pdev->dev, "adc_tsc_ick");
+	if (IS_ERR(ts_dev->tsc_ick)) {
+		dev_err(&pdev->dev, "failed to get TSC ick\n");
+		goto err_free_irq;
+	}
+	clk_enable(ts_dev->tsc_ick);
+
+	clk = clk_get(&pdev->dev, "adc_tsc_fck");
+	if (IS_ERR(clk)) {
+		dev_err(&pdev->dev, "failed to get TSC fck\n");
+		err = PTR_ERR(clk);
+		goto err_disable_clk;
+	}
+
+	clk_value = clk_get_rate(clk) / ADC_CLK;
+	clk_put(clk);
+
+	if (clk_value < 7) {
+		dev_err(&pdev->dev, "clock input less than min clock requirement\n");
+		goto err_disable_clk;
+	}
+	/* CLKDIV needs to be configured to the value minus 1 */
+	titsc_writel(ts_dev, REG_CLKDIV, clk_value - 1);
+
+	 /* Enable wake-up of the SoC using touchscreen */
+	titsc_writel(ts_dev, REG_IRQWAKEUP, IRQWKUP_ENB);
+
+	ctrl = CNTRLREG_STEPCONFIGWRT |
+			CNTRLREG_TSCENB |
+			CNTRLREG_STEPID;
+	switch (ts_dev->wires) {
+	case 4:
+		ctrl |= CNTRLREG_4WIRE;
+		break;
+	case 5:
+		ctrl |= CNTRLREG_5WIRE;
+		break;
+	case 8:
+		ctrl |= CNTRLREG_8WIRE;
+		break;
+	}
+	titsc_writel(ts_dev, REG_CTRL, ctrl);
+
+	titsc_idle_config(ts_dev);
+	titsc_writel(ts_dev, REG_IRQENABLE, IRQENB_FIFO0THRES);
+	titsc_step_config(ts_dev);
+	titsc_writel(ts_dev, REG_FIFO0THR, ts_dev->steps_to_configure);
+
+	ctrl |= CNTRLREG_TSCSSENB;
+	titsc_writel(ts_dev, REG_CTRL, ctrl);
+
+	input_dev->name = "ti-tsc-adc";
+	input_dev->dev.parent = &pdev->dev;
+
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
+	input_dev->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH);
+
+	input_set_abs_params(input_dev, ABS_X, 0, MAX_12BIT, 0, 0);
+	input_set_abs_params(input_dev, ABS_Y, 0, MAX_12BIT, 0, 0);
+	input_set_abs_params(input_dev, ABS_PRESSURE, 0, MAX_12BIT, 0, 0);
+
+	/* register to the input system */
+	err = input_register_device(input_dev);
+	if (err)
+		goto err_disable_clk;
+
+	platform_set_drvdata(pdev, ts_dev);
+	return 0;
+
+err_disable_clk:
+	clk_disable(ts_dev->tsc_ick);
+	clk_put(ts_dev->tsc_ick);
+err_free_irq:
+	free_irq(ts_dev->irq, ts_dev);
+err_unmap_regs:
+	iounmap(ts_dev->tsc_base);
+err_release_mem_region:
+	release_mem_region(res->start, resource_size(res));
+err_free_mem:
+	input_free_device(input_dev);
+	kfree(ts_dev);
+	return err;
+}
+
+static int __devexit titsc_remove(struct platform_device *pdev)
+{
+	struct titsc *ts_dev = platform_get_drvdata(pdev);
+	struct resource *res;
+
+	free_irq(ts_dev->irq, ts_dev);
+
+	input_unregister_device(ts_dev->input);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	iounmap(ts_dev->tsc_base);
+	release_mem_region(res->start, resource_size(res));
+
+	clk_disable(ts_dev->tsc_ick);
+	clk_put(ts_dev->tsc_ick);
+
+	kfree(ts_dev);
+
+	platform_set_drvdata(pdev, NULL);
+	return 0;
+}
+
+static struct platform_driver ti_tsc_driver = {
+	.probe	= titsc_probe,
+	.remove	= __devexit_p(titsc_remove),
+	.driver	= {
+		.name   = "tsc",
+		.owner	= THIS_MODULE,
+	},
+};
+module_platform_driver(ti_tsc_driver);
+
+MODULE_DESCRIPTION("TI touchscreen controller driver");
+MODULE_AUTHOR("Rachna Patil <rachna@ti.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/input/touchscreen/ti_tscadc.c b/drivers/input/touchscreen/ti_tscadc.c
deleted file mode 100644
index ec0a442478a8..000000000000
--- a/drivers/input/touchscreen/ti_tscadc.c
+++ /dev/null
@@ -1,522 +0,0 @@
-/*
- * TI Touch Screen driver
- *
- * Copyright (C) 2011 Texas Instruments Incorporated - http://www.ti.com/
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation version 2.
- *
- * This program is distributed "as is" WITHOUT ANY WARRANTY of any
- * kind, whether express or implied; without even the implied warranty
- * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- */
-
-
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/err.h>
-#include <linux/module.h>
-#include <linux/input.h>
-#include <linux/slab.h>
-#include <linux/interrupt.h>
-#include <linux/clk.h>
-#include <linux/platform_device.h>
-#include <linux/io.h>
-#include <linux/input/ti_tscadc.h>
-#include <linux/delay.h>
-
-#define REG_RAWIRQSTATUS	0x024
-#define REG_IRQSTATUS		0x028
-#define REG_IRQENABLE		0x02C
-#define REG_IRQWAKEUP		0x034
-#define REG_CTRL		0x040
-#define REG_ADCFSM		0x044
-#define REG_CLKDIV		0x04C
-#define REG_SE			0x054
-#define REG_IDLECONFIG		0x058
-#define REG_CHARGECONFIG	0x05C
-#define REG_CHARGEDELAY		0x060
-#define REG_STEPCONFIG(n)	(0x64 + ((n - 1) * 8))
-#define REG_STEPDELAY(n)	(0x68 + ((n - 1) * 8))
-#define REG_FIFO0CNT		0xE4
-#define REG_FIFO0THR		0xE8
-#define REG_FIFO1THR		0xF4
-#define REG_FIFO0		0x100
-#define REG_FIFO1		0x200
-
-/*	Register Bitfields	*/
-#define IRQWKUP_ENB		BIT(0)
-
-/* Step Enable */
-#define STEPENB_MASK		(0x1FFFF << 0)
-#define STEPENB(val)		(val << 0)
-#define STPENB_STEPENB		STEPENB(0x7FFF)
-
-/* IRQ enable */
-#define IRQENB_FIFO0THRES	BIT(2)
-#define IRQENB_FIFO1THRES	BIT(5)
-#define IRQENB_PENUP		BIT(9)
-
-/* Step Configuration */
-#define STEPCONFIG_MODE_MASK	(3 << 0)
-#define STEPCONFIG_MODE(val)	(val << 0)
-#define STEPCONFIG_MODE_HWSYNC	STEPCONFIG_MODE(2)
-#define STEPCONFIG_AVG_MASK	(7 << 2)
-#define STEPCONFIG_AVG(val)	(val << 2)
-#define STEPCONFIG_AVG_16	STEPCONFIG_AVG(4)
-#define STEPCONFIG_XPP		BIT(5)
-#define STEPCONFIG_XNN		BIT(6)
-#define STEPCONFIG_YPP		BIT(7)
-#define STEPCONFIG_YNN		BIT(8)
-#define STEPCONFIG_XNP		BIT(9)
-#define STEPCONFIG_YPN		BIT(10)
-#define STEPCONFIG_INM_MASK	(0xF << 15)
-#define STEPCONFIG_INM(val)	(val << 15)
-#define STEPCONFIG_INM_ADCREFM	STEPCONFIG_INM(8)
-#define STEPCONFIG_INP_MASK	(0xF << 19)
-#define STEPCONFIG_INP(val)	(val << 19)
-#define STEPCONFIG_INP_AN2	STEPCONFIG_INP(2)
-#define STEPCONFIG_INP_AN3	STEPCONFIG_INP(3)
-#define STEPCONFIG_INP_AN4	STEPCONFIG_INP(4)
-#define STEPCONFIG_INP_ADCREFM	STEPCONFIG_INP(8)
-#define STEPCONFIG_FIFO1	BIT(26)
-
-/* Delay register */
-#define STEPDELAY_OPEN_MASK	(0x3FFFF << 0)
-#define STEPDELAY_OPEN(val)	(val << 0)
-#define STEPCONFIG_OPENDLY	STEPDELAY_OPEN(0x098)
-
-/* Charge Config */
-#define STEPCHARGE_RFP_MASK	(7 << 12)
-#define STEPCHARGE_RFP(val)	(val << 12)
-#define STEPCHARGE_RFP_XPUL	STEPCHARGE_RFP(1)
-#define STEPCHARGE_INM_MASK	(0xF << 15)
-#define STEPCHARGE_INM(val)	(val << 15)
-#define STEPCHARGE_INM_AN1	STEPCHARGE_INM(1)
-#define STEPCHARGE_INP_MASK	(0xF << 19)
-#define STEPCHARGE_INP(val)	(val << 19)
-#define STEPCHARGE_INP_AN1	STEPCHARGE_INP(1)
-#define STEPCHARGE_RFM_MASK	(3 << 23)
-#define STEPCHARGE_RFM(val)	(val << 23)
-#define STEPCHARGE_RFM_XNUR	STEPCHARGE_RFM(1)
-
-/* Charge delay */
-#define CHARGEDLY_OPEN_MASK	(0x3FFFF << 0)
-#define CHARGEDLY_OPEN(val)	(val << 0)
-#define CHARGEDLY_OPENDLY	CHARGEDLY_OPEN(1)
-
-/* Control register */
-#define CNTRLREG_TSCSSENB	BIT(0)
-#define CNTRLREG_STEPID		BIT(1)
-#define CNTRLREG_STEPCONFIGWRT	BIT(2)
-#define CNTRLREG_AFE_CTRL_MASK	(3 << 5)
-#define CNTRLREG_AFE_CTRL(val)	(val << 5)
-#define CNTRLREG_4WIRE		CNTRLREG_AFE_CTRL(1)
-#define CNTRLREG_5WIRE		CNTRLREG_AFE_CTRL(2)
-#define CNTRLREG_8WIRE		CNTRLREG_AFE_CTRL(3)
-#define CNTRLREG_TSCENB		BIT(7)
-
-#define ADCFSM_STEPID		0x10
-#define SEQ_SETTLE		275
-#define ADC_CLK			3000000
-#define MAX_12BIT		((1 << 12) - 1)
-
-struct tscadc {
-	struct input_dev	*input;
-	struct clk		*tsc_ick;
-	void __iomem		*tsc_base;
-	unsigned int		irq;
-	unsigned int		wires;
-	unsigned int		x_plate_resistance;
-	bool			pen_down;
-	int			steps_to_configure;
-};
-
-static unsigned int tscadc_readl(struct tscadc *ts, unsigned int reg)
-{
-	return readl(ts->tsc_base + reg);
-}
-
-static void tscadc_writel(struct tscadc *tsc, unsigned int reg,
-					unsigned int val)
-{
-	writel(val, tsc->tsc_base + reg);
-}
-
-static void tscadc_step_config(struct tscadc *ts_dev)
-{
-	unsigned int	config;
-	int i, total_steps;
-
-	/* Configure the Step registers */
-	total_steps = 2 * ts_dev->steps_to_configure;
-
-	config = STEPCONFIG_MODE_HWSYNC |
-			STEPCONFIG_AVG_16 | STEPCONFIG_XPP;
-	switch (ts_dev->wires) {
-	case 4:
-		config |= STEPCONFIG_INP_AN2 | STEPCONFIG_XNN;
-		break;
-	case 5:
-		config |= STEPCONFIG_YNN |
-				STEPCONFIG_INP_AN4 | STEPCONFIG_XNN |
-				STEPCONFIG_YPP;
-		break;
-	case 8:
-		config |= STEPCONFIG_INP_AN2 | STEPCONFIG_XNN;
-		break;
-	}
-
-	for (i = 1; i <= ts_dev->steps_to_configure; i++) {
-		tscadc_writel(ts_dev, REG_STEPCONFIG(i), config);
-		tscadc_writel(ts_dev, REG_STEPDELAY(i), STEPCONFIG_OPENDLY);
-	}
-
-	config = 0;
-	config = STEPCONFIG_MODE_HWSYNC |
-			STEPCONFIG_AVG_16 | STEPCONFIG_YNN |
-			STEPCONFIG_INM_ADCREFM | STEPCONFIG_FIFO1;
-	switch (ts_dev->wires) {
-	case 4:
-		config |= STEPCONFIG_YPP;
-		break;
-	case 5:
-		config |= STEPCONFIG_XPP | STEPCONFIG_INP_AN4 |
-				STEPCONFIG_XNP | STEPCONFIG_YPN;
-		break;
-	case 8:
-		config |= STEPCONFIG_YPP;
-		break;
-	}
-
-	for (i = (ts_dev->steps_to_configure + 1); i <= total_steps; i++) {
-		tscadc_writel(ts_dev, REG_STEPCONFIG(i), config);
-		tscadc_writel(ts_dev, REG_STEPDELAY(i), STEPCONFIG_OPENDLY);
-	}
-
-	config = 0;
-	/* Charge step configuration */
-	config = STEPCONFIG_XPP | STEPCONFIG_YNN |
-			STEPCHARGE_RFP_XPUL | STEPCHARGE_RFM_XNUR |
-			STEPCHARGE_INM_AN1 | STEPCHARGE_INP_AN1;
-
-	tscadc_writel(ts_dev, REG_CHARGECONFIG, config);
-	tscadc_writel(ts_dev, REG_CHARGEDELAY, CHARGEDLY_OPENDLY);
-
-	config = 0;
-	/* Configure to calculate pressure */
-	config = STEPCONFIG_MODE_HWSYNC |
-			STEPCONFIG_AVG_16 | STEPCONFIG_YPP |
-			STEPCONFIG_XNN | STEPCONFIG_INM_ADCREFM;
-	tscadc_writel(ts_dev, REG_STEPCONFIG(total_steps + 1), config);
-	tscadc_writel(ts_dev, REG_STEPDELAY(total_steps + 1),
-			STEPCONFIG_OPENDLY);
-
-	config |= STEPCONFIG_INP_AN3 | STEPCONFIG_FIFO1;
-	tscadc_writel(ts_dev, REG_STEPCONFIG(total_steps + 2), config);
-	tscadc_writel(ts_dev, REG_STEPDELAY(total_steps + 2),
-			STEPCONFIG_OPENDLY);
-
-	tscadc_writel(ts_dev, REG_SE, STPENB_STEPENB);
-}
-
-static void tscadc_idle_config(struct tscadc *ts_config)
-{
-	unsigned int idleconfig;
-
-	idleconfig = STEPCONFIG_YNN |
-			STEPCONFIG_INM_ADCREFM |
-			STEPCONFIG_YPN | STEPCONFIG_INP_ADCREFM;
-	tscadc_writel(ts_config, REG_IDLECONFIG, idleconfig);
-}
-
-static void tscadc_read_coordinates(struct tscadc *ts_dev,
-				    unsigned int *x, unsigned int *y)
-{
-	unsigned int fifocount = tscadc_readl(ts_dev, REG_FIFO0CNT);
-	unsigned int prev_val_x = ~0, prev_val_y = ~0;
-	unsigned int prev_diff_x = ~0, prev_diff_y = ~0;
-	unsigned int read, diff;
-	unsigned int i;
-
-	/*
-	 * Delta filter is used to remove large variations in sampled
-	 * values from ADC. The filter tries to predict where the next
-	 * coordinate could be. This is done by taking a previous
-	 * coordinate and subtracting it form current one. Further the
-	 * algorithm compares the difference with that of a present value,
-	 * if true the value is reported to the sub system.
-	 */
-	for (i = 0; i < fifocount - 1; i++) {
-		read = tscadc_readl(ts_dev, REG_FIFO0) & 0xfff;
-		diff = abs(read - prev_val_x);
-		if (diff < prev_diff_x) {
-			prev_diff_x = diff;
-			*x = read;
-		}
-		prev_val_x = read;
-
-		read = tscadc_readl(ts_dev, REG_FIFO1) & 0xfff;
-		diff = abs(read - prev_val_y);
-		if (diff < prev_diff_y) {
-			prev_diff_y = diff;
-			*y = read;
-		}
-		prev_val_y = read;
-	}
-}
-
-static irqreturn_t tscadc_irq(int irq, void *dev)
-{
-	struct tscadc *ts_dev = dev;
-	struct input_dev *input_dev = ts_dev->input;
-	unsigned int status, irqclr = 0;
-	unsigned int x = 0, y = 0;
-	unsigned int z1, z2, z;
-	unsigned int fsm;
-
-	status = tscadc_readl(ts_dev, REG_IRQSTATUS);
-	if (status & IRQENB_FIFO0THRES) {
-		tscadc_read_coordinates(ts_dev, &x, &y);
-
-		z1 = tscadc_readl(ts_dev, REG_FIFO0) & 0xfff;
-		z2 = tscadc_readl(ts_dev, REG_FIFO1) & 0xfff;
-
-		if (ts_dev->pen_down && z1 != 0 && z2 != 0) {
-			/*
-			 * Calculate pressure using formula
-			 * Resistance(touch) = x plate resistance *
-			 * x postion/4096 * ((z2 / z1) - 1)
-			 */
-			z = z2 - z1;
-			z *= x;
-			z *= ts_dev->x_plate_resistance;
-			z /= z1;
-			z = (z + 2047) >> 12;
-
-			if (z <= MAX_12BIT) {
-				input_report_abs(input_dev, ABS_X, x);
-				input_report_abs(input_dev, ABS_Y, y);
-				input_report_abs(input_dev, ABS_PRESSURE, z);
-				input_report_key(input_dev, BTN_TOUCH, 1);
-				input_sync(input_dev);
-			}
-		}
-		irqclr |= IRQENB_FIFO0THRES;
-	}
-
-	/*
-	 * Time for sequencer to settle, to read
-	 * correct state of the sequencer.
-	 */
-	udelay(SEQ_SETTLE);
-
-	status = tscadc_readl(ts_dev, REG_RAWIRQSTATUS);
-	if (status & IRQENB_PENUP) {
-		/* Pen up event */
-		fsm = tscadc_readl(ts_dev, REG_ADCFSM);
-		if (fsm == ADCFSM_STEPID) {
-			ts_dev->pen_down = false;
-			input_report_key(input_dev, BTN_TOUCH, 0);
-			input_report_abs(input_dev, ABS_PRESSURE, 0);
-			input_sync(input_dev);
-		} else {
-			ts_dev->pen_down = true;
-		}
-		irqclr |= IRQENB_PENUP;
-	}
-
-	tscadc_writel(ts_dev, REG_IRQSTATUS, irqclr);
-
-	tscadc_writel(ts_dev, REG_SE, STPENB_STEPENB);
-	return IRQ_HANDLED;
-}
-
-/*
- * The functions for inserting/removing driver as a module.
- */
-
-static int __devinit tscadc_probe(struct platform_device *pdev)
-{
-	const struct tsc_data *pdata = pdev->dev.platform_data;
-	struct resource *res;
-	struct tscadc *ts_dev;
-	struct input_dev *input_dev;
-	struct clk *clk;
-	int err;
-	int clk_value, ctrl, irq;
-
-	if (!pdata) {
-		dev_err(&pdev->dev, "missing platform data.\n");
-		return -EINVAL;
-	}
-
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!res) {
-		dev_err(&pdev->dev, "no memory resource defined.\n");
-		return -EINVAL;
-	}
-
-	irq = platform_get_irq(pdev, 0);
-	if (irq < 0) {
-		dev_err(&pdev->dev, "no irq ID is specified.\n");
-		return -EINVAL;
-	}
-
-	/* Allocate memory for device */
-	ts_dev = kzalloc(sizeof(struct tscadc), GFP_KERNEL);
-	input_dev = input_allocate_device();
-	if (!ts_dev || !input_dev) {
-		dev_err(&pdev->dev, "failed to allocate memory.\n");
-		err = -ENOMEM;
-		goto err_free_mem;
-	}
-
-	ts_dev->input = input_dev;
-	ts_dev->irq = irq;
-	ts_dev->wires = pdata->wires;
-	ts_dev->x_plate_resistance = pdata->x_plate_resistance;
-	ts_dev->steps_to_configure = pdata->steps_to_configure;
-
-	res = request_mem_region(res->start, resource_size(res), pdev->name);
-	if (!res) {
-		dev_err(&pdev->dev, "failed to reserve registers.\n");
-		err = -EBUSY;
-		goto err_free_mem;
-	}
-
-	ts_dev->tsc_base = ioremap(res->start, resource_size(res));
-	if (!ts_dev->tsc_base) {
-		dev_err(&pdev->dev, "failed to map registers.\n");
-		err = -ENOMEM;
-		goto err_release_mem_region;
-	}
-
-	err = request_irq(ts_dev->irq, tscadc_irq,
-			  0, pdev->dev.driver->name, ts_dev);
-	if (err) {
-		dev_err(&pdev->dev, "failed to allocate irq.\n");
-		goto err_unmap_regs;
-	}
-
-	ts_dev->tsc_ick = clk_get(&pdev->dev, "adc_tsc_ick");
-	if (IS_ERR(ts_dev->tsc_ick)) {
-		dev_err(&pdev->dev, "failed to get TSC ick\n");
-		goto err_free_irq;
-	}
-	clk_enable(ts_dev->tsc_ick);
-
-	clk = clk_get(&pdev->dev, "adc_tsc_fck");
-	if (IS_ERR(clk)) {
-		dev_err(&pdev->dev, "failed to get TSC fck\n");
-		err = PTR_ERR(clk);
-		goto err_disable_clk;
-	}
-
-	clk_value = clk_get_rate(clk) / ADC_CLK;
-	clk_put(clk);
-
-	if (clk_value < 7) {
-		dev_err(&pdev->dev, "clock input less than min clock requirement\n");
-		goto err_disable_clk;
-	}
-	/* CLKDIV needs to be configured to the value minus 1 */
-	tscadc_writel(ts_dev, REG_CLKDIV, clk_value - 1);
-
-	 /* Enable wake-up of the SoC using touchscreen */
-	tscadc_writel(ts_dev, REG_IRQWAKEUP, IRQWKUP_ENB);
-
-	ctrl = CNTRLREG_STEPCONFIGWRT |
-			CNTRLREG_TSCENB |
-			CNTRLREG_STEPID;
-	switch (ts_dev->wires) {
-	case 4:
-		ctrl |= CNTRLREG_4WIRE;
-		break;
-	case 5:
-		ctrl |= CNTRLREG_5WIRE;
-		break;
-	case 8:
-		ctrl |= CNTRLREG_8WIRE;
-		break;
-	}
-	tscadc_writel(ts_dev, REG_CTRL, ctrl);
-
-	tscadc_idle_config(ts_dev);
-	tscadc_writel(ts_dev, REG_IRQENABLE, IRQENB_FIFO0THRES);
-	tscadc_step_config(ts_dev);
-	tscadc_writel(ts_dev, REG_FIFO0THR, ts_dev->steps_to_configure);
-
-	ctrl |= CNTRLREG_TSCSSENB;
-	tscadc_writel(ts_dev, REG_CTRL, ctrl);
-
-	input_dev->name = "ti-tsc-adc";
-	input_dev->dev.parent = &pdev->dev;
-
-	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
-	input_dev->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH);
-
-	input_set_abs_params(input_dev, ABS_X, 0, MAX_12BIT, 0, 0);
-	input_set_abs_params(input_dev, ABS_Y, 0, MAX_12BIT, 0, 0);
-	input_set_abs_params(input_dev, ABS_PRESSURE, 0, MAX_12BIT, 0, 0);
-
-	/* register to the input system */
-	err = input_register_device(input_dev);
-	if (err)
-		goto err_disable_clk;
-
-	platform_set_drvdata(pdev, ts_dev);
-	return 0;
-
-err_disable_clk:
-	clk_disable(ts_dev->tsc_ick);
-	clk_put(ts_dev->tsc_ick);
-err_free_irq:
-	free_irq(ts_dev->irq, ts_dev);
-err_unmap_regs:
-	iounmap(ts_dev->tsc_base);
-err_release_mem_region:
-	release_mem_region(res->start, resource_size(res));
-err_free_mem:
-	input_free_device(input_dev);
-	kfree(ts_dev);
-	return err;
-}
-
-static int __devexit tscadc_remove(struct platform_device *pdev)
-{
-	struct tscadc *ts_dev = platform_get_drvdata(pdev);
-	struct resource *res;
-
-	free_irq(ts_dev->irq, ts_dev);
-
-	input_unregister_device(ts_dev->input);
-
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	iounmap(ts_dev->tsc_base);
-	release_mem_region(res->start, resource_size(res));
-
-	clk_disable(ts_dev->tsc_ick);
-	clk_put(ts_dev->tsc_ick);
-
-	kfree(ts_dev);
-
-	platform_set_drvdata(pdev, NULL);
-	return 0;
-}
-
-static struct platform_driver ti_tsc_driver = {
-	.probe	= tscadc_probe,
-	.remove	= __devexit_p(tscadc_remove),
-	.driver	= {
-		.name   = "tsc",
-		.owner	= THIS_MODULE,
-	},
-};
-module_platform_driver(ti_tsc_driver);
-
-MODULE_DESCRIPTION("TI touchscreen controller driver");
-MODULE_AUTHOR("Rachna Patil <rachna@ti.com>");
-MODULE_LICENSE("GPL");
diff --git a/include/linux/input/ti_am335x_tsc.h b/include/linux/input/ti_am335x_tsc.h
new file mode 100644
index 000000000000..49269a2aa329
--- /dev/null
+++ b/include/linux/input/ti_am335x_tsc.h
@@ -0,0 +1,23 @@
+#ifndef __LINUX_TI_AM335X_TSC_H
+#define __LINUX_TI_AM335X_TSC_H
+
+/**
+ * struct tsc_data	Touchscreen wire configuration
+ * @wires:		Wires refer to application modes
+ *			i.e. 4/5/8 wire touchscreen support
+ *			on the platform.
+ * @x_plate_resistance:	X plate resistance.
+ * @steps_to_configure:	The sequencer supports a total of
+ *			16 programmable steps.
+ *			A step configured to read a single
+ *			co-ordinate value, can be applied
+ *			more number of times for better results.
+ */
+
+struct tsc_data {
+	int wires;
+	int x_plate_resistance;
+	int steps_to_configure;
+};
+
+#endif
diff --git a/include/linux/input/ti_tscadc.h b/include/linux/input/ti_tscadc.h
deleted file mode 100644
index ad442a3eb68b..000000000000
--- a/include/linux/input/ti_tscadc.h
+++ /dev/null
@@ -1,23 +0,0 @@
-#ifndef __LINUX_TI_TSCADC_H
-#define __LINUX_TI_TSCADC_H
-
-/**
- * struct tsc_data	Touchscreen wire configuration
- * @wires:		Wires refer to application modes
- *			i.e. 4/5/8 wire touchscreen support
- *			on the platform.
- * @x_plate_resistance:	X plate resistance.
- * @steps_to_configure:	The sequencer supports a total of
- *			16 programmable steps.
- *			A step configured to read a single
- *			co-ordinate value, can be applied
- *			more number of times for better results.
- */
-
-struct tsc_data {
-	int wires;
-	int x_plate_resistance;
-	int steps_to_configure;
-};
-
-#endif
-- 
cgit v1.2.3


From 01636eb970a029897b06fb96026941429212ddd9 Mon Sep 17 00:00:00 2001
From: "Patil, Rachna" <rachna@ti.com>
Date: Tue, 16 Oct 2012 12:55:43 +0530
Subject: mfd: ti_tscadc: Add support for TI's TSC/ADC MFDevice

Add the mfd core driver which supports touchscreen
and ADC.
With this patch we are only adding infrastructure to
support the MFD clients.

Signed-off-by: Patil, Rachna <rachna@ti.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/Kconfig                  |  11 ++
 drivers/mfd/Makefile                 |   1 +
 drivers/mfd/ti_am335x_tscadc.c       | 250 +++++++++++++++++++++++++++++++++++
 include/linux/mfd/ti_am335x_tscadc.h | 137 +++++++++++++++++++
 4 files changed, 399 insertions(+)
 create mode 100644 drivers/mfd/ti_am335x_tscadc.c
 create mode 100644 include/linux/mfd/ti_am335x_tscadc.h

(limited to 'include/linux')

diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index acab3ef8a310..9bba7f78ff36 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -94,6 +94,17 @@ config MFD_TI_SSP
 	  To compile this driver as a module, choose M here: the
 	  module will be called ti-ssp.
 
+config MFD_TI_AM335X_TSCADC
+	tristate "TI ADC / Touch Screen chip support"
+	select MFD_CORE
+	select REGMAP
+	select REGMAP_MMIO
+	help
+	  If you say yes here you get support for Texas Instruments series
+	  of Touch Screen /ADC chips.
+	  To compile this driver as a module, choose M here: the
+	  module will be called ti_am335x_tscadc.
+
 config HTC_EGPIO
 	bool "HTC EGPIO support"
 	depends on GENERIC_HARDIRQS && GPIOLIB && ARM
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index d8ccb630ddb0..442c17e40741 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -16,6 +16,7 @@ obj-$(CONFIG_HTC_I2CPLD)	+= htc-i2cpld.o
 obj-$(CONFIG_MFD_DAVINCI_VOICECODEC)	+= davinci_voicecodec.o
 obj-$(CONFIG_MFD_DM355EVM_MSP)	+= dm355evm_msp.o
 obj-$(CONFIG_MFD_TI_SSP)	+= ti-ssp.o
+obj-$(CONFIG_MFD_TI_AM335X_TSCADC)	+= ti_am335x_tscadc.o
 
 obj-$(CONFIG_MFD_STA2X11)	+= sta2x11-mfd.o
 obj-$(CONFIG_MFD_STMPE)		+= stmpe.o
diff --git a/drivers/mfd/ti_am335x_tscadc.c b/drivers/mfd/ti_am335x_tscadc.c
new file mode 100644
index 000000000000..14df67bc390f
--- /dev/null
+++ b/drivers/mfd/ti_am335x_tscadc.c
@@ -0,0 +1,250 @@
+/*
+ * TI Touch Screen / ADC MFD driver
+ *
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation version 2.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/io.h>
+#include <linux/clk.h>
+#include <linux/regmap.h>
+#include <linux/mfd/core.h>
+#include <linux/pm_runtime.h>
+
+#include <linux/mfd/ti_am335x_tscadc.h>
+
+static unsigned int tscadc_readl(struct ti_tscadc_dev *tsadc, unsigned int reg)
+{
+	unsigned int val;
+
+	regmap_read(tsadc->regmap_tscadc, reg, &val);
+	return val;
+}
+
+static void tscadc_writel(struct ti_tscadc_dev *tsadc, unsigned int reg,
+					unsigned int val)
+{
+	regmap_write(tsadc->regmap_tscadc, reg, val);
+}
+
+static const struct regmap_config tscadc_regmap_config = {
+	.name = "ti_tscadc",
+	.reg_bits = 32,
+	.reg_stride = 4,
+	.val_bits = 32,
+};
+
+static void tscadc_idle_config(struct ti_tscadc_dev *config)
+{
+	unsigned int idleconfig;
+
+	idleconfig = STEPCONFIG_YNN | STEPCONFIG_INM_ADCREFM |
+			STEPCONFIG_INP_ADCREFM | STEPCONFIG_YPN;
+
+	tscadc_writel(config, REG_IDLECONFIG, idleconfig);
+}
+
+static	int __devinit ti_tscadc_probe(struct platform_device *pdev)
+{
+	struct ti_tscadc_dev	*tscadc;
+	struct resource		*res;
+	struct clk		*clk;
+	struct mfd_tscadc_board	*pdata = pdev->dev.platform_data;
+	int			irq;
+	int			err, ctrl;
+	int			clk_value, clock_rate;
+
+	if (!pdata) {
+		dev_err(&pdev->dev, "Could not find platform data\n");
+		return -EINVAL;
+	}
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res) {
+		dev_err(&pdev->dev, "no memory resource defined.\n");
+		return -EINVAL;
+	}
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0) {
+		dev_err(&pdev->dev, "no irq ID is specified.\n");
+		return -EINVAL;
+	}
+
+	/* Allocate memory for device */
+	tscadc = devm_kzalloc(&pdev->dev,
+			sizeof(struct ti_tscadc_dev), GFP_KERNEL);
+	if (!tscadc) {
+		dev_err(&pdev->dev, "failed to allocate memory.\n");
+		return -ENOMEM;
+	}
+	tscadc->dev = &pdev->dev;
+	tscadc->irq = irq;
+
+	res = devm_request_mem_region(&pdev->dev,
+			res->start, resource_size(res), pdev->name);
+	if (!res) {
+		dev_err(&pdev->dev, "failed to reserve registers.\n");
+		err = -EBUSY;
+		goto err;
+	}
+
+	tscadc->tscadc_base = devm_ioremap(&pdev->dev,
+			res->start, resource_size(res));
+	if (!tscadc->tscadc_base) {
+		dev_err(&pdev->dev, "failed to map registers.\n");
+		err = -ENOMEM;
+		goto err;
+	}
+
+	tscadc->regmap_tscadc = devm_regmap_init_mmio(&pdev->dev,
+			tscadc->tscadc_base, &tscadc_regmap_config);
+	if (IS_ERR(tscadc->regmap_tscadc)) {
+		dev_err(&pdev->dev, "regmap init failed\n");
+		err = PTR_ERR(tscadc->regmap_tscadc);
+		goto err;
+	}
+
+	pm_runtime_enable(&pdev->dev);
+	pm_runtime_get_sync(&pdev->dev);
+
+	/*
+	 * The TSC_ADC_Subsystem has 2 clock domains
+	 * OCP_CLK and ADC_CLK.
+	 * The ADC clock is expected to run at target of 3MHz,
+	 * and expected to capture 12-bit data at a rate of 200 KSPS.
+	 * The TSC_ADC_SS controller design assumes the OCP clock is
+	 * at least 6x faster than the ADC clock.
+	 */
+	clk = clk_get(&pdev->dev, "adc_tsc_fck");
+	if (IS_ERR(clk)) {
+		dev_err(&pdev->dev, "failed to get TSC fck\n");
+		err = PTR_ERR(clk);
+		goto err_disable_clk;
+	}
+	clock_rate = clk_get_rate(clk);
+	clk_put(clk);
+	clk_value = clock_rate / ADC_CLK;
+	if (clk_value < MAX_CLK_DIV) {
+		dev_err(&pdev->dev, "clock input less than min clock requirement\n");
+		err = -EINVAL;
+		goto err_disable_clk;
+	}
+	/* TSCADC_CLKDIV needs to be configured to the value minus 1 */
+	clk_value = clk_value - 1;
+	tscadc_writel(tscadc, REG_CLKDIV, clk_value);
+
+	/* Set the control register bits */
+	ctrl = CNTRLREG_STEPCONFIGWRT |
+			CNTRLREG_TSCENB |
+			CNTRLREG_STEPID |
+			CNTRLREG_4WIRE;
+	tscadc_writel(tscadc, REG_CTRL, ctrl);
+
+	/* Set register bits for Idle Config Mode */
+	tscadc_idle_config(tscadc);
+
+	/* Enable the TSC module enable bit */
+	ctrl = tscadc_readl(tscadc, REG_CTRL);
+	ctrl |= CNTRLREG_TSCSSENB;
+	tscadc_writel(tscadc, REG_CTRL, ctrl);
+
+	err = mfd_add_devices(&pdev->dev, pdev->id, tscadc->cells,
+			TSCADC_CELLS, NULL, 0, NULL);
+	if (err < 0)
+		goto err_disable_clk;
+
+	device_init_wakeup(&pdev->dev, true);
+	platform_set_drvdata(pdev, tscadc);
+
+	return 0;
+
+err_disable_clk:
+	pm_runtime_put_sync(&pdev->dev);
+	pm_runtime_disable(&pdev->dev);
+err:
+	return err;
+}
+
+static int __devexit ti_tscadc_remove(struct platform_device *pdev)
+{
+	struct ti_tscadc_dev	*tscadc = platform_get_drvdata(pdev);
+
+	tscadc_writel(tscadc, REG_SE, 0x00);
+
+	pm_runtime_put_sync(&pdev->dev);
+	pm_runtime_disable(&pdev->dev);
+
+	mfd_remove_devices(tscadc->dev);
+
+	return 0;
+}
+
+#ifdef CONFIG_PM
+static int tscadc_suspend(struct device *dev)
+{
+	struct ti_tscadc_dev	*tscadc_dev = dev_get_drvdata(dev);
+
+	tscadc_writel(tscadc_dev, REG_SE, 0x00);
+	pm_runtime_put_sync(dev);
+
+	return 0;
+}
+
+static int tscadc_resume(struct device *dev)
+{
+	struct ti_tscadc_dev	*tscadc_dev = dev_get_drvdata(dev);
+	unsigned int restore, ctrl;
+
+	pm_runtime_get_sync(dev);
+
+	/* context restore */
+	ctrl = CNTRLREG_STEPCONFIGWRT | CNTRLREG_TSCENB |
+			CNTRLREG_STEPID | CNTRLREG_4WIRE;
+	tscadc_writel(tscadc_dev, REG_CTRL, ctrl);
+	tscadc_idle_config(tscadc_dev);
+	tscadc_writel(tscadc_dev, REG_SE, STPENB_STEPENB);
+	restore = tscadc_readl(tscadc_dev, REG_CTRL);
+	tscadc_writel(tscadc_dev, REG_CTRL,
+			(restore | CNTRLREG_TSCSSENB));
+
+	return 0;
+}
+
+static const struct dev_pm_ops tscadc_pm_ops = {
+	.suspend = tscadc_suspend,
+	.resume = tscadc_resume,
+};
+#define TSCADC_PM_OPS (&tscadc_pm_ops)
+#else
+#define TSCADC_PM_OPS NULL
+#endif
+
+static struct platform_driver ti_tscadc_driver = {
+	.driver = {
+		.name   = "ti_tscadc",
+		.owner	= THIS_MODULE,
+		.pm	= TSCADC_PM_OPS,
+	},
+	.probe	= ti_tscadc_probe,
+	.remove	= __devexit_p(ti_tscadc_remove),
+
+};
+
+module_platform_driver(ti_tscadc_driver);
+
+MODULE_DESCRIPTION("TI touchscreen / ADC MFD controller driver");
+MODULE_AUTHOR("Rachna Patil <rachna@ti.com>");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/mfd/ti_am335x_tscadc.h b/include/linux/mfd/ti_am335x_tscadc.h
new file mode 100644
index 000000000000..b7232b157061
--- /dev/null
+++ b/include/linux/mfd/ti_am335x_tscadc.h
@@ -0,0 +1,137 @@
+#ifndef __LINUX_TI_AM335X_TSCADC_MFD_H
+#define __LINUX_TI_AM335X_TSCADC_MFD_H
+
+/*
+ * TI Touch Screen / ADC MFD driver
+ *
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation version 2.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/mfd/core.h>
+
+#define REG_RAWIRQSTATUS	0x024
+#define REG_IRQSTATUS		0x028
+#define REG_IRQENABLE		0x02C
+#define REG_IRQCLR		0x030
+#define REG_IRQWAKEUP		0x034
+#define REG_CTRL		0x040
+#define REG_ADCFSM		0x044
+#define REG_CLKDIV		0x04C
+#define REG_SE			0x054
+#define REG_IDLECONFIG		0x058
+#define REG_CHARGECONFIG	0x05C
+#define REG_CHARGEDELAY		0x060
+#define REG_STEPCONFIG(n)	(0x64 + ((n - 1) * 8))
+#define REG_STEPDELAY(n)	(0x68 + ((n - 1) * 8))
+#define REG_FIFO0CNT		0xE4
+#define REG_FIFO0THR		0xE8
+#define REG_FIFO1CNT		0xF0
+#define REG_FIFO1THR		0xF4
+#define REG_FIFO0		0x100
+#define REG_FIFO1		0x200
+
+/*	Register Bitfields	*/
+/* IRQ wakeup enable */
+#define IRQWKUP_ENB		BIT(0)
+
+/* Step Enable */
+#define STEPENB_MASK		(0x1FFFF << 0)
+#define STEPENB(val)		((val) << 0)
+#define STPENB_STEPENB		STEPENB(0x1FFFF)
+
+/* IRQ enable */
+#define IRQENB_HW_PEN		BIT(0)
+#define IRQENB_FIFO0THRES	BIT(2)
+#define IRQENB_FIFO1THRES	BIT(5)
+#define IRQENB_PENUP		BIT(9)
+
+/* Step Configuration */
+#define STEPCONFIG_MODE_MASK	(3 << 0)
+#define STEPCONFIG_MODE(val)	((val) << 0)
+#define STEPCONFIG_MODE_HWSYNC	STEPCONFIG_MODE(2)
+#define STEPCONFIG_AVG_MASK	(7 << 2)
+#define STEPCONFIG_AVG(val)	((val) << 2)
+#define STEPCONFIG_AVG_16	STEPCONFIG_AVG(4)
+#define STEPCONFIG_XPP		BIT(5)
+#define STEPCONFIG_XNN		BIT(6)
+#define STEPCONFIG_YPP		BIT(7)
+#define STEPCONFIG_YNN		BIT(8)
+#define STEPCONFIG_XNP		BIT(9)
+#define STEPCONFIG_YPN		BIT(10)
+#define STEPCONFIG_INM_MASK	(0xF << 15)
+#define STEPCONFIG_INM(val)	((val) << 15)
+#define STEPCONFIG_INM_ADCREFM	STEPCONFIG_INM(8)
+#define STEPCONFIG_INP_MASK	(0xF << 19)
+#define STEPCONFIG_INP(val)	((val) << 19)
+#define STEPCONFIG_INP_AN2	STEPCONFIG_INP(2)
+#define STEPCONFIG_INP_AN3	STEPCONFIG_INP(3)
+#define STEPCONFIG_INP_AN4	STEPCONFIG_INP(4)
+#define STEPCONFIG_INP_ADCREFM	STEPCONFIG_INP(8)
+#define STEPCONFIG_FIFO1	BIT(26)
+
+/* Delay register */
+#define STEPDELAY_OPEN_MASK	(0x3FFFF << 0)
+#define STEPDELAY_OPEN(val)	((val) << 0)
+#define STEPCONFIG_OPENDLY	STEPDELAY_OPEN(0x098)
+#define STEPDELAY_SAMPLE_MASK	(0xFF << 24)
+#define STEPDELAY_SAMPLE(val)	((val) << 24)
+#define STEPCONFIG_SAMPLEDLY	STEPDELAY_SAMPLE(0)
+
+/* Charge Config */
+#define STEPCHARGE_RFP_MASK	(7 << 12)
+#define STEPCHARGE_RFP(val)	((val) << 12)
+#define STEPCHARGE_RFP_XPUL	STEPCHARGE_RFP(1)
+#define STEPCHARGE_INM_MASK	(0xF << 15)
+#define STEPCHARGE_INM(val)	((val) << 15)
+#define STEPCHARGE_INM_AN1	STEPCHARGE_INM(1)
+#define STEPCHARGE_INP_MASK	(0xF << 19)
+#define STEPCHARGE_INP(val)	((val) << 19)
+#define STEPCHARGE_INP_AN1	STEPCHARGE_INP(1)
+#define STEPCHARGE_RFM_MASK	(3 << 23)
+#define STEPCHARGE_RFM(val)	((val) << 23)
+#define STEPCHARGE_RFM_XNUR	STEPCHARGE_RFM(1)
+
+/* Charge delay */
+#define CHARGEDLY_OPEN_MASK	(0x3FFFF << 0)
+#define CHARGEDLY_OPEN(val)	((val) << 0)
+#define CHARGEDLY_OPENDLY	CHARGEDLY_OPEN(1)
+
+/* Control register */
+#define CNTRLREG_TSCSSENB	BIT(0)
+#define CNTRLREG_STEPID		BIT(1)
+#define CNTRLREG_STEPCONFIGWRT	BIT(2)
+#define CNTRLREG_POWERDOWN	BIT(4)
+#define CNTRLREG_AFE_CTRL_MASK	(3 << 5)
+#define CNTRLREG_AFE_CTRL(val)	((val) << 5)
+#define CNTRLREG_4WIRE		CNTRLREG_AFE_CTRL(1)
+#define CNTRLREG_5WIRE		CNTRLREG_AFE_CTRL(2)
+#define CNTRLREG_8WIRE		CNTRLREG_AFE_CTRL(3)
+#define CNTRLREG_TSCENB		BIT(7)
+
+#define ADC_CLK			3000000
+#define	MAX_CLK_DIV		7
+
+#define TSCADC_CELLS		0
+
+struct mfd_tscadc_board {
+	struct tsc_data *tsc_init;
+};
+
+struct ti_tscadc_dev {
+	struct device *dev;
+	struct regmap *regmap_tscadc;
+	void __iomem *tscadc_base;
+	int irq;
+	struct mfd_cell cells[TSCADC_CELLS];
+};
+
+#endif
-- 
cgit v1.2.3


From 2b99bafab19145a72e2c557326fc4662a864a162 Mon Sep 17 00:00:00 2001
From: "Patil, Rachna" <rachna@ti.com>
Date: Tue, 16 Oct 2012 12:55:44 +0530
Subject: input: TSC: ti_tsc: Convert TSC into a MFDevice

This patch converts touchscreen into a MFD client.
All the register definitions, clock initialization,
etc has been moved to MFD core driver.

Signed-off-by: Patil, Rachna <rachna@ti.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/input/touchscreen/Kconfig         |   2 +-
 drivers/input/touchscreen/ti_am335x_tsc.c | 318 +++++++++---------------------
 drivers/mfd/ti_am335x_tscadc.c            |  11 ++
 include/linux/mfd/ti_am335x_tscadc.h      |  10 +-
 4 files changed, 118 insertions(+), 223 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/input/touchscreen/Kconfig b/drivers/input/touchscreen/Kconfig
index d31dc5faeaaf..0c45caddd41c 100644
--- a/drivers/input/touchscreen/Kconfig
+++ b/drivers/input/touchscreen/Kconfig
@@ -531,7 +531,7 @@ config TOUCHSCREEN_TOUCHWIN
 
 config TOUCHSCREEN_TI_AM335X_TSC
 	tristate "TI Touchscreen Interface"
-	depends on ARCH_OMAP2PLUS
+	depends on MFD_TI_AM335X_TSCADC
 	help
 	  Say Y here if you have 4/5/8 wire touchscreen controller
 	  to be connected to the ADC controller on your TI AM335x SoC.
diff --git a/drivers/input/touchscreen/ti_am335x_tsc.c b/drivers/input/touchscreen/ti_am335x_tsc.c
index 462950acb97b..7a18a8a15228 100644
--- a/drivers/input/touchscreen/ti_am335x_tsc.c
+++ b/drivers/input/touchscreen/ti_am335x_tsc.c
@@ -27,106 +27,15 @@
 #include <linux/input/ti_am335x_tsc.h>
 #include <linux/delay.h>
 
-#define REG_RAWIRQSTATUS	0x024
-#define REG_IRQSTATUS		0x028
-#define REG_IRQENABLE		0x02C
-#define REG_IRQWAKEUP		0x034
-#define REG_CTRL		0x040
-#define REG_ADCFSM		0x044
-#define REG_CLKDIV		0x04C
-#define REG_SE			0x054
-#define REG_IDLECONFIG		0x058
-#define REG_CHARGECONFIG	0x05C
-#define REG_CHARGEDELAY		0x060
-#define REG_STEPCONFIG(n)	(0x64 + ((n - 1) * 8))
-#define REG_STEPDELAY(n)	(0x68 + ((n - 1) * 8))
-#define REG_FIFO0CNT		0xE4
-#define REG_FIFO0THR		0xE8
-#define REG_FIFO1THR		0xF4
-#define REG_FIFO0		0x100
-#define REG_FIFO1		0x200
-
-/*	Register Bitfields	*/
-#define IRQWKUP_ENB		BIT(0)
-
-/* Step Enable */
-#define STEPENB_MASK		(0x1FFFF << 0)
-#define STEPENB(val)		(val << 0)
-#define STPENB_STEPENB		STEPENB(0x7FFF)
-
-/* IRQ enable */
-#define IRQENB_FIFO0THRES	BIT(2)
-#define IRQENB_FIFO1THRES	BIT(5)
-#define IRQENB_PENUP		BIT(9)
-
-/* Step Configuration */
-#define STEPCONFIG_MODE_MASK	(3 << 0)
-#define STEPCONFIG_MODE(val)	(val << 0)
-#define STEPCONFIG_MODE_HWSYNC	STEPCONFIG_MODE(2)
-#define STEPCONFIG_AVG_MASK	(7 << 2)
-#define STEPCONFIG_AVG(val)	(val << 2)
-#define STEPCONFIG_AVG_16	STEPCONFIG_AVG(4)
-#define STEPCONFIG_XPP		BIT(5)
-#define STEPCONFIG_XNN		BIT(6)
-#define STEPCONFIG_YPP		BIT(7)
-#define STEPCONFIG_YNN		BIT(8)
-#define STEPCONFIG_XNP		BIT(9)
-#define STEPCONFIG_YPN		BIT(10)
-#define STEPCONFIG_INM_MASK	(0xF << 15)
-#define STEPCONFIG_INM(val)	(val << 15)
-#define STEPCONFIG_INM_ADCREFM	STEPCONFIG_INM(8)
-#define STEPCONFIG_INP_MASK	(0xF << 19)
-#define STEPCONFIG_INP(val)	(val << 19)
-#define STEPCONFIG_INP_AN2	STEPCONFIG_INP(2)
-#define STEPCONFIG_INP_AN3	STEPCONFIG_INP(3)
-#define STEPCONFIG_INP_AN4	STEPCONFIG_INP(4)
-#define STEPCONFIG_INP_ADCREFM	STEPCONFIG_INP(8)
-#define STEPCONFIG_FIFO1	BIT(26)
-
-/* Delay register */
-#define STEPDELAY_OPEN_MASK	(0x3FFFF << 0)
-#define STEPDELAY_OPEN(val)	(val << 0)
-#define STEPCONFIG_OPENDLY	STEPDELAY_OPEN(0x098)
-
-/* Charge Config */
-#define STEPCHARGE_RFP_MASK	(7 << 12)
-#define STEPCHARGE_RFP(val)	(val << 12)
-#define STEPCHARGE_RFP_XPUL	STEPCHARGE_RFP(1)
-#define STEPCHARGE_INM_MASK	(0xF << 15)
-#define STEPCHARGE_INM(val)	(val << 15)
-#define STEPCHARGE_INM_AN1	STEPCHARGE_INM(1)
-#define STEPCHARGE_INP_MASK	(0xF << 19)
-#define STEPCHARGE_INP(val)	(val << 19)
-#define STEPCHARGE_INP_AN1	STEPCHARGE_INP(1)
-#define STEPCHARGE_RFM_MASK	(3 << 23)
-#define STEPCHARGE_RFM(val)	(val << 23)
-#define STEPCHARGE_RFM_XNUR	STEPCHARGE_RFM(1)
-
-/* Charge delay */
-#define CHARGEDLY_OPEN_MASK	(0x3FFFF << 0)
-#define CHARGEDLY_OPEN(val)	(val << 0)
-#define CHARGEDLY_OPENDLY	CHARGEDLY_OPEN(1)
-
-/* Control register */
-#define CNTRLREG_TSCSSENB	BIT(0)
-#define CNTRLREG_STEPID		BIT(1)
-#define CNTRLREG_STEPCONFIGWRT	BIT(2)
-#define CNTRLREG_AFE_CTRL_MASK	(3 << 5)
-#define CNTRLREG_AFE_CTRL(val)	(val << 5)
-#define CNTRLREG_4WIRE		CNTRLREG_AFE_CTRL(1)
-#define CNTRLREG_5WIRE		CNTRLREG_AFE_CTRL(2)
-#define CNTRLREG_8WIRE		CNTRLREG_AFE_CTRL(3)
-#define CNTRLREG_TSCENB		BIT(7)
+#include <linux/mfd/ti_am335x_tscadc.h>
 
 #define ADCFSM_STEPID		0x10
 #define SEQ_SETTLE		275
-#define ADC_CLK			3000000
 #define MAX_12BIT		((1 << 12) - 1)
 
 struct titsc {
 	struct input_dev	*input;
-	struct clk		*tsc_ick;
-	void __iomem		*tsc_base;
+	struct ti_tscadc_dev	*mfd_tscadc;
 	unsigned int		irq;
 	unsigned int		wires;
 	unsigned int		x_plate_resistance;
@@ -136,13 +45,13 @@ struct titsc {
 
 static unsigned int titsc_readl(struct titsc *ts, unsigned int reg)
 {
-	return readl(ts->tsc_base + reg);
+	return readl(ts->mfd_tscadc->tscadc_base + reg);
 }
 
 static void titsc_writel(struct titsc *tsc, unsigned int reg,
 					unsigned int val)
 {
-	writel(val, tsc->tsc_base + reg);
+	writel(val, tsc->mfd_tscadc->tscadc_base + reg);
 }
 
 static void titsc_step_config(struct titsc *ts_dev)
@@ -219,17 +128,7 @@ static void titsc_step_config(struct titsc *ts_dev)
 	titsc_writel(ts_dev, REG_STEPDELAY(total_steps + 2),
 			STEPCONFIG_OPENDLY);
 
-	titsc_writel(ts_dev, REG_SE, STPENB_STEPENB);
-}
-
-static void titsc_idle_config(struct titsc *ts_config)
-{
-	unsigned int idleconfig;
-
-	idleconfig = STEPCONFIG_YNN |
-			STEPCONFIG_INM_ADCREFM |
-			STEPCONFIG_YPN | STEPCONFIG_INP_ADCREFM;
-	titsc_writel(ts_config, REG_IDLECONFIG, idleconfig);
+	titsc_writel(ts_dev, REG_SE, STPENB_STEPENB_TC);
 }
 
 static void titsc_read_coordinates(struct titsc *ts_dev,
@@ -239,7 +138,7 @@ static void titsc_read_coordinates(struct titsc *ts_dev,
 	unsigned int prev_val_x = ~0, prev_val_y = ~0;
 	unsigned int prev_diff_x = ~0, prev_diff_y = ~0;
 	unsigned int read, diff;
-	unsigned int i;
+	unsigned int i, channel;
 
 	/*
 	 * Delta filter is used to remove large variations in sampled
@@ -250,21 +149,32 @@ static void titsc_read_coordinates(struct titsc *ts_dev,
 	 * if true the value is reported to the sub system.
 	 */
 	for (i = 0; i < fifocount - 1; i++) {
-		read = titsc_readl(ts_dev, REG_FIFO0) & 0xfff;
-		diff = abs(read - prev_val_x);
-		if (diff < prev_diff_x) {
-			prev_diff_x = diff;
-			*x = read;
+		read = titsc_readl(ts_dev, REG_FIFO0);
+		channel = read & 0xf0000;
+		channel = channel >> 0x10;
+		if ((channel >= 0) && (channel < ts_dev->steps_to_configure)) {
+			read &= 0xfff;
+			diff = abs(read - prev_val_x);
+			if (diff < prev_diff_x) {
+				prev_diff_x = diff;
+				*x = read;
+			}
+			prev_val_x = read;
 		}
-		prev_val_x = read;
 
-		read = titsc_readl(ts_dev, REG_FIFO1) & 0xfff;
-		diff = abs(read - prev_val_y);
-		if (diff < prev_diff_y) {
-			prev_diff_y = diff;
-			*y = read;
+		read = titsc_readl(ts_dev, REG_FIFO1);
+		channel = read & 0xf0000;
+		channel = channel >> 0x10;
+		if ((channel >= ts_dev->steps_to_configure) &&
+			(channel < (2 * ts_dev->steps_to_configure - 1))) {
+			read &= 0xfff;
+			diff = abs(read - prev_val_y);
+			if (diff < prev_diff_y) {
+				prev_diff_y = diff;
+				*y = read;
+			}
+			prev_val_y = read;
 		}
-		prev_val_y = read;
 	}
 }
 
@@ -276,6 +186,8 @@ static irqreturn_t titsc_irq(int irq, void *dev)
 	unsigned int x = 0, y = 0;
 	unsigned int z1, z2, z;
 	unsigned int fsm;
+	unsigned int fifo1count, fifo0count;
+	int i;
 
 	status = titsc_readl(ts_dev, REG_IRQSTATUS);
 	if (status & IRQENB_FIFO0THRES) {
@@ -284,6 +196,14 @@ static irqreturn_t titsc_irq(int irq, void *dev)
 		z1 = titsc_readl(ts_dev, REG_FIFO0) & 0xfff;
 		z2 = titsc_readl(ts_dev, REG_FIFO1) & 0xfff;
 
+		fifo1count = titsc_readl(ts_dev, REG_FIFO1CNT);
+		for (i = 0; i < fifo1count; i++)
+			titsc_readl(ts_dev, REG_FIFO1);
+
+		fifo0count = titsc_readl(ts_dev, REG_FIFO0CNT);
+		for (i = 0; i < fifo0count; i++)
+			titsc_readl(ts_dev, REG_FIFO0);
+
 		if (ts_dev->pen_down && z1 != 0 && z2 != 0) {
 			/*
 			 * Calculate pressure using formula
@@ -330,7 +250,7 @@ static irqreturn_t titsc_irq(int irq, void *dev)
 
 	titsc_writel(ts_dev, REG_IRQSTATUS, irqclr);
 
-	titsc_writel(ts_dev, REG_SE, STPENB_STEPENB);
+	titsc_writel(ts_dev, REG_SE, STPENB_STEPENB_TC);
 	return IRQ_HANDLED;
 }
 
@@ -340,28 +260,16 @@ static irqreturn_t titsc_irq(int irq, void *dev)
 
 static int __devinit titsc_probe(struct platform_device *pdev)
 {
-	const struct tsc_data *pdata = pdev->dev.platform_data;
-	struct resource *res;
 	struct titsc *ts_dev;
 	struct input_dev *input_dev;
-	struct clk *clk;
+	struct ti_tscadc_dev *tscadc_dev = pdev->dev.platform_data;
+	struct mfd_tscadc_board	*pdata;
 	int err;
-	int clk_value, ctrl, irq;
 
-	if (!pdata) {
-		dev_err(&pdev->dev, "missing platform data.\n");
-		return -EINVAL;
-	}
-
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!res) {
-		dev_err(&pdev->dev, "no memory resource defined.\n");
-		return -EINVAL;
-	}
+	pdata = tscadc_dev->dev->platform_data;
 
-	irq = platform_get_irq(pdev, 0);
-	if (irq < 0) {
-		dev_err(&pdev->dev, "no irq ID is specified.\n");
+	if (!pdata) {
+		dev_err(&pdev->dev, "Could not find platform data\n");
 		return -EINVAL;
 	}
 
@@ -374,85 +282,26 @@ static int __devinit titsc_probe(struct platform_device *pdev)
 		goto err_free_mem;
 	}
 
+	tscadc_dev->tsc = ts_dev;
+	ts_dev->mfd_tscadc = tscadc_dev;
 	ts_dev->input = input_dev;
-	ts_dev->irq = irq;
-	ts_dev->wires = pdata->wires;
-	ts_dev->x_plate_resistance = pdata->x_plate_resistance;
-	ts_dev->steps_to_configure = pdata->steps_to_configure;
-
-	res = request_mem_region(res->start, resource_size(res), pdev->name);
-	if (!res) {
-		dev_err(&pdev->dev, "failed to reserve registers.\n");
-		err = -EBUSY;
-		goto err_free_mem;
-	}
-
-	ts_dev->tsc_base = ioremap(res->start, resource_size(res));
-	if (!ts_dev->tsc_base) {
-		dev_err(&pdev->dev, "failed to map registers.\n");
-		err = -ENOMEM;
-		goto err_release_mem_region;
-	}
+	ts_dev->irq = tscadc_dev->irq;
+	ts_dev->wires = pdata->tsc_init->wires;
+	ts_dev->x_plate_resistance = pdata->tsc_init->x_plate_resistance;
+	ts_dev->steps_to_configure = pdata->tsc_init->steps_to_configure;
 
 	err = request_irq(ts_dev->irq, titsc_irq,
 			  0, pdev->dev.driver->name, ts_dev);
 	if (err) {
 		dev_err(&pdev->dev, "failed to allocate irq.\n");
-		goto err_unmap_regs;
-	}
-
-	ts_dev->tsc_ick = clk_get(&pdev->dev, "adc_tsc_ick");
-	if (IS_ERR(ts_dev->tsc_ick)) {
-		dev_err(&pdev->dev, "failed to get TSC ick\n");
-		goto err_free_irq;
-	}
-	clk_enable(ts_dev->tsc_ick);
-
-	clk = clk_get(&pdev->dev, "adc_tsc_fck");
-	if (IS_ERR(clk)) {
-		dev_err(&pdev->dev, "failed to get TSC fck\n");
-		err = PTR_ERR(clk);
-		goto err_disable_clk;
-	}
-
-	clk_value = clk_get_rate(clk) / ADC_CLK;
-	clk_put(clk);
-
-	if (clk_value < 7) {
-		dev_err(&pdev->dev, "clock input less than min clock requirement\n");
-		goto err_disable_clk;
-	}
-	/* CLKDIV needs to be configured to the value minus 1 */
-	titsc_writel(ts_dev, REG_CLKDIV, clk_value - 1);
-
-	 /* Enable wake-up of the SoC using touchscreen */
-	titsc_writel(ts_dev, REG_IRQWAKEUP, IRQWKUP_ENB);
-
-	ctrl = CNTRLREG_STEPCONFIGWRT |
-			CNTRLREG_TSCENB |
-			CNTRLREG_STEPID;
-	switch (ts_dev->wires) {
-	case 4:
-		ctrl |= CNTRLREG_4WIRE;
-		break;
-	case 5:
-		ctrl |= CNTRLREG_5WIRE;
-		break;
-	case 8:
-		ctrl |= CNTRLREG_8WIRE;
-		break;
+		goto err_free_mem;
 	}
-	titsc_writel(ts_dev, REG_CTRL, ctrl);
 
-	titsc_idle_config(ts_dev);
 	titsc_writel(ts_dev, REG_IRQENABLE, IRQENB_FIFO0THRES);
 	titsc_step_config(ts_dev);
 	titsc_writel(ts_dev, REG_FIFO0THR, ts_dev->steps_to_configure);
 
-	ctrl |= CNTRLREG_TSCSSENB;
-	titsc_writel(ts_dev, REG_CTRL, ctrl);
-
-	input_dev->name = "ti-tsc-adc";
+	input_dev->name = "ti-tsc";
 	input_dev->dev.parent = &pdev->dev;
 
 	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
@@ -465,20 +314,13 @@ static int __devinit titsc_probe(struct platform_device *pdev)
 	/* register to the input system */
 	err = input_register_device(input_dev);
 	if (err)
-		goto err_disable_clk;
+		goto err_free_irq;
 
 	platform_set_drvdata(pdev, ts_dev);
 	return 0;
 
-err_disable_clk:
-	clk_disable(ts_dev->tsc_ick);
-	clk_put(ts_dev->tsc_ick);
 err_free_irq:
 	free_irq(ts_dev->irq, ts_dev);
-err_unmap_regs:
-	iounmap(ts_dev->tsc_base);
-err_release_mem_region:
-	release_mem_region(res->start, resource_size(res));
 err_free_mem:
 	input_free_device(input_dev);
 	kfree(ts_dev);
@@ -487,32 +329,66 @@ err_free_mem:
 
 static int __devexit titsc_remove(struct platform_device *pdev)
 {
-	struct titsc *ts_dev = platform_get_drvdata(pdev);
-	struct resource *res;
+	struct ti_tscadc_dev *tscadc_dev = pdev->dev.platform_data;
+	struct titsc *ts_dev = tscadc_dev->tsc;
 
 	free_irq(ts_dev->irq, ts_dev);
 
 	input_unregister_device(ts_dev->input);
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	iounmap(ts_dev->tsc_base);
-	release_mem_region(res->start, resource_size(res));
+	platform_set_drvdata(pdev, NULL);
+	kfree(ts_dev);
+	return 0;
+}
 
-	clk_disable(ts_dev->tsc_ick);
-	clk_put(ts_dev->tsc_ick);
+#ifdef CONFIG_PM
+static int titsc_suspend(struct device *dev)
+{
+	struct ti_tscadc_dev *tscadc_dev = dev->platform_data;
+	struct titsc *ts_dev = tscadc_dev->tsc;
+	unsigned int idle;
+
+	if (device_may_wakeup(tscadc_dev->dev)) {
+		idle = titsc_readl(ts_dev, REG_IRQENABLE);
+		titsc_writel(ts_dev, REG_IRQENABLE,
+				(idle | IRQENB_HW_PEN));
+		titsc_writel(ts_dev, REG_IRQWAKEUP, IRQWKUP_ENB);
+	}
+	return 0;
+}
 
-	kfree(ts_dev);
+static int titsc_resume(struct device *dev)
+{
+	struct ti_tscadc_dev *tscadc_dev = dev->platform_data;
+	struct titsc *ts_dev = tscadc_dev->tsc;
 
-	platform_set_drvdata(pdev, NULL);
+	if (device_may_wakeup(tscadc_dev->dev)) {
+		titsc_writel(ts_dev, REG_IRQWAKEUP,
+				0x00);
+		titsc_writel(ts_dev, REG_IRQCLR, IRQENB_HW_PEN);
+	}
+	titsc_step_config(ts_dev);
+	titsc_writel(ts_dev, REG_FIFO0THR,
+			ts_dev->steps_to_configure);
 	return 0;
 }
 
+static const struct dev_pm_ops titsc_pm_ops = {
+	.suspend = titsc_suspend,
+	.resume  = titsc_resume,
+};
+#define TITSC_PM_OPS (&titsc_pm_ops)
+#else
+#define TITSC_PM_OPS NULL
+#endif
+
 static struct platform_driver ti_tsc_driver = {
 	.probe	= titsc_probe,
 	.remove	= __devexit_p(titsc_remove),
 	.driver	= {
 		.name   = "tsc",
 		.owner	= THIS_MODULE,
+		.pm	= TITSC_PM_OPS,
 	},
 };
 module_platform_driver(ti_tsc_driver);
diff --git a/drivers/mfd/ti_am335x_tscadc.c b/drivers/mfd/ti_am335x_tscadc.c
index 14df67bc390f..d812be4b61df 100644
--- a/drivers/mfd/ti_am335x_tscadc.c
+++ b/drivers/mfd/ti_am335x_tscadc.c
@@ -24,6 +24,7 @@
 #include <linux/pm_runtime.h>
 
 #include <linux/mfd/ti_am335x_tscadc.h>
+#include <linux/input/ti_am335x_tsc.h>
 
 static unsigned int tscadc_readl(struct ti_tscadc_dev *tsadc, unsigned int reg)
 {
@@ -62,15 +63,19 @@ static	int __devinit ti_tscadc_probe(struct platform_device *pdev)
 	struct resource		*res;
 	struct clk		*clk;
 	struct mfd_tscadc_board	*pdata = pdev->dev.platform_data;
+	struct mfd_cell		*cell;
 	int			irq;
 	int			err, ctrl;
 	int			clk_value, clock_rate;
+	int			tsc_wires;
 
 	if (!pdata) {
 		dev_err(&pdev->dev, "Could not find platform data\n");
 		return -EINVAL;
 	}
 
+	tsc_wires = pdata->tsc_init->wires;
+
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	if (!res) {
 		dev_err(&pdev->dev, "no memory resource defined.\n");
@@ -161,6 +166,12 @@ static	int __devinit ti_tscadc_probe(struct platform_device *pdev)
 	ctrl |= CNTRLREG_TSCSSENB;
 	tscadc_writel(tscadc, REG_CTRL, ctrl);
 
+	/* TSC Cell */
+	cell = &tscadc->cells[TSC_CELL];
+	cell->name = "tsc";
+	cell->platform_data = tscadc;
+	cell->pdata_size = sizeof(*tscadc);
+
 	err = mfd_add_devices(&pdev->dev, pdev->id, tscadc->cells,
 			TSCADC_CELLS, NULL, 0, NULL);
 	if (err < 0)
diff --git a/include/linux/mfd/ti_am335x_tscadc.h b/include/linux/mfd/ti_am335x_tscadc.h
index b7232b157061..fc18b2ef753f 100644
--- a/include/linux/mfd/ti_am335x_tscadc.h
+++ b/include/linux/mfd/ti_am335x_tscadc.h
@@ -47,6 +47,7 @@
 #define STEPENB_MASK		(0x1FFFF << 0)
 #define STEPENB(val)		((val) << 0)
 #define STPENB_STEPENB		STEPENB(0x1FFFF)
+#define STPENB_STEPENB_TC	STEPENB(0x1FFF)
 
 /* IRQ enable */
 #define IRQENB_HW_PEN		BIT(0)
@@ -120,7 +121,11 @@
 #define ADC_CLK			3000000
 #define	MAX_CLK_DIV		7
 
-#define TSCADC_CELLS		0
+#define TSCADC_CELLS		1
+
+enum tscadc_cells {
+	TSC_CELL,
+};
 
 struct mfd_tscadc_board {
 	struct tsc_data *tsc_init;
@@ -132,6 +137,9 @@ struct ti_tscadc_dev {
 	void __iomem *tscadc_base;
 	int irq;
 	struct mfd_cell cells[TSCADC_CELLS];
+
+	/* tsc device */
+	struct titsc *tsc;
 };
 
 #endif
-- 
cgit v1.2.3


From 5e53a69b44e893227b046a7bc74db3cb40d7f39b Mon Sep 17 00:00:00 2001
From: "Patil, Rachna" <rachna@ti.com>
Date: Tue, 16 Oct 2012 12:55:45 +0530
Subject: IIO : ADC: tiadc: Add support of TI's ADC driver

This patch adds support for TI's ADC driver.
This is a multifunctional device.
Analog input lines are provided on which
voltage measurements can be carried out.
You can have upto 8 input lines.

Signed-off-by: Patil, Rachna <rachna@ti.com>
Acked-by: Jonathan Cameron <jic23@kernel.org>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/iio/adc/Kconfig                     |   7 +
 drivers/iio/adc/Makefile                    |   1 +
 drivers/iio/adc/ti_am335x_adc.c             | 260 ++++++++++++++++++++++++++++
 drivers/mfd/ti_am335x_tscadc.c              |  18 +-
 include/linux/mfd/ti_am335x_tscadc.h        |   9 +-
 include/linux/platform_data/ti_am335x_adc.h |  14 ++
 6 files changed, 307 insertions(+), 2 deletions(-)
 create mode 100644 drivers/iio/adc/ti_am335x_adc.c
 create mode 100644 include/linux/platform_data/ti_am335x_adc.h

(limited to 'include/linux')

diff --git a/drivers/iio/adc/Kconfig b/drivers/iio/adc/Kconfig
index 492758120338..1401ed1af39f 100644
--- a/drivers/iio/adc/Kconfig
+++ b/drivers/iio/adc/Kconfig
@@ -60,4 +60,11 @@ config LP8788_ADC
 	help
 	  Say yes here to build support for TI LP8788 ADC.
 
+config TI_AM335X_ADC
+	tristate "TI's ADC driver"
+	depends on MFD_TI_AM335X_TSCADC
+	help
+	  Say yes here to build support for Texas Instruments ADC
+	  driver which is also a MFD client.
+
 endmenu
diff --git a/drivers/iio/adc/Makefile b/drivers/iio/adc/Makefile
index 900995d5e179..4410a90fc84b 100644
--- a/drivers/iio/adc/Makefile
+++ b/drivers/iio/adc/Makefile
@@ -8,3 +8,4 @@ obj-$(CONFIG_AD7476) += ad7476.o
 obj-$(CONFIG_AD7791) += ad7791.o
 obj-$(CONFIG_AT91_ADC) += at91_adc.o
 obj-$(CONFIG_LP8788_ADC) += lp8788_adc.o
+obj-$(CONFIG_TI_AM335X_ADC) += ti_am335x_adc.o
diff --git a/drivers/iio/adc/ti_am335x_adc.c b/drivers/iio/adc/ti_am335x_adc.c
new file mode 100644
index 000000000000..02a43c87a8a3
--- /dev/null
+++ b/drivers/iio/adc/ti_am335x_adc.c
@@ -0,0 +1,260 @@
+/*
+ * TI ADC MFD driver
+ *
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation version 2.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+#include <linux/io.h>
+#include <linux/iio/iio.h>
+
+#include <linux/mfd/ti_am335x_tscadc.h>
+#include <linux/platform_data/ti_am335x_adc.h>
+
+struct tiadc_device {
+	struct ti_tscadc_dev *mfd_tscadc;
+	int channels;
+};
+
+static unsigned int tiadc_readl(struct tiadc_device *adc, unsigned int reg)
+{
+	return readl(adc->mfd_tscadc->tscadc_base + reg);
+}
+
+static void tiadc_writel(struct tiadc_device *adc, unsigned int reg,
+					unsigned int val)
+{
+	writel(val, adc->mfd_tscadc->tscadc_base + reg);
+}
+
+static void tiadc_step_config(struct tiadc_device *adc_dev)
+{
+	unsigned int stepconfig;
+	int i, channels = 0, steps;
+
+	/*
+	 * There are 16 configurable steps and 8 analog input
+	 * lines available which are shared between Touchscreen and ADC.
+	 *
+	 * Steps backwards i.e. from 16 towards 0 are used by ADC
+	 * depending on number of input lines needed.
+	 * Channel would represent which analog input
+	 * needs to be given to ADC to digitalize data.
+	 */
+
+	steps = TOTAL_STEPS - adc_dev->channels;
+	channels = TOTAL_CHANNELS - adc_dev->channels;
+
+	stepconfig = STEPCONFIG_AVG_16 | STEPCONFIG_FIFO1;
+
+	for (i = (steps + 1); i <= TOTAL_STEPS; i++) {
+		tiadc_writel(adc_dev, REG_STEPCONFIG(i),
+				stepconfig | STEPCONFIG_INP(channels));
+		tiadc_writel(adc_dev, REG_STEPDELAY(i),
+				STEPCONFIG_OPENDLY);
+		channels++;
+	}
+	tiadc_writel(adc_dev, REG_SE, STPENB_STEPENB);
+}
+
+static int tiadc_channel_init(struct iio_dev *indio_dev, int channels)
+{
+	struct iio_chan_spec *chan_array;
+	int i;
+
+	indio_dev->num_channels = channels;
+	chan_array = kcalloc(indio_dev->num_channels,
+			sizeof(struct iio_chan_spec), GFP_KERNEL);
+
+	if (chan_array == NULL)
+		return -ENOMEM;
+
+	for (i = 0; i < (indio_dev->num_channels); i++) {
+		struct iio_chan_spec *chan = chan_array + i;
+		chan->type = IIO_VOLTAGE;
+		chan->indexed = 1;
+		chan->channel = i;
+		chan->info_mask = IIO_CHAN_INFO_RAW_SEPARATE_BIT;
+	}
+
+	indio_dev->channels = chan_array;
+
+	return indio_dev->num_channels;
+}
+
+static void tiadc_channels_remove(struct iio_dev *indio_dev)
+{
+	kfree(indio_dev->channels);
+}
+
+static int tiadc_read_raw(struct iio_dev *indio_dev,
+		struct iio_chan_spec const *chan,
+		int *val, int *val2, long mask)
+{
+	struct tiadc_device *adc_dev = iio_priv(indio_dev);
+	int i;
+	unsigned int fifo1count, readx1;
+
+	/*
+	 * When the sub-system is first enabled,
+	 * the sequencer will always start with the
+	 * lowest step (1) and continue until step (16).
+	 * For ex: If we have enabled 4 ADC channels and
+	 * currently use only 1 out of them, the
+	 * sequencer still configures all the 4 steps,
+	 * leading to 3 unwanted data.
+	 * Hence we need to flush out this data.
+	 */
+
+	fifo1count = tiadc_readl(adc_dev, REG_FIFO1CNT);
+	for (i = 0; i < fifo1count; i++) {
+		readx1 = tiadc_readl(adc_dev, REG_FIFO1);
+		if (i == chan->channel)
+			*val = readx1 & 0xfff;
+	}
+	tiadc_writel(adc_dev, REG_SE, STPENB_STEPENB);
+
+	return IIO_VAL_INT;
+}
+
+static const struct iio_info tiadc_info = {
+	.read_raw = &tiadc_read_raw,
+};
+
+static int __devinit tiadc_probe(struct platform_device *pdev)
+{
+	struct iio_dev		*indio_dev;
+	struct tiadc_device	*adc_dev;
+	struct ti_tscadc_dev	*tscadc_dev = pdev->dev.platform_data;
+	struct mfd_tscadc_board	*pdata;
+	int			err;
+
+	pdata = tscadc_dev->dev->platform_data;
+	if (!pdata || !pdata->adc_init) {
+		dev_err(&pdev->dev, "Could not find platform data\n");
+		return -EINVAL;
+	}
+
+	indio_dev = iio_device_alloc(sizeof(struct tiadc_device));
+	if (indio_dev == NULL) {
+		dev_err(&pdev->dev, "failed to allocate iio device\n");
+		err = -ENOMEM;
+		goto err_ret;
+	}
+	adc_dev = iio_priv(indio_dev);
+
+	adc_dev->mfd_tscadc = tscadc_dev;
+	adc_dev->channels = pdata->adc_init->adc_channels;
+
+	indio_dev->dev.parent = &pdev->dev;
+	indio_dev->name = dev_name(&pdev->dev);
+	indio_dev->modes = INDIO_DIRECT_MODE;
+	indio_dev->info = &tiadc_info;
+
+	tiadc_step_config(adc_dev);
+
+	err = tiadc_channel_init(indio_dev, adc_dev->channels);
+	if (err < 0)
+		goto err_free_device;
+
+	err = iio_device_register(indio_dev);
+	if (err)
+		goto err_free_channels;
+
+	platform_set_drvdata(pdev, indio_dev);
+
+	return 0;
+
+err_free_channels:
+	tiadc_channels_remove(indio_dev);
+err_free_device:
+	iio_device_free(indio_dev);
+err_ret:
+	return err;
+}
+
+static int __devexit tiadc_remove(struct platform_device *pdev)
+{
+	struct iio_dev *indio_dev = platform_get_drvdata(pdev);
+
+	iio_device_unregister(indio_dev);
+	tiadc_channels_remove(indio_dev);
+
+	iio_device_free(indio_dev);
+
+	return 0;
+}
+
+#ifdef CONFIG_PM
+static int tiadc_suspend(struct device *dev)
+{
+	struct iio_dev *indio_dev = dev_get_drvdata(dev);
+	struct tiadc_device *adc_dev = iio_priv(indio_dev);
+	struct ti_tscadc_dev *tscadc_dev = dev->platform_data;
+	unsigned int idle;
+
+	if (!device_may_wakeup(tscadc_dev->dev)) {
+		idle = tiadc_readl(adc_dev, REG_CTRL);
+		idle &= ~(CNTRLREG_TSCSSENB);
+		tiadc_writel(adc_dev, REG_CTRL, (idle |
+				CNTRLREG_POWERDOWN));
+	}
+
+	return 0;
+}
+
+static int tiadc_resume(struct device *dev)
+{
+	struct iio_dev *indio_dev = dev_get_drvdata(dev);
+	struct tiadc_device *adc_dev = iio_priv(indio_dev);
+	unsigned int restore;
+
+	/* Make sure ADC is powered up */
+	restore = tiadc_readl(adc_dev, REG_CTRL);
+	restore &= ~(CNTRLREG_POWERDOWN);
+	tiadc_writel(adc_dev, REG_CTRL, restore);
+
+	tiadc_step_config(adc_dev);
+
+	return 0;
+}
+
+static const struct dev_pm_ops tiadc_pm_ops = {
+	.suspend = tiadc_suspend,
+	.resume = tiadc_resume,
+};
+#define TIADC_PM_OPS (&tiadc_pm_ops)
+#else
+#define TIADC_PM_OPS NULL
+#endif
+
+static struct platform_driver tiadc_driver = {
+	.driver = {
+		.name   = "tiadc",
+		.owner	= THIS_MODULE,
+		.pm	= TIADC_PM_OPS,
+	},
+	.probe	= tiadc_probe,
+	.remove	= __devexit_p(tiadc_remove),
+};
+
+module_platform_driver(tiadc_driver);
+
+MODULE_DESCRIPTION("TI ADC controller driver");
+MODULE_AUTHOR("Rachna Patil <rachna@ti.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/mfd/ti_am335x_tscadc.c b/drivers/mfd/ti_am335x_tscadc.c
index d812be4b61df..e947dd8bbcc3 100644
--- a/drivers/mfd/ti_am335x_tscadc.c
+++ b/drivers/mfd/ti_am335x_tscadc.c
@@ -25,6 +25,7 @@
 
 #include <linux/mfd/ti_am335x_tscadc.h>
 #include <linux/input/ti_am335x_tsc.h>
+#include <linux/platform_data/ti_am335x_adc.h>
 
 static unsigned int tscadc_readl(struct ti_tscadc_dev *tsadc, unsigned int reg)
 {
@@ -67,14 +68,23 @@ static	int __devinit ti_tscadc_probe(struct platform_device *pdev)
 	int			irq;
 	int			err, ctrl;
 	int			clk_value, clock_rate;
-	int			tsc_wires;
+	int			tsc_wires, adc_channels = 0, total_channels;
 
 	if (!pdata) {
 		dev_err(&pdev->dev, "Could not find platform data\n");
 		return -EINVAL;
 	}
 
+	if (pdata->adc_init)
+		adc_channels = pdata->adc_init->adc_channels;
+
 	tsc_wires = pdata->tsc_init->wires;
+	total_channels = tsc_wires + adc_channels;
+
+	if (total_channels > 8) {
+		dev_err(&pdev->dev, "Number of i/p channels more than 8\n");
+		return -EINVAL;
+	}
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	if (!res) {
@@ -172,6 +182,12 @@ static	int __devinit ti_tscadc_probe(struct platform_device *pdev)
 	cell->platform_data = tscadc;
 	cell->pdata_size = sizeof(*tscadc);
 
+	/* ADC Cell */
+	cell = &tscadc->cells[ADC_CELL];
+	cell->name = "tiadc";
+	cell->platform_data = tscadc;
+	cell->pdata_size = sizeof(*tscadc);
+
 	err = mfd_add_devices(&pdev->dev, pdev->id, tscadc->cells,
 			TSCADC_CELLS, NULL, 0, NULL);
 	if (err < 0)
diff --git a/include/linux/mfd/ti_am335x_tscadc.h b/include/linux/mfd/ti_am335x_tscadc.h
index fc18b2ef753f..c79ad5d2f271 100644
--- a/include/linux/mfd/ti_am335x_tscadc.h
+++ b/include/linux/mfd/ti_am335x_tscadc.h
@@ -120,15 +120,19 @@
 
 #define ADC_CLK			3000000
 #define	MAX_CLK_DIV		7
+#define TOTAL_STEPS		16
+#define TOTAL_CHANNELS		8
 
-#define TSCADC_CELLS		1
+#define TSCADC_CELLS		2
 
 enum tscadc_cells {
 	TSC_CELL,
+	ADC_CELL,
 };
 
 struct mfd_tscadc_board {
 	struct tsc_data *tsc_init;
+	struct adc_data *adc_init;
 };
 
 struct ti_tscadc_dev {
@@ -140,6 +144,9 @@ struct ti_tscadc_dev {
 
 	/* tsc device */
 	struct titsc *tsc;
+
+	/* adc device */
+	struct adc_device *adc;
 };
 
 #endif
diff --git a/include/linux/platform_data/ti_am335x_adc.h b/include/linux/platform_data/ti_am335x_adc.h
new file mode 100644
index 000000000000..e41d5834cb84
--- /dev/null
+++ b/include/linux/platform_data/ti_am335x_adc.h
@@ -0,0 +1,14 @@
+#ifndef __LINUX_TI_AM335X_ADC_H
+#define __LINUX_TI_AM335X_ADC_H
+
+/**
+ * struct adc_data	ADC Input information
+ * @adc_channels:	Number of analog inputs
+ *			available for ADC.
+ */
+
+struct adc_data {
+	unsigned int adc_channels;
+};
+
+#endif
-- 
cgit v1.2.3


From 216b6cbdcbd86b1db0754d58886b466ae31f5a63 Mon Sep 17 00:00:00 2001
From: Namjae Jeon <linkinjeon@gmail.com>
Date: Wed, 29 Aug 2012 10:10:10 -0400
Subject: exportfs: add FILEID_INVALID to indicate invalid fid_type

This commit adds FILEID_INVALID = 0xff in fid_type to
indicate invalid fid_type

It avoids using magic number 255

Signed-off-by: Namjae Jeon <linkinjeon@gmail.com>
Signed-off-by: Vivek Trivedi <vtrivedi018@gmail.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/exportfs/expfs.c      | 4 ++--
 fs/fhandle.c             | 2 +-
 fs/nfsd/nfsfh.c          | 4 ++--
 include/linux/exportfs.h | 5 +++++
 4 files changed, 10 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index 29ab099e3e08..f1f1c59c2966 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -322,10 +322,10 @@ static int export_encode_fh(struct inode *inode, struct fid *fid,
 
 	if (parent && (len < 4)) {
 		*max_len = 4;
-		return 255;
+		return FILEID_INVALID;
 	} else if (len < 2) {
 		*max_len = 2;
-		return 255;
+		return FILEID_INVALID;
 	}
 
 	len = 2;
diff --git a/fs/fhandle.c b/fs/fhandle.c
index f775bfdd6e4a..26f12b95702a 100644
--- a/fs/fhandle.c
+++ b/fs/fhandle.c
@@ -52,7 +52,7 @@ static long do_sys_name_to_handle(struct path *path,
 	handle_bytes = handle_dwords * sizeof(u32);
 	handle->handle_bytes = handle_bytes;
 	if ((handle->handle_bytes > f_handle.handle_bytes) ||
-	    (retval == 255) || (retval == -ENOSPC)) {
+	    (retval == FILEID_INVALID) || (retval == -ENOSPC)) {
 		/* As per old exportfs_encode_fh documentation
 		 * we could return ENOSPC to indicate overflow
 		 * But file system returned 255 always. So handle
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index 032af381b3aa..814afaa4458a 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -572,7 +572,7 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,
 
 		if (inode)
 			_fh_update(fhp, exp, dentry);
-		if (fhp->fh_handle.fh_fileid_type == 255) {
+		if (fhp->fh_handle.fh_fileid_type == FILEID_INVALID) {
 			fh_put(fhp);
 			return nfserr_opnotsupp;
 		}
@@ -603,7 +603,7 @@ fh_update(struct svc_fh *fhp)
 			goto out;
 
 		_fh_update(fhp, fhp->fh_export, dentry);
-		if (fhp->fh_handle.fh_fileid_type == 255)
+		if (fhp->fh_handle.fh_fileid_type == FILEID_INVALID)
 			return nfserr_opnotsupp;
 	}
 out:
diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h
index 12291a7ee275..0e1452546300 100644
--- a/include/linux/exportfs.h
+++ b/include/linux/exportfs.h
@@ -83,6 +83,11 @@ enum fid_type {
 	 * 64 bit parent inode number.
 	 */
 	FILEID_NILFS_WITH_PARENT = 0x62,
+
+	/*
+	 * Filesystems must not use 0xff file ID.
+	 */
+	FILEID_INVALID = 0xff,
 };
 
 struct fid {
-- 
cgit v1.2.3


From ada8a8a13b13a2749818524a7949935f68d2b3eb Mon Sep 17 00:00:00 2001
From: Wei WANG <wei_wang@realsil.com.cn>
Date: Mon, 29 Oct 2012 13:49:33 +0800
Subject: mfd: Add realtek pcie card reader driver

Realtek PCI-E card reader driver adapts requests from upper-level
sdmmc/memstick layer to the real physical card reader.

Signed-off-by: Wei WANG <wei_wang@realsil.com.cn>
Reviewed-by: Arnd Bergmann <arnd@arndb.de>
Tested-by: Borislav Petkov <bp@alien8.de>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/Kconfig             |    9 +
 drivers/mfd/Makefile            |    3 +
 drivers/mfd/rtl8411.c           |  251 ++++++++
 drivers/mfd/rts5209.c           |  223 +++++++
 drivers/mfd/rts5229.c           |  205 +++++++
 drivers/mfd/rtsx_pcr.c          | 1251 +++++++++++++++++++++++++++++++++++++++
 drivers/mfd/rtsx_pcr.h          |   32 +
 include/linux/mfd/rtsx_common.h |   48 ++
 include/linux/mfd/rtsx_pci.h    |  794 +++++++++++++++++++++++++
 9 files changed, 2816 insertions(+)
 create mode 100644 drivers/mfd/rtl8411.c
 create mode 100644 drivers/mfd/rts5209.c
 create mode 100644 drivers/mfd/rts5229.c
 create mode 100644 drivers/mfd/rtsx_pcr.c
 create mode 100644 drivers/mfd/rtsx_pcr.h
 create mode 100644 include/linux/mfd/rtsx_common.h
 create mode 100644 include/linux/mfd/rtsx_pci.h

(limited to 'include/linux')

diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index 9bba7f78ff36..d0cb4d4bc2cc 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -63,6 +63,15 @@ config MFD_SM501_GPIO
 	 lines on the SM501. The platform data is used to supply the
 	 base number for the first GPIO line to register.
 
+config MFD_RTSX_PCI
+	tristate "Support for Realtek PCI-E card reader"
+	depends on PCI
+	help
+	  This supports for Realtek PCI-Express card reader including rts5209,
+	  rts5229, rtl8411, etc. Realtek card reader supports access to many
+	  types of memory cards, such as Memory Stick, Memory Stick Pro,
+	  Secure Digital and MultiMediaCard.
+
 config MFD_ASIC3
 	bool "Support for Compaq ASIC3"
 	depends on GENERIC_HARDIRQS && GPIOLIB && ARM
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index 442c17e40741..a4093a44304a 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -9,6 +9,9 @@ obj-$(CONFIG_MFD_88PM805)	+= 88pm805.o 88pm80x.o
 obj-$(CONFIG_MFD_SM501)		+= sm501.o
 obj-$(CONFIG_MFD_ASIC3)		+= asic3.o tmio_core.o
 
+rtsx_pci-objs			:= rtsx_pcr.o rts5209.o rts5229.o rtl8411.o
+obj-$(CONFIG_MFD_RTSX_PCI)	+= rtsx_pci.o
+
 obj-$(CONFIG_HTC_EGPIO)		+= htc-egpio.o
 obj-$(CONFIG_HTC_PASIC3)	+= htc-pasic3.o
 obj-$(CONFIG_HTC_I2CPLD)	+= htc-i2cpld.o
diff --git a/drivers/mfd/rtl8411.c b/drivers/mfd/rtl8411.c
new file mode 100644
index 000000000000..89f046ca9e41
--- /dev/null
+++ b/drivers/mfd/rtl8411.c
@@ -0,0 +1,251 @@
+/* Driver for Realtek PCI-Express card reader
+ *
+ * Copyright(c) 2009 Realtek Semiconductor Corp. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author:
+ *   Wei WANG <wei_wang@realsil.com.cn>
+ *   No. 450, Shenhu Road, Suzhou Industry Park, Suzhou, China
+ */
+
+#include <linux/module.h>
+#include <linux/bitops.h>
+#include <linux/delay.h>
+#include <linux/mfd/rtsx_pci.h>
+
+#include "rtsx_pcr.h"
+
+static u8 rtl8411_get_ic_version(struct rtsx_pcr *pcr)
+{
+	u8 val;
+
+	rtsx_pci_read_register(pcr, SYS_VER, &val);
+	return val & 0x0F;
+}
+
+static int rtl8411_extra_init_hw(struct rtsx_pcr *pcr)
+{
+	return rtsx_pci_write_register(pcr, CD_PAD_CTL,
+			CD_DISABLE_MASK | CD_AUTO_DISABLE, CD_ENABLE);
+}
+
+static int rtl8411_turn_on_led(struct rtsx_pcr *pcr)
+{
+	return rtsx_pci_write_register(pcr, CARD_GPIO, 0x01, 0x00);
+}
+
+static int rtl8411_turn_off_led(struct rtsx_pcr *pcr)
+{
+	return rtsx_pci_write_register(pcr, CARD_GPIO, 0x01, 0x01);
+}
+
+static int rtl8411_enable_auto_blink(struct rtsx_pcr *pcr)
+{
+	return rtsx_pci_write_register(pcr, CARD_AUTO_BLINK, 0xFF, 0x0D);
+}
+
+static int rtl8411_disable_auto_blink(struct rtsx_pcr *pcr)
+{
+	return rtsx_pci_write_register(pcr, CARD_AUTO_BLINK, 0x08, 0x00);
+}
+
+static int rtl8411_card_power_on(struct rtsx_pcr *pcr, int card)
+{
+	int err;
+
+	rtsx_pci_init_cmd(pcr);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL,
+			BPP_POWER_MASK, BPP_POWER_5_PERCENT_ON);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, LDO_CTL,
+			BPP_LDO_POWB, BPP_LDO_SUSPEND);
+	err = rtsx_pci_send_cmd(pcr, 100);
+	if (err < 0)
+		return err;
+
+	/* To avoid too large in-rush current */
+	udelay(150);
+
+	err = rtsx_pci_write_register(pcr, CARD_PWR_CTL,
+			BPP_POWER_MASK, BPP_POWER_10_PERCENT_ON);
+	if (err < 0)
+		return err;
+
+	udelay(150);
+
+	err = rtsx_pci_write_register(pcr, CARD_PWR_CTL,
+			BPP_POWER_MASK, BPP_POWER_15_PERCENT_ON);
+	if (err < 0)
+		return err;
+
+	udelay(150);
+
+	err = rtsx_pci_write_register(pcr, CARD_PWR_CTL,
+			BPP_POWER_MASK, BPP_POWER_ON);
+	if (err < 0)
+		return err;
+
+	return rtsx_pci_write_register(pcr, LDO_CTL, BPP_LDO_POWB, BPP_LDO_ON);
+}
+
+static int rtl8411_card_power_off(struct rtsx_pcr *pcr, int card)
+{
+	int err;
+
+	err = rtsx_pci_write_register(pcr, CARD_PWR_CTL,
+			BPP_POWER_MASK, BPP_POWER_OFF);
+	if (err < 0)
+		return err;
+
+	return rtsx_pci_write_register(pcr, LDO_CTL,
+			BPP_LDO_POWB, BPP_LDO_SUSPEND);
+}
+
+static unsigned int rtl8411_cd_deglitch(struct rtsx_pcr *pcr)
+{
+	unsigned int card_exist;
+
+	card_exist = rtsx_pci_readl(pcr, RTSX_BIPR);
+	card_exist &= CARD_EXIST;
+	if (!card_exist) {
+		/* Enable card CD */
+		rtsx_pci_write_register(pcr, CD_PAD_CTL,
+				CD_DISABLE_MASK, CD_ENABLE);
+		/* Enable card interrupt */
+		rtsx_pci_write_register(pcr, EFUSE_CONTENT, 0xe0, 0x00);
+		return 0;
+	}
+
+	if (hweight32(card_exist) > 1) {
+		rtsx_pci_write_register(pcr, CARD_PWR_CTL,
+				BPP_POWER_MASK, BPP_POWER_5_PERCENT_ON);
+		msleep(100);
+
+		card_exist = rtsx_pci_readl(pcr, RTSX_BIPR);
+		if (card_exist & MS_EXIST)
+			card_exist = MS_EXIST;
+		else if (card_exist & SD_EXIST)
+			card_exist = SD_EXIST;
+		else
+			card_exist = 0;
+
+		rtsx_pci_write_register(pcr, CARD_PWR_CTL,
+				BPP_POWER_MASK, BPP_POWER_OFF);
+
+		dev_dbg(&(pcr->pci->dev),
+				"After CD deglitch, card_exist = 0x%x\n",
+				card_exist);
+	}
+
+	if (card_exist & MS_EXIST) {
+		/* Disable SD interrupt */
+		rtsx_pci_write_register(pcr, EFUSE_CONTENT, 0xe0, 0x40);
+		rtsx_pci_write_register(pcr, CD_PAD_CTL,
+				CD_DISABLE_MASK, MS_CD_EN_ONLY);
+	} else if (card_exist & SD_EXIST) {
+		/* Disable MS interrupt */
+		rtsx_pci_write_register(pcr, EFUSE_CONTENT, 0xe0, 0x80);
+		rtsx_pci_write_register(pcr, CD_PAD_CTL,
+				CD_DISABLE_MASK, SD_CD_EN_ONLY);
+	}
+
+	return card_exist;
+}
+
+static const struct pcr_ops rtl8411_pcr_ops = {
+	.extra_init_hw = rtl8411_extra_init_hw,
+	.optimize_phy = NULL,
+	.turn_on_led = rtl8411_turn_on_led,
+	.turn_off_led = rtl8411_turn_off_led,
+	.enable_auto_blink = rtl8411_enable_auto_blink,
+	.disable_auto_blink = rtl8411_disable_auto_blink,
+	.card_power_on = rtl8411_card_power_on,
+	.card_power_off = rtl8411_card_power_off,
+	.cd_deglitch = rtl8411_cd_deglitch,
+};
+
+/* SD Pull Control Enable:
+ *     SD_DAT[3:0] ==> pull up
+ *     SD_CD       ==> pull up
+ *     SD_WP       ==> pull up
+ *     SD_CMD      ==> pull up
+ *     SD_CLK      ==> pull down
+ */
+static const u32 rtl8411_sd_pull_ctl_enable_tbl[] = {
+	RTSX_REG_PAIR(CARD_PULL_CTL1, 0xAA),
+	RTSX_REG_PAIR(CARD_PULL_CTL2, 0xAA),
+	RTSX_REG_PAIR(CARD_PULL_CTL3, 0xA9),
+	RTSX_REG_PAIR(CARD_PULL_CTL4, 0x09),
+	RTSX_REG_PAIR(CARD_PULL_CTL5, 0x09),
+	RTSX_REG_PAIR(CARD_PULL_CTL6, 0x04),
+	0,
+};
+
+/* SD Pull Control Disable:
+ *     SD_DAT[3:0] ==> pull down
+ *     SD_CD       ==> pull up
+ *     SD_WP       ==> pull down
+ *     SD_CMD      ==> pull down
+ *     SD_CLK      ==> pull down
+ */
+static const u32 rtl8411_sd_pull_ctl_disable_tbl[] = {
+	RTSX_REG_PAIR(CARD_PULL_CTL1, 0x65),
+	RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55),
+	RTSX_REG_PAIR(CARD_PULL_CTL3, 0x95),
+	RTSX_REG_PAIR(CARD_PULL_CTL4, 0x09),
+	RTSX_REG_PAIR(CARD_PULL_CTL5, 0x05),
+	RTSX_REG_PAIR(CARD_PULL_CTL6, 0x04),
+	0,
+};
+
+/* MS Pull Control Enable:
+ *     MS CD       ==> pull up
+ *     others      ==> pull down
+ */
+static const u32 rtl8411_ms_pull_ctl_enable_tbl[] = {
+	RTSX_REG_PAIR(CARD_PULL_CTL1, 0x65),
+	RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55),
+	RTSX_REG_PAIR(CARD_PULL_CTL3, 0x95),
+	RTSX_REG_PAIR(CARD_PULL_CTL4, 0x05),
+	RTSX_REG_PAIR(CARD_PULL_CTL5, 0x05),
+	RTSX_REG_PAIR(CARD_PULL_CTL6, 0x04),
+	0,
+};
+
+/* MS Pull Control Disable:
+ *     MS CD       ==> pull up
+ *     others      ==> pull down
+ */
+static const u32 rtl8411_ms_pull_ctl_disable_tbl[] = {
+	RTSX_REG_PAIR(CARD_PULL_CTL1, 0x65),
+	RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55),
+	RTSX_REG_PAIR(CARD_PULL_CTL3, 0x95),
+	RTSX_REG_PAIR(CARD_PULL_CTL4, 0x09),
+	RTSX_REG_PAIR(CARD_PULL_CTL5, 0x05),
+	RTSX_REG_PAIR(CARD_PULL_CTL6, 0x04),
+	0,
+};
+
+void rtl8411_init_params(struct rtsx_pcr *pcr)
+{
+	pcr->extra_caps = EXTRA_CAPS_SD_SDR50 | EXTRA_CAPS_SD_SDR104;
+	pcr->num_slots = 2;
+	pcr->ops = &rtl8411_pcr_ops;
+
+	pcr->ic_version = rtl8411_get_ic_version(pcr);
+	pcr->sd_pull_ctl_enable_tbl = rtl8411_sd_pull_ctl_enable_tbl;
+	pcr->sd_pull_ctl_disable_tbl = rtl8411_sd_pull_ctl_disable_tbl;
+	pcr->ms_pull_ctl_enable_tbl = rtl8411_ms_pull_ctl_enable_tbl;
+	pcr->ms_pull_ctl_disable_tbl = rtl8411_ms_pull_ctl_disable_tbl;
+}
diff --git a/drivers/mfd/rts5209.c b/drivers/mfd/rts5209.c
new file mode 100644
index 000000000000..283a4f148084
--- /dev/null
+++ b/drivers/mfd/rts5209.c
@@ -0,0 +1,223 @@
+/* Driver for Realtek PCI-Express card reader
+ *
+ * Copyright(c) 2009 Realtek Semiconductor Corp. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author:
+ *   Wei WANG <wei_wang@realsil.com.cn>
+ *   No. 450, Shenhu Road, Suzhou Industry Park, Suzhou, China
+ */
+
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/mfd/rtsx_pci.h>
+
+#include "rtsx_pcr.h"
+
+static u8 rts5209_get_ic_version(struct rtsx_pcr *pcr)
+{
+	u8 val;
+
+	val = rtsx_pci_readb(pcr, 0x1C);
+	return val & 0x0F;
+}
+
+static void rts5209_init_vendor_cfg(struct rtsx_pcr *pcr)
+{
+	u32 val;
+
+	rtsx_pci_read_config_dword(pcr, 0x724, &val);
+	dev_dbg(&(pcr->pci->dev), "Cfg 0x724: 0x%x\n", val);
+
+	if (!(val & 0x80)) {
+		if (val & 0x08)
+			pcr->ms_pmos = false;
+		else
+			pcr->ms_pmos = true;
+	}
+}
+
+static int rts5209_extra_init_hw(struct rtsx_pcr *pcr)
+{
+	rtsx_pci_init_cmd(pcr);
+
+	/* Turn off LED */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_GPIO, 0xFF, 0x03);
+	/* Configure GPIO as output */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_GPIO_DIR, 0xFF, 0x03);
+
+	return rtsx_pci_send_cmd(pcr, 100);
+}
+
+static int rts5209_optimize_phy(struct rtsx_pcr *pcr)
+{
+	return rtsx_pci_write_phy_register(pcr, 0x00, 0xB966);
+}
+
+static int rts5209_turn_on_led(struct rtsx_pcr *pcr)
+{
+	return rtsx_pci_write_register(pcr, CARD_GPIO, 0x01, 0x00);
+}
+
+static int rts5209_turn_off_led(struct rtsx_pcr *pcr)
+{
+	return rtsx_pci_write_register(pcr, CARD_GPIO, 0x01, 0x01);
+}
+
+static int rts5209_enable_auto_blink(struct rtsx_pcr *pcr)
+{
+	return rtsx_pci_write_register(pcr, CARD_AUTO_BLINK, 0xFF, 0x0D);
+}
+
+static int rts5209_disable_auto_blink(struct rtsx_pcr *pcr)
+{
+	return rtsx_pci_write_register(pcr, CARD_AUTO_BLINK, 0x08, 0x00);
+}
+
+static int rts5209_card_power_on(struct rtsx_pcr *pcr, int card)
+{
+	int err;
+	u8 pwr_mask, partial_pwr_on, pwr_on;
+
+	pwr_mask = SD_POWER_MASK;
+	partial_pwr_on = SD_PARTIAL_POWER_ON;
+	pwr_on = SD_POWER_ON;
+
+	if (pcr->ms_pmos && (card == RTSX_MS_CARD)) {
+		pwr_mask = MS_POWER_MASK;
+		partial_pwr_on = MS_PARTIAL_POWER_ON;
+		pwr_on = MS_POWER_ON;
+	}
+
+	rtsx_pci_init_cmd(pcr);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL,
+			pwr_mask, partial_pwr_on);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL,
+			LDO3318_PWR_MASK, 0x04);
+	err = rtsx_pci_send_cmd(pcr, 100);
+	if (err < 0)
+		return err;
+
+	/* To avoid too large in-rush current */
+	udelay(150);
+
+	rtsx_pci_init_cmd(pcr);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL, pwr_mask, pwr_on);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL,
+			LDO3318_PWR_MASK, 0x00);
+	err = rtsx_pci_send_cmd(pcr, 100);
+	if (err < 0)
+		return err;
+
+	return 0;
+}
+
+static int rts5209_card_power_off(struct rtsx_pcr *pcr, int card)
+{
+	u8 pwr_mask, pwr_off;
+
+	pwr_mask = SD_POWER_MASK;
+	pwr_off = SD_POWER_OFF;
+
+	if (pcr->ms_pmos && (card == RTSX_MS_CARD)) {
+		pwr_mask = MS_POWER_MASK;
+		pwr_off = MS_POWER_OFF;
+	}
+
+	rtsx_pci_init_cmd(pcr);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL,
+			pwr_mask | PMOS_STRG_MASK, pwr_off | PMOS_STRG_400mA);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL,
+			LDO3318_PWR_MASK, 0X06);
+	return rtsx_pci_send_cmd(pcr, 100);
+}
+
+static const struct pcr_ops rts5209_pcr_ops = {
+	.extra_init_hw = rts5209_extra_init_hw,
+	.optimize_phy = rts5209_optimize_phy,
+	.turn_on_led = rts5209_turn_on_led,
+	.turn_off_led = rts5209_turn_off_led,
+	.enable_auto_blink = rts5209_enable_auto_blink,
+	.disable_auto_blink = rts5209_disable_auto_blink,
+	.card_power_on = rts5209_card_power_on,
+	.card_power_off = rts5209_card_power_off,
+	.cd_deglitch = NULL,
+};
+
+/* SD Pull Control Enable:
+ *     SD_DAT[3:0] ==> pull up
+ *     SD_CD       ==> pull up
+ *     SD_WP       ==> pull up
+ *     SD_CMD      ==> pull up
+ *     SD_CLK      ==> pull down
+ */
+static const u32 rts5209_sd_pull_ctl_enable_tbl[] = {
+	RTSX_REG_PAIR(CARD_PULL_CTL1, 0xAA),
+	RTSX_REG_PAIR(CARD_PULL_CTL2, 0xAA),
+	RTSX_REG_PAIR(CARD_PULL_CTL3, 0xE9),
+	0,
+};
+
+/* SD Pull Control Disable:
+ *     SD_DAT[3:0] ==> pull down
+ *     SD_CD       ==> pull up
+ *     SD_WP       ==> pull down
+ *     SD_CMD      ==> pull down
+ *     SD_CLK      ==> pull down
+ */
+static const u32 rts5209_sd_pull_ctl_disable_tbl[] = {
+	RTSX_REG_PAIR(CARD_PULL_CTL1, 0x55),
+	RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55),
+	RTSX_REG_PAIR(CARD_PULL_CTL3, 0xD5),
+	0,
+};
+
+/* MS Pull Control Enable:
+ *     MS CD       ==> pull up
+ *     others      ==> pull down
+ */
+static const u32 rts5209_ms_pull_ctl_enable_tbl[] = {
+	RTSX_REG_PAIR(CARD_PULL_CTL4, 0x55),
+	RTSX_REG_PAIR(CARD_PULL_CTL5, 0x55),
+	RTSX_REG_PAIR(CARD_PULL_CTL6, 0x15),
+	0,
+};
+
+/* MS Pull Control Disable:
+ *     MS CD       ==> pull up
+ *     others      ==> pull down
+ */
+static const u32 rts5209_ms_pull_ctl_disable_tbl[] = {
+	RTSX_REG_PAIR(CARD_PULL_CTL4, 0x55),
+	RTSX_REG_PAIR(CARD_PULL_CTL5, 0x55),
+	RTSX_REG_PAIR(CARD_PULL_CTL6, 0x15),
+	0,
+};
+
+void rts5209_init_params(struct rtsx_pcr *pcr)
+{
+	pcr->extra_caps = EXTRA_CAPS_SD_SDR50 |
+		EXTRA_CAPS_SD_SDR104 | EXTRA_CAPS_MMC_8BIT;
+	pcr->num_slots = 2;
+	pcr->ops = &rts5209_pcr_ops;
+
+	rts5209_init_vendor_cfg(pcr);
+
+	pcr->ic_version = rts5209_get_ic_version(pcr);
+	pcr->sd_pull_ctl_enable_tbl = rts5209_sd_pull_ctl_enable_tbl;
+	pcr->sd_pull_ctl_disable_tbl = rts5209_sd_pull_ctl_disable_tbl;
+	pcr->ms_pull_ctl_enable_tbl = rts5209_ms_pull_ctl_enable_tbl;
+	pcr->ms_pull_ctl_disable_tbl = rts5209_ms_pull_ctl_disable_tbl;
+}
diff --git a/drivers/mfd/rts5229.c b/drivers/mfd/rts5229.c
new file mode 100644
index 000000000000..b9dbab266fda
--- /dev/null
+++ b/drivers/mfd/rts5229.c
@@ -0,0 +1,205 @@
+/* Driver for Realtek PCI-Express card reader
+ *
+ * Copyright(c) 2009 Realtek Semiconductor Corp. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author:
+ *   Wei WANG <wei_wang@realsil.com.cn>
+ *   No. 450, Shenhu Road, Suzhou Industry Park, Suzhou, China
+ */
+
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/mfd/rtsx_pci.h>
+
+#include "rtsx_pcr.h"
+
+static u8 rts5229_get_ic_version(struct rtsx_pcr *pcr)
+{
+	u8 val;
+
+	rtsx_pci_read_register(pcr, DUMMY_REG_RESET_0, &val);
+	return val & 0x0F;
+}
+
+static int rts5229_extra_init_hw(struct rtsx_pcr *pcr)
+{
+	rtsx_pci_init_cmd(pcr);
+
+	/* Configure GPIO as output */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, GPIO_CTL, 0x02, 0x02);
+	/* Switch LDO3318 source from DV33 to card_3v3 */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, LDO_PWR_SEL, 0x03, 0x00);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, LDO_PWR_SEL, 0x03, 0x01);
+	/* LED shine disabled, set initial shine cycle period */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, OLT_LED_CTL, 0x0F, 0x02);
+
+	return rtsx_pci_send_cmd(pcr, 100);
+}
+
+static int rts5229_optimize_phy(struct rtsx_pcr *pcr)
+{
+	/* Optimize RX sensitivity */
+	return rtsx_pci_write_phy_register(pcr, 0x00, 0xBA42);
+}
+
+static int rts5229_turn_on_led(struct rtsx_pcr *pcr)
+{
+	return rtsx_pci_write_register(pcr, GPIO_CTL, 0x02, 0x02);
+}
+
+static int rts5229_turn_off_led(struct rtsx_pcr *pcr)
+{
+	return rtsx_pci_write_register(pcr, GPIO_CTL, 0x02, 0x00);
+}
+
+static int rts5229_enable_auto_blink(struct rtsx_pcr *pcr)
+{
+	return rtsx_pci_write_register(pcr, OLT_LED_CTL, 0x08, 0x08);
+}
+
+static int rts5229_disable_auto_blink(struct rtsx_pcr *pcr)
+{
+	return rtsx_pci_write_register(pcr, OLT_LED_CTL, 0x08, 0x00);
+}
+
+static int rts5229_card_power_on(struct rtsx_pcr *pcr, int card)
+{
+	int err;
+
+	rtsx_pci_init_cmd(pcr);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL,
+			SD_POWER_MASK, SD_PARTIAL_POWER_ON);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL,
+			LDO3318_PWR_MASK, 0x02);
+	err = rtsx_pci_send_cmd(pcr, 100);
+	if (err < 0)
+		return err;
+
+	/* To avoid too large in-rush current */
+	udelay(150);
+
+	rtsx_pci_init_cmd(pcr);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL,
+			SD_POWER_MASK, SD_POWER_ON);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL,
+			LDO3318_PWR_MASK, 0x06);
+	err = rtsx_pci_send_cmd(pcr, 100);
+	if (err < 0)
+		return err;
+
+	return 0;
+}
+
+static int rts5229_card_power_off(struct rtsx_pcr *pcr, int card)
+{
+	rtsx_pci_init_cmd(pcr);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL,
+			SD_POWER_MASK | PMOS_STRG_MASK,
+			SD_POWER_OFF | PMOS_STRG_400mA);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL,
+			LDO3318_PWR_MASK, 0X00);
+	return rtsx_pci_send_cmd(pcr, 100);
+}
+
+static const struct pcr_ops rts5229_pcr_ops = {
+	.extra_init_hw = rts5229_extra_init_hw,
+	.optimize_phy = rts5229_optimize_phy,
+	.turn_on_led = rts5229_turn_on_led,
+	.turn_off_led = rts5229_turn_off_led,
+	.enable_auto_blink = rts5229_enable_auto_blink,
+	.disable_auto_blink = rts5229_disable_auto_blink,
+	.card_power_on = rts5229_card_power_on,
+	.card_power_off = rts5229_card_power_off,
+	.cd_deglitch = NULL,
+};
+
+/* SD Pull Control Enable:
+ *     SD_DAT[3:0] ==> pull up
+ *     SD_CD       ==> pull up
+ *     SD_WP       ==> pull up
+ *     SD_CMD      ==> pull up
+ *     SD_CLK      ==> pull down
+ */
+static const u32 rts5229_sd_pull_ctl_enable_tbl1[] = {
+	RTSX_REG_PAIR(CARD_PULL_CTL2, 0xAA),
+	RTSX_REG_PAIR(CARD_PULL_CTL3, 0xE9),
+	0,
+};
+
+/* For RTS5229 version C */
+static const u32 rts5229_sd_pull_ctl_enable_tbl2[] = {
+	RTSX_REG_PAIR(CARD_PULL_CTL2, 0xAA),
+	RTSX_REG_PAIR(CARD_PULL_CTL3, 0xD9),
+	0,
+};
+
+/* SD Pull Control Disable:
+ *     SD_DAT[3:0] ==> pull down
+ *     SD_CD       ==> pull up
+ *     SD_WP       ==> pull down
+ *     SD_CMD      ==> pull down
+ *     SD_CLK      ==> pull down
+ */
+static const u32 rts5229_sd_pull_ctl_disable_tbl1[] = {
+	RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55),
+	RTSX_REG_PAIR(CARD_PULL_CTL3, 0xD5),
+	0,
+};
+
+/* For RTS5229 version C */
+static const u32 rts5229_sd_pull_ctl_disable_tbl2[] = {
+	RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55),
+	RTSX_REG_PAIR(CARD_PULL_CTL3, 0xE5),
+	0,
+};
+
+/* MS Pull Control Enable:
+ *     MS CD       ==> pull up
+ *     others      ==> pull down
+ */
+static const u32 rts5229_ms_pull_ctl_enable_tbl[] = {
+	RTSX_REG_PAIR(CARD_PULL_CTL5, 0x55),
+	RTSX_REG_PAIR(CARD_PULL_CTL6, 0x15),
+	0,
+};
+
+/* MS Pull Control Disable:
+ *     MS CD       ==> pull up
+ *     others      ==> pull down
+ */
+static const u32 rts5229_ms_pull_ctl_disable_tbl[] = {
+	RTSX_REG_PAIR(CARD_PULL_CTL5, 0x55),
+	RTSX_REG_PAIR(CARD_PULL_CTL6, 0x15),
+	0,
+};
+
+void rts5229_init_params(struct rtsx_pcr *pcr)
+{
+	pcr->extra_caps = EXTRA_CAPS_SD_SDR50 | EXTRA_CAPS_SD_SDR104;
+	pcr->num_slots = 2;
+	pcr->ops = &rts5229_pcr_ops;
+
+	pcr->ic_version = rts5229_get_ic_version(pcr);
+	if (pcr->ic_version == IC_VER_C) {
+		pcr->sd_pull_ctl_enable_tbl = rts5229_sd_pull_ctl_enable_tbl2;
+		pcr->sd_pull_ctl_disable_tbl = rts5229_sd_pull_ctl_disable_tbl2;
+	} else {
+		pcr->sd_pull_ctl_enable_tbl = rts5229_sd_pull_ctl_enable_tbl1;
+		pcr->sd_pull_ctl_disable_tbl = rts5229_sd_pull_ctl_disable_tbl1;
+	}
+	pcr->ms_pull_ctl_enable_tbl = rts5229_ms_pull_ctl_enable_tbl;
+	pcr->ms_pull_ctl_disable_tbl = rts5229_ms_pull_ctl_disable_tbl;
+}
diff --git a/drivers/mfd/rtsx_pcr.c b/drivers/mfd/rtsx_pcr.c
new file mode 100644
index 000000000000..56d4377c62c2
--- /dev/null
+++ b/drivers/mfd/rtsx_pcr.c
@@ -0,0 +1,1251 @@
+/* Driver for Realtek PCI-Express card reader
+ *
+ * Copyright(c) 2009 Realtek Semiconductor Corp. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author:
+ *   Wei WANG <wei_wang@realsil.com.cn>
+ *   No. 450, Shenhu Road, Suzhou Industry Park, Suzhou, China
+ */
+
+#include <linux/pci.h>
+#include <linux/module.h>
+#include <linux/dma-mapping.h>
+#include <linux/highmem.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/idr.h>
+#include <linux/platform_device.h>
+#include <linux/mfd/core.h>
+#include <linux/mfd/rtsx_pci.h>
+#include <asm/unaligned.h>
+
+#include "rtsx_pcr.h"
+
+static bool msi_en = true;
+module_param(msi_en, bool, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(msi_en, "Enable MSI");
+
+static DEFINE_IDR(rtsx_pci_idr);
+static DEFINE_SPINLOCK(rtsx_pci_lock);
+
+static struct mfd_cell rtsx_pcr_cells[] = {
+	[RTSX_SD_CARD] = {
+		.name = DRV_NAME_RTSX_PCI_SDMMC,
+	},
+	[RTSX_MS_CARD] = {
+		.name = DRV_NAME_RTSX_PCI_MS,
+	},
+};
+
+static DEFINE_PCI_DEVICE_TABLE(rtsx_pci_ids) = {
+	{ PCI_DEVICE(0x10EC, 0x5209), PCI_CLASS_OTHERS << 16, 0xFF0000 },
+	{ PCI_DEVICE(0x10EC, 0x5229), PCI_CLASS_OTHERS << 16, 0xFF0000 },
+	{ PCI_DEVICE(0x10EC, 0x5289), PCI_CLASS_OTHERS << 16, 0xFF0000 },
+	{ 0, }
+};
+
+MODULE_DEVICE_TABLE(pci, rtsx_pci_ids);
+
+void rtsx_pci_start_run(struct rtsx_pcr *pcr)
+{
+	/* If pci device removed, don't queue idle work any more */
+	if (pcr->remove_pci)
+		return;
+
+	if (pcr->state != PDEV_STAT_RUN) {
+		pcr->state = PDEV_STAT_RUN;
+		if (pcr->ops->enable_auto_blink)
+			pcr->ops->enable_auto_blink(pcr);
+	}
+
+	mod_delayed_work(system_wq, &pcr->idle_work, msecs_to_jiffies(200));
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_start_run);
+
+int rtsx_pci_write_register(struct rtsx_pcr *pcr, u16 addr, u8 mask, u8 data)
+{
+	int i;
+	u32 val = HAIMR_WRITE_START;
+
+	val |= (u32)(addr & 0x3FFF) << 16;
+	val |= (u32)mask << 8;
+	val |= (u32)data;
+
+	rtsx_pci_writel(pcr, RTSX_HAIMR, val);
+
+	for (i = 0; i < MAX_RW_REG_CNT; i++) {
+		val = rtsx_pci_readl(pcr, RTSX_HAIMR);
+		if ((val & HAIMR_TRANS_END) == 0) {
+			if (data != (u8)val)
+				return -EIO;
+			return 0;
+		}
+	}
+
+	return -ETIMEDOUT;
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_write_register);
+
+int rtsx_pci_read_register(struct rtsx_pcr *pcr, u16 addr, u8 *data)
+{
+	u32 val = HAIMR_READ_START;
+	int i;
+
+	val |= (u32)(addr & 0x3FFF) << 16;
+	rtsx_pci_writel(pcr, RTSX_HAIMR, val);
+
+	for (i = 0; i < MAX_RW_REG_CNT; i++) {
+		val = rtsx_pci_readl(pcr, RTSX_HAIMR);
+		if ((val & HAIMR_TRANS_END) == 0)
+			break;
+	}
+
+	if (i >= MAX_RW_REG_CNT)
+		return -ETIMEDOUT;
+
+	if (data)
+		*data = (u8)(val & 0xFF);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_read_register);
+
+int rtsx_pci_write_phy_register(struct rtsx_pcr *pcr, u8 addr, u16 val)
+{
+	int err, i, finished = 0;
+	u8 tmp;
+
+	rtsx_pci_init_cmd(pcr);
+
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PHYDATA0, 0xFF, (u8)val);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PHYDATA1, 0xFF, (u8)(val >> 8));
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PHYADDR, 0xFF, addr);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PHYRWCTL, 0xFF, 0x81);
+
+	err = rtsx_pci_send_cmd(pcr, 100);
+	if (err < 0)
+		return err;
+
+	for (i = 0; i < 100000; i++) {
+		err = rtsx_pci_read_register(pcr, PHYRWCTL, &tmp);
+		if (err < 0)
+			return err;
+
+		if (!(tmp & 0x80)) {
+			finished = 1;
+			break;
+		}
+	}
+
+	if (!finished)
+		return -ETIMEDOUT;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_write_phy_register);
+
+int rtsx_pci_read_phy_register(struct rtsx_pcr *pcr, u8 addr, u16 *val)
+{
+	int err, i, finished = 0;
+	u16 data;
+	u8 *ptr, tmp;
+
+	rtsx_pci_init_cmd(pcr);
+
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PHYADDR, 0xFF, addr);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PHYRWCTL, 0xFF, 0x80);
+
+	err = rtsx_pci_send_cmd(pcr, 100);
+	if (err < 0)
+		return err;
+
+	for (i = 0; i < 100000; i++) {
+		err = rtsx_pci_read_register(pcr, PHYRWCTL, &tmp);
+		if (err < 0)
+			return err;
+
+		if (!(tmp & 0x80)) {
+			finished = 1;
+			break;
+		}
+	}
+
+	if (!finished)
+		return -ETIMEDOUT;
+
+	rtsx_pci_init_cmd(pcr);
+
+	rtsx_pci_add_cmd(pcr, READ_REG_CMD, PHYDATA0, 0, 0);
+	rtsx_pci_add_cmd(pcr, READ_REG_CMD, PHYDATA1, 0, 0);
+
+	err = rtsx_pci_send_cmd(pcr, 100);
+	if (err < 0)
+		return err;
+
+	ptr = rtsx_pci_get_cmd_data(pcr);
+	data = ((u16)ptr[1] << 8) | ptr[0];
+
+	if (val)
+		*val = data;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_read_phy_register);
+
+void rtsx_pci_stop_cmd(struct rtsx_pcr *pcr)
+{
+	rtsx_pci_writel(pcr, RTSX_HCBCTLR, STOP_CMD);
+	rtsx_pci_writel(pcr, RTSX_HDBCTLR, STOP_DMA);
+
+	rtsx_pci_write_register(pcr, DMACTL, 0x80, 0x80);
+	rtsx_pci_write_register(pcr, RBCTL, 0x80, 0x80);
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_stop_cmd);
+
+void rtsx_pci_add_cmd(struct rtsx_pcr *pcr,
+		u8 cmd_type, u16 reg_addr, u8 mask, u8 data)
+{
+	unsigned long flags;
+	u32 val = 0;
+	u32 *ptr = (u32 *)(pcr->host_cmds_ptr);
+
+	val |= (u32)(cmd_type & 0x03) << 30;
+	val |= (u32)(reg_addr & 0x3FFF) << 16;
+	val |= (u32)mask << 8;
+	val |= (u32)data;
+
+	spin_lock_irqsave(&pcr->lock, flags);
+	ptr += pcr->ci;
+	if (pcr->ci < (HOST_CMDS_BUF_LEN / 4)) {
+		put_unaligned_le32(val, ptr);
+		ptr++;
+		pcr->ci++;
+	}
+	spin_unlock_irqrestore(&pcr->lock, flags);
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_add_cmd);
+
+void rtsx_pci_send_cmd_no_wait(struct rtsx_pcr *pcr)
+{
+	u32 val = 1 << 31;
+
+	rtsx_pci_writel(pcr, RTSX_HCBAR, pcr->host_cmds_addr);
+
+	val |= (u32)(pcr->ci * 4) & 0x00FFFFFF;
+	/* Hardware Auto Response */
+	val |= 0x40000000;
+	rtsx_pci_writel(pcr, RTSX_HCBCTLR, val);
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_send_cmd_no_wait);
+
+int rtsx_pci_send_cmd(struct rtsx_pcr *pcr, int timeout)
+{
+	struct completion trans_done;
+	u32 val = 1 << 31;
+	long timeleft;
+	unsigned long flags;
+	int err = 0;
+
+	spin_lock_irqsave(&pcr->lock, flags);
+
+	/* set up data structures for the wakeup system */
+	pcr->done = &trans_done;
+	pcr->trans_result = TRANS_NOT_READY;
+	init_completion(&trans_done);
+
+	rtsx_pci_writel(pcr, RTSX_HCBAR, pcr->host_cmds_addr);
+
+	val |= (u32)(pcr->ci * 4) & 0x00FFFFFF;
+	/* Hardware Auto Response */
+	val |= 0x40000000;
+	rtsx_pci_writel(pcr, RTSX_HCBCTLR, val);
+
+	spin_unlock_irqrestore(&pcr->lock, flags);
+
+	/* Wait for TRANS_OK_INT */
+	timeleft = wait_for_completion_interruptible_timeout(
+			&trans_done, msecs_to_jiffies(timeout));
+	if (timeleft <= 0) {
+		dev_dbg(&(pcr->pci->dev), "Timeout (%s %d)\n",
+				__func__, __LINE__);
+		err = -ETIMEDOUT;
+		goto finish_send_cmd;
+	}
+
+	spin_lock_irqsave(&pcr->lock, flags);
+	if (pcr->trans_result == TRANS_RESULT_FAIL)
+		err = -EINVAL;
+	else if (pcr->trans_result == TRANS_RESULT_OK)
+		err = 0;
+	else if (pcr->trans_result == TRANS_NO_DEVICE)
+		err = -ENODEV;
+	spin_unlock_irqrestore(&pcr->lock, flags);
+
+finish_send_cmd:
+	spin_lock_irqsave(&pcr->lock, flags);
+	pcr->done = NULL;
+	spin_unlock_irqrestore(&pcr->lock, flags);
+
+	if ((err < 0) && (err != -ENODEV))
+		rtsx_pci_stop_cmd(pcr);
+
+	if (pcr->finish_me)
+		complete(pcr->finish_me);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_send_cmd);
+
+static void rtsx_pci_add_sg_tbl(struct rtsx_pcr *pcr,
+		dma_addr_t addr, unsigned int len, int end)
+{
+	u64 *ptr = (u64 *)(pcr->host_sg_tbl_ptr) + pcr->sgi;
+	u64 val;
+	u8 option = SG_VALID | SG_TRANS_DATA;
+
+	dev_dbg(&(pcr->pci->dev), "DMA addr: 0x%x, Len: 0x%x\n",
+			(unsigned int)addr, len);
+
+	if (end)
+		option |= SG_END;
+	val = ((u64)addr << 32) | ((u64)len << 12) | option;
+
+	put_unaligned_le64(val, ptr);
+	ptr++;
+	pcr->sgi++;
+}
+
+int rtsx_pci_transfer_data(struct rtsx_pcr *pcr, struct scatterlist *sglist,
+		int num_sg, bool read, int timeout)
+{
+	struct completion trans_done;
+	u8 dir;
+	int err = 0, i, count;
+	long timeleft;
+	unsigned long flags;
+	struct scatterlist *sg;
+	enum dma_data_direction dma_dir;
+	u32 val;
+	dma_addr_t addr;
+	unsigned int len;
+
+	dev_dbg(&(pcr->pci->dev), "--> %s: num_sg = %d\n", __func__, num_sg);
+
+	/* don't transfer data during abort processing */
+	if (pcr->remove_pci)
+		return -EINVAL;
+
+	if ((sglist == NULL) || (num_sg <= 0))
+		return -EINVAL;
+
+	if (read) {
+		dir = DEVICE_TO_HOST;
+		dma_dir = DMA_FROM_DEVICE;
+	} else {
+		dir = HOST_TO_DEVICE;
+		dma_dir = DMA_TO_DEVICE;
+	}
+
+	count = dma_map_sg(&(pcr->pci->dev), sglist, num_sg, dma_dir);
+	if (count < 1) {
+		dev_err(&(pcr->pci->dev), "scatterlist map failed\n");
+		return -EINVAL;
+	}
+	dev_dbg(&(pcr->pci->dev), "DMA mapping count: %d\n", count);
+
+	val = ((u32)(dir & 0x01) << 29) | TRIG_DMA | ADMA_MODE;
+	pcr->sgi = 0;
+	for_each_sg(sglist, sg, count, i) {
+		addr = sg_dma_address(sg);
+		len = sg_dma_len(sg);
+		rtsx_pci_add_sg_tbl(pcr, addr, len, i == count - 1);
+	}
+
+	spin_lock_irqsave(&pcr->lock, flags);
+
+	pcr->done = &trans_done;
+	pcr->trans_result = TRANS_NOT_READY;
+	init_completion(&trans_done);
+	rtsx_pci_writel(pcr, RTSX_HDBAR, pcr->host_sg_tbl_addr);
+	rtsx_pci_writel(pcr, RTSX_HDBCTLR, val);
+
+	spin_unlock_irqrestore(&pcr->lock, flags);
+
+	timeleft = wait_for_completion_interruptible_timeout(
+			&trans_done, msecs_to_jiffies(timeout));
+	if (timeleft <= 0) {
+		dev_dbg(&(pcr->pci->dev), "Timeout (%s %d)\n",
+				__func__, __LINE__);
+		err = -ETIMEDOUT;
+		goto out;
+	}
+
+	spin_lock_irqsave(&pcr->lock, flags);
+
+	if (pcr->trans_result == TRANS_RESULT_FAIL)
+		err = -EINVAL;
+	else if (pcr->trans_result == TRANS_NO_DEVICE)
+		err = -ENODEV;
+
+	spin_unlock_irqrestore(&pcr->lock, flags);
+
+out:
+	spin_lock_irqsave(&pcr->lock, flags);
+	pcr->done = NULL;
+	spin_unlock_irqrestore(&pcr->lock, flags);
+
+	dma_unmap_sg(&(pcr->pci->dev), sglist, num_sg, dma_dir);
+
+	if ((err < 0) && (err != -ENODEV))
+		rtsx_pci_stop_cmd(pcr);
+
+	if (pcr->finish_me)
+		complete(pcr->finish_me);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_transfer_data);
+
+int rtsx_pci_read_ppbuf(struct rtsx_pcr *pcr, u8 *buf, int buf_len)
+{
+	int err;
+	int i, j;
+	u16 reg;
+	u8 *ptr;
+
+	if (buf_len > 512)
+		buf_len = 512;
+
+	ptr = buf;
+	reg = PPBUF_BASE2;
+	for (i = 0; i < buf_len / 256; i++) {
+		rtsx_pci_init_cmd(pcr);
+
+		for (j = 0; j < 256; j++)
+			rtsx_pci_add_cmd(pcr, READ_REG_CMD, reg++, 0, 0);
+
+		err = rtsx_pci_send_cmd(pcr, 250);
+		if (err < 0)
+			return err;
+
+		memcpy(ptr, rtsx_pci_get_cmd_data(pcr), 256);
+		ptr += 256;
+	}
+
+	if (buf_len % 256) {
+		rtsx_pci_init_cmd(pcr);
+
+		for (j = 0; j < buf_len % 256; j++)
+			rtsx_pci_add_cmd(pcr, READ_REG_CMD, reg++, 0, 0);
+
+		err = rtsx_pci_send_cmd(pcr, 250);
+		if (err < 0)
+			return err;
+	}
+
+	memcpy(ptr, rtsx_pci_get_cmd_data(pcr), buf_len % 256);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_read_ppbuf);
+
+int rtsx_pci_write_ppbuf(struct rtsx_pcr *pcr, u8 *buf, int buf_len)
+{
+	int err;
+	int i, j;
+	u16 reg;
+	u8 *ptr;
+
+	if (buf_len > 512)
+		buf_len = 512;
+
+	ptr = buf;
+	reg = PPBUF_BASE2;
+	for (i = 0; i < buf_len / 256; i++) {
+		rtsx_pci_init_cmd(pcr);
+
+		for (j = 0; j < 256; j++) {
+			rtsx_pci_add_cmd(pcr, WRITE_REG_CMD,
+					reg++, 0xFF, *ptr);
+			ptr++;
+		}
+
+		err = rtsx_pci_send_cmd(pcr, 250);
+		if (err < 0)
+			return err;
+	}
+
+	if (buf_len % 256) {
+		rtsx_pci_init_cmd(pcr);
+
+		for (j = 0; j < buf_len % 256; j++) {
+			rtsx_pci_add_cmd(pcr, WRITE_REG_CMD,
+					reg++, 0xFF, *ptr);
+			ptr++;
+		}
+
+		err = rtsx_pci_send_cmd(pcr, 250);
+		if (err < 0)
+			return err;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_write_ppbuf);
+
+static int rtsx_pci_set_pull_ctl(struct rtsx_pcr *pcr, const u32 *tbl)
+{
+	int err;
+
+	rtsx_pci_init_cmd(pcr);
+
+	while (*tbl & 0xFFFF0000) {
+		rtsx_pci_add_cmd(pcr, WRITE_REG_CMD,
+				(u16)(*tbl >> 16), 0xFF, (u8)(*tbl));
+		tbl++;
+	}
+
+	err = rtsx_pci_send_cmd(pcr, 100);
+	if (err < 0)
+		return err;
+
+	return 0;
+}
+
+int rtsx_pci_card_pull_ctl_enable(struct rtsx_pcr *pcr, int card)
+{
+	const u32 *tbl;
+
+	if (card == RTSX_SD_CARD)
+		tbl = pcr->sd_pull_ctl_enable_tbl;
+	else if (card == RTSX_MS_CARD)
+		tbl = pcr->ms_pull_ctl_enable_tbl;
+	else
+		return -EINVAL;
+
+	return rtsx_pci_set_pull_ctl(pcr, tbl);
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_card_pull_ctl_enable);
+
+int rtsx_pci_card_pull_ctl_disable(struct rtsx_pcr *pcr, int card)
+{
+	const u32 *tbl;
+
+	if (card == RTSX_SD_CARD)
+		tbl = pcr->sd_pull_ctl_disable_tbl;
+	else if (card == RTSX_MS_CARD)
+		tbl = pcr->ms_pull_ctl_disable_tbl;
+	else
+		return -EINVAL;
+
+
+	return rtsx_pci_set_pull_ctl(pcr, tbl);
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_card_pull_ctl_disable);
+
+static void rtsx_pci_enable_bus_int(struct rtsx_pcr *pcr)
+{
+	pcr->bier = TRANS_OK_INT_EN | TRANS_FAIL_INT_EN | SD_INT_EN;
+
+	if (pcr->num_slots > 1)
+		pcr->bier |= MS_INT_EN;
+
+	/* Enable Bus Interrupt */
+	rtsx_pci_writel(pcr, RTSX_BIER, pcr->bier);
+
+	dev_dbg(&(pcr->pci->dev), "RTSX_BIER: 0x%08x\n", pcr->bier);
+}
+
+static inline u8 double_ssc_depth(u8 depth)
+{
+	return ((depth > 1) ? (depth - 1) : depth);
+}
+
+static u8 revise_ssc_depth(u8 ssc_depth, u8 div)
+{
+	if (div > CLK_DIV_1) {
+		if (ssc_depth > (div - 1))
+			ssc_depth -= (div - 1);
+		else
+			ssc_depth = SSC_DEPTH_4M;
+	}
+
+	return ssc_depth;
+}
+
+int rtsx_pci_switch_clock(struct rtsx_pcr *pcr, unsigned int card_clock,
+		u8 ssc_depth, bool initial_mode, bool double_clk, bool vpclk)
+{
+	int err, clk;
+	u8 N, min_N, max_N, clk_divider;
+	u8 mcu_cnt, div, max_div;
+	u8 depth[] = {
+		[RTSX_SSC_DEPTH_4M] = SSC_DEPTH_4M,
+		[RTSX_SSC_DEPTH_2M] = SSC_DEPTH_2M,
+		[RTSX_SSC_DEPTH_1M] = SSC_DEPTH_1M,
+		[RTSX_SSC_DEPTH_500K] = SSC_DEPTH_500K,
+		[RTSX_SSC_DEPTH_250K] = SSC_DEPTH_250K,
+	};
+
+	if (initial_mode) {
+		/* We use 250k(around) here, in initial stage */
+		clk_divider = SD_CLK_DIVIDE_128;
+		card_clock = 30000000;
+	} else {
+		clk_divider = SD_CLK_DIVIDE_0;
+	}
+	err = rtsx_pci_write_register(pcr, SD_CFG1,
+			SD_CLK_DIVIDE_MASK, clk_divider);
+	if (err < 0)
+		return err;
+
+	card_clock /= 1000000;
+	dev_dbg(&(pcr->pci->dev), "Switch card clock to %dMHz\n", card_clock);
+
+	min_N = 80;
+	max_N = 208;
+	max_div = CLK_DIV_8;
+
+	clk = card_clock;
+	if (!initial_mode && double_clk)
+		clk = card_clock * 2;
+	dev_dbg(&(pcr->pci->dev),
+			"Internal SSC clock: %dMHz (cur_clock = %d)\n",
+			clk, pcr->cur_clock);
+
+	if (clk == pcr->cur_clock)
+		return 0;
+
+	N = (u8)(clk - 2);
+	if ((clk <= 2) || (N > max_N))
+		return -EINVAL;
+
+	mcu_cnt = (u8)(125/clk + 3);
+	if (mcu_cnt > 15)
+		mcu_cnt = 15;
+
+	/* Make sure that the SSC clock div_n is equal or greater than min_N */
+	div = CLK_DIV_1;
+	while ((N < min_N) && (div < max_div)) {
+		N = (N + 2) * 2 - 2;
+		div++;
+	}
+	dev_dbg(&(pcr->pci->dev), "N = %d, div = %d\n", N, div);
+
+	ssc_depth = depth[ssc_depth];
+	if (double_clk)
+		ssc_depth = double_ssc_depth(ssc_depth);
+
+	ssc_depth = revise_ssc_depth(ssc_depth, div);
+	dev_dbg(&(pcr->pci->dev), "ssc_depth = %d\n", ssc_depth);
+
+	rtsx_pci_init_cmd(pcr);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CLK_CTL,
+			CLK_LOW_FREQ, CLK_LOW_FREQ);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CLK_DIV,
+			0xFF, (div << 4) | mcu_cnt);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_CTL1, SSC_RSTB, 0);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_CTL2,
+			SSC_DEPTH_MASK, ssc_depth);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_DIV_N_0, 0xFF, N);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_CTL1, SSC_RSTB, SSC_RSTB);
+	if (vpclk) {
+		rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD_VPCLK0_CTL,
+				PHASE_NOT_RESET, 0);
+		rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD_VPCLK0_CTL,
+				PHASE_NOT_RESET, PHASE_NOT_RESET);
+	}
+
+	err = rtsx_pci_send_cmd(pcr, 2000);
+	if (err < 0)
+		return err;
+
+	/* Wait SSC clock stable */
+	udelay(10);
+	err = rtsx_pci_write_register(pcr, CLK_CTL, CLK_LOW_FREQ, 0);
+	if (err < 0)
+		return err;
+
+	pcr->cur_clock = clk;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_switch_clock);
+
+int rtsx_pci_card_power_on(struct rtsx_pcr *pcr, int card)
+{
+	if (pcr->ops->card_power_on)
+		return pcr->ops->card_power_on(pcr, card);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_card_power_on);
+
+int rtsx_pci_card_power_off(struct rtsx_pcr *pcr, int card)
+{
+	if (pcr->ops->card_power_off)
+		return pcr->ops->card_power_off(pcr, card);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_card_power_off);
+
+unsigned int rtsx_pci_card_exist(struct rtsx_pcr *pcr)
+{
+	unsigned int val;
+
+	val = rtsx_pci_readl(pcr, RTSX_BIPR);
+	if (pcr->ops->cd_deglitch)
+		val = pcr->ops->cd_deglitch(pcr);
+
+	return val;
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_card_exist);
+
+void rtsx_pci_complete_unfinished_transfer(struct rtsx_pcr *pcr)
+{
+	struct completion finish;
+
+	pcr->finish_me = &finish;
+	init_completion(&finish);
+
+	if (pcr->done)
+		complete(pcr->done);
+
+	if (!pcr->remove_pci)
+		rtsx_pci_stop_cmd(pcr);
+
+	wait_for_completion_interruptible_timeout(&finish,
+			msecs_to_jiffies(2));
+	pcr->finish_me = NULL;
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_complete_unfinished_transfer);
+
+static void rtsx_pci_card_detect(struct work_struct *work)
+{
+	struct delayed_work *dwork;
+	struct rtsx_pcr *pcr;
+	unsigned long flags;
+	unsigned int card_detect = 0;
+	u32 irq_status;
+
+	dwork = to_delayed_work(work);
+	pcr = container_of(dwork, struct rtsx_pcr, carddet_work);
+
+	dev_dbg(&(pcr->pci->dev), "--> %s\n", __func__);
+
+	spin_lock_irqsave(&pcr->lock, flags);
+
+	irq_status = rtsx_pci_readl(pcr, RTSX_BIPR);
+	dev_dbg(&(pcr->pci->dev), "irq_status: 0x%08x\n", irq_status);
+
+	if (pcr->card_inserted || pcr->card_removed) {
+		dev_dbg(&(pcr->pci->dev),
+				"card_inserted: 0x%x, card_removed: 0x%x\n",
+				pcr->card_inserted, pcr->card_removed);
+
+		if (pcr->ops->cd_deglitch)
+			pcr->card_inserted = pcr->ops->cd_deglitch(pcr);
+
+		card_detect = pcr->card_inserted | pcr->card_removed;
+		pcr->card_inserted = 0;
+		pcr->card_removed = 0;
+	}
+
+	spin_unlock_irqrestore(&pcr->lock, flags);
+
+	if (card_detect & SD_EXIST)
+		pcr->slots[RTSX_SD_CARD].card_event(
+				pcr->slots[RTSX_SD_CARD].p_dev);
+	if (card_detect & MS_EXIST)
+		pcr->slots[RTSX_MS_CARD].card_event(
+				pcr->slots[RTSX_MS_CARD].p_dev);
+}
+
+static irqreturn_t rtsx_pci_isr(int irq, void *dev_id)
+{
+	struct rtsx_pcr *pcr = dev_id;
+	u32 int_reg;
+
+	if (!pcr)
+		return IRQ_NONE;
+
+	spin_lock(&pcr->lock);
+
+	int_reg = rtsx_pci_readl(pcr, RTSX_BIPR);
+	/* Clear interrupt flag */
+	rtsx_pci_writel(pcr, RTSX_BIPR, int_reg);
+	if ((int_reg & pcr->bier) == 0) {
+		spin_unlock(&pcr->lock);
+		return IRQ_NONE;
+	}
+	if (int_reg == 0xFFFFFFFF) {
+		spin_unlock(&pcr->lock);
+		return IRQ_HANDLED;
+	}
+
+	int_reg &= (pcr->bier | 0x7FFFFF);
+
+	if (int_reg & SD_INT) {
+		if (int_reg & SD_EXIST) {
+			pcr->card_inserted |= SD_EXIST;
+		} else {
+			pcr->card_removed |= SD_EXIST;
+			pcr->card_inserted &= ~SD_EXIST;
+		}
+	}
+
+	if (int_reg & MS_INT) {
+		if (int_reg & MS_EXIST) {
+			pcr->card_inserted |= MS_EXIST;
+		} else {
+			pcr->card_removed |= MS_EXIST;
+			pcr->card_inserted &= ~MS_EXIST;
+		}
+	}
+
+	if (pcr->card_inserted || pcr->card_removed)
+		schedule_delayed_work(&pcr->carddet_work,
+				msecs_to_jiffies(200));
+
+	if (int_reg & (NEED_COMPLETE_INT | DELINK_INT)) {
+		if (int_reg & (TRANS_FAIL_INT | DELINK_INT)) {
+			pcr->trans_result = TRANS_RESULT_FAIL;
+			if (pcr->done)
+				complete(pcr->done);
+		} else if (int_reg & TRANS_OK_INT) {
+			pcr->trans_result = TRANS_RESULT_OK;
+			if (pcr->done)
+				complete(pcr->done);
+		}
+	}
+
+	spin_unlock(&pcr->lock);
+	return IRQ_HANDLED;
+}
+
+static int rtsx_pci_acquire_irq(struct rtsx_pcr *pcr)
+{
+	dev_info(&(pcr->pci->dev), "%s: pcr->msi_en = %d, pci->irq = %d\n",
+			__func__, pcr->msi_en, pcr->pci->irq);
+
+	if (request_irq(pcr->pci->irq, rtsx_pci_isr,
+			pcr->msi_en ? 0 : IRQF_SHARED,
+			DRV_NAME_RTSX_PCI, pcr)) {
+		dev_err(&(pcr->pci->dev),
+			"rtsx_sdmmc: unable to grab IRQ %d, disabling device\n",
+			pcr->pci->irq);
+		return -1;
+	}
+
+	pcr->irq = pcr->pci->irq;
+	pci_intx(pcr->pci, !pcr->msi_en);
+
+	return 0;
+}
+
+static void rtsx_pci_idle_work(struct work_struct *work)
+{
+	struct delayed_work *dwork = to_delayed_work(work);
+	struct rtsx_pcr *pcr = container_of(dwork, struct rtsx_pcr, idle_work);
+
+	dev_dbg(&(pcr->pci->dev), "--> %s\n", __func__);
+
+	mutex_lock(&pcr->pcr_mutex);
+
+	pcr->state = PDEV_STAT_IDLE;
+
+	if (pcr->ops->disable_auto_blink)
+		pcr->ops->disable_auto_blink(pcr);
+	if (pcr->ops->turn_off_led)
+		pcr->ops->turn_off_led(pcr);
+
+	mutex_unlock(&pcr->pcr_mutex);
+}
+
+static int rtsx_pci_init_hw(struct rtsx_pcr *pcr)
+{
+	int err;
+
+	rtsx_pci_writel(pcr, RTSX_HCBAR, pcr->host_cmds_addr);
+
+	rtsx_pci_enable_bus_int(pcr);
+
+	/* Power on SSC */
+	err = rtsx_pci_write_register(pcr, FPDCTL, SSC_POWER_DOWN, 0);
+	if (err < 0)
+		return err;
+
+	/* Wait SSC power stable */
+	udelay(200);
+
+	if (pcr->ops->optimize_phy) {
+		err = pcr->ops->optimize_phy(pcr);
+		if (err < 0)
+			return err;
+	}
+
+	rtsx_pci_init_cmd(pcr);
+
+	/* Set mcu_cnt to 7 to ensure data can be sampled properly */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CLK_DIV, 0x07, 0x07);
+
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, HOST_SLEEP_STATE, 0x03, 0x00);
+	/* Disable card clock */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_CLK_EN, 0x1E, 0);
+	/* Reset ASPM state to default value */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, ASPM_FORCE_CTL, 0x3F, 0);
+	/* Reset delink mode */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CHANGE_LINK_STATE, 0x0A, 0);
+	/* Card driving select */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD30_DRIVE_SEL,
+			0x07, DRIVER_TYPE_D);
+	/* Enable SSC Clock */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_CTL1,
+			0xFF, SSC_8X_EN | SSC_SEL_4M);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_CTL2, 0xFF, 0x12);
+	/* Disable cd_pwr_save */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CHANGE_LINK_STATE, 0x16, 0x10);
+	/* Clear Link Ready Interrupt */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, IRQSTAT0,
+			LINK_RDY_INT, LINK_RDY_INT);
+	/* Enlarge the estimation window of PERST# glitch
+	 * to reduce the chance of invalid card interrupt
+	 */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PERST_GLITCH_WIDTH, 0xFF, 0x80);
+	/* Update RC oscillator to 400k
+	 * bit[0] F_HIGH: for RC oscillator, Rst_value is 1'b1
+	 *                1: 2M  0: 400k
+	 */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, RCCTL, 0x01, 0x00);
+	/* Set interrupt write clear
+	 * bit 1: U_elbi_if_rd_clr_en
+	 *	1: Enable ELBI interrupt[31:22] & [7:0] flag read clear
+	 *	0: ELBI interrupt flag[31:22] & [7:0] only can be write clear
+	 */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, NFTS_TX_CTRL, 0x02, 0);
+	/* Force CLKREQ# PIN to drive 0 to request clock */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG, 0x08, 0x08);
+
+	err = rtsx_pci_send_cmd(pcr, 100);
+	if (err < 0)
+		return err;
+
+	/* Enable clk_request_n to enable clock power management */
+	rtsx_pci_write_config_byte(pcr, 0x81, 1);
+	/* Enter L1 when host tx idle */
+	rtsx_pci_write_config_byte(pcr, 0x70F, 0x5B);
+
+	if (pcr->ops->extra_init_hw) {
+		err = pcr->ops->extra_init_hw(pcr);
+		if (err < 0)
+			return err;
+	}
+
+	return 0;
+}
+
+static int rtsx_pci_init_chip(struct rtsx_pcr *pcr)
+{
+	int err;
+
+	spin_lock_init(&pcr->lock);
+	mutex_init(&pcr->pcr_mutex);
+
+	switch (PCI_PID(pcr)) {
+	default:
+	case 0x5209:
+		rts5209_init_params(pcr);
+		break;
+
+	case 0x5229:
+		rts5229_init_params(pcr);
+		break;
+
+	case 0x5289:
+		rtl8411_init_params(pcr);
+		break;
+	}
+
+	dev_dbg(&(pcr->pci->dev), "PID: 0x%04x, IC version: 0x%02x\n",
+			PCI_PID(pcr), pcr->ic_version);
+
+	pcr->slots = kcalloc(pcr->num_slots, sizeof(struct rtsx_slot),
+			GFP_KERNEL);
+	if (!pcr->slots)
+		return -ENOMEM;
+
+	pcr->state = PDEV_STAT_IDLE;
+	err = rtsx_pci_init_hw(pcr);
+	if (err < 0) {
+		kfree(pcr->slots);
+		return err;
+	}
+
+	return 0;
+}
+
+static int __devinit rtsx_pci_probe(struct pci_dev *pcidev,
+				    const struct pci_device_id *id)
+{
+	struct rtsx_pcr *pcr;
+	struct pcr_handle *handle;
+	u32 base, len;
+	int ret, i;
+
+	dev_dbg(&(pcidev->dev),
+		": Realtek PCI-E Card Reader found at %s [%04x:%04x] (rev %x)\n",
+		pci_name(pcidev), (int)pcidev->vendor, (int)pcidev->device,
+		(int)pcidev->revision);
+
+	ret = pci_enable_device(pcidev);
+	if (ret)
+		return ret;
+
+	ret = pci_request_regions(pcidev, DRV_NAME_RTSX_PCI);
+	if (ret)
+		goto disable;
+
+	pcr = kzalloc(sizeof(*pcr), GFP_KERNEL);
+	if (!pcr) {
+		ret = -ENOMEM;
+		goto release_pci;
+	}
+
+	handle = kzalloc(sizeof(*handle), GFP_KERNEL);
+	if (!handle) {
+		ret = -ENOMEM;
+		goto free_pcr;
+	}
+	handle->pcr = pcr;
+
+	if (!idr_pre_get(&rtsx_pci_idr, GFP_KERNEL)) {
+		ret = -ENOMEM;
+		goto free_handle;
+	}
+
+	spin_lock(&rtsx_pci_lock);
+	ret = idr_get_new(&rtsx_pci_idr, pcr, &pcr->id);
+	spin_unlock(&rtsx_pci_lock);
+	if (ret)
+		goto free_handle;
+
+	pcr->pci = pcidev;
+	dev_set_drvdata(&pcidev->dev, handle);
+
+	len = pci_resource_len(pcidev, 0);
+	base = pci_resource_start(pcidev, 0);
+	pcr->remap_addr = ioremap_nocache(base, len);
+	if (!pcr->remap_addr) {
+		ret = -ENOMEM;
+		goto free_host;
+	}
+
+	pcr->rtsx_resv_buf = dma_alloc_coherent(&(pcidev->dev),
+			RTSX_RESV_BUF_LEN, &(pcr->rtsx_resv_buf_addr),
+			GFP_KERNEL);
+	if (pcr->rtsx_resv_buf == NULL) {
+		ret = -ENXIO;
+		goto unmap;
+	}
+	pcr->host_cmds_ptr = pcr->rtsx_resv_buf;
+	pcr->host_cmds_addr = pcr->rtsx_resv_buf_addr;
+	pcr->host_sg_tbl_ptr = pcr->rtsx_resv_buf + HOST_CMDS_BUF_LEN;
+	pcr->host_sg_tbl_addr = pcr->rtsx_resv_buf_addr + HOST_CMDS_BUF_LEN;
+
+	pcr->card_inserted = 0;
+	pcr->card_removed = 0;
+	INIT_DELAYED_WORK(&pcr->carddet_work, rtsx_pci_card_detect);
+	INIT_DELAYED_WORK(&pcr->idle_work, rtsx_pci_idle_work);
+
+	pcr->msi_en = msi_en;
+	if (pcr->msi_en) {
+		ret = pci_enable_msi(pcidev);
+		if (ret < 0)
+			pcr->msi_en = false;
+	}
+
+	ret = rtsx_pci_acquire_irq(pcr);
+	if (ret < 0)
+		goto free_dma;
+
+	pci_set_master(pcidev);
+	synchronize_irq(pcr->irq);
+
+	ret = rtsx_pci_init_chip(pcr);
+	if (ret < 0)
+		goto disable_irq;
+
+	for (i = 0; i < ARRAY_SIZE(rtsx_pcr_cells); i++) {
+		rtsx_pcr_cells[i].platform_data = handle;
+		rtsx_pcr_cells[i].pdata_size = sizeof(*handle);
+	}
+	ret = mfd_add_devices(&pcidev->dev, pcr->id, rtsx_pcr_cells,
+			ARRAY_SIZE(rtsx_pcr_cells), NULL, 0, NULL);
+	if (ret < 0)
+		goto disable_irq;
+
+	schedule_delayed_work(&pcr->idle_work, msecs_to_jiffies(200));
+
+	return 0;
+
+disable_irq:
+	free_irq(pcr->irq, (void *)pcr);
+free_dma:
+	dma_free_coherent(&(pcr->pci->dev), RTSX_RESV_BUF_LEN,
+			pcr->rtsx_resv_buf, pcr->rtsx_resv_buf_addr);
+unmap:
+	iounmap(pcr->remap_addr);
+free_host:
+	dev_set_drvdata(&pcidev->dev, NULL);
+free_handle:
+	kfree(handle);
+free_pcr:
+	kfree(pcr);
+release_pci:
+	pci_release_regions(pcidev);
+disable:
+	pci_disable_device(pcidev);
+
+	return ret;
+}
+
+static void __devexit rtsx_pci_remove(struct pci_dev *pcidev)
+{
+	struct pcr_handle *handle = pci_get_drvdata(pcidev);
+	struct rtsx_pcr *pcr = handle->pcr;
+
+	pcr->remove_pci = true;
+
+	cancel_delayed_work(&pcr->carddet_work);
+	cancel_delayed_work(&pcr->idle_work);
+
+	mfd_remove_devices(&pcidev->dev);
+
+	dma_free_coherent(&(pcr->pci->dev), RTSX_RESV_BUF_LEN,
+			pcr->rtsx_resv_buf, pcr->rtsx_resv_buf_addr);
+	free_irq(pcr->irq, (void *)pcr);
+	if (pcr->msi_en)
+		pci_disable_msi(pcr->pci);
+	iounmap(pcr->remap_addr);
+
+	dev_set_drvdata(&pcidev->dev, NULL);
+	pci_release_regions(pcidev);
+	pci_disable_device(pcidev);
+
+	spin_lock(&rtsx_pci_lock);
+	idr_remove(&rtsx_pci_idr, pcr->id);
+	spin_unlock(&rtsx_pci_lock);
+
+	kfree(pcr->slots);
+	kfree(pcr);
+	kfree(handle);
+
+	dev_dbg(&(pcidev->dev),
+		": Realtek PCI-E Card Reader at %s [%04x:%04x] has been removed\n",
+		pci_name(pcidev), (int)pcidev->vendor, (int)pcidev->device);
+}
+
+#ifdef CONFIG_PM
+
+static int rtsx_pci_suspend(struct pci_dev *pcidev, pm_message_t state)
+{
+	struct pcr_handle *handle;
+	struct rtsx_pcr *pcr;
+	int ret = 0;
+
+	dev_dbg(&(pcidev->dev), "--> %s\n", __func__);
+
+	handle = pci_get_drvdata(pcidev);
+	pcr = handle->pcr;
+
+	cancel_delayed_work(&pcr->carddet_work);
+	cancel_delayed_work(&pcr->idle_work);
+
+	mutex_lock(&pcr->pcr_mutex);
+
+	if (pcr->ops->turn_off_led)
+		pcr->ops->turn_off_led(pcr);
+
+	rtsx_pci_writel(pcr, RTSX_BIER, 0);
+	pcr->bier = 0;
+
+	rtsx_pci_write_register(pcr, PETXCFG, 0x08, 0x08);
+	rtsx_pci_write_register(pcr, HOST_SLEEP_STATE, 0x03, 0x02);
+
+	pci_save_state(pcidev);
+	pci_enable_wake(pcidev, pci_choose_state(pcidev, state), 0);
+	pci_disable_device(pcidev);
+	pci_set_power_state(pcidev, pci_choose_state(pcidev, state));
+
+	mutex_unlock(&pcr->pcr_mutex);
+	return ret;
+}
+
+static int rtsx_pci_resume(struct pci_dev *pcidev)
+{
+	struct pcr_handle *handle;
+	struct rtsx_pcr *pcr;
+	int ret = 0;
+
+	dev_dbg(&(pcidev->dev), "--> %s\n", __func__);
+
+	handle = pci_get_drvdata(pcidev);
+	pcr = handle->pcr;
+
+	mutex_lock(&pcr->pcr_mutex);
+
+	pci_set_power_state(pcidev, PCI_D0);
+	pci_restore_state(pcidev);
+	ret = pci_enable_device(pcidev);
+	if (ret)
+		goto out;
+	pci_set_master(pcidev);
+
+	ret = rtsx_pci_write_register(pcr, HOST_SLEEP_STATE, 0x03, 0x00);
+	if (ret)
+		goto out;
+
+	ret = rtsx_pci_init_hw(pcr);
+	if (ret)
+		goto out;
+
+	schedule_delayed_work(&pcr->idle_work, msecs_to_jiffies(200));
+
+out:
+	mutex_unlock(&pcr->pcr_mutex);
+	return ret;
+}
+
+#else /* CONFIG_PM */
+
+#define rtsx_pci_suspend NULL
+#define rtsx_pci_resume NULL
+
+#endif /* CONFIG_PM */
+
+static struct pci_driver rtsx_pci_driver = {
+	.name = DRV_NAME_RTSX_PCI,
+	.id_table = rtsx_pci_ids,
+	.probe = rtsx_pci_probe,
+	.remove = __devexit_p(rtsx_pci_remove),
+	.suspend = rtsx_pci_suspend,
+	.resume = rtsx_pci_resume,
+};
+module_pci_driver(rtsx_pci_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Wei WANG <wei_wang@realsil.com.cn>");
+MODULE_DESCRIPTION("Realtek PCI-E Card Reader Driver");
diff --git a/drivers/mfd/rtsx_pcr.h b/drivers/mfd/rtsx_pcr.h
new file mode 100644
index 000000000000..12462c1df1a9
--- /dev/null
+++ b/drivers/mfd/rtsx_pcr.h
@@ -0,0 +1,32 @@
+/* Driver for Realtek PCI-Express card reader
+ *
+ * Copyright(c) 2009 Realtek Semiconductor Corp. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author:
+ *   Wei WANG <wei_wang@realsil.com.cn>
+ *   No. 450, Shenhu Road, Suzhou Industry Park, Suzhou, China
+ */
+
+#ifndef __RTSX_PCR_H
+#define __RTSX_PCR_H
+
+#include <linux/mfd/rtsx_pci.h>
+
+void rts5209_init_params(struct rtsx_pcr *pcr);
+void rts5229_init_params(struct rtsx_pcr *pcr);
+void rtl8411_init_params(struct rtsx_pcr *pcr);
+
+#endif
diff --git a/include/linux/mfd/rtsx_common.h b/include/linux/mfd/rtsx_common.h
new file mode 100644
index 000000000000..a8d393e3066b
--- /dev/null
+++ b/include/linux/mfd/rtsx_common.h
@@ -0,0 +1,48 @@
+/* Driver for Realtek driver-based card reader
+ *
+ * Copyright(c) 2009 Realtek Semiconductor Corp. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author:
+ *   Wei WANG <wei_wang@realsil.com.cn>
+ *   No. 450, Shenhu Road, Suzhou Industry Park, Suzhou, China
+ */
+
+#ifndef __RTSX_COMMON_H
+#define __RTSX_COMMON_H
+
+#define DRV_NAME_RTSX_PCI		"rtsx_pci"
+#define DRV_NAME_RTSX_PCI_SDMMC		"rtsx_pci_sdmmc"
+#define DRV_NAME_RTSX_PCI_MS		"rtsx_pci_ms"
+
+#define RTSX_REG_PAIR(addr, val)	(((u32)(addr) << 16) | (u8)(val))
+
+#define RTSX_SSC_DEPTH_4M		0x01
+#define RTSX_SSC_DEPTH_2M		0x02
+#define RTSX_SSC_DEPTH_1M		0x03
+#define RTSX_SSC_DEPTH_500K		0x04
+#define RTSX_SSC_DEPTH_250K		0x05
+
+#define RTSX_SD_CARD			0
+#define RTSX_MS_CARD			1
+
+struct platform_device;
+
+struct rtsx_slot {
+	struct platform_device	*p_dev;
+	void			(*card_event)(struct platform_device *p_dev);
+};
+
+#endif
diff --git a/include/linux/mfd/rtsx_pci.h b/include/linux/mfd/rtsx_pci.h
new file mode 100644
index 000000000000..060b721fcbfb
--- /dev/null
+++ b/include/linux/mfd/rtsx_pci.h
@@ -0,0 +1,794 @@
+/* Driver for Realtek PCI-Express card reader
+ *
+ * Copyright(c) 2009 Realtek Semiconductor Corp. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author:
+ *   Wei WANG <wei_wang@realsil.com.cn>
+ *   No. 450, Shenhu Road, Suzhou Industry Park, Suzhou, China
+ */
+
+#ifndef __RTSX_PCI_H
+#define __RTSX_PCI_H
+
+#include <linux/sched.h>
+#include <linux/pci.h>
+
+#include "rtsx_common.h"
+
+#define MAX_RW_REG_CNT			1024
+
+/* PCI Operation Register Address */
+#define RTSX_HCBAR			0x00
+#define RTSX_HCBCTLR			0x04
+#define RTSX_HDBAR			0x08
+#define RTSX_HDBCTLR			0x0C
+#define RTSX_HAIMR			0x10
+#define RTSX_BIPR			0x14
+#define RTSX_BIER			0x18
+
+/* Host command buffer control register */
+#define STOP_CMD			(0x01 << 28)
+
+/* Host data buffer control register */
+#define SDMA_MODE			0x00
+#define ADMA_MODE			(0x02 << 26)
+#define STOP_DMA			(0x01 << 28)
+#define TRIG_DMA			(0x01 << 31)
+
+/* Host access internal memory register */
+#define HAIMR_TRANS_START		(0x01 << 31)
+#define HAIMR_READ			0x00
+#define HAIMR_WRITE			(0x01 << 30)
+#define HAIMR_READ_START		(HAIMR_TRANS_START | HAIMR_READ)
+#define HAIMR_WRITE_START		(HAIMR_TRANS_START | HAIMR_WRITE)
+#define HAIMR_TRANS_END			(HAIMR_TRANS_START)
+
+/* Bus interrupt pending register */
+#define CMD_DONE_INT			(1 << 31)
+#define DATA_DONE_INT			(1 << 30)
+#define TRANS_OK_INT			(1 << 29)
+#define TRANS_FAIL_INT			(1 << 28)
+#define XD_INT				(1 << 27)
+#define MS_INT				(1 << 26)
+#define SD_INT				(1 << 25)
+#define GPIO0_INT			(1 << 24)
+#define OC_INT				(1 << 23)
+#define SD_WRITE_PROTECT		(1 << 19)
+#define XD_EXIST			(1 << 18)
+#define MS_EXIST			(1 << 17)
+#define SD_EXIST			(1 << 16)
+#define DELINK_INT			GPIO0_INT
+#define MS_OC_INT			(1 << 23)
+#define SD_OC_INT			(1 << 22)
+
+#define CARD_INT		(XD_INT | MS_INT | SD_INT)
+#define NEED_COMPLETE_INT	(DATA_DONE_INT | TRANS_OK_INT | TRANS_FAIL_INT)
+#define RTSX_INT		(CMD_DONE_INT | NEED_COMPLETE_INT | \
+					CARD_INT | GPIO0_INT | OC_INT)
+
+#define CARD_EXIST		(XD_EXIST | MS_EXIST | SD_EXIST)
+
+/* Bus interrupt enable register */
+#define CMD_DONE_INT_EN		(1 << 31)
+#define DATA_DONE_INT_EN	(1 << 30)
+#define TRANS_OK_INT_EN		(1 << 29)
+#define TRANS_FAIL_INT_EN	(1 << 28)
+#define XD_INT_EN		(1 << 27)
+#define MS_INT_EN		(1 << 26)
+#define SD_INT_EN		(1 << 25)
+#define GPIO0_INT_EN		(1 << 24)
+#define OC_INT_EN		(1 << 23)
+#define DELINK_INT_EN		GPIO0_INT_EN
+#define MS_OC_INT_EN		(1 << 23)
+#define SD_OC_INT_EN		(1 << 22)
+
+#define READ_REG_CMD		0
+#define WRITE_REG_CMD		1
+#define CHECK_REG_CMD		2
+
+/*
+ * macros for easy use
+ */
+#define rtsx_pci_writel(pcr, reg, value) \
+	iowrite32(value, (pcr)->remap_addr + reg)
+#define rtsx_pci_readl(pcr, reg) \
+	ioread32((pcr)->remap_addr + reg)
+#define rtsx_pci_writew(pcr, reg, value) \
+	iowrite16(value, (pcr)->remap_addr + reg)
+#define rtsx_pci_readw(pcr, reg) \
+	ioread16((pcr)->remap_addr + reg)
+#define rtsx_pci_writeb(pcr, reg, value) \
+	iowrite8(value, (pcr)->remap_addr + reg)
+#define rtsx_pci_readb(pcr, reg) \
+	ioread8((pcr)->remap_addr + reg)
+
+#define rtsx_pci_read_config_byte(pcr, where, val) \
+	pci_read_config_byte((pcr)->pci, where, val)
+
+#define rtsx_pci_write_config_byte(pcr, where, val) \
+	pci_write_config_byte((pcr)->pci, where, val)
+
+#define rtsx_pci_read_config_dword(pcr, where, val) \
+	pci_read_config_dword((pcr)->pci, where, val)
+
+#define rtsx_pci_write_config_dword(pcr, where, val) \
+	pci_write_config_dword((pcr)->pci, where, val)
+
+#define STATE_TRANS_NONE	0
+#define STATE_TRANS_CMD		1
+#define STATE_TRANS_BUF		2
+#define STATE_TRANS_SG		3
+
+#define TRANS_NOT_READY		0
+#define TRANS_RESULT_OK		1
+#define TRANS_RESULT_FAIL	2
+#define TRANS_NO_DEVICE		3
+
+#define RTSX_RESV_BUF_LEN	4096
+#define HOST_CMDS_BUF_LEN	1024
+#define HOST_SG_TBL_BUF_LEN	(RTSX_RESV_BUF_LEN - HOST_CMDS_BUF_LEN)
+#define HOST_SG_TBL_ITEMS	(HOST_SG_TBL_BUF_LEN / 8)
+#define MAX_SG_ITEM_LEN		0x80000
+
+#define HOST_TO_DEVICE		0
+#define DEVICE_TO_HOST		1
+
+#define MAX_PHASE		31
+#define RX_TUNING_CNT		3
+
+/* SG descriptor */
+#define SG_INT			0x04
+#define SG_END			0x02
+#define SG_VALID		0x01
+
+#define SG_NO_OP		0x00
+#define SG_TRANS_DATA		(0x02 << 4)
+#define SG_LINK_DESC		(0x03 << 4)
+
+/* SD bank voltage */
+#define SD_IO_3V3		0
+#define SD_IO_1V8		1
+
+
+/* Card Clock Enable Register */
+#define SD_CLK_EN			0x04
+#define MS_CLK_EN			0x08
+
+/* Card Select Register */
+#define SD_MOD_SEL			2
+#define MS_MOD_SEL			3
+
+/* Card Output Enable Register */
+#define SD_OUTPUT_EN			0x04
+#define MS_OUTPUT_EN			0x08
+
+/* CARD_SHARE_MODE */
+#define CARD_SHARE_MASK			0x0F
+#define CARD_SHARE_MULTI_LUN		0x00
+#define	CARD_SHARE_NORMAL		0x00
+#define	CARD_SHARE_48_SD		0x04
+#define	CARD_SHARE_48_MS		0x08
+/* CARD_SHARE_MODE for barossa */
+#define CARD_SHARE_BAROSSA_SD		0x01
+#define CARD_SHARE_BAROSSA_MS		0x02
+
+/* SD30_DRIVE_SEL */
+#define DRIVER_TYPE_A			0x05
+#define DRIVER_TYPE_B			0x03
+#define DRIVER_TYPE_C			0x02
+#define DRIVER_TYPE_D			0x01
+
+/* FPDCTL */
+#define SSC_POWER_DOWN			0x01
+#define SD_OC_POWER_DOWN		0x02
+#define ALL_POWER_DOWN			0x07
+#define OC_POWER_DOWN			0x06
+
+/* CLK_CTL */
+#define CHANGE_CLK			0x01
+
+/* LDO_CTL */
+#define BPP_LDO_POWB			0x03
+#define BPP_LDO_ON			0x00
+#define BPP_LDO_SUSPEND			0x02
+#define BPP_LDO_OFF			0x03
+
+/* CD_PAD_CTL */
+#define CD_DISABLE_MASK			0x07
+#define MS_CD_DISABLE			0x04
+#define SD_CD_DISABLE			0x02
+#define XD_CD_DISABLE			0x01
+#define CD_DISABLE			0x07
+#define CD_ENABLE			0x00
+#define MS_CD_EN_ONLY			0x03
+#define SD_CD_EN_ONLY			0x05
+#define XD_CD_EN_ONLY			0x06
+#define FORCE_CD_LOW_MASK		0x38
+#define FORCE_CD_XD_LOW			0x08
+#define FORCE_CD_SD_LOW			0x10
+#define FORCE_CD_MS_LOW			0x20
+#define CD_AUTO_DISABLE			0x40
+
+/* SD_STAT1 */
+#define	SD_CRC7_ERR			0x80
+#define	SD_CRC16_ERR			0x40
+#define	SD_CRC_WRITE_ERR		0x20
+#define	SD_CRC_WRITE_ERR_MASK		0x1C
+#define	GET_CRC_TIME_OUT		0x02
+#define	SD_TUNING_COMPARE_ERR		0x01
+
+/* SD_STAT2 */
+#define	SD_RSP_80CLK_TIMEOUT		0x01
+
+/* SD_BUS_STAT */
+#define	SD_CLK_TOGGLE_EN		0x80
+#define	SD_CLK_FORCE_STOP	        0x40
+#define	SD_DAT3_STATUS		        0x10
+#define	SD_DAT2_STATUS		        0x08
+#define	SD_DAT1_STATUS		        0x04
+#define	SD_DAT0_STATUS		        0x02
+#define	SD_CMD_STATUS			0x01
+
+/* SD_PAD_CTL */
+#define	SD_IO_USING_1V8		        0x80
+#define	SD_IO_USING_3V3		        0x7F
+#define	TYPE_A_DRIVING		        0x00
+#define	TYPE_B_DRIVING			0x01
+#define	TYPE_C_DRIVING			0x02
+#define	TYPE_D_DRIVING		        0x03
+
+/* SD_SAMPLE_POINT_CTL */
+#define	DDR_FIX_RX_DAT			0x00
+#define	DDR_VAR_RX_DAT			0x80
+#define	DDR_FIX_RX_DAT_EDGE		0x00
+#define	DDR_FIX_RX_DAT_14_DELAY		0x40
+#define	DDR_FIX_RX_CMD			0x00
+#define	DDR_VAR_RX_CMD			0x20
+#define	DDR_FIX_RX_CMD_POS_EDGE		0x00
+#define	DDR_FIX_RX_CMD_14_DELAY		0x10
+#define	SD20_RX_POS_EDGE		0x00
+#define	SD20_RX_14_DELAY		0x08
+#define SD20_RX_SEL_MASK		0x08
+
+/* SD_PUSH_POINT_CTL */
+#define	DDR_FIX_TX_CMD_DAT		0x00
+#define	DDR_VAR_TX_CMD_DAT		0x80
+#define	DDR_FIX_TX_DAT_14_TSU		0x00
+#define	DDR_FIX_TX_DAT_12_TSU		0x40
+#define	DDR_FIX_TX_CMD_NEG_EDGE		0x00
+#define	DDR_FIX_TX_CMD_14_AHEAD		0x20
+#define	SD20_TX_NEG_EDGE		0x00
+#define	SD20_TX_14_AHEAD		0x10
+#define SD20_TX_SEL_MASK		0x10
+#define	DDR_VAR_SDCLK_POL_SWAP		0x01
+
+/* SD_TRANSFER */
+#define	SD_TRANSFER_START		0x80
+#define	SD_TRANSFER_END			0x40
+#define SD_STAT_IDLE			0x20
+#define	SD_TRANSFER_ERR			0x10
+/* SD Transfer Mode definition */
+#define	SD_TM_NORMAL_WRITE		0x00
+#define	SD_TM_AUTO_WRITE_3		0x01
+#define	SD_TM_AUTO_WRITE_4		0x02
+#define	SD_TM_AUTO_READ_3		0x05
+#define	SD_TM_AUTO_READ_4		0x06
+#define	SD_TM_CMD_RSP			0x08
+#define	SD_TM_AUTO_WRITE_1		0x09
+#define	SD_TM_AUTO_WRITE_2		0x0A
+#define	SD_TM_NORMAL_READ		0x0C
+#define	SD_TM_AUTO_READ_1		0x0D
+#define	SD_TM_AUTO_READ_2		0x0E
+#define	SD_TM_AUTO_TUNING		0x0F
+
+/* SD_VPTX_CTL / SD_VPRX_CTL */
+#define PHASE_CHANGE			0x80
+#define PHASE_NOT_RESET			0x40
+
+/* SD_DCMPS_TX_CTL / SD_DCMPS_RX_CTL */
+#define DCMPS_CHANGE			0x80
+#define DCMPS_CHANGE_DONE		0x40
+#define DCMPS_ERROR			0x20
+#define DCMPS_CURRENT_PHASE		0x1F
+
+/* SD Configure 1 Register */
+#define SD_CLK_DIVIDE_0			0x00
+#define	SD_CLK_DIVIDE_256		0xC0
+#define	SD_CLK_DIVIDE_128		0x80
+#define	SD_BUS_WIDTH_1BIT		0x00
+#define	SD_BUS_WIDTH_4BIT		0x01
+#define	SD_BUS_WIDTH_8BIT		0x02
+#define	SD_ASYNC_FIFO_NOT_RST		0x10
+#define	SD_20_MODE			0x00
+#define	SD_DDR_MODE			0x04
+#define	SD_30_MODE			0x08
+
+#define SD_CLK_DIVIDE_MASK		0xC0
+
+/* SD_CMD_STATE */
+#define SD_CMD_IDLE			0x80
+
+/* SD_DATA_STATE */
+#define SD_DATA_IDLE			0x80
+
+/* DCM_DRP_CTL */
+#define DCM_RESET			0x08
+#define DCM_LOCKED			0x04
+#define DCM_208M			0x00
+#define DCM_TX			        0x01
+#define DCM_RX			        0x02
+
+/* DCM_DRP_TRIG */
+#define DRP_START			0x80
+#define DRP_DONE			0x40
+
+/* DCM_DRP_CFG */
+#define DRP_WRITE			0x80
+#define DRP_READ			0x00
+#define DCM_WRITE_ADDRESS_50		0x50
+#define DCM_WRITE_ADDRESS_51		0x51
+#define DCM_READ_ADDRESS_00		0x00
+#define DCM_READ_ADDRESS_51		0x51
+
+/* IRQSTAT0 */
+#define DMA_DONE_INT			0x80
+#define SUSPEND_INT			0x40
+#define LINK_RDY_INT			0x20
+#define LINK_DOWN_INT			0x10
+
+/* DMACTL */
+#define DMA_RST				0x80
+#define DMA_BUSY			0x04
+#define DMA_DIR_TO_CARD			0x00
+#define DMA_DIR_FROM_CARD		0x02
+#define DMA_EN				0x01
+#define DMA_128				(0 << 4)
+#define DMA_256				(1 << 4)
+#define DMA_512				(2 << 4)
+#define DMA_1024			(3 << 4)
+#define DMA_PACK_SIZE_MASK		0x30
+
+/* SSC_CTL1 */
+#define SSC_RSTB			0x80
+#define SSC_8X_EN			0x40
+#define SSC_FIX_FRAC			0x20
+#define SSC_SEL_1M			0x00
+#define SSC_SEL_2M			0x08
+#define SSC_SEL_4M			0x10
+#define SSC_SEL_8M			0x18
+
+/* SSC_CTL2 */
+#define SSC_DEPTH_MASK			0x07
+#define SSC_DEPTH_DISALBE		0x00
+#define SSC_DEPTH_4M			0x01
+#define SSC_DEPTH_2M			0x02
+#define SSC_DEPTH_1M			0x03
+#define SSC_DEPTH_500K			0x04
+#define SSC_DEPTH_250K			0x05
+
+/* System Clock Control Register */
+#define CLK_LOW_FREQ			0x01
+
+/* System Clock Divider Register */
+#define CLK_DIV_1			0x01
+#define CLK_DIV_2			0x02
+#define CLK_DIV_4			0x03
+#define CLK_DIV_8			0x04
+
+/* MS_CFG */
+#define	SAMPLE_TIME_RISING		0x00
+#define	SAMPLE_TIME_FALLING		0x80
+#define	PUSH_TIME_DEFAULT		0x00
+#define	PUSH_TIME_ODD			0x40
+#define	NO_EXTEND_TOGGLE		0x00
+#define	EXTEND_TOGGLE_CHK		0x20
+#define	MS_BUS_WIDTH_1			0x00
+#define	MS_BUS_WIDTH_4			0x10
+#define	MS_BUS_WIDTH_8			0x18
+#define	MS_2K_SECTOR_MODE		0x04
+#define	MS_512_SECTOR_MODE		0x00
+#define	MS_TOGGLE_TIMEOUT_EN		0x00
+#define	MS_TOGGLE_TIMEOUT_DISEN		0x01
+#define MS_NO_CHECK_INT			0x02
+
+/* MS_TRANS_CFG */
+#define	WAIT_INT			0x80
+#define	NO_WAIT_INT			0x00
+#define	NO_AUTO_READ_INT_REG		0x00
+#define	AUTO_READ_INT_REG		0x40
+#define	MS_CRC16_ERR			0x20
+#define	MS_RDY_TIMEOUT			0x10
+#define	MS_INT_CMDNK			0x08
+#define	MS_INT_BREQ			0x04
+#define	MS_INT_ERR			0x02
+#define	MS_INT_CED			0x01
+
+/* MS_TRANSFER */
+#define	MS_TRANSFER_START		0x80
+#define	MS_TRANSFER_END			0x40
+#define	MS_TRANSFER_ERR			0x20
+#define	MS_BS_STATE			0x10
+#define	MS_TM_READ_BYTES		0x00
+#define	MS_TM_NORMAL_READ		0x01
+#define	MS_TM_WRITE_BYTES		0x04
+#define	MS_TM_NORMAL_WRITE		0x05
+#define	MS_TM_AUTO_READ			0x08
+#define	MS_TM_AUTO_WRITE		0x0C
+
+/* SD Configure 2 Register */
+#define	SD_CALCULATE_CRC7		0x00
+#define	SD_NO_CALCULATE_CRC7		0x80
+#define	SD_CHECK_CRC16			0x00
+#define	SD_NO_CHECK_CRC16		0x40
+#define SD_NO_CHECK_WAIT_CRC_TO		0x20
+#define	SD_WAIT_BUSY_END		0x08
+#define	SD_NO_WAIT_BUSY_END		0x00
+#define	SD_CHECK_CRC7			0x00
+#define	SD_NO_CHECK_CRC7		0x04
+#define	SD_RSP_LEN_0			0x00
+#define	SD_RSP_LEN_6			0x01
+#define	SD_RSP_LEN_17			0x02
+/* SD/MMC Response Type Definition */
+#define	SD_RSP_TYPE_R0			0x04
+#define	SD_RSP_TYPE_R1			0x01
+#define	SD_RSP_TYPE_R1b			0x09
+#define	SD_RSP_TYPE_R2			0x02
+#define	SD_RSP_TYPE_R3			0x05
+#define	SD_RSP_TYPE_R4			0x05
+#define	SD_RSP_TYPE_R5			0x01
+#define	SD_RSP_TYPE_R6			0x01
+#define	SD_RSP_TYPE_R7			0x01
+
+/* SD_CONFIURE3 */
+#define	SD_RSP_80CLK_TIMEOUT_EN		0x01
+
+/* Card Transfer Reset Register */
+#define SPI_STOP			0x01
+#define XD_STOP				0x02
+#define SD_STOP				0x04
+#define MS_STOP				0x08
+#define SPI_CLR_ERR			0x10
+#define XD_CLR_ERR			0x20
+#define SD_CLR_ERR			0x40
+#define MS_CLR_ERR			0x80
+
+/* Card Data Source Register */
+#define PINGPONG_BUFFER			0x01
+#define RING_BUFFER			0x00
+
+/* Card Power Control Register */
+#define PMOS_STRG_MASK			0x10
+#define PMOS_STRG_800mA			0x10
+#define PMOS_STRG_400mA			0x00
+#define SD_POWER_OFF			0x03
+#define SD_PARTIAL_POWER_ON		0x01
+#define SD_POWER_ON			0x00
+#define SD_POWER_MASK			0x03
+#define MS_POWER_OFF			0x0C
+#define MS_PARTIAL_POWER_ON		0x04
+#define MS_POWER_ON			0x00
+#define MS_POWER_MASK			0x0C
+#define BPP_POWER_OFF			0x0F
+#define BPP_POWER_5_PERCENT_ON		0x0E
+#define BPP_POWER_10_PERCENT_ON		0x0C
+#define BPP_POWER_15_PERCENT_ON		0x08
+#define BPP_POWER_ON			0x00
+#define BPP_POWER_MASK			0x0F
+
+/* PWR_GATE_CTRL */
+#define PWR_GATE_EN			0x01
+#define LDO3318_PWR_MASK		0x06
+#define LDO_ON				0x00
+#define LDO_SUSPEND			0x04
+#define LDO_OFF				0x06
+
+/* CARD_CLK_SOURCE */
+#define CRC_FIX_CLK			(0x00 << 0)
+#define CRC_VAR_CLK0			(0x01 << 0)
+#define CRC_VAR_CLK1			(0x02 << 0)
+#define SD30_FIX_CLK			(0x00 << 2)
+#define SD30_VAR_CLK0			(0x01 << 2)
+#define SD30_VAR_CLK1			(0x02 << 2)
+#define SAMPLE_FIX_CLK			(0x00 << 4)
+#define SAMPLE_VAR_CLK0			(0x01 << 4)
+#define SAMPLE_VAR_CLK1			(0x02 << 4)
+
+#define MS_CFG				0xFD40
+#define MS_TPC				0xFD41
+#define MS_TRANS_CFG			0xFD42
+#define MS_TRANSFER			0xFD43
+#define MS_INT_REG			0xFD44
+#define MS_BYTE_CNT			0xFD45
+#define MS_SECTOR_CNT_L			0xFD46
+#define MS_SECTOR_CNT_H			0xFD47
+#define MS_DBUS_H			0xFD48
+
+#define SD_CFG1				0xFDA0
+#define SD_CFG2				0xFDA1
+#define SD_CFG3				0xFDA2
+#define SD_STAT1			0xFDA3
+#define SD_STAT2			0xFDA4
+#define SD_BUS_STAT			0xFDA5
+#define SD_PAD_CTL			0xFDA6
+#define SD_SAMPLE_POINT_CTL		0xFDA7
+#define SD_PUSH_POINT_CTL		0xFDA8
+#define SD_CMD0				0xFDA9
+#define SD_CMD1				0xFDAA
+#define SD_CMD2				0xFDAB
+#define SD_CMD3				0xFDAC
+#define SD_CMD4				0xFDAD
+#define SD_CMD5				0xFDAE
+#define SD_BYTE_CNT_L			0xFDAF
+#define SD_BYTE_CNT_H			0xFDB0
+#define SD_BLOCK_CNT_L			0xFDB1
+#define SD_BLOCK_CNT_H			0xFDB2
+#define SD_TRANSFER			0xFDB3
+#define SD_CMD_STATE			0xFDB5
+#define SD_DATA_STATE			0xFDB6
+
+#define SRCTL				0xFC13
+
+#define	DCM_DRP_CTL			0xFC23
+#define	DCM_DRP_TRIG			0xFC24
+#define	DCM_DRP_CFG			0xFC25
+#define	DCM_DRP_WR_DATA_L		0xFC26
+#define	DCM_DRP_WR_DATA_H		0xFC27
+#define	DCM_DRP_RD_DATA_L		0xFC28
+#define	DCM_DRP_RD_DATA_H		0xFC29
+#define SD_VPCLK0_CTL			0xFC2A
+#define SD_VPCLK1_CTL			0xFC2B
+#define SD_DCMPS0_CTL			0xFC2C
+#define SD_DCMPS1_CTL			0xFC2D
+#define SD_VPTX_CTL			SD_VPCLK0_CTL
+#define SD_VPRX_CTL			SD_VPCLK1_CTL
+#define SD_DCMPS_TX_CTL			SD_DCMPS0_CTL
+#define SD_DCMPS_RX_CTL			SD_DCMPS1_CTL
+#define CARD_CLK_SOURCE			0xFC2E
+
+#define CARD_PWR_CTL			0xFD50
+#define CARD_CLK_SWITCH			0xFD51
+#define CARD_SHARE_MODE			0xFD52
+#define CARD_DRIVE_SEL			0xFD53
+#define CARD_STOP			0xFD54
+#define CARD_OE				0xFD55
+#define CARD_AUTO_BLINK			0xFD56
+#define CARD_GPIO_DIR			0xFD57
+#define CARD_GPIO			0xFD58
+#define CARD_DATA_SOURCE		0xFD5B
+#define CARD_SELECT			0xFD5C
+#define SD30_DRIVE_SEL			0xFD5E
+#define CARD_CLK_EN			0xFD69
+#define SDIO_CTRL			0xFD6B
+#define CD_PAD_CTL			0xFD73
+
+#define FPDCTL				0xFC00
+#define PDINFO				0xFC01
+
+#define CLK_CTL				0xFC02
+#define CLK_DIV				0xFC03
+#define CLK_SEL				0xFC04
+
+#define SSC_DIV_N_0			0xFC0F
+#define SSC_DIV_N_1			0xFC10
+#define SSC_CTL1			0xFC11
+#define SSC_CTL2			0xFC12
+
+#define RCCTL				0xFC14
+
+#define FPGA_PULL_CTL			0xFC1D
+#define OLT_LED_CTL			0xFC1E
+#define GPIO_CTL			0xFC1F
+
+#define LDO_CTL				0xFC1E
+#define SYS_VER				0xFC32
+
+#define CARD_PULL_CTL1			0xFD60
+#define CARD_PULL_CTL2			0xFD61
+#define CARD_PULL_CTL3			0xFD62
+#define CARD_PULL_CTL4			0xFD63
+#define CARD_PULL_CTL5			0xFD64
+#define CARD_PULL_CTL6			0xFD65
+
+/* PCI Express Related Registers */
+#define IRQEN0				0xFE20
+#define IRQSTAT0			0xFE21
+#define IRQEN1				0xFE22
+#define IRQSTAT1			0xFE23
+#define TLPRIEN				0xFE24
+#define TLPRISTAT			0xFE25
+#define TLPTIEN				0xFE26
+#define TLPTISTAT			0xFE27
+#define DMATC0				0xFE28
+#define DMATC1				0xFE29
+#define DMATC2				0xFE2A
+#define DMATC3				0xFE2B
+#define DMACTL				0xFE2C
+#define BCTL				0xFE2D
+#define RBBC0				0xFE2E
+#define RBBC1				0xFE2F
+#define RBDAT				0xFE30
+#define RBCTL				0xFE34
+#define CFGADDR0			0xFE35
+#define CFGADDR1			0xFE36
+#define CFGDATA0			0xFE37
+#define CFGDATA1			0xFE38
+#define CFGDATA2			0xFE39
+#define CFGDATA3			0xFE3A
+#define CFGRWCTL			0xFE3B
+#define PHYRWCTL			0xFE3C
+#define PHYDATA0			0xFE3D
+#define PHYDATA1			0xFE3E
+#define PHYADDR				0xFE3F
+#define MSGRXDATA0			0xFE40
+#define MSGRXDATA1			0xFE41
+#define MSGRXDATA2			0xFE42
+#define MSGRXDATA3			0xFE43
+#define MSGTXDATA0			0xFE44
+#define MSGTXDATA1			0xFE45
+#define MSGTXDATA2			0xFE46
+#define MSGTXDATA3			0xFE47
+#define MSGTXCTL			0xFE48
+#define PETXCFG				0xFE49
+
+#define CDRESUMECTL			0xFE52
+#define WAKE_SEL_CTL			0xFE54
+#define PME_FORCE_CTL			0xFE56
+#define ASPM_FORCE_CTL			0xFE57
+#define PM_CLK_FORCE_CTL		0xFE58
+#define PERST_GLITCH_WIDTH		0xFE5C
+#define CHANGE_LINK_STATE		0xFE5B
+#define RESET_LOAD_REG			0xFE5E
+#define EFUSE_CONTENT			0xFE5F
+#define HOST_SLEEP_STATE		0xFE60
+#define SDIO_CFG			0xFE70
+
+#define NFTS_TX_CTRL			0xFE72
+
+#define PWR_GATE_CTRL			0xFE75
+#define PWD_SUSPEND_EN			0xFE76
+#define LDO_PWR_SEL			0xFE78
+
+#define DUMMY_REG_RESET_0		0xFE90
+
+/* Memory mapping */
+#define SRAM_BASE			0xE600
+#define RBUF_BASE			0xF400
+#define PPBUF_BASE1			0xF800
+#define PPBUF_BASE2			0xFA00
+#define IMAGE_FLAG_ADDR0		0xCE80
+#define IMAGE_FLAG_ADDR1		0xCE81
+
+#define rtsx_pci_init_cmd(pcr)		((pcr)->ci = 0)
+
+struct rtsx_pcr;
+
+struct pcr_handle {
+	struct rtsx_pcr			*pcr;
+};
+
+struct pcr_ops {
+	int		(*extra_init_hw)(struct rtsx_pcr *pcr);
+	int		(*optimize_phy)(struct rtsx_pcr *pcr);
+	int		(*turn_on_led)(struct rtsx_pcr *pcr);
+	int		(*turn_off_led)(struct rtsx_pcr *pcr);
+	int		(*enable_auto_blink)(struct rtsx_pcr *pcr);
+	int		(*disable_auto_blink)(struct rtsx_pcr *pcr);
+	int		(*card_power_on)(struct rtsx_pcr *pcr, int card);
+	int		(*card_power_off)(struct rtsx_pcr *pcr, int card);
+	unsigned int	(*cd_deglitch)(struct rtsx_pcr *pcr);
+};
+
+enum PDEV_STAT  {PDEV_STAT_IDLE, PDEV_STAT_RUN};
+
+struct rtsx_pcr {
+	struct pci_dev			*pci;
+	unsigned int			id;
+
+	/* pci resources */
+	unsigned long			addr;
+	void __iomem			*remap_addr;
+	int				irq;
+
+	/* host reserved buffer */
+	void				*rtsx_resv_buf;
+	dma_addr_t			rtsx_resv_buf_addr;
+
+	void				*host_cmds_ptr;
+	dma_addr_t			host_cmds_addr;
+	int				ci;
+
+	void				*host_sg_tbl_ptr;
+	dma_addr_t			host_sg_tbl_addr;
+	int				sgi;
+
+	u32				bier;
+	char				trans_result;
+
+	unsigned int			card_inserted;
+	unsigned int			card_removed;
+
+	struct delayed_work		carddet_work;
+	struct delayed_work		idle_work;
+
+	spinlock_t			lock;
+	struct mutex			pcr_mutex;
+	struct completion		*done;
+	struct completion		*finish_me;
+
+	unsigned int			cur_clock;
+	bool				ms_pmos;
+	bool				remove_pci;
+	bool				msi_en;
+
+#define EXTRA_CAPS_SD_SDR50		(1 << 0)
+#define EXTRA_CAPS_SD_SDR104		(1 << 1)
+#define EXTRA_CAPS_SD_DDR50		(1 << 2)
+#define EXTRA_CAPS_MMC_HSDDR		(1 << 3)
+#define EXTRA_CAPS_MMC_HS200		(1 << 4)
+#define EXTRA_CAPS_MMC_8BIT		(1 << 5)
+	u32				extra_caps;
+
+#define IC_VER_A			0
+#define IC_VER_B			1
+#define IC_VER_C			2
+#define IC_VER_D			3
+	u8				ic_version;
+
+	const u32			*sd_pull_ctl_enable_tbl;
+	const u32			*sd_pull_ctl_disable_tbl;
+	const u32			*ms_pull_ctl_enable_tbl;
+	const u32			*ms_pull_ctl_disable_tbl;
+
+	const struct pcr_ops		*ops;
+	enum PDEV_STAT			state;
+
+	int				num_slots;
+	struct rtsx_slot		*slots;
+};
+
+#define CHK_PCI_PID(pcr, pid)		((pcr)->pci->device == (pid))
+#define PCI_VID(pcr)			((pcr)->pci->vendor)
+#define PCI_PID(pcr)			((pcr)->pci->device)
+
+void rtsx_pci_start_run(struct rtsx_pcr *pcr);
+int rtsx_pci_write_register(struct rtsx_pcr *pcr, u16 addr, u8 mask, u8 data);
+int rtsx_pci_read_register(struct rtsx_pcr *pcr, u16 addr, u8 *data);
+int rtsx_pci_write_phy_register(struct rtsx_pcr *pcr, u8 addr, u16 val);
+int rtsx_pci_read_phy_register(struct rtsx_pcr *pcr, u8 addr, u16 *val);
+void rtsx_pci_stop_cmd(struct rtsx_pcr *pcr);
+void rtsx_pci_add_cmd(struct rtsx_pcr *pcr,
+		u8 cmd_type, u16 reg_addr, u8 mask, u8 data);
+void rtsx_pci_send_cmd_no_wait(struct rtsx_pcr *pcr);
+int rtsx_pci_send_cmd(struct rtsx_pcr *pcr, int timeout);
+int rtsx_pci_transfer_data(struct rtsx_pcr *pcr, struct scatterlist *sglist,
+		int num_sg, bool read, int timeout);
+int rtsx_pci_read_ppbuf(struct rtsx_pcr *pcr, u8 *buf, int buf_len);
+int rtsx_pci_write_ppbuf(struct rtsx_pcr *pcr, u8 *buf, int buf_len);
+int rtsx_pci_card_pull_ctl_enable(struct rtsx_pcr *pcr, int card);
+int rtsx_pci_card_pull_ctl_disable(struct rtsx_pcr *pcr, int card);
+int rtsx_pci_switch_clock(struct rtsx_pcr *pcr, unsigned int card_clock,
+		u8 ssc_depth, bool initial_mode, bool double_clk, bool vpclk);
+int rtsx_pci_card_power_on(struct rtsx_pcr *pcr, int card);
+int rtsx_pci_card_power_off(struct rtsx_pcr *pcr, int card);
+unsigned int rtsx_pci_card_exist(struct rtsx_pcr *pcr);
+void rtsx_pci_complete_unfinished_transfer(struct rtsx_pcr *pcr);
+
+static inline u8 *rtsx_pci_get_cmd_data(struct rtsx_pcr *pcr)
+{
+	return (u8 *)(pcr->host_cmds_ptr);
+}
+
+#endif
-- 
cgit v1.2.3


From f399002e68e626e7bc443e6fcab1772704cc197f Mon Sep 17 00:00:00 2001
From: Lars Ellenberg <lars.ellenberg@linbit.com>
Date: Wed, 23 Mar 2011 14:31:09 +0100
Subject: drbd: distribute former syncer_conf settings to disk, connection, and
 resource level

This commit breaks the API again.

Move per-volume former syncer options into disk_conf.
Move per-connection former syncer options into net_conf.
Renamed the remainign sync_conf to res_opts

Syncer settings have been changeable at runtime, so we need to prepare
for these settings to be runtime-changeable in their new home as well.

Introduce new configuration operations, and share the netlink attribute
between "attach" (create new disk) and "disk-opts" (change options).
Same for "connect" and "net-opts".

Some fields cannot be changed at runtime, however.
Introduce a new flag GENLA_F_INVARIANT to be able to trigger on that in
the generated validation and assignment functions.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_int.h      |  10 +-
 drivers/block/drbd/drbd_main.c     |  72 +++--
 drivers/block/drbd/drbd_nl.c       | 550 ++++++++++++++++++++++++-------------
 drivers/block/drbd/drbd_receiver.c |  51 ++--
 drivers/block/drbd/drbd_state.c    |   4 +-
 drivers/block/drbd/drbd_worker.c   |  50 ++--
 include/linux/drbd_genl.h          | 133 +++++----
 include/linux/drbd_limits.h        |   2 +
 include/linux/genl_magic_func.h    |  49 ++--
 include/linux/genl_magic_struct.h  |  18 +-
 10 files changed, 572 insertions(+), 367 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index d6e7e657e7a4..bc265f3733c6 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -860,7 +860,7 @@ struct drbd_md {
 	s32 bm_offset;	/* signed relative sector offset to bitmap */
 
 	/* u32 al_nr_extents;	   important for restoring the AL
-	 * is stored into  sync_conf.al_extents, which in turn
+	 * is stored into  ldev->dc.al_extents, which in turn
 	 * gets applied to act_log->nr_elements
 	 */
 };
@@ -929,6 +929,7 @@ struct drbd_tconn {			/* is a resource from the config file */
 	atomic_t net_cnt;		/* Users of net_conf */
 	wait_queue_head_t net_cnt_wait;
 	wait_queue_head_t ping_wait;		/* Woken upon reception of a ping, and a state change */
+	struct res_opts res_opts;
 
 	struct drbd_socket data;	/* data/barrier/cstate/parameter packets */
 	struct drbd_socket meta;	/* ping/ack (metadata) packets */
@@ -945,6 +946,8 @@ struct drbd_tconn {			/* is a resource from the config file */
 	struct crypto_hash *cram_hmac_tfm;
 	struct crypto_hash *integrity_w_tfm; /* to be used by the worker thread */
 	struct crypto_hash *integrity_r_tfm; /* to be used by the receiver thread */
+	struct crypto_hash *csums_tfm;
+	struct crypto_hash *verify_tfm;
 	void *int_dig_out;
 	void *int_dig_in;
 	void *int_dig_vv;
@@ -963,7 +966,6 @@ struct drbd_conf {
 	unsigned long flags;
 
 	/* configured by drbdsetup */
-	struct syncer_conf sync_conf;
 	struct drbd_backing_dev *ldev __protected_by(local);
 
 	sector_t p_size;     /* partner's disk size */
@@ -1037,8 +1039,6 @@ struct drbd_conf {
 	/* size of out-of-sync range in sectors. */
 	sector_t ov_last_oos_size;
 	unsigned long ov_left; /* in bits */
-	struct crypto_hash *csums_tfm;
-	struct crypto_hash *verify_tfm;
 
 	struct drbd_bitmap *bitmap;
 	unsigned long bm_resync_fo; /* bit offset for drbd_bm_find_next */
@@ -1188,7 +1188,7 @@ extern int conn_send_cmd2(struct drbd_tconn *tconn, enum drbd_packet cmd,
 			  char *data, size_t size);
 #define USE_DATA_SOCKET 1
 #define USE_META_SOCKET 0
-extern int drbd_send_sync_param(struct drbd_conf *mdev, struct syncer_conf *sc);
+extern int drbd_send_sync_param(struct drbd_conf *mdev);
 extern int drbd_send_b_ack(struct drbd_conf *mdev, u32 barrier_nr,
 			u32 set_size);
 extern int drbd_send_ack(struct drbd_conf *, enum drbd_packet,
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 79a0e042252f..bdb12723585e 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -784,7 +784,7 @@ int conn_send_cmd2(struct drbd_tconn *tconn, enum drbd_packet cmd, char *data,
 	return ok;
 }
 
-int drbd_send_sync_param(struct drbd_conf *mdev, struct syncer_conf *sc)
+int drbd_send_sync_param(struct drbd_conf *mdev)
 {
 	struct p_rs_param_95 *p;
 	struct socket *sock;
@@ -793,7 +793,7 @@ int drbd_send_sync_param(struct drbd_conf *mdev, struct syncer_conf *sc)
 
 	size = apv <= 87 ? sizeof(struct p_rs_param)
 		: apv == 88 ? sizeof(struct p_rs_param)
-			+ strlen(mdev->sync_conf.verify_alg) + 1
+			+ strlen(mdev->tconn->net_conf->verify_alg) + 1
 		: apv <= 94 ? sizeof(struct p_rs_param_89)
 		: /* apv >= 95 */ sizeof(struct p_rs_param_95);
 
@@ -812,16 +812,25 @@ int drbd_send_sync_param(struct drbd_conf *mdev, struct syncer_conf *sc)
 		/* initialize verify_alg and csums_alg */
 		memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);
 
-		p->rate = cpu_to_be32(sc->rate);
-		p->c_plan_ahead = cpu_to_be32(sc->c_plan_ahead);
-		p->c_delay_target = cpu_to_be32(sc->c_delay_target);
-		p->c_fill_target = cpu_to_be32(sc->c_fill_target);
-		p->c_max_rate = cpu_to_be32(sc->c_max_rate);
+		if (get_ldev(mdev)) {
+			p->rate = cpu_to_be32(mdev->ldev->dc.resync_rate);
+			p->c_plan_ahead = cpu_to_be32(mdev->ldev->dc.c_plan_ahead);
+			p->c_delay_target = cpu_to_be32(mdev->ldev->dc.c_delay_target);
+			p->c_fill_target = cpu_to_be32(mdev->ldev->dc.c_fill_target);
+			p->c_max_rate = cpu_to_be32(mdev->ldev->dc.c_max_rate);
+			put_ldev(mdev);
+		} else {
+			p->rate = cpu_to_be32(DRBD_RATE_DEF);
+			p->c_plan_ahead = cpu_to_be32(DRBD_C_PLAN_AHEAD_DEF);
+			p->c_delay_target = cpu_to_be32(DRBD_C_DELAY_TARGET_DEF);
+			p->c_fill_target = cpu_to_be32(DRBD_C_FILL_TARGET_DEF);
+			p->c_max_rate = cpu_to_be32(DRBD_C_MAX_RATE_DEF);
+		}
 
 		if (apv >= 88)
-			strcpy(p->verify_alg, mdev->sync_conf.verify_alg);
+			strcpy(p->verify_alg, mdev->tconn->net_conf->verify_alg);
 		if (apv >= 89)
-			strcpy(p->csums_alg, mdev->sync_conf.csums_alg);
+			strcpy(p->csums_alg, mdev->tconn->net_conf->csums_alg);
 
 		rv = _drbd_send_cmd(mdev, sock, cmd, &p->head, size, 0);
 	} else
@@ -1043,7 +1052,7 @@ int fill_bitmap_rle_bits(struct drbd_conf *mdev,
 	int bits;
 
 	/* may we use this feature? */
-	if ((mdev->sync_conf.use_rle == 0) ||
+	if ((mdev->tconn->net_conf->use_rle == 0) ||
 		(mdev->tconn->agreed_pro_version < 90))
 			return 0;
 
@@ -1790,26 +1799,8 @@ static int drbd_release(struct gendisk *gd, fmode_t mode)
 
 static void drbd_set_defaults(struct drbd_conf *mdev)
 {
-	/* This way we get a compile error when sync_conf grows,
-	   and we forgot to initialize it here */
-	mdev->sync_conf = (struct syncer_conf) {
-		/* .rate = */		DRBD_RATE_DEF,
-		/* .after = */		DRBD_AFTER_DEF,
-		/* .al_extents = */	DRBD_AL_EXTENTS_DEF,
-		/* .verify_alg = */	{}, 0,
-		/* .cpu_mask = */	{}, 0,
-		/* .csums_alg = */	{}, 0,
-		/* .use_rle = */	0,
-		/* .on_no_data = */	DRBD_ON_NO_DATA_DEF,
-		/* .c_plan_ahead = */	DRBD_C_PLAN_AHEAD_DEF,
-		/* .c_delay_target = */	DRBD_C_DELAY_TARGET_DEF,
-		/* .c_fill_target = */	DRBD_C_FILL_TARGET_DEF,
-		/* .c_max_rate = */	DRBD_C_MAX_RATE_DEF,
-		/* .c_min_rate = */	DRBD_C_MIN_RATE_DEF
-	};
-
-	/* Have to use that way, because the layout differs between
-	   big endian and little endian */
+	/* Beware! The actual layout differs
+	 * between big endian and little endian */
 	mdev->state = (union drbd_state) {
 		{ .role = R_SECONDARY,
 		  .peer = R_UNKNOWN,
@@ -2286,6 +2277,11 @@ struct drbd_tconn *drbd_new_tconn(const char *name)
 	drbd_thread_init(tconn, &tconn->worker, drbd_worker, "worker");
 	drbd_thread_init(tconn, &tconn->asender, drbd_asender, "asender");
 
+	tconn->res_opts = (struct res_opts) {
+		{}, 0, /* cpu_mask */
+		DRBD_ON_NO_DATA_DEF, /* on_no_data */
+	};
+
 	mutex_lock(&drbd_cfg_mutex);
 	list_add_tail(&tconn->all_tconn, &drbd_tconns);
 	mutex_unlock(&drbd_cfg_mutex);
@@ -2559,10 +2555,10 @@ void drbd_free_sock(struct drbd_tconn *tconn)
 
 void drbd_free_resources(struct drbd_conf *mdev)
 {
-	crypto_free_hash(mdev->csums_tfm);
-	mdev->csums_tfm = NULL;
-	crypto_free_hash(mdev->verify_tfm);
-	mdev->verify_tfm = NULL;
+	crypto_free_hash(mdev->tconn->csums_tfm);
+	mdev->tconn->csums_tfm = NULL;
+	crypto_free_hash(mdev->tconn->verify_tfm);
+	mdev->tconn->verify_tfm = NULL;
 	crypto_free_hash(mdev->tconn->cram_hmac_tfm);
 	mdev->tconn->cram_hmac_tfm = NULL;
 	crypto_free_hash(mdev->tconn->integrity_w_tfm);
@@ -2589,7 +2585,7 @@ struct meta_data_on_disk {
 	u32 md_size_sect;
 	u32 al_offset;         /* offset to this block */
 	u32 al_nr_extents;     /* important for restoring the AL */
-	      /* `-- act_log->nr_elements <-- sync_conf.al_extents */
+	      /* `-- act_log->nr_elements <-- ldev->dc.al_extents */
 	u32 bm_offset;         /* offset to the bitmap, from here */
 	u32 bm_bytes_per_bit;  /* BM_BLOCK_SIZE */
 	u32 la_peer_max_bio_size;   /* last peer max_bio_size */
@@ -2715,7 +2711,7 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
 	for (i = UI_CURRENT; i < UI_SIZE; i++)
 		bdev->md.uuid[i] = be64_to_cpu(buffer->uuid[i]);
 	bdev->md.flags = be32_to_cpu(buffer->flags);
-	mdev->sync_conf.al_extents = be32_to_cpu(buffer->al_nr_extents);
+	bdev->dc.al_extents = be32_to_cpu(buffer->al_nr_extents);
 	bdev->md.device_uuid = be64_to_cpu(buffer->device_uuid);
 
 	spin_lock_irq(&mdev->tconn->req_lock);
@@ -2727,8 +2723,8 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
 	}
 	spin_unlock_irq(&mdev->tconn->req_lock);
 
-	if (mdev->sync_conf.al_extents < 7)
-		mdev->sync_conf.al_extents = 127;
+	if (bdev->dc.al_extents < 7)
+		bdev->dc.al_extents = 127;
 
  err:
 	mutex_unlock(&mdev->md_io_mutex);
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index ac0a175e778c..18cd2ed4e8ca 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -53,8 +53,10 @@ int drbd_adm_down(struct sk_buff *skb, struct genl_info *info);
 
 int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info);
 int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info);
 int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info);
 int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info);
 int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info);
 int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info);
 int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info);
@@ -66,7 +68,7 @@ int drbd_adm_resume_sync(struct sk_buff *skb, struct genl_info *info);
 int drbd_adm_suspend_io(struct sk_buff *skb, struct genl_info *info);
 int drbd_adm_resume_io(struct sk_buff *skb, struct genl_info *info);
 int drbd_adm_outdate(struct sk_buff *skb, struct genl_info *info);
-int drbd_adm_syncer(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info);
 int drbd_adm_get_status(struct sk_buff *skb, struct genl_info *info);
 int drbd_adm_get_timeout_type(struct sk_buff *skb, struct genl_info *info);
 /* .dumpit */
@@ -170,7 +172,7 @@ static int drbd_adm_prepare(struct sk_buff *skb, struct genl_info *info,
 	if (info->attrs[DRBD_NLA_CFG_CONTEXT]) {
 		struct nlattr *nla;
 		/* parse and validate only */
-		err = drbd_cfg_context_from_attrs(NULL, info->attrs);
+		err = drbd_cfg_context_from_attrs(NULL, info);
 		if (err)
 			goto fail;
 
@@ -616,6 +618,7 @@ static const char *from_attrs_err_to_txt(int err)
 {
 	return	err == -ENOMSG ? "required attribute missing" :
 		err == -EOPNOTSUPP ? "unknown mandatory attribute" :
+		err == -EEXIST ? "can not change invariant setting" :
 		"invalid attribute value";
 }
 
@@ -633,7 +636,7 @@ int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info)
 
 	memset(&parms, 0, sizeof(parms));
 	if (info->attrs[DRBD_NLA_SET_ROLE_PARMS]) {
-		err = set_role_parms_from_attrs(&parms, info->attrs);
+		err = set_role_parms_from_attrs(&parms, info);
 		if (err) {
 			retcode = ERR_MANDATORY_TAG;
 			drbd_msg_put_info(from_attrs_err_to_txt(err));
@@ -898,24 +901,24 @@ drbd_new_dev_size(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, int ass
  * failed, and 0 on success. You should call drbd_md_sync() after you called
  * this function.
  */
-static int drbd_check_al_size(struct drbd_conf *mdev)
+static int drbd_check_al_size(struct drbd_conf *mdev, struct disk_conf *dc)
 {
 	struct lru_cache *n, *t;
 	struct lc_element *e;
 	unsigned int in_use;
 	int i;
 
-	if (!expect(mdev->sync_conf.al_extents >= DRBD_AL_EXTENTS_MIN))
-		mdev->sync_conf.al_extents = DRBD_AL_EXTENTS_MIN;
+	if (!expect(dc->al_extents >= DRBD_AL_EXTENTS_MIN))
+		dc->al_extents = DRBD_AL_EXTENTS_MIN;
 
 	if (mdev->act_log &&
-	    mdev->act_log->nr_elements == mdev->sync_conf.al_extents)
+	    mdev->act_log->nr_elements == dc->al_extents)
 		return 0;
 
 	in_use = 0;
 	t = mdev->act_log;
 	n = lc_create("act_log", drbd_al_ext_cache, AL_UPDATES_PER_TRANSACTION,
-		mdev->sync_conf.al_extents, sizeof(struct lc_element), 0);
+		dc->al_extents, sizeof(struct lc_element), 0);
 
 	if (n == NULL) {
 		dev_err(DEV, "Cannot allocate act_log lru!\n");
@@ -1069,6 +1072,114 @@ static void drbd_suspend_al(struct drbd_conf *mdev)
 		dev_info(DEV, "Suspended AL updates\n");
 }
 
+int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
+{
+	enum drbd_ret_code retcode;
+	struct drbd_conf *mdev;
+	struct disk_conf *ndc; /* new disk conf */
+	int err, fifo_size;
+	int *rs_plan_s = NULL;
+
+	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
+	if (!adm_ctx.reply_skb)
+		return retcode;
+	if (retcode != NO_ERROR)
+		goto out;
+
+	mdev = adm_ctx.mdev;
+
+	/* we also need a disk
+	 * to change the options on */
+	if (!get_ldev(mdev)) {
+		retcode = ERR_NO_DISK;
+		goto out;
+	}
+
+/* FIXME freeze IO, cluster wide.
+ *
+ * We should make sure no-one uses
+ * some half-updated struct when we
+ * assign it later. */
+
+	ndc = kmalloc(sizeof(*ndc), GFP_KERNEL);
+	if (!ndc) {
+		retcode = ERR_NOMEM;
+		goto fail;
+	}
+
+	memcpy(ndc, &mdev->ldev->dc, sizeof(*ndc));
+	err = disk_conf_from_attrs_for_change(ndc, info);
+	if (err) {
+		retcode = ERR_MANDATORY_TAG;
+		drbd_msg_put_info(from_attrs_err_to_txt(err));
+	}
+
+	if (!expect(ndc->resync_rate >= 1))
+		ndc->resync_rate = 1;
+
+	/* clip to allowed range */
+	if (!expect(ndc->al_extents >= DRBD_AL_EXTENTS_MIN))
+		ndc->al_extents = DRBD_AL_EXTENTS_MIN;
+	if (!expect(ndc->al_extents <= DRBD_AL_EXTENTS_MAX))
+		ndc->al_extents = DRBD_AL_EXTENTS_MAX;
+
+	/* most sanity checks done, try to assign the new sync-after
+	 * dependency.  need to hold the global lock in there,
+	 * to avoid a race in the dependency loop check. */
+	retcode = drbd_alter_sa(mdev, ndc->resync_after);
+	if (retcode != NO_ERROR)
+		goto fail;
+
+	fifo_size = (ndc->c_plan_ahead * 10 * SLEEP_TIME) / HZ;
+	if (fifo_size != mdev->rs_plan_s.size && fifo_size > 0) {
+		rs_plan_s   = kzalloc(sizeof(int) * fifo_size, GFP_KERNEL);
+		if (!rs_plan_s) {
+			dev_err(DEV, "kmalloc of fifo_buffer failed");
+			retcode = ERR_NOMEM;
+			goto fail;
+		}
+	}
+
+	if (fifo_size != mdev->rs_plan_s.size) {
+		kfree(mdev->rs_plan_s.values);
+		mdev->rs_plan_s.values = rs_plan_s;
+		mdev->rs_plan_s.size   = fifo_size;
+		mdev->rs_planed = 0;
+		rs_plan_s = NULL;
+	}
+
+	wait_event(mdev->al_wait, lc_try_lock(mdev->act_log));
+	drbd_al_shrink(mdev);
+	err = drbd_check_al_size(mdev, ndc);
+	lc_unlock(mdev->act_log);
+	wake_up(&mdev->al_wait);
+
+	if (err) {
+		retcode = ERR_NOMEM;
+		goto fail;
+	}
+
+	/* FIXME
+	 * To avoid someone looking at a half-updated struct, we probably
+	 * should have a rw-semaphor on net_conf and disk_conf.
+	 */
+	mdev->ldev->dc = *ndc;
+
+	drbd_md_sync(mdev);
+
+
+	if (mdev->state.conn >= C_CONNECTED)
+		drbd_send_sync_param(mdev);
+
+ fail:
+	put_ldev(mdev);
+	kfree(ndc);
+	kfree(rs_plan_s);
+ out:
+	drbd_adm_finish(info, retcode);
+	return 0;
+}
+
 int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 {
 	struct drbd_conf *mdev;
@@ -1111,12 +1222,29 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 		goto fail;
 	}
 
-	nbc->dc.disk_size     = DRBD_DISK_SIZE_SECT_DEF;
-	nbc->dc.on_io_error   = DRBD_ON_IO_ERROR_DEF;
-	nbc->dc.fencing       = DRBD_FENCING_DEF;
-	nbc->dc.max_bio_bvecs = DRBD_MAX_BIO_BVECS_DEF;
-
-	err = disk_conf_from_attrs(&nbc->dc, info->attrs);
+	nbc->dc = (struct disk_conf) {
+		{}, 0, /* backing_dev */
+		{}, 0, /* meta_dev */
+		0, /* meta_dev_idx */
+		DRBD_DISK_SIZE_SECT_DEF, /* disk_size */
+		DRBD_MAX_BIO_BVECS_DEF, /* max_bio_bvecs */
+		DRBD_ON_IO_ERROR_DEF, /* on_io_error */
+		DRBD_FENCING_DEF, /* fencing */
+		DRBD_RATE_DEF, /* resync_rate */
+		DRBD_AFTER_DEF, /* resync_after */
+		DRBD_AL_EXTENTS_DEF, /* al_extents */
+		DRBD_C_PLAN_AHEAD_DEF, /* c_plan_ahead */
+		DRBD_C_DELAY_TARGET_DEF, /* c_delay_target */
+		DRBD_C_FILL_TARGET_DEF, /* c_fill_target */
+		DRBD_C_MAX_RATE_DEF, /* c_max_rate */
+		DRBD_C_MIN_RATE_DEF, /* c_min_rate */
+		0, /* no_disk_barrier */
+		0, /* no_disk_flush */
+		0, /* no_disk_drain */
+		0, /* no_md_flush */
+	};
+
+	err = disk_conf_from_attrs(&nbc->dc, info);
 	if (err) {
 		retcode = ERR_MANDATORY_TAG;
 		drbd_msg_put_info(from_attrs_err_to_txt(err));
@@ -1267,7 +1395,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 	}
 
 	/* Since we are diskless, fix the activity log first... */
-	if (drbd_check_al_size(mdev)) {
+	if (drbd_check_al_size(mdev, &nbc->dc)) {
 		retcode = ERR_NOMEM;
 		goto force_diskless_dec;
 	}
@@ -1498,6 +1626,158 @@ out:
 	return 0;
 }
 
+static bool conn_resync_running(struct drbd_tconn *tconn)
+{
+	struct drbd_conf *mdev;
+	int vnr;
+
+	idr_for_each_entry(&tconn->volumes, mdev, vnr) {
+		if (mdev->state.conn == C_SYNC_SOURCE ||
+		    mdev->state.conn == C_SYNC_TARGET ||
+		    mdev->state.conn == C_PAUSED_SYNC_S ||
+		    mdev->state.conn == C_PAUSED_SYNC_T)
+			return true;
+	}
+	return false;
+}
+
+static bool conn_ov_running(struct drbd_tconn *tconn)
+{
+	struct drbd_conf *mdev;
+	int vnr;
+
+	idr_for_each_entry(&tconn->volumes, mdev, vnr) {
+		if (mdev->state.conn == C_VERIFY_S ||
+		    mdev->state.conn == C_VERIFY_T)
+			return true;
+	}
+	return false;
+}
+
+int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info)
+{
+	enum drbd_ret_code retcode;
+	struct drbd_tconn *tconn;
+	struct net_conf *new_conf = NULL;
+	int err;
+	int ovr; /* online verify running */
+	int rsr; /* re-sync running */
+	struct crypto_hash *verify_tfm = NULL;
+	struct crypto_hash *csums_tfm = NULL;
+
+
+	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONN);
+	if (!adm_ctx.reply_skb)
+		return retcode;
+	if (retcode != NO_ERROR)
+		goto out;
+
+	tconn = adm_ctx.tconn;
+
+	new_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL);
+	if (!new_conf) {
+		retcode = ERR_NOMEM;
+		goto out;
+	}
+
+	/* we also need a net config
+	 * to change the options on */
+	if (!get_net_conf(tconn)) {
+		drbd_msg_put_info("net conf missing, try connect");
+		retcode = ERR_INVALID_REQUEST;
+		goto out;
+	}
+
+	conn_reconfig_start(tconn);
+
+	memcpy(new_conf, tconn->net_conf, sizeof(*new_conf));
+	err = net_conf_from_attrs_for_change(new_conf, info);
+	if (err) {
+		retcode = ERR_MANDATORY_TAG;
+		drbd_msg_put_info(from_attrs_err_to_txt(err));
+		goto fail;
+	}
+
+	/* re-sync running */
+	rsr = conn_resync_running(tconn);
+	if (rsr && strcmp(new_conf->csums_alg, tconn->net_conf->csums_alg)) {
+		retcode = ERR_CSUMS_RESYNC_RUNNING;
+		goto fail;
+	}
+
+	if (!rsr && new_conf->csums_alg[0]) {
+		csums_tfm = crypto_alloc_hash(new_conf->csums_alg, 0, CRYPTO_ALG_ASYNC);
+		if (IS_ERR(csums_tfm)) {
+			csums_tfm = NULL;
+			retcode = ERR_CSUMS_ALG;
+			goto fail;
+		}
+
+		if (!drbd_crypto_is_hash(crypto_hash_tfm(csums_tfm))) {
+			retcode = ERR_CSUMS_ALG_ND;
+			goto fail;
+		}
+	}
+
+	/* online verify running */
+	ovr = conn_ov_running(tconn);
+	if (ovr) {
+		if (strcmp(new_conf->verify_alg, tconn->net_conf->verify_alg)) {
+			retcode = ERR_VERIFY_RUNNING;
+			goto fail;
+		}
+	}
+
+	if (!ovr && new_conf->verify_alg[0]) {
+		verify_tfm = crypto_alloc_hash(new_conf->verify_alg, 0, CRYPTO_ALG_ASYNC);
+		if (IS_ERR(verify_tfm)) {
+			verify_tfm = NULL;
+			retcode = ERR_VERIFY_ALG;
+			goto fail;
+		}
+
+		if (!drbd_crypto_is_hash(crypto_hash_tfm(verify_tfm))) {
+			retcode = ERR_VERIFY_ALG_ND;
+			goto fail;
+		}
+	}
+
+
+	/* For now, use struct assignment, not pointer assignment.
+	 * We don't have any means to determine who might still
+	 * keep a local alias into the struct,
+	 * so we cannot just free it and hope for the best :(
+	 * FIXME
+	 * To avoid someone looking at a half-updated struct, we probably
+	 * should have a rw-semaphor on net_conf and disk_conf.
+	 */
+	*tconn->net_conf = *new_conf;
+
+	if (!rsr) {
+		crypto_free_hash(tconn->csums_tfm);
+		tconn->csums_tfm = csums_tfm;
+		csums_tfm = NULL;
+	}
+	if (!ovr) {
+		crypto_free_hash(tconn->verify_tfm);
+		tconn->verify_tfm = verify_tfm;
+		verify_tfm = NULL;
+	}
+
+	if (tconn->cstate >= C_WF_REPORT_PARAMS)
+		drbd_send_sync_param(minor_to_mdev(conn_lowest_minor(tconn)));
+
+ fail:
+	crypto_free_hash(csums_tfm);
+	crypto_free_hash(verify_tfm);
+	kfree(new_conf);
+	put_net_conf(tconn);
+	conn_reconfig_done(tconn);
+ out:
+	drbd_adm_finish(info, retcode);
+	return 0;
+}
+
 int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info)
 {
 	char hmac_name[CRYPTO_MAX_ALG_NAME];
@@ -1531,33 +1811,47 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info)
 	}
 
 	/* allocation not in the IO path, cqueue thread context */
-	new_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL);
+	new_conf = kmalloc(sizeof(struct net_conf), GFP_KERNEL);
 	if (!new_conf) {
 		retcode = ERR_NOMEM;
 		goto fail;
 	}
 
-	new_conf->timeout	   = DRBD_TIMEOUT_DEF;
-	new_conf->try_connect_int  = DRBD_CONNECT_INT_DEF;
-	new_conf->ping_int	   = DRBD_PING_INT_DEF;
-	new_conf->max_epoch_size   = DRBD_MAX_EPOCH_SIZE_DEF;
-	new_conf->max_buffers	   = DRBD_MAX_BUFFERS_DEF;
-	new_conf->unplug_watermark = DRBD_UNPLUG_WATERMARK_DEF;
-	new_conf->sndbuf_size	   = DRBD_SNDBUF_SIZE_DEF;
-	new_conf->rcvbuf_size	   = DRBD_RCVBUF_SIZE_DEF;
-	new_conf->ko_count	   = DRBD_KO_COUNT_DEF;
-	new_conf->after_sb_0p	   = DRBD_AFTER_SB_0P_DEF;
-	new_conf->after_sb_1p	   = DRBD_AFTER_SB_1P_DEF;
-	new_conf->after_sb_2p	   = DRBD_AFTER_SB_2P_DEF;
-	new_conf->want_lose	   = 0;
-	new_conf->two_primaries    = 0;
-	new_conf->wire_protocol    = DRBD_PROT_C;
-	new_conf->ping_timeo	   = DRBD_PING_TIMEO_DEF;
-	new_conf->rr_conflict	   = DRBD_RR_CONFLICT_DEF;
-	new_conf->on_congestion    = DRBD_ON_CONGESTION_DEF;
-	new_conf->cong_extents     = DRBD_CONG_EXTENTS_DEF;
-
-	err = net_conf_from_attrs(new_conf, info->attrs);
+	*new_conf = (struct net_conf) {
+		{}, 0, /* my_addr */
+		{}, 0, /* peer_addr */
+		{}, 0, /* shared_secret */
+		{}, 0, /* cram_hmac_alg */
+		{}, 0, /* integrity_alg */
+		{}, 0, /* verify_alg */
+		{}, 0, /* csums_alg */
+		DRBD_PROTOCOL_DEF, /* wire_protocol */
+		DRBD_CONNECT_INT_DEF, /* try_connect_int */
+		DRBD_TIMEOUT_DEF, /* timeout */
+		DRBD_PING_INT_DEF, /* ping_int */
+		DRBD_PING_TIMEO_DEF, /* ping_timeo */
+		DRBD_SNDBUF_SIZE_DEF, /* sndbuf_size */
+		DRBD_RCVBUF_SIZE_DEF, /* rcvbuf_size */
+		DRBD_KO_COUNT_DEF, /* ko_count */
+		DRBD_MAX_BUFFERS_DEF, /* max_buffers */
+		DRBD_MAX_EPOCH_SIZE_DEF, /* max_epoch_size */
+		DRBD_UNPLUG_WATERMARK_DEF, /* unplug_watermark */
+		DRBD_AFTER_SB_0P_DEF, /* after_sb_0p */
+		DRBD_AFTER_SB_1P_DEF, /* after_sb_1p */
+		DRBD_AFTER_SB_2P_DEF, /* after_sb_2p */
+		DRBD_RR_CONFLICT_DEF, /* rr_conflict */
+		DRBD_ON_CONGESTION_DEF, /* on_congestion */
+		DRBD_CONG_FILL_DEF, /* cong_fill */
+		DRBD_CONG_EXTENTS_DEF, /* cong_extents */
+		0, /* two_primaries */
+		0, /* want_lose */
+		0, /* no_cork */
+		0, /* always_asbp */
+		0, /* dry_run */
+		0, /* use_rle */
+	};
+
+	err = net_conf_from_attrs(new_conf, info);
 	if (err) {
 		retcode = ERR_MANDATORY_TAG;
 		drbd_msg_put_info(from_attrs_err_to_txt(err));
@@ -1789,7 +2083,7 @@ int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info)
 	tconn = adm_ctx.tconn;
 	memset(&parms, 0, sizeof(parms));
 	if (info->attrs[DRBD_NLA_DISCONNECT_PARMS]) {
-		err = disconnect_parms_from_attrs(&parms, info->attrs);
+		err = disconnect_parms_from_attrs(&parms, info);
 		if (err) {
 			retcode = ERR_MANDATORY_TAG;
 			drbd_msg_put_info(from_attrs_err_to_txt(err));
@@ -1848,7 +2142,7 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
 
 	memset(&rs, 0, sizeof(struct resize_parms));
 	if (info->attrs[DRBD_NLA_RESIZE_PARMS]) {
-		err = resize_parms_from_attrs(&rs, info->attrs);
+		err = resize_parms_from_attrs(&rs, info);
 		if (err) {
 			retcode = ERR_MANDATORY_TAG;
 			drbd_msg_put_info(from_attrs_err_to_txt(err));
@@ -1904,26 +2198,21 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
 	return 0;
 }
 
-int drbd_adm_syncer(struct sk_buff *skb, struct genl_info *info)
+int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info)
 {
-	struct drbd_conf *mdev;
 	enum drbd_ret_code retcode;
-	int err;
-	int ovr; /* online verify running */
-	int rsr; /* re-sync running */
-	struct crypto_hash *verify_tfm = NULL;
-	struct crypto_hash *csums_tfm = NULL;
-	struct syncer_conf sc;
 	cpumask_var_t new_cpu_mask;
+	struct drbd_tconn *tconn;
 	int *rs_plan_s = NULL;
-	int fifo_size;
+	struct res_opts sc;
+	int err;
 
-	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
+	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONN);
 	if (!adm_ctx.reply_skb)
 		return retcode;
 	if (retcode != NO_ERROR)
 		goto fail;
-	mdev = adm_ctx.mdev;
+	tconn = adm_ctx.tconn;
 
 	if (!zalloc_cpumask_var(&new_cpu_mask, GFP_KERNEL)) {
 		retcode = ERR_NOMEM;
@@ -1933,172 +2222,43 @@ int drbd_adm_syncer(struct sk_buff *skb, struct genl_info *info)
 
 	if (((struct drbd_genlmsghdr*)info->userhdr)->flags
 			& DRBD_GENL_F_SET_DEFAULTS) {
-		memset(&sc, 0, sizeof(struct syncer_conf));
-		sc.rate       = DRBD_RATE_DEF;
-		sc.after      = DRBD_AFTER_DEF;
-		sc.al_extents = DRBD_AL_EXTENTS_DEF;
+		memset(&sc, 0, sizeof(struct res_opts));
 		sc.on_no_data  = DRBD_ON_NO_DATA_DEF;
-		sc.c_plan_ahead = DRBD_C_PLAN_AHEAD_DEF;
-		sc.c_delay_target = DRBD_C_DELAY_TARGET_DEF;
-		sc.c_fill_target = DRBD_C_FILL_TARGET_DEF;
-		sc.c_max_rate = DRBD_C_MAX_RATE_DEF;
-		sc.c_min_rate = DRBD_C_MIN_RATE_DEF;
 	} else
-		memcpy(&sc, &mdev->sync_conf, sizeof(struct syncer_conf));
+		sc = tconn->res_opts;
 
-	err = syncer_conf_from_attrs(&sc, info->attrs);
+	err = res_opts_from_attrs(&sc, info);
 	if (err) {
 		retcode = ERR_MANDATORY_TAG;
 		drbd_msg_put_info(from_attrs_err_to_txt(err));
 		goto fail;
 	}
 
-	/* re-sync running */
-	rsr = (	mdev->state.conn == C_SYNC_SOURCE ||
-		mdev->state.conn == C_SYNC_TARGET ||
-		mdev->state.conn == C_PAUSED_SYNC_S ||
-		mdev->state.conn == C_PAUSED_SYNC_T );
-
-	if (rsr && strcmp(sc.csums_alg, mdev->sync_conf.csums_alg)) {
-		retcode = ERR_CSUMS_RESYNC_RUNNING;
-		goto fail;
-	}
-
-	if (!rsr && sc.csums_alg[0]) {
-		csums_tfm = crypto_alloc_hash(sc.csums_alg, 0, CRYPTO_ALG_ASYNC);
-		if (IS_ERR(csums_tfm)) {
-			csums_tfm = NULL;
-			retcode = ERR_CSUMS_ALG;
-			goto fail;
-		}
-
-		if (!drbd_crypto_is_hash(crypto_hash_tfm(csums_tfm))) {
-			retcode = ERR_CSUMS_ALG_ND;
-			goto fail;
-		}
-	}
-
-	/* online verify running */
-	ovr = (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T);
-
-	if (ovr) {
-		if (strcmp(sc.verify_alg, mdev->sync_conf.verify_alg)) {
-			retcode = ERR_VERIFY_RUNNING;
-			goto fail;
-		}
-	}
-
-	if (!ovr && sc.verify_alg[0]) {
-		verify_tfm = crypto_alloc_hash(sc.verify_alg, 0, CRYPTO_ALG_ASYNC);
-		if (IS_ERR(verify_tfm)) {
-			verify_tfm = NULL;
-			retcode = ERR_VERIFY_ALG;
-			goto fail;
-		}
-
-		if (!drbd_crypto_is_hash(crypto_hash_tfm(verify_tfm))) {
-			retcode = ERR_VERIFY_ALG_ND;
-			goto fail;
-		}
-	}
-
 	/* silently ignore cpu mask on UP kernel */
 	if (nr_cpu_ids > 1 && sc.cpu_mask[0] != 0) {
 		err = __bitmap_parse(sc.cpu_mask, 32, 0,
 				cpumask_bits(new_cpu_mask), nr_cpu_ids);
 		if (err) {
-			dev_warn(DEV, "__bitmap_parse() failed with %d\n", err);
+			conn_warn(tconn, "__bitmap_parse() failed with %d\n", err);
 			retcode = ERR_CPU_MASK_PARSE;
 			goto fail;
 		}
 	}
 
-	if (!expect(sc.rate >= 1))
-		sc.rate = 1;
-
-	/* clip to allowed range */
-	if (!expect(sc.al_extents >= DRBD_AL_EXTENTS_MIN))
-		sc.al_extents = DRBD_AL_EXTENTS_MIN;
-	if (!expect(sc.al_extents <= DRBD_AL_EXTENTS_MAX))
-		sc.al_extents = DRBD_AL_EXTENTS_MAX;
-
-	/* most sanity checks done, try to assign the new sync-after
-	 * dependency.  need to hold the global lock in there,
-	 * to avoid a race in the dependency loop check. */
-	retcode = drbd_alter_sa(mdev, sc.after);
-	if (retcode != NO_ERROR)
-		goto fail;
-
-	fifo_size = (sc.c_plan_ahead * 10 * SLEEP_TIME) / HZ;
-	if (fifo_size != mdev->rs_plan_s.size && fifo_size > 0) {
-		rs_plan_s   = kzalloc(sizeof(int) * fifo_size, GFP_KERNEL);
-		if (!rs_plan_s) {
-			dev_err(DEV, "kmalloc of fifo_buffer failed");
-			retcode = ERR_NOMEM;
-			goto fail;
-		}
-	}
-
-	/* ok, assign the rest of it as well.
-	 * lock against receive_SyncParam() */
-	spin_lock(&mdev->peer_seq_lock);
-	mdev->sync_conf = sc;
-
-	if (!rsr) {
-		crypto_free_hash(mdev->csums_tfm);
-		mdev->csums_tfm = csums_tfm;
-		csums_tfm = NULL;
-	}
-
-	if (!ovr) {
-		crypto_free_hash(mdev->verify_tfm);
-		mdev->verify_tfm = verify_tfm;
-		verify_tfm = NULL;
-	}
-
-	if (fifo_size != mdev->rs_plan_s.size) {
-		kfree(mdev->rs_plan_s.values);
-		mdev->rs_plan_s.values = rs_plan_s;
-		mdev->rs_plan_s.size   = fifo_size;
-		mdev->rs_planed = 0;
-		rs_plan_s = NULL;
-	}
-
-	spin_unlock(&mdev->peer_seq_lock);
 
-	if (get_ldev(mdev)) {
-		wait_event(mdev->al_wait, lc_try_lock(mdev->act_log));
-		drbd_al_shrink(mdev);
-		err = drbd_check_al_size(mdev);
-		lc_unlock(mdev->act_log);
-		wake_up(&mdev->al_wait);
+	tconn->res_opts = sc;
 
-		put_ldev(mdev);
-		drbd_md_sync(mdev);
-
-		if (err) {
-			retcode = ERR_NOMEM;
-			goto fail;
-		}
-	}
-
-	if (mdev->state.conn >= C_CONNECTED)
-		drbd_send_sync_param(mdev, &sc);
-
-	if (!cpumask_equal(mdev->tconn->cpu_mask, new_cpu_mask)) {
-		cpumask_copy(mdev->tconn->cpu_mask, new_cpu_mask);
-		drbd_calc_cpu_mask(mdev->tconn);
-		mdev->tconn->receiver.reset_cpu_mask = 1;
-		mdev->tconn->asender.reset_cpu_mask = 1;
-		mdev->tconn->worker.reset_cpu_mask = 1;
+	if (!cpumask_equal(tconn->cpu_mask, new_cpu_mask)) {
+		cpumask_copy(tconn->cpu_mask, new_cpu_mask);
+		drbd_calc_cpu_mask(tconn);
+		tconn->receiver.reset_cpu_mask = 1;
+		tconn->asender.reset_cpu_mask = 1;
+		tconn->worker.reset_cpu_mask = 1;
 	}
 
-	kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE);
 fail:
 	kfree(rs_plan_s);
 	free_cpumask_var(new_cpu_mask);
-	crypto_free_hash(csums_tfm);
-	crypto_free_hash(verify_tfm);
 
 	drbd_adm_finish(info, retcode);
 	return 0;
@@ -2307,6 +2467,9 @@ int nla_put_status_info(struct sk_buff *skb, struct drbd_conf *mdev,
 	if (nla_put_drbd_cfg_context(skb, mdev->tconn->name, mdev->vnr))
 		goto nla_put_failure;
 
+	if (res_opts_to_skb(skb, &mdev->tconn->res_opts, exclude_sensitive))
+		goto nla_put_failure;
+
 	if (got_ldev)
 		if (disk_conf_to_skb(skb, &mdev->ldev->dc, exclude_sensitive))
 			goto nla_put_failure;
@@ -2314,9 +2477,6 @@ int nla_put_status_info(struct sk_buff *skb, struct drbd_conf *mdev,
 		if (net_conf_to_skb(skb, mdev->tconn->net_conf, exclude_sensitive))
 			goto nla_put_failure;
 
-	if (syncer_conf_to_skb(skb, &mdev->sync_conf, exclude_sensitive))
-			goto nla_put_failure;
-
 	nla = nla_nest_start(skb, DRBD_NLA_STATE_INFO);
 	if (!nla)
 		goto nla_put_failure;
@@ -2532,7 +2692,7 @@ int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info)
 		/* resume from last known position, if possible */
 		struct start_ov_parms parms =
 			{ .ov_start_sector = mdev->ov_start_sector };
-		int err = start_ov_parms_from_attrs(&parms, info->attrs);
+		int err = start_ov_parms_from_attrs(&parms, info);
 		if (err) {
 			retcode = ERR_MANDATORY_TAG;
 			drbd_msg_put_info(from_attrs_err_to_txt(err));
@@ -2568,7 +2728,7 @@ int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info)
 	mdev = adm_ctx.mdev;
 	memset(&args, 0, sizeof(args));
 	if (info->attrs[DRBD_NLA_NEW_C_UUID_PARMS]) {
-		err = new_c_uuid_parms_from_attrs(&args, info->attrs);
+		err = new_c_uuid_parms_from_attrs(&args, info);
 		if (err) {
 			retcode = ERR_MANDATORY_TAG;
 			drbd_msg_put_info(from_attrs_err_to_txt(err));
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 50c52712715e..c8c826b2444f 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -763,7 +763,7 @@ int drbd_connected(int vnr, void *p, void *data)
 		&mdev->tconn->cstate_mutex :
 		&mdev->own_state_mutex;
 
-	ok &= drbd_send_sync_param(mdev, &mdev->sync_conf);
+	ok &= drbd_send_sync_param(mdev);
 	ok &= drbd_send_sizes(mdev, 0, 0);
 	ok &= drbd_send_uuids(mdev);
 	ok &= drbd_send_state(mdev);
@@ -2085,7 +2085,7 @@ int drbd_rs_should_slow_down(struct drbd_conf *mdev, sector_t sector)
 	int throttle = 0;
 
 	/* feature disabled? */
-	if (mdev->sync_conf.c_min_rate == 0)
+	if (mdev->ldev->dc.c_min_rate == 0)
 		return 0;
 
 	spin_lock_irq(&mdev->al_lock);
@@ -2125,7 +2125,7 @@ int drbd_rs_should_slow_down(struct drbd_conf *mdev, sector_t sector)
 		db = mdev->rs_mark_left[i] - rs_left;
 		dbdt = Bit2KB(db/dt);
 
-		if (dbdt > mdev->sync_conf.c_min_rate)
+		if (dbdt > mdev->ldev->dc.c_min_rate)
 			throttle = 1;
 	}
 	return throttle;
@@ -3001,7 +3001,10 @@ static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packet cmd,
 	if (drbd_recv(mdev->tconn, &p->head.payload, header_size) != header_size)
 		return false;
 
-	mdev->sync_conf.rate	  = be32_to_cpu(p->rate);
+	if (get_ldev(mdev)) {
+		mdev->ldev->dc.resync_rate = be32_to_cpu(p->rate);
+		put_ldev(mdev);
+	}
 
 	if (apv >= 88) {
 		if (apv == 88) {
@@ -3029,10 +3032,10 @@ static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packet cmd,
 			p->csums_alg[SHARED_SECRET_MAX-1] = 0;
 		}
 
-		if (strcmp(mdev->sync_conf.verify_alg, p->verify_alg)) {
+		if (strcmp(mdev->tconn->net_conf->verify_alg, p->verify_alg)) {
 			if (mdev->state.conn == C_WF_REPORT_PARAMS) {
 				dev_err(DEV, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n",
-				    mdev->sync_conf.verify_alg, p->verify_alg);
+				    mdev->tconn->net_conf->verify_alg, p->verify_alg);
 				goto disconnect;
 			}
 			verify_tfm = drbd_crypto_alloc_digest_safe(mdev,
@@ -3043,10 +3046,10 @@ static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packet cmd,
 			}
 		}
 
-		if (apv >= 89 && strcmp(mdev->sync_conf.csums_alg, p->csums_alg)) {
+		if (apv >= 89 && strcmp(mdev->tconn->net_conf->csums_alg, p->csums_alg)) {
 			if (mdev->state.conn == C_WF_REPORT_PARAMS) {
 				dev_err(DEV, "Different csums-alg settings. me=\"%s\" peer=\"%s\"\n",
-				    mdev->sync_conf.csums_alg, p->csums_alg);
+				    mdev->tconn->net_conf->csums_alg, p->csums_alg);
 				goto disconnect;
 			}
 			csums_tfm = drbd_crypto_alloc_digest_safe(mdev,
@@ -3057,37 +3060,39 @@ static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packet cmd,
 			}
 		}
 
-		if (apv > 94) {
-			mdev->sync_conf.rate	  = be32_to_cpu(p->rate);
-			mdev->sync_conf.c_plan_ahead = be32_to_cpu(p->c_plan_ahead);
-			mdev->sync_conf.c_delay_target = be32_to_cpu(p->c_delay_target);
-			mdev->sync_conf.c_fill_target = be32_to_cpu(p->c_fill_target);
-			mdev->sync_conf.c_max_rate = be32_to_cpu(p->c_max_rate);
+		if (apv > 94 && get_ldev(mdev)) {
+			mdev->ldev->dc.resync_rate = be32_to_cpu(p->rate);
+			mdev->ldev->dc.c_plan_ahead = be32_to_cpu(p->c_plan_ahead);
+			mdev->ldev->dc.c_delay_target = be32_to_cpu(p->c_delay_target);
+			mdev->ldev->dc.c_fill_target = be32_to_cpu(p->c_fill_target);
+			mdev->ldev->dc.c_max_rate = be32_to_cpu(p->c_max_rate);
 
-			fifo_size = (mdev->sync_conf.c_plan_ahead * 10 * SLEEP_TIME) / HZ;
+			fifo_size = (mdev->ldev->dc.c_plan_ahead * 10 * SLEEP_TIME) / HZ;
 			if (fifo_size != mdev->rs_plan_s.size && fifo_size > 0) {
 				rs_plan_s   = kzalloc(sizeof(int) * fifo_size, GFP_KERNEL);
 				if (!rs_plan_s) {
 					dev_err(DEV, "kmalloc of fifo_buffer failed");
+					put_ldev(mdev);
 					goto disconnect;
 				}
 			}
+			put_ldev(mdev);
 		}
 
 		spin_lock(&mdev->peer_seq_lock);
 		/* lock against drbd_nl_syncer_conf() */
 		if (verify_tfm) {
-			strcpy(mdev->sync_conf.verify_alg, p->verify_alg);
-			mdev->sync_conf.verify_alg_len = strlen(p->verify_alg) + 1;
-			crypto_free_hash(mdev->verify_tfm);
-			mdev->verify_tfm = verify_tfm;
+			strcpy(mdev->tconn->net_conf->verify_alg, p->verify_alg);
+			mdev->tconn->net_conf->verify_alg_len = strlen(p->verify_alg) + 1;
+			crypto_free_hash(mdev->tconn->verify_tfm);
+			mdev->tconn->verify_tfm = verify_tfm;
 			dev_info(DEV, "using verify-alg: \"%s\"\n", p->verify_alg);
 		}
 		if (csums_tfm) {
-			strcpy(mdev->sync_conf.csums_alg, p->csums_alg);
-			mdev->sync_conf.csums_alg_len = strlen(p->csums_alg) + 1;
-			crypto_free_hash(mdev->csums_tfm);
-			mdev->csums_tfm = csums_tfm;
+			strcpy(mdev->tconn->net_conf->csums_alg, p->csums_alg);
+			mdev->tconn->net_conf->csums_alg_len = strlen(p->csums_alg) + 1;
+			crypto_free_hash(mdev->tconn->csums_tfm);
+			mdev->tconn->csums_tfm = csums_tfm;
 			dev_info(DEV, "using csums-alg: \"%s\"\n", p->csums_alg);
 		}
 		if (fifo_size != mdev->rs_plan_s.size) {
diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c
index 11685658659e..77fad527fb1d 100644
--- a/drivers/block/drbd/drbd_state.c
+++ b/drivers/block/drbd/drbd_state.c
@@ -402,7 +402,7 @@ is_valid_state(struct drbd_conf *mdev, union drbd_state ns)
 		rv = SS_CONNECTED_OUTDATES;
 
 	else if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) &&
-		 (mdev->sync_conf.verify_alg[0] == 0))
+		 (mdev->tconn->net_conf->verify_alg[0] == 0))
 		rv = SS_NO_VERIFY_ALG;
 
 	else if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) &&
@@ -668,7 +668,7 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state
 	    (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk > D_OUTDATED))
 		ns.susp_fen = 1; /* Suspend IO while fence-peer handler runs (peer lost) */
 
-	if (mdev->sync_conf.on_no_data == OND_SUSPEND_IO &&
+	if (mdev->tconn->res_opts.on_no_data == OND_SUSPEND_IO &&
 	    (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE))
 		ns.susp_nod = 1; /* Suspend IO while no data available (no accessible data available) */
 
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index a730520e468e..005876b32f74 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -310,12 +310,12 @@ static int w_e_send_csum(struct drbd_work *w, int cancel)
 	if (unlikely((peer_req->flags & EE_WAS_ERROR) != 0))
 		goto out;
 
-	digest_size = crypto_hash_digestsize(mdev->csums_tfm);
+	digest_size = crypto_hash_digestsize(mdev->tconn->csums_tfm);
 	digest = kmalloc(digest_size, GFP_NOIO);
 	if (digest) {
 		sector_t sector = peer_req->i.sector;
 		unsigned int size = peer_req->i.size;
-		drbd_csum_ee(mdev, mdev->csums_tfm, peer_req, digest);
+		drbd_csum_ee(mdev, mdev->tconn->csums_tfm, peer_req, digest);
 		/* Free peer_req and pages before send.
 		 * In case we block on congestion, we could otherwise run into
 		 * some distributed deadlock, if the other side blocks on
@@ -451,13 +451,13 @@ static int drbd_rs_controller(struct drbd_conf *mdev)
 
 	spin_lock(&mdev->peer_seq_lock); /* get an atomic view on mdev->rs_plan_s */
 
-	steps = mdev->rs_plan_s.size; /* (mdev->sync_conf.c_plan_ahead * 10 * SLEEP_TIME) / HZ; */
+	steps = mdev->rs_plan_s.size; /* (mdev->ldev->dc.c_plan_ahead * 10 * SLEEP_TIME) / HZ; */
 
 	if (mdev->rs_in_flight + sect_in == 0) { /* At start of resync */
-		want = ((mdev->sync_conf.rate * 2 * SLEEP_TIME) / HZ) * steps;
+		want = ((mdev->ldev->dc.resync_rate * 2 * SLEEP_TIME) / HZ) * steps;
 	} else { /* normal path */
-		want = mdev->sync_conf.c_fill_target ? mdev->sync_conf.c_fill_target :
-			sect_in * mdev->sync_conf.c_delay_target * HZ / (SLEEP_TIME * 10);
+		want = mdev->ldev->dc.c_fill_target ? mdev->ldev->dc.c_fill_target :
+			sect_in * mdev->ldev->dc.c_delay_target * HZ / (SLEEP_TIME * 10);
 	}
 
 	correction = want - mdev->rs_in_flight - mdev->rs_planed;
@@ -476,7 +476,7 @@ static int drbd_rs_controller(struct drbd_conf *mdev)
 	if (req_sect < 0)
 		req_sect = 0;
 
-	max_sect = (mdev->sync_conf.c_max_rate * 2 * SLEEP_TIME) / HZ;
+	max_sect = (mdev->ldev->dc.c_max_rate * 2 * SLEEP_TIME) / HZ;
 	if (req_sect > max_sect)
 		req_sect = max_sect;
 
@@ -492,11 +492,11 @@ static int drbd_rs_controller(struct drbd_conf *mdev)
 static int drbd_rs_number_requests(struct drbd_conf *mdev)
 {
 	int number;
-	if (mdev->rs_plan_s.size) { /* mdev->sync_conf.c_plan_ahead */
+	if (mdev->rs_plan_s.size) { /* mdev->ldev->dc.c_plan_ahead */
 		number = drbd_rs_controller(mdev) >> (BM_BLOCK_SHIFT - 9);
 		mdev->c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME;
 	} else {
-		mdev->c_sync_rate = mdev->sync_conf.rate;
+		mdev->c_sync_rate = mdev->ldev->dc.resync_rate;
 		number = SLEEP_TIME * mdev->c_sync_rate  / ((BM_BLOCK_SIZE / 1024) * HZ);
 	}
 
@@ -619,7 +619,7 @@ next_sector:
 		/* adjust very last sectors, in case we are oddly sized */
 		if (sector + (size>>9) > capacity)
 			size = (capacity-sector)<<9;
-		if (mdev->tconn->agreed_pro_version >= 89 && mdev->csums_tfm) {
+		if (mdev->tconn->agreed_pro_version >= 89 && mdev->tconn->csums_tfm) {
 			switch (read_for_csum(mdev, sector, size)) {
 			case -EIO: /* Disk failure */
 				put_ldev(mdev);
@@ -810,7 +810,7 @@ int drbd_resync_finished(struct drbd_conf *mdev)
 		if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T)
 			khelper_cmd = "after-resync-target";
 
-		if (mdev->csums_tfm && mdev->rs_total) {
+		if (mdev->tconn->csums_tfm && mdev->rs_total) {
 			const unsigned long s = mdev->rs_same_csum;
 			const unsigned long t = mdev->rs_total;
 			const int ratio =
@@ -1019,13 +1019,13 @@ int w_e_end_csum_rs_req(struct drbd_work *w, int cancel)
 		/* quick hack to try to avoid a race against reconfiguration.
 		 * a real fix would be much more involved,
 		 * introducing more locking mechanisms */
-		if (mdev->csums_tfm) {
-			digest_size = crypto_hash_digestsize(mdev->csums_tfm);
+		if (mdev->tconn->csums_tfm) {
+			digest_size = crypto_hash_digestsize(mdev->tconn->csums_tfm);
 			D_ASSERT(digest_size == di->digest_size);
 			digest = kmalloc(digest_size, GFP_NOIO);
 		}
 		if (digest) {
-			drbd_csum_ee(mdev, mdev->csums_tfm, peer_req, digest);
+			drbd_csum_ee(mdev, mdev->tconn->csums_tfm, peer_req, digest);
 			eq = !memcmp(digest, di->digest, digest_size);
 			kfree(digest);
 		}
@@ -1069,7 +1069,7 @@ int w_e_end_ov_req(struct drbd_work *w, int cancel)
 	if (unlikely(cancel))
 		goto out;
 
-	digest_size = crypto_hash_digestsize(mdev->verify_tfm);
+	digest_size = crypto_hash_digestsize(mdev->tconn->verify_tfm);
 	digest = kmalloc(digest_size, GFP_NOIO);
 	if (!digest) {
 		ok = 0;	/* terminate the connection in case the allocation failed */
@@ -1077,7 +1077,7 @@ int w_e_end_ov_req(struct drbd_work *w, int cancel)
 	}
 
 	if (likely(!(peer_req->flags & EE_WAS_ERROR)))
-		drbd_csum_ee(mdev, mdev->verify_tfm, peer_req, digest);
+		drbd_csum_ee(mdev, mdev->tconn->verify_tfm, peer_req, digest);
 	else
 		memset(digest, 0, digest_size);
 
@@ -1141,10 +1141,10 @@ int w_e_end_ov_reply(struct drbd_work *w, int cancel)
 	di = peer_req->digest;
 
 	if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
-		digest_size = crypto_hash_digestsize(mdev->verify_tfm);
+		digest_size = crypto_hash_digestsize(mdev->tconn->verify_tfm);
 		digest = kmalloc(digest_size, GFP_NOIO);
 		if (digest) {
-			drbd_csum_ee(mdev, mdev->verify_tfm, peer_req, digest);
+			drbd_csum_ee(mdev, mdev->tconn->verify_tfm, peer_req, digest);
 
 			D_ASSERT(digest_size == di->digest_size);
 			eq = !memcmp(digest, di->digest, digest_size);
@@ -1319,9 +1319,9 @@ static int _drbd_may_sync_now(struct drbd_conf *mdev)
 	struct drbd_conf *odev = mdev;
 
 	while (1) {
-		if (odev->sync_conf.after == -1)
+		if (odev->ldev->dc.resync_after == -1)
 			return 1;
-		odev = minor_to_mdev(odev->sync_conf.after);
+		odev = minor_to_mdev(odev->ldev->dc.resync_after);
 		if (!expect(odev))
 			return 1;
 		if ((odev->state.conn >= C_SYNC_SOURCE &&
@@ -1408,11 +1408,11 @@ static int sync_after_error(struct drbd_conf *mdev, int o_minor)
 			return ERR_SYNC_AFTER_CYCLE;
 
 		/* dependency chain ends here, no cycles. */
-		if (odev->sync_conf.after == -1)
+		if (odev->ldev->dc.resync_after == -1)
 			return NO_ERROR;
 
 		/* follow the dependency chain */
-		odev = minor_to_mdev(odev->sync_conf.after);
+		odev = minor_to_mdev(odev->ldev->dc.resync_after);
 	}
 }
 
@@ -1424,7 +1424,7 @@ int drbd_alter_sa(struct drbd_conf *mdev, int na)
 	write_lock_irq(&global_state_lock);
 	retcode = sync_after_error(mdev, na);
 	if (retcode == NO_ERROR) {
-		mdev->sync_conf.after = na;
+		mdev->ldev->dc.resync_after = na;
 		do {
 			changes  = _drbd_pause_after(mdev);
 			changes |= _drbd_resume_next(mdev);
@@ -1637,7 +1637,7 @@ int drbd_worker(struct drbd_thread *thi)
 	struct drbd_work *w = NULL;
 	struct drbd_conf *mdev;
 	LIST_HEAD(work_list);
-	int minor, intr = 0;
+	int vnr, intr = 0;
 
 	while (get_t_state(thi) == RUNNING) {
 		drbd_thread_current_set_cpu(thi);
@@ -1722,7 +1722,7 @@ int drbd_worker(struct drbd_thread *thi)
 	spin_unlock_irq(&tconn->data.work.q_lock);
 
 	drbd_thread_stop(&tconn->receiver);
-	idr_for_each_entry(&tconn->volumes, mdev, minor) {
+	idr_for_each_entry(&tconn->volumes, mdev, vnr) {
 		D_ASSERT(mdev->state.disk == D_DISKLESS && mdev->state.conn == C_STANDALONE);
 		/* _drbd_set_state only uses stop_nowait.
 		 * wait here for the exiting receiver. */
diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h
index a07d69279b1a..938e8560a833 100644
--- a/include/linux/drbd_genl.h
+++ b/include/linux/drbd_genl.h
@@ -102,66 +102,73 @@ GENL_struct(DRBD_NLA_CFG_CONTEXT, 2, drbd_cfg_context,
 )
 
 GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf,
-	__u64_field(1, GENLA_F_MANDATORY,	disk_size)
-	__str_field(2, GENLA_F_REQUIRED,	backing_dev,	128)
-	__str_field(3, GENLA_F_REQUIRED,	meta_dev,	128)
-	__u32_field(4, GENLA_F_REQUIRED,	meta_dev_idx)
-	__u32_field(5, GENLA_F_MANDATORY,	max_bio_bvecs)
+	__str_field(1, GENLA_F_REQUIRED | GENLA_F_INVARIANT,	backing_dev,	128)
+	__str_field(2, GENLA_F_REQUIRED | GENLA_F_INVARIANT,	meta_dev,	128)
+	__u32_field(3, GENLA_F_REQUIRED | GENLA_F_INVARIANT,	meta_dev_idx)
+
+	/* use the resize command to try and change the disk_size */
+	__u64_field(4, GENLA_F_MANDATORY | GENLA_F_INVARIANT,	disk_size)
+	/* we could change the max_bio_bvecs,
+	 * but it won't propagate through the stack */
+	__u32_field(5, GENLA_F_MANDATORY | GENLA_F_INVARIANT,	max_bio_bvecs)
+
 	__u32_field(6, GENLA_F_MANDATORY,	on_io_error)
 	__u32_field(7, GENLA_F_MANDATORY,	fencing)
-	__flg_field(8, GENLA_F_MANDATORY,	no_disk_barrier)
-	__flg_field(9, GENLA_F_MANDATORY,	no_disk_flush)
-	__flg_field(10, GENLA_F_MANDATORY,	no_disk_drain)
-	__flg_field(11, GENLA_F_MANDATORY,	no_md_flush)
-	__flg_field(12, GENLA_F_MANDATORY,	use_bmbv)
+
+	__u32_field(8,	GENLA_F_MANDATORY,	resync_rate)
+	__u32_field(9,	GENLA_F_MANDATORY,	resync_after)
+	__u32_field(10,	GENLA_F_MANDATORY,	al_extents)
+	__u32_field(11,	GENLA_F_MANDATORY,	c_plan_ahead)
+	__u32_field(12,	GENLA_F_MANDATORY,	c_delay_target)
+	__u32_field(13,	GENLA_F_MANDATORY,	c_fill_target)
+	__u32_field(14,	GENLA_F_MANDATORY,	c_max_rate)
+	__u32_field(15,	GENLA_F_MANDATORY,	c_min_rate)
+
+	__flg_field(16, GENLA_F_MANDATORY,	no_disk_barrier)
+	__flg_field(17, GENLA_F_MANDATORY,	no_disk_flush)
+	__flg_field(18, GENLA_F_MANDATORY,	no_disk_drain)
+	__flg_field(19, GENLA_F_MANDATORY,	no_md_flush)
+
 )
 
-GENL_struct(DRBD_NLA_SYNCER_CONF, 4, syncer_conf,
-	__u32_field(1,	GENLA_F_MANDATORY,	rate)
-	__u32_field(2,	GENLA_F_MANDATORY,	after)
-	__u32_field(3,	GENLA_F_MANDATORY,	al_extents)
-	__str_field(4,	GENLA_F_MANDATORY,	cpu_mask,       32)
-	__str_field(5,	GENLA_F_MANDATORY,	verify_alg,     SHARED_SECRET_MAX)
-	__str_field(6,	GENLA_F_MANDATORY,	csums_alg,	SHARED_SECRET_MAX)
-	__flg_field(7,	GENLA_F_MANDATORY,	use_rle)
-	__u32_field(8,	GENLA_F_MANDATORY,	on_no_data)
-	__u32_field(9,	GENLA_F_MANDATORY,	c_plan_ahead)
-	__u32_field(10,	GENLA_F_MANDATORY,	c_delay_target)
-	__u32_field(11,	GENLA_F_MANDATORY,	c_fill_target)
-	__u32_field(12,	GENLA_F_MANDATORY,	c_max_rate)
-	__u32_field(13,	GENLA_F_MANDATORY,	c_min_rate)
+GENL_struct(DRBD_NLA_RESOURCE_OPTS, 4, res_opts,
+	__str_field(1,	GENLA_F_MANDATORY,	cpu_mask,       32)
+	__u32_field(2,	GENLA_F_MANDATORY,	on_no_data)
 )
 
 GENL_struct(DRBD_NLA_NET_CONF, 5, net_conf,
-	__str_field(1,	GENLA_F_MANDATORY | GENLA_F_SENSITIVE,
+	__bin_field(1,	GENLA_F_REQUIRED | GENLA_F_INVARIANT,	my_addr,	128)
+	__bin_field(2,	GENLA_F_REQUIRED | GENLA_F_INVARIANT,	peer_addr,	128)
+	__str_field(3,	GENLA_F_MANDATORY | GENLA_F_SENSITIVE,
 						shared_secret,	SHARED_SECRET_MAX)
-	__str_field(2,	GENLA_F_MANDATORY,	cram_hmac_alg,	SHARED_SECRET_MAX)
-	__str_field(3,	GENLA_F_MANDATORY,	integrity_alg,	SHARED_SECRET_MAX)
-	__str_field(4,	GENLA_F_REQUIRED,	my_addr,	128)
-	__str_field(5,	GENLA_F_REQUIRED,	peer_addr,	128)
-	__u32_field(6,	GENLA_F_REQUIRED,	wire_protocol)
-	__u32_field(7,	GENLA_F_MANDATORY,	try_connect_int)
-	__u32_field(8,	GENLA_F_MANDATORY,	timeout)
-	__u32_field(9,	GENLA_F_MANDATORY,	ping_int)
-	__u32_field(10,	GENLA_F_MANDATORY,	ping_timeo)
-	__u32_field(11,	GENLA_F_MANDATORY,	sndbuf_size)
-	__u32_field(12,	GENLA_F_MANDATORY,	rcvbuf_size)
-	__u32_field(13,	GENLA_F_MANDATORY,	ko_count)
-	__u32_field(14,	GENLA_F_MANDATORY,	max_buffers)
-	__u32_field(15,	GENLA_F_MANDATORY,	max_epoch_size)
-	__u32_field(16,	GENLA_F_MANDATORY,	unplug_watermark)
-	__u32_field(17,	GENLA_F_MANDATORY,	after_sb_0p)
-	__u32_field(18,	GENLA_F_MANDATORY,	after_sb_1p)
-	__u32_field(19,	GENLA_F_MANDATORY,	after_sb_2p)
-	__u32_field(20,	GENLA_F_MANDATORY,	rr_conflict)
-	__u32_field(21,	GENLA_F_MANDATORY,	on_congestion)
-	__u32_field(22,	GENLA_F_MANDATORY,	cong_fill)
-	__u32_field(23,	GENLA_F_MANDATORY,	cong_extents)
-	__flg_field(24, GENLA_F_MANDATORY,	two_primaries)
-	__flg_field(25, GENLA_F_MANDATORY,	want_lose)
-	__flg_field(26, GENLA_F_MANDATORY,	no_cork)
-	__flg_field(27, GENLA_F_MANDATORY,	always_asbp)
-	__flg_field(28, GENLA_F_MANDATORY,	dry_run)
+	__str_field(4,	GENLA_F_MANDATORY,	cram_hmac_alg,	SHARED_SECRET_MAX)
+	__str_field(5,	GENLA_F_MANDATORY,	integrity_alg,	SHARED_SECRET_MAX)
+	__str_field(6,	GENLA_F_MANDATORY,	verify_alg,     SHARED_SECRET_MAX)
+	__str_field(7,	GENLA_F_MANDATORY,	csums_alg,	SHARED_SECRET_MAX)
+	__u32_field(8,	GENLA_F_MANDATORY,	wire_protocol)
+	__u32_field(9,	GENLA_F_MANDATORY,	try_connect_int)
+	__u32_field(10,	GENLA_F_MANDATORY,	timeout)
+	__u32_field(11,	GENLA_F_MANDATORY,	ping_int)
+	__u32_field(12,	GENLA_F_MANDATORY,	ping_timeo)
+	__u32_field(13,	GENLA_F_MANDATORY,	sndbuf_size)
+	__u32_field(14,	GENLA_F_MANDATORY,	rcvbuf_size)
+	__u32_field(15,	GENLA_F_MANDATORY,	ko_count)
+	__u32_field(16,	GENLA_F_MANDATORY,	max_buffers)
+	__u32_field(17,	GENLA_F_MANDATORY,	max_epoch_size)
+	__u32_field(18,	GENLA_F_MANDATORY,	unplug_watermark)
+	__u32_field(19,	GENLA_F_MANDATORY,	after_sb_0p)
+	__u32_field(20,	GENLA_F_MANDATORY,	after_sb_1p)
+	__u32_field(21,	GENLA_F_MANDATORY,	after_sb_2p)
+	__u32_field(22,	GENLA_F_MANDATORY,	rr_conflict)
+	__u32_field(23,	GENLA_F_MANDATORY,	on_congestion)
+	__u32_field(24,	GENLA_F_MANDATORY,	cong_fill)
+	__u32_field(25,	GENLA_F_MANDATORY,	cong_extents)
+	__flg_field(26, GENLA_F_MANDATORY,	two_primaries)
+	__flg_field(27, GENLA_F_MANDATORY | GENLA_F_INVARIANT,	want_lose)
+	__flg_field(28, GENLA_F_MANDATORY,	no_cork)
+	__flg_field(29, GENLA_F_MANDATORY,	always_asbp)
+	__flg_field(30, GENLA_F_MANDATORY | GENLA_F_INVARIANT,	dry_run)
+	__flg_field(31,	GENLA_F_MANDATORY,	use_rle)
 )
 
 GENL_struct(DRBD_NLA_SET_ROLE_PARMS, 6, set_role_parms,
@@ -270,11 +277,10 @@ GENL_op(DRBD_ADM_ADD_LINK, 7, GENL_doit(drbd_adm_create_connection),
 GENL_op(DRBD_ADM_DEL_LINK, 8, GENL_doit(drbd_adm_delete_connection),
 	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED))
 
-	/* operates on replication links */
-GENL_op(DRBD_ADM_SYNCER, 9,
-	GENL_doit(drbd_adm_syncer),
+GENL_op(DRBD_ADM_RESOURCE_OPTS, 9,
+	GENL_doit(drbd_adm_resource_opts),
 	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)
-	GENL_tla_expected(DRBD_NLA_SYNCER_CONF, GENLA_F_MANDATORY)
+	GENL_tla_expected(DRBD_NLA_RESOURCE_OPTS, GENLA_F_MANDATORY)
 )
 
 GENL_op(
@@ -284,16 +290,28 @@ GENL_op(
 	GENL_tla_expected(DRBD_NLA_NET_CONF, GENLA_F_REQUIRED)
 )
 
+GENL_op(
+	DRBD_ADM_CHG_NET_OPTS, 29,
+	GENL_doit(drbd_adm_net_opts),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)
+	GENL_tla_expected(DRBD_NLA_NET_CONF, GENLA_F_REQUIRED)
+)
+
 GENL_op(DRBD_ADM_DISCONNECT, 11, GENL_doit(drbd_adm_disconnect),
 	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED))
 
-	/* operates on minors */
 GENL_op(DRBD_ADM_ATTACH, 12,
 	GENL_doit(drbd_adm_attach),
 	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)
 	GENL_tla_expected(DRBD_NLA_DISK_CONF, GENLA_F_REQUIRED)
 )
 
+GENL_op(DRBD_ADM_CHG_DISK_OPTS, 28,
+	GENL_doit(drbd_adm_disk_opts),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)
+	GENL_tla_expected(DRBD_NLA_DISK_OPTS, GENLA_F_REQUIRED)
+)
+
 GENL_op(
 	DRBD_ADM_RESIZE, 13,
 	GENL_doit(drbd_adm_resize),
@@ -301,7 +319,6 @@ GENL_op(
 	GENL_tla_expected(DRBD_NLA_RESIZE_PARMS, GENLA_F_MANDATORY)
 )
 
-	/* operates on all volumes within a resource */
 GENL_op(
 	DRBD_ADM_PRIMARY, 14,
 	GENL_doit(drbd_adm_set_role),
diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h
index 22920a8af4e2..659a8eb38830 100644
--- a/include/linux/drbd_limits.h
+++ b/include/linux/drbd_limits.h
@@ -166,5 +166,7 @@
 #define DRBD_CONG_EXTENTS_MAX	DRBD_AL_EXTENTS_MAX
 #define DRBD_CONG_EXTENTS_DEF	DRBD_AL_EXTENTS_DEF
 
+#define DRBD_PROTOCOL_DEF DRBD_PROT_C
+
 #undef RANGE
 #endif
diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h
index c8c67239f616..e458282a3728 100644
--- a/include/linux/genl_magic_func.h
+++ b/include/linux/genl_magic_func.h
@@ -190,11 +190,12 @@ static struct nlattr *nested_attr_tb[128];
 
 #undef GENL_struct
 #define GENL_struct(tag_name, tag_number, s_name, s_fields)		\
-	/* static, potentially unused */				\
-int s_name ## _from_attrs(struct s_name *s, struct nlattr *tb[])	\
+/* *_from_attrs functions are static, but potentially unused */		\
+static int __ ## s_name ## _from_attrs(struct s_name *s,		\
+		struct genl_info *info, bool exclude_invariants)	\
 {									\
 	const int maxtype = ARRAY_SIZE(s_name ## _nl_policy)-1;		\
-	struct nlattr *tla = tb[tag_number];				\
+	struct nlattr *tla = info->attrs[tag_number];			\
 	struct nlattr **ntb = nested_attr_tb;				\
 	struct nlattr *nla;						\
 	int err;							\
@@ -211,33 +212,49 @@ int s_name ## _from_attrs(struct s_name *s, struct nlattr *tb[])	\
 									\
 	s_fields							\
 	return 0;							\
-}
+}					__attribute__((unused))		\
+static int s_name ## _from_attrs(struct s_name *s,			\
+						struct genl_info *info)	\
+{									\
+	return __ ## s_name ## _from_attrs(s, info, false);		\
+}					__attribute__((unused))		\
+static int s_name ## _from_attrs_for_change(struct s_name *s,		\
+						struct genl_info *info)	\
+{									\
+	return __ ## s_name ## _from_attrs(s, info, true);		\
+}					__attribute__((unused))		\
 
-#undef __field
-#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put)	\
+#define __assign(attr_nr, attr_flag, name, nla_type, type, assignment...)	\
 		nla = ntb[__nla_type(attr_nr)];				\
 		if (nla) {						\
-			if (s)						\
-				s->name = __get(nla);			\
-			DPRINT_FIELD("<<", nla_type, name, s, nla);	\
+			if (exclude_invariants && ((attr_flag) & GENLA_F_INVARIANT)) {		\
+				pr_info("<< must not change invariant attr: %s\n", #name);	\
+				return -EEXIST;				\
+			}						\
+			assignment;					\
+		} else if (exclude_invariants && ((attr_flag) & GENLA_F_INVARIANT)) {		\
+			/* attribute missing from payload, */		\
+			/* which was expected */			\
 		} else if ((attr_flag) & GENLA_F_REQUIRED) {		\
 			pr_info("<< missing attr: %s\n", #name);	\
 			return -ENOMSG;					\
 		}
 
+#undef __field
+#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put)	\
+	__assign(attr_nr, attr_flag, name, nla_type, type,		\
+			if (s)						\
+				s->name = __get(nla);			\
+			DPRINT_FIELD("<<", nla_type, name, s, nla))
+
 /* validate_nla() already checked nla_len <= maxlen appropriately. */
 #undef __array
 #define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, __get, __put) \
-		nla = ntb[__nla_type(attr_nr)];				\
-		if (nla) {						\
+	__assign(attr_nr, attr_flag, name, nla_type, type,		\
 			if (s)						\
 				s->name ## _len =			\
 					__get(s->name, nla, maxlen);	\
-			DPRINT_ARRAY("<<", nla_type, name, s, nla);	\
-		} else if ((attr_flag) & GENLA_F_REQUIRED) {		\
-			pr_info("<< missing attr: %s\n", #name);	\
-			return -ENOMSG;					\
-		}							\
+			DPRINT_ARRAY("<<", nla_type, name, s, nla))
 
 #include GENL_MAGIC_INCLUDE_FILE
 
diff --git a/include/linux/genl_magic_struct.h b/include/linux/genl_magic_struct.h
index 745ebfd6c7e5..9a605b9ee834 100644
--- a/include/linux/genl_magic_struct.h
+++ b/include/linux/genl_magic_struct.h
@@ -59,12 +59,20 @@ enum {
 	GENLA_F_MANDATORY	= 1 << 14,
 	GENLA_F_REQUIRED	= 1 << 15,
 
-	/* This will not be present in the __u16 .nla_type, but can be
-	 * triggered on in <struct>_to_skb, to exclude "sensitive"
-	 * information from broadcasts, or on unpriviledged get requests.
-	 * This is useful because genetlink multicast groups can be listened in
-	 * on by anyone.  */
+	/* Below will not be present in the __u16 .nla_type, but can be
+	 * triggered on in <struct>_to_skb resp. <struct>_from_attrs */
+
+	/* To exclude "sensitive" information from broadcasts, or on
+	 * unpriviledged get requests.  This is useful because genetlink
+	 * multicast groups can be listened in on by anyone.  */
 	GENLA_F_SENSITIVE	= 1 << 16,
+
+	/* INVARIAN options cannot be changed at runtime.
+	 * Useful to share an attribute policy and struct definition,
+	 * between some "create" and "change" commands,
+	 * but disallow certain fields to be changed online.
+	 */
+	GENLA_F_INVARIANT	= 1 << 17,
 };
 
 #define __nla_type(x)	((__u16)((__u16)(x) & (__u16)NLA_TYPE_MASK))
-- 
cgit v1.2.3


From cb703454a283d8dd5599e928eeea30367ca18874 Mon Sep 17 00:00:00 2001
From: Philipp Reisner <philipp.reisner@linbit.com>
Date: Thu, 24 Mar 2011 11:03:07 +0100
Subject: drbd: Converted drbd_try_outdate_peer() from mdev to tconn

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_int.h      |   4 +-
 drivers/block/drbd/drbd_nl.c       | 144 ++++++++++++++++++-------------------
 drivers/block/drbd/drbd_receiver.c |   7 +-
 drivers/block/drbd/drbd_state.c    |  85 ++++++++++++++++++----
 drivers/block/drbd/drbd_state.h    |   5 ++
 include/linux/drbd.h               |   3 +-
 6 files changed, 152 insertions(+), 96 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index c1eb4462096e..74637cc1461c 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -1472,8 +1472,8 @@ extern void drbd_reconsider_max_bio_size(struct drbd_conf *mdev);
 extern enum drbd_state_rv drbd_set_role(struct drbd_conf *mdev,
 					enum drbd_role new_role,
 					int force);
-extern enum drbd_disk_state drbd_try_outdate_peer(struct drbd_conf *mdev);
-extern void drbd_try_outdate_peer_async(struct drbd_conf *mdev);
+extern bool conn_try_outdate_peer(struct drbd_tconn *tconn);
+extern void conn_try_outdate_peer_async(struct drbd_tconn *tconn);
 extern int drbd_khelper(struct drbd_conf *mdev, char *cmd);
 
 /* drbd_worker.c */
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index f1ec727f7df5..85290a9beb6d 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -366,116 +366,122 @@ int conn_khelper(struct drbd_tconn *tconn, char *cmd)
 	return ret;
 }
 
-enum drbd_disk_state drbd_try_outdate_peer(struct drbd_conf *mdev)
+static enum drbd_fencing_p highest_fencing_policy(struct drbd_tconn *tconn)
 {
+	enum drbd_fencing_p fp = FP_NOT_AVAIL;
+	struct drbd_conf *mdev;
+	int vnr;
+
+	idr_for_each_entry(&tconn->volumes, mdev, vnr) {
+		if (get_ldev_if_state(mdev, D_CONSISTENT)) {
+			fp = max_t(enum drbd_fencing_p, fp, mdev->ldev->dc.fencing);
+			put_ldev(mdev);
+		}
+	}
+
+	return fp;
+}
+
+bool conn_try_outdate_peer(struct drbd_tconn *tconn)
+{
+	union drbd_state mask = { };
+	union drbd_state val = { };
+	enum drbd_fencing_p fp;
 	char *ex_to_string;
 	int r;
-	enum drbd_disk_state nps;
-	enum drbd_fencing_p fp;
 
-	D_ASSERT(mdev->state.pdsk == D_UNKNOWN);
+	if (tconn->cstate >= C_WF_REPORT_PARAMS) {
+		conn_err(tconn, "Expected cstate < C_WF_REPORT_PARAMS\n");
+		return false;
+	}
 
-	if (get_ldev_if_state(mdev, D_CONSISTENT)) {
-		fp = mdev->ldev->dc.fencing;
-		put_ldev(mdev);
-	} else {
-		dev_warn(DEV, "Not fencing peer, I'm not even Consistent myself.\n");
-		nps = mdev->state.pdsk;
+	fp = highest_fencing_policy(tconn);
+	switch (fp) {
+	case FP_NOT_AVAIL:
+		conn_warn(tconn, "Not fencing peer, I'm not even Consistent myself.\n");
 		goto out;
+	case FP_DONT_CARE:
+		return true;
+	default: ;
 	}
 
-	r = drbd_khelper(mdev, "fence-peer");
+	r = conn_khelper(tconn, "fence-peer");
 
 	switch ((r>>8) & 0xff) {
 	case 3: /* peer is inconsistent */
 		ex_to_string = "peer is inconsistent or worse";
-		nps = D_INCONSISTENT;
+		mask.pdsk = D_MASK;
+		val.pdsk = D_INCONSISTENT;
 		break;
 	case 4: /* peer got outdated, or was already outdated */
 		ex_to_string = "peer was fenced";
-		nps = D_OUTDATED;
+		mask.pdsk = D_MASK;
+		val.pdsk = D_OUTDATED;
 		break;
 	case 5: /* peer was down */
-		if (mdev->state.disk == D_UP_TO_DATE) {
+		if (conn_highest_disk(tconn) == D_UP_TO_DATE) {
 			/* we will(have) create(d) a new UUID anyways... */
 			ex_to_string = "peer is unreachable, assumed to be dead";
-			nps = D_OUTDATED;
+			mask.pdsk = D_MASK;
+			val.pdsk = D_OUTDATED;
 		} else {
 			ex_to_string = "peer unreachable, doing nothing since disk != UpToDate";
-			nps = mdev->state.pdsk;
 		}
 		break;
 	case 6: /* Peer is primary, voluntarily outdate myself.
 		 * This is useful when an unconnected R_SECONDARY is asked to
 		 * become R_PRIMARY, but finds the other peer being active. */
 		ex_to_string = "peer is active";
-		dev_warn(DEV, "Peer is primary, outdating myself.\n");
-		nps = D_UNKNOWN;
-		_drbd_request_state(mdev, NS(disk, D_OUTDATED), CS_WAIT_COMPLETE);
+		conn_warn(tconn, "Peer is primary, outdating myself.\n");
+		mask.disk = D_MASK;
+		val.disk = D_OUTDATED;
 		break;
 	case 7:
 		if (fp != FP_STONITH)
-			dev_err(DEV, "fence-peer() = 7 && fencing != Stonith !!!\n");
+			conn_err(tconn, "fence-peer() = 7 && fencing != Stonith !!!\n");
 		ex_to_string = "peer was stonithed";
-		nps = D_OUTDATED;
+		mask.pdsk = D_MASK;
+		val.pdsk = D_OUTDATED;
 		break;
 	default:
 		/* The script is broken ... */
-		nps = D_UNKNOWN;
-		dev_err(DEV, "fence-peer helper broken, returned %d\n", (r>>8)&0xff);
-		return nps;
+		conn_err(tconn, "fence-peer helper broken, returned %d\n", (r>>8)&0xff);
+		return false; /* Eventually leave IO frozen */
 	}
 
-	dev_info(DEV, "fence-peer helper returned %d (%s)\n",
-			(r>>8) & 0xff, ex_to_string);
+	conn_info(tconn, "fence-peer helper returned %d (%s)\n",
+		  (r>>8) & 0xff, ex_to_string);
 
-out:
-	if (mdev->state.susp_fen && nps >= D_UNKNOWN) {
-		/* The handler was not successful... unfreeze here, the
-		   state engine can not unfreeze... */
-		_drbd_request_state(mdev, NS(susp_fen, 0), CS_VERBOSE);
-	}
+ out:
 
-	return nps;
+	/* Not using
+	   conn_request_state(tconn, mask, val, CS_VERBOSE);
+	   here, because we might were able to re-establish the connection in the
+	   meantime. */
+	spin_lock_irq(&tconn->req_lock);
+	if (tconn->cstate < C_WF_REPORT_PARAMS)
+		_conn_request_state(tconn, mask, val, CS_VERBOSE);
+	spin_unlock_irq(&tconn->req_lock);
+
+	return conn_highest_pdsk(tconn) <= D_OUTDATED;
 }
 
 static int _try_outdate_peer_async(void *data)
 {
-	struct drbd_conf *mdev = (struct drbd_conf *)data;
-	enum drbd_disk_state nps;
-	union drbd_state ns;
+	struct drbd_tconn *tconn = (struct drbd_tconn *)data;
 
-	nps = drbd_try_outdate_peer(mdev);
-
-	/* Not using
-	   drbd_request_state(mdev, NS(pdsk, nps));
-	   here, because we might were able to re-establish the connection
-	   in the meantime. This can only partially be solved in the state's
-	   engine is_valid_state() and is_valid_state_transition()
-	   functions.
-
-	   nps can be D_INCONSISTENT, D_OUTDATED or D_UNKNOWN.
-	   pdsk == D_INCONSISTENT while conn >= C_CONNECTED is valid,
-	   therefore we have to have the pre state change check here.
-	*/
-	spin_lock_irq(&mdev->tconn->req_lock);
-	ns = mdev->state;
-	if (ns.conn < C_WF_REPORT_PARAMS) {
-		ns.pdsk = nps;
-		_drbd_set_state(mdev, ns, CS_VERBOSE, NULL);
-	}
-	spin_unlock_irq(&mdev->tconn->req_lock);
+	conn_try_outdate_peer(tconn);
 
 	return 0;
 }
 
-void drbd_try_outdate_peer_async(struct drbd_conf *mdev)
+void conn_try_outdate_peer_async(struct drbd_tconn *tconn)
 {
 	struct task_struct *opa;
 
-	opa = kthread_run(_try_outdate_peer_async, mdev, "drbd%d_a_helper", mdev_to_minor(mdev));
+	opa = kthread_run(_try_outdate_peer_async, tconn, "drbd_async_h");
 	if (IS_ERR(opa))
-		dev_err(DEV, "out of mem, failed to invoke fence-peer helper\n");
+		conn_err(tconn, "out of mem, failed to invoke fence-peer helper\n");
 }
 
 enum drbd_state_rv
@@ -486,7 +492,6 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force)
 	int try = 0;
 	int forced = 0;
 	union drbd_state mask, val;
-	enum drbd_disk_state nps;
 
 	if (new_role == R_PRIMARY)
 		request_ping(mdev->tconn); /* Detect a dead peer ASAP */
@@ -519,32 +524,23 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force)
 		if (rv == SS_NO_UP_TO_DATE_DISK &&
 		    mdev->state.disk == D_CONSISTENT && mask.pdsk == 0) {
 			D_ASSERT(mdev->state.pdsk == D_UNKNOWN);
-			nps = drbd_try_outdate_peer(mdev);
 
-			if (nps == D_OUTDATED || nps == D_INCONSISTENT) {
+			if (conn_try_outdate_peer(mdev->tconn)) {
 				val.disk = D_UP_TO_DATE;
 				mask.disk = D_MASK;
 			}
-
-			val.pdsk = nps;
-			mask.pdsk = D_MASK;
-
 			continue;
 		}
 
 		if (rv == SS_NOTHING_TO_DO)
 			goto out;
 		if (rv == SS_PRIMARY_NOP && mask.pdsk == 0) {
-			nps = drbd_try_outdate_peer(mdev);
-
-			if (force && nps > D_OUTDATED) {
+			if (!conn_try_outdate_peer(mdev->tconn) && force) {
 				dev_warn(DEV, "Forced into split brain situation!\n");
-				nps = D_OUTDATED;
-			}
-
-			mask.pdsk = D_MASK;
-			val.pdsk  = nps;
+				mask.pdsk = D_MASK;
+				val.pdsk  = D_OUTDATED;
 
+			}
 			continue;
 		}
 		if (rv == SS_TWO_PRIMARIES) {
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 1fd871bc889e..91aa49f478e8 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -4030,9 +4030,11 @@ static void drbd_disconnect(struct drbd_tconn *tconn)
 	drbd_free_sock(tconn);
 
 	idr_for_each(&tconn->volumes, drbd_disconnected, tconn);
-
 	conn_info(tconn, "Connection closed\n");
 
+	if (conn_highest_role(tconn) == R_PRIMARY && conn_highest_pdsk(tconn) >= D_UNKNOWN)
+		conn_try_outdate_peer_async(tconn);
+
 	spin_lock_irq(&tconn->req_lock);
 	oc = tconn->cstate;
 	if (oc >= C_UNCONNECTED)
@@ -4109,9 +4111,6 @@ static int drbd_disconnected(int vnr, void *p, void *data)
 		put_ldev(mdev);
 	}
 
-	if (mdev->state.role == R_PRIMARY && fp >= FP_RESOURCE && mdev->state.pdsk >= D_UNKNOWN)
-		drbd_try_outdate_peer_async(mdev);
-
 	/* serialize with bitmap writeout triggered by the state change,
 	 * if any. */
 	wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c
index 52ff1c7379e9..b4f668db3296 100644
--- a/drivers/block/drbd/drbd_state.c
+++ b/drivers/block/drbd/drbd_state.c
@@ -61,6 +61,73 @@ bool conn_all_vols_unconf(struct drbd_tconn *tconn)
 	return true;
 }
 
+/* Unfortunately the states where not correctly ordered, when
+   they where defined. therefore can not use max_t() here. */
+static enum drbd_role max_role(enum drbd_role role1, enum drbd_role role2)
+{
+	if (role1 == R_PRIMARY || role2 == R_PRIMARY)
+		return R_PRIMARY;
+	if (role1 == R_SECONDARY || role2 == R_SECONDARY)
+		return R_SECONDARY;
+	return R_UNKNOWN;
+}
+static enum drbd_role min_role(enum drbd_role role1, enum drbd_role role2)
+{
+	if (role1 == R_UNKNOWN || role2 == R_UNKNOWN)
+		return R_UNKNOWN;
+	if (role1 == R_SECONDARY || role2 == R_SECONDARY)
+		return R_SECONDARY;
+	return R_PRIMARY;
+}
+
+enum drbd_role conn_highest_role(struct drbd_tconn *tconn)
+{
+	enum drbd_role role = R_UNKNOWN;
+	struct drbd_conf *mdev;
+	int vnr;
+
+	idr_for_each_entry(&tconn->volumes, mdev, vnr)
+		role = max_role(role, mdev->state.role);
+
+	return role;
+}
+
+enum drbd_role conn_highest_peer(struct drbd_tconn *tconn)
+{
+	enum drbd_role peer = R_UNKNOWN;
+	struct drbd_conf *mdev;
+	int vnr;
+
+	idr_for_each_entry(&tconn->volumes, mdev, vnr)
+		peer = max_role(peer, mdev->state.peer);
+
+	return peer;
+}
+
+enum drbd_disk_state conn_highest_disk(struct drbd_tconn *tconn)
+{
+	enum drbd_disk_state ds = D_DISKLESS;
+	struct drbd_conf *mdev;
+	int vnr;
+
+	idr_for_each_entry(&tconn->volumes, mdev, vnr)
+		ds = max_t(enum drbd_disk_state, ds, mdev->state.disk);
+
+	return ds;
+}
+
+enum drbd_disk_state conn_highest_pdsk(struct drbd_tconn *tconn)
+{
+	enum drbd_disk_state ds = D_DISKLESS;
+	struct drbd_conf *mdev;
+	int vnr;
+
+	idr_for_each_entry(&tconn->volumes, mdev, vnr)
+		ds = max_t(enum drbd_disk_state, ds, mdev->state.pdsk);
+
+	return ds;
+}
+
 /**
  * cl_wide_st_chg() - true if the state change is a cluster wide one
  * @mdev:	DRBD device.
@@ -329,18 +396,6 @@ static void print_state_change(struct drbd_conf *mdev, union drbd_state os, unio
 		dev_info(DEV, "%s\n", pb);
 }
 
-static bool vol_has_primary_peer(struct drbd_tconn *tconn)
-{
-	struct drbd_conf *mdev;
-	int vnr;
-
-	idr_for_each_entry(&tconn->volumes, mdev, vnr) {
-		if (mdev->state.peer == R_PRIMARY)
-			return true;
-	}
-	return false;
-}
-
 /**
  * is_valid_state() - Returns an SS_ error code if ns is not valid
  * @mdev:	DRBD device.
@@ -364,7 +419,7 @@ is_valid_state(struct drbd_conf *mdev, union drbd_state ns)
 		if (!mdev->tconn->net_conf->two_primaries && ns.role == R_PRIMARY) {
 			if (ns.peer == R_PRIMARY)
 				rv = SS_TWO_PRIMARIES;
-			else if (vol_has_primary_peer(mdev->tconn))
+			else if (conn_highest_peer(mdev->tconn) == R_PRIMARY)
 				rv = SS_O_VOL_PEER_PRI;
 			}
 		put_net_conf(mdev->tconn);
@@ -1390,8 +1445,8 @@ static int _set_state_itr_fn(int vnr, void *p, void *data)
 
 	rv = __drbd_set_state(mdev, ns, flags, NULL);
 
-	ms.role = max_t(enum drbd_role, mdev->state.role, ms.role);
-	ms.peer = max_t(enum drbd_role, mdev->state.peer, ms.peer);
+	ms.role = max_role(ns.role, ms.role);
+	ms.peer = max_role(ns.peer, ms.peer);
 	ms.disk = max_t(enum drbd_role, mdev->state.disk, ms.disk);
 	ms.pdsk = max_t(enum drbd_role, mdev->state.pdsk, ms.pdsk);
 	params->ms = ms;
diff --git a/drivers/block/drbd/drbd_state.h b/drivers/block/drbd/drbd_state.h
index 55df0728bc88..394a1998acd9 100644
--- a/drivers/block/drbd/drbd_state.h
+++ b/drivers/block/drbd/drbd_state.h
@@ -110,4 +110,9 @@ static inline int drbd_request_state(struct drbd_conf *mdev,
 	return _drbd_request_state(mdev, mask, val, CS_VERBOSE + CS_ORDERED);
 }
 
+enum drbd_role conn_highest_role(struct drbd_tconn *tconn);
+enum drbd_role conn_highest_peer(struct drbd_tconn *tconn);
+enum drbd_disk_state conn_highest_disk(struct drbd_tconn *tconn);
+enum drbd_disk_state conn_highest_pdsk(struct drbd_tconn *tconn);
+
 #endif
diff --git a/include/linux/drbd.h b/include/linux/drbd.h
index 9cdb888607ae..60d308819096 100644
--- a/include/linux/drbd.h
+++ b/include/linux/drbd.h
@@ -65,7 +65,8 @@ enum drbd_io_error_p {
 };
 
 enum drbd_fencing_p {
-	FP_DONT_CARE,
+	FP_NOT_AVAIL = -1, /* Not a policy */
+	FP_DONT_CARE = 0,
 	FP_RESOURCE,
 	FP_STONITH
 };
-- 
cgit v1.2.3


From 0c8e36d9b843be56e4e43d4ef3c3eb6a97205599 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruen@linbit.com>
Date: Wed, 30 Mar 2011 16:00:17 +0200
Subject: drbd: Introduce protocol version 100 headers

The 8 byte header finally becomes too small. With the protocol 100 header we
have 16 bit for the volume number, proper 32 bit for the data length, and
32 bit for further extensions in the future.

Previous versions of drbd are using version 80 headers for all packets
short enough for protocol 80.  They support both header versions in
worker context, but only version 80 headers in asynchronous context.
For backwards compatibility, continue to use version 80 headers for
short packets before protocol version 100.

From protocol version 100 on, use the same header version for all
packets.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_int.h      |  8 ++++++++
 drivers/block/drbd/drbd_main.c     | 32 ++++++++++++++++++++++++++------
 drivers/block/drbd/drbd_nl.c       |  3 +--
 drivers/block/drbd/drbd_receiver.c | 14 ++++++++++++--
 include/linux/drbd.h               |  1 +
 include/linux/drbd_genl.h          |  2 --
 include/linux/drbd_limits.h        |  2 ++
 7 files changed, 50 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 6d55bb75a081..bf1aad683387 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -307,6 +307,14 @@ struct p_header95 {
 	u32	  length;	/* Use only 24 bits of that. Ignore the highest 8 bit. */
 } __packed;
 
+struct p_header100 {
+	u32	  magic;
+	u16	  volume;
+	u16	  command;
+	u32	  length;
+	u32	  pad;
+} __packed;
+
 extern unsigned int drbd_header_size(struct drbd_tconn *tconn);
 
 /* these defines must not be changed without changing the protocol version */
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index b9dcc50135c4..5d9112cefcd7 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -698,9 +698,15 @@ void drbd_thread_current_set_cpu(struct drbd_thread *thi)
  */
 unsigned int drbd_header_size(struct drbd_tconn *tconn)
 {
-	BUILD_BUG_ON(sizeof(struct p_header80) != sizeof(struct p_header95));
-	BUILD_BUG_ON(!IS_ALIGNED(sizeof(struct p_header80), 8));
-	return sizeof(struct p_header80);
+	if (tconn->agreed_pro_version >= 100) {
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(struct p_header100), 8));
+		return sizeof(struct p_header100);
+	} else {
+		BUILD_BUG_ON(sizeof(struct p_header80) !=
+			     sizeof(struct p_header95));
+		BUILD_BUG_ON(!IS_ALIGNED(sizeof(struct p_header80), 8));
+		return sizeof(struct p_header80);
+	}
 }
 
 static unsigned int prepare_header80(struct p_header80 *h, enum drbd_packet cmd, int size)
@@ -719,10 +725,24 @@ static unsigned int prepare_header95(struct p_header95 *h, enum drbd_packet cmd,
 	return sizeof(struct p_header95);
 }
 
-static unsigned int prepare_header(struct drbd_tconn *tconn, int vnr, void *buffer,
-				   enum drbd_packet cmd, int size)
+static unsigned int prepare_header100(struct p_header100 *h, enum drbd_packet cmd,
+				      int size, int vnr)
+{
+	h->magic = cpu_to_be32(DRBD_MAGIC_100);
+	h->volume = cpu_to_be16(vnr);
+	h->command = cpu_to_be16(cmd);
+	h->length = cpu_to_be32(size);
+	h->pad = 0;
+	return sizeof(struct p_header100);
+}
+
+static unsigned int prepare_header(struct drbd_tconn *tconn, int vnr,
+				   void *buffer, enum drbd_packet cmd, int size)
 {
-	if (tconn->agreed_pro_version >= 95)
+	if (tconn->agreed_pro_version >= 100)
+		return prepare_header100(buffer, cmd, size, vnr);
+	else if (tconn->agreed_pro_version >= 95 &&
+		 size > DRBD_MAX_SIZE_H80_PACKET)
 		return prepare_header95(buffer, cmd, size);
 	else
 		return prepare_header80(buffer, cmd, size);
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index d9bb1a5c756a..0f52b88719c8 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -2833,8 +2833,7 @@ int drbd_adm_add_minor(struct sk_buff *skb, struct genl_info *info)
 		retcode = ERR_INVALID_REQUEST;
 		goto out;
 	}
-	/* FIXME we need a define here */
-	if (adm_ctx.volume >= 256) {
+	if (adm_ctx.volume > DRBD_VOLUME_MAX) {
 		drbd_msg_put_info("requested volume id out of range");
 		retcode = ERR_INVALID_REQUEST;
 		goto out;
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 7e0ab2246fb6..311b95453cb7 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -983,8 +983,18 @@ static int decode_header(struct drbd_tconn *tconn, void *header, struct packet_i
 {
 	unsigned int header_size = drbd_header_size(tconn);
 
-	if (header_size == sizeof(struct p_header95) &&
-	    *(__be16 *)header == cpu_to_be16(DRBD_MAGIC_BIG)) {
+	if (header_size == sizeof(struct p_header100) &&
+	    *(__be32 *)header == cpu_to_be32(DRBD_MAGIC_100)) {
+		struct p_header100 *h = header;
+		if (h->pad != 0) {
+			conn_err(tconn, "Header padding is not zero\n");
+			return -EINVAL;
+		}
+		pi->vnr = be16_to_cpu(h->volume);
+		pi->cmd = be16_to_cpu(h->command);
+		pi->size = be32_to_cpu(h->length);
+	} else if (header_size == sizeof(struct p_header95) &&
+		   *(__be16 *)header == cpu_to_be16(DRBD_MAGIC_BIG)) {
 		struct p_header95 *h = header;
 
 		pi->cmd = be16_to_cpu(h->command);
diff --git a/include/linux/drbd.h b/include/linux/drbd.h
index 60d308819096..fe8d6ba31bcb 100644
--- a/include/linux/drbd.h
+++ b/include/linux/drbd.h
@@ -341,6 +341,7 @@ enum drbd_timeout_flag {
 
 #define DRBD_MAGIC 0x83740267
 #define DRBD_MAGIC_BIG 0x835a
+#define DRBD_MAGIC_100 0x8620ec20
 
 /* how I came up with this magic?
  * base64 decode "actlog==" ;) */
diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h
index 938e8560a833..10144d546a66 100644
--- a/include/linux/drbd_genl.h
+++ b/include/linux/drbd_genl.h
@@ -95,8 +95,6 @@ GENL_struct(DRBD_NLA_CFG_REPLY, 1, drbd_cfg_reply,
  * and/or the replication group (aka resource) name,
  * and the volume id within the resource. */
 GENL_struct(DRBD_NLA_CFG_CONTEXT, 2, drbd_cfg_context,
-		/* currently only 256 volumes per group,
-		 * but maybe we still change that */
 	__u32_field(1, GENLA_F_MANDATORY,	ctx_volume)
 	__str_field(2, GENLA_F_MANDATORY,	ctx_conn_name, 128)
 )
diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h
index 659a8eb38830..7f5149bef70e 100644
--- a/include/linux/drbd_limits.h
+++ b/include/linux/drbd_limits.h
@@ -19,6 +19,8 @@
 #define DRBD_MINOR_COUNT_MAX 256
 #define DRBD_MINOR_COUNT_DEF 32
 
+#define DRBD_VOLUME_MAX 65535
+
 #define DRBD_DIALOG_REFRESH_MIN 0
 #define DRBD_DIALOG_REFRESH_MAX 600
 
-- 
cgit v1.2.3


From b032b6fa3528d6eed972db32257cb316a66e0dac Mon Sep 17 00:00:00 2001
From: Philipp Reisner <philipp.reisner@linbit.com>
Date: Wed, 13 Apr 2011 18:16:10 -0700
Subject: drbd: Allow online change of replication protocol only with agreed_pv
 >= 100

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_nl.c | 5 +++++
 include/linux/drbd.h         | 1 +
 2 files changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 40de384aade6..d4b29fd603f4 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -1671,6 +1671,11 @@ check_net_options(struct drbd_tconn *tconn, struct net_conf *new_conf)
 	struct drbd_conf *mdev;
 	int i;
 
+	if (tconn->net_conf && tconn->agreed_pro_version < 100 &&
+	    tconn->cstate == C_WF_REPORT_PARAMS &&
+	    new_conf->wire_protocol != tconn->net_conf->wire_protocol)
+		return ERR_NEED_APV_100;
+
 	if (new_conf->two_primaries &&
 	    (new_conf->wire_protocol != DRBD_PROT_C))
 		return ERR_NOT_PROTO_C;
diff --git a/include/linux/drbd.h b/include/linux/drbd.h
index fe8d6ba31bcb..6c7c85d8fc41 100644
--- a/include/linux/drbd.h
+++ b/include/linux/drbd.h
@@ -160,6 +160,7 @@ enum drbd_ret_code {
 	ERR_MINOR_CONFIGURED    = 160,
 	ERR_MINOR_EXISTS	= 161,
 	ERR_INVALID_REQUEST	= 162,
+	ERR_NEED_APV_100	= 163,
 
 	/* insert new ones above this line */
 	AFTER_LAST_ERR_CODE
-- 
cgit v1.2.3


From d8cd289dbe69ce9b8115d6f200ceff657e5dafa0 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruen@linbit.com>
Date: Tue, 3 May 2011 12:27:11 +0200
Subject: drbd: Remove left-over unused define

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 include/linux/drbd_limits.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h
index 7f5149bef70e..bcebb016fda3 100644
--- a/include/linux/drbd_limits.h
+++ b/include/linux/drbd_limits.h
@@ -170,5 +170,4 @@
 
 #define DRBD_PROTOCOL_DEF DRBD_PROT_C
 
-#undef RANGE
 #endif
-- 
cgit v1.2.3


From b966b5dd8e17e6c105ca55533fd412de5d5b429e Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruen@linbit.com>
Date: Tue, 3 May 2011 14:56:09 +0200
Subject: drbd: Generate the drbd_set_*_defaults() functions from drbd_genl.h

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_int.h     |  2 +-
 drivers/block/drbd/drbd_main.c    |  2 +-
 drivers/block/drbd/drbd_nl.c      | 86 +++++-------------------------------
 include/linux/drbd_genl.h         | 91 +++++++++++++++++++--------------------
 include/linux/genl_magic_func.h   | 26 +++++++++++
 include/linux/genl_magic_struct.h |  8 ++++
 6 files changed, 91 insertions(+), 124 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 44f77265d2b0..8655fcb82028 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -1390,7 +1390,7 @@ extern int is_valid_ar_handle(struct drbd_request *, sector_t);
 
 
 /* drbd_nl.c */
-extern void drbd_set_res_opts_default(struct res_opts *r);
+extern void drbd_set_res_opts_defaults(struct res_opts *r);
 extern int drbd_msg_put_info(const char *info);
 extern void drbd_suspend_io(struct drbd_conf *mdev);
 extern void drbd_resume_io(struct drbd_conf *mdev);
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 427e959e4869..4ae3e7a99d7c 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -2488,7 +2488,7 @@ struct drbd_tconn *conn_create(const char *name)
 	drbd_thread_init(tconn, &tconn->worker, drbd_worker, "worker");
 	drbd_thread_init(tconn, &tconn->asender, drbd_asender, "asender");
 
-	drbd_set_res_opts_default(&tconn->res_opts);
+	drbd_set_res_opts_defaults(&tconn->res_opts);
 
 	down_write(&drbd_cfg_rwsem);
 	kref_init(&tconn->kref);
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 7320ac00f0fb..f5732cf46c2f 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -1090,77 +1090,6 @@ static bool should_set_defaults(struct genl_info *info)
 	return 0 != (flags & DRBD_GENL_F_SET_DEFAULTS);
 }
 
-/* Maybe we should we generate these functions
- * from the drbd_genl.h magic as well?
- * That way we would not "accidentally forget" to add defaults here. */
-
-#define RESET_ARRAY_FIELD(field) do { \
-	memset(field, 0, sizeof(field)); \
-	field ## _len = 0; \
-} while (0)
-void drbd_set_res_opts_default(struct res_opts *r)
-{
-	RESET_ARRAY_FIELD(r->cpu_mask);
-	r->on_no_data  = DRBD_ON_NO_DATA_DEF;
-}
-
-static void drbd_set_net_conf_defaults(struct net_conf *nc)
-{
-	/* Do NOT (re)set those fields marked as GENLA_F_INVARIANT
-	 * in drbd_genl.h, they can only be change with disconnect/reconnect */
-	RESET_ARRAY_FIELD(nc->shared_secret);
-
-	RESET_ARRAY_FIELD(nc->cram_hmac_alg);
-	RESET_ARRAY_FIELD(nc->integrity_alg);
-	RESET_ARRAY_FIELD(nc->verify_alg);
-	RESET_ARRAY_FIELD(nc->csums_alg);
-#undef RESET_ARRAY_FIELD
-
-	nc->wire_protocol = DRBD_PROTOCOL_DEF;
-	nc->try_connect_int = DRBD_CONNECT_INT_DEF;
-	nc->timeout = DRBD_TIMEOUT_DEF;
-	nc->ping_int = DRBD_PING_INT_DEF;
-	nc->ping_timeo = DRBD_PING_TIMEO_DEF;
-	nc->sndbuf_size = DRBD_SNDBUF_SIZE_DEF;
-	nc->rcvbuf_size = DRBD_RCVBUF_SIZE_DEF;
-	nc->ko_count = DRBD_KO_COUNT_DEF;
-	nc->max_buffers = DRBD_MAX_BUFFERS_DEF;
-	nc->max_epoch_size = DRBD_MAX_EPOCH_SIZE_DEF;
-	nc->unplug_watermark = DRBD_UNPLUG_WATERMARK_DEF;
-	nc->after_sb_0p = DRBD_AFTER_SB_0P_DEF;
-	nc->after_sb_1p = DRBD_AFTER_SB_1P_DEF;
-	nc->after_sb_2p = DRBD_AFTER_SB_2P_DEF;
-	nc->rr_conflict = DRBD_RR_CONFLICT_DEF;
-	nc->on_congestion = DRBD_ON_CONGESTION_DEF;
-	nc->cong_fill = DRBD_CONG_FILL_DEF;
-	nc->cong_extents = DRBD_CONG_EXTENTS_DEF;
-	nc->two_primaries = 0;
-	nc->no_cork = 0;
-	nc->always_asbp = 0;
-	nc->use_rle = 0;
-}
-
-static void drbd_set_disk_conf_defaults(struct disk_conf *dc)
-{
-	/* Do NOT (re)set those fields marked as GENLA_F_INVARIANT
-	 * in drbd_genl.h, they can only be change with detach/reattach */
-	dc->on_io_error = DRBD_ON_IO_ERROR_DEF;
-	dc->fencing = DRBD_FENCING_DEF;
-	dc->resync_rate = DRBD_RATE_DEF;
-	dc->resync_after = DRBD_AFTER_DEF;
-	dc->al_extents = DRBD_AL_EXTENTS_DEF;
-	dc->c_plan_ahead = DRBD_C_PLAN_AHEAD_DEF;
-	dc->c_delay_target = DRBD_C_DELAY_TARGET_DEF;
-	dc->c_fill_target = DRBD_C_FILL_TARGET_DEF;
-	dc->c_max_rate = DRBD_C_MAX_RATE_DEF;
-	dc->c_min_rate = DRBD_C_MIN_RATE_DEF;
-	dc->no_disk_barrier = 0;
-	dc->no_disk_flush = 0;
-	dc->no_disk_drain = 0;
-	dc->no_md_flush = 0;
-}
-
-
 int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
 {
 	enum drbd_ret_code retcode;
@@ -1198,7 +1127,7 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
 
 	memcpy(new_disk_conf, &mdev->ldev->dc, sizeof(*new_disk_conf));
 	if (should_set_defaults(info))
-		drbd_set_disk_conf_defaults(new_disk_conf);
+		set_disk_conf_defaults(new_disk_conf);
 
 	err = disk_conf_from_attrs_for_change(new_disk_conf, info);
 	if (err) {
@@ -1315,7 +1244,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 		goto fail;
 	}
 
-	drbd_set_disk_conf_defaults(&nbc->dc);
+	set_disk_conf_defaults(&nbc->dc);
 
 	err = disk_conf_from_attrs(&nbc->dc, info);
 	if (err) {
@@ -1911,7 +1840,7 @@ int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info)
 
 	*new_conf = *old_conf;
 	if (should_set_defaults(info))
-		drbd_set_net_conf_defaults(new_conf);
+		set_net_conf_defaults(new_conf);
 
 	err = net_conf_from_attrs_for_change(new_conf, info);
 	if (err) {
@@ -2029,7 +1958,7 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info)
 		goto fail;
 	}
 
-	drbd_set_net_conf_defaults(new_conf);
+	set_net_conf_defaults(new_conf);
 
 	err = net_conf_from_attrs(new_conf, info);
 	if (err) {
@@ -2301,6 +2230,11 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
 	return 0;
 }
 
+void drbd_set_res_opts_defaults(struct res_opts *r)
+{
+	return set_res_opts_defaults(r);
+}
+
 int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info)
 {
 	enum drbd_ret_code retcode;
@@ -2325,7 +2259,7 @@ int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info)
 
 	res_opts = tconn->res_opts;
 	if (should_set_defaults(info))
-		drbd_set_res_opts_default(&res_opts);
+		set_res_opts_defaults(&res_opts);
 
 	err = res_opts_from_attrs(&res_opts, info);
 	if (err) {
diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h
index 10144d546a66..549800668cb9 100644
--- a/include/linux/drbd_genl.h
+++ b/include/linux/drbd_genl.h
@@ -110,63 +110,62 @@ GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf,
 	 * but it won't propagate through the stack */
 	__u32_field(5, GENLA_F_MANDATORY | GENLA_F_INVARIANT,	max_bio_bvecs)
 
-	__u32_field(6, GENLA_F_MANDATORY,	on_io_error)
-	__u32_field(7, GENLA_F_MANDATORY,	fencing)
-
-	__u32_field(8,	GENLA_F_MANDATORY,	resync_rate)
-	__u32_field(9,	GENLA_F_MANDATORY,	resync_after)
-	__u32_field(10,	GENLA_F_MANDATORY,	al_extents)
-	__u32_field(11,	GENLA_F_MANDATORY,	c_plan_ahead)
-	__u32_field(12,	GENLA_F_MANDATORY,	c_delay_target)
-	__u32_field(13,	GENLA_F_MANDATORY,	c_fill_target)
-	__u32_field(14,	GENLA_F_MANDATORY,	c_max_rate)
-	__u32_field(15,	GENLA_F_MANDATORY,	c_min_rate)
-
-	__flg_field(16, GENLA_F_MANDATORY,	no_disk_barrier)
-	__flg_field(17, GENLA_F_MANDATORY,	no_disk_flush)
-	__flg_field(18, GENLA_F_MANDATORY,	no_disk_drain)
-	__flg_field(19, GENLA_F_MANDATORY,	no_md_flush)
-
+	__u32_field_def(6, GENLA_F_MANDATORY,	on_io_error, DRBD_ON_IO_ERROR_DEF)
+	__u32_field_def(7, GENLA_F_MANDATORY,	fencing, DRBD_FENCING_DEF)
+
+	__u32_field_def(8,	GENLA_F_MANDATORY,	resync_rate, DRBD_RATE_DEF)
+	__u32_field_def(9,	GENLA_F_MANDATORY,	resync_after, DRBD_AFTER_DEF)
+	__u32_field_def(10,	GENLA_F_MANDATORY,	al_extents, DRBD_AL_EXTENTS_DEF)
+	__u32_field_def(11,	GENLA_F_MANDATORY,	c_plan_ahead, DRBD_C_PLAN_AHEAD_DEF)
+	__u32_field_def(12,	GENLA_F_MANDATORY,	c_delay_target, DRBD_C_DELAY_TARGET_DEF)
+	__u32_field_def(13,	GENLA_F_MANDATORY,	c_fill_target, DRBD_C_FILL_TARGET_DEF)
+	__u32_field_def(14,	GENLA_F_MANDATORY,	c_max_rate, DRBD_C_MAX_RATE_DEF)
+	__u32_field_def(15,	GENLA_F_MANDATORY,	c_min_rate, DRBD_C_MIN_RATE_DEF)
+
+	__flg_field_def(16, GENLA_F_MANDATORY,	no_disk_barrier, 0)
+	__flg_field_def(17, GENLA_F_MANDATORY,	no_disk_flush, 0)
+	__flg_field_def(18, GENLA_F_MANDATORY,	no_disk_drain, 0)
+	__flg_field_def(19, GENLA_F_MANDATORY,	no_md_flush, 0)
 )
 
 GENL_struct(DRBD_NLA_RESOURCE_OPTS, 4, res_opts,
-	__str_field(1,	GENLA_F_MANDATORY,	cpu_mask,       32)
-	__u32_field(2,	GENLA_F_MANDATORY,	on_no_data)
+	__str_field_def(1,	GENLA_F_MANDATORY,	cpu_mask,       32)
+	__u32_field_def(2,	GENLA_F_MANDATORY,	on_no_data, DRBD_ON_NO_DATA_DEF)
 )
 
 GENL_struct(DRBD_NLA_NET_CONF, 5, net_conf,
 	__bin_field(1,	GENLA_F_REQUIRED | GENLA_F_INVARIANT,	my_addr,	128)
 	__bin_field(2,	GENLA_F_REQUIRED | GENLA_F_INVARIANT,	peer_addr,	128)
-	__str_field(3,	GENLA_F_MANDATORY | GENLA_F_SENSITIVE,
+	__str_field_def(3,	GENLA_F_MANDATORY | GENLA_F_SENSITIVE,
 						shared_secret,	SHARED_SECRET_MAX)
-	__str_field(4,	GENLA_F_MANDATORY,	cram_hmac_alg,	SHARED_SECRET_MAX)
-	__str_field(5,	GENLA_F_MANDATORY,	integrity_alg,	SHARED_SECRET_MAX)
-	__str_field(6,	GENLA_F_MANDATORY,	verify_alg,     SHARED_SECRET_MAX)
-	__str_field(7,	GENLA_F_MANDATORY,	csums_alg,	SHARED_SECRET_MAX)
-	__u32_field(8,	GENLA_F_MANDATORY,	wire_protocol)
-	__u32_field(9,	GENLA_F_MANDATORY,	try_connect_int)
-	__u32_field(10,	GENLA_F_MANDATORY,	timeout)
-	__u32_field(11,	GENLA_F_MANDATORY,	ping_int)
-	__u32_field(12,	GENLA_F_MANDATORY,	ping_timeo)
-	__u32_field(13,	GENLA_F_MANDATORY,	sndbuf_size)
-	__u32_field(14,	GENLA_F_MANDATORY,	rcvbuf_size)
-	__u32_field(15,	GENLA_F_MANDATORY,	ko_count)
-	__u32_field(16,	GENLA_F_MANDATORY,	max_buffers)
-	__u32_field(17,	GENLA_F_MANDATORY,	max_epoch_size)
-	__u32_field(18,	GENLA_F_MANDATORY,	unplug_watermark)
-	__u32_field(19,	GENLA_F_MANDATORY,	after_sb_0p)
-	__u32_field(20,	GENLA_F_MANDATORY,	after_sb_1p)
-	__u32_field(21,	GENLA_F_MANDATORY,	after_sb_2p)
-	__u32_field(22,	GENLA_F_MANDATORY,	rr_conflict)
-	__u32_field(23,	GENLA_F_MANDATORY,	on_congestion)
-	__u32_field(24,	GENLA_F_MANDATORY,	cong_fill)
-	__u32_field(25,	GENLA_F_MANDATORY,	cong_extents)
-	__flg_field(26, GENLA_F_MANDATORY,	two_primaries)
+	__str_field_def(4,	GENLA_F_MANDATORY,	cram_hmac_alg,	SHARED_SECRET_MAX)
+	__str_field_def(5,	GENLA_F_MANDATORY,	integrity_alg,	SHARED_SECRET_MAX)
+	__str_field_def(6,	GENLA_F_MANDATORY,	verify_alg,     SHARED_SECRET_MAX)
+	__str_field_def(7,	GENLA_F_MANDATORY,	csums_alg,	SHARED_SECRET_MAX)
+	__u32_field_def(8,	GENLA_F_MANDATORY,	wire_protocol, DRBD_PROTOCOL_DEF)
+	__u32_field_def(9,	GENLA_F_MANDATORY,	try_connect_int, DRBD_CONNECT_INT_DEF)
+	__u32_field_def(10,	GENLA_F_MANDATORY,	timeout, DRBD_TIMEOUT_DEF)
+	__u32_field_def(11,	GENLA_F_MANDATORY,	ping_int, DRBD_PING_INT_DEF)
+	__u32_field_def(12,	GENLA_F_MANDATORY,	ping_timeo, DRBD_PING_TIMEO_DEF)
+	__u32_field_def(13,	GENLA_F_MANDATORY,	sndbuf_size, DRBD_SNDBUF_SIZE_DEF)
+	__u32_field_def(14,	GENLA_F_MANDATORY,	rcvbuf_size, DRBD_RCVBUF_SIZE_DEF)
+	__u32_field_def(15,	GENLA_F_MANDATORY,	ko_count, DRBD_KO_COUNT_DEF)
+	__u32_field_def(16,	GENLA_F_MANDATORY,	max_buffers, DRBD_MAX_BUFFERS_DEF)
+	__u32_field_def(17,	GENLA_F_MANDATORY,	max_epoch_size, DRBD_MAX_EPOCH_SIZE_DEF)
+	__u32_field_def(18,	GENLA_F_MANDATORY,	unplug_watermark, DRBD_UNPLUG_WATERMARK_DEF)
+	__u32_field_def(19,	GENLA_F_MANDATORY,	after_sb_0p, DRBD_AFTER_SB_0P_DEF)
+	__u32_field_def(20,	GENLA_F_MANDATORY,	after_sb_1p, DRBD_AFTER_SB_1P_DEF)
+	__u32_field_def(21,	GENLA_F_MANDATORY,	after_sb_2p, DRBD_AFTER_SB_2P_DEF)
+	__u32_field_def(22,	GENLA_F_MANDATORY,	rr_conflict, DRBD_RR_CONFLICT_DEF)
+	__u32_field_def(23,	GENLA_F_MANDATORY,	on_congestion, DRBD_ON_CONGESTION_DEF)
+	__u32_field_def(24,	GENLA_F_MANDATORY,	cong_fill, DRBD_CONG_FILL_DEF)
+	__u32_field_def(25,	GENLA_F_MANDATORY,	cong_extents, DRBD_CONG_EXTENTS_DEF)
+	__flg_field_def(26, GENLA_F_MANDATORY,	two_primaries, 0)
 	__flg_field(27, GENLA_F_MANDATORY | GENLA_F_INVARIANT,	want_lose)
-	__flg_field(28, GENLA_F_MANDATORY,	no_cork)
-	__flg_field(29, GENLA_F_MANDATORY,	always_asbp)
+	__flg_field_def(28, GENLA_F_MANDATORY,	no_cork, 0)
+	__flg_field_def(29, GENLA_F_MANDATORY,	always_asbp, 0)
 	__flg_field(30, GENLA_F_MANDATORY | GENLA_F_INVARIANT,	dry_run)
-	__flg_field(31,	GENLA_F_MANDATORY,	use_rle)
+	__flg_field_def(31,	GENLA_F_MANDATORY,	use_rle, 0)
 )
 
 GENL_struct(DRBD_NLA_SET_ROLE_PARMS, 6, set_role_parms,
diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h
index e458282a3728..e908f1c50355 100644
--- a/include/linux/genl_magic_func.h
+++ b/include/linux/genl_magic_func.h
@@ -427,6 +427,32 @@ static inline int s_name ## _to_unpriv_skb(struct sk_buff *skb,		\
 
 #include GENL_MAGIC_INCLUDE_FILE
 
+
+/* Functions for initializing structs to default values.  */
+
+#undef __field
+#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put)
+#undef __array
+#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, __get, __put)
+#undef __u32_field_def
+#define __u32_field_def(attr_nr, attr_flag, name, default)		\
+	x->name = default;
+#undef __flg_field_def
+#define __flg_field_def(attr_nr, attr_flag, name, default)		\
+	x->name = default;
+#undef __str_field_def
+#define __str_field_def(attr_nr, attr_flag, name, maxlen)		\
+	memset(x->name, 0, sizeof(x->name));				\
+	x->name ## _len = 0;
+#undef GENL_struct
+#define GENL_struct(tag_name, tag_number, s_name, s_fields)		\
+static void set_ ## s_name ## _defaults(struct s_name *x) __attribute__((unused)); \
+static void set_ ## s_name ## _defaults(struct s_name *x) {	\
+s_fields								\
+}
+
+#include GENL_MAGIC_INCLUDE_FILE
+
 #endif /* __KERNEL__ */
 
 /* }}}1 */
diff --git a/include/linux/genl_magic_struct.h b/include/linux/genl_magic_struct.h
index 9a605b9ee834..f2c7cc7831df 100644
--- a/include/linux/genl_magic_struct.h
+++ b/include/linux/genl_magic_struct.h
@@ -107,6 +107,14 @@ enum {
 	__array(attr_nr, attr_flag, name, NLA_BINARY, char, maxlen, \
 			nla_memcpy, NLA_PUT)
 
+/* fields with default values */
+#define __flg_field_def(attr_nr, attr_flag, name, default) \
+	__flg_field(attr_nr, attr_flag, name)
+#define __u32_field_def(attr_nr, attr_flag, name, default) \
+	__u32_field(attr_nr, attr_flag, name)
+#define __str_field_def(attr_nr, attr_flag, name, maxlen) \
+	__str_field(attr_nr, attr_flag, name, maxlen)
+
 #define __nla_put_flag(skb, attrtype, value)		\
 	do {						\
 		if (value)				\
-- 
cgit v1.2.3


From 563e4cf25ec804eb02cd30a41baa2fcc6c06679b Mon Sep 17 00:00:00 2001
From: Lars Ellenberg <lars.ellenberg@linbit.com>
Date: Wed, 4 May 2011 10:33:52 +0200
Subject: drbd: Introduce __s32_field in the genetlink macro magic

...and drop explicit typecasts (int)meta_dev_idx < 0.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_nl.c      | 8 ++++----
 include/linux/drbd_genl.h         | 2 +-
 include/linux/genl_magic_struct.h | 3 +++
 3 files changed, 8 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index a1854e3aa15e..b8ea4807c981 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -1253,7 +1253,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 		goto fail;
 	}
 
-	if ((int)nbc->dc.meta_dev_idx < DRBD_MD_INDEX_FLEX_INT) {
+	if (nbc->dc.meta_dev_idx < DRBD_MD_INDEX_FLEX_INT) {
 		retcode = ERR_MD_IDX_INVALID;
 		goto fail;
 	}
@@ -1289,7 +1289,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 	 */
 	bdev = blkdev_get_by_path(nbc->dc.meta_dev,
 				  FMODE_READ | FMODE_WRITE | FMODE_EXCL,
-				  ((int)nbc->dc.meta_dev_idx < 0) ?
+				  (nbc->dc.meta_dev_idx < 0) ?
 				  (void *)mdev : (void *)drbd_m_holder);
 	if (IS_ERR(bdev)) {
 		dev_err(DEV, "open(\"%s\") failed with %ld\n", nbc->dc.meta_dev,
@@ -1325,7 +1325,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 		goto fail;
 	}
 
-	if ((int)nbc->dc.meta_dev_idx < 0) {
+	if (nbc->dc.meta_dev_idx < 0) {
 		max_possible_sectors = DRBD_MAX_SECTORS_FLEX;
 		/* at least one MB, otherwise it does not make sense */
 		min_md_device_sectors = (2<<10);
@@ -1356,7 +1356,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 		dev_warn(DEV, "==> truncating very big lower level device "
 			"to currently maximum possible %llu sectors <==\n",
 			(unsigned long long) max_possible_sectors);
-		if ((int)nbc->dc.meta_dev_idx >= 0)
+		if (nbc->dc.meta_dev_idx >= 0)
 			dev_warn(DEV, "==>> using internal or flexible "
 				      "meta data may help <<==\n");
 	}
diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h
index 549800668cb9..f143e3c0f33b 100644
--- a/include/linux/drbd_genl.h
+++ b/include/linux/drbd_genl.h
@@ -102,7 +102,7 @@ GENL_struct(DRBD_NLA_CFG_CONTEXT, 2, drbd_cfg_context,
 GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf,
 	__str_field(1, GENLA_F_REQUIRED | GENLA_F_INVARIANT,	backing_dev,	128)
 	__str_field(2, GENLA_F_REQUIRED | GENLA_F_INVARIANT,	meta_dev,	128)
-	__u32_field(3, GENLA_F_REQUIRED | GENLA_F_INVARIANT,	meta_dev_idx)
+	__s32_field(3, GENLA_F_REQUIRED | GENLA_F_INVARIANT,	meta_dev_idx)
 
 	/* use the resize command to try and change the disk_size */
 	__u64_field(4, GENLA_F_MANDATORY | GENLA_F_INVARIANT,	disk_size)
diff --git a/include/linux/genl_magic_struct.h b/include/linux/genl_magic_struct.h
index f2c7cc7831df..ddbdd0a24476 100644
--- a/include/linux/genl_magic_struct.h
+++ b/include/linux/genl_magic_struct.h
@@ -97,6 +97,9 @@ enum {
 #define __u32_field(attr_nr, attr_flag, name)	\
 	__field(attr_nr, attr_flag, name, NLA_U32, __u32, \
 			nla_get_u32, NLA_PUT_U32)
+#define __s32_field(attr_nr, attr_flag, name)	\
+	__field(attr_nr, attr_flag, name, NLA_U32, __s32, \
+			nla_get_u32, NLA_PUT_U32)
 #define __u64_field(attr_nr, attr_flag, name)	\
 	__field(attr_nr, attr_flag, name, NLA_U64, __u64, \
 			nla_get_u64, NLA_PUT_U64)
-- 
cgit v1.2.3


From a5d8e1fb9d22851de89bbf52db6b11c56b895dd4 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruen@linbit.com>
Date: Wed, 4 May 2011 16:06:51 +0200
Subject: drbd: Convert boolean flags on netlink from NLA_FLAG to NLA_U8

Flags of type NLA_FLAG are either present or absent, but do not have a
value by themselves.  Use type NLA_U8 for our boolean flags instead, and
use the value to determine if the flag should be on or off.

On the drbdsetup command line, all those flags have an optional yes/no
argument which defaults to yes.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 include/linux/genl_magic_struct.h | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/genl_magic_struct.h b/include/linux/genl_magic_struct.h
index ddbdd0a24476..b1ddbb5bd725 100644
--- a/include/linux/genl_magic_struct.h
+++ b/include/linux/genl_magic_struct.h
@@ -86,8 +86,8 @@ enum {
 
 /* possible field types */
 #define __flg_field(attr_nr, attr_flag, name) \
-	__field(attr_nr, attr_flag, name, NLA_FLAG, char, \
-			nla_get_flag, __nla_put_flag)
+	__field(attr_nr, attr_flag, name, NLA_U8, char, \
+			nla_get_u8, NLA_PUT_U8)
 #define __u8_field(attr_nr, attr_flag, name)	\
 	__field(attr_nr, attr_flag, name, NLA_U8, unsigned char, \
 			nla_get_u8, NLA_PUT_U8)
@@ -118,12 +118,6 @@ enum {
 #define __str_field_def(attr_nr, attr_flag, name, maxlen) \
 	__str_field(attr_nr, attr_flag, name, maxlen)
 
-#define __nla_put_flag(skb, attrtype, value)		\
-	do {						\
-		if (value)				\
-			NLA_PUT_FLAG(skb, attrtype);	\
-	} while (0)
-
 #define GENL_op_init(args...)	args
 #define GENL_doit(handler)		\
 	.doit = handler,		\
-- 
cgit v1.2.3


From 66b2f6b9c59c5e7003e13281dfe72e174f93988c Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruen@linbit.com>
Date: Wed, 4 May 2011 15:25:35 +0200
Subject: drbd: Turn no-disk-flushes into disk-flushes={yes|no}

Change the --no-disk-flushes drbdsetup command line option as well as
the no_disk_flush netlink packet.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_receiver.c | 2 +-
 include/linux/drbd_genl.h          | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 83d39859a9fe..e7a6eeae94e2 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -1179,7 +1179,7 @@ void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo)
 	rcu_read_lock();
 	dc = rcu_dereference(mdev->ldev->disk_conf);
 
-	if (wo == WO_bdev_flush && dc->no_disk_flush)
+	if (wo == WO_bdev_flush && !dc->disk_flushes)
 		wo = WO_drain_io;
 	if (wo == WO_drain_io && dc->no_disk_drain)
 		wo = WO_none;
diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h
index f143e3c0f33b..945c4dd3470c 100644
--- a/include/linux/drbd_genl.h
+++ b/include/linux/drbd_genl.h
@@ -122,8 +122,8 @@ GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf,
 	__u32_field_def(14,	GENLA_F_MANDATORY,	c_max_rate, DRBD_C_MAX_RATE_DEF)
 	__u32_field_def(15,	GENLA_F_MANDATORY,	c_min_rate, DRBD_C_MIN_RATE_DEF)
 
-	__flg_field_def(16, GENLA_F_MANDATORY,	no_disk_barrier, 0)
-	__flg_field_def(17, GENLA_F_MANDATORY,	no_disk_flush, 0)
+	__flg_field_def(16, GENLA_F_MANDATORY,	disk_barrier, 1)
+	__flg_field_def(17, GENLA_F_MANDATORY,	disk_flushes, 1)
 	__flg_field_def(18, GENLA_F_MANDATORY,	no_disk_drain, 0)
 	__flg_field_def(19, GENLA_F_MANDATORY,	no_md_flush, 0)
 )
-- 
cgit v1.2.3


From d0c980e236243cd03aa2291243587ac1ba3c2b04 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruen@linbit.com>
Date: Wed, 4 May 2011 15:25:35 +0200
Subject: drbd: Turn no-disk-drain into disk-drain={yes|no}

Change the --no-disk-drain drbdsetup command line option as well as
the no_disk_drain netlink packet.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_receiver.c | 2 +-
 include/linux/drbd_genl.h          | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index e7a6eeae94e2..5d1bdda8ec9f 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -1181,7 +1181,7 @@ void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo)
 
 	if (wo == WO_bdev_flush && !dc->disk_flushes)
 		wo = WO_drain_io;
-	if (wo == WO_drain_io && dc->no_disk_drain)
+	if (wo == WO_drain_io && !dc->disk_drain)
 		wo = WO_none;
 	rcu_read_unlock();
 	mdev->write_ordering = wo;
diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h
index 945c4dd3470c..30ad6600b444 100644
--- a/include/linux/drbd_genl.h
+++ b/include/linux/drbd_genl.h
@@ -124,7 +124,7 @@ GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf,
 
 	__flg_field_def(16, GENLA_F_MANDATORY,	disk_barrier, 1)
 	__flg_field_def(17, GENLA_F_MANDATORY,	disk_flushes, 1)
-	__flg_field_def(18, GENLA_F_MANDATORY,	no_disk_drain, 0)
+	__flg_field_def(18, GENLA_F_MANDATORY,	disk_drain, 1)
 	__flg_field_def(19, GENLA_F_MANDATORY,	no_md_flush, 0)
 )
 
-- 
cgit v1.2.3


From e544046ab842ab93c275a6fc4e043c1cb637076d Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruen@linbit.com>
Date: Wed, 4 May 2011 15:25:35 +0200
Subject: drbd: Turn no-md-flushes into md-flushes={yes|no}

Change the --no-md-flushes drbdsetup command line option as well as
the no_md_flush netlink packet.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_nl.c | 6 +++---
 include/linux/drbd_genl.h    | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 9af097416e26..4a946a877bde 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -1449,10 +1449,10 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 
 	/* Reset the "barriers don't work" bits here, then force meta data to
 	 * be written, to ensure we determine if barriers are supported. */
-	if (new_disk_conf->no_md_flush)
-		set_bit(MD_NO_FUA, &mdev->flags);
-	else
+	if (new_disk_conf->md_flushes)
 		clear_bit(MD_NO_FUA, &mdev->flags);
+	else
+		set_bit(MD_NO_FUA, &mdev->flags);
 
 	/* Point of no return reached.
 	 * Devices and memory are no longer released by error cleanup below.
diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h
index 30ad6600b444..53518fc23154 100644
--- a/include/linux/drbd_genl.h
+++ b/include/linux/drbd_genl.h
@@ -125,7 +125,7 @@ GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf,
 	__flg_field_def(16, GENLA_F_MANDATORY,	disk_barrier, 1)
 	__flg_field_def(17, GENLA_F_MANDATORY,	disk_flushes, 1)
 	__flg_field_def(18, GENLA_F_MANDATORY,	disk_drain, 1)
-	__flg_field_def(19, GENLA_F_MANDATORY,	no_md_flush, 0)
+	__flg_field_def(19, GENLA_F_MANDATORY,	md_flushes, 1)
 )
 
 GENL_struct(DRBD_NLA_RESOURCE_OPTS, 4, res_opts,
-- 
cgit v1.2.3


From bb77d34ecc6fe6cdc3f4f0841a516695c2eacc04 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruen@linbit.com>
Date: Wed, 4 May 2011 15:25:35 +0200
Subject: drbd: Turn no-tcp-cork into tcp-cork={yes|no}

Change the --no-tcp-cork drbdsetup command line option as well as
the no_cork netlink packet.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_receiver.c | 8 ++++----
 drivers/block/drbd/drbd_worker.c   | 2 +-
 include/linux/drbd_genl.h          | 2 +-
 3 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 5d1bdda8ec9f..b4858bb78940 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -5040,7 +5040,7 @@ int drbd_asender(struct drbd_thread *thi)
 	int expect   = header_size;
 	bool ping_timeout_active = false;
 	struct net_conf *nc;
-	int ping_timeo, no_cork, ping_int;
+	int ping_timeo, tcp_cork, ping_int;
 
 	current->policy = SCHED_RR;  /* Make this a realtime task! */
 	current->rt_priority = 2;    /* more important than all other tasks */
@@ -5051,7 +5051,7 @@ int drbd_asender(struct drbd_thread *thi)
 		rcu_read_lock();
 		nc = rcu_dereference(tconn->net_conf);
 		ping_timeo = nc->ping_timeo;
-		no_cork = nc->no_cork;
+		tcp_cork = nc->tcp_cork;
 		ping_int = nc->ping_int;
 		rcu_read_unlock();
 
@@ -5066,14 +5066,14 @@ int drbd_asender(struct drbd_thread *thi)
 
 		/* TODO: conditionally cork; it may hurt latency if we cork without
 		   much to send */
-		if (!no_cork)
+		if (tcp_cork)
 			drbd_tcp_cork(tconn->meta.socket);
 		if (tconn_finish_peer_reqs(tconn)) {
 			conn_err(tconn, "tconn_finish_peer_reqs() failed\n");
 			goto reconnect;
 		}
 		/* but unconditionally uncork unless disabled */
-		if (!no_cork)
+		if (tcp_cork)
 			drbd_tcp_uncork(tconn->meta.socket);
 
 		/* short circuit, recv_msg would return EINTR anyways. */
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index e37c42d5dd6e..78c3de49eff6 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -1694,7 +1694,7 @@ int drbd_worker(struct drbd_thread *thi)
 
 			rcu_read_lock();
 			nc = rcu_dereference(tconn->net_conf);
-			cork = nc ? !nc->no_cork : 0;
+			cork = nc ? nc->tcp_cork : 0;
 			rcu_read_unlock();
 
 			if (tconn->data.socket && cork)
diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h
index 53518fc23154..6632d10f1ee1 100644
--- a/include/linux/drbd_genl.h
+++ b/include/linux/drbd_genl.h
@@ -162,7 +162,7 @@ GENL_struct(DRBD_NLA_NET_CONF, 5, net_conf,
 	__u32_field_def(25,	GENLA_F_MANDATORY,	cong_extents, DRBD_CONG_EXTENTS_DEF)
 	__flg_field_def(26, GENLA_F_MANDATORY,	two_primaries, 0)
 	__flg_field(27, GENLA_F_MANDATORY | GENLA_F_INVARIANT,	want_lose)
-	__flg_field_def(28, GENLA_F_MANDATORY,	no_cork, 0)
+	__flg_field_def(28, GENLA_F_MANDATORY,	tcp_cork, 1)
 	__flg_field_def(29, GENLA_F_MANDATORY,	always_asbp, 0)
 	__flg_field(30, GENLA_F_MANDATORY | GENLA_F_INVARIANT,	dry_run)
 	__flg_field_def(31,	GENLA_F_MANDATORY,	use_rle, 0)
-- 
cgit v1.2.3


From 7bac3e6f7e74993475a94487effe05dc1f68bdc7 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruen@linbit.com>
Date: Fri, 6 May 2011 17:50:57 +0200
Subject: drbd: Also define the default values of boolean flags in a single
 place

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 include/linux/drbd_genl.h   | 16 ++++++++--------
 include/linux/drbd_limits.h | 10 ++++++++++
 2 files changed, 18 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h
index 6632d10f1ee1..02647dc8c67c 100644
--- a/include/linux/drbd_genl.h
+++ b/include/linux/drbd_genl.h
@@ -122,10 +122,10 @@ GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf,
 	__u32_field_def(14,	GENLA_F_MANDATORY,	c_max_rate, DRBD_C_MAX_RATE_DEF)
 	__u32_field_def(15,	GENLA_F_MANDATORY,	c_min_rate, DRBD_C_MIN_RATE_DEF)
 
-	__flg_field_def(16, GENLA_F_MANDATORY,	disk_barrier, 1)
-	__flg_field_def(17, GENLA_F_MANDATORY,	disk_flushes, 1)
-	__flg_field_def(18, GENLA_F_MANDATORY,	disk_drain, 1)
-	__flg_field_def(19, GENLA_F_MANDATORY,	md_flushes, 1)
+	__flg_field_def(16, GENLA_F_MANDATORY,	disk_barrier, DRBD_DISK_BARRIER_DEF)
+	__flg_field_def(17, GENLA_F_MANDATORY,	disk_flushes, DRBD_DISK_FLUSHES_DEF)
+	__flg_field_def(18, GENLA_F_MANDATORY,	disk_drain, DRBD_DISK_DRAIN_DEF)
+	__flg_field_def(19, GENLA_F_MANDATORY,	md_flushes, DRBD_MD_FLUSHES_DEF)
 )
 
 GENL_struct(DRBD_NLA_RESOURCE_OPTS, 4, res_opts,
@@ -160,12 +160,12 @@ GENL_struct(DRBD_NLA_NET_CONF, 5, net_conf,
 	__u32_field_def(23,	GENLA_F_MANDATORY,	on_congestion, DRBD_ON_CONGESTION_DEF)
 	__u32_field_def(24,	GENLA_F_MANDATORY,	cong_fill, DRBD_CONG_FILL_DEF)
 	__u32_field_def(25,	GENLA_F_MANDATORY,	cong_extents, DRBD_CONG_EXTENTS_DEF)
-	__flg_field_def(26, GENLA_F_MANDATORY,	two_primaries, 0)
+	__flg_field_def(26, GENLA_F_MANDATORY,	two_primaries, DRBD_ALLOW_TWO_PRIMARIES_DEF)
 	__flg_field(27, GENLA_F_MANDATORY | GENLA_F_INVARIANT,	want_lose)
-	__flg_field_def(28, GENLA_F_MANDATORY,	tcp_cork, 1)
-	__flg_field_def(29, GENLA_F_MANDATORY,	always_asbp, 0)
+	__flg_field_def(28, GENLA_F_MANDATORY,	tcp_cork, DRBD_TCP_CORK_DEF)
+	__flg_field_def(29, GENLA_F_MANDATORY,	always_asbp, DRBD_ALWAYS_ASBP_DEF)
 	__flg_field(30, GENLA_F_MANDATORY | GENLA_F_INVARIANT,	dry_run)
-	__flg_field_def(31,	GENLA_F_MANDATORY,	use_rle, 0)
+	__flg_field_def(31,	GENLA_F_MANDATORY,	use_rle, DRBD_USE_RLE_DEF)
 )
 
 GENL_struct(DRBD_NLA_SET_ROLE_PARMS, 6, set_role_parms,
diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h
index bcebb016fda3..3d3e2d5125cb 100644
--- a/include/linux/drbd_limits.h
+++ b/include/linux/drbd_limits.h
@@ -170,4 +170,14 @@
 
 #define DRBD_PROTOCOL_DEF DRBD_PROT_C
 
+#define DRBD_DISK_BARRIER_DEF	0
+#define DRBD_DISK_FLUSHES_DEF	1
+#define DRBD_DISK_DRAIN_DEF	1
+#define DRBD_MD_FLUSHES_DEF	1
+#define DRBD_TCP_CORK_DEF	1
+
+#define DRBD_ALLOW_TWO_PRIMARIES_DEF	0
+#define DRBD_ALWAYS_ASBP_DEF	0
+#define DRBD_USE_RLE_DEF	0
+
 #endif
-- 
cgit v1.2.3


From 6139f60dc192e2c5478c1126d1aff7905dc0a98a Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruen@linbit.com>
Date: Fri, 6 May 2011 20:00:02 +0200
Subject: drbd: Rename the want_lose field/flag to discard_my_data

This is what it is called in config files and on the command line as
well.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_int.h      |  2 +-
 drivers/block/drbd/drbd_main.c     |  6 +++---
 drivers/block/drbd/drbd_nl.c       |  4 ++--
 drivers/block/drbd/drbd_receiver.c | 14 +++++++-------
 include/linux/drbd_genl.h          |  2 +-
 5 files changed, 14 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 56b190c65546..fa36757ffc4a 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -413,7 +413,7 @@ struct p_rs_param_95 {
 } __packed;
 
 enum drbd_conn_flags {
-	CF_WANT_LOSE = 1,
+	CF_DISCARD_MY_DATA = 1,
 	CF_DRY_RUN = 2,
 };
 
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 22c2b4c881da..86c8bc5ac603 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -943,8 +943,8 @@ int __drbd_send_protocol(struct drbd_tconn *tconn)
 	p->after_sb_2p   = cpu_to_be32(nc->after_sb_2p);
 	p->two_primaries = cpu_to_be32(nc->two_primaries);
 	cf = 0;
-	if (nc->want_lose)
-		cf |= CF_WANT_LOSE;
+	if (nc->discard_my_data)
+		cf |= CF_DISCARD_MY_DATA;
 	if (nc->dry_run)
 		cf |= CF_DRY_RUN;
 	p->conn_flags    = cpu_to_be32(cf);
@@ -988,7 +988,7 @@ int _drbd_send_uuids(struct drbd_conf *mdev, u64 uuid_flags)
 	mdev->comm_bm_set = drbd_bm_total_weight(mdev);
 	p->uuid[UI_SIZE] = cpu_to_be64(mdev->comm_bm_set);
 	rcu_read_lock();
-	uuid_flags |= rcu_dereference(mdev->tconn->net_conf)->want_lose ? 1 : 0;
+	uuid_flags |= rcu_dereference(mdev->tconn->net_conf)->discard_my_data ? 1 : 0;
 	rcu_read_unlock();
 	uuid_flags |= test_bit(CRASHED_PRIMARY, &mdev->flags) ? 2 : 0;
 	uuid_flags |= mdev->new_state_tmp.disk == D_INCONSISTENT ? 4 : 0;
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 195428ee6052..9a82306adf92 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -606,7 +606,7 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force)
 		mutex_lock(&mdev->tconn->conf_update);
 		nc = mdev->tconn->net_conf;
 		if (nc)
-			nc->want_lose = 0; /* without copy; single bit op is atomic */
+			nc->discard_my_data = 0; /* without copy; single bit op is atomic */
 		mutex_unlock(&mdev->tconn->conf_update);
 
 		set_disk_ro(mdev->vdisk, false);
@@ -1738,7 +1738,7 @@ _check_net_options(struct drbd_tconn *tconn, struct net_conf *old_conf, struct n
 			if (new_conf->wire_protocol == DRBD_PROT_A && fp == FP_STONITH)
 				return ERR_STONITH_AND_PROT_A;
 		}
-		if (mdev->state.role == R_PRIMARY && new_conf->want_lose)
+		if (mdev->state.role == R_PRIMARY && new_conf->discard_my_data)
 			return ERR_DISCARD;
 	}
 
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index aa42967398e3..e4e8f8a408d1 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -2908,9 +2908,9 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol
 	}
 
 	if (hg == -100) {
-		if (nc->want_lose && !(mdev->p_uuid[UI_FLAGS]&1))
+		if (nc->discard_my_data && !(mdev->p_uuid[UI_FLAGS]&1))
 			hg = -1;
-		if (!nc->want_lose && (mdev->p_uuid[UI_FLAGS]&1))
+		if (!nc->discard_my_data && (mdev->p_uuid[UI_FLAGS]&1))
 			hg = 1;
 
 		if (abs(hg) < 100)
@@ -3009,7 +3009,7 @@ static int receive_protocol(struct drbd_tconn *tconn, struct packet_info *pi)
 {
 	struct p_protocol *p = pi->data;
 	int p_proto, p_after_sb_0p, p_after_sb_1p, p_after_sb_2p;
-	int p_want_lose, p_two_primaries, cf;
+	int p_discard_my_data, p_two_primaries, cf;
 	struct net_conf *nc;
 
 	p_proto		= be32_to_cpu(p->protocol);
@@ -3018,7 +3018,7 @@ static int receive_protocol(struct drbd_tconn *tconn, struct packet_info *pi)
 	p_after_sb_2p	= be32_to_cpu(p->after_sb_2p);
 	p_two_primaries = be32_to_cpu(p->two_primaries);
 	cf		= be32_to_cpu(p->conn_flags);
-	p_want_lose = cf & CF_WANT_LOSE;
+	p_discard_my_data = cf & CF_DISCARD_MY_DATA;
 
 	if (tconn->agreed_pro_version >= 87) {
 		char integrity_alg[SHARED_SECRET_MAX];
@@ -3075,8 +3075,8 @@ static int receive_protocol(struct drbd_tconn *tconn, struct packet_info *pi)
 		goto disconnect_rcu_unlock;
 	}
 
-	if (p_want_lose && nc->want_lose) {
-		conn_err(tconn, "both sides have the 'want_lose' flag set\n");
+	if (p_discard_my_data && nc->discard_my_data) {
+		conn_err(tconn, "both sides have the 'discard_my_data' flag set\n");
 		goto disconnect_rcu_unlock;
 	}
 
@@ -3806,7 +3806,7 @@ static int receive_state(struct drbd_tconn *tconn, struct packet_info *pi)
 	}
 
 	mutex_lock(&mdev->tconn->conf_update);
-	mdev->tconn->net_conf->want_lose = 0; /* without copy; single bit op is atomic */
+	mdev->tconn->net_conf->discard_my_data = 0; /* without copy; single bit op is atomic */
 	mutex_unlock(&mdev->tconn->conf_update);
 
 	drbd_md_sync(mdev); /* update connected indicator, la_size, ... */
diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h
index 02647dc8c67c..6aece551d87e 100644
--- a/include/linux/drbd_genl.h
+++ b/include/linux/drbd_genl.h
@@ -161,7 +161,7 @@ GENL_struct(DRBD_NLA_NET_CONF, 5, net_conf,
 	__u32_field_def(24,	GENLA_F_MANDATORY,	cong_fill, DRBD_CONG_FILL_DEF)
 	__u32_field_def(25,	GENLA_F_MANDATORY,	cong_extents, DRBD_CONG_EXTENTS_DEF)
 	__flg_field_def(26, GENLA_F_MANDATORY,	two_primaries, DRBD_ALLOW_TWO_PRIMARIES_DEF)
-	__flg_field(27, GENLA_F_MANDATORY | GENLA_F_INVARIANT,	want_lose)
+	__flg_field(27, GENLA_F_MANDATORY | GENLA_F_INVARIANT,	discard_my_data)
 	__flg_field_def(28, GENLA_F_MANDATORY,	tcp_cork, DRBD_TCP_CORK_DEF)
 	__flg_field_def(29, GENLA_F_MANDATORY,	always_asbp, DRBD_ALWAYS_ASBP_DEF)
 	__flg_field(30, GENLA_F_MANDATORY | GENLA_F_INVARIANT,	dry_run)
-- 
cgit v1.2.3


From 6394b9358e6187414b7a6de7ba2c681ee4a790ac Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruen@linbit.com>
Date: Wed, 11 May 2011 14:29:52 +0200
Subject: drbd: Refer to resync-rate consistently throughout the code

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_int.h      | 6 +++---
 drivers/block/drbd/drbd_main.c     | 4 ++--
 drivers/block/drbd/drbd_receiver.c | 2 +-
 include/linux/drbd_genl.h          | 2 +-
 include/linux/drbd_limits.h        | 7 ++++---
 5 files changed, 11 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index fa36757ffc4a..8026adacd3d2 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -389,21 +389,21 @@ struct p_barrier_ack {
 } __packed;
 
 struct p_rs_param {
-	u32 rate;
+	u32 resync_rate;
 
 	      /* Since protocol version 88 and higher. */
 	char verify_alg[0];
 } __packed;
 
 struct p_rs_param_89 {
-	u32 rate;
+	u32 resync_rate;
         /* protocol version 89: */
 	char verify_alg[SHARED_SECRET_MAX];
 	char csums_alg[SHARED_SECRET_MAX];
 } __packed;
 
 struct p_rs_param_95 {
-	u32 rate;
+	u32 resync_rate;
 	char verify_alg[SHARED_SECRET_MAX];
 	char csums_alg[SHARED_SECRET_MAX];
 	u32 c_plan_ahead;
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 86c8bc5ac603..26d7763d5255 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -888,14 +888,14 @@ int drbd_send_sync_param(struct drbd_conf *mdev)
 
 	if (get_ldev(mdev)) {
 		dc = rcu_dereference(mdev->ldev->disk_conf);
-		p->rate = cpu_to_be32(dc->resync_rate);
+		p->resync_rate = cpu_to_be32(dc->resync_rate);
 		p->c_plan_ahead = cpu_to_be32(dc->c_plan_ahead);
 		p->c_delay_target = cpu_to_be32(dc->c_delay_target);
 		p->c_fill_target = cpu_to_be32(dc->c_fill_target);
 		p->c_max_rate = cpu_to_be32(dc->c_max_rate);
 		put_ldev(mdev);
 	} else {
-		p->rate = cpu_to_be32(DRBD_RATE_DEF);
+		p->resync_rate = cpu_to_be32(DRBD_RESYNC_RATE_DEF);
 		p->c_plan_ahead = cpu_to_be32(DRBD_C_PLAN_AHEAD_DEF);
 		p->c_delay_target = cpu_to_be32(DRBD_C_DELAY_TARGET_DEF);
 		p->c_fill_target = cpu_to_be32(DRBD_C_FILL_TARGET_DEF);
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index e4e8f8a408d1..684f79542727 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -3221,7 +3221,7 @@ static int receive_SyncParam(struct drbd_tconn *tconn, struct packet_info *pi)
 		old_disk_conf = mdev->ldev->disk_conf;
 		*new_disk_conf = *old_disk_conf;
 
-		new_disk_conf->resync_rate = be32_to_cpu(p->rate);
+		new_disk_conf->resync_rate = be32_to_cpu(p->resync_rate);
 	}
 
 	if (apv >= 88) {
diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h
index 6aece551d87e..778708d92939 100644
--- a/include/linux/drbd_genl.h
+++ b/include/linux/drbd_genl.h
@@ -113,7 +113,7 @@ GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf,
 	__u32_field_def(6, GENLA_F_MANDATORY,	on_io_error, DRBD_ON_IO_ERROR_DEF)
 	__u32_field_def(7, GENLA_F_MANDATORY,	fencing, DRBD_FENCING_DEF)
 
-	__u32_field_def(8,	GENLA_F_MANDATORY,	resync_rate, DRBD_RATE_DEF)
+	__u32_field_def(8,	GENLA_F_MANDATORY,	resync_rate, DRBD_RESYNC_RATE_DEF)
 	__u32_field_def(9,	GENLA_F_MANDATORY,	resync_after, DRBD_AFTER_DEF)
 	__u32_field_def(10,	GENLA_F_MANDATORY,	al_extents, DRBD_AL_EXTENTS_DEF)
 	__u32_field_def(11,	GENLA_F_MANDATORY,	c_plan_ahead, DRBD_C_PLAN_AHEAD_DEF)
diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h
index 3d3e2d5125cb..48339ae69d50 100644
--- a/include/linux/drbd_limits.h
+++ b/include/linux/drbd_limits.h
@@ -98,10 +98,11 @@
 
 /* syncer { */
   /* FIXME allow rate to be zero? */
-#define DRBD_RATE_MIN 1
+#define DRBD_RESYNC_RATE_MIN 1
 /* channel bonding 10 GbE, or other hardware */
-#define DRBD_RATE_MAX (4 << 20)
-#define DRBD_RATE_DEF 250  /* kb/second */
+#define DRBD_RESYNC_RATE_MAX (4 << 20)
+#define DRBD_RESYNC_RATE_DEF 250
+#define DRBD_RESYNC_RATE_SCALE 'k'  /* kilobytes */
 
   /* less than 7 would hit performance unnecessarily.
    * 919 slots context information per transaction,
-- 
cgit v1.2.3


From 69ef82dea4c34e4a0541fc3f415b0fef70fe12b0 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruen@linbit.com>
Date: Wed, 11 May 2011 14:34:35 +0200
Subject: drbd: Refer to connect-int consistently throughout the code

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_receiver.c | 12 ++++++------
 include/linux/drbd_genl.h          |  2 +-
 2 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 684f79542727..7deade196a33 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -617,7 +617,7 @@ static struct socket *drbd_try_connect(struct drbd_tconn *tconn)
 	struct sockaddr_in6 peer_in6;
 	struct net_conf *nc;
 	int err, peer_addr_len, my_addr_len;
-	int sndbuf_size, rcvbuf_size, try_connect_int;
+	int sndbuf_size, rcvbuf_size, connect_int;
 	int disconnect_on_error = 1;
 
 	rcu_read_lock();
@@ -629,7 +629,7 @@ static struct socket *drbd_try_connect(struct drbd_tconn *tconn)
 
 	sndbuf_size = nc->sndbuf_size;
 	rcvbuf_size = nc->rcvbuf_size;
-	try_connect_int = nc->try_connect_int;
+	connect_int = nc->connect_int;
 
 	my_addr_len = min_t(int, nc->my_addr_len, sizeof(src_in6));
 	memcpy(&src_in6, nc->my_addr, my_addr_len);
@@ -653,7 +653,7 @@ static struct socket *drbd_try_connect(struct drbd_tconn *tconn)
 	}
 
 	sock->sk->sk_rcvtimeo =
-	sock->sk->sk_sndtimeo = try_connect_int * HZ;
+	sock->sk->sk_sndtimeo = connect_int * HZ;
 	drbd_setbufsize(sock, sndbuf_size, rcvbuf_size);
 
        /* explicitly bind to the configured IP as source IP
@@ -702,7 +702,7 @@ out:
 static struct socket *drbd_wait_for_connect(struct drbd_tconn *tconn)
 {
 	int timeo, err, my_addr_len;
-	int sndbuf_size, rcvbuf_size, try_connect_int;
+	int sndbuf_size, rcvbuf_size, connect_int;
 	struct socket *s_estab = NULL, *s_listen;
 	struct sockaddr_in6 my_addr;
 	struct net_conf *nc;
@@ -717,7 +717,7 @@ static struct socket *drbd_wait_for_connect(struct drbd_tconn *tconn)
 
 	sndbuf_size = nc->sndbuf_size;
 	rcvbuf_size = nc->rcvbuf_size;
-	try_connect_int = nc->try_connect_int;
+	connect_int = nc->connect_int;
 
 	my_addr_len = min_t(int, nc->my_addr_len, sizeof(struct sockaddr_in6));
 	memcpy(&my_addr, nc->my_addr, my_addr_len);
@@ -731,7 +731,7 @@ static struct socket *drbd_wait_for_connect(struct drbd_tconn *tconn)
 		goto out;
 	}
 
-	timeo = try_connect_int * HZ;
+	timeo = connect_int * HZ;
 	timeo += (random32() & 1) ? timeo / 7 : -timeo / 7; /* 28.5% random jitter */
 
 	s_listen->sk->sk_reuse    = 1; /* SO_REUSEADDR */
diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h
index 778708d92939..67c816c0fc28 100644
--- a/include/linux/drbd_genl.h
+++ b/include/linux/drbd_genl.h
@@ -143,7 +143,7 @@ GENL_struct(DRBD_NLA_NET_CONF, 5, net_conf,
 	__str_field_def(6,	GENLA_F_MANDATORY,	verify_alg,     SHARED_SECRET_MAX)
 	__str_field_def(7,	GENLA_F_MANDATORY,	csums_alg,	SHARED_SECRET_MAX)
 	__u32_field_def(8,	GENLA_F_MANDATORY,	wire_protocol, DRBD_PROTOCOL_DEF)
-	__u32_field_def(9,	GENLA_F_MANDATORY,	try_connect_int, DRBD_CONNECT_INT_DEF)
+	__u32_field_def(9,	GENLA_F_MANDATORY,	connect_int, DRBD_CONNECT_INT_DEF)
 	__u32_field_def(10,	GENLA_F_MANDATORY,	timeout, DRBD_TIMEOUT_DEF)
 	__u32_field_def(11,	GENLA_F_MANDATORY,	ping_int, DRBD_PING_INT_DEF)
 	__u32_field_def(12,	GENLA_F_MANDATORY,	ping_timeo, DRBD_PING_TIMEO_DEF)
-- 
cgit v1.2.3


From 95f8efd08bcce65df994049a292b94e56c7ada67 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruen@linbit.com>
Date: Thu, 12 May 2011 11:15:34 +0200
Subject: drbd: Fix the upper limit of resync-after

The 32-bit resync_after netlink field takes a device minor number as
parameter, which is no longer limited to 255.  We cannot statically
verify which device numbers are valid, so set the ummer limit to the
highest possible signed 32-bit integer.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_int.h    |  4 ++--
 drivers/block/drbd/drbd_nl.c     |  4 ++--
 drivers/block/drbd/drbd_worker.c | 26 +++++++++++++-------------
 include/linux/drbd.h             |  4 ++--
 include/linux/drbd_genl.h        |  2 +-
 include/linux/drbd_limits.h      |  7 ++++---
 6 files changed, 24 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 8026adacd3d2..e16722840767 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -1408,8 +1408,8 @@ extern int drbd_khelper(struct drbd_conf *mdev, char *cmd);
 
 /* drbd_worker.c */
 extern int drbd_worker(struct drbd_thread *thi);
-enum drbd_ret_code drbd_sync_after_valid(struct drbd_conf *mdev, int o_minor);
-void drbd_sync_after_changed(struct drbd_conf *mdev);
+enum drbd_ret_code drbd_resync_after_valid(struct drbd_conf *mdev, int o_minor);
+void drbd_resync_after_changed(struct drbd_conf *mdev);
 extern void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side);
 extern void resume_next_sg(struct drbd_conf *mdev);
 extern void suspend_other_sg(struct drbd_conf *mdev);
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 9a82306adf92..74c27f1507f3 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -1183,10 +1183,10 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
 	}
 
 	write_lock_irq(&global_state_lock);
-	retcode = drbd_sync_after_valid(mdev, new_disk_conf->resync_after);
+	retcode = drbd_resync_after_valid(mdev, new_disk_conf->resync_after);
 	if (retcode == NO_ERROR) {
 		rcu_assign_pointer(mdev->ldev->disk_conf, new_disk_conf);
-		drbd_sync_after_changed(mdev);
+		drbd_resync_after_changed(mdev);
 	}
 	write_unlock_irq(&global_state_lock);
 
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index ec8f4245ef9a..6410c55831e0 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -57,7 +57,7 @@ static int w_make_ov_request(struct drbd_work *w, int cancel);
 
 /* About the global_state_lock
    Each state transition on an device holds a read lock. In case we have
-   to evaluate the sync after dependencies, we grab a write lock, because
+   to evaluate the resync after dependencies, we grab a write lock, because
    we need stable states on all devices for that.  */
 rwlock_t global_state_lock;
 
@@ -1340,17 +1340,17 @@ int w_restart_disk_io(struct drbd_work *w, int cancel)
 static int _drbd_may_sync_now(struct drbd_conf *mdev)
 {
 	struct drbd_conf *odev = mdev;
-	int ra;
+	int resync_after;
 
 	while (1) {
 		if (!odev->ldev)
 			return 1;
 		rcu_read_lock();
-		ra = rcu_dereference(odev->ldev->disk_conf)->resync_after;
+		resync_after = rcu_dereference(odev->ldev->disk_conf)->resync_after;
 		rcu_read_unlock();
-		if (ra == -1)
+		if (resync_after == -1)
 			return 1;
-		odev = minor_to_mdev(ra);
+		odev = minor_to_mdev(resync_after);
 		if (!expect(odev))
 			return 1;
 		if ((odev->state.conn >= C_SYNC_SOURCE &&
@@ -1426,36 +1426,36 @@ void suspend_other_sg(struct drbd_conf *mdev)
 }
 
 /* caller must hold global_state_lock */
-enum drbd_ret_code drbd_sync_after_valid(struct drbd_conf *mdev, int o_minor)
+enum drbd_ret_code drbd_resync_after_valid(struct drbd_conf *mdev, int o_minor)
 {
 	struct drbd_conf *odev;
-	int ra;
+	int resync_after;
 
 	if (o_minor == -1)
 		return NO_ERROR;
 	if (o_minor < -1 || minor_to_mdev(o_minor) == NULL)
-		return ERR_SYNC_AFTER;
+		return ERR_RESYNC_AFTER;
 
 	/* check for loops */
 	odev = minor_to_mdev(o_minor);
 	while (1) {
 		if (odev == mdev)
-			return ERR_SYNC_AFTER_CYCLE;
+			return ERR_RESYNC_AFTER_CYCLE;
 
 		rcu_read_lock();
-		ra = rcu_dereference(odev->ldev->disk_conf)->resync_after;
+		resync_after = rcu_dereference(odev->ldev->disk_conf)->resync_after;
 		rcu_read_unlock();
 		/* dependency chain ends here, no cycles. */
-		if (ra == -1)
+		if (resync_after == -1)
 			return NO_ERROR;
 
 		/* follow the dependency chain */
-		odev = minor_to_mdev(ra);
+		odev = minor_to_mdev(resync_after);
 	}
 }
 
 /* caller must hold global_state_lock */
-void drbd_sync_after_changed(struct drbd_conf *mdev)
+void drbd_resync_after_changed(struct drbd_conf *mdev)
 {
 	int changes;
 
diff --git a/include/linux/drbd.h b/include/linux/drbd.h
index 6c7c85d8fc41..05063e6db81f 100644
--- a/include/linux/drbd.h
+++ b/include/linux/drbd.h
@@ -130,8 +130,8 @@ enum drbd_ret_code {
 	ERR_INTR		= 129, /* EINTR */
 	ERR_RESIZE_RESYNC	= 130,
 	ERR_NO_PRIMARY		= 131,
-	ERR_SYNC_AFTER		= 132,
-	ERR_SYNC_AFTER_CYCLE	= 133,
+	ERR_RESYNC_AFTER	= 132,
+	ERR_RESYNC_AFTER_CYCLE	= 133,
 	ERR_PAUSE_IS_SET	= 134,
 	ERR_PAUSE_IS_CLEAR	= 135,
 	ERR_PACKET_NR		= 137,
diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h
index 67c816c0fc28..a59466f7f661 100644
--- a/include/linux/drbd_genl.h
+++ b/include/linux/drbd_genl.h
@@ -114,7 +114,7 @@ GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf,
 	__u32_field_def(7, GENLA_F_MANDATORY,	fencing, DRBD_FENCING_DEF)
 
 	__u32_field_def(8,	GENLA_F_MANDATORY,	resync_rate, DRBD_RESYNC_RATE_DEF)
-	__u32_field_def(9,	GENLA_F_MANDATORY,	resync_after, DRBD_AFTER_DEF)
+	__u32_field_def(9,	GENLA_F_MANDATORY,	resync_after, DRBD_RESYNC_AFTER_DEF)
 	__u32_field_def(10,	GENLA_F_MANDATORY,	al_extents, DRBD_AL_EXTENTS_DEF)
 	__u32_field_def(11,	GENLA_F_MANDATORY,	c_plan_ahead, DRBD_C_PLAN_AHEAD_DEF)
 	__u32_field_def(12,	GENLA_F_MANDATORY,	c_delay_target, DRBD_C_DELAY_TARGET_DEF)
diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h
index 48339ae69d50..c4a8f0fef7b2 100644
--- a/include/linux/drbd_limits.h
+++ b/include/linux/drbd_limits.h
@@ -113,9 +113,10 @@
 #define DRBD_AL_EXTENTS_MAX  6433
 #define DRBD_AL_EXTENTS_DEF  127
 
-#define DRBD_AFTER_MIN  -1
-#define DRBD_AFTER_MAX  255
-#define DRBD_AFTER_DEF  -1
+#define DRBD_RESYNC_AFTER_MIN  -1
+#define DRBD_RESYNC_AFTER_MAX  (1<<30)
+#define DRBD_RESYNC_AFTER_DEF  -1
+#define DRBD_RESYNC_AFTER_SCALE '1'
 
 /* } */
 
-- 
cgit v1.2.3


From 3a45abd577727d2268e190d372600f8652883453 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruen@linbit.com>
Date: Thu, 12 May 2011 12:02:54 +0200
Subject: drbd: Convert resync-after into a signed netlink field

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 include/linux/drbd_genl.h         | 2 +-
 include/linux/genl_magic_func.h   | 3 +++
 include/linux/genl_magic_struct.h | 2 ++
 3 files changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h
index a59466f7f661..7b174a093a8d 100644
--- a/include/linux/drbd_genl.h
+++ b/include/linux/drbd_genl.h
@@ -114,7 +114,7 @@ GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf,
 	__u32_field_def(7, GENLA_F_MANDATORY,	fencing, DRBD_FENCING_DEF)
 
 	__u32_field_def(8,	GENLA_F_MANDATORY,	resync_rate, DRBD_RESYNC_RATE_DEF)
-	__u32_field_def(9,	GENLA_F_MANDATORY,	resync_after, DRBD_RESYNC_AFTER_DEF)
+	__s32_field_def(9,	GENLA_F_MANDATORY,	resync_after, DRBD_RESYNC_AFTER_DEF)
 	__u32_field_def(10,	GENLA_F_MANDATORY,	al_extents, DRBD_AL_EXTENTS_DEF)
 	__u32_field_def(11,	GENLA_F_MANDATORY,	c_plan_ahead, DRBD_C_PLAN_AHEAD_DEF)
 	__u32_field_def(12,	GENLA_F_MANDATORY,	c_delay_target, DRBD_C_DELAY_TARGET_DEF)
diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h
index e908f1c50355..94e839aafae3 100644
--- a/include/linux/genl_magic_func.h
+++ b/include/linux/genl_magic_func.h
@@ -437,6 +437,9 @@ static inline int s_name ## _to_unpriv_skb(struct sk_buff *skb,		\
 #undef __u32_field_def
 #define __u32_field_def(attr_nr, attr_flag, name, default)		\
 	x->name = default;
+#undef __s32_field_def
+#define __s32_field_def(attr_nr, attr_flag, name, default)		\
+	x->name = default;
 #undef __flg_field_def
 #define __flg_field_def(attr_nr, attr_flag, name, default)		\
 	x->name = default;
diff --git a/include/linux/genl_magic_struct.h b/include/linux/genl_magic_struct.h
index b1ddbb5bd725..0fca21fd1af5 100644
--- a/include/linux/genl_magic_struct.h
+++ b/include/linux/genl_magic_struct.h
@@ -115,6 +115,8 @@ enum {
 	__flg_field(attr_nr, attr_flag, name)
 #define __u32_field_def(attr_nr, attr_flag, name, default) \
 	__u32_field(attr_nr, attr_flag, name)
+#define __s32_field_def(attr_nr, attr_flag, name, default) \
+	__s32_field(attr_nr, attr_flag, name)
 #define __str_field_def(attr_nr, attr_flag, name, maxlen) \
 	__str_field(attr_nr, attr_flag, name, maxlen)
 
-- 
cgit v1.2.3


From c5482bbd9607bf38cbc952eacaa429e6ba3160a0 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruen@linbit.com>
Date: Wed, 11 May 2011 14:44:55 +0200
Subject: drbd: Rename DISK_SIZE_SECT -> DISK_SIZE

We don't have the units in constant names in other places, either.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 include/linux/drbd_limits.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h
index c4a8f0fef7b2..8f8bbea545e0 100644
--- a/include/linux/drbd_limits.h
+++ b/include/linux/drbd_limits.h
@@ -125,9 +125,10 @@
  * the upper limit with 64bit kernel, enough ram and flexible meta data
  * is 1 PiB, currently. */
 /* DRBD_MAX_SECTORS */
-#define DRBD_DISK_SIZE_SECT_MIN  0
-#define DRBD_DISK_SIZE_SECT_MAX  (1 * (2LLU << 40))
-#define DRBD_DISK_SIZE_SECT_DEF  0 /* = disabled = no user size... */
+#define DRBD_DISK_SIZE_MIN  0
+#define DRBD_DISK_SIZE_MAX  (16 * (2LLU << 30))
+#define DRBD_DISK_SIZE_DEF  0 /* = disabled = no user size... */
+#define DRBD_DISK_SIZE_SCALE 's'  /* sectors */
 
 #define DRBD_ON_IO_ERROR_DEF EP_DETACH
 #define DRBD_FENCING_DEF FP_DONT_CARE
-- 
cgit v1.2.3


From dcb20d1a8e7d9602e52a9b673ae4d7f746d2cbb2 Mon Sep 17 00:00:00 2001
From: Philipp Reisner <philipp.reisner@linbit.com>
Date: Mon, 16 May 2011 14:30:24 +0200
Subject: drbd: Refuse to change network options online when...

* the peer does not speak protocol_version 100 and the
  user wants to change one of:
    - wire_protocol
    - two_primaries
    - integrity_alg

* the user wants to remove the allow_two_primaries flag
  when there are two primaries

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_nl.c | 22 ++++++++++++++++++----
 include/linux/drbd.h         |  1 +
 2 files changed, 19 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 74c27f1507f3..133a6724657d 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -1722,10 +1722,24 @@ _check_net_options(struct drbd_tconn *tconn, struct net_conf *old_conf, struct n
 	struct drbd_conf *mdev;
 	int i;
 
-	if (old_conf && tconn->agreed_pro_version < 100 &&
-	    tconn->cstate == C_WF_REPORT_PARAMS &&
-	    new_conf->wire_protocol != old_conf->wire_protocol)
-		return ERR_NEED_APV_100;
+	if (old_conf && tconn->cstate == C_WF_REPORT_PARAMS && tconn->agreed_pro_version < 100) {
+		if (new_conf->wire_protocol != old_conf->wire_protocol)
+			return ERR_NEED_APV_100;
+
+		if (new_conf->two_primaries != old_conf->two_primaries)
+			return ERR_NEED_APV_100;
+
+		if (!new_conf->integrity_alg != !old_conf->integrity_alg)
+			return ERR_NEED_APV_100;
+
+		if (strcmp(new_conf->integrity_alg, old_conf->integrity_alg))
+			return ERR_NEED_APV_100;
+	}
+
+	if (!new_conf->two_primaries &&
+	    conn_highest_role(tconn) == R_PRIMARY &&
+	    conn_highest_peer(tconn) == R_PRIMARY)
+		return ERR_NEED_ALLOW_TWO_PRI;
 
 	if (new_conf->two_primaries &&
 	    (new_conf->wire_protocol != DRBD_PROT_C))
diff --git a/include/linux/drbd.h b/include/linux/drbd.h
index 05063e6db81f..679e81123229 100644
--- a/include/linux/drbd.h
+++ b/include/linux/drbd.h
@@ -161,6 +161,7 @@ enum drbd_ret_code {
 	ERR_MINOR_EXISTS	= 161,
 	ERR_INVALID_REQUEST	= 162,
 	ERR_NEED_APV_100	= 163,
+	ERR_NEED_ALLOW_TWO_PRI  = 164,
 
 	/* insert new ones above this line */
 	AFTER_LAST_ERR_CODE
-- 
cgit v1.2.3


From 309f0b70ab789bf85c5f5f32dbc466d42f024747 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruen@linbit.com>
Date: Fri, 13 May 2011 01:24:14 +0200
Subject: drbd: Use more generic constant names

These constants are useful for the same purpose in more than one place.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 include/linux/drbd_limits.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h
index 8f8bbea545e0..3627f760966e 100644
--- a/include/linux/drbd_limits.h
+++ b/include/linux/drbd_limits.h
@@ -113,10 +113,10 @@
 #define DRBD_AL_EXTENTS_MAX  6433
 #define DRBD_AL_EXTENTS_DEF  127
 
-#define DRBD_RESYNC_AFTER_MIN  -1
-#define DRBD_RESYNC_AFTER_MAX  (1<<30)
-#define DRBD_RESYNC_AFTER_DEF  -1
-#define DRBD_RESYNC_AFTER_SCALE '1'
+#define DRBD_MINOR_NUMBER_MIN  -1
+#define DRBD_MINOR_NUMBER_MAX  (1<<30)
+#define DRBD_MINOR_NUMBER_DEF  -1
+#define DRBD_MINOR_NUMBER_SCALE '1'
 
 /* } */
 
-- 
cgit v1.2.3


From 509100e6012db92f4af3796436b450447c6c8268 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruen@linbit.com>
Date: Tue, 17 May 2011 13:29:46 +0200
Subject: drbd: Output signed / unsigned netlink fields correctly

Note: All input values are still treated as signed; unsigned long long values
are still broken.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 include/linux/genl_magic_func.h   | 23 +++++++++++------
 include/linux/genl_magic_struct.h | 52 ++++++++++++++++++++++++++++-----------
 2 files changed, 53 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h
index 94e839aafae3..2ae16126c6a4 100644
--- a/include/linux/genl_magic_func.h
+++ b/include/linux/genl_magic_func.h
@@ -78,12 +78,13 @@ static struct nla_policy s_name ## _nl_policy[] __read_mostly =		\
 { s_fields };
 
 #undef __field
-#define __field(attr_nr, attr_flag, name, nla_type, _type, __get, __put) \
+#define __field(attr_nr, attr_flag, name, nla_type, _type, __get,	\
+		 __put, __is_signed)					\
 	[__nla_type(attr_nr)] = { .type = nla_type },
 
 #undef __array
 #define __array(attr_nr, attr_flag, name, nla_type, _type, maxlen,	\
-		__get, __put)						\
+		__get, __put, __is_signed)				\
 	[__nla_type(attr_nr)] = { .type = nla_type,			\
 		.len = maxlen - (nla_type == NLA_NUL_STRING) },
 
@@ -241,7 +242,8 @@ static int s_name ## _from_attrs_for_change(struct s_name *s,		\
 		}
 
 #undef __field
-#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put)	\
+#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put,	\
+		__is_signed)						\
 	__assign(attr_nr, attr_flag, name, nla_type, type,		\
 			if (s)						\
 				s->name = __get(nla);			\
@@ -249,7 +251,8 @@ static int s_name ## _from_attrs_for_change(struct s_name *s,		\
 
 /* validate_nla() already checked nla_len <= maxlen appropriately. */
 #undef __array
-#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, __get, __put) \
+#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen,	\
+		__get, __put, __is_signed)				\
 	__assign(attr_nr, attr_flag, name, nla_type, type,		\
 			if (s)						\
 				s->name ## _len =			\
@@ -410,14 +413,16 @@ static inline int s_name ## _to_unpriv_skb(struct sk_buff *skb,		\
 
 
 #undef __field
-#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put)	\
+#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put,	\
+		__is_signed)						\
 	if (!exclude_sensitive || !((attr_flag) & GENLA_F_SENSITIVE)) {	\
 		DPRINT_FIELD(">>", nla_type, name, s, NULL);		\
 		__put(skb, attr_nr, s->name);				\
 	}
 
 #undef __array
-#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, __get, __put) \
+#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen,	\
+		__get, __put, __is_signed)				\
 	if (!exclude_sensitive || !((attr_flag) & GENLA_F_SENSITIVE)) {	\
 		DPRINT_ARRAY(">>",nla_type, name, s, NULL);		\
 		__put(skb, attr_nr, min_t(int, maxlen,			\
@@ -431,9 +436,11 @@ static inline int s_name ## _to_unpriv_skb(struct sk_buff *skb,		\
 /* Functions for initializing structs to default values.  */
 
 #undef __field
-#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put)
+#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put,	\
+		__is_signed)
 #undef __array
-#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, __get, __put)
+#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen,	\
+		__get, __put, __is_signed)
 #undef __u32_field_def
 #define __u32_field_def(attr_nr, attr_flag, name, default)		\
 	x->name = default;
diff --git a/include/linux/genl_magic_struct.h b/include/linux/genl_magic_struct.h
index 0fca21fd1af5..ba911da84d9f 100644
--- a/include/linux/genl_magic_struct.h
+++ b/include/linux/genl_magic_struct.h
@@ -87,28 +87,28 @@ enum {
 /* possible field types */
 #define __flg_field(attr_nr, attr_flag, name) \
 	__field(attr_nr, attr_flag, name, NLA_U8, char, \
-			nla_get_u8, NLA_PUT_U8)
+			nla_get_u8, NLA_PUT_U8, false)
 #define __u8_field(attr_nr, attr_flag, name)	\
 	__field(attr_nr, attr_flag, name, NLA_U8, unsigned char, \
-			nla_get_u8, NLA_PUT_U8)
+			nla_get_u8, NLA_PUT_U8, false)
 #define __u16_field(attr_nr, attr_flag, name)	\
 	__field(attr_nr, attr_flag, name, NLA_U16, __u16, \
-			nla_get_u16, NLA_PUT_U16)
+			nla_get_u16, NLA_PUT_U16, false)
 #define __u32_field(attr_nr, attr_flag, name)	\
 	__field(attr_nr, attr_flag, name, NLA_U32, __u32, \
-			nla_get_u32, NLA_PUT_U32)
+			nla_get_u32, NLA_PUT_U32, false)
 #define __s32_field(attr_nr, attr_flag, name)	\
 	__field(attr_nr, attr_flag, name, NLA_U32, __s32, \
-			nla_get_u32, NLA_PUT_U32)
+			nla_get_u32, NLA_PUT_U32, true)
 #define __u64_field(attr_nr, attr_flag, name)	\
 	__field(attr_nr, attr_flag, name, NLA_U64, __u64, \
-			nla_get_u64, NLA_PUT_U64)
+			nla_get_u64, NLA_PUT_U64, false)
 #define __str_field(attr_nr, attr_flag, name, maxlen) \
 	__array(attr_nr, attr_flag, name, NLA_NUL_STRING, char, maxlen, \
-			nla_strlcpy, NLA_PUT)
+			nla_strlcpy, NLA_PUT, false)
 #define __bin_field(attr_nr, attr_flag, name, maxlen) \
 	__array(attr_nr, attr_flag, name, NLA_BINARY, char, maxlen, \
-			nla_memcpy, NLA_PUT)
+			nla_memcpy, NLA_PUT, false)
 
 /* fields with default values */
 #define __flg_field_def(attr_nr, attr_flag, name, default) \
@@ -174,11 +174,13 @@ enum {								\
 };
 
 #undef __field
-#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put)	\
+#define __field(attr_nr, attr_flag, name, nla_type, type,	\
+		__get, __put, __is_signed)			\
 	T_ ## name = (__u16)(attr_nr | attr_flag),
 
 #undef __array
-#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, __get, __put) \
+#define __array(attr_nr, attr_flag, name, nla_type, type,	\
+		maxlen, __get, __put, __is_signed)		\
 	T_ ## name = (__u16)(attr_nr | attr_flag),
 
 #include GENL_MAGIC_INCLUDE_FILE
@@ -238,11 +240,13 @@ static inline void ct_assert_unique_ ## s_name ## _attributes(void)	\
 }
 
 #undef __field
-#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put)	\
+#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put,	\
+		__is_signed)						\
 	case attr_nr:
 
 #undef __array
-#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, __get, __put) \
+#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen,	\
+		__get, __put, __is_signed)				\
 	case attr_nr:
 
 #include GENL_MAGIC_INCLUDE_FILE
@@ -260,16 +264,36 @@ static inline void ct_assert_unique_ ## s_name ## _attributes(void)	\
 struct s_name { s_fields };
 
 #undef __field
-#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put) \
+#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put,	\
+		__is_signed)						\
 	type name;
 
 #undef __array
-#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, __get, __put) \
+#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen,	\
+		__get, __put, __is_signed)				\
 	type name[maxlen];	\
 	__u32 name ## _len;
 
 #include GENL_MAGIC_INCLUDE_FILE
 
+#undef GENL_struct
+#define GENL_struct(tag_name, tag_number, s_name, s_fields)		\
+enum {									\
+	s_fields							\
+};
+
+#undef __field
+#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put,	\
+		is_signed)						\
+	F_ ## name ## _IS_SIGNED = is_signed,
+
+#undef __array
+#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen,	\
+		__get, __put, is_signed)				\
+	F_ ## name ## _IS_SIGNED = is_signed,
+
+#include GENL_MAGIC_INCLUDE_FILE
+
 /* }}}1 */
 #endif /* GENL_MAGIC_STRUCT_H */
 /* vim: set foldmethod=marker nofoldenable : */
-- 
cgit v1.2.3


From bbbef2d5ad8203f67274196dac90754ae106a463 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruen@linbit.com>
Date: Wed, 18 May 2011 16:48:16 +0200
Subject: drbd: Remove unused GENLA_F_MAY_IGNORE flag

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 include/linux/genl_magic_struct.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/genl_magic_struct.h b/include/linux/genl_magic_struct.h
index ba911da84d9f..f3c3425ac30f 100644
--- a/include/linux/genl_magic_struct.h
+++ b/include/linux/genl_magic_struct.h
@@ -40,11 +40,6 @@ extern void CONCAT_(GENL_MAGIC_FAMILY, _genl_unregister)(void);
  * yet implemented features, if newer userland tries to use them even though
  * the genl_family version clearly indicates they are not available.
  *
- * @GENLA_F_MAY_IGNORE: To clearly document the fact, for good measure.
- * To be used for API extensions for things that have sane defaults,
- * so newer userland can still talk to older kernel, knowing it will
- * silently ignore these attributes if not yet known.
- *
  * NOTE: These flags overload
  *   NLA_F_NESTED		(1 << 15)
  *   NLA_F_NET_BYTEORDER	(1 << 14)
@@ -55,7 +50,6 @@ extern void CONCAT_(GENL_MAGIC_FAMILY, _genl_unregister)(void);
  * See also: nla_type()
  */
 enum {
-	GENLA_F_MAY_IGNORE	= 0,
 	GENLA_F_MANDATORY	= 1 << 14,
 	GENLA_F_REQUIRED	= 1 << 15,
 
-- 
cgit v1.2.3


From 5f9359201b5cf1d94fe0e0c47fcba38cfc921863 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruen@linbit.com>
Date: Thu, 19 May 2011 17:39:28 +0200
Subject: drbd: Make drbd's use of netlink attribute flags less confusing

Make it more clear in the flag names which flags are internal to drbd, and
which are not.

The check for mandatory attributes is the only extension visible at the netlink
layer.  Attributes with this flag set would look like unknown attributes to
some kernel versions.  The netlink layer would ignore them and also skip
consistency checks on the attribute type and legth.  To avoid this, we check
for mandatory attributes first, remove the mandatory flag, and then process the
attributes normally.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 include/linux/drbd_genl.h         | 244 +++++++++++++++++++-------------------
 include/linux/genl_magic_func.h   | 107 ++++++++---------
 include/linux/genl_magic_struct.h |  64 ++++------
 3 files changed, 193 insertions(+), 222 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h
index 7b174a093a8d..4ceecb9307d9 100644
--- a/include/linux/drbd_genl.h
+++ b/include/linux/drbd_genl.h
@@ -86,7 +86,7 @@
  */
 GENL_struct(DRBD_NLA_CFG_REPLY, 1, drbd_cfg_reply,
 		/* "arbitrary" size strings, nla_policy.len = 0 */
-	__str_field(1, GENLA_F_MANDATORY,	info_text, 0)
+	__str_field(1, DRBD_GENLA_F_MANDATORY,	info_text, 0)
 )
 
 /* Configuration requests typically need a context to operate on.
@@ -95,133 +95,133 @@ GENL_struct(DRBD_NLA_CFG_REPLY, 1, drbd_cfg_reply,
  * and/or the replication group (aka resource) name,
  * and the volume id within the resource. */
 GENL_struct(DRBD_NLA_CFG_CONTEXT, 2, drbd_cfg_context,
-	__u32_field(1, GENLA_F_MANDATORY,	ctx_volume)
-	__str_field(2, GENLA_F_MANDATORY,	ctx_conn_name, 128)
+	__u32_field(1, DRBD_GENLA_F_MANDATORY,	ctx_volume)
+	__str_field(2, DRBD_GENLA_F_MANDATORY,	ctx_conn_name, 128)
 )
 
 GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf,
-	__str_field(1, GENLA_F_REQUIRED | GENLA_F_INVARIANT,	backing_dev,	128)
-	__str_field(2, GENLA_F_REQUIRED | GENLA_F_INVARIANT,	meta_dev,	128)
-	__s32_field(3, GENLA_F_REQUIRED | GENLA_F_INVARIANT,	meta_dev_idx)
+	__str_field(1, DRBD_F_REQUIRED | DRBD_F_INVARIANT,	backing_dev,	128)
+	__str_field(2, DRBD_F_REQUIRED | DRBD_F_INVARIANT,	meta_dev,	128)
+	__s32_field(3, DRBD_F_REQUIRED | DRBD_F_INVARIANT,	meta_dev_idx)
 
 	/* use the resize command to try and change the disk_size */
-	__u64_field(4, GENLA_F_MANDATORY | GENLA_F_INVARIANT,	disk_size)
+	__u64_field(4, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT,	disk_size)
 	/* we could change the max_bio_bvecs,
 	 * but it won't propagate through the stack */
-	__u32_field(5, GENLA_F_MANDATORY | GENLA_F_INVARIANT,	max_bio_bvecs)
-
-	__u32_field_def(6, GENLA_F_MANDATORY,	on_io_error, DRBD_ON_IO_ERROR_DEF)
-	__u32_field_def(7, GENLA_F_MANDATORY,	fencing, DRBD_FENCING_DEF)
-
-	__u32_field_def(8,	GENLA_F_MANDATORY,	resync_rate, DRBD_RESYNC_RATE_DEF)
-	__s32_field_def(9,	GENLA_F_MANDATORY,	resync_after, DRBD_RESYNC_AFTER_DEF)
-	__u32_field_def(10,	GENLA_F_MANDATORY,	al_extents, DRBD_AL_EXTENTS_DEF)
-	__u32_field_def(11,	GENLA_F_MANDATORY,	c_plan_ahead, DRBD_C_PLAN_AHEAD_DEF)
-	__u32_field_def(12,	GENLA_F_MANDATORY,	c_delay_target, DRBD_C_DELAY_TARGET_DEF)
-	__u32_field_def(13,	GENLA_F_MANDATORY,	c_fill_target, DRBD_C_FILL_TARGET_DEF)
-	__u32_field_def(14,	GENLA_F_MANDATORY,	c_max_rate, DRBD_C_MAX_RATE_DEF)
-	__u32_field_def(15,	GENLA_F_MANDATORY,	c_min_rate, DRBD_C_MIN_RATE_DEF)
-
-	__flg_field_def(16, GENLA_F_MANDATORY,	disk_barrier, DRBD_DISK_BARRIER_DEF)
-	__flg_field_def(17, GENLA_F_MANDATORY,	disk_flushes, DRBD_DISK_FLUSHES_DEF)
-	__flg_field_def(18, GENLA_F_MANDATORY,	disk_drain, DRBD_DISK_DRAIN_DEF)
-	__flg_field_def(19, GENLA_F_MANDATORY,	md_flushes, DRBD_MD_FLUSHES_DEF)
+	__u32_field(5, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT,	max_bio_bvecs)
+
+	__u32_field_def(6, DRBD_GENLA_F_MANDATORY,	on_io_error, DRBD_ON_IO_ERROR_DEF)
+	__u32_field_def(7, DRBD_GENLA_F_MANDATORY,	fencing, DRBD_FENCING_DEF)
+
+	__u32_field_def(8,	DRBD_GENLA_F_MANDATORY,	resync_rate, DRBD_RESYNC_RATE_DEF)
+	__s32_field_def(9,	DRBD_GENLA_F_MANDATORY,	resync_after, DRBD_MINOR_NUMBER_DEF)
+	__u32_field_def(10,	DRBD_GENLA_F_MANDATORY,	al_extents, DRBD_AL_EXTENTS_DEF)
+	__u32_field_def(11,	DRBD_GENLA_F_MANDATORY,	c_plan_ahead, DRBD_C_PLAN_AHEAD_DEF)
+	__u32_field_def(12,	DRBD_GENLA_F_MANDATORY,	c_delay_target, DRBD_C_DELAY_TARGET_DEF)
+	__u32_field_def(13,	DRBD_GENLA_F_MANDATORY,	c_fill_target, DRBD_C_FILL_TARGET_DEF)
+	__u32_field_def(14,	DRBD_GENLA_F_MANDATORY,	c_max_rate, DRBD_C_MAX_RATE_DEF)
+	__u32_field_def(15,	DRBD_GENLA_F_MANDATORY,	c_min_rate, DRBD_C_MIN_RATE_DEF)
+
+	__flg_field_def(16, DRBD_GENLA_F_MANDATORY,	disk_barrier, DRBD_DISK_BARRIER_DEF)
+	__flg_field_def(17, DRBD_GENLA_F_MANDATORY,	disk_flushes, DRBD_DISK_FLUSHES_DEF)
+	__flg_field_def(18, DRBD_GENLA_F_MANDATORY,	disk_drain, DRBD_DISK_DRAIN_DEF)
+	__flg_field_def(19, DRBD_GENLA_F_MANDATORY,	md_flushes, DRBD_MD_FLUSHES_DEF)
 )
 
 GENL_struct(DRBD_NLA_RESOURCE_OPTS, 4, res_opts,
-	__str_field_def(1,	GENLA_F_MANDATORY,	cpu_mask,       32)
-	__u32_field_def(2,	GENLA_F_MANDATORY,	on_no_data, DRBD_ON_NO_DATA_DEF)
+	__str_field_def(1,	DRBD_GENLA_F_MANDATORY,	cpu_mask,       32)
+	__u32_field_def(2,	DRBD_GENLA_F_MANDATORY,	on_no_data, DRBD_ON_NO_DATA_DEF)
 )
 
 GENL_struct(DRBD_NLA_NET_CONF, 5, net_conf,
-	__bin_field(1,	GENLA_F_REQUIRED | GENLA_F_INVARIANT,	my_addr,	128)
-	__bin_field(2,	GENLA_F_REQUIRED | GENLA_F_INVARIANT,	peer_addr,	128)
-	__str_field_def(3,	GENLA_F_MANDATORY | GENLA_F_SENSITIVE,
+	__bin_field(1,	DRBD_F_REQUIRED | DRBD_F_INVARIANT,	my_addr,	128)
+	__bin_field(2,	DRBD_F_REQUIRED | DRBD_F_INVARIANT,	peer_addr,	128)
+	__str_field_def(3,	DRBD_GENLA_F_MANDATORY | DRBD_F_SENSITIVE,
 						shared_secret,	SHARED_SECRET_MAX)
-	__str_field_def(4,	GENLA_F_MANDATORY,	cram_hmac_alg,	SHARED_SECRET_MAX)
-	__str_field_def(5,	GENLA_F_MANDATORY,	integrity_alg,	SHARED_SECRET_MAX)
-	__str_field_def(6,	GENLA_F_MANDATORY,	verify_alg,     SHARED_SECRET_MAX)
-	__str_field_def(7,	GENLA_F_MANDATORY,	csums_alg,	SHARED_SECRET_MAX)
-	__u32_field_def(8,	GENLA_F_MANDATORY,	wire_protocol, DRBD_PROTOCOL_DEF)
-	__u32_field_def(9,	GENLA_F_MANDATORY,	connect_int, DRBD_CONNECT_INT_DEF)
-	__u32_field_def(10,	GENLA_F_MANDATORY,	timeout, DRBD_TIMEOUT_DEF)
-	__u32_field_def(11,	GENLA_F_MANDATORY,	ping_int, DRBD_PING_INT_DEF)
-	__u32_field_def(12,	GENLA_F_MANDATORY,	ping_timeo, DRBD_PING_TIMEO_DEF)
-	__u32_field_def(13,	GENLA_F_MANDATORY,	sndbuf_size, DRBD_SNDBUF_SIZE_DEF)
-	__u32_field_def(14,	GENLA_F_MANDATORY,	rcvbuf_size, DRBD_RCVBUF_SIZE_DEF)
-	__u32_field_def(15,	GENLA_F_MANDATORY,	ko_count, DRBD_KO_COUNT_DEF)
-	__u32_field_def(16,	GENLA_F_MANDATORY,	max_buffers, DRBD_MAX_BUFFERS_DEF)
-	__u32_field_def(17,	GENLA_F_MANDATORY,	max_epoch_size, DRBD_MAX_EPOCH_SIZE_DEF)
-	__u32_field_def(18,	GENLA_F_MANDATORY,	unplug_watermark, DRBD_UNPLUG_WATERMARK_DEF)
-	__u32_field_def(19,	GENLA_F_MANDATORY,	after_sb_0p, DRBD_AFTER_SB_0P_DEF)
-	__u32_field_def(20,	GENLA_F_MANDATORY,	after_sb_1p, DRBD_AFTER_SB_1P_DEF)
-	__u32_field_def(21,	GENLA_F_MANDATORY,	after_sb_2p, DRBD_AFTER_SB_2P_DEF)
-	__u32_field_def(22,	GENLA_F_MANDATORY,	rr_conflict, DRBD_RR_CONFLICT_DEF)
-	__u32_field_def(23,	GENLA_F_MANDATORY,	on_congestion, DRBD_ON_CONGESTION_DEF)
-	__u32_field_def(24,	GENLA_F_MANDATORY,	cong_fill, DRBD_CONG_FILL_DEF)
-	__u32_field_def(25,	GENLA_F_MANDATORY,	cong_extents, DRBD_CONG_EXTENTS_DEF)
-	__flg_field_def(26, GENLA_F_MANDATORY,	two_primaries, DRBD_ALLOW_TWO_PRIMARIES_DEF)
-	__flg_field(27, GENLA_F_MANDATORY | GENLA_F_INVARIANT,	discard_my_data)
-	__flg_field_def(28, GENLA_F_MANDATORY,	tcp_cork, DRBD_TCP_CORK_DEF)
-	__flg_field_def(29, GENLA_F_MANDATORY,	always_asbp, DRBD_ALWAYS_ASBP_DEF)
-	__flg_field(30, GENLA_F_MANDATORY | GENLA_F_INVARIANT,	dry_run)
-	__flg_field_def(31,	GENLA_F_MANDATORY,	use_rle, DRBD_USE_RLE_DEF)
+	__str_field_def(4,	DRBD_GENLA_F_MANDATORY,	cram_hmac_alg,	SHARED_SECRET_MAX)
+	__str_field_def(5,	DRBD_GENLA_F_MANDATORY,	integrity_alg,	SHARED_SECRET_MAX)
+	__str_field_def(6,	DRBD_GENLA_F_MANDATORY,	verify_alg,     SHARED_SECRET_MAX)
+	__str_field_def(7,	DRBD_GENLA_F_MANDATORY,	csums_alg,	SHARED_SECRET_MAX)
+	__u32_field_def(8,	DRBD_GENLA_F_MANDATORY,	wire_protocol, DRBD_PROTOCOL_DEF)
+	__u32_field_def(9,	DRBD_GENLA_F_MANDATORY,	connect_int, DRBD_CONNECT_INT_DEF)
+	__u32_field_def(10,	DRBD_GENLA_F_MANDATORY,	timeout, DRBD_TIMEOUT_DEF)
+	__u32_field_def(11,	DRBD_GENLA_F_MANDATORY,	ping_int, DRBD_PING_INT_DEF)
+	__u32_field_def(12,	DRBD_GENLA_F_MANDATORY,	ping_timeo, DRBD_PING_TIMEO_DEF)
+	__u32_field_def(13,	DRBD_GENLA_F_MANDATORY,	sndbuf_size, DRBD_SNDBUF_SIZE_DEF)
+	__u32_field_def(14,	DRBD_GENLA_F_MANDATORY,	rcvbuf_size, DRBD_RCVBUF_SIZE_DEF)
+	__u32_field_def(15,	DRBD_GENLA_F_MANDATORY,	ko_count, DRBD_KO_COUNT_DEF)
+	__u32_field_def(16,	DRBD_GENLA_F_MANDATORY,	max_buffers, DRBD_MAX_BUFFERS_DEF)
+	__u32_field_def(17,	DRBD_GENLA_F_MANDATORY,	max_epoch_size, DRBD_MAX_EPOCH_SIZE_DEF)
+	__u32_field_def(18,	DRBD_GENLA_F_MANDATORY,	unplug_watermark, DRBD_UNPLUG_WATERMARK_DEF)
+	__u32_field_def(19,	DRBD_GENLA_F_MANDATORY,	after_sb_0p, DRBD_AFTER_SB_0P_DEF)
+	__u32_field_def(20,	DRBD_GENLA_F_MANDATORY,	after_sb_1p, DRBD_AFTER_SB_1P_DEF)
+	__u32_field_def(21,	DRBD_GENLA_F_MANDATORY,	after_sb_2p, DRBD_AFTER_SB_2P_DEF)
+	__u32_field_def(22,	DRBD_GENLA_F_MANDATORY,	rr_conflict, DRBD_RR_CONFLICT_DEF)
+	__u32_field_def(23,	DRBD_GENLA_F_MANDATORY,	on_congestion, DRBD_ON_CONGESTION_DEF)
+	__u32_field_def(24,	DRBD_GENLA_F_MANDATORY,	cong_fill, DRBD_CONG_FILL_DEF)
+	__u32_field_def(25,	DRBD_GENLA_F_MANDATORY,	cong_extents, DRBD_CONG_EXTENTS_DEF)
+	__flg_field_def(26, DRBD_GENLA_F_MANDATORY,	two_primaries, DRBD_ALLOW_TWO_PRIMARIES_DEF)
+	__flg_field(27, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT,	discard_my_data)
+	__flg_field_def(28, DRBD_GENLA_F_MANDATORY,	tcp_cork, DRBD_TCP_CORK_DEF)
+	__flg_field_def(29, DRBD_GENLA_F_MANDATORY,	always_asbp, DRBD_ALWAYS_ASBP_DEF)
+	__flg_field(30, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT,	dry_run)
+	__flg_field_def(31,	DRBD_GENLA_F_MANDATORY,	use_rle, DRBD_USE_RLE_DEF)
 )
 
 GENL_struct(DRBD_NLA_SET_ROLE_PARMS, 6, set_role_parms,
-	__flg_field(1, GENLA_F_MANDATORY,	assume_uptodate)
+	__flg_field(1, DRBD_GENLA_F_MANDATORY,	assume_uptodate)
 )
 
 GENL_struct(DRBD_NLA_RESIZE_PARMS, 7, resize_parms,
-	__u64_field(1, GENLA_F_MANDATORY,	resize_size)
-	__flg_field(2, GENLA_F_MANDATORY,	resize_force)
-	__flg_field(3, GENLA_F_MANDATORY,	no_resync)
+	__u64_field(1, DRBD_GENLA_F_MANDATORY,	resize_size)
+	__flg_field(2, DRBD_GENLA_F_MANDATORY,	resize_force)
+	__flg_field(3, DRBD_GENLA_F_MANDATORY,	no_resync)
 )
 
 GENL_struct(DRBD_NLA_STATE_INFO, 8, state_info,
 	/* the reason of the broadcast,
 	 * if this is an event triggered broadcast. */
-	__u32_field(1, GENLA_F_MANDATORY,	sib_reason)
-	__u32_field(2, GENLA_F_REQUIRED,	current_state)
-	__u64_field(3, GENLA_F_MANDATORY,	capacity)
-	__u64_field(4, GENLA_F_MANDATORY,	ed_uuid)
+	__u32_field(1, DRBD_GENLA_F_MANDATORY,	sib_reason)
+	__u32_field(2, DRBD_F_REQUIRED,	current_state)
+	__u64_field(3, DRBD_GENLA_F_MANDATORY,	capacity)
+	__u64_field(4, DRBD_GENLA_F_MANDATORY,	ed_uuid)
 
 	/* These are for broadcast from after state change work.
 	 * prev_state and new_state are from the moment the state change took
 	 * place, new_state is not neccessarily the same as current_state,
 	 * there may have been more state changes since.  Which will be
 	 * broadcasted soon, in their respective after state change work.  */
-	__u32_field(5, GENLA_F_MANDATORY,	prev_state)
-	__u32_field(6, GENLA_F_MANDATORY,	new_state)
+	__u32_field(5, DRBD_GENLA_F_MANDATORY,	prev_state)
+	__u32_field(6, DRBD_GENLA_F_MANDATORY,	new_state)
 
 	/* if we have a local disk: */
-	__bin_field(7, GENLA_F_MANDATORY,	uuids, (UI_SIZE*sizeof(__u64)))
-	__u32_field(8, GENLA_F_MANDATORY,	disk_flags)
-	__u64_field(9, GENLA_F_MANDATORY,	bits_total)
-	__u64_field(10, GENLA_F_MANDATORY,	bits_oos)
+	__bin_field(7, DRBD_GENLA_F_MANDATORY,	uuids, (UI_SIZE*sizeof(__u64)))
+	__u32_field(8, DRBD_GENLA_F_MANDATORY,	disk_flags)
+	__u64_field(9, DRBD_GENLA_F_MANDATORY,	bits_total)
+	__u64_field(10, DRBD_GENLA_F_MANDATORY,	bits_oos)
 	/* and in case resync or online verify is active */
-	__u64_field(11, GENLA_F_MANDATORY,	bits_rs_total)
-	__u64_field(12, GENLA_F_MANDATORY,	bits_rs_failed)
+	__u64_field(11, DRBD_GENLA_F_MANDATORY,	bits_rs_total)
+	__u64_field(12, DRBD_GENLA_F_MANDATORY,	bits_rs_failed)
 
 	/* for pre and post notifications of helper execution */
-	__str_field(13, GENLA_F_MANDATORY,	helper, 32)
-	__u32_field(14, GENLA_F_MANDATORY,	helper_exit_code)
+	__str_field(13, DRBD_GENLA_F_MANDATORY,	helper, 32)
+	__u32_field(14, DRBD_GENLA_F_MANDATORY,	helper_exit_code)
 )
 
 GENL_struct(DRBD_NLA_START_OV_PARMS, 9, start_ov_parms,
-	__u64_field(1, GENLA_F_MANDATORY,	ov_start_sector)
+	__u64_field(1, DRBD_GENLA_F_MANDATORY,	ov_start_sector)
 )
 
 GENL_struct(DRBD_NLA_NEW_C_UUID_PARMS, 10, new_c_uuid_parms,
-	__flg_field(1, GENLA_F_MANDATORY, clear_bm)
+	__flg_field(1, DRBD_GENLA_F_MANDATORY, clear_bm)
 )
 
 GENL_struct(DRBD_NLA_TIMEOUT_PARMS, 11, timeout_parms,
-	__u32_field(1,	GENLA_F_REQUIRED,	timeout_type)
+	__u32_field(1,	DRBD_F_REQUIRED,	timeout_type)
 )
 
 GENL_struct(DRBD_NLA_DISCONNECT_PARMS, 12, disconnect_parms,
-	__flg_field(1, GENLA_F_MANDATORY,	force_disconnect)
+	__flg_field(1, DRBD_GENLA_F_MANDATORY,	force_disconnect)
 )
 
 /*
@@ -232,11 +232,11 @@ GENL_mc_group(events)
 	/* kernel -> userspace announcement of changes */
 GENL_notification(
 	DRBD_EVENT, 1, events,
-	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)
-	GENL_tla_expected(DRBD_NLA_STATE_INFO, GENLA_F_REQUIRED)
-	GENL_tla_expected(DRBD_NLA_NET_CONF, GENLA_F_MANDATORY)
-	GENL_tla_expected(DRBD_NLA_DISK_CONF, GENLA_F_MANDATORY)
-	GENL_tla_expected(DRBD_NLA_SYNCER_CONF, GENLA_F_MANDATORY)
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
+	GENL_tla_expected(DRBD_NLA_STATE_INFO, DRBD_F_REQUIRED)
+	GENL_tla_expected(DRBD_NLA_NET_CONF, DRBD_GENLA_F_MANDATORY)
+	GENL_tla_expected(DRBD_NLA_DISK_CONF, DRBD_GENLA_F_MANDATORY)
+	GENL_tla_expected(DRBD_NLA_SYNCER_CONF, DRBD_GENLA_F_MANDATORY)
 )
 
 	/* query kernel for specific or all info */
@@ -250,116 +250,116 @@ GENL_op(
 	),
 	/* To select the object .doit.
 	 * Or a subset of objects in .dumpit. */
-	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_MANDATORY)
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_GENLA_F_MANDATORY)
 )
 
 #if 0
 	/* TO BE DONE */
 	/* create or destroy resources, aka replication groups */
 GENL_op(DRBD_ADM_CREATE_RESOURCE, 3, GENL_doit(drbd_adm_create_resource),
-	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED))
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
 GENL_op(DRBD_ADM_DELETE_RESOURCE, 4, GENL_doit(drbd_adm_delete_resource),
-	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED))
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
 #endif
 
 	/* add DRBD minor devices as volumes to resources */
 GENL_op(DRBD_ADM_ADD_MINOR, 5, GENL_doit(drbd_adm_add_minor),
-	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED))
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
 GENL_op(DRBD_ADM_DEL_MINOR, 6, GENL_doit(drbd_adm_delete_minor),
-	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED))
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
 
 	/* add or delete replication links to resources */
 GENL_op(DRBD_ADM_ADD_LINK, 7, GENL_doit(drbd_adm_create_connection),
-	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED))
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
 GENL_op(DRBD_ADM_DEL_LINK, 8, GENL_doit(drbd_adm_delete_connection),
-	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED))
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
 
 GENL_op(DRBD_ADM_RESOURCE_OPTS, 9,
 	GENL_doit(drbd_adm_resource_opts),
-	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)
-	GENL_tla_expected(DRBD_NLA_RESOURCE_OPTS, GENLA_F_MANDATORY)
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
+	GENL_tla_expected(DRBD_NLA_RESOURCE_OPTS, DRBD_GENLA_F_MANDATORY)
 )
 
 GENL_op(
 	DRBD_ADM_CONNECT, 10,
 	GENL_doit(drbd_adm_connect),
-	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)
-	GENL_tla_expected(DRBD_NLA_NET_CONF, GENLA_F_REQUIRED)
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
+	GENL_tla_expected(DRBD_NLA_NET_CONF, DRBD_F_REQUIRED)
 )
 
 GENL_op(
 	DRBD_ADM_CHG_NET_OPTS, 29,
 	GENL_doit(drbd_adm_net_opts),
-	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)
-	GENL_tla_expected(DRBD_NLA_NET_CONF, GENLA_F_REQUIRED)
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
+	GENL_tla_expected(DRBD_NLA_NET_CONF, DRBD_F_REQUIRED)
 )
 
 GENL_op(DRBD_ADM_DISCONNECT, 11, GENL_doit(drbd_adm_disconnect),
-	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED))
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
 
 GENL_op(DRBD_ADM_ATTACH, 12,
 	GENL_doit(drbd_adm_attach),
-	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)
-	GENL_tla_expected(DRBD_NLA_DISK_CONF, GENLA_F_REQUIRED)
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
+	GENL_tla_expected(DRBD_NLA_DISK_CONF, DRBD_F_REQUIRED)
 )
 
 GENL_op(DRBD_ADM_CHG_DISK_OPTS, 28,
 	GENL_doit(drbd_adm_disk_opts),
-	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)
-	GENL_tla_expected(DRBD_NLA_DISK_OPTS, GENLA_F_REQUIRED)
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
+	GENL_tla_expected(DRBD_NLA_DISK_OPTS, DRBD_F_REQUIRED)
 )
 
 GENL_op(
 	DRBD_ADM_RESIZE, 13,
 	GENL_doit(drbd_adm_resize),
-	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)
-	GENL_tla_expected(DRBD_NLA_RESIZE_PARMS, GENLA_F_MANDATORY)
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
+	GENL_tla_expected(DRBD_NLA_RESIZE_PARMS, DRBD_GENLA_F_MANDATORY)
 )
 
 GENL_op(
 	DRBD_ADM_PRIMARY, 14,
 	GENL_doit(drbd_adm_set_role),
-	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)
-	GENL_tla_expected(DRBD_NLA_SET_ROLE_PARMS, GENLA_F_REQUIRED)
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
+	GENL_tla_expected(DRBD_NLA_SET_ROLE_PARMS, DRBD_F_REQUIRED)
 )
 
 GENL_op(
 	DRBD_ADM_SECONDARY, 15,
 	GENL_doit(drbd_adm_set_role),
-	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)
-	GENL_tla_expected(DRBD_NLA_SET_ROLE_PARMS, GENLA_F_REQUIRED)
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
+	GENL_tla_expected(DRBD_NLA_SET_ROLE_PARMS, DRBD_F_REQUIRED)
 )
 
 GENL_op(
 	DRBD_ADM_NEW_C_UUID, 16,
 	GENL_doit(drbd_adm_new_c_uuid),
-	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)
-	GENL_tla_expected(DRBD_NLA_NEW_C_UUID_PARMS, GENLA_F_MANDATORY)
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
+	GENL_tla_expected(DRBD_NLA_NEW_C_UUID_PARMS, DRBD_GENLA_F_MANDATORY)
 )
 
 GENL_op(
 	DRBD_ADM_START_OV, 17,
 	GENL_doit(drbd_adm_start_ov),
-	GENL_tla_expected(DRBD_NLA_START_OV_PARMS, GENLA_F_MANDATORY)
+	GENL_tla_expected(DRBD_NLA_START_OV_PARMS, DRBD_GENLA_F_MANDATORY)
 )
 
 GENL_op(DRBD_ADM_DETACH,	18, GENL_doit(drbd_adm_detach),
-	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED))
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
 GENL_op(DRBD_ADM_INVALIDATE,	19, GENL_doit(drbd_adm_invalidate),
-	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED))
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
 GENL_op(DRBD_ADM_INVAL_PEER,	20, GENL_doit(drbd_adm_invalidate_peer),
-	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED))
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
 GENL_op(DRBD_ADM_PAUSE_SYNC,	21, GENL_doit(drbd_adm_pause_sync),
-	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED))
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
 GENL_op(DRBD_ADM_RESUME_SYNC,	22, GENL_doit(drbd_adm_resume_sync),
-	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED))
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
 GENL_op(DRBD_ADM_SUSPEND_IO,	23, GENL_doit(drbd_adm_suspend_io),
-	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED))
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
 GENL_op(DRBD_ADM_RESUME_IO,	24, GENL_doit(drbd_adm_resume_io),
-	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED))
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
 GENL_op(DRBD_ADM_OUTDATE,	25, GENL_doit(drbd_adm_outdate),
-	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED))
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
 GENL_op(DRBD_ADM_GET_TIMEOUT_TYPE, 26, GENL_doit(drbd_adm_get_timeout_type),
-	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED))
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
 GENL_op(DRBD_ADM_DOWN,		27, GENL_doit(drbd_adm_down),
-	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED))
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h
index 2ae16126c6a4..58edd403a3ff 100644
--- a/include/linux/genl_magic_func.h
+++ b/include/linux/genl_magic_func.h
@@ -3,53 +3,6 @@
 
 #include <linux/genl_magic_struct.h>
 
-/*
- * Extension of genl attribute validation policies			{{{1
- *									{{{2
- */
-
-/**
- * nla_is_required - return true if this attribute is required
- * @nla: netlink attribute
- */
-static inline int nla_is_required(const struct nlattr *nla)
-{
-        return nla->nla_type & GENLA_F_REQUIRED;
-}
-
-/**
- * nla_is_mandatory - return true if understanding this attribute is mandatory
- * @nla: netlink attribute
- * Note: REQUIRED attributes are implicitly MANDATORY as well
- */
-static inline int nla_is_mandatory(const struct nlattr *nla)
-{
-        return nla->nla_type & (GENLA_F_MANDATORY | GENLA_F_REQUIRED);
-}
-
-/* Functionality to be integrated into nla_parse(), and validate_nla(),
- * respectively.
- *
- * Enforcing the "mandatory" bit is done here,
- * by rejecting unknown mandatory attributes.
- *
- * Part of enforcing the "required" flag would mean to embed it into
- * nla_policy.type, and extending validate_nla(), which currently does
- * BUG_ON(pt->type > NLA_TYPE_MAX); we have to work on existing kernels,
- * so we cannot do that.  Thats why enforcing "required" is done in the
- * generated assignment functions below. */
-static int nla_check_unknown(int maxtype, struct nlattr *head, int len)
-{
-	struct nlattr *nla;
-	int rem;
-        nla_for_each_attr(nla, head, len, rem) {
-		__u16 type = nla_type(nla);
-		if (type > maxtype && nla_is_mandatory(nla))
-			return -EOPNOTSUPP;
-	}
-	return 0;
-}
-
 /*
  * Magic: declare tla policy						{{{1
  * Magic: declare nested policies
@@ -80,13 +33,13 @@ static struct nla_policy s_name ## _nl_policy[] __read_mostly =		\
 #undef __field
 #define __field(attr_nr, attr_flag, name, nla_type, _type, __get,	\
 		 __put, __is_signed)					\
-	[__nla_type(attr_nr)] = { .type = nla_type },
+	[attr_nr] = { .type = nla_type },
 
 #undef __array
 #define __array(attr_nr, attr_flag, name, nla_type, _type, maxlen,	\
 		__get, __put, __is_signed)				\
-	[__nla_type(attr_nr)] = { .type = nla_type,			\
-		.len = maxlen - (nla_type == NLA_NUL_STRING) },
+	[attr_nr] = { .type = nla_type,					\
+		      .len = maxlen - (nla_type == NLA_NUL_STRING) },
 
 #include GENL_MAGIC_INCLUDE_FILE
 
@@ -189,6 +142,43 @@ static struct nlattr *nested_attr_tb[128];
 #define BUILD_BUG_ON_NULL(e) ((void *)sizeof(struct { int:-!!(e); }))
 #endif
 
+static inline int drbd_nla_check_mandatory(int maxtype, struct nlattr *nla)
+{
+	struct nlattr *head = nla_data(nla);
+	int len = nla_len(nla);
+	int rem;
+
+	/*
+	 * validate_nla (called from nla_parse_nested) ignores attributes
+	 * beyond maxtype, and does not understand the DRBD_GENLA_F_MANDATORY flag.
+	 * In order to have it validate attributes with the DRBD_GENLA_F_MANDATORY
+	 * flag set also, check and remove that flag before calling
+	 * nla_parse_nested.
+	 */
+
+	nla_for_each_attr(nla, head, len, rem) {
+		if (nla->nla_type & DRBD_GENLA_F_MANDATORY) {
+			if (nla_type(nla) > maxtype)
+				return -EOPNOTSUPP;
+			nla->nla_type &= ~DRBD_GENLA_F_MANDATORY;
+		}
+	}
+	return 0;
+}
+
+static inline int drbd_nla_parse_nested(struct nlattr *tb[], int maxtype,
+					struct nlattr *nla,
+					const struct nla_policy *policy)
+{
+	int err;
+
+	err = drbd_nla_check_mandatory(maxtype, nla);
+	if (!err)
+		err = nla_parse_nested(tb, maxtype, nla, policy);
+
+	return err;
+}
+
 #undef GENL_struct
 #define GENL_struct(tag_name, tag_number, s_name, s_fields)		\
 /* *_from_attrs functions are static, but potentially unused */		\
@@ -204,12 +194,9 @@ static int __ ## s_name ## _from_attrs(struct s_name *s,		\
 	if (!tla)							\
 		return -ENOMSG;						\
 	DPRINT_TLA(#s_name, "<=-", #tag_name);				\
-	err = nla_parse_nested(ntb, maxtype, tla, s_name ## _nl_policy); \
+	err = drbd_nla_parse_nested(ntb, maxtype, tla, s_name ## _nl_policy);	\
 	if (err)							\
 		return err;						\
-	err = nla_check_unknown(maxtype, nla_data(tla), nla_len(tla));	\
-	if (err)							\
-	      return err;						\
 									\
 	s_fields							\
 	return 0;							\
@@ -226,17 +213,17 @@ static int s_name ## _from_attrs_for_change(struct s_name *s,		\
 }					__attribute__((unused))		\
 
 #define __assign(attr_nr, attr_flag, name, nla_type, type, assignment...)	\
-		nla = ntb[__nla_type(attr_nr)];				\
+		nla = ntb[attr_nr];						\
 		if (nla) {						\
-			if (exclude_invariants && ((attr_flag) & GENLA_F_INVARIANT)) {		\
+			if (exclude_invariants && ((attr_flag) & DRBD_F_INVARIANT)) {		\
 				pr_info("<< must not change invariant attr: %s\n", #name);	\
 				return -EEXIST;				\
 			}						\
 			assignment;					\
-		} else if (exclude_invariants && ((attr_flag) & GENLA_F_INVARIANT)) {		\
+		} else if (exclude_invariants && ((attr_flag) & DRBD_F_INVARIANT)) {		\
 			/* attribute missing from payload, */		\
 			/* which was expected */			\
-		} else if ((attr_flag) & GENLA_F_REQUIRED) {		\
+		} else if ((attr_flag) & DRBD_F_REQUIRED) {		\
 			pr_info("<< missing attr: %s\n", #name);	\
 			return -ENOMSG;					\
 		}
@@ -415,7 +402,7 @@ static inline int s_name ## _to_unpriv_skb(struct sk_buff *skb,		\
 #undef __field
 #define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put,	\
 		__is_signed)						\
-	if (!exclude_sensitive || !((attr_flag) & GENLA_F_SENSITIVE)) {	\
+	if (!exclude_sensitive || !((attr_flag) & DRBD_F_SENSITIVE)) {	\
 		DPRINT_FIELD(">>", nla_type, name, s, NULL);		\
 		__put(skb, attr_nr, s->name);				\
 	}
@@ -423,7 +410,7 @@ static inline int s_name ## _to_unpriv_skb(struct sk_buff *skb,		\
 #undef __array
 #define __array(attr_nr, attr_flag, name, nla_type, type, maxlen,	\
 		__get, __put, __is_signed)				\
-	if (!exclude_sensitive || !((attr_flag) & GENLA_F_SENSITIVE)) {	\
+	if (!exclude_sensitive || !((attr_flag) & DRBD_F_SENSITIVE)) {	\
 		DPRINT_ARRAY(">>",nla_type, name, s, NULL);		\
 		__put(skb, attr_nr, min_t(int, maxlen,			\
 			s->name ## _len + (nla_type == NLA_NUL_STRING)),\
diff --git a/include/linux/genl_magic_struct.h b/include/linux/genl_magic_struct.h
index f3c3425ac30f..1d0bd79e27b3 100644
--- a/include/linux/genl_magic_struct.h
+++ b/include/linux/genl_magic_struct.h
@@ -26,50 +26,34 @@ extern void CONCAT_(GENL_MAGIC_FAMILY, _genl_unregister)(void);
  * Extension of genl attribute validation policies			{{{2
  */
 
-/**
- * GENLA_F_FLAGS - policy type flags to ease compatible ABI evolvement
- *
- * @GENLA_F_REQUIRED: attribute has to be present, or message is considered invalid.
- * Adding new REQUIRED attributes breaks ABI compatibility, so don't do that.
+/*
+ * @DRBD_GENLA_F_MANDATORY: By default, netlink ignores attributes it does not
+ * know about.  This flag can be set in nlattr->nla_type to indicate that this
+ * attribute must not be ignored.
  *
- * @GENLA_F_MANDATORY: if present, receiver _must_ understand it.
- * Without this, unknown attributes (> maxtype) are _silently_ ignored
- * by validate_nla().
+ * We check and remove this flag in drbd_nla_check_mandatory() before
+ * validating the attribute types and lengths via nla_parse_nested().
+ */
+#define DRBD_GENLA_F_MANDATORY (1 << 14)
+
+/*
+ * Flags specific to drbd and not visible at the netlink layer, used in
+ * <struct>_from_attrs and <struct>_to_skb:
  *
- * To be used for API extensions, so older kernel can reject requests for not
- * yet implemented features, if newer userland tries to use them even though
- * the genl_family version clearly indicates they are not available.
+ * @DRBD_F_REQUIRED: Attribute is required; a request without this attribute is
+ * invalid.
  *
- * NOTE: These flags overload
- *   NLA_F_NESTED		(1 << 15)
- *   NLA_F_NET_BYTEORDER	(1 << 14)
- * from linux/netlink.h, which are not useful for validate_nla():
- * NET_BYTEORDER is not used anywhere, and NESTED would be specified by setting
- * .type = NLA_NESTED in the appropriate policy.
+ * @DRBD_F_SENSITIVE: Attribute includes sensitive information and must not be
+ * included in unpriviledged get requests or broadcasts.
  *
- * See also: nla_type()
+ * @DRBD_F_INVARIANT: Attribute is set when an object is initially created, but
+ * cannot subsequently be changed.
  */
-enum {
-	GENLA_F_MANDATORY	= 1 << 14,
-	GENLA_F_REQUIRED	= 1 << 15,
-
-	/* Below will not be present in the __u16 .nla_type, but can be
-	 * triggered on in <struct>_to_skb resp. <struct>_from_attrs */
-
-	/* To exclude "sensitive" information from broadcasts, or on
-	 * unpriviledged get requests.  This is useful because genetlink
-	 * multicast groups can be listened in on by anyone.  */
-	GENLA_F_SENSITIVE	= 1 << 16,
-
-	/* INVARIAN options cannot be changed at runtime.
-	 * Useful to share an attribute policy and struct definition,
-	 * between some "create" and "change" commands,
-	 * but disallow certain fields to be changed online.
-	 */
-	GENLA_F_INVARIANT	= 1 << 17,
-};
+#define DRBD_F_REQUIRED (1 << 0)
+#define DRBD_F_SENSITIVE (1 << 1)
+#define DRBD_F_INVARIANT (1 << 2)
 
-#define __nla_type(x)	((__u16)((__u16)(x) & (__u16)NLA_TYPE_MASK))
+#define __nla_type(x)	((__u16)((x) & NLA_TYPE_MASK & ~DRBD_GENLA_F_MANDATORY))
 
 /*									}}}1
  * MAGIC
@@ -170,12 +154,12 @@ enum {								\
 #undef __field
 #define __field(attr_nr, attr_flag, name, nla_type, type,	\
 		__get, __put, __is_signed)			\
-	T_ ## name = (__u16)(attr_nr | attr_flag),
+	T_ ## name = (__u16)(attr_nr | ((attr_flag) & DRBD_GENLA_F_MANDATORY)),
 
 #undef __array
 #define __array(attr_nr, attr_flag, name, nla_type, type,	\
 		maxlen, __get, __put, __is_signed)		\
-	T_ ## name = (__u16)(attr_nr | attr_flag),
+	T_ ## name = (__u16)(attr_nr | ((attr_flag) & DRBD_GENLA_F_MANDATORY)),
 
 #include GENL_MAGIC_INCLUDE_FILE
 
-- 
cgit v1.2.3


From 5084d71d89e1a94193378efb12ac659e4e6ada3f Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruen@linbit.com>
Date: Tue, 24 May 2011 14:08:58 +0200
Subject: drbd: drbd_nla_check_mandatory(): Need to remove the
 DRBD_GENLA_F_MANDATORY flag first

We need to remove the flag before checking for valid types.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 include/linux/genl_magic_func.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h
index 58edd403a3ff..357f2ad403b1 100644
--- a/include/linux/genl_magic_func.h
+++ b/include/linux/genl_magic_func.h
@@ -158,9 +158,9 @@ static inline int drbd_nla_check_mandatory(int maxtype, struct nlattr *nla)
 
 	nla_for_each_attr(nla, head, len, rem) {
 		if (nla->nla_type & DRBD_GENLA_F_MANDATORY) {
+			nla->nla_type &= ~DRBD_GENLA_F_MANDATORY;
 			if (nla_type(nla) > maxtype)
 				return -EOPNOTSUPP;
-			nla->nla_type &= ~DRBD_GENLA_F_MANDATORY;
 		}
 	}
 	return 0;
-- 
cgit v1.2.3


From 67b58bf723b083d4776cd7c9959246ef46c0d36f Mon Sep 17 00:00:00 2001
From: Lars Ellenberg <lars.ellenberg@linbit.com>
Date: Mon, 6 Jun 2011 15:36:04 +0200
Subject: drbd: spelling fix: too small

It is not "to small", but "too small".

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_nl.c | 8 ++++----
 include/linux/drbd.h         | 4 ++--
 2 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 59923db780b9..31d27dd92924 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -1360,7 +1360,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 		dev_err(DEV, "max capacity %llu smaller than disk size %llu\n",
 			(unsigned long long) drbd_get_max_capacity(nbc),
 			(unsigned long long) new_disk_conf->disk_size);
-		retcode = ERR_DISK_TO_SMALL;
+		retcode = ERR_DISK_TOO_SMALL;
 		goto fail;
 	}
 
@@ -1374,7 +1374,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 	}
 
 	if (drbd_get_capacity(nbc->md_bdev) < min_md_device_sectors) {
-		retcode = ERR_MD_DISK_TO_SMALL;
+		retcode = ERR_MD_DISK_TOO_SMALL;
 		dev_warn(DEV, "refusing attach: md-device too small, "
 		     "at least %llu sectors needed for this meta-disk type\n",
 		     (unsigned long long) min_md_device_sectors);
@@ -1385,7 +1385,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 	 * (we may currently be R_PRIMARY with no local disk...) */
 	if (drbd_get_max_capacity(nbc) <
 	    drbd_get_capacity(mdev->this_bdev)) {
-		retcode = ERR_DISK_TO_SMALL;
+		retcode = ERR_DISK_TOO_SMALL;
 		goto fail;
 	}
 
@@ -1447,7 +1447,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 	if (drbd_md_test_flag(nbc, MDF_CONSISTENT) &&
 	    drbd_new_dev_size(mdev, nbc, nbc->disk_conf->disk_size, 0) < nbc->md.la_size_sect) {
 		dev_warn(DEV, "refusing to truncate a consistent device\n");
-		retcode = ERR_DISK_TO_SMALL;
+		retcode = ERR_DISK_TOO_SMALL;
 		goto force_diskless_dec;
 	}
 
diff --git a/include/linux/drbd.h b/include/linux/drbd.h
index 679e81123229..fedda00374af 100644
--- a/include/linux/drbd.h
+++ b/include/linux/drbd.h
@@ -112,8 +112,8 @@ enum drbd_ret_code {
 	ERR_OPEN_MD_DISK	= 105,
 	ERR_DISK_NOT_BDEV	= 107,
 	ERR_MD_NOT_BDEV		= 108,
-	ERR_DISK_TO_SMALL	= 111,
-	ERR_MD_DISK_TO_SMALL	= 112,
+	ERR_DISK_TOO_SMALL	= 111,
+	ERR_MD_DISK_TOO_SMALL	= 112,
 	ERR_BDCLAIM_DISK	= 114,
 	ERR_BDCLAIM_MD_DISK	= 115,
 	ERR_MD_IDX_INVALID	= 116,
-- 
cgit v1.2.3


From 789c1b626cb490acb36cf481b45040b324f60fde Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruen@linbit.com>
Date: Mon, 6 Jun 2011 16:16:44 +0200
Subject: drbd: Use the terminology suggested by the command names in the
 source code and messages

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_nl.c | 16 ++++++++--------
 include/linux/drbd.h         |  4 ++--
 include/linux/drbd_genl.h    | 17 ++++-------------
 3 files changed, 14 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 31d27dd92924..5b4090f52f5a 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -47,8 +47,8 @@
 int drbd_adm_add_minor(struct sk_buff *skb, struct genl_info *info);
 int drbd_adm_delete_minor(struct sk_buff *skb, struct genl_info *info);
 
-int drbd_adm_create_connection(struct sk_buff *skb, struct genl_info *info);
-int drbd_adm_delete_connection(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info);
+int drbd_adm_del_resource(struct sk_buff *skb, struct genl_info *info);
 int drbd_adm_down(struct sk_buff *skb, struct genl_info *info);
 
 int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info);
@@ -2972,7 +2972,7 @@ drbd_check_conn_name(const char *name)
 	return NO_ERROR;
 }
 
-int drbd_adm_create_connection(struct sk_buff *skb, struct genl_info *info)
+int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info)
 {
 	enum drbd_ret_code retcode;
 
@@ -2989,7 +2989,7 @@ int drbd_adm_create_connection(struct sk_buff *skb, struct genl_info *info)
 	if (adm_ctx.tconn) {
 		if (info->nlhdr->nlmsg_flags & NLM_F_EXCL) {
 			retcode = ERR_INVALID_REQUEST;
-			drbd_msg_put_info("connection exists");
+			drbd_msg_put_info("resource exists");
 		}
 		/* else: still NO_ERROR */
 		goto out;
@@ -3086,7 +3086,7 @@ int drbd_adm_down(struct sk_buff *skb, struct genl_info *info)
 		goto out;
 
 	if (!adm_ctx.tconn) {
-		retcode = ERR_CONN_NOT_KNOWN;
+		retcode = ERR_RES_NOT_KNOWN;
 		goto out;
 	}
 
@@ -3140,7 +3140,7 @@ int drbd_adm_down(struct sk_buff *skb, struct genl_info *info)
 		retcode = NO_ERROR;
 	} else {
 		/* "can not happen" */
-		retcode = ERR_CONN_IN_USE;
+		retcode = ERR_RES_IN_USE;
 		drbd_msg_put_info("failed to delete connection");
 	}
 	goto out;
@@ -3149,7 +3149,7 @@ out:
 	return 0;
 }
 
-int drbd_adm_delete_connection(struct sk_buff *skb, struct genl_info *info)
+int drbd_adm_del_resource(struct sk_buff *skb, struct genl_info *info)
 {
 	enum drbd_ret_code retcode;
 
@@ -3166,7 +3166,7 @@ int drbd_adm_delete_connection(struct sk_buff *skb, struct genl_info *info)
 
 		retcode = NO_ERROR;
 	} else {
-		retcode = ERR_CONN_IN_USE;
+		retcode = ERR_RES_IN_USE;
 	}
 
 	if (retcode == NO_ERROR)
diff --git a/include/linux/drbd.h b/include/linux/drbd.h
index fedda00374af..161cd414b036 100644
--- a/include/linux/drbd.h
+++ b/include/linux/drbd.h
@@ -155,8 +155,8 @@ enum drbd_ret_code {
 	ERR_CONG_NOT_PROTO_A	= 155,
 	ERR_PIC_AFTER_DEP	= 156,
 	ERR_PIC_PEER_DEP	= 157,
-	ERR_CONN_NOT_KNOWN      = 158,
-	ERR_CONN_IN_USE         = 159,
+	ERR_RES_NOT_KNOWN	= 158,
+	ERR_RES_IN_USE		= 159,
 	ERR_MINOR_CONFIGURED    = 160,
 	ERR_MINOR_EXISTS	= 161,
 	ERR_INVALID_REQUEST	= 162,
diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h
index 4ceecb9307d9..47ef324b69db 100644
--- a/include/linux/drbd_genl.h
+++ b/include/linux/drbd_genl.h
@@ -253,25 +253,16 @@ GENL_op(
 	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_GENLA_F_MANDATORY)
 )
 
-#if 0
-	/* TO BE DONE */
-	/* create or destroy resources, aka replication groups */
-GENL_op(DRBD_ADM_CREATE_RESOURCE, 3, GENL_doit(drbd_adm_create_resource),
-	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
-GENL_op(DRBD_ADM_DELETE_RESOURCE, 4, GENL_doit(drbd_adm_delete_resource),
-	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
-#endif
-
 	/* add DRBD minor devices as volumes to resources */
-GENL_op(DRBD_ADM_ADD_MINOR, 5, GENL_doit(drbd_adm_add_minor),
+GENL_op(DRBD_ADM_NEW_MINOR, 5, GENL_doit(drbd_adm_add_minor),
 	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
 GENL_op(DRBD_ADM_DEL_MINOR, 6, GENL_doit(drbd_adm_delete_minor),
 	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
 
-	/* add or delete replication links to resources */
-GENL_op(DRBD_ADM_ADD_LINK, 7, GENL_doit(drbd_adm_create_connection),
+	/* add or delete resources */
+GENL_op(DRBD_ADM_NEW_RESOURCE, 7, GENL_doit(drbd_adm_new_resource),
 	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
-GENL_op(DRBD_ADM_DEL_LINK, 8, GENL_doit(drbd_adm_delete_connection),
+GENL_op(DRBD_ADM_DEL_RESOURCE, 8, GENL_doit(drbd_adm_del_resource),
 	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
 
 GENL_op(DRBD_ADM_RESOURCE_OPTS, 9,
-- 
cgit v1.2.3


From 7c3063cc6f0e75cdf312f5f318f9a4c02e460397 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruen@linbit.com>
Date: Thu, 9 Jun 2011 17:52:12 +0200
Subject: drbd: Also need to check for DRBD_GENLA_F_MANDATORY flags before
 nla_find_nested()

This is done by introducing drbd_nla_find_nested() which handles the flag
before calling nla_find_nested().

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_int.h   |  6 +++
 drivers/block/drbd/drbd_nl.c    | 96 ++++++++++++++++++++++++++++++++---------
 include/linux/drbd_genl.h       |  2 +-
 include/linux/genl_magic_func.h | 37 ----------------
 4 files changed, 83 insertions(+), 58 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index c3019730a24f..c58430183d5f 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -1407,6 +1407,12 @@ extern bool conn_try_outdate_peer(struct drbd_tconn *tconn);
 extern void conn_try_outdate_peer_async(struct drbd_tconn *tconn);
 extern int drbd_khelper(struct drbd_conf *mdev, char *cmd);
 
+struct nla_policy;
+extern int drbd_nla_check_mandatory(int maxtype, struct nlattr *nla);
+extern int drbd_nla_parse_nested(struct nlattr *tb[], int maxtype, struct nlattr *nla,
+				 const struct nla_policy *policy);
+extern struct nlattr *drbd_nla_find_nested(int maxtype, struct nlattr *nla, int attrtype);
+
 /* drbd_worker.c */
 extern int drbd_worker(struct drbd_thread *thi);
 enum drbd_ret_code drbd_resync_after_valid(struct drbd_conf *mdev, int o_minor);
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 5b4090f52f5a..24187f1c93d5 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -92,7 +92,7 @@ static struct drbd_config_context {
 #define VOLUME_UNSPECIFIED		(-1U)
 	/* pointer into the request skb,
 	 * limited lifetime! */
-	char *conn_name;
+	char *resource_name;
 
 	/* reply buffer */
 	struct sk_buff *reply_skb;
@@ -191,15 +191,15 @@ static int drbd_adm_prepare(struct sk_buff *skb, struct genl_info *info,
 		/* and assign stuff to the global adm_ctx */
 		nla = nested_attr_tb[__nla_type(T_ctx_volume)];
 		adm_ctx.volume = nla ? nla_get_u32(nla) : VOLUME_UNSPECIFIED;
-		nla = nested_attr_tb[__nla_type(T_ctx_conn_name)];
+		nla = nested_attr_tb[__nla_type(T_ctx_resource_name)];
 		if (nla)
-			adm_ctx.conn_name = nla_data(nla);
+			adm_ctx.resource_name = nla_data(nla);
 	} else
 		adm_ctx.volume = VOLUME_UNSPECIFIED;
 
 	adm_ctx.minor = d_in->minor;
 	adm_ctx.mdev = minor_to_mdev(d_in->minor);
-	adm_ctx.tconn = conn_get_by_name(adm_ctx.conn_name);
+	adm_ctx.tconn = conn_get_by_name(adm_ctx.resource_name);
 
 	if (!adm_ctx.mdev && (flags & DRBD_ADM_NEED_MINOR)) {
 		drbd_msg_put_info("unknown minor");
@@ -214,7 +214,8 @@ static int drbd_adm_prepare(struct sk_buff *skb, struct genl_info *info,
 	if (adm_ctx.mdev && adm_ctx.tconn &&
 	    adm_ctx.mdev->tconn != adm_ctx.tconn) {
 		pr_warning("request: minor=%u, conn=%s; but that minor belongs to connection %s\n",
-				adm_ctx.minor, adm_ctx.conn_name, adm_ctx.mdev->tconn->name);
+				adm_ctx.minor, adm_ctx.resource_name,
+				adm_ctx.mdev->tconn->name);
 		drbd_msg_put_info("minor exists in different connection");
 		return ERR_INVALID_REQUEST;
 	}
@@ -239,7 +240,7 @@ fail:
 static int drbd_adm_finish(struct genl_info *info, int retcode)
 {
 	struct nlattr *nla;
-	const char *conn_name = NULL;
+	const char *resource_name = NULL;
 
 	if (adm_ctx.tconn) {
 		kref_put(&adm_ctx.tconn->kref, &conn_destroy);
@@ -253,9 +254,10 @@ static int drbd_adm_finish(struct genl_info *info, int retcode)
 
 	nla = info->attrs[DRBD_NLA_CFG_CONTEXT];
 	if (nla) {
-		nla = nla_find_nested(nla, __nla_type(T_ctx_conn_name));
-		if (nla)
-			conn_name = nla_data(nla);
+		int maxtype = ARRAY_SIZE(drbd_cfg_context_nl_policy) - 1;
+		nla = drbd_nla_find_nested(maxtype, nla, __nla_type(T_ctx_resource_name));
+		if (nla && !IS_ERR(nla))
+			resource_name = nla_data(nla);
 	}
 
 	drbd_adm_send_reply(adm_ctx.reply_skb, info);
@@ -2526,7 +2528,7 @@ int drbd_adm_outdate(struct sk_buff *skb, struct genl_info *info)
 	return drbd_adm_simple_request_state(skb, info, NS(disk, D_OUTDATED));
 }
 
-int nla_put_drbd_cfg_context(struct sk_buff *skb, const char *conn_name, unsigned vnr)
+int nla_put_drbd_cfg_context(struct sk_buff *skb, const char *resource_name, unsigned vnr)
 {
 	struct nlattr *nla;
 	nla = nla_nest_start(skb, DRBD_NLA_CFG_CONTEXT);
@@ -2534,7 +2536,7 @@ int nla_put_drbd_cfg_context(struct sk_buff *skb, const char *conn_name, unsigne
 		goto nla_put_failure;
 	if (vnr != VOLUME_UNSPECIFIED)
 		NLA_PUT_U32(skb, T_ctx_volume, vnr);
-	NLA_PUT_STRING(skb, T_ctx_conn_name, conn_name);
+	NLA_PUT_STRING(skb, T_ctx_resource_name, resource_name);
 	nla_nest_end(skb, nla);
 	return 0;
 
@@ -2778,8 +2780,9 @@ int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	const unsigned hdrlen = GENL_HDRLEN + GENL_MAGIC_FAMILY_HDRSZ;
 	struct nlattr *nla;
-	const char *conn_name;
+	const char *resource_name;
 	struct drbd_tconn *tconn;
+	int maxtype;
 
 	/* Is this a followup call? */
 	if (cb->args[0]) {
@@ -2799,12 +2802,15 @@ int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb)
 	/* No explicit context given.  Dump all. */
 	if (!nla)
 		goto dump;
-	nla = nla_find_nested(nla, __nla_type(T_ctx_conn_name));
+	maxtype = ARRAY_SIZE(drbd_cfg_context_nl_policy) - 1;
+	nla = drbd_nla_find_nested(maxtype, nla, __nla_type(T_ctx_resource_name));
+	if (IS_ERR(nla))
+		return PTR_ERR(nla);
 	/* context given, but no name present? */
 	if (!nla)
 		return -EINVAL;
-	conn_name = nla_data(nla);
-	tconn = conn_get_by_name(conn_name);
+	resource_name = nla_data(nla);
+	tconn = conn_get_by_name(resource_name);
 
 	if (!tconn)
 		return -ENODEV;
@@ -2957,16 +2963,16 @@ out_nolock:
 }
 
 static enum drbd_ret_code
-drbd_check_conn_name(const char *name)
+drbd_check_resource_name(const char *name)
 {
 	if (!name || !name[0]) {
-		drbd_msg_put_info("connection name missing");
+		drbd_msg_put_info("resource name missing");
 		return ERR_MANDATORY_TAG;
 	}
 	/* if we want to use these in sysfs/configfs/debugfs some day,
 	 * we must not allow slashes */
 	if (strchr(name, '/')) {
-		drbd_msg_put_info("invalid connection name");
+		drbd_msg_put_info("invalid resource name");
 		return ERR_INVALID_REQUEST;
 	}
 	return NO_ERROR;
@@ -2982,7 +2988,7 @@ int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info)
 	if (retcode != NO_ERROR)
 		goto out;
 
-	retcode = drbd_check_conn_name(adm_ctx.conn_name);
+	retcode = drbd_check_resource_name(adm_ctx.resource_name);
 	if (retcode != NO_ERROR)
 		goto out;
 
@@ -2995,7 +3001,7 @@ int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info)
 		goto out;
 	}
 
-	if (!conn_create(adm_ctx.conn_name))
+	if (!conn_create(adm_ctx.resource_name))
 		retcode = ERR_NOMEM;
 out:
 	drbd_adm_finish(info, retcode);
@@ -3213,3 +3219,53 @@ failed:
 			"Event seq:%u sib_reason:%u\n",
 			err, seq, sib->sib_reason);
 }
+
+int drbd_nla_check_mandatory(int maxtype, struct nlattr *nla)
+{
+	struct nlattr *head = nla_data(nla);
+	int len = nla_len(nla);
+	int rem;
+
+	/*
+	 * validate_nla (called from nla_parse_nested) ignores attributes
+	 * beyond maxtype, and does not understand the DRBD_GENLA_F_MANDATORY flag.
+	 * In order to have it validate attributes with the DRBD_GENLA_F_MANDATORY
+	 * flag set also, check and remove that flag before calling
+	 * nla_parse_nested.
+	 */
+
+	nla_for_each_attr(nla, head, len, rem) {
+		if (nla->nla_type & DRBD_GENLA_F_MANDATORY) {
+			nla->nla_type &= ~DRBD_GENLA_F_MANDATORY;
+			if (nla_type(nla) > maxtype)
+				return -EOPNOTSUPP;
+		}
+	}
+	return 0;
+}
+
+int drbd_nla_parse_nested(struct nlattr *tb[], int maxtype, struct nlattr *nla,
+			  const struct nla_policy *policy)
+{
+	int err;
+
+	err = drbd_nla_check_mandatory(maxtype, nla);
+	if (!err)
+		err = nla_parse_nested(tb, maxtype, nla, policy);
+
+	return err;
+}
+
+struct nlattr *drbd_nla_find_nested(int maxtype, struct nlattr *nla, int attrtype)
+{
+	int err;
+	/*
+	 * If any nested attribute has the DRBD_GENLA_F_MANDATORY flag set and
+	 * we don't know about that attribute, reject all the nested
+	 * attributes.
+	 */
+	err = drbd_nla_check_mandatory(maxtype, nla);
+	if (err)
+		return ERR_PTR(err);
+	return nla_find_nested(nla, attrtype);
+}
diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h
index 47ef324b69db..0c2102c05384 100644
--- a/include/linux/drbd_genl.h
+++ b/include/linux/drbd_genl.h
@@ -96,7 +96,7 @@ GENL_struct(DRBD_NLA_CFG_REPLY, 1, drbd_cfg_reply,
  * and the volume id within the resource. */
 GENL_struct(DRBD_NLA_CFG_CONTEXT, 2, drbd_cfg_context,
 	__u32_field(1, DRBD_GENLA_F_MANDATORY,	ctx_volume)
-	__str_field(2, DRBD_GENLA_F_MANDATORY,	ctx_conn_name, 128)
+	__str_field(2, DRBD_GENLA_F_MANDATORY,	ctx_resource_name, 128)
 )
 
 GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf,
diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h
index 357f2ad403b1..0b8a88e2e83e 100644
--- a/include/linux/genl_magic_func.h
+++ b/include/linux/genl_magic_func.h
@@ -142,43 +142,6 @@ static struct nlattr *nested_attr_tb[128];
 #define BUILD_BUG_ON_NULL(e) ((void *)sizeof(struct { int:-!!(e); }))
 #endif
 
-static inline int drbd_nla_check_mandatory(int maxtype, struct nlattr *nla)
-{
-	struct nlattr *head = nla_data(nla);
-	int len = nla_len(nla);
-	int rem;
-
-	/*
-	 * validate_nla (called from nla_parse_nested) ignores attributes
-	 * beyond maxtype, and does not understand the DRBD_GENLA_F_MANDATORY flag.
-	 * In order to have it validate attributes with the DRBD_GENLA_F_MANDATORY
-	 * flag set also, check and remove that flag before calling
-	 * nla_parse_nested.
-	 */
-
-	nla_for_each_attr(nla, head, len, rem) {
-		if (nla->nla_type & DRBD_GENLA_F_MANDATORY) {
-			nla->nla_type &= ~DRBD_GENLA_F_MANDATORY;
-			if (nla_type(nla) > maxtype)
-				return -EOPNOTSUPP;
-		}
-	}
-	return 0;
-}
-
-static inline int drbd_nla_parse_nested(struct nlattr *tb[], int maxtype,
-					struct nlattr *nla,
-					const struct nla_policy *policy)
-{
-	int err;
-
-	err = drbd_nla_check_mandatory(maxtype, nla);
-	if (!err)
-		err = nla_parse_nested(tb, maxtype, nla, policy);
-
-	return err;
-}
-
 #undef GENL_struct
 #define GENL_struct(tag_name, tag_number, s_name, s_fields)		\
 /* *_from_attrs functions are static, but potentially unused */		\
-- 
cgit v1.2.3


From 089c075d88ac9407b8d7c5c8fc4b21c0d940bd82 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruen@linbit.com>
Date: Tue, 14 Jun 2011 18:28:09 +0200
Subject: drbd: Convert the generic netlink interface to accept connection
 endpoints

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_int.h      |   7 ++
 drivers/block/drbd/drbd_main.c     |  21 +++++
 drivers/block/drbd/drbd_nl.c       | 158 ++++++++++++++++++++++---------------
 drivers/block/drbd/drbd_receiver.c |  21 +++--
 drivers/block/drbd/drbd_state.c    |   2 +
 include/linux/drbd_genl.h          |  62 +++++++--------
 6 files changed, 164 insertions(+), 107 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index c3019730a24f..6d6d1056d824 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -836,6 +836,11 @@ struct drbd_tconn {			/* is a resource from the config file */
 	wait_queue_head_t ping_wait;	/* Woken upon reception of a ping, and a state change */
 	struct res_opts res_opts;
 
+	struct sockaddr_storage my_addr;
+	int my_addr_len;
+	struct sockaddr_storage peer_addr;
+	int peer_addr_len;
+
 	struct drbd_socket data;	/* data/barrier/cstate/parameter packets */
 	struct drbd_socket meta;	/* ping/ack (metadata) packets */
 	int agreed_pro_version;		/* actually used protocol version */
@@ -1377,6 +1382,8 @@ extern void drbd_minor_destroy(struct kref *kref);
 struct drbd_tconn *conn_create(const char *name);
 extern void conn_destroy(struct kref *kref);
 struct drbd_tconn *conn_get_by_name(const char *name);
+extern struct drbd_tconn *conn_get_by_addrs(void *my_addr, int my_addr_len,
+					    void *peer_addr, int peer_addr_len);
 extern void conn_free_crypto(struct drbd_tconn *tconn);
 
 extern int proc_details;
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 178c711bc4af..79f275dc43a4 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -2420,6 +2420,27 @@ found:
 	return tconn;
 }
 
+struct drbd_tconn *conn_get_by_addrs(void *my_addr, int my_addr_len,
+				     void *peer_addr, int peer_addr_len)
+{
+	struct drbd_tconn *tconn;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(tconn, &drbd_tconns, all_tconn) {
+		if (tconn->my_addr_len == my_addr_len &&
+		    tconn->peer_addr_len == peer_addr_len &&
+		    !memcmp(&tconn->my_addr, my_addr, my_addr_len) &&
+		    !memcmp(&tconn->peer_addr, peer_addr, peer_addr_len)) {
+			kref_get(&tconn->kref);
+			goto found;
+		}
+	}
+	tconn = NULL;
+found:
+	rcu_read_unlock();
+	return tconn;
+}
+
 static int drbd_alloc_socket(struct drbd_socket *socket)
 {
 	socket->rbuf = (void *) __get_free_page(GFP_KERNEL);
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 352be132b4be..e7933e04e7b8 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -94,6 +94,8 @@ static struct drbd_config_context {
 	/* pointer into the request skb,
 	 * limited lifetime! */
 	char *resource_name;
+	struct nlattr *my_addr;
+	struct nlattr *peer_addr;
 
 	/* reply buffer */
 	struct sk_buff *reply_skb;
@@ -142,6 +144,7 @@ int drbd_msg_put_info(const char *info)
  */
 #define DRBD_ADM_NEED_MINOR	1
 #define DRBD_ADM_NEED_RESOURCE	2
+#define DRBD_ADM_NEED_CONNECTION 4
 static int drbd_adm_prepare(struct sk_buff *skb, struct genl_info *info,
 		unsigned flags)
 {
@@ -174,6 +177,7 @@ static int drbd_adm_prepare(struct sk_buff *skb, struct genl_info *info,
 	adm_ctx.reply_dh->minor = d_in->minor;
 	adm_ctx.reply_dh->ret_code = NO_ERROR;
 
+	adm_ctx.volume = VOLUME_UNSPECIFIED;
 	if (info->attrs[DRBD_NLA_CFG_CONTEXT]) {
 		struct nlattr *nla;
 		/* parse and validate only */
@@ -191,12 +195,21 @@ static int drbd_adm_prepare(struct sk_buff *skb, struct genl_info *info,
 
 		/* and assign stuff to the global adm_ctx */
 		nla = nested_attr_tb[__nla_type(T_ctx_volume)];
-		adm_ctx.volume = nla ? nla_get_u32(nla) : VOLUME_UNSPECIFIED;
+		if (nla)
+			adm_ctx.volume = nla_get_u32(nla);
 		nla = nested_attr_tb[__nla_type(T_ctx_resource_name)];
 		if (nla)
 			adm_ctx.resource_name = nla_data(nla);
-	} else
-		adm_ctx.volume = VOLUME_UNSPECIFIED;
+		adm_ctx.my_addr = nested_attr_tb[__nla_type(T_ctx_my_addr)];
+		adm_ctx.peer_addr = nested_attr_tb[__nla_type(T_ctx_peer_addr)];
+		if ((adm_ctx.my_addr &&
+		     nla_len(adm_ctx.my_addr) > sizeof(adm_ctx.tconn->my_addr)) ||
+		    (adm_ctx.peer_addr &&
+		     nla_len(adm_ctx.peer_addr) > sizeof(adm_ctx.tconn->peer_addr))) {
+			err = -EINVAL;
+			goto fail;
+		}
+	}
 
 	adm_ctx.minor = d_in->minor;
 	adm_ctx.mdev = minor_to_mdev(d_in->minor);
@@ -211,6 +224,26 @@ static int drbd_adm_prepare(struct sk_buff *skb, struct genl_info *info,
 		return ERR_INVALID_REQUEST;
 	}
 
+	if (flags & DRBD_ADM_NEED_CONNECTION) {
+		if (adm_ctx.tconn && !(flags & DRBD_ADM_NEED_RESOURCE)) {
+			drbd_msg_put_info("no resource name expected");
+			return ERR_INVALID_REQUEST;
+		}
+		if (adm_ctx.mdev) {
+			drbd_msg_put_info("no minor number expected");
+			return ERR_INVALID_REQUEST;
+		}
+		if (adm_ctx.my_addr && adm_ctx.peer_addr)
+			adm_ctx.tconn = conn_get_by_addrs(nla_data(adm_ctx.my_addr),
+							  nla_len(adm_ctx.my_addr),
+							  nla_data(adm_ctx.peer_addr),
+							  nla_len(adm_ctx.peer_addr));
+		if (!adm_ctx.tconn) {
+			drbd_msg_put_info("unknown connection");
+			return ERR_INVALID_REQUEST;
+		}
+	}
+
 	/* some more paranoia, if the request was over-determined */
 	if (adm_ctx.mdev && adm_ctx.tconn &&
 	    adm_ctx.mdev->tconn != adm_ctx.tconn) {
@@ -268,30 +301,28 @@ static int drbd_adm_finish(struct genl_info *info, int retcode)
 static void setup_khelper_env(struct drbd_tconn *tconn, char **envp)
 {
 	char *afs;
-	struct net_conf *nc;
 
-	rcu_read_lock();
-	nc = rcu_dereference(tconn->net_conf);
-	if (nc) {
-		switch (((struct sockaddr *)nc->peer_addr)->sa_family) {
-		case AF_INET6:
-			afs = "ipv6";
-			snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI6",
-				 &((struct sockaddr_in6 *)nc->peer_addr)->sin6_addr);
-			break;
-		case AF_INET:
-			afs = "ipv4";
-			snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI4",
-				 &((struct sockaddr_in *)nc->peer_addr)->sin_addr);
-			break;
-		default:
-			afs = "ssocks";
-			snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI4",
-				 &((struct sockaddr_in *)nc->peer_addr)->sin_addr);
-		}
-		snprintf(envp[3], 20, "DRBD_PEER_AF=%s", afs);
+	/* FIXME: A future version will not allow this case. */
+	if (tconn->my_addr_len == 0 || tconn->peer_addr_len == 0)
+		return;
+
+	switch (((struct sockaddr *)&tconn->peer_addr)->sa_family) {
+	case AF_INET6:
+		afs = "ipv6";
+		snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI6",
+			 &((struct sockaddr_in6 *)&tconn->peer_addr)->sin6_addr);
+		break;
+	case AF_INET:
+		afs = "ipv4";
+		snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI4",
+			 &((struct sockaddr_in *)&tconn->peer_addr)->sin_addr);
+		break;
+	default:
+		afs = "ssocks";
+		snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI4",
+			 &((struct sockaddr_in *)&tconn->peer_addr)->sin_addr);
 	}
-	rcu_read_unlock();
+	snprintf(envp[3], 20, "DRBD_PEER_AF=%s", afs);
 }
 
 int drbd_khelper(struct drbd_conf *mdev, char *cmd)
@@ -1874,7 +1905,7 @@ int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info)
 	int rsr; /* re-sync running */
 	struct crypto crypto = { };
 
-	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_RESOURCE);
+	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONNECTION);
 	if (!adm_ctx.reply_skb)
 		return retcode;
 	if (retcode != NO_ERROR)
@@ -1986,18 +2017,39 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info)
 	struct drbd_conf *mdev;
 	struct net_conf *old_conf, *new_conf = NULL;
 	struct crypto crypto = { };
-	struct drbd_tconn *oconn;
 	struct drbd_tconn *tconn;
-	struct sockaddr *new_my_addr, *new_peer_addr, *taken_addr;
 	enum drbd_ret_code retcode;
 	int i;
 	int err;
 
 	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_RESOURCE);
+
 	if (!adm_ctx.reply_skb)
 		return retcode;
 	if (retcode != NO_ERROR)
 		goto out;
+	if (!(adm_ctx.my_addr && adm_ctx.peer_addr)) {
+		drbd_msg_put_info("connection endpoint(s) missing");
+		retcode = ERR_INVALID_REQUEST;
+		goto out;
+	}
+
+	/* No need for _rcu here. All reconfiguration is
+	 * strictly serialized on genl_lock(). We are protected against
+	 * concurrent reconfiguration/addition/deletion */
+	list_for_each_entry(tconn, &drbd_tconns, all_tconn) {
+		if (nla_len(adm_ctx.my_addr) == tconn->my_addr_len &&
+		    !memcmp(nla_data(adm_ctx.my_addr), &tconn->my_addr, tconn->my_addr_len)) {
+			retcode = ERR_LOCAL_ADDR;
+			goto out;
+		}
+
+		if (nla_len(adm_ctx.peer_addr) == tconn->peer_addr_len &&
+		    !memcmp(nla_data(adm_ctx.peer_addr), &tconn->peer_addr, tconn->peer_addr_len)) {
+			retcode = ERR_PEER_ADDR;
+			goto out;
+		}
+	}
 
 	tconn = adm_ctx.tconn;
 	conn_reconfig_start(tconn);
@@ -2027,37 +2079,6 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info)
 	if (retcode != NO_ERROR)
 		goto fail;
 
-	retcode = NO_ERROR;
-
-	new_my_addr = (struct sockaddr *)&new_conf->my_addr;
-	new_peer_addr = (struct sockaddr *)&new_conf->peer_addr;
-
-	/* No need for _rcu here. All reconfiguration is
-	 * strictly serialized on genl_lock(). We are protected against
-	 * concurrent reconfiguration/addition/deletion */
-	list_for_each_entry(oconn, &drbd_tconns, all_tconn) {
-		struct net_conf *nc;
-		if (oconn == tconn)
-			continue;
-
-		rcu_read_lock();
-		nc = rcu_dereference(oconn->net_conf);
-		if (nc) {
-			taken_addr = (struct sockaddr *)&nc->my_addr;
-			if (new_conf->my_addr_len == nc->my_addr_len &&
-			    !memcmp(new_my_addr, taken_addr, new_conf->my_addr_len))
-				retcode = ERR_LOCAL_ADDR;
-
-			taken_addr = (struct sockaddr *)&nc->peer_addr;
-			if (new_conf->peer_addr_len == nc->peer_addr_len &&
-			    !memcmp(new_peer_addr, taken_addr, new_conf->peer_addr_len))
-				retcode = ERR_PEER_ADDR;
-		}
-		rcu_read_unlock();
-		if (retcode != NO_ERROR)
-			goto fail;
-	}
-
 	retcode = alloc_crypto(&crypto, new_conf);
 	if (retcode != NO_ERROR)
 		goto fail;
@@ -2083,6 +2104,11 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info)
 	tconn->csums_tfm = crypto.csums_tfm;
 	tconn->verify_tfm = crypto.verify_tfm;
 
+	tconn->my_addr_len = nla_len(adm_ctx.my_addr);
+	memcpy(&tconn->my_addr, nla_data(adm_ctx.my_addr), tconn->my_addr_len);
+	tconn->peer_addr_len = nla_len(adm_ctx.peer_addr);
+	memcpy(&tconn->peer_addr, nla_data(adm_ctx.peer_addr), tconn->peer_addr_len);
+
 	mutex_unlock(&tconn->conf_update);
 
 	rcu_read_lock();
@@ -2170,7 +2196,7 @@ int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info)
 	enum drbd_ret_code retcode;
 	int err;
 
-	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_RESOURCE);
+	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONNECTION);
 	if (!adm_ctx.reply_skb)
 		return retcode;
 	if (retcode != NO_ERROR)
@@ -2529,7 +2555,7 @@ int drbd_adm_outdate(struct sk_buff *skb, struct genl_info *info)
 	return drbd_adm_simple_request_state(skb, info, NS(disk, D_OUTDATED));
 }
 
-int nla_put_drbd_cfg_context(struct sk_buff *skb, const char *resource_name, unsigned vnr)
+int nla_put_drbd_cfg_context(struct sk_buff *skb, struct drbd_tconn *tconn, unsigned vnr)
 {
 	struct nlattr *nla;
 	nla = nla_nest_start(skb, DRBD_NLA_CFG_CONTEXT);
@@ -2537,7 +2563,11 @@ int nla_put_drbd_cfg_context(struct sk_buff *skb, const char *resource_name, uns
 		goto nla_put_failure;
 	if (vnr != VOLUME_UNSPECIFIED)
 		NLA_PUT_U32(skb, T_ctx_volume, vnr);
-	NLA_PUT_STRING(skb, T_ctx_resource_name, resource_name);
+	NLA_PUT_STRING(skb, T_ctx_resource_name, tconn->name);
+	if (tconn->my_addr_len)
+		NLA_PUT(skb, T_ctx_my_addr, tconn->my_addr_len, &tconn->my_addr);
+	if (tconn->peer_addr_len)
+		NLA_PUT(skb, T_ctx_peer_addr, tconn->peer_addr_len, &tconn->peer_addr);
 	nla_nest_end(skb, nla);
 	return 0;
 
@@ -2574,7 +2604,7 @@ int nla_put_status_info(struct sk_buff *skb, struct drbd_conf *mdev,
 
 	/* We need to add connection name and volume number information still.
 	 * Minor number is in drbd_genlmsghdr. */
-	if (nla_put_drbd_cfg_context(skb, mdev->tconn->name, mdev->vnr))
+	if (nla_put_drbd_cfg_context(skb, mdev->tconn, mdev->vnr))
 		goto nla_put_failure;
 
 	if (res_opts_to_skb(skb, &mdev->tconn->res_opts, exclude_sensitive))
@@ -2736,7 +2766,7 @@ next_tconn:
 			/* this is a tconn without a single volume */
 			dh->minor = -1U;
 			dh->ret_code = NO_ERROR;
-			if (nla_put_drbd_cfg_context(skb, tconn->name, VOLUME_UNSPECIFIED))
+			if (nla_put_drbd_cfg_context(skb, tconn, VOLUME_UNSPECIFIED))
 				genlmsg_cancel(skb, dh);
 			else
 				genlmsg_end(skb, dh);
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 4ba097293278..ab1d36cb6214 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -626,23 +626,21 @@ static struct socket *drbd_try_connect(struct drbd_tconn *tconn)
 		rcu_read_unlock();
 		return NULL;
 	}
-
 	sndbuf_size = nc->sndbuf_size;
 	rcvbuf_size = nc->rcvbuf_size;
 	connect_int = nc->connect_int;
+	rcu_read_unlock();
 
-	my_addr_len = min_t(int, nc->my_addr_len, sizeof(src_in6));
-	memcpy(&src_in6, nc->my_addr, my_addr_len);
+	my_addr_len = min_t(int, tconn->my_addr_len, sizeof(src_in6));
+	memcpy(&src_in6, &tconn->my_addr, my_addr_len);
 
-	if (((struct sockaddr *)nc->my_addr)->sa_family == AF_INET6)
+	if (((struct sockaddr *)&tconn->my_addr)->sa_family == AF_INET6)
 		src_in6.sin6_port = 0;
 	else
 		((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */
 
-	peer_addr_len = min_t(int, nc->peer_addr_len, sizeof(src_in6));
-	memcpy(&peer_in6, nc->peer_addr, peer_addr_len);
-
-	rcu_read_unlock();
+	peer_addr_len = min_t(int, tconn->peer_addr_len, sizeof(src_in6));
+	memcpy(&peer_in6, &tconn->peer_addr, peer_addr_len);
 
 	what = "sock_create_kern";
 	err = sock_create_kern(((struct sockaddr *)&src_in6)->sa_family,
@@ -714,15 +712,14 @@ static struct socket *drbd_wait_for_connect(struct drbd_tconn *tconn)
 		rcu_read_unlock();
 		return NULL;
 	}
-
 	sndbuf_size = nc->sndbuf_size;
 	rcvbuf_size = nc->rcvbuf_size;
 	connect_int = nc->connect_int;
-
-	my_addr_len = min_t(int, nc->my_addr_len, sizeof(struct sockaddr_in6));
-	memcpy(&my_addr, nc->my_addr, my_addr_len);
 	rcu_read_unlock();
 
+	my_addr_len = min_t(int, tconn->my_addr_len, sizeof(struct sockaddr_in6));
+	memcpy(&my_addr, &tconn->my_addr, my_addr_len);
+
 	what = "sock_create_kern";
 	err = sock_create_kern(((struct sockaddr *)&my_addr)->sa_family,
 		SOCK_STREAM, IPPROTO_TCP, &s_listen);
diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c
index cd55f46d5c55..d978e4d98a15 100644
--- a/drivers/block/drbd/drbd_state.c
+++ b/drivers/block/drbd/drbd_state.c
@@ -1418,6 +1418,8 @@ static int w_after_conn_state_ch(struct drbd_work *w, int unused)
 
 		mutex_lock(&tconn->conf_update);
 		old_conf = tconn->net_conf;
+		tconn->my_addr_len = 0;
+		tconn->peer_addr_len = 0;
 		rcu_assign_pointer(tconn->net_conf, NULL);
 		conn_free_crypto(tconn);
 		mutex_unlock(&tconn->conf_update);
diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h
index 0c2102c05384..b93db6c83882 100644
--- a/include/linux/drbd_genl.h
+++ b/include/linux/drbd_genl.h
@@ -97,6 +97,8 @@ GENL_struct(DRBD_NLA_CFG_REPLY, 1, drbd_cfg_reply,
 GENL_struct(DRBD_NLA_CFG_CONTEXT, 2, drbd_cfg_context,
 	__u32_field(1, DRBD_GENLA_F_MANDATORY,	ctx_volume)
 	__str_field(2, DRBD_GENLA_F_MANDATORY,	ctx_resource_name, 128)
+	__bin_field(3, DRBD_GENLA_F_MANDATORY,	ctx_my_addr, 128)
+	__bin_field(4, DRBD_GENLA_F_MANDATORY,	ctx_peer_addr, 128)
 )
 
 GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf,
@@ -134,38 +136,36 @@ GENL_struct(DRBD_NLA_RESOURCE_OPTS, 4, res_opts,
 )
 
 GENL_struct(DRBD_NLA_NET_CONF, 5, net_conf,
-	__bin_field(1,	DRBD_F_REQUIRED | DRBD_F_INVARIANT,	my_addr,	128)
-	__bin_field(2,	DRBD_F_REQUIRED | DRBD_F_INVARIANT,	peer_addr,	128)
-	__str_field_def(3,	DRBD_GENLA_F_MANDATORY | DRBD_F_SENSITIVE,
+	__str_field_def(1,	DRBD_GENLA_F_MANDATORY | DRBD_F_SENSITIVE,
 						shared_secret,	SHARED_SECRET_MAX)
-	__str_field_def(4,	DRBD_GENLA_F_MANDATORY,	cram_hmac_alg,	SHARED_SECRET_MAX)
-	__str_field_def(5,	DRBD_GENLA_F_MANDATORY,	integrity_alg,	SHARED_SECRET_MAX)
-	__str_field_def(6,	DRBD_GENLA_F_MANDATORY,	verify_alg,     SHARED_SECRET_MAX)
-	__str_field_def(7,	DRBD_GENLA_F_MANDATORY,	csums_alg,	SHARED_SECRET_MAX)
-	__u32_field_def(8,	DRBD_GENLA_F_MANDATORY,	wire_protocol, DRBD_PROTOCOL_DEF)
-	__u32_field_def(9,	DRBD_GENLA_F_MANDATORY,	connect_int, DRBD_CONNECT_INT_DEF)
-	__u32_field_def(10,	DRBD_GENLA_F_MANDATORY,	timeout, DRBD_TIMEOUT_DEF)
-	__u32_field_def(11,	DRBD_GENLA_F_MANDATORY,	ping_int, DRBD_PING_INT_DEF)
-	__u32_field_def(12,	DRBD_GENLA_F_MANDATORY,	ping_timeo, DRBD_PING_TIMEO_DEF)
-	__u32_field_def(13,	DRBD_GENLA_F_MANDATORY,	sndbuf_size, DRBD_SNDBUF_SIZE_DEF)
-	__u32_field_def(14,	DRBD_GENLA_F_MANDATORY,	rcvbuf_size, DRBD_RCVBUF_SIZE_DEF)
-	__u32_field_def(15,	DRBD_GENLA_F_MANDATORY,	ko_count, DRBD_KO_COUNT_DEF)
-	__u32_field_def(16,	DRBD_GENLA_F_MANDATORY,	max_buffers, DRBD_MAX_BUFFERS_DEF)
-	__u32_field_def(17,	DRBD_GENLA_F_MANDATORY,	max_epoch_size, DRBD_MAX_EPOCH_SIZE_DEF)
-	__u32_field_def(18,	DRBD_GENLA_F_MANDATORY,	unplug_watermark, DRBD_UNPLUG_WATERMARK_DEF)
-	__u32_field_def(19,	DRBD_GENLA_F_MANDATORY,	after_sb_0p, DRBD_AFTER_SB_0P_DEF)
-	__u32_field_def(20,	DRBD_GENLA_F_MANDATORY,	after_sb_1p, DRBD_AFTER_SB_1P_DEF)
-	__u32_field_def(21,	DRBD_GENLA_F_MANDATORY,	after_sb_2p, DRBD_AFTER_SB_2P_DEF)
-	__u32_field_def(22,	DRBD_GENLA_F_MANDATORY,	rr_conflict, DRBD_RR_CONFLICT_DEF)
-	__u32_field_def(23,	DRBD_GENLA_F_MANDATORY,	on_congestion, DRBD_ON_CONGESTION_DEF)
-	__u32_field_def(24,	DRBD_GENLA_F_MANDATORY,	cong_fill, DRBD_CONG_FILL_DEF)
-	__u32_field_def(25,	DRBD_GENLA_F_MANDATORY,	cong_extents, DRBD_CONG_EXTENTS_DEF)
-	__flg_field_def(26, DRBD_GENLA_F_MANDATORY,	two_primaries, DRBD_ALLOW_TWO_PRIMARIES_DEF)
-	__flg_field(27, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT,	discard_my_data)
-	__flg_field_def(28, DRBD_GENLA_F_MANDATORY,	tcp_cork, DRBD_TCP_CORK_DEF)
-	__flg_field_def(29, DRBD_GENLA_F_MANDATORY,	always_asbp, DRBD_ALWAYS_ASBP_DEF)
-	__flg_field(30, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT,	dry_run)
-	__flg_field_def(31,	DRBD_GENLA_F_MANDATORY,	use_rle, DRBD_USE_RLE_DEF)
+	__str_field_def(2,	DRBD_GENLA_F_MANDATORY,	cram_hmac_alg,	SHARED_SECRET_MAX)
+	__str_field_def(3,	DRBD_GENLA_F_MANDATORY,	integrity_alg,	SHARED_SECRET_MAX)
+	__str_field_def(4,	DRBD_GENLA_F_MANDATORY,	verify_alg,     SHARED_SECRET_MAX)
+	__str_field_def(5,	DRBD_GENLA_F_MANDATORY,	csums_alg,	SHARED_SECRET_MAX)
+	__u32_field_def(6,	DRBD_GENLA_F_MANDATORY,	wire_protocol, DRBD_PROTOCOL_DEF)
+	__u32_field_def(7,	DRBD_GENLA_F_MANDATORY,	connect_int, DRBD_CONNECT_INT_DEF)
+	__u32_field_def(8,	DRBD_GENLA_F_MANDATORY,	timeout, DRBD_TIMEOUT_DEF)
+	__u32_field_def(9,	DRBD_GENLA_F_MANDATORY,	ping_int, DRBD_PING_INT_DEF)
+	__u32_field_def(10,	DRBD_GENLA_F_MANDATORY,	ping_timeo, DRBD_PING_TIMEO_DEF)
+	__u32_field_def(11,	DRBD_GENLA_F_MANDATORY,	sndbuf_size, DRBD_SNDBUF_SIZE_DEF)
+	__u32_field_def(12,	DRBD_GENLA_F_MANDATORY,	rcvbuf_size, DRBD_RCVBUF_SIZE_DEF)
+	__u32_field_def(13,	DRBD_GENLA_F_MANDATORY,	ko_count, DRBD_KO_COUNT_DEF)
+	__u32_field_def(14,	DRBD_GENLA_F_MANDATORY,	max_buffers, DRBD_MAX_BUFFERS_DEF)
+	__u32_field_def(15,	DRBD_GENLA_F_MANDATORY,	max_epoch_size, DRBD_MAX_EPOCH_SIZE_DEF)
+	__u32_field_def(16,	DRBD_GENLA_F_MANDATORY,	unplug_watermark, DRBD_UNPLUG_WATERMARK_DEF)
+	__u32_field_def(17,	DRBD_GENLA_F_MANDATORY,	after_sb_0p, DRBD_AFTER_SB_0P_DEF)
+	__u32_field_def(18,	DRBD_GENLA_F_MANDATORY,	after_sb_1p, DRBD_AFTER_SB_1P_DEF)
+	__u32_field_def(19,	DRBD_GENLA_F_MANDATORY,	after_sb_2p, DRBD_AFTER_SB_2P_DEF)
+	__u32_field_def(20,	DRBD_GENLA_F_MANDATORY,	rr_conflict, DRBD_RR_CONFLICT_DEF)
+	__u32_field_def(21,	DRBD_GENLA_F_MANDATORY,	on_congestion, DRBD_ON_CONGESTION_DEF)
+	__u32_field_def(22,	DRBD_GENLA_F_MANDATORY,	cong_fill, DRBD_CONG_FILL_DEF)
+	__u32_field_def(23,	DRBD_GENLA_F_MANDATORY,	cong_extents, DRBD_CONG_EXTENTS_DEF)
+	__flg_field_def(24, DRBD_GENLA_F_MANDATORY,	two_primaries, DRBD_ALLOW_TWO_PRIMARIES_DEF)
+	__flg_field(25, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT,	discard_my_data)
+	__flg_field_def(26, DRBD_GENLA_F_MANDATORY,	tcp_cork, DRBD_TCP_CORK_DEF)
+	__flg_field_def(27, DRBD_GENLA_F_MANDATORY,	always_asbp, DRBD_ALWAYS_ASBP_DEF)
+	__flg_field(28, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT,	dry_run)
+	__flg_field_def(29,	DRBD_GENLA_F_MANDATORY,	use_rle, DRBD_USE_RLE_DEF)
 )
 
 GENL_struct(DRBD_NLA_SET_ROLE_PARMS, 6, set_role_parms,
-- 
cgit v1.2.3


From 6dff2902208364d058746ee794da4d960f6eec6f Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruen@linbit.com>
Date: Tue, 28 Jun 2011 14:18:12 +0200
Subject: drbd: Rename --dry-run to --tentative

drbdadm already has a --dry-run option, so this option cannot directly be
passed through to drbdsetup.  Rename the drbdsetup option to resolve this
conflict.

For backward compatibility, make --dry-run an alias of --tentative.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_main.c     | 4 ++--
 drivers/block/drbd/drbd_receiver.c | 6 +++---
 include/linux/drbd_genl.h          | 2 +-
 3 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 933d4767c110..72b1dfa4b656 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -926,7 +926,7 @@ int __drbd_send_protocol(struct drbd_tconn *tconn, enum drbd_packet cmd)
 	rcu_read_lock();
 	nc = rcu_dereference(tconn->net_conf);
 
-	if (nc->dry_run && tconn->agreed_pro_version < 92) {
+	if (nc->tentative && tconn->agreed_pro_version < 92) {
 		rcu_read_unlock();
 		mutex_unlock(&sock->mutex);
 		conn_err(tconn, "--dry-run is not supported by peer");
@@ -945,7 +945,7 @@ int __drbd_send_protocol(struct drbd_tconn *tconn, enum drbd_packet cmd)
 	cf = 0;
 	if (nc->discard_my_data)
 		cf |= CF_DISCARD_MY_DATA;
-	if (nc->dry_run)
+	if (nc->tentative)
 		cf |= CF_DRY_RUN;
 	p->conn_flags    = cpu_to_be32(cf);
 
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index ab1d36cb6214..d55a3cb21c31 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -2836,7 +2836,7 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol
 	enum drbd_conns rv = C_MASK;
 	enum drbd_disk_state mydisk;
 	struct net_conf *nc;
-	int hg, rule_nr, rr_conflict, dry_run;
+	int hg, rule_nr, rr_conflict, tentative;
 
 	mydisk = mdev->state.disk;
 	if (mydisk == D_NEGOTIATING)
@@ -2916,7 +2916,7 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol
 			     (hg < 0) ? "peer" : "this");
 	}
 	rr_conflict = nc->rr_conflict;
-	dry_run = nc->dry_run;
+	tentative = nc->tentative;
 	rcu_read_unlock();
 
 	if (hg == -100) {
@@ -2949,7 +2949,7 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol
 		}
 	}
 
-	if (dry_run || test_bit(CONN_DRY_RUN, &mdev->tconn->flags)) {
+	if (tentative || test_bit(CONN_DRY_RUN, &mdev->tconn->flags)) {
 		if (hg == 0)
 			dev_info(DEV, "dry-run connect: No resync, would become Connected immediately.\n");
 		else
diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h
index b93db6c83882..e879a9324380 100644
--- a/include/linux/drbd_genl.h
+++ b/include/linux/drbd_genl.h
@@ -164,7 +164,7 @@ GENL_struct(DRBD_NLA_NET_CONF, 5, net_conf,
 	__flg_field(25, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT,	discard_my_data)
 	__flg_field_def(26, DRBD_GENLA_F_MANDATORY,	tcp_cork, DRBD_TCP_CORK_DEF)
 	__flg_field_def(27, DRBD_GENLA_F_MANDATORY,	always_asbp, DRBD_ALWAYS_ASBP_DEF)
-	__flg_field(28, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT,	dry_run)
+	__flg_field(28, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT,	tentative)
 	__flg_field_def(29,	DRBD_GENLA_F_MANDATORY,	use_rle, DRBD_USE_RLE_DEF)
 )
 
-- 
cgit v1.2.3


From f03c254961cce65ee2b21c4beccb6975b6f9d308 Mon Sep 17 00:00:00 2001
From: Lars Ellenberg <lars.ellenberg@linbit.com>
Date: Mon, 20 Jun 2011 22:21:19 +0200
Subject: drbd: allow ping-timeout of up to 30 seconds

Allow up to 300 centi-seconds to be configured for the "ping timeout".
There may be setups where heavy congestion, huge buffers, and asymmetric
bandwidth limitations may need a "huge" ping-timeout as work-around
for "spurious connection loss" problems.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 include/linux/drbd_limits.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h
index 3627f760966e..82db83410f08 100644
--- a/include/linux/drbd_limits.h
+++ b/include/linux/drbd_limits.h
@@ -62,7 +62,7 @@
 
  /* timeout for the ping packets.*/
 #define DRBD_PING_TIMEO_MIN  1
-#define DRBD_PING_TIMEO_MAX  100
+#define DRBD_PING_TIMEO_MAX  300
 #define DRBD_PING_TIMEO_DEF  5
 
   /* max number of write requests between write barriers */
-- 
cgit v1.2.3


From d942ae44537669418a7cbfd916531d30513dbca8 Mon Sep 17 00:00:00 2001
From: Philipp Reisner <philipp.reisner@linbit.com>
Date: Tue, 31 May 2011 13:07:24 +0200
Subject: drbd: Fixes from the 8.3 development branch

 * commit 'ae57a0a':
   drbd: Only print sanitize state's warnings, if the state change happens
   drbd: we should write meta data updates with FLUSH FUA
   drbd: fix limit define, we support 1 PiByte now
   drbd: fix log message argument order
   drbd: Typo in user-visible message.
   drbd: Make "(rcv|snd)buf-size" and "ping-timeout" available for the proxy, too.
   drbd: Allow keywords to be used in multiple config sections.
   drbd: fix typos in comments.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_int.h   | 10 ++++----
 drivers/block/drbd/drbd_state.c | 56 ++++++++++++++++++++++++++++++-----------
 include/linux/drbd_limits.h     |  2 +-
 3 files changed, 47 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index de42c7cf7caf..1d71b3a3586a 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -372,11 +372,11 @@ struct p_connection_features {
 	u32 protocol_max;
 
 	/* should be more than enough for future enhancements
-	 * for now, feature_flags and the reserverd array shall be zero.
+	 * for now, feature_flags and the reserved array shall be zero.
 	 */
 
 	u32 _pad;
-	u64 reserverd[7];
+	u64 reserved[7];
 } __packed;
 
 struct p_barrier {
@@ -914,7 +914,7 @@ struct drbd_conf {
 	atomic_t ap_bio_cnt;	 /* Requests we need to complete */
 	atomic_t ap_pending_cnt; /* AP data packets on the wire, ack expected */
 	atomic_t rs_pending_cnt; /* RS request/data packets on the wire */
-	atomic_t unacked_cnt;	 /* Need to send replys for */
+	atomic_t unacked_cnt;	 /* Need to send replies for */
 	atomic_t local_cnt;	 /* Waiting for local completion */
 
 	/* Interval tree of pending local requests */
@@ -2153,7 +2153,7 @@ static inline int drbd_state_is_stable(struct drbd_conf *mdev)
 		/* disk state is stable as well. */
 		break;
 
-	/* no new io accepted during tansitional states */
+	/* no new io accepted during transitional states */
 	case D_ATTACHING:
 	case D_FAILED:
 	case D_NEGOTIATING:
@@ -2217,7 +2217,7 @@ static inline void inc_ap_bio(struct drbd_conf *mdev)
 	/* we wait here
 	 *    as long as the device is suspended
 	 *    until the bitmap is no longer on the fly during connection
-	 *    handshake as long as we would exeed the max_buffer limit.
+	 *    handshake as long as we would exceed the max_buffer limit.
 	 *
 	 * to avoid races with the reconnect code,
 	 * we need to atomic_inc within the spinlock. */
diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c
index 8c9d0348736d..2cf69b25f1e7 100644
--- a/drivers/block/drbd/drbd_state.c
+++ b/drivers/block/drbd/drbd_state.c
@@ -37,6 +37,15 @@ struct after_state_chg_work {
 	struct completion *done;
 };
 
+enum sanitize_state_warnings {
+	NO_WARNING,
+	ABORTED_ONLINE_VERIFY,
+	ABORTED_RESYNC,
+	CONNECTION_LOST_NEGOTIATING,
+	IMPLICITLY_UPGRADED_DISK,
+	IMPLICITLY_UPGRADED_PDSK,
+};
+
 static int w_after_state_ch(struct drbd_work *w, int unused);
 static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
 			   union drbd_state ns, enum chg_state_flags flags);
@@ -44,7 +53,7 @@ static enum drbd_state_rv is_valid_state(struct drbd_conf *, union drbd_state);
 static enum drbd_state_rv is_valid_soft_transition(union drbd_state, union drbd_state);
 static enum drbd_state_rv is_valid_transition(union drbd_state os, union drbd_state ns);
 static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state ns,
-				       const char **warn_sync_abort);
+				       enum sanitize_state_warnings *warn);
 
 static inline bool is_susp(union drbd_state s)
 {
@@ -656,6 +665,21 @@ is_valid_transition(union drbd_state os, union drbd_state ns)
 	return rv;
 }
 
+static void print_sanitize_warnings(struct drbd_conf *mdev, enum sanitize_state_warnings warn)
+{
+	static const char *msg_table[] = {
+		[NO_WARNING] = "",
+		[ABORTED_ONLINE_VERIFY] = "Online-verify aborted.",
+		[ABORTED_RESYNC] = "Resync aborted.",
+		[CONNECTION_LOST_NEGOTIATING] = "Connection lost while negotiating, no data!",
+		[IMPLICITLY_UPGRADED_DISK] = "Implicitly upgraded disk",
+		[IMPLICITLY_UPGRADED_PDSK] = "Implicitly upgraded pdsk",
+	};
+
+	if (warn != NO_WARNING)
+		dev_warn(DEV, "%s\n", msg_table[warn]);
+}
+
 /**
  * sanitize_state() - Resolves implicitly necessary additional changes to a state transition
  * @mdev:	DRBD device.
@@ -667,11 +691,14 @@ is_valid_transition(union drbd_state os, union drbd_state ns)
  * to D_UNKNOWN. This rule and many more along those lines are in this function.
  */
 static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state ns,
-				       const char **warn_sync_abort)
+				       enum sanitize_state_warnings *warn)
 {
 	enum drbd_fencing_p fp;
 	enum drbd_disk_state disk_min, disk_max, pdsk_min, pdsk_max;
 
+	if (warn)
+		*warn = NO_WARNING;
+
 	fp = FP_DONT_CARE;
 	if (get_ldev(mdev)) {
 		rcu_read_lock();
@@ -695,10 +722,9 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state
 	/* An implication of the disk states onto the connection state */
 	/* Abort resync if a disk fails/detaches */
 	if (ns.conn > C_CONNECTED && (ns.disk <= D_FAILED || ns.pdsk <= D_FAILED)) {
-		if (warn_sync_abort)
-			*warn_sync_abort =
-				ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T ?
-				"Online-verify" : "Resync";
+		if (warn)
+			*warn = ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T ?
+				ABORTED_ONLINE_VERIFY : ABORTED_RESYNC;
 		ns.conn = C_CONNECTED;
 	}
 
@@ -709,7 +735,8 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state
 			ns.disk = mdev->new_state_tmp.disk;
 			ns.pdsk = mdev->new_state_tmp.pdsk;
 		} else {
-			dev_alert(DEV, "Connection lost while negotiating, no data!\n");
+			if (warn)
+				*warn = CONNECTION_LOST_NEGOTIATING;
 			ns.disk = D_DISKLESS;
 			ns.pdsk = D_UNKNOWN;
 		}
@@ -791,16 +818,16 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state
 		ns.disk = disk_max;
 
 	if (ns.disk < disk_min) {
-		dev_warn(DEV, "Implicitly set disk from %s to %s\n",
-			 drbd_disk_str(ns.disk), drbd_disk_str(disk_min));
+		if (warn)
+			*warn = IMPLICITLY_UPGRADED_DISK;
 		ns.disk = disk_min;
 	}
 	if (ns.pdsk > pdsk_max)
 		ns.pdsk = pdsk_max;
 
 	if (ns.pdsk < pdsk_min) {
-		dev_warn(DEV, "Implicitly set pdsk from %s to %s\n",
-			 drbd_disk_str(ns.pdsk), drbd_disk_str(pdsk_min));
+		if (warn)
+			*warn = IMPLICITLY_UPGRADED_PDSK;
 		ns.pdsk = pdsk_min;
 	}
 
@@ -875,12 +902,12 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns,
 {
 	union drbd_state os;
 	enum drbd_state_rv rv = SS_SUCCESS;
-	const char *warn_sync_abort = NULL;
+	enum sanitize_state_warnings ssw;
 	struct after_state_chg_work *ascw;
 
 	os = drbd_read_state(mdev);
 
-	ns = sanitize_state(mdev, ns, &warn_sync_abort);
+	ns = sanitize_state(mdev, ns, &ssw);
 	if (ns.i == os.i)
 		return SS_NOTHING_TO_DO;
 
@@ -909,8 +936,7 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns,
 		return rv;
 	}
 
-	if (warn_sync_abort)
-		dev_warn(DEV, "%s aborted.\n", warn_sync_abort);
+	print_sanitize_warnings(mdev, ssw);
 
 	drbd_pr_state_change(mdev, os, ns, flags);
 
diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h
index 82db83410f08..f1046b13d9f6 100644
--- a/include/linux/drbd_limits.h
+++ b/include/linux/drbd_limits.h
@@ -126,7 +126,7 @@
  * is 1 PiB, currently. */
 /* DRBD_MAX_SECTORS */
 #define DRBD_DISK_SIZE_MIN  0
-#define DRBD_DISK_SIZE_MAX  (16 * (2LLU << 30))
+#define DRBD_DISK_SIZE_MAX  (1 * (2LLU << 40))
 #define DRBD_DISK_SIZE_DEF  0 /* = disabled = no user size... */
 #define DRBD_DISK_SIZE_SCALE 's'  /* sectors */
 
-- 
cgit v1.2.3


From cdfda633d235028e9b27381dedb65416409e8729 Mon Sep 17 00:00:00 2001
From: Philipp Reisner <philipp.reisner@linbit.com>
Date: Tue, 5 Jul 2011 15:38:59 +0200
Subject: drbd: detach from frozen backing device

* drbd-8.3:
  documentation: Documented detach's --force and disk's --disk-timeout
  drbd: Implemented the disk-timeout option
  drbd: Force flag for the detach operation
  drbd: Allow new IOs while the local disk in in FAILED state
  drbd: Bitmap IO functions can not return prematurely if the disk breaks
  drbd: Added a kref to bm_aio_ctx
  drbd: Hold a reference to ldev while doing meta-data IO
  drbd: Keep a reference to the bio until the completion handler finished
  drbd: Implemented wait_until_done_or_disk_failure()
  drbd: Replaced md_io_mutex by an atomic: md_io_in_use
  drbd: moved md_io into mdev
  drbd: Immediately allow completion of IOs, that wait for IO completions on a failed disk
  drbd: Keep a reference to barrier acked requests

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_actlog.c   |  75 ++++++++++++++++++------
 drivers/block/drbd/drbd_bitmap.c   | 115 +++++++++++++++++++++++++++----------
 drivers/block/drbd/drbd_int.h      |  12 ++--
 drivers/block/drbd/drbd_main.c     |  77 ++++++++++++++++++++++---
 drivers/block/drbd/drbd_nl.c       |  28 ++++++++-
 drivers/block/drbd/drbd_receiver.c |   2 -
 drivers/block/drbd/drbd_req.c      |  52 +++++++++++------
 drivers/block/drbd/drbd_req.h      |  19 +++---
 drivers/block/drbd/drbd_state.c    |   7 +++
 drivers/block/drbd/drbd_worker.c   |   9 ++-
 include/linux/drbd_genl.h          |   9 ++-
 include/linux/drbd_limits.h        |   6 ++
 12 files changed, 321 insertions(+), 90 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index aeb483daea06..58b5b61628fc 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -114,18 +114,44 @@ struct drbd_atodb_wait {
 
 static int w_al_write_transaction(struct drbd_work *, int);
 
+void *drbd_md_get_buffer(struct drbd_conf *mdev)
+{
+	int r;
+
+	wait_event(mdev->misc_wait,
+		   (r = atomic_cmpxchg(&mdev->md_io_in_use, 0, 1)) == 0 ||
+		   mdev->state.disk <= D_FAILED);
+
+	return r ? NULL : page_address(mdev->md_io_page);
+}
+
+void drbd_md_put_buffer(struct drbd_conf *mdev)
+{
+	if (atomic_dec_and_test(&mdev->md_io_in_use))
+		wake_up(&mdev->misc_wait);
+}
+
+static bool md_io_allowed(struct drbd_conf *mdev)
+{
+	enum drbd_disk_state ds = mdev->state.disk;
+	return ds >= D_NEGOTIATING || ds == D_ATTACHING;
+}
+
+void wait_until_done_or_disk_failure(struct drbd_conf *mdev, unsigned int *done)
+{
+	wait_event(mdev->misc_wait, *done || !md_io_allowed(mdev));
+}
+
 static int _drbd_md_sync_page_io(struct drbd_conf *mdev,
 				 struct drbd_backing_dev *bdev,
 				 struct page *page, sector_t sector,
 				 int rw, int size)
 {
 	struct bio *bio;
-	struct drbd_md_io md_io;
 	int err;
 
-	md_io.mdev = mdev;
-	init_completion(&md_io.event);
-	md_io.error = 0;
+	mdev->md_io.done = 0;
+	mdev->md_io.error = -ENODEV;
 
 	if ((rw & WRITE) && !test_bit(MD_NO_FUA, &mdev->flags))
 		rw |= REQ_FUA | REQ_FLUSH;
@@ -137,17 +163,25 @@ static int _drbd_md_sync_page_io(struct drbd_conf *mdev,
 	err = -EIO;
 	if (bio_add_page(bio, page, size, 0) != size)
 		goto out;
-	bio->bi_private = &md_io;
+	bio->bi_private = &mdev->md_io;
 	bio->bi_end_io = drbd_md_io_complete;
 	bio->bi_rw = rw;
 
+	if (!get_ldev_if_state(mdev, D_ATTACHING)) {  /* Corresponding put_ldev in drbd_md_io_complete() */
+		dev_err(DEV, "ASSERT FAILED: get_ldev_if_state() == 1 in _drbd_md_sync_page_io()\n");
+		err = -ENODEV;
+		goto out;
+	}
+
+	bio_get(bio); /* one bio_put() is in the completion handler */
+	atomic_inc(&mdev->md_io_in_use); /* drbd_md_put_buffer() is in the completion handler */
 	if (drbd_insert_fault(mdev, (rw & WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD))
 		bio_endio(bio, -EIO);
 	else
 		submit_bio(rw, bio);
-	wait_for_completion(&md_io.event);
+	wait_until_done_or_disk_failure(mdev, &mdev->md_io.done);
 	if (bio_flagged(bio, BIO_UPTODATE))
-		err = md_io.error;
+		err = mdev->md_io.error;
 
  out:
 	bio_put(bio);
@@ -160,7 +194,7 @@ int drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev,
 	int err;
 	struct page *iop = mdev->md_io_page;
 
-	D_ASSERT(mutex_is_locked(&mdev->md_io_mutex));
+	D_ASSERT(atomic_read(&mdev->md_io_in_use) == 1);
 
 	BUG_ON(!bdev->md_bdev);
 
@@ -344,8 +378,14 @@ w_al_write_transaction(struct drbd_work *w, int unused)
 		return 0;
 	}
 
-	mutex_lock(&mdev->md_io_mutex); /* protects md_io_buffer, al_tr_cycle, ... */
-	buffer = page_address(mdev->md_io_page);
+	buffer = drbd_md_get_buffer(mdev); /* protects md_io_buffer, al_tr_cycle, ... */
+	if (!buffer) {
+		dev_err(DEV, "disk failed while waiting for md_io buffer\n");
+		aw->err = -EIO;
+		complete(&((struct update_al_work *)w)->event);
+		put_ldev(mdev);
+		return 1;
+	}
 
 	memset(buffer, 0, sizeof(*buffer));
 	buffer->magic = cpu_to_be32(DRBD_AL_MAGIC);
@@ -415,7 +455,7 @@ w_al_write_transaction(struct drbd_work *w, int unused)
 		mdev->al_tr_number++;
 	}
 
-	mutex_unlock(&mdev->md_io_mutex);
+	drbd_md_put_buffer(mdev);
 	complete(&((struct update_al_work *)w)->event);
 	put_ldev(mdev);
 
@@ -506,8 +546,9 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
 	/* lock out all other meta data io for now,
 	 * and make sure the page is mapped.
 	 */
-	mutex_lock(&mdev->md_io_mutex);
-	b = page_address(mdev->md_io_page);
+	b = drbd_md_get_buffer(mdev);
+	if (!b)
+		return 0;
 
 	/* Always use the full ringbuffer space for now.
 	 * possible optimization: read in all of it,
@@ -528,7 +569,7 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
 
 		/* IO error */
 		if (rv == -1) {
-			mutex_unlock(&mdev->md_io_mutex);
+			drbd_md_put_buffer(mdev);
 			return 0;
 		}
 
@@ -558,7 +599,7 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
 	if (!found_valid) {
 		if (found_initialized != mx)
 			dev_warn(DEV, "No usable activity log found.\n");
-		mutex_unlock(&mdev->md_io_mutex);
+		drbd_md_put_buffer(mdev);
 		return 1;
 	}
 
@@ -573,7 +614,7 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
 		if (!expect(rv != 0))
 			goto cancel;
 		if (rv == -1) {
-			mutex_unlock(&mdev->md_io_mutex);
+			drbd_md_put_buffer(mdev);
 			return 0;
 		}
 
@@ -643,7 +684,7 @@ cancel:
 	mdev->al_tr_pos = (to + 1) % (MD_AL_SECTORS*512/MD_BLOCK_SIZE);
 
 	/* ok, we are done with it */
-	mutex_unlock(&mdev->md_io_mutex);
+	drbd_md_put_buffer(mdev);
 
 	dev_info(DEV, "Found %d transactions (%d active extents) in activity log.\n",
 	     transactions, active_extents);
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index 52c48143b22a..706e5220dd4a 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -918,13 +918,22 @@ void drbd_bm_clear_all(struct drbd_conf *mdev)
 struct bm_aio_ctx {
 	struct drbd_conf *mdev;
 	atomic_t in_flight;
-	struct completion done;
+	unsigned int done;
 	unsigned flags;
 #define BM_AIO_COPY_PAGES	1
 #define BM_AIO_WRITE_HINTED	2
 	int error;
+	struct kref kref;
 };
 
+static void bm_aio_ctx_destroy(struct kref *kref)
+{
+	struct bm_aio_ctx *ctx = container_of(kref, struct bm_aio_ctx, kref);
+
+	put_ldev(ctx->mdev);
+	kfree(ctx);
+}
+
 /* bv_page may be a copy, or may be the original */
 static void bm_async_io_complete(struct bio *bio, int error)
 {
@@ -968,13 +977,16 @@ static void bm_async_io_complete(struct bio *bio, int error)
 
 	bio_put(bio);
 
-	if (atomic_dec_and_test(&ctx->in_flight))
-		complete(&ctx->done);
+	if (atomic_dec_and_test(&ctx->in_flight)) {
+		ctx->done = 1;
+		wake_up(&mdev->misc_wait);
+		kref_put(&ctx->kref, &bm_aio_ctx_destroy);
+	}
 }
 
 static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must_hold(local)
 {
-	struct bio *bio = bio_alloc_drbd(GFP_KERNEL);
+	struct bio *bio = bio_alloc_drbd(GFP_NOIO);
 	struct drbd_conf *mdev = ctx->mdev;
 	struct drbd_bitmap *b = mdev->bitmap;
 	struct page *page;
@@ -1032,12 +1044,7 @@ static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must
  */
 static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_writeout_upper_idx) __must_hold(local)
 {
-	struct bm_aio_ctx ctx = {
-		.mdev = mdev,
-		.in_flight = ATOMIC_INIT(1),
-		.done = COMPLETION_INITIALIZER_ONSTACK(ctx.done),
-		.flags = flags,
-	};
+	struct bm_aio_ctx *ctx;
 	struct drbd_bitmap *b = mdev->bitmap;
 	int num_pages, i, count = 0;
 	unsigned long now;
@@ -1052,7 +1059,27 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w
 	 * For lazy writeout, we don't care for ongoing changes to the bitmap,
 	 * as we submit copies of pages anyways.
 	 */
-	if (!ctx.flags)
+
+	ctx = kmalloc(sizeof(struct bm_aio_ctx), GFP_NOIO);
+	if (!ctx)
+		return -ENOMEM;
+
+	*ctx = (struct bm_aio_ctx) {
+		.mdev = mdev,
+		.in_flight = ATOMIC_INIT(1),
+		.done = 0,
+		.flags = flags,
+		.error = 0,
+		.kref = { ATOMIC_INIT(2) },
+	};
+
+	if (!get_ldev_if_state(mdev, D_ATTACHING)) {  /* put is in bm_aio_ctx_destroy() */
+		dev_err(DEV, "ASSERT FAILED: get_ldev_if_state() == 1 in bm_rw()\n");
+		err = -ENODEV;
+		goto out;
+	}
+
+	if (!ctx->flags)
 		WARN_ON(!(BM_LOCKED_MASK & b->bm_flags));
 
 	num_pages = b->bm_number_of_pages;
@@ -1081,32 +1108,40 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w
 				continue;
 			}
 		}
-		atomic_inc(&ctx.in_flight);
-		bm_page_io_async(&ctx, i, rw);
+		atomic_inc(&ctx->in_flight);
+		bm_page_io_async(ctx, i, rw);
 		++count;
 		cond_resched();
 	}
 
 	/*
-	 * We initialize ctx.in_flight to one to make sure bm_async_io_complete
-	 * will not complete() early, and decrement / test it here.  If there
+	 * We initialize ctx->in_flight to one to make sure bm_async_io_complete
+	 * will not set ctx->done early, and decrement / test it here.  If there
 	 * are still some bios in flight, we need to wait for them here.
+	 * If all IO is done already (or nothing had been submitted), there is
+	 * no need to wait.  Still, we need to put the kref associated with the
+	 * "in_flight reached zero, all done" event.
 	 */
-	if (!atomic_dec_and_test(&ctx.in_flight))
-		wait_for_completion(&ctx.done);
+	if (!atomic_dec_and_test(&ctx->in_flight))
+		wait_until_done_or_disk_failure(mdev, &ctx->done);
+	else
+		kref_put(&ctx->kref, &bm_aio_ctx_destroy);
 
 	/* summary for global bitmap IO */
 	if (flags == 0)
 		dev_info(DEV, "bitmap %s of %u pages took %lu jiffies\n",
-				rw == WRITE ? "WRITE" : "READ",
-				count, jiffies - now);
+			 rw == WRITE ? "WRITE" : "READ",
+			 count, jiffies - now);
 
-	if (ctx.error) {
+	if (ctx->error) {
 		dev_alert(DEV, "we had at least one MD IO ERROR during bitmap IO\n");
 		drbd_chk_io_error(mdev, 1, true);
-		err = -EIO; /* ctx.error ? */
+		err = -EIO; /* ctx->error ? */
 	}
 
+	if (atomic_read(&ctx->in_flight))
+		err = -EIO; /* Disk failed during IO... */
+
 	now = jiffies;
 	if (rw == WRITE) {
 		drbd_md_flush(mdev);
@@ -1121,6 +1156,8 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w
 		dev_info(DEV, "%s (%lu bits) marked out-of-sync by on disk bit-map.\n",
 		     ppsize(ppb, now << (BM_BLOCK_SHIFT-10)), now);
 
+out:
+	kref_put(&ctx->kref, &bm_aio_ctx_destroy);
 	return err;
 }
 
@@ -1177,28 +1214,46 @@ int drbd_bm_write_hinted(struct drbd_conf *mdev) __must_hold(local)
  */
 int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(local)
 {
-	struct bm_aio_ctx ctx = {
+	struct bm_aio_ctx *ctx;
+	int err;
+
+	if (bm_test_page_unchanged(mdev->bitmap->bm_pages[idx])) {
+		dynamic_dev_dbg(DEV, "skipped bm page write for idx %u\n", idx);
+		return 0;
+	}
+
+	ctx = kmalloc(sizeof(struct bm_aio_ctx), GFP_NOIO);
+	if (!ctx)
+		return -ENOMEM;
+
+	*ctx = (struct bm_aio_ctx) {
 		.mdev = mdev,
 		.in_flight = ATOMIC_INIT(1),
-		.done = COMPLETION_INITIALIZER_ONSTACK(ctx.done),
+		.done = 0,
 		.flags = BM_AIO_COPY_PAGES,
+		.error = 0,
+		.kref = { ATOMIC_INIT(2) },
 	};
 
-	if (bm_test_page_unchanged(mdev->bitmap->bm_pages[idx])) {
-		dynamic_dev_dbg(DEV, "skipped bm page write for idx %u\n", idx);
-		return 0;
+	if (!get_ldev_if_state(mdev, D_ATTACHING)) {  /* put is in bm_aio_ctx_destroy() */
+		dev_err(DEV, "ASSERT FAILED: get_ldev_if_state() == 1 in drbd_bm_write_page()\n");
+		err = -ENODEV;
+		goto out;
 	}
 
-	bm_page_io_async(&ctx, idx, WRITE_SYNC);
-	wait_for_completion(&ctx.done);
+	bm_page_io_async(ctx, idx, WRITE_SYNC);
+	wait_until_done_or_disk_failure(mdev, &ctx->done);
 
-	if (ctx.error)
+	if (ctx->error)
 		drbd_chk_io_error(mdev, 1, true);
 		/* that should force detach, so the in memory bitmap will be
 		 * gone in a moment as well. */
 
 	mdev->bm_writ_cnt++;
-	return ctx.error;
+	err = atomic_read(&ctx->in_flight) ? -EIO : ctx->error;
+ out:
+	kref_put(&ctx->kref, &bm_aio_ctx_destroy);
+	return err;
 }
 
 /* NOTE
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 6035784f0de3..4e582058a7c9 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -780,8 +780,7 @@ struct drbd_backing_dev {
 };
 
 struct drbd_md_io {
-	struct drbd_conf *mdev;
-	struct completion event;
+	unsigned int done;
 	int error;
 };
 
@@ -852,6 +851,7 @@ struct drbd_tconn {			/* is a resource from the config file */
 	struct drbd_tl_epoch *newest_tle;
 	struct drbd_tl_epoch *oldest_tle;
 	struct list_head out_of_sequence_requests;
+	struct list_head barrier_acked_requests;
 
 	struct crypto_hash *cram_hmac_tfm;
 	struct crypto_hash *integrity_tfm;  /* checksums we compute, updates protected by tconn->data->mutex */
@@ -978,7 +978,8 @@ struct drbd_conf {
 	atomic_t pp_in_use_by_net;	/* sendpage()d, still referenced by tcp */
 	wait_queue_head_t ee_wait;
 	struct page *md_io_page;	/* one page buffer for md_io */
-	struct mutex md_io_mutex;	/* protects the md_io_buffer */
+	struct drbd_md_io md_io;
+	atomic_t md_io_in_use;		/* protects the md_io, md_io_page and md_io_tmpp */
 	spinlock_t al_lock;
 	wait_queue_head_t al_wait;
 	struct lru_cache *act_log;	/* activity log */
@@ -1424,9 +1425,12 @@ extern void resume_next_sg(struct drbd_conf *mdev);
 extern void suspend_other_sg(struct drbd_conf *mdev);
 extern int drbd_resync_finished(struct drbd_conf *mdev);
 /* maybe rather drbd_main.c ? */
+extern void *drbd_md_get_buffer(struct drbd_conf *mdev);
+extern void drbd_md_put_buffer(struct drbd_conf *mdev);
 extern int drbd_md_sync_page_io(struct drbd_conf *mdev,
 		struct drbd_backing_dev *bdev, sector_t sector, int rw);
 extern void drbd_ov_out_of_sync_found(struct drbd_conf *, sector_t, int);
+extern void wait_until_done_or_disk_failure(struct drbd_conf *mdev, unsigned int *done);
 extern void drbd_rs_controller_reset(struct drbd_conf *mdev);
 
 static inline void ov_out_of_sync_print(struct drbd_conf *mdev)
@@ -2151,12 +2155,12 @@ static inline int drbd_state_is_stable(struct drbd_conf *mdev)
 	case D_OUTDATED:
 	case D_CONSISTENT:
 	case D_UP_TO_DATE:
+	case D_FAILED:
 		/* disk state is stable as well. */
 		break;
 
 	/* no new io accepted during transitional states */
 	case D_ATTACHING:
-	case D_FAILED:
 	case D_NEGOTIATING:
 	case D_UNKNOWN:
 	case D_MASK:
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 448de7bf8223..15384986e4a4 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -215,6 +215,7 @@ static int tl_init(struct drbd_tconn *tconn)
 	tconn->oldest_tle = b;
 	tconn->newest_tle = b;
 	INIT_LIST_HEAD(&tconn->out_of_sequence_requests);
+	INIT_LIST_HEAD(&tconn->barrier_acked_requests);
 
 	return 1;
 }
@@ -315,7 +316,7 @@ void tl_release(struct drbd_tconn *tconn, unsigned int barrier_nr,
 	   These have been list_move'd to the out_of_sequence_requests list in
 	   _req_mod(, BARRIER_ACKED) above.
 	   */
-	list_del_init(&b->requests);
+	list_splice_init(&b->requests, &tconn->barrier_acked_requests);
 	mdev = b->w.mdev;
 
 	nob = b->next;
@@ -417,8 +418,23 @@ void _tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what)
 		b = tmp;
 		list_splice(&carry_reads, &b->requests);
 	}
-}
 
+	/* Actions operating on the disk state, also want to work on
+	   requests that got barrier acked. */
+	switch (what) {
+	case FAIL_FROZEN_DISK_IO:
+	case RESTART_FROZEN_DISK_IO:
+		list_for_each_safe(le, tle, &tconn->barrier_acked_requests) {
+			req = list_entry(le, struct drbd_request, tl_requests);
+			_req_mod(req, what);
+		}
+	case CONNECTION_LOST_WHILE_PENDING:
+	case RESEND:
+		break;
+	default:
+		conn_err(tconn, "what = %d in _tl_restart()\n", what);
+	}
+}
 
 /**
  * tl_clear() - Clears all requests and &struct drbd_tl_epoch objects out of the TL
@@ -467,6 +483,42 @@ void tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what)
 	spin_unlock_irq(&tconn->req_lock);
 }
 
+/**
+ * tl_apply() - Applies an event to all requests for a certain mdev in the TL
+ * @mdev:	DRBD device.
+ * @what:       The action/event to perform with all request objects
+ *
+ * @what might ony be ABORT_DISK_IO.
+ */
+void tl_apply(struct drbd_conf *mdev, enum drbd_req_event what)
+{
+	struct drbd_tconn *tconn = mdev->tconn;
+	struct drbd_tl_epoch *b;
+	struct list_head *le, *tle;
+	struct drbd_request *req;
+
+	D_ASSERT(what == ABORT_DISK_IO);
+
+	spin_lock_irq(&tconn->req_lock);
+	b = tconn->oldest_tle;
+	while (b) {
+		list_for_each_safe(le, tle, &b->requests) {
+			req = list_entry(le, struct drbd_request, tl_requests);
+			if (req->w.mdev == mdev)
+				_req_mod(req, what);
+		}
+		b = b->next;
+	}
+
+	list_for_each_safe(le, tle, &tconn->barrier_acked_requests) {
+		req = list_entry(le, struct drbd_request, tl_requests);
+		if (req->w.mdev == mdev)
+			_req_mod(req, what);
+	}
+
+	spin_unlock_irq(&tconn->req_lock);
+}
+
 static int drbd_thread_setup(void *arg)
 {
 	struct drbd_thread *thi = (struct drbd_thread *) arg;
@@ -2003,8 +2055,8 @@ void drbd_init_set_defaults(struct drbd_conf *mdev)
 	atomic_set(&mdev->rs_sect_in, 0);
 	atomic_set(&mdev->rs_sect_ev, 0);
 	atomic_set(&mdev->ap_in_flight, 0);
+	atomic_set(&mdev->md_io_in_use, 0);
 
-	mutex_init(&mdev->md_io_mutex);
 	mutex_init(&mdev->own_state_mutex);
 	mdev->state_mutex = &mdev->own_state_mutex;
 
@@ -2282,6 +2334,8 @@ void drbd_minor_destroy(struct kref *kref)
 	struct drbd_conf *mdev = container_of(kref, struct drbd_conf, kref);
 	struct drbd_tconn *tconn = mdev->tconn;
 
+	del_timer_sync(&mdev->request_timer);
+
 	/* paranoia asserts */
 	D_ASSERT(mdev->open_cnt == 0);
 	D_ASSERT(list_empty(&mdev->tconn->data.work.q));
@@ -2868,8 +2922,10 @@ void drbd_md_sync(struct drbd_conf *mdev)
 	if (!get_ldev_if_state(mdev, D_FAILED))
 		return;
 
-	mutex_lock(&mdev->md_io_mutex);
-	buffer = (struct meta_data_on_disk *)page_address(mdev->md_io_page);
+	buffer = drbd_md_get_buffer(mdev);
+	if (!buffer)
+		goto out;
+
 	memset(buffer, 0, 512);
 
 	buffer->la_size = cpu_to_be64(drbd_get_capacity(mdev->this_bdev));
@@ -2900,7 +2956,8 @@ void drbd_md_sync(struct drbd_conf *mdev)
 	 * since we updated it on metadata. */
 	mdev->ldev->md.la_size_sect = drbd_get_capacity(mdev->this_bdev);
 
-	mutex_unlock(&mdev->md_io_mutex);
+	drbd_md_put_buffer(mdev);
+out:
 	put_ldev(mdev);
 }
 
@@ -2920,8 +2977,9 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
 	if (!get_ldev_if_state(mdev, D_ATTACHING))
 		return ERR_IO_MD_DISK;
 
-	mutex_lock(&mdev->md_io_mutex);
-	buffer = (struct meta_data_on_disk *)page_address(mdev->md_io_page);
+	buffer = drbd_md_get_buffer(mdev);
+	if (!buffer)
+		goto out;
 
 	if (drbd_md_sync_page_io(mdev, bdev, bdev->md.md_offset, READ)) {
 		/* NOTE: can't do normal error processing here as this is
@@ -2983,7 +3041,8 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
 		bdev->disk_conf->al_extents = DRBD_AL_EXTENTS_DEF;
 
  err:
-	mutex_unlock(&mdev->md_io_mutex);
+	drbd_md_put_buffer(mdev);
+ out:
 	put_ldev(mdev);
 
 	return rv;
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 97d1dab045d2..bf8d0b077624 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -1236,6 +1236,7 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
 	synchronize_rcu();
 	kfree(old_disk_conf);
 	kfree(old_plan);
+	mod_timer(&mdev->request_timer, jiffies + HZ);
 	goto success;
 
 fail_unlock:
@@ -1628,6 +1629,8 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 	if (rv < SS_SUCCESS)
 		goto force_diskless_dec;
 
+	mod_timer(&mdev->request_timer, jiffies + HZ);
+
 	if (mdev->state.role == R_PRIMARY)
 		mdev->ldev->md.uuid[UI_CURRENT] |=  (u64)1;
 	else
@@ -1667,10 +1670,17 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 	return 0;
 }
 
-static int adm_detach(struct drbd_conf *mdev)
+static int adm_detach(struct drbd_conf *mdev, int force)
 {
 	enum drbd_state_rv retcode;
 	int ret;
+
+	if (force) {
+		drbd_force_state(mdev, NS(disk, D_FAILED));
+		retcode = SS_SUCCESS;
+		goto out;
+	}
+
 	drbd_suspend_io(mdev); /* so no-one is stuck in drbd_al_begin_io */
 	retcode = drbd_request_state(mdev, NS(disk, D_FAILED));
 	/* D_FAILED will transition to DISKLESS. */
@@ -1681,6 +1691,7 @@ static int adm_detach(struct drbd_conf *mdev)
 		retcode = SS_NOTHING_TO_DO;
 	if (ret)
 		retcode = ERR_INTR;
+out:
 	return retcode;
 }
 
@@ -1692,6 +1703,8 @@ static int adm_detach(struct drbd_conf *mdev)
 int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info)
 {
 	enum drbd_ret_code retcode;
+	struct detach_parms parms = { };
+	int err;
 
 	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
 	if (!adm_ctx.reply_skb)
@@ -1699,7 +1712,16 @@ int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info)
 	if (retcode != NO_ERROR)
 		goto out;
 
-	retcode = adm_detach(adm_ctx.mdev);
+	if (info->attrs[DRBD_NLA_DETACH_PARMS]) {
+		err = detach_parms_from_attrs(&parms, info);
+		if (err) {
+			retcode = ERR_MANDATORY_TAG;
+			drbd_msg_put_info(from_attrs_err_to_txt(err));
+			goto out;
+		}
+	}
+
+	retcode = adm_detach(adm_ctx.mdev, parms.force_detach);
 out:
 	drbd_adm_finish(info, retcode);
 	return 0;
@@ -3116,7 +3138,7 @@ int drbd_adm_down(struct sk_buff *skb, struct genl_info *info)
 
 	/* detach */
 	idr_for_each_entry(&adm_ctx.tconn->volumes, mdev, i) {
-		retcode = adm_detach(mdev);
+		retcode = adm_detach(mdev, 0);
 		if (retcode < SS_SUCCESS) {
 			drbd_msg_put_info("failed to detach");
 			goto out;
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 7218750d2937..3a7e54b8f418 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -4366,8 +4366,6 @@ static int drbd_disconnected(struct drbd_conf *mdev)
 	atomic_set(&mdev->rs_pending_cnt, 0);
 	wake_up(&mdev->misc_wait);
 
-	del_timer(&mdev->request_timer);
-
 	del_timer_sync(&mdev->resync_timer);
 	resync_timer_fn((unsigned long)mdev);
 
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index c4e4553f5c2c..8fa51cda3b7e 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -213,8 +213,7 @@ void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m)
 {
 	const unsigned long s = req->rq_state;
 	struct drbd_conf *mdev = req->w.mdev;
-	/* only WRITES may end up here without a master bio (on barrier ack) */
-	int rw = req->master_bio ? bio_data_dir(req->master_bio) : WRITE;
+	int rw = req->rq_state & RQ_WRITE ? WRITE : READ;
 
 	/* we must not complete the master bio, while it is
 	 *	still being processed by _drbd_send_zc_bio (drbd_send_dblock)
@@ -225,7 +224,7 @@ void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m)
 	 *	the receiver,
 	 *	the bio_endio completion callbacks.
 	 */
-	if (s & RQ_LOCAL_PENDING)
+	if (s & RQ_LOCAL_PENDING && !(s & RQ_LOCAL_ABORTED))
 		return;
 	if (req->i.waiting) {
 		/* Retry all conflicting peer requests.  */
@@ -288,6 +287,9 @@ void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m)
 		req->master_bio = NULL;
 	}
 
+	if (s & RQ_LOCAL_PENDING)
+		return;
+
 	if ((s & RQ_NET_MASK) == 0 || (s & RQ_NET_DONE)) {
 		/* this is disconnected (local only) operation,
 		 * or protocol C P_WRITE_ACK,
@@ -362,7 +364,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
 		break;
 
 	case COMPLETED_OK:
-		if (bio_data_dir(req->master_bio) == WRITE)
+		if (req->rq_state & RQ_WRITE)
 			mdev->writ_cnt += req->i.size >> 9;
 		else
 			mdev->read_cnt += req->i.size >> 9;
@@ -374,6 +376,14 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
 		put_ldev(mdev);
 		break;
 
+	case ABORT_DISK_IO:
+		req->rq_state |= RQ_LOCAL_ABORTED;
+		if (req->rq_state & RQ_WRITE)
+			_req_may_be_done_not_susp(req, m);
+		else
+			goto goto_queue_for_net_read;
+		break;
+
 	case WRITE_COMPLETED_WITH_ERROR:
 		req->rq_state |= RQ_LOCAL_COMPLETED;
 		req->rq_state &= ~RQ_LOCAL_PENDING;
@@ -402,6 +412,8 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
 		__drbd_chk_io_error(mdev, false);
 		put_ldev(mdev);
 
+	goto_queue_for_net_read:
+
 		/* no point in retrying if there is no good remote data,
 		 * or we have no connection. */
 		if (mdev->state.pdsk != D_UP_TO_DATE) {
@@ -1071,14 +1083,21 @@ void request_timer_fn(unsigned long data)
 	struct drbd_request *req; /* oldest request */
 	struct list_head *le;
 	struct net_conf *nc;
-	unsigned long et; /* effective timeout = ko_count * timeout */
+	unsigned long ent = 0, dt = 0, et; /* effective timeout = ko_count * timeout */
 
 	rcu_read_lock();
 	nc = rcu_dereference(tconn->net_conf);
-	et = nc ? nc->timeout * HZ/10 * nc->ko_count : 0;
+	ent = nc ? nc->timeout * HZ/10 * nc->ko_count : 0;
+
+	if (get_ldev(mdev)) {
+		dt = rcu_dereference(mdev->ldev->disk_conf)->disk_timeout * HZ / 10;
+		put_ldev(mdev);
+	}
 	rcu_read_unlock();
 
-	if (!et || mdev->state.conn < C_WF_REPORT_PARAMS)
+	et = min_not_zero(dt, ent);
+
+	if (!et || (mdev->state.conn < C_WF_REPORT_PARAMS && mdev->state.disk <= D_FAILED))
 		return; /* Recurring timer stopped */
 
 	spin_lock_irq(&tconn->req_lock);
@@ -1091,17 +1110,18 @@ void request_timer_fn(unsigned long data)
 
 	le = le->prev;
 	req = list_entry(le, struct drbd_request, tl_requests);
-	if (time_is_before_eq_jiffies(req->start_time + et)) {
-		if (req->rq_state & RQ_NET_PENDING) {
+	if (ent && req->rq_state & RQ_NET_PENDING) {
+		if (time_is_before_eq_jiffies(req->start_time + ent)) {
 			dev_warn(DEV, "Remote failed to finish a request within ko-count * timeout\n");
-			_drbd_set_state(_NS(mdev, conn, C_TIMEOUT), CS_VERBOSE, NULL);
-		} else {
-			dev_warn(DEV, "Local backing block device frozen?\n");
-			mod_timer(&mdev->request_timer, jiffies + et);
+			_drbd_set_state(_NS(mdev, conn, C_TIMEOUT), CS_VERBOSE | CS_HARD, NULL);
+		}
+	}
+	if (dt && req->rq_state & RQ_LOCAL_PENDING) {
+		if (time_is_before_eq_jiffies(req->start_time + dt)) {
+			dev_warn(DEV, "Local backing device failed to meet the disk-timeout\n");
+			__drbd_chk_io_error(mdev, 1);
 		}
-	} else {
-		mod_timer(&mdev->request_timer, req->start_time + et);
 	}
-
 	spin_unlock_irq(&tconn->req_lock);
+	mod_timer(&mdev->request_timer, req->start_time + et);
 }
diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h
index 5135c95fbf85..f6aff150addb 100644
--- a/drivers/block/drbd/drbd_req.h
+++ b/drivers/block/drbd/drbd_req.h
@@ -106,6 +106,7 @@ enum drbd_req_event {
 	READ_COMPLETED_WITH_ERROR,
 	READ_AHEAD_COMPLETED_WITH_ERROR,
 	WRITE_COMPLETED_WITH_ERROR,
+	ABORT_DISK_IO,
 	COMPLETED_OK,
 	RESEND,
 	FAIL_FROZEN_DISK_IO,
@@ -119,18 +120,21 @@ enum drbd_req_event {
  * same time, so we should hold the request lock anyways.
  */
 enum drbd_req_state_bits {
-	/* 210
-	 * 000: no local possible
-	 * 001: to be submitted
+	/* 3210
+	 * 0000: no local possible
+	 * 0001: to be submitted
 	 *    UNUSED, we could map: 011: submitted, completion still pending
-	 * 110: completed ok
-	 * 010: completed with error
+	 * 0110: completed ok
+	 * 0010: completed with error
+	 * 1001: Aborted (before completion)
+	 * 1x10: Aborted and completed -> free
 	 */
 	__RQ_LOCAL_PENDING,
 	__RQ_LOCAL_COMPLETED,
 	__RQ_LOCAL_OK,
+	__RQ_LOCAL_ABORTED,
 
-	/* 76543
+	/* 87654
 	 * 00000: no network possible
 	 * 00001: to be send
 	 * 00011: to be send, on worker queue
@@ -209,8 +213,9 @@ enum drbd_req_state_bits {
 #define RQ_LOCAL_PENDING   (1UL << __RQ_LOCAL_PENDING)
 #define RQ_LOCAL_COMPLETED (1UL << __RQ_LOCAL_COMPLETED)
 #define RQ_LOCAL_OK        (1UL << __RQ_LOCAL_OK)
+#define RQ_LOCAL_ABORTED   (1UL << __RQ_LOCAL_ABORTED)
 
-#define RQ_LOCAL_MASK      ((RQ_LOCAL_OK << 1)-1) /* 0x07 */
+#define RQ_LOCAL_MASK      ((RQ_LOCAL_ABORTED << 1)-1)
 
 #define RQ_NET_PENDING     (1UL << __RQ_NET_PENDING)
 #define RQ_NET_QUEUED      (1UL << __RQ_NET_QUEUED)
diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c
index 4c13a6f4f184..f51cefdbeff3 100644
--- a/drivers/block/drbd/drbd_state.c
+++ b/drivers/block/drbd/drbd_state.c
@@ -29,6 +29,9 @@
 #include "drbd_int.h"
 #include "drbd_req.h"
 
+/* in drbd_main.c */
+extern void tl_apply(struct drbd_conf *mdev, enum drbd_req_event what);
+
 struct after_state_chg_work {
 	struct drbd_work w;
 	union drbd_state os;
@@ -1315,6 +1318,10 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
 		rcu_read_unlock();
 		was_io_error = test_and_clear_bit(WAS_IO_ERROR, &mdev->flags);
 
+		/* Immediately allow completion of all application IO, that waits
+		   for completion from the local disk. */
+		tl_apply(mdev, ABORT_DISK_IO);
+
 		/* current state still has to be D_FAILED,
 		 * there is only one way out: to D_DISKLESS,
 		 * and that may only happen after our put_ldev below. */
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index 6410c55831e0..dac8d9bc4bec 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -67,11 +67,18 @@ rwlock_t global_state_lock;
 void drbd_md_io_complete(struct bio *bio, int error)
 {
 	struct drbd_md_io *md_io;
+	struct drbd_conf *mdev;
 
 	md_io = (struct drbd_md_io *)bio->bi_private;
+	mdev = container_of(md_io, struct drbd_conf, md_io);
+
 	md_io->error = error;
 
-	complete(&md_io->event);
+	md_io->done = 1;
+	wake_up(&mdev->misc_wait);
+	bio_put(bio);
+	drbd_md_put_buffer(mdev);
+	put_ldev(mdev);
 }
 
 /* reads on behalf of the partner,
diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h
index e879a9324380..2e6cefefe5e5 100644
--- a/include/linux/drbd_genl.h
+++ b/include/linux/drbd_genl.h
@@ -128,6 +128,7 @@ GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf,
 	__flg_field_def(17, DRBD_GENLA_F_MANDATORY,	disk_flushes, DRBD_DISK_FLUSHES_DEF)
 	__flg_field_def(18, DRBD_GENLA_F_MANDATORY,	disk_drain, DRBD_DISK_DRAIN_DEF)
 	__flg_field_def(19, DRBD_GENLA_F_MANDATORY,	md_flushes, DRBD_MD_FLUSHES_DEF)
+	__u32_field_def(20,	DRBD_GENLA_F_MANDATORY,	disk_timeout, DRBD_DISK_TIMEOUT_DEF)
 )
 
 GENL_struct(DRBD_NLA_RESOURCE_OPTS, 4, res_opts,
@@ -224,6 +225,10 @@ GENL_struct(DRBD_NLA_DISCONNECT_PARMS, 12, disconnect_parms,
 	__flg_field(1, DRBD_GENLA_F_MANDATORY,	force_disconnect)
 )
 
+GENL_struct(DRBD_NLA_DETACH_PARMS, 13, detach_parms,
+	__flg_field(1, DRBD_GENLA_F_MANDATORY,	force_detach)
+)
+
 /*
  * Notifications and commands (genlmsghdr->cmd)
  */
@@ -335,7 +340,9 @@ GENL_op(
 )
 
 GENL_op(DRBD_ADM_DETACH,	18, GENL_doit(drbd_adm_detach),
-	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
+	GENL_tla_expected(DRBD_NLA_DETACH_PARMS, DRBD_GENLA_F_MANDATORY))
+
 GENL_op(DRBD_ADM_INVALIDATE,	19, GENL_doit(drbd_adm_invalidate),
 	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
 GENL_op(DRBD_ADM_INVAL_PEER,	20, GENL_doit(drbd_adm_invalidate_peer),
diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h
index f1046b13d9f6..ddd332db2a5d 100644
--- a/include/linux/drbd_limits.h
+++ b/include/linux/drbd_limits.h
@@ -50,6 +50,12 @@
 #define DRBD_TIMEOUT_MAX 600
 #define DRBD_TIMEOUT_DEF 60       /* 6 seconds */
 
+ /* If backing disk takes longer than disk_timeout, mark the disk as failed */
+#define DRBD_DISK_TIMEOUT_MIN 0    /* 0 = disabled */
+#define DRBD_DISK_TIMEOUT_MAX 6000 /* 10 Minutes */
+#define DRBD_DISK_TIMEOUT_DEF 0    /* disabled */
+#define DRBD_DISK_TIMEOUT_SCALE '1'
+
   /* active connection retries when C_WF_CONNECTION */
 #define DRBD_CONNECT_INT_MIN 1
 #define DRBD_CONNECT_INT_MAX 120
-- 
cgit v1.2.3


From d5d7ebd42250620a6da2a8f6943c024391433488 Mon Sep 17 00:00:00 2001
From: Lars Ellenberg <lars.ellenberg@linbit.com>
Date: Tue, 5 Jul 2011 20:59:26 +0200
Subject: drbd: on attach, enforce clean meta data

Detection of unclean shutdown has moved into user space.

The kernel code will, whenever it updates the meta data, mark it as
"unclean", and will refuse to attach to such unclean meta data.

"drbdadm up" now schedules "drbdmeta apply-al", which will apply
the activity log to the bitmap, and/or reinitialize it, if necessary,
as well as set a "clean" indicator flag.

This moves a bit code out of kernel space.
As a side effect, it also prevents some 8.3 module from accidentally
ignoring the 8.4 style activity log, if someone should downgrade,
whether on purpose, or accidentally because he changed kernel versions
without providing an 8.4 for the new kernel, and the new kernel comes
with in-tree 8.3.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_actlog.c | 259 ---------------------------------------
 drivers/block/drbd/drbd_int.h    |   6 -
 drivers/block/drbd/drbd_main.c   |  26 ++--
 drivers/block/drbd/drbd_nl.c     |  19 +--
 drivers/block/drbd/drbd_state.c  |   1 +
 include/linux/drbd.h             |  10 +-
 6 files changed, 28 insertions(+), 293 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index 58b5b61628fc..da8ffd54fc18 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -462,265 +462,6 @@ w_al_write_transaction(struct drbd_work *w, int unused)
 	return 0;
 }
 
-/* FIXME
- * reading of the activity log,
- * and potentially dirtying of the affected bitmap regions,
- * should be done from userland only.
- * DRBD would simply always attach with an empty activity log,
- * and refuse to attach to something that looks like a crashed primary.
- */
-
-/**
- * drbd_al_read_tr() - Read a single transaction from the on disk activity log
- * @mdev:	DRBD device.
- * @bdev:	Block device to read form.
- * @b:		pointer to an al_transaction.
- * @index:	On disk slot of the transaction to read.
- *
- * Returns -1 on IO error, 0 on checksum error and 1 upon success.
- */
-static int drbd_al_read_tr(struct drbd_conf *mdev,
-			   struct drbd_backing_dev *bdev,
-			   int index)
-{
-	struct al_transaction_on_disk *b = page_address(mdev->md_io_page);
-	sector_t sector;
-	u32 crc;
-
-	sector =  bdev->md.md_offset
-		+ bdev->md.al_offset
-		+ index * (MD_BLOCK_SIZE>>9);
-
-	/* Dont process error normally,
-	 * as this is done before disk is attached! */
-	if (drbd_md_sync_page_io(mdev, bdev, sector, READ))
-		return -1;
-
-	if (!expect(b->magic == cpu_to_be32(DRBD_AL_MAGIC)))
-		return 0;
-
-	if (!expect(be16_to_cpu(b->n_updates) <= AL_UPDATES_PER_TRANSACTION))
-		return 0;
-
-	if (!expect(be16_to_cpu(b->context_size) <= DRBD_AL_EXTENTS_MAX))
-		return 0;
-
-	if (!expect(be16_to_cpu(b->context_start_slot_nr) < DRBD_AL_EXTENTS_MAX))
-		return 0;
-
-	crc = be32_to_cpu(b->crc32c);
-	b->crc32c = 0;
-	if (!expect(crc == crc32c(0, b, 4096)))
-		return 0;
-
-	return 1;
-}
-
-/**
- * drbd_al_read_log() - Restores the activity log from its on disk representation.
- * @mdev:	DRBD device.
- * @bdev:	Block device to read form.
- *
- * Returns 1 on success, returns 0 when reading the log failed due to IO errors.
- */
-int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
-{
-	struct al_transaction_on_disk *b;
-	int i;
-	int rv;
-	int mx;
-	int active_extents = 0;
-	int transactions = 0;
-	int found_valid = 0;
-	int found_initialized = 0;
-	int from = 0;
-	int to = 0;
-	u32 from_tnr = 0;
-	u32 to_tnr = 0;
-	u32 cnr;
-
-	/* Note that this is expected to be called with a newly created,
-	 * clean and all unused activity log of the "expected size".
-	 */
-
-	/* lock out all other meta data io for now,
-	 * and make sure the page is mapped.
-	 */
-	b = drbd_md_get_buffer(mdev);
-	if (!b)
-		return 0;
-
-	/* Always use the full ringbuffer space for now.
-	 * possible optimization: read in all of it,
-	 * then scan the in-memory pages. */
-
-	mx = (MD_AL_SECTORS*512/MD_BLOCK_SIZE);
-
-	/* Find the valid transaction in the log */
-	for (i = 0; i < mx; i++) {
-		rv = drbd_al_read_tr(mdev, bdev, i);
-		/* invalid data in that block */
-		if (rv == 0)
-			continue;
-		if (be16_to_cpu(b->transaction_type) == AL_TR_INITIALIZED) {
-			++found_initialized;
-			continue;
-		}
-
-		/* IO error */
-		if (rv == -1) {
-			drbd_md_put_buffer(mdev);
-			return 0;
-		}
-
-		cnr = be32_to_cpu(b->tr_number);
-		if (++found_valid == 1) {
-			from = i;
-			to = i;
-			from_tnr = cnr;
-			to_tnr = cnr;
-			continue;
-		}
-
-		D_ASSERT(cnr != to_tnr);
-		D_ASSERT(cnr != from_tnr);
-		if ((int)cnr - (int)from_tnr < 0) {
-			D_ASSERT(from_tnr - cnr + i - from == mx);
-			from = i;
-			from_tnr = cnr;
-		}
-		if ((int)cnr - (int)to_tnr > 0) {
-			D_ASSERT(cnr - to_tnr == i - to);
-			to = i;
-			to_tnr = cnr;
-		}
-	}
-
-	if (!found_valid) {
-		if (found_initialized != mx)
-			dev_warn(DEV, "No usable activity log found.\n");
-		drbd_md_put_buffer(mdev);
-		return 1;
-	}
-
-	/* Read the valid transactions.
-	 * dev_info(DEV, "Reading from %d to %d.\n",from,to); */
-	i = from;
-	while (1) {
-		struct lc_element *e;
-		unsigned j, n, slot, extent_nr;
-
-		rv = drbd_al_read_tr(mdev, bdev, i);
-		if (!expect(rv != 0))
-			goto cancel;
-		if (rv == -1) {
-			drbd_md_put_buffer(mdev);
-			return 0;
-		}
-
-		/* deal with different transaction types.
-		 * not yet implemented */
-		if (!expect(b->transaction_type == 0))
-			goto cancel;
-
-		/* on the fly re-create/resize activity log?
-		 * will be a special transaction type flag. */
-		if (!expect(be16_to_cpu(b->context_size) == mdev->act_log->nr_elements))
-			goto cancel;
-		if (!expect(be16_to_cpu(b->context_start_slot_nr) < mdev->act_log->nr_elements))
-			goto cancel;
-
-		/* We are the only user of the activity log right now,
-		 * don't actually need to take that lock. */
-		spin_lock_irq(&mdev->al_lock);
-
-		/* first, apply the context, ... */
-		for (j = 0, slot = be16_to_cpu(b->context_start_slot_nr);
-		     j < AL_CONTEXT_PER_TRANSACTION &&
-		     slot < mdev->act_log->nr_elements; j++, slot++) {
-			extent_nr = be32_to_cpu(b->context[j]);
-			e = lc_element_by_index(mdev->act_log, slot);
-			if (e->lc_number != extent_nr) {
-				if (extent_nr != LC_FREE)
-					active_extents++;
-				else
-					active_extents--;
-			}
-			lc_set(mdev->act_log, extent_nr, slot);
-		}
-
-		/* ... then apply the updates,
-		 * which override the context information.
-		 * drbd_al_read_tr already did the rangecheck
-		 * on n <= AL_UPDATES_PER_TRANSACTION */
-		n = be16_to_cpu(b->n_updates);
-		for (j = 0; j < n; j++) {
-			slot = be16_to_cpu(b->update_slot_nr[j]);
-			extent_nr = be32_to_cpu(b->update_extent_nr[j]);
-			if (!expect(slot < mdev->act_log->nr_elements))
-				break;
-			e = lc_element_by_index(mdev->act_log, slot);
-			if (e->lc_number != extent_nr) {
-				if (extent_nr != LC_FREE)
-					active_extents++;
-				else
-					active_extents--;
-			}
-			lc_set(mdev->act_log, extent_nr, slot);
-		}
-		spin_unlock_irq(&mdev->al_lock);
-
-		transactions++;
-
-cancel:
-		if (i == to)
-			break;
-		i++;
-		if (i >= mx)
-			i = 0;
-	}
-
-	mdev->al_tr_number = to_tnr+1;
-	mdev->al_tr_pos = (to + 1) % (MD_AL_SECTORS*512/MD_BLOCK_SIZE);
-
-	/* ok, we are done with it */
-	drbd_md_put_buffer(mdev);
-
-	dev_info(DEV, "Found %d transactions (%d active extents) in activity log.\n",
-	     transactions, active_extents);
-
-	return 1;
-}
-
-/**
- * drbd_al_apply_to_bm() - Sets the bitmap to dirty(1) where covered by active AL extents
- * @mdev:	DRBD device.
- */
-void drbd_al_apply_to_bm(struct drbd_conf *mdev)
-{
-	unsigned int enr;
-	unsigned long add = 0;
-	char ppb[10];
-	int i, tmp;
-
-	wait_event(mdev->al_wait, lc_try_lock(mdev->act_log));
-
-	for (i = 0; i < mdev->act_log->nr_elements; i++) {
-		enr = lc_element_by_index(mdev->act_log, i)->lc_number;
-		if (enr == LC_FREE)
-			continue;
-		tmp = drbd_bm_ALe_set_all(mdev, enr);
-		dynamic_dev_dbg(DEV, "AL: set %d bits in extent %u\n", tmp, enr);
-		add += tmp;
-	}
-
-	lc_unlock(mdev->act_log);
-	wake_up(&mdev->al_wait);
-
-	dev_info(DEV, "Marked additional %s as out-of-sync based on AL.\n",
-	     ppsize(ppb, Bit2KB(add)));
-}
-
 static int _try_lc_del(struct drbd_conf *mdev, struct lc_element *al_ext)
 {
 	int rv;
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 4e582058a7c9..9d0d6d0fb820 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -164,10 +164,6 @@ drbd_insert_fault(struct drbd_conf *mdev, unsigned int type) {
 /* usual integer division */
 #define div_floor(A, B) ((A)/(B))
 
-/* drbd_meta-data.c (still in drbd_main.c) */
-/* 4th incarnation of the disk layout. */
-#define DRBD_MD_MAGIC (DRBD_MAGIC+4)
-
 extern struct ratelimit_state drbd_ratelimit_state;
 extern struct idr minors; /* RCU, updates: genl_lock() */
 extern struct list_head drbd_tconns; /* RCU, updates: genl_lock() */
@@ -1560,7 +1556,6 @@ extern void drbd_rs_cancel_all(struct drbd_conf *mdev);
 extern int drbd_rs_del_all(struct drbd_conf *mdev);
 extern void drbd_rs_failed_io(struct drbd_conf *mdev,
 		sector_t sector, int size);
-extern int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *);
 extern void drbd_advance_rs_marks(struct drbd_conf *mdev, unsigned long still_to_go);
 extern void __drbd_set_in_sync(struct drbd_conf *mdev, sector_t sector,
 		int size, const char *file, const unsigned int line);
@@ -1570,7 +1565,6 @@ extern int __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector,
 		int size, const char *file, const unsigned int line);
 #define drbd_set_out_of_sync(mdev, sector, size) \
 	__drbd_set_out_of_sync(mdev, sector, size, __FILE__, __LINE__)
-extern void drbd_al_apply_to_bm(struct drbd_conf *mdev);
 extern void drbd_al_shrink(struct drbd_conf *mdev);
 
 /* drbd_nl.c */
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 15384986e4a4..f1d696ab6e83 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -2932,7 +2932,7 @@ void drbd_md_sync(struct drbd_conf *mdev)
 	for (i = UI_CURRENT; i < UI_SIZE; i++)
 		buffer->uuid[i] = cpu_to_be64(mdev->ldev->md.uuid[i]);
 	buffer->flags = cpu_to_be32(mdev->ldev->md.flags);
-	buffer->magic = cpu_to_be32(DRBD_MD_MAGIC);
+	buffer->magic = cpu_to_be32(DRBD_MD_MAGIC_84_UNCLEAN);
 
 	buffer->md_size_sect  = cpu_to_be32(mdev->ldev->md.md_size_sect);
 	buffer->al_offset     = cpu_to_be32(mdev->ldev->md.al_offset);
@@ -2967,11 +2967,12 @@ out:
  * @bdev:	Device from which the meta data should be read in.
  *
  * Return 0 (NO_ERROR) on success, and an enum drbd_ret_code in case
- * something goes wrong.  Currently only: ERR_IO_MD_DISK, ERR_MD_INVALID.
+ * something goes wrong.
  */
 int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
 {
 	struct meta_data_on_disk *buffer;
+	u32 magic, flags;
 	int i, rv = NO_ERROR;
 
 	if (!get_ldev_if_state(mdev, D_ATTACHING))
@@ -2989,8 +2990,20 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
 		goto err;
 	}
 
-	if (buffer->magic != cpu_to_be32(DRBD_MD_MAGIC)) {
-		dev_err(DEV, "Error while reading metadata, magic not found.\n");
+	magic = be32_to_cpu(buffer->magic);
+	flags = be32_to_cpu(buffer->flags);
+	if (magic == DRBD_MD_MAGIC_84_UNCLEAN ||
+	    (magic == DRBD_MD_MAGIC_08 && !(flags & MDF_AL_CLEAN))) {
+			/* btw: that's Activity Log clean, not "all" clean. */
+		dev_err(DEV, "Found unclean meta data. Did you \"drbdadm apply-al\"?\n");
+		rv = ERR_MD_UNCLEAN;
+		goto err;
+	}
+	if (magic != DRBD_MD_MAGIC_08) {
+		if (magic == DRBD_MD_MAGIC_07) 
+			dev_err(DEV, "Found old (0.7) meta data magic. Did you \"drbdadm create-md\"?\n");
+		else
+			dev_err(DEV, "Meta data magic not found. Did you \"drbdadm create-md\"?\n");
 		rv = ERR_MD_INVALID;
 		goto err;
 	}
@@ -3035,11 +3048,6 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
 	}
 	spin_unlock_irq(&mdev->tconn->req_lock);
 
-	/* This blocks wants to be get removed... */
-	bdev->disk_conf->al_extents = be32_to_cpu(buffer->al_nr_extents);
-	if (bdev->disk_conf->al_extents < DRBD_AL_EXTENTS_MIN)
-		bdev->disk_conf->al_extents = DRBD_AL_EXTENTS_DEF;
-
  err:
 	drbd_md_put_buffer(mdev);
  out:
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index bf8d0b077624..b39f5dc0f47b 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -1267,7 +1267,6 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 	union drbd_state ns, os;
 	enum drbd_state_rv rv;
 	struct net_conf *nc;
-	int cp_discovered = 0;
 
 	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
 	if (!adm_ctx.reply_skb)
@@ -1477,11 +1476,6 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 		goto force_diskless_dec;
 	}
 
-	if (!drbd_al_read_log(mdev, nbc)) {
-		retcode = ERR_IO_MD_DISK;
-		goto force_diskless_dec;
-	}
-
 	/* Reset the "barriers don't work" bits here, then force meta data to
 	 * be written, to ensure we determine if barriers are supported. */
 	if (new_disk_conf->md_flushes)
@@ -1511,10 +1505,8 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 		clear_bit(CRASHED_PRIMARY, &mdev->flags);
 
 	if (drbd_md_test_flag(mdev->ldev, MDF_PRIMARY_IND) &&
-	    !(mdev->state.role == R_PRIMARY && mdev->tconn->susp_nod)) {
+	    !(mdev->state.role == R_PRIMARY && mdev->tconn->susp_nod))
 		set_bit(CRASHED_PRIMARY, &mdev->flags);
-		cp_discovered = 1;
-	}
 
 	mdev->send_cnt = 0;
 	mdev->recv_cnt = 0;
@@ -1566,15 +1558,6 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 		}
 	}
 
-	if (cp_discovered) {
-		drbd_al_apply_to_bm(mdev);
-		if (drbd_bitmap_io(mdev, &drbd_bm_write,
-			"crashed primary apply AL", BM_LOCKED_MASK)) {
-			retcode = ERR_IO_MD_DISK;
-			goto force_diskless_dec;
-		}
-	}
-
 	if (_drbd_bm_total_weight(mdev) == drbd_bm_bits(mdev))
 		drbd_suspend_al(mdev); /* IO is still suspended here... */
 
diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c
index f51cefdbeff3..c4d0d96d7906 100644
--- a/drivers/block/drbd/drbd_state.c
+++ b/drivers/block/drbd/drbd_state.c
@@ -1017,6 +1017,7 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns,
 						 MDF_CONNECTED_IND|MDF_WAS_UP_TO_DATE|
 						 MDF_PEER_OUT_DATED|MDF_CRASHED_PRIMARY);
 
+		mdf &= ~MDF_AL_CLEAN;
 		if (test_bit(CRASHED_PRIMARY, &mdev->flags))
 			mdf |= MDF_CRASHED_PRIMARY;
 		if (mdev->state.role == R_PRIMARY ||
diff --git a/include/linux/drbd.h b/include/linux/drbd.h
index 161cd414b036..1e9f754b66ac 100644
--- a/include/linux/drbd.h
+++ b/include/linux/drbd.h
@@ -162,6 +162,7 @@ enum drbd_ret_code {
 	ERR_INVALID_REQUEST	= 162,
 	ERR_NEED_APV_100	= 163,
 	ERR_NEED_ALLOW_TWO_PRI  = 164,
+	ERR_MD_UNCLEAN          = 165,
 
 	/* insert new ones above this line */
 	AFTER_LAST_ERR_CODE
@@ -321,7 +322,8 @@ extern const char *drbd_set_st_err_str(enum drbd_state_rv);
 #define MDF_FULL_SYNC		(1 << 3)
 #define MDF_WAS_UP_TO_DATE	(1 << 4)
 #define MDF_PEER_OUT_DATED	(1 << 5)
-#define MDF_CRASHED_PRIMARY     (1 << 6)
+#define MDF_CRASHED_PRIMARY	(1 << 6)
+#define MDF_AL_CLEAN		(1 << 7)
 
 enum drbd_uuid_index {
 	UI_CURRENT,
@@ -341,10 +343,16 @@ enum drbd_timeout_flag {
 
 #define UUID_JUST_CREATED ((__u64)4)
 
+/* magic numbers used in meta data and network packets */
 #define DRBD_MAGIC 0x83740267
 #define DRBD_MAGIC_BIG 0x835a
 #define DRBD_MAGIC_100 0x8620ec20
 
+#define DRBD_MD_MAGIC_07   (DRBD_MAGIC+3)
+#define DRBD_MD_MAGIC_08   (DRBD_MAGIC+4)
+#define DRBD_MD_MAGIC_84_UNCLEAN	(DRBD_MAGIC+5)
+
+
 /* how I came up with this magic?
  * base64 decode "actlog==" ;) */
 #define DRBD_AL_MAGIC 0x69cb65a2
-- 
cgit v1.2.3


From 65d94927e036cd8e8e1406fa7fc387b4ae730159 Mon Sep 17 00:00:00 2001
From: Philipp Reisner <philipp.reisner@linbit.com>
Date: Wed, 13 Jul 2011 10:24:51 +0200
Subject: drbd: Changed some defaults

* Enabled the resync controller, with a fill target of 50Kib. That gives
  reasonable resync speeds without tuning. A much better default than
  the 250KiB/s fixed.

* Enable bitmap compression. It is save to use, and most people have
  more CPU power than network bandwidth.

* ko-count of 7: Abort a connection if the peer fails to process a
  write request within 42 seconds.

* al-extents of 1237: ~5 GiB seems to be a much more sane default
  these days.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 include/linux/drbd_limits.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h
index ddd332db2a5d..defdebfecb72 100644
--- a/include/linux/drbd_limits.h
+++ b/include/linux/drbd_limits.h
@@ -16,7 +16,7 @@
 #define DEBUG_RANGE_CHECK 0
 
 #define DRBD_MINOR_COUNT_MIN 1
-#define DRBD_MINOR_COUNT_MAX 256
+#define DRBD_MINOR_COUNT_MAX (1U << 20)
 #define DRBD_MINOR_COUNT_DEF 32
 
 #define DRBD_VOLUME_MAX 65535
@@ -99,7 +99,7 @@
    * 200 should be more than enough even for very short timeouts */
 #define DRBD_KO_COUNT_MIN  0
 #define DRBD_KO_COUNT_MAX  200
-#define DRBD_KO_COUNT_DEF  0
+#define DRBD_KO_COUNT_DEF  7
 /* } */
 
 /* syncer { */
@@ -117,7 +117,7 @@
    * 919 * 7 = 6433 */
 #define DRBD_AL_EXTENTS_MIN  7
 #define DRBD_AL_EXTENTS_MAX  6433
-#define DRBD_AL_EXTENTS_DEF  127
+#define DRBD_AL_EXTENTS_DEF  1237
 
 #define DRBD_MINOR_NUMBER_MIN  -1
 #define DRBD_MINOR_NUMBER_MAX  (1<<30)
@@ -151,7 +151,7 @@
 
 #define DRBD_C_PLAN_AHEAD_MIN  0
 #define DRBD_C_PLAN_AHEAD_MAX  300
-#define DRBD_C_PLAN_AHEAD_DEF  0 /* RS rate controller disabled by default */
+#define DRBD_C_PLAN_AHEAD_DEF  20
 
 #define DRBD_C_DELAY_TARGET_MIN 1
 #define DRBD_C_DELAY_TARGET_MAX 100
@@ -159,7 +159,7 @@
 
 #define DRBD_C_FILL_TARGET_MIN 0
 #define DRBD_C_FILL_TARGET_MAX (1<<20) /* 500MByte in sec */
-#define DRBD_C_FILL_TARGET_DEF 0 /* By default disabled -> controlled by delay_target */
+#define DRBD_C_FILL_TARGET_DEF 100 /* Try to place 50KiB in socket send buffer during resync */
 
 #define DRBD_C_MAX_RATE_MIN     250 /* kByte/sec */
 #define DRBD_C_MAX_RATE_MAX     (4 << 20)
@@ -167,7 +167,7 @@
 
 #define DRBD_C_MIN_RATE_MIN     0 /* kByte/sec */
 #define DRBD_C_MIN_RATE_MAX     (4 << 20)
-#define DRBD_C_MIN_RATE_DEF     4096
+#define DRBD_C_MIN_RATE_DEF     250
 
 #define DRBD_CONG_FILL_MIN	0
 #define DRBD_CONG_FILL_MAX	(10<<21) /* 10GByte in sectors */
@@ -187,6 +187,6 @@
 
 #define DRBD_ALLOW_TWO_PRIMARIES_DEF	0
 #define DRBD_ALWAYS_ASBP_DEF	0
-#define DRBD_USE_RLE_DEF	0
+#define DRBD_USE_RLE_DEF	1
 
 #endif
-- 
cgit v1.2.3


From 32bdb64038ba3127245912dae2cc8a450bb1d705 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruen@linbit.com>
Date: Mon, 9 May 2011 18:26:20 +0200
Subject: drbd: Define scale factors in a single place

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 include/linux/drbd_limits.h | 29 +++++++++++++++++++++++++++--
 1 file changed, 27 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h
index defdebfecb72..cd3565cfed44 100644
--- a/include/linux/drbd_limits.h
+++ b/include/linux/drbd_limits.h
@@ -18,29 +18,35 @@
 #define DRBD_MINOR_COUNT_MIN 1
 #define DRBD_MINOR_COUNT_MAX (1U << 20)
 #define DRBD_MINOR_COUNT_DEF 32
+#define DRBD_MINOR_COUNT_SCALE '1'
 
 #define DRBD_VOLUME_MAX 65535
 
 #define DRBD_DIALOG_REFRESH_MIN 0
 #define DRBD_DIALOG_REFRESH_MAX 600
+#define DRBD_DIALOG_REFRESH_SCALE '1'
 
 /* valid port number */
 #define DRBD_PORT_MIN 1
 #define DRBD_PORT_MAX 0xffff
+#define DRBD_PORT_SCALE '1'
 
 /* startup { */
   /* if you want more than 3.4 days, disable */
 #define DRBD_WFC_TIMEOUT_MIN 0
 #define DRBD_WFC_TIMEOUT_MAX 300000
 #define DRBD_WFC_TIMEOUT_DEF 0
+#define DRBD_WFC_TIMEOUT_SCALE '1'
 
 #define DRBD_DEGR_WFC_TIMEOUT_MIN 0
 #define DRBD_DEGR_WFC_TIMEOUT_MAX 300000
 #define DRBD_DEGR_WFC_TIMEOUT_DEF 0
+#define DRBD_DEGR_WFC_TIMEOUT_SCALE '1'
 
 #define DRBD_OUTDATED_WFC_TIMEOUT_MIN 0
 #define DRBD_OUTDATED_WFC_TIMEOUT_MAX 300000
 #define DRBD_OUTDATED_WFC_TIMEOUT_DEF 0
+#define DRBD_OUTDATED_WFC_TIMEOUT_SCALE '1'
 /* }*/
 
 /* net { */
@@ -49,6 +55,7 @@
 #define DRBD_TIMEOUT_MIN 1
 #define DRBD_TIMEOUT_MAX 600
 #define DRBD_TIMEOUT_DEF 60       /* 6 seconds */
+#define DRBD_TIMEOUT_SCALE '1'
 
  /* If backing disk takes longer than disk_timeout, mark the disk as failed */
 #define DRBD_DISK_TIMEOUT_MIN 0    /* 0 = disabled */
@@ -60,46 +67,55 @@
 #define DRBD_CONNECT_INT_MIN 1
 #define DRBD_CONNECT_INT_MAX 120
 #define DRBD_CONNECT_INT_DEF 10   /* seconds */
+#define DRBD_CONNECT_INT_SCALE '1'
 
   /* keep-alive probes when idle */
 #define DRBD_PING_INT_MIN 1
 #define DRBD_PING_INT_MAX 120
 #define DRBD_PING_INT_DEF 10
+#define DRBD_PING_INT_SCALE '1'
 
  /* timeout for the ping packets.*/
 #define DRBD_PING_TIMEO_MIN  1
 #define DRBD_PING_TIMEO_MAX  300
 #define DRBD_PING_TIMEO_DEF  5
+#define DRBD_PING_TIMEO_SCALE '1'
 
   /* max number of write requests between write barriers */
 #define DRBD_MAX_EPOCH_SIZE_MIN 1
 #define DRBD_MAX_EPOCH_SIZE_MAX 20000
 #define DRBD_MAX_EPOCH_SIZE_DEF 2048
+#define DRBD_MAX_EPOCH_SIZE_SCALE '1'
 
   /* I don't think that a tcp send buffer of more than 10M is useful */
 #define DRBD_SNDBUF_SIZE_MIN  0
 #define DRBD_SNDBUF_SIZE_MAX  (10<<20)
 #define DRBD_SNDBUF_SIZE_DEF  0
+#define DRBD_SNDBUF_SIZE_SCALE '1'
 
 #define DRBD_RCVBUF_SIZE_MIN  0
 #define DRBD_RCVBUF_SIZE_MAX  (10<<20)
 #define DRBD_RCVBUF_SIZE_DEF  0
+#define DRBD_RCVBUF_SIZE_SCALE '1'
 
   /* @4k PageSize -> 128kB - 512MB */
 #define DRBD_MAX_BUFFERS_MIN  32
 #define DRBD_MAX_BUFFERS_MAX  131072
 #define DRBD_MAX_BUFFERS_DEF  2048
+#define DRBD_MAX_BUFFERS_SCALE '1'
 
   /* @4k PageSize -> 4kB - 512MB */
 #define DRBD_UNPLUG_WATERMARK_MIN  1
 #define DRBD_UNPLUG_WATERMARK_MAX  131072
 #define DRBD_UNPLUG_WATERMARK_DEF (DRBD_MAX_BUFFERS_DEF/16)
+#define DRBD_UNPLUG_WATERMARK_SCALE '1'
 
   /* 0 is disabled.
    * 200 should be more than enough even for very short timeouts */
 #define DRBD_KO_COUNT_MIN  0
 #define DRBD_KO_COUNT_MAX  200
 #define DRBD_KO_COUNT_DEF  7
+#define DRBD_KO_COUNT_SCALE '1'
 /* } */
 
 /* syncer { */
@@ -118,6 +134,7 @@
 #define DRBD_AL_EXTENTS_MIN  7
 #define DRBD_AL_EXTENTS_MAX  6433
 #define DRBD_AL_EXTENTS_DEF  1237
+#define DRBD_AL_EXTENTS_SCALE '1'
 
 #define DRBD_MINOR_NUMBER_MIN  -1
 #define DRBD_MINOR_NUMBER_MAX  (1<<30)
@@ -148,34 +165,42 @@
 #define DRBD_MAX_BIO_BVECS_MIN 0
 #define DRBD_MAX_BIO_BVECS_MAX 128
 #define DRBD_MAX_BIO_BVECS_DEF 0
+#define DRBD_MAX_BIO_BVECS_SCALE '1'
 
 #define DRBD_C_PLAN_AHEAD_MIN  0
 #define DRBD_C_PLAN_AHEAD_MAX  300
 #define DRBD_C_PLAN_AHEAD_DEF  20
+#define DRBD_C_PLAN_AHEAD_SCALE '1'
 
 #define DRBD_C_DELAY_TARGET_MIN 1
 #define DRBD_C_DELAY_TARGET_MAX 100
 #define DRBD_C_DELAY_TARGET_DEF 10
+#define DRBD_C_DELAY_TARGET_SCALE '1'
 
 #define DRBD_C_FILL_TARGET_MIN 0
 #define DRBD_C_FILL_TARGET_MAX (1<<20) /* 500MByte in sec */
 #define DRBD_C_FILL_TARGET_DEF 100 /* Try to place 50KiB in socket send buffer during resync */
+#define DRBD_C_FILL_TARGET_SCALE 's'  /* sectors */
 
-#define DRBD_C_MAX_RATE_MIN     250 /* kByte/sec */
+#define DRBD_C_MAX_RATE_MIN     250
 #define DRBD_C_MAX_RATE_MAX     (4 << 20)
 #define DRBD_C_MAX_RATE_DEF     102400
+#define DRBD_C_MAX_RATE_SCALE	'k'  /* kilobytes */
 
-#define DRBD_C_MIN_RATE_MIN     0 /* kByte/sec */
+#define DRBD_C_MIN_RATE_MIN     0
 #define DRBD_C_MIN_RATE_MAX     (4 << 20)
 #define DRBD_C_MIN_RATE_DEF     250
+#define DRBD_C_MIN_RATE_SCALE	'k'  /* kilobytes */
 
 #define DRBD_CONG_FILL_MIN	0
 #define DRBD_CONG_FILL_MAX	(10<<21) /* 10GByte in sectors */
 #define DRBD_CONG_FILL_DEF	0
+#define DRBD_CONG_FILL_SCALE	's'  /* sectors */
 
 #define DRBD_CONG_EXTENTS_MIN	DRBD_AL_EXTENTS_MIN
 #define DRBD_CONG_EXTENTS_MAX	DRBD_AL_EXTENTS_MAX
 #define DRBD_CONG_EXTENTS_DEF	DRBD_AL_EXTENTS_DEF
+#define DRBD_CONG_EXTENTS_SCALE DRBD_AL_EXTENTS_SCALE
 
 #define DRBD_PROTOCOL_DEF DRBD_PROT_C
 
-- 
cgit v1.2.3


From 0317d9ecbc9bac43642b4aa70e3e1106f4fd26a1 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruen@linbit.com>
Date: Wed, 13 Jul 2011 13:40:30 +0200
Subject: drbd: Fix the maximum accepted minor device number

The maximum minor device number allowed by the kernel is (1<<20 - 1).  Reject
device numbers higher than that to earlier catch possible errors.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 include/linux/drbd_limits.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h
index cd3565cfed44..7d956e91ae7b 100644
--- a/include/linux/drbd_limits.h
+++ b/include/linux/drbd_limits.h
@@ -137,7 +137,7 @@
 #define DRBD_AL_EXTENTS_SCALE '1'
 
 #define DRBD_MINOR_NUMBER_MIN  -1
-#define DRBD_MINOR_NUMBER_MAX  (1<<30)
+#define DRBD_MINOR_NUMBER_MAX  ((1 << 20) - 1)
 #define DRBD_MINOR_NUMBER_DEF  -1
 #define DRBD_MINOR_NUMBER_SCALE '1'
 
-- 
cgit v1.2.3


From b80c043327ea4faac62a329a1d35f16c47a5128e Mon Sep 17 00:00:00 2001
From: Philipp Reisner <philipp.reisner@linbit.com>
Date: Mon, 18 Jul 2011 11:09:17 +0200
Subject: drbd: The minor_count module parameter is only a hint nowadays

* The max of minor_count is 255
* In drbdadm count the number of minors, instead of finding
  the highest minor number
* No longer us the magic in the init script

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 include/linux/drbd_limits.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h
index 7d956e91ae7b..6d0a24331ed2 100644
--- a/include/linux/drbd_limits.h
+++ b/include/linux/drbd_limits.h
@@ -16,7 +16,7 @@
 #define DEBUG_RANGE_CHECK 0
 
 #define DRBD_MINOR_COUNT_MIN 1
-#define DRBD_MINOR_COUNT_MAX (1U << 20)
+#define DRBD_MINOR_COUNT_MAX 255
 #define DRBD_MINOR_COUNT_DEF 32
 #define DRBD_MINOR_COUNT_SCALE '1'
 
-- 
cgit v1.2.3


From 380207d08e7c4d1b19c0323777278992b4fbf9d6 Mon Sep 17 00:00:00 2001
From: Philipp Reisner <philipp.reisner@linbit.com>
Date: Fri, 11 Nov 2011 12:31:20 +0100
Subject: drbd: Load balancing of read requests

New config option for the disk secition "read-balancing", with
the values: prefer-local, prefer-remote, round-robin, when-congested-remote.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_int.h      |  1 +
 drivers/block/drbd/drbd_receiver.c |  2 +-
 drivers/block/drbd/drbd_req.c      | 57 +++++++++++++++++++++++++++++++++++++-
 include/linux/drbd.h               |  8 ++++++
 include/linux/drbd_genl.h          |  1 +
 include/linux/drbd_limits.h        |  1 +
 6 files changed, 68 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index d397681fb7aa..e2cccb40f5af 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -698,6 +698,7 @@ enum {
 	AHEAD_TO_SYNC_SOURCE,   /* Ahead -> SyncSource queued */
 	B_RS_H_DONE,		/* Before resync handler done (already executed) */
 	DISCARD_MY_DATA,	/* discard_my_data flag per volume */
+	READ_BALANCE_RR,
 };
 
 struct drbd_bitmap; /* opaque for drbd_conf */
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index e546dd3fab8a..733b8bd663d5 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -4974,7 +4974,7 @@ static int got_NegDReply(struct drbd_tconn *tconn, struct packet_info *pi)
 
 	update_peer_seq(mdev, be32_to_cpu(p->seq_num));
 
-	dev_err(DEV, "Got NegDReply; Sector %llus, len %u; Fail original request.\n",
+	dev_err(DEV, "Got NegDReply; Sector %llus, len %u.\n",
 	    (unsigned long long)sector, be32_to_cpu(p->blksize));
 
 	return validate_req_change_req_state(mdev, p->block_id, sector,
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index ceb04a94aace..98251e2a7fb7 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -563,6 +563,11 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
 		if (req->rq_state & RQ_NET_SENT && req->rq_state & RQ_WRITE)
 			atomic_sub(req->i.size >> 9, &mdev->ap_in_flight);
 
+		if (!(req->rq_state & RQ_WRITE) &&
+		    mdev->state.disk == D_UP_TO_DATE &&
+		    !IS_ERR_OR_NULL(req->private_bio))
+			goto goto_read_retry_local;
+
 		/* if it is still queued, we may not complete it here.
 		 * it will be canceled soon. */
 		if (!(req->rq_state & RQ_NET_QUEUED))
@@ -625,10 +630,22 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
 		req->rq_state &= ~(RQ_NET_OK|RQ_NET_PENDING);
 
 		req->rq_state |= RQ_NET_DONE;
+
+		if (!(req->rq_state & RQ_WRITE) &&
+		    mdev->state.disk == D_UP_TO_DATE &&
+		    !IS_ERR_OR_NULL(req->private_bio))
+			goto goto_read_retry_local;
+
 		_req_may_be_done_not_susp(req, m);
 		/* else: done by HANDED_OVER_TO_NETWORK */
 		break;
 
+	goto_read_retry_local:
+		req->rq_state |= RQ_LOCAL_PENDING;
+		req->private_bio->bi_bdev = mdev->ldev->backing_bdev;
+		generic_make_request(req->private_bio);
+		break;
+
 	case FAIL_FROZEN_DISK_IO:
 		if (!(req->rq_state & RQ_LOCAL_COMPLETED))
 			break;
@@ -689,6 +706,11 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
 		dec_ap_pending(mdev);
 		req->rq_state &= ~RQ_NET_PENDING;
 		req->rq_state |= (RQ_NET_OK|RQ_NET_DONE);
+		if (!IS_ERR_OR_NULL(req->private_bio)) {
+			bio_put(req->private_bio);
+			req->private_bio = NULL;
+			put_ldev(mdev);
+		}
 		_req_may_be_done_not_susp(req, m);
 		break;
 	};
@@ -723,6 +745,35 @@ static bool drbd_may_do_local_read(struct drbd_conf *mdev, sector_t sector, int
 	return drbd_bm_count_bits(mdev, sbnr, ebnr) == 0;
 }
 
+static bool remote_due_to_read_balancing(struct drbd_conf *mdev)
+{
+	enum drbd_read_balancing rbm;
+	struct backing_dev_info *bdi;
+
+	if (mdev->state.pdsk < D_UP_TO_DATE)
+		return false;
+
+	rcu_read_lock();
+	rbm = rcu_dereference(mdev->ldev->disk_conf)->read_balancing;
+	rcu_read_unlock();
+
+	switch (rbm) {
+	case RB_CONGESTED_REMOTE:
+		bdi = &mdev->ldev->backing_bdev->bd_disk->queue->backing_dev_info;
+		return bdi_read_congested(bdi);
+	case RB_LEAST_PENDING:
+		return atomic_read(&mdev->local_cnt) >
+			atomic_read(&mdev->ap_pending_cnt) + atomic_read(&mdev->rs_pending_cnt);
+	case RB_ROUND_ROBIN:
+		return test_and_change_bit(READ_BALANCE_RR, &mdev->flags);
+	case RB_PREFER_REMOTE:
+		return true;
+	case RB_PREFER_LOCAL:
+	default:
+		return false;
+	}
+}
+
 /*
  * complete_conflicting_writes  -  wait for any conflicting write requests
  *
@@ -790,6 +841,10 @@ int __drbd_make_request(struct drbd_conf *mdev, struct bio *bio, unsigned long s
 				bio_put(req->private_bio);
 				req->private_bio = NULL;
 				put_ldev(mdev);
+			} else if (remote_due_to_read_balancing(mdev)) {
+				/* Keep the private bio in case we need it
+				   for a local retry */
+				local = 0;
 			}
 		}
 		remote = !local && mdev->state.pdsk >= D_UP_TO_DATE;
@@ -1017,7 +1072,7 @@ fail_free_complete:
 	if (req->rq_state & RQ_IN_ACT_LOG)
 		drbd_al_complete_io(mdev, &req->i);
 fail_and_free_req:
-	if (local) {
+	if (!IS_ERR_OR_NULL(req->private_bio)) {
 		bio_put(req->private_bio);
 		req->private_bio = NULL;
 		put_ldev(mdev);
diff --git a/include/linux/drbd.h b/include/linux/drbd.h
index 1e9f754b66ac..157ba3d74dc7 100644
--- a/include/linux/drbd.h
+++ b/include/linux/drbd.h
@@ -102,6 +102,14 @@ enum drbd_on_congestion {
 	OC_DISCONNECT,
 };
 
+enum drbd_read_balancing {
+	RB_PREFER_LOCAL,
+	RB_PREFER_REMOTE,
+	RB_ROUND_ROBIN,
+	RB_LEAST_PENDING,
+	RB_CONGESTED_REMOTE,
+};
+
 /* KEEP the order, do not delete or insert. Only append. */
 enum drbd_ret_code {
 	ERR_CODE_BASE		= 100,
diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h
index 2e6cefefe5e5..826008f297fe 100644
--- a/include/linux/drbd_genl.h
+++ b/include/linux/drbd_genl.h
@@ -129,6 +129,7 @@ GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf,
 	__flg_field_def(18, DRBD_GENLA_F_MANDATORY,	disk_drain, DRBD_DISK_DRAIN_DEF)
 	__flg_field_def(19, DRBD_GENLA_F_MANDATORY,	md_flushes, DRBD_MD_FLUSHES_DEF)
 	__u32_field_def(20,	DRBD_GENLA_F_MANDATORY,	disk_timeout, DRBD_DISK_TIMEOUT_DEF)
+	__u32_field_def(21,	0 /* OPTIONAL */,       read_balancing, DRBD_READ_BALANCING_DEF)
 )
 
 GENL_struct(DRBD_NLA_RESOURCE_OPTS, 4, res_opts,
diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h
index 6d0a24331ed2..17ef66a5c114 100644
--- a/include/linux/drbd_limits.h
+++ b/include/linux/drbd_limits.h
@@ -161,6 +161,7 @@
 #define DRBD_RR_CONFLICT_DEF ASB_DISCONNECT
 #define DRBD_ON_NO_DATA_DEF OND_IO_ERROR
 #define DRBD_ON_CONGESTION_DEF OC_BLOCK
+#define DRBD_READ_BALANCING_DEF RB_PREFER_LOCAL
 
 #define DRBD_MAX_BIO_BVECS_MIN 0
 #define DRBD_MAX_BIO_BVECS_MAX 128
-- 
cgit v1.2.3


From d60de03a6694302b691bdf858ede9cbdfb7112d6 Mon Sep 17 00:00:00 2001
From: Philipp Reisner <philipp.reisner@linbit.com>
Date: Thu, 17 Nov 2011 10:12:31 +0100
Subject: drbd: Load balancing method: striping

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_req.c | 13 +++++++++++--
 include/linux/drbd.h          |  6 ++++++
 2 files changed, 17 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 98251e2a7fb7..5b28de0c5960 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -745,10 +745,11 @@ static bool drbd_may_do_local_read(struct drbd_conf *mdev, sector_t sector, int
 	return drbd_bm_count_bits(mdev, sbnr, ebnr) == 0;
 }
 
-static bool remote_due_to_read_balancing(struct drbd_conf *mdev)
+static bool remote_due_to_read_balancing(struct drbd_conf *mdev, sector_t sector)
 {
 	enum drbd_read_balancing rbm;
 	struct backing_dev_info *bdi;
+	int stripe_shift;
 
 	if (mdev->state.pdsk < D_UP_TO_DATE)
 		return false;
@@ -764,6 +765,14 @@ static bool remote_due_to_read_balancing(struct drbd_conf *mdev)
 	case RB_LEAST_PENDING:
 		return atomic_read(&mdev->local_cnt) >
 			atomic_read(&mdev->ap_pending_cnt) + atomic_read(&mdev->rs_pending_cnt);
+	case RB_32K_STRIPING:  /* stripe_shift = 15 */
+	case RB_64K_STRIPING:
+	case RB_128K_STRIPING:
+	case RB_256K_STRIPING:
+	case RB_512K_STRIPING:
+	case RB_1M_STRIPING:   /* stripe_shift = 20 */
+		stripe_shift = (rbm - RB_32K_STRIPING + 15);
+		return (sector >> (stripe_shift - 9)) & 1;
 	case RB_ROUND_ROBIN:
 		return test_and_change_bit(READ_BALANCE_RR, &mdev->flags);
 	case RB_PREFER_REMOTE:
@@ -841,7 +850,7 @@ int __drbd_make_request(struct drbd_conf *mdev, struct bio *bio, unsigned long s
 				bio_put(req->private_bio);
 				req->private_bio = NULL;
 				put_ldev(mdev);
-			} else if (remote_due_to_read_balancing(mdev)) {
+			} else if (remote_due_to_read_balancing(mdev, sector)) {
 				/* Keep the private bio in case we need it
 				   for a local retry */
 				local = 0;
diff --git a/include/linux/drbd.h b/include/linux/drbd.h
index 157ba3d74dc7..1e86156c10f7 100644
--- a/include/linux/drbd.h
+++ b/include/linux/drbd.h
@@ -108,6 +108,12 @@ enum drbd_read_balancing {
 	RB_ROUND_ROBIN,
 	RB_LEAST_PENDING,
 	RB_CONGESTED_REMOTE,
+	RB_32K_STRIPING,
+	RB_64K_STRIPING,
+	RB_128K_STRIPING,
+	RB_256K_STRIPING,
+	RB_512K_STRIPING,
+	RB_1M_STRIPING,
 };
 
 /* KEEP the order, do not delete or insert. Only append. */
-- 
cgit v1.2.3


From 26ec92871be1e6bd48d0be9ab38ee1ebbeea49f1 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruen@linbit.com>
Date: Wed, 11 Jul 2012 20:36:03 +0200
Subject: drbd: Stop using NLA_PUT*().

These macros no longer exist in kernel version v3.5-rc1.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_nl.c      | 54 +++++++++++++++++++++++----------------
 include/linux/genl_magic_func.h   |  8 +++---
 include/linux/genl_magic_struct.h | 16 ++++++------
 3 files changed, 45 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index cbd45de533cb..dc5bd6bbb280 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -2554,13 +2554,17 @@ int nla_put_drbd_cfg_context(struct sk_buff *skb, struct drbd_tconn *tconn, unsi
 	nla = nla_nest_start(skb, DRBD_NLA_CFG_CONTEXT);
 	if (!nla)
 		goto nla_put_failure;
-	if (vnr != VOLUME_UNSPECIFIED)
-		NLA_PUT_U32(skb, T_ctx_volume, vnr);
-	NLA_PUT_STRING(skb, T_ctx_resource_name, tconn->name);
-	if (tconn->my_addr_len)
-		NLA_PUT(skb, T_ctx_my_addr, tconn->my_addr_len, &tconn->my_addr);
-	if (tconn->peer_addr_len)
-		NLA_PUT(skb, T_ctx_peer_addr, tconn->peer_addr_len, &tconn->peer_addr);
+	if (vnr != VOLUME_UNSPECIFIED &&
+	    nla_put_u32(skb, T_ctx_volume, vnr))
+		goto nla_put_failure;
+	if (nla_put_string(skb, T_ctx_resource_name, tconn->name))
+		goto nla_put_failure;
+	if (tconn->my_addr_len &&
+	    nla_put(skb, T_ctx_my_addr, tconn->my_addr_len, &tconn->my_addr))
+		goto nla_put_failure;
+	if (tconn->peer_addr_len &&
+	    nla_put(skb, T_ctx_peer_addr, tconn->peer_addr_len, &tconn->peer_addr))
+		goto nla_put_failure;
 	nla_nest_end(skb, nla);
 	return 0;
 
@@ -2618,20 +2622,23 @@ int nla_put_status_info(struct sk_buff *skb, struct drbd_conf *mdev,
 	nla = nla_nest_start(skb, DRBD_NLA_STATE_INFO);
 	if (!nla)
 		goto nla_put_failure;
-	NLA_PUT_U32(skb, T_sib_reason, sib ? sib->sib_reason : SIB_GET_STATUS_REPLY);
-	NLA_PUT_U32(skb, T_current_state, mdev->state.i);
-	NLA_PUT_U64(skb, T_ed_uuid, mdev->ed_uuid);
-	NLA_PUT_U64(skb, T_capacity, drbd_get_capacity(mdev->this_bdev));
+	if (nla_put_u32(skb, T_sib_reason, sib ? sib->sib_reason : SIB_GET_STATUS_REPLY) ||
+	    nla_put_u32(skb, T_current_state, mdev->state.i) ||
+	    nla_put_u64(skb, T_ed_uuid, mdev->ed_uuid) ||
+	    nla_put_u64(skb, T_capacity, drbd_get_capacity(mdev->this_bdev)))
+		goto nla_put_failure;
 
 	if (got_ldev) {
-		NLA_PUT_U32(skb, T_disk_flags, mdev->ldev->md.flags);
-		NLA_PUT(skb, T_uuids, sizeof(si->uuids), mdev->ldev->md.uuid);
-		NLA_PUT_U64(skb, T_bits_total, drbd_bm_bits(mdev));
-		NLA_PUT_U64(skb, T_bits_oos, drbd_bm_total_weight(mdev));
+		if (nla_put_u32(skb, T_disk_flags, mdev->ldev->md.flags) ||
+		    nla_put(skb, T_uuids, sizeof(si->uuids), mdev->ldev->md.uuid) ||
+		    nla_put_u64(skb, T_bits_total, drbd_bm_bits(mdev)) ||
+		    nla_put_u64(skb, T_bits_oos, drbd_bm_total_weight(mdev)))
+			goto nla_put_failure;
 		if (C_SYNC_SOURCE <= mdev->state.conn &&
 		    C_PAUSED_SYNC_T >= mdev->state.conn) {
-			NLA_PUT_U64(skb, T_bits_rs_total, mdev->rs_total);
-			NLA_PUT_U64(skb, T_bits_rs_failed, mdev->rs_failed);
+			if (nla_put_u64(skb, T_bits_rs_total, mdev->rs_total) ||
+			    nla_put_u64(skb, T_bits_rs_failed, mdev->rs_failed))
+				goto nla_put_failure;
 		}
 	}
 
@@ -2641,15 +2648,18 @@ int nla_put_status_info(struct sk_buff *skb, struct drbd_conf *mdev,
 		case SIB_GET_STATUS_REPLY:
 			break;
 		case SIB_STATE_CHANGE:
-			NLA_PUT_U32(skb, T_prev_state, sib->os.i);
-			NLA_PUT_U32(skb, T_new_state, sib->ns.i);
+			if (nla_put_u32(skb, T_prev_state, sib->os.i) ||
+			    nla_put_u32(skb, T_new_state, sib->ns.i))
+				goto nla_put_failure;
 			break;
 		case SIB_HELPER_POST:
-			NLA_PUT_U32(skb,
-				T_helper_exit_code, sib->helper_exit_code);
+			if (nla_put_u32(skb, T_helper_exit_code,
+					sib->helper_exit_code))
+				goto nla_put_failure;
 			/* fall through */
 		case SIB_HELPER_PRE:
-			NLA_PUT_STRING(skb, T_helper, sib->helper_name);
+			if (nla_put_string(skb, T_helper, sib->helper_name))
+				goto nla_put_failure;
 			break;
 		}
 	}
diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h
index 0b8a88e2e83e..023bc346b877 100644
--- a/include/linux/genl_magic_func.h
+++ b/include/linux/genl_magic_func.h
@@ -367,7 +367,8 @@ static inline int s_name ## _to_unpriv_skb(struct sk_buff *skb,		\
 		__is_signed)						\
 	if (!exclude_sensitive || !((attr_flag) & DRBD_F_SENSITIVE)) {	\
 		DPRINT_FIELD(">>", nla_type, name, s, NULL);		\
-		__put(skb, attr_nr, s->name);				\
+		if (__put(skb, attr_nr, s->name))			\
+			goto nla_put_failure;				\
 	}
 
 #undef __array
@@ -375,9 +376,10 @@ static inline int s_name ## _to_unpriv_skb(struct sk_buff *skb,		\
 		__get, __put, __is_signed)				\
 	if (!exclude_sensitive || !((attr_flag) & DRBD_F_SENSITIVE)) {	\
 		DPRINT_ARRAY(">>",nla_type, name, s, NULL);		\
-		__put(skb, attr_nr, min_t(int, maxlen,			\
+		if (__put(skb, attr_nr, min_t(int, maxlen,		\
 			s->name ## _len + (nla_type == NLA_NUL_STRING)),\
-						s->name);		\
+						s->name))		\
+			goto nla_put_failure;				\
 	}
 
 #include GENL_MAGIC_INCLUDE_FILE
diff --git a/include/linux/genl_magic_struct.h b/include/linux/genl_magic_struct.h
index 1d0bd79e27b3..eecd19b37001 100644
--- a/include/linux/genl_magic_struct.h
+++ b/include/linux/genl_magic_struct.h
@@ -65,28 +65,28 @@ extern void CONCAT_(GENL_MAGIC_FAMILY, _genl_unregister)(void);
 /* possible field types */
 #define __flg_field(attr_nr, attr_flag, name) \
 	__field(attr_nr, attr_flag, name, NLA_U8, char, \
-			nla_get_u8, NLA_PUT_U8, false)
+			nla_get_u8, nla_put_u8, false)
 #define __u8_field(attr_nr, attr_flag, name)	\
 	__field(attr_nr, attr_flag, name, NLA_U8, unsigned char, \
-			nla_get_u8, NLA_PUT_U8, false)
+			nla_get_u8, nla_put_u8, false)
 #define __u16_field(attr_nr, attr_flag, name)	\
 	__field(attr_nr, attr_flag, name, NLA_U16, __u16, \
-			nla_get_u16, NLA_PUT_U16, false)
+			nla_get_u16, nla_put_u16, false)
 #define __u32_field(attr_nr, attr_flag, name)	\
 	__field(attr_nr, attr_flag, name, NLA_U32, __u32, \
-			nla_get_u32, NLA_PUT_U32, false)
+			nla_get_u32, nla_put_u32, false)
 #define __s32_field(attr_nr, attr_flag, name)	\
 	__field(attr_nr, attr_flag, name, NLA_U32, __s32, \
-			nla_get_u32, NLA_PUT_U32, true)
+			nla_get_u32, nla_put_u32, true)
 #define __u64_field(attr_nr, attr_flag, name)	\
 	__field(attr_nr, attr_flag, name, NLA_U64, __u64, \
-			nla_get_u64, NLA_PUT_U64, false)
+			nla_get_u64, nla_put_u64, false)
 #define __str_field(attr_nr, attr_flag, name, maxlen) \
 	__array(attr_nr, attr_flag, name, NLA_NUL_STRING, char, maxlen, \
-			nla_strlcpy, NLA_PUT, false)
+			nla_strlcpy, nla_put, false)
 #define __bin_field(attr_nr, attr_flag, name, maxlen) \
 	__array(attr_nr, attr_flag, name, NLA_BINARY, char, maxlen, \
-			nla_memcpy, NLA_PUT, false)
+			nla_memcpy, nla_put, false)
 
 /* fields with default values */
 #define __flg_field_def(attr_nr, attr_flag, name, default) \
-- 
cgit v1.2.3


From 9a51ab1c1b3c1e21f076cdd571bbe6ca7d1b504c Mon Sep 17 00:00:00 2001
From: Philipp Reisner <philipp.reisner@linbit.com>
Date: Mon, 20 Feb 2012 21:53:28 +0100
Subject: drbd: New disk option al-updates

By disabling al-updates one might increase performace. The price for
that is that in case a crashed primary (that had al-updates disabled)
is reintegraded, it will receive a full-resync instead of a bitmap
based resync.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_actlog.c | 12 ++++++++++--
 drivers/block/drbd/drbd_nl.c     | 17 +++++++++++++++--
 include/linux/drbd.h             |  1 +
 include/linux/drbd_genl.h        |  3 +++
 include/linux/drbd_limits.h      |  1 +
 5 files changed, 30 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index 9eae28944312..83d48d210b69 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -276,8 +276,16 @@ void drbd_al_begin_io(struct drbd_conf *mdev, struct drbd_interval *i)
 		/* Double check: it may have been committed by someone else,
 		 * while we have been waiting for the lock. */
 		if (mdev->act_log->pending_changes) {
-			al_write_transaction(mdev);
-			mdev->al_writ_cnt++;
+			bool write_al_updates;
+
+			rcu_read_lock();
+			write_al_updates = rcu_dereference(mdev->ldev->disk_conf)->al_updates;
+			rcu_read_unlock();
+
+			if (write_al_updates) {
+				al_write_transaction(mdev);
+				mdev->al_writ_cnt++;
+			}
 
 			spin_lock_irq(&mdev->al_lock);
 			/* FIXME
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index dc5bd6bbb280..c5d4fac1a111 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -1230,6 +1230,11 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
 
 	mutex_unlock(&mdev->tconn->conf_update);
 
+	if (new_disk_conf->al_updates)
+		mdev->ldev->md.flags &= MDF_AL_DISABLED;
+	else
+		mdev->ldev->md.flags |= MDF_AL_DISABLED;
+
 	drbd_bump_write_ordering(mdev->tconn, WO_bdev_flush);
 
 	drbd_md_sync(mdev);
@@ -1545,7 +1550,9 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 	} else if (dd == grew)
 		set_bit(RESYNC_AFTER_NEG, &mdev->flags);
 
-	if (drbd_md_test_flag(mdev->ldev, MDF_FULL_SYNC)) {
+	if (drbd_md_test_flag(mdev->ldev, MDF_FULL_SYNC) ||
+	    (test_bit(CRASHED_PRIMARY, &mdev->flags) &&
+	     drbd_md_test_flag(mdev->ldev, MDF_AL_DISABLED))) {
 		dev_info(DEV, "Assuming that all blocks are out of sync "
 		     "(aka FullSync)\n");
 		if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write,
@@ -1588,13 +1595,19 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 	if (ns.disk == D_CONSISTENT &&
 	    (ns.pdsk == D_OUTDATED || rcu_dereference(mdev->ldev->disk_conf)->fencing == FP_DONT_CARE))
 		ns.disk = D_UP_TO_DATE;
-	rcu_read_unlock();
 
 	/* All tests on MDF_PRIMARY_IND, MDF_CONNECTED_IND,
 	   MDF_CONSISTENT and MDF_WAS_UP_TO_DATE must happen before
 	   this point, because drbd_request_state() modifies these
 	   flags. */
 
+	if (rcu_dereference(mdev->ldev->disk_conf)->al_updates)
+		mdev->ldev->md.flags &= MDF_AL_DISABLED;
+	else
+		mdev->ldev->md.flags |= MDF_AL_DISABLED;
+
+	rcu_read_unlock();
+
 	/* In case we are C_CONNECTED postpone any decision on the new disk
 	   state after the negotiation phase. */
 	if (mdev->state.conn == C_CONNECTED) {
diff --git a/include/linux/drbd.h b/include/linux/drbd.h
index 1e86156c10f7..36ae7dd28d90 100644
--- a/include/linux/drbd.h
+++ b/include/linux/drbd.h
@@ -338,6 +338,7 @@ extern const char *drbd_set_st_err_str(enum drbd_state_rv);
 #define MDF_PEER_OUT_DATED	(1 << 5)
 #define MDF_CRASHED_PRIMARY	(1 << 6)
 #define MDF_AL_CLEAN		(1 << 7)
+#define MDF_AL_DISABLED		(1 << 8)
 
 enum drbd_uuid_index {
 	UI_CURRENT,
diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h
index 826008f297fe..92ec4b50a885 100644
--- a/include/linux/drbd_genl.h
+++ b/include/linux/drbd_genl.h
@@ -130,6 +130,8 @@ GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf,
 	__flg_field_def(19, DRBD_GENLA_F_MANDATORY,	md_flushes, DRBD_MD_FLUSHES_DEF)
 	__u32_field_def(20,	DRBD_GENLA_F_MANDATORY,	disk_timeout, DRBD_DISK_TIMEOUT_DEF)
 	__u32_field_def(21,	0 /* OPTIONAL */,       read_balancing, DRBD_READ_BALANCING_DEF)
+	/* 9: __u32_field_def(22,	DRBD_GENLA_F_MANDATORY,	unplug_watermark, DRBD_UNPLUG_WATERMARK_DEF) */
+	__flg_field_def(23,     0 /* OPTIONAL */,	al_updates, DRBD_AL_UPDATES_DEF)
 )
 
 GENL_struct(DRBD_NLA_RESOURCE_OPTS, 4, res_opts,
@@ -168,6 +170,7 @@ GENL_struct(DRBD_NLA_NET_CONF, 5, net_conf,
 	__flg_field_def(27, DRBD_GENLA_F_MANDATORY,	always_asbp, DRBD_ALWAYS_ASBP_DEF)
 	__flg_field(28, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT,	tentative)
 	__flg_field_def(29,	DRBD_GENLA_F_MANDATORY,	use_rle, DRBD_USE_RLE_DEF)
+	/* 9: __u32_field_def(30,	DRBD_GENLA_F_MANDATORY,	fencing_policy, DRBD_FENCING_DEF) */
 )
 
 GENL_struct(DRBD_NLA_SET_ROLE_PARMS, 6, set_role_parms,
diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h
index 17ef66a5c114..1fa19c5f5e64 100644
--- a/include/linux/drbd_limits.h
+++ b/include/linux/drbd_limits.h
@@ -210,6 +210,7 @@
 #define DRBD_DISK_DRAIN_DEF	1
 #define DRBD_MD_FLUSHES_DEF	1
 #define DRBD_TCP_CORK_DEF	1
+#define DRBD_AL_UPDATES_DEF     1
 
 #define DRBD_ALLOW_TWO_PRIMARIES_DEF	0
 #define DRBD_ALWAYS_ASBP_DEF	0
-- 
cgit v1.2.3


From 37be2f59d3149b95afaeeeff94edde2c07f165d2 Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@redhat.com>
Date: Thu, 8 Nov 2012 11:22:46 -0500
Subject: ext4: remove ext4_handle_release_buffer()

ext4_handle_release_buffer() was intended to remove journal
write access from a buffer, but it doesn't actually do anything
at all other than add a BUFFER_TRACE point, but it's not reliably
used for that either.  Remove all the associated dead code.

Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
---
 fs/ext4/ext4_jbd2.h   |  7 -------
 fs/ext4/resize.c      | 17 +++--------------
 fs/ext4/xattr.c       |  1 -
 fs/jbd2/journal.c     |  1 -
 fs/jbd2/transaction.c | 11 -----------
 include/linux/jbd2.h  |  1 -
 6 files changed, 3 insertions(+), 35 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index 56d258c18303..7177f9b21cb2 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -254,13 +254,6 @@ static inline void ext4_handle_sync(handle_t *handle)
 		handle->h_sync = 1;
 }
 
-static inline void ext4_handle_release_buffer(handle_t *handle,
-						struct buffer_head *bh)
-{
-	if (ext4_handle_valid(handle))
-		jbd2_journal_release_buffer(handle, bh);
-}
-
 static inline int ext4_handle_is_aborted(handle_t *handle)
 {
 	if (ext4_handle_valid(handle))
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 47bf06a2765d..d99387b89edd 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -783,7 +783,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
 
 	err = ext4_journal_get_write_access(handle, gdb_bh);
 	if (unlikely(err))
-		goto exit_sbh;
+		goto exit_dind;
 
 	err = ext4_journal_get_write_access(handle, dind);
 	if (unlikely(err))
@@ -792,7 +792,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
 	/* ext4_reserve_inode_write() gets a reference on the iloc */
 	err = ext4_reserve_inode_write(handle, inode, &iloc);
 	if (unlikely(err))
-		goto exit_dindj;
+		goto exit_dind;
 
 	n_group_desc = ext4_kvmalloc((gdb_num + 1) *
 				     sizeof(struct buffer_head *),
@@ -846,12 +846,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
 
 exit_inode:
 	ext4_kvfree(n_group_desc);
-	/* ext4_handle_release_buffer(handle, iloc.bh); */
 	brelse(iloc.bh);
-exit_dindj:
-	/* ext4_handle_release_buffer(handle, dind); */
-exit_sbh:
-	/* ext4_handle_release_buffer(handle, EXT4_SB(sb)->s_sbh); */
 exit_dind:
 	brelse(dind);
 exit_bh:
@@ -969,14 +964,8 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
 	}
 
 	for (i = 0; i < reserved_gdb; i++) {
-		if ((err = ext4_journal_get_write_access(handle, primary[i]))) {
-			/*
-			int j;
-			for (j = 0; j < i; j++)
-				ext4_handle_release_buffer(handle, primary[j]);
-			 */
+		if ((err = ext4_journal_get_write_access(handle, primary[i])))
 			goto exit_bh;
-		}
 	}
 
 	if ((err = ext4_reserve_inode_write(handle, inode, &iloc)))
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 2cdb98d62980..b1adda1b750d 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -794,7 +794,6 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
 			int offset = (char *)s->here - bs->bh->b_data;
 
 			unlock_buffer(bs->bh);
-			ext4_handle_release_buffer(handle, bs->bh);
 			if (ce) {
 				mb_cache_entry_release(ce);
 				ce = NULL;
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 484b8d1c6cb6..dbf41f9452db 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -60,7 +60,6 @@ EXPORT_SYMBOL(jbd2_journal_get_create_access);
 EXPORT_SYMBOL(jbd2_journal_get_undo_access);
 EXPORT_SYMBOL(jbd2_journal_set_triggers);
 EXPORT_SYMBOL(jbd2_journal_dirty_metadata);
-EXPORT_SYMBOL(jbd2_journal_release_buffer);
 EXPORT_SYMBOL(jbd2_journal_forget);
 #if 0
 EXPORT_SYMBOL(journal_sync_buffer);
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index a74ba4659549..deffd945c8e2 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -1207,17 +1207,6 @@ out:
 	return ret;
 }
 
-/*
- * jbd2_journal_release_buffer: undo a get_write_access without any buffer
- * updates, if the update decided in the end that it didn't need access.
- *
- */
-void
-jbd2_journal_release_buffer(handle_t *handle, struct buffer_head *bh)
-{
-	BUFFER_TRACE(bh, "entry");
-}
-
 /**
  * void jbd2_journal_forget() - bforget() for potentially-journaled buffers.
  * @handle: transaction handle
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index 3efc43f3f162..de7f55682088 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -1096,7 +1096,6 @@ extern int	 jbd2_journal_get_undo_access(handle_t *, struct buffer_head *);
 void		 jbd2_journal_set_triggers(struct buffer_head *,
 					   struct jbd2_buffer_trigger_type *type);
 extern int	 jbd2_journal_dirty_metadata (handle_t *, struct buffer_head *);
-extern void	 jbd2_journal_release_buffer (handle_t *, struct buffer_head *);
 extern int	 jbd2_journal_forget (handle_t *, struct buffer_head *);
 extern void	 journal_sync_buffer (struct buffer_head *);
 extern void	 jbd2_journal_invalidatepage(journal_t *,
-- 
cgit v1.2.3


From fd47d3e1c2949838e858379aaf2bc20647be2912 Mon Sep 17 00:00:00 2001
From: Behan Webster <behanw@converseincode.com>
Date: Thu, 8 Nov 2012 11:24:46 -0500
Subject: jbd2: remove VLAIS usage from JBD2 code

The use of variable length arrays in structs (VLAIS) in the Linux Kernel code
precludes the use of compilers which don't implement VLAIS (for instance the
Clang compiler). Since ctx is always a 32-bit CRC, hard coding a size of 4
bytes accomplishes the same thing without the use of VLAIS. This is the same
technique already employed in fs/ext4/ext4.h

Signed-off-by: Mark Charlebois <charlebm@gmail.com>
Signed-off-by: Behan Webster <behanw@converseincode.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 include/linux/jbd2.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index de7f55682088..1be23d9fdacb 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -1302,15 +1302,21 @@ static inline int jbd_space_needed(journal_t *journal)
 
 extern int jbd_blocks_per_page(struct inode *inode);
 
+/* JBD uses a CRC32 checksum */
+#define JBD_MAX_CHECKSUM_SIZE 4
+
 static inline u32 jbd2_chksum(journal_t *journal, u32 crc,
 			      const void *address, unsigned int length)
 {
 	struct {
 		struct shash_desc shash;
-		char ctx[crypto_shash_descsize(journal->j_chksum_driver)];
+		char ctx[JBD_MAX_CHECKSUM_SIZE];
 	} desc;
 	int err;
 
+	BUG_ON(crypto_shash_descsize(journal->j_chksum_driver) >
+		JBD_MAX_CHECKSUM_SIZE);
+
 	desc.shash.tfm = journal->j_chksum_driver;
 	desc.shash.flags = 0;
 	*(u32 *)desc.ctx = crc;
-- 
cgit v1.2.3


From 2be975c6d920de989ff5e4bc09ffe87e59d94662 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Sat, 3 Nov 2012 12:16:12 -0700
Subject: Input: introduce managed input devices (add devres support)

There is a demand from driver's writers to use managed devices framework
for their drivers. Unfortunately up to this moment input devices did not
provide support for managed devices and that lead to mixing two styles
of resource management which usually introduced more bugs, such as
manually unregistering input device but relying in devres to free
interrupt handler which (unless device is properly shut off) can cause
ISR to reference already freed memory.

This change introduces devm_input_allocate_device() that will allocate
managed instance of input device so that driver writers who prefer
using devm_* framework do not have to mix 2 styles.

Reviewed-by: Henrik Rydberg <rydberg@euromail.se>
Reviewed-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/input.c | 176 ++++++++++++++++++++++++++++++++++++++++++--------
 include/linux/input.h |   7 +-
 2 files changed, 155 insertions(+), 28 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/input/input.c b/drivers/input/input.c
index f1be1a77edf3..ce01332f7b3a 100644
--- a/drivers/input/input.c
+++ b/drivers/input/input.c
@@ -1726,7 +1726,7 @@ EXPORT_SYMBOL_GPL(input_class);
 /**
  * input_allocate_device - allocate memory for new input device
  *
- * Returns prepared struct input_dev or NULL.
+ * Returns prepared struct input_dev or %NULL.
  *
  * NOTE: Use input_free_device() to free devices that have not been
  * registered; input_unregister_device() should be used for already
@@ -1753,6 +1753,70 @@ struct input_dev *input_allocate_device(void)
 }
 EXPORT_SYMBOL(input_allocate_device);
 
+struct input_devres {
+	struct input_dev *input;
+};
+
+static int devm_input_device_match(struct device *dev, void *res, void *data)
+{
+	struct input_devres *devres = res;
+
+	return devres->input == data;
+}
+
+static void devm_input_device_release(struct device *dev, void *res)
+{
+	struct input_devres *devres = res;
+	struct input_dev *input = devres->input;
+
+	dev_dbg(dev, "%s: dropping reference to %s\n",
+		__func__, dev_name(&input->dev));
+	input_put_device(input);
+}
+
+/**
+ * devm_input_allocate_device - allocate managed input device
+ * @dev: device owning the input device being created
+ *
+ * Returns prepared struct input_dev or %NULL.
+ *
+ * Managed input devices do not need to be explicitly unregistered or
+ * freed as it will be done automatically when owner device unbinds from
+ * its driver (or binding fails). Once managed input device is allocated,
+ * it is ready to be set up and registered in the same fashion as regular
+ * input device. There are no special devm_input_device_[un]register()
+ * variants, regular ones work with both managed and unmanaged devices.
+ *
+ * NOTE: the owner device is set up as parent of input device and users
+ * should not override it.
+ */
+
+struct input_dev *devm_input_allocate_device(struct device *dev)
+{
+	struct input_dev *input;
+	struct input_devres *devres;
+
+	devres = devres_alloc(devm_input_device_release,
+			      sizeof(struct input_devres), GFP_KERNEL);
+	if (!devres)
+		return NULL;
+
+	input = input_allocate_device();
+	if (!input) {
+		devres_free(devres);
+		return NULL;
+	}
+
+	input->dev.parent = dev;
+	input->devres_managed = true;
+
+	devres->input = input;
+	devres_add(dev, devres);
+
+	return input;
+}
+EXPORT_SYMBOL(devm_input_allocate_device);
+
 /**
  * input_free_device - free memory occupied by input_dev structure
  * @dev: input device to free
@@ -1769,8 +1833,14 @@ EXPORT_SYMBOL(input_allocate_device);
  */
 void input_free_device(struct input_dev *dev)
 {
-	if (dev)
+	if (dev) {
+		if (dev->devres_managed)
+			WARN_ON(devres_destroy(dev->dev.parent,
+						devm_input_device_release,
+						devm_input_device_match,
+						dev));
 		input_put_device(dev);
+	}
 }
 EXPORT_SYMBOL(input_free_device);
 
@@ -1891,6 +1961,38 @@ static void input_cleanse_bitmasks(struct input_dev *dev)
 	INPUT_CLEANSE_BITMASK(dev, SW, sw);
 }
 
+static void __input_unregister_device(struct input_dev *dev)
+{
+	struct input_handle *handle, *next;
+
+	input_disconnect_device(dev);
+
+	mutex_lock(&input_mutex);
+
+	list_for_each_entry_safe(handle, next, &dev->h_list, d_node)
+		handle->handler->disconnect(handle);
+	WARN_ON(!list_empty(&dev->h_list));
+
+	del_timer_sync(&dev->timer);
+	list_del_init(&dev->node);
+
+	input_wakeup_procfs_readers();
+
+	mutex_unlock(&input_mutex);
+
+	device_del(&dev->dev);
+}
+
+static void devm_input_device_unregister(struct device *dev, void *res)
+{
+	struct input_devres *devres = res;
+	struct input_dev *input = devres->input;
+
+	dev_dbg(dev, "%s: unregistering device %s\n",
+		__func__, dev_name(&input->dev));
+	__input_unregister_device(input);
+}
+
 /**
  * input_register_device - register device with input core
  * @dev: device to be registered
@@ -1906,11 +2008,21 @@ static void input_cleanse_bitmasks(struct input_dev *dev)
 int input_register_device(struct input_dev *dev)
 {
 	static atomic_t input_no = ATOMIC_INIT(0);
+	struct input_devres *devres = NULL;
 	struct input_handler *handler;
 	unsigned int packet_size;
 	const char *path;
 	int error;
 
+	if (dev->devres_managed) {
+		devres = devres_alloc(devm_input_device_unregister,
+				      sizeof(struct input_devres), GFP_KERNEL);
+		if (!devres)
+			return -ENOMEM;
+
+		devres->input = dev;
+	}
+
 	/* Every input device generates EV_SYN/SYN_REPORT events. */
 	__set_bit(EV_SYN, dev->evbit);
 
@@ -1926,8 +2038,10 @@ int input_register_device(struct input_dev *dev)
 
 	dev->max_vals = max(dev->hint_events_per_packet, packet_size) + 2;
 	dev->vals = kcalloc(dev->max_vals, sizeof(*dev->vals), GFP_KERNEL);
-	if (!dev->vals)
-		return -ENOMEM;
+	if (!dev->vals) {
+		error = -ENOMEM;
+		goto err_devres_free;
+	}
 
 	/*
 	 * If delay and period are pre-set by the driver, then autorepeating
@@ -1952,7 +2066,7 @@ int input_register_device(struct input_dev *dev)
 
 	error = device_add(&dev->dev);
 	if (error)
-		return error;
+		goto err_free_vals;
 
 	path = kobject_get_path(&dev->dev.kobj, GFP_KERNEL);
 	pr_info("%s as %s\n",
@@ -1961,10 +2075,8 @@ int input_register_device(struct input_dev *dev)
 	kfree(path);
 
 	error = mutex_lock_interruptible(&input_mutex);
-	if (error) {
-		device_del(&dev->dev);
-		return error;
-	}
+	if (error)
+		goto err_device_del;
 
 	list_add_tail(&dev->node, &input_dev_list);
 
@@ -1975,7 +2087,21 @@ int input_register_device(struct input_dev *dev)
 
 	mutex_unlock(&input_mutex);
 
+	if (dev->devres_managed) {
+		dev_dbg(dev->dev.parent, "%s: registering %s with devres.\n",
+			__func__, dev_name(&dev->dev));
+		devres_add(dev->dev.parent, devres);
+	}
 	return 0;
+
+err_device_del:
+	device_del(&dev->dev);
+err_free_vals:
+	kfree(dev->vals);
+	dev->vals = NULL;
+err_devres_free:
+	devres_free(devres);
+	return error;
 }
 EXPORT_SYMBOL(input_register_device);
 
@@ -1988,24 +2114,20 @@ EXPORT_SYMBOL(input_register_device);
  */
 void input_unregister_device(struct input_dev *dev)
 {
-	struct input_handle *handle, *next;
-
-	input_disconnect_device(dev);
-
-	mutex_lock(&input_mutex);
-
-	list_for_each_entry_safe(handle, next, &dev->h_list, d_node)
-		handle->handler->disconnect(handle);
-	WARN_ON(!list_empty(&dev->h_list));
-
-	del_timer_sync(&dev->timer);
-	list_del_init(&dev->node);
-
-	input_wakeup_procfs_readers();
-
-	mutex_unlock(&input_mutex);
-
-	device_unregister(&dev->dev);
+	if (dev->devres_managed) {
+		WARN_ON(devres_destroy(dev->dev.parent,
+					devm_input_device_unregister,
+					devm_input_device_match,
+					dev));
+		__input_unregister_device(dev);
+		/*
+		 * We do not do input_put_device() here because it will be done
+		 * when 2nd devres fires up.
+		 */
+	} else {
+		__input_unregister_device(dev);
+		input_put_device(dev);
+	}
 }
 EXPORT_SYMBOL(input_unregister_device);
 
diff --git a/include/linux/input.h b/include/linux/input.h
index cab994ba6d91..5538cc09a4f5 100644
--- a/include/linux/input.h
+++ b/include/linux/input.h
@@ -112,6 +112,8 @@ struct input_value {
  * @h_list: list of input handles associated with the device. When
  *	accessing the list dev->mutex must be held
  * @node: used to place the device onto input_dev_list
+ * @devres_managed: indicates that devices is managed with devres framework
+ *	and needs not be explicitly unregistered or freed.
  */
 struct input_dev {
 	const char *name;
@@ -180,6 +182,8 @@ struct input_dev {
 	unsigned int num_vals;
 	unsigned int max_vals;
 	struct input_value *vals;
+
+	bool devres_managed;
 };
 #define to_input_dev(d) container_of(d, struct input_dev, dev)
 
@@ -323,7 +327,8 @@ struct input_handle {
 	struct list_head	h_node;
 };
 
-struct input_dev *input_allocate_device(void);
+struct input_dev __must_check *input_allocate_device(void);
+struct input_dev __must_check *devm_input_allocate_device(struct device *);
 void input_free_device(struct input_dev *dev);
 
 static inline struct input_dev *input_get_device(struct input_dev *dev)
-- 
cgit v1.2.3


From 58ffa580a748dd16b1e5ab260bea39cdbd1e94ef Mon Sep 17 00:00:00 2001
From: Lars Ellenberg <lars.ellenberg@linbit.com>
Date: Thu, 26 Jul 2012 14:09:49 +0200
Subject: drbd: introduce stop-sector to online verify

We now can schedule only a specific range of sectors for online verify,
or interrupt a running verify without interrupting the connection.

Had to bump the protocol version differently, we are now 101.
Added verify_can_do_stop_sector() { protocol >= 97 && protocol != 100; }

Also, the return value convention for worker callbacks has changed,
we returned "true/false" for "keep the connection up" in 8.3,
we return 0 for success and <= for failure in 8.4.
Affected: receive_state()

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_int.h      |  7 +++++++
 drivers/block/drbd/drbd_nl.c       | 14 +++++++++-----
 drivers/block/drbd/drbd_proc.c     | 12 +++++++++---
 drivers/block/drbd/drbd_receiver.c | 10 +++++++++-
 drivers/block/drbd/drbd_state.c    | 17 +++++++++++++----
 drivers/block/drbd/drbd_worker.c   | 33 +++++++++++++++++++++++++++------
 include/linux/drbd.h               |  2 +-
 include/linux/drbd_genl.h          |  1 +
 8 files changed, 76 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 22adfc7189de..eddc4388a1b1 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -971,6 +971,7 @@ struct drbd_conf {
 
 	/* where does the admin want us to start? (sector) */
 	sector_t ov_start_sector;
+	sector_t ov_stop_sector;
 	/* where are we now? (sector) */
 	sector_t ov_position;
 	/* Start sector of out of sync range (to merge printk reporting). */
@@ -2264,6 +2265,12 @@ static inline void dec_ap_bio(struct drbd_conf *mdev)
 		wake_up(&mdev->misc_wait);
 }
 
+static inline bool verify_can_do_stop_sector(struct drbd_conf *mdev)
+{
+	return mdev->tconn->agreed_pro_version >= 97 &&
+		mdev->tconn->agreed_pro_version != 100;
+}
+
 static inline int drbd_set_ed_uuid(struct drbd_conf *mdev, u64 val)
 {
 	int changed = mdev->ed_uuid != val;
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 4afd626ca3dc..eefb56308aea 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -2939,6 +2939,7 @@ int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info)
 {
 	struct drbd_conf *mdev;
 	enum drbd_ret_code retcode;
+	struct start_ov_parms parms;
 
 	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
 	if (!adm_ctx.reply_skb)
@@ -2947,19 +2948,22 @@ int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info)
 		goto out;
 
 	mdev = adm_ctx.mdev;
+
+	/* resume from last known position, if possible */
+	parms.ov_start_sector = mdev->ov_start_sector;
+	parms.ov_stop_sector = ULLONG_MAX;
 	if (info->attrs[DRBD_NLA_START_OV_PARMS]) {
-		/* resume from last known position, if possible */
-		struct start_ov_parms parms =
-			{ .ov_start_sector = mdev->ov_start_sector };
 		int err = start_ov_parms_from_attrs(&parms, info);
 		if (err) {
 			retcode = ERR_MANDATORY_TAG;
 			drbd_msg_put_info(from_attrs_err_to_txt(err));
 			goto out;
 		}
-		/* w_make_ov_request expects position to be aligned */
-		mdev->ov_start_sector = parms.ov_start_sector & ~BM_SECT_PER_BIT;
 	}
+	/* w_make_ov_request expects position to be aligned */
+	mdev->ov_start_sector = parms.ov_start_sector & ~(BM_SECT_PER_BIT-1);
+	mdev->ov_stop_sector = parms.ov_stop_sector;
+
 	/* If there is still bitmap IO pending, e.g. previous resync or verify
 	 * just being finished, wait for it before requesting a new resync. */
 	drbd_suspend_io(mdev);
diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c
index e0f0d2a6d538..56672a61eb94 100644
--- a/drivers/block/drbd/drbd_proc.c
+++ b/drivers/block/drbd/drbd_proc.c
@@ -167,18 +167,24 @@ static void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq)
 		 * we convert to sectors in the display below. */
 		unsigned long bm_bits = drbd_bm_bits(mdev);
 		unsigned long bit_pos;
+		unsigned long long stop_sector = 0;
 		if (mdev->state.conn == C_VERIFY_S ||
-		    mdev->state.conn == C_VERIFY_T)
+		    mdev->state.conn == C_VERIFY_T) {
 			bit_pos = bm_bits - mdev->ov_left;
-		else
+			if (verify_can_do_stop_sector(mdev))
+				stop_sector = mdev->ov_stop_sector;
+		} else
 			bit_pos = mdev->bm_resync_fo;
 		/* Total sectors may be slightly off for oddly
 		 * sized devices. So what. */
 		seq_printf(seq,
-			"\t%3d%% sector pos: %llu/%llu\n",
+			"\t%3d%% sector pos: %llu/%llu",
 			(int)(bit_pos / (bm_bits/100+1)),
 			(unsigned long long)bit_pos * BM_SECT_PER_BIT,
 			(unsigned long long)bm_bits * BM_SECT_PER_BIT);
+		if (stop_sector != 0 && stop_sector != ULLONG_MAX)
+			seq_printf(seq, " stop sector: %llu", stop_sector);
+		seq_printf(seq, "\n");
 	}
 }
 
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 7fe6b01618d4..8fddec96dfbe 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -3843,7 +3843,7 @@ static int receive_state(struct drbd_tconn *tconn, struct packet_info *pi)
 	 * already decided to close the connection again,
 	 * we must not "re-establish" it here. */
 	if (os.conn <= C_TEAR_DOWN)
-		return false;
+		return -ECONNRESET;
 
 	/* If this is the "end of sync" confirmation, usually the peer disk
 	 * transitions from D_INCONSISTENT to D_UP_TO_DATE. For empty (0 bits
@@ -3875,6 +3875,14 @@ static int receive_state(struct drbd_tconn *tconn, struct packet_info *pi)
 		}
 	}
 
+	/* explicit verify finished notification, stop sector reached. */
+	if (os.conn == C_VERIFY_T && os.disk == D_UP_TO_DATE &&
+	    peer_state.conn == C_CONNECTED && real_peer_disk == D_UP_TO_DATE) {
+		ov_out_of_sync_print(mdev);
+		drbd_resync_finished(mdev);
+		return 0;
+	}
+
 	/* peer says his disk is inconsistent, while we think it is uptodate,
 	 * and this happens while the peer still thinks we have a sync going on,
 	 * but we think we are already done with the sync.
diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c
index 444581828d70..12f2b4fbe559 100644
--- a/drivers/block/drbd/drbd_state.c
+++ b/drivers/block/drbd/drbd_state.c
@@ -975,13 +975,15 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns,
 	wake_up(&mdev->state_wait);
 	wake_up(&mdev->tconn->ping_wait);
 
-	/* aborted verify run. log the last position */
+	/* Aborted verify run, or we reached the stop sector.
+	 * Log the last position, unless end-of-device. */
 	if ((os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) &&
-	    ns.conn < C_CONNECTED) {
+	    ns.conn <= C_CONNECTED) {
 		mdev->ov_start_sector =
 			BM_BIT_TO_SECT(drbd_bm_bits(mdev) - mdev->ov_left);
-		dev_info(DEV, "Online Verify reached sector %llu\n",
-			(unsigned long long)mdev->ov_start_sector);
+		if (mdev->ov_left)
+			dev_info(DEV, "Online Verify reached sector %llu\n",
+				(unsigned long long)mdev->ov_start_sector);
 	}
 
 	if ((os.conn == C_PAUSED_SYNC_T || os.conn == C_PAUSED_SYNC_S) &&
@@ -1422,6 +1424,13 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
 	if (os.disk < D_UP_TO_DATE && os.conn >= C_SYNC_SOURCE && ns.conn == C_CONNECTED)
 		drbd_send_state(mdev, ns);
 
+	/* Verify finished, or reached stop sector.  Peer did not know about
+	 * the stop sector, and we may even have changed the stop sector during
+	 * verify to interrupt/stop early.  Send the new state. */
+	if (os.conn == C_VERIFY_S && ns.conn == C_CONNECTED
+	&& verify_can_do_stop_sector(mdev))
+		drbd_send_state(mdev, ns);
+
 	/* Wake up role changes, that were delayed because of connection establishing */
 	if (os.conn == C_WF_REPORT_PARAMS && ns.conn != C_WF_REPORT_PARAMS) {
 		if (test_and_clear_bit(STATE_SENT, &mdev->tconn->flags))
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index 9d7e1fb0f431..1c9c6fd332c3 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -692,6 +692,7 @@ static int w_make_ov_request(struct drbd_work *w, int cancel)
 	int number, i, size;
 	sector_t sector;
 	const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
+	bool stop_sector_reached = false;
 
 	if (unlikely(cancel))
 		return 1;
@@ -700,9 +701,17 @@ static int w_make_ov_request(struct drbd_work *w, int cancel)
 
 	sector = mdev->ov_position;
 	for (i = 0; i < number; i++) {
-		if (sector >= capacity) {
+		if (sector >= capacity)
 			return 1;
-		}
+
+		/* We check for "finished" only in the reply path:
+		 * w_e_end_ov_reply().
+		 * We need to send at least one request out. */
+		stop_sector_reached = i > 0
+			&& verify_can_do_stop_sector(mdev)
+			&& sector >= mdev->ov_stop_sector;
+		if (stop_sector_reached)
+			break;
 
 		size = BM_BLOCK_SIZE;
 
@@ -726,7 +735,8 @@ static int w_make_ov_request(struct drbd_work *w, int cancel)
 
  requeue:
 	mdev->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
-	mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME);
+	if (i == 0 || !stop_sector_reached)
+		mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME);
 	return 1;
 }
 
@@ -792,7 +802,12 @@ int drbd_resync_finished(struct drbd_conf *mdev)
 	dt = (jiffies - mdev->rs_start - mdev->rs_paused) / HZ;
 	if (dt <= 0)
 		dt = 1;
+	
 	db = mdev->rs_total;
+	/* adjust for verify start and stop sectors, respective reached position */
+	if (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T)
+		db -= mdev->ov_left;
+
 	dbdt = Bit2KB(db/dt);
 	mdev->rs_paused /= HZ;
 
@@ -815,7 +830,7 @@ int drbd_resync_finished(struct drbd_conf *mdev)
 	ns.conn = C_CONNECTED;
 
 	dev_info(DEV, "%s done (total %lu sec; paused %lu sec; %lu K/sec)\n",
-	     verify_done ? "Online verify " : "Resync",
+	     verify_done ? "Online verify" : "Resync",
 	     dt + mdev->rs_paused, mdev->rs_paused, dbdt);
 
 	n_oos = drbd_bm_total_weight(mdev);
@@ -896,7 +911,9 @@ out:
 	mdev->rs_total  = 0;
 	mdev->rs_failed = 0;
 	mdev->rs_paused = 0;
-	if (verify_done)
+
+	/* reset start sector, if we reached end of device */
+	if (verify_done && mdev->ov_left == 0)
 		mdev->ov_start_sector = 0;
 
 	drbd_md_sync(mdev);
@@ -1144,6 +1161,7 @@ int w_e_end_ov_reply(struct drbd_work *w, int cancel)
 	unsigned int size = peer_req->i.size;
 	int digest_size;
 	int err, eq = 0;
+	bool stop_sector_reached = false;
 
 	if (unlikely(cancel)) {
 		drbd_free_peer_req(mdev, peer_req);
@@ -1194,7 +1212,10 @@ int w_e_end_ov_reply(struct drbd_work *w, int cancel)
 	if ((mdev->ov_left & 0x200) == 0x200)
 		drbd_advance_rs_marks(mdev, mdev->ov_left);
 
-	if (mdev->ov_left == 0) {
+	stop_sector_reached = verify_can_do_stop_sector(mdev) &&
+		(sector + (size>>9)) >= mdev->ov_stop_sector;
+
+	if (mdev->ov_left == 0 || stop_sector_reached) {
 		ov_out_of_sync_print(mdev);
 		drbd_resync_finished(mdev);
 	}
diff --git a/include/linux/drbd.h b/include/linux/drbd.h
index 36ae7dd28d90..5171c3530886 100644
--- a/include/linux/drbd.h
+++ b/include/linux/drbd.h
@@ -55,7 +55,7 @@ extern const char *drbd_buildtag(void);
 #define REL_VERSION "8.3.11"
 #define API_VERSION 88
 #define PRO_VERSION_MIN 86
-#define PRO_VERSION_MAX 100
+#define PRO_VERSION_MAX 101
 
 
 enum drbd_io_error_p {
diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h
index 92ec4b50a885..9430e9ab37a8 100644
--- a/include/linux/drbd_genl.h
+++ b/include/linux/drbd_genl.h
@@ -215,6 +215,7 @@ GENL_struct(DRBD_NLA_STATE_INFO, 8, state_info,
 
 GENL_struct(DRBD_NLA_START_OV_PARMS, 9, start_ov_parms,
 	__u64_field(1, DRBD_GENLA_F_MANDATORY,	ov_start_sector)
+	__u64_field(2, DRBD_GENLA_F_MANDATORY,	ov_stop_sector)
 )
 
 GENL_struct(DRBD_NLA_NEW_C_UUID_PARMS, 10, new_c_uuid_parms,
-- 
cgit v1.2.3


From 3174f8c5045ad247563434c4b4897bd89313eafc Mon Sep 17 00:00:00 2001
From: Philipp Marek <philipp.marek@linbit.com>
Date: Sat, 3 Mar 2012 21:04:30 +0100
Subject: drbd: pass some more information to userspace.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_nl.c | 11 ++++++++++-
 include/linux/drbd_genl.h    | 10 ++++++++++
 2 files changed, 20 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index eefb56308aea..466d6b1d9309 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -2666,7 +2666,16 @@ int nla_put_status_info(struct sk_buff *skb, struct drbd_conf *mdev,
 	if (nla_put_u32(skb, T_sib_reason, sib ? sib->sib_reason : SIB_GET_STATUS_REPLY) ||
 	    nla_put_u32(skb, T_current_state, mdev->state.i) ||
 	    nla_put_u64(skb, T_ed_uuid, mdev->ed_uuid) ||
-	    nla_put_u64(skb, T_capacity, drbd_get_capacity(mdev->this_bdev)))
+	    nla_put_u64(skb, T_capacity, drbd_get_capacity(mdev->this_bdev)) ||
+	    nla_put_u64(skb, T_send_cnt, mdev->send_cnt) ||
+	    nla_put_u64(skb, T_recv_cnt, mdev->recv_cnt) ||
+	    nla_put_u64(skb, T_read_cnt, mdev->read_cnt) ||
+	    nla_put_u64(skb, T_writ_cnt, mdev->writ_cnt) ||
+	    nla_put_u64(skb, T_al_writ_cnt, mdev->al_writ_cnt) ||
+	    nla_put_u64(skb, T_bm_writ_cnt, mdev->bm_writ_cnt) ||
+	    nla_put_u32(skb, T_ap_bio_cnt, atomic_read(&mdev->ap_bio_cnt)) ||
+	    nla_put_u32(skb, T_ap_pending_cnt, atomic_read(&mdev->ap_pending_cnt)) ||
+	    nla_put_u32(skb, T_rs_pending_cnt, atomic_read(&mdev->rs_pending_cnt)))
 		goto nla_put_failure;
 
 	if (got_ldev) {
diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h
index 9430e9ab37a8..d0d8fac8a6e4 100644
--- a/include/linux/drbd_genl.h
+++ b/include/linux/drbd_genl.h
@@ -211,6 +211,16 @@ GENL_struct(DRBD_NLA_STATE_INFO, 8, state_info,
 	/* for pre and post notifications of helper execution */
 	__str_field(13, DRBD_GENLA_F_MANDATORY,	helper, 32)
 	__u32_field(14, DRBD_GENLA_F_MANDATORY,	helper_exit_code)
+
+	__u64_field(15,                      0, send_cnt)
+	__u64_field(16,                      0, recv_cnt)
+	__u64_field(17,                      0, read_cnt)
+	__u64_field(18,                      0, writ_cnt)
+	__u64_field(19,                      0, al_writ_cnt)
+	__u64_field(20,                      0, bm_writ_cnt)
+	__u32_field(21,                      0, ap_bio_cnt)
+	__u32_field(22,                      0, ap_pending_cnt)
+	__u32_field(23,                      0, rs_pending_cnt)
 )
 
 GENL_struct(DRBD_NLA_START_OV_PARMS, 9, start_ov_parms,
-- 
cgit v1.2.3


From eb12010e9af119c84e6b2214064a98681027e0e3 Mon Sep 17 00:00:00 2001
From: Lars Ellenberg <lars.ellenberg@linbit.com>
Date: Wed, 1 Aug 2012 12:46:20 +0200
Subject: drbd: disambiguation, s/ERR_DISCARD/ERR_DISCARD_IMPOSSIBLE/

If for some reason (typically "split-brained" cluster manager)
drbd replica data has diverged, we can chose a victim,
and reconnect using "--discard-my-data", causing the victim
to become sync-target, fetching all changed blocks from the peer.

If we are Primary, we are potentially in use, and we refuse to
"roll back" changes to the data below the page cache and other users.

Rename the error symbol for this to ERR_DISCARD_IMPOSSIBLE.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_nl.c | 2 +-
 include/linux/drbd.h         | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 35bb572a2076..d1073705bf1f 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -1829,7 +1829,7 @@ _check_net_options(struct drbd_tconn *tconn, struct net_conf *old_conf, struct n
 				return ERR_STONITH_AND_PROT_A;
 		}
 		if (mdev->state.role == R_PRIMARY && new_conf->discard_my_data)
-			return ERR_DISCARD;
+			return ERR_DISCARD_IMPOSSIBLE;
 	}
 
 	if (new_conf->on_congestion != OC_BLOCK && new_conf->wire_protocol != DRBD_PROT_A)
diff --git a/include/linux/drbd.h b/include/linux/drbd.h
index 5171c3530886..0b93e5e2e064 100644
--- a/include/linux/drbd.h
+++ b/include/linux/drbd.h
@@ -136,7 +136,7 @@ enum drbd_ret_code {
 	ERR_AUTH_ALG		= 120,
 	ERR_AUTH_ALG_ND		= 121,
 	ERR_NOMEM		= 122,
-	ERR_DISCARD		= 123,
+	ERR_DISCARD_IMPOSSIBLE	= 123,
 	ERR_DISK_CONFIGURED	= 124,
 	ERR_NET_CONFIGURED	= 125,
 	ERR_MANDATORY_TAG	= 126,
-- 
cgit v1.2.3


From 328e0f125bf41f4f33f684db22015f92cb44fe56 Mon Sep 17 00:00:00 2001
From: Philipp Reisner <philipp.reisner@linbit.com>
Date: Fri, 19 Oct 2012 14:37:47 +0200
Subject: drbd: Broadcast sync progress no more often than once per second

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/block/drbd/drbd_int.h    | 1 +
 drivers/block/drbd/drbd_nl.c     | 6 ++++++
 drivers/block/drbd/drbd_worker.c | 4 ++++
 include/linux/drbd.h             | 4 ++--
 4 files changed, 13 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 057ffed6eb7e..784f4eb2ed61 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -965,6 +965,7 @@ struct drbd_conf {
 	unsigned long rs_mark_time[DRBD_SYNC_MARKS];
 	/* current index into rs_mark_{left,time} */
 	int rs_last_mark;
+	unsigned long rs_last_bcast; /* [unit jiffies] */
 
 	/* where does the admin want us to start? (sector) */
 	sector_t ov_start_sector;
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 298dd3e35e02..d339a2754a85 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -3295,6 +3295,12 @@ void drbd_bcast_event(struct drbd_conf *mdev, const struct sib_info *sib)
 	unsigned seq;
 	int err = -ENOMEM;
 
+	if (sib->sib_reason == SIB_SYNC_PROGRESS &&
+	    time_after(jiffies, mdev->rs_last_bcast + HZ))
+		mdev->rs_last_bcast = jiffies;
+	else
+		return;
+
 	seq = atomic_inc_return(&drbd_genl_seq);
 	msg = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO);
 	if (!msg)
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index 64a7305c678a..424dc7bdf9b7 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -1696,6 +1696,10 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side)
 	write_unlock_irq(&global_state_lock);
 
 	if (r == SS_SUCCESS) {
+		/* reset rs_last_bcast when a resync or verify is started,
+		 * to deal with potential jiffies wrap. */
+		mdev->rs_last_bcast = jiffies - HZ;
+
 		dev_info(DEV, "Began resync as %s (will sync %lu KB [%lu bits set]).\n",
 		     drbd_conn_str(ns.conn),
 		     (unsigned long) mdev->rs_total << (BM_BLOCK_SHIFT-10),
diff --git a/include/linux/drbd.h b/include/linux/drbd.h
index 0b93e5e2e064..0c5a18ec322c 100644
--- a/include/linux/drbd.h
+++ b/include/linux/drbd.h
@@ -52,8 +52,8 @@
 #endif
 
 extern const char *drbd_buildtag(void);
-#define REL_VERSION "8.3.11"
-#define API_VERSION 88
+#define REL_VERSION "8.4.2"
+#define API_VERSION 1
 #define PRO_VERSION_MIN 86
 #define PRO_VERSION_MAX 101
 
-- 
cgit v1.2.3


From 800963fd598e2acbcd3a21a17e3ab3c185ad0d6a Mon Sep 17 00:00:00 2001
From: Henrik Rydberg <rydberg@euromail.se>
Date: Sat, 10 Nov 2012 00:32:36 -0800
Subject: Input: document new members of struct input_dev

Fixes kernel-doc warnings for the members added in 3.7-rc1.

Signed-off-by: Henrik Rydberg <rydberg@euromail.se>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 include/linux/input.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/input.h b/include/linux/input.h
index 5538cc09a4f5..82ce323b9986 100644
--- a/include/linux/input.h
+++ b/include/linux/input.h
@@ -112,6 +112,9 @@ struct input_value {
  * @h_list: list of input handles associated with the device. When
  *	accessing the list dev->mutex must be held
  * @node: used to place the device onto input_dev_list
+ * @num_vals: number of values queued in the current frame
+ * @max_vals: maximum number of values queued in a frame
+ * @vals: array of values queued in the current frame
  * @devres_managed: indicates that devices is managed with devres framework
  *	and needs not be explicitly unregistered or freed.
  */
-- 
cgit v1.2.3


From 76f93992e4c44f30be797d5c99d6f369ed001747 Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Mon, 5 Nov 2012 16:10:31 +0100
Subject: mfd: Provide the STMPE driver with its own IRQ domain

The STMPE driver is yet another IRQ controller which requires its
own IRQ domain. So, we provide it with one.

Acked-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/stmpe.c       | 82 +++++++++++++++++++++++++++++------------------
 include/linux/mfd/stmpe.h |  2 ++
 2 files changed, 52 insertions(+), 32 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/stmpe.c b/drivers/mfd/stmpe.c
index ad13cb00a749..5c8d8f260df2 100644
--- a/drivers/mfd/stmpe.c
+++ b/drivers/mfd/stmpe.c
@@ -12,6 +12,7 @@
 #include <linux/kernel.h>
 #include <linux/interrupt.h>
 #include <linux/irq.h>
+#include <linux/irqdomain.h>
 #include <linux/pm.h>
 #include <linux/slab.h>
 #include <linux/mfd/core.h>
@@ -757,7 +758,9 @@ static irqreturn_t stmpe_irq(int irq, void *data)
 	int i;
 
 	if (variant->id_val == STMPE801_ID) {
-		handle_nested_irq(stmpe->irq_base);
+		int base = irq_create_mapping(stmpe->domain, 0);
+
+		handle_nested_irq(base);
 		return IRQ_HANDLED;
 	}
 
@@ -778,8 +781,9 @@ static irqreturn_t stmpe_irq(int irq, void *data)
 		while (status) {
 			int bit = __ffs(status);
 			int line = bank * 8 + bit;
+			int nestedirq = irq_create_mapping(stmpe->domain, line);
 
-			handle_nested_irq(stmpe->irq_base + line);
+			handle_nested_irq(nestedirq);
 			status &= ~(1 << bit);
 		}
 
@@ -820,7 +824,7 @@ static void stmpe_irq_sync_unlock(struct irq_data *data)
 static void stmpe_irq_mask(struct irq_data *data)
 {
 	struct stmpe *stmpe = irq_data_get_irq_chip_data(data);
-	int offset = data->irq - stmpe->irq_base;
+	int offset = data->hwirq;
 	int regoffset = offset / 8;
 	int mask = 1 << (offset % 8);
 
@@ -830,7 +834,7 @@ static void stmpe_irq_mask(struct irq_data *data)
 static void stmpe_irq_unmask(struct irq_data *data)
 {
 	struct stmpe *stmpe = irq_data_get_irq_chip_data(data);
-	int offset = data->irq - stmpe->irq_base;
+	int offset = data->hwirq;
 	int regoffset = offset / 8;
 	int mask = 1 << (offset % 8);
 
@@ -845,43 +849,62 @@ static struct irq_chip stmpe_irq_chip = {
 	.irq_unmask		= stmpe_irq_unmask,
 };
 
-static int __devinit stmpe_irq_init(struct stmpe *stmpe)
+static int stmpe_irq_map(struct irq_domain *d, unsigned int virq,
+                                irq_hw_number_t hwirq)
 {
+	struct stmpe *stmpe = d->host_data;
 	struct irq_chip *chip = NULL;
-	int num_irqs = stmpe->variant->num_irqs;
-	int base = stmpe->irq_base;
-	int irq;
 
 	if (stmpe->variant->id_val != STMPE801_ID)
 		chip = &stmpe_irq_chip;
 
-	for (irq = base; irq < base + num_irqs; irq++) {
-		irq_set_chip_data(irq, stmpe);
-		irq_set_chip_and_handler(irq, chip, handle_edge_irq);
-		irq_set_nested_thread(irq, 1);
+	irq_set_chip_data(virq, stmpe);
+	irq_set_chip_and_handler(virq, chip, handle_edge_irq);
+	irq_set_nested_thread(virq, 1);
 #ifdef CONFIG_ARM
-		set_irq_flags(irq, IRQF_VALID);
+	set_irq_flags(virq, IRQF_VALID);
 #else
-		irq_set_noprobe(irq);
+	irq_set_noprobe(virq);
 #endif
-	}
 
 	return 0;
 }
 
-static void stmpe_irq_remove(struct stmpe *stmpe)
+static void stmpe_irq_unmap(struct irq_domain *d, unsigned int virq)
 {
-	int num_irqs = stmpe->variant->num_irqs;
-	int base = stmpe->irq_base;
-	int irq;
-
-	for (irq = base; irq < base + num_irqs; irq++) {
 #ifdef CONFIG_ARM
-		set_irq_flags(irq, 0);
+		set_irq_flags(virq, 0);
 #endif
-		irq_set_chip_and_handler(irq, NULL, NULL);
-		irq_set_chip_data(irq, NULL);
+		irq_set_chip_and_handler(virq, NULL, NULL);
+		irq_set_chip_data(virq, NULL);
+}
+
+static struct irq_domain_ops stmpe_irq_ops = {
+        .map    = stmpe_irq_map,
+        .unmap  = stmpe_irq_unmap,
+        .xlate  = irq_domain_xlate_twocell,
+};
+
+static int __devinit stmpe_irq_init(struct stmpe *stmpe)
+{
+	int base = stmpe->irq_base;
+	int num_irqs = stmpe->variant->num_irqs;
+
+	if (base) {
+		stmpe->domain = irq_domain_add_legacy(
+			NULL, num_irqs, base, 0, &stmpe_irq_ops, stmpe);
+	}
+	else {
+		stmpe->domain = irq_domain_add_linear(
+			NULL, num_irqs, &stmpe_irq_ops, stmpe);
+	}
+
+	if (!stmpe->domain) {
+		dev_err(stmpe->dev, "Failed to create irqdomain\n");
+		return -ENOSYS;
 	}
+
+	return 0;
 }
 
 static int __devinit stmpe_chip_init(struct stmpe *stmpe)
@@ -954,7 +977,7 @@ static int __devinit stmpe_add_device(struct stmpe *stmpe,
 				      struct mfd_cell *cell)
 {
 	return mfd_add_devices(stmpe->dev, stmpe->pdata->id, cell, 1,
-			       NULL, stmpe->irq_base, NULL);
+			       NULL, stmpe->irq_base, stmpe->domain);
 }
 
 static int __devinit stmpe_devices_init(struct stmpe *stmpe)
@@ -1067,7 +1090,7 @@ int __devinit stmpe_probe(struct stmpe_client_info *ci, int partnum)
 		if (ret) {
 			dev_err(stmpe->dev, "failed to request IRQ: %d\n",
 					ret);
-			goto out_removeirq;
+			goto free_gpio;
 		}
 	}
 
@@ -1083,9 +1106,6 @@ out_removedevs:
 	mfd_remove_devices(stmpe->dev);
 	if (stmpe->irq >= 0)
 		free_irq(stmpe->irq, stmpe);
-out_removeirq:
-	if (stmpe->irq >= 0)
-		stmpe_irq_remove(stmpe);
 free_gpio:
 	if (pdata->irq_over_gpio)
 		gpio_free(pdata->irq_gpio);
@@ -1098,10 +1118,8 @@ int stmpe_remove(struct stmpe *stmpe)
 {
 	mfd_remove_devices(stmpe->dev);
 
-	if (stmpe->irq >= 0) {
+	if (stmpe->irq >= 0)
 		free_irq(stmpe->irq, stmpe);
-		stmpe_irq_remove(stmpe);
-	}
 
 	if (stmpe->pdata->irq_over_gpio)
 		gpio_free(stmpe->pdata->irq_gpio);
diff --git a/include/linux/mfd/stmpe.h b/include/linux/mfd/stmpe.h
index f8d5b4d5843f..15dac790365f 100644
--- a/include/linux/mfd/stmpe.h
+++ b/include/linux/mfd/stmpe.h
@@ -62,6 +62,7 @@ struct stmpe_client_info;
  * @lock: lock protecting I/O operations
  * @irq_lock: IRQ bus lock
  * @dev: device, mostly for dev_dbg()
+ * @irq_domain: IRQ domain
  * @client: client - i2c or spi
  * @ci: client specific information
  * @partnum: part number
@@ -79,6 +80,7 @@ struct stmpe {
 	struct mutex lock;
 	struct mutex irq_lock;
 	struct device *dev;
+	struct irq_domain *domain;
 	void *client;
 	struct stmpe_client_info *ci;
 	enum stmpe_partnum partnum;
-- 
cgit v1.2.3


From 90a38d999739f35f4fc925c875e6ee518546b66c Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Mon, 15 Oct 2012 22:44:45 +0200
Subject: mfd: Remove Unicode Byte Order Marks from da9055
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Older gcc (< 4.4) doesn't like files starting with Unicode BOMs:

include/linux/mfd/da9055/core.h:1: error: stray ‘\357’ in program
include/linux/mfd/da9055/core.h:1: error: stray ‘\273’ in program
include/linux/mfd/da9055/core.h:1: error: stray ‘\277’ in program
include/linux/mfd/da9055/pdata.h:1: error: stray ‘\357’ in program
include/linux/mfd/da9055/pdata.h:1: error: stray ‘\273’ in program
include/linux/mfd/da9055/pdata.h:1: error: stray ‘\277’ in program
include/linux/mfd/da9055/reg.h:1: error: stray ‘\357’ in program
include/linux/mfd/da9055/reg.h:1: error: stray ‘\273’ in program
include/linux/mfd/da9055/reg.h:1: error: stray ‘\277’ in program

Remove the BOMs, the rest of the files is plain ASCII anyway.

Output of "file" before:

include/linux/mfd/da9055/core.h:  UTF-8 Unicode (with BOM) C program text
include/linux/mfd/da9055/pdata.h: UTF-8 Unicode (with BOM) C program text
include/linux/mfd/da9055/reg.h:   UTF-8 Unicode (with BOM) C program text

Output of "file" after:

include/linux/mfd/da9055/core.h:  ASCII C program text
include/linux/mfd/da9055/pdata.h: ASCII C program text
include/linux/mfd/da9055/reg.h:   ASCII C program text

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 include/linux/mfd/da9055/core.h  | 2 +-
 include/linux/mfd/da9055/pdata.h | 2 +-
 include/linux/mfd/da9055/reg.h   | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/da9055/core.h b/include/linux/mfd/da9055/core.h
index c96ad682c59e..956afa445998 100644
--- a/include/linux/mfd/da9055/core.h
+++ b/include/linux/mfd/da9055/core.h
@@ -1,4 +1,4 @@
-﻿/*
+/*
  * da9055 declarations for DA9055 PMICs.
  *
  * Copyright(c) 2012 Dialog Semiconductor Ltd.
diff --git a/include/linux/mfd/da9055/pdata.h b/include/linux/mfd/da9055/pdata.h
index 147293b4471d..b9b204edb4e9 100644
--- a/include/linux/mfd/da9055/pdata.h
+++ b/include/linux/mfd/da9055/pdata.h
@@ -1,4 +1,4 @@
-﻿/* Copyright (C) 2012 Dialog Semiconductor Ltd.
+/* Copyright (C) 2012 Dialog Semiconductor Ltd.
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
diff --git a/include/linux/mfd/da9055/reg.h b/include/linux/mfd/da9055/reg.h
index df237ee54803..2b592e072dbf 100644
--- a/include/linux/mfd/da9055/reg.h
+++ b/include/linux/mfd/da9055/reg.h
@@ -1,4 +1,4 @@
-﻿/*
+/*
  * DA9055 declarations for DA9055 PMICs.
  *
  * Copyright(c) 2012 Dialog Semiconductor Ltd.
-- 
cgit v1.2.3


From 89d16665d388837b30972081d97b814be26d68a2 Mon Sep 17 00:00:00 2001
From: Matt Fleming <matt.fleming@intel.com>
Date: Fri, 9 Nov 2012 21:02:56 +0000
Subject: efivarfs: Use query_variable_info() to limit kmalloc()

We don't want someone who can write EFI variables to be able to
allocate arbitrarily large amounts of memory, so cap it to something
sensible like the amount of free space for EFI variables.

Acked-by: Jeremy Kerr <jeremy.kerr@canonical.com>
Cc: Matthew Garrett <mjg@redhat.com>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
---
 drivers/firmware/efivars.c | 43 ++++++++++++++++++++++++++++++++++---------
 include/linux/efi.h        |  1 +
 2 files changed, 35 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/firmware/efivars.c b/drivers/firmware/efivars.c
index 9ac934018bba..d6b8d2f628fa 100644
--- a/drivers/firmware/efivars.c
+++ b/drivers/firmware/efivars.c
@@ -694,28 +694,51 @@ static ssize_t efivarfs_file_write(struct file *file,
 	struct inode *inode = file->f_mapping->host;
 	unsigned long datasize = count - sizeof(attributes);
 	unsigned long newdatasize;
+	u64 storage_size, remaining_size, max_size;
 	ssize_t bytes = 0;
 
 	if (count < sizeof(attributes))
 		return -EINVAL;
 
-	data = kmalloc(datasize, GFP_KERNEL);
+	if (copy_from_user(&attributes, userbuf, sizeof(attributes)))
+		return -EFAULT;
 
-	if (!data)
-		return -ENOMEM;
+	if (attributes & ~(EFI_VARIABLE_MASK))
+		return -EINVAL;
 
 	efivars = var->efivars;
 
-	if (copy_from_user(&attributes, userbuf, sizeof(attributes))) {
-		bytes = -EFAULT;
-		goto out;
+	/*
+	 * Ensure that the user can't allocate arbitrarily large
+	 * amounts of memory. Pick a default size of 64K if
+	 * QueryVariableInfo() isn't supported by the firmware.
+	 */
+	spin_lock(&efivars->lock);
+
+	if (!efivars->ops->query_variable_info)
+		status = EFI_UNSUPPORTED;
+	else {
+		const struct efivar_operations *fops = efivars->ops;
+		status = fops->query_variable_info(attributes, &storage_size,
+						   &remaining_size, &max_size);
 	}
 
-	if (attributes & ~(EFI_VARIABLE_MASK)) {
-		bytes = -EINVAL;
-		goto out;
+	spin_unlock(&efivars->lock);
+
+	if (status != EFI_SUCCESS) {
+		if (status != EFI_UNSUPPORTED)
+			return efi_status_to_err(status);
+
+		remaining_size = 65536;
 	}
 
+	if (datasize > remaining_size)
+		return -ENOSPC;
+
+	data = kmalloc(datasize, GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
 	if (copy_from_user(data, userbuf + sizeof(attributes), datasize)) {
 		bytes = -EFAULT;
 		goto out;
@@ -1709,6 +1732,8 @@ efivars_init(void)
 	ops.get_variable = efi.get_variable;
 	ops.set_variable = efi.set_variable;
 	ops.get_next_variable = efi.get_next_variable;
+	ops.query_variable_info = efi.query_variable_info;
+
 	error = register_efivars(&__efivars, &ops, efi_kobj);
 	if (error)
 		goto err_put;
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 5e2308d9c6be..f80079cd84f4 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -646,6 +646,7 @@ struct efivar_operations {
 	efi_get_variable_t *get_variable;
 	efi_get_next_variable_t *get_next_variable;
 	efi_set_variable_t *set_variable;
+	efi_query_variable_info_t *query_variable_info;
 };
 
 struct efivars {
-- 
cgit v1.2.3


From ab7edb149c7548541ee588b8372c2041b6f1cbc8 Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@ti.com>
Date: Thu, 11 Oct 2012 13:55:32 +0200
Subject: mfd: twl6040: Convert to use regmap_irq

With regmap_irq it is possible to remove the twl6040-irq.c file and
simplify the code.

Signed-off-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
Reviewed-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/Kconfig         |   4 +-
 drivers/mfd/Makefile        |   2 +-
 drivers/mfd/twl6040-core.c  |  55 ++++++++----
 drivers/mfd/twl6040-irq.c   | 205 --------------------------------------------
 include/linux/mfd/twl6040.h |  10 +--
 5 files changed, 48 insertions(+), 228 deletions(-)
 delete mode 100644 drivers/mfd/twl6040-irq.c

(limited to 'include/linux')

diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index 3f2187ae8c5d..34242cada125 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -321,10 +321,10 @@ config MFD_TWL4030_AUDIO
 
 config TWL6040_CORE
 	bool "Support for TWL6040 audio codec"
-	depends on I2C=y && GENERIC_HARDIRQS
+	depends on I2C=y
 	select MFD_CORE
 	select REGMAP_I2C
-	select IRQ_DOMAIN
+	select REGMAP_IRQ
 	default n
 	help
 	  Say yes here if you want support for Texas Instruments TWL6040 audio
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index a4093a44304a..05bebf66ccd2 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -67,7 +67,7 @@ obj-$(CONFIG_TWL4030_CORE)	+= twl-core.o twl4030-irq.o twl6030-irq.o
 obj-$(CONFIG_TWL4030_MADC)      += twl4030-madc.o
 obj-$(CONFIG_TWL4030_POWER)    += twl4030-power.o
 obj-$(CONFIG_MFD_TWL4030_AUDIO)	+= twl4030-audio.o
-obj-$(CONFIG_TWL6040_CORE)	+= twl6040-core.o twl6040-irq.o
+obj-$(CONFIG_TWL6040_CORE)	+= twl6040-core.o
 
 obj-$(CONFIG_MFD_MC13XXX)	+= mc13xxx-core.o
 obj-$(CONFIG_MFD_MC13XXX_SPI)	+= mc13xxx-spi.o
diff --git a/drivers/mfd/twl6040-core.c b/drivers/mfd/twl6040-core.c
index 5817bc6d09dc..e5f7b795afff 100644
--- a/drivers/mfd/twl6040-core.c
+++ b/drivers/mfd/twl6040-core.c
@@ -499,6 +499,25 @@ static struct regmap_config twl6040_regmap_config = {
 	.readable_reg = twl6040_readable_reg,
 };
 
+static const struct regmap_irq twl6040_irqs[] = {
+	{ .reg_offset = 0, .mask = TWL6040_THINT, },
+	{ .reg_offset = 0, .mask = TWL6040_PLUGINT | TWL6040_UNPLUGINT, },
+	{ .reg_offset = 0, .mask = TWL6040_HOOKINT, },
+	{ .reg_offset = 0, .mask = TWL6040_HFINT, },
+	{ .reg_offset = 0, .mask = TWL6040_VIBINT, },
+	{ .reg_offset = 0, .mask = TWL6040_READYINT, },
+};
+
+static struct regmap_irq_chip twl6040_irq_chip = {
+	.name = "twl6040",
+	.irqs = twl6040_irqs,
+	.num_irqs = ARRAY_SIZE(twl6040_irqs),
+
+	.num_regs = 1,
+	.status_base = TWL6040_REG_INTID,
+	.mask_base = TWL6040_REG_INTMR,
+};
+
 static int __devinit twl6040_probe(struct i2c_client *client,
 				     const struct i2c_device_id *id)
 {
@@ -574,21 +593,27 @@ static int __devinit twl6040_probe(struct i2c_client *client,
 			goto gpio_err;
 	}
 
-	/* codec interrupt */
-	ret = twl6040_irq_init(twl6040);
-	if (ret)
+	ret = regmap_add_irq_chip(twl6040->regmap, twl6040->irq,
+			IRQF_ONESHOT, 0, &twl6040_irq_chip,
+			&twl6040->irq_data);
+	if (ret < 0)
 		goto irq_init_err;
 
-	ret = request_threaded_irq(twl6040->irq_base + TWL6040_IRQ_READY,
-				   NULL, twl6040_readyint_handler, IRQF_ONESHOT,
+	twl6040->irq_ready = regmap_irq_get_virq(twl6040->irq_data,
+					       TWL6040_IRQ_READY);
+	twl6040->irq_th = regmap_irq_get_virq(twl6040->irq_data,
+					       TWL6040_IRQ_TH);
+
+	ret = request_threaded_irq(twl6040->irq_ready, NULL,
+				   twl6040_readyint_handler, IRQF_ONESHOT,
 				   "twl6040_irq_ready", twl6040);
 	if (ret) {
 		dev_err(twl6040->dev, "READY IRQ request failed: %d\n", ret);
 		goto readyirq_err;
 	}
 
-	ret = request_threaded_irq(twl6040->irq_base + TWL6040_IRQ_TH,
-				   NULL, twl6040_thint_handler, IRQF_ONESHOT,
+	ret = request_threaded_irq(twl6040->irq_th, NULL,
+				   twl6040_thint_handler, IRQF_ONESHOT,
 				   "twl6040_irq_th", twl6040);
 	if (ret) {
 		dev_err(twl6040->dev, "Thermal IRQ request failed: %d\n", ret);
@@ -604,7 +629,7 @@ static int __devinit twl6040_probe(struct i2c_client *client,
 	 * The ASoC codec can work without pdata, pass the platform_data only if
 	 * it has been provided.
 	 */
-	irq = twl6040->irq_base + TWL6040_IRQ_PLUG;
+	irq = regmap_irq_get_virq(twl6040->irq_data, TWL6040_IRQ_PLUG);
 	cell = &twl6040->cells[children];
 	cell->name = "twl6040-codec";
 	twl6040_codec_rsrc[0].start = irq;
@@ -618,7 +643,7 @@ static int __devinit twl6040_probe(struct i2c_client *client,
 	children++;
 
 	if (twl6040_has_vibra(pdata, node)) {
-		irq = twl6040->irq_base + TWL6040_IRQ_VIB;
+		irq = regmap_irq_get_virq(twl6040->irq_data, TWL6040_IRQ_VIB);
 
 		cell = &twl6040->cells[children];
 		cell->name = "twl6040-vibra";
@@ -657,11 +682,11 @@ static int __devinit twl6040_probe(struct i2c_client *client,
 	return 0;
 
 mfd_err:
-	free_irq(twl6040->irq_base + TWL6040_IRQ_TH, twl6040);
+	free_irq(twl6040->irq_th, twl6040);
 thirq_err:
-	free_irq(twl6040->irq_base + TWL6040_IRQ_READY, twl6040);
+	free_irq(twl6040->irq_ready, twl6040);
 readyirq_err:
-	twl6040_irq_exit(twl6040);
+	regmap_del_irq_chip(twl6040->irq, twl6040->irq_data);
 irq_init_err:
 	if (gpio_is_valid(twl6040->audpwron))
 		gpio_free(twl6040->audpwron);
@@ -685,9 +710,9 @@ static int __devexit twl6040_remove(struct i2c_client *client)
 	if (gpio_is_valid(twl6040->audpwron))
 		gpio_free(twl6040->audpwron);
 
-	free_irq(twl6040->irq_base + TWL6040_IRQ_READY, twl6040);
-	free_irq(twl6040->irq_base + TWL6040_IRQ_TH, twl6040);
-	twl6040_irq_exit(twl6040);
+	free_irq(twl6040->irq_ready, twl6040);
+	free_irq(twl6040->irq_th, twl6040);
+	regmap_del_irq_chip(twl6040->irq, twl6040->irq_data);
 
 	mfd_remove_devices(&client->dev);
 	i2c_set_clientdata(client, NULL);
diff --git a/drivers/mfd/twl6040-irq.c b/drivers/mfd/twl6040-irq.c
deleted file mode 100644
index 4b42543da228..000000000000
--- a/drivers/mfd/twl6040-irq.c
+++ /dev/null
@@ -1,205 +0,0 @@
-/*
- * Interrupt controller support for TWL6040
- *
- * Author:     Misael Lopez Cruz <misael.lopez@ti.com>
- *
- * Copyright:   (C) 2011 Texas Instruments, Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
- * 02110-1301 USA
- *
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/err.h>
-#include <linux/irq.h>
-#include <linux/of.h>
-#include <linux/irqdomain.h>
-#include <linux/interrupt.h>
-#include <linux/mfd/core.h>
-#include <linux/mfd/twl6040.h>
-
-struct twl6040_irq_data {
-	int mask;
-	int status;
-};
-
-static struct twl6040_irq_data twl6040_irqs[] = {
-	{
-		.mask = TWL6040_THMSK,
-		.status = TWL6040_THINT,
-	},
-	{
-		.mask = TWL6040_PLUGMSK,
-		.status = TWL6040_PLUGINT | TWL6040_UNPLUGINT,
-	},
-	{
-		.mask = TWL6040_HOOKMSK,
-		.status = TWL6040_HOOKINT,
-	},
-	{
-		.mask = TWL6040_HFMSK,
-		.status = TWL6040_HFINT,
-	},
-	{
-		.mask = TWL6040_VIBMSK,
-		.status = TWL6040_VIBINT,
-	},
-	{
-		.mask = TWL6040_READYMSK,
-		.status = TWL6040_READYINT,
-	},
-};
-
-static inline
-struct twl6040_irq_data *irq_to_twl6040_irq(struct twl6040 *twl6040,
-					    int irq)
-{
-	return &twl6040_irqs[irq - twl6040->irq_base];
-}
-
-static void twl6040_irq_lock(struct irq_data *data)
-{
-	struct twl6040 *twl6040 = irq_data_get_irq_chip_data(data);
-
-	mutex_lock(&twl6040->irq_mutex);
-}
-
-static void twl6040_irq_sync_unlock(struct irq_data *data)
-{
-	struct twl6040 *twl6040 = irq_data_get_irq_chip_data(data);
-
-	/* write back to hardware any change in irq mask */
-	if (twl6040->irq_masks_cur != twl6040->irq_masks_cache) {
-		twl6040->irq_masks_cache = twl6040->irq_masks_cur;
-		twl6040_reg_write(twl6040, TWL6040_REG_INTMR,
-				  twl6040->irq_masks_cur);
-	}
-
-	mutex_unlock(&twl6040->irq_mutex);
-}
-
-static void twl6040_irq_enable(struct irq_data *data)
-{
-	struct twl6040 *twl6040 = irq_data_get_irq_chip_data(data);
-	struct twl6040_irq_data *irq_data = irq_to_twl6040_irq(twl6040,
-							       data->irq);
-
-	twl6040->irq_masks_cur &= ~irq_data->mask;
-}
-
-static void twl6040_irq_disable(struct irq_data *data)
-{
-	struct twl6040 *twl6040 = irq_data_get_irq_chip_data(data);
-	struct twl6040_irq_data *irq_data = irq_to_twl6040_irq(twl6040,
-							       data->irq);
-
-	twl6040->irq_masks_cur |= irq_data->mask;
-}
-
-static struct irq_chip twl6040_irq_chip = {
-	.name			= "twl6040",
-	.irq_bus_lock		= twl6040_irq_lock,
-	.irq_bus_sync_unlock	= twl6040_irq_sync_unlock,
-	.irq_enable		= twl6040_irq_enable,
-	.irq_disable		= twl6040_irq_disable,
-};
-
-static irqreturn_t twl6040_irq_thread(int irq, void *data)
-{
-	struct twl6040 *twl6040 = data;
-	u8 intid;
-	int i;
-
-	intid = twl6040_reg_read(twl6040, TWL6040_REG_INTID);
-
-	/* apply masking and report (backwards to handle READYINT first) */
-	for (i = ARRAY_SIZE(twl6040_irqs) - 1; i >= 0; i--) {
-		if (twl6040->irq_masks_cur & twl6040_irqs[i].mask)
-			intid &= ~twl6040_irqs[i].status;
-		if (intid & twl6040_irqs[i].status)
-			handle_nested_irq(twl6040->irq_base + i);
-	}
-
-	/* ack unmasked irqs */
-	twl6040_reg_write(twl6040, TWL6040_REG_INTID, intid);
-
-	return IRQ_HANDLED;
-}
-
-int twl6040_irq_init(struct twl6040 *twl6040)
-{
-	struct device_node *node = twl6040->dev->of_node;
-	int i, nr_irqs, irq_base, ret;
-	u8 val;
-
-	mutex_init(&twl6040->irq_mutex);
-
-	/* mask the individual interrupt sources */
-	twl6040->irq_masks_cur = TWL6040_ALLINT_MSK;
-	twl6040->irq_masks_cache = TWL6040_ALLINT_MSK;
-	twl6040_reg_write(twl6040, TWL6040_REG_INTMR, TWL6040_ALLINT_MSK);
-
-	nr_irqs = ARRAY_SIZE(twl6040_irqs);
-
-	irq_base = irq_alloc_descs(-1, 0, nr_irqs, 0);
-	if (IS_ERR_VALUE(irq_base)) {
-		dev_err(twl6040->dev, "Fail to allocate IRQ descs\n");
-		return irq_base;
-	}
-	twl6040->irq_base = irq_base;
-
-	irq_domain_add_legacy(node, ARRAY_SIZE(twl6040_irqs), irq_base, 0,
-			      &irq_domain_simple_ops, NULL);
-
-	/* Register them with genirq */
-	for (i = irq_base; i < irq_base + nr_irqs; i++) {
-		irq_set_chip_data(i, twl6040);
-		irq_set_chip_and_handler(i, &twl6040_irq_chip,
-					 handle_level_irq);
-		irq_set_nested_thread(i, 1);
-
-		/* ARM needs us to explicitly flag the IRQ as valid
-		 * and will set them noprobe when we do so. */
-#ifdef CONFIG_ARM
-		set_irq_flags(i, IRQF_VALID);
-#else
-		irq_set_noprobe(i);
-#endif
-	}
-
-	ret = request_threaded_irq(twl6040->irq, NULL, twl6040_irq_thread,
-				   IRQF_ONESHOT, "twl6040", twl6040);
-	if (ret) {
-		dev_err(twl6040->dev, "failed to request IRQ %d: %d\n",
-			twl6040->irq, ret);
-		return ret;
-	}
-
-	/* reset interrupts */
-	val = twl6040_reg_read(twl6040, TWL6040_REG_INTID);
-
-	/* interrupts cleared on write */
-	twl6040_clear_bits(twl6040, TWL6040_REG_ACCCTL, TWL6040_INTCLRMODE);
-
-	return 0;
-}
-EXPORT_SYMBOL(twl6040_irq_init);
-
-void twl6040_irq_exit(struct twl6040 *twl6040)
-{
-	free_irq(twl6040->irq, twl6040);
-}
-EXPORT_SYMBOL(twl6040_irq_exit);
diff --git a/include/linux/mfd/twl6040.h b/include/linux/mfd/twl6040.h
index a8eff4ad9be5..94ac944d12f0 100644
--- a/include/linux/mfd/twl6040.h
+++ b/include/linux/mfd/twl6040.h
@@ -207,10 +207,12 @@ struct twl6040_platform_data {
 };
 
 struct regmap;
+struct regmap_irq_chips_data;
 
 struct twl6040 {
 	struct device *dev;
 	struct regmap *regmap;
+	struct regmap_irq_chip_data *irq_data;
 	struct regulator_bulk_data supplies[2]; /* supplies for vio, v2v1 */
 	struct mutex mutex;
 	struct mutex irq_mutex;
@@ -228,9 +230,8 @@ struct twl6040 {
 	unsigned int mclk;
 
 	unsigned int irq;
-	unsigned int irq_base;
-	u8 irq_masks_cur;
-	u8 irq_masks_cache;
+	unsigned int irq_ready;
+	unsigned int irq_th;
 };
 
 int twl6040_reg_read(struct twl6040 *twl6040, unsigned int reg);
@@ -245,8 +246,7 @@ int twl6040_set_pll(struct twl6040 *twl6040, int pll_id,
 		    unsigned int freq_in, unsigned int freq_out);
 int twl6040_get_pll(struct twl6040 *twl6040);
 unsigned int twl6040_get_sysclk(struct twl6040 *twl6040);
-int twl6040_irq_init(struct twl6040 *twl6040);
-void twl6040_irq_exit(struct twl6040 *twl6040);
+
 /* Get the combined status of the vibra control register */
 int twl6040_get_vibralr_status(struct twl6040 *twl6040);
 
-- 
cgit v1.2.3


From 605511a848ae3ac4b2ce272ae6cbf8930b29ebb3 Mon Sep 17 00:00:00 2001
From: Laxman Dewangan <ldewangan@nvidia.com>
Date: Tue, 13 Nov 2012 19:18:05 +0530
Subject: mfd: Convert tps6586x to irq_domain

Allocate the irq base if it base is not porvided i.e.
in case of device tree invocation of this driver.
Convert the tps6586x driver to irq domain, using a
legacy IRQ mapping if an irq_base is specified in
platform data or dynamically allocated and otherwise
using a linear mapping.

Signed-off-by: Laxman Dewangan <ldewangan@nvidia.com>
Reviewed-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/tps6586x.c       | 91 ++++++++++++++++++++++++++++++++------------
 include/linux/mfd/tps6586x.h |  1 +
 2 files changed, 67 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/tps6586x.c b/drivers/mfd/tps6586x.c
index 467464368773..2cdf1e6c00c8 100644
--- a/drivers/mfd/tps6586x.c
+++ b/drivers/mfd/tps6586x.c
@@ -17,12 +17,14 @@
 
 #include <linux/interrupt.h>
 #include <linux/irq.h>
+#include <linux/irqdomain.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
 #include <linux/slab.h>
 #include <linux/err.h>
 #include <linux/i2c.h>
+#include <linux/platform_device.h>
 #include <linux/regmap.h>
 #include <linux/regulator/of_regulator.h>
 #include <linux/regulator/machine.h>
@@ -116,6 +118,7 @@ struct tps6586x {
 	int			irq_base;
 	u32			irq_en;
 	u8			mask_reg[5];
+	struct irq_domain	*irq_domain;
 };
 
 static inline struct tps6586x *dev_to_tps6586x(struct device *dev)
@@ -184,6 +187,14 @@ int tps6586x_update(struct device *dev, int reg, uint8_t val, uint8_t mask)
 }
 EXPORT_SYMBOL_GPL(tps6586x_update);
 
+int tps6586x_irq_get_virq(struct device *dev, int irq)
+{
+	struct tps6586x *tps6586x = dev_to_tps6586x(dev);
+
+	return irq_create_mapping(tps6586x->irq_domain, irq);
+}
+EXPORT_SYMBOL_GPL(tps6586x_irq_get_virq);
+
 static int __remove_subdev(struct device *dev, void *unused)
 {
 	platform_device_unregister(to_platform_device(dev));
@@ -205,7 +216,7 @@ static void tps6586x_irq_lock(struct irq_data *data)
 static void tps6586x_irq_enable(struct irq_data *irq_data)
 {
 	struct tps6586x *tps6586x = irq_data_get_irq_chip_data(irq_data);
-	unsigned int __irq = irq_data->irq - tps6586x->irq_base;
+	unsigned int __irq = irq_data->hwirq;
 	const struct tps6586x_irq_data *data = &tps6586x_irqs[__irq];
 
 	tps6586x->mask_reg[data->mask_reg] &= ~data->mask_mask;
@@ -216,7 +227,7 @@ static void tps6586x_irq_disable(struct irq_data *irq_data)
 {
 	struct tps6586x *tps6586x = irq_data_get_irq_chip_data(irq_data);
 
-	unsigned int __irq = irq_data->irq - tps6586x->irq_base;
+	unsigned int __irq = irq_data->hwirq;
 	const struct tps6586x_irq_data *data = &tps6586x_irqs[__irq];
 
 	tps6586x->mask_reg[data->mask_reg] |= data->mask_mask;
@@ -239,6 +250,39 @@ static void tps6586x_irq_sync_unlock(struct irq_data *data)
 	mutex_unlock(&tps6586x->irq_lock);
 }
 
+static struct irq_chip tps6586x_irq_chip = {
+	.name = "tps6586x",
+	.irq_bus_lock = tps6586x_irq_lock,
+	.irq_bus_sync_unlock = tps6586x_irq_sync_unlock,
+	.irq_disable = tps6586x_irq_disable,
+	.irq_enable = tps6586x_irq_enable,
+};
+
+static int tps6586x_irq_map(struct irq_domain *h, unsigned int virq,
+				irq_hw_number_t hw)
+{
+	struct tps6586x *tps6586x = h->host_data;
+
+	irq_set_chip_data(virq, tps6586x);
+	irq_set_chip_and_handler(virq, &tps6586x_irq_chip, handle_simple_irq);
+	irq_set_nested_thread(virq, 1);
+
+	/* ARM needs us to explicitly flag the IRQ as valid
+	 * and will set them noprobe when we do so. */
+#ifdef CONFIG_ARM
+	set_irq_flags(virq, IRQF_VALID);
+#else
+	irq_set_noprobe(virq);
+#endif
+
+	return 0;
+}
+
+static struct irq_domain_ops tps6586x_domain_ops = {
+	.map    = tps6586x_irq_map,
+	.xlate  = irq_domain_xlate_twocell,
+};
+
 static irqreturn_t tps6586x_irq(int irq, void *data)
 {
 	struct tps6586x *tps6586x = data;
@@ -259,7 +303,8 @@ static irqreturn_t tps6586x_irq(int irq, void *data)
 		int i = __ffs(acks);
 
 		if (tps6586x->irq_en & (1 << i))
-			handle_nested_irq(tps6586x->irq_base + i);
+			handle_nested_irq(
+				irq_find_mapping(tps6586x->irq_domain, i));
 
 		acks &= ~(1 << i);
 	}
@@ -272,11 +317,8 @@ static int __devinit tps6586x_irq_init(struct tps6586x *tps6586x, int irq,
 {
 	int i, ret;
 	u8 tmp[4];
-
-	if (!irq_base) {
-		dev_warn(tps6586x->dev, "No interrupt support on IRQ base\n");
-		return -EINVAL;
-	}
+	int new_irq_base;
+	int irq_num = ARRAY_SIZE(tps6586x_irqs);
 
 	mutex_init(&tps6586x->irq_lock);
 	for (i = 0; i < 5; i++) {
@@ -286,25 +328,24 @@ static int __devinit tps6586x_irq_init(struct tps6586x *tps6586x, int irq,
 
 	tps6586x_reads(tps6586x->dev, TPS6586X_INT_ACK1, sizeof(tmp), tmp);
 
-	tps6586x->irq_base = irq_base;
-
-	tps6586x->irq_chip.name = "tps6586x";
-	tps6586x->irq_chip.irq_enable = tps6586x_irq_enable;
-	tps6586x->irq_chip.irq_disable = tps6586x_irq_disable;
-	tps6586x->irq_chip.irq_bus_lock = tps6586x_irq_lock;
-	tps6586x->irq_chip.irq_bus_sync_unlock = tps6586x_irq_sync_unlock;
-
-	for (i = 0; i < ARRAY_SIZE(tps6586x_irqs); i++) {
-		int __irq = i + tps6586x->irq_base;
-		irq_set_chip_data(__irq, tps6586x);
-		irq_set_chip_and_handler(__irq, &tps6586x->irq_chip,
-					 handle_simple_irq);
-		irq_set_nested_thread(__irq, 1);
-#ifdef CONFIG_ARM
-		set_irq_flags(__irq, IRQF_VALID);
-#endif
+	if  (irq_base > 0) {
+		new_irq_base = irq_alloc_descs(irq_base, 0, irq_num, -1);
+		if (new_irq_base < 0) {
+			dev_err(tps6586x->dev,
+				"Failed to alloc IRQs: %d\n", new_irq_base);
+			return new_irq_base;
+		}
+	} else {
+		new_irq_base = 0;
 	}
 
+	tps6586x->irq_domain = irq_domain_add_simple(tps6586x->dev->of_node,
+				irq_num, new_irq_base, &tps6586x_domain_ops,
+				tps6586x);
+	if (!tps6586x->irq_domain) {
+		dev_err(tps6586x->dev, "Failed to create IRQ domain\n");
+		return -ENOMEM;
+	}
 	ret = request_threaded_irq(irq, NULL, tps6586x_irq, IRQF_ONESHOT,
 				   "tps6586x", tps6586x);
 
diff --git a/include/linux/mfd/tps6586x.h b/include/linux/mfd/tps6586x.h
index 2dd123194958..ebd8b08a386b 100644
--- a/include/linux/mfd/tps6586x.h
+++ b/include/linux/mfd/tps6586x.h
@@ -93,5 +93,6 @@ extern int tps6586x_set_bits(struct device *dev, int reg, uint8_t bit_mask);
 extern int tps6586x_clr_bits(struct device *dev, int reg, uint8_t bit_mask);
 extern int tps6586x_update(struct device *dev, int reg, uint8_t val,
 			   uint8_t mask);
+extern int tps6586x_irq_get_virq(struct device *dev, int irq);
 
 #endif /*__LINUX_MFD_TPS6586X_H */
-- 
cgit v1.2.3


From 2cd451792db174382aaca96c75bc3bf47a6065fe Mon Sep 17 00:00:00 2001
From: Daniel Mack <zonque@gmail.com>
Date: Wed, 14 Nov 2012 11:14:26 +0800
Subject: spi/omap: fix D0/D1 direction confusion

0384e90b8 ("spi/mcspi: allow configuration of pin directions") did what
it claimed to do the wrong way around. D0/D1 is configured as output by
*clearing* the bits in the conf registers, hence also breaking the
former default behaviour.

Fix this before that change is merged to mainline.

Signed-off-by: Daniel Mack <zonque@gmail.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 Documentation/devicetree/bindings/spi/omap-spi.txt | 6 +++---
 drivers/spi/spi-omap2-mcspi.c                      | 6 +++---
 include/linux/platform_data/spi-omap2-mcspi.h      | 4 ++--
 3 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/spi/omap-spi.txt b/Documentation/devicetree/bindings/spi/omap-spi.txt
index 2ef0a6b85653..938809c6829b 100644
--- a/Documentation/devicetree/bindings/spi/omap-spi.txt
+++ b/Documentation/devicetree/bindings/spi/omap-spi.txt
@@ -6,9 +6,9 @@ Required properties:
   - "ti,omap4-spi" for OMAP4+.
 - ti,spi-num-cs : Number of chipselect supported  by the instance.
 - ti,hwmods: Name of the hwmod associated to the McSPI
-- ti,pindir-d0-in-d1-out: Select the D0 pin as input and D1 as
-			  output. The default is D0 as output and
-			  D1 as input.
+- ti,pindir-d0-out-d1-in: Select the D0 pin as output and D1 as
+			  input. The default is D0 as input and
+			  D1 as output.
 
 Example:
 
diff --git a/drivers/spi/spi-omap2-mcspi.c b/drivers/spi/spi-omap2-mcspi.c
index 51046332677c..89f73c425c3d 100644
--- a/drivers/spi/spi-omap2-mcspi.c
+++ b/drivers/spi/spi-omap2-mcspi.c
@@ -766,7 +766,7 @@ static int omap2_mcspi_setup_transfer(struct spi_device *spi,
 	/* standard 4-wire master mode:  SCK, MOSI/out, MISO/in, nCS
 	 * REVISIT: this controller could support SPI_3WIRE mode.
 	 */
-	if (mcspi->pin_dir == MCSPI_PINDIR_D0_OUT_D1_IN) {
+	if (mcspi->pin_dir == MCSPI_PINDIR_D0_IN_D1_OUT) {
 		l &= ~OMAP2_MCSPI_CHCONF_IS;
 		l &= ~OMAP2_MCSPI_CHCONF_DPE1;
 		l |= OMAP2_MCSPI_CHCONF_DPE0;
@@ -1188,8 +1188,8 @@ static int __devinit omap2_mcspi_probe(struct platform_device *pdev)
 		of_property_read_u32(node, "ti,spi-num-cs", &num_cs);
 		master->num_chipselect = num_cs;
 		master->bus_num = bus_num++;
-		if (of_get_property(node, "ti,pindir-d0-in-d1-out", NULL))
-			mcspi->pin_dir = MCSPI_PINDIR_D0_IN_D1_OUT;
+		if (of_get_property(node, "ti,pindir-d0-out-d1-in", NULL))
+			mcspi->pin_dir = MCSPI_PINDIR_D0_OUT_D1_IN;
 	} else {
 		pdata = pdev->dev.platform_data;
 		master->num_chipselect = pdata->num_cs;
diff --git a/include/linux/platform_data/spi-omap2-mcspi.h b/include/linux/platform_data/spi-omap2-mcspi.h
index ce70f7b5a8e1..a65572d53211 100644
--- a/include/linux/platform_data/spi-omap2-mcspi.h
+++ b/include/linux/platform_data/spi-omap2-mcspi.h
@@ -7,8 +7,8 @@
 
 #define OMAP4_MCSPI_REG_OFFSET 0x100
 
-#define MCSPI_PINDIR_D0_OUT_D1_IN	0
-#define MCSPI_PINDIR_D0_IN_D1_OUT	1
+#define MCSPI_PINDIR_D0_IN_D1_OUT	0
+#define MCSPI_PINDIR_D0_OUT_D1_IN	1
 
 struct omap2_mcspi_platform_config {
 	unsigned short	num_cs;
-- 
cgit v1.2.3


From 49839dc93970789cea46f5171cd7f6ec11af64c7 Mon Sep 17 00:00:00 2001
From: Paul Walmsley <paul@pwsan.com>
Date: Tue, 6 Nov 2012 16:31:32 +0000
Subject: Revert "ARM: OMAP: convert I2C driver to PM QoS for MPU latency
 constraints"

This reverts commit 3db11feffc1ad2ab9dea27789e6b5b3032827adc
(ARM: OMAP: convert I2C driver to PM QoS for MPU latency constraints).
This commit causes I2C timeouts to appear on several OMAP3430/3530-based
boards:

  http://marc.info/?l=linux-arm-kernel&m=135071372426971&w=2
  http://marc.info/?l=linux-arm-kernel&m=135067558415214&w=2
  http://marc.info/?l=linux-arm-kernel&m=135216013608196&w=2

and appears to have been sent for merging before one of its prerequisites
was merged:

  http://marc.info/?l=linux-arm-kernel&m=135219411617621&w=2

Signed-off-by: Paul Walmsley <paul@pwsan.com>
Acked-by: Jean Pihet <j-pihet@ti.com>
Signed-off-by: Wolfram Sang <w.sang@pengutronix.de>
---
 arch/arm/plat-omap/i2c.c      | 21 +++++++++++++++++++++
 drivers/i2c/busses/i2c-omap.c | 32 ++++++++++++++------------------
 include/linux/i2c-omap.h      |  1 +
 3 files changed, 36 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/plat-omap/i2c.c b/arch/arm/plat-omap/i2c.c
index a5683a84c6ee..6013831a043e 100644
--- a/arch/arm/plat-omap/i2c.c
+++ b/arch/arm/plat-omap/i2c.c
@@ -26,12 +26,14 @@
 #include <linux/kernel.h>
 #include <linux/platform_device.h>
 #include <linux/i2c.h>
+#include <linux/i2c-omap.h>
 #include <linux/slab.h>
 #include <linux/err.h>
 #include <linux/clk.h>
 
 #include <mach/irqs.h>
 #include <plat/i2c.h>
+#include <plat/omap-pm.h>
 #include <plat/omap_device.h>
 
 #define OMAP_I2C_SIZE		0x3f
@@ -127,6 +129,16 @@ static inline int omap1_i2c_add_bus(int bus_id)
 
 
 #ifdef CONFIG_ARCH_OMAP2PLUS
+/*
+ * XXX This function is a temporary compatibility wrapper - only
+ * needed until the I2C driver can be converted to call
+ * omap_pm_set_max_dev_wakeup_lat() and handle a return code.
+ */
+static void omap_pm_set_max_mpu_wakeup_lat_compat(struct device *dev, long t)
+{
+	omap_pm_set_max_mpu_wakeup_lat(dev, t);
+}
+
 static inline int omap2_i2c_add_bus(int bus_id)
 {
 	int l;
@@ -158,6 +170,15 @@ static inline int omap2_i2c_add_bus(int bus_id)
 	dev_attr = (struct omap_i2c_dev_attr *)oh->dev_attr;
 	pdata->flags = dev_attr->flags;
 
+	/*
+	 * When waiting for completion of a i2c transfer, we need to
+	 * set a wake up latency constraint for the MPU. This is to
+	 * ensure quick enough wakeup from idle, when transfer
+	 * completes.
+	 * Only omap3 has support for constraints
+	 */
+	if (cpu_is_omap34xx())
+		pdata->set_mpu_wkup_lat = omap_pm_set_max_mpu_wakeup_lat_compat;
 	pdev = omap_device_build(name, bus_id, oh, pdata,
 			sizeof(struct omap_i2c_bus_platform_data),
 			NULL, 0, 0);
diff --git a/drivers/i2c/busses/i2c-omap.c b/drivers/i2c/busses/i2c-omap.c
index db31eaed6ea5..0b0254312d21 100644
--- a/drivers/i2c/busses/i2c-omap.c
+++ b/drivers/i2c/busses/i2c-omap.c
@@ -43,7 +43,6 @@
 #include <linux/slab.h>
 #include <linux/i2c-omap.h>
 #include <linux/pm_runtime.h>
-#include <linux/pm_qos.h>
 
 /* I2C controller revisions */
 #define OMAP_I2C_OMAP1_REV_2		0x20
@@ -187,8 +186,9 @@ struct omap_i2c_dev {
 	int			reg_shift;      /* bit shift for I2C register addresses */
 	struct completion	cmd_complete;
 	struct resource		*ioarea;
-	u32			latency;	/* maximum MPU wkup latency */
-	struct pm_qos_request	pm_qos_request;
+	u32			latency;	/* maximum mpu wkup latency */
+	void			(*set_mpu_wkup_lat)(struct device *dev,
+						    long latency);
 	u32			speed;		/* Speed of bus in kHz */
 	u32			dtrev;		/* extra revision from DT */
 	u32			flags;
@@ -494,7 +494,9 @@ static void omap_i2c_resize_fifo(struct omap_i2c_dev *dev, u8 size, bool is_rx)
 		dev->b_hw = 1; /* Enable hardware fixes */
 
 	/* calculate wakeup latency constraint for MPU */
-	dev->latency = (1000000 * dev->threshold) / (1000 * dev->speed / 8);
+	if (dev->set_mpu_wkup_lat != NULL)
+		dev->latency = (1000000 * dev->threshold) /
+			(1000 * dev->speed / 8);
 }
 
 /*
@@ -629,16 +631,8 @@ omap_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num)
 	if (r < 0)
 		goto out;
 
-	/*
-	 * When waiting for completion of a i2c transfer, we need to
-	 * set a wake up latency constraint for the MPU. This is to
-	 * ensure quick enough wakeup from idle, when transfer
-	 * completes.
-	 */
-	if (dev->latency)
-		pm_qos_add_request(&dev->pm_qos_request,
-				   PM_QOS_CPU_DMA_LATENCY,
-				   dev->latency);
+	if (dev->set_mpu_wkup_lat != NULL)
+		dev->set_mpu_wkup_lat(dev->dev, dev->latency);
 
 	for (i = 0; i < num; i++) {
 		r = omap_i2c_xfer_msg(adap, &msgs[i], (i == (num - 1)));
@@ -646,8 +640,8 @@ omap_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num)
 			break;
 	}
 
-	if (dev->latency)
-		pm_qos_remove_request(&dev->pm_qos_request);
+	if (dev->set_mpu_wkup_lat != NULL)
+		dev->set_mpu_wkup_lat(dev->dev, -1);
 
 	if (r == 0)
 		r = num;
@@ -1104,6 +1098,7 @@ omap_i2c_probe(struct platform_device *pdev)
 	} else if (pdata != NULL) {
 		dev->speed = pdata->clkrate;
 		dev->flags = pdata->flags;
+		dev->set_mpu_wkup_lat = pdata->set_mpu_wkup_lat;
 		dev->dtrev = pdata->rev;
 	}
 
@@ -1159,8 +1154,9 @@ omap_i2c_probe(struct platform_device *pdev)
 			dev->b_hw = 1; /* Enable hardware fixes */
 
 		/* calculate wakeup latency constraint for MPU */
-		dev->latency = (1000000 * dev->fifo_size) /
-			       (1000 * dev->speed / 8);
+		if (dev->set_mpu_wkup_lat != NULL)
+			dev->latency = (1000000 * dev->fifo_size) /
+				       (1000 * dev->speed / 8);
 	}
 
 	/* reset ASAP, clearing any IRQs */
diff --git a/include/linux/i2c-omap.h b/include/linux/i2c-omap.h
index df804ba73e0b..92a0dc75bc74 100644
--- a/include/linux/i2c-omap.h
+++ b/include/linux/i2c-omap.h
@@ -34,6 +34,7 @@ struct omap_i2c_bus_platform_data {
 	u32		clkrate;
 	u32		rev;
 	u32		flags;
+	void		(*set_mpu_wkup_lat)(struct device *dev, long set);
 };
 
 #endif
-- 
cgit v1.2.3


From 2c88ab8c5af7d637d2a9d14b607fa6100fa64236 Mon Sep 17 00:00:00 2001
From: Shubhrajyoti D <shubhrajyoti@ti.com>
Date: Mon, 5 Nov 2012 17:53:39 +0530
Subject: ARM: i2c: omap: Remove the i207 errata flag

The commit [i2c: omap: use revision check for OMAP_I2C_FLAG_APPLY_ERRATA_I207]
uses the revision id instead of the flag. So the flag can be safely removed.

Reviewed-by: Felipe Balbi <balbi@ti.com>
Signed-off-by: Shubhrajyoti D <shubhrajyoti@ti.com>
Signed-off-by: Wolfram Sang <w.sang@pengutronix.de>
---
 arch/arm/mach-omap2/omap_hwmod_2430_data.c | 3 +--
 arch/arm/mach-omap2/omap_hwmod_3xxx_data.c | 9 +++------
 drivers/i2c/busses/i2c-omap.c              | 3 +--
 include/linux/i2c-omap.h                   | 1 -
 4 files changed, 5 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-omap2/omap_hwmod_2430_data.c b/arch/arm/mach-omap2/omap_hwmod_2430_data.c
index c455e41b0237..b79ccf6efbe8 100644
--- a/arch/arm/mach-omap2/omap_hwmod_2430_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_2430_data.c
@@ -76,8 +76,7 @@ static struct omap_hwmod_class i2c_class = {
 
 static struct omap_i2c_dev_attr i2c_dev_attr = {
 	.fifo_depth	= 8, /* bytes */
-	.flags		= OMAP_I2C_FLAG_APPLY_ERRATA_I207 |
-			  OMAP_I2C_FLAG_BUS_SHIFT_2 |
+	.flags		= OMAP_I2C_FLAG_BUS_SHIFT_2 |
 			  OMAP_I2C_FLAG_FORCE_19200_INT_CLK,
 };
 
diff --git a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
index f67b7ee07dd4..943222c40489 100644
--- a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
@@ -791,8 +791,7 @@ static struct omap_hwmod omap3xxx_dss_venc_hwmod = {
 /* I2C1 */
 static struct omap_i2c_dev_attr i2c1_dev_attr = {
 	.fifo_depth	= 8, /* bytes */
-	.flags		= OMAP_I2C_FLAG_APPLY_ERRATA_I207 |
-			  OMAP_I2C_FLAG_RESET_REGS_POSTIDLE |
+	.flags		= OMAP_I2C_FLAG_RESET_REGS_POSTIDLE |
 			  OMAP_I2C_FLAG_BUS_SHIFT_2,
 };
 
@@ -818,8 +817,7 @@ static struct omap_hwmod omap3xxx_i2c1_hwmod = {
 /* I2C2 */
 static struct omap_i2c_dev_attr i2c2_dev_attr = {
 	.fifo_depth	= 8, /* bytes */
-	.flags = OMAP_I2C_FLAG_APPLY_ERRATA_I207 |
-		 OMAP_I2C_FLAG_RESET_REGS_POSTIDLE |
+	.flags = OMAP_I2C_FLAG_RESET_REGS_POSTIDLE |
 		 OMAP_I2C_FLAG_BUS_SHIFT_2,
 };
 
@@ -845,8 +843,7 @@ static struct omap_hwmod omap3xxx_i2c2_hwmod = {
 /* I2C3 */
 static struct omap_i2c_dev_attr i2c3_dev_attr = {
 	.fifo_depth	= 64, /* bytes */
-	.flags = OMAP_I2C_FLAG_APPLY_ERRATA_I207 |
-		 OMAP_I2C_FLAG_RESET_REGS_POSTIDLE |
+	.flags = OMAP_I2C_FLAG_RESET_REGS_POSTIDLE |
 		 OMAP_I2C_FLAG_BUS_SHIFT_2,
 };
 
diff --git a/drivers/i2c/busses/i2c-omap.c b/drivers/i2c/busses/i2c-omap.c
index 0ca50e71731b..11e645ab1e79 100644
--- a/drivers/i2c/busses/i2c-omap.c
+++ b/drivers/i2c/busses/i2c-omap.c
@@ -1028,8 +1028,7 @@ static const struct i2c_algorithm omap_i2c_algo = {
 #ifdef CONFIG_OF
 static struct omap_i2c_bus_platform_data omap3_pdata = {
 	.rev = OMAP_I2C_IP_VERSION_1,
-	.flags = OMAP_I2C_FLAG_APPLY_ERRATA_I207 |
-		 OMAP_I2C_FLAG_RESET_REGS_POSTIDLE |
+	.flags = OMAP_I2C_FLAG_RESET_REGS_POSTIDLE |
 		 OMAP_I2C_FLAG_BUS_SHIFT_2,
 };
 
diff --git a/include/linux/i2c-omap.h b/include/linux/i2c-omap.h
index 92a0dc75bc74..1b25c04f82d9 100644
--- a/include/linux/i2c-omap.h
+++ b/include/linux/i2c-omap.h
@@ -21,7 +21,6 @@
 #define OMAP_I2C_FLAG_SIMPLE_CLOCK		BIT(1)
 #define OMAP_I2C_FLAG_16BIT_DATA_REG		BIT(2)
 #define OMAP_I2C_FLAG_RESET_REGS_POSTIDLE	BIT(3)
-#define OMAP_I2C_FLAG_APPLY_ERRATA_I207	BIT(4)
 #define OMAP_I2C_FLAG_ALWAYS_ARMXOR_CLK	BIT(5)
 #define OMAP_I2C_FLAG_FORCE_19200_INT_CLK	BIT(6)
 /* how the CPU address bus must be translated for I2C unit access */
-- 
cgit v1.2.3


From 1cf3d8b3d24cd383ddfd5442c83ec5c355ffc2f7 Mon Sep 17 00:00:00 2001
From: Nathan Fontenot <nfont@linux.vnet.ibm.com>
Date: Tue, 2 Oct 2012 16:57:57 +0000
Subject: powerpc+of: Add of node/property notification chain for adds and
 removes

This patch moves the notification chain for updates to the device tree
from the powerpc/pseries code to the base OF code. This makes this
functionality available to all architectures.

Additionally the notification chain is updated to allow notifications
for property add/remove/update. To make this work a pointer to a new
struct (of_prop_reconfig) is passed to the routines in the notification chain.
The of_prop_reconfig property contains a pointer to the node containing the
property and a pointer to the property itself. In the case of property
updates, the property pointer refers to the new property.

Signed-off-by: Nathan Fontenot <nfont@linux.vnet.ibm.com>
Acked-by: Rob Herring <rob.herring@calxeda.com>
Acked-by: Grant Likely <grant.likely@secretlab.ca>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/pSeries_reconfig.h     | 32 -----------
 arch/powerpc/kernel/prom.c                      |  6 +-
 arch/powerpc/platforms/pseries/dlpar.c          | 14 +++--
 arch/powerpc/platforms/pseries/hotplug-cpu.c    |  8 +--
 arch/powerpc/platforms/pseries/hotplug-memory.c | 60 ++++++++++++++------
 arch/powerpc/platforms/pseries/iommu.c          |  6 +-
 arch/powerpc/platforms/pseries/reconfig.c       | 68 ++---------------------
 arch/powerpc/platforms/pseries/setup.c          |  6 +-
 drivers/crypto/nx/nx-842.c                      | 20 +++----
 drivers/crypto/nx/nx.c                          |  1 -
 drivers/of/base.c                               | 74 +++++++++++++++++++++++--
 include/linux/of.h                              | 22 ++++++--
 12 files changed, 163 insertions(+), 154 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/include/asm/pSeries_reconfig.h b/arch/powerpc/include/asm/pSeries_reconfig.h
index c07edfe98b98..adc00d2e75b0 100644
--- a/arch/powerpc/include/asm/pSeries_reconfig.h
+++ b/arch/powerpc/include/asm/pSeries_reconfig.h
@@ -2,43 +2,11 @@
 #define _PPC64_PSERIES_RECONFIG_H
 #ifdef __KERNEL__
 
-#include <linux/notifier.h>
-
-/*
- * Use this API if your code needs to know about OF device nodes being
- * added or removed on pSeries systems.
- */
-
-#define PSERIES_RECONFIG_ADD		0x0001
-#define PSERIES_RECONFIG_REMOVE		0x0002
-#define PSERIES_DRCONF_MEM_ADD		0x0003
-#define PSERIES_DRCONF_MEM_REMOVE	0x0004
-#define PSERIES_UPDATE_PROPERTY		0x0005
-
-/**
- * pSeries_reconfig_notify - Notifier value structure for OFDT property updates
- *
- * @node: Device tree node which owns the property being updated
- * @property: Updated property
- */
-struct pSeries_reconfig_prop_update {
-	struct device_node *node;
-	struct property *property;
-};
-
 #ifdef CONFIG_PPC_PSERIES
-extern int pSeries_reconfig_notifier_register(struct notifier_block *);
-extern void pSeries_reconfig_notifier_unregister(struct notifier_block *);
-extern int pSeries_reconfig_notify(unsigned long action, void *p);
 /* Not the best place to put this, will be fixed when we move some
  * of the rtas suspend-me stuff to pseries */
 extern void pSeries_coalesce_init(void);
 #else /* !CONFIG_PPC_PSERIES */
-static inline int pSeries_reconfig_notifier_register(struct notifier_block *nb)
-{
-	return 0;
-}
-static inline void pSeries_reconfig_notifier_unregister(struct notifier_block *nb) { }
 static inline void pSeries_coalesce_init(void) { }
 #endif /* CONFIG_PPC_PSERIES */
 
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 37725e86651e..6feb60c3c6e3 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -32,6 +32,7 @@
 #include <linux/debugfs.h>
 #include <linux/irq.h>
 #include <linux/memblock.h>
+#include <linux/of.h>
 
 #include <asm/prom.h>
 #include <asm/rtas.h>
@@ -49,7 +50,6 @@
 #include <asm/btext.h>
 #include <asm/sections.h>
 #include <asm/machdep.h>
-#include <asm/pSeries_reconfig.h>
 #include <asm/pci-bridge.h>
 #include <asm/kexec.h>
 #include <asm/opal.h>
@@ -802,7 +802,7 @@ static int prom_reconfig_notifier(struct notifier_block *nb,
 	int err;
 
 	switch (action) {
-	case PSERIES_RECONFIG_ADD:
+	case OF_RECONFIG_ATTACH_NODE:
 		err = of_finish_dynamic_node(node);
 		if (err < 0)
 			printk(KERN_ERR "finish_node returned %d\n", err);
@@ -821,7 +821,7 @@ static struct notifier_block prom_reconfig_nb = {
 
 static int __init prom_reconfig_setup(void)
 {
-	return pSeries_reconfig_notifier_register(&prom_reconfig_nb);
+	return of_reconfig_notifier_register(&prom_reconfig_nb);
 }
 __initcall(prom_reconfig_setup);
 #endif
diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c
index e36789bd4e6c..a1a7b9a67ffd 100644
--- a/arch/powerpc/platforms/pseries/dlpar.c
+++ b/arch/powerpc/platforms/pseries/dlpar.c
@@ -16,13 +16,13 @@
 #include <linux/spinlock.h>
 #include <linux/cpu.h>
 #include <linux/slab.h>
+#include <linux/of.h>
 #include "offline_states.h"
 
 #include <asm/prom.h>
 #include <asm/machdep.h>
 #include <asm/uaccess.h>
 #include <asm/rtas.h>
-#include <asm/pSeries_reconfig.h>
 
 struct cc_workarea {
 	u32	drc_index;
@@ -262,24 +262,26 @@ int dlpar_attach_node(struct device_node *dn)
 	if (!dn->parent)
 		return -ENOMEM;
 
-	rc = pSeries_reconfig_notify(PSERIES_RECONFIG_ADD, dn);
+	rc = of_attach_node(dn);
 	if (rc) {
 		printk(KERN_ERR "Failed to add device node %s\n",
 		       dn->full_name);
 		return rc;
 	}
 
-	of_attach_node(dn);
 	of_node_put(dn->parent);
 	return 0;
 }
 
 int dlpar_detach_node(struct device_node *dn)
 {
-	pSeries_reconfig_notify(PSERIES_RECONFIG_REMOVE, dn);
-	of_detach_node(dn);
-	of_node_put(dn); /* Must decrement the refcount */
+	int rc;
+
+	rc = of_detach_node(dn);
+	if (rc)
+		return rc;
 
+	of_node_put(dn); /* Must decrement the refcount */
 	return 0;
 }
 
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index 64c97d8ac0c5..a38956269fbf 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -23,12 +23,12 @@
 #include <linux/delay.h>
 #include <linux/sched.h>	/* for idle_task_exit */
 #include <linux/cpu.h>
+#include <linux/of.h>
 #include <asm/prom.h>
 #include <asm/rtas.h>
 #include <asm/firmware.h>
 #include <asm/machdep.h>
 #include <asm/vdso_datapage.h>
-#include <asm/pSeries_reconfig.h>
 #include <asm/xics.h>
 #include "plpar_wrappers.h"
 #include "offline_states.h"
@@ -333,10 +333,10 @@ static int pseries_smp_notifier(struct notifier_block *nb,
 	int err = 0;
 
 	switch (action) {
-	case PSERIES_RECONFIG_ADD:
+	case OF_RECONFIG_ATTACH_NODE:
 		err = pseries_add_processor(node);
 		break;
-	case PSERIES_RECONFIG_REMOVE:
+	case OF_RECONFIG_DETACH_NODE:
 		pseries_remove_processor(node);
 		break;
 	}
@@ -399,7 +399,7 @@ static int __init pseries_cpu_hotplug_init(void)
 
 	/* Processors can be added/removed only on LPAR */
 	if (firmware_has_feature(FW_FEATURE_LPAR)) {
-		pSeries_reconfig_notifier_register(&pseries_smp_nb);
+		of_reconfig_notifier_register(&pseries_smp_nb);
 		cpu_maps_update_begin();
 		if (cede_offline_enabled && parse_cede_parameters() == 0) {
 			default_offline_state = CPU_STATE_INACTIVE;
diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
index ecdb0a6b3171..2372c609fa2b 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -16,7 +16,6 @@
 
 #include <asm/firmware.h>
 #include <asm/machdep.h>
-#include <asm/pSeries_reconfig.h>
 #include <asm/sparsemem.h>
 
 static unsigned long get_memblock_size(void)
@@ -187,42 +186,69 @@ static int pseries_add_memory(struct device_node *np)
 	return (ret < 0) ? -EINVAL : 0;
 }
 
-static int pseries_drconf_memory(unsigned long *base, unsigned int action)
+static int pseries_update_drconf_memory(struct of_prop_reconfig *pr)
 {
+	struct of_drconf_cell *new_drmem, *old_drmem;
 	unsigned long memblock_size;
-	int rc;
+	u32 entries;
+	u32 *p;
+	int i, rc = -EINVAL;
 
 	memblock_size = get_memblock_size();
 	if (!memblock_size)
 		return -EINVAL;
 
-	if (action == PSERIES_DRCONF_MEM_ADD) {
-		rc = memblock_add(*base, memblock_size);
-		rc = (rc < 0) ? -EINVAL : 0;
-	} else if (action == PSERIES_DRCONF_MEM_REMOVE) {
-		rc = pseries_remove_memblock(*base, memblock_size);
-	} else {
-		rc = -EINVAL;
+	p = (u32 *)of_get_property(pr->dn, "ibm,dynamic-memory", NULL);
+	if (!p)
+		return -EINVAL;
+
+	/* The first int of the property is the number of lmb's described
+	 * by the property. This is followed by an array of of_drconf_cell
+	 * entries. Get the niumber of entries and skip to the array of
+	 * of_drconf_cell's.
+	 */
+	entries = *p++;
+	old_drmem = (struct of_drconf_cell *)p;
+
+	p = (u32 *)pr->prop->value;
+	p++;
+	new_drmem = (struct of_drconf_cell *)p;
+
+	for (i = 0; i < entries; i++) {
+		if ((old_drmem[i].flags & DRCONF_MEM_ASSIGNED) &&
+		    (!(new_drmem[i].flags & DRCONF_MEM_ASSIGNED))) {
+			rc = pseries_remove_memblock(old_drmem[i].base_addr,
+						     memblock_size);
+			break;
+		} else if ((!(old_drmem[i].flags & DRCONF_MEM_ASSIGNED)) &&
+			   (new_drmem[i].flags & DRCONF_MEM_ASSIGNED)) {
+			rc = memblock_add(old_drmem[i].base_addr,
+					  memblock_size);
+			rc = (rc < 0) ? -EINVAL : 0;
+			break;
+		}
 	}
 
 	return rc;
 }
 
 static int pseries_memory_notifier(struct notifier_block *nb,
-				unsigned long action, void *node)
+				   unsigned long action, void *node)
 {
+	struct of_prop_reconfig *pr;
 	int err = 0;
 
 	switch (action) {
-	case PSERIES_RECONFIG_ADD:
+	case OF_RECONFIG_ATTACH_NODE:
 		err = pseries_add_memory(node);
 		break;
-	case PSERIES_RECONFIG_REMOVE:
+	case OF_RECONFIG_DETACH_NODE:
 		err = pseries_remove_memory(node);
 		break;
-	case PSERIES_DRCONF_MEM_ADD:
-	case PSERIES_DRCONF_MEM_REMOVE:
-		err = pseries_drconf_memory(node, action);
+	case OF_RECONFIG_UPDATE_PROPERTY:
+		pr = (struct of_prop_reconfig *)node;
+		if (!strcmp(pr->prop->name, "ibm,dynamic-memory"))
+			err = pseries_update_drconf_memory(pr);
 		break;
 	}
 	return notifier_from_errno(err);
@@ -235,7 +261,7 @@ static struct notifier_block pseries_mem_nb = {
 static int __init pseries_memory_hotplug_init(void)
 {
 	if (firmware_has_feature(FW_FEATURE_LPAR))
-		pSeries_reconfig_notifier_register(&pseries_mem_nb);
+		of_reconfig_notifier_register(&pseries_mem_nb);
 
 	return 0;
 }
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index 6153eea27ce7..da5594c441e4 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -36,13 +36,13 @@
 #include <linux/dma-mapping.h>
 #include <linux/crash_dump.h>
 #include <linux/memory.h>
+#include <linux/of.h>
 #include <asm/io.h>
 #include <asm/prom.h>
 #include <asm/rtas.h>
 #include <asm/iommu.h>
 #include <asm/pci-bridge.h>
 #include <asm/machdep.h>
-#include <asm/pSeries_reconfig.h>
 #include <asm/firmware.h>
 #include <asm/tce.h>
 #include <asm/ppc-pci.h>
@@ -1294,7 +1294,7 @@ static int iommu_reconfig_notifier(struct notifier_block *nb, unsigned long acti
 	struct direct_window *window;
 
 	switch (action) {
-	case PSERIES_RECONFIG_REMOVE:
+	case OF_RECONFIG_DETACH_NODE:
 		if (pci && pci->iommu_table)
 			iommu_free_table(pci->iommu_table, np->full_name);
 
@@ -1357,7 +1357,7 @@ void iommu_init_early_pSeries(void)
 	}
 
 
-	pSeries_reconfig_notifier_register(&iommu_reconfig_nb);
+	of_reconfig_notifier_register(&iommu_reconfig_nb);
 	register_memory_notifier(&iommu_mem_nb);
 
 	set_pci_dma_ops(&dma_iommu_ops);
diff --git a/arch/powerpc/platforms/pseries/reconfig.c b/arch/powerpc/platforms/pseries/reconfig.c
index f99f1ca8035b..720a0cc2e69f 100644
--- a/arch/powerpc/platforms/pseries/reconfig.c
+++ b/arch/powerpc/platforms/pseries/reconfig.c
@@ -16,11 +16,11 @@
 #include <linux/notifier.h>
 #include <linux/proc_fs.h>
 #include <linux/slab.h>
+#include <linux/of.h>
 
 #include <asm/prom.h>
 #include <asm/machdep.h>
 #include <asm/uaccess.h>
-#include <asm/pSeries_reconfig.h>
 #include <asm/mmu.h>
 
 /**
@@ -55,28 +55,6 @@ static struct device_node *derive_parent(const char *path)
 	return parent;
 }
 
-static BLOCKING_NOTIFIER_HEAD(pSeries_reconfig_chain);
-
-int pSeries_reconfig_notifier_register(struct notifier_block *nb)
-{
-	return blocking_notifier_chain_register(&pSeries_reconfig_chain, nb);
-}
-EXPORT_SYMBOL_GPL(pSeries_reconfig_notifier_register);
-
-void pSeries_reconfig_notifier_unregister(struct notifier_block *nb)
-{
-	blocking_notifier_chain_unregister(&pSeries_reconfig_chain, nb);
-}
-EXPORT_SYMBOL_GPL(pSeries_reconfig_notifier_unregister);
-
-int pSeries_reconfig_notify(unsigned long action, void *p)
-{
-	int err = blocking_notifier_call_chain(&pSeries_reconfig_chain,
-						action, p);
-
-	return notifier_to_errno(err);
-}
-
 static int pSeries_reconfig_add_node(const char *path, struct property *proplist)
 {
 	struct device_node *np;
@@ -100,13 +78,12 @@ static int pSeries_reconfig_add_node(const char *path, struct property *proplist
 		goto out_err;
 	}
 
-	err = pSeries_reconfig_notify(PSERIES_RECONFIG_ADD, np);
+	err = of_attach_node(np);
 	if (err) {
 		printk(KERN_ERR "Failed to add device node %s\n", path);
 		goto out_err;
 	}
 
-	of_attach_node(np);
 	of_node_put(np->parent);
 
 	return 0;
@@ -134,9 +111,7 @@ static int pSeries_reconfig_remove_node(struct device_node *np)
 		return -EBUSY;
 	}
 
-	pSeries_reconfig_notify(PSERIES_RECONFIG_REMOVE, np);
 	of_detach_node(np);
-
 	of_node_put(parent);
 	of_node_put(np); /* Must decrement the refcount */
 	return 0;
@@ -381,10 +356,9 @@ static int do_remove_property(char *buf, size_t bufsize)
 static int do_update_property(char *buf, size_t bufsize)
 {
 	struct device_node *np;
-	struct pSeries_reconfig_prop_update upd_value;
 	unsigned char *value;
 	char *name, *end, *next_prop;
-	int rc, length;
+	int length;
 	struct property *newprop;
 	buf = parse_node(buf, bufsize, &np);
 	end = buf + bufsize;
@@ -406,41 +380,7 @@ static int do_update_property(char *buf, size_t bufsize)
 	if (!strcmp(name, "slb-size") || !strcmp(name, "ibm,slb-size"))
 		slb_set_size(*(int *)value);
 
-	upd_value.node = np;
-	upd_value.property = newprop;
-	pSeries_reconfig_notify(PSERIES_UPDATE_PROPERTY, &upd_value);
-
-	rc = prom_update_property(np, newprop);
-	if (rc)
-		return rc;
-
-	/* For memory under the ibm,dynamic-reconfiguration-memory node
-	 * of the device tree, adding and removing memory is just an update
-	 * to the ibm,dynamic-memory property instead of adding/removing a
-	 * memory node in the device tree.  For these cases we still need to
-	 * involve the notifier chain.
-	 */
-	if (!strcmp(name, "ibm,dynamic-memory")) {
-		int action;
-
-		next_prop = parse_next_property(next_prop, end, &name,
-						&length, &value);
-		if (!next_prop)
-			return -EINVAL;
-
-		if (!strcmp(name, "add"))
-			action = PSERIES_DRCONF_MEM_ADD;
-		else
-			action = PSERIES_DRCONF_MEM_REMOVE;
-
-		rc = pSeries_reconfig_notify(action, value);
-		if (rc) {
-			prom_update_property(np, newprop);
-			return rc;
-		}
-	}
-
-	return 0;
+	return prom_update_property(np, newprop);
 }
 
 /**
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index e3cb7ae61658..e1a5b8a32d25 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -40,6 +40,7 @@
 #include <linux/seq_file.h>
 #include <linux/root_dev.h>
 #include <linux/cpuidle.h>
+#include <linux/of.h>
 
 #include <asm/mmu.h>
 #include <asm/processor.h>
@@ -63,7 +64,6 @@
 #include <asm/smp.h>
 #include <asm/firmware.h>
 #include <asm/eeh.h>
-#include <asm/pSeries_reconfig.h>
 
 #include "plpar_wrappers.h"
 #include "pseries.h"
@@ -258,7 +258,7 @@ static int pci_dn_reconfig_notifier(struct notifier_block *nb, unsigned long act
 	int err = NOTIFY_OK;
 
 	switch (action) {
-	case PSERIES_RECONFIG_ADD:
+	case OF_RECONFIG_ATTACH_NODE:
 		pci = np->parent->data;
 		if (pci) {
 			update_dn_pci_info(np, pci->phb);
@@ -389,7 +389,7 @@ static void __init pSeries_setup_arch(void)
 	/* Find and initialize PCI host bridges */
 	init_pci_config_tokens();
 	find_and_init_phbs();
-	pSeries_reconfig_notifier_register(&pci_dn_reconfig_nb);
+	of_reconfig_notifier_register(&pci_dn_reconfig_nb);
 
 	pSeries_nvram_init();
 
diff --git a/drivers/crypto/nx/nx-842.c b/drivers/crypto/nx/nx-842.c
index 0ce625738677..6c4c000671c5 100644
--- a/drivers/crypto/nx/nx-842.c
+++ b/drivers/crypto/nx/nx-842.c
@@ -28,7 +28,6 @@
 #include <linux/slab.h>
 
 #include <asm/page.h>
-#include <asm/pSeries_reconfig.h>
 #include <asm/vio.h>
 
 #include "nx_csbcpb.h" /* struct nx_csbcpb */
@@ -1014,26 +1013,23 @@ error_out:
  *	NOTIFY_BAD encoded with error number on failure, use
  *		notifier_to_errno() to decode this value
  */
-static int nx842_OF_notifier(struct notifier_block *np,
-					unsigned long action,
-					void *update)
+static int nx842_OF_notifier(struct notifier_block *np, unsigned long action,
+			     void *update)
 {
-	struct pSeries_reconfig_prop_update *upd;
+	struct of_prop_reconfig *upd = update;
 	struct nx842_devdata *local_devdata;
 	struct device_node *node = NULL;
 
-	upd = (struct pSeries_reconfig_prop_update *)update;
-
 	rcu_read_lock();
 	local_devdata = rcu_dereference(devdata);
 	if (local_devdata)
 		node = local_devdata->dev->of_node;
 
 	if (local_devdata &&
-			action == PSERIES_UPDATE_PROPERTY &&
-			!strcmp(upd->node->name, node->name)) {
+			action == OF_RECONFIG_UPDATE_PROPERTY &&
+			!strcmp(upd->dn->name, node->name)) {
 		rcu_read_unlock();
-		nx842_OF_upd(upd->property);
+		nx842_OF_upd(upd->prop);
 	} else
 		rcu_read_unlock();
 
@@ -1182,7 +1178,7 @@ static int __init nx842_probe(struct vio_dev *viodev,
 	synchronize_rcu();
 	kfree(old_devdata);
 
-	pSeries_reconfig_notifier_register(&nx842_of_nb);
+	of_reconfig_notifier_register(&nx842_of_nb);
 
 	ret = nx842_OF_upd(NULL);
 	if (ret && ret != -ENODEV) {
@@ -1228,7 +1224,7 @@ static int __exit nx842_remove(struct vio_dev *viodev)
 	spin_lock_irqsave(&devdata_mutex, flags);
 	old_devdata = rcu_dereference_check(devdata,
 			lockdep_is_held(&devdata_mutex));
-	pSeries_reconfig_notifier_unregister(&nx842_of_nb);
+	of_reconfig_notifier_unregister(&nx842_of_nb);
 	rcu_assign_pointer(devdata, NULL);
 	spin_unlock_irqrestore(&devdata_mutex, flags);
 	synchronize_rcu();
diff --git a/drivers/crypto/nx/nx.c b/drivers/crypto/nx/nx.c
index 638110efae9b..f7a8a16aa7d3 100644
--- a/drivers/crypto/nx/nx.c
+++ b/drivers/crypto/nx/nx.c
@@ -33,7 +33,6 @@
 #include <linux/scatterlist.h>
 #include <linux/device.h>
 #include <linux/of.h>
-#include <asm/pSeries_reconfig.h>
 #include <asm/hvcall.h>
 #include <asm/vio.h>
 
diff --git a/drivers/of/base.c b/drivers/of/base.c
index bbd073f53c9f..87b63850e8dc 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -1028,6 +1028,24 @@ int of_parse_phandle_with_args(struct device_node *np, const char *list_name,
 }
 EXPORT_SYMBOL(of_parse_phandle_with_args);
 
+#if defined(CONFIG_OF_DYNAMIC)
+static int of_property_notify(int action, struct device_node *np,
+			      struct property *prop)
+{
+	struct of_prop_reconfig pr;
+
+	pr.dn = np;
+	pr.prop = prop;
+	return of_reconfig_notify(action, &pr);
+}
+#else
+static int of_property_notify(int action, struct device_node *np,
+			      struct property *prop)
+{
+	return 0;
+}
+#endif
+
 /**
  * prom_add_property - Add a property to a node
  */
@@ -1035,6 +1053,11 @@ int prom_add_property(struct device_node *np, struct property *prop)
 {
 	struct property **next;
 	unsigned long flags;
+	int rc;
+
+	rc = of_property_notify(OF_RECONFIG_ADD_PROPERTY, np, prop);
+	if (rc)
+		return rc;
 
 	prop->next = NULL;
 	write_lock_irqsave(&devtree_lock, flags);
@@ -1072,6 +1095,11 @@ int prom_remove_property(struct device_node *np, struct property *prop)
 	struct property **next;
 	unsigned long flags;
 	int found = 0;
+	int rc;
+
+	rc = of_property_notify(OF_RECONFIG_REMOVE_PROPERTY, np, prop);
+	if (rc)
+		return rc;
 
 	write_lock_irqsave(&devtree_lock, flags);
 	next = &np->properties;
@@ -1114,7 +1142,11 @@ int prom_update_property(struct device_node *np,
 {
 	struct property **next, *oldprop;
 	unsigned long flags;
-	int found = 0;
+	int rc, found = 0;
+
+	rc = of_property_notify(OF_RECONFIG_UPDATE_PROPERTY, np, newprop);
+	if (rc)
+		return rc;
 
 	if (!newprop->name)
 		return -EINVAL;
@@ -1160,6 +1192,26 @@ int prom_update_property(struct device_node *np,
  * device tree nodes.
  */
 
+static BLOCKING_NOTIFIER_HEAD(of_reconfig_chain);
+
+int of_reconfig_notifier_register(struct notifier_block *nb)
+{
+	return blocking_notifier_chain_register(&of_reconfig_chain, nb);
+}
+
+int of_reconfig_notifier_unregister(struct notifier_block *nb)
+{
+	return blocking_notifier_chain_unregister(&of_reconfig_chain, nb);
+}
+
+int of_reconfig_notify(unsigned long action, void *p)
+{
+	int rc;
+
+	rc = blocking_notifier_call_chain(&of_reconfig_chain, action, p);
+	return notifier_to_errno(rc);
+}
+
 #ifdef CONFIG_PROC_DEVICETREE
 static void of_add_proc_dt_entry(struct device_node *dn)
 {
@@ -1179,9 +1231,14 @@ static void of_add_proc_dt_entry(struct device_node *dn)
 /**
  * of_attach_node - Plug a device node into the tree and global list.
  */
-void of_attach_node(struct device_node *np)
+int of_attach_node(struct device_node *np)
 {
 	unsigned long flags;
+	int rc;
+
+	rc = of_reconfig_notify(OF_RECONFIG_ATTACH_NODE, np);
+	if (rc)
+		return rc;
 
 	write_lock_irqsave(&devtree_lock, flags);
 	np->sibling = np->parent->child;
@@ -1191,6 +1248,7 @@ void of_attach_node(struct device_node *np)
 	write_unlock_irqrestore(&devtree_lock, flags);
 
 	of_add_proc_dt_entry(np);
+	return 0;
 }
 
 #ifdef CONFIG_PROC_DEVICETREE
@@ -1220,23 +1278,28 @@ static void of_remove_proc_dt_entry(struct device_node *dn)
  * The caller must hold a reference to the node.  The memory associated with
  * the node is not freed until its refcount goes to zero.
  */
-void of_detach_node(struct device_node *np)
+int of_detach_node(struct device_node *np)
 {
 	struct device_node *parent;
 	unsigned long flags;
+	int rc = 0;
+
+	rc = of_reconfig_notify(OF_RECONFIG_DETACH_NODE, np);
+	if (rc)
+		return rc;
 
 	write_lock_irqsave(&devtree_lock, flags);
 
 	if (of_node_check_flag(np, OF_DETACHED)) {
 		/* someone already detached it */
 		write_unlock_irqrestore(&devtree_lock, flags);
-		return;
+		return rc;
 	}
 
 	parent = np->parent;
 	if (!parent) {
 		write_unlock_irqrestore(&devtree_lock, flags);
-		return;
+		return rc;
 	}
 
 	if (allnodes == np)
@@ -1265,6 +1328,7 @@ void of_detach_node(struct device_node *np)
 	write_unlock_irqrestore(&devtree_lock, flags);
 
 	of_remove_proc_dt_entry(np);
+	return rc;
 }
 #endif /* defined(CONFIG_OF_DYNAMIC) */
 
diff --git a/include/linux/of.h b/include/linux/of.h
index 72843b72a2b2..fb5d87b66e3e 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -22,6 +22,7 @@
 #include <linux/mod_devicetable.h>
 #include <linux/spinlock.h>
 #include <linux/topology.h>
+#include <linux/notifier.h>
 
 #include <asm/byteorder.h>
 #include <asm/errno.h>
@@ -272,11 +273,24 @@ extern int prom_remove_property(struct device_node *np, struct property *prop);
 extern int prom_update_property(struct device_node *np,
 				struct property *newprop);
 
-#if defined(CONFIG_OF_DYNAMIC)
 /* For updating the device tree at runtime */
-extern void of_attach_node(struct device_node *);
-extern void of_detach_node(struct device_node *);
-#endif
+#define OF_RECONFIG_ATTACH_NODE		0x0001
+#define OF_RECONFIG_DETACH_NODE		0x0002
+#define OF_RECONFIG_ADD_PROPERTY	0x0003
+#define OF_RECONFIG_REMOVE_PROPERTY	0x0004
+#define OF_RECONFIG_UPDATE_PROPERTY	0x0005
+
+struct of_prop_reconfig {
+	struct device_node	*dn;
+	struct property		*prop;
+};
+
+extern int of_reconfig_notifier_register(struct notifier_block *);
+extern int of_reconfig_notifier_unregister(struct notifier_block *);
+extern int of_reconfig_notify(unsigned long, void *);
+
+extern int of_attach_node(struct device_node *);
+extern int of_detach_node(struct device_node *);
 
 #define of_match_ptr(_ptr)	(_ptr)
 
-- 
cgit v1.2.3


From 79d1c712958f94372482ad74578b00f44e744c12 Mon Sep 17 00:00:00 2001
From: Nathan Fontenot <nfont@linux.vnet.ibm.com>
Date: Tue, 2 Oct 2012 16:58:46 +0000
Subject: powerpc+of: Rename the drivers/of prom_* functions to of_*

Rename the prom_*_property routines of the generic OF code to of_*_property.
This brings them in line with the naming used by the rest of the OF code.

Signed-off-by: Nathan Fontenot <nfont@linux.vnet.ibm.com>
Acked-by: Geoff Levand <geoff@infradead.org>
Acked-by: Rob Herring <rob.herring@calxeda.com>
Acked-by: Grant Likely <grant.likely@secretlab.ca>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/arm/mach-mxs/mach-mxs.c              |  2 +-
 arch/powerpc/kernel/machine_kexec.c       | 14 +++++++-------
 arch/powerpc/kernel/machine_kexec_64.c    |  8 ++++----
 arch/powerpc/kernel/pci_32.c              |  2 +-
 arch/powerpc/platforms/85xx/p1022_ds.c    |  6 +++---
 arch/powerpc/platforms/ps3/os-area.c      |  6 +++---
 arch/powerpc/platforms/pseries/iommu.c    |  4 ++--
 arch/powerpc/platforms/pseries/mobility.c |  4 ++--
 arch/powerpc/platforms/pseries/reconfig.c |  6 +++---
 drivers/macintosh/smu.c                   |  2 +-
 drivers/of/base.c                         | 15 +++++++--------
 include/linux/of.h                        |  7 +++----
 12 files changed, 37 insertions(+), 39 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-mxs/mach-mxs.c b/arch/arm/mach-mxs/mach-mxs.c
index 4748ec551a68..d61b915ce52c 100644
--- a/arch/arm/mach-mxs/mach-mxs.c
+++ b/arch/arm/mach-mxs/mach-mxs.c
@@ -211,7 +211,7 @@ static void __init update_fec_mac_prop(enum mac_oui oui)
 		macaddr[4] = (val >> 8) & 0xff;
 		macaddr[5] = (val >> 0) & 0xff;
 
-		prom_update_property(np, newmac);
+		of_update_property(np, newmac);
 	}
 }
 
diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c
index fa9f6c72f557..e1ec57e87b3b 100644
--- a/arch/powerpc/kernel/machine_kexec.c
+++ b/arch/powerpc/kernel/machine_kexec.c
@@ -218,23 +218,23 @@ static void __init export_crashk_values(struct device_node *node)
 	 * be sure what's in them, so remove them. */
 	prop = of_find_property(node, "linux,crashkernel-base", NULL);
 	if (prop)
-		prom_remove_property(node, prop);
+		of_remove_property(node, prop);
 
 	prop = of_find_property(node, "linux,crashkernel-size", NULL);
 	if (prop)
-		prom_remove_property(node, prop);
+		of_remove_property(node, prop);
 
 	if (crashk_res.start != 0) {
-		prom_add_property(node, &crashk_base_prop);
+		of_add_property(node, &crashk_base_prop);
 		crashk_size = resource_size(&crashk_res);
-		prom_add_property(node, &crashk_size_prop);
+		of_add_property(node, &crashk_size_prop);
 	}
 
 	/*
 	 * memory_limit is required by the kexec-tools to limit the
 	 * crash regions to the actual memory used.
 	 */
-	prom_update_property(node, &memory_limit_prop);
+	of_update_property(node, &memory_limit_prop);
 }
 
 static int __init kexec_setup(void)
@@ -249,11 +249,11 @@ static int __init kexec_setup(void)
 	/* remove any stale properties so ours can be found */
 	prop = of_find_property(node, kernel_end_prop.name, NULL);
 	if (prop)
-		prom_remove_property(node, prop);
+		of_remove_property(node, prop);
 
 	/* information needed by userspace when using default_machine_kexec */
 	kernel_end = __pa(_end);
-	prom_add_property(node, &kernel_end_prop);
+	of_add_property(node, &kernel_end_prop);
 
 	export_crashk_values(node);
 
diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c
index d7f609086a99..7206701b1ff1 100644
--- a/arch/powerpc/kernel/machine_kexec_64.c
+++ b/arch/powerpc/kernel/machine_kexec_64.c
@@ -389,14 +389,14 @@ static int __init export_htab_values(void)
 	/* remove any stale propertys so ours can be found */
 	prop = of_find_property(node, htab_base_prop.name, NULL);
 	if (prop)
-		prom_remove_property(node, prop);
+		of_remove_property(node, prop);
 	prop = of_find_property(node, htab_size_prop.name, NULL);
 	if (prop)
-		prom_remove_property(node, prop);
+		of_remove_property(node, prop);
 
 	htab_base = __pa(htab_address);
-	prom_add_property(node, &htab_base_prop);
-	prom_add_property(node, &htab_size_prop);
+	of_add_property(node, &htab_base_prop);
+	of_add_property(node, &htab_size_prop);
 
 	of_node_put(node);
 	return 0;
diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c
index 4b06ec5a502e..64f526a321f5 100644
--- a/arch/powerpc/kernel/pci_32.c
+++ b/arch/powerpc/kernel/pci_32.c
@@ -208,7 +208,7 @@ pci_create_OF_bus_map(void)
 		of_prop->name = "pci-OF-bus-map";
 		of_prop->length = 256;
 		of_prop->value = &of_prop[1];
-		prom_add_property(dn, of_prop);
+		of_add_property(dn, of_prop);
 		of_node_put(dn);
 	}
 }
diff --git a/arch/powerpc/platforms/85xx/p1022_ds.c b/arch/powerpc/platforms/85xx/p1022_ds.c
index 848a3e98e1c1..8fb12570b2f5 100644
--- a/arch/powerpc/platforms/85xx/p1022_ds.c
+++ b/arch/powerpc/platforms/85xx/p1022_ds.c
@@ -539,7 +539,7 @@ static void __init p1022_ds_setup_arch(void)
 				};
 
 				/*
-				 * prom_update_property() is called before
+				 * of_update_property() is called before
 				 * kmalloc() is available, so the 'new' object
 				 * should be allocated in the global area.
 				 * The easiest way is to do that is to
@@ -548,7 +548,7 @@ static void __init p1022_ds_setup_arch(void)
 				 */
 				pr_info("p1022ds: disabling %s node",
 					np2->full_name);
-				prom_update_property(np2, &nor_status);
+				of_update_property(np2, &nor_status);
 				of_node_put(np2);
 			}
 
@@ -564,7 +564,7 @@ static void __init p1022_ds_setup_arch(void)
 
 				pr_info("p1022ds: disabling %s node",
 					np2->full_name);
-				prom_update_property(np2, &nand_status);
+				of_update_property(np2, &nand_status);
 				of_node_put(np2);
 			}
 
diff --git a/arch/powerpc/platforms/ps3/os-area.c b/arch/powerpc/platforms/ps3/os-area.c
index 56d26bc4fd41..09787139834d 100644
--- a/arch/powerpc/platforms/ps3/os-area.c
+++ b/arch/powerpc/platforms/ps3/os-area.c
@@ -280,13 +280,13 @@ static void os_area_set_property(struct device_node *node,
 
 	if (tmp) {
 		pr_debug("%s:%d found %s\n", __func__, __LINE__, prop->name);
-		prom_remove_property(node, tmp);
+		of_remove_property(node, tmp);
 	}
 
-	result = prom_add_property(node, prop);
+	result = of_add_property(node, prop);
 
 	if (result)
-		pr_debug("%s:%d prom_set_property failed\n", __func__,
+		pr_debug("%s:%d of_set_property failed\n", __func__,
 			__LINE__);
 }
 
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index da5594c441e4..e2685badb5db 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -760,7 +760,7 @@ static void remove_ddw(struct device_node *np)
 	__remove_ddw(np, ddw_avail, liobn);
 
 delprop:
-	ret = prom_remove_property(np, win64);
+	ret = of_remove_property(np, win64);
 	if (ret)
 		pr_warning("%s: failed to remove direct window property: %d\n",
 			np->full_name, ret);
@@ -1070,7 +1070,7 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
 		goto out_free_window;
 	}
 
-	ret = prom_add_property(pdn, win64);
+	ret = of_add_property(pdn, win64);
 	if (ret) {
 		dev_err(&dev->dev, "unable to add dma window property for %s: %d",
 			 pdn->full_name, ret);
diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c
index dd30b12edfe4..6573808cc5f3 100644
--- a/arch/powerpc/platforms/pseries/mobility.c
+++ b/arch/powerpc/platforms/pseries/mobility.c
@@ -116,7 +116,7 @@ static int update_dt_property(struct device_node *dn, struct property **prop,
 	}
 
 	if (!more) {
-		prom_update_property(dn, new_prop);
+		of_update_property(dn, new_prop);
 		new_prop = NULL;
 	}
 
@@ -172,7 +172,7 @@ static int update_dt_node(u32 phandle)
 
 			case 0x80000000:
 				prop = of_find_property(dn, prop_name, NULL);
-				prom_remove_property(dn, prop);
+				of_remove_property(dn, prop);
 				prop = NULL;
 				break;
 
diff --git a/arch/powerpc/platforms/pseries/reconfig.c b/arch/powerpc/platforms/pseries/reconfig.c
index 720a0cc2e69f..30b358dc2beb 100644
--- a/arch/powerpc/platforms/pseries/reconfig.c
+++ b/arch/powerpc/platforms/pseries/reconfig.c
@@ -326,7 +326,7 @@ static int do_add_property(char *buf, size_t bufsize)
 	if (!prop)
 		return -ENOMEM;
 
-	prom_add_property(np, prop);
+	of_add_property(np, prop);
 
 	return 0;
 }
@@ -350,7 +350,7 @@ static int do_remove_property(char *buf, size_t bufsize)
 
 	prop = of_find_property(np, buf, NULL);
 
-	return prom_remove_property(np, prop);
+	return of_remove_property(np, prop);
 }
 
 static int do_update_property(char *buf, size_t bufsize)
@@ -380,7 +380,7 @@ static int do_update_property(char *buf, size_t bufsize)
 	if (!strcmp(name, "slb-size") || !strcmp(name, "ibm,slb-size"))
 		slb_set_size(*(int *)value);
 
-	return prom_update_property(np, newprop);
+	return of_update_property(np, newprop);
 }
 
 /**
diff --git a/drivers/macintosh/smu.c b/drivers/macintosh/smu.c
index 7d5a6b40b31c..5b939509db3b 100644
--- a/drivers/macintosh/smu.c
+++ b/drivers/macintosh/smu.c
@@ -997,7 +997,7 @@ static struct smu_sdbp_header *smu_create_sdb_partition(int id)
 		       "%02x !\n", id, hdr->id);
 		goto failure;
 	}
-	if (prom_add_property(smu->of_node, prop)) {
+	if (of_add_property(smu->of_node, prop)) {
 		printk(KERN_DEBUG "SMU: Failed creating sdb-partition-%02x "
 		       "property !\n", id);
 		goto failure;
diff --git a/drivers/of/base.c b/drivers/of/base.c
index 87b63850e8dc..02d94c4ea83c 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -1047,9 +1047,9 @@ static int of_property_notify(int action, struct device_node *np,
 #endif
 
 /**
- * prom_add_property - Add a property to a node
+ * of_add_property - Add a property to a node
  */
-int prom_add_property(struct device_node *np, struct property *prop)
+int of_add_property(struct device_node *np, struct property *prop)
 {
 	struct property **next;
 	unsigned long flags;
@@ -1083,14 +1083,14 @@ int prom_add_property(struct device_node *np, struct property *prop)
 }
 
 /**
- * prom_remove_property - Remove a property from a node.
+ * of_remove_property - Remove a property from a node.
  *
  * Note that we don't actually remove it, since we have given out
  * who-knows-how-many pointers to the data using get-property.
  * Instead we just move the property to the "dead properties"
  * list, so it won't be found any more.
  */
-int prom_remove_property(struct device_node *np, struct property *prop)
+int of_remove_property(struct device_node *np, struct property *prop)
 {
 	struct property **next;
 	unsigned long flags;
@@ -1129,7 +1129,7 @@ int prom_remove_property(struct device_node *np, struct property *prop)
 }
 
 /*
- * prom_update_property - Update a property in a node, if the property does
+ * of_update_property - Update a property in a node, if the property does
  * not exist, add it.
  *
  * Note that we don't actually remove it, since we have given out
@@ -1137,8 +1137,7 @@ int prom_remove_property(struct device_node *np, struct property *prop)
  * Instead we just move the property to the "dead properties" list,
  * and add the new property to the property list
  */
-int prom_update_property(struct device_node *np,
-			 struct property *newprop)
+int of_update_property(struct device_node *np, struct property *newprop)
 {
 	struct property **next, *oldprop;
 	unsigned long flags;
@@ -1153,7 +1152,7 @@ int prom_update_property(struct device_node *np,
 
 	oldprop = of_find_property(np, newprop->name, NULL);
 	if (!oldprop)
-		return prom_add_property(np, newprop);
+		return of_add_property(np, newprop);
 
 	write_lock_irqsave(&devtree_lock, flags);
 	next = &np->properties;
diff --git a/include/linux/of.h b/include/linux/of.h
index fb5d87b66e3e..a093b2fe5dfb 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -268,10 +268,9 @@ extern int of_alias_get_id(struct device_node *np, const char *stem);
 
 extern int of_machine_is_compatible(const char *compat);
 
-extern int prom_add_property(struct device_node* np, struct property* prop);
-extern int prom_remove_property(struct device_node *np, struct property *prop);
-extern int prom_update_property(struct device_node *np,
-				struct property *newprop);
+extern int of_add_property(struct device_node *np, struct property *prop);
+extern int of_remove_property(struct device_node *np, struct property *prop);
+extern int of_update_property(struct device_node *np, struct property *newprop);
 
 /* For updating the device tree at runtime */
 #define OF_RECONFIG_ATTACH_NODE		0x0001
-- 
cgit v1.2.3


From 621eb19ce1ec216e03ad354cb0c4061736b2a436 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Wed, 14 Nov 2012 10:48:05 -0500
Subject: svcrpc: Revert "sunrpc/cache.h: replace simple_strtoul"

Commit bbf43dc888833ac0539e437dbaeb28bfd4fbab9f "sunrpc/cache.h: replace
simple_strtoul" introduced new range-checking which could cause get_int
to fail on unsigned integers too large to be represented as an int.

We could parse them as unsigned instead--but it turns out svcgssd is
actually passing down "-1" in some cases.  Which is perhaps stupid, but
there's nothing we can do about it now.

So just revert back to the previous "sloppy" behavior that accepts
either representation.

Cc: stable@vger.kernel.org
Reported-by: Sven Geggus <lists@fuchsschwanzdomain.de>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/cache.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h
index f792794f6634..5dc9ee4d616e 100644
--- a/include/linux/sunrpc/cache.h
+++ b/include/linux/sunrpc/cache.h
@@ -217,6 +217,8 @@ extern int qword_get(char **bpp, char *dest, int bufsize);
 static inline int get_int(char **bpp, int *anint)
 {
 	char buf[50];
+	char *ep;
+	int rv;
 	int len = qword_get(bpp, buf, sizeof(buf));
 
 	if (len < 0)
@@ -224,9 +226,11 @@ static inline int get_int(char **bpp, int *anint)
 	if (len == 0)
 		return -ENOENT;
 
-	if (kstrtoint(buf, 0, anint))
+	rv = simple_strtol(buf, &ep, 0);
+	if (*ep)
 		return -EINVAL;
 
+	*anint = rv;
 	return 0;
 }
 
-- 
cgit v1.2.3


From fc05d5a30dc19dd4c6d161e551719a8c597c7890 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Thu, 4 Oct 2012 09:28:49 +0200
Subject: mtd: delete nomadik_nand driver

The nomadik_nand driver is really just a subset of the FSMC
NAND driver, and there are no users anymore so let's delete
it.

Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Acked-by: Jean-Christophe PLAGNIOL-VILLARD <plagnioj@jcrosoft.com>
Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
---
 drivers/mtd/nand/Kconfig                       |   6 -
 drivers/mtd/nand/Makefile                      |   1 -
 drivers/mtd/nand/nomadik_nand.c                | 235 -------------------------
 include/linux/platform_data/mtd-nomadik-nand.h |  16 --
 4 files changed, 258 deletions(-)
 delete mode 100644 drivers/mtd/nand/nomadik_nand.c
 delete mode 100644 include/linux/platform_data/mtd-nomadik-nand.h

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig
index ee803d611e4e..a803d9ba55bd 100644
--- a/drivers/mtd/nand/Kconfig
+++ b/drivers/mtd/nand/Kconfig
@@ -526,12 +526,6 @@ config MTD_NAND_MXC
 	  This enables the driver for the NAND flash controller on the
 	  MXC processors.
 
-config MTD_NAND_NOMADIK
-	tristate "ST Nomadik 8815 NAND support"
-	depends on ARCH_NOMADIK
-	help
-	  Driver for the NAND flash controller on the Nomadik, with ECC.
-
 config MTD_NAND_SH_FLCTL
 	tristate "Support for NAND on Renesas SuperH FLCTL"
 	depends on SUPERH || ARCH_SHMOBILE
diff --git a/drivers/mtd/nand/Makefile b/drivers/mtd/nand/Makefile
index 38358c90771e..44fca0553365 100644
--- a/drivers/mtd/nand/Makefile
+++ b/drivers/mtd/nand/Makefile
@@ -49,7 +49,6 @@ obj-$(CONFIG_MTD_NAND_MXC)		+= mxc_nand.o
 obj-$(CONFIG_MTD_NAND_SOCRATES)		+= socrates_nand.o
 obj-$(CONFIG_MTD_NAND_TXX9NDFMC)	+= txx9ndfmc.o
 obj-$(CONFIG_MTD_NAND_NUC900)		+= nuc900_nand.o
-obj-$(CONFIG_MTD_NAND_NOMADIK)		+= nomadik_nand.o
 obj-$(CONFIG_MTD_NAND_MPC5121_NFC)	+= mpc5121_nfc.o
 obj-$(CONFIG_MTD_NAND_RICOH)		+= r852.o
 obj-$(CONFIG_MTD_NAND_JZ4740)		+= jz4740_nand.o
diff --git a/drivers/mtd/nand/nomadik_nand.c b/drivers/mtd/nand/nomadik_nand.c
deleted file mode 100644
index 9ee0c4edfacf..000000000000
--- a/drivers/mtd/nand/nomadik_nand.c
+++ /dev/null
@@ -1,235 +0,0 @@
-/*
- *  drivers/mtd/nand/nomadik_nand.c
- *
- *  Overview:
- *  	Driver for on-board NAND flash on Nomadik Platforms
- *
- * Copyright © 2007 STMicroelectronics Pvt. Ltd.
- * Author: Sachin Verma <sachin.verma@st.com>
- *
- * Copyright © 2009 Alessandro Rubini
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- */
-
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/mtd/mtd.h>
-#include <linux/mtd/nand.h>
-#include <linux/mtd/nand_ecc.h>
-#include <linux/platform_device.h>
-#include <linux/mtd/partitions.h>
-#include <linux/io.h>
-#include <linux/slab.h>
-#include <linux/platform_data/mtd-nomadik-nand.h>
-#include <mach/fsmc.h>
-
-#include <mtd/mtd-abi.h>
-
-struct nomadik_nand_host {
-	struct mtd_info		mtd;
-	struct nand_chip	nand;
-	void __iomem *data_va;
-	void __iomem *cmd_va;
-	void __iomem *addr_va;
-	struct nand_bbt_descr *bbt_desc;
-};
-
-static struct nand_ecclayout nomadik_ecc_layout = {
-	.eccbytes = 3 * 4,
-	.eccpos = { /* each subpage has 16 bytes: pos 2,3,4 hosts ECC */
-		0x02, 0x03, 0x04,
-		0x12, 0x13, 0x14,
-		0x22, 0x23, 0x24,
-		0x32, 0x33, 0x34},
-	/* let's keep bytes 5,6,7 for us, just in case we change ECC algo */
-	.oobfree = { {0x08, 0x08}, {0x18, 0x08}, {0x28, 0x08}, {0x38, 0x08} },
-};
-
-static void nomadik_ecc_control(struct mtd_info *mtd, int mode)
-{
-	/* No need to enable hw ecc, it's on by default */
-}
-
-static void nomadik_cmd_ctrl(struct mtd_info *mtd, int cmd, unsigned int ctrl)
-{
-	struct nand_chip *nand = mtd->priv;
-	struct nomadik_nand_host *host = nand->priv;
-
-	if (cmd == NAND_CMD_NONE)
-		return;
-
-	if (ctrl & NAND_CLE)
-		writeb(cmd, host->cmd_va);
-	else
-		writeb(cmd, host->addr_va);
-}
-
-static int nomadik_nand_probe(struct platform_device *pdev)
-{
-	struct nomadik_nand_platform_data *pdata = pdev->dev.platform_data;
-	struct nomadik_nand_host *host;
-	struct mtd_info *mtd;
-	struct nand_chip *nand;
-	struct resource *res;
-	int ret = 0;
-
-	/* Allocate memory for the device structure (and zero it) */
-	host = kzalloc(sizeof(struct nomadik_nand_host), GFP_KERNEL);
-	if (!host) {
-		dev_err(&pdev->dev, "Failed to allocate device structure.\n");
-		return -ENOMEM;
-	}
-
-	/* Call the client's init function, if any */
-	if (pdata->init)
-		ret = pdata->init();
-	if (ret < 0) {
-		dev_err(&pdev->dev, "Init function failed\n");
-		goto err;
-	}
-
-	/* ioremap three regions */
-	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "nand_addr");
-	if (!res) {
-		ret = -EIO;
-		goto err_unmap;
-	}
-	host->addr_va = ioremap(res->start, resource_size(res));
-
-	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "nand_data");
-	if (!res) {
-		ret = -EIO;
-		goto err_unmap;
-	}
-	host->data_va = ioremap(res->start, resource_size(res));
-
-	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "nand_cmd");
-	if (!res) {
-		ret = -EIO;
-		goto err_unmap;
-	}
-	host->cmd_va = ioremap(res->start, resource_size(res));
-
-	if (!host->addr_va || !host->data_va || !host->cmd_va) {
-		ret = -ENOMEM;
-		goto err_unmap;
-	}
-
-	/* Link all private pointers */
-	mtd = &host->mtd;
-	nand = &host->nand;
-	mtd->priv = nand;
-	nand->priv = host;
-
-	host->mtd.owner = THIS_MODULE;
-	nand->IO_ADDR_R = host->data_va;
-	nand->IO_ADDR_W = host->data_va;
-	nand->cmd_ctrl = nomadik_cmd_ctrl;
-
-	/*
-	 * This stanza declares ECC_HW but uses soft routines. It's because
-	 * HW claims to make the calculation but not the correction. However,
-	 * I haven't managed to get the desired data out of it until now.
-	 */
-	nand->ecc.mode = NAND_ECC_SOFT;
-	nand->ecc.layout = &nomadik_ecc_layout;
-	nand->ecc.hwctl = nomadik_ecc_control;
-	nand->ecc.size = 512;
-	nand->ecc.bytes = 3;
-
-	nand->options = pdata->options;
-
-	/*
-	 * Scan to find existence of the device
-	 */
-	if (nand_scan(&host->mtd, 1)) {
-		ret = -ENXIO;
-		goto err_unmap;
-	}
-
-	mtd_device_register(&host->mtd, pdata->parts, pdata->nparts);
-
-	platform_set_drvdata(pdev, host);
-	return 0;
-
- err_unmap:
-	if (host->cmd_va)
-		iounmap(host->cmd_va);
-	if (host->data_va)
-		iounmap(host->data_va);
-	if (host->addr_va)
-		iounmap(host->addr_va);
- err:
-	kfree(host);
-	return ret;
-}
-
-/*
- * Clean up routine
- */
-static int nomadik_nand_remove(struct platform_device *pdev)
-{
-	struct nomadik_nand_host *host = platform_get_drvdata(pdev);
-	struct nomadik_nand_platform_data *pdata = pdev->dev.platform_data;
-
-	if (pdata->exit)
-		pdata->exit();
-
-	if (host) {
-		nand_release(&host->mtd);
-		iounmap(host->cmd_va);
-		iounmap(host->data_va);
-		iounmap(host->addr_va);
-		kfree(host);
-	}
-	return 0;
-}
-
-static int nomadik_nand_suspend(struct device *dev)
-{
-	struct nomadik_nand_host *host = dev_get_drvdata(dev);
-	int ret = 0;
-	if (host)
-		ret = mtd_suspend(&host->mtd);
-	return ret;
-}
-
-static int nomadik_nand_resume(struct device *dev)
-{
-	struct nomadik_nand_host *host = dev_get_drvdata(dev);
-	if (host)
-		mtd_resume(&host->mtd);
-	return 0;
-}
-
-static const struct dev_pm_ops nomadik_nand_pm_ops = {
-	.suspend = nomadik_nand_suspend,
-	.resume = nomadik_nand_resume,
-};
-
-static struct platform_driver nomadik_nand_driver = {
-	.probe = nomadik_nand_probe,
-	.remove = nomadik_nand_remove,
-	.driver = {
-		.owner = THIS_MODULE,
-		.name = "nomadik_nand",
-		.pm = &nomadik_nand_pm_ops,
-	},
-};
-
-module_platform_driver(nomadik_nand_driver);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("ST Microelectronics (sachin.verma@st.com)");
-MODULE_DESCRIPTION("NAND driver for Nomadik Platform");
diff --git a/include/linux/platform_data/mtd-nomadik-nand.h b/include/linux/platform_data/mtd-nomadik-nand.h
deleted file mode 100644
index c3c8254c22a5..000000000000
--- a/include/linux/platform_data/mtd-nomadik-nand.h
+++ /dev/null
@@ -1,16 +0,0 @@
-#ifndef __ASM_ARCH_NAND_H
-#define __ASM_ARCH_NAND_H
-
-struct nomadik_nand_platform_data {
-	struct mtd_partition *parts;
-	int nparts;
-	int options;
-	int (*init) (void);
-	int (*exit) (void);
-};
-
-#define NAND_IO_DATA	0x40000000
-#define NAND_IO_CMD	0x40800000
-#define NAND_IO_ADDR	0x41000000
-
-#endif				/* __ASM_ARCH_NAND_H */
-- 
cgit v1.2.3


From 6d7b42a447f92eb3e7e410bbf62042693eb040f7 Mon Sep 17 00:00:00 2001
From: Jean-Christophe PLAGNIOL-VILLARD <plagnioj@jcrosoft.com>
Date: Thu, 4 Oct 2012 15:14:16 +0200
Subject: mtd: fsmc_nand: pass the ale and cmd resource via resource

Do not use the platform_data to pass resource and be smart in the drivers.
Just pass it via resource

Switch to devm_request_and_ioremap at the sametime

Signed-off-by: Jean-Christophe PLAGNIOL-VILLARD <plagnioj@jcrosoft.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Reviewed-By: Vipin Kumar <vipin.kumar@st.com>
Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
---
 .../devicetree/bindings/mtd/fsmc-nand.txt          | 12 +++---
 arch/arm/boot/dts/spear13xx.dtsi                   | 10 ++---
 arch/arm/boot/dts/spear300.dtsi                    |  8 ++--
 arch/arm/boot/dts/spear310.dtsi                    |  8 ++--
 arch/arm/boot/dts/spear320.dtsi                    |  8 ++--
 arch/arm/boot/dts/spear600.dtsi                    |  8 ++--
 arch/arm/mach-u300/core.c                          | 14 ++++++-
 drivers/mtd/nand/fsmc_nand.c                       | 44 ++++++----------------
 include/linux/mtd/fsmc.h                           |  3 --
 9 files changed, 49 insertions(+), 66 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/mtd/fsmc-nand.txt b/Documentation/devicetree/bindings/mtd/fsmc-nand.txt
index e2c663b354d2..e3ea32e7de3e 100644
--- a/Documentation/devicetree/bindings/mtd/fsmc-nand.txt
+++ b/Documentation/devicetree/bindings/mtd/fsmc-nand.txt
@@ -3,9 +3,7 @@
 Required properties:
 - compatible : "st,spear600-fsmc-nand"
 - reg : Address range of the mtd chip
-- reg-names: Should contain the reg names "fsmc_regs" and "nand_data"
-- st,ale-off : Chip specific offset to ALE
-- st,cle-off : Chip specific offset to CLE
+- reg-names: Should contain the reg names "fsmc_regs", "nand_data", "nand_addr" and "nand_cmd"
 
 Optional properties:
 - bank-width : Width (in bytes) of the device.  If not present, the width
@@ -19,10 +17,10 @@ Example:
 		#address-cells = <1>;
 		#size-cells = <1>;
 		reg = <0xd1800000 0x1000	/* FSMC Register */
-		       0xd2000000 0x4000>;	/* NAND Base */
-		reg-names = "fsmc_regs", "nand_data";
-		st,ale-off = <0x20000>;
-		st,cle-off = <0x10000>;
+		       0xd2000000 0x0010	/* NAND Base DATA */
+		       0xd2020000 0x0010	/* NAND Base ADDR */
+		       0xd2010000 0x0010>;	/* NAND Base CMD */
+		reg-names = "fsmc_regs", "nand_data", "nand_addr", "nand_cmd";
 
 		bank-width = <1>;
 		nand-skip-bbtscan;
diff --git a/arch/arm/boot/dts/spear13xx.dtsi b/arch/arm/boot/dts/spear13xx.dtsi
index f7b84aced654..14a6d15c2a81 100644
--- a/arch/arm/boot/dts/spear13xx.dtsi
+++ b/arch/arm/boot/dts/spear13xx.dtsi
@@ -104,15 +104,15 @@
 			compatible = "st,spear600-fsmc-nand";
 			#address-cells = <1>;
 			#size-cells = <1>;
-			reg = <0xb0000000 0x1000	/* FSMC Register */
-			       0xb0800000 0x0010>;	/* NAND Base */
-			reg-names = "fsmc_regs", "nand_data";
+			reg = <0xb0000000 0x1000	/* FSMC Register*/
+			       0xb0800000 0x0010	/* NAND Base DATA */
+			       0xb0820000 0x0010	/* NAND Base ADDR */
+			       0xb0810000 0x0010>;	/* NAND Base CMD */
+			reg-names = "fsmc_regs", "nand_data", "nand_addr", "nand_cmd";
 			interrupts = <0 20 0x4
 				      0 21 0x4
 				      0 22 0x4
 				      0 23 0x4>;
-			st,ale-off = <0x20000>;
-			st,cle-off = <0x10000>;
 			status = "disabled";
 		};
 
diff --git a/arch/arm/boot/dts/spear300.dtsi b/arch/arm/boot/dts/spear300.dtsi
index ed3627c116cc..bc436387d7f9 100644
--- a/arch/arm/boot/dts/spear300.dtsi
+++ b/arch/arm/boot/dts/spear300.dtsi
@@ -38,10 +38,10 @@
 			#address-cells = <1>;
 			#size-cells = <1>;
 			reg = <0x94000000 0x1000	/* FSMC Register */
-			       0x80000000 0x0010>;	/* NAND Base */
-			reg-names = "fsmc_regs", "nand_data";
-			st,ale-off = <0x20000>;
-			st,cle-off = <0x10000>;
+			       0x80000000 0x0010	/* NAND Base DATA */
+			       0x80020000 0x0010	/* NAND Base ADDR */
+			       0x80010000 0x0010>;	/* NAND Base CMD */
+			reg-names = "fsmc_regs", "nand_data", "nand_addr", "nand_cmd";
 			status = "disabled";
 		};
 
diff --git a/arch/arm/boot/dts/spear310.dtsi b/arch/arm/boot/dts/spear310.dtsi
index 62fc4fb3e5f9..7840e529aba2 100644
--- a/arch/arm/boot/dts/spear310.dtsi
+++ b/arch/arm/boot/dts/spear310.dtsi
@@ -32,10 +32,10 @@
 			#address-cells = <1>;
 			#size-cells = <1>;
 			reg = <0x44000000 0x1000	/* FSMC Register */
-			       0x40000000 0x0010>;	/* NAND Base */
-			reg-names = "fsmc_regs", "nand_data";
-			st,ale-off = <0x10000>;
-			st,cle-off = <0x20000>;
+			       0x40000000 0x0010	/* NAND Base DATA */
+			       0x40020000 0x0010	/* NAND Base ADDR */
+			       0x40010000 0x0010>;	/* NAND Base CMD */
+			reg-names = "fsmc_regs", "nand_data", "nand_addr", "nand_cmd";
 			status = "disabled";
 		};
 
diff --git a/arch/arm/boot/dts/spear320.dtsi b/arch/arm/boot/dts/spear320.dtsi
index 1f49d69595a0..5ad820641ac0 100644
--- a/arch/arm/boot/dts/spear320.dtsi
+++ b/arch/arm/boot/dts/spear320.dtsi
@@ -38,10 +38,10 @@
 			#address-cells = <1>;
 			#size-cells = <1>;
 			reg = <0x4c000000 0x1000	/* FSMC Register */
-			       0x50000000 0x0010>;	/* NAND Base */
-			reg-names = "fsmc_regs", "nand_data";
-			st,ale-off = <0x20000>;
-			st,cle-off = <0x10000>;
+			       0x50000000 0x0010	/* NAND Base DATA */
+			       0x50020000 0x0010	/* NAND Base ADDR */
+			       0x50010000 0x0010>;	/* NAND Base CMD */
+			reg-names = "fsmc_regs", "nand_data", "nand_addr", "nand_cmd";
 			status = "disabled";
 		};
 
diff --git a/arch/arm/boot/dts/spear600.dtsi b/arch/arm/boot/dts/spear600.dtsi
index a3c36e47d7ef..4ecc66f5ac88 100644
--- a/arch/arm/boot/dts/spear600.dtsi
+++ b/arch/arm/boot/dts/spear600.dtsi
@@ -67,10 +67,10 @@
 			#address-cells = <1>;
 			#size-cells = <1>;
 			reg = <0xd1800000 0x1000	/* FSMC Register */
-			       0xd2000000 0x4000>;	/* NAND Base */
-			reg-names = "fsmc_regs", "nand_data";
-			st,ale-off = <0x20000>;
-			st,cle-off = <0x10000>;
+			       0xd2000000 0x0010	/* NAND Base DATA */
+			       0xd2020000 0x0010	/* NAND Base ADDR */
+			       0xd2010000 0x0010>;	/* NAND Base CMD */
+			reg-names = "fsmc_regs", "nand_data", "nand_addr", "nand_cmd";
 			status = "disabled";
 		};
 
diff --git a/arch/arm/mach-u300/core.c b/arch/arm/mach-u300/core.c
index b8efac4daed8..f642a1552346 100644
--- a/arch/arm/mach-u300/core.c
+++ b/arch/arm/mach-u300/core.c
@@ -251,6 +251,18 @@ static struct resource rtc_resources[] = {
  * but these are not yet used by the driver.
  */
 static struct resource fsmc_resources[] = {
+	{
+		.name  = "nand_addr",
+		.start = U300_NAND_CS0_PHYS_BASE + PLAT_NAND_ALE,
+		.end   = U300_NAND_CS0_PHYS_BASE + PLAT_NAND_ALE + SZ_16K - 1,
+		.flags = IORESOURCE_MEM,
+	},
+	{
+		.name  = "nand_cmd",
+		.start = U300_NAND_CS0_PHYS_BASE + PLAT_NAND_CLE,
+		.end   = U300_NAND_CS0_PHYS_BASE + PLAT_NAND_CLE + SZ_16K - 1,
+		.flags = IORESOURCE_MEM,
+	},
 	{
 		.name  = "nand_data",
 		.start = U300_NAND_CS0_PHYS_BASE,
@@ -1496,8 +1508,6 @@ static struct fsmc_nand_platform_data nand_platform_data = {
 	.nr_partitions = ARRAY_SIZE(u300_partitions),
 	.options = NAND_SKIP_BBTSCAN,
 	.width = FSMC_NAND_BW8,
-	.ale_off = PLAT_NAND_ALE,
-	.cle_off = PLAT_NAND_CLE,
 };
 
 static struct platform_device nand_device = {
diff --git a/drivers/mtd/nand/fsmc_nand.c b/drivers/mtd/nand/fsmc_nand.c
index 38d26240d8b1..cb8645087151 100644
--- a/drivers/mtd/nand/fsmc_nand.c
+++ b/drivers/mtd/nand/fsmc_nand.c
@@ -876,8 +876,6 @@ static int __devinit fsmc_nand_probe_config_dt(struct platform_device *pdev,
 			return -EINVAL;
 		}
 	}
-	of_property_read_u32(np, "st,ale-off", &pdata->ale_off);
-	of_property_read_u32(np, "st,cle-off", &pdata->cle_off);
 	if (of_get_property(np, "nand-skip-bbtscan", NULL))
 		pdata->options = NAND_SKIP_BBTSCAN;
 
@@ -935,41 +933,28 @@ static int __init fsmc_nand_probe(struct platform_device *pdev)
 	if (!res)
 		return -EINVAL;
 
-	if (!devm_request_mem_region(&pdev->dev, res->start, resource_size(res),
-				pdev->name)) {
-		dev_err(&pdev->dev, "Failed to get memory data resourse\n");
-		return -ENOENT;
-	}
-
-	host->data_pa = (dma_addr_t)res->start;
-	host->data_va = devm_ioremap(&pdev->dev, res->start,
-			resource_size(res));
+	host->data_va = devm_request_and_ioremap(&pdev->dev, res);
 	if (!host->data_va) {
 		dev_err(&pdev->dev, "data ioremap failed\n");
 		return -ENOMEM;
 	}
+	host->data_pa = (dma_addr_t)res->start;
 
-	if (!devm_request_mem_region(&pdev->dev, res->start + pdata->ale_off,
-			resource_size(res), pdev->name)) {
-		dev_err(&pdev->dev, "Failed to get memory ale resourse\n");
-		return -ENOENT;
-	}
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "nand_addr");
+	if (!res)
+		return -EINVAL;
 
-	host->addr_va = devm_ioremap(&pdev->dev, res->start + pdata->ale_off,
-			resource_size(res));
+	host->addr_va = devm_request_and_ioremap(&pdev->dev, res);
 	if (!host->addr_va) {
 		dev_err(&pdev->dev, "ale ioremap failed\n");
 		return -ENOMEM;
 	}
 
-	if (!devm_request_mem_region(&pdev->dev, res->start + pdata->cle_off,
-			resource_size(res), pdev->name)) {
-		dev_err(&pdev->dev, "Failed to get memory cle resourse\n");
-		return -ENOENT;
-	}
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "nand_cmd");
+	if (!res)
+		return -EINVAL;
 
-	host->cmd_va = devm_ioremap(&pdev->dev, res->start + pdata->cle_off,
-			resource_size(res));
+	host->cmd_va = devm_request_and_ioremap(&pdev->dev, res);
 	if (!host->cmd_va) {
 		dev_err(&pdev->dev, "ale ioremap failed\n");
 		return -ENOMEM;
@@ -979,14 +964,7 @@ static int __init fsmc_nand_probe(struct platform_device *pdev)
 	if (!res)
 		return -EINVAL;
 
-	if (!devm_request_mem_region(&pdev->dev, res->start, resource_size(res),
-			pdev->name)) {
-		dev_err(&pdev->dev, "Failed to get memory regs resourse\n");
-		return -ENOENT;
-	}
-
-	host->regs_va = devm_ioremap(&pdev->dev, res->start,
-			resource_size(res));
+	host->regs_va = devm_request_and_ioremap(&pdev->dev, res);
 	if (!host->regs_va) {
 		dev_err(&pdev->dev, "regs ioremap failed\n");
 		return -ENOMEM;
diff --git a/include/linux/mtd/fsmc.h b/include/linux/mtd/fsmc.h
index b20029221fb1..d6ed61ef451d 100644
--- a/include/linux/mtd/fsmc.h
+++ b/include/linux/mtd/fsmc.h
@@ -155,9 +155,6 @@ struct fsmc_nand_platform_data {
 	unsigned int		width;
 	unsigned int		bank;
 
-	/* CLE, ALE offsets */
-	unsigned int		cle_off;
-	unsigned int		ale_off;
 	enum access_mode	mode;
 
 	void			(*select_bank)(uint32_t bank, uint32_t busw);
-- 
cgit v1.2.3


From 2f25ae97fe4b424d88d765797c46456c7c0f1bae Mon Sep 17 00:00:00 2001
From: Vipin Kumar <vipin.kumar@st.com>
Date: Tue, 9 Oct 2012 16:14:53 +0530
Subject: mtd: nand: Increase the ecc placement locations to 640

Few devices like H27UBG8T2CTR have a writesize/oobsize of 8KB/640B.
This means that the maximum oobsize has gone up to 640 bytes and consequently
the maximum ecc placement locations have also gone up to 640.

Signed-off-by: Vipin Kumar <vipin.kumar@st.com>
Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
---
 include/linux/mtd/mtd.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 81d61e704599..f9ac2897b86b 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -98,7 +98,7 @@ struct mtd_oob_ops {
 };
 
 #define MTD_MAX_OOBFREE_ENTRIES_LARGE	32
-#define MTD_MAX_ECCPOS_ENTRIES_LARGE	448
+#define MTD_MAX_ECCPOS_ENTRIES_LARGE	640
 /*
  * Internal ECC layout control structure. For historical reasons, there is a
  * similar, smaller struct nand_ecclayout_user (in mtd-abi.h) that is retained
-- 
cgit v1.2.3


From 5de0b52ea8f8f5149502867acff2efb5efaf1fc2 Mon Sep 17 00:00:00 2001
From: Huang Shijie <shijie8@gmail.com>
Date: Sat, 13 Oct 2012 13:03:29 -0400
Subject: mtd: gpmi: remove unneccessary header

The whole gpmi-nand driver has turned to pure devicetree supported.
So the linux/mtd/gpmi-nand.h is not neccessary now. Just remove it,
and move some macros to the gpmi-nand driver itself.

Signed-off-by: Huang Shijie <shijie8@gmail.com>
Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
---
 drivers/mtd/nand/gpmi-nand/gpmi-lib.c  |  1 -
 drivers/mtd/nand/gpmi-nand/gpmi-nand.c |  7 +++-
 drivers/mtd/nand/gpmi-nand/gpmi-nand.h |  1 -
 include/linux/mtd/gpmi-nand.h          | 68 ----------------------------------
 4 files changed, 6 insertions(+), 71 deletions(-)
 delete mode 100644 include/linux/mtd/gpmi-nand.h

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/gpmi-nand/gpmi-lib.c b/drivers/mtd/nand/gpmi-nand/gpmi-lib.c
index 3502accd4bc3..1585c5b1c8bf 100644
--- a/drivers/mtd/nand/gpmi-nand/gpmi-lib.c
+++ b/drivers/mtd/nand/gpmi-nand/gpmi-lib.c
@@ -18,7 +18,6 @@
  * with this program; if not, write to the Free Software Foundation, Inc.,
  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  */
-#include <linux/mtd/gpmi-nand.h>
 #include <linux/delay.h>
 #include <linux/clk.h>
 
diff --git a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c
index e2c56fc4574b..d37619882fa6 100644
--- a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c
+++ b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c
@@ -25,7 +25,6 @@
 #include <linux/slab.h>
 #include <linux/interrupt.h>
 #include <linux/module.h>
-#include <linux/mtd/gpmi-nand.h>
 #include <linux/mtd/partitions.h>
 #include <linux/pinctrl/consumer.h>
 #include <linux/of.h>
@@ -33,6 +32,12 @@
 #include <linux/of_mtd.h>
 #include "gpmi-nand.h"
 
+/* Resource names for the GPMI NAND driver. */
+#define GPMI_NAND_GPMI_REGS_ADDR_RES_NAME  "gpmi-nand"
+#define GPMI_NAND_BCH_REGS_ADDR_RES_NAME   "bch"
+#define GPMI_NAND_BCH_INTERRUPT_RES_NAME   "bch"
+#define GPMI_NAND_DMA_INTERRUPT_RES_NAME   "gpmi-dma"
+
 /* add our owner bbt descriptor */
 static uint8_t scan_ff_pattern[] = { 0xff };
 static struct nand_bbt_descr gpmi_bbt_descr = {
diff --git a/drivers/mtd/nand/gpmi-nand/gpmi-nand.h b/drivers/mtd/nand/gpmi-nand/gpmi-nand.h
index 7ac25c1e58f9..3d93a5e39090 100644
--- a/drivers/mtd/nand/gpmi-nand/gpmi-nand.h
+++ b/drivers/mtd/nand/gpmi-nand/gpmi-nand.h
@@ -130,7 +130,6 @@ struct gpmi_nand_data {
 	/* System Interface */
 	struct device		*dev;
 	struct platform_device	*pdev;
-	struct gpmi_nand_platform_data	*pdata;
 
 	/* Resources */
 	struct resources	resources;
diff --git a/include/linux/mtd/gpmi-nand.h b/include/linux/mtd/gpmi-nand.h
deleted file mode 100644
index ed3c4e09f3d1..000000000000
--- a/include/linux/mtd/gpmi-nand.h
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright (C) 2011 Freescale Semiconductor, Inc. All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-#ifndef __MACH_MXS_GPMI_NAND_H__
-#define __MACH_MXS_GPMI_NAND_H__
-
-/* The size of the resources is fixed. */
-#define GPMI_NAND_RES_SIZE	6
-
-/* Resource names for the GPMI NAND driver. */
-#define GPMI_NAND_GPMI_REGS_ADDR_RES_NAME  "gpmi-nand"
-#define GPMI_NAND_GPMI_INTERRUPT_RES_NAME  "GPMI NAND GPMI Interrupt"
-#define GPMI_NAND_BCH_REGS_ADDR_RES_NAME   "bch"
-#define GPMI_NAND_BCH_INTERRUPT_RES_NAME   "bch"
-#define GPMI_NAND_DMA_CHANNELS_RES_NAME    "GPMI NAND DMA Channels"
-#define GPMI_NAND_DMA_INTERRUPT_RES_NAME   "gpmi-dma"
-
-/**
- * struct gpmi_nand_platform_data - GPMI NAND driver platform data.
- *
- * This structure communicates platform-specific information to the GPMI NAND
- * driver that can't be expressed as resources.
- *
- * @platform_init:           A pointer to a function the driver will call to
- *                           initialize the platform (e.g., set up the pin mux).
- * @min_prop_delay_in_ns:    Minimum propagation delay of GPMI signals to and
- *                           from the NAND Flash device, in nanoseconds.
- * @max_prop_delay_in_ns:    Maximum propagation delay of GPMI signals to and
- *                           from the NAND Flash device, in nanoseconds.
- * @max_chip_count:          The maximum number of chips for which the driver
- *                           should configure the hardware. This value most
- *                           likely reflects the number of pins that are
- *                           connected to a NAND Flash device. If this is
- *                           greater than the SoC hardware can support, the
- *                           driver will print a message and fail to initialize.
- * @partitions:              An optional pointer to an array of partition
- *                           descriptions.
- * @partition_count:         The number of elements in the partitions array.
- */
-struct gpmi_nand_platform_data {
-	/* SoC hardware information. */
-	int		(*platform_init)(void);
-
-	/* NAND Flash information. */
-	unsigned int	min_prop_delay_in_ns;
-	unsigned int	max_prop_delay_in_ns;
-	unsigned int	max_chip_count;
-
-	/* Medium information. */
-	struct		mtd_partition *partitions;
-	unsigned	partition_count;
-};
-#endif
-- 
cgit v1.2.3


From e8a9d8f31c592eea89f1b0d3fd425e7a96944e88 Mon Sep 17 00:00:00 2001
From: Bastian Hecht <hechtb@googlemail.com>
Date: Fri, 19 Oct 2012 12:15:34 +0200
Subject: mtd: sh_flctl: Minor cleanups

Some small fixes to avoid sparse and smatch complain. Other cosmetic fixes
as well.

- Change of the type of the member index in struct sh_flctl from signed
to unsigned. We use index by addressing array members, so unsigned is more
concise here. Adapt functions relying on sh_flctl::index.
- Remove a blurring cast in write_fiforeg().
- Apply consistent naming scheme when refering to the data buffer.
- Shorten some unnecessarily verbose functions.
- Remove spaces at start of lines.

Signed-off-by: Bastian Hecht <hechtb@gmail.com>
Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
---
 drivers/mtd/nand/sh_flctl.c  | 37 ++++++++++++++++---------------------
 include/linux/mtd/sh_flctl.h |  2 +-
 2 files changed, 17 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/sh_flctl.c b/drivers/mtd/nand/sh_flctl.c
index 4fbfe96e37a1..78d18c0f132f 100644
--- a/drivers/mtd/nand/sh_flctl.c
+++ b/drivers/mtd/nand/sh_flctl.c
@@ -225,7 +225,7 @@ static enum flctl_ecc_res_t wait_recfifo_ready
 
 		for (i = 0; i < 3; i++) {
 			uint8_t org;
-			int index;
+			unsigned int index;
 
 			data = readl(ecc_reg[i]);
 
@@ -305,28 +305,29 @@ static enum flctl_ecc_res_t read_ecfiforeg
 	return res;
 }
 
-static void write_fiforeg(struct sh_flctl *flctl, int rlen, int offset)
+static void write_fiforeg(struct sh_flctl *flctl, int rlen,
+						unsigned int offset)
 {
 	int i, len_4align;
-	unsigned long *data = (unsigned long *)&flctl->done_buff[offset];
-	void *fifo_addr = (void *)FLDTFIFO(flctl);
+	unsigned long *buf = (unsigned long *)&flctl->done_buff[offset];
 
 	len_4align = (rlen + 3) / 4;
 	for (i = 0; i < len_4align; i++) {
 		wait_wfifo_ready(flctl);
-		writel(cpu_to_be32(data[i]), fifo_addr);
+		writel(cpu_to_be32(buf[i]), FLDTFIFO(flctl));
 	}
 }
 
-static void write_ec_fiforeg(struct sh_flctl *flctl, int rlen, int offset)
+static void write_ec_fiforeg(struct sh_flctl *flctl, int rlen,
+						unsigned int offset)
 {
 	int i, len_4align;
-	unsigned long *data = (unsigned long *)&flctl->done_buff[offset];
+	unsigned long *buf = (unsigned long *)&flctl->done_buff[offset];
 
 	len_4align = (rlen + 3) / 4;
 	for (i = 0; i < len_4align; i++) {
 		wait_wecfifo_ready(flctl);
-		writel(cpu_to_be32(data[i]), FLECFIFO(flctl));
+		writel(cpu_to_be32(buf[i]), FLECFIFO(flctl));
 	}
 }
 
@@ -748,41 +749,35 @@ static void flctl_select_chip(struct mtd_info *mtd, int chipnr)
 static void flctl_write_buf(struct mtd_info *mtd, const uint8_t *buf, int len)
 {
 	struct sh_flctl *flctl = mtd_to_flctl(mtd);
-	int index = flctl->index;
 
-	memcpy(&flctl->done_buff[index], buf, len);
+	memcpy(&flctl->done_buff[flctl->index], buf, len);
 	flctl->index += len;
 }
 
 static uint8_t flctl_read_byte(struct mtd_info *mtd)
 {
 	struct sh_flctl *flctl = mtd_to_flctl(mtd);
-	int index = flctl->index;
 	uint8_t data;
 
-	data = flctl->done_buff[index];
+	data = flctl->done_buff[flctl->index];
 	flctl->index++;
 	return data;
 }
 
 static uint16_t flctl_read_word(struct mtd_info *mtd)
 {
-       struct sh_flctl *flctl = mtd_to_flctl(mtd);
-       int index = flctl->index;
-       uint16_t data;
-       uint16_t *buf = (uint16_t *)&flctl->done_buff[index];
+	struct sh_flctl *flctl = mtd_to_flctl(mtd);
+	uint16_t *buf = (uint16_t *)&flctl->done_buff[flctl->index];
 
-       data = *buf;
-       flctl->index += 2;
-       return data;
+	flctl->index += 2;
+	return *buf;
 }
 
 static void flctl_read_buf(struct mtd_info *mtd, uint8_t *buf, int len)
 {
 	struct sh_flctl *flctl = mtd_to_flctl(mtd);
-	int index = flctl->index;
 
-	memcpy(buf, &flctl->done_buff[index], len);
+	memcpy(buf, &flctl->done_buff[flctl->index], len);
 	flctl->index += len;
 }
 
diff --git a/include/linux/mtd/sh_flctl.h b/include/linux/mtd/sh_flctl.h
index 01e4b15b280e..481557688d05 100644
--- a/include/linux/mtd/sh_flctl.h
+++ b/include/linux/mtd/sh_flctl.h
@@ -147,7 +147,7 @@ struct sh_flctl {
 
 	uint8_t	done_buff[2048 + 64];	/* max size 2048 + 64 */
 	int	read_bytes;
-	int	index;
+	unsigned int index;
 	int	seqin_column;		/* column in SEQIN cmd */
 	int	seqin_page_addr;	/* page_addr in SEQIN cmd */
 	uint32_t seqin_read_cmd;		/* read cmd in SEQIN cmd */
-- 
cgit v1.2.3


From 83738d87e3a0a4096e1419a65b8228130d183df6 Mon Sep 17 00:00:00 2001
From: Bastian Hecht <hechtb@googlemail.com>
Date: Fri, 19 Oct 2012 12:15:35 +0200
Subject: mtd: sh_flctl: Add DMA capabilty

The code probes if DMA channels can get allocated and tears them down at
removal/failure if needed.
If available it uses them to transfer the data part (not ECC). On
failure we fall back to PIO mode.

Based on Guennadi Liakhovetski's code from the sh_mmcif driver.

Signed-off-by: Bastian Hecht <hechtb@gmail.com>
Reviewed-by: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
---
 drivers/mtd/nand/sh_flctl.c  | 173 ++++++++++++++++++++++++++++++++++++++++++-
 include/linux/mtd/sh_flctl.h |  12 +++
 2 files changed, 183 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/sh_flctl.c b/drivers/mtd/nand/sh_flctl.c
index 78d18c0f132f..6dc0369aa44b 100644
--- a/drivers/mtd/nand/sh_flctl.c
+++ b/drivers/mtd/nand/sh_flctl.c
@@ -23,11 +23,15 @@
 
 #include <linux/module.h>
 #include <linux/kernel.h>
+#include <linux/completion.h>
 #include <linux/delay.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
+#include <linux/sh_dma.h>
 #include <linux/slab.h>
 #include <linux/string.h>
 
@@ -106,6 +110,84 @@ static void wait_completion(struct sh_flctl *flctl)
 	writeb(0x0, FLTRCR(flctl));
 }
 
+static void flctl_dma_complete(void *param)
+{
+	struct sh_flctl *flctl = param;
+
+	complete(&flctl->dma_complete);
+}
+
+static void flctl_release_dma(struct sh_flctl *flctl)
+{
+	if (flctl->chan_fifo0_rx) {
+		dma_release_channel(flctl->chan_fifo0_rx);
+		flctl->chan_fifo0_rx = NULL;
+	}
+	if (flctl->chan_fifo0_tx) {
+		dma_release_channel(flctl->chan_fifo0_tx);
+		flctl->chan_fifo0_tx = NULL;
+	}
+}
+
+static void flctl_setup_dma(struct sh_flctl *flctl)
+{
+	dma_cap_mask_t mask;
+	struct dma_slave_config cfg;
+	struct platform_device *pdev = flctl->pdev;
+	struct sh_flctl_platform_data *pdata = pdev->dev.platform_data;
+	int ret;
+
+	if (!pdata)
+		return;
+
+	if (pdata->slave_id_fifo0_tx <= 0 || pdata->slave_id_fifo0_rx <= 0)
+		return;
+
+	/* We can only either use DMA for both Tx and Rx or not use it at all */
+	dma_cap_zero(mask);
+	dma_cap_set(DMA_SLAVE, mask);
+
+	flctl->chan_fifo0_tx = dma_request_channel(mask, shdma_chan_filter,
+					    (void *)pdata->slave_id_fifo0_tx);
+	dev_dbg(&pdev->dev, "%s: TX: got channel %p\n", __func__,
+		flctl->chan_fifo0_tx);
+
+	if (!flctl->chan_fifo0_tx)
+		return;
+
+	memset(&cfg, 0, sizeof(cfg));
+	cfg.slave_id = pdata->slave_id_fifo0_tx;
+	cfg.direction = DMA_MEM_TO_DEV;
+	cfg.dst_addr = (dma_addr_t)FLDTFIFO(flctl);
+	cfg.src_addr = 0;
+	ret = dmaengine_slave_config(flctl->chan_fifo0_tx, &cfg);
+	if (ret < 0)
+		goto err;
+
+	flctl->chan_fifo0_rx = dma_request_channel(mask, shdma_chan_filter,
+					    (void *)pdata->slave_id_fifo0_rx);
+	dev_dbg(&pdev->dev, "%s: RX: got channel %p\n", __func__,
+		flctl->chan_fifo0_rx);
+
+	if (!flctl->chan_fifo0_rx)
+		goto err;
+
+	cfg.slave_id = pdata->slave_id_fifo0_rx;
+	cfg.direction = DMA_DEV_TO_MEM;
+	cfg.dst_addr = 0;
+	cfg.src_addr = (dma_addr_t)FLDTFIFO(flctl);
+	ret = dmaengine_slave_config(flctl->chan_fifo0_rx, &cfg);
+	if (ret < 0)
+		goto err;
+
+	init_completion(&flctl->dma_complete);
+
+	return;
+
+err:
+	flctl_release_dma(flctl);
+}
+
 static void set_addr(struct mtd_info *mtd, int column, int page_addr)
 {
 	struct sh_flctl *flctl = mtd_to_flctl(mtd);
@@ -261,6 +343,70 @@ static void wait_wecfifo_ready(struct sh_flctl *flctl)
 	timeout_error(flctl, __func__);
 }
 
+static int flctl_dma_fifo0_transfer(struct sh_flctl *flctl, unsigned long *buf,
+					int len, enum dma_data_direction dir)
+{
+	struct dma_async_tx_descriptor *desc = NULL;
+	struct dma_chan *chan;
+	enum dma_transfer_direction tr_dir;
+	dma_addr_t dma_addr;
+	dma_cookie_t cookie = -EINVAL;
+	uint32_t reg;
+	int ret;
+
+	if (dir == DMA_FROM_DEVICE) {
+		chan = flctl->chan_fifo0_rx;
+		tr_dir = DMA_DEV_TO_MEM;
+	} else {
+		chan = flctl->chan_fifo0_tx;
+		tr_dir = DMA_MEM_TO_DEV;
+	}
+
+	dma_addr = dma_map_single(chan->device->dev, buf, len, dir);
+
+	if (dma_addr)
+		desc = dmaengine_prep_slave_single(chan, dma_addr, len,
+			tr_dir, DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+
+	if (desc) {
+		reg = readl(FLINTDMACR(flctl));
+		reg |= DREQ0EN;
+		writel(reg, FLINTDMACR(flctl));
+
+		desc->callback = flctl_dma_complete;
+		desc->callback_param = flctl;
+		cookie = dmaengine_submit(desc);
+
+		dma_async_issue_pending(chan);
+	} else {
+		/* DMA failed, fall back to PIO */
+		flctl_release_dma(flctl);
+		dev_warn(&flctl->pdev->dev,
+			 "DMA failed, falling back to PIO\n");
+		ret = -EIO;
+		goto out;
+	}
+
+	ret =
+	wait_for_completion_timeout(&flctl->dma_complete,
+				msecs_to_jiffies(3000));
+
+	if (ret <= 0) {
+		chan->device->device_control(chan, DMA_TERMINATE_ALL, 0);
+		dev_err(&flctl->pdev->dev, "wait_for_completion_timeout\n");
+	}
+
+out:
+	reg = readl(FLINTDMACR(flctl));
+	reg &= ~DREQ0EN;
+	writel(reg, FLINTDMACR(flctl));
+
+	dma_unmap_single(chan->device->dev, dma_addr, len, dir);
+
+	/* ret > 0 is success */
+	return ret;
+}
+
 static void read_datareg(struct sh_flctl *flctl, int offset)
 {
 	unsigned long data;
@@ -279,11 +425,20 @@ static void read_fiforeg(struct sh_flctl *flctl, int rlen, int offset)
 
 	len_4align = (rlen + 3) / 4;
 
+	/* initiate DMA transfer */
+	if (flctl->chan_fifo0_rx && rlen >= 32 &&
+		flctl_dma_fifo0_transfer(flctl, buf, rlen, DMA_DEV_TO_MEM) > 0)
+			goto convert;	/* DMA success */
+
+	/* do polling transfer */
 	for (i = 0; i < len_4align; i++) {
 		wait_rfifo_ready(flctl);
 		buf[i] = readl(FLDTFIFO(flctl));
-		buf[i] = be32_to_cpu(buf[i]);
 	}
+
+convert:
+	for (i = 0; i < len_4align; i++)
+		buf[i] = be32_to_cpu(buf[i]);
 }
 
 static enum flctl_ecc_res_t read_ecfiforeg
@@ -325,9 +480,19 @@ static void write_ec_fiforeg(struct sh_flctl *flctl, int rlen,
 	unsigned long *buf = (unsigned long *)&flctl->done_buff[offset];
 
 	len_4align = (rlen + 3) / 4;
+
+	for (i = 0; i < len_4align; i++)
+		buf[i] = cpu_to_be32(buf[i]);
+
+	/* initiate DMA transfer */
+	if (flctl->chan_fifo0_tx && rlen >= 32 &&
+		flctl_dma_fifo0_transfer(flctl, buf, rlen, DMA_MEM_TO_DEV) > 0)
+			return;	/* DMA success */
+
+	/* do polling transfer */
 	for (i = 0; i < len_4align; i++) {
 		wait_wecfifo_ready(flctl);
-		writel(cpu_to_be32(buf[i]), FLECFIFO(flctl));
+		writel(buf[i], FLECFIFO(flctl));
 	}
 }
 
@@ -925,6 +1090,8 @@ static int __devinit flctl_probe(struct platform_device *pdev)
 	pm_runtime_enable(&pdev->dev);
 	pm_runtime_resume(&pdev->dev);
 
+	flctl_setup_dma(flctl);
+
 	ret = nand_scan_ident(flctl_mtd, 1, NULL);
 	if (ret)
 		goto err_chip;
@@ -942,6 +1109,7 @@ static int __devinit flctl_probe(struct platform_device *pdev)
 	return 0;
 
 err_chip:
+	flctl_release_dma(flctl);
 	pm_runtime_disable(&pdev->dev);
 	free_irq(irq, flctl);
 err_flste:
@@ -955,6 +1123,7 @@ static int __devexit flctl_remove(struct platform_device *pdev)
 {
 	struct sh_flctl *flctl = platform_get_drvdata(pdev);
 
+	flctl_release_dma(flctl);
 	nand_release(&flctl->mtd);
 	pm_runtime_disable(&pdev->dev);
 	free_irq(platform_get_irq(pdev, 0), flctl);
diff --git a/include/linux/mtd/sh_flctl.h b/include/linux/mtd/sh_flctl.h
index 481557688d05..1c28f8879b1c 100644
--- a/include/linux/mtd/sh_flctl.h
+++ b/include/linux/mtd/sh_flctl.h
@@ -20,6 +20,7 @@
 #ifndef __SH_FLCTL_H__
 #define __SH_FLCTL_H__
 
+#include <linux/completion.h>
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/nand.h>
 #include <linux/mtd/partitions.h>
@@ -107,6 +108,7 @@
 #define ESTERINTE	(0x1 << 24)	/* ECC error interrupt enable */
 #define AC1CLR		(0x1 << 19)	/* ECC FIFO clear */
 #define AC0CLR		(0x1 << 18)	/* Data FIFO clear */
+#define DREQ0EN		(0x1 << 16)	/* FLDTFIFODMA Request Enable */
 #define ECERB		(0x1 << 9)	/* ECC error */
 #define STERB		(0x1 << 8)	/* Status error */
 #define STERINTE	(0x1 << 4)	/* Status error enable */
@@ -138,6 +140,8 @@ enum flctl_ecc_res_t {
 	FL_TIMEOUT
 };
 
+struct dma_chan;
+
 struct sh_flctl {
 	struct mtd_info		mtd;
 	struct nand_chip	chip;
@@ -161,6 +165,11 @@ struct sh_flctl {
 	unsigned hwecc:1;	/* Hardware ECC (0 = disabled, 1 = enabled) */
 	unsigned holden:1;	/* Hardware has FLHOLDCR and HOLDEN is set */
 	unsigned qos_request:1;	/* QoS request to prevent deep power shutdown */
+
+	/* DMA related objects */
+	struct dma_chan		*chan_fifo0_rx;
+	struct dma_chan		*chan_fifo0_tx;
+	struct completion	dma_complete;
 };
 
 struct sh_flctl_platform_data {
@@ -170,6 +179,9 @@ struct sh_flctl_platform_data {
 
 	unsigned has_hwecc:1;
 	unsigned use_holden:1;
+
+	unsigned int            slave_id_fifo0_tx;
+	unsigned int            slave_id_fifo0_rx;
 };
 
 static inline struct sh_flctl *mtd_to_flctl(struct mtd_info *mtdinfo)
-- 
cgit v1.2.3


From 9ef525a9141b14d23613faad303cf48a20814f1b Mon Sep 17 00:00:00 2001
From: "Robert P. J. Day" <rpjday@crashcourse.ca>
Date: Thu, 25 Oct 2012 09:43:10 -0400
Subject: mtd: Fix kernel-doc content to avoid warning.

Add missing colons to fix kernel-doc generation warnings.

Signed-off-by: Robert P. J. Day <rpjday@crashcourse.ca>
Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
---
 include/linux/mtd/nand.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 24e915957e4f..9d8a6048aacd 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -471,8 +471,8 @@ struct nand_buffers {
  *			non 0 if ONFI supported.
  * @onfi_params:	[INTERN] holds the ONFI page parameter when ONFI is
  *			supported, 0 otherwise.
- * @onfi_set_features	[REPLACEABLE] set the features for ONFI nand
- * @onfi_get_features	[REPLACEABLE] get the features for ONFI nand
+ * @onfi_set_features:	[REPLACEABLE] set the features for ONFI nand
+ * @onfi_get_features:	[REPLACEABLE] get the features for ONFI nand
  * @ecclayout:		[REPLACEABLE] the default ECC placement scheme
  * @bbt:		[INTERN] bad block table pointer
  * @bbt_td:		[REPLACEABLE] bad block table descriptor for flash
-- 
cgit v1.2.3


From 3e9ce49e0ef95e22790a74720f0068696b2477c9 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Mon, 29 Oct 2012 22:47:26 +0530
Subject: mtd: map: Fix compilation warning

This patch is an attempt to fix following compilation warning.

In file included from drivers/mtd/chips/cfi_cmdset_0001.c:35:0:
drivers/mtd/chips/cfi_cmdset_0001.c: In function 'cfi_intelext_write_words':
include/linux/mtd/map.h:331:11: warning: 'r.x[0]' may be used uninitialized in this function [-Wmaybe-uninitialized]

I could have used uninitialized_var() too, but didn't used it as the final else
part of map_word_load() is missing. So there is a chance that it might be passed
uninitialized. Better initialize to zero.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
---
 include/linux/mtd/map.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mtd/map.h b/include/linux/mtd/map.h
index 3595a0236b0f..56c7936e0c65 100644
--- a/include/linux/mtd/map.h
+++ b/include/linux/mtd/map.h
@@ -328,7 +328,7 @@ static inline int map_word_bitsset(struct map_info *map, map_word val1, map_word
 
 static inline map_word map_word_load(struct map_info *map, const void *ptr)
 {
-	map_word r;
+	map_word r = {{0} };
 
 	if (map_bankwidth_is_1(map))
 		r.x[0] = *(unsigned char *)ptr;
-- 
cgit v1.2.3


From ebd5ac165f2aaefb767c53112c2010b0ff3df688 Mon Sep 17 00:00:00 2001
From: Shinya Kuribayashi <shinya.kuribayashi.px@renesas.com>
Date: Wed, 24 Oct 2012 19:58:10 +0900
Subject: i2c: i2c-sh_mobile: support I2C hardware block with a faster
 operating clock

On newer SH-/R-Mobile SoCs, a clock supply to the I2C hardware block,
which is used to generate the SCL clock output, is getting faster than
before, while on the other hand, the SCL clock control registers, ICCH
and ICCL, stay unchanged in 9-bit-wide (8+1).

On such silicons, the internal SCL clock counter gets incremented every
2 clocks of the operating clock.

This patch makes it configurable through platform data.

Signed-off-by: Shinya Kuribayashi <shinya.kuribayashi.px@renesas.com>
Signed-off-by: Wolfram Sang <w.sang@pengutronix.de>
---
 drivers/i2c/busses/i2c-sh_mobile.c | 5 +++++
 include/linux/i2c/i2c-sh_mobile.h  | 1 +
 2 files changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/i2c/busses/i2c-sh_mobile.c b/drivers/i2c/busses/i2c-sh_mobile.c
index 4dc0cc3611c2..4c283583bea0 100644
--- a/drivers/i2c/busses/i2c-sh_mobile.c
+++ b/drivers/i2c/busses/i2c-sh_mobile.c
@@ -120,6 +120,7 @@ struct sh_mobile_i2c_data {
 	void __iomem *reg;
 	struct i2c_adapter adap;
 	unsigned long bus_speed;
+	unsigned int clks_per_count;
 	struct clk *clk;
 	u_int8_t icic;
 	u_int8_t flags;
@@ -231,6 +232,7 @@ static void sh_mobile_i2c_init(struct sh_mobile_i2c_data *pd)
 	/* Get clock rate after clock is enabled */
 	clk_enable(pd->clk);
 	i2c_clk_khz = clk_get_rate(pd->clk) / 1000;
+	i2c_clk_khz /= pd->clks_per_count;
 
 	if (pd->bus_speed == STANDARD_MODE) {
 		tLOW	= 47;	/* tLOW = 4.7 us */
@@ -658,6 +660,9 @@ static int sh_mobile_i2c_probe(struct platform_device *dev)
 	pd->bus_speed = STANDARD_MODE;
 	if (pdata && pdata->bus_speed)
 		pd->bus_speed = pdata->bus_speed;
+	pd->clks_per_count = 1;
+	if (pdata && pdata->clks_per_count)
+		pd->clks_per_count = pdata->clks_per_count;
 
 	/* The IIC blocks on SH-Mobile ARM processors
 	 * come with two new bits in ICIC.
diff --git a/include/linux/i2c/i2c-sh_mobile.h b/include/linux/i2c/i2c-sh_mobile.h
index beda7081aead..06e3089795fb 100644
--- a/include/linux/i2c/i2c-sh_mobile.h
+++ b/include/linux/i2c/i2c-sh_mobile.h
@@ -5,6 +5,7 @@
 
 struct i2c_sh_mobile_platform_data {
 	unsigned long bus_speed;
+	unsigned int clks_per_count;
 };
 
 #endif /* __I2C_SH_MOBILE_H__ */
-- 
cgit v1.2.3


From d611d41b46c96195b9a168a21992782458826e07 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 6 Nov 2012 22:55:27 +0100
Subject: mtd: diskonchip: use inline functions for DocRead/DocWrite

The diskonchip drivers traditionally use home-grown macros for
doing MMIO accesses, which cause a lot of warnings, at least
on ARM machines:

drivers/mtd/devices/doc2000.c: In function 'doc_write':
drivers/mtd/devices/doc2000.c:854:5: warning: value computed is not used [-Wunused-value]
drivers/mtd/devices/doc2000.c: In function 'doc_erase':
drivers/mtd/devices/doc2000.c:1123:5: warning: value computed is not used [-Wunused-value
drivers/mtd/nand/diskonchip.c: In function 'doc2000_read_byte':
drivers/mtd/nand/diskonchip.c:318:3: warning: value computed is not used [-Wunused-value]

A nicer solution is to use the architecture-defined I/O accessors.
Here, we use the __raw_readl/__raw_writel style, instead of the
proper readl/writel ones, in order to preserve the odd semantics
of the existing macros that have their own barrier implementation
and no byte swap. It would be nice to fix this properly and use
the correct accessors as well as make the word size independent
from the architecture, but I guess the hardware is obsolete
enough that we should better not mess the driver an more than
necessary.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
---
 include/linux/mtd/doc2000.h | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mtd/doc2000.h b/include/linux/mtd/doc2000.h
index 0f6fea73a1f6..407d1e556c39 100644
--- a/include/linux/mtd/doc2000.h
+++ b/include/linux/mtd/doc2000.h
@@ -92,12 +92,26 @@
  * Others use readb/writeb
  */
 #if defined(__arm__)
-#define ReadDOC_(adr, reg)      ((unsigned char)(*(volatile __u32 *)(((unsigned long)adr)+((reg)<<2))))
-#define WriteDOC_(d, adr, reg)  do{ *(volatile __u32 *)(((unsigned long)adr)+((reg)<<2)) = (__u32)d; wmb();} while(0)
+static inline u8 ReadDOC_(u32 __iomem *addr, unsigned long reg)
+{
+	return __raw_readl(addr + reg);
+}
+static inline void WriteDOC_(u8 data, u32 __iomem *addr, unsigned long reg)
+{
+	__raw_writel(data, addr + reg);
+	wmb();
+}
 #define DOC_IOREMAP_LEN 0x8000
 #elif defined(__ppc__)
-#define ReadDOC_(adr, reg)      ((unsigned char)(*(volatile __u16 *)(((unsigned long)adr)+((reg)<<1))))
-#define WriteDOC_(d, adr, reg)  do{ *(volatile __u16 *)(((unsigned long)adr)+((reg)<<1)) = (__u16)d; wmb();} while(0)
+static inline u8 ReadDOC_(u16 __iomem *addr, unsigned long reg)
+{
+	return __raw_readw(addr + reg);
+}
+static inline void WriteDOC_(u8 data, u16 __iomem *addr, unsigned long reg)
+{
+	__raw_writew(data, addr + reg);
+	wmb();
+}
 #define DOC_IOREMAP_LEN 0x4000
 #else
 #define ReadDOC_(adr, reg)      readb((void __iomem *)(adr) + (reg))
-- 
cgit v1.2.3


From 5d27aa5af04f58f3020de1c224dcf8a62151fd58 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 6 Nov 2012 22:55:28 +0100
Subject: mtd: uninitialized variable warning in map.h

The map_word_load() function initializes exactly
as many words in the buffer as required, but gcc
cannot figure this out and gives a misleading
warning. Marking the local variable as
uninitialized_var shuts up that warning.

Without this patch, building acs5k_defconfig results in:

drivers/mtd/chips/cfi_cmdset_0002.c: In function 'cfi_amdstd_panic_write':
include/linux/mtd/map.h:331:11: warning: 'r.x[0]' may be used uninitialized in this function [-Wuninitialized]
drivers/mtd/chips/cfi_cmdset_0002.c: In function 'cfi_amdstd_write_words':
include/linux/mtd/map.h:331:11: warning: 'r.x[0]' may be used uninitialized in this function [-Wuninitialized]
drivers/mtd/chips/cfi_cmdset_0001.c: In function 'cfi_intelext_write_words':
include/linux/mtd/map.h:331:11: warning: 'r.x[0]' may be used uninitialized in this function [-Wuninitialized]

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
---
 include/linux/mtd/map.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mtd/map.h b/include/linux/mtd/map.h
index 56c7936e0c65..f6eb4332ac92 100644
--- a/include/linux/mtd/map.h
+++ b/include/linux/mtd/map.h
@@ -391,7 +391,7 @@ static inline map_word map_word_ff(struct map_info *map)
 
 static inline map_word inline_map_read(struct map_info *map, unsigned long ofs)
 {
-	map_word r;
+	map_word uninitialized_var(r);
 
 	if (map_bankwidth_is_1(map))
 		r.x[0] = __raw_readb(map->virt + ofs);
-- 
cgit v1.2.3


From 33a5f6261a61af28f7b4c86f9f958da0f206c914 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Thu, 11 Oct 2012 17:52:56 +0200
Subject: nohz: Add API to check tick state

We need some quick way to check if the CPU has stopped
its tick. This will be useful to implement the printk tick
using the irq work subsystem.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
---
 include/linux/tick.h     | 17 ++++++++++++++++-
 kernel/time/tick-sched.c |  2 +-
 2 files changed, 17 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tick.h b/include/linux/tick.h
index f37fceb69b73..2307dd31b966 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -8,6 +8,8 @@
 
 #include <linux/clockchips.h>
 #include <linux/irqflags.h>
+#include <linux/percpu.h>
+#include <linux/hrtimer.h>
 
 #ifdef CONFIG_GENERIC_CLOCKEVENTS
 
@@ -122,13 +124,26 @@ static inline int tick_oneshot_mode_active(void) { return 0; }
 #endif /* !CONFIG_GENERIC_CLOCKEVENTS */
 
 # ifdef CONFIG_NO_HZ
+DECLARE_PER_CPU(struct tick_sched, tick_cpu_sched);
+
+static inline int tick_nohz_tick_stopped(void)
+{
+	return __this_cpu_read(tick_cpu_sched.tick_stopped);
+}
+
 extern void tick_nohz_idle_enter(void);
 extern void tick_nohz_idle_exit(void);
 extern void tick_nohz_irq_exit(void);
 extern ktime_t tick_nohz_get_sleep_length(void);
 extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time);
 extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time);
-# else
+
+# else /* !CONFIG_NO_HZ */
+static inline int tick_nohz_tick_stopped(void)
+{
+	return 0;
+}
+
 static inline void tick_nohz_idle_enter(void) { }
 static inline void tick_nohz_idle_exit(void) { }
 
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index a40260885265..9e945aa090ac 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -28,7 +28,7 @@
 /*
  * Per cpu nohz control structure
  */
-static DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched);
+DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched);
 
 /*
  * The time, when the last jiffy update happened. Protected by xtime_lock.
-- 
cgit v1.2.3


From 00b42959106a9ca1c2899e591ae4e9a83ad6af05 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Wed, 7 Nov 2012 21:03:07 +0100
Subject: irq_work: Don't stop the tick with pending works

Don't stop the tick if we have pending irq works on the
queue, otherwise if the arch can't raise self-IPIs, we may not
find an opportunity to execute the pending works for a while.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
---
 include/linux/irq_work.h |  6 ++++++
 kernel/irq_work.c        | 11 +++++++++++
 kernel/time/tick-sched.c |  3 ++-
 3 files changed, 19 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h
index 6a9e8f5399e2..a69704f37204 100644
--- a/include/linux/irq_work.h
+++ b/include/linux/irq_work.h
@@ -20,4 +20,10 @@ bool irq_work_queue(struct irq_work *work);
 void irq_work_run(void);
 void irq_work_sync(struct irq_work *work);
 
+#ifdef CONFIG_IRQ_WORK
+bool irq_work_needs_cpu(void);
+#else
+static bool irq_work_needs_cpu(void) { return false; }
+#endif
+
 #endif /* _LINUX_IRQ_WORK_H */
diff --git a/kernel/irq_work.c b/kernel/irq_work.c
index 64eddd59ed83..b3c113a14727 100644
--- a/kernel/irq_work.c
+++ b/kernel/irq_work.c
@@ -99,6 +99,17 @@ bool irq_work_queue(struct irq_work *work)
 }
 EXPORT_SYMBOL_GPL(irq_work_queue);
 
+bool irq_work_needs_cpu(void)
+{
+	struct llist_head *this_list;
+
+	this_list = &__get_cpu_var(irq_work_list);
+	if (llist_empty(this_list))
+		return false;
+
+	return true;
+}
+
 /*
  * Run the irq_work entries on this cpu. Requires to be ran from hardirq
  * context with local IRQs disabled.
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 9e945aa090ac..f249e8c3e58e 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -20,6 +20,7 @@
 #include <linux/profile.h>
 #include <linux/sched.h>
 #include <linux/module.h>
+#include <linux/irq_work.h>
 
 #include <asm/irq_regs.h>
 
@@ -289,7 +290,7 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
 	} while (read_seqretry(&xtime_lock, seq));
 
 	if (rcu_needs_cpu(cpu, &rcu_delta_jiffies) || printk_needs_cpu(cpu) ||
-	    arch_needs_cpu(cpu)) {
+	    arch_needs_cpu(cpu) || irq_work_needs_cpu()) {
 		next_jiffies = last_jiffies + 1;
 		delta_jiffies = 1;
 	} else {
-- 
cgit v1.2.3


From 60a8f428320918458a9a21052777eada68eebfd8 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Fri, 16 Nov 2012 19:39:17 -0800
Subject: x86, mm: Move after_bootmem to mm_internel.h

it is only used in arch/x86/mm/init*.c

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Link: http://lkml.kernel.org/r/1353123563-3103-41-git-send-email-yinghai@kernel.org
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/mm/mm_internal.h | 2 ++
 include/linux/mm.h        | 1 -
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/arch/x86/mm/mm_internal.h b/arch/x86/mm/mm_internal.h
index dc79ac121774..6b563a118891 100644
--- a/arch/x86/mm/mm_internal.h
+++ b/arch/x86/mm/mm_internal.h
@@ -14,4 +14,6 @@ unsigned long kernel_physical_mapping_init(unsigned long start,
 					     unsigned long page_size_mask);
 void zone_sizes_init(void);
 
+extern int after_bootmem;
+
 #endif	/* __X86_MM_INTERNAL_H */
diff --git a/include/linux/mm.h b/include/linux/mm.h
index bcaab4e6fe91..64d5271a3d36 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1355,7 +1355,6 @@ extern void __init mmap_init(void);
 extern void show_mem(unsigned int flags);
 extern void si_meminfo(struct sysinfo * val);
 extern void si_meminfo_node(struct sysinfo *val, int nid);
-extern int after_bootmem;
 
 extern __printf(3, 4)
 void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...);
-- 
cgit v1.2.3


From bc6679aef673f9dcb8f718528fc3df49ff661af9 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Fri, 19 Oct 2012 16:43:41 -0400
Subject: irq_work: Make self-IPIs optable

On irq work initialization, let the user choose to define it
as "lazy" or not. "Lazy" means that we don't want to send
an IPI (provided the arch can anyway) when we enqueue this
work but we rather prefer to wait for the next timer tick
to execute our work if possible.

This is going to be a benefit for non-urgent enqueuers
(like printk in the future) that may prefer not to raise
an IPI storm in case of frequent enqueuing on short periods
of time.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
---
 include/linux/irq_work.h | 14 ++++++++++++++
 kernel/irq_work.c        | 47 +++++++++++++++++++++++++++--------------------
 2 files changed, 41 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h
index a69704f37204..b28eb60c8bf6 100644
--- a/include/linux/irq_work.h
+++ b/include/linux/irq_work.h
@@ -3,6 +3,20 @@
 
 #include <linux/llist.h>
 
+/*
+ * An entry can be in one of four states:
+ *
+ * free	     NULL, 0 -> {claimed}       : free to be used
+ * claimed   NULL, 3 -> {pending}       : claimed to be enqueued
+ * pending   next, 3 -> {busy}          : queued, pending callback
+ * busy      NULL, 2 -> {free, claimed} : callback in progress, can be claimed
+ */
+
+#define IRQ_WORK_PENDING	1UL
+#define IRQ_WORK_BUSY		2UL
+#define IRQ_WORK_FLAGS		3UL
+#define IRQ_WORK_LAZY		4UL /* Doesn't want IPI, wait for tick */
+
 struct irq_work {
 	unsigned long flags;
 	struct llist_node llnode;
diff --git a/kernel/irq_work.c b/kernel/irq_work.c
index 480f74715ba9..7f3a59bc8e3d 100644
--- a/kernel/irq_work.c
+++ b/kernel/irq_work.c
@@ -12,24 +12,15 @@
 #include <linux/percpu.h>
 #include <linux/hardirq.h>
 #include <linux/irqflags.h>
+#include <linux/sched.h>
+#include <linux/tick.h>
 #include <linux/cpu.h>
 #include <linux/notifier.h>
 #include <asm/processor.h>
 
-/*
- * An entry can be in one of four states:
- *
- * free	     NULL, 0 -> {claimed}       : free to be used
- * claimed   NULL, 3 -> {pending}       : claimed to be enqueued
- * pending   next, 3 -> {busy}          : queued, pending callback
- * busy      NULL, 2 -> {free, claimed} : callback in progress, can be claimed
- */
-
-#define IRQ_WORK_PENDING	1UL
-#define IRQ_WORK_BUSY		2UL
-#define IRQ_WORK_FLAGS		3UL
 
 static DEFINE_PER_CPU(struct llist_head, irq_work_list);
+static DEFINE_PER_CPU(int, irq_work_raised);
 
 /*
  * Claim the entry so that no one else will poke at it.
@@ -69,14 +60,19 @@ void __weak arch_irq_work_raise(void)
  */
 static void __irq_work_queue(struct irq_work *work)
 {
-	bool empty;
-
 	preempt_disable();
 
-	empty = llist_add(&work->llnode, &__get_cpu_var(irq_work_list));
-	/* The list was empty, raise self-interrupt to start processing. */
-	if (empty)
-		arch_irq_work_raise();
+	llist_add(&work->llnode, &__get_cpu_var(irq_work_list));
+
+	/*
+	 * If the work is not "lazy" or the tick is stopped, raise the irq
+	 * work interrupt (if supported by the arch), otherwise, just wait
+	 * for the next tick.
+	 */
+	if (!(work->flags & IRQ_WORK_LAZY) || tick_nohz_tick_stopped()) {
+		if (!this_cpu_cmpxchg(irq_work_raised, 0, 1))
+			arch_irq_work_raise();
+	}
 
 	preempt_enable();
 }
@@ -117,10 +113,19 @@ bool irq_work_needs_cpu(void)
 
 static void __irq_work_run(void)
 {
+	unsigned long flags;
 	struct irq_work *work;
 	struct llist_head *this_list;
 	struct llist_node *llnode;
 
+
+	/*
+	 * Reset the "raised" state right before we check the list because
+	 * an NMI may enqueue after we find the list empty from the runner.
+	 */
+	__this_cpu_write(irq_work_raised, 0);
+	barrier();
+
 	this_list = &__get_cpu_var(irq_work_list);
 	if (llist_empty(this_list))
 		return;
@@ -140,13 +145,15 @@ static void __irq_work_run(void)
 		 * to claim that work don't rely on us to handle their data
 		 * while we are in the middle of the func.
 		 */
-		xchg(&work->flags, IRQ_WORK_BUSY);
+		flags = work->flags & ~IRQ_WORK_PENDING;
+		xchg(&work->flags, flags);
+
 		work->func(work);
 		/*
 		 * Clear the BUSY bit and return to the free state if
 		 * no-one else claimed it meanwhile.
 		 */
-		(void)cmpxchg(&work->flags, IRQ_WORK_BUSY, 0);
+		(void)cmpxchg(&work->flags, flags, flags & ~IRQ_WORK_BUSY);
 	}
 }
 
-- 
cgit v1.2.3


From 74876a98a87a115254b3a66a14b27320b7f0acaa Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Fri, 12 Oct 2012 18:00:23 +0200
Subject: printk: Wake up klogd using irq_work

klogd is woken up asynchronously from the tick in order
to do it safely.

However if printk is called when the tick is stopped, the reader
won't be woken up until the next interrupt, which might not fire
for a while. As a result, the user may miss some message.

To fix this, lets implement the printk tick using a lazy irq work.
This subsystem takes care of the timer tick state and can
fix up accordingly.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
---
 include/linux/printk.h   |  3 ---
 init/Kconfig             |  1 +
 kernel/printk.c          | 36 ++++++++++++++++++++----------------
 kernel/time/tick-sched.c |  2 +-
 kernel/timer.c           |  1 -
 5 files changed, 22 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/printk.h b/include/linux/printk.h
index 9afc01e5a0a6..86c4b6294713 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -98,9 +98,6 @@ int no_printk(const char *fmt, ...)
 extern asmlinkage __printf(1, 2)
 void early_printk(const char *fmt, ...);
 
-extern int printk_needs_cpu(int cpu);
-extern void printk_tick(void);
-
 #ifdef CONFIG_PRINTK
 asmlinkage __printf(5, 0)
 int vprintk_emit(int facility, int level,
diff --git a/init/Kconfig b/init/Kconfig
index cdc152c75727..c575566be47d 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1196,6 +1196,7 @@ config HOTPLUG
 config PRINTK
 	default y
 	bool "Enable support for printk" if EXPERT
+	select IRQ_WORK
 	help
 	  This option enables normal printk support. Removing it
 	  eliminates most of the message strings from the kernel image
diff --git a/kernel/printk.c b/kernel/printk.c
index 2d607f4d1797..c9104feba5ec 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -42,6 +42,7 @@
 #include <linux/notifier.h>
 #include <linux/rculist.h>
 #include <linux/poll.h>
+#include <linux/irq_work.h>
 
 #include <asm/uaccess.h>
 
@@ -1955,30 +1956,32 @@ int is_console_locked(void)
 static DEFINE_PER_CPU(int, printk_pending);
 static DEFINE_PER_CPU(char [PRINTK_BUF_SIZE], printk_sched_buf);
 
-void printk_tick(void)
+static void wake_up_klogd_work_func(struct irq_work *irq_work)
 {
-	if (__this_cpu_read(printk_pending)) {
-		int pending = __this_cpu_xchg(printk_pending, 0);
-		if (pending & PRINTK_PENDING_SCHED) {
-			char *buf = __get_cpu_var(printk_sched_buf);
-			printk(KERN_WARNING "[sched_delayed] %s", buf);
-		}
-		if (pending & PRINTK_PENDING_WAKEUP)
-			wake_up_interruptible(&log_wait);
+	int pending = __this_cpu_xchg(printk_pending, 0);
+
+	if (pending & PRINTK_PENDING_SCHED) {
+		char *buf = __get_cpu_var(printk_sched_buf);
+		printk(KERN_WARNING "[sched_delayed] %s", buf);
 	}
-}
 
-int printk_needs_cpu(int cpu)
-{
-	if (cpu_is_offline(cpu))
-		printk_tick();
-	return __this_cpu_read(printk_pending);
+	if (pending & PRINTK_PENDING_WAKEUP)
+		wake_up_interruptible(&log_wait);
 }
 
+static DEFINE_PER_CPU(struct irq_work, wake_up_klogd_work) = {
+	.func = wake_up_klogd_work_func,
+	.flags = IRQ_WORK_LAZY,
+};
+
 void wake_up_klogd(void)
 {
-	if (waitqueue_active(&log_wait))
+	preempt_disable();
+	if (waitqueue_active(&log_wait)) {
 		this_cpu_or(printk_pending, PRINTK_PENDING_WAKEUP);
+		irq_work_queue(&__get_cpu_var(wake_up_klogd_work));
+	}
+	preempt_enable();
 }
 
 static void console_cont_flush(char *text, size_t size)
@@ -2458,6 +2461,7 @@ int printk_sched(const char *fmt, ...)
 	va_end(args);
 
 	__this_cpu_or(printk_pending, PRINTK_PENDING_SCHED);
+	irq_work_queue(&__get_cpu_var(wake_up_klogd_work));
 	local_irq_restore(flags);
 
 	return r;
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index f249e8c3e58e..822d7572bf2d 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -289,7 +289,7 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
 		time_delta = timekeeping_max_deferment();
 	} while (read_seqretry(&xtime_lock, seq));
 
-	if (rcu_needs_cpu(cpu, &rcu_delta_jiffies) || printk_needs_cpu(cpu) ||
+	if (rcu_needs_cpu(cpu, &rcu_delta_jiffies) ||
 	    arch_needs_cpu(cpu) || irq_work_needs_cpu()) {
 		next_jiffies = last_jiffies + 1;
 		delta_jiffies = 1;
diff --git a/kernel/timer.c b/kernel/timer.c
index 367d00858482..ff3b5165737b 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1351,7 +1351,6 @@ void update_process_times(int user_tick)
 	account_process_tick(p, user_tick);
 	run_local_timers();
 	rcu_check_callbacks(cpu, user_tick);
-	printk_tick();
 #ifdef CONFIG_IRQ_WORK
 	if (in_irq())
 		irq_work_run();
-- 
cgit v1.2.3


From ea2ce92e44dc83b7a69c2aedd9c52bfe7fee1a62 Mon Sep 17 00:00:00 2001
From: Ramakrishna Pallala <ramakrishna.pallala@intel.com>
Date: Tue, 9 Oct 2012 22:25:29 +0530
Subject: power_supply: Add support for CHARGE_CONTROL_* attributes

Add support for power supply attributes CHARGE_CONTROL_LIMIT
and CHARGE_CONTROL_LIMIT_MAX.

These new attributes will enable the user space to implement
custom charging algorithms based on platform state.

Signed-off-by: Ramakrishna Pallala <ramakrishna.pallala@intel.com>
Signed-off-by: Anton Vorontsov <anton.vorontsov@linaro.org>
---
 Documentation/power/power_supply_class.txt | 3 +++
 drivers/power/power_supply_sysfs.c         | 2 ++
 include/linux/power_supply.h               | 2 ++
 3 files changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/power/power_supply_class.txt b/Documentation/power/power_supply_class.txt
index 9c647bd7c5a9..3f10b39b0346 100644
--- a/Documentation/power/power_supply_class.txt
+++ b/Documentation/power/power_supply_class.txt
@@ -123,6 +123,9 @@ CONSTANT_CHARGE_VOLTAGE - constant charge voltage programmed by charger.
 CONSTANT_CHARGE_VOLTAGE_MAX - maximum charge voltage supported by the
 power supply object.
 
+CHARGE_CONTROL_LIMIT - current charge control limit setting
+CHARGE_CONTROL_LIMIT_MAX - maximum charge control limit setting
+
 ENERGY_FULL, ENERGY_EMPTY - same as above but for energy.
 
 CAPACITY - capacity in percents.
diff --git a/drivers/power/power_supply_sysfs.c b/drivers/power/power_supply_sysfs.c
index 395c2cfa16c0..40fa3b7cae54 100644
--- a/drivers/power/power_supply_sysfs.c
+++ b/drivers/power/power_supply_sysfs.c
@@ -164,6 +164,8 @@ static struct device_attribute power_supply_attrs[] = {
 	POWER_SUPPLY_ATTR(constant_charge_current_max),
 	POWER_SUPPLY_ATTR(constant_charge_voltage),
 	POWER_SUPPLY_ATTR(constant_charge_voltage_max),
+	POWER_SUPPLY_ATTR(charge_control_limit),
+	POWER_SUPPLY_ATTR(charge_control_limit_max),
 	POWER_SUPPLY_ATTR(energy_full_design),
 	POWER_SUPPLY_ATTR(energy_empty_design),
 	POWER_SUPPLY_ATTR(energy_full),
diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h
index e5ef45834c3c..445b4b249af5 100644
--- a/include/linux/power_supply.h
+++ b/include/linux/power_supply.h
@@ -114,6 +114,8 @@ enum power_supply_property {
 	POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT_MAX,
 	POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE,
 	POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE_MAX,
+	POWER_SUPPLY_PROP_CHARGE_CONTROL_LIMIT,
+	POWER_SUPPLY_PROP_CHARGE_CONTROL_LIMIT_MAX,
 	POWER_SUPPLY_PROP_ENERGY_FULL_DESIGN,
 	POWER_SUPPLY_PROP_ENERGY_EMPTY_DESIGN,
 	POWER_SUPPLY_PROP_ENERGY_FULL,
-- 
cgit v1.2.3


From 952aeeb3ee28bc12a70744e40636de40688eb60d Mon Sep 17 00:00:00 2001
From: Ramakrishna Pallala <ramakrishna.pallala@intel.com>
Date: Tue, 9 Oct 2012 22:25:59 +0530
Subject: power_supply: Register power supply for thermal cooling device

This patch registers the power supply as a cooling device if the power
supply has support for charge throttling.

Now with this change low level drivers need not register with thermal
framework as it is automatically done by power supply framework.

Signed-off-by: Ramakrishna Pallala <ramakrishna.pallala@intel.com>
Signed-off-by: Anton Vorontsov <anton.vorontsov@linaro.org>
---
 drivers/power/power_supply_core.c | 96 +++++++++++++++++++++++++++++++++++++++
 include/linux/power_supply.h      |  1 +
 2 files changed, 97 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/power/power_supply_core.c b/drivers/power/power_supply_core.c
index 2436f1350013..f984da1066ec 100644
--- a/drivers/power/power_supply_core.c
+++ b/drivers/power/power_supply_core.c
@@ -216,6 +216,86 @@ static void psy_unregister_thermal(struct power_supply *psy)
 		return;
 	thermal_zone_device_unregister(psy->tzd);
 }
+
+/* thermal cooling device callbacks */
+static int ps_get_max_charge_cntl_limit(struct thermal_cooling_device *tcd,
+					unsigned long *state)
+{
+	struct power_supply *psy;
+	union power_supply_propval val;
+	int ret;
+
+	psy = tcd->devdata;
+	ret = psy->get_property(psy,
+		POWER_SUPPLY_PROP_CHARGE_CONTROL_LIMIT_MAX, &val);
+	if (!ret)
+		*state = val.intval;
+
+	return ret;
+}
+
+static int ps_get_cur_chrage_cntl_limit(struct thermal_cooling_device *tcd,
+					unsigned long *state)
+{
+	struct power_supply *psy;
+	union power_supply_propval val;
+	int ret;
+
+	psy = tcd->devdata;
+	ret = psy->get_property(psy,
+		POWER_SUPPLY_PROP_CHARGE_CONTROL_LIMIT, &val);
+	if (!ret)
+		*state = val.intval;
+
+	return ret;
+}
+
+static int ps_set_cur_charge_cntl_limit(struct thermal_cooling_device *tcd,
+					unsigned long state)
+{
+	struct power_supply *psy;
+	union power_supply_propval val;
+	int ret;
+
+	psy = tcd->devdata;
+	val.intval = state;
+	ret = psy->set_property(psy,
+		POWER_SUPPLY_PROP_CHARGE_CONTROL_LIMIT, &val);
+
+	return ret;
+}
+
+static struct thermal_cooling_device_ops psy_tcd_ops = {
+	.get_max_state = ps_get_max_charge_cntl_limit,
+	.get_cur_state = ps_get_cur_chrage_cntl_limit,
+	.set_cur_state = ps_set_cur_charge_cntl_limit,
+};
+
+static int psy_register_cooler(struct power_supply *psy)
+{
+	int i;
+
+	/* Register for cooling device if psy can control charging */
+	for (i = 0; i < psy->num_properties; i++) {
+		if (psy->properties[i] ==
+				POWER_SUPPLY_PROP_CHARGE_CONTROL_LIMIT) {
+			psy->tcd = thermal_cooling_device_register(
+							(char *)psy->name,
+							psy, &psy_tcd_ops);
+			if (IS_ERR(psy->tcd))
+				return PTR_ERR(psy->tcd);
+			break;
+		}
+	}
+	return 0;
+}
+
+static void psy_unregister_cooler(struct power_supply *psy)
+{
+	if (IS_ERR_OR_NULL(psy->tcd))
+		return;
+	thermal_cooling_device_unregister(psy->tcd);
+}
 #else
 static int psy_register_thermal(struct power_supply *psy)
 {
@@ -225,6 +305,15 @@ static int psy_register_thermal(struct power_supply *psy)
 static void psy_unregister_thermal(struct power_supply *psy)
 {
 }
+
+static int psy_register_cooler(struct power_supply *psy)
+{
+	return 0;
+}
+
+static void psy_unregister_cooler(struct power_supply *psy)
+{
+}
 #endif
 
 int power_supply_register(struct device *parent, struct power_supply *psy)
@@ -259,6 +348,10 @@ int power_supply_register(struct device *parent, struct power_supply *psy)
 	if (rc)
 		goto register_thermal_failed;
 
+	rc = psy_register_cooler(psy);
+	if (rc)
+		goto register_cooler_failed;
+
 	rc = power_supply_create_triggers(psy);
 	if (rc)
 		goto create_triggers_failed;
@@ -268,6 +361,8 @@ int power_supply_register(struct device *parent, struct power_supply *psy)
 	goto success;
 
 create_triggers_failed:
+	psy_unregister_cooler(psy);
+register_cooler_failed:
 	psy_unregister_thermal(psy);
 register_thermal_failed:
 	device_del(dev);
@@ -284,6 +379,7 @@ void power_supply_unregister(struct power_supply *psy)
 	cancel_work_sync(&psy->changed_work);
 	sysfs_remove_link(&psy->dev->kobj, "powers");
 	power_supply_remove_triggers(psy);
+	psy_unregister_cooler(psy);
 	psy_unregister_thermal(psy);
 	device_unregister(psy->dev);
 }
diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h
index 445b4b249af5..1f0ab90aff00 100644
--- a/include/linux/power_supply.h
+++ b/include/linux/power_supply.h
@@ -188,6 +188,7 @@ struct power_supply {
 	struct work_struct changed_work;
 #ifdef CONFIG_THERMAL
 	struct thermal_zone_device *tzd;
+	struct thermal_cooling_device *tcd;
 #endif
 
 #ifdef CONFIG_LEDS_TRIGGERS
-- 
cgit v1.2.3


From 08d816b8cb09de1fd8268c8f1dbc97c3cc435d67 Mon Sep 17 00:00:00 2001
From: "Kim, Milo" <Milo.Kim@ti.com>
Date: Mon, 22 Oct 2012 00:18:54 +0000
Subject: lp8788-charger: Fix ADC channel names

The name of ADC channel is configurable in the platform side. This name is
referenced in the IIO consumer driver. To get the IIO channel, specific
name in the platform data is used as an parameter of the
iio_channel_get(). Thus, lp8788_adc_id platform data are replaced with
specific names.

Signed-off-by: Milo(Woogyom) Kim <milo.kim@ti.com>
Signed-off-by: Anton Vorontsov <anton.vorontsov@linaro.org>
---
 drivers/power/lp8788-charger.c | 44 ++++++------------------------------------
 include/linux/mfd/lp8788.h     |  8 ++++----
 2 files changed, 10 insertions(+), 42 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/power/lp8788-charger.c b/drivers/power/lp8788-charger.c
index 1afa5f7a5359..fb592bfbec6e 100644
--- a/drivers/power/lp8788-charger.c
+++ b/drivers/power/lp8788-charger.c
@@ -584,50 +584,18 @@ static void lp8788_setup_adc_channel(const char *consumer_name,
 				struct lp8788_charger *pchg)
 {
 	struct lp8788_charger_platform_data *pdata = pchg->pdata;
-	struct device *dev = pchg->lp->dev;
 	struct iio_channel *chan;
-	enum lp8788_adc_id id;
-	const char *chan_name[LPADC_MAX] = {
-		[LPADC_VBATT_5P5] = "vbatt-5p5",
-		[LPADC_VBATT_6P0] = "vbatt-6p0",
-		[LPADC_VBATT_5P0] = "vbatt-5p0",
-		[LPADC_ADC1]      = "adc1",
-		[LPADC_ADC2]      = "adc2",
-		[LPADC_ADC3]      = "adc3",
-		[LPADC_ADC4]      = "adc4",
-	};
 
 	if (!pdata)
 		return;
 
-	id = pdata->vbatt_adc;
-	switch (id) {
-	case LPADC_VBATT_5P5:
-	case LPADC_VBATT_6P0:
-	case LPADC_VBATT_5P0:
-		chan = iio_channel_get(consumer_name, chan_name[id]);
-		pchg->chan[LP8788_VBATT] = IS_ERR(chan) ? NULL : chan;
-		break;
-	default:
-		dev_err(dev, "invalid ADC id for VBATT: %d\n", id);
-		pchg->chan[LP8788_VBATT] = NULL;
-		break;
-	}
+	/* ADC channel for battery voltage */
+	chan = iio_channel_get(consumer_name, pdata->adc_vbatt);
+	pchg->chan[LP8788_VBATT] = IS_ERR(chan) ? NULL : chan;
 
-	id = pdata->batt_temp_adc;
-	switch (id) {
-	case LPADC_ADC1:
-	case LPADC_ADC2:
-	case LPADC_ADC3:
-	case LPADC_ADC4:
-		chan = iio_channel_get(consumer_name, chan_name[id]);
-		pchg->chan[LP8788_BATT_TEMP] = IS_ERR(chan) ? NULL : chan;
-		break;
-	default:
-		dev_err(dev, "invalid ADC id for BATT_TEMP : %d\n", id);
-		pchg->chan[LP8788_BATT_TEMP] = NULL;
-		break;
-	}
+	/* ADC channel for battery temperature */
+	chan = iio_channel_get(consumer_name, pdata->adc_batt_temp);
+	pchg->chan[LP8788_BATT_TEMP] = IS_ERR(chan) ? NULL : chan;
 }
 
 static void lp8788_release_adc_channel(struct lp8788_charger *pchg)
diff --git a/include/linux/mfd/lp8788.h b/include/linux/mfd/lp8788.h
index cec364bdccfa..2a32b16f79cb 100644
--- a/include/linux/mfd/lp8788.h
+++ b/include/linux/mfd/lp8788.h
@@ -211,16 +211,16 @@ struct lp8788_chg_param {
 
 /*
  * struct lp8788_charger_platform_data
- * @vbatt_adc         : adc selection id for battery voltage
- * @batt_temp_adc     : adc selection id for battery temperature
+ * @adc_vbatt         : adc channel name for battery voltage
+ * @adc_batt_temp     : adc channel name for battery temperature
  * @max_vbatt_mv      : used for calculating battery capacity
  * @chg_params        : initial charging parameters
  * @num_chg_params    : numbers of charging parameters
  * @charger_event     : the charger event can be reported to the platform side
  */
 struct lp8788_charger_platform_data {
-	enum lp8788_adc_id vbatt_adc;
-	enum lp8788_adc_id batt_temp_adc;
+	const char *adc_vbatt;
+	const char *adc_batt_temp;
 	unsigned int max_vbatt_mv;
 	struct lp8788_chg_param *chg_params;
 	int num_chg_params;
-- 
cgit v1.2.3


From 0bc98cc6155205b615a9d29a2d54d1b839521c04 Mon Sep 17 00:00:00 2001
From: Pali Rohár <pali.rohar@gmail.com>
Date: Fri, 2 Nov 2012 20:18:11 +0100
Subject: power_supply: Add bq2415x charger driver
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The bq2415x charger driver is needed for example on Nokia N900 for
charging battery. Driver is part of open source project to replace
proprietary battery management.

Signed-off-by: Pali Rohár <pali.rohar@gmail.com>
Signed-off-by: Anton Vorontsov <anton.vorontsov@linaro.org>
---
 drivers/power/bq2415x_charger.c       | 1644 +++++++++++++++++++++++++++++++++
 include/linux/power/bq2415x_charger.h |   90 ++
 2 files changed, 1734 insertions(+)
 create mode 100644 drivers/power/bq2415x_charger.c
 create mode 100644 include/linux/power/bq2415x_charger.h

(limited to 'include/linux')

diff --git a/drivers/power/bq2415x_charger.c b/drivers/power/bq2415x_charger.c
new file mode 100644
index 000000000000..017b3c27f769
--- /dev/null
+++ b/drivers/power/bq2415x_charger.c
@@ -0,0 +1,1644 @@
+/*
+    bq2415x_charger.c - bq2415x charger driver
+    Copyright (C) 2011-2012  Pali Rohár <pali.rohar@gmail.com>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+*/
+
+/*
+  Datasheets:
+  http://www.ti.com/product/bq24150
+  http://www.ti.com/product/bq24150a
+  http://www.ti.com/product/bq24152
+  http://www.ti.com/product/bq24153
+  http://www.ti.com/product/bq24153a
+  http://www.ti.com/product/bq24155
+*/
+
+#include <linux/version.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/param.h>
+#include <linux/err.h>
+#include <linux/workqueue.h>
+#include <linux/sysfs.h>
+#include <linux/platform_device.h>
+#include <linux/power_supply.h>
+#include <linux/idr.h>
+#include <linux/i2c.h>
+#include <linux/slab.h>
+
+#include <linux/power/bq2415x_charger.h>
+
+/* timeout for resetting chip timer */
+#define BQ2415X_TIMER_TIMEOUT		10
+
+#define BQ2415X_REG_STATUS		0x00
+#define BQ2415X_REG_CONTROL		0x01
+#define BQ2415X_REG_VOLTAGE		0x02
+#define BQ2415X_REG_VENDER		0x03
+#define BQ2415X_REG_CURRENT		0x04
+
+/* reset state for all registers */
+#define BQ2415X_RESET_STATUS		BIT(6)
+#define BQ2415X_RESET_CONTROL		(BIT(4)|BIT(5))
+#define BQ2415X_RESET_VOLTAGE		(BIT(1)|BIT(3))
+#define BQ2415X_RESET_CURRENT		(BIT(0)|BIT(3)|BIT(7))
+
+/* status register */
+#define BQ2415X_BIT_TMR_RST		7
+#define BQ2415X_BIT_OTG			7
+#define BQ2415X_BIT_EN_STAT		6
+#define BQ2415X_MASK_STAT		(BIT(4)|BIT(5))
+#define BQ2415X_SHIFT_STAT		4
+#define BQ2415X_BIT_BOOST		3
+#define BQ2415X_MASK_FAULT		(BIT(0)|BIT(1)|BIT(2))
+#define BQ2415X_SHIFT_FAULT		0
+
+/* control register */
+#define BQ2415X_MASK_LIMIT		(BIT(6)|BIT(7))
+#define BQ2415X_SHIFT_LIMIT		6
+#define BQ2415X_MASK_VLOWV		(BIT(4)|BIT(5))
+#define BQ2415X_SHIFT_VLOWV		4
+#define BQ2415X_BIT_TE			3
+#define BQ2415X_BIT_CE			2
+#define BQ2415X_BIT_HZ_MODE		1
+#define BQ2415X_BIT_OPA_MODE		0
+
+/* voltage register */
+#define BQ2415X_MASK_VO		(BIT(2)|BIT(3)|BIT(4)|BIT(5)|BIT(6)|BIT(7))
+#define BQ2415X_SHIFT_VO		2
+#define BQ2415X_BIT_OTG_PL		1
+#define BQ2415X_BIT_OTG_EN		0
+
+/* vender register */
+#define BQ2415X_MASK_VENDER		(BIT(5)|BIT(6)|BIT(7))
+#define BQ2415X_SHIFT_VENDER		5
+#define BQ2415X_MASK_PN			(BIT(3)|BIT(4))
+#define BQ2415X_SHIFT_PN		3
+#define BQ2415X_MASK_REVISION		(BIT(0)|BIT(1)|BIT(2))
+#define BQ2415X_SHIFT_REVISION		0
+
+/* current register */
+#define BQ2415X_MASK_RESET		BIT(7)
+#define BQ2415X_MASK_VI_CHRG		(BIT(4)|BIT(5)|BIT(6))
+#define BQ2415X_SHIFT_VI_CHRG		4
+/* N/A					BIT(3) */
+#define BQ2415X_MASK_VI_TERM		(BIT(0)|BIT(1)|BIT(2))
+#define BQ2415X_SHIFT_VI_TERM		0
+
+
+enum bq2415x_command {
+	BQ2415X_TIMER_RESET,
+	BQ2415X_OTG_STATUS,
+	BQ2415X_STAT_PIN_STATUS,
+	BQ2415X_STAT_PIN_ENABLE,
+	BQ2415X_STAT_PIN_DISABLE,
+	BQ2415X_CHARGE_STATUS,
+	BQ2415X_BOOST_STATUS,
+	BQ2415X_FAULT_STATUS,
+
+	BQ2415X_CHARGE_TERMINATION_STATUS,
+	BQ2415X_CHARGE_TERMINATION_ENABLE,
+	BQ2415X_CHARGE_TERMINATION_DISABLE,
+	BQ2415X_CHARGER_STATUS,
+	BQ2415X_CHARGER_ENABLE,
+	BQ2415X_CHARGER_DISABLE,
+	BQ2415X_HIGH_IMPEDANCE_STATUS,
+	BQ2415X_HIGH_IMPEDANCE_ENABLE,
+	BQ2415X_HIGH_IMPEDANCE_DISABLE,
+	BQ2415X_BOOST_MODE_STATUS,
+	BQ2415X_BOOST_MODE_ENABLE,
+	BQ2415X_BOOST_MODE_DISABLE,
+
+	BQ2415X_OTG_LEVEL,
+	BQ2415X_OTG_ACTIVATE_HIGH,
+	BQ2415X_OTG_ACTIVATE_LOW,
+	BQ2415X_OTG_PIN_STATUS,
+	BQ2415X_OTG_PIN_ENABLE,
+	BQ2415X_OTG_PIN_DISABLE,
+
+	BQ2415X_VENDER_CODE,
+	BQ2415X_PART_NUMBER,
+	BQ2415X_REVISION,
+};
+
+enum bq2415x_chip {
+	BQUNKNOWN,
+	BQ24150,
+	BQ24150A,
+	BQ24151,
+	BQ24151A,
+	BQ24152,
+	BQ24153,
+	BQ24153A,
+	BQ24155,
+	BQ24156,
+	BQ24156A,
+	BQ24158,
+};
+
+static char *bq2415x_chip_name[] = {
+	"unknown",
+	"bq24150",
+	"bq24150a",
+	"bq24151",
+	"bq24151a",
+	"bq24152",
+	"bq24153",
+	"bq24153a",
+	"bq24155",
+	"bq24156",
+	"bq24156a",
+	"bq24158",
+};
+
+struct bq2415x_device {
+	struct device *dev;
+	struct bq2415x_platform_data init_data;
+	struct power_supply charger;
+	struct delayed_work work;
+	enum bq2415x_mode reported_mode;/* mode reported by hook function */
+	enum bq2415x_mode mode;		/* current configured mode */
+	enum bq2415x_chip chip;
+	const char *timer_error;
+	char *model;
+	char *name;
+	int autotimer;	/* 1 - if driver automatically reset timer, 0 - not */
+	int automode;	/* 1 - enabled, 0 - disabled; -1 - not supported */
+	int id;
+};
+
+/* each registered chip must have unique id */
+static DEFINE_IDR(bq2415x_id);
+
+static DEFINE_MUTEX(bq2415x_id_mutex);
+static DEFINE_MUTEX(bq2415x_timer_mutex);
+static DEFINE_MUTEX(bq2415x_i2c_mutex);
+
+/**** i2c read functions ****/
+
+/* read value from register */
+static int bq2415x_i2c_read(struct bq2415x_device *bq, u8 reg)
+{
+	struct i2c_client *client = to_i2c_client(bq->dev);
+	struct i2c_msg msg[2];
+	u8 val;
+	int ret;
+
+	if (!client->adapter)
+		return -ENODEV;
+
+	msg[0].addr = client->addr;
+	msg[0].flags = 0;
+	msg[0].buf = &reg;
+	msg[0].len = sizeof(reg);
+	msg[1].addr = client->addr;
+	msg[1].flags = I2C_M_RD;
+	msg[1].buf = &val;
+	msg[1].len = sizeof(val);
+
+	mutex_lock(&bq2415x_i2c_mutex);
+	ret = i2c_transfer(client->adapter, msg, ARRAY_SIZE(msg));
+	mutex_unlock(&bq2415x_i2c_mutex);
+
+	if (ret < 0)
+		return ret;
+
+	return val;
+}
+
+/* read value from register, apply mask and right shift it */
+static int bq2415x_i2c_read_mask(struct bq2415x_device *bq, u8 reg,
+				u8 mask, u8 shift)
+{
+	int ret;
+
+	if (shift > 8)
+		return -EINVAL;
+
+	ret = bq2415x_i2c_read(bq, reg);
+	if (ret < 0)
+		return ret;
+	else
+		return (ret & mask) >> shift;
+}
+
+/* read value from register and return one specified bit */
+static int bq2415x_i2c_read_bit(struct bq2415x_device *bq, u8 reg, u8 bit)
+{
+	if (bit > 8)
+		return -EINVAL;
+	else
+		return bq2415x_i2c_read_mask(bq, reg, BIT(bit), bit);
+}
+
+/**** i2c write functions ****/
+
+/* write value to register */
+static int bq2415x_i2c_write(struct bq2415x_device *bq, u8 reg, u8 val)
+{
+	struct i2c_client *client = to_i2c_client(bq->dev);
+	struct i2c_msg msg[1];
+	u8 data[2];
+	int ret;
+
+	data[0] = reg;
+	data[1] = val;
+
+	msg[0].addr = client->addr;
+	msg[0].flags = 0;
+	msg[0].buf = data;
+	msg[0].len = ARRAY_SIZE(data);
+
+	mutex_lock(&bq2415x_i2c_mutex);
+	ret = i2c_transfer(client->adapter, msg, ARRAY_SIZE(msg));
+	mutex_unlock(&bq2415x_i2c_mutex);
+
+	/* i2c_transfer returns number of messages transferred */
+	if (ret < 0)
+		return ret;
+	else if (ret != 1)
+		return -EIO;
+
+	return 0;
+}
+
+/* read value from register, change it with mask left shifted and write back */
+static int bq2415x_i2c_write_mask(struct bq2415x_device *bq, u8 reg, u8 val,
+				u8 mask, u8 shift)
+{
+	int ret;
+
+	if (shift > 8)
+		return -EINVAL;
+
+	ret = bq2415x_i2c_read(bq, reg);
+	if (ret < 0)
+		return ret;
+
+	ret &= ~mask;
+	ret |= val << shift;
+
+	return bq2415x_i2c_write(bq, reg, ret);
+}
+
+/* change only one bit in register */
+static int bq2415x_i2c_write_bit(struct bq2415x_device *bq, u8 reg,
+				bool val, u8 bit)
+{
+	if (bit > 8)
+		return -EINVAL;
+	else
+		return bq2415x_i2c_write_mask(bq, reg, val, BIT(bit), bit);
+}
+
+/**** global functions ****/
+
+/* exec command function */
+static int bq2415x_exec_command(struct bq2415x_device *bq,
+				enum bq2415x_command command)
+{
+	int ret;
+	switch (command) {
+	case BQ2415X_TIMER_RESET:
+		return bq2415x_i2c_write_bit(bq, BQ2415X_REG_STATUS,
+				1, BQ2415X_BIT_TMR_RST);
+	case BQ2415X_OTG_STATUS:
+		return bq2415x_i2c_read_bit(bq, BQ2415X_REG_STATUS,
+				BQ2415X_BIT_OTG);
+	case BQ2415X_STAT_PIN_STATUS:
+		return bq2415x_i2c_read_bit(bq, BQ2415X_REG_STATUS,
+				BQ2415X_BIT_EN_STAT);
+	case BQ2415X_STAT_PIN_ENABLE:
+		return bq2415x_i2c_write_bit(bq, BQ2415X_REG_STATUS, 1,
+				BQ2415X_BIT_EN_STAT);
+	case BQ2415X_STAT_PIN_DISABLE:
+		return bq2415x_i2c_write_bit(bq, BQ2415X_REG_STATUS, 0,
+				BQ2415X_BIT_EN_STAT);
+	case BQ2415X_CHARGE_STATUS:
+		return bq2415x_i2c_read_mask(bq, BQ2415X_REG_STATUS,
+				BQ2415X_MASK_STAT, BQ2415X_SHIFT_STAT);
+	case BQ2415X_BOOST_STATUS:
+		return bq2415x_i2c_read_bit(bq, BQ2415X_REG_STATUS,
+				BQ2415X_BIT_BOOST);
+	case BQ2415X_FAULT_STATUS:
+		return bq2415x_i2c_read_mask(bq, BQ2415X_REG_STATUS,
+			BQ2415X_MASK_FAULT, BQ2415X_SHIFT_FAULT);
+
+	case BQ2415X_CHARGE_TERMINATION_STATUS:
+		return bq2415x_i2c_read_bit(bq, BQ2415X_REG_CONTROL,
+				BQ2415X_BIT_TE);
+	case BQ2415X_CHARGE_TERMINATION_ENABLE:
+		return bq2415x_i2c_write_bit(bq, BQ2415X_REG_CONTROL,
+				1, BQ2415X_BIT_TE);
+	case BQ2415X_CHARGE_TERMINATION_DISABLE:
+		return bq2415x_i2c_write_bit(bq, BQ2415X_REG_CONTROL,
+				0, BQ2415X_BIT_TE);
+	case BQ2415X_CHARGER_STATUS:
+		ret = bq2415x_i2c_read_bit(bq, BQ2415X_REG_CONTROL,
+			BQ2415X_BIT_CE);
+		if (ret < 0)
+			return ret;
+		else
+			return ret > 0 ? 0 : 1;
+	case BQ2415X_CHARGER_ENABLE:
+		return bq2415x_i2c_write_bit(bq, BQ2415X_REG_CONTROL,
+				0, BQ2415X_BIT_CE);
+	case BQ2415X_CHARGER_DISABLE:
+		return bq2415x_i2c_write_bit(bq, BQ2415X_REG_CONTROL,
+				1, BQ2415X_BIT_CE);
+	case BQ2415X_HIGH_IMPEDANCE_STATUS:
+		return bq2415x_i2c_read_bit(bq, BQ2415X_REG_CONTROL,
+				BQ2415X_BIT_HZ_MODE);
+	case BQ2415X_HIGH_IMPEDANCE_ENABLE:
+		return bq2415x_i2c_write_bit(bq, BQ2415X_REG_CONTROL,
+				1, BQ2415X_BIT_HZ_MODE);
+	case BQ2415X_HIGH_IMPEDANCE_DISABLE:
+		return bq2415x_i2c_write_bit(bq, BQ2415X_REG_CONTROL,
+				0, BQ2415X_BIT_HZ_MODE);
+	case BQ2415X_BOOST_MODE_STATUS:
+		return bq2415x_i2c_read_bit(bq, BQ2415X_REG_CONTROL,
+				BQ2415X_BIT_OPA_MODE);
+	case BQ2415X_BOOST_MODE_ENABLE:
+		return bq2415x_i2c_write_bit(bq, BQ2415X_REG_CONTROL,
+				1, BQ2415X_BIT_OPA_MODE);
+	case BQ2415X_BOOST_MODE_DISABLE:
+		return bq2415x_i2c_write_bit(bq, BQ2415X_REG_CONTROL,
+				0, BQ2415X_BIT_OPA_MODE);
+
+	case BQ2415X_OTG_LEVEL:
+		return bq2415x_i2c_read_bit(bq, BQ2415X_REG_VOLTAGE,
+				BQ2415X_BIT_OTG_PL);
+	case BQ2415X_OTG_ACTIVATE_HIGH:
+		return bq2415x_i2c_write_bit(bq, BQ2415X_REG_VOLTAGE,
+				1, BQ2415X_BIT_OTG_PL);
+	case BQ2415X_OTG_ACTIVATE_LOW:
+		return bq2415x_i2c_write_bit(bq, BQ2415X_REG_VOLTAGE,
+				0, BQ2415X_BIT_OTG_PL);
+	case BQ2415X_OTG_PIN_STATUS:
+		return bq2415x_i2c_read_bit(bq, BQ2415X_REG_VOLTAGE,
+				BQ2415X_BIT_OTG_EN);
+	case BQ2415X_OTG_PIN_ENABLE:
+		return bq2415x_i2c_write_bit(bq, BQ2415X_REG_VOLTAGE,
+				1, BQ2415X_BIT_OTG_EN);
+	case BQ2415X_OTG_PIN_DISABLE:
+		return bq2415x_i2c_write_bit(bq, BQ2415X_REG_VOLTAGE,
+				0, BQ2415X_BIT_OTG_EN);
+
+	case BQ2415X_VENDER_CODE:
+		return bq2415x_i2c_read_mask(bq, BQ2415X_REG_VENDER,
+			BQ2415X_MASK_VENDER, BQ2415X_SHIFT_VENDER);
+	case BQ2415X_PART_NUMBER:
+		return bq2415x_i2c_read_mask(bq, BQ2415X_REG_VENDER,
+				BQ2415X_MASK_PN, BQ2415X_SHIFT_PN);
+	case BQ2415X_REVISION:
+		return bq2415x_i2c_read_mask(bq, BQ2415X_REG_VENDER,
+			BQ2415X_MASK_REVISION, BQ2415X_SHIFT_REVISION);
+
+	default:
+		return -EINVAL;
+	}
+}
+
+/* detect chip type */
+static enum bq2415x_chip bq2415x_detect_chip(struct bq2415x_device *bq)
+{
+	struct i2c_client *client = to_i2c_client(bq->dev);
+	int ret = bq2415x_exec_command(bq, BQ2415X_PART_NUMBER);
+
+	if (ret < 0)
+		return ret;
+
+	switch (client->addr) {
+	case 0x6b:
+		switch (ret) {
+		case 0:
+			if (bq->chip == BQ24151A)
+				return bq->chip;
+			else
+				return BQ24151;
+		case 1:
+			if (bq->chip == BQ24150A ||
+				bq->chip == BQ24152 ||
+				bq->chip == BQ24155)
+				return bq->chip;
+			else
+				return BQ24150;
+		case 2:
+			if (bq->chip == BQ24153A)
+				return bq->chip;
+			else
+				return BQ24153;
+		default:
+			return BQUNKNOWN;
+		}
+		break;
+
+	case 0x6a:
+		switch (ret) {
+		case 0:
+			if (bq->chip == BQ24156A)
+				return bq->chip;
+			else
+				return BQ24156;
+		case 2:
+			return BQ24158;
+		default:
+			return BQUNKNOWN;
+		}
+		break;
+	}
+
+	return BQUNKNOWN;
+}
+
+/* detect chip revision */
+static int bq2415x_detect_revision(struct bq2415x_device *bq)
+{
+	int ret = bq2415x_exec_command(bq, BQ2415X_REVISION);
+	int chip = bq2415x_detect_chip(bq);
+	if (ret < 0 || chip < 0)
+		return -1;
+
+	switch (chip) {
+	case BQ24150:
+	case BQ24150A:
+	case BQ24151:
+	case BQ24151A:
+	case BQ24152:
+		if (ret >= 0 && ret <= 3)
+			return ret;
+		else
+			return -1;
+
+	case BQ24153:
+	case BQ24153A:
+	case BQ24156:
+	case BQ24156A:
+	case BQ24158:
+		if (ret == 3)
+			return 0;
+		else if (ret == 1)
+			return 1;
+		else
+			return -1;
+
+	case BQ24155:
+		if (ret == 3)
+			return 3;
+		else
+			return -1;
+
+	case BQUNKNOWN:
+		return -1;
+	}
+
+	return -1;
+}
+
+/* return chip vender code */
+static int bq2415x_get_vender_code(struct bq2415x_device *bq)
+{
+	int ret = bq2415x_exec_command(bq, BQ2415X_VENDER_CODE);
+	if (ret < 0)
+		return 0;
+	else /* convert to binary */
+		return (ret & 0x1) +
+			((ret >> 1) & 0x1) * 10 +
+			((ret >> 2) & 0x1) * 100;
+}
+
+/* reset all chip registers to default state */
+static void bq2415x_reset_chip(struct bq2415x_device *bq)
+{
+	bq2415x_i2c_write(bq, BQ2415X_REG_CURRENT, BQ2415X_RESET_CURRENT);
+	bq2415x_i2c_write(bq, BQ2415X_REG_VOLTAGE, BQ2415X_RESET_VOLTAGE);
+	bq2415x_i2c_write(bq, BQ2415X_REG_CONTROL, BQ2415X_RESET_CONTROL);
+	bq2415x_i2c_write(bq, BQ2415X_REG_STATUS, BQ2415X_RESET_STATUS);
+	bq->timer_error = NULL;
+}
+
+/**** properties functions ****/
+
+/* set current limit in mA */
+static int bq2415x_set_current_limit(struct bq2415x_device *bq, int mA)
+{
+	int val;
+	if (mA <= 100)
+		val = 0;
+	else if (mA <= 500)
+		val = 1;
+	else if (mA <= 800)
+		val = 2;
+	else
+		val = 3;
+	return bq2415x_i2c_write_mask(bq, BQ2415X_REG_CONTROL, val,
+			BQ2415X_MASK_LIMIT, BQ2415X_SHIFT_LIMIT);
+}
+
+/* get current limit in mA */
+static int bq2415x_get_current_limit(struct bq2415x_device *bq)
+{
+	int ret = bq2415x_i2c_read_mask(bq, BQ2415X_REG_CONTROL,
+			BQ2415X_MASK_LIMIT, BQ2415X_SHIFT_LIMIT);
+	if (ret < 0)
+		return ret;
+	else if (ret == 0)
+		return 100;
+	else if (ret == 1)
+		return 500;
+	else if (ret == 2)
+		return 800;
+	else if (ret == 3)
+		return 1800;
+	else
+		return -EINVAL;
+}
+
+/* set weak battery voltage in mV */
+static int bq2415x_set_weak_battery_voltage(struct bq2415x_device *bq, int mV)
+{
+	/* round to 100mV */
+	int val;
+	if (mV <= 3400 + 50)
+		val = 0;
+	else if (mV <= 3500 + 50)
+		val = 1;
+	else if (mV <= 3600 + 50)
+		val = 2;
+	else
+		val = 3;
+	return bq2415x_i2c_write_mask(bq, BQ2415X_REG_CONTROL, val,
+			BQ2415X_MASK_VLOWV, BQ2415X_SHIFT_VLOWV);
+}
+
+/* get weak battery voltage in mV */
+static int bq2415x_get_weak_battery_voltage(struct bq2415x_device *bq)
+{
+	int ret = bq2415x_i2c_read_mask(bq, BQ2415X_REG_CONTROL,
+			BQ2415X_MASK_VLOWV, BQ2415X_SHIFT_VLOWV);
+	if (ret < 0)
+		return ret;
+	else
+		return 100 * (34 + ret);
+}
+
+/* set battery regulation voltage in mV */
+static int bq2415x_set_battery_regulation_voltage(struct bq2415x_device *bq,
+						int mV)
+{
+	int val = (mV/10 - 350) / 2;
+
+	if (val < 0)
+		val = 0;
+	else if (val > 94) /* FIXME: Max is 94 or 122 ? Set max value ? */
+		return -EINVAL;
+
+	return bq2415x_i2c_write_mask(bq, BQ2415X_REG_VOLTAGE, val,
+			BQ2415X_MASK_VO, BQ2415X_SHIFT_VO);
+}
+
+/* get battery regulation voltage in mV */
+static int bq2415x_get_battery_regulation_voltage(struct bq2415x_device *bq)
+{
+	int ret = bq2415x_i2c_read_mask(bq, BQ2415X_REG_VOLTAGE,
+			BQ2415X_MASK_VO, BQ2415X_SHIFT_VO);
+	if (ret < 0)
+		return ret;
+	else
+		return 10 * (350 + 2*ret);
+}
+
+/* set charge current in mA (platform data must provide resistor sense) */
+static int bq2415x_set_charge_current(struct bq2415x_device *bq, int mA)
+{
+	int val;
+	if (bq->init_data.resistor_sense <= 0)
+		return -ENOSYS;
+
+	val = (mA * bq->init_data.resistor_sense - 37400) / 6800;
+
+	if (val < 0)
+		val = 0;
+	else if (val > 7)
+		val = 7;
+
+	return bq2415x_i2c_write_mask(bq, BQ2415X_REG_CURRENT, val,
+			BQ2415X_MASK_VI_CHRG | BQ2415X_MASK_RESET,
+			BQ2415X_SHIFT_VI_CHRG);
+}
+
+/* get charge current in mA (platform data must provide resistor sense) */
+static int bq2415x_get_charge_current(struct bq2415x_device *bq)
+{
+	int ret;
+	if (bq->init_data.resistor_sense <= 0)
+		return -ENOSYS;
+
+	ret = bq2415x_i2c_read_mask(bq, BQ2415X_REG_CURRENT,
+			BQ2415X_MASK_VI_CHRG, BQ2415X_SHIFT_VI_CHRG);
+	if (ret < 0)
+		return ret;
+	else
+		return (37400 + 6800*ret) / bq->init_data.resistor_sense;
+}
+
+/* set termination current in mA (platform data must provide resistor sense) */
+static int bq2415x_set_termination_current(struct bq2415x_device *bq, int mA)
+{
+	int val;
+	if (bq->init_data.resistor_sense <= 0)
+		return -ENOSYS;
+
+	val = (mA * bq->init_data.resistor_sense - 3400) / 3400;
+
+	if (val < 0)
+		val = 0;
+	else if (val > 7)
+		val = 7;
+
+	return bq2415x_i2c_write_mask(bq, BQ2415X_REG_CURRENT, val,
+			BQ2415X_MASK_VI_TERM | BQ2415X_MASK_RESET,
+			BQ2415X_SHIFT_VI_TERM);
+}
+
+/* get termination current in mA (platform data must provide resistor sense) */
+static int bq2415x_get_termination_current(struct bq2415x_device *bq)
+{
+	int ret;
+	if (bq->init_data.resistor_sense <= 0)
+		return -ENOSYS;
+
+	ret = bq2415x_i2c_read_mask(bq, BQ2415X_REG_CURRENT,
+			BQ2415X_MASK_VI_TERM, BQ2415X_SHIFT_VI_TERM);
+	if (ret < 0)
+		return ret;
+	else
+		return (3400 + 3400*ret) / bq->init_data.resistor_sense;
+}
+
+/* set default value of property */
+#define bq2415x_set_default_value(bq, prop) \
+	do { \
+		int ret = 0; \
+		if (bq->init_data.prop != -1) \
+			ret = bq2415x_set_##prop(bq, bq->init_data.prop); \
+		if (ret < 0) \
+			return ret; \
+	} while (0)
+
+/* set default values of all properties */
+static int bq2415x_set_defaults(struct bq2415x_device *bq)
+{
+	bq2415x_exec_command(bq, BQ2415X_BOOST_MODE_DISABLE);
+	bq2415x_exec_command(bq, BQ2415X_CHARGER_DISABLE);
+	bq2415x_exec_command(bq, BQ2415X_CHARGE_TERMINATION_DISABLE);
+	bq2415x_set_default_value(bq, current_limit);
+	bq2415x_set_default_value(bq, weak_battery_voltage);
+	bq2415x_set_default_value(bq, battery_regulation_voltage);
+	if (bq->init_data.resistor_sense > 0) {
+		bq2415x_set_default_value(bq, charge_current);
+		bq2415x_set_default_value(bq, termination_current);
+		bq2415x_exec_command(bq, BQ2415X_CHARGE_TERMINATION_ENABLE);
+	}
+	bq2415x_exec_command(bq, BQ2415X_CHARGER_ENABLE);
+	return 0;
+}
+
+/**** charger mode functions ****/
+
+/* set charger mode */
+static int bq2415x_set_mode(struct bq2415x_device *bq, enum bq2415x_mode mode)
+{
+	int ret = 0;
+	int charger = 0;
+	int boost = 0;
+
+	if (mode == BQ2415X_MODE_HOST_CHARGER ||
+		mode == BQ2415X_MODE_DEDICATED_CHARGER)
+			charger = 1;
+
+	if (mode == BQ2415X_MODE_BOOST)
+		boost = 1;
+
+	if (!charger)
+		ret = bq2415x_exec_command(bq, BQ2415X_CHARGER_DISABLE);
+
+	if (!boost)
+		ret = bq2415x_exec_command(bq, BQ2415X_BOOST_MODE_DISABLE);
+
+	if (ret < 0)
+		return ret;
+
+	switch (mode) {
+	case BQ2415X_MODE_NONE:
+		dev_dbg(bq->dev, "changing mode to: N/A\n");
+		ret = bq2415x_set_current_limit(bq, 100);
+		break;
+	case BQ2415X_MODE_HOST_CHARGER:
+		dev_dbg(bq->dev, "changing mode to: Host/HUB charger\n");
+		ret = bq2415x_set_current_limit(bq, 500);
+		break;
+	case BQ2415X_MODE_DEDICATED_CHARGER:
+		dev_dbg(bq->dev, "changing mode to: Dedicated charger\n");
+		ret = bq2415x_set_current_limit(bq, 1800);
+		break;
+	case BQ2415X_MODE_BOOST: /* Boost mode */
+		dev_dbg(bq->dev, "changing mode to: Boost\n");
+		ret = bq2415x_set_current_limit(bq, 100);
+		break;
+	}
+
+	if (ret < 0)
+		return ret;
+
+	if (charger)
+		ret = bq2415x_exec_command(bq, BQ2415X_CHARGER_ENABLE);
+	else if (boost)
+		ret = bq2415x_exec_command(bq, BQ2415X_BOOST_MODE_ENABLE);
+
+	if (ret < 0)
+		return ret;
+
+	bq2415x_set_default_value(bq, weak_battery_voltage);
+	bq2415x_set_default_value(bq, battery_regulation_voltage);
+
+	bq->mode = mode;
+	sysfs_notify(&bq->charger.dev->kobj, NULL, "mode");
+
+	return 0;
+
+}
+
+/* hook function called by other driver which set reported mode */
+static void bq2415x_hook_function(enum bq2415x_mode mode, void *data)
+{
+	struct bq2415x_device *bq = data;
+
+	if (!bq)
+		return;
+
+	dev_dbg(bq->dev, "hook function was called\n");
+	bq->reported_mode = mode;
+
+	/* if automode is not enabled do not tell about reported_mode */
+	if (bq->automode < 1)
+		return;
+
+	sysfs_notify(&bq->charger.dev->kobj, NULL, "reported_mode");
+	bq2415x_set_mode(bq, bq->reported_mode);
+
+}
+
+/**** timer functions ****/
+
+/* enable/disable auto resetting chip timer */
+static void bq2415x_set_autotimer(struct bq2415x_device *bq, int state)
+{
+	mutex_lock(&bq2415x_timer_mutex);
+
+	if (bq->autotimer == state) {
+		mutex_unlock(&bq2415x_timer_mutex);
+		return;
+	}
+
+	bq->autotimer = state;
+
+	if (state) {
+		schedule_delayed_work(&bq->work, BQ2415X_TIMER_TIMEOUT * HZ);
+		bq2415x_exec_command(bq, BQ2415X_TIMER_RESET);
+		bq->timer_error = NULL;
+	} else {
+		cancel_delayed_work_sync(&bq->work);
+	}
+
+	mutex_unlock(&bq2415x_timer_mutex);
+}
+
+/* called by bq2415x_timer_work on timer error */
+static void bq2415x_timer_error(struct bq2415x_device *bq, const char *msg)
+{
+	bq->timer_error = msg;
+	sysfs_notify(&bq->charger.dev->kobj, NULL, "timer");
+	dev_err(bq->dev, "%s\n", msg);
+	if (bq->automode > 0)
+		bq->automode = 0;
+	bq2415x_set_mode(bq, BQ2415X_MODE_NONE);
+	bq2415x_set_autotimer(bq, 0);
+}
+
+/* delayed work function for auto resetting chip timer */
+static void bq2415x_timer_work(struct work_struct *work)
+{
+	struct bq2415x_device *bq = container_of(work, struct bq2415x_device,
+						work.work);
+	int ret, error, boost;
+
+	if (!bq->autotimer)
+		return;
+
+	ret = bq2415x_exec_command(bq, BQ2415X_TIMER_RESET);
+	if (ret < 0) {
+		bq2415x_timer_error(bq, "Resetting timer failed");
+		return;
+	}
+
+	boost = bq2415x_exec_command(bq, BQ2415X_BOOST_MODE_STATUS);
+	if (boost < 0) {
+		bq2415x_timer_error(bq, "Unknown error");
+		return;
+	}
+
+	error = bq2415x_exec_command(bq, BQ2415X_FAULT_STATUS);
+	if (error < 0) {
+		bq2415x_timer_error(bq, "Unknown error");
+		return;
+	}
+
+	if (boost) {
+		switch (error) {
+		/* Non fatal errors, chip is OK */
+		case 0: /* No error */
+			break;
+		case 6: /* Timer expired */
+			dev_err(bq->dev, "Timer expired\n");
+			break;
+		case 3: /* Battery voltage too low */
+			dev_err(bq->dev, "Battery voltage to low\n");
+			break;
+
+		/* Fatal errors, disable and reset chip */
+		case 1: /* Overvoltage protection (chip fried) */
+			bq2415x_timer_error(bq,
+				"Overvoltage protection (chip fried)");
+			return;
+		case 2: /* Overload */
+			bq2415x_timer_error(bq, "Overload");
+			return;
+		case 4: /* Battery overvoltage protection */
+			bq2415x_timer_error(bq,
+				"Battery overvoltage protection");
+			return;
+		case 5: /* Thermal shutdown (too hot) */
+			bq2415x_timer_error(bq,
+					"Thermal shutdown (too hot)");
+			return;
+		case 7: /* N/A */
+			bq2415x_timer_error(bq, "Unknown error");
+			return;
+		}
+	} else {
+		switch (error) {
+		/* Non fatal errors, chip is OK */
+		case 0: /* No error */
+			break;
+		case 2: /* Sleep mode */
+			dev_err(bq->dev, "Sleep mode\n");
+			break;
+		case 3: /* Poor input source */
+			dev_err(bq->dev, "Poor input source\n");
+			break;
+		case 6: /* Timer expired */
+			dev_err(bq->dev, "Timer expired\n");
+			break;
+		case 7: /* No battery */
+			dev_err(bq->dev, "No battery\n");
+			break;
+
+		/* Fatal errors, disable and reset chip */
+		case 1: /* Overvoltage protection (chip fried) */
+			bq2415x_timer_error(bq,
+				"Overvoltage protection (chip fried)");
+			return;
+		case 4: /* Battery overvoltage protection */
+			bq2415x_timer_error(bq,
+				"Battery overvoltage protection");
+			return;
+		case 5: /* Thermal shutdown (too hot) */
+			bq2415x_timer_error(bq,
+				"Thermal shutdown (too hot)");
+			return;
+		}
+	}
+
+	schedule_delayed_work(&bq->work, BQ2415X_TIMER_TIMEOUT * HZ);
+}
+
+/**** power supply interface code ****/
+
+static enum power_supply_property bq2415x_power_supply_props[] = {
+	/* TODO: maybe add more power supply properties */
+	POWER_SUPPLY_PROP_STATUS,
+	POWER_SUPPLY_PROP_MODEL_NAME,
+};
+
+static int bq2415x_power_supply_get_property(struct power_supply *psy,
+	enum power_supply_property psp, union power_supply_propval *val)
+{
+	struct bq2415x_device *bq = container_of(psy, struct bq2415x_device,
+						charger);
+	int ret;
+
+	switch (psp) {
+	case POWER_SUPPLY_PROP_STATUS:
+		ret = bq2415x_exec_command(bq, BQ2415X_CHARGE_STATUS);
+		if (ret < 0)
+			return ret;
+		else if (ret == 0) /* Ready */
+			val->intval = POWER_SUPPLY_STATUS_NOT_CHARGING;
+		else if (ret == 1) /* Charge in progress */
+			val->intval = POWER_SUPPLY_STATUS_CHARGING;
+		else if (ret == 2) /* Charge done */
+			val->intval = POWER_SUPPLY_STATUS_FULL;
+		else
+			val->intval = POWER_SUPPLY_STATUS_UNKNOWN;
+		break;
+	case POWER_SUPPLY_PROP_MODEL_NAME:
+		val->strval = bq->model;
+		break;
+	default:
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static int bq2415x_power_supply_init(struct bq2415x_device *bq)
+{
+	int ret;
+	int chip;
+	char revstr[8];
+
+	bq->charger.name = bq->name;
+	bq->charger.type = POWER_SUPPLY_TYPE_USB;
+	bq->charger.properties = bq2415x_power_supply_props;
+	bq->charger.num_properties = ARRAY_SIZE(bq2415x_power_supply_props);
+	bq->charger.get_property = bq2415x_power_supply_get_property;
+
+	ret = bq2415x_detect_chip(bq);
+	if (ret < 0)
+		chip = BQUNKNOWN;
+	else
+		chip = ret;
+
+	ret = bq2415x_detect_revision(bq);
+	if (ret < 0)
+		strcpy(revstr, "unknown");
+	else
+		sprintf(revstr, "1.%d", ret);
+
+	bq->model = kasprintf(GFP_KERNEL,
+				"chip %s, revision %s, vender code %.3d",
+				bq2415x_chip_name[chip], revstr,
+				bq2415x_get_vender_code(bq));
+	if (!bq->model) {
+		dev_err(bq->dev, "failed to allocate model name\n");
+		return -ENOMEM;
+	}
+
+	ret = power_supply_register(bq->dev, &bq->charger);
+	if (ret) {
+		kfree(bq->model);
+		return ret;
+	}
+
+	return 0;
+}
+
+static void bq2415x_power_supply_exit(struct bq2415x_device *bq)
+{
+	bq->autotimer = 0;
+	if (bq->automode > 0)
+		bq->automode = 0;
+	cancel_delayed_work_sync(&bq->work);
+	power_supply_unregister(&bq->charger);
+	kfree(bq->model);
+}
+
+/**** additional sysfs entries for power supply interface ****/
+
+/* show *_status entries */
+static ssize_t bq2415x_sysfs_show_status(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct power_supply *psy = dev_get_drvdata(dev);
+	struct bq2415x_device *bq = container_of(psy, struct bq2415x_device,
+						charger);
+	enum bq2415x_command command;
+	int ret;
+
+	if (strcmp(attr->attr.name, "otg_status") == 0)
+		command = BQ2415X_OTG_STATUS;
+	else if (strcmp(attr->attr.name, "charge_status") == 0)
+		command = BQ2415X_CHARGE_STATUS;
+	else if (strcmp(attr->attr.name, "boost_status") == 0)
+		command = BQ2415X_BOOST_STATUS;
+	else if (strcmp(attr->attr.name, "fault_status") == 0)
+		command = BQ2415X_FAULT_STATUS;
+	else
+		return -EINVAL;
+
+	ret = bq2415x_exec_command(bq, command);
+	if (ret < 0)
+		return ret;
+	else
+		return sprintf(buf, "%d\n", ret);
+}
+
+/* set timer entry:
+     auto - enable auto mode
+     off - disable auto mode
+     (other values) - reset chip timer
+*/
+static ssize_t bq2415x_sysfs_set_timer(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct power_supply *psy = dev_get_drvdata(dev);
+	struct bq2415x_device *bq = container_of(psy, struct bq2415x_device,
+						charger);
+	int ret = 0;
+
+	if (strncmp(buf, "auto", 4) == 0)
+		bq2415x_set_autotimer(bq, 1);
+	else if (strncmp(buf, "off", 3) == 0)
+		bq2415x_set_autotimer(bq, 0);
+	else
+		ret = bq2415x_exec_command(bq, BQ2415X_TIMER_RESET);
+
+	if (ret < 0)
+		return ret;
+	else
+		return count;
+}
+
+/* show timer entry (auto or off) */
+static ssize_t bq2415x_sysfs_show_timer(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct power_supply *psy = dev_get_drvdata(dev);
+	struct bq2415x_device *bq = container_of(psy, struct bq2415x_device,
+						charger);
+
+	if (bq->timer_error)
+		return sprintf(buf, "%s\n", bq->timer_error);
+
+	if (bq->autotimer)
+		return sprintf(buf, "auto\n");
+	else
+		return sprintf(buf, "off\n");
+}
+
+/* set mode entry:
+     auto - if automode is supported, enable it and set mode to reported
+     none - disable charger and boost mode
+     host - charging mode for host/hub chargers (current limit 500mA)
+     dedicated - charging mode for dedicated chargers (unlimited current limit)
+     boost - disable charger and enable boost mode
+*/
+static ssize_t bq2415x_sysfs_set_mode(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct power_supply *psy = dev_get_drvdata(dev);
+	struct bq2415x_device *bq = container_of(psy, struct bq2415x_device,
+						charger);
+	enum bq2415x_mode mode;
+	int ret = 0;
+
+	if (strncmp(buf, "auto", 4) == 0) {
+		if (bq->automode < 0)
+			return -ENOSYS;
+		bq->automode = 1;
+		mode = bq->reported_mode;
+	} else if (strncmp(buf, "none", 4) == 0) {
+		if (bq->automode > 0)
+			bq->automode = 0;
+		mode = BQ2415X_MODE_NONE;
+	} else if (strncmp(buf, "host", 4) == 0) {
+		if (bq->automode > 0)
+			bq->automode = 0;
+		mode = BQ2415X_MODE_HOST_CHARGER;
+	} else if (strncmp(buf, "dedicated", 9) == 0) {
+		if (bq->automode > 0)
+			bq->automode = 0;
+		mode = BQ2415X_MODE_DEDICATED_CHARGER;
+	} else if (strncmp(buf, "boost", 5) == 0) {
+		if (bq->automode > 0)
+			bq->automode = 0;
+		mode = BQ2415X_MODE_BOOST;
+	} else if (strncmp(buf, "reset", 5) == 0) {
+		bq2415x_reset_chip(bq);
+		bq2415x_set_defaults(bq);
+		if (bq->automode <= 0)
+			return count;
+		bq->automode = 1;
+		mode = bq->reported_mode;
+	} else
+		return -EINVAL;
+
+	ret = bq2415x_set_mode(bq, mode);
+	if (ret < 0)
+		return ret;
+	else
+		return count;
+}
+
+/* show mode entry (auto, none, host, dedicated or boost) */
+static ssize_t bq2415x_sysfs_show_mode(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct power_supply *psy = dev_get_drvdata(dev);
+	struct bq2415x_device *bq = container_of(psy, struct bq2415x_device,
+						charger);
+	ssize_t ret = 0;
+
+	if (bq->automode > 0)
+		ret += sprintf(buf+ret, "auto (");
+
+	switch (bq->mode) {
+	case BQ2415X_MODE_NONE:
+		ret += sprintf(buf+ret, "none");
+		break;
+	case BQ2415X_MODE_HOST_CHARGER:
+		ret += sprintf(buf+ret, "host");
+		break;
+	case BQ2415X_MODE_DEDICATED_CHARGER:
+		ret += sprintf(buf+ret, "dedicated");
+		break;
+	case BQ2415X_MODE_BOOST:
+		ret += sprintf(buf+ret, "boost");
+		break;
+	}
+
+	if (bq->automode > 0)
+		ret += sprintf(buf+ret, ")");
+
+	ret += sprintf(buf+ret, "\n");
+	return ret;
+}
+
+/* show reported_mode entry (none, host, dedicated or boost) */
+static ssize_t bq2415x_sysfs_show_reported_mode(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct power_supply *psy = dev_get_drvdata(dev);
+	struct bq2415x_device *bq = container_of(psy, struct bq2415x_device,
+						charger);
+
+	if (bq->automode < 0)
+		return -EINVAL;
+
+	switch (bq->reported_mode) {
+	case BQ2415X_MODE_NONE:
+		return sprintf(buf, "none\n");
+	case BQ2415X_MODE_HOST_CHARGER:
+		return sprintf(buf, "host\n");
+	case BQ2415X_MODE_DEDICATED_CHARGER:
+		return sprintf(buf, "dedicated\n");
+	case BQ2415X_MODE_BOOST:
+		return sprintf(buf, "boost\n");
+	}
+
+	return -EINVAL;
+}
+
+/* directly set raw value to chip register, format: 'register value' */
+static ssize_t bq2415x_sysfs_set_registers(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct power_supply *psy = dev_get_drvdata(dev);
+	struct bq2415x_device *bq = container_of(psy, struct bq2415x_device,
+						charger);
+	ssize_t ret = 0;
+	unsigned int reg;
+	unsigned int val;
+
+	if (sscanf(buf, "%x %x", &reg, &val) != 2)
+		return -EINVAL;
+
+	if (reg > 4 || val > 255)
+		return -EINVAL;
+
+	ret = bq2415x_i2c_write(bq, reg, val);
+	if (ret < 0)
+		return ret;
+	else
+		return count;
+}
+
+/* print value of chip register, format: 'register=value' */
+static ssize_t bq2415x_sysfs_print_reg(struct bq2415x_device *bq,
+					u8 reg, char *buf)
+{
+	int ret = bq2415x_i2c_read(bq, reg);
+	if (ret < 0)
+		return sprintf(buf, "%#.2x=error %d\n", reg, ret);
+	else
+		return sprintf(buf, "%#.2x=%#.2x\n", reg, ret);
+}
+
+/* show all raw values of chip register, format per line: 'register=value' */
+static ssize_t bq2415x_sysfs_show_registers(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct power_supply *psy = dev_get_drvdata(dev);
+	struct bq2415x_device *bq = container_of(psy, struct bq2415x_device,
+						charger);
+	ssize_t ret = 0;
+
+	ret += bq2415x_sysfs_print_reg(bq, BQ2415X_REG_STATUS, buf+ret);
+	ret += bq2415x_sysfs_print_reg(bq, BQ2415X_REG_CONTROL, buf+ret);
+	ret += bq2415x_sysfs_print_reg(bq, BQ2415X_REG_VOLTAGE, buf+ret);
+	ret += bq2415x_sysfs_print_reg(bq, BQ2415X_REG_VENDER, buf+ret);
+	ret += bq2415x_sysfs_print_reg(bq, BQ2415X_REG_CURRENT, buf+ret);
+	return ret;
+}
+
+/* set current and voltage limit entries (in mA or mV) */
+static ssize_t bq2415x_sysfs_set_limit(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct power_supply *psy = dev_get_drvdata(dev);
+	struct bq2415x_device *bq = container_of(psy, struct bq2415x_device,
+						charger);
+	long val;
+	int ret;
+
+	if (kstrtol(buf, 10, &val) < 0)
+		return -EINVAL;
+
+	if (strcmp(attr->attr.name, "current_limit") == 0)
+		ret = bq2415x_set_current_limit(bq, val);
+	else if (strcmp(attr->attr.name, "weak_battery_voltage") == 0)
+		ret = bq2415x_set_weak_battery_voltage(bq, val);
+	else if (strcmp(attr->attr.name, "battery_regulation_voltage") == 0)
+		ret = bq2415x_set_battery_regulation_voltage(bq, val);
+	else if (strcmp(attr->attr.name, "charge_current") == 0)
+		ret = bq2415x_set_charge_current(bq, val);
+	else if (strcmp(attr->attr.name, "termination_current") == 0)
+		ret = bq2415x_set_termination_current(bq, val);
+	else
+		return -EINVAL;
+
+	if (ret < 0)
+		return ret;
+	else
+		return count;
+}
+
+/* show current and voltage limit entries (in mA or mV) */
+static ssize_t bq2415x_sysfs_show_limit(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct power_supply *psy = dev_get_drvdata(dev);
+	struct bq2415x_device *bq = container_of(psy, struct bq2415x_device,
+						charger);
+	int ret;
+
+	if (strcmp(attr->attr.name, "current_limit") == 0)
+		ret = bq2415x_get_current_limit(bq);
+	else if (strcmp(attr->attr.name, "weak_battery_voltage") == 0)
+		ret = bq2415x_get_weak_battery_voltage(bq);
+	else if (strcmp(attr->attr.name, "battery_regulation_voltage") == 0)
+		ret = bq2415x_get_battery_regulation_voltage(bq);
+	else if (strcmp(attr->attr.name, "charge_current") == 0)
+		ret = bq2415x_get_charge_current(bq);
+	else if (strcmp(attr->attr.name, "termination_current") == 0)
+		ret = bq2415x_get_termination_current(bq);
+	else
+		return -EINVAL;
+
+	if (ret < 0)
+		return ret;
+	else
+		return sprintf(buf, "%d\n", ret);
+}
+
+/* set *_enable entries */
+static ssize_t bq2415x_sysfs_set_enable(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct power_supply *psy = dev_get_drvdata(dev);
+	struct bq2415x_device *bq = container_of(psy, struct bq2415x_device,
+						charger);
+	enum bq2415x_command command;
+	long val;
+	int ret;
+
+	if (kstrtol(buf, 10, &val) < 0)
+		return -EINVAL;
+
+	if (strcmp(attr->attr.name, "charge_termination_enable") == 0)
+		command = val ? BQ2415X_CHARGE_TERMINATION_ENABLE :
+			BQ2415X_CHARGE_TERMINATION_DISABLE;
+	else if (strcmp(attr->attr.name, "high_impedance_enable") == 0)
+		command = val ? BQ2415X_HIGH_IMPEDANCE_ENABLE :
+			BQ2415X_HIGH_IMPEDANCE_DISABLE;
+	else if (strcmp(attr->attr.name, "otg_pin_enable") == 0)
+		command = val ? BQ2415X_OTG_PIN_ENABLE :
+			BQ2415X_OTG_PIN_DISABLE;
+	else if (strcmp(attr->attr.name, "stat_pin_enable") == 0)
+		command = val ? BQ2415X_STAT_PIN_ENABLE :
+			BQ2415X_STAT_PIN_DISABLE;
+	else
+		return -EINVAL;
+
+	ret = bq2415x_exec_command(bq, command);
+	if (ret < 0)
+		return ret;
+	else
+		return count;
+}
+
+/* show *_enable entries */
+static ssize_t bq2415x_sysfs_show_enable(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct power_supply *psy = dev_get_drvdata(dev);
+	struct bq2415x_device *bq = container_of(psy, struct bq2415x_device,
+						charger);
+	enum bq2415x_command command;
+	int ret;
+
+	if (strcmp(attr->attr.name, "charge_termination_enable") == 0)
+		command = BQ2415X_CHARGE_TERMINATION_STATUS;
+	else if (strcmp(attr->attr.name, "high_impedance_enable") == 0)
+		command = BQ2415X_HIGH_IMPEDANCE_STATUS;
+	else if (strcmp(attr->attr.name, "otg_pin_enable") == 0)
+		command = BQ2415X_OTG_PIN_STATUS;
+	else if (strcmp(attr->attr.name, "stat_pin_enable") == 0)
+		command = BQ2415X_STAT_PIN_STATUS;
+	else
+		return -EINVAL;
+
+	ret = bq2415x_exec_command(bq, command);
+	if (ret < 0)
+		return ret;
+	else
+		return sprintf(buf, "%d\n", ret);
+}
+
+static DEVICE_ATTR(current_limit, S_IWUSR | S_IRUGO,
+		bq2415x_sysfs_show_limit, bq2415x_sysfs_set_limit);
+static DEVICE_ATTR(weak_battery_voltage, S_IWUSR | S_IRUGO,
+		bq2415x_sysfs_show_limit, bq2415x_sysfs_set_limit);
+static DEVICE_ATTR(battery_regulation_voltage, S_IWUSR | S_IRUGO,
+		bq2415x_sysfs_show_limit, bq2415x_sysfs_set_limit);
+static DEVICE_ATTR(charge_current, S_IWUSR | S_IRUGO,
+		bq2415x_sysfs_show_limit, bq2415x_sysfs_set_limit);
+static DEVICE_ATTR(termination_current, S_IWUSR | S_IRUGO,
+		bq2415x_sysfs_show_limit, bq2415x_sysfs_set_limit);
+
+static DEVICE_ATTR(charge_termination_enable, S_IWUSR | S_IRUGO,
+		bq2415x_sysfs_show_enable, bq2415x_sysfs_set_enable);
+static DEVICE_ATTR(high_impedance_enable, S_IWUSR | S_IRUGO,
+		bq2415x_sysfs_show_enable, bq2415x_sysfs_set_enable);
+static DEVICE_ATTR(otg_pin_enable, S_IWUSR | S_IRUGO,
+		bq2415x_sysfs_show_enable, bq2415x_sysfs_set_enable);
+static DEVICE_ATTR(stat_pin_enable, S_IWUSR | S_IRUGO,
+		bq2415x_sysfs_show_enable, bq2415x_sysfs_set_enable);
+
+static DEVICE_ATTR(reported_mode, S_IRUGO,
+		bq2415x_sysfs_show_reported_mode, NULL);
+static DEVICE_ATTR(mode, S_IWUSR | S_IRUGO,
+		bq2415x_sysfs_show_mode, bq2415x_sysfs_set_mode);
+static DEVICE_ATTR(timer, S_IWUSR | S_IRUGO,
+		bq2415x_sysfs_show_timer, bq2415x_sysfs_set_timer);
+
+static DEVICE_ATTR(registers, S_IWUSR | S_IRUGO,
+		bq2415x_sysfs_show_registers, bq2415x_sysfs_set_registers);
+
+static DEVICE_ATTR(otg_status, S_IRUGO, bq2415x_sysfs_show_status, NULL);
+static DEVICE_ATTR(charge_status, S_IRUGO, bq2415x_sysfs_show_status, NULL);
+static DEVICE_ATTR(boost_status, S_IRUGO, bq2415x_sysfs_show_status, NULL);
+static DEVICE_ATTR(fault_status, S_IRUGO, bq2415x_sysfs_show_status, NULL);
+
+static struct attribute *bq2415x_sysfs_attributes[] = {
+	&dev_attr_current_limit.attr,
+	&dev_attr_weak_battery_voltage.attr,
+	&dev_attr_battery_regulation_voltage.attr,
+	&dev_attr_charge_current.attr,
+	&dev_attr_termination_current.attr,
+
+	&dev_attr_charge_termination_enable.attr,
+	&dev_attr_high_impedance_enable.attr,
+	&dev_attr_otg_pin_enable.attr,
+	&dev_attr_stat_pin_enable.attr,
+
+	&dev_attr_reported_mode.attr,
+	&dev_attr_mode.attr,
+	&dev_attr_timer.attr,
+
+	&dev_attr_registers.attr,
+
+	&dev_attr_otg_status.attr,
+	&dev_attr_charge_status.attr,
+	&dev_attr_boost_status.attr,
+	&dev_attr_fault_status.attr,
+	NULL,
+};
+
+static const struct attribute_group bq2415x_sysfs_attr_group = {
+	.attrs = bq2415x_sysfs_attributes,
+};
+
+static int bq2415x_sysfs_init(struct bq2415x_device *bq)
+{
+	return sysfs_create_group(&bq->charger.dev->kobj,
+			&bq2415x_sysfs_attr_group);
+}
+
+static void bq2415x_sysfs_exit(struct bq2415x_device *bq)
+{
+	sysfs_remove_group(&bq->charger.dev->kobj, &bq2415x_sysfs_attr_group);
+}
+
+/* main bq2415x probe function */
+static int bq2415x_probe(struct i2c_client *client,
+		const struct i2c_device_id *id)
+{
+	int ret;
+	int num;
+	char *name;
+	struct bq2415x_device *bq;
+
+	if (!client->dev.platform_data) {
+		dev_err(&client->dev, "platform data not set\n");
+		return -ENODEV;
+	}
+
+	/* Get new ID for the new device */
+	ret = idr_pre_get(&bq2415x_id, GFP_KERNEL);
+	if (ret == 0)
+		return -ENOMEM;
+
+	mutex_lock(&bq2415x_id_mutex);
+	ret = idr_get_new(&bq2415x_id, client, &num);
+	mutex_unlock(&bq2415x_id_mutex);
+
+	if (ret < 0)
+		return ret;
+
+	name = kasprintf(GFP_KERNEL, "%s-%d", id->name, num);
+	if (!name) {
+		dev_err(&client->dev, "failed to allocate device name\n");
+		ret = -ENOMEM;
+		goto error_1;
+	}
+
+	bq = kzalloc(sizeof(*bq), GFP_KERNEL);
+	if (!bq) {
+		dev_err(&client->dev, "failed to allocate device data\n");
+		ret = -ENOMEM;
+		goto error_2;
+	}
+
+	i2c_set_clientdata(client, bq);
+
+	bq->id = num;
+	bq->dev = &client->dev;
+	bq->chip = id->driver_data;
+	bq->name = name;
+	bq->mode = BQ2415X_MODE_NONE;
+	bq->reported_mode = BQ2415X_MODE_NONE;
+	bq->autotimer = 0;
+	bq->automode = 0;
+
+	memcpy(&bq->init_data, client->dev.platform_data,
+			sizeof(bq->init_data));
+
+	bq2415x_reset_chip(bq);
+
+	ret = bq2415x_power_supply_init(bq);
+	if (ret) {
+		dev_err(bq->dev, "failed to register power supply: %d\n", ret);
+		goto error_3;
+	}
+
+	ret = bq2415x_sysfs_init(bq);
+	if (ret) {
+		dev_err(bq->dev, "failed to create sysfs entries: %d\n", ret);
+		goto error_4;
+	}
+
+	ret = bq2415x_set_defaults(bq);
+	if (ret) {
+		dev_err(bq->dev, "failed to set default values: %d\n", ret);
+		goto error_5;
+	}
+
+	if (bq->init_data.set_mode_hook) {
+		if (bq->init_data.set_mode_hook(
+				bq2415x_hook_function, bq)) {
+			bq->automode = 1;
+			bq2415x_set_mode(bq, bq->reported_mode);
+			dev_info(bq->dev, "automode enabled\n");
+		} else {
+			bq->automode = -1;
+			dev_info(bq->dev, "automode failed\n");
+		}
+	} else {
+		bq->automode = -1;
+		dev_info(bq->dev, "automode not supported\n");
+	}
+
+	INIT_DELAYED_WORK(&bq->work, bq2415x_timer_work);
+	bq2415x_set_autotimer(bq, 1);
+
+	dev_info(bq->dev, "driver registered\n");
+	return 0;
+
+error_5:
+	bq2415x_sysfs_exit(bq);
+error_4:
+	bq2415x_power_supply_exit(bq);
+error_3:
+	kfree(bq);
+error_2:
+	kfree(name);
+error_1:
+	mutex_lock(&bq2415x_id_mutex);
+	idr_remove(&bq2415x_id, num);
+	mutex_unlock(&bq2415x_id_mutex);
+
+	return ret;
+}
+
+/* main bq2415x remove function */
+
+static int bq2415x_remove(struct i2c_client *client)
+{
+	struct bq2415x_device *bq = i2c_get_clientdata(client);
+
+	if (bq->init_data.set_mode_hook)
+		bq->init_data.set_mode_hook(NULL, NULL);
+
+	bq2415x_sysfs_exit(bq);
+	bq2415x_power_supply_exit(bq);
+
+	bq2415x_reset_chip(bq);
+
+	mutex_lock(&bq2415x_id_mutex);
+	idr_remove(&bq2415x_id, bq->id);
+	mutex_unlock(&bq2415x_id_mutex);
+
+	dev_info(bq->dev, "driver unregistered\n");
+
+	kfree(bq->name);
+	kfree(bq);
+
+	return 0;
+}
+
+static const struct i2c_device_id bq2415x_i2c_id_table[] = {
+	{ "bq2415x", BQUNKNOWN },
+	{ "bq24150", BQ24150 },
+	{ "bq24150a", BQ24150A },
+	{ "bq24151", BQ24151 },
+	{ "bq24151a", BQ24151A },
+	{ "bq24152", BQ24152 },
+	{ "bq24153", BQ24153 },
+	{ "bq24153a", BQ24153A },
+	{ "bq24155", BQ24155 },
+	{ "bq24156", BQ24156 },
+	{ "bq24156a", BQ24156A },
+	{ "bq24158", BQ24158 },
+	{},
+};
+MODULE_DEVICE_TABLE(i2c, bq2415x_i2c_id_table);
+
+static struct i2c_driver bq2415x_driver = {
+	.driver = {
+		.name = "bq2415x-charger",
+	},
+	.probe = bq2415x_probe,
+	.remove = bq2415x_remove,
+	.id_table = bq2415x_i2c_id_table,
+};
+
+static int __init bq2415x_init(void)
+{
+	return i2c_add_driver(&bq2415x_driver);
+}
+module_init(bq2415x_init);
+
+static void __exit bq2415x_exit(void)
+{
+	i2c_del_driver(&bq2415x_driver);
+}
+module_exit(bq2415x_exit);
+
+MODULE_AUTHOR("Pali Rohár <pali.rohar@gmail.com>");
+MODULE_DESCRIPTION("bq2415x charger driver");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/power/bq2415x_charger.h b/include/linux/power/bq2415x_charger.h
new file mode 100644
index 000000000000..fb6bf4d8aea8
--- /dev/null
+++ b/include/linux/power/bq2415x_charger.h
@@ -0,0 +1,90 @@
+/*
+    bq2415x_charger.h - bq2415x charger driver
+    Copyright (C) 2011-2012  Pali Rohár <pali.rohar@gmail.com>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+*/
+
+#ifndef BQ2415X_CHARGER_H
+#define BQ2415X_CHARGER_H
+
+/*
+  This is platform data for bq2415x chip. It contains default board voltages
+  and currents which can be also later configured via sysfs. If value is -1
+  then default chip value (specified in datasheet) will be used.
+
+  Value resistor_sense is needed for for configuring charge and termination
+  current. It it is less or equal to zero, configuring charge and termination
+  current will not be possible.
+
+  Function set_mode_hook is needed for automode (setting correct current limit
+  when charger is connected/disconnected or setting boost mode). When is NULL,
+  automode function is disabled. When is not NULL, it must have this prototype:
+
+    int (*set_mode_hook)(
+      void (*hook)(enum bq2415x_mode mode, void *data),
+      void *data)
+
+  hook is hook function (see below) and data is pointer to driver private data
+
+  bq2415x driver will call it as:
+
+    platform_data->set_mode_hook(bq2415x_hook_function, bq2415x_device);
+
+  Board/platform function set_mode_hook return non zero value when hook
+  function was successful registered. Platform code should call that hook
+  function (which get from pointer, with data) every time when charger was
+  connected/disconnected or require to enable boost mode. bq2415x driver then
+  will set correct current limit, enable/disable charger or boost mode.
+
+  Hook function has this prototype:
+
+    void hook(enum bq2415x_mode mode, void *data);
+
+  mode is bq2415x mode (charger or boost)
+  data is pointer to driver private data (which get from set_charger_type_hook)
+
+  When bq driver is being unloaded, it call function:
+
+    platform_data->set_mode_hook(NULL, NULL);
+
+  (hook function and driver private data are NULL)
+
+  After that board/platform code must not call driver hook function! It is
+  possible that pointer to hook function will not be valid and calling will
+  cause undefined result.
+
+*/
+
+/* Supported modes with maximal current limit */
+enum bq2415x_mode {
+	BQ2415X_MODE_NONE,		/* unknown or no charger (100mA) */
+	BQ2415X_MODE_HOST_CHARGER,	/* usb host/hub charger (500mA) */
+	BQ2415X_MODE_DEDICATED_CHARGER, /* dedicated charger (unlimited) */
+	BQ2415X_MODE_BOOST,		/* boost mode (charging disabled) */
+};
+
+struct bq2415x_platform_data {
+	int current_limit;		/* mA */
+	int weak_battery_voltage;	/* mV */
+	int battery_regulation_voltage;	/* mV */
+	int charge_current;		/* mA */
+	int termination_current;	/* mA */
+	int resistor_sense;		/* m ohm */
+	int (*set_mode_hook)(void (*hook)(enum bq2415x_mode mode, void *data),
+			     void *data);
+};
+
+#endif
-- 
cgit v1.2.3


From 6c47a3e00c6e4f3cdac7566c1480de34d9e32e07 Mon Sep 17 00:00:00 2001
From: Anton Vorontsov <cbouatmailru@gmail.com>
Date: Sun, 18 Nov 2012 15:35:32 -0800
Subject: bq2415x_charger: Fix style issues

I hate doing these style fixups myself, but I also hate inconsitent code.
Normally I just ask to resubmit the patch with the issues fixed, but N900
is special: I have a selfish interest in it. :)

Signed-off-by: Anton Vorontsov <cbouatmailru@gmail.com>
---
 drivers/power/bq2415x_charger.c       | 268 +++++++++++++++++++---------------
 include/linux/power/bq2415x_charger.h | 131 +++++++++--------
 2 files changed, 215 insertions(+), 184 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/power/bq2415x_charger.c b/drivers/power/bq2415x_charger.c
index 017b3c27f769..ee842b37f462 100644
--- a/drivers/power/bq2415x_charger.c
+++ b/drivers/power/bq2415x_charger.c
@@ -1,31 +1,32 @@
 /*
-    bq2415x_charger.c - bq2415x charger driver
-    Copyright (C) 2011-2012  Pali Rohár <pali.rohar@gmail.com>
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License along
-    with this program; if not, write to the Free Software Foundation, Inc.,
-    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-*/
+ * bq2415x charger driver
+ *
+ * Copyright (C) 2011-2012  Pali Rohár <pali.rohar@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
 
 /*
-  Datasheets:
-  http://www.ti.com/product/bq24150
-  http://www.ti.com/product/bq24150a
-  http://www.ti.com/product/bq24152
-  http://www.ti.com/product/bq24153
-  http://www.ti.com/product/bq24153a
-  http://www.ti.com/product/bq24155
-*/
+ * Datasheets:
+ * http://www.ti.com/product/bq24150
+ * http://www.ti.com/product/bq24150a
+ * http://www.ti.com/product/bq24152
+ * http://www.ti.com/product/bq24153
+ * http://www.ti.com/product/bq24153a
+ * http://www.ti.com/product/bq24155
+ */
 
 #include <linux/version.h>
 #include <linux/kernel.h>
@@ -222,7 +223,7 @@ static int bq2415x_i2c_read(struct bq2415x_device *bq, u8 reg)
 
 /* read value from register, apply mask and right shift it */
 static int bq2415x_i2c_read_mask(struct bq2415x_device *bq, u8 reg,
-				u8 mask, u8 shift)
+				 u8 mask, u8 shift)
 {
 	int ret;
 
@@ -232,8 +233,7 @@ static int bq2415x_i2c_read_mask(struct bq2415x_device *bq, u8 reg,
 	ret = bq2415x_i2c_read(bq, reg);
 	if (ret < 0)
 		return ret;
-	else
-		return (ret & mask) >> shift;
+	return (ret & mask) >> shift;
 }
 
 /* read value from register and return one specified bit */
@@ -241,8 +241,7 @@ static int bq2415x_i2c_read_bit(struct bq2415x_device *bq, u8 reg, u8 bit)
 {
 	if (bit > 8)
 		return -EINVAL;
-	else
-		return bq2415x_i2c_read_mask(bq, reg, BIT(bit), bit);
+	return bq2415x_i2c_read_mask(bq, reg, BIT(bit), bit);
 }
 
 /**** i2c write functions ****/
@@ -278,7 +277,7 @@ static int bq2415x_i2c_write(struct bq2415x_device *bq, u8 reg, u8 val)
 
 /* read value from register, change it with mask left shifted and write back */
 static int bq2415x_i2c_write_mask(struct bq2415x_device *bq, u8 reg, u8 val,
-				u8 mask, u8 shift)
+				  u8 mask, u8 shift)
 {
 	int ret;
 
@@ -297,12 +296,11 @@ static int bq2415x_i2c_write_mask(struct bq2415x_device *bq, u8 reg, u8 val,
 
 /* change only one bit in register */
 static int bq2415x_i2c_write_bit(struct bq2415x_device *bq, u8 reg,
-				bool val, u8 bit)
+				 bool val, u8 bit)
 {
 	if (bit > 8)
 		return -EINVAL;
-	else
-		return bq2415x_i2c_write_mask(bq, reg, val, BIT(bit), bit);
+	return bq2415x_i2c_write_mask(bq, reg, val, BIT(bit), bit);
 }
 
 /**** global functions ****/
@@ -312,6 +310,7 @@ static int bq2415x_exec_command(struct bq2415x_device *bq,
 				enum bq2415x_command command)
 {
 	int ret;
+
 	switch (command) {
 	case BQ2415X_TIMER_RESET:
 		return bq2415x_i2c_write_bit(bq, BQ2415X_REG_STATUS,
@@ -407,10 +406,8 @@ static int bq2415x_exec_command(struct bq2415x_device *bq,
 	case BQ2415X_REVISION:
 		return bq2415x_i2c_read_mask(bq, BQ2415X_REG_VENDER,
 			BQ2415X_MASK_REVISION, BQ2415X_SHIFT_REVISION);
-
-	default:
-		return -EINVAL;
 	}
+	return -EINVAL;
 }
 
 /* detect chip type */
@@ -470,6 +467,7 @@ static int bq2415x_detect_revision(struct bq2415x_device *bq)
 {
 	int ret = bq2415x_exec_command(bq, BQ2415X_REVISION);
 	int chip = bq2415x_detect_chip(bq);
+
 	if (ret < 0 || chip < 0)
 		return -1;
 
@@ -483,7 +481,6 @@ static int bq2415x_detect_revision(struct bq2415x_device *bq)
 			return ret;
 		else
 			return -1;
-
 	case BQ24153:
 	case BQ24153A:
 	case BQ24156:
@@ -495,13 +492,11 @@ static int bq2415x_detect_revision(struct bq2415x_device *bq)
 			return 1;
 		else
 			return -1;
-
 	case BQ24155:
 		if (ret == 3)
 			return 3;
 		else
 			return -1;
-
 	case BQUNKNOWN:
 		return -1;
 	}
@@ -512,13 +507,16 @@ static int bq2415x_detect_revision(struct bq2415x_device *bq)
 /* return chip vender code */
 static int bq2415x_get_vender_code(struct bq2415x_device *bq)
 {
-	int ret = bq2415x_exec_command(bq, BQ2415X_VENDER_CODE);
+	int ret;
+
+	ret = bq2415x_exec_command(bq, BQ2415X_VENDER_CODE);
 	if (ret < 0)
 		return 0;
-	else /* convert to binary */
-		return (ret & 0x1) +
-			((ret >> 1) & 0x1) * 10 +
-			((ret >> 2) & 0x1) * 100;
+
+	/* convert to binary */
+	return (ret & 0x1) +
+	       ((ret >> 1) & 0x1) * 10 +
+	       ((ret >> 2) & 0x1) * 100;
 }
 
 /* reset all chip registers to default state */
@@ -537,6 +535,7 @@ static void bq2415x_reset_chip(struct bq2415x_device *bq)
 static int bq2415x_set_current_limit(struct bq2415x_device *bq, int mA)
 {
 	int val;
+
 	if (mA <= 100)
 		val = 0;
 	else if (mA <= 500)
@@ -545,6 +544,7 @@ static int bq2415x_set_current_limit(struct bq2415x_device *bq, int mA)
 		val = 2;
 	else
 		val = 3;
+
 	return bq2415x_i2c_write_mask(bq, BQ2415X_REG_CONTROL, val,
 			BQ2415X_MASK_LIMIT, BQ2415X_SHIFT_LIMIT);
 }
@@ -552,7 +552,9 @@ static int bq2415x_set_current_limit(struct bq2415x_device *bq, int mA)
 /* get current limit in mA */
 static int bq2415x_get_current_limit(struct bq2415x_device *bq)
 {
-	int ret = bq2415x_i2c_read_mask(bq, BQ2415X_REG_CONTROL,
+	int ret;
+
+	ret = bq2415x_i2c_read_mask(bq, BQ2415X_REG_CONTROL,
 			BQ2415X_MASK_LIMIT, BQ2415X_SHIFT_LIMIT);
 	if (ret < 0)
 		return ret;
@@ -564,15 +566,15 @@ static int bq2415x_get_current_limit(struct bq2415x_device *bq)
 		return 800;
 	else if (ret == 3)
 		return 1800;
-	else
-		return -EINVAL;
+	return -EINVAL;
 }
 
 /* set weak battery voltage in mV */
 static int bq2415x_set_weak_battery_voltage(struct bq2415x_device *bq, int mV)
 {
-	/* round to 100mV */
 	int val;
+
+	/* round to 100mV */
 	if (mV <= 3400 + 50)
 		val = 0;
 	else if (mV <= 3500 + 50)
@@ -581,6 +583,7 @@ static int bq2415x_set_weak_battery_voltage(struct bq2415x_device *bq, int mV)
 		val = 2;
 	else
 		val = 3;
+
 	return bq2415x_i2c_write_mask(bq, BQ2415X_REG_CONTROL, val,
 			BQ2415X_MASK_VLOWV, BQ2415X_SHIFT_VLOWV);
 }
@@ -588,17 +591,18 @@ static int bq2415x_set_weak_battery_voltage(struct bq2415x_device *bq, int mV)
 /* get weak battery voltage in mV */
 static int bq2415x_get_weak_battery_voltage(struct bq2415x_device *bq)
 {
-	int ret = bq2415x_i2c_read_mask(bq, BQ2415X_REG_CONTROL,
+	int ret;
+
+	ret = bq2415x_i2c_read_mask(bq, BQ2415X_REG_CONTROL,
 			BQ2415X_MASK_VLOWV, BQ2415X_SHIFT_VLOWV);
 	if (ret < 0)
 		return ret;
-	else
-		return 100 * (34 + ret);
+	return 100 * (34 + ret);
 }
 
 /* set battery regulation voltage in mV */
 static int bq2415x_set_battery_regulation_voltage(struct bq2415x_device *bq,
-						int mV)
+						  int mV)
 {
 	int val = (mV/10 - 350) / 2;
 
@@ -616,21 +620,21 @@ static int bq2415x_get_battery_regulation_voltage(struct bq2415x_device *bq)
 {
 	int ret = bq2415x_i2c_read_mask(bq, BQ2415X_REG_VOLTAGE,
 			BQ2415X_MASK_VO, BQ2415X_SHIFT_VO);
+
 	if (ret < 0)
 		return ret;
-	else
-		return 10 * (350 + 2*ret);
+	return 10 * (350 + 2*ret);
 }
 
 /* set charge current in mA (platform data must provide resistor sense) */
 static int bq2415x_set_charge_current(struct bq2415x_device *bq, int mA)
 {
 	int val;
+
 	if (bq->init_data.resistor_sense <= 0)
 		return -ENOSYS;
 
 	val = (mA * bq->init_data.resistor_sense - 37400) / 6800;
-
 	if (val < 0)
 		val = 0;
 	else if (val > 7)
@@ -645,6 +649,7 @@ static int bq2415x_set_charge_current(struct bq2415x_device *bq, int mA)
 static int bq2415x_get_charge_current(struct bq2415x_device *bq)
 {
 	int ret;
+
 	if (bq->init_data.resistor_sense <= 0)
 		return -ENOSYS;
 
@@ -652,19 +657,18 @@ static int bq2415x_get_charge_current(struct bq2415x_device *bq)
 			BQ2415X_MASK_VI_CHRG, BQ2415X_SHIFT_VI_CHRG);
 	if (ret < 0)
 		return ret;
-	else
-		return (37400 + 6800*ret) / bq->init_data.resistor_sense;
+	return (37400 + 6800*ret) / bq->init_data.resistor_sense;
 }
 
 /* set termination current in mA (platform data must provide resistor sense) */
 static int bq2415x_set_termination_current(struct bq2415x_device *bq, int mA)
 {
 	int val;
+
 	if (bq->init_data.resistor_sense <= 0)
 		return -ENOSYS;
 
 	val = (mA * bq->init_data.resistor_sense - 3400) / 3400;
-
 	if (val < 0)
 		val = 0;
 	else if (val > 7)
@@ -679,6 +683,7 @@ static int bq2415x_set_termination_current(struct bq2415x_device *bq, int mA)
 static int bq2415x_get_termination_current(struct bq2415x_device *bq)
 {
 	int ret;
+
 	if (bq->init_data.resistor_sense <= 0)
 		return -ENOSYS;
 
@@ -686,8 +691,7 @@ static int bq2415x_get_termination_current(struct bq2415x_device *bq)
 			BQ2415X_MASK_VI_TERM, BQ2415X_SHIFT_VI_TERM);
 	if (ret < 0)
 		return ret;
-	else
-		return (3400 + 3400*ret) / bq->init_data.resistor_sense;
+	return (3400 + 3400*ret) / bq->init_data.resistor_sense;
 }
 
 /* set default value of property */
@@ -706,14 +710,17 @@ static int bq2415x_set_defaults(struct bq2415x_device *bq)
 	bq2415x_exec_command(bq, BQ2415X_BOOST_MODE_DISABLE);
 	bq2415x_exec_command(bq, BQ2415X_CHARGER_DISABLE);
 	bq2415x_exec_command(bq, BQ2415X_CHARGE_TERMINATION_DISABLE);
+
 	bq2415x_set_default_value(bq, current_limit);
 	bq2415x_set_default_value(bq, weak_battery_voltage);
 	bq2415x_set_default_value(bq, battery_regulation_voltage);
+
 	if (bq->init_data.resistor_sense > 0) {
 		bq2415x_set_default_value(bq, charge_current);
 		bq2415x_set_default_value(bq, termination_current);
 		bq2415x_exec_command(bq, BQ2415X_CHARGE_TERMINATION_ENABLE);
 	}
+
 	bq2415x_exec_command(bq, BQ2415X_CHARGER_ENABLE);
 	return 0;
 }
@@ -844,8 +851,10 @@ static void bq2415x_timer_error(struct bq2415x_device *bq, const char *msg)
 static void bq2415x_timer_work(struct work_struct *work)
 {
 	struct bq2415x_device *bq = container_of(work, struct bq2415x_device,
-						work.work);
-	int ret, error, boost;
+						 work.work);
+	int ret;
+	int error;
+	int boost;
 
 	if (!bq->autotimer)
 		return;
@@ -946,10 +955,11 @@ static enum power_supply_property bq2415x_power_supply_props[] = {
 };
 
 static int bq2415x_power_supply_get_property(struct power_supply *psy,
-	enum power_supply_property psp, union power_supply_propval *val)
+					     enum power_supply_property psp,
+					     union power_supply_propval *val)
 {
 	struct bq2415x_device *bq = container_of(psy, struct bq2415x_device,
-						charger);
+						 charger);
 	int ret;
 
 	switch (psp) {
@@ -1031,7 +1041,8 @@ static void bq2415x_power_supply_exit(struct bq2415x_device *bq)
 
 /* show *_status entries */
 static ssize_t bq2415x_sysfs_show_status(struct device *dev,
-		struct device_attribute *attr, char *buf)
+					 struct device_attribute *attr,
+					 char *buf)
 {
 	struct power_supply *psy = dev_get_drvdata(dev);
 	struct bq2415x_device *bq = container_of(psy, struct bq2415x_device,
@@ -1053,17 +1064,19 @@ static ssize_t bq2415x_sysfs_show_status(struct device *dev,
 	ret = bq2415x_exec_command(bq, command);
 	if (ret < 0)
 		return ret;
-	else
-		return sprintf(buf, "%d\n", ret);
+	return sprintf(buf, "%d\n", ret);
 }
 
-/* set timer entry:
-     auto - enable auto mode
-     off - disable auto mode
-     (other values) - reset chip timer
-*/
+/*
+ * set timer entry:
+ *    auto - enable auto mode
+ *    off - disable auto mode
+ *    (other values) - reset chip timer
+ */
 static ssize_t bq2415x_sysfs_set_timer(struct device *dev,
-		struct device_attribute *attr, const char *buf, size_t count)
+				       struct device_attribute *attr,
+				       const char *buf,
+				       size_t count)
 {
 	struct power_supply *psy = dev_get_drvdata(dev);
 	struct bq2415x_device *bq = container_of(psy, struct bq2415x_device,
@@ -1079,40 +1092,42 @@ static ssize_t bq2415x_sysfs_set_timer(struct device *dev,
 
 	if (ret < 0)
 		return ret;
-	else
-		return count;
+	return count;
 }
 
 /* show timer entry (auto or off) */
 static ssize_t bq2415x_sysfs_show_timer(struct device *dev,
-		struct device_attribute *attr, char *buf)
+					struct device_attribute *attr,
+					char *buf)
 {
 	struct power_supply *psy = dev_get_drvdata(dev);
 	struct bq2415x_device *bq = container_of(psy, struct bq2415x_device,
-						charger);
+						 charger);
 
 	if (bq->timer_error)
 		return sprintf(buf, "%s\n", bq->timer_error);
 
 	if (bq->autotimer)
 		return sprintf(buf, "auto\n");
-	else
-		return sprintf(buf, "off\n");
+	return sprintf(buf, "off\n");
 }
 
-/* set mode entry:
-     auto - if automode is supported, enable it and set mode to reported
-     none - disable charger and boost mode
-     host - charging mode for host/hub chargers (current limit 500mA)
-     dedicated - charging mode for dedicated chargers (unlimited current limit)
-     boost - disable charger and enable boost mode
-*/
+/*
+ * set mode entry:
+ *    auto - if automode is supported, enable it and set mode to reported
+ *    none - disable charger and boost mode
+ *    host - charging mode for host/hub chargers (current limit 500mA)
+ *    dedicated - charging mode for dedicated chargers (unlimited current limit)
+ *    boost - disable charger and enable boost mode
+ */
 static ssize_t bq2415x_sysfs_set_mode(struct device *dev,
-		struct device_attribute *attr, const char *buf, size_t count)
+				      struct device_attribute *attr,
+				      const char *buf,
+				      size_t count)
 {
 	struct power_supply *psy = dev_get_drvdata(dev);
 	struct bq2415x_device *bq = container_of(psy, struct bq2415x_device,
-						charger);
+						 charger);
 	enum bq2415x_mode mode;
 	int ret = 0;
 
@@ -1144,19 +1159,20 @@ static ssize_t bq2415x_sysfs_set_mode(struct device *dev,
 			return count;
 		bq->automode = 1;
 		mode = bq->reported_mode;
-	} else
+	} else {
 		return -EINVAL;
+	}
 
 	ret = bq2415x_set_mode(bq, mode);
 	if (ret < 0)
 		return ret;
-	else
-		return count;
+	return count;
 }
 
 /* show mode entry (auto, none, host, dedicated or boost) */
 static ssize_t bq2415x_sysfs_show_mode(struct device *dev,
-		struct device_attribute *attr, char *buf)
+				       struct device_attribute *attr,
+				       char *buf)
 {
 	struct power_supply *psy = dev_get_drvdata(dev);
 	struct bq2415x_device *bq = container_of(psy, struct bq2415x_device,
@@ -1190,11 +1206,12 @@ static ssize_t bq2415x_sysfs_show_mode(struct device *dev,
 
 /* show reported_mode entry (none, host, dedicated or boost) */
 static ssize_t bq2415x_sysfs_show_reported_mode(struct device *dev,
-		struct device_attribute *attr, char *buf)
+						struct device_attribute *attr,
+						char *buf)
 {
 	struct power_supply *psy = dev_get_drvdata(dev);
 	struct bq2415x_device *bq = container_of(psy, struct bq2415x_device,
-						charger);
+						 charger);
 
 	if (bq->automode < 0)
 		return -EINVAL;
@@ -1215,11 +1232,13 @@ static ssize_t bq2415x_sysfs_show_reported_mode(struct device *dev,
 
 /* directly set raw value to chip register, format: 'register value' */
 static ssize_t bq2415x_sysfs_set_registers(struct device *dev,
-		struct device_attribute *attr, const char *buf, size_t count)
+					   struct device_attribute *attr,
+					   const char *buf,
+					   size_t count)
 {
 	struct power_supply *psy = dev_get_drvdata(dev);
 	struct bq2415x_device *bq = container_of(psy, struct bq2415x_device,
-						charger);
+						 charger);
 	ssize_t ret = 0;
 	unsigned int reg;
 	unsigned int val;
@@ -1233,28 +1252,29 @@ static ssize_t bq2415x_sysfs_set_registers(struct device *dev,
 	ret = bq2415x_i2c_write(bq, reg, val);
 	if (ret < 0)
 		return ret;
-	else
-		return count;
+	return count;
 }
 
 /* print value of chip register, format: 'register=value' */
 static ssize_t bq2415x_sysfs_print_reg(struct bq2415x_device *bq,
-					u8 reg, char *buf)
+				       u8 reg,
+				       char *buf)
 {
 	int ret = bq2415x_i2c_read(bq, reg);
+
 	if (ret < 0)
 		return sprintf(buf, "%#.2x=error %d\n", reg, ret);
-	else
-		return sprintf(buf, "%#.2x=%#.2x\n", reg, ret);
+	return sprintf(buf, "%#.2x=%#.2x\n", reg, ret);
 }
 
 /* show all raw values of chip register, format per line: 'register=value' */
 static ssize_t bq2415x_sysfs_show_registers(struct device *dev,
-		struct device_attribute *attr, char *buf)
+					    struct device_attribute *attr,
+					    char *buf)
 {
 	struct power_supply *psy = dev_get_drvdata(dev);
 	struct bq2415x_device *bq = container_of(psy, struct bq2415x_device,
-						charger);
+						 charger);
 	ssize_t ret = 0;
 
 	ret += bq2415x_sysfs_print_reg(bq, BQ2415X_REG_STATUS, buf+ret);
@@ -1267,11 +1287,13 @@ static ssize_t bq2415x_sysfs_show_registers(struct device *dev,
 
 /* set current and voltage limit entries (in mA or mV) */
 static ssize_t bq2415x_sysfs_set_limit(struct device *dev,
-		struct device_attribute *attr, const char *buf, size_t count)
+				       struct device_attribute *attr,
+				       const char *buf,
+				       size_t count)
 {
 	struct power_supply *psy = dev_get_drvdata(dev);
 	struct bq2415x_device *bq = container_of(psy, struct bq2415x_device,
-						charger);
+						 charger);
 	long val;
 	int ret;
 
@@ -1293,17 +1315,17 @@ static ssize_t bq2415x_sysfs_set_limit(struct device *dev,
 
 	if (ret < 0)
 		return ret;
-	else
-		return count;
+	return count;
 }
 
 /* show current and voltage limit entries (in mA or mV) */
 static ssize_t bq2415x_sysfs_show_limit(struct device *dev,
-		struct device_attribute *attr, char *buf)
+					struct device_attribute *attr,
+					char *buf)
 {
 	struct power_supply *psy = dev_get_drvdata(dev);
 	struct bq2415x_device *bq = container_of(psy, struct bq2415x_device,
-						charger);
+						 charger);
 	int ret;
 
 	if (strcmp(attr->attr.name, "current_limit") == 0)
@@ -1321,17 +1343,18 @@ static ssize_t bq2415x_sysfs_show_limit(struct device *dev,
 
 	if (ret < 0)
 		return ret;
-	else
-		return sprintf(buf, "%d\n", ret);
+	return sprintf(buf, "%d\n", ret);
 }
 
 /* set *_enable entries */
 static ssize_t bq2415x_sysfs_set_enable(struct device *dev,
-		struct device_attribute *attr, const char *buf, size_t count)
+					struct device_attribute *attr,
+					const char *buf,
+					size_t count)
 {
 	struct power_supply *psy = dev_get_drvdata(dev);
 	struct bq2415x_device *bq = container_of(psy, struct bq2415x_device,
-						charger);
+						 charger);
 	enum bq2415x_command command;
 	long val;
 	int ret;
@@ -1357,17 +1380,17 @@ static ssize_t bq2415x_sysfs_set_enable(struct device *dev,
 	ret = bq2415x_exec_command(bq, command);
 	if (ret < 0)
 		return ret;
-	else
-		return count;
+	return count;
 }
 
 /* show *_enable entries */
 static ssize_t bq2415x_sysfs_show_enable(struct device *dev,
-		struct device_attribute *attr, char *buf)
+					 struct device_attribute *attr,
+					 char *buf)
 {
 	struct power_supply *psy = dev_get_drvdata(dev);
 	struct bq2415x_device *bq = container_of(psy, struct bq2415x_device,
-						charger);
+						 charger);
 	enum bq2415x_command command;
 	int ret;
 
@@ -1385,8 +1408,7 @@ static ssize_t bq2415x_sysfs_show_enable(struct device *dev,
 	ret = bq2415x_exec_command(bq, command);
 	if (ret < 0)
 		return ret;
-	else
-		return sprintf(buf, "%d\n", ret);
+	return sprintf(buf, "%d\n", ret);
 }
 
 static DEVICE_ATTR(current_limit, S_IWUSR | S_IRUGO,
@@ -1425,6 +1447,10 @@ static DEVICE_ATTR(boost_status, S_IRUGO, bq2415x_sysfs_show_status, NULL);
 static DEVICE_ATTR(fault_status, S_IRUGO, bq2415x_sysfs_show_status, NULL);
 
 static struct attribute *bq2415x_sysfs_attributes[] = {
+	/*
+	 * TODO: some (appropriate) of these attrs should be switched to
+	 * use power supply class props.
+	 */
 	&dev_attr_current_limit.attr,
 	&dev_attr_weak_battery_voltage.attr,
 	&dev_attr_battery_regulation_voltage.attr,
@@ -1466,7 +1492,7 @@ static void bq2415x_sysfs_exit(struct bq2415x_device *bq)
 
 /* main bq2415x probe function */
 static int bq2415x_probe(struct i2c_client *client,
-		const struct i2c_device_id *id)
+			 const struct i2c_device_id *id)
 {
 	int ret;
 	int num;
diff --git a/include/linux/power/bq2415x_charger.h b/include/linux/power/bq2415x_charger.h
index fb6bf4d8aea8..97a1665eaeaf 100644
--- a/include/linux/power/bq2415x_charger.h
+++ b/include/linux/power/bq2415x_charger.h
@@ -1,72 +1,77 @@
 /*
-    bq2415x_charger.h - bq2415x charger driver
-    Copyright (C) 2011-2012  Pali Rohár <pali.rohar@gmail.com>
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License along
-    with this program; if not, write to the Free Software Foundation, Inc.,
-    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-*/
+ * bq2415x charger driver
+ *
+ * Copyright (C) 2011-2012  Pali Rohár <pali.rohar@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
 
 #ifndef BQ2415X_CHARGER_H
 #define BQ2415X_CHARGER_H
 
 /*
-  This is platform data for bq2415x chip. It contains default board voltages
-  and currents which can be also later configured via sysfs. If value is -1
-  then default chip value (specified in datasheet) will be used.
-
-  Value resistor_sense is needed for for configuring charge and termination
-  current. It it is less or equal to zero, configuring charge and termination
-  current will not be possible.
-
-  Function set_mode_hook is needed for automode (setting correct current limit
-  when charger is connected/disconnected or setting boost mode). When is NULL,
-  automode function is disabled. When is not NULL, it must have this prototype:
-
-    int (*set_mode_hook)(
-      void (*hook)(enum bq2415x_mode mode, void *data),
-      void *data)
-
-  hook is hook function (see below) and data is pointer to driver private data
-
-  bq2415x driver will call it as:
-
-    platform_data->set_mode_hook(bq2415x_hook_function, bq2415x_device);
-
-  Board/platform function set_mode_hook return non zero value when hook
-  function was successful registered. Platform code should call that hook
-  function (which get from pointer, with data) every time when charger was
-  connected/disconnected or require to enable boost mode. bq2415x driver then
-  will set correct current limit, enable/disable charger or boost mode.
-
-  Hook function has this prototype:
-
-    void hook(enum bq2415x_mode mode, void *data);
-
-  mode is bq2415x mode (charger or boost)
-  data is pointer to driver private data (which get from set_charger_type_hook)
-
-  When bq driver is being unloaded, it call function:
-
-    platform_data->set_mode_hook(NULL, NULL);
-
-  (hook function and driver private data are NULL)
-
-  After that board/platform code must not call driver hook function! It is
-  possible that pointer to hook function will not be valid and calling will
-  cause undefined result.
-
-*/
+ * This is platform data for bq2415x chip. It contains default board
+ * voltages and currents which can be also later configured via sysfs. If
+ * value is -1 then default chip value (specified in datasheet) will be
+ * used.
+ *
+ * Value resistor_sense is needed for for configuring charge and
+ * termination current. It it is less or equal to zero, configuring charge
+ * and termination current will not be possible.
+ *
+ * Function set_mode_hook is needed for automode (setting correct current
+ * limit when charger is connected/disconnected or setting boost mode).
+ * When is NULL, automode function is disabled. When is not NULL, it must
+ * have this prototype:
+ *
+ *    int (*set_mode_hook)(
+ *      void (*hook)(enum bq2415x_mode mode, void *data),
+ *      void *data)
+ *
+ * hook is hook function (see below) and data is pointer to driver private
+ * data
+ *
+ * bq2415x driver will call it as:
+ *
+ *    platform_data->set_mode_hook(bq2415x_hook_function, bq2415x_device);
+ *
+ * Board/platform function set_mode_hook return non zero value when hook
+ * function was successful registered. Platform code should call that hook
+ * function (which get from pointer, with data) every time when charger
+ * was connected/disconnected or require to enable boost mode. bq2415x
+ * driver then will set correct current limit, enable/disable charger or
+ * boost mode.
+ *
+ * Hook function has this prototype:
+ *
+ *    void hook(enum bq2415x_mode mode, void *data);
+ *
+ * mode is bq2415x mode (charger or boost)
+ * data is pointer to driver private data (which get from
+ * set_charger_type_hook)
+ *
+ * When bq driver is being unloaded, it call function:
+ *
+ *    platform_data->set_mode_hook(NULL, NULL);
+ *
+ * (hook function and driver private data are NULL)
+ *
+ * After that board/platform code must not call driver hook function! It
+ * is possible that pointer to hook function will not be valid and calling
+ * will cause undefined result.
+ */
 
 /* Supported modes with maximal current limit */
 enum bq2415x_mode {
-- 
cgit v1.2.3


From e0f1abeba5c2d8a2183566717d99294fd1a29c2e Mon Sep 17 00:00:00 2001
From: "Rajanikanth H.V" <rajanikanth.hv@stericsson.com>
Date: Sun, 18 Nov 2012 18:45:41 -0800
Subject: ab8500: Add devicetree support for fuelgauge

- This patch adds device tree support for fuelgauge driver
- optimize bm devices platform_data usage and of_probe(...)
  Note: of_probe() routine for battery managed devices is made
  common across all bm drivers.
- test status:
  - interrupt numbers assigned differs between legacy and FDT mode.

Signed-off-by: Rajanikanth H.V <rajanikanth.hv@stericsson.com>
Signed-off-by: Anton Vorontsov <anton.vorontsov@linaro.org>
---
 Documentation/devicetree/bindings/mfd/ab8500.txt   |   7 +-
 .../devicetree/bindings/power_supply/ab8500/fg.txt |  58 +++
 arch/arm/boot/dts/dbx5x0.dtsi                      |  12 +-
 drivers/mfd/ab8500-core.c                          |   5 +
 drivers/power/Makefile                             |   2 +-
 drivers/power/ab8500_bmdata.c                      | 521 +++++++++++++++++++++
 drivers/power/ab8500_btemp.c                       |  16 +-
 drivers/power/ab8500_charger.c                     |  16 +-
 drivers/power/ab8500_fg.c                          |  82 ++--
 drivers/power/abx500_chargalg.c                    |   8 +-
 include/linux/mfd/abx500.h                         |  36 +-
 11 files changed, 667 insertions(+), 96 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/power_supply/ab8500/fg.txt
 create mode 100644 drivers/power/ab8500_bmdata.c

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/mfd/ab8500.txt b/Documentation/devicetree/bindings/mfd/ab8500.txt
index ce83c8d3c00e..6ca8d817ef92 100644
--- a/Documentation/devicetree/bindings/mfd/ab8500.txt
+++ b/Documentation/devicetree/bindings/mfd/ab8500.txt
@@ -24,7 +24,12 @@ ab8500-bm                :                      :              : Battery Manager
 ab8500-btemp             :                      :              : Battery Temperature
 ab8500-charger           :                      :              : Battery Charger
 ab8500-codec             :                      :              : Audio Codec
-ab8500-fg                :                      :              : Fuel Gauge
+ab8500-fg                : 			: vddadc       : Fuel Gauge
+			 : NCONV_ACCU           :	       : Accumulate N Sample Conversion
+			 : BATT_OVV		:	       : Battery Over Voltage
+			 : LOW_BAT_F		:	       : LOW threshold battery voltage
+			 : CC_INT_CALIB		:	       : Coulomb Counter Internal Calibration
+			 : CCEOC		:	       : Coulomb Counter End of Conversion
 ab8500-gpadc             : HW_CONV_END          : vddadc       : Analogue to Digital Converter
                            SW_CONV_END          :              :
 ab8500-gpio              :                      :              : GPIO Controller
diff --git a/Documentation/devicetree/bindings/power_supply/ab8500/fg.txt b/Documentation/devicetree/bindings/power_supply/ab8500/fg.txt
new file mode 100644
index 000000000000..ccafcb9112fb
--- /dev/null
+++ b/Documentation/devicetree/bindings/power_supply/ab8500/fg.txt
@@ -0,0 +1,58 @@
+=== AB8500 Fuel Gauge Driver ===
+
+AB8500 is a mixed signal multimedia and power management
+device comprising: power and energy-management-module,
+wall-charger, usb-charger, audio codec, general purpose adc,
+tvout, clock management and sim card interface.
+
+Fuelgauge support is part of energy-management-modules, other
+components of this module are:
+main-charger, usb-combo-charger and battery-temperature-monitoring.
+
+The properties below describes the node for fuelgauge driver.
+
+Required Properties:
+- compatible = This shall be: "stericsson,ab8500-fg"
+- battery = Shall be battery specific information
+	Example:
+	ab8500_fg {
+		compatible = "stericsson,ab8500-fg";
+		battery	   = <&ab8500_battery>;
+	};
+
+dependent node:
+	ab8500_battery: ab8500_battery {
+	};
+	This node will provide information on 'thermistor interface' and
+	'battery technology type' used.
+
+Properties of this node are:
+thermistor-on-batctrl:
+	A boolean value indicating thermistor interface	to battery
+
+	Note:
+	'btemp' and 'batctrl' are the pins interfaced for battery temperature
+	measurement, 'btemp' signal is used when NTC(negative temperature
+	coefficient) resister is interfaced external to battery whereas
+	'batctrl' pin is used when NTC resister is internal to battery.
+
+	Example:
+	ab8500_battery: ab8500_battery {
+		thermistor-on-batctrl;
+	};
+	indicates: NTC resister is internal to battery, 'batctrl' is used
+		for thermal measurement.
+
+	The absence of property 'thermal-on-batctrl' indicates
+	NTC resister is external to battery and  'btemp' signal is used
+	for thermal measurement.
+
+battery-type:
+	This shall be the battery manufacturing technology type,
+	allowed types are:
+		"UNKNOWN" "NiMH" "LION" "LIPO" "LiFe" "NiCd" "LiMn"
+	Example:
+	ab8500_battery: ab8500_battery {
+		stericsson,battery-type = "LIPO";
+	}
+
diff --git a/arch/arm/boot/dts/dbx5x0.dtsi b/arch/arm/boot/dts/dbx5x0.dtsi
index 4b0e0ca08f40..0c81986904c5 100644
--- a/arch/arm/boot/dts/dbx5x0.dtsi
+++ b/arch/arm/boot/dts/dbx5x0.dtsi
@@ -352,7 +352,17 @@
 					vddadc-supply = <&ab8500_ldo_tvout_reg>;
 				};
 
-				ab8500-usb {
+				ab8500_battery: ab8500_battery {
+					stericsson,battery-type = "LIPO";
+					thermistor-on-batctrl;
+				};
+
+				ab8500_fg {
+					compatible = "stericsson,ab8500-fg";
+					battery	   = <&ab8500_battery>;
+				};
+
+				ab8500_usb {
 					compatible = "stericsson,ab8500-usb";
 					interrupts = < 90 0x4
 						       96 0x4
diff --git a/drivers/mfd/ab8500-core.c b/drivers/mfd/ab8500-core.c
index 1667c77b5cde..7c3017ba73e4 100644
--- a/drivers/mfd/ab8500-core.c
+++ b/drivers/mfd/ab8500-core.c
@@ -1051,8 +1051,13 @@ static struct mfd_cell __devinitdata ab8500_bm_devs[] = {
 	},
 	{
 		.name = "ab8500-fg",
+		.of_compatible = "stericsson,ab8500-fg",
 		.num_resources = ARRAY_SIZE(ab8500_fg_resources),
 		.resources = ab8500_fg_resources,
+#ifndef CONFIG_OF
+		.platform_data = &ab8500_bm_data,
+		.pdata_size = sizeof(ab8500_bm_data),
+#endif
 	},
 	{
 		.name = "ab8500-chargalg",
diff --git a/drivers/power/Makefile b/drivers/power/Makefile
index 74dfd9570154..696e3a960b3e 100644
--- a/drivers/power/Makefile
+++ b/drivers/power/Makefile
@@ -38,7 +38,7 @@ obj-$(CONFIG_CHARGER_PCF50633)	+= pcf50633-charger.o
 obj-$(CONFIG_BATTERY_JZ4740)	+= jz4740-battery.o
 obj-$(CONFIG_BATTERY_INTEL_MID)	+= intel_mid_battery.o
 obj-$(CONFIG_BATTERY_RX51)	+= rx51_battery.o
-obj-$(CONFIG_AB8500_BM)		+= ab8500_charger.o ab8500_btemp.o ab8500_fg.o abx500_chargalg.o
+obj-$(CONFIG_AB8500_BM)		+= ab8500_bmdata.o ab8500_charger.o ab8500_btemp.o ab8500_fg.o abx500_chargalg.o
 obj-$(CONFIG_CHARGER_ISP1704)	+= isp1704_charger.o
 obj-$(CONFIG_CHARGER_MAX8903)	+= max8903_charger.o
 obj-$(CONFIG_CHARGER_TWL4030)	+= twl4030_charger.o
diff --git a/drivers/power/ab8500_bmdata.c b/drivers/power/ab8500_bmdata.c
new file mode 100644
index 000000000000..e7639b6659f7
--- /dev/null
+++ b/drivers/power/ab8500_bmdata.c
@@ -0,0 +1,521 @@
+#include <linux/export.h>
+#include <linux/power_supply.h>
+#include <linux/of.h>
+#include <linux/mfd/abx500.h>
+#include <linux/mfd/abx500/ab8500.h>
+#include <linux/mfd/abx500/ab8500-bm.h>
+
+/*
+ * These are the defined batteries that uses a NTC and ID resistor placed
+ * inside of the battery pack.
+ * Note that the res_to_temp table must be strictly sorted by falling resistance
+ * values to work.
+ */
+static struct abx500_res_to_temp temp_tbl_A_thermistor[] = {
+	{-5, 53407},
+	{ 0, 48594},
+	{ 5, 43804},
+	{10, 39188},
+	{15, 34870},
+	{20, 30933},
+	{25, 27422},
+	{30, 24347},
+	{35, 21694},
+	{40, 19431},
+	{45, 17517},
+	{50, 15908},
+	{55, 14561},
+	{60, 13437},
+	{65, 12500},
+};
+
+static struct abx500_res_to_temp temp_tbl_B_thermistor[] = {
+	{-5, 165418},
+	{ 0, 159024},
+	{ 5, 151921},
+	{10, 144300},
+	{15, 136424},
+	{20, 128565},
+	{25, 120978},
+	{30, 113875},
+	{35, 107397},
+	{40, 101629},
+	{45,  96592},
+	{50,  92253},
+	{55,  88569},
+	{60,  85461},
+	{65,  82869},
+};
+
+static struct abx500_v_to_cap cap_tbl_A_thermistor[] = {
+	{4171,	100},
+	{4114,	 95},
+	{4009,	 83},
+	{3947,	 74},
+	{3907,	 67},
+	{3863,	 59},
+	{3830,	 56},
+	{3813,	 53},
+	{3791,	 46},
+	{3771,	 33},
+	{3754,	 25},
+	{3735,	 20},
+	{3717,	 17},
+	{3681,	 13},
+	{3664,	  8},
+	{3651,	  6},
+	{3635,	  5},
+	{3560,	  3},
+	{3408,    1},
+	{3247,	  0},
+};
+
+static struct abx500_v_to_cap cap_tbl_B_thermistor[] = {
+	{4161,	100},
+	{4124,	 98},
+	{4044,	 90},
+	{4003,	 85},
+	{3966,	 80},
+	{3933,	 75},
+	{3888,	 67},
+	{3849,	 60},
+	{3813,	 55},
+	{3787,	 47},
+	{3772,	 30},
+	{3751,	 25},
+	{3718,	 20},
+	{3681,	 16},
+	{3660,	 14},
+	{3589,	 10},
+	{3546,	  7},
+	{3495,	  4},
+	{3404,	  2},
+	{3250,	  0},
+};
+
+static struct abx500_v_to_cap cap_tbl[] = {
+	{4186,	100},
+	{4163,	 99},
+	{4114,	 95},
+	{4068,	 90},
+	{3990,	 80},
+	{3926,	 70},
+	{3898,	 65},
+	{3866,	 60},
+	{3833,	 55},
+	{3812,	 50},
+	{3787,	 40},
+	{3768,	 30},
+	{3747,	 25},
+	{3730,	 20},
+	{3705,	 15},
+	{3699,	 14},
+	{3684,	 12},
+	{3672,	  9},
+	{3657,	  7},
+	{3638,	  6},
+	{3556,	  4},
+	{3424,	  2},
+	{3317,	  1},
+	{3094,	  0},
+};
+
+/*
+ * Note that the res_to_temp table must be strictly sorted by falling
+ * resistance values to work.
+ */
+static struct abx500_res_to_temp temp_tbl[] = {
+	{-5, 214834},
+	{ 0, 162943},
+	{ 5, 124820},
+	{10,  96520},
+	{15,  75306},
+	{20,  59254},
+	{25,  47000},
+	{30,  37566},
+	{35,  30245},
+	{40,  24520},
+	{45,  20010},
+	{50,  16432},
+	{55,  13576},
+	{60,  11280},
+	{65,   9425},
+};
+
+/*
+ * Note that the batres_vs_temp table must be strictly sorted by falling
+ * temperature values to work.
+ */
+static struct batres_vs_temp temp_to_batres_tbl_thermistor[] = {
+	{ 40, 120},
+	{ 30, 135},
+	{ 20, 165},
+	{ 10, 230},
+	{ 00, 325},
+	{-10, 445},
+	{-20, 595},
+};
+
+/*
+ * Note that the batres_vs_temp table must be strictly sorted by falling
+ * temperature values to work.
+ */
+static struct batres_vs_temp temp_to_batres_tbl_ext_thermistor[] = {
+	{ 60, 300},
+	{ 30, 300},
+	{ 20, 300},
+	{ 10, 300},
+	{ 00, 300},
+	{-10, 300},
+	{-20, 300},
+};
+
+/* battery resistance table for LI ION 9100 battery */
+static struct batres_vs_temp temp_to_batres_tbl_9100[] = {
+	{ 60, 180},
+	{ 30, 180},
+	{ 20, 180},
+	{ 10, 180},
+	{ 00, 180},
+	{-10, 180},
+	{-20, 180},
+};
+
+static struct abx500_battery_type bat_type_thermistor[] = {
+[BATTERY_UNKNOWN] = {
+	/* First element always represent the UNKNOWN battery */
+	.name = POWER_SUPPLY_TECHNOLOGY_UNKNOWN,
+	.resis_high = 0,
+	.resis_low = 0,
+	.battery_resistance = 300,
+	.charge_full_design = 612,
+	.nominal_voltage = 3700,
+	.termination_vol = 4050,
+	.termination_curr = 200,
+	.recharge_vol = 3990,
+	.normal_cur_lvl = 400,
+	.normal_vol_lvl = 4100,
+	.maint_a_cur_lvl = 400,
+	.maint_a_vol_lvl = 4050,
+	.maint_a_chg_timer_h = 60,
+	.maint_b_cur_lvl = 400,
+	.maint_b_vol_lvl = 4000,
+	.maint_b_chg_timer_h = 200,
+	.low_high_cur_lvl = 300,
+	.low_high_vol_lvl = 4000,
+	.n_temp_tbl_elements = ARRAY_SIZE(temp_tbl),
+	.r_to_t_tbl = temp_tbl,
+	.n_v_cap_tbl_elements = ARRAY_SIZE(cap_tbl),
+	.v_to_cap_tbl = cap_tbl,
+	.n_batres_tbl_elements = ARRAY_SIZE(temp_to_batres_tbl_thermistor),
+	.batres_tbl = temp_to_batres_tbl_thermistor,
+},
+{
+	.name = POWER_SUPPLY_TECHNOLOGY_LIPO,
+	.resis_high = 53407,
+	.resis_low = 12500,
+	.battery_resistance = 300,
+	.charge_full_design = 900,
+	.nominal_voltage = 3600,
+	.termination_vol = 4150,
+	.termination_curr = 80,
+	.recharge_vol = 4130,
+	.normal_cur_lvl = 700,
+	.normal_vol_lvl = 4200,
+	.maint_a_cur_lvl = 600,
+	.maint_a_vol_lvl = 4150,
+	.maint_a_chg_timer_h = 60,
+	.maint_b_cur_lvl = 600,
+	.maint_b_vol_lvl = 4100,
+	.maint_b_chg_timer_h = 200,
+	.low_high_cur_lvl = 300,
+	.low_high_vol_lvl = 4000,
+	.n_temp_tbl_elements = ARRAY_SIZE(temp_tbl_A_thermistor),
+	.r_to_t_tbl = temp_tbl_A_thermistor,
+	.n_v_cap_tbl_elements = ARRAY_SIZE(cap_tbl_A_thermistor),
+	.v_to_cap_tbl = cap_tbl_A_thermistor,
+	.n_batres_tbl_elements = ARRAY_SIZE(temp_to_batres_tbl_thermistor),
+	.batres_tbl = temp_to_batres_tbl_thermistor,
+
+},
+{
+	.name = POWER_SUPPLY_TECHNOLOGY_LIPO,
+	.resis_high = 165418,
+	.resis_low = 82869,
+	.battery_resistance = 300,
+	.charge_full_design = 900,
+	.nominal_voltage = 3600,
+	.termination_vol = 4150,
+	.termination_curr = 80,
+	.recharge_vol = 4130,
+	.normal_cur_lvl = 700,
+	.normal_vol_lvl = 4200,
+	.maint_a_cur_lvl = 600,
+	.maint_a_vol_lvl = 4150,
+	.maint_a_chg_timer_h = 60,
+	.maint_b_cur_lvl = 600,
+	.maint_b_vol_lvl = 4100,
+	.maint_b_chg_timer_h = 200,
+	.low_high_cur_lvl = 300,
+	.low_high_vol_lvl = 4000,
+	.n_temp_tbl_elements = ARRAY_SIZE(temp_tbl_B_thermistor),
+	.r_to_t_tbl = temp_tbl_B_thermistor,
+	.n_v_cap_tbl_elements = ARRAY_SIZE(cap_tbl_B_thermistor),
+	.v_to_cap_tbl = cap_tbl_B_thermistor,
+	.n_batres_tbl_elements = ARRAY_SIZE(temp_to_batres_tbl_thermistor),
+	.batres_tbl = temp_to_batres_tbl_thermistor,
+},
+};
+
+static struct abx500_battery_type bat_type_ext_thermistor[] = {
+[BATTERY_UNKNOWN] = {
+	/* First element always represent the UNKNOWN battery */
+	.name = POWER_SUPPLY_TECHNOLOGY_UNKNOWN,
+	.resis_high = 0,
+	.resis_low = 0,
+	.battery_resistance = 300,
+	.charge_full_design = 612,
+	.nominal_voltage = 3700,
+	.termination_vol = 4050,
+	.termination_curr = 200,
+	.recharge_vol = 3990,
+	.normal_cur_lvl = 400,
+	.normal_vol_lvl = 4100,
+	.maint_a_cur_lvl = 400,
+	.maint_a_vol_lvl = 4050,
+	.maint_a_chg_timer_h = 60,
+	.maint_b_cur_lvl = 400,
+	.maint_b_vol_lvl = 4000,
+	.maint_b_chg_timer_h = 200,
+	.low_high_cur_lvl = 300,
+	.low_high_vol_lvl = 4000,
+	.n_temp_tbl_elements = ARRAY_SIZE(temp_tbl),
+	.r_to_t_tbl = temp_tbl,
+	.n_v_cap_tbl_elements = ARRAY_SIZE(cap_tbl),
+	.v_to_cap_tbl = cap_tbl,
+	.n_batres_tbl_elements = ARRAY_SIZE(temp_to_batres_tbl_thermistor),
+	.batres_tbl = temp_to_batres_tbl_thermistor,
+},
+/*
+ * These are the batteries that doesn't have an internal NTC resistor to measure
+ * its temperature. The temperature in this case is measure with a NTC placed
+ * near the battery but on the PCB.
+ */
+{
+	.name = POWER_SUPPLY_TECHNOLOGY_LIPO,
+	.resis_high = 76000,
+	.resis_low = 53000,
+	.battery_resistance = 300,
+	.charge_full_design = 900,
+	.nominal_voltage = 3700,
+	.termination_vol = 4150,
+	.termination_curr = 100,
+	.recharge_vol = 4130,
+	.normal_cur_lvl = 700,
+	.normal_vol_lvl = 4200,
+	.maint_a_cur_lvl = 600,
+	.maint_a_vol_lvl = 4150,
+	.maint_a_chg_timer_h = 60,
+	.maint_b_cur_lvl = 600,
+	.maint_b_vol_lvl = 4100,
+	.maint_b_chg_timer_h = 200,
+	.low_high_cur_lvl = 300,
+	.low_high_vol_lvl = 4000,
+	.n_temp_tbl_elements = ARRAY_SIZE(temp_tbl),
+	.r_to_t_tbl = temp_tbl,
+	.n_v_cap_tbl_elements = ARRAY_SIZE(cap_tbl),
+	.v_to_cap_tbl = cap_tbl,
+	.n_batres_tbl_elements = ARRAY_SIZE(temp_to_batres_tbl_thermistor),
+	.batres_tbl = temp_to_batres_tbl_thermistor,
+},
+{
+	.name = POWER_SUPPLY_TECHNOLOGY_LION,
+	.resis_high = 30000,
+	.resis_low = 10000,
+	.battery_resistance = 300,
+	.charge_full_design = 950,
+	.nominal_voltage = 3700,
+	.termination_vol = 4150,
+	.termination_curr = 100,
+	.recharge_vol = 4130,
+	.normal_cur_lvl = 700,
+	.normal_vol_lvl = 4200,
+	.maint_a_cur_lvl = 600,
+	.maint_a_vol_lvl = 4150,
+	.maint_a_chg_timer_h = 60,
+	.maint_b_cur_lvl = 600,
+	.maint_b_vol_lvl = 4100,
+	.maint_b_chg_timer_h = 200,
+	.low_high_cur_lvl = 300,
+	.low_high_vol_lvl = 4000,
+	.n_temp_tbl_elements = ARRAY_SIZE(temp_tbl),
+	.r_to_t_tbl = temp_tbl,
+	.n_v_cap_tbl_elements = ARRAY_SIZE(cap_tbl),
+	.v_to_cap_tbl = cap_tbl,
+	.n_batres_tbl_elements = ARRAY_SIZE(temp_to_batres_tbl_thermistor),
+	.batres_tbl = temp_to_batres_tbl_thermistor,
+},
+{
+	.name = POWER_SUPPLY_TECHNOLOGY_LION,
+	.resis_high = 95000,
+	.resis_low = 76001,
+	.battery_resistance = 300,
+	.charge_full_design = 950,
+	.nominal_voltage = 3700,
+	.termination_vol = 4150,
+	.termination_curr = 100,
+	.recharge_vol = 4130,
+	.normal_cur_lvl = 700,
+	.normal_vol_lvl = 4200,
+	.maint_a_cur_lvl = 600,
+	.maint_a_vol_lvl = 4150,
+	.maint_a_chg_timer_h = 60,
+	.maint_b_cur_lvl = 600,
+	.maint_b_vol_lvl = 4100,
+	.maint_b_chg_timer_h = 200,
+	.low_high_cur_lvl = 300,
+	.low_high_vol_lvl = 4000,
+	.n_temp_tbl_elements = ARRAY_SIZE(temp_tbl),
+	.r_to_t_tbl = temp_tbl,
+	.n_v_cap_tbl_elements = ARRAY_SIZE(cap_tbl),
+	.v_to_cap_tbl = cap_tbl,
+	.n_batres_tbl_elements = ARRAY_SIZE(temp_to_batres_tbl_thermistor),
+	.batres_tbl = temp_to_batres_tbl_thermistor,
+},
+};
+
+static const struct abx500_bm_capacity_levels cap_levels = {
+	.critical	= 2,
+	.low		= 10,
+	.normal		= 70,
+	.high		= 95,
+	.full		= 100,
+};
+
+static const struct abx500_fg_parameters fg = {
+	.recovery_sleep_timer = 10,
+	.recovery_total_time = 100,
+	.init_timer = 1,
+	.init_discard_time = 5,
+	.init_total_time = 40,
+	.high_curr_time = 60,
+	.accu_charging = 30,
+	.accu_high_curr = 30,
+	.high_curr_threshold = 50,
+	.lowbat_threshold = 3100,
+	.battok_falling_th_sel0 = 2860,
+	.battok_raising_th_sel1 = 2860,
+	.user_cap_limit = 15,
+	.maint_thres = 97,
+};
+
+static const struct abx500_maxim_parameters maxi_params = {
+	.ena_maxi = true,
+	.chg_curr = 910,
+	.wait_cycles = 10,
+	.charger_curr_step = 100,
+};
+
+static const struct abx500_bm_charger_parameters chg = {
+	.usb_volt_max		= 5500,
+	.usb_curr_max		= 1500,
+	.ac_volt_max		= 7500,
+	.ac_curr_max		= 1500,
+};
+
+struct abx500_bm_data ab8500_bm_data = {
+	.temp_under		= 3,
+	.temp_low		= 8,
+	.temp_high		= 43,
+	.temp_over		= 48,
+	.main_safety_tmr_h	= 4,
+	.temp_interval_chg	= 20,
+	.temp_interval_nochg	= 120,
+	.usb_safety_tmr_h	= 4,
+	.bkup_bat_v		= BUP_VCH_SEL_2P6V,
+	.bkup_bat_i		= BUP_ICH_SEL_150UA,
+	.no_maintenance		= false,
+	.adc_therm		= ABx500_ADC_THERM_BATCTRL,
+	.chg_unknown_bat	= false,
+	.enable_overshoot	= false,
+	.fg_res			= 100,
+	.cap_levels		= &cap_levels,
+	.bat_type		= bat_type_thermistor,
+	.n_btypes		= 3,
+	.batt_id		= 0,
+	.interval_charging	= 5,
+	.interval_not_charging	= 120,
+	.temp_hysteresis	= 3,
+	.gnd_lift_resistance	= 34,
+	.maxi			= &maxi_params,
+	.chg_params		= &chg,
+	.fg_params		= &fg,
+};
+
+int __devinit
+bmdevs_of_probe(struct device *dev,
+		struct device_node *np,
+		struct abx500_bm_data **battery)
+{
+	struct	abx500_battery_type *btype;
+	struct  device_node *np_bat_supply;
+	struct	abx500_bm_data *bat;
+	const char *btech;
+	char bat_tech[8];
+	int i, thermistor;
+
+	*battery = &ab8500_bm_data;
+
+	/* get phandle to 'battery-info' node */
+	np_bat_supply = of_parse_phandle(np, "battery", 0);
+	if (!np_bat_supply) {
+		dev_err(dev, "missing property battery\n");
+		return -EINVAL;
+	}
+	if (of_property_read_bool(np_bat_supply,
+			"thermistor-on-batctrl"))
+		thermistor = NTC_INTERNAL;
+	else
+		thermistor = NTC_EXTERNAL;
+
+	bat = *battery;
+	if (thermistor == NTC_EXTERNAL) {
+		bat->n_btypes  = 4;
+		bat->bat_type  = bat_type_ext_thermistor;
+		bat->adc_therm = ABx500_ADC_THERM_BATTEMP;
+	}
+	btech = of_get_property(np_bat_supply,
+		"stericsson,battery-type", NULL);
+	if (!btech) {
+		dev_warn(dev, "missing property battery-name/type\n");
+		strcpy(bat_tech, "UNKNOWN");
+	} else {
+		strcpy(bat_tech, btech);
+	}
+
+	if (strncmp(bat_tech, "LION", 4) == 0) {
+		bat->no_maintenance  = true;
+		bat->chg_unknown_bat = true;
+		bat->bat_type[BATTERY_UNKNOWN].charge_full_design = 2600;
+		bat->bat_type[BATTERY_UNKNOWN].termination_vol    = 4150;
+		bat->bat_type[BATTERY_UNKNOWN].recharge_vol	  = 4130;
+		bat->bat_type[BATTERY_UNKNOWN].normal_cur_lvl	  = 520;
+		bat->bat_type[BATTERY_UNKNOWN].normal_vol_lvl	  = 4200;
+	}
+	/* select the battery resolution table */
+	for (i = 0; i < bat->n_btypes; ++i) {
+		btype = (bat->bat_type + i);
+		if (thermistor == NTC_EXTERNAL) {
+			btype->batres_tbl =
+				temp_to_batres_tbl_ext_thermistor;
+		} else if (strncmp(bat_tech, "LION", 4) == 0) {
+			btype->batres_tbl =
+				temp_to_batres_tbl_9100;
+		} else {
+			btype->batres_tbl =
+				temp_to_batres_tbl_thermistor;
+		}
+	}
+	of_node_put(np_bat_supply);
+	return 0;
+}
diff --git a/drivers/power/ab8500_btemp.c b/drivers/power/ab8500_btemp.c
index e3b6395b20dd..abc2abc16a5d 100644
--- a/drivers/power/ab8500_btemp.c
+++ b/drivers/power/ab8500_btemp.c
@@ -93,7 +93,7 @@ struct ab8500_btemp {
 	struct ab8500 *parent;
 	struct ab8500_gpadc *gpadc;
 	struct ab8500_fg *fg;
-	struct abx500_btemp_platform_data *pdata;
+	struct abx500_bmdevs_plat_data *pdata;
 	struct abx500_bm_data *bat;
 	struct power_supply btemp_psy;
 	struct ab8500_btemp_events events;
@@ -962,10 +962,10 @@ static int __devexit ab8500_btemp_remove(struct platform_device *pdev)
 
 static int __devinit ab8500_btemp_probe(struct platform_device *pdev)
 {
+	struct abx500_bmdevs_plat_data *plat_data = pdev->dev.platform_data;
+	struct ab8500_btemp *di;
 	int irq, i, ret = 0;
 	u8 val;
-	struct abx500_bm_plat_data *plat_data = pdev->dev.platform_data;
-	struct ab8500_btemp *di;
 
 	if (!plat_data) {
 		dev_err(&pdev->dev, "No platform data\n");
@@ -982,21 +982,13 @@ static int __devinit ab8500_btemp_probe(struct platform_device *pdev)
 	di->gpadc = ab8500_gpadc_get("ab8500-gpadc.0");
 
 	/* get btemp specific platform data */
-	di->pdata = plat_data->btemp;
+	di->pdata = plat_data;
 	if (!di->pdata) {
 		dev_err(di->dev, "no btemp platform data supplied\n");
 		ret = -EINVAL;
 		goto free_device_info;
 	}
 
-	/* get battery specific platform data */
-	di->bat = plat_data->battery;
-	if (!di->bat) {
-		dev_err(di->dev, "no battery platform data supplied\n");
-		ret = -EINVAL;
-		goto free_device_info;
-	}
-
 	/* BTEMP supply */
 	di->btemp_psy.name = "ab8500_btemp";
 	di->btemp_psy.type = POWER_SUPPLY_TYPE_BATTERY;
diff --git a/drivers/power/ab8500_charger.c b/drivers/power/ab8500_charger.c
index 26ff759e2220..723edb47b1d8 100644
--- a/drivers/power/ab8500_charger.c
+++ b/drivers/power/ab8500_charger.c
@@ -220,7 +220,7 @@ struct ab8500_charger {
 	bool autopower;
 	struct ab8500 *parent;
 	struct ab8500_gpadc *gpadc;
-	struct abx500_charger_platform_data *pdata;
+	struct abx500_bmdevs_plat_data *pdata;
 	struct abx500_bm_data *bat;
 	struct ab8500_charger_event_flags flags;
 	struct ab8500_charger_usb_state usb_state;
@@ -2533,9 +2533,9 @@ static int __devexit ab8500_charger_remove(struct platform_device *pdev)
 
 static int __devinit ab8500_charger_probe(struct platform_device *pdev)
 {
-	int irq, i, charger_status, ret = 0;
-	struct abx500_bm_plat_data *plat_data = pdev->dev.platform_data;
+	struct abx500_bmdevs_plat_data *plat_data = pdev->dev.platform_data;
 	struct ab8500_charger *di;
+	int irq, i, charger_status, ret = 0;
 
 	if (!plat_data) {
 		dev_err(&pdev->dev, "No platform data\n");
@@ -2555,21 +2555,13 @@ static int __devinit ab8500_charger_probe(struct platform_device *pdev)
 	spin_lock_init(&di->usb_state.usb_lock);
 
 	/* get charger specific platform data */
-	di->pdata = plat_data->charger;
+	di->pdata = plat_data;
 	if (!di->pdata) {
 		dev_err(di->dev, "no charger platform data supplied\n");
 		ret = -EINVAL;
 		goto free_device_info;
 	}
 
-	/* get battery specific platform data */
-	di->bat = plat_data->battery;
-	if (!di->bat) {
-		dev_err(di->dev, "no battery platform data supplied\n");
-		ret = -EINVAL;
-		goto free_device_info;
-	}
-
 	di->autopower = false;
 
 	/* AC supply */
diff --git a/drivers/power/ab8500_fg.c b/drivers/power/ab8500_fg.c
index 2db8cc254399..ed62ef788eb5 100644
--- a/drivers/power/ab8500_fg.c
+++ b/drivers/power/ab8500_fg.c
@@ -22,15 +22,16 @@
 #include <linux/platform_device.h>
 #include <linux/power_supply.h>
 #include <linux/kobject.h>
-#include <linux/mfd/abx500/ab8500.h>
-#include <linux/mfd/abx500.h>
 #include <linux/slab.h>
-#include <linux/mfd/abx500/ab8500-bm.h>
 #include <linux/delay.h>
-#include <linux/mfd/abx500/ab8500-gpadc.h>
-#include <linux/mfd/abx500.h>
 #include <linux/time.h>
+#include <linux/of.h>
 #include <linux/completion.h>
+#include <linux/mfd/core.h>
+#include <linux/mfd/abx500.h>
+#include <linux/mfd/abx500/ab8500.h>
+#include <linux/mfd/abx500/ab8500-bm.h>
+#include <linux/mfd/abx500/ab8500-gpadc.h>
 
 #define MILLI_TO_MICRO			1000
 #define FG_LSB_IN_MA			1627
@@ -172,7 +173,6 @@ struct inst_curr_result_list {
  * @avg_cap:		Average capacity filter
  * @parent:		Pointer to the struct ab8500
  * @gpadc:		Pointer to the struct gpadc
- * @pdata:		Pointer to the abx500_fg platform data
  * @bat:		Pointer to the abx500_bm platform data
  * @fg_psy:		Structure that holds the FG specific battery properties
  * @fg_wq:		Work queue for running the FG algorithm
@@ -212,7 +212,6 @@ struct ab8500_fg {
 	struct ab8500_fg_avg_cap avg_cap;
 	struct ab8500 *parent;
 	struct ab8500_gpadc *gpadc;
-	struct abx500_fg_platform_data *pdata;
 	struct abx500_bm_data *bat;
 	struct power_supply fg_psy;
 	struct workqueue_struct *fg_wq;
@@ -2429,7 +2428,6 @@ static int __devexit ab8500_fg_remove(struct platform_device *pdev)
 	flush_scheduled_work();
 	power_supply_unregister(&di->fg_psy);
 	platform_set_drvdata(pdev, NULL);
-	kfree(di);
 	return ret;
 }
 
@@ -2442,21 +2440,39 @@ static struct ab8500_fg_interrupts ab8500_fg_irq[] = {
 	{"CCEOC", ab8500_fg_cc_data_end_handler},
 };
 
+static char *supply_interface[] = {
+	"ab8500_chargalg",
+	"ab8500_usb",
+};
+
 static int __devinit ab8500_fg_probe(struct platform_device *pdev)
 {
+	struct device_node *np = pdev->dev.of_node;
+	struct ab8500_fg *di;
 	int i, irq;
 	int ret = 0;
-	struct abx500_bm_plat_data *plat_data = pdev->dev.platform_data;
-	struct ab8500_fg *di;
-
-	if (!plat_data) {
-		dev_err(&pdev->dev, "No platform data\n");
-		return -EINVAL;
-	}
 
-	di = kzalloc(sizeof(*di), GFP_KERNEL);
-	if (!di)
+	di = devm_kzalloc(&pdev->dev, sizeof(*di), GFP_KERNEL);
+	if (!di) {
+		dev_err(&pdev->dev, "%s no mem for ab8500_fg\n", __func__);
 		return -ENOMEM;
+	}
+	di->bat = pdev->mfd_cell->platform_data;
+	if (!di->bat) {
+		if (np) {
+			ret = bmdevs_of_probe(&pdev->dev, np, &di->bat);
+			if (ret) {
+				dev_err(&pdev->dev,
+					"failed to get battery information\n");
+				return ret;
+			}
+		} else {
+			dev_err(&pdev->dev, "missing dt node for ab8500_fg\n");
+			return -EINVAL;
+		}
+	} else {
+		dev_info(&pdev->dev, "falling back to legacy platform data\n");
+	}
 
 	mutex_init(&di->cc_lock);
 
@@ -2465,29 +2481,13 @@ static int __devinit ab8500_fg_probe(struct platform_device *pdev)
 	di->parent = dev_get_drvdata(pdev->dev.parent);
 	di->gpadc = ab8500_gpadc_get("ab8500-gpadc.0");
 
-	/* get fg specific platform data */
-	di->pdata = plat_data->fg;
-	if (!di->pdata) {
-		dev_err(di->dev, "no fg platform data supplied\n");
-		ret = -EINVAL;
-		goto free_device_info;
-	}
-
-	/* get battery specific platform data */
-	di->bat = plat_data->battery;
-	if (!di->bat) {
-		dev_err(di->dev, "no battery platform data supplied\n");
-		ret = -EINVAL;
-		goto free_device_info;
-	}
-
 	di->fg_psy.name = "ab8500_fg";
 	di->fg_psy.type = POWER_SUPPLY_TYPE_BATTERY;
 	di->fg_psy.properties = ab8500_fg_props;
 	di->fg_psy.num_properties = ARRAY_SIZE(ab8500_fg_props);
 	di->fg_psy.get_property = ab8500_fg_get_property;
-	di->fg_psy.supplied_to = di->pdata->supplied_to;
-	di->fg_psy.num_supplicants = di->pdata->num_supplicants;
+	di->fg_psy.supplied_to = supply_interface;
+	di->fg_psy.num_supplicants = ARRAY_SIZE(supply_interface),
 	di->fg_psy.external_power_changed = ab8500_fg_external_power_changed;
 
 	di->bat_cap.max_mah_design = MILLI_TO_MICRO *
@@ -2506,8 +2506,7 @@ static int __devinit ab8500_fg_probe(struct platform_device *pdev)
 	di->fg_wq = create_singlethread_workqueue("ab8500_fg_wq");
 	if (di->fg_wq == NULL) {
 		dev_err(di->dev, "failed to create work queue\n");
-		ret = -ENOMEM;
-		goto free_device_info;
+		return -ENOMEM;
 	}
 
 	/* Init work for running the fg algorithm instantly */
@@ -2606,12 +2605,14 @@ free_irq:
 	}
 free_inst_curr_wq:
 	destroy_workqueue(di->fg_wq);
-free_device_info:
-	kfree(di);
-
 	return ret;
 }
 
+static const struct of_device_id ab8500_fg_match[] = {
+	{ .compatible = "stericsson,ab8500-fg", },
+	{ },
+};
+
 static struct platform_driver ab8500_fg_driver = {
 	.probe = ab8500_fg_probe,
 	.remove = __devexit_p(ab8500_fg_remove),
@@ -2620,6 +2621,7 @@ static struct platform_driver ab8500_fg_driver = {
 	.driver = {
 		.name = "ab8500-fg",
 		.owner = THIS_MODULE,
+		.of_match_table = ab8500_fg_match,
 	},
 };
 
diff --git a/drivers/power/abx500_chargalg.c b/drivers/power/abx500_chargalg.c
index 4d302803ffcc..758ea76de2f8 100644
--- a/drivers/power/abx500_chargalg.c
+++ b/drivers/power/abx500_chargalg.c
@@ -231,7 +231,7 @@ struct abx500_chargalg {
 	struct abx500_chargalg_charger_info chg_info;
 	struct abx500_chargalg_battery_data batt_data;
 	struct abx500_chargalg_suspension_status susp_status;
-	struct abx500_chargalg_platform_data *pdata;
+	struct abx500_bmdevs_plat_data *pdata;
 	struct abx500_bm_data *bat;
 	struct power_supply chargalg_psy;
 	struct ux500_charger *ac_chg;
@@ -1802,7 +1802,7 @@ static int __devexit abx500_chargalg_remove(struct platform_device *pdev)
 
 static int __devinit abx500_chargalg_probe(struct platform_device *pdev)
 {
-	struct abx500_bm_plat_data *plat_data;
+	struct abx500_bmdevs_plat_data *plat_data;
 	int ret = 0;
 
 	struct abx500_chargalg *di =
@@ -1812,10 +1812,8 @@ static int __devinit abx500_chargalg_probe(struct platform_device *pdev)
 
 	/* get device struct */
 	di->dev = &pdev->dev;
-
 	plat_data = pdev->dev.platform_data;
-	di->pdata = plat_data->chargalg;
-	di->bat = plat_data->battery;
+	di->pdata = plat_data;
 
 	/* chargalg supply */
 	di->chargalg_psy.name = "abx500_chargalg";
diff --git a/include/linux/mfd/abx500.h b/include/linux/mfd/abx500.h
index 5d5298d56026..33f2c58554f4 100644
--- a/include/linux/mfd/abx500.h
+++ b/include/linux/mfd/abx500.h
@@ -267,39 +267,27 @@ struct abx500_bm_data {
 	int gnd_lift_resistance;
 	const struct abx500_maxim_parameters *maxi;
 	const struct abx500_bm_capacity_levels *cap_levels;
-	const struct abx500_battery_type *bat_type;
+	struct abx500_battery_type *bat_type;
 	const struct abx500_bm_charger_parameters *chg_params;
 	const struct abx500_fg_parameters *fg_params;
 };
 
-struct abx500_chargalg_platform_data {
-	char **supplied_to;
-	size_t num_supplicants;
-};
-
-struct abx500_charger_platform_data {
-	char **supplied_to;
-	size_t num_supplicants;
-	bool autopower_cfg;
-};
+extern struct abx500_bm_data ab8500_bm_data;
 
-struct abx500_btemp_platform_data {
-	char **supplied_to;
-	size_t num_supplicants;
+struct abx500_bmdevs_plat_data {
+	char	**supplied_to;
+	size_t	num_supplicants;
+	bool	autopower_cfg;
 };
 
-struct abx500_fg_platform_data {
-	char **supplied_to;
-	size_t num_supplicants;
+enum {
+	NTC_EXTERNAL = 0,
+	NTC_INTERNAL,
 };
 
-struct abx500_bm_plat_data {
-	struct abx500_bm_data *battery;
-	struct abx500_charger_platform_data *charger;
-	struct abx500_btemp_platform_data *btemp;
-	struct abx500_fg_platform_data *fg;
-	struct abx500_chargalg_platform_data *chargalg;
-};
+int bmdevs_of_probe(struct device *dev,
+		struct device_node *np,
+		struct abx500_bm_data **battery);
 
 int abx500_set_register_interruptible(struct device *dev, u8 bank, u8 reg,
 	u8 value);
-- 
cgit v1.2.3


From a12810ab9fcf0c9fd5e50b5e350a3ffbeaa571be Mon Sep 17 00:00:00 2001
From: "Rajanikanth H.V" <rajanikanth.hv@stericsson.com>
Date: Wed, 31 Oct 2012 15:40:33 +0000
Subject: ab8500: Add devicetree support for chargalg

This patch adds device tree support for charging algorithm driver

Signed-off-by: Rajanikanth H.V <rajanikanth.hv@stericsson.com>
Signed-off-by: Anton Vorontsov <anton.vorontsov@linaro.org>
---
 .../bindings/power_supply/ab8500/chargalg.txt      | 16 +++++++
 arch/arm/boot/dts/dbx5x0.dtsi                      |  5 ++
 drivers/mfd/ab8500-core.c                          |  5 ++
 drivers/power/abx500_chargalg.c                    | 54 +++++++++++++++-------
 include/linux/mfd/abx500.h                         |  6 ---
 5 files changed, 64 insertions(+), 22 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/power_supply/ab8500/chargalg.txt

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/power_supply/ab8500/chargalg.txt b/Documentation/devicetree/bindings/power_supply/ab8500/chargalg.txt
new file mode 100644
index 000000000000..ef5328371122
--- /dev/null
+++ b/Documentation/devicetree/bindings/power_supply/ab8500/chargalg.txt
@@ -0,0 +1,16 @@
+=== AB8500 Charging Algorithm Driver ===
+
+The properties below describes the node for chargalg driver.
+
+Required Properties:
+- compatible = Shall be: "stericsson,ab8500-chargalg"
+- battery = Shall be battery specific information
+
+Example:
+ab8500_chargalg {
+	compatible = "stericsson,ab8500-chargalg";
+	battery	   = <&ab8500_battery>;
+};
+
+For information on battery specific node, Ref:
+Documentation/devicetree/bindings/power_supply/ab8500/fg.txt
diff --git a/arch/arm/boot/dts/dbx5x0.dtsi b/arch/arm/boot/dts/dbx5x0.dtsi
index 1d4ad3098aeb..12a68af44903 100644
--- a/arch/arm/boot/dts/dbx5x0.dtsi
+++ b/arch/arm/boot/dts/dbx5x0.dtsi
@@ -373,6 +373,11 @@
 					vddadc-supply	= <&ab8500_ldo_tvout_reg>;
 				};
 
+				ab8500_chargalg {
+					compatible	= "stericsson,ab8500-chargalg";
+					battery		= <&ab8500_battery>;
+				};
+
 				ab8500_usb {
 					compatible = "stericsson,ab8500-usb";
 					interrupts = < 90 0x4
diff --git a/drivers/mfd/ab8500-core.c b/drivers/mfd/ab8500-core.c
index c7a120bfd50a..5ec70f26b9d5 100644
--- a/drivers/mfd/ab8500-core.c
+++ b/drivers/mfd/ab8500-core.c
@@ -1071,8 +1071,13 @@ static struct mfd_cell __devinitdata ab8500_bm_devs[] = {
 	},
 	{
 		.name = "ab8500-chargalg",
+		.of_compatible = "stericsson,ab8500-chargalg",
 		.num_resources = ARRAY_SIZE(ab8500_chargalg_resources),
 		.resources = ab8500_chargalg_resources,
+#ifndef CONFIG_OF
+		.platform_data = &ab8500_bm_data,
+		.pdata_size = sizeof(ab8500_bm_data),
+#endif
 	},
 };
 
diff --git a/drivers/power/abx500_chargalg.c b/drivers/power/abx500_chargalg.c
index 758ea76de2f8..dcdc4393b9e7 100644
--- a/drivers/power/abx500_chargalg.c
+++ b/drivers/power/abx500_chargalg.c
@@ -21,6 +21,8 @@
 #include <linux/completion.h>
 #include <linux/workqueue.h>
 #include <linux/kobject.h>
+#include <linux/of.h>
+#include <linux/mfd/core.h>
 #include <linux/mfd/abx500.h>
 #include <linux/mfd/abx500/ux500_chargalg.h>
 #include <linux/mfd/abx500/ab8500-bm.h>
@@ -205,7 +207,6 @@ enum maxim_ret {
  * @chg_info:		information about connected charger types
  * @batt_data:		data of the battery
  * @susp_status:	current charger suspension status
- * @pdata:		pointer to the abx500_chargalg platform data
  * @bat:		pointer to the abx500_bm platform data
  * @chargalg_psy:	structure that holds the battery properties exposed by
  *			the charging algorithm
@@ -231,7 +232,6 @@ struct abx500_chargalg {
 	struct abx500_chargalg_charger_info chg_info;
 	struct abx500_chargalg_battery_data batt_data;
 	struct abx500_chargalg_suspension_status susp_status;
-	struct abx500_bmdevs_plat_data *pdata;
 	struct abx500_bm_data *bat;
 	struct power_supply chargalg_psy;
 	struct ux500_charger *ac_chg;
@@ -1795,25 +1795,44 @@ static int __devexit abx500_chargalg_remove(struct platform_device *pdev)
 	flush_scheduled_work();
 	power_supply_unregister(&di->chargalg_psy);
 	platform_set_drvdata(pdev, NULL);
-	kfree(di);
 
 	return 0;
 }
 
+static char *supply_interface[] = {
+	"ab8500_fg",
+};
+
 static int __devinit abx500_chargalg_probe(struct platform_device *pdev)
 {
-	struct abx500_bmdevs_plat_data *plat_data;
+	struct device_node *np = pdev->dev.of_node;
+	struct abx500_chargalg *di;
 	int ret = 0;
 
-	struct abx500_chargalg *di =
-		kzalloc(sizeof(struct abx500_chargalg), GFP_KERNEL);
-	if (!di)
+	di = devm_kzalloc(&pdev->dev, sizeof(*di), GFP_KERNEL);
+	if (!di) {
+		dev_err(&pdev->dev, "%s no mem for ab8500_chargalg\n", __func__);
 		return -ENOMEM;
+	}
+	di->bat = pdev->mfd_cell->platform_data;
+	if (!di->bat) {
+		if (np) {
+			ret = bmdevs_of_probe(&pdev->dev, np, &di->bat);
+			if (ret) {
+				dev_err(&pdev->dev,
+					"failed to get battery information\n");
+				return ret;
+			}
+		} else {
+			dev_err(&pdev->dev, "missing dt node for ab8500_chargalg\n");
+			return -EINVAL;
+		}
+	} else {
+		dev_info(&pdev->dev, "falling back to legacy platform data\n");
+	}
 
 	/* get device struct */
 	di->dev = &pdev->dev;
-	plat_data = pdev->dev.platform_data;
-	di->pdata = plat_data;
 
 	/* chargalg supply */
 	di->chargalg_psy.name = "abx500_chargalg";
@@ -1821,8 +1840,8 @@ static int __devinit abx500_chargalg_probe(struct platform_device *pdev)
 	di->chargalg_psy.properties = abx500_chargalg_props;
 	di->chargalg_psy.num_properties = ARRAY_SIZE(abx500_chargalg_props);
 	di->chargalg_psy.get_property = abx500_chargalg_get_property;
-	di->chargalg_psy.supplied_to = di->pdata->supplied_to;
-	di->chargalg_psy.num_supplicants = di->pdata->num_supplicants;
+	di->chargalg_psy.supplied_to = supply_interface;
+	di->chargalg_psy.num_supplicants = ARRAY_SIZE(supply_interface),
 	di->chargalg_psy.external_power_changed =
 		abx500_chargalg_external_power_changed;
 
@@ -1842,7 +1861,7 @@ static int __devinit abx500_chargalg_probe(struct platform_device *pdev)
 		create_singlethread_workqueue("abx500_chargalg_wq");
 	if (di->chargalg_wq == NULL) {
 		dev_err(di->dev, "failed to create work queue\n");
-		goto free_device_info;
+		return -ENOMEM;
 	}
 
 	/* Init work for chargalg */
@@ -1883,20 +1902,23 @@ free_psy:
 	power_supply_unregister(&di->chargalg_psy);
 free_chargalg_wq:
 	destroy_workqueue(di->chargalg_wq);
-free_device_info:
-	kfree(di);
-
 	return ret;
 }
 
+static const struct of_device_id ab8500_chargalg_match[] = {
+	{ .compatible = "stericsson,ab8500-chargalg", },
+	{ },
+};
+
 static struct platform_driver abx500_chargalg_driver = {
 	.probe = abx500_chargalg_probe,
 	.remove = __devexit_p(abx500_chargalg_remove),
 	.suspend = abx500_chargalg_suspend,
 	.resume = abx500_chargalg_resume,
 	.driver = {
-		.name = "abx500-chargalg",
+		.name = "ab8500-chargalg",
 		.owner = THIS_MODULE,
+		.of_match_table = ab8500_chargalg_match,
 	},
 };
 
diff --git a/include/linux/mfd/abx500.h b/include/linux/mfd/abx500.h
index 33f2c58554f4..2138bd33021a 100644
--- a/include/linux/mfd/abx500.h
+++ b/include/linux/mfd/abx500.h
@@ -274,12 +274,6 @@ struct abx500_bm_data {
 
 extern struct abx500_bm_data ab8500_bm_data;
 
-struct abx500_bmdevs_plat_data {
-	char	**supplied_to;
-	size_t	num_supplicants;
-	bool	autopower_cfg;
-};
-
 enum {
 	NTC_EXTERNAL = 0,
 	NTC_INTERNAL,
-- 
cgit v1.2.3


From 0857ba3c24c308f42a242fe8a1894772750230ce Mon Sep 17 00:00:00 2001
From: Aaro Koskinen <aaro.koskinen@iki.fi>
Date: Sun, 18 Nov 2012 18:36:19 +0200
Subject: i2c: i2c-cbus-gpio: introduce driver

Add i2c driver to enable access to devices behind CBUS on Nokia Internet
Tablets.

The patch also adds CBUS I2C configuration for N8x0 which is one of the
users of this driver.

Acked-by: Felipe Balbi <balbi@ti.com>
Acked-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Aaro Koskinen <aaro.koskinen@iki.fi>
Signed-off-by: Wolfram Sang <w.sang@pengutronix.de>
---
 .../devicetree/bindings/i2c/i2c-cbus-gpio.txt      |  27 ++
 arch/arm/mach-omap2/board-n8x0.c                   |  42 +++
 drivers/i2c/busses/Kconfig                         |  10 +
 drivers/i2c/busses/Makefile                        |   1 +
 drivers/i2c/busses/i2c-cbus-gpio.c                 | 300 +++++++++++++++++++++
 include/linux/platform_data/i2c-cbus-gpio.h        |  27 ++
 6 files changed, 407 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/i2c/i2c-cbus-gpio.txt
 create mode 100644 drivers/i2c/busses/i2c-cbus-gpio.c
 create mode 100644 include/linux/platform_data/i2c-cbus-gpio.h

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/i2c/i2c-cbus-gpio.txt b/Documentation/devicetree/bindings/i2c/i2c-cbus-gpio.txt
new file mode 100644
index 000000000000..8ce9cd2855b5
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/i2c-cbus-gpio.txt
@@ -0,0 +1,27 @@
+Device tree bindings for i2c-cbus-gpio driver
+
+Required properties:
+	- compatible = "i2c-cbus-gpio";
+	- gpios: clk, dat, sel
+	- #address-cells = <1>;
+	- #size-cells = <0>;
+
+Optional properties:
+	- child nodes conforming to i2c bus binding
+
+Example:
+
+i2c@0 {
+	compatible = "i2c-cbus-gpio";
+	gpios = <&gpio 66 0 /* clk */
+		 &gpio 65 0 /* dat */
+		 &gpio 64 0 /* sel */
+		>;
+	#address-cells = <1>;
+	#size-cells = <0>;
+
+	retu-mfd: retu@1 {
+		compatible = "retu-mfd";
+		reg = <0x1>;
+	};
+};
diff --git a/arch/arm/mach-omap2/board-n8x0.c b/arch/arm/mach-omap2/board-n8x0.c
index d95f727ca39a..bbfd74263c42 100644
--- a/arch/arm/mach-omap2/board-n8x0.c
+++ b/arch/arm/mach-omap2/board-n8x0.c
@@ -16,10 +16,12 @@
 #include <linux/gpio.h>
 #include <linux/init.h>
 #include <linux/io.h>
+#include <linux/irq.h>
 #include <linux/stddef.h>
 #include <linux/i2c.h>
 #include <linux/spi/spi.h>
 #include <linux/usb/musb.h>
+#include <linux/platform_data/i2c-cbus-gpio.h>
 #include <linux/platform_data/spi-omap2-mcspi.h>
 #include <linux/platform_data/mtd-onenand-omap2.h>
 #include <sound/tlv320aic3x.h>
@@ -39,6 +41,45 @@
 #define TUSB6010_GPIO_ENABLE	0
 #define TUSB6010_DMACHAN	0x3f
 
+#if defined(CONFIG_I2C_CBUS_GPIO) || defined(CONFIG_I2C_CBUS_GPIO_MODULE)
+static struct i2c_cbus_platform_data n8x0_cbus_data = {
+	.clk_gpio = 66,
+	.dat_gpio = 65,
+	.sel_gpio = 64,
+};
+
+static struct platform_device n8x0_cbus_device = {
+	.name	= "i2c-cbus-gpio",
+	.id	= 3,
+	.dev	= {
+		.platform_data = &n8x0_cbus_data,
+	},
+};
+
+static struct i2c_board_info n8x0_i2c_board_info_3[] __initdata = {
+	{
+		I2C_BOARD_INFO("retu-mfd", 0x01),
+	},
+};
+
+static void __init n8x0_cbus_init(void)
+{
+	const int retu_irq_gpio = 108;
+
+	if (gpio_request_one(retu_irq_gpio, GPIOF_IN, "Retu IRQ"))
+		return;
+	irq_set_irq_type(gpio_to_irq(retu_irq_gpio), IRQ_TYPE_EDGE_RISING);
+	n8x0_i2c_board_info_3[0].irq = gpio_to_irq(retu_irq_gpio);
+	i2c_register_board_info(3, n8x0_i2c_board_info_3,
+				ARRAY_SIZE(n8x0_i2c_board_info_3));
+	platform_device_register(&n8x0_cbus_device);
+}
+#else /* CONFIG_I2C_CBUS_GPIO */
+static void __init n8x0_cbus_init(void)
+{
+}
+#endif /* CONFIG_I2C_CBUS_GPIO */
+
 #if defined(CONFIG_USB_MUSB_TUSB6010) || defined(CONFIG_USB_MUSB_TUSB6010_MODULE)
 /*
  * Enable or disable power to TUSB6010. When enabling, turn on 3.3 V and
@@ -677,6 +718,7 @@ static void __init n8x0_init_machine(void)
 	gpmc_onenand_init(board_onenand_data);
 	n8x0_mmc_init();
 	n8x0_usb_init();
+	n8x0_cbus_init();
 }
 
 MACHINE_START(NOKIA_N800, "Nokia N800")
diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig
index e9df4612b7eb..e949edf644d4 100644
--- a/drivers/i2c/busses/Kconfig
+++ b/drivers/i2c/busses/Kconfig
@@ -337,6 +337,16 @@ config I2C_BLACKFIN_TWI_CLK_KHZ
 	help
 	  The unit of the TWI clock is kHz.
 
+config I2C_CBUS_GPIO
+	tristate "CBUS I2C driver"
+	depends on GENERIC_GPIO
+	help
+	  Support for CBUS access using I2C API. Mostly relevant for Nokia
+	  Internet Tablets (770, N800 and N810).
+
+	  This driver can also be built as a module.  If so, the module
+	  will be called i2c-cbus-gpio.
+
 config I2C_CPM
 	tristate "Freescale CPM1 or CPM2 (MPC8xx/826x)"
 	depends on (CPM1 || CPM2) && OF_I2C
diff --git a/drivers/i2c/busses/Makefile b/drivers/i2c/busses/Makefile
index 395b516ffa08..f9e3e0b5c827 100644
--- a/drivers/i2c/busses/Makefile
+++ b/drivers/i2c/busses/Makefile
@@ -31,6 +31,7 @@ obj-$(CONFIG_I2C_POWERMAC)	+= i2c-powermac.o
 obj-$(CONFIG_I2C_AT91)		+= i2c-at91.o
 obj-$(CONFIG_I2C_AU1550)	+= i2c-au1550.o
 obj-$(CONFIG_I2C_BLACKFIN_TWI)	+= i2c-bfin-twi.o
+obj-$(CONFIG_I2C_CBUS_GPIO)	+= i2c-cbus-gpio.o
 obj-$(CONFIG_I2C_CPM)		+= i2c-cpm.o
 obj-$(CONFIG_I2C_DAVINCI)	+= i2c-davinci.o
 obj-$(CONFIG_I2C_DESIGNWARE_CORE)	+= i2c-designware-core.o
diff --git a/drivers/i2c/busses/i2c-cbus-gpio.c b/drivers/i2c/busses/i2c-cbus-gpio.c
new file mode 100644
index 000000000000..98386d659318
--- /dev/null
+++ b/drivers/i2c/busses/i2c-cbus-gpio.c
@@ -0,0 +1,300 @@
+/*
+ * CBUS I2C driver for Nokia Internet Tablets.
+ *
+ * Copyright (C) 2004-2010 Nokia Corporation
+ *
+ * Based on code written by Juha Yrjölä, David Weinehall, Mikko Ylinen and
+ * Felipe Balbi. Converted to I2C driver by Aaro Koskinen.
+ *
+ * This file is subject to the terms and conditions of the GNU General
+ * Public License. See the file "COPYING" in the main directory of this
+ * archive for more details.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/io.h>
+#include <linux/i2c.h>
+#include <linux/gpio.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/delay.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of_gpio.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+#include <linux/platform_data/i2c-cbus-gpio.h>
+
+/*
+ * Bit counts are derived from Nokia implementation. These should be checked
+ * if other CBUS implementations appear.
+ */
+#define CBUS_ADDR_BITS	3
+#define CBUS_REG_BITS	5
+
+struct cbus_host {
+	spinlock_t	lock;		/* host lock */
+	struct device	*dev;
+	int		clk_gpio;
+	int		dat_gpio;
+	int		sel_gpio;
+};
+
+/**
+ * cbus_send_bit - sends one bit over the bus
+ * @host: the host we're using
+ * @bit: one bit of information to send
+ */
+static void cbus_send_bit(struct cbus_host *host, unsigned bit)
+{
+	gpio_set_value(host->dat_gpio, bit ? 1 : 0);
+	gpio_set_value(host->clk_gpio, 1);
+	gpio_set_value(host->clk_gpio, 0);
+}
+
+/**
+ * cbus_send_data - sends @len amount of data over the bus
+ * @host: the host we're using
+ * @data: the data to send
+ * @len: size of the transfer
+ */
+static void cbus_send_data(struct cbus_host *host, unsigned data, unsigned len)
+{
+	int i;
+
+	for (i = len; i > 0; i--)
+		cbus_send_bit(host, data & (1 << (i - 1)));
+}
+
+/**
+ * cbus_receive_bit - receives one bit from the bus
+ * @host: the host we're using
+ */
+static int cbus_receive_bit(struct cbus_host *host)
+{
+	int ret;
+
+	gpio_set_value(host->clk_gpio, 1);
+	ret = gpio_get_value(host->dat_gpio);
+	gpio_set_value(host->clk_gpio, 0);
+	return ret;
+}
+
+/**
+ * cbus_receive_word - receives 16-bit word from the bus
+ * @host: the host we're using
+ */
+static int cbus_receive_word(struct cbus_host *host)
+{
+	int ret = 0;
+	int i;
+
+	for (i = 16; i > 0; i--) {
+		int bit = cbus_receive_bit(host);
+
+		if (bit < 0)
+			return bit;
+
+		if (bit)
+			ret |= 1 << (i - 1);
+	}
+	return ret;
+}
+
+/**
+ * cbus_transfer - transfers data over the bus
+ * @host: the host we're using
+ * @rw: read/write flag
+ * @dev: device address
+ * @reg: register address
+ * @data: if @rw == I2C_SBUS_WRITE data to send otherwise 0
+ */
+static int cbus_transfer(struct cbus_host *host, char rw, unsigned dev,
+			 unsigned reg, unsigned data)
+{
+	unsigned long flags;
+	int ret;
+
+	/* We don't want interrupts disturbing our transfer */
+	spin_lock_irqsave(&host->lock, flags);
+
+	/* Reset state and start of transfer, SEL stays down during transfer */
+	gpio_set_value(host->sel_gpio, 0);
+
+	/* Set the DAT pin to output */
+	gpio_direction_output(host->dat_gpio, 1);
+
+	/* Send the device address */
+	cbus_send_data(host, dev, CBUS_ADDR_BITS);
+
+	/* Send the rw flag */
+	cbus_send_bit(host, rw == I2C_SMBUS_READ);
+
+	/* Send the register address */
+	cbus_send_data(host, reg, CBUS_REG_BITS);
+
+	if (rw == I2C_SMBUS_WRITE) {
+		cbus_send_data(host, data, 16);
+		ret = 0;
+	} else {
+		ret = gpio_direction_input(host->dat_gpio);
+		if (ret) {
+			dev_dbg(host->dev, "failed setting direction\n");
+			goto out;
+		}
+		gpio_set_value(host->clk_gpio, 1);
+
+		ret = cbus_receive_word(host);
+		if (ret < 0) {
+			dev_dbg(host->dev, "failed receiving data\n");
+			goto out;
+		}
+	}
+
+	/* Indicate end of transfer, SEL goes up until next transfer */
+	gpio_set_value(host->sel_gpio, 1);
+	gpio_set_value(host->clk_gpio, 1);
+	gpio_set_value(host->clk_gpio, 0);
+
+out:
+	spin_unlock_irqrestore(&host->lock, flags);
+
+	return ret;
+}
+
+static int cbus_i2c_smbus_xfer(struct i2c_adapter	*adapter,
+			       u16			addr,
+			       unsigned short		flags,
+			       char			read_write,
+			       u8			command,
+			       int			size,
+			       union i2c_smbus_data	*data)
+{
+	struct cbus_host *chost = i2c_get_adapdata(adapter);
+	int ret;
+
+	if (size != I2C_SMBUS_WORD_DATA)
+		return -EINVAL;
+
+	ret = cbus_transfer(chost, read_write == I2C_SMBUS_READ, addr,
+			    command, data->word);
+	if (ret < 0)
+		return ret;
+
+	if (read_write == I2C_SMBUS_READ)
+		data->word = ret;
+
+	return 0;
+}
+
+static u32 cbus_i2c_func(struct i2c_adapter *adapter)
+{
+	return I2C_FUNC_SMBUS_READ_WORD_DATA | I2C_FUNC_SMBUS_WRITE_WORD_DATA;
+}
+
+static const struct i2c_algorithm cbus_i2c_algo = {
+	.smbus_xfer	= cbus_i2c_smbus_xfer,
+	.functionality	= cbus_i2c_func,
+};
+
+static int cbus_i2c_remove(struct platform_device *pdev)
+{
+	struct i2c_adapter *adapter = platform_get_drvdata(pdev);
+
+	return i2c_del_adapter(adapter);
+}
+
+static int cbus_i2c_probe(struct platform_device *pdev)
+{
+	struct i2c_adapter *adapter;
+	struct cbus_host *chost;
+	int ret;
+
+	adapter = devm_kzalloc(&pdev->dev, sizeof(struct i2c_adapter),
+			       GFP_KERNEL);
+	if (!adapter)
+		return -ENOMEM;
+
+	chost = devm_kzalloc(&pdev->dev, sizeof(*chost), GFP_KERNEL);
+	if (!chost)
+		return -ENOMEM;
+
+	if (pdev->dev.of_node) {
+		struct device_node *dnode = pdev->dev.of_node;
+		if (of_gpio_count(dnode) != 3)
+			return -ENODEV;
+		chost->clk_gpio = of_get_gpio(dnode, 0);
+		chost->dat_gpio = of_get_gpio(dnode, 1);
+		chost->sel_gpio = of_get_gpio(dnode, 2);
+	} else if (pdev->dev.platform_data) {
+		struct i2c_cbus_platform_data *pdata = pdev->dev.platform_data;
+		chost->clk_gpio = pdata->clk_gpio;
+		chost->dat_gpio = pdata->dat_gpio;
+		chost->sel_gpio = pdata->sel_gpio;
+	} else {
+		return -ENODEV;
+	}
+
+	adapter->owner		= THIS_MODULE;
+	adapter->class		= I2C_CLASS_HWMON;
+	adapter->dev.parent	= &pdev->dev;
+	adapter->nr		= pdev->id;
+	adapter->timeout	= HZ;
+	adapter->algo		= &cbus_i2c_algo;
+	strlcpy(adapter->name, "CBUS I2C adapter", sizeof(adapter->name));
+
+	spin_lock_init(&chost->lock);
+	chost->dev = &pdev->dev;
+
+	ret = devm_gpio_request_one(&pdev->dev, chost->clk_gpio,
+				    GPIOF_OUT_INIT_LOW, "CBUS clk");
+	if (ret)
+		return ret;
+
+	ret = devm_gpio_request_one(&pdev->dev, chost->dat_gpio, GPIOF_IN,
+				    "CBUS data");
+	if (ret)
+		return ret;
+
+	ret = devm_gpio_request_one(&pdev->dev, chost->sel_gpio,
+				    GPIOF_OUT_INIT_HIGH, "CBUS sel");
+	if (ret)
+		return ret;
+
+	i2c_set_adapdata(adapter, chost);
+	platform_set_drvdata(pdev, adapter);
+
+	return i2c_add_numbered_adapter(adapter);
+}
+
+#if defined(CONFIG_OF)
+static const struct of_device_id i2c_cbus_dt_ids[] = {
+	{ .compatible = "i2c-cbus-gpio", },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, i2c_cbus_dt_ids);
+#endif
+
+static struct platform_driver cbus_i2c_driver = {
+	.probe	= cbus_i2c_probe,
+	.remove	= cbus_i2c_remove,
+	.driver	= {
+		.owner	= THIS_MODULE,
+		.name	= "i2c-cbus-gpio",
+	},
+};
+module_platform_driver(cbus_i2c_driver);
+
+MODULE_ALIAS("platform:i2c-cbus-gpio");
+MODULE_DESCRIPTION("CBUS I2C driver");
+MODULE_AUTHOR("Juha Yrjölä");
+MODULE_AUTHOR("David Weinehall");
+MODULE_AUTHOR("Mikko Ylinen");
+MODULE_AUTHOR("Felipe Balbi");
+MODULE_AUTHOR("Aaro Koskinen <aaro.koskinen@iki.fi>");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/platform_data/i2c-cbus-gpio.h b/include/linux/platform_data/i2c-cbus-gpio.h
new file mode 100644
index 000000000000..6faa992a9502
--- /dev/null
+++ b/include/linux/platform_data/i2c-cbus-gpio.h
@@ -0,0 +1,27 @@
+/*
+ * i2c-cbus-gpio.h - CBUS I2C platform_data definition
+ *
+ * Copyright (C) 2004-2009 Nokia Corporation
+ *
+ * Written by Felipe Balbi and Aaro Koskinen.
+ *
+ * This file is subject to the terms and conditions of the GNU General
+ * Public License. See the file "COPYING" in the main directory of this
+ * archive for more details.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __INCLUDE_LINUX_I2C_CBUS_GPIO_H
+#define __INCLUDE_LINUX_I2C_CBUS_GPIO_H
+
+struct i2c_cbus_platform_data {
+	int dat_gpio;
+	int clk_gpio;
+	int sel_gpio;
+};
+
+#endif /* __INCLUDE_LINUX_I2C_CBUS_GPIO_H */
-- 
cgit v1.2.3


From 49f4d8b93ccf9454284b6f524b96c66d8d7fbccc Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Thu, 2 Aug 2012 04:25:10 -0700
Subject: pidns: Capture the user namespace and filter ns_last_pid

- Capture the the user namespace that creates the pid namespace
- Use that user namespace to test if it is ok to write to
  /proc/sys/kernel/ns_last_pid.

Zhao Hongjiang <zhaohongjiang@huawei.com> noticed I was missing a put_user_ns
in when destroying a pid_ns.  I have foloded his patch into this one
so that bisects will work properly.

Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/linux/pid_namespace.h |  8 +++++---
 kernel/nsproxy.c              |  2 +-
 kernel/pid.c                  |  1 +
 kernel/pid_namespace.c        | 17 ++++++++++++-----
 4 files changed, 19 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
index 65e3e87eacc5..c89c9cfcd247 100644
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -31,6 +31,7 @@ struct pid_namespace {
 #ifdef CONFIG_BSD_PROCESS_ACCT
 	struct bsd_acct_struct *bacct;
 #endif
+	struct user_namespace *user_ns;
 	kgid_t pid_gid;
 	int hide_pid;
 	int reboot;	/* group exit code if this pidns was rebooted */
@@ -46,7 +47,8 @@ static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns)
 	return ns;
 }
 
-extern struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *ns);
+extern struct pid_namespace *copy_pid_ns(unsigned long flags,
+	struct user_namespace *user_ns, struct pid_namespace *ns);
 extern void zap_pid_ns_processes(struct pid_namespace *pid_ns);
 extern int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd);
 extern void put_pid_ns(struct pid_namespace *ns);
@@ -59,8 +61,8 @@ static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns)
 	return ns;
 }
 
-static inline struct pid_namespace *
-copy_pid_ns(unsigned long flags, struct pid_namespace *ns)
+static inline struct pid_namespace *copy_pid_ns(unsigned long flags,
+	struct user_namespace *user_ns, struct pid_namespace *ns)
 {
 	if (flags & CLONE_NEWPID)
 		ns = ERR_PTR(-EINVAL);
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index 7e1c3de1ce45..ca27d2c5264d 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -84,7 +84,7 @@ static struct nsproxy *create_new_namespaces(unsigned long flags,
 		goto out_ipc;
 	}
 
-	new_nsp->pid_ns = copy_pid_ns(flags, task_active_pid_ns(tsk));
+	new_nsp->pid_ns = copy_pid_ns(flags, task_cred_xxx(tsk, user_ns), task_active_pid_ns(tsk));
 	if (IS_ERR(new_nsp->pid_ns)) {
 		err = PTR_ERR(new_nsp->pid_ns);
 		goto out_pid;
diff --git a/kernel/pid.c b/kernel/pid.c
index aebd4f5aaf41..2a624f1486e1 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -78,6 +78,7 @@ struct pid_namespace init_pid_ns = {
 	.last_pid = 0,
 	.level = 0,
 	.child_reaper = &init_task,
+	.user_ns = &init_user_ns,
 };
 EXPORT_SYMBOL_GPL(init_pid_ns);
 
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index 7b07cc0dfb75..b2604950aa50 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -10,6 +10,7 @@
 
 #include <linux/pid.h>
 #include <linux/pid_namespace.h>
+#include <linux/user_namespace.h>
 #include <linux/syscalls.h>
 #include <linux/err.h>
 #include <linux/acct.h>
@@ -74,7 +75,8 @@ err_alloc:
 /* MAX_PID_NS_LEVEL is needed for limiting size of 'struct pid' */
 #define MAX_PID_NS_LEVEL 32
 
-static struct pid_namespace *create_pid_namespace(struct pid_namespace *parent_pid_ns)
+static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns,
+	struct pid_namespace *parent_pid_ns)
 {
 	struct pid_namespace *ns;
 	unsigned int level = parent_pid_ns->level + 1;
@@ -102,6 +104,7 @@ static struct pid_namespace *create_pid_namespace(struct pid_namespace *parent_p
 	kref_init(&ns->kref);
 	ns->level = level;
 	ns->parent = get_pid_ns(parent_pid_ns);
+	ns->user_ns = get_user_ns(user_ns);
 
 	set_bit(0, ns->pidmap[0].page);
 	atomic_set(&ns->pidmap[0].nr_free, BITS_PER_PAGE - 1);
@@ -117,6 +120,7 @@ static struct pid_namespace *create_pid_namespace(struct pid_namespace *parent_p
 
 out_put_parent_pid_ns:
 	put_pid_ns(parent_pid_ns);
+	put_user_ns(user_ns);
 out_free_map:
 	kfree(ns->pidmap[0].page);
 out_free:
@@ -131,16 +135,18 @@ static void destroy_pid_namespace(struct pid_namespace *ns)
 
 	for (i = 0; i < PIDMAP_ENTRIES; i++)
 		kfree(ns->pidmap[i].page);
+	put_user_ns(ns->user_ns);
 	kmem_cache_free(pid_ns_cachep, ns);
 }
 
-struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *old_ns)
+struct pid_namespace *copy_pid_ns(unsigned long flags,
+	struct user_namespace *user_ns, struct pid_namespace *old_ns)
 {
 	if (!(flags & CLONE_NEWPID))
 		return get_pid_ns(old_ns);
 	if (flags & (CLONE_THREAD|CLONE_PARENT))
 		return ERR_PTR(-EINVAL);
-	return create_pid_namespace(old_ns);
+	return create_pid_namespace(user_ns, old_ns);
 }
 
 static void free_pid_ns(struct kref *kref)
@@ -239,9 +245,10 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
 static int pid_ns_ctl_handler(struct ctl_table *table, int write,
 		void __user *buffer, size_t *lenp, loff_t *ppos)
 {
+	struct pid_namespace *pid_ns = task_active_pid_ns(current);
 	struct ctl_table tmp = *table;
 
-	if (write && !capable(CAP_SYS_ADMIN))
+	if (write && !ns_capable(pid_ns->user_ns, CAP_SYS_ADMIN))
 		return -EPERM;
 
 	/*
@@ -250,7 +257,7 @@ static int pid_ns_ctl_handler(struct ctl_table *table, int write,
 	 * it should synchronize its usage with external means.
 	 */
 
-	tmp.data = &current->nsproxy->pid_ns->last_pid;
+	tmp.data = &pid_ns->last_pid;
 	return proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
 }
 
-- 
cgit v1.2.3


From 0a01f2cc390e10633a54f72c608cc3fe19a50c3d Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Wed, 1 Aug 2012 10:33:47 -0700
Subject: pidns: Make the pidns proc mount/umount logic obvious.

Track the number of pids in the proc hash table.  When the number of
pids goes to 0 schedule work to unmount the kernel mount of proc.

Move the mount of proc into alloc_pid when we allocate the pid for
init.

Remove the surprising calls of pid_ns_release proc in fork and
proc_flush_task.  Those code paths really shouldn't know about proc
namespace implementation details and people have demonstrated several
times that finding and understanding those code paths is difficult and
non-obvious.

Because of the call path detach pid is alwasy called with the
rtnl_lock held free_pid is not allowed to sleep, so the work to
unmounting proc is moved to a work queue.  This has the side benefit
of not blocking the entire world waiting for the unnecessary
rcu_barrier in deactivate_locked_super.

In the process of making the code clear and obvious this fixes a bug
reported by Gao feng <gaofeng@cn.fujitsu.com> where we would leak a
mount of proc during clone(CLONE_NEWPID|CLONE_NEWNET) if copy_pid_ns
succeeded and copy_net_ns failed.

Acked-by: "Serge E. Hallyn" <serge@hallyn.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 fs/proc/base.c                |  4 ----
 fs/proc/root.c                |  5 -----
 include/linux/pid_namespace.h |  2 ++
 kernel/fork.c                 |  2 --
 kernel/pid.c                  | 21 +++++++++++++++++----
 kernel/pid_namespace.c        | 14 +++++++-------
 6 files changed, 26 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 6177fc238fdb..7621dc51cff8 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2590,10 +2590,6 @@ void proc_flush_task(struct task_struct *task)
 		proc_flush_task_mnt(upid->ns->proc_mnt, upid->nr,
 					tgid->numbers[i].nr);
 	}
-
-	upid = &pid->numbers[pid->level];
-	if (upid->nr == 1)
-		pid_ns_release_proc(upid->ns);
 }
 
 static struct dentry *proc_pid_instantiate(struct inode *dir,
diff --git a/fs/proc/root.c b/fs/proc/root.c
index fc1609321a78..f2f251158d35 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -155,11 +155,6 @@ void __init proc_root_init(void)
 	err = register_filesystem(&proc_fs_type);
 	if (err)
 		return;
-	err = pid_ns_prepare_proc(&init_pid_ns);
-	if (err) {
-		unregister_filesystem(&proc_fs_type);
-		return;
-	}
 
 	proc_self_init();
 	proc_symlink("mounts", NULL, "self/mounts");
diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
index c89c9cfcd247..4c96acdb2489 100644
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -21,6 +21,7 @@ struct pid_namespace {
 	struct kref kref;
 	struct pidmap pidmap[PIDMAP_ENTRIES];
 	int last_pid;
+	int nr_hashed;
 	struct task_struct *child_reaper;
 	struct kmem_cache *pid_cachep;
 	unsigned int level;
@@ -32,6 +33,7 @@ struct pid_namespace {
 	struct bsd_acct_struct *bacct;
 #endif
 	struct user_namespace *user_ns;
+	struct work_struct proc_work;
 	kgid_t pid_gid;
 	int hide_pid;
 	int reboot;	/* group exit code if this pidns was rebooted */
diff --git a/kernel/fork.c b/kernel/fork.c
index 7798c247f4b9..666dc8b06606 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1476,8 +1476,6 @@ bad_fork_cleanup_io:
 	if (p->io_context)
 		exit_io_context(p);
 bad_fork_cleanup_namespaces:
-	if (unlikely(clone_flags & CLONE_NEWPID))
-		pid_ns_release_proc(p->nsproxy->pid_ns);
 	exit_task_namespaces(p);
 bad_fork_cleanup_mm:
 	if (p->mm)
diff --git a/kernel/pid.c b/kernel/pid.c
index 3a5f238c1ca0..e957f8b09136 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -36,6 +36,7 @@
 #include <linux/pid_namespace.h>
 #include <linux/init_task.h>
 #include <linux/syscalls.h>
+#include <linux/proc_fs.h>
 
 #define pid_hashfn(nr, ns)	\
 	hash_long((unsigned long)nr + (unsigned long)ns, pidhash_shift)
@@ -270,8 +271,12 @@ void free_pid(struct pid *pid)
 	unsigned long flags;
 
 	spin_lock_irqsave(&pidmap_lock, flags);
-	for (i = 0; i <= pid->level; i++)
-		hlist_del_rcu(&pid->numbers[i].pid_chain);
+	for (i = 0; i <= pid->level; i++) {
+		struct upid *upid = pid->numbers + i;
+		hlist_del_rcu(&upid->pid_chain);
+		if (--upid->ns->nr_hashed == 0)
+			schedule_work(&upid->ns->proc_work);
+	}
 	spin_unlock_irqrestore(&pidmap_lock, flags);
 
 	for (i = 0; i <= pid->level; i++)
@@ -293,6 +298,7 @@ struct pid *alloc_pid(struct pid_namespace *ns)
 		goto out;
 
 	tmp = ns;
+	pid->level = ns->level;
 	for (i = ns->level; i >= 0; i--) {
 		nr = alloc_pidmap(tmp);
 		if (nr < 0)
@@ -303,17 +309,23 @@ struct pid *alloc_pid(struct pid_namespace *ns)
 		tmp = tmp->parent;
 	}
 
+	if (unlikely(is_child_reaper(pid))) {
+		if (pid_ns_prepare_proc(ns))
+			goto out_free;
+	}
+
 	get_pid_ns(ns);
-	pid->level = ns->level;
 	atomic_set(&pid->count, 1);
 	for (type = 0; type < PIDTYPE_MAX; ++type)
 		INIT_HLIST_HEAD(&pid->tasks[type]);
 
 	upid = pid->numbers + ns->level;
 	spin_lock_irq(&pidmap_lock);
-	for ( ; upid >= pid->numbers; --upid)
+	for ( ; upid >= pid->numbers; --upid) {
 		hlist_add_head_rcu(&upid->pid_chain,
 				&pid_hash[pid_hashfn(upid->nr, upid->ns)]);
+		upid->ns->nr_hashed++;
+	}
 	spin_unlock_irq(&pidmap_lock);
 
 out:
@@ -570,6 +582,7 @@ void __init pidmap_init(void)
 	/* Reserve PID 0. We never call free_pidmap(0) */
 	set_bit(0, init_pid_ns.pidmap[0].page);
 	atomic_dec(&init_pid_ns.pidmap[0].nr_free);
+	init_pid_ns.nr_hashed = 1;
 
 	init_pid_ns.pid_cachep = KMEM_CACHE(pid,
 			SLAB_HWCACHE_ALIGN | SLAB_PANIC);
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index b2604950aa50..84591cfeefc1 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -72,6 +72,12 @@ err_alloc:
 	return NULL;
 }
 
+static void proc_cleanup_work(struct work_struct *work)
+{
+	struct pid_namespace *ns = container_of(work, struct pid_namespace, proc_work);
+	pid_ns_release_proc(ns);
+}
+
 /* MAX_PID_NS_LEVEL is needed for limiting size of 'struct pid' */
 #define MAX_PID_NS_LEVEL 32
 
@@ -105,6 +111,7 @@ static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns
 	ns->level = level;
 	ns->parent = get_pid_ns(parent_pid_ns);
 	ns->user_ns = get_user_ns(user_ns);
+	INIT_WORK(&ns->proc_work, proc_cleanup_work);
 
 	set_bit(0, ns->pidmap[0].page);
 	atomic_set(&ns->pidmap[0].nr_free, BITS_PER_PAGE - 1);
@@ -112,15 +119,8 @@ static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns
 	for (i = 1; i < PIDMAP_ENTRIES; i++)
 		atomic_set(&ns->pidmap[i].nr_free, BITS_PER_PAGE);
 
-	err = pid_ns_prepare_proc(ns);
-	if (err)
-		goto out_put_parent_pid_ns;
-
 	return ns;
 
-out_put_parent_pid_ns:
-	put_pid_ns(parent_pid_ns);
-	put_user_ns(user_ns);
 out_free_map:
 	kfree(ns->pidmap[0].page);
 out_free:
-- 
cgit v1.2.3


From 57e8391d327609cbf12d843259c968b9e5c1838f Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sun, 7 Mar 2010 18:17:03 -0800
Subject: pidns: Add setns support

- Pid namespaces are designed to be inescapable so verify that the
  passed in pid namespace is a child of the currently active
  pid namespace or the currently active pid namespace itself.

  Allowing the currently active pid namespace is important so
  the effects of an earlier setns can be cancelled.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 fs/proc/namespaces.c    |  3 +++
 include/linux/proc_fs.h |  1 +
 kernel/pid_namespace.c  | 54 +++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 58 insertions(+)

(limited to 'include/linux')

diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index b178ed733c36..85ca047e35f1 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -24,6 +24,9 @@ static const struct proc_ns_operations *ns_entries[] = {
 #ifdef CONFIG_IPC_NS
 	&ipcns_operations,
 #endif
+#ifdef CONFIG_PID_NS
+	&pidns_operations,
+#endif
 };
 
 static const struct file_operations ns_file_operations = {
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 3fd2e871ff1b..acaafcd40aa5 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -251,6 +251,7 @@ struct proc_ns_operations {
 extern const struct proc_ns_operations netns_operations;
 extern const struct proc_ns_operations utsns_operations;
 extern const struct proc_ns_operations ipcns_operations;
+extern const struct proc_ns_operations pidns_operations;
 
 union proc_op {
 	int (*proc_get_link)(struct dentry *, struct path *);
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index 0dbbc66b6ec6..f78fc48c86bc 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -301,6 +301,60 @@ int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd)
 	return 0;
 }
 
+static void *pidns_get(struct task_struct *task)
+{
+	struct pid_namespace *ns;
+
+	rcu_read_lock();
+	ns = get_pid_ns(task_active_pid_ns(task));
+	rcu_read_unlock();
+
+	return ns;
+}
+
+static void pidns_put(void *ns)
+{
+	put_pid_ns(ns);
+}
+
+static int pidns_install(struct nsproxy *nsproxy, void *ns)
+{
+	struct pid_namespace *active = task_active_pid_ns(current);
+	struct pid_namespace *ancestor, *new = ns;
+
+	if (!ns_capable(new->user_ns, CAP_SYS_ADMIN))
+		return -EPERM;
+
+	/*
+	 * Only allow entering the current active pid namespace
+	 * or a child of the current active pid namespace.
+	 *
+	 * This is required for fork to return a usable pid value and
+	 * this maintains the property that processes and their
+	 * children can not escape their current pid namespace.
+	 */
+	if (new->level < active->level)
+		return -EINVAL;
+
+	ancestor = new;
+	while (ancestor->level > active->level)
+		ancestor = ancestor->parent;
+	if (ancestor != active)
+		return -EINVAL;
+
+	put_pid_ns(nsproxy->pid_ns);
+	nsproxy->pid_ns = get_pid_ns(new);
+	return 0;
+}
+
+const struct proc_ns_operations pidns_operations = {
+	.name		= "pid",
+	.type		= CLONE_NEWPID,
+	.get		= pidns_get,
+	.put		= pidns_put,
+	.install	= pidns_install,
+};
+
 static __init int pid_namespaces_init(void)
 {
 	pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC);
-- 
cgit v1.2.3


From 8823c079ba7136dc1948d6f6dcb5f8022bde438e Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sun, 7 Mar 2010 18:49:36 -0800
Subject: vfs: Add setns support for the mount namespace

setns support for the mount namespace is a little tricky as an
arbitrary decision must be made about what to set fs->root and
fs->pwd to, as there is no expectation of a relationship between
the two mount namespaces.  Therefore I arbitrarily find the root
mount point, and follow every mount on top of it to find the top
of the mount stack.  Then I set fs->root and fs->pwd to that
location.  The topmost root of the mount stack seems like a
reasonable place to be.

Bind mount support for the mount namespace inodes has the
possibility of creating circular dependencies between mount
namespaces.  Circular dependencies can result in loops that
prevent mount namespaces from every being freed.  I avoid
creating those circular dependencies by adding a sequence number
to the mount namespace and require all bind mounts be of a
younger mount namespace into an older mount namespace.

Add a helper function proc_ns_inode so it is possible to
detect when we are attempting to bind mound a namespace inode.

Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 fs/mount.h              |  1 +
 fs/namespace.c          | 95 +++++++++++++++++++++++++++++++++++++++++++++++++
 fs/proc/namespaces.c    |  5 +++
 include/linux/proc_fs.h |  7 ++++
 4 files changed, 108 insertions(+)

(limited to 'include/linux')

diff --git a/fs/mount.h b/fs/mount.h
index 4f291f9de641..e9c37dd3d00d 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -6,6 +6,7 @@ struct mnt_namespace {
 	atomic_t		count;
 	struct mount *	root;
 	struct list_head	list;
+	u64			seq;	/* Sequence number to prevent loops */
 	wait_queue_head_t poll;
 	int event;
 };
diff --git a/fs/namespace.c b/fs/namespace.c
index 24960626bb6b..d287e7e74644 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -20,6 +20,7 @@
 #include <linux/fs_struct.h>	/* get_fs_root et.al. */
 #include <linux/fsnotify.h>	/* fsnotify_vfsmount_delete */
 #include <linux/uaccess.h>
+#include <linux/proc_fs.h>
 #include "pnode.h"
 #include "internal.h"
 
@@ -1308,6 +1309,26 @@ static int mount_is_safe(struct path *path)
 #endif
 }
 
+static bool mnt_ns_loop(struct path *path)
+{
+	/* Could bind mounting the mount namespace inode cause a
+	 * mount namespace loop?
+	 */
+	struct inode *inode = path->dentry->d_inode;
+	struct proc_inode *ei;
+	struct mnt_namespace *mnt_ns;
+
+	if (!proc_ns_inode(inode))
+		return false;
+
+	ei = PROC_I(inode);
+	if (ei->ns_ops != &mntns_operations)
+		return false;
+
+	mnt_ns = ei->ns;
+	return current->nsproxy->mnt_ns->seq >= mnt_ns->seq;
+}
+
 struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
 					int flag)
 {
@@ -1655,6 +1676,10 @@ static int do_loopback(struct path *path, const char *old_name,
 	if (err)
 		return err;
 
+	err = -EINVAL;
+	if (mnt_ns_loop(&old_path))
+		goto out; 
+
 	err = lock_mount(path);
 	if (err)
 		goto out;
@@ -2261,6 +2286,15 @@ dput_out:
 	return retval;
 }
 
+/*
+ * Assign a sequence number so we can detect when we attempt to bind
+ * mount a reference to an older mount namespace into the current
+ * mount namespace, preventing reference counting loops.  A 64bit
+ * number incrementing at 10Ghz will take 12,427 years to wrap which
+ * is effectively never, so we can ignore the possibility.
+ */
+static atomic64_t mnt_ns_seq = ATOMIC64_INIT(1);
+
 static struct mnt_namespace *alloc_mnt_ns(void)
 {
 	struct mnt_namespace *new_ns;
@@ -2268,6 +2302,7 @@ static struct mnt_namespace *alloc_mnt_ns(void)
 	new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL);
 	if (!new_ns)
 		return ERR_PTR(-ENOMEM);
+	new_ns->seq = atomic64_add_return(1, &mnt_ns_seq);
 	atomic_set(&new_ns->count, 1);
 	new_ns->root = NULL;
 	INIT_LIST_HEAD(&new_ns->list);
@@ -2681,3 +2716,63 @@ bool our_mnt(struct vfsmount *mnt)
 {
 	return check_mnt(real_mount(mnt));
 }
+
+static void *mntns_get(struct task_struct *task)
+{
+	struct mnt_namespace *ns = NULL;
+	struct nsproxy *nsproxy;
+
+	rcu_read_lock();
+	nsproxy = task_nsproxy(task);
+	if (nsproxy) {
+		ns = nsproxy->mnt_ns;
+		get_mnt_ns(ns);
+	}
+	rcu_read_unlock();
+
+	return ns;
+}
+
+static void mntns_put(void *ns)
+{
+	put_mnt_ns(ns);
+}
+
+static int mntns_install(struct nsproxy *nsproxy, void *ns)
+{
+	struct fs_struct *fs = current->fs;
+	struct mnt_namespace *mnt_ns = ns;
+	struct path root;
+
+	if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_CHROOT))
+		return -EINVAL;
+
+	if (fs->users != 1)
+		return -EINVAL;
+
+	get_mnt_ns(mnt_ns);
+	put_mnt_ns(nsproxy->mnt_ns);
+	nsproxy->mnt_ns = mnt_ns;
+
+	/* Find the root */
+	root.mnt    = &mnt_ns->root->mnt;
+	root.dentry = mnt_ns->root->mnt.mnt_root;
+	path_get(&root);
+	while(d_mountpoint(root.dentry) && follow_down_one(&root))
+		;
+
+	/* Update the pwd and root */
+	set_fs_pwd(fs, &root);
+	set_fs_root(fs, &root);
+
+	path_put(&root);
+	return 0;
+}
+
+const struct proc_ns_operations mntns_operations = {
+	.name		= "mnt",
+	.type		= CLONE_NEWNS,
+	.get		= mntns_get,
+	.put		= mntns_put,
+	.install	= mntns_install,
+};
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index 85ca047e35f1..2a17fd9ae6a9 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -27,6 +27,7 @@ static const struct proc_ns_operations *ns_entries[] = {
 #ifdef CONFIG_PID_NS
 	&pidns_operations,
 #endif
+	&mntns_operations,
 };
 
 static const struct file_operations ns_file_operations = {
@@ -201,3 +202,7 @@ out_invalid:
 	return ERR_PTR(-EINVAL);
 }
 
+bool proc_ns_inode(struct inode *inode)
+{
+	return inode->i_fop == &ns_file_operations;
+}
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index acaafcd40aa5..9014c041e752 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -174,6 +174,7 @@ extern struct proc_dir_entry *proc_net_mkdir(struct net *net, const char *name,
 	struct proc_dir_entry *parent);
 
 extern struct file *proc_ns_fget(int fd);
+extern bool proc_ns_inode(struct inode *inode);
 
 #else
 
@@ -229,6 +230,11 @@ static inline struct file *proc_ns_fget(int fd)
 	return ERR_PTR(-EINVAL);
 }
 
+static inline bool proc_ns_inode(struct inode *inode)
+{
+	return false;
+}
+
 #endif /* CONFIG_PROC_FS */
 
 #if !defined(CONFIG_PROC_KCORE)
@@ -252,6 +258,7 @@ extern const struct proc_ns_operations netns_operations;
 extern const struct proc_ns_operations utsns_operations;
 extern const struct proc_ns_operations ipcns_operations;
 extern const struct proc_ns_operations pidns_operations;
+extern const struct proc_ns_operations mntns_operations;
 
 union proc_op {
 	int (*proc_get_link)(struct dentry *, struct path *);
-- 
cgit v1.2.3


From 771b1371686e0a63e938ada28de020b9a0040f55 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Thu, 26 Jul 2012 21:08:32 -0700
Subject: vfs: Add a user namespace reference from struct mnt_namespace

This will allow for support for unprivileged mounts in a new user namespace.

Acked-by: "Serge E. Hallyn" <serge@hallyn.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 fs/mount.h                    |  1 +
 fs/namespace.c                | 24 ++++++++++++++++--------
 include/linux/mnt_namespace.h |  3 ++-
 kernel/nsproxy.c              |  2 +-
 4 files changed, 20 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/fs/mount.h b/fs/mount.h
index e9c37dd3d00d..630fafc616bb 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -6,6 +6,7 @@ struct mnt_namespace {
 	atomic_t		count;
 	struct mount *	root;
 	struct list_head	list;
+	struct user_namespace	*user_ns;
 	u64			seq;	/* Sequence number to prevent loops */
 	wait_queue_head_t poll;
 	int event;
diff --git a/fs/namespace.c b/fs/namespace.c
index d287e7e74644..207c7ba84ad3 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -12,6 +12,7 @@
 #include <linux/export.h>
 #include <linux/capability.h>
 #include <linux/mnt_namespace.h>
+#include <linux/user_namespace.h>
 #include <linux/namei.h>
 #include <linux/security.h>
 #include <linux/idr.h>
@@ -2286,6 +2287,12 @@ dput_out:
 	return retval;
 }
 
+static void free_mnt_ns(struct mnt_namespace *ns)
+{
+	put_user_ns(ns->user_ns);
+	kfree(ns);
+}
+
 /*
  * Assign a sequence number so we can detect when we attempt to bind
  * mount a reference to an older mount namespace into the current
@@ -2295,7 +2302,7 @@ dput_out:
  */
 static atomic64_t mnt_ns_seq = ATOMIC64_INIT(1);
 
-static struct mnt_namespace *alloc_mnt_ns(void)
+static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns)
 {
 	struct mnt_namespace *new_ns;
 
@@ -2308,6 +2315,7 @@ static struct mnt_namespace *alloc_mnt_ns(void)
 	INIT_LIST_HEAD(&new_ns->list);
 	init_waitqueue_head(&new_ns->poll);
 	new_ns->event = 0;
+	new_ns->user_ns = get_user_ns(user_ns);
 	return new_ns;
 }
 
@@ -2316,7 +2324,7 @@ static struct mnt_namespace *alloc_mnt_ns(void)
  * copied from the namespace of the passed in task structure.
  */
 static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
-		struct fs_struct *fs)
+		struct user_namespace *user_ns, struct fs_struct *fs)
 {
 	struct mnt_namespace *new_ns;
 	struct vfsmount *rootmnt = NULL, *pwdmnt = NULL;
@@ -2324,7 +2332,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
 	struct mount *old = mnt_ns->root;
 	struct mount *new;
 
-	new_ns = alloc_mnt_ns();
+	new_ns = alloc_mnt_ns(user_ns);
 	if (IS_ERR(new_ns))
 		return new_ns;
 
@@ -2333,7 +2341,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
 	new = copy_tree(old, old->mnt.mnt_root, CL_COPY_ALL | CL_EXPIRE);
 	if (IS_ERR(new)) {
 		up_write(&namespace_sem);
-		kfree(new_ns);
+		free_mnt_ns(new_ns);
 		return ERR_CAST(new);
 	}
 	new_ns->root = new;
@@ -2374,7 +2382,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
 }
 
 struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
-		struct fs_struct *new_fs)
+		struct user_namespace *user_ns, struct fs_struct *new_fs)
 {
 	struct mnt_namespace *new_ns;
 
@@ -2384,7 +2392,7 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
 	if (!(flags & CLONE_NEWNS))
 		return ns;
 
-	new_ns = dup_mnt_ns(ns, new_fs);
+	new_ns = dup_mnt_ns(ns, user_ns, new_fs);
 
 	put_mnt_ns(ns);
 	return new_ns;
@@ -2396,7 +2404,7 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
  */
 static struct mnt_namespace *create_mnt_ns(struct vfsmount *m)
 {
-	struct mnt_namespace *new_ns = alloc_mnt_ns();
+	struct mnt_namespace *new_ns = alloc_mnt_ns(&init_user_ns);
 	if (!IS_ERR(new_ns)) {
 		struct mount *mnt = real_mount(m);
 		mnt->mnt_ns = new_ns;
@@ -2682,7 +2690,7 @@ void put_mnt_ns(struct mnt_namespace *ns)
 	br_write_unlock(&vfsmount_lock);
 	up_write(&namespace_sem);
 	release_mounts(&umount_list);
-	kfree(ns);
+	free_mnt_ns(ns);
 }
 
 struct vfsmount *kern_mount_data(struct file_system_type *type, void *data)
diff --git a/include/linux/mnt_namespace.h b/include/linux/mnt_namespace.h
index 5a8e3903d770..12b2ab510323 100644
--- a/include/linux/mnt_namespace.h
+++ b/include/linux/mnt_namespace.h
@@ -4,9 +4,10 @@
 
 struct mnt_namespace;
 struct fs_struct;
+struct user_namespace;
 
 extern struct mnt_namespace *copy_mnt_ns(unsigned long, struct mnt_namespace *,
-		struct fs_struct *);
+		struct user_namespace *, struct fs_struct *);
 extern void put_mnt_ns(struct mnt_namespace *ns);
 
 extern const struct file_operations proc_mounts_operations;
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index b8d4d8709d70..7f8b051fc19f 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -66,7 +66,7 @@ static struct nsproxy *create_new_namespaces(unsigned long flags,
 	if (!new_nsp)
 		return ERR_PTR(-ENOMEM);
 
-	new_nsp->mnt_ns = copy_mnt_ns(flags, tsk->nsproxy->mnt_ns, new_fs);
+	new_nsp->mnt_ns = copy_mnt_ns(flags, tsk->nsproxy->mnt_ns, task_cred_xxx(tsk, user_ns), new_fs);
 	if (IS_ERR(new_nsp->mnt_ns)) {
 		err = PTR_ERR(new_nsp->mnt_ns);
 		goto out_ns;
-- 
cgit v1.2.3


From 0c55cfc4166d9a0f38de779bd4d75a90afbe7734 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Thu, 26 Jul 2012 21:42:03 -0700
Subject: vfs: Allow unprivileged manipulation of the mount namespace.

- Add a filesystem flag to mark filesystems that are safe to mount as
  an unprivileged user.

- Add a filesystem flag to mark filesystems that don't need MNT_NODEV
  when mounted by an unprivileged user.

- Relax the permission checks to allow unprivileged users that have
  CAP_SYS_ADMIN permissions in the user namespace referred to by the
  current mount namespace to be allowed to mount, unmount, and move
  filesystems.

Acked-by: "Serge E. Hallyn" <serge@hallyn.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 fs/namespace.c     | 69 ++++++++++++++++++++++++++++++++++--------------------
 include/linux/fs.h |  2 ++
 2 files changed, 45 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/fs/namespace.c b/fs/namespace.c
index 4dfcaf05d17c..9ddc86f93221 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1269,7 +1269,7 @@ SYSCALL_DEFINE2(umount, char __user *, name, int, flags)
 		goto dput_and_out;
 
 	retval = -EPERM;
-	if (!capable(CAP_SYS_ADMIN))
+	if (!ns_capable(mnt->mnt_ns->user_ns, CAP_SYS_ADMIN))
 		goto dput_and_out;
 
 	retval = do_umount(mnt, flags);
@@ -1295,7 +1295,7 @@ SYSCALL_DEFINE1(oldumount, char __user *, name)
 
 static int mount_is_safe(struct path *path)
 {
-	if (capable(CAP_SYS_ADMIN))
+	if (ns_capable(real_mount(path->mnt)->mnt_ns->user_ns, CAP_SYS_ADMIN))
 		return 0;
 	return -EPERM;
 #ifdef notyet
@@ -1633,7 +1633,7 @@ static int do_change_type(struct path *path, int flag)
 	int type;
 	int err = 0;
 
-	if (!capable(CAP_SYS_ADMIN))
+	if (!ns_capable(mnt->mnt_ns->user_ns, CAP_SYS_ADMIN))
 		return -EPERM;
 
 	if (path->dentry != path->mnt->mnt_root)
@@ -1797,7 +1797,7 @@ static int do_move_mount(struct path *path, const char *old_name)
 	struct mount *p;
 	struct mount *old;
 	int err = 0;
-	if (!capable(CAP_SYS_ADMIN))
+	if (!ns_capable(real_mount(path->mnt)->mnt_ns->user_ns, CAP_SYS_ADMIN))
 		return -EPERM;
 	if (!old_name || !*old_name)
 		return -EINVAL;
@@ -1884,21 +1884,6 @@ static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype)
 	return ERR_PTR(err);
 }
 
-static struct vfsmount *
-do_kern_mount(const char *fstype, int flags, const char *name, void *data)
-{
-	struct file_system_type *type = get_fs_type(fstype);
-	struct vfsmount *mnt;
-	if (!type)
-		return ERR_PTR(-ENODEV);
-	mnt = vfs_kern_mount(type, flags, name, data);
-	if (!IS_ERR(mnt) && (type->fs_flags & FS_HAS_SUBTYPE) &&
-	    !mnt->mnt_sb->s_subtype)
-		mnt = fs_set_subtype(mnt, fstype);
-	put_filesystem(type);
-	return mnt;
-}
-
 /*
  * add a mount into a namespace's mount tree
  */
@@ -1944,20 +1929,46 @@ unlock:
  * create a new mount for userspace and request it to be added into the
  * namespace's tree
  */
-static int do_new_mount(struct path *path, const char *type, int flags,
+static int do_new_mount(struct path *path, const char *fstype, int flags,
 			int mnt_flags, const char *name, void *data)
 {
+	struct file_system_type *type;
+	struct user_namespace *user_ns;
 	struct vfsmount *mnt;
 	int err;
 
-	if (!type)
+	if (!fstype)
 		return -EINVAL;
 
 	/* we need capabilities... */
-	if (!capable(CAP_SYS_ADMIN))
+	user_ns = real_mount(path->mnt)->mnt_ns->user_ns;
+	if (!ns_capable(user_ns, CAP_SYS_ADMIN))
 		return -EPERM;
 
-	mnt = do_kern_mount(type, flags, name, data);
+	type = get_fs_type(fstype);
+	if (!type)
+		return -ENODEV;
+
+	if (user_ns != &init_user_ns) {
+		if (!(type->fs_flags & FS_USERNS_MOUNT)) {
+			put_filesystem(type);
+			return -EPERM;
+		}
+		/* Only in special cases allow devices from mounts
+		 * created outside the initial user namespace.
+		 */
+		if (!(type->fs_flags & FS_USERNS_DEV_MOUNT)) {
+			flags |= MS_NODEV;
+			mnt_flags |= MNT_NODEV;
+		}
+	}
+
+	mnt = vfs_kern_mount(type, flags, name, data);
+	if (!IS_ERR(mnt) && (type->fs_flags & FS_HAS_SUBTYPE) &&
+	    !mnt->mnt_sb->s_subtype)
+		mnt = fs_set_subtype(mnt, fstype);
+
+	put_filesystem(type);
 	if (IS_ERR(mnt))
 		return PTR_ERR(mnt);
 
@@ -2549,7 +2560,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
 	struct mount *new_mnt, *root_mnt;
 	int error;
 
-	if (!capable(CAP_SYS_ADMIN))
+	if (!ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN))
 		return -EPERM;
 
 	error = user_path_dir(new_root, &new);
@@ -2631,8 +2642,13 @@ static void __init init_mount_tree(void)
 	struct vfsmount *mnt;
 	struct mnt_namespace *ns;
 	struct path root;
+	struct file_system_type *type;
 
-	mnt = do_kern_mount("rootfs", 0, "rootfs", NULL);
+	type = get_fs_type("rootfs");
+	if (!type)
+		panic("Can't find rootfs type");
+	mnt = vfs_kern_mount(type, 0, "rootfs", NULL);
+	put_filesystem(type);
 	if (IS_ERR(mnt))
 		panic("Can't create rootfs");
 
@@ -2757,7 +2773,8 @@ static int mntns_install(struct nsproxy *nsproxy, void *ns)
 	struct mnt_namespace *mnt_ns = ns;
 	struct path root;
 
-	if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_CHROOT))
+	if (!ns_capable(mnt_ns->user_ns, CAP_SYS_ADMIN) ||
+	    !nsown_capable(CAP_SYS_CHROOT))
 		return -EINVAL;
 
 	if (fs->users != 1)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index b33cfc97b9ca..5037aa6817fd 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1812,6 +1812,8 @@ struct file_system_type {
 #define FS_REQUIRES_DEV		1 
 #define FS_BINARY_MOUNTDATA	2
 #define FS_HAS_SUBTYPE		4
+#define FS_USERNS_MOUNT		8	/* Can be mounted by userns root */
+#define FS_USERNS_DEV_MOUNT	16 /* A userns mount does not imply MNT_NODEV */
 #define FS_REVAL_DOT		16384	/* Check the paths ".", ".." for staleness */
 #define FS_RENAME_DOES_D_MOVE	32768	/* FS will handle d_move() during rename() internally. */
 	struct dentry *(*mount) (struct file_system_type *, int,
-- 
cgit v1.2.3


From 4b20db3de8dab005b07c74161cb041db8c5ff3a7 Mon Sep 17 00:00:00 2001
From: Thomas Hellstrom <thellstrom@vmware.com>
Date: Tue, 6 Nov 2012 11:31:49 +0000
Subject: kref: Implement kref_get_unless_zero v3

This function is intended to simplify locking around refcounting for
objects that can be looked up from a lookup structure, and which are
removed from that lookup structure in the object destructor.
Operations on such objects require at least a read lock around
lookup + kref_get, and a write lock around kref_put + remove from lookup
structure. Furthermore, RCU implementations become extremely tricky.
With a lookup followed by a kref_get_unless_zero *with return value check*
locking in the kref_put path can be deferred to the actual removal from
the lookup structure and RCU lookups become trivial.

v2: Formatting fixes.
v3: Invert the return value.

Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 include/linux/kref.h | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/kref.h b/include/linux/kref.h
index 65af6887872f..4972e6e9ca93 100644
--- a/include/linux/kref.h
+++ b/include/linux/kref.h
@@ -111,4 +111,25 @@ static inline int kref_put_mutex(struct kref *kref,
 	}
 	return 0;
 }
+
+/**
+ * kref_get_unless_zero - Increment refcount for object unless it is zero.
+ * @kref: object.
+ *
+ * Return non-zero if the increment succeeded. Otherwise return 0.
+ *
+ * This function is intended to simplify locking around refcounting for
+ * objects that can be looked up from a lookup structure, and which are
+ * removed from that lookup structure in the object destructor.
+ * Operations on such objects require at least a read lock around
+ * lookup + kref_get, and a write lock around kref_put + remove from lookup
+ * structure. Furthermore, RCU implementations become extremely tricky.
+ * With a lookup followed by a kref_get_unless_zero *with return value check*
+ * locking in the kref_put path can be deferred to the actual removal from
+ * the lookup structure and RCU lookups become trivial.
+ */
+static inline int __must_check kref_get_unless_zero(struct kref *kref)
+{
+	return atomic_add_unless(&kref->refcount, 1, 0);
+}
 #endif /* _KREF_H_ */
-- 
cgit v1.2.3


From d851718f65d646c5033a28fa60631440c6dc0d4f Mon Sep 17 00:00:00 2001
From: Chanwoo Choi <cw00.choi@samsung.com>
Date: Thu, 8 Nov 2012 18:39:41 +0900
Subject: extcon: Add missing header file to extcon.h

This patch add missing header file(sysfs.h/device.h) to extcon.h
because 'struct extcon_dev' define attribute field(attr_g_muex/
attrs_muex/d_attrs_mues) and device_type field(extcon_dev_type).

Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
Signed-off-by: Myungjoo Ham <myungjoo.ham@samsung.com>
---
 include/linux/extcon.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/extcon.h b/include/linux/extcon.h
index 2c26c14cd710..54c00ce9ce7a 100644
--- a/include/linux/extcon.h
+++ b/include/linux/extcon.h
@@ -23,7 +23,9 @@
 #ifndef __LINUX_EXTCON_H__
 #define __LINUX_EXTCON_H__
 
+#include <linux/device.h>
 #include <linux/notifier.h>
+#include <linux/sysfs.h>
 
 #define SUPPORTED_CABLE_MAX	32
 #define CABLE_NAME_MAX		30
-- 
cgit v1.2.3


From 43c1af0f4861b721def8c67ed6af2a69a4efcca3 Mon Sep 17 00:00:00 2001
From: Laxman Dewangan <ldewangan@nvidia.com>
Date: Tue, 13 Nov 2012 19:33:57 +0530
Subject: mfd: tps65910: Use regmap irq framework for interrupt support

Implement irq support of tps65910 with regmap irq framework
in place of implementing locally.
This reduces the code size significantly and easy to maintain.

Signed-off-by: Laxman Dewangan <ldewangan@nvidia.com>
Reviewed-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/tps65910-irq.c   | 375 +++++++++++++++++++++----------------------
 include/linux/mfd/tps65910.h | 139 +++++++++++-----
 2 files changed, 278 insertions(+), 236 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/tps65910-irq.c b/drivers/mfd/tps65910-irq.c
index 09aab3e4776d..554543a584a1 100644
--- a/drivers/mfd/tps65910-irq.c
+++ b/drivers/mfd/tps65910-irq.c
@@ -24,171 +24,184 @@
 #include <linux/gpio.h>
 #include <linux/mfd/tps65910.h>
 
-/*
- * This is a threaded IRQ handler so can access I2C/SPI.  Since all
- * interrupts are clear on read the IRQ line will be reasserted and
- * the physical IRQ will be handled again if another interrupt is
- * asserted while we run - in the normal course of events this is a
- * rare occurrence so we save I2C/SPI reads.  We're also assuming that
- * it's rare to get lots of interrupts firing simultaneously so try to
- * minimise I/O.
- */
-static irqreturn_t tps65910_irq(int irq, void *irq_data)
-{
-	struct tps65910 *tps65910 = irq_data;
-	unsigned int reg;
-	u32 irq_sts;
-	u32 irq_mask;
-	int i;
-
-	tps65910_reg_read(tps65910, TPS65910_INT_STS, &reg);
-	irq_sts = reg;
-	tps65910_reg_read(tps65910, TPS65910_INT_STS2, &reg);
-	irq_sts |= reg << 8;
-	switch (tps65910_chip_id(tps65910)) {
-	case TPS65911:
-		tps65910_reg_read(tps65910, TPS65910_INT_STS3, &reg);
-		irq_sts |= reg << 16;
-	}
-
-	tps65910_reg_read(tps65910, TPS65910_INT_MSK, &reg);
-	irq_mask = reg;
-	tps65910_reg_read(tps65910, TPS65910_INT_MSK2, &reg);
-	irq_mask |= reg << 8;
-	switch (tps65910_chip_id(tps65910)) {
-	case TPS65911:
-		tps65910_reg_read(tps65910, TPS65910_INT_MSK3, &reg);
-		irq_mask |= reg << 16;
-	}
-
-	irq_sts &= ~irq_mask;
-
-	if (!irq_sts)
-		return IRQ_NONE;
-
-	for (i = 0; i < tps65910->irq_num; i++) {
-
-		if (!(irq_sts & (1 << i)))
-			continue;
-
-		handle_nested_irq(irq_find_mapping(tps65910->domain, i));
-	}
-
-	/* Write the STS register back to clear IRQs we handled */
-	reg = irq_sts & 0xFF;
-	irq_sts >>= 8;
-	tps65910_reg_write(tps65910, TPS65910_INT_STS, reg);
-	reg = irq_sts & 0xFF;
-	tps65910_reg_write(tps65910, TPS65910_INT_STS2, reg);
-	switch (tps65910_chip_id(tps65910)) {
-	case TPS65911:
-		reg = irq_sts >> 8;
-		tps65910_reg_write(tps65910, TPS65910_INT_STS3, reg);
-	}
-
-	return IRQ_HANDLED;
-}
-
-static void tps65910_irq_lock(struct irq_data *data)
-{
-	struct tps65910 *tps65910 = irq_data_get_irq_chip_data(data);
-
-	mutex_lock(&tps65910->irq_lock);
-}
-
-static void tps65910_irq_sync_unlock(struct irq_data *data)
-{
-	struct tps65910 *tps65910 = irq_data_get_irq_chip_data(data);
-	u32 reg_mask;
-	unsigned int reg;
-
-	tps65910_reg_read(tps65910, TPS65910_INT_MSK, &reg);
-	reg_mask = reg;
-	tps65910_reg_read(tps65910, TPS65910_INT_MSK2, &reg);
-	reg_mask |= reg << 8;
-	switch (tps65910_chip_id(tps65910)) {
-	case TPS65911:
-		tps65910_reg_read(tps65910, TPS65910_INT_MSK3, &reg);
-		reg_mask |= reg << 16;
-	}
 
-	if (tps65910->irq_mask != reg_mask) {
-		reg = tps65910->irq_mask & 0xFF;
-		tps65910_reg_write(tps65910, TPS65910_INT_MSK, reg);
-		reg = tps65910->irq_mask >> 8 & 0xFF;
-		tps65910_reg_write(tps65910, TPS65910_INT_MSK2, reg);
-		switch (tps65910_chip_id(tps65910)) {
-		case TPS65911:
-			reg = tps65910->irq_mask >> 16;
-			tps65910_reg_write(tps65910, TPS65910_INT_MSK3, reg);
-		}
-	}
-	mutex_unlock(&tps65910->irq_lock);
-}
-
-static void tps65910_irq_enable(struct irq_data *data)
-{
-	struct tps65910 *tps65910 = irq_data_get_irq_chip_data(data);
-
-	tps65910->irq_mask &= ~(1 << data->hwirq);
-}
-
-static void tps65910_irq_disable(struct irq_data *data)
-{
-	struct tps65910 *tps65910 = irq_data_get_irq_chip_data(data);
-
-	tps65910->irq_mask |= (1 << data->hwirq);
-}
+static const struct regmap_irq tps65911_irqs[] = {
+	/* INT_STS */
+	[TPS65911_IRQ_PWRHOLD_F] = {
+		.mask = INT_MSK_PWRHOLD_F_IT_MSK_MASK,
+		.reg_offset = 0,
+	},
+	[TPS65911_IRQ_VBAT_VMHI] = {
+		.mask = INT_MSK_VMBHI_IT_MSK_MASK,
+		.reg_offset = 0,
+	},
+	[TPS65911_IRQ_PWRON] = {
+		.mask = INT_MSK_PWRON_IT_MSK_MASK,
+		.reg_offset = 0,
+	},
+	[TPS65911_IRQ_PWRON_LP] = {
+		.mask = INT_MSK_PWRON_LP_IT_MSK_MASK,
+		.reg_offset = 0,
+	},
+	[TPS65911_IRQ_PWRHOLD_R] = {
+		.mask = INT_MSK_PWRHOLD_R_IT_MSK_MASK,
+		.reg_offset = 0,
+	},
+	[TPS65911_IRQ_HOTDIE] = {
+		.mask = INT_MSK_HOTDIE_IT_MSK_MASK,
+		.reg_offset = 0,
+	},
+	[TPS65911_IRQ_RTC_ALARM] = {
+		.mask = INT_MSK_RTC_ALARM_IT_MSK_MASK,
+		.reg_offset = 0,
+	},
+	[TPS65911_IRQ_RTC_PERIOD] = {
+		.mask = INT_MSK_RTC_PERIOD_IT_MSK_MASK,
+		.reg_offset = 0,
+	},
+
+	/* INT_STS2 */
+	[TPS65911_IRQ_GPIO0_R] = {
+		.mask = INT_MSK2_GPIO0_R_IT_MSK_MASK,
+		.reg_offset = 1,
+	},
+	[TPS65911_IRQ_GPIO0_F] = {
+		.mask = INT_MSK2_GPIO0_F_IT_MSK_MASK,
+		.reg_offset = 1,
+	},
+	[TPS65911_IRQ_GPIO1_R] = {
+		.mask = INT_MSK2_GPIO1_R_IT_MSK_MASK,
+		.reg_offset = 1,
+	},
+	[TPS65911_IRQ_GPIO1_F] = {
+		.mask = INT_MSK2_GPIO1_F_IT_MSK_MASK,
+		.reg_offset = 1,
+	},
+	[TPS65911_IRQ_GPIO2_R] = {
+		.mask = INT_MSK2_GPIO2_R_IT_MSK_MASK,
+		.reg_offset = 1,
+	},
+	[TPS65911_IRQ_GPIO2_F] = {
+		.mask = INT_MSK2_GPIO2_F_IT_MSK_MASK,
+		.reg_offset = 1,
+	},
+	[TPS65911_IRQ_GPIO3_R] = {
+		.mask = INT_MSK2_GPIO3_R_IT_MSK_MASK,
+		.reg_offset = 1,
+	},
+	[TPS65911_IRQ_GPIO3_F] = {
+		.mask = INT_MSK2_GPIO3_F_IT_MSK_MASK,
+		.reg_offset = 1,
+	},
+
+	/* INT_STS3 */
+	[TPS65911_IRQ_GPIO4_R] = {
+		.mask = INT_MSK3_GPIO4_R_IT_MSK_MASK,
+		.reg_offset = 2,
+	},
+	[TPS65911_IRQ_GPIO4_F] = {
+		.mask = INT_MSK3_GPIO4_F_IT_MSK_MASK,
+		.reg_offset = 2,
+	},
+	[TPS65911_IRQ_GPIO5_R] = {
+		.mask = INT_MSK3_GPIO5_R_IT_MSK_MASK,
+		.reg_offset = 2,
+	},
+	[TPS65911_IRQ_GPIO5_F] = {
+		.mask = INT_MSK3_GPIO5_F_IT_MSK_MASK,
+		.reg_offset = 2,
+	},
+	[TPS65911_IRQ_WTCHDG] = {
+		.mask = INT_MSK3_WTCHDG_IT_MSK_MASK,
+		.reg_offset = 2,
+	},
+	[TPS65911_IRQ_VMBCH2_H] = {
+		.mask = INT_MSK3_VMBCH2_H_IT_MSK_MASK,
+		.reg_offset = 2,
+	},
+	[TPS65911_IRQ_VMBCH2_L] = {
+		.mask = INT_MSK3_VMBCH2_L_IT_MSK_MASK,
+		.reg_offset = 2,
+	},
+	[TPS65911_IRQ_PWRDN] = {
+		.mask = INT_MSK3_PWRDN_IT_MSK_MASK,
+		.reg_offset = 2,
+	},
+};
 
-#ifdef CONFIG_PM_SLEEP
-static int tps65910_irq_set_wake(struct irq_data *data, unsigned int enable)
-{
-	struct tps65910 *tps65910 = irq_data_get_irq_chip_data(data);
-	return irq_set_irq_wake(tps65910->chip_irq, enable);
-}
-#else
-#define tps65910_irq_set_wake NULL
-#endif
+static const struct regmap_irq tps65910_irqs[] = {
+	/* INT_STS */
+	[TPS65910_IRQ_VBAT_VMBDCH] = {
+		.mask = TPS65910_INT_MSK_VMBDCH_IT_MSK_MASK,
+		.reg_offset = 0,
+	},
+	[TPS65910_IRQ_VBAT_VMHI] = {
+		.mask = TPS65910_INT_MSK_VMBHI_IT_MSK_MASK,
+		.reg_offset = 0,
+	},
+	[TPS65910_IRQ_PWRON] = {
+		.mask = TPS65910_INT_MSK_PWRON_IT_MSK_MASK,
+		.reg_offset = 0,
+	},
+	[TPS65910_IRQ_PWRON_LP] = {
+		.mask = TPS65910_INT_MSK_PWRON_LP_IT_MSK_MASK,
+		.reg_offset = 0,
+	},
+	[TPS65910_IRQ_PWRHOLD] = {
+		.mask = TPS65910_INT_MSK_PWRHOLD_IT_MSK_MASK,
+		.reg_offset = 0,
+	},
+	[TPS65910_IRQ_HOTDIE] = {
+		.mask = TPS65910_INT_MSK_HOTDIE_IT_MSK_MASK,
+		.reg_offset = 0,
+	},
+	[TPS65910_IRQ_RTC_ALARM] = {
+		.mask = TPS65910_INT_MSK_RTC_ALARM_IT_MSK_MASK,
+		.reg_offset = 0,
+	},
+	[TPS65910_IRQ_RTC_PERIOD] = {
+		.mask = TPS65910_INT_MSK_RTC_PERIOD_IT_MSK_MASK,
+		.reg_offset = 0,
+	},
+
+	/* INT_STS2 */
+	[TPS65910_IRQ_GPIO_R] = {
+		.mask = TPS65910_INT_MSK2_GPIO0_F_IT_MSK_MASK,
+		.reg_offset = 1,
+	},
+	[TPS65910_IRQ_GPIO_F] = {
+		.mask = TPS65910_INT_MSK2_GPIO0_R_IT_MSK_MASK,
+		.reg_offset = 1,
+	},
+};
 
-static struct irq_chip tps65910_irq_chip = {
+static struct regmap_irq_chip tps65911_irq_chip = {
 	.name = "tps65910",
-	.irq_bus_lock = tps65910_irq_lock,
-	.irq_bus_sync_unlock = tps65910_irq_sync_unlock,
-	.irq_disable = tps65910_irq_disable,
-	.irq_enable = tps65910_irq_enable,
-	.irq_set_wake = tps65910_irq_set_wake,
+	.irqs = tps65911_irqs,
+	.num_irqs = ARRAY_SIZE(tps65911_irqs),
+	.num_regs = 3,
+	.irq_reg_stride = 2,
+	.status_base = TPS65910_INT_STS,
+	.mask_base = TPS65910_INT_MSK,
+	.ack_base = TPS65910_INT_MSK,
 };
 
-static int tps65910_irq_map(struct irq_domain *h, unsigned int virq,
-				irq_hw_number_t hw)
-{
-	struct tps65910 *tps65910 = h->host_data;
-
-	irq_set_chip_data(virq, tps65910);
-	irq_set_chip_and_handler(virq, &tps65910_irq_chip, handle_edge_irq);
-	irq_set_nested_thread(virq, 1);
-
-	/* ARM needs us to explicitly flag the IRQ as valid
-	 * and will set them noprobe when we do so. */
-#ifdef CONFIG_ARM
-	set_irq_flags(virq, IRQF_VALID);
-#else
-	irq_set_noprobe(virq);
-#endif
-
-	return 0;
-}
-
-static struct irq_domain_ops tps65910_domain_ops = {
-	.map	= tps65910_irq_map,
-	.xlate	= irq_domain_xlate_twocell,
+static struct regmap_irq_chip tps65910_irq_chip = {
+	.name = "tps65910",
+	.irqs = tps65910_irqs,
+	.num_irqs = ARRAY_SIZE(tps65910_irqs),
+	.num_regs = 2,
+	.irq_reg_stride = 2,
+	.status_base = TPS65910_INT_STS,
+	.mask_base = TPS65910_INT_MSK,
+	.ack_base = TPS65910_INT_MSK,
 };
 
 int tps65910_irq_init(struct tps65910 *tps65910, int irq,
 		    struct tps65910_platform_data *pdata)
 {
-	int ret;
-	int flags = IRQF_ONESHOT;
+	int ret = 0;
+	static struct regmap_irq_chip *tps6591x_irqs_chip;
 
 	if (!irq) {
 		dev_warn(tps65910->dev, "No interrupt support, no core IRQ\n");
@@ -200,61 +213,31 @@ int tps65910_irq_init(struct tps65910 *tps65910, int irq,
 		return -EINVAL;
 	}
 
+
 	switch (tps65910_chip_id(tps65910)) {
 	case TPS65910:
-		tps65910->irq_num = TPS65910_NUM_IRQ;
+		tps6591x_irqs_chip = &tps65910_irq_chip;
 		break;
 	case TPS65911:
-		tps65910->irq_num = TPS65911_NUM_IRQ;
+		tps6591x_irqs_chip = &tps65911_irq_chip;
 		break;
 	}
 
-	if (pdata->irq_base > 0) {
-		pdata->irq_base = irq_alloc_descs(pdata->irq_base, 0,
-					tps65910->irq_num, -1);
-		if (pdata->irq_base < 0) {
-			dev_warn(tps65910->dev, "Failed to alloc IRQs: %d\n",
-					pdata->irq_base);
-			return pdata->irq_base;
-		}
-	}
-
-	tps65910->irq_mask = 0xFFFFFF;
-
-	mutex_init(&tps65910->irq_lock);
 	tps65910->chip_irq = irq;
-	tps65910->irq_base = pdata->irq_base;
-
-	if (pdata->irq_base > 0)
-		tps65910->domain = irq_domain_add_legacy(tps65910->dev->of_node,
-					tps65910->irq_num,
-					pdata->irq_base,
-					0,
-					&tps65910_domain_ops, tps65910);
-	else
-		tps65910->domain = irq_domain_add_linear(tps65910->dev->of_node,
-					tps65910->irq_num,
-					&tps65910_domain_ops, tps65910);
-
-	if (!tps65910->domain) {
-		dev_err(tps65910->dev, "Failed to create IRQ domain\n");
-		return -ENOMEM;
+	ret = regmap_add_irq_chip(tps65910->regmap, tps65910->chip_irq,
+		IRQF_ONESHOT, pdata->irq_base,
+		tps6591x_irqs_chip, &tps65910->irq_data);
+	if (ret < 0) {
+		dev_warn(tps65910->dev,
+				"Failed to add irq_chip %d\n", ret);
+		return ret;
 	}
-
-	ret = request_threaded_irq(irq, NULL, tps65910_irq, flags,
-				   "tps65910", tps65910);
-
-	irq_set_irq_type(irq, IRQ_TYPE_LEVEL_LOW);
-
-	if (ret != 0)
-		dev_err(tps65910->dev, "Failed to request IRQ: %d\n", ret);
-
 	return ret;
 }
 
 int tps65910_irq_exit(struct tps65910 *tps65910)
 {
-	if (tps65910->chip_irq)
-		free_irq(tps65910->chip_irq, tps65910);
+	if (tps65910->chip_irq > 0)
+		regmap_del_irq_chip(tps65910->chip_irq, tps65910->irq_data);
 	return 0;
 }
diff --git a/include/linux/mfd/tps65910.h b/include/linux/mfd/tps65910.h
index 02e894f3ff45..b564ac29590a 100644
--- a/include/linux/mfd/tps65910.h
+++ b/include/linux/mfd/tps65910.h
@@ -572,6 +572,49 @@
 #define SPARE_SPARE_MASK				0xFF
 #define SPARE_SPARE_SHIFT				0
 
+#define TPS65910_INT_STS_RTC_PERIOD_IT_MASK			0x80
+#define TPS65910_INT_STS_RTC_PERIOD_IT_SHIFT			7
+#define TPS65910_INT_STS_RTC_ALARM_IT_MASK			0x40
+#define TPS65910_INT_STS_RTC_ALARM_IT_SHIFT			6
+#define TPS65910_INT_STS_HOTDIE_IT_MASK				0x20
+#define TPS65910_INT_STS_HOTDIE_IT_SHIFT			5
+#define TPS65910_INT_STS_PWRHOLD_F_IT_MASK			0x10
+#define TPS65910_INT_STS_PWRHOLD_F_IT_SHIFT			4
+#define TPS65910_INT_STS_PWRON_LP_IT_MASK			0x08
+#define TPS65910_INT_STS_PWRON_LP_IT_SHIFT			3
+#define TPS65910_INT_STS_PWRON_IT_MASK				0x04
+#define TPS65910_INT_STS_PWRON_IT_SHIFT				2
+#define TPS65910_INT_STS_VMBHI_IT_MASK				0x02
+#define TPS65910_INT_STS_VMBHI_IT_SHIFT				1
+#define TPS65910_INT_STS_VMBDCH_IT_MASK				0x01
+#define TPS65910_INT_STS_VMBDCH_IT_SHIFT			0
+
+#define TPS65910_INT_MSK_RTC_PERIOD_IT_MSK_MASK			0x80
+#define TPS65910_INT_MSK_RTC_PERIOD_IT_MSK_SHIFT		7
+#define TPS65910_INT_MSK_RTC_ALARM_IT_MSK_MASK			0x40
+#define TPS65910_INT_MSK_RTC_ALARM_IT_MSK_SHIFT			6
+#define TPS65910_INT_MSK_HOTDIE_IT_MSK_MASK			0x20
+#define TPS65910_INT_MSK_HOTDIE_IT_MSK_SHIFT			5
+#define TPS65910_INT_MSK_PWRHOLD_IT_MSK_MASK			0x10
+#define TPS65910_INT_MSK_PWRHOLD_IT_MSK_SHIFT			4
+#define TPS65910_INT_MSK_PWRON_LP_IT_MSK_MASK			0x08
+#define TPS65910_INT_MSK_PWRON_LP_IT_MSK_SHIFT			3
+#define TPS65910_INT_MSK_PWRON_IT_MSK_MASK			0x04
+#define TPS65910_INT_MSK_PWRON_IT_MSK_SHIFT			2
+#define TPS65910_INT_MSK_VMBHI_IT_MSK_MASK			0x02
+#define TPS65910_INT_MSK_VMBHI_IT_MSK_SHIFT			1
+#define TPS65910_INT_MSK_VMBDCH_IT_MSK_MASK			0x01
+#define TPS65910_INT_MSK_VMBDCH_IT_MSK_SHIFT			0
+
+#define TPS65910_INT_STS2_GPIO0_F_IT_SHIFT			2
+#define TPS65910_INT_STS2_GPIO0_F_IT_MASK			0x02
+#define TPS65910_INT_STS2_GPIO0_R_IT_SHIFT			1
+#define TPS65910_INT_STS2_GPIO0_R_IT_MASK			0x01
+
+#define TPS65910_INT_MSK2_GPIO0_F_IT_MSK_SHIFT			2
+#define TPS65910_INT_MSK2_GPIO0_F_IT_MSK_MASK			0x02
+#define TPS65910_INT_MSK2_GPIO0_R_IT_MSK_SHIFT			1
+#define TPS65910_INT_MSK2_GPIO0_R_IT_MSK_MASK			0x01
 
 /*Register INT_STS  (0x80) register.RegisterDescription */
 #define INT_STS_RTC_PERIOD_IT_MASK			0x80
@@ -580,16 +623,16 @@
 #define INT_STS_RTC_ALARM_IT_SHIFT			6
 #define INT_STS_HOTDIE_IT_MASK				0x20
 #define INT_STS_HOTDIE_IT_SHIFT				5
-#define INT_STS_PWRHOLD_IT_MASK				0x10
-#define INT_STS_PWRHOLD_IT_SHIFT			4
+#define INT_STS_PWRHOLD_R_IT_MASK			0x10
+#define INT_STS_PWRHOLD_R_IT_SHIFT			4
 #define INT_STS_PWRON_LP_IT_MASK			0x08
 #define INT_STS_PWRON_LP_IT_SHIFT			3
 #define INT_STS_PWRON_IT_MASK				0x04
 #define INT_STS_PWRON_IT_SHIFT				2
 #define INT_STS_VMBHI_IT_MASK				0x02
 #define INT_STS_VMBHI_IT_SHIFT				1
-#define INT_STS_VMBDCH_IT_MASK				0x01
-#define INT_STS_VMBDCH_IT_SHIFT				0
+#define INT_STS_PWRHOLD_F_IT_MASK			0x01
+#define INT_STS_PWRHOLD_F_IT_SHIFT			0
 
 
 /*Register INT_MSK  (0x80) register.RegisterDescription */
@@ -599,16 +642,16 @@
 #define INT_MSK_RTC_ALARM_IT_MSK_SHIFT			6
 #define INT_MSK_HOTDIE_IT_MSK_MASK			0x20
 #define INT_MSK_HOTDIE_IT_MSK_SHIFT			5
-#define INT_MSK_PWRHOLD_IT_MSK_MASK			0x10
-#define INT_MSK_PWRHOLD_IT_MSK_SHIFT			4
+#define INT_MSK_PWRHOLD_R_IT_MSK_MASK			0x10
+#define INT_MSK_PWRHOLD_R_IT_MSK_SHIFT			4
 #define INT_MSK_PWRON_LP_IT_MSK_MASK			0x08
 #define INT_MSK_PWRON_LP_IT_MSK_SHIFT			3
 #define INT_MSK_PWRON_IT_MSK_MASK			0x04
 #define INT_MSK_PWRON_IT_MSK_SHIFT			2
 #define INT_MSK_VMBHI_IT_MSK_MASK			0x02
 #define INT_MSK_VMBHI_IT_MSK_SHIFT			1
-#define INT_MSK_VMBDCH_IT_MSK_MASK			0x01
-#define INT_MSK_VMBDCH_IT_MSK_SHIFT			0
+#define INT_MSK_PWRHOLD_F_IT_MSK_MASK			0x01
+#define INT_MSK_PWRHOLD_F_IT_MSK_SHIFT			0
 
 
 /*Register INT_STS2  (0x80) register.RegisterDescription */
@@ -650,6 +693,14 @@
 
 
 /*Register INT_STS3  (0x80) register.RegisterDescription */
+#define INT_STS3_PWRDN_IT_MASK				0x80
+#define INT_STS3_PWRDN_IT_SHIFT				7
+#define INT_STS3_VMBCH2_L_IT_MASK			0x40
+#define INT_STS3_VMBCH2_L_IT_SHIFT			6
+#define INT_STS3_VMBCH2_H_IT_MASK			0x20
+#define INT_STS3_VMBCH2_H_IT_SHIFT			5
+#define INT_STS3_WTCHDG_IT_MASK				0x10
+#define INT_STS3_WTCHDG_IT_SHIFT			4
 #define INT_STS3_GPIO5_F_IT_MASK			0x08
 #define INT_STS3_GPIO5_F_IT_SHIFT			3
 #define INT_STS3_GPIO5_R_IT_MASK			0x04
@@ -661,6 +712,14 @@
 
 
 /*Register INT_MSK3  (0x80) register.RegisterDescription */
+#define INT_MSK3_PWRDN_IT_MSK_MASK			0x80
+#define INT_MSK3_PWRDN_IT_MSK_SHIFT			7
+#define INT_MSK3_VMBCH2_L_IT_MSK_MASK			0x40
+#define INT_MSK3_VMBCH2_L_IT_MSK_SHIFT			6
+#define INT_MSK3_VMBCH2_H_IT_MSK_MASK			0x20
+#define INT_MSK3_VMBCH2_H_IT_MSK_SHIFT			5
+#define INT_MSK3_WTCHDG_IT_MSK_MASK			0x10
+#define INT_MSK3_WTCHDG_IT_MSK_SHIFT			4
 #define INT_MSK3_GPIO5_F_IT_MSK_MASK			0x08
 #define INT_MSK3_GPIO5_F_IT_MSK_SHIFT			3
 #define INT_MSK3_GPIO5_R_IT_MSK_MASK			0x04
@@ -721,34 +780,32 @@
 #define TPS65910_IRQ_GPIO_F				9
 #define TPS65910_NUM_IRQ				10
 
-#define TPS65911_IRQ_VBAT_VMBDCH			0
-#define TPS65911_IRQ_VBAT_VMBDCH2L			1
-#define TPS65911_IRQ_VBAT_VMBDCH2H			2
-#define TPS65911_IRQ_VBAT_VMHI				3
-#define TPS65911_IRQ_PWRON				4
-#define TPS65911_IRQ_PWRON_LP				5
-#define TPS65911_IRQ_PWRHOLD_F				6
-#define TPS65911_IRQ_PWRHOLD_R				7
-#define TPS65911_IRQ_HOTDIE				8
-#define TPS65911_IRQ_RTC_ALARM				9
-#define TPS65911_IRQ_RTC_PERIOD				10
-#define TPS65911_IRQ_GPIO0_R				11
-#define TPS65911_IRQ_GPIO0_F				12
-#define TPS65911_IRQ_GPIO1_R				13
-#define TPS65911_IRQ_GPIO1_F				14
-#define TPS65911_IRQ_GPIO2_R				15
-#define TPS65911_IRQ_GPIO2_F				16
-#define TPS65911_IRQ_GPIO3_R				17
-#define TPS65911_IRQ_GPIO3_F				18
-#define TPS65911_IRQ_GPIO4_R				19
-#define TPS65911_IRQ_GPIO4_F				20
-#define TPS65911_IRQ_GPIO5_R				21
-#define TPS65911_IRQ_GPIO5_F				22
-#define TPS65911_IRQ_WTCHDG				23
-#define TPS65911_IRQ_PWRDN				24
-
-#define TPS65911_NUM_IRQ				25
-
+#define TPS65911_IRQ_PWRHOLD_F				0
+#define TPS65911_IRQ_VBAT_VMHI				1
+#define TPS65911_IRQ_PWRON				2
+#define TPS65911_IRQ_PWRON_LP				3
+#define TPS65911_IRQ_PWRHOLD_R				4
+#define TPS65911_IRQ_HOTDIE				5
+#define TPS65911_IRQ_RTC_ALARM				6
+#define TPS65911_IRQ_RTC_PERIOD				7
+#define TPS65911_IRQ_GPIO0_R				8
+#define TPS65911_IRQ_GPIO0_F				9
+#define TPS65911_IRQ_GPIO1_R				10
+#define TPS65911_IRQ_GPIO1_F				11
+#define TPS65911_IRQ_GPIO2_R				12
+#define TPS65911_IRQ_GPIO2_F				13
+#define TPS65911_IRQ_GPIO3_R				14
+#define TPS65911_IRQ_GPIO3_F				15
+#define TPS65911_IRQ_GPIO4_R				16
+#define TPS65911_IRQ_GPIO4_F				17
+#define TPS65911_IRQ_GPIO5_R				18
+#define TPS65911_IRQ_GPIO5_F				19
+#define TPS65911_IRQ_WTCHDG				20
+#define TPS65911_IRQ_VMBCH2_H				21
+#define TPS65911_IRQ_VMBCH2_L				22
+#define TPS65911_IRQ_PWRDN				23
+
+#define TPS65911_NUM_IRQ				24
 
 /* GPIO Register Definitions */
 #define TPS65910_GPIO_DEB				BIT(2)
@@ -848,11 +905,8 @@ struct tps65910 {
 	struct tps65910_board *of_plat_data;
 
 	/* IRQ Handling */
-	struct mutex irq_lock;
 	int chip_irq;
-	int irq_base;
-	int irq_num;
-	u32 irq_mask;
+	struct regmap_irq_chip_data *irq_data;
 	struct irq_domain *domain;
 };
 
@@ -900,4 +954,9 @@ static inline int tps65910_reg_update_bits(struct tps65910 *tps65910, u8 reg,
 	return regmap_update_bits(tps65910->regmap, reg, mask, val);
 }
 
+static inline int tps65910_irq_get_virq(struct tps65910 *tps65910, int irq)
+{
+	return regmap_irq_get_virq(tps65910->irq_data, irq);
+}
+
 #endif /*  __LINUX_MFD_TPS65910_H */
-- 
cgit v1.2.3


From 4aab3fadad32ff4df05832beff7c16fd6ad938aa Mon Sep 17 00:00:00 2001
From: Laxman Dewangan <ldewangan@nvidia.com>
Date: Tue, 13 Nov 2012 19:33:58 +0530
Subject: mfd: tps65910: Move interrupt implementation code to mfd file

In place of implementing the irq support in separate file,
moving implementation to main mfd file.
The irq files only contains the table and init steps only
and does not need extra file to have this only for this
purpose.

Signed-off-by: Laxman Dewangan <ldewangan@nvidia.com>
Reviewed-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/Makefile         |   2 +-
 drivers/mfd/tps65910-irq.c   | 243 -------------------------------------------
 drivers/mfd/tps65910.c       | 216 ++++++++++++++++++++++++++++++++++++++
 include/linux/mfd/tps65910.h |   4 -
 4 files changed, 217 insertions(+), 248 deletions(-)
 delete mode 100644 drivers/mfd/tps65910-irq.c

(limited to 'include/linux')

diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index 8a68fc7ea870..a30c49ecdd95 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -56,7 +56,7 @@ obj-$(CONFIG_TPS6105X)		+= tps6105x.o
 obj-$(CONFIG_TPS65010)		+= tps65010.o
 obj-$(CONFIG_TPS6507X)		+= tps6507x.o
 obj-$(CONFIG_MFD_TPS65217)	+= tps65217.o
-obj-$(CONFIG_MFD_TPS65910)	+= tps65910.o tps65910-irq.o
+obj-$(CONFIG_MFD_TPS65910)	+= tps65910.o
 tps65912-objs                   := tps65912-core.o tps65912-irq.o
 obj-$(CONFIG_MFD_TPS65912)	+= tps65912.o
 obj-$(CONFIG_MFD_TPS65912_I2C)	+= tps65912-i2c.o
diff --git a/drivers/mfd/tps65910-irq.c b/drivers/mfd/tps65910-irq.c
deleted file mode 100644
index 554543a584a1..000000000000
--- a/drivers/mfd/tps65910-irq.c
+++ /dev/null
@@ -1,243 +0,0 @@
-/*
- * tps65910-irq.c  --  TI TPS6591x
- *
- * Copyright 2010 Texas Instruments Inc.
- *
- * Author: Graeme Gregory <gg@slimlogic.co.uk>
- * Author: Jorge Eduardo Candelaria <jedu@slimlogic.co.uk>
- *
- *  This program is free software; you can redistribute it and/or modify it
- *  under  the terms of the GNU General  Public License as published by the
- *  Free Software Foundation;  either version 2 of the License, or (at your
- *  option) any later version.
- *
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/bug.h>
-#include <linux/device.h>
-#include <linux/interrupt.h>
-#include <linux/irq.h>
-#include <linux/irqdomain.h>
-#include <linux/gpio.h>
-#include <linux/mfd/tps65910.h>
-
-
-static const struct regmap_irq tps65911_irqs[] = {
-	/* INT_STS */
-	[TPS65911_IRQ_PWRHOLD_F] = {
-		.mask = INT_MSK_PWRHOLD_F_IT_MSK_MASK,
-		.reg_offset = 0,
-	},
-	[TPS65911_IRQ_VBAT_VMHI] = {
-		.mask = INT_MSK_VMBHI_IT_MSK_MASK,
-		.reg_offset = 0,
-	},
-	[TPS65911_IRQ_PWRON] = {
-		.mask = INT_MSK_PWRON_IT_MSK_MASK,
-		.reg_offset = 0,
-	},
-	[TPS65911_IRQ_PWRON_LP] = {
-		.mask = INT_MSK_PWRON_LP_IT_MSK_MASK,
-		.reg_offset = 0,
-	},
-	[TPS65911_IRQ_PWRHOLD_R] = {
-		.mask = INT_MSK_PWRHOLD_R_IT_MSK_MASK,
-		.reg_offset = 0,
-	},
-	[TPS65911_IRQ_HOTDIE] = {
-		.mask = INT_MSK_HOTDIE_IT_MSK_MASK,
-		.reg_offset = 0,
-	},
-	[TPS65911_IRQ_RTC_ALARM] = {
-		.mask = INT_MSK_RTC_ALARM_IT_MSK_MASK,
-		.reg_offset = 0,
-	},
-	[TPS65911_IRQ_RTC_PERIOD] = {
-		.mask = INT_MSK_RTC_PERIOD_IT_MSK_MASK,
-		.reg_offset = 0,
-	},
-
-	/* INT_STS2 */
-	[TPS65911_IRQ_GPIO0_R] = {
-		.mask = INT_MSK2_GPIO0_R_IT_MSK_MASK,
-		.reg_offset = 1,
-	},
-	[TPS65911_IRQ_GPIO0_F] = {
-		.mask = INT_MSK2_GPIO0_F_IT_MSK_MASK,
-		.reg_offset = 1,
-	},
-	[TPS65911_IRQ_GPIO1_R] = {
-		.mask = INT_MSK2_GPIO1_R_IT_MSK_MASK,
-		.reg_offset = 1,
-	},
-	[TPS65911_IRQ_GPIO1_F] = {
-		.mask = INT_MSK2_GPIO1_F_IT_MSK_MASK,
-		.reg_offset = 1,
-	},
-	[TPS65911_IRQ_GPIO2_R] = {
-		.mask = INT_MSK2_GPIO2_R_IT_MSK_MASK,
-		.reg_offset = 1,
-	},
-	[TPS65911_IRQ_GPIO2_F] = {
-		.mask = INT_MSK2_GPIO2_F_IT_MSK_MASK,
-		.reg_offset = 1,
-	},
-	[TPS65911_IRQ_GPIO3_R] = {
-		.mask = INT_MSK2_GPIO3_R_IT_MSK_MASK,
-		.reg_offset = 1,
-	},
-	[TPS65911_IRQ_GPIO3_F] = {
-		.mask = INT_MSK2_GPIO3_F_IT_MSK_MASK,
-		.reg_offset = 1,
-	},
-
-	/* INT_STS3 */
-	[TPS65911_IRQ_GPIO4_R] = {
-		.mask = INT_MSK3_GPIO4_R_IT_MSK_MASK,
-		.reg_offset = 2,
-	},
-	[TPS65911_IRQ_GPIO4_F] = {
-		.mask = INT_MSK3_GPIO4_F_IT_MSK_MASK,
-		.reg_offset = 2,
-	},
-	[TPS65911_IRQ_GPIO5_R] = {
-		.mask = INT_MSK3_GPIO5_R_IT_MSK_MASK,
-		.reg_offset = 2,
-	},
-	[TPS65911_IRQ_GPIO5_F] = {
-		.mask = INT_MSK3_GPIO5_F_IT_MSK_MASK,
-		.reg_offset = 2,
-	},
-	[TPS65911_IRQ_WTCHDG] = {
-		.mask = INT_MSK3_WTCHDG_IT_MSK_MASK,
-		.reg_offset = 2,
-	},
-	[TPS65911_IRQ_VMBCH2_H] = {
-		.mask = INT_MSK3_VMBCH2_H_IT_MSK_MASK,
-		.reg_offset = 2,
-	},
-	[TPS65911_IRQ_VMBCH2_L] = {
-		.mask = INT_MSK3_VMBCH2_L_IT_MSK_MASK,
-		.reg_offset = 2,
-	},
-	[TPS65911_IRQ_PWRDN] = {
-		.mask = INT_MSK3_PWRDN_IT_MSK_MASK,
-		.reg_offset = 2,
-	},
-};
-
-static const struct regmap_irq tps65910_irqs[] = {
-	/* INT_STS */
-	[TPS65910_IRQ_VBAT_VMBDCH] = {
-		.mask = TPS65910_INT_MSK_VMBDCH_IT_MSK_MASK,
-		.reg_offset = 0,
-	},
-	[TPS65910_IRQ_VBAT_VMHI] = {
-		.mask = TPS65910_INT_MSK_VMBHI_IT_MSK_MASK,
-		.reg_offset = 0,
-	},
-	[TPS65910_IRQ_PWRON] = {
-		.mask = TPS65910_INT_MSK_PWRON_IT_MSK_MASK,
-		.reg_offset = 0,
-	},
-	[TPS65910_IRQ_PWRON_LP] = {
-		.mask = TPS65910_INT_MSK_PWRON_LP_IT_MSK_MASK,
-		.reg_offset = 0,
-	},
-	[TPS65910_IRQ_PWRHOLD] = {
-		.mask = TPS65910_INT_MSK_PWRHOLD_IT_MSK_MASK,
-		.reg_offset = 0,
-	},
-	[TPS65910_IRQ_HOTDIE] = {
-		.mask = TPS65910_INT_MSK_HOTDIE_IT_MSK_MASK,
-		.reg_offset = 0,
-	},
-	[TPS65910_IRQ_RTC_ALARM] = {
-		.mask = TPS65910_INT_MSK_RTC_ALARM_IT_MSK_MASK,
-		.reg_offset = 0,
-	},
-	[TPS65910_IRQ_RTC_PERIOD] = {
-		.mask = TPS65910_INT_MSK_RTC_PERIOD_IT_MSK_MASK,
-		.reg_offset = 0,
-	},
-
-	/* INT_STS2 */
-	[TPS65910_IRQ_GPIO_R] = {
-		.mask = TPS65910_INT_MSK2_GPIO0_F_IT_MSK_MASK,
-		.reg_offset = 1,
-	},
-	[TPS65910_IRQ_GPIO_F] = {
-		.mask = TPS65910_INT_MSK2_GPIO0_R_IT_MSK_MASK,
-		.reg_offset = 1,
-	},
-};
-
-static struct regmap_irq_chip tps65911_irq_chip = {
-	.name = "tps65910",
-	.irqs = tps65911_irqs,
-	.num_irqs = ARRAY_SIZE(tps65911_irqs),
-	.num_regs = 3,
-	.irq_reg_stride = 2,
-	.status_base = TPS65910_INT_STS,
-	.mask_base = TPS65910_INT_MSK,
-	.ack_base = TPS65910_INT_MSK,
-};
-
-static struct regmap_irq_chip tps65910_irq_chip = {
-	.name = "tps65910",
-	.irqs = tps65910_irqs,
-	.num_irqs = ARRAY_SIZE(tps65910_irqs),
-	.num_regs = 2,
-	.irq_reg_stride = 2,
-	.status_base = TPS65910_INT_STS,
-	.mask_base = TPS65910_INT_MSK,
-	.ack_base = TPS65910_INT_MSK,
-};
-
-int tps65910_irq_init(struct tps65910 *tps65910, int irq,
-		    struct tps65910_platform_data *pdata)
-{
-	int ret = 0;
-	static struct regmap_irq_chip *tps6591x_irqs_chip;
-
-	if (!irq) {
-		dev_warn(tps65910->dev, "No interrupt support, no core IRQ\n");
-		return -EINVAL;
-	}
-
-	if (!pdata) {
-		dev_warn(tps65910->dev, "No interrupt support, no pdata\n");
-		return -EINVAL;
-	}
-
-
-	switch (tps65910_chip_id(tps65910)) {
-	case TPS65910:
-		tps6591x_irqs_chip = &tps65910_irq_chip;
-		break;
-	case TPS65911:
-		tps6591x_irqs_chip = &tps65911_irq_chip;
-		break;
-	}
-
-	tps65910->chip_irq = irq;
-	ret = regmap_add_irq_chip(tps65910->regmap, tps65910->chip_irq,
-		IRQF_ONESHOT, pdata->irq_base,
-		tps6591x_irqs_chip, &tps65910->irq_data);
-	if (ret < 0) {
-		dev_warn(tps65910->dev,
-				"Failed to add irq_chip %d\n", ret);
-		return ret;
-	}
-	return ret;
-}
-
-int tps65910_irq_exit(struct tps65910 *tps65910)
-{
-	if (tps65910->chip_irq > 0)
-		regmap_del_irq_chip(tps65910->chip_irq, tps65910->irq_data);
-	return 0;
-}
diff --git a/drivers/mfd/tps65910.c b/drivers/mfd/tps65910.c
index 27fbbe510101..d4d4eb5b5b6f 100644
--- a/drivers/mfd/tps65910.c
+++ b/drivers/mfd/tps65910.c
@@ -19,6 +19,9 @@
 #include <linux/err.h>
 #include <linux/slab.h>
 #include <linux/i2c.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
 #include <linux/mfd/core.h>
 #include <linux/regmap.h>
 #include <linux/mfd/tps65910.h>
@@ -50,6 +53,219 @@ static struct mfd_cell tps65910s[] = {
 };
 
 
+static const struct regmap_irq tps65911_irqs[] = {
+	/* INT_STS */
+	[TPS65911_IRQ_PWRHOLD_F] = {
+		.mask = INT_MSK_PWRHOLD_F_IT_MSK_MASK,
+		.reg_offset = 0,
+	},
+	[TPS65911_IRQ_VBAT_VMHI] = {
+		.mask = INT_MSK_VMBHI_IT_MSK_MASK,
+		.reg_offset = 0,
+	},
+	[TPS65911_IRQ_PWRON] = {
+		.mask = INT_MSK_PWRON_IT_MSK_MASK,
+		.reg_offset = 0,
+	},
+	[TPS65911_IRQ_PWRON_LP] = {
+		.mask = INT_MSK_PWRON_LP_IT_MSK_MASK,
+		.reg_offset = 0,
+	},
+	[TPS65911_IRQ_PWRHOLD_R] = {
+		.mask = INT_MSK_PWRHOLD_R_IT_MSK_MASK,
+		.reg_offset = 0,
+	},
+	[TPS65911_IRQ_HOTDIE] = {
+		.mask = INT_MSK_HOTDIE_IT_MSK_MASK,
+		.reg_offset = 0,
+	},
+	[TPS65911_IRQ_RTC_ALARM] = {
+		.mask = INT_MSK_RTC_ALARM_IT_MSK_MASK,
+		.reg_offset = 0,
+	},
+	[TPS65911_IRQ_RTC_PERIOD] = {
+		.mask = INT_MSK_RTC_PERIOD_IT_MSK_MASK,
+		.reg_offset = 0,
+	},
+
+	/* INT_STS2 */
+	[TPS65911_IRQ_GPIO0_R] = {
+		.mask = INT_MSK2_GPIO0_R_IT_MSK_MASK,
+		.reg_offset = 1,
+	},
+	[TPS65911_IRQ_GPIO0_F] = {
+		.mask = INT_MSK2_GPIO0_F_IT_MSK_MASK,
+		.reg_offset = 1,
+	},
+	[TPS65911_IRQ_GPIO1_R] = {
+		.mask = INT_MSK2_GPIO1_R_IT_MSK_MASK,
+		.reg_offset = 1,
+	},
+	[TPS65911_IRQ_GPIO1_F] = {
+		.mask = INT_MSK2_GPIO1_F_IT_MSK_MASK,
+		.reg_offset = 1,
+	},
+	[TPS65911_IRQ_GPIO2_R] = {
+		.mask = INT_MSK2_GPIO2_R_IT_MSK_MASK,
+		.reg_offset = 1,
+	},
+	[TPS65911_IRQ_GPIO2_F] = {
+		.mask = INT_MSK2_GPIO2_F_IT_MSK_MASK,
+		.reg_offset = 1,
+	},
+	[TPS65911_IRQ_GPIO3_R] = {
+		.mask = INT_MSK2_GPIO3_R_IT_MSK_MASK,
+		.reg_offset = 1,
+	},
+	[TPS65911_IRQ_GPIO3_F] = {
+		.mask = INT_MSK2_GPIO3_F_IT_MSK_MASK,
+		.reg_offset = 1,
+	},
+
+	/* INT_STS2 */
+	[TPS65911_IRQ_GPIO4_R] = {
+		.mask = INT_MSK3_GPIO4_R_IT_MSK_MASK,
+		.reg_offset = 2,
+	},
+	[TPS65911_IRQ_GPIO4_F] = {
+		.mask = INT_MSK3_GPIO4_F_IT_MSK_MASK,
+		.reg_offset = 2,
+	},
+	[TPS65911_IRQ_GPIO5_R] = {
+		.mask = INT_MSK3_GPIO5_R_IT_MSK_MASK,
+		.reg_offset = 2,
+	},
+	[TPS65911_IRQ_GPIO5_F] = {
+		.mask = INT_MSK3_GPIO5_F_IT_MSK_MASK,
+		.reg_offset = 2,
+	},
+	[TPS65911_IRQ_WTCHDG] = {
+		.mask = INT_MSK3_WTCHDG_IT_MSK_MASK,
+		.reg_offset = 2,
+	},
+	[TPS65911_IRQ_VMBCH2_H] = {
+		.mask = INT_MSK3_VMBCH2_H_IT_MSK_MASK,
+		.reg_offset = 2,
+	},
+	[TPS65911_IRQ_VMBCH2_L] = {
+		.mask = INT_MSK3_VMBCH2_L_IT_MSK_MASK,
+		.reg_offset = 2,
+	},
+	[TPS65911_IRQ_PWRDN] = {
+		.mask = INT_MSK3_PWRDN_IT_MSK_MASK,
+		.reg_offset = 2,
+	},
+};
+
+static const struct regmap_irq tps65910_irqs[] = {
+	/* INT_STS */
+	[TPS65910_IRQ_VBAT_VMBDCH] = {
+		.mask = TPS65910_INT_MSK_VMBDCH_IT_MSK_MASK,
+		.reg_offset = 0,
+	},
+	[TPS65910_IRQ_VBAT_VMHI] = {
+		.mask = TPS65910_INT_MSK_VMBHI_IT_MSK_MASK,
+		.reg_offset = 0,
+	},
+	[TPS65910_IRQ_PWRON] = {
+		.mask = TPS65910_INT_MSK_PWRON_IT_MSK_MASK,
+		.reg_offset = 0,
+	},
+	[TPS65910_IRQ_PWRON_LP] = {
+		.mask = TPS65910_INT_MSK_PWRON_LP_IT_MSK_MASK,
+		.reg_offset = 0,
+	},
+	[TPS65910_IRQ_PWRHOLD] = {
+		.mask = TPS65910_INT_MSK_PWRHOLD_IT_MSK_MASK,
+		.reg_offset = 0,
+	},
+	[TPS65910_IRQ_HOTDIE] = {
+		.mask = TPS65910_INT_MSK_HOTDIE_IT_MSK_MASK,
+		.reg_offset = 0,
+	},
+	[TPS65910_IRQ_RTC_ALARM] = {
+		.mask = TPS65910_INT_MSK_RTC_ALARM_IT_MSK_MASK,
+		.reg_offset = 0,
+	},
+	[TPS65910_IRQ_RTC_PERIOD] = {
+		.mask = TPS65910_INT_MSK_RTC_PERIOD_IT_MSK_MASK,
+		.reg_offset = 0,
+	},
+
+	/* INT_STS2 */
+	[TPS65910_IRQ_GPIO_R] = {
+		.mask = TPS65910_INT_MSK2_GPIO0_F_IT_MSK_MASK,
+		.reg_offset = 1,
+	},
+	[TPS65910_IRQ_GPIO_F] = {
+		.mask = TPS65910_INT_MSK2_GPIO0_R_IT_MSK_MASK,
+		.reg_offset = 1,
+	},
+};
+
+static struct regmap_irq_chip tps65911_irq_chip = {
+	.name = "tps65910",
+	.irqs = tps65911_irqs,
+	.num_irqs = ARRAY_SIZE(tps65911_irqs),
+	.num_regs = 3,
+	.irq_reg_stride = 2,
+	.status_base = TPS65910_INT_STS,
+	.mask_base = TPS65910_INT_MSK,
+	.ack_base = TPS65910_INT_MSK,
+};
+
+static struct regmap_irq_chip tps65910_irq_chip = {
+	.name = "tps65910",
+	.irqs = tps65910_irqs,
+	.num_irqs = ARRAY_SIZE(tps65910_irqs),
+	.num_regs = 2,
+	.irq_reg_stride = 2,
+	.status_base = TPS65910_INT_STS,
+	.mask_base = TPS65910_INT_MSK,
+	.ack_base = TPS65910_INT_MSK,
+};
+
+static int tps65910_irq_init(struct tps65910 *tps65910, int irq,
+		    struct tps65910_platform_data *pdata)
+{
+	int ret = 0;
+	static struct regmap_irq_chip *tps6591x_irqs_chip;
+
+	if (!irq) {
+		dev_warn(tps65910->dev, "No interrupt support, no core IRQ\n");
+		return -EINVAL;
+	}
+
+	if (!pdata) {
+		dev_warn(tps65910->dev, "No interrupt support, no pdata\n");
+		return -EINVAL;
+	}
+
+	switch (tps65910_chip_id(tps65910)) {
+	case TPS65910:
+		tps6591x_irqs_chip = &tps65910_irq_chip;
+		break;
+	case TPS65911:
+		tps6591x_irqs_chip = &tps65911_irq_chip;
+		break;
+	}
+
+	tps65910->chip_irq = irq;
+	ret = regmap_add_irq_chip(tps65910->regmap, tps65910->chip_irq,
+		IRQF_ONESHOT, pdata->irq_base,
+		tps6591x_irqs_chip, &tps65910->irq_data);
+	if (ret < 0)
+		dev_warn(tps65910->dev, "Failed to add irq_chip %d\n", ret);
+	return ret;
+}
+
+static int tps65910_irq_exit(struct tps65910 *tps65910)
+{
+	if (tps65910->chip_irq > 0)
+		regmap_del_irq_chip(tps65910->chip_irq, tps65910->irq_data);
+	return 0;
+}
+
 static bool is_volatile_reg(struct device *dev, unsigned int reg)
 {
 	struct tps65910 *tps65910 = dev_get_drvdata(dev);
diff --git a/include/linux/mfd/tps65910.h b/include/linux/mfd/tps65910.h
index b564ac29590a..0b16903f7e88 100644
--- a/include/linux/mfd/tps65910.h
+++ b/include/linux/mfd/tps65910.h
@@ -915,10 +915,6 @@ struct tps65910_platform_data {
 	int irq_base;
 };
 
-int tps65910_irq_init(struct tps65910 *tps65910, int irq,
-		struct tps65910_platform_data *pdata);
-int tps65910_irq_exit(struct tps65910 *tps65910);
-
 static inline int tps65910_chip_id(struct tps65910 *tps65910)
 {
 	return tps65910->id;
-- 
cgit v1.2.3


From 8bad1abd6303476d6f77878aa8ea737d5d1b625c Mon Sep 17 00:00:00 2001
From: Fabio Estevam <fabio.estevam@freescale.com>
Date: Thu, 4 Oct 2012 00:15:05 -0300
Subject: mfd: da9052: Introduce da9052-irq.c

Create a da9052-irq.c file so that it can handle interrupt related functions.

This is useful for allowing the da9052 drivers to use such functions
when dealing with da9052 interrupts.

Signed-off-by: Fabio Estevam <fabio.estevam@freescale.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/Makefile              |   1 +
 drivers/mfd/da9052-core.c         | 271 ++---------------------------------
 drivers/mfd/da9052-irq.c          | 288 ++++++++++++++++++++++++++++++++++++++
 include/linux/mfd/da9052/da9052.h |  10 ++
 4 files changed, 307 insertions(+), 263 deletions(-)
 create mode 100644 drivers/mfd/da9052-irq.c

(limited to 'include/linux')

diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index a30c49ecdd95..632cce09e407 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -90,6 +90,7 @@ obj-$(CONFIG_UCB1400_CORE)	+= ucb1400_core.o
 
 obj-$(CONFIG_PMIC_DA903X)	+= da903x.o
 
+obj-$(CONFIG_PMIC_DA9052)	+= da9052-irq.o
 obj-$(CONFIG_PMIC_DA9052)	+= da9052-core.o
 obj-$(CONFIG_MFD_DA9052_SPI)	+= da9052-spi.o
 obj-$(CONFIG_MFD_DA9052_I2C)	+= da9052-i2c.o
diff --git a/drivers/mfd/da9052-core.c b/drivers/mfd/da9052-core.c
index c96cdbc0daff..2153f9bba9ef 100644
--- a/drivers/mfd/da9052-core.c
+++ b/drivers/mfd/da9052-core.c
@@ -15,7 +15,6 @@
 #include <linux/delay.h>
 #include <linux/input.h>
 #include <linux/interrupt.h>
-#include <linux/irq.h>
 #include <linux/mfd/core.h>
 #include <linux/slab.h>
 #include <linux/module.h>
@@ -24,16 +23,6 @@
 #include <linux/mfd/da9052/pdata.h>
 #include <linux/mfd/da9052/reg.h>
 
-#define DA9052_NUM_IRQ_REGS		4
-#define DA9052_IRQ_MASK_POS_1		0x01
-#define DA9052_IRQ_MASK_POS_2		0x02
-#define DA9052_IRQ_MASK_POS_3		0x04
-#define DA9052_IRQ_MASK_POS_4		0x08
-#define DA9052_IRQ_MASK_POS_5		0x10
-#define DA9052_IRQ_MASK_POS_6		0x20
-#define DA9052_IRQ_MASK_POS_7		0x40
-#define DA9052_IRQ_MASK_POS_8		0x80
-
 static bool da9052_reg_readable(struct device *dev, unsigned int reg)
 {
 	switch (reg) {
@@ -425,15 +414,6 @@ err:
 }
 EXPORT_SYMBOL_GPL(da9052_adc_manual_read);
 
-static irqreturn_t da9052_auxadc_irq(int irq, void *irq_data)
-{
-	struct da9052 *da9052 = irq_data;
-
-	complete(&da9052->done);
-
-	return IRQ_HANDLED;
-}
-
 int da9052_adc_read_temp(struct da9052 *da9052)
 {
 	int tbat;
@@ -447,74 +427,6 @@ int da9052_adc_read_temp(struct da9052 *da9052)
 }
 EXPORT_SYMBOL_GPL(da9052_adc_read_temp);
 
-static struct resource da9052_rtc_resource = {
-	.name = "ALM",
-	.start = DA9052_IRQ_ALARM,
-	.end   = DA9052_IRQ_ALARM,
-	.flags = IORESOURCE_IRQ,
-};
-
-static struct resource da9052_onkey_resource = {
-	.name = "ONKEY",
-	.start = DA9052_IRQ_NONKEY,
-	.end   = DA9052_IRQ_NONKEY,
-	.flags = IORESOURCE_IRQ,
-};
-
-static struct resource da9052_bat_resources[] = {
-	{
-		.name = "BATT TEMP",
-		.start = DA9052_IRQ_TBAT,
-		.end   = DA9052_IRQ_TBAT,
-		.flags = IORESOURCE_IRQ,
-	},
-	{
-		.name = "DCIN DET",
-		.start = DA9052_IRQ_DCIN,
-		.end   = DA9052_IRQ_DCIN,
-		.flags = IORESOURCE_IRQ,
-	},
-	{
-		.name = "DCIN REM",
-		.start = DA9052_IRQ_DCINREM,
-		.end   = DA9052_IRQ_DCINREM,
-		.flags = IORESOURCE_IRQ,
-	},
-	{
-		.name = "VBUS DET",
-		.start = DA9052_IRQ_VBUS,
-		.end   = DA9052_IRQ_VBUS,
-		.flags = IORESOURCE_IRQ,
-	},
-	{
-		.name = "VBUS REM",
-		.start = DA9052_IRQ_VBUSREM,
-		.end   = DA9052_IRQ_VBUSREM,
-		.flags = IORESOURCE_IRQ,
-	},
-	{
-		.name = "CHG END",
-		.start = DA9052_IRQ_CHGEND,
-		.end   = DA9052_IRQ_CHGEND,
-		.flags = IORESOURCE_IRQ,
-	},
-};
-
-static struct resource da9052_tsi_resources[] = {
-	{
-		.name = "PENDWN",
-		.start = DA9052_IRQ_PENDOWN,
-		.end   = DA9052_IRQ_PENDOWN,
-		.flags = IORESOURCE_IRQ,
-	},
-	{
-		.name = "TSIRDY",
-		.start = DA9052_IRQ_TSIREADY,
-		.end   = DA9052_IRQ_TSIREADY,
-		.flags = IORESOURCE_IRQ,
-	},
-};
-
 static struct mfd_cell __devinitdata da9052_subdev_info[] = {
 	{
 		.name = "da9052-regulator",
@@ -574,13 +486,9 @@ static struct mfd_cell __devinitdata da9052_subdev_info[] = {
 	},
 	{
 		.name = "da9052-onkey",
-		.resources = &da9052_onkey_resource,
-		.num_resources = 1,
 	},
 	{
 		.name = "da9052-rtc",
-		.resources = &da9052_rtc_resource,
-		.num_resources = 1,
 	},
 	{
 		.name = "da9052-gpio",
@@ -602,160 +510,15 @@ static struct mfd_cell __devinitdata da9052_subdev_info[] = {
 	},
 	{
 		.name = "da9052-tsi",
-		.resources = da9052_tsi_resources,
-		.num_resources = ARRAY_SIZE(da9052_tsi_resources),
 	},
 	{
 		.name = "da9052-bat",
-		.resources = da9052_bat_resources,
-		.num_resources = ARRAY_SIZE(da9052_bat_resources),
 	},
 	{
 		.name = "da9052-watchdog",
 	},
 };
 
-static struct regmap_irq da9052_irqs[] = {
-	[DA9052_IRQ_DCIN] = {
-		.reg_offset = 0,
-		.mask = DA9052_IRQ_MASK_POS_1,
-	},
-	[DA9052_IRQ_VBUS] = {
-		.reg_offset = 0,
-		.mask = DA9052_IRQ_MASK_POS_2,
-	},
-	[DA9052_IRQ_DCINREM] = {
-		.reg_offset = 0,
-		.mask = DA9052_IRQ_MASK_POS_3,
-	},
-	[DA9052_IRQ_VBUSREM] = {
-		.reg_offset = 0,
-		.mask = DA9052_IRQ_MASK_POS_4,
-	},
-	[DA9052_IRQ_VDDLOW] = {
-		.reg_offset = 0,
-		.mask = DA9052_IRQ_MASK_POS_5,
-	},
-	[DA9052_IRQ_ALARM] = {
-		.reg_offset = 0,
-		.mask = DA9052_IRQ_MASK_POS_6,
-	},
-	[DA9052_IRQ_SEQRDY] = {
-		.reg_offset = 0,
-		.mask = DA9052_IRQ_MASK_POS_7,
-	},
-	[DA9052_IRQ_COMP1V2] = {
-		.reg_offset = 0,
-		.mask = DA9052_IRQ_MASK_POS_8,
-	},
-	[DA9052_IRQ_NONKEY] = {
-		.reg_offset = 1,
-		.mask = DA9052_IRQ_MASK_POS_1,
-	},
-	[DA9052_IRQ_IDFLOAT] = {
-		.reg_offset = 1,
-		.mask = DA9052_IRQ_MASK_POS_2,
-	},
-	[DA9052_IRQ_IDGND] = {
-		.reg_offset = 1,
-		.mask = DA9052_IRQ_MASK_POS_3,
-	},
-	[DA9052_IRQ_CHGEND] = {
-		.reg_offset = 1,
-		.mask = DA9052_IRQ_MASK_POS_4,
-	},
-	[DA9052_IRQ_TBAT] = {
-		.reg_offset = 1,
-		.mask = DA9052_IRQ_MASK_POS_5,
-	},
-	[DA9052_IRQ_ADC_EOM] = {
-		.reg_offset = 1,
-		.mask = DA9052_IRQ_MASK_POS_6,
-	},
-	[DA9052_IRQ_PENDOWN] = {
-		.reg_offset = 1,
-		.mask = DA9052_IRQ_MASK_POS_7,
-	},
-	[DA9052_IRQ_TSIREADY] = {
-		.reg_offset = 1,
-		.mask = DA9052_IRQ_MASK_POS_8,
-	},
-	[DA9052_IRQ_GPI0] = {
-		.reg_offset = 2,
-		.mask = DA9052_IRQ_MASK_POS_1,
-	},
-	[DA9052_IRQ_GPI1] = {
-		.reg_offset = 2,
-		.mask = DA9052_IRQ_MASK_POS_2,
-	},
-	[DA9052_IRQ_GPI2] = {
-		.reg_offset = 2,
-		.mask = DA9052_IRQ_MASK_POS_3,
-	},
-	[DA9052_IRQ_GPI3] = {
-		.reg_offset = 2,
-		.mask = DA9052_IRQ_MASK_POS_4,
-	},
-	[DA9052_IRQ_GPI4] = {
-		.reg_offset = 2,
-		.mask = DA9052_IRQ_MASK_POS_5,
-	},
-	[DA9052_IRQ_GPI5] = {
-		.reg_offset = 2,
-		.mask = DA9052_IRQ_MASK_POS_6,
-	},
-	[DA9052_IRQ_GPI6] = {
-		.reg_offset = 2,
-		.mask = DA9052_IRQ_MASK_POS_7,
-	},
-	[DA9052_IRQ_GPI7] = {
-		.reg_offset = 2,
-		.mask = DA9052_IRQ_MASK_POS_8,
-	},
-	[DA9052_IRQ_GPI8] = {
-		.reg_offset = 3,
-		.mask = DA9052_IRQ_MASK_POS_1,
-	},
-	[DA9052_IRQ_GPI9] = {
-		.reg_offset = 3,
-		.mask = DA9052_IRQ_MASK_POS_2,
-	},
-	[DA9052_IRQ_GPI10] = {
-		.reg_offset = 3,
-		.mask = DA9052_IRQ_MASK_POS_3,
-	},
-	[DA9052_IRQ_GPI11] = {
-		.reg_offset = 3,
-		.mask = DA9052_IRQ_MASK_POS_4,
-	},
-	[DA9052_IRQ_GPI12] = {
-		.reg_offset = 3,
-		.mask = DA9052_IRQ_MASK_POS_5,
-	},
-	[DA9052_IRQ_GPI13] = {
-		.reg_offset = 3,
-		.mask = DA9052_IRQ_MASK_POS_6,
-	},
-	[DA9052_IRQ_GPI14] = {
-		.reg_offset = 3,
-		.mask = DA9052_IRQ_MASK_POS_7,
-	},
-	[DA9052_IRQ_GPI15] = {
-		.reg_offset = 3,
-		.mask = DA9052_IRQ_MASK_POS_8,
-	},
-};
-
-static struct regmap_irq_chip da9052_regmap_irq_chip = {
-	.name = "da9052_irq",
-	.status_base = DA9052_EVENT_A_REG,
-	.mask_base = DA9052_IRQ_MASK_A_REG,
-	.ack_base = DA9052_EVENT_A_REG,
-	.num_regs = DA9052_NUM_IRQ_REGS,
-	.irqs = da9052_irqs,
-	.num_irqs = ARRAY_SIZE(da9052_irqs),
-};
-
 struct regmap_config da9052_regmap_config = {
 	.reg_bits = 8,
 	.val_bits = 8,
@@ -769,15 +532,10 @@ struct regmap_config da9052_regmap_config = {
 };
 EXPORT_SYMBOL_GPL(da9052_regmap_config);
 
-static int da9052_map_irq(struct da9052 *da9052, int irq)
-{
-	return regmap_irq_get_virq(da9052->irq_data, irq);
-}
-
 int __devinit da9052_device_init(struct da9052 *da9052, u8 chip_id)
 {
 	struct da9052_pdata *pdata = da9052->dev->platform_data;
-	int ret, i;
+	int ret;
 
 	mutex_init(&da9052->auxadc_lock);
 	init_completion(&da9052->done);
@@ -787,22 +545,12 @@ int __devinit da9052_device_init(struct da9052 *da9052, u8 chip_id)
 
 	da9052->chip_id = chip_id;
 
-	ret = regmap_add_irq_chip(da9052->regmap, da9052->chip_irq,
-				  IRQF_TRIGGER_LOW | IRQF_ONESHOT,
-				  -1, &da9052_regmap_irq_chip,
-				  &da9052->irq_data);
-	if (ret < 0) {
-		dev_err(da9052->dev, "regmap_add_irq_chip failed: %d\n", ret);
-		goto regmap_err;
+	ret = da9052_irq_init(da9052);
+	if (ret != 0) {
+		dev_err(da9052->dev, "da9052_irq_init failed: %d\n", ret);
+		return ret;
 	}
 
-	i = da9052_map_irq(da9052, DA9052_IRQ_ADC_EOM);
-	ret = request_threaded_irq(i, NULL, da9052_auxadc_irq,
-				   IRQF_TRIGGER_LOW | IRQF_ONESHOT,
-				   "adc-irq", da9052);
-	if (ret != 0)
-		dev_err(da9052->dev, "DA9052 ADC IRQ failed ret=%d\n", ret);
-
 	ret = mfd_add_devices(da9052->dev, -1, da9052_subdev_info,
 			      ARRAY_SIZE(da9052_subdev_info), NULL, 0, NULL);
 	if (ret) {
@@ -813,18 +561,15 @@ int __devinit da9052_device_init(struct da9052 *da9052, u8 chip_id)
 	return 0;
 
 err:
-	free_irq(da9052_map_irq(da9052, DA9052_IRQ_ADC_EOM), da9052);
-	regmap_del_irq_chip(da9052->chip_irq, da9052->irq_data);
-	mfd_remove_devices(da9052->dev);
-regmap_err:
+	da9052_irq_exit(da9052);
+
 	return ret;
 }
 
 void da9052_device_exit(struct da9052 *da9052)
 {
-	free_irq(da9052_map_irq(da9052, DA9052_IRQ_ADC_EOM), da9052);
-	regmap_del_irq_chip(da9052->chip_irq, da9052->irq_data);
 	mfd_remove_devices(da9052->dev);
+	da9052_irq_exit(da9052);
 }
 
 MODULE_AUTHOR("David Dajun Chen <dchen@diasemi.com>");
diff --git a/drivers/mfd/da9052-irq.c b/drivers/mfd/da9052-irq.c
new file mode 100644
index 000000000000..57ae7841f536
--- /dev/null
+++ b/drivers/mfd/da9052-irq.c
@@ -0,0 +1,288 @@
+/*
+ * DA9052 interrupt support
+ *
+ * Author: Fabio Estevam <fabio.estevam@freescale.com>
+ * Based on arizona-irq.c, which is:
+ *
+ * Copyright 2012 Wolfson Microelectronics plc
+ *
+ * Author: Mark Brown <broonie@opensource.wolfsonmicro.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/device.h>
+#include <linux/delay.h>
+#include <linux/input.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+
+#include <linux/mfd/da9052/da9052.h>
+#include <linux/mfd/da9052/reg.h>
+
+#define DA9052_NUM_IRQ_REGS		4
+#define DA9052_IRQ_MASK_POS_1		0x01
+#define DA9052_IRQ_MASK_POS_2		0x02
+#define DA9052_IRQ_MASK_POS_3		0x04
+#define DA9052_IRQ_MASK_POS_4		0x08
+#define DA9052_IRQ_MASK_POS_5		0x10
+#define DA9052_IRQ_MASK_POS_6		0x20
+#define DA9052_IRQ_MASK_POS_7		0x40
+#define DA9052_IRQ_MASK_POS_8		0x80
+
+static struct regmap_irq da9052_irqs[] = {
+	[DA9052_IRQ_DCIN] = {
+		.reg_offset = 0,
+		.mask = DA9052_IRQ_MASK_POS_1,
+	},
+	[DA9052_IRQ_VBUS] = {
+		.reg_offset = 0,
+		.mask = DA9052_IRQ_MASK_POS_2,
+	},
+	[DA9052_IRQ_DCINREM] = {
+		.reg_offset = 0,
+		.mask = DA9052_IRQ_MASK_POS_3,
+	},
+	[DA9052_IRQ_VBUSREM] = {
+		.reg_offset = 0,
+		.mask = DA9052_IRQ_MASK_POS_4,
+	},
+	[DA9052_IRQ_VDDLOW] = {
+		.reg_offset = 0,
+		.mask = DA9052_IRQ_MASK_POS_5,
+	},
+	[DA9052_IRQ_ALARM] = {
+		.reg_offset = 0,
+		.mask = DA9052_IRQ_MASK_POS_6,
+	},
+	[DA9052_IRQ_SEQRDY] = {
+		.reg_offset = 0,
+		.mask = DA9052_IRQ_MASK_POS_7,
+	},
+	[DA9052_IRQ_COMP1V2] = {
+		.reg_offset = 0,
+		.mask = DA9052_IRQ_MASK_POS_8,
+	},
+	[DA9052_IRQ_NONKEY] = {
+		.reg_offset = 1,
+		.mask = DA9052_IRQ_MASK_POS_1,
+	},
+	[DA9052_IRQ_IDFLOAT] = {
+		.reg_offset = 1,
+		.mask = DA9052_IRQ_MASK_POS_2,
+	},
+	[DA9052_IRQ_IDGND] = {
+		.reg_offset = 1,
+		.mask = DA9052_IRQ_MASK_POS_3,
+	},
+	[DA9052_IRQ_CHGEND] = {
+		.reg_offset = 1,
+		.mask = DA9052_IRQ_MASK_POS_4,
+	},
+	[DA9052_IRQ_TBAT] = {
+		.reg_offset = 1,
+		.mask = DA9052_IRQ_MASK_POS_5,
+	},
+	[DA9052_IRQ_ADC_EOM] = {
+		.reg_offset = 1,
+		.mask = DA9052_IRQ_MASK_POS_6,
+	},
+	[DA9052_IRQ_PENDOWN] = {
+		.reg_offset = 1,
+		.mask = DA9052_IRQ_MASK_POS_7,
+	},
+	[DA9052_IRQ_TSIREADY] = {
+		.reg_offset = 1,
+		.mask = DA9052_IRQ_MASK_POS_8,
+	},
+	[DA9052_IRQ_GPI0] = {
+		.reg_offset = 2,
+		.mask = DA9052_IRQ_MASK_POS_1,
+	},
+	[DA9052_IRQ_GPI1] = {
+		.reg_offset = 2,
+		.mask = DA9052_IRQ_MASK_POS_2,
+	},
+	[DA9052_IRQ_GPI2] = {
+		.reg_offset = 2,
+		.mask = DA9052_IRQ_MASK_POS_3,
+	},
+	[DA9052_IRQ_GPI3] = {
+		.reg_offset = 2,
+		.mask = DA9052_IRQ_MASK_POS_4,
+	},
+	[DA9052_IRQ_GPI4] = {
+		.reg_offset = 2,
+		.mask = DA9052_IRQ_MASK_POS_5,
+	},
+	[DA9052_IRQ_GPI5] = {
+		.reg_offset = 2,
+		.mask = DA9052_IRQ_MASK_POS_6,
+	},
+	[DA9052_IRQ_GPI6] = {
+		.reg_offset = 2,
+		.mask = DA9052_IRQ_MASK_POS_7,
+	},
+	[DA9052_IRQ_GPI7] = {
+		.reg_offset = 2,
+		.mask = DA9052_IRQ_MASK_POS_8,
+	},
+	[DA9052_IRQ_GPI8] = {
+		.reg_offset = 3,
+		.mask = DA9052_IRQ_MASK_POS_1,
+	},
+	[DA9052_IRQ_GPI9] = {
+		.reg_offset = 3,
+		.mask = DA9052_IRQ_MASK_POS_2,
+	},
+	[DA9052_IRQ_GPI10] = {
+		.reg_offset = 3,
+		.mask = DA9052_IRQ_MASK_POS_3,
+	},
+	[DA9052_IRQ_GPI11] = {
+		.reg_offset = 3,
+		.mask = DA9052_IRQ_MASK_POS_4,
+	},
+	[DA9052_IRQ_GPI12] = {
+		.reg_offset = 3,
+		.mask = DA9052_IRQ_MASK_POS_5,
+	},
+	[DA9052_IRQ_GPI13] = {
+		.reg_offset = 3,
+		.mask = DA9052_IRQ_MASK_POS_6,
+	},
+	[DA9052_IRQ_GPI14] = {
+		.reg_offset = 3,
+		.mask = DA9052_IRQ_MASK_POS_7,
+	},
+	[DA9052_IRQ_GPI15] = {
+		.reg_offset = 3,
+		.mask = DA9052_IRQ_MASK_POS_8,
+	},
+};
+
+static struct regmap_irq_chip da9052_regmap_irq_chip = {
+	.name = "da9052_irq",
+	.status_base = DA9052_EVENT_A_REG,
+	.mask_base = DA9052_IRQ_MASK_A_REG,
+	.ack_base = DA9052_EVENT_A_REG,
+	.num_regs = DA9052_NUM_IRQ_REGS,
+	.irqs = da9052_irqs,
+	.num_irqs = ARRAY_SIZE(da9052_irqs),
+};
+
+static int da9052_map_irq(struct da9052 *da9052, int irq)
+{
+	return regmap_irq_get_virq(da9052->irq_data, irq);
+}
+
+int da9052_enable_irq(struct da9052 *da9052, int irq)
+{
+	irq = da9052_map_irq(da9052, irq);
+	if (irq < 0)
+		return irq;
+
+	enable_irq(irq);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(da9052_enable_irq);
+
+int da9052_disable_irq(struct da9052 *da9052, int irq)
+{
+	irq = da9052_map_irq(da9052, irq);
+	if (irq < 0)
+		return irq;
+
+	disable_irq(irq);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(da9052_disable_irq);
+
+int da9052_disable_irq_nosync(struct da9052 *da9052, int irq)
+{
+	irq = da9052_map_irq(da9052, irq);
+	if (irq < 0)
+		return irq;
+
+	disable_irq_nosync(irq);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(da9052_disable_irq_nosync);
+
+int da9052_request_irq(struct da9052 *da9052, int irq, char *name,
+			   irq_handler_t handler, void *data)
+{
+	irq = da9052_map_irq(da9052, irq);
+	if (irq < 0)
+		return irq;
+
+	return request_threaded_irq(irq, NULL, handler,
+				     IRQF_TRIGGER_LOW | IRQF_ONESHOT,
+				     name, data);
+}
+EXPORT_SYMBOL_GPL(da9052_request_irq);
+
+void da9052_free_irq(struct da9052 *da9052, int irq, void *data)
+{
+	irq = da9052_map_irq(da9052, irq);
+	if (irq < 0)
+		return;
+
+	free_irq(irq, data);
+}
+EXPORT_SYMBOL_GPL(da9052_free_irq);
+
+static irqreturn_t da9052_auxadc_irq(int irq, void *irq_data)
+{
+	struct da9052 *da9052 = irq_data;
+
+	complete(&da9052->done);
+
+	return IRQ_HANDLED;
+}
+
+int da9052_irq_init(struct da9052 *da9052)
+{
+	int ret;
+
+	ret = regmap_add_irq_chip(da9052->regmap, da9052->chip_irq,
+				  IRQF_TRIGGER_LOW | IRQF_ONESHOT,
+				  -1, &da9052_regmap_irq_chip,
+				  &da9052->irq_data);
+	if (ret < 0) {
+		dev_err(da9052->dev, "regmap_add_irq_chip failed: %d\n", ret);
+		goto regmap_err;
+	}
+
+	ret = da9052_request_irq(da9052, DA9052_IRQ_ADC_EOM, "adc-irq",
+			    da9052_auxadc_irq, da9052);
+
+	if (ret != 0) {
+		dev_err(da9052->dev, "DA9052_IRQ_ADC_EOM failed: %d\n", ret);
+		goto request_irq_err;
+	}
+
+	return 0;
+
+request_irq_err:
+	regmap_del_irq_chip(da9052->chip_irq, da9052->irq_data);
+regmap_err:
+	return ret;
+
+}
+
+int da9052_irq_exit(struct da9052 *da9052)
+{
+	da9052_free_irq(da9052, DA9052_IRQ_ADC_EOM , da9052);
+	regmap_del_irq_chip(da9052->chip_irq, da9052->irq_data);
+
+	return 0;
+}
diff --git a/include/linux/mfd/da9052/da9052.h b/include/linux/mfd/da9052/da9052.h
index 0507c4c21a7d..86dd93de6ff2 100644
--- a/include/linux/mfd/da9052/da9052.h
+++ b/include/linux/mfd/da9052/da9052.h
@@ -146,4 +146,14 @@ void da9052_device_exit(struct da9052 *da9052);
 
 extern struct regmap_config da9052_regmap_config;
 
+int da9052_irq_init(struct da9052 *da9052);
+int da9052_irq_exit(struct da9052 *da9052);
+int da9052_request_irq(struct da9052 *da9052, int irq, char *name,
+			   irq_handler_t handler, void *data);
+void da9052_free_irq(struct da9052 *da9052, int irq, void *data);
+
+int da9052_enable_irq(struct da9052 *da9052, int irq);
+int da9052_disable_irq(struct da9052 *da9052, int irq);
+int da9052_disable_irq_nosync(struct da9052 *da9052, int irq);
+
 #endif /* __MFD_DA9052_DA9052_H */
-- 
cgit v1.2.3


From f01312d846016dbd38cc9865e580298fb61f2aa7 Mon Sep 17 00:00:00 2001
From: Lars Poeschel <poeschel@lemonage.de>
Date: Mon, 5 Nov 2012 15:48:23 +0100
Subject: mfd: Add viperboard driver

Add mfd driver for Nano River Technologies viperboard.

Signed-off-by: Lars Poeschel <poeschel@lemonage.de>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/Kconfig            |  14 +++++
 drivers/mfd/Makefile           |   1 +
 drivers/mfd/viperboard.c       | 129 +++++++++++++++++++++++++++++++++++++++++
 include/linux/mfd/viperboard.h | 105 +++++++++++++++++++++++++++++++++
 4 files changed, 249 insertions(+)
 create mode 100644 drivers/mfd/viperboard.c
 create mode 100644 include/linux/mfd/viperboard.h

(limited to 'include/linux')

diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index 34242cada125..b280639a7634 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -1065,6 +1065,20 @@ config MFD_PALMAS
 	  If you say yes here you get support for the Palmas
 	  series of PMIC chips from Texas Instruments.
 
+config MFD_VIPERBOARD
+        tristate "Support for Nano River Technologies Viperboard"
+	select MFD_CORE
+	depends on USB
+	default n
+	help
+	  Say yes here if you want support for Nano River Technologies
+	  Viperboard.
+	  There are mfd cell drivers available for i2c master, adc and
+	  both gpios found on the board. The spi part does not yet
+	  have a driver.
+	  You need to select the mfd cell drivers separately.
+	  The drivers do not support all features the board exposes.
+
 endmenu
 endif
 
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index 632cce09e407..1c3ee7c76906 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -139,6 +139,7 @@ obj-$(CONFIG_MFD_TPS65090)	+= tps65090.o
 obj-$(CONFIG_MFD_AAT2870_CORE)	+= aat2870-core.o
 obj-$(CONFIG_MFD_INTEL_MSIC)	+= intel_msic.o
 obj-$(CONFIG_MFD_PALMAS)	+= palmas.o
+obj-$(CONFIG_MFD_VIPERBOARD)    += viperboard.o
 obj-$(CONFIG_MFD_RC5T583)	+= rc5t583.o rc5t583-irq.o
 obj-$(CONFIG_MFD_SEC_CORE)	+= sec-core.o sec-irq.o
 obj-$(CONFIG_MFD_SYSCON)	+= syscon.o
diff --git a/drivers/mfd/viperboard.c b/drivers/mfd/viperboard.c
new file mode 100644
index 000000000000..cd1e2fe95647
--- /dev/null
+++ b/drivers/mfd/viperboard.c
@@ -0,0 +1,129 @@
+/*
+ *  Nano River Technologies viperboard driver
+ *
+ *  This is the core driver for the viperboard. There are cell drivers
+ *  available for I2C, ADC and both GPIOs. SPI is not yet supported.
+ *  The drivers do not support all features the board exposes. See user
+ *  manual of the viperboard.
+ *
+ *  (C) 2012 by Lemonage GmbH
+ *  Author: Lars Poeschel <poeschel@lemonage.de>
+ *  All rights reserved.
+ *
+ *  This program is free software; you can redistribute  it and/or modify it
+ *  under  the terms of  the GNU General  Public License as published by the
+ *  Free Software Foundation;  either version 2 of the  License, or (at your
+ *  option) any later version.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/mutex.h>
+
+#include <linux/mfd/core.h>
+#include <linux/mfd/viperboard.h>
+
+#include <linux/usb.h>
+
+
+static const struct usb_device_id vprbrd_table[] = {
+	{ USB_DEVICE(0x2058, 0x1005) },   /* Nano River Technologies */
+	{ }                               /* Terminating entry */
+};
+
+MODULE_DEVICE_TABLE(usb, vprbrd_table);
+
+static struct mfd_cell vprbrd_devs[] = {
+};
+
+static int vprbrd_probe(struct usb_interface *interface,
+			      const struct usb_device_id *id)
+{
+	struct vprbrd *vb;
+
+	u16 version = 0;
+	int pipe, ret;
+	unsigned char buf[1];
+
+	/* allocate memory for our device state and initialize it */
+	vb = kzalloc(sizeof(*vb), GFP_KERNEL);
+	if (vb == NULL) {
+		dev_err(&interface->dev, "Out of memory\n");
+		return -ENOMEM;
+	}
+
+	mutex_init(&vb->lock);
+
+	vb->usb_dev = usb_get_dev(interface_to_usbdev(interface));
+
+	/* save our data pointer in this interface device */
+	usb_set_intfdata(interface, vb);
+	dev_set_drvdata(&vb->pdev.dev, vb);
+
+	/* get version information, major first, minor then */
+	pipe = usb_rcvctrlpipe(vb->usb_dev, 0);
+	ret = usb_control_msg(vb->usb_dev, pipe, VPRBRD_USB_REQUEST_MAJOR,
+		VPRBRD_USB_TYPE_IN, 0x0000, 0x0000, buf, 1,
+		VPRBRD_USB_TIMEOUT_MS);
+	if (ret == 1)
+		version = buf[0];
+
+	ret = usb_control_msg(vb->usb_dev, pipe, VPRBRD_USB_REQUEST_MINOR,
+		VPRBRD_USB_TYPE_IN, 0x0000, 0x0000, buf, 1,
+		VPRBRD_USB_TIMEOUT_MS);
+	if (ret == 1) {
+		version <<= 8;
+		version = version | buf[0];
+	}
+
+	dev_info(&interface->dev,
+		 "version %x.%02x found at bus %03d address %03d\n",
+		 version >> 8, version & 0xff,
+		 vb->usb_dev->bus->busnum, vb->usb_dev->devnum);
+
+	ret = mfd_add_devices(&interface->dev, -1, vprbrd_devs,
+				ARRAY_SIZE(vprbrd_devs), NULL, 0, NULL);
+	if (ret != 0) {
+		dev_err(&interface->dev, "Failed to add mfd devices to core.");
+		goto error;
+	}
+
+	return 0;
+
+error:
+	if (vb) {
+		usb_put_dev(vb->usb_dev);
+		kfree(vb);
+	}
+
+	return ret;
+}
+
+static void vprbrd_disconnect(struct usb_interface *interface)
+{
+	struct vprbrd *vb = usb_get_intfdata(interface);
+
+	mfd_remove_devices(&interface->dev);
+	usb_set_intfdata(interface, NULL);
+	usb_put_dev(vb->usb_dev);
+	kfree(vb);
+
+	dev_dbg(&interface->dev, "disconnected\n");
+}
+
+static struct usb_driver vprbrd_driver = {
+	.name		= "viperboard",
+	.probe		= vprbrd_probe,
+	.disconnect	= vprbrd_disconnect,
+	.id_table	= vprbrd_table,
+};
+
+module_usb_driver(vprbrd_driver);
+
+MODULE_DESCRIPTION("Nano River Technologies viperboard mfd core driver");
+MODULE_AUTHOR("Lars Poeschel <poeschel@lemonage.de>");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/mfd/viperboard.h b/include/linux/mfd/viperboard.h
new file mode 100644
index 000000000000..0d1342466db5
--- /dev/null
+++ b/include/linux/mfd/viperboard.h
@@ -0,0 +1,105 @@
+/*
+ *  include/linux/mfd/viperboard.h
+ *
+ *  Nano River Technologies viperboard definitions
+ *
+ *  (C) 2012 by Lemonage GmbH
+ *  Author: Lars Poeschel <poeschel@lemonage.de>
+ *  All rights reserved.
+ *
+ *  This program is free software; you can redistribute  it and/or modify it
+ *  under  the terms of  the GNU General  Public License as published by the
+ *  Free Software Foundation;  either version 2 of the  License, or (at your
+ *  option) any later version.
+ *
+ */
+
+#ifndef __MFD_VIPERBOARD_H__
+#define __MFD_VIPERBOARD_H__
+
+#include <linux/types.h>
+#include <linux/usb.h>
+
+#define VPRBRD_EP_OUT               0x02
+#define VPRBRD_EP_IN                0x86
+
+#define VPRBRD_I2C_MSG_LEN          512 /* max length of a msg on USB level */
+
+#define VPRBRD_I2C_FREQ_6MHZ        1                        /*   6 MBit/s */
+#define VPRBRD_I2C_FREQ_3MHZ        2                        /*   3 MBit/s */
+#define VPRBRD_I2C_FREQ_1MHZ        3                        /*   1 MBit/s */
+#define VPRBRD_I2C_FREQ_FAST        4                        /* 400 kbit/s */
+#define VPRBRD_I2C_FREQ_400KHZ      VPRBRD_I2C_FREQ_FAST
+#define VPRBRD_I2C_FREQ_200KHZ      5                        /* 200 kbit/s */
+#define VPRBRD_I2C_FREQ_STD         6                        /* 100 kbit/s */
+#define VPRBRD_I2C_FREQ_100KHZ      VPRBRD_I2C_FREQ_STD
+#define VPRBRD_I2C_FREQ_10KHZ       7                        /*  10 kbit/s */
+
+#define VPRBRD_I2C_CMD_WRITE        0x00
+#define VPRBRD_I2C_CMD_READ         0x01
+#define VPRBRD_I2C_CMD_ADDR         0x02
+
+#define VPRBRD_USB_TYPE_OUT	    0x40
+#define VPRBRD_USB_TYPE_IN	    0xc0
+#define VPRBRD_USB_TIMEOUT_MS       100
+#define VPRBRD_USB_REQUEST_MAJOR    0xea
+#define VPRBRD_USB_REQUEST_MINOR    0xeb
+
+struct vprbrd_i2c_write_hdr {
+	u8 cmd;
+	u16 addr;
+	u8 len1;
+	u8 len2;
+	u8 last;
+	u8 chan;
+	u16 spi;
+} __packed;
+
+struct vprbrd_i2c_read_hdr {
+	u8 cmd;
+	u16 addr;
+	u8 len0;
+	u8 len1;
+	u8 len2;
+	u8 len3;
+	u8 len4;
+	u8 len5;
+	u16 tf1;                        /* transfer 1 length */
+	u16 tf2;                        /* transfer 2 length */
+} __packed;
+
+struct vprbrd_i2c_status {
+	u8 unknown[11];
+	u8 status;
+} __packed;
+
+struct vprbrd_i2c_write_msg {
+	struct vprbrd_i2c_write_hdr header;
+	u8 data[VPRBRD_I2C_MSG_LEN
+		- sizeof(struct vprbrd_i2c_write_hdr)];
+} __packed;
+
+struct vprbrd_i2c_read_msg {
+	struct vprbrd_i2c_read_hdr header;
+	u8 data[VPRBRD_I2C_MSG_LEN
+		- sizeof(struct vprbrd_i2c_read_hdr)];
+} __packed;
+
+struct vprbrd_i2c_addr_msg {
+	u8 cmd;
+	u8 addr;
+	u8 unknown1;
+	u16 len;
+	u8 unknown2;
+	u8 unknown3;
+} __packed;
+
+/* Structure to hold all device specific stuff */
+struct vprbrd {
+	struct usb_device *usb_dev; /* the usb device for this device */
+	struct mutex lock;
+	u8 buf[sizeof(struct vprbrd_i2c_write_msg)];
+	struct platform_device pdev;
+};
+
+#endif /* __MFD_VIPERBOARD_H__ */
-- 
cgit v1.2.3


From 9d5b72de0d1627b130fa69c5edf58b5b2df4ca50 Mon Sep 17 00:00:00 2001
From: Lars Poeschel <poeschel@lemonage.de>
Date: Mon, 5 Nov 2012 15:48:24 +0100
Subject: gpio: Add viperboard gpio driver

This adds the mfd cell to use the gpio a and gpio b part
of the Nano River Technologies viperboard.

Signed-off-by: Lars Poeschel <poeschel@lemonage.de>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/gpio/Kconfig           |  13 ++
 drivers/gpio/Makefile          |   1 +
 drivers/gpio/gpio-viperboard.c | 517 +++++++++++++++++++++++++++++++++++++++++
 drivers/mfd/viperboard.c       |   3 +
 include/linux/mfd/viperboard.h |   2 +
 5 files changed, 536 insertions(+)
 create mode 100644 drivers/gpio/gpio-viperboard.c

(limited to 'include/linux')

diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index d055cee36942..c3bbd08bcdc3 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -649,4 +649,17 @@ config GPIO_MSIC
 	  Enable support for GPIO on intel MSIC controllers found in
 	  intel MID devices
 
+comment "USB GPIO expanders:"
+
+config GPIO_VIPERBOARD
+	tristate "Viperboard GPIO a & b support"
+	depends on MFD_VIPERBOARD && USB
+	help
+	  Say yes here to access the GPIO signals of Nano River
+	  Technologies Viperboard. There are two GPIO chips on the
+	  board: gpioa and gpiob.
+          See viperboard API specification and Nano
+          River Tech's viperboard.h for detailed meaning
+          of the module parameters.
+
 endif
diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile
index 9aeed6707326..16a1385226e9 100644
--- a/drivers/gpio/Makefile
+++ b/drivers/gpio/Makefile
@@ -71,6 +71,7 @@ obj-$(CONFIG_GPIO_TPS65912)	+= gpio-tps65912.o
 obj-$(CONFIG_GPIO_TWL4030)	+= gpio-twl4030.o
 obj-$(CONFIG_GPIO_TWL6040)	+= gpio-twl6040.o
 obj-$(CONFIG_GPIO_UCB1400)	+= gpio-ucb1400.o
+obj-$(CONFIG_GPIO_VIPERBOARD)	+= gpio-viperboard.o
 obj-$(CONFIG_GPIO_VR41XX)	+= gpio-vr41xx.o
 obj-$(CONFIG_GPIO_VT8500)	+= gpio-vt8500.o
 obj-$(CONFIG_GPIO_VX855)	+= gpio-vx855.o
diff --git a/drivers/gpio/gpio-viperboard.c b/drivers/gpio/gpio-viperboard.c
new file mode 100644
index 000000000000..13772996cf24
--- /dev/null
+++ b/drivers/gpio/gpio-viperboard.c
@@ -0,0 +1,517 @@
+/*
+ *  Nano River Technologies viperboard GPIO lib driver
+ *
+ *  (C) 2012 by Lemonage GmbH
+ *  Author: Lars Poeschel <poeschel@lemonage.de>
+ *  All rights reserved.
+ *
+ *  This program is free software; you can redistribute  it and/or modify it
+ *  under  the terms of  the GNU General  Public License as published by the
+ *  Free Software Foundation;  either version 2 of the	License, or (at your
+ *  option) any later version.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/mutex.h>
+#include <linux/platform_device.h>
+
+#include <linux/usb.h>
+#include <linux/gpio.h>
+
+#include <linux/mfd/viperboard.h>
+
+#define VPRBRD_GPIOA_CLK_1MHZ		0
+#define VPRBRD_GPIOA_CLK_100KHZ		1
+#define VPRBRD_GPIOA_CLK_10KHZ		2
+#define VPRBRD_GPIOA_CLK_1KHZ		3
+#define VPRBRD_GPIOA_CLK_100HZ		4
+#define VPRBRD_GPIOA_CLK_10HZ		5
+
+#define VPRBRD_GPIOA_FREQ_DEFAULT	1000
+
+#define VPRBRD_GPIOA_CMD_CONT		0x00
+#define VPRBRD_GPIOA_CMD_PULSE		0x01
+#define VPRBRD_GPIOA_CMD_PWM		0x02
+#define VPRBRD_GPIOA_CMD_SETOUT		0x03
+#define VPRBRD_GPIOA_CMD_SETIN		0x04
+#define VPRBRD_GPIOA_CMD_SETINT		0x05
+#define VPRBRD_GPIOA_CMD_GETIN		0x06
+
+#define VPRBRD_GPIOB_CMD_SETDIR		0x00
+#define VPRBRD_GPIOB_CMD_SETVAL		0x01
+
+struct vprbrd_gpioa_msg {
+	u8 cmd;
+	u8 clk;
+	u8 offset;
+	u8 t1;
+	u8 t2;
+	u8 invert;
+	u8 pwmlevel;
+	u8 outval;
+	u8 risefall;
+	u8 answer;
+	u8 __fill;
+} __packed;
+
+struct vprbrd_gpiob_msg {
+	u8 cmd;
+	u16 val;
+	u16 mask;
+} __packed;
+
+struct vprbrd_gpio {
+	struct gpio_chip gpioa; /* gpio a related things */
+	u32 gpioa_out;
+	u32 gpioa_val;
+	struct gpio_chip gpiob; /* gpio b related things */
+	u32 gpiob_out;
+	u32 gpiob_val;
+	struct vprbrd *vb;
+};
+
+/* gpioa sampling clock module parameter */
+static unsigned char gpioa_clk;
+static unsigned int gpioa_freq = VPRBRD_GPIOA_FREQ_DEFAULT;
+module_param(gpioa_freq, uint, 0);
+MODULE_PARM_DESC(gpioa_freq,
+	"gpio-a sampling freq in Hz (default is 1000Hz) valid values: 10, 100, 1000, 10000, 100000, 1000000");
+
+/* ----- begin of gipo a chip -------------------------------------------- */
+
+static int vprbrd_gpioa_get(struct gpio_chip *chip,
+		unsigned offset)
+{
+	int ret, answer, error = 0;
+	struct vprbrd_gpio *gpio =
+			container_of(chip, struct vprbrd_gpio, gpioa);
+	struct vprbrd *vb = gpio->vb;
+	struct vprbrd_gpioa_msg *gamsg = (struct vprbrd_gpioa_msg *)vb->buf;
+
+	/* if io is set to output, just return the saved value */
+	if (gpio->gpioa_out & (1 << offset))
+		return gpio->gpioa_val & (1 << offset);
+
+	mutex_lock(&vb->lock);
+
+	gamsg->cmd = VPRBRD_GPIOA_CMD_GETIN;
+	gamsg->clk = 0x00;
+	gamsg->offset = offset;
+	gamsg->t1 = 0x00;
+	gamsg->t2 = 0x00;
+	gamsg->invert = 0x00;
+	gamsg->pwmlevel = 0x00;
+	gamsg->outval = 0x00;
+	gamsg->risefall = 0x00;
+	gamsg->answer = 0x00;
+	gamsg->__fill = 0x00;
+
+	ret = usb_control_msg(vb->usb_dev, usb_sndctrlpipe(vb->usb_dev, 0),
+		VPRBRD_USB_REQUEST_GPIOA, VPRBRD_USB_TYPE_OUT, 0x0000,
+		0x0000, gamsg, sizeof(struct vprbrd_gpioa_msg),
+		VPRBRD_USB_TIMEOUT_MS);
+	if (ret != sizeof(struct vprbrd_gpioa_msg))
+		error = -EREMOTEIO;
+
+	ret = usb_control_msg(vb->usb_dev, usb_rcvctrlpipe(vb->usb_dev, 0),
+		VPRBRD_USB_REQUEST_GPIOA, VPRBRD_USB_TYPE_IN, 0x0000,
+		0x0000, gamsg, sizeof(struct vprbrd_gpioa_msg),
+		VPRBRD_USB_TIMEOUT_MS);
+	answer = gamsg->answer & 0x01;
+
+	mutex_unlock(&vb->lock);
+
+	if (ret != sizeof(struct vprbrd_gpioa_msg))
+		error = -EREMOTEIO;
+
+	if (error)
+		return error;
+
+	return answer;
+}
+
+static void vprbrd_gpioa_set(struct gpio_chip *chip,
+		unsigned offset, int value)
+{
+	int ret;
+	struct vprbrd_gpio *gpio =
+			container_of(chip, struct vprbrd_gpio, gpioa);
+	struct vprbrd *vb = gpio->vb;
+	struct vprbrd_gpioa_msg *gamsg = (struct vprbrd_gpioa_msg *)vb->buf;
+
+	if (gpio->gpioa_out & (1 << offset)) {
+		if (value)
+			gpio->gpioa_val |= (1 << offset);
+		else
+			gpio->gpioa_val &= ~(1 << offset);
+
+		mutex_lock(&vb->lock);
+
+		gamsg->cmd = VPRBRD_GPIOA_CMD_SETOUT;
+		gamsg->clk = 0x00;
+		gamsg->offset = offset;
+		gamsg->t1 = 0x00;
+		gamsg->t2 = 0x00;
+		gamsg->invert = 0x00;
+		gamsg->pwmlevel = 0x00;
+		gamsg->outval = value;
+		gamsg->risefall = 0x00;
+		gamsg->answer = 0x00;
+		gamsg->__fill = 0x00;
+
+		ret = usb_control_msg(vb->usb_dev,
+			usb_sndctrlpipe(vb->usb_dev, 0),
+			VPRBRD_USB_REQUEST_GPIOA, VPRBRD_USB_TYPE_OUT,
+			0x0000,	0x0000, gamsg,
+			sizeof(struct vprbrd_gpioa_msg), VPRBRD_USB_TIMEOUT_MS);
+
+		mutex_unlock(&vb->lock);
+
+		if (ret != sizeof(struct vprbrd_gpioa_msg))
+			dev_err(chip->dev, "usb error setting pin value\n");
+	}
+}
+
+static int vprbrd_gpioa_direction_input(struct gpio_chip *chip,
+			unsigned offset)
+{
+	int ret;
+	struct vprbrd_gpio *gpio =
+			container_of(chip, struct vprbrd_gpio, gpioa);
+	struct vprbrd *vb = gpio->vb;
+	struct vprbrd_gpioa_msg *gamsg = (struct vprbrd_gpioa_msg *)vb->buf;
+
+	gpio->gpioa_out &= ~(1 << offset);
+
+	mutex_lock(&vb->lock);
+
+	gamsg->cmd = VPRBRD_GPIOA_CMD_SETIN;
+	gamsg->clk = gpioa_clk;
+	gamsg->offset = offset;
+	gamsg->t1 = 0x00;
+	gamsg->t2 = 0x00;
+	gamsg->invert = 0x00;
+	gamsg->pwmlevel = 0x00;
+	gamsg->outval = 0x00;
+	gamsg->risefall = 0x00;
+	gamsg->answer = 0x00;
+	gamsg->__fill = 0x00;
+
+	ret = usb_control_msg(vb->usb_dev, usb_sndctrlpipe(vb->usb_dev, 0),
+		VPRBRD_USB_REQUEST_GPIOA, VPRBRD_USB_TYPE_OUT, 0x0000,
+		0x0000, gamsg, sizeof(struct vprbrd_gpioa_msg),
+		VPRBRD_USB_TIMEOUT_MS);
+
+	mutex_unlock(&vb->lock);
+
+	if (ret != sizeof(struct vprbrd_gpioa_msg))
+		return -EREMOTEIO;
+
+	return 0;
+}
+
+static int vprbrd_gpioa_direction_output(struct gpio_chip *chip,
+			unsigned offset, int value)
+{
+	int ret;
+	struct vprbrd_gpio *gpio =
+			container_of(chip, struct vprbrd_gpio, gpioa);
+	struct vprbrd *vb = gpio->vb;
+	struct vprbrd_gpioa_msg *gamsg = (struct vprbrd_gpioa_msg *)vb->buf;
+
+	gpio->gpioa_out |= (1 << offset);
+	if (value)
+		gpio->gpioa_val |= (1 << offset);
+	else
+		gpio->gpioa_val &= ~(1 << offset);
+
+	mutex_lock(&vb->lock);
+
+	gamsg->cmd = VPRBRD_GPIOA_CMD_SETOUT;
+	gamsg->clk = 0x00;
+	gamsg->offset = offset;
+	gamsg->t1 = 0x00;
+	gamsg->t2 = 0x00;
+	gamsg->invert = 0x00;
+	gamsg->pwmlevel = 0x00;
+	gamsg->outval = value;
+	gamsg->risefall = 0x00;
+	gamsg->answer = 0x00;
+	gamsg->__fill = 0x00;
+
+	ret = usb_control_msg(vb->usb_dev, usb_sndctrlpipe(vb->usb_dev, 0),
+		VPRBRD_USB_REQUEST_GPIOA, VPRBRD_USB_TYPE_OUT, 0x0000,
+		0x0000, gamsg, sizeof(struct vprbrd_gpioa_msg),
+		VPRBRD_USB_TIMEOUT_MS);
+
+	mutex_unlock(&vb->lock);
+
+	if (ret != sizeof(struct vprbrd_gpioa_msg))
+		return -EREMOTEIO;
+
+	return 0;
+}
+
+/* ----- end of gpio a chip ---------------------------------------------- */
+
+/* ----- begin of gipo b chip -------------------------------------------- */
+
+static int vprbrd_gpiob_setdir(struct vprbrd *vb, unsigned offset,
+	unsigned dir)
+{
+	struct vprbrd_gpiob_msg *gbmsg = (struct vprbrd_gpiob_msg *)vb->buf;
+	int ret;
+
+	gbmsg->cmd = VPRBRD_GPIOB_CMD_SETDIR;
+	gbmsg->val = cpu_to_be16(dir << offset);
+	gbmsg->mask = cpu_to_be16(0x0001 << offset);
+
+	ret = usb_control_msg(vb->usb_dev, usb_sndctrlpipe(vb->usb_dev, 0),
+		VPRBRD_USB_REQUEST_GPIOB, VPRBRD_USB_TYPE_OUT, 0x0000,
+		0x0000, gbmsg, sizeof(struct vprbrd_gpiob_msg),
+		VPRBRD_USB_TIMEOUT_MS);
+
+	if (ret != sizeof(struct vprbrd_gpiob_msg))
+		return -EREMOTEIO;
+
+	return 0;
+}
+
+static int vprbrd_gpiob_get(struct gpio_chip *chip,
+		unsigned offset)
+{
+	int ret;
+	u16 val;
+	struct vprbrd_gpio *gpio =
+			container_of(chip, struct vprbrd_gpio, gpiob);
+	struct vprbrd *vb = gpio->vb;
+	struct vprbrd_gpiob_msg *gbmsg = (struct vprbrd_gpiob_msg *)vb->buf;
+
+	/* if io is set to output, just return the saved value */
+	if (gpio->gpiob_out & (1 << offset))
+		return gpio->gpiob_val & (1 << offset);
+
+	mutex_lock(&vb->lock);
+
+	ret = usb_control_msg(vb->usb_dev, usb_rcvctrlpipe(vb->usb_dev, 0),
+		VPRBRD_USB_REQUEST_GPIOB, VPRBRD_USB_TYPE_IN, 0x0000,
+		0x0000, gbmsg,	sizeof(struct vprbrd_gpiob_msg),
+		VPRBRD_USB_TIMEOUT_MS);
+	val = gbmsg->val;
+
+	mutex_unlock(&vb->lock);
+
+	if (ret != sizeof(struct vprbrd_gpiob_msg))
+		return ret;
+
+	/* cache the read values */
+	gpio->gpiob_val = be16_to_cpu(val);
+
+	return (gpio->gpiob_val >> offset) & 0x1;
+}
+
+static void vprbrd_gpiob_set(struct gpio_chip *chip,
+		unsigned offset, int value)
+{
+	int ret;
+	struct vprbrd_gpio *gpio =
+			container_of(chip, struct vprbrd_gpio, gpiob);
+	struct vprbrd *vb = gpio->vb;
+	struct vprbrd_gpiob_msg *gbmsg = (struct vprbrd_gpiob_msg *)vb->buf;
+
+	if (gpio->gpiob_out & (1 << offset)) {
+		if (value)
+			gpio->gpiob_val |= (1 << offset);
+		else
+			gpio->gpiob_val &= ~(1 << offset);
+
+		mutex_lock(&vb->lock);
+
+		gbmsg->cmd = VPRBRD_GPIOB_CMD_SETVAL;
+		gbmsg->val = cpu_to_be16(value << offset);
+		gbmsg->mask = cpu_to_be16(0x0001 << offset);
+
+		ret = usb_control_msg(vb->usb_dev,
+			usb_sndctrlpipe(vb->usb_dev, 0),
+			VPRBRD_USB_REQUEST_GPIOB, VPRBRD_USB_TYPE_OUT,
+			0x0000,	0x0000, gbmsg,
+			sizeof(struct vprbrd_gpiob_msg), VPRBRD_USB_TIMEOUT_MS);
+
+		mutex_unlock(&vb->lock);
+
+		if (ret != sizeof(struct vprbrd_gpiob_msg))
+			dev_err(chip->dev, "usb error setting pin value\n");
+	}
+}
+
+static int vprbrd_gpiob_direction_input(struct gpio_chip *chip,
+			unsigned offset)
+{
+	int ret;
+	struct vprbrd_gpio *gpio =
+			container_of(chip, struct vprbrd_gpio, gpiob);
+	struct vprbrd *vb = gpio->vb;
+
+	gpio->gpiob_out &= ~(1 << offset);
+
+	mutex_lock(&vb->lock);
+
+	ret = vprbrd_gpiob_setdir(vb, offset, 0);
+
+	mutex_unlock(&vb->lock);
+
+	if (ret)
+		dev_err(chip->dev, "usb error setting pin to input\n");
+
+	return ret;
+}
+
+static int vprbrd_gpiob_direction_output(struct gpio_chip *chip,
+			unsigned offset, int value)
+{
+	int ret;
+	struct vprbrd_gpio *gpio =
+			container_of(chip, struct vprbrd_gpio, gpiob);
+	struct vprbrd *vb = gpio->vb;
+
+	gpio->gpiob_out |= (1 << offset);
+	if (value)
+		gpio->gpiob_val |= (1 << offset);
+	else
+		gpio->gpiob_val &= ~(1 << offset);
+
+	mutex_lock(&vb->lock);
+
+	ret = vprbrd_gpiob_setdir(vb, offset, 1);
+	if (ret)
+		dev_err(chip->dev, "usb error setting pin to output\n");
+
+	mutex_unlock(&vb->lock);
+
+	vprbrd_gpiob_set(chip, offset, value);
+
+	return ret;
+}
+
+/* ----- end of gpio b chip ---------------------------------------------- */
+
+static int __devinit vprbrd_gpio_probe(struct platform_device *pdev)
+{
+	struct vprbrd *vb = dev_get_drvdata(pdev->dev.parent);
+	struct vprbrd_gpio *vb_gpio;
+	int ret;
+
+	vb_gpio = devm_kzalloc(&pdev->dev, sizeof(*vb_gpio), GFP_KERNEL);
+	if (vb_gpio == NULL)
+		return -ENOMEM;
+
+	vb_gpio->vb = vb;
+	/* registering gpio a */
+	vb_gpio->gpioa.label = "viperboard gpio a";
+	vb_gpio->gpioa.dev = &pdev->dev;
+	vb_gpio->gpioa.owner = THIS_MODULE;
+	vb_gpio->gpioa.base = -1;
+	vb_gpio->gpioa.ngpio = 16;
+	vb_gpio->gpioa.can_sleep = 1;
+	vb_gpio->gpioa.set = vprbrd_gpioa_set;
+	vb_gpio->gpioa.get = vprbrd_gpioa_get;
+	vb_gpio->gpioa.direction_input = vprbrd_gpioa_direction_input;
+	vb_gpio->gpioa.direction_output = vprbrd_gpioa_direction_output;
+	ret = gpiochip_add(&vb_gpio->gpioa);
+	if (ret < 0) {
+		dev_err(vb_gpio->gpioa.dev, "could not add gpio a");
+		goto err_gpioa;
+	}
+
+	/* registering gpio b */
+	vb_gpio->gpiob.label = "viperboard gpio b";
+	vb_gpio->gpiob.dev = &pdev->dev;
+	vb_gpio->gpiob.owner = THIS_MODULE;
+	vb_gpio->gpiob.base = -1;
+	vb_gpio->gpiob.ngpio = 16;
+	vb_gpio->gpiob.can_sleep = 1;
+	vb_gpio->gpiob.set = vprbrd_gpiob_set;
+	vb_gpio->gpiob.get = vprbrd_gpiob_get;
+	vb_gpio->gpiob.direction_input = vprbrd_gpiob_direction_input;
+	vb_gpio->gpiob.direction_output = vprbrd_gpiob_direction_output;
+	ret = gpiochip_add(&vb_gpio->gpiob);
+	if (ret < 0) {
+		dev_err(vb_gpio->gpiob.dev, "could not add gpio b");
+		goto err_gpiob;
+	}
+
+	platform_set_drvdata(pdev, vb_gpio);
+
+	return ret;
+
+err_gpiob:
+	ret = gpiochip_remove(&vb_gpio->gpioa);
+
+err_gpioa:
+	return ret;
+}
+
+static int __devexit vprbrd_gpio_remove(struct platform_device *pdev)
+{
+	struct vprbrd_gpio *vb_gpio = platform_get_drvdata(pdev);
+	int ret;
+
+	ret = gpiochip_remove(&vb_gpio->gpiob);
+	if (ret == 0)
+		ret = gpiochip_remove(&vb_gpio->gpioa);
+
+	return ret;
+}
+
+static struct platform_driver vprbrd_gpio_driver = {
+	.driver.name	= "viperboard-gpio",
+	.driver.owner	= THIS_MODULE,
+	.probe		= vprbrd_gpio_probe,
+	.remove		= __devexit_p(vprbrd_gpio_remove),
+};
+
+static int __init vprbrd_gpio_init(void)
+{
+	switch (gpioa_freq) {
+	case 1000000:
+		gpioa_clk = VPRBRD_GPIOA_CLK_1MHZ;
+		break;
+	case 100000:
+		gpioa_clk = VPRBRD_GPIOA_CLK_100KHZ;
+		break;
+	case 10000:
+		gpioa_clk = VPRBRD_GPIOA_CLK_10KHZ;
+		break;
+	case 1000:
+		gpioa_clk = VPRBRD_GPIOA_CLK_1KHZ;
+		break;
+	case 100:
+		gpioa_clk = VPRBRD_GPIOA_CLK_100HZ;
+		break;
+	case 10:
+		gpioa_clk = VPRBRD_GPIOA_CLK_10HZ;
+		break;
+	default:
+		pr_warn("invalid gpioa_freq (%d)\n", gpioa_freq);
+		gpioa_clk = VPRBRD_GPIOA_CLK_1KHZ;
+	}
+
+	return platform_driver_register(&vprbrd_gpio_driver);
+}
+subsys_initcall(vprbrd_gpio_init);
+
+static void __exit vprbrd_gpio_exit(void)
+{
+	platform_driver_unregister(&vprbrd_gpio_driver);
+}
+module_exit(vprbrd_gpio_exit);
+
+MODULE_AUTHOR("Lars Poeschel <poeschel@lemonage.de>");
+MODULE_DESCRIPTION("GPIO driver for Nano River Techs Viperboard");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:viperboard-gpio");
diff --git a/drivers/mfd/viperboard.c b/drivers/mfd/viperboard.c
index cd1e2fe95647..c75ac08f639c 100644
--- a/drivers/mfd/viperboard.c
+++ b/drivers/mfd/viperboard.c
@@ -38,6 +38,9 @@ static const struct usb_device_id vprbrd_table[] = {
 MODULE_DEVICE_TABLE(usb, vprbrd_table);
 
 static struct mfd_cell vprbrd_devs[] = {
+	{
+		.name = "viperboard-gpio",
+	},
 };
 
 static int vprbrd_probe(struct usb_interface *interface,
diff --git a/include/linux/mfd/viperboard.h b/include/linux/mfd/viperboard.h
index 0d1342466db5..42d339fac3e5 100644
--- a/include/linux/mfd/viperboard.h
+++ b/include/linux/mfd/viperboard.h
@@ -44,6 +44,8 @@
 #define VPRBRD_USB_TIMEOUT_MS       100
 #define VPRBRD_USB_REQUEST_MAJOR    0xea
 #define VPRBRD_USB_REQUEST_MINOR    0xeb
+#define VPRBRD_USB_REQUEST_GPIOA    0xed
+#define VPRBRD_USB_REQUEST_GPIOB    0xdd
 
 struct vprbrd_i2c_write_hdr {
 	u8 cmd;
-- 
cgit v1.2.3


From 174a13aa8669331605b138aaf61569dd7189e453 Mon Sep 17 00:00:00 2001
From: Lars Poeschel <poeschel@lemonage.de>
Date: Mon, 19 Nov 2012 16:36:04 +0100
Subject: i2c: Add viperboard i2c master driver

This adds the mfd cell to use the i2c part of the Nano River Technologies
viperboard as i2c master.

Signed-off-by: Lars Poeschel <poeschel@lemonage.de>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/i2c/busses/Kconfig          |  10 +
 drivers/i2c/busses/Makefile         |   1 +
 drivers/i2c/busses/i2c-viperboard.c | 480 ++++++++++++++++++++++++++++++++++++
 drivers/mfd/viperboard.c            |   3 +
 include/linux/mfd/viperboard.h      |   2 +
 5 files changed, 496 insertions(+)
 create mode 100644 drivers/i2c/busses/i2c-viperboard.c

(limited to 'include/linux')

diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig
index e9df4612b7eb..c7bff51fe524 100644
--- a/drivers/i2c/busses/Kconfig
+++ b/drivers/i2c/busses/Kconfig
@@ -818,6 +818,16 @@ config I2C_TINY_USB
 	  This driver can also be built as a module.  If so, the module
 	  will be called i2c-tiny-usb.
 
+config I2C_VIPERBOARD
+	tristate "Viperboard I2C master support"
+	depends on MFD_VIPERBOARD && USB
+	help
+	  Say yes here to access the I2C part of the Nano River
+	  Technologies Viperboard as I2C master.
+          See viperboard API specification and Nano
+          River Tech's viperboard.h for detailed meaning
+          of the module parameters.
+
 comment "Other I2C/SMBus bus drivers"
 
 config I2C_ACORN
diff --git a/drivers/i2c/busses/Makefile b/drivers/i2c/busses/Makefile
index 395b516ffa08..e5cb209d276c 100644
--- a/drivers/i2c/busses/Makefile
+++ b/drivers/i2c/busses/Makefile
@@ -79,6 +79,7 @@ obj-$(CONFIG_I2C_PARPORT)	+= i2c-parport.o
 obj-$(CONFIG_I2C_PARPORT_LIGHT)	+= i2c-parport-light.o
 obj-$(CONFIG_I2C_TAOS_EVM)	+= i2c-taos-evm.o
 obj-$(CONFIG_I2C_TINY_USB)	+= i2c-tiny-usb.o
+obj-$(CONFIG_I2C_VIPERBOARD)	+= i2c-viperboard.o
 
 # Other I2C/SMBus bus drivers
 obj-$(CONFIG_I2C_ACORN)		+= i2c-acorn.o
diff --git a/drivers/i2c/busses/i2c-viperboard.c b/drivers/i2c/busses/i2c-viperboard.c
new file mode 100644
index 000000000000..f5fa20dea906
--- /dev/null
+++ b/drivers/i2c/busses/i2c-viperboard.c
@@ -0,0 +1,480 @@
+/*
+ *  Nano River Technologies viperboard i2c master driver
+ *
+ *  (C) 2012 by Lemonage GmbH
+ *  Author: Lars Poeschel <poeschel@lemonage.de>
+ *  All rights reserved.
+ *
+ *  This program is free software; you can redistribute  it and/or modify it
+ *  under  the terms of  the GNU General  Public License as published by the
+ *  Free Software Foundation;  either version 2 of the	License, or (at your
+ *  option) any later version.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/mutex.h>
+#include <linux/platform_device.h>
+
+#include <linux/usb.h>
+#include <linux/i2c.h>
+
+#include <linux/mfd/viperboard.h>
+
+struct vprbrd_i2c {
+	struct i2c_adapter i2c;
+	u8 bus_freq_param;
+};
+
+/* i2c bus frequency module parameter */
+static u8 i2c_bus_param;
+static unsigned int i2c_bus_freq = 100;
+module_param(i2c_bus_freq, int, 0);
+MODULE_PARM_DESC(i2c_bus_freq,
+	"i2c bus frequency in khz (default is 100) valid values: 10, 100, 200, 400, 1000, 3000, 6000");
+
+static int vprbrd_i2c_status(struct i2c_adapter *i2c,
+	struct vprbrd_i2c_status *status, bool prev_error)
+{
+	u16 bytes_xfer;
+	int ret;
+	struct vprbrd *vb = (struct vprbrd *)i2c->algo_data;
+
+	/* check for protocol error */
+	bytes_xfer = sizeof(struct vprbrd_i2c_status);
+
+	ret = usb_control_msg(vb->usb_dev, usb_rcvctrlpipe(vb->usb_dev, 0),
+		VPRBRD_USB_REQUEST_I2C, VPRBRD_USB_TYPE_IN, 0x0000, 0x0000,
+		status, bytes_xfer, VPRBRD_USB_TIMEOUT_MS);
+
+	if (ret != bytes_xfer)
+		prev_error = true;
+
+	if (prev_error) {
+		dev_err(&i2c->dev, "failure in usb communication\n");
+		return -EREMOTEIO;
+	}
+
+	dev_dbg(&i2c->dev, "  status = %d\n", status->status);
+	if (status->status != 0x00) {
+		dev_err(&i2c->dev, "failure: i2c protocol error\n");
+		return -EPROTO;
+	}
+	return 0;
+}
+
+static int vprbrd_i2c_receive(struct usb_device *usb_dev,
+	struct vprbrd_i2c_read_msg *rmsg, int bytes_xfer)
+{
+	int ret, bytes_actual;
+	int error = 0;
+
+	/* send the read request */
+	ret = usb_bulk_msg(usb_dev,
+		usb_sndbulkpipe(usb_dev, VPRBRD_EP_OUT), rmsg,
+		sizeof(struct vprbrd_i2c_read_hdr), &bytes_actual,
+		VPRBRD_USB_TIMEOUT_MS);
+
+	if ((ret < 0)
+		|| (bytes_actual != sizeof(struct vprbrd_i2c_read_hdr))) {
+		dev_err(&usb_dev->dev, "failure transmitting usb\n");
+		error = -EREMOTEIO;
+	}
+
+	/* read the actual data */
+	ret = usb_bulk_msg(usb_dev,
+		usb_rcvbulkpipe(usb_dev, VPRBRD_EP_IN), rmsg,
+		bytes_xfer, &bytes_actual, VPRBRD_USB_TIMEOUT_MS);
+
+	if ((ret < 0) || (bytes_xfer != bytes_actual)) {
+		dev_err(&usb_dev->dev, "failure receiving usb\n");
+		error = -EREMOTEIO;
+	}
+	return error;
+}
+
+static int vprbrd_i2c_addr(struct usb_device *usb_dev,
+	struct vprbrd_i2c_addr_msg *amsg)
+{
+	int ret, bytes_actual;
+
+	ret = usb_bulk_msg(usb_dev,
+		usb_sndbulkpipe(usb_dev, VPRBRD_EP_OUT), amsg,
+		sizeof(struct vprbrd_i2c_addr_msg), &bytes_actual,
+		VPRBRD_USB_TIMEOUT_MS);
+
+	if ((ret < 0) ||
+			(sizeof(struct vprbrd_i2c_addr_msg) != bytes_actual)) {
+		dev_err(&usb_dev->dev, "failure transmitting usb\n");
+		return -EREMOTEIO;
+	}
+	return 0;
+}
+
+static int vprbrd_i2c_read(struct vprbrd *vb, struct i2c_msg *msg)
+{
+	int ret;
+	u16 remain_len, bytes_xfer, len1, len2,
+		start = 0x0000;
+	struct vprbrd_i2c_read_msg *rmsg =
+		(struct vprbrd_i2c_read_msg *)vb->buf;
+
+	remain_len = msg->len;
+	rmsg->header.cmd = VPRBRD_I2C_CMD_READ;
+	while (remain_len > 0) {
+		rmsg->header.addr = cpu_to_le16(start + 0x4000);
+		if (remain_len <= 255) {
+			len1 = remain_len;
+			len2 = 0x00;
+			rmsg->header.len0 = remain_len;
+			rmsg->header.len1 = 0x00;
+			rmsg->header.len2 = 0x00;
+			rmsg->header.len3 = 0x00;
+			rmsg->header.len4 = 0x00;
+			rmsg->header.len5 = 0x00;
+			remain_len = 0;
+		} else if (remain_len <= 510) {
+			len1 = remain_len;
+			len2 = 0x00;
+			rmsg->header.len0 = remain_len - 255;
+			rmsg->header.len1 = 0xff;
+			rmsg->header.len2 = 0x00;
+			rmsg->header.len3 = 0x00;
+			rmsg->header.len4 = 0x00;
+			rmsg->header.len5 = 0x00;
+			remain_len = 0;
+		} else if (remain_len <= 512) {
+			len1 = remain_len;
+			len2 = 0x00;
+			rmsg->header.len0 = remain_len - 510;
+			rmsg->header.len1 = 0xff;
+			rmsg->header.len2 = 0xff;
+			rmsg->header.len3 = 0x00;
+			rmsg->header.len4 = 0x00;
+			rmsg->header.len5 = 0x00;
+			remain_len = 0;
+		} else if (remain_len <= 767) {
+			len1 = 512;
+			len2 = remain_len - 512;
+			rmsg->header.len0 = 0x02;
+			rmsg->header.len1 = 0xff;
+			rmsg->header.len2 = 0xff;
+			rmsg->header.len3 = remain_len - 512;
+			rmsg->header.len4 = 0x00;
+			rmsg->header.len5 = 0x00;
+			bytes_xfer = remain_len;
+			remain_len = 0;
+		} else if (remain_len <= 1022) {
+			len1 = 512;
+			len2 = remain_len - 512;
+			rmsg->header.len0 = 0x02;
+			rmsg->header.len1 = 0xff;
+			rmsg->header.len2 = 0xff;
+			rmsg->header.len3 = remain_len - 767;
+			rmsg->header.len4 = 0xff;
+			rmsg->header.len5 = 0x00;
+			remain_len = 0;
+		} else if (remain_len <= 1024) {
+			len1 = 512;
+			len2 = remain_len - 512;
+			rmsg->header.len0 = 0x02;
+			rmsg->header.len1 = 0xff;
+			rmsg->header.len2 = 0xff;
+			rmsg->header.len3 = remain_len - 1022;
+			rmsg->header.len4 = 0xff;
+			rmsg->header.len5 = 0xff;
+			remain_len = 0;
+		} else {
+			len1 = 512;
+			len2 = 512;
+			rmsg->header.len0 = 0x02;
+			rmsg->header.len1 = 0xff;
+			rmsg->header.len2 = 0xff;
+			rmsg->header.len3 = 0x02;
+			rmsg->header.len4 = 0xff;
+			rmsg->header.len5 = 0xff;
+			remain_len -= 1024;
+			start += 1024;
+		}
+		rmsg->header.tf1 = cpu_to_le16(len1);
+		rmsg->header.tf2 = cpu_to_le16(len2);
+
+		/* first read transfer */
+		ret = vprbrd_i2c_receive(vb->usb_dev, rmsg, len1);
+		if (ret < 0)
+			return ret;
+		/* copy the received data */
+		memcpy(msg->buf + start, rmsg, len1);
+
+		/* second read transfer if neccessary */
+		if (len2 > 0) {
+			ret = vprbrd_i2c_receive(vb->usb_dev, rmsg, len2);
+			if (ret < 0)
+				return ret;
+			/* copy the received data */
+			memcpy(msg->buf + start + 512, rmsg, len2);
+		}
+	}
+	return 0;
+}
+
+static int vprbrd_i2c_write(struct vprbrd *vb, struct i2c_msg *msg)
+{
+	int ret, bytes_actual;
+	u16 remain_len, bytes_xfer,
+		start = 0x0000;
+	struct vprbrd_i2c_write_msg *wmsg =
+		(struct vprbrd_i2c_write_msg *)vb->buf;
+
+	remain_len = msg->len;
+	wmsg->header.cmd = VPRBRD_I2C_CMD_WRITE;
+	wmsg->header.last = 0x00;
+	wmsg->header.chan = 0x00;
+	wmsg->header.spi = 0x0000;
+	while (remain_len > 0) {
+		wmsg->header.addr = cpu_to_le16(start + 0x4000);
+		if (remain_len > 503) {
+			wmsg->header.len1 = 0xff;
+			wmsg->header.len2 = 0xf8;
+			remain_len -= 503;
+			bytes_xfer = 503 + sizeof(struct vprbrd_i2c_write_hdr);
+			start += 503;
+		} else if (remain_len > 255) {
+			wmsg->header.len1 = 0xff;
+			wmsg->header.len2 = (remain_len - 255);
+			bytes_xfer = remain_len +
+				sizeof(struct vprbrd_i2c_write_hdr);
+			remain_len = 0;
+		} else {
+			wmsg->header.len1 = remain_len;
+			wmsg->header.len2 = 0x00;
+			bytes_xfer = remain_len +
+				sizeof(struct vprbrd_i2c_write_hdr);
+			remain_len = 0;
+		}
+		memcpy(wmsg->data, msg->buf + start,
+			bytes_xfer - sizeof(struct vprbrd_i2c_write_hdr));
+
+		ret = usb_bulk_msg(vb->usb_dev,
+			usb_sndbulkpipe(vb->usb_dev,
+			VPRBRD_EP_OUT), wmsg,
+			bytes_xfer, &bytes_actual, VPRBRD_USB_TIMEOUT_MS);
+		if ((ret < 0) || (bytes_xfer != bytes_actual))
+			return -EREMOTEIO;
+	}
+	return 0;
+}
+
+static int vprbrd_i2c_xfer(struct i2c_adapter *i2c, struct i2c_msg *msgs,
+		int num)
+{
+	struct i2c_msg *pmsg;
+	int i, ret,
+		error = 0;
+	struct vprbrd *vb = (struct vprbrd *)i2c->algo_data;
+	struct vprbrd_i2c_addr_msg *amsg =
+		(struct vprbrd_i2c_addr_msg *)vb->buf;
+	struct vprbrd_i2c_status *smsg = (struct vprbrd_i2c_status *)vb->buf;
+
+	dev_dbg(&i2c->dev, "master xfer %d messages:\n", num);
+
+	for (i = 0 ; i < num ; i++) {
+		pmsg = &msgs[i];
+
+		dev_dbg(&i2c->dev,
+			"  %d: %s (flags %d) %d bytes to 0x%02x\n",
+			i, pmsg->flags & I2C_M_RD ? "read" : "write",
+			pmsg->flags, pmsg->len, pmsg->addr);
+
+		/* msgs longer than 2048 bytes are not supported by adapter */
+		if (pmsg->len > 2048)
+			return -EINVAL;
+
+		mutex_lock(&vb->lock);
+		/* directly send the message */
+		if (pmsg->flags & I2C_M_RD) {
+			/* read data */
+			amsg->cmd = VPRBRD_I2C_CMD_ADDR;
+			amsg->unknown2 = 0x00;
+			amsg->unknown3 = 0x00;
+			amsg->addr = pmsg->addr;
+			amsg->unknown1 = 0x01;
+			amsg->len = cpu_to_le16(pmsg->len);
+			/* send the addr and len, we're interested to board */
+			ret = vprbrd_i2c_addr(vb->usb_dev, amsg);
+			if (ret < 0)
+				error = ret;
+
+			ret = vprbrd_i2c_read(vb, pmsg);
+			if (ret < 0)
+				error = ret;
+
+			ret = vprbrd_i2c_status(i2c, smsg, error);
+			if (ret < 0)
+				error = ret;
+			/* in case of protocol error, return the error */
+			if (error < 0)
+				goto error;
+		} else {
+			/* write data */
+			ret = vprbrd_i2c_write(vb, pmsg);
+
+			amsg->cmd = VPRBRD_I2C_CMD_ADDR;
+			amsg->unknown2 = 0x00;
+			amsg->unknown3 = 0x00;
+			amsg->addr = pmsg->addr;
+			amsg->unknown1 = 0x00;
+			amsg->len = cpu_to_le16(pmsg->len);
+			/* send the addr, the data goes to to board */
+			ret = vprbrd_i2c_addr(vb->usb_dev, amsg);
+			if (ret < 0)
+				error = ret;
+
+			ret = vprbrd_i2c_status(i2c, smsg, error);
+			if (ret < 0)
+				error = ret;
+
+			if (error < 0)
+				goto error;
+		}
+		mutex_unlock(&vb->lock);
+	}
+	return 0;
+error:
+	mutex_unlock(&vb->lock);
+	return error;
+}
+
+static u32 vprbrd_i2c_func(struct i2c_adapter *i2c)
+{
+	return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL;
+}
+
+/* This is the actual algorithm we define */
+static const struct i2c_algorithm vprbrd_algorithm = {
+	.master_xfer	= vprbrd_i2c_xfer,
+	.functionality	= vprbrd_i2c_func,
+};
+
+static int __devinit vprbrd_i2c_probe(struct platform_device *pdev)
+{
+	struct vprbrd *vb = dev_get_drvdata(pdev->dev.parent);
+	struct vprbrd_i2c *vb_i2c;
+	int ret;
+	int pipe;
+
+	vb_i2c = kzalloc(sizeof(*vb_i2c), GFP_KERNEL);
+	if (vb_i2c == NULL)
+		return -ENOMEM;
+
+	/* setup i2c adapter description */
+	vb_i2c->i2c.owner = THIS_MODULE;
+	vb_i2c->i2c.class = I2C_CLASS_HWMON;
+	vb_i2c->i2c.algo = &vprbrd_algorithm;
+	vb_i2c->i2c.algo_data = vb;
+	/* save the param in usb capabable memory */
+	vb_i2c->bus_freq_param = i2c_bus_param;
+
+	snprintf(vb_i2c->i2c.name, sizeof(vb_i2c->i2c.name),
+		 "viperboard at bus %03d device %03d",
+		 vb->usb_dev->bus->busnum, vb->usb_dev->devnum);
+
+	/* setting the bus frequency */
+	if ((i2c_bus_param <= VPRBRD_I2C_FREQ_10KHZ)
+		&& (i2c_bus_param >= VPRBRD_I2C_FREQ_6MHZ)) {
+		pipe = usb_sndctrlpipe(vb->usb_dev, 0);
+		ret = usb_control_msg(vb->usb_dev, pipe,
+			VPRBRD_USB_REQUEST_I2C_FREQ, VPRBRD_USB_TYPE_OUT,
+			0x0000, 0x0000, &vb_i2c->bus_freq_param, 1,
+			VPRBRD_USB_TIMEOUT_MS);
+	    if (ret != 1) {
+		dev_err(&pdev->dev,
+			"failure setting i2c_bus_freq to %d\n", i2c_bus_freq);
+		ret = -EIO;
+		goto error;
+	    }
+	} else {
+		dev_err(&pdev->dev,
+			"invalid i2c_bus_freq setting:%d\n", i2c_bus_freq);
+		ret = -EIO;
+		goto error;
+	}
+
+	vb_i2c->i2c.dev.parent = &pdev->dev;
+
+	/* attach to i2c layer */
+	i2c_add_adapter(&vb_i2c->i2c);
+
+	platform_set_drvdata(pdev, vb_i2c);
+
+	return 0;
+
+error:
+	kfree(vb_i2c);
+	return ret;
+}
+
+static int __devexit vprbrd_i2c_remove(struct platform_device *pdev)
+{
+	struct vprbrd_i2c *vb_i2c = platform_get_drvdata(pdev);
+	int ret;
+
+	ret = i2c_del_adapter(&vb_i2c->i2c);
+
+	return ret;
+}
+
+static struct platform_driver vprbrd_i2c_driver = {
+	.driver.name	= "viperboard-i2c",
+	.driver.owner	= THIS_MODULE,
+	.probe		= vprbrd_i2c_probe,
+	.remove		= __devexit_p(vprbrd_i2c_remove),
+};
+
+static int __init vprbrd_i2c_init(void)
+{
+	switch (i2c_bus_freq) {
+	case 6000:
+		i2c_bus_param = VPRBRD_I2C_FREQ_6MHZ;
+		break;
+	case 3000:
+		i2c_bus_param = VPRBRD_I2C_FREQ_3MHZ;
+		break;
+	case 1000:
+		i2c_bus_param = VPRBRD_I2C_FREQ_1MHZ;
+		break;
+	case 400:
+		i2c_bus_param = VPRBRD_I2C_FREQ_400KHZ;
+		break;
+	case 200:
+		i2c_bus_param = VPRBRD_I2C_FREQ_200KHZ;
+		break;
+	case 100:
+		i2c_bus_param = VPRBRD_I2C_FREQ_100KHZ;
+		break;
+	case 10:
+		i2c_bus_param = VPRBRD_I2C_FREQ_10KHZ;
+		break;
+	default:
+		pr_warn("invalid i2c_bus_freq (%d)\n", i2c_bus_freq);
+		i2c_bus_param = VPRBRD_I2C_FREQ_100KHZ;
+	}
+
+	return platform_driver_register(&vprbrd_i2c_driver);
+}
+subsys_initcall(vprbrd_i2c_init);
+
+static void __exit vprbrd_i2c_exit(void)
+{
+	platform_driver_unregister(&vprbrd_i2c_driver);
+}
+module_exit(vprbrd_i2c_exit);
+
+MODULE_AUTHOR("Lars Poeschel <poeschel@lemonage.de>");
+MODULE_DESCRIPTION("I2C master driver for Nano River Techs Viperboard");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:viperboard-i2c");
diff --git a/drivers/mfd/viperboard.c b/drivers/mfd/viperboard.c
index c75ac08f639c..4f74ec868a52 100644
--- a/drivers/mfd/viperboard.c
+++ b/drivers/mfd/viperboard.c
@@ -41,6 +41,9 @@ static struct mfd_cell vprbrd_devs[] = {
 	{
 		.name = "viperboard-gpio",
 	},
+	{
+		.name = "viperboard-i2c",
+	},
 };
 
 static int vprbrd_probe(struct usb_interface *interface,
diff --git a/include/linux/mfd/viperboard.h b/include/linux/mfd/viperboard.h
index 42d339fac3e5..ef7851430d86 100644
--- a/include/linux/mfd/viperboard.h
+++ b/include/linux/mfd/viperboard.h
@@ -42,6 +42,8 @@
 #define VPRBRD_USB_TYPE_OUT	    0x40
 #define VPRBRD_USB_TYPE_IN	    0xc0
 #define VPRBRD_USB_TIMEOUT_MS       100
+#define VPRBRD_USB_REQUEST_I2C_FREQ 0xe6
+#define VPRBRD_USB_REQUEST_I2C      0xe9
 #define VPRBRD_USB_REQUEST_MAJOR    0xea
 #define VPRBRD_USB_REQUEST_MINOR    0xeb
 #define VPRBRD_USB_REQUEST_GPIOA    0xed
-- 
cgit v1.2.3


From ffd8a6e7a77802dd09559be78dea63956aa8f1d5 Mon Sep 17 00:00:00 2001
From: Lars Poeschel <poeschel@lemonage.de>
Date: Mon, 5 Nov 2012 15:48:26 +0100
Subject: iio: adc: Add viperboard adc driver

This adds the mfd cell to use the adc part of the Nano River Technologies
viperboard.

Signed-off-by: Lars Poeschel <poeschel@lemonage.de>
Reviewed-by: Lars-Peter Clausen <lars@metafoo.de>
Acked-by: Jonathan Cameron <jic23@kernel.org>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/iio/adc/Kconfig          |   7 ++
 drivers/iio/adc/Makefile         |   1 +
 drivers/iio/adc/viperboard_adc.c | 181 +++++++++++++++++++++++++++++++++++++++
 drivers/mfd/viperboard.c         |   3 +
 include/linux/mfd/viperboard.h   |   1 +
 5 files changed, 193 insertions(+)
 create mode 100644 drivers/iio/adc/viperboard_adc.c

(limited to 'include/linux')

diff --git a/drivers/iio/adc/Kconfig b/drivers/iio/adc/Kconfig
index 1401ed1af39f..bb3053739c80 100644
--- a/drivers/iio/adc/Kconfig
+++ b/drivers/iio/adc/Kconfig
@@ -67,4 +67,11 @@ config TI_AM335X_ADC
 	  Say yes here to build support for Texas Instruments ADC
 	  driver which is also a MFD client.
 
+config VIPERBOARD_ADC
+	tristate "Viperboard ADC support"
+	depends on MFD_VIPERBOARD && USB
+	help
+	  Say yes here to access the ADC part of the Nano River
+	  Technologies Viperboard.
+
 endmenu
diff --git a/drivers/iio/adc/Makefile b/drivers/iio/adc/Makefile
index 4410a90fc84b..4268fa987fe6 100644
--- a/drivers/iio/adc/Makefile
+++ b/drivers/iio/adc/Makefile
@@ -9,3 +9,4 @@ obj-$(CONFIG_AD7791) += ad7791.o
 obj-$(CONFIG_AT91_ADC) += at91_adc.o
 obj-$(CONFIG_LP8788_ADC) += lp8788_adc.o
 obj-$(CONFIG_TI_AM335X_ADC) += ti_am335x_adc.o
+obj-$(CONFIG_VIPERBOARD_ADC) += viperboard_adc.o
\ No newline at end of file
diff --git a/drivers/iio/adc/viperboard_adc.c b/drivers/iio/adc/viperboard_adc.c
new file mode 100644
index 000000000000..10136a8b20d4
--- /dev/null
+++ b/drivers/iio/adc/viperboard_adc.c
@@ -0,0 +1,181 @@
+/*
+ *  Nano River Technologies viperboard IIO ADC driver
+ *
+ *  (C) 2012 by Lemonage GmbH
+ *  Author: Lars Poeschel <poeschel@lemonage.de>
+ *  All rights reserved.
+ *
+ *  This program is free software; you can redistribute  it and/or modify it
+ *  under  the terms of  the GNU General  Public License as published by the
+ *  Free Software Foundation;  either version 2 of the	License, or (at your
+ *  option) any later version.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/mutex.h>
+#include <linux/platform_device.h>
+
+#include <linux/usb.h>
+#include <linux/iio/iio.h>
+
+#include <linux/mfd/viperboard.h>
+
+#define VPRBRD_ADC_CMD_GET		0x00
+
+struct vprbrd_adc_msg {
+	u8 cmd;
+	u8 chan;
+	u8 val;
+} __packed;
+
+struct vprbrd_adc {
+	struct vprbrd *vb;
+};
+
+#define VPRBRD_ADC_CHANNEL(_index) {			\
+	.type = IIO_VOLTAGE,				\
+	.indexed = 1,					\
+	.channel = _index,				\
+	.info_mask = IIO_CHAN_INFO_RAW_SEPARATE_BIT,	\
+	.scan_index = _index,				\
+	.scan_type = {					\
+		.sign = 'u',				\
+		.realbits = 8,				\
+		.storagebits = 8,			\
+	},						\
+}
+
+static struct iio_chan_spec const vprbrd_adc_iio_channels[] = {
+	VPRBRD_ADC_CHANNEL(0),
+	VPRBRD_ADC_CHANNEL(1),
+	VPRBRD_ADC_CHANNEL(2),
+	VPRBRD_ADC_CHANNEL(3),
+};
+
+static int vprbrd_iio_read_raw(struct iio_dev *iio_dev,
+				struct iio_chan_spec const *chan,
+				int *val,
+				int *val2,
+				long info)
+{
+	int ret, error = 0;
+	struct vprbrd_adc *adc = iio_priv(iio_dev);
+	struct vprbrd *vb = adc->vb;
+	struct vprbrd_adc_msg *admsg = (struct vprbrd_adc_msg *)vb->buf;
+
+	switch (info) {
+	case IIO_CHAN_INFO_RAW:
+		mutex_lock(&vb->lock);
+
+		admsg->cmd = VPRBRD_ADC_CMD_GET;
+		admsg->chan = chan->scan_index;
+		admsg->val = 0x00;
+
+		ret = usb_control_msg(vb->usb_dev,
+			usb_sndctrlpipe(vb->usb_dev, 0), VPRBRD_USB_REQUEST_ADC,
+			VPRBRD_USB_TYPE_OUT, 0x0000, 0x0000, admsg,
+			sizeof(struct vprbrd_adc_msg), VPRBRD_USB_TIMEOUT_MS);
+		if (ret != sizeof(struct vprbrd_adc_msg)) {
+			dev_err(&iio_dev->dev, "usb send error on adc read\n");
+			error = -EREMOTEIO;
+		}
+
+		ret = usb_control_msg(vb->usb_dev,
+			usb_rcvctrlpipe(vb->usb_dev, 0), VPRBRD_USB_REQUEST_ADC,
+			VPRBRD_USB_TYPE_IN, 0x0000, 0x0000, admsg,
+			sizeof(struct vprbrd_adc_msg), VPRBRD_USB_TIMEOUT_MS);
+
+		*val = admsg->val;
+
+		mutex_unlock(&vb->lock);
+
+		if (ret != sizeof(struct vprbrd_adc_msg)) {
+			dev_err(&iio_dev->dev, "usb recv error on adc read\n");
+			error = -EREMOTEIO;
+		}
+
+		if (error)
+			goto error;
+
+		return IIO_VAL_INT;
+	default:
+		error = -EINVAL;
+		break;
+	}
+error:
+	return error;
+}
+
+static const struct iio_info vprbrd_adc_iio_info = {
+	.read_raw = &vprbrd_iio_read_raw,
+	.driver_module = THIS_MODULE,
+};
+
+static int __devinit vprbrd_adc_probe(struct platform_device *pdev)
+{
+	struct vprbrd *vb = dev_get_drvdata(pdev->dev.parent);
+	struct vprbrd_adc *adc;
+	struct iio_dev *indio_dev;
+	int ret;
+
+	/* registering iio */
+	indio_dev = iio_device_alloc(sizeof(*adc));
+	if (!indio_dev) {
+		dev_err(&pdev->dev, "failed allocating iio device\n");
+		return -ENOMEM;
+	}
+
+	adc = iio_priv(indio_dev);
+	adc->vb = vb;
+	indio_dev->name = "viperboard adc";
+	indio_dev->dev.parent = &pdev->dev;
+	indio_dev->info = &vprbrd_adc_iio_info;
+	indio_dev->modes = INDIO_DIRECT_MODE;
+	indio_dev->channels = vprbrd_adc_iio_channels;
+	indio_dev->num_channels = ARRAY_SIZE(vprbrd_adc_iio_channels);
+
+	ret = iio_device_register(indio_dev);
+	if (ret) {
+		dev_err(&pdev->dev, "could not register iio (adc)");
+		goto error;
+	}
+
+	platform_set_drvdata(pdev, indio_dev);
+
+	return 0;
+
+error:
+	iio_device_free(indio_dev);
+	return ret;
+}
+
+static int __devexit vprbrd_adc_remove(struct platform_device *pdev)
+{
+	struct iio_dev *indio_dev = platform_get_drvdata(pdev);
+
+	iio_device_unregister(indio_dev);
+	iio_device_free(indio_dev);
+
+	return 0;
+}
+
+static struct platform_driver vprbrd_adc_driver = {
+	.driver = {
+		.name	= "viperboard-adc",
+		.owner	= THIS_MODULE,
+	},
+	.probe		= vprbrd_adc_probe,
+	.remove		= __devexit_p(vprbrd_adc_remove),
+};
+
+module_platform_driver(vprbrd_adc_driver);
+
+MODULE_AUTHOR("Lars Poeschel <poeschel@lemonage.de>");
+MODULE_DESCRIPTION("IIO ADC driver for Nano River Techs Viperboard");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:viperboard-adc");
diff --git a/drivers/mfd/viperboard.c b/drivers/mfd/viperboard.c
index 4f74ec868a52..276122b37199 100644
--- a/drivers/mfd/viperboard.c
+++ b/drivers/mfd/viperboard.c
@@ -44,6 +44,9 @@ static struct mfd_cell vprbrd_devs[] = {
 	{
 		.name = "viperboard-i2c",
 	},
+	{
+		.name = "viperboard-adc",
+	},
 };
 
 static int vprbrd_probe(struct usb_interface *interface,
diff --git a/include/linux/mfd/viperboard.h b/include/linux/mfd/viperboard.h
index ef7851430d86..193452848c04 100644
--- a/include/linux/mfd/viperboard.h
+++ b/include/linux/mfd/viperboard.h
@@ -46,6 +46,7 @@
 #define VPRBRD_USB_REQUEST_I2C      0xe9
 #define VPRBRD_USB_REQUEST_MAJOR    0xea
 #define VPRBRD_USB_REQUEST_MINOR    0xeb
+#define VPRBRD_USB_REQUEST_ADC      0xec
 #define VPRBRD_USB_REQUEST_GPIOA    0xed
 #define VPRBRD_USB_REQUEST_GPIOB    0xdd
 
-- 
cgit v1.2.3


From 1950c7164646bfeeb82c34bc299d82119706afb5 Mon Sep 17 00:00:00 2001
From: Davide Ciminaghi <ciminaghi@gnudd.com>
Date: Fri, 9 Nov 2012 15:19:52 +0100
Subject: mfd: sta2x11-mfd: Add apb-soc regs driver and factor out common code

A driver for the apb-soc registers is needed by the clock
infrastructure code to configure and control clocks on the sta2x11
chip.

Since some of the functions in sta2x11-mfd.c were almost identical
for the two existing platform devices, the following changes
have been performed to avoid further code duplication while
adding the apb-soc-regs driver:

* The sctl_regs and apbreg_regs fields in struct sta2x11_mfd
have been turned into just one array of pointers accessed by
device index.
* Platform probe methods have become one-liners invoking a
common probe with the device's index as second parameter.
* For loops have been inserted where the same operations
were performed for each of the two bars of a pci device.
* The apbreg_mask and sctl_mask functions were almost identical,
so they were turned into inline functions invoking a common
__sta2x11_mfd_mask() with the platform device's index as last
parameter. To do this, enum sta2x11_mfd_plat_dev has been declared in
sta2x11-mfd.h and more device types have been added to it.

Reviewed-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Davide Ciminaghi <ciminaghi@gnudd.com>
Acked-by: Alessandro Rubini <rubini@gnudd.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/sta2x11-mfd.c       | 350 +++++++++++++++++++++++++++-------------
 include/linux/mfd/sta2x11-mfd.h | 156 +++++++++++++++++-
 2 files changed, 395 insertions(+), 111 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/sta2x11-mfd.c b/drivers/mfd/sta2x11-mfd.c
index d35da6820bea..9e01b84aa8bc 100644
--- a/drivers/mfd/sta2x11-mfd.c
+++ b/drivers/mfd/sta2x11-mfd.c
@@ -40,8 +40,7 @@ struct sta2x11_mfd {
 	struct sta2x11_instance *instance;
 	spinlock_t lock;
 	struct list_head list;
-	void __iomem *sctl_regs;
-	void __iomem *apbreg_regs;
+	void __iomem *regs[sta2x11_n_mfd_plat_devs];
 };
 
 static LIST_HEAD(sta2x11_mfd_list);
@@ -100,56 +99,33 @@ static int __devexit mfd_remove(struct pci_dev *pdev)
 	return 0;
 }
 
-/* These two functions are exported and are not expected to fail */
-u32 sta2x11_sctl_mask(struct pci_dev *pdev, u32 reg, u32 mask, u32 val)
+/* This function is exported and is not expected to fail */
+u32 __sta2x11_mfd_mask(struct pci_dev *pdev, u32 reg, u32 mask, u32 val,
+		       enum sta2x11_mfd_plat_dev index)
 {
 	struct sta2x11_mfd *mfd = sta2x11_mfd_find(pdev);
 	u32 r;
 	unsigned long flags;
+	void __iomem *regs = mfd->regs[index];
 
 	if (!mfd) {
 		dev_warn(&pdev->dev, ": can't access sctl regs\n");
 		return 0;
 	}
-	if (!mfd->sctl_regs) {
+	if (!regs) {
 		dev_warn(&pdev->dev, ": system ctl not initialized\n");
 		return 0;
 	}
 	spin_lock_irqsave(&mfd->lock, flags);
-	r = readl(mfd->sctl_regs + reg);
+	r = readl(regs + reg);
 	r &= ~mask;
 	r |= val;
 	if (mask)
-		writel(r, mfd->sctl_regs + reg);
+		writel(r, regs + reg);
 	spin_unlock_irqrestore(&mfd->lock, flags);
 	return r;
 }
-EXPORT_SYMBOL(sta2x11_sctl_mask);
-
-u32 sta2x11_apbreg_mask(struct pci_dev *pdev, u32 reg, u32 mask, u32 val)
-{
-	struct sta2x11_mfd *mfd = sta2x11_mfd_find(pdev);
-	u32 r;
-	unsigned long flags;
-
-	if (!mfd) {
-		dev_warn(&pdev->dev, ": can't access apb regs\n");
-		return 0;
-	}
-	if (!mfd->apbreg_regs) {
-		dev_warn(&pdev->dev, ": apb bridge not initialized\n");
-		return 0;
-	}
-	spin_lock_irqsave(&mfd->lock, flags);
-	r = readl(mfd->apbreg_regs + reg);
-	r &= ~mask;
-	r |= val;
-	if (mask)
-		writel(r, mfd->apbreg_regs + reg);
-	spin_unlock_irqrestore(&mfd->lock, flags);
-	return r;
-}
-EXPORT_SYMBOL(sta2x11_apbreg_mask);
+EXPORT_SYMBOL(__sta2x11_mfd_mask);
 
 /* Two debugfs files, for our registers (FIXME: one instance only) */
 #define REG(regname) {.name = #regname, .offset = SCTL_ ## regname}
@@ -180,51 +156,103 @@ static struct debugfs_regset32 apbreg_regset = {
 	.nregs = ARRAY_SIZE(sta2x11_apbreg_regs),
 };
 
-static struct dentry *sta2x11_sctl_debugfs;
-static struct dentry *sta2x11_apbreg_debugfs;
+#define REG(regname) {.name = #regname, .offset = regname}
+static struct debugfs_reg32 sta2x11_apb_soc_regs_regs[] = {
+	REG(PCIE_EP1_FUNC3_0_INTR_REG), REG(PCIE_EP1_FUNC7_4_INTR_REG),
+	REG(PCIE_EP2_FUNC3_0_INTR_REG), REG(PCIE_EP2_FUNC7_4_INTR_REG),
+	REG(PCIE_EP3_FUNC3_0_INTR_REG), REG(PCIE_EP3_FUNC7_4_INTR_REG),
+	REG(PCIE_EP4_FUNC3_0_INTR_REG), REG(PCIE_EP4_FUNC7_4_INTR_REG),
+	REG(PCIE_INTR_ENABLE0_REG), REG(PCIE_INTR_ENABLE1_REG),
+	REG(PCIE_EP1_FUNC_TC_REG), REG(PCIE_EP2_FUNC_TC_REG),
+	REG(PCIE_EP3_FUNC_TC_REG), REG(PCIE_EP4_FUNC_TC_REG),
+	REG(PCIE_EP1_FUNC_F_REG), REG(PCIE_EP2_FUNC_F_REG),
+	REG(PCIE_EP3_FUNC_F_REG), REG(PCIE_EP4_FUNC_F_REG),
+	REG(PCIE_PAB_AMBA_SW_RST_REG), REG(PCIE_PM_STATUS_0_PORT_0_4),
+	REG(PCIE_PM_STATUS_7_0_EP1), REG(PCIE_PM_STATUS_7_0_EP2),
+	REG(PCIE_PM_STATUS_7_0_EP3), REG(PCIE_PM_STATUS_7_0_EP4),
+	REG(PCIE_DEV_ID_0_EP1_REG), REG(PCIE_CC_REV_ID_0_EP1_REG),
+	REG(PCIE_DEV_ID_1_EP1_REG), REG(PCIE_CC_REV_ID_1_EP1_REG),
+	REG(PCIE_DEV_ID_2_EP1_REG), REG(PCIE_CC_REV_ID_2_EP1_REG),
+	REG(PCIE_DEV_ID_3_EP1_REG), REG(PCIE_CC_REV_ID_3_EP1_REG),
+	REG(PCIE_DEV_ID_4_EP1_REG), REG(PCIE_CC_REV_ID_4_EP1_REG),
+	REG(PCIE_DEV_ID_5_EP1_REG), REG(PCIE_CC_REV_ID_5_EP1_REG),
+	REG(PCIE_DEV_ID_6_EP1_REG), REG(PCIE_CC_REV_ID_6_EP1_REG),
+	REG(PCIE_DEV_ID_7_EP1_REG), REG(PCIE_CC_REV_ID_7_EP1_REG),
+	REG(PCIE_DEV_ID_0_EP2_REG), REG(PCIE_CC_REV_ID_0_EP2_REG),
+	REG(PCIE_DEV_ID_1_EP2_REG), REG(PCIE_CC_REV_ID_1_EP2_REG),
+	REG(PCIE_DEV_ID_2_EP2_REG), REG(PCIE_CC_REV_ID_2_EP2_REG),
+	REG(PCIE_DEV_ID_3_EP2_REG), REG(PCIE_CC_REV_ID_3_EP2_REG),
+	REG(PCIE_DEV_ID_4_EP2_REG), REG(PCIE_CC_REV_ID_4_EP2_REG),
+	REG(PCIE_DEV_ID_5_EP2_REG), REG(PCIE_CC_REV_ID_5_EP2_REG),
+	REG(PCIE_DEV_ID_6_EP2_REG), REG(PCIE_CC_REV_ID_6_EP2_REG),
+	REG(PCIE_DEV_ID_7_EP2_REG), REG(PCIE_CC_REV_ID_7_EP2_REG),
+	REG(PCIE_DEV_ID_0_EP3_REG), REG(PCIE_CC_REV_ID_0_EP3_REG),
+	REG(PCIE_DEV_ID_1_EP3_REG), REG(PCIE_CC_REV_ID_1_EP3_REG),
+	REG(PCIE_DEV_ID_2_EP3_REG), REG(PCIE_CC_REV_ID_2_EP3_REG),
+	REG(PCIE_DEV_ID_3_EP3_REG), REG(PCIE_CC_REV_ID_3_EP3_REG),
+	REG(PCIE_DEV_ID_4_EP3_REG), REG(PCIE_CC_REV_ID_4_EP3_REG),
+	REG(PCIE_DEV_ID_5_EP3_REG), REG(PCIE_CC_REV_ID_5_EP3_REG),
+	REG(PCIE_DEV_ID_6_EP3_REG), REG(PCIE_CC_REV_ID_6_EP3_REG),
+	REG(PCIE_DEV_ID_7_EP3_REG), REG(PCIE_CC_REV_ID_7_EP3_REG),
+	REG(PCIE_DEV_ID_0_EP4_REG), REG(PCIE_CC_REV_ID_0_EP4_REG),
+	REG(PCIE_DEV_ID_1_EP4_REG), REG(PCIE_CC_REV_ID_1_EP4_REG),
+	REG(PCIE_DEV_ID_2_EP4_REG), REG(PCIE_CC_REV_ID_2_EP4_REG),
+	REG(PCIE_DEV_ID_3_EP4_REG), REG(PCIE_CC_REV_ID_3_EP4_REG),
+	REG(PCIE_DEV_ID_4_EP4_REG), REG(PCIE_CC_REV_ID_4_EP4_REG),
+	REG(PCIE_DEV_ID_5_EP4_REG), REG(PCIE_CC_REV_ID_5_EP4_REG),
+	REG(PCIE_DEV_ID_6_EP4_REG), REG(PCIE_CC_REV_ID_6_EP4_REG),
+	REG(PCIE_DEV_ID_7_EP4_REG), REG(PCIE_CC_REV_ID_7_EP4_REG),
+	REG(PCIE_SUBSYS_VEN_ID_REG), REG(PCIE_COMMON_CLOCK_CONFIG_0_4_0),
+	REG(PCIE_MIPHYP_SSC_EN_REG), REG(PCIE_MIPHYP_ADDR_REG),
+	REG(PCIE_L1_ASPM_READY_REG), REG(PCIE_EXT_CFG_RDY_REG),
+	REG(PCIE_SoC_INT_ROUTER_STATUS0_REG),
+	REG(PCIE_SoC_INT_ROUTER_STATUS1_REG),
+	REG(PCIE_SoC_INT_ROUTER_STATUS2_REG),
+	REG(PCIE_SoC_INT_ROUTER_STATUS3_REG),
+	REG(DMA_IP_CTRL_REG), REG(DISP_BRIDGE_PU_PD_CTRL_REG),
+	REG(VIP_PU_PD_CTRL_REG), REG(USB_MLB_PU_PD_CTRL_REG),
+	REG(SDIO_PU_PD_MISCFUNC_CTRL_REG1), REG(SDIO_PU_PD_MISCFUNC_CTRL_REG2),
+	REG(UART_PU_PD_CTRL_REG), REG(ARM_Lock), REG(SYS_IO_CHAR_REG1),
+	REG(SYS_IO_CHAR_REG2), REG(SATA_CORE_ID_REG), REG(SATA_CTRL_REG),
+	REG(I2C_HSFIX_MISC_REG), REG(SPARE2_RESERVED), REG(SPARE3_RESERVED),
+	REG(MASTER_LOCK_REG), REG(SYSTEM_CONFIG_STATUS_REG),
+	REG(MSP_CLK_CTRL_REG), REG(COMPENSATION_REG1), REG(COMPENSATION_REG2),
+	REG(COMPENSATION_REG3), REG(TEST_CTL_REG),
+};
+#undef REG
 
-/* Probe for the two platform devices */
-static int sta2x11_sctl_probe(struct platform_device *dev)
-{
-	struct pci_dev **pdev;
-	struct sta2x11_mfd *mfd;
-	struct resource *res;
+static struct debugfs_regset32 apb_soc_regs_regset = {
+	.regs = sta2x11_apb_soc_regs_regs,
+	.nregs = ARRAY_SIZE(sta2x11_apb_soc_regs_regs),
+};
 
-	pdev = dev->dev.platform_data;
-	mfd = sta2x11_mfd_find(*pdev);
-	if (!mfd)
-		return -ENODEV;
 
-	res = platform_get_resource(dev, IORESOURCE_MEM, 0);
-	if (!res)
-		return -ENOMEM;
+static struct dentry *sta2x11_mfd_debugfs[sta2x11_n_mfd_plat_devs];
 
-	if (!request_mem_region(res->start, resource_size(res),
-				"sta2x11-sctl"))
-		return -EBUSY;
+static struct debugfs_regset32 *sta2x11_mfd_regset[sta2x11_n_mfd_plat_devs] = {
+	[sta2x11_sctl] = &sctl_regset,
+	[sta2x11_apbreg] = &apbreg_regset,
+	[sta2x11_apb_soc_regs] = &apb_soc_regs_regset,
+};
 
-	mfd->sctl_regs = ioremap(res->start, resource_size(res));
-	if (!mfd->sctl_regs) {
-		release_mem_region(res->start, resource_size(res));
-		return -ENOMEM;
-	}
-	sctl_regset.base = mfd->sctl_regs;
-	sta2x11_sctl_debugfs = debugfs_create_regset32("sta2x11-sctl",
-						  S_IFREG | S_IRUGO,
-						  NULL, &sctl_regset);
-	return 0;
-}
+static const char *sta2x11_mfd_names[sta2x11_n_mfd_plat_devs] = {
+	[sta2x11_sctl] = "sta2x11-sctl",
+	[sta2x11_apbreg] = "sta2x11-apbreg",
+	[sta2x11_apb_soc_regs] = "sta2x11-apb-soc-regs",
+};
 
-static int sta2x11_apbreg_probe(struct platform_device *dev)
+/* Probe for the three platform devices */
+
+static int sta2x11_mfd_platform_probe(struct platform_device *dev,
+				      enum sta2x11_mfd_plat_dev index)
 {
 	struct pci_dev **pdev;
 	struct sta2x11_mfd *mfd;
 	struct resource *res;
+	const char *name = sta2x11_mfd_names[index];
+	struct debugfs_regset32 *regset = sta2x11_mfd_regset[index];
 
 	pdev = dev->dev.platform_data;
-	dev_dbg(&dev->dev, "%s: pdata is %p\n", __func__, pdev);
-	dev_dbg(&dev->dev, "%s: *pdata is %p\n", __func__, *pdev);
-
 	mfd = sta2x11_mfd_find(*pdev);
 	if (!mfd)
 		return -ENODEV;
@@ -233,25 +261,37 @@ static int sta2x11_apbreg_probe(struct platform_device *dev)
 	if (!res)
 		return -ENOMEM;
 
-	if (!request_mem_region(res->start, resource_size(res),
-				"sta2x11-apbreg"))
+	if (!request_mem_region(res->start, resource_size(res), name))
 		return -EBUSY;
 
-	mfd->apbreg_regs = ioremap(res->start, resource_size(res));
-	if (!mfd->apbreg_regs) {
+	mfd->regs[index] = ioremap(res->start, resource_size(res));
+	if (!mfd->regs[index]) {
 		release_mem_region(res->start, resource_size(res));
 		return -ENOMEM;
 	}
-	dev_dbg(&dev->dev, "%s: regbase %p\n", __func__, mfd->apbreg_regs);
-
-	apbreg_regset.base = mfd->apbreg_regs;
-	sta2x11_apbreg_debugfs = debugfs_create_regset32("sta2x11-apbreg",
-						  S_IFREG | S_IRUGO,
-						  NULL, &apbreg_regset);
+	regset->base = mfd->regs[index];
+	sta2x11_mfd_debugfs[index] = debugfs_create_regset32(name,
+							     S_IFREG | S_IRUGO,
+							     NULL, regset);
 	return 0;
 }
 
-/* The two platform drivers */
+static int sta2x11_sctl_probe(struct platform_device *dev)
+{
+	return sta2x11_mfd_platform_probe(dev, sta2x11_sctl);
+}
+
+static int sta2x11_apbreg_probe(struct platform_device *dev)
+{
+	return sta2x11_mfd_platform_probe(dev, sta2x11_apbreg);
+}
+
+static int sta2x11_apb_soc_regs_probe(struct platform_device *dev)
+{
+	return sta2x11_mfd_platform_probe(dev, sta2x11_apb_soc_regs);
+}
+
+/* The three platform drivers */
 static struct platform_driver sta2x11_sctl_platform_driver = {
 	.driver = {
 		.name	= "sta2x11-sctl",
@@ -280,13 +320,29 @@ static int __init sta2x11_apbreg_init(void)
 	return platform_driver_register(&sta2x11_platform_driver);
 }
 
+static struct platform_driver sta2x11_apb_soc_regs_platform_driver = {
+	.driver = {
+		.name	= "sta2x11-apb-soc-regs",
+		.owner	= THIS_MODULE,
+	},
+	.probe		= sta2x11_apb_soc_regs_probe,
+};
+
+static int __init sta2x11_apb_soc_regs_init(void)
+{
+	pr_info("%s\n", __func__);
+	return platform_driver_register(&sta2x11_apb_soc_regs_platform_driver);
+}
+
 /*
- * What follows is the PCI device that hosts the above two pdevs.
+ * What follows are the PCI devices that host the above pdevs.
  * Each logic block is 4kB and they are all consecutive: we use this info.
  */
 
-/* Bar 0 */
-enum bar0_cells {
+/* Mfd 0 device */
+
+/* Mfd 0, Bar 0 */
+enum mfd0_bar0_cells {
 	STA2X11_GPIO_0 = 0,
 	STA2X11_GPIO_1,
 	STA2X11_GPIO_2,
@@ -295,8 +351,8 @@ enum bar0_cells {
 	STA2X11_SCR,
 	STA2X11_TIME,
 };
-/* Bar 1 */
-enum bar1_cells {
+/* Mfd 0 , Bar 1 */
+enum mfd0_bar1_cells {
 	STA2X11_APBREG = 0,
 };
 #define CELL_4K(_name, _cell) { \
@@ -330,17 +386,46 @@ static const __devinitconst struct resource apbreg_resources[] = {
 #define DEV(_name, _r) \
 	{ .name = _name, .num_resources = ARRAY_SIZE(_r), .resources = _r, }
 
-static __devinitdata struct mfd_cell sta2x11_mfd_bar0[] = {
+static __devinitdata struct mfd_cell sta2x11_mfd0_bar0[] = {
 	DEV("sta2x11-gpio", gpio_resources), /* offset 0: we add pdata later */
 	DEV("sta2x11-sctl", sctl_resources),
 	DEV("sta2x11-scr", scr_resources),
 	DEV("sta2x11-time", time_resources),
 };
 
-static __devinitdata struct mfd_cell sta2x11_mfd_bar1[] = {
+static __devinitdata struct mfd_cell sta2x11_mfd0_bar1[] = {
 	DEV("sta2x11-apbreg", apbreg_resources),
 };
 
+/* Mfd 1 devices */
+
+/* Mfd 1, Bar 0 */
+enum mfd1_bar0_cells {
+	STA2X11_VIC = 0,
+};
+
+/* Mfd 1, Bar 1 */
+enum mfd1_bar1_cells {
+	STA2X11_APB_SOC_REGS = 0,
+};
+
+static const __devinitconst struct resource vic_resources[] = {
+	CELL_4K("sta2x11-vic", STA2X11_VIC),
+};
+
+static const __devinitconst struct resource apb_soc_regs_resources[] = {
+	CELL_4K("sta2x11-apb-soc-regs", STA2X11_APB_SOC_REGS),
+};
+
+static __devinitdata struct mfd_cell sta2x11_mfd1_bar0[] = {
+	DEV("sta2x11-vic", vic_resources),
+};
+
+static __devinitdata struct mfd_cell sta2x11_mfd1_bar1[] = {
+	DEV("sta2x11-apb-soc-regs", apb_soc_regs_resources),
+};
+
+
 static int sta2x11_mfd_suspend(struct pci_dev *pdev, pm_message_t state)
 {
 	pci_save_state(pdev);
@@ -363,10 +448,63 @@ static int sta2x11_mfd_resume(struct pci_dev *pdev)
 	return 0;
 }
 
+struct sta2x11_mfd_bar_setup_data {
+	struct mfd_cell *cells;
+	int ncells;
+};
+
+struct sta2x11_mfd_setup_data {
+	struct sta2x11_mfd_bar_setup_data bars[2];
+};
+
+#define STA2X11_MFD0 0
+#define STA2X11_MFD1 1
+
+static struct sta2x11_mfd_setup_data mfd_setup_data[] = {
+	/* Mfd 0: gpio, sctl, scr, timers / apbregs */
+	[STA2X11_MFD0] = {
+		.bars = {
+			[0] = {
+				.cells = sta2x11_mfd0_bar0,
+				.ncells = ARRAY_SIZE(sta2x11_mfd0_bar0),
+			},
+			[1] = {
+				.cells = sta2x11_mfd0_bar1,
+				.ncells = ARRAY_SIZE(sta2x11_mfd0_bar1),
+			},
+		},
+	},
+	/* Mfd 1: vic / apb-soc-regs */
+	[STA2X11_MFD1] = {
+		.bars = {
+			[0] = {
+				.cells = sta2x11_mfd1_bar0,
+				.ncells = ARRAY_SIZE(sta2x11_mfd1_bar0),
+			},
+			[1] = {
+				.cells = sta2x11_mfd1_bar1,
+				.ncells = ARRAY_SIZE(sta2x11_mfd1_bar1),
+			},
+		},
+	},
+};
+
+static void __devinit sta2x11_mfd_setup(struct pci_dev *pdev,
+					struct sta2x11_mfd_setup_data *sd)
+{
+	int i, j;
+	for (i = 0; i < ARRAY_SIZE(sd->bars); i++)
+		for (j = 0; j < sd->bars[i].ncells; j++) {
+			sd->bars[i].cells[j].pdata_size = sizeof(pdev);
+			sd->bars[i].cells[j].platform_data = &pdev;
+		}
+}
+
 static int __devinit sta2x11_mfd_probe(struct pci_dev *pdev,
 				       const struct pci_device_id *pci_id)
 {
 	int err, i;
+	struct sta2x11_mfd_setup_data *setup_data;
 	struct sta2x11_gpio_pdata *gpio_data;
 
 	dev_info(&pdev->dev, "%s\n", __func__);
@@ -381,6 +519,10 @@ static int __devinit sta2x11_mfd_probe(struct pci_dev *pdev,
 	if (err)
 		dev_info(&pdev->dev, "Enable msi failed\n");
 
+	setup_data = pci_id->device == PCI_DEVICE_ID_STMICRO_GPIO ?
+		&mfd_setup_data[STA2X11_MFD0] :
+		&mfd_setup_data[STA2X11_MFD1];
+
 	/* Read gpio config data as pci device's platform data */
 	gpio_data = dev_get_platdata(&pdev->dev);
 	if (!gpio_data)
@@ -392,35 +534,23 @@ static int __devinit sta2x11_mfd_probe(struct pci_dev *pdev,
 		pdev, &pdev);
 
 	/* platform data is the pci device for all of them */
-	for (i = 0; i < ARRAY_SIZE(sta2x11_mfd_bar0); i++) {
-		sta2x11_mfd_bar0[i].pdata_size = sizeof(pdev);
-		sta2x11_mfd_bar0[i].platform_data = &pdev;
-	}
-	sta2x11_mfd_bar1[0].pdata_size = sizeof(pdev);
-	sta2x11_mfd_bar1[0].platform_data = &pdev;
+	sta2x11_mfd_setup(pdev, setup_data);
 
 	/* Record this pdev before mfd_add_devices: their probe looks for it */
 	sta2x11_mfd_add(pdev, GFP_ATOMIC);
 
-
-	err = mfd_add_devices(&pdev->dev, -1,
-			      sta2x11_mfd_bar0,
-			      ARRAY_SIZE(sta2x11_mfd_bar0),
-			      &pdev->resource[0],
-			      0, NULL);
-	if (err) {
-		dev_err(&pdev->dev, "mfd_add_devices[0] failed: %d\n", err);
-		goto err_disable;
-	}
-
-	err = mfd_add_devices(&pdev->dev, -1,
-			      sta2x11_mfd_bar1,
-			      ARRAY_SIZE(sta2x11_mfd_bar1),
-			      &pdev->resource[1],
-			      0, NULL);
-	if (err) {
-		dev_err(&pdev->dev, "mfd_add_devices[1] failed: %d\n", err);
-		goto err_disable;
+	/* Just 2 bars for all mfd's at present */
+	for (i = 0; i < 2; i++) {
+		err = mfd_add_devices(&pdev->dev, -1,
+				      setup_data->bars[i].cells,
+				      setup_data->bars[i].ncells,
+				      &pdev->resource[i],
+				      0, NULL);
+		if (err) {
+			dev_err(&pdev->dev,
+				"mfd_add_devices[%d] failed: %d\n", i, err);
+			goto err_disable;
+		}
 	}
 
 	return 0;
@@ -434,6 +564,7 @@ err_disable:
 
 static DEFINE_PCI_DEVICE_TABLE(sta2x11_mfd_tbl) = {
 	{PCI_DEVICE(PCI_VENDOR_ID_STMICRO, PCI_DEVICE_ID_STMICRO_GPIO)},
+	{PCI_DEVICE(PCI_VENDOR_ID_STMICRO, PCI_DEVICE_ID_STMICRO_VIC)},
 	{0,},
 };
 
@@ -459,6 +590,7 @@ static int __init sta2x11_mfd_init(void)
  */
 subsys_initcall(sta2x11_apbreg_init);
 subsys_initcall(sta2x11_sctl_init);
+subsys_initcall(sta2x11_apb_soc_regs_init);
 rootfs_initcall(sta2x11_mfd_init);
 
 MODULE_LICENSE("GPL v2");
diff --git a/include/linux/mfd/sta2x11-mfd.h b/include/linux/mfd/sta2x11-mfd.h
index d179227e866f..4d858797d7e2 100644
--- a/include/linux/mfd/sta2x11-mfd.h
+++ b/include/linux/mfd/sta2x11-mfd.h
@@ -26,6 +26,20 @@
 #include <linux/types.h>
 #include <linux/pci.h>
 
+enum sta2x11_mfd_plat_dev {
+	sta2x11_sctl = 0,
+	sta2x11_gpio,
+	sta2x11_scr,
+	sta2x11_time,
+	sta2x11_apbreg,
+	sta2x11_apb_soc_regs,
+	sta2x11_vic,
+	sta2x11_n_mfd_plat_devs,
+};
+
+extern u32
+__sta2x11_mfd_mask(struct pci_dev *, u32, u32, u32, enum sta2x11_mfd_plat_dev);
+
 /*
  * The MFD PCI block includes the GPIO peripherals and other register blocks.
  * For GPIO, we have 32*4 bits (I use "gsta" for "gpio sta2x11".)
@@ -182,7 +196,11 @@ struct sta2x11_gpio_pdata {
  * The APB bridge has its own registers, needed by our users as well.
  * They are accessed with the following read/mask/write function.
  */
-u32 sta2x11_apbreg_mask(struct pci_dev *pdev, u32 reg, u32 mask, u32 val);
+static inline u32
+sta2x11_apbreg_mask(struct pci_dev *pdev, u32 reg, u32 mask, u32 val)
+{
+	return __sta2x11_mfd_mask(pdev, reg, mask, val, sta2x11_apbreg);
+}
 
 /* CAN and MLB */
 #define APBREG_BSR	0x00	/* Bridge Status Reg */
@@ -211,7 +229,11 @@ u32 sta2x11_apbreg_mask(struct pci_dev *pdev, u32 reg, u32 mask, u32 val);
  * The system controller has its own registers. Some of these are accessed
  * by out users as well, using the following read/mask/write/function
  */
-u32 sta2x11_sctl_mask(struct pci_dev *pdev, u32 reg, u32 mask, u32 val);
+static inline
+u32 sta2x11_sctl_mask(struct pci_dev *pdev, u32 reg, u32 mask, u32 val)
+{
+	return __sta2x11_mfd_mask(pdev, reg, mask, val, sta2x11_sctl);
+}
 
 #define SCTL_SCCTL		0x00	/* System controller control register */
 #define SCTL_ARMCFG		0x04	/* ARM configuration register */
@@ -321,4 +343,134 @@ u32 sta2x11_sctl_mask(struct pci_dev *pdev, u32 reg, u32 mask, u32 val);
 #define SCTL_SCPEREN1_I2C3		(1 << 16)
 #define SCTL_SCPEREN1_USB_PHY		(1 << 17)
 
+/*
+ * APB-SOC registers
+ */
+static inline
+u32 sta2x11_apb_soc_regs_mask(struct pci_dev *pdev, u32 reg, u32 mask, u32 val)
+{
+	return __sta2x11_mfd_mask(pdev, reg, mask, val, sta2x11_apb_soc_regs);
+}
+
+#define PCIE_EP1_FUNC3_0_INTR_REG	0x000
+#define PCIE_EP1_FUNC7_4_INTR_REG	0x004
+#define PCIE_EP2_FUNC3_0_INTR_REG	0x008
+#define PCIE_EP2_FUNC7_4_INTR_REG	0x00c
+#define PCIE_EP3_FUNC3_0_INTR_REG	0x010
+#define PCIE_EP3_FUNC7_4_INTR_REG	0x014
+#define PCIE_EP4_FUNC3_0_INTR_REG	0x018
+#define PCIE_EP4_FUNC7_4_INTR_REG	0x01c
+#define PCIE_INTR_ENABLE0_REG		0x020
+#define PCIE_INTR_ENABLE1_REG		0x024
+#define PCIE_EP1_FUNC_TC_REG		0x028
+#define PCIE_EP2_FUNC_TC_REG		0x02c
+#define PCIE_EP3_FUNC_TC_REG		0x030
+#define PCIE_EP4_FUNC_TC_REG		0x034
+#define PCIE_EP1_FUNC_F_REG		0x038
+#define PCIE_EP2_FUNC_F_REG		0x03c
+#define PCIE_EP3_FUNC_F_REG		0x040
+#define PCIE_EP4_FUNC_F_REG		0x044
+#define PCIE_PAB_AMBA_SW_RST_REG	0x048
+#define PCIE_PM_STATUS_0_PORT_0_4	0x04c
+#define PCIE_PM_STATUS_7_0_EP1		0x050
+#define PCIE_PM_STATUS_7_0_EP2		0x054
+#define PCIE_PM_STATUS_7_0_EP3		0x058
+#define PCIE_PM_STATUS_7_0_EP4		0x05c
+#define PCIE_DEV_ID_0_EP1_REG		0x060
+#define PCIE_CC_REV_ID_0_EP1_REG	0x064
+#define PCIE_DEV_ID_1_EP1_REG		0x068
+#define PCIE_CC_REV_ID_1_EP1_REG	0x06c
+#define PCIE_DEV_ID_2_EP1_REG		0x070
+#define PCIE_CC_REV_ID_2_EP1_REG	0x074
+#define PCIE_DEV_ID_3_EP1_REG		0x078
+#define PCIE_CC_REV_ID_3_EP1_REG	0x07c
+#define PCIE_DEV_ID_4_EP1_REG		0x080
+#define PCIE_CC_REV_ID_4_EP1_REG	0x084
+#define PCIE_DEV_ID_5_EP1_REG		0x088
+#define PCIE_CC_REV_ID_5_EP1_REG	0x08c
+#define PCIE_DEV_ID_6_EP1_REG		0x090
+#define PCIE_CC_REV_ID_6_EP1_REG	0x094
+#define PCIE_DEV_ID_7_EP1_REG		0x098
+#define PCIE_CC_REV_ID_7_EP1_REG	0x09c
+#define PCIE_DEV_ID_0_EP2_REG		0x0a0
+#define PCIE_CC_REV_ID_0_EP2_REG	0x0a4
+#define PCIE_DEV_ID_1_EP2_REG		0x0a8
+#define PCIE_CC_REV_ID_1_EP2_REG	0x0ac
+#define PCIE_DEV_ID_2_EP2_REG		0x0b0
+#define PCIE_CC_REV_ID_2_EP2_REG	0x0b4
+#define PCIE_DEV_ID_3_EP2_REG		0x0b8
+#define PCIE_CC_REV_ID_3_EP2_REG	0x0bc
+#define PCIE_DEV_ID_4_EP2_REG		0x0c0
+#define PCIE_CC_REV_ID_4_EP2_REG	0x0c4
+#define PCIE_DEV_ID_5_EP2_REG		0x0c8
+#define PCIE_CC_REV_ID_5_EP2_REG	0x0cc
+#define PCIE_DEV_ID_6_EP2_REG		0x0d0
+#define PCIE_CC_REV_ID_6_EP2_REG	0x0d4
+#define PCIE_DEV_ID_7_EP2_REG		0x0d8
+#define PCIE_CC_REV_ID_7_EP2_REG	0x0dC
+#define PCIE_DEV_ID_0_EP3_REG		0x0e0
+#define PCIE_CC_REV_ID_0_EP3_REG	0x0e4
+#define PCIE_DEV_ID_1_EP3_REG		0x0e8
+#define PCIE_CC_REV_ID_1_EP3_REG	0x0ec
+#define PCIE_DEV_ID_2_EP3_REG		0x0f0
+#define PCIE_CC_REV_ID_2_EP3_REG	0x0f4
+#define PCIE_DEV_ID_3_EP3_REG		0x0f8
+#define PCIE_CC_REV_ID_3_EP3_REG	0x0fc
+#define PCIE_DEV_ID_4_EP3_REG		0x100
+#define PCIE_CC_REV_ID_4_EP3_REG	0x104
+#define PCIE_DEV_ID_5_EP3_REG		0x108
+#define PCIE_CC_REV_ID_5_EP3_REG	0x10c
+#define PCIE_DEV_ID_6_EP3_REG		0x110
+#define PCIE_CC_REV_ID_6_EP3_REG	0x114
+#define PCIE_DEV_ID_7_EP3_REG		0x118
+#define PCIE_CC_REV_ID_7_EP3_REG	0x11c
+#define PCIE_DEV_ID_0_EP4_REG		0x120
+#define PCIE_CC_REV_ID_0_EP4_REG	0x124
+#define PCIE_DEV_ID_1_EP4_REG		0x128
+#define PCIE_CC_REV_ID_1_EP4_REG	0x12c
+#define PCIE_DEV_ID_2_EP4_REG		0x130
+#define PCIE_CC_REV_ID_2_EP4_REG	0x134
+#define PCIE_DEV_ID_3_EP4_REG		0x138
+#define PCIE_CC_REV_ID_3_EP4_REG	0x13c
+#define PCIE_DEV_ID_4_EP4_REG		0x140
+#define PCIE_CC_REV_ID_4_EP4_REG	0x144
+#define PCIE_DEV_ID_5_EP4_REG		0x148
+#define PCIE_CC_REV_ID_5_EP4_REG	0x14c
+#define PCIE_DEV_ID_6_EP4_REG		0x150
+#define PCIE_CC_REV_ID_6_EP4_REG	0x154
+#define PCIE_DEV_ID_7_EP4_REG		0x158
+#define PCIE_CC_REV_ID_7_EP4_REG	0x15c
+#define PCIE_SUBSYS_VEN_ID_REG		0x160
+#define PCIE_COMMON_CLOCK_CONFIG_0_4_0	0x164
+#define PCIE_MIPHYP_SSC_EN_REG		0x168
+#define PCIE_MIPHYP_ADDR_REG		0x16c
+#define PCIE_L1_ASPM_READY_REG		0x170
+#define PCIE_EXT_CFG_RDY_REG		0x174
+#define PCIE_SoC_INT_ROUTER_STATUS0_REG 0x178
+#define PCIE_SoC_INT_ROUTER_STATUS1_REG 0x17c
+#define PCIE_SoC_INT_ROUTER_STATUS2_REG 0x180
+#define PCIE_SoC_INT_ROUTER_STATUS3_REG 0x184
+#define DMA_IP_CTRL_REG			0x324
+#define DISP_BRIDGE_PU_PD_CTRL_REG	0x328
+#define VIP_PU_PD_CTRL_REG		0x32c
+#define USB_MLB_PU_PD_CTRL_REG		0x330
+#define SDIO_PU_PD_MISCFUNC_CTRL_REG1	0x334
+#define SDIO_PU_PD_MISCFUNC_CTRL_REG2	0x338
+#define UART_PU_PD_CTRL_REG		0x33c
+#define ARM_Lock			0x340
+#define SYS_IO_CHAR_REG1		0x344
+#define SYS_IO_CHAR_REG2		0x348
+#define SATA_CORE_ID_REG		0x34c
+#define SATA_CTRL_REG			0x350
+#define I2C_HSFIX_MISC_REG		0x354
+#define SPARE2_RESERVED			0x358
+#define SPARE3_RESERVED			0x35c
+#define MASTER_LOCK_REG			0x368
+#define SYSTEM_CONFIG_STATUS_REG	0x36c
+#define MSP_CLK_CTRL_REG		0x39c
+#define COMPENSATION_REG1		0x3c4
+#define COMPENSATION_REG2		0x3c8
+#define COMPENSATION_REG3		0x3cc
+#define TEST_CTL_REG			0x3d0
+
 #endif /* __STA2X11_MFD_H */
-- 
cgit v1.2.3


From d94e25535a7979a6c81922496f475a5dd0e006b4 Mon Sep 17 00:00:00 2001
From: Davide Ciminaghi <ciminaghi@gnudd.com>
Date: Fri, 9 Nov 2012 15:19:53 +0100
Subject: mfd: sta2x11-mfd: Add regmap support

Signed-off-by: Davide Ciminaghi <ciminaghi@gnudd.com>
Acked-by: Alessandro Rubini <rubini@gnudd.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/Kconfig             |   1 +
 drivers/mfd/sta2x11-mfd.c       | 234 ++++++++++++++++++++++------------------
 include/linux/mfd/sta2x11-mfd.h |   1 +
 3 files changed, 131 insertions(+), 105 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index b280639a7634..59359a7a2493 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -1046,6 +1046,7 @@ config MFD_STA2X11
 	bool "STA2X11 multi function device support"
 	depends on STA2X11
 	select MFD_CORE
+	select REGMAP_MMIO
 
 config MFD_SYSCON
 	bool "System Controller Register R/W Based on Regmap"
diff --git a/drivers/mfd/sta2x11-mfd.c b/drivers/mfd/sta2x11-mfd.c
index 9e01b84aa8bc..0cac2013bbf6 100644
--- a/drivers/mfd/sta2x11-mfd.c
+++ b/drivers/mfd/sta2x11-mfd.c
@@ -27,17 +27,25 @@
 #include <linux/io.h>
 #include <linux/ioport.h>
 #include <linux/pci.h>
-#include <linux/debugfs.h>
 #include <linux/seq_file.h>
 #include <linux/platform_device.h>
 #include <linux/mfd/core.h>
 #include <linux/mfd/sta2x11-mfd.h>
+#include <linux/regmap.h>
 
 #include <asm/sta2x11.h>
 
+static inline int __reg_within_range(unsigned int r,
+				     unsigned int start,
+				     unsigned int end)
+{
+	return ((r >= start) && (r <= end));
+}
+
 /* This describes STA2X11 MFD chip for us, we may have several */
 struct sta2x11_mfd {
 	struct sta2x11_instance *instance;
+	struct regmap *regmap[sta2x11_n_mfd_plat_devs];
 	spinlock_t lock;
 	struct list_head list;
 	void __iomem *regs[sta2x11_n_mfd_plat_devs];
@@ -127,118 +135,126 @@ u32 __sta2x11_mfd_mask(struct pci_dev *pdev, u32 reg, u32 mask, u32 val,
 }
 EXPORT_SYMBOL(__sta2x11_mfd_mask);
 
-/* Two debugfs files, for our registers (FIXME: one instance only) */
-#define REG(regname) {.name = #regname, .offset = SCTL_ ## regname}
-static struct debugfs_reg32 sta2x11_sctl_regs[] = {
-	REG(SCCTL), REG(ARMCFG), REG(SCPLLCTL), REG(SCPLLFCTRL),
-	REG(SCRESFRACT), REG(SCRESCTRL1), REG(SCRESXTRL2), REG(SCPEREN0),
-	REG(SCPEREN1), REG(SCPEREN2), REG(SCGRST), REG(SCPCIPMCR1),
-	REG(SCPCIPMCR2), REG(SCPCIPMSR1), REG(SCPCIPMSR2), REG(SCPCIPMSR3),
-	REG(SCINTREN), REG(SCRISR), REG(SCCLKSTAT0), REG(SCCLKSTAT1),
-	REG(SCCLKSTAT2), REG(SCRSTSTA),
-};
-#undef REG
+/*
+ * Special sta2x11-mfd regmap lock/unlock functions
+ */
 
-static struct debugfs_regset32 sctl_regset = {
-	.regs = sta2x11_sctl_regs,
-	.nregs = ARRAY_SIZE(sta2x11_sctl_regs),
-};
+static void sta2x11_regmap_lock(void *__lock)
+{
+	spinlock_t *lock = __lock;
+	spin_lock(lock);
+}
 
-#define REG(regname) {.name = #regname, .offset = regname}
-static struct debugfs_reg32 sta2x11_apbreg_regs[] = {
-	REG(APBREG_BSR), REG(APBREG_PAER), REG(APBREG_PWAC), REG(APBREG_PRAC),
-	REG(APBREG_PCG), REG(APBREG_PUR), REG(APBREG_EMU_PCG),
-};
-#undef REG
+static void sta2x11_regmap_unlock(void *__lock)
+{
+	spinlock_t *lock = __lock;
+	spin_unlock(lock);
+}
 
-static struct debugfs_regset32 apbreg_regset = {
-	.regs = sta2x11_apbreg_regs,
-	.nregs = ARRAY_SIZE(sta2x11_apbreg_regs),
+static const char *sta2x11_mfd_names[sta2x11_n_mfd_plat_devs] = {
+	[sta2x11_sctl] = "sta2x11-sctl",
+	[sta2x11_apbreg] = "sta2x11-apbreg",
+	[sta2x11_apb_soc_regs] = "sta2x11-apb-soc-regs",
 };
 
-#define REG(regname) {.name = #regname, .offset = regname}
-static struct debugfs_reg32 sta2x11_apb_soc_regs_regs[] = {
-	REG(PCIE_EP1_FUNC3_0_INTR_REG), REG(PCIE_EP1_FUNC7_4_INTR_REG),
-	REG(PCIE_EP2_FUNC3_0_INTR_REG), REG(PCIE_EP2_FUNC7_4_INTR_REG),
-	REG(PCIE_EP3_FUNC3_0_INTR_REG), REG(PCIE_EP3_FUNC7_4_INTR_REG),
-	REG(PCIE_EP4_FUNC3_0_INTR_REG), REG(PCIE_EP4_FUNC7_4_INTR_REG),
-	REG(PCIE_INTR_ENABLE0_REG), REG(PCIE_INTR_ENABLE1_REG),
-	REG(PCIE_EP1_FUNC_TC_REG), REG(PCIE_EP2_FUNC_TC_REG),
-	REG(PCIE_EP3_FUNC_TC_REG), REG(PCIE_EP4_FUNC_TC_REG),
-	REG(PCIE_EP1_FUNC_F_REG), REG(PCIE_EP2_FUNC_F_REG),
-	REG(PCIE_EP3_FUNC_F_REG), REG(PCIE_EP4_FUNC_F_REG),
-	REG(PCIE_PAB_AMBA_SW_RST_REG), REG(PCIE_PM_STATUS_0_PORT_0_4),
-	REG(PCIE_PM_STATUS_7_0_EP1), REG(PCIE_PM_STATUS_7_0_EP2),
-	REG(PCIE_PM_STATUS_7_0_EP3), REG(PCIE_PM_STATUS_7_0_EP4),
-	REG(PCIE_DEV_ID_0_EP1_REG), REG(PCIE_CC_REV_ID_0_EP1_REG),
-	REG(PCIE_DEV_ID_1_EP1_REG), REG(PCIE_CC_REV_ID_1_EP1_REG),
-	REG(PCIE_DEV_ID_2_EP1_REG), REG(PCIE_CC_REV_ID_2_EP1_REG),
-	REG(PCIE_DEV_ID_3_EP1_REG), REG(PCIE_CC_REV_ID_3_EP1_REG),
-	REG(PCIE_DEV_ID_4_EP1_REG), REG(PCIE_CC_REV_ID_4_EP1_REG),
-	REG(PCIE_DEV_ID_5_EP1_REG), REG(PCIE_CC_REV_ID_5_EP1_REG),
-	REG(PCIE_DEV_ID_6_EP1_REG), REG(PCIE_CC_REV_ID_6_EP1_REG),
-	REG(PCIE_DEV_ID_7_EP1_REG), REG(PCIE_CC_REV_ID_7_EP1_REG),
-	REG(PCIE_DEV_ID_0_EP2_REG), REG(PCIE_CC_REV_ID_0_EP2_REG),
-	REG(PCIE_DEV_ID_1_EP2_REG), REG(PCIE_CC_REV_ID_1_EP2_REG),
-	REG(PCIE_DEV_ID_2_EP2_REG), REG(PCIE_CC_REV_ID_2_EP2_REG),
-	REG(PCIE_DEV_ID_3_EP2_REG), REG(PCIE_CC_REV_ID_3_EP2_REG),
-	REG(PCIE_DEV_ID_4_EP2_REG), REG(PCIE_CC_REV_ID_4_EP2_REG),
-	REG(PCIE_DEV_ID_5_EP2_REG), REG(PCIE_CC_REV_ID_5_EP2_REG),
-	REG(PCIE_DEV_ID_6_EP2_REG), REG(PCIE_CC_REV_ID_6_EP2_REG),
-	REG(PCIE_DEV_ID_7_EP2_REG), REG(PCIE_CC_REV_ID_7_EP2_REG),
-	REG(PCIE_DEV_ID_0_EP3_REG), REG(PCIE_CC_REV_ID_0_EP3_REG),
-	REG(PCIE_DEV_ID_1_EP3_REG), REG(PCIE_CC_REV_ID_1_EP3_REG),
-	REG(PCIE_DEV_ID_2_EP3_REG), REG(PCIE_CC_REV_ID_2_EP3_REG),
-	REG(PCIE_DEV_ID_3_EP3_REG), REG(PCIE_CC_REV_ID_3_EP3_REG),
-	REG(PCIE_DEV_ID_4_EP3_REG), REG(PCIE_CC_REV_ID_4_EP3_REG),
-	REG(PCIE_DEV_ID_5_EP3_REG), REG(PCIE_CC_REV_ID_5_EP3_REG),
-	REG(PCIE_DEV_ID_6_EP3_REG), REG(PCIE_CC_REV_ID_6_EP3_REG),
-	REG(PCIE_DEV_ID_7_EP3_REG), REG(PCIE_CC_REV_ID_7_EP3_REG),
-	REG(PCIE_DEV_ID_0_EP4_REG), REG(PCIE_CC_REV_ID_0_EP4_REG),
-	REG(PCIE_DEV_ID_1_EP4_REG), REG(PCIE_CC_REV_ID_1_EP4_REG),
-	REG(PCIE_DEV_ID_2_EP4_REG), REG(PCIE_CC_REV_ID_2_EP4_REG),
-	REG(PCIE_DEV_ID_3_EP4_REG), REG(PCIE_CC_REV_ID_3_EP4_REG),
-	REG(PCIE_DEV_ID_4_EP4_REG), REG(PCIE_CC_REV_ID_4_EP4_REG),
-	REG(PCIE_DEV_ID_5_EP4_REG), REG(PCIE_CC_REV_ID_5_EP4_REG),
-	REG(PCIE_DEV_ID_6_EP4_REG), REG(PCIE_CC_REV_ID_6_EP4_REG),
-	REG(PCIE_DEV_ID_7_EP4_REG), REG(PCIE_CC_REV_ID_7_EP4_REG),
-	REG(PCIE_SUBSYS_VEN_ID_REG), REG(PCIE_COMMON_CLOCK_CONFIG_0_4_0),
-	REG(PCIE_MIPHYP_SSC_EN_REG), REG(PCIE_MIPHYP_ADDR_REG),
-	REG(PCIE_L1_ASPM_READY_REG), REG(PCIE_EXT_CFG_RDY_REG),
-	REG(PCIE_SoC_INT_ROUTER_STATUS0_REG),
-	REG(PCIE_SoC_INT_ROUTER_STATUS1_REG),
-	REG(PCIE_SoC_INT_ROUTER_STATUS2_REG),
-	REG(PCIE_SoC_INT_ROUTER_STATUS3_REG),
-	REG(DMA_IP_CTRL_REG), REG(DISP_BRIDGE_PU_PD_CTRL_REG),
-	REG(VIP_PU_PD_CTRL_REG), REG(USB_MLB_PU_PD_CTRL_REG),
-	REG(SDIO_PU_PD_MISCFUNC_CTRL_REG1), REG(SDIO_PU_PD_MISCFUNC_CTRL_REG2),
-	REG(UART_PU_PD_CTRL_REG), REG(ARM_Lock), REG(SYS_IO_CHAR_REG1),
-	REG(SYS_IO_CHAR_REG2), REG(SATA_CORE_ID_REG), REG(SATA_CTRL_REG),
-	REG(I2C_HSFIX_MISC_REG), REG(SPARE2_RESERVED), REG(SPARE3_RESERVED),
-	REG(MASTER_LOCK_REG), REG(SYSTEM_CONFIG_STATUS_REG),
-	REG(MSP_CLK_CTRL_REG), REG(COMPENSATION_REG1), REG(COMPENSATION_REG2),
-	REG(COMPENSATION_REG3), REG(TEST_CTL_REG),
+static bool sta2x11_sctl_writeable_reg(struct device *dev, unsigned int reg)
+{
+	return !__reg_within_range(reg, SCTL_SCPCIECSBRST, SCTL_SCRSTSTA);
+}
+
+static struct regmap_config sta2x11_sctl_regmap_config = {
+	.reg_bits = 32,
+	.reg_stride = 4,
+	.val_bits = 32,
+	.lock = sta2x11_regmap_lock,
+	.unlock = sta2x11_regmap_unlock,
+	.max_register = SCTL_SCRSTSTA,
+	.writeable_reg = sta2x11_sctl_writeable_reg,
 };
-#undef REG
 
-static struct debugfs_regset32 apb_soc_regs_regset = {
-	.regs = sta2x11_apb_soc_regs_regs,
-	.nregs = ARRAY_SIZE(sta2x11_apb_soc_regs_regs),
+static bool sta2x11_apbreg_readable_reg(struct device *dev, unsigned int reg)
+{
+	/* Two blocks (CAN and MLB, SARAC) 0x100 bytes apart */
+	if (reg >= APBREG_BSR_SARAC)
+		reg -= APBREG_BSR_SARAC;
+	switch (reg) {
+	case APBREG_BSR:
+	case APBREG_PAER:
+	case APBREG_PWAC:
+	case APBREG_PRAC:
+	case APBREG_PCG:
+	case APBREG_PUR:
+	case APBREG_EMU_PCG:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static bool sta2x11_apbreg_writeable_reg(struct device *dev, unsigned int reg)
+{
+	if (reg >= APBREG_BSR_SARAC)
+		reg -= APBREG_BSR_SARAC;
+	if (!sta2x11_apbreg_readable_reg(dev, reg))
+		return false;
+	return reg != APBREG_PAER;
+}
+
+static struct regmap_config sta2x11_apbreg_regmap_config = {
+	.reg_bits = 32,
+	.reg_stride = 4,
+	.val_bits = 32,
+	.lock = sta2x11_regmap_lock,
+	.unlock = sta2x11_regmap_unlock,
+	.max_register = APBREG_EMU_PCG_SARAC,
+	.readable_reg = sta2x11_apbreg_readable_reg,
+	.writeable_reg = sta2x11_apbreg_writeable_reg,
 };
 
+static bool sta2x11_apb_soc_regs_readable_reg(struct device *dev,
+					      unsigned int reg)
+{
+	return reg <= PCIE_SoC_INT_ROUTER_STATUS3_REG ||
+		__reg_within_range(reg, DMA_IP_CTRL_REG, SPARE3_RESERVED) ||
+		__reg_within_range(reg, MASTER_LOCK_REG,
+				   SYSTEM_CONFIG_STATUS_REG) ||
+		reg == MSP_CLK_CTRL_REG ||
+		__reg_within_range(reg, COMPENSATION_REG1, TEST_CTL_REG);
+}
 
-static struct dentry *sta2x11_mfd_debugfs[sta2x11_n_mfd_plat_devs];
+static bool sta2x11_apb_soc_regs_writeable_reg(struct device *dev,
+					       unsigned int reg)
+{
+	if (!sta2x11_apb_soc_regs_readable_reg(dev, reg))
+		return false;
+	switch (reg) {
+	case PCIE_COMMON_CLOCK_CONFIG_0_4_0:
+	case SYSTEM_CONFIG_STATUS_REG:
+	case COMPENSATION_REG1:
+	case PCIE_SoC_INT_ROUTER_STATUS0_REG...PCIE_SoC_INT_ROUTER_STATUS3_REG:
+	case PCIE_PM_STATUS_0_PORT_0_4...PCIE_PM_STATUS_7_0_EP4:
+		return false;
+	default:
+		return true;
+	}
+}
 
-static struct debugfs_regset32 *sta2x11_mfd_regset[sta2x11_n_mfd_plat_devs] = {
-	[sta2x11_sctl] = &sctl_regset,
-	[sta2x11_apbreg] = &apbreg_regset,
-	[sta2x11_apb_soc_regs] = &apb_soc_regs_regset,
+static struct regmap_config sta2x11_apb_soc_regs_regmap_config = {
+	.reg_bits = 32,
+	.reg_stride = 4,
+	.val_bits = 32,
+	.lock = sta2x11_regmap_lock,
+	.unlock = sta2x11_regmap_unlock,
+	.max_register = TEST_CTL_REG,
+	.readable_reg = sta2x11_apb_soc_regs_readable_reg,
+	.writeable_reg = sta2x11_apb_soc_regs_writeable_reg,
 };
 
-static const char *sta2x11_mfd_names[sta2x11_n_mfd_plat_devs] = {
-	[sta2x11_sctl] = "sta2x11-sctl",
-	[sta2x11_apbreg] = "sta2x11-apbreg",
-	[sta2x11_apb_soc_regs] = "sta2x11-apb-soc-regs",
+static struct regmap_config *
+sta2x11_mfd_regmap_configs[sta2x11_n_mfd_plat_devs] = {
+	[sta2x11_sctl] = &sta2x11_sctl_regmap_config,
+	[sta2x11_apbreg] = &sta2x11_apbreg_regmap_config,
+	[sta2x11_apb_soc_regs] = &sta2x11_apb_soc_regs_regmap_config,
 };
 
 /* Probe for the three platform devices */
@@ -250,12 +266,14 @@ static int sta2x11_mfd_platform_probe(struct platform_device *dev,
 	struct sta2x11_mfd *mfd;
 	struct resource *res;
 	const char *name = sta2x11_mfd_names[index];
-	struct debugfs_regset32 *regset = sta2x11_mfd_regset[index];
+	struct regmap_config *regmap_config = sta2x11_mfd_regmap_configs[index];
 
 	pdev = dev->dev.platform_data;
 	mfd = sta2x11_mfd_find(*pdev);
 	if (!mfd)
 		return -ENODEV;
+	if (!regmap_config)
+		return -ENODEV;
 
 	res = platform_get_resource(dev, IORESOURCE_MEM, 0);
 	if (!res)
@@ -269,10 +287,16 @@ static int sta2x11_mfd_platform_probe(struct platform_device *dev,
 		release_mem_region(res->start, resource_size(res));
 		return -ENOMEM;
 	}
-	regset->base = mfd->regs[index];
-	sta2x11_mfd_debugfs[index] = debugfs_create_regset32(name,
-							     S_IFREG | S_IRUGO,
-							     NULL, regset);
+	regmap_config->lock_arg = &mfd->lock;
+	/*
+	   No caching, registers could be reached both via regmap and via
+	   void __iomem *
+	*/
+	regmap_config->cache_type = REGCACHE_NONE;
+	mfd->regmap[index] = devm_regmap_init_mmio(&dev->dev, mfd->regs[index],
+						   regmap_config);
+	WARN_ON(!mfd->regmap[index]);
+
 	return 0;
 }
 
diff --git a/include/linux/mfd/sta2x11-mfd.h b/include/linux/mfd/sta2x11-mfd.h
index 4d858797d7e2..83344304f2cf 100644
--- a/include/linux/mfd/sta2x11-mfd.h
+++ b/include/linux/mfd/sta2x11-mfd.h
@@ -246,6 +246,7 @@ u32 sta2x11_sctl_mask(struct pci_dev *pdev, u32 reg, u32 mask, u32 val)
 #define SCTL_SCPEREN1		0x20	/* Peripheral clock enable register 1 */
 #define SCTL_SCPEREN2		0x24	/* Peripheral clock enable register 2 */
 #define SCTL_SCGRST		0x28	/* Peripheral global reset */
+#define SCTL_SCPCIECSBRST       0x2c    /* PCIe PAB CSB reset status register */
 #define SCTL_SCPCIPMCR1		0x30	/* PCI power management control 1 */
 #define SCTL_SCPCIPMCR2		0x34	/* PCI power management control 2 */
 #define SCTL_SCPCIPMSR1		0x38	/* PCI power management status 1 */
-- 
cgit v1.2.3


From 29f5b5a326b44c55e81b15308255ba695fecb323 Mon Sep 17 00:00:00 2001
From: Davide Ciminaghi <ciminaghi@gnudd.com>
Date: Fri, 9 Nov 2012 15:19:54 +0100
Subject: mfd: sta2x11-mfd: Add sta2x11_mfd_get_regs_data() function

A couple of predefined clocks (mux and gated) need to be
initialized with the virtual address of the clock's controlling
register and the address of a spinlock used to protect against
races.

This function exports such data for all the mfd cells.

Signed-off-by: Davide Ciminaghi <ciminaghi@gnudd.com>
Acked-by: Alessandro Rubini <rubini@gnudd.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/sta2x11-mfd.c       | 22 ++++++++++++++++++++++
 include/linux/mfd/sta2x11-mfd.h |  5 +++++
 2 files changed, 27 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mfd/sta2x11-mfd.c b/drivers/mfd/sta2x11-mfd.c
index 0cac2013bbf6..6d12ab42aba3 100644
--- a/drivers/mfd/sta2x11-mfd.c
+++ b/drivers/mfd/sta2x11-mfd.c
@@ -135,6 +135,28 @@ u32 __sta2x11_mfd_mask(struct pci_dev *pdev, u32 reg, u32 mask, u32 val,
 }
 EXPORT_SYMBOL(__sta2x11_mfd_mask);
 
+int sta2x11_mfd_get_regs_data(struct platform_device *dev,
+			      enum sta2x11_mfd_plat_dev index,
+			      void __iomem **regs,
+			      spinlock_t **lock)
+{
+	struct pci_dev *pdev = *(struct pci_dev **)(dev->dev.platform_data);
+	struct sta2x11_mfd *mfd;
+
+	if (!pdev)
+		return -ENODEV;
+	mfd = sta2x11_mfd_find(pdev);
+	if (!mfd)
+		return -ENODEV;
+	if (index >= sta2x11_n_mfd_plat_devs)
+		return -ENODEV;
+	*regs = mfd->regs[index];
+	*lock = &mfd->lock[index];
+	pr_debug("%s %d *regs = %p\n", __func__, __LINE__, *regs);
+	return *regs ? 0 : -ENODEV;
+}
+EXPORT_SYMBOL(sta2x11_mfd_get_regs_data);
+
 /*
  * Special sta2x11-mfd regmap lock/unlock functions
  */
diff --git a/include/linux/mfd/sta2x11-mfd.h b/include/linux/mfd/sta2x11-mfd.h
index 83344304f2cf..e813e5efbe4e 100644
--- a/include/linux/mfd/sta2x11-mfd.h
+++ b/include/linux/mfd/sta2x11-mfd.h
@@ -474,4 +474,9 @@ u32 sta2x11_apb_soc_regs_mask(struct pci_dev *pdev, u32 reg, u32 mask, u32 val)
 #define COMPENSATION_REG3		0x3cc
 #define TEST_CTL_REG			0x3d0
 
+extern int sta2x11_mfd_get_regs_data(struct platform_device *pdev,
+				     enum sta2x11_mfd_plat_dev index,
+				     void __iomem **regs,
+				     spinlock_t **lock);
+
 #endif /* __STA2X11_MFD_H */
-- 
cgit v1.2.3


From b18adafccd497245a6bc5b867bf9cba7e01f8729 Mon Sep 17 00:00:00 2001
From: Davide Ciminaghi <ciminaghi@gnudd.com>
Date: Fri, 9 Nov 2012 15:19:55 +0100
Subject: mfd: sta2x11-mfd: Use defines for platform devices' names

Since there are now many sta2x11-mfd platform devices, using defines
for their names looks like a better solution.

Signed-off-by: Davide Ciminaghi <ciminaghi@gnudd.com>
Acked-by: Alessandro Rubini <rubini@gnudd.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/sta2x11-mfd.c       | 42 +++++++++++++++++++++--------------------
 include/linux/mfd/sta2x11-mfd.h |  8 ++++++++
 2 files changed, 30 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/sta2x11-mfd.c b/drivers/mfd/sta2x11-mfd.c
index 6d12ab42aba3..8d38ef264722 100644
--- a/drivers/mfd/sta2x11-mfd.c
+++ b/drivers/mfd/sta2x11-mfd.c
@@ -174,9 +174,9 @@ static void sta2x11_regmap_unlock(void *__lock)
 }
 
 static const char *sta2x11_mfd_names[sta2x11_n_mfd_plat_devs] = {
-	[sta2x11_sctl] = "sta2x11-sctl",
-	[sta2x11_apbreg] = "sta2x11-apbreg",
-	[sta2x11_apb_soc_regs] = "sta2x11-apb-soc-regs",
+	[sta2x11_sctl] = STA2X11_MFD_SCTL_NAME,
+	[sta2x11_apbreg] = STA2X11_MFD_APBREG_NAME,
+	[sta2x11_apb_soc_regs] = STA2X11_MFD_APB_SOC_REGS_NAME,
 };
 
 static bool sta2x11_sctl_writeable_reg(struct device *dev, unsigned int reg)
@@ -340,7 +340,7 @@ static int sta2x11_apb_soc_regs_probe(struct platform_device *dev)
 /* The three platform drivers */
 static struct platform_driver sta2x11_sctl_platform_driver = {
 	.driver = {
-		.name	= "sta2x11-sctl",
+		.name	= STA2X11_MFD_SCTL_NAME,
 		.owner	= THIS_MODULE,
 	},
 	.probe		= sta2x11_sctl_probe,
@@ -354,7 +354,7 @@ static int __init sta2x11_sctl_init(void)
 
 static struct platform_driver sta2x11_platform_driver = {
 	.driver = {
-		.name	= "sta2x11-apbreg",
+		.name	= STA2X11_MFD_APBREG_NAME,
 		.owner	= THIS_MODULE,
 	},
 	.probe		= sta2x11_apbreg_probe,
@@ -368,7 +368,7 @@ static int __init sta2x11_apbreg_init(void)
 
 static struct platform_driver sta2x11_apb_soc_regs_platform_driver = {
 	.driver = {
-		.name	= "sta2x11-apb-soc-regs",
+		.name	= STA2X11_MFD_APB_SOC_REGS_NAME,
 		.owner	= THIS_MODULE,
 	},
 	.probe		= sta2x11_apb_soc_regs_probe,
@@ -409,38 +409,40 @@ enum mfd0_bar1_cells {
 
 static const __devinitconst struct resource gpio_resources[] = {
 	{
-		.name = "sta2x11_gpio", /* 4 consecutive cells, 1 driver */
+		/* 4 consecutive cells, 1 driver */
+		.name = STA2X11_MFD_GPIO_NAME,
 		.start = 0,
 		.end = (4 * 4096) - 1,
 		.flags = IORESOURCE_MEM,
 	}
 };
 static const __devinitconst struct resource sctl_resources[] = {
-	CELL_4K("sta2x11-sctl", STA2X11_SCTL),
+	CELL_4K(STA2X11_MFD_SCTL_NAME, STA2X11_SCTL),
 };
 static const __devinitconst struct resource scr_resources[] = {
-	CELL_4K("sta2x11-scr", STA2X11_SCR),
+	CELL_4K(STA2X11_MFD_SCR_NAME, STA2X11_SCR),
 };
 static const __devinitconst struct resource time_resources[] = {
-	CELL_4K("sta2x11-time", STA2X11_TIME),
+	CELL_4K(STA2X11_MFD_TIME_NAME, STA2X11_TIME),
 };
 
 static const __devinitconst struct resource apbreg_resources[] = {
-	CELL_4K("sta2x11-apbreg", STA2X11_APBREG),
+	CELL_4K(STA2X11_MFD_APBREG_NAME, STA2X11_APBREG),
 };
 
 #define DEV(_name, _r) \
 	{ .name = _name, .num_resources = ARRAY_SIZE(_r), .resources = _r, }
 
 static __devinitdata struct mfd_cell sta2x11_mfd0_bar0[] = {
-	DEV("sta2x11-gpio", gpio_resources), /* offset 0: we add pdata later */
-	DEV("sta2x11-sctl", sctl_resources),
-	DEV("sta2x11-scr", scr_resources),
-	DEV("sta2x11-time", time_resources),
+	/* offset 0: we add pdata later */
+	DEV(STA2X11_MFD_GPIO_NAME, gpio_resources),
+	DEV(STA2X11_MFD_SCTL_NAME, sctl_resources),
+	DEV(STA2X11_MFD_SCR_NAME,  scr_resources),
+	DEV(STA2X11_MFD_TIME_NAME, time_resources),
 };
 
 static __devinitdata struct mfd_cell sta2x11_mfd0_bar1[] = {
-	DEV("sta2x11-apbreg", apbreg_resources),
+	DEV(STA2X11_MFD_APBREG_NAME, apbreg_resources),
 };
 
 /* Mfd 1 devices */
@@ -456,19 +458,19 @@ enum mfd1_bar1_cells {
 };
 
 static const __devinitconst struct resource vic_resources[] = {
-	CELL_4K("sta2x11-vic", STA2X11_VIC),
+	CELL_4K(STA2X11_MFD_VIC_NAME, STA2X11_VIC),
 };
 
 static const __devinitconst struct resource apb_soc_regs_resources[] = {
-	CELL_4K("sta2x11-apb-soc-regs", STA2X11_APB_SOC_REGS),
+	CELL_4K(STA2X11_MFD_APB_SOC_REGS_NAME, STA2X11_APB_SOC_REGS),
 };
 
 static __devinitdata struct mfd_cell sta2x11_mfd1_bar0[] = {
-	DEV("sta2x11-vic", vic_resources),
+	DEV(STA2X11_MFD_VIC_NAME, vic_resources),
 };
 
 static __devinitdata struct mfd_cell sta2x11_mfd1_bar1[] = {
-	DEV("sta2x11-apb-soc-regs", apb_soc_regs_resources),
+	DEV(STA2X11_MFD_APB_SOC_REGS_NAME, apb_soc_regs_resources),
 };
 
 
diff --git a/include/linux/mfd/sta2x11-mfd.h b/include/linux/mfd/sta2x11-mfd.h
index e813e5efbe4e..058de2bacf76 100644
--- a/include/linux/mfd/sta2x11-mfd.h
+++ b/include/linux/mfd/sta2x11-mfd.h
@@ -37,6 +37,14 @@ enum sta2x11_mfd_plat_dev {
 	sta2x11_n_mfd_plat_devs,
 };
 
+#define STA2X11_MFD_SCTL_NAME	       "sta2x11-sctl"
+#define STA2X11_MFD_GPIO_NAME	       "sta2x11-gpio"
+#define STA2X11_MFD_SCR_NAME	       "sta2x11-scr"
+#define STA2X11_MFD_TIME_NAME	       "sta2x11-time"
+#define STA2X11_MFD_APBREG_NAME	       "sta2x11-apbreg"
+#define STA2X11_MFD_APB_SOC_REGS_NAME  "sta2x11-apb-soc-regs"
+#define STA2X11_MFD_VIC_NAME	       "sta2x11-vic"
+
 extern u32
 __sta2x11_mfd_mask(struct pci_dev *, u32, u32, u32, enum sta2x11_mfd_plat_dev);
 
-- 
cgit v1.2.3


From dba6c1aeea4dd0e251e41c3f585abf4a06a4f057 Mon Sep 17 00:00:00 2001
From: Davide Ciminaghi <ciminaghi@gnudd.com>
Date: Fri, 9 Nov 2012 15:19:59 +0100
Subject: mfd: sta2x11-mfd: Add scr (otp registers) platform driver

Signed-off-by: Davide Ciminaghi <ciminaghi@gnudd.com>
Acked-by: Alessandro Rubini <rubini@gnudd.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/sta2x11-mfd.c       | 52 ++++++++++++++++++++++++++++++++++++++++-
 include/linux/mfd/sta2x11-mfd.h |  7 ++++++
 2 files changed, 58 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/sta2x11-mfd.c b/drivers/mfd/sta2x11-mfd.c
index da65839f1d86..7365f0f89df2 100644
--- a/drivers/mfd/sta2x11-mfd.c
+++ b/drivers/mfd/sta2x11-mfd.c
@@ -175,10 +175,16 @@ static void sta2x11_regmap_unlock(void *__lock)
 	spin_unlock(lock);
 }
 
+/* OTP (one time programmable registers do not require locking */
+static void sta2x11_regmap_nolock(void *__lock)
+{
+}
+
 static const char *sta2x11_mfd_names[sta2x11_n_mfd_plat_devs] = {
 	[sta2x11_sctl] = STA2X11_MFD_SCTL_NAME,
 	[sta2x11_apbreg] = STA2X11_MFD_APBREG_NAME,
 	[sta2x11_apb_soc_regs] = STA2X11_MFD_APB_SOC_REGS_NAME,
+	[sta2x11_scr] = STA2X11_MFD_SCR_NAME,
 };
 
 static bool sta2x11_sctl_writeable_reg(struct device *dev, unsigned int reg)
@@ -196,6 +202,28 @@ static struct regmap_config sta2x11_sctl_regmap_config = {
 	.writeable_reg = sta2x11_sctl_writeable_reg,
 };
 
+static bool sta2x11_scr_readable_reg(struct device *dev, unsigned int reg)
+{
+	return (reg == STA2X11_SECR_CR) ||
+		__reg_within_range(reg, STA2X11_SECR_FVR0, STA2X11_SECR_FVR1);
+}
+
+static bool sta2x11_scr_writeable_reg(struct device *dev, unsigned int reg)
+{
+	return false;
+}
+
+static struct regmap_config sta2x11_scr_regmap_config = {
+	.reg_bits = 32,
+	.reg_stride = 4,
+	.val_bits = 32,
+	.lock = sta2x11_regmap_nolock,
+	.unlock = sta2x11_regmap_nolock,
+	.max_register = STA2X11_SECR_FVR1,
+	.readable_reg = sta2x11_scr_readable_reg,
+	.writeable_reg = sta2x11_scr_writeable_reg,
+};
+
 static bool sta2x11_apbreg_readable_reg(struct device *dev, unsigned int reg)
 {
 	/* Two blocks (CAN and MLB, SARAC) 0x100 bytes apart */
@@ -279,9 +307,10 @@ sta2x11_mfd_regmap_configs[sta2x11_n_mfd_plat_devs] = {
 	[sta2x11_sctl] = &sta2x11_sctl_regmap_config,
 	[sta2x11_apbreg] = &sta2x11_apbreg_regmap_config,
 	[sta2x11_apb_soc_regs] = &sta2x11_apb_soc_regs_regmap_config,
+	[sta2x11_scr] = &sta2x11_scr_regmap_config,
 };
 
-/* Probe for the three platform devices */
+/* Probe for the four platform devices */
 
 static int sta2x11_mfd_platform_probe(struct platform_device *dev,
 				      enum sta2x11_mfd_plat_dev index)
@@ -339,6 +368,11 @@ static int sta2x11_apb_soc_regs_probe(struct platform_device *dev)
 	return sta2x11_mfd_platform_probe(dev, sta2x11_apb_soc_regs);
 }
 
+static int sta2x11_scr_probe(struct platform_device *dev)
+{
+	return sta2x11_mfd_platform_probe(dev, sta2x11_scr);
+}
+
 /* The three platform drivers */
 static struct platform_driver sta2x11_sctl_platform_driver = {
 	.driver = {
@@ -382,6 +416,21 @@ static int __init sta2x11_apb_soc_regs_init(void)
 	return platform_driver_register(&sta2x11_apb_soc_regs_platform_driver);
 }
 
+static struct platform_driver sta2x11_scr_platform_driver = {
+	.driver = {
+		.name = STA2X11_MFD_SCR_NAME,
+		.owner = THIS_MODULE,
+	},
+	.probe = sta2x11_scr_probe,
+};
+
+static int __init sta2x11_scr_init(void)
+{
+	pr_info("%s\n", __func__);
+	return platform_driver_register(&sta2x11_scr_platform_driver);
+}
+
+
 /*
  * What follows are the PCI devices that host the above pdevs.
  * Each logic block is 4kB and they are all consecutive: we use this info.
@@ -631,6 +680,7 @@ static int __init sta2x11_mfd_init(void)
 subsys_initcall(sta2x11_apbreg_init);
 subsys_initcall(sta2x11_sctl_init);
 subsys_initcall(sta2x11_apb_soc_regs_init);
+subsys_initcall(sta2x11_scr_init);
 rootfs_initcall(sta2x11_mfd_init);
 
 MODULE_LICENSE("GPL v2");
diff --git a/include/linux/mfd/sta2x11-mfd.h b/include/linux/mfd/sta2x11-mfd.h
index 058de2bacf76..08cad95758c6 100644
--- a/include/linux/mfd/sta2x11-mfd.h
+++ b/include/linux/mfd/sta2x11-mfd.h
@@ -482,6 +482,13 @@ u32 sta2x11_apb_soc_regs_mask(struct pci_dev *pdev, u32 reg, u32 mask, u32 val)
 #define COMPENSATION_REG3		0x3cc
 #define TEST_CTL_REG			0x3d0
 
+/*
+ * SECR (OTP) registers
+ */
+#define STA2X11_SECR_CR			0x00
+#define STA2X11_SECR_FVR0		0x10
+#define STA2X11_SECR_FVR1		0x14
+
 extern int sta2x11_mfd_get_regs_data(struct platform_device *pdev,
 				     enum sta2x11_mfd_plat_dev index,
 				     void __iomem **regs,
-- 
cgit v1.2.3


From 818b5c2528b9e31101bb39018fd211dcf159a696 Mon Sep 17 00:00:00 2001
From: Davide Ciminaghi <ciminaghi@gnudd.com>
Date: Fri, 9 Nov 2012 15:20:00 +0100
Subject: mfd: sta2x11-mfd: Add defines for some sta2x11 sctl registers

These are required for the clock infrastructure code to properly configure
and control the sta2x11 PLLs.

Signed-off-by: Davide Ciminaghi <ciminaghi@gnudd.com>
Acked-by: Alessandro Rubini <rubini@gnudd.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 include/linux/mfd/sta2x11-mfd.h | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mfd/sta2x11-mfd.h b/include/linux/mfd/sta2x11-mfd.h
index 08cad95758c6..9a855ac11cbf 100644
--- a/include/linux/mfd/sta2x11-mfd.h
+++ b/include/linux/mfd/sta2x11-mfd.h
@@ -246,8 +246,29 @@ u32 sta2x11_sctl_mask(struct pci_dev *pdev, u32 reg, u32 mask, u32 val)
 #define SCTL_SCCTL		0x00	/* System controller control register */
 #define SCTL_ARMCFG		0x04	/* ARM configuration register */
 #define SCTL_SCPLLCTL		0x08	/* PLL control status register */
+
+#define SCTL_SCPLLCTL_AUDIO_PLL_PD	     BIT(1)
+#define SCTL_SCPLLCTL_FRAC_CONTROL	     BIT(3)
+#define SCTL_SCPLLCTL_STRB_BYPASS	     BIT(6)
+#define SCTL_SCPLLCTL_STRB_INPUT	     BIT(8)
+
 #define SCTL_SCPLLFCTRL		0x0c	/* PLL frequency control register */
+
+#define SCTL_SCPLLFCTRL_AUDIO_PLL_NDIV_MASK	0xff
+#define SCTL_SCPLLFCTRL_AUDIO_PLL_NDIV_SHIFT	  10
+#define SCTL_SCPLLFCTRL_AUDIO_PLL_IDF_MASK	   7
+#define SCTL_SCPLLFCTRL_AUDIO_PLL_IDF_SHIFT	  21
+#define SCTL_SCPLLFCTRL_AUDIO_PLL_ODF_MASK	   7
+#define SCTL_SCPLLFCTRL_AUDIO_PLL_ODF_SHIFT	  18
+#define SCTL_SCPLLFCTRL_DITHER_DISABLE_MASK     0x03
+#define SCTL_SCPLLFCTRL_DITHER_DISABLE_SHIFT       4
+
+
 #define SCTL_SCRESFRACT		0x10	/* PLL fractional input register */
+
+#define SCTL_SCRESFRACT_MASK	0x0000ffff
+
+
 #define SCTL_SCRESCTRL1		0x14	/* Peripheral reset control 1 */
 #define SCTL_SCRESXTRL2		0x18	/* Peripheral reset control 2 */
 #define SCTL_SCPEREN0		0x1c	/* Peripheral clock enable register 0 */
-- 
cgit v1.2.3


From bcf58e725ddc45d31addbc6627d4f0edccc824c1 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Thu, 26 Jul 2012 04:02:49 -0700
Subject: userns: Make create_new_namespaces take a user_ns parameter

Modify create_new_namespaces to explicitly take a user namespace
parameter, instead of implicitly through the task_struct.

This allows an implementation of unshare(CLONE_NEWUSER) where
the new user namespace is not stored onto the current task_struct
until after all of the namespaces are created.

Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/linux/ipc_namespace.h |  7 ++++---
 include/linux/utsname.h       |  6 +++---
 ipc/namespace.c               | 10 ++++------
 kernel/nsproxy.c              | 22 +++++++++++++---------
 kernel/utsname.c              |  9 ++++-----
 5 files changed, 28 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index 5499c92a9153..f03af702a39d 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -133,7 +133,8 @@ static inline int mq_init_ns(struct ipc_namespace *ns) { return 0; }
 
 #if defined(CONFIG_IPC_NS)
 extern struct ipc_namespace *copy_ipcs(unsigned long flags,
-				       struct task_struct *tsk);
+	struct user_namespace *user_ns, struct ipc_namespace *ns);
+
 static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns)
 {
 	if (ns)
@@ -144,12 +145,12 @@ static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns)
 extern void put_ipc_ns(struct ipc_namespace *ns);
 #else
 static inline struct ipc_namespace *copy_ipcs(unsigned long flags,
-					      struct task_struct *tsk)
+	struct user_namespace *user_ns, struct ipc_namespace *ns)
 {
 	if (flags & CLONE_NEWIPC)
 		return ERR_PTR(-EINVAL);
 
-	return tsk->nsproxy->ipc_ns;
+	return ns;
 }
 
 static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns)
diff --git a/include/linux/utsname.h b/include/linux/utsname.h
index 2b345206722a..221f4a0a7502 100644
--- a/include/linux/utsname.h
+++ b/include/linux/utsname.h
@@ -33,7 +33,7 @@ static inline void get_uts_ns(struct uts_namespace *ns)
 }
 
 extern struct uts_namespace *copy_utsname(unsigned long flags,
-					  struct task_struct *tsk);
+	struct user_namespace *user_ns, struct uts_namespace *old_ns);
 extern void free_uts_ns(struct kref *kref);
 
 static inline void put_uts_ns(struct uts_namespace *ns)
@@ -50,12 +50,12 @@ static inline void put_uts_ns(struct uts_namespace *ns)
 }
 
 static inline struct uts_namespace *copy_utsname(unsigned long flags,
-						 struct task_struct *tsk)
+	struct user_namespace *user_ns, struct uts_namespace *old_ns)
 {
 	if (flags & CLONE_NEWUTS)
 		return ERR_PTR(-EINVAL);
 
-	return tsk->nsproxy->uts_ns;
+	return old_ns;
 }
 #endif
 
diff --git a/ipc/namespace.c b/ipc/namespace.c
index 6ed33c05cb66..72c868277793 100644
--- a/ipc/namespace.c
+++ b/ipc/namespace.c
@@ -16,7 +16,7 @@
 
 #include "util.h"
 
-static struct ipc_namespace *create_ipc_ns(struct task_struct *tsk,
+static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
 					   struct ipc_namespace *old_ns)
 {
 	struct ipc_namespace *ns;
@@ -46,19 +46,17 @@ static struct ipc_namespace *create_ipc_ns(struct task_struct *tsk,
 	ipcns_notify(IPCNS_CREATED);
 	register_ipcns_notifier(ns);
 
-	ns->user_ns = get_user_ns(task_cred_xxx(tsk, user_ns));
+	ns->user_ns = get_user_ns(user_ns);
 
 	return ns;
 }
 
 struct ipc_namespace *copy_ipcs(unsigned long flags,
-				struct task_struct *tsk)
+	struct user_namespace *user_ns, struct ipc_namespace *ns)
 {
-	struct ipc_namespace *ns = tsk->nsproxy->ipc_ns;
-
 	if (!(flags & CLONE_NEWIPC))
 		return get_ipc_ns(ns);
-	return create_ipc_ns(tsk, ns);
+	return create_ipc_ns(user_ns, ns);
 }
 
 /*
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index 4357a0a7d17d..2ddd81657a2a 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -57,7 +57,8 @@ static inline struct nsproxy *create_nsproxy(void)
  * leave it to the caller to do proper locking and attach it to task.
  */
 static struct nsproxy *create_new_namespaces(unsigned long flags,
-			struct task_struct *tsk, struct fs_struct *new_fs)
+	struct task_struct *tsk, struct user_namespace *user_ns,
+	struct fs_struct *new_fs)
 {
 	struct nsproxy *new_nsp;
 	int err;
@@ -66,31 +67,31 @@ static struct nsproxy *create_new_namespaces(unsigned long flags,
 	if (!new_nsp)
 		return ERR_PTR(-ENOMEM);
 
-	new_nsp->mnt_ns = copy_mnt_ns(flags, tsk->nsproxy->mnt_ns, task_cred_xxx(tsk, user_ns), new_fs);
+	new_nsp->mnt_ns = copy_mnt_ns(flags, tsk->nsproxy->mnt_ns, user_ns, new_fs);
 	if (IS_ERR(new_nsp->mnt_ns)) {
 		err = PTR_ERR(new_nsp->mnt_ns);
 		goto out_ns;
 	}
 
-	new_nsp->uts_ns = copy_utsname(flags, tsk);
+	new_nsp->uts_ns = copy_utsname(flags, user_ns, tsk->nsproxy->uts_ns);
 	if (IS_ERR(new_nsp->uts_ns)) {
 		err = PTR_ERR(new_nsp->uts_ns);
 		goto out_uts;
 	}
 
-	new_nsp->ipc_ns = copy_ipcs(flags, tsk);
+	new_nsp->ipc_ns = copy_ipcs(flags, user_ns, tsk->nsproxy->ipc_ns);
 	if (IS_ERR(new_nsp->ipc_ns)) {
 		err = PTR_ERR(new_nsp->ipc_ns);
 		goto out_ipc;
 	}
 
-	new_nsp->pid_ns = copy_pid_ns(flags, task_cred_xxx(tsk, user_ns), tsk->nsproxy->pid_ns);
+	new_nsp->pid_ns = copy_pid_ns(flags, user_ns, tsk->nsproxy->pid_ns);
 	if (IS_ERR(new_nsp->pid_ns)) {
 		err = PTR_ERR(new_nsp->pid_ns);
 		goto out_pid;
 	}
 
-	new_nsp->net_ns = copy_net_ns(flags, task_cred_xxx(tsk, user_ns), tsk->nsproxy->net_ns);
+	new_nsp->net_ns = copy_net_ns(flags, user_ns, tsk->nsproxy->net_ns);
 	if (IS_ERR(new_nsp->net_ns)) {
 		err = PTR_ERR(new_nsp->net_ns);
 		goto out_net;
@@ -152,7 +153,8 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk)
 		goto out;
 	}
 
-	new_ns = create_new_namespaces(flags, tsk, tsk->fs);
+	new_ns = create_new_namespaces(flags, tsk,
+				       task_cred_xxx(tsk, user_ns), tsk->fs);
 	if (IS_ERR(new_ns)) {
 		err = PTR_ERR(new_ns);
 		goto out;
@@ -186,6 +188,7 @@ void free_nsproxy(struct nsproxy *ns)
 int unshare_nsproxy_namespaces(unsigned long unshare_flags,
 		struct nsproxy **new_nsp, struct fs_struct *new_fs)
 {
+	struct user_namespace *user_ns;
 	int err = 0;
 
 	if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
@@ -195,7 +198,8 @@ int unshare_nsproxy_namespaces(unsigned long unshare_flags,
 	if (!nsown_capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
-	*new_nsp = create_new_namespaces(unshare_flags, current,
+	user_ns = current_user_ns();
+	*new_nsp = create_new_namespaces(unshare_flags, current, user_ns,
 				new_fs ? new_fs : current->fs);
 	if (IS_ERR(*new_nsp)) {
 		err = PTR_ERR(*new_nsp);
@@ -252,7 +256,7 @@ SYSCALL_DEFINE2(setns, int, fd, int, nstype)
 	if (nstype && (ops->type != nstype))
 		goto out;
 
-	new_nsproxy = create_new_namespaces(0, tsk, tsk->fs);
+	new_nsproxy = create_new_namespaces(0, tsk, current_user_ns(), tsk->fs);
 	if (IS_ERR(new_nsproxy)) {
 		err = PTR_ERR(new_nsproxy);
 		goto out;
diff --git a/kernel/utsname.c b/kernel/utsname.c
index 4a9362f9325d..fdc619eb61ef 100644
--- a/kernel/utsname.c
+++ b/kernel/utsname.c
@@ -32,7 +32,7 @@ static struct uts_namespace *create_uts_ns(void)
  * @old_ns: namespace to clone
  * Return NULL on error (failure to kmalloc), new ns otherwise
  */
-static struct uts_namespace *clone_uts_ns(struct task_struct *tsk,
+static struct uts_namespace *clone_uts_ns(struct user_namespace *user_ns,
 					  struct uts_namespace *old_ns)
 {
 	struct uts_namespace *ns;
@@ -43,7 +43,7 @@ static struct uts_namespace *clone_uts_ns(struct task_struct *tsk,
 
 	down_read(&uts_sem);
 	memcpy(&ns->name, &old_ns->name, sizeof(ns->name));
-	ns->user_ns = get_user_ns(task_cred_xxx(tsk, user_ns));
+	ns->user_ns = get_user_ns(user_ns);
 	up_read(&uts_sem);
 	return ns;
 }
@@ -55,9 +55,8 @@ static struct uts_namespace *clone_uts_ns(struct task_struct *tsk,
  * versa.
  */
 struct uts_namespace *copy_utsname(unsigned long flags,
-				   struct task_struct *tsk)
+	struct user_namespace *user_ns, struct uts_namespace *old_ns)
 {
-	struct uts_namespace *old_ns = tsk->nsproxy->uts_ns;
 	struct uts_namespace *new_ns;
 
 	BUG_ON(!old_ns);
@@ -66,7 +65,7 @@ struct uts_namespace *copy_utsname(unsigned long flags,
 	if (!(flags & CLONE_NEWUTS))
 		return old_ns;
 
-	new_ns = clone_uts_ns(tsk, old_ns);
+	new_ns = clone_uts_ns(user_ns, old_ns);
 
 	put_uts_ns(old_ns);
 	return new_ns;
-- 
cgit v1.2.3


From 4c44aaafa8108f584831850ab48a975e971db2de Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Thu, 26 Jul 2012 05:05:21 -0700
Subject: userns: Kill task_user_ns

The task_user_ns function hides the fact that it is getting the user
namespace from struct cred on the task.  struct cred may go away as
soon as the rcu lock is released.  This leads to a race where we
can dereference a stale user namespace pointer.

To make it obvious a struct cred is involved kill task_user_ns.

To kill the race modify the users of task_user_ns to only
reference the user namespace while the rcu lock is held.

Cc: Kees Cook <keescook@chromium.org>
Cc: James Morris <james.l.morris@oracle.com>
Acked-by: Kees Cook <keescook@chromium.org>
Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/linux/cred.h     |  2 --
 kernel/ptrace.c          | 10 ++++++++--
 kernel/sched/core.c      | 10 ++++++++--
 security/yama/yama_lsm.c | 12 +++++++++---
 4 files changed, 25 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cred.h b/include/linux/cred.h
index ebbed2ce6637..856d2622d832 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -357,10 +357,8 @@ static inline void put_cred(const struct cred *_cred)
 extern struct user_namespace init_user_ns;
 #ifdef CONFIG_USER_NS
 #define current_user_ns()	(current_cred_xxx(user_ns))
-#define task_user_ns(task)	(task_cred_xxx((task), user_ns))
 #else
 #define current_user_ns()	(&init_user_ns)
-#define task_user_ns(task)	(&init_user_ns)
 #endif
 
 
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 1f5e55dda955..7b09b88862cc 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -215,8 +215,12 @@ ok:
 	smp_rmb();
 	if (task->mm)
 		dumpable = get_dumpable(task->mm);
-	if (!dumpable  && !ptrace_has_cap(task_user_ns(task), mode))
+	rcu_read_lock();
+	if (!dumpable && !ptrace_has_cap(__task_cred(task)->user_ns, mode)) {
+		rcu_read_unlock();
 		return -EPERM;
+	}
+	rcu_read_unlock();
 
 	return security_ptrace_access_check(task, mode);
 }
@@ -280,8 +284,10 @@ static int ptrace_attach(struct task_struct *task, long request,
 
 	if (seize)
 		flags |= PT_SEIZED;
-	if (ns_capable(task_user_ns(task), CAP_SYS_PTRACE))
+	rcu_read_lock();
+	if (ns_capable(__task_cred(task)->user_ns, CAP_SYS_PTRACE))
 		flags |= PT_PTRACE_CAP;
+	rcu_read_unlock();
 	task->ptrace = flags;
 
 	__ptrace_link(task, current);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 2d8927fda712..2f5eb1838b3e 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4029,8 +4029,14 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
 		goto out_free_cpus_allowed;
 	}
 	retval = -EPERM;
-	if (!check_same_owner(p) && !ns_capable(task_user_ns(p), CAP_SYS_NICE))
-		goto out_unlock;
+	if (!check_same_owner(p)) {
+		rcu_read_lock();
+		if (!ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) {
+			rcu_read_unlock();
+			goto out_unlock;
+		}
+		rcu_read_unlock();
+	}
 
 	retval = security_task_setscheduler(p);
 	if (retval)
diff --git a/security/yama/yama_lsm.c b/security/yama/yama_lsm.c
index b4c29848b49d..0e72239aeb05 100644
--- a/security/yama/yama_lsm.c
+++ b/security/yama/yama_lsm.c
@@ -262,14 +262,18 @@ int yama_ptrace_access_check(struct task_struct *child,
 			/* No additional restrictions. */
 			break;
 		case YAMA_SCOPE_RELATIONAL:
+			rcu_read_lock();
 			if (!task_is_descendant(current, child) &&
 			    !ptracer_exception_found(current, child) &&
-			    !ns_capable(task_user_ns(child), CAP_SYS_PTRACE))
+			    !ns_capable(__task_cred(child)->user_ns, CAP_SYS_PTRACE))
 				rc = -EPERM;
+			rcu_read_unlock();
 			break;
 		case YAMA_SCOPE_CAPABILITY:
-			if (!ns_capable(task_user_ns(child), CAP_SYS_PTRACE))
+			rcu_read_lock();
+			if (!ns_capable(__task_cred(child)->user_ns, CAP_SYS_PTRACE))
 				rc = -EPERM;
+			rcu_read_unlock();
 			break;
 		case YAMA_SCOPE_NO_ATTACH:
 		default:
@@ -307,8 +311,10 @@ int yama_ptrace_traceme(struct task_struct *parent)
 	/* Only disallow PTRACE_TRACEME on more aggressive settings. */
 	switch (ptrace_scope) {
 	case YAMA_SCOPE_CAPABILITY:
-		if (!ns_capable(task_user_ns(parent), CAP_SYS_PTRACE))
+		rcu_read_lock();
+		if (!ns_capable(__task_cred(parent)->user_ns, CAP_SYS_PTRACE))
 			rc = -EPERM;
+		rcu_read_unlock();
 		break;
 	case YAMA_SCOPE_NO_ATTACH:
 		rc = -EPERM;
-- 
cgit v1.2.3


From cde1975bc242f3e1072bde623ef378e547b73f91 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Thu, 26 Jul 2012 06:24:06 -0700
Subject: userns: Implent proc namespace operations

This allows entering a user namespace, and the ability
to store a reference to a user namespace with a bind
mount.

Addition of missing userns_ns_put in userns_install
from Gao feng <gaofeng@cn.fujitsu.com>

Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 fs/proc/namespaces.c    |  4 +++
 include/linux/proc_fs.h |  1 +
 kernel/user_namespace.c | 90 +++++++++++++++++++++++++++++++++++++++----------
 3 files changed, 78 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index 2a17fd9ae6a9..030250c27d70 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -11,6 +11,7 @@
 #include <net/net_namespace.h>
 #include <linux/ipc_namespace.h>
 #include <linux/pid_namespace.h>
+#include <linux/user_namespace.h>
 #include "internal.h"
 
 
@@ -26,6 +27,9 @@ static const struct proc_ns_operations *ns_entries[] = {
 #endif
 #ifdef CONFIG_PID_NS
 	&pidns_operations,
+#endif
+#ifdef CONFIG_USER_NS
+	&userns_operations,
 #endif
 	&mntns_operations,
 };
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 9014c041e752..31447819bc55 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -258,6 +258,7 @@ extern const struct proc_ns_operations netns_operations;
 extern const struct proc_ns_operations utsns_operations;
 extern const struct proc_ns_operations ipcns_operations;
 extern const struct proc_ns_operations pidns_operations;
+extern const struct proc_ns_operations userns_operations;
 extern const struct proc_ns_operations mntns_operations;
 
 union proc_op {
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 49096d559e08..a9460774e77d 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -9,6 +9,7 @@
 #include <linux/nsproxy.h>
 #include <linux/slab.h>
 #include <linux/user_namespace.h>
+#include <linux/proc_fs.h>
 #include <linux/highuid.h>
 #include <linux/cred.h>
 #include <linux/securebits.h>
@@ -26,6 +27,24 @@ static struct kmem_cache *user_ns_cachep __read_mostly;
 static bool new_idmap_permitted(struct user_namespace *ns, int cap_setid,
 				struct uid_gid_map *map);
 
+static void set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns)
+{
+	/* Start with the same capabilities as init but useless for doing
+	 * anything as the capabilities are bound to the new user namespace.
+	 */
+	cred->securebits = SECUREBITS_DEFAULT;
+	cred->cap_inheritable = CAP_EMPTY_SET;
+	cred->cap_permitted = CAP_FULL_SET;
+	cred->cap_effective = CAP_FULL_SET;
+	cred->cap_bset = CAP_FULL_SET;
+#ifdef CONFIG_KEYS
+	key_put(cred->request_key_auth);
+	cred->request_key_auth = NULL;
+#endif
+	/* tgcred will be cleared in our caller bc CLONE_THREAD won't be set */
+	cred->user_ns = user_ns;
+}
+
 /*
  * Create a new user namespace, deriving the creator from the user in the
  * passed credentials, and replacing that user with the new root user for the
@@ -53,27 +72,12 @@ int create_user_ns(struct cred *new)
 		return -ENOMEM;
 
 	kref_init(&ns->kref);
+	/* Leave the new->user_ns reference with the new user namespace. */
 	ns->parent = parent_ns;
 	ns->owner = owner;
 	ns->group = group;
 
-	/* Start with the same capabilities as init but useless for doing
-	 * anything as the capabilities are bound to the new user namespace.
-	 */
-	new->securebits = SECUREBITS_DEFAULT;
-	new->cap_inheritable = CAP_EMPTY_SET;
-	new->cap_permitted = CAP_FULL_SET;
-	new->cap_effective = CAP_FULL_SET;
-	new->cap_bset = CAP_FULL_SET;
-#ifdef CONFIG_KEYS
-	key_put(new->request_key_auth);
-	new->request_key_auth = NULL;
-#endif
-	/* tgcred will be cleared in our caller bc CLONE_THREAD won't be set */
-
-	/* Leave the new->user_ns reference with the new user namespace. */
-	/* Leave the reference to our user_ns with the new cred. */
-	new->user_ns = ns;
+	set_cred_user_ns(new, ns);
 
 	return 0;
 }
@@ -737,6 +741,58 @@ static bool new_idmap_permitted(struct user_namespace *ns, int cap_setid,
 	return false;
 }
 
+static void *userns_get(struct task_struct *task)
+{
+	struct user_namespace *user_ns;
+
+	rcu_read_lock();
+	user_ns = get_user_ns(__task_cred(task)->user_ns);
+	rcu_read_unlock();
+
+	return user_ns;
+}
+
+static void userns_put(void *ns)
+{
+	put_user_ns(ns);
+}
+
+static int userns_install(struct nsproxy *nsproxy, void *ns)
+{
+	struct user_namespace *user_ns = ns;
+	struct cred *cred;
+
+	/* Don't allow gaining capabilities by reentering
+	 * the same user namespace.
+	 */
+	if (user_ns == current_user_ns())
+		return -EINVAL;
+
+	/* Threaded many not enter a different user namespace */
+	if (atomic_read(&current->mm->mm_users) > 1)
+		return -EINVAL;
+
+	if (!ns_capable(user_ns, CAP_SYS_ADMIN))
+		return -EPERM;
+
+	cred = prepare_creds();
+	if (!cred)
+		return -ENOMEM;
+
+	put_user_ns(cred->user_ns);
+	set_cred_user_ns(cred, get_user_ns(user_ns));
+
+	return commit_creds(cred);
+}
+
+const struct proc_ns_operations userns_operations = {
+	.name		= "user",
+	.type		= CLONE_NEWUSER,
+	.get		= userns_get,
+	.put		= userns_put,
+	.install	= userns_install,
+};
+
 static __init int user_namespaces_init(void)
 {
 	user_ns_cachep = KMEM_CACHE(user_namespace, SLAB_PANIC);
-- 
cgit v1.2.3


From b2e0d98705e60e45bbb3c0032c48824ad7ae0704 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Thu, 26 Jul 2012 05:15:35 -0700
Subject: userns: Implement unshare of the user namespace

- Add CLONE_THREAD to the unshare flags if CLONE_NEWUSER is selected
  As changing user namespaces is only valid if all there is only
  a single thread.
- Restore the code to add CLONE_VM if CLONE_THREAD is selected and
  the code to addCLONE_SIGHAND if CLONE_VM is selected.
  Making the constraints in the code clear.

Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/linux/nsproxy.h        |  2 +-
 include/linux/user_namespace.h |  9 +++++++++
 kernel/fork.c                  | 25 ++++++++++++++++++++++---
 kernel/nsproxy.c               |  8 ++++----
 kernel/user_namespace.c        | 15 +++++++++++++++
 5 files changed, 51 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h
index cc37a55ad004..10e5947491c7 100644
--- a/include/linux/nsproxy.h
+++ b/include/linux/nsproxy.h
@@ -67,7 +67,7 @@ void exit_task_namespaces(struct task_struct *tsk);
 void switch_task_namespaces(struct task_struct *tsk, struct nsproxy *new);
 void free_nsproxy(struct nsproxy *ns);
 int unshare_nsproxy_namespaces(unsigned long, struct nsproxy **,
-	struct fs_struct *);
+	struct cred *, struct fs_struct *);
 int __init nsproxy_cache_init(void);
 
 static inline void put_nsproxy(struct nsproxy *ns)
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 95142cae446a..17651f08d67f 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -39,6 +39,7 @@ static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
 }
 
 extern int create_user_ns(struct cred *new);
+extern int unshare_userns(unsigned long unshare_flags, struct cred **new_cred);
 extern void free_user_ns(struct kref *kref);
 
 static inline void put_user_ns(struct user_namespace *ns)
@@ -66,6 +67,14 @@ static inline int create_user_ns(struct cred *new)
 	return -EINVAL;
 }
 
+static inline int unshare_userns(unsigned long unshare_flags,
+				 struct cred **new_cred)
+{
+	if (unshare_flags & CLONE_NEWUSER)
+		return -EINVAL;
+	return 0;
+}
+
 static inline void put_user_ns(struct user_namespace *ns)
 {
 }
diff --git a/kernel/fork.c b/kernel/fork.c
index 8c29abb19014..38e53b87402c 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1687,7 +1687,7 @@ static int check_unshare_flags(unsigned long unshare_flags)
 	if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND|
 				CLONE_VM|CLONE_FILES|CLONE_SYSVSEM|
 				CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET|
-				CLONE_NEWPID))
+				CLONE_NEWUSER|CLONE_NEWPID))
 		return -EINVAL;
 	/*
 	 * Not implemented, but pretend it works if there is nothing to
@@ -1754,10 +1754,16 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
 {
 	struct fs_struct *fs, *new_fs = NULL;
 	struct files_struct *fd, *new_fd = NULL;
+	struct cred *new_cred = NULL;
 	struct nsproxy *new_nsproxy = NULL;
 	int do_sysvsem = 0;
 	int err;
 
+	/*
+	 * If unsharing a user namespace must also unshare the thread.
+	 */
+	if (unshare_flags & CLONE_NEWUSER)
+		unshare_flags |= CLONE_THREAD;
 	/*
 	 * If unsharing a pid namespace must also unshare the thread.
 	 */
@@ -1795,11 +1801,15 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
 	err = unshare_fd(unshare_flags, &new_fd);
 	if (err)
 		goto bad_unshare_cleanup_fs;
-	err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy, new_fs);
+	err = unshare_userns(unshare_flags, &new_cred);
 	if (err)
 		goto bad_unshare_cleanup_fd;
+	err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy,
+					 new_cred, new_fs);
+	if (err)
+		goto bad_unshare_cleanup_cred;
 
-	if (new_fs || new_fd || do_sysvsem || new_nsproxy) {
+	if (new_fs || new_fd || do_sysvsem || new_cred || new_nsproxy) {
 		if (do_sysvsem) {
 			/*
 			 * CLONE_SYSVSEM is equivalent to sys_exit().
@@ -1832,11 +1842,20 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
 		}
 
 		task_unlock(current);
+
+		if (new_cred) {
+			/* Install the new user namespace */
+			commit_creds(new_cred);
+			new_cred = NULL;
+		}
 	}
 
 	if (new_nsproxy)
 		put_nsproxy(new_nsproxy);
 
+bad_unshare_cleanup_cred:
+	if (new_cred)
+		put_cred(new_cred);
 bad_unshare_cleanup_fd:
 	if (new_fd)
 		put_files_struct(new_fd);
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index 2ddd81657a2a..78e2ecb20165 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -186,7 +186,7 @@ void free_nsproxy(struct nsproxy *ns)
  * On success, returns the new nsproxy.
  */
 int unshare_nsproxy_namespaces(unsigned long unshare_flags,
-		struct nsproxy **new_nsp, struct fs_struct *new_fs)
+	struct nsproxy **new_nsp, struct cred *new_cred, struct fs_struct *new_fs)
 {
 	struct user_namespace *user_ns;
 	int err = 0;
@@ -195,12 +195,12 @@ int unshare_nsproxy_namespaces(unsigned long unshare_flags,
 			       CLONE_NEWNET | CLONE_NEWPID)))
 		return 0;
 
-	if (!nsown_capable(CAP_SYS_ADMIN))
+	user_ns = new_cred ? new_cred->user_ns : current_user_ns();
+	if (!ns_capable(user_ns, CAP_SYS_ADMIN))
 		return -EPERM;
 
-	user_ns = current_user_ns();
 	*new_nsp = create_new_namespaces(unshare_flags, current, user_ns,
-				new_fs ? new_fs : current->fs);
+					 new_fs ? new_fs : current->fs);
 	if (IS_ERR(*new_nsp)) {
 		err = PTR_ERR(*new_nsp);
 		goto out;
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index a9460774e77d..ce92f7e6290a 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -82,6 +82,21 @@ int create_user_ns(struct cred *new)
 	return 0;
 }
 
+int unshare_userns(unsigned long unshare_flags, struct cred **new_cred)
+{
+	struct cred *cred;
+
+	if (!(unshare_flags & CLONE_NEWUSER))
+		return 0;
+
+	cred = prepare_creds();
+	if (!cred)
+		return -ENOMEM;
+
+	*new_cred = cred;
+	return create_user_ns(cred);
+}
+
 void free_user_ns(struct kref *kref)
 {
 	struct user_namespace *parent, *ns =
-- 
cgit v1.2.3


From 33d6dce607573b5fd7a43168e0d91221b3ca532b Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Fri, 17 Jun 2011 13:33:20 -0700
Subject: proc: Generalize proc inode allocation

Generalize the proc inode allocation so that it can be
used without having to having to create a proc_dir_entry.

This will allow namespace file descriptors to remain light
weight entitities but still have the same inode number
when the backing namespace is the same.

Acked-by: Serge E. Hallyn <serge.hallyn@ubuntu.com>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 fs/proc/generic.c       | 26 +++++++++++++-------------
 include/linux/proc_fs.h | 10 ++++++++++
 2 files changed, 23 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 0d80cef4cfb9..7b3ae3cc0ef9 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -350,14 +350,14 @@ static DEFINE_SPINLOCK(proc_inum_lock); /* protects the above */
  * Return an inode number between PROC_DYNAMIC_FIRST and
  * 0xffffffff, or zero on failure.
  */
-static unsigned int get_inode_number(void)
+int proc_alloc_inum(unsigned int *inum)
 {
 	unsigned int i;
 	int error;
 
 retry:
-	if (ida_pre_get(&proc_inum_ida, GFP_KERNEL) == 0)
-		return 0;
+	if (!ida_pre_get(&proc_inum_ida, GFP_KERNEL))
+		return -ENOMEM;
 
 	spin_lock(&proc_inum_lock);
 	error = ida_get_new(&proc_inum_ida, &i);
@@ -365,18 +365,19 @@ retry:
 	if (error == -EAGAIN)
 		goto retry;
 	else if (error)
-		return 0;
+		return error;
 
 	if (i > UINT_MAX - PROC_DYNAMIC_FIRST) {
 		spin_lock(&proc_inum_lock);
 		ida_remove(&proc_inum_ida, i);
 		spin_unlock(&proc_inum_lock);
-		return 0;
+		return -ENOSPC;
 	}
-	return PROC_DYNAMIC_FIRST + i;
+	*inum = PROC_DYNAMIC_FIRST + i;
+	return 0;
 }
 
-static void release_inode_number(unsigned int inum)
+void proc_free_inum(unsigned int inum)
 {
 	spin_lock(&proc_inum_lock);
 	ida_remove(&proc_inum_ida, inum - PROC_DYNAMIC_FIRST);
@@ -554,13 +555,12 @@ static const struct inode_operations proc_dir_inode_operations = {
 
 static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp)
 {
-	unsigned int i;
 	struct proc_dir_entry *tmp;
+	int ret;
 	
-	i = get_inode_number();
-	if (i == 0)
-		return -EAGAIN;
-	dp->low_ino = i;
+	ret = proc_alloc_inum(&dp->low_ino);
+	if (ret)
+		return ret;
 
 	if (S_ISDIR(dp->mode)) {
 		if (dp->proc_iops == NULL) {
@@ -764,7 +764,7 @@ EXPORT_SYMBOL(proc_create_data);
 
 static void free_proc_entry(struct proc_dir_entry *de)
 {
-	release_inode_number(de->low_ino);
+	proc_free_inum(de->low_ino);
 
 	if (S_ISLNK(de->mode))
 		kfree(de->data);
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 31447819bc55..bf1d000fbba6 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -176,6 +176,8 @@ extern struct proc_dir_entry *proc_net_mkdir(struct net *net, const char *name,
 extern struct file *proc_ns_fget(int fd);
 extern bool proc_ns_inode(struct inode *inode);
 
+extern int proc_alloc_inum(unsigned int *pino);
+extern void proc_free_inum(unsigned int inum);
 #else
 
 #define proc_net_fops_create(net, name, mode, fops)  ({ (void)(mode), NULL; })
@@ -235,6 +237,14 @@ static inline bool proc_ns_inode(struct inode *inode)
 	return false;
 }
 
+static inline int proc_alloc_inum(unsigned int *inum)
+{
+	*inum = 1;
+	return 0;
+}
+static inline void proc_free_inum(unsigned int inum)
+{
+}
 #endif /* CONFIG_PROC_FS */
 
 #if !defined(CONFIG_PROC_KCORE)
-- 
cgit v1.2.3


From 98f842e675f96ffac96e6c50315790912b2812be Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Wed, 15 Jun 2011 10:21:48 -0700
Subject: proc: Usable inode numbers for the namespace file descriptors.

Assign a unique proc inode to each namespace, and use that
inode number to ensure we only allocate at most one proc
inode for every namespace in proc.

A single proc inode per namespace allows userspace to test
to see if two processes are in the same namespace.

This has been a long requested feature and only blocked because
a naive implementation would put the id in a global space and
would ultimately require having a namespace for the names of
namespaces, making migration and certain virtualization tricks
impossible.

We still don't have per superblock inode numbers for proc, which
appears necessary for application unaware checkpoint/restart and
migrations (if the application is using namespace file descriptors)
but that is now allowd by the design if it becomes important.

I have preallocated the ipc and uts initial proc inode numbers so
their structures can be statically initialized.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 fs/mount.h                     |  1 +
 fs/namespace.c                 | 14 ++++++++++++++
 fs/proc/namespaces.c           | 24 ++++++++++++++----------
 include/linux/ipc_namespace.h  |  2 ++
 include/linux/pid_namespace.h  |  1 +
 include/linux/proc_fs.h        |  7 ++++++-
 include/linux/user_namespace.h |  1 +
 include/linux/utsname.h        |  1 +
 include/net/net_namespace.h    |  2 ++
 init/version.c                 |  2 ++
 ipc/msgutil.c                  |  2 ++
 ipc/namespace.c                | 16 ++++++++++++++++
 kernel/pid.c                   |  1 +
 kernel/pid_namespace.c         | 12 ++++++++++++
 kernel/user.c                  |  2 ++
 kernel/user_namespace.c        | 15 +++++++++++++++
 kernel/utsname.c               | 17 ++++++++++++++++-
 net/core/net_namespace.c       | 24 ++++++++++++++++++++++++
 18 files changed, 132 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/fs/mount.h b/fs/mount.h
index 630fafc616bb..cd5007980400 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -4,6 +4,7 @@
 
 struct mnt_namespace {
 	atomic_t		count;
+	unsigned int		proc_inum;
 	struct mount *	root;
 	struct list_head	list;
 	struct user_namespace	*user_ns;
diff --git a/fs/namespace.c b/fs/namespace.c
index cab78a74aca3..c1bbe86f4920 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2301,6 +2301,7 @@ dput_out:
 
 static void free_mnt_ns(struct mnt_namespace *ns)
 {
+	proc_free_inum(ns->proc_inum);
 	put_user_ns(ns->user_ns);
 	kfree(ns);
 }
@@ -2317,10 +2318,16 @@ static atomic64_t mnt_ns_seq = ATOMIC64_INIT(1);
 static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns)
 {
 	struct mnt_namespace *new_ns;
+	int ret;
 
 	new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL);
 	if (!new_ns)
 		return ERR_PTR(-ENOMEM);
+	ret = proc_alloc_inum(&new_ns->proc_inum);
+	if (ret) {
+		kfree(new_ns);
+		return ERR_PTR(ret);
+	}
 	new_ns->seq = atomic64_add_return(1, &mnt_ns_seq);
 	atomic_set(&new_ns->count, 1);
 	new_ns->root = NULL;
@@ -2799,10 +2806,17 @@ static int mntns_install(struct nsproxy *nsproxy, void *ns)
 	return 0;
 }
 
+static unsigned int mntns_inum(void *ns)
+{
+	struct mnt_namespace *mnt_ns = ns;
+	return mnt_ns->proc_inum;
+}
+
 const struct proc_ns_operations mntns_operations = {
 	.name		= "mnt",
 	.type		= CLONE_NEWNS,
 	.get		= mntns_get,
 	.put		= mntns_put,
 	.install	= mntns_install,
+	.inum		= mntns_inum,
 };
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index 7a6d8d69cdb8..b7a47196c8c3 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -82,7 +82,7 @@ static struct dentry *proc_ns_get_dentry(struct super_block *sb,
 		return ERR_PTR(-ENOMEM);
 	}
 
-	inode = new_inode(sb);
+	inode = iget_locked(sb, ns_ops->inum(ns));
 	if (!inode) {
 		dput(dentry);
 		ns_ops->put(ns);
@@ -90,13 +90,17 @@ static struct dentry *proc_ns_get_dentry(struct super_block *sb,
 	}
 
 	ei = PROC_I(inode);
-	inode->i_ino = get_next_ino();
-	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
-	inode->i_op = &ns_inode_operations;
-	inode->i_mode = S_IFREG | S_IRUGO;
-	inode->i_fop = &ns_file_operations;
-	ei->ns_ops = ns_ops;
-	ei->ns = ns;
+	if (inode->i_state & I_NEW) {
+		inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
+		inode->i_op = &ns_inode_operations;
+		inode->i_mode = S_IFREG | S_IRUGO;
+		inode->i_fop = &ns_file_operations;
+		ei->ns_ops = ns_ops;
+		ei->ns = ns;
+		unlock_new_inode(inode);
+	} else {
+		ns_ops->put(ns);
+	}
 
 	d_set_d_op(dentry, &ns_dentry_operations);
 	result = d_instantiate_unique(dentry, inode);
@@ -162,12 +166,12 @@ static int proc_ns_readlink(struct dentry *dentry, char __user *buffer, int bufl
 	if (!ns)
 		goto out_put_task;
 
-	snprintf(name, sizeof(name), "%s", ns_ops->name);
+	snprintf(name, sizeof(name), "%s:[%u]", ns_ops->name, ns_ops->inum(ns));
 	len = strlen(name);
 
 	if (len > buflen)
 		len = buflen;
-	if (copy_to_user(buffer, ns_ops->name, len))
+	if (copy_to_user(buffer, name, len))
 		len = -EFAULT;
 
 	ns_ops->put(ns);
diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index f03af702a39d..fe771978e877 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -67,6 +67,8 @@ struct ipc_namespace {
 
 	/* user_ns which owns the ipc ns */
 	struct user_namespace *user_ns;
+
+	unsigned int	proc_inum;
 };
 
 extern struct ipc_namespace init_ipc_ns;
diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
index 4c96acdb2489..bf285999273a 100644
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -37,6 +37,7 @@ struct pid_namespace {
 	kgid_t pid_gid;
 	int hide_pid;
 	int reboot;	/* group exit code if this pidns was rebooted */
+	unsigned int proc_inum;
 };
 
 extern struct pid_namespace init_pid_ns;
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index bf1d000fbba6..2e24018b7cec 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -28,7 +28,11 @@ struct mm_struct;
  */
 
 enum {
-	PROC_ROOT_INO = 1,
+	PROC_ROOT_INO		= 1,
+	PROC_IPC_INIT_INO	= 0xEFFFFFFFU,
+	PROC_UTS_INIT_INO	= 0xEFFFFFFEU,
+	PROC_USER_INIT_INO	= 0xEFFFFFFDU,
+	PROC_PID_INIT_INO	= 0xEFFFFFFCU,
 };
 
 /*
@@ -263,6 +267,7 @@ struct proc_ns_operations {
 	void *(*get)(struct task_struct *task);
 	void (*put)(void *ns);
 	int (*install)(struct nsproxy *nsproxy, void *ns);
+	unsigned int (*inum)(void *ns);
 };
 extern const struct proc_ns_operations netns_operations;
 extern const struct proc_ns_operations utsns_operations;
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 17651f08d67f..b9bd2e6c73cc 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -25,6 +25,7 @@ struct user_namespace {
 	struct user_namespace	*parent;
 	kuid_t			owner;
 	kgid_t			group;
+	unsigned int		proc_inum;
 };
 
 extern struct user_namespace init_user_ns;
diff --git a/include/linux/utsname.h b/include/linux/utsname.h
index 221f4a0a7502..239e27733d6c 100644
--- a/include/linux/utsname.h
+++ b/include/linux/utsname.h
@@ -23,6 +23,7 @@ struct uts_namespace {
 	struct kref kref;
 	struct new_utsname name;
 	struct user_namespace *user_ns;
+	unsigned int proc_inum;
 };
 extern struct uts_namespace init_uts_ns;
 
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index c5a43f56b796..de644bcd8613 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -56,6 +56,8 @@ struct net {
 
 	struct user_namespace   *user_ns;	/* Owning user namespace */
 
+	unsigned int		proc_inum;
+
 	struct proc_dir_entry 	*proc_net;
 	struct proc_dir_entry 	*proc_net_stat;
 
diff --git a/init/version.c b/init/version.c
index 86fe0ccb997a..58170f18912d 100644
--- a/init/version.c
+++ b/init/version.c
@@ -12,6 +12,7 @@
 #include <linux/utsname.h>
 #include <generated/utsrelease.h>
 #include <linux/version.h>
+#include <linux/proc_fs.h>
 
 #ifndef CONFIG_KALLSYMS
 #define version(a) Version_ ## a
@@ -34,6 +35,7 @@ struct uts_namespace init_uts_ns = {
 		.domainname	= UTS_DOMAINNAME,
 	},
 	.user_ns = &init_user_ns,
+	.proc_inum = PROC_UTS_INIT_INO,
 };
 EXPORT_SYMBOL_GPL(init_uts_ns);
 
diff --git a/ipc/msgutil.c b/ipc/msgutil.c
index 26143d377c95..6471f1bdae96 100644
--- a/ipc/msgutil.c
+++ b/ipc/msgutil.c
@@ -16,6 +16,7 @@
 #include <linux/msg.h>
 #include <linux/ipc_namespace.h>
 #include <linux/utsname.h>
+#include <linux/proc_fs.h>
 #include <asm/uaccess.h>
 
 #include "util.h"
@@ -30,6 +31,7 @@ DEFINE_SPINLOCK(mq_lock);
 struct ipc_namespace init_ipc_ns = {
 	.count		= ATOMIC_INIT(1),
 	.user_ns = &init_user_ns,
+	.proc_inum = PROC_IPC_INIT_INO,
 };
 
 atomic_t nr_ipc_ns = ATOMIC_INIT(1);
diff --git a/ipc/namespace.c b/ipc/namespace.c
index 72c868277793..cf3386a51de2 100644
--- a/ipc/namespace.c
+++ b/ipc/namespace.c
@@ -26,9 +26,16 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
 	if (ns == NULL)
 		return ERR_PTR(-ENOMEM);
 
+	err = proc_alloc_inum(&ns->proc_inum);
+	if (err) {
+		kfree(ns);
+		return ERR_PTR(err);
+	}
+
 	atomic_set(&ns->count, 1);
 	err = mq_init_ns(ns);
 	if (err) {
+		proc_free_inum(ns->proc_inum);
 		kfree(ns);
 		return ERR_PTR(err);
 	}
@@ -111,6 +118,7 @@ static void free_ipc_ns(struct ipc_namespace *ns)
 	 */
 	ipcns_notify(IPCNS_REMOVED);
 	put_user_ns(ns->user_ns);
+	proc_free_inum(ns->proc_inum);
 	kfree(ns);
 }
 
@@ -172,10 +180,18 @@ static int ipcns_install(struct nsproxy *nsproxy, void *new)
 	return 0;
 }
 
+static unsigned int ipcns_inum(void *vp)
+{
+	struct ipc_namespace *ns = vp;
+
+	return ns->proc_inum;
+}
+
 const struct proc_ns_operations ipcns_operations = {
 	.name		= "ipc",
 	.type		= CLONE_NEWIPC,
 	.get		= ipcns_get,
 	.put		= ipcns_put,
 	.install	= ipcns_install,
+	.inum		= ipcns_inum,
 };
diff --git a/kernel/pid.c b/kernel/pid.c
index 6e8da291de49..3026ddae0a34 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -80,6 +80,7 @@ struct pid_namespace init_pid_ns = {
 	.level = 0,
 	.child_reaper = &init_task,
 	.user_ns = &init_user_ns,
+	.proc_inum = PROC_PID_INIT_INO,
 };
 EXPORT_SYMBOL_GPL(init_pid_ns);
 
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index 68508d330634..560da0dab230 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -107,6 +107,10 @@ static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns
 	if (ns->pid_cachep == NULL)
 		goto out_free_map;
 
+	err = proc_alloc_inum(&ns->proc_inum);
+	if (err)
+		goto out_free_map;
+
 	kref_init(&ns->kref);
 	ns->level = level;
 	ns->parent = get_pid_ns(parent_pid_ns);
@@ -133,6 +137,7 @@ static void destroy_pid_namespace(struct pid_namespace *ns)
 {
 	int i;
 
+	proc_free_inum(ns->proc_inum);
 	for (i = 0; i < PIDMAP_ENTRIES; i++)
 		kfree(ns->pidmap[i].page);
 	put_user_ns(ns->user_ns);
@@ -345,12 +350,19 @@ static int pidns_install(struct nsproxy *nsproxy, void *ns)
 	return 0;
 }
 
+static unsigned int pidns_inum(void *ns)
+{
+	struct pid_namespace *pid_ns = ns;
+	return pid_ns->proc_inum;
+}
+
 const struct proc_ns_operations pidns_operations = {
 	.name		= "pid",
 	.type		= CLONE_NEWPID,
 	.get		= pidns_get,
 	.put		= pidns_put,
 	.install	= pidns_install,
+	.inum		= pidns_inum,
 };
 
 static __init int pid_namespaces_init(void)
diff --git a/kernel/user.c b/kernel/user.c
index 750acffbe9ec..33acb5e53a5f 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -16,6 +16,7 @@
 #include <linux/interrupt.h>
 #include <linux/export.h>
 #include <linux/user_namespace.h>
+#include <linux/proc_fs.h>
 
 /*
  * userns count is 1 for root user, 1 for init_uts_ns,
@@ -51,6 +52,7 @@ struct user_namespace init_user_ns = {
 	},
 	.owner = GLOBAL_ROOT_UID,
 	.group = GLOBAL_ROOT_GID,
+	.proc_inum = PROC_USER_INIT_INO,
 };
 EXPORT_SYMBOL_GPL(init_user_ns);
 
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 89f6eaed067a..f5975ccf9348 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -58,6 +58,7 @@ int create_user_ns(struct cred *new)
 	struct user_namespace *ns, *parent_ns = new->user_ns;
 	kuid_t owner = new->euid;
 	kgid_t group = new->egid;
+	int ret;
 
 	/* The creator needs a mapping in the parent user namespace
 	 * or else we won't be able to reasonably tell userspace who
@@ -71,6 +72,12 @@ int create_user_ns(struct cred *new)
 	if (!ns)
 		return -ENOMEM;
 
+	ret = proc_alloc_inum(&ns->proc_inum);
+	if (ret) {
+		kmem_cache_free(user_ns_cachep, ns);
+		return ret;
+	}
+
 	kref_init(&ns->kref);
 	/* Leave the new->user_ns reference with the new user namespace. */
 	ns->parent = parent_ns;
@@ -103,6 +110,7 @@ void free_user_ns(struct kref *kref)
 		container_of(kref, struct user_namespace, kref);
 
 	parent = ns->parent;
+	proc_free_inum(ns->proc_inum);
 	kmem_cache_free(user_ns_cachep, ns);
 	put_user_ns(parent);
 }
@@ -808,12 +816,19 @@ static int userns_install(struct nsproxy *nsproxy, void *ns)
 	return commit_creds(cred);
 }
 
+static unsigned int userns_inum(void *ns)
+{
+	struct user_namespace *user_ns = ns;
+	return user_ns->proc_inum;
+}
+
 const struct proc_ns_operations userns_operations = {
 	.name		= "user",
 	.type		= CLONE_NEWUSER,
 	.get		= userns_get,
 	.put		= userns_put,
 	.install	= userns_install,
+	.inum		= userns_inum,
 };
 
 static __init int user_namespaces_init(void)
diff --git a/kernel/utsname.c b/kernel/utsname.c
index fdc619eb61ef..f6336d51d64c 100644
--- a/kernel/utsname.c
+++ b/kernel/utsname.c
@@ -36,11 +36,18 @@ static struct uts_namespace *clone_uts_ns(struct user_namespace *user_ns,
 					  struct uts_namespace *old_ns)
 {
 	struct uts_namespace *ns;
+	int err;
 
 	ns = create_uts_ns();
 	if (!ns)
 		return ERR_PTR(-ENOMEM);
 
+	err = proc_alloc_inum(&ns->proc_inum);
+	if (err) {
+		kfree(ns);
+		return ERR_PTR(err);
+	}
+
 	down_read(&uts_sem);
 	memcpy(&ns->name, &old_ns->name, sizeof(ns->name));
 	ns->user_ns = get_user_ns(user_ns);
@@ -77,6 +84,7 @@ void free_uts_ns(struct kref *kref)
 
 	ns = container_of(kref, struct uts_namespace, kref);
 	put_user_ns(ns->user_ns);
+	proc_free_inum(ns->proc_inum);
 	kfree(ns);
 }
 
@@ -114,11 +122,18 @@ static int utsns_install(struct nsproxy *nsproxy, void *new)
 	return 0;
 }
 
+static unsigned int utsns_inum(void *vp)
+{
+	struct uts_namespace *ns = vp;
+
+	return ns->proc_inum;
+}
+
 const struct proc_ns_operations utsns_operations = {
 	.name		= "uts",
 	.type		= CLONE_NEWUTS,
 	.get		= utsns_get,
 	.put		= utsns_put,
 	.install	= utsns_install,
+	.inum		= utsns_inum,
 };
-
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index ec2870b44c1f..2e9a3132b8dd 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -381,6 +381,21 @@ struct net *get_net_ns_by_pid(pid_t pid)
 }
 EXPORT_SYMBOL_GPL(get_net_ns_by_pid);
 
+static __net_init int net_ns_net_init(struct net *net)
+{
+	return proc_alloc_inum(&net->proc_inum);
+}
+
+static __net_exit void net_ns_net_exit(struct net *net)
+{
+	proc_free_inum(net->proc_inum);
+}
+
+static struct pernet_operations __net_initdata net_ns_ops = {
+	.init = net_ns_net_init,
+	.exit = net_ns_net_exit,
+};
+
 static int __init net_ns_init(void)
 {
 	struct net_generic *ng;
@@ -412,6 +427,8 @@ static int __init net_ns_init(void)
 
 	mutex_unlock(&net_mutex);
 
+	register_pernet_subsys(&net_ns_ops);
+
 	return 0;
 }
 
@@ -640,11 +657,18 @@ static int netns_install(struct nsproxy *nsproxy, void *ns)
 	return 0;
 }
 
+static unsigned int netns_inum(void *ns)
+{
+	struct net *net = ns;
+	return net->proc_inum;
+}
+
 const struct proc_ns_operations netns_operations = {
 	.name		= "net",
 	.type		= CLONE_NEWNET,
 	.get		= netns_get,
 	.put		= netns_put,
 	.install	= netns_install,
+	.inum		= netns_inum,
 };
 #endif
-- 
cgit v1.2.3


From 97fa4cf442ff2872000d9110686371775795a32b Mon Sep 17 00:00:00 2001
From: Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com>
Date: Sat, 17 Nov 2012 15:22:22 +0100
Subject: clk: mvebu: add mvebu core clocks.

This driver allows to provide DT clocks for core clocks found on
Marvell Kirkwood, Dove & 370/XP SoCs. The core clock frequencies and
ratios are determined by decoding the Sample-At-Reset registers.

Although technically correct, using a divider of 0 will lead to
div_by_zero panic. Let's use a ratio of 0/1 instead to fail later
with a zero clock.

Signed-off-by: Gregory CLEMENT <gregory.clement@free-electrons.com>
Signed-off-by: Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com>
Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Tested-by Gregory CLEMENT <gregory.clement@free-electrons.com>
---
 .../devicetree/bindings/clock/mvebu-core-clock.txt |  47 ++
 drivers/clk/Kconfig                                |   2 +
 drivers/clk/Makefile                               |   1 +
 drivers/clk/mvebu/Kconfig                          |   3 +
 drivers/clk/mvebu/Makefile                         |   1 +
 drivers/clk/mvebu/clk-core.c                       | 675 +++++++++++++++++++++
 drivers/clk/mvebu/clk-core.h                       |  18 +
 drivers/clk/mvebu/clk.c                            |  23 +
 include/linux/clk/mvebu.h                          |  22 +
 9 files changed, 792 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/clock/mvebu-core-clock.txt
 create mode 100644 drivers/clk/mvebu/Kconfig
 create mode 100644 drivers/clk/mvebu/Makefile
 create mode 100644 drivers/clk/mvebu/clk-core.c
 create mode 100644 drivers/clk/mvebu/clk-core.h
 create mode 100644 drivers/clk/mvebu/clk.c
 create mode 100644 include/linux/clk/mvebu.h

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/clock/mvebu-core-clock.txt b/Documentation/devicetree/bindings/clock/mvebu-core-clock.txt
new file mode 100644
index 000000000000..1e662948661e
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/mvebu-core-clock.txt
@@ -0,0 +1,47 @@
+* Core Clock bindings for Marvell MVEBU SoCs
+
+Marvell MVEBU SoCs usually allow to determine core clock frequencies by
+reading the Sample-At-Reset (SAR) register. The core clock consumer should
+specify the desired clock by having the clock ID in its "clocks" phandle cell.
+
+The following is a list of provided IDs and clock names on Armada 370/XP:
+ 0 = tclk    (Internal Bus clock)
+ 1 = cpuclk  (CPU clock)
+ 2 = nbclk   (L2 Cache clock)
+ 3 = hclk    (DRAM control clock)
+ 4 = dramclk (DDR clock)
+
+The following is a list of provided IDs and clock names on Kirkwood and Dove:
+ 0 = tclk   (Internal Bus clock)
+ 1 = cpuclk (CPU0 clock)
+ 2 = l2clk  (L2 Cache clock derived from CPU0 clock)
+ 3 = ddrclk (DDR controller clock derived from CPU0 clock)
+
+Required properties:
+- compatible : shall be one of the following:
+	"marvell,armada-370-core-clock" - For Armada 370 SoC core clocks
+	"marvell,armada-xp-core-clock" - For Armada XP SoC core clocks
+	"marvell,dove-core-clock" - for Dove SoC core clocks
+	"marvell,kirkwood-core-clock" - for Kirkwood SoC (except mv88f6180)
+	"marvell,mv88f6180-core-clock" - for Kirkwood MV88f6180 SoC
+- reg : shall be the register address of the Sample-At-Reset (SAR) register
+- #clock-cells : from common clock binding; shall be set to 1
+
+Optional properties:
+- clock-output-names : from common clock binding; allows overwrite default clock
+	output names ("tclk", "cpuclk", "l2clk", "ddrclk")
+
+Example:
+
+core_clk: core-clocks@d0214 {
+	compatible = "marvell,dove-core-clock";
+	reg = <0xd0214 0x4>;
+	#clock-cells = <1>;
+};
+
+spi0: spi@10600 {
+	compatible = "marvell,orion-spi";
+	/* ... */
+	/* get tclk from core clock provider */
+	clocks = <&core_clk 0>;
+};
diff --git a/drivers/clk/Kconfig b/drivers/clk/Kconfig
index bace9e98f75d..60427c0d23e6 100644
--- a/drivers/clk/Kconfig
+++ b/drivers/clk/Kconfig
@@ -54,3 +54,5 @@ config COMMON_CLK_MAX77686
 	  This driver supports Maxim 77686 crystal oscillator clock. 
 
 endmenu
+
+source "drivers/clk/mvebu/Kconfig"
diff --git a/drivers/clk/Makefile b/drivers/clk/Makefile
index 71a25b91de00..d0a14ae8d49c 100644
--- a/drivers/clk/Makefile
+++ b/drivers/clk/Makefile
@@ -13,6 +13,7 @@ obj-$(CONFIG_PLAT_SPEAR)	+= spear/
 obj-$(CONFIG_ARCH_U300)		+= clk-u300.o
 obj-$(CONFIG_COMMON_CLK_VERSATILE) += versatile/
 obj-$(CONFIG_ARCH_PRIMA2)	+= clk-prima2.o
+obj-$(CONFIG_PLAT_ORION)	+= mvebu/
 ifeq ($(CONFIG_COMMON_CLK), y)
 obj-$(CONFIG_ARCH_MMP)		+= mmp/
 endif
diff --git a/drivers/clk/mvebu/Kconfig b/drivers/clk/mvebu/Kconfig
new file mode 100644
index 000000000000..fd7bf97ff74d
--- /dev/null
+++ b/drivers/clk/mvebu/Kconfig
@@ -0,0 +1,3 @@
+config MVEBU_CLK_CORE
+       bool
+
diff --git a/drivers/clk/mvebu/Makefile b/drivers/clk/mvebu/Makefile
new file mode 100644
index 000000000000..de1d9617f75a
--- /dev/null
+++ b/drivers/clk/mvebu/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_MVEBU_CLK_CORE) 	+= clk.o clk-core.o
diff --git a/drivers/clk/mvebu/clk-core.c b/drivers/clk/mvebu/clk-core.c
new file mode 100644
index 000000000000..69056a7479e8
--- /dev/null
+++ b/drivers/clk/mvebu/clk-core.c
@@ -0,0 +1,675 @@
+/*
+ * Marvell EBU clock core handling defined at reset
+ *
+ * Copyright (C) 2012 Marvell
+ *
+ * Gregory CLEMENT <gregory.clement@free-electrons.com>
+ * Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2.  This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+#include <linux/kernel.h>
+#include <linux/clk.h>
+#include <linux/clkdev.h>
+#include <linux/clk-provider.h>
+#include <linux/of_address.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include "clk-core.h"
+
+struct core_ratio {
+	int id;
+	const char *name;
+};
+
+struct core_clocks {
+	u32 (*get_tclk_freq)(void __iomem *sar);
+	u32 (*get_cpu_freq)(void __iomem *sar);
+	void (*get_clk_ratio)(void __iomem *sar, int id, int *mult, int *div);
+	const struct core_ratio *ratios;
+	int num_ratios;
+};
+
+static struct clk_onecell_data clk_data;
+
+static void __init mvebu_clk_core_setup(struct device_node *np,
+				 struct core_clocks *coreclk)
+{
+	const char *tclk_name = "tclk";
+	const char *cpuclk_name = "cpuclk";
+	void __iomem *base;
+	unsigned long rate;
+	int n;
+
+	base = of_iomap(np, 0);
+	if (WARN_ON(!base))
+		return;
+
+	/*
+	 * Allocate struct for TCLK, cpu clk, and core ratio clocks
+	 */
+	clk_data.clk_num = 2 + coreclk->num_ratios;
+	clk_data.clks = kzalloc(clk_data.clk_num * sizeof(struct clk *),
+				GFP_KERNEL);
+	if (WARN_ON(!clk_data.clks))
+		return;
+
+	/*
+	 * Register TCLK
+	 */
+	of_property_read_string_index(np, "clock-output-names", 0,
+				      &tclk_name);
+	rate = coreclk->get_tclk_freq(base);
+	clk_data.clks[0] = clk_register_fixed_rate(NULL, tclk_name, NULL,
+						   CLK_IS_ROOT, rate);
+	WARN_ON(IS_ERR(clk_data.clks[0]));
+
+	/*
+	 * Register CPU clock
+	 */
+	of_property_read_string_index(np, "clock-output-names", 1,
+				      &cpuclk_name);
+	rate = coreclk->get_cpu_freq(base);
+	clk_data.clks[1] = clk_register_fixed_rate(NULL, cpuclk_name, NULL,
+						   CLK_IS_ROOT, rate);
+	WARN_ON(IS_ERR(clk_data.clks[1]));
+
+	/*
+	 * Register fixed-factor clocks derived from CPU clock
+	 */
+	for (n = 0; n < coreclk->num_ratios; n++) {
+		const char *rclk_name = coreclk->ratios[n].name;
+		int mult, div;
+
+		of_property_read_string_index(np, "clock-output-names",
+					      2+n, &rclk_name);
+		coreclk->get_clk_ratio(base, coreclk->ratios[n].id,
+				       &mult, &div);
+		clk_data.clks[2+n] = clk_register_fixed_factor(NULL, rclk_name,
+				       cpuclk_name, 0, mult, div);
+		WARN_ON(IS_ERR(clk_data.clks[2+n]));
+	};
+
+	/*
+	 * SAR register isn't needed anymore
+	 */
+	iounmap(base);
+
+	of_clk_add_provider(np, of_clk_src_onecell_get, &clk_data);
+}
+
+#ifdef CONFIG_MACH_ARMADA_370_XP
+/*
+ * Armada 370/XP Sample At Reset is a 64 bit bitfiled split in two
+ * register of 32 bits
+ */
+
+#define SARL				    0	/* Low part [0:31] */
+#define	    SARL_AXP_PCLK_FREQ_OPT	    21
+#define	    SARL_AXP_PCLK_FREQ_OPT_MASK	    0x7
+#define	    SARL_A370_PCLK_FREQ_OPT	    11
+#define	    SARL_A370_PCLK_FREQ_OPT_MASK    0xF
+#define	    SARL_AXP_FAB_FREQ_OPT	    24
+#define	    SARL_AXP_FAB_FREQ_OPT_MASK	    0xF
+#define	    SARL_A370_FAB_FREQ_OPT	    15
+#define	    SARL_A370_FAB_FREQ_OPT_MASK	    0x1F
+#define	    SARL_A370_TCLK_FREQ_OPT	    20
+#define	    SARL_A370_TCLK_FREQ_OPT_MASK    0x1
+#define SARH				    4	/* High part [32:63] */
+#define	    SARH_AXP_PCLK_FREQ_OPT	    (52-32)
+#define	    SARH_AXP_PCLK_FREQ_OPT_MASK	    0x1
+#define	    SARH_AXP_PCLK_FREQ_OPT_SHIFT    3
+#define	    SARH_AXP_FAB_FREQ_OPT	    (51-32)
+#define	    SARH_AXP_FAB_FREQ_OPT_MASK	    0x1
+#define	    SARH_AXP_FAB_FREQ_OPT_SHIFT	    4
+
+static const u32 __initconst armada_370_tclk_frequencies[] = {
+	16600000,
+	20000000,
+};
+
+static u32 __init armada_370_get_tclk_freq(void __iomem *sar)
+{
+	u8 tclk_freq_select = 0;
+
+	tclk_freq_select = ((readl(sar) >> SARL_A370_TCLK_FREQ_OPT) &
+			    SARL_A370_TCLK_FREQ_OPT_MASK);
+	return armada_370_tclk_frequencies[tclk_freq_select];
+}
+
+static const u32 __initconst armada_370_cpu_frequencies[] = {
+	400000000,
+	533000000,
+	667000000,
+	800000000,
+	1000000000,
+	1067000000,
+	1200000000,
+};
+
+static u32 __init armada_370_get_cpu_freq(void __iomem *sar)
+{
+	u32 cpu_freq;
+	u8 cpu_freq_select = 0;
+
+	cpu_freq_select = ((readl(sar) >> SARL_A370_PCLK_FREQ_OPT) &
+			   SARL_A370_PCLK_FREQ_OPT_MASK);
+	if (cpu_freq_select > ARRAY_SIZE(armada_370_cpu_frequencies)) {
+		pr_err("CPU freq select unsuported %d\n", cpu_freq_select);
+		cpu_freq = 0;
+	} else
+		cpu_freq = armada_370_cpu_frequencies[cpu_freq_select];
+
+	return cpu_freq;
+}
+
+enum { A370_XP_NBCLK, A370_XP_HCLK, A370_XP_DRAMCLK };
+
+static const struct core_ratio __initconst armada_370_xp_core_ratios[] = {
+	{ .id = A370_XP_NBCLK,	 .name = "nbclk" },
+	{ .id = A370_XP_HCLK,	 .name = "hclk" },
+	{ .id = A370_XP_DRAMCLK, .name = "dramclk" },
+};
+
+static const int __initconst armada_370_xp_nbclk_ratios[32][2] = {
+	{0, 1}, {1, 2}, {2, 2}, {2, 2},
+	{1, 2}, {1, 2}, {1, 1}, {2, 3},
+	{0, 1}, {1, 2}, {2, 4}, {0, 1},
+	{1, 2}, {0, 1}, {0, 1}, {2, 2},
+	{0, 1}, {0, 1}, {0, 1}, {1, 1},
+	{2, 3}, {0, 1}, {0, 1}, {0, 1},
+	{0, 1}, {0, 1}, {0, 1}, {1, 1},
+	{0, 1}, {0, 1}, {0, 1}, {0, 1},
+};
+
+static const int __initconst armada_370_xp_hclk_ratios[32][2] = {
+	{0, 1}, {1, 2}, {2, 6}, {2, 3},
+	{1, 3}, {1, 4}, {1, 2}, {2, 6},
+	{0, 1}, {1, 6}, {2, 10}, {0, 1},
+	{1, 4}, {0, 1}, {0, 1}, {2, 5},
+	{0, 1}, {0, 1}, {0, 1}, {1, 2},
+	{2, 6}, {0, 1}, {0, 1}, {0, 1},
+	{0, 1}, {0, 1}, {0, 1}, {1, 1},
+	{0, 1}, {0, 1}, {0, 1}, {0, 1},
+};
+
+static const int __initconst armada_370_xp_dramclk_ratios[32][2] = {
+	{0, 1}, {1, 2}, {2, 3}, {2, 3},
+	{1, 3}, {1, 2}, {1, 2}, {2, 6},
+	{0, 1}, {1, 3}, {2, 5}, {0, 1},
+	{1, 4}, {0, 1}, {0, 1}, {2, 5},
+	{0, 1}, {0, 1}, {0, 1}, {1, 1},
+	{2, 3}, {0, 1}, {0, 1}, {0, 1},
+	{0, 1}, {0, 1}, {0, 1}, {1, 1},
+	{0, 1}, {0, 1}, {0, 1}, {0, 1},
+};
+
+static void __init armada_370_xp_get_clk_ratio(u32 opt,
+	void __iomem *sar, int id, int *mult, int *div)
+{
+	switch (id) {
+	case A370_XP_NBCLK:
+		*mult = armada_370_xp_nbclk_ratios[opt][0];
+		*div = armada_370_xp_nbclk_ratios[opt][1];
+		break;
+	case A370_XP_HCLK:
+		*mult = armada_370_xp_hclk_ratios[opt][0];
+		*div = armada_370_xp_hclk_ratios[opt][1];
+		break;
+	case A370_XP_DRAMCLK:
+		*mult = armada_370_xp_dramclk_ratios[opt][0];
+		*div = armada_370_xp_dramclk_ratios[opt][1];
+		break;
+	}
+}
+
+static void __init armada_370_get_clk_ratio(
+	void __iomem *sar, int id, int *mult, int *div)
+{
+	u32 opt = ((readl(sar) >> SARL_A370_FAB_FREQ_OPT) &
+		SARL_A370_FAB_FREQ_OPT_MASK);
+
+	armada_370_xp_get_clk_ratio(opt, sar, id, mult, div);
+}
+
+
+static const struct core_clocks armada_370_core_clocks = {
+	.get_tclk_freq = armada_370_get_tclk_freq,
+	.get_cpu_freq = armada_370_get_cpu_freq,
+	.get_clk_ratio = armada_370_get_clk_ratio,
+	.ratios = armada_370_xp_core_ratios,
+	.num_ratios = ARRAY_SIZE(armada_370_xp_core_ratios),
+};
+
+static const u32 __initconst armada_xp_cpu_frequencies[] = {
+	1000000000,
+	1066000000,
+	1200000000,
+	1333000000,
+	1500000000,
+	1666000000,
+	1800000000,
+	2000000000,
+	667000000,
+	0,
+	800000000,
+	1600000000,
+};
+
+/* For Armada XP TCLK frequency is fix: 250MHz */
+static u32 __init armada_xp_get_tclk_freq(void __iomem *sar)
+{
+	return 250 * 1000 * 1000;
+}
+
+static u32 __init armada_xp_get_cpu_freq(void __iomem *sar)
+{
+	u32 cpu_freq;
+	u8 cpu_freq_select = 0;
+
+	cpu_freq_select = ((readl(sar) >> SARL_AXP_PCLK_FREQ_OPT) &
+			   SARL_AXP_PCLK_FREQ_OPT_MASK);
+	/*
+	 * The upper bit is not contiguous to the other ones and
+	 * located in the high part of the SAR registers
+	 */
+	cpu_freq_select |= (((readl(sar+4) >> SARH_AXP_PCLK_FREQ_OPT) &
+			     SARH_AXP_PCLK_FREQ_OPT_MASK)
+			    << SARH_AXP_PCLK_FREQ_OPT_SHIFT);
+	if (cpu_freq_select > ARRAY_SIZE(armada_xp_cpu_frequencies)) {
+		pr_err("CPU freq select unsuported: %d\n", cpu_freq_select);
+		cpu_freq = 0;
+	} else
+		cpu_freq = armada_xp_cpu_frequencies[cpu_freq_select];
+
+	return cpu_freq;
+}
+
+static void __init armada_xp_get_clk_ratio(
+	void __iomem *sar, int id, int *mult, int *div)
+{
+
+	u32 opt = ((readl(sar) >> SARL_AXP_FAB_FREQ_OPT) &
+	      SARL_AXP_FAB_FREQ_OPT_MASK);
+	/*
+	 * The upper bit is not contiguous to the other ones and
+	 * located in the high part of the SAR registers
+	 */
+	opt |= (((readl(sar+4) >> SARH_AXP_FAB_FREQ_OPT) &
+		SARH_AXP_FAB_FREQ_OPT_MASK)
+	       << SARH_AXP_FAB_FREQ_OPT_SHIFT);
+
+	armada_370_xp_get_clk_ratio(opt, sar, id, mult, div);
+}
+
+static const struct core_clocks armada_xp_core_clocks = {
+	.get_tclk_freq = armada_xp_get_tclk_freq,
+	.get_cpu_freq = armada_xp_get_cpu_freq,
+	.get_clk_ratio = armada_xp_get_clk_ratio,
+	.ratios = armada_370_xp_core_ratios,
+	.num_ratios = ARRAY_SIZE(armada_370_xp_core_ratios),
+};
+
+#endif /* CONFIG_MACH_ARMADA_370_XP */
+
+/*
+ * Dove PLL sample-at-reset configuration
+ *
+ * SAR0[8:5]   : CPU frequency
+ *		 5  = 1000 MHz
+ *		 6  =  933 MHz
+ *		 7  =  933 MHz
+ *		 8  =  800 MHz
+ *		 9  =  800 MHz
+ *		 10 =  800 MHz
+ *		 11 = 1067 MHz
+ *		 12 =  667 MHz
+ *		 13 =  533 MHz
+ *		 14 =  400 MHz
+ *		 15 =  333 MHz
+ *		 others reserved.
+ *
+ * SAR0[11:9]  : CPU to L2 Clock divider ratio
+ *		 0 = (1/1) * CPU
+ *		 2 = (1/2) * CPU
+ *		 4 = (1/3) * CPU
+ *		 6 = (1/4) * CPU
+ *		 others reserved.
+ *
+ * SAR0[15:12] : CPU to DDR DRAM Clock divider ratio
+ *		 0  = (1/1) * CPU
+ *		 2  = (1/2) * CPU
+ *		 3  = (2/5) * CPU
+ *		 4  = (1/3) * CPU
+ *		 6  = (1/4) * CPU
+ *		 8  = (1/5) * CPU
+ *		 10 = (1/6) * CPU
+ *		 12 = (1/7) * CPU
+ *		 14 = (1/8) * CPU
+ *		 15 = (1/10) * CPU
+ *		 others reserved.
+ *
+ * SAR0[24:23] : TCLK frequency
+ *		 0 = 166 MHz
+ *		 1 = 125 MHz
+ *		 others reserved.
+ */
+#ifdef CONFIG_ARCH_DOVE
+#define SAR_DOVE_CPU_FREQ		5
+#define SAR_DOVE_CPU_FREQ_MASK		0xf
+#define SAR_DOVE_L2_RATIO		9
+#define SAR_DOVE_L2_RATIO_MASK		0x7
+#define SAR_DOVE_DDR_RATIO		12
+#define SAR_DOVE_DDR_RATIO_MASK		0xf
+#define SAR_DOVE_TCLK_FREQ		23
+#define SAR_DOVE_TCLK_FREQ_MASK		0x3
+
+static const u32 __initconst dove_tclk_frequencies[] = {
+	166666667,
+	125000000,
+	0, 0
+};
+
+static u32 __init dove_get_tclk_freq(void __iomem *sar)
+{
+	u32 opt = (readl(sar) >> SAR_DOVE_TCLK_FREQ) &
+		SAR_DOVE_TCLK_FREQ_MASK;
+	return dove_tclk_frequencies[opt];
+}
+
+static const u32 __initconst dove_cpu_frequencies[] = {
+	0, 0, 0, 0, 0,
+	1000000000,
+	933333333, 933333333,
+	800000000, 800000000, 800000000,
+	1066666667,
+	666666667,
+	533333333,
+	400000000,
+	333333333
+};
+
+static u32 __init dove_get_cpu_freq(void __iomem *sar)
+{
+	u32 opt = (readl(sar) >> SAR_DOVE_CPU_FREQ) &
+		SAR_DOVE_CPU_FREQ_MASK;
+	return dove_cpu_frequencies[opt];
+}
+
+enum { DOVE_CPU_TO_L2, DOVE_CPU_TO_DDR };
+
+static const struct core_ratio __initconst dove_core_ratios[] = {
+	{ .id = DOVE_CPU_TO_L2, .name = "l2clk", },
+	{ .id = DOVE_CPU_TO_DDR, .name = "ddrclk", }
+};
+
+static const int __initconst dove_cpu_l2_ratios[8][2] = {
+	{ 1, 1 }, { 0, 1 }, { 1, 2 }, { 0, 1 },
+	{ 1, 3 }, { 0, 1 }, { 1, 4 }, { 0, 1 }
+};
+
+static const int __initconst dove_cpu_ddr_ratios[16][2] = {
+	{ 1, 1 }, { 0, 1 }, { 1, 2 }, { 2, 5 },
+	{ 1, 3 }, { 0, 1 }, { 1, 4 }, { 0, 1 },
+	{ 1, 5 }, { 0, 1 }, { 1, 6 }, { 0, 1 },
+	{ 1, 7 }, { 0, 1 }, { 1, 8 }, { 1, 10 }
+};
+
+static void __init dove_get_clk_ratio(
+	void __iomem *sar, int id, int *mult, int *div)
+{
+	switch (id) {
+	case DOVE_CPU_TO_L2:
+	{
+		u32 opt = (readl(sar) >> SAR_DOVE_L2_RATIO) &
+			SAR_DOVE_L2_RATIO_MASK;
+		*mult = dove_cpu_l2_ratios[opt][0];
+		*div = dove_cpu_l2_ratios[opt][1];
+		break;
+	}
+	case DOVE_CPU_TO_DDR:
+	{
+		u32 opt = (readl(sar) >> SAR_DOVE_DDR_RATIO) &
+			SAR_DOVE_DDR_RATIO_MASK;
+		*mult = dove_cpu_ddr_ratios[opt][0];
+		*div = dove_cpu_ddr_ratios[opt][1];
+		break;
+	}
+	}
+}
+
+static const struct core_clocks dove_core_clocks = {
+	.get_tclk_freq = dove_get_tclk_freq,
+	.get_cpu_freq = dove_get_cpu_freq,
+	.get_clk_ratio = dove_get_clk_ratio,
+	.ratios = dove_core_ratios,
+	.num_ratios = ARRAY_SIZE(dove_core_ratios),
+};
+#endif /* CONFIG_ARCH_DOVE */
+
+/*
+ * Kirkwood PLL sample-at-reset configuration
+ * (6180 has different SAR layout than other Kirkwood SoCs)
+ *
+ * SAR0[4:3,22,1] : CPU frequency (6281,6292,6282)
+ *	4  =  600 MHz
+ *	6  =  800 MHz
+ *	7  = 1000 MHz
+ *	9  = 1200 MHz
+ *	12 = 1500 MHz
+ *	13 = 1600 MHz
+ *	14 = 1800 MHz
+ *	15 = 2000 MHz
+ *	others reserved.
+ *
+ * SAR0[19,10:9] : CPU to L2 Clock divider ratio (6281,6292,6282)
+ *	1 = (1/2) * CPU
+ *	3 = (1/3) * CPU
+ *	5 = (1/4) * CPU
+ *	others reserved.
+ *
+ * SAR0[8:5] : CPU to DDR DRAM Clock divider ratio (6281,6292,6282)
+ *	2 = (1/2) * CPU
+ *	4 = (1/3) * CPU
+ *	6 = (1/4) * CPU
+ *	7 = (2/9) * CPU
+ *	8 = (1/5) * CPU
+ *	9 = (1/6) * CPU
+ *	others reserved.
+ *
+ * SAR0[4:2] : Kirkwood 6180 cpu/l2/ddr clock configuration (6180 only)
+ *	5 = [CPU =  600 MHz, L2 = (1/2) * CPU, DDR = 200 MHz = (1/3) * CPU]
+ *	6 = [CPU =  800 MHz, L2 = (1/2) * CPU, DDR = 200 MHz = (1/4) * CPU]
+ *	7 = [CPU = 1000 MHz, L2 = (1/2) * CPU, DDR = 200 MHz = (1/5) * CPU]
+ *	others reserved.
+ *
+ * SAR0[21] : TCLK frequency
+ *	0 = 200 MHz
+ *	1 = 166 MHz
+ *	others reserved.
+ */
+#ifdef CONFIG_ARCH_KIRKWOOD
+#define SAR_KIRKWOOD_CPU_FREQ(x)	\
+	(((x & (1 <<  1)) >>  1) |	\
+	 ((x & (1 << 22)) >> 21) |	\
+	 ((x & (3 <<  3)) >>  1))
+#define SAR_KIRKWOOD_L2_RATIO(x)	\
+	(((x & (3 <<  9)) >> 9) |	\
+	 (((x & (1 << 19)) >> 17)))
+#define SAR_KIRKWOOD_DDR_RATIO		5
+#define SAR_KIRKWOOD_DDR_RATIO_MASK	0xf
+#define SAR_MV88F6180_CLK		2
+#define SAR_MV88F6180_CLK_MASK		0x7
+#define SAR_KIRKWOOD_TCLK_FREQ		21
+#define SAR_KIRKWOOD_TCLK_FREQ_MASK	0x1
+
+enum { KIRKWOOD_CPU_TO_L2, KIRKWOOD_CPU_TO_DDR };
+
+static const struct core_ratio __initconst kirkwood_core_ratios[] = {
+	{ .id = KIRKWOOD_CPU_TO_L2, .name = "l2clk", },
+	{ .id = KIRKWOOD_CPU_TO_DDR, .name = "ddrclk", }
+};
+
+static u32 __init kirkwood_get_tclk_freq(void __iomem *sar)
+{
+	u32 opt = (readl(sar) >> SAR_KIRKWOOD_TCLK_FREQ) &
+		SAR_KIRKWOOD_TCLK_FREQ_MASK;
+	return (opt) ? 166666667 : 200000000;
+}
+
+static const u32 __initconst kirkwood_cpu_frequencies[] = {
+	0, 0, 0, 0,
+	600000000,
+	0,
+	800000000,
+	1000000000,
+	0,
+	1200000000,
+	0, 0,
+	1500000000,
+	1600000000,
+	1800000000,
+	2000000000
+};
+
+static u32 __init kirkwood_get_cpu_freq(void __iomem *sar)
+{
+	u32 opt = SAR_KIRKWOOD_CPU_FREQ(readl(sar));
+	return kirkwood_cpu_frequencies[opt];
+}
+
+static const int __initconst kirkwood_cpu_l2_ratios[8][2] = {
+	{ 0, 1 }, { 1, 2 }, { 0, 1 }, { 1, 3 },
+	{ 0, 1 }, { 1, 4 }, { 0, 1 }, { 0, 1 }
+};
+
+static const int __initconst kirkwood_cpu_ddr_ratios[16][2] = {
+	{ 0, 1 }, { 0, 1 }, { 1, 2 }, { 0, 1 },
+	{ 1, 3 }, { 0, 1 }, { 1, 4 }, { 2, 9 },
+	{ 1, 5 }, { 1, 6 }, { 0, 1 }, { 0, 1 },
+	{ 0, 1 }, { 0, 1 }, { 0, 1 }, { 0, 1 }
+};
+
+static void __init kirkwood_get_clk_ratio(
+	void __iomem *sar, int id, int *mult, int *div)
+{
+	switch (id) {
+	case KIRKWOOD_CPU_TO_L2:
+	{
+		u32 opt = SAR_KIRKWOOD_L2_RATIO(readl(sar));
+		*mult = kirkwood_cpu_l2_ratios[opt][0];
+		*div = kirkwood_cpu_l2_ratios[opt][1];
+		break;
+	}
+	case KIRKWOOD_CPU_TO_DDR:
+	{
+		u32 opt = (readl(sar) >> SAR_KIRKWOOD_DDR_RATIO) &
+			SAR_KIRKWOOD_DDR_RATIO_MASK;
+		*mult = kirkwood_cpu_ddr_ratios[opt][0];
+		*div = kirkwood_cpu_ddr_ratios[opt][1];
+		break;
+	}
+	}
+}
+
+static const struct core_clocks kirkwood_core_clocks = {
+	.get_tclk_freq = kirkwood_get_tclk_freq,
+	.get_cpu_freq = kirkwood_get_cpu_freq,
+	.get_clk_ratio = kirkwood_get_clk_ratio,
+	.ratios = kirkwood_core_ratios,
+	.num_ratios = ARRAY_SIZE(kirkwood_core_ratios),
+};
+
+static const u32 __initconst mv88f6180_cpu_frequencies[] = {
+	0, 0, 0, 0, 0,
+	600000000,
+	800000000,
+	1000000000
+};
+
+static u32 __init mv88f6180_get_cpu_freq(void __iomem *sar)
+{
+	u32 opt = (readl(sar) >> SAR_MV88F6180_CLK) & SAR_MV88F6180_CLK_MASK;
+	return mv88f6180_cpu_frequencies[opt];
+}
+
+static const int __initconst mv88f6180_cpu_ddr_ratios[8][2] = {
+	{ 0, 1 }, { 0, 1 }, { 0, 1 }, { 0, 1 },
+	{ 0, 1 }, { 1, 3 }, { 1, 4 }, { 1, 5 }
+};
+
+static void __init mv88f6180_get_clk_ratio(
+	void __iomem *sar, int id, int *mult, int *div)
+{
+	switch (id) {
+	case KIRKWOOD_CPU_TO_L2:
+	{
+		/* mv88f6180 has a fixed 1:2 CPU-to-L2 ratio */
+		*mult = 1;
+		*div = 2;
+		break;
+	}
+	case KIRKWOOD_CPU_TO_DDR:
+	{
+		u32 opt = (readl(sar) >> SAR_MV88F6180_CLK) &
+			SAR_MV88F6180_CLK_MASK;
+		*mult = mv88f6180_cpu_ddr_ratios[opt][0];
+		*div = mv88f6180_cpu_ddr_ratios[opt][1];
+		break;
+	}
+	}
+}
+
+static const struct core_clocks mv88f6180_core_clocks = {
+	.get_tclk_freq = kirkwood_get_tclk_freq,
+	.get_cpu_freq = mv88f6180_get_cpu_freq,
+	.get_clk_ratio = mv88f6180_get_clk_ratio,
+	.ratios = kirkwood_core_ratios,
+	.num_ratios = ARRAY_SIZE(kirkwood_core_ratios),
+};
+#endif /* CONFIG_ARCH_KIRKWOOD */
+
+static const __initdata struct of_device_id clk_core_match[] = {
+#ifdef CONFIG_MACH_ARMADA_370_XP
+	{
+		.compatible = "marvell,armada-370-core-clock",
+		.data = &armada_370_core_clocks,
+	},
+	{
+		.compatible = "marvell,armada-xp-core-clock",
+		.data = &armada_xp_core_clocks,
+	},
+#endif
+#ifdef CONFIG_ARCH_DOVE
+	{
+		.compatible = "marvell,dove-core-clock",
+		.data = &dove_core_clocks,
+	},
+#endif
+
+#ifdef CONFIG_ARCH_KIRKWOOD
+	{
+		.compatible = "marvell,kirkwood-core-clock",
+		.data = &kirkwood_core_clocks,
+	},
+	{
+		.compatible = "marvell,mv88f6180-core-clock",
+		.data = &mv88f6180_core_clocks,
+	},
+#endif
+
+	{ }
+};
+
+void __init mvebu_core_clk_init(void)
+{
+	struct device_node *np;
+
+	for_each_matching_node(np, clk_core_match) {
+		const struct of_device_id *match =
+			of_match_node(clk_core_match, np);
+		mvebu_clk_core_setup(np, (struct core_clocks *)match->data);
+	}
+}
diff --git a/drivers/clk/mvebu/clk-core.h b/drivers/clk/mvebu/clk-core.h
new file mode 100644
index 000000000000..28b5e02e9885
--- /dev/null
+++ b/drivers/clk/mvebu/clk-core.h
@@ -0,0 +1,18 @@
+/*
+ *  * Marvell EBU clock core handling defined at reset
+ *
+ * Copyright (C) 2012 Marvell
+ *
+ * Gregory CLEMENT <gregory.clement@free-electrons.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2.  This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#ifndef __MVEBU_CLK_CORE_H
+#define __MVEBU_CLK_CORE_H
+
+void __init mvebu_core_clk_init(void);
+
+#endif
diff --git a/drivers/clk/mvebu/clk.c b/drivers/clk/mvebu/clk.c
new file mode 100644
index 000000000000..e6742acf9880
--- /dev/null
+++ b/drivers/clk/mvebu/clk.c
@@ -0,0 +1,23 @@
+/*
+ * Marvell EBU SoC clock handling.
+ *
+ * Copyright (C) 2012 Marvell
+ *
+ * Gregory CLEMENT <gregory.clement@free-electrons.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2.  This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+#include <linux/kernel.h>
+#include <linux/clk.h>
+#include <linux/clk-provider.h>
+#include <linux/of_address.h>
+#include <linux/clk/mvebu.h>
+#include <linux/of.h>
+#include "clk-core.h"
+
+void __init mvebu_clocks_init(void)
+{
+	mvebu_core_clk_init();
+}
diff --git a/include/linux/clk/mvebu.h b/include/linux/clk/mvebu.h
new file mode 100644
index 000000000000..8c4ae713b063
--- /dev/null
+++ b/include/linux/clk/mvebu.h
@@ -0,0 +1,22 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#ifndef __CLK_MVEBU_H_
+#define __CLK_MVEBU_H_
+
+void __init mvebu_clocks_init(void);
+
+#endif
-- 
cgit v1.2.3


From 60d151f38799d5e15845ee04b73cbf3839f1b06c Mon Sep 17 00:00:00 2001
From: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Date: Mon, 29 Oct 2012 16:54:49 +0100
Subject: dma: mv_xor: allow channels to be registered directly from the main
 device

Extend the XOR engine driver (currently called "mv_xor_shared") so
that XOR channels can be passed in the platform_data structure, and be
registered from there.

This will allow the users of the driver to be converted to the single
platform_driver variant of the mv_xor driver.

Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
---
 drivers/dma/mv_xor.c                     | 45 ++++++++++++++++++++++++++++++++
 drivers/dma/mv_xor.h                     |  8 +++---
 include/linux/platform_data/dma-mv_xor.h |  3 +++
 3 files changed, 53 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c
index f7b99193e884..6ed3162cccc9 100644
--- a/drivers/dma/mv_xor.c
+++ b/drivers/dma/mv_xor.c
@@ -1292,7 +1292,9 @@ static int mv_xor_shared_probe(struct platform_device *pdev)
 {
 	const struct mbus_dram_target_info *dram;
 	struct mv_xor_shared_private *msp;
+	struct mv_xor_shared_platform_data *pdata = pdev->dev.platform_data;
 	struct resource *res;
+	int i, ret;
 
 	dev_notice(&pdev->dev, "Marvell shared XOR driver\n");
 
@@ -1334,12 +1336,55 @@ static int mv_xor_shared_probe(struct platform_device *pdev)
 	if (!IS_ERR(msp->clk))
 		clk_prepare_enable(msp->clk);
 
+	if (pdata && pdata->channels) {
+		for (i = 0; i < MV_XOR_MAX_CHANNELS; i++) {
+			struct mv_xor_platform_data *cd;
+			int irq;
+
+			cd = &pdata->channels[i];
+			if (!cd) {
+				ret = -ENODEV;
+				goto err_channel_add;
+			}
+
+			irq = platform_get_irq(pdev, i);
+			if (irq < 0) {
+				ret = irq;
+				goto err_channel_add;
+			}
+
+			msp->channels[i] =
+				mv_xor_channel_add(msp, pdev, cd->hw_id,
+						   cd->cap_mask,
+						   cd->pool_size, irq);
+			if (IS_ERR(msp->channels[i])) {
+				ret = PTR_ERR(msp->channels[i]);
+				goto err_channel_add;
+			}
+		}
+	}
+
 	return 0;
+
+err_channel_add:
+	for (i = 0; i < MV_XOR_MAX_CHANNELS; i++)
+		if (msp->channels[i])
+			mv_xor_channel_remove(msp->channels[i]);
+
+	clk_disable_unprepare(msp->clk);
+	clk_put(msp->clk);
+	return ret;
 }
 
 static int mv_xor_shared_remove(struct platform_device *pdev)
 {
 	struct mv_xor_shared_private *msp = platform_get_drvdata(pdev);
+	int i;
+
+	for (i = 0; i < MV_XOR_MAX_CHANNELS; i++) {
+		if (msp->channels[i])
+			mv_xor_channel_remove(msp->channels[i]);
+	}
 
 	if (!IS_ERR(msp->clk)) {
 		clk_disable_unprepare(msp->clk);
diff --git a/drivers/dma/mv_xor.h b/drivers/dma/mv_xor.h
index a0641aebbdef..686575f6b9f5 100644
--- a/drivers/dma/mv_xor.h
+++ b/drivers/dma/mv_xor.h
@@ -26,6 +26,7 @@
 #define USE_TIMER
 #define MV_XOR_SLOT_SIZE		64
 #define MV_XOR_THRESHOLD		1
+#define MV_XOR_MAX_CHANNELS             2
 
 #define XOR_OPERATION_MODE_XOR		0
 #define XOR_OPERATION_MODE_MEMCPY	2
@@ -53,9 +54,10 @@
 #define WINDOW_BAR_ENABLE(chan)	(0x240 + ((chan) << 2))
 
 struct mv_xor_shared_private {
-	void __iomem	*xor_base;
-	void __iomem	*xor_high_base;
-	struct clk	*clk;
+	void __iomem	     *xor_base;
+	void __iomem	     *xor_high_base;
+	struct clk	     *clk;
+	struct mv_xor_device *channels[MV_XOR_MAX_CHANNELS];
 };
 
 
diff --git a/include/linux/platform_data/dma-mv_xor.h b/include/linux/platform_data/dma-mv_xor.h
index 2ba1f7d76eef..f2aed978ca25 100644
--- a/include/linux/platform_data/dma-mv_xor.h
+++ b/include/linux/platform_data/dma-mv_xor.h
@@ -20,5 +20,8 @@ struct mv_xor_platform_data {
 	size_t				pool_size;
 };
 
+struct mv_xor_shared_platform_data {
+	struct mv_xor_platform_data    *channels;
+};
 
 #endif
-- 
cgit v1.2.3


From 18b2a02c7c8db8bb87a165d26c269968d3dd47bd Mon Sep 17 00:00:00 2001
From: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Date: Tue, 30 Oct 2012 11:54:34 +0100
Subject: dma: mv_xor: remove sub-driver 'mv_xor'

Now that XOR channels are directly registered by the main
'mv_xor_shared' device ->probe() function and all users of the
'mv_xor' device have been removed, we can get rid of the latter.

Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
---
 drivers/dma/mv_xor.c                     | 49 +-------------------------------
 include/linux/platform_data/dma-mv_xor.h |  1 -
 2 files changed, 1 insertion(+), 49 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c
index 6ed3162cccc9..be3907bdef14 100644
--- a/drivers/dma/mv_xor.c
+++ b/drivers/dma/mv_xor.c
@@ -1219,35 +1219,6 @@ mv_xor_channel_add(struct mv_xor_shared_private *msp,
 	return ERR_PTR(ret);
 }
 
-static int __devexit mv_xor_remove(struct platform_device *pdev)
-{
-	struct mv_xor_device *device = platform_get_drvdata(pdev);
-	return mv_xor_channel_remove(device);
-}
-
-static int __devinit mv_xor_probe(struct platform_device *pdev)
-{
-	struct mv_xor_platform_data *plat_data = pdev->dev.platform_data;
-	struct mv_xor_shared_private *msp =
-		platform_get_drvdata(plat_data->shared);
-	struct mv_xor_device *mv_xor_device;
-	int irq;
-
-	irq = platform_get_irq(pdev, 0);
-	if (irq < 0)
-		return irq;
-
-	mv_xor_device = mv_xor_channel_add(msp, pdev, plat_data->hw_id,
-					   plat_data->cap_mask,
-					   plat_data->pool_size, irq);
-	if (IS_ERR(mv_xor_device))
-		return PTR_ERR(mv_xor_device);
-
-	platform_set_drvdata(pdev, mv_xor_device);
-
-	return 0;
-}
-
 static void
 mv_xor_conf_mbus_windows(struct mv_xor_shared_private *msp,
 			 const struct mbus_dram_target_info *dram)
@@ -1279,15 +1250,6 @@ mv_xor_conf_mbus_windows(struct mv_xor_shared_private *msp,
 	writel(win_enable, base + WINDOW_BAR_ENABLE(1));
 }
 
-static struct platform_driver mv_xor_driver = {
-	.probe		= mv_xor_probe,
-	.remove		= __devexit_p(mv_xor_remove),
-	.driver		= {
-		.owner	= THIS_MODULE,
-		.name	= MV_XOR_NAME,
-	},
-};
-
 static int mv_xor_shared_probe(struct platform_device *pdev)
 {
 	const struct mbus_dram_target_info *dram;
@@ -1406,15 +1368,7 @@ static struct platform_driver mv_xor_shared_driver = {
 
 static int __init mv_xor_init(void)
 {
-	int rc;
-
-	rc = platform_driver_register(&mv_xor_shared_driver);
-	if (!rc) {
-		rc = platform_driver_register(&mv_xor_driver);
-		if (rc)
-			platform_driver_unregister(&mv_xor_shared_driver);
-	}
-	return rc;
+	return platform_driver_register(&mv_xor_shared_driver);
 }
 module_init(mv_xor_init);
 
@@ -1422,7 +1376,6 @@ module_init(mv_xor_init);
 #if 0
 static void __exit mv_xor_exit(void)
 {
-	platform_driver_unregister(&mv_xor_driver);
 	platform_driver_unregister(&mv_xor_shared_driver);
 	return;
 }
diff --git a/include/linux/platform_data/dma-mv_xor.h b/include/linux/platform_data/dma-mv_xor.h
index f2aed978ca25..6abe5f9326b6 100644
--- a/include/linux/platform_data/dma-mv_xor.h
+++ b/include/linux/platform_data/dma-mv_xor.h
@@ -11,7 +11,6 @@
 #include <linux/mbus.h>
 
 #define MV_XOR_SHARED_NAME	"mv_xor_shared"
-#define MV_XOR_NAME		"mv_xor"
 
 struct mv_xor_platform_data {
 	struct platform_device		*shared;
-- 
cgit v1.2.3


From 2ccc469cfecee291707dd50e5842cbf206bc17d7 Mon Sep 17 00:00:00 2001
From: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Date: Wed, 31 Oct 2012 13:24:41 +0100
Subject: dma: mv_xor: remove 'shared' from mv_xor_platform_data

This member of the platform_data structure is no longer used, so get
rid of it.

Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
---
 arch/arm/plat-orion/common.c             | 8 --------
 include/linux/platform_data/dma-mv_xor.h | 1 -
 2 files changed, 9 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/plat-orion/common.c b/arch/arm/plat-orion/common.c
index c6e6666986ff..5a66211d523c 100644
--- a/arch/arm/plat-orion/common.c
+++ b/arch/arm/plat-orion/common.c
@@ -625,16 +625,12 @@ static struct resource orion_xor0_shared_resources[] = {
 	},
 };
 
-static struct platform_device orion_xor0_shared;
-
 static struct mv_xor_platform_data orion_xor0_channels_pdata[2] = {
 	{
-		.shared		= &orion_xor0_shared,
 		.hw_id		= 0,
 		.pool_size	= PAGE_SIZE,
 	},
 	{
-		.shared		= &orion_xor0_shared,
 		.hw_id		= 1,
 		.pool_size	= PAGE_SIZE,
 	},
@@ -704,16 +700,12 @@ static struct resource orion_xor1_shared_resources[] = {
 	},
 };
 
-static struct platform_device orion_xor1_shared;
-
 static struct mv_xor_platform_data orion_xor1_channels_pdata[2] = {
 	{
-		.shared		= &orion_xor1_shared,
 		.hw_id		= 0,
 		.pool_size	= PAGE_SIZE,
 	},
 	{
-		.shared		= &orion_xor1_shared,
 		.hw_id		= 1,
 		.pool_size	= PAGE_SIZE,
 	},
diff --git a/include/linux/platform_data/dma-mv_xor.h b/include/linux/platform_data/dma-mv_xor.h
index 6abe5f9326b6..4a0980b14c9b 100644
--- a/include/linux/platform_data/dma-mv_xor.h
+++ b/include/linux/platform_data/dma-mv_xor.h
@@ -13,7 +13,6 @@
 #define MV_XOR_SHARED_NAME	"mv_xor_shared"
 
 struct mv_xor_platform_data {
-	struct platform_device		*shared;
 	int				hw_id;
 	dma_cap_mask_t			cap_mask;
 	size_t				pool_size;
-- 
cgit v1.2.3


From e39f6ec1f9c1d6a7011adf6d95d8d80bad0586b1 Mon Sep 17 00:00:00 2001
From: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Date: Tue, 30 Oct 2012 11:56:26 +0100
Subject: dma: mv_xor: rename mv_xor_platform_data to mv_xor_channel_data

mv_xor_platform_data used to be the platform_data structure associated
to the 'mv_xor' driver. This driver no longer exists, and this data
structure really contains the properties of each XOR channel part of a
given XOR engine. Therefore 'struct mv_xor_channel_data' is a more
appropriate name.

Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
---
 arch/arm/plat-orion/common.c             | 28 ++++++++++++++--------------
 drivers/dma/mv_xor.c                     |  2 +-
 include/linux/platform_data/dma-mv_xor.h |  4 ++--
 3 files changed, 17 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/plat-orion/common.c b/arch/arm/plat-orion/common.c
index 5a66211d523c..7ffbe77c52cb 100644
--- a/arch/arm/plat-orion/common.c
+++ b/arch/arm/plat-orion/common.c
@@ -625,7 +625,7 @@ static struct resource orion_xor0_shared_resources[] = {
 	},
 };
 
-static struct mv_xor_platform_data orion_xor0_channels_pdata[2] = {
+static struct mv_xor_channel_data orion_xor0_channels_data[2] = {
 	{
 		.hw_id		= 0,
 		.pool_size	= PAGE_SIZE,
@@ -637,7 +637,7 @@ static struct mv_xor_platform_data orion_xor0_channels_pdata[2] = {
 };
 
 static struct mv_xor_shared_platform_data orion_xor0_pdata = {
-	.channels = orion_xor0_channels_pdata,
+	.channels = orion_xor0_channels_data,
 };
 
 static struct platform_device orion_xor0_shared = {
@@ -671,12 +671,12 @@ void __init orion_xor0_init(unsigned long mapbase_low,
 	 * two engines can't do memset simultaneously, this limitation
 	 * satisfied by removing memset support from one of the engines.
 	 */
-	dma_cap_set(DMA_MEMCPY, orion_xor0_channels_pdata[0].cap_mask);
-	dma_cap_set(DMA_XOR, orion_xor0_channels_pdata[0].cap_mask);
+	dma_cap_set(DMA_MEMCPY, orion_xor0_channels_data[0].cap_mask);
+	dma_cap_set(DMA_XOR, orion_xor0_channels_data[0].cap_mask);
 
-	dma_cap_set(DMA_MEMSET, orion_xor0_channels_pdata[1].cap_mask);
-	dma_cap_set(DMA_MEMCPY, orion_xor0_channels_pdata[1].cap_mask);
-	dma_cap_set(DMA_XOR, orion_xor0_channels_pdata[1].cap_mask);
+	dma_cap_set(DMA_MEMSET, orion_xor0_channels_data[1].cap_mask);
+	dma_cap_set(DMA_MEMCPY, orion_xor0_channels_data[1].cap_mask);
+	dma_cap_set(DMA_XOR, orion_xor0_channels_data[1].cap_mask);
 
 	platform_device_register(&orion_xor0_shared);
 }
@@ -700,7 +700,7 @@ static struct resource orion_xor1_shared_resources[] = {
 	},
 };
 
-static struct mv_xor_platform_data orion_xor1_channels_pdata[2] = {
+static struct mv_xor_channel_data orion_xor1_channels_data[2] = {
 	{
 		.hw_id		= 0,
 		.pool_size	= PAGE_SIZE,
@@ -712,7 +712,7 @@ static struct mv_xor_platform_data orion_xor1_channels_pdata[2] = {
 };
 
 static struct mv_xor_shared_platform_data orion_xor1_pdata = {
-	.channels = orion_xor1_channels_pdata,
+	.channels = orion_xor1_channels_data,
 };
 
 static struct platform_device orion_xor1_shared = {
@@ -746,12 +746,12 @@ void __init orion_xor1_init(unsigned long mapbase_low,
 	 * two engines can't do memset simultaneously, this limitation
 	 * satisfied by removing memset support from one of the engines.
 	 */
-	dma_cap_set(DMA_MEMCPY, orion_xor1_channels_pdata[0].cap_mask);
-	dma_cap_set(DMA_XOR, orion_xor1_channels_pdata[0].cap_mask);
+	dma_cap_set(DMA_MEMCPY, orion_xor1_channels_data[0].cap_mask);
+	dma_cap_set(DMA_XOR, orion_xor1_channels_data[0].cap_mask);
 
-	dma_cap_set(DMA_MEMSET, orion_xor1_channels_pdata[1].cap_mask);
-	dma_cap_set(DMA_MEMCPY, orion_xor1_channels_pdata[1].cap_mask);
-	dma_cap_set(DMA_XOR, orion_xor1_channels_pdata[1].cap_mask);
+	dma_cap_set(DMA_MEMSET, orion_xor1_channels_data[1].cap_mask);
+	dma_cap_set(DMA_MEMCPY, orion_xor1_channels_data[1].cap_mask);
+	dma_cap_set(DMA_XOR, orion_xor1_channels_data[1].cap_mask);
 
 	platform_device_register(&orion_xor1_shared);
 }
diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c
index be3907bdef14..c7926e417281 100644
--- a/drivers/dma/mv_xor.c
+++ b/drivers/dma/mv_xor.c
@@ -1300,7 +1300,7 @@ static int mv_xor_shared_probe(struct platform_device *pdev)
 
 	if (pdata && pdata->channels) {
 		for (i = 0; i < MV_XOR_MAX_CHANNELS; i++) {
-			struct mv_xor_platform_data *cd;
+			struct mv_xor_channel_data *cd;
 			int irq;
 
 			cd = &pdata->channels[i];
diff --git a/include/linux/platform_data/dma-mv_xor.h b/include/linux/platform_data/dma-mv_xor.h
index 4a0980b14c9b..40ea3d5f5b9f 100644
--- a/include/linux/platform_data/dma-mv_xor.h
+++ b/include/linux/platform_data/dma-mv_xor.h
@@ -12,14 +12,14 @@
 
 #define MV_XOR_SHARED_NAME	"mv_xor_shared"
 
-struct mv_xor_platform_data {
+struct mv_xor_channel_data {
 	int				hw_id;
 	dma_cap_mask_t			cap_mask;
 	size_t				pool_size;
 };
 
 struct mv_xor_shared_platform_data {
-	struct mv_xor_platform_data    *channels;
+	struct mv_xor_channel_data    *channels;
 };
 
 #endif
-- 
cgit v1.2.3


From 7dde453d628687c0e991cfc55c9fd299a804aee6 Mon Sep 17 00:00:00 2001
From: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Date: Tue, 30 Oct 2012 11:58:14 +0100
Subject: dma: mv_xor: rename mv_xor_shared_platform_data to
 mv_xor_platform_data

'struct mv_xor_shared_platform_data' used to be the platform_data
structure for the 'mv_xor_shared', but this driver is going to be
renamed simply 'mv_xor', so also rename its platform_data structure
accordingly.

Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
---
 arch/arm/plat-orion/common.c             | 4 ++--
 drivers/dma/mv_xor.c                     | 2 +-
 include/linux/platform_data/dma-mv_xor.h | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/plat-orion/common.c b/arch/arm/plat-orion/common.c
index 7ffbe77c52cb..edd57a68fa8c 100644
--- a/arch/arm/plat-orion/common.c
+++ b/arch/arm/plat-orion/common.c
@@ -636,7 +636,7 @@ static struct mv_xor_channel_data orion_xor0_channels_data[2] = {
 	},
 };
 
-static struct mv_xor_shared_platform_data orion_xor0_pdata = {
+static struct mv_xor_platform_data orion_xor0_pdata = {
 	.channels = orion_xor0_channels_data,
 };
 
@@ -711,7 +711,7 @@ static struct mv_xor_channel_data orion_xor1_channels_data[2] = {
 	},
 };
 
-static struct mv_xor_shared_platform_data orion_xor1_pdata = {
+static struct mv_xor_platform_data orion_xor1_pdata = {
 	.channels = orion_xor1_channels_data,
 };
 
diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c
index c7926e417281..ac598168b21f 100644
--- a/drivers/dma/mv_xor.c
+++ b/drivers/dma/mv_xor.c
@@ -1254,7 +1254,7 @@ static int mv_xor_shared_probe(struct platform_device *pdev)
 {
 	const struct mbus_dram_target_info *dram;
 	struct mv_xor_shared_private *msp;
-	struct mv_xor_shared_platform_data *pdata = pdev->dev.platform_data;
+	struct mv_xor_platform_data *pdata = pdev->dev.platform_data;
 	struct resource *res;
 	int i, ret;
 
diff --git a/include/linux/platform_data/dma-mv_xor.h b/include/linux/platform_data/dma-mv_xor.h
index 40ea3d5f5b9f..82a5f4b84afe 100644
--- a/include/linux/platform_data/dma-mv_xor.h
+++ b/include/linux/platform_data/dma-mv_xor.h
@@ -18,7 +18,7 @@ struct mv_xor_channel_data {
 	size_t				pool_size;
 };
 
-struct mv_xor_shared_platform_data {
+struct mv_xor_platform_data {
 	struct mv_xor_channel_data    *channels;
 };
 
-- 
cgit v1.2.3


From 0dddee7a7d42192267ebef0fe15be8b296b665c8 Mon Sep 17 00:00:00 2001
From: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Date: Tue, 30 Oct 2012 11:59:42 +0100
Subject: dma: mv_xor: change the driver name to 'mv_xor'

Since we got rid of the per-XOR channel 'mv_xor' driver, now the
per-XOR engine driver that used to be called 'mv_xor_shared' can
simply be named 'mv_xor'.

Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
---
 arch/arm/mach-dove/common.c              | 9 +++++----
 arch/arm/mach-kirkwood/board-dt.c        | 5 +++--
 arch/arm/mach-kirkwood/common.c          | 4 ++--
 arch/arm/plat-orion/common.c             | 4 ++--
 drivers/dma/mv_xor.c                     | 2 +-
 include/linux/platform_data/dma-mv_xor.h | 2 +-
 6 files changed, 14 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-dove/common.c b/arch/arm/mach-dove/common.c
index 6a2c4dc413a8..f4ac5b06014b 100644
--- a/arch/arm/mach-dove/common.c
+++ b/arch/arm/mach-dove/common.c
@@ -33,6 +33,7 @@
 #include <linux/irq.h>
 #include <plat/time.h>
 #include <linux/platform_data/usb-ehci-orion.h>
+#include <linux/platform_data/dma-mv_xor.h>
 #include <plat/irq.h>
 #include <plat/common.h>
 #include <plat/addr-map.h>
@@ -124,8 +125,8 @@ static void __init dove_clk_init(void)
 	orion_clkdev_add(NULL, "mv_crypto", crypto);
 	orion_clkdev_add(NULL, "dove-ac97", ac97);
 	orion_clkdev_add(NULL, "dove-pdma", pdma);
-	orion_clkdev_add(NULL, "mv_xor_shared.0", xor0);
-	orion_clkdev_add(NULL, "mv_xor_shared.1", xor1);
+	orion_clkdev_add(NULL, MV_XOR_NAME ".0", xor0);
+	orion_clkdev_add(NULL, MV_XOR_NAME ".1", xor1);
 }
 
 /*****************************************************************************
@@ -410,11 +411,11 @@ static void __init dove_legacy_clk_init(void)
 			 of_clk_get_from_provider(&clkspec));
 
 	clkspec.args[0] = CLOCK_GATING_BIT_XOR0;
-	orion_clkdev_add(NULL, "mv_xor_shared.0",
+	orion_clkdev_add(NULL, MV_XOR_NAME ".0",
 			 of_clk_get_from_provider(&clkspec));
 
 	clkspec.args[0] = CLOCK_GATING_BIT_XOR1;
-	orion_clkdev_add(NULL, "mv_xor_shared.1",
+	orion_clkdev_add(NULL, MV_XOR_NAME ".1",
 			 of_clk_get_from_provider(&clkspec));
 }
 
diff --git a/arch/arm/mach-kirkwood/board-dt.c b/arch/arm/mach-kirkwood/board-dt.c
index 8bdfaa4db091..294ad5a4fd98 100644
--- a/arch/arm/mach-kirkwood/board-dt.c
+++ b/arch/arm/mach-kirkwood/board-dt.c
@@ -21,6 +21,7 @@
 #include <asm/mach/map.h>
 #include <mach/bridge-regs.h>
 #include <linux/platform_data/usb-ehci-orion.h>
+#include <linux/platform_data/dma-mv_xor.h>
 #include <plat/irq.h>
 #include <plat/common.h>
 #include "common.h"
@@ -60,11 +61,11 @@ static void __init kirkwood_legacy_clk_init(void)
 			 of_clk_get_from_provider(&clkspec));
 
 	clkspec.args[0] = CGC_BIT_XOR0;
-	orion_clkdev_add(NULL, "mv_xor_shared.0",
+	orion_clkdev_add(NULL, MV_XOR_NAME ".0",
 			 of_clk_get_from_provider(&clkspec));
 
 	clkspec.args[0] = CGC_BIT_XOR1;
-	orion_clkdev_add(NULL, "mv_xor_shared.1",
+	orion_clkdev_add(NULL, MV_XOR_NAME ".1",
 			 of_clk_get_from_provider(&clkspec));
 
 	clkspec.args[0] = CGC_BIT_PEX1;
diff --git a/arch/arm/mach-kirkwood/common.c b/arch/arm/mach-kirkwood/common.c
index 2c6c218fb79e..401dac1a8d80 100644
--- a/arch/arm/mach-kirkwood/common.c
+++ b/arch/arm/mach-kirkwood/common.c
@@ -260,8 +260,8 @@ void __init kirkwood_clk_init(void)
 	orion_clkdev_add(NULL, "orion_nand", runit);
 	orion_clkdev_add(NULL, "mvsdio", sdio);
 	orion_clkdev_add(NULL, "mv_crypto", crypto);
-	orion_clkdev_add(NULL, MV_XOR_SHARED_NAME ".0", xor0);
-	orion_clkdev_add(NULL, MV_XOR_SHARED_NAME ".1", xor1);
+	orion_clkdev_add(NULL, MV_XOR_NAME ".0", xor0);
+	orion_clkdev_add(NULL, MV_XOR_NAME ".1", xor1);
 	orion_clkdev_add("0", "pcie", pex0);
 	orion_clkdev_add("1", "pcie", pex1);
 	orion_clkdev_add(NULL, "kirkwood-i2s", audio);
diff --git a/arch/arm/plat-orion/common.c b/arch/arm/plat-orion/common.c
index edd57a68fa8c..31517cef8c4d 100644
--- a/arch/arm/plat-orion/common.c
+++ b/arch/arm/plat-orion/common.c
@@ -641,7 +641,7 @@ static struct mv_xor_platform_data orion_xor0_pdata = {
 };
 
 static struct platform_device orion_xor0_shared = {
-	.name		= MV_XOR_SHARED_NAME,
+	.name		= MV_XOR_NAME,
 	.id		= 0,
 	.num_resources	= ARRAY_SIZE(orion_xor0_shared_resources),
 	.resource	= orion_xor0_shared_resources,
@@ -716,7 +716,7 @@ static struct mv_xor_platform_data orion_xor1_pdata = {
 };
 
 static struct platform_device orion_xor1_shared = {
-	.name		= MV_XOR_SHARED_NAME,
+	.name		= MV_XOR_NAME,
 	.id		= 1,
 	.num_resources	= ARRAY_SIZE(orion_xor1_shared_resources),
 	.resource	= orion_xor1_shared_resources,
diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c
index ac598168b21f..0ed5183eb5a3 100644
--- a/drivers/dma/mv_xor.c
+++ b/drivers/dma/mv_xor.c
@@ -1361,7 +1361,7 @@ static struct platform_driver mv_xor_shared_driver = {
 	.remove		= mv_xor_shared_remove,
 	.driver		= {
 		.owner	= THIS_MODULE,
-		.name	= MV_XOR_SHARED_NAME,
+		.name	= MV_XOR_NAME,
 	},
 };
 
diff --git a/include/linux/platform_data/dma-mv_xor.h b/include/linux/platform_data/dma-mv_xor.h
index 82a5f4b84afe..367bb216c4a7 100644
--- a/include/linux/platform_data/dma-mv_xor.h
+++ b/include/linux/platform_data/dma-mv_xor.h
@@ -10,7 +10,7 @@
 #include <linux/dmaengine.h>
 #include <linux/mbus.h>
 
-#define MV_XOR_SHARED_NAME	"mv_xor_shared"
+#define MV_XOR_NAME	"mv_xor"
 
 struct mv_xor_channel_data {
 	int				hw_id;
-- 
cgit v1.2.3


From 9aedbdbab39c8aa58c0b2a0791fb10df6eebc123 Mon Sep 17 00:00:00 2001
From: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Date: Thu, 15 Nov 2012 15:36:37 +0100
Subject: dma: mv_xor: remove hw_id field from platform_data

There is no need for the platform_data to give this ID, it is simply
the channel number, so we can compute it inside the driver when
registering the channels.

Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
---
 arch/arm/plat-orion/common.c             | 4 ----
 drivers/dma/mv_xor.c                     | 6 +++---
 include/linux/platform_data/dma-mv_xor.h | 1 -
 3 files changed, 3 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/plat-orion/common.c b/arch/arm/plat-orion/common.c
index 31517cef8c4d..09d836060bf4 100644
--- a/arch/arm/plat-orion/common.c
+++ b/arch/arm/plat-orion/common.c
@@ -627,11 +627,9 @@ static struct resource orion_xor0_shared_resources[] = {
 
 static struct mv_xor_channel_data orion_xor0_channels_data[2] = {
 	{
-		.hw_id		= 0,
 		.pool_size	= PAGE_SIZE,
 	},
 	{
-		.hw_id		= 1,
 		.pool_size	= PAGE_SIZE,
 	},
 };
@@ -702,11 +700,9 @@ static struct resource orion_xor1_shared_resources[] = {
 
 static struct mv_xor_channel_data orion_xor1_channels_data[2] = {
 	{
-		.hw_id		= 0,
 		.pool_size	= PAGE_SIZE,
 	},
 	{
-		.hw_id		= 1,
 		.pool_size	= PAGE_SIZE,
 	},
 };
diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c
index a6a5a28574c4..fc983bf38438 100644
--- a/drivers/dma/mv_xor.c
+++ b/drivers/dma/mv_xor.c
@@ -1088,7 +1088,7 @@ static int mv_xor_channel_remove(struct mv_xor_chan *mv_chan)
 static struct mv_xor_chan *
 mv_xor_channel_add(struct mv_xor_device *xordev,
 		   struct platform_device *pdev,
-		   int hw_id, dma_cap_mask_t cap_mask,
+		   int idx, dma_cap_mask_t cap_mask,
 		   size_t pool_size, int irq)
 {
 	int ret = 0;
@@ -1101,7 +1101,7 @@ mv_xor_channel_add(struct mv_xor_device *xordev,
 		goto err_free_dma;
 	}
 
-	mv_chan->idx = hw_id;
+	mv_chan->idx = idx;
 
 	dma_dev = &mv_chan->dmadev;
 
@@ -1295,7 +1295,7 @@ static int mv_xor_probe(struct platform_device *pdev)
 			}
 
 			xordev->channels[i] =
-				mv_xor_channel_add(xordev, pdev, cd->hw_id,
+				mv_xor_channel_add(xordev, pdev, i,
 						   cd->cap_mask,
 						   cd->pool_size, irq);
 			if (IS_ERR(xordev->channels[i])) {
diff --git a/include/linux/platform_data/dma-mv_xor.h b/include/linux/platform_data/dma-mv_xor.h
index 367bb216c4a7..b18dc2496186 100644
--- a/include/linux/platform_data/dma-mv_xor.h
+++ b/include/linux/platform_data/dma-mv_xor.h
@@ -13,7 +13,6 @@
 #define MV_XOR_NAME	"mv_xor"
 
 struct mv_xor_channel_data {
-	int				hw_id;
 	dma_cap_mask_t			cap_mask;
 	size_t				pool_size;
 };
-- 
cgit v1.2.3


From b503fa01990f6875640339d8f4ba98dbc068f821 Mon Sep 17 00:00:00 2001
From: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Date: Thu, 15 Nov 2012 15:55:30 +0100
Subject: dma: mv_xor: remove the pool_size from platform_data

The pool_size is always PAGE_SIZE, and since it is a software
configuration paramter (and not a hardware description parameter), we
cannot make it part of the Device Tree binding, so we'd better remove
it from the platform_data as well.

Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
---
 arch/arm/plat-orion/common.c             | 18 ++----------------
 drivers/dma/mv_xor.c                     | 15 ++++++---------
 drivers/dma/mv_xor.h                     |  1 +
 include/linux/platform_data/dma-mv_xor.h |  1 -
 4 files changed, 9 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/plat-orion/common.c b/arch/arm/plat-orion/common.c
index 09d836060bf4..2d4b6414609f 100644
--- a/arch/arm/plat-orion/common.c
+++ b/arch/arm/plat-orion/common.c
@@ -625,14 +625,7 @@ static struct resource orion_xor0_shared_resources[] = {
 	},
 };
 
-static struct mv_xor_channel_data orion_xor0_channels_data[2] = {
-	{
-		.pool_size	= PAGE_SIZE,
-	},
-	{
-		.pool_size	= PAGE_SIZE,
-	},
-};
+static struct mv_xor_channel_data orion_xor0_channels_data[2];
 
 static struct mv_xor_platform_data orion_xor0_pdata = {
 	.channels = orion_xor0_channels_data,
@@ -698,14 +691,7 @@ static struct resource orion_xor1_shared_resources[] = {
 	},
 };
 
-static struct mv_xor_channel_data orion_xor1_channels_data[2] = {
-	{
-		.pool_size	= PAGE_SIZE,
-	},
-	{
-		.pool_size	= PAGE_SIZE,
-	},
-};
+static struct mv_xor_channel_data orion_xor1_channels_data[2];
 
 static struct mv_xor_platform_data orion_xor1_pdata = {
 	.channels = orion_xor1_channels_data,
diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c
index fc983bf38438..ec741b4607e2 100644
--- a/drivers/dma/mv_xor.c
+++ b/drivers/dma/mv_xor.c
@@ -603,7 +603,7 @@ static int mv_xor_alloc_chan_resources(struct dma_chan *chan)
 	int idx;
 	struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
 	struct mv_xor_desc_slot *slot = NULL;
-	int num_descs_in_pool = mv_chan->pool_size/MV_XOR_SLOT_SIZE;
+	int num_descs_in_pool = MV_XOR_POOL_SIZE/MV_XOR_SLOT_SIZE;
 
 	/* Allocate descriptor slots */
 	idx = mv_chan->slots_allocated;
@@ -1074,7 +1074,7 @@ static int mv_xor_channel_remove(struct mv_xor_chan *mv_chan)
 
 	dma_async_device_unregister(&mv_chan->dmadev);
 
-	dma_free_coherent(dev, mv_chan->pool_size,
+	dma_free_coherent(dev, MV_XOR_POOL_SIZE,
 			  mv_chan->dma_desc_pool_virt, mv_chan->dma_desc_pool);
 
 	list_for_each_entry_safe(chan, _chan, &mv_chan->dmadev.channels,
@@ -1088,8 +1088,7 @@ static int mv_xor_channel_remove(struct mv_xor_chan *mv_chan)
 static struct mv_xor_chan *
 mv_xor_channel_add(struct mv_xor_device *xordev,
 		   struct platform_device *pdev,
-		   int idx, dma_cap_mask_t cap_mask,
-		   size_t pool_size, int irq)
+		   int idx, dma_cap_mask_t cap_mask, int irq)
 {
 	int ret = 0;
 	struct mv_xor_chan *mv_chan;
@@ -1109,9 +1108,8 @@ mv_xor_channel_add(struct mv_xor_device *xordev,
 	 * note: writecombine gives slightly better performance, but
 	 * requires that we explicitly flush the writes
 	 */
-	mv_chan->pool_size = pool_size;
 	mv_chan->dma_desc_pool_virt =
-	  dma_alloc_writecombine(&pdev->dev, mv_chan->pool_size,
+	  dma_alloc_writecombine(&pdev->dev, MV_XOR_POOL_SIZE,
 				 &mv_chan->dma_desc_pool, GFP_KERNEL);
 	if (!mv_chan->dma_desc_pool_virt)
 		return ERR_PTR(-ENOMEM);
@@ -1193,7 +1191,7 @@ mv_xor_channel_add(struct mv_xor_device *xordev,
 	return mv_chan;
 
  err_free_dma:
-	dma_free_coherent(&pdev->dev, pool_size,
+	dma_free_coherent(&pdev->dev, MV_XOR_POOL_SIZE,
 			  mv_chan->dma_desc_pool_virt, mv_chan->dma_desc_pool);
 	return ERR_PTR(ret);
 }
@@ -1296,8 +1294,7 @@ static int mv_xor_probe(struct platform_device *pdev)
 
 			xordev->channels[i] =
 				mv_xor_channel_add(xordev, pdev, i,
-						   cd->cap_mask,
-						   cd->pool_size, irq);
+						   cd->cap_mask, irq);
 			if (IS_ERR(xordev->channels[i])) {
 				ret = PTR_ERR(xordev->channels[i]);
 				goto err_channel_add;
diff --git a/drivers/dma/mv_xor.h b/drivers/dma/mv_xor.h
index dab9f30e564a..698b4487b348 100644
--- a/drivers/dma/mv_xor.h
+++ b/drivers/dma/mv_xor.h
@@ -24,6 +24,7 @@
 #include <linux/interrupt.h>
 
 #define USE_TIMER
+#define MV_XOR_POOL_SIZE		PAGE_SIZE
 #define MV_XOR_SLOT_SIZE		64
 #define MV_XOR_THRESHOLD		1
 #define MV_XOR_MAX_CHANNELS             2
diff --git a/include/linux/platform_data/dma-mv_xor.h b/include/linux/platform_data/dma-mv_xor.h
index b18dc2496186..8ec18f64e396 100644
--- a/include/linux/platform_data/dma-mv_xor.h
+++ b/include/linux/platform_data/dma-mv_xor.h
@@ -14,7 +14,6 @@
 
 struct mv_xor_channel_data {
 	dma_cap_mask_t			cap_mask;
-	size_t				pool_size;
 };
 
 struct mv_xor_platform_data {
-- 
cgit v1.2.3


From ae72ae676045274c82f3c25159a9dd7cfcf5ffae Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 20 Nov 2012 11:02:55 -0500
Subject: NFSv4.1: Don't confuse CREATE_SESSION arguments and results

Don't store the target request and response sizes in the same
variables used to store the server's replies to those targets.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c         | 30 +++++++++++++++++-------------
 include/linux/nfs_fs_sb.h |  3 +++
 2 files changed, 20 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index a32d953b08de..3e572dc316e4 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -5807,8 +5807,8 @@ void nfs4_destroy_session(struct nfs4_session *session)
 static void nfs4_init_channel_attrs(struct nfs41_create_session_args *args)
 {
 	struct nfs4_session *session = args->client->cl_session;
-	unsigned int mxrqst_sz = session->fc_attrs.max_rqst_sz,
-		     mxresp_sz = session->fc_attrs.max_resp_sz;
+	unsigned int mxrqst_sz = session->fc_target_max_rqst_sz,
+		     mxresp_sz = session->fc_target_max_resp_sz;
 
 	if (mxrqst_sz == 0)
 		mxrqst_sz = NFS_MAX_FILE_IO_SIZE;
@@ -6015,24 +6015,28 @@ int nfs4_init_session(struct nfs_server *server)
 {
 	struct nfs_client *clp = server->nfs_client;
 	struct nfs4_session *session;
-	unsigned int rsize, wsize;
+	unsigned int target_max_rqst_sz = NFS_MAX_FILE_IO_SIZE;
+	unsigned int target_max_resp_sz = NFS_MAX_FILE_IO_SIZE;
 
 	if (!nfs4_has_session(clp))
 		return 0;
 
+	if (server->rsize != 0)
+		target_max_resp_sz = server->rsize;
+	target_max_resp_sz += nfs41_maxread_overhead;
+
+	if (server->wsize != 0)
+		target_max_rqst_sz = server->wsize;
+	target_max_rqst_sz += nfs41_maxwrite_overhead;
+
 	session = clp->cl_session;
 	spin_lock(&clp->cl_lock);
 	if (test_and_clear_bit(NFS4_SESSION_INITING, &session->session_state)) {
-
-		rsize = server->rsize;
-		if (rsize == 0)
-			rsize = NFS_MAX_FILE_IO_SIZE;
-		wsize = server->wsize;
-		if (wsize == 0)
-			wsize = NFS_MAX_FILE_IO_SIZE;
-
-		session->fc_attrs.max_rqst_sz = wsize + nfs41_maxwrite_overhead;
-		session->fc_attrs.max_resp_sz = rsize + nfs41_maxread_overhead;
+		/* Initialise targets and channel attributes */
+		session->fc_target_max_rqst_sz = target_max_rqst_sz;
+		session->fc_attrs.max_rqst_sz = target_max_rqst_sz;
+		session->fc_target_max_resp_sz = target_max_resp_sz;
+		session->fc_attrs.max_resp_sz = target_max_resp_sz;
 	}
 	spin_unlock(&clp->cl_lock);
 
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index a9e76ee1adca..97c8f9191880 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -242,6 +242,9 @@ struct nfs4_session {
 	struct nfs4_channel_attrs	bc_attrs;
 	struct nfs4_slot_table		bc_slot_table;
 	struct nfs_client		*clp;
+	/* Create session arguments */
+	unsigned int			fc_target_max_rqst_sz;
+	unsigned int			fc_target_max_resp_sz;
 };
 
 #endif /* CONFIG_NFS_V4 */
-- 
cgit v1.2.3


From 933602e368c4452260c9bff4fbb3baba35cf987a Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Fri, 16 Nov 2012 12:12:38 -0500
Subject: NFSv4.1: Shrink struct nfs4_sequence_res by moving sr_renewal_time

Store the renewal time inside the session slot instead.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c       | 15 +++++++++------
 include/linux/nfs_xdr.h |  2 +-
 2 files changed, 10 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 5e5cc5a5065f..14b39742b6e4 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -486,6 +486,7 @@ static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res)
 
 static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res)
 {
+	struct nfs4_slot *slot;
 	unsigned long timestamp;
 	struct nfs_client *clp;
 
@@ -502,12 +503,14 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *
 	if (!RPC_WAS_SENT(task))
 		goto out;
 
+	slot = res->sr_slot;
+
 	/* Check the SEQUENCE operation status */
 	switch (res->sr_status) {
 	case 0:
 		/* Update the slot's sequence and clientid lease timer */
-		++res->sr_slot->seq_nr;
-		timestamp = res->sr_renewal_time;
+		++slot->seq_nr;
+		timestamp = slot->renewal_time;
 		clp = res->sr_session->clp;
 		do_renew_lease(clp, timestamp);
 		/* Check sequence flags */
@@ -521,12 +524,12 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *
 		 */
 		dprintk("%s: slot=%td seq=%d: Operation in progress\n",
 			__func__,
-			res->sr_slot - res->sr_session->fc_slot_table.slots,
-			res->sr_slot->seq_nr);
+			slot - res->sr_session->fc_slot_table.slots,
+			slot->seq_nr);
 		goto out_retry;
 	default:
 		/* Just update the slot sequence no. */
-		++res->sr_slot->seq_nr;
+		++slot->seq_nr;
 	}
 out:
 	/* The session may be reset by one of the error handlers. */
@@ -637,6 +640,7 @@ int nfs41_setup_sequence(struct nfs4_session *session,
 
 	rpc_task_set_priority(task, RPC_PRIORITY_NORMAL);
 	slot = tbl->slots + slotid;
+	slot->renewal_time = jiffies;
 	args->sa_session = session;
 	args->sa_slotid = slotid;
 
@@ -644,7 +648,6 @@ int nfs41_setup_sequence(struct nfs4_session *session,
 
 	res->sr_session = session;
 	res->sr_slot = slot;
-	res->sr_renewal_time = jiffies;
 	res->sr_status_flags = 0;
 	/*
 	 * sr_status is only set in decode_sequence, and so will remain
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index a73ea89789d1..9cb1c63a70c2 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -187,6 +187,7 @@ struct nfs4_channel_attrs {
 
 /* nfs41 sessions slot seqid */
 struct nfs4_slot {
+	unsigned long		renewal_time;
 	u32		 	seq_nr;
 };
 
@@ -200,7 +201,6 @@ struct nfs4_sequence_res {
 	struct nfs4_session	*sr_session;
 	struct nfs4_slot	*sr_slot;	/* slot used to send request */
 	int			sr_status;	/* sequence operation status */
-	unsigned long		sr_renewal_time;
 	u32			sr_status_flags;
 };
 
-- 
cgit v1.2.3


From 22a8578fca5a47e643bb4f70c232d0ec84db9e4e Mon Sep 17 00:00:00 2001
From: Ezequiel Garcia <elezegarcia@gmail.com>
Date: Sat, 10 Nov 2012 13:08:20 -0300
Subject: mtd: mtd_blkdevs: Replace request handler kthread with a workqueue

By replacing a kthread with a workqueue, the code is now a bit clearer.
There's also a slight reduction of code size (numbers apply for x86):
Before:
   text	   data	    bss	    dec	    hex	filename
   3248	     36	      0	   3284	    cd4	drivers/mtd/mtd_blkdevs.o

After:
   text	   data	    bss	    dec	    hex	filename
   3150	     36	      0	   3186	    c72	drivers/mtd/mtd_blkdevs.o

Due to lack of real hardware, tests have been performed on an emulated
environment with mtdswap and mtdblock over nandsim devices.
Some real testing should be done, before merging this patch.

Signed-off-by: Ezequiel Garcia <elezegarcia@gmail.com>
Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
---
 drivers/mtd/mtd_blkdevs.c    | 47 ++++++++++++++------------------------------
 include/linux/mtd/blktrans.h |  4 +++-
 2 files changed, 18 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c
index 26ef2a72bdae..5ad39bb5ab4c 100644
--- a/drivers/mtd/mtd_blkdevs.c
+++ b/drivers/mtd/mtd_blkdevs.c
@@ -32,7 +32,6 @@
 #include <linux/hdreg.h>
 #include <linux/init.h>
 #include <linux/mutex.h>
-#include <linux/kthread.h>
 #include <asm/uaccess.h>
 
 #include "mtdcore.h"
@@ -121,16 +120,14 @@ static int do_blktrans_request(struct mtd_blktrans_ops *tr,
 
 int mtd_blktrans_cease_background(struct mtd_blktrans_dev *dev)
 {
-	if (kthread_should_stop())
-		return 1;
-
 	return dev->bg_stop;
 }
 EXPORT_SYMBOL_GPL(mtd_blktrans_cease_background);
 
-static int mtd_blktrans_thread(void *arg)
+static void mtd_blktrans_work(struct work_struct *work)
 {
-	struct mtd_blktrans_dev *dev = arg;
+	struct mtd_blktrans_dev *dev =
+		container_of(work, struct mtd_blktrans_dev, work);
 	struct mtd_blktrans_ops *tr = dev->tr;
 	struct request_queue *rq = dev->rq;
 	struct request *req = NULL;
@@ -138,7 +135,7 @@ static int mtd_blktrans_thread(void *arg)
 
 	spin_lock_irq(rq->queue_lock);
 
-	while (!kthread_should_stop()) {
+	while (1) {
 		int res;
 
 		dev->bg_stop = false;
@@ -156,15 +153,7 @@ static int mtd_blktrans_thread(void *arg)
 				background_done = !dev->bg_stop;
 				continue;
 			}
-			set_current_state(TASK_INTERRUPTIBLE);
-
-			if (kthread_should_stop())
-				set_current_state(TASK_RUNNING);
-
-			spin_unlock_irq(rq->queue_lock);
-			schedule();
-			spin_lock_irq(rq->queue_lock);
-			continue;
+			break;
 		}
 
 		spin_unlock_irq(rq->queue_lock);
@@ -185,8 +174,6 @@ static int mtd_blktrans_thread(void *arg)
 		__blk_end_request_all(req, -EIO);
 
 	spin_unlock_irq(rq->queue_lock);
-
-	return 0;
 }
 
 static void mtd_blktrans_request(struct request_queue *rq)
@@ -199,10 +186,8 @@ static void mtd_blktrans_request(struct request_queue *rq)
 	if (!dev)
 		while ((req = blk_fetch_request(rq)) != NULL)
 			__blk_end_request_all(req, -ENODEV);
-	else {
-		dev->bg_stop = true;
-		wake_up_process(dev->thread);
-	}
+	else
+		queue_work(dev->wq, &dev->work);
 }
 
 static int blktrans_open(struct block_device *bdev, fmode_t mode)
@@ -437,14 +422,13 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new)
 
 	gd->queue = new->rq;
 
-	/* Create processing thread */
-	/* TODO: workqueue ? */
-	new->thread = kthread_run(mtd_blktrans_thread, new,
-			"%s%d", tr->name, new->mtd->index);
-	if (IS_ERR(new->thread)) {
-		ret = PTR_ERR(new->thread);
+	/* Create processing workqueue */
+	new->wq = alloc_workqueue("%s%d", 0, 0,
+				  tr->name, new->mtd->index);
+	if (!new->wq)
 		goto error4;
-	}
+	INIT_WORK(&new->work, mtd_blktrans_work);
+
 	gd->driverfs_dev = &new->mtd->dev;
 
 	if (new->readonly)
@@ -484,9 +468,8 @@ int del_mtd_blktrans_dev(struct mtd_blktrans_dev *old)
 	/* Stop new requests to arrive */
 	del_gendisk(old->disk);
 
-
-	/* Stop the thread */
-	kthread_stop(old->thread);
+	/* Stop workqueue. This will perform any pending request. */
+	destroy_workqueue(old->wq);
 
 	/* Kill current requests */
 	spin_lock_irqsave(&old->queue_lock, flags);
diff --git a/include/linux/mtd/blktrans.h b/include/linux/mtd/blktrans.h
index ed270bd2e4df..4eb0a50d0c55 100644
--- a/include/linux/mtd/blktrans.h
+++ b/include/linux/mtd/blktrans.h
@@ -23,6 +23,7 @@
 #include <linux/mutex.h>
 #include <linux/kref.h>
 #include <linux/sysfs.h>
+#include <linux/workqueue.h>
 
 struct hd_geometry;
 struct mtd_info;
@@ -43,7 +44,8 @@ struct mtd_blktrans_dev {
 	struct kref ref;
 	struct gendisk *disk;
 	struct attribute_group *disk_attributes;
-	struct task_struct *thread;
+	struct workqueue_struct *wq;
+	struct work_struct work;
 	struct request_queue *rq;
 	spinlock_t queue_lock;
 	void *priv;
-- 
cgit v1.2.3


From edd7eabc85e2f8d76a933b9639bebfe7f98861e4 Mon Sep 17 00:00:00 2001
From: Laxman Dewangan <ldewangan@nvidia.com>
Date: Wed, 14 Nov 2012 21:09:28 +0530
Subject: mfd: Add TI TPS80031 mfd core driver

TPS80031/ TPS80032 Fully Integrated Power Management with Power
Path and Battery Charger. The device provides five configurable
step-down converters, 11 general purpose LDOs, USB OTG Module,
ADC, RTC, 2 PWM, System Voltage Regulator/Battery Charger with
Power Path from USB, 32K clock generator.

Add the mfd core driver for TPS80031/TPS80032.

Signed-off-by: Laxman Dewangan <ldewangan@nvidia.com>
Reviwed-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/Kconfig          |  14 +
 drivers/mfd/Makefile         |   1 +
 drivers/mfd/tps80031.c       | 573 ++++++++++++++++++++++++++++++++++++++
 include/linux/mfd/tps80031.h | 637 +++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 1225 insertions(+)
 create mode 100644 drivers/mfd/tps80031.c
 create mode 100644 include/linux/mfd/tps80031.h

(limited to 'include/linux')

diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index 59359a7a2493..ca633df7c330 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -265,6 +265,20 @@ config MFD_TPS65912_SPI
 	  If you say yes here you get support for the TPS65912 series of
 	  PM chips with SPI interface.
 
+config MFD_TPS80031
+	bool "TI TPS80031/TPS80032 Power Management chips"
+	depends on I2C=y && GENERIC_HARDIRQS
+	select MFD_CORE
+	select REGMAP_I2C
+	select IRQ_DOMAIN
+	help
+	  If you say yes here you get support for the Texas Instruments
+	  TPS80031/ TPS80032 Fully Integrated Power Management with Power
+	  Path and Battery Charger. The device provides five configurable
+	  step-down converters, 11 general purpose LDOs, USB OTG Module,
+	  ADC, RTC, 2 PWM, System Voltage Regulator/Battery Charger with
+	  Power Path from USB, 32K clock generator.
+
 config MENELAUS
 	bool "Texas Instruments TWL92330/Menelaus PM chip"
 	depends on I2C=y && ARCH_OMAP2
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index 1c3ee7c76906..8072460e99d2 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -61,6 +61,7 @@ tps65912-objs                   := tps65912-core.o tps65912-irq.o
 obj-$(CONFIG_MFD_TPS65912)	+= tps65912.o
 obj-$(CONFIG_MFD_TPS65912_I2C)	+= tps65912-i2c.o
 obj-$(CONFIG_MFD_TPS65912_SPI)  += tps65912-spi.o
+obj-$(CONFIG_MFD_TPS80031)	+= tps80031.o
 obj-$(CONFIG_MENELAUS)		+= menelaus.o
 
 obj-$(CONFIG_TWL4030_CORE)	+= twl-core.o twl4030-irq.o twl6030-irq.o
diff --git a/drivers/mfd/tps80031.c b/drivers/mfd/tps80031.c
new file mode 100644
index 000000000000..f64005efa6fa
--- /dev/null
+++ b/drivers/mfd/tps80031.c
@@ -0,0 +1,573 @@
+/*
+ * tps80031.c -- TI TPS80031/TPS80032 mfd core driver.
+ *
+ * MFD core driver for TI TPS80031/TPS80032 Fully Integrated
+ * Power Management with Power Path and Battery Charger
+ *
+ * Copyright (c) 2012, NVIDIA Corporation.
+ *
+ * Author: Laxman Dewangan <ldewangan@nvidia.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation version 2.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any kind,
+ * whether express or implied; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ * 02111-1307, USA
+ */
+
+#include <linux/err.h>
+#include <linux/i2c.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/mfd/core.h>
+#include <linux/mfd/tps80031.h>
+#include <linux/module.h>
+#include <linux/pm.h>
+#include <linux/regmap.h>
+#include <linux/slab.h>
+
+static struct resource tps80031_rtc_resources[] = {
+	{
+		.start = TPS80031_INT_RTC_ALARM,
+		.end = TPS80031_INT_RTC_ALARM,
+		.flags = IORESOURCE_IRQ,
+	},
+};
+
+/* TPS80031 sub mfd devices */
+static struct mfd_cell tps80031_cell[] = {
+	{
+		.name = "tps80031-pmic",
+	},
+	{
+		.name = "tps80031-clock",
+	},
+	{
+		.name = "tps80031-rtc",
+		.num_resources = ARRAY_SIZE(tps80031_rtc_resources),
+		.resources = tps80031_rtc_resources,
+	},
+	{
+		.name = "tps80031-gpadc",
+	},
+	{
+		.name = "tps80031-fuel-gauge",
+	},
+	{
+		.name = "tps80031-charger",
+	},
+};
+
+static int tps80031_slave_address[TPS80031_NUM_SLAVES] = {
+	TPS80031_I2C_ID0_ADDR,
+	TPS80031_I2C_ID1_ADDR,
+	TPS80031_I2C_ID2_ADDR,
+	TPS80031_I2C_ID3_ADDR,
+};
+
+struct tps80031_pupd_data {
+	u8	reg;
+	u8	pullup_bit;
+	u8	pulldown_bit;
+};
+
+#define TPS80031_IRQ(_reg, _mask)			\
+	{							\
+		.reg_offset = (TPS80031_INT_MSK_LINE_##_reg) -	\
+				TPS80031_INT_MSK_LINE_A,	\
+		.mask = BIT(_mask),				\
+	}
+
+static const struct regmap_irq tps80031_main_irqs[] = {
+	[TPS80031_INT_PWRON]		= TPS80031_IRQ(A, 0),
+	[TPS80031_INT_RPWRON]		= TPS80031_IRQ(A, 1),
+	[TPS80031_INT_SYS_VLOW]		= TPS80031_IRQ(A, 2),
+	[TPS80031_INT_RTC_ALARM]	= TPS80031_IRQ(A, 3),
+	[TPS80031_INT_RTC_PERIOD]	= TPS80031_IRQ(A, 4),
+	[TPS80031_INT_HOT_DIE]		= TPS80031_IRQ(A, 5),
+	[TPS80031_INT_VXX_SHORT]	= TPS80031_IRQ(A, 6),
+	[TPS80031_INT_SPDURATION]	= TPS80031_IRQ(A, 7),
+	[TPS80031_INT_WATCHDOG]		= TPS80031_IRQ(B, 0),
+	[TPS80031_INT_BAT]		= TPS80031_IRQ(B, 1),
+	[TPS80031_INT_SIM]		= TPS80031_IRQ(B, 2),
+	[TPS80031_INT_MMC]		= TPS80031_IRQ(B, 3),
+	[TPS80031_INT_RES]		= TPS80031_IRQ(B, 4),
+	[TPS80031_INT_GPADC_RT]		= TPS80031_IRQ(B, 5),
+	[TPS80031_INT_GPADC_SW2_EOC]	= TPS80031_IRQ(B, 6),
+	[TPS80031_INT_CC_AUTOCAL]	= TPS80031_IRQ(B, 7),
+	[TPS80031_INT_ID_WKUP]		= TPS80031_IRQ(C, 0),
+	[TPS80031_INT_VBUSS_WKUP]	= TPS80031_IRQ(C, 1),
+	[TPS80031_INT_ID]		= TPS80031_IRQ(C, 2),
+	[TPS80031_INT_VBUS]		= TPS80031_IRQ(C, 3),
+	[TPS80031_INT_CHRG_CTRL]	= TPS80031_IRQ(C, 4),
+	[TPS80031_INT_EXT_CHRG]		= TPS80031_IRQ(C, 5),
+	[TPS80031_INT_INT_CHRG]		= TPS80031_IRQ(C, 6),
+	[TPS80031_INT_RES2]		= TPS80031_IRQ(C, 7),
+};
+
+static struct regmap_irq_chip tps80031_irq_chip = {
+	.name = "tps80031",
+	.irqs = tps80031_main_irqs,
+	.num_irqs = ARRAY_SIZE(tps80031_main_irqs),
+	.num_regs = 3,
+	.status_base = TPS80031_INT_STS_A,
+	.mask_base = TPS80031_INT_MSK_LINE_A,
+};
+
+#define PUPD_DATA(_reg, _pulldown_bit, _pullup_bit)	\
+	{						\
+		.reg = TPS80031_CFG_INPUT_PUPD##_reg,	\
+		.pulldown_bit = _pulldown_bit,		\
+		.pullup_bit = _pullup_bit,		\
+	}
+
+static const struct tps80031_pupd_data tps80031_pupds[] = {
+	[TPS80031_PREQ1]		= PUPD_DATA(1, BIT(0),	BIT(1)),
+	[TPS80031_PREQ2A]		= PUPD_DATA(1, BIT(2),	BIT(3)),
+	[TPS80031_PREQ2B]		= PUPD_DATA(1, BIT(4),	BIT(5)),
+	[TPS80031_PREQ2C]		= PUPD_DATA(1, BIT(6),	BIT(7)),
+	[TPS80031_PREQ3]		= PUPD_DATA(2, BIT(0),	BIT(1)),
+	[TPS80031_NRES_WARM]		= PUPD_DATA(2, 0,	BIT(2)),
+	[TPS80031_PWM_FORCE]		= PUPD_DATA(2, BIT(5),	0),
+	[TPS80031_CHRG_EXT_CHRG_STATZ]	= PUPD_DATA(2, 0,	BIT(6)),
+	[TPS80031_SIM]			= PUPD_DATA(3, BIT(0),	BIT(1)),
+	[TPS80031_MMC]			= PUPD_DATA(3, BIT(2),	BIT(3)),
+	[TPS80031_GPADC_START]		= PUPD_DATA(3, BIT(4),	0),
+	[TPS80031_DVSI2C_SCL]		= PUPD_DATA(4, 0,	BIT(0)),
+	[TPS80031_DVSI2C_SDA]		= PUPD_DATA(4, 0,	BIT(1)),
+	[TPS80031_CTLI2C_SCL]		= PUPD_DATA(4, 0,	BIT(2)),
+	[TPS80031_CTLI2C_SDA]		= PUPD_DATA(4, 0,	BIT(3)),
+};
+static struct tps80031 *tps80031_power_off_dev;
+
+int tps80031_ext_power_req_config(struct device *dev,
+		unsigned long ext_ctrl_flag, int preq_bit,
+		int state_reg_add, int trans_reg_add)
+{
+	u8 res_ass_reg = 0;
+	int preq_mask_bit = 0;
+	int ret;
+
+	if (!(ext_ctrl_flag & TPS80031_EXT_PWR_REQ))
+		return 0;
+
+	if (ext_ctrl_flag & TPS80031_PWR_REQ_INPUT_PREQ1) {
+		res_ass_reg = TPS80031_PREQ1_RES_ASS_A + (preq_bit >> 3);
+		preq_mask_bit = 5;
+	} else if (ext_ctrl_flag & TPS80031_PWR_REQ_INPUT_PREQ2) {
+		res_ass_reg = TPS80031_PREQ2_RES_ASS_A + (preq_bit >> 3);
+		preq_mask_bit = 6;
+	} else if (ext_ctrl_flag & TPS80031_PWR_REQ_INPUT_PREQ3) {
+		res_ass_reg = TPS80031_PREQ3_RES_ASS_A + (preq_bit >> 3);
+		preq_mask_bit = 7;
+	}
+
+	/* Configure REQ_ASS registers */
+	ret = tps80031_set_bits(dev, TPS80031_SLAVE_ID1, res_ass_reg,
+					BIT(preq_bit & 0x7));
+	if (ret < 0) {
+		dev_err(dev, "reg 0x%02x setbit failed, err = %d\n",
+				res_ass_reg, ret);
+		return ret;
+	}
+
+	/* Unmask the PREQ */
+	ret = tps80031_clr_bits(dev, TPS80031_SLAVE_ID1,
+			TPS80031_PHOENIX_MSK_TRANSITION, BIT(preq_mask_bit));
+	if (ret < 0) {
+		dev_err(dev, "reg 0x%02x clrbit failed, err = %d\n",
+			TPS80031_PHOENIX_MSK_TRANSITION, ret);
+		return ret;
+	}
+
+	/* Switch regulator control to resource now */
+	if (ext_ctrl_flag & (TPS80031_PWR_REQ_INPUT_PREQ2 |
+					TPS80031_PWR_REQ_INPUT_PREQ3)) {
+		ret = tps80031_update(dev, TPS80031_SLAVE_ID1, state_reg_add,
+						0x0, TPS80031_STATE_MASK);
+		if (ret < 0)
+			dev_err(dev, "reg 0x%02x update failed, err = %d\n",
+				state_reg_add, ret);
+	} else {
+		ret = tps80031_update(dev, TPS80031_SLAVE_ID1, trans_reg_add,
+				TPS80031_TRANS_SLEEP_OFF,
+				TPS80031_TRANS_SLEEP_MASK);
+		if (ret < 0)
+			dev_err(dev, "reg 0x%02x update failed, err = %d\n",
+				trans_reg_add, ret);
+	}
+	return ret;
+}
+EXPORT_SYMBOL_GPL(tps80031_ext_power_req_config);
+
+static void tps80031_power_off(void)
+{
+	dev_info(tps80031_power_off_dev->dev, "switching off PMU\n");
+	tps80031_write(tps80031_power_off_dev->dev, TPS80031_SLAVE_ID1,
+				TPS80031_PHOENIX_DEV_ON, TPS80031_DEVOFF);
+}
+
+static void tps80031_pupd_init(struct tps80031 *tps80031,
+			       struct tps80031_platform_data *pdata)
+{
+	struct tps80031_pupd_init_data *pupd_init_data = pdata->pupd_init_data;
+	int data_size = pdata->pupd_init_data_size;
+	int i;
+
+	for (i = 0; i < data_size; ++i) {
+		struct tps80031_pupd_init_data *pupd_init = &pupd_init_data[i];
+		const struct tps80031_pupd_data *pupd =
+			&tps80031_pupds[pupd_init->input_pin];
+		u8 update_value = 0;
+		u8 update_mask = pupd->pulldown_bit | pupd->pullup_bit;
+
+		if (pupd_init->setting == TPS80031_PUPD_PULLDOWN)
+			update_value = pupd->pulldown_bit;
+		else if (pupd_init->setting == TPS80031_PUPD_PULLUP)
+			update_value = pupd->pullup_bit;
+
+		tps80031_update(tps80031->dev, TPS80031_SLAVE_ID1, pupd->reg,
+				update_value, update_mask);
+	}
+}
+
+static int tps80031_init_ext_control(struct tps80031 *tps80031,
+			struct tps80031_platform_data *pdata)
+{
+	struct device *dev = tps80031->dev;
+	int ret;
+	int i;
+
+	/* Clear all external control for this rail */
+	for (i = 0; i < 9; ++i) {
+		ret = tps80031_write(dev, TPS80031_SLAVE_ID1,
+				TPS80031_PREQ1_RES_ASS_A + i, 0);
+		if (ret < 0) {
+			dev_err(dev, "reg 0x%02x write failed, err = %d\n",
+				TPS80031_PREQ1_RES_ASS_A + i, ret);
+			return ret;
+		}
+	}
+
+	/* Mask the PREQ */
+	ret = tps80031_set_bits(dev, TPS80031_SLAVE_ID1,
+			TPS80031_PHOENIX_MSK_TRANSITION, 0x7 << 5);
+	if (ret < 0) {
+		dev_err(dev, "reg 0x%02x set_bits failed, err = %d\n",
+			TPS80031_PHOENIX_MSK_TRANSITION, ret);
+		return ret;
+	}
+	return ret;
+}
+
+static int __devinit tps80031_irq_init(struct tps80031 *tps80031, int irq,
+				int irq_base)
+{
+	struct device *dev = tps80031->dev;
+	int i, ret;
+
+	/*
+	 * The MASK register used for updating status register when
+	 * interrupt occurs and LINE register used to pass the status
+	 * to actual interrupt line.  As per datasheet:
+	 * When INT_MSK_LINE [i] is set to 1, the associated interrupt
+	 * number i is INT line masked, which means that no interrupt is
+	 * generated on the INT line.
+	 * When INT_MSK_LINE [i] is set to 0, the associated interrupt
+	 * number i is  line enabled: An interrupt is generated on the
+	 * INT line.
+	 * In any case, the INT_STS [i] status bit may or may not be updated,
+	 * only linked to the INT_MSK_STS [i] configuration register bit.
+	 *
+	 * When INT_MSK_STS [i] is set to 1, the associated interrupt number
+	 * i is status masked, which means that no interrupt is stored in
+	 * the INT_STS[i] status bit. Note that no interrupt number i is
+	 * generated on the INT line, even if the INT_MSK_LINE [i] register
+	 * bit is set to 0.
+	 * When INT_MSK_STS [i] is set to 0, the associated interrupt number i
+	 * is status enabled: An interrupt status is updated in the INT_STS [i]
+	 * register. The interrupt may or may not be generated on the INT line,
+	 * depending on the INT_MSK_LINE [i] configuration register bit.
+	 */
+	for (i = 0; i < 3; i++)
+		tps80031_write(dev, TPS80031_SLAVE_ID2,
+				TPS80031_INT_MSK_STS_A + i, 0x00);
+
+	ret = regmap_add_irq_chip(tps80031->regmap[TPS80031_SLAVE_ID2], irq,
+			IRQF_ONESHOT, irq_base,
+			&tps80031_irq_chip, &tps80031->irq_data);
+	if (ret < 0) {
+		dev_err(dev, "add irq failed, err = %d\n", ret);
+		return ret;
+	}
+	return ret;
+}
+
+static bool rd_wr_reg_id0(struct device *dev, unsigned int reg)
+{
+	switch (reg) {
+	case TPS80031_SMPS1_CFG_FORCE ... TPS80031_SMPS2_CFG_VOLTAGE:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static bool rd_wr_reg_id1(struct device *dev, unsigned int reg)
+{
+	switch (reg) {
+	case TPS80031_SECONDS_REG ... TPS80031_RTC_RESET_STATUS_REG:
+	case TPS80031_VALIDITY0 ... TPS80031_VALIDITY7:
+	case TPS80031_PHOENIX_START_CONDITION ... TPS80031_KEY_PRESS_DUR_CFG:
+	case TPS80031_SMPS4_CFG_TRANS ... TPS80031_SMPS3_CFG_VOLTAGE:
+	case TPS80031_BROADCAST_ADDR_ALL ... TPS80031_BROADCAST_ADDR_CLK_RST:
+	case TPS80031_VANA_CFG_TRANS ... TPS80031_LDO7_CFG_VOLTAGE:
+	case TPS80031_REGEN1_CFG_TRANS ... TPS80031_TMP_CFG_STATE:
+	case TPS80031_PREQ1_RES_ASS_A ... TPS80031_PREQ3_RES_ASS_C:
+	case TPS80031_SMPS_OFFSET ... TPS80031_BATDEBOUNCING:
+	case TPS80031_CFG_INPUT_PUPD1 ... TPS80031_CFG_SMPS_PD:
+	case TPS80031_BACKUP_REG:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static bool is_volatile_reg_id1(struct device *dev, unsigned int reg)
+{
+	switch (reg) {
+	case TPS80031_SMPS4_CFG_TRANS ... TPS80031_SMPS3_CFG_VOLTAGE:
+	case TPS80031_VANA_CFG_TRANS ... TPS80031_LDO7_CFG_VOLTAGE:
+	case TPS80031_REGEN1_CFG_TRANS ... TPS80031_TMP_CFG_STATE:
+	case TPS80031_PREQ1_RES_ASS_A ... TPS80031_PREQ3_RES_ASS_C:
+	case TPS80031_SMPS_OFFSET ... TPS80031_BATDEBOUNCING:
+	case TPS80031_CFG_INPUT_PUPD1 ... TPS80031_CFG_SMPS_PD:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static bool rd_wr_reg_id2(struct device *dev, unsigned int reg)
+{
+	switch (reg) {
+	case TPS80031_USB_VENDOR_ID_LSB ... TPS80031_USB_OTG_REVISION:
+	case TPS80031_GPADC_CTRL ... TPS80031_CTRL_P1:
+	case TPS80031_RTCH0_LSB ... TPS80031_GPCH0_MSB:
+	case TPS80031_TOGGLE1 ... TPS80031_VIBMODE:
+	case TPS80031_PWM1ON ... TPS80031_PWM2OFF:
+	case TPS80031_FG_REG_00 ... TPS80031_FG_REG_11:
+	case TPS80031_INT_STS_A ... TPS80031_INT_MSK_STS_C:
+	case TPS80031_CONTROLLER_CTRL2 ... TPS80031_LED_PWM_CTRL2:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static bool rd_wr_reg_id3(struct device *dev, unsigned int reg)
+{
+	switch (reg) {
+	case TPS80031_GPADC_TRIM0 ... TPS80031_GPADC_TRIM18:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static const struct regmap_config tps80031_regmap_configs[] = {
+	{
+		.reg_bits = 8,
+		.val_bits = 8,
+		.writeable_reg = rd_wr_reg_id0,
+		.readable_reg = rd_wr_reg_id0,
+		.max_register = TPS80031_MAX_REGISTER,
+	},
+	{
+		.reg_bits = 8,
+		.val_bits = 8,
+		.writeable_reg = rd_wr_reg_id1,
+		.readable_reg = rd_wr_reg_id1,
+		.volatile_reg = is_volatile_reg_id1,
+		.max_register = TPS80031_MAX_REGISTER,
+	},
+	{
+		.reg_bits = 8,
+		.val_bits = 8,
+		.writeable_reg = rd_wr_reg_id2,
+		.readable_reg = rd_wr_reg_id2,
+		.max_register = TPS80031_MAX_REGISTER,
+	},
+	{
+		.reg_bits = 8,
+		.val_bits = 8,
+		.writeable_reg = rd_wr_reg_id3,
+		.readable_reg = rd_wr_reg_id3,
+		.max_register = TPS80031_MAX_REGISTER,
+	},
+};
+
+static int __devinit tps80031_probe(struct i2c_client *client,
+					const struct i2c_device_id *id)
+{
+	struct tps80031_platform_data *pdata = client->dev.platform_data;
+	struct tps80031 *tps80031;
+	int ret;
+	uint8_t es_version;
+	uint8_t ep_ver;
+	int i;
+
+	if (!pdata) {
+		dev_err(&client->dev, "tps80031 requires platform data\n");
+		return -EINVAL;
+	}
+
+	tps80031 = devm_kzalloc(&client->dev, sizeof(*tps80031), GFP_KERNEL);
+	if (!tps80031) {
+		dev_err(&client->dev, "Malloc failed for tps80031\n");
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < TPS80031_NUM_SLAVES; i++) {
+		if (tps80031_slave_address[i] == client->addr)
+			tps80031->clients[i] = client;
+		else
+			tps80031->clients[i] = i2c_new_dummy(client->adapter,
+						tps80031_slave_address[i]);
+		if (!tps80031->clients[i]) {
+			dev_err(&client->dev, "can't attach client %d\n", i);
+			ret = -ENOMEM;
+			goto fail_client_reg;
+		}
+
+		i2c_set_clientdata(tps80031->clients[i], tps80031);
+		tps80031->regmap[i] = devm_regmap_init_i2c(tps80031->clients[i],
+					&tps80031_regmap_configs[i]);
+		if (IS_ERR(tps80031->regmap[i])) {
+			ret = PTR_ERR(tps80031->regmap[i]);
+			dev_err(&client->dev,
+				"regmap %d init failed, err %d\n", i, ret);
+			goto fail_client_reg;
+		}
+	}
+
+	ret = tps80031_read(&client->dev, TPS80031_SLAVE_ID3,
+			TPS80031_JTAGVERNUM, &es_version);
+	if (ret < 0) {
+		dev_err(&client->dev,
+			"Silicon version number read failed: %d\n", ret);
+		goto fail_client_reg;
+	}
+
+	ret = tps80031_read(&client->dev, TPS80031_SLAVE_ID3,
+			TPS80031_EPROM_REV, &ep_ver);
+	if (ret < 0) {
+		dev_err(&client->dev,
+			"Silicon eeprom version read failed: %d\n", ret);
+		goto fail_client_reg;
+	}
+
+	dev_info(&client->dev, "ES version 0x%02x and EPROM version 0x%02x\n",
+					es_version, ep_ver);
+	tps80031->es_version = es_version;
+	tps80031->dev = &client->dev;
+	i2c_set_clientdata(client, tps80031);
+	tps80031->chip_info = id->driver_data;
+
+	ret = tps80031_irq_init(tps80031, client->irq, pdata->irq_base);
+	if (ret) {
+		dev_err(&client->dev, "IRQ init failed: %d\n", ret);
+		goto fail_client_reg;
+	}
+
+	tps80031_pupd_init(tps80031, pdata);
+
+	tps80031_init_ext_control(tps80031, pdata);
+
+	ret = mfd_add_devices(tps80031->dev, -1,
+			tps80031_cell, ARRAY_SIZE(tps80031_cell),
+			NULL, 0,
+			regmap_irq_get_domain(tps80031->irq_data));
+	if (ret < 0) {
+		dev_err(&client->dev, "mfd_add_devices failed: %d\n", ret);
+		goto fail_mfd_add;
+	}
+
+	if (pdata->use_power_off && !pm_power_off) {
+		tps80031_power_off_dev = tps80031;
+		pm_power_off = tps80031_power_off;
+	}
+	return 0;
+
+fail_mfd_add:
+	regmap_del_irq_chip(client->irq, tps80031->irq_data);
+
+fail_client_reg:
+	for (i = 0; i < TPS80031_NUM_SLAVES; i++) {
+		if (tps80031->clients[i]  && (tps80031->clients[i] != client))
+			i2c_unregister_device(tps80031->clients[i]);
+	}
+	return ret;
+}
+
+static int __devexit tps80031_remove(struct i2c_client *client)
+{
+	struct tps80031 *tps80031 = i2c_get_clientdata(client);
+	int i;
+
+	if (tps80031_power_off_dev == tps80031) {
+		tps80031_power_off_dev = NULL;
+		pm_power_off = NULL;
+	}
+
+	mfd_remove_devices(tps80031->dev);
+
+	regmap_del_irq_chip(client->irq, tps80031->irq_data);
+
+	for (i = 0; i < TPS80031_NUM_SLAVES; i++) {
+		if (tps80031->clients[i] != client)
+			i2c_unregister_device(tps80031->clients[i]);
+	}
+	return 0;
+}
+
+static const struct i2c_device_id tps80031_id_table[] = {
+	{ "tps80031", TPS80031 },
+	{ "tps80032", TPS80032 },
+};
+MODULE_DEVICE_TABLE(i2c, tps80031_id_table);
+
+static struct i2c_driver tps80031_driver = {
+	.driver	= {
+		.name	= "tps80031",
+		.owner	= THIS_MODULE,
+	},
+	.probe		= tps80031_probe,
+	.remove		= __devexit_p(tps80031_remove),
+	.id_table	= tps80031_id_table,
+};
+
+static int __init tps80031_init(void)
+{
+	return i2c_add_driver(&tps80031_driver);
+}
+subsys_initcall(tps80031_init);
+
+static void __exit tps80031_exit(void)
+{
+	i2c_del_driver(&tps80031_driver);
+}
+module_exit(tps80031_exit);
+
+MODULE_AUTHOR("Laxman Dewangan <ldewangan@nvidia.com>");
+MODULE_DESCRIPTION("TPS80031 core driver");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/mfd/tps80031.h b/include/linux/mfd/tps80031.h
new file mode 100644
index 000000000000..2c75c9c9318f
--- /dev/null
+++ b/include/linux/mfd/tps80031.h
@@ -0,0 +1,637 @@
+/*
+ * tps80031.h -- TI TPS80031 and TI TPS80032 PMIC driver.
+ *
+ * Copyright (c) 2012, NVIDIA Corporation.
+ *
+ * Author: Laxman Dewangan <ldewangan@nvidia.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation version 2.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any kind,
+ * whether express or implied; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ * 02111-1307, USA
+ */
+
+#ifndef __LINUX_MFD_TPS80031_H
+#define __LINUX_MFD_TPS80031_H
+
+#include <linux/device.h>
+#include <linux/regmap.h>
+
+/* Pull-ups/Pull-downs */
+#define TPS80031_CFG_INPUT_PUPD1			0xF0
+#define TPS80031_CFG_INPUT_PUPD2			0xF1
+#define TPS80031_CFG_INPUT_PUPD3			0xF2
+#define TPS80031_CFG_INPUT_PUPD4			0xF3
+#define TPS80031_CFG_LDO_PD1				0xF4
+#define TPS80031_CFG_LDO_PD2				0xF5
+#define TPS80031_CFG_SMPS_PD				0xF6
+
+/* Real Time Clock */
+#define TPS80031_SECONDS_REG				0x00
+#define TPS80031_MINUTES_REG				0x01
+#define TPS80031_HOURS_REG				0x02
+#define TPS80031_DAYS_REG				0x03
+#define TPS80031_MONTHS_REG				0x04
+#define TPS80031_YEARS_REG				0x05
+#define TPS80031_WEEKS_REG				0x06
+#define TPS80031_ALARM_SECONDS_REG			0x08
+#define TPS80031_ALARM_MINUTES_REG			0x09
+#define TPS80031_ALARM_HOURS_REG			0x0A
+#define TPS80031_ALARM_DAYS_REG				0x0B
+#define TPS80031_ALARM_MONTHS_REG			0x0C
+#define TPS80031_ALARM_YEARS_REG			0x0D
+#define TPS80031_RTC_CTRL_REG				0x10
+#define TPS80031_RTC_STATUS_REG				0x11
+#define TPS80031_RTC_INTERRUPTS_REG			0x12
+#define TPS80031_RTC_COMP_LSB_REG			0x13
+#define TPS80031_RTC_COMP_MSB_REG			0x14
+#define TPS80031_RTC_RESET_STATUS_REG			0x16
+
+/*PMC Master Module */
+#define TPS80031_PHOENIX_START_CONDITION		0x1F
+#define TPS80031_PHOENIX_MSK_TRANSITION			0x20
+#define TPS80031_STS_HW_CONDITIONS			0x21
+#define TPS80031_PHOENIX_LAST_TURNOFF_STS		0x22
+#define TPS80031_VSYSMIN_LO_THRESHOLD			0x23
+#define TPS80031_VSYSMIN_HI_THRESHOLD			0x24
+#define TPS80031_PHOENIX_DEV_ON				0x25
+#define TPS80031_STS_PWR_GRP_STATE			0x27
+#define TPS80031_PH_CFG_VSYSLOW				0x28
+#define TPS80031_PH_STS_BOOT				0x29
+#define TPS80031_PHOENIX_SENS_TRANSITION		0x2A
+#define TPS80031_PHOENIX_SEQ_CFG			0x2B
+#define TPS80031_PRIMARY_WATCHDOG_CFG			0X2C
+#define TPS80031_KEY_PRESS_DUR_CFG			0X2D
+#define TPS80031_SMPS_LDO_SHORT_STS			0x2E
+
+/* PMC Slave Module - Broadcast */
+#define TPS80031_BROADCAST_ADDR_ALL			0x31
+#define TPS80031_BROADCAST_ADDR_REF			0x32
+#define TPS80031_BROADCAST_ADDR_PROV			0x33
+#define TPS80031_BROADCAST_ADDR_CLK_RST			0x34
+
+/* PMC Slave Module  SMPS Regulators */
+#define TPS80031_SMPS4_CFG_TRANS			0x41
+#define TPS80031_SMPS4_CFG_STATE			0x42
+#define TPS80031_SMPS4_CFG_VOLTAGE			0x44
+#define TPS80031_VIO_CFG_TRANS				0x47
+#define TPS80031_VIO_CFG_STATE				0x48
+#define TPS80031_VIO_CFG_FORCE				0x49
+#define TPS80031_VIO_CFG_VOLTAGE			0x4A
+#define TPS80031_VIO_CFG_STEP				0x48
+#define TPS80031_SMPS1_CFG_TRANS			0x53
+#define TPS80031_SMPS1_CFG_STATE			0x54
+#define TPS80031_SMPS1_CFG_FORCE			0x55
+#define TPS80031_SMPS1_CFG_VOLTAGE			0x56
+#define TPS80031_SMPS1_CFG_STEP				0x57
+#define TPS80031_SMPS2_CFG_TRANS			0x59
+#define TPS80031_SMPS2_CFG_STATE			0x5A
+#define TPS80031_SMPS2_CFG_FORCE			0x5B
+#define TPS80031_SMPS2_CFG_VOLTAGE			0x5C
+#define TPS80031_SMPS2_CFG_STEP				0x5D
+#define TPS80031_SMPS3_CFG_TRANS			0x65
+#define TPS80031_SMPS3_CFG_STATE			0x66
+#define TPS80031_SMPS3_CFG_VOLTAGE			0x68
+
+/* PMC Slave Module  LDO Regulators */
+#define TPS80031_VANA_CFG_TRANS				0x81
+#define TPS80031_VANA_CFG_STATE				0x82
+#define TPS80031_VANA_CFG_VOLTAGE			0x83
+#define TPS80031_LDO2_CFG_TRANS				0x85
+#define TPS80031_LDO2_CFG_STATE				0x86
+#define TPS80031_LDO2_CFG_VOLTAGE			0x87
+#define TPS80031_LDO4_CFG_TRANS				0x89
+#define TPS80031_LDO4_CFG_STATE				0x8A
+#define TPS80031_LDO4_CFG_VOLTAGE			0x8B
+#define TPS80031_LDO3_CFG_TRANS				0x8D
+#define TPS80031_LDO3_CFG_STATE				0x8E
+#define TPS80031_LDO3_CFG_VOLTAGE			0x8F
+#define TPS80031_LDO6_CFG_TRANS				0x91
+#define TPS80031_LDO6_CFG_STATE				0x92
+#define TPS80031_LDO6_CFG_VOLTAGE			0x93
+#define TPS80031_LDOLN_CFG_TRANS			0x95
+#define TPS80031_LDOLN_CFG_STATE			0x96
+#define TPS80031_LDOLN_CFG_VOLTAGE			0x97
+#define TPS80031_LDO5_CFG_TRANS				0x99
+#define TPS80031_LDO5_CFG_STATE				0x9A
+#define TPS80031_LDO5_CFG_VOLTAGE			0x9B
+#define TPS80031_LDO1_CFG_TRANS				0x9D
+#define TPS80031_LDO1_CFG_STATE				0x9E
+#define TPS80031_LDO1_CFG_VOLTAGE			0x9F
+#define TPS80031_LDOUSB_CFG_TRANS			0xA1
+#define TPS80031_LDOUSB_CFG_STATE			0xA2
+#define TPS80031_LDOUSB_CFG_VOLTAGE			0xA3
+#define TPS80031_LDO7_CFG_TRANS				0xA5
+#define TPS80031_LDO7_CFG_STATE				0xA6
+#define TPS80031_LDO7_CFG_VOLTAGE			0xA7
+
+/* PMC Slave Module  External Control */
+#define TPS80031_REGEN1_CFG_TRANS			0xAE
+#define TPS80031_REGEN1_CFG_STATE			0xAF
+#define TPS80031_REGEN2_CFG_TRANS			0xB1
+#define TPS80031_REGEN2_CFG_STATE			0xB2
+#define TPS80031_SYSEN_CFG_TRANS			0xB4
+#define TPS80031_SYSEN_CFG_STATE			0xB5
+
+/* PMC Slave Module  Internal Control */
+#define TPS80031_NRESPWRON_CFG_TRANS			0xB7
+#define TPS80031_NRESPWRON_CFG_STATE			0xB8
+#define TPS80031_CLK32KAO_CFG_TRANS			0xBA
+#define TPS80031_CLK32KAO_CFG_STATE			0xBB
+#define TPS80031_CLK32KG_CFG_TRANS			0xBD
+#define TPS80031_CLK32KG_CFG_STATE			0xBE
+#define TPS80031_CLK32KAUDIO_CFG_TRANS			0xC0
+#define TPS80031_CLK32KAUDIO_CFG_STATE			0xC1
+#define TPS80031_VRTC_CFG_TRANS				0xC3
+#define TPS80031_VRTC_CFG_STATE				0xC4
+#define TPS80031_BIAS_CFG_TRANS				0xC6
+#define TPS80031_BIAS_CFG_STATE				0xC7
+#define TPS80031_VSYSMIN_HI_CFG_TRANS			0xC9
+#define TPS80031_VSYSMIN_HI_CFG_STATE			0xCA
+#define TPS80031_RC6MHZ_CFG_TRANS			0xCC
+#define TPS80031_RC6MHZ_CFG_STATE			0xCD
+#define TPS80031_TMP_CFG_TRANS				0xCF
+#define TPS80031_TMP_CFG_STATE				0xD0
+
+/* PMC Slave Module  resources assignment */
+#define TPS80031_PREQ1_RES_ASS_A			0xD7
+#define TPS80031_PREQ1_RES_ASS_B			0xD8
+#define TPS80031_PREQ1_RES_ASS_C			0xD9
+#define TPS80031_PREQ2_RES_ASS_A			0xDA
+#define TPS80031_PREQ2_RES_ASS_B			0xDB
+#define TPS80031_PREQ2_RES_ASS_C			0xDC
+#define TPS80031_PREQ3_RES_ASS_A			0xDD
+#define TPS80031_PREQ3_RES_ASS_B			0xDE
+#define TPS80031_PREQ3_RES_ASS_C			0xDF
+
+/* PMC Slave Module  Miscellaneous */
+#define TPS80031_SMPS_OFFSET				0xE0
+#define TPS80031_SMPS_MULT				0xE3
+#define TPS80031_MISC1					0xE4
+#define TPS80031_MISC2					0xE5
+#define TPS80031_BBSPOR_CFG				0xE6
+#define TPS80031_TMP_CFG				0xE7
+
+/* Battery Charging Controller and Indicator LED */
+#define TPS80031_CONTROLLER_CTRL2			0xDA
+#define TPS80031_CONTROLLER_VSEL_COMP			0xDB
+#define TPS80031_CHARGERUSB_VSYSREG			0xDC
+#define TPS80031_CHARGERUSB_VICHRG_PC			0xDD
+#define TPS80031_LINEAR_CHRG_STS			0xDE
+#define TPS80031_CONTROLLER_INT_MASK			0xE0
+#define TPS80031_CONTROLLER_CTRL1			0xE1
+#define TPS80031_CONTROLLER_WDG				0xE2
+#define TPS80031_CONTROLLER_STAT1			0xE3
+#define TPS80031_CHARGERUSB_INT_STATUS			0xE4
+#define TPS80031_CHARGERUSB_INT_MASK			0xE5
+#define TPS80031_CHARGERUSB_STATUS_INT1			0xE6
+#define TPS80031_CHARGERUSB_STATUS_INT2			0xE7
+#define TPS80031_CHARGERUSB_CTRL1			0xE8
+#define TPS80031_CHARGERUSB_CTRL2			0xE9
+#define TPS80031_CHARGERUSB_CTRL3			0xEA
+#define TPS80031_CHARGERUSB_STAT1			0xEB
+#define TPS80031_CHARGERUSB_VOREG			0xEC
+#define TPS80031_CHARGERUSB_VICHRG			0xED
+#define TPS80031_CHARGERUSB_CINLIMIT			0xEE
+#define TPS80031_CHARGERUSB_CTRLLIMIT1			0xEF
+#define TPS80031_CHARGERUSB_CTRLLIMIT2			0xF0
+#define TPS80031_LED_PWM_CTRL1				0xF4
+#define TPS80031_LED_PWM_CTRL2				0xF5
+
+/* USB On-The-Go  */
+#define TPS80031_BACKUP_REG				0xFA
+#define TPS80031_USB_VENDOR_ID_LSB			0x00
+#define TPS80031_USB_VENDOR_ID_MSB			0x01
+#define TPS80031_USB_PRODUCT_ID_LSB			0x02
+#define TPS80031_USB_PRODUCT_ID_MSB			0x03
+#define TPS80031_USB_VBUS_CTRL_SET			0x04
+#define TPS80031_USB_VBUS_CTRL_CLR			0x05
+#define TPS80031_USB_ID_CTRL_SET			0x06
+#define TPS80031_USB_ID_CTRL_CLR			0x07
+#define TPS80031_USB_VBUS_INT_SRC			0x08
+#define TPS80031_USB_VBUS_INT_LATCH_SET			0x09
+#define TPS80031_USB_VBUS_INT_LATCH_CLR			0x0A
+#define TPS80031_USB_VBUS_INT_EN_LO_SET			0x0B
+#define TPS80031_USB_VBUS_INT_EN_LO_CLR			0x0C
+#define TPS80031_USB_VBUS_INT_EN_HI_SET			0x0D
+#define TPS80031_USB_VBUS_INT_EN_HI_CLR			0x0E
+#define TPS80031_USB_ID_INT_SRC				0x0F
+#define TPS80031_USB_ID_INT_LATCH_SET			0x10
+#define TPS80031_USB_ID_INT_LATCH_CLR			0x11
+#define TPS80031_USB_ID_INT_EN_LO_SET			0x12
+#define TPS80031_USB_ID_INT_EN_LO_CLR			0x13
+#define TPS80031_USB_ID_INT_EN_HI_SET			0x14
+#define TPS80031_USB_ID_INT_EN_HI_CLR			0x15
+#define TPS80031_USB_OTG_ADP_CTRL			0x16
+#define TPS80031_USB_OTG_ADP_HIGH			0x17
+#define TPS80031_USB_OTG_ADP_LOW			0x18
+#define TPS80031_USB_OTG_ADP_RISE			0x19
+#define TPS80031_USB_OTG_REVISION			0x1A
+
+/* Gas Gauge */
+#define TPS80031_FG_REG_00				0xC0
+#define TPS80031_FG_REG_01				0xC1
+#define TPS80031_FG_REG_02				0xC2
+#define TPS80031_FG_REG_03				0xC3
+#define TPS80031_FG_REG_04				0xC4
+#define TPS80031_FG_REG_05				0xC5
+#define TPS80031_FG_REG_06				0xC6
+#define TPS80031_FG_REG_07				0xC7
+#define TPS80031_FG_REG_08				0xC8
+#define TPS80031_FG_REG_09				0xC9
+#define TPS80031_FG_REG_10				0xCA
+#define TPS80031_FG_REG_11				0xCB
+
+/* General Purpose ADC */
+#define TPS80031_GPADC_CTRL				0x2E
+#define TPS80031_GPADC_CTRL2				0x2F
+#define TPS80031_RTSELECT_LSB				0x32
+#define TPS80031_RTSELECT_ISB				0x33
+#define TPS80031_RTSELECT_MSB				0x34
+#define TPS80031_GPSELECT_ISB				0x35
+#define TPS80031_CTRL_P1				0x36
+#define TPS80031_RTCH0_LSB				0x37
+#define TPS80031_RTCH0_MSB				0x38
+#define TPS80031_RTCH1_LSB				0x39
+#define TPS80031_RTCH1_MSB				0x3A
+#define TPS80031_GPCH0_LSB				0x3B
+#define TPS80031_GPCH0_MSB				0x3C
+
+/* SIM, MMC and Battery Detection */
+#define TPS80031_SIMDEBOUNCING				0xEB
+#define TPS80031_SIMCTRL				0xEC
+#define TPS80031_MMCDEBOUNCING				0xED
+#define TPS80031_MMCCTRL				0xEE
+#define TPS80031_BATDEBOUNCING				0xEF
+
+/* Vibrator Driver and PWMs */
+#define TPS80031_VIBCTRL				0x9B
+#define TPS80031_VIBMODE				0x9C
+#define TPS80031_PWM1ON					0xBA
+#define TPS80031_PWM1OFF				0xBB
+#define TPS80031_PWM2ON					0xBD
+#define TPS80031_PWM2OFF				0xBE
+
+/* Control Interface */
+#define TPS80031_INT_STS_A				0xD0
+#define TPS80031_INT_STS_B				0xD1
+#define TPS80031_INT_STS_C				0xD2
+#define TPS80031_INT_MSK_LINE_A				0xD3
+#define TPS80031_INT_MSK_LINE_B				0xD4
+#define TPS80031_INT_MSK_LINE_C				0xD5
+#define TPS80031_INT_MSK_STS_A				0xD6
+#define TPS80031_INT_MSK_STS_B				0xD7
+#define TPS80031_INT_MSK_STS_C				0xD8
+#define TPS80031_TOGGLE1				0x90
+#define TPS80031_TOGGLE2				0x91
+#define TPS80031_TOGGLE3				0x92
+#define TPS80031_PWDNSTATUS1				0x93
+#define TPS80031_PWDNSTATUS2				0x94
+#define TPS80031_VALIDITY0				0x17
+#define TPS80031_VALIDITY1				0x18
+#define TPS80031_VALIDITY2				0x19
+#define TPS80031_VALIDITY3				0x1A
+#define TPS80031_VALIDITY4				0x1B
+#define TPS80031_VALIDITY5				0x1C
+#define TPS80031_VALIDITY6				0x1D
+#define TPS80031_VALIDITY7				0x1E
+
+/* Version number related register */
+#define TPS80031_JTAGVERNUM				0x87
+#define TPS80031_EPROM_REV				0xDF
+
+/* GPADC Trimming Bits. */
+#define TPS80031_GPADC_TRIM0				0xCC
+#define TPS80031_GPADC_TRIM1				0xCD
+#define TPS80031_GPADC_TRIM2				0xCE
+#define TPS80031_GPADC_TRIM3				0xCF
+#define TPS80031_GPADC_TRIM4				0xD0
+#define TPS80031_GPADC_TRIM5				0xD1
+#define TPS80031_GPADC_TRIM6				0xD2
+#define TPS80031_GPADC_TRIM7				0xD3
+#define TPS80031_GPADC_TRIM8				0xD4
+#define TPS80031_GPADC_TRIM9				0xD5
+#define TPS80031_GPADC_TRIM10				0xD6
+#define TPS80031_GPADC_TRIM11				0xD7
+#define TPS80031_GPADC_TRIM12				0xD8
+#define TPS80031_GPADC_TRIM13				0xD9
+#define TPS80031_GPADC_TRIM14				0xDA
+#define TPS80031_GPADC_TRIM15				0xDB
+#define TPS80031_GPADC_TRIM16				0xDC
+#define TPS80031_GPADC_TRIM17				0xDD
+#define TPS80031_GPADC_TRIM18				0xDE
+
+/* TPS80031_CONTROLLER_STAT1 bit fields */
+#define TPS80031_CONTROLLER_STAT1_BAT_TEMP		0
+#define TPS80031_CONTROLLER_STAT1_BAT_REMOVED		1
+#define TPS80031_CONTROLLER_STAT1_VBUS_DET		2
+#define TPS80031_CONTROLLER_STAT1_VAC_DET		3
+#define TPS80031_CONTROLLER_STAT1_FAULT_WDG		4
+#define TPS80031_CONTROLLER_STAT1_LINCH_GATED		6
+/* TPS80031_CONTROLLER_INT_MASK bit filed */
+#define TPS80031_CONTROLLER_INT_MASK_MVAC_DET		0
+#define TPS80031_CONTROLLER_INT_MASK_MVBUS_DET		1
+#define TPS80031_CONTROLLER_INT_MASK_MBAT_TEMP		2
+#define TPS80031_CONTROLLER_INT_MASK_MFAULT_WDG		3
+#define TPS80031_CONTROLLER_INT_MASK_MBAT_REMOVED	4
+#define TPS80031_CONTROLLER_INT_MASK_MLINCH_GATED	5
+
+#define TPS80031_CHARGE_CONTROL_SUB_INT_MASK		0x3F
+
+/* TPS80031_PHOENIX_DEV_ON bit field */
+#define TPS80031_DEVOFF					0x1
+
+#define TPS80031_EXT_CONTROL_CFG_TRANS			0
+#define TPS80031_EXT_CONTROL_CFG_STATE			1
+
+/* State register field */
+#define TPS80031_STATE_OFF				0x00
+#define TPS80031_STATE_ON				0x01
+#define TPS80031_STATE_MASK				0x03
+
+/* Trans register field */
+#define TPS80031_TRANS_ACTIVE_OFF			0x00
+#define TPS80031_TRANS_ACTIVE_ON			0x01
+#define TPS80031_TRANS_ACTIVE_MASK			0x03
+#define TPS80031_TRANS_SLEEP_OFF			0x00
+#define TPS80031_TRANS_SLEEP_ON				0x04
+#define TPS80031_TRANS_SLEEP_MASK			0x0C
+#define TPS80031_TRANS_OFF_OFF				0x00
+#define TPS80031_TRANS_OFF_ACTIVE			0x10
+#define TPS80031_TRANS_OFF_MASK				0x30
+
+#define TPS80031_EXT_PWR_REQ		(TPS80031_PWR_REQ_INPUT_PREQ1 | \
+					TPS80031_PWR_REQ_INPUT_PREQ2 | \
+					TPS80031_PWR_REQ_INPUT_PREQ3)
+
+/* TPS80031_BBSPOR_CFG bit field */
+#define TPS80031_BBSPOR_CHG_EN				0x8
+#define TPS80031_MAX_REGISTER				0xFF
+
+struct i2c_client;
+
+/* Supported chips */
+enum chips {
+	TPS80031 = 0x00000001,
+	TPS80032 = 0x00000002,
+};
+
+enum {
+	TPS80031_INT_PWRON,
+	TPS80031_INT_RPWRON,
+	TPS80031_INT_SYS_VLOW,
+	TPS80031_INT_RTC_ALARM,
+	TPS80031_INT_RTC_PERIOD,
+	TPS80031_INT_HOT_DIE,
+	TPS80031_INT_VXX_SHORT,
+	TPS80031_INT_SPDURATION,
+	TPS80031_INT_WATCHDOG,
+	TPS80031_INT_BAT,
+	TPS80031_INT_SIM,
+	TPS80031_INT_MMC,
+	TPS80031_INT_RES,
+	TPS80031_INT_GPADC_RT,
+	TPS80031_INT_GPADC_SW2_EOC,
+	TPS80031_INT_CC_AUTOCAL,
+	TPS80031_INT_ID_WKUP,
+	TPS80031_INT_VBUSS_WKUP,
+	TPS80031_INT_ID,
+	TPS80031_INT_VBUS,
+	TPS80031_INT_CHRG_CTRL,
+	TPS80031_INT_EXT_CHRG,
+	TPS80031_INT_INT_CHRG,
+	TPS80031_INT_RES2,
+	TPS80031_INT_BAT_TEMP_OVRANGE,
+	TPS80031_INT_BAT_REMOVED,
+	TPS80031_INT_VBUS_DET,
+	TPS80031_INT_VAC_DET,
+	TPS80031_INT_FAULT_WDG,
+	TPS80031_INT_LINCH_GATED,
+
+	/* Last interrupt id to get the end number */
+	TPS80031_INT_NR,
+};
+
+/* TPS80031 Slave IDs */
+#define TPS80031_NUM_SLAVES				4
+#define TPS80031_SLAVE_ID0				0
+#define TPS80031_SLAVE_ID1				1
+#define TPS80031_SLAVE_ID2				2
+#define TPS80031_SLAVE_ID3				3
+
+/* TPS80031 I2C addresses */
+#define TPS80031_I2C_ID0_ADDR				0x12
+#define TPS80031_I2C_ID1_ADDR				0x48
+#define TPS80031_I2C_ID2_ADDR				0x49
+#define TPS80031_I2C_ID3_ADDR				0x4A
+
+enum {
+	TPS80031_REGULATOR_VIO,
+	TPS80031_REGULATOR_SMPS1,
+	TPS80031_REGULATOR_SMPS2,
+	TPS80031_REGULATOR_SMPS3,
+	TPS80031_REGULATOR_SMPS4,
+	TPS80031_REGULATOR_VANA,
+	TPS80031_REGULATOR_LDO1,
+	TPS80031_REGULATOR_LDO2,
+	TPS80031_REGULATOR_LDO3,
+	TPS80031_REGULATOR_LDO4,
+	TPS80031_REGULATOR_LDO5,
+	TPS80031_REGULATOR_LDO6,
+	TPS80031_REGULATOR_LDO7,
+	TPS80031_REGULATOR_LDOLN,
+	TPS80031_REGULATOR_LDOUSB,
+	TPS80031_REGULATOR_VBUS,
+	TPS80031_REGULATOR_REGEN1,
+	TPS80031_REGULATOR_REGEN2,
+	TPS80031_REGULATOR_SYSEN,
+	TPS80031_REGULATOR_MAX,
+};
+
+/* Different configurations for the rails */
+enum {
+	/* USBLDO input selection */
+	TPS80031_USBLDO_INPUT_VSYS		= 0x00000001,
+	TPS80031_USBLDO_INPUT_PMID		= 0x00000002,
+
+	/* LDO3 output mode */
+	TPS80031_LDO3_OUTPUT_VIB		= 0x00000004,
+
+	/* VBUS configuration */
+	TPS80031_VBUS_DISCHRG_EN_PDN		= 0x00000004,
+	TPS80031_VBUS_SW_ONLY			= 0x00000008,
+	TPS80031_VBUS_SW_N_ID			= 0x00000010,
+};
+
+/* External controls requests */
+enum tps80031_ext_control {
+	TPS80031_PWR_REQ_INPUT_NONE		= 0x00000000,
+	TPS80031_PWR_REQ_INPUT_PREQ1		= 0x00000001,
+	TPS80031_PWR_REQ_INPUT_PREQ2		= 0x00000002,
+	TPS80031_PWR_REQ_INPUT_PREQ3		= 0x00000004,
+	TPS80031_PWR_OFF_ON_SLEEP		= 0x00000008,
+	TPS80031_PWR_ON_ON_SLEEP		= 0x00000010,
+};
+
+enum tps80031_pupd_pins {
+	TPS80031_PREQ1 = 0,
+	TPS80031_PREQ2A,
+	TPS80031_PREQ2B,
+	TPS80031_PREQ2C,
+	TPS80031_PREQ3,
+	TPS80031_NRES_WARM,
+	TPS80031_PWM_FORCE,
+	TPS80031_CHRG_EXT_CHRG_STATZ,
+	TPS80031_SIM,
+	TPS80031_MMC,
+	TPS80031_GPADC_START,
+	TPS80031_DVSI2C_SCL,
+	TPS80031_DVSI2C_SDA,
+	TPS80031_CTLI2C_SCL,
+	TPS80031_CTLI2C_SDA,
+};
+
+enum tps80031_pupd_settings {
+	TPS80031_PUPD_NORMAL,
+	TPS80031_PUPD_PULLDOWN,
+	TPS80031_PUPD_PULLUP,
+};
+
+struct tps80031 {
+	struct device		*dev;
+	unsigned long		chip_info;
+	int			es_version;
+	struct i2c_client	*clients[TPS80031_NUM_SLAVES];
+	struct regmap		*regmap[TPS80031_NUM_SLAVES];
+	struct regmap_irq_chip_data *irq_data;
+};
+
+struct tps80031_pupd_init_data {
+	int input_pin;
+	int setting;
+};
+
+/*
+ * struct tps80031_regulator_platform_data - tps80031 regulator platform data.
+ *
+ * @reg_init_data: The regulator init data.
+ * @ext_ctrl_flag: External control flag for sleep/power request control.
+ * @config_flags: Configuration flag to configure the rails.
+ *		  It should be ORed of config enums.
+ */
+
+struct tps80031_regulator_platform_data {
+	struct regulator_init_data *reg_init_data;
+	unsigned int ext_ctrl_flag;
+	unsigned int config_flags;
+};
+
+struct tps80031_platform_data {
+	int irq_base;
+	bool use_power_off;
+	struct tps80031_pupd_init_data *pupd_init_data;
+	int pupd_init_data_size;
+	struct tps80031_regulator_platform_data
+			*regulator_pdata[TPS80031_REGULATOR_MAX];
+};
+
+static inline int tps80031_write(struct device *dev, int sid,
+		int reg, uint8_t val)
+{
+	struct tps80031 *tps80031 = dev_get_drvdata(dev);
+
+	return regmap_write(tps80031->regmap[sid], reg, val);
+}
+
+static inline int tps80031_writes(struct device *dev, int sid, int reg,
+		int len, uint8_t *val)
+{
+	struct tps80031 *tps80031 = dev_get_drvdata(dev);
+
+	return regmap_bulk_write(tps80031->regmap[sid], reg, val, len);
+}
+
+static inline int tps80031_read(struct device *dev, int sid,
+		int reg, uint8_t *val)
+{
+	struct tps80031 *tps80031 = dev_get_drvdata(dev);
+	unsigned int ival;
+	int ret;
+
+	ret = regmap_read(tps80031->regmap[sid], reg, &ival);
+	if (ret < 0) {
+		dev_err(dev, "failed reading from reg 0x%02x\n", reg);
+		return ret;
+	}
+
+	*val = ival;
+	return ret;
+}
+
+static inline int tps80031_reads(struct device *dev, int sid,
+		int reg, int len, uint8_t *val)
+{
+	struct tps80031 *tps80031 = dev_get_drvdata(dev);
+
+	return regmap_bulk_read(tps80031->regmap[sid], reg, val, len);
+}
+
+static inline int tps80031_set_bits(struct device *dev, int sid,
+		int reg, uint8_t bit_mask)
+{
+	struct tps80031 *tps80031 = dev_get_drvdata(dev);
+
+	return regmap_update_bits(tps80031->regmap[sid], reg,
+				bit_mask, bit_mask);
+}
+
+static inline int tps80031_clr_bits(struct device *dev, int sid,
+		int reg, uint8_t bit_mask)
+{
+	struct tps80031 *tps80031 = dev_get_drvdata(dev);
+
+	return regmap_update_bits(tps80031->regmap[sid], reg, bit_mask, 0);
+}
+
+static inline int tps80031_update(struct device *dev, int sid,
+		int reg, uint8_t val, uint8_t mask)
+{
+	struct tps80031 *tps80031 = dev_get_drvdata(dev);
+
+	return regmap_update_bits(tps80031->regmap[sid], reg, mask, val);
+}
+
+static inline unsigned long tps80031_get_chip_info(struct device *dev)
+{
+	struct tps80031 *tps80031 = dev_get_drvdata(dev);
+
+	return tps80031->chip_info;
+}
+
+static inline int tps80031_get_pmu_version(struct device *dev)
+{
+	struct tps80031 *tps80031 = dev_get_drvdata(dev);
+
+	return tps80031->es_version;
+}
+
+static inline int tps80031_irq_get_virq(struct device *dev, int irq)
+{
+	struct tps80031 *tps80031 = dev_get_drvdata(dev);
+
+	return regmap_irq_get_virq(tps80031->irq_data, irq);
+}
+
+extern int tps80031_ext_power_req_config(struct device *dev,
+		unsigned long ext_ctrl_flag, int preq_bit,
+		int state_reg_add, int trans_reg_add);
+#endif /*__LINUX_MFD_TPS80031_H */
-- 
cgit v1.2.3


From 3863db3e800c64e21e4effcc3de0f72cdb9b0d77 Mon Sep 17 00:00:00 2001
From: Laxman Dewangan <ldewangan@nvidia.com>
Date: Tue, 20 Nov 2012 08:44:47 +0530
Subject: mfd: tps65090: Remove unused member of struct tps65090

Remove unused member from tps65090 data structure as
these are not used.

Signed-off-by: Laxman Dewangan <ldewangan@nvidia.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/tps65090.c       |  6 +-----
 include/linux/mfd/tps65090.h | 11 -----------
 2 files changed, 1 insertion(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/tps65090.c b/drivers/mfd/tps65090.c
index f95f7f6b846c..3cfc9dcbe9dc 100644
--- a/drivers/mfd/tps65090.c
+++ b/drivers/mfd/tps65090.c
@@ -269,12 +269,9 @@ static int __devinit tps65090_i2c_probe(struct i2c_client *client,
 		return -ENOMEM;
 	}
 
-	tps65090->client = client;
 	tps65090->dev = &client->dev;
 	i2c_set_clientdata(client, tps65090);
 
-	mutex_init(&tps65090->lock);
-
 	if (client->irq) {
 		ret = tps65090_irq_init(tps65090, client->irq, pdata->irq_base);
 		if (ret) {
@@ -284,8 +281,7 @@ static int __devinit tps65090_i2c_probe(struct i2c_client *client,
 		}
 	}
 
-	tps65090->rmap = devm_regmap_init_i2c(tps65090->client,
-					      &tps65090_regmap_config);
+	tps65090->rmap = devm_regmap_init_i2c(client, &tps65090_regmap_config);
 	if (IS_ERR(tps65090->rmap)) {
 		ret = PTR_ERR(tps65090->rmap);
 		dev_err(&client->dev, "regmap_init failed with err: %d\n", ret);
diff --git a/include/linux/mfd/tps65090.h b/include/linux/mfd/tps65090.h
index 6bc31d854626..6c576224f637 100644
--- a/include/linux/mfd/tps65090.h
+++ b/include/linux/mfd/tps65090.h
@@ -25,26 +25,15 @@
 #include <linux/irq.h>
 
 struct tps65090 {
-	struct mutex		lock;
 	struct device		*dev;
-	struct i2c_client	*client;
 	struct regmap		*rmap;
 	struct irq_chip		irq_chip;
 	struct mutex		irq_lock;
 	int			irq_base;
-	unsigned int		id;
-};
-
-struct tps65090_subdev_info {
-	int		id;
-	const char	*name;
-	void		*platform_data;
 };
 
 struct tps65090_platform_data {
 	int irq_base;
-	int num_subdevs;
-	struct tps65090_subdev_info *subdevs;
 };
 
 /*
-- 
cgit v1.2.3


From b9c79323166530a14c1fa8c10337eeaa54e3f98d Mon Sep 17 00:00:00 2001
From: Laxman Dewangan <ldewangan@nvidia.com>
Date: Tue, 20 Nov 2012 08:44:48 +0530
Subject: mfd: tps65090: Move register access APIs to header

Since tps65090 register is accessed via regmap, moving
the register access APIs to header and making it as inline.

Signed-off-by: Laxman Dewangan <ldewangan@nvidia.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/tps65090.c       | 34 ----------------------------------
 include/linux/mfd/tps65090.h | 39 +++++++++++++++++++++++++++++++++++----
 2 files changed, 35 insertions(+), 38 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/tps65090.c b/drivers/mfd/tps65090.c
index 3cfc9dcbe9dc..355a07749454 100644
--- a/drivers/mfd/tps65090.c
+++ b/drivers/mfd/tps65090.c
@@ -25,7 +25,6 @@
 #include <linux/i2c.h>
 #include <linux/mfd/core.h>
 #include <linux/mfd/tps65090.h>
-#include <linux/regmap.h>
 #include <linux/err.h>
 
 #define NUM_INT_REG 2
@@ -78,39 +77,6 @@ static struct mfd_cell tps65090s[] = {
 	},
 };
 
-int tps65090_write(struct device *dev, int reg, uint8_t val)
-{
-	struct tps65090 *tps = dev_get_drvdata(dev);
-	return regmap_write(tps->rmap, reg, val);
-}
-EXPORT_SYMBOL_GPL(tps65090_write);
-
-int tps65090_read(struct device *dev, int reg, uint8_t *val)
-{
-	struct tps65090 *tps = dev_get_drvdata(dev);
-	unsigned int temp_val;
-	int ret;
-	ret = regmap_read(tps->rmap, reg, &temp_val);
-	if (!ret)
-		*val = temp_val;
-	return ret;
-}
-EXPORT_SYMBOL_GPL(tps65090_read);
-
-int tps65090_set_bits(struct device *dev, int reg, uint8_t bit_num)
-{
-	struct tps65090 *tps = dev_get_drvdata(dev);
-	return regmap_update_bits(tps->rmap, reg, BIT(bit_num), ~0u);
-}
-EXPORT_SYMBOL_GPL(tps65090_set_bits);
-
-int tps65090_clr_bits(struct device *dev, int reg, uint8_t bit_num)
-{
-	struct tps65090 *tps = dev_get_drvdata(dev);
-	return regmap_update_bits(tps->rmap, reg, BIT(bit_num), 0u);
-}
-EXPORT_SYMBOL_GPL(tps65090_clr_bits);
-
 static void tps65090_irq_lock(struct irq_data *data)
 {
 	struct tps65090 *tps65090 = irq_data_get_irq_chip_data(data);
diff --git a/include/linux/mfd/tps65090.h b/include/linux/mfd/tps65090.h
index 6c576224f637..1a5f916b7383 100644
--- a/include/linux/mfd/tps65090.h
+++ b/include/linux/mfd/tps65090.h
@@ -23,6 +23,7 @@
 #define __LINUX_MFD_TPS65090_H
 
 #include <linux/irq.h>
+#include <linux/regmap.h>
 
 struct tps65090 {
 	struct device		*dev;
@@ -40,9 +41,39 @@ struct tps65090_platform_data {
  * NOTE: the functions below are not intended for use outside
  * of the TPS65090 sub-device drivers
  */
-extern int tps65090_write(struct device *dev, int reg, uint8_t val);
-extern int tps65090_read(struct device *dev, int reg, uint8_t *val);
-extern int tps65090_set_bits(struct device *dev, int reg, uint8_t bit_num);
-extern int tps65090_clr_bits(struct device *dev, int reg, uint8_t bit_num);
+static inline int tps65090_write(struct device *dev, int reg, uint8_t val)
+{
+	struct tps65090 *tps = dev_get_drvdata(dev);
+
+	return regmap_write(tps->rmap, reg, val);
+}
+
+static inline int tps65090_read(struct device *dev, int reg, uint8_t *val)
+{
+	struct tps65090 *tps = dev_get_drvdata(dev);
+	unsigned int temp_val;
+	int ret;
+
+	ret = regmap_read(tps->rmap, reg, &temp_val);
+	if (!ret)
+		*val = temp_val;
+	return ret;
+}
+
+static inline int tps65090_set_bits(struct device *dev, int reg,
+		uint8_t bit_num)
+{
+	struct tps65090 *tps = dev_get_drvdata(dev);
+
+	return regmap_update_bits(tps->rmap, reg, BIT(bit_num), ~0u);
+}
+
+static inline int tps65090_clr_bits(struct device *dev, int reg,
+		uint8_t bit_num)
+{
+	struct tps65090 *tps = dev_get_drvdata(dev);
+
+	return regmap_update_bits(tps->rmap, reg, BIT(bit_num), 0u);
+}
 
 #endif /*__LINUX_MFD_TPS65090_H */
-- 
cgit v1.2.3


From 759f2598ef3876637e40d99a4ceb7a3d83a4d8d3 Mon Sep 17 00:00:00 2001
From: Laxman Dewangan <ldewangan@nvidia.com>
Date: Tue, 20 Nov 2012 08:44:49 +0530
Subject: mfd: tps65090: Use regmap irq framework for interrupt support

Use the regmap irq framework for implementing TPS65090 interrupt
support in place of implementing it locally.

Signed-off-by: Laxman Dewangan <ldewangan@nvidia.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/tps65090.c       | 263 ++++++++++++++++---------------------------
 include/linux/mfd/tps65090.h |  23 +++-
 2 files changed, 118 insertions(+), 168 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/tps65090.c b/drivers/mfd/tps65090.c
index 355a07749454..2eaae52cb5b8 100644
--- a/drivers/mfd/tps65090.c
+++ b/drivers/mfd/tps65090.c
@@ -38,35 +38,21 @@
 #define TPS65090_INT_MSK	0x2
 #define TPS65090_INT_MSK2	0x3
 
-struct tps65090_irq_data {
-	u8		mask_reg;
-	u8		mask_pos;
-};
-
-#define TPS65090_IRQ(_reg, _mask_pos)		\
-	{					\
-		.mask_reg	= (_reg),	\
-		.mask_pos	= (_mask_pos),	\
-	}
-
-static const struct tps65090_irq_data tps65090_irqs[] = {
-	[0]		= TPS65090_IRQ(0, 0),
-	[1]		= TPS65090_IRQ(0, 1),
-	[2]		= TPS65090_IRQ(0, 2),
-	[3]		= TPS65090_IRQ(0, 3),
-	[4]		= TPS65090_IRQ(0, 4),
-	[5]		= TPS65090_IRQ(0, 5),
-	[6]		= TPS65090_IRQ(0, 6),
-	[7]		= TPS65090_IRQ(0, 7),
-	[8]		= TPS65090_IRQ(1, 0),
-	[9]		= TPS65090_IRQ(1, 1),
-	[10]		= TPS65090_IRQ(1, 2),
-	[11]		= TPS65090_IRQ(1, 3),
-	[12]		= TPS65090_IRQ(1, 4),
-	[13]		= TPS65090_IRQ(1, 5),
-	[14]		= TPS65090_IRQ(1, 6),
-	[15]		= TPS65090_IRQ(1, 7),
-};
+#define TPS65090_INT1_MASK_VAC_STATUS_CHANGE		1
+#define TPS65090_INT1_MASK_VSYS_STATUS_CHANGE		2
+#define TPS65090_INT1_MASK_BAT_STATUS_CHANGE		3
+#define TPS65090_INT1_MASK_CHARGING_STATUS_CHANGE	4
+#define TPS65090_INT1_MASK_CHARGING_COMPLETE		5
+#define TPS65090_INT1_MASK_OVERLOAD_DCDC1		6
+#define TPS65090_INT1_MASK_OVERLOAD_DCDC2		7
+#define TPS65090_INT2_MASK_OVERLOAD_DCDC3		0
+#define TPS65090_INT2_MASK_OVERLOAD_FET1		1
+#define TPS65090_INT2_MASK_OVERLOAD_FET2		2
+#define TPS65090_INT2_MASK_OVERLOAD_FET3		3
+#define TPS65090_INT2_MASK_OVERLOAD_FET4		4
+#define TPS65090_INT2_MASK_OVERLOAD_FET5		5
+#define TPS65090_INT2_MASK_OVERLOAD_FET6		6
+#define TPS65090_INT2_MASK_OVERLOAD_FET7		7
 
 static struct mfd_cell tps65090s[] = {
 	{
@@ -77,132 +63,77 @@ static struct mfd_cell tps65090s[] = {
 	},
 };
 
-static void tps65090_irq_lock(struct irq_data *data)
-{
-	struct tps65090 *tps65090 = irq_data_get_irq_chip_data(data);
-
-	mutex_lock(&tps65090->irq_lock);
-}
-
-static void tps65090_irq_mask(struct irq_data *irq_data)
-{
-	struct tps65090 *tps65090 = irq_data_get_irq_chip_data(irq_data);
-	unsigned int __irq = irq_data->hwirq;
-	const struct tps65090_irq_data *data = &tps65090_irqs[__irq];
-
-	tps65090_set_bits(tps65090->dev, (TPS65090_INT_MSK + data->mask_reg),
-		data->mask_pos);
-}
-
-static void tps65090_irq_unmask(struct irq_data *irq_data)
-{
-	struct tps65090 *tps65090 = irq_data_get_irq_chip_data(irq_data);
-	unsigned int __irq = irq_data->irq - tps65090->irq_base;
-	const struct tps65090_irq_data *data = &tps65090_irqs[__irq];
-
-	tps65090_clr_bits(tps65090->dev, (TPS65090_INT_MSK + data->mask_reg),
-		data->mask_pos);
-}
-
-static void tps65090_irq_sync_unlock(struct irq_data *data)
-{
-	struct tps65090 *tps65090 = irq_data_get_irq_chip_data(data);
-
-	mutex_unlock(&tps65090->irq_lock);
-}
-
-static irqreturn_t tps65090_irq(int irq, void *data)
-{
-	struct tps65090 *tps65090 = data;
-	int ret = 0;
-	u8 status, mask;
-	unsigned long int acks = 0;
-	int i;
-
-	for (i = 0; i < NUM_INT_REG; i++) {
-		ret = tps65090_read(tps65090->dev, TPS65090_INT_MSK + i, &mask);
-		if (ret < 0) {
-			dev_err(tps65090->dev,
-				"failed to read mask reg [addr:%d]\n",
-				TPS65090_INT_MSK + i);
-			return IRQ_NONE;
-		}
-		ret = tps65090_read(tps65090->dev, TPS65090_INT_STS + i,
-			&status);
-		if (ret < 0) {
-			dev_err(tps65090->dev,
-				"failed to read status reg [addr:%d]\n",
-				 TPS65090_INT_STS + i);
-			return IRQ_NONE;
-		}
-		if (status) {
-			/* Ack only those interrupts which are not masked */
-			status &= (~mask);
-			ret = tps65090_write(tps65090->dev,
-					TPS65090_INT_STS + i, status);
-			if (ret < 0) {
-				dev_err(tps65090->dev,
-					"failed to write interrupt status\n");
-				return IRQ_NONE;
-			}
-			acks |= (status << (i * 8));
-		}
-	}
-
-	for_each_set_bit(i, &acks, ARRAY_SIZE(tps65090_irqs))
-		handle_nested_irq(tps65090->irq_base + i);
-	return acks ? IRQ_HANDLED : IRQ_NONE;
-}
-
-static int __devinit tps65090_irq_init(struct tps65090 *tps65090, int irq,
-	int irq_base)
-{
-	int i, ret;
-
-	if (!irq_base) {
-		dev_err(tps65090->dev, "IRQ base not set\n");
-		return -EINVAL;
-	}
-
-	mutex_init(&tps65090->irq_lock);
-
-	for (i = 0; i < NUM_INT_REG; i++)
-		tps65090_write(tps65090->dev, TPS65090_INT_MSK + i, 0xFF);
-
-	for (i = 0; i < NUM_INT_REG; i++)
-		tps65090_write(tps65090->dev, TPS65090_INT_STS + i, 0xff);
-
-	tps65090->irq_base = irq_base;
-	tps65090->irq_chip.name = "tps65090";
-	tps65090->irq_chip.irq_mask = tps65090_irq_mask;
-	tps65090->irq_chip.irq_unmask = tps65090_irq_unmask;
-	tps65090->irq_chip.irq_bus_lock = tps65090_irq_lock;
-	tps65090->irq_chip.irq_bus_sync_unlock = tps65090_irq_sync_unlock;
-
-	for (i = 0; i < ARRAY_SIZE(tps65090_irqs); i++) {
-		int __irq = i + tps65090->irq_base;
-		irq_set_chip_data(__irq, tps65090);
-		irq_set_chip_and_handler(__irq, &tps65090->irq_chip,
-					 handle_simple_irq);
-		irq_set_nested_thread(__irq, 1);
-#ifdef CONFIG_ARM
-		set_irq_flags(__irq, IRQF_VALID);
-#endif
-	}
-
-	ret = request_threaded_irq(irq, NULL, tps65090_irq, IRQF_ONESHOT,
-				"tps65090", tps65090);
-	if (!ret) {
-		device_init_wakeup(tps65090->dev, 1);
-		enable_irq_wake(irq);
-	}
+static const struct regmap_irq tps65090_irqs[] = {
+	/* INT1 IRQs*/
+	[TPS65090_IRQ_VAC_STATUS_CHANGE] = {
+			.mask = TPS65090_INT1_MASK_VAC_STATUS_CHANGE,
+	},
+	[TPS65090_IRQ_VSYS_STATUS_CHANGE] = {
+			.mask = TPS65090_INT1_MASK_VSYS_STATUS_CHANGE,
+	},
+	[TPS65090_IRQ_BAT_STATUS_CHANGE] = {
+			.mask = TPS65090_INT1_MASK_BAT_STATUS_CHANGE,
+	},
+	[TPS65090_IRQ_CHARGING_STATUS_CHANGE] = {
+			.mask = TPS65090_INT1_MASK_CHARGING_STATUS_CHANGE,
+	},
+	[TPS65090_IRQ_CHARGING_COMPLETE] = {
+			.mask = TPS65090_INT1_MASK_CHARGING_COMPLETE,
+	},
+	[TPS65090_IRQ_OVERLOAD_DCDC1] = {
+			.mask = TPS65090_INT1_MASK_OVERLOAD_DCDC1,
+	},
+	[TPS65090_IRQ_OVERLOAD_DCDC2] = {
+			.mask = TPS65090_INT1_MASK_OVERLOAD_DCDC2,
+	},
+	/* INT2 IRQs*/
+	[TPS65090_IRQ_OVERLOAD_DCDC3] = {
+			.reg_offset = 1,
+			.mask = TPS65090_INT2_MASK_OVERLOAD_DCDC3,
+	},
+	[TPS65090_IRQ_OVERLOAD_FET1] = {
+			.reg_offset = 1,
+			.mask = TPS65090_INT2_MASK_OVERLOAD_FET1,
+	},
+	[TPS65090_IRQ_OVERLOAD_FET2] = {
+			.reg_offset = 1,
+			.mask = TPS65090_INT2_MASK_OVERLOAD_FET2,
+	},
+	[TPS65090_IRQ_OVERLOAD_FET3] = {
+			.reg_offset = 1,
+			.mask = TPS65090_INT2_MASK_OVERLOAD_FET3,
+	},
+	[TPS65090_IRQ_OVERLOAD_FET4] = {
+			.reg_offset = 1,
+			.mask = TPS65090_INT2_MASK_OVERLOAD_FET4,
+	},
+	[TPS65090_IRQ_OVERLOAD_FET5] = {
+			.reg_offset = 1,
+			.mask = TPS65090_INT2_MASK_OVERLOAD_FET5,
+	},
+	[TPS65090_IRQ_OVERLOAD_FET6] = {
+			.reg_offset = 1,
+			.mask = TPS65090_INT2_MASK_OVERLOAD_FET6,
+	},
+	[TPS65090_IRQ_OVERLOAD_FET7] = {
+			.reg_offset = 1,
+			.mask = TPS65090_INT2_MASK_OVERLOAD_FET7,
+	},
+};
 
-	return ret;
-}
+static struct regmap_irq_chip tps65090_irq_chip = {
+	.name = "tps65090",
+	.irqs = tps65090_irqs,
+	.num_irqs = ARRAY_SIZE(tps65090_irqs),
+	.num_regs = NUM_INT_REG,
+	.status_base = TPS65090_INT_STS,
+	.mask_base = TPS65090_INT_MSK,
+	.mask_invert = true,
+};
 
 static bool is_volatile_reg(struct device *dev, unsigned int reg)
 {
-	if (reg == TPS65090_INT_STS)
+	if ((reg == TPS65090_INT_STS) || (reg == TPS65090_INT_STS2))
 		return true;
 	else
 		return false;
@@ -238,24 +169,27 @@ static int __devinit tps65090_i2c_probe(struct i2c_client *client,
 	tps65090->dev = &client->dev;
 	i2c_set_clientdata(client, tps65090);
 
-	if (client->irq) {
-		ret = tps65090_irq_init(tps65090, client->irq, pdata->irq_base);
-		if (ret) {
-			dev_err(&client->dev, "IRQ init failed with err: %d\n",
-				ret);
-			goto err_exit;
-		}
-	}
-
 	tps65090->rmap = devm_regmap_init_i2c(client, &tps65090_regmap_config);
 	if (IS_ERR(tps65090->rmap)) {
 		ret = PTR_ERR(tps65090->rmap);
 		dev_err(&client->dev, "regmap_init failed with err: %d\n", ret);
-		goto err_irq_exit;
+		return ret;
+	}
+
+	if (client->irq) {
+		ret = regmap_add_irq_chip(tps65090->rmap, client->irq,
+			IRQF_ONESHOT | IRQF_TRIGGER_LOW, pdata->irq_base,
+			&tps65090_irq_chip, &tps65090->irq_data);
+			if (ret) {
+				dev_err(&client->dev,
+					"IRQ init failed with err: %d\n", ret);
+			return ret;
+		}
 	}
 
 	ret = mfd_add_devices(tps65090->dev, -1, tps65090s,
-			      ARRAY_SIZE(tps65090s), NULL, 0, NULL);
+		ARRAY_SIZE(tps65090s), NULL,
+		regmap_irq_chip_get_base(tps65090->irq_data), NULL);
 	if (ret) {
 		dev_err(&client->dev, "add mfd devices failed with err: %d\n",
 			ret);
@@ -266,8 +200,7 @@ static int __devinit tps65090_i2c_probe(struct i2c_client *client,
 
 err_irq_exit:
 	if (client->irq)
-		free_irq(client->irq, tps65090);
-err_exit:
+		regmap_del_irq_chip(client->irq, tps65090->irq_data);
 	return ret;
 }
 
@@ -277,7 +210,7 @@ static int __devexit tps65090_i2c_remove(struct i2c_client *client)
 
 	mfd_remove_devices(tps65090->dev);
 	if (client->irq)
-		free_irq(client->irq, tps65090);
+		regmap_del_irq_chip(client->irq, tps65090->irq_data);
 
 	return 0;
 }
diff --git a/include/linux/mfd/tps65090.h b/include/linux/mfd/tps65090.h
index 1a5f916b7383..4bbbb1350b91 100644
--- a/include/linux/mfd/tps65090.h
+++ b/include/linux/mfd/tps65090.h
@@ -25,12 +25,29 @@
 #include <linux/irq.h>
 #include <linux/regmap.h>
 
+/* TPS65090 IRQs */
+enum {
+	TPS65090_IRQ_VAC_STATUS_CHANGE,
+	TPS65090_IRQ_VSYS_STATUS_CHANGE,
+	TPS65090_IRQ_BAT_STATUS_CHANGE,
+	TPS65090_IRQ_CHARGING_STATUS_CHANGE,
+	TPS65090_IRQ_CHARGING_COMPLETE,
+	TPS65090_IRQ_OVERLOAD_DCDC1,
+	TPS65090_IRQ_OVERLOAD_DCDC2,
+	TPS65090_IRQ_OVERLOAD_DCDC3,
+	TPS65090_IRQ_OVERLOAD_FET1,
+	TPS65090_IRQ_OVERLOAD_FET2,
+	TPS65090_IRQ_OVERLOAD_FET3,
+	TPS65090_IRQ_OVERLOAD_FET4,
+	TPS65090_IRQ_OVERLOAD_FET5,
+	TPS65090_IRQ_OVERLOAD_FET6,
+	TPS65090_IRQ_OVERLOAD_FET7,
+};
+
 struct tps65090 {
 	struct device		*dev;
 	struct regmap		*rmap;
-	struct irq_chip		irq_chip;
-	struct mutex		irq_lock;
-	int			irq_base;
+	struct regmap_irq_chip_data *irq_data;
 };
 
 struct tps65090_platform_data {
-- 
cgit v1.2.3


From c7b76dce8ac95fd464bfae741b830d407884c274 Mon Sep 17 00:00:00 2001
From: Aaro Koskinen <aaro.koskinen@iki.fi>
Date: Sun, 18 Nov 2012 18:36:20 +0200
Subject: mfd: Introduce retu-mfd driver

Retu is a multi-function device found on Nokia Internet Tablets
implementing at least watchdog, RTC, headset detection and power button
functionality.

This patch implements minimum functionality providing register access,
IRQ handling and power off functions.

Acked-by: Felipe Balbi <balbi@ti.com>
Acked-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Aaro Koskinen <aaro.koskinen@iki.fi>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/Kconfig      |   9 ++
 drivers/mfd/Makefile     |   1 +
 drivers/mfd/retu-mfd.c   | 264 +++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/mfd/retu.h |  22 ++++
 4 files changed, 296 insertions(+)
 create mode 100644 drivers/mfd/retu-mfd.c
 create mode 100644 include/linux/mfd/retu.h

(limited to 'include/linux')

diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index ca633df7c330..f5b839b718aa 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -1094,6 +1094,15 @@ config MFD_VIPERBOARD
 	  You need to select the mfd cell drivers separately.
 	  The drivers do not support all features the board exposes.
 
+config MFD_RETU
+	tristate "Support for Retu multi-function device"
+	select MFD_CORE
+	depends on I2C
+	select REGMAP_IRQ
+	help
+	  Retu is a multi-function device found on Nokia Internet Tablets
+	  (770, N800 and N810).
+
 endmenu
 endif
 
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index 8072460e99d2..2689c8a0d781 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -145,3 +145,4 @@ obj-$(CONFIG_MFD_RC5T583)	+= rc5t583.o rc5t583-irq.o
 obj-$(CONFIG_MFD_SEC_CORE)	+= sec-core.o sec-irq.o
 obj-$(CONFIG_MFD_SYSCON)	+= syscon.o
 obj-$(CONFIG_MFD_LM3533)	+= lm3533-core.o lm3533-ctrlbank.o
+obj-$(CONFIG_MFD_RETU)		+= retu-mfd.o
diff --git a/drivers/mfd/retu-mfd.c b/drivers/mfd/retu-mfd.c
new file mode 100644
index 000000000000..7ff4a37ab0c0
--- /dev/null
+++ b/drivers/mfd/retu-mfd.c
@@ -0,0 +1,264 @@
+/*
+ * Retu MFD driver
+ *
+ * Copyright (C) 2004, 2005 Nokia Corporation
+ *
+ * Based on code written by Juha Yrjölä, David Weinehall and Mikko Ylinen.
+ * Rewritten by Aaro Koskinen.
+ *
+ * This file is subject to the terms and conditions of the GNU General
+ * Public License. See the file "COPYING" in the main directory of this
+ * archive for more details.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/err.h>
+#include <linux/i2c.h>
+#include <linux/irq.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/mutex.h>
+#include <linux/module.h>
+#include <linux/regmap.h>
+#include <linux/mfd/core.h>
+#include <linux/mfd/retu.h>
+#include <linux/interrupt.h>
+#include <linux/moduleparam.h>
+
+/* Registers */
+#define RETU_REG_ASICR		0x00		/* ASIC ID and revision */
+#define RETU_REG_ASICR_VILMA	(1 << 7)	/* Bit indicating Vilma */
+#define RETU_REG_IDR		0x01		/* Interrupt ID */
+#define RETU_REG_IMR		0x02		/* Interrupt mask */
+
+/* Interrupt sources */
+#define RETU_INT_PWR		0		/* Power button */
+
+struct retu_dev {
+	struct regmap			*regmap;
+	struct device			*dev;
+	struct mutex			mutex;
+	struct regmap_irq_chip_data	*irq_data;
+};
+
+static struct resource retu_pwrbutton_res[] = {
+	{
+		.name	= "retu-pwrbutton",
+		.start	= RETU_INT_PWR,
+		.end	= RETU_INT_PWR,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct mfd_cell retu_devs[] = {
+	{
+		.name		= "retu-wdt"
+	},
+	{
+		.name		= "retu-pwrbutton",
+		.resources	= retu_pwrbutton_res,
+		.num_resources	= ARRAY_SIZE(retu_pwrbutton_res),
+	}
+};
+
+static struct regmap_irq retu_irqs[] = {
+	[RETU_INT_PWR] = {
+		.mask = 1 << RETU_INT_PWR,
+	}
+};
+
+static struct regmap_irq_chip retu_irq_chip = {
+	.name		= "RETU",
+	.irqs		= retu_irqs,
+	.num_irqs	= ARRAY_SIZE(retu_irqs),
+	.num_regs	= 1,
+	.status_base	= RETU_REG_IDR,
+	.mask_base	= RETU_REG_IMR,
+	.ack_base	= RETU_REG_IDR,
+};
+
+/* Retu device registered for the power off. */
+static struct retu_dev *retu_pm_power_off;
+
+int retu_read(struct retu_dev *rdev, u8 reg)
+{
+	int ret;
+	int value;
+
+	mutex_lock(&rdev->mutex);
+	ret = regmap_read(rdev->regmap, reg, &value);
+	mutex_unlock(&rdev->mutex);
+
+	return ret ? ret : value;
+}
+EXPORT_SYMBOL_GPL(retu_read);
+
+int retu_write(struct retu_dev *rdev, u8 reg, u16 data)
+{
+	int ret;
+
+	mutex_lock(&rdev->mutex);
+	ret = regmap_write(rdev->regmap, reg, data);
+	mutex_unlock(&rdev->mutex);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(retu_write);
+
+static void retu_power_off(void)
+{
+	struct retu_dev *rdev = retu_pm_power_off;
+	int reg;
+
+	mutex_lock(&retu_pm_power_off->mutex);
+
+	/* Ignore power button state */
+	regmap_read(rdev->regmap, RETU_REG_CC1, &reg);
+	regmap_write(rdev->regmap, RETU_REG_CC1, reg | 2);
+
+	/* Expire watchdog immediately */
+	regmap_write(rdev->regmap, RETU_REG_WATCHDOG, 0);
+
+	/* Wait for poweroff */
+	for (;;)
+		cpu_relax();
+
+	mutex_unlock(&retu_pm_power_off->mutex);
+}
+
+static int retu_regmap_read(void *context, const void *reg, size_t reg_size,
+			    void *val, size_t val_size)
+{
+	int ret;
+	struct device *dev = context;
+	struct i2c_client *i2c = to_i2c_client(dev);
+
+	BUG_ON(reg_size != 1 || val_size != 2);
+
+	ret = i2c_smbus_read_word_data(i2c, *(u8 const *)reg);
+	if (ret < 0)
+		return ret;
+
+	*(u16 *)val = ret;
+	return 0;
+}
+
+static int retu_regmap_write(void *context, const void *data, size_t count)
+{
+	u8 reg;
+	u16 val;
+	struct device *dev = context;
+	struct i2c_client *i2c = to_i2c_client(dev);
+
+	BUG_ON(count != sizeof(reg) + sizeof(val));
+	memcpy(&reg, data, sizeof(reg));
+	memcpy(&val, data + sizeof(reg), sizeof(val));
+	return i2c_smbus_write_word_data(i2c, reg, val);
+}
+
+static struct regmap_bus retu_bus = {
+	.read = retu_regmap_read,
+	.write = retu_regmap_write,
+	.val_format_endian_default = REGMAP_ENDIAN_NATIVE,
+};
+
+static struct regmap_config retu_config = {
+	.reg_bits = 8,
+	.val_bits = 16,
+};
+
+static int __devinit retu_probe(struct i2c_client *i2c,
+				const struct i2c_device_id *id)
+{
+	struct retu_dev *rdev;
+	int ret;
+
+	rdev = devm_kzalloc(&i2c->dev, sizeof(*rdev), GFP_KERNEL);
+	if (rdev == NULL)
+		return -ENOMEM;
+
+	i2c_set_clientdata(i2c, rdev);
+	rdev->dev = &i2c->dev;
+	mutex_init(&rdev->mutex);
+	rdev->regmap = devm_regmap_init(&i2c->dev, &retu_bus, &i2c->dev,
+					&retu_config);
+	if (IS_ERR(rdev->regmap))
+		return PTR_ERR(rdev->regmap);
+
+	ret = retu_read(rdev, RETU_REG_ASICR);
+	if (ret < 0) {
+		dev_err(rdev->dev, "could not read Retu revision: %d\n", ret);
+		return ret;
+	}
+
+	dev_info(rdev->dev, "Retu%s v%d.%d found\n",
+		 (ret & RETU_REG_ASICR_VILMA) ? " & Vilma" : "",
+		 (ret >> 4) & 0x7, ret & 0xf);
+
+	/* Mask all RETU interrupts. */
+	ret = retu_write(rdev, RETU_REG_IMR, 0xffff);
+	if (ret < 0)
+		return ret;
+
+	ret = regmap_add_irq_chip(rdev->regmap, i2c->irq, IRQF_ONESHOT, -1,
+				  &retu_irq_chip, &rdev->irq_data);
+	if (ret < 0)
+		return ret;
+
+	ret = mfd_add_devices(rdev->dev, -1, retu_devs, ARRAY_SIZE(retu_devs),
+			      NULL, regmap_irq_chip_get_base(rdev->irq_data),
+			      NULL);
+	if (ret < 0) {
+		regmap_del_irq_chip(i2c->irq, rdev->irq_data);
+		return ret;
+	}
+
+	if (!pm_power_off) {
+		retu_pm_power_off = rdev;
+		pm_power_off	  = retu_power_off;
+	}
+
+	return 0;
+}
+
+static int __devexit retu_remove(struct i2c_client *i2c)
+{
+	struct retu_dev *rdev = i2c_get_clientdata(i2c);
+
+	if (retu_pm_power_off == rdev) {
+		pm_power_off	  = NULL;
+		retu_pm_power_off = NULL;
+	}
+	mfd_remove_devices(rdev->dev);
+	regmap_del_irq_chip(i2c->irq, rdev->irq_data);
+
+	return 0;
+}
+
+static const struct i2c_device_id retu_id[] = {
+	{ "retu-mfd", 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, retu_id);
+
+static struct i2c_driver retu_driver = {
+	.driver		= {
+		.name = "retu-mfd",
+		.owner = THIS_MODULE,
+	},
+	.probe		= retu_probe,
+	.remove		= retu_remove,
+	.id_table	= retu_id,
+};
+module_i2c_driver(retu_driver);
+
+MODULE_DESCRIPTION("Retu MFD driver");
+MODULE_AUTHOR("Juha Yrjölä");
+MODULE_AUTHOR("David Weinehall");
+MODULE_AUTHOR("Mikko Ylinen");
+MODULE_AUTHOR("Aaro Koskinen <aaro.koskinen@iki.fi>");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/mfd/retu.h b/include/linux/mfd/retu.h
new file mode 100644
index 000000000000..1e2715d5b836
--- /dev/null
+++ b/include/linux/mfd/retu.h
@@ -0,0 +1,22 @@
+/*
+ * Retu MFD driver interface
+ *
+ * This file is subject to the terms and conditions of the GNU General
+ * Public License. See the file "COPYING" in the main directory of this
+ * archive for more details.
+ */
+
+#ifndef __LINUX_MFD_RETU_H
+#define __LINUX_MFD_RETU_H
+
+struct retu_dev;
+
+int retu_read(struct retu_dev *, u8);
+int retu_write(struct retu_dev *, u8, u16);
+
+/* Registers */
+#define RETU_REG_WATCHDOG	0x17		/* Watchdog */
+#define RETU_REG_CC1		0x0d		/* Common control register 1 */
+#define RETU_REG_STATUS		0x16		/* Status register */
+
+#endif /* __LINUX_MFD_RETU_H */
-- 
cgit v1.2.3


From 0e8f1398a388bbaa5ca965711b9ed5ac4794332d Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@ti.com>
Date: Tue, 13 Nov 2012 09:28:47 +0100
Subject: mfd: twl: Remove unused TWL_MODULE definitions

AUDIO and MADC only available on twl4030 series and the TWL_MODULE_* mapping
is not needed.

Acked-by: Tero Kristo <t-kristo@ti.com>
Signed-off-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 include/linux/i2c/twl.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/i2c/twl.h b/include/linux/i2c/twl.h
index 9a5e28462324..7278c72479d1 100644
--- a/include/linux/i2c/twl.h
+++ b/include/linux/i2c/twl.h
@@ -73,9 +73,7 @@
 #define TWL4030_MODULE_SECURED_REG	0x17
 
 #define TWL_MODULE_USB		TWL4030_MODULE_USB
-#define TWL_MODULE_AUDIO_VOICE	TWL4030_MODULE_AUDIO_VOICE
 #define TWL_MODULE_PIH		TWL4030_MODULE_PIH
-#define TWL_MODULE_MADC		TWL4030_MODULE_MADC
 #define TWL_MODULE_MAIN_CHARGE	TWL4030_MODULE_MAIN_CHARGE
 #define TWL_MODULE_PM_MASTER	TWL4030_MODULE_PM_MASTER
 #define TWL_MODULE_PM_RECEIVER	TWL4030_MODULE_PM_RECEIVER
-- 
cgit v1.2.3


From da059ecfc9f9d98556607c6d6db065aa3b7f162d Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@ti.com>
Date: Tue, 13 Nov 2012 09:28:48 +0100
Subject: mfd: twl: Convert module id definitions to enums

Use enum list for the module definitions (TWL4030_MODULE_*) which will ease
up future work with the IDs.
At the same time group the IDs in block of five so it is easier to find the
ID we are looking for (to count the number they stand for).

At the same time define TWL_MODULE_LED so client drivers can switch to use
it as soon as it is possible.

Acked-by: Tero Kristo <t-kristo@ti.com>
Signed-off-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/twl-core.c  |  9 +++----
 include/linux/i2c/twl.h | 65 +++++++++++++++++++++++++------------------------
 2 files changed, 36 insertions(+), 38 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/twl-core.c b/drivers/mfd/twl-core.c
index d666c9d454cb..bb33b52cfa57 100644
--- a/drivers/mfd/twl-core.c
+++ b/drivers/mfd/twl-core.c
@@ -66,9 +66,6 @@
 
 /* Triton Core internal information (BEGIN) */
 
-/* Last - for index max*/
-#define TWL4030_MODULE_LAST		TWL4030_MODULE_SECURED_REG
-
 #define TWL_NUM_SLAVES		4
 
 #define SUB_CHIP_ID0 0
@@ -184,7 +181,7 @@ struct twl_mapping {
 };
 static struct twl_mapping *twl_map;
 
-static struct twl_mapping twl4030_map[TWL4030_MODULE_LAST + 1] = {
+static struct twl_mapping twl4030_map[] = {
 	/*
 	 * NOTE:  don't change this table without updating the
 	 * <linux/i2c/twl.h> defines for TWL4030_MODULE_*
@@ -327,7 +324,7 @@ int twl_i2c_write(u8 mod_no, u8 *value, u8 reg, unsigned num_bytes)
 	int sid;
 	struct twl_client *twl;
 
-	if (unlikely(mod_no > TWL_MODULE_LAST)) {
+	if (unlikely(mod_no >= TWL_MODULE_LAST)) {
 		pr_err("%s: invalid module number %d\n", DRIVER_NAME, mod_no);
 		return -EPERM;
 	}
@@ -369,7 +366,7 @@ int twl_i2c_read(u8 mod_no, u8 *value, u8 reg, unsigned num_bytes)
 	int sid;
 	struct twl_client *twl;
 
-	if (unlikely(mod_no > TWL_MODULE_LAST)) {
+	if (unlikely(mod_no >= TWL_MODULE_LAST)) {
 		pr_err("%s: invalid module number %d\n", DRIVER_NAME, mod_no);
 		return -EPERM;
 	}
diff --git a/include/linux/i2c/twl.h b/include/linux/i2c/twl.h
index 7278c72479d1..b1c44cccef31 100644
--- a/include/linux/i2c/twl.h
+++ b/include/linux/i2c/twl.h
@@ -39,39 +39,39 @@
  * address each module uses within a given i2c slave.
  */
 
-/* Slave 0 (i2c address 0x48) */
-#define TWL4030_MODULE_USB		0x00
-
-/* Slave 1 (i2c address 0x49) */
-#define TWL4030_MODULE_AUDIO_VOICE	0x01
-#define TWL4030_MODULE_GPIO		0x02
-#define TWL4030_MODULE_INTBR		0x03
-#define TWL4030_MODULE_PIH		0x04
-#define TWL4030_MODULE_TEST		0x05
-
-/* Slave 2 (i2c address 0x4a) */
-#define TWL4030_MODULE_KEYPAD		0x06
-#define TWL4030_MODULE_MADC		0x07
-#define TWL4030_MODULE_INTERRUPTS	0x08
-#define TWL4030_MODULE_LED		0x09
-#define TWL4030_MODULE_MAIN_CHARGE	0x0A
-#define TWL4030_MODULE_PRECHARGE	0x0B
-#define TWL4030_MODULE_PWM0		0x0C
-#define TWL4030_MODULE_PWM1		0x0D
-#define TWL4030_MODULE_PWMA		0x0E
-#define TWL4030_MODULE_PWMB		0x0F
-
-#define TWL5031_MODULE_ACCESSORY	0x10
-#define TWL5031_MODULE_INTERRUPTS	0x11
-
-/* Slave 3 (i2c address 0x4b) */
-#define TWL4030_MODULE_BACKUP		0x12
-#define TWL4030_MODULE_INT		0x13
-#define TWL4030_MODULE_PM_MASTER	0x14
-#define TWL4030_MODULE_PM_RECEIVER	0x15
-#define TWL4030_MODULE_RTC		0x16
-#define TWL4030_MODULE_SECURED_REG	0x17
+enum twl4030_module_ids {
+	TWL4030_MODULE_USB = 0,		/* Slave 0 (i2c address 0x48) */
+	TWL4030_MODULE_AUDIO_VOICE,	/* Slave 1 (i2c address 0x49) */
+	TWL4030_MODULE_GPIO,
+	TWL4030_MODULE_INTBR,
+	TWL4030_MODULE_PIH,
+
+	TWL4030_MODULE_TEST,
+	TWL4030_MODULE_KEYPAD,		/* Slave 2 (i2c address 0x4a) */
+	TWL4030_MODULE_MADC,
+	TWL4030_MODULE_INTERRUPTS,
+	TWL4030_MODULE_LED,
+
+	TWL4030_MODULE_MAIN_CHARGE,
+	TWL4030_MODULE_PRECHARGE,
+	TWL4030_MODULE_PWM0,
+	TWL4030_MODULE_PWM1,
+	TWL4030_MODULE_PWMA,
+
+	TWL4030_MODULE_PWMB,
+	TWL5031_MODULE_ACCESSORY,
+	TWL5031_MODULE_INTERRUPTS,
+	TWL4030_MODULE_BACKUP,		/* Slave 3 (i2c address 0x4b) */
+	TWL4030_MODULE_INT,
+
+	TWL4030_MODULE_PM_MASTER,
+	TWL4030_MODULE_PM_RECEIVER,
+	TWL4030_MODULE_RTC,
+	TWL4030_MODULE_SECURED_REG,
+	TWL4030_MODULE_LAST,
+};
 
+/* Similar functionalities implemented in TWL4030/6030 */
 #define TWL_MODULE_USB		TWL4030_MODULE_USB
 #define TWL_MODULE_PIH		TWL4030_MODULE_PIH
 #define TWL_MODULE_MAIN_CHARGE	TWL4030_MODULE_MAIN_CHARGE
@@ -79,6 +79,7 @@
 #define TWL_MODULE_PM_RECEIVER	TWL4030_MODULE_PM_RECEIVER
 #define TWL_MODULE_RTC		TWL4030_MODULE_RTC
 #define TWL_MODULE_PWM		TWL4030_MODULE_PWM0
+#define TWL_MODULE_LED		TWL4030_MODULE_LED
 
 #define TWL6030_MODULE_ID0	0x0D
 #define TWL6030_MODULE_ID1	0x0E
-- 
cgit v1.2.3


From 04575d5a3dc15d762c7428d2745d30b2d448e443 Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@ti.com>
Date: Tue, 13 Nov 2012 09:28:49 +0100
Subject: mfd: twl: Use decimal numbers for TWL6030_MODULE_IDs

It is easier fro humans to understand decimal numbers than hexadecimals when
they are used as indexes.

Acked-by: Tero Kristo <t-kristo@ti.com>
Signed-off-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 include/linux/i2c/twl.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/i2c/twl.h b/include/linux/i2c/twl.h
index b1c44cccef31..1ff54b114efc 100644
--- a/include/linux/i2c/twl.h
+++ b/include/linux/i2c/twl.h
@@ -81,9 +81,9 @@ enum twl4030_module_ids {
 #define TWL_MODULE_PWM		TWL4030_MODULE_PWM0
 #define TWL_MODULE_LED		TWL4030_MODULE_LED
 
-#define TWL6030_MODULE_ID0	0x0D
-#define TWL6030_MODULE_ID1	0x0E
-#define TWL6030_MODULE_ID2	0x0F
+#define TWL6030_MODULE_ID0	13
+#define TWL6030_MODULE_ID1	14
+#define TWL6030_MODULE_ID2	15
 
 #define GPIO_INTR_OFFSET	0
 #define KEYPAD_INTR_OFFSET	1
-- 
cgit v1.2.3


From ef85fb28305fad7617f307383ebba554a3a891a2 Mon Sep 17 00:00:00 2001
From: Hauke Mehrtens <hauke@hauke-m.de>
Date: Tue, 20 Nov 2012 22:24:27 +0000
Subject: bcma: add locking around GPIO register accesses

The GPIOs are access through some registers in the chip common core.
We need locking around these GPIO accesses, all GPIOs are accessed
through the same registers and parallel writes will cause problems.

Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
Patchwork: http://patchwork.linux-mips.org/patch/4585
Acked-by: Florian Fainelli <florian@openwrt.org>
---
 drivers/bcma/driver_chipcommon.c            | 47 ++++++++++++++++++++++++++---
 include/linux/bcma/bcma_driver_chipcommon.h |  3 ++
 2 files changed, 45 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/bcma/driver_chipcommon.c b/drivers/bcma/driver_chipcommon.c
index a4c3ebcc4c86..c9b63d9ff61d 100644
--- a/drivers/bcma/driver_chipcommon.c
+++ b/drivers/bcma/driver_chipcommon.c
@@ -30,6 +30,8 @@ void bcma_core_chipcommon_init(struct bcma_drv_cc *cc)
 	if (cc->setup_done)
 		return;
 
+	spin_lock_init(&cc->gpio_lock);
+
 	if (cc->core->id.rev >= 11)
 		cc->status = bcma_cc_read32(cc, BCMA_CC_CHIPSTAT);
 	cc->capabilities = bcma_cc_read32(cc, BCMA_CC_CAP);
@@ -84,28 +86,63 @@ u32 bcma_chipco_gpio_in(struct bcma_drv_cc *cc, u32 mask)
 
 u32 bcma_chipco_gpio_out(struct bcma_drv_cc *cc, u32 mask, u32 value)
 {
-	return bcma_cc_write32_masked(cc, BCMA_CC_GPIOOUT, mask, value);
+	unsigned long flags;
+	u32 res;
+
+	spin_lock_irqsave(&cc->gpio_lock, flags);
+	res = bcma_cc_write32_masked(cc, BCMA_CC_GPIOOUT, mask, value);
+	spin_unlock_irqrestore(&cc->gpio_lock, flags);
+
+	return res;
 }
 
 u32 bcma_chipco_gpio_outen(struct bcma_drv_cc *cc, u32 mask, u32 value)
 {
-	return bcma_cc_write32_masked(cc, BCMA_CC_GPIOOUTEN, mask, value);
+	unsigned long flags;
+	u32 res;
+
+	spin_lock_irqsave(&cc->gpio_lock, flags);
+	res = bcma_cc_write32_masked(cc, BCMA_CC_GPIOOUTEN, mask, value);
+	spin_unlock_irqrestore(&cc->gpio_lock, flags);
+
+	return res;
 }
 
 u32 bcma_chipco_gpio_control(struct bcma_drv_cc *cc, u32 mask, u32 value)
 {
-	return bcma_cc_write32_masked(cc, BCMA_CC_GPIOCTL, mask, value);
+	unsigned long flags;
+	u32 res;
+
+	spin_lock_irqsave(&cc->gpio_lock, flags);
+	res = bcma_cc_write32_masked(cc, BCMA_CC_GPIOCTL, mask, value);
+	spin_unlock_irqrestore(&cc->gpio_lock, flags);
+
+	return res;
 }
 EXPORT_SYMBOL_GPL(bcma_chipco_gpio_control);
 
 u32 bcma_chipco_gpio_intmask(struct bcma_drv_cc *cc, u32 mask, u32 value)
 {
-	return bcma_cc_write32_masked(cc, BCMA_CC_GPIOIRQ, mask, value);
+	unsigned long flags;
+	u32 res;
+
+	spin_lock_irqsave(&cc->gpio_lock, flags);
+	res = bcma_cc_write32_masked(cc, BCMA_CC_GPIOIRQ, mask, value);
+	spin_unlock_irqrestore(&cc->gpio_lock, flags);
+
+	return res;
 }
 
 u32 bcma_chipco_gpio_polarity(struct bcma_drv_cc *cc, u32 mask, u32 value)
 {
-	return bcma_cc_write32_masked(cc, BCMA_CC_GPIOPOL, mask, value);
+	unsigned long flags;
+	u32 res;
+
+	spin_lock_irqsave(&cc->gpio_lock, flags);
+	res = bcma_cc_write32_masked(cc, BCMA_CC_GPIOPOL, mask, value);
+	spin_unlock_irqrestore(&cc->gpio_lock, flags);
+
+	return res;
 }
 
 #ifdef CONFIG_BCMA_DRIVER_MIPS
diff --git a/include/linux/bcma/bcma_driver_chipcommon.h b/include/linux/bcma/bcma_driver_chipcommon.h
index 1cf1749440ac..a085d986c804 100644
--- a/include/linux/bcma/bcma_driver_chipcommon.h
+++ b/include/linux/bcma/bcma_driver_chipcommon.h
@@ -567,6 +567,9 @@ struct bcma_drv_cc {
 	int nr_serial_ports;
 	struct bcma_serial_port serial_ports[4];
 #endif /* CONFIG_BCMA_DRIVER_MIPS */
+
+	/* Lock for GPIO register access. */
+	spinlock_t gpio_lock;
 };
 
 /* Register access */
-- 
cgit v1.2.3


From ea3488f4696fe22811eb19bee7088b732d3f8fe0 Mon Sep 17 00:00:00 2001
From: Hauke Mehrtens <hauke@hauke-m.de>
Date: Tue, 20 Nov 2012 22:24:28 +0000
Subject: bcma: add bcma_chipco_gpio_pull{up,down}

Add functions to access the GPIO registers for pullup and pulldown.
These are needed for handling gpio registration.

Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
Patchwork: http://patchwork.linux-mips.org/patch/4586
Acked-by: Florian Fainelli <florian@openwrt.org>
---
 drivers/bcma/driver_chipcommon.c            | 30 +++++++++++++++++++++++++++++
 include/linux/bcma/bcma_driver_chipcommon.h |  2 ++
 2 files changed, 32 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/bcma/driver_chipcommon.c b/drivers/bcma/driver_chipcommon.c
index c9b63d9ff61d..0945383c2271 100644
--- a/drivers/bcma/driver_chipcommon.c
+++ b/drivers/bcma/driver_chipcommon.c
@@ -145,6 +145,36 @@ u32 bcma_chipco_gpio_polarity(struct bcma_drv_cc *cc, u32 mask, u32 value)
 	return res;
 }
 
+u32 bcma_chipco_gpio_pullup(struct bcma_drv_cc *cc, u32 mask, u32 value)
+{
+	unsigned long flags;
+	u32 res;
+
+	if (cc->core->id.rev < 20)
+		return 0;
+
+	spin_lock_irqsave(&cc->gpio_lock, flags);
+	res = bcma_cc_write32_masked(cc, BCMA_CC_GPIOPULLUP, mask, value);
+	spin_unlock_irqrestore(&cc->gpio_lock, flags);
+
+	return res;
+}
+
+u32 bcma_chipco_gpio_pulldown(struct bcma_drv_cc *cc, u32 mask, u32 value)
+{
+	unsigned long flags;
+	u32 res;
+
+	if (cc->core->id.rev < 20)
+		return 0;
+
+	spin_lock_irqsave(&cc->gpio_lock, flags);
+	res = bcma_cc_write32_masked(cc, BCMA_CC_GPIOPULLDOWN, mask, value);
+	spin_unlock_irqrestore(&cc->gpio_lock, flags);
+
+	return res;
+}
+
 #ifdef CONFIG_BCMA_DRIVER_MIPS
 void bcma_chipco_serial_init(struct bcma_drv_cc *cc)
 {
diff --git a/include/linux/bcma/bcma_driver_chipcommon.h b/include/linux/bcma/bcma_driver_chipcommon.h
index a085d986c804..256702663b68 100644
--- a/include/linux/bcma/bcma_driver_chipcommon.h
+++ b/include/linux/bcma/bcma_driver_chipcommon.h
@@ -606,6 +606,8 @@ u32 bcma_chipco_gpio_outen(struct bcma_drv_cc *cc, u32 mask, u32 value);
 u32 bcma_chipco_gpio_control(struct bcma_drv_cc *cc, u32 mask, u32 value);
 u32 bcma_chipco_gpio_intmask(struct bcma_drv_cc *cc, u32 mask, u32 value);
 u32 bcma_chipco_gpio_polarity(struct bcma_drv_cc *cc, u32 mask, u32 value);
+u32 bcma_chipco_gpio_pullup(struct bcma_drv_cc *cc, u32 mask, u32 value);
+u32 bcma_chipco_gpio_pulldown(struct bcma_drv_cc *cc, u32 mask, u32 value);
 
 /* PMU support */
 extern void bcma_pmu_init(struct bcma_drv_cc *cc);
-- 
cgit v1.2.3


From cf0936b06d8e98a157630e99f647e2ff6d29d7ad Mon Sep 17 00:00:00 2001
From: Hauke Mehrtens <hauke@hauke-m.de>
Date: Tue, 20 Nov 2012 22:24:30 +0000
Subject: bcma: add GPIO driver

Register a GPIO driver to access the GPIOs provided by the chip.
The GPIOs of the SoC should always start at 0 and the other GPIOs could
start at a random position. There is just one SoC in a system and when
they start at 0 the number is predictable.

Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
Patchwork: http://patchwork.linux-mips.org/patch/4587
Acked-by: Florian Fainelli <florian@openwrt.org>
---
 drivers/bcma/Kconfig                        |  9 +++
 drivers/bcma/Makefile                       |  1 +
 drivers/bcma/bcma_private.h                 | 10 +++
 drivers/bcma/driver_gpio.c                  | 98 +++++++++++++++++++++++++++++
 drivers/bcma/main.c                         |  5 ++
 include/linux/bcma/bcma_driver_chipcommon.h |  5 ++
 6 files changed, 128 insertions(+)
 create mode 100644 drivers/bcma/driver_gpio.c

(limited to 'include/linux')

diff --git a/drivers/bcma/Kconfig b/drivers/bcma/Kconfig
index a533af218368..d7b56a88c9f4 100644
--- a/drivers/bcma/Kconfig
+++ b/drivers/bcma/Kconfig
@@ -65,6 +65,15 @@ config BCMA_DRIVER_GMAC_CMN
 
 	  If unsure, say N
 
+config BCMA_DRIVER_GPIO
+	bool "BCMA GPIO driver"
+	depends on BCMA
+	select GPIOLIB
+	help
+	  Driver to provide access to the GPIO pins of the bcma bus.
+
+	  If unsure, say N
+
 config BCMA_DEBUG
 	bool "BCMA debugging"
 	depends on BCMA
diff --git a/drivers/bcma/Makefile b/drivers/bcma/Makefile
index 8ad42d41b2f2..734b32f09c0a 100644
--- a/drivers/bcma/Makefile
+++ b/drivers/bcma/Makefile
@@ -6,6 +6,7 @@ bcma-y					+= driver_pci.o
 bcma-$(CONFIG_BCMA_DRIVER_PCI_HOSTMODE)	+= driver_pci_host.o
 bcma-$(CONFIG_BCMA_DRIVER_MIPS)		+= driver_mips.o
 bcma-$(CONFIG_BCMA_DRIVER_GMAC_CMN)	+= driver_gmac_cmn.o
+bcma-$(CONFIG_BCMA_DRIVER_GPIO)		+= driver_gpio.o
 bcma-$(CONFIG_BCMA_HOST_PCI)		+= host_pci.o
 bcma-$(CONFIG_BCMA_HOST_SOC)		+= host_soc.o
 obj-$(CONFIG_BCMA)			+= bcma.o
diff --git a/drivers/bcma/bcma_private.h b/drivers/bcma/bcma_private.h
index 169fc58427d3..8cd80bff8e91 100644
--- a/drivers/bcma/bcma_private.h
+++ b/drivers/bcma/bcma_private.h
@@ -89,4 +89,14 @@ bool __devinit bcma_core_pci_is_in_hostmode(struct bcma_drv_pci *pc);
 void __devinit bcma_core_pci_hostmode_init(struct bcma_drv_pci *pc);
 #endif /* CONFIG_BCMA_DRIVER_PCI_HOSTMODE */
 
+#ifdef CONFIG_BCMA_DRIVER_GPIO
+/* driver_gpio.c */
+int bcma_gpio_init(struct bcma_drv_cc *cc);
+#else
+static inline int bcma_gpio_init(struct bcma_drv_cc *cc)
+{
+	return -ENOTSUPP;
+}
+#endif /* CONFIG_BCMA_DRIVER_GPIO */
+
 #endif
diff --git a/drivers/bcma/driver_gpio.c b/drivers/bcma/driver_gpio.c
new file mode 100644
index 000000000000..9a6f585da2d9
--- /dev/null
+++ b/drivers/bcma/driver_gpio.c
@@ -0,0 +1,98 @@
+/*
+ * Broadcom specific AMBA
+ * GPIO driver
+ *
+ * Copyright 2011, Broadcom Corporation
+ * Copyright 2012, Hauke Mehrtens <hauke@hauke-m.de>
+ *
+ * Licensed under the GNU/GPL. See COPYING for details.
+ */
+
+#include <linux/gpio.h>
+#include <linux/export.h>
+#include <linux/bcma/bcma.h>
+
+#include "bcma_private.h"
+
+static inline struct bcma_drv_cc *bcma_gpio_get_cc(struct gpio_chip *chip)
+{
+	return container_of(chip, struct bcma_drv_cc, gpio);
+}
+
+static int bcma_gpio_get_value(struct gpio_chip *chip, unsigned gpio)
+{
+	struct bcma_drv_cc *cc = bcma_gpio_get_cc(chip);
+
+	return !!bcma_chipco_gpio_in(cc, 1 << gpio);
+}
+
+static void bcma_gpio_set_value(struct gpio_chip *chip, unsigned gpio,
+				int value)
+{
+	struct bcma_drv_cc *cc = bcma_gpio_get_cc(chip);
+
+	bcma_chipco_gpio_out(cc, 1 << gpio, value ? 1 << gpio : 0);
+}
+
+static int bcma_gpio_direction_input(struct gpio_chip *chip, unsigned gpio)
+{
+	struct bcma_drv_cc *cc = bcma_gpio_get_cc(chip);
+
+	bcma_chipco_gpio_outen(cc, 1 << gpio, 0);
+	return 0;
+}
+
+static int bcma_gpio_direction_output(struct gpio_chip *chip, unsigned gpio,
+				      int value)
+{
+	struct bcma_drv_cc *cc = bcma_gpio_get_cc(chip);
+
+	bcma_chipco_gpio_outen(cc, 1 << gpio, 1 << gpio);
+	bcma_chipco_gpio_out(cc, 1 << gpio, value ? 1 << gpio : 0);
+	return 0;
+}
+
+static int bcma_gpio_request(struct gpio_chip *chip, unsigned gpio)
+{
+	struct bcma_drv_cc *cc = bcma_gpio_get_cc(chip);
+
+	bcma_chipco_gpio_control(cc, 1 << gpio, 0);
+	/* clear pulldown */
+	bcma_chipco_gpio_pulldown(cc, 1 << gpio, 0);
+	/* Set pullup */
+	bcma_chipco_gpio_pullup(cc, 1 << gpio, 1 << gpio);
+
+	return 0;
+}
+
+static void bcma_gpio_free(struct gpio_chip *chip, unsigned gpio)
+{
+	struct bcma_drv_cc *cc = bcma_gpio_get_cc(chip);
+
+	/* clear pullup */
+	bcma_chipco_gpio_pullup(cc, 1 << gpio, 0);
+}
+
+int bcma_gpio_init(struct bcma_drv_cc *cc)
+{
+	struct gpio_chip *chip = &cc->gpio;
+
+	chip->label		= "bcma_gpio";
+	chip->owner		= THIS_MODULE;
+	chip->request		= bcma_gpio_request;
+	chip->free		= bcma_gpio_free;
+	chip->get		= bcma_gpio_get_value;
+	chip->set		= bcma_gpio_set_value;
+	chip->direction_input	= bcma_gpio_direction_input;
+	chip->direction_output	= bcma_gpio_direction_output;
+	chip->ngpio		= 16;
+	/* There is just one SoC in one device and its GPIO addresses should be
+	 * deterministic to address them more easily. The other buses could get
+	 * a random base number. */
+	if (cc->core->bus->hosttype == BCMA_HOSTTYPE_SOC)
+		chip->base		= 0;
+	else
+		chip->base		= -1;
+
+	return gpiochip_add(chip);
+}
diff --git a/drivers/bcma/main.c b/drivers/bcma/main.c
index d865470bc951..478ba01ca0c2 100644
--- a/drivers/bcma/main.c
+++ b/drivers/bcma/main.c
@@ -152,6 +152,11 @@ static int bcma_register_cores(struct bcma_bus *bus)
 			bcma_err(bus, "Error registering NAND flash\n");
 	}
 #endif
+	err = bcma_gpio_init(&bus->drv_cc);
+	if (err == -ENOTSUPP)
+		bcma_debug(bus, "GPIO driver not activated\n");
+	else if (err)
+		bcma_err(bus, "Error registering GPIO driver: %i\n", err);
 
 	return 0;
 }
diff --git a/include/linux/bcma/bcma_driver_chipcommon.h b/include/linux/bcma/bcma_driver_chipcommon.h
index 256702663b68..7d662a988d24 100644
--- a/include/linux/bcma/bcma_driver_chipcommon.h
+++ b/include/linux/bcma/bcma_driver_chipcommon.h
@@ -1,6 +1,8 @@
 #ifndef LINUX_BCMA_DRIVER_CC_H_
 #define LINUX_BCMA_DRIVER_CC_H_
 
+#include <linux/gpio.h>
+
 /** ChipCommon core registers. **/
 #define BCMA_CC_ID			0x0000
 #define  BCMA_CC_ID_ID			0x0000FFFF
@@ -570,6 +572,9 @@ struct bcma_drv_cc {
 
 	/* Lock for GPIO register access. */
 	spinlock_t gpio_lock;
+#ifdef CONFIG_BCMA_DRIVER_GPIO
+	struct gpio_chip gpio;
+#endif
 };
 
 /* Register access */
-- 
cgit v1.2.3


From da22f22e91f0d14d996c7258101575a5a06ddf85 Mon Sep 17 00:00:00 2001
From: Hauke Mehrtens <hauke@hauke-m.de>
Date: Tue, 20 Nov 2012 22:24:31 +0000
Subject: ssb: add ssb_chipco_gpio_pull{up,down}

Add functions to access the GPIO registers for pullup and pulldown.
These are needed for handling gpio registration.

Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
Patchwork: http://patchwork.linux-mips.org/patch/4589
Acked-by: Florian Fainelli <florian@openwrt.org>
---
 drivers/ssb/driver_chipcommon.c           | 16 ++++++++++++++++
 include/linux/ssb/ssb_driver_chipcommon.h |  2 ++
 2 files changed, 18 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/ssb/driver_chipcommon.c b/drivers/ssb/driver_chipcommon.c
index e9d2ca11283b..4df492665565 100644
--- a/drivers/ssb/driver_chipcommon.c
+++ b/drivers/ssb/driver_chipcommon.c
@@ -442,6 +442,22 @@ u32 ssb_chipco_gpio_polarity(struct ssb_chipcommon *cc, u32 mask, u32 value)
 	return chipco_write32_masked(cc, SSB_CHIPCO_GPIOPOL, mask, value);
 }
 
+u32 ssb_chipco_gpio_pullup(struct ssb_chipcommon *cc, u32 mask, u32 value)
+{
+	if (cc->dev->id.revision < 20)
+		return 0xffffffff;
+
+	return chipco_write32_masked(cc, SSB_CHIPCO_GPIOPULLUP, mask, value);
+}
+
+u32 ssb_chipco_gpio_pulldown(struct ssb_chipcommon *cc, u32 mask, u32 value)
+{
+	if (cc->dev->id.revision < 20)
+		return 0xffffffff;
+
+	return chipco_write32_masked(cc, SSB_CHIPCO_GPIOPULLDOWN, mask, value);
+}
+
 #ifdef CONFIG_SSB_SERIAL
 int ssb_chipco_serial_init(struct ssb_chipcommon *cc,
 			   struct ssb_serial_port *ports)
diff --git a/include/linux/ssb/ssb_driver_chipcommon.h b/include/linux/ssb/ssb_driver_chipcommon.h
index c2b02a5c86ae..c8d07c95d76e 100644
--- a/include/linux/ssb/ssb_driver_chipcommon.h
+++ b/include/linux/ssb/ssb_driver_chipcommon.h
@@ -644,6 +644,8 @@ u32 ssb_chipco_gpio_outen(struct ssb_chipcommon *cc, u32 mask, u32 value);
 u32 ssb_chipco_gpio_control(struct ssb_chipcommon *cc, u32 mask, u32 value);
 u32 ssb_chipco_gpio_intmask(struct ssb_chipcommon *cc, u32 mask, u32 value);
 u32 ssb_chipco_gpio_polarity(struct ssb_chipcommon *cc, u32 mask, u32 value);
+u32 ssb_chipco_gpio_pullup(struct ssb_chipcommon *cc, u32 mask, u32 value);
+u32 ssb_chipco_gpio_pulldown(struct ssb_chipcommon *cc, u32 mask, u32 value);
 
 #ifdef CONFIG_SSB_SERIAL
 extern int ssb_chipco_serial_init(struct ssb_chipcommon *cc,
-- 
cgit v1.2.3


From 394bc7e38be79987ed15de203920c3cddb724cc1 Mon Sep 17 00:00:00 2001
From: Hauke Mehrtens <hauke@hauke-m.de>
Date: Tue, 20 Nov 2012 22:24:32 +0000
Subject: ssb: add locking around gpio register accesses

The GPIOs are access through some registers in the chip common core or
over extif. We need locking around these GPIO accesses, all GPIOs are
accessed through the same registers and parallel writes will cause
problems.

Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
Patchwork: http://patchwork.linux-mips.org/patch/4590
Acked-by: Florian Fainelli <florian@openwrt.org>
---
 drivers/ssb/driver_chipcommon.c           | 66 +++++++++++++++++++++++++++----
 drivers/ssb/driver_extif.c                | 43 ++++++++++++++++++--
 drivers/ssb/main.c                        |  1 +
 drivers/ssb/ssb_private.h                 |  8 ++++
 include/linux/ssb/ssb_driver_chipcommon.h |  1 +
 include/linux/ssb/ssb_driver_extif.h      |  1 +
 6 files changed, 109 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ssb/driver_chipcommon.c b/drivers/ssb/driver_chipcommon.c
index 4df492665565..24e02bb2ecd8 100644
--- a/drivers/ssb/driver_chipcommon.c
+++ b/drivers/ssb/driver_chipcommon.c
@@ -284,6 +284,9 @@ void ssb_chipcommon_init(struct ssb_chipcommon *cc)
 {
 	if (!cc->dev)
 		return; /* We don't have a ChipCommon */
+
+	spin_lock_init(&cc->gpio_lock);
+
 	if (cc->dev->id.revision >= 11)
 		cc->status = chipco_read32(cc, SSB_CHIPCO_CHIPSTAT);
 	ssb_dprintk(KERN_INFO PFX "chipcommon status is 0x%x\n", cc->status);
@@ -418,44 +421,93 @@ u32 ssb_chipco_gpio_in(struct ssb_chipcommon *cc, u32 mask)
 
 u32 ssb_chipco_gpio_out(struct ssb_chipcommon *cc, u32 mask, u32 value)
 {
-	return chipco_write32_masked(cc, SSB_CHIPCO_GPIOOUT, mask, value);
+	unsigned long flags;
+	u32 res = 0;
+
+	spin_lock_irqsave(&cc->gpio_lock, flags);
+	res = chipco_write32_masked(cc, SSB_CHIPCO_GPIOOUT, mask, value);
+	spin_unlock_irqrestore(&cc->gpio_lock, flags);
+
+	return res;
 }
 
 u32 ssb_chipco_gpio_outen(struct ssb_chipcommon *cc, u32 mask, u32 value)
 {
-	return chipco_write32_masked(cc, SSB_CHIPCO_GPIOOUTEN, mask, value);
+	unsigned long flags;
+	u32 res = 0;
+
+	spin_lock_irqsave(&cc->gpio_lock, flags);
+	res = chipco_write32_masked(cc, SSB_CHIPCO_GPIOOUTEN, mask, value);
+	spin_unlock_irqrestore(&cc->gpio_lock, flags);
+
+	return res;
 }
 
 u32 ssb_chipco_gpio_control(struct ssb_chipcommon *cc, u32 mask, u32 value)
 {
-	return chipco_write32_masked(cc, SSB_CHIPCO_GPIOCTL, mask, value);
+	unsigned long flags;
+	u32 res = 0;
+
+	spin_lock_irqsave(&cc->gpio_lock, flags);
+	res = chipco_write32_masked(cc, SSB_CHIPCO_GPIOCTL, mask, value);
+	spin_unlock_irqrestore(&cc->gpio_lock, flags);
+
+	return res;
 }
 EXPORT_SYMBOL(ssb_chipco_gpio_control);
 
 u32 ssb_chipco_gpio_intmask(struct ssb_chipcommon *cc, u32 mask, u32 value)
 {
-	return chipco_write32_masked(cc, SSB_CHIPCO_GPIOIRQ, mask, value);
+	unsigned long flags;
+	u32 res = 0;
+
+	spin_lock_irqsave(&cc->gpio_lock, flags);
+	res = chipco_write32_masked(cc, SSB_CHIPCO_GPIOIRQ, mask, value);
+	spin_unlock_irqrestore(&cc->gpio_lock, flags);
+
+	return res;
 }
 
 u32 ssb_chipco_gpio_polarity(struct ssb_chipcommon *cc, u32 mask, u32 value)
 {
-	return chipco_write32_masked(cc, SSB_CHIPCO_GPIOPOL, mask, value);
+	unsigned long flags;
+	u32 res = 0;
+
+	spin_lock_irqsave(&cc->gpio_lock, flags);
+	res = chipco_write32_masked(cc, SSB_CHIPCO_GPIOPOL, mask, value);
+	spin_unlock_irqrestore(&cc->gpio_lock, flags);
+
+	return res;
 }
 
 u32 ssb_chipco_gpio_pullup(struct ssb_chipcommon *cc, u32 mask, u32 value)
 {
+	unsigned long flags;
+	u32 res = 0;
+
 	if (cc->dev->id.revision < 20)
 		return 0xffffffff;
 
-	return chipco_write32_masked(cc, SSB_CHIPCO_GPIOPULLUP, mask, value);
+	spin_lock_irqsave(&cc->gpio_lock, flags);
+	res = chipco_write32_masked(cc, SSB_CHIPCO_GPIOPULLUP, mask, value);
+	spin_unlock_irqrestore(&cc->gpio_lock, flags);
+
+	return res;
 }
 
 u32 ssb_chipco_gpio_pulldown(struct ssb_chipcommon *cc, u32 mask, u32 value)
 {
+	unsigned long flags;
+	u32 res = 0;
+
 	if (cc->dev->id.revision < 20)
 		return 0xffffffff;
 
-	return chipco_write32_masked(cc, SSB_CHIPCO_GPIOPULLDOWN, mask, value);
+	spin_lock_irqsave(&cc->gpio_lock, flags);
+	res = chipco_write32_masked(cc, SSB_CHIPCO_GPIOPULLDOWN, mask, value);
+	spin_unlock_irqrestore(&cc->gpio_lock, flags);
+
+	return res;
 }
 
 #ifdef CONFIG_SSB_SERIAL
diff --git a/drivers/ssb/driver_extif.c b/drivers/ssb/driver_extif.c
index dc47f30e9cf7..e1d0bb8ad725 100644
--- a/drivers/ssb/driver_extif.c
+++ b/drivers/ssb/driver_extif.c
@@ -118,6 +118,13 @@ void ssb_extif_watchdog_timer_set(struct ssb_extif *extif,
 	extif_write32(extif, SSB_EXTIF_WATCHDOG, ticks);
 }
 
+void ssb_extif_init(struct ssb_extif *extif)
+{
+	if (!extif->dev)
+		return; /* We don't have a Extif core */
+	spin_lock_init(&extif->gpio_lock);
+}
+
 u32 ssb_extif_gpio_in(struct ssb_extif *extif, u32 mask)
 {
 	return extif_read32(extif, SSB_EXTIF_GPIO_IN) & mask;
@@ -125,22 +132,50 @@ u32 ssb_extif_gpio_in(struct ssb_extif *extif, u32 mask)
 
 u32 ssb_extif_gpio_out(struct ssb_extif *extif, u32 mask, u32 value)
 {
-	return extif_write32_masked(extif, SSB_EXTIF_GPIO_OUT(0),
+	unsigned long flags;
+	u32 res = 0;
+
+	spin_lock_irqsave(&extif->gpio_lock, flags);
+	res = extif_write32_masked(extif, SSB_EXTIF_GPIO_OUT(0),
 				   mask, value);
+	spin_unlock_irqrestore(&extif->gpio_lock, flags);
+
+	return res;
 }
 
 u32 ssb_extif_gpio_outen(struct ssb_extif *extif, u32 mask, u32 value)
 {
-	return extif_write32_masked(extif, SSB_EXTIF_GPIO_OUTEN(0),
+	unsigned long flags;
+	u32 res = 0;
+
+	spin_lock_irqsave(&extif->gpio_lock, flags);
+	res = extif_write32_masked(extif, SSB_EXTIF_GPIO_OUTEN(0),
 				   mask, value);
+	spin_unlock_irqrestore(&extif->gpio_lock, flags);
+
+	return res;
 }
 
 u32 ssb_extif_gpio_polarity(struct ssb_extif *extif, u32 mask, u32 value)
 {
-	return extif_write32_masked(extif, SSB_EXTIF_GPIO_INTPOL, mask, value);
+	unsigned long flags;
+	u32 res = 0;
+
+	spin_lock_irqsave(&extif->gpio_lock, flags);
+	res = extif_write32_masked(extif, SSB_EXTIF_GPIO_INTPOL, mask, value);
+	spin_unlock_irqrestore(&extif->gpio_lock, flags);
+
+	return res;
 }
 
 u32 ssb_extif_gpio_intmask(struct ssb_extif *extif, u32 mask, u32 value)
 {
-	return extif_write32_masked(extif, SSB_EXTIF_GPIO_INTMASK, mask, value);
+	unsigned long flags;
+	u32 res = 0;
+
+	spin_lock_irqsave(&extif->gpio_lock, flags);
+	res = extif_write32_masked(extif, SSB_EXTIF_GPIO_INTMASK, mask, value);
+	spin_unlock_irqrestore(&extif->gpio_lock, flags);
+
+	return res;
 }
diff --git a/drivers/ssb/main.c b/drivers/ssb/main.c
index df0f145c22fc..6fe2d102734a 100644
--- a/drivers/ssb/main.c
+++ b/drivers/ssb/main.c
@@ -796,6 +796,7 @@ static int __devinit ssb_bus_register(struct ssb_bus *bus,
 	if (err)
 		goto err_pcmcia_exit;
 	ssb_chipcommon_init(&bus->chipco);
+	ssb_extif_init(&bus->extif);
 	ssb_mipscore_init(&bus->mipscore);
 	err = ssb_fetch_invariants(bus, get_invariants);
 	if (err) {
diff --git a/drivers/ssb/ssb_private.h b/drivers/ssb/ssb_private.h
index a305550b4b65..d6a1ba9394d9 100644
--- a/drivers/ssb/ssb_private.h
+++ b/drivers/ssb/ssb_private.h
@@ -211,4 +211,12 @@ static inline void b43_pci_ssb_bridge_exit(void)
 extern u32 ssb_pmu_get_cpu_clock(struct ssb_chipcommon *cc);
 extern u32 ssb_pmu_get_controlclock(struct ssb_chipcommon *cc);
 
+#ifdef CONFIG_SSB_DRIVER_EXTIF
+extern void ssb_extif_init(struct ssb_extif *extif);
+#else
+static inline void ssb_extif_init(struct ssb_extif *extif)
+{
+}
+#endif
+
 #endif /* LINUX_SSB_PRIVATE_H_ */
diff --git a/include/linux/ssb/ssb_driver_chipcommon.h b/include/linux/ssb/ssb_driver_chipcommon.h
index c8d07c95d76e..30b694345d47 100644
--- a/include/linux/ssb/ssb_driver_chipcommon.h
+++ b/include/linux/ssb/ssb_driver_chipcommon.h
@@ -590,6 +590,7 @@ struct ssb_chipcommon {
 	u32 status;
 	/* Fast Powerup Delay constant */
 	u16 fast_pwrup_delay;
+	spinlock_t gpio_lock;
 	struct ssb_chipcommon_pmu pmu;
 };
 
diff --git a/include/linux/ssb/ssb_driver_extif.h b/include/linux/ssb/ssb_driver_extif.h
index 91161f0aa22b..bd2306854a91 100644
--- a/include/linux/ssb/ssb_driver_extif.h
+++ b/include/linux/ssb/ssb_driver_extif.h
@@ -158,6 +158,7 @@
 
 struct ssb_extif {
 	struct ssb_device *dev;
+	spinlock_t gpio_lock;
 };
 
 static inline bool ssb_extif_available(struct ssb_extif *extif)
-- 
cgit v1.2.3


From ec43b08b5733494ad88aa618ecdf534320dd8207 Mon Sep 17 00:00:00 2001
From: Hauke Mehrtens <hauke@hauke-m.de>
Date: Tue, 20 Nov 2012 22:24:33 +0000
Subject: ssb: add GPIO driver

Register a GPIO driver to access the GPIOs provided by the chip.
The GPIOs of the SoC should always start at 0 and the other GPIOs could
start at a random position. There is just one SoC in a system and when
they start at 0 the number is predictable.

Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
Patchwork: http://patchwork.linux-mips.org/patch/4591
Acked-by: Florian Fainelli <florian@openwrt.org>
---
 drivers/ssb/Kconfig       |   9 +++
 drivers/ssb/Makefile      |   1 +
 drivers/ssb/driver_gpio.c | 176 ++++++++++++++++++++++++++++++++++++++++++++++
 drivers/ssb/main.c        |   6 ++
 drivers/ssb/ssb_private.h |   9 +++
 include/linux/ssb/ssb.h   |   4 ++
 6 files changed, 205 insertions(+)
 create mode 100644 drivers/ssb/driver_gpio.c

(limited to 'include/linux')

diff --git a/drivers/ssb/Kconfig b/drivers/ssb/Kconfig
index 42cdaa9a4d8a..ff3c8a21f10d 100644
--- a/drivers/ssb/Kconfig
+++ b/drivers/ssb/Kconfig
@@ -160,4 +160,13 @@ config SSB_DRIVER_GIGE
 
 	  If unsure, say N
 
+config SSB_DRIVER_GPIO
+	bool "SSB GPIO driver"
+	depends on SSB
+	select GPIOLIB
+	help
+	  Driver to provide access to the GPIO pins on the bus.
+
+	  If unsure, say N
+
 endmenu
diff --git a/drivers/ssb/Makefile b/drivers/ssb/Makefile
index 656e58b92618..9159ba77c388 100644
--- a/drivers/ssb/Makefile
+++ b/drivers/ssb/Makefile
@@ -15,6 +15,7 @@ ssb-$(CONFIG_SSB_DRIVER_MIPS)		+= driver_mipscore.o
 ssb-$(CONFIG_SSB_DRIVER_EXTIF)		+= driver_extif.o
 ssb-$(CONFIG_SSB_DRIVER_PCICORE)	+= driver_pcicore.o
 ssb-$(CONFIG_SSB_DRIVER_GIGE)		+= driver_gige.o
+ssb-$(CONFIG_SSB_DRIVER_GPIO)		+= driver_gpio.o
 
 # b43 pci-ssb-bridge driver
 # Not strictly a part of SSB, but kept here for convenience
diff --git a/drivers/ssb/driver_gpio.c b/drivers/ssb/driver_gpio.c
new file mode 100644
index 000000000000..97ac0a38e3d0
--- /dev/null
+++ b/drivers/ssb/driver_gpio.c
@@ -0,0 +1,176 @@
+/*
+ * Sonics Silicon Backplane
+ * GPIO driver
+ *
+ * Copyright 2011, Broadcom Corporation
+ * Copyright 2012, Hauke Mehrtens <hauke@hauke-m.de>
+ *
+ * Licensed under the GNU/GPL. See COPYING for details.
+ */
+
+#include <linux/gpio.h>
+#include <linux/export.h>
+#include <linux/ssb/ssb.h>
+
+#include "ssb_private.h"
+
+static struct ssb_bus *ssb_gpio_get_bus(struct gpio_chip *chip)
+{
+	return container_of(chip, struct ssb_bus, gpio);
+}
+
+static int ssb_gpio_chipco_get_value(struct gpio_chip *chip, unsigned gpio)
+{
+	struct ssb_bus *bus = ssb_gpio_get_bus(chip);
+
+	return !!ssb_chipco_gpio_in(&bus->chipco, 1 << gpio);
+}
+
+static void ssb_gpio_chipco_set_value(struct gpio_chip *chip, unsigned gpio,
+				      int value)
+{
+	struct ssb_bus *bus = ssb_gpio_get_bus(chip);
+
+	ssb_chipco_gpio_out(&bus->chipco, 1 << gpio, value ? 1 << gpio : 0);
+}
+
+static int ssb_gpio_chipco_direction_input(struct gpio_chip *chip,
+					   unsigned gpio)
+{
+	struct ssb_bus *bus = ssb_gpio_get_bus(chip);
+
+	ssb_chipco_gpio_outen(&bus->chipco, 1 << gpio, 0);
+	return 0;
+}
+
+static int ssb_gpio_chipco_direction_output(struct gpio_chip *chip,
+					    unsigned gpio, int value)
+{
+	struct ssb_bus *bus = ssb_gpio_get_bus(chip);
+
+	ssb_chipco_gpio_outen(&bus->chipco, 1 << gpio, 1 << gpio);
+	ssb_chipco_gpio_out(&bus->chipco, 1 << gpio, value ? 1 << gpio : 0);
+	return 0;
+}
+
+static int ssb_gpio_chipco_request(struct gpio_chip *chip, unsigned gpio)
+{
+	struct ssb_bus *bus = ssb_gpio_get_bus(chip);
+
+	ssb_chipco_gpio_control(&bus->chipco, 1 << gpio, 0);
+	/* clear pulldown */
+	ssb_chipco_gpio_pulldown(&bus->chipco, 1 << gpio, 0);
+	/* Set pullup */
+	ssb_chipco_gpio_pullup(&bus->chipco, 1 << gpio, 1 << gpio);
+
+	return 0;
+}
+
+static void ssb_gpio_chipco_free(struct gpio_chip *chip, unsigned gpio)
+{
+	struct ssb_bus *bus = ssb_gpio_get_bus(chip);
+
+	/* clear pullup */
+	ssb_chipco_gpio_pullup(&bus->chipco, 1 << gpio, 0);
+}
+
+static int ssb_gpio_chipco_init(struct ssb_bus *bus)
+{
+	struct gpio_chip *chip = &bus->gpio;
+
+	chip->label		= "ssb_chipco_gpio";
+	chip->owner		= THIS_MODULE;
+	chip->request		= ssb_gpio_chipco_request;
+	chip->free		= ssb_gpio_chipco_free;
+	chip->get		= ssb_gpio_chipco_get_value;
+	chip->set		= ssb_gpio_chipco_set_value;
+	chip->direction_input	= ssb_gpio_chipco_direction_input;
+	chip->direction_output	= ssb_gpio_chipco_direction_output;
+	chip->ngpio		= 16;
+	/* There is just one SoC in one device and its GPIO addresses should be
+	 * deterministic to address them more easily. The other buses could get
+	 * a random base number. */
+	if (bus->bustype == SSB_BUSTYPE_SSB)
+		chip->base		= 0;
+	else
+		chip->base		= -1;
+
+	return gpiochip_add(chip);
+}
+
+#ifdef CONFIG_SSB_DRIVER_EXTIF
+
+static int ssb_gpio_extif_get_value(struct gpio_chip *chip, unsigned gpio)
+{
+	struct ssb_bus *bus = ssb_gpio_get_bus(chip);
+
+	return !!ssb_extif_gpio_in(&bus->extif, 1 << gpio);
+}
+
+static void ssb_gpio_extif_set_value(struct gpio_chip *chip, unsigned gpio,
+				     int value)
+{
+	struct ssb_bus *bus = ssb_gpio_get_bus(chip);
+
+	ssb_extif_gpio_out(&bus->extif, 1 << gpio, value ? 1 << gpio : 0);
+}
+
+static int ssb_gpio_extif_direction_input(struct gpio_chip *chip,
+					  unsigned gpio)
+{
+	struct ssb_bus *bus = ssb_gpio_get_bus(chip);
+
+	ssb_extif_gpio_outen(&bus->extif, 1 << gpio, 0);
+	return 0;
+}
+
+static int ssb_gpio_extif_direction_output(struct gpio_chip *chip,
+					   unsigned gpio, int value)
+{
+	struct ssb_bus *bus = ssb_gpio_get_bus(chip);
+
+	ssb_extif_gpio_outen(&bus->extif, 1 << gpio, 1 << gpio);
+	ssb_extif_gpio_out(&bus->extif, 1 << gpio, value ? 1 << gpio : 0);
+	return 0;
+}
+
+static int ssb_gpio_extif_init(struct ssb_bus *bus)
+{
+	struct gpio_chip *chip = &bus->gpio;
+
+	chip->label		= "ssb_extif_gpio";
+	chip->owner		= THIS_MODULE;
+	chip->get		= ssb_gpio_extif_get_value;
+	chip->set		= ssb_gpio_extif_set_value;
+	chip->direction_input	= ssb_gpio_extif_direction_input;
+	chip->direction_output	= ssb_gpio_extif_direction_output;
+	chip->ngpio		= 5;
+	/* There is just one SoC in one device and its GPIO addresses should be
+	 * deterministic to address them more easily. The other buses could get
+	 * a random base number. */
+	if (bus->bustype == SSB_BUSTYPE_SSB)
+		chip->base		= 0;
+	else
+		chip->base		= -1;
+
+	return gpiochip_add(chip);
+}
+
+#else
+static int ssb_gpio_extif_init(struct ssb_bus *bus)
+{
+	return -ENOTSUPP;
+}
+#endif
+
+int ssb_gpio_init(struct ssb_bus *bus)
+{
+	if (ssb_chipco_available(&bus->chipco))
+		return ssb_gpio_chipco_init(bus);
+	else if (ssb_extif_available(&bus->extif))
+		return ssb_gpio_extif_init(bus);
+	else
+		SSB_WARN_ON(1);
+
+	return -1;
+}
diff --git a/drivers/ssb/main.c b/drivers/ssb/main.c
index 6fe2d102734a..87f0ddf4f3f3 100644
--- a/drivers/ssb/main.c
+++ b/drivers/ssb/main.c
@@ -798,6 +798,12 @@ static int __devinit ssb_bus_register(struct ssb_bus *bus,
 	ssb_chipcommon_init(&bus->chipco);
 	ssb_extif_init(&bus->extif);
 	ssb_mipscore_init(&bus->mipscore);
+	err = ssb_gpio_init(bus);
+	if (err == -ENOTSUPP)
+		ssb_dprintk(KERN_DEBUG PFX "GPIO driver not activated\n");
+	else if (err)
+		ssb_dprintk(KERN_ERR PFX
+			   "Error registering GPIO driver: %i\n", err);
 	err = ssb_fetch_invariants(bus, get_invariants);
 	if (err) {
 		ssb_bus_may_powerdown(bus);
diff --git a/drivers/ssb/ssb_private.h b/drivers/ssb/ssb_private.h
index d6a1ba9394d9..463b76a2140b 100644
--- a/drivers/ssb/ssb_private.h
+++ b/drivers/ssb/ssb_private.h
@@ -219,4 +219,13 @@ static inline void ssb_extif_init(struct ssb_extif *extif)
 }
 #endif
 
+#ifdef CONFIG_SSB_DRIVER_GPIO
+extern int ssb_gpio_init(struct ssb_bus *bus);
+#else /* CONFIG_SSB_DRIVER_GPIO */
+static inline int ssb_gpio_init(struct ssb_bus *bus)
+{
+	return -ENOTSUPP;
+}
+#endif /* CONFIG_SSB_DRIVER_GPIO */
+
 #endif /* LINUX_SSB_PRIVATE_H_ */
diff --git a/include/linux/ssb/ssb.h b/include/linux/ssb/ssb.h
index bb674c02f306..3862a5bbd73b 100644
--- a/include/linux/ssb/ssb.h
+++ b/include/linux/ssb/ssb.h
@@ -6,6 +6,7 @@
 #include <linux/types.h>
 #include <linux/spinlock.h>
 #include <linux/pci.h>
+#include <linux/gpio.h>
 #include <linux/mod_devicetable.h>
 #include <linux/dma-mapping.h>
 
@@ -433,6 +434,9 @@ struct ssb_bus {
 	/* Lock for GPIO register access. */
 	spinlock_t gpio_lock;
 #endif /* EMBEDDED */
+#ifdef CONFIG_SSB_DRIVER_GPIO
+	struct gpio_chip gpio;
+#endif /* DRIVER_GPIO */
 
 	/* Internal-only stuff follows. Do not touch. */
 	struct list_head list;
-- 
cgit v1.2.3


From 743179849015dc71bb2ea63d8cd4bfa7fdfb4bc6 Mon Sep 17 00:00:00 2001
From: Jean-Christophe PLAGNIOL-VILLARD <plagnioj@jcrosoft.com>
Date: Thu, 15 Nov 2012 20:19:57 +0100
Subject: of_spi: add generic binding support to specify cs gpio

This will allow to use gpio for chip select with no modification in the
driver binding

When use the cs-gpios, the gpio number will be passed via the cs_gpio field
and the number of chip select will automatically increased with max(hw cs, gpio cs).

So if for example the controller has 2 CS lines, and the cs-gpios
property looks like this:

cs-gpios = <&gpio1 0 0> <0> <&gpio1 1 0> <&gpio1 2 0>;

Then it should be configured so that num_chipselect = 4 with the
following mapping:

cs0 : &gpio1 0 0
cs1 : native
cs2 : &gpio1 1 0
cs3 : &gpio1 2 0

Signed-off-by: Jean-Christophe PLAGNIOL-VILLARD <plagnioj@jcrosoft.com>
Cc: devicetree-discuss@lists.ozlabs.org
Cc: spi-devel-general@lists.sourceforge.net
Signed-off-by: Richard Genoud <richard.genoud@gmail.com>
[grant.likely: fixed up type of cs count so min() can do type checking]
Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
---
 Documentation/devicetree/bindings/spi/spi-bus.txt | 20 +++++++++
 drivers/spi/spi.c                                 | 54 +++++++++++++++++++++--
 include/linux/spi/spi.h                           |  3 ++
 3 files changed, 74 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/spi/spi-bus.txt b/Documentation/devicetree/bindings/spi/spi-bus.txt
index d2c33d0f533e..77a8b0d39b54 100644
--- a/Documentation/devicetree/bindings/spi/spi-bus.txt
+++ b/Documentation/devicetree/bindings/spi/spi-bus.txt
@@ -12,6 +12,7 @@ The SPI master node requires the following properties:
 - #size-cells     - should be zero.
 - compatible      - name of SPI bus controller following generic names
     		recommended practice.
+- cs-gpios	  - (optional) gpios chip select.
 No other properties are required in the SPI bus node.  It is assumed
 that a driver for an SPI bus device will understand that it is an SPI bus.
 However, the binding does not attempt to define the specific method for
@@ -24,6 +25,22 @@ support describing the chip select layout.
 Optional property:
 - num-cs : total number of chipselects
 
+If cs-gpios is used the number of chip select will automatically increased
+with max(cs-gpios > hw cs)
+
+So if for example the controller has 2 CS lines, and the cs-gpios
+property looks like this:
+
+cs-gpios = <&gpio1 0 0> <0> <&gpio1 1 0> <&gpio1 2 0>;
+
+Then it should be configured so that num_chipselect = 4 with the
+following mapping:
+
+cs0 : &gpio1 0 0
+cs1 : native
+cs2 : &gpio1 1 0
+cs3 : &gpio1 2 0
+
 SPI slave nodes must be children of the SPI master node and can
 contain the following properties.
 - reg             - (required) chip select address of device.
@@ -37,6 +54,9 @@ contain the following properties.
 - spi-cs-high     - (optional) Empty property indicating device requires
     		chip select active high
 
+If a gpio chipselect is used for the SPI slave the gpio number will be passed
+via the cs_gpio
+
 SPI example for an MPC5200 SPI bus:
 	spi@f00 {
 		#address-cells = <1>;
diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 84c2861d6f4d..1587a4a5ff41 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -30,6 +30,7 @@
 #include <linux/slab.h>
 #include <linux/mod_devicetable.h>
 #include <linux/spi/spi.h>
+#include <linux/of_gpio.h>
 #include <linux/pm_runtime.h>
 #include <linux/export.h>
 #include <linux/sched.h>
@@ -327,6 +328,7 @@ struct spi_device *spi_alloc_device(struct spi_master *master)
 	spi->dev.parent = &master->dev;
 	spi->dev.bus = &spi_bus_type;
 	spi->dev.release = spidev_release;
+	spi->cs_gpio = -EINVAL;
 	device_initialize(&spi->dev);
 	return spi;
 }
@@ -344,15 +346,16 @@ EXPORT_SYMBOL_GPL(spi_alloc_device);
 int spi_add_device(struct spi_device *spi)
 {
 	static DEFINE_MUTEX(spi_add_lock);
-	struct device *dev = spi->master->dev.parent;
+	struct spi_master *master = spi->master;
+	struct device *dev = master->dev.parent;
 	struct device *d;
 	int status;
 
 	/* Chipselects are numbered 0..max; validate. */
-	if (spi->chip_select >= spi->master->num_chipselect) {
+	if (spi->chip_select >= master->num_chipselect) {
 		dev_err(dev, "cs%d >= max %d\n",
 			spi->chip_select,
-			spi->master->num_chipselect);
+			master->num_chipselect);
 		return -EINVAL;
 	}
 
@@ -376,6 +379,9 @@ int spi_add_device(struct spi_device *spi)
 		goto done;
 	}
 
+	if (master->cs_gpios)
+		spi->cs_gpio = master->cs_gpios[spi->chip_select];
+
 	/* Drivers may modify this initial i/o setup, but will
 	 * normally rely on the device being setup.  Devices
 	 * using SPI_CS_HIGH can't coexist well otherwise...
@@ -946,6 +952,44 @@ struct spi_master *spi_alloc_master(struct device *dev, unsigned size)
 }
 EXPORT_SYMBOL_GPL(spi_alloc_master);
 
+#ifdef CONFIG_OF
+static int of_spi_register_master(struct spi_master *master)
+{
+	u16 nb;
+	int i, *cs;
+	struct device_node *np = master->dev.of_node;
+
+	if (!np)
+		return 0;
+
+	nb = of_gpio_named_count(np, "cs-gpios");
+	master->num_chipselect = max(nb, master->num_chipselect);
+
+	if (nb < 1)
+		return 0;
+
+	cs = devm_kzalloc(&master->dev,
+			  sizeof(int) * master->num_chipselect,
+			  GFP_KERNEL);
+	master->cs_gpios = cs;
+
+	if (!master->cs_gpios)
+		return -ENOMEM;
+
+	memset(cs, -EINVAL, master->num_chipselect);
+
+	for (i = 0; i < nb; i++)
+		cs[i] = of_get_named_gpio(np, "cs-gpios", i);
+
+	return 0;
+}
+#else
+static int of_spi_register_master(struct spi_master *master)
+{
+	return 0;
+}
+#endif
+
 /**
  * spi_register_master - register SPI master controller
  * @master: initialized master, originally from spi_alloc_master()
@@ -977,6 +1021,10 @@ int spi_register_master(struct spi_master *master)
 	if (!dev)
 		return -ENODEV;
 
+	status = of_spi_register_master(master);
+	if (status)
+		return status;
+
 	/* even if it's just one always-selected device, there must
 	 * be at least one chipselect
 	 */
diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index fa702aeb5038..f62918946d86 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -90,6 +90,7 @@ struct spi_device {
 	void			*controller_state;
 	void			*controller_data;
 	char			modalias[SPI_NAME_SIZE];
+	int			cs_gpio;	/* chip select gpio */
 
 	/*
 	 * likely need more hooks for more protocol options affecting how
@@ -362,6 +363,8 @@ struct spi_master {
 	int (*transfer_one_message)(struct spi_master *master,
 				    struct spi_message *mesg);
 	int (*unprepare_transfer_hardware)(struct spi_master *master);
+	/* gpio chip select */
+	int			*cs_gpios;
 };
 
 static inline void *spi_master_get_devdata(struct spi_master *master)
-- 
cgit v1.2.3


From df072eb97dcfb819390227649f6b7c07a90aa2df Mon Sep 17 00:00:00 2001
From: MyungJoo Ham <myungjoo.ham@samsung.com>
Date: Tue, 20 Nov 2012 19:22:33 +0900
Subject: extcon: kernel_doc style fix

Corrected kernel_doc entries.

Signed-off-by: MyungJoo Ham <myungjoo.ham@samsung.com>
---
 include/linux/extcon.h | 48 ++++++++++++++++++++++++------------------------
 1 file changed, 24 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/extcon.h b/include/linux/extcon.h
index 54c00ce9ce7a..fcb51c88319f 100644
--- a/include/linux/extcon.h
+++ b/include/linux/extcon.h
@@ -76,12 +76,12 @@ struct extcon_cable;
 
 /**
  * struct extcon_dev - An extcon device represents one external connector.
- * @name	The name of this extcon device. Parent device name is used
+ * @name:	The name of this extcon device. Parent device name is used
  *		if NULL.
- * @supported_cable	Array of supported cable names ending with NULL.
+ * @supported_cable:	Array of supported cable names ending with NULL.
  *			If supported_cable is NULL, cable name related APIs
  *			are disabled.
- * @mutually_exclusive	Array of mutually exclusive set of cables that cannot
+ * @mutually_exclusive:	Array of mutually exclusive set of cables that cannot
  *			be attached simultaneously. The array should be
  *			ending with NULL or be NULL (no mutually exclusive
  *			cables). For example, if it is { 0x7, 0x30, 0}, then,
@@ -89,21 +89,21 @@ struct extcon_cable;
  *			be attached simulataneously. {0x7, 0} is equivalent to
  *			{0x3, 0x6, 0x5, 0}. If it is {0xFFFFFFFF, 0}, there
  *			can be no simultaneous connections.
- * @print_name	An optional callback to override the method to print the
+ * @print_name:	An optional callback to override the method to print the
  *		name of the extcon device.
- * @print_state	An optional callback to override the method to print the
+ * @print_state:	An optional callback to override the method to print the
  *		status of the extcon device.
- * @dev		Device of this extcon. Do not provide at register-time.
- * @state	Attach/detach state of this extcon. Do not provide at
+ * @dev:	Device of this extcon. Do not provide at register-time.
+ * @state:	Attach/detach state of this extcon. Do not provide at
  *		register-time
- * @nh	Notifier for the state change events from this extcon
- * @entry	To support list of extcon devices so that users can search
+ * @nh:	Notifier for the state change events from this extcon
+ * @entry:	To support list of extcon devices so that users can search
  *		for extcon devices based on the extcon name.
- * @lock
- * @max_supported	Internal value to store the number of cables.
- * @extcon_dev_type	Device_type struct to provide attribute_groups
+ * @lock:
+ * @max_supported:	Internal value to store the number of cables.
+ * @extcon_dev_type:	Device_type struct to provide attribute_groups
  *			customized for each extcon device.
- * @cables	Sysfs subdirectories. Each represents one cable.
+ * @cables:	Sysfs subdirectories. Each represents one cable.
  *
  * In most cases, users only need to provide "User initializing data" of
  * this struct when registering an extcon. In some exceptional cases,
@@ -139,12 +139,12 @@ struct extcon_dev {
 
 /**
  * struct extcon_cable	- An internal data for each cable of extcon device.
- * @edev	The extcon device
- * @cable_index	Index of this cable in the edev
- * @attr_g	Attribute group for the cable
- * @attr_name	"name" sysfs entry
- * @attr_state	"state" sysfs entry
- * @attrs	Array pointing to attr_name and attr_state for attr_g
+ * @edev:	The extcon device
+ * @cable_index:	Index of this cable in the edev
+ * @attr_g:	Attribute group for the cable
+ * @attr_name:	"name" sysfs entry
+ * @attr_state:	"state" sysfs entry
+ * @attrs:	Array pointing to attr_name and attr_state for attr_g
  */
 struct extcon_cable {
 	struct extcon_dev *edev;
@@ -160,11 +160,11 @@ struct extcon_cable {
 /**
  * struct extcon_specific_cable_nb - An internal data for
  *				extcon_register_interest().
- * @internal_nb	a notifier block bridging extcon notifier and cable notifier.
- * @user_nb	user provided notifier block for events from a specific cable.
- * @cable_index	the target cable.
- * @edev	the target extcon device.
- * @previous_value	the saved previous event value.
+ * @internal_nb:	a notifier block bridging extcon notifier and cable notifier.
+ * @user_nb:	user provided notifier block for events from a specific cable.
+ * @cable_index:	the target cable.
+ * @edev:	the target extcon device.
+ * @previous_value:	the saved previous event value.
  */
 struct extcon_specific_cable_nb {
 	struct notifier_block internal_nb;
-- 
cgit v1.2.3


From 8d4b9e3182634d8b5afb5a144a8c6c24b187bcc1 Mon Sep 17 00:00:00 2001
From: Rafał Miłecki <zajec5@gmail.com>
Date: Mon, 12 Nov 2012 13:03:20 +0100
Subject: bcma: export PLL reading function
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This is required by NAND flash driver for initializing wait counters.

Signed-off-by: Rafał Miłecki <zajec5@gmail.com>
Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
---
 drivers/bcma/driver_chipcommon_pmu.c | 3 ++-
 include/linux/bcma/bcma.h            | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/bcma/driver_chipcommon_pmu.c b/drivers/bcma/driver_chipcommon_pmu.c
index 201faf106b3f..657f23517481 100644
--- a/drivers/bcma/driver_chipcommon_pmu.c
+++ b/drivers/bcma/driver_chipcommon_pmu.c
@@ -13,12 +13,13 @@
 #include <linux/export.h>
 #include <linux/bcma/bcma.h>
 
-static u32 bcma_chipco_pll_read(struct bcma_drv_cc *cc, u32 offset)
+u32 bcma_chipco_pll_read(struct bcma_drv_cc *cc, u32 offset)
 {
 	bcma_cc_write32(cc, BCMA_CC_PLLCTL_ADDR, offset);
 	bcma_cc_read32(cc, BCMA_CC_PLLCTL_ADDR);
 	return bcma_cc_read32(cc, BCMA_CC_PLLCTL_DATA);
 }
+EXPORT_SYMBOL_GPL(bcma_chipco_pll_read);
 
 void bcma_chipco_pll_write(struct bcma_drv_cc *cc, u32 offset, u32 value)
 {
diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h
index 4180eb78d575..4fb6bd7941d7 100644
--- a/include/linux/bcma/bcma.h
+++ b/include/linux/bcma/bcma.h
@@ -345,6 +345,7 @@ extern void bcma_core_set_clockmode(struct bcma_device *core,
 				    enum bcma_clkmode clkmode);
 extern void bcma_core_pll_ctl(struct bcma_device *core, u32 req, u32 status,
 			      bool on);
+extern u32 bcma_chipco_pll_read(struct bcma_drv_cc *cc, u32 offset);
 #define BCMA_DMA_TRANSLATION_MASK	0xC0000000
 #define  BCMA_DMA_TRANSLATION_NONE	0x00000000
 #define  BCMA_DMA_TRANSLATION_DMA32_CMT	0x40000000 /* Client Mode Translation for 32-bit DMA */
-- 
cgit v1.2.3


From 83af24027b3df1af5c5a9aa9adcdcfeb3429d3be Mon Sep 17 00:00:00 2001
From: "Philip, Avinash" <avinashphilip@ti.com>
Date: Wed, 21 Nov 2012 13:10:44 +0530
Subject: pwm: Device tree support for PWM polarity

Add support for encoding PWM properties in bit encoded form with
of_pwm_xlate_with_flags() function support. Platforms require platform
specific PWM properties has to populate in 3rd cell of the pwm-specifier
and PWM driver should also set .of_xlate support with this function.
Currently PWM property polarity encoded in bit position 0 of the third
cell in pwm-specifier.

Signed-off-by: Philip, Avinash <avinashphilip@ti.com>
Acked-by: Grant Likely <grant.likely@secretlab.ca>
Signed-off-by: Thierry Reding <thierry.reding@avionic-design.de>
---
 Documentation/devicetree/bindings/pwm/pwm.txt | 17 +++++++++++++---
 drivers/pwm/core.c                            | 28 +++++++++++++++++++++++++++
 include/linux/pwm.h                           |  3 +++
 3 files changed, 45 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/pwm/pwm.txt b/Documentation/devicetree/bindings/pwm/pwm.txt
index 73ec962bfe8c..06e67247859a 100644
--- a/Documentation/devicetree/bindings/pwm/pwm.txt
+++ b/Documentation/devicetree/bindings/pwm/pwm.txt
@@ -37,10 +37,21 @@ device:
 		pwm-names = "backlight";
 	};
 
+Note that in the example above, specifying the "pwm-names" is redundant
+because the name "backlight" would be used as fallback anyway.
+
 pwm-specifier typically encodes the chip-relative PWM number and the PWM
-period in nanoseconds. Note that in the example above, specifying the
-"pwm-names" is redundant because the name "backlight" would be used as
-fallback anyway.
+period in nanoseconds.
+
+Optionally, the pwm-specifier can encode a number of flags in a third cell:
+- bit 0: PWM signal polarity (0: normal polarity, 1: inverse polarity)
+
+Example with optional PWM specifier for inverse polarity
+
+	bl: backlight {
+		pwms = <&pwm 0 5000000 1>;
+		pwm-names = "backlight";
+	};
 
 2) PWM controller nodes
 -----------------------
diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c
index f5acdaa52707..780cb6b8a8f0 100644
--- a/drivers/pwm/core.c
+++ b/drivers/pwm/core.c
@@ -32,6 +32,9 @@
 
 #define MAX_PWMS 1024
 
+/* flags in the third cell of the DT PWM specifier */
+#define PWM_SPEC_POLARITY	(1 << 0)
+
 static DEFINE_MUTEX(pwm_lookup_lock);
 static LIST_HEAD(pwm_lookup_list);
 static DEFINE_MUTEX(pwm_lock);
@@ -129,6 +132,31 @@ static int pwm_device_request(struct pwm_device *pwm, const char *label)
 	return 0;
 }
 
+struct pwm_device *
+of_pwm_xlate_with_flags(struct pwm_chip *pc, const struct of_phandle_args *args)
+{
+	struct pwm_device *pwm;
+
+	if (pc->of_pwm_n_cells < 3)
+		return ERR_PTR(-EINVAL);
+
+	if (args->args[0] >= pc->npwm)
+		return ERR_PTR(-EINVAL);
+
+	pwm = pwm_request_from_chip(pc, args->args[0], NULL);
+	if (IS_ERR(pwm))
+		return pwm;
+
+	pwm_set_period(pwm, args->args[1]);
+
+	if (args->args[2] & PWM_SPEC_POLARITY)
+		pwm_set_polarity(pwm, PWM_POLARITY_INVERSED);
+	else
+		pwm_set_polarity(pwm, PWM_POLARITY_NORMAL);
+
+	return pwm;
+}
+
 static struct pwm_device *
 of_pwm_simple_xlate(struct pwm_chip *pc, const struct of_phandle_args *args)
 {
diff --git a/include/linux/pwm.h b/include/linux/pwm.h
index 112b31436848..6d661f32e0e4 100644
--- a/include/linux/pwm.h
+++ b/include/linux/pwm.h
@@ -171,6 +171,9 @@ struct pwm_device *pwm_request_from_chip(struct pwm_chip *chip,
 					 unsigned int index,
 					 const char *label);
 
+struct pwm_device *of_pwm_xlate_with_flags(struct pwm_chip *pc,
+		const struct of_phandle_args *args);
+
 struct pwm_device *pwm_get(struct device *dev, const char *consumer);
 void pwm_put(struct pwm_device *pwm);
 
-- 
cgit v1.2.3


From acad189b08456722ca4a8984218d6f38f4563cbc Mon Sep 17 00:00:00 2001
From: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Date: Thu, 22 Nov 2012 11:12:04 +0100
Subject: mfd: Add an AS3711 PMIC MFD driver

AS3711 is a PMIC with multiple DCDC and LDO power supplies, GPIOs, an RTC,
a battery charger and a general purpose ADC. This patch adds support for
the MFD with support for a regulator driver and a backlight driver.

Signed-off-by: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/Kconfig        |   9 ++
 drivers/mfd/Makefile       |   1 +
 drivers/mfd/as3711.c       | 217 +++++++++++++++++++++++++++++++++++++++++++++
 include/linux/mfd/as3711.h | 126 ++++++++++++++++++++++++++
 4 files changed, 353 insertions(+)
 create mode 100644 drivers/mfd/as3711.c
 create mode 100644 include/linux/mfd/as3711.h

(limited to 'include/linux')

diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index f5b839b718aa..475c26696b95 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -1103,6 +1103,15 @@ config MFD_RETU
 	  Retu is a multi-function device found on Nokia Internet Tablets
 	  (770, N800 and N810).
 
+config MFD_AS3711
+	bool "Support for AS3711"
+	select MFD_CORE
+	select REGMAP_I2C
+	select REGMAP_IRQ
+	depends on I2C=y
+	help
+	  Support for the AS3711 PMIC from AMS
+
 endmenu
 endif
 
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index 2689c8a0d781..f2216dfd963c 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -146,3 +146,4 @@ obj-$(CONFIG_MFD_SEC_CORE)	+= sec-core.o sec-irq.o
 obj-$(CONFIG_MFD_SYSCON)	+= syscon.o
 obj-$(CONFIG_MFD_LM3533)	+= lm3533-core.o lm3533-ctrlbank.o
 obj-$(CONFIG_MFD_RETU)		+= retu-mfd.o
+obj-$(CONFIG_MFD_AS3711)	+= as3711.o
diff --git a/drivers/mfd/as3711.c b/drivers/mfd/as3711.c
new file mode 100644
index 000000000000..e994c9691124
--- /dev/null
+++ b/drivers/mfd/as3711.c
@@ -0,0 +1,217 @@
+/*
+ * AS3711 PMIC MFC driver
+ *
+ * Copyright (C) 2012 Renesas Electronics Corporation
+ * Author: Guennadi Liakhovetski, <g.liakhovetski@gmx.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the version 2 of the GNU General Public License as
+ * published by the Free Software Foundation
+ */
+
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/i2c.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/mfd/as3711.h>
+#include <linux/mfd/core.h>
+#include <linux/module.h>
+#include <linux/regmap.h>
+#include <linux/slab.h>
+
+enum {
+	AS3711_REGULATOR,
+	AS3711_BACKLIGHT,
+};
+
+/*
+ * Ok to have it static: it is only used during probing and multiple I2C devices
+ * cannot be probed simultaneously. Just make sure to avoid stale data.
+ */
+static struct mfd_cell as3711_subdevs[] = {
+	[AS3711_REGULATOR] = {.name = "as3711-regulator",},
+	[AS3711_BACKLIGHT] = {.name = "as3711-backlight",},
+};
+
+static bool as3711_volatile_reg(struct device *dev, unsigned int reg)
+{
+	switch (reg) {
+	case AS3711_GPIO_SIGNAL_IN:
+	case AS3711_INTERRUPT_STATUS_1:
+	case AS3711_INTERRUPT_STATUS_2:
+	case AS3711_INTERRUPT_STATUS_3:
+	case AS3711_CHARGER_STATUS_1:
+	case AS3711_CHARGER_STATUS_2:
+	case AS3711_REG_STATUS:
+		return true;
+	}
+	return false;
+}
+
+static bool as3711_precious_reg(struct device *dev, unsigned int reg)
+{
+	switch (reg) {
+	case AS3711_INTERRUPT_STATUS_1:
+	case AS3711_INTERRUPT_STATUS_2:
+	case AS3711_INTERRUPT_STATUS_3:
+		return true;
+	}
+	return false;
+}
+
+static bool as3711_readable_reg(struct device *dev, unsigned int reg)
+{
+	switch (reg) {
+	case AS3711_SD_1_VOLTAGE:
+	case AS3711_SD_2_VOLTAGE:
+	case AS3711_SD_3_VOLTAGE:
+	case AS3711_SD_4_VOLTAGE:
+	case AS3711_LDO_1_VOLTAGE:
+	case AS3711_LDO_2_VOLTAGE:
+	case AS3711_LDO_3_VOLTAGE:
+	case AS3711_LDO_4_VOLTAGE:
+	case AS3711_LDO_5_VOLTAGE:
+	case AS3711_LDO_6_VOLTAGE:
+	case AS3711_LDO_7_VOLTAGE:
+	case AS3711_LDO_8_VOLTAGE:
+	case AS3711_SD_CONTROL:
+	case AS3711_GPIO_SIGNAL_OUT:
+	case AS3711_GPIO_SIGNAL_IN:
+	case AS3711_SD_CONTROL_1:
+	case AS3711_SD_CONTROL_2:
+	case AS3711_CURR_CONTROL:
+	case AS3711_CURR1_VALUE:
+	case AS3711_CURR2_VALUE:
+	case AS3711_CURR3_VALUE:
+	case AS3711_STEPUP_CONTROL_1:
+	case AS3711_STEPUP_CONTROL_2:
+	case AS3711_STEPUP_CONTROL_4:
+	case AS3711_STEPUP_CONTROL_5:
+	case AS3711_REG_STATUS:
+	case AS3711_INTERRUPT_STATUS_1:
+	case AS3711_INTERRUPT_STATUS_2:
+	case AS3711_INTERRUPT_STATUS_3:
+	case AS3711_CHARGER_STATUS_1:
+	case AS3711_CHARGER_STATUS_2:
+	case AS3711_ASIC_ID_1:
+	case AS3711_ASIC_ID_2:
+		return true;
+	}
+	return false;
+}
+
+static const struct regmap_config as3711_regmap_config = {
+	.reg_bits = 8,
+	.val_bits = 8,
+	.volatile_reg = as3711_volatile_reg,
+	.readable_reg = as3711_readable_reg,
+	.precious_reg = as3711_precious_reg,
+	.max_register = AS3711_MAX_REGS,
+	.num_reg_defaults_raw = AS3711_MAX_REGS,
+	.cache_type = REGCACHE_RBTREE,
+};
+
+static int as3711_i2c_probe(struct i2c_client *client,
+			    const struct i2c_device_id *id)
+{
+	struct as3711 *as3711;
+	struct as3711_platform_data *pdata = client->dev.platform_data;
+	unsigned int id1, id2;
+	int ret;
+
+	if (!pdata)
+		dev_dbg(&client->dev, "Platform data not found\n");
+
+	as3711 = devm_kzalloc(&client->dev, sizeof(struct as3711), GFP_KERNEL);
+	if (!as3711) {
+		dev_err(&client->dev, "Memory allocation failed\n");
+		return -ENOMEM;
+	}
+
+	as3711->dev = &client->dev;
+	i2c_set_clientdata(client, as3711);
+
+	if (client->irq)
+		dev_notice(&client->dev, "IRQ not supported yet\n");
+
+	as3711->regmap = devm_regmap_init_i2c(client, &as3711_regmap_config);
+	if (IS_ERR(as3711->regmap)) {
+		ret = PTR_ERR(as3711->regmap);
+		dev_err(&client->dev, "regmap initialization failed: %d\n", ret);
+		return ret;
+	}
+
+	ret = regmap_read(as3711->regmap, AS3711_ASIC_ID_1, &id1);
+	if (!ret)
+		ret = regmap_read(as3711->regmap, AS3711_ASIC_ID_2, &id2);
+	if (ret < 0) {
+		dev_err(&client->dev, "regmap_read() failed: %d\n", ret);
+		return ret;
+	}
+	if (id1 != 0x8b)
+		return -ENODEV;
+	dev_info(as3711->dev, "AS3711 detected: %x:%x\n", id1, id2);
+
+	/* We can reuse as3711_subdevs[], it will be copied in mfd_add_devices() */
+	if (pdata) {
+		as3711_subdevs[AS3711_REGULATOR].platform_data = &pdata->regulator;
+		as3711_subdevs[AS3711_REGULATOR].pdata_size = sizeof(pdata->regulator);
+		as3711_subdevs[AS3711_BACKLIGHT].platform_data = &pdata->backlight;
+		as3711_subdevs[AS3711_BACKLIGHT].pdata_size = sizeof(pdata->backlight);
+	} else {
+		as3711_subdevs[AS3711_REGULATOR].platform_data = NULL;
+		as3711_subdevs[AS3711_REGULATOR].pdata_size = 0;
+		as3711_subdevs[AS3711_BACKLIGHT].platform_data = NULL;
+		as3711_subdevs[AS3711_BACKLIGHT].pdata_size = 0;
+	}
+
+	ret = mfd_add_devices(as3711->dev, -1, as3711_subdevs,
+			      ARRAY_SIZE(as3711_subdevs), NULL, 0, NULL);
+	if (ret < 0)
+		dev_err(&client->dev, "add mfd devices failed: %d\n", ret);
+
+	return ret;
+}
+
+static int as3711_i2c_remove(struct i2c_client *client)
+{
+	struct as3711 *as3711 = i2c_get_clientdata(client);
+
+	mfd_remove_devices(as3711->dev);
+	return 0;
+}
+
+static const struct i2c_device_id as3711_i2c_id[] = {
+	{.name = "as3711", .driver_data = 0},
+	{}
+};
+
+MODULE_DEVICE_TABLE(i2c, as3711_i2c_id);
+
+static struct i2c_driver as3711_i2c_driver = {
+	.driver = {
+		   .name = "as3711",
+		   .owner = THIS_MODULE,
+		   },
+	.probe = as3711_i2c_probe,
+	.remove = as3711_i2c_remove,
+	.id_table = as3711_i2c_id,
+};
+
+static int __init as3711_i2c_init(void)
+{
+	return i2c_add_driver(&as3711_i2c_driver);
+}
+/* Initialise early */
+subsys_initcall(as3711_i2c_init);
+
+static void __exit as3711_i2c_exit(void)
+{
+	i2c_del_driver(&as3711_i2c_driver);
+}
+module_exit(as3711_i2c_exit);
+
+MODULE_AUTHOR("Guennadi Liakhovetski <g.liakhovetski@gmx.de>");
+MODULE_DESCRIPTION("AS3711 PMIC driver");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/mfd/as3711.h b/include/linux/mfd/as3711.h
new file mode 100644
index 000000000000..38452ce1e892
--- /dev/null
+++ b/include/linux/mfd/as3711.h
@@ -0,0 +1,126 @@
+/*
+ * AS3711 PMIC MFC driver header
+ *
+ * Copyright (C) 2012 Renesas Electronics Corporation
+ * Author: Guennadi Liakhovetski, <g.liakhovetski@gmx.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the version 2 of the GNU General Public License as
+ * published by the Free Software Foundation
+ */
+
+#ifndef MFD_AS3711_H
+#define MFD_AS3711_H
+
+/*
+ * Client data
+ */
+
+/* Register addresses */
+#define AS3711_SD_1_VOLTAGE		0	/* Digital Step-Down */
+#define AS3711_SD_2_VOLTAGE		1
+#define AS3711_SD_3_VOLTAGE		2
+#define AS3711_SD_4_VOLTAGE		3
+#define AS3711_LDO_1_VOLTAGE		4	/* Analog LDO */
+#define AS3711_LDO_2_VOLTAGE		5
+#define AS3711_LDO_3_VOLTAGE		6	/* Digital LDO */
+#define AS3711_LDO_4_VOLTAGE		7
+#define AS3711_LDO_5_VOLTAGE		8
+#define AS3711_LDO_6_VOLTAGE		9
+#define AS3711_LDO_7_VOLTAGE		0xa
+#define AS3711_LDO_8_VOLTAGE		0xb
+#define AS3711_SD_CONTROL		0x10
+#define AS3711_GPIO_SIGNAL_OUT		0x20
+#define AS3711_GPIO_SIGNAL_IN		0x21
+#define AS3711_SD_CONTROL_1		0x30
+#define AS3711_SD_CONTROL_2		0x31
+#define AS3711_CURR_CONTROL		0x40
+#define AS3711_CURR1_VALUE		0x43
+#define AS3711_CURR2_VALUE		0x44
+#define AS3711_CURR3_VALUE		0x45
+#define AS3711_STEPUP_CONTROL_1		0x50
+#define AS3711_STEPUP_CONTROL_2		0x51
+#define AS3711_STEPUP_CONTROL_4		0x53
+#define AS3711_STEPUP_CONTROL_5		0x54
+#define AS3711_REG_STATUS		0x73
+#define AS3711_INTERRUPT_STATUS_1	0x77
+#define AS3711_INTERRUPT_STATUS_2	0x78
+#define AS3711_INTERRUPT_STATUS_3	0x79
+#define AS3711_CHARGER_STATUS_1		0x86
+#define AS3711_CHARGER_STATUS_2		0x87
+#define AS3711_ASIC_ID_1		0x90
+#define AS3711_ASIC_ID_2		0x91
+
+#define AS3711_MAX_REGS			0x92
+
+/* Regulators */
+enum {
+	AS3711_REGULATOR_SD_1,
+	AS3711_REGULATOR_SD_2,
+	AS3711_REGULATOR_SD_3,
+	AS3711_REGULATOR_SD_4,
+	AS3711_REGULATOR_LDO_1,
+	AS3711_REGULATOR_LDO_2,
+	AS3711_REGULATOR_LDO_3,
+	AS3711_REGULATOR_LDO_4,
+	AS3711_REGULATOR_LDO_5,
+	AS3711_REGULATOR_LDO_6,
+	AS3711_REGULATOR_LDO_7,
+	AS3711_REGULATOR_LDO_8,
+
+	AS3711_REGULATOR_MAX,
+};
+
+struct device;
+struct regmap;
+
+struct as3711 {
+	struct device *dev;
+	struct regmap *regmap;
+};
+
+#define AS3711_MAX_STEPDOWN 4
+#define AS3711_MAX_STEPUP 2
+#define AS3711_MAX_LDO 8
+
+enum as3711_su2_feedback {
+	AS3711_SU2_VOLTAGE,
+	AS3711_SU2_CURR1,
+	AS3711_SU2_CURR2,
+	AS3711_SU2_CURR3,
+	AS3711_SU2_CURR_AUTO,
+};
+
+enum as3711_su2_fbprot {
+	AS3711_SU2_LX_SD4,
+	AS3711_SU2_GPIO2,
+	AS3711_SU2_GPIO3,
+	AS3711_SU2_GPIO4,
+};
+
+/*
+ * Platform data
+ */
+
+struct as3711_regulator_pdata {
+	struct regulator_init_data *init_data[AS3711_REGULATOR_MAX];
+};
+
+struct as3711_bl_pdata {
+	const char *su1_fb;
+	int su1_max_uA;
+	const char *su2_fb;
+	int su2_max_uA;
+	enum as3711_su2_feedback su2_feedback;
+	enum as3711_su2_fbprot su2_fbprot;
+	bool su2_auto_curr1;
+	bool su2_auto_curr2;
+	bool su2_auto_curr3;
+};
+
+struct as3711_platform_data {
+	struct as3711_regulator_pdata regulator;
+	struct as3711_bl_pdata backlight;
+};
+
+#endif
-- 
cgit v1.2.3


From 1ad7e89940d5ac411928189e1a4a01901dbf590f Mon Sep 17 00:00:00 2001
From: Stephen Warren <swarren@nvidia.com>
Date: Thu, 8 Nov 2012 16:12:25 -0800
Subject: block: store partition_meta_info.uuid as a string

This will allow other types of UUID to be stored here, aside from true
UUIDs.  This also simplifies code that uses this field, since it's usually
constructed from a, used as a, or compared to other, strings.

Note: A simplistic approach here would be to set uuid_str[36]=0 whenever a
/PARTNROFF option was found to be present.  However, this modifies the
input string, and causes subsequent calls to devt_from_partuuid() not to
see the /PARTNROFF option, which causes different results.  In order to
avoid misleading future maintainers, this parameter is marked const.

Signed-off-by: Stephen Warren <swarren@nvidia.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Will Drewry <wad@chromium.org>
Cc: Kay Sievers <kay.sievers@vrfy.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/genhd.c          |  8 +-------
 block/partitions/efi.c |  7 +------
 include/linux/genhd.h  |  8 ++++++--
 init/do_mounts.c       | 28 +++++++++++++++++-----------
 4 files changed, 25 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/block/genhd.c b/block/genhd.c
index 6cace663a80e..b281f3a2d26a 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -743,7 +743,6 @@ void __init printk_all_partitions(void)
 		struct hd_struct *part;
 		char name_buf[BDEVNAME_SIZE];
 		char devt_buf[BDEVT_SIZE];
-		char uuid_buf[PARTITION_META_INFO_UUIDLTH * 2 + 5];
 
 		/*
 		 * Don't show empty devices or things that have been
@@ -762,16 +761,11 @@ void __init printk_all_partitions(void)
 		while ((part = disk_part_iter_next(&piter))) {
 			bool is_part0 = part == &disk->part0;
 
-			uuid_buf[0] = '\0';
-			if (part->info)
-				snprintf(uuid_buf, sizeof(uuid_buf), "%pU",
-					 part->info->uuid);
-
 			printk("%s%s %10llu %s %s", is_part0 ? "" : "  ",
 			       bdevt_str(part_devt(part), devt_buf),
 			       (unsigned long long)part_nr_sects_read(part) >> 1
 			       , disk_name(disk, part->partno, name_buf),
-			       uuid_buf);
+			       part->info ? part->info->uuid : "");
 			if (is_part0) {
 				if (disk->driverfs_dev != NULL &&
 				    disk->driverfs_dev->driver != NULL)
diff --git a/block/partitions/efi.c b/block/partitions/efi.c
index 6296b403c67a..b62fb88b8711 100644
--- a/block/partitions/efi.c
+++ b/block/partitions/efi.c
@@ -620,7 +620,6 @@ int efi_partition(struct parsed_partitions *state)
 	gpt_entry *ptes = NULL;
 	u32 i;
 	unsigned ssz = bdev_logical_block_size(state->bdev) / 512;
-	u8 unparsed_guid[37];
 
 	if (!find_valid_gpt(state, &gpt, &ptes) || !gpt || !ptes) {
 		kfree(gpt);
@@ -649,11 +648,7 @@ int efi_partition(struct parsed_partitions *state)
 			state->parts[i + 1].flags = ADDPART_FLAG_RAID;
 
 		info = &state->parts[i + 1].info;
-		/* Instead of doing a manual swap to big endian, reuse the
-		 * common ASCII hex format as the interim.
-		 */
-		efi_guid_unparse(&ptes[i].unique_partition_guid, unparsed_guid);
-		part_pack_uuid(unparsed_guid, info->uuid);
+		efi_guid_unparse(&ptes[i].unique_partition_guid, info->uuid);
 
 		/* Naively convert UTF16-LE to 7 bits. */
 		label_max = min(sizeof(info->volname) - 1,
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 4f440b3e89fe..79b8bba19363 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -88,10 +88,14 @@ struct disk_stats {
 };
 
 #define PARTITION_META_INFO_VOLNAMELTH	64
-#define PARTITION_META_INFO_UUIDLTH	16
+/*
+ * Enough for the string representation of any kind of UUID plus NULL.
+ * EFI UUID is 36 characters. MSDOS UUID is 11 characters.
+ */
+#define PARTITION_META_INFO_UUIDLTH	37
 
 struct partition_meta_info {
-	u8 uuid[PARTITION_META_INFO_UUIDLTH];	/* always big endian */
+	char uuid[PARTITION_META_INFO_UUIDLTH];
 	u8 volname[PARTITION_META_INFO_VOLNAMELTH];
 };
 
diff --git a/init/do_mounts.c b/init/do_mounts.c
index f8a66424360d..b28ec5819325 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -69,23 +69,28 @@ __setup("ro", readonly);
 __setup("rw", readwrite);
 
 #ifdef CONFIG_BLOCK
+struct uuidcmp {
+	const char *uuid;
+	int len;
+};
+
 /**
  * match_dev_by_uuid - callback for finding a partition using its uuid
  * @dev:	device passed in by the caller
- * @data:	opaque pointer to a 36 byte char array with a UUID
+ * @data:	opaque pointer to the desired struct uuidcmp to match
  *
  * Returns 1 if the device matches, and 0 otherwise.
  */
 static int match_dev_by_uuid(struct device *dev, void *data)
 {
-	u8 *uuid = data;
+	struct uuidcmp *cmp = data;
 	struct hd_struct *part = dev_to_part(dev);
 
 	if (!part->info)
 		goto no_match;
 
-	if (memcmp(uuid, part->info->uuid, sizeof(part->info->uuid)))
-			goto no_match;
+	if (strncasecmp(cmp->uuid, part->info->uuid, cmp->len))
+		goto no_match;
 
 	return 1;
 no_match:
@@ -95,7 +100,7 @@ no_match:
 
 /**
  * devt_from_partuuid - looks up the dev_t of a partition by its UUID
- * @uuid:	min 36 byte char array containing a hex ascii UUID
+ * @uuid:	char array containing ascii UUID
  *
  * The function will return the first partition which contains a matching
  * UUID value in its partition_meta_info struct.  This does not search
@@ -106,11 +111,11 @@ no_match:
  *
  * Returns the matching dev_t on success or 0 on failure.
  */
-static dev_t devt_from_partuuid(char *uuid_str)
+static dev_t devt_from_partuuid(const char *uuid_str)
 {
 	dev_t res = 0;
+	struct uuidcmp cmp;
 	struct device *dev = NULL;
-	u8 uuid[16];
 	struct gendisk *disk;
 	struct hd_struct *part;
 	int offset = 0;
@@ -118,6 +123,9 @@ static dev_t devt_from_partuuid(char *uuid_str)
 	if (strlen(uuid_str) < 36)
 		goto done;
 
+	cmp.uuid = uuid_str;
+	cmp.len = 36;
+
 	/* Check for optional partition number offset attributes. */
 	if (uuid_str[36]) {
 		char c = 0;
@@ -134,10 +142,8 @@ static dev_t devt_from_partuuid(char *uuid_str)
 		}
 	}
 
-	/* Pack the requested UUID in the expected format. */
-	part_pack_uuid(uuid_str, uuid);
-
-	dev = class_find_device(&block_class, NULL, uuid, &match_dev_by_uuid);
+	dev = class_find_device(&block_class, NULL, &cmp,
+				&match_dev_by_uuid);
 	if (!dev)
 		goto done;
 
-- 
cgit v1.2.3


From 3f9be35bd9090eaa2f68ed9b24efdbf3abcf4b28 Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Tue, 20 Nov 2012 10:34:56 +0800
Subject: mfd: rc5t583: Fix array subscript is above array bounds

I got below build warning while compiling this driver.
It's obviously RC5T583_MAX_INTERRUPT_MASK_REGS is 9 but irq_en_add
array only has 8 elements.

  CC      drivers/mfd/rc5t583-irq.o
drivers/mfd/rc5t583-irq.c: In function 'rc5t583_irq_sync_unlock':
drivers/mfd/rc5t583-irq.c:227: warning: array subscript is above array bounds
drivers/mfd/rc5t583-irq.c: In function 'rc5t583_irq_init':
drivers/mfd/rc5t583-irq.c:349: warning: array subscript is above array bounds

Since the number of interrupt enable registers is 8, this patch adds
define for RC5T583_MAX_INTERRUPT_EN_REGS to fix this bug.

Signed-off-by: Axel Lin <axel.lin@ingics.com>
Acked-by: Laxman Dewangan <ldewangan@nvidia.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/rc5t583-irq.c   | 2 +-
 include/linux/mfd/rc5t583.h | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/rc5t583-irq.c b/drivers/mfd/rc5t583-irq.c
index fe00cdd6f83d..b41db5968706 100644
--- a/drivers/mfd/rc5t583-irq.c
+++ b/drivers/mfd/rc5t583-irq.c
@@ -345,7 +345,7 @@ int rc5t583_irq_init(struct rc5t583 *rc5t583, int irq, int irq_base)
 	mutex_init(&rc5t583->irq_lock);
 
 	/* Initailize all int register to 0 */
-	for (i = 0; i < RC5T583_MAX_INTERRUPT_MASK_REGS; i++)  {
+	for (i = 0; i < RC5T583_MAX_INTERRUPT_EN_REGS; i++)  {
 		ret = rc5t583_write(rc5t583->dev, irq_en_add[i],
 				rc5t583->irq_en_reg[i]);
 		if (ret < 0)
diff --git a/include/linux/mfd/rc5t583.h b/include/linux/mfd/rc5t583.h
index 36c242e52ef1..fd413ccab915 100644
--- a/include/linux/mfd/rc5t583.h
+++ b/include/linux/mfd/rc5t583.h
@@ -33,6 +33,7 @@
 /* Maximum number of main interrupts */
 #define MAX_MAIN_INTERRUPT		5
 #define RC5T583_MAX_GPEDGE_REG		2
+#define RC5T583_MAX_INTERRUPT_EN_REGS	8
 #define RC5T583_MAX_INTERRUPT_MASK_REGS	9
 
 /* Interrupt enable register */
@@ -304,7 +305,7 @@ struct rc5t583 {
 	uint8_t		intc_inten_reg;
 
 	/* For group interrupt bits and address */
-	uint8_t		irq_en_reg[RC5T583_MAX_INTERRUPT_MASK_REGS];
+	uint8_t		irq_en_reg[RC5T583_MAX_INTERRUPT_EN_REGS];
 
 	/* For gpio edge */
 	uint8_t		gpedge_reg[RC5T583_MAX_GPEDGE_REG];
-- 
cgit v1.2.3


From df1590d9ae5e37e07e7cf91107e4c2c946ce8bf4 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Mon, 12 Nov 2012 22:56:03 +0530
Subject: ARM: SPEAr3xx: Shirq: Move shirq controller out of plat/

This patch moves shirq interrupt controllers driver and header file out of
plat-spear directory. It is moved to drivers/irqchip/ directory.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 arch/arm/mach-spear3xx/spear3xx.c        |   2 +-
 arch/arm/plat-spear/Makefile             |   2 +-
 arch/arm/plat-spear/include/plat/shirq.h |  66 -------
 arch/arm/plat-spear/shirq.c              | 315 ------------------------------
 drivers/irqchip/Makefile                 |   3 +-
 drivers/irqchip/spear-shirq.c            | 316 +++++++++++++++++++++++++++++++
 include/linux/irqchip/spear-shirq.h      |  64 +++++++
 7 files changed, 384 insertions(+), 384 deletions(-)
 delete mode 100644 arch/arm/plat-spear/include/plat/shirq.h
 delete mode 100644 arch/arm/plat-spear/shirq.c
 create mode 100644 drivers/irqchip/spear-shirq.c
 create mode 100644 include/linux/irqchip/spear-shirq.h

(limited to 'include/linux')

diff --git a/arch/arm/mach-spear3xx/spear3xx.c b/arch/arm/mach-spear3xx/spear3xx.c
index f1aaf5b168b2..38fe95db31a7 100644
--- a/arch/arm/mach-spear3xx/spear3xx.c
+++ b/arch/arm/mach-spear3xx/spear3xx.c
@@ -15,12 +15,12 @@
 
 #include <linux/amba/pl022.h>
 #include <linux/amba/pl08x.h>
+#include <linux/irqchip/spear-shirq.h>
 #include <linux/of_irq.h>
 #include <linux/io.h>
 #include <asm/hardware/pl080.h>
 #include <asm/hardware/vic.h>
 #include <plat/pl080.h>
-#include <plat/shirq.h>
 #include <mach/generic.h>
 #include <mach/spear.h>
 
diff --git a/arch/arm/plat-spear/Makefile b/arch/arm/plat-spear/Makefile
index 2607bd05c525..01e88532a5db 100644
--- a/arch/arm/plat-spear/Makefile
+++ b/arch/arm/plat-spear/Makefile
@@ -5,5 +5,5 @@
 # Common support
 obj-y	:= restart.o time.o
 
-obj-$(CONFIG_ARCH_SPEAR3XX)	+= pl080.o shirq.o
+obj-$(CONFIG_ARCH_SPEAR3XX)	+= pl080.o
 obj-$(CONFIG_ARCH_SPEAR6XX)	+= pl080.o
diff --git a/arch/arm/plat-spear/include/plat/shirq.h b/arch/arm/plat-spear/include/plat/shirq.h
deleted file mode 100644
index c51b355f00de..000000000000
--- a/arch/arm/plat-spear/include/plat/shirq.h
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * arch/arm/plat-spear/include/plat/shirq.h
- *
- * SPEAr platform shared irq layer header file
- *
- * Copyright (C) 2009 ST Microelectronics
- * Viresh Kumar <viresh.linux@gmail.com>
- *
- * This file is licensed under the terms of the GNU General Public
- * License version 2. This program is licensed "as is" without any
- * warranty of any kind, whether express or implied.
- */
-
-#ifndef __PLAT_SHIRQ_H
-#define __PLAT_SHIRQ_H
-
-#include <linux/irq.h>
-#include <linux/types.h>
-
-/*
- * struct shirq_regs: shared irq register configuration
- *
- * enb_reg: enable register offset
- * reset_to_enb: val 1 indicates, we need to clear bit for enabling interrupt
- * status_reg: status register offset
- * status_reg_mask: status register valid mask
- * clear_reg: clear register offset
- * reset_to_clear: val 1 indicates, we need to clear bit for clearing interrupt
- */
-struct shirq_regs {
-	u32 enb_reg;
-	u32 reset_to_enb;
-	u32 status_reg;
-	u32 clear_reg;
-	u32 reset_to_clear;
-};
-
-/*
- * struct spear_shirq: shared irq structure
- *
- * irq: hardware irq number
- * irq_base: base irq in linux domain
- * irq_nr: no. of shared interrupts in a particular block
- * irq_bit_off: starting bit offset in the status register
- * invalid_irq: irq group is currently disabled
- * base: base address of shared irq register
- * regs: register configuration for shared irq block
- */
-struct spear_shirq {
-	u32 irq;
-	u32 irq_base;
-	u32 irq_nr;
-	u32 irq_bit_off;
-	int invalid_irq;
-	void __iomem *base;
-	struct shirq_regs regs;
-};
-
-int __init spear300_shirq_of_init(struct device_node *np,
-		struct device_node *parent);
-int __init spear310_shirq_of_init(struct device_node *np,
-		struct device_node *parent);
-int __init spear320_shirq_of_init(struct device_node *np,
-		struct device_node *parent);
-
-#endif /* __PLAT_SHIRQ_H */
diff --git a/arch/arm/plat-spear/shirq.c b/arch/arm/plat-spear/shirq.c
deleted file mode 100644
index 955c7249a5c1..000000000000
--- a/arch/arm/plat-spear/shirq.c
+++ /dev/null
@@ -1,315 +0,0 @@
-/*
- * arch/arm/plat-spear/shirq.c
- *
- * SPEAr platform shared irq layer source file
- *
- * Copyright (C) 2009 ST Microelectronics
- * Viresh Kumar <viresh.linux@gmail.com>
- *
- * This file is licensed under the terms of the GNU General Public
- * License version 2. This program is licensed "as is" without any
- * warranty of any kind, whether express or implied.
- */
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/err.h>
-#include <linux/export.h>
-#include <linux/interrupt.h>
-#include <linux/io.h>
-#include <linux/irq.h>
-#include <linux/irqdomain.h>
-#include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/of_irq.h>
-#include <linux/spinlock.h>
-#include <plat/shirq.h>
-
-static DEFINE_SPINLOCK(lock);
-
-/* spear300 shared irq registers offsets and masks */
-#define SPEAR300_INT_ENB_MASK_REG	0x54
-#define SPEAR300_INT_STS_MASK_REG	0x58
-
-static struct spear_shirq spear300_shirq_ras1 = {
-	.irq_nr = 9,
-	.irq_bit_off = 0,
-	.regs = {
-		.enb_reg = SPEAR300_INT_ENB_MASK_REG,
-		.status_reg = SPEAR300_INT_STS_MASK_REG,
-		.clear_reg = -1,
-	},
-};
-
-static struct spear_shirq *spear300_shirq_blocks[] = {
-	&spear300_shirq_ras1,
-};
-
-/* spear310 shared irq registers offsets and masks */
-#define SPEAR310_INT_STS_MASK_REG	0x04
-
-static struct spear_shirq spear310_shirq_ras1 = {
-	.irq_nr = 8,
-	.irq_bit_off = 0,
-	.regs = {
-		.enb_reg = -1,
-		.status_reg = SPEAR310_INT_STS_MASK_REG,
-		.clear_reg = -1,
-	},
-};
-
-static struct spear_shirq spear310_shirq_ras2 = {
-	.irq_nr = 5,
-	.irq_bit_off = 8,
-	.regs = {
-		.enb_reg = -1,
-		.status_reg = SPEAR310_INT_STS_MASK_REG,
-		.clear_reg = -1,
-	},
-};
-
-static struct spear_shirq spear310_shirq_ras3 = {
-	.irq_nr = 1,
-	.irq_bit_off = 13,
-	.regs = {
-		.enb_reg = -1,
-		.status_reg = SPEAR310_INT_STS_MASK_REG,
-		.clear_reg = -1,
-	},
-};
-
-static struct spear_shirq spear310_shirq_intrcomm_ras = {
-	.irq_nr = 3,
-	.irq_bit_off = 14,
-	.regs = {
-		.enb_reg = -1,
-		.status_reg = SPEAR310_INT_STS_MASK_REG,
-		.clear_reg = -1,
-	},
-};
-
-static struct spear_shirq *spear310_shirq_blocks[] = {
-	&spear310_shirq_ras1,
-	&spear310_shirq_ras2,
-	&spear310_shirq_ras3,
-	&spear310_shirq_intrcomm_ras,
-};
-
-/* spear320 shared irq registers offsets and masks */
-#define SPEAR320_INT_STS_MASK_REG		0x04
-#define SPEAR320_INT_CLR_MASK_REG		0x04
-#define SPEAR320_INT_ENB_MASK_REG		0x08
-
-static struct spear_shirq spear320_shirq_ras1 = {
-	.irq_nr = 3,
-	.irq_bit_off = 7,
-	.regs = {
-		.enb_reg = -1,
-		.status_reg = SPEAR320_INT_STS_MASK_REG,
-		.clear_reg = SPEAR320_INT_CLR_MASK_REG,
-		.reset_to_clear = 1,
-	},
-};
-
-static struct spear_shirq spear320_shirq_ras2 = {
-	.irq_nr = 1,
-	.irq_bit_off = 10,
-	.regs = {
-		.enb_reg = -1,
-		.status_reg = SPEAR320_INT_STS_MASK_REG,
-		.clear_reg = SPEAR320_INT_CLR_MASK_REG,
-		.reset_to_clear = 1,
-	},
-};
-
-static struct spear_shirq spear320_shirq_ras3 = {
-	.irq_nr = 3,
-	.irq_bit_off = 0,
-	.invalid_irq = 1,
-	.regs = {
-		.enb_reg = SPEAR320_INT_ENB_MASK_REG,
-		.reset_to_enb = 1,
-		.status_reg = SPEAR320_INT_STS_MASK_REG,
-		.clear_reg = SPEAR320_INT_CLR_MASK_REG,
-		.reset_to_clear = 1,
-	},
-};
-
-static struct spear_shirq spear320_shirq_intrcomm_ras = {
-	.irq_nr = 11,
-	.irq_bit_off = 11,
-	.regs = {
-		.enb_reg = -1,
-		.status_reg = SPEAR320_INT_STS_MASK_REG,
-		.clear_reg = SPEAR320_INT_CLR_MASK_REG,
-		.reset_to_clear = 1,
-	},
-};
-
-static struct spear_shirq *spear320_shirq_blocks[] = {
-	&spear320_shirq_ras3,
-	&spear320_shirq_ras1,
-	&spear320_shirq_ras2,
-	&spear320_shirq_intrcomm_ras,
-};
-
-static void shirq_irq_mask_unmask(struct irq_data *d, bool mask)
-{
-	struct spear_shirq *shirq = irq_data_get_irq_chip_data(d);
-	u32 val, offset = d->irq - shirq->irq_base;
-	unsigned long flags;
-
-	if (shirq->regs.enb_reg == -1)
-		return;
-
-	spin_lock_irqsave(&lock, flags);
-	val = readl(shirq->base + shirq->regs.enb_reg);
-
-	if (mask ^ shirq->regs.reset_to_enb)
-		val &= ~(0x1 << shirq->irq_bit_off << offset);
-	else
-		val |= 0x1 << shirq->irq_bit_off << offset;
-
-	writel(val, shirq->base + shirq->regs.enb_reg);
-	spin_unlock_irqrestore(&lock, flags);
-
-}
-
-static void shirq_irq_mask(struct irq_data *d)
-{
-	shirq_irq_mask_unmask(d, 1);
-}
-
-static void shirq_irq_unmask(struct irq_data *d)
-{
-	shirq_irq_mask_unmask(d, 0);
-}
-
-static struct irq_chip shirq_chip = {
-	.name		= "spear-shirq",
-	.irq_ack	= shirq_irq_mask,
-	.irq_mask	= shirq_irq_mask,
-	.irq_unmask	= shirq_irq_unmask,
-};
-
-static void shirq_handler(unsigned irq, struct irq_desc *desc)
-{
-	u32 i, j, val, mask, tmp;
-	struct irq_chip *chip;
-	struct spear_shirq *shirq = irq_get_handler_data(irq);
-
-	chip = irq_get_chip(irq);
-	chip->irq_ack(&desc->irq_data);
-
-	mask = ((0x1 << shirq->irq_nr) - 1) << shirq->irq_bit_off;
-	while ((val = readl(shirq->base + shirq->regs.status_reg) &
-				mask)) {
-
-		val >>= shirq->irq_bit_off;
-		for (i = 0, j = 1; i < shirq->irq_nr; i++, j <<= 1) {
-
-			if (!(j & val))
-				continue;
-
-			generic_handle_irq(shirq->irq_base + i);
-
-			/* clear interrupt */
-			if (shirq->regs.clear_reg == -1)
-				continue;
-
-			tmp = readl(shirq->base + shirq->regs.clear_reg);
-			if (shirq->regs.reset_to_clear)
-				tmp &= ~(j << shirq->irq_bit_off);
-			else
-				tmp |= (j << shirq->irq_bit_off);
-			writel(tmp, shirq->base + shirq->regs.clear_reg);
-		}
-	}
-	chip->irq_unmask(&desc->irq_data);
-}
-
-static void __init spear_shirq_register(struct spear_shirq *shirq)
-{
-	int i;
-
-	if (shirq->invalid_irq)
-		return;
-
-	irq_set_chained_handler(shirq->irq, shirq_handler);
-	for (i = 0; i < shirq->irq_nr; i++) {
-		irq_set_chip_and_handler(shirq->irq_base + i,
-					 &shirq_chip, handle_simple_irq);
-		set_irq_flags(shirq->irq_base + i, IRQF_VALID);
-		irq_set_chip_data(shirq->irq_base + i, shirq);
-	}
-
-	irq_set_handler_data(shirq->irq, shirq);
-}
-
-static int __init shirq_init(struct spear_shirq **shirq_blocks, int block_nr,
-		struct device_node *np)
-{
-	int i, irq_base, hwirq = 0, irq_nr = 0;
-	static struct irq_domain *shirq_domain;
-	void __iomem *base;
-
-	base = of_iomap(np, 0);
-	if (!base) {
-		pr_err("%s: failed to map shirq registers\n", __func__);
-		return -ENXIO;
-	}
-
-	for (i = 0; i < block_nr; i++)
-		irq_nr += shirq_blocks[i]->irq_nr;
-
-	irq_base = irq_alloc_descs(-1, 0, irq_nr, 0);
-	if (IS_ERR_VALUE(irq_base)) {
-		pr_err("%s: irq desc alloc failed\n", __func__);
-		goto err_unmap;
-	}
-
-	shirq_domain = irq_domain_add_legacy(np, irq_nr, irq_base, 0,
-			&irq_domain_simple_ops, NULL);
-	if (WARN_ON(!shirq_domain)) {
-		pr_warn("%s: irq domain init failed\n", __func__);
-		goto err_free_desc;
-	}
-
-	for (i = 0; i < block_nr; i++) {
-		shirq_blocks[i]->base = base;
-		shirq_blocks[i]->irq_base = irq_find_mapping(shirq_domain,
-				hwirq);
-		shirq_blocks[i]->irq = irq_of_parse_and_map(np, i);
-
-		spear_shirq_register(shirq_blocks[i]);
-		hwirq += shirq_blocks[i]->irq_nr;
-	}
-
-	return 0;
-
-err_free_desc:
-	irq_free_descs(irq_base, irq_nr);
-err_unmap:
-	iounmap(base);
-	return -ENXIO;
-}
-
-int __init spear300_shirq_of_init(struct device_node *np,
-		struct device_node *parent)
-{
-	return shirq_init(spear300_shirq_blocks,
-			ARRAY_SIZE(spear300_shirq_blocks), np);
-}
-
-int __init spear310_shirq_of_init(struct device_node *np,
-		struct device_node *parent)
-{
-	return shirq_init(spear310_shirq_blocks,
-			ARRAY_SIZE(spear310_shirq_blocks), np);
-}
-
-int __init spear320_shirq_of_init(struct device_node *np,
-		struct device_node *parent)
-{
-	return shirq_init(spear320_shirq_blocks,
-			ARRAY_SIZE(spear320_shirq_blocks), np);
-}
diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile
index 054321db4350..bf7b43696cdb 100644
--- a/drivers/irqchip/Makefile
+++ b/drivers/irqchip/Makefile
@@ -1 +1,2 @@
-obj-$(CONFIG_ARCH_BCM2835) += irq-bcm2835.o
+obj-$(CONFIG_ARCH_BCM2835)		+= irq-bcm2835.o
+obj-$(CONFIG_ARCH_SPEAR3XX)		+= spear-shirq.o
diff --git a/drivers/irqchip/spear-shirq.c b/drivers/irqchip/spear-shirq.c
new file mode 100644
index 000000000000..80e1d2fd9d4c
--- /dev/null
+++ b/drivers/irqchip/spear-shirq.c
@@ -0,0 +1,316 @@
+/*
+ * SPEAr platform shared irq layer source file
+ *
+ * Copyright (C) 2009-2012 ST Microelectronics
+ * Viresh Kumar <viresh.linux@gmail.com>
+ *
+ * Copyright (C) 2012 ST Microelectronics
+ * Shiraz Hashim <shiraz.hashim@st.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/err.h>
+#include <linux/export.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
+#include <linux/irqchip/spear-shirq.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/spinlock.h>
+
+static DEFINE_SPINLOCK(lock);
+
+/* spear300 shared irq registers offsets and masks */
+#define SPEAR300_INT_ENB_MASK_REG	0x54
+#define SPEAR300_INT_STS_MASK_REG	0x58
+
+static struct spear_shirq spear300_shirq_ras1 = {
+	.irq_nr = 9,
+	.irq_bit_off = 0,
+	.regs = {
+		.enb_reg = SPEAR300_INT_ENB_MASK_REG,
+		.status_reg = SPEAR300_INT_STS_MASK_REG,
+		.clear_reg = -1,
+	},
+};
+
+static struct spear_shirq *spear300_shirq_blocks[] = {
+	&spear300_shirq_ras1,
+};
+
+/* spear310 shared irq registers offsets and masks */
+#define SPEAR310_INT_STS_MASK_REG	0x04
+
+static struct spear_shirq spear310_shirq_ras1 = {
+	.irq_nr = 8,
+	.irq_bit_off = 0,
+	.regs = {
+		.enb_reg = -1,
+		.status_reg = SPEAR310_INT_STS_MASK_REG,
+		.clear_reg = -1,
+	},
+};
+
+static struct spear_shirq spear310_shirq_ras2 = {
+	.irq_nr = 5,
+	.irq_bit_off = 8,
+	.regs = {
+		.enb_reg = -1,
+		.status_reg = SPEAR310_INT_STS_MASK_REG,
+		.clear_reg = -1,
+	},
+};
+
+static struct spear_shirq spear310_shirq_ras3 = {
+	.irq_nr = 1,
+	.irq_bit_off = 13,
+	.regs = {
+		.enb_reg = -1,
+		.status_reg = SPEAR310_INT_STS_MASK_REG,
+		.clear_reg = -1,
+	},
+};
+
+static struct spear_shirq spear310_shirq_intrcomm_ras = {
+	.irq_nr = 3,
+	.irq_bit_off = 14,
+	.regs = {
+		.enb_reg = -1,
+		.status_reg = SPEAR310_INT_STS_MASK_REG,
+		.clear_reg = -1,
+	},
+};
+
+static struct spear_shirq *spear310_shirq_blocks[] = {
+	&spear310_shirq_ras1,
+	&spear310_shirq_ras2,
+	&spear310_shirq_ras3,
+	&spear310_shirq_intrcomm_ras,
+};
+
+/* spear320 shared irq registers offsets and masks */
+#define SPEAR320_INT_STS_MASK_REG		0x04
+#define SPEAR320_INT_CLR_MASK_REG		0x04
+#define SPEAR320_INT_ENB_MASK_REG		0x08
+
+static struct spear_shirq spear320_shirq_ras1 = {
+	.irq_nr = 3,
+	.irq_bit_off = 7,
+	.regs = {
+		.enb_reg = -1,
+		.status_reg = SPEAR320_INT_STS_MASK_REG,
+		.clear_reg = SPEAR320_INT_CLR_MASK_REG,
+		.reset_to_clear = 1,
+	},
+};
+
+static struct spear_shirq spear320_shirq_ras2 = {
+	.irq_nr = 1,
+	.irq_bit_off = 10,
+	.regs = {
+		.enb_reg = -1,
+		.status_reg = SPEAR320_INT_STS_MASK_REG,
+		.clear_reg = SPEAR320_INT_CLR_MASK_REG,
+		.reset_to_clear = 1,
+	},
+};
+
+static struct spear_shirq spear320_shirq_ras3 = {
+	.irq_nr = 3,
+	.irq_bit_off = 0,
+	.invalid_irq = 1,
+	.regs = {
+		.enb_reg = SPEAR320_INT_ENB_MASK_REG,
+		.reset_to_enb = 1,
+		.status_reg = SPEAR320_INT_STS_MASK_REG,
+		.clear_reg = SPEAR320_INT_CLR_MASK_REG,
+		.reset_to_clear = 1,
+	},
+};
+
+static struct spear_shirq spear320_shirq_intrcomm_ras = {
+	.irq_nr = 11,
+	.irq_bit_off = 11,
+	.regs = {
+		.enb_reg = -1,
+		.status_reg = SPEAR320_INT_STS_MASK_REG,
+		.clear_reg = SPEAR320_INT_CLR_MASK_REG,
+		.reset_to_clear = 1,
+	},
+};
+
+static struct spear_shirq *spear320_shirq_blocks[] = {
+	&spear320_shirq_ras3,
+	&spear320_shirq_ras1,
+	&spear320_shirq_ras2,
+	&spear320_shirq_intrcomm_ras,
+};
+
+static void shirq_irq_mask_unmask(struct irq_data *d, bool mask)
+{
+	struct spear_shirq *shirq = irq_data_get_irq_chip_data(d);
+	u32 val, offset = d->irq - shirq->irq_base;
+	unsigned long flags;
+
+	if (shirq->regs.enb_reg == -1)
+		return;
+
+	spin_lock_irqsave(&lock, flags);
+	val = readl(shirq->base + shirq->regs.enb_reg);
+
+	if (mask ^ shirq->regs.reset_to_enb)
+		val &= ~(0x1 << shirq->irq_bit_off << offset);
+	else
+		val |= 0x1 << shirq->irq_bit_off << offset;
+
+	writel(val, shirq->base + shirq->regs.enb_reg);
+	spin_unlock_irqrestore(&lock, flags);
+
+}
+
+static void shirq_irq_mask(struct irq_data *d)
+{
+	shirq_irq_mask_unmask(d, 1);
+}
+
+static void shirq_irq_unmask(struct irq_data *d)
+{
+	shirq_irq_mask_unmask(d, 0);
+}
+
+static struct irq_chip shirq_chip = {
+	.name		= "spear-shirq",
+	.irq_ack	= shirq_irq_mask,
+	.irq_mask	= shirq_irq_mask,
+	.irq_unmask	= shirq_irq_unmask,
+};
+
+static void shirq_handler(unsigned irq, struct irq_desc *desc)
+{
+	u32 i, j, val, mask, tmp;
+	struct irq_chip *chip;
+	struct spear_shirq *shirq = irq_get_handler_data(irq);
+
+	chip = irq_get_chip(irq);
+	chip->irq_ack(&desc->irq_data);
+
+	mask = ((0x1 << shirq->irq_nr) - 1) << shirq->irq_bit_off;
+	while ((val = readl(shirq->base + shirq->regs.status_reg) &
+				mask)) {
+
+		val >>= shirq->irq_bit_off;
+		for (i = 0, j = 1; i < shirq->irq_nr; i++, j <<= 1) {
+
+			if (!(j & val))
+				continue;
+
+			generic_handle_irq(shirq->irq_base + i);
+
+			/* clear interrupt */
+			if (shirq->regs.clear_reg == -1)
+				continue;
+
+			tmp = readl(shirq->base + shirq->regs.clear_reg);
+			if (shirq->regs.reset_to_clear)
+				tmp &= ~(j << shirq->irq_bit_off);
+			else
+				tmp |= (j << shirq->irq_bit_off);
+			writel(tmp, shirq->base + shirq->regs.clear_reg);
+		}
+	}
+	chip->irq_unmask(&desc->irq_data);
+}
+
+static void __init spear_shirq_register(struct spear_shirq *shirq)
+{
+	int i;
+
+	if (shirq->invalid_irq)
+		return;
+
+	irq_set_chained_handler(shirq->irq, shirq_handler);
+	for (i = 0; i < shirq->irq_nr; i++) {
+		irq_set_chip_and_handler(shirq->irq_base + i,
+					 &shirq_chip, handle_simple_irq);
+		set_irq_flags(shirq->irq_base + i, IRQF_VALID);
+		irq_set_chip_data(shirq->irq_base + i, shirq);
+	}
+
+	irq_set_handler_data(shirq->irq, shirq);
+}
+
+static int __init shirq_init(struct spear_shirq **shirq_blocks, int block_nr,
+		struct device_node *np)
+{
+	int i, irq_base, hwirq = 0, irq_nr = 0;
+	static struct irq_domain *shirq_domain;
+	void __iomem *base;
+
+	base = of_iomap(np, 0);
+	if (!base) {
+		pr_err("%s: failed to map shirq registers\n", __func__);
+		return -ENXIO;
+	}
+
+	for (i = 0; i < block_nr; i++)
+		irq_nr += shirq_blocks[i]->irq_nr;
+
+	irq_base = irq_alloc_descs(-1, 0, irq_nr, 0);
+	if (IS_ERR_VALUE(irq_base)) {
+		pr_err("%s: irq desc alloc failed\n", __func__);
+		goto err_unmap;
+	}
+
+	shirq_domain = irq_domain_add_legacy(np, irq_nr, irq_base, 0,
+			&irq_domain_simple_ops, NULL);
+	if (WARN_ON(!shirq_domain)) {
+		pr_warn("%s: irq domain init failed\n", __func__);
+		goto err_free_desc;
+	}
+
+	for (i = 0; i < block_nr; i++) {
+		shirq_blocks[i]->base = base;
+		shirq_blocks[i]->irq_base = irq_find_mapping(shirq_domain,
+				hwirq);
+		shirq_blocks[i]->irq = irq_of_parse_and_map(np, i);
+
+		spear_shirq_register(shirq_blocks[i]);
+		hwirq += shirq_blocks[i]->irq_nr;
+	}
+
+	return 0;
+
+err_free_desc:
+	irq_free_descs(irq_base, irq_nr);
+err_unmap:
+	iounmap(base);
+	return -ENXIO;
+}
+
+int __init spear300_shirq_of_init(struct device_node *np,
+		struct device_node *parent)
+{
+	return shirq_init(spear300_shirq_blocks,
+			ARRAY_SIZE(spear300_shirq_blocks), np);
+}
+
+int __init spear310_shirq_of_init(struct device_node *np,
+		struct device_node *parent)
+{
+	return shirq_init(spear310_shirq_blocks,
+			ARRAY_SIZE(spear310_shirq_blocks), np);
+}
+
+int __init spear320_shirq_of_init(struct device_node *np,
+		struct device_node *parent)
+{
+	return shirq_init(spear320_shirq_blocks,
+			ARRAY_SIZE(spear320_shirq_blocks), np);
+}
diff --git a/include/linux/irqchip/spear-shirq.h b/include/linux/irqchip/spear-shirq.h
new file mode 100644
index 000000000000..c8be16d213a3
--- /dev/null
+++ b/include/linux/irqchip/spear-shirq.h
@@ -0,0 +1,64 @@
+/*
+ * SPEAr platform shared irq layer header file
+ *
+ * Copyright (C) 2009-2012 ST Microelectronics
+ * Viresh Kumar <viresh.linux@gmail.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#ifndef __SPEAR_SHIRQ_H
+#define __SPEAR_SHIRQ_H
+
+#include <linux/irq.h>
+#include <linux/types.h>
+
+/*
+ * struct shirq_regs: shared irq register configuration
+ *
+ * enb_reg: enable register offset
+ * reset_to_enb: val 1 indicates, we need to clear bit for enabling interrupt
+ * status_reg: status register offset
+ * status_reg_mask: status register valid mask
+ * clear_reg: clear register offset
+ * reset_to_clear: val 1 indicates, we need to clear bit for clearing interrupt
+ */
+struct shirq_regs {
+	u32 enb_reg;
+	u32 reset_to_enb;
+	u32 status_reg;
+	u32 clear_reg;
+	u32 reset_to_clear;
+};
+
+/*
+ * struct spear_shirq: shared irq structure
+ *
+ * irq: hardware irq number
+ * irq_base: base irq in linux domain
+ * irq_nr: no. of shared interrupts in a particular block
+ * irq_bit_off: starting bit offset in the status register
+ * invalid_irq: irq group is currently disabled
+ * base: base address of shared irq register
+ * regs: register configuration for shared irq block
+ */
+struct spear_shirq {
+	u32 irq;
+	u32 irq_base;
+	u32 irq_nr;
+	u32 irq_bit_off;
+	int invalid_irq;
+	void __iomem *base;
+	struct shirq_regs regs;
+};
+
+int __init spear300_shirq_of_init(struct device_node *np,
+		struct device_node *parent);
+int __init spear310_shirq_of_init(struct device_node *np,
+		struct device_node *parent);
+int __init spear320_shirq_of_init(struct device_node *np,
+		struct device_node *parent);
+
+#endif /* __SPEAR_SHIRQ_H */
-- 
cgit v1.2.3


From 08ff32352d6ff7083533dc1c25618d42f92ec28e Mon Sep 17 00:00:00 2001
From: Or Gerlitz <ogerlitz@mellanox.com>
Date: Sun, 21 Oct 2012 14:59:24 +0000
Subject: mlx4: 64-byte CQE/EQE support

ConnectX-3 devices can use either 64- or 32-byte completion queue
entries (CQEs) and event queue entries (EQEs).  Using 64-byte
EQEs/CQEs performs better because each entry is aligned to a complete
cacheline.  This patch queries the HCA's capabilities, and if it
supports 64-byte CQEs and EQES the driver will configure the HW to
work in 64-byte mode.

The 32-byte vs 64-byte mode is global per HCA and not per CQ or EQ.

Since this mode is global, userspace (libmlx4) must be updated to work
with the configured CQE size, and guests using SR-IOV virtual
functions need to know both EQE and CQE size.

In case one of the 64-byte CQE/EQE capabilities is activated, the
patch makes sure that older guest drivers that use the QUERY_DEV_FUNC
command (e.g as done in mlx4_core of Linux 3.3..3.6) will notice that
they need an update to be able to work with the PPF. This is done by
changing the returned pf_context_behaviour not to be zero any more. In
case none of these capabilities is activated that value remains zero
and older guest drivers can run OK.

The SRIOV related flow is as follows

1. the PPF does the detection of the new capabilities using
   QUERY_DEV_CAP command.

2. the PPF activates the new capabilities using INIT_HCA.

3. the VF detects if the PPF activated the capabilities using
   QUERY_HCA, and if this is the case activates them for itself too.

Note that the VF detects that it must be aware to the new PF behaviour
using QUERY_FUNC_CAP.  Steps 1 and 2 apply also for native mode.

User space notification is done through a new field introduced in
struct mlx4_ib_ucontext which holds device capabilities for which user
space must take action. This changes the binary interface so the ABI
towards libmlx4 exposed through uverbs is bumped from 3 to 4 but only
when **needed** i.e. only when the driver does use 64-byte CQEs or
future device capabilities which must be in sync by user space. This
practice allows to work with unmodified libmlx4 on older devices (e.g
A0, B0) which don't support 64-byte CQEs.

In order to keep existing systems functional when they update to a
newer kernel that contains these changes in VF and userspace ABI, a
module parameter enable_64b_cqe_eqe must be set to enable 64-byte
mode; the default is currently false.

Signed-off-by: Eli Cohen <eli@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
---
 drivers/infiniband/hw/mlx4/cq.c                | 34 +++++++++++++++++------
 drivers/infiniband/hw/mlx4/main.c              | 27 ++++++++++++++----
 drivers/infiniband/hw/mlx4/mlx4_ib.h           |  1 +
 drivers/infiniband/hw/mlx4/user.h              | 12 +++++++-
 drivers/net/ethernet/mellanox/mlx4/cmd.c       |  2 +-
 drivers/net/ethernet/mellanox/mlx4/en_cq.c     |  2 +-
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c |  1 +
 drivers/net/ethernet/mellanox/mlx4/en_rx.c     |  5 ++--
 drivers/net/ethernet/mellanox/mlx4/en_tx.c     |  5 ++--
 drivers/net/ethernet/mellanox/mlx4/eq.c        | 26 ++++++++++++------
 drivers/net/ethernet/mellanox/mlx4/fw.c        | 30 +++++++++++++++++++-
 drivers/net/ethernet/mellanox/mlx4/fw.h        |  1 +
 drivers/net/ethernet/mellanox/mlx4/main.c      | 38 +++++++++++++++++++++++++-
 drivers/net/ethernet/mellanox/mlx4/mlx4_en.h   |  1 +
 include/linux/mlx4/device.h                    | 21 ++++++++++++++
 15 files changed, 175 insertions(+), 31 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index c9eb6a6815ce..ae67df35dd4d 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -66,7 +66,7 @@ static void mlx4_ib_cq_event(struct mlx4_cq *cq, enum mlx4_event type)
 
 static void *get_cqe_from_buf(struct mlx4_ib_cq_buf *buf, int n)
 {
-	return mlx4_buf_offset(&buf->buf, n * sizeof (struct mlx4_cqe));
+	return mlx4_buf_offset(&buf->buf, n * buf->entry_size);
 }
 
 static void *get_cqe(struct mlx4_ib_cq *cq, int n)
@@ -77,8 +77,9 @@ static void *get_cqe(struct mlx4_ib_cq *cq, int n)
 static void *get_sw_cqe(struct mlx4_ib_cq *cq, int n)
 {
 	struct mlx4_cqe *cqe = get_cqe(cq, n & cq->ibcq.cqe);
+	struct mlx4_cqe *tcqe = ((cq->buf.entry_size == 64) ? (cqe + 1) : cqe);
 
-	return (!!(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK) ^
+	return (!!(tcqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK) ^
 		!!(n & (cq->ibcq.cqe + 1))) ? NULL : cqe;
 }
 
@@ -99,12 +100,13 @@ static int mlx4_ib_alloc_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf *
 {
 	int err;
 
-	err = mlx4_buf_alloc(dev->dev, nent * sizeof(struct mlx4_cqe),
+	err = mlx4_buf_alloc(dev->dev, nent * dev->dev->caps.cqe_size,
 			     PAGE_SIZE * 2, &buf->buf);
 
 	if (err)
 		goto out;
 
+	buf->entry_size = dev->dev->caps.cqe_size;
 	err = mlx4_mtt_init(dev->dev, buf->buf.npages, buf->buf.page_shift,
 				    &buf->mtt);
 	if (err)
@@ -120,8 +122,7 @@ err_mtt:
 	mlx4_mtt_cleanup(dev->dev, &buf->mtt);
 
 err_buf:
-	mlx4_buf_free(dev->dev, nent * sizeof(struct mlx4_cqe),
-			      &buf->buf);
+	mlx4_buf_free(dev->dev, nent * buf->entry_size, &buf->buf);
 
 out:
 	return err;
@@ -129,7 +130,7 @@ out:
 
 static void mlx4_ib_free_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf *buf, int cqe)
 {
-	mlx4_buf_free(dev->dev, (cqe + 1) * sizeof(struct mlx4_cqe), &buf->buf);
+	mlx4_buf_free(dev->dev, (cqe + 1) * buf->entry_size, &buf->buf);
 }
 
 static int mlx4_ib_get_cq_umem(struct mlx4_ib_dev *dev, struct ib_ucontext *context,
@@ -137,8 +138,9 @@ static int mlx4_ib_get_cq_umem(struct mlx4_ib_dev *dev, struct ib_ucontext *cont
 			       u64 buf_addr, int cqe)
 {
 	int err;
+	int cqe_size = dev->dev->caps.cqe_size;
 
-	*umem = ib_umem_get(context, buf_addr, cqe * sizeof (struct mlx4_cqe),
+	*umem = ib_umem_get(context, buf_addr, cqe * cqe_size,
 			    IB_ACCESS_LOCAL_WRITE, 1);
 	if (IS_ERR(*umem))
 		return PTR_ERR(*umem);
@@ -331,16 +333,23 @@ static void mlx4_ib_cq_resize_copy_cqes(struct mlx4_ib_cq *cq)
 {
 	struct mlx4_cqe *cqe, *new_cqe;
 	int i;
+	int cqe_size = cq->buf.entry_size;
+	int cqe_inc = cqe_size == 64 ? 1 : 0;
 
 	i = cq->mcq.cons_index;
 	cqe = get_cqe(cq, i & cq->ibcq.cqe);
+	cqe += cqe_inc;
+
 	while ((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) != MLX4_CQE_OPCODE_RESIZE) {
 		new_cqe = get_cqe_from_buf(&cq->resize_buf->buf,
 					   (i + 1) & cq->resize_buf->cqe);
-		memcpy(new_cqe, get_cqe(cq, i & cq->ibcq.cqe), sizeof(struct mlx4_cqe));
+		memcpy(new_cqe, get_cqe(cq, i & cq->ibcq.cqe), cqe_size);
+		new_cqe += cqe_inc;
+
 		new_cqe->owner_sr_opcode = (cqe->owner_sr_opcode & ~MLX4_CQE_OWNER_MASK) |
 			(((i + 1) & (cq->resize_buf->cqe + 1)) ? MLX4_CQE_OWNER_MASK : 0);
 		cqe = get_cqe(cq, ++i & cq->ibcq.cqe);
+		cqe += cqe_inc;
 	}
 	++cq->mcq.cons_index;
 }
@@ -438,6 +447,7 @@ err_buf:
 
 out:
 	mutex_unlock(&cq->resize_mutex);
+
 	return err;
 }
 
@@ -586,6 +596,9 @@ repoll:
 	if (!cqe)
 		return -EAGAIN;
 
+	if (cq->buf.entry_size == 64)
+		cqe++;
+
 	++cq->mcq.cons_index;
 
 	/*
@@ -807,6 +820,7 @@ void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq)
 	int nfreed = 0;
 	struct mlx4_cqe *cqe, *dest;
 	u8 owner_bit;
+	int cqe_inc = cq->buf.entry_size == 64 ? 1 : 0;
 
 	/*
 	 * First we need to find the current producer index, so we
@@ -825,12 +839,16 @@ void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq)
 	 */
 	while ((int) --prod_index - (int) cq->mcq.cons_index >= 0) {
 		cqe = get_cqe(cq, prod_index & cq->ibcq.cqe);
+		cqe += cqe_inc;
+
 		if ((be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK) == qpn) {
 			if (srq && !(cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK))
 				mlx4_ib_free_srq_wqe(srq, be16_to_cpu(cqe->wqe_index));
 			++nfreed;
 		} else if (nfreed) {
 			dest = get_cqe(cq, (prod_index + nfreed) & cq->ibcq.cqe);
+			dest += cqe_inc;
+
 			owner_bit = dest->owner_sr_opcode & MLX4_CQE_OWNER_MASK;
 			memcpy(dest, cqe, sizeof *cqe);
 			dest->owner_sr_opcode = owner_bit |
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 718ec6b2bad2..e7d81c0d1ac5 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -563,15 +563,24 @@ static struct ib_ucontext *mlx4_ib_alloc_ucontext(struct ib_device *ibdev,
 {
 	struct mlx4_ib_dev *dev = to_mdev(ibdev);
 	struct mlx4_ib_ucontext *context;
+	struct mlx4_ib_alloc_ucontext_resp_v3 resp_v3;
 	struct mlx4_ib_alloc_ucontext_resp resp;
 	int err;
 
 	if (!dev->ib_active)
 		return ERR_PTR(-EAGAIN);
 
-	resp.qp_tab_size      = dev->dev->caps.num_qps;
-	resp.bf_reg_size      = dev->dev->caps.bf_reg_size;
-	resp.bf_regs_per_page = dev->dev->caps.bf_regs_per_page;
+	if (ibdev->uverbs_abi_ver == MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION) {
+		resp_v3.qp_tab_size      = dev->dev->caps.num_qps;
+		resp_v3.bf_reg_size      = dev->dev->caps.bf_reg_size;
+		resp_v3.bf_regs_per_page = dev->dev->caps.bf_regs_per_page;
+	} else {
+		resp.dev_caps	      = dev->dev->caps.userspace_caps;
+		resp.qp_tab_size      = dev->dev->caps.num_qps;
+		resp.bf_reg_size      = dev->dev->caps.bf_reg_size;
+		resp.bf_regs_per_page = dev->dev->caps.bf_regs_per_page;
+		resp.cqe_size	      = dev->dev->caps.cqe_size;
+	}
 
 	context = kmalloc(sizeof *context, GFP_KERNEL);
 	if (!context)
@@ -586,7 +595,11 @@ static struct ib_ucontext *mlx4_ib_alloc_ucontext(struct ib_device *ibdev,
 	INIT_LIST_HEAD(&context->db_page_list);
 	mutex_init(&context->db_page_mutex);
 
-	err = ib_copy_to_udata(udata, &resp, sizeof resp);
+	if (ibdev->uverbs_abi_ver == MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION)
+		err = ib_copy_to_udata(udata, &resp_v3, sizeof(resp_v3));
+	else
+		err = ib_copy_to_udata(udata, &resp, sizeof(resp));
+
 	if (err) {
 		mlx4_uar_free(to_mdev(ibdev)->dev, &context->uar);
 		kfree(context);
@@ -1342,7 +1355,11 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
 	ibdev->ib_dev.num_comp_vectors	= dev->caps.num_comp_vectors;
 	ibdev->ib_dev.dma_device	= &dev->pdev->dev;
 
-	ibdev->ib_dev.uverbs_abi_ver	= MLX4_IB_UVERBS_ABI_VERSION;
+	if (dev->caps.userspace_caps)
+		ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION;
+	else
+		ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION;
+
 	ibdev->ib_dev.uverbs_cmd_mask	=
 		(1ull << IB_USER_VERBS_CMD_GET_CONTEXT)		|
 		(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE)	|
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index e04cbc9a54a5..dcd845bc30f0 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -90,6 +90,7 @@ struct mlx4_ib_xrcd {
 struct mlx4_ib_cq_buf {
 	struct mlx4_buf		buf;
 	struct mlx4_mtt		mtt;
+	int			entry_size;
 };
 
 struct mlx4_ib_cq_resize {
diff --git a/drivers/infiniband/hw/mlx4/user.h b/drivers/infiniband/hw/mlx4/user.h
index 13beedeeef9f..07e6769ef43b 100644
--- a/drivers/infiniband/hw/mlx4/user.h
+++ b/drivers/infiniband/hw/mlx4/user.h
@@ -40,7 +40,9 @@
  * Increment this value if any changes that break userspace ABI
  * compatibility are made.
  */
-#define MLX4_IB_UVERBS_ABI_VERSION	3
+
+#define MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION	3
+#define MLX4_IB_UVERBS_ABI_VERSION		4
 
 /*
  * Make sure that all structs defined in this file remain laid out so
@@ -50,10 +52,18 @@
  * instead.
  */
 
+struct mlx4_ib_alloc_ucontext_resp_v3 {
+	__u32	qp_tab_size;
+	__u16	bf_reg_size;
+	__u16	bf_regs_per_page;
+};
+
 struct mlx4_ib_alloc_ucontext_resp {
+	__u32	dev_caps;
 	__u32	qp_tab_size;
 	__u16	bf_reg_size;
 	__u16	bf_regs_per_page;
+	__u32	cqe_size;
 };
 
 struct mlx4_ib_alloc_pd_resp {
diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c
index 3d1899ff1076..e791e705f7b1 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
@@ -1755,7 +1755,7 @@ int mlx4_multi_func_init(struct mlx4_dev *dev)
 			spin_lock_init(&s_state->lock);
 		}
 
-		memset(&priv->mfunc.master.cmd_eqe, 0, sizeof(struct mlx4_eqe));
+		memset(&priv->mfunc.master.cmd_eqe, 0, dev->caps.eqe_size);
 		priv->mfunc.master.cmd_eqe.type = MLX4_EVENT_TYPE_CMD;
 		INIT_WORK(&priv->mfunc.master.comm_work,
 			  mlx4_master_comm_channel);
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_cq.c b/drivers/net/ethernet/mellanox/mlx4/en_cq.c
index aa9c2f6cf3c0..b8d0854a7ad1 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_cq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_cq.c
@@ -51,7 +51,7 @@ int mlx4_en_create_cq(struct mlx4_en_priv *priv,
 	int err;
 
 	cq->size = entries;
-	cq->buf_size = cq->size * sizeof(struct mlx4_cqe);
+	cq->buf_size = cq->size * mdev->dev->caps.cqe_size;
 
 	cq->ring = ring;
 	cq->is_tx = mode;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index edd9cb8d3e1d..93a325669582 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -1600,6 +1600,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
 		goto out;
 	}
 	priv->rx_ring_num = prof->rx_ring_num;
+	priv->cqe_factor = (mdev->dev->caps.cqe_size == 64) ? 1 : 0;
 	priv->mac_index = -1;
 	priv->msg_enable = MLX4_EN_MSG_LEVEL;
 	spin_lock_init(&priv->stats_lock);
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index 5aba5ecdf1e2..6fa106f6c0ec 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -566,6 +566,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
 	struct ethhdr *ethh;
 	dma_addr_t dma;
 	u64 s_mac;
+	int factor = priv->cqe_factor;
 
 	if (!priv->port_up)
 		return 0;
@@ -574,7 +575,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
 	 * descriptor offset can be deduced from the CQE index instead of
 	 * reading 'cqe->index' */
 	index = cq->mcq.cons_index & ring->size_mask;
-	cqe = &cq->buf[index];
+	cqe = &cq->buf[(index << factor) + factor];
 
 	/* Process all completed CQEs */
 	while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK,
@@ -709,7 +710,7 @@ next:
 
 		++cq->mcq.cons_index;
 		index = (cq->mcq.cons_index) & ring->size_mask;
-		cqe = &cq->buf[index];
+		cqe = &cq->buf[(index << factor) + factor];
 		if (++polled == budget) {
 			/* We are here because we reached the NAPI budget -
 			 * flush only pending LRO sessions */
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index b35094c590ba..25c157abdd92 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -315,12 +315,13 @@ static void mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq)
 	struct mlx4_cqe *buf = cq->buf;
 	u32 packets = 0;
 	u32 bytes = 0;
+	int factor = priv->cqe_factor;
 
 	if (!priv->port_up)
 		return;
 
 	index = cons_index & size_mask;
-	cqe = &buf[index];
+	cqe = &buf[(index << factor) + factor];
 	ring_index = ring->cons & size_mask;
 
 	/* Process all completed CQEs */
@@ -349,7 +350,7 @@ static void mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq)
 
 		++cons_index;
 		index = cons_index & size_mask;
-		cqe = &buf[index];
+		cqe = &buf[(index << factor) + factor];
 	}
 
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c
index b84a88bc44dc..c509a86db610 100644
--- a/drivers/net/ethernet/mellanox/mlx4/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/eq.c
@@ -101,15 +101,21 @@ static void eq_set_ci(struct mlx4_eq *eq, int req_not)
 	mb();
 }
 
-static struct mlx4_eqe *get_eqe(struct mlx4_eq *eq, u32 entry)
+static struct mlx4_eqe *get_eqe(struct mlx4_eq *eq, u32 entry, u8 eqe_factor)
 {
-	unsigned long off = (entry & (eq->nent - 1)) * MLX4_EQ_ENTRY_SIZE;
-	return eq->page_list[off / PAGE_SIZE].buf + off % PAGE_SIZE;
+	/* (entry & (eq->nent - 1)) gives us a cyclic array */
+	unsigned long offset = (entry & (eq->nent - 1)) * (MLX4_EQ_ENTRY_SIZE << eqe_factor);
+	/* CX3 is capable of extending the EQE from 32 to 64 bytes.
+	 * When this feature is enabled, the first (in the lower addresses)
+	 * 32 bytes in the 64 byte EQE are reserved and the next 32 bytes
+	 * contain the legacy EQE information.
+	 */
+	return eq->page_list[offset / PAGE_SIZE].buf + (offset + (eqe_factor ? MLX4_EQ_ENTRY_SIZE : 0)) % PAGE_SIZE;
 }
 
-static struct mlx4_eqe *next_eqe_sw(struct mlx4_eq *eq)
+static struct mlx4_eqe *next_eqe_sw(struct mlx4_eq *eq, u8 eqe_factor)
 {
-	struct mlx4_eqe *eqe = get_eqe(eq, eq->cons_index);
+	struct mlx4_eqe *eqe = get_eqe(eq, eq->cons_index, eqe_factor);
 	return !!(eqe->owner & 0x80) ^ !!(eq->cons_index & eq->nent) ? NULL : eqe;
 }
 
@@ -177,7 +183,7 @@ static void slave_event(struct mlx4_dev *dev, u8 slave, struct mlx4_eqe *eqe)
 		return;
 	}
 
-	memcpy(s_eqe, eqe, sizeof(struct mlx4_eqe) - 1);
+	memcpy(s_eqe, eqe, dev->caps.eqe_size - 1);
 	s_eqe->slave_id = slave;
 	/* ensure all information is written before setting the ownersip bit */
 	wmb();
@@ -441,7 +447,7 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq)
 	int i;
 	enum slave_port_gen_event gen_event;
 
-	while ((eqe = next_eqe_sw(eq))) {
+	while ((eqe = next_eqe_sw(eq, dev->caps.eqe_factor))) {
 		/*
 		 * Make sure we read EQ entry contents after we've
 		 * checked the ownership bit.
@@ -864,7 +870,8 @@ static int mlx4_create_eq(struct mlx4_dev *dev, int nent,
 
 	eq->dev   = dev;
 	eq->nent  = roundup_pow_of_two(max(nent, 2));
-	npages = PAGE_ALIGN(eq->nent * MLX4_EQ_ENTRY_SIZE) / PAGE_SIZE;
+	/* CX3 is capable of extending the CQE/EQE from 32 to 64 bytes */
+	npages = PAGE_ALIGN(eq->nent * (MLX4_EQ_ENTRY_SIZE << dev->caps.eqe_factor)) / PAGE_SIZE;
 
 	eq->page_list = kmalloc(npages * sizeof *eq->page_list,
 				GFP_KERNEL);
@@ -966,8 +973,9 @@ static void mlx4_free_eq(struct mlx4_dev *dev,
 	struct mlx4_priv *priv = mlx4_priv(dev);
 	struct mlx4_cmd_mailbox *mailbox;
 	int err;
-	int npages = PAGE_ALIGN(MLX4_EQ_ENTRY_SIZE * eq->nent) / PAGE_SIZE;
 	int i;
+	/* CX3 is capable of extending the CQE/EQE from 32 to 64 bytes */
+	int npages = PAGE_ALIGN((MLX4_EQ_ENTRY_SIZE << dev->caps.eqe_factor) * eq->nent) / PAGE_SIZE;
 
 	mailbox = mlx4_alloc_cmd_mailbox(dev);
 	if (IS_ERR(mailbox))
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index 4f30b99324cf..9a9de51ecc91 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -110,6 +110,8 @@ static void dump_dev_cap_flags(struct mlx4_dev *dev, u64 flags)
 		[42] = "Multicast VEP steering support",
 		[48] = "Counters support",
 		[59] = "Port management change event support",
+		[61] = "64 byte EQE support",
+		[62] = "64 byte CQE support",
 	};
 	int i;
 
@@ -235,7 +237,7 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave,
 		field = dev->caps.num_ports;
 		MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_NUM_PORTS_OFFSET);
 
-		size = 0; /* no PF behaviour is set for now */
+		size = dev->caps.function_caps; /* set PF behaviours */
 		MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_PF_BHVR_OFFSET);
 
 		field = 0; /* protected FMR support not available as yet */
@@ -1237,6 +1239,24 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param)
 	if (dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS)
 		*(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1 << 4);
 
+	/* CX3 is capable of extending CQEs/EQEs from 32 to 64 bytes */
+	if (dev->caps.flags & MLX4_DEV_CAP_FLAG_64B_EQE) {
+		*(inbox + INIT_HCA_EQE_CQE_OFFSETS / 4) |= cpu_to_be32(1 << 29);
+		dev->caps.eqe_size   = 64;
+		dev->caps.eqe_factor = 1;
+	} else {
+		dev->caps.eqe_size   = 32;
+		dev->caps.eqe_factor = 0;
+	}
+
+	if (dev->caps.flags & MLX4_DEV_CAP_FLAG_64B_CQE) {
+		*(inbox + INIT_HCA_EQE_CQE_OFFSETS / 4) |= cpu_to_be32(1 << 30);
+		dev->caps.cqe_size   = 64;
+		dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_64B_CQE;
+	} else {
+		dev->caps.cqe_size   = 32;
+	}
+
 	/* QPC/EEC/CQC/EQC/RDMARC attributes */
 
 	MLX4_PUT(inbox, param->qpc_base,      INIT_HCA_QPC_BASE_OFFSET);
@@ -1319,6 +1339,7 @@ int mlx4_QUERY_HCA(struct mlx4_dev *dev,
 	struct mlx4_cmd_mailbox *mailbox;
 	__be32 *outbox;
 	int err;
+	u8 byte_field;
 
 #define QUERY_HCA_GLOBAL_CAPS_OFFSET	0x04
 
@@ -1370,6 +1391,13 @@ int mlx4_QUERY_HCA(struct mlx4_dev *dev,
 			 INIT_HCA_LOG_MC_TABLE_SZ_OFFSET);
 	}
 
+	/* CX3 is capable of extending CQEs/EQEs from 32 to 64 bytes */
+	MLX4_GET(byte_field, outbox, INIT_HCA_EQE_CQE_OFFSETS);
+	if (byte_field & 0x20) /* 64-bytes eqe enabled */
+		param->dev_cap_enabled |= MLX4_DEV_CAP_64B_EQE_ENABLED;
+	if (byte_field & 0x40) /* 64-bytes cqe enabled */
+		param->dev_cap_enabled |= MLX4_DEV_CAP_64B_CQE_ENABLED;
+
 	/* TPT attributes */
 
 	MLX4_GET(param->dmpt_base,  outbox, INIT_HCA_DMPT_BASE_OFFSET);
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h
index 85abe9c11a22..2c2e7ade2a34 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.h
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.h
@@ -172,6 +172,7 @@ struct mlx4_init_hca_param {
 	u8  log_uar_sz;
 	u8  uar_page_sz; /* log pg sz in 4k chunks */
 	u8  fs_hash_enable_bits;
+	u64 dev_cap_enabled;
 };
 
 struct mlx4_init_ib_param {
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 2aa80afd98d2..4337f685175d 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -95,8 +95,14 @@ MODULE_PARM_DESC(log_num_mgm_entry_size, "log mgm size, that defines the num"
 					 " Not in use with device managed"
 					 " flow steering");
 
+static bool enable_64b_cqe_eqe;
+module_param(enable_64b_cqe_eqe, bool, 0444);
+MODULE_PARM_DESC(enable_64b_cqe_eqe,
+		 "Enable 64 byte CQEs/EQEs when the the FW supports this");
+
 #define HCA_GLOBAL_CAP_MASK            0
-#define PF_CONTEXT_BEHAVIOUR_MASK      0
+
+#define PF_CONTEXT_BEHAVIOUR_MASK	MLX4_FUNC_CAP_64B_EQE_CQE
 
 static char mlx4_version[] __devinitdata =
 	DRV_NAME ": Mellanox ConnectX core driver v"
@@ -386,6 +392,21 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 		dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH];
 
 	dev->caps.sqp_demux = (mlx4_is_master(dev)) ? MLX4_MAX_NUM_SLAVES : 0;
+
+	if (!enable_64b_cqe_eqe) {
+		if (dev_cap->flags &
+		    (MLX4_DEV_CAP_FLAG_64B_CQE | MLX4_DEV_CAP_FLAG_64B_EQE)) {
+			mlx4_warn(dev, "64B EQEs/CQEs supported by the device but not enabled\n");
+			dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_64B_CQE;
+			dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_64B_EQE;
+		}
+	}
+
+	if ((dev_cap->flags &
+	    (MLX4_DEV_CAP_FLAG_64B_CQE | MLX4_DEV_CAP_FLAG_64B_EQE)) &&
+	    mlx4_is_master(dev))
+		dev->caps.function_caps |= MLX4_FUNC_CAP_64B_EQE_CQE;
+
 	return 0;
 }
 /*The function checks if there are live vf, return the num of them*/
@@ -599,6 +620,21 @@ static int mlx4_slave_cap(struct mlx4_dev *dev)
 		goto err_mem;
 	}
 
+	if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_64B_EQE_ENABLED) {
+		dev->caps.eqe_size   = 64;
+		dev->caps.eqe_factor = 1;
+	} else {
+		dev->caps.eqe_size   = 32;
+		dev->caps.eqe_factor = 0;
+	}
+
+	if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_64B_CQE_ENABLED) {
+		dev->caps.cqe_size   = 64;
+		dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_64B_CQE;
+	} else {
+		dev->caps.cqe_size   = 32;
+	}
+
 	return 0;
 
 err_mem:
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index 9d27e42264e2..73b5c2ac5bd5 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -487,6 +487,7 @@ struct mlx4_en_priv {
 	int mac_index;
 	unsigned max_mtu;
 	int base_qpn;
+	int cqe_factor;
 
 	struct mlx4_en_rss_map rss_map;
 	__be32 ctrl_flags;
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 6d1acb04cd17..21821da2abfd 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -142,6 +142,8 @@ enum {
 	MLX4_DEV_CAP_FLAG_COUNTERS	= 1LL << 48,
 	MLX4_DEV_CAP_FLAG_SENSE_SUPPORT	= 1LL << 55,
 	MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV = 1LL << 59,
+	MLX4_DEV_CAP_FLAG_64B_EQE	= 1LL << 61,
+	MLX4_DEV_CAP_FLAG_64B_CQE	= 1LL << 62
 };
 
 enum {
@@ -151,6 +153,20 @@ enum {
 	MLX4_DEV_CAP_FLAG2_FS_EN		= 1LL <<  3
 };
 
+enum {
+	MLX4_DEV_CAP_64B_EQE_ENABLED	= 1LL << 0,
+	MLX4_DEV_CAP_64B_CQE_ENABLED	= 1LL << 1
+};
+
+enum {
+	MLX4_USER_DEV_CAP_64B_CQE	= 1L << 0
+};
+
+enum {
+	MLX4_FUNC_CAP_64B_EQE_CQE	= 1L << 0
+};
+
+
 #define MLX4_ATTR_EXTENDED_PORT_INFO	cpu_to_be16(0xff90)
 
 enum {
@@ -419,6 +435,11 @@ struct mlx4_caps {
 	u32			max_counters;
 	u8			port_ib_mtu[MLX4_MAX_PORTS + 1];
 	u16			sqp_demux;
+	u32			eqe_size;
+	u32			cqe_size;
+	u8			eqe_factor;
+	u32			userspace_caps; /* userspace must be aware of these */
+	u32			function_caps;  /* VFs must be aware of these */
 };
 
 struct mlx4_buf_list {
-- 
cgit v1.2.3


From e95c17e9caff4b106cc95b761f3e07964a5a0d9f Mon Sep 17 00:00:00 2001
From: Timur Tabi <timur@freescale.com>
Date: Tue, 16 Oct 2012 17:33:44 -0500
Subject: drivers/video: fsl-diu-fb: add support for set_gamma ioctls

The MPC5121 BSP comes with a gamma_set utility that initializes the gamma
table via an ioctl.  Unfortunately, the ioctl number that utility uses
is defined improperly, but we can still support it.

Signed-off-by: Timur Tabi <timur@freescale.com>
---
 drivers/video/fsl-diu-fb.c | 17 +++++++++++++++++
 include/linux/fsl-diu-fb.h |  9 +++++++++
 2 files changed, 26 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/video/fsl-diu-fb.c b/drivers/video/fsl-diu-fb.c
index 5b12e1783fac..9c4054760627 100644
--- a/drivers/video/fsl-diu-fb.c
+++ b/drivers/video/fsl-diu-fb.c
@@ -1181,6 +1181,23 @@ static int fsl_diu_ioctl(struct fb_info *info, unsigned int cmd,
 			ad->ckmin_b = ck.blue_min;
 		}
 		break;
+#ifdef CONFIG_PPC_MPC512x
+	case MFB_SET_GAMMA: {
+		struct fsl_diu_data *data = mfbi->parent;
+
+		if (copy_from_user(data->gamma, buf, sizeof(data->gamma)))
+			return -EFAULT;
+		setbits32(&data->diu_reg->gamma, 0); /* Force table reload */
+		break;
+	}
+	case MFB_GET_GAMMA: {
+		struct fsl_diu_data *data = mfbi->parent;
+
+		if (copy_to_user(buf, data->gamma, sizeof(data->gamma)))
+			return -EFAULT;
+		break;
+	}
+#endif
 	default:
 		dev_err(info->dev, "unknown ioctl command (0x%08X)\n", cmd);
 		return -ENOIOCTLCMD;
diff --git a/include/linux/fsl-diu-fb.h b/include/linux/fsl-diu-fb.h
index 11c16a1fb9e3..a1e8277120c7 100644
--- a/include/linux/fsl-diu-fb.h
+++ b/include/linux/fsl-diu-fb.h
@@ -46,6 +46,15 @@ struct aoi_display_offset {
 #define MFB_SET_PIXFMT		_IOW('M', 8, __u32)
 #define MFB_GET_PIXFMT		_IOR('M', 8, __u32)
 
+/*
+ * The MPC5121 BSP comes with a gamma_set utility that initializes the
+ * gamma table.  Unfortunately, it uses bad values for the IOCTL commands,
+ * but there's nothing we can do about it now.  These ioctls are only
+ * supported on the MPC5121.
+ */
+#define MFB_SET_GAMMA		_IOW('M', 1, __u8)
+#define MFB_GET_GAMMA		_IOR('M', 1, __u8)
+
 /*
  * The original definitions of MFB_SET_PIXFMT and MFB_GET_PIXFMT used the
  * wrong value for 'size' field of the ioctl.  The current macros above use the
-- 
cgit v1.2.3


From a8df7b1ab70bfd6f261fa5e96985fca638299acc Mon Sep 17 00:00:00 2001
From: Fabio Baltieri <fabio.baltieri@gmail.com>
Date: Sun, 4 Nov 2012 01:54:34 -0800
Subject: leds: add led_trigger_rename function

Implements a "led_trigger_rename" function to rename a trigger with
proper locking.

This assumes that led name was originally allocated in non-constant
storage.

Signed-off-by: Fabio Baltieri <fabio.baltieri@gmail.com>
Cc: Kurt Van Dijck <kurt.van.dijck@eia.be>
Signed-off-by: Bryan Wu <cooloney@gmail.com>
---
 drivers/leds/led-triggers.c | 13 +++++++++++++
 include/linux/leds.h        | 18 ++++++++++++++++++
 2 files changed, 31 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/leds/led-triggers.c b/drivers/leds/led-triggers.c
index 262eb4193710..a4fa4bf02d53 100644
--- a/drivers/leds/led-triggers.c
+++ b/drivers/leds/led-triggers.c
@@ -166,6 +166,19 @@ void led_trigger_set_default(struct led_classdev *led_cdev)
 }
 EXPORT_SYMBOL_GPL(led_trigger_set_default);
 
+void led_trigger_rename_static(const char *name, struct led_trigger *trig)
+{
+	/* new name must be on a temporary string to prevent races */
+	BUG_ON(name == trig->name);
+
+	down_write(&triggers_list_lock);
+	/* this assumes that trig->name was originaly allocated to
+	 * non constant storage */
+	strcpy((char *)trig->name, name);
+	up_write(&triggers_list_lock);
+}
+EXPORT_SYMBOL_GPL(led_trigger_rename_static);
+
 /* LED Trigger Interface */
 
 int led_trigger_register(struct led_trigger *trig)
diff --git a/include/linux/leds.h b/include/linux/leds.h
index 6e53bb31c220..8107592cfc41 100644
--- a/include/linux/leds.h
+++ b/include/linux/leds.h
@@ -139,6 +139,24 @@ extern void led_blink_set_oneshot(struct led_classdev *led_cdev,
 extern void led_set_brightness(struct led_classdev *led_cdev,
 			       enum led_brightness brightness);
 
+/**
+ * led_trigger_rename_static - rename a trigger
+ * @name: the new trigger name
+ * @trig: the LED trigger to rename
+ *
+ * Change a LED trigger name by copying the string passed in
+ * name into current trigger name, which MUST be large
+ * enough for the new string.
+ *
+ * Note that name must NOT point to the same string used
+ * during LED registration, as that could lead to races.
+ *
+ * This is meant to be used on triggers with statically
+ * allocated name.
+ */
+extern void led_trigger_rename_static(const char *name,
+				      struct led_trigger *trig);
+
 /*
  * LED Triggers
  */
-- 
cgit v1.2.3


From 057407c732b7761cf417a1e6633a02d9d473340d Mon Sep 17 00:00:00 2001
From: Jingoo Han <jg1.han@samsung.com>
Date: Sun, 18 Nov 2012 21:35:55 -0800
Subject: led: Add dependency on CONFIG_LEDS_TRIGGERS to
 led_trigger_rename_static()

This patch fixes build warnings when CONFIG_LEDS_TRIGGERS is
disabled as below:
include/linux/leds.h:158:18: warning: 'struct led_trigger' declared inside parameter list [enabled by default]
include/linux/leds.h:158:18: warning: its scope is only this definition or declaration, which is probably not what you want [enabled
by default]

Signed-off-by: Jingoo Han <jg1.han@samsung.com>
Acked-by: Fabio Baltieri <fabio.baltieri@gmail.com>
Signed-off-by: Bryan Wu <cooloney@gmail.com>
---
 include/linux/leds.h | 35 +++++++++++++++++------------------
 1 file changed, 17 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/leds.h b/include/linux/leds.h
index 8107592cfc41..0d9b5eed714e 100644
--- a/include/linux/leds.h
+++ b/include/linux/leds.h
@@ -139,24 +139,6 @@ extern void led_blink_set_oneshot(struct led_classdev *led_cdev,
 extern void led_set_brightness(struct led_classdev *led_cdev,
 			       enum led_brightness brightness);
 
-/**
- * led_trigger_rename_static - rename a trigger
- * @name: the new trigger name
- * @trig: the LED trigger to rename
- *
- * Change a LED trigger name by copying the string passed in
- * name into current trigger name, which MUST be large
- * enough for the new string.
- *
- * Note that name must NOT point to the same string used
- * during LED registration, as that could lead to races.
- *
- * This is meant to be used on triggers with statically
- * allocated name.
- */
-extern void led_trigger_rename_static(const char *name,
-				      struct led_trigger *trig);
-
 /*
  * LED Triggers
  */
@@ -197,6 +179,23 @@ extern void led_trigger_blink_oneshot(struct led_trigger *trigger,
 				      unsigned long *delay_on,
 				      unsigned long *delay_off,
 				      int invert);
+/**
+ * led_trigger_rename_static - rename a trigger
+ * @name: the new trigger name
+ * @trig: the LED trigger to rename
+ *
+ * Change a LED trigger name by copying the string passed in
+ * name into current trigger name, which MUST be large
+ * enough for the new string.
+ *
+ * Note that name must NOT point to the same string used
+ * during LED registration, as that could lead to races.
+ *
+ * This is meant to be used on triggers with statically
+ * allocated name.
+ */
+extern void led_trigger_rename_static(const char *name,
+				      struct led_trigger *trig);
 
 #else
 
-- 
cgit v1.2.3


From e3725ec015dfbbeb896295cf2b3a995f28b0630e Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Fri, 16 Nov 2012 12:25:01 -0500
Subject: NFSv4.1: Shrink struct nfs4_sequence_res by moving the session
 pointer

Move the session pointer into the slot table, then have struct nfs4_slot
point to that slot table.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4_fs.h          |  3 ++-
 fs/nfs/nfs4proc.c         | 33 +++++++++++++++++++++++----------
 fs/nfs/nfs4state.c        |  2 +-
 fs/nfs/nfs4xdr.c          |  8 +++++---
 include/linux/nfs_fs_sb.h |  1 +
 include/linux/nfs_xdr.h   |  2 +-
 6 files changed, 33 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 36880b9aa91e..42c58691fb41 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -258,7 +258,8 @@ extern int nfs4_proc_get_lease_time(struct nfs_client *clp,
 extern int nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data,
 				  bool sync);
 
-extern struct nfs4_slot *nfs4_alloc_slots(u32 max_slots, gfp_t gfp_flags);
+extern struct nfs4_slot *nfs4_alloc_slots(struct nfs4_slot_table *table,
+		u32 max_slots, gfp_t gfp_flags);
 
 static inline bool
 is_ds_only_client(struct nfs_client *clp)
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 14b39742b6e4..5b61c4a83191 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -467,25 +467,28 @@ void nfs4_check_drain_bc_complete(struct nfs4_session *ses)
 
 static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res)
 {
+	struct nfs4_session *session;
 	struct nfs4_slot_table *tbl;
 
-	tbl = &res->sr_session->fc_slot_table;
 	if (!res->sr_slot) {
 		/* just wake up the next guy waiting since
 		 * we may have not consumed a slot after all */
 		dprintk("%s: No slot\n", __func__);
 		return;
 	}
+	tbl = res->sr_slot->table;
+	session = tbl->session;
 
 	spin_lock(&tbl->slot_tbl_lock);
 	nfs4_free_slot(tbl, res->sr_slot - tbl->slots);
-	nfs4_check_drain_fc_complete(res->sr_session);
+	nfs4_check_drain_fc_complete(session);
 	spin_unlock(&tbl->slot_tbl_lock);
 	res->sr_slot = NULL;
 }
 
 static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res)
 {
+	struct nfs4_session *session;
 	struct nfs4_slot *slot;
 	unsigned long timestamp;
 	struct nfs_client *clp;
@@ -504,6 +507,7 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *
 		goto out;
 
 	slot = res->sr_slot;
+	session = slot->table->session;
 
 	/* Check the SEQUENCE operation status */
 	switch (res->sr_status) {
@@ -511,7 +515,7 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *
 		/* Update the slot's sequence and clientid lease timer */
 		++slot->seq_nr;
 		timestamp = slot->renewal_time;
-		clp = res->sr_session->clp;
+		clp = session->clp;
 		do_renew_lease(clp, timestamp);
 		/* Check sequence flags */
 		if (res->sr_status_flags != 0)
@@ -524,7 +528,7 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *
 		 */
 		dprintk("%s: slot=%td seq=%d: Operation in progress\n",
 			__func__,
-			slot - res->sr_session->fc_slot_table.slots,
+			slot - session->fc_slot_table.slots,
 			slot->seq_nr);
 		goto out_retry;
 	default:
@@ -546,7 +550,7 @@ out_retry:
 static int nfs4_sequence_done(struct rpc_task *task,
 			       struct nfs4_sequence_res *res)
 {
-	if (res->sr_session == NULL)
+	if (res->sr_slot == NULL)
 		return 1;
 	return nfs41_sequence_done(task, res);
 }
@@ -591,7 +595,6 @@ static void nfs41_init_sequence(struct nfs4_sequence_args *args,
 	args->sa_cache_this = 0;
 	if (cache_reply)
 		args->sa_cache_this = 1;
-	res->sr_session = NULL;
 	res->sr_slot = NULL;
 }
 
@@ -646,7 +649,6 @@ int nfs41_setup_sequence(struct nfs4_session *session,
 
 	dprintk("<-- %s slotid=%d seqid=%d\n", __func__, slotid, slot->seq_nr);
 
-	res->sr_session = session;
 	res->sr_slot = slot;
 	res->sr_status_flags = 0;
 	/*
@@ -5659,9 +5661,18 @@ int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo)
 	return status;
 }
 
-struct nfs4_slot *nfs4_alloc_slots(u32 max_slots, gfp_t gfp_flags)
+struct nfs4_slot *nfs4_alloc_slots(struct nfs4_slot_table *table,
+		u32 max_slots, gfp_t gfp_flags)
 {
-	return kmalloc_array(max_slots, sizeof(struct nfs4_slot), gfp_flags);
+	struct nfs4_slot *tbl;
+	u32 i;
+
+	tbl = kmalloc_array(max_slots, sizeof(*tbl), gfp_flags);
+	if (tbl != NULL) {
+		for (i = 0; i < max_slots; i++)
+			tbl[i].table = table;
+	}
+	return tbl;
 }
 
 static void nfs4_add_and_init_slots(struct nfs4_slot_table *tbl,
@@ -5699,7 +5710,7 @@ static int nfs4_realloc_slot_table(struct nfs4_slot_table *tbl, u32 max_reqs,
 
 	/* Does the newly negotiated max_reqs match the existing slot table? */
 	if (max_reqs != tbl->max_slots) {
-		new = nfs4_alloc_slots(max_reqs, GFP_NOFS);
+		new = nfs4_alloc_slots(tbl, max_reqs, GFP_NOFS);
 		if (!new)
 			goto out;
 	}
@@ -5738,11 +5749,13 @@ static int nfs4_setup_session_slot_tables(struct nfs4_session *ses)
 	dprintk("--> %s\n", __func__);
 	/* Fore channel */
 	tbl = &ses->fc_slot_table;
+	tbl->session = ses;
 	status = nfs4_realloc_slot_table(tbl, ses->fc_attrs.max_reqs, 1);
 	if (status) /* -ENOMEM */
 		return status;
 	/* Back channel */
 	tbl = &ses->bc_slot_table;
+	tbl->session = ses;
 	status = nfs4_realloc_slot_table(tbl, ses->bc_attrs.max_reqs, 0);
 	if (status && tbl->slots == NULL)
 		/* Fore and back channel share a connection so get
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 96fcbb97fd6a..9495789c425b 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -2033,7 +2033,7 @@ static int nfs4_recall_slot(struct nfs_client *clp)
 		return 0;
 	nfs4_begin_drain_session(clp);
 	fc_tbl = &clp->cl_session->fc_slot_table;
-	new = nfs4_alloc_slots(fc_tbl->target_max_slots, GFP_NOFS);
+	new = nfs4_alloc_slots(fc_tbl, fc_tbl->target_max_slots, GFP_NOFS);
         if (!new)
 		return -ENOMEM;
 
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 672d9b0ef2c5..4126f054610a 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -5507,12 +5507,13 @@ static int decode_sequence(struct xdr_stream *xdr,
 			   struct rpc_rqst *rqstp)
 {
 #if defined(CONFIG_NFS_V4_1)
+	struct nfs4_session *session;
 	struct nfs4_sessionid id;
 	u32 dummy;
 	int status;
 	__be32 *p;
 
-	if (!res->sr_session)
+	if (res->sr_slot == NULL)
 		return 0;
 
 	status = decode_op_hdr(xdr, OP_SEQUENCE);
@@ -5526,8 +5527,9 @@ static int decode_sequence(struct xdr_stream *xdr,
 	 * sequence number, the server is looney tunes.
 	 */
 	status = -EREMOTEIO;
+	session = res->sr_slot->table->session;
 
-	if (memcmp(id.data, res->sr_session->sess_id.data,
+	if (memcmp(id.data, session->sess_id.data,
 		   NFS4_MAX_SESSIONID_LEN)) {
 		dprintk("%s Invalid session id\n", __func__);
 		goto out_err;
@@ -5545,7 +5547,7 @@ static int decode_sequence(struct xdr_stream *xdr,
 	}
 	/* slot id */
 	dummy = be32_to_cpup(p++);
-	if (dummy != res->sr_slot - res->sr_session->fc_slot_table.slots) {
+	if (dummy != res->sr_slot - session->fc_slot_table.slots) {
 		dprintk("%s Invalid slot id\n", __func__);
 		goto out_err;
 	}
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 97c8f9191880..b0412873d29c 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -209,6 +209,7 @@ struct nfs_server {
 /* Sessions */
 #define SLOT_TABLE_SZ DIV_ROUND_UP(NFS4_MAX_SLOT_TABLE, 8*sizeof(long))
 struct nfs4_slot_table {
+	struct nfs4_session *session;		/* Parent session */
 	struct nfs4_slot *slots;		/* seqid per slot */
 	unsigned long   used_slots[SLOT_TABLE_SZ]; /* used/unused bitmap */
 	spinlock_t	slot_tbl_lock;
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 9cb1c63a70c2..0fd88ab0e814 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -187,6 +187,7 @@ struct nfs4_channel_attrs {
 
 /* nfs41 sessions slot seqid */
 struct nfs4_slot {
+	struct nfs4_slot_table	*table;
 	unsigned long		renewal_time;
 	u32		 	seq_nr;
 };
@@ -198,7 +199,6 @@ struct nfs4_sequence_args {
 };
 
 struct nfs4_sequence_res {
-	struct nfs4_session	*sr_session;
 	struct nfs4_slot	*sr_slot;	/* slot used to send request */
 	int			sr_status;	/* sequence operation status */
 	u32			sr_status_flags;
-- 
cgit v1.2.3


From df2fabffbace8988f3265585ec793ff9deccdea7 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Fri, 16 Nov 2012 12:45:06 -0500
Subject: NFSv4.1: Label each entry in the session slot tables with its slot
 number

Instead of doing slot table pointer gymnastics every time we want to
know which slot we're using.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c       | 12 +++++++-----
 fs/nfs/nfs4xdr.c        |  2 +-
 include/linux/nfs_xdr.h |  1 +
 3 files changed, 9 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 5b61c4a83191..4311dba49c58 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -526,9 +526,9 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *
 		 * returned NFS4ERR_DELAY as per Section 2.10.6.2
 		 * of RFC5661.
 		 */
-		dprintk("%s: slot=%td seq=%d: Operation in progress\n",
+		dprintk("%s: slot=%u seq=%u: Operation in progress\n",
 			__func__,
-			slot - session->fc_slot_table.slots,
+			slot->slot_nr,
 			slot->seq_nr);
 		goto out_retry;
 	default:
@@ -671,9 +671,9 @@ int nfs4_setup_sequence(const struct nfs_server *server,
 	if (session == NULL)
 		goto out;
 
-	dprintk("--> %s clp %p session %p sr_slot %td\n",
+	dprintk("--> %s clp %p session %p sr_slot %d\n",
 		__func__, session->clp, session, res->sr_slot ?
-			res->sr_slot - session->fc_slot_table.slots : -1);
+			res->sr_slot->slot_nr : -1);
 
 	ret = nfs41_setup_sequence(session, args, res, task);
 out:
@@ -5669,8 +5669,10 @@ struct nfs4_slot *nfs4_alloc_slots(struct nfs4_slot_table *table,
 
 	tbl = kmalloc_array(max_slots, sizeof(*tbl), gfp_flags);
 	if (tbl != NULL) {
-		for (i = 0; i < max_slots; i++)
+		for (i = 0; i < max_slots; i++) {
 			tbl[i].table = table;
+			tbl[i].slot_nr = i;
+		}
 	}
 	return tbl;
 }
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 4126f054610a..50bac7066160 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -5547,7 +5547,7 @@ static int decode_sequence(struct xdr_stream *xdr,
 	}
 	/* slot id */
 	dummy = be32_to_cpup(p++);
-	if (dummy != res->sr_slot - session->fc_slot_table.slots) {
+	if (dummy != res->sr_slot->slot_nr) {
 		dprintk("%s Invalid slot id\n", __func__);
 		goto out_err;
 	}
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 0fd88ab0e814..9c9b76c94b46 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -189,6 +189,7 @@ struct nfs4_channel_attrs {
 struct nfs4_slot {
 	struct nfs4_slot_table	*table;
 	unsigned long		renewal_time;
+	u32			slot_nr;
 	u32		 	seq_nr;
 };
 
-- 
cgit v1.2.3


From 2b2fa71723f955d5b4a0f4edd99cf3cd69ceafd1 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Fri, 16 Nov 2012 12:58:36 -0500
Subject: NFSv4.1: Simplify struct nfs4_sequence_args too

Replace the session pointer + slotid with a pointer to the
allocated slot.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c       |  6 +++---
 fs/nfs/nfs4xdr.c        | 21 ++++++++++-----------
 include/linux/nfs_xdr.h |  3 +--
 3 files changed, 14 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 4311dba49c58..6c41a34e34b4 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -591,7 +591,7 @@ out:
 static void nfs41_init_sequence(struct nfs4_sequence_args *args,
 		struct nfs4_sequence_res *res, int cache_reply)
 {
-	args->sa_session = NULL;
+	args->sa_slot = NULL;
 	args->sa_cache_this = 0;
 	if (cache_reply)
 		args->sa_cache_this = 1;
@@ -644,8 +644,8 @@ int nfs41_setup_sequence(struct nfs4_session *session,
 	rpc_task_set_priority(task, RPC_PRIORITY_NORMAL);
 	slot = tbl->slots + slotid;
 	slot->renewal_time = jiffies;
-	args->sa_session = session;
-	args->sa_slotid = slotid;
+
+	args->sa_slot = slot;
 
 	dprintk("<-- %s slotid=%d seqid=%d\n", __func__, slotid, slot->seq_nr);
 
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 50bac7066160..27b0fec1a6b0 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -1833,18 +1833,16 @@ static void encode_sequence(struct xdr_stream *xdr,
 			    struct compound_hdr *hdr)
 {
 #if defined(CONFIG_NFS_V4_1)
-	struct nfs4_session *session = args->sa_session;
+	struct nfs4_session *session;
 	struct nfs4_slot_table *tp;
-	struct nfs4_slot *slot;
+	struct nfs4_slot *slot = args->sa_slot;
 	__be32 *p;
 
-	if (!session)
+	if (slot == NULL)
 		return;
 
-	tp = &session->fc_slot_table;
-
-	WARN_ON(args->sa_slotid == NFS4_MAX_SLOT_TABLE);
-	slot = tp->slots + args->sa_slotid;
+	tp = slot->table;
+	session = tp->session;
 
 	encode_op_hdr(xdr, OP_SEQUENCE, decode_sequence_maxsz, hdr);
 
@@ -1858,12 +1856,12 @@ static void encode_sequence(struct xdr_stream *xdr,
 		((u32 *)session->sess_id.data)[1],
 		((u32 *)session->sess_id.data)[2],
 		((u32 *)session->sess_id.data)[3],
-		slot->seq_nr, args->sa_slotid,
+		slot->seq_nr, slot->slot_nr,
 		tp->highest_used_slotid, args->sa_cache_this);
 	p = reserve_space(xdr, NFS4_MAX_SESSIONID_LEN + 16);
 	p = xdr_encode_opaque_fixed(p, session->sess_id.data, NFS4_MAX_SESSIONID_LEN);
 	*p++ = cpu_to_be32(slot->seq_nr);
-	*p++ = cpu_to_be32(args->sa_slotid);
+	*p++ = cpu_to_be32(slot->slot_nr);
 	*p++ = cpu_to_be32(tp->highest_used_slotid);
 	*p = cpu_to_be32(args->sa_cache_this);
 #endif /* CONFIG_NFS_V4_1 */
@@ -2025,8 +2023,9 @@ static void encode_free_stateid(struct xdr_stream *xdr,
 static u32 nfs4_xdr_minorversion(const struct nfs4_sequence_args *args)
 {
 #if defined(CONFIG_NFS_V4_1)
-	if (args->sa_session)
-		return args->sa_session->clp->cl_mvops->minor_version;
+
+	if (args->sa_slot)
+		return args->sa_slot->table->session->clp->cl_mvops->minor_version;
 #endif /* CONFIG_NFS_V4_1 */
 	return 0;
 }
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 9c9b76c94b46..deb31bbbb857 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -194,8 +194,7 @@ struct nfs4_slot {
 };
 
 struct nfs4_sequence_args {
-	struct nfs4_session	*sa_session;
-	u32			sa_slotid;
+	struct nfs4_slot	*sa_slot;
 	u8			sa_cache_this;
 };
 
-- 
cgit v1.2.3


From 31fbcda71489d8cbe2b82819eaab4818524e3a49 Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Fri, 28 Sep 2012 10:29:07 +0100
Subject: Input: bu21013_ts - move GPIO init and exit functions into the driver

These GPIO init and exit functions have no place in platform data, they
should be part of the driver instead,

Acked-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 arch/arm/mach-ux500/board-mop500-stuib.c | 71 +-------------------------------
 drivers/input/touchscreen/bu21013_ts.c   | 69 ++++++++++++++++++++++---------
 include/linux/input/bu21013.h            | 10 +----
 3 files changed, 53 insertions(+), 97 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-ux500/board-mop500-stuib.c b/arch/arm/mach-ux500/board-mop500-stuib.c
index 564f57d5d8a7..7e1f294f0434 100644
--- a/arch/arm/mach-ux500/board-mop500-stuib.c
+++ b/arch/arm/mach-ux500/board-mop500-stuib.c
@@ -77,9 +77,6 @@ static struct i2c_board_info __initdata mop500_i2c0_devices_stuib[] = {
  * BU21013 ROHM touchscreen interface on the STUIBs
  */
 
-/* tracks number of bu21013 devices being enabled */
-static int bu21013_devices;
-
 #define TOUCH_GPIO_PIN  84
 
 #define TOUCH_XMAX	384
@@ -88,73 +85,8 @@ static int bu21013_devices;
 #define PRCMU_CLOCK_OCR		0x1CC
 #define TSC_EXT_CLOCK_9_6MHZ	0x840000
 
-/**
- * bu21013_gpio_board_init : configures the touch panel.
- * @reset_pin: reset pin number
- * This function can be used to configures
- * the voltage and reset the touch panel controller.
- */
-static int bu21013_gpio_board_init(int reset_pin)
-{
-	int retval = 0;
-
-	bu21013_devices++;
-	if (bu21013_devices == 1) {
-		retval = gpio_request(reset_pin, "touchp_reset");
-		if (retval) {
-			printk(KERN_ERR "Unable to request gpio reset_pin");
-			return retval;
-		}
-		retval = gpio_direction_output(reset_pin, 1);
-		if (retval < 0) {
-			printk(KERN_ERR "%s: gpio direction failed\n",
-					__func__);
-			return retval;
-		}
-	}
-
-	return retval;
-}
-
-/**
- * bu21013_gpio_board_exit : deconfigures the touch panel controller
- * @reset_pin: reset pin number
- * This function can be used to deconfigures the chip selection
- * for touch panel controller.
- */
-static int bu21013_gpio_board_exit(int reset_pin)
-{
-	int retval = 0;
-
-	if (bu21013_devices == 1) {
-		retval = gpio_direction_output(reset_pin, 0);
-		if (retval < 0) {
-			printk(KERN_ERR "%s: gpio direction failed\n",
-					__func__);
-			return retval;
-		}
-		gpio_set_value(reset_pin, 0);
-	}
-	bu21013_devices--;
-
-	return retval;
-}
-
-/**
- * bu21013_read_pin_val : get the interrupt pin value
- * This function can be used to get the interrupt pin value for touch panel
- * controller.
- */
-static int bu21013_read_pin_val(void)
-{
-	return gpio_get_value(TOUCH_GPIO_PIN);
-}
-
 static struct bu21013_platform_device tsc_plat_device = {
-	.cs_en = bu21013_gpio_board_init,
-	.cs_dis = bu21013_gpio_board_exit,
-	.irq_read_val = bu21013_read_pin_val,
-	.irq = NOMADIK_GPIO_TO_IRQ(TOUCH_GPIO_PIN),
+	.touch_pin = TOUCH_GPIO_PIN,
 	.touch_x_max = TOUCH_XMAX,
 	.touch_y_max = TOUCH_YMAX,
 	.ext_clk = false,
@@ -171,7 +103,6 @@ static struct i2c_board_info __initdata u8500_i2c3_devices_stuib[] = {
 		I2C_BOARD_INFO("bu21013_tp", 0x5D),
 		.platform_data = &tsc_plat_device,
 	},
-
 };
 
 void __init mop500_stuib_init(void)
diff --git a/drivers/input/touchscreen/bu21013_ts.c b/drivers/input/touchscreen/bu21013_ts.c
index 1e8cddd06c60..c6f6a04ec673 100644
--- a/drivers/input/touchscreen/bu21013_ts.c
+++ b/drivers/input/touchscreen/bu21013_ts.c
@@ -14,6 +14,7 @@
 #include <linux/slab.h>
 #include <linux/regulator/consumer.h>
 #include <linux/module.h>
+#include <linux/gpio.h>
 
 #define PEN_DOWN_INTR	0
 #define MAX_FINGERS	2
@@ -148,11 +149,12 @@
 struct bu21013_ts_data {
 	struct i2c_client *client;
 	wait_queue_head_t wait;
-	bool touch_stopped;
 	const struct bu21013_platform_device *chip;
 	struct input_dev *in_dev;
-	unsigned int intr_pin;
 	struct regulator *regulator;
+	unsigned int irq;
+	unsigned int intr_pin;
+	bool touch_stopped;
 };
 
 /**
@@ -262,7 +264,7 @@ static irqreturn_t bu21013_gpio_irq(int irq, void *device_data)
 			return IRQ_NONE;
 		}
 
-		data->intr_pin = data->chip->irq_read_val();
+		data->intr_pin = gpio_get_value(data->chip->touch_pin);
 		if (data->intr_pin == PEN_DOWN_INTR)
 			wait_event_timeout(data->wait, data->touch_stopped,
 					   msecs_to_jiffies(2));
@@ -418,9 +420,32 @@ static void bu21013_free_irq(struct bu21013_ts_data *bu21013_data)
 {
 	bu21013_data->touch_stopped = true;
 	wake_up(&bu21013_data->wait);
-	free_irq(bu21013_data->chip->irq, bu21013_data);
+	free_irq(bu21013_data->irq, bu21013_data);
 }
 
+/**
+ * bu21013_cs_disable() - deconfigures the touch panel controller
+ * @bu21013_data: device structure pointer
+ *
+ * This function is used to deconfigure the chip selection
+ * for touch panel controller.
+ */
+static void bu21013_cs_disable(struct bu21013_ts_data *bu21013_data)
+{
+	int error;
+
+	error = gpio_direction_output(bu21013_data->chip->cs_pin, 0);
+	if (error < 0)
+		dev_warn(&bu21013_data->client->dev,
+			 "%s: gpio direction failed, error: %d\n",
+			 __func__, error);
+	else
+		gpio_set_value(bu21013_data->chip->cs_pin, 0);
+
+	gpio_free(bu21013_data->chip->cs_pin);
+}
+
+
 /**
  * bu21013_probe() - initializes the i2c-client touchscreen driver
  * @client: i2c client structure pointer
@@ -430,7 +455,7 @@ static void bu21013_free_irq(struct bu21013_ts_data *bu21013_data)
  * driver and returns integer.
  */
 static int bu21013_probe(struct i2c_client *client,
-					const struct i2c_device_id *id)
+			 const struct i2c_device_id *id)
 {
 	struct bu21013_ts_data *bu21013_data;
 	struct input_dev *in_dev;
@@ -449,6 +474,11 @@ static int bu21013_probe(struct i2c_client *client,
 		return -EINVAL;
 	}
 
+	if (!gpio_is_valid(pdata->touch_pin)) {
+		dev_err(&client->dev, "invalid touch_pin supplied\n");
+		return -EINVAL;
+	}
+
 	bu21013_data = kzalloc(sizeof(struct bu21013_ts_data), GFP_KERNEL);
 	in_dev = input_allocate_device();
 	if (!bu21013_data || !in_dev) {
@@ -460,6 +490,7 @@ static int bu21013_probe(struct i2c_client *client,
 	bu21013_data->in_dev = in_dev;
 	bu21013_data->chip = pdata;
 	bu21013_data->client = client;
+	bu21013_data->irq = gpio_to_irq(pdata->touch_pin);
 
 	bu21013_data->regulator = regulator_get(&client->dev, "avdd");
 	if (IS_ERR(bu21013_data->regulator)) {
@@ -478,12 +509,11 @@ static int bu21013_probe(struct i2c_client *client,
 	init_waitqueue_head(&bu21013_data->wait);
 
 	/* configure the gpio pins */
-	if (pdata->cs_en) {
-		error = pdata->cs_en(pdata->cs_pin);
-		if (error < 0) {
-			dev_err(&client->dev, "chip init failed\n");
-			goto err_disable_regulator;
-		}
+	error = gpio_request_one(pdata->cs_pin, GPIOF_OUT_INIT_HIGH,
+				 "touchp_reset");
+	if (error < 0) {
+		dev_err(&client->dev, "Unable to request gpio reset_pin\n");
+		goto err_disable_regulator;
 	}
 
 	/* configure the touch panel controller */
@@ -508,12 +538,13 @@ static int bu21013_probe(struct i2c_client *client,
 						pdata->touch_y_max, 0, 0);
 	input_set_drvdata(in_dev, bu21013_data);
 
-	error = request_threaded_irq(pdata->irq, NULL, bu21013_gpio_irq,
+	error = request_threaded_irq(bu21013_data->irq, NULL, bu21013_gpio_irq,
 				     IRQF_TRIGGER_FALLING | IRQF_SHARED |
 					IRQF_ONESHOT,
 				     DRIVER_TP, bu21013_data);
 	if (error) {
-		dev_err(&client->dev, "request irq %d failed\n", pdata->irq);
+		dev_err(&client->dev, "request irq %d failed\n",
+			bu21013_data->irq);
 		goto err_cs_disable;
 	}
 
@@ -531,7 +562,7 @@ static int bu21013_probe(struct i2c_client *client,
 err_free_irq:
 	bu21013_free_irq(bu21013_data);
 err_cs_disable:
-	pdata->cs_dis(pdata->cs_pin);
+	bu21013_cs_disable(bu21013_data);
 err_disable_regulator:
 	regulator_disable(bu21013_data->regulator);
 err_put_regulator:
@@ -555,7 +586,7 @@ static int bu21013_remove(struct i2c_client *client)
 
 	bu21013_free_irq(bu21013_data);
 
-	bu21013_data->chip->cs_dis(bu21013_data->chip->cs_pin);
+	bu21013_cs_disable(bu21013_data);
 
 	input_unregister_device(bu21013_data->in_dev);
 
@@ -584,9 +615,9 @@ static int bu21013_suspend(struct device *dev)
 
 	bu21013_data->touch_stopped = true;
 	if (device_may_wakeup(&client->dev))
-		enable_irq_wake(bu21013_data->chip->irq);
+		enable_irq_wake(bu21013_data->irq);
 	else
-		disable_irq(bu21013_data->chip->irq);
+		disable_irq(bu21013_data->irq);
 
 	regulator_disable(bu21013_data->regulator);
 
@@ -621,9 +652,9 @@ static int bu21013_resume(struct device *dev)
 	bu21013_data->touch_stopped = false;
 
 	if (device_may_wakeup(&client->dev))
-		disable_irq_wake(bu21013_data->chip->irq);
+		disable_irq_wake(bu21013_data->irq);
 	else
-		enable_irq(bu21013_data->chip->irq);
+		enable_irq(bu21013_data->irq);
 
 	return 0;
 }
diff --git a/include/linux/input/bu21013.h b/include/linux/input/bu21013.h
index 05e03284b92a..6230d76bde5d 100644
--- a/include/linux/input/bu21013.h
+++ b/include/linux/input/bu21013.h
@@ -9,13 +9,10 @@
 
 /**
  * struct bu21013_platform_device - Handle the platform data
- * @cs_en:	pointer to the cs enable function
- * @cs_dis:	pointer to the cs disable function
- * @irq_read_val:    pointer to read the pen irq value function
  * @touch_x_max: touch x max
  * @touch_y_max: touch y max
  * @cs_pin: chip select pin
- * @irq: irq pin
+ * @touch_pin: touch gpio pin
  * @ext_clk: external clock flag
  * @x_flip: x flip flag
  * @y_flip: y flip flag
@@ -24,13 +21,10 @@
  * This is used to handle the platform data
  */
 struct bu21013_platform_device {
-	int (*cs_en)(int reset_pin);
-	int (*cs_dis)(int reset_pin);
-	int (*irq_read_val)(void);
 	int touch_x_max;
 	int touch_y_max;
 	unsigned int cs_pin;
-	unsigned int irq;
+	unsigned int touch_pin;
 	bool ext_clk;
 	bool x_flip;
 	bool y_flip;
-- 
cgit v1.2.3


From 972deb4f49b5b6703d9c6117ba0aeda2180d4447 Mon Sep 17 00:00:00 2001
From: Shubhrajyoti D <shubhrajyoti@ti.com>
Date: Mon, 26 Nov 2012 15:25:11 +0530
Subject: i2c: omap: Remove the OMAP_I2C_FLAG_RESET_REGS_POSTIDLE flag

The OMAP_I2C_FLAG_RESET_REGS_POSTIDLE is not used anymore
in the i2c driver. Remove the flag.

Signed-off-by: Shubhrajyoti D <shubhrajyoti@ti.com>
Reviewed-by: Felipe Balbi <balbi@ti.com>
Signed-off-by: Wolfram Sang <w.sang@pengutronix.de>
---
 arch/arm/mach-omap2/omap_hwmod_33xx_data.c | 3 +--
 arch/arm/mach-omap2/omap_hwmod_3xxx_data.c | 9 +++------
 arch/arm/mach-omap2/omap_hwmod_44xx_data.c | 3 +--
 drivers/i2c/busses/i2c-omap.c              | 3 +--
 include/linux/i2c-omap.h                   | 1 -
 5 files changed, 6 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-omap2/omap_hwmod_33xx_data.c b/arch/arm/mach-omap2/omap_hwmod_33xx_data.c
index 59d5c1cd316d..c9a186bc6d40 100644
--- a/arch/arm/mach-omap2/omap_hwmod_33xx_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_33xx_data.c
@@ -1103,8 +1103,7 @@ static struct omap_hwmod_class i2c_class = {
 };
 
 static struct omap_i2c_dev_attr i2c_dev_attr = {
-	.flags = OMAP_I2C_FLAG_BUS_SHIFT_NONE |
-		  OMAP_I2C_FLAG_RESET_REGS_POSTIDLE,
+	.flags = OMAP_I2C_FLAG_BUS_SHIFT_NONE,
 };
 
 /* i2c1 */
diff --git a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
index 943222c40489..36270bb637e4 100644
--- a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
@@ -791,8 +791,7 @@ static struct omap_hwmod omap3xxx_dss_venc_hwmod = {
 /* I2C1 */
 static struct omap_i2c_dev_attr i2c1_dev_attr = {
 	.fifo_depth	= 8, /* bytes */
-	.flags		= OMAP_I2C_FLAG_RESET_REGS_POSTIDLE |
-			  OMAP_I2C_FLAG_BUS_SHIFT_2,
+	.flags		= OMAP_I2C_FLAG_BUS_SHIFT_2,
 };
 
 static struct omap_hwmod omap3xxx_i2c1_hwmod = {
@@ -817,8 +816,7 @@ static struct omap_hwmod omap3xxx_i2c1_hwmod = {
 /* I2C2 */
 static struct omap_i2c_dev_attr i2c2_dev_attr = {
 	.fifo_depth	= 8, /* bytes */
-	.flags = OMAP_I2C_FLAG_RESET_REGS_POSTIDLE |
-		 OMAP_I2C_FLAG_BUS_SHIFT_2,
+	.flags = OMAP_I2C_FLAG_BUS_SHIFT_2,
 };
 
 static struct omap_hwmod omap3xxx_i2c2_hwmod = {
@@ -843,8 +841,7 @@ static struct omap_hwmod omap3xxx_i2c2_hwmod = {
 /* I2C3 */
 static struct omap_i2c_dev_attr i2c3_dev_attr = {
 	.fifo_depth	= 64, /* bytes */
-	.flags = OMAP_I2C_FLAG_RESET_REGS_POSTIDLE |
-		 OMAP_I2C_FLAG_BUS_SHIFT_2,
+	.flags = OMAP_I2C_FLAG_BUS_SHIFT_2,
 };
 
 static struct omap_hwmod_irq_info i2c3_mpu_irqs[] = {
diff --git a/arch/arm/mach-omap2/omap_hwmod_44xx_data.c b/arch/arm/mach-omap2/omap_hwmod_44xx_data.c
index 652d0285bd6d..eb40dbc6688e 100644
--- a/arch/arm/mach-omap2/omap_hwmod_44xx_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_44xx_data.c
@@ -1526,8 +1526,7 @@ static struct omap_hwmod_class omap44xx_i2c_hwmod_class = {
 };
 
 static struct omap_i2c_dev_attr i2c_dev_attr = {
-	.flags	= OMAP_I2C_FLAG_BUS_SHIFT_NONE |
-			OMAP_I2C_FLAG_RESET_REGS_POSTIDLE,
+	.flags	= OMAP_I2C_FLAG_BUS_SHIFT_NONE,
 };
 
 /* i2c1 */
diff --git a/drivers/i2c/busses/i2c-omap.c b/drivers/i2c/busses/i2c-omap.c
index 248280136668..7a62acb7d262 100644
--- a/drivers/i2c/busses/i2c-omap.c
+++ b/drivers/i2c/busses/i2c-omap.c
@@ -1038,8 +1038,7 @@ static const struct i2c_algorithm omap_i2c_algo = {
 #ifdef CONFIG_OF
 static struct omap_i2c_bus_platform_data omap3_pdata = {
 	.rev = OMAP_I2C_IP_VERSION_1,
-	.flags = OMAP_I2C_FLAG_RESET_REGS_POSTIDLE |
-		 OMAP_I2C_FLAG_BUS_SHIFT_2,
+	.flags = OMAP_I2C_FLAG_BUS_SHIFT_2,
 };
 
 static struct omap_i2c_bus_platform_data omap4_pdata = {
diff --git a/include/linux/i2c-omap.h b/include/linux/i2c-omap.h
index 1b25c04f82d9..babe0cf6d56b 100644
--- a/include/linux/i2c-omap.h
+++ b/include/linux/i2c-omap.h
@@ -20,7 +20,6 @@
 #define OMAP_I2C_FLAG_NO_FIFO			BIT(0)
 #define OMAP_I2C_FLAG_SIMPLE_CLOCK		BIT(1)
 #define OMAP_I2C_FLAG_16BIT_DATA_REG		BIT(2)
-#define OMAP_I2C_FLAG_RESET_REGS_POSTIDLE	BIT(3)
 #define OMAP_I2C_FLAG_ALWAYS_ARMXOR_CLK	BIT(5)
 #define OMAP_I2C_FLAG_FORCE_19200_INT_CLK	BIT(6)
 /* how the CPU address bus must be translated for I2C unit access */
-- 
cgit v1.2.3


From 4b9347dcbe3b426d19bda99b2061a2d1fe5a2dce Mon Sep 17 00:00:00 2001
From: Marek Szyprowski <m.szyprowski@samsung.com>
Date: Mon, 15 Oct 2012 16:03:51 +0200
Subject: common: DMA-mapping: add DMA_ATTR_FORCE_CONTIGUOUS attribute

This patch adds DMA_ATTR_FORCE_CONTIGUOUS attribute to the DMA-mapping
subsystem.

By default DMA-mapping subsystem is allowed to assemble the buffer
allocated by dma_alloc_attrs() function from individual pages if it can
be mapped as contiguous chunk into device dma address space. By
specifing this attribute the allocated buffer is forced to be contiguous
also in physical memory.

Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
---
 Documentation/DMA-attributes.txt | 9 +++++++++
 include/linux/dma-attrs.h        | 1 +
 2 files changed, 10 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/DMA-attributes.txt b/Documentation/DMA-attributes.txt
index f50309081ac7..e59480db9ee0 100644
--- a/Documentation/DMA-attributes.txt
+++ b/Documentation/DMA-attributes.txt
@@ -91,3 +91,12 @@ transferred to 'device' domain. This attribute can be also used for
 dma_unmap_{single,page,sg} functions family to force buffer to stay in
 device domain after releasing a mapping for it. Use this attribute with
 care!
+
+DMA_ATTR_FORCE_CONTIGUOUS
+-------------------------
+
+By default DMA-mapping subsystem is allowed to assemble the buffer
+allocated by dma_alloc_attrs() function from individual pages if it can
+be mapped as contiguous chunk into device dma address space. By
+specifing this attribute the allocated buffer is forced to be contiguous
+also in physical memory.
diff --git a/include/linux/dma-attrs.h b/include/linux/dma-attrs.h
index f83f793223ff..c8e1831d7572 100644
--- a/include/linux/dma-attrs.h
+++ b/include/linux/dma-attrs.h
@@ -17,6 +17,7 @@ enum dma_attr {
 	DMA_ATTR_NON_CONSISTENT,
 	DMA_ATTR_NO_KERNEL_MAPPING,
 	DMA_ATTR_SKIP_CPU_SYNC,
+	DMA_ATTR_FORCE_CONTIGUOUS,
 	DMA_ATTR_MAX,
 };
 
-- 
cgit v1.2.3


From eed8c02e680c04cd737e0a9cef74e68d8eb0cefa Mon Sep 17 00:00:00 2001
From: Lukas Czerner <lczerner@redhat.com>
Date: Fri, 30 Nov 2012 11:42:40 +0100
Subject: wait: add wait_event_lock_irq() interface

New wait_event{_interruptible}_lock_irq{_cmd} macros added. This commit
moves the private wait_event_lock_irq() macro from MD to regular wait
includes, introduces new macro wait_event_lock_irq_cmd() instead of using
the old method with omitting cmd parameter which is ugly and makes a use
of new macros in the MD. It also introduces the _interruptible_ variant.

The use of new interface is when one have a special lock to protect data
structures used in the condition, or one also needs to invoke "cmd"
before putting it to sleep.

All new macros are expected to be called with the lock taken. The lock
is released before sleep and is reacquired afterwards. We will leave the
macro with the lock held.

Note to DM: IMO this should also fix theoretical race on waitqueue while
using simultaneously wait_event_lock_irq() and wait_event() because of
lack of locking around current state setting and wait queue removal.

Signed-off-by: Lukas Czerner <lczerner@redhat.com>
Cc: Neil Brown <neilb@suse.de>
Cc: David Howells <dhowells@redhat.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/md/md.c      |   2 +-
 drivers/md/md.h      |  26 --------
 drivers/md/raid1.c   |  15 +++--
 drivers/md/raid10.c  |  15 +++--
 drivers/md/raid5.c   |  12 ++--
 include/linux/wait.h | 164 +++++++++++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 184 insertions(+), 50 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 9ab768acfb62..7e513a38cec7 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -452,7 +452,7 @@ void md_flush_request(struct mddev *mddev, struct bio *bio)
 	spin_lock_irq(&mddev->write_lock);
 	wait_event_lock_irq(mddev->sb_wait,
 			    !mddev->flush_bio,
-			    mddev->write_lock, /*nothing*/);
+			    mddev->write_lock);
 	mddev->flush_bio = bio;
 	spin_unlock_irq(&mddev->write_lock);
 
diff --git a/drivers/md/md.h b/drivers/md/md.h
index af443ab868db..1e2fc3d9c74c 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -551,32 +551,6 @@ struct md_thread {
 
 #define THREAD_WAKEUP  0
 
-#define __wait_event_lock_irq(wq, condition, lock, cmd) 		\
-do {									\
-	wait_queue_t __wait;						\
-	init_waitqueue_entry(&__wait, current);				\
-									\
-	add_wait_queue(&wq, &__wait);					\
-	for (;;) {							\
-		set_current_state(TASK_UNINTERRUPTIBLE);		\
-		if (condition)						\
-			break;						\
-		spin_unlock_irq(&lock);					\
-		cmd;							\
-		schedule();						\
-		spin_lock_irq(&lock);					\
-	}								\
-	current->state = TASK_RUNNING;					\
-	remove_wait_queue(&wq, &__wait);				\
-} while (0)
-
-#define wait_event_lock_irq(wq, condition, lock, cmd) 			\
-do {									\
-	if (condition)	 						\
-		break;							\
-	__wait_event_lock_irq(wq, condition, lock, cmd);		\
-} while (0)
-
 static inline void safe_put_page(struct page *p)
 {
 	if (p) put_page(p);
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 8034fbd6190c..534dd74a2da0 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -822,7 +822,7 @@ static void raise_barrier(struct r1conf *conf)
 
 	/* Wait until no block IO is waiting */
 	wait_event_lock_irq(conf->wait_barrier, !conf->nr_waiting,
-			    conf->resync_lock, );
+			    conf->resync_lock);
 
 	/* block any new IO from starting */
 	conf->barrier++;
@@ -830,7 +830,7 @@ static void raise_barrier(struct r1conf *conf)
 	/* Now wait for all pending IO to complete */
 	wait_event_lock_irq(conf->wait_barrier,
 			    !conf->nr_pending && conf->barrier < RESYNC_DEPTH,
-			    conf->resync_lock, );
+			    conf->resync_lock);
 
 	spin_unlock_irq(&conf->resync_lock);
 }
@@ -864,8 +864,7 @@ static void wait_barrier(struct r1conf *conf)
 				    (conf->nr_pending &&
 				     current->bio_list &&
 				     !bio_list_empty(current->bio_list)),
-				    conf->resync_lock,
-			);
+				    conf->resync_lock);
 		conf->nr_waiting--;
 	}
 	conf->nr_pending++;
@@ -898,10 +897,10 @@ static void freeze_array(struct r1conf *conf)
 	spin_lock_irq(&conf->resync_lock);
 	conf->barrier++;
 	conf->nr_waiting++;
-	wait_event_lock_irq(conf->wait_barrier,
-			    conf->nr_pending == conf->nr_queued+1,
-			    conf->resync_lock,
-			    flush_pending_writes(conf));
+	wait_event_lock_irq_cmd(conf->wait_barrier,
+				conf->nr_pending == conf->nr_queued+1,
+				conf->resync_lock,
+				flush_pending_writes(conf));
 	spin_unlock_irq(&conf->resync_lock);
 }
 static void unfreeze_array(struct r1conf *conf)
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 906ccbd0f7dc..9a08f621b27d 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -952,7 +952,7 @@ static void raise_barrier(struct r10conf *conf, int force)
 
 	/* Wait until no block IO is waiting (unless 'force') */
 	wait_event_lock_irq(conf->wait_barrier, force || !conf->nr_waiting,
-			    conf->resync_lock, );
+			    conf->resync_lock);
 
 	/* block any new IO from starting */
 	conf->barrier++;
@@ -960,7 +960,7 @@ static void raise_barrier(struct r10conf *conf, int force)
 	/* Now wait for all pending IO to complete */
 	wait_event_lock_irq(conf->wait_barrier,
 			    !conf->nr_pending && conf->barrier < RESYNC_DEPTH,
-			    conf->resync_lock, );
+			    conf->resync_lock);
 
 	spin_unlock_irq(&conf->resync_lock);
 }
@@ -993,8 +993,7 @@ static void wait_barrier(struct r10conf *conf)
 				    (conf->nr_pending &&
 				     current->bio_list &&
 				     !bio_list_empty(current->bio_list)),
-				    conf->resync_lock,
-			);
+				    conf->resync_lock);
 		conf->nr_waiting--;
 	}
 	conf->nr_pending++;
@@ -1027,10 +1026,10 @@ static void freeze_array(struct r10conf *conf)
 	spin_lock_irq(&conf->resync_lock);
 	conf->barrier++;
 	conf->nr_waiting++;
-	wait_event_lock_irq(conf->wait_barrier,
-			    conf->nr_pending == conf->nr_queued+1,
-			    conf->resync_lock,
-			    flush_pending_writes(conf));
+	wait_event_lock_irq_cmd(conf->wait_barrier,
+				conf->nr_pending == conf->nr_queued+1,
+				conf->resync_lock,
+				flush_pending_writes(conf));
 
 	spin_unlock_irq(&conf->resync_lock);
 }
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index c5439dce0295..2bf617d6f4fd 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -466,7 +466,7 @@ get_active_stripe(struct r5conf *conf, sector_t sector,
 	do {
 		wait_event_lock_irq(conf->wait_for_stripe,
 				    conf->quiesce == 0 || noquiesce,
-				    conf->device_lock, /* nothing */);
+				    conf->device_lock);
 		sh = __find_stripe(conf, sector, conf->generation - previous);
 		if (!sh) {
 			if (!conf->inactive_blocked)
@@ -480,8 +480,7 @@ get_active_stripe(struct r5conf *conf, sector_t sector,
 						    (atomic_read(&conf->active_stripes)
 						     < (conf->max_nr_stripes *3/4)
 						     || !conf->inactive_blocked),
-						    conf->device_lock,
-						    );
+						    conf->device_lock);
 				conf->inactive_blocked = 0;
 			} else
 				init_stripe(sh, sector, previous);
@@ -1646,8 +1645,7 @@ static int resize_stripes(struct r5conf *conf, int newsize)
 		spin_lock_irq(&conf->device_lock);
 		wait_event_lock_irq(conf->wait_for_stripe,
 				    !list_empty(&conf->inactive_list),
-				    conf->device_lock,
-				    );
+				    conf->device_lock);
 		osh = get_free_stripe(conf);
 		spin_unlock_irq(&conf->device_lock);
 		atomic_set(&nsh->count, 1);
@@ -4000,7 +3998,7 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
 		spin_lock_irq(&conf->device_lock);
 		wait_event_lock_irq(conf->wait_for_stripe,
 				    conf->quiesce == 0,
-				    conf->device_lock, /* nothing */);
+				    conf->device_lock);
 		atomic_inc(&conf->active_aligned_reads);
 		spin_unlock_irq(&conf->device_lock);
 
@@ -6088,7 +6086,7 @@ static void raid5_quiesce(struct mddev *mddev, int state)
 		wait_event_lock_irq(conf->wait_for_stripe,
 				    atomic_read(&conf->active_stripes) == 0 &&
 				    atomic_read(&conf->active_aligned_reads) == 0,
-				    conf->device_lock, /* nothing */);
+				    conf->device_lock);
 		conf->quiesce = 1;
 		spin_unlock_irq(&conf->device_lock);
 		/* allow reshape to continue */
diff --git a/include/linux/wait.h b/include/linux/wait.h
index 168dfe122dd3..7cb64d4b499d 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -550,6 +550,170 @@ do {									\
 	__ret;								\
 })
 
+
+#define __wait_event_lock_irq(wq, condition, lock, cmd)			\
+do {									\
+	DEFINE_WAIT(__wait);						\
+									\
+	for (;;) {							\
+		prepare_to_wait(&wq, &__wait, TASK_UNINTERRUPTIBLE);	\
+		if (condition)						\
+			break;						\
+		spin_unlock_irq(&lock);					\
+		cmd;							\
+		schedule();						\
+		spin_lock_irq(&lock);					\
+	}								\
+	finish_wait(&wq, &__wait);					\
+} while (0)
+
+/**
+ * wait_event_lock_irq_cmd - sleep until a condition gets true. The
+ *			     condition is checked under the lock. This
+ *			     is expected to be called with the lock
+ *			     taken.
+ * @wq: the waitqueue to wait on
+ * @condition: a C expression for the event to wait for
+ * @lock: a locked spinlock_t, which will be released before cmd
+ *	  and schedule() and reacquired afterwards.
+ * @cmd: a command which is invoked outside the critical section before
+ *	 sleep
+ *
+ * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the
+ * @condition evaluates to true. The @condition is checked each time
+ * the waitqueue @wq is woken up.
+ *
+ * wake_up() has to be called after changing any variable that could
+ * change the result of the wait condition.
+ *
+ * This is supposed to be called while holding the lock. The lock is
+ * dropped before invoking the cmd and going to sleep and is reacquired
+ * afterwards.
+ */
+#define wait_event_lock_irq_cmd(wq, condition, lock, cmd)		\
+do {									\
+	if (condition)							\
+		break;							\
+	__wait_event_lock_irq(wq, condition, lock, cmd);		\
+} while (0)
+
+/**
+ * wait_event_lock_irq - sleep until a condition gets true. The
+ *			 condition is checked under the lock. This
+ *			 is expected to be called with the lock
+ *			 taken.
+ * @wq: the waitqueue to wait on
+ * @condition: a C expression for the event to wait for
+ * @lock: a locked spinlock_t, which will be released before schedule()
+ *	  and reacquired afterwards.
+ *
+ * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the
+ * @condition evaluates to true. The @condition is checked each time
+ * the waitqueue @wq is woken up.
+ *
+ * wake_up() has to be called after changing any variable that could
+ * change the result of the wait condition.
+ *
+ * This is supposed to be called while holding the lock. The lock is
+ * dropped before going to sleep and is reacquired afterwards.
+ */
+#define wait_event_lock_irq(wq, condition, lock)			\
+do {									\
+	if (condition)							\
+		break;							\
+	__wait_event_lock_irq(wq, condition, lock, );			\
+} while (0)
+
+
+#define __wait_event_interruptible_lock_irq(wq, condition,		\
+					    lock, ret, cmd)		\
+do {									\
+	DEFINE_WAIT(__wait);						\
+									\
+	for (;;) {							\
+		prepare_to_wait(&wq, &__wait, TASK_INTERRUPTIBLE);	\
+		if (condition)						\
+			break;						\
+		if (signal_pending(current)) {				\
+			ret = -ERESTARTSYS;				\
+			break;						\
+		}							\
+		spin_unlock_irq(&lock);					\
+		cmd;							\
+		schedule();						\
+		spin_lock_irq(&lock);					\
+	}								\
+	finish_wait(&wq, &__wait);					\
+} while (0)
+
+/**
+ * wait_event_interruptible_lock_irq_cmd - sleep until a condition gets true.
+ *		The condition is checked under the lock. This is expected to
+ *		be called with the lock taken.
+ * @wq: the waitqueue to wait on
+ * @condition: a C expression for the event to wait for
+ * @lock: a locked spinlock_t, which will be released before cmd and
+ *	  schedule() and reacquired afterwards.
+ * @cmd: a command which is invoked outside the critical section before
+ *	 sleep
+ *
+ * The process is put to sleep (TASK_INTERRUPTIBLE) until the
+ * @condition evaluates to true or a signal is received. The @condition is
+ * checked each time the waitqueue @wq is woken up.
+ *
+ * wake_up() has to be called after changing any variable that could
+ * change the result of the wait condition.
+ *
+ * This is supposed to be called while holding the lock. The lock is
+ * dropped before invoking the cmd and going to sleep and is reacquired
+ * afterwards.
+ *
+ * The macro will return -ERESTARTSYS if it was interrupted by a signal
+ * and 0 if @condition evaluated to true.
+ */
+#define wait_event_interruptible_lock_irq_cmd(wq, condition, lock, cmd)	\
+({									\
+	int __ret = 0;							\
+									\
+	if (!(condition))						\
+		__wait_event_interruptible_lock_irq(wq, condition,	\
+						    lock, __ret, cmd);	\
+	__ret;								\
+})
+
+/**
+ * wait_event_interruptible_lock_irq - sleep until a condition gets true.
+ *		The condition is checked under the lock. This is expected
+ *		to be called with the lock taken.
+ * @wq: the waitqueue to wait on
+ * @condition: a C expression for the event to wait for
+ * @lock: a locked spinlock_t, which will be released before schedule()
+ *	  and reacquired afterwards.
+ *
+ * The process is put to sleep (TASK_INTERRUPTIBLE) until the
+ * @condition evaluates to true or signal is received. The @condition is
+ * checked each time the waitqueue @wq is woken up.
+ *
+ * wake_up() has to be called after changing any variable that could
+ * change the result of the wait condition.
+ *
+ * This is supposed to be called while holding the lock. The lock is
+ * dropped before going to sleep and is reacquired afterwards.
+ *
+ * The macro will return -ERESTARTSYS if it was interrupted by a signal
+ * and 0 if @condition evaluated to true.
+ */
+#define wait_event_interruptible_lock_irq(wq, condition, lock)		\
+({									\
+	int __ret = 0;							\
+									\
+	if (!(condition))						\
+		__wait_event_interruptible_lock_irq(wq, condition,	\
+						    lock, __ret, );	\
+	__ret;								\
+})
+
+
 /*
  * These are the old interfaces to sleep waiting for an event.
  * They are racy.  DO NOT use them, use the wait_event* interfaces above.
-- 
cgit v1.2.3


From 7b5a35225b0d4fd779cf79d7624e63d1957f6c4d Mon Sep 17 00:00:00 2001
From: Lukas Czerner <lczerner@redhat.com>
Date: Fri, 30 Nov 2012 11:42:41 +0100
Subject: loop: Limit the number of requests in the bio list

Currently there is not limitation of number of requests in the loop bio
list. This can lead into some nasty situations when the caller spawns
tons of bio requests taking huge amount of memory. This is even more
obvious with discard where blkdev_issue_discard() will submit all bios
for the range and wait for them to finish afterwards. On really big loop
devices and slow backing file system this can lead to OOM situation as
reported by Dave Chinner.

With this patch we will wait in loop_make_request() if the number of
bios in the loop bio list would exceed 'nr_congestion_on'.
We'll wake up the process as we process the bios form the list. Some
threshold hysteresis is in place to avoid high frequency oscillation.

Signed-off-by: Lukas Czerner <lczerner@redhat.com>
Reported-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/loop.c | 10 ++++++++++
 include/linux/loop.h |  3 +++
 2 files changed, 13 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index e9d594fd12cb..800aec7927de 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -463,6 +463,7 @@ out:
  */
 static void loop_add_bio(struct loop_device *lo, struct bio *bio)
 {
+	lo->lo_bio_count++;
 	bio_list_add(&lo->lo_bio_list, bio);
 }
 
@@ -471,6 +472,7 @@ static void loop_add_bio(struct loop_device *lo, struct bio *bio)
  */
 static struct bio *loop_get_bio(struct loop_device *lo)
 {
+	lo->lo_bio_count--;
 	return bio_list_pop(&lo->lo_bio_list);
 }
 
@@ -489,6 +491,10 @@ static void loop_make_request(struct request_queue *q, struct bio *old_bio)
 		goto out;
 	if (unlikely(rw == WRITE && (lo->lo_flags & LO_FLAGS_READ_ONLY)))
 		goto out;
+	if (lo->lo_bio_count >= q->nr_congestion_on)
+		wait_event_lock_irq(lo->lo_req_wait,
+				    lo->lo_bio_count < q->nr_congestion_off,
+				    lo->lo_lock);
 	loop_add_bio(lo, old_bio);
 	wake_up(&lo->lo_event);
 	spin_unlock_irq(&lo->lo_lock);
@@ -546,6 +552,8 @@ static int loop_thread(void *data)
 			continue;
 		spin_lock_irq(&lo->lo_lock);
 		bio = loop_get_bio(lo);
+		if (lo->lo_bio_count < lo->lo_queue->nr_congestion_off)
+			wake_up(&lo->lo_req_wait);
 		spin_unlock_irq(&lo->lo_lock);
 
 		BUG_ON(!bio);
@@ -873,6 +881,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
 	lo->transfer = transfer_none;
 	lo->ioctl = NULL;
 	lo->lo_sizelimit = 0;
+	lo->lo_bio_count = 0;
 	lo->old_gfp_mask = mapping_gfp_mask(mapping);
 	mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS));
 
@@ -1660,6 +1669,7 @@ static int loop_add(struct loop_device **l, int i)
 	lo->lo_number		= i;
 	lo->lo_thread		= NULL;
 	init_waitqueue_head(&lo->lo_event);
+	init_waitqueue_head(&lo->lo_req_wait);
 	spin_lock_init(&lo->lo_lock);
 	disk->major		= LOOP_MAJOR;
 	disk->first_minor	= i << part_shift;
diff --git a/include/linux/loop.h b/include/linux/loop.h
index 6492181bcb1d..460b60fa7adf 100644
--- a/include/linux/loop.h
+++ b/include/linux/loop.h
@@ -53,10 +53,13 @@ struct loop_device {
 
 	spinlock_t		lo_lock;
 	struct bio_list		lo_bio_list;
+	unsigned int		lo_bio_count;
 	int			lo_state;
 	struct mutex		lo_ctl_mutex;
 	struct task_struct	*lo_thread;
 	wait_queue_head_t	lo_event;
+	/* wait queue for incoming requests */
+	wait_queue_head_t	lo_req_wait;
 
 	struct request_queue	*lo_queue;
 	struct gendisk		*lo_disk;
-- 
cgit v1.2.3


From 12a5105e04143569b3e9e5ef03cf9cad8862473a Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Thu, 29 Nov 2012 00:17:15 +0530
Subject: mfd: stmpe: Get rid of irq_invert_polarity

Since the very first patch, stmpe core driver is using irq_invert_polarity as
part of platform data. But, nobody is actually using it in kernel till now.

Also, this is not something part of hardware specs, but is included to cater
some board mistakes or quirks.

So, better get rid of it. This is earlier discussed here:

https://lkml.org/lkml/2012/11/27/636

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Acked-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/stmpe.c       | 7 -------
 include/linux/mfd/stmpe.h | 2 --
 2 files changed, 9 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/stmpe.c b/drivers/mfd/stmpe.c
index f9f7de796029..90c6151bc52e 100644
--- a/drivers/mfd/stmpe.c
+++ b/drivers/mfd/stmpe.c
@@ -953,13 +953,6 @@ static int __devinit stmpe_chip_init(struct stmpe *stmpe)
 			else
 				icr |= STMPE_ICR_LSB_HIGH;
 		}
-
-		if (stmpe->pdata->irq_invert_polarity) {
-			if (id == STMPE801_ID)
-				icr ^= STMPE801_REG_SYS_CTRL_INT_HI;
-			else
-				icr ^= STMPE_ICR_LSB_HIGH;
-		}
 	}
 
 	if (stmpe->pdata->autosleep) {
diff --git a/include/linux/mfd/stmpe.h b/include/linux/mfd/stmpe.h
index 15dac790365f..383ac1512a39 100644
--- a/include/linux/mfd/stmpe.h
+++ b/include/linux/mfd/stmpe.h
@@ -190,7 +190,6 @@ struct stmpe_ts_platform_data {
  * @id: device id to distinguish between multiple STMPEs on the same board
  * @blocks: bitmask of blocks to enable (use STMPE_BLOCK_*)
  * @irq_trigger: IRQ trigger to use for the interrupt to the host
- * @irq_invert_polarity: IRQ line is connected with reversed polarity
  * @autosleep: bool to enable/disable stmpe autosleep
  * @autosleep_timeout: inactivity timeout in milliseconds for autosleep
  * @irq_base: base IRQ number.  %STMPE_NR_IRQS irqs will be used, or
@@ -207,7 +206,6 @@ struct stmpe_platform_data {
 	unsigned int blocks;
 	int irq_base;
 	unsigned int irq_trigger;
-	bool irq_invert_polarity;
 	bool autosleep;
 	bool irq_over_gpio;
 	int irq_gpio;
-- 
cgit v1.2.3


From 0e5fca8106199f5c680bb93e75c16381c4c256ce Mon Sep 17 00:00:00 2001
From: "Kim, Milo" <Milo.Kim@ti.com>
Date: Thu, 29 Nov 2012 08:48:26 +0000
Subject: mfd: tps65910: Remove unused data

The 'io_mutex' is not used anywhere.
The regmap API supports the mutex internally, so no additional mutex
is required.

And 'domain' private data is unnecessary because the irq domain is
already registered by using regmap_add_irq_chip().

Signed-off-by: Milo(Woogyom) Kim <milo.kim@ti.com>
Acked-by: Laxman Dewangan <ldewangan@nvidia.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/tps65910.c       | 1 -
 include/linux/mfd/tps65910.h | 2 --
 2 files changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/tps65910.c b/drivers/mfd/tps65910.c
index ca3783350ccf..c160c2d76f79 100644
--- a/drivers/mfd/tps65910.c
+++ b/drivers/mfd/tps65910.c
@@ -486,7 +486,6 @@ static __devinit int tps65910_i2c_probe(struct i2c_client *i2c,
 	tps65910->dev = &i2c->dev;
 	tps65910->i2c_client = i2c;
 	tps65910->id = chip_id;
-	mutex_init(&tps65910->io_mutex);
 
 	tps65910->regmap = devm_regmap_init_i2c(i2c, &tps65910_regmap_config);
 	if (IS_ERR(tps65910->regmap)) {
diff --git a/include/linux/mfd/tps65910.h b/include/linux/mfd/tps65910.h
index 0b16903f7e88..20e433e551e3 100644
--- a/include/linux/mfd/tps65910.h
+++ b/include/linux/mfd/tps65910.h
@@ -893,7 +893,6 @@ struct tps65910 {
 	struct device *dev;
 	struct i2c_client *i2c_client;
 	struct regmap *regmap;
-	struct mutex io_mutex;
 	unsigned int id;
 
 	/* Client devices */
@@ -907,7 +906,6 @@ struct tps65910 {
 	/* IRQ Handling */
 	int chip_irq;
 	struct regmap_irq_chip_data *irq_data;
-	struct irq_domain *domain;
 };
 
 struct tps65910_platform_data {
-- 
cgit v1.2.3


From 78ad724ade23c66650dcaba04f4d2307c2884c69 Mon Sep 17 00:00:00 2001
From: Sreekanth Reddy <Sreekanth.Reddy@lsi.com>
Date: Fri, 30 Nov 2012 07:58:10 +0530
Subject: [SCSI] miscdevice: Adding support for MPT3SAS_MINOR(222)

Signed-off-by: Sreekanth Reddy <Sreekanth.Reddy@lsi.com>
Reviewed-by: Nagalakshmi Nandigama <Nagalakshmi.Nandigama@lsi.com>
Signed-off-by: James Bottomley <JBottomley@Parallels.com>
---
 include/linux/miscdevice.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h
index e0deeb2cc939..09c2300ddb37 100644
--- a/include/linux/miscdevice.h
+++ b/include/linux/miscdevice.h
@@ -34,6 +34,7 @@
 #define MWAVE_MINOR		219	/* ACP/Mwave Modem */
 #define MPT_MINOR		220
 #define MPT2SAS_MINOR		221
+#define MPT3SAS_MINOR		222
 #define UINPUT_MINOR		223
 #define MISC_MCELOG_MINOR	227
 #define HPET_MINOR		228
-- 
cgit v1.2.3


From 0d0cdb028f9d9771e2b346038707734121f906e3 Mon Sep 17 00:00:00 2001
From: Aaron Lu <aaron.lu@intel.com>
Date: Mon, 26 Nov 2012 13:55:25 +0800
Subject: libata: restore acpi disable functionality

Commit 66fa7f215 "libata-acpi: improve ACPI disabling" introdcued the
behaviour of disabling ATA ACPI if ata_acpi_on_devcfg failed the 2nd
time, but commit 30dcf76ac dropped this behaviour and this caused
problem for Dimitris Damigos, where his laptop can not resume correctly.

The bugzilla page for it is:
https://bugzilla.kernel.org/show_bug.cgi?id=49331

The problem is, ata_dev_push_id will fail the 2nd time it is invoked,
and due to disabling ACPI code is dropped, ata_acpi_on_devcfg which
calls ata_dev_push_id will keep failing and eventually made the device
disabled.

This patch restores the original behaviour, if acpi failed the 2nd time,
disable acpi functionality for the device(and we do not event need to
add a debug message for this as it is still there ;-).

Reported-by: Dimitris Damigos <damigos@freemail.gr>
Signed-off-by: Aaron Lu <aaron.lu@intel.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 drivers/ata/libata-acpi.c | 4 ++++
 include/linux/libata.h    | 1 +
 2 files changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/ata/libata-acpi.c b/drivers/ata/libata-acpi.c
index 5b0ba3f20edc..ef01ac07502e 100644
--- a/drivers/ata/libata-acpi.c
+++ b/drivers/ata/libata-acpi.c
@@ -76,6 +76,9 @@ acpi_handle ata_dev_acpi_handle(struct ata_device *dev)
 	acpi_integer adr;
 	struct ata_port *ap = dev->link->ap;
 
+	if (dev->flags & ATA_DFLAG_ACPI_DISABLED)
+		return NULL;
+
 	if (ap->flags & ATA_FLAG_ACPI_SATA) {
 		if (!sata_pmp_attached(ap))
 			adr = SATA_ADR(ap->port_no, NO_PORT_MULT);
@@ -945,6 +948,7 @@ int ata_acpi_on_devcfg(struct ata_device *dev)
 		return rc;
 	}
 
+	dev->flags |= ATA_DFLAG_ACPI_DISABLED;
 	ata_dev_warn(dev, "ACPI: failed the second time, disabled\n");
 
 	/* We can safely continue if no _GTF command has been executed
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 77eeeda2b6e2..e931c9ad1078 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -163,6 +163,7 @@ enum {
 
 	ATA_DFLAG_DA		= (1 << 26), /* device supports Device Attention */
 	ATA_DFLAG_DEVSLP	= (1 << 27), /* device supports Device Sleep */
+	ATA_DFLAG_ACPI_DISABLED = (1 << 28), /* ACPI for the device is disabled */
 
 	ATA_DEV_UNKNOWN		= 0,	/* unknown device */
 	ATA_DEV_ATA		= 1,	/* ATA device */
-- 
cgit v1.2.3


From de90cd71f68e947d3bd6c3f2ef5731ead010a768 Mon Sep 17 00:00:00 2001
From: Shane Huang <shane.huang@amd.com>
Date: Sun, 18 Nov 2012 04:44:41 +0800
Subject: libata: check SATA_SETTINGS log with HW Feature Ctrl

NCQ capability was used to check availability of SATA Settings page
from Identify Device Data Log, which contains DevSlp timing variables.
It does not work on some HDDs and leads to error messages.
IDENTIFY word 78 bit 5(Hardware Feature Control) should be used.

Quoting SATA spec 3.1:
If Hardware Feature Control is supported, then:
a) IDENTIFY DEVICE data word 78 bit 5 (see 13.2.1.18) shall be
set to one;
b) the SET FEATURES Select Hardware Feature Control subcommand
shall be supported (see 13.3.8);
c) page 08h of the Identify Device Data log (see 13.7.7) shall
be supported;

This patch is not tested on SATA HDD with DevSlp supported.

Reported-by: Borislav Petkov <bp@amd64.org>
Signed-off-by: Shane Huang <shane.huang@amd.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 drivers/ata/libata-core.c | 3 +--
 include/linux/ata.h       | 1 +
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index f46fbd3bd3fb..caffe73c1e4a 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -2330,9 +2330,8 @@ int ata_dev_configure(struct ata_device *dev)
 
 		/* Obtain SATA Settings page from Identify Device Data Log,
 		 * which contains DevSlp timing variables etc.
-		 * Exclude old devices with ata_id_has_ncq()
 		 */
-		if (ata_id_has_ncq(dev->id)) {
+		if (ata_id_has_hw_feature_ctrl(dev->id)) {
 			err_mask = ata_read_log_page(dev,
 						     ATA_LOG_SATA_ID_DEV_DATA,
 						     ATA_LOG_SATA_SETTINGS,
diff --git a/include/linux/ata.h b/include/linux/ata.h
index 408da9502177..18cbb93fdbca 100644
--- a/include/linux/ata.h
+++ b/include/linux/ata.h
@@ -593,6 +593,7 @@ static inline int ata_is_data(u8 prot)
 #define ata_id_cdb_intr(id)	(((id)[ATA_ID_CONFIG] & 0x60) == 0x20)
 #define ata_id_has_da(id)	((id)[ATA_ID_SATA_CAPABILITY_2] & (1 << 4))
 #define ata_id_has_devslp(id)	((id)[ATA_ID_FEATURE_SUPP] & (1 << 8))
+#define ata_id_has_hw_feature_ctrl(id)	((id)[ATA_ID_FEATURE_SUPP] & (1 << 5))
 
 static inline bool ata_id_has_hipm(const u16 *id)
 {
-- 
cgit v1.2.3


From b7db04d9264fca4b00e949da7b3180c50e243fca Mon Sep 17 00:00:00 2001
From: Brian Norris <computersforpeace@gmail.com>
Date: Fri, 2 Nov 2012 12:29:32 -0700
Subject: libata: implement ata_platform_remove_one()

This relatively simple boiler-plate code is repeated in several platform
drivers. We should implement a common version in libata.

Signed-off-by: Brian Norris <computersforpeace@gmail.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 drivers/ata/libata-core.c | 23 +++++++++++++++++++++++
 include/linux/libata.h    |  4 ++++
 2 files changed, 27 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 583f26d6d455..8e3f4a90c088 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -67,6 +67,7 @@
 #include <linux/cdrom.h>
 #include <linux/ratelimit.h>
 #include <linux/pm_runtime.h>
+#include <linux/platform_device.h>
 
 #include "libata.h"
 #include "libata-transport.h"
@@ -6382,6 +6383,26 @@ int ata_pci_device_resume(struct pci_dev *pdev)
 
 #endif /* CONFIG_PCI */
 
+/**
+ *	ata_platform_remove_one - Platform layer callback for device removal
+ *	@pdev: Platform device that was removed
+ *
+ *	Platform layer indicates to libata via this hook that hot-unplug or
+ *	module unload event has occurred.  Detach all ports.  Resource
+ *	release is handled via devres.
+ *
+ *	LOCKING:
+ *	Inherited from platform layer (may sleep).
+ */
+int ata_platform_remove_one(struct platform_device *pdev)
+{
+	struct ata_host *host = platform_get_drvdata(pdev);
+
+	ata_host_detach(host);
+
+	return 0;
+}
+
 static int __init ata_parse_force_one(char **cur,
 				      struct ata_force_ent *force_ent,
 				      const char **reason)
@@ -6877,6 +6898,8 @@ EXPORT_SYMBOL_GPL(ata_pci_device_resume);
 #endif /* CONFIG_PM */
 #endif /* CONFIG_PCI */
 
+EXPORT_SYMBOL_GPL(ata_platform_remove_one);
+
 EXPORT_SYMBOL_GPL(__ata_ehi_push_desc);
 EXPORT_SYMBOL_GPL(ata_ehi_push_desc);
 EXPORT_SYMBOL_GPL(ata_ehi_clear_desc);
diff --git a/include/linux/libata.h b/include/linux/libata.h
index e931c9ad1078..83ba0ab2c915 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -1115,6 +1115,10 @@ extern int ata_pci_device_resume(struct pci_dev *pdev);
 #endif /* CONFIG_PM */
 #endif /* CONFIG_PCI */
 
+struct platform_device;
+
+extern int ata_platform_remove_one(struct platform_device *pdev);
+
 /*
  * ACPI - drivers/ata/libata-acpi.c
  */
-- 
cgit v1.2.3


From 156f34d26a74c6ea060fb43d898f17509ed8e18a Mon Sep 17 00:00:00 2001
From: Brian Norris <computersforpeace@gmail.com>
Date: Fri, 2 Nov 2012 00:46:24 -0700
Subject: pata_platform: remove unused remove function

All users of __pata_platform_remove() have been converted to utilize the
common ata_platform_remove_one().

Signed-off-by: Brian Norris <computersforpeace@gmail.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 drivers/ata/pata_platform.c  | 17 -----------------
 include/linux/ata_platform.h |  2 --
 2 files changed, 19 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ata/pata_platform.c b/drivers/ata/pata_platform.c
index 449aa29931bc..f4372d0c7ce6 100644
--- a/drivers/ata/pata_platform.c
+++ b/drivers/ata/pata_platform.c
@@ -178,23 +178,6 @@ int __devinit __pata_platform_probe(struct device *dev,
 }
 EXPORT_SYMBOL_GPL(__pata_platform_probe);
 
-/**
- *	__pata_platform_remove		-	unplug a platform interface
- *	@dev: device
- *
- *	A platform bus ATA device has been unplugged. Perform the needed
- *	cleanup. Also called on module unload for any active devices.
- */
-int __pata_platform_remove(struct device *dev)
-{
-	struct ata_host *host = dev_get_drvdata(dev);
-
-	ata_host_detach(host);
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(__pata_platform_remove);
-
 static int __devinit pata_platform_probe(struct platform_device *pdev)
 {
 	struct resource *io_res;
diff --git a/include/linux/ata_platform.h b/include/linux/ata_platform.h
index b856a2a590d9..fe9989636b62 100644
--- a/include/linux/ata_platform.h
+++ b/include/linux/ata_platform.h
@@ -22,8 +22,6 @@ extern int __devinit __pata_platform_probe(struct device *dev,
 					   unsigned int ioport_shift,
 					   int __pio_mask);
 
-extern int __devexit __pata_platform_remove(struct device *dev);
-
 /*
  * Marvell SATA private data
  */
-- 
cgit v1.2.3


From 64b37b2a63eb2f80b65c7185f0013f8ffc637ae3 Mon Sep 17 00:00:00 2001
From: Matthieu CASTET <matthieu.castet@parrot.com>
Date: Tue, 6 Nov 2012 11:51:44 +0100
Subject: mtd: nand: add NAND_BUSWIDTH_AUTO to autodetect bus width

The driver call nand_scan_ident in 8 bit mode, then
readid or onfi detection are done (and detect bus width).
The driver should update its bus width before calling nand_scan_tail.

This work because readid and onfi are read work 8 byte mode.

Note that nand_scan_ident send command (NAND_CMD_RESET, NAND_CMD_READID, NAND_CMD_PARAM), address and read data
The ONFI specificication is not very clear for x16 device if high byte of address should be driven to 0,
but according to [1] it should be ok to not drive it during autodetection.

[1]
3.3.2. Target Initialization

[...]
The Read ID and Read Parameter Page commands only use the lower 8-bits of the data bus.
The host shall not issue commands that use a word data width on x16 devices until the host
determines the device supports a 16-bit data bus width in the parameter page.

Signed-off-by: Matthieu CASTET <matthieu.castet@parrot.com>
Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
---
 drivers/mtd/nand/nand_base.c | 14 +++++++++-----
 include/linux/mtd/nand.h     |  7 +++++++
 2 files changed, 16 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index 6f58e1633e2f..5851c51ac2df 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -3250,11 +3250,15 @@ ident_done:
 			break;
 	}
 
-	/*
-	 * Check, if buswidth is correct. Hardware drivers should set
-	 * chip correct!
-	 */
-	if (busw != (chip->options & NAND_BUSWIDTH_16)) {
+	if (chip->options & NAND_BUSWIDTH_AUTO) {
+		WARN_ON(chip->options & NAND_BUSWIDTH_16);
+		chip->options |= busw;
+		nand_set_defaults(chip, busw);
+	} else if (busw != (chip->options & NAND_BUSWIDTH_16)) {
+		/*
+		 * Check, if buswidth is correct. Hardware drivers should set
+		 * chip correct!
+		 */
 		pr_info("NAND device: Manufacturer ID:"
 			" 0x%02x, Chip ID: 0x%02x (%s %s)\n", *maf_id,
 			*dev_id, nand_manuf_ids[maf_idx].name, mtd->name);
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 9d8a6048aacd..7ccb3c59ed60 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -219,6 +219,13 @@ typedef enum {
 #define NAND_OWN_BUFFERS	0x00020000
 /* Chip may not exist, so silence any errors in scan */
 #define NAND_SCAN_SILENT_NODEV	0x00040000
+/*
+ * Autodetect nand buswidth with readid/onfi.
+ * This suppose the driver will configure the hardware in 8 bits mode
+ * when calling nand_scan_ident, and update its configuration
+ * before calling nand_scan_tail.
+ */
+#define NAND_BUSWIDTH_AUTO      0x00080000
 
 /* Options set by nand scan */
 /* Nand scan has allocated controller struct */
-- 
cgit v1.2.3


From 72b15b6ae97796c5fac687addde5dbfab872cf94 Mon Sep 17 00:00:00 2001
From: Omar Ramirez Luna <omar.luna@linaro.org>
Date: Mon, 19 Nov 2012 19:05:50 -0600
Subject: iommu/omap: Migrate to hwmod framework

Use hwmod data and device attributes to build and register an
omap device for iommu driver.

 - Update the naming convention in isp module.
 - Remove unneeded check for number of resources, as this is now
   handled by omap_device and prevents driver from loading.
 - Now unused, remove platform device and resource data, handling
   of sysconfig register for softreset purposes, use default
   latency structure.
 - Use hwmod API for reset handling.

Signed-off-by: Omar Ramirez Luna <omar.luna@linaro.org>
Tested-by: Ohad Ben-Cohen <ohad@wizery.com>
Signed-off-by: Joerg Roedel <joro@8bytes.org>
---
 arch/arm/mach-omap2/devices.c            |   2 +-
 arch/arm/mach-omap2/omap-iommu.c         | 168 +++++++------------------------
 drivers/iommu/omap-iommu.c               |  23 ++++-
 drivers/iommu/omap-iommu2.c              |  19 ----
 include/linux/platform_data/iommu-omap.h |   8 +-
 5 files changed, 64 insertions(+), 156 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-omap2/devices.c b/arch/arm/mach-omap2/devices.c
index c15f5a97b51c..787a996ec4eb 100644
--- a/arch/arm/mach-omap2/devices.c
+++ b/arch/arm/mach-omap2/devices.c
@@ -214,7 +214,7 @@ static struct platform_device omap3isp_device = {
 };
 
 static struct omap_iommu_arch_data omap3_isp_iommu = {
-	.name = "isp",
+	.name = "mmu_isp",
 };
 
 int omap3_init_camera(struct isp_platform_data *pdata)
diff --git a/arch/arm/mach-omap2/omap-iommu.c b/arch/arm/mach-omap2/omap-iommu.c
index a6a4ff8744b7..02726a647b1d 100644
--- a/arch/arm/mach-omap2/omap-iommu.c
+++ b/arch/arm/mach-omap2/omap-iommu.c
@@ -12,153 +12,61 @@
 
 #include <linux/module.h>
 #include <linux/platform_device.h>
+#include <linux/err.h>
+#include <linux/slab.h>
 
 #include <linux/platform_data/iommu-omap.h>
+#include <plat/omap_hwmod.h>
+#include <plat/omap_device.h>
 
-#include "soc.h"
-#include "common.h"
-
-struct iommu_device {
-	resource_size_t base;
-	int irq;
-	struct iommu_platform_data pdata;
-	struct resource res[2];
-};
-static struct iommu_device *devices;
-static int num_iommu_devices;
-
-#ifdef CONFIG_ARCH_OMAP3
-static struct iommu_device omap3_devices[] = {
-	{
-		.base = 0x480bd400,
-		.irq = 24 + OMAP_INTC_START,
-		.pdata = {
-			.name = "isp",
-			.nr_tlb_entries = 8,
-			.clk_name = "cam_ick",
-			.da_start = 0x0,
-			.da_end = 0xFFFFF000,
-		},
-	},
-#if defined(CONFIG_OMAP_IOMMU_IVA2)
-	{
-		.base = 0x5d000000,
-		.irq = 28 + OMAP_INTC_START,
-		.pdata = {
-			.name = "iva2",
-			.nr_tlb_entries = 32,
-			.clk_name = "iva2_ck",
-			.da_start = 0x11000000,
-			.da_end = 0xFFFFF000,
-		},
-	},
-#endif
-};
-#define NR_OMAP3_IOMMU_DEVICES ARRAY_SIZE(omap3_devices)
-static struct platform_device *omap3_iommu_pdev[NR_OMAP3_IOMMU_DEVICES];
-#else
-#define omap3_devices		NULL
-#define NR_OMAP3_IOMMU_DEVICES	0
-#define omap3_iommu_pdev	NULL
-#endif
-
-#ifdef CONFIG_ARCH_OMAP4
-static struct iommu_device omap4_devices[] = {
-	{
-		.base = OMAP4_MMU1_BASE,
-		.irq = 100 + OMAP44XX_IRQ_GIC_START,
-		.pdata = {
-			.name = "ducati",
-			.nr_tlb_entries = 32,
-			.clk_name = "ipu_fck",
-			.da_start = 0x0,
-			.da_end = 0xFFFFF000,
-		},
-	},
-	{
-		.base = OMAP4_MMU2_BASE,
-		.irq = 28 + OMAP44XX_IRQ_GIC_START,
-		.pdata = {
-			.name = "tesla",
-			.nr_tlb_entries = 32,
-			.clk_name = "dsp_fck",
-			.da_start = 0x0,
-			.da_end = 0xFFFFF000,
-		},
-	},
-};
-#define NR_OMAP4_IOMMU_DEVICES ARRAY_SIZE(omap4_devices)
-static struct platform_device *omap4_iommu_pdev[NR_OMAP4_IOMMU_DEVICES];
-#else
-#define omap4_devices		NULL
-#define NR_OMAP4_IOMMU_DEVICES	0
-#define omap4_iommu_pdev	NULL
-#endif
-
-static struct platform_device **omap_iommu_pdev;
-
-static int __init omap_iommu_init(void)
+static int __init omap_iommu_dev_init(struct omap_hwmod *oh, void *unused)
 {
-	int i, err;
-	struct resource res[] = {
-		{ .flags = IORESOURCE_MEM },
-		{ .flags = IORESOURCE_IRQ },
-	};
+	struct platform_device *pdev;
+	struct iommu_platform_data *pdata;
+	struct omap_mmu_dev_attr *a = (struct omap_mmu_dev_attr *)oh->dev_attr;
+	static int i;
+
+	pdata = kzalloc(sizeof(*pdata), GFP_KERNEL);
+	if (!pdata)
+		return -ENOMEM;
+
+	pdata->name = oh->name;
+	pdata->clk_name = oh->main_clk;
+	pdata->nr_tlb_entries = a->nr_tlb_entries;
+	pdata->da_start = a->da_start;
+	pdata->da_end = a->da_end;
+
+	if (oh->rst_lines_cnt == 1) {
+		pdata->reset_name = oh->rst_lines->name;
+		pdata->assert_reset = omap_device_assert_hardreset;
+		pdata->deassert_reset = omap_device_deassert_hardreset;
+	}
 
-	if (cpu_is_omap34xx()) {
-		devices = omap3_devices;
-		omap_iommu_pdev = omap3_iommu_pdev;
-		num_iommu_devices = NR_OMAP3_IOMMU_DEVICES;
-	} else if (cpu_is_omap44xx()) {
-		devices = omap4_devices;
-		omap_iommu_pdev = omap4_iommu_pdev;
-		num_iommu_devices = NR_OMAP4_IOMMU_DEVICES;
-	} else
-		return -ENODEV;
+	pdev = omap_device_build("omap-iommu", i, oh, pdata, sizeof(*pdata),
+				NULL, 0, 0);
 
-	for (i = 0; i < num_iommu_devices; i++) {
-		struct platform_device *pdev;
-		const struct iommu_device *d = &devices[i];
+	kfree(pdata);
 
-		pdev = platform_device_alloc("omap-iommu", i);
-		if (!pdev) {
-			err = -ENOMEM;
-			goto err_out;
-		}
+	if (IS_ERR(pdev)) {
+		pr_err("%s: device build err: %ld\n", __func__, PTR_ERR(pdev));
+		return PTR_ERR(pdev);
+	}
 
-		res[0].start = d->base;
-		res[0].end = d->base + MMU_REG_SIZE - 1;
-		res[1].start = res[1].end = d->irq;
+	i++;
 
-		err = platform_device_add_resources(pdev, res,
-						    ARRAY_SIZE(res));
-		if (err)
-			goto err_out;
-		err = platform_device_add_data(pdev, &d->pdata,
-					       sizeof(d->pdata));
-		if (err)
-			goto err_out;
-		err = platform_device_add(pdev);
-		if (err)
-			goto err_out;
-		omap_iommu_pdev[i] = pdev;
-	}
 	return 0;
+}
 
-err_out:
-	while (i--)
-		platform_device_put(omap_iommu_pdev[i]);
-	return err;
+static int __init omap_iommu_init(void)
+{
+	return omap_hwmod_for_each_by_class("mmu", omap_iommu_dev_init, NULL);
 }
 /* must be ready before omap3isp is probed */
 subsys_initcall(omap_iommu_init);
 
 static void __exit omap_iommu_exit(void)
 {
-	int i;
-
-	for (i = 0; i < num_iommu_devices; i++)
-		platform_device_unregister(omap_iommu_pdev[i]);
+	/* Do nothing */
 }
 module_exit(omap_iommu_exit);
 
diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c
index f8082da6179b..af9b4f31f594 100644
--- a/drivers/iommu/omap-iommu.c
+++ b/drivers/iommu/omap-iommu.c
@@ -143,13 +143,23 @@ EXPORT_SYMBOL_GPL(omap_iommu_arch_version);
 static int iommu_enable(struct omap_iommu *obj)
 {
 	int err;
+	struct platform_device *pdev = to_platform_device(obj->dev);
+	struct iommu_platform_data *pdata = pdev->dev.platform_data;
 
-	if (!obj)
+	if (!obj || !pdata)
 		return -EINVAL;
 
 	if (!arch_iommu)
 		return -ENODEV;
 
+	if (pdata->deassert_reset) {
+		err = pdata->deassert_reset(pdev, pdata->reset_name);
+		if (err) {
+			dev_err(obj->dev, "deassert_reset failed: %d\n", err);
+			return err;
+		}
+	}
+
 	clk_enable(obj->clk);
 
 	err = arch_iommu->enable(obj);
@@ -159,12 +169,18 @@ static int iommu_enable(struct omap_iommu *obj)
 
 static void iommu_disable(struct omap_iommu *obj)
 {
-	if (!obj)
+	struct platform_device *pdev = to_platform_device(obj->dev);
+	struct iommu_platform_data *pdata = pdev->dev.platform_data;
+
+	if (!obj || !pdata)
 		return;
 
 	arch_iommu->disable(obj);
 
 	clk_disable(obj->clk);
+
+	if (pdata->assert_reset)
+		pdata->assert_reset(pdev, pdata->reset_name);
 }
 
 /*
@@ -926,9 +942,6 @@ static int __devinit omap_iommu_probe(struct platform_device *pdev)
 	struct resource *res;
 	struct iommu_platform_data *pdata = pdev->dev.platform_data;
 
-	if (pdev->num_resources != 2)
-		return -EINVAL;
-
 	obj = kzalloc(sizeof(*obj) + MMU_REG_SIZE, GFP_KERNEL);
 	if (!obj)
 		return -ENOMEM;
diff --git a/drivers/iommu/omap-iommu2.c b/drivers/iommu/omap-iommu2.c
index c02020292377..4a3a1c7a38c1 100644
--- a/drivers/iommu/omap-iommu2.c
+++ b/drivers/iommu/omap-iommu2.c
@@ -35,12 +35,8 @@
 #define MMU_SYS_IDLE_SMART	(2 << MMU_SYS_IDLE_SHIFT)
 #define MMU_SYS_IDLE_MASK	(3 << MMU_SYS_IDLE_SHIFT)
 
-#define MMU_SYS_SOFTRESET	(1 << 1)
 #define MMU_SYS_AUTOIDLE	1
 
-/* SYSSTATUS */
-#define MMU_SYS_RESETDONE	1
-
 /* IRQSTATUS & IRQENABLE */
 #define MMU_IRQ_MULTIHITFAULT	(1 << 4)
 #define MMU_IRQ_TABLEWALKFAULT	(1 << 3)
@@ -97,7 +93,6 @@ static void __iommu_set_twl(struct omap_iommu *obj, bool on)
 static int omap2_iommu_enable(struct omap_iommu *obj)
 {
 	u32 l, pa;
-	unsigned long timeout;
 
 	if (!obj->iopgd || !IS_ALIGNED((u32)obj->iopgd,  SZ_16K))
 		return -EINVAL;
@@ -106,20 +101,6 @@ static int omap2_iommu_enable(struct omap_iommu *obj)
 	if (!IS_ALIGNED(pa, SZ_16K))
 		return -EINVAL;
 
-	iommu_write_reg(obj, MMU_SYS_SOFTRESET, MMU_SYSCONFIG);
-
-	timeout = jiffies + msecs_to_jiffies(20);
-	do {
-		l = iommu_read_reg(obj, MMU_SYSSTATUS);
-		if (l & MMU_SYS_RESETDONE)
-			break;
-	} while (!time_after(jiffies, timeout));
-
-	if (!(l & MMU_SYS_RESETDONE)) {
-		dev_err(obj->dev, "can't take mmu out of reset\n");
-		return -ENODEV;
-	}
-
 	l = iommu_read_reg(obj, MMU_REVISION);
 	dev_info(obj->dev, "%s: version %d.%d\n", obj->name,
 		 (l >> 4) & 0xf, l & 0xf);
diff --git a/include/linux/platform_data/iommu-omap.h b/include/linux/platform_data/iommu-omap.h
index c677b9f2fefa..ef2060d7eeb8 100644
--- a/include/linux/platform_data/iommu-omap.h
+++ b/include/linux/platform_data/iommu-omap.h
@@ -10,6 +10,8 @@
  * published by the Free Software Foundation.
  */
 
+#include <linux/platform_device.h>
+
 #define MMU_REG_SIZE		256
 
 /**
@@ -43,7 +45,11 @@ struct omap_mmu_dev_attr {
 struct iommu_platform_data {
 	const char *name;
 	const char *clk_name;
-	const int nr_tlb_entries;
+	const char *reset_name;
+	int nr_tlb_entries;
 	u32 da_start;
 	u32 da_end;
+
+	int (*assert_reset)(struct platform_device *pdev, const char *name);
+	int (*deassert_reset)(struct platform_device *pdev, const char *name);
 };
-- 
cgit v1.2.3


From ebf7cda0f92effd8169b831fae81e9437dce1fef Mon Sep 17 00:00:00 2001
From: Omar Ramirez Luna <omar.luna@linaro.org>
Date: Mon, 19 Nov 2012 19:05:51 -0600
Subject: iommu/omap: Adapt to runtime pm

Use runtime PM functionality interfaced with hwmod enable/idle
functions, to replace direct clock operations and sysconfig
handling.

Due to reset sequence, pm_runtime_[get|put]_sync must be used, to
avoid possible operations with the module under reset. Because of
this and given that the driver uses spin_locks to protect their
critical sections, we must use pm_runtime_irq_safe in order for the
runtime ops to be happy, otherwise might_sleep_if checks in runtime
framework will complain.

The remaining pm_runtime out of iommu_enable and iommu_disable
corresponds to paths that can be accessed through debugfs, some of
them doesn't work if the module is not enabled first, but in future
if the mmu is idled withouth freeing, these are needed to debug.

Signed-off-by: Omar Ramirez Luna <omar.luna@linaro.org>
Tested-by: Ohad Ben-Cohen <ohad@wizery.com>
Acked-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Joerg Roedel <joro@8bytes.org>
---
 arch/arm/mach-omap2/omap-iommu.c         |  1 -
 drivers/iommu/omap-iommu.c               | 40 +++++++++++++++-----------------
 drivers/iommu/omap-iommu.h               |  3 ---
 drivers/iommu/omap-iommu2.c              | 17 --------------
 include/linux/platform_data/iommu-omap.h |  1 -
 5 files changed, 19 insertions(+), 43 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-omap2/omap-iommu.c b/arch/arm/mach-omap2/omap-iommu.c
index 02726a647b1d..7642fc4672c1 100644
--- a/arch/arm/mach-omap2/omap-iommu.c
+++ b/arch/arm/mach-omap2/omap-iommu.c
@@ -31,7 +31,6 @@ static int __init omap_iommu_dev_init(struct omap_hwmod *oh, void *unused)
 		return -ENOMEM;
 
 	pdata->name = oh->name;
-	pdata->clk_name = oh->main_clk;
 	pdata->nr_tlb_entries = a->nr_tlb_entries;
 	pdata->da_start = a->da_start;
 	pdata->da_end = a->da_end;
diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c
index af9b4f31f594..18108c1405e2 100644
--- a/drivers/iommu/omap-iommu.c
+++ b/drivers/iommu/omap-iommu.c
@@ -16,13 +16,13 @@
 #include <linux/slab.h>
 #include <linux/interrupt.h>
 #include <linux/ioport.h>
-#include <linux/clk.h>
 #include <linux/platform_device.h>
 #include <linux/iommu.h>
 #include <linux/omap-iommu.h>
 #include <linux/mutex.h>
 #include <linux/spinlock.h>
 #include <linux/io.h>
+#include <linux/pm_runtime.h>
 
 #include <asm/cacheflush.h>
 
@@ -160,7 +160,7 @@ static int iommu_enable(struct omap_iommu *obj)
 		}
 	}
 
-	clk_enable(obj->clk);
+	pm_runtime_get_sync(obj->dev);
 
 	err = arch_iommu->enable(obj);
 
@@ -177,7 +177,7 @@ static void iommu_disable(struct omap_iommu *obj)
 
 	arch_iommu->disable(obj);
 
-	clk_disable(obj->clk);
+	pm_runtime_put_sync(obj->dev);
 
 	if (pdata->assert_reset)
 		pdata->assert_reset(pdev, pdata->reset_name);
@@ -303,7 +303,7 @@ static int load_iotlb_entry(struct omap_iommu *obj, struct iotlb_entry *e)
 	if (!obj || !obj->nr_tlb_entries || !e)
 		return -EINVAL;
 
-	clk_enable(obj->clk);
+	pm_runtime_get_sync(obj->dev);
 
 	iotlb_lock_get(obj, &l);
 	if (l.base == obj->nr_tlb_entries) {
@@ -333,7 +333,7 @@ static int load_iotlb_entry(struct omap_iommu *obj, struct iotlb_entry *e)
 
 	cr = iotlb_alloc_cr(obj, e);
 	if (IS_ERR(cr)) {
-		clk_disable(obj->clk);
+		pm_runtime_put_sync(obj->dev);
 		return PTR_ERR(cr);
 	}
 
@@ -347,7 +347,7 @@ static int load_iotlb_entry(struct omap_iommu *obj, struct iotlb_entry *e)
 		l.vict = l.base;
 	iotlb_lock_set(obj, &l);
 out:
-	clk_disable(obj->clk);
+	pm_runtime_put_sync(obj->dev);
 	return err;
 }
 
@@ -377,7 +377,7 @@ static void flush_iotlb_page(struct omap_iommu *obj, u32 da)
 	int i;
 	struct cr_regs cr;
 
-	clk_enable(obj->clk);
+	pm_runtime_get_sync(obj->dev);
 
 	for_each_iotlb_cr(obj, obj->nr_tlb_entries, i, cr) {
 		u32 start;
@@ -396,7 +396,7 @@ static void flush_iotlb_page(struct omap_iommu *obj, u32 da)
 			iommu_write_reg(obj, 1, MMU_FLUSH_ENTRY);
 		}
 	}
-	clk_disable(obj->clk);
+	pm_runtime_put_sync(obj->dev);
 
 	if (i == obj->nr_tlb_entries)
 		dev_dbg(obj->dev, "%s: no page for %08x\n", __func__, da);
@@ -410,7 +410,7 @@ static void flush_iotlb_all(struct omap_iommu *obj)
 {
 	struct iotlb_lock l;
 
-	clk_enable(obj->clk);
+	pm_runtime_get_sync(obj->dev);
 
 	l.base = 0;
 	l.vict = 0;
@@ -418,7 +418,7 @@ static void flush_iotlb_all(struct omap_iommu *obj)
 
 	iommu_write_reg(obj, 1, MMU_GFLUSH);
 
-	clk_disable(obj->clk);
+	pm_runtime_put_sync(obj->dev);
 }
 
 #if defined(CONFIG_OMAP_IOMMU_DEBUG) || defined(CONFIG_OMAP_IOMMU_DEBUG_MODULE)
@@ -428,11 +428,11 @@ ssize_t omap_iommu_dump_ctx(struct omap_iommu *obj, char *buf, ssize_t bytes)
 	if (!obj || !buf)
 		return -EINVAL;
 
-	clk_enable(obj->clk);
+	pm_runtime_get_sync(obj->dev);
 
 	bytes = arch_iommu->dump_ctx(obj, buf, bytes);
 
-	clk_disable(obj->clk);
+	pm_runtime_put_sync(obj->dev);
 
 	return bytes;
 }
@@ -446,7 +446,7 @@ __dump_tlb_entries(struct omap_iommu *obj, struct cr_regs *crs, int num)
 	struct cr_regs tmp;
 	struct cr_regs *p = crs;
 
-	clk_enable(obj->clk);
+	pm_runtime_get_sync(obj->dev);
 	iotlb_lock_get(obj, &saved);
 
 	for_each_iotlb_cr(obj, num, i, tmp) {
@@ -456,7 +456,7 @@ __dump_tlb_entries(struct omap_iommu *obj, struct cr_regs *crs, int num)
 	}
 
 	iotlb_lock_set(obj, &saved);
-	clk_disable(obj->clk);
+	pm_runtime_put_sync(obj->dev);
 
 	return  p - crs;
 }
@@ -946,10 +946,6 @@ static int __devinit omap_iommu_probe(struct platform_device *pdev)
 	if (!obj)
 		return -ENOMEM;
 
-	obj->clk = clk_get(&pdev->dev, pdata->clk_name);
-	if (IS_ERR(obj->clk))
-		goto err_clk;
-
 	obj->nr_tlb_entries = pdata->nr_tlb_entries;
 	obj->name = pdata->name;
 	obj->dev = &pdev->dev;
@@ -992,6 +988,9 @@ static int __devinit omap_iommu_probe(struct platform_device *pdev)
 		goto err_irq;
 	platform_set_drvdata(pdev, obj);
 
+	pm_runtime_irq_safe(obj->dev);
+	pm_runtime_enable(obj->dev);
+
 	dev_info(&pdev->dev, "%s registered\n", obj->name);
 	return 0;
 
@@ -1000,8 +999,6 @@ err_irq:
 err_ioremap:
 	release_mem_region(res->start, resource_size(res));
 err_mem:
-	clk_put(obj->clk);
-err_clk:
 	kfree(obj);
 	return err;
 }
@@ -1022,7 +1019,8 @@ static int __devexit omap_iommu_remove(struct platform_device *pdev)
 	release_mem_region(res->start, resource_size(res));
 	iounmap(obj->regbase);
 
-	clk_put(obj->clk);
+	pm_runtime_disable(obj->dev);
+
 	dev_info(&pdev->dev, "%s removed\n", obj->name);
 	kfree(obj);
 	return 0;
diff --git a/drivers/iommu/omap-iommu.h b/drivers/iommu/omap-iommu.h
index 2b5f3c04d167..120084206602 100644
--- a/drivers/iommu/omap-iommu.h
+++ b/drivers/iommu/omap-iommu.h
@@ -29,7 +29,6 @@ struct iotlb_entry {
 struct omap_iommu {
 	const char	*name;
 	struct module	*owner;
-	struct clk	*clk;
 	void __iomem	*regbase;
 	struct device	*dev;
 	void		*isr_priv;
@@ -116,8 +115,6 @@ static inline struct omap_iommu *dev_to_omap_iommu(struct device *dev)
  * MMU Register offsets
  */
 #define MMU_REVISION		0x00
-#define MMU_SYSCONFIG		0x10
-#define MMU_SYSSTATUS		0x14
 #define MMU_IRQSTATUS		0x18
 #define MMU_IRQENABLE		0x1c
 #define MMU_WALKING_ST		0x40
diff --git a/drivers/iommu/omap-iommu2.c b/drivers/iommu/omap-iommu2.c
index 4a3a1c7a38c1..d745094a69dd 100644
--- a/drivers/iommu/omap-iommu2.c
+++ b/drivers/iommu/omap-iommu2.c
@@ -28,15 +28,6 @@
  */
 #define IOMMU_ARCH_VERSION	0x00000011
 
-/* SYSCONF */
-#define MMU_SYS_IDLE_SHIFT	3
-#define MMU_SYS_IDLE_FORCE	(0 << MMU_SYS_IDLE_SHIFT)
-#define MMU_SYS_IDLE_NONE	(1 << MMU_SYS_IDLE_SHIFT)
-#define MMU_SYS_IDLE_SMART	(2 << MMU_SYS_IDLE_SHIFT)
-#define MMU_SYS_IDLE_MASK	(3 << MMU_SYS_IDLE_SHIFT)
-
-#define MMU_SYS_AUTOIDLE	1
-
 /* IRQSTATUS & IRQENABLE */
 #define MMU_IRQ_MULTIHITFAULT	(1 << 4)
 #define MMU_IRQ_TABLEWALKFAULT	(1 << 3)
@@ -105,11 +96,6 @@ static int omap2_iommu_enable(struct omap_iommu *obj)
 	dev_info(obj->dev, "%s: version %d.%d\n", obj->name,
 		 (l >> 4) & 0xf, l & 0xf);
 
-	l = iommu_read_reg(obj, MMU_SYSCONFIG);
-	l &= ~MMU_SYS_IDLE_MASK;
-	l |= (MMU_SYS_IDLE_SMART | MMU_SYS_AUTOIDLE);
-	iommu_write_reg(obj, l, MMU_SYSCONFIG);
-
 	iommu_write_reg(obj, pa, MMU_TTB);
 
 	__iommu_set_twl(obj, true);
@@ -123,7 +109,6 @@ static void omap2_iommu_disable(struct omap_iommu *obj)
 
 	l &= ~MMU_CNTL_MASK;
 	iommu_write_reg(obj, l, MMU_CNTL);
-	iommu_write_reg(obj, MMU_SYS_IDLE_FORCE, MMU_SYSCONFIG);
 
 	dev_dbg(obj->dev, "%s is shutting down\n", obj->name);
 }
@@ -252,8 +237,6 @@ omap2_iommu_dump_ctx(struct omap_iommu *obj, char *buf, ssize_t len)
 	char *p = buf;
 
 	pr_reg(REVISION);
-	pr_reg(SYSCONFIG);
-	pr_reg(SYSSTATUS);
 	pr_reg(IRQSTATUS);
 	pr_reg(IRQENABLE);
 	pr_reg(WALKING_ST);
diff --git a/include/linux/platform_data/iommu-omap.h b/include/linux/platform_data/iommu-omap.h
index ef2060d7eeb8..5b429c43a297 100644
--- a/include/linux/platform_data/iommu-omap.h
+++ b/include/linux/platform_data/iommu-omap.h
@@ -44,7 +44,6 @@ struct omap_mmu_dev_attr {
 
 struct iommu_platform_data {
 	const char *name;
-	const char *clk_name;
 	const char *reset_name;
 	int nr_tlb_entries;
 	u32 da_start;
-- 
cgit v1.2.3


From cc248d4b1ddf05fefc1373d9d7a4dd1df71b6190 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Mon, 3 Dec 2012 16:11:13 -0500
Subject: svcrpc: don't byte-swap sk_reclen in place

Byte-swapping in place is always a little dubious.

Let's instead define this field to always be big-endian, and do the
swapping on demand where we need it.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/svcsock.h | 12 +++++++++++-
 net/sunrpc/svcsock.c           | 26 +++++++++++---------------
 2 files changed, 22 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
index 92ad02f0dcc0..613cf42227aa 100644
--- a/include/linux/sunrpc/svcsock.h
+++ b/include/linux/sunrpc/svcsock.h
@@ -26,11 +26,21 @@ struct svc_sock {
 	void			(*sk_owspace)(struct sock *);
 
 	/* private TCP part */
-	u32			sk_reclen;	/* length of record */
+	__be32			sk_reclen;	/* length of record */
 	u32			sk_tcplen;	/* current read length */
 	struct page *		sk_pages[RPCSVC_MAXPAGES];	/* received data */
 };
 
+static inline u32 svc_sock_reclen(struct svc_sock *svsk)
+{
+	return ntohl(svsk->sk_reclen) & RPC_FRAGMENT_SIZE_MASK;
+}
+
+static inline u32 svc_sock_final_rec(struct svc_sock *svsk)
+{
+	return ntohl(svsk->sk_reclen) & RPC_LAST_STREAM_FRAGMENT;
+}
+
 /*
  * Function prototypes.
  */
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 03827cef1fa7..d50de2b95036 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -950,8 +950,7 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp)
 			return -EAGAIN;
 		}
 
-		svsk->sk_reclen = ntohl(svsk->sk_reclen);
-		if (!(svsk->sk_reclen & RPC_LAST_STREAM_FRAGMENT)) {
+		if (!(svc_sock_final_rec(svsk))) {
 			/* FIXME: technically, a record can be fragmented,
 			 *  and non-terminal fragments will not have the top
 			 *  bit set in the fragment length header.
@@ -961,21 +960,18 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp)
 			goto err_delete;
 		}
 
-		svsk->sk_reclen &= RPC_FRAGMENT_SIZE_MASK;
-		dprintk("svc: TCP record, %d bytes\n", svsk->sk_reclen);
-		if (svsk->sk_reclen > serv->sv_max_mesg) {
+		dprintk("svc: TCP record, %d bytes\n", svc_sock_reclen(svsk));
+		if (svc_sock_reclen(svsk) > serv->sv_max_mesg) {
 			net_notice_ratelimited("RPC: fragment too large: 0x%08lx\n",
-					       (unsigned long)svsk->sk_reclen);
+					(unsigned long)svc_sock_reclen(svsk));
 			goto err_delete;
 		}
 	}
 
-	if (svsk->sk_reclen < 8)
+	if (svc_sock_reclen(svsk) < 8)
 		goto err_delete; /* client is nuts. */
 
-	len = svsk->sk_reclen;
-
-	return len;
+	return svc_sock_reclen(svsk);
 error:
 	dprintk("RPC: TCP recv_record got %d\n", len);
 	return len;
@@ -1019,7 +1015,7 @@ static int receive_cb_reply(struct svc_sock *svsk, struct svc_rqst *rqstp)
 	if (dst->iov_len < src->iov_len)
 		return -EAGAIN; /* whatever; just giving up. */
 	memcpy(dst->iov_base, src->iov_base, src->iov_len);
-	xprt_complete_rqst(req->rq_task, svsk->sk_reclen);
+	xprt_complete_rqst(req->rq_task, rqstp->rq_arg.len);
 	rqstp->rq_arg.len = 0;
 	return 0;
 }
@@ -1064,12 +1060,12 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
 		goto error;
 
 	base = svc_tcp_restore_pages(svsk, rqstp);
-	want = svsk->sk_reclen - base;
+	want = svc_sock_reclen(svsk) - base;
 
 	vec = rqstp->rq_vec;
 
 	pnum = copy_pages_to_kvecs(&vec[0], &rqstp->rq_pages[0],
-						svsk->sk_reclen);
+						svc_sock_reclen(svsk));
 
 	rqstp->rq_respages = &rqstp->rq_pages[pnum];
 
@@ -1082,11 +1078,11 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
 		if (len < 0 && len != -EAGAIN)
 			goto err_other;
 		dprintk("svc: incomplete TCP record (%d of %d)\n",
-			svsk->sk_tcplen, svsk->sk_reclen);
+			svsk->sk_tcplen, svc_sock_reclen(svsk));
 		goto err_noclose;
 	}
 
-	rqstp->rq_arg.len = svsk->sk_reclen;
+	rqstp->rq_arg.len = svc_sock_reclen(svsk);
 	rqstp->rq_arg.page_base = 0;
 	if (rqstp->rq_arg.len <= rqstp->rq_arg.head[0].iov_len) {
 		rqstp->rq_arg.head[0].iov_len = rqstp->rq_arg.len;
-- 
cgit v1.2.3


From 8af345f58ac9b350bb23c1457c613381d9f00472 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Mon, 3 Dec 2012 16:45:35 -0500
Subject: svcrpc: track rpc data length separately from sk_tcplen

Keep a separate field, sk_datalen, that tracks only the data contained
in a fragment, not including the fragment header.

For now, this is always just max(0, sk_tcplen - 4), but after we allow
multiple fragments sk_datalen will accumulate the total rpc data size
while sk_tcplen only tracks progress receiving the current fragment.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/svcsock.h | 11 +++++++++--
 net/sunrpc/svcsock.c           | 19 ++++++++++++-------
 2 files changed, 21 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
index 613cf42227aa..62fd1b756e99 100644
--- a/include/linux/sunrpc/svcsock.h
+++ b/include/linux/sunrpc/svcsock.h
@@ -26,8 +26,15 @@ struct svc_sock {
 	void			(*sk_owspace)(struct sock *);
 
 	/* private TCP part */
-	__be32			sk_reclen;	/* length of record */
-	u32			sk_tcplen;	/* current read length */
+	/* On-the-wire fragment header: */
+	__be32			sk_reclen;
+	/* As we receive a record, this includes the length received so
+	 * far (including the fragment header): */
+	u32			sk_tcplen;
+	/* Total length of the data (not including fragment headers)
+	 * received so far in the fragments making up this rpc: */
+	u32			sk_datalen;
+
 	struct page *		sk_pages[RPCSVC_MAXPAGES];	/* received data */
 };
 
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 1db42b1ffe28..2b09e2306bfa 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -874,9 +874,9 @@ static unsigned int svc_tcp_restore_pages(struct svc_sock *svsk, struct svc_rqst
 {
 	unsigned int i, len, npages;
 
-	if (svsk->sk_tcplen <= sizeof(rpc_fraghdr))
+	if (svsk->sk_datalen == 0)
 		return 0;
-	len = svsk->sk_tcplen - sizeof(rpc_fraghdr);
+	len = svsk->sk_datalen;
 	npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
 	for (i = 0; i < npages; i++) {
 		if (rqstp->rq_pages[i] != NULL)
@@ -893,9 +893,9 @@ static void svc_tcp_save_pages(struct svc_sock *svsk, struct svc_rqst *rqstp)
 {
 	unsigned int i, len, npages;
 
-	if (svsk->sk_tcplen <= sizeof(rpc_fraghdr))
+	if (svsk->sk_datalen == 0)
 		return;
-	len = svsk->sk_tcplen - sizeof(rpc_fraghdr);
+	len = svsk->sk_datalen;
 	npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
 	for (i = 0; i < npages; i++) {
 		svsk->sk_pages[i] = rqstp->rq_pages[i];
@@ -907,9 +907,9 @@ static void svc_tcp_clear_pages(struct svc_sock *svsk)
 {
 	unsigned int i, len, npages;
 
-	if (svsk->sk_tcplen <= sizeof(rpc_fraghdr))
+	if (svsk->sk_datalen == 0)
 		goto out;
-	len = svsk->sk_tcplen - sizeof(rpc_fraghdr);
+	len = svsk->sk_datalen;
 	npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
 	for (i = 0; i < npages; i++) {
 		BUG_ON(svsk->sk_pages[i] == NULL);
@@ -918,6 +918,7 @@ static void svc_tcp_clear_pages(struct svc_sock *svsk)
 	}
 out:
 	svsk->sk_tcplen = 0;
+	svsk->sk_datalen = 0;
 }
 
 /*
@@ -1066,8 +1067,10 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
 
 	/* Now receive data */
 	len = svc_partial_recvfrom(rqstp, vec, pnum, want, base);
-	if (len >= 0)
+	if (len >= 0) {
 		svsk->sk_tcplen += len;
+		svsk->sk_datalen += len;
+	}
 	if (len != want) {
 		svc_tcp_save_pages(svsk, rqstp);
 		if (len < 0 && len != -EAGAIN)
@@ -1100,6 +1103,7 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
 	/* Reset TCP read info */
 	svsk->sk_reclen = 0;
 	svsk->sk_tcplen = 0;
+	svsk->sk_datalen = 0;
 	/* If we have more data, signal svc_xprt_enqueue() to try again */
 	if (svc_recv_available(svsk) > sizeof(rpc_fraghdr))
 		set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
@@ -1296,6 +1300,7 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv)
 
 		svsk->sk_reclen = 0;
 		svsk->sk_tcplen = 0;
+		svsk->sk_datalen = 0;
 		memset(&svsk->sk_pages[0], 0, sizeof(svsk->sk_pages));
 
 		tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF;
-- 
cgit v1.2.3


From 8fa72d234da9b6b473bbb1f74d533663e4996e6b Mon Sep 17 00:00:00 2001
From: Jeff Moyer <jmoyer@redhat.com>
Date: Wed, 5 Dec 2012 20:17:21 +0100
Subject: bdi: add a user-tunable cpu_list for the bdi flusher threads

In realtime environments, it may be desirable to keep the per-bdi
flusher threads from running on certain cpus.  This patch adds a
cpu_list file to /sys/class/bdi/* to enable this.  The default is to tie
the flusher threads to the same numa node as the backing device (though
I could be convinced to make it a mask of all cpus to avoid a change in
behaviour).

Thanks to Jeremy Eder for the original idea.

Signed-off-by: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/backing-dev.h |  4 +++
 mm/backing-dev.c            | 84 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 88 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 2a9a9abc9126..238521a19849 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -18,6 +18,7 @@
 #include <linux/writeback.h>
 #include <linux/atomic.h>
 #include <linux/sysctl.h>
+#include <linux/mutex.h>
 
 struct page;
 struct device;
@@ -105,6 +106,9 @@ struct backing_dev_info {
 
 	struct timer_list laptop_mode_wb_timer;
 
+	cpumask_t *flusher_cpumask; /* used for writeback thread scheduling */
+	struct mutex flusher_cpumask_lock;
+
 #ifdef CONFIG_DEBUG_FS
 	struct dentry *debug_dir;
 	struct dentry *debug_stats;
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index d3ca2b3ee176..bd6a6cabef71 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -10,6 +10,7 @@
 #include <linux/module.h>
 #include <linux/writeback.h>
 #include <linux/device.h>
+#include <linux/slab.h>
 #include <trace/events/writeback.h>
 
 static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);
@@ -221,12 +222,63 @@ static ssize_t max_ratio_store(struct device *dev,
 }
 BDI_SHOW(max_ratio, bdi->max_ratio)
 
+static ssize_t cpu_list_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct backing_dev_info *bdi = dev_get_drvdata(dev);
+	struct bdi_writeback *wb = &bdi->wb;
+	cpumask_var_t newmask;
+	ssize_t ret;
+	struct task_struct *task;
+
+	if (!alloc_cpumask_var(&newmask, GFP_KERNEL))
+		return -ENOMEM;
+
+	ret = cpulist_parse(buf, newmask);
+	if (!ret) {
+		spin_lock_bh(&bdi->wb_lock);
+		task = wb->task;
+		if (task)
+			get_task_struct(task);
+		spin_unlock_bh(&bdi->wb_lock);
+
+		mutex_lock(&bdi->flusher_cpumask_lock);
+		if (task) {
+			ret = set_cpus_allowed_ptr(task, newmask);
+			put_task_struct(task);
+		}
+		if (ret == 0) {
+			cpumask_copy(bdi->flusher_cpumask, newmask);
+			ret = count;
+		}
+		mutex_unlock(&bdi->flusher_cpumask_lock);
+
+	}
+	free_cpumask_var(newmask);
+
+	return ret;
+}
+
+static ssize_t cpu_list_show(struct device *dev,
+		struct device_attribute *attr, char *page)
+{
+	struct backing_dev_info *bdi = dev_get_drvdata(dev);
+	ssize_t ret;
+
+	mutex_lock(&bdi->flusher_cpumask_lock);
+	ret = cpulist_scnprintf(page, PAGE_SIZE-1, bdi->flusher_cpumask);
+	mutex_unlock(&bdi->flusher_cpumask_lock);
+
+	return ret;
+}
+
 #define __ATTR_RW(attr) __ATTR(attr, 0644, attr##_show, attr##_store)
 
 static struct device_attribute bdi_dev_attrs[] = {
 	__ATTR_RW(read_ahead_kb),
 	__ATTR_RW(min_ratio),
 	__ATTR_RW(max_ratio),
+	__ATTR_RW(cpu_list),
 	__ATTR_NULL,
 };
 
@@ -428,6 +480,7 @@ static int bdi_forker_thread(void *ptr)
 				writeback_inodes_wb(&bdi->wb, 1024,
 						    WB_REASON_FORKER_THREAD);
 			} else {
+				int ret;
 				/*
 				 * The spinlock makes sure we do not lose
 				 * wake-ups when racing with 'bdi_queue_work()'.
@@ -437,6 +490,14 @@ static int bdi_forker_thread(void *ptr)
 				spin_lock_bh(&bdi->wb_lock);
 				bdi->wb.task = task;
 				spin_unlock_bh(&bdi->wb_lock);
+				mutex_lock(&bdi->flusher_cpumask_lock);
+				ret = set_cpus_allowed_ptr(task,
+							bdi->flusher_cpumask);
+				mutex_unlock(&bdi->flusher_cpumask_lock);
+				if (ret)
+					printk_once("%s: failed to bind flusher"
+						    " thread %s, error %d\n",
+						    __func__, task->comm, ret);
 				wake_up_process(task);
 			}
 			bdi_clear_pending(bdi);
@@ -509,6 +570,17 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent,
 						dev_name(dev));
 		if (IS_ERR(wb->task))
 			return PTR_ERR(wb->task);
+	} else {
+		int node;
+		/*
+		 * Set up a default cpumask for the flusher threads that
+		 * includes all cpus on the same numa node as the device.
+		 * The mask may be overridden via sysfs.
+		 */
+		node = dev_to_node(bdi->dev);
+		if (node != NUMA_NO_NODE)
+			cpumask_copy(bdi->flusher_cpumask,
+				     cpumask_of_node(node));
 	}
 
 	bdi_debug_register(bdi, dev_name(dev));
@@ -634,6 +706,15 @@ int bdi_init(struct backing_dev_info *bdi)
 
 	bdi_wb_init(&bdi->wb, bdi);
 
+	if (!bdi_cap_flush_forker(bdi)) {
+		bdi->flusher_cpumask = kmalloc(sizeof(cpumask_t), GFP_KERNEL);
+		if (!bdi->flusher_cpumask)
+			return -ENOMEM;
+		cpumask_setall(bdi->flusher_cpumask);
+		mutex_init(&bdi->flusher_cpumask_lock);
+	} else
+		bdi->flusher_cpumask = NULL;
+
 	for (i = 0; i < NR_BDI_STAT_ITEMS; i++) {
 		err = percpu_counter_init(&bdi->bdi_stat[i], 0);
 		if (err)
@@ -656,6 +737,7 @@ int bdi_init(struct backing_dev_info *bdi)
 err:
 		while (i--)
 			percpu_counter_destroy(&bdi->bdi_stat[i]);
+		kfree(bdi->flusher_cpumask);
 	}
 
 	return err;
@@ -683,6 +765,8 @@ void bdi_destroy(struct backing_dev_info *bdi)
 
 	bdi_unregister(bdi);
 
+	kfree(bdi->flusher_cpumask);
+
 	/*
 	 * If bdi_unregister() had already been called earlier, the
 	 * wakeup_timer could still be armed because bdi_prune_sb()
-- 
cgit v1.2.3


From 161b96c383c442f4d7dabbb8500a5fbd551b344d Mon Sep 17 00:00:00 2001
From: Alexander Shiyan <shc_work@mail.ru>
Date: Wed, 7 Nov 2012 21:30:29 +0400
Subject: spi/clps711x: New SPI master driver

This patch add new driver for CLPS711X SPI master controller.
Due to platform limitations driver supports only 8 bit transfer mode.
Chip select control is handled via GPIO.

Signed-off-by: Alexander Shiyan <shc_work@mail.ru>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
---
 drivers/spi/Kconfig                        |   7 +
 drivers/spi/Makefile                       |   1 +
 drivers/spi/spi-clps711x.c                 | 296 +++++++++++++++++++++++++++++
 include/linux/platform_data/spi-clps711x.h |  21 ++
 4 files changed, 325 insertions(+)
 create mode 100644 drivers/spi/spi-clps711x.c
 create mode 100644 include/linux/platform_data/spi-clps711x.h

(limited to 'include/linux')

diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
index 1acae359cabe..2a13e637e46b 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig
@@ -123,6 +123,13 @@ config SPI_BUTTERFLY
 	  inexpensive battery powered microcontroller evaluation board.
 	  This same cable can be used to flash new firmware.
 
+config SPI_CLPS711X
+	tristate "CLPS711X host SPI controller"
+	depends on ARCH_CLPS711X
+	help
+	  This enables dedicated general purpose SPI/Microwire1-compatible
+	  master mode interface (SSI1) for CLPS711X-based CPUs.
+
 config SPI_COLDFIRE_QSPI
 	tristate "Freescale Coldfire QSPI controller"
 	depends on (M520x || M523x || M5249 || M525x || M527x || M528x || M532x)
diff --git a/drivers/spi/Makefile b/drivers/spi/Makefile
index c48df47e4b0f..25ab4a1cd014 100644
--- a/drivers/spi/Makefile
+++ b/drivers/spi/Makefile
@@ -19,6 +19,7 @@ obj-$(CONFIG_SPI_BFIN5XX)		+= spi-bfin5xx.o
 obj-$(CONFIG_SPI_BFIN_SPORT)		+= spi-bfin-sport.o
 obj-$(CONFIG_SPI_BITBANG)		+= spi-bitbang.o
 obj-$(CONFIG_SPI_BUTTERFLY)		+= spi-butterfly.o
+obj-$(CONFIG_SPI_CLPS711X)		+= spi-clps711x.o
 obj-$(CONFIG_SPI_COLDFIRE_QSPI)		+= spi-coldfire-qspi.o
 obj-$(CONFIG_SPI_DAVINCI)		+= spi-davinci.o
 obj-$(CONFIG_SPI_DESIGNWARE)		+= spi-dw.o
diff --git a/drivers/spi/spi-clps711x.c b/drivers/spi/spi-clps711x.c
new file mode 100644
index 000000000000..59677eb30f94
--- /dev/null
+++ b/drivers/spi/spi-clps711x.c
@@ -0,0 +1,296 @@
+/*
+ *  CLPS711X SPI bus driver
+ *
+ *  Copyright (C) 2012 Alexander Shiyan <shc_work@mail.ru>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/io.h>
+#include <linux/clk.h>
+#include <linux/init.h>
+#include <linux/gpio.h>
+#include <linux/delay.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+#include <linux/spi/spi.h>
+#include <linux/platform_data/spi-clps711x.h>
+
+#include <mach/hardware.h>
+
+#define DRIVER_NAME	"spi-clps711x"
+
+struct spi_clps711x_data {
+	struct completion	done;
+
+	struct clk		*spi_clk;
+	u32			max_speed_hz;
+
+	u8			*tx_buf;
+	u8			*rx_buf;
+	int			count;
+	int			len;
+
+	int			chipselect[0];
+};
+
+static int spi_clps711x_setup(struct spi_device *spi)
+{
+	struct spi_clps711x_data *hw = spi_master_get_devdata(spi->master);
+
+	if (spi->bits_per_word != 8) {
+		dev_err(&spi->dev, "Unsupported master bus width %i\n",
+			spi->bits_per_word);
+		return -EINVAL;
+	}
+
+	/* We are expect that SPI-device is not selected */
+	gpio_direction_output(hw->chipselect[spi->chip_select],
+			      !(spi->mode & SPI_CS_HIGH));
+
+	return 0;
+}
+
+static void spi_clps711x_setup_mode(struct spi_device *spi)
+{
+	/* Setup edge for transfer */
+	if (spi->mode & SPI_CPHA)
+		clps_writew(clps_readw(SYSCON3) | SYSCON3_ADCCKNSEN, SYSCON3);
+	else
+		clps_writew(clps_readw(SYSCON3) & ~SYSCON3_ADCCKNSEN, SYSCON3);
+}
+
+static int spi_clps711x_setup_xfer(struct spi_device *spi,
+				   struct spi_transfer *xfer)
+{
+	u32 speed = xfer->speed_hz ? : spi->max_speed_hz;
+	u8 bpw = xfer->bits_per_word ? : spi->bits_per_word;
+	struct spi_clps711x_data *hw = spi_master_get_devdata(spi->master);
+
+	if (bpw != 8) {
+		dev_err(&spi->dev, "Unsupported master bus width %i\n", bpw);
+		return -EINVAL;
+	}
+
+	/* Setup SPI frequency divider */
+	if (!speed || (speed >= hw->max_speed_hz))
+		clps_writel((clps_readl(SYSCON1) & ~SYSCON1_ADCKSEL_MASK) |
+			    SYSCON1_ADCKSEL(3), SYSCON1);
+	else if (speed >= (hw->max_speed_hz / 2))
+		clps_writel((clps_readl(SYSCON1) & ~SYSCON1_ADCKSEL_MASK) |
+			    SYSCON1_ADCKSEL(2), SYSCON1);
+	else if (speed >= (hw->max_speed_hz / 8))
+		clps_writel((clps_readl(SYSCON1) & ~SYSCON1_ADCKSEL_MASK) |
+			    SYSCON1_ADCKSEL(1), SYSCON1);
+	else
+		clps_writel((clps_readl(SYSCON1) & ~SYSCON1_ADCKSEL_MASK) |
+			    SYSCON1_ADCKSEL(0), SYSCON1);
+
+	return 0;
+}
+
+static int spi_clps711x_transfer_one_message(struct spi_master *master,
+					     struct spi_message *msg)
+{
+	struct spi_clps711x_data *hw = spi_master_get_devdata(master);
+	struct spi_transfer *xfer;
+	int status = 0, cs = hw->chipselect[msg->spi->chip_select];
+	u32 data;
+
+	spi_clps711x_setup_mode(msg->spi);
+
+	list_for_each_entry(xfer, &msg->transfers, transfer_list) {
+		if (spi_clps711x_setup_xfer(msg->spi, xfer)) {
+			status = -EINVAL;
+			goto out_xfr;
+		}
+
+		gpio_set_value(cs, !!(msg->spi->mode & SPI_CS_HIGH));
+
+		INIT_COMPLETION(hw->done);
+
+		hw->count = 0;
+		hw->len = xfer->len;
+		hw->tx_buf = (u8 *)xfer->tx_buf;
+		hw->rx_buf = (u8 *)xfer->rx_buf;
+
+		/* Initiate transfer */
+		data = hw->tx_buf ? hw->tx_buf[hw->count] : 0;
+		clps_writel(data | SYNCIO_FRMLEN(8) | SYNCIO_TXFRMEN, SYNCIO);
+
+		wait_for_completion(&hw->done);
+
+		if (xfer->delay_usecs)
+			udelay(xfer->delay_usecs);
+
+		if (xfer->cs_change ||
+		    list_is_last(&xfer->transfer_list, &msg->transfers))
+			gpio_set_value(cs, !(msg->spi->mode & SPI_CS_HIGH));
+
+		msg->actual_length += xfer->len;
+	}
+
+out_xfr:
+	msg->status = status;
+	spi_finalize_current_message(master);
+
+	return 0;
+}
+
+static irqreturn_t spi_clps711x_isr(int irq, void *dev_id)
+{
+	struct spi_clps711x_data *hw = (struct spi_clps711x_data *)dev_id;
+	u32 data;
+
+	/* Handle RX */
+	data = clps_readb(SYNCIO);
+	if (hw->rx_buf)
+		hw->rx_buf[hw->count] = (u8)data;
+
+	hw->count++;
+
+	/* Handle TX */
+	if (hw->count < hw->len) {
+		data = hw->tx_buf ? hw->tx_buf[hw->count] : 0;
+		clps_writel(data | SYNCIO_FRMLEN(8) | SYNCIO_TXFRMEN, SYNCIO);
+	} else
+		complete(&hw->done);
+
+	return IRQ_HANDLED;
+}
+
+static int __devinit spi_clps711x_probe(struct platform_device *pdev)
+{
+	int i, ret;
+	struct spi_master *master;
+	struct spi_clps711x_data *hw;
+	struct spi_clps711x_pdata *pdata = dev_get_platdata(&pdev->dev);
+
+	if (!pdata) {
+		dev_err(&pdev->dev, "No platform data supplied\n");
+		return -EINVAL;
+	}
+
+	if (pdata->num_chipselect < 1) {
+		dev_err(&pdev->dev, "At least one CS must be defined\n");
+		return -EINVAL;
+	}
+
+	master = spi_alloc_master(&pdev->dev,
+				  sizeof(struct spi_clps711x_data) +
+				  sizeof(int) * pdata->num_chipselect);
+	if (!master) {
+		dev_err(&pdev->dev, "SPI allocating memory error\n");
+		return -ENOMEM;
+	}
+
+	master->bus_num = pdev->id;
+	master->mode_bits = SPI_CPHA | SPI_CS_HIGH;
+	master->num_chipselect = pdata->num_chipselect;
+	master->setup = spi_clps711x_setup;
+	master->transfer_one_message = spi_clps711x_transfer_one_message;
+
+	hw = spi_master_get_devdata(master);
+
+	for (i = 0; i < master->num_chipselect; i++) {
+		hw->chipselect[i] = pdata->chipselect[i];
+		if (!gpio_is_valid(hw->chipselect[i])) {
+			dev_err(&pdev->dev, "Invalid CS GPIO %i\n", i);
+			ret = -EINVAL;
+			goto err_out;
+		}
+		if (gpio_request(hw->chipselect[i], DRIVER_NAME)) {
+			dev_err(&pdev->dev, "Can't get CS GPIO %i\n", i);
+			ret = -EINVAL;
+			goto err_out;
+		}
+	}
+
+	hw->spi_clk = devm_clk_get(&pdev->dev, "spi");
+	if (IS_ERR(hw->spi_clk)) {
+		dev_err(&pdev->dev, "Can't get clocks\n");
+		ret = PTR_ERR(hw->spi_clk);
+		goto err_out;
+	}
+	hw->max_speed_hz = clk_get_rate(hw->spi_clk);
+
+	init_completion(&hw->done);
+	platform_set_drvdata(pdev, master);
+
+	/* Disable extended mode due hardware problems */
+	clps_writew(clps_readw(SYSCON3) & ~SYSCON3_ADCCON, SYSCON3);
+
+	/* Clear possible pending interrupt */
+	clps_readl(SYNCIO);
+
+	ret = devm_request_irq(&pdev->dev, IRQ_SSEOTI, spi_clps711x_isr, 0,
+			       dev_name(&pdev->dev), hw);
+	if (ret) {
+		dev_err(&pdev->dev, "Can't request IRQ\n");
+		clk_put(hw->spi_clk);
+		goto clk_out;
+	}
+
+	ret = spi_register_master(master);
+	if (!ret) {
+		dev_info(&pdev->dev,
+			 "SPI bus driver initialized. Master clock %u Hz\n",
+			 hw->max_speed_hz);
+		return 0;
+	}
+
+	dev_err(&pdev->dev, "Failed to register master\n");
+	devm_free_irq(&pdev->dev, IRQ_SSEOTI, hw);
+
+clk_out:
+	devm_clk_put(&pdev->dev, hw->spi_clk);
+
+err_out:
+	while (--i >= 0)
+		if (gpio_is_valid(hw->chipselect[i]))
+			gpio_free(hw->chipselect[i]);
+
+	platform_set_drvdata(pdev, NULL);
+	spi_master_put(master);
+	kfree(master);
+
+	return ret;
+}
+
+static int __devexit spi_clps711x_remove(struct platform_device *pdev)
+{
+	int i;
+	struct spi_master *master = platform_get_drvdata(pdev);
+	struct spi_clps711x_data *hw = spi_master_get_devdata(master);
+
+	devm_free_irq(&pdev->dev, IRQ_SSEOTI, hw);
+
+	for (i = 0; i < master->num_chipselect; i++)
+		if (gpio_is_valid(hw->chipselect[i]))
+			gpio_free(hw->chipselect[i]);
+
+	devm_clk_put(&pdev->dev, hw->spi_clk);
+	platform_set_drvdata(pdev, NULL);
+	spi_unregister_master(master);
+	kfree(master);
+
+	return 0;
+}
+
+static struct platform_driver clps711x_spi_driver = {
+	.driver	= {
+		.name	= DRIVER_NAME,
+		.owner	= THIS_MODULE,
+	},
+	.probe	= spi_clps711x_probe,
+	.remove	= __devexit_p(spi_clps711x_remove),
+};
+module_platform_driver(clps711x_spi_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Alexander Shiyan <shc_work@mail.ru>");
+MODULE_DESCRIPTION("CLPS711X SPI bus driver");
diff --git a/include/linux/platform_data/spi-clps711x.h b/include/linux/platform_data/spi-clps711x.h
new file mode 100644
index 000000000000..301956e63143
--- /dev/null
+++ b/include/linux/platform_data/spi-clps711x.h
@@ -0,0 +1,21 @@
+/*
+ *  CLPS711X SPI bus driver definitions
+ *
+ *  Copyright (C) 2012 Alexander Shiyan <shc_work@mail.ru>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef ____LINUX_PLATFORM_DATA_SPI_CLPS711X_H
+#define ____LINUX_PLATFORM_DATA_SPI_CLPS711X_H
+
+/* Board specific platform_data */
+struct spi_clps711x_pdata {
+	int *chipselect;	/* Array of GPIO-numbers */
+	int num_chipselect;	/* Total count of GPIOs */
+};
+
+#endif
-- 
cgit v1.2.3


From 464ee9f966404786ba4c6be35dc8362ee8e6ba4e Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 20 Nov 2012 12:49:27 -0500
Subject: NFSv4.1: Ensure that the client tracks the server
 target_highest_slotid

Dynamic slot allocation in NFSv4.1 depends on the client being able to
track the server's target value for the highest slotid in the
slot table.  See the reference in Section 2.10.6.1 of RFC5661.

To avoid ordering problems in the case where 2 SEQUENCE replies contain
conflicting updates to this target value, we also introduce a generation
counter, to track whether or not an RPC containing a SEQUENCE operation
was launched before or after the last update.

Also rename the nfs4_slot_table target_max_slots field to
'target_highest_slotid' to avoid confusion with a slot
table size or number of slots.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/callback_proc.c    |  2 +-
 fs/nfs/nfs4proc.c         | 25 +++++++++++++++++++++++++
 fs/nfs/nfs4state.c        |  7 +++----
 fs/nfs/nfs4xdr.c          |  4 ++--
 include/linux/nfs_fs_sb.h |  5 +++--
 include/linux/nfs_xdr.h   |  2 ++
 6 files changed, 36 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 0be08b964f38..0ef047b7d28d 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -576,7 +576,7 @@ __be32 nfs4_callback_recallslot(struct cb_recallslotargs *args, void *dummy,
 	if (args->crsa_target_max_slots == fc_tbl->max_slots)
 		goto out;
 
-	fc_tbl->target_max_slots = args->crsa_target_max_slots;
+	fc_tbl->target_highest_slotid = args->crsa_target_max_slots;
 	nfs41_handle_recall_slot(cps->clp);
 out:
 	dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 197ef3e4e1f7..d91abaa522e8 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -488,6 +488,28 @@ static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res)
 	res->sr_slot = NULL;
 }
 
+/* Update the client's idea of target_highest_slotid */
+static void nfs41_set_target_slotid_locked(struct nfs4_slot_table *tbl,
+		u32 target_highest_slotid)
+{
+	if (tbl->target_highest_slotid == target_highest_slotid)
+		return;
+	tbl->target_highest_slotid = target_highest_slotid;
+	tbl->generation++;
+}
+
+static void nfs41_update_target_slotid(struct nfs4_slot_table *tbl,
+		struct nfs4_slot *slot,
+		struct nfs4_sequence_res *res)
+{
+	spin_lock(&tbl->slot_tbl_lock);
+	if (tbl->generation != slot->generation)
+		goto out;
+	nfs41_set_target_slotid_locked(tbl, res->sr_target_highest_slotid);
+out:
+	spin_unlock(&tbl->slot_tbl_lock);
+}
+
 static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res)
 {
 	struct nfs4_session *session;
@@ -522,6 +544,7 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *
 		/* Check sequence flags */
 		if (res->sr_status_flags != 0)
 			nfs4_schedule_lease_recovery(clp);
+		nfs41_update_target_slotid(slot->table, slot, res);
 		break;
 	case -NFS4ERR_DELAY:
 		/* The server detected a resend of the RPC call and
@@ -583,6 +606,7 @@ static struct nfs4_slot *nfs4_alloc_slot(struct nfs4_slot_table *tbl)
 		tbl->highest_used_slotid = slotid;
 	ret = &tbl->slots[slotid];
 	ret->renewal_time = jiffies;
+	ret->generation = tbl->generation;
 
 out:
 	dprintk("<-- %s used_slots=%04lx highest_used=%d slotid=%d \n",
@@ -5693,6 +5717,7 @@ static void nfs4_add_and_init_slots(struct nfs4_slot_table *tbl,
 		tbl->max_slots = max_slots;
 	}
 	tbl->highest_used_slotid = NFS4_NO_SLOT;
+	tbl->target_highest_slotid = max_slots - 1;
 	for (i = 0; i < tbl->max_slots; i++)
 		tbl->slots[i].seq_nr = ivalue;
 	spin_unlock(&tbl->slot_tbl_lock);
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 9495789c425b..842cb8c2f65d 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -2033,17 +2033,16 @@ static int nfs4_recall_slot(struct nfs_client *clp)
 		return 0;
 	nfs4_begin_drain_session(clp);
 	fc_tbl = &clp->cl_session->fc_slot_table;
-	new = nfs4_alloc_slots(fc_tbl, fc_tbl->target_max_slots, GFP_NOFS);
+	new = nfs4_alloc_slots(fc_tbl, fc_tbl->target_highest_slotid + 1, GFP_NOFS);
         if (!new)
 		return -ENOMEM;
 
 	spin_lock(&fc_tbl->slot_tbl_lock);
-	for (i = 0; i < fc_tbl->target_max_slots; i++)
+	for (i = 0; i <= fc_tbl->target_highest_slotid; i++)
 		new[i].seq_nr = fc_tbl->slots[i].seq_nr;
 	old = fc_tbl->slots;
 	fc_tbl->slots = new;
-	fc_tbl->max_slots = fc_tbl->target_max_slots;
-	fc_tbl->target_max_slots = 0;
+	fc_tbl->max_slots = fc_tbl->target_highest_slotid + 1;
 	clp->cl_session->fc_attrs.max_reqs = fc_tbl->max_slots;
 	spin_unlock(&fc_tbl->slot_tbl_lock);
 
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 27b0fec1a6b0..05d34f1fcc19 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -5552,8 +5552,8 @@ static int decode_sequence(struct xdr_stream *xdr,
 	}
 	/* highest slot id - currently not processed */
 	dummy = be32_to_cpup(p++);
-	/* target highest slot id - currently not processed */
-	dummy = be32_to_cpup(p++);
+	/* target highest slot id */
+	res->sr_target_highest_slotid = be32_to_cpup(p++);
 	/* result flags */
 	res->sr_status_flags = be32_to_cpup(p);
 	status = 0;
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index b0412873d29c..57d406997def 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -217,8 +217,9 @@ struct nfs4_slot_table {
 	u32		max_slots;		/* # slots in table */
 	u32		highest_used_slotid;	/* sent to server on each SEQ.
 						 * op for dynamic resizing */
-	u32		target_max_slots;	/* Set by CB_RECALL_SLOT as
-						 * the new max_slots */
+	u32		target_highest_slotid;	/* Server max_slot target */
+	unsigned long	generation;		/* Generation counter for
+						   target_highest_slotid */
 	struct completion complete;
 };
 
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index deb31bbbb857..08c47db7417f 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -188,6 +188,7 @@ struct nfs4_channel_attrs {
 /* nfs41 sessions slot seqid */
 struct nfs4_slot {
 	struct nfs4_slot_table	*table;
+	unsigned long		generation;
 	unsigned long		renewal_time;
 	u32			slot_nr;
 	u32		 	seq_nr;
@@ -202,6 +203,7 @@ struct nfs4_sequence_res {
 	struct nfs4_slot	*sr_slot;	/* slot used to send request */
 	int			sr_status;	/* sequence operation status */
 	u32			sr_status_flags;
+	u32			sr_target_highest_slotid;
 };
 
 struct nfs4_get_lease_time_args {
-- 
cgit v1.2.3


From da0507b7c95ccd4d9c86394eef42fe076032af30 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 20 Nov 2012 18:10:30 -0500
Subject: NFSv4.1: Reset the sequence number for slots that have been
 deallocated

When the server tells us that it is dynamically resizing the session
replay cache, we should reset the sequence number for those slots
that have been deallocated.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c         | 18 ++++++++++++++++++
 fs/nfs/nfs4xdr.c          |  4 ++--
 include/linux/nfs_fs_sb.h |  1 +
 include/linux/nfs_xdr.h   |  1 +
 4 files changed, 22 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index d91abaa522e8..52435ec44193 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -498,6 +498,22 @@ static void nfs41_set_target_slotid_locked(struct nfs4_slot_table *tbl,
 	tbl->generation++;
 }
 
+static void nfs41_set_server_slotid_locked(struct nfs4_slot_table *tbl,
+		u32 highest_slotid)
+{
+	unsigned int max_slotid, i;
+
+	if (tbl->server_highest_slotid == highest_slotid)
+		return;
+	if (tbl->highest_used_slotid > highest_slotid)
+		return;
+	max_slotid = min(tbl->max_slots - 1, highest_slotid);
+	/* Reset the seq_nr for deallocated slots */
+	for (i = tbl->server_highest_slotid + 1; i <= max_slotid; i++)
+		tbl->slots[i].seq_nr = 1;
+	tbl->server_highest_slotid = highest_slotid;
+}
+
 static void nfs41_update_target_slotid(struct nfs4_slot_table *tbl,
 		struct nfs4_slot *slot,
 		struct nfs4_sequence_res *res)
@@ -505,6 +521,7 @@ static void nfs41_update_target_slotid(struct nfs4_slot_table *tbl,
 	spin_lock(&tbl->slot_tbl_lock);
 	if (tbl->generation != slot->generation)
 		goto out;
+	nfs41_set_server_slotid_locked(tbl, res->sr_highest_slotid);
 	nfs41_set_target_slotid_locked(tbl, res->sr_target_highest_slotid);
 out:
 	spin_unlock(&tbl->slot_tbl_lock);
@@ -5718,6 +5735,7 @@ static void nfs4_add_and_init_slots(struct nfs4_slot_table *tbl,
 	}
 	tbl->highest_used_slotid = NFS4_NO_SLOT;
 	tbl->target_highest_slotid = max_slots - 1;
+	tbl->server_highest_slotid = max_slots - 1;
 	for (i = 0; i < tbl->max_slots; i++)
 		tbl->slots[i].seq_nr = ivalue;
 	spin_unlock(&tbl->slot_tbl_lock);
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 05d34f1fcc19..a67040f51597 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -5550,8 +5550,8 @@ static int decode_sequence(struct xdr_stream *xdr,
 		dprintk("%s Invalid slot id\n", __func__);
 		goto out_err;
 	}
-	/* highest slot id - currently not processed */
-	dummy = be32_to_cpup(p++);
+	/* highest slot id */
+	res->sr_highest_slotid = be32_to_cpup(p++);
 	/* target highest slot id */
 	res->sr_target_highest_slotid = be32_to_cpup(p++);
 	/* result flags */
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 57d406997def..646e64bbff4c 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -218,6 +218,7 @@ struct nfs4_slot_table {
 	u32		highest_used_slotid;	/* sent to server on each SEQ.
 						 * op for dynamic resizing */
 	u32		target_highest_slotid;	/* Server max_slot target */
+	u32		server_highest_slotid;	/* Server highest slotid */
 	unsigned long	generation;		/* Generation counter for
 						   target_highest_slotid */
 	struct completion complete;
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 08c47db7417f..3ddb08fba935 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -203,6 +203,7 @@ struct nfs4_sequence_res {
 	struct nfs4_slot	*sr_slot;	/* slot used to send request */
 	int			sr_status;	/* sequence operation status */
 	u32			sr_status_flags;
+	u32			sr_highest_slotid;
 	u32			sr_target_highest_slotid;
 };
 
-- 
cgit v1.2.3


From 97e548a93de213b149eea025a97d88e28143b445 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 20 Nov 2012 14:45:48 -0500
Subject: NFSv4.1: Support dynamic resizing of the session slot table

Allow the server to control the size of the session slot table
by adjusting the value of sr_target_max_slots in the reply to the
SEQUENCE operation.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c         | 12 ++++++++++--
 fs/nfs/nfs4state.c        |  6 +++---
 include/linux/nfs_fs_sb.h |  1 +
 3 files changed, 14 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 62212231ce62..1792ece8b53c 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -492,10 +492,17 @@ static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res)
 static void nfs41_set_target_slotid_locked(struct nfs4_slot_table *tbl,
 		u32 target_highest_slotid)
 {
+	unsigned int max_slotid, i;
+
 	if (tbl->target_highest_slotid == target_highest_slotid)
 		return;
 	tbl->target_highest_slotid = target_highest_slotid;
 	tbl->generation++;
+
+	max_slotid = min(tbl->max_slots - 1, tbl->target_highest_slotid);
+	for (i = tbl->max_slotid + 1; i <= max_slotid; i++)
+		rpc_wake_up_next(&tbl->slot_tbl_waitq);
+	tbl->max_slotid = max_slotid;
 }
 
 void nfs41_set_target_slotid(struct nfs4_slot_table *tbl,
@@ -622,8 +629,8 @@ static struct nfs4_slot *nfs4_alloc_slot(struct nfs4_slot_table *tbl)
 	dprintk("--> %s used_slots=%04lx highest_used=%u max_slots=%u\n",
 		__func__, tbl->used_slots[0], tbl->highest_used_slotid,
 		tbl->max_slots);
-	slotid = find_first_zero_bit(tbl->used_slots, tbl->max_slots);
-	if (slotid >= tbl->max_slots)
+	slotid = find_first_zero_bit(tbl->used_slots, tbl->max_slotid + 1);
+	if (slotid > tbl->max_slotid)
 		goto out;
 	__set_bit(slotid, tbl->used_slots);
 	if (slotid > tbl->highest_used_slotid ||
@@ -5744,6 +5751,7 @@ static void nfs4_add_and_init_slots(struct nfs4_slot_table *tbl,
 	tbl->highest_used_slotid = NFS4_NO_SLOT;
 	tbl->target_highest_slotid = max_slots - 1;
 	tbl->server_highest_slotid = max_slots - 1;
+	tbl->max_slotid = max_slots - 1;
 	for (i = 0; i < tbl->max_slots; i++)
 		tbl->slots[i].seq_nr = ivalue;
 	spin_unlock(&tbl->slot_tbl_lock);
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 842cb8c2f65d..1b7fa73c9436 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -254,15 +254,14 @@ static void nfs4_end_drain_session(struct nfs_client *clp)
 {
 	struct nfs4_session *ses = clp->cl_session;
 	struct nfs4_slot_table *tbl;
-	int max_slots;
+	unsigned int i;
 
 	if (ses == NULL)
 		return;
 	tbl = &ses->fc_slot_table;
 	if (test_and_clear_bit(NFS4_SESSION_DRAINING, &ses->session_state)) {
 		spin_lock(&tbl->slot_tbl_lock);
-		max_slots = tbl->max_slots;
-		while (max_slots--) {
+		for (i = 0; i <= tbl->max_slotid; i++) {
 			if (rpc_wake_up_first(&tbl->slot_tbl_waitq,
 						nfs4_set_task_privileged,
 						NULL) == NULL)
@@ -2043,6 +2042,7 @@ static int nfs4_recall_slot(struct nfs_client *clp)
 	old = fc_tbl->slots;
 	fc_tbl->slots = new;
 	fc_tbl->max_slots = fc_tbl->target_highest_slotid + 1;
+	fc_tbl->max_slotid = fc_tbl->target_highest_slotid;
 	clp->cl_session->fc_attrs.max_reqs = fc_tbl->max_slots;
 	spin_unlock(&fc_tbl->slot_tbl_lock);
 
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 646e64bbff4c..30715508fade 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -215,6 +215,7 @@ struct nfs4_slot_table {
 	spinlock_t	slot_tbl_lock;
 	struct rpc_wait_queue	slot_tbl_waitq;	/* allocators may wait here */
 	u32		max_slots;		/* # slots in table */
+	u32		max_slotid;		/* Max allowed slotid value */
 	u32		highest_used_slotid;	/* sent to server on each SEQ.
 						 * op for dynamic resizing */
 	u32		target_highest_slotid;	/* Server max_slot target */
-- 
cgit v1.2.3


From 87dda67e7386ba7d2164391ea58b34e028d8157b Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 20 Nov 2012 19:49:20 -0500
Subject: NFSv4.1: Allow SEQUENCE to resize the slot table on the fly

Instead of an array of slots, use a singly linked list of slots that
can be dynamically appended to or shrunk.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4_fs.h        |   4 +-
 fs/nfs/nfs4proc.c       | 174 +++++++++++++++++++++++++++++++-----------------
 fs/nfs/nfs4state.c      |  22 ++----
 include/linux/nfs_xdr.h |   1 +
 4 files changed, 121 insertions(+), 80 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 5d4e82b10c3c..856bc496a210 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -258,10 +258,10 @@ extern int nfs4_proc_get_lease_time(struct nfs_client *clp,
 extern int nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data,
 				  bool sync);
 
-extern struct nfs4_slot *nfs4_alloc_slots(struct nfs4_slot_table *table,
-		u32 max_slots, gfp_t gfp_flags);
 extern void nfs41_set_target_slotid(struct nfs4_slot_table *tbl,
 		u32 target_highest_slotid);
+extern int nfs4_resize_slot_table(struct nfs4_slot_table *tbl,
+		u32 max_reqs, u32 ivalue);
 
 static inline bool
 is_ds_only_client(struct nfs_client *clp)
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 1792ece8b53c..fc65300172e1 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -396,6 +396,27 @@ static void renew_lease(const struct nfs_server *server, unsigned long timestamp
 
 #if defined(CONFIG_NFS_V4_1)
 
+/*
+ * nfs4_shrink_slot_table - free retired slots from the slot table
+ */
+static void nfs4_shrink_slot_table(struct nfs4_slot_table  *tbl, u32 newsize)
+{
+	struct nfs4_slot **p;
+	if (newsize >= tbl->max_slots)
+		return;
+
+	p = &tbl->slots;
+	while (newsize--)
+		p = &(*p)->next;
+	while (*p) {
+		struct nfs4_slot *slot = *p;
+
+		*p = slot->next;
+		kfree(slot);
+		tbl->max_slots--;
+	}
+}
+
 /*
  * nfs4_free_slot - free a slot and efficiently update slot table.
  *
@@ -499,7 +520,7 @@ static void nfs41_set_target_slotid_locked(struct nfs4_slot_table *tbl,
 	tbl->target_highest_slotid = target_highest_slotid;
 	tbl->generation++;
 
-	max_slotid = min(tbl->max_slots - 1, tbl->target_highest_slotid);
+	max_slotid = min(NFS4_MAX_SLOT_TABLE - 1, tbl->target_highest_slotid);
 	for (i = tbl->max_slotid + 1; i <= max_slotid; i++)
 		rpc_wake_up_next(&tbl->slot_tbl_waitq);
 	tbl->max_slotid = max_slotid;
@@ -516,16 +537,12 @@ void nfs41_set_target_slotid(struct nfs4_slot_table *tbl,
 static void nfs41_set_server_slotid_locked(struct nfs4_slot_table *tbl,
 		u32 highest_slotid)
 {
-	unsigned int max_slotid, i;
-
 	if (tbl->server_highest_slotid == highest_slotid)
 		return;
 	if (tbl->highest_used_slotid > highest_slotid)
 		return;
-	max_slotid = min(tbl->max_slots - 1, highest_slotid);
-	/* Reset the seq_nr for deallocated slots */
-	for (i = tbl->server_highest_slotid + 1; i <= max_slotid; i++)
-		tbl->slots[i].seq_nr = 1;
+	/* Deallocate slots */
+	nfs4_shrink_slot_table(tbl, highest_slotid + 1);
 	tbl->server_highest_slotid = highest_slotid;
 }
 
@@ -612,6 +629,42 @@ static int nfs4_sequence_done(struct rpc_task *task,
 	return nfs41_sequence_done(task, res);
 }
 
+static struct nfs4_slot *nfs4_new_slot(struct nfs4_slot_table  *tbl,
+		u32 slotid, u32 seq_init, gfp_t gfp_mask)
+{
+	struct nfs4_slot *slot;
+
+	slot = kzalloc(sizeof(*slot), gfp_mask);
+	if (slot) {
+		slot->table = tbl;
+		slot->slot_nr = slotid;
+		slot->seq_nr = seq_init;
+	}
+	return slot;
+}
+
+static struct nfs4_slot *nfs4_find_or_create_slot(struct nfs4_slot_table  *tbl,
+		u32 slotid, u32 seq_init, gfp_t gfp_mask)
+{
+	struct nfs4_slot **p, *slot;
+
+	p = &tbl->slots;
+	for (;;) {
+		if (*p == NULL) {
+			*p = nfs4_new_slot(tbl, tbl->max_slots,
+					seq_init, gfp_mask);
+			if (*p == NULL)
+				break;
+			tbl->max_slots++;
+		}
+		slot = *p;
+		if (slot->slot_nr == slotid)
+			return slot;
+		p = &slot->next;
+	}
+	return NULL;
+}
+
 /*
  * nfs4_alloc_slot - efficiently look for a free slot
  *
@@ -628,15 +681,17 @@ static struct nfs4_slot *nfs4_alloc_slot(struct nfs4_slot_table *tbl)
 
 	dprintk("--> %s used_slots=%04lx highest_used=%u max_slots=%u\n",
 		__func__, tbl->used_slots[0], tbl->highest_used_slotid,
-		tbl->max_slots);
+		tbl->max_slotid + 1);
 	slotid = find_first_zero_bit(tbl->used_slots, tbl->max_slotid + 1);
 	if (slotid > tbl->max_slotid)
 		goto out;
+	ret = nfs4_find_or_create_slot(tbl, slotid, 1, GFP_NOWAIT);
+	if (ret == NULL)
+		goto out;
 	__set_bit(slotid, tbl->used_slots);
 	if (slotid > tbl->highest_used_slotid ||
 			tbl->highest_used_slotid == NFS4_NO_SLOT)
 		tbl->highest_used_slotid = slotid;
-	ret = &tbl->slots[slotid];
 	ret->renewal_time = jiffies;
 	ret->generation = tbl->generation;
 
@@ -5718,67 +5773,56 @@ int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo)
 	return status;
 }
 
-struct nfs4_slot *nfs4_alloc_slots(struct nfs4_slot_table *table,
-		u32 max_slots, gfp_t gfp_flags)
+static int nfs4_grow_slot_table(struct nfs4_slot_table *tbl,
+		 u32 max_reqs, u32 ivalue)
 {
-	struct nfs4_slot *tbl;
-	u32 i;
-
-	tbl = kmalloc_array(max_slots, sizeof(*tbl), gfp_flags);
-	if (tbl != NULL) {
-		for (i = 0; i < max_slots; i++) {
-			tbl[i].table = table;
-			tbl[i].slot_nr = i;
-		}
-	}
-	return tbl;
+	if (max_reqs <= tbl->max_slots)
+		return 0;
+	if (nfs4_find_or_create_slot(tbl, max_reqs - 1, ivalue, GFP_NOFS))
+		return 0;
+	return -ENOMEM;
 }
 
-static void nfs4_add_and_init_slots(struct nfs4_slot_table *tbl,
-		struct nfs4_slot *new,
-		u32 max_slots,
+static void nfs4_reset_slot_table(struct nfs4_slot_table *tbl,
+		u32 server_highest_slotid,
 		u32 ivalue)
 {
-	struct nfs4_slot *old = NULL;
-	u32 i;
+	struct nfs4_slot **p;
 
-	spin_lock(&tbl->slot_tbl_lock);
-	if (new) {
-		old = tbl->slots;
-		tbl->slots = new;
-		tbl->max_slots = max_slots;
+	nfs4_shrink_slot_table(tbl, server_highest_slotid + 1);
+	p = &tbl->slots;
+	while (*p) {
+		(*p)->seq_nr = ivalue;
+		p = &(*p)->next;
 	}
 	tbl->highest_used_slotid = NFS4_NO_SLOT;
-	tbl->target_highest_slotid = max_slots - 1;
-	tbl->server_highest_slotid = max_slots - 1;
-	tbl->max_slotid = max_slots - 1;
-	for (i = 0; i < tbl->max_slots; i++)
-		tbl->slots[i].seq_nr = ivalue;
-	spin_unlock(&tbl->slot_tbl_lock);
-	kfree(old);
+	tbl->target_highest_slotid = server_highest_slotid;
+	tbl->server_highest_slotid = server_highest_slotid;
+	tbl->max_slotid = server_highest_slotid;
 }
 
 /*
  * (re)Initialise a slot table
  */
-static int nfs4_realloc_slot_table(struct nfs4_slot_table *tbl, u32 max_reqs,
-				 u32 ivalue)
+static int nfs4_realloc_slot_table(struct nfs4_slot_table *tbl,
+		u32 max_reqs, u32 ivalue)
 {
-	struct nfs4_slot *new = NULL;
-	int ret = -ENOMEM;
+	int ret;
 
 	dprintk("--> %s: max_reqs=%u, tbl->max_slots %d\n", __func__,
 		max_reqs, tbl->max_slots);
 
-	/* Does the newly negotiated max_reqs match the existing slot table? */
-	if (max_reqs != tbl->max_slots) {
-		new = nfs4_alloc_slots(tbl, max_reqs, GFP_NOFS);
-		if (!new)
-			goto out;
-	}
-	ret = 0;
+	if (max_reqs > NFS4_MAX_SLOT_TABLE)
+		max_reqs = NFS4_MAX_SLOT_TABLE;
+
+	ret = nfs4_grow_slot_table(tbl, max_reqs, ivalue);
+	if (ret)
+		goto out;
+
+	spin_lock(&tbl->slot_tbl_lock);
+	nfs4_reset_slot_table(tbl, max_reqs - 1, ivalue);
+	spin_unlock(&tbl->slot_tbl_lock);
 
-	nfs4_add_and_init_slots(tbl, new, max_reqs, ivalue);
 	dprintk("%s: tbl=%p slots=%p max_slots=%d\n", __func__,
 		tbl, tbl->slots, tbl->max_slots);
 out:
@@ -5786,18 +5830,28 @@ out:
 	return ret;
 }
 
+int nfs4_resize_slot_table(struct nfs4_slot_table *tbl,
+		 u32 max_reqs, u32 ivalue)
+{
+	int ret;
+
+	if (max_reqs > NFS4_MAX_SLOT_TABLE)
+		max_reqs = NFS4_MAX_SLOT_TABLE;
+	ret = nfs4_grow_slot_table(tbl, max_reqs, ivalue);
+	if (ret)
+		return ret;
+	spin_lock(&tbl->slot_tbl_lock);
+	nfs4_shrink_slot_table(tbl, max_reqs);
+	tbl->max_slotid = max_reqs - 1;
+	spin_unlock(&tbl->slot_tbl_lock);
+	return 0;
+}
+
 /* Destroy the slot table */
 static void nfs4_destroy_slot_tables(struct nfs4_session *session)
 {
-	if (session->fc_slot_table.slots != NULL) {
-		kfree(session->fc_slot_table.slots);
-		session->fc_slot_table.slots = NULL;
-	}
-	if (session->bc_slot_table.slots != NULL) {
-		kfree(session->bc_slot_table.slots);
-		session->bc_slot_table.slots = NULL;
-	}
-	return;
+	nfs4_shrink_slot_table(&session->fc_slot_table, 0);
+	nfs4_shrink_slot_table(&session->bc_slot_table, 0);
 }
 
 /*
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 1b7fa73c9436..c14b2c7ac8a7 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -2025,29 +2025,15 @@ out:
 static int nfs4_recall_slot(struct nfs_client *clp)
 {
 	struct nfs4_slot_table *fc_tbl;
-	struct nfs4_slot *new, *old;
-	int i;
+	u32 new_size;
 
 	if (!nfs4_has_session(clp))
 		return 0;
 	nfs4_begin_drain_session(clp);
-	fc_tbl = &clp->cl_session->fc_slot_table;
-	new = nfs4_alloc_slots(fc_tbl, fc_tbl->target_highest_slotid + 1, GFP_NOFS);
-        if (!new)
-		return -ENOMEM;
 
-	spin_lock(&fc_tbl->slot_tbl_lock);
-	for (i = 0; i <= fc_tbl->target_highest_slotid; i++)
-		new[i].seq_nr = fc_tbl->slots[i].seq_nr;
-	old = fc_tbl->slots;
-	fc_tbl->slots = new;
-	fc_tbl->max_slots = fc_tbl->target_highest_slotid + 1;
-	fc_tbl->max_slotid = fc_tbl->target_highest_slotid;
-	clp->cl_session->fc_attrs.max_reqs = fc_tbl->max_slots;
-	spin_unlock(&fc_tbl->slot_tbl_lock);
-
-	kfree(old);
-	return 0;
+	fc_tbl = &clp->cl_session->fc_slot_table;
+	new_size = fc_tbl->server_highest_slotid + 1;
+	return nfs4_resize_slot_table(fc_tbl, new_size, 1);
 }
 
 static int nfs4_bind_conn_to_session(struct nfs_client *clp)
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 3ddb08fba935..44d256f6021c 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -188,6 +188,7 @@ struct nfs4_channel_attrs {
 /* nfs41 sessions slot seqid */
 struct nfs4_slot {
 	struct nfs4_slot_table	*table;
+	struct nfs4_slot	*next;
 	unsigned long		generation;
 	unsigned long		renewal_time;
 	u32			slot_nr;
-- 
cgit v1.2.3


From c34309a45ea491e5f0c0d0af49ccfa018ff35fc1 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 26 Nov 2012 14:33:03 -0500
Subject: NFS: Remove unused function slot_idx

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs_fs_sb.h | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 30715508fade..e707c1b69796 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -225,11 +225,6 @@ struct nfs4_slot_table {
 	struct completion complete;
 };
 
-static inline int slot_idx(struct nfs4_slot_table *tbl, struct nfs4_slot *sp)
-{
-	return sp - tbl->slots;
-}
-
 /*
  * Session related parameters
  */
-- 
cgit v1.2.3


From 76e697ba7e8d187f50e385d21a2b2f1709a62c14 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 26 Nov 2012 14:20:49 -0500
Subject: NFSv4.1: Move slot table and session struct definitions to
 nfs4session.h

Clean up. Gather NFSv4.1 slot definitions in fs/nfs/nfs4session.h.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/blocklayout/blocklayout.c |   1 +
 fs/nfs/callback_xdr.c            |   1 +
 fs/nfs/internal.h                |  21 --------
 fs/nfs/nfs4_fs.h                 |  12 -----
 fs/nfs/nfs4filelayout.c          |   1 +
 fs/nfs/nfs4session.h             | 101 +++++++++++++++++++++++++++++++++++++++
 fs/nfs/nfs4state.c               |   1 +
 fs/nfs/nfs4xdr.c                 |   1 +
 fs/nfs/super.c                   |   1 +
 include/linux/nfs_fs_sb.h        |  49 -------------------
 include/linux/nfs_xdr.h          |  11 +----
 11 files changed, 108 insertions(+), 92 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index f1027b06a1a9..4fa788c93f46 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -40,6 +40,7 @@
 #include <linux/pagevec.h>
 
 #include "../pnfs.h"
+#include "../nfs4session.h"
 #include "../internal.h"
 #include "blocklayout.h"
 
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index ea6a7b190e6b..59461c957d9d 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -16,6 +16,7 @@
 #include "nfs4_fs.h"
 #include "callback.h"
 #include "internal.h"
+#include "nfs4session.h"
 
 #define CB_OP_TAGLEN_MAXSZ	(512)
 #define CB_OP_HDR_RES_MAXSZ	(2 + CB_OP_TAGLEN_MAXSZ)
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 8965a998b306..9bdbfc3884a9 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -18,27 +18,6 @@ struct nfs_string;
  */
 #define NFS_MAX_READAHEAD	(RPC_DEF_SLOT_TABLE - 1)
 
-/*
- * Determine if sessions are in use.
- */
-static inline int nfs4_has_session(const struct nfs_client *clp)
-{
-#ifdef CONFIG_NFS_V4_1
-	if (clp->cl_session)
-		return 1;
-#endif /* CONFIG_NFS_V4_1 */
-	return 0;
-}
-
-static inline int nfs4_has_persistent_session(const struct nfs_client *clp)
-{
-#ifdef CONFIG_NFS_V4_1
-	if (nfs4_has_session(clp))
-		return (clp->cl_session->flags & SESSION4_PERSIST);
-#endif /* CONFIG_NFS_V4_1 */
-	return 0;
-}
-
 static inline void nfs_attr_check_mountpoint(struct super_block *parent, struct nfs_fattr *fattr)
 {
 	if (!nfs_fsid_equal(&NFS_SB(parent)->fsid, &fattr->fsid))
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index cd3e3096b60a..322bd0168ebf 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -29,11 +29,6 @@ enum nfs4_client_state {
 	NFS4CLNT_BIND_CONN_TO_SESSION,
 };
 
-enum nfs4_session_state {
-	NFS4_SESSION_INITING,
-	NFS4_SESSION_DRAINING,
-};
-
 #define NFS4_RENEW_TIMEOUT		0x01
 #define NFS4_RENEW_DELEGATION_CB	0x02
 
@@ -327,13 +322,6 @@ int nfs41_discover_server_trunking(struct nfs_client *clp,
 extern void nfs4_schedule_session_recovery(struct nfs4_session *, int);
 extern void nfs41_server_notify_target_slotid_update(struct nfs_client *clp);
 
-extern void nfs4_session_drain_complete(struct nfs4_session *session,
-		struct nfs4_slot_table *tbl);
-
-static inline bool nfs4_session_draining(struct nfs4_session *session)
-{
-	return !!test_bit(NFS4_SESSION_DRAINING, &session->session_state);
-}
 #else
 static inline void nfs4_schedule_session_recovery(struct nfs4_session *session, int err)
 {
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index bfb28fa38e74..591a1a7f8f94 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -35,6 +35,7 @@
 
 #include <linux/sunrpc/metrics.h>
 
+#include "nfs4session.h"
 #include "internal.h"
 #include "delegation.h"
 #include "nfs4filelayout.h"
diff --git a/fs/nfs/nfs4session.h b/fs/nfs/nfs4session.h
index cb47b1eb0886..e96323ff1d95 100644
--- a/fs/nfs/nfs4session.h
+++ b/fs/nfs/nfs4session.h
@@ -7,6 +7,68 @@
 #ifndef __LINUX_FS_NFS_NFS4SESSION_H
 #define __LINUX_FS_NFS_NFS4SESSION_H
 
+/* maximum number of slots to use */
+#define NFS4_DEF_SLOT_TABLE_SIZE (16U)
+#define NFS4_MAX_SLOT_TABLE (256U)
+#define NFS4_NO_SLOT ((u32)-1)
+
+#if IS_ENABLED(CONFIG_NFS_V4)
+
+/* Sessions slot seqid */
+struct nfs4_slot {
+	struct nfs4_slot_table	*table;
+	struct nfs4_slot	*next;
+	unsigned long		generation;
+	unsigned long		renewal_time;
+	u32			slot_nr;
+	u32		 	seq_nr;
+};
+
+/* Sessions */
+#define SLOT_TABLE_SZ DIV_ROUND_UP(NFS4_MAX_SLOT_TABLE, 8*sizeof(long))
+struct nfs4_slot_table {
+	struct nfs4_session *session;		/* Parent session */
+	struct nfs4_slot *slots;		/* seqid per slot */
+	unsigned long   used_slots[SLOT_TABLE_SZ]; /* used/unused bitmap */
+	spinlock_t	slot_tbl_lock;
+	struct rpc_wait_queue	slot_tbl_waitq;	/* allocators may wait here */
+	u32		max_slots;		/* # slots in table */
+	u32		max_slotid;		/* Max allowed slotid value */
+	u32		highest_used_slotid;	/* sent to server on each SEQ.
+						 * op for dynamic resizing */
+	u32		target_highest_slotid;	/* Server max_slot target */
+	u32		server_highest_slotid;	/* Server highest slotid */
+	unsigned long	generation;		/* Generation counter for
+						   target_highest_slotid */
+	struct completion complete;
+};
+
+/*
+ * Session related parameters
+ */
+struct nfs4_session {
+	struct nfs4_sessionid		sess_id;
+	u32				flags;
+	unsigned long			session_state;
+	u32				hash_alg;
+	u32				ssv_len;
+
+	/* The fore and back channel */
+	struct nfs4_channel_attrs	fc_attrs;
+	struct nfs4_slot_table		fc_slot_table;
+	struct nfs4_channel_attrs	bc_attrs;
+	struct nfs4_slot_table		bc_slot_table;
+	struct nfs_client		*clp;
+	/* Create session arguments */
+	unsigned int			fc_target_max_rqst_sz;
+	unsigned int			fc_target_max_resp_sz;
+};
+
+enum nfs4_session_state {
+	NFS4_SESSION_INITING,
+	NFS4_SESSION_DRAINING,
+};
+
 #if defined(CONFIG_NFS_V4_1)
 extern struct nfs4_slot *nfs4_alloc_slot(struct nfs4_slot_table *tbl);
 extern void nfs4_free_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *slot);
@@ -24,6 +86,31 @@ extern void nfs4_destroy_session(struct nfs4_session *session);
 extern int nfs4_init_session(struct nfs_server *server);
 extern int nfs4_init_ds_session(struct nfs_client *, unsigned long);
 
+extern void nfs4_session_drain_complete(struct nfs4_session *session,
+		struct nfs4_slot_table *tbl);
+
+static inline bool nfs4_session_draining(struct nfs4_session *session)
+{
+	return !!test_bit(NFS4_SESSION_DRAINING, &session->session_state);
+}
+
+/*
+ * Determine if sessions are in use.
+ */
+static inline int nfs4_has_session(const struct nfs_client *clp)
+{
+	if (clp->cl_session)
+		return 1;
+	return 0;
+}
+
+static inline int nfs4_has_persistent_session(const struct nfs_client *clp)
+{
+	if (nfs4_has_session(clp))
+		return (clp->cl_session->flags & SESSION4_PERSIST);
+	return 0;
+}
+
 #else /* defined(CONFIG_NFS_V4_1) */
 
 static inline int nfs4_init_session(struct nfs_server *server)
@@ -31,5 +118,19 @@ static inline int nfs4_init_session(struct nfs_server *server)
 	return 0;
 }
 
+/*
+ * Determine if sessions are in use.
+ */
+static inline int nfs4_has_session(const struct nfs_client *clp)
+{
+	return 0;
+}
+
+static inline int nfs4_has_persistent_session(const struct nfs_client *clp)
+{
+	return 0;
+}
+
 #endif /* defined(CONFIG_NFS_V4_1) */
+#endif /* IS_ENABLED(CONFIG_NFS_V4) */
 #endif /* __LINUX_FS_NFS_NFS4SESSION_H */
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 1077b9698381..1402283d152d 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -57,6 +57,7 @@
 #include "callback.h"
 #include "delegation.h"
 #include "internal.h"
+#include "nfs4session.h"
 #include "pnfs.h"
 #include "netns.h"
 
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index a67040f51597..e786dc7582b1 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -56,6 +56,7 @@
 
 #include "nfs4_fs.h"
 #include "internal.h"
+#include "nfs4session.h"
 #include "pnfs.h"
 #include "netns.h"
 
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 652d3f7176a9..e12cea4b36a5 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -64,6 +64,7 @@
 #include "iostat.h"
 #include "internal.h"
 #include "fscache.h"
+#include "nfs4session.h"
 #include "pnfs.h"
 #include "nfs.h"
 
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index e707c1b69796..6c6ed153a9b4 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -198,53 +198,4 @@ struct nfs_server {
 #define NFS_CAP_POSIX_LOCK	(1U << 14)
 #define NFS_CAP_UIDGID_NOMAP	(1U << 15)
 
-
-/* maximum number of slots to use */
-#define NFS4_DEF_SLOT_TABLE_SIZE (16U)
-#define NFS4_MAX_SLOT_TABLE (256U)
-#define NFS4_NO_SLOT ((u32)-1)
-
-#if IS_ENABLED(CONFIG_NFS_V4)
-
-/* Sessions */
-#define SLOT_TABLE_SZ DIV_ROUND_UP(NFS4_MAX_SLOT_TABLE, 8*sizeof(long))
-struct nfs4_slot_table {
-	struct nfs4_session *session;		/* Parent session */
-	struct nfs4_slot *slots;		/* seqid per slot */
-	unsigned long   used_slots[SLOT_TABLE_SZ]; /* used/unused bitmap */
-	spinlock_t	slot_tbl_lock;
-	struct rpc_wait_queue	slot_tbl_waitq;	/* allocators may wait here */
-	u32		max_slots;		/* # slots in table */
-	u32		max_slotid;		/* Max allowed slotid value */
-	u32		highest_used_slotid;	/* sent to server on each SEQ.
-						 * op for dynamic resizing */
-	u32		target_highest_slotid;	/* Server max_slot target */
-	u32		server_highest_slotid;	/* Server highest slotid */
-	unsigned long	generation;		/* Generation counter for
-						   target_highest_slotid */
-	struct completion complete;
-};
-
-/*
- * Session related parameters
- */
-struct nfs4_session {
-	struct nfs4_sessionid		sess_id;
-	u32				flags;
-	unsigned long			session_state;
-	u32				hash_alg;
-	u32				ssv_len;
-
-	/* The fore and back channel */
-	struct nfs4_channel_attrs	fc_attrs;
-	struct nfs4_slot_table		fc_slot_table;
-	struct nfs4_channel_attrs	bc_attrs;
-	struct nfs4_slot_table		bc_slot_table;
-	struct nfs_client		*clp;
-	/* Create session arguments */
-	unsigned int			fc_target_max_rqst_sz;
-	unsigned int			fc_target_max_resp_sz;
-};
-
-#endif /* CONFIG_NFS_V4 */
 #endif
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 44d256f6021c..2076149db1a4 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -185,16 +185,7 @@ struct nfs4_channel_attrs {
 	u32			max_reqs;
 };
 
-/* nfs41 sessions slot seqid */
-struct nfs4_slot {
-	struct nfs4_slot_table	*table;
-	struct nfs4_slot	*next;
-	unsigned long		generation;
-	unsigned long		renewal_time;
-	u32			slot_nr;
-	u32		 	seq_nr;
-};
-
+struct nfs4_slot;
 struct nfs4_sequence_args {
 	struct nfs4_slot	*sa_slot;
 	u8			sa_cache_this;
-- 
cgit v1.2.3


From 8fe72bac8de784c4059b41a7dd6bb0151a3ae898 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 29 Oct 2012 19:02:20 -0400
Subject: NFSv4: Clean up handling of privileged operations

Privileged rpc calls are those that are run by the state recovery thread,
in cases where we're trying to recover the system after a server reboot
or a network partition. In those cases, we want to fence off all other
rpc calls (see nfs4_begin_drain_session()) so that they don't end up
using stateids or clientids that are in the process of being recovered.

Prior to this patch, we had to set up special callback functions in
order to declare an rpc call as being privileged.
By adding a new field to the sequence arguments, this patch simplifies
things considerably, and allows us to declare the rpc call as privileged
before it is run.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c       | 114 ++++++++++++++++++------------------------------
 include/linux/nfs_xdr.h |   3 +-
 2 files changed, 44 insertions(+), 73 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 4b1635ce658d..38a709d78594 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -490,11 +490,17 @@ static void nfs41_init_sequence(struct nfs4_sequence_args *args,
 {
 	args->sa_slot = NULL;
 	args->sa_cache_this = 0;
+	args->sa_privileged = 0;
 	if (cache_reply)
 		args->sa_cache_this = 1;
 	res->sr_slot = NULL;
 }
 
+static void nfs4_set_sequence_privileged(struct nfs4_sequence_args *args)
+{
+	args->sa_privileged = 1;
+}
+
 int nfs41_setup_sequence(struct nfs4_session *session,
 				struct nfs4_sequence_args *args,
 				struct nfs4_sequence_res *res,
@@ -514,7 +520,7 @@ int nfs41_setup_sequence(struct nfs4_session *session,
 
 	spin_lock(&tbl->slot_tbl_lock);
 	if (test_bit(NFS4_SESSION_DRAINING, &session->session_state) &&
-	    !rpc_task_has_priority(task, RPC_PRIORITY_PRIVILEGED)) {
+	    !args->sa_privileged) {
 		/* The state manager will wait until the slot table is empty */
 		dprintk("%s session is draining\n", __func__);
 		goto out_sleep;
@@ -548,6 +554,9 @@ out_success:
 	rpc_call_start(task);
 	return 0;
 out_sleep:
+	/* Privileged tasks are queued with top priority */
+	if (args->sa_privileged)
+		rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
 	rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL);
 	spin_unlock(&tbl->slot_tbl_lock);
 	return -EAGAIN;
@@ -593,12 +602,6 @@ static void nfs41_call_sync_prepare(struct rpc_task *task, void *calldata)
 	nfs41_setup_sequence(session, data->seq_args, data->seq_res, task);
 }
 
-static void nfs41_call_priv_sync_prepare(struct rpc_task *task, void *calldata)
-{
-	rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
-	nfs41_call_sync_prepare(task, calldata);
-}
-
 static void nfs41_call_sync_done(struct rpc_task *task, void *calldata)
 {
 	struct nfs41_call_sync_data *data = calldata;
@@ -611,17 +614,11 @@ static const struct rpc_call_ops nfs41_call_sync_ops = {
 	.rpc_call_done = nfs41_call_sync_done,
 };
 
-static const struct rpc_call_ops nfs41_call_priv_sync_ops = {
-	.rpc_call_prepare = nfs41_call_priv_sync_prepare,
-	.rpc_call_done = nfs41_call_sync_done,
-};
-
 static int nfs4_call_sync_sequence(struct rpc_clnt *clnt,
 				   struct nfs_server *server,
 				   struct rpc_message *msg,
 				   struct nfs4_sequence_args *args,
-				   struct nfs4_sequence_res *res,
-				   int privileged)
+				   struct nfs4_sequence_res *res)
 {
 	int ret;
 	struct rpc_task *task;
@@ -637,8 +634,6 @@ static int nfs4_call_sync_sequence(struct rpc_clnt *clnt,
 		.callback_data = &data
 	};
 
-	if (privileged)
-		task_setup.callback_ops = &nfs41_call_priv_sync_ops;
 	task = rpc_run_task(&task_setup);
 	if (IS_ERR(task))
 		ret = PTR_ERR(task);
@@ -656,16 +651,21 @@ int _nfs4_call_sync_session(struct rpc_clnt *clnt,
 			    struct nfs4_sequence_args *args,
 			    struct nfs4_sequence_res *res)
 {
-	return nfs4_call_sync_sequence(clnt, server, msg, args, res, 0);
+	return nfs4_call_sync_sequence(clnt, server, msg, args, res);
 }
 
 #else
-static inline
+static
 void nfs41_init_sequence(struct nfs4_sequence_args *args,
 		struct nfs4_sequence_res *res, int cache_reply)
 {
 }
 
+static void nfs4_set_sequence_privileged(struct nfs4_sequence_args *args)
+{
+}
+
+
 static int nfs4_sequence_done(struct rpc_task *task,
 			       struct nfs4_sequence_res *res)
 {
@@ -1475,13 +1475,6 @@ unlock_no_action:
 	rcu_read_unlock();
 out_no_action:
 	task->tk_action = NULL;
-
-}
-
-static void nfs4_recover_open_prepare(struct rpc_task *task, void *calldata)
-{
-	rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
-	nfs4_open_prepare(task, calldata);
 }
 
 static void nfs4_open_done(struct rpc_task *task, void *calldata)
@@ -1542,12 +1535,6 @@ static const struct rpc_call_ops nfs4_open_ops = {
 	.rpc_release = nfs4_open_release,
 };
 
-static const struct rpc_call_ops nfs4_recover_open_ops = {
-	.rpc_call_prepare = nfs4_recover_open_prepare,
-	.rpc_call_done = nfs4_open_done,
-	.rpc_release = nfs4_open_release,
-};
-
 static int nfs4_run_open_task(struct nfs4_opendata *data, int isrecover)
 {
 	struct inode *dir = data->dir->d_inode;
@@ -1577,7 +1564,7 @@ static int nfs4_run_open_task(struct nfs4_opendata *data, int isrecover)
 	data->rpc_status = 0;
 	data->cancelled = 0;
 	if (isrecover)
-		task_setup_data.callback_ops = &nfs4_recover_open_ops;
+		nfs4_set_sequence_privileged(&o_arg->seq_args);
 	task = rpc_run_task(&task_setup_data);
         if (IS_ERR(task))
                 return PTR_ERR(task);
@@ -4558,8 +4545,9 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata)
 		return;
 	/* Do we need to do an open_to_lock_owner? */
 	if (!(data->arg.lock_seqid->sequence->flags & NFS_SEQID_CONFIRMED)) {
-		if (nfs_wait_on_sequence(data->arg.open_seqid, task) != 0)
+		if (nfs_wait_on_sequence(data->arg.open_seqid, task) != 0) {
 			goto out_release_lock_seqid;
+		}
 		data->arg.open_stateid = &state->stateid;
 		data->arg.new_lock_owner = 1;
 		data->res.open_seqid = data->arg.open_seqid;
@@ -4574,13 +4562,7 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata)
 	nfs_release_seqid(data->arg.open_seqid);
 out_release_lock_seqid:
 	nfs_release_seqid(data->arg.lock_seqid);
-	dprintk("%s: done!, ret = %d\n", __func__, task->tk_status);
-}
-
-static void nfs4_recover_lock_prepare(struct rpc_task *task, void *calldata)
-{
-	rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
-	nfs4_lock_prepare(task, calldata);
+	dprintk("%s: done!, ret = %d\n", __func__, data->rpc_status);
 }
 
 static void nfs4_lock_done(struct rpc_task *task, void *calldata)
@@ -4635,12 +4617,6 @@ static const struct rpc_call_ops nfs4_lock_ops = {
 	.rpc_release = nfs4_lock_release,
 };
 
-static const struct rpc_call_ops nfs4_recover_lock_ops = {
-	.rpc_call_prepare = nfs4_recover_lock_prepare,
-	.rpc_call_done = nfs4_lock_done,
-	.rpc_release = nfs4_lock_release,
-};
-
 static void nfs4_handle_setlk_error(struct nfs_server *server, struct nfs4_lock_state *lsp, int new_lock_owner, int error)
 {
 	switch (error) {
@@ -4683,15 +4659,15 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f
 		return -ENOMEM;
 	if (IS_SETLKW(cmd))
 		data->arg.block = 1;
-	if (recovery_type > NFS_LOCK_NEW) {
-		if (recovery_type == NFS_LOCK_RECLAIM)
-			data->arg.reclaim = NFS_LOCK_RECLAIM;
-		task_setup_data.callback_ops = &nfs4_recover_lock_ops;
-	}
 	nfs41_init_sequence(&data->arg.seq_args, &data->res.seq_res, 1);
 	msg.rpc_argp = &data->arg;
 	msg.rpc_resp = &data->res;
 	task_setup_data.callback_data = data;
+	if (recovery_type > NFS_LOCK_NEW) {
+		if (recovery_type == NFS_LOCK_RECLAIM)
+			data->arg.reclaim = NFS_LOCK_RECLAIM;
+		nfs4_set_sequence_privileged(&data->arg.seq_args);
+	}
 	task = rpc_run_task(&task_setup_data);
 	if (IS_ERR(task))
 		return PTR_ERR(task);
@@ -5432,7 +5408,6 @@ static void nfs4_get_lease_time_prepare(struct rpc_task *task,
 			(struct nfs4_get_lease_time_data *)calldata;
 
 	dprintk("--> %s\n", __func__);
-	rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
 	/* just setup sequence, do not trigger session recovery
 	   since we're invoked within one */
 	nfs41_setup_sequence(data->clp->cl_session,
@@ -5500,6 +5475,7 @@ int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo)
 	int status;
 
 	nfs41_init_sequence(&args.la_seq_args, &res.lr_seq_res, 0);
+	nfs4_set_sequence_privileged(&args.la_seq_args);
 	dprintk("--> %s\n", __func__);
 	task = rpc_run_task(&task_setup);
 
@@ -5775,26 +5751,15 @@ static void nfs41_sequence_prepare(struct rpc_task *task, void *data)
 	nfs41_setup_sequence(clp->cl_session, args, res, task);
 }
 
-static void nfs41_sequence_prepare_privileged(struct rpc_task *task, void *data)
-{
-	rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
-	nfs41_sequence_prepare(task, data);
-}
-
 static const struct rpc_call_ops nfs41_sequence_ops = {
 	.rpc_call_done = nfs41_sequence_call_done,
 	.rpc_call_prepare = nfs41_sequence_prepare,
 	.rpc_release = nfs41_sequence_release,
 };
 
-static const struct rpc_call_ops nfs41_sequence_privileged_ops = {
-	.rpc_call_done = nfs41_sequence_call_done,
-	.rpc_call_prepare = nfs41_sequence_prepare_privileged,
-	.rpc_release = nfs41_sequence_release,
-};
-
-static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred,
-					     const struct rpc_call_ops *seq_ops)
+static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp,
+		struct rpc_cred *cred,
+		bool is_privileged)
 {
 	struct nfs4_sequence_data *calldata;
 	struct rpc_message msg = {
@@ -5804,7 +5769,7 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, struct rpc_
 	struct rpc_task_setup task_setup_data = {
 		.rpc_client = clp->cl_rpcclient,
 		.rpc_message = &msg,
-		.callback_ops = seq_ops,
+		.callback_ops = &nfs41_sequence_ops,
 		.flags = RPC_TASK_ASYNC | RPC_TASK_SOFT,
 	};
 
@@ -5816,6 +5781,8 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, struct rpc_
 		return ERR_PTR(-ENOMEM);
 	}
 	nfs41_init_sequence(&calldata->args, &calldata->res, 0);
+	if (is_privileged)
+		nfs4_set_sequence_privileged(&calldata->args);
 	msg.rpc_argp = &calldata->args;
 	msg.rpc_resp = &calldata->res;
 	calldata->clp = clp;
@@ -5831,7 +5798,7 @@ static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cr
 
 	if ((renew_flags & NFS4_RENEW_TIMEOUT) == 0)
 		return 0;
-	task = _nfs41_proc_sequence(clp, cred, &nfs41_sequence_ops);
+	task = _nfs41_proc_sequence(clp, cred, false);
 	if (IS_ERR(task))
 		ret = PTR_ERR(task);
 	else
@@ -5845,7 +5812,7 @@ static int nfs4_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred)
 	struct rpc_task *task;
 	int ret;
 
-	task = _nfs41_proc_sequence(clp, cred, &nfs41_sequence_privileged_ops);
+	task = _nfs41_proc_sequence(clp, cred, true);
 	if (IS_ERR(task)) {
 		ret = PTR_ERR(task);
 		goto out;
@@ -5874,7 +5841,6 @@ static void nfs4_reclaim_complete_prepare(struct rpc_task *task, void *data)
 {
 	struct nfs4_reclaim_complete_data *calldata = data;
 
-	rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
 	nfs41_setup_sequence(calldata->clp->cl_session,
 			&calldata->arg.seq_args,
 			&calldata->res.seq_res,
@@ -5955,6 +5921,7 @@ static int nfs41_proc_reclaim_complete(struct nfs_client *clp)
 	calldata->arg.one_fs = 0;
 
 	nfs41_init_sequence(&calldata->arg.seq_args, &calldata->res.seq_res, 0);
+	nfs4_set_sequence_privileged(&calldata->arg.seq_args);
 	msg.rpc_argp = &calldata->arg;
 	msg.rpc_resp = &calldata->res;
 	task_setup_data.callback_data = calldata;
@@ -6521,7 +6488,9 @@ static int _nfs41_test_stateid(struct nfs_server *server, nfs4_stateid *stateid)
 
 	dprintk("NFS call  test_stateid %p\n", stateid);
 	nfs41_init_sequence(&args.seq_args, &res.seq_res, 0);
-	status = nfs4_call_sync_sequence(server->client, server, &msg, &args.seq_args, &res.seq_res, 1);
+	nfs4_set_sequence_privileged(&args.seq_args);
+	status = nfs4_call_sync_sequence(server->client, server, &msg,
+			&args.seq_args, &res.seq_res);
 	if (status != NFS_OK) {
 		dprintk("NFS reply test_stateid: failed, %d\n", status);
 		return status;
@@ -6568,8 +6537,9 @@ static int _nfs4_free_stateid(struct nfs_server *server, nfs4_stateid *stateid)
 
 	dprintk("NFS call  free_stateid %p\n", stateid);
 	nfs41_init_sequence(&args.seq_args, &res.seq_res, 0);
+	nfs4_set_sequence_privileged(&args.seq_args);
 	status = nfs4_call_sync_sequence(server->client, server, &msg,
-					 &args.seq_args, &res.seq_res, 1);
+			&args.seq_args, &res.seq_res);
 	dprintk("NFS reply free_stateid: %d\n", status);
 	return status;
 }
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 2076149db1a4..baa673edb597 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -188,7 +188,8 @@ struct nfs4_channel_attrs {
 struct nfs4_slot;
 struct nfs4_sequence_args {
 	struct nfs4_slot	*sa_slot;
-	u8			sa_cache_this;
+	u8			sa_cache_this : 1,
+				sa_privileged : 1;
 };
 
 struct nfs4_sequence_res {
-- 
cgit v1.2.3


From 62ae082d883d167cdaa7895cf2972d85e178228a Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 29 Nov 2012 17:10:01 -0500
Subject: NFSv4: Reorder the XDR structures to put sequence at the top, not
 bottom

Pre-condition for optimising the slot allocation and reintroducing FIFO
behaviour.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs_xdr.h | 138 ++++++++++++++++++++++++------------------------
 1 file changed, 69 insertions(+), 69 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index baa673edb597..a55abd499c21 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -205,8 +205,8 @@ struct nfs4_get_lease_time_args {
 };
 
 struct nfs4_get_lease_time_res {
-	struct nfs_fsinfo	       *lr_fsinfo;
 	struct nfs4_sequence_res	lr_seq_res;
+	struct nfs_fsinfo	       *lr_fsinfo;
 };
 
 #define PNFS_LAYOUT_MAXSIZE 4096
@@ -224,23 +224,23 @@ struct pnfs_layout_range {
 };
 
 struct nfs4_layoutget_args {
+	struct nfs4_sequence_args seq_args;
 	__u32 type;
 	struct pnfs_layout_range range;
 	__u64 minlength;
 	__u32 maxcount;
 	struct inode *inode;
 	struct nfs_open_context *ctx;
-	struct nfs4_sequence_args seq_args;
 	nfs4_stateid stateid;
 	struct nfs4_layoutdriver_data layout;
 };
 
 struct nfs4_layoutget_res {
+	struct nfs4_sequence_res seq_res;
 	__u32 return_on_close;
 	struct pnfs_layout_range range;
 	__u32 type;
 	nfs4_stateid stateid;
-	struct nfs4_sequence_res seq_res;
 	struct nfs4_layoutdriver_data *layoutp;
 };
 
@@ -251,38 +251,38 @@ struct nfs4_layoutget {
 };
 
 struct nfs4_getdevicelist_args {
+	struct nfs4_sequence_args seq_args;
 	const struct nfs_fh *fh;
 	u32 layoutclass;
-	struct nfs4_sequence_args seq_args;
 };
 
 struct nfs4_getdevicelist_res {
-	struct pnfs_devicelist *devlist;
 	struct nfs4_sequence_res seq_res;
+	struct pnfs_devicelist *devlist;
 };
 
 struct nfs4_getdeviceinfo_args {
-	struct pnfs_device *pdev;
 	struct nfs4_sequence_args seq_args;
+	struct pnfs_device *pdev;
 };
 
 struct nfs4_getdeviceinfo_res {
-	struct pnfs_device *pdev;
 	struct nfs4_sequence_res seq_res;
+	struct pnfs_device *pdev;
 };
 
 struct nfs4_layoutcommit_args {
+	struct nfs4_sequence_args seq_args;
 	nfs4_stateid stateid;
 	__u64 lastbytewritten;
 	struct inode *inode;
 	const u32 *bitmask;
-	struct nfs4_sequence_args seq_args;
 };
 
 struct nfs4_layoutcommit_res {
+	struct nfs4_sequence_res seq_res;
 	struct nfs_fattr *fattr;
 	const struct nfs_server *server;
-	struct nfs4_sequence_res seq_res;
 	int status;
 };
 
@@ -296,11 +296,11 @@ struct nfs4_layoutcommit_data {
 };
 
 struct nfs4_layoutreturn_args {
+	struct nfs4_sequence_args seq_args;
 	struct pnfs_layout_hdr *layout;
 	struct inode *inode;
 	nfs4_stateid stateid;
 	__u32   layout_type;
-	struct nfs4_sequence_args seq_args;
 };
 
 struct nfs4_layoutreturn_res {
@@ -326,6 +326,7 @@ struct stateowner_id {
  * Arguments to the open call.
  */
 struct nfs_openargs {
+	struct nfs4_sequence_args	seq_args;
 	const struct nfs_fh *	fh;
 	struct nfs_seqid *	seqid;
 	int			open_flags;
@@ -346,10 +347,10 @@ struct nfs_openargs {
 	const u32 *		bitmask;
 	const u32 *		open_bitmap;
 	__u32			claim;
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs_openres {
+	struct nfs4_sequence_res	seq_res;
 	nfs4_stateid            stateid;
 	struct nfs_fh           fh;
 	struct nfs4_change_info	cinfo;
@@ -364,7 +365,6 @@ struct nfs_openres {
 	__u32			attrset[NFS4_BITMAP_SIZE];
 	struct nfs4_string	*owner;
 	struct nfs4_string	*group_owner;
-	struct nfs4_sequence_res	seq_res;
 	__u32			access_request;
 	__u32			access_supported;
 	__u32			access_result;
@@ -388,20 +388,20 @@ struct nfs_open_confirmres {
  * Arguments to the close call.
  */
 struct nfs_closeargs {
+	struct nfs4_sequence_args	seq_args;
 	struct nfs_fh *         fh;
 	nfs4_stateid *		stateid;
 	struct nfs_seqid *	seqid;
 	fmode_t			fmode;
 	const u32 *		bitmask;
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs_closeres {
+	struct nfs4_sequence_res	seq_res;
 	nfs4_stateid            stateid;
 	struct nfs_fattr *	fattr;
 	struct nfs_seqid *	seqid;
 	const struct nfs_server *server;
-	struct nfs4_sequence_res	seq_res;
 };
 /*
  *  * Arguments to the lock,lockt, and locku call.
@@ -413,6 +413,7 @@ struct nfs_lowner {
 };
 
 struct nfs_lock_args {
+	struct nfs4_sequence_args	seq_args;
 	struct nfs_fh *		fh;
 	struct file_lock *	fl;
 	struct nfs_seqid *	lock_seqid;
@@ -423,40 +424,39 @@ struct nfs_lock_args {
 	unsigned char		block : 1;
 	unsigned char		reclaim : 1;
 	unsigned char		new_lock_owner : 1;
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs_lock_res {
+	struct nfs4_sequence_res	seq_res;
 	nfs4_stateid		stateid;
 	struct nfs_seqid *	lock_seqid;
 	struct nfs_seqid *	open_seqid;
-	struct nfs4_sequence_res	seq_res;
 };
 
 struct nfs_locku_args {
+	struct nfs4_sequence_args	seq_args;
 	struct nfs_fh *		fh;
 	struct file_lock *	fl;
 	struct nfs_seqid *	seqid;
 	nfs4_stateid *		stateid;
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs_locku_res {
+	struct nfs4_sequence_res	seq_res;
 	nfs4_stateid		stateid;
 	struct nfs_seqid *	seqid;
-	struct nfs4_sequence_res	seq_res;
 };
 
 struct nfs_lockt_args {
+	struct nfs4_sequence_args	seq_args;
 	struct nfs_fh *		fh;
 	struct file_lock *	fl;
 	struct nfs_lowner	lock_owner;
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs_lockt_res {
-	struct file_lock *	denied; /* LOCK, LOCKT failed */
 	struct nfs4_sequence_res	seq_res;
+	struct file_lock *	denied; /* LOCK, LOCKT failed */
 };
 
 struct nfs_release_lockowner_args {
@@ -464,22 +464,23 @@ struct nfs_release_lockowner_args {
 };
 
 struct nfs4_delegreturnargs {
+	struct nfs4_sequence_args	seq_args;
 	const struct nfs_fh *fhandle;
 	const nfs4_stateid *stateid;
 	const u32 * bitmask;
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs4_delegreturnres {
+	struct nfs4_sequence_res	seq_res;
 	struct nfs_fattr * fattr;
 	const struct nfs_server *server;
-	struct nfs4_sequence_res	seq_res;
 };
 
 /*
  * Arguments to the read call.
  */
 struct nfs_readargs {
+	struct nfs4_sequence_args	seq_args;
 	struct nfs_fh *		fh;
 	struct nfs_open_context *context;
 	struct nfs_lock_context *lock_context;
@@ -487,20 +488,20 @@ struct nfs_readargs {
 	__u32			count;
 	unsigned int		pgbase;
 	struct page **		pages;
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs_readres {
+	struct nfs4_sequence_res	seq_res;
 	struct nfs_fattr *	fattr;
 	__u32			count;
 	int                     eof;
-	struct nfs4_sequence_res	seq_res;
 };
 
 /*
  * Arguments to the write call.
  */
 struct nfs_writeargs {
+	struct nfs4_sequence_args	seq_args;
 	struct nfs_fh *		fh;
 	struct nfs_open_context *context;
 	struct nfs_lock_context *lock_context;
@@ -510,7 +511,6 @@ struct nfs_writeargs {
 	unsigned int		pgbase;
 	struct page **		pages;
 	const u32 *		bitmask;
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs_write_verifier {
@@ -523,65 +523,65 @@ struct nfs_writeverf {
 };
 
 struct nfs_writeres {
+	struct nfs4_sequence_res	seq_res;
 	struct nfs_fattr *	fattr;
 	struct nfs_writeverf *	verf;
 	__u32			count;
 	const struct nfs_server *server;
-	struct nfs4_sequence_res	seq_res;
 };
 
 /*
  * Arguments to the commit call.
  */
 struct nfs_commitargs {
+	struct nfs4_sequence_args	seq_args;
 	struct nfs_fh		*fh;
 	__u64			offset;
 	__u32			count;
 	const u32		*bitmask;
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs_commitres {
+	struct nfs4_sequence_res	seq_res;
 	struct nfs_fattr	*fattr;
 	struct nfs_writeverf	*verf;
 	const struct nfs_server *server;
-	struct nfs4_sequence_res	seq_res;
 };
 
 /*
  * Common arguments to the unlink call
  */
 struct nfs_removeargs {
+	struct nfs4_sequence_args	seq_args;
 	const struct nfs_fh	*fh;
 	struct qstr		name;
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs_removeres {
+	struct nfs4_sequence_res 	seq_res;
 	const struct nfs_server *server;
 	struct nfs_fattr	*dir_attr;
 	struct nfs4_change_info	cinfo;
-	struct nfs4_sequence_res 	seq_res;
 };
 
 /*
  * Common arguments to the rename call
  */
 struct nfs_renameargs {
+	struct nfs4_sequence_args	seq_args;
 	const struct nfs_fh		*old_dir;
 	const struct nfs_fh		*new_dir;
 	const struct qstr		*old_name;
 	const struct qstr		*new_name;
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs_renameres {
+	struct nfs4_sequence_res	seq_res;
 	const struct nfs_server		*server;
 	struct nfs4_change_info		old_cinfo;
 	struct nfs_fattr		*old_fattr;
 	struct nfs4_change_info		new_cinfo;
 	struct nfs_fattr		*new_fattr;
-	struct nfs4_sequence_res	seq_res;
 };
 
 /*
@@ -622,20 +622,20 @@ struct nfs_createargs {
 };
 
 struct nfs_setattrargs {
+	struct nfs4_sequence_args 	seq_args;
 	struct nfs_fh *                 fh;
 	nfs4_stateid                    stateid;
 	struct iattr *                  iap;
 	const struct nfs_server *	server; /* Needed for name mapping */
 	const u32 *			bitmask;
-	struct nfs4_sequence_args 	seq_args;
 };
 
 struct nfs_setaclargs {
+	struct nfs4_sequence_args	seq_args;
 	struct nfs_fh *			fh;
 	size_t				acl_len;
 	unsigned int			acl_pgbase;
 	struct page **			acl_pages;
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs_setaclres {
@@ -643,27 +643,27 @@ struct nfs_setaclres {
 };
 
 struct nfs_getaclargs {
+	struct nfs4_sequence_args 	seq_args;
 	struct nfs_fh *			fh;
 	size_t				acl_len;
 	unsigned int			acl_pgbase;
 	struct page **			acl_pages;
-	struct nfs4_sequence_args 	seq_args;
 };
 
 /* getxattr ACL interface flags */
 #define NFS4_ACL_TRUNC		0x0001	/* ACL was truncated */
 struct nfs_getaclres {
+	struct nfs4_sequence_res	seq_res;
 	size_t				acl_len;
 	size_t				acl_data_offset;
 	int				acl_flags;
 	struct page *			acl_scratch;
-	struct nfs4_sequence_res	seq_res;
 };
 
 struct nfs_setattrres {
+	struct nfs4_sequence_res	seq_res;
 	struct nfs_fattr *              fattr;
 	const struct nfs_server *	server;
-	struct nfs4_sequence_res	seq_res;
 };
 
 struct nfs_linkargs {
@@ -828,21 +828,22 @@ struct nfs3_getaclres {
 typedef u64 clientid4;
 
 struct nfs4_accessargs {
+	struct nfs4_sequence_args	seq_args;
 	const struct nfs_fh *		fh;
 	const u32 *			bitmask;
 	u32				access;
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs4_accessres {
+	struct nfs4_sequence_res	seq_res;
 	const struct nfs_server *	server;
 	struct nfs_fattr *		fattr;
 	u32				supported;
 	u32				access;
-	struct nfs4_sequence_res	seq_res;
 };
 
 struct nfs4_create_arg {
+	struct nfs4_sequence_args 	seq_args;
 	u32				ftype;
 	union {
 		struct {
@@ -859,88 +860,88 @@ struct nfs4_create_arg {
 	const struct iattr *		attrs;
 	const struct nfs_fh *		dir_fh;
 	const u32 *			bitmask;
-	struct nfs4_sequence_args 	seq_args;
 };
 
 struct nfs4_create_res {
+	struct nfs4_sequence_res	seq_res;
 	const struct nfs_server *	server;
 	struct nfs_fh *			fh;
 	struct nfs_fattr *		fattr;
 	struct nfs4_change_info		dir_cinfo;
-	struct nfs4_sequence_res	seq_res;
 };
 
 struct nfs4_fsinfo_arg {
+	struct nfs4_sequence_args	seq_args;
 	const struct nfs_fh *		fh;
 	const u32 *			bitmask;
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs4_fsinfo_res {
-	struct nfs_fsinfo	       *fsinfo;
 	struct nfs4_sequence_res	seq_res;
+	struct nfs_fsinfo	       *fsinfo;
 };
 
 struct nfs4_getattr_arg {
+	struct nfs4_sequence_args	seq_args;
 	const struct nfs_fh *		fh;
 	const u32 *			bitmask;
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs4_getattr_res {
+	struct nfs4_sequence_res	seq_res;
 	const struct nfs_server *	server;
 	struct nfs_fattr *		fattr;
-	struct nfs4_sequence_res	seq_res;
 };
 
 struct nfs4_link_arg {
+	struct nfs4_sequence_args 	seq_args;
 	const struct nfs_fh *		fh;
 	const struct nfs_fh *		dir_fh;
 	const struct qstr *		name;
 	const u32 *			bitmask;
-	struct nfs4_sequence_args 	seq_args;
 };
 
 struct nfs4_link_res {
+	struct nfs4_sequence_res	seq_res;
 	const struct nfs_server *	server;
 	struct nfs_fattr *		fattr;
 	struct nfs4_change_info		cinfo;
 	struct nfs_fattr *		dir_attr;
-	struct nfs4_sequence_res	seq_res;
 };
 
 
 struct nfs4_lookup_arg {
+	struct nfs4_sequence_args	seq_args;
 	const struct nfs_fh *		dir_fh;
 	const struct qstr *		name;
 	const u32 *			bitmask;
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs4_lookup_res {
+	struct nfs4_sequence_res	seq_res;
 	const struct nfs_server *	server;
 	struct nfs_fattr *		fattr;
 	struct nfs_fh *			fh;
-	struct nfs4_sequence_res	seq_res;
 };
 
 struct nfs4_lookup_root_arg {
-	const u32 *			bitmask;
 	struct nfs4_sequence_args	seq_args;
+	const u32 *			bitmask;
 };
 
 struct nfs4_pathconf_arg {
+	struct nfs4_sequence_args	seq_args;
 	const struct nfs_fh *		fh;
 	const u32 *			bitmask;
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs4_pathconf_res {
-	struct nfs_pathconf	       *pathconf;
 	struct nfs4_sequence_res	seq_res;
+	struct nfs_pathconf	       *pathconf;
 };
 
 struct nfs4_readdir_arg {
+	struct nfs4_sequence_args	seq_args;
 	const struct nfs_fh *		fh;
 	u64				cookie;
 	nfs4_verifier			verifier;
@@ -949,21 +950,20 @@ struct nfs4_readdir_arg {
 	unsigned int			pgbase;	/* zero-copy data */
 	const u32 *			bitmask;
 	int				plus;
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs4_readdir_res {
+	struct nfs4_sequence_res	seq_res;
 	nfs4_verifier			verifier;
 	unsigned int			pgbase;
-	struct nfs4_sequence_res	seq_res;
 };
 
 struct nfs4_readlink {
+	struct nfs4_sequence_args	seq_args;
 	const struct nfs_fh *		fh;
 	unsigned int			pgbase;
 	unsigned int			pglen;   /* zero-copy data */
 	struct page **			pages;   /* zero-copy data */
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs4_readlink_res {
@@ -989,28 +989,28 @@ struct nfs4_setclientid_res {
 };
 
 struct nfs4_statfs_arg {
+	struct nfs4_sequence_args	seq_args;
 	const struct nfs_fh *		fh;
 	const u32 *			bitmask;
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs4_statfs_res {
-	struct nfs_fsstat	       *fsstat;
 	struct nfs4_sequence_res	seq_res;
+	struct nfs_fsstat	       *fsstat;
 };
 
 struct nfs4_server_caps_arg {
-	struct nfs_fh		       *fhandle;
 	struct nfs4_sequence_args	seq_args;
+	struct nfs_fh		       *fhandle;
 };
 
 struct nfs4_server_caps_res {
+	struct nfs4_sequence_res	seq_res;
 	u32				attr_bitmask[3];
 	u32				acl_bitmask;
 	u32				has_links;
 	u32				has_symlinks;
 	u32				fh_expire_type;
-	struct nfs4_sequence_res	seq_res;
 };
 
 #define NFS4_PATHNAME_MAXCOMPONENTS 512
@@ -1036,16 +1036,16 @@ struct nfs4_fs_locations {
 };
 
 struct nfs4_fs_locations_arg {
+	struct nfs4_sequence_args	seq_args;
 	const struct nfs_fh *dir_fh;
 	const struct qstr *name;
 	struct page *page;
 	const u32 *bitmask;
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs4_fs_locations_res {
-	struct nfs4_fs_locations       *fs_locations;
 	struct nfs4_sequence_res	seq_res;
+	struct nfs4_fs_locations       *fs_locations;
 };
 
 struct nfs4_secinfo_oid {
@@ -1070,14 +1070,14 @@ struct nfs4_secinfo_flavors {
 };
 
 struct nfs4_secinfo_arg {
+	struct nfs4_sequence_args	seq_args;
 	const struct nfs_fh		*dir_fh;
 	const struct qstr		*name;
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs4_secinfo_res {
-	struct nfs4_secinfo_flavors	*flavors;
 	struct nfs4_sequence_res	seq_res;
+	struct nfs4_secinfo_flavors	*flavors;
 };
 
 #endif /* CONFIG_NFS_V4 */
@@ -1157,9 +1157,9 @@ struct nfs41_create_session_res {
 };
 
 struct nfs41_reclaim_complete_args {
+	struct nfs4_sequence_args	seq_args;
 	/* In the future extend to include curr_fh for use with migration */
 	unsigned char			one_fs:1;
-	struct nfs4_sequence_args	seq_args;
 };
 
 struct nfs41_reclaim_complete_res {
@@ -1169,28 +1169,28 @@ struct nfs41_reclaim_complete_res {
 #define SECINFO_STYLE_CURRENT_FH 0
 #define SECINFO_STYLE_PARENT 1
 struct nfs41_secinfo_no_name_args {
-	int				style;
 	struct nfs4_sequence_args	seq_args;
+	int				style;
 };
 
 struct nfs41_test_stateid_args {
-	nfs4_stateid			*stateid;
 	struct nfs4_sequence_args	seq_args;
+	nfs4_stateid			*stateid;
 };
 
 struct nfs41_test_stateid_res {
-	unsigned int			status;
 	struct nfs4_sequence_res	seq_res;
+	unsigned int			status;
 };
 
 struct nfs41_free_stateid_args {
-	nfs4_stateid			*stateid;
 	struct nfs4_sequence_args	seq_args;
+	nfs4_stateid			*stateid;
 };
 
 struct nfs41_free_stateid_res {
-	unsigned int			status;
 	struct nfs4_sequence_res	seq_res;
+	unsigned int			status;
 };
 
 #else
-- 
cgit v1.2.3


From c05eecf636101dd4347b2d8fa457626bf0088e0a Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Fri, 30 Nov 2012 23:59:29 -0500
Subject: SUNRPC: Don't allow low priority tasks to pre-empt higher priority
 ones

Currently, the priority queues attempt to be 'fair' to lower priority
tasks by scheduling them after a certain number of higher priority tasks
have run. The problem is that both the transport send queue and
the NFSv4.1 session slot queue have strong ordering requirements.

This patch therefore removes the fairness code in favour of strong
ordering of task priorities.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/sched.h |  1 -
 net/sunrpc/sched.c           | 44 ++++++++++++++++++++++----------------------
 2 files changed, 22 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index dc0c3cc3ada3..b64f8eb0b973 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -192,7 +192,6 @@ struct rpc_wait_queue {
 	pid_t			owner;			/* process id of last task serviced */
 	unsigned char		maxpriority;		/* maximum priority (0 if queue is not a priority queue) */
 	unsigned char		priority;		/* current priority */
-	unsigned char		count;			/* # task groups remaining serviced so far */
 	unsigned char		nr;			/* # tasks remaining for cookie */
 	unsigned short		qlen;			/* total # tasks waiting in queue */
 	struct rpc_timer	timer_list;
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 1aefc9fef866..d17a704aaf5f 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -98,6 +98,23 @@ __rpc_add_timer(struct rpc_wait_queue *queue, struct rpc_task *task)
 	list_add(&task->u.tk_wait.timer_list, &queue->timer_list.list);
 }
 
+static void rpc_set_waitqueue_priority(struct rpc_wait_queue *queue, int priority)
+{
+	queue->priority = priority;
+}
+
+static void rpc_set_waitqueue_owner(struct rpc_wait_queue *queue, pid_t pid)
+{
+	queue->owner = pid;
+	queue->nr = RPC_BATCH_COUNT;
+}
+
+static void rpc_reset_waitqueue_priority(struct rpc_wait_queue *queue)
+{
+	rpc_set_waitqueue_priority(queue, queue->maxpriority);
+	rpc_set_waitqueue_owner(queue, 0);
+}
+
 /*
  * Add new request to a priority queue.
  */
@@ -109,9 +126,11 @@ static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue,
 	struct rpc_task *t;
 
 	INIT_LIST_HEAD(&task->u.tk_wait.links);
-	q = &queue->tasks[queue_priority];
 	if (unlikely(queue_priority > queue->maxpriority))
-		q = &queue->tasks[queue->maxpriority];
+		queue_priority = queue->maxpriority;
+	if (queue_priority > queue->priority)
+		rpc_set_waitqueue_priority(queue, queue_priority);
+	q = &queue->tasks[queue_priority];
 	list_for_each_entry(t, q, u.tk_wait.list) {
 		if (t->tk_owner == task->tk_owner) {
 			list_add_tail(&task->u.tk_wait.list, &t->u.tk_wait.links);
@@ -180,24 +199,6 @@ static void __rpc_remove_wait_queue(struct rpc_wait_queue *queue, struct rpc_tas
 			task->tk_pid, queue, rpc_qname(queue));
 }
 
-static inline void rpc_set_waitqueue_priority(struct rpc_wait_queue *queue, int priority)
-{
-	queue->priority = priority;
-	queue->count = 1 << (priority * 2);
-}
-
-static inline void rpc_set_waitqueue_owner(struct rpc_wait_queue *queue, pid_t pid)
-{
-	queue->owner = pid;
-	queue->nr = RPC_BATCH_COUNT;
-}
-
-static inline void rpc_reset_waitqueue_priority(struct rpc_wait_queue *queue)
-{
-	rpc_set_waitqueue_priority(queue, queue->maxpriority);
-	rpc_set_waitqueue_owner(queue, 0);
-}
-
 static void __rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const char *qname, unsigned char nr_queues)
 {
 	int i;
@@ -464,8 +465,7 @@ static struct rpc_task *__rpc_find_next_queued_priority(struct rpc_wait_queue *q
 		/*
 		 * Check if we need to switch queues.
 		 */
-		if (--queue->count)
-			goto new_owner;
+		goto new_owner;
 	}
 
 	/*
-- 
cgit v1.2.3


From cf66bb93e0f75e0a4ba1ec070692618fa028e994 Mon Sep 17 00:00:00 2001
From: David Woodhouse <David.Woodhouse@intel.com>
Date: Mon, 3 Dec 2012 16:25:40 +0000
Subject: byteorder: allow arch to opt to use GCC intrinsics for byteswapping

Since GCC 4.4, there have been __builtin_bswap32() and __builtin_bswap16()
intrinsics. A __builtin_bswap16() came a little later (4.6 for PowerPC,
48 for other platforms).

By using these instead of the inline assembler that most architectures
have in their __arch_swabXX() macros, we let the compiler see what's
actually happening. The resulting code should be at least as good, and
much *better* in the cases where it can be combined with a nearby load
or store, using a load-and-byteswap or store-and-byteswap instruction
(e.g. lwbrx/stwbrx on PowerPC, movbe on Atom).

When GCC is sufficiently recent *and* the architecture opts in to using
the intrinsics by setting CONFIG_ARCH_USE_BUILTIN_BSWAP, they will be
used in preference to the __arch_swabXX() macros. An architecture which
does not set ARCH_USE_BUILTIN_BSWAP will continue to use its own
hand-crafted macros.

Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
Acked-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/Kconfig                   | 19 +++++++++++++++++++
 include/linux/compiler-gcc4.h  | 10 ++++++++++
 include/linux/compiler-intel.h |  7 +++++++
 include/uapi/linux/swab.h      | 12 +++++++++---
 4 files changed, 45 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/arch/Kconfig b/arch/Kconfig
index 366ec06a5185..c31416b10586 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -112,6 +112,25 @@ config HAVE_EFFICIENT_UNALIGNED_ACCESS
 	  See Documentation/unaligned-memory-access.txt for more
 	  information on the topic of unaligned memory accesses.
 
+config ARCH_USE_BUILTIN_BSWAP
+       bool
+       help
+	 Modern versions of GCC (since 4.4) have builtin functions
+	 for handling byte-swapping. Using these, instead of the old
+	 inline assembler that the architecture code provides in the
+	 __arch_bswapXX() macros, allows the compiler to see what's
+	 happening and offers more opportunity for optimisation. In
+	 particular, the compiler will be able to combine the byteswap
+	 with a nearby load or store and use load-and-swap or
+	 store-and-swap instructions if the architecture has them. It
+	 should almost *never* result in code which is worse than the
+	 hand-coded assembler in <asm/swab.h>.  But just in case it
+	 does, the use of the builtins is optional.
+
+	 Any architecture with load-and-swap or store-and-swap
+	 instructions should set this. And it shouldn't hurt to set it
+	 on architectures that don't have such instructions.
+
 config HAVE_SYSCALL_WRAPPERS
 	bool
 
diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h
index 412bc6c2b023..dc16a858e77c 100644
--- a/include/linux/compiler-gcc4.h
+++ b/include/linux/compiler-gcc4.h
@@ -63,3 +63,13 @@
 #define __compiletime_warning(message) __attribute__((warning(message)))
 #define __compiletime_error(message) __attribute__((error(message)))
 #endif
+
+#ifdef CONFIG_ARCH_USE_BUILTIN_BSWAP
+#if __GNUC_MINOR__ >= 4
+#define __HAVE_BUILTIN_BSWAP32__
+#define __HAVE_BUILTIN_BSWAP64__
+#endif
+#if __GNUC_MINOR__ >= 8 || (defined(__powerpc__) && __GNUC_MINOR__ >= 6)
+#define __HAVE_BUILTIN_BSWAP16__
+#endif
+#endif
diff --git a/include/linux/compiler-intel.h b/include/linux/compiler-intel.h
index d8e636e5607d..973ce10c40b6 100644
--- a/include/linux/compiler-intel.h
+++ b/include/linux/compiler-intel.h
@@ -29,3 +29,10 @@
 #endif
 
 #define uninitialized_var(x) x
+
+#ifndef __HAVE_BUILTIN_BSWAP16__
+/* icc has this, but it's called _bswap16 */
+#define __HAVE_BUILTIN_BSWAP16__
+#define __builtin_bswap16 _bswap16
+#endif
+
diff --git a/include/uapi/linux/swab.h b/include/uapi/linux/swab.h
index e811474724c2..0e011eb91b5d 100644
--- a/include/uapi/linux/swab.h
+++ b/include/uapi/linux/swab.h
@@ -45,7 +45,9 @@
 
 static inline __attribute_const__ __u16 __fswab16(__u16 val)
 {
-#ifdef __arch_swab16
+#ifdef __HAVE_BUILTIN_BSWAP16__
+	return __builtin_bswap16(val);
+#elif defined (__arch_swab16)
 	return __arch_swab16(val);
 #else
 	return ___constant_swab16(val);
@@ -54,7 +56,9 @@ static inline __attribute_const__ __u16 __fswab16(__u16 val)
 
 static inline __attribute_const__ __u32 __fswab32(__u32 val)
 {
-#ifdef __arch_swab32
+#ifdef __HAVE_BUILTIN_BSWAP32__
+	return __builtin_bswap32(val);
+#elif defined(__arch_swab32)
 	return __arch_swab32(val);
 #else
 	return ___constant_swab32(val);
@@ -63,7 +67,9 @@ static inline __attribute_const__ __u32 __fswab32(__u32 val)
 
 static inline __attribute_const__ __u64 __fswab64(__u64 val)
 {
-#ifdef __arch_swab64
+#ifdef __HAVE_BUILTIN_BSWAP64__
+	return __builtin_bswap64(val);
+#elif defined (__arch_swab64)
 	return __arch_swab64(val);
 #elif defined(__SWAB_64_THRU_32__)
 	__u32 h = val >> 32;
-- 
cgit v1.2.3


From 3f3299d5c0268d6cc3f47b446e8aca436e4a5651 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Wed, 28 Nov 2012 13:42:38 +0100
Subject: block: Rename queue dead flag

QUEUE_FLAG_DEAD is used to indicate that queuing new requests must
stop. After this flag has been set queue draining starts. However,
during the queue draining phase it is still safe to invoke the
queue's request_fn, so QUEUE_FLAG_DYING is a better name for this
flag.

This patch has been generated by running the following command
over the kernel source tree:

git grep -lEw 'blk_queue_dead|QUEUE_FLAG_DEAD' |
    xargs sed -i.tmp -e 's/blk_queue_dead/blk_queue_dying/g'      \
        -e 's/QUEUE_FLAG_DEAD/QUEUE_FLAG_DYING/g';                \
sed -i.tmp -e "s/QUEUE_FLAG_DYING$(printf \\t)*5/QUEUE_FLAG_DYING$(printf \\t)5/g" \
    include/linux/blkdev.h;                                       \
sed -i.tmp -e 's/ DEAD/ DYING/g' -e 's/dead queue/a dying queue/' \
    -e 's/Dead queue/A dying queue/' block/blk-core.c

Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Acked-by: Tejun Heo <tj@kernel.org>
Cc: James Bottomley <JBottomley@Parallels.com>
Cc: Mike Christie <michaelc@cs.wisc.edu>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Chanho Min <chanho.min@lge.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-cgroup.c      |  2 +-
 block/blk-core.c        | 26 +++++++++++++-------------
 block/blk-exec.c        |  2 +-
 block/blk-sysfs.c       |  4 ++--
 block/blk-throttle.c    |  2 +-
 block/blk.h             |  2 +-
 drivers/scsi/scsi_lib.c |  2 +-
 include/linux/blkdev.h  |  4 ++--
 8 files changed, 22 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index d0b770391ad4..5dea4e8dbc55 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -231,7 +231,7 @@ struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
 	 * we shouldn't allow anything to go through for a bypassing queue.
 	 */
 	if (unlikely(blk_queue_bypass(q)))
-		return ERR_PTR(blk_queue_dead(q) ? -EINVAL : -EBUSY);
+		return ERR_PTR(blk_queue_dying(q) ? -EINVAL : -EBUSY);
 	return __blkg_lookup_create(blkcg, q, NULL);
 }
 EXPORT_SYMBOL_GPL(blkg_lookup_create);
diff --git a/block/blk-core.c b/block/blk-core.c
index ee0e5cafa859..1a95272cca50 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -473,20 +473,20 @@ EXPORT_SYMBOL_GPL(blk_queue_bypass_end);
  * blk_cleanup_queue - shutdown a request queue
  * @q: request queue to shutdown
  *
- * Mark @q DEAD, drain all pending requests, destroy and put it.  All
+ * Mark @q DYING, drain all pending requests, destroy and put it.  All
  * future requests will be failed immediately with -ENODEV.
  */
 void blk_cleanup_queue(struct request_queue *q)
 {
 	spinlock_t *lock = q->queue_lock;
 
-	/* mark @q DEAD, no new request or merges will be allowed afterwards */
+	/* mark @q DYING, no new request or merges will be allowed afterwards */
 	mutex_lock(&q->sysfs_lock);
-	queue_flag_set_unlocked(QUEUE_FLAG_DEAD, q);
+	queue_flag_set_unlocked(QUEUE_FLAG_DYING, q);
 	spin_lock_irq(lock);
 
 	/*
-	 * Dead queue is permanently in bypass mode till released.  Note
+	 * A dying queue is permanently in bypass mode till released.  Note
 	 * that, unlike blk_queue_bypass_start(), we aren't performing
 	 * synchronize_rcu() after entering bypass mode to avoid the delay
 	 * as some drivers create and destroy a lot of queues while
@@ -499,11 +499,11 @@ void blk_cleanup_queue(struct request_queue *q)
 
 	queue_flag_set(QUEUE_FLAG_NOMERGES, q);
 	queue_flag_set(QUEUE_FLAG_NOXMERGES, q);
-	queue_flag_set(QUEUE_FLAG_DEAD, q);
+	queue_flag_set(QUEUE_FLAG_DYING, q);
 	spin_unlock_irq(lock);
 	mutex_unlock(&q->sysfs_lock);
 
-	/* drain all requests queued before DEAD marking */
+	/* drain all requests queued before DYING marking */
 	blk_drain_queue(q, true);
 
 	/* @q won't process any more request, flush async actions */
@@ -716,7 +716,7 @@ EXPORT_SYMBOL(blk_init_allocated_queue);
 
 bool blk_get_queue(struct request_queue *q)
 {
-	if (likely(!blk_queue_dead(q))) {
+	if (likely(!blk_queue_dying(q))) {
 		__blk_get_queue(q);
 		return true;
 	}
@@ -870,7 +870,7 @@ static struct request *__get_request(struct request_list *rl, int rw_flags,
 	const bool is_sync = rw_is_sync(rw_flags) != 0;
 	int may_queue;
 
-	if (unlikely(blk_queue_dead(q)))
+	if (unlikely(blk_queue_dying(q)))
 		return NULL;
 
 	may_queue = elv_may_queue(q, rw_flags);
@@ -1050,7 +1050,7 @@ retry:
 	if (rq)
 		return rq;
 
-	if (!(gfp_mask & __GFP_WAIT) || unlikely(blk_queue_dead(q))) {
+	if (!(gfp_mask & __GFP_WAIT) || unlikely(blk_queue_dying(q))) {
 		blk_put_rl(rl);
 		return NULL;
 	}
@@ -1910,7 +1910,7 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
 		return -EIO;
 
 	spin_lock_irqsave(q->queue_lock, flags);
-	if (unlikely(blk_queue_dead(q))) {
+	if (unlikely(blk_queue_dying(q))) {
 		spin_unlock_irqrestore(q->queue_lock, flags);
 		return -ENODEV;
 	}
@@ -2885,9 +2885,9 @@ static void queue_unplugged(struct request_queue *q, unsigned int depth,
 	trace_block_unplug(q, depth, !from_schedule);
 
 	/*
-	 * Don't mess with dead queue.
+	 * Don't mess with a dying queue.
 	 */
-	if (unlikely(blk_queue_dead(q))) {
+	if (unlikely(blk_queue_dying(q))) {
 		spin_unlock(q->queue_lock);
 		return;
 	}
@@ -2996,7 +2996,7 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
 		/*
 		 * Short-circuit if @q is dead
 		 */
-		if (unlikely(blk_queue_dead(q))) {
+		if (unlikely(blk_queue_dying(q))) {
 			__blk_end_request_all(rq, -ENODEV);
 			continue;
 		}
diff --git a/block/blk-exec.c b/block/blk-exec.c
index 8b6dc5bd4dd0..4aec98df7ba5 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -60,7 +60,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
 
 	spin_lock_irq(q->queue_lock);
 
-	if (unlikely(blk_queue_dead(q))) {
+	if (unlikely(blk_queue_dying(q))) {
 		rq->errors = -ENXIO;
 		if (rq->end_io)
 			rq->end_io(rq, rq->errors);
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index ce6204608822..788147797a79 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -466,7 +466,7 @@ queue_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
 	if (!entry->show)
 		return -EIO;
 	mutex_lock(&q->sysfs_lock);
-	if (blk_queue_dead(q)) {
+	if (blk_queue_dying(q)) {
 		mutex_unlock(&q->sysfs_lock);
 		return -ENOENT;
 	}
@@ -488,7 +488,7 @@ queue_attr_store(struct kobject *kobj, struct attribute *attr,
 
 	q = container_of(kobj, struct request_queue, kobj);
 	mutex_lock(&q->sysfs_lock);
-	if (blk_queue_dead(q)) {
+	if (blk_queue_dying(q)) {
 		mutex_unlock(&q->sysfs_lock);
 		return -ENOENT;
 	}
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index a9664fa0b609..31146225f3d0 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -302,7 +302,7 @@ static struct throtl_grp *throtl_lookup_create_tg(struct throtl_data *td,
 		/* if %NULL and @q is alive, fall back to root_tg */
 		if (!IS_ERR(blkg))
 			tg = blkg_to_tg(blkg);
-		else if (!blk_queue_dead(q))
+		else if (!blk_queue_dying(q))
 			tg = td_root_tg(td);
 	}
 
diff --git a/block/blk.h b/block/blk.h
index ca51543b248c..2218a8a78292 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -96,7 +96,7 @@ static inline struct request *__elv_next_request(struct request_queue *q)
 			q->flush_queue_delayed = 1;
 			return NULL;
 		}
-		if (unlikely(blk_queue_dead(q)) ||
+		if (unlikely(blk_queue_dying(q)) ||
 		    !q->elevator->type->ops.elevator_dispatch_fn(q, 0))
 			return NULL;
 	}
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index da36a3a81a9e..f29a1a9b54d2 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1406,7 +1406,7 @@ static int scsi_lld_busy(struct request_queue *q)
 	struct scsi_device *sdev = q->queuedata;
 	struct Scsi_Host *shost;
 
-	if (blk_queue_dead(q))
+	if (blk_queue_dying(q))
 		return 0;
 
 	shost = sdev->host;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 1756001210d2..aba8246afe72 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -437,7 +437,7 @@ struct request_queue {
 #define QUEUE_FLAG_STOPPED	2	/* queue is stopped */
 #define	QUEUE_FLAG_SYNCFULL	3	/* read queue has been filled */
 #define QUEUE_FLAG_ASYNCFULL	4	/* write queue has been filled */
-#define QUEUE_FLAG_DEAD		5	/* queue being torn down */
+#define QUEUE_FLAG_DYING	5	/* queue being torn down */
 #define QUEUE_FLAG_BYPASS	6	/* act as dumb FIFO queue */
 #define QUEUE_FLAG_BIDI		7	/* queue supports bidi requests */
 #define QUEUE_FLAG_NOMERGES     8	/* disable merge attempts */
@@ -521,7 +521,7 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q)
 
 #define blk_queue_tagged(q)	test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags)
 #define blk_queue_stopped(q)	test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags)
-#define blk_queue_dead(q)	test_bit(QUEUE_FLAG_DEAD, &(q)->queue_flags)
+#define blk_queue_dying(q)	test_bit(QUEUE_FLAG_DYING, &(q)->queue_flags)
 #define blk_queue_bypass(q)	test_bit(QUEUE_FLAG_BYPASS, &(q)->queue_flags)
 #define blk_queue_nomerges(q)	test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags)
 #define blk_queue_noxmerges(q)	\
-- 
cgit v1.2.3


From c246e80d86736312933646896c4157daf511dadc Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Thu, 6 Dec 2012 14:32:01 +0100
Subject: block: Avoid that request_fn is invoked on a dead queue

A block driver may start cleaning up resources needed by its
request_fn as soon as blk_cleanup_queue() finished, so request_fn
must not be invoked after draining finished. This is important
when blk_run_queue() is invoked without any requests in progress.
As an example, if blk_drain_queue() and scsi_run_queue() run in
parallel, blk_drain_queue() may have finished all requests after
scsi_run_queue() has taken a SCSI device off the starved list but
before that last function has had a chance to run the queue.

Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Cc: James Bottomley <JBottomley@Parallels.com>
Cc: Mike Christie <michaelc@cs.wisc.edu>
Cc: Chanho Min <chanho.min@lge.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c       | 31 +++++++++++++++++++++++++++----
 block/blk-exec.c       |  2 +-
 block/blk.h            |  2 ++
 include/linux/blkdev.h |  2 ++
 4 files changed, 32 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-core.c b/block/blk-core.c
index a182b586b06a..f52d05ff5d24 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -292,6 +292,25 @@ void blk_sync_queue(struct request_queue *q)
 }
 EXPORT_SYMBOL(blk_sync_queue);
 
+/**
+ * __blk_run_queue_uncond - run a queue whether or not it has been stopped
+ * @q:	The queue to run
+ *
+ * Description:
+ *    Invoke request handling on a queue if there are any pending requests.
+ *    May be used to restart request handling after a request has completed.
+ *    This variant runs the queue whether or not the queue has been
+ *    stopped. Must be called with the queue lock held and interrupts
+ *    disabled. See also @blk_run_queue.
+ */
+inline void __blk_run_queue_uncond(struct request_queue *q)
+{
+	if (unlikely(blk_queue_dead(q)))
+		return;
+
+	q->request_fn(q);
+}
+
 /**
  * __blk_run_queue - run a single device queue
  * @q:	The queue to run
@@ -305,7 +324,7 @@ void __blk_run_queue(struct request_queue *q)
 	if (unlikely(blk_queue_stopped(q)))
 		return;
 
-	q->request_fn(q);
+	__blk_run_queue_uncond(q);
 }
 EXPORT_SYMBOL(__blk_run_queue);
 
@@ -477,8 +496,8 @@ EXPORT_SYMBOL_GPL(blk_queue_bypass_end);
  * blk_cleanup_queue - shutdown a request queue
  * @q: request queue to shutdown
  *
- * Mark @q DYING, drain all pending requests, destroy and put it.  All
- * future requests will be failed immediately with -ENODEV.
+ * Mark @q DYING, drain all pending requests, mark @q DEAD, destroy and
+ * put it.  All future requests will be failed immediately with -ENODEV.
  */
 void blk_cleanup_queue(struct request_queue *q)
 {
@@ -507,9 +526,13 @@ void blk_cleanup_queue(struct request_queue *q)
 	spin_unlock_irq(lock);
 	mutex_unlock(&q->sysfs_lock);
 
-	/* drain all requests queued before DYING marking */
+	/*
+	 * Drain all requests queued before DYING marking. Set DEAD flag to
+	 * prevent that q->request_fn() gets invoked after draining finished.
+	 */
 	spin_lock_irq(lock);
 	__blk_drain_queue(q, true);
+	queue_flag_set(QUEUE_FLAG_DEAD, q);
 	spin_unlock_irq(lock);
 
 	/* @q won't process any more request, flush async actions */
diff --git a/block/blk-exec.c b/block/blk-exec.c
index 4aec98df7ba5..1320e74d79b8 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -72,7 +72,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
 	__blk_run_queue(q);
 	/* the queue is stopped so it won't be run */
 	if (rq->cmd_type == REQ_TYPE_PM_RESUME)
-		q->request_fn(q);
+		__blk_run_queue_uncond(q);
 	spin_unlock_irq(q->queue_lock);
 }
 EXPORT_SYMBOL_GPL(blk_execute_rq_nowait);
diff --git a/block/blk.h b/block/blk.h
index 2218a8a78292..47fdfdd41520 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -145,6 +145,8 @@ int blk_try_merge(struct request *rq, struct bio *bio);
 
 void blk_queue_congestion_threshold(struct request_queue *q);
 
+void __blk_run_queue_uncond(struct request_queue *q);
+
 int blk_dev_init(void);
 
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index aba8246afe72..8bc46c250ca4 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -452,6 +452,7 @@ struct request_queue {
 #define QUEUE_FLAG_ADD_RANDOM  16	/* Contributes to random pool */
 #define QUEUE_FLAG_SECDISCARD  17	/* supports SECDISCARD */
 #define QUEUE_FLAG_SAME_FORCE  18	/* force complete on same CPU */
+#define QUEUE_FLAG_DEAD        19	/* queue tear-down finished */
 
 #define QUEUE_FLAG_DEFAULT	((1 << QUEUE_FLAG_IO_STAT) |		\
 				 (1 << QUEUE_FLAG_STACKABLE)	|	\
@@ -522,6 +523,7 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q)
 #define blk_queue_tagged(q)	test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags)
 #define blk_queue_stopped(q)	test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags)
 #define blk_queue_dying(q)	test_bit(QUEUE_FLAG_DYING, &(q)->queue_flags)
+#define blk_queue_dead(q)	test_bit(QUEUE_FLAG_DEAD, &(q)->queue_flags)
 #define blk_queue_bypass(q)	test_bit(QUEUE_FLAG_BYPASS, &(q)->queue_flags)
 #define blk_queue_nomerges(q)	test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags)
 #define blk_queue_noxmerges(q)	\
-- 
cgit v1.2.3


From 24faf6f604efe18236bded4303009fc252913bf0 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Wed, 28 Nov 2012 13:46:45 +0100
Subject: block: Make blk_cleanup_queue() wait until request_fn finished

Some request_fn implementations, e.g. scsi_request_fn(), unlock
the queue lock internally. This may result in multiple threads
executing request_fn for the same queue simultaneously. Keep
track of the number of active request_fn calls and make sure that
blk_cleanup_queue() waits until all active request_fn invocations
have finished. A block driver may start cleaning up resources
needed by its request_fn as soon as blk_cleanup_queue() finished,
so blk_cleanup_queue() must wait for all outstanding request_fn
invocations to finish.

Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Reported-by: Chanho Min <chanho.min@lge.com>
Cc: James Bottomley <JBottomley@Parallels.com>
Cc: Mike Christie <michaelc@cs.wisc.edu>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c       | 10 ++++++++++
 include/linux/blkdev.h |  6 ++++++
 2 files changed, 16 insertions(+)

(limited to 'include/linux')

diff --git a/block/blk-core.c b/block/blk-core.c
index 9fb23537c7ad..473015ee28a2 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -309,7 +309,16 @@ inline void __blk_run_queue_uncond(struct request_queue *q)
 	if (unlikely(blk_queue_dead(q)))
 		return;
 
+	/*
+	 * Some request_fn implementations, e.g. scsi_request_fn(), unlock
+	 * the queue lock internally. As a result multiple threads may be
+	 * running such a request function concurrently. Keep track of the
+	 * number of active request_fn invocations such that blk_drain_queue()
+	 * can wait until all these request_fn calls have finished.
+	 */
+	q->request_fn_active++;
 	q->request_fn(q);
+	q->request_fn_active--;
 }
 
 /**
@@ -408,6 +417,7 @@ static void __blk_drain_queue(struct request_queue *q, bool drain_all)
 			__blk_run_queue(q);
 
 		drain |= q->nr_rqs_elvpriv;
+		drain |= q->request_fn_active;
 
 		/*
 		 * Unfortunately, requests are queued at and tracked from
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 8bc46c250ca4..c9d233e727f2 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -378,6 +378,12 @@ struct request_queue {
 
 	unsigned int		nr_sorted;
 	unsigned int		in_flight[2];
+	/*
+	 * Number of active block driver functions for which blk_drain_queue()
+	 * must wait. Must be incremented around functions that unlock the
+	 * queue_lock internally, e.g. scsi_request_fn().
+	 */
+	unsigned int		request_fn_active;
 
 	unsigned int		rq_timeout;
 	struct timer_list	timeout;
-- 
cgit v1.2.3


From 80729beb3326fd682543f4f4ea534df47ab48967 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Wed, 28 Nov 2012 13:47:36 +0100
Subject: bsg: Remove unused function bsg_goose_queue()

The function bsg_goose_queue() does not have any in-tree callers,
so let's remove it.

Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bsg-lib.c         | 13 -------------
 include/linux/bsg-lib.h |  1 -
 2 files changed, 14 deletions(-)

(limited to 'include/linux')

diff --git a/block/bsg-lib.c b/block/bsg-lib.c
index deee61fbb741..650f427d915b 100644
--- a/block/bsg-lib.c
+++ b/block/bsg-lib.c
@@ -151,19 +151,6 @@ failjob_rls_job:
 	return -ENOMEM;
 }
 
-/*
- * bsg_goose_queue - restart queue in case it was stopped
- * @q: request q to be restarted
- */
-void bsg_goose_queue(struct request_queue *q)
-{
-	if (!q)
-		return;
-
-	blk_run_queue_async(q);
-}
-EXPORT_SYMBOL_GPL(bsg_goose_queue);
-
 /**
  * bsg_request_fn - generic handler for bsg requests
  * @q: request queue to manage
diff --git a/include/linux/bsg-lib.h b/include/linux/bsg-lib.h
index 4d0fb3df2f4a..a226652a5a6c 100644
--- a/include/linux/bsg-lib.h
+++ b/include/linux/bsg-lib.h
@@ -67,6 +67,5 @@ void bsg_job_done(struct bsg_job *job, int result,
 int bsg_setup_queue(struct device *dev, struct request_queue *q, char *name,
 		    bsg_job_fn *job_fn, int dd_job_size);
 void bsg_request_fn(struct request_queue *q);
-void bsg_goose_queue(struct request_queue *q);
 
 #endif
-- 
cgit v1.2.3


From be2f6f5a7833b99bdee97c4b877dcd2afc6cdd00 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Sun, 9 Dec 2012 15:40:30 +0100
Subject: mfd: wm5102: Add readback of DSP status 3 register

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/wm5102-tables.c           | 2 ++
 include/linux/mfd/arizona/registers.h | 1 +
 2 files changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mfd/wm5102-tables.c b/drivers/mfd/wm5102-tables.c
index 005de63d01d6..8844b2f9de87 100644
--- a/drivers/mfd/wm5102-tables.c
+++ b/drivers/mfd/wm5102-tables.c
@@ -2336,6 +2336,7 @@ static bool wm5102_readable_register(struct device *dev, unsigned int reg)
 	case ARIZONA_DSP1_CLOCKING_1:
 	case ARIZONA_DSP1_STATUS_1:
 	case ARIZONA_DSP1_STATUS_2:
+	case ARIZONA_DSP1_STATUS_3:
 		return true;
 	default:
 		return false;
@@ -2386,6 +2387,7 @@ static bool wm5102_volatile_register(struct device *dev, unsigned int reg)
 	case ARIZONA_AOD_IRQ_RAW_STATUS:
 	case ARIZONA_DSP1_STATUS_1:
 	case ARIZONA_DSP1_STATUS_2:
+	case ARIZONA_DSP1_STATUS_3:
 	case ARIZONA_HEADPHONE_DETECT_2:
 	case ARIZONA_MIC_DETECT_3:
 		return true;
diff --git a/include/linux/mfd/arizona/registers.h b/include/linux/mfd/arizona/registers.h
index 7671a287dfee..81bca23c9a92 100644
--- a/include/linux/mfd/arizona/registers.h
+++ b/include/linux/mfd/arizona/registers.h
@@ -979,6 +979,7 @@
 #define ARIZONA_DSP1_CLOCKING_1                  0x1101
 #define ARIZONA_DSP1_STATUS_1                    0x1104
 #define ARIZONA_DSP1_STATUS_2                    0x1105
+#define ARIZONA_DSP1_STATUS_3                    0x1106
 #define ARIZONA_DSP2_CONTROL_1                   0x1200
 #define ARIZONA_DSP2_CLOCKING_1                  0x1201
 #define ARIZONA_DSP2_STATUS_1                    0x1204
-- 
cgit v1.2.3


From dd31866b0d55c9b70722ebad6ccd643223d9269e Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk.kim@samsung.com>
Date: Fri, 2 Nov 2012 17:06:26 +0900
Subject: f2fs: add on-disk layout

This adds a header file describing the on-disk layout of f2fs.

Signed-off-by: Changman Lee <cm224.lee@samsung.com>
Signed-off-by: Chul Lee <chur.lee@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk.kim@samsung.com>
---
 include/linux/f2fs_fs.h | 410 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 410 insertions(+)
 create mode 100644 include/linux/f2fs_fs.h

(limited to 'include/linux')

diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
new file mode 100644
index 000000000000..1429ece7caab
--- /dev/null
+++ b/include/linux/f2fs_fs.h
@@ -0,0 +1,410 @@
+/**
+ * include/linux/f2fs_fs.h
+ *
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd.
+ *             http://www.samsung.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef _LINUX_F2FS_FS_H
+#define _LINUX_F2FS_FS_H
+
+#include <linux/pagemap.h>
+#include <linux/types.h>
+
+#define F2FS_SUPER_OFFSET		1024	/* byte-size offset */
+#define F2FS_LOG_SECTOR_SIZE		9	/* 9 bits for 512 byte */
+#define F2FS_LOG_SECTORS_PER_BLOCK	3	/* 4KB: F2FS_BLKSIZE */
+#define F2FS_BLKSIZE			4096	/* support only 4KB block */
+#define F2FS_MAX_EXTENSION		64	/* # of extension entries */
+
+#define NULL_ADDR		0x0U
+#define NEW_ADDR		-1U
+
+#define F2FS_ROOT_INO(sbi)	(sbi->root_ino_num)
+#define F2FS_NODE_INO(sbi)	(sbi->node_ino_num)
+#define F2FS_META_INO(sbi)	(sbi->meta_ino_num)
+
+/* This flag is used by node and meta inodes, and by recovery */
+#define GFP_F2FS_ZERO	(GFP_NOFS | __GFP_ZERO)
+
+/*
+ * For further optimization on multi-head logs, on-disk layout supports maximum
+ * 16 logs by default. The number, 16, is expected to cover all the cases
+ * enoughly. The implementaion currently uses no more than 6 logs.
+ * Half the logs are used for nodes, and the other half are used for data.
+ */
+#define MAX_ACTIVE_LOGS	16
+#define MAX_ACTIVE_NODE_LOGS	8
+#define MAX_ACTIVE_DATA_LOGS	8
+
+/*
+ * For superblock
+ */
+struct f2fs_super_block {
+	__le32 magic;			/* Magic Number */
+	__le16 major_ver;		/* Major Version */
+	__le16 minor_ver;		/* Minor Version */
+	__le32 log_sectorsize;		/* log2 sector size in bytes */
+	__le32 log_sectors_per_block;	/* log2 # of sectors per block */
+	__le32 log_blocksize;		/* log2 block size in bytes */
+	__le32 log_blocks_per_seg;	/* log2 # of blocks per segment */
+	__le32 segs_per_sec;		/* # of segments per section */
+	__le32 secs_per_zone;		/* # of sections per zone */
+	__le32 checksum_offset;		/* checksum offset inside super block */
+	__le64 block_count;		/* total # of user blocks */
+	__le32 section_count;		/* total # of sections */
+	__le32 segment_count;		/* total # of segments */
+	__le32 segment_count_ckpt;	/* # of segments for checkpoint */
+	__le32 segment_count_sit;	/* # of segments for SIT */
+	__le32 segment_count_nat;	/* # of segments for NAT */
+	__le32 segment_count_ssa;	/* # of segments for SSA */
+	__le32 segment_count_main;	/* # of segments for main area */
+	__le32 segment0_blkaddr;	/* start block address of segment 0 */
+	__le32 cp_blkaddr;		/* start block address of checkpoint */
+	__le32 sit_blkaddr;		/* start block address of SIT */
+	__le32 nat_blkaddr;		/* start block address of NAT */
+	__le32 ssa_blkaddr;		/* start block address of SSA */
+	__le32 main_blkaddr;		/* start block address of main area */
+	__le32 root_ino;		/* root inode number */
+	__le32 node_ino;		/* node inode number */
+	__le32 meta_ino;		/* meta inode number */
+	__u8 uuid[16];			/* 128-bit uuid for volume */
+	__le16 volume_name[512];	/* volume name */
+	__le32 extension_count;		/* # of extensions below */
+	__u8 extension_list[F2FS_MAX_EXTENSION][8];	/* extension array */
+} __packed;
+
+/*
+ * For checkpoint
+ */
+#define CP_ERROR_FLAG		0x00000008
+#define CP_COMPACT_SUM_FLAG	0x00000004
+#define CP_ORPHAN_PRESENT_FLAG	0x00000002
+#define CP_UMOUNT_FLAG		0x00000001
+
+struct f2fs_checkpoint {
+	__le64 checkpoint_ver;		/* checkpoint block version number */
+	__le64 user_block_count;	/* # of user blocks */
+	__le64 valid_block_count;	/* # of valid blocks in main area */
+	__le32 rsvd_segment_count;	/* # of reserved segments for gc */
+	__le32 overprov_segment_count;	/* # of overprovision segments */
+	__le32 free_segment_count;	/* # of free segments in main area */
+
+	/* information of current node segments */
+	__le32 cur_node_segno[MAX_ACTIVE_NODE_LOGS];
+	__le16 cur_node_blkoff[MAX_ACTIVE_NODE_LOGS];
+	/* information of current data segments */
+	__le32 cur_data_segno[MAX_ACTIVE_DATA_LOGS];
+	__le16 cur_data_blkoff[MAX_ACTIVE_DATA_LOGS];
+	__le32 ckpt_flags;		/* Flags : umount and journal_present */
+	__le32 cp_pack_total_block_count;	/* total # of one cp pack */
+	__le32 cp_pack_start_sum;	/* start block number of data summary */
+	__le32 valid_node_count;	/* Total number of valid nodes */
+	__le32 valid_inode_count;	/* Total number of valid inodes */
+	__le32 next_free_nid;		/* Next free node number */
+	__le32 sit_ver_bitmap_bytesize;	/* Default value 64 */
+	__le32 nat_ver_bitmap_bytesize; /* Default value 256 */
+	__le32 checksum_offset;		/* checksum offset inside cp block */
+	__le64 elapsed_time;		/* mounted time */
+	/* allocation type of current segment */
+	unsigned char alloc_type[MAX_ACTIVE_LOGS];
+
+	/* SIT and NAT version bitmap */
+	unsigned char sit_nat_version_bitmap[1];
+} __packed;
+
+/*
+ * For orphan inode management
+ */
+#define F2FS_ORPHANS_PER_BLOCK	1020
+
+struct f2fs_orphan_block {
+	__le32 ino[F2FS_ORPHANS_PER_BLOCK];	/* inode numbers */
+	__le32 reserved;	/* reserved */
+	__le16 blk_addr;	/* block index in current CP */
+	__le16 blk_count;	/* Number of orphan inode blocks in CP */
+	__le32 entry_count;	/* Total number of orphan nodes in current CP */
+	__le32 check_sum;	/* CRC32 for orphan inode block */
+} __packed;
+
+/*
+ * For NODE structure
+ */
+struct f2fs_extent {
+	__le32 fofs;		/* start file offset of the extent */
+	__le32 blk_addr;	/* start block address of the extent */
+	__le32 len;		/* lengh of the extent */
+} __packed;
+
+#define F2FS_MAX_NAME_LEN	256
+#define ADDRS_PER_INODE         923	/* Address Pointers in an Inode */
+#define ADDRS_PER_BLOCK         1018	/* Address Pointers in a Direct Block */
+#define NIDS_PER_BLOCK          1018	/* Node IDs in an Indirect Block */
+
+struct f2fs_inode {
+	__le16 i_mode;			/* file mode */
+	__u8 i_advise;			/* file hints */
+	__u8 i_reserved;		/* reserved */
+	__le32 i_uid;			/* user ID */
+	__le32 i_gid;			/* group ID */
+	__le32 i_links;			/* links count */
+	__le64 i_size;			/* file size in bytes */
+	__le64 i_blocks;		/* file size in blocks */
+	__le64 i_atime;			/* access time */
+	__le64 i_ctime;			/* change time */
+	__le64 i_mtime;			/* modification time */
+	__le32 i_atime_nsec;		/* access time in nano scale */
+	__le32 i_ctime_nsec;		/* change time in nano scale */
+	__le32 i_mtime_nsec;		/* modification time in nano scale */
+	__le32 i_generation;		/* file version (for NFS) */
+	__le32 i_current_depth;		/* only for directory depth */
+	__le32 i_xattr_nid;		/* nid to save xattr */
+	__le32 i_flags;			/* file attributes */
+	__le32 i_pino;			/* parent inode number */
+	__le32 i_namelen;		/* file name length */
+	__u8 i_name[F2FS_MAX_NAME_LEN];	/* file name for SPOR */
+
+	struct f2fs_extent i_ext;	/* caching a largest extent */
+
+	__le32 i_addr[ADDRS_PER_INODE];	/* Pointers to data blocks */
+
+	__le32 i_nid[5];		/* direct(2), indirect(2),
+						double_indirect(1) node id */
+} __packed;
+
+struct direct_node {
+	__le32 addr[ADDRS_PER_BLOCK];	/* array of data block address */
+} __packed;
+
+struct indirect_node {
+	__le32 nid[NIDS_PER_BLOCK];	/* array of data block address */
+} __packed;
+
+enum {
+	COLD_BIT_SHIFT = 0,
+	FSYNC_BIT_SHIFT,
+	DENT_BIT_SHIFT,
+	OFFSET_BIT_SHIFT
+};
+
+struct node_footer {
+	__le32 nid;		/* node id */
+	__le32 ino;		/* inode nunmber */
+	__le32 flag;		/* include cold/fsync/dentry marks and offset */
+	__le64 cp_ver;		/* checkpoint version */
+	__le32 next_blkaddr;	/* next node page block address */
+} __packed;
+
+struct f2fs_node {
+	/* can be one of three types: inode, direct, and indirect types */
+	union {
+		struct f2fs_inode i;
+		struct direct_node dn;
+		struct indirect_node in;
+	};
+	struct node_footer footer;
+} __packed;
+
+/*
+ * For NAT entries
+ */
+#define NAT_ENTRY_PER_BLOCK (PAGE_CACHE_SIZE / sizeof(struct f2fs_nat_entry))
+
+struct f2fs_nat_entry {
+	__u8 version;		/* latest version of cached nat entry */
+	__le32 ino;		/* inode number */
+	__le32 block_addr;	/* block address */
+} __packed;
+
+struct f2fs_nat_block {
+	struct f2fs_nat_entry entries[NAT_ENTRY_PER_BLOCK];
+} __packed;
+
+/*
+ * For SIT entries
+ *
+ * Each segment is 2MB in size by default so that a bitmap for validity of
+ * there-in blocks should occupy 64 bytes, 512 bits.
+ * Not allow to change this.
+ */
+#define SIT_VBLOCK_MAP_SIZE 64
+#define SIT_ENTRY_PER_BLOCK (PAGE_CACHE_SIZE / sizeof(struct f2fs_sit_entry))
+
+/*
+ * Note that f2fs_sit_entry->vblocks has the following bit-field information.
+ * [15:10] : allocation type such as CURSEG_XXXX_TYPE
+ * [9:0] : valid block count
+ */
+#define SIT_VBLOCKS_SHIFT	10
+#define SIT_VBLOCKS_MASK	((1 << SIT_VBLOCKS_SHIFT) - 1)
+#define GET_SIT_VBLOCKS(raw_sit)				\
+	(le16_to_cpu((raw_sit)->vblocks) & SIT_VBLOCKS_MASK)
+#define GET_SIT_TYPE(raw_sit)					\
+	((le16_to_cpu((raw_sit)->vblocks) & ~SIT_VBLOCKS_MASK)	\
+	 >> SIT_VBLOCKS_SHIFT)
+
+struct f2fs_sit_entry {
+	__le16 vblocks;				/* reference above */
+	__u8 valid_map[SIT_VBLOCK_MAP_SIZE];	/* bitmap for valid blocks */
+	__le64 mtime;				/* segment age for cleaning */
+} __packed;
+
+struct f2fs_sit_block {
+	struct f2fs_sit_entry entries[SIT_ENTRY_PER_BLOCK];
+} __packed;
+
+/*
+ * For segment summary
+ *
+ * One summary block contains exactly 512 summary entries, which represents
+ * exactly 2MB segment by default. Not allow to change the basic units.
+ *
+ * NOTE: For initializing fields, you must use set_summary
+ *
+ * - If data page, nid represents dnode's nid
+ * - If node page, nid represents the node page's nid.
+ *
+ * The ofs_in_node is used by only data page. It represents offset
+ * from node's page's beginning to get a data block address.
+ * ex) data_blkaddr = (block_t)(nodepage_start_address + ofs_in_node)
+ */
+#define ENTRIES_IN_SUM		512
+#define	SUMMARY_SIZE		(sizeof(struct f2fs_summary))
+#define	SUM_FOOTER_SIZE		(sizeof(struct summary_footer))
+#define SUM_ENTRY_SIZE		(SUMMARY_SIZE * ENTRIES_IN_SUM)
+
+/* a summary entry for a 4KB-sized block in a segment */
+struct f2fs_summary {
+	__le32 nid;		/* parent node id */
+	union {
+		__u8 reserved[3];
+		struct {
+			__u8 version;		/* node version number */
+			__le16 ofs_in_node;	/* block index in parent node */
+		} __packed;
+	};
+} __packed;
+
+/* summary block type, node or data, is stored to the summary_footer */
+#define SUM_TYPE_NODE		(1)
+#define SUM_TYPE_DATA		(0)
+
+struct summary_footer {
+	unsigned char entry_type;	/* SUM_TYPE_XXX */
+	__u32 check_sum;		/* summary checksum */
+} __packed;
+
+#define SUM_JOURNAL_SIZE	(PAGE_CACHE_SIZE - SUM_FOOTER_SIZE -\
+				SUM_ENTRY_SIZE)
+#define NAT_JOURNAL_ENTRIES	((SUM_JOURNAL_SIZE - 2) /\
+				sizeof(struct nat_journal_entry))
+#define NAT_JOURNAL_RESERVED	((SUM_JOURNAL_SIZE - 2) %\
+				sizeof(struct nat_journal_entry))
+#define SIT_JOURNAL_ENTRIES	((SUM_JOURNAL_SIZE - 2) /\
+				sizeof(struct sit_journal_entry))
+#define SIT_JOURNAL_RESERVED	((SUM_JOURNAL_SIZE - 2) %\
+				sizeof(struct sit_journal_entry))
+/*
+ * frequently updated NAT/SIT entries can be stored in the spare area in
+ * summary blocks
+ */
+enum {
+	NAT_JOURNAL = 0,
+	SIT_JOURNAL
+};
+
+struct nat_journal_entry {
+	__le32 nid;
+	struct f2fs_nat_entry ne;
+} __packed;
+
+struct nat_journal {
+	struct nat_journal_entry entries[NAT_JOURNAL_ENTRIES];
+	__u8 reserved[NAT_JOURNAL_RESERVED];
+} __packed;
+
+struct sit_journal_entry {
+	__le32 segno;
+	struct f2fs_sit_entry se;
+} __packed;
+
+struct sit_journal {
+	struct sit_journal_entry entries[SIT_JOURNAL_ENTRIES];
+	__u8 reserved[SIT_JOURNAL_RESERVED];
+} __packed;
+
+/* 4KB-sized summary block structure */
+struct f2fs_summary_block {
+	struct f2fs_summary entries[ENTRIES_IN_SUM];
+	union {
+		__le16 n_nats;
+		__le16 n_sits;
+	};
+	/* spare area is used by NAT or SIT journals */
+	union {
+		struct nat_journal nat_j;
+		struct sit_journal sit_j;
+	};
+	struct summary_footer footer;
+} __packed;
+
+/*
+ * For directory operations
+ */
+#define F2FS_DOT_HASH		0
+#define F2FS_DDOT_HASH		F2FS_DOT_HASH
+#define F2FS_MAX_HASH		(~((0x3ULL) << 62))
+#define F2FS_HASH_COL_BIT	((0x1ULL) << 63)
+
+typedef __le32	f2fs_hash_t;
+
+/* One directory entry slot covers 8bytes-long file name */
+#define F2FS_NAME_LEN		8
+
+/* the number of dentry in a block */
+#define NR_DENTRY_IN_BLOCK	214
+
+/* MAX level for dir lookup */
+#define MAX_DIR_HASH_DEPTH	63
+
+#define SIZE_OF_DIR_ENTRY	11	/* by byte */
+#define SIZE_OF_DENTRY_BITMAP	((NR_DENTRY_IN_BLOCK + BITS_PER_BYTE - 1) / \
+					BITS_PER_BYTE)
+#define SIZE_OF_RESERVED	(PAGE_SIZE - ((SIZE_OF_DIR_ENTRY + \
+				F2FS_NAME_LEN) * \
+				NR_DENTRY_IN_BLOCK + SIZE_OF_DENTRY_BITMAP))
+
+/* One directory entry slot representing F2FS_NAME_LEN-sized file name */
+struct f2fs_dir_entry {
+	__le32 hash_code;	/* hash code of file name */
+	__le32 ino;		/* inode number */
+	__le16 name_len;	/* lengh of file name */
+	__u8 file_type;		/* file type */
+} __packed;
+
+/* 4KB-sized directory entry block */
+struct f2fs_dentry_block {
+	/* validity bitmap for directory entries in each block */
+	__u8 dentry_bitmap[SIZE_OF_DENTRY_BITMAP];
+	__u8 reserved[SIZE_OF_RESERVED];
+	struct f2fs_dir_entry dentry[NR_DENTRY_IN_BLOCK];
+	__u8 filename[NR_DENTRY_IN_BLOCK][F2FS_NAME_LEN];
+} __packed;
+
+/* file types used in inode_info->flags */
+enum {
+	F2FS_FT_UNKNOWN,
+	F2FS_FT_REG_FILE,
+	F2FS_FT_DIR,
+	F2FS_FT_CHRDEV,
+	F2FS_FT_BLKDEV,
+	F2FS_FT_FIFO,
+	F2FS_FT_SOCK,
+	F2FS_FT_SYMLINK,
+	F2FS_FT_MAX
+};
+
+#endif  /* _LINUX_F2FS_FS_H */
-- 
cgit v1.2.3


From 25ca923b2a766b9c93b63777ead351137533a623 Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk.kim@samsung.com>
Date: Wed, 28 Nov 2012 16:12:41 +0900
Subject: f2fs: fix endian conversion bugs reported by sparse

This patch should resolve the bugs reported by the sparse tool.
Initial reports were written by "kbuild test robot" managed by fengguang.wu.

In my local machines, I've tested also by running:
> make C=2 CF="-D__CHECK_ENDIAN__"

Accordingly, I've found lots of warnings and bugs related to the endian
conversion. And I've fixed all at this moment.

Signed-off-by: Jaegeuk Kim <jaegeuk.kim@samsung.com>
---
 fs/f2fs/checkpoint.c    | 32 +++++++++++++++++---------------
 fs/f2fs/data.c          |  2 +-
 fs/f2fs/debug.c         |  2 +-
 fs/f2fs/dir.c           |  8 ++++----
 fs/f2fs/f2fs.h          | 25 +++++++++++++++++++++++--
 fs/f2fs/hash.c          |  3 +--
 fs/f2fs/node.c          |  4 ++--
 fs/f2fs/node.h          |  2 +-
 fs/f2fs/recovery.c      |  2 +-
 fs/f2fs/segment.c       | 14 +++++++-------
 fs/f2fs/super.c         |  8 ++++----
 include/linux/f2fs_fs.h |  6 +++---
 12 files changed, 65 insertions(+), 43 deletions(-)

(limited to 'include/linux')

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index ab743f92ee06..7c18f8efaadc 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -268,7 +268,7 @@ int recover_orphan_inodes(struct f2fs_sb_info *sbi)
 {
 	block_t start_blk, orphan_blkaddr, i, j;
 
-	if (!(F2FS_CKPT(sbi)->ckpt_flags & CP_ORPHAN_PRESENT_FLAG))
+	if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG))
 		return 0;
 
 	sbi->por_doing = 1;
@@ -287,7 +287,7 @@ int recover_orphan_inodes(struct f2fs_sb_info *sbi)
 		f2fs_put_page(page, 1);
 	}
 	/* clear Orphan Flag */
-	F2FS_CKPT(sbi)->ckpt_flags &= (~CP_ORPHAN_PRESENT_FLAG);
+	clear_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG);
 	sbi->por_doing = 0;
 	return 0;
 }
@@ -376,7 +376,7 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
 	pre_version = le64_to_cpu(cp_block->checkpoint_ver);
 
 	/* Read the 2nd cp block in this CP pack */
-	cp_addr += le64_to_cpu(cp_block->cp_pack_total_block_count) - 1;
+	cp_addr += le32_to_cpu(cp_block->cp_pack_total_block_count) - 1;
 	cp_page_2 = get_meta_page(sbi, cp_addr);
 
 	cp_block = (struct f2fs_checkpoint *)page_address(cp_page_2);
@@ -605,8 +605,8 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
 	block_t start_blk;
 	struct page *cp_page;
 	unsigned int data_sum_blocks, orphan_blocks;
+	unsigned int crc32 = 0;
 	void *kaddr;
-	__u32 crc32 = 0;
 	int i;
 
 	/* Flush all the NAT/SIT pages */
@@ -646,33 +646,35 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
 	/* 2 cp  + n data seg summary + orphan inode blocks */
 	data_sum_blocks = npages_for_summary_flush(sbi);
 	if (data_sum_blocks < 3)
-		ckpt->ckpt_flags |= CP_COMPACT_SUM_FLAG;
+		set_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);
 	else
-		ckpt->ckpt_flags &= (~CP_COMPACT_SUM_FLAG);
+		clear_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);
 
 	orphan_blocks = (sbi->n_orphans + F2FS_ORPHANS_PER_BLOCK - 1)
 					/ F2FS_ORPHANS_PER_BLOCK;
-	ckpt->cp_pack_start_sum = 1 + orphan_blocks;
-	ckpt->cp_pack_total_block_count = 2 + data_sum_blocks + orphan_blocks;
+	ckpt->cp_pack_start_sum = cpu_to_le32(1 + orphan_blocks);
 
 	if (is_umount) {
-		ckpt->ckpt_flags |= CP_UMOUNT_FLAG;
-		ckpt->cp_pack_total_block_count += NR_CURSEG_NODE_TYPE;
+		set_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
+		ckpt->cp_pack_total_block_count = cpu_to_le32(2 +
+			data_sum_blocks + orphan_blocks + NR_CURSEG_NODE_TYPE);
 	} else {
-		ckpt->ckpt_flags &= (~CP_UMOUNT_FLAG);
+		clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
+		ckpt->cp_pack_total_block_count = cpu_to_le32(2 +
+			data_sum_blocks + orphan_blocks);
 	}
 
 	if (sbi->n_orphans)
-		ckpt->ckpt_flags |= CP_ORPHAN_PRESENT_FLAG;
+		set_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);
 	else
-		ckpt->ckpt_flags &= (~CP_ORPHAN_PRESENT_FLAG);
+		clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);
 
 	/* update SIT/NAT bitmap */
 	get_sit_bitmap(sbi, __bitmap_ptr(sbi, SIT_BITMAP));
 	get_nat_bitmap(sbi, __bitmap_ptr(sbi, NAT_BITMAP));
 
 	crc32 = f2fs_crc32(ckpt, le32_to_cpu(ckpt->checksum_offset));
-	*(__u32 *)((unsigned char *)ckpt +
+	*(__le32 *)((unsigned char *)ckpt +
 				le32_to_cpu(ckpt->checksum_offset))
 				= cpu_to_le32(crc32);
 
@@ -716,7 +718,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
 	sbi->alloc_valid_block_count = 0;
 
 	/* Here, we only have one bio having CP pack */
-	if (sbi->ckpt->ckpt_flags & CP_ERROR_FLAG)
+	if (is_set_ckpt_flags(ckpt, CP_ERROR_FLAG))
 		sbi->sb->s_flags |= MS_RDONLY;
 	else
 		sync_meta_pages(sbi, META_FLUSH, LONG_MAX);
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index c2fd0a80db16..5635cc5a9d4d 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -545,7 +545,7 @@ redirty_out:
 
 #define MAX_DESIRED_PAGES_WP	4096
 
-int f2fs_write_data_pages(struct address_space *mapping,
+static int f2fs_write_data_pages(struct address_space *mapping,
 			    struct writeback_control *wbc)
 {
 	struct inode *inode = mapping->host;
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index a56181c1b28b..fb62960a1dc1 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -27,7 +27,7 @@
 static LIST_HEAD(f2fs_stat_list);
 static struct dentry *debugfs_root;
 
-void update_general_status(struct f2fs_sb_info *sbi)
+static void update_general_status(struct f2fs_sb_info *sbi)
 {
 	struct f2fs_stat_info *si = sbi->stat_info;
 	int i;
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 5975568d03df..5ec7a06120e1 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -80,7 +80,7 @@ static bool early_match_name(const char *name, int namelen,
 	if (le16_to_cpu(de->name_len) != namelen)
 		return false;
 
-	if (le32_to_cpu(de->hash_code) != namehash)
+	if (de->hash_code != namehash)
 		return false;
 
 	return true;
@@ -143,7 +143,7 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
 	nbucket = dir_buckets(level);
 	nblock = bucket_blocks(level);
 
-	bidx = dir_block_index(level, namehash % nbucket);
+	bidx = dir_block_index(level, le32_to_cpu(namehash) % nbucket);
 	end_block = bidx + nblock;
 
 	for (; bidx < end_block; bidx++) {
@@ -406,7 +406,7 @@ start:
 	nbucket = dir_buckets(level);
 	nblock = bucket_blocks(level);
 
-	bidx = dir_block_index(level, (dentry_hash % nbucket));
+	bidx = dir_block_index(level, (le32_to_cpu(dentry_hash) % nbucket));
 
 	for (block = bidx; block <= (bidx + nblock - 1); block++) {
 		mutex_lock_op(sbi, DENTRY_OPS);
@@ -437,7 +437,7 @@ add_dentry:
 	wait_on_page_writeback(dentry_page);
 
 	de = &dentry_blk->dentry[bit_pos];
-	de->hash_code = cpu_to_le32(dentry_hash);
+	de->hash_code = dentry_hash;
 	de->name_len = cpu_to_le16(namelen);
 	memcpy(dentry_blk->filename[bit_pos], name, namelen);
 	de->ino = cpu_to_le32(inode->i_ino);
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index d3f5a70e2a49..8d7fde1bda1e 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -463,6 +463,26 @@ static inline void F2FS_RESET_SB_DIRT(struct f2fs_sb_info *sbi)
 	sbi->s_dirty = 0;
 }
 
+static inline bool is_set_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f)
+{
+	unsigned int ckpt_flags = le32_to_cpu(cp->ckpt_flags);
+	return ckpt_flags & f;
+}
+
+static inline void set_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f)
+{
+	unsigned int ckpt_flags = le32_to_cpu(cp->ckpt_flags);
+	ckpt_flags |= f;
+	cp->ckpt_flags = cpu_to_le32(ckpt_flags);
+}
+
+static inline void clear_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f)
+{
+	unsigned int ckpt_flags = le32_to_cpu(cp->ckpt_flags);
+	ckpt_flags &= (~f);
+	cp->ckpt_flags = cpu_to_le32(ckpt_flags);
+}
+
 static inline void mutex_lock_op(struct f2fs_sb_info *sbi, enum lock_type t)
 {
 	mutex_lock_nested(&sbi->fs_lock[t], t);
@@ -577,7 +597,8 @@ static inline unsigned long __bitmap_size(struct f2fs_sb_info *sbi, int flag)
 static inline void *__bitmap_ptr(struct f2fs_sb_info *sbi, int flag)
 {
 	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
-	int offset = (flag == NAT_BITMAP) ? ckpt->sit_ver_bitmap_bytesize : 0;
+	int offset = (flag == NAT_BITMAP) ?
+			le32_to_cpu(ckpt->sit_ver_bitmap_bytesize) : 0;
 	return &ckpt->sit_nat_version_bitmap + offset;
 }
 
@@ -587,7 +608,7 @@ static inline block_t __start_cp_addr(struct f2fs_sb_info *sbi)
 	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
 	unsigned long long ckpt_version = le64_to_cpu(ckpt->checkpoint_ver);
 
-	start_addr = le64_to_cpu(F2FS_RAW_SUPER(sbi)->cp_blkaddr);
+	start_addr = le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_blkaddr);
 
 	/*
 	 * odd numbered checkpoint should at cp segment 0
diff --git a/fs/f2fs/hash.c b/fs/f2fs/hash.c
index 098a1963d7c7..beb155e8d06d 100644
--- a/fs/f2fs/hash.c
+++ b/fs/f2fs/hash.c
@@ -92,7 +92,6 @@ f2fs_hash_t f2fs_dentry_hash(const char *name, int len)
 	hash = buf[0];
 	minor_hash = buf[1];
 
-	f2fs_hash = hash;
-	f2fs_hash &= ~F2FS_HASH_COL_BIT;
+	f2fs_hash = cpu_to_le32(hash & ~F2FS_HASH_COL_BIT);
 	return f2fs_hash;
 }
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 216f04dc1177..5d421fe22575 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -1445,8 +1445,8 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
 
 	memcpy(dst, src, (unsigned long)&src->i.i_ext - (unsigned long)&src->i);
 	dst->i.i_size = 0;
-	dst->i.i_blocks = 1;
-	dst->i.i_links = 1;
+	dst->i.i_blocks = cpu_to_le64(1);
+	dst->i.i_links = cpu_to_le32(1);
 	dst->i.i_xattr_nid = 0;
 
 	new_ni = old_ni;
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index 5d525ed312ba..0ab92d643052 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -177,7 +177,7 @@ static inline void fill_node_footer_blkaddr(struct page *page, block_t blkaddr)
 	void *kaddr = page_address(page);
 	struct f2fs_node *rn = (struct f2fs_node *)kaddr;
 	rn->footer.cp_ver = ckpt->checkpoint_ver;
-	rn->footer.next_blkaddr = blkaddr;
+	rn->footer.next_blkaddr = cpu_to_le32(blkaddr);
 }
 
 static inline nid_t ino_of_node(struct page *node_page)
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 7a43df0b72c1..222a7bb92214 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -81,7 +81,7 @@ static int recover_inode(struct inode *inode, struct page *node_page)
 	struct f2fs_node *raw_node = (struct f2fs_node *)kaddr;
 	struct f2fs_inode *raw_inode = &(raw_node->i);
 
-	inode->i_mode = le32_to_cpu(raw_inode->i_mode);
+	inode->i_mode = le16_to_cpu(raw_inode->i_mode);
 	i_size_write(inode, le64_to_cpu(raw_inode->i_size));
 	inode->i_atime.tv_sec = le64_to_cpu(raw_inode->i_mtime);
 	inode->i_ctime.tv_sec = le64_to_cpu(raw_inode->i_ctime);
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index ed7c079cfc7f..d973c56e8bd6 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -630,7 +630,7 @@ static void f2fs_end_io_write(struct bio *bio, int err)
 			SetPageError(page);
 			if (page->mapping)
 				set_bit(AS_EIO, &page->mapping->flags);
-			p->sbi->ckpt->ckpt_flags |= CP_ERROR_FLAG;
+			set_ckpt_flags(p->sbi->ckpt, CP_ERROR_FLAG);
 			set_page_dirty(page);
 		}
 		end_page_writeback(page);
@@ -1067,7 +1067,7 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
 		segno = le32_to_cpu(ckpt->cur_data_segno[type]);
 		blk_off = le16_to_cpu(ckpt->cur_data_blkoff[type -
 							CURSEG_HOT_DATA]);
-		if (ckpt->ckpt_flags & CP_UMOUNT_FLAG)
+		if (is_set_ckpt_flags(ckpt, CP_UMOUNT_FLAG))
 			blk_addr = sum_blk_addr(sbi, NR_CURSEG_TYPE, type);
 		else
 			blk_addr = sum_blk_addr(sbi, NR_CURSEG_DATA_TYPE, type);
@@ -1076,7 +1076,7 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
 							CURSEG_HOT_NODE]);
 		blk_off = le16_to_cpu(ckpt->cur_node_blkoff[type -
 							CURSEG_HOT_NODE]);
-		if (ckpt->ckpt_flags & CP_UMOUNT_FLAG)
+		if (is_set_ckpt_flags(ckpt, CP_UMOUNT_FLAG))
 			blk_addr = sum_blk_addr(sbi, NR_CURSEG_NODE_TYPE,
 							type - CURSEG_HOT_NODE);
 		else
@@ -1087,7 +1087,7 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
 	sum = (struct f2fs_summary_block *)page_address(new);
 
 	if (IS_NODESEG(type)) {
-		if (ckpt->ckpt_flags & CP_UMOUNT_FLAG) {
+		if (is_set_ckpt_flags(ckpt, CP_UMOUNT_FLAG)) {
 			struct f2fs_summary *ns = &sum->entries[0];
 			int i;
 			for (i = 0; i < sbi->blocks_per_seg; i++, ns++) {
@@ -1119,7 +1119,7 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
 {
 	int type = CURSEG_HOT_DATA;
 
-	if (sbi->ckpt->ckpt_flags & CP_COMPACT_SUM_FLAG) {
+	if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_COMPACT_SUM_FLAG)) {
 		/* restore for compacted data summary */
 		if (read_compacted_summaries(sbi))
 			return -EINVAL;
@@ -1208,7 +1208,7 @@ static void write_normal_summaries(struct f2fs_sb_info *sbi,
 
 void write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
 {
-	if (sbi->ckpt->ckpt_flags & CP_COMPACT_SUM_FLAG)
+	if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_COMPACT_SUM_FLAG))
 		write_compacted_summaries(sbi, start_blk);
 	else
 		write_normal_summaries(sbi, start_blk, CURSEG_HOT_DATA);
@@ -1216,7 +1216,7 @@ void write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
 
 void write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
 {
-	if (sbi->ckpt->ckpt_flags & CP_UMOUNT_FLAG)
+	if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_UMOUNT_FLAG))
 		write_normal_summaries(sbi, start_blk, CURSEG_HOT_NODE);
 	return;
 }
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 8661c93538af..878bf382f848 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -89,7 +89,7 @@ static void f2fs_i_callback(struct rcu_head *head)
 	kmem_cache_free(f2fs_inode_cachep, F2FS_I(inode));
 }
 
-void f2fs_destroy_inode(struct inode *inode)
+static void f2fs_destroy_inode(struct inode *inode)
 {
 	call_rcu(&inode->i_rcu, f2fs_i_callback);
 }
@@ -445,7 +445,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
 	if (sanity_check_raw_super(raw_super))
 		goto free_sb_buf;
 
-	sb->s_maxbytes = max_file_size(raw_super->log_blocksize);
+	sb->s_maxbytes = max_file_size(le32_to_cpu(raw_super->log_blocksize));
 	sb->s_max_links = F2FS_LINK_MAX;
 	get_random_bytes(&sbi->s_next_generation, sizeof(u32));
 
@@ -527,7 +527,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
 
 	/* if there are nt orphan nodes free them */
 	err = -EINVAL;
-	if (!(sbi->ckpt->ckpt_flags & CP_UMOUNT_FLAG) &&
+	if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_UMOUNT_FLAG) &&
 				recover_orphan_inodes(sbi))
 		goto free_node_inode;
 
@@ -547,7 +547,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
 	}
 
 	/* recover fsynced data */
-	if (!(sbi->ckpt->ckpt_flags & CP_UMOUNT_FLAG) &&
+	if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_UMOUNT_FLAG) &&
 				!test_opt(sbi, DISABLE_ROLL_FORWARD))
 		recover_fsync_data(sbi);
 
diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index 1429ece7caab..c2fbbc35c1e6 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -272,8 +272,8 @@ struct f2fs_sit_block {
  * ex) data_blkaddr = (block_t)(nodepage_start_address + ofs_in_node)
  */
 #define ENTRIES_IN_SUM		512
-#define	SUMMARY_SIZE		(sizeof(struct f2fs_summary))
-#define	SUM_FOOTER_SIZE		(sizeof(struct summary_footer))
+#define	SUMMARY_SIZE		(7)	/* sizeof(struct summary) */
+#define	SUM_FOOTER_SIZE		(5)	/* sizeof(struct summary_footer) */
 #define SUM_ENTRY_SIZE		(SUMMARY_SIZE * ENTRIES_IN_SUM)
 
 /* a summary entry for a 4KB-sized block in a segment */
@@ -297,7 +297,7 @@ struct summary_footer {
 	__u32 check_sum;		/* summary checksum */
 } __packed;
 
-#define SUM_JOURNAL_SIZE	(PAGE_CACHE_SIZE - SUM_FOOTER_SIZE -\
+#define SUM_JOURNAL_SIZE	(F2FS_BLKSIZE - SUM_FOOTER_SIZE -\
 				SUM_ENTRY_SIZE)
 #define NAT_JOURNAL_ENTRIES	((SUM_JOURNAL_SIZE - 2) /\
 				sizeof(struct nat_journal_entry))
-- 
cgit v1.2.3


From 457d08ee4fd91c8df17917ff2d32565e6adacbfc Mon Sep 17 00:00:00 2001
From: Namjae Jeon <namjae.jeon@samsung.com>
Date: Sat, 8 Dec 2012 14:54:50 +0900
Subject: f2fs: introduce accessor to retrieve number of dentry slots

Simplify code by providing the accessor macro to retrieve the
number of dentry slots for a given filename length.

Signed-off-by: Namjae Jeon <namjae.jeon@samsung.com>
Signed-off-by: Amit Sahrawat <a.sahrawat@samsung.com>
---
 fs/f2fs/dir.c           | 13 +++++--------
 include/linux/f2fs_fs.h |  3 +++
 2 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index fc02d8b43aea..d900c088c7c6 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -99,8 +99,7 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
 					NR_DENTRY_IN_BLOCK, 0);
 	while (bit_pos < NR_DENTRY_IN_BLOCK) {
 		de = &dentry_blk->dentry[bit_pos];
-		slots = (le16_to_cpu(de->name_len) + F2FS_NAME_LEN - 1) /
-							F2FS_NAME_LEN;
+		slots = GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
 
 		if (early_match_name(name, namelen, namehash, de)) {
 			if (!memcmp(dentry_blk->filename[bit_pos],
@@ -130,7 +129,7 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
 		unsigned int level, const char *name, int namelen,
 			f2fs_hash_t namehash, struct page **res_page)
 {
-	int s = (namelen + F2FS_NAME_LEN - 1) / F2FS_NAME_LEN;
+	int s = GET_DENTRY_SLOTS(namelen);
 	unsigned int nbucket, nblock;
 	unsigned int bidx, end_block;
 	struct page *dentry_page;
@@ -383,7 +382,7 @@ int f2fs_add_link(struct dentry *dentry, struct inode *inode)
 	int namelen = dentry->d_name.len;
 	struct page *dentry_page = NULL;
 	struct f2fs_dentry_block *dentry_blk = NULL;
-	int slots = (namelen + F2FS_NAME_LEN - 1) / F2FS_NAME_LEN;
+	int slots = GET_DENTRY_SLOTS(namelen);
 	int err = 0;
 	int i;
 
@@ -465,8 +464,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
 	struct address_space *mapping = page->mapping;
 	struct inode *dir = mapping->host;
 	struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
-	int slots = (le16_to_cpu(dentry->name_len) + F2FS_NAME_LEN - 1) /
-							F2FS_NAME_LEN;
+	int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len));
 	void *kaddr = page_address(page);
 	int i;
 
@@ -641,8 +639,7 @@ static int f2fs_readdir(struct file *file, void *dirent, filldir_t filldir)
 				file->f_pos += bit_pos - start_bit_pos;
 				goto success;
 			}
-			slots = (le16_to_cpu(de->name_len) + F2FS_NAME_LEN - 1)
-								/ F2FS_NAME_LEN;
+			slots = GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
 			bit_pos += slots;
 		}
 		bit_pos = 0;
diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index c2fbbc35c1e6..f9a12f6243a5 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -363,6 +363,9 @@ typedef __le32	f2fs_hash_t;
 
 /* One directory entry slot covers 8bytes-long file name */
 #define F2FS_NAME_LEN		8
+#define F2FS_NAME_LEN_BITS	3
+
+#define GET_DENTRY_SLOTS(x)	((x + F2FS_NAME_LEN - 1) >> F2FS_NAME_LEN_BITS)
 
 /* the number of dentry in a block */
 #define NR_DENTRY_IN_BLOCK	214
-- 
cgit v1.2.3


From b0284de05e07d56ff7de154d0c9263788755f5eb Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Fri, 30 Nov 2012 10:09:42 +0000
Subject: ab8500_bm: Rename battery management platform data to something more
 logical

The platform specific battery management configuration data structure
is currently called 'bat' short for 'battery'; however, it contains
information for all components of the battery management group, rather
than information pertaining to the battery itself - there are other
structures for that. So, in keeping with its structure namesake
'abx500_bm_data', we rename it to 'bm' here. Using similar logic,
we're also renaming 'bmdevs_of_probe' to the more device specific
'ab8500_bm_of_probe'.

Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/power/ab8500_bmdata.c   |   6 +--
 drivers/power/ab8500_btemp.c    |  60 ++++++++++-----------
 drivers/power/ab8500_charger.c  |  24 ++++-----
 drivers/power/ab8500_fg.c       |  96 +++++++++++++++++-----------------
 drivers/power/abx500_chargalg.c | 112 ++++++++++++++++++++--------------------
 include/linux/mfd/abx500.h      |   6 +--
 6 files changed, 152 insertions(+), 152 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/power/ab8500_bmdata.c b/drivers/power/ab8500_bmdata.c
index df5a590d760e..c2fb2c554019 100644
--- a/drivers/power/ab8500_bmdata.c
+++ b/drivers/power/ab8500_bmdata.c
@@ -452,9 +452,9 @@ struct abx500_bm_data ab8500_bm_data = {
 	.fg_params              = &fg,
 };
 
-int __devinit bmdevs_of_probe(struct device *dev,
-			      struct device_node *np,
-			      struct abx500_bm_data **battery)
+int __devinit ab8500_bm_of_probe(struct device *dev,
+				 struct device_node *np,
+				 struct abx500_bm_data **battery)
 {
 	struct batres_vs_temp *tmp_batres_tbl;
 	struct device_node *np_bat_supply;
diff --git a/drivers/power/ab8500_btemp.c b/drivers/power/ab8500_btemp.c
index 5d1bf0bd6fa5..33ed0fccbd0e 100644
--- a/drivers/power/ab8500_btemp.c
+++ b/drivers/power/ab8500_btemp.c
@@ -78,7 +78,7 @@ struct ab8500_btemp_ranges {
  * @parent:		Pointer to the struct ab8500
  * @gpadc:		Pointer to the struct gpadc
  * @fg:			Pointer to the struct fg
- * @bat:		Pointer to the abx500_bm platform data
+ * @bm:           	Platform specific battery management information
  * @btemp_psy:		Structure for BTEMP specific battery properties
  * @events:		Structure for information about events triggered
  * @btemp_ranges:	Battery temperature range structure
@@ -95,7 +95,7 @@ struct ab8500_btemp {
 	struct ab8500 *parent;
 	struct ab8500_gpadc *gpadc;
 	struct ab8500_fg *fg;
-	struct abx500_bm_data *bat;
+	struct abx500_bm_data *bm;
 	struct power_supply btemp_psy;
 	struct ab8500_btemp_events events;
 	struct ab8500_btemp_ranges btemp_ranges;
@@ -149,13 +149,13 @@ static int ab8500_btemp_batctrl_volt_to_res(struct ab8500_btemp *di,
 		return (450000 * (v_batctrl)) / (1800 - v_batctrl);
 	}
 
-	if (di->bat->adc_therm == ABx500_ADC_THERM_BATCTRL) {
+	if (di->bm->adc_therm == ABx500_ADC_THERM_BATCTRL) {
 		/*
 		 * If the battery has internal NTC, we use the current
 		 * source to calculate the resistance, 7uA or 20uA
 		 */
 		rbs = (v_batctrl * 1000
-		       - di->bat->gnd_lift_resistance * inst_curr)
+		       - di->bm->gnd_lift_resistance * inst_curr)
 		      / di->curr_source;
 	} else {
 		/*
@@ -211,7 +211,7 @@ static int ab8500_btemp_curr_source_enable(struct ab8500_btemp *di,
 		return 0;
 
 	/* Only do this for batteries with internal NTC */
-	if (di->bat->adc_therm == ABx500_ADC_THERM_BATCTRL && enable) {
+	if (di->bm->adc_therm == ABx500_ADC_THERM_BATCTRL && enable) {
 		if (di->curr_source == BTEMP_BATCTRL_CURR_SRC_7UA)
 			curr = BAT_CTRL_7U_ENA;
 		else
@@ -243,7 +243,7 @@ static int ab8500_btemp_curr_source_enable(struct ab8500_btemp *di,
 				__func__);
 			goto disable_curr_source;
 		}
-	} else if (di->bat->adc_therm == ABx500_ADC_THERM_BATCTRL && !enable) {
+	} else if (di->bm->adc_therm == ABx500_ADC_THERM_BATCTRL && !enable) {
 		dev_dbg(di->dev, "Disable BATCTRL curr source\n");
 
 		/* Write 0 to the curr bits */
@@ -459,9 +459,9 @@ static int ab8500_btemp_measure_temp(struct ab8500_btemp *di)
 	int rbat, rntc, vntc;
 	u8 id;
 
-	id = di->bat->batt_id;
+	id = di->bm->batt_id;
 
-	if (di->bat->adc_therm == ABx500_ADC_THERM_BATCTRL &&
+	if (di->bm->adc_therm == ABx500_ADC_THERM_BATCTRL &&
 			id != BATTERY_UNKNOWN) {
 
 		rbat = ab8500_btemp_get_batctrl_res(di);
@@ -476,8 +476,8 @@ static int ab8500_btemp_measure_temp(struct ab8500_btemp *di)
 		}
 
 		temp = ab8500_btemp_res_to_temp(di,
-			di->bat->bat_type[id].r_to_t_tbl,
-			di->bat->bat_type[id].n_temp_tbl_elements, rbat);
+			di->bm->bat_type[id].r_to_t_tbl,
+			di->bm->bat_type[id].n_temp_tbl_elements, rbat);
 	} else {
 		vntc = ab8500_gpadc_convert(di->gpadc, BTEMP_BALL);
 		if (vntc < 0) {
@@ -493,8 +493,8 @@ static int ab8500_btemp_measure_temp(struct ab8500_btemp *di)
 		rntc = 230000 * vntc / (VTVOUT_V - vntc);
 
 		temp = ab8500_btemp_res_to_temp(di,
-			di->bat->bat_type[id].r_to_t_tbl,
-			di->bat->bat_type[id].n_temp_tbl_elements, rntc);
+			di->bm->bat_type[id].r_to_t_tbl,
+			di->bm->bat_type[id].n_temp_tbl_elements, rntc);
 		prev = temp;
 	}
 	dev_dbg(di->dev, "Battery temperature is %d\n", temp);
@@ -515,7 +515,7 @@ static int ab8500_btemp_id(struct ab8500_btemp *di)
 	u8 i;
 
 	di->curr_source = BTEMP_BATCTRL_CURR_SRC_7UA;
-	di->bat->batt_id = BATTERY_UNKNOWN;
+	di->bm->batt_id = BATTERY_UNKNOWN;
 
 	res =  ab8500_btemp_get_batctrl_res(di);
 	if (res < 0) {
@@ -524,23 +524,23 @@ static int ab8500_btemp_id(struct ab8500_btemp *di)
 	}
 
 	/* BATTERY_UNKNOWN is defined on position 0, skip it! */
-	for (i = BATTERY_UNKNOWN + 1; i < di->bat->n_btypes; i++) {
-		if ((res <= di->bat->bat_type[i].resis_high) &&
-			(res >= di->bat->bat_type[i].resis_low)) {
+	for (i = BATTERY_UNKNOWN + 1; i < di->bm->n_btypes; i++) {
+		if ((res <= di->bm->bat_type[i].resis_high) &&
+			(res >= di->bm->bat_type[i].resis_low)) {
 			dev_dbg(di->dev, "Battery detected on %s"
 				" low %d < res %d < high: %d"
 				" index: %d\n",
-				di->bat->adc_therm == ABx500_ADC_THERM_BATCTRL ?
+				di->bm->adc_therm == ABx500_ADC_THERM_BATCTRL ?
 				"BATCTRL" : "BATTEMP",
-				di->bat->bat_type[i].resis_low, res,
-				di->bat->bat_type[i].resis_high, i);
+				di->bm->bat_type[i].resis_low, res,
+				di->bm->bat_type[i].resis_high, i);
 
-			di->bat->batt_id = i;
+			di->bm->batt_id = i;
 			break;
 		}
 	}
 
-	if (di->bat->batt_id == BATTERY_UNKNOWN) {
+	if (di->bm->batt_id == BATTERY_UNKNOWN) {
 		dev_warn(di->dev, "Battery identified as unknown"
 			", resistance %d Ohm\n", res);
 		return -ENXIO;
@@ -550,13 +550,13 @@ static int ab8500_btemp_id(struct ab8500_btemp *di)
 	 * We only have to change current source if the
 	 * detected type is Type 1, else we use the 7uA source
 	 */
-	if (di->bat->adc_therm == ABx500_ADC_THERM_BATCTRL &&
-			di->bat->batt_id == 1) {
+	if (di->bm->adc_therm == ABx500_ADC_THERM_BATCTRL &&
+			di->bm->batt_id == 1) {
 		dev_dbg(di->dev, "Set BATCTRL current source to 20uA\n");
 		di->curr_source = BTEMP_BATCTRL_CURR_SRC_20UA;
 	}
 
-	return di->bat->batt_id;
+	return di->bm->batt_id;
 }
 
 /**
@@ -586,9 +586,9 @@ static void ab8500_btemp_periodic_work(struct work_struct *work)
 	}
 
 	if (di->events.ac_conn || di->events.usb_conn)
-		interval = di->bat->temp_interval_chg;
+		interval = di->bm->temp_interval_chg;
 	else
-		interval = di->bat->temp_interval_nochg;
+		interval = di->bm->temp_interval_nochg;
 
 	/* Schedule a new measurement */
 	queue_delayed_work(di->btemp_wq,
@@ -815,7 +815,7 @@ static int ab8500_btemp_get_property(struct power_supply *psy,
 			val->intval = 1;
 		break;
 	case POWER_SUPPLY_PROP_TECHNOLOGY:
-		val->intval = di->bat->bat_type[di->bat->batt_id].name;
+		val->intval = di->bm->bat_type[di->bm->batt_id].name;
 		break;
 	case POWER_SUPPLY_PROP_TEMP:
 		val->intval = ab8500_btemp_get_temp(di);
@@ -985,10 +985,10 @@ static int __devinit ab8500_btemp_probe(struct platform_device *pdev)
 		dev_err(&pdev->dev, "%s no mem for ab8500_btemp\n", __func__);
 		return -ENOMEM;
 	}
-	di->bat = pdev->mfd_cell->platform_data;
-	if (!di->bat) {
+	di->bm = pdev->mfd_cell->platform_data;
+	if (!di->bm) {
 		if (np) {
-			ret = bmdevs_of_probe(&pdev->dev, np, &di->bat);
+			ret = ab8500_bm_of_probe(&pdev->dev, np, &di->bm);
 			if (ret) {
 				dev_err(&pdev->dev,
 					"failed to get battery information\n");
diff --git a/drivers/power/ab8500_charger.c b/drivers/power/ab8500_charger.c
index d27dd7fec163..21dc8422778d 100644
--- a/drivers/power/ab8500_charger.c
+++ b/drivers/power/ab8500_charger.c
@@ -189,7 +189,7 @@ struct ab8500_charger_usb_state {
  * @autopower_cfg	platform specific power config support for "pwron after pwrloss"
  * @parent:		Pointer to the struct ab8500
  * @gpadc:		Pointer to the struct gpadc
- * @bat:		Pointer to the abx500_bm platform data
+ * @bm:           	Platform specific battery management information
  * @flags:		Structure for information about events triggered
  * @usb_state:		Structure for usb stack information
  * @ac_chg:		AC charger power supply
@@ -226,7 +226,7 @@ struct ab8500_charger {
 	bool autopower_cfg;
 	struct ab8500 *parent;
 	struct ab8500_gpadc *gpadc;
-	struct abx500_bm_data *bat;
+	struct abx500_bm_data *bm;
 	struct ab8500_charger_event_flags flags;
 	struct ab8500_charger_usb_state usb_state;
 	struct ux500_charger ac_chg;
@@ -1034,7 +1034,7 @@ static int ab8500_charger_set_vbus_in_curr(struct ab8500_charger *di,
 	int min_value;
 
 	/* We should always use to lowest current limit */
-	min_value = min(di->bat->chg_params->usb_curr_max, ich_in);
+	min_value = min(di->bm->chg_params->usb_curr_max, ich_in);
 
 	switch (min_value) {
 	case 100:
@@ -1176,7 +1176,7 @@ static int ab8500_charger_ac_en(struct ux500_charger *charger,
 		volt_index = ab8500_voltage_to_regval(vset);
 		curr_index = ab8500_current_to_regval(iset);
 		input_curr_index = ab8500_current_to_regval(
-			di->bat->chg_params->ac_curr_max);
+			di->bm->chg_params->ac_curr_max);
 		if (volt_index < 0 || curr_index < 0 || input_curr_index < 0) {
 			dev_err(di->dev,
 				"Charger voltage or current too high, "
@@ -1193,7 +1193,7 @@ static int ab8500_charger_ac_en(struct ux500_charger *charger,
 		}
 		/* MainChInputCurr: current that can be drawn from the charger*/
 		ret = ab8500_charger_set_main_in_curr(di,
-			di->bat->chg_params->ac_curr_max);
+			di->bm->chg_params->ac_curr_max);
 		if (ret) {
 			dev_err(di->dev, "%s Failed to set MainChInputCurr\n",
 				__func__);
@@ -1209,7 +1209,7 @@ static int ab8500_charger_ac_en(struct ux500_charger *charger,
 		}
 
 		/* Check if VBAT overshoot control should be enabled */
-		if (!di->bat->enable_overshoot)
+		if (!di->bm->enable_overshoot)
 			overshoot = MAIN_CH_NO_OVERSHOOT_ENA_N;
 
 		/* Enable Main Charger */
@@ -1376,7 +1376,7 @@ static int ab8500_charger_usb_en(struct ux500_charger *charger,
 			return ret;
 		}
 		/* Check if VBAT overshoot control should be enabled */
-		if (!di->bat->enable_overshoot)
+		if (!di->bm->enable_overshoot)
 			overshoot = USB_CHG_NO_OVERSHOOT_ENA_N;
 
 		/* Enable USB Charger */
@@ -2454,8 +2454,8 @@ static int ab8500_charger_init_hw_registers(struct ab8500_charger *di)
 	ret = abx500_set_register_interruptible(di->dev,
 		AB8500_RTC,
 		AB8500_RTC_BACKUP_CHG_REG,
-		di->bat->bkup_bat_v |
-		di->bat->bkup_bat_i);
+		di->bm->bkup_bat_v |
+		di->bm->bkup_bat_i);
 	if (ret) {
 		dev_err(di->dev, "failed to setup backup battery charging\n");
 		goto out;
@@ -2644,10 +2644,10 @@ static int __devinit ab8500_charger_probe(struct platform_device *pdev)
 		dev_err(&pdev->dev, "%s no mem for ab8500_charger\n", __func__);
 		return -ENOMEM;
 	}
-	di->bat = pdev->mfd_cell->platform_data;
-	if (!di->bat) {
+	di->bm = pdev->mfd_cell->platform_data;
+	if (!di->bm) {
 		if (np) {
-			ret = bmdevs_of_probe(&pdev->dev, np, &di->bat);
+			ret = ab8500_bm_of_probe(&pdev->dev, np, &di->bm);
 			if (ret) {
 				dev_err(&pdev->dev,
 					"failed to get battery information\n");
diff --git a/drivers/power/ab8500_fg.c b/drivers/power/ab8500_fg.c
index 5a9f58d4c0fb..4cf231375de3 100644
--- a/drivers/power/ab8500_fg.c
+++ b/drivers/power/ab8500_fg.c
@@ -173,7 +173,7 @@ struct inst_curr_result_list {
  * @avg_cap:		Average capacity filter
  * @parent:		Pointer to the struct ab8500
  * @gpadc:		Pointer to the struct gpadc
- * @bat:		Pointer to the abx500_bm platform data
+ * @bm:           	Platform specific battery management information
  * @fg_psy:		Structure that holds the FG specific battery properties
  * @fg_wq:		Work queue for running the FG algorithm
  * @fg_periodic_work:	Work to run the FG algorithm periodically
@@ -212,7 +212,7 @@ struct ab8500_fg {
 	struct ab8500_fg_avg_cap avg_cap;
 	struct ab8500 *parent;
 	struct ab8500_gpadc *gpadc;
-	struct abx500_bm_data *bat;
+	struct abx500_bm_data *bm;
 	struct power_supply fg_psy;
 	struct workqueue_struct *fg_wq;
 	struct delayed_work fg_periodic_work;
@@ -355,7 +355,7 @@ static int ab8500_fg_is_low_curr(struct ab8500_fg *di, int curr)
 	/*
 	 * We want to know if we're in low current mode
 	 */
-	if (curr > -di->bat->fg_params->high_curr_threshold)
+	if (curr > -di->bm->fg_params->high_curr_threshold)
 		return true;
 	else
 		return false;
@@ -648,7 +648,7 @@ int ab8500_fg_inst_curr_finalize(struct ab8500_fg *di, int *res)
 	 * 112.9nAh assumes 10mOhm, but fg_res is in 0.1mOhm
 	 */
 	val = (val * QLSB_NANO_AMP_HOURS_X10 * 36 * 4) /
-		(1000 * di->bat->fg_res);
+		(1000 * di->bm->fg_res);
 
 	if (di->turn_off_fg) {
 		dev_dbg(di->dev, "%s Disable FG\n", __func__);
@@ -751,7 +751,7 @@ static void ab8500_fg_acc_cur_work(struct work_struct *work)
 	 * 112.9nAh assumes 10mOhm, but fg_res is in 0.1mOhm
 	 */
 	di->accu_charge = (val * QLSB_NANO_AMP_HOURS_X10) /
-		(100 * di->bat->fg_res);
+		(100 * di->bm->fg_res);
 
 	/*
 	 * Convert to unit value in mA
@@ -763,7 +763,7 @@ static void ab8500_fg_acc_cur_work(struct work_struct *work)
 	 * 112.9nAh assumes 10mOhm, but fg_res is in 0.1mOhm
 	 */
 	di->avg_curr = (val * QLSB_NANO_AMP_HOURS_X10 * 36) /
-		(1000 * di->bat->fg_res * (di->fg_samples / 4));
+		(1000 * di->bm->fg_res * (di->fg_samples / 4));
 
 	di->flags.conv_done = true;
 
@@ -815,8 +815,8 @@ static int ab8500_fg_volt_to_capacity(struct ab8500_fg *di, int voltage)
 	struct abx500_v_to_cap *tbl;
 	int cap = 0;
 
-	tbl = di->bat->bat_type[di->bat->batt_id].v_to_cap_tbl,
-	tbl_size = di->bat->bat_type[di->bat->batt_id].n_v_cap_tbl_elements;
+	tbl = di->bm->bat_type[di->bm->batt_id].v_to_cap_tbl,
+	tbl_size = di->bm->bat_type[di->bm->batt_id].n_v_cap_tbl_elements;
 
 	for (i = 0; i < tbl_size; ++i) {
 		if (voltage > tbl[i].voltage)
@@ -867,8 +867,8 @@ static int ab8500_fg_battery_resistance(struct ab8500_fg *di)
 	struct batres_vs_temp *tbl;
 	int resist = 0;
 
-	tbl = di->bat->bat_type[di->bat->batt_id].batres_tbl;
-	tbl_size = di->bat->bat_type[di->bat->batt_id].n_batres_tbl_elements;
+	tbl = di->bm->bat_type[di->bm->batt_id].batres_tbl;
+	tbl_size = di->bm->bat_type[di->bm->batt_id].n_batres_tbl_elements;
 
 	for (i = 0; i < tbl_size; ++i) {
 		if (di->bat_temp / 10 > tbl[i].temp)
@@ -889,11 +889,11 @@ static int ab8500_fg_battery_resistance(struct ab8500_fg *di)
 
 	dev_dbg(di->dev, "%s Temp: %d battery internal resistance: %d"
 	    " fg resistance %d, total: %d (mOhm)\n",
-		__func__, di->bat_temp, resist, di->bat->fg_res / 10,
-		(di->bat->fg_res / 10) + resist);
+		__func__, di->bat_temp, resist, di->bm->fg_res / 10,
+		(di->bm->fg_res / 10) + resist);
 
 	/* fg_res variable is in 0.1mOhm */
-	resist += di->bat->fg_res / 10;
+	resist += di->bm->fg_res / 10;
 
 	return resist;
 }
@@ -1111,14 +1111,14 @@ static int ab8500_fg_capacity_level(struct ab8500_fg *di)
 
 	percent = di->bat_cap.permille / 10;
 
-	if (percent <= di->bat->cap_levels->critical ||
+	if (percent <= di->bm->cap_levels->critical ||
 		di->flags.low_bat)
 		ret = POWER_SUPPLY_CAPACITY_LEVEL_CRITICAL;
-	else if (percent <= di->bat->cap_levels->low)
+	else if (percent <= di->bm->cap_levels->low)
 		ret = POWER_SUPPLY_CAPACITY_LEVEL_LOW;
-	else if (percent <= di->bat->cap_levels->normal)
+	else if (percent <= di->bm->cap_levels->normal)
 		ret = POWER_SUPPLY_CAPACITY_LEVEL_NORMAL;
-	else if (percent <= di->bat->cap_levels->high)
+	else if (percent <= di->bm->cap_levels->high)
 		ret = POWER_SUPPLY_CAPACITY_LEVEL_HIGH;
 	else
 		ret = POWER_SUPPLY_CAPACITY_LEVEL_FULL;
@@ -1183,7 +1183,7 @@ static void ab8500_fg_check_capacity_limits(struct ab8500_fg *di, bool init)
 			di->bat_cap.prev_percent !=
 			(di->bat_cap.permille) / 10 &&
 			(di->bat_cap.permille / 10) <
-			di->bat->fg_params->maint_thres) {
+			di->bm->fg_params->maint_thres) {
 			dev_dbg(di->dev,
 				"battery reported full "
 				"but capacity dropping: %d\n",
@@ -1285,7 +1285,7 @@ static void ab8500_fg_algorithm_charging(struct ab8500_fg *di)
 	switch (di->charge_state) {
 	case AB8500_FG_CHARGE_INIT:
 		di->fg_samples = SEC_TO_SAMPLE(
-			di->bat->fg_params->accu_charging);
+			di->bm->fg_params->accu_charging);
 
 		ab8500_fg_coulomb_counter(di, true);
 		ab8500_fg_charge_state_to(di, AB8500_FG_CHARGE_READOUT);
@@ -1347,8 +1347,8 @@ static bool check_sysfs_capacity(struct ab8500_fg *di)
 	cap_permille = ab8500_fg_convert_mah_to_permille(di,
 		di->bat_cap.user_mah);
 
-	lower = di->bat_cap.permille - di->bat->fg_params->user_cap_limit * 10;
-	upper = di->bat_cap.permille + di->bat->fg_params->user_cap_limit * 10;
+	lower = di->bat_cap.permille - di->bm->fg_params->user_cap_limit * 10;
+	upper = di->bat_cap.permille + di->bm->fg_params->user_cap_limit * 10;
 
 	if (lower < 0)
 		lower = 0;
@@ -1388,7 +1388,7 @@ static void ab8500_fg_algorithm_discharging(struct ab8500_fg *di)
 	case AB8500_FG_DISCHARGE_INIT:
 		/* We use the FG IRQ to work on */
 		di->init_cnt = 0;
-		di->fg_samples = SEC_TO_SAMPLE(di->bat->fg_params->init_timer);
+		di->fg_samples = SEC_TO_SAMPLE(di->bm->fg_params->init_timer);
 		ab8500_fg_coulomb_counter(di, true);
 		ab8500_fg_discharge_state_to(di,
 			AB8500_FG_DISCHARGE_INITMEASURING);
@@ -1401,17 +1401,17 @@ static void ab8500_fg_algorithm_discharging(struct ab8500_fg *di)
 		 * samples to get an initial capacity.
 		 * Then go to READOUT
 		 */
-		sleep_time = di->bat->fg_params->init_timer;
+		sleep_time = di->bm->fg_params->init_timer;
 
 		/* Discard the first [x] seconds */
-		if (di->init_cnt > di->bat->fg_params->init_discard_time) {
+		if (di->init_cnt > di->bm->fg_params->init_discard_time) {
 			ab8500_fg_calc_cap_discharge_voltage(di, true);
 
 			ab8500_fg_check_capacity_limits(di, true);
 		}
 
 		di->init_cnt += sleep_time;
-		if (di->init_cnt > di->bat->fg_params->init_total_time)
+		if (di->init_cnt > di->bm->fg_params->init_total_time)
 			ab8500_fg_discharge_state_to(di,
 				AB8500_FG_DISCHARGE_READOUT_INIT);
 
@@ -1426,7 +1426,7 @@ static void ab8500_fg_algorithm_discharging(struct ab8500_fg *di)
 		/* Intentional fallthrough */
 
 	case AB8500_FG_DISCHARGE_RECOVERY:
-		sleep_time = di->bat->fg_params->recovery_sleep_timer;
+		sleep_time = di->bm->fg_params->recovery_sleep_timer;
 
 		/*
 		 * We should check the power consumption
@@ -1438,9 +1438,9 @@ static void ab8500_fg_algorithm_discharging(struct ab8500_fg *di)
 
 		if (ab8500_fg_is_low_curr(di, di->inst_curr)) {
 			if (di->recovery_cnt >
-				di->bat->fg_params->recovery_total_time) {
+				di->bm->fg_params->recovery_total_time) {
 				di->fg_samples = SEC_TO_SAMPLE(
-					di->bat->fg_params->accu_high_curr);
+					di->bm->fg_params->accu_high_curr);
 				ab8500_fg_coulomb_counter(di, true);
 				ab8500_fg_discharge_state_to(di,
 					AB8500_FG_DISCHARGE_READOUT);
@@ -1453,7 +1453,7 @@ static void ab8500_fg_algorithm_discharging(struct ab8500_fg *di)
 			di->recovery_cnt += sleep_time;
 		} else {
 			di->fg_samples = SEC_TO_SAMPLE(
-				di->bat->fg_params->accu_high_curr);
+				di->bm->fg_params->accu_high_curr);
 			ab8500_fg_coulomb_counter(di, true);
 			ab8500_fg_discharge_state_to(di,
 				AB8500_FG_DISCHARGE_READOUT);
@@ -1462,7 +1462,7 @@ static void ab8500_fg_algorithm_discharging(struct ab8500_fg *di)
 
 	case AB8500_FG_DISCHARGE_READOUT_INIT:
 		di->fg_samples = SEC_TO_SAMPLE(
-			di->bat->fg_params->accu_high_curr);
+			di->bm->fg_params->accu_high_curr);
 		ab8500_fg_coulomb_counter(di, true);
 		ab8500_fg_discharge_state_to(di,
 				AB8500_FG_DISCHARGE_READOUT);
@@ -1509,9 +1509,9 @@ static void ab8500_fg_algorithm_discharging(struct ab8500_fg *di)
 			}
 
 			di->high_curr_cnt +=
-				di->bat->fg_params->accu_high_curr;
+				di->bm->fg_params->accu_high_curr;
 			if (di->high_curr_cnt >
-				di->bat->fg_params->high_curr_time)
+				di->bm->fg_params->high_curr_time)
 				di->recovery_needed = true;
 
 			ab8500_fg_calc_cap_discharge_fg(di);
@@ -1528,7 +1528,7 @@ static void ab8500_fg_algorithm_discharging(struct ab8500_fg *di)
 		ab8500_fg_calc_cap_discharge_voltage(di, true);
 
 		di->fg_samples = SEC_TO_SAMPLE(
-			di->bat->fg_params->accu_high_curr);
+			di->bm->fg_params->accu_high_curr);
 		ab8500_fg_coulomb_counter(di, true);
 		ab8500_fg_discharge_state_to(di,
 				AB8500_FG_DISCHARGE_READOUT);
@@ -1721,7 +1721,7 @@ static void ab8500_fg_low_bat_work(struct work_struct *work)
 	vbat = ab8500_fg_bat_voltage(di);
 
 	/* Check if LOW_BAT still fulfilled */
-	if (vbat < di->bat->fg_params->lowbat_threshold) {
+	if (vbat < di->bm->fg_params->lowbat_threshold) {
 		di->flags.low_bat = true;
 		dev_warn(di->dev, "Battery voltage still LOW\n");
 
@@ -1779,8 +1779,8 @@ static int ab8500_fg_battok_init_hw_register(struct ab8500_fg *di)
 	int ret;
 	int new_val;
 
-	sel0 = di->bat->fg_params->battok_falling_th_sel0;
-	sel1 = di->bat->fg_params->battok_raising_th_sel1;
+	sel0 = di->bm->fg_params->battok_falling_th_sel0;
+	sel1 = di->bm->fg_params->battok_raising_th_sel1;
 
 	cbp_sel0 = ab8500_fg_battok_calc(di, sel0);
 	cbp_sel1 = ab8500_fg_battok_calc(di, sel1);
@@ -1963,7 +1963,7 @@ static int ab8500_fg_get_property(struct power_supply *psy,
 				di->bat_cap.max_mah);
 		break;
 	case POWER_SUPPLY_PROP_ENERGY_NOW:
-		if (di->flags.batt_unknown && !di->bat->chg_unknown_bat &&
+		if (di->flags.batt_unknown && !di->bm->chg_unknown_bat &&
 				di->flags.batt_id_received)
 			val->intval = ab8500_fg_convert_mah_to_uwh(di,
 					di->bat_cap.max_mah);
@@ -1978,21 +1978,21 @@ static int ab8500_fg_get_property(struct power_supply *psy,
 		val->intval = di->bat_cap.max_mah;
 		break;
 	case POWER_SUPPLY_PROP_CHARGE_NOW:
-		if (di->flags.batt_unknown && !di->bat->chg_unknown_bat &&
+		if (di->flags.batt_unknown && !di->bm->chg_unknown_bat &&
 				di->flags.batt_id_received)
 			val->intval = di->bat_cap.max_mah;
 		else
 			val->intval = di->bat_cap.prev_mah;
 		break;
 	case POWER_SUPPLY_PROP_CAPACITY:
-		if (di->flags.batt_unknown && !di->bat->chg_unknown_bat &&
+		if (di->flags.batt_unknown && !di->bm->chg_unknown_bat &&
 				di->flags.batt_id_received)
 			val->intval = 100;
 		else
 			val->intval = di->bat_cap.prev_percent;
 		break;
 	case POWER_SUPPLY_PROP_CAPACITY_LEVEL:
-		if (di->flags.batt_unknown && !di->bat->chg_unknown_bat &&
+		if (di->flags.batt_unknown && !di->bm->chg_unknown_bat &&
 				di->flags.batt_id_received)
 			val->intval = POWER_SUPPLY_CAPACITY_LEVEL_UNKNOWN;
 		else
@@ -2078,7 +2078,7 @@ static int ab8500_fg_get_ext_psy_data(struct device *dev, void *data)
 				if (!di->flags.batt_id_received) {
 					const struct abx500_battery_type *b;
 
-					b = &(di->bat->bat_type[di->bat->batt_id]);
+					b = &(di->bm->bat_type[di->bm->batt_id]);
 
 					di->flags.batt_id_received = true;
 
@@ -2155,7 +2155,7 @@ static int ab8500_fg_init_hw_registers(struct ab8500_fg *di)
 		AB8500_SYS_CTRL2_BLOCK,
 		AB8500_LOW_BAT_REG,
 		ab8500_volt_to_regval(
-			di->bat->fg_params->lowbat_threshold) << 1 |
+			di->bm->fg_params->lowbat_threshold) << 1 |
 		LOW_BAT_ENABLE);
 	if (ret) {
 		dev_err(di->dev, "%s write failed\n", __func__);
@@ -2457,10 +2457,10 @@ static int __devinit ab8500_fg_probe(struct platform_device *pdev)
 		dev_err(&pdev->dev, "%s no mem for ab8500_fg\n", __func__);
 		return -ENOMEM;
 	}
-	di->bat = pdev->mfd_cell->platform_data;
-	if (!di->bat) {
+	di->bm = pdev->mfd_cell->platform_data;
+	if (!di->bm) {
 		if (np) {
-			ret = bmdevs_of_probe(&pdev->dev, np, &di->bat);
+			ret = ab8500_bm_of_probe(&pdev->dev, np, &di->bm);
 			if (ret) {
 				dev_err(&pdev->dev,
 					"failed to get battery information\n");
@@ -2491,11 +2491,11 @@ static int __devinit ab8500_fg_probe(struct platform_device *pdev)
 	di->fg_psy.external_power_changed = ab8500_fg_external_power_changed;
 
 	di->bat_cap.max_mah_design = MILLI_TO_MICRO *
-		di->bat->bat_type[di->bat->batt_id].charge_full_design;
+		di->bm->bat_type[di->bm->batt_id].charge_full_design;
 
 	di->bat_cap.max_mah = di->bat_cap.max_mah_design;
 
-	di->vbat_nom = di->bat->bat_type[di->bat->batt_id].nominal_voltage;
+	di->vbat_nom = di->bm->bat_type[di->bm->batt_id].nominal_voltage;
 
 	di->init_capacity = true;
 
@@ -2549,7 +2549,7 @@ static int __devinit ab8500_fg_probe(struct platform_device *pdev)
 		goto free_inst_curr_wq;
 	}
 
-	di->fg_samples = SEC_TO_SAMPLE(di->bat->fg_params->init_timer);
+	di->fg_samples = SEC_TO_SAMPLE(di->bm->fg_params->init_timer);
 	ab8500_fg_coulomb_counter(di, true);
 
 	/* Initialize completion used to notify completion of inst current */
diff --git a/drivers/power/abx500_chargalg.c b/drivers/power/abx500_chargalg.c
index dcdc4393b9e7..ea2e2eb652ef 100644
--- a/drivers/power/abx500_chargalg.c
+++ b/drivers/power/abx500_chargalg.c
@@ -207,7 +207,7 @@ enum maxim_ret {
  * @chg_info:		information about connected charger types
  * @batt_data:		data of the battery
  * @susp_status:	current charger suspension status
- * @bat:		pointer to the abx500_bm platform data
+ * @bm:           	Platform specific battery management information
  * @chargalg_psy:	structure that holds the battery properties exposed by
  *			the charging algorithm
  * @events:		structure for information about events triggered
@@ -232,7 +232,7 @@ struct abx500_chargalg {
 	struct abx500_chargalg_charger_info chg_info;
 	struct abx500_chargalg_battery_data batt_data;
 	struct abx500_chargalg_suspension_status susp_status;
-	struct abx500_bm_data *bat;
+	struct abx500_bm_data *bm;
 	struct power_supply chargalg_psy;
 	struct ux500_charger *ac_chg;
 	struct ux500_charger *usb_chg;
@@ -367,13 +367,13 @@ static void abx500_chargalg_start_safety_timer(struct abx500_chargalg *di)
 	case AC_CHG:
 		timer_expiration =
 		round_jiffies(jiffies +
-			(di->bat->main_safety_tmr_h * 3600 * HZ));
+			(di->bm->main_safety_tmr_h * 3600 * HZ));
 		break;
 
 	case USB_CHG:
 		timer_expiration =
 		round_jiffies(jiffies +
-			(di->bat->usb_safety_tmr_h * 3600 * HZ));
+			(di->bm->usb_safety_tmr_h * 3600 * HZ));
 		break;
 
 	default:
@@ -638,32 +638,32 @@ static void abx500_chargalg_start_charging(struct abx500_chargalg *di,
  */
 static void abx500_chargalg_check_temp(struct abx500_chargalg *di)
 {
-	if (di->batt_data.temp > (di->bat->temp_low + di->t_hyst_norm) &&
-		di->batt_data.temp < (di->bat->temp_high - di->t_hyst_norm)) {
+	if (di->batt_data.temp > (di->bm->temp_low + di->t_hyst_norm) &&
+		di->batt_data.temp < (di->bm->temp_high - di->t_hyst_norm)) {
 		/* Temp OK! */
 		di->events.btemp_underover = false;
 		di->events.btemp_lowhigh = false;
 		di->t_hyst_norm = 0;
 		di->t_hyst_lowhigh = 0;
 	} else {
-		if (((di->batt_data.temp >= di->bat->temp_high) &&
+		if (((di->batt_data.temp >= di->bm->temp_high) &&
 			(di->batt_data.temp <
-				(di->bat->temp_over - di->t_hyst_lowhigh))) ||
+				(di->bm->temp_over - di->t_hyst_lowhigh))) ||
 			((di->batt_data.temp >
-				(di->bat->temp_under + di->t_hyst_lowhigh)) &&
-			(di->batt_data.temp <= di->bat->temp_low))) {
+				(di->bm->temp_under + di->t_hyst_lowhigh)) &&
+			(di->batt_data.temp <= di->bm->temp_low))) {
 			/* TEMP minor!!!!! */
 			di->events.btemp_underover = false;
 			di->events.btemp_lowhigh = true;
-			di->t_hyst_norm = di->bat->temp_hysteresis;
+			di->t_hyst_norm = di->bm->temp_hysteresis;
 			di->t_hyst_lowhigh = 0;
-		} else if (di->batt_data.temp <= di->bat->temp_under ||
-			di->batt_data.temp >= di->bat->temp_over) {
+		} else if (di->batt_data.temp <= di->bm->temp_under ||
+			di->batt_data.temp >= di->bm->temp_over) {
 			/* TEMP major!!!!! */
 			di->events.btemp_underover = true;
 			di->events.btemp_lowhigh = false;
 			di->t_hyst_norm = 0;
-			di->t_hyst_lowhigh = di->bat->temp_hysteresis;
+			di->t_hyst_lowhigh = di->bm->temp_hysteresis;
 		} else {
 		/* Within hysteresis */
 		dev_dbg(di->dev, "Within hysteresis limit temp: %d "
@@ -682,12 +682,12 @@ static void abx500_chargalg_check_temp(struct abx500_chargalg *di)
  */
 static void abx500_chargalg_check_charger_voltage(struct abx500_chargalg *di)
 {
-	if (di->chg_info.usb_volt > di->bat->chg_params->usb_volt_max)
+	if (di->chg_info.usb_volt > di->bm->chg_params->usb_volt_max)
 		di->chg_info.usb_chg_ok = false;
 	else
 		di->chg_info.usb_chg_ok = true;
 
-	if (di->chg_info.ac_volt > di->bat->chg_params->ac_volt_max)
+	if (di->chg_info.ac_volt > di->bm->chg_params->ac_volt_max)
 		di->chg_info.ac_chg_ok = false;
 	else
 		di->chg_info.ac_chg_ok = true;
@@ -707,10 +707,10 @@ static void abx500_chargalg_end_of_charge(struct abx500_chargalg *di)
 	if (di->charge_status == POWER_SUPPLY_STATUS_CHARGING &&
 		di->charge_state == STATE_NORMAL &&
 		!di->maintenance_chg && (di->batt_data.volt >=
-		di->bat->bat_type[di->bat->batt_id].termination_vol ||
+		di->bm->bat_type[di->bm->batt_id].termination_vol ||
 		di->events.usb_cv_active || di->events.ac_cv_active) &&
 		di->batt_data.avg_curr <
-		di->bat->bat_type[di->bat->batt_id].termination_curr &&
+		di->bm->bat_type[di->bm->batt_id].termination_curr &&
 		di->batt_data.avg_curr > 0) {
 		if (++di->eoc_cnt >= EOC_COND_CNT) {
 			di->eoc_cnt = 0;
@@ -733,12 +733,12 @@ static void abx500_chargalg_end_of_charge(struct abx500_chargalg *di)
 static void init_maxim_chg_curr(struct abx500_chargalg *di)
 {
 	di->ccm.original_iset =
-		di->bat->bat_type[di->bat->batt_id].normal_cur_lvl;
+		di->bm->bat_type[di->bm->batt_id].normal_cur_lvl;
 	di->ccm.current_iset =
-		di->bat->bat_type[di->bat->batt_id].normal_cur_lvl;
-	di->ccm.test_delta_i = di->bat->maxi->charger_curr_step;
-	di->ccm.max_current = di->bat->maxi->chg_curr;
-	di->ccm.condition_cnt = di->bat->maxi->wait_cycles;
+		di->bm->bat_type[di->bm->batt_id].normal_cur_lvl;
+	di->ccm.test_delta_i = di->bm->maxi->charger_curr_step;
+	di->ccm.max_current = di->bm->maxi->chg_curr;
+	di->ccm.condition_cnt = di->bm->maxi->wait_cycles;
 	di->ccm.level = 0;
 }
 
@@ -755,7 +755,7 @@ static enum maxim_ret abx500_chargalg_chg_curr_maxim(struct abx500_chargalg *di)
 {
 	int delta_i;
 
-	if (!di->bat->maxi->ena_maxi)
+	if (!di->bm->maxi->ena_maxi)
 		return MAXIM_RET_NOACTION;
 
 	delta_i = di->ccm.original_iset - di->batt_data.inst_curr;
@@ -766,7 +766,7 @@ static enum maxim_ret abx500_chargalg_chg_curr_maxim(struct abx500_chargalg *di)
 		if (di->ccm.wait_cnt == 0) {
 			dev_dbg(di->dev, "lowering current\n");
 			di->ccm.wait_cnt++;
-			di->ccm.condition_cnt = di->bat->maxi->wait_cycles;
+			di->ccm.condition_cnt = di->bm->maxi->wait_cycles;
 			di->ccm.max_current =
 				di->ccm.current_iset - di->ccm.test_delta_i;
 			di->ccm.current_iset = di->ccm.max_current;
@@ -791,7 +791,7 @@ static enum maxim_ret abx500_chargalg_chg_curr_maxim(struct abx500_chargalg *di)
 		if (di->ccm.current_iset == di->ccm.original_iset)
 			return MAXIM_RET_NOACTION;
 
-		di->ccm.condition_cnt = di->bat->maxi->wait_cycles;
+		di->ccm.condition_cnt = di->bm->maxi->wait_cycles;
 		di->ccm.current_iset = di->ccm.original_iset;
 		di->ccm.level = 0;
 
@@ -803,7 +803,7 @@ static enum maxim_ret abx500_chargalg_chg_curr_maxim(struct abx500_chargalg *di)
 		di->ccm.max_current) {
 		if (di->ccm.condition_cnt-- == 0) {
 			/* Increse the iset with cco.test_delta_i */
-			di->ccm.condition_cnt = di->bat->maxi->wait_cycles;
+			di->ccm.condition_cnt = di->bm->maxi->wait_cycles;
 			di->ccm.current_iset += di->ccm.test_delta_i;
 			di->ccm.level++;
 			dev_dbg(di->dev, " Maximization needed, increase"
@@ -818,7 +818,7 @@ static enum maxim_ret abx500_chargalg_chg_curr_maxim(struct abx500_chargalg *di)
 			return MAXIM_RET_NOACTION;
 		}
 	}  else {
-		di->ccm.condition_cnt = di->bat->maxi->wait_cycles;
+		di->ccm.condition_cnt = di->bm->maxi->wait_cycles;
 		return MAXIM_RET_NOACTION;
 	}
 }
@@ -838,7 +838,7 @@ static void handle_maxim_chg_curr(struct abx500_chargalg *di)
 		break;
 	case MAXIM_RET_IBAT_TOO_HIGH:
 		result = abx500_chargalg_update_chg_curr(di,
-			di->bat->bat_type[di->bat->batt_id].normal_cur_lvl);
+			di->bm->bat_type[di->bm->batt_id].normal_cur_lvl);
 		if (result)
 			dev_err(di->dev, "failed to set chg curr\n");
 		break;
@@ -1210,7 +1210,7 @@ static void abx500_chargalg_algorithm(struct abx500_chargalg *di)
 	 * this way
 	 */
 	if (!charger_status ||
-		(di->events.batt_unknown && !di->bat->chg_unknown_bat)) {
+		(di->events.batt_unknown && !di->bm->chg_unknown_bat)) {
 		if (di->charge_state != STATE_HANDHELD) {
 			di->events.safety_timer_expired = false;
 			abx500_chargalg_state_to(di, STATE_HANDHELD_INIT);
@@ -1394,8 +1394,8 @@ static void abx500_chargalg_algorithm(struct abx500_chargalg *di)
 
 	case STATE_NORMAL_INIT:
 		abx500_chargalg_start_charging(di,
-			di->bat->bat_type[di->bat->batt_id].normal_vol_lvl,
-			di->bat->bat_type[di->bat->batt_id].normal_cur_lvl);
+			di->bm->bat_type[di->bm->batt_id].normal_vol_lvl,
+			di->bm->bat_type[di->bm->batt_id].normal_cur_lvl);
 		abx500_chargalg_state_to(di, STATE_NORMAL);
 		abx500_chargalg_start_safety_timer(di);
 		abx500_chargalg_stop_maintenance_timer(di);
@@ -1411,7 +1411,7 @@ static void abx500_chargalg_algorithm(struct abx500_chargalg *di)
 		handle_maxim_chg_curr(di);
 		if (di->charge_status == POWER_SUPPLY_STATUS_FULL &&
 			di->maintenance_chg) {
-			if (di->bat->no_maintenance)
+			if (di->bm->no_maintenance)
 				abx500_chargalg_state_to(di,
 					STATE_WAIT_FOR_RECHARGE_INIT);
 			else
@@ -1429,7 +1429,7 @@ static void abx500_chargalg_algorithm(struct abx500_chargalg *di)
 
 	case STATE_WAIT_FOR_RECHARGE:
 		if (di->batt_data.volt <=
-			di->bat->bat_type[di->bat->batt_id].recharge_vol) {
+			di->bm->bat_type[di->bm->batt_id].recharge_vol) {
 			if (di->rch_cnt-- == 0)
 				abx500_chargalg_state_to(di, STATE_NORMAL_INIT);
 		} else
@@ -1439,13 +1439,13 @@ static void abx500_chargalg_algorithm(struct abx500_chargalg *di)
 	case STATE_MAINTENANCE_A_INIT:
 		abx500_chargalg_stop_safety_timer(di);
 		abx500_chargalg_start_maintenance_timer(di,
-			di->bat->bat_type[
-				di->bat->batt_id].maint_a_chg_timer_h);
+			di->bm->bat_type[
+				di->bm->batt_id].maint_a_chg_timer_h);
 		abx500_chargalg_start_charging(di,
-			di->bat->bat_type[
-				di->bat->batt_id].maint_a_vol_lvl,
-			di->bat->bat_type[
-				di->bat->batt_id].maint_a_cur_lvl);
+			di->bm->bat_type[
+				di->bm->batt_id].maint_a_vol_lvl,
+			di->bm->bat_type[
+				di->bm->batt_id].maint_a_cur_lvl);
 		abx500_chargalg_state_to(di, STATE_MAINTENANCE_A);
 		power_supply_changed(&di->chargalg_psy);
 		/* Intentional fallthrough*/
@@ -1459,13 +1459,13 @@ static void abx500_chargalg_algorithm(struct abx500_chargalg *di)
 
 	case STATE_MAINTENANCE_B_INIT:
 		abx500_chargalg_start_maintenance_timer(di,
-			di->bat->bat_type[
-				di->bat->batt_id].maint_b_chg_timer_h);
+			di->bm->bat_type[
+				di->bm->batt_id].maint_b_chg_timer_h);
 		abx500_chargalg_start_charging(di,
-			di->bat->bat_type[
-				di->bat->batt_id].maint_b_vol_lvl,
-			di->bat->bat_type[
-				di->bat->batt_id].maint_b_cur_lvl);
+			di->bm->bat_type[
+				di->bm->batt_id].maint_b_vol_lvl,
+			di->bm->bat_type[
+				di->bm->batt_id].maint_b_cur_lvl);
 		abx500_chargalg_state_to(di, STATE_MAINTENANCE_B);
 		power_supply_changed(&di->chargalg_psy);
 		/* Intentional fallthrough*/
@@ -1479,10 +1479,10 @@ static void abx500_chargalg_algorithm(struct abx500_chargalg *di)
 
 	case STATE_TEMP_LOWHIGH_INIT:
 		abx500_chargalg_start_charging(di,
-			di->bat->bat_type[
-				di->bat->batt_id].low_high_vol_lvl,
-			di->bat->bat_type[
-				di->bat->batt_id].low_high_cur_lvl);
+			di->bm->bat_type[
+				di->bm->batt_id].low_high_vol_lvl,
+			di->bm->bat_type[
+				di->bm->batt_id].low_high_cur_lvl);
 		abx500_chargalg_stop_maintenance_timer(di);
 		di->charge_status = POWER_SUPPLY_STATUS_CHARGING;
 		abx500_chargalg_state_to(di, STATE_TEMP_LOWHIGH);
@@ -1543,11 +1543,11 @@ static void abx500_chargalg_periodic_work(struct work_struct *work)
 	if (di->chg_info.conn_chg)
 		queue_delayed_work(di->chargalg_wq,
 			&di->chargalg_periodic_work,
-			di->bat->interval_charging * HZ);
+			di->bm->interval_charging * HZ);
 	else
 		queue_delayed_work(di->chargalg_wq,
 			&di->chargalg_periodic_work,
-			di->bat->interval_not_charging * HZ);
+			di->bm->interval_not_charging * HZ);
 }
 
 /**
@@ -1614,7 +1614,7 @@ static int abx500_chargalg_get_property(struct power_supply *psy,
 		if (di->events.batt_ovv) {
 			val->intval = POWER_SUPPLY_HEALTH_OVERVOLTAGE;
 		} else if (di->events.btemp_underover) {
-			if (di->batt_data.temp <= di->bat->temp_under)
+			if (di->batt_data.temp <= di->bm->temp_under)
 				val->intval = POWER_SUPPLY_HEALTH_COLD;
 			else
 				val->intval = POWER_SUPPLY_HEALTH_OVERHEAT;
@@ -1814,10 +1814,10 @@ static int __devinit abx500_chargalg_probe(struct platform_device *pdev)
 		dev_err(&pdev->dev, "%s no mem for ab8500_chargalg\n", __func__);
 		return -ENOMEM;
 	}
-	di->bat = pdev->mfd_cell->platform_data;
-	if (!di->bat) {
+	di->bm = pdev->mfd_cell->platform_data;
+	if (!di->bm) {
 		if (np) {
-			ret = bmdevs_of_probe(&pdev->dev, np, &di->bat);
+			ret = ab8500_bm_of_probe(&pdev->dev, np, &di->bm);
 			if (ret) {
 				dev_err(&pdev->dev,
 					"failed to get battery information\n");
diff --git a/include/linux/mfd/abx500.h b/include/linux/mfd/abx500.h
index 2138bd33021a..6ce749a0e9d4 100644
--- a/include/linux/mfd/abx500.h
+++ b/include/linux/mfd/abx500.h
@@ -279,9 +279,9 @@ enum {
 	NTC_INTERNAL,
 };
 
-int bmdevs_of_probe(struct device *dev,
-		struct device_node *np,
-		struct abx500_bm_data **battery);
+int ab8500_bm_of_probe(struct device *dev,
+		       struct device_node *np,
+		       struct abx500_bm_data **battery);
 
 int abx500_set_register_interruptible(struct device *dev, u8 bank, u8 reg,
 	u8 value);
-- 
cgit v1.2.3


From 23a04f9f40f2b32ee593b768483105b1c776814d Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Thu, 29 Nov 2012 15:08:41 +0000
Subject: ab8500_bm: Always send platform specific battery information via
 pdata

Currently the AB8500 battery management subsystem receives platform
specific information via two different means depending on how the
platform is booted. If DT is not enabled, a reference to a *_bm_data
data structure containing each platform specific attribute is passed
though platform_data. However, if DT is enabled, then platform_data
is empty and the reference is gained though a DT specific probe
function. There are two issues here 1) the same reference is
being collected each time and 2) the DT way doesn't allow any
provisions to select different platform specific attributes, which
kind of defeats the object.

Cc: Samuel Ortiz <sameo@linux.intel.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/ab8500-core.c       |  8 --------
 drivers/power/ab8500_bmdata.c   | 33 ++++++++++++++-------------------
 drivers/power/ab8500_btemp.c    |  2 +-
 drivers/power/ab8500_charger.c  |  2 +-
 drivers/power/ab8500_fg.c       |  2 +-
 drivers/power/abx500_chargalg.c |  2 +-
 include/linux/mfd/abx500.h      |  2 +-
 7 files changed, 19 insertions(+), 32 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/ab8500-core.c b/drivers/mfd/ab8500-core.c
index 5ec70f26b9d5..bbd49d796902 100644
--- a/drivers/mfd/ab8500-core.c
+++ b/drivers/mfd/ab8500-core.c
@@ -1044,40 +1044,32 @@ static struct mfd_cell __devinitdata ab8500_bm_devs[] = {
 		.of_compatible = "stericsson,ab8500-charger",
 		.num_resources = ARRAY_SIZE(ab8500_charger_resources),
 		.resources = ab8500_charger_resources,
-#ifndef CONFIG_OF
 		.platform_data = &ab8500_bm_data,
 		.pdata_size = sizeof(ab8500_bm_data),
-#endif
 	},
 	{
 		.name = "ab8500-btemp",
 		.of_compatible = "stericsson,ab8500-btemp",
 		.num_resources = ARRAY_SIZE(ab8500_btemp_resources),
 		.resources = ab8500_btemp_resources,
-#ifndef CONFIG_OF
 		.platform_data = &ab8500_bm_data,
 		.pdata_size = sizeof(ab8500_bm_data),
-#endif
 	},
 	{
 		.name = "ab8500-fg",
 		.of_compatible = "stericsson,ab8500-fg",
 		.num_resources = ARRAY_SIZE(ab8500_fg_resources),
 		.resources = ab8500_fg_resources,
-#ifndef CONFIG_OF
 		.platform_data = &ab8500_bm_data,
 		.pdata_size = sizeof(ab8500_bm_data),
-#endif
 	},
 	{
 		.name = "ab8500-chargalg",
 		.of_compatible = "stericsson,ab8500-chargalg",
 		.num_resources = ARRAY_SIZE(ab8500_chargalg_resources),
 		.resources = ab8500_chargalg_resources,
-#ifndef CONFIG_OF
 		.platform_data = &ab8500_bm_data,
 		.pdata_size = sizeof(ab8500_bm_data),
-#endif
 	},
 };
 
diff --git a/drivers/power/ab8500_bmdata.c b/drivers/power/ab8500_bmdata.c
index c2fb2c554019..6b772e5f515f 100644
--- a/drivers/power/ab8500_bmdata.c
+++ b/drivers/power/ab8500_bmdata.c
@@ -454,16 +454,13 @@ struct abx500_bm_data ab8500_bm_data = {
 
 int __devinit ab8500_bm_of_probe(struct device *dev,
 				 struct device_node *np,
-				 struct abx500_bm_data **battery)
+				 struct abx500_bm_data *bm)
 {
 	struct batres_vs_temp *tmp_batres_tbl;
 	struct device_node *np_bat_supply;
-	struct abx500_bm_data *bat;
 	const char *btech;
 	int i;
 
-	*battery = &ab8500_bm_data;
-
 	/* get phandle to 'battery-info' node */
 	np_bat_supply = of_parse_phandle(np, "battery", 0);
 	if (!np_bat_supply) {
@@ -477,16 +474,14 @@ int __devinit ab8500_bm_of_probe(struct device *dev,
 		return -EINVAL;
 	}
 
-	bat = *battery;
-
 	if (strncmp(btech, "LION", 4) == 0) {
-		bat->no_maintenance  = true;
-		bat->chg_unknown_bat = true;
-		bat->bat_type[BATTERY_UNKNOWN].charge_full_design = 2600;
-		bat->bat_type[BATTERY_UNKNOWN].termination_vol    = 4150;
-		bat->bat_type[BATTERY_UNKNOWN].recharge_vol	  = 4130;
-		bat->bat_type[BATTERY_UNKNOWN].normal_cur_lvl	  = 520;
-		bat->bat_type[BATTERY_UNKNOWN].normal_vol_lvl	  = 4200;
+		bm->no_maintenance  = true;
+		bm->chg_unknown_bat = true;
+		bm->bat_type[BATTERY_UNKNOWN].charge_full_design = 2600;
+		bm->bat_type[BATTERY_UNKNOWN].termination_vol    = 4150;
+		bm->bat_type[BATTERY_UNKNOWN].recharge_vol       = 4130;
+		bm->bat_type[BATTERY_UNKNOWN].normal_cur_lvl     = 520;
+		bm->bat_type[BATTERY_UNKNOWN].normal_vol_lvl     = 4200;
 	}
 
 	if (of_property_read_bool(np_bat_supply, "thermistor-on-batctrl")) {
@@ -495,15 +490,15 @@ int __devinit ab8500_bm_of_probe(struct device *dev,
 		else
 			tmp_batres_tbl = temp_to_batres_tbl_thermistor;
 	} else {
-		bat->n_btypes   = 4;
-		bat->bat_type   = bat_type_ext_thermistor;
-		bat->adc_therm  = ABx500_ADC_THERM_BATTEMP;
-		tmp_batres_tbl  = temp_to_batres_tbl_ext_thermistor;
+		bm->n_btypes   = 4;
+		bm->bat_type   = bat_type_ext_thermistor;
+		bm->adc_therm  = ABx500_ADC_THERM_BATTEMP;
+		tmp_batres_tbl = temp_to_batres_tbl_ext_thermistor;
 	}
 
 	/* select the battery resolution table */
-	for (i = 0; i < bat->n_btypes; ++i)
-		bat->bat_type[i]->batres_tbl = tmp_batres_tbl;
+	for (i = 0; i < bm->n_btypes; ++i)
+		bm->bat_type[i].batres_tbl = tmp_batres_tbl;
 
 	of_node_put(np_bat_supply);
 
diff --git a/drivers/power/ab8500_btemp.c b/drivers/power/ab8500_btemp.c
index 33ed0fccbd0e..c0f7dcceae45 100644
--- a/drivers/power/ab8500_btemp.c
+++ b/drivers/power/ab8500_btemp.c
@@ -988,7 +988,7 @@ static int __devinit ab8500_btemp_probe(struct platform_device *pdev)
 	di->bm = pdev->mfd_cell->platform_data;
 	if (!di->bm) {
 		if (np) {
-			ret = ab8500_bm_of_probe(&pdev->dev, np, &di->bm);
+			ret = ab8500_bm_of_probe(&pdev->dev, np, di->bm);
 			if (ret) {
 				dev_err(&pdev->dev,
 					"failed to get battery information\n");
diff --git a/drivers/power/ab8500_charger.c b/drivers/power/ab8500_charger.c
index 21dc8422778d..c1077df26783 100644
--- a/drivers/power/ab8500_charger.c
+++ b/drivers/power/ab8500_charger.c
@@ -2647,7 +2647,7 @@ static int __devinit ab8500_charger_probe(struct platform_device *pdev)
 	di->bm = pdev->mfd_cell->platform_data;
 	if (!di->bm) {
 		if (np) {
-			ret = ab8500_bm_of_probe(&pdev->dev, np, &di->bm);
+			ret = ab8500_bm_of_probe(&pdev->dev, np, di->bm);
 			if (ret) {
 				dev_err(&pdev->dev,
 					"failed to get battery information\n");
diff --git a/drivers/power/ab8500_fg.c b/drivers/power/ab8500_fg.c
index 4cf231375de3..8bb9df92627d 100644
--- a/drivers/power/ab8500_fg.c
+++ b/drivers/power/ab8500_fg.c
@@ -2460,7 +2460,7 @@ static int __devinit ab8500_fg_probe(struct platform_device *pdev)
 	di->bm = pdev->mfd_cell->platform_data;
 	if (!di->bm) {
 		if (np) {
-			ret = ab8500_bm_of_probe(&pdev->dev, np, &di->bm);
+			ret = ab8500_bm_of_probe(&pdev->dev, np, di->bm);
 			if (ret) {
 				dev_err(&pdev->dev,
 					"failed to get battery information\n");
diff --git a/drivers/power/abx500_chargalg.c b/drivers/power/abx500_chargalg.c
index ea2e2eb652ef..22a511c2a719 100644
--- a/drivers/power/abx500_chargalg.c
+++ b/drivers/power/abx500_chargalg.c
@@ -1817,7 +1817,7 @@ static int __devinit abx500_chargalg_probe(struct platform_device *pdev)
 	di->bm = pdev->mfd_cell->platform_data;
 	if (!di->bm) {
 		if (np) {
-			ret = ab8500_bm_of_probe(&pdev->dev, np, &di->bm);
+			ret = ab8500_bm_of_probe(&pdev->dev, np, di->bm);
 			if (ret) {
 				dev_err(&pdev->dev,
 					"failed to get battery information\n");
diff --git a/include/linux/mfd/abx500.h b/include/linux/mfd/abx500.h
index 6ce749a0e9d4..4906b1842d2f 100644
--- a/include/linux/mfd/abx500.h
+++ b/include/linux/mfd/abx500.h
@@ -281,7 +281,7 @@ enum {
 
 int ab8500_bm_of_probe(struct device *dev,
 		       struct device_node *np,
-		       struct abx500_bm_data **battery);
+		       struct abx500_bm_data *bm);
 
 int abx500_set_register_interruptible(struct device *dev, u8 bank, u8 reg,
 	u8 value);
-- 
cgit v1.2.3


From 3c58346525d82625e68e24f071804c2dc057b6f4 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <cl@linux.com>
Date: Wed, 28 Nov 2012 16:23:01 +0000
Subject: slab: Simplify bootstrap

The nodelists field in kmem_cache is pointing to the first unused
object in the array field when bootstrap is complete.

A problem with the current approach is that the statically sized
kmem_cache structure use on boot can only contain NR_CPUS entries.
If the number of nodes plus the number of cpus is greater then we
would overwrite memory following the kmem_cache_boot definition.

Increase the size of the array field to ensure that also the node
pointers fit into the array field.

Once we do that we no longer need the kmem_cache_nodelists
array and we can then also use that structure elsewhere.

Acked-by: Glauber Costa <glommer@parallels.com>
Signed-off-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Pekka Enberg <penberg@kernel.org>
---
 include/linux/slab_def.h |  6 +++++-
 mm/slab.c                | 21 +++++++++++++--------
 2 files changed, 18 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h
index cc290f0bdb34..45c0356fdc8c 100644
--- a/include/linux/slab_def.h
+++ b/include/linux/slab_def.h
@@ -89,9 +89,13 @@ struct kmem_cache {
 	 * (see kmem_cache_init())
 	 * We still use [NR_CPUS] and not [1] or [0] because cache_cache
 	 * is statically defined, so we reserve the max number of cpus.
+	 *
+	 * We also need to guarantee that the list is able to accomodate a
+	 * pointer for each node since "nodelists" uses the remainder of
+	 * available pointers.
 	 */
 	struct kmem_list3 **nodelists;
-	struct array_cache *array[NR_CPUS];
+	struct array_cache *array[NR_CPUS + MAX_NUMNODES];
 	/*
 	 * Do not add fields after array[]
 	 */
diff --git a/mm/slab.c b/mm/slab.c
index e26bff5ed1a6..c7ea5234c4e9 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -553,9 +553,7 @@ static struct arraycache_init initarray_generic =
     { {0, BOOT_CPUCACHE_ENTRIES, 1, 0} };
 
 /* internal cache of cache description objs */
-static struct kmem_list3 *kmem_cache_nodelists[MAX_NUMNODES];
 static struct kmem_cache kmem_cache_boot = {
-	.nodelists = kmem_cache_nodelists,
 	.batchcount = 1,
 	.limit = BOOT_CPUCACHE_ENTRIES,
 	.shared = 1,
@@ -1559,6 +1557,15 @@ static void __init set_up_list3s(struct kmem_cache *cachep, int index)
 	}
 }
 
+/*
+ * The memory after the last cpu cache pointer is used for the
+ * the nodelists pointer.
+ */
+static void setup_nodelists_pointer(struct kmem_cache *cachep)
+{
+	cachep->nodelists = (struct kmem_list3 **)&cachep->array[nr_cpu_ids];
+}
+
 /*
  * Initialisation.  Called after the page allocator have been initialised and
  * before smp_init().
@@ -1573,15 +1580,14 @@ void __init kmem_cache_init(void)
 	int node;
 
 	kmem_cache = &kmem_cache_boot;
+	setup_nodelists_pointer(kmem_cache);
 
 	if (num_possible_nodes() == 1)
 		use_alien_caches = 0;
 
-	for (i = 0; i < NUM_INIT_LISTS; i++) {
+	for (i = 0; i < NUM_INIT_LISTS; i++)
 		kmem_list3_init(&initkmem_list3[i]);
-		if (i < MAX_NUMNODES)
-			kmem_cache->nodelists[i] = NULL;
-	}
+
 	set_up_list3s(kmem_cache, CACHE_CACHE);
 
 	/*
@@ -1619,7 +1625,6 @@ void __init kmem_cache_init(void)
 	list_add(&kmem_cache->list, &slab_caches);
 	kmem_cache->colour_off = cache_line_size();
 	kmem_cache->array[smp_processor_id()] = &initarray_cache.cache;
-	kmem_cache->nodelists[node] = &initkmem_list3[CACHE_CACHE + node];
 
 	/*
 	 * struct kmem_cache size depends on nr_node_ids & nr_cpu_ids
@@ -2422,7 +2427,7 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
 	else
 		gfp = GFP_NOWAIT;
 
-	cachep->nodelists = (struct kmem_list3 **)&cachep->array[nr_cpu_ids];
+	setup_nodelists_pointer(cachep);
 #if DEBUG
 
 	/*
-- 
cgit v1.2.3


From 7da4d641c58d201c3cc1835c05ca1a7fa26f0856 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 19 Nov 2012 03:14:23 +0100
Subject: mm: Count the number of pages affected in change_protection()

This will be used for three kinds of purposes:

 - to optimize mprotect()

 - to speed up working set scanning for working set areas that
   have not been touched

 - to more accurately scan per real working set

No change in functionality from this patch.

Suggested-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Hugh Dickins <hughd@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/hugetlb.h |  8 +++++--
 include/linux/mm.h      |  3 +++
 mm/hugetlb.c            | 10 +++++++--
 mm/mprotect.c           | 58 +++++++++++++++++++++++++++++++++++++------------
 4 files changed, 61 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 225164842ab6..06e691baab86 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -87,7 +87,7 @@ struct page *follow_huge_pud(struct mm_struct *mm, unsigned long address,
 				pud_t *pud, int write);
 int pmd_huge(pmd_t pmd);
 int pud_huge(pud_t pmd);
-void hugetlb_change_protection(struct vm_area_struct *vma,
+unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
 		unsigned long address, unsigned long end, pgprot_t newprot);
 
 #else /* !CONFIG_HUGETLB_PAGE */
@@ -132,7 +132,11 @@ static inline void copy_huge_page(struct page *dst, struct page *src)
 {
 }
 
-#define hugetlb_change_protection(vma, address, end, newprot)
+static inline unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
+		unsigned long address, unsigned long end, pgprot_t newprot)
+{
+	return 0;
+}
 
 static inline void __unmap_hugepage_range_final(struct mmu_gather *tlb,
 			struct vm_area_struct *vma, unsigned long start,
diff --git a/include/linux/mm.h b/include/linux/mm.h
index bcaab4e6fe91..1856c62d82cd 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1078,6 +1078,9 @@ extern unsigned long move_page_tables(struct vm_area_struct *vma,
 extern unsigned long do_mremap(unsigned long addr,
 			       unsigned long old_len, unsigned long new_len,
 			       unsigned long flags, unsigned long new_addr);
+extern unsigned long change_protection(struct vm_area_struct *vma, unsigned long start,
+			      unsigned long end, pgprot_t newprot,
+			      int dirty_accountable);
 extern int mprotect_fixup(struct vm_area_struct *vma,
 			  struct vm_area_struct **pprev, unsigned long start,
 			  unsigned long end, unsigned long newflags);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 59a0059b39e2..712895eda0d0 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3014,7 +3014,7 @@ same_page:
 	return i ? i : -EFAULT;
 }
 
-void hugetlb_change_protection(struct vm_area_struct *vma,
+unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
 		unsigned long address, unsigned long end, pgprot_t newprot)
 {
 	struct mm_struct *mm = vma->vm_mm;
@@ -3022,6 +3022,7 @@ void hugetlb_change_protection(struct vm_area_struct *vma,
 	pte_t *ptep;
 	pte_t pte;
 	struct hstate *h = hstate_vma(vma);
+	unsigned long pages = 0;
 
 	BUG_ON(address >= end);
 	flush_cache_range(vma, address, end);
@@ -3032,12 +3033,15 @@ void hugetlb_change_protection(struct vm_area_struct *vma,
 		ptep = huge_pte_offset(mm, address);
 		if (!ptep)
 			continue;
-		if (huge_pmd_unshare(mm, &address, ptep))
+		if (huge_pmd_unshare(mm, &address, ptep)) {
+			pages++;
 			continue;
+		}
 		if (!huge_pte_none(huge_ptep_get(ptep))) {
 			pte = huge_ptep_get_and_clear(mm, address, ptep);
 			pte = pte_mkhuge(pte_modify(pte, newprot));
 			set_huge_pte_at(mm, address, ptep, pte);
+			pages++;
 		}
 	}
 	spin_unlock(&mm->page_table_lock);
@@ -3049,6 +3053,8 @@ void hugetlb_change_protection(struct vm_area_struct *vma,
 	 */
 	flush_tlb_range(vma, start, end);
 	mutex_unlock(&vma->vm_file->f_mapping->i_mmap_mutex);
+
+	return pages << h->order;
 }
 
 int hugetlb_reserve_pages(struct inode *inode,
diff --git a/mm/mprotect.c b/mm/mprotect.c
index a40992610ab6..1e265be25f85 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -35,12 +35,13 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
 }
 #endif
 
-static void change_pte_range(struct mm_struct *mm, pmd_t *pmd,
+static unsigned long change_pte_range(struct mm_struct *mm, pmd_t *pmd,
 		unsigned long addr, unsigned long end, pgprot_t newprot,
 		int dirty_accountable)
 {
 	pte_t *pte, oldpte;
 	spinlock_t *ptl;
+	unsigned long pages = 0;
 
 	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
 	arch_enter_lazy_mmu_mode();
@@ -60,6 +61,7 @@ static void change_pte_range(struct mm_struct *mm, pmd_t *pmd,
 				ptent = pte_mkwrite(ptent);
 
 			ptep_modify_prot_commit(mm, addr, pte, ptent);
+			pages++;
 		} else if (IS_ENABLED(CONFIG_MIGRATION) && !pte_file(oldpte)) {
 			swp_entry_t entry = pte_to_swp_entry(oldpte);
 
@@ -72,18 +74,22 @@ static void change_pte_range(struct mm_struct *mm, pmd_t *pmd,
 				set_pte_at(mm, addr, pte,
 					swp_entry_to_pte(entry));
 			}
+			pages++;
 		}
 	} while (pte++, addr += PAGE_SIZE, addr != end);
 	arch_leave_lazy_mmu_mode();
 	pte_unmap_unlock(pte - 1, ptl);
+
+	return pages;
 }
 
-static inline void change_pmd_range(struct vm_area_struct *vma, pud_t *pud,
+static inline unsigned long change_pmd_range(struct vm_area_struct *vma, pud_t *pud,
 		unsigned long addr, unsigned long end, pgprot_t newprot,
 		int dirty_accountable)
 {
 	pmd_t *pmd;
 	unsigned long next;
+	unsigned long pages = 0;
 
 	pmd = pmd_offset(pud, addr);
 	do {
@@ -91,35 +97,42 @@ static inline void change_pmd_range(struct vm_area_struct *vma, pud_t *pud,
 		if (pmd_trans_huge(*pmd)) {
 			if (next - addr != HPAGE_PMD_SIZE)
 				split_huge_page_pmd(vma->vm_mm, pmd);
-			else if (change_huge_pmd(vma, pmd, addr, newprot))
+			else if (change_huge_pmd(vma, pmd, addr, newprot)) {
+				pages += HPAGE_PMD_NR;
 				continue;
+			}
 			/* fall through */
 		}
 		if (pmd_none_or_clear_bad(pmd))
 			continue;
-		change_pte_range(vma->vm_mm, pmd, addr, next, newprot,
+		pages += change_pte_range(vma->vm_mm, pmd, addr, next, newprot,
 				 dirty_accountable);
 	} while (pmd++, addr = next, addr != end);
+
+	return pages;
 }
 
-static inline void change_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
+static inline unsigned long change_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
 		unsigned long addr, unsigned long end, pgprot_t newprot,
 		int dirty_accountable)
 {
 	pud_t *pud;
 	unsigned long next;
+	unsigned long pages = 0;
 
 	pud = pud_offset(pgd, addr);
 	do {
 		next = pud_addr_end(addr, end);
 		if (pud_none_or_clear_bad(pud))
 			continue;
-		change_pmd_range(vma, pud, addr, next, newprot,
+		pages += change_pmd_range(vma, pud, addr, next, newprot,
 				 dirty_accountable);
 	} while (pud++, addr = next, addr != end);
+
+	return pages;
 }
 
-static void change_protection(struct vm_area_struct *vma,
+static unsigned long change_protection_range(struct vm_area_struct *vma,
 		unsigned long addr, unsigned long end, pgprot_t newprot,
 		int dirty_accountable)
 {
@@ -127,6 +140,7 @@ static void change_protection(struct vm_area_struct *vma,
 	pgd_t *pgd;
 	unsigned long next;
 	unsigned long start = addr;
+	unsigned long pages = 0;
 
 	BUG_ON(addr >= end);
 	pgd = pgd_offset(mm, addr);
@@ -135,10 +149,30 @@ static void change_protection(struct vm_area_struct *vma,
 		next = pgd_addr_end(addr, end);
 		if (pgd_none_or_clear_bad(pgd))
 			continue;
-		change_pud_range(vma, pgd, addr, next, newprot,
+		pages += change_pud_range(vma, pgd, addr, next, newprot,
 				 dirty_accountable);
 	} while (pgd++, addr = next, addr != end);
+
 	flush_tlb_range(vma, start, end);
+
+	return pages;
+}
+
+unsigned long change_protection(struct vm_area_struct *vma, unsigned long start,
+		       unsigned long end, pgprot_t newprot,
+		       int dirty_accountable)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	unsigned long pages;
+
+	mmu_notifier_invalidate_range_start(mm, start, end);
+	if (is_vm_hugetlb_page(vma))
+		pages = hugetlb_change_protection(vma, start, end, newprot);
+	else
+		pages = change_protection_range(vma, start, end, newprot, dirty_accountable);
+	mmu_notifier_invalidate_range_end(mm, start, end);
+
+	return pages;
 }
 
 int
@@ -213,12 +247,8 @@ success:
 		dirty_accountable = 1;
 	}
 
-	mmu_notifier_invalidate_range_start(mm, start, end);
-	if (is_vm_hugetlb_page(vma))
-		hugetlb_change_protection(vma, start, end, vma->vm_page_prot);
-	else
-		change_protection(vma, start, end, vma->vm_page_prot, dirty_accountable);
-	mmu_notifier_invalidate_range_end(mm, start, end);
+	change_protection(vma, start, end, vma->vm_page_prot, dirty_accountable);
+
 	vm_stat_account(mm, oldflags, vma->vm_file, -nrpages);
 	vm_stat_account(mm, newflags, vma->vm_file, nrpages);
 	perf_event_mmap(vma);
-- 
cgit v1.2.3


From 5647bc293ab15f66a7b1cda850c5e9d162a6c7c2 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Fri, 19 Oct 2012 10:46:20 +0100
Subject: mm: compaction: Move migration fail/success stats to migrate.c

The compact_pages_moved and compact_pagemigrate_failed events are
convenient for determining if compaction is active and to what
degree migration is succeeding but it's at the wrong level. Other
users of migration may also want to know if migration is working
properly and this will be particularly true for any automated
NUMA migration. This patch moves the counters down to migration
with the new events called pgmigrate_success and pgmigrate_fail.
The compact_blocks_moved counter is removed because while it was
useful for debugging initially, it's worthless now as no meaningful
conclusions can be drawn from its value.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
---
 include/linux/vm_event_item.h | 4 +++-
 mm/compaction.c               | 4 ----
 mm/migrate.c                  | 6 ++++++
 mm/vmstat.c                   | 7 ++++---
 4 files changed, 13 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index 3d3114594370..8aa7cb94b5fb 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -38,8 +38,10 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
 		KSWAPD_LOW_WMARK_HIT_QUICKLY, KSWAPD_HIGH_WMARK_HIT_QUICKLY,
 		KSWAPD_SKIP_CONGESTION_WAIT,
 		PAGEOUTRUN, ALLOCSTALL, PGROTATED,
+#ifdef CONFIG_MIGRATION
+		PGMIGRATE_SUCCESS, PGMIGRATE_FAIL,
+#endif
 #ifdef CONFIG_COMPACTION
-		COMPACTBLOCKS, COMPACTPAGES, COMPACTPAGEFAILED,
 		COMPACTSTALL, COMPACTFAIL, COMPACTSUCCESS,
 #endif
 #ifdef CONFIG_HUGETLB_PAGE
diff --git a/mm/compaction.c b/mm/compaction.c
index 9eef55838fca..00ad88395216 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -994,10 +994,6 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
 		update_nr_listpages(cc);
 		nr_remaining = cc->nr_migratepages;
 
-		count_vm_event(COMPACTBLOCKS);
-		count_vm_events(COMPACTPAGES, nr_migrate - nr_remaining);
-		if (nr_remaining)
-			count_vm_events(COMPACTPAGEFAILED, nr_remaining);
 		trace_mm_compaction_migratepages(nr_migrate - nr_remaining,
 						nr_remaining);
 
diff --git a/mm/migrate.c b/mm/migrate.c
index 77ed2d773705..04687f69cc17 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -962,6 +962,7 @@ int migrate_pages(struct list_head *from,
 {
 	int retry = 1;
 	int nr_failed = 0;
+	int nr_succeeded = 0;
 	int pass = 0;
 	struct page *page;
 	struct page *page2;
@@ -988,6 +989,7 @@ int migrate_pages(struct list_head *from,
 				retry++;
 				break;
 			case 0:
+				nr_succeeded++;
 				break;
 			default:
 				/* Permanent failure */
@@ -998,6 +1000,10 @@ int migrate_pages(struct list_head *from,
 	}
 	rc = 0;
 out:
+	if (nr_succeeded)
+		count_vm_events(PGMIGRATE_SUCCESS, nr_succeeded);
+	if (nr_failed)
+		count_vm_events(PGMIGRATE_FAIL, nr_failed);
 	if (!swapwrite)
 		current->flags &= ~PF_SWAPWRITE;
 
diff --git a/mm/vmstat.c b/mm/vmstat.c
index c7370579111b..89a7fd665b32 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -774,10 +774,11 @@ const char * const vmstat_text[] = {
 
 	"pgrotated",
 
+#ifdef CONFIG_MIGRATION
+	"pgmigrate_success",
+	"pgmigrate_fail",
+#endif
 #ifdef CONFIG_COMPACTION
-	"compact_blocks_moved",
-	"compact_pages_moved",
-	"compact_pagemigrate_failed",
 	"compact_stall",
 	"compact_fail",
 	"compact_success",
-- 
cgit v1.2.3


From 7b2a2d4a18fffac3c4872021529b0657896db788 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Fri, 19 Oct 2012 14:07:31 +0100
Subject: mm: migrate: Add a tracepoint for migrate_pages

The pgmigrate_success and pgmigrate_fail vmstat counters tells the user
about migration activity but not the type or the reason. This patch adds
a tracepoint to identify the type of page migration and why the page is
being migrated.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
---
 include/linux/migrate.h        | 13 +++++++++--
 include/trace/events/migrate.h | 51 ++++++++++++++++++++++++++++++++++++++++++
 mm/compaction.c                |  3 ++-
 mm/memory-failure.c            |  3 ++-
 mm/memory_hotplug.c            |  3 ++-
 mm/mempolicy.c                 |  6 +++--
 mm/migrate.c                   | 10 +++++++--
 mm/page_alloc.c                |  3 ++-
 8 files changed, 82 insertions(+), 10 deletions(-)
 create mode 100644 include/trace/events/migrate.h

(limited to 'include/linux')

diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index ce7e6671968b..9d1c159e2427 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -7,6 +7,15 @@
 
 typedef struct page *new_page_t(struct page *, unsigned long private, int **);
 
+enum migrate_reason {
+	MR_COMPACTION,
+	MR_MEMORY_FAILURE,
+	MR_MEMORY_HOTPLUG,
+	MR_SYSCALL,		/* also applies to cpusets */
+	MR_MEMPOLICY_MBIND,
+	MR_CMA
+};
+
 #ifdef CONFIG_MIGRATION
 
 extern void putback_lru_pages(struct list_head *l);
@@ -14,7 +23,7 @@ extern int migrate_page(struct address_space *,
 			struct page *, struct page *, enum migrate_mode);
 extern int migrate_pages(struct list_head *l, new_page_t x,
 			unsigned long private, bool offlining,
-			enum migrate_mode mode);
+			enum migrate_mode mode, int reason);
 extern int migrate_huge_page(struct page *, new_page_t x,
 			unsigned long private, bool offlining,
 			enum migrate_mode mode);
@@ -35,7 +44,7 @@ extern int migrate_huge_page_move_mapping(struct address_space *mapping,
 static inline void putback_lru_pages(struct list_head *l) {}
 static inline int migrate_pages(struct list_head *l, new_page_t x,
 		unsigned long private, bool offlining,
-		enum migrate_mode mode) { return -ENOSYS; }
+		enum migrate_mode mode, int reason) { return -ENOSYS; }
 static inline int migrate_huge_page(struct page *page, new_page_t x,
 		unsigned long private, bool offlining,
 		enum migrate_mode mode) { return -ENOSYS; }
diff --git a/include/trace/events/migrate.h b/include/trace/events/migrate.h
new file mode 100644
index 000000000000..ec2a6ccfd7e5
--- /dev/null
+++ b/include/trace/events/migrate.h
@@ -0,0 +1,51 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM migrate
+
+#if !defined(_TRACE_MIGRATE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_MIGRATE_H
+
+#define MIGRATE_MODE						\
+	{MIGRATE_ASYNC,		"MIGRATE_ASYNC"},		\
+	{MIGRATE_SYNC_LIGHT,	"MIGRATE_SYNC_LIGHT"},		\
+	{MIGRATE_SYNC,		"MIGRATE_SYNC"}		
+
+#define MIGRATE_REASON						\
+	{MR_COMPACTION,		"compaction"},			\
+	{MR_MEMORY_FAILURE,	"memory_failure"},		\
+	{MR_MEMORY_HOTPLUG,	"memory_hotplug"},		\
+	{MR_SYSCALL,		"syscall_or_cpuset"},		\
+	{MR_MEMPOLICY_MBIND,	"mempolicy_mbind"},		\
+	{MR_CMA,		"cma"}
+
+TRACE_EVENT(mm_migrate_pages,
+
+	TP_PROTO(unsigned long succeeded, unsigned long failed,
+		 enum migrate_mode mode, int reason),
+
+	TP_ARGS(succeeded, failed, mode, reason),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,		succeeded)
+		__field(	unsigned long,		failed)
+		__field(	enum migrate_mode,	mode)
+		__field(	int,			reason)
+	),
+
+	TP_fast_assign(
+		__entry->succeeded	= succeeded;
+		__entry->failed		= failed;
+		__entry->mode		= mode;
+		__entry->reason		= reason;
+	),
+
+	TP_printk("nr_succeeded=%lu nr_failed=%lu mode=%s reason=%s",
+		__entry->succeeded,
+		__entry->failed,
+		__print_symbolic(__entry->mode, MIGRATE_MODE),
+		__print_symbolic(__entry->reason, MIGRATE_REASON))
+);
+
+#endif /* _TRACE_MIGRATE_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/mm/compaction.c b/mm/compaction.c
index 00ad88395216..2c077a78487c 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -990,7 +990,8 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
 		nr_migrate = cc->nr_migratepages;
 		err = migrate_pages(&cc->migratepages, compaction_alloc,
 				(unsigned long)cc, false,
-				cc->sync ? MIGRATE_SYNC_LIGHT : MIGRATE_ASYNC);
+				cc->sync ? MIGRATE_SYNC_LIGHT : MIGRATE_ASYNC,
+				MR_COMPACTION);
 		update_nr_listpages(cc);
 		nr_remaining = cc->nr_migratepages;
 
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 6c5899b9034a..ddb68a169e45 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1558,7 +1558,8 @@ int soft_offline_page(struct page *page, int flags)
 					    page_is_file_cache(page));
 		list_add(&page->lru, &pagelist);
 		ret = migrate_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL,
-							false, MIGRATE_SYNC);
+							false, MIGRATE_SYNC,
+							MR_MEMORY_FAILURE);
 		if (ret) {
 			putback_lru_pages(&pagelist);
 			pr_info("soft offline: %#lx: migration failed %d, type %lx\n",
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index e4eeacae2b91..e598bd15c041 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -812,7 +812,8 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
 		 * migrate_pages returns # of failed pages.
 		 */
 		ret = migrate_pages(&source, alloc_migrate_target, 0,
-							true, MIGRATE_SYNC);
+							true, MIGRATE_SYNC,
+							MR_MEMORY_HOTPLUG);
 		if (ret)
 			putback_lru_pages(&source);
 	}
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index d04a8a54c294..66e90ecc2350 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -961,7 +961,8 @@ static int migrate_to_node(struct mm_struct *mm, int source, int dest,
 
 	if (!list_empty(&pagelist)) {
 		err = migrate_pages(&pagelist, new_node_page, dest,
-							false, MIGRATE_SYNC);
+							false, MIGRATE_SYNC,
+							MR_SYSCALL);
 		if (err)
 			putback_lru_pages(&pagelist);
 	}
@@ -1202,7 +1203,8 @@ static long do_mbind(unsigned long start, unsigned long len,
 		if (!list_empty(&pagelist)) {
 			nr_failed = migrate_pages(&pagelist, new_vma_page,
 						(unsigned long)vma,
-						false, MIGRATE_SYNC);
+						false, MIGRATE_SYNC,
+						MR_MEMPOLICY_MBIND);
 			if (nr_failed)
 				putback_lru_pages(&pagelist);
 		}
diff --git a/mm/migrate.c b/mm/migrate.c
index 04687f69cc17..27be9c923dc1 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -38,6 +38,9 @@
 
 #include <asm/tlbflush.h>
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/migrate.h>
+
 #include "internal.h"
 
 /*
@@ -958,7 +961,7 @@ out:
  */
 int migrate_pages(struct list_head *from,
 		new_page_t get_new_page, unsigned long private, bool offlining,
-		enum migrate_mode mode)
+		enum migrate_mode mode, int reason)
 {
 	int retry = 1;
 	int nr_failed = 0;
@@ -1004,6 +1007,8 @@ out:
 		count_vm_events(PGMIGRATE_SUCCESS, nr_succeeded);
 	if (nr_failed)
 		count_vm_events(PGMIGRATE_FAIL, nr_failed);
+	trace_mm_migrate_pages(nr_succeeded, nr_failed, mode, reason);
+
 	if (!swapwrite)
 		current->flags &= ~PF_SWAPWRITE;
 
@@ -1145,7 +1150,8 @@ set_status:
 	err = 0;
 	if (!list_empty(&pagelist)) {
 		err = migrate_pages(&pagelist, new_page_node,
-				(unsigned long)pm, 0, MIGRATE_SYNC);
+				(unsigned long)pm, 0, MIGRATE_SYNC,
+				MR_SYSCALL);
 		if (err)
 			putback_lru_pages(&pagelist);
 	}
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 7bb35ac0964a..5953dc2d196f 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5707,7 +5707,8 @@ static int __alloc_contig_migrate_range(struct compact_control *cc,
 
 		ret = migrate_pages(&cc->migratepages,
 				    alloc_migrate_target,
-				    0, false, MIGRATE_SYNC);
+				    0, false, MIGRATE_SYNC,
+				    MR_CMA);
 	}
 
 	putback_lru_pages(&cc->migratepages);
-- 
cgit v1.2.3


From 397487db696cae0b026a474a5cd66f4e372995e6 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Fri, 19 Oct 2012 12:00:10 +0100
Subject: mm: compaction: Add scanned and isolated counters for compaction

Compaction already has tracepoints to count scanned and isolated pages
but it requires that ftrace be enabled and if that information has to be
written to disk then it can be disruptive. This patch adds vmstat counters
for compaction called compact_migrate_scanned, compact_free_scanned and
compact_isolated.

With these counters, it is possible to define a basic cost model for
compaction. This approximates of how much work compaction is doing and can
be compared that with an oprofile showing TLB misses and see if the cost of
compaction is being offset by THP for example. Minimally a compaction patch
can be evaluated in terms of whether it increases or decreases cost. The
basic cost model looks like this

Fundamental unit u:	a word	sizeof(void *)

Ca  = cost of struct page access = sizeof(struct page) / u

Cmc = Cost migrate page copy = (Ca + PAGE_SIZE/u) * 2
Cmf = Cost migrate failure   = Ca * 2
Ci  = Cost page isolation    = (Ca + Wi)
	where Wi is a constant that should reflect the approximate
	cost of the locking operation.

Csm = Cost migrate scanning = Ca
Csf = Cost free    scanning = Ca

Overall cost =	(Csm * compact_migrate_scanned) +
	      	(Csf * compact_free_scanned)    +
	      	(Ci  * compact_isolated)	+
		(Cmc * pgmigrate_success)	+
		(Cmf * pgmigrate_failed)

Where the values are read from /proc/vmstat.

This is very basic and ignores certain costs such as the allocation cost
to do a migrate page copy but any improvement to the model would still
use the same vmstat counters.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
---
 include/linux/vm_event_item.h | 2 ++
 mm/compaction.c               | 8 ++++++++
 mm/vmstat.c                   | 3 +++
 3 files changed, 13 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index 8aa7cb94b5fb..a1f750b8e72a 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -42,6 +42,8 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
 		PGMIGRATE_SUCCESS, PGMIGRATE_FAIL,
 #endif
 #ifdef CONFIG_COMPACTION
+		COMPACTMIGRATE_SCANNED, COMPACTFREE_SCANNED,
+		COMPACTISOLATED,
 		COMPACTSTALL, COMPACTFAIL, COMPACTSUCCESS,
 #endif
 #ifdef CONFIG_HUGETLB_PAGE
diff --git a/mm/compaction.c b/mm/compaction.c
index 2c077a78487c..aee7443a4d5a 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -356,6 +356,10 @@ static unsigned long isolate_freepages_block(struct compact_control *cc,
 	if (blockpfn == end_pfn)
 		update_pageblock_skip(cc, valid_page, total_isolated, false);
 
+	count_vm_events(COMPACTFREE_SCANNED, nr_scanned);
+	if (total_isolated)
+		count_vm_events(COMPACTISOLATED, total_isolated);
+
 	return total_isolated;
 }
 
@@ -646,6 +650,10 @@ next_pageblock:
 
 	trace_mm_compaction_isolate_migratepages(nr_scanned, nr_isolated);
 
+	count_vm_events(COMPACTMIGRATE_SCANNED, nr_scanned);
+	if (nr_isolated)
+		count_vm_events(COMPACTISOLATED, nr_isolated);
+
 	return low_pfn;
 }
 
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 89a7fd665b32..3a067fabe190 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -779,6 +779,9 @@ const char * const vmstat_text[] = {
 	"pgmigrate_fail",
 #endif
 #ifdef CONFIG_COMPACTION
+	"compact_migrate_scanned",
+	"compact_free_scanned",
+	"compact_isolated",
 	"compact_stall",
 	"compact_fail",
 	"compact_success",
-- 
cgit v1.2.3


From 0b9d705297b273657923518dbea2377cd03532ed Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <aarcange@redhat.com>
Date: Fri, 5 Oct 2012 21:36:27 +0200
Subject: mm: numa: Support NUMA hinting page faults from gup/gup_fast

Introduce FOLL_NUMA to tell follow_page to check
pte/pmd_numa. get_user_pages must use FOLL_NUMA, and it's safe to do
so because it always invokes handle_mm_fault and retries the
follow_page later.

KVM secondary MMU page faults will trigger the NUMA hinting page
faults through gup_fast -> get_user_pages -> follow_page ->
handle_mm_fault.

Other follow_page callers like KSM should not use FOLL_NUMA, or they
would fail to get the pages if they use follow_page instead of
get_user_pages.

[ This patch was picked up from the AutoNUMA tree. ]

Originally-by: Andrea Arcangeli <aarcange@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Rik van Riel <riel@redhat.com>
[ ported to this tree. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Reviewed-by: Rik van Riel <riel@redhat.com>
---
 include/linux/mm.h |  1 +
 mm/memory.c        | 17 +++++++++++++++++
 2 files changed, 18 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 1856c62d82cd..fa1615211159 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1572,6 +1572,7 @@ struct page *follow_page(struct vm_area_struct *, unsigned long address,
 #define FOLL_MLOCK	0x40	/* mark page as mlocked */
 #define FOLL_SPLIT	0x80	/* don't return transhuge pages, split them */
 #define FOLL_HWPOISON	0x100	/* check page is hwpoisoned */
+#define FOLL_NUMA	0x200	/* force NUMA hinting page fault */
 
 typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr,
 			void *data);
diff --git a/mm/memory.c b/mm/memory.c
index 7cf762857baa..cd8e0daf1912 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1517,6 +1517,8 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
 		page = follow_huge_pmd(mm, address, pmd, flags & FOLL_WRITE);
 		goto out;
 	}
+	if ((flags & FOLL_NUMA) && pmd_numa(*pmd))
+		goto no_page_table;
 	if (pmd_trans_huge(*pmd)) {
 		if (flags & FOLL_SPLIT) {
 			split_huge_page_pmd(mm, pmd);
@@ -1546,6 +1548,8 @@ split_fallthrough:
 	pte = *ptep;
 	if (!pte_present(pte))
 		goto no_page;
+	if ((flags & FOLL_NUMA) && pte_numa(pte))
+		goto no_page;
 	if ((flags & FOLL_WRITE) && !pte_write(pte))
 		goto unlock;
 
@@ -1697,6 +1701,19 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 			(VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
 	vm_flags &= (gup_flags & FOLL_FORCE) ?
 			(VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);
+
+	/*
+	 * If FOLL_FORCE and FOLL_NUMA are both set, handle_mm_fault
+	 * would be called on PROT_NONE ranges. We must never invoke
+	 * handle_mm_fault on PROT_NONE ranges or the NUMA hinting
+	 * page faults would unprotect the PROT_NONE ranges if
+	 * _PAGE_NUMA and _PAGE_PROTNONE are sharing the same pte/pmd
+	 * bitflag. So to avoid that, don't set FOLL_NUMA if
+	 * FOLL_FORCE is set.
+	 */
+	if (!(gup_flags & FOLL_FORCE))
+		gup_flags |= FOLL_NUMA;
+
 	i = 0;
 
 	do {
-- 
cgit v1.2.3


From d10e63f29488b0f312a443f9507ea9b6fd3c9090 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Thu, 25 Oct 2012 14:16:31 +0200
Subject: mm: numa: Create basic numa page hinting infrastructure

Note: This patch started as "mm/mpol: Create special PROT_NONE
	infrastructure" and preserves the basic idea but steals *very*
	heavily from "autonuma: numa hinting page faults entry points" for
	the actual fault handlers without the migration parts.	The end
	result is barely recognisable as either patch so all Signed-off
	and Reviewed-bys are dropped. If Peter, Ingo and Andrea are ok with
	this version, I will re-add the signed-offs-by to reflect the history.

In order to facilitate a lazy -- fault driven -- migration of pages, create
a special transient PAGE_NUMA variant, we can then use the 'spurious'
protection faults to drive our migrations from.

The meaning of PAGE_NUMA depends on the architecture but on x86 it is
effectively PROT_NONE. Actual PROT_NONE mappings will not generate these
NUMA faults for the reason that the page fault code checks the permission on
the VMA (and will throw a segmentation fault on actual PROT_NONE mappings),
before it ever calls handle_mm_fault.

[dhillf@gmail.com: Fix typo]
Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
---
 include/linux/huge_mm.h |  10 +++++
 mm/huge_memory.c        |  22 ++++++++++
 mm/memory.c             | 112 ++++++++++++++++++++++++++++++++++++++++++++++--
 3 files changed, 141 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index b31cb7da0346..a1d26a98c655 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -159,6 +159,10 @@ static inline struct page *compound_trans_head(struct page *page)
 	}
 	return page;
 }
+
+extern int do_huge_pmd_numa_page(struct mm_struct *mm, unsigned long addr,
+				  pmd_t pmd, pmd_t *pmdp);
+
 #else /* CONFIG_TRANSPARENT_HUGEPAGE */
 #define HPAGE_PMD_SHIFT ({ BUILD_BUG(); 0; })
 #define HPAGE_PMD_MASK ({ BUILD_BUG(); 0; })
@@ -195,6 +199,12 @@ static inline int pmd_trans_huge_lock(pmd_t *pmd,
 {
 	return 0;
 }
+
+static inline int do_huge_pmd_numa_page(struct mm_struct *mm, unsigned long addr,
+					pmd_t pmd, pmd_t *pmdp)
+{
+}
+
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
 #endif /* _LINUX_HUGE_MM_H */
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index cd24aa562144..f5f37630c54d 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1018,6 +1018,28 @@ out:
 	return page;
 }
 
+/* NUMA hinting page fault entry point for trans huge pmds */
+int do_huge_pmd_numa_page(struct mm_struct *mm, unsigned long addr,
+				pmd_t pmd, pmd_t *pmdp)
+{
+	struct page *page;
+	unsigned long haddr = addr & HPAGE_PMD_MASK;
+
+	spin_lock(&mm->page_table_lock);
+	if (unlikely(!pmd_same(pmd, *pmdp)))
+		goto out_unlock;
+
+	page = pmd_page(pmd);
+	pmd = pmd_mknonnuma(pmd);
+	set_pmd_at(mm, haddr, pmdp, pmd);
+	VM_BUG_ON(pmd_numa(*pmdp));
+	update_mmu_cache_pmd(vma, addr, pmdp);
+
+out_unlock:
+	spin_unlock(&mm->page_table_lock);
+	return 0;
+}
+
 int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 		 pmd_t *pmd, unsigned long addr)
 {
diff --git a/mm/memory.c b/mm/memory.c
index cd8e0daf1912..e30616f2cc3d 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3448,6 +3448,103 @@ static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	return __do_fault(mm, vma, address, pmd, pgoff, flags, orig_pte);
 }
 
+int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
+		   unsigned long addr, pte_t pte, pte_t *ptep, pmd_t *pmd)
+{
+	struct page *page;
+	spinlock_t *ptl;
+
+	/*
+	* The "pte" at this point cannot be used safely without
+	* validation through pte_unmap_same(). It's of NUMA type but
+	* the pfn may be screwed if the read is non atomic.
+	*
+	* ptep_modify_prot_start is not called as this is clearing
+	* the _PAGE_NUMA bit and it is not really expected that there
+	* would be concurrent hardware modifications to the PTE.
+	*/
+	ptl = pte_lockptr(mm, pmd);
+	spin_lock(ptl);
+	if (unlikely(!pte_same(*ptep, pte)))
+		goto out_unlock;
+	pte = pte_mknonnuma(pte);
+	set_pte_at(mm, addr, ptep, pte);
+	update_mmu_cache(vma, addr, ptep);
+
+	page = vm_normal_page(vma, addr, pte);
+	if (!page) {
+		pte_unmap_unlock(ptep, ptl);
+		return 0;
+	}
+
+out_unlock:
+	pte_unmap_unlock(ptep, ptl);
+	return 0;
+}
+
+/* NUMA hinting page fault entry point for regular pmds */
+#ifdef CONFIG_NUMA_BALANCING
+static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
+		     unsigned long addr, pmd_t *pmdp)
+{
+	pmd_t pmd;
+	pte_t *pte, *orig_pte;
+	unsigned long _addr = addr & PMD_MASK;
+	unsigned long offset;
+	spinlock_t *ptl;
+	bool numa = false;
+
+	spin_lock(&mm->page_table_lock);
+	pmd = *pmdp;
+	if (pmd_numa(pmd)) {
+		set_pmd_at(mm, _addr, pmdp, pmd_mknonnuma(pmd));
+		numa = true;
+	}
+	spin_unlock(&mm->page_table_lock);
+
+	if (!numa)
+		return 0;
+
+	/* we're in a page fault so some vma must be in the range */
+	BUG_ON(!vma);
+	BUG_ON(vma->vm_start >= _addr + PMD_SIZE);
+	offset = max(_addr, vma->vm_start) & ~PMD_MASK;
+	VM_BUG_ON(offset >= PMD_SIZE);
+	orig_pte = pte = pte_offset_map_lock(mm, pmdp, _addr, &ptl);
+	pte += offset >> PAGE_SHIFT;
+	for (addr = _addr + offset; addr < _addr + PMD_SIZE; pte++, addr += PAGE_SIZE) {
+		pte_t pteval = *pte;
+		struct page *page;
+		if (!pte_present(pteval))
+			continue;
+		if (!pte_numa(pteval))
+			continue;
+		if (addr >= vma->vm_end) {
+			vma = find_vma(mm, addr);
+			/* there's a pte present so there must be a vma */
+			BUG_ON(!vma);
+			BUG_ON(addr < vma->vm_start);
+		}
+		if (pte_numa(pteval)) {
+			pteval = pte_mknonnuma(pteval);
+			set_pte_at(mm, addr, pte, pteval);
+		}
+		page = vm_normal_page(vma, addr, pteval);
+		if (unlikely(!page))
+			continue;
+	}
+	pte_unmap_unlock(orig_pte, ptl);
+
+	return 0;
+}
+#else
+static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
+		     unsigned long addr, pmd_t *pmdp)
+{
+	BUG();
+}
+#endif /* CONFIG_NUMA_BALANCING */
+
 /*
  * These routines also need to handle stuff like marking pages dirty
  * and/or accessed for architectures that don't do it in hardware (most
@@ -3486,6 +3583,9 @@ int handle_pte_fault(struct mm_struct *mm,
 					pte, pmd, flags, entry);
 	}
 
+	if (pte_numa(entry))
+		return do_numa_page(mm, vma, address, entry, pte, pmd);
+
 	ptl = pte_lockptr(mm, pmd);
 	spin_lock(ptl);
 	if (unlikely(!pte_same(*pte, entry)))
@@ -3554,9 +3654,11 @@ retry:
 
 		barrier();
 		if (pmd_trans_huge(orig_pmd)) {
-			if (flags & FAULT_FLAG_WRITE &&
-			    !pmd_write(orig_pmd) &&
-			    !pmd_trans_splitting(orig_pmd)) {
+			if (pmd_numa(*pmd))
+				return do_huge_pmd_numa_page(mm, address,
+							     orig_pmd, pmd);
+
+			if ((flags & FAULT_FLAG_WRITE) && !pmd_write(orig_pmd)) {
 				ret = do_huge_pmd_wp_page(mm, vma, address, pmd,
 							  orig_pmd);
 				/*
@@ -3568,10 +3670,14 @@ retry:
 					goto retry;
 				return ret;
 			}
+
 			return 0;
 		}
 	}
 
+	if (pmd_numa(*pmd))
+		return do_pmd_numa_page(mm, vma, address, pmd);
+
 	/*
 	 * Use __pte_alloc instead of pte_alloc_map, because we can't
 	 * run pte_offset_map on the pmd, if an huge pmd could
-- 
cgit v1.2.3


From 771fb4d806a92bf6c988fcfbd286ae40a9374332 Mon Sep 17 00:00:00 2001
From: Lee Schermerhorn <lee.schermerhorn@hp.com>
Date: Thu, 25 Oct 2012 14:16:30 +0200
Subject: mm: mempolicy: Check for misplaced page

This patch provides a new function to test whether a page resides
on a node that is appropriate for the mempolicy for the vma and
address where the page is supposed to be mapped.  This involves
looking up the node where the page belongs.  So, the function
returns that node so that it may be used to allocated the page
without consulting the policy again.

A subsequent patch will call this function from the fault path.
Because of this, I don't want to go ahead and allocate the page, e.g.,
via alloc_page_vma() only to have to free it if it has the correct
policy.  So, I just mimic the alloc_page_vma() node computation
logic--sort of.

Note:  we could use this function to implement a MPOL_MF_STRICT
behavior when migrating pages to match mbind() mempolicy--e.g.,
to ensure that pages in an interleaved range are reinterleaved
rather than left where they are when they reside on any page in
the interleave nodemask.

Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
[ Added MPOL_F_LAZY to trigger migrate-on-fault;
  simplified code now that we don't have to bother
  with special crap for interleaved ]
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Mel Gorman <mgorman@suse.de>
---
 include/linux/mempolicy.h      |  8 +++++
 include/uapi/linux/mempolicy.h |  1 +
 mm/mempolicy.c                 | 76 ++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 85 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index e5ccb9ddd90e..c511e2523560 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -198,6 +198,8 @@ static inline int vma_migratable(struct vm_area_struct *vma)
 	return 1;
 }
 
+extern int mpol_misplaced(struct page *, struct vm_area_struct *, unsigned long);
+
 #else
 
 struct mempolicy {};
@@ -323,5 +325,11 @@ static inline int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol,
 	return 0;
 }
 
+static inline int mpol_misplaced(struct page *page, struct vm_area_struct *vma,
+				 unsigned long address)
+{
+	return -1; /* no node preference */
+}
+
 #endif /* CONFIG_NUMA */
 #endif
diff --git a/include/uapi/linux/mempolicy.h b/include/uapi/linux/mempolicy.h
index d23dca8367cc..472de8a5d37e 100644
--- a/include/uapi/linux/mempolicy.h
+++ b/include/uapi/linux/mempolicy.h
@@ -61,6 +61,7 @@ enum mpol_rebind_step {
 #define MPOL_F_SHARED  (1 << 0)	/* identify shared policies */
 #define MPOL_F_LOCAL   (1 << 1)	/* preferred local allocation */
 #define MPOL_F_REBINDING (1 << 2)	/* identify policies in rebinding */
+#define MPOL_F_MOF	(1 << 3) /* this policy wants migrate on fault */
 
 
 #endif /* _UAPI_LINUX_MEMPOLICY_H */
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index c21e91477c4f..df1466d3d2d8 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2181,6 +2181,82 @@ static void sp_free(struct sp_node *n)
 	kmem_cache_free(sn_cache, n);
 }
 
+/**
+ * mpol_misplaced - check whether current page node is valid in policy
+ *
+ * @page   - page to be checked
+ * @vma    - vm area where page mapped
+ * @addr   - virtual address where page mapped
+ *
+ * Lookup current policy node id for vma,addr and "compare to" page's
+ * node id.
+ *
+ * Returns:
+ *	-1	- not misplaced, page is in the right node
+ *	node	- node id where the page should be
+ *
+ * Policy determination "mimics" alloc_page_vma().
+ * Called from fault path where we know the vma and faulting address.
+ */
+int mpol_misplaced(struct page *page, struct vm_area_struct *vma, unsigned long addr)
+{
+	struct mempolicy *pol;
+	struct zone *zone;
+	int curnid = page_to_nid(page);
+	unsigned long pgoff;
+	int polnid = -1;
+	int ret = -1;
+
+	BUG_ON(!vma);
+
+	pol = get_vma_policy(current, vma, addr);
+	if (!(pol->flags & MPOL_F_MOF))
+		goto out;
+
+	switch (pol->mode) {
+	case MPOL_INTERLEAVE:
+		BUG_ON(addr >= vma->vm_end);
+		BUG_ON(addr < vma->vm_start);
+
+		pgoff = vma->vm_pgoff;
+		pgoff += (addr - vma->vm_start) >> PAGE_SHIFT;
+		polnid = offset_il_node(pol, vma, pgoff);
+		break;
+
+	case MPOL_PREFERRED:
+		if (pol->flags & MPOL_F_LOCAL)
+			polnid = numa_node_id();
+		else
+			polnid = pol->v.preferred_node;
+		break;
+
+	case MPOL_BIND:
+		/*
+		 * allows binding to multiple nodes.
+		 * use current page if in policy nodemask,
+		 * else select nearest allowed node, if any.
+		 * If no allowed nodes, use current [!misplaced].
+		 */
+		if (node_isset(curnid, pol->v.nodes))
+			goto out;
+		(void)first_zones_zonelist(
+				node_zonelist(numa_node_id(), GFP_HIGHUSER),
+				gfp_zone(GFP_HIGHUSER),
+				&pol->v.nodes, &zone);
+		polnid = zone->node;
+		break;
+
+	default:
+		BUG();
+	}
+	if (curnid != polnid)
+		ret = polnid;
+out:
+	mpol_cond_put(pol);
+
+	return ret;
+}
+
 static void sp_delete(struct shared_policy *sp, struct sp_node *n)
 {
 	pr_debug("deleting %lx-l%lx\n", n->start, n->end);
-- 
cgit v1.2.3


From 7039e1dbec6eeaa8ecab43a82d6589eeced995c3 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 25 Oct 2012 14:16:34 +0200
Subject: mm: migrate: Introduce migrate_misplaced_page()

Note: This was originally based on Peter's patch "mm/migrate: Introduce
	migrate_misplaced_page()" but borrows extremely heavily from Andrea's
	"autonuma: memory follows CPU algorithm and task/mm_autonuma stats
	collection". The end result is barely recognisable so signed-offs
	had to be dropped. If original authors are ok with it, I'll
	re-add the signed-off-bys.

Add migrate_misplaced_page() which deals with migrating pages from
faults.

Based-on-work-by: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
Based-on-work-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Based-on-work-by: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
---
 include/linux/migrate.h |  11 +++++
 mm/migrate.c            | 108 +++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 117 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 9d1c159e2427..f0d0313eea6f 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -13,6 +13,7 @@ enum migrate_reason {
 	MR_MEMORY_HOTPLUG,
 	MR_SYSCALL,		/* also applies to cpusets */
 	MR_MEMPOLICY_MBIND,
+	MR_NUMA_MISPLACED,
 	MR_CMA
 };
 
@@ -73,4 +74,14 @@ static inline int migrate_huge_page_move_mapping(struct address_space *mapping,
 #define fail_migrate_page NULL
 
 #endif /* CONFIG_MIGRATION */
+
+#ifdef CONFIG_NUMA_BALANCING
+extern int migrate_misplaced_page(struct page *page, int node);
+#else
+static inline int migrate_misplaced_page(struct page *page, int node)
+{
+	return -EAGAIN; /* can't migrate now */
+}
+#endif /* CONFIG_NUMA_BALANCING */
+
 #endif /* _LINUX_MIGRATE_H */
diff --git a/mm/migrate.c b/mm/migrate.c
index 27be9c923dc1..d168aec98427 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -282,7 +282,7 @@ static int migrate_page_move_mapping(struct address_space *mapping,
 		struct page *newpage, struct page *page,
 		struct buffer_head *head, enum migrate_mode mode)
 {
-	int expected_count;
+	int expected_count = 0;
 	void **pslot;
 
 	if (!mapping) {
@@ -1415,4 +1415,108 @@ int migrate_vmas(struct mm_struct *mm, const nodemask_t *to,
  	}
  	return err;
 }
-#endif
+
+#ifdef CONFIG_NUMA_BALANCING
+/*
+ * Returns true if this is a safe migration target node for misplaced NUMA
+ * pages. Currently it only checks the watermarks which crude
+ */
+static bool migrate_balanced_pgdat(struct pglist_data *pgdat,
+				   int nr_migrate_pages)
+{
+	int z;
+	for (z = pgdat->nr_zones - 1; z >= 0; z--) {
+		struct zone *zone = pgdat->node_zones + z;
+
+		if (!populated_zone(zone))
+			continue;
+
+		if (zone->all_unreclaimable)
+			continue;
+
+		/* Avoid waking kswapd by allocating pages_to_migrate pages. */
+		if (!zone_watermark_ok(zone, 0,
+				       high_wmark_pages(zone) +
+				       nr_migrate_pages,
+				       0, 0))
+			continue;
+		return true;
+	}
+	return false;
+}
+
+static struct page *alloc_misplaced_dst_page(struct page *page,
+					   unsigned long data,
+					   int **result)
+{
+	int nid = (int) data;
+	struct page *newpage;
+
+	newpage = alloc_pages_exact_node(nid,
+					 (GFP_HIGHUSER_MOVABLE | GFP_THISNODE |
+					  __GFP_NOMEMALLOC | __GFP_NORETRY |
+					  __GFP_NOWARN) &
+					 ~GFP_IOFS, 0);
+	return newpage;
+}
+
+/*
+ * Attempt to migrate a misplaced page to the specified destination
+ * node. Caller is expected to have an elevated reference count on
+ * the page that will be dropped by this function before returning.
+ */
+int migrate_misplaced_page(struct page *page, int node)
+{
+	int isolated = 0;
+	LIST_HEAD(migratepages);
+
+	/*
+	 * Don't migrate pages that are mapped in multiple processes.
+	 * TODO: Handle false sharing detection instead of this hammer
+	 */
+	if (page_mapcount(page) != 1) {
+		put_page(page);
+		goto out;
+	}
+
+	/* Avoid migrating to a node that is nearly full */
+	if (migrate_balanced_pgdat(NODE_DATA(node), 1)) {
+		int page_lru;
+
+		if (isolate_lru_page(page)) {
+			put_page(page);
+			goto out;
+		}
+		isolated = 1;
+
+		/*
+		 * Page is isolated which takes a reference count so now the
+		 * callers reference can be safely dropped without the page
+		 * disappearing underneath us during migration
+		 */
+		put_page(page);
+
+		page_lru = page_is_file_cache(page);
+		inc_zone_page_state(page, NR_ISOLATED_ANON + page_lru);
+		list_add(&page->lru, &migratepages);
+	}
+
+	if (isolated) {
+		int nr_remaining;
+
+		nr_remaining = migrate_pages(&migratepages,
+				alloc_misplaced_dst_page,
+				node, false, MIGRATE_ASYNC,
+				MR_NUMA_MISPLACED);
+		if (nr_remaining) {
+			putback_lru_pages(&migratepages);
+			isolated = 0;
+		}
+	}
+	BUG_ON(!list_empty(&migratepages));
+out:
+	return isolated;
+}
+#endif /* CONFIG_NUMA_BALANCING */
+
+#endif /* CONFIG_NUMA */
-- 
cgit v1.2.3


From 4daae3b4b9e49b7e0935499a352f1c59d90287d2 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Fri, 2 Nov 2012 11:33:45 +0000
Subject: mm: mempolicy: Use _PAGE_NUMA to migrate pages

Note: Based on "mm/mpol: Use special PROT_NONE to migrate pages" but
	sufficiently different that the signed-off-bys were dropped

Combine our previous _PAGE_NUMA, mpol_misplaced and migrate_misplaced_page()
pieces into an effective migrate on fault scheme.

Note that (on x86) we rely on PROT_NONE pages being !present and avoid
the TLB flush from try_to_unmap(TTU_MIGRATION). This greatly improves the
page-migration performance.

Based-on-work-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Mel Gorman <mgorman@suse.de>
---
 include/linux/huge_mm.h |  9 +++++----
 mm/huge_memory.c        | 31 ++++++++++++++++++++++++++++---
 mm/memory.c             | 32 +++++++++++++++++++++++++++-----
 3 files changed, 60 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index a1d26a98c655..dabb5108d6c0 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -160,8 +160,8 @@ static inline struct page *compound_trans_head(struct page *page)
 	return page;
 }
 
-extern int do_huge_pmd_numa_page(struct mm_struct *mm, unsigned long addr,
-				  pmd_t pmd, pmd_t *pmdp);
+extern int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
+				unsigned long addr, pmd_t pmd, pmd_t *pmdp);
 
 #else /* CONFIG_TRANSPARENT_HUGEPAGE */
 #define HPAGE_PMD_SHIFT ({ BUILD_BUG(); 0; })
@@ -200,9 +200,10 @@ static inline int pmd_trans_huge_lock(pmd_t *pmd,
 	return 0;
 }
 
-static inline int do_huge_pmd_numa_page(struct mm_struct *mm, unsigned long addr,
-					pmd_t pmd, pmd_t *pmdp)
+static inline int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
+					unsigned long addr, pmd_t pmd, pmd_t *pmdp)
 {
+	return 0;
 }
 
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index f5f37630c54d..5723b551c023 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -18,6 +18,7 @@
 #include <linux/freezer.h>
 #include <linux/mman.h>
 #include <linux/pagemap.h>
+#include <linux/migrate.h>
 #include <asm/tlb.h>
 #include <asm/pgalloc.h>
 #include "internal.h"
@@ -1019,17 +1020,39 @@ out:
 }
 
 /* NUMA hinting page fault entry point for trans huge pmds */
-int do_huge_pmd_numa_page(struct mm_struct *mm, unsigned long addr,
-				pmd_t pmd, pmd_t *pmdp)
+int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
+				unsigned long addr, pmd_t pmd, pmd_t *pmdp)
 {
-	struct page *page;
+	struct page *page = NULL;
 	unsigned long haddr = addr & HPAGE_PMD_MASK;
+	int target_nid;
 
 	spin_lock(&mm->page_table_lock);
 	if (unlikely(!pmd_same(pmd, *pmdp)))
 		goto out_unlock;
 
 	page = pmd_page(pmd);
+	get_page(page);
+	spin_unlock(&mm->page_table_lock);
+
+	target_nid = mpol_misplaced(page, vma, haddr);
+	if (target_nid == -1)
+		goto clear_pmdnuma;
+
+	/*
+	 * Due to lacking code to migrate thp pages, we'll split
+	 * (which preserves the special PROT_NONE) and re-take the
+	 * fault on the normal pages.
+	 */
+	split_huge_page(page);
+	put_page(page);
+	return 0;
+
+clear_pmdnuma:
+	spin_lock(&mm->page_table_lock);
+	if (unlikely(!pmd_same(pmd, *pmdp)))
+		goto out_unlock;
+
 	pmd = pmd_mknonnuma(pmd);
 	set_pmd_at(mm, haddr, pmdp, pmd);
 	VM_BUG_ON(pmd_numa(*pmdp));
@@ -1037,6 +1060,8 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, unsigned long addr,
 
 out_unlock:
 	spin_unlock(&mm->page_table_lock);
+	if (page)
+		put_page(page);
 	return 0;
 }
 
diff --git a/mm/memory.c b/mm/memory.c
index e30616f2cc3d..d52542680e10 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -57,6 +57,7 @@
 #include <linux/swapops.h>
 #include <linux/elf.h>
 #include <linux/gfp.h>
+#include <linux/migrate.h>
 
 #include <asm/io.h>
 #include <asm/pgalloc.h>
@@ -3451,8 +3452,9 @@ static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		   unsigned long addr, pte_t pte, pte_t *ptep, pmd_t *pmd)
 {
-	struct page *page;
+	struct page *page = NULL;
 	spinlock_t *ptl;
+	int current_nid, target_nid;
 
 	/*
 	* The "pte" at this point cannot be used safely without
@@ -3465,8 +3467,11 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	*/
 	ptl = pte_lockptr(mm, pmd);
 	spin_lock(ptl);
-	if (unlikely(!pte_same(*ptep, pte)))
-		goto out_unlock;
+	if (unlikely(!pte_same(*ptep, pte))) {
+		pte_unmap_unlock(ptep, ptl);
+		goto out;
+	}
+
 	pte = pte_mknonnuma(pte);
 	set_pte_at(mm, addr, ptep, pte);
 	update_mmu_cache(vma, addr, ptep);
@@ -3477,8 +3482,25 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		return 0;
 	}
 
-out_unlock:
+	get_page(page);
+	current_nid = page_to_nid(page);
+	target_nid = mpol_misplaced(page, vma, addr);
 	pte_unmap_unlock(ptep, ptl);
+	if (target_nid == -1) {
+		/*
+		 * Account for the fault against the current node if it not
+		 * being replaced regardless of where the page is located.
+		 */
+		current_nid = numa_node_id();
+		put_page(page);
+		goto out;
+	}
+
+	/* Migrate to the requested node */
+	if (migrate_misplaced_page(page, target_nid))
+		current_nid = target_nid;
+
+out:
 	return 0;
 }
 
@@ -3655,7 +3677,7 @@ retry:
 		barrier();
 		if (pmd_trans_huge(orig_pmd)) {
 			if (pmd_numa(*pmd))
-				return do_huge_pmd_numa_page(mm, address,
+				return do_huge_pmd_numa_page(mm, vma, address,
 							     orig_pmd, pmd);
 
 			if ((flags & FAULT_FLAG_WRITE) && !pmd_write(orig_pmd)) {
-- 
cgit v1.2.3


From b24f53a0bea38b266d219ee651b22dba727c44ae Mon Sep 17 00:00:00 2001
From: Lee Schermerhorn <lee.schermerhorn@hp.com>
Date: Thu, 25 Oct 2012 14:16:32 +0200
Subject: mm: mempolicy: Add MPOL_MF_LAZY

NOTE: Once again there is a lot of patch stealing and the end result
	is sufficiently different that I had to drop the signed-offs.
	Will re-add if the original authors are ok with that.

This patch adds another mbind() flag to request "lazy migration".  The
flag, MPOL_MF_LAZY, modifies MPOL_MF_MOVE* such that the selected
pages are marked PROT_NONE. The pages will be migrated in the fault
path on "first touch", if the policy dictates at that time.

"Lazy Migration" will allow testing of migrate-on-fault via mbind().
Also allows applications to specify that only subsequently touched
pages be migrated to obey new policy, instead of all pages in range.
This can be useful for multi-threaded applications working on a
large shared data area that is initialized by an initial thread
resulting in all pages on one [or a few, if overflowed] nodes.
After PROT_NONE, the pages in regions assigned to the worker threads
will be automatically migrated local to the threads on 1st touch.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
---
 include/linux/mm.h             |   5 ++
 include/uapi/linux/mempolicy.h |  13 ++-
 mm/mempolicy.c                 | 185 +++++++++++++++++++++++++++++++++++++----
 3 files changed, 185 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index fa1615211159..471185e29bab 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1551,6 +1551,11 @@ static inline pgprot_t vm_get_page_prot(unsigned long vm_flags)
 }
 #endif
 
+#ifdef CONFIG_ARCH_USES_NUMA_PROT_NONE
+void change_prot_numa(struct vm_area_struct *vma,
+			unsigned long start, unsigned long end);
+#endif
+
 struct vm_area_struct *find_extend_vma(struct mm_struct *, unsigned long addr);
 int remap_pfn_range(struct vm_area_struct *, unsigned long addr,
 			unsigned long pfn, unsigned long size, pgprot_t);
diff --git a/include/uapi/linux/mempolicy.h b/include/uapi/linux/mempolicy.h
index 472de8a5d37e..6a1baae3775d 100644
--- a/include/uapi/linux/mempolicy.h
+++ b/include/uapi/linux/mempolicy.h
@@ -49,9 +49,16 @@ enum mpol_rebind_step {
 
 /* Flags for mbind */
 #define MPOL_MF_STRICT	(1<<0)	/* Verify existing pages in the mapping */
-#define MPOL_MF_MOVE	(1<<1)	/* Move pages owned by this process to conform to mapping */
-#define MPOL_MF_MOVE_ALL (1<<2)	/* Move every page to conform to mapping */
-#define MPOL_MF_INTERNAL (1<<3)	/* Internal flags start here */
+#define MPOL_MF_MOVE	 (1<<1)	/* Move pages owned by this process to conform
+				   to policy */
+#define MPOL_MF_MOVE_ALL (1<<2)	/* Move every page to conform to policy */
+#define MPOL_MF_LAZY	 (1<<3)	/* Modifies '_MOVE:  lazy migrate on fault */
+#define MPOL_MF_INTERNAL (1<<4)	/* Internal flags start here */
+
+#define MPOL_MF_VALID	(MPOL_MF_STRICT   | 	\
+			 MPOL_MF_MOVE     | 	\
+			 MPOL_MF_MOVE_ALL |	\
+			 MPOL_MF_LAZY)
 
 /*
  * Internal flags that share the struct mempolicy flags word with
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index df1466d3d2d8..51d3ebd8561e 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -90,6 +90,7 @@
 #include <linux/syscalls.h>
 #include <linux/ctype.h>
 #include <linux/mm_inline.h>
+#include <linux/mmu_notifier.h>
 
 #include <asm/tlbflush.h>
 #include <asm/uaccess.h>
@@ -565,6 +566,145 @@ static inline int check_pgd_range(struct vm_area_struct *vma,
 	return 0;
 }
 
+#ifdef CONFIG_ARCH_USES_NUMA_PROT_NONE
+/*
+ * Here we search for not shared page mappings (mapcount == 1) and we
+ * set up the pmd/pte_numa on those mappings so the very next access
+ * will fire a NUMA hinting page fault.
+ */
+static int
+change_prot_numa_range(struct mm_struct *mm, struct vm_area_struct *vma,
+			unsigned long address)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte, *_pte;
+	struct page *page;
+	unsigned long _address, end;
+	spinlock_t *ptl;
+	int ret = 0;
+
+	VM_BUG_ON(address & ~PAGE_MASK);
+
+	pgd = pgd_offset(mm, address);
+	if (!pgd_present(*pgd))
+		goto out;
+
+	pud = pud_offset(pgd, address);
+	if (!pud_present(*pud))
+		goto out;
+
+	pmd = pmd_offset(pud, address);
+	if (pmd_none(*pmd))
+		goto out;
+
+	if (pmd_trans_huge_lock(pmd, vma) == 1) {
+		int page_nid;
+		ret = HPAGE_PMD_NR;
+
+		VM_BUG_ON(address & ~HPAGE_PMD_MASK);
+
+		if (pmd_numa(*pmd)) {
+			spin_unlock(&mm->page_table_lock);
+			goto out;
+		}
+
+		page = pmd_page(*pmd);
+
+		/* only check non-shared pages */
+		if (page_mapcount(page) != 1) {
+			spin_unlock(&mm->page_table_lock);
+			goto out;
+		}
+
+		page_nid = page_to_nid(page);
+
+		if (pmd_numa(*pmd)) {
+			spin_unlock(&mm->page_table_lock);
+			goto out;
+		}
+
+		set_pmd_at(mm, address, pmd, pmd_mknuma(*pmd));
+		ret += HPAGE_PMD_NR;
+		/* defer TLB flush to lower the overhead */
+		spin_unlock(&mm->page_table_lock);
+		goto out;
+	}
+
+	if (pmd_trans_unstable(pmd))
+		goto out;
+	VM_BUG_ON(!pmd_present(*pmd));
+
+	end = min(vma->vm_end, (address + PMD_SIZE) & PMD_MASK);
+	pte = pte_offset_map_lock(mm, pmd, address, &ptl);
+	for (_address = address, _pte = pte; _address < end;
+	     _pte++, _address += PAGE_SIZE) {
+		pte_t pteval = *_pte;
+		if (!pte_present(pteval))
+			continue;
+		if (pte_numa(pteval))
+			continue;
+		page = vm_normal_page(vma, _address, pteval);
+		if (unlikely(!page))
+			continue;
+		/* only check non-shared pages */
+		if (page_mapcount(page) != 1)
+			continue;
+
+		set_pte_at(mm, _address, _pte, pte_mknuma(pteval));
+
+		/* defer TLB flush to lower the overhead */
+		ret++;
+	}
+	pte_unmap_unlock(pte, ptl);
+
+	if (ret && !pmd_numa(*pmd)) {
+		spin_lock(&mm->page_table_lock);
+		set_pmd_at(mm, address, pmd, pmd_mknuma(*pmd));
+		spin_unlock(&mm->page_table_lock);
+		/* defer TLB flush to lower the overhead */
+	}
+
+out:
+	return ret;
+}
+
+/* Assumes mmap_sem is held */
+void
+change_prot_numa(struct vm_area_struct *vma,
+			unsigned long address, unsigned long end)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	int progress = 0;
+
+	while (address < end) {
+		VM_BUG_ON(address < vma->vm_start ||
+			  address + PAGE_SIZE > vma->vm_end);
+
+		progress += change_prot_numa_range(mm, vma, address);
+		address = (address + PMD_SIZE) & PMD_MASK;
+	}
+
+	/*
+	 * Flush the TLB for the mm to start the NUMA hinting
+	 * page faults after we finish scanning this vma part
+	 * if there were any PTE updates
+	 */
+	if (progress) {
+		mmu_notifier_invalidate_range_start(vma->vm_mm, address, end);
+		flush_tlb_range(vma, address, end);
+		mmu_notifier_invalidate_range_end(vma->vm_mm, address, end);
+	}
+}
+#else
+static unsigned long change_prot_numa(struct vm_area_struct *vma,
+			unsigned long addr, unsigned long end)
+{
+	return 0;
+}
+#endif /* CONFIG_ARCH_USES_NUMA_PROT_NONE */
+
 /*
  * Check if all pages in a range are on a set of nodes.
  * If pagelist != NULL then isolate pages from the LRU and
@@ -583,22 +723,32 @@ check_range(struct mm_struct *mm, unsigned long start, unsigned long end,
 		return ERR_PTR(-EFAULT);
 	prev = NULL;
 	for (vma = first; vma && vma->vm_start < end; vma = vma->vm_next) {
+		unsigned long endvma = vma->vm_end;
+
+		if (endvma > end)
+			endvma = end;
+		if (vma->vm_start > start)
+			start = vma->vm_start;
+
 		if (!(flags & MPOL_MF_DISCONTIG_OK)) {
 			if (!vma->vm_next && vma->vm_end < end)
 				return ERR_PTR(-EFAULT);
 			if (prev && prev->vm_end < vma->vm_start)
 				return ERR_PTR(-EFAULT);
 		}
-		if (!is_vm_hugetlb_page(vma) &&
-		    ((flags & MPOL_MF_STRICT) ||
+
+		if (is_vm_hugetlb_page(vma))
+			goto next;
+
+		if (flags & MPOL_MF_LAZY) {
+			change_prot_numa(vma, start, endvma);
+			goto next;
+		}
+
+		if ((flags & MPOL_MF_STRICT) ||
 		     ((flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) &&
-				vma_migratable(vma)))) {
-			unsigned long endvma = vma->vm_end;
+		      vma_migratable(vma))) {
 
-			if (endvma > end)
-				endvma = end;
-			if (vma->vm_start > start)
-				start = vma->vm_start;
 			err = check_pgd_range(vma, start, endvma, nodes,
 						flags, private);
 			if (err) {
@@ -606,6 +756,7 @@ check_range(struct mm_struct *mm, unsigned long start, unsigned long end,
 				break;
 			}
 		}
+next:
 		prev = vma;
 	}
 	return first;
@@ -1138,8 +1289,7 @@ static long do_mbind(unsigned long start, unsigned long len,
 	int err;
 	LIST_HEAD(pagelist);
 
-	if (flags & ~(unsigned long)(MPOL_MF_STRICT |
-				     MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
+	if (flags & ~(unsigned long)MPOL_MF_VALID)
 		return -EINVAL;
 	if ((flags & MPOL_MF_MOVE_ALL) && !capable(CAP_SYS_NICE))
 		return -EPERM;
@@ -1162,6 +1312,9 @@ static long do_mbind(unsigned long start, unsigned long len,
 	if (IS_ERR(new))
 		return PTR_ERR(new);
 
+	if (flags & MPOL_MF_LAZY)
+		new->flags |= MPOL_F_MOF;
+
 	/*
 	 * If we are using the default policy then operation
 	 * on discontinuous address spaces is okay after all
@@ -1198,13 +1351,15 @@ static long do_mbind(unsigned long start, unsigned long len,
 	vma = check_range(mm, start, end, nmask,
 			  flags | MPOL_MF_INVERT, &pagelist);
 
-	err = PTR_ERR(vma);
-	if (!IS_ERR(vma)) {
-		int nr_failed = 0;
-
+	err = PTR_ERR(vma);	/* maybe ... */
+	if (!IS_ERR(vma) && mode != MPOL_NOOP)
 		err = mbind_range(mm, start, end, new);
 
+	if (!err) {
+		int nr_failed = 0;
+
 		if (!list_empty(&pagelist)) {
+			WARN_ON_ONCE(flags & MPOL_MF_LAZY);
 			nr_failed = migrate_pages(&pagelist, new_vma_page,
 						(unsigned long)vma,
 						false, MIGRATE_SYNC,
@@ -1213,7 +1368,7 @@ static long do_mbind(unsigned long start, unsigned long len,
 				putback_lru_pages(&pagelist);
 		}
 
-		if (!err && nr_failed && (flags & MPOL_MF_STRICT))
+		if (nr_failed && (flags & MPOL_MF_STRICT))
 			err = -EIO;
 	} else
 		putback_lru_pages(&pagelist);
-- 
cgit v1.2.3


From 4b10e7d562c90d0a72f324832c26653947a07381 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Thu, 25 Oct 2012 14:16:32 +0200
Subject: mm: mempolicy: Implement change_prot_numa() in terms of
 change_protection()

This patch converts change_prot_numa() to use change_protection(). As
pte_numa and friends check the PTE bits directly it is necessary for
change_protection() to use pmd_mknuma(). Hence the required
modifications to change_protection() are a little clumsy but the
end result is that most of the numa page table helpers are just one or
two instructions.

Signed-off-by: Mel Gorman <mgorman@suse.de>
---
 include/linux/huge_mm.h |   3 +-
 include/linux/mm.h      |   4 +-
 mm/huge_memory.c        |  14 ++++-
 mm/mempolicy.c          | 137 +++++-------------------------------------------
 mm/mprotect.c           |  72 +++++++++++++++++++------
 5 files changed, 85 insertions(+), 145 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index dabb5108d6c0..027ad04ef3a8 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -27,7 +27,8 @@ extern int move_huge_pmd(struct vm_area_struct *vma,
 			 unsigned long new_addr, unsigned long old_end,
 			 pmd_t *old_pmd, pmd_t *new_pmd);
 extern int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
-			unsigned long addr, pgprot_t newprot);
+			unsigned long addr, pgprot_t newprot,
+			int prot_numa);
 
 enum transparent_hugepage_flag {
 	TRANSPARENT_HUGEPAGE_FLAG,
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 471185e29bab..d04c2f0aab36 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1080,7 +1080,7 @@ extern unsigned long do_mremap(unsigned long addr,
 			       unsigned long flags, unsigned long new_addr);
 extern unsigned long change_protection(struct vm_area_struct *vma, unsigned long start,
 			      unsigned long end, pgprot_t newprot,
-			      int dirty_accountable);
+			      int dirty_accountable, int prot_numa);
 extern int mprotect_fixup(struct vm_area_struct *vma,
 			  struct vm_area_struct **pprev, unsigned long start,
 			  unsigned long end, unsigned long newflags);
@@ -1552,7 +1552,7 @@ static inline pgprot_t vm_get_page_prot(unsigned long vm_flags)
 #endif
 
 #ifdef CONFIG_ARCH_USES_NUMA_PROT_NONE
-void change_prot_numa(struct vm_area_struct *vma,
+unsigned long change_prot_numa(struct vm_area_struct *vma,
 			unsigned long start, unsigned long end);
 #endif
 
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 5723b551c023..d79f7a55bf6f 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1147,7 +1147,7 @@ out:
 }
 
 int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
-		unsigned long addr, pgprot_t newprot)
+		unsigned long addr, pgprot_t newprot, int prot_numa)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	int ret = 0;
@@ -1155,7 +1155,17 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 	if (__pmd_trans_huge_lock(pmd, vma) == 1) {
 		pmd_t entry;
 		entry = pmdp_get_and_clear(mm, addr, pmd);
-		entry = pmd_modify(entry, newprot);
+		if (!prot_numa)
+			entry = pmd_modify(entry, newprot);
+		else {
+			struct page *page = pmd_page(*pmd);
+
+			/* only check non-shared pages */
+			if (page_mapcount(page) == 1 &&
+			    !pmd_numa(*pmd)) {
+				entry = pmd_mknuma(entry);
+			}
+		}
 		set_pmd_at(mm, addr, pmd, entry);
 		spin_unlock(&vma->vm_mm->page_table_lock);
 		ret = 1;
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 51d3ebd8561e..75d4600a5e92 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -568,134 +568,23 @@ static inline int check_pgd_range(struct vm_area_struct *vma,
 
 #ifdef CONFIG_ARCH_USES_NUMA_PROT_NONE
 /*
- * Here we search for not shared page mappings (mapcount == 1) and we
- * set up the pmd/pte_numa on those mappings so the very next access
- * will fire a NUMA hinting page fault.
+ * This is used to mark a range of virtual addresses to be inaccessible.
+ * These are later cleared by a NUMA hinting fault. Depending on these
+ * faults, pages may be migrated for better NUMA placement.
+ *
+ * This is assuming that NUMA faults are handled using PROT_NONE. If
+ * an architecture makes a different choice, it will need further
+ * changes to the core.
  */
-static int
-change_prot_numa_range(struct mm_struct *mm, struct vm_area_struct *vma,
-			unsigned long address)
-{
-	pgd_t *pgd;
-	pud_t *pud;
-	pmd_t *pmd;
-	pte_t *pte, *_pte;
-	struct page *page;
-	unsigned long _address, end;
-	spinlock_t *ptl;
-	int ret = 0;
-
-	VM_BUG_ON(address & ~PAGE_MASK);
-
-	pgd = pgd_offset(mm, address);
-	if (!pgd_present(*pgd))
-		goto out;
-
-	pud = pud_offset(pgd, address);
-	if (!pud_present(*pud))
-		goto out;
-
-	pmd = pmd_offset(pud, address);
-	if (pmd_none(*pmd))
-		goto out;
-
-	if (pmd_trans_huge_lock(pmd, vma) == 1) {
-		int page_nid;
-		ret = HPAGE_PMD_NR;
-
-		VM_BUG_ON(address & ~HPAGE_PMD_MASK);
-
-		if (pmd_numa(*pmd)) {
-			spin_unlock(&mm->page_table_lock);
-			goto out;
-		}
-
-		page = pmd_page(*pmd);
-
-		/* only check non-shared pages */
-		if (page_mapcount(page) != 1) {
-			spin_unlock(&mm->page_table_lock);
-			goto out;
-		}
-
-		page_nid = page_to_nid(page);
-
-		if (pmd_numa(*pmd)) {
-			spin_unlock(&mm->page_table_lock);
-			goto out;
-		}
-
-		set_pmd_at(mm, address, pmd, pmd_mknuma(*pmd));
-		ret += HPAGE_PMD_NR;
-		/* defer TLB flush to lower the overhead */
-		spin_unlock(&mm->page_table_lock);
-		goto out;
-	}
-
-	if (pmd_trans_unstable(pmd))
-		goto out;
-	VM_BUG_ON(!pmd_present(*pmd));
-
-	end = min(vma->vm_end, (address + PMD_SIZE) & PMD_MASK);
-	pte = pte_offset_map_lock(mm, pmd, address, &ptl);
-	for (_address = address, _pte = pte; _address < end;
-	     _pte++, _address += PAGE_SIZE) {
-		pte_t pteval = *_pte;
-		if (!pte_present(pteval))
-			continue;
-		if (pte_numa(pteval))
-			continue;
-		page = vm_normal_page(vma, _address, pteval);
-		if (unlikely(!page))
-			continue;
-		/* only check non-shared pages */
-		if (page_mapcount(page) != 1)
-			continue;
-
-		set_pte_at(mm, _address, _pte, pte_mknuma(pteval));
-
-		/* defer TLB flush to lower the overhead */
-		ret++;
-	}
-	pte_unmap_unlock(pte, ptl);
-
-	if (ret && !pmd_numa(*pmd)) {
-		spin_lock(&mm->page_table_lock);
-		set_pmd_at(mm, address, pmd, pmd_mknuma(*pmd));
-		spin_unlock(&mm->page_table_lock);
-		/* defer TLB flush to lower the overhead */
-	}
-
-out:
-	return ret;
-}
-
-/* Assumes mmap_sem is held */
-void
-change_prot_numa(struct vm_area_struct *vma,
-			unsigned long address, unsigned long end)
+unsigned long change_prot_numa(struct vm_area_struct *vma,
+			unsigned long addr, unsigned long end)
 {
-	struct mm_struct *mm = vma->vm_mm;
-	int progress = 0;
-
-	while (address < end) {
-		VM_BUG_ON(address < vma->vm_start ||
-			  address + PAGE_SIZE > vma->vm_end);
+	int nr_updated;
+	BUILD_BUG_ON(_PAGE_NUMA != _PAGE_PROTNONE);
 
-		progress += change_prot_numa_range(mm, vma, address);
-		address = (address + PMD_SIZE) & PMD_MASK;
-	}
+	nr_updated = change_protection(vma, addr, end, vma->vm_page_prot, 0, 1);
 
-	/*
-	 * Flush the TLB for the mm to start the NUMA hinting
-	 * page faults after we finish scanning this vma part
-	 * if there were any PTE updates
-	 */
-	if (progress) {
-		mmu_notifier_invalidate_range_start(vma->vm_mm, address, end);
-		flush_tlb_range(vma, address, end);
-		mmu_notifier_invalidate_range_end(vma->vm_mm, address, end);
-	}
+	return nr_updated;
 }
 #else
 static unsigned long change_prot_numa(struct vm_area_struct *vma,
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 7c3628a8b486..7ef6ae964e8f 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -35,10 +35,11 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
 }
 #endif
 
-static unsigned long change_pte_range(struct mm_struct *mm, pmd_t *pmd,
+static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 		unsigned long addr, unsigned long end, pgprot_t newprot,
-		int dirty_accountable)
+		int dirty_accountable, int prot_numa)
 {
+	struct mm_struct *mm = vma->vm_mm;
 	pte_t *pte, oldpte;
 	spinlock_t *ptl;
 	unsigned long pages = 0;
@@ -49,19 +50,39 @@ static unsigned long change_pte_range(struct mm_struct *mm, pmd_t *pmd,
 		oldpte = *pte;
 		if (pte_present(oldpte)) {
 			pte_t ptent;
+			bool updated = false;
 
 			ptent = ptep_modify_prot_start(mm, addr, pte);
-			ptent = pte_modify(ptent, newprot);
+			if (!prot_numa) {
+				ptent = pte_modify(ptent, newprot);
+				updated = true;
+			} else {
+				struct page *page;
+
+				page = vm_normal_page(vma, addr, oldpte);
+				if (page) {
+					/* only check non-shared pages */
+					if (!pte_numa(oldpte) &&
+					    page_mapcount(page) == 1) {
+						ptent = pte_mknuma(ptent);
+						updated = true;
+					}
+				}
+			}
 
 			/*
 			 * Avoid taking write faults for pages we know to be
 			 * dirty.
 			 */
-			if (dirty_accountable && pte_dirty(ptent))
+			if (dirty_accountable && pte_dirty(ptent)) {
 				ptent = pte_mkwrite(ptent);
+				updated = true;
+			}
+
+			if (updated)
+				pages++;
 
 			ptep_modify_prot_commit(mm, addr, pte, ptent);
-			pages++;
 		} else if (IS_ENABLED(CONFIG_MIGRATION) && !pte_file(oldpte)) {
 			swp_entry_t entry = pte_to_swp_entry(oldpte);
 
@@ -83,9 +104,25 @@ static unsigned long change_pte_range(struct mm_struct *mm, pmd_t *pmd,
 	return pages;
 }
 
+#ifdef CONFIG_NUMA_BALANCING
+static inline void change_pmd_protnuma(struct mm_struct *mm, unsigned long addr,
+		pmd_t *pmd)
+{
+	spin_lock(&mm->page_table_lock);
+	set_pmd_at(mm, addr & PMD_MASK, pmd, pmd_mknuma(*pmd));
+	spin_unlock(&mm->page_table_lock);
+}
+#else
+static inline void change_pmd_protnuma(struct mm_struct *mm, unsigned long addr,
+		pmd_t *pmd)
+{
+	BUG();
+}
+#endif /* CONFIG_NUMA_BALANCING */
+
 static inline unsigned long change_pmd_range(struct vm_area_struct *vma, pud_t *pud,
 		unsigned long addr, unsigned long end, pgprot_t newprot,
-		int dirty_accountable)
+		int dirty_accountable, int prot_numa)
 {
 	pmd_t *pmd;
 	unsigned long next;
@@ -97,7 +134,7 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma, pud_t *
 		if (pmd_trans_huge(*pmd)) {
 			if (next - addr != HPAGE_PMD_SIZE)
 				split_huge_page_pmd(vma->vm_mm, pmd);
-			else if (change_huge_pmd(vma, pmd, addr, newprot)) {
+			else if (change_huge_pmd(vma, pmd, addr, newprot, prot_numa)) {
 				pages += HPAGE_PMD_NR;
 				continue;
 			}
@@ -105,8 +142,11 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma, pud_t *
 		}
 		if (pmd_none_or_clear_bad(pmd))
 			continue;
-		pages += change_pte_range(vma->vm_mm, pmd, addr, next, newprot,
-				 dirty_accountable);
+		pages += change_pte_range(vma, pmd, addr, next, newprot,
+				 dirty_accountable, prot_numa);
+
+		if (prot_numa)
+			change_pmd_protnuma(vma->vm_mm, addr, pmd);
 	} while (pmd++, addr = next, addr != end);
 
 	return pages;
@@ -114,7 +154,7 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma, pud_t *
 
 static inline unsigned long change_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
 		unsigned long addr, unsigned long end, pgprot_t newprot,
-		int dirty_accountable)
+		int dirty_accountable, int prot_numa)
 {
 	pud_t *pud;
 	unsigned long next;
@@ -126,7 +166,7 @@ static inline unsigned long change_pud_range(struct vm_area_struct *vma, pgd_t *
 		if (pud_none_or_clear_bad(pud))
 			continue;
 		pages += change_pmd_range(vma, pud, addr, next, newprot,
-				 dirty_accountable);
+				 dirty_accountable, prot_numa);
 	} while (pud++, addr = next, addr != end);
 
 	return pages;
@@ -134,7 +174,7 @@ static inline unsigned long change_pud_range(struct vm_area_struct *vma, pgd_t *
 
 static unsigned long change_protection_range(struct vm_area_struct *vma,
 		unsigned long addr, unsigned long end, pgprot_t newprot,
-		int dirty_accountable)
+		int dirty_accountable, int prot_numa)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	pgd_t *pgd;
@@ -150,7 +190,7 @@ static unsigned long change_protection_range(struct vm_area_struct *vma,
 		if (pgd_none_or_clear_bad(pgd))
 			continue;
 		pages += change_pud_range(vma, pgd, addr, next, newprot,
-				 dirty_accountable);
+				 dirty_accountable, prot_numa);
 	} while (pgd++, addr = next, addr != end);
 
 	/* Only flush the TLB if we actually modified any entries: */
@@ -162,7 +202,7 @@ static unsigned long change_protection_range(struct vm_area_struct *vma,
 
 unsigned long change_protection(struct vm_area_struct *vma, unsigned long start,
 		       unsigned long end, pgprot_t newprot,
-		       int dirty_accountable)
+		       int dirty_accountable, int prot_numa)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	unsigned long pages;
@@ -171,7 +211,7 @@ unsigned long change_protection(struct vm_area_struct *vma, unsigned long start,
 	if (is_vm_hugetlb_page(vma))
 		pages = hugetlb_change_protection(vma, start, end, newprot);
 	else
-		pages = change_protection_range(vma, start, end, newprot, dirty_accountable);
+		pages = change_protection_range(vma, start, end, newprot, dirty_accountable, prot_numa);
 	mmu_notifier_invalidate_range_end(mm, start, end);
 
 	return pages;
@@ -249,7 +289,7 @@ success:
 		dirty_accountable = 1;
 	}
 
-	change_protection(vma, start, end, vma->vm_page_prot, dirty_accountable);
+	change_protection(vma, start, end, vma->vm_page_prot, dirty_accountable, 0);
 
 	vm_stat_account(mm, oldflags, vma->vm_file, -nrpages);
 	vm_stat_account(mm, newflags, vma->vm_file, nrpages);
-- 
cgit v1.2.3


From cbee9f88ec1b8dd6b58f25f54e4f52c82ed77690 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 25 Oct 2012 14:16:43 +0200
Subject: mm: numa: Add fault driven placement and migration

NOTE: This patch is based on "sched, numa, mm: Add fault driven
	placement and migration policy" but as it throws away all the policy
	to just leave a basic foundation I had to drop the signed-offs-by.

This patch creates a bare-bones method for setting PTEs pte_numa in the
context of the scheduler that when faulted later will be faulted onto the
node the CPU is running on.  In itself this does nothing useful but any
placement policy will fundamentally depend on receiving hints on placement
from fault context and doing something intelligent about it.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Acked-by: Rik van Riel <riel@redhat.com>
---
 arch/sh/mm/Kconfig       |   1 +
 arch/x86/Kconfig         |   2 +
 include/linux/mm_types.h |  11 +++++
 include/linux/sched.h    |  20 ++++++++
 kernel/sched/core.c      |  13 +++++
 kernel/sched/fair.c      | 125 +++++++++++++++++++++++++++++++++++++++++++++++
 kernel/sched/features.h  |   7 +++
 kernel/sched/sched.h     |   6 +++
 kernel/sysctl.c          |  24 ++++++++-
 mm/huge_memory.c         |   5 +-
 mm/memory.c              |  14 +++++-
 11 files changed, 224 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/arch/sh/mm/Kconfig b/arch/sh/mm/Kconfig
index cb8f9920f4dd..0f7c852f355c 100644
--- a/arch/sh/mm/Kconfig
+++ b/arch/sh/mm/Kconfig
@@ -111,6 +111,7 @@ config VSYSCALL
 config NUMA
 	bool "Non Uniform Memory Access (NUMA) Support"
 	depends on MMU && SYS_SUPPORTS_NUMA && EXPERIMENTAL
+	select ARCH_WANT_NUMA_VARIABLE_LOCALITY
 	default n
 	help
 	  Some SH systems have many various memories scattered around
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 46c3bff3ced2..1137028fc6d9 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -22,6 +22,8 @@ config X86
 	def_bool y
 	select HAVE_AOUT if X86_32
 	select HAVE_UNSTABLE_SCHED_CLOCK
+	select ARCH_SUPPORTS_NUMA_BALANCING
+	select ARCH_WANTS_PROT_NUMA_PROT_NONE
 	select HAVE_IDE
 	select HAVE_OPROFILE
 	select HAVE_PCSPKR_PLATFORM
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 31f8a3af7d94..ed8638c29b3e 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -397,6 +397,17 @@ struct mm_struct {
 #endif
 #ifdef CONFIG_CPUMASK_OFFSTACK
 	struct cpumask cpumask_allocation;
+#endif
+#ifdef CONFIG_NUMA_BALANCING
+	/*
+	 * numa_next_scan is the next time when the PTEs will me marked
+	 * pte_numa to gather statistics and migrate pages to new nodes
+	 * if necessary
+	 */
+	unsigned long numa_next_scan;
+
+	/* numa_scan_seq prevents two threads setting pte_numa */
+	int numa_scan_seq;
 #endif
 	struct uprobes_state uprobes_state;
 };
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 0dd42a02df2e..844af5b12cb2 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1479,6 +1479,14 @@ struct task_struct {
 	short il_next;
 	short pref_node_fork;
 #endif
+#ifdef CONFIG_NUMA_BALANCING
+	int numa_scan_seq;
+	int numa_migrate_seq;
+	unsigned int numa_scan_period;
+	u64 node_stamp;			/* migration stamp  */
+	struct callback_head numa_work;
+#endif /* CONFIG_NUMA_BALANCING */
+
 	struct rcu_head rcu;
 
 	/*
@@ -1553,6 +1561,14 @@ struct task_struct {
 /* Future-safe accessor for struct task_struct's cpus_allowed. */
 #define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed)
 
+#ifdef CONFIG_NUMA_BALANCING
+extern void task_numa_fault(int node, int pages);
+#else
+static inline void task_numa_fault(int node, int pages)
+{
+}
+#endif
+
 /*
  * Priority of a process goes from 0..MAX_PRIO-1, valid RT
  * priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL/SCHED_BATCH
@@ -1990,6 +2006,10 @@ enum sched_tunable_scaling {
 };
 extern enum sched_tunable_scaling sysctl_sched_tunable_scaling;
 
+extern unsigned int sysctl_numa_balancing_scan_period_min;
+extern unsigned int sysctl_numa_balancing_scan_period_max;
+extern unsigned int sysctl_numa_balancing_settle_count;
+
 #ifdef CONFIG_SCHED_DEBUG
 extern unsigned int sysctl_sched_migration_cost;
 extern unsigned int sysctl_sched_nr_migrate;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 2d8927fda712..cad0d092ce3b 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1533,6 +1533,19 @@ static void __sched_fork(struct task_struct *p)
 #ifdef CONFIG_PREEMPT_NOTIFIERS
 	INIT_HLIST_HEAD(&p->preempt_notifiers);
 #endif
+
+#ifdef CONFIG_NUMA_BALANCING
+	if (p->mm && atomic_read(&p->mm->mm_users) == 1) {
+		p->mm->numa_next_scan = jiffies;
+		p->mm->numa_scan_seq = 0;
+	}
+
+	p->node_stamp = 0ULL;
+	p->numa_scan_seq = p->mm ? p->mm->numa_scan_seq : 0;
+	p->numa_migrate_seq = p->mm ? p->mm->numa_scan_seq - 1 : 0;
+	p->numa_scan_period = sysctl_numa_balancing_scan_period_min;
+	p->numa_work.next = &p->numa_work;
+#endif /* CONFIG_NUMA_BALANCING */
 }
 
 /*
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 6b800a14b990..6831abb5dbef 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -26,6 +26,8 @@
 #include <linux/slab.h>
 #include <linux/profile.h>
 #include <linux/interrupt.h>
+#include <linux/mempolicy.h>
+#include <linux/task_work.h>
 
 #include <trace/events/sched.h>
 
@@ -776,6 +778,126 @@ update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
  * Scheduling class queueing methods:
  */
 
+#ifdef CONFIG_NUMA_BALANCING
+/*
+ * numa task sample period in ms: 5s
+ */
+unsigned int sysctl_numa_balancing_scan_period_min = 5000;
+unsigned int sysctl_numa_balancing_scan_period_max = 5000*16;
+
+static void task_numa_placement(struct task_struct *p)
+{
+	int seq = ACCESS_ONCE(p->mm->numa_scan_seq);
+
+	if (p->numa_scan_seq == seq)
+		return;
+	p->numa_scan_seq = seq;
+
+	/* FIXME: Scheduling placement policy hints go here */
+}
+
+/*
+ * Got a PROT_NONE fault for a page on @node.
+ */
+void task_numa_fault(int node, int pages)
+{
+	struct task_struct *p = current;
+
+	/* FIXME: Allocate task-specific structure for placement policy here */
+
+	task_numa_placement(p);
+}
+
+/*
+ * The expensive part of numa migration is done from task_work context.
+ * Triggered from task_tick_numa().
+ */
+void task_numa_work(struct callback_head *work)
+{
+	unsigned long migrate, next_scan, now = jiffies;
+	struct task_struct *p = current;
+	struct mm_struct *mm = p->mm;
+
+	WARN_ON_ONCE(p != container_of(work, struct task_struct, numa_work));
+
+	work->next = work; /* protect against double add */
+	/*
+	 * Who cares about NUMA placement when they're dying.
+	 *
+	 * NOTE: make sure not to dereference p->mm before this check,
+	 * exit_task_work() happens _after_ exit_mm() so we could be called
+	 * without p->mm even though we still had it when we enqueued this
+	 * work.
+	 */
+	if (p->flags & PF_EXITING)
+		return;
+
+	/*
+	 * Enforce maximal scan/migration frequency..
+	 */
+	migrate = mm->numa_next_scan;
+	if (time_before(now, migrate))
+		return;
+
+	if (p->numa_scan_period == 0)
+		p->numa_scan_period = sysctl_numa_balancing_scan_period_min;
+
+	next_scan = now + 2*msecs_to_jiffies(p->numa_scan_period);
+	if (cmpxchg(&mm->numa_next_scan, migrate, next_scan) != migrate)
+		return;
+
+	ACCESS_ONCE(mm->numa_scan_seq)++;
+	{
+		struct vm_area_struct *vma;
+
+		down_read(&mm->mmap_sem);
+		for (vma = mm->mmap; vma; vma = vma->vm_next) {
+			if (!vma_migratable(vma))
+				continue;
+			change_prot_numa(vma, vma->vm_start, vma->vm_end);
+		}
+		up_read(&mm->mmap_sem);
+	}
+}
+
+/*
+ * Drive the periodic memory faults..
+ */
+void task_tick_numa(struct rq *rq, struct task_struct *curr)
+{
+	struct callback_head *work = &curr->numa_work;
+	u64 period, now;
+
+	/*
+	 * We don't care about NUMA placement if we don't have memory.
+	 */
+	if (!curr->mm || (curr->flags & PF_EXITING) || work->next != work)
+		return;
+
+	/*
+	 * Using runtime rather than walltime has the dual advantage that
+	 * we (mostly) drive the selection from busy threads and that the
+	 * task needs to have done some actual work before we bother with
+	 * NUMA placement.
+	 */
+	now = curr->se.sum_exec_runtime;
+	period = (u64)curr->numa_scan_period * NSEC_PER_MSEC;
+
+	if (now - curr->node_stamp > period) {
+		curr->node_stamp = now;
+
+		if (!time_before(jiffies, curr->mm->numa_next_scan)) {
+			init_task_work(work, task_numa_work); /* TODO: move this into sched_fork() */
+			task_work_add(curr, work, true);
+		}
+	}
+}
+#else
+static void task_tick_numa(struct rq *rq, struct task_struct *curr)
+{
+}
+#endif /* CONFIG_NUMA_BALANCING */
+
 static void
 account_entity_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
@@ -4954,6 +5076,9 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
 		cfs_rq = cfs_rq_of(se);
 		entity_tick(cfs_rq, se, queued);
 	}
+
+	if (sched_feat_numa(NUMA))
+		task_tick_numa(rq, curr);
 }
 
 /*
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index eebefcad7027..5fb7aefbec80 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -61,3 +61,10 @@ SCHED_FEAT(TTWU_QUEUE, true)
 SCHED_FEAT(FORCE_SD_OVERLAP, false)
 SCHED_FEAT(RT_RUNTIME_SHARE, true)
 SCHED_FEAT(LB_MIN, false)
+
+/*
+ * Apply the automatic NUMA scheduling policy
+ */
+#ifdef CONFIG_NUMA_BALANCING
+SCHED_FEAT(NUMA,	true)
+#endif
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 7a7db09cfabc..ae31c051ff2f 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -648,6 +648,12 @@ extern struct static_key sched_feat_keys[__SCHED_FEAT_NR];
 #define sched_feat(x) (sysctl_sched_features & (1UL << __SCHED_FEAT_##x))
 #endif /* SCHED_DEBUG && HAVE_JUMP_LABEL */
 
+#ifdef CONFIG_NUMA_BALANCING
+#define sched_feat_numa(x) sched_feat(x)
+#else
+#define sched_feat_numa(x) (0)
+#endif
+
 static inline u64 global_rt_period(void)
 {
 	return (u64)sysctl_sched_rt_period * NSEC_PER_USEC;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 26f65eaa01f9..025e1ae50ef1 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -256,9 +256,11 @@ static int min_sched_granularity_ns = 100000;		/* 100 usecs */
 static int max_sched_granularity_ns = NSEC_PER_SEC;	/* 1 second */
 static int min_wakeup_granularity_ns;			/* 0 usecs */
 static int max_wakeup_granularity_ns = NSEC_PER_SEC;	/* 1 second */
+#ifdef CONFIG_SMP
 static int min_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE;
 static int max_sched_tunable_scaling = SCHED_TUNABLESCALING_END-1;
-#endif
+#endif /* CONFIG_SMP */
+#endif /* CONFIG_SCHED_DEBUG */
 
 #ifdef CONFIG_COMPACTION
 static int min_extfrag_threshold;
@@ -301,6 +303,7 @@ static struct ctl_table kern_table[] = {
 		.extra1		= &min_wakeup_granularity_ns,
 		.extra2		= &max_wakeup_granularity_ns,
 	},
+#ifdef CONFIG_SMP
 	{
 		.procname	= "sched_tunable_scaling",
 		.data		= &sysctl_sched_tunable_scaling,
@@ -347,7 +350,24 @@ static struct ctl_table kern_table[] = {
 		.extra1		= &zero,
 		.extra2		= &one,
 	},
-#endif
+#endif /* CONFIG_SMP */
+#ifdef CONFIG_NUMA_BALANCING
+	{
+		.procname	= "numa_balancing_scan_period_min_ms",
+		.data		= &sysctl_numa_balancing_scan_period_min,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "numa_balancing_scan_period_max_ms",
+		.data		= &sysctl_numa_balancing_scan_period_max,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+#endif /* CONFIG_NUMA_BALANCING */
+#endif /* CONFIG_SCHED_DEBUG */
 	{
 		.procname	= "sched_rt_period_us",
 		.data		= &sysctl_sched_rt_period,
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index d79f7a55bf6f..ee8133794a56 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1046,6 +1046,7 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	 */
 	split_huge_page(page);
 	put_page(page);
+
 	return 0;
 
 clear_pmdnuma:
@@ -1060,8 +1061,10 @@ clear_pmdnuma:
 
 out_unlock:
 	spin_unlock(&mm->page_table_lock);
-	if (page)
+	if (page) {
 		put_page(page);
+		task_numa_fault(numa_node_id(), HPAGE_PMD_NR);
+	}
 	return 0;
 }
 
diff --git a/mm/memory.c b/mm/memory.c
index d52542680e10..8012c1907895 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3454,7 +3454,8 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 {
 	struct page *page = NULL;
 	spinlock_t *ptl;
-	int current_nid, target_nid;
+	int current_nid = -1;
+	int target_nid;
 
 	/*
 	* The "pte" at this point cannot be used safely without
@@ -3501,6 +3502,7 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		current_nid = target_nid;
 
 out:
+	task_numa_fault(current_nid, 1);
 	return 0;
 }
 
@@ -3537,6 +3539,7 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	for (addr = _addr + offset; addr < _addr + PMD_SIZE; pte++, addr += PAGE_SIZE) {
 		pte_t pteval = *pte;
 		struct page *page;
+		int curr_nid;
 		if (!pte_present(pteval))
 			continue;
 		if (!pte_numa(pteval))
@@ -3554,6 +3557,15 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		page = vm_normal_page(vma, addr, pteval);
 		if (unlikely(!page))
 			continue;
+		/* only check non-shared pages */
+		if (unlikely(page_mapcount(page) != 1))
+			continue;
+		pte_unmap_unlock(pte, ptl);
+
+		curr_nid = page_to_nid(page);
+		task_numa_fault(curr_nid, 1);
+
+		pte = pte_offset_map_lock(mm, pmdp, addr, &ptl);
 	}
 	pte_unmap_unlock(orig_pte, ptl);
 
-- 
cgit v1.2.3


From 6e5fb223e89dbe5cb5c563f8d4a4a0a7d62455a8 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 25 Oct 2012 14:16:45 +0200
Subject: mm: sched: numa: Implement constant, per task Working Set Sampling
 (WSS) rate

Previously, to probe the working set of a task, we'd use
a very simple and crude method: mark all of its address
space PROT_NONE.

That method has various (obvious) disadvantages:

 - it samples the working set at dissimilar rates,
   giving some tasks a sampling quality advantage
   over others.

 - creates performance problems for tasks with very
   large working sets

 - over-samples processes with large address spaces but
   which only very rarely execute

Improve that method by keeping a rotating offset into the
address space that marks the current position of the scan,
and advance it by a constant rate (in a CPU cycles execution
proportional manner). If the offset reaches the last mapped
address of the mm then it then it starts over at the first
address.

The per-task nature of the working set sampling functionality in this tree
allows such constant rate, per task, execution-weight proportional sampling
of the working set, with an adaptive sampling interval/frequency that
goes from once per 100ms up to just once per 8 seconds.  The current
sampling volume is 256 MB per interval.

As tasks mature and converge their working set, so does the
sampling rate slow down to just a trickle, 256 MB per 8
seconds of CPU time executed.

This, beyond being adaptive, also rate-limits rarely
executing systems and does not over-sample on overloaded
systems.

[ In AutoNUMA speak, this patch deals with the effective sampling
  rate of the 'hinting page fault'. AutoNUMA's scanning is
  currently rate-limited, but it is also fundamentally
  single-threaded, executing in the knuma_scand kernel thread,
  so the limit in AutoNUMA is global and does not scale up with
  the number of CPUs, nor does it scan tasks in an execution
  proportional manner.

  So the idea of rate-limiting the scanning was first implemented
  in the AutoNUMA tree via a global rate limit. This patch goes
  beyond that by implementing an execution rate proportional
  working set sampling rate that is not implemented via a single
  global scanning daemon. ]

[ Dan Carpenter pointed out a possible NULL pointer dereference in the
  first version of this patch. ]

Based-on-idea-by: Andrea Arcangeli <aarcange@redhat.com>
Bug-Found-By: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Rik van Riel <riel@redhat.com>
[ Wrote changelog and fixed bug. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
---
 include/linux/mm_types.h |  3 +++
 include/linux/sched.h    |  1 +
 kernel/sched/fair.c      | 65 ++++++++++++++++++++++++++++++++++++++----------
 kernel/sysctl.c          |  7 ++++++
 4 files changed, 63 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index ed8638c29b3e..d1e246c5e50c 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -406,6 +406,9 @@ struct mm_struct {
 	 */
 	unsigned long numa_next_scan;
 
+	/* Restart point for scanning and setting pte_numa */
+	unsigned long numa_scan_offset;
+
 	/* numa_scan_seq prevents two threads setting pte_numa */
 	int numa_scan_seq;
 #endif
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 844af5b12cb2..37841958d234 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2008,6 +2008,7 @@ extern enum sched_tunable_scaling sysctl_sched_tunable_scaling;
 
 extern unsigned int sysctl_numa_balancing_scan_period_min;
 extern unsigned int sysctl_numa_balancing_scan_period_max;
+extern unsigned int sysctl_numa_balancing_scan_size;
 extern unsigned int sysctl_numa_balancing_settle_count;
 
 #ifdef CONFIG_SCHED_DEBUG
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 6831abb5dbef..0a349dd1fa60 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -780,10 +780,13 @@ update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
 
 #ifdef CONFIG_NUMA_BALANCING
 /*
- * numa task sample period in ms: 5s
+ * numa task sample period in ms
  */
-unsigned int sysctl_numa_balancing_scan_period_min = 5000;
-unsigned int sysctl_numa_balancing_scan_period_max = 5000*16;
+unsigned int sysctl_numa_balancing_scan_period_min = 100;
+unsigned int sysctl_numa_balancing_scan_period_max = 100*16;
+
+/* Portion of address space to scan in MB */
+unsigned int sysctl_numa_balancing_scan_size = 256;
 
 static void task_numa_placement(struct task_struct *p)
 {
@@ -808,6 +811,12 @@ void task_numa_fault(int node, int pages)
 	task_numa_placement(p);
 }
 
+static void reset_ptenuma_scan(struct task_struct *p)
+{
+	ACCESS_ONCE(p->mm->numa_scan_seq)++;
+	p->mm->numa_scan_offset = 0;
+}
+
 /*
  * The expensive part of numa migration is done from task_work context.
  * Triggered from task_tick_numa().
@@ -817,6 +826,9 @@ void task_numa_work(struct callback_head *work)
 	unsigned long migrate, next_scan, now = jiffies;
 	struct task_struct *p = current;
 	struct mm_struct *mm = p->mm;
+	struct vm_area_struct *vma;
+	unsigned long offset, end;
+	long length;
 
 	WARN_ON_ONCE(p != container_of(work, struct task_struct, numa_work));
 
@@ -846,18 +858,45 @@ void task_numa_work(struct callback_head *work)
 	if (cmpxchg(&mm->numa_next_scan, migrate, next_scan) != migrate)
 		return;
 
-	ACCESS_ONCE(mm->numa_scan_seq)++;
-	{
-		struct vm_area_struct *vma;
+	offset = mm->numa_scan_offset;
+	length = sysctl_numa_balancing_scan_size;
+	length <<= 20;
 
-		down_read(&mm->mmap_sem);
-		for (vma = mm->mmap; vma; vma = vma->vm_next) {
-			if (!vma_migratable(vma))
-				continue;
-			change_prot_numa(vma, vma->vm_start, vma->vm_end);
-		}
-		up_read(&mm->mmap_sem);
+	down_read(&mm->mmap_sem);
+	vma = find_vma(mm, offset);
+	if (!vma) {
+		reset_ptenuma_scan(p);
+		offset = 0;
+		vma = mm->mmap;
+	}
+	for (; vma && length > 0; vma = vma->vm_next) {
+		if (!vma_migratable(vma))
+			continue;
+
+		/* Skip small VMAs. They are not likely to be of relevance */
+		if (((vma->vm_end - vma->vm_start) >> PAGE_SHIFT) < HPAGE_PMD_NR)
+			continue;
+
+		offset = max(offset, vma->vm_start);
+		end = min(ALIGN(offset + length, HPAGE_SIZE), vma->vm_end);
+		length -= end - offset;
+
+		change_prot_numa(vma, offset, end);
+
+		offset = end;
 	}
+
+	/*
+	 * It is possible to reach the end of the VMA list but the last few VMAs are
+	 * not guaranteed to the vma_migratable. If they are not, we would find the
+	 * !migratable VMA on the next scan but not reset the scanner to the start
+	 * so check it now.
+	 */
+	if (vma)
+		mm->numa_scan_offset = offset;
+	else
+		reset_ptenuma_scan(p);
+	up_read(&mm->mmap_sem);
 }
 
 /*
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 025e1ae50ef1..7d3a2e0475e5 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -366,6 +366,13 @@ static struct ctl_table kern_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
+	{
+		.procname	= "numa_balancing_scan_size_mb",
+		.data		= &sysctl_numa_balancing_scan_size,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
 #endif /* CONFIG_NUMA_BALANCING */
 #endif /* CONFIG_SCHED_DEBUG */
 	{
-- 
cgit v1.2.3


From 4b96a29ba891dd59734cb7be80a900fe93aa2d9f Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 25 Oct 2012 14:16:47 +0200
Subject: mm: sched: numa: Implement slow start for working set sampling

Add a 1 second delay before starting to scan the working set of
a task and starting to balance it amongst nodes.

[ note that before the constant per task WSS sampling rate patch
  the initial scan would happen much later still, in effect that
  patch caused this regression. ]

The theory is that short-run tasks benefit very little from NUMA
placement: they come and go, and they better stick to the node
they were started on. As tasks mature and rebalance to other CPUs
and nodes, so does their NUMA placement have to change and so
does it start to matter more and more.

In practice this change fixes an observable kbuild regression:

   # [ a perf stat --null --repeat 10 test of ten bzImage builds to /dev/shm ]

   !NUMA:
   45.291088843 seconds time elapsed                                          ( +-  0.40% )
   45.154231752 seconds time elapsed                                          ( +-  0.36% )

   +NUMA, no slow start:
   46.172308123 seconds time elapsed                                          ( +-  0.30% )
   46.343168745 seconds time elapsed                                          ( +-  0.25% )

   +NUMA, 1 sec slow start:
   45.224189155 seconds time elapsed                                          ( +-  0.25% )
   45.160866532 seconds time elapsed                                          ( +-  0.17% )

and it also fixes an observable perf bench (hackbench) regression:

   # perf stat --null --repeat 10 perf bench sched messaging

   -NUMA:

   -NUMA:                  0.246225691 seconds time elapsed                   ( +-  1.31% )
   +NUMA no slow start:    0.252620063 seconds time elapsed                   ( +-  1.13% )

   +NUMA 1sec delay:       0.248076230 seconds time elapsed                   ( +-  1.35% )

The implementation is simple and straightforward, most of the patch
deals with adding the /proc/sys/kernel/numa_balancing_scan_delay_ms tunable
knob.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Rik van Riel <riel@redhat.com>
[ Wrote the changelog, ran measurements, tuned the default. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
---
 include/linux/sched.h | 1 +
 kernel/sched/core.c   | 2 +-
 kernel/sched/fair.c   | 5 +++++
 kernel/sysctl.c       | 7 +++++++
 4 files changed, 14 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 37841958d234..7d95a232b5b9 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2006,6 +2006,7 @@ enum sched_tunable_scaling {
 };
 extern enum sched_tunable_scaling sysctl_sched_tunable_scaling;
 
+extern unsigned int sysctl_numa_balancing_scan_delay;
 extern unsigned int sysctl_numa_balancing_scan_period_min;
 extern unsigned int sysctl_numa_balancing_scan_period_max;
 extern unsigned int sysctl_numa_balancing_scan_size;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index cad0d092ce3b..fbfc4843063f 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1543,7 +1543,7 @@ static void __sched_fork(struct task_struct *p)
 	p->node_stamp = 0ULL;
 	p->numa_scan_seq = p->mm ? p->mm->numa_scan_seq : 0;
 	p->numa_migrate_seq = p->mm ? p->mm->numa_scan_seq - 1 : 0;
-	p->numa_scan_period = sysctl_numa_balancing_scan_period_min;
+	p->numa_scan_period = sysctl_numa_balancing_scan_delay;
 	p->numa_work.next = &p->numa_work;
 #endif /* CONFIG_NUMA_BALANCING */
 }
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index f6e1f25ed2bd..7727b0161579 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -788,6 +788,9 @@ unsigned int sysctl_numa_balancing_scan_period_max = 100*16;
 /* Portion of address space to scan in MB */
 unsigned int sysctl_numa_balancing_scan_size = 256;
 
+/* Scan @scan_size MB every @scan_period after an initial @scan_delay in ms */
+unsigned int sysctl_numa_balancing_scan_delay = 1000;
+
 static void task_numa_placement(struct task_struct *p)
 {
 	int seq = ACCESS_ONCE(p->mm->numa_scan_seq);
@@ -929,6 +932,8 @@ void task_tick_numa(struct rq *rq, struct task_struct *curr)
 	period = (u64)curr->numa_scan_period * NSEC_PER_MSEC;
 
 	if (now - curr->node_stamp > period) {
+		if (!curr->node_stamp)
+			curr->numa_scan_period = sysctl_numa_balancing_scan_period_min;
 		curr->node_stamp = now;
 
 		if (!time_before(jiffies, curr->mm->numa_next_scan)) {
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 7d3a2e0475e5..48a68cc258c1 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -352,6 +352,13 @@ static struct ctl_table kern_table[] = {
 	},
 #endif /* CONFIG_SMP */
 #ifdef CONFIG_NUMA_BALANCING
+	{
+		.procname	= "numa_balancing_scan_delay_ms",
+		.data		= &sysctl_numa_balancing_scan_delay,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
 	{
 		.procname	= "numa_balancing_scan_period_min_ms",
 		.data		= &sysctl_numa_balancing_scan_period_min,
-- 
cgit v1.2.3


From 03c5a6e16322c997bf8f264851bfa3f532ad515f Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Fri, 2 Nov 2012 14:52:48 +0000
Subject: mm: numa: Add pte updates, hinting and migration stats

It is tricky to quantify the basic cost of automatic NUMA placement in a
meaningful manner. This patch adds some vmstats that can be used as part
of a basic costing model.

u    = basic unit = sizeof(void *)
Ca   = cost of struct page access = sizeof(struct page) / u
Cpte = Cost PTE access = Ca
Cupdate = Cost PTE update = (2 * Cpte) + (2 * Wlock)
	where Cpte is incurred twice for a read and a write and Wlock
	is a constant representing the cost of taking or releasing a
	lock
Cnumahint = Cost of a minor page fault = some high constant e.g. 1000
Cpagerw = Cost to read or write a full page = Ca + PAGE_SIZE/u
Ci = Cost of page isolation = Ca + Wi
	where Wi is a constant that should reflect the approximate cost
	of the locking operation
Cpagecopy = Cpagerw + (Cpagerw * Wnuma) + Ci + (Ci * Wnuma)
	where Wnuma is the approximate NUMA factor. 1 is local. 1.2
	would imply that remote accesses are 20% more expensive

Balancing cost = Cpte * numa_pte_updates +
		Cnumahint * numa_hint_faults +
		Ci * numa_pages_migrated +
		Cpagecopy * numa_pages_migrated

Note that numa_pages_migrated is used as a measure of how many pages
were isolated even though it would miss pages that failed to migrate. A
vmstat counter could have been added for it but the isolation cost is
pretty marginal in comparison to the overall cost so it seemed overkill.

The ideal way to measure automatic placement benefit would be to count
the number of remote accesses versus local accesses and do something like

	benefit = (remote_accesses_before - remove_access_after) * Wnuma

but the information is not readily available. As a workload converges, the
expection would be that the number of remote numa hints would reduce to 0.

	convergence = numa_hint_faults_local / numa_hint_faults
		where this is measured for the last N number of
		numa hints recorded. When the workload is fully
		converged the value is 1.

This can measure if the placement policy is converging and how fast it is
doing it.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Acked-by: Rik van Riel <riel@redhat.com>
---
 include/linux/vm_event_item.h |  6 ++++++
 include/linux/vmstat.h        |  8 ++++++++
 mm/huge_memory.c              |  5 +++++
 mm/memory.c                   | 12 ++++++++++++
 mm/mempolicy.c                |  2 ++
 mm/migrate.c                  |  3 ++-
 mm/vmstat.c                   |  6 ++++++
 7 files changed, 41 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index a1f750b8e72a..55600049e794 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -38,6 +38,12 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
 		KSWAPD_LOW_WMARK_HIT_QUICKLY, KSWAPD_HIGH_WMARK_HIT_QUICKLY,
 		KSWAPD_SKIP_CONGESTION_WAIT,
 		PAGEOUTRUN, ALLOCSTALL, PGROTATED,
+#ifdef CONFIG_NUMA_BALANCING
+		NUMA_PTE_UPDATES,
+		NUMA_HINT_FAULTS,
+		NUMA_HINT_FAULTS_LOCAL,
+		NUMA_PAGE_MIGRATE,
+#endif
 #ifdef CONFIG_MIGRATION
 		PGMIGRATE_SUCCESS, PGMIGRATE_FAIL,
 #endif
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index 92a86b2cce33..a13291f7da88 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -80,6 +80,14 @@ static inline void vm_events_fold_cpu(int cpu)
 
 #endif /* CONFIG_VM_EVENT_COUNTERS */
 
+#ifdef CONFIG_NUMA_BALANCING
+#define count_vm_numa_event(x)     count_vm_event(x)
+#define count_vm_numa_events(x, y) count_vm_events(x, y)
+#else
+#define count_vm_numa_event(x) do {} while (0)
+#define count_vm_numa_events(x, y) do {} while (0)
+#endif /* CONFIG_NUMA_BALANCING */
+
 #define __count_zone_vm_events(item, zone, delta) \
 		__count_vm_events(item##_NORMAL - ZONE_NORMAL + \
 		zone_idx(zone), delta)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index ee8133794a56..f3a477fffd09 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1026,6 +1026,7 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	struct page *page = NULL;
 	unsigned long haddr = addr & HPAGE_PMD_MASK;
 	int target_nid;
+	int current_nid = -1;
 
 	spin_lock(&mm->page_table_lock);
 	if (unlikely(!pmd_same(pmd, *pmdp)))
@@ -1034,6 +1035,10 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	page = pmd_page(pmd);
 	get_page(page);
 	spin_unlock(&mm->page_table_lock);
+	current_nid = page_to_nid(page);
+	count_vm_numa_event(NUMA_HINT_FAULTS);
+	if (current_nid == numa_node_id())
+		count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL);
 
 	target_nid = mpol_misplaced(page, vma, haddr);
 	if (target_nid == -1)
diff --git a/mm/memory.c b/mm/memory.c
index 8012c1907895..8a7b4ccbe136 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3477,6 +3477,7 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	set_pte_at(mm, addr, ptep, pte);
 	update_mmu_cache(vma, addr, ptep);
 
+	count_vm_numa_event(NUMA_HINT_FAULTS);
 	page = vm_normal_page(vma, addr, pte);
 	if (!page) {
 		pte_unmap_unlock(ptep, ptl);
@@ -3485,6 +3486,8 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 
 	get_page(page);
 	current_nid = page_to_nid(page);
+	if (current_nid == numa_node_id())
+		count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL);
 	target_nid = mpol_misplaced(page, vma, addr);
 	pte_unmap_unlock(ptep, ptl);
 	if (target_nid == -1) {
@@ -3517,6 +3520,9 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	unsigned long offset;
 	spinlock_t *ptl;
 	bool numa = false;
+	int local_nid = numa_node_id();
+	unsigned long nr_faults = 0;
+	unsigned long nr_faults_local = 0;
 
 	spin_lock(&mm->page_table_lock);
 	pmd = *pmdp;
@@ -3565,10 +3571,16 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		curr_nid = page_to_nid(page);
 		task_numa_fault(curr_nid, 1);
 
+		nr_faults++;
+		if (curr_nid == local_nid)
+			nr_faults_local++;
+
 		pte = pte_offset_map_lock(mm, pmdp, addr, &ptl);
 	}
 	pte_unmap_unlock(orig_pte, ptl);
 
+	count_vm_numa_events(NUMA_HINT_FAULTS, nr_faults);
+	count_vm_numa_events(NUMA_HINT_FAULTS_LOCAL, nr_faults_local);
 	return 0;
 }
 #else
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index a7a62fe7c280..516491fbfaa8 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -583,6 +583,8 @@ unsigned long change_prot_numa(struct vm_area_struct *vma,
 	BUILD_BUG_ON(_PAGE_NUMA != _PAGE_PROTNONE);
 
 	nr_updated = change_protection(vma, addr, end, vma->vm_page_prot, 0, 1);
+	if (nr_updated)
+		count_vm_numa_events(NUMA_PTE_UPDATES, nr_updated);
 
 	return nr_updated;
 }
diff --git a/mm/migrate.c b/mm/migrate.c
index c7d550011a64..23bba5d6edff 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1514,7 +1514,8 @@ int migrate_misplaced_page(struct page *page, int node)
 		if (nr_remaining) {
 			putback_lru_pages(&migratepages);
 			isolated = 0;
-		}
+		} else
+			count_vm_numa_event(NUMA_PAGE_MIGRATE);
 	}
 	BUG_ON(!list_empty(&migratepages));
 out:
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 3a067fabe190..c0f1f6db5182 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -774,6 +774,12 @@ const char * const vmstat_text[] = {
 
 	"pgrotated",
 
+#ifdef CONFIG_NUMA_BALANCING
+	"numa_pte_updates",
+	"numa_hint_faults",
+	"numa_hint_faults_local",
+	"numa_pages_migrated",
+#endif
 #ifdef CONFIG_MIGRATION
 	"pgmigrate_success",
 	"pgmigrate_fail",
-- 
cgit v1.2.3


From 8177a420ed7c16c171ed3c3aec5b0676db38c247 Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <aarcange@redhat.com>
Date: Fri, 23 Mar 2012 20:56:34 +0100
Subject: mm: numa: Structures for Migrate On Fault per NUMA migration rate
 limiting

This defines the per-node data used by Migrate On Fault in order to
rate limit the migration. The rate limiting is applied independently
to each destination node.

Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Mel Gorman <mgorman@suse.de>
---
 include/linux/mmzone.h | 13 +++++++++++++
 mm/page_alloc.c        |  5 +++++
 2 files changed, 18 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index a23923ba8263..28601fdfcdb2 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -717,6 +717,19 @@ typedef struct pglist_data {
 	struct task_struct *kswapd;	/* Protected by lock_memory_hotplug() */
 	int kswapd_max_order;
 	enum zone_type classzone_idx;
+#ifdef CONFIG_NUMA_BALANCING
+	/*
+	 * Lock serializing the per destination node AutoNUMA memory
+	 * migration rate limiting data.
+	 */
+	spinlock_t numabalancing_migrate_lock;
+
+	/* Rate limiting time interval */
+	unsigned long numabalancing_migrate_next_window;
+
+	/* Number of pages migrated during the rate limiting time interval */
+	unsigned long numabalancing_migrate_nr_pages;
+#endif
 } pg_data_t;
 
 #define node_present_pages(nid)	(NODE_DATA(nid)->node_present_pages)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 5953dc2d196f..ef025e20dbee 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4449,6 +4449,11 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
 	int ret;
 
 	pgdat_resize_init(pgdat);
+#ifdef CONFIG_NUMA_BALANCING
+	spin_lock_init(&pgdat->numabalancing_migrate_lock);
+	pgdat->numabalancing_migrate_nr_pages = 0;
+	pgdat->numabalancing_migrate_next_window = jiffies;
+#endif
 	init_waitqueue_head(&pgdat->kswapd_wait);
 	init_waitqueue_head(&pgdat->pfmemalloc_wait);
 	pgdat_page_cgroup_init(pgdat);
-- 
cgit v1.2.3


From e14808b49f55e0e1135da5e4a154a540dd9f3662 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Mon, 19 Nov 2012 10:59:15 +0000
Subject: mm: numa: Rate limit setting of pte_numa if node is saturated

If there are a large number of NUMA hinting faults and all of them
are resulting in migrations it may indicate that memory is just
bouncing uselessly around. NUMA balancing cost is likely exceeding
any benefit from locality. Rate limit the PTE updates if the node
is migration rate-limited. As noted in the comments, this distorts
the NUMA faulting statistics.

Signed-off-by: Mel Gorman <mgorman@suse.de>
---
 include/linux/migrate.h |  6 ++++++
 kernel/sched/fair.c     |  9 +++++++++
 mm/migrate.c            | 20 ++++++++++++++++++++
 3 files changed, 35 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index f0d0313eea6f..91556889adac 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -77,11 +77,17 @@ static inline int migrate_huge_page_move_mapping(struct address_space *mapping,
 
 #ifdef CONFIG_NUMA_BALANCING
 extern int migrate_misplaced_page(struct page *page, int node);
+extern int migrate_misplaced_page(struct page *page, int node);
+extern bool migrate_ratelimited(int node);
 #else
 static inline int migrate_misplaced_page(struct page *page, int node)
 {
 	return -EAGAIN; /* can't migrate now */
 }
+static inline bool migrate_ratelimited(int node)
+{
+	return false;
+}
 #endif /* CONFIG_NUMA_BALANCING */
 
 #endif /* _LINUX_MIGRATE_H */
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 7727b0161579..37e895a941ab 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -27,6 +27,7 @@
 #include <linux/profile.h>
 #include <linux/interrupt.h>
 #include <linux/mempolicy.h>
+#include <linux/migrate.h>
 #include <linux/task_work.h>
 
 #include <trace/events/sched.h>
@@ -861,6 +862,14 @@ void task_numa_work(struct callback_head *work)
 	if (cmpxchg(&mm->numa_next_scan, migrate, next_scan) != migrate)
 		return;
 
+	/*
+	 * Do not set pte_numa if the current running node is rate-limited.
+	 * This loses statistics on the fault but if we are unwilling to
+	 * migrate to this node, it is less likely we can do useful work
+	 */
+	if (migrate_ratelimited(numa_node_id()))
+		return;
+
 	start = mm->numa_scan_offset;
 	pages = sysctl_numa_balancing_scan_size;
 	pages <<= 20 - PAGE_SHIFT; /* MB in pages */
diff --git a/mm/migrate.c b/mm/migrate.c
index 4b8267f1842f..32a1afca6009 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1464,10 +1464,30 @@ static struct page *alloc_misplaced_dst_page(struct page *page,
  * page migration rate limiting control.
  * Do not migrate more than @pages_to_migrate in a @migrate_interval_millisecs
  * window of time. Default here says do not migrate more than 1280M per second.
+ * If a node is rate-limited then PTE NUMA updates are also rate-limited. However
+ * as it is faults that reset the window, pte updates will happen unconditionally
+ * if there has not been a fault since @pteupdate_interval_millisecs after the
+ * throttle window closed.
  */
 static unsigned int migrate_interval_millisecs __read_mostly = 100;
+static unsigned int pteupdate_interval_millisecs __read_mostly = 1000;
 static unsigned int ratelimit_pages __read_mostly = 128 << (20 - PAGE_SHIFT);
 
+/* Returns true if NUMA migration is currently rate limited */
+bool migrate_ratelimited(int node)
+{
+	pg_data_t *pgdat = NODE_DATA(node);
+
+	if (time_after(jiffies, pgdat->numabalancing_migrate_next_window +
+				msecs_to_jiffies(pteupdate_interval_millisecs)))
+		return false;
+
+	if (pgdat->numabalancing_migrate_nr_pages < ratelimit_pages)
+		return false;
+
+	return true;
+}
+
 /*
  * Attempt to migrate a misplaced page to the specified destination
  * node. Caller is expected to have an elevated reference count on
-- 
cgit v1.2.3


From 57e0a0309160b1b4ebde9f3c6a867cd96ac368bf Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Mon, 12 Nov 2012 09:06:20 +0000
Subject: mm: numa: Introduce last_nid to the page frame

This patch introduces a last_nid field to the page struct. This is used
to build a two-stage filter in the next patch that is aimed at
mitigating a problem whereby pages migrate to the wrong node when
referenced by a process that was running off its home node.

Signed-off-by: Mel Gorman <mgorman@suse.de>
---
 include/linux/mm.h       | 30 ++++++++++++++++++++++++++++++
 include/linux/mm_types.h |  4 ++++
 mm/page_alloc.c          |  2 ++
 3 files changed, 36 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index d04c2f0aab36..d87f9ec4a145 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -693,6 +693,36 @@ static inline int page_to_nid(const struct page *page)
 }
 #endif
 
+#ifdef CONFIG_NUMA_BALANCING
+static inline int page_xchg_last_nid(struct page *page, int nid)
+{
+	return xchg(&page->_last_nid, nid);
+}
+
+static inline int page_last_nid(struct page *page)
+{
+	return page->_last_nid;
+}
+static inline void reset_page_last_nid(struct page *page)
+{
+	page->_last_nid = -1;
+}
+#else
+static inline int page_xchg_last_nid(struct page *page, int nid)
+{
+	return page_to_nid(page);
+}
+
+static inline int page_last_nid(struct page *page)
+{
+	return page_to_nid(page);
+}
+
+static inline void reset_page_last_nid(struct page *page)
+{
+}
+#endif
+
 static inline struct zone *page_zone(const struct page *page)
 {
 	return &NODE_DATA(page_to_nid(page))->node_zones[page_zonenum(page)];
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index d1e246c5e50c..c5fffa239861 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -175,6 +175,10 @@ struct page {
 	 */
 	void *shadow;
 #endif
+
+#ifdef CONFIG_NUMA_BALANCING
+	int _last_nid;
+#endif
 }
 /*
  * The struct page can be forced to be double word aligned so that atomic ops
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index ef025e20dbee..73f226a1206e 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -608,6 +608,7 @@ static inline int free_pages_check(struct page *page)
 		bad_page(page);
 		return 1;
 	}
+	reset_page_last_nid(page);
 	if (page->flags & PAGE_FLAGS_CHECK_AT_PREP)
 		page->flags &= ~PAGE_FLAGS_CHECK_AT_PREP;
 	return 0;
@@ -3826,6 +3827,7 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
 		mminit_verify_page_links(page, zone, nid, pfn);
 		init_page_count(page);
 		reset_page_mapcount(page);
+		reset_page_last_nid(page);
 		SetPageReserved(page);
 		/*
 		 * Mark the block movable so that blocks are reserved for
-- 
cgit v1.2.3


From b8593bfda1652755136333cdd362de125b283a9c Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Wed, 21 Nov 2012 01:18:23 +0000
Subject: mm: sched: Adapt the scanning rate if a NUMA hinting fault does not
 migrate

The PTE scanning rate and fault rates are two of the biggest sources of
system CPU overhead with automatic NUMA placement.  Ideally a proper policy
would detect if a workload was properly placed, schedule and adjust the
PTE scanning rate accordingly. We do not track the necessary information
to do that but we at least know if we migrated or not.

This patch scans slower if a page was not migrated as the result of a
NUMA hinting fault up to sysctl_numa_balancing_scan_period_max which is
now higher than the previous default. Once every minute it will reset
the scanner in case of phase changes.

This is hilariously crude and the numbers are arbitrary. Workloads will
converge quite slowly in comparison to what a proper policy should be able
to do. On the plus side, we will chew up less CPU for workloads that have
no need for automatic balancing.

Signed-off-by: Mel Gorman <mgorman@suse.de>
---
 include/linux/mm_types.h |  3 +++
 include/linux/sched.h    |  5 +++--
 kernel/sched/core.c      |  1 +
 kernel/sched/fair.c      | 29 +++++++++++++++++++++--------
 kernel/sysctl.c          |  7 +++++++
 mm/huge_memory.c         |  2 +-
 mm/memory.c              | 12 ++++++++----
 7 files changed, 44 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index c5fffa239861..e850a23dd6ec 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -410,6 +410,9 @@ struct mm_struct {
 	 */
 	unsigned long numa_next_scan;
 
+	/* numa_next_reset is when the PTE scanner period will be reset */
+	unsigned long numa_next_reset;
+
 	/* Restart point for scanning and setting pte_numa */
 	unsigned long numa_scan_offset;
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 7d95a232b5b9..0f4ff2bd03f6 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1562,9 +1562,9 @@ struct task_struct {
 #define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed)
 
 #ifdef CONFIG_NUMA_BALANCING
-extern void task_numa_fault(int node, int pages);
+extern void task_numa_fault(int node, int pages, bool migrated);
 #else
-static inline void task_numa_fault(int node, int pages)
+static inline void task_numa_fault(int node, int pages, bool migrated)
 {
 }
 #endif
@@ -2009,6 +2009,7 @@ extern enum sched_tunable_scaling sysctl_sched_tunable_scaling;
 extern unsigned int sysctl_numa_balancing_scan_delay;
 extern unsigned int sysctl_numa_balancing_scan_period_min;
 extern unsigned int sysctl_numa_balancing_scan_period_max;
+extern unsigned int sysctl_numa_balancing_scan_period_reset;
 extern unsigned int sysctl_numa_balancing_scan_size;
 extern unsigned int sysctl_numa_balancing_settle_count;
 
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index fbfc4843063f..9d255bc0e278 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1537,6 +1537,7 @@ static void __sched_fork(struct task_struct *p)
 #ifdef CONFIG_NUMA_BALANCING
 	if (p->mm && atomic_read(&p->mm->mm_users) == 1) {
 		p->mm->numa_next_scan = jiffies;
+		p->mm->numa_next_reset = jiffies;
 		p->mm->numa_scan_seq = 0;
 	}
 
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index dd18087fd369..4b577863933f 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -784,7 +784,8 @@ update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
  * numa task sample period in ms
  */
 unsigned int sysctl_numa_balancing_scan_period_min = 100;
-unsigned int sysctl_numa_balancing_scan_period_max = 100*16;
+unsigned int sysctl_numa_balancing_scan_period_max = 100*50;
+unsigned int sysctl_numa_balancing_scan_period_reset = 100*600;
 
 /* Portion of address space to scan in MB */
 unsigned int sysctl_numa_balancing_scan_size = 256;
@@ -806,20 +807,19 @@ static void task_numa_placement(struct task_struct *p)
 /*
  * Got a PROT_NONE fault for a page on @node.
  */
-void task_numa_fault(int node, int pages)
+void task_numa_fault(int node, int pages, bool migrated)
 {
 	struct task_struct *p = current;
 
 	/* FIXME: Allocate task-specific structure for placement policy here */
 
 	/*
-	 * Assume that as faults occur that pages are getting properly placed
-	 * and fewer NUMA hints are required. Note that this is a big
-	 * assumption, it assumes processes reach a steady steady with no
-	 * further phase changes.
+	 * If pages are properly placed (did not migrate) then scan slower.
+	 * This is reset periodically in case of phase changes
 	 */
-	p->numa_scan_period = min(sysctl_numa_balancing_scan_period_max,
-				p->numa_scan_period + jiffies_to_msecs(2));
+        if (!migrated)
+		p->numa_scan_period = min(sysctl_numa_balancing_scan_period_max,
+			p->numa_scan_period + jiffies_to_msecs(10));
 
 	task_numa_placement(p);
 }
@@ -857,6 +857,19 @@ void task_numa_work(struct callback_head *work)
 	if (p->flags & PF_EXITING)
 		return;
 
+	/*
+	 * Reset the scan period if enough time has gone by. Objective is that
+	 * scanning will be reduced if pages are properly placed. As tasks
+	 * can enter different phases this needs to be re-examined. Lacking
+	 * proper tracking of reference behaviour, this blunt hammer is used.
+	 */
+	migrate = mm->numa_next_reset;
+	if (time_after(now, migrate)) {
+		p->numa_scan_period = sysctl_numa_balancing_scan_period_min;
+		next_scan = now + msecs_to_jiffies(sysctl_numa_balancing_scan_period_reset);
+		xchg(&mm->numa_next_reset, next_scan);
+	}
+
 	/*
 	 * Enforce maximal scan/migration frequency..
 	 */
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 48a68cc258c1..8906f90d6fa2 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -366,6 +366,13 @@ static struct ctl_table kern_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
+	{
+		.procname	= "numa_balancing_scan_period_reset",
+		.data		= &sysctl_numa_balancing_scan_period_reset,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
 	{
 		.procname	= "numa_balancing_scan_period_max_ms",
 		.data		= &sysctl_numa_balancing_scan_period_max,
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 79b96064f8fc..199b261a257e 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1068,7 +1068,7 @@ out_unlock:
 	spin_unlock(&mm->page_table_lock);
 	if (page) {
 		put_page(page);
-		task_numa_fault(numa_node_id(), HPAGE_PMD_NR);
+		task_numa_fault(numa_node_id(), HPAGE_PMD_NR, false);
 	}
 	return 0;
 }
diff --git a/mm/memory.c b/mm/memory.c
index 84c6d9eab182..39edb11b63dc 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3468,6 +3468,7 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	spinlock_t *ptl;
 	int current_nid = -1;
 	int target_nid;
+	bool migrated = false;
 
 	/*
 	* The "pte" at this point cannot be used safely without
@@ -3509,12 +3510,13 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	}
 
 	/* Migrate to the requested node */
-	if (migrate_misplaced_page(page, target_nid))
+	migrated = migrate_misplaced_page(page, target_nid);
+	if (migrated)
 		current_nid = target_nid;
 
 out:
 	if (current_nid != -1)
-		task_numa_fault(current_nid, 1);
+		task_numa_fault(current_nid, 1, migrated);
 	return 0;
 }
 
@@ -3554,6 +3556,7 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		struct page *page;
 		int curr_nid = local_nid;
 		int target_nid;
+		bool migrated;
 		if (!pte_present(pteval))
 			continue;
 		if (!pte_numa(pteval))
@@ -3590,9 +3593,10 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 
 		/* Migrate to the requested node */
 		pte_unmap_unlock(pte, ptl);
-		if (migrate_misplaced_page(page, target_nid))
+		migrated = migrate_misplaced_page(page, target_nid);
+		if (migrated)
 			curr_nid = target_nid;
-		task_numa_fault(curr_nid, 1);
+		task_numa_fault(curr_nid, 1, migrated);
 
 		pte = pte_offset_map_lock(mm, pmdp, addr, &ptl);
 	}
-- 
cgit v1.2.3


From 1a687c2e9a99335c9e77392f050fe607fa18a652 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Thu, 22 Nov 2012 11:16:36 +0000
Subject: mm: sched: numa: Control enabling and disabling of NUMA balancing

This patch adds Kconfig options and kernel parameters to allow the
enabling and disabling of automatic NUMA balancing. The existance
of such a switch was and is very important when debugging problems
related to transparent hugepages and we should have the same for
automatic NUMA placement.

Signed-off-by: Mel Gorman <mgorman@suse.de>
---
 Documentation/kernel-parameters.txt |  3 +++
 include/linux/sched.h               |  4 ++++
 init/Kconfig                        |  8 +++++++
 kernel/sched/core.c                 | 48 +++++++++++++++++++++++++------------
 kernel/sched/fair.c                 |  3 +++
 kernel/sched/features.h             |  6 +++--
 mm/mempolicy.c                      | 46 +++++++++++++++++++++++++++++++++++
 7 files changed, 101 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 9776f068306b..2e8d2625b814 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1996,6 +1996,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 
 	nr_uarts=	[SERIAL] maximum number of UARTs to be registered.
 
+	numa_balancing=	[KNL,X86] Enable or disable automatic NUMA balancing.
+			Allowed values are enable and disable
+
 	numa_zonelist_order= [KNL, BOOT] Select zonelist order for NUMA.
 			one of ['zone', 'node', 'default'] can be specified
 			This can be set from sysctl after boot.
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 0f4ff2bd03f6..b1e619f9ff1a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1563,10 +1563,14 @@ struct task_struct {
 
 #ifdef CONFIG_NUMA_BALANCING
 extern void task_numa_fault(int node, int pages, bool migrated);
+extern void set_numabalancing_state(bool enabled);
 #else
 static inline void task_numa_fault(int node, int pages, bool migrated)
 {
 }
+static inline void set_numabalancing_state(bool enabled)
+{
+}
 #endif
 
 /*
diff --git a/init/Kconfig b/init/Kconfig
index 9f00f004796a..18e2a5920a34 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -720,6 +720,14 @@ config ARCH_USES_NUMA_PROT_NONE
 	depends on ARCH_WANTS_PROT_NUMA_PROT_NONE
 	depends on NUMA_BALANCING
 
+config NUMA_BALANCING_DEFAULT_ENABLED
+	bool "Automatically enable NUMA aware memory/task placement"
+	default y
+	depends on NUMA_BALANCING
+	help
+	  If set, autonumic NUMA balancing will be enabled if running on a NUMA
+	  machine.
+
 config NUMA_BALANCING
 	bool "Memory placement aware NUMA scheduler"
 	default y
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 9d255bc0e278..7a45015274ab 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -192,23 +192,10 @@ static void sched_feat_disable(int i) { };
 static void sched_feat_enable(int i) { };
 #endif /* HAVE_JUMP_LABEL */
 
-static ssize_t
-sched_feat_write(struct file *filp, const char __user *ubuf,
-		size_t cnt, loff_t *ppos)
+static int sched_feat_set(char *cmp)
 {
-	char buf[64];
-	char *cmp;
-	int neg = 0;
 	int i;
-
-	if (cnt > 63)
-		cnt = 63;
-
-	if (copy_from_user(&buf, ubuf, cnt))
-		return -EFAULT;
-
-	buf[cnt] = 0;
-	cmp = strstrip(buf);
+	int neg = 0;
 
 	if (strncmp(cmp, "NO_", 3) == 0) {
 		neg = 1;
@@ -228,6 +215,27 @@ sched_feat_write(struct file *filp, const char __user *ubuf,
 		}
 	}
 
+	return i;
+}
+
+static ssize_t
+sched_feat_write(struct file *filp, const char __user *ubuf,
+		size_t cnt, loff_t *ppos)
+{
+	char buf[64];
+	char *cmp;
+	int i;
+
+	if (cnt > 63)
+		cnt = 63;
+
+	if (copy_from_user(&buf, ubuf, cnt))
+		return -EFAULT;
+
+	buf[cnt] = 0;
+	cmp = strstrip(buf);
+
+	i = sched_feat_set(cmp);
 	if (i == __SCHED_FEAT_NR)
 		return -EINVAL;
 
@@ -1549,6 +1557,16 @@ static void __sched_fork(struct task_struct *p)
 #endif /* CONFIG_NUMA_BALANCING */
 }
 
+#ifdef CONFIG_NUMA_BALANCING
+void set_numabalancing_state(bool enabled)
+{
+	if (enabled)
+		sched_feat_set("NUMA");
+	else
+		sched_feat_set("NO_NUMA");
+}
+#endif /* CONFIG_NUMA_BALANCING */
+
 /*
  * fork()/clone()-time setup:
  */
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 4b577863933f..7a02a2082e95 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -811,6 +811,9 @@ void task_numa_fault(int node, int pages, bool migrated)
 {
 	struct task_struct *p = current;
 
+	if (!sched_feat_numa(NUMA))
+		return;
+
 	/* FIXME: Allocate task-specific structure for placement policy here */
 
 	/*
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index 5fb7aefbec80..d2373a3e3252 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -63,8 +63,10 @@ SCHED_FEAT(RT_RUNTIME_SHARE, true)
 SCHED_FEAT(LB_MIN, false)
 
 /*
- * Apply the automatic NUMA scheduling policy
+ * Apply the automatic NUMA scheduling policy. Enabled automatically
+ * at runtime if running on a NUMA machine. Can be controlled via
+ * numa_balancing=
  */
 #ifdef CONFIG_NUMA_BALANCING
-SCHED_FEAT(NUMA,	true)
+SCHED_FEAT(NUMA,	false)
 #endif
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index fd20e28fd2ad..046308e9b999 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2521,6 +2521,50 @@ void mpol_free_shared_policy(struct shared_policy *p)
 	mutex_unlock(&p->mutex);
 }
 
+#ifdef CONFIG_NUMA_BALANCING
+static bool __initdata numabalancing_override;
+
+static void __init check_numabalancing_enable(void)
+{
+	bool numabalancing_default = false;
+
+	if (IS_ENABLED(CONFIG_NUMA_BALANCING_DEFAULT_ENABLED))
+		numabalancing_default = true;
+
+	if (nr_node_ids > 1 && !numabalancing_override) {
+		printk(KERN_INFO "Enabling automatic NUMA balancing. "
+			"Configure with numa_balancing= or sysctl");
+		set_numabalancing_state(numabalancing_default);
+	}
+}
+
+static int __init setup_numabalancing(char *str)
+{
+	int ret = 0;
+	if (!str)
+		goto out;
+	numabalancing_override = true;
+
+	if (!strcmp(str, "enable")) {
+		set_numabalancing_state(true);
+		ret = 1;
+	} else if (!strcmp(str, "disable")) {
+		set_numabalancing_state(false);
+		ret = 1;
+	}
+out:
+	if (!ret)
+		printk(KERN_WARNING "Unable to parse numa_balancing=\n");
+
+	return ret;
+}
+__setup("numa_balancing=", setup_numabalancing);
+#else
+static inline void __init check_numabalancing_enable(void)
+{
+}
+#endif /* CONFIG_NUMA_BALANCING */
+
 /* assumes fs == KERNEL_DS */
 void __init numa_policy_init(void)
 {
@@ -2571,6 +2615,8 @@ void __init numa_policy_init(void)
 
 	if (do_set_mempolicy(MPOL_INTERLEAVE, 0, &interleave_nodes))
 		printk("numa_policy_init: interleaving failed\n");
+
+	check_numabalancing_enable();
 }
 
 /* Reset policy of current process to default */
-- 
cgit v1.2.3


From 5bca23035391928c4c7301835accca3551b96cc2 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Thu, 22 Nov 2012 14:40:03 +0000
Subject: mm: sched: numa: Delay PTE scanning until a task is scheduled on a
 new node

Due to the fact that migrations are driven by the CPU a task is running
on there is no point tracking NUMA faults until one task runs on a new
node. This patch tracks the first node used by an address space. Until
it changes, PTE scanning is disabled and no NUMA hinting faults are
trapped. This should help workloads that are short-lived, do not care
about NUMA placement or have bound themselves to a single node.

This takes advantage of the logic in "mm: sched: numa: Implement slow
start for working set sampling" to delay when the checks are made. This
will take advantage of processes that set their CPU and node bindings
early in their lifetime. It will also potentially allow any initial load
balancing to take place.

Signed-off-by: Mel Gorman <mgorman@suse.de>
---
 include/linux/mm_types.h | 10 ++++++++++
 kernel/fork.c            |  3 +++
 kernel/sched/fair.c      | 18 ++++++++++++++++++
 kernel/sched/features.h  |  4 +++-
 4 files changed, 34 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index e850a23dd6ec..197422a1598c 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -418,10 +418,20 @@ struct mm_struct {
 
 	/* numa_scan_seq prevents two threads setting pte_numa */
 	int numa_scan_seq;
+
+	/*
+	 * The first node a task was scheduled on. If a task runs on
+	 * a different node than Make PTE Scan Go Now.
+	 */
+	int first_nid;
 #endif
 	struct uprobes_state uprobes_state;
 };
 
+/* first nid will either be a valid NID or one of these values */
+#define NUMA_PTE_SCAN_INIT	-1
+#define NUMA_PTE_SCAN_ACTIVE	-2
+
 static inline void mm_init_cpumask(struct mm_struct *mm)
 {
 #ifdef CONFIG_CPUMASK_OFFSTACK
diff --git a/kernel/fork.c b/kernel/fork.c
index 8b20ab7d3aa2..296ea308096d 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -820,6 +820,9 @@ struct mm_struct *dup_mm(struct task_struct *tsk)
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 	mm->pmd_huge_pte = NULL;
+#endif
+#ifdef CONFIG_NUMA_BALANCING
+	mm->first_nid = NUMA_PTE_SCAN_INIT;
 #endif
 	if (!mm_init(mm, tsk))
 		goto fail_nomem;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 7a02a2082e95..3e18f611a5aa 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -860,6 +860,24 @@ void task_numa_work(struct callback_head *work)
 	if (p->flags & PF_EXITING)
 		return;
 
+	/*
+	 * We do not care about task placement until a task runs on a node
+	 * other than the first one used by the address space. This is
+	 * largely because migrations are driven by what CPU the task
+	 * is running on. If it's never scheduled on another node, it'll
+	 * not migrate so why bother trapping the fault.
+	 */
+	if (mm->first_nid == NUMA_PTE_SCAN_INIT)
+		mm->first_nid = numa_node_id();
+	if (mm->first_nid != NUMA_PTE_SCAN_ACTIVE) {
+		/* Are we running on a new node yet? */
+		if (numa_node_id() == mm->first_nid &&
+		    !sched_feat_numa(NUMA_FORCE))
+			return;
+
+		mm->first_nid = NUMA_PTE_SCAN_ACTIVE;
+	}
+
 	/*
 	 * Reset the scan period if enough time has gone by. Objective is that
 	 * scanning will be reduced if pages are properly placed. As tasks
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index d2373a3e3252..e7c25fff1e94 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -65,8 +65,10 @@ SCHED_FEAT(LB_MIN, false)
 /*
  * Apply the automatic NUMA scheduling policy. Enabled automatically
  * at runtime if running on a NUMA machine. Can be controlled via
- * numa_balancing=
+ * numa_balancing=. Allow PTE scanning to be forced on UMA machines
+ * for debugging the core machinery.
  */
 #ifdef CONFIG_NUMA_BALANCING
 SCHED_FEAT(NUMA,	false)
+SCHED_FEAT(NUMA_FORCE,	false)
 #endif
-- 
cgit v1.2.3


From b32967ff101a7508f70be8de59b278d4df92fa00 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Mon, 19 Nov 2012 12:35:47 +0000
Subject: mm: numa: Add THP migration for the NUMA working set scanning fault
 case.

Note: This is very heavily based on a patch from Peter Zijlstra with
	fixes from Ingo Molnar, Hugh Dickins and Johannes Weiner.  That patch
	put a lot of migration logic into mm/huge_memory.c where it does
	not belong. This version puts tries to share some of the migration
	logic with migrate_misplaced_page.  However, it should be noted
	that now migrate.c is doing more with the pagetable manipulation
	than is preferred. The end result is barely recognisable so as
	before, the signed-offs had to be removed but will be re-added if
	the original authors are ok with it.

Add THP migration for the NUMA working set scanning fault case.

It uses the page lock to serialize. No migration pte dance is
necessary because the pte is already unmapped when we decide
to migrate.

[dhillf@gmail.com: Fix memory leak on isolation failure]
[dhillf@gmail.com: Fix transfer of last_nid information]
Signed-off-by: Mel Gorman <mgorman@suse.de>
---
 include/linux/migrate.h |  15 ++++
 mm/huge_memory.c        |  59 +++++++++----
 mm/internal.h           |   7 +-
 mm/memcontrol.c         |   7 +-
 mm/migrate.c            | 231 +++++++++++++++++++++++++++++++++++++++---------
 5 files changed, 255 insertions(+), 64 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 91556889adac..51eac4bdc606 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -79,6 +79,12 @@ static inline int migrate_huge_page_move_mapping(struct address_space *mapping,
 extern int migrate_misplaced_page(struct page *page, int node);
 extern int migrate_misplaced_page(struct page *page, int node);
 extern bool migrate_ratelimited(int node);
+extern int migrate_misplaced_transhuge_page(struct mm_struct *mm,
+			struct vm_area_struct *vma,
+			pmd_t *pmd, pmd_t entry,
+			unsigned long address,
+			struct page *page, int node);
+
 #else
 static inline int migrate_misplaced_page(struct page *page, int node)
 {
@@ -88,6 +94,15 @@ static inline bool migrate_ratelimited(int node)
 {
 	return false;
 }
+
+static inline int migrate_misplaced_transhuge_page(struct mm_struct *mm,
+			struct vm_area_struct *vma,
+			pmd_t *pmd, pmd_t entry,
+			unsigned long address,
+			struct page *page, int node)
+{
+	return -EAGAIN;
+}
 #endif /* CONFIG_NUMA_BALANCING */
 
 #endif /* _LINUX_MIGRATE_H */
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 199b261a257e..711baf84b153 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -600,7 +600,7 @@ out:
 }
 __setup("transparent_hugepage=", setup_transparent_hugepage);
 
-static inline pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma)
+pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma)
 {
 	if (likely(vma->vm_flags & VM_WRITE))
 		pmd = pmd_mkwrite(pmd);
@@ -1023,10 +1023,12 @@ out:
 int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 				unsigned long addr, pmd_t pmd, pmd_t *pmdp)
 {
-	struct page *page = NULL;
+	struct page *page;
 	unsigned long haddr = addr & HPAGE_PMD_MASK;
 	int target_nid;
 	int current_nid = -1;
+	bool migrated;
+	bool page_locked = false;
 
 	spin_lock(&mm->page_table_lock);
 	if (unlikely(!pmd_same(pmd, *pmdp)))
@@ -1034,42 +1036,61 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 
 	page = pmd_page(pmd);
 	get_page(page);
-	spin_unlock(&mm->page_table_lock);
 	current_nid = page_to_nid(page);
 	count_vm_numa_event(NUMA_HINT_FAULTS);
 	if (current_nid == numa_node_id())
 		count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL);
 
 	target_nid = mpol_misplaced(page, vma, haddr);
-	if (target_nid == -1)
+	if (target_nid == -1) {
+		put_page(page);
 		goto clear_pmdnuma;
+	}
 
-	/*
-	 * Due to lacking code to migrate thp pages, we'll split
-	 * (which preserves the special PROT_NONE) and re-take the
-	 * fault on the normal pages.
-	 */
-	split_huge_page(page);
-	put_page(page);
-
-	return 0;
+	/* Acquire the page lock to serialise THP migrations */
+	spin_unlock(&mm->page_table_lock);
+	lock_page(page);
+	page_locked = true;
 
-clear_pmdnuma:
+	/* Confirm the PTE did not while locked */
 	spin_lock(&mm->page_table_lock);
-	if (unlikely(!pmd_same(pmd, *pmdp)))
+	if (unlikely(!pmd_same(pmd, *pmdp))) {
+		unlock_page(page);
+		put_page(page);
 		goto out_unlock;
+	}
+	spin_unlock(&mm->page_table_lock);
+
+	/* Migrate the THP to the requested node */
+	migrated = migrate_misplaced_transhuge_page(mm, vma,
+				pmdp, pmd, addr,
+				page, target_nid);
+	if (migrated)
+		current_nid = target_nid;
+	else {
+		spin_lock(&mm->page_table_lock);
+		if (unlikely(!pmd_same(pmd, *pmdp))) {
+			unlock_page(page);
+			goto out_unlock;
+		}
+		goto clear_pmdnuma;
+	}
+
+	task_numa_fault(current_nid, HPAGE_PMD_NR, migrated);
+	return 0;
 
+clear_pmdnuma:
 	pmd = pmd_mknonnuma(pmd);
 	set_pmd_at(mm, haddr, pmdp, pmd);
 	VM_BUG_ON(pmd_numa(*pmdp));
 	update_mmu_cache_pmd(vma, addr, pmdp);
+	if (page_locked)
+		unlock_page(page);
 
 out_unlock:
 	spin_unlock(&mm->page_table_lock);
-	if (page) {
-		put_page(page);
-		task_numa_fault(numa_node_id(), HPAGE_PMD_NR, false);
-	}
+	if (current_nid != -1)
+		task_numa_fault(current_nid, HPAGE_PMD_NR, migrated);
 	return 0;
 }
 
diff --git a/mm/internal.h b/mm/internal.h
index a4fa284f6bc2..7e60ac826f2b 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -212,15 +212,18 @@ static inline void mlock_migrate_page(struct page *newpage, struct page *page)
 {
 	if (TestClearPageMlocked(page)) {
 		unsigned long flags;
+		int nr_pages = hpage_nr_pages(page);
 
 		local_irq_save(flags);
-		__dec_zone_page_state(page, NR_MLOCK);
+		__mod_zone_page_state(page_zone(page), NR_MLOCK, -nr_pages);
 		SetPageMlocked(newpage);
-		__inc_zone_page_state(newpage, NR_MLOCK);
+		__mod_zone_page_state(page_zone(newpage), NR_MLOCK, nr_pages);
 		local_irq_restore(flags);
 	}
 }
 
+extern pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma);
+
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 extern unsigned long vma_address(struct page *page,
 				 struct vm_area_struct *vma);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index dd39ba000b31..d97af9636ab2 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3288,15 +3288,18 @@ void mem_cgroup_prepare_migration(struct page *page, struct page *newpage,
 				  struct mem_cgroup **memcgp)
 {
 	struct mem_cgroup *memcg = NULL;
+	unsigned int nr_pages = 1;
 	struct page_cgroup *pc;
 	enum charge_type ctype;
 
 	*memcgp = NULL;
 
-	VM_BUG_ON(PageTransHuge(page));
 	if (mem_cgroup_disabled())
 		return;
 
+	if (PageTransHuge(page))
+		nr_pages <<= compound_order(page);
+
 	pc = lookup_page_cgroup(page);
 	lock_page_cgroup(pc);
 	if (PageCgroupUsed(pc)) {
@@ -3358,7 +3361,7 @@ void mem_cgroup_prepare_migration(struct page *page, struct page *newpage,
 	 * charged to the res_counter since we plan on replacing the
 	 * old one and only one page is going to be left afterwards.
 	 */
-	__mem_cgroup_commit_charge(memcg, newpage, 1, ctype, false);
+	__mem_cgroup_commit_charge(memcg, newpage, nr_pages, ctype, false);
 }
 
 /* remove redundant charge if migration failed*/
diff --git a/mm/migrate.c b/mm/migrate.c
index 2a5ce135eef0..c9400960fd52 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -410,7 +410,7 @@ int migrate_huge_page_move_mapping(struct address_space *mapping,
  */
 void migrate_page_copy(struct page *newpage, struct page *page)
 {
-	if (PageHuge(page))
+	if (PageHuge(page) || PageTransHuge(page))
 		copy_huge_page(newpage, page);
 	else
 		copy_highpage(newpage, page);
@@ -1491,25 +1491,10 @@ bool migrate_ratelimited(int node)
 	return true;
 }
 
-/*
- * Attempt to migrate a misplaced page to the specified destination
- * node. Caller is expected to have an elevated reference count on
- * the page that will be dropped by this function before returning.
- */
-int migrate_misplaced_page(struct page *page, int node)
+/* Returns true if the node is migrate rate-limited after the update */
+bool numamigrate_update_ratelimit(pg_data_t *pgdat)
 {
-	pg_data_t *pgdat = NODE_DATA(node);
-	int isolated = 0;
-	LIST_HEAD(migratepages);
-
-	/*
-	 * Don't migrate pages that are mapped in multiple processes.
-	 * TODO: Handle false sharing detection instead of this hammer
-	 */
-	if (page_mapcount(page) != 1) {
-		put_page(page);
-		goto out;
-	}
+	bool rate_limited = false;
 
 	/*
 	 * Rate-limit the amount of data that is being migrated to a node.
@@ -1522,13 +1507,18 @@ int migrate_misplaced_page(struct page *page, int node)
 		pgdat->numabalancing_migrate_next_window = jiffies +
 			msecs_to_jiffies(migrate_interval_millisecs);
 	}
-	if (pgdat->numabalancing_migrate_nr_pages > ratelimit_pages) {
-		spin_unlock(&pgdat->numabalancing_migrate_lock);
-		put_page(page);
-		goto out;
-	}
-	pgdat->numabalancing_migrate_nr_pages++;
+	if (pgdat->numabalancing_migrate_nr_pages > ratelimit_pages)
+		rate_limited = true;
+	else
+		pgdat->numabalancing_migrate_nr_pages++;
 	spin_unlock(&pgdat->numabalancing_migrate_lock);
+	
+	return rate_limited;
+}
+
+int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page)
+{
+	int ret = 0;
 
 	/* Avoid migrating to a node that is nearly full */
 	if (migrate_balanced_pgdat(pgdat, 1)) {
@@ -1536,13 +1526,18 @@ int migrate_misplaced_page(struct page *page, int node)
 
 		if (isolate_lru_page(page)) {
 			put_page(page);
-			goto out;
+			return 0;
 		}
-		isolated = 1;
 
+		/* Page is isolated */
+		ret = 1;
 		page_lru = page_is_file_cache(page);
-		inc_zone_page_state(page, NR_ISOLATED_ANON + page_lru);
-		list_add(&page->lru, &migratepages);
+		if (!PageTransHuge(page))
+			inc_zone_page_state(page, NR_ISOLATED_ANON + page_lru);
+		else
+			mod_zone_page_state(page_zone(page),
+					NR_ISOLATED_ANON + page_lru,
+					HPAGE_PMD_NR);
 	}
 
 	/*
@@ -1555,23 +1550,177 @@ int migrate_misplaced_page(struct page *page, int node)
 	 */
 	put_page(page);
 
-	if (isolated) {
-		int nr_remaining;
-
-		nr_remaining = migrate_pages(&migratepages,
-				alloc_misplaced_dst_page,
-				node, false, MIGRATE_ASYNC,
-				MR_NUMA_MISPLACED);
-		if (nr_remaining) {
-			putback_lru_pages(&migratepages);
-			isolated = 0;
-		} else
-			count_vm_numa_event(NUMA_PAGE_MIGRATE);
+	return ret;
+}
+
+/*
+ * Attempt to migrate a misplaced page to the specified destination
+ * node. Caller is expected to have an elevated reference count on
+ * the page that will be dropped by this function before returning.
+ */
+int migrate_misplaced_page(struct page *page, int node)
+{
+	pg_data_t *pgdat = NODE_DATA(node);
+	int isolated = 0;
+	int nr_remaining;
+	LIST_HEAD(migratepages);
+
+	/*
+	 * Don't migrate pages that are mapped in multiple processes.
+	 * TODO: Handle false sharing detection instead of this hammer
+	 */
+	if (page_mapcount(page) != 1) {
+		put_page(page);
+		goto out;
 	}
+
+	/*
+	 * Rate-limit the amount of data that is being migrated to a node.
+	 * Optimal placement is no good if the memory bus is saturated and
+	 * all the time is being spent migrating!
+	 */
+	if (numamigrate_update_ratelimit(pgdat)) {
+		put_page(page);
+		goto out;
+	}
+
+	isolated = numamigrate_isolate_page(pgdat, page);
+	if (!isolated)
+		goto out;
+
+	list_add(&page->lru, &migratepages);
+	nr_remaining = migrate_pages(&migratepages,
+			alloc_misplaced_dst_page,
+			node, false, MIGRATE_ASYNC,
+			MR_NUMA_MISPLACED);
+	if (nr_remaining) {
+		putback_lru_pages(&migratepages);
+		isolated = 0;
+	} else
+		count_vm_numa_event(NUMA_PAGE_MIGRATE);
 	BUG_ON(!list_empty(&migratepages));
 out:
 	return isolated;
 }
+
+int migrate_misplaced_transhuge_page(struct mm_struct *mm,
+				struct vm_area_struct *vma,
+				pmd_t *pmd, pmd_t entry,
+				unsigned long address,
+				struct page *page, int node)
+{
+	unsigned long haddr = address & HPAGE_PMD_MASK;
+	pg_data_t *pgdat = NODE_DATA(node);
+	int isolated = 0;
+	struct page *new_page = NULL;
+	struct mem_cgroup *memcg = NULL;
+	int page_lru = page_is_file_cache(page);
+
+	/*
+	 * Don't migrate pages that are mapped in multiple processes.
+	 * TODO: Handle false sharing detection instead of this hammer
+	 */
+	if (page_mapcount(page) != 1)
+		goto out_dropref;
+
+	/*
+	 * Rate-limit the amount of data that is being migrated to a node.
+	 * Optimal placement is no good if the memory bus is saturated and
+	 * all the time is being spent migrating!
+	 */
+	if (numamigrate_update_ratelimit(pgdat))
+		goto out_dropref;
+
+	new_page = alloc_pages_node(node,
+		(GFP_TRANSHUGE | GFP_THISNODE) & ~__GFP_WAIT, HPAGE_PMD_ORDER);
+	if (!new_page)
+		goto out_dropref;
+	page_xchg_last_nid(new_page, page_last_nid(page));
+
+	isolated = numamigrate_isolate_page(pgdat, page);
+	if (!isolated) {
+		put_page(new_page);
+		goto out_keep_locked;
+	}
+
+	/* Prepare a page as a migration target */
+	__set_page_locked(new_page);
+	SetPageSwapBacked(new_page);
+
+	/* anon mapping, we can simply copy page->mapping to the new page: */
+	new_page->mapping = page->mapping;
+	new_page->index = page->index;
+	migrate_page_copy(new_page, page);
+	WARN_ON(PageLRU(new_page));
+
+	/* Recheck the target PMD */
+	spin_lock(&mm->page_table_lock);
+	if (unlikely(!pmd_same(*pmd, entry))) {
+		spin_unlock(&mm->page_table_lock);
+
+		/* Reverse changes made by migrate_page_copy() */
+		if (TestClearPageActive(new_page))
+			SetPageActive(page);
+		if (TestClearPageUnevictable(new_page))
+			SetPageUnevictable(page);
+		mlock_migrate_page(page, new_page);
+
+		unlock_page(new_page);
+		put_page(new_page);		/* Free it */
+
+		unlock_page(page);
+		putback_lru_page(page);
+
+		count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR);
+		goto out;
+	}
+
+	/*
+	 * Traditional migration needs to prepare the memcg charge
+	 * transaction early to prevent the old page from being
+	 * uncharged when installing migration entries.  Here we can
+	 * save the potential rollback and start the charge transfer
+	 * only when migration is already known to end successfully.
+	 */
+	mem_cgroup_prepare_migration(page, new_page, &memcg);
+
+	entry = mk_pmd(new_page, vma->vm_page_prot);
+	entry = pmd_mknonnuma(entry);
+	entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
+	entry = pmd_mkhuge(entry);
+
+	page_add_new_anon_rmap(new_page, vma, haddr);
+
+	set_pmd_at(mm, haddr, pmd, entry);
+	update_mmu_cache_pmd(vma, address, entry);
+	page_remove_rmap(page);
+	/*
+	 * Finish the charge transaction under the page table lock to
+	 * prevent split_huge_page() from dividing up the charge
+	 * before it's fully transferred to the new page.
+	 */
+	mem_cgroup_end_migration(memcg, page, new_page, true);
+	spin_unlock(&mm->page_table_lock);
+
+	unlock_page(new_page);
+	unlock_page(page);
+	put_page(page);			/* Drop the rmap reference */
+	put_page(page);			/* Drop the LRU isolation reference */
+
+	count_vm_events(PGMIGRATE_SUCCESS, HPAGE_PMD_NR);
+	count_vm_numa_events(NUMA_PAGE_MIGRATE, HPAGE_PMD_NR);
+
+out:
+	mod_zone_page_state(page_zone(page),
+			NR_ISOLATED_ANON + page_lru,
+			-HPAGE_PMD_NR);
+	return isolated;
+
+out_dropref:
+	put_page(page);
+out_keep_locked:
+	return 0;
+}
 #endif /* CONFIG_NUMA_BALANCING */
 
 #endif /* CONFIG_NUMA */
-- 
cgit v1.2.3


From 220018d388b8ab1fca1c5f0c6474bab47ad2c9c0 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Wed, 5 Dec 2012 09:32:56 +0000
Subject: mm: numa: Add THP migration for the NUMA working set scanning fault
 case build fix

Commit "Add THP migration for the NUMA working set scanning fault case"
breaks the build because HPAGE_PMD_SHIFT and HPAGE_PMD_MASK defined to
explode without CONFIG_TRANSPARENT_HUGEPAGE:

mm/migrate.c: In function 'migrate_misplaced_transhuge_page_put':
mm/migrate.c:1549: error: call to '__build_bug_failed' declared with attribute error: BUILD_BUG failed
mm/migrate.c:1564: error: call to '__build_bug_failed' declared with attribute error: BUILD_BUG failed
mm/migrate.c:1566: error: call to '__build_bug_failed' declared with attribute error: BUILD_BUG failed
mm/migrate.c:1573: error: call to '__build_bug_failed' declared with attribute error: BUILD_BUG failed
mm/migrate.c:1606: error: call to '__build_bug_failed' declared with attribute error: BUILD_BUG failed
mm/migrate.c:1648: error: call to '__build_bug_failed' declared with attribute error: BUILD_BUG failed

CONFIG_NUMA_BALANCING allows compilation without enabling transparent
hugepages, so define the dummy function for such a configuration and only
define migrate_misplaced_transhuge_page_put() when transparent hugepages
are enabled.

Signed-off-by: David Rientjes <rientjes@google.com>
Signed-off-by: Mel Gorman <mgorman@suse.de>
---
 include/linux/migrate.h | 16 +++++++++-------
 mm/migrate.c            |  2 ++
 2 files changed, 11 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 51eac4bdc606..d52afb9a790c 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -79,12 +79,6 @@ static inline int migrate_huge_page_move_mapping(struct address_space *mapping,
 extern int migrate_misplaced_page(struct page *page, int node);
 extern int migrate_misplaced_page(struct page *page, int node);
 extern bool migrate_ratelimited(int node);
-extern int migrate_misplaced_transhuge_page(struct mm_struct *mm,
-			struct vm_area_struct *vma,
-			pmd_t *pmd, pmd_t entry,
-			unsigned long address,
-			struct page *page, int node);
-
 #else
 static inline int migrate_misplaced_page(struct page *page, int node)
 {
@@ -94,7 +88,15 @@ static inline bool migrate_ratelimited(int node)
 {
 	return false;
 }
+#endif /* CONFIG_NUMA_BALANCING */
 
+#if defined(CONFIG_NUMA_BALANCING) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
+extern int migrate_misplaced_transhuge_page(struct mm_struct *mm,
+			struct vm_area_struct *vma,
+			pmd_t *pmd, pmd_t entry,
+			unsigned long address,
+			struct page *page, int node);
+#else
 static inline int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 			struct vm_area_struct *vma,
 			pmd_t *pmd, pmd_t entry,
@@ -103,6 +105,6 @@ static inline int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 {
 	return -EAGAIN;
 }
-#endif /* CONFIG_NUMA_BALANCING */
+#endif /* CONFIG_NUMA_BALANCING && CONFIG_TRANSPARENT_HUGEPAGE*/
 
 #endif /* _LINUX_MIGRATE_H */
diff --git a/mm/migrate.c b/mm/migrate.c
index c9400960fd52..9341a501d168 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1602,7 +1602,9 @@ int migrate_misplaced_page(struct page *page, int node)
 out:
 	return isolated;
 }
+#endif /* CONFIG_NUMA_BALANCING */
 
+#if defined(CONFIG_NUMA_BALANCING) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
 int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 				struct vm_area_struct *vma,
 				pmd_t *pmd, pmd_t entry,
-- 
cgit v1.2.3


From 5a505085f043e8380f83610f79642853c051e2f1 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sun, 2 Dec 2012 19:56:46 +0000
Subject: mm/rmap: Convert the struct anon_vma::mutex to an rwsem

Convert the struct anon_vma::mutex to an rwsem, which will help
in solving a page-migration scalability problem. (Addressed in
a separate patch.)

The conversion is simple and straightforward: in every case
where we mutex_lock()ed we'll now down_write().

Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Turner <pjt@google.com>
Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Mel Gorman <mgorman@suse.de>
---
 include/linux/rmap.h | 16 ++++++++--------
 mm/huge_memory.c     |  4 ++--
 mm/mmap.c            |  8 ++++----
 mm/rmap.c            | 22 +++++++++++-----------
 4 files changed, 25 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index bfe1f4780644..f3f41d242e25 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -7,7 +7,7 @@
 #include <linux/list.h>
 #include <linux/slab.h>
 #include <linux/mm.h>
-#include <linux/mutex.h>
+#include <linux/rwsem.h>
 #include <linux/memcontrol.h>
 
 /*
@@ -25,8 +25,8 @@
  * pointing to this anon_vma once its vma list is empty.
  */
 struct anon_vma {
-	struct anon_vma *root;	/* Root of this anon_vma tree */
-	struct mutex mutex;	/* Serialize access to vma list */
+	struct anon_vma *root;		/* Root of this anon_vma tree */
+	struct rw_semaphore rwsem;	/* W: modification, R: walking the list */
 	/*
 	 * The refcount is taken on an anon_vma when there is no
 	 * guarantee that the vma of page tables will exist for
@@ -64,7 +64,7 @@ struct anon_vma_chain {
 	struct vm_area_struct *vma;
 	struct anon_vma *anon_vma;
 	struct list_head same_vma;   /* locked by mmap_sem & page_table_lock */
-	struct rb_node rb;			/* locked by anon_vma->mutex */
+	struct rb_node rb;			/* locked by anon_vma->rwsem */
 	unsigned long rb_subtree_last;
 #ifdef CONFIG_DEBUG_VM_RB
 	unsigned long cached_vma_start, cached_vma_last;
@@ -108,24 +108,24 @@ static inline void vma_lock_anon_vma(struct vm_area_struct *vma)
 {
 	struct anon_vma *anon_vma = vma->anon_vma;
 	if (anon_vma)
-		mutex_lock(&anon_vma->root->mutex);
+		down_write(&anon_vma->root->rwsem);
 }
 
 static inline void vma_unlock_anon_vma(struct vm_area_struct *vma)
 {
 	struct anon_vma *anon_vma = vma->anon_vma;
 	if (anon_vma)
-		mutex_unlock(&anon_vma->root->mutex);
+		up_write(&anon_vma->root->rwsem);
 }
 
 static inline void anon_vma_lock(struct anon_vma *anon_vma)
 {
-	mutex_lock(&anon_vma->root->mutex);
+	down_write(&anon_vma->root->rwsem);
 }
 
 static inline void anon_vma_unlock(struct anon_vma *anon_vma)
 {
-	mutex_unlock(&anon_vma->root->mutex);
+	up_write(&anon_vma->root->rwsem);
 }
 
 /*
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 711baf84b153..acd37fe55eb7 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1292,7 +1292,7 @@ static int __split_huge_page_splitting(struct page *page,
 		 * We can't temporarily set the pmd to null in order
 		 * to split it, the pmd must remain marked huge at all
 		 * times or the VM won't take the pmd_trans_huge paths
-		 * and it won't wait on the anon_vma->root->mutex to
+		 * and it won't wait on the anon_vma->root->rwsem to
 		 * serialize against split_huge_page*.
 		 */
 		pmdp_splitting_flush(vma, address, pmd);
@@ -1495,7 +1495,7 @@ static int __split_huge_page_map(struct page *page,
 	return ret;
 }
 
-/* must be called with anon_vma->root->mutex hold */
+/* must be called with anon_vma->root->rwsem held */
 static void __split_huge_page(struct page *page,
 			      struct anon_vma *anon_vma)
 {
diff --git a/mm/mmap.c b/mm/mmap.c
index 9a796c41e7d9..88408632da66 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2561,15 +2561,15 @@ static void vm_lock_anon_vma(struct mm_struct *mm, struct anon_vma *anon_vma)
 		 * The LSB of head.next can't change from under us
 		 * because we hold the mm_all_locks_mutex.
 		 */
-		mutex_lock_nest_lock(&anon_vma->root->mutex, &mm->mmap_sem);
+		down_write(&anon_vma->root->rwsem);
 		/*
 		 * We can safely modify head.next after taking the
-		 * anon_vma->root->mutex. If some other vma in this mm shares
+		 * anon_vma->root->rwsem. If some other vma in this mm shares
 		 * the same anon_vma we won't take it again.
 		 *
 		 * No need of atomic instructions here, head.next
 		 * can't change from under us thanks to the
-		 * anon_vma->root->mutex.
+		 * anon_vma->root->rwsem.
 		 */
 		if (__test_and_set_bit(0, (unsigned long *)
 				       &anon_vma->root->rb_root.rb_node))
@@ -2671,7 +2671,7 @@ static void vm_unlock_anon_vma(struct anon_vma *anon_vma)
 		 *
 		 * No need of atomic instructions here, head.next
 		 * can't change from under us until we release the
-		 * anon_vma->root->mutex.
+		 * anon_vma->root->rwsem.
 		 */
 		if (!__test_and_clear_bit(0, (unsigned long *)
 					  &anon_vma->root->rb_root.rb_node))
diff --git a/mm/rmap.c b/mm/rmap.c
index 2ee1ef0f317b..6e3ee3b82798 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -24,7 +24,7 @@
  *   mm->mmap_sem
  *     page->flags PG_locked (lock_page)
  *       mapping->i_mmap_mutex
- *         anon_vma->mutex
+ *         anon_vma->rwsem
  *           mm->page_table_lock or pte_lock
  *             zone->lru_lock (in mark_page_accessed, isolate_lru_page)
  *             swap_lock (in swap_duplicate, swap_info_get)
@@ -37,7 +37,7 @@
  *                           in arch-dependent flush_dcache_mmap_lock,
  *                           within bdi.wb->list_lock in __sync_single_inode)
  *
- * anon_vma->mutex,mapping->i_mutex      (memory_failure, collect_procs_anon)
+ * anon_vma->rwsem,mapping->i_mutex      (memory_failure, collect_procs_anon)
  *   ->tasklist_lock
  *     pte map lock
  */
@@ -103,7 +103,7 @@ static inline void anon_vma_free(struct anon_vma *anon_vma)
 	 * LOCK should suffice since the actual taking of the lock must
 	 * happen _before_ what follows.
 	 */
-	if (mutex_is_locked(&anon_vma->root->mutex)) {
+	if (rwsem_is_locked(&anon_vma->root->rwsem)) {
 		anon_vma_lock(anon_vma);
 		anon_vma_unlock(anon_vma);
 	}
@@ -219,9 +219,9 @@ static inline struct anon_vma *lock_anon_vma_root(struct anon_vma *root, struct
 	struct anon_vma *new_root = anon_vma->root;
 	if (new_root != root) {
 		if (WARN_ON_ONCE(root))
-			mutex_unlock(&root->mutex);
+			up_write(&root->rwsem);
 		root = new_root;
-		mutex_lock(&root->mutex);
+		down_write(&root->rwsem);
 	}
 	return root;
 }
@@ -229,7 +229,7 @@ static inline struct anon_vma *lock_anon_vma_root(struct anon_vma *root, struct
 static inline void unlock_anon_vma_root(struct anon_vma *root)
 {
 	if (root)
-		mutex_unlock(&root->mutex);
+		up_write(&root->rwsem);
 }
 
 /*
@@ -349,7 +349,7 @@ void unlink_anon_vmas(struct vm_area_struct *vma)
 	/*
 	 * Iterate the list once more, it now only contains empty and unlinked
 	 * anon_vmas, destroy them. Could not do before due to __put_anon_vma()
-	 * needing to acquire the anon_vma->root->mutex.
+	 * needing to write-acquire the anon_vma->root->rwsem.
 	 */
 	list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) {
 		struct anon_vma *anon_vma = avc->anon_vma;
@@ -365,7 +365,7 @@ static void anon_vma_ctor(void *data)
 {
 	struct anon_vma *anon_vma = data;
 
-	mutex_init(&anon_vma->mutex);
+	init_rwsem(&anon_vma->rwsem);
 	atomic_set(&anon_vma->refcount, 0);
 	anon_vma->rb_root = RB_ROOT;
 }
@@ -457,14 +457,14 @@ struct anon_vma *page_lock_anon_vma(struct page *page)
 
 	anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON);
 	root_anon_vma = ACCESS_ONCE(anon_vma->root);
-	if (mutex_trylock(&root_anon_vma->mutex)) {
+	if (down_write_trylock(&root_anon_vma->rwsem)) {
 		/*
 		 * If the page is still mapped, then this anon_vma is still
 		 * its anon_vma, and holding the mutex ensures that it will
 		 * not go away, see anon_vma_free().
 		 */
 		if (!page_mapped(page)) {
-			mutex_unlock(&root_anon_vma->mutex);
+			up_write(&root_anon_vma->rwsem);
 			anon_vma = NULL;
 		}
 		goto out;
@@ -1299,7 +1299,7 @@ out_mlock:
 	/*
 	 * We need mmap_sem locking, Otherwise VM_LOCKED check makes
 	 * unstable result and race. Plus, We can't wait here because
-	 * we now hold anon_vma->mutex or mapping->i_mmap_mutex.
+	 * we now hold anon_vma->rwsem or mapping->i_mmap_mutex.
 	 * if trylock failed, the page remain in evictable lru and later
 	 * vmscan could retry to move the page to unevictable lru if the
 	 * page is actually mlocked.
-- 
cgit v1.2.3


From 4fc3f1d66b1ef0d7b8dc11f4ff1cc510f78b37d6 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sun, 2 Dec 2012 19:56:50 +0000
Subject: mm/rmap, migration: Make rmap_walk_anon() and try_to_unmap_anon()
 more scalable

rmap_walk_anon() and try_to_unmap_anon() appears to be too
careful about locking the anon vma: while it needs protection
against anon vma list modifications, it does not need exclusive
access to the list itself.

Transforming this exclusive lock to a read-locked rwsem removes
a global lock from the hot path of page-migration intense
threaded workloads which can cause pathological performance like
this:

    96.43%        process 0  [kernel.kallsyms]  [k] perf_trace_sched_switch
                  |
                  --- perf_trace_sched_switch
                      __schedule
                      schedule
                      schedule_preempt_disabled
                      __mutex_lock_common.isra.6
                      __mutex_lock_slowpath
                      mutex_lock
                     |
                     |--50.61%-- rmap_walk
                     |          move_to_new_page
                     |          migrate_pages
                     |          migrate_misplaced_page
                     |          __do_numa_page.isra.69
                     |          handle_pte_fault
                     |          handle_mm_fault
                     |          __do_page_fault
                     |          do_page_fault
                     |          page_fault
                     |          __memset_sse2
                     |          |
                     |           --100.00%-- worker_thread
                     |                     |
                     |                      --100.00%-- start_thread
                     |
                      --49.39%-- page_lock_anon_vma
                                try_to_unmap_anon
                                try_to_unmap
                                migrate_pages
                                migrate_misplaced_page
                                __do_numa_page.isra.69
                                handle_pte_fault
                                handle_mm_fault
                                __do_page_fault
                                do_page_fault
                                page_fault
                                __memset_sse2
                                |
                                 --100.00%-- worker_thread
                                           start_thread

With this change applied the profile is now nicely flat
and there's no anon-vma related scheduling/blocking.

Rename anon_vma_[un]lock() => anon_vma_[un]lock_write(),
to make it clearer that it's an exclusive write-lock in
that case - suggested by Rik van Riel.

Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Turner <pjt@google.com>
Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Mel Gorman <mgorman@suse.de>
---
 include/linux/huge_mm.h |  2 +-
 include/linux/rmap.h    | 17 ++++++++++++++---
 mm/huge_memory.c        |  6 +++---
 mm/ksm.c                |  6 +++---
 mm/memory-failure.c     |  4 ++--
 mm/migrate.c            |  2 +-
 mm/mmap.c               |  2 +-
 mm/mremap.c             |  2 +-
 mm/rmap.c               | 48 ++++++++++++++++++++++++------------------------
 9 files changed, 50 insertions(+), 39 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 027ad04ef3a8..0d1208c0bdc4 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -102,7 +102,7 @@ extern void __split_huge_page_pmd(struct mm_struct *mm, pmd_t *pmd);
 #define wait_split_huge_page(__anon_vma, __pmd)				\
 	do {								\
 		pmd_t *____pmd = (__pmd);				\
-		anon_vma_lock(__anon_vma);				\
+		anon_vma_lock_write(__anon_vma);			\
 		anon_vma_unlock(__anon_vma);				\
 		BUG_ON(pmd_trans_splitting(*____pmd) ||			\
 		       pmd_trans_huge(*____pmd));			\
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index f3f41d242e25..c20635c527a9 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -118,7 +118,7 @@ static inline void vma_unlock_anon_vma(struct vm_area_struct *vma)
 		up_write(&anon_vma->root->rwsem);
 }
 
-static inline void anon_vma_lock(struct anon_vma *anon_vma)
+static inline void anon_vma_lock_write(struct anon_vma *anon_vma)
 {
 	down_write(&anon_vma->root->rwsem);
 }
@@ -128,6 +128,17 @@ static inline void anon_vma_unlock(struct anon_vma *anon_vma)
 	up_write(&anon_vma->root->rwsem);
 }
 
+static inline void anon_vma_lock_read(struct anon_vma *anon_vma)
+{
+	down_read(&anon_vma->root->rwsem);
+}
+
+static inline void anon_vma_unlock_read(struct anon_vma *anon_vma)
+{
+	up_read(&anon_vma->root->rwsem);
+}
+
+
 /*
  * anon_vma helper functions.
  */
@@ -220,8 +231,8 @@ int try_to_munlock(struct page *);
 /*
  * Called by memory-failure.c to kill processes.
  */
-struct anon_vma *page_lock_anon_vma(struct page *page);
-void page_unlock_anon_vma(struct anon_vma *anon_vma);
+struct anon_vma *page_lock_anon_vma_read(struct page *page);
+void page_unlock_anon_vma_read(struct anon_vma *anon_vma);
 int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma);
 
 /*
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index acd37fe55eb7..a24c9cb9c83e 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1549,7 +1549,7 @@ int split_huge_page(struct page *page)
 	int ret = 1;
 
 	BUG_ON(!PageAnon(page));
-	anon_vma = page_lock_anon_vma(page);
+	anon_vma = page_lock_anon_vma_read(page);
 	if (!anon_vma)
 		goto out;
 	ret = 0;
@@ -1562,7 +1562,7 @@ int split_huge_page(struct page *page)
 
 	BUG_ON(PageCompound(page));
 out_unlock:
-	page_unlock_anon_vma(anon_vma);
+	page_unlock_anon_vma_read(anon_vma);
 out:
 	return ret;
 }
@@ -2074,7 +2074,7 @@ static void collapse_huge_page(struct mm_struct *mm,
 	if (!pmd_present(*pmd) || pmd_trans_huge(*pmd))
 		goto out;
 
-	anon_vma_lock(vma->anon_vma);
+	anon_vma_lock_write(vma->anon_vma);
 
 	pte = pte_offset_map(pmd, address);
 	ptl = pte_lockptr(mm, pmd);
diff --git a/mm/ksm.c b/mm/ksm.c
index ae539f0b8aa1..7fa37de1ee0c 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -1634,7 +1634,7 @@ again:
 		struct anon_vma_chain *vmac;
 		struct vm_area_struct *vma;
 
-		anon_vma_lock(anon_vma);
+		anon_vma_lock_write(anon_vma);
 		anon_vma_interval_tree_foreach(vmac, &anon_vma->rb_root,
 					       0, ULONG_MAX) {
 			vma = vmac->vma;
@@ -1688,7 +1688,7 @@ again:
 		struct anon_vma_chain *vmac;
 		struct vm_area_struct *vma;
 
-		anon_vma_lock(anon_vma);
+		anon_vma_lock_write(anon_vma);
 		anon_vma_interval_tree_foreach(vmac, &anon_vma->rb_root,
 					       0, ULONG_MAX) {
 			vma = vmac->vma;
@@ -1741,7 +1741,7 @@ again:
 		struct anon_vma_chain *vmac;
 		struct vm_area_struct *vma;
 
-		anon_vma_lock(anon_vma);
+		anon_vma_lock_write(anon_vma);
 		anon_vma_interval_tree_foreach(vmac, &anon_vma->rb_root,
 					       0, ULONG_MAX) {
 			vma = vmac->vma;
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index ddb68a169e45..f2cd830f66c0 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -402,7 +402,7 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill,
 	struct anon_vma *av;
 	pgoff_t pgoff;
 
-	av = page_lock_anon_vma(page);
+	av = page_lock_anon_vma_read(page);
 	if (av == NULL)	/* Not actually mapped anymore */
 		return;
 
@@ -423,7 +423,7 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill,
 		}
 	}
 	read_unlock(&tasklist_lock);
-	page_unlock_anon_vma(av);
+	page_unlock_anon_vma_read(av);
 }
 
 /*
diff --git a/mm/migrate.c b/mm/migrate.c
index f24e9cc49cc4..6e46485f014c 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -754,7 +754,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
 	 */
 	if (PageAnon(page)) {
 		/*
-		 * Only page_lock_anon_vma() understands the subtleties of
+		 * Only page_lock_anon_vma_read() understands the subtleties of
 		 * getting a hold on an anon_vma from outside one of its mms.
 		 */
 		anon_vma = page_get_anon_vma(page);
diff --git a/mm/mmap.c b/mm/mmap.c
index 88408632da66..68a16b40c209 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -602,7 +602,7 @@ again:			remove_next = 1 + (end > next->vm_end);
 	if (anon_vma) {
 		VM_BUG_ON(adjust_next && next->anon_vma &&
 			  anon_vma != next->anon_vma);
-		anon_vma_lock(anon_vma);
+		anon_vma_lock_write(anon_vma);
 		anon_vma_interval_tree_pre_update_vma(vma);
 		if (adjust_next)
 			anon_vma_interval_tree_pre_update_vma(next);
diff --git a/mm/mremap.c b/mm/mremap.c
index 1b61c2d3307a..3dabd170753a 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -104,7 +104,7 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
 		}
 		if (vma->anon_vma) {
 			anon_vma = vma->anon_vma;
-			anon_vma_lock(anon_vma);
+			anon_vma_lock_write(anon_vma);
 		}
 	}
 
diff --git a/mm/rmap.c b/mm/rmap.c
index 6e3ee3b82798..b0f612df7b9d 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -87,24 +87,24 @@ static inline void anon_vma_free(struct anon_vma *anon_vma)
 	VM_BUG_ON(atomic_read(&anon_vma->refcount));
 
 	/*
-	 * Synchronize against page_lock_anon_vma() such that
+	 * Synchronize against page_lock_anon_vma_read() such that
 	 * we can safely hold the lock without the anon_vma getting
 	 * freed.
 	 *
 	 * Relies on the full mb implied by the atomic_dec_and_test() from
 	 * put_anon_vma() against the acquire barrier implied by
-	 * mutex_trylock() from page_lock_anon_vma(). This orders:
+	 * down_read_trylock() from page_lock_anon_vma_read(). This orders:
 	 *
-	 * page_lock_anon_vma()		VS	put_anon_vma()
-	 *   mutex_trylock()			  atomic_dec_and_test()
+	 * page_lock_anon_vma_read()	VS	put_anon_vma()
+	 *   down_read_trylock()		  atomic_dec_and_test()
 	 *   LOCK				  MB
-	 *   atomic_read()			  mutex_is_locked()
+	 *   atomic_read()			  rwsem_is_locked()
 	 *
 	 * LOCK should suffice since the actual taking of the lock must
 	 * happen _before_ what follows.
 	 */
 	if (rwsem_is_locked(&anon_vma->root->rwsem)) {
-		anon_vma_lock(anon_vma);
+		anon_vma_lock_write(anon_vma);
 		anon_vma_unlock(anon_vma);
 	}
 
@@ -146,7 +146,7 @@ static void anon_vma_chain_link(struct vm_area_struct *vma,
  * allocate a new one.
  *
  * Anon-vma allocations are very subtle, because we may have
- * optimistically looked up an anon_vma in page_lock_anon_vma()
+ * optimistically looked up an anon_vma in page_lock_anon_vma_read()
  * and that may actually touch the spinlock even in the newly
  * allocated vma (it depends on RCU to make sure that the
  * anon_vma isn't actually destroyed).
@@ -181,7 +181,7 @@ int anon_vma_prepare(struct vm_area_struct *vma)
 			allocated = anon_vma;
 		}
 
-		anon_vma_lock(anon_vma);
+		anon_vma_lock_write(anon_vma);
 		/* page_table_lock to protect against threads */
 		spin_lock(&mm->page_table_lock);
 		if (likely(!vma->anon_vma)) {
@@ -306,7 +306,7 @@ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
 	get_anon_vma(anon_vma->root);
 	/* Mark this anon_vma as the one where our new (COWed) pages go. */
 	vma->anon_vma = anon_vma;
-	anon_vma_lock(anon_vma);
+	anon_vma_lock_write(anon_vma);
 	anon_vma_chain_link(vma, avc, anon_vma);
 	anon_vma_unlock(anon_vma);
 
@@ -442,7 +442,7 @@ out:
  * atomic op -- the trylock. If we fail the trylock, we fall back to getting a
  * reference like with page_get_anon_vma() and then block on the mutex.
  */
-struct anon_vma *page_lock_anon_vma(struct page *page)
+struct anon_vma *page_lock_anon_vma_read(struct page *page)
 {
 	struct anon_vma *anon_vma = NULL;
 	struct anon_vma *root_anon_vma;
@@ -457,14 +457,14 @@ struct anon_vma *page_lock_anon_vma(struct page *page)
 
 	anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON);
 	root_anon_vma = ACCESS_ONCE(anon_vma->root);
-	if (down_write_trylock(&root_anon_vma->rwsem)) {
+	if (down_read_trylock(&root_anon_vma->rwsem)) {
 		/*
 		 * If the page is still mapped, then this anon_vma is still
 		 * its anon_vma, and holding the mutex ensures that it will
 		 * not go away, see anon_vma_free().
 		 */
 		if (!page_mapped(page)) {
-			up_write(&root_anon_vma->rwsem);
+			up_read(&root_anon_vma->rwsem);
 			anon_vma = NULL;
 		}
 		goto out;
@@ -484,15 +484,15 @@ struct anon_vma *page_lock_anon_vma(struct page *page)
 
 	/* we pinned the anon_vma, its safe to sleep */
 	rcu_read_unlock();
-	anon_vma_lock(anon_vma);
+	anon_vma_lock_read(anon_vma);
 
 	if (atomic_dec_and_test(&anon_vma->refcount)) {
 		/*
 		 * Oops, we held the last refcount, release the lock
 		 * and bail -- can't simply use put_anon_vma() because
-		 * we'll deadlock on the anon_vma_lock() recursion.
+		 * we'll deadlock on the anon_vma_lock_write() recursion.
 		 */
-		anon_vma_unlock(anon_vma);
+		anon_vma_unlock_read(anon_vma);
 		__put_anon_vma(anon_vma);
 		anon_vma = NULL;
 	}
@@ -504,9 +504,9 @@ out:
 	return anon_vma;
 }
 
-void page_unlock_anon_vma(struct anon_vma *anon_vma)
+void page_unlock_anon_vma_read(struct anon_vma *anon_vma)
 {
-	anon_vma_unlock(anon_vma);
+	anon_vma_unlock_read(anon_vma);
 }
 
 /*
@@ -732,7 +732,7 @@ static int page_referenced_anon(struct page *page,
 	struct anon_vma_chain *avc;
 	int referenced = 0;
 
-	anon_vma = page_lock_anon_vma(page);
+	anon_vma = page_lock_anon_vma_read(page);
 	if (!anon_vma)
 		return referenced;
 
@@ -754,7 +754,7 @@ static int page_referenced_anon(struct page *page,
 			break;
 	}
 
-	page_unlock_anon_vma(anon_vma);
+	page_unlock_anon_vma_read(anon_vma);
 	return referenced;
 }
 
@@ -1474,7 +1474,7 @@ static int try_to_unmap_anon(struct page *page, enum ttu_flags flags)
 	struct anon_vma_chain *avc;
 	int ret = SWAP_AGAIN;
 
-	anon_vma = page_lock_anon_vma(page);
+	anon_vma = page_lock_anon_vma_read(page);
 	if (!anon_vma)
 		return ret;
 
@@ -1501,7 +1501,7 @@ static int try_to_unmap_anon(struct page *page, enum ttu_flags flags)
 			break;
 	}
 
-	page_unlock_anon_vma(anon_vma);
+	page_unlock_anon_vma_read(anon_vma);
 	return ret;
 }
 
@@ -1696,7 +1696,7 @@ static int rmap_walk_anon(struct page *page, int (*rmap_one)(struct page *,
 	int ret = SWAP_AGAIN;
 
 	/*
-	 * Note: remove_migration_ptes() cannot use page_lock_anon_vma()
+	 * Note: remove_migration_ptes() cannot use page_lock_anon_vma_read()
 	 * because that depends on page_mapped(); but not all its usages
 	 * are holding mmap_sem. Users without mmap_sem are required to
 	 * take a reference count to prevent the anon_vma disappearing
@@ -1704,7 +1704,7 @@ static int rmap_walk_anon(struct page *page, int (*rmap_one)(struct page *,
 	anon_vma = page_anon_vma(page);
 	if (!anon_vma)
 		return ret;
-	anon_vma_lock(anon_vma);
+	anon_vma_lock_read(anon_vma);
 	anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff, pgoff) {
 		struct vm_area_struct *vma = avc->vma;
 		unsigned long address = vma_address(page, vma);
@@ -1712,7 +1712,7 @@ static int rmap_walk_anon(struct page *page, int (*rmap_one)(struct page *,
 		if (ret != SWAP_AGAIN)
 			break;
 	}
-	anon_vma_unlock(anon_vma);
+	anon_vma_unlock_read(anon_vma);
 	return ret;
 }
 
-- 
cgit v1.2.3


From 1daa8a0b2000974e07eacbabecd61bccadc933b9 Mon Sep 17 00:00:00 2001
From: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Date: Mon, 10 Dec 2012 20:41:27 +0100
Subject: of/i2c: add dummy inline functions for when CONFIG_OF_I2C(_MODULE)
 isn't defined

If CONFIG_OF_I2C and CONFIG_OF_I2C_MODULE are undefined no declaration of
of_find_i2c_device_by_node and of_find_i2c_adapter_by_node will be
available. Add dummy inline functions to avoid compilation breakage.

Signed-off-by: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
---
 include/linux/of_i2c.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/of_i2c.h b/include/linux/of_i2c.h
index 1cb775f8e663..cfb545cd86b5 100644
--- a/include/linux/of_i2c.h
+++ b/include/linux/of_i2c.h
@@ -29,6 +29,18 @@ static inline void of_i2c_register_devices(struct i2c_adapter *adap)
 {
 	return;
 }
+
+static inline struct i2c_client *of_find_i2c_device_by_node(struct device_node *node)
+{
+	return NULL;
+}
+
+/* must call put_device() when done with returned i2c_adapter device */
+static inline struct i2c_adapter *of_find_i2c_adapter_by_node(
+						struct device_node *node)
+{
+	return NULL;
+}
 #endif /* CONFIG_OF_I2C */
 
 #endif /* __LINUX_OF_I2C_H */
-- 
cgit v1.2.3


From 93c667ca2598bd84f1bd3f2fa176af69707699fe Mon Sep 17 00:00:00 2001
From: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Date: Mon, 10 Dec 2012 20:41:30 +0100
Subject: of: *node argument to of_parse_phandle_with_args should be const

The "struct device_node *" argument of of_parse_phandle_with_args() can
be const. Making this change makes it explicit that the function will
not modify a node.

Signed-off-by: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
[grant.likely: Resolved conflict with previous patch modifying of_parse_phandle()]
Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
---
 drivers/of/base.c  | 2 +-
 include/linux/of.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/of/base.c b/drivers/of/base.c
index 538e3cfad23e..be846408dbc1 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -1025,7 +1025,7 @@ EXPORT_SYMBOL(of_parse_phandle);
  * To get a device_node of the `node2' node you may call this:
  * of_parse_phandle_with_args(node3, "list", "#list-cells", 1, &args);
  */
-int of_parse_phandle_with_args(struct device_node *np, const char *list_name,
+int of_parse_phandle_with_args(const struct device_node *np, const char *list_name,
 				const char *cells_name, int index,
 				struct of_phandle_args *out_args)
 {
diff --git a/include/linux/of.h b/include/linux/of.h
index 60053bd7e79a..6cfea9aa401f 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -273,7 +273,7 @@ extern int of_modalias_node(struct device_node *node, char *modalias, int len);
 extern struct device_node *of_parse_phandle(const struct device_node *np,
 					    const char *phandle_name,
 					    int index);
-extern int of_parse_phandle_with_args(struct device_node *np,
+extern int of_parse_phandle_with_args(const struct device_node *np,
 	const char *list_name, const char *cells_name, int index,
 	struct of_phandle_args *out_args);
 
-- 
cgit v1.2.3


From d8153d4d8b7b6141770e1416c4a338161205ed1b Mon Sep 17 00:00:00 2001
From: Lino Sanfilippo <LinoSanfilippo@gmx.de>
Date: Tue, 14 Jun 2011 17:29:45 +0200
Subject: inotify, fanotify: replace fsnotify_put_group() with
 fsnotify_destroy_group()

Currently in fsnotify_put_group() the ref count of a group is decremented and if
it becomes 0 fsnotify_destroy_group() is called. Since a groups ref count is only
at group creation set to 1 and never increased after that a call to fsnotify_put_group()
always results in a call to fsnotify_destroy_group().
With this patch fsnotify_destroy_group() is called directly.

Signed-off-by: Lino Sanfilippo <LinoSanfilippo@gmx.de>
Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/fanotify/fanotify_user.c | 14 +++++++-------
 fs/notify/group.c                  |  2 +-
 fs/notify/inotify/inotify_user.c   |  8 +++-----
 include/linux/fsnotify_backend.h   |  3 ++-
 4 files changed, 13 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index d43803669739..82ae6d783c14 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -415,7 +415,7 @@ static int fanotify_release(struct inode *ignored, struct file *file)
 	wake_up(&group->fanotify_data.access_waitq);
 #endif
 	/* matches the fanotify_init->fsnotify_alloc_group */
-	fsnotify_put_group(group);
+	fsnotify_destroy_group(group);
 
 	return 0;
 }
@@ -728,13 +728,13 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
 		break;
 	default:
 		fd = -EINVAL;
-		goto out_put_group;
+		goto out_destroy_group;
 	}
 
 	if (flags & FAN_UNLIMITED_QUEUE) {
 		fd = -EPERM;
 		if (!capable(CAP_SYS_ADMIN))
-			goto out_put_group;
+			goto out_destroy_group;
 		group->max_events = UINT_MAX;
 	} else {
 		group->max_events = FANOTIFY_DEFAULT_MAX_EVENTS;
@@ -743,7 +743,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
 	if (flags & FAN_UNLIMITED_MARKS) {
 		fd = -EPERM;
 		if (!capable(CAP_SYS_ADMIN))
-			goto out_put_group;
+			goto out_destroy_group;
 		group->fanotify_data.max_marks = UINT_MAX;
 	} else {
 		group->fanotify_data.max_marks = FANOTIFY_DEFAULT_MAX_MARKS;
@@ -751,12 +751,12 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
 
 	fd = anon_inode_getfd("[fanotify]", &fanotify_fops, group, f_flags);
 	if (fd < 0)
-		goto out_put_group;
+		goto out_destroy_group;
 
 	return fd;
 
-out_put_group:
-	fsnotify_put_group(group);
+out_destroy_group:
+	fsnotify_destroy_group(group);
 	return fd;
 }
 
diff --git a/fs/notify/group.c b/fs/notify/group.c
index 63fc294a4692..cfda328c3d11 100644
--- a/fs/notify/group.c
+++ b/fs/notify/group.c
@@ -50,7 +50,7 @@ void fsnotify_final_destroy_group(struct fsnotify_group *group)
  * situtation, the fsnotify_final_destroy_group will get called when that final
  * mark is freed.
  */
-static void fsnotify_destroy_group(struct fsnotify_group *group)
+void fsnotify_destroy_group(struct fsnotify_group *group)
 {
 	/* clear all inode marks for this group */
 	fsnotify_clear_marks_by_group(group);
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 8445fbc8985c..dbafbfc8ceca 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -293,10 +293,8 @@ static int inotify_release(struct inode *ignored, struct file *file)
 
 	pr_debug("%s: group=%p\n", __func__, group);
 
-	fsnotify_clear_marks_by_group(group);
-
 	/* free this group, matching get was inotify_init->fsnotify_obtain_group */
-	fsnotify_put_group(group);
+	fsnotify_destroy_group(group);
 
 	return 0;
 }
@@ -712,7 +710,7 @@ static struct fsnotify_group *inotify_new_group(unsigned int max_events)
 
 	if (atomic_inc_return(&group->inotify_data.user->inotify_devs) >
 	    inotify_max_user_instances) {
-		fsnotify_put_group(group);
+		fsnotify_destroy_group(group);
 		return ERR_PTR(-EMFILE);
 	}
 
@@ -741,7 +739,7 @@ SYSCALL_DEFINE1(inotify_init1, int, flags)
 	ret = anon_inode_getfd("inotify", &inotify_fops, group,
 				  O_RDONLY | flags);
 	if (ret < 0)
-		fsnotify_put_group(group);
+		fsnotify_destroy_group(group);
 
 	return ret;
 }
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 63d966d5c2ea..d2ad345bdeec 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -364,7 +364,8 @@ static inline void __fsnotify_d_instantiate(struct dentry *dentry, struct inode
 extern struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops);
 /* drop reference on a group from fsnotify_alloc_group */
 extern void fsnotify_put_group(struct fsnotify_group *group);
-
+/* destroy group */
+extern void fsnotify_destroy_group(struct fsnotify_group *group);
 /* take a reference to an event */
 extern void fsnotify_get_event(struct fsnotify_event *event);
 extern void fsnotify_put_event(struct fsnotify_event *event);
-- 
cgit v1.2.3


From 986129520479d689962a42c31acdeaf854ac91f5 Mon Sep 17 00:00:00 2001
From: Lino Sanfilippo <LinoSanfilippo@gmx.de>
Date: Tue, 14 Jun 2011 17:29:46 +0200
Subject: fsnotify: introduce fsnotify_get_group()

Introduce fsnotify_get_group() which increments the reference counter of a group.

Signed-off-by: Lino Sanfilippo <LinoSanfilippo@gmx.de>
Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/group.c                | 8 ++++++++
 include/linux/fsnotify_backend.h | 4 +++-
 2 files changed, 11 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/fs/notify/group.c b/fs/notify/group.c
index cfda328c3d11..1d57c35f1043 100644
--- a/fs/notify/group.c
+++ b/fs/notify/group.c
@@ -62,6 +62,14 @@ void fsnotify_destroy_group(struct fsnotify_group *group)
 		fsnotify_final_destroy_group(group);
 }
 
+/*
+ * Get reference to a group.
+ */
+void fsnotify_get_group(struct fsnotify_group *group)
+{
+	atomic_inc(&group->refcnt);
+}
+
 /*
  * Drop a reference to a group.  Free it if it's through.
  */
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index d2ad345bdeec..e76cef75295d 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -360,8 +360,10 @@ static inline void __fsnotify_d_instantiate(struct dentry *dentry, struct inode
 
 /* called from fsnotify listeners, such as fanotify or dnotify */
 
-/* get a reference to an existing or create a new group */
+/* create a new group */
 extern struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops);
+/* get reference to a group */
+extern void fsnotify_get_group(struct fsnotify_group *group);
 /* drop reference on a group from fsnotify_alloc_group */
 extern void fsnotify_put_group(struct fsnotify_group *group);
 /* destroy group */
-- 
cgit v1.2.3


From 986ab09807ca9454c3f54aae4db7e1bb00daeed3 Mon Sep 17 00:00:00 2001
From: Lino Sanfilippo <LinoSanfilippo@gmx.de>
Date: Tue, 14 Jun 2011 17:29:50 +0200
Subject: fsnotify: use a mutex instead of a spinlock to protect a groups mark
 list

Replaces the groups mark_lock spinlock with a mutex. Using a mutex instead
of a spinlock results in more flexibility (i.e it allows to sleep while the
lock is held).

Signed-off-by: Lino Sanfilippo <LinoSanfilippo@gmx.de>
Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/group.c                |  2 +-
 fs/notify/inode_mark.c           |  4 ++--
 fs/notify/mark.c                 | 18 +++++++++---------
 fs/notify/vfsmount_mark.c        |  4 ++--
 include/linux/fsnotify_backend.h |  2 +-
 5 files changed, 15 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/fs/notify/group.c b/fs/notify/group.c
index 354044c47e23..1f7305711fc9 100644
--- a/fs/notify/group.c
+++ b/fs/notify/group.c
@@ -95,7 +95,7 @@ struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops)
 	init_waitqueue_head(&group->notification_waitq);
 	group->max_events = UINT_MAX;
 
-	spin_lock_init(&group->mark_lock);
+	mutex_init(&group->mark_mutex);
 	INIT_LIST_HEAD(&group->marks_list);
 
 	group->ops = ops;
diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c
index b13c00ac48eb..4e9071e37d5d 100644
--- a/fs/notify/inode_mark.c
+++ b/fs/notify/inode_mark.c
@@ -63,8 +63,8 @@ void fsnotify_destroy_inode_mark(struct fsnotify_mark *mark)
 {
 	struct inode *inode = mark->i.inode;
 
+	BUG_ON(!mutex_is_locked(&mark->group->mark_mutex));
 	assert_spin_locked(&mark->lock);
-	assert_spin_locked(&mark->group->mark_lock);
 
 	spin_lock(&inode->i_lock);
 
@@ -191,8 +191,8 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
 
 	mark->flags |= FSNOTIFY_MARK_FLAG_INODE;
 
+	BUG_ON(!mutex_is_locked(&group->mark_mutex));
 	assert_spin_locked(&mark->lock);
-	assert_spin_locked(&group->mark_lock);
 
 	spin_lock(&inode->i_lock);
 
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index 32447dc06c07..ab25b810b146 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -136,13 +136,13 @@ void fsnotify_destroy_mark(struct fsnotify_mark *mark)
 	group = mark->group;
 	spin_unlock(&mark->lock);
 
-	spin_lock(&group->mark_lock);
+	mutex_lock(&group->mark_mutex);
 	spin_lock(&mark->lock);
 
 	/* something else already called this function on this mark */
 	if (!(mark->flags & FSNOTIFY_MARK_FLAG_ALIVE)) {
 		spin_unlock(&mark->lock);
-		spin_unlock(&group->mark_lock);
+		mutex_unlock(&group->mark_mutex);
 		goto put_group;
 	}
 
@@ -159,7 +159,7 @@ void fsnotify_destroy_mark(struct fsnotify_mark *mark)
 	list_del_init(&mark->g_list);
 
 	spin_unlock(&mark->lock);
-	spin_unlock(&group->mark_lock);
+	mutex_unlock(&group->mark_mutex);
 
 	spin_lock(&destroy_lock);
 	list_add(&mark->destroy_list, &destroy_list);
@@ -232,11 +232,11 @@ int fsnotify_add_mark(struct fsnotify_mark *mark,
 
 	/*
 	 * LOCKING ORDER!!!!
-	 * group->mark_lock
+	 * group->mark_mutex
 	 * mark->lock
 	 * inode->i_lock
 	 */
-	spin_lock(&group->mark_lock);
+	mutex_lock(&group->mark_mutex);
 
 	spin_lock(&mark->lock);
 	mark->flags |= FSNOTIFY_MARK_FLAG_ALIVE;
@@ -263,7 +263,7 @@ int fsnotify_add_mark(struct fsnotify_mark *mark,
 	fsnotify_set_mark_mask_locked(mark, mark->mask);
 	spin_unlock(&mark->lock);
 
-	spin_unlock(&group->mark_lock);
+	mutex_unlock(&group->mark_mutex);
 
 	if (inode)
 		__fsnotify_update_child_dentry_flags(inode);
@@ -277,7 +277,7 @@ err:
 	atomic_dec(&group->num_marks);
 
 	spin_unlock(&mark->lock);
-	spin_unlock(&group->mark_lock);
+	mutex_unlock(&group->mark_mutex);
 
 	spin_lock(&destroy_lock);
 	list_add(&mark->destroy_list, &destroy_list);
@@ -296,7 +296,7 @@ void fsnotify_clear_marks_by_group_flags(struct fsnotify_group *group,
 	struct fsnotify_mark *lmark, *mark;
 	LIST_HEAD(free_list);
 
-	spin_lock(&group->mark_lock);
+	mutex_lock(&group->mark_mutex);
 	list_for_each_entry_safe(mark, lmark, &group->marks_list, g_list) {
 		if (mark->flags & flags) {
 			list_add(&mark->free_g_list, &free_list);
@@ -304,7 +304,7 @@ void fsnotify_clear_marks_by_group_flags(struct fsnotify_group *group,
 			fsnotify_get_mark(mark);
 		}
 	}
-	spin_unlock(&group->mark_lock);
+	mutex_unlock(&group->mark_mutex);
 
 	list_for_each_entry_safe(mark, lmark, &free_list, free_g_list) {
 		fsnotify_destroy_mark(mark);
diff --git a/fs/notify/vfsmount_mark.c b/fs/notify/vfsmount_mark.c
index b7b4b0e8554f..f26a348827f8 100644
--- a/fs/notify/vfsmount_mark.c
+++ b/fs/notify/vfsmount_mark.c
@@ -88,8 +88,8 @@ void fsnotify_destroy_vfsmount_mark(struct fsnotify_mark *mark)
 {
 	struct vfsmount *mnt = mark->m.mnt;
 
+	BUG_ON(!mutex_is_locked(&mark->group->mark_mutex));
 	assert_spin_locked(&mark->lock);
-	assert_spin_locked(&mark->group->mark_lock);
 
 	spin_lock(&mnt->mnt_root->d_lock);
 
@@ -151,8 +151,8 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark,
 
 	mark->flags |= FSNOTIFY_MARK_FLAG_VFSMOUNT;
 
+	BUG_ON(!mutex_is_locked(&group->mark_mutex));
 	assert_spin_locked(&mark->lock);
-	assert_spin_locked(&group->mark_lock);
 
 	spin_lock(&mnt->mnt_root->d_lock);
 
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index e76cef75295d..c5848346840d 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -141,7 +141,7 @@ struct fsnotify_group {
 	unsigned int priority;
 
 	/* stores all fastpath marks assoc with this group so they can be cleaned on unregister */
-	spinlock_t mark_lock;		/* protect marks_list */
+	struct mutex mark_mutex;	/* protect marks_list */
 	atomic_t num_marks;		/* 1 for each mark and 1 for not being
 					 * past the point of no return when freeing
 					 * a group */
-- 
cgit v1.2.3


From e2a29943e9a2ee2aa737a77f550f46ba72269db4 Mon Sep 17 00:00:00 2001
From: Lino Sanfilippo <LinoSanfilippo@gmx.de>
Date: Tue, 14 Jun 2011 17:29:51 +0200
Subject: fsnotify: pass group to fsnotify_destroy_mark()

In fsnotify_destroy_mark() dont get the group from the passed mark anymore,
but pass the group itself as an additional parameter to the function.

Signed-off-by: Lino Sanfilippo <LinoSanfilippo@gmx.de>
Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/dnotify/dnotify.c          |  4 ++--
 fs/notify/fanotify/fanotify_user.c   |  4 ++--
 fs/notify/inode_mark.c               | 10 +++++++++-
 fs/notify/inotify/inotify_fsnotify.c |  2 +-
 fs/notify/inotify/inotify_user.c     |  2 +-
 fs/notify/mark.c                     | 21 ++++-----------------
 fs/notify/vfsmount_mark.c            | 10 +++++++++-
 include/linux/fsnotify_backend.h     |  5 +++--
 kernel/audit_tree.c                  | 10 +++++-----
 kernel/audit_watch.c                 |  4 ++--
 10 files changed, 38 insertions(+), 34 deletions(-)

(limited to 'include/linux')

diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c
index 3344bdd5506e..08b886f119ce 100644
--- a/fs/notify/dnotify/dnotify.c
+++ b/fs/notify/dnotify/dnotify.c
@@ -201,7 +201,7 @@ void dnotify_flush(struct file *filp, fl_owner_t id)
 
 	/* nothing else could have found us thanks to the dnotify_mark_mutex */
 	if (dn_mark->dn == NULL)
-		fsnotify_destroy_mark(fsn_mark);
+		fsnotify_destroy_mark(fsn_mark, dnotify_group);
 
 	mutex_unlock(&dnotify_mark_mutex);
 
@@ -385,7 +385,7 @@ out:
 	spin_unlock(&fsn_mark->lock);
 
 	if (destroy)
-		fsnotify_destroy_mark(fsn_mark);
+		fsnotify_destroy_mark(fsn_mark, dnotify_group);
 
 	mutex_unlock(&dnotify_mark_mutex);
 	fsnotify_put_mark(fsn_mark);
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 599a01952c74..1218d10424d0 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -546,7 +546,7 @@ static int fanotify_remove_vfsmount_mark(struct fsnotify_group *group,
 	removed = fanotify_mark_remove_from_mask(fsn_mark, mask, flags,
 						 &destroy_mark);
 	if (destroy_mark)
-		fsnotify_destroy_mark(fsn_mark);
+		fsnotify_destroy_mark(fsn_mark, group);
 
 	fsnotify_put_mark(fsn_mark);
 	if (removed & real_mount(mnt)->mnt_fsnotify_mask)
@@ -570,7 +570,7 @@ static int fanotify_remove_inode_mark(struct fsnotify_group *group,
 	removed = fanotify_mark_remove_from_mask(fsn_mark, mask, flags,
 						 &destroy_mark);
 	if (destroy_mark)
-		fsnotify_destroy_mark(fsn_mark);
+		fsnotify_destroy_mark(fsn_mark, group);
 	/* matches the fsnotify_find_inode_mark() */
 	fsnotify_put_mark(fsn_mark);
 	if (removed & inode->i_fsnotify_mask)
diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c
index 4e9071e37d5d..21230209c957 100644
--- a/fs/notify/inode_mark.c
+++ b/fs/notify/inode_mark.c
@@ -99,8 +99,16 @@ void fsnotify_clear_marks_by_inode(struct inode *inode)
 	spin_unlock(&inode->i_lock);
 
 	list_for_each_entry_safe(mark, lmark, &free_list, i.free_i_list) {
-		fsnotify_destroy_mark(mark);
+		struct fsnotify_group *group;
+
+		spin_lock(&mark->lock);
+		fsnotify_get_group(mark->group);
+		group = mark->group;
+		spin_unlock(&mark->lock);
+
+		fsnotify_destroy_mark(mark, group);
 		fsnotify_put_mark(mark);
+		fsnotify_put_group(group);
 	}
 }
 
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index 74977fbf5aae..871569c7d609 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -132,7 +132,7 @@ static int inotify_handle_event(struct fsnotify_group *group,
 	}
 
 	if (inode_mark->mask & IN_ONESHOT)
-		fsnotify_destroy_mark(inode_mark);
+		fsnotify_destroy_mark(inode_mark, group);
 
 	return ret;
 }
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 246250f1db7a..00ff82ff7c9f 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -816,7 +816,7 @@ SYSCALL_DEFINE2(inotify_rm_watch, int, fd, __s32, wd)
 
 	ret = 0;
 
-	fsnotify_destroy_mark(&i_mark->fsn_mark);
+	fsnotify_destroy_mark(&i_mark->fsn_mark, group);
 
 	/* match ref taken by inotify_idr_find */
 	fsnotify_put_mark(&i_mark->fsn_mark);
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index ab25b810b146..b77c833c8d0a 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -121,21 +121,11 @@ void fsnotify_put_mark(struct fsnotify_mark *mark)
  * The caller had better be holding a reference to this mark so we don't actually
  * do the final put under the mark->lock
  */
-void fsnotify_destroy_mark(struct fsnotify_mark *mark)
+void fsnotify_destroy_mark(struct fsnotify_mark *mark,
+			   struct fsnotify_group *group)
 {
-	struct fsnotify_group *group;
 	struct inode *inode = NULL;
 
-	spin_lock(&mark->lock);
-	/* dont get the group from a mark that is not alive yet */
-	if (!(mark->flags & FSNOTIFY_MARK_FLAG_ALIVE)) {
-		spin_unlock(&mark->lock);
-		return;
-	}
-	fsnotify_get_group(mark->group);
-	group = mark->group;
-	spin_unlock(&mark->lock);
-
 	mutex_lock(&group->mark_mutex);
 	spin_lock(&mark->lock);
 
@@ -143,7 +133,7 @@ void fsnotify_destroy_mark(struct fsnotify_mark *mark)
 	if (!(mark->flags & FSNOTIFY_MARK_FLAG_ALIVE)) {
 		spin_unlock(&mark->lock);
 		mutex_unlock(&group->mark_mutex);
-		goto put_group;
+		return;
 	}
 
 	mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE;
@@ -194,9 +184,6 @@ void fsnotify_destroy_mark(struct fsnotify_mark *mark)
 	 */
 
 	atomic_dec(&group->num_marks);
-
-put_group:
-	fsnotify_put_group(group);
 }
 
 void fsnotify_set_mark_mask_locked(struct fsnotify_mark *mark, __u32 mask)
@@ -307,7 +294,7 @@ void fsnotify_clear_marks_by_group_flags(struct fsnotify_group *group,
 	mutex_unlock(&group->mark_mutex);
 
 	list_for_each_entry_safe(mark, lmark, &free_list, free_g_list) {
-		fsnotify_destroy_mark(mark);
+		fsnotify_destroy_mark(mark, group);
 		fsnotify_put_mark(mark);
 	}
 }
diff --git a/fs/notify/vfsmount_mark.c b/fs/notify/vfsmount_mark.c
index f26a348827f8..4df58b8ea64a 100644
--- a/fs/notify/vfsmount_mark.c
+++ b/fs/notify/vfsmount_mark.c
@@ -46,8 +46,16 @@ void fsnotify_clear_marks_by_mount(struct vfsmount *mnt)
 	spin_unlock(&mnt->mnt_root->d_lock);
 
 	list_for_each_entry_safe(mark, lmark, &free_list, m.free_m_list) {
-		fsnotify_destroy_mark(mark);
+		struct fsnotify_group *group;
+
+		spin_lock(&mark->lock);
+		fsnotify_get_group(mark->group);
+		group = mark->group;
+		spin_unlock(&mark->lock);
+
+		fsnotify_destroy_mark(mark, group);
 		fsnotify_put_mark(mark);
+		fsnotify_put_group(group);
 	}
 }
 
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index c5848346840d..140b4b8a100b 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -408,8 +408,9 @@ extern void fsnotify_set_mark_mask_locked(struct fsnotify_mark *mark, __u32 mask
 /* attach the mark to both the group and the inode */
 extern int fsnotify_add_mark(struct fsnotify_mark *mark, struct fsnotify_group *group,
 			     struct inode *inode, struct vfsmount *mnt, int allow_dups);
-/* given a mark, flag it to be freed when all references are dropped */
-extern void fsnotify_destroy_mark(struct fsnotify_mark *mark);
+/* given a group and a mark, flag mark to be freed when all references are dropped */
+extern void fsnotify_destroy_mark(struct fsnotify_mark *mark,
+				  struct fsnotify_group *group);
 /* run all the marks in a group, and clear all of the vfsmount marks */
 extern void fsnotify_clear_vfsmount_marks_by_group(struct fsnotify_group *group);
 /* run all the marks in a group, and clear all of the inode marks */
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index ed206fd88cca..e81175ef25f8 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -249,7 +249,7 @@ static void untag_chunk(struct node *p)
 		list_del_rcu(&chunk->hash);
 		spin_unlock(&hash_lock);
 		spin_unlock(&entry->lock);
-		fsnotify_destroy_mark(entry);
+		fsnotify_destroy_mark(entry, audit_tree_group);
 		goto out;
 	}
 
@@ -291,7 +291,7 @@ static void untag_chunk(struct node *p)
 		owner->root = new;
 	spin_unlock(&hash_lock);
 	spin_unlock(&entry->lock);
-	fsnotify_destroy_mark(entry);
+	fsnotify_destroy_mark(entry, audit_tree_group);
 	fsnotify_put_mark(&new->mark);	/* drop initial reference */
 	goto out;
 
@@ -331,7 +331,7 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree)
 		spin_unlock(&hash_lock);
 		chunk->dead = 1;
 		spin_unlock(&entry->lock);
-		fsnotify_destroy_mark(entry);
+		fsnotify_destroy_mark(entry, audit_tree_group);
 		fsnotify_put_mark(entry);
 		return 0;
 	}
@@ -412,7 +412,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
 		spin_unlock(&chunk_entry->lock);
 		spin_unlock(&old_entry->lock);
 
-		fsnotify_destroy_mark(chunk_entry);
+		fsnotify_destroy_mark(chunk_entry, audit_tree_group);
 
 		fsnotify_put_mark(chunk_entry);
 		fsnotify_put_mark(old_entry);
@@ -443,7 +443,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
 	spin_unlock(&hash_lock);
 	spin_unlock(&chunk_entry->lock);
 	spin_unlock(&old_entry->lock);
-	fsnotify_destroy_mark(old_entry);
+	fsnotify_destroy_mark(old_entry, audit_tree_group);
 	fsnotify_put_mark(chunk_entry);	/* drop initial reference */
 	fsnotify_put_mark(old_entry); /* pair to fsnotify_find mark_entry */
 	return 0;
diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c
index 3823281401b5..a66affc1c12c 100644
--- a/kernel/audit_watch.c
+++ b/kernel/audit_watch.c
@@ -349,7 +349,7 @@ static void audit_remove_parent_watches(struct audit_parent *parent)
 	}
 	mutex_unlock(&audit_filter_mutex);
 
-	fsnotify_destroy_mark(&parent->mark);
+	fsnotify_destroy_mark(&parent->mark, audit_watch_group);
 }
 
 /* Get path information necessary for adding watches. */
@@ -456,7 +456,7 @@ void audit_remove_watch_rule(struct audit_krule *krule)
 
 		if (list_empty(&parent->watches)) {
 			audit_get_parent(parent);
-			fsnotify_destroy_mark(&parent->mark);
+			fsnotify_destroy_mark(&parent->mark, audit_watch_group);
 			audit_put_parent(parent);
 		}
 	}
-- 
cgit v1.2.3


From d5a335b845792d2a69ed1e244c0b233117b7db3c Mon Sep 17 00:00:00 2001
From: Lino Sanfilippo <LinoSanfilippo@gmx.de>
Date: Tue, 14 Jun 2011 17:29:52 +0200
Subject: fsnotify: introduce locked versions of fsnotify_add_mark() and
 fsnotify_remove_mark()

This patch introduces fsnotify_add_mark_locked() and fsnotify_remove_mark_locked()
which are essentially the same as fsnotify_add_mark() and fsnotify_remove_mark() but
assume that the caller has already taken the groups mark mutex.

Signed-off-by: Lino Sanfilippo <LinoSanfilippo@gmx.de>
Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/mark.c                 | 42 ++++++++++++++++++++++++++++------------
 include/linux/fsnotify_backend.h |  4 ++++
 2 files changed, 34 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index b77c833c8d0a..f9dda0304a10 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -121,18 +121,18 @@ void fsnotify_put_mark(struct fsnotify_mark *mark)
  * The caller had better be holding a reference to this mark so we don't actually
  * do the final put under the mark->lock
  */
-void fsnotify_destroy_mark(struct fsnotify_mark *mark,
-			   struct fsnotify_group *group)
+void fsnotify_destroy_mark_locked(struct fsnotify_mark *mark,
+				  struct fsnotify_group *group)
 {
 	struct inode *inode = NULL;
 
-	mutex_lock(&group->mark_mutex);
+	BUG_ON(!mutex_is_locked(&group->mark_mutex));
+
 	spin_lock(&mark->lock);
 
 	/* something else already called this function on this mark */
 	if (!(mark->flags & FSNOTIFY_MARK_FLAG_ALIVE)) {
 		spin_unlock(&mark->lock);
-		mutex_unlock(&group->mark_mutex);
 		return;
 	}
 
@@ -149,6 +149,8 @@ void fsnotify_destroy_mark(struct fsnotify_mark *mark,
 	list_del_init(&mark->g_list);
 
 	spin_unlock(&mark->lock);
+
+	/* release lock temporarily */
 	mutex_unlock(&group->mark_mutex);
 
 	spin_lock(&destroy_lock);
@@ -184,6 +186,16 @@ void fsnotify_destroy_mark(struct fsnotify_mark *mark,
 	 */
 
 	atomic_dec(&group->num_marks);
+
+	mutex_lock(&group->mark_mutex);
+}
+
+void fsnotify_destroy_mark(struct fsnotify_mark *mark,
+			   struct fsnotify_group *group)
+{
+	mutex_lock(&group->mark_mutex);
+	fsnotify_destroy_mark_locked(mark, group);
+	mutex_unlock(&group->mark_mutex);
 }
 
 void fsnotify_set_mark_mask_locked(struct fsnotify_mark *mark, __u32 mask)
@@ -208,14 +220,15 @@ void fsnotify_set_mark_ignored_mask_locked(struct fsnotify_mark *mark, __u32 mas
  * These marks may be used for the fsnotify backend to determine which
  * event types should be delivered to which group.
  */
-int fsnotify_add_mark(struct fsnotify_mark *mark,
-		      struct fsnotify_group *group, struct inode *inode,
-		      struct vfsmount *mnt, int allow_dups)
+int fsnotify_add_mark_locked(struct fsnotify_mark *mark,
+			     struct fsnotify_group *group, struct inode *inode,
+			     struct vfsmount *mnt, int allow_dups)
 {
 	int ret = 0;
 
 	BUG_ON(inode && mnt);
 	BUG_ON(!inode && !mnt);
+	BUG_ON(!mutex_is_locked(&group->mark_mutex));
 
 	/*
 	 * LOCKING ORDER!!!!
@@ -223,8 +236,6 @@ int fsnotify_add_mark(struct fsnotify_mark *mark,
 	 * mark->lock
 	 * inode->i_lock
 	 */
-	mutex_lock(&group->mark_mutex);
-
 	spin_lock(&mark->lock);
 	mark->flags |= FSNOTIFY_MARK_FLAG_ALIVE;
 
@@ -250,8 +261,6 @@ int fsnotify_add_mark(struct fsnotify_mark *mark,
 	fsnotify_set_mark_mask_locked(mark, mark->mask);
 	spin_unlock(&mark->lock);
 
-	mutex_unlock(&group->mark_mutex);
-
 	if (inode)
 		__fsnotify_update_child_dentry_flags(inode);
 
@@ -264,7 +273,6 @@ err:
 	atomic_dec(&group->num_marks);
 
 	spin_unlock(&mark->lock);
-	mutex_unlock(&group->mark_mutex);
 
 	spin_lock(&destroy_lock);
 	list_add(&mark->destroy_list, &destroy_list);
@@ -274,6 +282,16 @@ err:
 	return ret;
 }
 
+int fsnotify_add_mark(struct fsnotify_mark *mark, struct fsnotify_group *group,
+		      struct inode *inode, struct vfsmount *mnt, int allow_dups)
+{
+	int ret;
+	mutex_lock(&group->mark_mutex);
+	ret = fsnotify_add_mark_locked(mark, group, inode, mnt, allow_dups);
+	mutex_unlock(&group->mark_mutex);
+	return ret;
+}
+
 /*
  * clear any marks in a group in which mark->flags & flags is true
  */
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 140b4b8a100b..26c06afa264e 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -408,9 +408,13 @@ extern void fsnotify_set_mark_mask_locked(struct fsnotify_mark *mark, __u32 mask
 /* attach the mark to both the group and the inode */
 extern int fsnotify_add_mark(struct fsnotify_mark *mark, struct fsnotify_group *group,
 			     struct inode *inode, struct vfsmount *mnt, int allow_dups);
+extern int fsnotify_add_mark_locked(struct fsnotify_mark *mark, struct fsnotify_group *group,
+				    struct inode *inode, struct vfsmount *mnt, int allow_dups);
 /* given a group and a mark, flag mark to be freed when all references are dropped */
 extern void fsnotify_destroy_mark(struct fsnotify_mark *mark,
 				  struct fsnotify_group *group);
+extern void fsnotify_destroy_mark_locked(struct fsnotify_mark *mark,
+					 struct fsnotify_group *group);
 /* run all the marks in a group, and clear all of the vfsmount marks */
 extern void fsnotify_clear_vfsmount_marks_by_group(struct fsnotify_group *group);
 /* run all the marks in a group, and clear all of the inode marks */
-- 
cgit v1.2.3


From 64c20d2a20fce295c260ea6cb3b468edfa2fb07b Mon Sep 17 00:00:00 2001
From: Lino Sanfilippo <LinoSanfilippo@gmx.de>
Date: Tue, 14 Jun 2011 17:29:53 +0200
Subject: fsnotify: dont put marks on temporary list when clearing marks by
 group

In clear_marks_by_group_flags() the mark list of a group is iterated and the
marks are put on a temporary list.
Since we introduced fsnotify_destroy_mark_locked() we dont need the temp list
any more and are able to remove the marks while the mark list is iterated and
the mark list mutex is held.

Signed-off-by: Lino Sanfilippo <LinoSanfilippo@gmx.de>
Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/mark.c                 | 10 ++--------
 include/linux/fsnotify_backend.h |  1 -
 2 files changed, 2 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index f9dda0304a10..0e93d90bb753 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -299,22 +299,16 @@ void fsnotify_clear_marks_by_group_flags(struct fsnotify_group *group,
 					 unsigned int flags)
 {
 	struct fsnotify_mark *lmark, *mark;
-	LIST_HEAD(free_list);
 
 	mutex_lock(&group->mark_mutex);
 	list_for_each_entry_safe(mark, lmark, &group->marks_list, g_list) {
 		if (mark->flags & flags) {
-			list_add(&mark->free_g_list, &free_list);
-			list_del_init(&mark->g_list);
 			fsnotify_get_mark(mark);
+			fsnotify_destroy_mark_locked(mark, group);
+			fsnotify_put_mark(mark);
 		}
 	}
 	mutex_unlock(&group->mark_mutex);
-
-	list_for_each_entry_safe(mark, lmark, &free_list, free_g_list) {
-		fsnotify_destroy_mark(mark, group);
-		fsnotify_put_mark(mark);
-	}
 }
 
 /*
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 26c06afa264e..5a8899350456 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -287,7 +287,6 @@ struct fsnotify_mark {
 		struct fsnotify_inode_mark i;
 		struct fsnotify_vfsmount_mark m;
 	};
-	struct list_head free_g_list;	/* tmp list used when freeing this mark */
 	__u32 ignored_mask;		/* events types to ignore */
 #define FSNOTIFY_MARK_FLAG_INODE		0x01
 #define FSNOTIFY_MARK_FLAG_VFSMOUNT		0x02
-- 
cgit v1.2.3


From 6960b0d909cde5bdff49e4e5c1250edd10be7ebd Mon Sep 17 00:00:00 2001
From: Lino Sanfilippo <LinoSanfilippo@gmx.de>
Date: Fri, 12 Aug 2011 01:13:31 +0200
Subject: fsnotify: change locking order

On Mon, Aug 01, 2011 at 04:38:22PM -0400, Eric Paris wrote:
>
> I finally built and tested a v3.0 kernel with these patches (I know I'm
> SOOOOOO far behind).  Not what I hoped for:
>
> > [  150.937798] VFS: Busy inodes after unmount of tmpfs. Self-destruct in 5 seconds.  Have a nice day...
> > [  150.945290] BUG: unable to handle kernel NULL pointer dereference at 0000000000000070
> > [  150.946012] IP: [<ffffffff810ffd58>] shmem_free_inode+0x18/0x50
> > [  150.946012] PGD 2bf9e067 PUD 2bf9f067 PMD 0
> > [  150.946012] Oops: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC
> > [  150.946012] CPU 0
> > [  150.946012] Modules linked in: nfs lockd fscache auth_rpcgss nfs_acl sunrpc ip6t_REJECT nf_conntrack_ipv6 nf_defrag_ipv6 ip6table_filter ip6_tables ext4 jbd2 crc16 joydev ata_piix i2c_piix4 pcspkr uinput ipv6 autofs4 usbhid [last unloaded: scsi_wait_scan]
> > [  150.946012]
> > [  150.946012] Pid: 2764, comm: syscall_thrash Not tainted 3.0.0+ #1 Red Hat KVM
> > [  150.946012] RIP: 0010:[<ffffffff810ffd58>]  [<ffffffff810ffd58>] shmem_free_inode+0x18/0x50
> > [  150.946012] RSP: 0018:ffff88002c2e5df8  EFLAGS: 00010282
> > [  150.946012] RAX: 000000004e370d9f RBX: 0000000000000000 RCX: ffff88003a029438
> > [  150.946012] RDX: 0000000033630a5f RSI: 0000000000000000 RDI: ffff88003491c240
> > [  150.946012] RBP: ffff88002c2e5e08 R08: 0000000000000000 R09: 0000000000000000
> > [  150.946012] R10: 0000000000000000 R11: 0000000000000000 R12: ffff88003a029428
> > [  150.946012] R13: ffff88003a029428 R14: ffff88003a029428 R15: ffff88003499a610
> > [  150.946012] FS:  00007f5a05420700(0000) GS:ffff88003f600000(0000) knlGS:0000000000000000
> > [  150.946012] CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
> > [  150.946012] CR2: 0000000000000070 CR3: 000000002a662000 CR4: 00000000000006f0
> > [  150.946012] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> > [  150.946012] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
> > [  150.946012] Process syscall_thrash (pid: 2764, threadinfo ffff88002c2e4000, task ffff88002bfbc760)
> > [  150.946012] Stack:
> > [  150.946012]  ffff88003a029438 ffff88003a029428 ffff88002c2e5e38 ffffffff81102f76
> > [  150.946012]  ffff88003a029438 ffff88003a029598 ffffffff8160f9c0 ffff88002c221250
> > [  150.946012]  ffff88002c2e5e68 ffffffff8115e9be ffff88002c2e5e68 ffff88003a029438
> > [  150.946012] Call Trace:
> > [  150.946012]  [<ffffffff81102f76>] shmem_evict_inode+0x76/0x130
> > [  150.946012]  [<ffffffff8115e9be>] evict+0x7e/0x170
> > [  150.946012]  [<ffffffff8115ee40>] iput_final+0xd0/0x190
> > [  150.946012]  [<ffffffff8115ef33>] iput+0x33/0x40
> > [  150.946012]  [<ffffffff81180205>] fsnotify_destroy_mark_locked+0x145/0x160
> > [  150.946012]  [<ffffffff81180316>] fsnotify_destroy_mark+0x36/0x50
> > [  150.946012]  [<ffffffff81181937>] sys_inotify_rm_watch+0x77/0xd0
> > [  150.946012]  [<ffffffff815aca52>] system_call_fastpath+0x16/0x1b
> > [  150.946012] Code: 67 4a 00 b8 e4 ff ff ff eb aa 66 0f 1f 84 00 00 00 00 00 55 48 89 e5 48 83 ec 10 48 89 1c 24 4c 89 64 24 08 48 8b 9f 40 05 00 00
> > [  150.946012]  83 7b 70 00 74 1c 4c 8d a3 80 00 00 00 4c 89 e7 e8 d2 5d 4a
> > [  150.946012] RIP  [<ffffffff810ffd58>] shmem_free_inode+0x18/0x50
> > [  150.946012]  RSP <ffff88002c2e5df8>
> > [  150.946012] CR2: 0000000000000070
>
> Looks at aweful lot like the problem from:
> http://www.spinics.net/lists/linux-fsdevel/msg46101.html
>

I tried to reproduce this bug with your test program, but without success.
However, if I understand correctly, this occurs since we dont hold any locks when
we call iput() in mark_destroy(), right?
With the patches you tested, iput() is also not called within any lock, since the
groups mark_mutex is released temporarily before iput() is called.  This is, since
the original codes behaviour is similar.
However since we now have a mutex as the biggest lock, we can do what you
suggested (http://www.spinics.net/lists/linux-fsdevel/msg46107.html) and
call iput() with the mutex held to avoid the race.
The patch below implements this. It uses nested locking to avoid deadlock in case
we do the final iput() on an inode which still holds marks and thus would take
the mutex again when calling fsnotify_inode_delete() in destroy_inode().

Signed-off-by: Lino Sanfilippo <LinoSanfilippo@gmx.de>
Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/mark.c                 | 20 ++++++++++----------
 include/linux/fsnotify_backend.h |  7 ++++---
 2 files changed, 14 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index 0e93d90bb753..fc6b49bf7360 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -150,6 +150,8 @@ void fsnotify_destroy_mark_locked(struct fsnotify_mark *mark,
 
 	spin_unlock(&mark->lock);
 
+	if (inode && (mark->flags & FSNOTIFY_MARK_FLAG_OBJECT_PINNED))
+		iput(inode);
 	/* release lock temporarily */
 	mutex_unlock(&group->mark_mutex);
 
@@ -157,6 +159,11 @@ void fsnotify_destroy_mark_locked(struct fsnotify_mark *mark,
 	list_add(&mark->destroy_list, &destroy_list);
 	spin_unlock(&destroy_lock);
 	wake_up(&destroy_waitq);
+	/*
+	 * We don't necessarily have a ref on mark from caller so the above destroy
+	 * may have actually freed it, unless this group provides a 'freeing_mark'
+	 * function which must be holding a reference.
+	 */
 
 	/*
 	 * Some groups like to know that marks are being freed.  This is a
@@ -178,22 +185,15 @@ void fsnotify_destroy_mark_locked(struct fsnotify_mark *mark,
 	 * is just a lazy update (and could be a perf win...)
 	 */
 
-	if (inode && (mark->flags & FSNOTIFY_MARK_FLAG_OBJECT_PINNED))
-		iput(inode);
-	/*
-	 * We don't necessarily have a ref on mark from caller so the above iput
-	 * may have already destroyed it.  Don't touch from now on.
-	 */
-
 	atomic_dec(&group->num_marks);
 
-	mutex_lock(&group->mark_mutex);
+	mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING);
 }
 
 void fsnotify_destroy_mark(struct fsnotify_mark *mark,
 			   struct fsnotify_group *group)
 {
-	mutex_lock(&group->mark_mutex);
+	mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING);
 	fsnotify_destroy_mark_locked(mark, group);
 	mutex_unlock(&group->mark_mutex);
 }
@@ -300,7 +300,7 @@ void fsnotify_clear_marks_by_group_flags(struct fsnotify_group *group,
 {
 	struct fsnotify_mark *lmark, *mark;
 
-	mutex_lock(&group->mark_mutex);
+	mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING);
 	list_for_each_entry_safe(mark, lmark, &group->marks_list, g_list) {
 		if (mark->flags & flags) {
 			fsnotify_get_mark(mark);
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 5a8899350456..1af2f6a722c0 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -88,9 +88,10 @@ struct fsnotify_event_private_data;
  *		if the group is interested in this event.
  * handle_event - main call for a group to handle an fs event
  * free_group_priv - called when a group refcnt hits 0 to clean up the private union
- * freeing-mark - this means that a mark has been flagged to die when everything
- *		finishes using it.  The function is supplied with what must be a
- *		valid group and inode to use to clean up.
+ * freeing_mark - called when a mark is being destroyed for some reason.  The group
+ * 		MUST be holding a reference on each mark and that reference must be
+ * 		dropped in this function.  inotify uses this function to send
+ * 		userspace messages that marks have been removed.
  */
 struct fsnotify_ops {
 	bool (*should_send_event)(struct fsnotify_group *group, struct inode *inode,
-- 
cgit v1.2.3


From 0a6b6bd5919a65030b557ec8fe81f6fb3e93744a Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Fri, 14 Oct 2011 17:43:39 -0400
Subject: fsnotify: make fasync generic for both inotify and fanotify

inotify is supposed to support async signal notification when information
is available on the inotify fd.  This patch moves that support to generic
fsnotify functions so it can be used by all notification mechanisms.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/fanotify/fanotify_user.c |  4 ++++
 fs/notify/group.c                  |  7 +++++++
 fs/notify/inotify/inotify_user.c   | 13 ++++---------
 fs/notify/notification.c           |  1 +
 include/linux/fsnotify_backend.h   |  5 ++++-
 5 files changed, 20 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 1218d10424d0..f0e7a57bc899 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -414,6 +414,10 @@ static int fanotify_release(struct inode *ignored, struct file *file)
 
 	wake_up(&group->fanotify_data.access_waitq);
 #endif
+
+	if (file->f_flags & FASYNC)
+		fsnotify_fasync(-1, file, 0);
+
 	/* matches the fanotify_init->fsnotify_alloc_group */
 	fsnotify_destroy_group(group);
 
diff --git a/fs/notify/group.c b/fs/notify/group.c
index 1f7305711fc9..bd2625bd88b4 100644
--- a/fs/notify/group.c
+++ b/fs/notify/group.c
@@ -102,3 +102,10 @@ struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops)
 
 	return group;
 }
+
+int fsnotify_fasync(int fd, struct file *file, int on)
+{
+	struct fsnotify_group *group = file->private_data;
+
+	return fasync_helper(fd, file, on, &group->fsn_fa) >= 0 ? 0 : -EIO;
+}
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 00ff82ff7c9f..68f7bec1e664 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -280,19 +280,15 @@ static ssize_t inotify_read(struct file *file, char __user *buf,
 	return ret;
 }
 
-static int inotify_fasync(int fd, struct file *file, int on)
-{
-	struct fsnotify_group *group = file->private_data;
-
-	return fasync_helper(fd, file, on, &group->inotify_data.fa) >= 0 ? 0 : -EIO;
-}
-
 static int inotify_release(struct inode *ignored, struct file *file)
 {
 	struct fsnotify_group *group = file->private_data;
 
 	pr_debug("%s: group=%p\n", __func__, group);
 
+	if (file->f_flags & FASYNC)
+		fsnotify_fasync(-1, file, 0);
+
 	/* free this group, matching get was inotify_init->fsnotify_obtain_group */
 	fsnotify_destroy_group(group);
 
@@ -335,7 +331,7 @@ static long inotify_ioctl(struct file *file, unsigned int cmd,
 static const struct file_operations inotify_fops = {
 	.poll		= inotify_poll,
 	.read		= inotify_read,
-	.fasync		= inotify_fasync,
+	.fasync		= fsnotify_fasync,
 	.release	= inotify_release,
 	.unlocked_ioctl	= inotify_ioctl,
 	.compat_ioctl	= inotify_ioctl,
@@ -706,7 +702,6 @@ static struct fsnotify_group *inotify_new_group(unsigned int max_events)
 	spin_lock_init(&group->inotify_data.idr_lock);
 	idr_init(&group->inotify_data.idr);
 	group->inotify_data.last_wd = 0;
-	group->inotify_data.fa = NULL;
 	group->inotify_data.user = get_current_user();
 
 	if (atomic_inc_return(&group->inotify_data.user->inotify_devs) >
diff --git a/fs/notify/notification.c b/fs/notify/notification.c
index c887b1378f7e..b3963d8c9988 100644
--- a/fs/notify/notification.c
+++ b/fs/notify/notification.c
@@ -225,6 +225,7 @@ alloc_holder:
 	mutex_unlock(&group->notification_mutex);
 
 	wake_up(&group->notification_waitq);
+	kill_fasync(&group->fsn_fa, SIGIO, POLL_IN);
 	return return_event;
 }
 
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 1af2f6a722c0..d5b0910d4961 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -148,6 +148,8 @@ struct fsnotify_group {
 					 * a group */
 	struct list_head marks_list;	/* all inode marks for this group */
 
+	struct fasync_struct    *fsn_fa;    /* async notification */
+
 	/* groups can define private fields here or use the void *private */
 	union {
 		void *private;
@@ -156,7 +158,6 @@ struct fsnotify_group {
 			spinlock_t	idr_lock;
 			struct idr      idr;
 			u32             last_wd;
-			struct fasync_struct    *fa;    /* async notification */
 			struct user_struct      *user;
 		} inotify_data;
 #endif
@@ -368,6 +369,8 @@ extern void fsnotify_get_group(struct fsnotify_group *group);
 extern void fsnotify_put_group(struct fsnotify_group *group);
 /* destroy group */
 extern void fsnotify_destroy_group(struct fsnotify_group *group);
+/* fasync handler function */
+extern int fsnotify_fasync(int fd, struct file *file, int on);
 /* take a reference to an event */
 extern void fsnotify_get_event(struct fsnotify_event *event);
 extern void fsnotify_put_event(struct fsnotify_event *event);
-- 
cgit v1.2.3


From 7056741fd9fc14a65608549a4657cf5178f05f63 Mon Sep 17 00:00:00 2001
From: Jim Kukunas <james.t.kukunas@linux.intel.com>
Date: Thu, 8 Nov 2012 13:47:44 -0800
Subject: lib/raid6: Add AVX2 optimized recovery functions

Optimize RAID6 recovery functions to take advantage of
the 256-bit YMM integer instructions introduced in AVX2.

The patch was tested and benchmarked before submission.
However hardware is not yet released so benchmark numbers
cannot be reported.

Acked-by: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Jim Kukunas <james.t.kukunas@linux.intel.com>
Signed-off-by: NeilBrown <neilb@suse.de>
---
 arch/x86/Makefile       |   5 +-
 include/linux/raid/pq.h |   1 +
 lib/raid6/Makefile      |   2 +-
 lib/raid6/algos.c       |   3 +
 lib/raid6/recov_avx2.c  | 327 ++++++++++++++++++++++++++++++++++++++++++++++++
 lib/raid6/test/Makefile |   2 +-
 lib/raid6/x86.h         |  14 ++-
 7 files changed, 345 insertions(+), 9 deletions(-)
 create mode 100644 lib/raid6/recov_avx2.c

(limited to 'include/linux')

diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 58790bd85c1d..95477aae9ff7 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -123,9 +123,10 @@ cfi-sections := $(call as-instr,.cfi_sections .debug_frame,-DCONFIG_AS_CFI_SECTI
 # does binutils support specific instructions?
 asinstr := $(call as-instr,fxsaveq (%rax),-DCONFIG_AS_FXSAVEQ=1)
 avx_instr := $(call as-instr,vxorps %ymm0$(comma)%ymm1$(comma)%ymm2,-DCONFIG_AS_AVX=1)
+avx2_instr :=$(call as-instr,vpbroadcastb %xmm0$(comma)%ymm1,-DCONFIG_AS_AVX2=1)
 
-KBUILD_AFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr)
-KBUILD_CFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr)
+KBUILD_AFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) $(avx2_instr)
+KBUILD_CFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) $(avx2_instr)
 
 LDFLAGS := -m elf_$(UTS_MACHINE)
 
diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h
index 640c69ceec96..3156347452b9 100644
--- a/include/linux/raid/pq.h
+++ b/include/linux/raid/pq.h
@@ -109,6 +109,7 @@ struct raid6_recov_calls {
 
 extern const struct raid6_recov_calls raid6_recov_intx1;
 extern const struct raid6_recov_calls raid6_recov_ssse3;
+extern const struct raid6_recov_calls raid6_recov_avx2;
 
 /* Algorithm list */
 extern const struct raid6_calls * const raid6_algos[];
diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile
index de06dfe165b8..8c2e22bef661 100644
--- a/lib/raid6/Makefile
+++ b/lib/raid6/Makefile
@@ -1,6 +1,6 @@
 obj-$(CONFIG_RAID6_PQ)	+= raid6_pq.o
 
-raid6_pq-y	+= algos.o recov.o recov_ssse3.o tables.o int1.o int2.o int4.o \
+raid6_pq-y	+= algos.o recov.o recov_ssse3.o recov_avx2.o tables.o int1.o int2.o int4.o \
 		   int8.o int16.o int32.o altivec1.o altivec2.o altivec4.o \
 		   altivec8.o mmx.o sse1.o sse2.o
 hostprogs-y	+= mktables
diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c
index 589f5f50ad2e..8b7f55cadb45 100644
--- a/lib/raid6/algos.c
+++ b/lib/raid6/algos.c
@@ -72,6 +72,9 @@ EXPORT_SYMBOL_GPL(raid6_datap_recov);
 
 const struct raid6_recov_calls *const raid6_recov_algos[] = {
 #if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__)
+#ifdef CONFIG_AS_AVX2
+	&raid6_recov_avx2,
+#endif
 	&raid6_recov_ssse3,
 #endif
 	&raid6_recov_intx1,
diff --git a/lib/raid6/recov_avx2.c b/lib/raid6/recov_avx2.c
new file mode 100644
index 000000000000..43a9bab91879
--- /dev/null
+++ b/lib/raid6/recov_avx2.c
@@ -0,0 +1,327 @@
+/*
+ * Copyright (C) 2012 Intel Corporation
+ * Author: Jim Kukunas <james.t.kukunas@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__)
+
+#if CONFIG_AS_AVX2
+
+#include <linux/raid/pq.h>
+#include "x86.h"
+
+static int raid6_has_avx2(void)
+{
+	return boot_cpu_has(X86_FEATURE_AVX2) &&
+		boot_cpu_has(X86_FEATURE_AVX);
+}
+
+static void raid6_2data_recov_avx2(int disks, size_t bytes, int faila,
+		int failb, void **ptrs)
+{
+	u8 *p, *q, *dp, *dq;
+	const u8 *pbmul;	/* P multiplier table for B data */
+	const u8 *qmul;		/* Q multiplier table (for both) */
+	const u8 x0f = 0x0f;
+
+	p = (u8 *)ptrs[disks-2];
+	q = (u8 *)ptrs[disks-1];
+
+	/* Compute syndrome with zero for the missing data pages
+	   Use the dead data pages as temporary storage for
+	   delta p and delta q */
+	dp = (u8 *)ptrs[faila];
+	ptrs[faila] = (void *)raid6_empty_zero_page;
+	ptrs[disks-2] = dp;
+	dq = (u8 *)ptrs[failb];
+	ptrs[failb] = (void *)raid6_empty_zero_page;
+	ptrs[disks-1] = dq;
+
+	raid6_call.gen_syndrome(disks, bytes, ptrs);
+
+	/* Restore pointer table */
+	ptrs[faila]   = dp;
+	ptrs[failb]   = dq;
+	ptrs[disks-2] = p;
+	ptrs[disks-1] = q;
+
+	/* Now, pick the proper data tables */
+	pbmul = raid6_vgfmul[raid6_gfexi[failb-faila]];
+	qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^
+		raid6_gfexp[failb]]];
+
+	kernel_fpu_begin();
+
+	/* ymm0 = x0f[16] */
+	asm volatile("vpbroadcastb %0, %%ymm7" : : "m" (x0f));
+
+	while (bytes) {
+#ifdef CONFIG_X86_64
+		asm volatile("vmovdqa %0, %%ymm1" : : "m" (q[0]));
+		asm volatile("vmovdqa %0, %%ymm9" : : "m" (q[32]));
+		asm volatile("vmovdqa %0, %%ymm0" : : "m" (p[0]));
+		asm volatile("vmovdqa %0, %%ymm8" : : "m" (p[32]));
+		asm volatile("vpxor %0, %%ymm1, %%ymm1" : : "m" (dq[0]));
+		asm volatile("vpxor %0, %%ymm9, %%ymm9" : : "m" (dq[32]));
+		asm volatile("vpxor %0, %%ymm0, %%ymm0" : : "m" (dp[0]));
+		asm volatile("vpxor %0, %%ymm8, %%ymm8" : : "m" (dp[32]));
+
+		/*
+		 * 1 = dq[0]  ^ q[0]
+		 * 9 = dq[32] ^ q[32]
+		 * 0 = dp[0]  ^ p[0]
+		 * 8 = dp[32] ^ p[32]
+		 */
+
+		asm volatile("vbroadcasti128 %0, %%ymm4" : : "m" (qmul[0]));
+		asm volatile("vbroadcasti128 %0, %%ymm5" : : "m" (qmul[16]));
+
+		asm volatile("vpsraw $4, %ymm1, %ymm3");
+		asm volatile("vpsraw $4, %ymm9, %ymm12");
+		asm volatile("vpand %ymm7, %ymm1, %ymm1");
+		asm volatile("vpand %ymm7, %ymm9, %ymm9");
+		asm volatile("vpand %ymm7, %ymm3, %ymm3");
+		asm volatile("vpand %ymm7, %ymm12, %ymm12");
+		asm volatile("vpshufb %ymm9, %ymm4, %ymm14");
+		asm volatile("vpshufb %ymm1, %ymm4, %ymm4");
+		asm volatile("vpshufb %ymm12, %ymm5, %ymm15");
+		asm volatile("vpshufb %ymm3, %ymm5, %ymm5");
+		asm volatile("vpxor %ymm14, %ymm15, %ymm15");
+		asm volatile("vpxor %ymm4, %ymm5, %ymm5");
+
+		/*
+		 * 5 = qx[0]
+		 * 15 = qx[32]
+		 */
+
+		asm volatile("vbroadcasti128 %0, %%ymm4" : : "m" (pbmul[0]));
+		asm volatile("vbroadcasti128 %0, %%ymm1" : : "m" (pbmul[16]));
+		asm volatile("vpsraw $4, %ymm0, %ymm2");
+		asm volatile("vpsraw $4, %ymm8, %ymm6");
+		asm volatile("vpand %ymm7, %ymm0, %ymm3");
+		asm volatile("vpand %ymm7, %ymm8, %ymm14");
+		asm volatile("vpand %ymm7, %ymm2, %ymm2");
+		asm volatile("vpand %ymm7, %ymm6, %ymm6");
+		asm volatile("vpshufb %ymm14, %ymm4, %ymm12");
+		asm volatile("vpshufb %ymm3, %ymm4, %ymm4");
+		asm volatile("vpshufb %ymm6, %ymm1, %ymm13");
+		asm volatile("vpshufb %ymm2, %ymm1, %ymm1");
+		asm volatile("vpxor %ymm4, %ymm1, %ymm1");
+		asm volatile("vpxor %ymm12, %ymm13, %ymm13");
+
+		/*
+		 * 1  = pbmul[px[0]]
+		 * 13 = pbmul[px[32]]
+		 */
+		asm volatile("vpxor %ymm5, %ymm1, %ymm1");
+		asm volatile("vpxor %ymm15, %ymm13, %ymm13");
+
+		/*
+		 * 1 = db = DQ
+		 * 13 = db[32] = DQ[32]
+		 */
+		asm volatile("vmovdqa %%ymm1, %0" : "=m" (dq[0]));
+		asm volatile("vmovdqa %%ymm13,%0" : "=m" (dq[32]));
+		asm volatile("vpxor %ymm1, %ymm0, %ymm0");
+		asm volatile("vpxor %ymm13, %ymm8, %ymm8");
+
+		asm volatile("vmovdqa %%ymm0, %0" : "=m" (dp[0]));
+		asm volatile("vmovdqa %%ymm8, %0" : "=m" (dp[32]));
+
+		bytes -= 64;
+		p += 64;
+		q += 64;
+		dp += 64;
+		dq += 64;
+#else
+		asm volatile("vmovdqa %0, %%ymm1" : : "m" (*q));
+		asm volatile("vmovdqa %0, %%ymm0" : : "m" (*p));
+		asm volatile("vpxor %0, %%ymm1, %%ymm1" : : "m" (*dq));
+		asm volatile("vpxor %0, %%ymm0, %%ymm0" : : "m" (*dp));
+
+		/* 1 = dq ^ q;  0 = dp ^ p */
+
+		asm volatile("vbroadcasti128 %0, %%ymm4" : : "m" (qmul[0]));
+		asm volatile("vbroadcasti128 %0, %%ymm5" : : "m" (qmul[16]));
+
+		/*
+		 * 1 = dq ^ q
+		 * 3 = dq ^ p >> 4
+		 */
+		asm volatile("vpsraw $4, %ymm1, %ymm3");
+		asm volatile("vpand %ymm7, %ymm1, %ymm1");
+		asm volatile("vpand %ymm7, %ymm3, %ymm3");
+		asm volatile("vpshufb %ymm1, %ymm4, %ymm4");
+		asm volatile("vpshufb %ymm3, %ymm5, %ymm5");
+		asm volatile("vpxor %ymm4, %ymm5, %ymm5");
+
+		/* 5 = qx */
+
+		asm volatile("vbroadcasti128 %0, %%ymm4" : : "m" (pbmul[0]));
+		asm volatile("vbroadcasti128 %0, %%ymm1" : : "m" (pbmul[16]));
+
+		asm volatile("vpsraw $4, %ymm0, %ymm2");
+		asm volatile("vpand %ymm7, %ymm0, %ymm3");
+		asm volatile("vpand %ymm7, %ymm2, %ymm2");
+		asm volatile("vpshufb %ymm3, %ymm4, %ymm4");
+		asm volatile("vpshufb %ymm2, %ymm1, %ymm1");
+		asm volatile("vpxor %ymm4, %ymm1, %ymm1");
+
+		/* 1 = pbmul[px] */
+		asm volatile("vpxor %ymm5, %ymm1, %ymm1");
+		/* 1 = db = DQ */
+		asm volatile("vmovdqa %%ymm1, %0" : "=m" (dq[0]));
+
+		asm volatile("vpxor %ymm1, %ymm0, %ymm0");
+		asm volatile("vmovdqa %%ymm0, %0" : "=m" (dp[0]));
+
+		bytes -= 32;
+		p += 32;
+		q += 32;
+		dp += 32;
+		dq += 32;
+#endif
+	}
+
+	kernel_fpu_end();
+}
+
+static void raid6_datap_recov_avx2(int disks, size_t bytes, int faila,
+		void **ptrs)
+{
+	u8 *p, *q, *dq;
+	const u8 *qmul;		/* Q multiplier table */
+	const u8 x0f = 0x0f;
+
+	p = (u8 *)ptrs[disks-2];
+	q = (u8 *)ptrs[disks-1];
+
+	/* Compute syndrome with zero for the missing data page
+	   Use the dead data page as temporary storage for delta q */
+	dq = (u8 *)ptrs[faila];
+	ptrs[faila] = (void *)raid6_empty_zero_page;
+	ptrs[disks-1] = dq;
+
+	raid6_call.gen_syndrome(disks, bytes, ptrs);
+
+	/* Restore pointer table */
+	ptrs[faila]   = dq;
+	ptrs[disks-1] = q;
+
+	/* Now, pick the proper data tables */
+	qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]];
+
+	kernel_fpu_begin();
+
+	asm volatile("vpbroadcastb %0, %%ymm7" : : "m" (x0f));
+
+	while (bytes) {
+#ifdef CONFIG_X86_64
+		asm volatile("vmovdqa %0, %%ymm3" : : "m" (dq[0]));
+		asm volatile("vmovdqa %0, %%ymm8" : : "m" (dq[32]));
+		asm volatile("vpxor %0, %%ymm3, %%ymm3" : : "m" (q[0]));
+		asm volatile("vpxor %0, %%ymm8, %%ymm8" : : "m" (q[32]));
+
+		/*
+		 * 3 = q[0] ^ dq[0]
+		 * 8 = q[32] ^ dq[32]
+		 */
+		asm volatile("vbroadcasti128 %0, %%ymm0" : : "m" (qmul[0]));
+		asm volatile("vmovapd %ymm0, %ymm13");
+		asm volatile("vbroadcasti128 %0, %%ymm1" : : "m" (qmul[16]));
+		asm volatile("vmovapd %ymm1, %ymm14");
+
+		asm volatile("vpsraw $4, %ymm3, %ymm6");
+		asm volatile("vpsraw $4, %ymm8, %ymm12");
+		asm volatile("vpand %ymm7, %ymm3, %ymm3");
+		asm volatile("vpand %ymm7, %ymm8, %ymm8");
+		asm volatile("vpand %ymm7, %ymm6, %ymm6");
+		asm volatile("vpand %ymm7, %ymm12, %ymm12");
+		asm volatile("vpshufb %ymm3, %ymm0, %ymm0");
+		asm volatile("vpshufb %ymm8, %ymm13, %ymm13");
+		asm volatile("vpshufb %ymm6, %ymm1, %ymm1");
+		asm volatile("vpshufb %ymm12, %ymm14, %ymm14");
+		asm volatile("vpxor %ymm0, %ymm1, %ymm1");
+		asm volatile("vpxor %ymm13, %ymm14, %ymm14");
+
+		/*
+		 * 1  = qmul[q[0]  ^ dq[0]]
+		 * 14 = qmul[q[32] ^ dq[32]]
+		 */
+		asm volatile("vmovdqa %0, %%ymm2" : : "m" (p[0]));
+		asm volatile("vmovdqa %0, %%ymm12" : : "m" (p[32]));
+		asm volatile("vpxor %ymm1, %ymm2, %ymm2");
+		asm volatile("vpxor %ymm14, %ymm12, %ymm12");
+
+		/*
+		 * 2  = p[0]  ^ qmul[q[0]  ^ dq[0]]
+		 * 12 = p[32] ^ qmul[q[32] ^ dq[32]]
+		 */
+
+		asm volatile("vmovdqa %%ymm1, %0" : "=m" (dq[0]));
+		asm volatile("vmovdqa %%ymm14, %0" : "=m" (dq[32]));
+		asm volatile("vmovdqa %%ymm2, %0" : "=m" (p[0]));
+		asm volatile("vmovdqa %%ymm12,%0" : "=m" (p[32]));
+
+		bytes -= 64;
+		p += 64;
+		q += 64;
+		dq += 64;
+#else
+		asm volatile("vmovdqa %0, %%ymm3" : : "m" (dq[0]));
+		asm volatile("vpxor %0, %%ymm3, %%ymm3" : : "m" (q[0]));
+
+		/* 3 = q ^ dq */
+
+		asm volatile("vbroadcasti128 %0, %%ymm0" : : "m" (qmul[0]));
+		asm volatile("vbroadcasti128 %0, %%ymm1" : : "m" (qmul[16]));
+
+		asm volatile("vpsraw $4, %ymm3, %ymm6");
+		asm volatile("vpand %ymm7, %ymm3, %ymm3");
+		asm volatile("vpand %ymm7, %ymm6, %ymm6");
+		asm volatile("vpshufb %ymm3, %ymm0, %ymm0");
+		asm volatile("vpshufb %ymm6, %ymm1, %ymm1");
+		asm volatile("vpxor %ymm0, %ymm1, %ymm1");
+
+		/* 1 = qmul[q ^ dq] */
+
+		asm volatile("vmovdqa %0, %%ymm2" : : "m" (p[0]));
+		asm volatile("vpxor %ymm1, %ymm2, %ymm2");
+
+		/* 2 = p ^ qmul[q ^ dq] */
+
+		asm volatile("vmovdqa %%ymm1, %0" : "=m" (dq[0]));
+		asm volatile("vmovdqa %%ymm2, %0" : "=m" (p[0]));
+
+		bytes -= 32;
+		p += 32;
+		q += 32;
+		dq += 32;
+#endif
+	}
+
+	kernel_fpu_end();
+}
+
+const struct raid6_recov_calls raid6_recov_avx2 = {
+	.data2 = raid6_2data_recov_avx2,
+	.datap = raid6_datap_recov_avx2,
+	.valid = raid6_has_avx2,
+#ifdef CONFIG_X86_64
+	.name = "avx2x2",
+#else
+	.name = "avx2x1",
+#endif
+	.priority = 2,
+};
+
+#else
+#warning "your version of binutils lacks AVX2 support"
+#endif
+
+#endif
diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile
index c76151d94764..d919c98ce266 100644
--- a/lib/raid6/test/Makefile
+++ b/lib/raid6/test/Makefile
@@ -23,7 +23,7 @@ RANLIB	 = ranlib
 all:	raid6.a raid6test
 
 raid6.a: int1.o int2.o int4.o int8.o int16.o int32.o mmx.o sse1.o sse2.o \
-	 altivec1.o altivec2.o altivec4.o altivec8.o recov.o recov_ssse3.o algos.o \
+	 altivec1.o altivec2.o altivec4.o altivec8.o recov.o recov_ssse3.o recov_avx2.o algos.o \
 	 tables.o
 	 rm -f $@
 	 $(AR) cq $@ $^
diff --git a/lib/raid6/x86.h b/lib/raid6/x86.h
index d55d63232c55..b7595484a815 100644
--- a/lib/raid6/x86.h
+++ b/lib/raid6/x86.h
@@ -45,19 +45,23 @@ static inline void kernel_fpu_end(void)
 #define X86_FEATURE_XMM3	(4*32+ 0) /* "pni" SSE-3 */
 #define X86_FEATURE_SSSE3	(4*32+ 9) /* Supplemental SSE-3 */
 #define X86_FEATURE_AVX	(4*32+28) /* Advanced Vector Extensions */
+#define X86_FEATURE_AVX2        (9*32+ 5) /* AVX2 instructions */
 #define X86_FEATURE_MMXEXT	(1*32+22) /* AMD MMX extensions */
 
 /* Should work well enough on modern CPUs for testing */
 static inline int boot_cpu_has(int flag)
 {
-	u32 eax = (flag & 0x20) ? 0x80000001 : 1;
-	u32 ecx, edx;
+	u32 eax, ebx, ecx, edx;
+
+	eax = (flag & 0x100) ? 7 :
+		(flag & 0x20) ? 0x80000001 : 1;
+	ecx = 0;
 
 	asm volatile("cpuid"
-		     : "+a" (eax), "=d" (edx), "=c" (ecx)
-		     : : "ebx");
+		     : "+a" (eax), "=b" (ebx), "=d" (edx), "+c" (ecx));
 
-	return ((flag & 0x80 ? ecx : edx) >> (flag & 31)) & 1;
+	return ((flag & 0x100 ? ebx :
+		(flag & 0x80) ? ecx : edx) >> (flag & 31)) & 1;
 }
 
 #endif /* ndef __KERNEL__ */
-- 
cgit v1.2.3


From 2c935842bdb46f5f557426feb4d2bdfdad1aa5f9 Mon Sep 17 00:00:00 2001
From: Yuanhan Liu <yuanhan.liu@linux.intel.com>
Date: Fri, 30 Nov 2012 13:10:39 -0800
Subject: lib/raid6: Add AVX2 optimized gen_syndrome functions

Add AVX2 optimized gen_syndrom functions, which is simply based on
sse2.c written by hpa.

Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
Reviewed-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Jim Kukunas <james.t.kukunas@linux.intel.com>
Signed-off-by: NeilBrown <neilb@suse.de>
---
 include/linux/raid/pq.h |   3 +
 lib/raid6/Makefile      |   2 +-
 lib/raid6/algos.c       |   9 ++
 lib/raid6/avx2.c        | 251 ++++++++++++++++++++++++++++++++++++++++++++++++
 lib/raid6/test/Makefile |  12 ++-
 5 files changed, 275 insertions(+), 2 deletions(-)
 create mode 100644 lib/raid6/avx2.c

(limited to 'include/linux')

diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h
index 3156347452b9..8dfaa2ce2e95 100644
--- a/include/linux/raid/pq.h
+++ b/include/linux/raid/pq.h
@@ -98,6 +98,9 @@ extern const struct raid6_calls raid6_altivec1;
 extern const struct raid6_calls raid6_altivec2;
 extern const struct raid6_calls raid6_altivec4;
 extern const struct raid6_calls raid6_altivec8;
+extern const struct raid6_calls raid6_avx2x1;
+extern const struct raid6_calls raid6_avx2x2;
+extern const struct raid6_calls raid6_avx2x4;
 
 struct raid6_recov_calls {
 	void (*data2)(int, size_t, int, int, void **);
diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile
index 8c2e22bef661..3430711b9bdf 100644
--- a/lib/raid6/Makefile
+++ b/lib/raid6/Makefile
@@ -2,7 +2,7 @@ obj-$(CONFIG_RAID6_PQ)	+= raid6_pq.o
 
 raid6_pq-y	+= algos.o recov.o recov_ssse3.o recov_avx2.o tables.o int1.o int2.o int4.o \
 		   int8.o int16.o int32.o altivec1.o altivec2.o altivec4.o \
-		   altivec8.o mmx.o sse1.o sse2.o
+		   altivec8.o mmx.o sse1.o sse2.o avx2.o
 hostprogs-y	+= mktables
 
 quiet_cmd_unroll = UNROLL  $@
diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c
index 8b7f55cadb45..6d7316fe9f30 100644
--- a/lib/raid6/algos.c
+++ b/lib/raid6/algos.c
@@ -45,11 +45,20 @@ const struct raid6_calls * const raid6_algos[] = {
 	&raid6_sse1x2,
 	&raid6_sse2x1,
 	&raid6_sse2x2,
+#ifdef CONFIG_AS_AVX2
+	&raid6_avx2x1,
+	&raid6_avx2x2,
+#endif
 #endif
 #if defined(__x86_64__) && !defined(__arch_um__)
 	&raid6_sse2x1,
 	&raid6_sse2x2,
 	&raid6_sse2x4,
+#ifdef CONFIG_AS_AVX2
+	&raid6_avx2x1,
+	&raid6_avx2x2,
+	&raid6_avx2x4,
+#endif
 #endif
 #ifdef CONFIG_ALTIVEC
 	&raid6_altivec1,
diff --git a/lib/raid6/avx2.c b/lib/raid6/avx2.c
new file mode 100644
index 000000000000..bc3b1dd436eb
--- /dev/null
+++ b/lib/raid6/avx2.c
@@ -0,0 +1,251 @@
+/* -*- linux-c -*- ------------------------------------------------------- *
+ *
+ *   Copyright (C) 2012 Intel Corporation
+ *   Author: Yuanhan Liu <yuanhan.liu@linux.intel.com>
+ *
+ *   Based on sse2.c: Copyright 2002 H. Peter Anvin - All Rights Reserved
+ *
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, Inc., 53 Temple Place Ste 330,
+ *   Boston MA 02111-1307, USA; either version 2 of the License, or
+ *   (at your option) any later version; incorporated herein by reference.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * AVX2 implementation of RAID-6 syndrome functions
+ *
+ */
+
+#ifdef CONFIG_AS_AVX2
+
+#include <linux/raid/pq.h>
+#include "x86.h"
+
+static const struct raid6_avx2_constants {
+	u64 x1d[4];
+} raid6_avx2_constants __aligned(32) = {
+	{ 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,
+	  0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,},
+};
+
+static int raid6_have_avx2(void)
+{
+	return boot_cpu_has(X86_FEATURE_AVX2) && boot_cpu_has(X86_FEATURE_AVX);
+}
+
+/*
+ * Plain AVX2 implementation
+ */
+static void raid6_avx21_gen_syndrome(int disks, size_t bytes, void **ptrs)
+{
+	u8 **dptr = (u8 **)ptrs;
+	u8 *p, *q;
+	int d, z, z0;
+
+	z0 = disks - 3;		/* Highest data disk */
+	p = dptr[z0+1];		/* XOR parity */
+	q = dptr[z0+2];		/* RS syndrome */
+
+	kernel_fpu_begin();
+
+	asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0]));
+	asm volatile("vpxor %ymm3,%ymm3,%ymm3");	/* Zero temp */
+
+	for (d = 0; d < bytes; d += 32) {
+		asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
+		asm volatile("vmovdqa %0,%%ymm2" : : "m" (dptr[z0][d]));/* P[0] */
+		asm volatile("prefetchnta %0" : : "m" (dptr[z0-1][d]));
+		asm volatile("vmovdqa %ymm2,%ymm4");/* Q[0] */
+		asm volatile("vmovdqa %0,%%ymm6" : : "m" (dptr[z0-1][d]));
+		for (z = z0-2; z >= 0; z--) {
+			asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
+			asm volatile("vpcmpgtb %ymm4,%ymm3,%ymm5");
+			asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
+			asm volatile("vpand %ymm0,%ymm5,%ymm5");
+			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
+			asm volatile("vpxor %ymm6,%ymm2,%ymm2");
+			asm volatile("vpxor %ymm6,%ymm4,%ymm4");
+			asm volatile("vmovdqa %0,%%ymm6" : : "m" (dptr[z][d]));
+		}
+		asm volatile("vpcmpgtb %ymm4,%ymm3,%ymm5");
+		asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
+		asm volatile("vpand %ymm0,%ymm5,%ymm5");
+		asm volatile("vpxor %ymm5,%ymm4,%ymm4");
+		asm volatile("vpxor %ymm6,%ymm2,%ymm2");
+		asm volatile("vpxor %ymm6,%ymm4,%ymm4");
+
+		asm volatile("vmovntdq %%ymm2,%0" : "=m" (p[d]));
+		asm volatile("vpxor %ymm2,%ymm2,%ymm2");
+		asm volatile("vmovntdq %%ymm4,%0" : "=m" (q[d]));
+		asm volatile("vpxor %ymm4,%ymm4,%ymm4");
+	}
+
+	asm volatile("sfence" : : : "memory");
+	kernel_fpu_end();
+}
+
+const struct raid6_calls raid6_avx2x1 = {
+	raid6_avx21_gen_syndrome,
+	raid6_have_avx2,
+	"avx2x1",
+	1			/* Has cache hints */
+};
+
+/*
+ * Unrolled-by-2 AVX2 implementation
+ */
+static void raid6_avx22_gen_syndrome(int disks, size_t bytes, void **ptrs)
+{
+	u8 **dptr = (u8 **)ptrs;
+	u8 *p, *q;
+	int d, z, z0;
+
+	z0 = disks - 3;		/* Highest data disk */
+	p = dptr[z0+1];		/* XOR parity */
+	q = dptr[z0+2];		/* RS syndrome */
+
+	kernel_fpu_begin();
+
+	asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0]));
+	asm volatile("vpxor %ymm1,%ymm1,%ymm1"); /* Zero temp */
+
+	/* We uniformly assume a single prefetch covers at least 32 bytes */
+	for (d = 0; d < bytes; d += 64) {
+		asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
+		asm volatile("prefetchnta %0" : : "m" (dptr[z0][d+32]));
+		asm volatile("vmovdqa %0,%%ymm2" : : "m" (dptr[z0][d]));/* P[0] */
+		asm volatile("vmovdqa %0,%%ymm3" : : "m" (dptr[z0][d+32]));/* P[1] */
+		asm volatile("vmovdqa %ymm2,%ymm4"); /* Q[0] */
+		asm volatile("vmovdqa %ymm3,%ymm6"); /* Q[1] */
+		for (z = z0-1; z >= 0; z--) {
+			asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
+			asm volatile("prefetchnta %0" : : "m" (dptr[z][d+32]));
+			asm volatile("vpcmpgtb %ymm4,%ymm1,%ymm5");
+			asm volatile("vpcmpgtb %ymm6,%ymm1,%ymm7");
+			asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
+			asm volatile("vpaddb %ymm6,%ymm6,%ymm6");
+			asm volatile("vpand %ymm0,%ymm5,%ymm5");
+			asm volatile("vpand %ymm0,%ymm7,%ymm7");
+			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
+			asm volatile("vpxor %ymm7,%ymm6,%ymm6");
+			asm volatile("vmovdqa %0,%%ymm5" : : "m" (dptr[z][d]));
+			asm volatile("vmovdqa %0,%%ymm7" : : "m" (dptr[z][d+32]));
+			asm volatile("vpxor %ymm5,%ymm2,%ymm2");
+			asm volatile("vpxor %ymm7,%ymm3,%ymm3");
+			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
+			asm volatile("vpxor %ymm7,%ymm6,%ymm6");
+		}
+		asm volatile("vmovntdq %%ymm2,%0" : "=m" (p[d]));
+		asm volatile("vmovntdq %%ymm3,%0" : "=m" (p[d+32]));
+		asm volatile("vmovntdq %%ymm4,%0" : "=m" (q[d]));
+		asm volatile("vmovntdq %%ymm6,%0" : "=m" (q[d+32]));
+	}
+
+	asm volatile("sfence" : : : "memory");
+	kernel_fpu_end();
+}
+
+const struct raid6_calls raid6_avx2x2 = {
+	raid6_avx22_gen_syndrome,
+	raid6_have_avx2,
+	"avx2x2",
+	1			/* Has cache hints */
+};
+
+#ifdef CONFIG_X86_64
+
+/*
+ * Unrolled-by-4 AVX2 implementation
+ */
+static void raid6_avx24_gen_syndrome(int disks, size_t bytes, void **ptrs)
+{
+	u8 **dptr = (u8 **)ptrs;
+	u8 *p, *q;
+	int d, z, z0;
+
+	z0 = disks - 3;		/* Highest data disk */
+	p = dptr[z0+1];		/* XOR parity */
+	q = dptr[z0+2];		/* RS syndrome */
+
+	kernel_fpu_begin();
+
+	asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0]));
+	asm volatile("vpxor %ymm1,%ymm1,%ymm1");	/* Zero temp */
+	asm volatile("vpxor %ymm2,%ymm2,%ymm2");	/* P[0] */
+	asm volatile("vpxor %ymm3,%ymm3,%ymm3");	/* P[1] */
+	asm volatile("vpxor %ymm4,%ymm4,%ymm4");	/* Q[0] */
+	asm volatile("vpxor %ymm6,%ymm6,%ymm6");	/* Q[1] */
+	asm volatile("vpxor %ymm10,%ymm10,%ymm10");	/* P[2] */
+	asm volatile("vpxor %ymm11,%ymm11,%ymm11");	/* P[3] */
+	asm volatile("vpxor %ymm12,%ymm12,%ymm12");	/* Q[2] */
+	asm volatile("vpxor %ymm14,%ymm14,%ymm14");	/* Q[3] */
+
+	for (d = 0; d < bytes; d += 128) {
+		for (z = z0; z >= 0; z--) {
+			asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
+			asm volatile("prefetchnta %0" : : "m" (dptr[z][d+32]));
+			asm volatile("prefetchnta %0" : : "m" (dptr[z][d+64]));
+			asm volatile("prefetchnta %0" : : "m" (dptr[z][d+96]));
+			asm volatile("vpcmpgtb %ymm4,%ymm1,%ymm5");
+			asm volatile("vpcmpgtb %ymm6,%ymm1,%ymm7");
+			asm volatile("vpcmpgtb %ymm12,%ymm1,%ymm13");
+			asm volatile("vpcmpgtb %ymm14,%ymm1,%ymm15");
+			asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
+			asm volatile("vpaddb %ymm6,%ymm6,%ymm6");
+			asm volatile("vpaddb %ymm12,%ymm12,%ymm12");
+			asm volatile("vpaddb %ymm14,%ymm14,%ymm14");
+			asm volatile("vpand %ymm0,%ymm5,%ymm5");
+			asm volatile("vpand %ymm0,%ymm7,%ymm7");
+			asm volatile("vpand %ymm0,%ymm13,%ymm13");
+			asm volatile("vpand %ymm0,%ymm15,%ymm15");
+			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
+			asm volatile("vpxor %ymm7,%ymm6,%ymm6");
+			asm volatile("vpxor %ymm13,%ymm12,%ymm12");
+			asm volatile("vpxor %ymm15,%ymm14,%ymm14");
+			asm volatile("vmovdqa %0,%%ymm5" : : "m" (dptr[z][d]));
+			asm volatile("vmovdqa %0,%%ymm7" : : "m" (dptr[z][d+32]));
+			asm volatile("vmovdqa %0,%%ymm13" : : "m" (dptr[z][d+64]));
+			asm volatile("vmovdqa %0,%%ymm15" : : "m" (dptr[z][d+96]));
+			asm volatile("vpxor %ymm5,%ymm2,%ymm2");
+			asm volatile("vpxor %ymm7,%ymm3,%ymm3");
+			asm volatile("vpxor %ymm13,%ymm10,%ymm10");
+			asm volatile("vpxor %ymm15,%ymm11,%ymm11");
+			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
+			asm volatile("vpxor %ymm7,%ymm6,%ymm6");
+			asm volatile("vpxor %ymm13,%ymm12,%ymm12");
+			asm volatile("vpxor %ymm15,%ymm14,%ymm14");
+		}
+		asm volatile("vmovntdq %%ymm2,%0" : "=m" (p[d]));
+		asm volatile("vpxor %ymm2,%ymm2,%ymm2");
+		asm volatile("vmovntdq %%ymm3,%0" : "=m" (p[d+32]));
+		asm volatile("vpxor %ymm3,%ymm3,%ymm3");
+		asm volatile("vmovntdq %%ymm10,%0" : "=m" (p[d+64]));
+		asm volatile("vpxor %ymm10,%ymm10,%ymm10");
+		asm volatile("vmovntdq %%ymm11,%0" : "=m" (p[d+96]));
+		asm volatile("vpxor %ymm11,%ymm11,%ymm11");
+		asm volatile("vmovntdq %%ymm4,%0" : "=m" (q[d]));
+		asm volatile("vpxor %ymm4,%ymm4,%ymm4");
+		asm volatile("vmovntdq %%ymm6,%0" : "=m" (q[d+32]));
+		asm volatile("vpxor %ymm6,%ymm6,%ymm6");
+		asm volatile("vmovntdq %%ymm12,%0" : "=m" (q[d+64]));
+		asm volatile("vpxor %ymm12,%ymm12,%ymm12");
+		asm volatile("vmovntdq %%ymm14,%0" : "=m" (q[d+96]));
+		asm volatile("vpxor %ymm14,%ymm14,%ymm14");
+	}
+
+	asm volatile("sfence" : : : "memory");
+	kernel_fpu_end();
+}
+
+const struct raid6_calls raid6_avx2x4 = {
+	raid6_avx24_gen_syndrome,
+	raid6_have_avx2,
+	"avx2x4",
+	1			/* Has cache hints */
+};
+#endif
+
+#endif /* CONFIG_AS_AVX2 */
diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile
index d919c98ce266..754cbac0f9f8 100644
--- a/lib/raid6/test/Makefile
+++ b/lib/raid6/test/Makefile
@@ -11,6 +11,16 @@ AWK	 = awk -f
 AR	 = ar
 RANLIB	 = ranlib
 
+ARCH := $(shell uname -m 2>/dev/null | sed -e /s/i.86/i386/)
+ifeq ($(ARCH),i386)
+        CFLAGS += -DCONFIG_X86_32
+endif
+ifeq ($(ARCH),x86_64)
+        CFLAGS += -DCONFIG_X86_64
+endif
+CFLAGS += $(shell echo "vpbroadcastb %xmm0, %ymm1"| gcc -c -x assembler - &&\
+	     rm ./-.o && echo -DCONFIG_AS_AVX2=1)
+
 .c.o:
 	$(CC) $(CFLAGS) -c -o $@ $<
 
@@ -22,7 +32,7 @@ RANLIB	 = ranlib
 
 all:	raid6.a raid6test
 
-raid6.a: int1.o int2.o int4.o int8.o int16.o int32.o mmx.o sse1.o sse2.o \
+raid6.a: int1.o int2.o int4.o int8.o int16.o int32.o mmx.o sse1.o sse2.o avx2.o \
 	 altivec1.o altivec2.o altivec4.o altivec8.o recov.o recov_ssse3.o recov_avx2.o algos.o \
 	 tables.o
 	 rm -f $@
-- 
cgit v1.2.3


From 83aff95eb9d60aff5497e9f44a2ae906b86d8e88 Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@inktank.com>
Date: Wed, 28 Nov 2012 12:28:24 -0800
Subject: libceph: remove 'osdtimeout' option

This would reset a connection with any OSD that had an outstanding
request that was taking more than N seconds.  The idea was that if the
OSD was buggy, the client could compensate by resending the request.

In reality, this only served to hide server bugs, and we haven't
actually seen such a bug in quite a while.  Moreover, the userspace
client code never did this.

More importantly, often the request is taking a long time because the
OSD is trying to recover, or overloaded, and killing the connection
and retrying would only make the situation worse by giving the OSD
more work to do.

Signed-off-by: Sage Weil <sage@inktank.com>
Reviewed-by: Alex Elder <elder@inktank.com>
---
 fs/ceph/super.c              |  2 --
 include/linux/ceph/libceph.h |  2 --
 net/ceph/ceph_common.c       |  3 +--
 net/ceph/osd_client.c        | 47 ++++----------------------------------------
 4 files changed, 5 insertions(+), 49 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 2f586b0e5e0f..fcda1c73a1e5 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -403,8 +403,6 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root)
 		seq_printf(m, ",mount_timeout=%d", opt->mount_timeout);
 	if (opt->osd_idle_ttl != CEPH_OSD_IDLE_TTL_DEFAULT)
 		seq_printf(m, ",osd_idle_ttl=%d", opt->osd_idle_ttl);
-	if (opt->osd_timeout != CEPH_OSD_TIMEOUT_DEFAULT)
-		seq_printf(m, ",osdtimeout=%d", opt->osd_timeout);
 	if (opt->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT)
 		seq_printf(m, ",osdkeepalivetimeout=%d",
 			   opt->osd_keepalive_timeout);
diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
index 42624789b06f..317aff8feb0a 100644
--- a/include/linux/ceph/libceph.h
+++ b/include/linux/ceph/libceph.h
@@ -43,7 +43,6 @@ struct ceph_options {
 	struct ceph_entity_addr my_addr;
 	int mount_timeout;
 	int osd_idle_ttl;
-	int osd_timeout;
 	int osd_keepalive_timeout;
 
 	/*
@@ -63,7 +62,6 @@ struct ceph_options {
  * defaults
  */
 #define CEPH_MOUNT_TIMEOUT_DEFAULT  60
-#define CEPH_OSD_TIMEOUT_DEFAULT    60  /* seconds */
 #define CEPH_OSD_KEEPALIVE_DEFAULT  5
 #define CEPH_OSD_IDLE_TTL_DEFAULT    60
 
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index a8020293f342..ee71ea26777a 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -305,7 +305,6 @@ ceph_parse_options(char *options, const char *dev_name,
 
 	/* start with defaults */
 	opt->flags = CEPH_OPT_DEFAULT;
-	opt->osd_timeout = CEPH_OSD_TIMEOUT_DEFAULT;
 	opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT;
 	opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; /* seconds */
 	opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT;   /* seconds */
@@ -391,7 +390,7 @@ ceph_parse_options(char *options, const char *dev_name,
 
 			/* misc */
 		case Opt_osdtimeout:
-			opt->osd_timeout = intval;
+			pr_warning("ignoring deprecated osdtimeout option\n");
 			break;
 		case Opt_osdkeepalivetimeout:
 			opt->osd_keepalive_timeout = intval;
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index ccbdfbba9e53..7ebfe13267e6 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -608,14 +608,6 @@ static void __kick_osd_requests(struct ceph_osd_client *osdc,
 	}
 }
 
-static void kick_osd_requests(struct ceph_osd_client *osdc,
-			      struct ceph_osd *kickosd)
-{
-	mutex_lock(&osdc->request_mutex);
-	__kick_osd_requests(osdc, kickosd);
-	mutex_unlock(&osdc->request_mutex);
-}
-
 /*
  * If the osd connection drops, we need to resubmit all requests.
  */
@@ -629,7 +621,9 @@ static void osd_reset(struct ceph_connection *con)
 	dout("osd_reset osd%d\n", osd->o_osd);
 	osdc = osd->o_osdc;
 	down_read(&osdc->map_sem);
-	kick_osd_requests(osdc, osd);
+	mutex_lock(&osdc->request_mutex);
+	__kick_osd_requests(osdc, osd);
+	mutex_unlock(&osdc->request_mutex);
 	send_queued(osdc);
 	up_read(&osdc->map_sem);
 }
@@ -1091,12 +1085,10 @@ static void handle_timeout(struct work_struct *work)
 {
 	struct ceph_osd_client *osdc =
 		container_of(work, struct ceph_osd_client, timeout_work.work);
-	struct ceph_osd_request *req, *last_req = NULL;
+	struct ceph_osd_request *req;
 	struct ceph_osd *osd;
-	unsigned long timeout = osdc->client->options->osd_timeout * HZ;
 	unsigned long keepalive =
 		osdc->client->options->osd_keepalive_timeout * HZ;
-	unsigned long last_stamp = 0;
 	struct list_head slow_osds;
 	dout("timeout\n");
 	down_read(&osdc->map_sem);
@@ -1105,37 +1097,6 @@ static void handle_timeout(struct work_struct *work)
 
 	mutex_lock(&osdc->request_mutex);
 
-	/*
-	 * reset osds that appear to be _really_ unresponsive.  this
-	 * is a failsafe measure.. we really shouldn't be getting to
-	 * this point if the system is working properly.  the monitors
-	 * should mark the osd as failed and we should find out about
-	 * it from an updated osd map.
-	 */
-	while (timeout && !list_empty(&osdc->req_lru)) {
-		req = list_entry(osdc->req_lru.next, struct ceph_osd_request,
-				 r_req_lru_item);
-
-		/* hasn't been long enough since we sent it? */
-		if (time_before(jiffies, req->r_stamp + timeout))
-			break;
-
-		/* hasn't been long enough since it was acked? */
-		if (req->r_request->ack_stamp == 0 ||
-		    time_before(jiffies, req->r_request->ack_stamp + timeout))
-			break;
-
-		BUG_ON(req == last_req && req->r_stamp == last_stamp);
-		last_req = req;
-		last_stamp = req->r_stamp;
-
-		osd = req->r_osd;
-		BUG_ON(!osd);
-		pr_warning(" tid %llu timed out on osd%d, will reset osd\n",
-			   req->r_tid, osd->o_osd);
-		__kick_osd_requests(osdc, osd);
-	}
-
 	/*
 	 * ping osds that are a bit slow.  this ensures that if there
 	 * is a break in the TCP connection we will notice, and reopen
-- 
cgit v1.2.3


From d2cc4dde9206aa2c7fb237aa689d3277cc070547 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Thu, 29 Nov 2012 08:37:03 -0600
Subject: bdi_register: add __printf verification, fix arg mismatch

__printf is useful to verify format and arguments.

Signed-off-by: Joe Perches <joe@perches.com>
Reviewed-by: Alex Elder <elder@inktank.com>
---
 fs/ceph/super.c             | 2 +-
 include/linux/backing-dev.h | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index fcda1c73a1e5..1a144001b2e1 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -842,7 +842,7 @@ static int ceph_register_bdi(struct super_block *sb,
 		fsc->backing_dev_info.ra_pages =
 			default_backing_dev_info.ra_pages;
 
-	err = bdi_register(&fsc->backing_dev_info, NULL, "ceph-%d",
+	err = bdi_register(&fsc->backing_dev_info, NULL, "ceph-%ld",
 			   atomic_long_inc_return(&bdi_seq));
 	if (!err)
 		sb->s_bdi = &fsc->backing_dev_info;
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 2a9a9abc9126..12731a19ef06 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -114,6 +114,7 @@ struct backing_dev_info {
 int bdi_init(struct backing_dev_info *bdi);
 void bdi_destroy(struct backing_dev_info *bdi);
 
+__printf(3, 4)
 int bdi_register(struct backing_dev_info *bdi, struct device *parent,
 		const char *fmt, ...);
 int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev);
-- 
cgit v1.2.3


From 34e1169d996ab148490c01b65b4ee371cf8ffba2 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Tue, 16 Oct 2012 07:31:07 +1030
Subject: module: add syscall to load module from fd

As part of the effort to create a stronger boundary between root and
kernel, Chrome OS wants to be able to enforce that kernel modules are
being loaded only from our read-only crypto-hash verified (dm_verity)
root filesystem. Since the init_module syscall hands the kernel a module
as a memory blob, no reasoning about the origin of the blob can be made.

Earlier proposals for appending signatures to kernel modules would not be
useful in Chrome OS, since it would involve adding an additional set of
keys to our kernel and builds for no good reason: we already trust the
contents of our root filesystem. We don't need to verify those kernel
modules a second time. Having to do signature checking on module loading
would slow us down and be redundant. All we need to know is where a
module is coming from so we can say yes/no to loading it.

If a file descriptor is used as the source of a kernel module, many more
things can be reasoned about. In Chrome OS's case, we could enforce that
the module lives on the filesystem we expect it to live on.  In the case
of IMA (or other LSMs), it would be possible, for example, to examine
extended attributes that may contain signatures over the contents of
the module.

This introduces a new syscall (on x86), similar to init_module, that has
only two arguments. The first argument is used as a file descriptor to
the module and the second argument is a pointer to the NULL terminated
string of module arguments.

Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> (merge fixes)
---
 arch/x86/syscalls/syscall_32.tbl |   1 +
 arch/x86/syscalls/syscall_64.tbl |   1 +
 include/linux/syscalls.h         |   1 +
 kernel/module.c                  | 367 +++++++++++++++++++++++----------------
 kernel/sys_ni.c                  |   1 +
 5 files changed, 223 insertions(+), 148 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl
index a47103fbc692..83b3838417ed 100644
--- a/arch/x86/syscalls/syscall_32.tbl
+++ b/arch/x86/syscalls/syscall_32.tbl
@@ -356,3 +356,4 @@
 347	i386	process_vm_readv	sys_process_vm_readv		compat_sys_process_vm_readv
 348	i386	process_vm_writev	sys_process_vm_writev		compat_sys_process_vm_writev
 349	i386	kcmp			sys_kcmp
+350	i386	finit_module		sys_finit_module
diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl
index a582bfed95bb..7c58c84b7bc8 100644
--- a/arch/x86/syscalls/syscall_64.tbl
+++ b/arch/x86/syscalls/syscall_64.tbl
@@ -319,6 +319,7 @@
 310	64	process_vm_readv	sys_process_vm_readv
 311	64	process_vm_writev	sys_process_vm_writev
 312	common	kcmp			sys_kcmp
+313	common	finit_module		sys_finit_module
 
 #
 # x32-specific system call numbers start at 512 to avoid cache impact
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 727f0cd73921..32bc035bcd68 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -868,4 +868,5 @@ asmlinkage long sys_process_vm_writev(pid_t pid,
 
 asmlinkage long sys_kcmp(pid_t pid1, pid_t pid2, int type,
 			 unsigned long idx1, unsigned long idx2);
+asmlinkage long sys_finit_module(int fd, const char __user *uargs);
 #endif
diff --git a/kernel/module.c b/kernel/module.c
index 6e48c3a43599..6d2c4e4ca1f5 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -21,6 +21,7 @@
 #include <linux/ftrace_event.h>
 #include <linux/init.h>
 #include <linux/kallsyms.h>
+#include <linux/file.h>
 #include <linux/fs.h>
 #include <linux/sysfs.h>
 #include <linux/kernel.h>
@@ -2425,18 +2426,17 @@ static inline void kmemleak_load_module(const struct module *mod,
 #endif
 
 #ifdef CONFIG_MODULE_SIG
-static int module_sig_check(struct load_info *info,
-			    const void *mod, unsigned long *_len)
+static int module_sig_check(struct load_info *info)
 {
 	int err = -ENOKEY;
-	unsigned long markerlen = sizeof(MODULE_SIG_STRING) - 1;
-	unsigned long len = *_len;
+	const unsigned long markerlen = sizeof(MODULE_SIG_STRING) - 1;
+	const void *mod = info->hdr;
 
-	if (len > markerlen &&
-	    memcmp(mod + len - markerlen, MODULE_SIG_STRING, markerlen) == 0) {
+	if (info->len > markerlen &&
+	    memcmp(mod + info->len - markerlen, MODULE_SIG_STRING, markerlen) == 0) {
 		/* We truncate the module to discard the signature */
-		*_len -= markerlen;
-		err = mod_verify_sig(mod, _len);
+		info->len -= markerlen;
+		err = mod_verify_sig(mod, &info->len);
 	}
 
 	if (!err) {
@@ -2454,59 +2454,97 @@ static int module_sig_check(struct load_info *info,
 	return err;
 }
 #else /* !CONFIG_MODULE_SIG */
-static int module_sig_check(struct load_info *info,
-			    void *mod, unsigned long *len)
+static int module_sig_check(struct load_info *info)
 {
 	return 0;
 }
 #endif /* !CONFIG_MODULE_SIG */
 
-/* Sets info->hdr, info->len and info->sig_ok. */
-static int copy_and_check(struct load_info *info,
-			  const void __user *umod, unsigned long len,
-			  const char __user *uargs)
+/* Sanity checks against invalid binaries, wrong arch, weird elf version. */
+static int elf_header_check(struct load_info *info)
 {
-	int err;
-	Elf_Ehdr *hdr;
+	if (info->len < sizeof(*(info->hdr)))
+		return -ENOEXEC;
+
+	if (memcmp(info->hdr->e_ident, ELFMAG, SELFMAG) != 0
+	    || info->hdr->e_type != ET_REL
+	    || !elf_check_arch(info->hdr)
+	    || info->hdr->e_shentsize != sizeof(Elf_Shdr))
+		return -ENOEXEC;
+
+	if (info->hdr->e_shoff >= info->len
+	    || (info->hdr->e_shnum * sizeof(Elf_Shdr) >
+		info->len - info->hdr->e_shoff))
+		return -ENOEXEC;
 
-	if (len < sizeof(*hdr))
+	return 0;
+}
+
+/* Sets info->hdr and info->len. */
+static int copy_module_from_user(const void __user *umod, unsigned long len,
+				  struct load_info *info)
+{
+	info->len = len;
+	if (info->len < sizeof(*(info->hdr)))
 		return -ENOEXEC;
 
 	/* Suck in entire file: we'll want most of it. */
-	if ((hdr = vmalloc(len)) == NULL)
+	info->hdr = vmalloc(info->len);
+	if (!info->hdr)
 		return -ENOMEM;
 
-	if (copy_from_user(hdr, umod, len) != 0) {
-		err = -EFAULT;
-		goto free_hdr;
+	if (copy_from_user(info->hdr, umod, info->len) != 0) {
+		vfree(info->hdr);
+		return -EFAULT;
 	}
 
-	err = module_sig_check(info, hdr, &len);
+	return 0;
+}
+
+/* Sets info->hdr and info->len. */
+static int copy_module_from_fd(int fd, struct load_info *info)
+{
+	struct file *file;
+	int err;
+	struct kstat stat;
+	loff_t pos;
+	ssize_t bytes = 0;
+
+	file = fget(fd);
+	if (!file)
+		return -ENOEXEC;
+
+	err = vfs_getattr(file->f_vfsmnt, file->f_dentry, &stat);
 	if (err)
-		goto free_hdr;
+		goto out;
 
-	/* Sanity checks against insmoding binaries or wrong arch,
-	   weird elf version */
-	if (memcmp(hdr->e_ident, ELFMAG, SELFMAG) != 0
-	    || hdr->e_type != ET_REL
-	    || !elf_check_arch(hdr)
-	    || hdr->e_shentsize != sizeof(Elf_Shdr)) {
-		err = -ENOEXEC;
-		goto free_hdr;
+	if (stat.size > INT_MAX) {
+		err = -EFBIG;
+		goto out;
 	}
-
-	if (hdr->e_shoff >= len ||
-	    hdr->e_shnum * sizeof(Elf_Shdr) > len - hdr->e_shoff) {
-		err = -ENOEXEC;
-		goto free_hdr;
+	info->hdr = vmalloc(stat.size);
+	if (!info->hdr) {
+		err = -ENOMEM;
+		goto out;
 	}
 
-	info->hdr = hdr;
-	info->len = len;
-	return 0;
+	pos = 0;
+	while (pos < stat.size) {
+		bytes = kernel_read(file, pos, (char *)(info->hdr) + pos,
+				    stat.size - pos);
+		if (bytes < 0) {
+			vfree(info->hdr);
+			err = bytes;
+			goto out;
+		}
+		if (bytes == 0)
+			break;
+		pos += bytes;
+	}
+	info->len = pos;
 
-free_hdr:
-	vfree(hdr);
+out:
+	fput(file);
 	return err;
 }
 
@@ -2945,33 +2983,123 @@ static bool finished_loading(const char *name)
 	return ret;
 }
 
+/* Call module constructors. */
+static void do_mod_ctors(struct module *mod)
+{
+#ifdef CONFIG_CONSTRUCTORS
+	unsigned long i;
+
+	for (i = 0; i < mod->num_ctors; i++)
+		mod->ctors[i]();
+#endif
+}
+
+/* This is where the real work happens */
+static int do_init_module(struct module *mod)
+{
+	int ret = 0;
+
+	blocking_notifier_call_chain(&module_notify_list,
+			MODULE_STATE_COMING, mod);
+
+	/* Set RO and NX regions for core */
+	set_section_ro_nx(mod->module_core,
+				mod->core_text_size,
+				mod->core_ro_size,
+				mod->core_size);
+
+	/* Set RO and NX regions for init */
+	set_section_ro_nx(mod->module_init,
+				mod->init_text_size,
+				mod->init_ro_size,
+				mod->init_size);
+
+	do_mod_ctors(mod);
+	/* Start the module */
+	if (mod->init != NULL)
+		ret = do_one_initcall(mod->init);
+	if (ret < 0) {
+		/* Init routine failed: abort.  Try to protect us from
+                   buggy refcounters. */
+		mod->state = MODULE_STATE_GOING;
+		synchronize_sched();
+		module_put(mod);
+		blocking_notifier_call_chain(&module_notify_list,
+					     MODULE_STATE_GOING, mod);
+		free_module(mod);
+		wake_up_all(&module_wq);
+		return ret;
+	}
+	if (ret > 0) {
+		printk(KERN_WARNING
+"%s: '%s'->init suspiciously returned %d, it should follow 0/-E convention\n"
+"%s: loading module anyway...\n",
+		       __func__, mod->name, ret,
+		       __func__);
+		dump_stack();
+	}
+
+	/* Now it's a first class citizen! */
+	mod->state = MODULE_STATE_LIVE;
+	blocking_notifier_call_chain(&module_notify_list,
+				     MODULE_STATE_LIVE, mod);
+
+	/* We need to finish all async code before the module init sequence is done */
+	async_synchronize_full();
+
+	mutex_lock(&module_mutex);
+	/* Drop initial reference. */
+	module_put(mod);
+	trim_init_extable(mod);
+#ifdef CONFIG_KALLSYMS
+	mod->num_symtab = mod->core_num_syms;
+	mod->symtab = mod->core_symtab;
+	mod->strtab = mod->core_strtab;
+#endif
+	unset_module_init_ro_nx(mod);
+	module_free(mod, mod->module_init);
+	mod->module_init = NULL;
+	mod->init_size = 0;
+	mod->init_ro_size = 0;
+	mod->init_text_size = 0;
+	mutex_unlock(&module_mutex);
+	wake_up_all(&module_wq);
+
+	return 0;
+}
+
+static int may_init_module(void)
+{
+	if (!capable(CAP_SYS_MODULE) || modules_disabled)
+		return -EPERM;
+
+	return 0;
+}
+
 /* Allocate and load the module: note that size of section 0 is always
    zero, and we rely on this for optional sections. */
-static struct module *load_module(void __user *umod,
-				  unsigned long len,
-				  const char __user *uargs)
+static int load_module(struct load_info *info, const char __user *uargs)
 {
-	struct load_info info = { NULL, };
 	struct module *mod, *old;
 	long err;
 
-	pr_debug("load_module: umod=%p, len=%lu, uargs=%p\n",
-	       umod, len, uargs);
+	err = module_sig_check(info);
+	if (err)
+		goto free_copy;
 
-	/* Copy in the blobs from userspace, check they are vaguely sane. */
-	err = copy_and_check(&info, umod, len, uargs);
+	err = elf_header_check(info);
 	if (err)
-		return ERR_PTR(err);
+		goto free_copy;
 
 	/* Figure out module layout, and allocate all the memory. */
-	mod = layout_and_allocate(&info);
+	mod = layout_and_allocate(info);
 	if (IS_ERR(mod)) {
 		err = PTR_ERR(mod);
 		goto free_copy;
 	}
 
 #ifdef CONFIG_MODULE_SIG
-	mod->sig_ok = info.sig_ok;
+	mod->sig_ok = info->sig_ok;
 	if (!mod->sig_ok)
 		add_taint_module(mod, TAINT_FORCED_MODULE);
 #endif
@@ -2983,25 +3111,25 @@ static struct module *load_module(void __user *umod,
 
 	/* Now we've got everything in the final locations, we can
 	 * find optional sections. */
-	find_module_sections(mod, &info);
+	find_module_sections(mod, info);
 
 	err = check_module_license_and_versions(mod);
 	if (err)
 		goto free_unload;
 
 	/* Set up MODINFO_ATTR fields */
-	setup_modinfo(mod, &info);
+	setup_modinfo(mod, info);
 
 	/* Fix up syms, so that st_value is a pointer to location. */
-	err = simplify_symbols(mod, &info);
+	err = simplify_symbols(mod, info);
 	if (err < 0)
 		goto free_modinfo;
 
-	err = apply_relocations(mod, &info);
+	err = apply_relocations(mod, info);
 	if (err < 0)
 		goto free_modinfo;
 
-	err = post_relocation(mod, &info);
+	err = post_relocation(mod, info);
 	if (err < 0)
 		goto free_modinfo;
 
@@ -3041,14 +3169,14 @@ again:
 	}
 
 	/* This has to be done once we're sure module name is unique. */
-	dynamic_debug_setup(info.debug, info.num_debug);
+	dynamic_debug_setup(info->debug, info->num_debug);
 
 	/* Find duplicate symbols */
 	err = verify_export_symbols(mod);
 	if (err < 0)
 		goto ddebug;
 
-	module_bug_finalize(info.hdr, info.sechdrs, mod);
+	module_bug_finalize(info->hdr, info->sechdrs, mod);
 	list_add_rcu(&mod->list, &modules);
 	mutex_unlock(&module_mutex);
 
@@ -3059,16 +3187,17 @@ again:
 		goto unlink;
 
 	/* Link in to syfs. */
-	err = mod_sysfs_setup(mod, &info, mod->kp, mod->num_kp);
+	err = mod_sysfs_setup(mod, info, mod->kp, mod->num_kp);
 	if (err < 0)
 		goto unlink;
 
 	/* Get rid of temporary copy. */
-	free_copy(&info);
+	free_copy(info);
 
 	/* Done! */
 	trace_module_load(mod);
-	return mod;
+
+	return do_init_module(mod);
 
  unlink:
 	mutex_lock(&module_mutex);
@@ -3077,7 +3206,7 @@ again:
 	module_bug_cleanup(mod);
 	wake_up_all(&module_wq);
  ddebug:
-	dynamic_debug_remove(info.debug);
+	dynamic_debug_remove(info->debug);
  unlock:
 	mutex_unlock(&module_mutex);
 	synchronize_sched();
@@ -3089,106 +3218,48 @@ again:
  free_unload:
 	module_unload_free(mod);
  free_module:
-	module_deallocate(mod, &info);
+	module_deallocate(mod, info);
  free_copy:
-	free_copy(&info);
-	return ERR_PTR(err);
-}
-
-/* Call module constructors. */
-static void do_mod_ctors(struct module *mod)
-{
-#ifdef CONFIG_CONSTRUCTORS
-	unsigned long i;
-
-	for (i = 0; i < mod->num_ctors; i++)
-		mod->ctors[i]();
-#endif
+	free_copy(info);
+	return err;
 }
 
-/* This is where the real work happens */
 SYSCALL_DEFINE3(init_module, void __user *, umod,
 		unsigned long, len, const char __user *, uargs)
 {
-	struct module *mod;
-	int ret = 0;
-
-	/* Must have permission */
-	if (!capable(CAP_SYS_MODULE) || modules_disabled)
-		return -EPERM;
+	int err;
+	struct load_info info = { };
 
-	/* Do all the hard work */
-	mod = load_module(umod, len, uargs);
-	if (IS_ERR(mod))
-		return PTR_ERR(mod);
+	err = may_init_module();
+	if (err)
+		return err;
 
-	blocking_notifier_call_chain(&module_notify_list,
-			MODULE_STATE_COMING, mod);
+	pr_debug("init_module: umod=%p, len=%lu, uargs=%p\n",
+	       umod, len, uargs);
 
-	/* Set RO and NX regions for core */
-	set_section_ro_nx(mod->module_core,
-				mod->core_text_size,
-				mod->core_ro_size,
-				mod->core_size);
+	err = copy_module_from_user(umod, len, &info);
+	if (err)
+		return err;
 
-	/* Set RO and NX regions for init */
-	set_section_ro_nx(mod->module_init,
-				mod->init_text_size,
-				mod->init_ro_size,
-				mod->init_size);
+	return load_module(&info, uargs);
+}
 
-	do_mod_ctors(mod);
-	/* Start the module */
-	if (mod->init != NULL)
-		ret = do_one_initcall(mod->init);
-	if (ret < 0) {
-		/* Init routine failed: abort.  Try to protect us from
-                   buggy refcounters. */
-		mod->state = MODULE_STATE_GOING;
-		synchronize_sched();
-		module_put(mod);
-		blocking_notifier_call_chain(&module_notify_list,
-					     MODULE_STATE_GOING, mod);
-		free_module(mod);
-		wake_up_all(&module_wq);
-		return ret;
-	}
-	if (ret > 0) {
-		printk(KERN_WARNING
-"%s: '%s'->init suspiciously returned %d, it should follow 0/-E convention\n"
-"%s: loading module anyway...\n",
-		       __func__, mod->name, ret,
-		       __func__);
-		dump_stack();
-	}
+SYSCALL_DEFINE2(finit_module, int, fd, const char __user *, uargs)
+{
+	int err;
+	struct load_info info = { };
 
-	/* Now it's a first class citizen! */
-	mod->state = MODULE_STATE_LIVE;
-	blocking_notifier_call_chain(&module_notify_list,
-				     MODULE_STATE_LIVE, mod);
+	err = may_init_module();
+	if (err)
+		return err;
 
-	/* We need to finish all async code before the module init sequence is done */
-	async_synchronize_full();
+	pr_debug("finit_module: fd=%d, uargs=%p\n", fd, uargs);
 
-	mutex_lock(&module_mutex);
-	/* Drop initial reference. */
-	module_put(mod);
-	trim_init_extable(mod);
-#ifdef CONFIG_KALLSYMS
-	mod->num_symtab = mod->core_num_syms;
-	mod->symtab = mod->core_symtab;
-	mod->strtab = mod->core_strtab;
-#endif
-	unset_module_init_ro_nx(mod);
-	module_free(mod, mod->module_init);
-	mod->module_init = NULL;
-	mod->init_size = 0;
-	mod->init_ro_size = 0;
-	mod->init_text_size = 0;
-	mutex_unlock(&module_mutex);
-	wake_up_all(&module_wq);
+	err = copy_module_from_fd(fd, &info);
+	if (err)
+		return err;
 
-	return 0;
+	return load_module(&info, uargs);
 }
 
 static inline int within(unsigned long addr, void *start, unsigned long size)
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index dbff751e4086..395084d4ce16 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -25,6 +25,7 @@ cond_syscall(sys_swapoff);
 cond_syscall(sys_kexec_load);
 cond_syscall(compat_sys_kexec_load);
 cond_syscall(sys_init_module);
+cond_syscall(sys_finit_module);
 cond_syscall(sys_delete_module);
 cond_syscall(sys_socketpair);
 cond_syscall(sys_bind);
-- 
cgit v1.2.3


From 2f3238aebedb243804f58d62d57244edec4149b2 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Mon, 22 Oct 2012 18:09:41 +1030
Subject: module: add flags arg to sys_finit_module()

Thanks to Michael Kerrisk for keeping us honest.  These flags are actually
useful for eliminating the only case where kmod has to mangle a module's
internals: for overriding module versioning.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Acked-by: Lucas De Marchi <lucas.demarchi@profusion.mobi>
Acked-by: Kees Cook <keescook@chromium.org>
---
 include/linux/syscalls.h    |  2 +-
 include/uapi/linux/module.h |  8 ++++++++
 kernel/module.c             | 40 ++++++++++++++++++++++++++--------------
 3 files changed, 35 insertions(+), 15 deletions(-)
 create mode 100644 include/uapi/linux/module.h

(limited to 'include/linux')

diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 32bc035bcd68..8cf7b508cb50 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -868,5 +868,5 @@ asmlinkage long sys_process_vm_writev(pid_t pid,
 
 asmlinkage long sys_kcmp(pid_t pid1, pid_t pid2, int type,
 			 unsigned long idx1, unsigned long idx2);
-asmlinkage long sys_finit_module(int fd, const char __user *uargs);
+asmlinkage long sys_finit_module(int fd, const char __user *uargs, int flags);
 #endif
diff --git a/include/uapi/linux/module.h b/include/uapi/linux/module.h
new file mode 100644
index 000000000000..38da4258b12f
--- /dev/null
+++ b/include/uapi/linux/module.h
@@ -0,0 +1,8 @@
+#ifndef _UAPI_LINUX_MODULE_H
+#define _UAPI_LINUX_MODULE_H
+
+/* Flags for sys_finit_module: */
+#define MODULE_INIT_IGNORE_MODVERSIONS	1
+#define MODULE_INIT_IGNORE_VERMAGIC	2
+
+#endif /* _UAPI_LINUX_MODULE_H */
diff --git a/kernel/module.c b/kernel/module.c
index 6d2c4e4ca1f5..1395ca382fb5 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -60,6 +60,7 @@
 #include <linux/pfn.h>
 #include <linux/bsearch.h>
 #include <linux/fips.h>
+#include <uapi/linux/module.h>
 #include "module-internal.h"
 
 #define CREATE_TRACE_POINTS
@@ -2553,7 +2554,7 @@ static void free_copy(struct load_info *info)
 	vfree(info->hdr);
 }
 
-static int rewrite_section_headers(struct load_info *info)
+static int rewrite_section_headers(struct load_info *info, int flags)
 {
 	unsigned int i;
 
@@ -2581,7 +2582,10 @@ static int rewrite_section_headers(struct load_info *info)
 	}
 
 	/* Track but don't keep modinfo and version sections. */
-	info->index.vers = find_sec(info, "__versions");
+	if (flags & MODULE_INIT_IGNORE_MODVERSIONS)
+		info->index.vers = 0; /* Pretend no __versions section! */
+	else
+		info->index.vers = find_sec(info, "__versions");
 	info->index.info = find_sec(info, ".modinfo");
 	info->sechdrs[info->index.info].sh_flags &= ~(unsigned long)SHF_ALLOC;
 	info->sechdrs[info->index.vers].sh_flags &= ~(unsigned long)SHF_ALLOC;
@@ -2596,7 +2600,7 @@ static int rewrite_section_headers(struct load_info *info)
  * Return the temporary module pointer (we'll replace it with the final
  * one when we move the module sections around).
  */
-static struct module *setup_load_info(struct load_info *info)
+static struct module *setup_load_info(struct load_info *info, int flags)
 {
 	unsigned int i;
 	int err;
@@ -2607,7 +2611,7 @@ static struct module *setup_load_info(struct load_info *info)
 	info->secstrings = (void *)info->hdr
 		+ info->sechdrs[info->hdr->e_shstrndx].sh_offset;
 
-	err = rewrite_section_headers(info);
+	err = rewrite_section_headers(info, flags);
 	if (err)
 		return ERR_PTR(err);
 
@@ -2645,11 +2649,14 @@ static struct module *setup_load_info(struct load_info *info)
 	return mod;
 }
 
-static int check_modinfo(struct module *mod, struct load_info *info)
+static int check_modinfo(struct module *mod, struct load_info *info, int flags)
 {
 	const char *modmagic = get_modinfo(info, "vermagic");
 	int err;
 
+	if (flags & MODULE_INIT_IGNORE_VERMAGIC)
+		modmagic = NULL;
+
 	/* This is allowed: modprobe --force will invalidate it. */
 	if (!modmagic) {
 		err = try_to_force_load(mod, "bad vermagic");
@@ -2885,18 +2892,18 @@ int __weak module_frob_arch_sections(Elf_Ehdr *hdr,
 	return 0;
 }
 
-static struct module *layout_and_allocate(struct load_info *info)
+static struct module *layout_and_allocate(struct load_info *info, int flags)
 {
 	/* Module within temporary copy. */
 	struct module *mod;
 	Elf_Shdr *pcpusec;
 	int err;
 
-	mod = setup_load_info(info);
+	mod = setup_load_info(info, flags);
 	if (IS_ERR(mod))
 		return mod;
 
-	err = check_modinfo(mod, info);
+	err = check_modinfo(mod, info, flags);
 	if (err)
 		return ERR_PTR(err);
 
@@ -3078,7 +3085,8 @@ static int may_init_module(void)
 
 /* Allocate and load the module: note that size of section 0 is always
    zero, and we rely on this for optional sections. */
-static int load_module(struct load_info *info, const char __user *uargs)
+static int load_module(struct load_info *info, const char __user *uargs,
+		       int flags)
 {
 	struct module *mod, *old;
 	long err;
@@ -3092,7 +3100,7 @@ static int load_module(struct load_info *info, const char __user *uargs)
 		goto free_copy;
 
 	/* Figure out module layout, and allocate all the memory. */
-	mod = layout_and_allocate(info);
+	mod = layout_and_allocate(info, flags);
 	if (IS_ERR(mod)) {
 		err = PTR_ERR(mod);
 		goto free_copy;
@@ -3241,10 +3249,10 @@ SYSCALL_DEFINE3(init_module, void __user *, umod,
 	if (err)
 		return err;
 
-	return load_module(&info, uargs);
+	return load_module(&info, uargs, 0);
 }
 
-SYSCALL_DEFINE2(finit_module, int, fd, const char __user *, uargs)
+SYSCALL_DEFINE3(finit_module, int, fd, const char __user *, uargs, int, flags)
 {
 	int err;
 	struct load_info info = { };
@@ -3253,13 +3261,17 @@ SYSCALL_DEFINE2(finit_module, int, fd, const char __user *, uargs)
 	if (err)
 		return err;
 
-	pr_debug("finit_module: fd=%d, uargs=%p\n", fd, uargs);
+	pr_debug("finit_module: fd=%d, uargs=%p, flags=%i\n", fd, uargs, flags);
+
+	if (flags & ~(MODULE_INIT_IGNORE_MODVERSIONS
+		      |MODULE_INIT_IGNORE_VERMAGIC))
+		return -EINVAL;
 
 	err = copy_module_from_fd(fd, &info);
 	if (err)
 		return err;
 
-	return load_module(&info, uargs);
+	return load_module(&info, uargs, flags);
 }
 
 static inline int within(unsigned long addr, void *start, unsigned long size)
-- 
cgit v1.2.3


From 2e72d51b4ac32989496870cd8171b3682fea1839 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Tue, 16 Oct 2012 07:32:07 +1030
Subject: security: introduce kernel_module_from_file hook

Now that kernel module origins can be reasoned about, provide a hook to
the LSMs to make policy decisions about the module file. This will let
Chrome OS enforce that loadable kernel modules can only come from its
read-only hash-verified root filesystem. Other LSMs can, for example,
read extended attributes for signatures, etc.

Signed-off-by: Kees Cook <keescook@chromium.org>
Acked-by: Serge E. Hallyn <serge.hallyn@canonical.com>
Acked-by: Eric Paris <eparis@redhat.com>
Acked-by: Mimi Zohar <zohar@us.ibm.com>
Acked-by: James Morris <james.l.morris@oracle.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 include/linux/security.h | 13 +++++++++++++
 kernel/module.c          | 11 +++++++++++
 security/capability.c    |  6 ++++++
 security/security.c      |  5 +++++
 4 files changed, 35 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/security.h b/include/linux/security.h
index 05e88bdcf7d9..0f6afc657f77 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -694,6 +694,12 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *	userspace to load a kernel module with the given name.
  *	@kmod_name name of the module requested by the kernel
  *	Return 0 if successful.
+ * @kernel_module_from_file:
+ *	Load a kernel module from userspace.
+ *	@file contains the file structure pointing to the file containing
+ *	the kernel module to load. If the module is being loaded from a blob,
+ *	this argument will be NULL.
+ *	Return 0 if permission is granted.
  * @task_fix_setuid:
  *	Update the module's state after setting one or more of the user
  *	identity attributes of the current process.  The @flags parameter
@@ -1508,6 +1514,7 @@ struct security_operations {
 	int (*kernel_act_as)(struct cred *new, u32 secid);
 	int (*kernel_create_files_as)(struct cred *new, struct inode *inode);
 	int (*kernel_module_request)(char *kmod_name);
+	int (*kernel_module_from_file)(struct file *file);
 	int (*task_fix_setuid) (struct cred *new, const struct cred *old,
 				int flags);
 	int (*task_setpgid) (struct task_struct *p, pid_t pgid);
@@ -1765,6 +1772,7 @@ void security_transfer_creds(struct cred *new, const struct cred *old);
 int security_kernel_act_as(struct cred *new, u32 secid);
 int security_kernel_create_files_as(struct cred *new, struct inode *inode);
 int security_kernel_module_request(char *kmod_name);
+int security_kernel_module_from_file(struct file *file);
 int security_task_fix_setuid(struct cred *new, const struct cred *old,
 			     int flags);
 int security_task_setpgid(struct task_struct *p, pid_t pgid);
@@ -2278,6 +2286,11 @@ static inline int security_kernel_module_request(char *kmod_name)
 	return 0;
 }
 
+static inline int security_kernel_module_from_file(struct file *file)
+{
+	return 0;
+}
+
 static inline int security_task_fix_setuid(struct cred *new,
 					   const struct cred *old,
 					   int flags)
diff --git a/kernel/module.c b/kernel/module.c
index 1395ca382fb5..a1d2ed8bab93 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -29,6 +29,7 @@
 #include <linux/vmalloc.h>
 #include <linux/elf.h>
 #include <linux/proc_fs.h>
+#include <linux/security.h>
 #include <linux/seq_file.h>
 #include <linux/syscalls.h>
 #include <linux/fcntl.h>
@@ -2485,10 +2486,16 @@ static int elf_header_check(struct load_info *info)
 static int copy_module_from_user(const void __user *umod, unsigned long len,
 				  struct load_info *info)
 {
+	int err;
+
 	info->len = len;
 	if (info->len < sizeof(*(info->hdr)))
 		return -ENOEXEC;
 
+	err = security_kernel_module_from_file(NULL);
+	if (err)
+		return err;
+
 	/* Suck in entire file: we'll want most of it. */
 	info->hdr = vmalloc(info->len);
 	if (!info->hdr)
@@ -2515,6 +2522,10 @@ static int copy_module_from_fd(int fd, struct load_info *info)
 	if (!file)
 		return -ENOEXEC;
 
+	err = security_kernel_module_from_file(file);
+	if (err)
+		goto out;
+
 	err = vfs_getattr(file->f_vfsmnt, file->f_dentry, &stat);
 	if (err)
 		goto out;
diff --git a/security/capability.c b/security/capability.c
index b14a30c234b8..0fe5a026aef8 100644
--- a/security/capability.c
+++ b/security/capability.c
@@ -395,6 +395,11 @@ static int cap_kernel_module_request(char *kmod_name)
 	return 0;
 }
 
+static int cap_kernel_module_from_file(struct file *file)
+{
+	return 0;
+}
+
 static int cap_task_setpgid(struct task_struct *p, pid_t pgid)
 {
 	return 0;
@@ -967,6 +972,7 @@ void __init security_fixup_ops(struct security_operations *ops)
 	set_to_cap_if_null(ops, kernel_act_as);
 	set_to_cap_if_null(ops, kernel_create_files_as);
 	set_to_cap_if_null(ops, kernel_module_request);
+	set_to_cap_if_null(ops, kernel_module_from_file);
 	set_to_cap_if_null(ops, task_fix_setuid);
 	set_to_cap_if_null(ops, task_setpgid);
 	set_to_cap_if_null(ops, task_getpgid);
diff --git a/security/security.c b/security/security.c
index 8dcd4ae10a5f..ce88630de15d 100644
--- a/security/security.c
+++ b/security/security.c
@@ -820,6 +820,11 @@ int security_kernel_module_request(char *kmod_name)
 	return security_ops->kernel_module_request(kmod_name);
 }
 
+int security_kernel_module_from_file(struct file *file)
+{
+	return security_ops->kernel_module_from_file(file);
+}
+
 int security_task_fix_setuid(struct cred *new, const struct cred *old,
 			     int flags)
 {
-- 
cgit v1.2.3


From fdf90729e57812cb12d7938e2dee7c71e875fb08 Mon Sep 17 00:00:00 2001
From: Mimi Zohar <zohar@linux.vnet.ibm.com>
Date: Tue, 16 Oct 2012 12:40:08 +1030
Subject: ima: support new kernel module syscall

With the addition of the new kernel module syscall, which defines two
arguments - a file descriptor to the kernel module and a pointer to a NULL
terminated string of module arguments - it is now possible to measure and
appraise kernel modules like any other file on the file system.

This patch adds support to measure and appraise kernel modules in an
extensible and consistent manner.

To support filesystems without extended attribute support, additional
patches could pass the signature as the first parameter.

Signed-off-by: Mimi Zohar <zohar@us.ibm.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 Documentation/ABI/testing/ima_policy |  3 ++-
 include/linux/ima.h                  |  6 ++++++
 security/integrity/ima/ima.h         |  2 +-
 security/integrity/ima/ima_api.c     |  4 ++--
 security/integrity/ima/ima_main.c    | 21 +++++++++++++++++++++
 security/integrity/ima/ima_policy.c  |  3 +++
 security/security.c                  |  7 ++++++-
 7 files changed, 41 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/ABI/testing/ima_policy b/Documentation/ABI/testing/ima_policy
index 986946613542..ec0a38ef3145 100644
--- a/Documentation/ABI/testing/ima_policy
+++ b/Documentation/ABI/testing/ima_policy
@@ -23,7 +23,7 @@ Description:
 			lsm:	[[subj_user=] [subj_role=] [subj_type=]
 				 [obj_user=] [obj_role=] [obj_type=]]
 
-		base: 	func:= [BPRM_CHECK][FILE_MMAP][FILE_CHECK]
+		base: 	func:= [BPRM_CHECK][FILE_MMAP][FILE_CHECK][MODULE_CHECK]
 			mask:= [MAY_READ] [MAY_WRITE] [MAY_APPEND] [MAY_EXEC]
 			fsmagic:= hex value
 			uid:= decimal value
@@ -53,6 +53,7 @@ Description:
 			measure func=BPRM_CHECK
 			measure func=FILE_MMAP mask=MAY_EXEC
 			measure func=FILE_CHECK mask=MAY_READ uid=0
+			measure func=MODULE_CHECK uid=0
 			appraise fowner=0
 
 		The default policy measures all executables in bprm_check,
diff --git a/include/linux/ima.h b/include/linux/ima.h
index 2c7223d7e73b..86c361e947b9 100644
--- a/include/linux/ima.h
+++ b/include/linux/ima.h
@@ -18,6 +18,7 @@ extern int ima_bprm_check(struct linux_binprm *bprm);
 extern int ima_file_check(struct file *file, int mask);
 extern void ima_file_free(struct file *file);
 extern int ima_file_mmap(struct file *file, unsigned long prot);
+extern int ima_module_check(struct file *file);
 
 #else
 static inline int ima_bprm_check(struct linux_binprm *bprm)
@@ -40,6 +41,11 @@ static inline int ima_file_mmap(struct file *file, unsigned long prot)
 	return 0;
 }
 
+static inline int ima_module_check(struct file *file)
+{
+	return 0;
+}
+
 #endif /* CONFIG_IMA_H */
 
 #ifdef CONFIG_IMA_APPRAISE
diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h
index 6ee8826662cc..3b2adb794f15 100644
--- a/security/integrity/ima/ima.h
+++ b/security/integrity/ima/ima.h
@@ -127,7 +127,7 @@ struct integrity_iint_cache *integrity_iint_insert(struct inode *inode);
 struct integrity_iint_cache *integrity_iint_find(struct inode *inode);
 
 /* IMA policy related functions */
-enum ima_hooks { FILE_CHECK = 1, FILE_MMAP, BPRM_CHECK, POST_SETATTR };
+enum ima_hooks { FILE_CHECK = 1, FILE_MMAP, BPRM_CHECK, MODULE_CHECK, POST_SETATTR };
 
 int ima_match_policy(struct inode *inode, enum ima_hooks func, int mask,
 		     int flags);
diff --git a/security/integrity/ima/ima_api.c b/security/integrity/ima/ima_api.c
index b356884fb3ef..0cea3db21657 100644
--- a/security/integrity/ima/ima_api.c
+++ b/security/integrity/ima/ima_api.c
@@ -100,12 +100,12 @@ err_out:
  * ima_get_action - appraise & measure decision based on policy.
  * @inode: pointer to inode to measure
  * @mask: contains the permission mask (MAY_READ, MAY_WRITE, MAY_EXECUTE)
- * @function: calling function (FILE_CHECK, BPRM_CHECK, FILE_MMAP)
+ * @function: calling function (FILE_CHECK, BPRM_CHECK, FILE_MMAP, MODULE_CHECK)
  *
  * The policy is defined in terms of keypairs:
  * 		subj=, obj=, type=, func=, mask=, fsmagic=
  *	subj,obj, and type: are LSM specific.
- * 	func: FILE_CHECK | BPRM_CHECK | FILE_MMAP
+ * 	func: FILE_CHECK | BPRM_CHECK | FILE_MMAP | MODULE_CHECK
  * 	mask: contains the permission mask
  *	fsmagic: hex value
  *
diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
index 73c9a268253e..45de18e9a6f2 100644
--- a/security/integrity/ima/ima_main.c
+++ b/security/integrity/ima/ima_main.c
@@ -280,6 +280,27 @@ int ima_file_check(struct file *file, int mask)
 }
 EXPORT_SYMBOL_GPL(ima_file_check);
 
+/**
+ * ima_module_check - based on policy, collect/store/appraise measurement.
+ * @file: pointer to the file to be measured/appraised
+ *
+ * Measure/appraise kernel modules based on policy.
+ *
+ * Always return 0 and audit dentry_open failures.
+ * Return code is based upon measurement appraisal.
+ */
+int ima_module_check(struct file *file)
+{
+	int rc;
+
+	if (!file)
+		rc = INTEGRITY_UNKNOWN;
+	else
+		rc = process_measurement(file, file->f_dentry->d_name.name,
+					 MAY_EXEC, MODULE_CHECK);
+	return (ima_appraise & IMA_APPRAISE_ENFORCE) ? rc : 0;
+}
+
 static int __init init_ima(void)
 {
 	int error;
diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c
index c7dacd2eab7a..af7d182d5a46 100644
--- a/security/integrity/ima/ima_policy.c
+++ b/security/integrity/ima/ima_policy.c
@@ -80,6 +80,7 @@ static struct ima_rule_entry default_rules[] = {
 	 .flags = IMA_FUNC | IMA_MASK},
 	{.action = MEASURE,.func = FILE_CHECK,.mask = MAY_READ,.uid = GLOBAL_ROOT_UID,
 	 .flags = IMA_FUNC | IMA_MASK | IMA_UID},
+	{.action = MEASURE,.func = MODULE_CHECK, .flags = IMA_FUNC},
 };
 
 static struct ima_rule_entry default_appraise_rules[] = {
@@ -401,6 +402,8 @@ static int ima_parse_rule(char *rule, struct ima_rule_entry *entry)
 			/* PATH_CHECK is for backwards compat */
 			else if (strcmp(args[0].from, "PATH_CHECK") == 0)
 				entry->func = FILE_CHECK;
+			else if (strcmp(args[0].from, "MODULE_CHECK") == 0)
+				entry->func = MODULE_CHECK;
 			else if (strcmp(args[0].from, "FILE_MMAP") == 0)
 				entry->func = FILE_MMAP;
 			else if (strcmp(args[0].from, "BPRM_CHECK") == 0)
diff --git a/security/security.c b/security/security.c
index ce88630de15d..daa97f4ac9d1 100644
--- a/security/security.c
+++ b/security/security.c
@@ -822,7 +822,12 @@ int security_kernel_module_request(char *kmod_name)
 
 int security_kernel_module_from_file(struct file *file)
 {
-	return security_ops->kernel_module_from_file(file);
+	int ret;
+
+	ret = security_ops->kernel_module_from_file(file);
+	if (ret)
+		return ret;
+	return ima_module_check(file);
 }
 
 int security_task_fix_setuid(struct cred *new, const struct cred *old,
-- 
cgit v1.2.3


From 6f33d58794ef4cef4b2c706029810f9688bd3026 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Thu, 22 Nov 2012 12:30:25 +1030
Subject: __UNIQUE_ID()

Jan Beulich points out __COUNTER__ (gcc 4.3 and above), so let's use
that to create unique ids.  This is better than __LINE__ which we use
today, so provide a wrapper.

Stanislaw Gruszka <sgruszka@redhat.com> reported that some module parameters
start with a digit, so we need to prepend when we for the unique id.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Acked-by: Jan Beulich <jbeulich@suse.com>
---
 include/linux/compiler-gcc4.h | 2 ++
 include/linux/compiler.h      | 9 +++++++++
 2 files changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h
index 412bc6c2b023..56c802cba7f6 100644
--- a/include/linux/compiler-gcc4.h
+++ b/include/linux/compiler-gcc4.h
@@ -31,6 +31,8 @@
 
 #define __linktime_error(message) __attribute__((__error__(message)))
 
+#define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__)
+
 #if __GNUC_MINOR__ >= 5
 /*
  * Mark a position in code as unreachable.  This can be used to
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index f430e4162f41..5f45335e1ac7 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -42,6 +42,10 @@ extern void __chk_io_ptr(const volatile void __iomem *);
 # define __rcu
 #endif
 
+/* Indirect macros required for expanded argument pasting, eg. __LINE__. */
+#define ___PASTE(a,b) a##b
+#define __PASTE(a,b) ___PASTE(a,b)
+
 #ifdef __KERNEL__
 
 #ifdef __GNUC__
@@ -164,6 +168,11 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect);
     (typeof(ptr)) (__ptr + (off)); })
 #endif
 
+/* Not-quite-unique ID. */
+#ifndef __UNIQUE_ID
+# define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __LINE__)
+#endif
+
 #endif /* __KERNEL__ */
 
 #endif /* __ASSEMBLY__ */
-- 
cgit v1.2.3


From 34182eea36fc1d70d748b0947c873314980ba806 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Thu, 22 Nov 2012 12:30:25 +1030
Subject: moduleparam: use __UNIQUE_ID()

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 include/linux/moduleparam.h | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h
index d6a58065c09c..137b4198fc03 100644
--- a/include/linux/moduleparam.h
+++ b/include/linux/moduleparam.h
@@ -16,17 +16,15 @@
 /* Chosen so that structs with an unsigned long line up. */
 #define MAX_PARAM_PREFIX_LEN (64 - sizeof(unsigned long))
 
-#define ___module_cat(a,b) __mod_ ## a ## b
-#define __module_cat(a,b) ___module_cat(a,b)
 #ifdef MODULE
 #define __MODULE_INFO(tag, name, info)					  \
-static const char __module_cat(name,__LINE__)[]				  \
+static const char __UNIQUE_ID(name)[]					  \
   __used __attribute__((section(".modinfo"), unused, aligned(1)))	  \
   = __stringify(tag) "=" info
 #else  /* !MODULE */
 /* This struct is here for syntactic coherency, it is not used */
 #define __MODULE_INFO(tag, name, info)					  \
-  struct __module_cat(name,__LINE__) {}
+  struct __UNIQUE_ID(name) {}
 #endif
 #define __MODULE_PARM_TYPE(name, _type)					  \
   __MODULE_INFO(parmtype, name##type, #name ":" _type)
-- 
cgit v1.2.3


From facc0a6bd494ce21e31b34fc355ecf702518272b Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 5 Dec 2012 11:55:28 +1030
Subject: ASN.1: Define indefinite length marker constant

Define a constant to hold the marker value seen in an indefinite-length
element.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 include/linux/asn1.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/asn1.h b/include/linux/asn1.h
index 5c3f4e4b9a23..eed6982860ba 100644
--- a/include/linux/asn1.h
+++ b/include/linux/asn1.h
@@ -64,4 +64,6 @@ enum asn1_tag {
 	ASN1_LONG_TAG	= 31	/* Long form tag */
 };
 
+#define ASN1_INDEFINITE_LENGTH 0x80
+
 #endif /* _LINUX_ASN1_H */
-- 
cgit v1.2.3


From 8349e5aeaadd160b7cce554a62a05be4b2d894aa Mon Sep 17 00:00:00 2001
From: Jeff Garzik <jeff@garzik.org>
Date: Fri, 14 Dec 2012 09:34:01 -0500
Subject: Revert "libata: check SATA_SETTINGS log with HW Feature Ctrl"

This reverts commit de90cd71f68e947d3bd6c3f2ef5731ead010a768.

Shane Huang writes:

  Please suspend this patch because I just received two new
  DevSlp drives but found word 78 bit 5 is _not_ set.

  I'm checking with the drive vendor whether he gave me
  the wrong information. If bit 5 is not the necessary and
  sufficient condition, I will implement another patch to
  replace ata_device->sata_settings into ->devslp_timing.

Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 drivers/ata/libata-core.c | 3 ++-
 include/linux/ata.h       | 1 -
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 47d59616fe3d..9e8b99af400d 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -2331,8 +2331,9 @@ int ata_dev_configure(struct ata_device *dev)
 
 		/* Obtain SATA Settings page from Identify Device Data Log,
 		 * which contains DevSlp timing variables etc.
+		 * Exclude old devices with ata_id_has_ncq()
 		 */
-		if (ata_id_has_hw_feature_ctrl(dev->id)) {
+		if (ata_id_has_ncq(dev->id)) {
 			err_mask = ata_read_log_page(dev,
 						     ATA_LOG_SATA_ID_DEV_DATA,
 						     ATA_LOG_SATA_SETTINGS,
diff --git a/include/linux/ata.h b/include/linux/ata.h
index 18cbb93fdbca..408da9502177 100644
--- a/include/linux/ata.h
+++ b/include/linux/ata.h
@@ -593,7 +593,6 @@ static inline int ata_is_data(u8 prot)
 #define ata_id_cdb_intr(id)	(((id)[ATA_ID_CONFIG] & 0x60) == 0x20)
 #define ata_id_has_da(id)	((id)[ATA_ID_SATA_CAPABILITY_2] & (1 << 4))
 #define ata_id_has_devslp(id)	((id)[ATA_ID_FEATURE_SUPP] & (1 << 8))
-#define ata_id_has_hw_feature_ctrl(id)	((id)[ATA_ID_FEATURE_SUPP] & (1 << 5))
 
 static inline bool ata_id_has_hipm(const u16 *id)
 {
-- 
cgit v1.2.3


From 8dd2cb7e880d2f77fba53b523c99133ad5054cfd Mon Sep 17 00:00:00 2001
From: Shaohua Li <shli@kernel.org>
Date: Fri, 14 Dec 2012 11:15:36 +0800
Subject: block: discard granularity might not be power of 2

In MD raid case, discard granularity might not be power of 2, for example, a
4-disk raid5 has 3*chunk_size discard granularity. Correct the calculation for
such cases.

Reported-by: Neil Brown <neilb@suse.de>
Signed-off-by: Shaohua Li <shli@fusionio.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-lib.c        | 23 +++++++++++++----------
 block/blk-settings.c   |  6 +++---
 include/linux/blkdev.h |  7 ++++---
 3 files changed, 20 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-lib.c b/block/blk-lib.c
index 9373b58dfab1..5677fd33d7d2 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -43,8 +43,8 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 	DECLARE_COMPLETION_ONSTACK(wait);
 	struct request_queue *q = bdev_get_queue(bdev);
 	int type = REQ_WRITE | REQ_DISCARD;
-	unsigned int max_discard_sectors;
-	unsigned int granularity, alignment, mask;
+	sector_t max_discard_sectors;
+	sector_t granularity, alignment;
 	struct bio_batch bb;
 	struct bio *bio;
 	int ret = 0;
@@ -57,15 +57,16 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 
 	/* Zero-sector (unknown) and one-sector granularities are the same.  */
 	granularity = max(q->limits.discard_granularity >> 9, 1U);
-	mask = granularity - 1;
-	alignment = (bdev_discard_alignment(bdev) >> 9) & mask;
+	alignment = bdev_discard_alignment(bdev) >> 9;
+	alignment = sector_div(alignment, granularity);
 
 	/*
 	 * Ensure that max_discard_sectors is of the proper
 	 * granularity, so that requests stay aligned after a split.
 	 */
 	max_discard_sectors = min(q->limits.max_discard_sectors, UINT_MAX >> 9);
-	max_discard_sectors = round_down(max_discard_sectors, granularity);
+	sector_div(max_discard_sectors, granularity);
+	max_discard_sectors *= granularity;
 	if (unlikely(!max_discard_sectors)) {
 		/* Avoid infinite loop below. Being cautious never hurts. */
 		return -EOPNOTSUPP;
@@ -83,7 +84,7 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 
 	while (nr_sects) {
 		unsigned int req_sects;
-		sector_t end_sect;
+		sector_t end_sect, tmp;
 
 		bio = bio_alloc(gfp_mask, 1);
 		if (!bio) {
@@ -98,10 +99,12 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 		 * misaligned, stop the discard at the previous aligned sector.
 		 */
 		end_sect = sector + req_sects;
-		if (req_sects < nr_sects && (end_sect & mask) != alignment) {
-			end_sect =
-				round_down(end_sect - alignment, granularity)
-				+ alignment;
+		tmp = end_sect;
+		if (req_sects < nr_sects &&
+		    sector_div(tmp, granularity) != alignment) {
+			end_sect = end_sect - alignment;
+			sector_div(end_sect, granularity);
+			end_sect = end_sect * granularity + alignment;
 			req_sects = end_sect - sector;
 		}
 
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 779bb7646bcd..c50ecf0ea3b1 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -611,7 +611,7 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
 			bottom = b->discard_granularity + alignment;
 
 			/* Verify that top and bottom intervals line up */
-			if (max(top, bottom) & (min(top, bottom) - 1))
+			if ((max(top, bottom) % min(top, bottom)) != 0)
 				t->discard_misaligned = 1;
 		}
 
@@ -619,8 +619,8 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
 						      b->max_discard_sectors);
 		t->discard_granularity = max(t->discard_granularity,
 					     b->discard_granularity);
-		t->discard_alignment = lcm(t->discard_alignment, alignment) &
-			(t->discard_granularity - 1);
+		t->discard_alignment = lcm(t->discard_alignment, alignment) %
+			t->discard_granularity;
 	}
 
 	return ret;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index c9d233e727f2..acb4f7bbbd32 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1188,13 +1188,14 @@ static inline int queue_discard_alignment(struct request_queue *q)
 
 static inline int queue_limit_discard_alignment(struct queue_limits *lim, sector_t sector)
 {
-	unsigned int alignment = (sector << 9) & (lim->discard_granularity - 1);
+	sector_t alignment = sector << 9;
+	alignment = sector_div(alignment, lim->discard_granularity);
 
 	if (!lim->max_discard_sectors)
 		return 0;
 
-	return (lim->discard_granularity + lim->discard_alignment - alignment)
-		& (lim->discard_granularity - 1);
+	alignment = lim->discard_granularity + lim->discard_alignment - alignment;
+	return sector_div(alignment, lim->discard_granularity);
 }
 
 static inline int bdev_discard_alignment(struct block_device *bdev)
-- 
cgit v1.2.3


From 63b68901dfd590cc13d4fe5c08dec2ca75b3c4aa Mon Sep 17 00:00:00 2001
From: Roger Quadros <rogerq@ti.com>
Date: Fri, 14 Dec 2012 09:09:11 -0800
Subject: mfd: omap-usb-host: get rid of cpu_is_omap..() macros

Instead of using cpu_is_omap..() macros in the device driver we
rely on information provided in the platform data.

The only information we need is whether the USB Host module has
a single ULPI bypass control bit for all ports or individual bypass
control bits for each port. OMAP3 REV2.1 and earlier have the former.

Signed-off-by: Roger Quadros <rogerq@ti.com>
Acked-by: Samuel Ortiz <sameo@linux.intel.com>
[tony@atomide.com: updated to remove plat/cpu.h]
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/mach-omap2/usb-host.c         | 4 ++++
 drivers/mfd/omap-usb-host.c            | 3 +--
 include/linux/platform_data/usb-omap.h | 3 +++
 3 files changed, 8 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-omap2/usb-host.c b/arch/arm/mach-omap2/usb-host.c
index d1dbe125b34f..2e44e8a22884 100644
--- a/arch/arm/mach-omap2/usb-host.c
+++ b/arch/arm/mach-omap2/usb-host.c
@@ -508,6 +508,10 @@ void __init usbhs_init(const struct usbhs_omap_board_data *pdata)
 	if (cpu_is_omap34xx()) {
 		setup_ehci_io_mux(pdata->port_mode);
 		setup_ohci_io_mux(pdata->port_mode);
+
+		if (omap_rev() <= OMAP3430_REV_ES2_1)
+			usbhs_data.single_ulpi_bypass = true;
+
 	} else if (cpu_is_omap44xx()) {
 		setup_4430ehci_io_mux(pdata->port_mode);
 		setup_4430ohci_io_mux(pdata->port_mode);
diff --git a/drivers/mfd/omap-usb-host.c b/drivers/mfd/omap-usb-host.c
index 770a0d01e0b9..05164d7f054b 100644
--- a/drivers/mfd/omap-usb-host.c
+++ b/drivers/mfd/omap-usb-host.c
@@ -25,7 +25,6 @@
 #include <linux/dma-mapping.h>
 #include <linux/spinlock.h>
 #include <linux/gpio.h>
-#include <plat/cpu.h>
 #include <linux/platform_device.h>
 #include <linux/platform_data/usb-omap.h>
 #include <linux/pm_runtime.h>
@@ -384,7 +383,7 @@ static void omap_usbhs_init(struct device *dev)
 			reg &= ~OMAP_UHH_HOSTCONFIG_P3_CONNECT_STATUS;
 
 		/* Bypass the TLL module for PHY mode operation */
-		if (cpu_is_omap3430() && (omap_rev() <= OMAP3430_REV_ES2_1)) {
+		if (pdata->single_ulpi_bypass) {
 			dev_dbg(dev, "OMAP3 ES version <= ES2.1\n");
 			if (is_ehci_phy_mode(pdata->port_mode[0]) ||
 				is_ehci_phy_mode(pdata->port_mode[1]) ||
diff --git a/include/linux/platform_data/usb-omap.h b/include/linux/platform_data/usb-omap.h
index 8570bcfe6311..ef65b67c56c3 100644
--- a/include/linux/platform_data/usb-omap.h
+++ b/include/linux/platform_data/usb-omap.h
@@ -59,6 +59,9 @@ struct usbhs_omap_platform_data {
 
 	struct ehci_hcd_omap_platform_data	*ehci_data;
 	struct ohci_hcd_omap_platform_data	*ohci_data;
+
+	/* OMAP3 <= ES2.1 have a single ulpi bypass control bit */
+	unsigned				single_ulpi_bypass:1;
 };
 
 /*-------------------------------------------------------------------------*/
-- 
cgit v1.2.3


From d9ba573718666df2e7e30d671f81bba39d07f91c Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Fri, 14 Dec 2012 09:09:11 -0800
Subject: ARM: OMAP: Move plat/omap-serial.h to
 include/linux/platform_data/serial-omap.h

We need to move this file to allow ARM multiplatform configurations
to build for omap2+. This can now be done as this file now only
contains platform_data.

cc: Russell King <linux@arm.linux.org.uk>
cc: Alan Cox <alan@linux.intel.com>
cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
cc: Govindraj.R <govindraj.raja@ti.com>
cc: Kevin Hilman <khilman@ti.com>
cc: linux-serial@vger.kernel.org
Reviewed-by: Felipe Balbi <balbi@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/mach-omap2/serial.c                  |  3 +-
 arch/arm/plat-omap/include/plat/omap-serial.h | 51 ---------------------------
 drivers/tty/serial/omap-serial.c              |  3 +-
 include/linux/platform_data/serial-omap.h     | 51 +++++++++++++++++++++++++++
 4 files changed, 53 insertions(+), 55 deletions(-)
 delete mode 100644 arch/arm/plat-omap/include/plat/omap-serial.h
 create mode 100644 include/linux/platform_data/serial-omap.h

(limited to 'include/linux')

diff --git a/arch/arm/mach-omap2/serial.c b/arch/arm/mach-omap2/serial.c
index 93d102535c85..04fdbc4c499b 100644
--- a/arch/arm/mach-omap2/serial.c
+++ b/arch/arm/mach-omap2/serial.c
@@ -27,8 +27,7 @@
 #include <linux/pm_runtime.h>
 #include <linux/console.h>
 #include <linux/omap-dma.h>
-
-#include <plat/omap-serial.h>
+#include <linux/platform_data/serial-omap.h>
 
 #include "common.h"
 #include "omap_hwmod.h"
diff --git a/arch/arm/plat-omap/include/plat/omap-serial.h b/arch/arm/plat-omap/include/plat/omap-serial.h
deleted file mode 100644
index ff9b0aab5281..000000000000
--- a/arch/arm/plat-omap/include/plat/omap-serial.h
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Driver for OMAP-UART controller.
- * Based on drivers/serial/8250.c
- *
- * Copyright (C) 2010 Texas Instruments.
- *
- * Authors:
- *	Govindraj R	<govindraj.raja@ti.com>
- *	Thara Gopinath	<thara@ti.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#ifndef __OMAP_SERIAL_H__
-#define __OMAP_SERIAL_H__
-
-#include <linux/serial_core.h>
-#include <linux/device.h>
-#include <linux/pm_qos.h>
-
-#define DRIVER_NAME	"omap_uart"
-
-/*
- * Use tty device name as ttyO, [O -> OMAP]
- * in bootargs we specify as console=ttyO0 if uart1
- * is used as console uart.
- */
-#define OMAP_SERIAL_NAME	"ttyO"
-
-struct omap_uart_port_info {
-	bool			dma_enabled;	/* To specify DMA Mode */
-	unsigned int		uartclk;	/* UART clock rate */
-	upf_t			flags;		/* UPF_* flags */
-	unsigned int		dma_rx_buf_size;
-	unsigned int		dma_rx_timeout;
-	unsigned int		autosuspend_timeout;
-	unsigned int		dma_rx_poll_rate;
-	int			DTR_gpio;
-	int			DTR_inverted;
-	int			DTR_present;
-
-	int (*get_context_loss_count)(struct device *);
-	void (*set_forceidle)(struct device *);
-	void (*set_noidle)(struct device *);
-	void (*enable_wakeup)(struct device *, bool);
-};
-
-#endif /* __OMAP_SERIAL_H__ */
diff --git a/drivers/tty/serial/omap-serial.c b/drivers/tty/serial/omap-serial.c
index 23f797eb7a28..57d6b29c039c 100644
--- a/drivers/tty/serial/omap-serial.c
+++ b/drivers/tty/serial/omap-serial.c
@@ -41,8 +41,7 @@
 #include <linux/of.h>
 #include <linux/gpio.h>
 #include <linux/pinctrl/consumer.h>
-
-#include <plat/omap-serial.h>
+#include <linux/platform_data/serial-omap.h>
 
 #define OMAP_MAX_HSUART_PORTS	6
 
diff --git a/include/linux/platform_data/serial-omap.h b/include/linux/platform_data/serial-omap.h
new file mode 100644
index 000000000000..ff9b0aab5281
--- /dev/null
+++ b/include/linux/platform_data/serial-omap.h
@@ -0,0 +1,51 @@
+/*
+ * Driver for OMAP-UART controller.
+ * Based on drivers/serial/8250.c
+ *
+ * Copyright (C) 2010 Texas Instruments.
+ *
+ * Authors:
+ *	Govindraj R	<govindraj.raja@ti.com>
+ *	Thara Gopinath	<thara@ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef __OMAP_SERIAL_H__
+#define __OMAP_SERIAL_H__
+
+#include <linux/serial_core.h>
+#include <linux/device.h>
+#include <linux/pm_qos.h>
+
+#define DRIVER_NAME	"omap_uart"
+
+/*
+ * Use tty device name as ttyO, [O -> OMAP]
+ * in bootargs we specify as console=ttyO0 if uart1
+ * is used as console uart.
+ */
+#define OMAP_SERIAL_NAME	"ttyO"
+
+struct omap_uart_port_info {
+	bool			dma_enabled;	/* To specify DMA Mode */
+	unsigned int		uartclk;	/* UART clock rate */
+	upf_t			flags;		/* UPF_* flags */
+	unsigned int		dma_rx_buf_size;
+	unsigned int		dma_rx_timeout;
+	unsigned int		autosuspend_timeout;
+	unsigned int		dma_rx_poll_rate;
+	int			DTR_gpio;
+	int			DTR_inverted;
+	int			DTR_present;
+
+	int (*get_context_loss_count)(struct device *);
+	void (*set_forceidle)(struct device *);
+	void (*set_noidle)(struct device *);
+	void (*enable_wakeup)(struct device *, bool);
+};
+
+#endif /* __OMAP_SERIAL_H__ */
-- 
cgit v1.2.3


From 8e63b6a8adabb0551124c3b78f7f5f36912c3728 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Sat, 15 Dec 2012 15:21:52 -0500
Subject: NFSv4.1: Move the RPC timestamp out of the slot.

Shave a few bytes off the slot table size by moving the RPC timestamp
into the sequence results.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c       | 14 +++++++-------
 fs/nfs/nfs4session.c    |  3 +--
 fs/nfs/nfs4session.h    |  1 -
 include/linux/nfs_xdr.h |  1 +
 4 files changed, 9 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 9003b8f6b77f..afb428e63b52 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -419,7 +419,6 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *
 {
 	struct nfs4_session *session;
 	struct nfs4_slot *slot;
-	unsigned long timestamp;
 	struct nfs_client *clp;
 	int ret = 1;
 
@@ -444,9 +443,8 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *
 	case 0:
 		/* Update the slot's sequence and clientid lease timer */
 		++slot->seq_nr;
-		timestamp = slot->renewal_time;
 		clp = session->clp;
-		do_renew_lease(clp, timestamp);
+		do_renew_lease(clp, res->sr_timestamp);
 		/* Check sequence flags */
 		if (res->sr_status_flags != 0)
 			nfs4_schedule_lease_recovery(clp);
@@ -473,10 +471,11 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *
 		 * Could this slot have been previously retired?
 		 * If so, then the server may be expecting seq_nr = 1!
 		 */
-		if (slot->seq_nr == 1)
-			break;
-		slot->seq_nr = 1;
-		goto retry_nowait;
+		if (slot->seq_nr != 1) {
+			slot->seq_nr = 1;
+			goto retry_nowait;
+		}
+		break;
 	case -NFS4ERR_SEQ_FALSE_RETRY:
 		++slot->seq_nr;
 		goto retry_nowait;
@@ -567,6 +566,7 @@ int nfs41_setup_sequence(struct nfs4_session *session,
 			slot->slot_nr, slot->seq_nr);
 
 	res->sr_slot = slot;
+	res->sr_timestamp = jiffies;
 	res->sr_status_flags = 0;
 	/*
 	 * sr_status is only set in decode_sequence, and so will remain
diff --git a/fs/nfs/nfs4session.c b/fs/nfs/nfs4session.c
index 1e6c87c443a7..0e1cc1f4e51a 100644
--- a/fs/nfs/nfs4session.c
+++ b/fs/nfs/nfs4session.c
@@ -143,7 +143,6 @@ struct nfs4_slot *nfs4_alloc_slot(struct nfs4_slot_table *tbl)
 	if (slotid > tbl->highest_used_slotid ||
 			tbl->highest_used_slotid == NFS4_NO_SLOT)
 		tbl->highest_used_slotid = slotid;
-	ret->renewal_time = jiffies;
 	ret->generation = tbl->generation;
 
 out:
@@ -228,9 +227,9 @@ static bool nfs41_assign_slot(struct rpc_task *task, void *pslot)
 
 	if (nfs4_session_draining(tbl->session) && !args->sa_privileged)
 		return false;
-	slot->renewal_time = jiffies;
 	slot->generation = tbl->generation;
 	args->sa_slot = slot;
+	res->sr_timestamp = jiffies;
 	res->sr_slot = slot;
 	res->sr_status_flags = 0;
 	res->sr_status = 1;
diff --git a/fs/nfs/nfs4session.h b/fs/nfs/nfs4session.h
index 04f834cab16c..d17b08091d4b 100644
--- a/fs/nfs/nfs4session.h
+++ b/fs/nfs/nfs4session.h
@@ -19,7 +19,6 @@ struct nfs4_slot {
 	struct nfs4_slot_table	*table;
 	struct nfs4_slot	*next;
 	unsigned long		generation;
-	unsigned long		renewal_time;
 	u32			slot_nr;
 	u32		 	seq_nr;
 };
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index a55abd499c21..29adb12c7ecf 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -194,6 +194,7 @@ struct nfs4_sequence_args {
 
 struct nfs4_sequence_res {
 	struct nfs4_slot	*sr_slot;	/* slot used to send request */
+	unsigned long		sr_timestamp;
 	int			sr_status;	/* sequence operation status */
 	u32			sr_status_flags;
 	u32			sr_highest_slotid;
-- 
cgit v1.2.3


From 11520e5e7c1855fc3bf202bb3be35a39d9efa034 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sat, 15 Dec 2012 15:15:24 -0800
Subject: Revert "x86-64/efi: Use EFI to deal with platform wall clock (again)"

This reverts commit bd52276fa1d4 ("x86-64/efi: Use EFI to deal with
platform wall clock (again)"), and the two supporting commits:

  da5a108d05b4: "x86/kernel: remove tboot 1:1 page table creation code"

  185034e72d59: "x86, efi: 1:1 pagetable mapping for virtual EFI calls")

as they all depend semantically on commit 53b87cf088e2 ("x86, mm:
Include the entire kernel memory map in trampoline_pgd") that got
reverted earlier due to the problems it caused.

This was pointed out by Yinghai Lu, and verified by me on my Macbook Air
that uses EFI.

Pointed-out-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/include/asm/efi.h     | 28 ++++-----------
 arch/x86/kernel/tboot.c        | 78 +++++++++++++++++++++++++++++++++++++++---
 arch/x86/mm/pageattr.c         | 10 +++---
 arch/x86/platform/efi/efi.c    | 30 +++++++++++++---
 arch/x86/platform/efi/efi_64.c | 15 --------
 include/linux/efi.h            |  2 ++
 init/main.c                    |  8 ++---
 7 files changed, 116 insertions(+), 55 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index fd13815fe85c..6e8fdf5ad113 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -69,37 +69,23 @@ extern u64 efi_call6(void *fp, u64 arg1, u64 arg2, u64 arg3,
 	efi_call6((void *)(f), (u64)(a1), (u64)(a2), (u64)(a3),		\
 		  (u64)(a4), (u64)(a5), (u64)(a6))
 
-extern unsigned long efi_call_virt_prelog(void);
-extern void efi_call_virt_epilog(unsigned long);
-
-#define efi_callx(x, func, ...)					\
-	({							\
-		efi_status_t __status;				\
-		unsigned long __pgd;				\
-								\
-		__pgd = efi_call_virt_prelog();			\
-		__status = efi_call##x(func, __VA_ARGS__);	\
-		efi_call_virt_epilog(__pgd);			\
-		__status;					\
-	})
-
 #define efi_call_virt0(f)				\
-	efi_callx(0, (void *)(efi.systab->runtime->f))
+	efi_call0((void *)(efi.systab->runtime->f))
 #define efi_call_virt1(f, a1)					\
-	efi_callx(1, (void *)(efi.systab->runtime->f), (u64)(a1))
+	efi_call1((void *)(efi.systab->runtime->f), (u64)(a1))
 #define efi_call_virt2(f, a1, a2)					\
-	efi_callx(2, (void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2))
+	efi_call2((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2))
 #define efi_call_virt3(f, a1, a2, a3)					\
-	efi_callx(3, (void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
+	efi_call3((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
 		  (u64)(a3))
 #define efi_call_virt4(f, a1, a2, a3, a4)				\
-	efi_callx(4, (void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
+	efi_call4((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
 		  (u64)(a3), (u64)(a4))
 #define efi_call_virt5(f, a1, a2, a3, a4, a5)				\
-	efi_callx(5, (void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
+	efi_call5((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
 		  (u64)(a3), (u64)(a4), (u64)(a5))
 #define efi_call_virt6(f, a1, a2, a3, a4, a5, a6)			\
-	efi_callx(6, (void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
+	efi_call6((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
 		  (u64)(a3), (u64)(a4), (u64)(a5), (u64)(a6))
 
 extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size,
diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c
index d4f460f962ee..f84fe00fad48 100644
--- a/arch/x86/kernel/tboot.c
+++ b/arch/x86/kernel/tboot.c
@@ -103,13 +103,71 @@ void __init tboot_probe(void)
 	pr_debug("tboot_size: 0x%x\n", tboot->tboot_size);
 }
 
+static pgd_t *tboot_pg_dir;
+static struct mm_struct tboot_mm = {
+	.mm_rb          = RB_ROOT,
+	.pgd            = swapper_pg_dir,
+	.mm_users       = ATOMIC_INIT(2),
+	.mm_count       = ATOMIC_INIT(1),
+	.mmap_sem       = __RWSEM_INITIALIZER(init_mm.mmap_sem),
+	.page_table_lock =  __SPIN_LOCK_UNLOCKED(init_mm.page_table_lock),
+	.mmlist         = LIST_HEAD_INIT(init_mm.mmlist),
+};
+
 static inline void switch_to_tboot_pt(void)
 {
-#ifdef CONFIG_X86_32
-	load_cr3(initial_page_table);
-#else
-	write_cr3(real_mode_header->trampoline_pgd);
-#endif
+	write_cr3(virt_to_phys(tboot_pg_dir));
+}
+
+static int map_tboot_page(unsigned long vaddr, unsigned long pfn,
+			  pgprot_t prot)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+
+	pgd = pgd_offset(&tboot_mm, vaddr);
+	pud = pud_alloc(&tboot_mm, pgd, vaddr);
+	if (!pud)
+		return -1;
+	pmd = pmd_alloc(&tboot_mm, pud, vaddr);
+	if (!pmd)
+		return -1;
+	pte = pte_alloc_map(&tboot_mm, NULL, pmd, vaddr);
+	if (!pte)
+		return -1;
+	set_pte_at(&tboot_mm, vaddr, pte, pfn_pte(pfn, prot));
+	pte_unmap(pte);
+	return 0;
+}
+
+static int map_tboot_pages(unsigned long vaddr, unsigned long start_pfn,
+			   unsigned long nr)
+{
+	/* Reuse the original kernel mapping */
+	tboot_pg_dir = pgd_alloc(&tboot_mm);
+	if (!tboot_pg_dir)
+		return -1;
+
+	for (; nr > 0; nr--, vaddr += PAGE_SIZE, start_pfn++) {
+		if (map_tboot_page(vaddr, start_pfn, PAGE_KERNEL_EXEC))
+			return -1;
+	}
+
+	return 0;
+}
+
+static void tboot_create_trampoline(void)
+{
+	u32 map_base, map_size;
+
+	/* Create identity map for tboot shutdown code. */
+	map_base = PFN_DOWN(tboot->tboot_base);
+	map_size = PFN_UP(tboot->tboot_size);
+	if (map_tboot_pages(map_base << PAGE_SHIFT, map_base, map_size))
+		panic("tboot: Error mapping tboot pages (mfns) @ 0x%x, 0x%x\n",
+		      map_base, map_size);
 }
 
 #ifdef CONFIG_ACPI_SLEEP
@@ -167,6 +225,14 @@ void tboot_shutdown(u32 shutdown_type)
 	if (!tboot_enabled())
 		return;
 
+	/*
+	 * if we're being called before the 1:1 mapping is set up then just
+	 * return and let the normal shutdown happen; this should only be
+	 * due to very early panic()
+	 */
+	if (!tboot_pg_dir)
+		return;
+
 	/* if this is S3 then set regions to MAC */
 	if (shutdown_type == TB_SHUTDOWN_S3)
 		if (tboot_setup_sleep())
@@ -277,6 +343,8 @@ static __init int tboot_late_init(void)
 	if (!tboot_enabled())
 		return 0;
 
+	tboot_create_trampoline();
+
 	atomic_set(&ap_wfs_count, 0);
 	register_hotcpu_notifier(&tboot_cpu_notifier);
 
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 931930a96160..a718e0d23503 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -919,13 +919,11 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
 
 	/*
 	 * On success we use clflush, when the CPU supports it to
-	 * avoid the wbindv. If the CPU does not support it, in the
-	 * error case, and during early boot (for EFI) we fall back
-	 * to cpa_flush_all (which uses wbinvd):
+	 * avoid the wbindv. If the CPU does not support it and in the
+	 * error case we fall back to cpa_flush_all (which uses
+	 * wbindv):
 	 */
-	if (early_boot_irqs_disabled)
-		__cpa_flush_all((void *)(long)cache);
-	else if (!ret && cpu_has_clflush) {
+	if (!ret && cpu_has_clflush) {
 		if (cpa.flags & (CPA_PAGES_ARRAY | CPA_ARRAY)) {
 			cpa_flush_array(addr, numpages, cache,
 					cpa.flags, pages);
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 0a34d9e9c263..ad4439145f85 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -239,7 +239,22 @@ static efi_status_t __init phys_efi_set_virtual_address_map(
 	return status;
 }
 
-static int efi_set_rtc_mmss(unsigned long nowtime)
+static efi_status_t __init phys_efi_get_time(efi_time_t *tm,
+					     efi_time_cap_t *tc)
+{
+	unsigned long flags;
+	efi_status_t status;
+
+	spin_lock_irqsave(&rtc_lock, flags);
+	efi_call_phys_prelog();
+	status = efi_call_phys2(efi_phys.get_time, virt_to_phys(tm),
+				virt_to_phys(tc));
+	efi_call_phys_epilog();
+	spin_unlock_irqrestore(&rtc_lock, flags);
+	return status;
+}
+
+int efi_set_rtc_mmss(unsigned long nowtime)
 {
 	int real_seconds, real_minutes;
 	efi_status_t 	status;
@@ -268,7 +283,7 @@ static int efi_set_rtc_mmss(unsigned long nowtime)
 	return 0;
 }
 
-static unsigned long efi_get_time(void)
+unsigned long efi_get_time(void)
 {
 	efi_status_t status;
 	efi_time_t eft;
@@ -624,13 +639,18 @@ static int __init efi_runtime_init(void)
 	}
 	/*
 	 * We will only need *early* access to the following
-	 * EFI runtime service before set_virtual_address_map
+	 * two EFI runtime services before set_virtual_address_map
 	 * is invoked.
 	 */
+	efi_phys.get_time = (efi_get_time_t *)runtime->get_time;
 	efi_phys.set_virtual_address_map =
 		(efi_set_virtual_address_map_t *)
 		runtime->set_virtual_address_map;
-
+	/*
+	 * Make efi_get_time can be called before entering
+	 * virtual mode.
+	 */
+	efi.get_time = phys_efi_get_time;
 	early_iounmap(runtime, sizeof(efi_runtime_services_t));
 
 	return 0;
@@ -716,10 +736,12 @@ void __init efi_init(void)
 		efi_enabled = 0;
 		return;
 	}
+#ifdef CONFIG_X86_32
 	if (efi_is_native()) {
 		x86_platform.get_wallclock = efi_get_time;
 		x86_platform.set_wallclock = efi_set_rtc_mmss;
 	}
+#endif
 
 #if EFI_DEBUG
 	print_efi_memmap();
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index 06c8b2e662ab..95fd505dfeb6 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -58,21 +58,6 @@ static void __init early_code_mapping_set_exec(int executable)
 	}
 }
 
-unsigned long efi_call_virt_prelog(void)
-{
-	unsigned long saved;
-
-	saved = read_cr3();
-	write_cr3(real_mode_header->trampoline_pgd);
-
-	return saved;
-}
-
-void efi_call_virt_epilog(unsigned long saved)
-{
-	write_cr3(saved);
-}
-
 void __init efi_call_phys_prelog(void)
 {
 	unsigned long vaddress;
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 02a69418be18..8b84916dc671 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -587,6 +587,8 @@ extern u64 efi_mem_attribute (unsigned long phys_addr, unsigned long size);
 extern int __init efi_uart_console_only (void);
 extern void efi_initialize_iomem_resources(struct resource *code_resource,
 		struct resource *data_resource, struct resource *bss_resource);
+extern unsigned long efi_get_time(void);
+extern int efi_set_rtc_mmss(unsigned long nowtime);
 extern void efi_reserve_boot_services(void);
 extern struct efi_memory_map memmap;
 
diff --git a/init/main.c b/init/main.c
index 6af5470b8067..63ae904a99a8 100644
--- a/init/main.c
+++ b/init/main.c
@@ -463,10 +463,6 @@ static void __init mm_init(void)
 	percpu_init_late();
 	pgtable_cache_init();
 	vmalloc_init();
-#ifdef CONFIG_X86
-	if (efi_enabled)
-		efi_enter_virtual_mode();
-#endif
 }
 
 asmlinkage void __init start_kernel(void)
@@ -607,6 +603,10 @@ asmlinkage void __init start_kernel(void)
 	calibrate_delay();
 	pidmap_init();
 	anon_vma_init();
+#ifdef CONFIG_X86
+	if (efi_enabled)
+		efi_enter_virtual_mode();
+#endif
 	thread_info_cache_init();
 	cred_init();
 	fork_init(totalram_pages);
-- 
cgit v1.2.3


From 9360b53661a2c7754517b2925580055bacc8ec38 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Mon, 17 Dec 2012 11:29:09 -0800
Subject: Revert "bdi: add a user-tunable cpu_list for the bdi flusher threads"

This reverts commit 8fa72d234da9b6b473bbb1f74d533663e4996e6b.

People disagree about how this should be done, so let's revert this for
now so that nobody starts using the new tuning interface.  Tejun is
thinking about a more generic interface for thread pool affinity.

Requested-by: Tejun Heo <tj@kernel.org>
Acked-by: Jeff Moyer <jmoyer@redhat.com>
Acked-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/backing-dev.h |  4 ---
 mm/backing-dev.c            | 84 ---------------------------------------------
 2 files changed, 88 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 238521a19849..2a9a9abc9126 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -18,7 +18,6 @@
 #include <linux/writeback.h>
 #include <linux/atomic.h>
 #include <linux/sysctl.h>
-#include <linux/mutex.h>
 
 struct page;
 struct device;
@@ -106,9 +105,6 @@ struct backing_dev_info {
 
 	struct timer_list laptop_mode_wb_timer;
 
-	cpumask_t *flusher_cpumask; /* used for writeback thread scheduling */
-	struct mutex flusher_cpumask_lock;
-
 #ifdef CONFIG_DEBUG_FS
 	struct dentry *debug_dir;
 	struct dentry *debug_stats;
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index bd6a6cabef71..d3ca2b3ee176 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -10,7 +10,6 @@
 #include <linux/module.h>
 #include <linux/writeback.h>
 #include <linux/device.h>
-#include <linux/slab.h>
 #include <trace/events/writeback.h>
 
 static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);
@@ -222,63 +221,12 @@ static ssize_t max_ratio_store(struct device *dev,
 }
 BDI_SHOW(max_ratio, bdi->max_ratio)
 
-static ssize_t cpu_list_store(struct device *dev,
-		struct device_attribute *attr, const char *buf, size_t count)
-{
-	struct backing_dev_info *bdi = dev_get_drvdata(dev);
-	struct bdi_writeback *wb = &bdi->wb;
-	cpumask_var_t newmask;
-	ssize_t ret;
-	struct task_struct *task;
-
-	if (!alloc_cpumask_var(&newmask, GFP_KERNEL))
-		return -ENOMEM;
-
-	ret = cpulist_parse(buf, newmask);
-	if (!ret) {
-		spin_lock_bh(&bdi->wb_lock);
-		task = wb->task;
-		if (task)
-			get_task_struct(task);
-		spin_unlock_bh(&bdi->wb_lock);
-
-		mutex_lock(&bdi->flusher_cpumask_lock);
-		if (task) {
-			ret = set_cpus_allowed_ptr(task, newmask);
-			put_task_struct(task);
-		}
-		if (ret == 0) {
-			cpumask_copy(bdi->flusher_cpumask, newmask);
-			ret = count;
-		}
-		mutex_unlock(&bdi->flusher_cpumask_lock);
-
-	}
-	free_cpumask_var(newmask);
-
-	return ret;
-}
-
-static ssize_t cpu_list_show(struct device *dev,
-		struct device_attribute *attr, char *page)
-{
-	struct backing_dev_info *bdi = dev_get_drvdata(dev);
-	ssize_t ret;
-
-	mutex_lock(&bdi->flusher_cpumask_lock);
-	ret = cpulist_scnprintf(page, PAGE_SIZE-1, bdi->flusher_cpumask);
-	mutex_unlock(&bdi->flusher_cpumask_lock);
-
-	return ret;
-}
-
 #define __ATTR_RW(attr) __ATTR(attr, 0644, attr##_show, attr##_store)
 
 static struct device_attribute bdi_dev_attrs[] = {
 	__ATTR_RW(read_ahead_kb),
 	__ATTR_RW(min_ratio),
 	__ATTR_RW(max_ratio),
-	__ATTR_RW(cpu_list),
 	__ATTR_NULL,
 };
 
@@ -480,7 +428,6 @@ static int bdi_forker_thread(void *ptr)
 				writeback_inodes_wb(&bdi->wb, 1024,
 						    WB_REASON_FORKER_THREAD);
 			} else {
-				int ret;
 				/*
 				 * The spinlock makes sure we do not lose
 				 * wake-ups when racing with 'bdi_queue_work()'.
@@ -490,14 +437,6 @@ static int bdi_forker_thread(void *ptr)
 				spin_lock_bh(&bdi->wb_lock);
 				bdi->wb.task = task;
 				spin_unlock_bh(&bdi->wb_lock);
-				mutex_lock(&bdi->flusher_cpumask_lock);
-				ret = set_cpus_allowed_ptr(task,
-							bdi->flusher_cpumask);
-				mutex_unlock(&bdi->flusher_cpumask_lock);
-				if (ret)
-					printk_once("%s: failed to bind flusher"
-						    " thread %s, error %d\n",
-						    __func__, task->comm, ret);
 				wake_up_process(task);
 			}
 			bdi_clear_pending(bdi);
@@ -570,17 +509,6 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent,
 						dev_name(dev));
 		if (IS_ERR(wb->task))
 			return PTR_ERR(wb->task);
-	} else {
-		int node;
-		/*
-		 * Set up a default cpumask for the flusher threads that
-		 * includes all cpus on the same numa node as the device.
-		 * The mask may be overridden via sysfs.
-		 */
-		node = dev_to_node(bdi->dev);
-		if (node != NUMA_NO_NODE)
-			cpumask_copy(bdi->flusher_cpumask,
-				     cpumask_of_node(node));
 	}
 
 	bdi_debug_register(bdi, dev_name(dev));
@@ -706,15 +634,6 @@ int bdi_init(struct backing_dev_info *bdi)
 
 	bdi_wb_init(&bdi->wb, bdi);
 
-	if (!bdi_cap_flush_forker(bdi)) {
-		bdi->flusher_cpumask = kmalloc(sizeof(cpumask_t), GFP_KERNEL);
-		if (!bdi->flusher_cpumask)
-			return -ENOMEM;
-		cpumask_setall(bdi->flusher_cpumask);
-		mutex_init(&bdi->flusher_cpumask_lock);
-	} else
-		bdi->flusher_cpumask = NULL;
-
 	for (i = 0; i < NR_BDI_STAT_ITEMS; i++) {
 		err = percpu_counter_init(&bdi->bdi_stat[i], 0);
 		if (err)
@@ -737,7 +656,6 @@ int bdi_init(struct backing_dev_info *bdi)
 err:
 		while (i--)
 			percpu_counter_destroy(&bdi->bdi_stat[i]);
-		kfree(bdi->flusher_cpumask);
 	}
 
 	return err;
@@ -765,8 +683,6 @@ void bdi_destroy(struct backing_dev_info *bdi)
 
 	bdi_unregister(bdi);
 
-	kfree(bdi->flusher_cpumask);
-
 	/*
 	 * If bdi_unregister() had already been called earlier, the
 	 * wakeup_timer could still be armed because bdi_prune_sb()
-- 
cgit v1.2.3


From 7929d407e47fbf843fe1337fd95ed57785ae5e9d Mon Sep 17 00:00:00 2001
From: Matthew Leach <matthew@mattleach.net>
Date: Mon, 17 Dec 2012 15:59:32 -0800
Subject: include/linux/init.h: use the stringify operator for the
 __define_initcall macro

Currently the __define_initcall() macro takes three arguments, fn, id and
level.  The level argument is exactly the same as the id argument but
wrapped in quotes.  To overcome this need to specify three arguments to
the __define_initcall macro, where one argument is the stringification of
another, we can just use the stringification macro instead.

Signed-off-by: Matthew Leach <matthew@mattleach.net>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/powerpc/include/asm/machdep.h | 36 +++++++++++++++++-----------------
 include/linux/init.h               | 40 +++++++++++++++++++-------------------
 2 files changed, 38 insertions(+), 38 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
index c4231973edd3..3c82daf8be99 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -320,28 +320,28 @@ static inline void log_error(char *buf, unsigned int err_type, int fatal)
 		ppc_md.log_error(buf, err_type, fatal);
 }
 
-#define __define_machine_initcall(mach,level,fn,id) \
+#define __define_machine_initcall(mach, fn, id) \
 	static int __init __machine_initcall_##mach##_##fn(void) { \
 		if (machine_is(mach)) return fn(); \
 		return 0; \
 	} \
-	__define_initcall(level,__machine_initcall_##mach##_##fn,id);
-
-#define machine_core_initcall(mach,fn)		__define_machine_initcall(mach,"1",fn,1)
-#define machine_core_initcall_sync(mach,fn)	__define_machine_initcall(mach,"1s",fn,1s)
-#define machine_postcore_initcall(mach,fn)	__define_machine_initcall(mach,"2",fn,2)
-#define machine_postcore_initcall_sync(mach,fn)	__define_machine_initcall(mach,"2s",fn,2s)
-#define machine_arch_initcall(mach,fn)		__define_machine_initcall(mach,"3",fn,3)
-#define machine_arch_initcall_sync(mach,fn)	__define_machine_initcall(mach,"3s",fn,3s)
-#define machine_subsys_initcall(mach,fn)	__define_machine_initcall(mach,"4",fn,4)
-#define machine_subsys_initcall_sync(mach,fn)	__define_machine_initcall(mach,"4s",fn,4s)
-#define machine_fs_initcall(mach,fn)		__define_machine_initcall(mach,"5",fn,5)
-#define machine_fs_initcall_sync(mach,fn)	__define_machine_initcall(mach,"5s",fn,5s)
-#define machine_rootfs_initcall(mach,fn)	__define_machine_initcall(mach,"rootfs",fn,rootfs)
-#define machine_device_initcall(mach,fn)	__define_machine_initcall(mach,"6",fn,6)
-#define machine_device_initcall_sync(mach,fn)	__define_machine_initcall(mach,"6s",fn,6s)
-#define machine_late_initcall(mach,fn)		__define_machine_initcall(mach,"7",fn,7)
-#define machine_late_initcall_sync(mach,fn)	__define_machine_initcall(mach,"7s",fn,7s)
+	__define_initcall(__machine_initcall_##mach##_##fn, id);
+
+#define machine_core_initcall(mach, fn)		__define_machine_initcall(mach, fn, 1)
+#define machine_core_initcall_sync(mach, fn)	__define_machine_initcall(mach, fn, 1s)
+#define machine_postcore_initcall(mach, fn)	__define_machine_initcall(mach, fn, 2)
+#define machine_postcore_initcall_sync(mach, fn)	__define_machine_initcall(mach, fn, 2s)
+#define machine_arch_initcall(mach, fn)		__define_machine_initcall(mach, fn, 3)
+#define machine_arch_initcall_sync(mach, fn)	__define_machine_initcall(mach, fn, 3s)
+#define machine_subsys_initcall(mach, fn)	__define_machine_initcall(mach, fn, 4)
+#define machine_subsys_initcall_sync(mach, fn)	__define_machine_initcall(mach, fn, 4s)
+#define machine_fs_initcall(mach, fn)		__define_machine_initcall(mach, fn, 5)
+#define machine_fs_initcall_sync(mach, fn)	__define_machine_initcall(mach, fn, 5s)
+#define machine_rootfs_initcall(mach, fn)	__define_machine_initcall(mach, fn, rootfs)
+#define machine_device_initcall(mach, fn)	__define_machine_initcall(mach, fn, 6)
+#define machine_device_initcall_sync(mach, fn)	__define_machine_initcall(mach, fn, 6s)
+#define machine_late_initcall(mach, fn)		__define_machine_initcall(mach, fn, 7)
+#define machine_late_initcall_sync(mach, fn)	__define_machine_initcall(mach, fn, 7s)
 
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_MACHDEP_H */
diff --git a/include/linux/init.h b/include/linux/init.h
index f63692d6902e..a799273714ac 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -182,16 +182,16 @@ extern bool initcall_debug;
  * can point at the same handler without causing duplicate-symbol build errors.
  */
 
-#define __define_initcall(level,fn,id) \
+#define __define_initcall(fn, id) \
 	static initcall_t __initcall_##fn##id __used \
-	__attribute__((__section__(".initcall" level ".init"))) = fn
+	__attribute__((__section__(".initcall" #id ".init"))) = fn
 
 /*
  * Early initcalls run before initializing SMP.
  *
  * Only for built-in code, not modules.
  */
-#define early_initcall(fn)		__define_initcall("early",fn,early)
+#define early_initcall(fn)		__define_initcall(fn, early)
 
 /*
  * A "pure" initcall has no dependencies on anything else, and purely
@@ -200,23 +200,23 @@ extern bool initcall_debug;
  * This only exists for built-in code, not for modules.
  * Keep main.c:initcall_level_names[] in sync.
  */
-#define pure_initcall(fn)		__define_initcall("0",fn,0)
-
-#define core_initcall(fn)		__define_initcall("1",fn,1)
-#define core_initcall_sync(fn)		__define_initcall("1s",fn,1s)
-#define postcore_initcall(fn)		__define_initcall("2",fn,2)
-#define postcore_initcall_sync(fn)	__define_initcall("2s",fn,2s)
-#define arch_initcall(fn)		__define_initcall("3",fn,3)
-#define arch_initcall_sync(fn)		__define_initcall("3s",fn,3s)
-#define subsys_initcall(fn)		__define_initcall("4",fn,4)
-#define subsys_initcall_sync(fn)	__define_initcall("4s",fn,4s)
-#define fs_initcall(fn)			__define_initcall("5",fn,5)
-#define fs_initcall_sync(fn)		__define_initcall("5s",fn,5s)
-#define rootfs_initcall(fn)		__define_initcall("rootfs",fn,rootfs)
-#define device_initcall(fn)		__define_initcall("6",fn,6)
-#define device_initcall_sync(fn)	__define_initcall("6s",fn,6s)
-#define late_initcall(fn)		__define_initcall("7",fn,7)
-#define late_initcall_sync(fn)		__define_initcall("7s",fn,7s)
+#define pure_initcall(fn)		__define_initcall(fn, 0)
+
+#define core_initcall(fn)		__define_initcall(fn, 1)
+#define core_initcall_sync(fn)		__define_initcall(fn, 1s)
+#define postcore_initcall(fn)		__define_initcall(fn, 2)
+#define postcore_initcall_sync(fn)	__define_initcall(fn, 2s)
+#define arch_initcall(fn)		__define_initcall(fn, 3)
+#define arch_initcall_sync(fn)		__define_initcall(fn, 3s)
+#define subsys_initcall(fn)		__define_initcall(fn, 4)
+#define subsys_initcall_sync(fn)	__define_initcall(fn, 4s)
+#define fs_initcall(fn)			__define_initcall(fn, 5)
+#define fs_initcall_sync(fn)		__define_initcall(fn, 5s)
+#define rootfs_initcall(fn)		__define_initcall(fn, rootfs)
+#define device_initcall(fn)		__define_initcall(fn, 6)
+#define device_initcall_sync(fn)	__define_initcall(fn, 6s)
+#define late_initcall(fn)		__define_initcall(fn, 7)
+#define late_initcall_sync(fn)		__define_initcall(fn, 7s)
 
 #define __initcall(fn) device_initcall(fn)
 
-- 
cgit v1.2.3


From 965c8e59cfcf845ecde2265a1d1bfee5f011d302 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Mon, 17 Dec 2012 15:59:39 -0800
Subject: lseek: the "whence" argument is called "whence"

But the kernel decided to call it "origin" instead.  Fix most of the
sites.

Acked-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/bad_inode.c           |  2 +-
 fs/block_dev.c           |  4 ++--
 fs/btrfs/file.c          | 16 ++++++++--------
 fs/ceph/dir.c            |  4 ++--
 fs/ceph/file.c           |  6 +++---
 fs/cifs/cifsfs.c         |  8 ++++----
 fs/configfs/dir.c        |  4 ++--
 fs/ext3/dir.c            |  6 +++---
 fs/ext4/dir.c            |  6 +++---
 fs/ext4/file.c           | 22 +++++++++++-----------
 fs/fuse/file.c           |  8 ++++----
 fs/gfs2/file.c           | 10 +++++-----
 fs/libfs.c               |  4 ++--
 fs/nfs/dir.c             |  6 +++---
 fs/nfs/file.c            | 10 +++++-----
 fs/ocfs2/extent_map.c    | 12 ++++++------
 fs/ocfs2/file.c          |  6 +++---
 fs/pstore/inode.c        |  6 +++---
 fs/read_write.c          | 40 ++++++++++++++++++++--------------------
 fs/seq_file.c            |  4 ++--
 fs/ubifs/dir.c           |  4 ++--
 include/linux/fs.h       | 12 ++++++------
 include/linux/ftrace.h   |  4 ++--
 include/linux/syscalls.h |  4 ++--
 kernel/trace/ftrace.c    |  4 ++--
 mm/shmem.c               | 20 ++++++++++----------
 26 files changed, 116 insertions(+), 116 deletions(-)

(limited to 'include/linux')

diff --git a/fs/bad_inode.c b/fs/bad_inode.c
index b1342ffb3cf6..922ad460bff9 100644
--- a/fs/bad_inode.c
+++ b/fs/bad_inode.c
@@ -16,7 +16,7 @@
 #include <linux/poll.h>
 
 
-static loff_t bad_file_llseek(struct file *file, loff_t offset, int origin)
+static loff_t bad_file_llseek(struct file *file, loff_t offset, int whence)
 {
 	return -EIO;
 }
diff --git a/fs/block_dev.c b/fs/block_dev.c
index ab3a456f6650..172f8491a2bd 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -321,7 +321,7 @@ static int blkdev_write_end(struct file *file, struct address_space *mapping,
  * for a block special file file->f_path.dentry->d_inode->i_size is zero
  * so we compute the size by hand (just as in block_read/write above)
  */
-static loff_t block_llseek(struct file *file, loff_t offset, int origin)
+static loff_t block_llseek(struct file *file, loff_t offset, int whence)
 {
 	struct inode *bd_inode = file->f_mapping->host;
 	loff_t size;
@@ -331,7 +331,7 @@ static loff_t block_llseek(struct file *file, loff_t offset, int origin)
 	size = i_size_read(bd_inode);
 
 	retval = -EINVAL;
-	switch (origin) {
+	switch (whence) {
 		case SEEK_END:
 			offset += size;
 			break;
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index a8ee75cb96ee..9c6673a9231f 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -2120,7 +2120,7 @@ out:
 	return ret;
 }
 
-static int find_desired_extent(struct inode *inode, loff_t *offset, int origin)
+static int find_desired_extent(struct inode *inode, loff_t *offset, int whence)
 {
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct extent_map *em;
@@ -2154,7 +2154,7 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int origin)
 	 * before the position we want in case there is outstanding delalloc
 	 * going on here.
 	 */
-	if (origin == SEEK_HOLE && start != 0) {
+	if (whence == SEEK_HOLE && start != 0) {
 		if (start <= root->sectorsize)
 			em = btrfs_get_extent_fiemap(inode, NULL, 0, 0,
 						     root->sectorsize, 0);
@@ -2188,13 +2188,13 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int origin)
 				}
 			}
 
-			if (origin == SEEK_HOLE) {
+			if (whence == SEEK_HOLE) {
 				*offset = start;
 				free_extent_map(em);
 				break;
 			}
 		} else {
-			if (origin == SEEK_DATA) {
+			if (whence == SEEK_DATA) {
 				if (em->block_start == EXTENT_MAP_DELALLOC) {
 					if (start >= inode->i_size) {
 						free_extent_map(em);
@@ -2231,16 +2231,16 @@ out:
 	return ret;
 }
 
-static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int origin)
+static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int whence)
 {
 	struct inode *inode = file->f_mapping->host;
 	int ret;
 
 	mutex_lock(&inode->i_mutex);
-	switch (origin) {
+	switch (whence) {
 	case SEEK_END:
 	case SEEK_CUR:
-		offset = generic_file_llseek(file, offset, origin);
+		offset = generic_file_llseek(file, offset, whence);
 		goto out;
 	case SEEK_DATA:
 	case SEEK_HOLE:
@@ -2249,7 +2249,7 @@ static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int origin)
 			return -ENXIO;
 		}
 
-		ret = find_desired_extent(inode, &offset, origin);
+		ret = find_desired_extent(inode, &offset, whence);
 		if (ret) {
 			mutex_unlock(&inode->i_mutex);
 			return ret;
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index e5b77319c97b..8c1aabe93b67 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -454,7 +454,7 @@ static void reset_readdir(struct ceph_file_info *fi)
 	fi->flags &= ~CEPH_F_ATEND;
 }
 
-static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int origin)
+static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence)
 {
 	struct ceph_file_info *fi = file->private_data;
 	struct inode *inode = file->f_mapping->host;
@@ -463,7 +463,7 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int origin)
 
 	mutex_lock(&inode->i_mutex);
 	retval = -EINVAL;
-	switch (origin) {
+	switch (whence) {
 	case SEEK_END:
 		offset += inode->i_size + 2;   /* FIXME */
 		break;
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 5840d2aaed15..d4dfdcf76d7f 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -797,7 +797,7 @@ out:
 /*
  * llseek.  be sure to verify file size on SEEK_END.
  */
-static loff_t ceph_llseek(struct file *file, loff_t offset, int origin)
+static loff_t ceph_llseek(struct file *file, loff_t offset, int whence)
 {
 	struct inode *inode = file->f_mapping->host;
 	int ret;
@@ -805,7 +805,7 @@ static loff_t ceph_llseek(struct file *file, loff_t offset, int origin)
 	mutex_lock(&inode->i_mutex);
 	__ceph_do_pending_vmtruncate(inode);
 
-	if (origin == SEEK_END || origin == SEEK_DATA || origin == SEEK_HOLE) {
+	if (whence == SEEK_END || whence == SEEK_DATA || whence == SEEK_HOLE) {
 		ret = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE);
 		if (ret < 0) {
 			offset = ret;
@@ -813,7 +813,7 @@ static loff_t ceph_llseek(struct file *file, loff_t offset, int origin)
 		}
 	}
 
-	switch (origin) {
+	switch (whence) {
 	case SEEK_END:
 		offset += inode->i_size;
 		break;
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 210f0af83fc4..ce9f3c5421bf 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -695,13 +695,13 @@ static ssize_t cifs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 	return written;
 }
 
-static loff_t cifs_llseek(struct file *file, loff_t offset, int origin)
+static loff_t cifs_llseek(struct file *file, loff_t offset, int whence)
 {
 	/*
-	 * origin == SEEK_END || SEEK_DATA || SEEK_HOLE => we must revalidate
+	 * whence == SEEK_END || SEEK_DATA || SEEK_HOLE => we must revalidate
 	 * the cached file length
 	 */
-	if (origin != SEEK_SET && origin != SEEK_CUR) {
+	if (whence != SEEK_SET && whence != SEEK_CUR) {
 		int rc;
 		struct inode *inode = file->f_path.dentry->d_inode;
 
@@ -728,7 +728,7 @@ static loff_t cifs_llseek(struct file *file, loff_t offset, int origin)
 		if (rc < 0)
 			return (loff_t)rc;
 	}
-	return generic_file_llseek(file, offset, origin);
+	return generic_file_llseek(file, offset, whence);
 }
 
 static int cifs_setlease(struct file *file, long arg, struct file_lock **lease)
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 7414ae24a79b..712b10f64c70 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -1613,12 +1613,12 @@ static int configfs_readdir(struct file * filp, void * dirent, filldir_t filldir
 	return 0;
 }
 
-static loff_t configfs_dir_lseek(struct file * file, loff_t offset, int origin)
+static loff_t configfs_dir_lseek(struct file *file, loff_t offset, int whence)
 {
 	struct dentry * dentry = file->f_path.dentry;
 
 	mutex_lock(&dentry->d_inode->i_mutex);
-	switch (origin) {
+	switch (whence) {
 		case 1:
 			offset += file->f_pos;
 		case 0:
diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c
index c8fff930790d..dd91264ba94f 100644
--- a/fs/ext3/dir.c
+++ b/fs/ext3/dir.c
@@ -296,17 +296,17 @@ static inline loff_t ext3_get_htree_eof(struct file *filp)
  * NOTE: offsets obtained *before* ext3_set_inode_flag(dir, EXT3_INODE_INDEX)
  *       will be invalid once the directory was converted into a dx directory
  */
-loff_t ext3_dir_llseek(struct file *file, loff_t offset, int origin)
+loff_t ext3_dir_llseek(struct file *file, loff_t offset, int whence)
 {
 	struct inode *inode = file->f_mapping->host;
 	int dx_dir = is_dx_dir(inode);
 	loff_t htree_max = ext3_get_htree_eof(file);
 
 	if (likely(dx_dir))
-		return generic_file_llseek_size(file, offset, origin,
+		return generic_file_llseek_size(file, offset, whence,
 					        htree_max, htree_max);
 	else
-		return generic_file_llseek(file, offset, origin);
+		return generic_file_llseek(file, offset, whence);
 }
 
 /*
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index b8d877f6c1fa..80a28b297279 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -333,17 +333,17 @@ static inline loff_t ext4_get_htree_eof(struct file *filp)
  *
  * For non-htree, ext4_llseek already chooses the proper max offset.
  */
-loff_t ext4_dir_llseek(struct file *file, loff_t offset, int origin)
+loff_t ext4_dir_llseek(struct file *file, loff_t offset, int whence)
 {
 	struct inode *inode = file->f_mapping->host;
 	int dx_dir = is_dx_dir(inode);
 	loff_t htree_max = ext4_get_htree_eof(file);
 
 	if (likely(dx_dir))
-		return generic_file_llseek_size(file, offset, origin,
+		return generic_file_llseek_size(file, offset, whence,
 						    htree_max, htree_max);
 	else
-		return ext4_llseek(file, offset, origin);
+		return ext4_llseek(file, offset, whence);
 }
 
 /*
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index b64a60bf105a..d07c27ca594a 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -303,7 +303,7 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
  * page cache has data or not.
  */
 static int ext4_find_unwritten_pgoff(struct inode *inode,
-				     int origin,
+				     int whence,
 				     struct ext4_map_blocks *map,
 				     loff_t *offset)
 {
@@ -333,10 +333,10 @@ static int ext4_find_unwritten_pgoff(struct inode *inode,
 		nr_pages = pagevec_lookup(&pvec, inode->i_mapping, index,
 					  (pgoff_t)num);
 		if (nr_pages == 0) {
-			if (origin == SEEK_DATA)
+			if (whence == SEEK_DATA)
 				break;
 
-			BUG_ON(origin != SEEK_HOLE);
+			BUG_ON(whence != SEEK_HOLE);
 			/*
 			 * If this is the first time to go into the loop and
 			 * offset is not beyond the end offset, it will be a
@@ -352,7 +352,7 @@ static int ext4_find_unwritten_pgoff(struct inode *inode,
 		 * offset is smaller than the first page offset, it will be a
 		 * hole at this offset.
 		 */
-		if (lastoff == startoff && origin == SEEK_HOLE &&
+		if (lastoff == startoff && whence == SEEK_HOLE &&
 		    lastoff < page_offset(pvec.pages[0])) {
 			found = 1;
 			break;
@@ -366,7 +366,7 @@ static int ext4_find_unwritten_pgoff(struct inode *inode,
 			 * If the current offset is not beyond the end of given
 			 * range, it will be a hole.
 			 */
-			if (lastoff < endoff && origin == SEEK_HOLE &&
+			if (lastoff < endoff && whence == SEEK_HOLE &&
 			    page->index > end) {
 				found = 1;
 				*offset = lastoff;
@@ -391,10 +391,10 @@ static int ext4_find_unwritten_pgoff(struct inode *inode,
 				do {
 					if (buffer_uptodate(bh) ||
 					    buffer_unwritten(bh)) {
-						if (origin == SEEK_DATA)
+						if (whence == SEEK_DATA)
 							found = 1;
 					} else {
-						if (origin == SEEK_HOLE)
+						if (whence == SEEK_HOLE)
 							found = 1;
 					}
 					if (found) {
@@ -416,7 +416,7 @@ static int ext4_find_unwritten_pgoff(struct inode *inode,
 		 * The no. of pages is less than our desired, that would be a
 		 * hole in there.
 		 */
-		if (nr_pages < num && origin == SEEK_HOLE) {
+		if (nr_pages < num && whence == SEEK_HOLE) {
 			found = 1;
 			*offset = lastoff;
 			break;
@@ -609,7 +609,7 @@ static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize)
  * by calling generic_file_llseek_size() with the appropriate maxbytes
  * value for each.
  */
-loff_t ext4_llseek(struct file *file, loff_t offset, int origin)
+loff_t ext4_llseek(struct file *file, loff_t offset, int whence)
 {
 	struct inode *inode = file->f_mapping->host;
 	loff_t maxbytes;
@@ -619,11 +619,11 @@ loff_t ext4_llseek(struct file *file, loff_t offset, int origin)
 	else
 		maxbytes = inode->i_sb->s_maxbytes;
 
-	switch (origin) {
+	switch (whence) {
 	case SEEK_SET:
 	case SEEK_CUR:
 	case SEEK_END:
-		return generic_file_llseek_size(file, offset, origin,
+		return generic_file_llseek_size(file, offset, whence,
 						maxbytes, i_size_read(inode));
 	case SEEK_DATA:
 		return ext4_seek_data(file, offset, maxbytes);
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 78d2837bc940..e21d4d8f87e3 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1599,19 +1599,19 @@ static sector_t fuse_bmap(struct address_space *mapping, sector_t block)
 	return err ? 0 : outarg.block;
 }
 
-static loff_t fuse_file_llseek(struct file *file, loff_t offset, int origin)
+static loff_t fuse_file_llseek(struct file *file, loff_t offset, int whence)
 {
 	loff_t retval;
 	struct inode *inode = file->f_path.dentry->d_inode;
 
 	/* No i_mutex protection necessary for SEEK_CUR and SEEK_SET */
-	if (origin == SEEK_CUR || origin == SEEK_SET)
-		return generic_file_llseek(file, offset, origin);
+	if (whence == SEEK_CUR || whence == SEEK_SET)
+		return generic_file_llseek(file, offset, whence);
 
 	mutex_lock(&inode->i_mutex);
 	retval = fuse_update_attributes(inode, NULL, file, NULL);
 	if (!retval)
-		retval = generic_file_llseek(file, offset, origin);
+		retval = generic_file_llseek(file, offset, whence);
 	mutex_unlock(&inode->i_mutex);
 
 	return retval;
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index dfe2d8cb9b2c..991ab2d484dd 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -44,7 +44,7 @@
  * gfs2_llseek - seek to a location in a file
  * @file: the file
  * @offset: the offset
- * @origin: Where to seek from (SEEK_SET, SEEK_CUR, or SEEK_END)
+ * @whence: Where to seek from (SEEK_SET, SEEK_CUR, or SEEK_END)
  *
  * SEEK_END requires the glock for the file because it references the
  * file's size.
@@ -52,26 +52,26 @@
  * Returns: The new offset, or errno
  */
 
-static loff_t gfs2_llseek(struct file *file, loff_t offset, int origin)
+static loff_t gfs2_llseek(struct file *file, loff_t offset, int whence)
 {
 	struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
 	struct gfs2_holder i_gh;
 	loff_t error;
 
-	switch (origin) {
+	switch (whence) {
 	case SEEK_END: /* These reference inode->i_size */
 	case SEEK_DATA:
 	case SEEK_HOLE:
 		error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY,
 					   &i_gh);
 		if (!error) {
-			error = generic_file_llseek(file, offset, origin);
+			error = generic_file_llseek(file, offset, whence);
 			gfs2_glock_dq_uninit(&i_gh);
 		}
 		break;
 	case SEEK_CUR:
 	case SEEK_SET:
-		error = generic_file_llseek(file, offset, origin);
+		error = generic_file_llseek(file, offset, whence);
 		break;
 	default:
 		error = -EINVAL;
diff --git a/fs/libfs.c b/fs/libfs.c
index 7cc37ca19cd8..35fc6e74cd88 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -81,11 +81,11 @@ int dcache_dir_close(struct inode *inode, struct file *file)
 	return 0;
 }
 
-loff_t dcache_dir_lseek(struct file *file, loff_t offset, int origin)
+loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence)
 {
 	struct dentry *dentry = file->f_path.dentry;
 	mutex_lock(&dentry->d_inode->i_mutex);
-	switch (origin) {
+	switch (whence) {
 		case 1:
 			offset += file->f_pos;
 		case 0:
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index b9e66b7e0c14..1cc71f60b491 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -871,7 +871,7 @@ out:
 	return res;
 }
 
-static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin)
+static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int whence)
 {
 	struct dentry *dentry = filp->f_path.dentry;
 	struct inode *inode = dentry->d_inode;
@@ -880,10 +880,10 @@ static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin)
 	dfprintk(FILE, "NFS: llseek dir(%s/%s, %lld, %d)\n",
 			dentry->d_parent->d_name.name,
 			dentry->d_name.name,
-			offset, origin);
+			offset, whence);
 
 	mutex_lock(&inode->i_mutex);
-	switch (origin) {
+	switch (whence) {
 		case 1:
 			offset += filp->f_pos;
 		case 0:
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 582bb8866131..3c2b893665ba 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -119,18 +119,18 @@ force_reval:
 	return __nfs_revalidate_inode(server, inode);
 }
 
-loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin)
+loff_t nfs_file_llseek(struct file *filp, loff_t offset, int whence)
 {
 	dprintk("NFS: llseek file(%s/%s, %lld, %d)\n",
 			filp->f_path.dentry->d_parent->d_name.name,
 			filp->f_path.dentry->d_name.name,
-			offset, origin);
+			offset, whence);
 
 	/*
-	 * origin == SEEK_END || SEEK_DATA || SEEK_HOLE => we must revalidate
+	 * whence == SEEK_END || SEEK_DATA || SEEK_HOLE => we must revalidate
 	 * the cached file length
 	 */
-	if (origin != SEEK_SET && origin != SEEK_CUR) {
+	if (whence != SEEK_SET && whence != SEEK_CUR) {
 		struct inode *inode = filp->f_mapping->host;
 
 		int retval = nfs_revalidate_file_size(inode, filp);
@@ -138,7 +138,7 @@ loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin)
 			return (loff_t)retval;
 	}
 
-	return generic_file_llseek(filp, offset, origin);
+	return generic_file_llseek(filp, offset, whence);
 }
 EXPORT_SYMBOL_GPL(nfs_file_llseek);
 
diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c
index 70b5863a2d64..f487aa343442 100644
--- a/fs/ocfs2/extent_map.c
+++ b/fs/ocfs2/extent_map.c
@@ -832,7 +832,7 @@ out:
 	return ret;
 }
 
-int ocfs2_seek_data_hole_offset(struct file *file, loff_t *offset, int origin)
+int ocfs2_seek_data_hole_offset(struct file *file, loff_t *offset, int whence)
 {
 	struct inode *inode = file->f_mapping->host;
 	int ret;
@@ -843,7 +843,7 @@ int ocfs2_seek_data_hole_offset(struct file *file, loff_t *offset, int origin)
 	struct buffer_head *di_bh = NULL;
 	struct ocfs2_extent_rec rec;
 
-	BUG_ON(origin != SEEK_DATA && origin != SEEK_HOLE);
+	BUG_ON(whence != SEEK_DATA && whence != SEEK_HOLE);
 
 	ret = ocfs2_inode_lock(inode, &di_bh, 0);
 	if (ret) {
@@ -859,7 +859,7 @@ int ocfs2_seek_data_hole_offset(struct file *file, loff_t *offset, int origin)
 	}
 
 	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
-		if (origin == SEEK_HOLE)
+		if (whence == SEEK_HOLE)
 			*offset = inode->i_size;
 		goto out_unlock;
 	}
@@ -888,8 +888,8 @@ int ocfs2_seek_data_hole_offset(struct file *file, loff_t *offset, int origin)
 			is_data = (rec.e_flags & OCFS2_EXT_UNWRITTEN) ?  0 : 1;
 		}
 
-		if ((!is_data && origin == SEEK_HOLE) ||
-		    (is_data && origin == SEEK_DATA)) {
+		if ((!is_data && whence == SEEK_HOLE) ||
+		    (is_data && whence == SEEK_DATA)) {
 			if (extoff > *offset)
 				*offset = extoff;
 			goto out_unlock;
@@ -899,7 +899,7 @@ int ocfs2_seek_data_hole_offset(struct file *file, loff_t *offset, int origin)
 			cpos += clen;
 	}
 
-	if (origin == SEEK_HOLE) {
+	if (whence == SEEK_HOLE) {
 		extoff = cpos;
 		extoff <<= cs_bits;
 		extlen = clen;
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index dda089804942..fe492e1a3cfc 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2637,14 +2637,14 @@ bail:
 }
 
 /* Refer generic_file_llseek_unlocked() */
-static loff_t ocfs2_file_llseek(struct file *file, loff_t offset, int origin)
+static loff_t ocfs2_file_llseek(struct file *file, loff_t offset, int whence)
 {
 	struct inode *inode = file->f_mapping->host;
 	int ret = 0;
 
 	mutex_lock(&inode->i_mutex);
 
-	switch (origin) {
+	switch (whence) {
 	case SEEK_SET:
 		break;
 	case SEEK_END:
@@ -2659,7 +2659,7 @@ static loff_t ocfs2_file_llseek(struct file *file, loff_t offset, int origin)
 		break;
 	case SEEK_DATA:
 	case SEEK_HOLE:
-		ret = ocfs2_seek_data_hole_offset(file, &offset, origin);
+		ret = ocfs2_seek_data_hole_offset(file, &offset, whence);
 		if (ret)
 			goto out;
 		break;
diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
index ed1d8c7212da..67de74ca85f4 100644
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c
@@ -151,13 +151,13 @@ static int pstore_file_open(struct inode *inode, struct file *file)
 	return 0;
 }
 
-static loff_t pstore_file_llseek(struct file *file, loff_t off, int origin)
+static loff_t pstore_file_llseek(struct file *file, loff_t off, int whence)
 {
 	struct seq_file *sf = file->private_data;
 
 	if (sf->op)
-		return seq_lseek(file, off, origin);
-	return default_llseek(file, off, origin);
+		return seq_lseek(file, off, whence);
+	return default_llseek(file, off, whence);
 }
 
 static const struct file_operations pstore_file_operations = {
diff --git a/fs/read_write.c b/fs/read_write.c
index d06534857e9e..1edaf099ddd7 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -54,7 +54,7 @@ static loff_t lseek_execute(struct file *file, struct inode *inode,
  * generic_file_llseek_size - generic llseek implementation for regular files
  * @file:	file structure to seek on
  * @offset:	file offset to seek to
- * @origin:	type of seek
+ * @whence:	type of seek
  * @size:	max size of this file in file system
  * @eof:	offset used for SEEK_END position
  *
@@ -67,12 +67,12 @@ static loff_t lseek_execute(struct file *file, struct inode *inode,
  * read/writes behave like SEEK_SET against seeks.
  */
 loff_t
-generic_file_llseek_size(struct file *file, loff_t offset, int origin,
+generic_file_llseek_size(struct file *file, loff_t offset, int whence,
 		loff_t maxsize, loff_t eof)
 {
 	struct inode *inode = file->f_mapping->host;
 
-	switch (origin) {
+	switch (whence) {
 	case SEEK_END:
 		offset += eof;
 		break;
@@ -122,17 +122,17 @@ EXPORT_SYMBOL(generic_file_llseek_size);
  * generic_file_llseek - generic llseek implementation for regular files
  * @file:	file structure to seek on
  * @offset:	file offset to seek to
- * @origin:	type of seek
+ * @whence:	type of seek
  *
  * This is a generic implemenation of ->llseek useable for all normal local
  * filesystems.  It just updates the file offset to the value specified by
- * @offset and @origin under i_mutex.
+ * @offset and @whence under i_mutex.
  */
-loff_t generic_file_llseek(struct file *file, loff_t offset, int origin)
+loff_t generic_file_llseek(struct file *file, loff_t offset, int whence)
 {
 	struct inode *inode = file->f_mapping->host;
 
-	return generic_file_llseek_size(file, offset, origin,
+	return generic_file_llseek_size(file, offset, whence,
 					inode->i_sb->s_maxbytes,
 					i_size_read(inode));
 }
@@ -142,32 +142,32 @@ EXPORT_SYMBOL(generic_file_llseek);
  * noop_llseek - No Operation Performed llseek implementation
  * @file:	file structure to seek on
  * @offset:	file offset to seek to
- * @origin:	type of seek
+ * @whence:	type of seek
  *
  * This is an implementation of ->llseek useable for the rare special case when
  * userspace expects the seek to succeed but the (device) file is actually not
  * able to perform the seek. In this case you use noop_llseek() instead of
  * falling back to the default implementation of ->llseek.
  */
-loff_t noop_llseek(struct file *file, loff_t offset, int origin)
+loff_t noop_llseek(struct file *file, loff_t offset, int whence)
 {
 	return file->f_pos;
 }
 EXPORT_SYMBOL(noop_llseek);
 
-loff_t no_llseek(struct file *file, loff_t offset, int origin)
+loff_t no_llseek(struct file *file, loff_t offset, int whence)
 {
 	return -ESPIPE;
 }
 EXPORT_SYMBOL(no_llseek);
 
-loff_t default_llseek(struct file *file, loff_t offset, int origin)
+loff_t default_llseek(struct file *file, loff_t offset, int whence)
 {
 	struct inode *inode = file->f_path.dentry->d_inode;
 	loff_t retval;
 
 	mutex_lock(&inode->i_mutex);
-	switch (origin) {
+	switch (whence) {
 		case SEEK_END:
 			offset += i_size_read(inode);
 			break;
@@ -216,7 +216,7 @@ out:
 }
 EXPORT_SYMBOL(default_llseek);
 
-loff_t vfs_llseek(struct file *file, loff_t offset, int origin)
+loff_t vfs_llseek(struct file *file, loff_t offset, int whence)
 {
 	loff_t (*fn)(struct file *, loff_t, int);
 
@@ -225,11 +225,11 @@ loff_t vfs_llseek(struct file *file, loff_t offset, int origin)
 		if (file->f_op && file->f_op->llseek)
 			fn = file->f_op->llseek;
 	}
-	return fn(file, offset, origin);
+	return fn(file, offset, whence);
 }
 EXPORT_SYMBOL(vfs_llseek);
 
-SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, origin)
+SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, whence)
 {
 	off_t retval;
 	struct fd f = fdget(fd);
@@ -237,8 +237,8 @@ SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, origin)
 		return -EBADF;
 
 	retval = -EINVAL;
-	if (origin <= SEEK_MAX) {
-		loff_t res = vfs_llseek(f.file, offset, origin);
+	if (whence <= SEEK_MAX) {
+		loff_t res = vfs_llseek(f.file, offset, whence);
 		retval = res;
 		if (res != (loff_t)retval)
 			retval = -EOVERFLOW;	/* LFS: should only happen on 32 bit platforms */
@@ -250,7 +250,7 @@ SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, origin)
 #ifdef __ARCH_WANT_SYS_LLSEEK
 SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high,
 		unsigned long, offset_low, loff_t __user *, result,
-		unsigned int, origin)
+		unsigned int, whence)
 {
 	int retval;
 	struct fd f = fdget(fd);
@@ -260,11 +260,11 @@ SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high,
 		return -EBADF;
 
 	retval = -EINVAL;
-	if (origin > SEEK_MAX)
+	if (whence > SEEK_MAX)
 		goto out_putf;
 
 	offset = vfs_llseek(f.file, ((loff_t) offset_high << 32) | offset_low,
-			origin);
+			whence);
 
 	retval = (int)offset;
 	if (offset >= 0) {
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 99dffab4c4e4..9d863fb501f9 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -300,14 +300,14 @@ EXPORT_SYMBOL(seq_read);
  *
  *	Ready-made ->f_op->llseek()
  */
-loff_t seq_lseek(struct file *file, loff_t offset, int origin)
+loff_t seq_lseek(struct file *file, loff_t offset, int whence)
 {
 	struct seq_file *m = file->private_data;
 	loff_t retval = -EINVAL;
 
 	mutex_lock(&m->lock);
 	m->version = file->f_version;
-	switch (origin) {
+	switch (whence) {
 		case 1:
 			offset += file->f_pos;
 		case 0:
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index e271fba1651b..8a574776a493 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -453,11 +453,11 @@ out:
 }
 
 /* If a directory is seeked, we have to free saved readdir() state */
-static loff_t ubifs_dir_llseek(struct file *file, loff_t offset, int origin)
+static loff_t ubifs_dir_llseek(struct file *file, loff_t offset, int whence)
 {
 	kfree(file->private_data);
 	file->private_data = NULL;
-	return generic_file_llseek(file, offset, origin);
+	return generic_file_llseek(file, offset, whence);
 }
 
 /* Free saved readdir() state when the directory is closed */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 408fb1e77a0a..029552ff774c 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2286,9 +2286,9 @@ extern ino_t find_inode_number(struct dentry *, struct qstr *);
 #include <linux/err.h>
 
 /* needed for stackable file system support */
-extern loff_t default_llseek(struct file *file, loff_t offset, int origin);
+extern loff_t default_llseek(struct file *file, loff_t offset, int whence);
 
-extern loff_t vfs_llseek(struct file *file, loff_t offset, int origin);
+extern loff_t vfs_llseek(struct file *file, loff_t offset, int whence);
 
 extern int inode_init_always(struct super_block *, struct inode *);
 extern void inode_init_once(struct inode *);
@@ -2396,11 +2396,11 @@ extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
 
 extern void
 file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping);
-extern loff_t noop_llseek(struct file *file, loff_t offset, int origin);
-extern loff_t no_llseek(struct file *file, loff_t offset, int origin);
-extern loff_t generic_file_llseek(struct file *file, loff_t offset, int origin);
+extern loff_t noop_llseek(struct file *file, loff_t offset, int whence);
+extern loff_t no_llseek(struct file *file, loff_t offset, int whence);
+extern loff_t generic_file_llseek(struct file *file, loff_t offset, int whence);
 extern loff_t generic_file_llseek_size(struct file *file, loff_t offset,
-		int origin, loff_t maxsize, loff_t eof);
+		int whence, loff_t maxsize, loff_t eof);
 extern int generic_file_open(struct inode * inode, struct file * filp);
 extern int nonseekable_open(struct inode * inode, struct file * filp);
 
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index a52f2f4fe030..92691d85c320 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -394,7 +394,7 @@ ssize_t ftrace_filter_write(struct file *file, const char __user *ubuf,
 			    size_t cnt, loff_t *ppos);
 ssize_t ftrace_notrace_write(struct file *file, const char __user *ubuf,
 			     size_t cnt, loff_t *ppos);
-loff_t ftrace_regex_lseek(struct file *file, loff_t offset, int origin);
+loff_t ftrace_regex_lseek(struct file *file, loff_t offset, int whence);
 int ftrace_regex_release(struct inode *inode, struct file *file);
 
 void __init
@@ -559,7 +559,7 @@ static inline ssize_t ftrace_filter_write(struct file *file, const char __user *
 			    size_t cnt, loff_t *ppos) { return -ENODEV; }
 static inline ssize_t ftrace_notrace_write(struct file *file, const char __user *ubuf,
 			     size_t cnt, loff_t *ppos) { return -ENODEV; }
-static inline loff_t ftrace_regex_lseek(struct file *file, loff_t offset, int origin)
+static inline loff_t ftrace_regex_lseek(struct file *file, loff_t offset, int whence)
 {
 	return -ENODEV;
 }
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 91835e7f364d..36c3b07c5119 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -560,10 +560,10 @@ asmlinkage long sys_utime(char __user *filename,
 asmlinkage long sys_utimes(char __user *filename,
 				struct timeval __user *utimes);
 asmlinkage long sys_lseek(unsigned int fd, off_t offset,
-			  unsigned int origin);
+			  unsigned int whence);
 asmlinkage long sys_llseek(unsigned int fd, unsigned long offset_high,
 			unsigned long offset_low, loff_t __user *result,
-			unsigned int origin);
+			unsigned int whence);
 asmlinkage long sys_read(unsigned int fd, char __user *buf, size_t count);
 asmlinkage long sys_readahead(int fd, loff_t offset, size_t count);
 asmlinkage long sys_readv(unsigned long fd,
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index afd092de45b7..3ffe4c5ad3f3 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -2675,12 +2675,12 @@ ftrace_notrace_open(struct inode *inode, struct file *file)
 }
 
 loff_t
-ftrace_regex_lseek(struct file *file, loff_t offset, int origin)
+ftrace_regex_lseek(struct file *file, loff_t offset, int whence)
 {
 	loff_t ret;
 
 	if (file->f_mode & FMODE_READ)
-		ret = seq_lseek(file, offset, origin);
+		ret = seq_lseek(file, offset, whence);
 	else
 		file->f_pos = ret = 1;
 
diff --git a/mm/shmem.c b/mm/shmem.c
index 03f9ba8fb8e5..5c90d84c2b02 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1719,7 +1719,7 @@ static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos,
  * llseek SEEK_DATA or SEEK_HOLE through the radix_tree.
  */
 static pgoff_t shmem_seek_hole_data(struct address_space *mapping,
-				    pgoff_t index, pgoff_t end, int origin)
+				    pgoff_t index, pgoff_t end, int whence)
 {
 	struct page *page;
 	struct pagevec pvec;
@@ -1733,13 +1733,13 @@ static pgoff_t shmem_seek_hole_data(struct address_space *mapping,
 		pvec.nr = shmem_find_get_pages_and_swap(mapping, index,
 					pvec.nr, pvec.pages, indices);
 		if (!pvec.nr) {
-			if (origin == SEEK_DATA)
+			if (whence == SEEK_DATA)
 				index = end;
 			break;
 		}
 		for (i = 0; i < pvec.nr; i++, index++) {
 			if (index < indices[i]) {
-				if (origin == SEEK_HOLE) {
+				if (whence == SEEK_HOLE) {
 					done = true;
 					break;
 				}
@@ -1751,8 +1751,8 @@ static pgoff_t shmem_seek_hole_data(struct address_space *mapping,
 					page = NULL;
 			}
 			if (index >= end ||
-			    (page && origin == SEEK_DATA) ||
-			    (!page && origin == SEEK_HOLE)) {
+			    (page && whence == SEEK_DATA) ||
+			    (!page && whence == SEEK_HOLE)) {
 				done = true;
 				break;
 			}
@@ -1765,15 +1765,15 @@ static pgoff_t shmem_seek_hole_data(struct address_space *mapping,
 	return index;
 }
 
-static loff_t shmem_file_llseek(struct file *file, loff_t offset, int origin)
+static loff_t shmem_file_llseek(struct file *file, loff_t offset, int whence)
 {
 	struct address_space *mapping = file->f_mapping;
 	struct inode *inode = mapping->host;
 	pgoff_t start, end;
 	loff_t new_offset;
 
-	if (origin != SEEK_DATA && origin != SEEK_HOLE)
-		return generic_file_llseek_size(file, offset, origin,
+	if (whence != SEEK_DATA && whence != SEEK_HOLE)
+		return generic_file_llseek_size(file, offset, whence,
 					MAX_LFS_FILESIZE, i_size_read(inode));
 	mutex_lock(&inode->i_mutex);
 	/* We're holding i_mutex so we can access i_size directly */
@@ -1785,12 +1785,12 @@ static loff_t shmem_file_llseek(struct file *file, loff_t offset, int origin)
 	else {
 		start = offset >> PAGE_CACHE_SHIFT;
 		end = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-		new_offset = shmem_seek_hole_data(mapping, start, end, origin);
+		new_offset = shmem_seek_hole_data(mapping, start, end, whence);
 		new_offset <<= PAGE_CACHE_SHIFT;
 		if (new_offset > offset) {
 			if (new_offset < inode->i_size)
 				offset = new_offset;
-			else if (origin == SEEK_DATA)
+			else if (whence == SEEK_DATA)
 				offset = -ENXIO;
 			else
 				offset = inode->i_size;
-- 
cgit v1.2.3


From 8cc9764c9c7d01a6e2c3ddac8f0ac7716be01868 Mon Sep 17 00:00:00 2001
From: "Kim, Milo" <Milo.Kim@ti.com>
Date: Mon, 17 Dec 2012 16:00:43 -0800
Subject: drivers/video/backlight/lp855x_bl.c: use generic PWM functions

The LP855x family devices support the PWM input for the backlight control.
 Period of the PWM is configurable in the platform side.  Platform
specific functions are unnecessary anymore because generic PWM functions
are used inside the driver.

(PWM input mode)
To set the brightness, new lp855x_pwm_ctrl() is used.
If a PWM device is not allocated, devm_pwm_get() is called.
The PWM consumer name is from the chip name such as 'lp8550' and 'lp8556'.
To get the brightness value, no additional handling is required.
Just the value of 'props.brightness' is returned.

If the PWM driver is not ready while initializing the LP855x driver, it's
OK.  The PWM device can be retrieved later, when the brightness value is
changed.

Documentation is updated with an example.

[akpm@linux-foundation.org: coding-style simplification, per Thierry]
Signed-off-by: Milo(Woogyom) Kim <milo.kim@ti.com>
Cc: Thierry Reding <thierry.reding@avionic-design.de>
Cc: Richard Purdie <rpurdie@rpsys.net>
Cc: Bryan Wu <bryan.wu@canonical.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/backlight/lp855x-driver.txt | 10 ++-------
 drivers/video/backlight/lp855x_bl.c       | 37 ++++++++++++++++++++++---------
 include/linux/platform_data/lp855x.h      |  9 ++------
 3 files changed, 30 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/backlight/lp855x-driver.txt b/Documentation/backlight/lp855x-driver.txt
index f5e4caafab7d..1529394cfe8b 100644
--- a/Documentation/backlight/lp855x-driver.txt
+++ b/Documentation/backlight/lp855x-driver.txt
@@ -35,11 +35,8 @@ For supporting platform specific data, the lp855x platform data can be used.
 * mode : Brightness control mode. PWM or register based.
 * device_control : Value of DEVICE CONTROL register.
 * initial_brightness : Initial value of backlight brightness.
-* pwm_data : Platform specific pwm generation functions.
+* period_ns : Platform specific PWM period value. unit is nano.
 	     Only valid when brightness is pwm input mode.
-	     Functions should be implemented by PWM driver.
-	     - pwm_set_intensity() : set duty of PWM
-	     - pwm_get_intensity() : get current duty of PWM
 * load_new_rom_data :
 	0 : use default configuration data
 	1 : update values of eeprom or eprom registers on loading driver
@@ -71,8 +68,5 @@ static struct lp855x_platform_data lp8556_pdata = {
 	.mode = PWM_BASED,
 	.device_control = PWM_CONFIG(LP8556),
 	.initial_brightness = INITIAL_BRT,
-	.pwm_data = {
-		     .pwm_set_intensity = platform_pwm_set_intensity,
-		     .pwm_get_intensity = platform_pwm_get_intensity,
-		     },
+	.period_ns = 1000000,
 };
diff --git a/drivers/video/backlight/lp855x_bl.c b/drivers/video/backlight/lp855x_bl.c
index fd985e0681e9..b437541555ff 100644
--- a/drivers/video/backlight/lp855x_bl.c
+++ b/drivers/video/backlight/lp855x_bl.c
@@ -15,6 +15,7 @@
 #include <linux/backlight.h>
 #include <linux/err.h>
 #include <linux/platform_data/lp855x.h>
+#include <linux/pwm.h>
 
 /* Registers */
 #define BRIGHTNESS_CTRL		0x00
@@ -36,6 +37,7 @@ struct lp855x {
 	struct device *dev;
 	struct mutex xfer_lock;
 	struct lp855x_platform_data *pdata;
+	struct pwm_device *pwm;
 };
 
 static int lp855x_read_byte(struct lp855x *lp, u8 reg, u8 *data)
@@ -121,6 +123,28 @@ static int lp855x_init_registers(struct lp855x *lp)
 	return ret;
 }
 
+static void lp855x_pwm_ctrl(struct lp855x *lp, int br, int max_br)
+{
+	unsigned int period = lp->pdata->period_ns;
+	unsigned int duty = br * period / max_br;
+	struct pwm_device *pwm;
+
+	/* request pwm device with the consumer name */
+	if (!lp->pwm) {
+		pwm = devm_pwm_get(lp->dev, lp->chipname);
+		if (IS_ERR(pwm))
+			return;
+
+		lp->pwm = pwm;
+	}
+
+	pwm_config(lp->pwm, duty, period);
+	if (duty)
+		pwm_enable(lp->pwm);
+	else
+		pwm_disable(lp->pwm);
+}
+
 static int lp855x_bl_update_status(struct backlight_device *bl)
 {
 	struct lp855x *lp = bl_get_data(bl);
@@ -130,12 +154,10 @@ static int lp855x_bl_update_status(struct backlight_device *bl)
 		bl->props.brightness = 0;
 
 	if (mode == PWM_BASED) {
-		struct lp855x_pwm_data *pd = &lp->pdata->pwm_data;
 		int br = bl->props.brightness;
 		int max_br = bl->props.max_brightness;
 
-		if (pd->pwm_set_intensity)
-			pd->pwm_set_intensity(br, max_br);
+		lp855x_pwm_ctrl(lp, br, max_br);
 
 	} else if (mode == REGISTER_BASED) {
 		u8 val = bl->props.brightness;
@@ -150,14 +172,7 @@ static int lp855x_bl_get_brightness(struct backlight_device *bl)
 	struct lp855x *lp = bl_get_data(bl);
 	enum lp855x_brightness_ctrl_mode mode = lp->pdata->mode;
 
-	if (mode == PWM_BASED) {
-		struct lp855x_pwm_data *pd = &lp->pdata->pwm_data;
-		int max_br = bl->props.max_brightness;
-
-		if (pd->pwm_get_intensity)
-			bl->props.brightness = pd->pwm_get_intensity(max_br);
-
-	} else if (mode == REGISTER_BASED) {
+	if (mode == REGISTER_BASED) {
 		u8 val = 0;
 
 		lp855x_read_byte(lp, BRIGHTNESS_CTRL, &val);
diff --git a/include/linux/platform_data/lp855x.h b/include/linux/platform_data/lp855x.h
index 761f31752367..e81f62d24ee2 100644
--- a/include/linux/platform_data/lp855x.h
+++ b/include/linux/platform_data/lp855x.h
@@ -89,11 +89,6 @@ enum lp8556_brightness_source {
 	LP8556_COMBINED2,	/* pwm + i2c after the shaper block */
 };
 
-struct lp855x_pwm_data {
-	void (*pwm_set_intensity) (int brightness, int max_brightness);
-	int (*pwm_get_intensity) (int max_brightness);
-};
-
 struct lp855x_rom_data {
 	u8 addr;
 	u8 val;
@@ -105,7 +100,7 @@ struct lp855x_rom_data {
  * @mode : brightness control by pwm or lp855x register
  * @device_control : value of DEVICE CONTROL register
  * @initial_brightness : initial value of backlight brightness
- * @pwm_data : platform specific pwm generation functions.
+ * @period_ns : platform specific pwm period value. unit is nano.
 		Only valid when mode is PWM_BASED.
  * @load_new_rom_data :
 	0 : use default configuration data
@@ -118,7 +113,7 @@ struct lp855x_platform_data {
 	enum lp855x_brightness_ctrl_mode mode;
 	u8 device_control;
 	int initial_brightness;
-	struct lp855x_pwm_data pwm_data;
+	unsigned int period_ns;
 	u8 load_new_rom_data;
 	int size_program;
 	struct lp855x_rom_data *rom_data;
-- 
cgit v1.2.3


From 762a936fba7bd9225ca9a96e4860f6969b6b5670 Mon Sep 17 00:00:00 2001
From: Thierry Reding <thierry.reding@avionic-design.de>
Date: Mon, 17 Dec 2012 16:01:06 -0800
Subject: backlight: add of_find_backlight_by_node()

This function finds the struct backlight_device for a given device tree
node.  A dummy function is provided so that it safely compiles out if OF
support is disabled.

[akpm@linux-foundation.org: Don't use IS_ENABLED(CONFIG_OF)]
Signed-off-by: Thierry Reding <thierry.reding@avionic-design.de>
Acked-by: Jingoo Han <jg1.han@samsung.com>
Reviewed-by: Grant Likely <grant.likely@secretlab.ca>
Cc: Thierry Reding <thierry.reding@avionic-design.de>
Reviewed-by: Grant Likely <grant.likely@secretlab.ca>
Acked-by: Jingoo Han <jg1.han@samsung.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/backlight/backlight.c | 29 +++++++++++++++++++++++++++++
 include/linux/backlight.h           | 10 ++++++++++
 2 files changed, 39 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/video/backlight/backlight.c b/drivers/video/backlight/backlight.c
index 297db2fa91f5..345f6660d4b3 100644
--- a/drivers/video/backlight/backlight.c
+++ b/drivers/video/backlight/backlight.c
@@ -370,6 +370,35 @@ void backlight_device_unregister(struct backlight_device *bd)
 }
 EXPORT_SYMBOL(backlight_device_unregister);
 
+#ifdef CONFIG_OF
+static int of_parent_match(struct device *dev, void *data)
+{
+	return dev->parent && dev->parent->of_node == data;
+}
+
+/**
+ * of_find_backlight_by_node() - find backlight device by device-tree node
+ * @node: device-tree node of the backlight device
+ *
+ * Returns a pointer to the backlight device corresponding to the given DT
+ * node or NULL if no such backlight device exists or if the device hasn't
+ * been probed yet.
+ *
+ * This function obtains a reference on the backlight device and it is the
+ * caller's responsibility to drop the reference by calling put_device() on
+ * the backlight device's .dev field.
+ */
+struct backlight_device *of_find_backlight_by_node(struct device_node *node)
+{
+	struct device *dev;
+
+	dev = class_find_device(backlight_class, NULL, node, of_parent_match);
+
+	return dev ? to_backlight_device(dev) : NULL;
+}
+EXPORT_SYMBOL(of_find_backlight_by_node);
+#endif
+
 static void __exit backlight_class_exit(void)
 {
 	class_destroy(backlight_class);
diff --git a/include/linux/backlight.h b/include/linux/backlight.h
index 5ffc6dda4675..da9a0825e007 100644
--- a/include/linux/backlight.h
+++ b/include/linux/backlight.h
@@ -134,4 +134,14 @@ struct generic_bl_info {
 	void (*kick_battery)(void);
 };
 
+#ifdef CONFIG_OF
+struct backlight_device *of_find_backlight_by_node(struct device_node *node);
+#else
+static inline struct backlight_device *
+of_find_backlight_by_node(struct device_node *node)
+{
+	return NULL;
+}
+#endif
+
 #endif
-- 
cgit v1.2.3


From b18888ab256f05626193be955a7a03f01d676f8c Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 17 Dec 2012 16:01:18 -0800
Subject: string: introduce helper to get base file name from given path

There are several places in the kernel that use functionality like
basename(3) with the exception: in case of '/foo/bar/' we expect to get an
empty string.  Let's do it common helper for them.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Jason Baron <jbaron@redhat.com>
Cc: YAMANE Toshiaki <yamanetoshi@gmail.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/string.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/string.h b/include/linux/string.h
index 630125818ca8..ac889c5ea11b 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -143,4 +143,15 @@ static inline bool strstarts(const char *str, const char *prefix)
 
 extern size_t memweight(const void *ptr, size_t bytes);
 
+/**
+ * kbasename - return the last part of a pathname.
+ *
+ * @path: path to extract the filename from.
+ */
+static inline const char *kbasename(const char *path)
+{
+	const char *tail = strrchr(path, '/');
+	return tail ? tail + 1 : path;
+}
+
 #endif /* _LINUX_STRING_H_ */
-- 
cgit v1.2.3


From a1fd3e24d8a484b3265a6d485202afe093c058f3 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Mon, 17 Dec 2012 16:01:32 -0800
Subject: percpu_rw_semaphore: reimplement to not block the readers
 unnecessarily

Currently the writer does msleep() plus synchronize_sched() 3 times to
acquire/release the semaphore, and during this time the readers are
blocked completely.  Even if the "write" section was not actually started
or if it was already finished.

With this patch down_write/up_write does synchronize_sched() twice and
down_read/up_read are still possible during this time, just they use the
slow path.

percpu_down_write() first forces the readers to use rw_semaphore and
increment the "slow" counter to take the lock for reading, then it
takes that rw_semaphore for writing and blocks the readers.

Also.  With this patch the code relies on the documented behaviour of
synchronize_sched(), it doesn't try to pair synchronize_sched() with
barrier.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mikulas Patocka <mpatocka@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: Anton Arapov <anton@redhat.com>
Cc: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/percpu-rwsem.h |  83 ++++-------------------
 lib/Makefile                 |   2 +-
 lib/percpu-rwsem.c           | 154 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 168 insertions(+), 71 deletions(-)
 create mode 100644 lib/percpu-rwsem.c

(limited to 'include/linux')

diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h
index bd1e86071e57..592f0d610d8e 100644
--- a/include/linux/percpu-rwsem.h
+++ b/include/linux/percpu-rwsem.h
@@ -2,82 +2,25 @@
 #define _LINUX_PERCPU_RWSEM_H
 
 #include <linux/mutex.h>
+#include <linux/rwsem.h>
 #include <linux/percpu.h>
-#include <linux/rcupdate.h>
-#include <linux/delay.h>
+#include <linux/wait.h>
 
 struct percpu_rw_semaphore {
-	unsigned __percpu *counters;
-	bool locked;
-	struct mutex mtx;
+	unsigned int __percpu	*fast_read_ctr;
+	struct mutex		writer_mutex;
+	struct rw_semaphore	rw_sem;
+	atomic_t		slow_read_ctr;
+	wait_queue_head_t	write_waitq;
 };
 
-#define light_mb()	barrier()
-#define heavy_mb()	synchronize_sched_expedited()
+extern void percpu_down_read(struct percpu_rw_semaphore *);
+extern void percpu_up_read(struct percpu_rw_semaphore *);
 
-static inline void percpu_down_read(struct percpu_rw_semaphore *p)
-{
-	rcu_read_lock_sched();
-	if (unlikely(p->locked)) {
-		rcu_read_unlock_sched();
-		mutex_lock(&p->mtx);
-		this_cpu_inc(*p->counters);
-		mutex_unlock(&p->mtx);
-		return;
-	}
-	this_cpu_inc(*p->counters);
-	rcu_read_unlock_sched();
-	light_mb(); /* A, between read of p->locked and read of data, paired with D */
-}
+extern void percpu_down_write(struct percpu_rw_semaphore *);
+extern void percpu_up_write(struct percpu_rw_semaphore *);
 
-static inline void percpu_up_read(struct percpu_rw_semaphore *p)
-{
-	light_mb(); /* B, between read of the data and write to p->counter, paired with C */
-	this_cpu_dec(*p->counters);
-}
-
-static inline unsigned __percpu_count(unsigned __percpu *counters)
-{
-	unsigned total = 0;
-	int cpu;
-
-	for_each_possible_cpu(cpu)
-		total += ACCESS_ONCE(*per_cpu_ptr(counters, cpu));
-
-	return total;
-}
-
-static inline void percpu_down_write(struct percpu_rw_semaphore *p)
-{
-	mutex_lock(&p->mtx);
-	p->locked = true;
-	synchronize_sched_expedited(); /* make sure that all readers exit the rcu_read_lock_sched region */
-	while (__percpu_count(p->counters))
-		msleep(1);
-	heavy_mb(); /* C, between read of p->counter and write to data, paired with B */
-}
-
-static inline void percpu_up_write(struct percpu_rw_semaphore *p)
-{
-	heavy_mb(); /* D, between write to data and write to p->locked, paired with A */
-	p->locked = false;
-	mutex_unlock(&p->mtx);
-}
-
-static inline int percpu_init_rwsem(struct percpu_rw_semaphore *p)
-{
-	p->counters = alloc_percpu(unsigned);
-	if (unlikely(!p->counters))
-		return -ENOMEM;
-	p->locked = false;
-	mutex_init(&p->mtx);
-	return 0;
-}
-
-static inline void percpu_free_rwsem(struct percpu_rw_semaphore *p)
-{
-	free_percpu(p->counters);
-	p->counters = NULL; /* catch use after free bugs */
-}
+extern int percpu_init_rwsem(struct percpu_rw_semaphore *);
+extern void percpu_free_rwsem(struct percpu_rw_semaphore *);
 
 #endif
diff --git a/lib/Makefile b/lib/Makefile
index e2152fa7ff4d..e959c20efb24 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -9,7 +9,7 @@ endif
 
 lib-y := ctype.o string.o vsprintf.o cmdline.o \
 	 rbtree.o radix-tree.o dump_stack.o timerqueue.o\
-	 idr.o int_sqrt.o extable.o \
+	 idr.o int_sqrt.o extable.o percpu-rwsem.o \
 	 sha1.o md5.o irq_regs.o reciprocal_div.o argv_split.o \
 	 proportions.o flex_proportions.o prio_heap.o ratelimit.o show_mem.o \
 	 is_single_threaded.o plist.o decompress.o kobject_uevent.o \
diff --git a/lib/percpu-rwsem.c b/lib/percpu-rwsem.c
new file mode 100644
index 000000000000..2e03bcfe48f9
--- /dev/null
+++ b/lib/percpu-rwsem.c
@@ -0,0 +1,154 @@
+#include <linux/mutex.h>
+#include <linux/rwsem.h>
+#include <linux/percpu.h>
+#include <linux/wait.h>
+#include <linux/percpu-rwsem.h>
+#include <linux/rcupdate.h>
+#include <linux/sched.h>
+#include <linux/errno.h>
+
+int percpu_init_rwsem(struct percpu_rw_semaphore *brw)
+{
+	brw->fast_read_ctr = alloc_percpu(int);
+	if (unlikely(!brw->fast_read_ctr))
+		return -ENOMEM;
+
+	mutex_init(&brw->writer_mutex);
+	init_rwsem(&brw->rw_sem);
+	atomic_set(&brw->slow_read_ctr, 0);
+	init_waitqueue_head(&brw->write_waitq);
+	return 0;
+}
+
+void percpu_free_rwsem(struct percpu_rw_semaphore *brw)
+{
+	free_percpu(brw->fast_read_ctr);
+	brw->fast_read_ctr = NULL; /* catch use after free bugs */
+}
+
+/*
+ * This is the fast-path for down_read/up_read, it only needs to ensure
+ * there is no pending writer (!mutex_is_locked() check) and inc/dec the
+ * fast per-cpu counter. The writer uses synchronize_sched_expedited() to
+ * serialize with the preempt-disabled section below.
+ *
+ * The nontrivial part is that we should guarantee acquire/release semantics
+ * in case when
+ *
+ *	R_W: down_write() comes after up_read(), the writer should see all
+ *	     changes done by the reader
+ * or
+ *	W_R: down_read() comes after up_write(), the reader should see all
+ *	     changes done by the writer
+ *
+ * If this helper fails the callers rely on the normal rw_semaphore and
+ * atomic_dec_and_test(), so in this case we have the necessary barriers.
+ *
+ * But if it succeeds we do not have any barriers, mutex_is_locked() or
+ * __this_cpu_add() below can be reordered with any LOAD/STORE done by the
+ * reader inside the critical section. See the comments in down_write and
+ * up_write below.
+ */
+static bool update_fast_ctr(struct percpu_rw_semaphore *brw, unsigned int val)
+{
+	bool success = false;
+
+	preempt_disable();
+	if (likely(!mutex_is_locked(&brw->writer_mutex))) {
+		__this_cpu_add(*brw->fast_read_ctr, val);
+		success = true;
+	}
+	preempt_enable();
+
+	return success;
+}
+
+/*
+ * Like the normal down_read() this is not recursive, the writer can
+ * come after the first percpu_down_read() and create the deadlock.
+ */
+void percpu_down_read(struct percpu_rw_semaphore *brw)
+{
+	if (likely(update_fast_ctr(brw, +1)))
+		return;
+
+	down_read(&brw->rw_sem);
+	atomic_inc(&brw->slow_read_ctr);
+	up_read(&brw->rw_sem);
+}
+
+void percpu_up_read(struct percpu_rw_semaphore *brw)
+{
+	if (likely(update_fast_ctr(brw, -1)))
+		return;
+
+	/* false-positive is possible but harmless */
+	if (atomic_dec_and_test(&brw->slow_read_ctr))
+		wake_up_all(&brw->write_waitq);
+}
+
+static int clear_fast_ctr(struct percpu_rw_semaphore *brw)
+{
+	unsigned int sum = 0;
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		sum += per_cpu(*brw->fast_read_ctr, cpu);
+		per_cpu(*brw->fast_read_ctr, cpu) = 0;
+	}
+
+	return sum;
+}
+
+/*
+ * A writer takes ->writer_mutex to exclude other writers and to force the
+ * readers to switch to the slow mode, note the mutex_is_locked() check in
+ * update_fast_ctr().
+ *
+ * After that the readers can only inc/dec the slow ->slow_read_ctr counter,
+ * ->fast_read_ctr is stable. Once the writer moves its sum into the slow
+ * counter it represents the number of active readers.
+ *
+ * Finally the writer takes ->rw_sem for writing and blocks the new readers,
+ * then waits until the slow counter becomes zero.
+ */
+void percpu_down_write(struct percpu_rw_semaphore *brw)
+{
+	/* also blocks update_fast_ctr() which checks mutex_is_locked() */
+	mutex_lock(&brw->writer_mutex);
+
+	/*
+	 * 1. Ensures mutex_is_locked() is visible to any down_read/up_read
+	 *    so that update_fast_ctr() can't succeed.
+	 *
+	 * 2. Ensures we see the result of every previous this_cpu_add() in
+	 *    update_fast_ctr().
+	 *
+	 * 3. Ensures that if any reader has exited its critical section via
+	 *    fast-path, it executes a full memory barrier before we return.
+	 *    See R_W case in the comment above update_fast_ctr().
+	 */
+	synchronize_sched_expedited();
+
+	/* nobody can use fast_read_ctr, move its sum into slow_read_ctr */
+	atomic_add(clear_fast_ctr(brw), &brw->slow_read_ctr);
+
+	/* block the new readers completely */
+	down_write(&brw->rw_sem);
+
+	/* wait for all readers to complete their percpu_up_read() */
+	wait_event(brw->write_waitq, !atomic_read(&brw->slow_read_ctr));
+}
+
+void percpu_up_write(struct percpu_rw_semaphore *brw)
+{
+	/* allow the new readers, but only the slow-path */
+	up_write(&brw->rw_sem);
+
+	/*
+	 * Insert the barrier before the next fast-path in down_read,
+	 * see W_R case in the comment above update_fast_ctr().
+	 */
+	synchronize_sched_expedited();
+	mutex_unlock(&brw->writer_mutex);
+}
-- 
cgit v1.2.3


From 9390ef0c85fd065f01045fef708b046c98cda04c Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Mon, 17 Dec 2012 16:01:36 -0800
Subject: percpu_rw_semaphore: kill ->writer_mutex, add ->write_ctr

percpu_rw_semaphore->writer_mutex was only added to simplify the initial
rewrite, the only thing it protects is clear_fast_ctr() which otherwise
could be called by multiple writers.  ->rw_sem is enough to serialize the
writers.

Kill this mutex and add "atomic_t write_ctr" instead.  The writers
increment/decrement this counter, the readers check it is zero instead of
mutex_is_locked().

Move atomic_add(clear_fast_ctr(), slow_read_ctr) under down_write() to
avoid the race with other writers.  This is a bit sub-optimal, only the
first writer needs this and we do not need to exclude the readers at this
stage.  But this is simple, we do not want another internal lock until we
add more features.

And this speeds up the write-contended case.  Before this patch the racing
writers sleep in synchronize_sched_expedited() sequentially, with this
patch multiple synchronize_sched_expedited's can "overlap" with each
other.  Note: we can do more optimizations, this is only the first step.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Cc: Anton Arapov <anton@redhat.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Michal Marek <mmarek@suse.cz>
Cc: Mikulas Patocka <mpatocka@redhat.com>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/percpu-rwsem.h |  4 ++--
 lib/percpu-rwsem.c           | 34 ++++++++++++++++------------------
 2 files changed, 18 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h
index 592f0d610d8e..d2146a4f833e 100644
--- a/include/linux/percpu-rwsem.h
+++ b/include/linux/percpu-rwsem.h
@@ -1,14 +1,14 @@
 #ifndef _LINUX_PERCPU_RWSEM_H
 #define _LINUX_PERCPU_RWSEM_H
 
-#include <linux/mutex.h>
+#include <linux/atomic.h>
 #include <linux/rwsem.h>
 #include <linux/percpu.h>
 #include <linux/wait.h>
 
 struct percpu_rw_semaphore {
 	unsigned int __percpu	*fast_read_ctr;
-	struct mutex		writer_mutex;
+	atomic_t		write_ctr;
 	struct rw_semaphore	rw_sem;
 	atomic_t		slow_read_ctr;
 	wait_queue_head_t	write_waitq;
diff --git a/lib/percpu-rwsem.c b/lib/percpu-rwsem.c
index 2e03bcfe48f9..ce92ab563a08 100644
--- a/lib/percpu-rwsem.c
+++ b/lib/percpu-rwsem.c
@@ -1,4 +1,4 @@
-#include <linux/mutex.h>
+#include <linux/atomic.h>
 #include <linux/rwsem.h>
 #include <linux/percpu.h>
 #include <linux/wait.h>
@@ -13,8 +13,8 @@ int percpu_init_rwsem(struct percpu_rw_semaphore *brw)
 	if (unlikely(!brw->fast_read_ctr))
 		return -ENOMEM;
 
-	mutex_init(&brw->writer_mutex);
 	init_rwsem(&brw->rw_sem);
+	atomic_set(&brw->write_ctr, 0);
 	atomic_set(&brw->slow_read_ctr, 0);
 	init_waitqueue_head(&brw->write_waitq);
 	return 0;
@@ -28,7 +28,7 @@ void percpu_free_rwsem(struct percpu_rw_semaphore *brw)
 
 /*
  * This is the fast-path for down_read/up_read, it only needs to ensure
- * there is no pending writer (!mutex_is_locked() check) and inc/dec the
+ * there is no pending writer (atomic_read(write_ctr) == 0) and inc/dec the
  * fast per-cpu counter. The writer uses synchronize_sched_expedited() to
  * serialize with the preempt-disabled section below.
  *
@@ -44,7 +44,7 @@ void percpu_free_rwsem(struct percpu_rw_semaphore *brw)
  * If this helper fails the callers rely on the normal rw_semaphore and
  * atomic_dec_and_test(), so in this case we have the necessary barriers.
  *
- * But if it succeeds we do not have any barriers, mutex_is_locked() or
+ * But if it succeeds we do not have any barriers, atomic_read(write_ctr) or
  * __this_cpu_add() below can be reordered with any LOAD/STORE done by the
  * reader inside the critical section. See the comments in down_write and
  * up_write below.
@@ -54,7 +54,7 @@ static bool update_fast_ctr(struct percpu_rw_semaphore *brw, unsigned int val)
 	bool success = false;
 
 	preempt_disable();
-	if (likely(!mutex_is_locked(&brw->writer_mutex))) {
+	if (likely(!atomic_read(&brw->write_ctr))) {
 		__this_cpu_add(*brw->fast_read_ctr, val);
 		success = true;
 	}
@@ -101,9 +101,8 @@ static int clear_fast_ctr(struct percpu_rw_semaphore *brw)
 }
 
 /*
- * A writer takes ->writer_mutex to exclude other writers and to force the
- * readers to switch to the slow mode, note the mutex_is_locked() check in
- * update_fast_ctr().
+ * A writer increments ->write_ctr to force the readers to switch to the
+ * slow mode, note the atomic_read() check in update_fast_ctr().
  *
  * After that the readers can only inc/dec the slow ->slow_read_ctr counter,
  * ->fast_read_ctr is stable. Once the writer moves its sum into the slow
@@ -114,11 +113,10 @@ static int clear_fast_ctr(struct percpu_rw_semaphore *brw)
  */
 void percpu_down_write(struct percpu_rw_semaphore *brw)
 {
-	/* also blocks update_fast_ctr() which checks mutex_is_locked() */
-	mutex_lock(&brw->writer_mutex);
-
+	/* tell update_fast_ctr() there is a pending writer */
+	atomic_inc(&brw->write_ctr);
 	/*
-	 * 1. Ensures mutex_is_locked() is visible to any down_read/up_read
+	 * 1. Ensures that write_ctr != 0 is visible to any down_read/up_read
 	 *    so that update_fast_ctr() can't succeed.
 	 *
 	 * 2. Ensures we see the result of every previous this_cpu_add() in
@@ -130,25 +128,25 @@ void percpu_down_write(struct percpu_rw_semaphore *brw)
 	 */
 	synchronize_sched_expedited();
 
+	/* exclude other writers, and block the new readers completely */
+	down_write(&brw->rw_sem);
+
 	/* nobody can use fast_read_ctr, move its sum into slow_read_ctr */
 	atomic_add(clear_fast_ctr(brw), &brw->slow_read_ctr);
 
-	/* block the new readers completely */
-	down_write(&brw->rw_sem);
-
 	/* wait for all readers to complete their percpu_up_read() */
 	wait_event(brw->write_waitq, !atomic_read(&brw->slow_read_ctr));
 }
 
 void percpu_up_write(struct percpu_rw_semaphore *brw)
 {
-	/* allow the new readers, but only the slow-path */
+	/* release the lock, but the readers can't use the fast-path */
 	up_write(&brw->rw_sem);
-
 	/*
 	 * Insert the barrier before the next fast-path in down_read,
 	 * see W_R case in the comment above update_fast_ctr().
 	 */
 	synchronize_sched_expedited();
-	mutex_unlock(&brw->writer_mutex);
+	/* the last writer unblocks update_fast_ctr() */
+	atomic_dec(&brw->write_ctr);
 }
-- 
cgit v1.2.3


From 8ebe34731a0d1a9d89b536430afd98d0fadec99b Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Mon, 17 Dec 2012 16:01:38 -0800
Subject: percpu_rw_semaphore: add lockdep annotations

Add lockdep annotations.  Not only this can help to find the potential
problems, we do not want the false warnings if, say, the task takes two
different percpu_rw_semaphore's for reading.  IOW, at least ->rw_sem
should not use a single class.

This patch exposes this internal lock to lockdep so that it represents the
whole percpu_rw_semaphore.  This way we do not need to add another "fake"
->lockdep_map and lock_class_key.  More importantly, this also makes the
output from lockdep much more understandable if it finds the problem.

In short, with this patch from lockdep pov percpu_down_read() and
percpu_up_read() acquire/release ->rw_sem for reading, this matches the
actual semantics.  This abuses __up_read() but I hope this is fine and in
fact I'd like to have down_read_no_lockdep() as well,
percpu_down_read_recursive_readers() will need it.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Cc: Anton Arapov <anton@redhat.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Michal Marek <mmarek@suse.cz>
Cc: Mikulas Patocka <mpatocka@redhat.com>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/percpu-rwsem.h | 10 +++++++++-
 lib/percpu-rwsem.c           | 21 +++++++++++++++++----
 2 files changed, 26 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h
index d2146a4f833e..3e88c9a7d57f 100644
--- a/include/linux/percpu-rwsem.h
+++ b/include/linux/percpu-rwsem.h
@@ -5,6 +5,7 @@
 #include <linux/rwsem.h>
 #include <linux/percpu.h>
 #include <linux/wait.h>
+#include <linux/lockdep.h>
 
 struct percpu_rw_semaphore {
 	unsigned int __percpu	*fast_read_ctr;
@@ -20,7 +21,14 @@ extern void percpu_up_read(struct percpu_rw_semaphore *);
 extern void percpu_down_write(struct percpu_rw_semaphore *);
 extern void percpu_up_write(struct percpu_rw_semaphore *);
 
-extern int percpu_init_rwsem(struct percpu_rw_semaphore *);
+extern int __percpu_init_rwsem(struct percpu_rw_semaphore *,
+				const char *, struct lock_class_key *);
 extern void percpu_free_rwsem(struct percpu_rw_semaphore *);
 
+#define percpu_init_rwsem(brw)	\
+({								\
+	static struct lock_class_key rwsem_key;			\
+	__percpu_init_rwsem(brw, #brw, &rwsem_key);		\
+})
+
 #endif
diff --git a/lib/percpu-rwsem.c b/lib/percpu-rwsem.c
index ce92ab563a08..652a8ee8efe9 100644
--- a/lib/percpu-rwsem.c
+++ b/lib/percpu-rwsem.c
@@ -2,18 +2,21 @@
 #include <linux/rwsem.h>
 #include <linux/percpu.h>
 #include <linux/wait.h>
+#include <linux/lockdep.h>
 #include <linux/percpu-rwsem.h>
 #include <linux/rcupdate.h>
 #include <linux/sched.h>
 #include <linux/errno.h>
 
-int percpu_init_rwsem(struct percpu_rw_semaphore *brw)
+int __percpu_init_rwsem(struct percpu_rw_semaphore *brw,
+			const char *name, struct lock_class_key *rwsem_key)
 {
 	brw->fast_read_ctr = alloc_percpu(int);
 	if (unlikely(!brw->fast_read_ctr))
 		return -ENOMEM;
 
-	init_rwsem(&brw->rw_sem);
+	/* ->rw_sem represents the whole percpu_rw_semaphore for lockdep */
+	__init_rwsem(&brw->rw_sem, name, rwsem_key);
 	atomic_set(&brw->write_ctr, 0);
 	atomic_set(&brw->slow_read_ctr, 0);
 	init_waitqueue_head(&brw->write_waitq);
@@ -66,19 +69,29 @@ static bool update_fast_ctr(struct percpu_rw_semaphore *brw, unsigned int val)
 /*
  * Like the normal down_read() this is not recursive, the writer can
  * come after the first percpu_down_read() and create the deadlock.
+ *
+ * Note: returns with lock_is_held(brw->rw_sem) == T for lockdep,
+ * percpu_up_read() does rwsem_release(). This pairs with the usage
+ * of ->rw_sem in percpu_down/up_write().
  */
 void percpu_down_read(struct percpu_rw_semaphore *brw)
 {
-	if (likely(update_fast_ctr(brw, +1)))
+	might_sleep();
+	if (likely(update_fast_ctr(brw, +1))) {
+		rwsem_acquire_read(&brw->rw_sem.dep_map, 0, 0, _RET_IP_);
 		return;
+	}
 
 	down_read(&brw->rw_sem);
 	atomic_inc(&brw->slow_read_ctr);
-	up_read(&brw->rw_sem);
+	/* avoid up_read()->rwsem_release() */
+	__up_read(&brw->rw_sem);
 }
 
 void percpu_up_read(struct percpu_rw_semaphore *brw)
 {
+	rwsem_release(&brw->rw_sem.dep_map, 1, _RET_IP_);
+
 	if (likely(update_fast_ctr(brw, -1)))
 		return;
 
-- 
cgit v1.2.3


From 0ad50c3896afbb3c103409a18260e601b87a744c Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Mon, 17 Dec 2012 16:01:45 -0800
Subject: compat: generic compat_sys_sched_rr_get_interval() implementation

This function is used by sparc, powerpc tile and arm64 for compat support.
 The patch adds a generic implementation with a wrapper for PowerPC to do
the u32->int sign extension.

The reason for a single patch covering powerpc, tile, sparc and arm64 is
to keep it bisectable, otherwise kernel building may fail with mismatched
function declarations.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Chris Metcalf <cmetcalf@tilera.com>  [for tile]
Acked-by: David S. Miller <davem@davemloft.net>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm64/include/asm/unistd.h   |  1 +
 arch/arm64/kernel/sys_compat.c    | 15 ---------------
 arch/powerpc/include/asm/systbl.h |  2 +-
 arch/powerpc/include/asm/unistd.h |  1 +
 arch/powerpc/kernel/sys_ppc32.c   | 17 ++++-------------
 arch/sparc/include/asm/unistd.h   |  1 +
 arch/sparc/kernel/sys_sparc32.c   | 14 --------------
 arch/tile/include/asm/compat.h    |  2 --
 arch/tile/include/asm/unistd.h    |  1 +
 arch/tile/kernel/compat.c         | 18 ------------------
 include/linux/compat.h            |  3 +++
 kernel/compat.c                   | 17 +++++++++++++++++
 12 files changed, 29 insertions(+), 63 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h
index d69aeea6da1e..76fb7dd3350a 100644
--- a/arch/arm64/include/asm/unistd.h
+++ b/arch/arm64/include/asm/unistd.h
@@ -20,6 +20,7 @@
 #define __ARCH_WANT_SYS_GETPGRP
 #define __ARCH_WANT_SYS_LLSEEK
 #define __ARCH_WANT_SYS_NICE
+#define __ARCH_WANT_COMPAT_SYS_SCHED_RR_GET_INTERVAL
 #define __ARCH_WANT_SYS_SIGPENDING
 #define __ARCH_WANT_SYS_SIGPROCMASK
 #define __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND
diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c
index f7b05edf8ce3..26e9c4eeaba8 100644
--- a/arch/arm64/kernel/sys_compat.c
+++ b/arch/arm64/kernel/sys_compat.c
@@ -28,21 +28,6 @@
 #include <asm/cacheflush.h>
 #include <asm/unistd32.h>
 
-asmlinkage int compat_sys_sched_rr_get_interval(compat_pid_t pid,
-						struct compat_timespec __user *interval)
-{
-	struct timespec t;
-	int ret;
-	mm_segment_t old_fs = get_fs();
-
-	set_fs(KERNEL_DS);
-	ret = sys_sched_rr_get_interval(pid, (struct timespec __user *)&t);
-	set_fs(old_fs);
-	if (put_compat_timespec(&t, interval))
-		return -EFAULT;
-	return ret;
-}
-
 static inline void
 do_compat_cache_op(unsigned long start, unsigned long end, int flags)
 {
diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h
index 840838769853..cec8aae5cbf8 100644
--- a/arch/powerpc/include/asm/systbl.h
+++ b/arch/powerpc/include/asm/systbl.h
@@ -164,7 +164,7 @@ COMPAT_SYS_SPU(sched_getscheduler)
 SYSCALL_SPU(sched_yield)
 COMPAT_SYS_SPU(sched_get_priority_max)
 COMPAT_SYS_SPU(sched_get_priority_min)
-COMPAT_SYS_SPU(sched_rr_get_interval)
+SYSX_SPU(sys_sched_rr_get_interval,compat_sys_sched_rr_get_interval_wrapper,sys_sched_rr_get_interval)
 COMPAT_SYS_SPU(nanosleep)
 SYSCALL_SPU(mremap)
 SYSCALL_SPU(setresuid)
diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h
index 76fe846ec40e..bcbbe413c606 100644
--- a/arch/powerpc/include/asm/unistd.h
+++ b/arch/powerpc/include/asm/unistd.h
@@ -54,6 +54,7 @@
 #define __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND
 #define __ARCH_WANT_SYS_NEWFSTATAT
 #define __ARCH_WANT_COMPAT_SYS_SENDFILE
+#define __ARCH_WANT_COMPAT_SYS_SCHED_RR_GET_INTERVAL
 #endif
 #define __ARCH_WANT_SYS_EXECVE
 #define __ARCH_WANT_SYS_FORK
diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c
index 9c2ed90ece8f..8a93778ed9f5 100644
--- a/arch/powerpc/kernel/sys_ppc32.c
+++ b/arch/powerpc/kernel/sys_ppc32.c
@@ -175,19 +175,10 @@ asmlinkage long compat_sys_prctl(u32 option, u32 arg2, u32 arg3, u32 arg4, u32 a
  * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode)
  * and the register representation of a signed int (msr in 64-bit mode) is performed.
  */
-asmlinkage long compat_sys_sched_rr_get_interval(u32 pid, struct compat_timespec __user *interval)
-{
-	struct timespec t;
-	int ret;
-	mm_segment_t old_fs = get_fs ();
-
-	/* The __user pointer cast is valid because of the set_fs() */
-	set_fs (KERNEL_DS);
-	ret = sys_sched_rr_get_interval((int)pid, (struct timespec __user *) &t);
-	set_fs (old_fs);
-	if (put_compat_timespec(&t, interval))
-		return -EFAULT;
-	return ret;
+asmlinkage long compat_sys_sched_rr_get_interval_wrapper(u32 pid,
+							 struct compat_timespec __user *interval)
+{
+	return compat_sys_sched_rr_get_interval((int)pid, interval);
 }
 
 /* Note: it is necessary to treat mode as an unsigned int,
diff --git a/arch/sparc/include/asm/unistd.h b/arch/sparc/include/asm/unistd.h
index c3e5d8b64171..497386a7ed28 100644
--- a/arch/sparc/include/asm/unistd.h
+++ b/arch/sparc/include/asm/unistd.h
@@ -45,6 +45,7 @@
 #define __ARCH_WANT_COMPAT_SYS_TIME
 #define __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND
 #define __ARCH_WANT_COMPAT_SYS_SENDFILE
+#define __ARCH_WANT_COMPAT_SYS_SCHED_RR_GET_INTERVAL
 #endif
 #define __ARCH_WANT_SYS_EXECVE
 
diff --git a/arch/sparc/kernel/sys_sparc32.c b/arch/sparc/kernel/sys_sparc32.c
index 03c7e929ec34..4a4cdc633f6b 100644
--- a/arch/sparc/kernel/sys_sparc32.c
+++ b/arch/sparc/kernel/sys_sparc32.c
@@ -211,20 +211,6 @@ asmlinkage long compat_sys_sysfs(int option, u32 arg1, u32 arg2)
 	return sys_sysfs(option, arg1, arg2);
 }
 
-asmlinkage long compat_sys_sched_rr_get_interval(compat_pid_t pid, struct compat_timespec __user *interval)
-{
-	struct timespec t;
-	int ret;
-	mm_segment_t old_fs = get_fs ();
-	
-	set_fs (KERNEL_DS);
-	ret = sys_sched_rr_get_interval(pid, (struct timespec __user *) &t);
-	set_fs (old_fs);
-	if (put_compat_timespec(&t, interval))
-		return -EFAULT;
-	return ret;
-}
-
 asmlinkage long compat_sys_rt_sigprocmask(int how,
 					  compat_sigset_t __user *set,
 					  compat_sigset_t __user *oset,
diff --git a/arch/tile/include/asm/compat.h b/arch/tile/include/asm/compat.h
index ca61fb4296b3..88f3c227afd9 100644
--- a/arch/tile/include/asm/compat.h
+++ b/arch/tile/include/asm/compat.h
@@ -296,8 +296,6 @@ long compat_sys_sync_file_range2(int fd, unsigned int flags,
 long compat_sys_fallocate(int fd, int mode,
 			  u32 offset_lo, u32 offset_hi,
 			  u32 len_lo, u32 len_hi);
-long compat_sys_sched_rr_get_interval(compat_pid_t pid,
-				      struct compat_timespec __user *interval);
 
 /* Assembly trampoline to avoid clobbering r0. */
 long _compat_sys_rt_sigreturn(void);
diff --git a/arch/tile/include/asm/unistd.h b/arch/tile/include/asm/unistd.h
index b51c6ee3cd6c..fe841e7d4963 100644
--- a/arch/tile/include/asm/unistd.h
+++ b/arch/tile/include/asm/unistd.h
@@ -14,6 +14,7 @@
 /* In compat mode, we use sys_llseek() for compat_sys_llseek(). */
 #ifdef CONFIG_COMPAT
 #define __ARCH_WANT_SYS_LLSEEK
+#define __ARCH_WANT_COMPAT_SYS_SCHED_RR_GET_INTERVAL
 #endif
 #define __ARCH_WANT_SYS_NEWFSTATAT
 #define __ARCH_WANT_SYS_EXECVE
diff --git a/arch/tile/kernel/compat.c b/arch/tile/kernel/compat.c
index 9cd7cb6041c0..7f72401b4f45 100644
--- a/arch/tile/kernel/compat.c
+++ b/arch/tile/kernel/compat.c
@@ -76,24 +76,6 @@ long compat_sys_fallocate(int fd, int mode,
 			     ((loff_t)len_hi << 32) | len_lo);
 }
 
-
-
-long compat_sys_sched_rr_get_interval(compat_pid_t pid,
-				      struct compat_timespec __user *interval)
-{
-	struct timespec t;
-	int ret;
-	mm_segment_t old_fs = get_fs();
-
-	set_fs(KERNEL_DS);
-	ret = sys_sched_rr_get_interval(pid,
-					(struct timespec __force __user *)&t);
-	set_fs(old_fs);
-	if (put_compat_timespec(&t, interval))
-		return -EFAULT;
-	return ret;
-}
-
 /* Provide the compat syscall number to call mapping. */
 #undef __SYSCALL
 #define __SYSCALL(nr, call) [nr] = (call),
diff --git a/include/linux/compat.h b/include/linux/compat.h
index 784ebfe63c48..e4920bd58a47 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -588,6 +588,9 @@ asmlinkage ssize_t compat_sys_process_vm_writev(compat_pid_t pid,
 asmlinkage long compat_sys_sendfile(int out_fd, int in_fd,
 				    compat_off_t __user *offset, compat_size_t count);
 
+asmlinkage long compat_sys_sched_rr_get_interval(compat_pid_t pid,
+						 struct compat_timespec __user *interval);
+
 #else
 
 #define is_compat_task() (0)
diff --git a/kernel/compat.c b/kernel/compat.c
index c28a306ae05c..f6150e92dfc9 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -1215,6 +1215,23 @@ compat_sys_sysinfo(struct compat_sysinfo __user *info)
 	return 0;
 }
 
+#ifdef __ARCH_WANT_COMPAT_SYS_SCHED_RR_GET_INTERVAL
+asmlinkage long compat_sys_sched_rr_get_interval(compat_pid_t pid,
+						 struct compat_timespec __user *interval)
+{
+	struct timespec t;
+	int ret;
+	mm_segment_t old_fs = get_fs();
+
+	set_fs(KERNEL_DS);
+	ret = sys_sched_rr_get_interval(pid, (struct timespec __user *)&t);
+	set_fs(old_fs);
+	if (put_compat_timespec(&t, interval))
+		return -EFAULT;
+	return ret;
+}
+#endif /* __ARCH_WANT_COMPAT_SYS_SCHED_RR_GET_INTERVAL */
+
 /*
  * Allocate user-space memory for the duration of a single system call,
  * in order to marshall parameters inside a compat thunk.
-- 
cgit v1.2.3


From 4c925d6031f719fad6ea8b1c94a636f4c0fea39b Mon Sep 17 00:00:00 2001
From: Eldad Zack <eldad@fogrefinery.com>
Date: Mon, 17 Dec 2012 16:03:04 -0800
Subject: kstrto*: add documentation

As Bruce Fields pointed out, kstrto* is currently lacking kerneldoc
comments.  This patch adds kerneldoc comments to common variants of
kstrto*: kstrto(u)l, kstrto(u)ll and kstrto(u)int.

Signed-off-by: Eldad Zack <eldad@fogrefinery.com>
Cc: J. Bruce Fields <bfields@fieldses.org>
Cc: Joe Perches <joe@perches.com>
Cc: Randy Dunlap <rdunlap@xenotime.net>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Rob Landley <rob@landley.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/DocBook/kernel-api.tmpl |  3 ++
 include/linux/kernel.h                | 33 ++++++++++++++++++
 lib/kstrtox.c                         | 64 +++++++++++++++++++++++++++++++++++
 3 files changed, 100 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/DocBook/kernel-api.tmpl b/Documentation/DocBook/kernel-api.tmpl
index 00687ee9d363..f75ab4c1b281 100644
--- a/Documentation/DocBook/kernel-api.tmpl
+++ b/Documentation/DocBook/kernel-api.tmpl
@@ -58,6 +58,9 @@
 
      <sect1><title>String Conversions</title>
 !Elib/vsprintf.c
+!Finclude/linux/kernel.h kstrtol
+!Finclude/linux/kernel.h kstrtoul
+!Elib/kstrtox.c
      </sect1>
      <sect1><title>String Manipulation</title>
 <!-- All functions are exported at now
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index d97ed5897447..d140e8fb075f 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -220,6 +220,23 @@ int __must_check _kstrtol(const char *s, unsigned int base, long *res);
 
 int __must_check kstrtoull(const char *s, unsigned int base, unsigned long long *res);
 int __must_check kstrtoll(const char *s, unsigned int base, long long *res);
+
+/**
+ * kstrtoul - convert a string to an unsigned long
+ * @s: The start of the string. The string must be null-terminated, and may also
+ *  include a single newline before its terminating null. The first character
+ *  may also be a plus sign, but not a minus sign.
+ * @base: The number base to use. The maximum supported base is 16. If base is
+ *  given as 0, then the base of the string is automatically detected with the
+ *  conventional semantics - If it begins with 0x the number will be parsed as a
+ *  hexadecimal (case insensitive), if it otherwise begins with 0, it will be
+ *  parsed as an octal number. Otherwise it will be parsed as a decimal.
+ * @res: Where to write the result of the conversion on success.
+ *
+ * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error.
+ * Used as a replacement for the obsolete simple_strtoull. Return code must
+ * be checked.
+*/
 static inline int __must_check kstrtoul(const char *s, unsigned int base, unsigned long *res)
 {
 	/*
@@ -233,6 +250,22 @@ static inline int __must_check kstrtoul(const char *s, unsigned int base, unsign
 		return _kstrtoul(s, base, res);
 }
 
+/**
+ * kstrtol - convert a string to a long
+ * @s: The start of the string. The string must be null-terminated, and may also
+ *  include a single newline before its terminating null. The first character
+ *  may also be a plus sign or a minus sign.
+ * @base: The number base to use. The maximum supported base is 16. If base is
+ *  given as 0, then the base of the string is automatically detected with the
+ *  conventional semantics - If it begins with 0x the number will be parsed as a
+ *  hexadecimal (case insensitive), if it otherwise begins with 0, it will be
+ *  parsed as an octal number. Otherwise it will be parsed as a decimal.
+ * @res: Where to write the result of the conversion on success.
+ *
+ * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error.
+ * Used as a replacement for the obsolete simple_strtoull. Return code must
+ * be checked.
+ */
 static inline int __must_check kstrtol(const char *s, unsigned int base, long *res)
 {
 	/*
diff --git a/lib/kstrtox.c b/lib/kstrtox.c
index c3615eab0cc3..f78ae0c0c4e2 100644
--- a/lib/kstrtox.c
+++ b/lib/kstrtox.c
@@ -104,6 +104,22 @@ static int _kstrtoull(const char *s, unsigned int base, unsigned long long *res)
 	return 0;
 }
 
+/**
+ * kstrtoull - convert a string to an unsigned long long
+ * @s: The start of the string. The string must be null-terminated, and may also
+ *  include a single newline before its terminating null. The first character
+ *  may also be a plus sign, but not a minus sign.
+ * @base: The number base to use. The maximum supported base is 16. If base is
+ *  given as 0, then the base of the string is automatically detected with the
+ *  conventional semantics - If it begins with 0x the number will be parsed as a
+ *  hexadecimal (case insensitive), if it otherwise begins with 0, it will be
+ *  parsed as an octal number. Otherwise it will be parsed as a decimal.
+ * @res: Where to write the result of the conversion on success.
+ *
+ * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error.
+ * Used as a replacement for the obsolete simple_strtoull. Return code must
+ * be checked.
+ */
 int kstrtoull(const char *s, unsigned int base, unsigned long long *res)
 {
 	if (s[0] == '+')
@@ -112,6 +128,22 @@ int kstrtoull(const char *s, unsigned int base, unsigned long long *res)
 }
 EXPORT_SYMBOL(kstrtoull);
 
+/**
+ * kstrtoll - convert a string to a long long
+ * @s: The start of the string. The string must be null-terminated, and may also
+ *  include a single newline before its terminating null. The first character
+ *  may also be a plus sign or a minus sign.
+ * @base: The number base to use. The maximum supported base is 16. If base is
+ *  given as 0, then the base of the string is automatically detected with the
+ *  conventional semantics - If it begins with 0x the number will be parsed as a
+ *  hexadecimal (case insensitive), if it otherwise begins with 0, it will be
+ *  parsed as an octal number. Otherwise it will be parsed as a decimal.
+ * @res: Where to write the result of the conversion on success.
+ *
+ * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error.
+ * Used as a replacement for the obsolete simple_strtoull. Return code must
+ * be checked.
+ */
 int kstrtoll(const char *s, unsigned int base, long long *res)
 {
 	unsigned long long tmp;
@@ -168,6 +200,22 @@ int _kstrtol(const char *s, unsigned int base, long *res)
 }
 EXPORT_SYMBOL(_kstrtol);
 
+/**
+ * kstrtouint - convert a string to an unsigned int
+ * @s: The start of the string. The string must be null-terminated, and may also
+ *  include a single newline before its terminating null. The first character
+ *  may also be a plus sign, but not a minus sign.
+ * @base: The number base to use. The maximum supported base is 16. If base is
+ *  given as 0, then the base of the string is automatically detected with the
+ *  conventional semantics - If it begins with 0x the number will be parsed as a
+ *  hexadecimal (case insensitive), if it otherwise begins with 0, it will be
+ *  parsed as an octal number. Otherwise it will be parsed as a decimal.
+ * @res: Where to write the result of the conversion on success.
+ *
+ * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error.
+ * Used as a replacement for the obsolete simple_strtoull. Return code must
+ * be checked.
+ */
 int kstrtouint(const char *s, unsigned int base, unsigned int *res)
 {
 	unsigned long long tmp;
@@ -183,6 +231,22 @@ int kstrtouint(const char *s, unsigned int base, unsigned int *res)
 }
 EXPORT_SYMBOL(kstrtouint);
 
+/**
+ * kstrtoint - convert a string to an int
+ * @s: The start of the string. The string must be null-terminated, and may also
+ *  include a single newline before its terminating null. The first character
+ *  may also be a plus sign or a minus sign.
+ * @base: The number base to use. The maximum supported base is 16. If base is
+ *  given as 0, then the base of the string is automatically detected with the
+ *  conventional semantics - If it begins with 0x the number will be parsed as a
+ *  hexadecimal (case insensitive), if it otherwise begins with 0, it will be
+ *  parsed as an octal number. Otherwise it will be parsed as a decimal.
+ * @res: Where to write the result of the conversion on success.
+ *
+ * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error.
+ * Used as a replacement for the obsolete simple_strtoull. Return code must
+ * be checked.
+ */
 int kstrtoint(const char *s, unsigned int base, int *res)
 {
 	long long tmp;
-- 
cgit v1.2.3


From 992fb6e170639b0849bace8e49bf31bd37c4123c Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Mon, 17 Dec 2012 16:03:07 -0800
Subject: ptrace: introduce PTRACE_O_EXITKILL

Ptrace jailers want to be sure that the tracee can never escape
from the control. However if the tracer dies unexpectedly the
tracee continues to run in potentially unsafe mode.

Add the new ptrace option PTRACE_O_EXITKILL. If the tracer exits
it sends SIGKILL to every tracee which has this bit set.

Note that the new option is not equal to the last-option << 1.  Because
currently all options have an event, and the new one starts the eventless
group.  It uses the random 20 bit, so we have the room for 12 more events,
but we can also add the new eventless options below this one.

Suggested by Amnon Shiloh.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Tested-by: Amnon Shiloh <u3557@miso.sublimeip.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Serge Hallyn <serge.hallyn@canonical.com>
Cc: Chris Evans <scarybeasts@gmail.com>
Cc: David Howells <dhowells@redhat.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/ptrace.h      | 2 ++
 include/uapi/linux/ptrace.h | 5 ++++-
 kernel/ptrace.c             | 3 +++
 3 files changed, 9 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index a89ff04bddd9..addfbe7c180e 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -32,6 +32,8 @@
 #define PT_TRACE_EXIT		PT_EVENT_FLAG(PTRACE_EVENT_EXIT)
 #define PT_TRACE_SECCOMP	PT_EVENT_FLAG(PTRACE_EVENT_SECCOMP)
 
+#define PT_EXITKILL		(PTRACE_O_EXITKILL << PT_OPT_FLAG_SHIFT)
+
 /* single stepping state bits (used on ARM and PA-RISC) */
 #define PT_SINGLESTEP_BIT	31
 #define PT_SINGLESTEP		(1<<PT_SINGLESTEP_BIT)
diff --git a/include/uapi/linux/ptrace.h b/include/uapi/linux/ptrace.h
index 1ef6c056a9e4..022ab186a812 100644
--- a/include/uapi/linux/ptrace.h
+++ b/include/uapi/linux/ptrace.h
@@ -73,7 +73,10 @@
 #define PTRACE_O_TRACEEXIT	(1 << PTRACE_EVENT_EXIT)
 #define PTRACE_O_TRACESECCOMP	(1 << PTRACE_EVENT_SECCOMP)
 
-#define PTRACE_O_MASK		0x000000ff
+/* eventless options */
+#define PTRACE_O_EXITKILL	(1 << 20)
+
+#define PTRACE_O_MASK		(0x000000ff | PTRACE_O_EXITKILL)
 
 #include <asm/ptrace.h>
 
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 1f5e55dda955..ec8118ab2a47 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -457,6 +457,9 @@ void exit_ptrace(struct task_struct *tracer)
 		return;
 
 	list_for_each_entry_safe(p, n, &tracer->ptraced, ptrace_entry) {
+		if (unlikely(p->ptrace & PT_EXITKILL))
+			send_sig_info(SIGKILL, SEND_SIG_FORCED, p);
+
 		if (__ptrace_detach(tracer, p))
 			list_add(&p->ptrace_entry, &ptrace_dead);
 	}
-- 
cgit v1.2.3


From d740269867021faf4ce38a449353d2b986c34a67 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Mon, 17 Dec 2012 16:03:20 -0800
Subject: exec: use -ELOOP for max recursion depth

To avoid an explosion of request_module calls on a chain of abusive
scripts, fail maximum recursion with -ELOOP instead of -ENOEXEC. As soon
as maximum recursion depth is hit, the error will fail all the way back
up the chain, aborting immediately.

This also has the side-effect of stopping the user's shell from attempting
to reexecute the top-level file as a shell script. As seen in the
dash source:

        if (cmd != path_bshell && errno == ENOEXEC) {
                *argv-- = cmd;
                *argv = cmd = path_bshell;
                goto repeat;
        }

The above logic was designed for running scripts automatically that lacked
the "#!" header, not to re-try failed recursion. On a legitimate -ENOEXEC,
things continue to behave as the shell expects.

Additionally, when tracking recursion, the binfmt handlers should not be
involved. The recursion being tracked is the depth of calls through
search_binary_handler(), so that function should be exclusively responsible
for tracking the depth.

Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: halfdog <me@halfdog.net>
Cc: P J P <ppandit@redhat.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/binfmt_em86.c        |  1 -
 fs/binfmt_misc.c        |  6 ------
 fs/binfmt_script.c      |  4 +---
 fs/exec.c               | 10 +++++-----
 include/linux/binfmts.h |  2 --
 5 files changed, 6 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/fs/binfmt_em86.c b/fs/binfmt_em86.c
index 4e6cce57d113..037a3e2b045b 100644
--- a/fs/binfmt_em86.c
+++ b/fs/binfmt_em86.c
@@ -42,7 +42,6 @@ static int load_em86(struct linux_binprm *bprm)
 			return -ENOEXEC;
 	}
 
-	bprm->recursion_depth++; /* Well, the bang-shell is implicit... */
 	allow_write_access(bprm->file);
 	fput(bprm->file);
 	bprm->file = NULL;
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index b0b70fbea06c..9be335fb8a7c 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -117,10 +117,6 @@ static int load_misc_binary(struct linux_binprm *bprm)
 	if (!enabled)
 		goto _ret;
 
-	retval = -ENOEXEC;
-	if (bprm->recursion_depth > BINPRM_MAX_RECURSION)
-		goto _ret;
-
 	/* to keep locking time low, we copy the interpreter string */
 	read_lock(&entries_lock);
 	fmt = check_file(bprm);
@@ -197,8 +193,6 @@ static int load_misc_binary(struct linux_binprm *bprm)
 	if (retval < 0)
 		goto _error;
 
-	bprm->recursion_depth++;
-
 	retval = search_binary_handler(bprm);
 	if (retval < 0)
 		goto _error;
diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c
index 8c954997e7f7..1610a91637e5 100644
--- a/fs/binfmt_script.c
+++ b/fs/binfmt_script.c
@@ -22,15 +22,13 @@ static int load_script(struct linux_binprm *bprm)
 	char interp[BINPRM_BUF_SIZE];
 	int retval;
 
-	if ((bprm->buf[0] != '#') || (bprm->buf[1] != '!') ||
-	    (bprm->recursion_depth > BINPRM_MAX_RECURSION))
+	if ((bprm->buf[0] != '#') || (bprm->buf[1] != '!'))
 		return -ENOEXEC;
 	/*
 	 * This section does the #! interpretation.
 	 * Sorta complicated, but hopefully it will work.  -TYT
 	 */
 
-	bprm->recursion_depth++;
 	allow_write_access(bprm->file);
 	fput(bprm->file);
 	bprm->file = NULL;
diff --git a/fs/exec.c b/fs/exec.c
index 721a29929511..d5eb9e605ffd 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1356,6 +1356,10 @@ int search_binary_handler(struct linux_binprm *bprm)
 	struct linux_binfmt *fmt;
 	pid_t old_pid, old_vpid;
 
+	/* This allows 4 levels of binfmt rewrites before failing hard. */
+	if (depth > 5)
+		return -ELOOP;
+
 	retval = security_bprm_check(bprm);
 	if (retval)
 		return retval;
@@ -1380,12 +1384,8 @@ int search_binary_handler(struct linux_binprm *bprm)
 			if (!try_module_get(fmt->module))
 				continue;
 			read_unlock(&binfmt_lock);
+			bprm->recursion_depth = depth + 1;
 			retval = fn(bprm);
-			/*
-			 * Restore the depth counter to its starting value
-			 * in this call, so we don't have to rely on every
-			 * load_binary function to restore it on return.
-			 */
 			bprm->recursion_depth = depth;
 			if (retval >= 0) {
 				if (depth == 0) {
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index 2630c9b41a86..a4c2b565c835 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -54,8 +54,6 @@ struct linux_binprm {
 #define BINPRM_FLAGS_EXECFD_BIT 1
 #define BINPRM_FLAGS_EXECFD (1 << BINPRM_FLAGS_EXECFD_BIT)
 
-#define BINPRM_MAX_RECURSION 4
-
 /* Function parameter for binfmt->coredump */
 struct coredump_params {
 	siginfo_t *siginfo;
-- 
cgit v1.2.3


From a5ba911ec3792168530d35e16a8ec3b6fc60bcb5 Mon Sep 17 00:00:00 2001
From: Gao feng <gaofeng@cn.fujitsu.com>
Date: Mon, 17 Dec 2012 16:03:22 -0800
Subject: pidns: remove unused is_container_init()

Since commit 1cdcbec1a337 ("CRED: Neuter sys_capset()")
is_container_init() has no callers.

Signed-off-by: Gao feng <gaofeng@cn.fujitsu.com>
Cc: David Howells <dhowells@redhat.com>
Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
Cc: James Morris <jmorris@namei.org>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched.h |  6 ------
 kernel/pid.c          | 15 ---------------
 2 files changed, 21 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index b089c92c609b..9914c662ed7b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1778,12 +1778,6 @@ static inline int is_global_init(struct task_struct *tsk)
 	return tsk->pid == 1;
 }
 
-/*
- * is_container_init:
- * check whether in the task is init in its own pid namespace.
- */
-extern int is_container_init(struct task_struct *tsk);
-
 extern struct pid *cad_pid;
 
 extern void free_task(struct task_struct *tsk);
diff --git a/kernel/pid.c b/kernel/pid.c
index fd996c1ed9f8..a54a1123c7cf 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -81,21 +81,6 @@ struct pid_namespace init_pid_ns = {
 };
 EXPORT_SYMBOL_GPL(init_pid_ns);
 
-int is_container_init(struct task_struct *tsk)
-{
-	int ret = 0;
-	struct pid *pid;
-
-	rcu_read_lock();
-	pid = task_pid(tsk);
-	if (pid != NULL && pid->numbers[pid->level].nr == 1)
-		ret = 1;
-	rcu_read_unlock();
-
-	return ret;
-}
-EXPORT_SYMBOL(is_container_init);
-
 /*
  * Note: disable interrupts while the pidmap_lock is held as an
  * interrupt might come in and do read_lock(&tasklist_lock).
-- 
cgit v1.2.3


From 8529091e8e2ae25e0f4003086f619765ff255e4b Mon Sep 17 00:00:00 2001
From: Josh Triplett <josh@joshtriplett.org>
Date: Mon, 17 Dec 2012 16:03:24 -0800
Subject: linux/compiler.h: add __must_hold macro for functions called with a
 lock held

linux/compiler.h has macros to denote functions that acquire or release
locks, but not to denote functions called with a lock held that return
with the lock still held.  Add a __must_hold macro to cover that case.

Signed-off-by: Josh Triplett <josh@joshtriplett.org>
Reported-by: Ed Cashin <ecashin@coraid.com>
Tested-by: Ed Cashin <ecashin@coraid.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/compiler.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index f430e4162f41..b121554f1fe2 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -10,6 +10,7 @@
 # define __force	__attribute__((force))
 # define __nocast	__attribute__((nocast))
 # define __iomem	__attribute__((noderef, address_space(2)))
+# define __must_hold(x)	__attribute__((context(x,1,1)))
 # define __acquires(x)	__attribute__((context(x,0,1)))
 # define __releases(x)	__attribute__((context(x,1,0)))
 # define __acquire(x)	__context__(x,1)
@@ -33,6 +34,7 @@ extern void __chk_io_ptr(const volatile void __iomem *);
 # define __chk_user_ptr(x) (void)0
 # define __chk_io_ptr(x) (void)0
 # define __builtin_warning(x, y...) (1)
+# define __must_hold(x)
 # define __acquires(x)
 # define __releases(x)
 # define __acquire(x) (void)0
-- 
cgit v1.2.3


From 496f2f93b1cc286f5a4f4f9acdc1e5314978683f Mon Sep 17 00:00:00 2001
From: Akinobu Mita <akinobu.mita@gmail.com>
Date: Mon, 17 Dec 2012 16:04:23 -0800
Subject: random32: rename random32 to prandom

This renames all random32 functions to have 'prandom_' prefix as follows:

  void prandom_seed(u32 seed);	/* rename from srandom32() */
  u32 prandom_u32(void);		/* rename from random32() */
  void prandom_seed_state(struct rnd_state *state, u64 seed);
  				/* rename from prandom32_seed() */
  u32 prandom_u32_state(struct rnd_state *state);
  				/* rename from prandom32() */

The purpose of this renaming is to prevent some kernel developers from
assuming that prandom32() and random32() might imply that only
prandom32() was the one using a pseudo-random number generator by
prandom32's "p", and the result may be a very embarassing security
exposure.  This concern was expressed by Theodore Ts'o.

And furthermore, I'm going to introduce new functions for getting the
requested number of pseudo-random bytes.  If I continue to use both
prandom32 and random32 prefixes for these functions, the confusion
is getting worse.

As a result of this renaming, "prandom_" is the common prefix for
pseudo-random number library.

Currently, srandom32() and random32() are preserved because it is
difficult to rename too many users at once.

Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Cc: "Theodore Ts'o" <tytso@mit.edu>
Cc: Robert Love <robert.w.love@intel.com>
Cc: Michel Lespinasse <walken@google.com>
Cc: Valdis Kletnieks <valdis.kletnieks@vt.edu>
Cc: David Laight <david.laight@aculab.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Artem Bityutskiy <dedekind1@gmail.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Eilon Greenstein <eilong@broadcom.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/scsi/fcoe/fcoe_ctlr.c |  4 ++--
 include/linux/random.h        | 17 ++++++++++-----
 lib/interval_tree_test_main.c |  7 ++++---
 lib/random32.c                | 48 +++++++++++++++++++++----------------------
 lib/rbtree_test.c             |  6 +++---
 5 files changed, 45 insertions(+), 37 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/scsi/fcoe/fcoe_ctlr.c b/drivers/scsi/fcoe/fcoe_ctlr.c
index 2ebe03a4b51d..4a909d7cfde1 100644
--- a/drivers/scsi/fcoe/fcoe_ctlr.c
+++ b/drivers/scsi/fcoe/fcoe_ctlr.c
@@ -2144,7 +2144,7 @@ static void fcoe_ctlr_vn_restart(struct fcoe_ctlr *fip)
 	 */
 	port_id = fip->port_id;
 	if (fip->probe_tries)
-		port_id = prandom32(&fip->rnd_state) & 0xffff;
+		port_id = prandom_u32_state(&fip->rnd_state) & 0xffff;
 	else if (!port_id)
 		port_id = fip->lp->wwpn & 0xffff;
 	if (!port_id || port_id == 0xffff)
@@ -2169,7 +2169,7 @@ static void fcoe_ctlr_vn_restart(struct fcoe_ctlr *fip)
 static void fcoe_ctlr_vn_start(struct fcoe_ctlr *fip)
 {
 	fip->probe_tries = 0;
-	prandom32_seed(&fip->rnd_state, fip->lp->wwpn);
+	prandom_seed_state(&fip->rnd_state, fip->lp->wwpn);
 	fcoe_ctlr_vn_restart(fip);
 }
 
diff --git a/include/linux/random.h b/include/linux/random.h
index 6330ed47b38b..db6debc6649e 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -25,10 +25,17 @@ extern const struct file_operations random_fops, urandom_fops;
 unsigned int get_random_int(void);
 unsigned long randomize_range(unsigned long start, unsigned long end, unsigned long len);
 
-u32 random32(void);
-void srandom32(u32 seed);
+u32 prandom_u32(void);
+void prandom_seed(u32 seed);
 
-u32 prandom32(struct rnd_state *);
+/*
+ * These macros are preserved for backward compatibility and should be
+ * removed as soon as a transition is finished.
+ */
+#define random32() prandom_u32()
+#define srandom32(seed) prandom_seed(seed)
+
+u32 prandom_u32_state(struct rnd_state *);
 
 /*
  * Handle minimum values for seeds
@@ -39,11 +46,11 @@ static inline u32 __seed(u32 x, u32 m)
 }
 
 /**
- * prandom32_seed - set seed for prandom32().
+ * prandom_seed_state - set seed for prandom_u32_state().
  * @state: pointer to state structure to receive the seed.
  * @seed: arbitrary 64-bit value to use as a seed.
  */
-static inline void prandom32_seed(struct rnd_state *state, u64 seed)
+static inline void prandom_seed_state(struct rnd_state *state, u64 seed)
 {
 	u32 i = (seed >> 32) ^ (seed << 10) ^ seed;
 
diff --git a/lib/interval_tree_test_main.c b/lib/interval_tree_test_main.c
index b25903987f7a..245900b98c8e 100644
--- a/lib/interval_tree_test_main.c
+++ b/lib/interval_tree_test_main.c
@@ -30,7 +30,8 @@ static void init(void)
 {
 	int i;
 	for (i = 0; i < NODES; i++) {
-		u32 a = prandom32(&rnd), b = prandom32(&rnd);
+		u32 a = prandom_u32_state(&rnd);
+		u32 b = prandom_u32_state(&rnd);
 		if (a <= b) {
 			nodes[i].start = a;
 			nodes[i].last = b;
@@ -40,7 +41,7 @@ static void init(void)
 		}
 	}
 	for (i = 0; i < SEARCHES; i++)
-		queries[i] = prandom32(&rnd);
+		queries[i] = prandom_u32_state(&rnd);
 }
 
 static int interval_tree_test_init(void)
@@ -51,7 +52,7 @@ static int interval_tree_test_init(void)
 
 	printk(KERN_ALERT "interval tree insert/remove");
 
-	prandom32_seed(&rnd, 3141592653589793238ULL);
+	prandom_seed_state(&rnd, 3141592653589793238ULL);
 	init();
 
 	time1 = get_cycles();
diff --git a/lib/random32.c b/lib/random32.c
index 938bde5876ac..d1830fade915 100644
--- a/lib/random32.c
+++ b/lib/random32.c
@@ -42,13 +42,13 @@
 static DEFINE_PER_CPU(struct rnd_state, net_rand_state);
 
 /**
- *	prandom32 - seeded pseudo-random number generator.
+ *	prandom_u32_state - seeded pseudo-random number generator.
  *	@state: pointer to state structure holding seeded state.
  *
  *	This is used for pseudo-randomness with no outside seeding.
- *	For more random results, use random32().
+ *	For more random results, use prandom_u32().
  */
-u32 prandom32(struct rnd_state *state)
+u32 prandom_u32_state(struct rnd_state *state)
 {
 #define TAUSWORTHE(s,a,b,c,d) ((s&c)<<d) ^ (((s <<a) ^ s)>>b)
 
@@ -58,32 +58,32 @@ u32 prandom32(struct rnd_state *state)
 
 	return (state->s1 ^ state->s2 ^ state->s3);
 }
-EXPORT_SYMBOL(prandom32);
+EXPORT_SYMBOL(prandom_u32_state);
 
 /**
- *	random32 - pseudo random number generator
+ *	prandom_u32 - pseudo random number generator
  *
  *	A 32 bit pseudo-random number is generated using a fast
  *	algorithm suitable for simulation. This algorithm is NOT
  *	considered safe for cryptographic use.
  */
-u32 random32(void)
+u32 prandom_u32(void)
 {
 	unsigned long r;
 	struct rnd_state *state = &get_cpu_var(net_rand_state);
-	r = prandom32(state);
+	r = prandom_u32_state(state);
 	put_cpu_var(state);
 	return r;
 }
-EXPORT_SYMBOL(random32);
+EXPORT_SYMBOL(prandom_u32);
 
 /**
- *	srandom32 - add entropy to pseudo random number generator
+ *	prandom_seed - add entropy to pseudo random number generator
  *	@seed: seed value
  *
- *	Add some additional seeding to the random32() pool.
+ *	Add some additional seeding to the prandom pool.
  */
-void srandom32(u32 entropy)
+void prandom_seed(u32 entropy)
 {
 	int i;
 	/*
@@ -95,13 +95,13 @@ void srandom32(u32 entropy)
 		state->s1 = __seed(state->s1 ^ entropy, 1);
 	}
 }
-EXPORT_SYMBOL(srandom32);
+EXPORT_SYMBOL(prandom_seed);
 
 /*
  *	Generate some initially weak seeding values to allow
- *	to start the random32() engine.
+ *	to start the prandom_u32() engine.
  */
-static int __init random32_init(void)
+static int __init prandom_init(void)
 {
 	int i;
 
@@ -114,22 +114,22 @@ static int __init random32_init(void)
 		state->s3 = __seed(LCG(state->s2), 15);
 
 		/* "warm it up" */
-		prandom32(state);
-		prandom32(state);
-		prandom32(state);
-		prandom32(state);
-		prandom32(state);
-		prandom32(state);
+		prandom_u32_state(state);
+		prandom_u32_state(state);
+		prandom_u32_state(state);
+		prandom_u32_state(state);
+		prandom_u32_state(state);
+		prandom_u32_state(state);
 	}
 	return 0;
 }
-core_initcall(random32_init);
+core_initcall(prandom_init);
 
 /*
  *	Generate better values after random number generator
  *	is fully initialized.
  */
-static int __init random32_reseed(void)
+static int __init prandom_reseed(void)
 {
 	int i;
 
@@ -143,8 +143,8 @@ static int __init random32_reseed(void)
 		state->s3 = __seed(seeds[2], 15);
 
 		/* mix it in */
-		prandom32(state);
+		prandom_u32_state(state);
 	}
 	return 0;
 }
-late_initcall(random32_reseed);
+late_initcall(prandom_reseed);
diff --git a/lib/rbtree_test.c b/lib/rbtree_test.c
index d7f491a54579..af38aedbd874 100644
--- a/lib/rbtree_test.c
+++ b/lib/rbtree_test.c
@@ -96,8 +96,8 @@ static void init(void)
 {
 	int i;
 	for (i = 0; i < NODES; i++) {
-		nodes[i].key = prandom32(&rnd);
-		nodes[i].val = prandom32(&rnd);
+		nodes[i].key = prandom_u32_state(&rnd);
+		nodes[i].val = prandom_u32_state(&rnd);
 	}
 }
 
@@ -155,7 +155,7 @@ static int rbtree_test_init(void)
 
 	printk(KERN_ALERT "rbtree testing");
 
-	prandom32_seed(&rnd, 3141592653589793238ULL);
+	prandom_seed_state(&rnd, 3141592653589793238ULL);
 	init();
 
 	time1 = get_cycles();
-- 
cgit v1.2.3


From 6582c665d6b882dad8329e05749fbcf119f1ab88 Mon Sep 17 00:00:00 2001
From: Akinobu Mita <akinobu.mita@gmail.com>
Date: Mon, 17 Dec 2012 16:04:25 -0800
Subject: prandom: introduce prandom_bytes() and prandom_bytes_state()

Add functions to get the requested number of pseudo-random bytes.

The difference from get_random_bytes() is that it generates pseudo-random
numbers by prandom_u32().  It doesn't consume the entropy pool, and the
sequence is reproducible if the same rnd_state is used.  So it is suitable
for generating random bytes for testing.

Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Cc: "Theodore Ts'o" <tytso@mit.edu>
Cc: Artem Bityutskiy <dedekind1@gmail.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Eilon Greenstein <eilong@broadcom.com>
Cc: David Laight <david.laight@aculab.com>
Cc: Michel Lespinasse <walken@google.com>
Cc: Robert Love <robert.w.love@intel.com>
Cc: Valdis Kletnieks <valdis.kletnieks@vt.edu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/random.h |  2 ++
 lib/random32.c         | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 51 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/random.h b/include/linux/random.h
index db6debc6649e..d9846088c2c5 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -26,6 +26,7 @@ unsigned int get_random_int(void);
 unsigned long randomize_range(unsigned long start, unsigned long end, unsigned long len);
 
 u32 prandom_u32(void);
+void prandom_bytes(void *buf, int nbytes);
 void prandom_seed(u32 seed);
 
 /*
@@ -36,6 +37,7 @@ void prandom_seed(u32 seed);
 #define srandom32(seed) prandom_seed(seed)
 
 u32 prandom_u32_state(struct rnd_state *);
+void prandom_bytes_state(struct rnd_state *state, void *buf, int nbytes);
 
 /*
  * Handle minimum values for seeds
diff --git a/lib/random32.c b/lib/random32.c
index d1830fade915..52280d5526be 100644
--- a/lib/random32.c
+++ b/lib/random32.c
@@ -77,6 +77,55 @@ u32 prandom_u32(void)
 }
 EXPORT_SYMBOL(prandom_u32);
 
+/*
+ *	prandom_bytes_state - get the requested number of pseudo-random bytes
+ *
+ *	@state: pointer to state structure holding seeded state.
+ *	@buf: where to copy the pseudo-random bytes to
+ *	@bytes: the requested number of bytes
+ *
+ *	This is used for pseudo-randomness with no outside seeding.
+ *	For more random results, use prandom_bytes().
+ */
+void prandom_bytes_state(struct rnd_state *state, void *buf, int bytes)
+{
+	unsigned char *p = buf;
+	int i;
+
+	for (i = 0; i < round_down(bytes, sizeof(u32)); i += sizeof(u32)) {
+		u32 random = prandom_u32_state(state);
+		int j;
+
+		for (j = 0; j < sizeof(u32); j++) {
+			p[i + j] = random;
+			random >>= BITS_PER_BYTE;
+		}
+	}
+	if (i < bytes) {
+		u32 random = prandom_u32_state(state);
+
+		for (; i < bytes; i++) {
+			p[i] = random;
+			random >>= BITS_PER_BYTE;
+		}
+	}
+}
+EXPORT_SYMBOL(prandom_bytes_state);
+
+/**
+ *	prandom_bytes - get the requested number of pseudo-random bytes
+ *	@buf: where to copy the pseudo-random bytes to
+ *	@bytes: the requested number of bytes
+ */
+void prandom_bytes(void *buf, int bytes)
+{
+	struct rnd_state *state = &get_cpu_var(net_rand_state);
+
+	prandom_bytes_state(state, buf, bytes);
+	put_cpu_var(state);
+}
+EXPORT_SYMBOL(prandom_bytes);
+
 /**
  *	prandom_seed - add entropy to pseudo random number generator
  *	@seed: seed value
-- 
cgit v1.2.3


From 55985dd72ab27b47530dcc8bdddd28b69f4abe8b Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@openvz.org>
Date: Mon, 17 Dec 2012 16:04:55 -0800
Subject: procfs: add ability to plug in auxiliary fdinfo providers

This patch brings ability to print out auxiliary data associated with
file in procfs interface /proc/pid/fdinfo/fd.

In particular further patches make eventfd, evenpoll, signalfd and
fsnotify to print additional information complete enough to restore
these objects after checkpoint.

To simplify the code we add show_fdinfo callback inside struct
file_operations (as Al and Pavel are proposing).

Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
Acked-by: Pavel Emelyanov <xemul@parallels.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Andrey Vagin <avagin@openvz.org>
Cc: Al Viro <viro@ZenIV.linux.org.uk>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: James Bottomley <jbottomley@parallels.com>
Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Matthew Helsley <matt.helsley@gmail.com>
Cc: "J. Bruce Fields" <bfields@fieldses.org>
Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@onelan.co.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/fd.c       | 2 ++
 include/linux/fs.h | 4 ++--
 2 files changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/fs/proc/fd.c b/fs/proc/fd.c
index f28a875f8779..d7a4a28ef630 100644
--- a/fs/proc/fd.c
+++ b/fs/proc/fd.c
@@ -50,6 +50,8 @@ static int seq_show(struct seq_file *m, void *v)
 	if (!ret) {
                 seq_printf(m, "pos:\t%lli\nflags:\t0%o\n",
 			   (long long)file->f_pos, f_flags);
+		if (file->f_op->show_fdinfo)
+			ret = file->f_op->show_fdinfo(m, file);
 		fput(file);
 	}
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 029552ff774c..5abf703d06ba 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -44,6 +44,7 @@ struct vm_area_struct;
 struct vfsmount;
 struct cred;
 struct swap_info_struct;
+struct seq_file;
 
 extern void __init inode_init(void);
 extern void __init inode_init_early(void);
@@ -1543,6 +1544,7 @@ struct file_operations {
 	int (*setlease)(struct file *, long, struct file_lock **);
 	long (*fallocate)(struct file *file, int mode, loff_t offset,
 			  loff_t len);
+	int (*show_fdinfo)(struct seq_file *m, struct file *f);
 };
 
 struct inode_operations {
@@ -1578,8 +1580,6 @@ struct inode_operations {
 			   umode_t create_mode, int *opened);
 } ____cacheline_aligned;
 
-struct seq_file;
-
 ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
 			      unsigned long nr_segs, unsigned long fast_segs,
 			      struct iovec *fast_pointer,
-- 
cgit v1.2.3


From 138d22b58696c506799f8de759804083ff9effae Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@openvz.org>
Date: Mon, 17 Dec 2012 16:05:02 -0800
Subject: fs, epoll: add procfs fdinfo helper

This allows us to print out eventpoll target file descriptor, events and
data, the /proc/pid/fdinfo/fd consists of

 | pos:	0
 | flags:	02
 | tfd:        5 events:       1d data: ffffffffffffffff enabled: 1

[avagin@: fix for unitialized ret variable]

Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
Acked-by: Pavel Emelyanov <xemul@parallels.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Andrey Vagin <avagin@openvz.org>
Cc: Al Viro <viro@ZenIV.linux.org.uk>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: James Bottomley <jbottomley@parallels.com>
Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Matthew Helsley <matt.helsley@gmail.com>
Cc: "J. Bruce Fields" <bfields@fieldses.org>
Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@onelan.co.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/eventpoll.c          | 28 ++++++++++++++++++++++++++++
 fs/proc/array.c         |  2 +-
 fs/signalfd.c           | 18 ++++++++++++++++++
 include/linux/proc_fs.h |  3 +++
 4 files changed, 50 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index cd96649bfe62..be56b21435f8 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -38,6 +38,8 @@
 #include <asm/io.h>
 #include <asm/mman.h>
 #include <linux/atomic.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
 
 /*
  * LOCKING:
@@ -783,8 +785,34 @@ static unsigned int ep_eventpoll_poll(struct file *file, poll_table *wait)
 	return pollflags != -1 ? pollflags : 0;
 }
 
+#ifdef CONFIG_PROC_FS
+static int ep_show_fdinfo(struct seq_file *m, struct file *f)
+{
+	struct eventpoll *ep = f->private_data;
+	struct rb_node *rbp;
+	int ret = 0;
+
+	mutex_lock(&ep->mtx);
+	for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) {
+		struct epitem *epi = rb_entry(rbp, struct epitem, rbn);
+
+		ret = seq_printf(m, "tfd: %8d events: %8x data: %16llx\n",
+				 epi->ffd.fd, epi->event.events,
+				 (long long)epi->event.data);
+		if (ret)
+			break;
+	}
+	mutex_unlock(&ep->mtx);
+
+	return ret;
+}
+#endif
+
 /* File callbacks that implement the eventpoll file behaviour */
 static const struct file_operations eventpoll_fops = {
+#ifdef CONFIG_PROC_FS
+	.show_fdinfo	= ep_show_fdinfo,
+#endif
 	.release	= ep_eventpoll_release,
 	.poll		= ep_eventpoll_poll,
 	.llseek		= noop_llseek,
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 439544fec388..060a56a91278 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -220,7 +220,7 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
 	seq_putc(m, '\n');
 }
 
-static void render_sigset_t(struct seq_file *m, const char *header,
+void render_sigset_t(struct seq_file *m, const char *header,
 				sigset_t *set)
 {
 	int i;
diff --git a/fs/signalfd.c b/fs/signalfd.c
index 8bee4e570911..b53486961735 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -29,6 +29,7 @@
 #include <linux/anon_inodes.h>
 #include <linux/signalfd.h>
 #include <linux/syscalls.h>
+#include <linux/proc_fs.h>
 
 void signalfd_cleanup(struct sighand_struct *sighand)
 {
@@ -227,7 +228,24 @@ static ssize_t signalfd_read(struct file *file, char __user *buf, size_t count,
 	return total ? total: ret;
 }
 
+#ifdef CONFIG_PROC_FS
+static int signalfd_show_fdinfo(struct seq_file *m, struct file *f)
+{
+	struct signalfd_ctx *ctx = f->private_data;
+	sigset_t sigmask;
+
+	sigmask = ctx->sigmask;
+	signotset(&sigmask);
+	render_sigset_t(m, "sigmask:\t", &sigmask);
+
+	return 0;
+}
+#endif
+
 static const struct file_operations signalfd_fops = {
+#ifdef CONFIG_PROC_FS
+	.show_fdinfo	= signalfd_show_fdinfo,
+#endif
 	.release	= signalfd_release,
 	.poll		= signalfd_poll,
 	.read		= signalfd_read,
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 3fd2e871ff1b..b4f70f0a9a48 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -290,4 +290,7 @@ static inline struct net *PDE_NET(struct proc_dir_entry *pde)
 	return pde->parent->data;
 }
 
+#include <linux/signal.h>
+
+void render_sigset_t(struct seq_file *m, const char *header, sigset_t *set);
 #endif /* _LINUX_PROC_FS_H */
-- 
cgit v1.2.3


From 711c7bf9914060d7aaf3c1a15f38094a5d5e748f Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@openvz.org>
Date: Mon, 17 Dec 2012 16:05:08 -0800
Subject: fs, exportfs: add exportfs_encode_inode_fh() helper

We will need this helper in the next patch to provide a file handle for
inotify marks in /proc/pid/fdinfo output.

The patch is rather providing the way to use inodes directly when dentry
is not available (like in case of inotify system).

Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
Acked-by: Pavel Emelyanov <xemul@parallels.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Andrey Vagin <avagin@openvz.org>
Cc: Al Viro <viro@ZenIV.linux.org.uk>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: James Bottomley <jbottomley@parallels.com>
Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Matthew Helsley <matt.helsley@gmail.com>
Cc: "J. Bruce Fields" <bfields@fieldses.org>
Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@onelan.co.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/exportfs/expfs.c      | 19 ++++++++++++++-----
 include/linux/exportfs.h |  2 ++
 2 files changed, 16 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index 10f137381ac7..606bb074c501 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -341,10 +341,21 @@ static int export_encode_fh(struct inode *inode, struct fid *fid,
 	return type;
 }
 
+int exportfs_encode_inode_fh(struct inode *inode, struct fid *fid,
+			     int *max_len, struct inode *parent)
+{
+	const struct export_operations *nop = inode->i_sb->s_export_op;
+
+	if (nop && nop->encode_fh)
+		return nop->encode_fh(inode, fid->raw, max_len, parent);
+
+	return export_encode_fh(inode, fid, max_len, parent);
+}
+EXPORT_SYMBOL_GPL(exportfs_encode_inode_fh);
+
 int exportfs_encode_fh(struct dentry *dentry, struct fid *fid, int *max_len,
 		int connectable)
 {
-	const struct export_operations *nop = dentry->d_sb->s_export_op;
 	int error;
 	struct dentry *p = NULL;
 	struct inode *inode = dentry->d_inode, *parent = NULL;
@@ -357,10 +368,8 @@ int exportfs_encode_fh(struct dentry *dentry, struct fid *fid, int *max_len,
 		 */
 		parent = p->d_inode;
 	}
-	if (nop && nop->encode_fh)
-		error = nop->encode_fh(inode, fid->raw, max_len, parent);
-	else
-		error = export_encode_fh(inode, fid, max_len, parent);
+
+	error = exportfs_encode_inode_fh(inode, fid, max_len, parent);
 	dput(p);
 
 	return error;
diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h
index 12291a7ee275..c7e6b6392ab8 100644
--- a/include/linux/exportfs.h
+++ b/include/linux/exportfs.h
@@ -177,6 +177,8 @@ struct export_operations {
 	int (*commit_metadata)(struct inode *inode);
 };
 
+extern int exportfs_encode_inode_fh(struct inode *inode, struct fid *fid,
+				    int *max_len, struct inode *parent);
 extern int exportfs_encode_fh(struct dentry *dentry, struct fid *fid,
 	int *max_len, int connectable);
 extern struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid,
-- 
cgit v1.2.3


From ebb351cf78ee6bf3262e6b4b6906f456c05e6d5e Mon Sep 17 00:00:00 2001
From: Roger Pau Monne <roger.pau@citrix.com>
Date: Tue, 4 Dec 2012 15:21:53 +0100
Subject: llist/xen-blkfront: implement safe version of llist_for_each_entry
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Implement a safe version of llist_for_each_entry, and use it in
blkif_free. Previously grants where freed while iterating the list,
which lead to dereferences when trying to fetch the next item.

Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
Acked-by:  Andrew Morton <akpm@linux-foundation.org>
[v2: Move the llist_for_each_entry_safe in llist.h]
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 drivers/block/xen-blkfront.c |  3 ++-
 include/linux/llist.h        | 25 +++++++++++++++++++++++++
 2 files changed, 27 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 96e9b00db081..cfdb033c7107 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -792,6 +792,7 @@ static void blkif_free(struct blkfront_info *info, int suspend)
 {
 	struct llist_node *all_gnts;
 	struct grant *persistent_gnt;
+	struct llist_node *n;
 
 	/* Prevent new requests being issued until we fix things up. */
 	spin_lock_irq(&info->io_lock);
@@ -804,7 +805,7 @@ static void blkif_free(struct blkfront_info *info, int suspend)
 	/* Remove all persistent grants */
 	if (info->persistent_gnts_c) {
 		all_gnts = llist_del_all(&info->persistent_gnts);
-		llist_for_each_entry(persistent_gnt, all_gnts, node) {
+		llist_for_each_entry_safe(persistent_gnt, n, all_gnts, node) {
 			gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL);
 			__free_page(pfn_to_page(persistent_gnt->pfn));
 			kfree(persistent_gnt);
diff --git a/include/linux/llist.h b/include/linux/llist.h
index a5199f6d0e82..d0ab98f73d38 100644
--- a/include/linux/llist.h
+++ b/include/linux/llist.h
@@ -124,6 +124,31 @@ static inline void init_llist_head(struct llist_head *list)
 	     &(pos)->member != NULL;					\
 	     (pos) = llist_entry((pos)->member.next, typeof(*(pos)), member))
 
+/**
+ * llist_for_each_entry_safe - iterate safely against remove over some entries
+ * of lock-less list of given type.
+ * @pos:	the type * to use as a loop cursor.
+ * @n:		another type * to use as a temporary storage.
+ * @node:	the fist entry of deleted list entries.
+ * @member:	the name of the llist_node with the struct.
+ *
+ * In general, some entries of the lock-less list can be traversed
+ * safely only after being removed from list, so start with an entry
+ * instead of list head. This variant allows removal of entries
+ * as we iterate.
+ *
+ * If being used on entries deleted from lock-less list directly, the
+ * traverse order is from the newest to the oldest added entry.  If
+ * you want to traverse from the oldest to the newest, you must
+ * reverse the order by yourself before traversing.
+ */
+#define llist_for_each_entry_safe(pos, n, node, member)		\
+	for ((pos) = llist_entry((node), typeof(*(pos)), member),	\
+	     (n) = (pos)->member.next;					\
+	     &(pos)->member != NULL;					\
+	     (pos) = llist_entry(n, typeof(*(pos)), member),		\
+	     (n) = (&(pos)->member != NULL) ? (pos)->member.next : NULL)
+
 /**
  * llist_empty - tests whether a lock-less list is empty
  * @head:	the list to test
-- 
cgit v1.2.3


From afc59400d6c65bad66d4ad0b2daf879cbff8e23e Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Mon, 10 Dec 2012 18:01:37 -0500
Subject: nfsd4: cleanup: replace rq_resused count by rq_next_page pointer

It may be a matter of personal taste, but I find this makes the code
clearer.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs2acl.c                       |  2 +-
 fs/nfsd/nfs3acl.c                       |  2 +-
 fs/nfsd/nfs3proc.c                      |  6 +++---
 fs/nfsd/nfs3xdr.c                       | 33 ++++++++++++++++-----------------
 fs/nfsd/nfs4xdr.c                       | 24 ++++++++++++------------
 fs/nfsd/nfsxdr.c                        | 11 ++++++-----
 fs/nfsd/vfs.c                           | 18 ++++++++----------
 include/linux/sunrpc/svc.h              |  6 +++---
 net/sunrpc/svc.c                        |  2 +-
 net/sunrpc/svcsock.c                    |  2 ++
 net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 10 +++++-----
 net/sunrpc/xprtrdma/svc_rdma_sendto.c   |  4 +++-
 12 files changed, 61 insertions(+), 59 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index b314888825d5..9170861c804a 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -253,7 +253,7 @@ static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p,
 		(resp->mask & NFS_ACL)   ? resp->acl_access  : NULL,
 		(resp->mask & NFS_DFACL) ? resp->acl_default : NULL);
 	while (w > 0) {
-		if (!rqstp->rq_respages[rqstp->rq_resused++])
+		if (!*(rqstp->rq_next_page++))
 			return 0;
 		w -= PAGE_SIZE;
 	}
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index a596e9d987e4..9cbc1a841f87 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -184,7 +184,7 @@ static int nfs3svc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p,
 			(resp->mask & NFS_ACL)   ? resp->acl_access  : NULL,
 			(resp->mask & NFS_DFACL) ? resp->acl_default : NULL);
 		while (w > 0) {
-			if (!rqstp->rq_respages[rqstp->rq_resused++])
+			if (!*(rqstp->rq_next_page++))
 				return 0;
 			w -= PAGE_SIZE;
 		}
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 97d90d1c8608..1fc02dfdc5c4 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -460,7 +460,7 @@ nfsd3_proc_readdirplus(struct svc_rqst *rqstp, struct nfsd3_readdirargs *argp,
 	__be32	nfserr;
 	int	count = 0;
 	loff_t	offset;
-	int	i;
+	struct page **p;
 	caddr_t	page_addr = NULL;
 
 	dprintk("nfsd: READDIR+(3) %s %d bytes at %d\n",
@@ -484,8 +484,8 @@ nfsd3_proc_readdirplus(struct svc_rqst *rqstp, struct nfsd3_readdirargs *argp,
 				     &resp->common,
 				     nfs3svc_encode_entry_plus);
 	memcpy(resp->verf, argp->verf, 8);
-	for (i=1; i<rqstp->rq_resused ; i++) {
-		page_addr = page_address(rqstp->rq_respages[i]);
+	for (p = rqstp->rq_respages + 1; p < rqstp->rq_next_page; p++) {
+		page_addr = page_address(*p);
 
 		if (((caddr_t)resp->buffer >= page_addr) &&
 		    ((caddr_t)resp->buffer < page_addr + PAGE_SIZE)) {
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 2b8618de6c27..324c0baf7cda 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -325,7 +325,7 @@ nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p,
 					struct nfsd3_readargs *args)
 {
 	unsigned int len;
-	int v,pn;
+	int v;
 	u32 max_blocksize = svc_max_payload(rqstp);
 
 	if (!(p = decode_fh(p, &args->fh)))
@@ -340,8 +340,9 @@ nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p,
 	/* set up the kvec */
 	v=0;
 	while (len > 0) {
-		pn = rqstp->rq_resused++;
-		rqstp->rq_vec[v].iov_base = page_address(rqstp->rq_respages[pn]);
+		struct page *p = *(rqstp->rq_next_page++);
+
+		rqstp->rq_vec[v].iov_base = page_address(p);
 		rqstp->rq_vec[v].iov_len = len < PAGE_SIZE? len : PAGE_SIZE;
 		len -= rqstp->rq_vec[v].iov_len;
 		v++;
@@ -463,8 +464,7 @@ nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p,
 	len = ntohl(*p++);
 	if (len == 0 || len > NFS3_MAXPATHLEN || len >= PAGE_SIZE)
 		return 0;
-	args->tname = new =
-		page_address(rqstp->rq_respages[rqstp->rq_resused++]);
+	args->tname = new = page_address(*(rqstp->rq_next_page++));
 	args->tlen = len;
 	/* first copy and check from the first page */
 	old = (char*)p;
@@ -535,8 +535,7 @@ nfs3svc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p,
 {
 	if (!(p = decode_fh(p, &args->fh)))
 		return 0;
-	args->buffer =
-		page_address(rqstp->rq_respages[rqstp->rq_resused++]);
+	args->buffer = page_address(*(rqstp->rq_next_page++));
 
 	return xdr_argsize_check(rqstp, p);
 }
@@ -567,8 +566,7 @@ nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p,
 	if (args->count > PAGE_SIZE)
 		args->count = PAGE_SIZE;
 
-	args->buffer =
-		page_address(rqstp->rq_respages[rqstp->rq_resused++]);
+	args->buffer = page_address(*(rqstp->rq_next_page++));
 
 	return xdr_argsize_check(rqstp, p);
 }
@@ -577,7 +575,7 @@ int
 nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, __be32 *p,
 					struct nfsd3_readdirargs *args)
 {
-	int len, pn;
+	int len;
 	u32 max_blocksize = svc_max_payload(rqstp);
 
 	if (!(p = decode_fh(p, &args->fh)))
@@ -592,9 +590,9 @@ nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, __be32 *p,
 	args->count = len;
 
 	while (len > 0) {
-		pn = rqstp->rq_resused++;
+		struct page *p = *(rqstp->rq_next_page++);
 		if (!args->buffer)
-			args->buffer = page_address(rqstp->rq_respages[pn]);
+			args->buffer = page_address(p);
 		len -= PAGE_SIZE;
 	}
 
@@ -880,7 +878,7 @@ encode_entry(struct readdir_cd *ccd, const char *name, int namlen,
 		       					common);
 	__be32		*p = cd->buffer;
 	caddr_t		curr_page_addr = NULL;
-	int		pn;		/* current page number */
+	struct page **	page;
 	int		slen;		/* string (name) length */
 	int		elen;		/* estimated entry length in words */
 	int		num_entry_words = 0;	/* actual number of words */
@@ -917,8 +915,9 @@ encode_entry(struct readdir_cd *ccd, const char *name, int namlen,
 	}
 
 	/* determine which page in rq_respages[] we are currently filling */
-	for (pn=1; pn < cd->rqstp->rq_resused; pn++) {
-		curr_page_addr = page_address(cd->rqstp->rq_respages[pn]);
+	for (page = cd->rqstp->rq_respages + 1;
+				page < cd->rqstp->rq_next_page; page++) {
+		curr_page_addr = page_address(*page);
 
 		if (((caddr_t)cd->buffer >= curr_page_addr) &&
 		    ((caddr_t)cd->buffer <  curr_page_addr + PAGE_SIZE))
@@ -933,14 +932,14 @@ encode_entry(struct readdir_cd *ccd, const char *name, int namlen,
 		if (plus)
 			p = encode_entryplus_baggage(cd, p, name, namlen);
 		num_entry_words = p - cd->buffer;
-	} else if (cd->rqstp->rq_respages[pn+1] != NULL) {
+	} else if (*(page+1) != NULL) {
 		/* temporarily encode entry into next page, then move back to
 		 * current and next page in rq_respages[] */
 		__be32 *p1, *tmp;
 		int len1, len2;
 
 		/* grab next page for temporary storage of entry */
-		p1 = tmp = page_address(cd->rqstp->rq_respages[pn+1]);
+		p1 = tmp = page_address(*(page+1));
 
 		p1 = encode_entry_baggage(cd, p1, name, namlen, ino);
 
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index d7a3be5ab777..0dc11586682f 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -2906,7 +2906,8 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
 		  struct nfsd4_read *read)
 {
 	u32 eof;
-	int v, pn;
+	int v;
+	struct page *page;
 	unsigned long maxcount; 
 	long len;
 	__be32 *p;
@@ -2925,16 +2926,15 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
 	len = maxcount;
 	v = 0;
 	while (len > 0) {
-		pn = resp->rqstp->rq_resused;
-		if (!resp->rqstp->rq_respages[pn]) { /* ran out of pages */
+		page = *(resp->rqstp->rq_next_page);
+		if (!page) { /* ran out of pages */
 			maxcount -= len;
 			break;
 		}
-		resp->rqstp->rq_vec[v].iov_base =
-			page_address(resp->rqstp->rq_respages[pn]);
+		resp->rqstp->rq_vec[v].iov_base = page_address(page);
 		resp->rqstp->rq_vec[v].iov_len =
 			len < PAGE_SIZE ? len : PAGE_SIZE;
-		resp->rqstp->rq_resused++;
+		resp->rqstp->rq_next_page++;
 		v++;
 		len -= PAGE_SIZE;
 	}
@@ -2980,10 +2980,10 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd
 		return nfserr;
 	if (resp->xbuf->page_len)
 		return nfserr_resource;
-	if (!resp->rqstp->rq_respages[resp->rqstp->rq_resused])
+	if (!*resp->rqstp->rq_next_page)
 		return nfserr_resource;
 
-	page = page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused++]);
+	page = page_address(*(resp->rqstp->rq_next_page++));
 
 	maxcount = PAGE_SIZE;
 	RESERVE_SPACE(4);
@@ -3031,7 +3031,7 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4
 		return nfserr;
 	if (resp->xbuf->page_len)
 		return nfserr_resource;
-	if (!resp->rqstp->rq_respages[resp->rqstp->rq_resused])
+	if (!*resp->rqstp->rq_next_page)
 		return nfserr_resource;
 
 	RESERVE_SPACE(NFS4_VERIFIER_SIZE);
@@ -3059,7 +3059,7 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4
 		goto err_no_verf;
 	}
 
-	page = page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused++]);
+	page = page_address(*(resp->rqstp->rq_next_page++));
 	readdir->common.err = 0;
 	readdir->buflen = maxcount;
 	readdir->buffer = page;
@@ -3082,8 +3082,8 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4
 	p = readdir->buffer;
 	*p++ = 0;	/* no more entries */
 	*p++ = htonl(readdir->common.err == nfserr_eof);
-	resp->xbuf->page_len = ((char*)p) - (char*)page_address(
-		resp->rqstp->rq_respages[resp->rqstp->rq_resused-1]);
+	resp->xbuf->page_len = ((char*)p) -
+		(char*)page_address(*(resp->rqstp->rq_next_page-1));
 
 	/* Use rest of head for padding and remaining ops: */
 	resp->xbuf->tail[0].iov_base = tailbase;
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index 65ec595e2226..979b42106979 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -246,7 +246,7 @@ nfssvc_decode_readargs(struct svc_rqst *rqstp, __be32 *p,
 					struct nfsd_readargs *args)
 {
 	unsigned int len;
-	int v,pn;
+	int v;
 	if (!(p = decode_fh(p, &args->fh)))
 		return 0;
 
@@ -262,8 +262,9 @@ nfssvc_decode_readargs(struct svc_rqst *rqstp, __be32 *p,
 	 */
 	v=0;
 	while (len > 0) {
-		pn = rqstp->rq_resused++;
-		rqstp->rq_vec[v].iov_base = page_address(rqstp->rq_respages[pn]);
+		struct page *p = *(rqstp->rq_next_page++);
+
+		rqstp->rq_vec[v].iov_base = page_address(p);
 		rqstp->rq_vec[v].iov_len = len < PAGE_SIZE?len:PAGE_SIZE;
 		len -= rqstp->rq_vec[v].iov_len;
 		v++;
@@ -355,7 +356,7 @@ nfssvc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd_readli
 {
 	if (!(p = decode_fh(p, &args->fh)))
 		return 0;
-	args->buffer = page_address(rqstp->rq_respages[rqstp->rq_resused++]);
+	args->buffer = page_address(*(rqstp->rq_next_page++));
 
 	return xdr_argsize_check(rqstp, p);
 }
@@ -396,7 +397,7 @@ nfssvc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p,
 	if (args->count > PAGE_SIZE)
 		args->count = PAGE_SIZE;
 
-	args->buffer = page_address(rqstp->rq_respages[rqstp->rq_resused++]);
+	args->buffer = page_address(*(rqstp->rq_next_page++));
 
 	return xdr_argsize_check(rqstp, p);
 }
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index b31e46eeb026..f0a6d88d7fff 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -886,7 +886,7 @@ nfsd_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
 		  struct splice_desc *sd)
 {
 	struct svc_rqst *rqstp = sd->u.data;
-	struct page **pp = rqstp->rq_respages + rqstp->rq_resused;
+	struct page **pp = rqstp->rq_next_page;
 	struct page *page = buf->page;
 	size_t size;
 
@@ -894,17 +894,15 @@ nfsd_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
 
 	if (rqstp->rq_res.page_len == 0) {
 		get_page(page);
-		put_page(*pp);
-		*pp = page;
-		rqstp->rq_resused++;
+		put_page(*rqstp->rq_next_page);
+		*(rqstp->rq_next_page++) = page;
 		rqstp->rq_res.page_base = buf->offset;
 		rqstp->rq_res.page_len = size;
 	} else if (page != pp[-1]) {
 		get_page(page);
-		if (*pp)
-			put_page(*pp);
-		*pp = page;
-		rqstp->rq_resused++;
+		if (*rqstp->rq_next_page)
+			put_page(*rqstp->rq_next_page);
+		*(rqstp->rq_next_page++) = page;
 		rqstp->rq_res.page_len += size;
 	} else
 		rqstp->rq_res.page_len += size;
@@ -936,8 +934,8 @@ nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
 			.u.data		= rqstp,
 		};
 
-		WARN_ON_ONCE(rqstp->rq_resused != 1);
-		rqstp->rq_resused = 1;
+		WARN_ON_ONCE(rqstp->rq_next_page != rqstp->rq_respages + 1);
+		rqstp->rq_next_page = rqstp->rq_respages + 1;
 		host_err = splice_direct_to_actor(file, &sd, nfsd_direct_splice_actor);
 	} else {
 		oldfs = get_fs();
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index d83db800fe02..676ddf53b3ee 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -243,6 +243,7 @@ struct svc_rqst {
 	struct page *		rq_pages[RPCSVC_MAXPAGES];
 	struct page *		*rq_respages;	/* points into rq_pages */
 	int			rq_resused;	/* number of pages used for result */
+	struct page *		*rq_next_page; /* next reply page to use */
 
 	struct kvec		rq_vec[RPCSVC_MAXPAGES]; /* generally useful.. */
 
@@ -338,9 +339,8 @@ xdr_ressize_check(struct svc_rqst *rqstp, __be32 *p)
 
 static inline void svc_free_res_pages(struct svc_rqst *rqstp)
 {
-	while (rqstp->rq_resused) {
-		struct page **pp = (rqstp->rq_respages +
-				    --rqstp->rq_resused);
+	while (rqstp->rq_next_page != rqstp->rq_respages) {
+		struct page **pp = --rqstp->rq_next_page;
 		if (*pp) {
 			put_page(*pp);
 			*pp = NULL;
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 529400d59755..c6abf1a6ba95 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1297,7 +1297,7 @@ svc_process(struct svc_rqst *rqstp)
 	 * Setup response xdr_buf.
 	 * Initially it has just one page
 	 */
-	rqstp->rq_resused = 1;
+	rqstp->rq_next_page = &rqstp->rq_respages[1];
 	resv->iov_base = page_address(rqstp->rq_respages[0]);
 	resv->iov_len = 0;
 	rqstp->rq_res.pages = rqstp->rq_respages + 1;
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index d8e5adfeac30..dcd5669c5154 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -601,6 +601,7 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp)
 		rqstp->rq_respages = rqstp->rq_pages + 1 +
 			DIV_ROUND_UP(rqstp->rq_arg.page_len, PAGE_SIZE);
 	}
+	rqstp->rq_next_page = rqstp->rq_respages+1;
 
 	if (serv->sv_stats)
 		serv->sv_stats->netudpcnt++;
@@ -1066,6 +1067,7 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
 						svsk->sk_datalen + want);
 
 	rqstp->rq_respages = &rqstp->rq_pages[pnum];
+	rqstp->rq_next_page = rqstp->rq_respages + 1;
 
 	/* Now receive data */
 	len = svc_partial_recvfrom(rqstp, vec, pnum, want, base);
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 41cb63b623df..0ce75524ed21 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -521,11 +521,11 @@ next_sge:
 		rqstp->rq_pages[ch_no] = NULL;
 
 	/*
-	 * Detach res pages. svc_release must see a resused count of
-	 * zero or it will attempt to put them.
+	 * Detach res pages. If svc_release sees any it will attempt to
+	 * put them.
 	 */
-	while (rqstp->rq_resused)
-		rqstp->rq_respages[--rqstp->rq_resused] = NULL;
+	while (rqstp->rq_next_page != rqstp->rq_respages)
+		*(--rqstp->rq_next_page) = NULL;
 
 	return err;
 }
@@ -550,7 +550,7 @@ static int rdma_read_complete(struct svc_rqst *rqstp,
 
 	/* rq_respages starts after the last arg page */
 	rqstp->rq_respages = &rqstp->rq_arg.pages[page_no];
-	rqstp->rq_resused = 0;
+	rqstp->rq_next_page = &rqstp->rq_arg.pages[page_no];
 
 	/* Rebuild rq_arg head and tail. */
 	rqstp->rq_arg.head[0] = head->arg.head[0];
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 42eb7ba0b903..c1d124dc772b 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -548,6 +548,7 @@ static int send_reply(struct svcxprt_rdma *rdma,
 	int sge_no;
 	int sge_bytes;
 	int page_no;
+	int pages;
 	int ret;
 
 	/* Post a recv buffer to handle another request. */
@@ -611,7 +612,8 @@ static int send_reply(struct svcxprt_rdma *rdma,
 	 * respages array. They are our pages until the I/O
 	 * completes.
 	 */
-	for (page_no = 0; page_no < rqstp->rq_resused; page_no++) {
+	pages = rqstp->rq_next_page - rqstp->rq_respages;
+	for (page_no = 0; page_no < pages; page_no++) {
 		ctxt->pages[page_no+1] = rqstp->rq_respages[page_no];
 		ctxt->count++;
 		rqstp->rq_respages[page_no] = NULL;
-- 
cgit v1.2.3


From 06ca287dbac9cc19d04ac2901b8c4882c03795ff Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Tue, 16 Oct 2012 23:56:14 +1030
Subject: virtio: move queue_index and num_free fields into core struct
 virtqueue.

They're generic concepts, so hoist them.  This also avoids accessor
functions (though kept around for merge with DaveM's net tree).

This goes even further than Jason Wang's 17bb6d4088 patch
("virtio-ring: move queue_index to vring_virtqueue") which moved the
queue_index from the specific transport.

Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/virtio/virtio_mmio.c |  4 ++--
 drivers/virtio/virtio_pci.c  |  6 ++----
 drivers/virtio/virtio_ring.c | 34 +++++++++++-----------------------
 include/linux/virtio.h       | 14 +++++++++++++-
 4 files changed, 28 insertions(+), 30 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c
index 6b1b7e184939..5a0e1d32ce13 100644
--- a/drivers/virtio/virtio_mmio.c
+++ b/drivers/virtio/virtio_mmio.c
@@ -225,7 +225,7 @@ static void vm_notify(struct virtqueue *vq)
 
 	/* We write the queue's selector into the notification register to
 	 * signal the other end */
-	writel(virtqueue_get_queue_index(vq), vm_dev->base + VIRTIO_MMIO_QUEUE_NOTIFY);
+	writel(vq->index, vm_dev->base + VIRTIO_MMIO_QUEUE_NOTIFY);
 }
 
 /* Notify all virtqueues on an interrupt. */
@@ -266,7 +266,7 @@ static void vm_del_vq(struct virtqueue *vq)
 	struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vq->vdev);
 	struct virtio_mmio_vq_info *info = vq->priv;
 	unsigned long flags, size;
-	unsigned int index = virtqueue_get_queue_index(vq);
+	unsigned int index = vq->index;
 
 	spin_lock_irqsave(&vm_dev->lock, flags);
 	list_del(&info->node);
diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
index b59237c1d540..e3ecc94591ad 100644
--- a/drivers/virtio/virtio_pci.c
+++ b/drivers/virtio/virtio_pci.c
@@ -203,8 +203,7 @@ static void vp_notify(struct virtqueue *vq)
 
 	/* we write the queue's selector into the notification register to
 	 * signal the other end */
-	iowrite16(virtqueue_get_queue_index(vq),
-		  vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NOTIFY);
+	iowrite16(vq->index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NOTIFY);
 }
 
 /* Handle a configuration change: Tell driver if it wants to know. */
@@ -479,8 +478,7 @@ static void vp_del_vq(struct virtqueue *vq)
 	list_del(&info->node);
 	spin_unlock_irqrestore(&vp_dev->lock, flags);
 
-	iowrite16(virtqueue_get_queue_index(vq),
-		vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL);
+	iowrite16(vq->index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL);
 
 	if (vp_dev->msix_enabled) {
 		iowrite16(VIRTIO_MSI_NO_VECTOR,
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 286c30cb393d..33a4ce009bcc 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -93,8 +93,6 @@ struct vring_virtqueue
 	/* Host publishes avail event idx */
 	bool event;
 
-	/* Number of free buffers */
-	unsigned int num_free;
 	/* Head of free buffer list. */
 	unsigned int free_head;
 	/* Number we've added since last sync. */
@@ -106,9 +104,6 @@ struct vring_virtqueue
 	/* How to notify other side. FIXME: commonalize hcalls! */
 	void (*notify)(struct virtqueue *vq);
 
-	/* Index of the queue */
-	int queue_index;
-
 #ifdef DEBUG
 	/* They're supposed to lock for us. */
 	unsigned int in_use;
@@ -167,7 +162,7 @@ static int vring_add_indirect(struct vring_virtqueue *vq,
 	desc[i-1].next = 0;
 
 	/* We're about to use a buffer */
-	vq->num_free--;
+	vq->vq.num_free--;
 
 	/* Use a single buffer which doesn't continue */
 	head = vq->free_head;
@@ -181,13 +176,6 @@ static int vring_add_indirect(struct vring_virtqueue *vq,
 	return head;
 }
 
-int virtqueue_get_queue_index(struct virtqueue *_vq)
-{
-	struct vring_virtqueue *vq = to_vvq(_vq);
-	return vq->queue_index;
-}
-EXPORT_SYMBOL_GPL(virtqueue_get_queue_index);
-
 /**
  * virtqueue_add_buf - expose buffer to other end
  * @vq: the struct virtqueue we're talking about.
@@ -235,7 +223,7 @@ int virtqueue_add_buf(struct virtqueue *_vq,
 
 	/* If the host supports indirect descriptor tables, and we have multiple
 	 * buffers, then go indirect. FIXME: tune this threshold */
-	if (vq->indirect && (out + in) > 1 && vq->num_free) {
+	if (vq->indirect && (out + in) > 1 && vq->vq.num_free) {
 		head = vring_add_indirect(vq, sg, out, in, gfp);
 		if (likely(head >= 0))
 			goto add_head;
@@ -244,9 +232,9 @@ int virtqueue_add_buf(struct virtqueue *_vq,
 	BUG_ON(out + in > vq->vring.num);
 	BUG_ON(out + in == 0);
 
-	if (vq->num_free < out + in) {
+	if (vq->vq.num_free < out + in) {
 		pr_debug("Can't add buf len %i - avail = %i\n",
-			 out + in, vq->num_free);
+			 out + in, vq->vq.num_free);
 		/* FIXME: for historical reasons, we force a notify here if
 		 * there are outgoing parts to the buffer.  Presumably the
 		 * host should service the ring ASAP. */
@@ -257,7 +245,7 @@ int virtqueue_add_buf(struct virtqueue *_vq,
 	}
 
 	/* We're about to use some buffers from the free list. */
-	vq->num_free -= out + in;
+	vq->vq.num_free -= out + in;
 
 	head = vq->free_head;
 	for (i = vq->free_head; out; i = vq->vring.desc[i].next, out--) {
@@ -303,7 +291,7 @@ add_head:
 	pr_debug("Added buffer head %i to %p\n", head, vq);
 	END_USE(vq);
 
-	return vq->num_free;
+	return vq->vq.num_free;
 }
 EXPORT_SYMBOL_GPL(virtqueue_add_buf);
 
@@ -400,13 +388,13 @@ static void detach_buf(struct vring_virtqueue *vq, unsigned int head)
 
 	while (vq->vring.desc[i].flags & VRING_DESC_F_NEXT) {
 		i = vq->vring.desc[i].next;
-		vq->num_free++;
+		vq->vq.num_free++;
 	}
 
 	vq->vring.desc[i].next = vq->free_head;
 	vq->free_head = head;
 	/* Plus final descriptor */
-	vq->num_free++;
+	vq->vq.num_free++;
 }
 
 static inline bool more_used(const struct vring_virtqueue *vq)
@@ -606,7 +594,7 @@ void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
 		return buf;
 	}
 	/* That should have freed everything. */
-	BUG_ON(vq->num_free != vq->vring.num);
+	BUG_ON(vq->vq.num_free != vq->vring.num);
 
 	END_USE(vq);
 	return NULL;
@@ -660,12 +648,13 @@ struct virtqueue *vring_new_virtqueue(unsigned int index,
 	vq->vq.callback = callback;
 	vq->vq.vdev = vdev;
 	vq->vq.name = name;
+	vq->vq.num_free = num;
+	vq->vq.index = index;
 	vq->notify = notify;
 	vq->weak_barriers = weak_barriers;
 	vq->broken = false;
 	vq->last_used_idx = 0;
 	vq->num_added = 0;
-	vq->queue_index = index;
 	list_add_tail(&vq->vq.list, &vdev->vqs);
 #ifdef DEBUG
 	vq->in_use = false;
@@ -680,7 +669,6 @@ struct virtqueue *vring_new_virtqueue(unsigned int index,
 		vq->vring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT;
 
 	/* Put everything in free lists. */
-	vq->num_free = num;
 	vq->free_head = 0;
 	for (i = 0; i < num-1; i++) {
 		vq->vring.desc[i].next = i+1;
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 533b1157f22e..4b8f17d90458 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -16,12 +16,20 @@
  * @name: the name of this virtqueue (mainly for debugging)
  * @vdev: the virtio device this queue was created for.
  * @priv: a pointer for the virtqueue implementation to use.
+ * @index: the zero-based ordinal number for this queue.
+ * @num_free: number of elements we expect to be able to fit.
+ *
+ * A note on @num_free: with indirect buffers, each buffer needs one
+ * element in the queue, otherwise a buffer will need one element per
+ * sg element.
  */
 struct virtqueue {
 	struct list_head list;
 	void (*callback)(struct virtqueue *vq);
 	const char *name;
 	struct virtio_device *vdev;
+	unsigned int index;
+	unsigned int num_free;
 	void *priv;
 };
 
@@ -50,7 +58,11 @@ void *virtqueue_detach_unused_buf(struct virtqueue *vq);
 
 unsigned int virtqueue_get_vring_size(struct virtqueue *vq);
 
-int virtqueue_get_queue_index(struct virtqueue *vq);
+/* FIXME: Obsolete accessor, but required for virtio_net merge. */
+static inline unsigned int virtqueue_get_queue_index(struct virtqueue *vq)
+{
+	return vq->index;
+}
 
 /**
  * virtio_device - representation of a device using virtio
-- 
cgit v1.2.3


From 9bffdca8c64a72ac54c47a552734ab457bc720d4 Mon Sep 17 00:00:00 2001
From: Wanlong Gao <gaowanlong@cn.fujitsu.com>
Date: Tue, 11 Dec 2012 11:04:50 +1030
Subject: virtio: use dev_to_virtio wrapper in virtio

Use dev_to_virtio wrapper in virtio to make code clearly.

Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Signed-off-by: Wanlong Gao <gaowanlong@cn.fujitsu.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/virtio/virtio.c | 19 +++++++++----------
 include/linux/virtio.h  |  6 +++++-
 2 files changed, 14 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
index 1e8659ca27ef..1346ae8e14f3 100644
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c
@@ -10,33 +10,32 @@ static DEFINE_IDA(virtio_index_ida);
 static ssize_t device_show(struct device *_d,
 			   struct device_attribute *attr, char *buf)
 {
-	struct virtio_device *dev = container_of(_d,struct virtio_device,dev);
+	struct virtio_device *dev = dev_to_virtio(_d);
 	return sprintf(buf, "0x%04x\n", dev->id.device);
 }
 static ssize_t vendor_show(struct device *_d,
 			   struct device_attribute *attr, char *buf)
 {
-	struct virtio_device *dev = container_of(_d,struct virtio_device,dev);
+	struct virtio_device *dev = dev_to_virtio(_d);
 	return sprintf(buf, "0x%04x\n", dev->id.vendor);
 }
 static ssize_t status_show(struct device *_d,
 			   struct device_attribute *attr, char *buf)
 {
-	struct virtio_device *dev = container_of(_d,struct virtio_device,dev);
+	struct virtio_device *dev = dev_to_virtio(_d);
 	return sprintf(buf, "0x%08x\n", dev->config->get_status(dev));
 }
 static ssize_t modalias_show(struct device *_d,
 			     struct device_attribute *attr, char *buf)
 {
-	struct virtio_device *dev = container_of(_d,struct virtio_device,dev);
-
+	struct virtio_device *dev = dev_to_virtio(_d);
 	return sprintf(buf, "virtio:d%08Xv%08X\n",
 		       dev->id.device, dev->id.vendor);
 }
 static ssize_t features_show(struct device *_d,
 			     struct device_attribute *attr, char *buf)
 {
-	struct virtio_device *dev = container_of(_d, struct virtio_device, dev);
+	struct virtio_device *dev = dev_to_virtio(_d);
 	unsigned int i;
 	ssize_t len = 0;
 
@@ -71,7 +70,7 @@ static inline int virtio_id_match(const struct virtio_device *dev,
 static int virtio_dev_match(struct device *_dv, struct device_driver *_dr)
 {
 	unsigned int i;
-	struct virtio_device *dev = container_of(_dv,struct virtio_device,dev);
+	struct virtio_device *dev = dev_to_virtio(_dv);
 	const struct virtio_device_id *ids;
 
 	ids = container_of(_dr, struct virtio_driver, driver)->id_table;
@@ -83,7 +82,7 @@ static int virtio_dev_match(struct device *_dv, struct device_driver *_dr)
 
 static int virtio_uevent(struct device *_dv, struct kobj_uevent_env *env)
 {
-	struct virtio_device *dev = container_of(_dv,struct virtio_device,dev);
+	struct virtio_device *dev = dev_to_virtio(_dv);
 
 	return add_uevent_var(env, "MODALIAS=virtio:d%08Xv%08X",
 			      dev->id.device, dev->id.vendor);
@@ -111,7 +110,7 @@ EXPORT_SYMBOL_GPL(virtio_check_driver_offered_feature);
 static int virtio_dev_probe(struct device *_d)
 {
 	int err, i;
-	struct virtio_device *dev = container_of(_d,struct virtio_device,dev);
+	struct virtio_device *dev = dev_to_virtio(_d);
 	struct virtio_driver *drv = container_of(dev->dev.driver,
 						 struct virtio_driver, driver);
 	u32 device_features;
@@ -152,7 +151,7 @@ static int virtio_dev_probe(struct device *_d)
 
 static int virtio_dev_remove(struct device *_d)
 {
-	struct virtio_device *dev = container_of(_d,struct virtio_device,dev);
+	struct virtio_device *dev = dev_to_virtio(_d);
 	struct virtio_driver *drv = container_of(dev->dev.driver,
 						 struct virtio_driver, driver);
 
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 4b8f17d90458..5f1f80c76468 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -85,7 +85,11 @@ struct virtio_device {
 	void *priv;
 };
 
-#define dev_to_virtio(dev) container_of(dev, struct virtio_device, dev)
+static inline struct virtio_device *dev_to_virtio(struct device *_dev)
+{
+	return container_of(_dev, struct virtio_device, dev);
+}
+
 int register_virtio_device(struct virtio_device *dev);
 void unregister_virtio_device(struct virtio_device *dev);
 
-- 
cgit v1.2.3


From 9a2bdcc85d28506d4e5d4a9618fb133a3f40945d Mon Sep 17 00:00:00 2001
From: Wanlong Gao <gaowanlong@cn.fujitsu.com>
Date: Mon, 10 Dec 2012 16:38:33 +0800
Subject: virtio: add drv_to_virtio to make code clearly

Add drv_to_virtio wrapper to get virtio_driver from device_driver.

Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Signed-off-by: Wanlong Gao <gaowanlong@cn.fujitsu.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/virtio/virtio.c | 11 ++++-------
 include/linux/virtio.h  |  5 +++++
 2 files changed, 9 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
index 1346ae8e14f3..1c01ac3fad08 100644
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c
@@ -73,7 +73,7 @@ static int virtio_dev_match(struct device *_dv, struct device_driver *_dr)
 	struct virtio_device *dev = dev_to_virtio(_dv);
 	const struct virtio_device_id *ids;
 
-	ids = container_of(_dr, struct virtio_driver, driver)->id_table;
+	ids = drv_to_virtio(_dr)->id_table;
 	for (i = 0; ids[i].device; i++)
 		if (virtio_id_match(dev, &ids[i]))
 			return 1;
@@ -97,8 +97,7 @@ void virtio_check_driver_offered_feature(const struct virtio_device *vdev,
 					 unsigned int fbit)
 {
 	unsigned int i;
-	struct virtio_driver *drv = container_of(vdev->dev.driver,
-						 struct virtio_driver, driver);
+	struct virtio_driver *drv = drv_to_virtio(vdev->dev.driver);
 
 	for (i = 0; i < drv->feature_table_size; i++)
 		if (drv->feature_table[i] == fbit)
@@ -111,8 +110,7 @@ static int virtio_dev_probe(struct device *_d)
 {
 	int err, i;
 	struct virtio_device *dev = dev_to_virtio(_d);
-	struct virtio_driver *drv = container_of(dev->dev.driver,
-						 struct virtio_driver, driver);
+	struct virtio_driver *drv = drv_to_virtio(dev->dev.driver);
 	u32 device_features;
 
 	/* We have a driver! */
@@ -152,8 +150,7 @@ static int virtio_dev_probe(struct device *_d)
 static int virtio_dev_remove(struct device *_d)
 {
 	struct virtio_device *dev = dev_to_virtio(_d);
-	struct virtio_driver *drv = container_of(dev->dev.driver,
-						 struct virtio_driver, driver);
+	struct virtio_driver *drv = drv_to_virtio(dev->dev.driver);
 
 	drv->remove(dev);
 
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 5f1f80c76468..cf8adb1f5b2c 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -119,6 +119,11 @@ struct virtio_driver {
 #endif
 };
 
+static inline struct virtio_driver *drv_to_virtio(struct device_driver *drv)
+{
+	return container_of(drv, struct virtio_driver, driver);
+}
+
 int register_virtio_driver(struct virtio_driver *drv);
 void unregister_virtio_driver(struct virtio_driver *drv);
 #endif /* _LINUX_VIRTIO_H */
-- 
cgit v1.2.3


From 7a64bf05b2a6fe3703062d13d389e3eb904741c6 Mon Sep 17 00:00:00 2001
From: Glauber Costa <glommer@parallels.com>
Date: Tue, 18 Dec 2012 14:21:51 -0800
Subject: mm: add a __GFP_KMEMCG flag

This flag is used to indicate to the callees that this allocation is a
kernel allocation in process context, and should be accounted to current's
memcg.

Signed-off-by: Glauber Costa <glommer@parallels.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Rik van Riel <riel@redhat.com>
Acked-by: Mel Gorman <mel@csn.ul.ie>
Acked-by: Kamezawa Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Michal Hocko <mhocko@suse.cz>
Acked-by: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Suleiman Souhlal <suleiman@google.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Frederic Weisbecker <fweisbec@redhat.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: JoonSoo Kim <js1304@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/gfp.h             | 2 ++
 include/trace/events/gfpflags.h | 1 +
 2 files changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index f74856e17e48..643c9a6f7f34 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -30,6 +30,7 @@ struct vm_area_struct;
 #define ___GFP_HARDWALL		0x20000u
 #define ___GFP_THISNODE		0x40000u
 #define ___GFP_RECLAIMABLE	0x80000u
+#define ___GFP_KMEMCG		0x100000u
 #define ___GFP_NOTRACK		0x200000u
 #define ___GFP_NO_KSWAPD	0x400000u
 #define ___GFP_OTHER_NODE	0x800000u
@@ -89,6 +90,7 @@ struct vm_area_struct;
 
 #define __GFP_NO_KSWAPD	((__force gfp_t)___GFP_NO_KSWAPD)
 #define __GFP_OTHER_NODE ((__force gfp_t)___GFP_OTHER_NODE) /* On behalf of other node */
+#define __GFP_KMEMCG	((__force gfp_t)___GFP_KMEMCG) /* Allocation comes from a memcg-accounted resource */
 #define __GFP_WRITE	((__force gfp_t)___GFP_WRITE)	/* Allocator intends to dirty page */
 
 /*
diff --git a/include/trace/events/gfpflags.h b/include/trace/events/gfpflags.h
index d6fd8e5b14b7..1eddbf1557f2 100644
--- a/include/trace/events/gfpflags.h
+++ b/include/trace/events/gfpflags.h
@@ -34,6 +34,7 @@
 	{(unsigned long)__GFP_HARDWALL,		"GFP_HARDWALL"},	\
 	{(unsigned long)__GFP_THISNODE,		"GFP_THISNODE"},	\
 	{(unsigned long)__GFP_RECLAIMABLE,	"GFP_RECLAIMABLE"},	\
+	{(unsigned long)__GFP_KMEMCG,		"GFP_KMEMCG"},		\
 	{(unsigned long)__GFP_MOVABLE,		"GFP_MOVABLE"},		\
 	{(unsigned long)__GFP_NOTRACK,		"GFP_NOTRACK"},		\
 	{(unsigned long)__GFP_NO_KSWAPD,	"GFP_NO_KSWAPD"},	\
-- 
cgit v1.2.3


From 7ae1e1d0f8ac2927ed7e3ca6d15e42d485903459 Mon Sep 17 00:00:00 2001
From: Glauber Costa <glommer@parallels.com>
Date: Tue, 18 Dec 2012 14:21:56 -0800
Subject: memcg: kmem controller infrastructure

Introduce infrastructure for tracking kernel memory pages to a given
memcg.  This will happen whenever the caller includes the flag
__GFP_KMEMCG flag, and the task belong to a memcg other than the root.

In memcontrol.h those functions are wrapped in inline acessors.  The idea
is to later on, patch those with static branches, so we don't incur any
overhead when no mem cgroups with limited kmem are being used.

Users of this functionality shall interact with the memcg core code
through the following functions:

memcg_kmem_newpage_charge: will return true if the group can handle the
                           allocation. At this point, struct page is not
                           yet allocated.

memcg_kmem_commit_charge: will either revert the charge, if struct page
                          allocation failed, or embed memcg information
                          into page_cgroup.

memcg_kmem_uncharge_page: called at free time, will revert the charge.

Signed-off-by: Glauber Costa <glommer@parallels.com>
Acked-by: Michal Hocko <mhocko@suse.cz>
Acked-by: Kamezawa Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Frederic Weisbecker <fweisbec@redhat.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: JoonSoo Kim <js1304@gmail.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Rik van Riel <riel@redhat.com>
Cc: Suleiman Souhlal <suleiman@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 110 +++++++++++++++++++++++++++++
 mm/memcontrol.c            | 170 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 280 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index e98a74c0c9c0..afa2ad40457e 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -21,6 +21,7 @@
 #define _LINUX_MEMCONTROL_H
 #include <linux/cgroup.h>
 #include <linux/vm_event_item.h>
+#include <linux/hardirq.h>
 
 struct mem_cgroup;
 struct page_cgroup;
@@ -414,5 +415,114 @@ static inline void sock_release_memcg(struct sock *sk)
 {
 }
 #endif /* CONFIG_INET && CONFIG_MEMCG_KMEM */
+
+#ifdef CONFIG_MEMCG_KMEM
+static inline bool memcg_kmem_enabled(void)
+{
+	return true;
+}
+
+/*
+ * In general, we'll do everything in our power to not incur in any overhead
+ * for non-memcg users for the kmem functions. Not even a function call, if we
+ * can avoid it.
+ *
+ * Therefore, we'll inline all those functions so that in the best case, we'll
+ * see that kmemcg is off for everybody and proceed quickly.  If it is on,
+ * we'll still do most of the flag checking inline. We check a lot of
+ * conditions, but because they are pretty simple, they are expected to be
+ * fast.
+ */
+bool __memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg,
+					int order);
+void __memcg_kmem_commit_charge(struct page *page,
+				       struct mem_cgroup *memcg, int order);
+void __memcg_kmem_uncharge_pages(struct page *page, int order);
+
+/**
+ * memcg_kmem_newpage_charge: verify if a new kmem allocation is allowed.
+ * @gfp: the gfp allocation flags.
+ * @memcg: a pointer to the memcg this was charged against.
+ * @order: allocation order.
+ *
+ * returns true if the memcg where the current task belongs can hold this
+ * allocation.
+ *
+ * We return true automatically if this allocation is not to be accounted to
+ * any memcg.
+ */
+static inline bool
+memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg, int order)
+{
+	if (!memcg_kmem_enabled())
+		return true;
+
+	/*
+	 * __GFP_NOFAIL allocations will move on even if charging is not
+	 * possible. Therefore we don't even try, and have this allocation
+	 * unaccounted. We could in theory charge it with
+	 * res_counter_charge_nofail, but we hope those allocations are rare,
+	 * and won't be worth the trouble.
+	 */
+	if (!(gfp & __GFP_KMEMCG) || (gfp & __GFP_NOFAIL))
+		return true;
+	if (in_interrupt() || (!current->mm) || (current->flags & PF_KTHREAD))
+		return true;
+
+	/* If the test is dying, just let it go. */
+	if (unlikely(fatal_signal_pending(current)))
+		return true;
+
+	return __memcg_kmem_newpage_charge(gfp, memcg, order);
+}
+
+/**
+ * memcg_kmem_uncharge_pages: uncharge pages from memcg
+ * @page: pointer to struct page being freed
+ * @order: allocation order.
+ *
+ * there is no need to specify memcg here, since it is embedded in page_cgroup
+ */
+static inline void
+memcg_kmem_uncharge_pages(struct page *page, int order)
+{
+	if (memcg_kmem_enabled())
+		__memcg_kmem_uncharge_pages(page, order);
+}
+
+/**
+ * memcg_kmem_commit_charge: embeds correct memcg in a page
+ * @page: pointer to struct page recently allocated
+ * @memcg: the memcg structure we charged against
+ * @order: allocation order.
+ *
+ * Needs to be called after memcg_kmem_newpage_charge, regardless of success or
+ * failure of the allocation. if @page is NULL, this function will revert the
+ * charges. Otherwise, it will commit the memcg given by @memcg to the
+ * corresponding page_cgroup.
+ */
+static inline void
+memcg_kmem_commit_charge(struct page *page, struct mem_cgroup *memcg, int order)
+{
+	if (memcg_kmem_enabled() && memcg)
+		__memcg_kmem_commit_charge(page, memcg, order);
+}
+
+#else
+static inline bool
+memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg, int order)
+{
+	return true;
+}
+
+static inline void memcg_kmem_uncharge_pages(struct page *page, int order)
+{
+}
+
+static inline void
+memcg_kmem_commit_charge(struct page *page, struct mem_cgroup *memcg, int order)
+{
+}
+#endif /* CONFIG_MEMCG_KMEM */
 #endif /* _LINUX_MEMCONTROL_H */
 
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index bba1cb4bbb82..b9afa060b8d6 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -10,6 +10,10 @@
  * Copyright (C) 2009 Nokia Corporation
  * Author: Kirill A. Shutemov
  *
+ * Kernel Memory Controller
+ * Copyright (C) 2012 Parallels Inc. and Google Inc.
+ * Authors: Glauber Costa and Suleiman Souhlal
+ *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 2 of the License, or
@@ -2661,6 +2665,172 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
 	memcg_check_events(memcg, page);
 }
 
+#ifdef CONFIG_MEMCG_KMEM
+static inline bool memcg_can_account_kmem(struct mem_cgroup *memcg)
+{
+	return !mem_cgroup_disabled() && !mem_cgroup_is_root(memcg) &&
+		(memcg->kmem_account_flags & KMEM_ACCOUNTED_MASK);
+}
+
+static int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, u64 size)
+{
+	struct res_counter *fail_res;
+	struct mem_cgroup *_memcg;
+	int ret = 0;
+	bool may_oom;
+
+	ret = res_counter_charge(&memcg->kmem, size, &fail_res);
+	if (ret)
+		return ret;
+
+	/*
+	 * Conditions under which we can wait for the oom_killer. Those are
+	 * the same conditions tested by the core page allocator
+	 */
+	may_oom = (gfp & __GFP_FS) && !(gfp & __GFP_NORETRY);
+
+	_memcg = memcg;
+	ret = __mem_cgroup_try_charge(NULL, gfp, size >> PAGE_SHIFT,
+				      &_memcg, may_oom);
+
+	if (ret == -EINTR)  {
+		/*
+		 * __mem_cgroup_try_charge() chosed to bypass to root due to
+		 * OOM kill or fatal signal.  Since our only options are to
+		 * either fail the allocation or charge it to this cgroup, do
+		 * it as a temporary condition. But we can't fail. From a
+		 * kmem/slab perspective, the cache has already been selected,
+		 * by mem_cgroup_kmem_get_cache(), so it is too late to change
+		 * our minds.
+		 *
+		 * This condition will only trigger if the task entered
+		 * memcg_charge_kmem in a sane state, but was OOM-killed during
+		 * __mem_cgroup_try_charge() above. Tasks that were already
+		 * dying when the allocation triggers should have been already
+		 * directed to the root cgroup in memcontrol.h
+		 */
+		res_counter_charge_nofail(&memcg->res, size, &fail_res);
+		if (do_swap_account)
+			res_counter_charge_nofail(&memcg->memsw, size,
+						  &fail_res);
+		ret = 0;
+	} else if (ret)
+		res_counter_uncharge(&memcg->kmem, size);
+
+	return ret;
+}
+
+static void memcg_uncharge_kmem(struct mem_cgroup *memcg, u64 size)
+{
+	res_counter_uncharge(&memcg->kmem, size);
+	res_counter_uncharge(&memcg->res, size);
+	if (do_swap_account)
+		res_counter_uncharge(&memcg->memsw, size);
+}
+
+/*
+ * We need to verify if the allocation against current->mm->owner's memcg is
+ * possible for the given order. But the page is not allocated yet, so we'll
+ * need a further commit step to do the final arrangements.
+ *
+ * It is possible for the task to switch cgroups in this mean time, so at
+ * commit time, we can't rely on task conversion any longer.  We'll then use
+ * the handle argument to return to the caller which cgroup we should commit
+ * against. We could also return the memcg directly and avoid the pointer
+ * passing, but a boolean return value gives better semantics considering
+ * the compiled-out case as well.
+ *
+ * Returning true means the allocation is possible.
+ */
+bool
+__memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **_memcg, int order)
+{
+	struct mem_cgroup *memcg;
+	int ret;
+
+	*_memcg = NULL;
+	memcg = try_get_mem_cgroup_from_mm(current->mm);
+
+	/*
+	 * very rare case described in mem_cgroup_from_task. Unfortunately there
+	 * isn't much we can do without complicating this too much, and it would
+	 * be gfp-dependent anyway. Just let it go
+	 */
+	if (unlikely(!memcg))
+		return true;
+
+	if (!memcg_can_account_kmem(memcg)) {
+		css_put(&memcg->css);
+		return true;
+	}
+
+	mem_cgroup_get(memcg);
+
+	ret = memcg_charge_kmem(memcg, gfp, PAGE_SIZE << order);
+	if (!ret)
+		*_memcg = memcg;
+	else
+		mem_cgroup_put(memcg);
+
+	css_put(&memcg->css);
+	return (ret == 0);
+}
+
+void __memcg_kmem_commit_charge(struct page *page, struct mem_cgroup *memcg,
+			      int order)
+{
+	struct page_cgroup *pc;
+
+	VM_BUG_ON(mem_cgroup_is_root(memcg));
+
+	/* The page allocation failed. Revert */
+	if (!page) {
+		memcg_uncharge_kmem(memcg, PAGE_SIZE << order);
+		mem_cgroup_put(memcg);
+		return;
+	}
+
+	pc = lookup_page_cgroup(page);
+	lock_page_cgroup(pc);
+	pc->mem_cgroup = memcg;
+	SetPageCgroupUsed(pc);
+	unlock_page_cgroup(pc);
+}
+
+void __memcg_kmem_uncharge_pages(struct page *page, int order)
+{
+	struct mem_cgroup *memcg = NULL;
+	struct page_cgroup *pc;
+
+
+	pc = lookup_page_cgroup(page);
+	/*
+	 * Fast unlocked return. Theoretically might have changed, have to
+	 * check again after locking.
+	 */
+	if (!PageCgroupUsed(pc))
+		return;
+
+	lock_page_cgroup(pc);
+	if (PageCgroupUsed(pc)) {
+		memcg = pc->mem_cgroup;
+		ClearPageCgroupUsed(pc);
+	}
+	unlock_page_cgroup(pc);
+
+	/*
+	 * We trust that only if there is a memcg associated with the page, it
+	 * is a valid allocation
+	 */
+	if (!memcg)
+		return;
+
+	VM_BUG_ON(mem_cgroup_is_root(memcg));
+	memcg_uncharge_kmem(memcg, PAGE_SIZE << order);
+	mem_cgroup_put(memcg);
+}
+#endif /* CONFIG_MEMCG_KMEM */
+
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 
 #define PCGF_NOCOPY_AT_SPLIT (1 << PCG_LOCK | 1 << PCG_MIGRATION)
-- 
cgit v1.2.3


From 6a1a0d3b625a4091e7a0eb249aefc6a644385149 Mon Sep 17 00:00:00 2001
From: Glauber Costa <glommer@parallels.com>
Date: Tue, 18 Dec 2012 14:22:00 -0800
Subject: mm: allocate kernel pages to the right memcg

When a process tries to allocate a page with the __GFP_KMEMCG flag, the
page allocator will call the corresponding memcg functions to validate
the allocation.  Tasks in the root memcg can always proceed.

To avoid adding markers to the page - and a kmem flag that would
necessarily follow, as much as doing page_cgroup lookups for no reason,
whoever is marking its allocations with __GFP_KMEMCG flag is responsible
for telling the page allocator that this is such an allocation at
free_pages() time.  This is done by the invocation of
__free_accounted_pages() and free_accounted_pages().

Signed-off-by: Glauber Costa <glommer@parallels.com>
Acked-by: Michal Hocko <mhocko@suse.cz>
Acked-by: Mel Gorman <mgorman@suse.de>
Acked-by: Kamezawa Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: David Rientjes <rientjes@google.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Suleiman Souhlal <suleiman@google.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Frederic Weisbecker <fweisbec@redhat.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: JoonSoo Kim <js1304@gmail.com>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/gfp.h |  3 +++
 mm/page_alloc.c     | 35 +++++++++++++++++++++++++++++++++++
 2 files changed, 38 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 643c9a6f7f34..0f615eb23d05 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -367,6 +367,9 @@ extern void free_pages(unsigned long addr, unsigned int order);
 extern void free_hot_cold_page(struct page *page, int cold);
 extern void free_hot_cold_page_list(struct list_head *list, int cold);
 
+extern void __free_memcg_kmem_pages(struct page *page, unsigned int order);
+extern void free_memcg_kmem_pages(unsigned long addr, unsigned int order);
+
 #define __free_page(page) __free_pages((page), 0)
 #define free_page(addr) free_pages((addr), 0)
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 62496edbd8dd..2ad2ad168efe 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2612,6 +2612,7 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
 	int migratetype = allocflags_to_migratetype(gfp_mask);
 	unsigned int cpuset_mems_cookie;
 	int alloc_flags = ALLOC_WMARK_LOW|ALLOC_CPUSET;
+	struct mem_cgroup *memcg = NULL;
 
 	gfp_mask &= gfp_allowed_mask;
 
@@ -2630,6 +2631,13 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
 	if (unlikely(!zonelist->_zonerefs->zone))
 		return NULL;
 
+	/*
+	 * Will only have any effect when __GFP_KMEMCG is set.  This is
+	 * verified in the (always inline) callee
+	 */
+	if (!memcg_kmem_newpage_charge(gfp_mask, &memcg, order))
+		return NULL;
+
 retry_cpuset:
 	cpuset_mems_cookie = get_mems_allowed();
 
@@ -2665,6 +2673,8 @@ out:
 	if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !page))
 		goto retry_cpuset;
 
+	memcg_kmem_commit_charge(page, memcg, order);
+
 	return page;
 }
 EXPORT_SYMBOL(__alloc_pages_nodemask);
@@ -2717,6 +2727,31 @@ void free_pages(unsigned long addr, unsigned int order)
 
 EXPORT_SYMBOL(free_pages);
 
+/*
+ * __free_memcg_kmem_pages and free_memcg_kmem_pages will free
+ * pages allocated with __GFP_KMEMCG.
+ *
+ * Those pages are accounted to a particular memcg, embedded in the
+ * corresponding page_cgroup. To avoid adding a hit in the allocator to search
+ * for that information only to find out that it is NULL for users who have no
+ * interest in that whatsoever, we provide these functions.
+ *
+ * The caller knows better which flags it relies on.
+ */
+void __free_memcg_kmem_pages(struct page *page, unsigned int order)
+{
+	memcg_kmem_uncharge_pages(page, order);
+	__free_pages(page, order);
+}
+
+void free_memcg_kmem_pages(unsigned long addr, unsigned int order)
+{
+	if (addr != 0) {
+		VM_BUG_ON(!virt_addr_valid((void *)addr));
+		__free_memcg_kmem_pages(virt_to_page((void *)addr), order);
+	}
+}
+
 static void *make_alloc_exact(unsigned long addr, unsigned order, size_t size)
 {
 	if (addr) {
-- 
cgit v1.2.3


From 50bdd430c20566b13d8bc59946184b08f5875de6 Mon Sep 17 00:00:00 2001
From: Glauber Costa <glommer@parallels.com>
Date: Tue, 18 Dec 2012 14:22:04 -0800
Subject: res_counter: return amount of charges after res_counter_uncharge()

It is useful to know how many charges are still left after a call to
res_counter_uncharge.  While it is possible to issue a res_counter_read
after uncharge, this can be racy.

If we need, for instance, to take some action when the counters drop down
to 0, only one of the callers should see it.  This is the same semantics
as the atomic variables in the kernel.

Since the current return value is void, we don't need to worry about
anything breaking due to this change: nobody relied on that, and only
users appearing from now on will be checking this value.

Signed-off-by: Glauber Costa <glommer@parallels.com>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Acked-by: Kamezawa Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: David Rientjes <rientjes@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Suleiman Souhlal <suleiman@google.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Christoph Lameter <cl@linux.com>
Cc: Frederic Weisbecker <fweisbec@redhat.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: JoonSoo Kim <js1304@gmail.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/cgroups/resource_counter.txt |  7 ++++---
 include/linux/res_counter.h                | 12 +++++++-----
 kernel/res_counter.c                       | 20 +++++++++++++-------
 3 files changed, 24 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/cgroups/resource_counter.txt b/Documentation/cgroups/resource_counter.txt
index 0c4a344e78fa..c4d99ed0b418 100644
--- a/Documentation/cgroups/resource_counter.txt
+++ b/Documentation/cgroups/resource_counter.txt
@@ -83,16 +83,17 @@ to work with it.
 	res_counter->lock internally (it must be called with res_counter->lock
 	held). The force parameter indicates whether we can bypass the limit.
 
- e. void res_counter_uncharge[_locked]
+ e. u64 res_counter_uncharge[_locked]
 			(struct res_counter *rc, unsigned long val)
 
 	When a resource is released (freed) it should be de-accounted
 	from the resource counter it was accounted to.  This is called
-	"uncharging".
+	"uncharging". The return value of this function indicate the amount
+	of charges still present in the counter.
 
 	The _locked routines imply that the res_counter->lock is taken.
 
- f. void res_counter_uncharge_until
+ f. u64 res_counter_uncharge_until
 		(struct res_counter *rc, struct res_counter *top,
 		 unsinged long val)
 
diff --git a/include/linux/res_counter.h b/include/linux/res_counter.h
index 6f54e40fa218..5ae8456d9670 100644
--- a/include/linux/res_counter.h
+++ b/include/linux/res_counter.h
@@ -125,14 +125,16 @@ int res_counter_charge_nofail(struct res_counter *counter,
  *
  * these calls check for usage underflow and show a warning on the console
  * _locked call expects the counter->lock to be taken
+ *
+ * returns the total charges still present in @counter.
  */
 
-void res_counter_uncharge_locked(struct res_counter *counter, unsigned long val);
-void res_counter_uncharge(struct res_counter *counter, unsigned long val);
+u64 res_counter_uncharge_locked(struct res_counter *counter, unsigned long val);
+u64 res_counter_uncharge(struct res_counter *counter, unsigned long val);
 
-void res_counter_uncharge_until(struct res_counter *counter,
-				struct res_counter *top,
-				unsigned long val);
+u64 res_counter_uncharge_until(struct res_counter *counter,
+			       struct res_counter *top,
+			       unsigned long val);
 /**
  * res_counter_margin - calculate chargeable space of a counter
  * @cnt: the counter
diff --git a/kernel/res_counter.c b/kernel/res_counter.c
index 3920d593e63c..ff55247e7049 100644
--- a/kernel/res_counter.c
+++ b/kernel/res_counter.c
@@ -86,33 +86,39 @@ int res_counter_charge_nofail(struct res_counter *counter, unsigned long val,
 	return __res_counter_charge(counter, val, limit_fail_at, true);
 }
 
-void res_counter_uncharge_locked(struct res_counter *counter, unsigned long val)
+u64 res_counter_uncharge_locked(struct res_counter *counter, unsigned long val)
 {
 	if (WARN_ON(counter->usage < val))
 		val = counter->usage;
 
 	counter->usage -= val;
+	return counter->usage;
 }
 
-void res_counter_uncharge_until(struct res_counter *counter,
-				struct res_counter *top,
-				unsigned long val)
+u64 res_counter_uncharge_until(struct res_counter *counter,
+			       struct res_counter *top,
+			       unsigned long val)
 {
 	unsigned long flags;
 	struct res_counter *c;
+	u64 ret = 0;
 
 	local_irq_save(flags);
 	for (c = counter; c != top; c = c->parent) {
+		u64 r;
 		spin_lock(&c->lock);
-		res_counter_uncharge_locked(c, val);
+		r = res_counter_uncharge_locked(c, val);
+		if (c == counter)
+			ret = r;
 		spin_unlock(&c->lock);
 	}
 	local_irq_restore(flags);
+	return ret;
 }
 
-void res_counter_uncharge(struct res_counter *counter, unsigned long val)
+u64 res_counter_uncharge(struct res_counter *counter, unsigned long val)
 {
-	res_counter_uncharge_until(counter, NULL, val);
+	return res_counter_uncharge_until(counter, NULL, val);
 }
 
 static inline unsigned long long *
-- 
cgit v1.2.3


From a8964b9b84f99c0b1b5d7c09520f89f0700e742e Mon Sep 17 00:00:00 2001
From: Glauber Costa <glommer@parallels.com>
Date: Tue, 18 Dec 2012 14:22:09 -0800
Subject: memcg: use static branches when code not in use

We can use static branches to patch the code in or out when not used.

Because the _ACTIVE bit on kmem_accounted is only set after the increment
is done, we guarantee that the root memcg will always be selected for kmem
charges until all call sites are patched (see memcg_kmem_enabled).  This
guarantees that no mischarges are applied.

Static branch decrement happens when the last reference count from the
kmem accounting in memcg dies.  This will only happen when the charges
drop down to 0.

When that happens, we need to disable the static branch only on those
memcgs that enabled it.  To achieve this, we would be forced to complicate
the code by keeping track of which memcgs were the ones that actually
enabled limits, and which ones got it from its parents.

It is a lot simpler just to do static_key_slow_inc() on every child
that is accounted.

Signed-off-by: Glauber Costa <glommer@parallels.com>
Acked-by: Michal Hocko <mhocko@suse.cz>
Acked-by: Kamezawa Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Suleiman Souhlal <suleiman@google.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Frederic Weisbecker <fweisbec@redhat.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: JoonSoo Kim <js1304@gmail.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h |  4 ++-
 mm/memcontrol.c            | 79 +++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 78 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index afa2ad40457e..87d61e840ddd 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -22,6 +22,7 @@
 #include <linux/cgroup.h>
 #include <linux/vm_event_item.h>
 #include <linux/hardirq.h>
+#include <linux/jump_label.h>
 
 struct mem_cgroup;
 struct page_cgroup;
@@ -417,9 +418,10 @@ static inline void sock_release_memcg(struct sock *sk)
 #endif /* CONFIG_INET && CONFIG_MEMCG_KMEM */
 
 #ifdef CONFIG_MEMCG_KMEM
+extern struct static_key memcg_kmem_enabled_key;
 static inline bool memcg_kmem_enabled(void)
 {
-	return true;
+	return static_key_false(&memcg_kmem_enabled_key);
 }
 
 /*
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 9a62ac3ea881..bc70254558fa 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -346,10 +346,13 @@ struct mem_cgroup {
 /* internal only representation about the status of kmem accounting. */
 enum {
 	KMEM_ACCOUNTED_ACTIVE = 0, /* accounted by this cgroup itself */
+	KMEM_ACCOUNTED_ACTIVATED, /* static key enabled. */
 	KMEM_ACCOUNTED_DEAD, /* dead memcg with pending kmem charges */
 };
 
-#define KMEM_ACCOUNTED_MASK (1 << KMEM_ACCOUNTED_ACTIVE)
+/* We account when limit is on, but only after call sites are patched */
+#define KMEM_ACCOUNTED_MASK \
+		((1 << KMEM_ACCOUNTED_ACTIVE) | (1 << KMEM_ACCOUNTED_ACTIVATED))
 
 #ifdef CONFIG_MEMCG_KMEM
 static inline void memcg_kmem_set_active(struct mem_cgroup *memcg)
@@ -362,6 +365,11 @@ static bool memcg_kmem_is_active(struct mem_cgroup *memcg)
 	return test_bit(KMEM_ACCOUNTED_ACTIVE, &memcg->kmem_account_flags);
 }
 
+static void memcg_kmem_set_activated(struct mem_cgroup *memcg)
+{
+	set_bit(KMEM_ACCOUNTED_ACTIVATED, &memcg->kmem_account_flags);
+}
+
 static void memcg_kmem_mark_dead(struct mem_cgroup *memcg)
 {
 	if (test_bit(KMEM_ACCOUNTED_ACTIVE, &memcg->kmem_account_flags))
@@ -532,6 +540,26 @@ static void disarm_sock_keys(struct mem_cgroup *memcg)
 }
 #endif
 
+#ifdef CONFIG_MEMCG_KMEM
+struct static_key memcg_kmem_enabled_key;
+
+static void disarm_kmem_keys(struct mem_cgroup *memcg)
+{
+	if (memcg_kmem_is_active(memcg))
+		static_key_slow_dec(&memcg_kmem_enabled_key);
+}
+#else
+static void disarm_kmem_keys(struct mem_cgroup *memcg)
+{
+}
+#endif /* CONFIG_MEMCG_KMEM */
+
+static void disarm_static_keys(struct mem_cgroup *memcg)
+{
+	disarm_sock_keys(memcg);
+	disarm_kmem_keys(memcg);
+}
+
 static void drain_all_stock_async(struct mem_cgroup *memcg);
 
 static struct mem_cgroup_per_zone *
@@ -4204,6 +4232,8 @@ static int memcg_update_kmem_limit(struct cgroup *cont, u64 val)
 {
 	int ret = -EINVAL;
 #ifdef CONFIG_MEMCG_KMEM
+	bool must_inc_static_branch = false;
+
 	struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
 	/*
 	 * For simplicity, we won't allow this to be disabled.  It also can't
@@ -4234,7 +4264,15 @@ static int memcg_update_kmem_limit(struct cgroup *cont, u64 val)
 		ret = res_counter_set_limit(&memcg->kmem, val);
 		VM_BUG_ON(ret);
 
-		memcg_kmem_set_active(memcg);
+		/*
+		 * After this point, kmem_accounted (that we test atomically in
+		 * the beginning of this conditional), is no longer 0. This
+		 * guarantees only one process will set the following boolean
+		 * to true. We don't need test_and_set because we're protected
+		 * by the set_limit_mutex anyway.
+		 */
+		memcg_kmem_set_activated(memcg);
+		must_inc_static_branch = true;
 		/*
 		 * kmem charges can outlive the cgroup. In the case of slab
 		 * pages, for instance, a page contain objects from various
@@ -4247,6 +4285,27 @@ static int memcg_update_kmem_limit(struct cgroup *cont, u64 val)
 out:
 	mutex_unlock(&set_limit_mutex);
 	cgroup_unlock();
+
+	/*
+	 * We are by now familiar with the fact that we can't inc the static
+	 * branch inside cgroup_lock. See disarm functions for details. A
+	 * worker here is overkill, but also wrong: After the limit is set, we
+	 * must start accounting right away. Since this operation can't fail,
+	 * we can safely defer it to here - no rollback will be needed.
+	 *
+	 * The boolean used to control this is also safe, because
+	 * KMEM_ACCOUNTED_ACTIVATED guarantees that only one process will be
+	 * able to set it to true;
+	 */
+	if (must_inc_static_branch) {
+		static_key_slow_inc(&memcg_kmem_enabled_key);
+		/*
+		 * setting the active bit after the inc will guarantee no one
+		 * starts accounting before all call sites are patched
+		 */
+		memcg_kmem_set_active(memcg);
+	}
+
 #endif
 	return ret;
 }
@@ -4258,8 +4317,20 @@ static void memcg_propagate_kmem(struct mem_cgroup *memcg)
 		return;
 	memcg->kmem_account_flags = parent->kmem_account_flags;
 #ifdef CONFIG_MEMCG_KMEM
-	if (memcg_kmem_is_active(memcg))
+	/*
+	 * When that happen, we need to disable the static branch only on those
+	 * memcgs that enabled it. To achieve this, we would be forced to
+	 * complicate the code by keeping track of which memcgs were the ones
+	 * that actually enabled limits, and which ones got it from its
+	 * parents.
+	 *
+	 * It is a lot simpler just to do static_key_slow_inc() on every child
+	 * that is accounted.
+	 */
+	if (memcg_kmem_is_active(memcg)) {
 		mem_cgroup_get(memcg);
+		static_key_slow_inc(&memcg_kmem_enabled_key);
+	}
 #endif
 }
 
@@ -5184,7 +5255,7 @@ static void free_work(struct work_struct *work)
 	 * to move this code around, and make sure it is outside
 	 * the cgroup_lock.
 	 */
-	disarm_sock_keys(memcg);
+	disarm_static_keys(memcg);
 	if (size < PAGE_SIZE)
 		kfree(memcg);
 	else
-- 
cgit v1.2.3


From 2ad306b17c0ac5a1b1f250d5f772aeb87fdf1eba Mon Sep 17 00:00:00 2001
From: Glauber Costa <glommer@parallels.com>
Date: Tue, 18 Dec 2012 14:22:18 -0800
Subject: fork: protect architectures where THREAD_SIZE >= PAGE_SIZE against
 fork bombs

Because those architectures will draw their stacks directly from the page
allocator, rather than the slab cache, we can directly pass __GFP_KMEMCG
flag, and issue the corresponding free_pages.

This code path is taken when the architecture doesn't define
CONFIG_ARCH_THREAD_INFO_ALLOCATOR (only ia64 seems to), and has
THREAD_SIZE >= PAGE_SIZE.  Luckily, most - if not all - of the remaining
architectures fall in this category.

This will guarantee that every stack page is accounted to the memcg the
process currently lives on, and will have the allocations to fail if they
go over limit.

For the time being, I am defining a new variant of THREADINFO_GFP, not to
mess with the other path.  Once the slab is also tracked by memcg, we can
get rid of that flag.

Tested to successfully protect against :(){ :|:& };:

Signed-off-by: Glauber Costa <glommer@parallels.com>
Acked-by: Frederic Weisbecker <fweisbec@redhat.com>
Acked-by: Kamezawa Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: JoonSoo Kim <js1304@gmail.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Rik van Riel <riel@redhat.com>
Cc: Suleiman Souhlal <suleiman@google.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/thread_info.h | 2 ++
 kernel/fork.c               | 4 ++--
 2 files changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index ccc1899bd62e..e7e04736802f 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -61,6 +61,8 @@ extern long do_no_restart_syscall(struct restart_block *parm);
 # define THREADINFO_GFP		(GFP_KERNEL | __GFP_NOTRACK)
 #endif
 
+#define THREADINFO_GFP_ACCOUNTED (THREADINFO_GFP | __GFP_KMEMCG)
+
 /*
  * flag set/clear/test wrappers
  * - pass TIF_xxxx constants to these functions
diff --git a/kernel/fork.c b/kernel/fork.c
index c36c4e301efe..85f6d536608d 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -146,7 +146,7 @@ void __weak arch_release_thread_info(struct thread_info *ti)
 static struct thread_info *alloc_thread_info_node(struct task_struct *tsk,
 						  int node)
 {
-	struct page *page = alloc_pages_node(node, THREADINFO_GFP,
+	struct page *page = alloc_pages_node(node, THREADINFO_GFP_ACCOUNTED,
 					     THREAD_SIZE_ORDER);
 
 	return page ? page_address(page) : NULL;
@@ -154,7 +154,7 @@ static struct thread_info *alloc_thread_info_node(struct task_struct *tsk,
 
 static inline void free_thread_info(struct thread_info *ti)
 {
-	free_pages((unsigned long)ti, THREAD_SIZE_ORDER);
+	free_memcg_kmem_pages((unsigned long)ti, THREAD_SIZE_ORDER);
 }
 # else
 static struct kmem_cache *thread_info_cache;
-- 
cgit v1.2.3


From ba6c496ed834a37a26fc6fc87fc9aecb0fa0014d Mon Sep 17 00:00:00 2001
From: Glauber Costa <glommer@parallels.com>
Date: Tue, 18 Dec 2012 14:22:27 -0800
Subject: slab/slub: struct memcg_params

For the kmem slab controller, we need to record some extra information in
the kmem_cache structure.

Signed-off-by: Glauber Costa <glommer@parallels.com>
Signed-off-by: Suleiman Souhlal <suleiman@google.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Frederic Weisbecker <fweisbec@redhat.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: JoonSoo Kim <js1304@gmail.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Rik van Riel <riel@redhat.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/slab.h     | 24 ++++++++++++++++++++++++
 include/linux/slab_def.h |  3 +++
 include/linux/slub_def.h |  3 +++
 mm/slab.h                | 13 +++++++++++++
 4 files changed, 43 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/slab.h b/include/linux/slab.h
index 743a10415122..00efba149222 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -176,6 +176,30 @@ void kmem_cache_free(struct kmem_cache *, void *);
 #define ARCH_SLAB_MINALIGN __alignof__(unsigned long long)
 #endif
 
+/*
+ * This is the main placeholder for memcg-related information in kmem caches.
+ * struct kmem_cache will hold a pointer to it, so the memory cost while
+ * disabled is 1 pointer. The runtime cost while enabled, gets bigger than it
+ * would otherwise be if that would be bundled in kmem_cache: we'll need an
+ * extra pointer chase. But the trade off clearly lays in favor of not
+ * penalizing non-users.
+ *
+ * Both the root cache and the child caches will have it. For the root cache,
+ * this will hold a dynamically allocated array large enough to hold
+ * information about the currently limited memcgs in the system.
+ *
+ * Child caches will hold extra metadata needed for its operation. Fields are:
+ *
+ * @memcg: pointer to the memcg this cache belongs to
+ */
+struct memcg_cache_params {
+	bool is_root_cache;
+	union {
+		struct kmem_cache *memcg_caches[0];
+		struct mem_cgroup *memcg;
+	};
+};
+
 /*
  * Common kmalloc functions provided by all allocators
  */
diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h
index 45c0356fdc8c..8bb6e0eaf3c6 100644
--- a/include/linux/slab_def.h
+++ b/include/linux/slab_def.h
@@ -81,6 +81,9 @@ struct kmem_cache {
 	 */
 	int obj_offset;
 #endif /* CONFIG_DEBUG_SLAB */
+#ifdef CONFIG_MEMCG_KMEM
+	struct memcg_cache_params *memcg_params;
+#endif
 
 /* 6) per-cpu/per-node data, touched during every alloc/free */
 	/*
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index df448adb7283..961e72eab907 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -101,6 +101,9 @@ struct kmem_cache {
 #ifdef CONFIG_SYSFS
 	struct kobject kobj;	/* For sysfs */
 #endif
+#ifdef CONFIG_MEMCG_KMEM
+	struct memcg_cache_params *memcg_params;
+#endif
 
 #ifdef CONFIG_NUMA
 	/*
diff --git a/mm/slab.h b/mm/slab.h
index 1cb9c9ee0e6f..49e7a8b1d27e 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -100,4 +100,17 @@ void get_slabinfo(struct kmem_cache *s, struct slabinfo *sinfo);
 void slabinfo_show_stats(struct seq_file *m, struct kmem_cache *s);
 ssize_t slabinfo_write(struct file *file, const char __user *buffer,
 		       size_t count, loff_t *ppos);
+
+#ifdef CONFIG_MEMCG_KMEM
+static inline bool is_root_cache(struct kmem_cache *s)
+{
+	return !s->memcg_params || s->memcg_params->is_root_cache;
+}
+#else
+static inline bool is_root_cache(struct kmem_cache *s)
+{
+	return true;
+}
+
+#endif
 #endif
-- 
cgit v1.2.3


From 2633d7a028239a738b793be5ca8fa6ac312f5793 Mon Sep 17 00:00:00 2001
From: Glauber Costa <glommer@parallels.com>
Date: Tue, 18 Dec 2012 14:22:34 -0800
Subject: slab/slub: consider a memcg parameter in kmem_create_cache

Allow a memcg parameter to be passed during cache creation.  When the slub
allocator is being used, it will only merge caches that belong to the same
memcg.  We'll do this by scanning the global list, and then translating
the cache to a memcg-specific cache

Default function is created as a wrapper, passing NULL to the memcg
version.  We only merge caches that belong to the same memcg.

A helper is provided, memcg_css_id: because slub needs a unique cache name
for sysfs.  Since this is visible, but not the canonical location for slab
data, the cache name is not used, the css_id should suffice.

Signed-off-by: Glauber Costa <glommer@parallels.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Frederic Weisbecker <fweisbec@redhat.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: JoonSoo Kim <js1304@gmail.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Rik van Riel <riel@redhat.com>
Cc: Suleiman Souhlal <suleiman@google.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 26 +++++++++++++++++++++++
 include/linux/slab.h       | 14 ++++++++++++-
 mm/memcontrol.c            | 51 ++++++++++++++++++++++++++++++++++++++++++++++
 mm/slab.h                  | 23 +++++++++++++++++----
 mm/slab_common.c           | 42 ++++++++++++++++++++++++++++++--------
 mm/slub.c                  | 19 +++++++++++++----
 6 files changed, 157 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 87d61e840ddd..0b69a0470007 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -28,6 +28,7 @@ struct mem_cgroup;
 struct page_cgroup;
 struct page;
 struct mm_struct;
+struct kmem_cache;
 
 /* Stats that can be updated by kernel. */
 enum mem_cgroup_page_stat_item {
@@ -441,6 +442,11 @@ void __memcg_kmem_commit_charge(struct page *page,
 				       struct mem_cgroup *memcg, int order);
 void __memcg_kmem_uncharge_pages(struct page *page, int order);
 
+int memcg_cache_id(struct mem_cgroup *memcg);
+int memcg_register_cache(struct mem_cgroup *memcg, struct kmem_cache *s);
+void memcg_release_cache(struct kmem_cache *cachep);
+void memcg_cache_list_add(struct mem_cgroup *memcg, struct kmem_cache *cachep);
+
 /**
  * memcg_kmem_newpage_charge: verify if a new kmem allocation is allowed.
  * @gfp: the gfp allocation flags.
@@ -525,6 +531,26 @@ static inline void
 memcg_kmem_commit_charge(struct page *page, struct mem_cgroup *memcg, int order)
 {
 }
+
+static inline int memcg_cache_id(struct mem_cgroup *memcg)
+{
+	return -1;
+}
+
+static inline int memcg_register_cache(struct mem_cgroup *memcg,
+				       struct kmem_cache *s)
+{
+	return 0;
+}
+
+static inline void memcg_release_cache(struct kmem_cache *cachep)
+{
+}
+
+static inline void memcg_cache_list_add(struct mem_cgroup *memcg,
+					struct kmem_cache *s)
+{
+}
 #endif /* CONFIG_MEMCG_KMEM */
 #endif /* _LINUX_MEMCONTROL_H */
 
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 00efba149222..c0fcf28c15b2 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -116,6 +116,7 @@ struct kmem_cache {
 };
 #endif
 
+struct mem_cgroup;
 /*
  * struct kmem_cache related prototypes
  */
@@ -125,6 +126,9 @@ int slab_is_available(void);
 struct kmem_cache *kmem_cache_create(const char *, size_t, size_t,
 			unsigned long,
 			void (*)(void *));
+struct kmem_cache *
+kmem_cache_create_memcg(struct mem_cgroup *, const char *, size_t, size_t,
+			unsigned long, void (*)(void *));
 void kmem_cache_destroy(struct kmem_cache *);
 int kmem_cache_shrink(struct kmem_cache *);
 void kmem_cache_free(struct kmem_cache *, void *);
@@ -191,15 +195,23 @@ void kmem_cache_free(struct kmem_cache *, void *);
  * Child caches will hold extra metadata needed for its operation. Fields are:
  *
  * @memcg: pointer to the memcg this cache belongs to
+ * @list: list_head for the list of all caches in this memcg
+ * @root_cache: pointer to the global, root cache, this cache was derived from
  */
 struct memcg_cache_params {
 	bool is_root_cache;
 	union {
 		struct kmem_cache *memcg_caches[0];
-		struct mem_cgroup *memcg;
+		struct {
+			struct mem_cgroup *memcg;
+			struct list_head list;
+			struct kmem_cache *root_cache;
+		};
 	};
 };
 
+int memcg_update_all_caches(int num_memcgs);
+
 /*
  * Common kmalloc functions provided by all allocators
  */
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index e16694d5e118..3eafe6cf6ca4 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -341,6 +341,14 @@ struct mem_cgroup {
 #if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_INET)
 	struct tcp_memcontrol tcp_mem;
 #endif
+#if defined(CONFIG_MEMCG_KMEM)
+	/* analogous to slab_common's slab_caches list. per-memcg */
+	struct list_head memcg_slab_caches;
+	/* Not a spinlock, we can take a lot of time walking the list */
+	struct mutex slab_caches_mutex;
+        /* Index in the kmem_cache->memcg_params->memcg_caches array */
+	int kmemcg_id;
+#endif
 };
 
 /* internal only representation about the status of kmem accounting. */
@@ -2785,6 +2793,47 @@ static void memcg_uncharge_kmem(struct mem_cgroup *memcg, u64 size)
 		mem_cgroup_put(memcg);
 }
 
+void memcg_cache_list_add(struct mem_cgroup *memcg, struct kmem_cache *cachep)
+{
+	if (!memcg)
+		return;
+
+	mutex_lock(&memcg->slab_caches_mutex);
+	list_add(&cachep->memcg_params->list, &memcg->memcg_slab_caches);
+	mutex_unlock(&memcg->slab_caches_mutex);
+}
+
+/*
+ * helper for acessing a memcg's index. It will be used as an index in the
+ * child cache array in kmem_cache, and also to derive its name. This function
+ * will return -1 when this is not a kmem-limited memcg.
+ */
+int memcg_cache_id(struct mem_cgroup *memcg)
+{
+	return memcg ? memcg->kmemcg_id : -1;
+}
+
+int memcg_register_cache(struct mem_cgroup *memcg, struct kmem_cache *s)
+{
+	size_t size = sizeof(struct memcg_cache_params);
+
+	if (!memcg_kmem_enabled())
+		return 0;
+
+	s->memcg_params = kzalloc(size, GFP_KERNEL);
+	if (!s->memcg_params)
+		return -ENOMEM;
+
+	if (memcg)
+		s->memcg_params->memcg = memcg;
+	return 0;
+}
+
+void memcg_release_cache(struct kmem_cache *s)
+{
+	kfree(s->memcg_params);
+}
+
 /*
  * We need to verify if the allocation against current->mm->owner's memcg is
  * possible for the given order. But the page is not allocated yet, so we'll
@@ -5026,7 +5075,9 @@ static int mem_cgroup_oom_control_write(struct cgroup *cgrp,
 #ifdef CONFIG_MEMCG_KMEM
 static int memcg_init_kmem(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
 {
+	memcg->kmemcg_id = -1;
 	memcg_propagate_kmem(memcg);
+
 	return mem_cgroup_sockets_init(memcg, ss);
 };
 
diff --git a/mm/slab.h b/mm/slab.h
index 49e7a8b1d27e..abe582d20c79 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -43,12 +43,15 @@ extern struct kmem_cache *create_kmalloc_cache(const char *name, size_t size,
 extern void create_boot_cache(struct kmem_cache *, const char *name,
 			size_t size, unsigned long flags);
 
+struct mem_cgroup;
 #ifdef CONFIG_SLUB
-struct kmem_cache *__kmem_cache_alias(const char *name, size_t size,
-	size_t align, unsigned long flags, void (*ctor)(void *));
+struct kmem_cache *
+__kmem_cache_alias(struct mem_cgroup *memcg, const char *name, size_t size,
+		   size_t align, unsigned long flags, void (*ctor)(void *));
 #else
-static inline struct kmem_cache *__kmem_cache_alias(const char *name, size_t size,
-	size_t align, unsigned long flags, void (*ctor)(void *))
+static inline struct kmem_cache *
+__kmem_cache_alias(struct mem_cgroup *memcg, const char *name, size_t size,
+		   size_t align, unsigned long flags, void (*ctor)(void *))
 { return NULL; }
 #endif
 
@@ -106,11 +109,23 @@ static inline bool is_root_cache(struct kmem_cache *s)
 {
 	return !s->memcg_params || s->memcg_params->is_root_cache;
 }
+
+static inline bool cache_match_memcg(struct kmem_cache *cachep,
+				     struct mem_cgroup *memcg)
+{
+	return (is_root_cache(cachep) && !memcg) ||
+				(cachep->memcg_params->memcg == memcg);
+}
 #else
 static inline bool is_root_cache(struct kmem_cache *s)
 {
 	return true;
 }
 
+static inline bool cache_match_memcg(struct kmem_cache *cachep,
+				     struct mem_cgroup *memcg)
+{
+	return true;
+}
 #endif
 #endif
diff --git a/mm/slab_common.c b/mm/slab_common.c
index a8e76d79ee65..3031badcc577 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -18,6 +18,7 @@
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
 #include <asm/page.h>
+#include <linux/memcontrol.h>
 
 #include "slab.h"
 
@@ -27,7 +28,8 @@ DEFINE_MUTEX(slab_mutex);
 struct kmem_cache *kmem_cache;
 
 #ifdef CONFIG_DEBUG_VM
-static int kmem_cache_sanity_check(const char *name, size_t size)
+static int kmem_cache_sanity_check(struct mem_cgroup *memcg, const char *name,
+				   size_t size)
 {
 	struct kmem_cache *s = NULL;
 
@@ -53,7 +55,13 @@ static int kmem_cache_sanity_check(const char *name, size_t size)
 			continue;
 		}
 
-		if (!strcmp(s->name, name)) {
+		/*
+		 * For simplicity, we won't check this in the list of memcg
+		 * caches. We have control over memcg naming, and if there
+		 * aren't duplicates in the global list, there won't be any
+		 * duplicates in the memcg lists as well.
+		 */
+		if (!memcg && !strcmp(s->name, name)) {
 			pr_err("%s (%s): Cache name already exists.\n",
 			       __func__, name);
 			dump_stack();
@@ -66,7 +74,8 @@ static int kmem_cache_sanity_check(const char *name, size_t size)
 	return 0;
 }
 #else
-static inline int kmem_cache_sanity_check(const char *name, size_t size)
+static inline int kmem_cache_sanity_check(struct mem_cgroup *memcg,
+					  const char *name, size_t size)
 {
 	return 0;
 }
@@ -125,8 +134,9 @@ unsigned long calculate_alignment(unsigned long flags,
  * as davem.
  */
 
-struct kmem_cache *kmem_cache_create(const char *name, size_t size, size_t align,
-		unsigned long flags, void (*ctor)(void *))
+struct kmem_cache *
+kmem_cache_create_memcg(struct mem_cgroup *memcg, const char *name, size_t size,
+			size_t align, unsigned long flags, void (*ctor)(void *))
 {
 	struct kmem_cache *s = NULL;
 	int err = 0;
@@ -134,7 +144,7 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size, size_t align
 	get_online_cpus();
 	mutex_lock(&slab_mutex);
 
-	if (!kmem_cache_sanity_check(name, size) == 0)
+	if (!kmem_cache_sanity_check(memcg, name, size) == 0)
 		goto out_locked;
 
 	/*
@@ -145,7 +155,7 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size, size_t align
 	 */
 	flags &= CACHE_CREATE_MASK;
 
-	s = __kmem_cache_alias(name, size, align, flags, ctor);
+	s = __kmem_cache_alias(memcg, name, size, align, flags, ctor);
 	if (s)
 		goto out_locked;
 
@@ -154,6 +164,13 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size, size_t align
 		s->object_size = s->size = size;
 		s->align = calculate_alignment(flags, align, size);
 		s->ctor = ctor;
+
+		if (memcg_register_cache(memcg, s)) {
+			kmem_cache_free(kmem_cache, s);
+			err = -ENOMEM;
+			goto out_locked;
+		}
+
 		s->name = kstrdup(name, GFP_KERNEL);
 		if (!s->name) {
 			kmem_cache_free(kmem_cache, s);
@@ -163,10 +180,9 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size, size_t align
 
 		err = __kmem_cache_create(s, flags);
 		if (!err) {
-
 			s->refcount = 1;
 			list_add(&s->list, &slab_caches);
-
+			memcg_cache_list_add(memcg, s);
 		} else {
 			kfree(s->name);
 			kmem_cache_free(kmem_cache, s);
@@ -194,6 +210,13 @@ out_locked:
 
 	return s;
 }
+
+struct kmem_cache *
+kmem_cache_create(const char *name, size_t size, size_t align,
+		  unsigned long flags, void (*ctor)(void *))
+{
+	return kmem_cache_create_memcg(NULL, name, size, align, flags, ctor);
+}
 EXPORT_SYMBOL(kmem_cache_create);
 
 void kmem_cache_destroy(struct kmem_cache *s)
@@ -209,6 +232,7 @@ void kmem_cache_destroy(struct kmem_cache *s)
 			if (s->flags & SLAB_DESTROY_BY_RCU)
 				rcu_barrier();
 
+			memcg_release_cache(s);
 			kfree(s->name);
 			kmem_cache_free(kmem_cache, s);
 		} else {
diff --git a/mm/slub.c b/mm/slub.c
index 87f9f32bf0cd..985332b38852 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -31,6 +31,7 @@
 #include <linux/fault-inject.h>
 #include <linux/stacktrace.h>
 #include <linux/prefetch.h>
+#include <linux/memcontrol.h>
 
 #include <trace/events/kmem.h>
 
@@ -3786,7 +3787,7 @@ static int slab_unmergeable(struct kmem_cache *s)
 	return 0;
 }
 
-static struct kmem_cache *find_mergeable(size_t size,
+static struct kmem_cache *find_mergeable(struct mem_cgroup *memcg, size_t size,
 		size_t align, unsigned long flags, const char *name,
 		void (*ctor)(void *))
 {
@@ -3822,17 +3823,21 @@ static struct kmem_cache *find_mergeable(size_t size,
 		if (s->size - size >= sizeof(void *))
 			continue;
 
+		if (!cache_match_memcg(s, memcg))
+			continue;
+
 		return s;
 	}
 	return NULL;
 }
 
-struct kmem_cache *__kmem_cache_alias(const char *name, size_t size,
-		size_t align, unsigned long flags, void (*ctor)(void *))
+struct kmem_cache *
+__kmem_cache_alias(struct mem_cgroup *memcg, const char *name, size_t size,
+		   size_t align, unsigned long flags, void (*ctor)(void *))
 {
 	struct kmem_cache *s;
 
-	s = find_mergeable(size, align, flags, name, ctor);
+	s = find_mergeable(memcg, size, align, flags, name, ctor);
 	if (s) {
 		s->refcount++;
 		/*
@@ -5156,6 +5161,12 @@ static char *create_unique_id(struct kmem_cache *s)
 	if (p != name + 1)
 		*p++ = '-';
 	p += sprintf(p, "%07d", s->size);
+
+#ifdef CONFIG_MEMCG_KMEM
+	if (!is_root_cache(s))
+		p += sprintf(p, "-%08d", memcg_cache_id(s->memcg_params->memcg));
+#endif
+
 	BUG_ON(p > name + ID_STR_LENGTH - 1);
 	return name;
 }
-- 
cgit v1.2.3


From 55007d849759252ddd573aeb36143b947202d509 Mon Sep 17 00:00:00 2001
From: Glauber Costa <glommer@parallels.com>
Date: Tue, 18 Dec 2012 14:22:38 -0800
Subject: memcg: allocate memory for memcg caches whenever a new memcg appears

Every cache that is considered a root cache (basically the "original"
caches, tied to the root memcg/no-memcg) will have an array that should be
large enough to store a cache pointer per each memcg in the system.

Theoreticaly, this is as high as 1 << sizeof(css_id), which is currently
in the 64k pointers range.  Most of the time, we won't be using that much.

What goes in this patch, is a simple scheme to dynamically allocate such
an array, in order to minimize memory usage for memcg caches.  Because we
would also like to avoid allocations all the time, at least for now, the
array will only grow.  It will tend to be big enough to hold the maximum
number of kmem-limited memcgs ever achieved.

We'll allocate it to be a minimum of 64 kmem-limited memcgs.  When we have
more than that, we'll start doubling the size of this array every time the
limit is reached.

Because we are only considering kmem limited memcgs, a natural point for
this to happen is when we write to the limit.  At that point, we already
have set_limit_mutex held, so that will become our natural synchronization
mechanism.

Signed-off-by: Glauber Costa <glommer@parallels.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Frederic Weisbecker <fweisbec@redhat.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: JoonSoo Kim <js1304@gmail.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Rik van Riel <riel@redhat.com>
Cc: Suleiman Souhlal <suleiman@google.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h |   2 +
 mm/memcontrol.c            | 207 +++++++++++++++++++++++++++++++++++++++++----
 mm/slab_common.c           |  28 ++++++
 3 files changed, 221 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 0b69a0470007..45085e14e023 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -447,6 +447,8 @@ int memcg_register_cache(struct mem_cgroup *memcg, struct kmem_cache *s);
 void memcg_release_cache(struct kmem_cache *cachep);
 void memcg_cache_list_add(struct mem_cgroup *memcg, struct kmem_cache *cachep);
 
+int memcg_update_cache_size(struct kmem_cache *s, int num_groups);
+void memcg_update_array_size(int num_groups);
 /**
  * memcg_kmem_newpage_charge: verify if a new kmem allocation is allowed.
  * @gfp: the gfp allocation flags.
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 3eafe6cf6ca4..db38b60e5f87 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -378,6 +378,11 @@ static void memcg_kmem_set_activated(struct mem_cgroup *memcg)
 	set_bit(KMEM_ACCOUNTED_ACTIVATED, &memcg->kmem_account_flags);
 }
 
+static void memcg_kmem_clear_activated(struct mem_cgroup *memcg)
+{
+	clear_bit(KMEM_ACCOUNTED_ACTIVATED, &memcg->kmem_account_flags);
+}
+
 static void memcg_kmem_mark_dead(struct mem_cgroup *memcg)
 {
 	if (test_bit(KMEM_ACCOUNTED_ACTIVE, &memcg->kmem_account_flags))
@@ -549,12 +554,48 @@ static void disarm_sock_keys(struct mem_cgroup *memcg)
 #endif
 
 #ifdef CONFIG_MEMCG_KMEM
+/*
+ * This will be the memcg's index in each cache's ->memcg_params->memcg_caches.
+ * There are two main reasons for not using the css_id for this:
+ *  1) this works better in sparse environments, where we have a lot of memcgs,
+ *     but only a few kmem-limited. Or also, if we have, for instance, 200
+ *     memcgs, and none but the 200th is kmem-limited, we'd have to have a
+ *     200 entry array for that.
+ *
+ *  2) In order not to violate the cgroup API, we would like to do all memory
+ *     allocation in ->create(). At that point, we haven't yet allocated the
+ *     css_id. Having a separate index prevents us from messing with the cgroup
+ *     core for this
+ *
+ * The current size of the caches array is stored in
+ * memcg_limited_groups_array_size.  It will double each time we have to
+ * increase it.
+ */
+static DEFINE_IDA(kmem_limited_groups);
+static int memcg_limited_groups_array_size;
+/*
+ * MIN_SIZE is different than 1, because we would like to avoid going through
+ * the alloc/free process all the time. In a small machine, 4 kmem-limited
+ * cgroups is a reasonable guess. In the future, it could be a parameter or
+ * tunable, but that is strictly not necessary.
+ *
+ * MAX_SIZE should be as large as the number of css_ids. Ideally, we could get
+ * this constant directly from cgroup, but it is understandable that this is
+ * better kept as an internal representation in cgroup.c. In any case, the
+ * css_id space is not getting any smaller, and we don't have to necessarily
+ * increase ours as well if it increases.
+ */
+#define MEMCG_CACHES_MIN_SIZE 4
+#define MEMCG_CACHES_MAX_SIZE 65535
+
 struct static_key memcg_kmem_enabled_key;
 
 static void disarm_kmem_keys(struct mem_cgroup *memcg)
 {
-	if (memcg_kmem_is_active(memcg))
+	if (memcg_kmem_is_active(memcg)) {
 		static_key_slow_dec(&memcg_kmem_enabled_key);
+		ida_simple_remove(&kmem_limited_groups, memcg->kmemcg_id);
+	}
 	/*
 	 * This check can't live in kmem destruction function,
 	 * since the charges will outlive the cgroup
@@ -2813,6 +2854,120 @@ int memcg_cache_id(struct mem_cgroup *memcg)
 	return memcg ? memcg->kmemcg_id : -1;
 }
 
+/*
+ * This ends up being protected by the set_limit mutex, during normal
+ * operation, because that is its main call site.
+ *
+ * But when we create a new cache, we can call this as well if its parent
+ * is kmem-limited. That will have to hold set_limit_mutex as well.
+ */
+int memcg_update_cache_sizes(struct mem_cgroup *memcg)
+{
+	int num, ret;
+
+	num = ida_simple_get(&kmem_limited_groups,
+				0, MEMCG_CACHES_MAX_SIZE, GFP_KERNEL);
+	if (num < 0)
+		return num;
+	/*
+	 * After this point, kmem_accounted (that we test atomically in
+	 * the beginning of this conditional), is no longer 0. This
+	 * guarantees only one process will set the following boolean
+	 * to true. We don't need test_and_set because we're protected
+	 * by the set_limit_mutex anyway.
+	 */
+	memcg_kmem_set_activated(memcg);
+
+	ret = memcg_update_all_caches(num+1);
+	if (ret) {
+		ida_simple_remove(&kmem_limited_groups, num);
+		memcg_kmem_clear_activated(memcg);
+		return ret;
+	}
+
+	memcg->kmemcg_id = num;
+	INIT_LIST_HEAD(&memcg->memcg_slab_caches);
+	mutex_init(&memcg->slab_caches_mutex);
+	return 0;
+}
+
+static size_t memcg_caches_array_size(int num_groups)
+{
+	ssize_t size;
+	if (num_groups <= 0)
+		return 0;
+
+	size = 2 * num_groups;
+	if (size < MEMCG_CACHES_MIN_SIZE)
+		size = MEMCG_CACHES_MIN_SIZE;
+	else if (size > MEMCG_CACHES_MAX_SIZE)
+		size = MEMCG_CACHES_MAX_SIZE;
+
+	return size;
+}
+
+/*
+ * We should update the current array size iff all caches updates succeed. This
+ * can only be done from the slab side. The slab mutex needs to be held when
+ * calling this.
+ */
+void memcg_update_array_size(int num)
+{
+	if (num > memcg_limited_groups_array_size)
+		memcg_limited_groups_array_size = memcg_caches_array_size(num);
+}
+
+int memcg_update_cache_size(struct kmem_cache *s, int num_groups)
+{
+	struct memcg_cache_params *cur_params = s->memcg_params;
+
+	VM_BUG_ON(s->memcg_params && !s->memcg_params->is_root_cache);
+
+	if (num_groups > memcg_limited_groups_array_size) {
+		int i;
+		ssize_t size = memcg_caches_array_size(num_groups);
+
+		size *= sizeof(void *);
+		size += sizeof(struct memcg_cache_params);
+
+		s->memcg_params = kzalloc(size, GFP_KERNEL);
+		if (!s->memcg_params) {
+			s->memcg_params = cur_params;
+			return -ENOMEM;
+		}
+
+		s->memcg_params->is_root_cache = true;
+
+		/*
+		 * There is the chance it will be bigger than
+		 * memcg_limited_groups_array_size, if we failed an allocation
+		 * in a cache, in which case all caches updated before it, will
+		 * have a bigger array.
+		 *
+		 * But if that is the case, the data after
+		 * memcg_limited_groups_array_size is certainly unused
+		 */
+		for (i = 0; i < memcg_limited_groups_array_size; i++) {
+			if (!cur_params->memcg_caches[i])
+				continue;
+			s->memcg_params->memcg_caches[i] =
+						cur_params->memcg_caches[i];
+		}
+
+		/*
+		 * Ideally, we would wait until all caches succeed, and only
+		 * then free the old one. But this is not worth the extra
+		 * pointer per-cache we'd have to have for this.
+		 *
+		 * It is not a big deal if some caches are left with a size
+		 * bigger than the others. And all updates will reset this
+		 * anyway.
+		 */
+		kfree(cur_params);
+	}
+	return 0;
+}
+
 int memcg_register_cache(struct mem_cgroup *memcg, struct kmem_cache *s)
 {
 	size_t size = sizeof(struct memcg_cache_params);
@@ -2820,6 +2975,9 @@ int memcg_register_cache(struct mem_cgroup *memcg, struct kmem_cache *s)
 	if (!memcg_kmem_enabled())
 		return 0;
 
+	if (!memcg)
+		size += memcg_limited_groups_array_size * sizeof(void *);
+
 	s->memcg_params = kzalloc(size, GFP_KERNEL);
 	if (!s->memcg_params)
 		return -ENOMEM;
@@ -4326,14 +4484,11 @@ static int memcg_update_kmem_limit(struct cgroup *cont, u64 val)
 		ret = res_counter_set_limit(&memcg->kmem, val);
 		VM_BUG_ON(ret);
 
-		/*
-		 * After this point, kmem_accounted (that we test atomically in
-		 * the beginning of this conditional), is no longer 0. This
-		 * guarantees only one process will set the following boolean
-		 * to true. We don't need test_and_set because we're protected
-		 * by the set_limit_mutex anyway.
-		 */
-		memcg_kmem_set_activated(memcg);
+		ret = memcg_update_cache_sizes(memcg);
+		if (ret) {
+			res_counter_set_limit(&memcg->kmem, RESOURCE_MAX);
+			goto out;
+		}
 		must_inc_static_branch = true;
 		/*
 		 * kmem charges can outlive the cgroup. In the case of slab
@@ -4372,11 +4527,13 @@ out:
 	return ret;
 }
 
-static void memcg_propagate_kmem(struct mem_cgroup *memcg)
+static int memcg_propagate_kmem(struct mem_cgroup *memcg)
 {
+	int ret = 0;
 	struct mem_cgroup *parent = parent_mem_cgroup(memcg);
 	if (!parent)
-		return;
+		goto out;
+
 	memcg->kmem_account_flags = parent->kmem_account_flags;
 #ifdef CONFIG_MEMCG_KMEM
 	/*
@@ -4389,11 +4546,24 @@ static void memcg_propagate_kmem(struct mem_cgroup *memcg)
 	 * It is a lot simpler just to do static_key_slow_inc() on every child
 	 * that is accounted.
 	 */
-	if (memcg_kmem_is_active(memcg)) {
-		mem_cgroup_get(memcg);
-		static_key_slow_inc(&memcg_kmem_enabled_key);
-	}
+	if (!memcg_kmem_is_active(memcg))
+		goto out;
+
+	/*
+	 * destroy(), called if we fail, will issue static_key_slow_inc() and
+	 * mem_cgroup_put() if kmem is enabled. We have to either call them
+	 * unconditionally, or clear the KMEM_ACTIVE flag. I personally find
+	 * this more consistent, since it always leads to the same destroy path
+	 */
+	mem_cgroup_get(memcg);
+	static_key_slow_inc(&memcg_kmem_enabled_key);
+
+	mutex_lock(&set_limit_mutex);
+	ret = memcg_update_cache_sizes(memcg);
+	mutex_unlock(&set_limit_mutex);
 #endif
+out:
+	return ret;
 }
 
 /*
@@ -5075,8 +5245,12 @@ static int mem_cgroup_oom_control_write(struct cgroup *cgrp,
 #ifdef CONFIG_MEMCG_KMEM
 static int memcg_init_kmem(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
 {
+	int ret;
+
 	memcg->kmemcg_id = -1;
-	memcg_propagate_kmem(memcg);
+	ret = memcg_propagate_kmem(memcg);
+	if (ret)
+		return ret;
 
 	return mem_cgroup_sockets_init(memcg, ss);
 };
@@ -5479,6 +5653,7 @@ mem_cgroup_css_alloc(struct cgroup *cont)
 		res_counter_init(&memcg->res, &parent->res);
 		res_counter_init(&memcg->memsw, &parent->memsw);
 		res_counter_init(&memcg->kmem, &parent->kmem);
+
 		/*
 		 * We increment refcnt of the parent to ensure that we can
 		 * safely access it on res_counter_charge/uncharge.
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 3031badcc577..1c424b6511bf 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -81,6 +81,34 @@ static inline int kmem_cache_sanity_check(struct mem_cgroup *memcg,
 }
 #endif
 
+#ifdef CONFIG_MEMCG_KMEM
+int memcg_update_all_caches(int num_memcgs)
+{
+	struct kmem_cache *s;
+	int ret = 0;
+	mutex_lock(&slab_mutex);
+
+	list_for_each_entry(s, &slab_caches, list) {
+		if (!is_root_cache(s))
+			continue;
+
+		ret = memcg_update_cache_size(s, num_memcgs);
+		/*
+		 * See comment in memcontrol.c, memcg_update_cache_size:
+		 * Instead of freeing the memory, we'll just leave the caches
+		 * up to this point in an updated state.
+		 */
+		if (ret)
+			goto out;
+	}
+
+	memcg_update_array_size(num_memcgs);
+out:
+	mutex_unlock(&slab_mutex);
+	return ret;
+}
+#endif
+
 /*
  * Figure out what the alignment of the objects will be given a set of
  * flags, a user specified alignment and the size of the objects.
-- 
cgit v1.2.3


From d7f25f8a2f81252d1ac134470ba1d0a287cf8fcd Mon Sep 17 00:00:00 2001
From: Glauber Costa <glommer@parallels.com>
Date: Tue, 18 Dec 2012 14:22:40 -0800
Subject: memcg: infrastructure to match an allocation to the right cache

The page allocator is able to bind a page to a memcg when it is
allocated.  But for the caches, we'd like to have as many objects as
possible in a page belonging to the same cache.

This is done in this patch by calling memcg_kmem_get_cache in the
beginning of every allocation function.  This function is patched out by
static branches when kernel memory controller is not being used.

It assumes that the task allocating, which determines the memcg in the
page allocator, belongs to the same cgroup throughout the whole process.
Misaccounting can happen if the task calls memcg_kmem_get_cache() while
belonging to a cgroup, and later on changes.  This is considered
acceptable, and should only happen upon task migration.

Before the cache is created by the memcg core, there is also a possible
imbalance: the task belongs to a memcg, but the cache being allocated from
is the global cache, since the child cache is not yet guaranteed to be
ready.  This case is also fine, since in this case the GFP_KMEMCG will not
be passed and the page allocator will not attempt any cgroup accounting.

Signed-off-by: Glauber Costa <glommer@parallels.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Frederic Weisbecker <fweisbec@redhat.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: JoonSoo Kim <js1304@gmail.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Rik van Riel <riel@redhat.com>
Cc: Suleiman Souhlal <suleiman@google.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h |  41 +++++++++
 init/Kconfig               |   1 -
 mm/memcontrol.c            | 217 +++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 258 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 45085e14e023..bd9b5d73bc2b 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -449,6 +449,10 @@ void memcg_cache_list_add(struct mem_cgroup *memcg, struct kmem_cache *cachep);
 
 int memcg_update_cache_size(struct kmem_cache *s, int num_groups);
 void memcg_update_array_size(int num_groups);
+
+struct kmem_cache *
+__memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp);
+
 /**
  * memcg_kmem_newpage_charge: verify if a new kmem allocation is allowed.
  * @gfp: the gfp allocation flags.
@@ -518,6 +522,37 @@ memcg_kmem_commit_charge(struct page *page, struct mem_cgroup *memcg, int order)
 		__memcg_kmem_commit_charge(page, memcg, order);
 }
 
+/**
+ * memcg_kmem_get_cache: selects the correct per-memcg cache for allocation
+ * @cachep: the original global kmem cache
+ * @gfp: allocation flags.
+ *
+ * This function assumes that the task allocating, which determines the memcg
+ * in the page allocator, belongs to the same cgroup throughout the whole
+ * process.  Misacounting can happen if the task calls memcg_kmem_get_cache()
+ * while belonging to a cgroup, and later on changes. This is considered
+ * acceptable, and should only happen upon task migration.
+ *
+ * Before the cache is created by the memcg core, there is also a possible
+ * imbalance: the task belongs to a memcg, but the cache being allocated from
+ * is the global cache, since the child cache is not yet guaranteed to be
+ * ready. This case is also fine, since in this case the GFP_KMEMCG will not be
+ * passed and the page allocator will not attempt any cgroup accounting.
+ */
+static __always_inline struct kmem_cache *
+memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp)
+{
+	if (!memcg_kmem_enabled())
+		return cachep;
+	if (gfp & __GFP_NOFAIL)
+		return cachep;
+	if (in_interrupt() || (!current->mm) || (current->flags & PF_KTHREAD))
+		return cachep;
+	if (unlikely(fatal_signal_pending(current)))
+		return cachep;
+
+	return __memcg_kmem_get_cache(cachep, gfp);
+}
 #else
 static inline bool
 memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg, int order)
@@ -553,6 +588,12 @@ static inline void memcg_cache_list_add(struct mem_cgroup *memcg,
 					struct kmem_cache *s)
 {
 }
+
+static inline struct kmem_cache *
+memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp)
+{
+	return cachep;
+}
 #endif /* CONFIG_MEMCG_KMEM */
 #endif /* _LINUX_MEMCONTROL_H */
 
diff --git a/init/Kconfig b/init/Kconfig
index 19ccb33c99d9..7d30240e5bfe 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -883,7 +883,6 @@ config MEMCG_KMEM
 	bool "Memory Resource Controller Kernel Memory accounting (EXPERIMENTAL)"
 	depends on MEMCG && EXPERIMENTAL
 	depends on SLUB || SLAB
-	default n
 	help
 	  The Kernel Memory extension for Memory Resource Controller can limit
 	  the amount of memory used by kernel objects in the system. Those are
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index db38b60e5f87..efd26620a60b 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -588,7 +588,14 @@ static int memcg_limited_groups_array_size;
 #define MEMCG_CACHES_MIN_SIZE 4
 #define MEMCG_CACHES_MAX_SIZE 65535
 
+/*
+ * A lot of the calls to the cache allocation functions are expected to be
+ * inlined by the compiler. Since the calls to memcg_kmem_get_cache are
+ * conditional to this static branch, we'll have to allow modules that does
+ * kmem_cache_alloc and the such to see this symbol as well
+ */
 struct static_key memcg_kmem_enabled_key;
+EXPORT_SYMBOL(memcg_kmem_enabled_key);
 
 static void disarm_kmem_keys(struct mem_cgroup *memcg)
 {
@@ -2989,9 +2996,219 @@ int memcg_register_cache(struct mem_cgroup *memcg, struct kmem_cache *s)
 
 void memcg_release_cache(struct kmem_cache *s)
 {
+	struct kmem_cache *root;
+	struct mem_cgroup *memcg;
+	int id;
+
+	/*
+	 * This happens, for instance, when a root cache goes away before we
+	 * add any memcg.
+	 */
+	if (!s->memcg_params)
+		return;
+
+	if (s->memcg_params->is_root_cache)
+		goto out;
+
+	memcg = s->memcg_params->memcg;
+	id  = memcg_cache_id(memcg);
+
+	root = s->memcg_params->root_cache;
+	root->memcg_params->memcg_caches[id] = NULL;
+	mem_cgroup_put(memcg);
+
+	mutex_lock(&memcg->slab_caches_mutex);
+	list_del(&s->memcg_params->list);
+	mutex_unlock(&memcg->slab_caches_mutex);
+
+out:
 	kfree(s->memcg_params);
 }
 
+static char *memcg_cache_name(struct mem_cgroup *memcg, struct kmem_cache *s)
+{
+	char *name;
+	struct dentry *dentry;
+
+	rcu_read_lock();
+	dentry = rcu_dereference(memcg->css.cgroup->dentry);
+	rcu_read_unlock();
+
+	BUG_ON(dentry == NULL);
+
+	name = kasprintf(GFP_KERNEL, "%s(%d:%s)", s->name,
+			 memcg_cache_id(memcg), dentry->d_name.name);
+
+	return name;
+}
+
+static struct kmem_cache *kmem_cache_dup(struct mem_cgroup *memcg,
+					 struct kmem_cache *s)
+{
+	char *name;
+	struct kmem_cache *new;
+
+	name = memcg_cache_name(memcg, s);
+	if (!name)
+		return NULL;
+
+	new = kmem_cache_create_memcg(memcg, name, s->object_size, s->align,
+				      (s->flags & ~SLAB_PANIC), s->ctor);
+
+	kfree(name);
+	return new;
+}
+
+/*
+ * This lock protects updaters, not readers. We want readers to be as fast as
+ * they can, and they will either see NULL or a valid cache value. Our model
+ * allow them to see NULL, in which case the root memcg will be selected.
+ *
+ * We need this lock because multiple allocations to the same cache from a non
+ * will span more than one worker. Only one of them can create the cache.
+ */
+static DEFINE_MUTEX(memcg_cache_mutex);
+static struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg,
+						  struct kmem_cache *cachep)
+{
+	struct kmem_cache *new_cachep;
+	int idx;
+
+	BUG_ON(!memcg_can_account_kmem(memcg));
+
+	idx = memcg_cache_id(memcg);
+
+	mutex_lock(&memcg_cache_mutex);
+	new_cachep = cachep->memcg_params->memcg_caches[idx];
+	if (new_cachep)
+		goto out;
+
+	new_cachep = kmem_cache_dup(memcg, cachep);
+
+	if (new_cachep == NULL) {
+		new_cachep = cachep;
+		goto out;
+	}
+
+	mem_cgroup_get(memcg);
+	new_cachep->memcg_params->root_cache = cachep;
+
+	cachep->memcg_params->memcg_caches[idx] = new_cachep;
+	/*
+	 * the readers won't lock, make sure everybody sees the updated value,
+	 * so they won't put stuff in the queue again for no reason
+	 */
+	wmb();
+out:
+	mutex_unlock(&memcg_cache_mutex);
+	return new_cachep;
+}
+
+struct create_work {
+	struct mem_cgroup *memcg;
+	struct kmem_cache *cachep;
+	struct work_struct work;
+};
+
+static void memcg_create_cache_work_func(struct work_struct *w)
+{
+	struct create_work *cw;
+
+	cw = container_of(w, struct create_work, work);
+	memcg_create_kmem_cache(cw->memcg, cw->cachep);
+	/* Drop the reference gotten when we enqueued. */
+	css_put(&cw->memcg->css);
+	kfree(cw);
+}
+
+/*
+ * Enqueue the creation of a per-memcg kmem_cache.
+ * Called with rcu_read_lock.
+ */
+static void memcg_create_cache_enqueue(struct mem_cgroup *memcg,
+				       struct kmem_cache *cachep)
+{
+	struct create_work *cw;
+
+	cw = kmalloc(sizeof(struct create_work), GFP_NOWAIT);
+	if (cw == NULL)
+		return;
+
+	/* The corresponding put will be done in the workqueue. */
+	if (!css_tryget(&memcg->css)) {
+		kfree(cw);
+		return;
+	}
+
+	cw->memcg = memcg;
+	cw->cachep = cachep;
+
+	INIT_WORK(&cw->work, memcg_create_cache_work_func);
+	schedule_work(&cw->work);
+}
+
+/*
+ * Return the kmem_cache we're supposed to use for a slab allocation.
+ * We try to use the current memcg's version of the cache.
+ *
+ * If the cache does not exist yet, if we are the first user of it,
+ * we either create it immediately, if possible, or create it asynchronously
+ * in a workqueue.
+ * In the latter case, we will let the current allocation go through with
+ * the original cache.
+ *
+ * Can't be called in interrupt context or from kernel threads.
+ * This function needs to be called with rcu_read_lock() held.
+ */
+struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep,
+					  gfp_t gfp)
+{
+	struct mem_cgroup *memcg;
+	int idx;
+
+	VM_BUG_ON(!cachep->memcg_params);
+	VM_BUG_ON(!cachep->memcg_params->is_root_cache);
+
+	rcu_read_lock();
+	memcg = mem_cgroup_from_task(rcu_dereference(current->mm->owner));
+	rcu_read_unlock();
+
+	if (!memcg_can_account_kmem(memcg))
+		return cachep;
+
+	idx = memcg_cache_id(memcg);
+
+	/*
+	 * barrier to mare sure we're always seeing the up to date value.  The
+	 * code updating memcg_caches will issue a write barrier to match this.
+	 */
+	read_barrier_depends();
+	if (unlikely(cachep->memcg_params->memcg_caches[idx] == NULL)) {
+		/*
+		 * If we are in a safe context (can wait, and not in interrupt
+		 * context), we could be be predictable and return right away.
+		 * This would guarantee that the allocation being performed
+		 * already belongs in the new cache.
+		 *
+		 * However, there are some clashes that can arrive from locking.
+		 * For instance, because we acquire the slab_mutex while doing
+		 * kmem_cache_dup, this means no further allocation could happen
+		 * with the slab_mutex held.
+		 *
+		 * Also, because cache creation issue get_online_cpus(), this
+		 * creates a lock chain: memcg_slab_mutex -> cpu_hotplug_mutex,
+		 * that ends up reversed during cpu hotplug. (cpuset allocates
+		 * a bunch of GFP_KERNEL memory during cpuup). Due to all that,
+		 * better to defer everything.
+		 */
+		memcg_create_cache_enqueue(memcg, cachep);
+		return cachep;
+	}
+
+	return cachep->memcg_params->memcg_caches[idx];
+}
+EXPORT_SYMBOL(__memcg_kmem_get_cache);
+
 /*
  * We need to verify if the allocation against current->mm->owner's memcg is
  * possible for the given order. But the page is not allocated yet, so we'll
-- 
cgit v1.2.3


From 0e9d92f2d02d8c8320f0502307c688d07bdac2b3 Mon Sep 17 00:00:00 2001
From: Glauber Costa <glommer@parallels.com>
Date: Tue, 18 Dec 2012 14:22:42 -0800
Subject: memcg: skip memcg kmem allocations in specified code regions

Create a mechanism that skip memcg allocations during certain pieces of
our core code.  It basically works in the same way as
preempt_disable()/preempt_enable(): By marking a region under which all
allocations will be accounted to the root memcg.

We need this to prevent races in early cache creation, when we
allocate data using caches that are not necessarily created already.

Signed-off-by: Glauber Costa <glommer@parallels.com>
yCc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Frederic Weisbecker <fweisbec@redhat.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: JoonSoo Kim <js1304@gmail.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Rik van Riel <riel@redhat.com>
Cc: Suleiman Souhlal <suleiman@google.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched.h |  1 +
 mm/memcontrol.c       | 57 ++++++++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 55 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 9914c662ed7b..f712465b05c5 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1597,6 +1597,7 @@ struct task_struct {
 		unsigned long nr_pages;	/* uncharged usage */
 		unsigned long memsw_nr_pages; /* uncharged mem+swap usage */
 	} memcg_batch;
+	unsigned int memcg_kmem_skip_account;
 #endif
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
 	atomic_t ptrace_bp_refcnt;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index efd26620a60b..65302a083d2f 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3025,6 +3025,37 @@ out:
 	kfree(s->memcg_params);
 }
 
+/*
+ * During the creation a new cache, we need to disable our accounting mechanism
+ * altogether. This is true even if we are not creating, but rather just
+ * enqueing new caches to be created.
+ *
+ * This is because that process will trigger allocations; some visible, like
+ * explicit kmallocs to auxiliary data structures, name strings and internal
+ * cache structures; some well concealed, like INIT_WORK() that can allocate
+ * objects during debug.
+ *
+ * If any allocation happens during memcg_kmem_get_cache, we will recurse back
+ * to it. This may not be a bounded recursion: since the first cache creation
+ * failed to complete (waiting on the allocation), we'll just try to create the
+ * cache again, failing at the same point.
+ *
+ * memcg_kmem_get_cache is prepared to abort after seeing a positive count of
+ * memcg_kmem_skip_account. So we enclose anything that might allocate memory
+ * inside the following two functions.
+ */
+static inline void memcg_stop_kmem_account(void)
+{
+	VM_BUG_ON(!current->mm);
+	current->memcg_kmem_skip_account++;
+}
+
+static inline void memcg_resume_kmem_account(void)
+{
+	VM_BUG_ON(!current->mm);
+	current->memcg_kmem_skip_account--;
+}
+
 static char *memcg_cache_name(struct mem_cgroup *memcg, struct kmem_cache *s)
 {
 	char *name;
@@ -3084,7 +3115,6 @@ static struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg,
 		goto out;
 
 	new_cachep = kmem_cache_dup(memcg, cachep);
-
 	if (new_cachep == NULL) {
 		new_cachep = cachep;
 		goto out;
@@ -3125,8 +3155,8 @@ static void memcg_create_cache_work_func(struct work_struct *w)
  * Enqueue the creation of a per-memcg kmem_cache.
  * Called with rcu_read_lock.
  */
-static void memcg_create_cache_enqueue(struct mem_cgroup *memcg,
-				       struct kmem_cache *cachep)
+static void __memcg_create_cache_enqueue(struct mem_cgroup *memcg,
+					 struct kmem_cache *cachep)
 {
 	struct create_work *cw;
 
@@ -3147,6 +3177,24 @@ static void memcg_create_cache_enqueue(struct mem_cgroup *memcg,
 	schedule_work(&cw->work);
 }
 
+static void memcg_create_cache_enqueue(struct mem_cgroup *memcg,
+				       struct kmem_cache *cachep)
+{
+	/*
+	 * We need to stop accounting when we kmalloc, because if the
+	 * corresponding kmalloc cache is not yet created, the first allocation
+	 * in __memcg_create_cache_enqueue will recurse.
+	 *
+	 * However, it is better to enclose the whole function. Depending on
+	 * the debugging options enabled, INIT_WORK(), for instance, can
+	 * trigger an allocation. This too, will make us recurse. Because at
+	 * this point we can't allow ourselves back into memcg_kmem_get_cache,
+	 * the safest choice is to do it like this, wrapping the whole function.
+	 */
+	memcg_stop_kmem_account();
+	__memcg_create_cache_enqueue(memcg, cachep);
+	memcg_resume_kmem_account();
+}
 /*
  * Return the kmem_cache we're supposed to use for a slab allocation.
  * We try to use the current memcg's version of the cache.
@@ -3169,6 +3217,9 @@ struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep,
 	VM_BUG_ON(!cachep->memcg_params);
 	VM_BUG_ON(!cachep->memcg_params->is_root_cache);
 
+	if (!current->mm || current->memcg_kmem_skip_account)
+		return cachep;
+
 	rcu_read_lock();
 	memcg = mem_cgroup_from_task(rcu_dereference(current->mm->owner));
 	rcu_read_unlock();
-- 
cgit v1.2.3


From b9ce5ef49f00daf2254c6953c8d31f79aabccd34 Mon Sep 17 00:00:00 2001
From: Glauber Costa <glommer@parallels.com>
Date: Tue, 18 Dec 2012 14:22:46 -0800
Subject: sl[au]b: always get the cache from its page in kmem_cache_free()

struct page already has this information.  If we start chaining caches,
this information will always be more trustworthy than whatever is passed
into the function.

Signed-off-by: Glauber Costa <glommer@parallels.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Frederic Weisbecker <fweisbec@redhat.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: JoonSoo Kim <js1304@gmail.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Rik van Riel <riel@redhat.com>
Cc: Suleiman Souhlal <suleiman@google.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h |  5 +++++
 mm/slab.c                  |  6 +++++-
 mm/slab.h                  | 39 +++++++++++++++++++++++++++++++++++++++
 mm/slob.c                  |  2 +-
 mm/slub.c                  | 15 +++------------
 5 files changed, 53 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index bd9b5d73bc2b..2298122e71ad 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -554,6 +554,11 @@ memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp)
 	return __memcg_kmem_get_cache(cachep, gfp);
 }
 #else
+static inline bool memcg_kmem_enabled(void)
+{
+	return false;
+}
+
 static inline bool
 memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg, int order)
 {
diff --git a/mm/slab.c b/mm/slab.c
index c26ab9fbe1f5..bab6fec765a7 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -87,7 +87,6 @@
  */
 
 #include	<linux/slab.h>
-#include	"slab.h"
 #include	<linux/mm.h>
 #include	<linux/poison.h>
 #include	<linux/swap.h>
@@ -128,6 +127,8 @@
 
 #include	"internal.h"
 
+#include	"slab.h"
+
 /*
  * DEBUG	- 1 for kmem_cache_create() to honour; SLAB_RED_ZONE & SLAB_POISON.
  *		  0 for faster, smaller code (especially in the critical paths).
@@ -3883,6 +3884,9 @@ EXPORT_SYMBOL(__kmalloc);
 void kmem_cache_free(struct kmem_cache *cachep, void *objp)
 {
 	unsigned long flags;
+	cachep = cache_from_obj(cachep, objp);
+	if (!cachep)
+		return;
 
 	local_irq_save(flags);
 	debug_check_no_locks_freed(objp, cachep->object_size);
diff --git a/mm/slab.h b/mm/slab.h
index abe582d20c79..c95e922b166d 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -116,6 +116,13 @@ static inline bool cache_match_memcg(struct kmem_cache *cachep,
 	return (is_root_cache(cachep) && !memcg) ||
 				(cachep->memcg_params->memcg == memcg);
 }
+
+static inline bool slab_equal_or_root(struct kmem_cache *s,
+					struct kmem_cache *p)
+{
+	return (p == s) ||
+		(s->memcg_params && (p == s->memcg_params->root_cache));
+}
 #else
 static inline bool is_root_cache(struct kmem_cache *s)
 {
@@ -127,5 +134,37 @@ static inline bool cache_match_memcg(struct kmem_cache *cachep,
 {
 	return true;
 }
+
+static inline bool slab_equal_or_root(struct kmem_cache *s,
+				      struct kmem_cache *p)
+{
+	return true;
+}
 #endif
+
+static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x)
+{
+	struct kmem_cache *cachep;
+	struct page *page;
+
+	/*
+	 * When kmemcg is not being used, both assignments should return the
+	 * same value. but we don't want to pay the assignment price in that
+	 * case. If it is not compiled in, the compiler should be smart enough
+	 * to not do even the assignment. In that case, slab_equal_or_root
+	 * will also be a constant.
+	 */
+	if (!memcg_kmem_enabled() && !unlikely(s->flags & SLAB_DEBUG_FREE))
+		return s;
+
+	page = virt_to_head_page(x);
+	cachep = page->slab_cache;
+	if (slab_equal_or_root(cachep, s))
+		return cachep;
+
+	pr_err("%s: Wrong slab cache. %s but object is from %s\n",
+		__FUNCTION__, cachep->name, s->name);
+	WARN_ON_ONCE(1);
+	return s;
+}
 #endif
diff --git a/mm/slob.c b/mm/slob.c
index 795bab7d391d..a99fdf7a0907 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -58,7 +58,6 @@
 
 #include <linux/kernel.h>
 #include <linux/slab.h>
-#include "slab.h"
 
 #include <linux/mm.h>
 #include <linux/swap.h> /* struct reclaim_state */
@@ -73,6 +72,7 @@
 
 #include <linux/atomic.h>
 
+#include "slab.h"
 /*
  * slob_block has a field 'units', which indicates size of block if +ve,
  * or offset of next block if -ve (in SLOB_UNITs).
diff --git a/mm/slub.c b/mm/slub.c
index 985332b38852..6d5f2305d7a4 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2611,19 +2611,10 @@ redo:
 
 void kmem_cache_free(struct kmem_cache *s, void *x)
 {
-	struct page *page;
-
-	page = virt_to_head_page(x);
-
-	if (kmem_cache_debug(s) && page->slab_cache != s) {
-		pr_err("kmem_cache_free: Wrong slab cache. %s but object"
-			" is from  %s\n", page->slab_cache->name, s->name);
-		WARN_ON_ONCE(1);
+	s = cache_from_obj(s, x);
+	if (!s)
 		return;
-	}
-
-	slab_free(s, page, x, _RET_IP_);
-
+	slab_free(s, virt_to_head_page(x), x, _RET_IP_);
 	trace_kmem_cache_free(_RET_IP_, x);
 }
 EXPORT_SYMBOL(kmem_cache_free);
-- 
cgit v1.2.3


From d79923fad95b0cdf7770e024677180c734cb7148 Mon Sep 17 00:00:00 2001
From: Glauber Costa <glommer@parallels.com>
Date: Tue, 18 Dec 2012 14:22:48 -0800
Subject: sl[au]b: allocate objects from memcg cache

We are able to match a cache allocation to a particular memcg.  If the
task doesn't change groups during the allocation itself - a rare event,
this will give us a good picture about who is the first group to touch a
cache page.

This patch uses the now available infrastructure by calling
memcg_kmem_get_cache() before all the cache allocations.

Signed-off-by: Glauber Costa <glommer@parallels.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Frederic Weisbecker <fweisbec@redhat.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: JoonSoo Kim <js1304@gmail.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Rik van Riel <riel@redhat.com>
Cc: Suleiman Souhlal <suleiman@google.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/slub_def.h | 5 ++++-
 mm/memcontrol.c          | 3 +++
 mm/slab.c                | 6 +++++-
 mm/slub.c                | 7 ++++---
 4 files changed, 16 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 961e72eab907..364ba6c9fe21 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -225,7 +225,10 @@ void *__kmalloc(size_t size, gfp_t flags);
 static __always_inline void *
 kmalloc_order(size_t size, gfp_t flags, unsigned int order)
 {
-	void *ret = (void *) __get_free_pages(flags | __GFP_COMP, order);
+	void *ret;
+
+	flags |= (__GFP_COMP | __GFP_KMEMCG);
+	ret = (void *) __get_free_pages(flags, order);
 	kmemleak_alloc(ret, size, 1, flags);
 	return ret;
 }
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 65302a083d2f..cc13797d0fbc 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3086,6 +3086,9 @@ static struct kmem_cache *kmem_cache_dup(struct mem_cgroup *memcg,
 	new = kmem_cache_create_memcg(memcg, name, s->object_size, s->align,
 				      (s->flags & ~SLAB_PANIC), s->ctor);
 
+	if (new)
+		new->allocflags |= __GFP_KMEMCG;
+
 	kfree(name);
 	return new;
 }
diff --git a/mm/slab.c b/mm/slab.c
index bab6fec765a7..e265865e8700 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1933,7 +1933,7 @@ static void kmem_freepages(struct kmem_cache *cachep, void *addr)
 	}
 	if (current->reclaim_state)
 		current->reclaim_state->reclaimed_slab += nr_freed;
-	free_pages((unsigned long)addr, cachep->gfporder);
+	free_memcg_kmem_pages((unsigned long)addr, cachep->gfporder);
 }
 
 static void kmem_rcu_free(struct rcu_head *head)
@@ -3486,6 +3486,8 @@ slab_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
 	if (slab_should_failslab(cachep, flags))
 		return NULL;
 
+	cachep = memcg_kmem_get_cache(cachep, flags);
+
 	cache_alloc_debugcheck_before(cachep, flags);
 	local_irq_save(save_flags);
 
@@ -3571,6 +3573,8 @@ slab_alloc(struct kmem_cache *cachep, gfp_t flags, unsigned long caller)
 	if (slab_should_failslab(cachep, flags))
 		return NULL;
 
+	cachep = memcg_kmem_get_cache(cachep, flags);
+
 	cache_alloc_debugcheck_before(cachep, flags);
 	local_irq_save(save_flags);
 	objp = __do_cache_alloc(cachep, flags);
diff --git a/mm/slub.c b/mm/slub.c
index 6d5f2305d7a4..ef39e872b8eb 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1405,7 +1405,7 @@ static void __free_slab(struct kmem_cache *s, struct page *page)
 	reset_page_mapcount(page);
 	if (current->reclaim_state)
 		current->reclaim_state->reclaimed_slab += pages;
-	__free_pages(page, order);
+	__free_memcg_kmem_pages(page, order);
 }
 
 #define need_reserve_slab_rcu						\
@@ -2323,6 +2323,7 @@ static __always_inline void *slab_alloc_node(struct kmem_cache *s,
 	if (slab_pre_alloc_hook(s, gfpflags))
 		return NULL;
 
+	s = memcg_kmem_get_cache(s, gfpflags);
 redo:
 
 	/*
@@ -3284,7 +3285,7 @@ static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
 	struct page *page;
 	void *ptr = NULL;
 
-	flags |= __GFP_COMP | __GFP_NOTRACK;
+	flags |= __GFP_COMP | __GFP_NOTRACK | __GFP_KMEMCG;
 	page = alloc_pages_node(node, flags, get_order(size));
 	if (page)
 		ptr = page_address(page);
@@ -3390,7 +3391,7 @@ void kfree(const void *x)
 	if (unlikely(!PageSlab(page))) {
 		BUG_ON(!PageCompound(page));
 		kmemleak_free(x);
-		__free_pages(page, compound_order(page));
+		__free_memcg_kmem_pages(page, compound_order(page));
 		return;
 	}
 	slab_free(page->slab_cache, page, object, _RET_IP_);
-- 
cgit v1.2.3


From 1f458cbf122288b23620ee822e19bcbb76c8d6ec Mon Sep 17 00:00:00 2001
From: Glauber Costa <glommer@parallels.com>
Date: Tue, 18 Dec 2012 14:22:50 -0800
Subject: memcg: destroy memcg caches

Implement destruction of memcg caches.  Right now, only caches where our
reference counter is the last remaining are deleted.  If there are any
other reference counters around, we just leave the caches lying around
until they go away.

When that happens, a destruction function is called from the cache code.
Caches are only destroyed in process context, so we queue them up for
later processing in the general case.

Signed-off-by: Glauber Costa <glommer@parallels.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Frederic Weisbecker <fweisbec@redhat.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: JoonSoo Kim <js1304@gmail.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Rik van Riel <riel@redhat.com>
Cc: Suleiman Souhlal <suleiman@google.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h |  2 ++
 include/linux/slab.h       | 10 +++++++-
 mm/memcontrol.c            | 63 ++++++++++++++++++++++++++++++++++++++++++++++
 mm/slab.c                  |  3 +++
 mm/slab.h                  | 23 +++++++++++++++++
 mm/slub.c                  |  7 +++++-
 6 files changed, 106 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 2298122e71ad..79fcf0cd7186 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -453,6 +453,8 @@ void memcg_update_array_size(int num_groups);
 struct kmem_cache *
 __memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp);
 
+void mem_cgroup_destroy_cache(struct kmem_cache *cachep);
+
 /**
  * memcg_kmem_newpage_charge: verify if a new kmem allocation is allowed.
  * @gfp: the gfp allocation flags.
diff --git a/include/linux/slab.h b/include/linux/slab.h
index c0fcf28c15b2..869efb8d2377 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -11,6 +11,8 @@
 
 #include <linux/gfp.h>
 #include <linux/types.h>
+#include <linux/workqueue.h>
+
 
 /*
  * Flags to pass to kmem_cache_create().
@@ -179,7 +181,6 @@ void kmem_cache_free(struct kmem_cache *, void *);
 #ifndef ARCH_SLAB_MINALIGN
 #define ARCH_SLAB_MINALIGN __alignof__(unsigned long long)
 #endif
-
 /*
  * This is the main placeholder for memcg-related information in kmem caches.
  * struct kmem_cache will hold a pointer to it, so the memory cost while
@@ -197,6 +198,10 @@ void kmem_cache_free(struct kmem_cache *, void *);
  * @memcg: pointer to the memcg this cache belongs to
  * @list: list_head for the list of all caches in this memcg
  * @root_cache: pointer to the global, root cache, this cache was derived from
+ * @dead: set to true after the memcg dies; the cache may still be around.
+ * @nr_pages: number of pages that belongs to this cache.
+ * @destroy: worker to be called whenever we are ready, or believe we may be
+ *           ready, to destroy this cache.
  */
 struct memcg_cache_params {
 	bool is_root_cache;
@@ -206,6 +211,9 @@ struct memcg_cache_params {
 			struct mem_cgroup *memcg;
 			struct list_head list;
 			struct kmem_cache *root_cache;
+			bool dead;
+			atomic_t nr_pages;
+			struct work_struct destroy;
 		};
 	};
 };
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index cc13797d0fbc..270a36789859 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2779,6 +2779,19 @@ static inline bool memcg_can_account_kmem(struct mem_cgroup *memcg)
 		(memcg->kmem_account_flags & KMEM_ACCOUNTED_MASK);
 }
 
+/*
+ * This is a bit cumbersome, but it is rarely used and avoids a backpointer
+ * in the memcg_cache_params struct.
+ */
+static struct kmem_cache *memcg_params_to_cache(struct memcg_cache_params *p)
+{
+	struct kmem_cache *cachep;
+
+	VM_BUG_ON(p->is_root_cache);
+	cachep = p->root_cache;
+	return cachep->memcg_params->memcg_caches[memcg_cache_id(p->memcg)];
+}
+
 static int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, u64 size)
 {
 	struct res_counter *fail_res;
@@ -3056,6 +3069,31 @@ static inline void memcg_resume_kmem_account(void)
 	current->memcg_kmem_skip_account--;
 }
 
+static void kmem_cache_destroy_work_func(struct work_struct *w)
+{
+	struct kmem_cache *cachep;
+	struct memcg_cache_params *p;
+
+	p = container_of(w, struct memcg_cache_params, destroy);
+
+	cachep = memcg_params_to_cache(p);
+
+	if (!atomic_read(&cachep->memcg_params->nr_pages))
+		kmem_cache_destroy(cachep);
+}
+
+void mem_cgroup_destroy_cache(struct kmem_cache *cachep)
+{
+	if (!cachep->memcg_params->dead)
+		return;
+
+	/*
+	 * We have to defer the actual destroying to a workqueue, because
+	 * we might currently be in a context that cannot sleep.
+	 */
+	schedule_work(&cachep->memcg_params->destroy);
+}
+
 static char *memcg_cache_name(struct mem_cgroup *memcg, struct kmem_cache *s)
 {
 	char *name;
@@ -3125,6 +3163,7 @@ static struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg,
 
 	mem_cgroup_get(memcg);
 	new_cachep->memcg_params->root_cache = cachep;
+	atomic_set(&new_cachep->memcg_params->nr_pages , 0);
 
 	cachep->memcg_params->memcg_caches[idx] = new_cachep;
 	/*
@@ -3143,6 +3182,25 @@ struct create_work {
 	struct work_struct work;
 };
 
+static void mem_cgroup_destroy_all_caches(struct mem_cgroup *memcg)
+{
+	struct kmem_cache *cachep;
+	struct memcg_cache_params *params;
+
+	if (!memcg_kmem_is_active(memcg))
+		return;
+
+	mutex_lock(&memcg->slab_caches_mutex);
+	list_for_each_entry(params, &memcg->memcg_slab_caches, list) {
+		cachep = memcg_params_to_cache(params);
+		cachep->memcg_params->dead = true;
+		INIT_WORK(&cachep->memcg_params->destroy,
+			  kmem_cache_destroy_work_func);
+		schedule_work(&cachep->memcg_params->destroy);
+	}
+	mutex_unlock(&memcg->slab_caches_mutex);
+}
+
 static void memcg_create_cache_work_func(struct work_struct *w)
 {
 	struct create_work *cw;
@@ -3358,6 +3416,10 @@ void __memcg_kmem_uncharge_pages(struct page *page, int order)
 	VM_BUG_ON(mem_cgroup_is_root(memcg));
 	memcg_uncharge_kmem(memcg, PAGE_SIZE << order);
 }
+#else
+static inline void mem_cgroup_destroy_all_caches(struct mem_cgroup *memcg)
+{
+}
 #endif /* CONFIG_MEMCG_KMEM */
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
@@ -5975,6 +6037,7 @@ static void mem_cgroup_css_offline(struct cgroup *cont)
 	struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
 
 	mem_cgroup_reparent_charges(memcg);
+	mem_cgroup_destroy_all_caches(memcg);
 }
 
 static void mem_cgroup_css_free(struct cgroup *cont)
diff --git a/mm/slab.c b/mm/slab.c
index e265865e8700..7467343f9fe7 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1895,6 +1895,7 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
 		if (page->pfmemalloc)
 			SetPageSlabPfmemalloc(page + i);
 	}
+	memcg_bind_pages(cachep, cachep->gfporder);
 
 	if (kmemcheck_enabled && !(cachep->flags & SLAB_NOTRACK)) {
 		kmemcheck_alloc_shadow(page, cachep->gfporder, flags, nodeid);
@@ -1931,6 +1932,8 @@ static void kmem_freepages(struct kmem_cache *cachep, void *addr)
 		__ClearPageSlab(page);
 		page++;
 	}
+
+	memcg_release_pages(cachep, cachep->gfporder);
 	if (current->reclaim_state)
 		current->reclaim_state->reclaimed_slab += nr_freed;
 	free_memcg_kmem_pages((unsigned long)addr, cachep->gfporder);
diff --git a/mm/slab.h b/mm/slab.h
index c95e922b166d..43d8a38b534f 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -117,6 +117,21 @@ static inline bool cache_match_memcg(struct kmem_cache *cachep,
 				(cachep->memcg_params->memcg == memcg);
 }
 
+static inline void memcg_bind_pages(struct kmem_cache *s, int order)
+{
+	if (!is_root_cache(s))
+		atomic_add(1 << order, &s->memcg_params->nr_pages);
+}
+
+static inline void memcg_release_pages(struct kmem_cache *s, int order)
+{
+	if (is_root_cache(s))
+		return;
+
+	if (atomic_sub_and_test((1 << order), &s->memcg_params->nr_pages))
+		mem_cgroup_destroy_cache(s);
+}
+
 static inline bool slab_equal_or_root(struct kmem_cache *s,
 					struct kmem_cache *p)
 {
@@ -135,6 +150,14 @@ static inline bool cache_match_memcg(struct kmem_cache *cachep,
 	return true;
 }
 
+static inline void memcg_bind_pages(struct kmem_cache *s, int order)
+{
+}
+
+static inline void memcg_release_pages(struct kmem_cache *s, int order)
+{
+}
+
 static inline bool slab_equal_or_root(struct kmem_cache *s,
 				      struct kmem_cache *p)
 {
diff --git a/mm/slub.c b/mm/slub.c
index ef39e872b8eb..692177bebdf0 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1344,6 +1344,7 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
 	void *start;
 	void *last;
 	void *p;
+	int order;
 
 	BUG_ON(flags & GFP_SLAB_BUG_MASK);
 
@@ -1352,7 +1353,9 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
 	if (!page)
 		goto out;
 
+	order = compound_order(page);
 	inc_slabs_node(s, page_to_nid(page), page->objects);
+	memcg_bind_pages(s, order);
 	page->slab_cache = s;
 	__SetPageSlab(page);
 	if (page->pfmemalloc)
@@ -1361,7 +1364,7 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
 	start = page_address(page);
 
 	if (unlikely(s->flags & SLAB_POISON))
-		memset(start, POISON_INUSE, PAGE_SIZE << compound_order(page));
+		memset(start, POISON_INUSE, PAGE_SIZE << order);
 
 	last = start;
 	for_each_object(p, s, start, page->objects) {
@@ -1402,6 +1405,8 @@ static void __free_slab(struct kmem_cache *s, struct page *page)
 
 	__ClearPageSlabPfmemalloc(page);
 	__ClearPageSlab(page);
+
+	memcg_release_pages(s, order);
 	reset_page_mapcount(page);
 	if (current->reclaim_state)
 		current->reclaim_state->reclaimed_slab += pages;
-- 
cgit v1.2.3


From 7cf2798240a2a2230cb16a391beef98d8a7ad362 Mon Sep 17 00:00:00 2001
From: Glauber Costa <glommer@parallels.com>
Date: Tue, 18 Dec 2012 14:22:55 -0800
Subject: memcg/sl[au]b: track all the memcg children of a kmem_cache

This enables us to remove all the children of a kmem_cache being
destroyed, if for example the kernel module it's being used in gets
unloaded.  Otherwise, the children will still point to the destroyed
parent.

Signed-off-by: Suleiman Souhlal <suleiman@google.com>
Signed-off-by: Glauber Costa <glommer@parallels.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Frederic Weisbecker <fweisbec@redhat.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: JoonSoo Kim <js1304@gmail.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Rik van Riel <riel@redhat.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h |  5 +++++
 mm/memcontrol.c            | 49 ++++++++++++++++++++++++++++++++++++++++++++--
 mm/slab_common.c           |  3 +++
 3 files changed, 55 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 79fcf0cd7186..e119f3ef793c 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -454,6 +454,7 @@ struct kmem_cache *
 __memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp);
 
 void mem_cgroup_destroy_cache(struct kmem_cache *cachep);
+void kmem_cache_destroy_memcg_children(struct kmem_cache *s);
 
 /**
  * memcg_kmem_newpage_charge: verify if a new kmem allocation is allowed.
@@ -601,6 +602,10 @@ memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp)
 {
 	return cachep;
 }
+
+static inline void kmem_cache_destroy_memcg_children(struct kmem_cache *s)
+{
+}
 #endif /* CONFIG_MEMCG_KMEM */
 #endif /* _LINUX_MEMCONTROL_H */
 
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 270a36789859..4b68ec2c8df6 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2772,6 +2772,8 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
 	memcg_check_events(memcg, page);
 }
 
+static DEFINE_MUTEX(set_limit_mutex);
+
 #ifdef CONFIG_MEMCG_KMEM
 static inline bool memcg_can_account_kmem(struct mem_cgroup *memcg)
 {
@@ -3176,6 +3178,51 @@ out:
 	return new_cachep;
 }
 
+void kmem_cache_destroy_memcg_children(struct kmem_cache *s)
+{
+	struct kmem_cache *c;
+	int i;
+
+	if (!s->memcg_params)
+		return;
+	if (!s->memcg_params->is_root_cache)
+		return;
+
+	/*
+	 * If the cache is being destroyed, we trust that there is no one else
+	 * requesting objects from it. Even if there are, the sanity checks in
+	 * kmem_cache_destroy should caught this ill-case.
+	 *
+	 * Still, we don't want anyone else freeing memcg_caches under our
+	 * noses, which can happen if a new memcg comes to life. As usual,
+	 * we'll take the set_limit_mutex to protect ourselves against this.
+	 */
+	mutex_lock(&set_limit_mutex);
+	for (i = 0; i < memcg_limited_groups_array_size; i++) {
+		c = s->memcg_params->memcg_caches[i];
+		if (!c)
+			continue;
+
+		/*
+		 * We will now manually delete the caches, so to avoid races
+		 * we need to cancel all pending destruction workers and
+		 * proceed with destruction ourselves.
+		 *
+		 * kmem_cache_destroy() will call kmem_cache_shrink internally,
+		 * and that could spawn the workers again: it is likely that
+		 * the cache still have active pages until this very moment.
+		 * This would lead us back to mem_cgroup_destroy_cache.
+		 *
+		 * But that will not execute at all if the "dead" flag is not
+		 * set, so flip it down to guarantee we are in control.
+		 */
+		c->memcg_params->dead = false;
+		cancel_delayed_work_sync(&c->memcg_params->destroy);
+		kmem_cache_destroy(c);
+	}
+	mutex_unlock(&set_limit_mutex);
+}
+
 struct create_work {
 	struct mem_cgroup *memcg;
 	struct kmem_cache *cachep;
@@ -4284,8 +4331,6 @@ void mem_cgroup_print_bad_page(struct page *page)
 }
 #endif
 
-static DEFINE_MUTEX(set_limit_mutex);
-
 static int mem_cgroup_resize_limit(struct mem_cgroup *memcg,
 				unsigned long long val)
 {
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 1c424b6511bf..080a43804bf1 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -249,6 +249,9 @@ EXPORT_SYMBOL(kmem_cache_create);
 
 void kmem_cache_destroy(struct kmem_cache *s)
 {
+	/* Destroy all the children caches if we aren't a memcg cache */
+	kmem_cache_destroy_memcg_children(s);
+
 	get_online_cpus();
 	mutex_lock(&slab_mutex);
 	s->refcount--;
-- 
cgit v1.2.3


From 749c54151a6e5b229e4ae067dbc651e54b161fbc Mon Sep 17 00:00:00 2001
From: Glauber Costa <glommer@parallels.com>
Date: Tue, 18 Dec 2012 14:23:01 -0800
Subject: memcg: aggregate memcg cache values in slabinfo

When we create caches in memcgs, we need to display their usage
information somewhere.  We'll adopt a scheme similar to /proc/meminfo,
with aggregate totals shown in the global file, and per-group information
stored in the group itself.

For the time being, only reads are allowed in the per-group cache.

Signed-off-by: Glauber Costa <glommer@parallels.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Frederic Weisbecker <fweisbec@redhat.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: JoonSoo Kim <js1304@gmail.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Rik van Riel <riel@redhat.com>
Cc: Suleiman Souhlal <suleiman@google.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h |  8 ++++++++
 include/linux/slab.h       |  4 ++++
 mm/memcontrol.c            | 30 +++++++++++++++++++++++++++++-
 mm/slab.h                  | 27 +++++++++++++++++++++++++++
 mm/slab_common.c           | 44 ++++++++++++++++++++++++++++++++++++++++----
 5 files changed, 108 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index e119f3ef793c..8dc7c746b44f 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -420,6 +420,11 @@ static inline void sock_release_memcg(struct sock *sk)
 
 #ifdef CONFIG_MEMCG_KMEM
 extern struct static_key memcg_kmem_enabled_key;
+
+extern int memcg_limited_groups_array_size;
+#define for_each_memcg_cache_index(_idx)	\
+	for ((_idx) = 0; i < memcg_limited_groups_array_size; (_idx)++)
+
 static inline bool memcg_kmem_enabled(void)
 {
 	return static_key_false(&memcg_kmem_enabled_key);
@@ -557,6 +562,9 @@ memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp)
 	return __memcg_kmem_get_cache(cachep, gfp);
 }
 #else
+#define for_each_memcg_cache_index(_idx)	\
+	for (; NULL; )
+
 static inline bool memcg_kmem_enabled(void)
 {
 	return false;
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 869efb8d2377..b9278663f22a 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -220,6 +220,10 @@ struct memcg_cache_params {
 
 int memcg_update_all_caches(int num_memcgs);
 
+struct seq_file;
+int cache_show(struct kmem_cache *s, struct seq_file *m);
+void print_slabinfo_header(struct seq_file *m);
+
 /*
  * Common kmalloc functions provided by all allocators
  */
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 7633e0d429e0..a32d83c2e353 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -572,7 +572,8 @@ static void disarm_sock_keys(struct mem_cgroup *memcg)
  * increase it.
  */
 static DEFINE_IDA(kmem_limited_groups);
-static int memcg_limited_groups_array_size;
+int memcg_limited_groups_array_size;
+
 /*
  * MIN_SIZE is different than 1, because we would like to avoid going through
  * the alloc/free process all the time. In a small machine, 4 kmem-limited
@@ -2794,6 +2795,27 @@ static struct kmem_cache *memcg_params_to_cache(struct memcg_cache_params *p)
 	return cachep->memcg_params->memcg_caches[memcg_cache_id(p->memcg)];
 }
 
+#ifdef CONFIG_SLABINFO
+static int mem_cgroup_slabinfo_read(struct cgroup *cont, struct cftype *cft,
+					struct seq_file *m)
+{
+	struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
+	struct memcg_cache_params *params;
+
+	if (!memcg_can_account_kmem(memcg))
+		return -EIO;
+
+	print_slabinfo_header(m);
+
+	mutex_lock(&memcg->slab_caches_mutex);
+	list_for_each_entry(params, &memcg->memcg_slab_caches, list)
+		cache_show(memcg_params_to_cache(params), m);
+	mutex_unlock(&memcg->slab_caches_mutex);
+
+	return 0;
+}
+#endif
+
 static int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, u64 size)
 {
 	struct res_counter *fail_res;
@@ -5822,6 +5844,12 @@ static struct cftype mem_cgroup_files[] = {
 		.trigger = mem_cgroup_reset,
 		.read = mem_cgroup_read,
 	},
+#ifdef CONFIG_SLABINFO
+	{
+		.name = "kmem.slabinfo",
+		.read_seq_string = mem_cgroup_slabinfo_read,
+	},
+#endif
 #endif
 	{ },	/* terminate */
 };
diff --git a/mm/slab.h b/mm/slab.h
index 43d8a38b534f..ec5dae1c8e75 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -138,6 +138,23 @@ static inline bool slab_equal_or_root(struct kmem_cache *s,
 	return (p == s) ||
 		(s->memcg_params && (p == s->memcg_params->root_cache));
 }
+
+/*
+ * We use suffixes to the name in memcg because we can't have caches
+ * created in the system with the same name. But when we print them
+ * locally, better refer to them with the base name
+ */
+static inline const char *cache_name(struct kmem_cache *s)
+{
+	if (!is_root_cache(s))
+		return s->memcg_params->root_cache->name;
+	return s->name;
+}
+
+static inline struct kmem_cache *cache_from_memcg(struct kmem_cache *s, int idx)
+{
+	return s->memcg_params->memcg_caches[idx];
+}
 #else
 static inline bool is_root_cache(struct kmem_cache *s)
 {
@@ -163,6 +180,16 @@ static inline bool slab_equal_or_root(struct kmem_cache *s,
 {
 	return true;
 }
+
+static inline const char *cache_name(struct kmem_cache *s)
+{
+	return s->name;
+}
+
+static inline struct kmem_cache *cache_from_memcg(struct kmem_cache *s, int idx)
+{
+	return NULL;
+}
 #endif
 
 static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x)
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 080a43804bf1..081f1b8d9a7b 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -322,7 +322,7 @@ struct kmem_cache *__init create_kmalloc_cache(const char *name, size_t size,
 
 
 #ifdef CONFIG_SLABINFO
-static void print_slabinfo_header(struct seq_file *m)
+void print_slabinfo_header(struct seq_file *m)
 {
 	/*
 	 * Output format version, so at least we can change it
@@ -366,16 +366,43 @@ static void s_stop(struct seq_file *m, void *p)
 	mutex_unlock(&slab_mutex);
 }
 
-static int s_show(struct seq_file *m, void *p)
+static void
+memcg_accumulate_slabinfo(struct kmem_cache *s, struct slabinfo *info)
+{
+	struct kmem_cache *c;
+	struct slabinfo sinfo;
+	int i;
+
+	if (!is_root_cache(s))
+		return;
+
+	for_each_memcg_cache_index(i) {
+		c = cache_from_memcg(s, i);
+		if (!c)
+			continue;
+
+		memset(&sinfo, 0, sizeof(sinfo));
+		get_slabinfo(c, &sinfo);
+
+		info->active_slabs += sinfo.active_slabs;
+		info->num_slabs += sinfo.num_slabs;
+		info->shared_avail += sinfo.shared_avail;
+		info->active_objs += sinfo.active_objs;
+		info->num_objs += sinfo.num_objs;
+	}
+}
+
+int cache_show(struct kmem_cache *s, struct seq_file *m)
 {
-	struct kmem_cache *s = list_entry(p, struct kmem_cache, list);
 	struct slabinfo sinfo;
 
 	memset(&sinfo, 0, sizeof(sinfo));
 	get_slabinfo(s, &sinfo);
 
+	memcg_accumulate_slabinfo(s, &sinfo);
+
 	seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d",
-		   s->name, sinfo.active_objs, sinfo.num_objs, s->size,
+		   cache_name(s), sinfo.active_objs, sinfo.num_objs, s->size,
 		   sinfo.objects_per_slab, (1 << sinfo.cache_order));
 
 	seq_printf(m, " : tunables %4u %4u %4u",
@@ -387,6 +414,15 @@ static int s_show(struct seq_file *m, void *p)
 	return 0;
 }
 
+static int s_show(struct seq_file *m, void *p)
+{
+	struct kmem_cache *s = list_entry(p, struct kmem_cache, list);
+
+	if (!is_root_cache(s))
+		return 0;
+	return cache_show(s, m);
+}
+
 /*
  * slabinfo_op - iterator that generates /proc/slabinfo
  *
-- 
cgit v1.2.3


From 943a451a87d229ca564a27274b58eaeae35fde5d Mon Sep 17 00:00:00 2001
From: Glauber Costa <glommer@parallels.com>
Date: Tue, 18 Dec 2012 14:23:03 -0800
Subject: slab: propagate tunable values

SLAB allows us to tune a particular cache behavior with tunables.  When
creating a new memcg cache copy, we'd like to preserve any tunables the
parent cache already had.

This could be done by an explicit call to do_tune_cpucache() after the
cache is created.  But this is not very convenient now that the caches are
created from common code, since this function is SLAB-specific.

Another method of doing that is taking advantage of the fact that
do_tune_cpucache() is always called from enable_cpucache(), which is
called at cache initialization.  We can just preset the values, and then
things work as expected.

It can also happen that a root cache has its tunables updated during
normal system operation.  In this case, we will propagate the change to
all caches that are already active.

This change will require us to move the assignment of root_cache in
memcg_params a bit earlier.  We need this to be already set - which
memcg_kmem_register_cache will do - when we reach __kmem_cache_create()

Signed-off-by: Glauber Costa <glommer@parallels.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Frederic Weisbecker <fweisbec@redhat.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: JoonSoo Kim <js1304@gmail.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Rik van Riel <riel@redhat.com>
Cc: Suleiman Souhlal <suleiman@google.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h |  8 +++++---
 include/linux/slab.h       |  2 +-
 mm/memcontrol.c            | 10 ++++++----
 mm/slab.c                  | 44 +++++++++++++++++++++++++++++++++++++++++---
 mm/slab.h                  | 12 ++++++++++++
 mm/slab_common.c           |  7 ++++---
 6 files changed, 69 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 8dc7c746b44f..ea02ff970836 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -448,7 +448,8 @@ void __memcg_kmem_commit_charge(struct page *page,
 void __memcg_kmem_uncharge_pages(struct page *page, int order);
 
 int memcg_cache_id(struct mem_cgroup *memcg);
-int memcg_register_cache(struct mem_cgroup *memcg, struct kmem_cache *s);
+int memcg_register_cache(struct mem_cgroup *memcg, struct kmem_cache *s,
+			 struct kmem_cache *root_cache);
 void memcg_release_cache(struct kmem_cache *cachep);
 void memcg_cache_list_add(struct mem_cgroup *memcg, struct kmem_cache *cachep);
 
@@ -590,8 +591,9 @@ static inline int memcg_cache_id(struct mem_cgroup *memcg)
 	return -1;
 }
 
-static inline int memcg_register_cache(struct mem_cgroup *memcg,
-				       struct kmem_cache *s)
+static inline int
+memcg_register_cache(struct mem_cgroup *memcg, struct kmem_cache *s,
+		     struct kmem_cache *root_cache)
 {
 	return 0;
 }
diff --git a/include/linux/slab.h b/include/linux/slab.h
index b9278663f22a..5d168d7e0a28 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -130,7 +130,7 @@ struct kmem_cache *kmem_cache_create(const char *, size_t, size_t,
 			void (*)(void *));
 struct kmem_cache *
 kmem_cache_create_memcg(struct mem_cgroup *, const char *, size_t, size_t,
-			unsigned long, void (*)(void *));
+			unsigned long, void (*)(void *), struct kmem_cache *);
 void kmem_cache_destroy(struct kmem_cache *);
 int kmem_cache_shrink(struct kmem_cache *);
 void kmem_cache_free(struct kmem_cache *, void *);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index a32d83c2e353..f3009b4bae51 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3012,7 +3012,8 @@ int memcg_update_cache_size(struct kmem_cache *s, int num_groups)
 	return 0;
 }
 
-int memcg_register_cache(struct mem_cgroup *memcg, struct kmem_cache *s)
+int memcg_register_cache(struct mem_cgroup *memcg, struct kmem_cache *s,
+			 struct kmem_cache *root_cache)
 {
 	size_t size = sizeof(struct memcg_cache_params);
 
@@ -3026,8 +3027,10 @@ int memcg_register_cache(struct mem_cgroup *memcg, struct kmem_cache *s)
 	if (!s->memcg_params)
 		return -ENOMEM;
 
-	if (memcg)
+	if (memcg) {
 		s->memcg_params->memcg = memcg;
+		s->memcg_params->root_cache = root_cache;
+	}
 	return 0;
 }
 
@@ -3186,7 +3189,7 @@ static struct kmem_cache *kmem_cache_dup(struct mem_cgroup *memcg,
 		return NULL;
 
 	new = kmem_cache_create_memcg(memcg, name, s->object_size, s->align,
-				      (s->flags & ~SLAB_PANIC), s->ctor);
+				      (s->flags & ~SLAB_PANIC), s->ctor, s);
 
 	if (new)
 		new->allocflags |= __GFP_KMEMCG;
@@ -3226,7 +3229,6 @@ static struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg,
 	}
 
 	mem_cgroup_get(memcg);
-	new_cachep->memcg_params->root_cache = cachep;
 	atomic_set(&new_cachep->memcg_params->nr_pages , 0);
 
 	cachep->memcg_params->memcg_caches[idx] = new_cachep;
diff --git a/mm/slab.c b/mm/slab.c
index 7467343f9fe7..4dcbf96a77b4 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -4041,7 +4041,7 @@ static void do_ccupdate_local(void *info)
 }
 
 /* Always called with the slab_mutex held */
-static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
+static int __do_tune_cpucache(struct kmem_cache *cachep, int limit,
 				int batchcount, int shared, gfp_t gfp)
 {
 	struct ccupdate_struct *new;
@@ -4084,12 +4084,48 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
 	return alloc_kmemlist(cachep, gfp);
 }
 
+static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
+				int batchcount, int shared, gfp_t gfp)
+{
+	int ret;
+	struct kmem_cache *c = NULL;
+	int i = 0;
+
+	ret = __do_tune_cpucache(cachep, limit, batchcount, shared, gfp);
+
+	if (slab_state < FULL)
+		return ret;
+
+	if ((ret < 0) || !is_root_cache(cachep))
+		return ret;
+
+	for_each_memcg_cache_index(i) {
+		c = cache_from_memcg(cachep, i);
+		if (c)
+			/* return value determined by the parent cache only */
+			__do_tune_cpucache(c, limit, batchcount, shared, gfp);
+	}
+
+	return ret;
+}
+
 /* Called with slab_mutex held always */
 static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp)
 {
 	int err;
-	int limit, shared;
+	int limit = 0;
+	int shared = 0;
+	int batchcount = 0;
+
+	if (!is_root_cache(cachep)) {
+		struct kmem_cache *root = memcg_root_cache(cachep);
+		limit = root->limit;
+		shared = root->shared;
+		batchcount = root->batchcount;
+	}
 
+	if (limit && shared && batchcount)
+		goto skip_setup;
 	/*
 	 * The head array serves three purposes:
 	 * - create a LIFO ordering, i.e. return objects that are cache-warm
@@ -4131,7 +4167,9 @@ static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp)
 	if (limit > 32)
 		limit = 32;
 #endif
-	err = do_tune_cpucache(cachep, limit, (limit + 1) / 2, shared, gfp);
+	batchcount = (limit + 1) / 2;
+skip_setup:
+	err = do_tune_cpucache(cachep, limit, batchcount, shared, gfp);
 	if (err)
 		printk(KERN_ERR "enable_cpucache failed for %s, error %d.\n",
 		       cachep->name, -err);
diff --git a/mm/slab.h b/mm/slab.h
index ec5dae1c8e75..34a98d642196 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -155,6 +155,13 @@ static inline struct kmem_cache *cache_from_memcg(struct kmem_cache *s, int idx)
 {
 	return s->memcg_params->memcg_caches[idx];
 }
+
+static inline struct kmem_cache *memcg_root_cache(struct kmem_cache *s)
+{
+	if (is_root_cache(s))
+		return s;
+	return s->memcg_params->root_cache;
+}
 #else
 static inline bool is_root_cache(struct kmem_cache *s)
 {
@@ -190,6 +197,11 @@ static inline struct kmem_cache *cache_from_memcg(struct kmem_cache *s, int idx)
 {
 	return NULL;
 }
+
+static inline struct kmem_cache *memcg_root_cache(struct kmem_cache *s)
+{
+	return s;
+}
 #endif
 
 static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x)
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 081f1b8d9a7b..3f3cd97d3fdf 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -164,7 +164,8 @@ unsigned long calculate_alignment(unsigned long flags,
 
 struct kmem_cache *
 kmem_cache_create_memcg(struct mem_cgroup *memcg, const char *name, size_t size,
-			size_t align, unsigned long flags, void (*ctor)(void *))
+			size_t align, unsigned long flags, void (*ctor)(void *),
+			struct kmem_cache *parent_cache)
 {
 	struct kmem_cache *s = NULL;
 	int err = 0;
@@ -193,7 +194,7 @@ kmem_cache_create_memcg(struct mem_cgroup *memcg, const char *name, size_t size,
 		s->align = calculate_alignment(flags, align, size);
 		s->ctor = ctor;
 
-		if (memcg_register_cache(memcg, s)) {
+		if (memcg_register_cache(memcg, s, parent_cache)) {
 			kmem_cache_free(kmem_cache, s);
 			err = -ENOMEM;
 			goto out_locked;
@@ -243,7 +244,7 @@ struct kmem_cache *
 kmem_cache_create(const char *name, size_t size, size_t align,
 		  unsigned long flags, void (*ctor)(void *))
 {
-	return kmem_cache_create_memcg(NULL, name, size, align, flags, ctor);
+	return kmem_cache_create_memcg(NULL, name, size, align, flags, ctor, NULL);
 }
 EXPORT_SYMBOL(kmem_cache_create);
 
-- 
cgit v1.2.3


From 107dab5c92d5f9c3afe962036e47c207363255c7 Mon Sep 17 00:00:00 2001
From: Glauber Costa <glommer@parallels.com>
Date: Tue, 18 Dec 2012 14:23:05 -0800
Subject: slub: slub-specific propagation changes

SLUB allows us to tune a particular cache behavior with sysfs-based
tunables.  When creating a new memcg cache copy, we'd like to preserve any
tunables the parent cache already had.

This can be done by tapping into the store attribute function provided by
the allocator.  We of course don't need to mess with read-only fields.
Since the attributes can have multiple types and are stored internally by
sysfs, the best strategy is to issue a ->show() in the root cache, and
then ->store() in the memcg cache.

The drawback of that, is that sysfs can allocate up to a page in buffering
for show(), that we are likely not to need, but also can't guarantee.  To
avoid always allocating a page for that, we can update the caches at store
time with the maximum attribute size ever stored to the root cache.  We
will then get a buffer big enough to hold it.  The corolary to this, is
that if no stores happened, nothing will be propagated.

It can also happen that a root cache has its tunables updated during
normal system operation.  In this case, we will propagate the change to
all caches that are already active.

[akpm@linux-foundation.org: tweak code to avoid __maybe_unused]
Signed-off-by: Glauber Costa <glommer@parallels.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Frederic Weisbecker <fweisbec@redhat.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: JoonSoo Kim <js1304@gmail.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Rik van Riel <riel@redhat.com>
Cc: Suleiman Souhlal <suleiman@google.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/slub_def.h |  1 +
 mm/slub.c                | 76 +++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 76 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 364ba6c9fe21..9db4825cd393 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -103,6 +103,7 @@ struct kmem_cache {
 #endif
 #ifdef CONFIG_MEMCG_KMEM
 	struct memcg_cache_params *memcg_params;
+	int max_attr_size; /* for propagation, maximum size of a stored attr */
 #endif
 
 #ifdef CONFIG_NUMA
diff --git a/mm/slub.c b/mm/slub.c
index 692177bebdf0..21c94d9695ec 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -201,13 +201,14 @@ enum track_item { TRACK_ALLOC, TRACK_FREE };
 static int sysfs_slab_add(struct kmem_cache *);
 static int sysfs_slab_alias(struct kmem_cache *, const char *);
 static void sysfs_slab_remove(struct kmem_cache *);
-
+static void memcg_propagate_slab_attrs(struct kmem_cache *s);
 #else
 static inline int sysfs_slab_add(struct kmem_cache *s) { return 0; }
 static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p)
 							{ return 0; }
 static inline void sysfs_slab_remove(struct kmem_cache *s) { }
 
+static inline void memcg_propagate_slab_attrs(struct kmem_cache *s) { }
 #endif
 
 static inline void stat(const struct kmem_cache *s, enum stat_item si)
@@ -3865,6 +3866,7 @@ int __kmem_cache_create(struct kmem_cache *s, unsigned long flags)
 	if (slab_state <= UP)
 		return 0;
 
+	memcg_propagate_slab_attrs(s);
 	mutex_unlock(&slab_mutex);
 	err = sysfs_slab_add(s);
 	mutex_lock(&slab_mutex);
@@ -5098,10 +5100,82 @@ static ssize_t slab_attr_store(struct kobject *kobj,
 		return -EIO;
 
 	err = attribute->store(s, buf, len);
+#ifdef CONFIG_MEMCG_KMEM
+	if (slab_state >= FULL && err >= 0 && is_root_cache(s)) {
+		int i;
+
+		mutex_lock(&slab_mutex);
+		if (s->max_attr_size < len)
+			s->max_attr_size = len;
 
+		for_each_memcg_cache_index(i) {
+			struct kmem_cache *c = cache_from_memcg(s, i);
+			/*
+			 * This function's return value is determined by the
+			 * parent cache only
+			 */
+			if (c)
+				attribute->store(c, buf, len);
+		}
+		mutex_unlock(&slab_mutex);
+	}
+#endif
 	return err;
 }
 
+static void memcg_propagate_slab_attrs(struct kmem_cache *s)
+{
+#ifdef CONFIG_MEMCG_KMEM
+	int i;
+	char *buffer = NULL;
+
+	if (!is_root_cache(s))
+		return;
+
+	/*
+	 * This mean this cache had no attribute written. Therefore, no point
+	 * in copying default values around
+	 */
+	if (!s->max_attr_size)
+		return;
+
+	for (i = 0; i < ARRAY_SIZE(slab_attrs); i++) {
+		char mbuf[64];
+		char *buf;
+		struct slab_attribute *attr = to_slab_attr(slab_attrs[i]);
+
+		if (!attr || !attr->store || !attr->show)
+			continue;
+
+		/*
+		 * It is really bad that we have to allocate here, so we will
+		 * do it only as a fallback. If we actually allocate, though,
+		 * we can just use the allocated buffer until the end.
+		 *
+		 * Most of the slub attributes will tend to be very small in
+		 * size, but sysfs allows buffers up to a page, so they can
+		 * theoretically happen.
+		 */
+		if (buffer)
+			buf = buffer;
+		else if (s->max_attr_size < ARRAY_SIZE(mbuf))
+			buf = mbuf;
+		else {
+			buffer = (char *) get_zeroed_page(GFP_KERNEL);
+			if (WARN_ON(!buffer))
+				continue;
+			buf = buffer;
+		}
+
+		attr->show(s->memcg_params->root_cache, buf);
+		attr->store(s, buf, strlen(buf));
+	}
+
+	if (buffer)
+		free_page((unsigned long)buffer);
+#endif
+}
+
 static const struct sysfs_ops slab_sysfs_ops = {
 	.show = slab_attr_show,
 	.store = slab_attr_store,
-- 
cgit v1.2.3


From ebe945c27628fca03723582eba138acc2e2f3d15 Mon Sep 17 00:00:00 2001
From: Glauber Costa <glommer@parallels.com>
Date: Tue, 18 Dec 2012 14:23:10 -0800
Subject: memcg: add comments clarifying aspects of cache attribute propagation

This patch clarifies two aspects of cache attribute propagation.

First, the expected context for the for_each_memcg_cache macro in
memcontrol.h.  The usages already in the codebase are safe.  In mm/slub.c,
it is trivially safe because the lock is acquired right before the loop.
In mm/slab.c, it is less so: the lock is acquired by an outer function a
few steps back in the stack, so a VM_BUG_ON() is added to make sure it is
indeed safe.

A comment is also added to detail why we are returning the value of the
parent cache and ignoring the children's when we propagate the attributes.

Signed-off-by: Glauber Costa <glommer@parallels.com>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Kamezawa Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h |  6 ++++++
 mm/slab.c                  |  1 +
 mm/slub.c                  | 21 +++++++++++++++++----
 3 files changed, 24 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index ea02ff970836..0108a56f814e 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -422,6 +422,12 @@ static inline void sock_release_memcg(struct sock *sk)
 extern struct static_key memcg_kmem_enabled_key;
 
 extern int memcg_limited_groups_array_size;
+
+/*
+ * Helper macro to loop through all memcg-specific caches. Callers must still
+ * check if the cache is valid (it is either valid or NULL).
+ * the slab_mutex must be held when looping through those caches
+ */
 #define for_each_memcg_cache_index(_idx)	\
 	for ((_idx) = 0; i < memcg_limited_groups_array_size; (_idx)++)
 
diff --git a/mm/slab.c b/mm/slab.c
index 4dcbf96a77b4..e7667a3584bc 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -4099,6 +4099,7 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
 	if ((ret < 0) || !is_root_cache(cachep))
 		return ret;
 
+	VM_BUG_ON(!mutex_is_locked(&slab_mutex));
 	for_each_memcg_cache_index(i) {
 		c = cache_from_memcg(cachep, i);
 		if (c)
diff --git a/mm/slub.c b/mm/slub.c
index 21c94d9695ec..efe2cffc29b0 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -5108,12 +5108,25 @@ static ssize_t slab_attr_store(struct kobject *kobj,
 		if (s->max_attr_size < len)
 			s->max_attr_size = len;
 
+		/*
+		 * This is a best effort propagation, so this function's return
+		 * value will be determined by the parent cache only. This is
+		 * basically because not all attributes will have a well
+		 * defined semantics for rollbacks - most of the actions will
+		 * have permanent effects.
+		 *
+		 * Returning the error value of any of the children that fail
+		 * is not 100 % defined, in the sense that users seeing the
+		 * error code won't be able to know anything about the state of
+		 * the cache.
+		 *
+		 * Only returning the error code for the parent cache at least
+		 * has well defined semantics. The cache being written to
+		 * directly either failed or succeeded, in which case we loop
+		 * through the descendants with best-effort propagation.
+		 */
 		for_each_memcg_cache_index(i) {
 			struct kmem_cache *c = cache_from_memcg(s, i);
-			/*
-			 * This function's return value is determined by the
-			 * parent cache only
-			 */
 			if (c)
 				attribute->store(c, buf, len);
 		}
-- 
cgit v1.2.3


From 7179e7bf4592ac5a7b30257a7df6259ee81e51da Mon Sep 17 00:00:00 2001
From: Jianguo Wu <wujianguo@huawei.com>
Date: Tue, 18 Dec 2012 14:23:19 -0800
Subject: mm/hugetlb: create hugetlb cgroup file in hugetlb_init

Build kernel with CONFIG_HUGETLBFS=y,CONFIG_HUGETLB_PAGE=y and
CONFIG_CGROUP_HUGETLB=y, then specify hugepagesz=xx boot option, system
will fail to boot.

This failure is caused by following code path:

  setup_hugepagesz
    hugetlb_add_hstate
      hugetlb_cgroup_file_init
        cgroup_add_cftypes
          kzalloc <--slab is *not available* yet

For this path, slab is not available yet, so memory allocated will be
failed, and cause WARN_ON() in hugetlb_cgroup_file_init().

So I move hugetlb_cgroup_file_init() into hugetlb_init().

[akpm@linux-foundation.org: tweak coding-style, remove pointless __init on inlined function]
[akpm@linux-foundation.org: fix warning]
Signed-off-by: Jianguo Wu <wujianguo@huawei.com>
Signed-off-by: Jiang Liu <jiang.liu@huawei.com>
Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Acked-by: Michal Hocko <mhocko@suse.cz>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/hugetlb_cgroup.h |  5 ++---
 mm/hugetlb.c                   | 11 +----------
 mm/hugetlb_cgroup.c            | 19 +++++++++++++++++--
 3 files changed, 20 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hugetlb_cgroup.h b/include/linux/hugetlb_cgroup.h
index d73878c694b3..ce8217f7b5c2 100644
--- a/include/linux/hugetlb_cgroup.h
+++ b/include/linux/hugetlb_cgroup.h
@@ -62,7 +62,7 @@ extern void hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages,
 					 struct page *page);
 extern void hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
 					   struct hugetlb_cgroup *h_cg);
-extern int hugetlb_cgroup_file_init(int idx) __init;
+extern void hugetlb_cgroup_file_init(void) __init;
 extern void hugetlb_cgroup_migrate(struct page *oldhpage,
 				   struct page *newhpage);
 
@@ -111,9 +111,8 @@ hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
 	return;
 }
 
-static inline int __init hugetlb_cgroup_file_init(int idx)
+static inline void hugetlb_cgroup_file_init(void)
 {
-	return 0;
 }
 
 static inline void hugetlb_cgroup_migrate(struct page *oldhpage,
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index e5318c7793ae..4f3ea0b1e57c 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1906,14 +1906,12 @@ static int __init hugetlb_init(void)
 		default_hstate.max_huge_pages = default_hstate_max_huge_pages;
 
 	hugetlb_init_hstates();
-
 	gather_bootmem_prealloc();
-
 	report_hugepages();
 
 	hugetlb_sysfs_init();
-
 	hugetlb_register_all_nodes();
+	hugetlb_cgroup_file_init();
 
 	return 0;
 }
@@ -1943,13 +1941,6 @@ void __init hugetlb_add_hstate(unsigned order)
 	h->next_nid_to_free = first_node(node_states[N_MEMORY]);
 	snprintf(h->name, HSTATE_NAME_LEN, "hugepages-%lukB",
 					huge_page_size(h)/1024);
-	/*
-	 * Add cgroup control files only if the huge page consists
-	 * of more than two normal pages. This is because we use
-	 * page[2].lru.next for storing cgoup details.
-	 */
-	if (order >= HUGETLB_CGROUP_MIN_ORDER)
-		hugetlb_cgroup_file_init(hugetlb_max_hstate - 1);
 
 	parsed_hstate = h;
 }
diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c
index b5bde7a5c017..9cea7de22ffb 100644
--- a/mm/hugetlb_cgroup.c
+++ b/mm/hugetlb_cgroup.c
@@ -333,7 +333,7 @@ static char *mem_fmt(char *buf, int size, unsigned long hsize)
 	return buf;
 }
 
-int __init hugetlb_cgroup_file_init(int idx)
+static void __init __hugetlb_cgroup_file_init(int idx)
 {
 	char buf[32];
 	struct cftype *cft;
@@ -375,7 +375,22 @@ int __init hugetlb_cgroup_file_init(int idx)
 
 	WARN_ON(cgroup_add_cftypes(&hugetlb_subsys, h->cgroup_files));
 
-	return 0;
+	return;
+}
+
+void __init hugetlb_cgroup_file_init(void)
+{
+	struct hstate *h;
+
+	for_each_hstate(h) {
+		/*
+		 * Add cgroup control files only if the huge page consists
+		 * of more than two normal pages. This is because we use
+		 * page[2].lru.next for storing cgroup details.
+		 */
+		if (huge_page_order(h) >= HUGETLB_CGROUP_MIN_ORDER)
+			__hugetlb_cgroup_file_init(hstate_index(h));
+	}
 }
 
 /*
-- 
cgit v1.2.3


From 59771079c18c44e39106f0f30054025acafadb41 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Wed, 19 Dec 2012 07:18:35 -0800
Subject: blk: avoid divide-by-zero with zero discard granularity

Commit 8dd2cb7e880d ("block: discard granularity might not be power of
2") changed a couple of 'binary and' operations into modulus operations.
Which turned the harmless case of a zero discard_granularity into a
possible divide-by-zero.

The code also had a much more subtle bug: it was doing the modulus of a
value in bytes using 'sector_t'.  That was always conceptually wrong,
but didn't actually matter back when the code assumed a power-of-two
granularity: we only looked at the low bits anyway.

But with potentially arbitrary sector numbers, using a 'sector_t' to
express bytes is very very wrong: depending on configuration it limits
the starting offset of the device to just 32 bits, and any overflow
would result in a wrong value if the modulus wasn't a power-of-two.

So re-write the code to not only protect against the divide-by-zero, but
to do the starting sector arithmetic in sectors, and using the proper
types.

[ For any mathematicians out there: it also looks monumentally stupid to
  do the 'modulo granularity' operation *twice*, never mind having a "+
  granularity" in the second modulus op.

  But that's the easiest way to avoid negative values or overflow, and
  it is how the original code was done. ]

Reported-by: Ingo Molnar <mingo@kernel.org>
Reported-by: Doug Anderson <dianders@chromium.org>
Cc: Neil Brown <neilb@suse.de>
Cc: Shaohua Li <shli@fusionio.com>
Acked-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/blkdev.h | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index acb4f7bbbd32..f94bc83011ed 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1188,14 +1188,25 @@ static inline int queue_discard_alignment(struct request_queue *q)
 
 static inline int queue_limit_discard_alignment(struct queue_limits *lim, sector_t sector)
 {
-	sector_t alignment = sector << 9;
-	alignment = sector_div(alignment, lim->discard_granularity);
+	unsigned int alignment, granularity, offset;
 
 	if (!lim->max_discard_sectors)
 		return 0;
 
-	alignment = lim->discard_granularity + lim->discard_alignment - alignment;
-	return sector_div(alignment, lim->discard_granularity);
+	/* Why are these in bytes, not sectors? */
+	alignment = lim->discard_alignment >> 9;
+	granularity = lim->discard_granularity >> 9;
+	if (!granularity)
+		return 0;
+
+	/* Offset of the partition start in 'granularity' sectors */
+	offset = sector_div(sector, granularity);
+
+	/* And why do we do this modulus *again* in blkdev_issue_discard()? */
+	offset = (granularity + alignment - offset) % granularity;
+
+	/* Turn it back into bytes, gaah */
+	return offset << 9;
 }
 
 static inline int bdev_discard_alignment(struct block_device *bdev)
-- 
cgit v1.2.3


From ab28698d33af05abab0bcf8021eafe38f7434f24 Mon Sep 17 00:00:00 2001
From: Jonas Gorski <jogo@openwrt.org>
Date: Wed, 19 Dec 2012 09:10:09 -0600
Subject: of: define struct device in of_platform.h if !OF_DEVICE and
 !OF_ADDRESS

Fixes the following warning:

include/linux/of_platform.h:106:13: warning: 'struct device' declared
inside parameter list [enabled by default]
include/linux/of_platform.h:106:13: warning: its scope is only this
definition or declaration, which is probably not what you want [enabled
by default]

Signed-off-by: Jonas Gorski <jogo@openwrt.org>
Signed-off-by: Rob Herring <robherring2@gmail.com>
Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
---
 include/linux/of_platform.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/of_platform.h b/include/linux/of_platform.h
index b47d2040c9f2..3863a4dbdf18 100644
--- a/include/linux/of_platform.h
+++ b/include/linux/of_platform.h
@@ -100,6 +100,7 @@ extern int of_platform_populate(struct device_node *root,
 
 #if !defined(CONFIG_OF_ADDRESS)
 struct of_dev_auxdata;
+struct device;
 static inline int of_platform_populate(struct device_node *root,
 					const struct of_device_id *matches,
 					const struct of_dev_auxdata *lookup,
-- 
cgit v1.2.3


From 3c439b5586e9200f7e6287ee77c175c4d5b0eeed Mon Sep 17 00:00:00 2001
From: Jack Morgenstein <jackm@dev.mellanox.co.il>
Date: Thu, 6 Dec 2012 17:12:00 +0000
Subject: mlx4_core: Allow choosing flow steering mode

Device managed flow steering will be enabled only under administrator
directive provided through setting the existing module parameter
log_num_mgm_entry_size to -1 (if the device actually supports flow
steering).  If flow steering isn't requested or not available, the
driver will use the value of log_num_mgm_entry_size and B0 steering.

Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
---
 drivers/net/ethernet/mellanox/mlx4/main.c | 52 ++++++++++++++++++++++++++-----
 drivers/net/ethernet/mellanox/mlx4/mcg.c  |  7 +----
 drivers/net/ethernet/mellanox/mlx4/mlx4.h |  6 ++--
 include/linux/mlx4/device.h               |  1 +
 4 files changed, 50 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 680d81026cbd..e1bafffbc3b1 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -85,15 +85,15 @@ static int probe_vf;
 module_param(probe_vf, int, 0644);
 MODULE_PARM_DESC(probe_vf, "number of vfs to probe by pf driver (num_vfs > 0)");
 
-int mlx4_log_num_mgm_entry_size = 10;
+int mlx4_log_num_mgm_entry_size = MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE;
 module_param_named(log_num_mgm_entry_size,
 			mlx4_log_num_mgm_entry_size, int, 0444);
 MODULE_PARM_DESC(log_num_mgm_entry_size, "log mgm size, that defines the num"
 					 " of qp per mcg, for example:"
-					 " 10 gives 248.range: 9<="
+					 " 10 gives 248.range: 7 <="
 					 " log_num_mgm_entry_size <= 12."
-					 " Not in use with device managed"
-					 " flow steering");
+					 " To activate device managed"
+					 " flow steering when available, set to -1");
 
 static bool enable_64b_cqe_eqe;
 module_param(enable_64b_cqe_eqe, bool, 0444);
@@ -1318,12 +1318,30 @@ static void mlx4_parav_master_pf_caps(struct mlx4_dev *dev)
 	}
 }
 
+static int choose_log_fs_mgm_entry_size(int qp_per_entry)
+{
+	int i = MLX4_MIN_MGM_LOG_ENTRY_SIZE;
+
+	for (i = MLX4_MIN_MGM_LOG_ENTRY_SIZE; i <= MLX4_MAX_MGM_LOG_ENTRY_SIZE;
+	      i++) {
+		if (qp_per_entry <= 4 * ((1 << i) / 16 - 2))
+			break;
+	}
+
+	return (i <= MLX4_MAX_MGM_LOG_ENTRY_SIZE) ? i : -1;
+}
+
 static void choose_steering_mode(struct mlx4_dev *dev,
 				 struct mlx4_dev_cap *dev_cap)
 {
-	if (dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_FS_EN &&
+	if (mlx4_log_num_mgm_entry_size == -1 &&
+	    dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_FS_EN &&
 	    (!mlx4_is_mfunc(dev) ||
-	     (dev_cap->fs_max_num_qp_per_entry >= (num_vfs + 1)))) {
+	     (dev_cap->fs_max_num_qp_per_entry >= (num_vfs + 1))) &&
+	    choose_log_fs_mgm_entry_size(dev_cap->fs_max_num_qp_per_entry) >=
+		MLX4_MIN_MGM_LOG_ENTRY_SIZE) {
+		dev->oper_log_mgm_entry_size =
+			choose_log_fs_mgm_entry_size(dev_cap->fs_max_num_qp_per_entry);
 		dev->caps.steering_mode = MLX4_STEERING_MODE_DEVICE_MANAGED;
 		dev->caps.num_qp_per_mgm = dev_cap->fs_max_num_qp_per_entry;
 		dev->caps.fs_log_max_ucast_qp_range_size =
@@ -1340,10 +1358,17 @@ static void choose_steering_mode(struct mlx4_dev *dev,
 				mlx4_warn(dev, "Must have both UC_STEER and MC_STEER flags "
 					  "set to use B0 steering. Falling back to A0 steering mode.\n");
 		}
+		dev->oper_log_mgm_entry_size =
+			mlx4_log_num_mgm_entry_size > 0 ?
+			mlx4_log_num_mgm_entry_size :
+			MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE;
 		dev->caps.num_qp_per_mgm = mlx4_get_qp_per_mgm(dev);
 	}
-	mlx4_dbg(dev, "Steering mode is: %s\n",
-		 mlx4_steering_mode_str(dev->caps.steering_mode));
+	mlx4_dbg(dev, "Steering mode is: %s, oper_log_mgm_entry_size = %d, "
+		 "modparam log_num_mgm_entry_size = %d\n",
+		 mlx4_steering_mode_str(dev->caps.steering_mode),
+		 dev->oper_log_mgm_entry_size,
+		 mlx4_log_num_mgm_entry_size);
 }
 
 static int mlx4_init_hca(struct mlx4_dev *dev)
@@ -2479,6 +2504,17 @@ static int __init mlx4_verify_params(void)
 		port_type_array[0] = true;
 	}
 
+	if (mlx4_log_num_mgm_entry_size != -1 &&
+	    (mlx4_log_num_mgm_entry_size < MLX4_MIN_MGM_LOG_ENTRY_SIZE ||
+	     mlx4_log_num_mgm_entry_size > MLX4_MAX_MGM_LOG_ENTRY_SIZE)) {
+		pr_warning("mlx4_core: mlx4_log_num_mgm_entry_size (%d) not "
+			   "in legal range (-1 or %d..%d)\n",
+			   mlx4_log_num_mgm_entry_size,
+			   MLX4_MIN_MGM_LOG_ENTRY_SIZE,
+			   MLX4_MAX_MGM_LOG_ENTRY_SIZE);
+		return -1;
+	}
+
 	return 0;
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c
index e151c21baf2b..1ee4db3c6400 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mcg.c
+++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c
@@ -54,12 +54,7 @@ struct mlx4_mgm {
 
 int mlx4_get_mgm_entry_size(struct mlx4_dev *dev)
 {
-	if (dev->caps.steering_mode ==
-	    MLX4_STEERING_MODE_DEVICE_MANAGED)
-		return 1 << MLX4_FS_MGM_LOG_ENTRY_SIZE;
-	else
-		return min((1 << mlx4_log_num_mgm_entry_size),
-			   MLX4_MAX_MGM_ENTRY_SIZE);
+	return 1 << dev->oper_log_mgm_entry_size;
 }
 
 int mlx4_get_qp_per_mgm(struct mlx4_dev *dev)
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index 1cf42036d7bb..116c5c29d2d1 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -94,8 +94,10 @@ enum {
 };
 
 enum {
-	MLX4_MAX_MGM_ENTRY_SIZE = 0x1000,
-	MLX4_MAX_QP_PER_MGM	= 4 * (MLX4_MAX_MGM_ENTRY_SIZE / 16 - 2),
+	MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE = 10,
+	MLX4_MIN_MGM_LOG_ENTRY_SIZE = 7,
+	MLX4_MAX_MGM_LOG_ENTRY_SIZE = 12,
+	MLX4_MAX_QP_PER_MGM = 4 * ((1 << MLX4_MAX_MGM_LOG_ENTRY_SIZE) / 16 - 2),
 	MLX4_MTT_ENTRY_PER_SEG	= 8,
 };
 
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 21821da2abfd..20ea939c22a6 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -625,6 +625,7 @@ struct mlx4_dev {
 	u8			rev_id;
 	char			board_id[MLX4_BOARD_ID_LEN];
 	int			num_vfs;
+	int			oper_log_mgm_entry_size;
 	u64			regid_promisc_array[MLX4_MAX_PORTS + 1];
 	u64			regid_allmulti_array[MLX4_MAX_PORTS + 1];
 };
-- 
cgit v1.2.3


From a1c088e01b71d90852b0df5a77cdae46bd0e0c05 Mon Sep 17 00:00:00 2001
From: Oliver Neukum <oliver@neukum.org>
Date: Tue, 18 Dec 2012 04:45:29 +0000
Subject: usbnet: handle PM failure gracefully

If a device fails to do remote wakeup, this is no reason
to abort an open totally. This patch just continues without
runtime PM.

Signed-off-by: Oliver Neukum <oneukum@suse.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/usbnet.c   | 15 ++++++++-------
 include/linux/usb/usbnet.h |  1 +
 2 files changed, 9 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c
index c04110ba677f..50ed7ab09c9f 100644
--- a/drivers/net/usb/usbnet.c
+++ b/drivers/net/usb/usbnet.c
@@ -719,7 +719,8 @@ int usbnet_stop (struct net_device *net)
 	dev->flags = 0;
 	del_timer_sync (&dev->delay);
 	tasklet_kill (&dev->bh);
-	if (info->manage_power)
+	if (info->manage_power &&
+	    !test_and_clear_bit(EVENT_NO_RUNTIME_PM, &dev->flags))
 		info->manage_power(dev, 0);
 	else
 		usb_autopm_put_interface(dev->intf);
@@ -794,14 +795,14 @@ int usbnet_open (struct net_device *net)
 	tasklet_schedule (&dev->bh);
 	if (info->manage_power) {
 		retval = info->manage_power(dev, 1);
-		if (retval < 0)
-			goto done_manage_power_error;
-		usb_autopm_put_interface(dev->intf);
+		if (retval < 0) {
+			retval = 0;
+			set_bit(EVENT_NO_RUNTIME_PM, &dev->flags);
+		} else {
+			usb_autopm_put_interface(dev->intf);
+		}
 	}
 	return retval;
-
-done_manage_power_error:
-	clear_bit(EVENT_DEV_OPEN, &dev->flags);
 done:
 	usb_autopm_put_interface(dev->intf);
 done_nopm:
diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h
index 9bbeabf66c54..288b32aadab2 100644
--- a/include/linux/usb/usbnet.h
+++ b/include/linux/usb/usbnet.h
@@ -69,6 +69,7 @@ struct usbnet {
 #		define EVENT_DEV_ASLEEP 6
 #		define EVENT_DEV_OPEN	7
 #		define EVENT_DEVICE_REPORT_IDLE	8
+#		define EVENT_NO_RUNTIME_PM	9
 };
 
 static inline struct usb_driver *driver_of(struct usb_interface *intf)
-- 
cgit v1.2.3


From 2dd7c8cf29769f6b66f26b501db2364640c2c9d0 Mon Sep 17 00:00:00 2001
From: Oliver Neukum <oliver@neukum.org>
Date: Tue, 18 Dec 2012 04:45:52 +0000
Subject: usbnet: generic manage_power()

Centralise common code for manage_power() in usbnet
by making a generic simple implementation

Signed-off-by: Oliver Neukum <oneukum@suse.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/usbnet.c   | 10 ++++++++++
 include/linux/usb/usbnet.h |  2 ++
 2 files changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c
index 50ed7ab09c9f..3d4bf01641b4 100644
--- a/drivers/net/usb/usbnet.c
+++ b/drivers/net/usb/usbnet.c
@@ -1616,6 +1616,16 @@ void usbnet_device_suggests_idle(struct usbnet *dev)
 }
 EXPORT_SYMBOL(usbnet_device_suggests_idle);
 
+/*
+ * For devices that can do without special commands
+ */
+int usbnet_manage_power(struct usbnet *dev, int on)
+{
+	dev->intf->needs_remote_wakeup = on;
+	return 0;
+}
+EXPORT_SYMBOL(usbnet_manage_power);
+
 /*-------------------------------------------------------------------------*/
 static int __usbnet_read_cmd(struct usbnet *dev, u8 cmd, u8 reqtype,
 			     u16 value, u16 index, void *data, u16 size)
diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h
index 288b32aadab2..bd45eb7bedc8 100644
--- a/include/linux/usb/usbnet.h
+++ b/include/linux/usb/usbnet.h
@@ -241,4 +241,6 @@ extern void usbnet_set_msglevel(struct net_device *, u32);
 extern void usbnet_get_drvinfo(struct net_device *, struct ethtool_drvinfo *);
 extern int usbnet_nway_reset(struct net_device *net);
 
+extern int usbnet_manage_power(struct usbnet *, int);
+
 #endif /* __LINUX_USB_USBNET_H */
-- 
cgit v1.2.3


From cf13a84d174947df4bb809edfb4887393642303e Mon Sep 17 00:00:00 2001
From: Fabio Porcedda <fabio.porcedda@gmail.com>
Date: Fri, 5 Oct 2012 12:16:09 +0200
Subject: watchdog: WatchDog Timer Driver Core: fix comment

Signed-off-by: Fabio Porcedda <fabio.porcedda@gmail.com>
Signed-off-by: Wim Van Sebroeck <wim@iguana.be>
---
 include/linux/watchdog.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/watchdog.h b/include/linux/watchdog.h
index 87490ac4bd87..3a9df2f43be6 100644
--- a/include/linux/watchdog.h
+++ b/include/linux/watchdog.h
@@ -129,7 +129,7 @@ static inline void *watchdog_get_drvdata(struct watchdog_device *wdd)
 	return wdd->driver_data;
 }
 
-/* drivers/watchdog/core/watchdog_core.c */
+/* drivers/watchdog/watchdog_core.c */
 extern int watchdog_register_device(struct watchdog_device *);
 extern void watchdog_unregister_device(struct watchdog_device *);
 
-- 
cgit v1.2.3


From 468366138850f20543f1d4878028900672b23dae Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 23 Nov 2012 09:12:59 -0500
Subject: COMPAT_SYSCALL_DEFINE: infrastructure

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/s390/include/asm/compat.h |  3 +++
 include/linux/compat.h         | 42 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 45 insertions(+)

(limited to 'include/linux')

diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h
index 18cd6b592650..f8c6df6cd1f0 100644
--- a/arch/s390/include/asm/compat.h
+++ b/arch/s390/include/asm/compat.h
@@ -7,6 +7,9 @@
 #include <linux/sched.h>
 #include <linux/thread_info.h>
 
+#define __TYPE_IS_PTR(t) (!__builtin_types_compatible_p(typeof(0?(t)0:0ULL), u64))
+#define __SC_DELOUSE(t,v) (t)(__TYPE_IS_PTR(t) ? ((v) & 0x7fffffff) : (v))
+
 #define PSW32_MASK_PER		0x40000000UL
 #define PSW32_MASK_DAT		0x04000000UL
 #define PSW32_MASK_IO		0x02000000UL
diff --git a/include/linux/compat.h b/include/linux/compat.h
index 784ebfe63c48..a7877fa809fd 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -23,6 +23,48 @@
 #define COMPAT_USE_64BIT_TIME 0
 #endif
 
+#ifndef __SC_DELOUSE
+#define __SC_DELOUSE(t,v) ((t)(unsigned long)(v))
+#endif
+
+#define __SC_CCAST1(t1, a1)      __SC_DELOUSE(t1,a1)
+#define __SC_CCAST2(t2, a2, ...) __SC_DELOUSE(t2,a2), __SC_CCAST1(__VA_ARGS__)
+#define __SC_CCAST3(t3, a3, ...) __SC_DELOUSE(t3,a3), __SC_CCAST2(__VA_ARGS__)
+#define __SC_CCAST4(t4, a4, ...) __SC_DELOUSE(t4,a4), __SC_CCAST3(__VA_ARGS__)
+#define __SC_CCAST5(t5, a5, ...) __SC_DELOUSE(t5,a5), __SC_CCAST4(__VA_ARGS__)
+#define __SC_CCAST6(t6, a6, ...) __SC_DELOUSE(t6,a6), __SC_CCAST5(__VA_ARGS__)
+#define COMPAT_SYSCALL_DEFINE1(name, ...) \
+        COMPAT_SYSCALL_DEFINEx(1, _##name, __VA_ARGS__)
+#define COMPAT_SYSCALL_DEFINE2(name, ...) \
+	COMPAT_SYSCALL_DEFINEx(2, _##name, __VA_ARGS__)
+#define COMPAT_SYSCALL_DEFINE3(name, ...) \
+	COMPAT_SYSCALL_DEFINEx(3, _##name, __VA_ARGS__)
+#define COMPAT_SYSCALL_DEFINE4(name, ...) \
+	COMPAT_SYSCALL_DEFINEx(4, _##name, __VA_ARGS__)
+#define COMPAT_SYSCALL_DEFINE5(name, ...) \
+	COMPAT_SYSCALL_DEFINEx(5, _##name, __VA_ARGS__)
+#define COMPAT_SYSCALL_DEFINE6(name, ...) \
+	COMPAT_SYSCALL_DEFINEx(6, _##name, __VA_ARGS__)
+
+#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
+
+#define COMPAT_SYSCALL_DEFINEx(x, name, ...)				\
+	asmlinkage long compat_sys##name(__SC_DECL##x(__VA_ARGS__));	\
+	static inline long C_SYSC##name(__SC_DECL##x(__VA_ARGS__));	\
+	asmlinkage long compat_SyS##name(__SC_LONG##x(__VA_ARGS__))	\
+	{								\
+		return (long) C_SYSC##name(__SC_CCAST##x(__VA_ARGS__));	\
+	}								\
+	SYSCALL_ALIAS(compat_sys##name, compat_SyS##name);		\
+	static inline long C_SYSC##name(__SC_DECL##x(__VA_ARGS__))
+
+#else /* CONFIG_HAVE_SYSCALL_WRAPPERS */
+
+#define COMPAT_SYSCALL_DEFINEx(x, name, ...)				\
+	asmlinkage long compat_sys##name(__SC_DECL##x(__VA_ARGS__))
+
+#endif /* CONFIG_HAVE_SYSCALL_WRAPPERS */
+
 #define compat_jiffies_to_clock_t(x)	\
 		(((unsigned long)(x) * COMPAT_USER_HZ) / HZ)
 
-- 
cgit v1.2.3


From ae903caae267154de7cf8576b130ff474630596b Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 14 Dec 2012 12:44:11 -0500
Subject: Bury the conditionals from kernel_thread/kernel_execve series

All architectures have
	CONFIG_GENERIC_KERNEL_THREAD
	CONFIG_GENERIC_KERNEL_EXECVE
	__ARCH_WANT_SYS_EXECVE
None of them have __ARCH_WANT_KERNEL_EXECVE and there are only two callers
of kernel_execve() (which is a trivial wrapper for do_execve() now) left.
Kill the conditionals and make both callers use do_execve().

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/Kconfig                             |  6 ------
 arch/alpha/Kconfig                       |  2 --
 arch/alpha/include/asm/unistd.h          |  1 -
 arch/arm/Kconfig                         |  2 --
 arch/arm/include/asm/unistd.h            |  1 -
 arch/arm64/Kconfig                       |  2 --
 arch/arm64/include/asm/unistd.h          |  1 -
 arch/avr32/Kconfig                       |  2 --
 arch/avr32/include/asm/unistd.h          |  1 -
 arch/blackfin/Kconfig                    |  2 --
 arch/blackfin/include/asm/unistd.h       |  1 -
 arch/c6x/Kconfig                         |  2 --
 arch/c6x/include/uapi/asm/unistd.h       |  1 -
 arch/cris/Kconfig                        |  2 --
 arch/cris/include/asm/unistd.h           |  1 -
 arch/frv/Kconfig                         |  2 --
 arch/frv/include/asm/unistd.h            |  1 -
 arch/h8300/Kconfig                       |  2 --
 arch/h8300/include/asm/unistd.h          |  1 -
 arch/hexagon/Kconfig                     |  2 --
 arch/hexagon/include/uapi/asm/unistd.h   |  1 -
 arch/ia64/Kconfig                        |  2 --
 arch/ia64/include/asm/unistd.h           |  1 -
 arch/m32r/Kconfig                        |  2 --
 arch/m32r/include/asm/unistd.h           |  1 -
 arch/m68k/Kconfig                        |  2 --
 arch/m68k/include/asm/unistd.h           |  1 -
 arch/microblaze/Kconfig                  |  2 --
 arch/microblaze/include/asm/unistd.h     |  1 -
 arch/mips/Kconfig                        |  2 --
 arch/mips/include/asm/unistd.h           |  1 -
 arch/mn10300/Kconfig                     |  2 --
 arch/mn10300/include/asm/unistd.h        |  1 -
 arch/openrisc/Kconfig                    |  2 --
 arch/openrisc/include/uapi/asm/unistd.h  |  1 -
 arch/parisc/Kconfig                      |  2 --
 arch/parisc/include/asm/unistd.h         |  1 -
 arch/powerpc/Kconfig                     |  2 --
 arch/powerpc/include/asm/unistd.h        |  1 -
 arch/s390/Kconfig                        |  2 --
 arch/s390/include/asm/unistd.h           |  1 -
 arch/score/Kconfig                       |  2 --
 arch/score/include/asm/unistd.h          |  1 -
 arch/sh/Kconfig                          |  2 --
 arch/sh/include/asm/unistd.h             |  1 -
 arch/sparc/Kconfig                       |  2 --
 arch/sparc/include/asm/unistd.h          |  1 -
 arch/tile/Kconfig                        |  2 --
 arch/tile/include/asm/unistd.h           |  1 -
 arch/unicore32/Kconfig                   |  2 --
 arch/unicore32/include/uapi/asm/unistd.h |  1 -
 arch/x86/Kconfig                         |  2 --
 arch/x86/include/asm/unistd.h            |  1 -
 arch/x86/um/Kconfig                      |  2 --
 arch/xtensa/Kconfig                      |  2 --
 arch/xtensa/include/asm/unistd.h         |  1 -
 fs/exec.c                                | 21 ---------------------
 include/linux/binfmts.h                  |  4 ----
 include/linux/sched.h                    |  2 --
 include/linux/syscalls.h                 |  9 ---------
 init/main.c                              |  4 +++-
 kernel/fork.c                            |  2 --
 kernel/kmod.c                            |  6 +++---
 63 files changed, 6 insertions(+), 131 deletions(-)

(limited to 'include/linux')

diff --git a/arch/Kconfig b/arch/Kconfig
index 8d698fb5ccc9..0a8dd0585d0d 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -271,12 +271,6 @@ config ARCH_WANT_OLD_COMPAT_IPC
 	select ARCH_WANT_COMPAT_IPC_PARSE_VERSION
 	bool
 
-config GENERIC_KERNEL_THREAD
-	bool
-
-config GENERIC_KERNEL_EXECVE
-	bool
-
 config HAVE_ARCH_SECCOMP_FILTER
 	bool
 	help
diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index 5dd7f5db24d4..7e3710c0cce5 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -20,8 +20,6 @@ config ALPHA
 	select GENERIC_CMOS_UPDATE
 	select GENERIC_STRNCPY_FROM_USER
 	select GENERIC_STRNLEN_USER
-	select GENERIC_KERNEL_THREAD
-	select GENERIC_KERNEL_EXECVE
 	select HAVE_MOD_ARCH_SPECIFIC
 	select MODULES_USE_ELF_RELA
 	help
diff --git a/arch/alpha/include/asm/unistd.h b/arch/alpha/include/asm/unistd.h
index eb3a4664ced2..68c75f2fadb9 100644
--- a/arch/alpha/include/asm/unistd.h
+++ b/arch/alpha/include/asm/unistd.h
@@ -481,7 +481,6 @@
 #define __ARCH_WANT_SYS_OLDUMOUNT
 #define __ARCH_WANT_SYS_SIGPENDING
 #define __ARCH_WANT_SYS_RT_SIGSUSPEND
-#define __ARCH_WANT_SYS_EXECVE
 #define __ARCH_WANT_SYS_FORK
 #define __ARCH_WANT_SYS_VFORK
 #define __ARCH_WANT_SYS_CLONE
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 8918a2dd89b4..b789654e7e2f 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -11,8 +11,6 @@ config ARM
 	select GENERIC_CLOCKEVENTS_BROADCAST if SMP
 	select GENERIC_IRQ_PROBE
 	select GENERIC_IRQ_SHOW
-	select GENERIC_KERNEL_THREAD
-	select GENERIC_KERNEL_EXECVE
 	select GENERIC_PCI_IOMAP
 	select GENERIC_SMP_IDLE_THREAD
 	select GENERIC_STRNCPY_FROM_USER
diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h
index 7cd13cc62624..21a2700d2957 100644
--- a/arch/arm/include/asm/unistd.h
+++ b/arch/arm/include/asm/unistd.h
@@ -41,7 +41,6 @@
 #define __ARCH_WANT_OLD_READDIR
 #define __ARCH_WANT_SYS_SOCKETCALL
 #endif
-#define __ARCH_WANT_SYS_EXECVE
 #define __ARCH_WANT_SYS_FORK
 #define __ARCH_WANT_SYS_VFORK
 #define __ARCH_WANT_SYS_CLONE
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 4b03c56ec329..a846029bebcc 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -7,8 +7,6 @@ config ARM64
 	select GENERIC_IOMAP
 	select GENERIC_IRQ_PROBE
 	select GENERIC_IRQ_SHOW
-	select GENERIC_KERNEL_EXECVE
-	select GENERIC_KERNEL_THREAD
 	select GENERIC_SMP_IDLE_THREAD
 	select GENERIC_TIME_VSYSCALL
 	select HARDIRQS_SW_RESEND
diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h
index d69aeea6da1e..738322945d1a 100644
--- a/arch/arm64/include/asm/unistd.h
+++ b/arch/arm64/include/asm/unistd.h
@@ -27,6 +27,5 @@
 #define __ARCH_WANT_SYS_FORK
 #define __ARCH_WANT_SYS_VFORK
 #endif
-#define __ARCH_WANT_SYS_EXECVE
 #define __ARCH_WANT_SYS_CLONE
 #include <uapi/asm/unistd.h>
diff --git a/arch/avr32/Kconfig b/arch/avr32/Kconfig
index 649aeb9acecb..06e73bf665e9 100644
--- a/arch/avr32/Kconfig
+++ b/arch/avr32/Kconfig
@@ -17,8 +17,6 @@ config AVR32
 	select GENERIC_CLOCKEVENTS
 	select HAVE_MOD_ARCH_SPECIFIC
 	select MODULES_USE_ELF_RELA
-	select GENERIC_KERNEL_THREAD
-	select GENERIC_KERNEL_EXECVE
 	help
 	  AVR32 is a high-performance 32-bit RISC microprocessor core,
 	  designed for cost-sensitive embedded applications, with particular
diff --git a/arch/avr32/include/asm/unistd.h b/arch/avr32/include/asm/unistd.h
index f05a9804e8e2..0bdf6371574e 100644
--- a/arch/avr32/include/asm/unistd.h
+++ b/arch/avr32/include/asm/unistd.h
@@ -39,7 +39,6 @@
 #define __ARCH_WANT_SYS_GETPGRP
 #define __ARCH_WANT_SYS_RT_SIGACTION
 #define __ARCH_WANT_SYS_RT_SIGSUSPEND
-#define __ARCH_WANT_SYS_EXECVE
 #define __ARCH_WANT_SYS_FORK
 #define __ARCH_WANT_SYS_VFORK
 #define __ARCH_WANT_SYS_CLONE
diff --git a/arch/blackfin/Kconfig b/arch/blackfin/Kconfig
index ab9ff4075f4d..b6f3ad5441c5 100644
--- a/arch/blackfin/Kconfig
+++ b/arch/blackfin/Kconfig
@@ -45,8 +45,6 @@ config BLACKFIN
 	select ARCH_USES_GETTIMEOFFSET if !GENERIC_CLOCKEVENTS
 	select HAVE_MOD_ARCH_SPECIFIC
 	select MODULES_USE_ELF_RELA
-	select GENERIC_KERNEL_THREAD
-	select GENERIC_KERNEL_EXECVE
 
 config GENERIC_CSUM
 	def_bool y
diff --git a/arch/blackfin/include/asm/unistd.h b/arch/blackfin/include/asm/unistd.h
index 460514a1a4e1..38711e28baac 100644
--- a/arch/blackfin/include/asm/unistd.h
+++ b/arch/blackfin/include/asm/unistd.h
@@ -446,7 +446,6 @@
 #define __ARCH_WANT_SYS_NICE
 #define __ARCH_WANT_SYS_RT_SIGACTION
 #define __ARCH_WANT_SYS_RT_SIGSUSPEND
-#define __ARCH_WANT_SYS_EXECVE
 #define __ARCH_WANT_SYS_VFORK
 
 /*
diff --git a/arch/c6x/Kconfig b/arch/c6x/Kconfig
index 66eab3703c75..f6a3648f5ec3 100644
--- a/arch/c6x/Kconfig
+++ b/arch/c6x/Kconfig
@@ -17,8 +17,6 @@ config C6X
 	select OF
 	select OF_EARLY_FLATTREE
 	select GENERIC_CLOCKEVENTS
-	select GENERIC_KERNEL_THREAD
-	select GENERIC_KERNEL_EXECVE
 	select MODULES_USE_ELF_RELA
 
 config MMU
diff --git a/arch/c6x/include/uapi/asm/unistd.h b/arch/c6x/include/uapi/asm/unistd.h
index f3987a8703d9..e7d09a614d10 100644
--- a/arch/c6x/include/uapi/asm/unistd.h
+++ b/arch/c6x/include/uapi/asm/unistd.h
@@ -14,7 +14,6 @@
  *   more details.
  */
 
-#define __ARCH_WANT_SYS_EXECVE
 #define __ARCH_WANT_SYS_CLONE
 
 /* Use the standard ABI for syscalls. */
diff --git a/arch/cris/Kconfig b/arch/cris/Kconfig
index 0cac6a49f230..c59a01dd9c0c 100644
--- a/arch/cris/Kconfig
+++ b/arch/cris/Kconfig
@@ -49,8 +49,6 @@ config CRIS
 	select GENERIC_SMP_IDLE_THREAD if ETRAX_ARCH_V32
 	select GENERIC_CMOS_UPDATE
 	select MODULES_USE_ELF_RELA
-	select GENERIC_KERNEL_THREAD
-	select GENERIC_KERNEL_EXECVE
 	select CLONE_BACKWARDS2
 
 config HZ
diff --git a/arch/cris/include/asm/unistd.h b/arch/cris/include/asm/unistd.h
index f27b542e0ebc..5cda75a9cc1e 100644
--- a/arch/cris/include/asm/unistd.h
+++ b/arch/cris/include/asm/unistd.h
@@ -371,7 +371,6 @@
 #define __ARCH_WANT_SYS_SIGPROCMASK
 #define __ARCH_WANT_SYS_RT_SIGACTION
 #define __ARCH_WANT_SYS_RT_SIGSUSPEND
-#define __ARCH_WANT_SYS_EXECVE
 #define __ARCH_WANT_SYS_FORK
 #define __ARCH_WANT_SYS_VFORK
 #define __ARCH_WANT_SYS_CLONE
diff --git a/arch/frv/Kconfig b/arch/frv/Kconfig
index df2eb4bd9fa2..9d262645f667 100644
--- a/arch/frv/Kconfig
+++ b/arch/frv/Kconfig
@@ -12,8 +12,6 @@ config FRV
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 	select GENERIC_CPU_DEVICES
 	select ARCH_WANT_IPC_PARSE_VERSION
-	select GENERIC_KERNEL_THREAD
-	select GENERIC_KERNEL_EXECVE
 
 config ZONE_DMA
 	bool
diff --git a/arch/frv/include/asm/unistd.h b/arch/frv/include/asm/unistd.h
index 1807d8ea8cb5..d685da17f5fb 100644
--- a/arch/frv/include/asm/unistd.h
+++ b/arch/frv/include/asm/unistd.h
@@ -29,7 +29,6 @@
 #define __ARCH_WANT_SYS_SIGPROCMASK
 #define __ARCH_WANT_SYS_RT_SIGACTION
 #define __ARCH_WANT_SYS_RT_SIGSUSPEND
-#define __ARCH_WANT_SYS_EXECVE
 #define __ARCH_WANT_SYS_FORK
 #define __ARCH_WANT_SYS_VFORK
 #define __ARCH_WANT_SYS_CLONE
diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig
index 04bef4d25b4a..98fabd10e95f 100644
--- a/arch/h8300/Kconfig
+++ b/arch/h8300/Kconfig
@@ -8,8 +8,6 @@ config H8300
 	select GENERIC_IRQ_SHOW
 	select GENERIC_CPU_DEVICES
 	select MODULES_USE_ELF_RELA
-	select GENERIC_KERNEL_THREAD
-	select GENERIC_KERNEL_EXECVE
 
 config SYMBOL_PREFIX
 	string
diff --git a/arch/h8300/include/asm/unistd.h b/arch/h8300/include/asm/unistd.h
index c2c2f5c7d6bf..566f94860c45 100644
--- a/arch/h8300/include/asm/unistd.h
+++ b/arch/h8300/include/asm/unistd.h
@@ -356,7 +356,6 @@
 #define __ARCH_WANT_SYS_SIGPROCMASK
 #define __ARCH_WANT_SYS_RT_SIGACTION
 #define __ARCH_WANT_SYS_RT_SIGSUSPEND
-#define __ARCH_WANT_SYS_EXECVE
 #define __ARCH_WANT_SYS_FORK
 #define __ARCH_WANT_SYS_VFORK
 #define __ARCH_WANT_SYS_CLONE
diff --git a/arch/hexagon/Kconfig b/arch/hexagon/Kconfig
index e418803b6c8e..0744f7d7b1fd 100644
--- a/arch/hexagon/Kconfig
+++ b/arch/hexagon/Kconfig
@@ -31,8 +31,6 @@ config HEXAGON
 	select GENERIC_CLOCKEVENTS
 	select GENERIC_CLOCKEVENTS_BROADCAST
 	select MODULES_USE_ELF_RELA
-	select GENERIC_KERNEL_THREAD
-	select GENERIC_KERNEL_EXECVE
 	---help---
 	  Qualcomm Hexagon is a processor architecture designed for high
 	  performance and low power across a wide variety of applications.
diff --git a/arch/hexagon/include/uapi/asm/unistd.h b/arch/hexagon/include/uapi/asm/unistd.h
index 2af81533bd0f..4a87cc47075c 100644
--- a/arch/hexagon/include/uapi/asm/unistd.h
+++ b/arch/hexagon/include/uapi/asm/unistd.h
@@ -27,7 +27,6 @@
  */
 
 #define sys_mmap2 sys_mmap_pgoff
-#define __ARCH_WANT_SYS_EXECVE
 #define __ARCH_WANT_SYS_CLONE
 
 #include <asm-generic/unistd.h>
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 670600468128..3279646120e3 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -42,8 +42,6 @@ config IA64
 	select GENERIC_TIME_VSYSCALL_OLD
 	select HAVE_MOD_ARCH_SPECIFIC
 	select MODULES_USE_ELF_RELA
-	select GENERIC_KERNEL_THREAD
-	select GENERIC_KERNEL_EXECVE
 	default y
 	help
 	  The Itanium Processor Family is Intel's 64-bit successor to
diff --git a/arch/ia64/include/asm/unistd.h b/arch/ia64/include/asm/unistd.h
index 1574bca86138..8b3ff2f5b861 100644
--- a/arch/ia64/include/asm/unistd.h
+++ b/arch/ia64/include/asm/unistd.h
@@ -29,7 +29,6 @@
 
 #define __ARCH_WANT_SYS_RT_SIGACTION
 #define __ARCH_WANT_SYS_RT_SIGSUSPEND
-#define __ARCH_WANT_SYS_EXECVE
 
 #if !defined(__ASSEMBLY__) && !defined(ASSEMBLER)
 
diff --git a/arch/m32r/Kconfig b/arch/m32r/Kconfig
index 5183f43a2cf7..f807721e19a5 100644
--- a/arch/m32r/Kconfig
+++ b/arch/m32r/Kconfig
@@ -15,8 +15,6 @@ config M32R
 	select GENERIC_ATOMIC64
 	select ARCH_USES_GETTIMEOFFSET
 	select MODULES_USE_ELF_RELA
-	select GENERIC_KERNEL_THREAD
-	select GENERIC_KERNEL_EXECVE
 
 config SBUS
 	bool
diff --git a/arch/m32r/include/asm/unistd.h b/arch/m32r/include/asm/unistd.h
index d9e7351af2a4..cbfa39158fef 100644
--- a/arch/m32r/include/asm/unistd.h
+++ b/arch/m32r/include/asm/unistd.h
@@ -352,7 +352,6 @@
 #define __ARCH_WANT_SYS_OLDUMOUNT
 #define __ARCH_WANT_SYS_RT_SIGACTION
 #define __ARCH_WANT_SYS_RT_SIGSUSPEND
-#define __ARCH_WANT_SYS_EXECVE
 #define __ARCH_WANT_SYS_CLONE
 #define __ARCH_WANT_SYS_FORK
 #define __ARCH_WANT_SYS_VFORK
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index 953a7ba5d050..6710084e072a 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -15,8 +15,6 @@ config M68K
 	select FPU if MMU
 	select ARCH_WANT_IPC_PARSE_VERSION
 	select ARCH_USES_GETTIMEOFFSET if MMU && !COLDFIRE
-	select GENERIC_KERNEL_THREAD
-	select GENERIC_KERNEL_EXECVE
 	select HAVE_MOD_ARCH_SPECIFIC
 	select MODULES_USE_ELF_REL
 	select MODULES_USE_ELF_RELA
diff --git a/arch/m68k/include/asm/unistd.h b/arch/m68k/include/asm/unistd.h
index a021d67cdd72..847994ce6804 100644
--- a/arch/m68k/include/asm/unistd.h
+++ b/arch/m68k/include/asm/unistd.h
@@ -31,7 +31,6 @@
 #define __ARCH_WANT_SYS_SIGPROCMASK
 #define __ARCH_WANT_SYS_RT_SIGACTION
 #define __ARCH_WANT_SYS_RT_SIGSUSPEND
-#define __ARCH_WANT_SYS_EXECVE
 #define __ARCH_WANT_SYS_FORK
 #define __ARCH_WANT_SYS_VFORK
 
diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig
index 4bcf89148f3c..ba3b7c8c04b8 100644
--- a/arch/microblaze/Kconfig
+++ b/arch/microblaze/Kconfig
@@ -26,8 +26,6 @@ config MICROBLAZE
 	select GENERIC_ATOMIC64
 	select GENERIC_CLOCKEVENTS
 	select MODULES_USE_ELF_RELA
-	select GENERIC_KERNEL_THREAD
-	select GENERIC_KERNEL_EXECVE
 	select CLONE_BACKWARDS
 
 config SWAP
diff --git a/arch/microblaze/include/asm/unistd.h b/arch/microblaze/include/asm/unistd.h
index 94d978986b75..38cabf4db548 100644
--- a/arch/microblaze/include/asm/unistd.h
+++ b/arch/microblaze/include/asm/unistd.h
@@ -422,7 +422,6 @@
 #define __ARCH_WANT_SYS_SIGPROCMASK
 #define __ARCH_WANT_SYS_RT_SIGACTION
 #define __ARCH_WANT_SYS_RT_SIGSUSPEND
-#define __ARCH_WANT_SYS_EXECVE
 #define __ARCH_WANT_SYS_CLONE
 #define __ARCH_WANT_SYS_VFORK
 #ifdef CONFIG_MMU
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 4183e62f178c..dba9390d37cf 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -40,8 +40,6 @@ config MIPS
 	select HAVE_MOD_ARCH_SPECIFIC
 	select MODULES_USE_ELF_REL
 	select MODULES_USE_ELF_RELA if 64BIT
-	select GENERIC_KERNEL_THREAD
-	select GENERIC_KERNEL_EXECVE
 
 menu "Machine selection"
 
diff --git a/arch/mips/include/asm/unistd.h b/arch/mips/include/asm/unistd.h
index b306e2081cad..9e47cc11aa26 100644
--- a/arch/mips/include/asm/unistd.h
+++ b/arch/mips/include/asm/unistd.h
@@ -20,7 +20,6 @@
 #define __ARCH_OMIT_COMPAT_SYS_GETDENTS64
 #define __ARCH_WANT_OLD_READDIR
 #define __ARCH_WANT_SYS_ALARM
-#define __ARCH_WANT_SYS_EXECVE
 #define __ARCH_WANT_SYS_GETHOSTNAME
 #define __ARCH_WANT_SYS_IPC
 #define __ARCH_WANT_SYS_PAUSE
diff --git a/arch/mn10300/Kconfig b/arch/mn10300/Kconfig
index 72471744a912..aa03f2e13385 100644
--- a/arch/mn10300/Kconfig
+++ b/arch/mn10300/Kconfig
@@ -8,8 +8,6 @@ config MN10300
 	select HAVE_ARCH_KGDB
 	select HAVE_NMI_WATCHDOG if MN10300_WD_TIMER
 	select GENERIC_CLOCKEVENTS
-	select GENERIC_KERNEL_THREAD
-	select GENERIC_KERNEL_EXECVE
 	select MODULES_USE_ELF_RELA
 
 config AM33_2
diff --git a/arch/mn10300/include/asm/unistd.h b/arch/mn10300/include/asm/unistd.h
index cabf8ba73b27..e6d2ed4ba68f 100644
--- a/arch/mn10300/include/asm/unistd.h
+++ b/arch/mn10300/include/asm/unistd.h
@@ -43,7 +43,6 @@
 #define __ARCH_WANT_SYS_SIGPROCMASK
 #define __ARCH_WANT_SYS_RT_SIGACTION
 #define __ARCH_WANT_SYS_RT_SIGSUSPEND
-#define __ARCH_WANT_SYS_EXECVE
 #define __ARCH_WANT_SYS_FORK
 #define __ARCH_WANT_SYS_VFORK
 #define __ARCH_WANT_SYS_CLONE
diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig
index e7f1a2993f78..05f2ba41ff1a 100644
--- a/arch/openrisc/Kconfig
+++ b/arch/openrisc/Kconfig
@@ -22,8 +22,6 @@ config OPENRISC
 	select GENERIC_STRNCPY_FROM_USER
 	select GENERIC_STRNLEN_USER
 	select MODULES_USE_ELF_RELA
-	select GENERIC_KERNEL_THREAD
-	select GENERIC_KERNEL_EXECVE
 
 config MMU
 	def_bool y
diff --git a/arch/openrisc/include/uapi/asm/unistd.h b/arch/openrisc/include/uapi/asm/unistd.h
index 5082b8066325..ce40b71df006 100644
--- a/arch/openrisc/include/uapi/asm/unistd.h
+++ b/arch/openrisc/include/uapi/asm/unistd.h
@@ -20,7 +20,6 @@
 
 #define sys_mmap2 sys_mmap_pgoff
 
-#define __ARCH_WANT_SYS_EXECVE
 #define __ARCH_WANT_SYS_FORK
 #define __ARCH_WANT_SYS_CLONE
 
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index e688a2be30f6..b77feffbadea 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -22,8 +22,6 @@ config PARISC
 	select GENERIC_STRNCPY_FROM_USER
 	select HAVE_MOD_ARCH_SPECIFIC
 	select MODULES_USE_ELF_RELA
-	select GENERIC_KERNEL_THREAD
-	select GENERIC_KERNEL_EXECVE
 	select CLONE_BACKWARDS
 
 	help
diff --git a/arch/parisc/include/asm/unistd.h b/arch/parisc/include/asm/unistd.h
index 1efef41659c9..3043194547cd 100644
--- a/arch/parisc/include/asm/unistd.h
+++ b/arch/parisc/include/asm/unistd.h
@@ -163,7 +163,6 @@ type name(type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5)	\
 #define __ARCH_WANT_SYS_RT_SIGACTION
 #define __ARCH_WANT_SYS_RT_SIGSUSPEND
 #define __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND
-#define __ARCH_WANT_SYS_EXECVE
 #define __ARCH_WANT_SYS_FORK
 #define __ARCH_WANT_SYS_VFORK
 #define __ARCH_WANT_SYS_CLONE
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 951a517a1a0f..17903f1f356b 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -141,10 +141,8 @@ config PPC
 	select GENERIC_CLOCKEVENTS
 	select GENERIC_STRNCPY_FROM_USER
 	select GENERIC_STRNLEN_USER
-	select GENERIC_KERNEL_THREAD
 	select HAVE_MOD_ARCH_SPECIFIC
 	select MODULES_USE_ELF_RELA
-	select GENERIC_KERNEL_EXECVE
 	select CLONE_BACKWARDS
 
 config EARLY_PRINTK
diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h
index 76fe846ec40e..784872f93711 100644
--- a/arch/powerpc/include/asm/unistd.h
+++ b/arch/powerpc/include/asm/unistd.h
@@ -55,7 +55,6 @@
 #define __ARCH_WANT_SYS_NEWFSTATAT
 #define __ARCH_WANT_COMPAT_SYS_SENDFILE
 #endif
-#define __ARCH_WANT_SYS_EXECVE
 #define __ARCH_WANT_SYS_FORK
 #define __ARCH_WANT_SYS_VFORK
 #define __ARCH_WANT_SYS_CLONE
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 3cbb8757704e..5029ebf7110e 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -137,8 +137,6 @@ config S390
 	select GENERIC_CLOCKEVENTS
 	select KTIME_SCALAR if 32BIT
 	select HAVE_ARCH_SECCOMP_FILTER
-	select GENERIC_KERNEL_THREAD
-	select GENERIC_KERNEL_EXECVE
 	select HAVE_MOD_ARCH_SPECIFIC
 	select MODULES_USE_ELF_RELA
 	select CLONE_BACKWARDS2
diff --git a/arch/s390/include/asm/unistd.h b/arch/s390/include/asm/unistd.h
index 086bb8eaf6ab..636530872516 100644
--- a/arch/s390/include/asm/unistd.h
+++ b/arch/s390/include/asm/unistd.h
@@ -53,7 +53,6 @@
 #   define __ARCH_WANT_COMPAT_SYS_TIME
 #   define __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND
 # endif
-#define __ARCH_WANT_SYS_EXECVE
 #define __ARCH_WANT_SYS_FORK
 #define __ARCH_WANT_SYS_VFORK
 #define __ARCH_WANT_SYS_CLONE
diff --git a/arch/score/Kconfig b/arch/score/Kconfig
index 45893390c7dd..3b1482e7afac 100644
--- a/arch/score/Kconfig
+++ b/arch/score/Kconfig
@@ -13,8 +13,6 @@ config SCORE
        select GENERIC_CLOCKEVENTS
        select HAVE_MOD_ARCH_SPECIFIC
 	select MODULES_USE_ELF_REL
-	select GENERIC_KERNEL_THREAD
-	select GENERIC_KERNEL_EXECVE
 	select CLONE_BACKWARDS
 
 choice
diff --git a/arch/score/include/asm/unistd.h b/arch/score/include/asm/unistd.h
index 56001c93095a..9cb4260a5f3e 100644
--- a/arch/score/include/asm/unistd.h
+++ b/arch/score/include/asm/unistd.h
@@ -4,7 +4,6 @@
 #define __ARCH_WANT_SYSCALL_NO_FLAGS
 #define __ARCH_WANT_SYSCALL_OFF_T
 #define __ARCH_WANT_SYSCALL_DEPRECATED
-#define __ARCH_WANT_SYS_EXECVE
 #define __ARCH_WANT_SYS_CLONE
 #define __ARCH_WANT_SYS_FORK
 #define __ARCH_WANT_SYS_VFORK
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 8451317eed58..babc2b826c5c 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -40,8 +40,6 @@ config SUPERH
 	select GENERIC_STRNLEN_USER
 	select HAVE_MOD_ARCH_SPECIFIC if DWARF_UNWINDER
 	select MODULES_USE_ELF_RELA
-	select GENERIC_KERNEL_THREAD
-	select GENERIC_KERNEL_EXECVE
 	help
 	  The SuperH is a RISC processor targeted for use in embedded systems
 	  and consumer electronics; it was also used in the Sega Dreamcast
diff --git a/arch/sh/include/asm/unistd.h b/arch/sh/include/asm/unistd.h
index 43d3f26b2eab..012004ed3330 100644
--- a/arch/sh/include/asm/unistd.h
+++ b/arch/sh/include/asm/unistd.h
@@ -28,7 +28,6 @@
 # define __ARCH_WANT_SYS_SIGPENDING
 # define __ARCH_WANT_SYS_SIGPROCMASK
 # define __ARCH_WANT_SYS_RT_SIGACTION
-# define __ARCH_WANT_SYS_EXECVE
 # define __ARCH_WANT_SYS_FORK
 # define __ARCH_WANT_SYS_VFORK
 # define __ARCH_WANT_SYS_CLONE
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 0c7d365fa402..9f2edb5c5551 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -41,8 +41,6 @@ config SPARC
 	select GENERIC_STRNCPY_FROM_USER
 	select GENERIC_STRNLEN_USER
 	select MODULES_USE_ELF_RELA
-	select GENERIC_KERNEL_THREAD
-	select GENERIC_KERNEL_EXECVE
 
 config SPARC32
 	def_bool !64BIT
diff --git a/arch/sparc/include/asm/unistd.h b/arch/sparc/include/asm/unistd.h
index c3e5d8b64171..0ecea6ed943e 100644
--- a/arch/sparc/include/asm/unistd.h
+++ b/arch/sparc/include/asm/unistd.h
@@ -46,7 +46,6 @@
 #define __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND
 #define __ARCH_WANT_COMPAT_SYS_SENDFILE
 #endif
-#define __ARCH_WANT_SYS_EXECVE
 
 /*
  * "Conditional" syscalls
diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
index ea7f61e8bc9e..875d008828b8 100644
--- a/arch/tile/Kconfig
+++ b/arch/tile/Kconfig
@@ -21,8 +21,6 @@ config TILE
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 	select GENERIC_CLOCKEVENTS
 	select MODULES_USE_ELF_RELA
-	select GENERIC_KERNEL_THREAD
-	select GENERIC_KERNEL_EXECVE
 
 # FIXME: investigate whether we need/want these options.
 #	select HAVE_IOREMAP_PROT
diff --git a/arch/tile/include/asm/unistd.h b/arch/tile/include/asm/unistd.h
index b51c6ee3cd6c..940831fe9e94 100644
--- a/arch/tile/include/asm/unistd.h
+++ b/arch/tile/include/asm/unistd.h
@@ -16,6 +16,5 @@
 #define __ARCH_WANT_SYS_LLSEEK
 #endif
 #define __ARCH_WANT_SYS_NEWFSTATAT
-#define __ARCH_WANT_SYS_EXECVE
 #define __ARCH_WANT_SYS_CLONE
 #include <uapi/asm/unistd.h>
diff --git a/arch/unicore32/Kconfig b/arch/unicore32/Kconfig
index c4fbb21e802b..60651df5f952 100644
--- a/arch/unicore32/Kconfig
+++ b/arch/unicore32/Kconfig
@@ -16,8 +16,6 @@ config UNICORE32
 	select ARCH_WANT_FRAME_POINTERS
 	select GENERIC_IOMAP
 	select MODULES_USE_ELF_REL
-	select GENERIC_KERNEL_THREAD
-	select GENERIC_KERNEL_EXECVE
 	help
 	  UniCore-32 is 32-bit Instruction Set Architecture,
 	  including a series of low-power-consumption RISC chip
diff --git a/arch/unicore32/include/uapi/asm/unistd.h b/arch/unicore32/include/uapi/asm/unistd.h
index 00cf5e286fca..d4cc4559d848 100644
--- a/arch/unicore32/include/uapi/asm/unistd.h
+++ b/arch/unicore32/include/uapi/asm/unistd.h
@@ -12,5 +12,4 @@
 
 /* Use the standard ABI for syscalls. */
 #include <asm-generic/unistd.h>
-#define __ARCH_WANT_SYS_EXECVE
 #define __ARCH_WANT_SYS_CLONE
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 0df6e7d84539..01ca0ebaff0e 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -108,8 +108,6 @@ config X86
 	select GENERIC_STRNLEN_USER
 	select HAVE_RCU_USER_QS if X86_64
 	select HAVE_IRQ_TIME_ACCOUNTING
-	select GENERIC_KERNEL_THREAD
-	select GENERIC_KERNEL_EXECVE
 	select MODULES_USE_ELF_REL if X86_32
 	select MODULES_USE_ELF_RELA if X86_64
 	select CLONE_BACKWARDS if X86_32
diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h
index 0e7dea7d3669..9dcfcc1d6f92 100644
--- a/arch/x86/include/asm/unistd.h
+++ b/arch/x86/include/asm/unistd.h
@@ -50,7 +50,6 @@
 # define __ARCH_WANT_SYS_TIME
 # define __ARCH_WANT_SYS_UTIME
 # define __ARCH_WANT_SYS_WAITPID
-# define __ARCH_WANT_SYS_EXECVE
 # define __ARCH_WANT_SYS_FORK
 # define __ARCH_WANT_SYS_VFORK
 # define __ARCH_WANT_SYS_CLONE
diff --git a/arch/x86/um/Kconfig b/arch/x86/um/Kconfig
index 8f51c39750d1..0fd20f241e40 100644
--- a/arch/x86/um/Kconfig
+++ b/arch/x86/um/Kconfig
@@ -13,8 +13,6 @@ endmenu
 config UML_X86
 	def_bool y
 	select GENERIC_FIND_FIRST_BIT
-	select GENERIC_KERNEL_THREAD
-	select GENERIC_KERNEL_EXECVE
 
 config 64BIT
 	bool "64-bit kernel" if SUBARCH = "x86"
diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
index 2481f267be29..03a8c107e07e 100644
--- a/arch/xtensa/Kconfig
+++ b/arch/xtensa/Kconfig
@@ -13,8 +13,6 @@ config XTENSA
 	select GENERIC_CPU_DEVICES
 	select MODULES_USE_ELF_RELA
 	select GENERIC_PCI_IOMAP
-	select GENERIC_KERNEL_THREAD
-	select GENERIC_KERNEL_EXECVE
 	select ARCH_WANT_OPTIONAL_GPIOLIB
 	select CLONE_BACKWARDS
 	help
diff --git a/arch/xtensa/include/asm/unistd.h b/arch/xtensa/include/asm/unistd.h
index e002dbcc88b6..eb63ea87815c 100644
--- a/arch/xtensa/include/asm/unistd.h
+++ b/arch/xtensa/include/asm/unistd.h
@@ -1,7 +1,6 @@
 #ifndef _XTENSA_UNISTD_H
 #define _XTENSA_UNISTD_H
 
-#define __ARCH_WANT_SYS_EXECVE
 #define __ARCH_WANT_SYS_CLONE
 #include <uapi/asm/unistd.h>
 
diff --git a/fs/exec.c b/fs/exec.c
index 721a29929511..090ac91da2e9 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1657,7 +1657,6 @@ int get_dumpable(struct mm_struct *mm)
 	return __get_dumpable(mm->flags);
 }
 
-#ifdef __ARCH_WANT_SYS_EXECVE
 SYSCALL_DEFINE3(execve,
 		const char __user *, filename,
 		const char __user *const __user *, argv,
@@ -1685,23 +1684,3 @@ asmlinkage long compat_sys_execve(const char __user * filename,
 	return error;
 }
 #endif
-#endif
-
-#ifdef __ARCH_WANT_KERNEL_EXECVE
-int kernel_execve(const char *filename,
-		  const char *const argv[],
-		  const char *const envp[])
-{
-	int ret = do_execve(filename,
-			(const char __user *const __user *)argv,
-			(const char __user *const __user *)envp);
-	if (ret < 0)
-		return ret;
-
-	/*
-	 * We were successful.  We won't be returning to our caller, but
-	 * instead to user space by manipulating the kernel stack.
-	 */
-	ret_from_kernel_execve(current_pt_regs());
-}
-#endif
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index 2630c9b41a86..8c1388c6ae27 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -121,8 +121,4 @@ extern void install_exec_creds(struct linux_binprm *bprm);
 extern void set_binfmt(struct linux_binfmt *new);
 extern void free_bprm(struct linux_binprm *);
 
-#ifdef __ARCH_WANT_KERNEL_EXECVE
-extern void ret_from_kernel_execve(struct pt_regs *normal) __noreturn;
-#endif
-
 #endif /* _LINUX_BINFMTS_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 1162258bcaf0..9e5a54e3d84f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2291,9 +2291,7 @@ extern int do_execve(const char *,
 		     const char __user * const __user *);
 extern long do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *);
 struct task_struct *fork_idle(int);
-#ifdef CONFIG_GENERIC_KERNEL_THREAD
 extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
-#endif
 
 extern void set_task_comm(struct task_struct *tsk, char *from);
 extern char *get_task_comm(char *to, struct task_struct *tsk);
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 91835e7f364d..9fe5f946526e 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -827,15 +827,6 @@ asmlinkage long sys_fanotify_mark(int fanotify_fd, unsigned int flags,
 				  const char  __user *pathname);
 asmlinkage long sys_syncfs(int fd);
 
-#ifndef CONFIG_GENERIC_KERNEL_EXECVE
-int kernel_execve(const char *filename, const char *const argv[], const char *const envp[]);
-#else
-#define kernel_execve(filename, argv, envp) \
-	do_execve(filename, \
-		(const char __user *const __user *)argv, \
-		(const char __user *const __user *)envp)
-#endif
-
 asmlinkage long sys_fork(void);
 asmlinkage long sys_vfork(void);
 #ifdef CONFIG_CLONE_BACKWARDS
diff --git a/init/main.c b/init/main.c
index e33e09df3cbc..155ac208d581 100644
--- a/init/main.c
+++ b/init/main.c
@@ -797,7 +797,9 @@ static void __init do_pre_smp_initcalls(void)
 static int run_init_process(const char *init_filename)
 {
 	argv_init[0] = init_filename;
-	return kernel_execve(init_filename, argv_init, envp_init);
+	return do_execve(init_filename,
+		(const char __user *const __user *)argv_init,
+		(const char __user *const __user *)envp_init);
 }
 
 static void __init kernel_init_freeable(void);
diff --git a/kernel/fork.c b/kernel/fork.c
index 540730783433..389712ffc0ad 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1623,7 +1623,6 @@ long do_fork(unsigned long clone_flags,
 	return nr;
 }
 
-#ifdef CONFIG_GENERIC_KERNEL_THREAD
 /*
  * Create a kernel thread.
  */
@@ -1632,7 +1631,6 @@ pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
 	return do_fork(flags|CLONE_VM|CLONE_UNTRACED, (unsigned long)fn,
 		(unsigned long)arg, NULL, NULL);
 }
-#endif
 
 #ifdef __ARCH_WANT_SYS_FORK
 SYSCALL_DEFINE0(fork)
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 1c317e386831..0023a87e8de6 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -219,9 +219,9 @@ static int ____call_usermodehelper(void *data)
 
 	commit_creds(new);
 
-	retval = kernel_execve(sub_info->path,
-			       (const char *const *)sub_info->argv,
-			       (const char *const *)sub_info->envp);
+	retval = do_execve(sub_info->path,
+			   (const char __user *const __user *)sub_info->argv,
+			   (const char __user *const __user *)sub_info->envp);
 	if (!retval)
 		return 0;
 
-- 
cgit v1.2.3


From 1ca97bb541a1f5a735e697a8bba763cde3aab452 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 18 Nov 2012 12:50:10 -0500
Subject: new helper: current_user_stack_pointer()

	Cross-architecture equivalent of rdusp(); default is
user_stack_pointer(current_pt_regs()) - that works for almost all
platforms that have usp saved in pt_regs.  The only exception from
that is ia64 - we want memory stack, not the backing store for
register one.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/alpha/include/asm/ptrace.h    | 1 +
 arch/blackfin/include/asm/ptrace.h | 1 +
 arch/cris/include/asm/ptrace.h     | 1 +
 arch/h8300/include/asm/ptrace.h    | 1 +
 arch/ia64/include/asm/ptrace.h     | 5 +++++
 arch/m68k/include/asm/ptrace.h     | 1 +
 include/linux/ptrace.h             | 4 ++++
 7 files changed, 14 insertions(+)

(limited to 'include/linux')

diff --git a/arch/alpha/include/asm/ptrace.h b/arch/alpha/include/asm/ptrace.h
index b4c5b2fbb647..fd53c74ac943 100644
--- a/arch/alpha/include/asm/ptrace.h
+++ b/arch/alpha/include/asm/ptrace.h
@@ -72,6 +72,7 @@ struct switch_stack {
 #define user_mode(regs) (((regs)->ps & 8) != 0)
 #define instruction_pointer(regs) ((regs)->pc)
 #define profile_pc(regs) instruction_pointer(regs)
+#define current_user_stack_pointer() rdusp()
 
 #define task_pt_regs(task) \
   ((struct pt_regs *) (task_stack_page(task) + 2*PAGE_SIZE) - 1)
diff --git a/arch/blackfin/include/asm/ptrace.h b/arch/blackfin/include/asm/ptrace.h
index 10d8641180f2..c42002506a26 100644
--- a/arch/blackfin/include/asm/ptrace.h
+++ b/arch/blackfin/include/asm/ptrace.h
@@ -106,6 +106,7 @@ struct pt_regs {
 #define arch_has_single_step()	(1)
 /* common code demands this function */
 #define ptrace_disable(child) user_disable_single_step(child)
+#define current_user_stack_pointer() rdusp()
 
 extern int is_user_addr_valid(struct task_struct *child,
 			      unsigned long start, unsigned long len);
diff --git a/arch/cris/include/asm/ptrace.h b/arch/cris/include/asm/ptrace.h
index 6618893bfe8e..551c081ab62b 100644
--- a/arch/cris/include/asm/ptrace.h
+++ b/arch/cris/include/asm/ptrace.h
@@ -10,6 +10,7 @@
 #define PTRACE_SETREGS            13
 
 #define profile_pc(regs) instruction_pointer(regs)
+#define current_user_stack_pointer() rdusp()
 
 #endif /* __KERNEL__ */
 
diff --git a/arch/h8300/include/asm/ptrace.h b/arch/h8300/include/asm/ptrace.h
index 7468589a128b..6183371d0c93 100644
--- a/arch/h8300/include/asm/ptrace.h
+++ b/arch/h8300/include/asm/ptrace.h
@@ -63,6 +63,7 @@ struct pt_regs {
 #define current_pt_regs() ((struct pt_regs *) \
 	(THREAD_SIZE + (unsigned long)current_thread_info()) - 1)
 #define signal_pt_regs() ((struct pt_regs *)current->thread.esp0)
+#define current_user_stack_pointer() rdusp()
 #endif /* __KERNEL__ */
 #endif /* __ASSEMBLY__ */
 #endif /* _H8300_PTRACE_H */
diff --git a/arch/ia64/include/asm/ptrace.h b/arch/ia64/include/asm/ptrace.h
index b0e973649cb9..845143990a1d 100644
--- a/arch/ia64/include/asm/ptrace.h
+++ b/arch/ia64/include/asm/ptrace.h
@@ -78,6 +78,11 @@ static inline long regs_return_value(struct pt_regs *regs)
 	unsigned long __ip = instruction_pointer(regs);			\
 	(__ip & ~3UL) + ((__ip & 3UL) << 2);				\
 })
+/*
+ * Why not default?  Because user_stack_pointer() on ia64 gives register
+ * stack backing store instead...
+ */
+#define current_user_stack_pointer() (current_pt_regs()->r12)
 
   /* given a pointer to a task_struct, return the user's pt_regs */
 # define task_pt_regs(t)		(((struct pt_regs *) ((char *) (t) + IA64_STK_OFFSET)) - 1)
diff --git a/arch/m68k/include/asm/ptrace.h b/arch/m68k/include/asm/ptrace.h
index 0f717045bdde..a45cb6894ad3 100644
--- a/arch/m68k/include/asm/ptrace.h
+++ b/arch/m68k/include/asm/ptrace.h
@@ -15,6 +15,7 @@
 #define profile_pc(regs) instruction_pointer(regs)
 #define current_pt_regs() \
 	(struct pt_regs *)((char *)current_thread_info() + THREAD_SIZE) - 1
+#define current_user_stack_pointer() rdusp()
 
 #define arch_has_single_step()	(1)
 
diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index a89ff04bddd9..a3a9d085f932 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -342,6 +342,10 @@ static inline void user_single_step_siginfo(struct task_struct *tsk,
 #define signal_pt_regs() task_pt_regs(current)
 #endif
 
+#ifndef current_user_stack_pointer
+#define current_user_stack_pointer() user_stack_pointer(current_pt_regs())
+#endif
+
 extern int task_current_syscall(struct task_struct *target, long *callno,
 				unsigned long args[6], unsigned int maxargs,
 				unsigned long *sp, unsigned long *pc);
-- 
cgit v1.2.3


From 5c49574ffd7ac07eae8c3b065d19e6ebc7e4760f Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 18 Nov 2012 15:29:16 -0500
Subject: new helper: restore_altstack()

to be used by rt_sigreturn instances

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/signal.h | 2 ++
 kernel/signal.c        | 7 +++++++
 2 files changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/signal.h b/include/linux/signal.h
index e19a011b43b7..5969522136fe 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -385,4 +385,6 @@ int unhandled_signal(struct task_struct *tsk, int sig);
 
 void signals_init(void);
 
+int restore_altstack(const stack_t __user *);
+
 #endif /* _LINUX_SIGNAL_H */
diff --git a/kernel/signal.c b/kernel/signal.c
index e75e4bd2839b..887f2fefe207 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -3103,6 +3103,13 @@ out:
 	return error;
 }
 
+int restore_altstack(const stack_t __user *uss)
+{
+	int err = do_sigaltstack(uss, NULL, current_user_stack_pointer());
+	/* squash all but EFAULT for now */
+	return err == -EFAULT ? err : 0;
+}
+
 #ifdef __ARCH_WANT_SYS_SIGPENDING
 
 /**
-- 
cgit v1.2.3


From 9b064fc3f95a8e44e929fdf4d6037334ea03d15b Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 14 Dec 2012 13:49:35 -0500
Subject: new helper: compat_user_stack_pointer()

Compat counterpart of current_user_stack_pointer(); for most of the biarch
architectures those two are identical, but e.g. arm64 and arm use different
registers for stack pointer...

Note that amd64 variants of current_user_stack_pointer/compat_user_stack_pointer
do *not* rely on pt_regs having been through FIXUP_TOP_OF_STACK.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/arm64/include/asm/compat.h | 5 +++--
 arch/x86/include/asm/ptrace.h   | 7 +++++++
 include/linux/compat.h          | 3 +++
 3 files changed, 13 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h
index 37e610dc084e..d9ec40217a27 100644
--- a/arch/arm64/include/asm/compat.h
+++ b/arch/arm64/include/asm/compat.h
@@ -209,10 +209,11 @@ static inline compat_uptr_t ptr_to_compat(void __user *uptr)
 	return (u32)(unsigned long)uptr;
 }
 
+#define compat_user_stack_pointer() (current_pt_regs()->compat_sp)
+
 static inline void __user *arch_compat_alloc_user_space(long len)
 {
-	struct pt_regs *regs = task_pt_regs(current);
-	return (void __user *)regs->compat_sp - len;
+	return (void __user *)compat_user_stack_pointer() - len;
 }
 
 struct compat_ipc64_perm {
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 19f16ebaf4fa..7e560b6daf5d 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -203,6 +203,13 @@ static inline bool user_64bit_mode(struct pt_regs *regs)
 	return regs->cs == __USER_CS || regs->cs == pv_info.extra_user_64bit_cs;
 #endif
 }
+
+#define current_user_stack_pointer()	this_cpu_read(old_rsp)
+/* ia32 vs. x32 difference */
+#define compat_user_stack_pointer()	\
+	(test_thread_flag(TIF_IA32) 	\
+	 ? current_pt_regs()->sp 	\
+	 : this_cpu_read(old_rsp))
 #endif
 
 #ifdef CONFIG_X86_32
diff --git a/include/linux/compat.h b/include/linux/compat.h
index a7877fa809fd..62bb76f91baf 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -65,6 +65,9 @@
 
 #endif /* CONFIG_HAVE_SYSCALL_WRAPPERS */
 
+#ifndef compat_user_stack_pointer
+#define compat_user_stack_pointer() current_user_stack_pointer()
+#endif
 #define compat_jiffies_to_clock_t(x)	\
 		(((unsigned long)(x) * COMPAT_USER_HZ) / HZ)
 
-- 
cgit v1.2.3


From 6bf9adfc90370b695cb111116e15fdc0e1906270 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 14 Dec 2012 14:09:47 -0500
Subject: introduce generic sys_sigaltstack(), switch x86 and um to it

Conditional on CONFIG_GENERIC_SIGALTSTACK; architectures that do not
select it are completely unaffected

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/Kconfig                     | 3 +++
 arch/um/kernel/signal.c          | 5 -----
 arch/x86/Kconfig                 | 1 +
 arch/x86/include/asm/syscalls.h  | 3 ---
 arch/x86/kernel/entry_32.S       | 1 -
 arch/x86/kernel/entry_64.S       | 1 -
 arch/x86/kernel/signal.c         | 7 -------
 arch/x86/syscalls/syscall_32.tbl | 2 +-
 arch/x86/syscalls/syscall_64.tbl | 2 +-
 arch/x86/um/Kconfig              | 1 +
 arch/x86/um/sys_call_table_32.c  | 1 -
 arch/x86/um/sys_call_table_64.c  | 1 -
 include/linux/syscalls.h         | 6 ++++++
 kernel/signal.c                  | 6 ++++++
 14 files changed, 19 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/arch/Kconfig b/arch/Kconfig
index 0a8dd0585d0d..330176824594 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -335,6 +335,9 @@ config MODULES_USE_ELF_REL
 	  Modules only use ELF REL relocations.  Modules with ELF RELA
 	  relocations will give an error.
 
+config GENERIC_SIGALTSTACK
+	bool
+
 #
 # ABI hall of shame
 #
diff --git a/arch/um/kernel/signal.c b/arch/um/kernel/signal.c
index db18eb6124e1..48ccf718e290 100644
--- a/arch/um/kernel/signal.c
+++ b/arch/um/kernel/signal.c
@@ -132,8 +132,3 @@ long sys_sigsuspend(int history0, int history1, old_sigset_t mask)
 	siginitset(&blocked, mask);
 	return sigsuspend(&blocked);
 }
-
-long sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss)
-{
-	return do_sigaltstack(uss, uoss, PT_REGS_SP(&current->thread.regs));
-}
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 01ca0ebaff0e..f380614d7d89 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -111,6 +111,7 @@ config X86
 	select MODULES_USE_ELF_REL if X86_32
 	select MODULES_USE_ELF_RELA if X86_64
 	select CLONE_BACKWARDS if X86_32
+	select GENERIC_SIGALTSTACK
 
 config INSTRUCTION_DECODER
 	def_bool y
diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h
index 2f8374718aa3..58b7e3eac0ae 100644
--- a/arch/x86/include/asm/syscalls.h
+++ b/arch/x86/include/asm/syscalls.h
@@ -25,9 +25,6 @@ asmlinkage int sys_modify_ldt(int, void __user *, unsigned long);
 
 /* kernel/signal.c */
 long sys_rt_sigreturn(struct pt_regs *);
-long sys_sigaltstack(const stack_t __user *, stack_t __user *,
-		     struct pt_regs *);
-
 
 /* kernel/tls.c */
 asmlinkage int sys_set_thread_area(struct user_desc __user *);
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index c763116c5359..ff84d5469d77 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -739,7 +739,6 @@ ENTRY(ptregs_##name) ; \
 ENDPROC(ptregs_##name)
 
 PTREGSCALL1(iopl)
-PTREGSCALL2(sigaltstack)
 PTREGSCALL0(sigreturn)
 PTREGSCALL0(rt_sigreturn)
 PTREGSCALL2(vm86)
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 2363e820ed68..6e462019f195 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -864,7 +864,6 @@ END(stub_\func)
 	FORK_LIKE  clone
 	FORK_LIKE  fork
 	FORK_LIKE  vfork
-	PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
 	PTREGSCALL stub_iopl, sys_iopl, %rsi
 
 ENTRY(ptregscall_common)
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 70b27ee6118e..16d065c23baf 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -602,13 +602,6 @@ sys_sigaction(int sig, const struct old_sigaction __user *act,
 }
 #endif /* CONFIG_X86_32 */
 
-long
-sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
-		struct pt_regs *regs)
-{
-	return do_sigaltstack(uss, uoss, regs->sp);
-}
-
 /*
  * Do a signal return; undo the signal stack.
  */
diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl
index ee3c220ee500..62c7b222e45c 100644
--- a/arch/x86/syscalls/syscall_32.tbl
+++ b/arch/x86/syscalls/syscall_32.tbl
@@ -192,7 +192,7 @@
 183	i386	getcwd			sys_getcwd
 184	i386	capget			sys_capget
 185	i386	capset			sys_capset
-186	i386	sigaltstack		ptregs_sigaltstack		stub32_sigaltstack
+186	i386	sigaltstack		sys_sigaltstack			stub32_sigaltstack
 187	i386	sendfile		sys_sendfile			sys32_sendfile
 188	i386	getpmsg
 189	i386	putpmsg
diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl
index a582bfed95bb..6ffa7f9d005e 100644
--- a/arch/x86/syscalls/syscall_64.tbl
+++ b/arch/x86/syscalls/syscall_64.tbl
@@ -137,7 +137,7 @@
 128	64	rt_sigtimedwait		sys_rt_sigtimedwait
 129	64	rt_sigqueueinfo		sys_rt_sigqueueinfo
 130	common	rt_sigsuspend		sys_rt_sigsuspend
-131	64	sigaltstack		stub_sigaltstack
+131	64	sigaltstack		sys_sigaltstack
 132	common	utime			sys_utime
 133	common	mknod			sys_mknod
 134	64	uselib
diff --git a/arch/x86/um/Kconfig b/arch/x86/um/Kconfig
index 0fd20f241e40..96b89d874ead 100644
--- a/arch/x86/um/Kconfig
+++ b/arch/x86/um/Kconfig
@@ -13,6 +13,7 @@ endmenu
 config UML_X86
 	def_bool y
 	select GENERIC_FIND_FIRST_BIT
+	select GENERIC_SIGALTSTACK
 
 config 64BIT
 	bool "64-bit kernel" if SUBARCH = "x86"
diff --git a/arch/x86/um/sys_call_table_32.c b/arch/x86/um/sys_call_table_32.c
index 812e98c098e4..a0c3b0d1a122 100644
--- a/arch/x86/um/sys_call_table_32.c
+++ b/arch/x86/um/sys_call_table_32.c
@@ -27,7 +27,6 @@
 #define ptregs_iopl sys_iopl
 #define ptregs_vm86old sys_vm86old
 #define ptregs_vm86 sys_vm86
-#define ptregs_sigaltstack sys_sigaltstack
 
 #define __SYSCALL_I386(nr, sym, compat) extern asmlinkage void sym(void) ;
 #include <asm/syscalls_32.h>
diff --git a/arch/x86/um/sys_call_table_64.c b/arch/x86/um/sys_call_table_64.c
index 170bd926a69c..f2f0723070ca 100644
--- a/arch/x86/um/sys_call_table_64.c
+++ b/arch/x86/um/sys_call_table_64.c
@@ -31,7 +31,6 @@
 #define stub_fork sys_fork
 #define stub_vfork sys_vfork
 #define stub_execve sys_execve
-#define stub_sigaltstack sys_sigaltstack
 #define stub_rt_sigreturn sys_rt_sigreturn
 
 #define __SYSCALL_COMMON(nr, sym, compat) __SYSCALL_64(nr, sym, compat)
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 9fe5f946526e..6ca1e08210c6 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -63,6 +63,7 @@ struct getcpu_cache;
 struct old_linux_dirent;
 struct perf_event_attr;
 struct file_handle;
+struct sigaltstack;
 
 #include <linux/types.h>
 #include <linux/aio_abi.h>
@@ -299,6 +300,11 @@ asmlinkage long sys_personality(unsigned int personality);
 asmlinkage long sys_sigpending(old_sigset_t __user *set);
 asmlinkage long sys_sigprocmask(int how, old_sigset_t __user *set,
 				old_sigset_t __user *oset);
+#ifdef CONFIG_GENERIC_SIGALTSTACK
+asmlinkage long sys_sigaltstack(const struct sigaltstack __user *uss,
+				struct sigaltstack __user *uoss);
+#endif
+
 asmlinkage long sys_getitimer(int which, struct itimerval __user *value);
 asmlinkage long sys_setitimer(int which,
 				struct itimerval __user *value,
diff --git a/kernel/signal.c b/kernel/signal.c
index 887f2fefe207..f05f4c4150d9 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -3102,6 +3102,12 @@ do_sigaltstack (const stack_t __user *uss, stack_t __user *uoss, unsigned long s
 out:
 	return error;
 }
+#ifdef CONFIG_GENERIC_SIGALTSTACK
+SYSCALL_DEFINE2(sigaltstack,const stack_t __user *,uss, stack_t __user *,uoss)
+{
+	return do_sigaltstack(uss, uoss, current_user_stack_pointer());
+}
+#endif
 
 int restore_altstack(const stack_t __user *uss)
 {
-- 
cgit v1.2.3


From 9026843952adac5b123c7b8dc961e5c15828d9e1 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 14 Dec 2012 14:47:53 -0500
Subject: generic compat_sys_sigaltstack()

Again, conditional on CONFIG_GENERIC_SIGALTSTACK

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/x86/ia32/ia32_signal.c      | 50 +---------------------------------------
 arch/x86/ia32/ia32entry.S        |  1 -
 arch/x86/include/asm/ia32.h      | 10 ++------
 arch/x86/include/asm/sys_ia32.h  |  2 --
 arch/x86/kernel/entry_64.S       |  2 --
 arch/x86/kernel/signal.c         |  4 +---
 arch/x86/syscalls/syscall_32.tbl |  2 +-
 arch/x86/syscalls/syscall_64.tbl |  2 +-
 include/linux/compat.h           | 16 +++++++++++++
 kernel/signal.c                  | 45 ++++++++++++++++++++++++++++++++++++
 10 files changed, 67 insertions(+), 67 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index efc6a958b71d..a866411a2fcc 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -136,52 +136,6 @@ asmlinkage long sys32_sigsuspend(int history0, int history1, old_sigset_t mask)
 	return sigsuspend(&blocked);
 }
 
-asmlinkage long sys32_sigaltstack(const stack_ia32_t __user *uss_ptr,
-				  stack_ia32_t __user *uoss_ptr,
-				  struct pt_regs *regs)
-{
-	stack_t uss, uoss;
-	int ret, err = 0;
-	mm_segment_t seg;
-
-	if (uss_ptr) {
-		u32 ptr;
-
-		memset(&uss, 0, sizeof(stack_t));
-		if (!access_ok(VERIFY_READ, uss_ptr, sizeof(stack_ia32_t)))
-			return -EFAULT;
-
-		get_user_try {
-			get_user_ex(ptr, &uss_ptr->ss_sp);
-			get_user_ex(uss.ss_flags, &uss_ptr->ss_flags);
-			get_user_ex(uss.ss_size, &uss_ptr->ss_size);
-		} get_user_catch(err);
-
-		if (err)
-			return -EFAULT;
-		uss.ss_sp = compat_ptr(ptr);
-	}
-	seg = get_fs();
-	set_fs(KERNEL_DS);
-	ret = do_sigaltstack((stack_t __force __user *) (uss_ptr ? &uss : NULL),
-			     (stack_t __force __user *) &uoss, regs->sp);
-	set_fs(seg);
-	if (ret >= 0 && uoss_ptr)  {
-		if (!access_ok(VERIFY_WRITE, uoss_ptr, sizeof(stack_ia32_t)))
-			return -EFAULT;
-
-		put_user_try {
-			put_user_ex(ptr_to_compat(uoss.ss_sp), &uoss_ptr->ss_sp);
-			put_user_ex(uoss.ss_flags, &uoss_ptr->ss_flags);
-			put_user_ex(uoss.ss_size, &uoss_ptr->ss_size);
-		} put_user_catch(err);
-
-		if (err)
-			ret = -EFAULT;
-	}
-	return ret;
-}
-
 /*
  * Do a signal return; undo the signal stack.
  */
@@ -292,7 +246,6 @@ asmlinkage long sys32_rt_sigreturn(struct pt_regs *regs)
 	struct rt_sigframe_ia32 __user *frame;
 	sigset_t set;
 	unsigned int ax;
-	struct pt_regs tregs;
 
 	frame = (struct rt_sigframe_ia32 __user *)(regs->sp - 4);
 
@@ -306,8 +259,7 @@ asmlinkage long sys32_rt_sigreturn(struct pt_regs *regs)
 	if (ia32_restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
 		goto badframe;
 
-	tregs = *regs;
-	if (sys32_sigaltstack(&frame->uc.uc_stack, NULL, &tregs) == -EFAULT)
+	if (compat_restore_altstack(&frame->uc.uc_stack))
 		goto badframe;
 
 	return ax;
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 32e6f05ddaaa..102ff7cb3e41 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -464,7 +464,6 @@ GLOBAL(\label)
 
 	PTREGSCALL stub32_rt_sigreturn, sys32_rt_sigreturn, %rdi
 	PTREGSCALL stub32_sigreturn, sys32_sigreturn, %rdi
-	PTREGSCALL stub32_sigaltstack, sys32_sigaltstack, %rdx
 	PTREGSCALL stub32_execve, compat_sys_execve, %rcx
 	PTREGSCALL stub32_fork, sys_fork, %rdi
 	PTREGSCALL stub32_vfork, sys_vfork, %rdi
diff --git a/arch/x86/include/asm/ia32.h b/arch/x86/include/asm/ia32.h
index e6232773ce49..4c6da2e4bb1d 100644
--- a/arch/x86/include/asm/ia32.h
+++ b/arch/x86/include/asm/ia32.h
@@ -29,16 +29,10 @@ struct old_sigaction32 {
 	unsigned int sa_restorer;	/* Another 32 bit pointer */
 };
 
-typedef struct sigaltstack_ia32 {
-	unsigned int	ss_sp;
-	int		ss_flags;
-	unsigned int	ss_size;
-} stack_ia32_t;
-
 struct ucontext_ia32 {
 	unsigned int	  uc_flags;
 	unsigned int 	  uc_link;
-	stack_ia32_t	  uc_stack;
+	compat_stack_t	  uc_stack;
 	struct sigcontext_ia32 uc_mcontext;
 	compat_sigset_t	  uc_sigmask;	/* mask last for extensibility */
 };
@@ -46,7 +40,7 @@ struct ucontext_ia32 {
 struct ucontext_x32 {
 	unsigned int	  uc_flags;
 	unsigned int 	  uc_link;
-	stack_ia32_t	  uc_stack;
+	compat_stack_t	  uc_stack;
 	unsigned int	  uc__pad0;     /* needed for alignment */
 	struct sigcontext uc_mcontext;  /* the 64-bit sigcontext type */
 	compat_sigset_t	  uc_sigmask;	/* mask last for extensibility */
diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h
index c76fae4d90be..31f61f96e0fb 100644
--- a/arch/x86/include/asm/sys_ia32.h
+++ b/arch/x86/include/asm/sys_ia32.h
@@ -69,8 +69,6 @@ asmlinkage long sys32_fallocate(int, int, unsigned,
 
 /* ia32/ia32_signal.c */
 asmlinkage long sys32_sigsuspend(int, int, old_sigset_t);
-asmlinkage long sys32_sigaltstack(const stack_ia32_t __user *,
-				  stack_ia32_t __user *, struct pt_regs *);
 asmlinkage long sys32_sigreturn(struct pt_regs *);
 asmlinkage long sys32_rt_sigreturn(struct pt_regs *);
 
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 6e462019f195..86d81199bbde 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -912,8 +912,6 @@ ENTRY(stub_rt_sigreturn)
 END(stub_rt_sigreturn)
 
 #ifdef CONFIG_X86_X32_ABI
-	PTREGSCALL stub_x32_sigaltstack, sys32_sigaltstack, %rdx
-
 ENTRY(stub_x32_rt_sigreturn)
 	CFI_STARTPROC
 	addq $8, %rsp
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 16d065c23baf..b17ed37c61a2 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -857,7 +857,6 @@ asmlinkage long sys32_x32_rt_sigreturn(struct pt_regs *regs)
 	struct rt_sigframe_x32 __user *frame;
 	sigset_t set;
 	unsigned long ax;
-	struct pt_regs tregs;
 
 	frame = (struct rt_sigframe_x32 __user *)(regs->sp - 8);
 
@@ -871,8 +870,7 @@ asmlinkage long sys32_x32_rt_sigreturn(struct pt_regs *regs)
 	if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
 		goto badframe;
 
-	tregs = *regs;
-	if (sys32_sigaltstack(&frame->uc.uc_stack, NULL, &tregs) == -EFAULT)
+	if (compat_restore_altstack(&frame->uc.uc_stack))
 		goto badframe;
 
 	return ax;
diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl
index 62c7b222e45c..235226efaa7f 100644
--- a/arch/x86/syscalls/syscall_32.tbl
+++ b/arch/x86/syscalls/syscall_32.tbl
@@ -192,7 +192,7 @@
 183	i386	getcwd			sys_getcwd
 184	i386	capget			sys_capget
 185	i386	capset			sys_capset
-186	i386	sigaltstack		sys_sigaltstack			stub32_sigaltstack
+186	i386	sigaltstack		sys_sigaltstack			compat_sys_sigaltstack
 187	i386	sendfile		sys_sendfile			sys32_sendfile
 188	i386	getpmsg
 189	i386	putpmsg
diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl
index 6ffa7f9d005e..c68cbe7174e7 100644
--- a/arch/x86/syscalls/syscall_64.tbl
+++ b/arch/x86/syscalls/syscall_64.tbl
@@ -337,7 +337,7 @@
 522	x32	rt_sigpending		sys32_rt_sigpending
 523	x32	rt_sigtimedwait		compat_sys_rt_sigtimedwait
 524	x32	rt_sigqueueinfo		sys32_rt_sigqueueinfo
-525	x32	sigaltstack		stub_x32_sigaltstack
+525	x32	sigaltstack		compat_sys_sigaltstack
 526	x32	timer_create		compat_sys_timer_create
 527	x32	mq_notify		compat_sys_mq_notify
 528	x32	kexec_load		compat_sys_kexec_load
diff --git a/include/linux/compat.h b/include/linux/compat.h
index 62bb76f91baf..cb5637e2ee2c 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -68,6 +68,16 @@
 #ifndef compat_user_stack_pointer
 #define compat_user_stack_pointer() current_user_stack_pointer()
 #endif
+#ifdef CONFIG_GENERIC_SIGALTSTACK
+#ifndef compat_sigaltstack	/* we'll need that for MIPS */
+typedef struct compat_sigaltstack {
+	compat_uptr_t			ss_sp;
+	int				ss_flags;
+	compat_size_t			ss_size;
+} compat_stack_t;
+#endif
+#endif
+
 #define compat_jiffies_to_clock_t(x)	\
 		(((unsigned long)(x) * COMPAT_USER_HZ) / HZ)
 
@@ -632,6 +642,12 @@ asmlinkage ssize_t compat_sys_process_vm_writev(compat_pid_t pid,
 
 asmlinkage long compat_sys_sendfile(int out_fd, int in_fd,
 				    compat_off_t __user *offset, compat_size_t count);
+#ifdef CONFIG_GENERIC_SIGALTSTACK
+asmlinkage long compat_sys_sigaltstack(const compat_stack_t __user *uss_ptr,
+				       compat_stack_t __user *uoss_ptr);
+
+int compat_restore_altstack(const compat_stack_t __user *uss);
+#endif
 
 #else
 
diff --git a/kernel/signal.c b/kernel/signal.c
index f05f4c4150d9..aee85bd76b8a 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -31,6 +31,7 @@
 #include <linux/nsproxy.h>
 #include <linux/user_namespace.h>
 #include <linux/uprobes.h>
+#include <linux/compat.h>
 #define CREATE_TRACE_POINTS
 #include <trace/events/signal.h>
 
@@ -3116,6 +3117,50 @@ int restore_altstack(const stack_t __user *uss)
 	return err == -EFAULT ? err : 0;
 }
 
+#ifdef CONFIG_COMPAT
+#ifdef CONFIG_GENERIC_SIGALTSTACK
+asmlinkage long compat_sys_sigaltstack(const compat_stack_t __user *uss_ptr,
+				       compat_stack_t __user *uoss_ptr)
+{
+	stack_t uss, uoss;
+	int ret;
+	mm_segment_t seg;
+
+	if (uss_ptr) {
+		compat_stack_t uss32;
+
+		memset(&uss, 0, sizeof(stack_t));
+		if (copy_from_user(&uss32, uss_ptr, sizeof(compat_stack_t)))
+			return -EFAULT;
+		uss.ss_sp = compat_ptr(uss32.ss_sp);
+		uss.ss_flags = uss32.ss_flags;
+		uss.ss_size = uss32.ss_size;
+	}
+	seg = get_fs();
+	set_fs(KERNEL_DS);
+	ret = do_sigaltstack((stack_t __force __user *) (uss_ptr ? &uss : NULL),
+			     (stack_t __force __user *) &uoss,
+			     compat_user_stack_pointer());
+	set_fs(seg);
+	if (ret >= 0 && uoss_ptr)  {
+		if (!access_ok(VERIFY_WRITE, uoss_ptr, sizeof(compat_stack_t)) ||
+		    __put_user(ptr_to_compat(uoss.ss_sp), &uoss_ptr->ss_sp) ||
+		    __put_user(uoss.ss_flags, &uoss_ptr->ss_flags) ||
+		    __put_user(uoss.ss_size, &uoss_ptr->ss_size))
+			ret = -EFAULT;
+	}
+	return ret;
+}
+
+int compat_restore_altstack(const compat_stack_t __user *uss)
+{
+	int err = compat_sys_sigaltstack(uss, NULL);
+	/* squash all but -EFAULT for now */
+	return err == -EFAULT ? err : 0;
+}
+#endif
+#endif
+
 #ifdef __ARCH_WANT_SYS_SIGPENDING
 
 /**
-- 
cgit v1.2.3


From c40702c49faef05ae324f121d8b3e215244ee152 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 20 Nov 2012 14:24:26 -0500
Subject: new helpers: __save_altstack/__compat_save_altstack, switch x86 and
 um to those

note that they are relying on access_ok() already checked by caller.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/x86/ia32/ia32_signal.c |  5 +----
 arch/x86/kernel/signal.c    | 18 ++++--------------
 arch/x86/um/signal.c        |  9 ++-------
 include/linux/compat.h      |  1 +
 include/linux/signal.h      |  1 +
 kernel/signal.c             | 16 ++++++++++++++++
 6 files changed, 25 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index a866411a2fcc..a1daf4a65009 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -467,10 +467,7 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 		else
 			put_user_ex(0, &frame->uc.uc_flags);
 		put_user_ex(0, &frame->uc.uc_link);
-		put_user_ex(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
-		put_user_ex(sas_ss_flags(regs->sp),
-			    &frame->uc.uc_stack.ss_flags);
-		put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
+		err |= __compat_save_altstack(&frame->uc.uc_stack, regs->sp);
 
 		if (ka->sa.sa_flags & SA_RESTORER)
 			restorer = ka->sa.sa_restorer;
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index b17ed37c61a2..a6c8a347b8c6 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -363,10 +363,7 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 		else
 			put_user_ex(0, &frame->uc.uc_flags);
 		put_user_ex(0, &frame->uc.uc_link);
-		put_user_ex(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
-		put_user_ex(sas_ss_flags(regs->sp),
-			    &frame->uc.uc_stack.ss_flags);
-		put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
+		err |= __save_altstack(&frame->uc.uc_stack, regs->sp);
 
 		/* Set up to return from userspace.  */
 		restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn);
@@ -413,7 +410,6 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 	struct rt_sigframe __user *frame;
 	void __user *fp = NULL;
 	int err = 0;
-	struct task_struct *me = current;
 
 	frame = get_sigframe(ka, regs, sizeof(struct rt_sigframe), &fp);
 
@@ -432,10 +428,7 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 		else
 			put_user_ex(0, &frame->uc.uc_flags);
 		put_user_ex(0, &frame->uc.uc_link);
-		put_user_ex(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
-		put_user_ex(sas_ss_flags(regs->sp),
-			    &frame->uc.uc_stack.ss_flags);
-		put_user_ex(me->sas_ss_size, &frame->uc.uc_stack.ss_size);
+		err |= __save_altstack(&frame->uc.uc_stack, regs->sp);
 
 		/* Set up to return from userspace.  If provided, use a stub
 		   already in userspace.  */
@@ -502,10 +495,7 @@ static int x32_setup_rt_frame(int sig, struct k_sigaction *ka,
 		else
 			put_user_ex(0, &frame->uc.uc_flags);
 		put_user_ex(0, &frame->uc.uc_link);
-		put_user_ex(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
-		put_user_ex(sas_ss_flags(regs->sp),
-			    &frame->uc.uc_stack.ss_flags);
-		put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
+		err |= __compat_save_altstack(&frame->uc.uc_stack, regs->sp);
 		put_user_ex(0, &frame->uc.uc__pad0);
 
 		if (ka->sa.sa_flags & SA_RESTORER) {
@@ -651,7 +641,7 @@ long sys_rt_sigreturn(struct pt_regs *regs)
 	if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
 		goto badframe;
 
-	if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->sp) == -EFAULT)
+	if (restore_altstack(&frame->uc.uc_stack))
 		goto badframe;
 
 	return ax;
diff --git a/arch/x86/um/signal.c b/arch/x86/um/signal.c
index bdaa08cfbcf4..71cef48ea5cd 100644
--- a/arch/x86/um/signal.c
+++ b/arch/x86/um/signal.c
@@ -342,9 +342,7 @@ static int copy_ucontext_to_user(struct ucontext __user *uc,
 {
 	int err = 0;
 
-	err |= put_user(current->sas_ss_sp, &uc->uc_stack.ss_sp);
-	err |= put_user(sas_ss_flags(sp), &uc->uc_stack.ss_flags);
-	err |= put_user(current->sas_ss_size, &uc->uc_stack.ss_size);
+	err |= __save_altstack(&uc->uc_stack, sp);
 	err |= copy_sc_to_user(&uc->uc_mcontext, fp, &current->thread.regs, 0);
 	err |= copy_to_user(&uc->uc_sigmask, set, sizeof(*set));
 	return err;
@@ -529,10 +527,7 @@ int setup_signal_stack_si(unsigned long stack_top, int sig,
 	/* Create the ucontext.  */
 	err |= __put_user(0, &frame->uc.uc_flags);
 	err |= __put_user(0, &frame->uc.uc_link);
-	err |= __put_user(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
-	err |= __put_user(sas_ss_flags(PT_REGS_SP(regs)),
-			  &frame->uc.uc_stack.ss_flags);
-	err |= __put_user(me->sas_ss_size, &frame->uc.uc_stack.ss_size);
+	err |= __save_altstack(&frame->uc.uc_stack, PT_REGS_SP(regs));
 	err |= copy_sc_to_user(&frame->uc.uc_mcontext, &frame->fpstate, regs,
 			       set->sig[0]);
 	err |= __put_user(&frame->fpstate, &frame->uc.uc_mcontext.fpstate);
diff --git a/include/linux/compat.h b/include/linux/compat.h
index cb5637e2ee2c..334813307ec1 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -647,6 +647,7 @@ asmlinkage long compat_sys_sigaltstack(const compat_stack_t __user *uss_ptr,
 				       compat_stack_t __user *uoss_ptr);
 
 int compat_restore_altstack(const compat_stack_t __user *uss);
+int __compat_save_altstack(compat_stack_t __user *, unsigned long);
 #endif
 
 #else
diff --git a/include/linux/signal.h b/include/linux/signal.h
index 5969522136fe..0a89ffc48466 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -386,5 +386,6 @@ int unhandled_signal(struct task_struct *tsk, int sig);
 void signals_init(void);
 
 int restore_altstack(const stack_t __user *);
+int __save_altstack(stack_t __user *, unsigned long);
 
 #endif /* _LINUX_SIGNAL_H */
diff --git a/kernel/signal.c b/kernel/signal.c
index aee85bd76b8a..f072513302c3 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -3117,6 +3117,14 @@ int restore_altstack(const stack_t __user *uss)
 	return err == -EFAULT ? err : 0;
 }
 
+int __save_altstack(stack_t __user *uss, unsigned long sp)
+{
+	struct task_struct *t = current;
+	return  __put_user((void __user *)t->sas_ss_sp, &uss->ss_sp) |
+		__put_user(sas_ss_flags(sp), &uss->ss_flags) |
+		__put_user(t->sas_ss_size, &uss->ss_size);
+}
+
 #ifdef CONFIG_COMPAT
 #ifdef CONFIG_GENERIC_SIGALTSTACK
 asmlinkage long compat_sys_sigaltstack(const compat_stack_t __user *uss_ptr,
@@ -3158,6 +3166,14 @@ int compat_restore_altstack(const compat_stack_t __user *uss)
 	/* squash all but -EFAULT for now */
 	return err == -EFAULT ? err : 0;
 }
+
+int __compat_save_altstack(compat_stack_t __user *uss, unsigned long sp)
+{
+	struct task_struct *t = current;
+	return  __put_user(ptr_to_compat((void __user *)t->sas_ss_sp), &uss->ss_sp) |
+		__put_user(sas_ss_flags(sp), &uss->ss_flags) |
+		__put_user(t->sas_ss_size, &uss->ss_size);
+}
 #endif
 #endif
 
-- 
cgit v1.2.3


From ada65c74059f8c104f1b467c126205471634c435 Mon Sep 17 00:00:00 2001
From: Maarten Lankhorst <maarten.lankhorst@canonical.com>
Date: Wed, 12 Dec 2012 10:23:03 +0100
Subject: dma-buf: remove fallback for !CONFIG_DMA_SHARED_BUFFER

Documentation says that code requiring dma-buf should add it to
select, so inline fallbacks are not going to be used. A link error
will make it obvious what went wrong, instead of silently doing
nothing at runtime.

Signed-off-by: Maarten Lankhorst <maarten.lankhorst@canonical.com>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Reviewed-by: Rob Clark <rob.clark@linaro.org>
Signed-off-by: Sumit Semwal <sumit.semwal@linaro.org>
---
 include/linux/dma-buf.h | 99 -------------------------------------------------
 1 file changed, 99 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
index eb48f3816df9..bd2e52ccc4f2 100644
--- a/include/linux/dma-buf.h
+++ b/include/linux/dma-buf.h
@@ -156,7 +156,6 @@ static inline void get_dma_buf(struct dma_buf *dmabuf)
 	get_file(dmabuf->file);
 }
 
-#ifdef CONFIG_DMA_SHARED_BUFFER
 struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
 							struct device *dev);
 void dma_buf_detach(struct dma_buf *dmabuf,
@@ -184,103 +183,5 @@ int dma_buf_mmap(struct dma_buf *, struct vm_area_struct *,
 		 unsigned long);
 void *dma_buf_vmap(struct dma_buf *);
 void dma_buf_vunmap(struct dma_buf *, void *vaddr);
-#else
-
-static inline struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
-							struct device *dev)
-{
-	return ERR_PTR(-ENODEV);
-}
-
-static inline void dma_buf_detach(struct dma_buf *dmabuf,
-				  struct dma_buf_attachment *dmabuf_attach)
-{
-	return;
-}
-
-static inline struct dma_buf *dma_buf_export(void *priv,
-					     const struct dma_buf_ops *ops,
-					     size_t size, int flags)
-{
-	return ERR_PTR(-ENODEV);
-}
-
-static inline int dma_buf_fd(struct dma_buf *dmabuf, int flags)
-{
-	return -ENODEV;
-}
-
-static inline struct dma_buf *dma_buf_get(int fd)
-{
-	return ERR_PTR(-ENODEV);
-}
-
-static inline void dma_buf_put(struct dma_buf *dmabuf)
-{
-	return;
-}
-
-static inline struct sg_table *dma_buf_map_attachment(
-	struct dma_buf_attachment *attach, enum dma_data_direction write)
-{
-	return ERR_PTR(-ENODEV);
-}
-
-static inline void dma_buf_unmap_attachment(struct dma_buf_attachment *attach,
-			struct sg_table *sg, enum dma_data_direction dir)
-{
-	return;
-}
-
-static inline int dma_buf_begin_cpu_access(struct dma_buf *dmabuf,
-					   size_t start, size_t len,
-					   enum dma_data_direction dir)
-{
-	return -ENODEV;
-}
-
-static inline void dma_buf_end_cpu_access(struct dma_buf *dmabuf,
-					  size_t start, size_t len,
-					  enum dma_data_direction dir)
-{
-}
-
-static inline void *dma_buf_kmap_atomic(struct dma_buf *dmabuf,
-					unsigned long pnum)
-{
-	return NULL;
-}
-
-static inline void dma_buf_kunmap_atomic(struct dma_buf *dmabuf,
-					 unsigned long pnum, void *vaddr)
-{
-}
-
-static inline void *dma_buf_kmap(struct dma_buf *dmabuf, unsigned long pnum)
-{
-	return NULL;
-}
-
-static inline void dma_buf_kunmap(struct dma_buf *dmabuf,
-				  unsigned long pnum, void *vaddr)
-{
-}
-
-static inline int dma_buf_mmap(struct dma_buf *dmabuf,
-			       struct vm_area_struct *vma,
-			       unsigned long pgoff)
-{
-	return -ENODEV;
-}
-
-static inline void *dma_buf_vmap(struct dma_buf *dmabuf)
-{
-	return NULL;
-}
-
-static inline void dma_buf_vunmap(struct dma_buf *dmabuf, void *vaddr)
-{
-}
-#endif /* CONFIG_DMA_SHARED_BUFFER */
 
 #endif /* __DMA_BUF_H__ */
-- 
cgit v1.2.3


From 39e3c9553f34381a1b664c27b0c696a266a5735e Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Wed, 28 Nov 2012 11:30:53 -0500
Subject: vfs: remove DCACHE_NEED_LOOKUP

The code that relied on that flag was ripped out of btrfs quite some
time ago, and never added back. Josef indicated that he was going to
take a different approach to the problem in btrfs, and that we
could just eliminate this flag.

Cc: Josef Bacik <jbacik@fusionio.com>
Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/btrfs/inode.c       | 16 +---------------
 fs/dcache.c            | 33 +--------------------------------
 fs/namei.c             | 11 +----------
 include/linux/dcache.h |  8 --------
 4 files changed, 3 insertions(+), 65 deletions(-)

(limited to 'include/linux')

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 67ed24ae86bb..16d9e8e191e6 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4262,16 +4262,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
 	if (dentry->d_name.len > BTRFS_NAME_LEN)
 		return ERR_PTR(-ENAMETOOLONG);
 
-	if (unlikely(d_need_lookup(dentry))) {
-		memcpy(&location, dentry->d_fsdata, sizeof(struct btrfs_key));
-		kfree(dentry->d_fsdata);
-		dentry->d_fsdata = NULL;
-		/* This thing is hashed, drop it for now */
-		d_drop(dentry);
-	} else {
-		ret = btrfs_inode_by_name(dir, dentry, &location);
-	}
-
+	ret = btrfs_inode_by_name(dir, dentry, &location);
 	if (ret < 0)
 		return ERR_PTR(ret);
 
@@ -4341,11 +4332,6 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
 	struct dentry *ret;
 
 	ret = d_splice_alias(btrfs_lookup_dentry(dir, dentry), dentry);
-	if (unlikely(d_need_lookup(dentry))) {
-		spin_lock(&dentry->d_lock);
-		dentry->d_flags &= ~DCACHE_NEED_LOOKUP;
-		spin_unlock(&dentry->d_lock);
-	}
 	return ret;
 }
 
diff --git a/fs/dcache.c b/fs/dcache.c
index 3a463d0c4fe8..1782be3fc3ef 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -454,24 +454,6 @@ void d_drop(struct dentry *dentry)
 }
 EXPORT_SYMBOL(d_drop);
 
-/*
- * d_clear_need_lookup - drop a dentry from cache and clear the need lookup flag
- * @dentry: dentry to drop
- *
- * This is called when we do a lookup on a placeholder dentry that needed to be
- * looked up.  The dentry should have been hashed in order for it to be found by
- * the lookup code, but now needs to be unhashed while we do the actual lookup
- * and clear the DCACHE_NEED_LOOKUP flag.
- */
-void d_clear_need_lookup(struct dentry *dentry)
-{
-	spin_lock(&dentry->d_lock);
-	__d_drop(dentry);
-	dentry->d_flags &= ~DCACHE_NEED_LOOKUP;
-	spin_unlock(&dentry->d_lock);
-}
-EXPORT_SYMBOL(d_clear_need_lookup);
-
 /*
  * Finish off a dentry we've decided to kill.
  * dentry->d_lock must be held, returns with it unlocked.
@@ -565,13 +547,7 @@ repeat:
  	if (d_unhashed(dentry))
 		goto kill_it;
 
-	/*
-	 * If this dentry needs lookup, don't set the referenced flag so that it
-	 * is more likely to be cleaned up by the dcache shrinker in case of
-	 * memory pressure.
-	 */
-	if (!d_need_lookup(dentry))
-		dentry->d_flags |= DCACHE_REFERENCED;
+	dentry->d_flags |= DCACHE_REFERENCED;
 	dentry_lru_add(dentry);
 
 	dentry->d_count--;
@@ -1736,13 +1712,6 @@ struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,
 		return found;
 	}
 
-	/*
-	 * We are going to instantiate this dentry, unhash it and clear the
-	 * lookup flag so we can do that.
-	 */
-	if (unlikely(d_need_lookup(found)))
-		d_clear_need_lookup(found);
-
 	/*
 	 * Negative dentry: instantiate it unless the inode is a directory and
 	 * already has a dentry.
diff --git a/fs/namei.c b/fs/namei.c
index 35195ff9d194..25a41e02984b 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1275,9 +1275,7 @@ static struct dentry *lookup_dcache(struct qstr *name, struct dentry *dir,
 	*need_lookup = false;
 	dentry = d_lookup(dir, name);
 	if (dentry) {
-		if (d_need_lookup(dentry)) {
-			*need_lookup = true;
-		} else if (dentry->d_flags & DCACHE_OP_REVALIDATE) {
+		if (dentry->d_flags & DCACHE_OP_REVALIDATE) {
 			error = d_revalidate(dentry, flags);
 			if (unlikely(error <= 0)) {
 				if (error < 0) {
@@ -1383,8 +1381,6 @@ static int lookup_fast(struct nameidata *nd, struct qstr *name,
 			return -ECHILD;
 		nd->seq = seq;
 
-		if (unlikely(d_need_lookup(dentry)))
-			goto unlazy;
 		if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) {
 			status = d_revalidate(dentry, nd->flags);
 			if (unlikely(status <= 0)) {
@@ -1410,11 +1406,6 @@ unlazy:
 	if (unlikely(!dentry))
 		goto need_lookup;
 
-	if (unlikely(d_need_lookup(dentry))) {
-		dput(dentry);
-		goto need_lookup;
-	}
-
 	if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE) && need_reval)
 		status = d_revalidate(dentry, nd->flags);
 	if (unlikely(status <= 0)) {
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 59200795482e..c1754b59ddd3 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -202,7 +202,6 @@ struct dentry_operations {
 #define DCACHE_MOUNTED		0x10000	/* is a mountpoint */
 #define DCACHE_NEED_AUTOMOUNT	0x20000	/* handle automount on this dir */
 #define DCACHE_MANAGE_TRANSIT	0x40000	/* manage transit from this dirent */
-#define DCACHE_NEED_LOOKUP	0x80000 /* dentry requires i_op->lookup */
 #define DCACHE_MANAGED_DENTRY \
 	(DCACHE_MOUNTED|DCACHE_NEED_AUTOMOUNT|DCACHE_MANAGE_TRANSIT)
 
@@ -408,13 +407,6 @@ static inline bool d_mountpoint(struct dentry *dentry)
 	return dentry->d_flags & DCACHE_MOUNTED;
 }
 
-static inline bool d_need_lookup(struct dentry *dentry)
-{
-	return dentry->d_flags & DCACHE_NEED_LOOKUP;
-}
-
-extern void d_clear_need_lookup(struct dentry *dentry);
-
 extern int sysctl_vfs_cache_pressure;
 
 #endif	/* __LINUX_DCACHE_H */
-- 
cgit v1.2.3


From c4d6d8dbf335c7fa47341654a37c53a512b519bb Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 20 Dec 2012 21:52:32 +0000
Subject: CacheFiles: Fix the marking of cached pages

Under some circumstances CacheFiles defers the marking of pages with PG_fscache
so that it can take advantage of pagevecs to reduce the number of calls to
fscache_mark_pages_cached() and the netfs's hook to keep track of this.

There are, however, two problems with this:

 (1) It can lead to the PG_fscache mark being applied _after_ the page is set
     PG_uptodate and unlocked (by the call to fscache_end_io()).

 (2) CacheFiles's ref on the page is dropped immediately following
     fscache_end_io() - and so may not still be held when the mark is applied.
     This can lead to the page being passed back to the allocator before the
     mark is applied.

Fix this by, where appropriate, marking the page before calling
fscache_end_io() and releasing the page.  This means that we can't take
advantage of pagevecs and have to make a separate call for each page to the
marking routines.

The symptoms of this are Bad Page state errors cropping up under memory
pressure, for example:

BUG: Bad page state in process tar  pfn:002da
page:ffffea0000009fb0 count:0 mapcount:0 mapping:          (null) index:0x1447
page flags: 0x1000(private_2)
Pid: 4574, comm: tar Tainted: G        W   3.1.0-rc4-fsdevel+ #1064
Call Trace:
 [<ffffffff8109583c>] ? dump_page+0xb9/0xbe
 [<ffffffff81095916>] bad_page+0xd5/0xea
 [<ffffffff81095d82>] get_page_from_freelist+0x35b/0x46a
 [<ffffffff810961f3>] __alloc_pages_nodemask+0x362/0x662
 [<ffffffff810989da>] __do_page_cache_readahead+0x13a/0x267
 [<ffffffff81098942>] ? __do_page_cache_readahead+0xa2/0x267
 [<ffffffff81098d7b>] ra_submit+0x1c/0x20
 [<ffffffff8109900a>] ondemand_readahead+0x28b/0x29a
 [<ffffffff81098ee2>] ? ondemand_readahead+0x163/0x29a
 [<ffffffff810990ce>] page_cache_sync_readahead+0x38/0x3a
 [<ffffffff81091d8a>] generic_file_aio_read+0x2ab/0x67e
 [<ffffffffa008cfbe>] nfs_file_read+0xa4/0xc9 [nfs]
 [<ffffffff810c22c4>] do_sync_read+0xba/0xfa
 [<ffffffff81177a47>] ? security_file_permission+0x7b/0x84
 [<ffffffff810c25dd>] ? rw_verify_area+0xab/0xc8
 [<ffffffff810c29a4>] vfs_read+0xaa/0x13a
 [<ffffffff810c2a79>] sys_read+0x45/0x6c
 [<ffffffff813ac37b>] system_call_fastpath+0x16/0x1b

As can be seen, PG_private_2 (== PG_fscache) is set in the page flags.

Instrumenting fscache_mark_pages_cached() to verify whether page->mapping was
set appropriately showed that sometimes it wasn't.  This led to the discovery
that sometimes the page has apparently been reclaimed by the time the marker
got to see it.

Reported-by: M. Stevens <m@tippett.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
---
 fs/cachefiles/rdwr.c          | 34 ++++++++-----------------
 fs/fscache/page.c             | 59 ++++++++++++++++++++++++++-----------------
 include/linux/fscache-cache.h |  3 +++
 include/linux/fscache.h       | 12 ++++-----
 4 files changed, 56 insertions(+), 52 deletions(-)

(limited to 'include/linux')

diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c
index c994691d9445..3367abdcdac4 100644
--- a/fs/cachefiles/rdwr.c
+++ b/fs/cachefiles/rdwr.c
@@ -176,9 +176,8 @@ static void cachefiles_read_copier(struct fscache_operation *_op)
 	recheck:
 		if (PageUptodate(monitor->back_page)) {
 			copy_highpage(monitor->netfs_page, monitor->back_page);
-
-			pagevec_add(&pagevec, monitor->netfs_page);
-			fscache_mark_pages_cached(monitor->op, &pagevec);
+			fscache_mark_page_cached(monitor->op,
+						 monitor->netfs_page);
 			error = 0;
 		} else if (!PageError(monitor->back_page)) {
 			/* the page has probably been truncated */
@@ -335,8 +334,7 @@ backing_page_already_present:
 backing_page_already_uptodate:
 	_debug("- uptodate");
 
-	pagevec_add(pagevec, netpage);
-	fscache_mark_pages_cached(op, pagevec);
+	fscache_mark_page_cached(op, netpage);
 
 	copy_highpage(netpage, backpage);
 	fscache_end_io(op, netpage, 0);
@@ -448,8 +446,7 @@ int cachefiles_read_or_alloc_page(struct fscache_retrieval *op,
 						       &pagevec);
 	} else if (cachefiles_has_space(cache, 0, 1) == 0) {
 		/* there's space in the cache we can use */
-		pagevec_add(&pagevec, page);
-		fscache_mark_pages_cached(op, &pagevec);
+		fscache_mark_page_cached(op, page);
 		ret = -ENODATA;
 	} else {
 		ret = -ENOBUFS;
@@ -465,8 +462,7 @@ int cachefiles_read_or_alloc_page(struct fscache_retrieval *op,
  */
 static int cachefiles_read_backing_file(struct cachefiles_object *object,
 					struct fscache_retrieval *op,
-					struct list_head *list,
-					struct pagevec *mark_pvec)
+					struct list_head *list)
 {
 	struct cachefiles_one_read *monitor = NULL;
 	struct address_space *bmapping = object->backer->d_inode->i_mapping;
@@ -626,13 +622,13 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object,
 		page_cache_release(backpage);
 		backpage = NULL;
 
-		if (!pagevec_add(mark_pvec, netpage))
-			fscache_mark_pages_cached(op, mark_pvec);
+		fscache_mark_page_cached(op, netpage);
 
 		page_cache_get(netpage);
 		if (!pagevec_add(&lru_pvec, netpage))
 			__pagevec_lru_add_file(&lru_pvec);
 
+		/* the netpage is unlocked and marked up to date here */
 		fscache_end_io(op, netpage, 0);
 		page_cache_release(netpage);
 		netpage = NULL;
@@ -775,15 +771,11 @@ int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op,
 	/* submit the apparently valid pages to the backing fs to be read from
 	 * disk */
 	if (nrbackpages > 0) {
-		ret2 = cachefiles_read_backing_file(object, op, &backpages,
-						    &pagevec);
+		ret2 = cachefiles_read_backing_file(object, op, &backpages);
 		if (ret2 == -ENOMEM || ret2 == -EINTR)
 			ret = ret2;
 	}
 
-	if (pagevec_count(&pagevec) > 0)
-		fscache_mark_pages_cached(op, &pagevec);
-
 	_leave(" = %d [nr=%u%s]",
 	       ret, *nr_pages, list_empty(pages) ? " empty" : "");
 	return ret;
@@ -806,7 +798,6 @@ int cachefiles_allocate_page(struct fscache_retrieval *op,
 {
 	struct cachefiles_object *object;
 	struct cachefiles_cache *cache;
-	struct pagevec pagevec;
 	int ret;
 
 	object = container_of(op->op.object,
@@ -817,13 +808,10 @@ int cachefiles_allocate_page(struct fscache_retrieval *op,
 	_enter("%p,{%lx},", object, page->index);
 
 	ret = cachefiles_has_space(cache, 0, 1);
-	if (ret == 0) {
-		pagevec_init(&pagevec, 0);
-		pagevec_add(&pagevec, page);
-		fscache_mark_pages_cached(op, &pagevec);
-	} else {
+	if (ret == 0)
+		fscache_mark_page_cached(op, page);
+	else
 		ret = -ENOBUFS;
-	}
 
 	_leave(" = %d", ret);
 	return ret;
diff --git a/fs/fscache/page.c b/fs/fscache/page.c
index 3f7a59bfa7ad..d7c663cfc923 100644
--- a/fs/fscache/page.c
+++ b/fs/fscache/page.c
@@ -914,6 +914,40 @@ done:
 }
 EXPORT_SYMBOL(__fscache_uncache_page);
 
+/**
+ * fscache_mark_page_cached - Mark a page as being cached
+ * @op: The retrieval op pages are being marked for
+ * @page: The page to be marked
+ *
+ * Mark a netfs page as being cached.  After this is called, the netfs
+ * must call fscache_uncache_page() to remove the mark.
+ */
+void fscache_mark_page_cached(struct fscache_retrieval *op, struct page *page)
+{
+	struct fscache_cookie *cookie = op->op.object->cookie;
+
+#ifdef CONFIG_FSCACHE_STATS
+	atomic_inc(&fscache_n_marks);
+#endif
+
+	_debug("- mark %p{%lx}", page, page->index);
+	if (TestSetPageFsCache(page)) {
+		static bool once_only;
+		if (!once_only) {
+			once_only = true;
+			printk(KERN_WARNING "FS-Cache:"
+			       " Cookie type %s marked page %lx"
+			       " multiple times\n",
+			       cookie->def->name, page->index);
+		}
+	}
+
+	if (cookie->def->mark_page_cached)
+		cookie->def->mark_page_cached(cookie->netfs_data,
+					      op->mapping, page);
+}
+EXPORT_SYMBOL(fscache_mark_page_cached);
+
 /**
  * fscache_mark_pages_cached - Mark pages as being cached
  * @op: The retrieval op pages are being marked for
@@ -925,32 +959,11 @@ EXPORT_SYMBOL(__fscache_uncache_page);
 void fscache_mark_pages_cached(struct fscache_retrieval *op,
 			       struct pagevec *pagevec)
 {
-	struct fscache_cookie *cookie = op->op.object->cookie;
 	unsigned long loop;
 
-#ifdef CONFIG_FSCACHE_STATS
-	atomic_add(pagevec->nr, &fscache_n_marks);
-#endif
-
-	for (loop = 0; loop < pagevec->nr; loop++) {
-		struct page *page = pagevec->pages[loop];
-
-		_debug("- mark %p{%lx}", page, page->index);
-		if (TestSetPageFsCache(page)) {
-			static bool once_only;
-			if (!once_only) {
-				once_only = true;
-				printk(KERN_WARNING "FS-Cache:"
-				       " Cookie type %s marked page %lx"
-				       " multiple times\n",
-				       cookie->def->name, page->index);
-			}
-		}
-	}
+	for (loop = 0; loop < pagevec->nr; loop++)
+		fscache_mark_page_cached(op, pagevec->pages[loop]);
 
-	if (cookie->def->mark_pages_cached)
-		cookie->def->mark_pages_cached(cookie->netfs_data,
-					       op->mapping, pagevec);
 	pagevec_reinit(pagevec);
 }
 EXPORT_SYMBOL(fscache_mark_pages_cached);
diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h
index ce31408b1e47..9879183b55d8 100644
--- a/include/linux/fscache-cache.h
+++ b/include/linux/fscache-cache.h
@@ -504,6 +504,9 @@ extern void fscache_withdraw_cache(struct fscache_cache *cache);
 
 extern void fscache_io_error(struct fscache_cache *cache);
 
+extern void fscache_mark_page_cached(struct fscache_retrieval *op,
+				     struct page *page);
+
 extern void fscache_mark_pages_cached(struct fscache_retrieval *op,
 				      struct pagevec *pagevec);
 
diff --git a/include/linux/fscache.h b/include/linux/fscache.h
index 9ec20dec3353..f4b6353543bf 100644
--- a/include/linux/fscache.h
+++ b/include/linux/fscache.h
@@ -135,14 +135,14 @@ struct fscache_cookie_def {
 	 */
 	void (*put_context)(void *cookie_netfs_data, void *context);
 
-	/* indicate pages that now have cache metadata retained
-	 * - this function should mark the specified pages as now being cached
-	 * - the pages will have been marked with PG_fscache before this is
+	/* indicate page that now have cache metadata retained
+	 * - this function should mark the specified page as now being cached
+	 * - the page will have been marked with PG_fscache before this is
 	 *   called, so this is optional
 	 */
-	void (*mark_pages_cached)(void *cookie_netfs_data,
-				  struct address_space *mapping,
-				  struct pagevec *cached_pvec);
+	void (*mark_page_cached)(void *cookie_netfs_data,
+				 struct address_space *mapping,
+				 struct page *page);
 
 	/* indicate the cookie is no longer cached
 	 * - this function is called when the backing store currently caching
-- 
cgit v1.2.3


From ef46ed888efb1e8da33be5d33c9b54476289a43b Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 20 Dec 2012 21:52:35 +0000
Subject: FS-Cache: Make cookie relinquishment wait for outstanding reads

Make fscache_relinquish_cookie() log a warning and wait if there are any
outstanding reads left on the cookie it was given.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/fscache/cookie.c           | 18 ++++++++++++++----
 fs/fscache/operation.c        | 10 ++++++++--
 include/linux/fscache-cache.h |  1 +
 3 files changed, 23 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c
index 0666996adf80..66be9eccede0 100644
--- a/fs/fscache/cookie.c
+++ b/fs/fscache/cookie.c
@@ -442,22 +442,32 @@ void __fscache_relinquish_cookie(struct fscache_cookie *cookie, int retire)
 
 	event = retire ? FSCACHE_OBJECT_EV_RETIRE : FSCACHE_OBJECT_EV_RELEASE;
 
+try_again:
 	spin_lock(&cookie->lock);
 
 	/* break links with all the active objects */
 	while (!hlist_empty(&cookie->backing_objects)) {
+		int n_reads;
 		object = hlist_entry(cookie->backing_objects.first,
 				     struct fscache_object,
 				     cookie_link);
 
 		_debug("RELEASE OBJ%x", object->debug_id);
 
-		if (atomic_read(&object->n_reads)) {
+		set_bit(FSCACHE_COOKIE_WAITING_ON_READS, &cookie->flags);
+		n_reads = atomic_read(&object->n_reads);
+		if (n_reads) {
+			int n_ops = object->n_ops;
+			int n_in_progress = object->n_in_progress;
 			spin_unlock(&cookie->lock);
 			printk(KERN_ERR "FS-Cache:"
-			       " Cookie '%s' still has %d outstanding reads\n",
-			       cookie->def->name, atomic_read(&object->n_reads));
-			BUG();
+			       " Cookie '%s' still has %d outstanding reads (%d,%d)\n",
+			       cookie->def->name,
+			       n_reads, n_ops, n_in_progress);
+			wait_on_bit(&cookie->flags, FSCACHE_COOKIE_WAITING_ON_READS,
+				    fscache_wait_bit, TASK_UNINTERRUPTIBLE);
+			printk("Wait finished\n");
+			goto try_again;
 		}
 
 		/* detach each cache object from the object cookie */
diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c
index 30afdfa7aec7..c857ab824d6e 100644
--- a/fs/fscache/operation.c
+++ b/fs/fscache/operation.c
@@ -340,8 +340,14 @@ void fscache_put_operation(struct fscache_operation *op)
 
 	object = op->object;
 
-	if (test_bit(FSCACHE_OP_DEC_READ_CNT, &op->flags))
-		atomic_dec(&object->n_reads);
+	if (test_bit(FSCACHE_OP_DEC_READ_CNT, &op->flags)) {
+		if (atomic_dec_and_test(&object->n_reads)) {
+			clear_bit(FSCACHE_COOKIE_WAITING_ON_READS,
+				  &object->cookie->flags);
+			wake_up_bit(&object->cookie->flags,
+				    FSCACHE_COOKIE_WAITING_ON_READS);
+		}
+	}
 
 	/* now... we may get called with the object spinlock held, so we
 	 * complete the cleanup here only if we can immediately acquire the
diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h
index 9879183b55d8..e3d6d939d959 100644
--- a/include/linux/fscache-cache.h
+++ b/include/linux/fscache-cache.h
@@ -301,6 +301,7 @@ struct fscache_cookie {
 #define FSCACHE_COOKIE_PENDING_FILL	3	/* T if pending initial fill on object */
 #define FSCACHE_COOKIE_FILLING		4	/* T if filling object incrementally */
 #define FSCACHE_COOKIE_UNAVAILABLE	5	/* T if cookie is unavailable (error, etc) */
+#define FSCACHE_COOKIE_WAITING_ON_READS	6	/* T if cookie is waiting on reads */
 };
 
 extern struct fscache_cookie fscache_fsdef_index;
-- 
cgit v1.2.3


From 9f10523f891928330b7529da54c1a3cc65180b1a Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 20 Dec 2012 21:52:35 +0000
Subject: FS-Cache: Fix operation state management and accounting

Fix the state management of internal fscache operations and the accounting of
what operations are in what states.

This is done by:

 (1) Give struct fscache_operation a enum variable that directly represents the
     state it's currently in, rather than spreading this knowledge over a bunch
     of flags, who's processing the operation at the moment and whether it is
     queued or not.

     This makes it easier to write assertions to check the state at various
     points and to prevent invalid state transitions.

 (2) Add an 'operation complete' state and supply a function to indicate the
     completion of an operation (fscache_op_complete()) and make things call
     it.  The final call to fscache_put_operation() can then check that an op
     in the appropriate state (complete or cancelled).

 (3) Adjust the use of object->n_ops, ->n_in_progress, ->n_exclusive to better
     govern the state of an object:

	(a) The ->n_ops is now the number of extant operations on the object
	    and is now decremented by fscache_put_operation() only.

	(b) The ->n_in_progress is simply the number of objects that have been
	    taken off of the object's pending queue for the purposes of being
	    run.  This is decremented by fscache_op_complete() only.

	(c) The ->n_exclusive is the number of exclusive ops that have been
	    submitted and queued or are in progress.  It is decremented by
	    fscache_op_complete() and by fscache_cancel_op().

     fscache_put_operation() and fscache_operation_gc() now no longer try to
     clean up ->n_exclusive and ->n_in_progress.  That was leading to double
     decrements against fscache_cancel_op().

     fscache_cancel_op() now no longer decrements ->n_ops.  That was leading to
     double decrements against fscache_put_operation().

     fscache_submit_exclusive_op() now decides whether it has to queue an op
     based on ->n_in_progress being > 0 rather than ->n_ops > 0 as the latter
     will persist in being true even after all preceding operations have been
     cancelled or completed.  Furthermore, if an object is active and there are
     runnable ops against it, there must be at least one op running.

 (4) Add a remaining-pages counter (n_pages) to struct fscache_retrieval and
     provide a function to record completion of the pages as they complete.

     When n_pages reaches 0, the operation is deemed to be complete and
     fscache_op_complete() is called.

     Add calls to fscache_retrieval_complete() anywhere we've finished with a
     page we've been given to read or allocate for.  This includes places where
     we just return pages to the netfs for reading from the server and where
     accessing the cache fails and we discard the proposed netfs page.

The bugs in the unfixed state management manifest themselves as oopses like the
following where the operation completion gets out of sync with return of the
cookie by the netfs.  This is possible because the cache unlocks and returns
all the netfs pages before recording its completion - which means that there's
nothing to stop the netfs discarding them and returning the cookie.


FS-Cache: Cookie 'NFS.fh' still has outstanding reads
------------[ cut here ]------------
kernel BUG at fs/fscache/cookie.c:519!
invalid opcode: 0000 [#1] SMP
CPU 1
Modules linked in: cachefiles nfs fscache auth_rpcgss nfs_acl lockd sunrpc

Pid: 400, comm: kswapd0 Not tainted 3.1.0-rc7-fsdevel+ #1090                  /DG965RY
RIP: 0010:[<ffffffffa007050a>]  [<ffffffffa007050a>] __fscache_relinquish_cookie+0x170/0x343 [fscache]
RSP: 0018:ffff8800368cfb00  EFLAGS: 00010282
RAX: 000000000000003c RBX: ffff880023cc8790 RCX: 0000000000000000
RDX: 0000000000002f2e RSI: 0000000000000001 RDI: ffffffff813ab86c
RBP: ffff8800368cfb50 R08: 0000000000000002 R09: 0000000000000000
R10: ffff88003a1b7890 R11: ffff88001df6e488 R12: ffff880023d8ed98
R13: ffff880023cc8798 R14: 0000000000000004 R15: ffff88003b8bf370
FS:  0000000000000000(0000) GS:ffff88003bd00000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
CR2: 00000000008ba008 CR3: 0000000023d93000 CR4: 00000000000006e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
Process kswapd0 (pid: 400, threadinfo ffff8800368ce000, task ffff88003b8bf040)
Stack:
 ffff88003b8bf040 ffff88001df6e528 ffff88001df6e528 ffffffffa00b46b0
 ffff88003b8bf040 ffff88001df6e488 ffff88001df6e620 ffffffffa00b46b0
 ffff88001ebd04c8 0000000000000004 ffff8800368cfb70 ffffffffa00b2c91
Call Trace:
 [<ffffffffa00b2c91>] nfs_fscache_release_inode_cookie+0x3b/0x47 [nfs]
 [<ffffffffa008f25f>] nfs_clear_inode+0x3c/0x41 [nfs]
 [<ffffffffa0090df1>] nfs4_evict_inode+0x2f/0x33 [nfs]
 [<ffffffff810d8d47>] evict+0xa1/0x15c
 [<ffffffff810d8e2e>] dispose_list+0x2c/0x38
 [<ffffffff810d9ebd>] prune_icache_sb+0x28c/0x29b
 [<ffffffff810c56b7>] prune_super+0xd5/0x140
 [<ffffffff8109b615>] shrink_slab+0x102/0x1ab
 [<ffffffff8109d690>] balance_pgdat+0x2f2/0x595
 [<ffffffff8103e009>] ? process_timeout+0xb/0xb
 [<ffffffff8109dba3>] kswapd+0x270/0x289
 [<ffffffff8104c5ea>] ? __init_waitqueue_head+0x46/0x46
 [<ffffffff8109d933>] ? balance_pgdat+0x595/0x595
 [<ffffffff8104bf7a>] kthread+0x7f/0x87
 [<ffffffff813ad6b4>] kernel_thread_helper+0x4/0x10
 [<ffffffff81026b98>] ? finish_task_switch+0x45/0xc0
 [<ffffffff813abcdd>] ? retint_restore_args+0xe/0xe
 [<ffffffff8104befb>] ? __init_kthread_worker+0x53/0x53
 [<ffffffff813ad6b0>] ? gs_change+0xb/0xb

Signed-off-by: David Howells <dhowells@redhat.com>
---
 Documentation/filesystems/caching/backend-api.txt | 26 ++++++-
 Documentation/filesystems/caching/operations.txt  |  2 +-
 fs/cachefiles/rdwr.c                              | 31 ++++++--
 fs/fscache/object.c                               |  2 -
 fs/fscache/operation.c                            | 91 +++++++++++++++--------
 fs/fscache/page.c                                 | 25 ++++++-
 include/linux/fscache-cache.h                     | 37 +++++++--
 7 files changed, 164 insertions(+), 50 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/filesystems/caching/backend-api.txt b/Documentation/filesystems/caching/backend-api.txt
index 382d52cdaf2d..f4769b9399df 100644
--- a/Documentation/filesystems/caching/backend-api.txt
+++ b/Documentation/filesystems/caching/backend-api.txt
@@ -419,7 +419,10 @@ performed on the denizens of the cache.  These are held in a structure of type:
 
      If an I/O error occurs, fscache_io_error() should be called and -ENOBUFS
      returned if possible or fscache_end_io() called with a suitable error
-     code..
+     code.
+
+     fscache_put_retrieval() should be called after a page or pages are dealt
+     with.  This will complete the operation when all pages are dealt with.
 
 
  (*) Request pages be read from cache [mandatory]:
@@ -526,6 +529,27 @@ FS-Cache provides some utilities that a cache backend may make use of:
      error value should be 0 if successful and an error otherwise.
 
 
+ (*) Record that one or more pages being retrieved or allocated have been dealt
+     with:
+
+	void fscache_retrieval_complete(struct fscache_retrieval *op,
+					int n_pages);
+
+     This is called to record the fact that one or more pages have been dealt
+     with and are no longer the concern of this operation.  When the number of
+     pages remaining in the operation reaches 0, the operation will be
+     completed.
+
+
+ (*) Record operation completion:
+
+	void fscache_op_complete(struct fscache_operation *op);
+
+     This is called to record the completion of an operation.  This deducts
+     this operation from the parent object's run state, potentially permitting
+     one or more pending operations to start running.
+
+
  (*) Set highest store limit:
 
 	void fscache_set_store_limit(struct fscache_object *object,
diff --git a/Documentation/filesystems/caching/operations.txt b/Documentation/filesystems/caching/operations.txt
index b6b070c57cbf..bee2a5f93d60 100644
--- a/Documentation/filesystems/caching/operations.txt
+++ b/Documentation/filesystems/caching/operations.txt
@@ -174,7 +174,7 @@ Operations are used through the following procedure:
      necessary (the object might have died whilst the thread was waiting).
 
      When it has finished doing its processing, it should call
-     fscache_put_operation() on it.
+     fscache_op_complete() and fscache_put_operation() on it.
 
  (4) The operation holds an effective lock upon the object, preventing other
      exclusive ops conflicting until it is released.  The operation can be
diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c
index bf123d9c3206..93a0815e0498 100644
--- a/fs/cachefiles/rdwr.c
+++ b/fs/cachefiles/rdwr.c
@@ -197,6 +197,7 @@ static void cachefiles_read_copier(struct fscache_operation *_op)
 
 		fscache_end_io(op, monitor->netfs_page, error);
 		page_cache_release(monitor->netfs_page);
+		fscache_retrieval_complete(op, 1);
 		fscache_put_retrieval(op);
 		kfree(monitor);
 
@@ -339,6 +340,7 @@ backing_page_already_uptodate:
 
 	copy_highpage(netpage, backpage);
 	fscache_end_io(op, netpage, 0);
+	fscache_retrieval_complete(op, 1);
 
 success:
 	_debug("success");
@@ -360,6 +362,7 @@ read_error:
 		goto out;
 io_error:
 	cachefiles_io_error_obj(object, "Page read error on backing file");
+	fscache_retrieval_complete(op, 1);
 	ret = -ENOBUFS;
 	goto out;
 
@@ -369,6 +372,7 @@ nomem_monitor:
 	fscache_put_retrieval(monitor->op);
 	kfree(monitor);
 nomem:
+	fscache_retrieval_complete(op, 1);
 	_leave(" = -ENOMEM");
 	return -ENOMEM;
 }
@@ -407,7 +411,7 @@ int cachefiles_read_or_alloc_page(struct fscache_retrieval *op,
 	_enter("{%p},{%lx},,,", object, page->index);
 
 	if (!object->backer)
-		return -ENOBUFS;
+		goto enobufs;
 
 	inode = object->backer->d_inode;
 	ASSERT(S_ISREG(inode->i_mode));
@@ -416,7 +420,7 @@ int cachefiles_read_or_alloc_page(struct fscache_retrieval *op,
 
 	/* calculate the shift required to use bmap */
 	if (inode->i_sb->s_blocksize > PAGE_SIZE)
-		return -ENOBUFS;
+		goto enobufs;
 
 	shift = PAGE_SHIFT - inode->i_sb->s_blocksize_bits;
 
@@ -448,13 +452,19 @@ int cachefiles_read_or_alloc_page(struct fscache_retrieval *op,
 	} else if (cachefiles_has_space(cache, 0, 1) == 0) {
 		/* there's space in the cache we can use */
 		fscache_mark_page_cached(op, page);
+		fscache_retrieval_complete(op, 1);
 		ret = -ENODATA;
 	} else {
-		ret = -ENOBUFS;
+		goto enobufs;
 	}
 
 	_leave(" = %d", ret);
 	return ret;
+
+enobufs:
+	fscache_retrieval_complete(op, 1);
+	_leave(" = -ENOBUFS");
+	return -ENOBUFS;
 }
 
 /*
@@ -632,6 +642,7 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object,
 
 		/* the netpage is unlocked and marked up to date here */
 		fscache_end_io(op, netpage, 0);
+		fscache_retrieval_complete(op, 1);
 		page_cache_release(netpage);
 		netpage = NULL;
 		continue;
@@ -659,6 +670,7 @@ out:
 	list_for_each_entry_safe(netpage, _n, list, lru) {
 		list_del(&netpage->lru);
 		page_cache_release(netpage);
+		fscache_retrieval_complete(op, 1);
 	}
 
 	_leave(" = %d", ret);
@@ -707,7 +719,7 @@ int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op,
 	       *nr_pages);
 
 	if (!object->backer)
-		return -ENOBUFS;
+		goto all_enobufs;
 
 	space = 1;
 	if (cachefiles_has_space(cache, 0, *nr_pages) < 0)
@@ -720,7 +732,7 @@ int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op,
 
 	/* calculate the shift required to use bmap */
 	if (inode->i_sb->s_blocksize > PAGE_SIZE)
-		return -ENOBUFS;
+		goto all_enobufs;
 
 	shift = PAGE_SHIFT - inode->i_sb->s_blocksize_bits;
 
@@ -760,7 +772,10 @@ int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op,
 			nrbackpages++;
 		} else if (space && pagevec_add(&pagevec, page) == 0) {
 			fscache_mark_pages_cached(op, &pagevec);
+			fscache_retrieval_complete(op, 1);
 			ret = -ENODATA;
+		} else {
+			fscache_retrieval_complete(op, 1);
 		}
 	}
 
@@ -781,6 +796,10 @@ int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op,
 	_leave(" = %d [nr=%u%s]",
 	       ret, *nr_pages, list_empty(pages) ? " empty" : "");
 	return ret;
+
+all_enobufs:
+	fscache_retrieval_complete(op, *nr_pages);
+	return -ENOBUFS;
 }
 
 /*
@@ -815,6 +834,7 @@ int cachefiles_allocate_page(struct fscache_retrieval *op,
 	else
 		ret = -ENOBUFS;
 
+	fscache_retrieval_complete(op, 1);
 	_leave(" = %d", ret);
 	return ret;
 }
@@ -864,6 +884,7 @@ int cachefiles_allocate_pages(struct fscache_retrieval *op,
 		ret = -ENOBUFS;
 	}
 
+	fscache_retrieval_complete(op, *nr_pages);
 	_leave(" = %d", ret);
 	return ret;
 }
diff --git a/fs/fscache/object.c b/fs/fscache/object.c
index b6b897c550ac..773bc798a416 100644
--- a/fs/fscache/object.c
+++ b/fs/fscache/object.c
@@ -587,8 +587,6 @@ static void fscache_object_available(struct fscache_object *object)
 	if (object->n_in_progress == 0) {
 		if (object->n_ops > 0) {
 			ASSERTCMP(object->n_ops, >=, object->n_obj_ops);
-			ASSERTIF(object->n_ops > object->n_obj_ops,
-				 !list_empty(&object->pending_ops));
 			fscache_start_operations(object);
 		} else {
 			ASSERT(list_empty(&object->pending_ops));
diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c
index c857ab824d6e..748f9553c2cb 100644
--- a/fs/fscache/operation.c
+++ b/fs/fscache/operation.c
@@ -37,6 +37,7 @@ void fscache_enqueue_operation(struct fscache_operation *op)
 	ASSERT(op->processor != NULL);
 	ASSERTCMP(op->object->state, >=, FSCACHE_OBJECT_AVAILABLE);
 	ASSERTCMP(atomic_read(&op->usage), >, 0);
+	ASSERTCMP(op->state, ==, FSCACHE_OP_ST_IN_PROGRESS);
 
 	fscache_stat(&fscache_n_op_enqueue);
 	switch (op->flags & FSCACHE_OP_TYPE) {
@@ -64,6 +65,9 @@ EXPORT_SYMBOL(fscache_enqueue_operation);
 static void fscache_run_op(struct fscache_object *object,
 			   struct fscache_operation *op)
 {
+	ASSERTCMP(op->state, ==, FSCACHE_OP_ST_PENDING);
+
+	op->state = FSCACHE_OP_ST_IN_PROGRESS;
 	object->n_in_progress++;
 	if (test_and_clear_bit(FSCACHE_OP_WAITING, &op->flags))
 		wake_up_bit(&op->flags, FSCACHE_OP_WAITING);
@@ -80,22 +84,23 @@ static void fscache_run_op(struct fscache_object *object,
 int fscache_submit_exclusive_op(struct fscache_object *object,
 				struct fscache_operation *op)
 {
-	int ret;
-
 	_enter("{OBJ%x OP%x},", object->debug_id, op->debug_id);
 
+	ASSERTCMP(op->state, ==, FSCACHE_OP_ST_INITIALISED);
+	ASSERTCMP(atomic_read(&op->usage), >, 0);
+
 	spin_lock(&object->lock);
 	ASSERTCMP(object->n_ops, >=, object->n_in_progress);
 	ASSERTCMP(object->n_ops, >=, object->n_exclusive);
 	ASSERT(list_empty(&op->pend_link));
 
-	ret = -ENOBUFS;
+	op->state = FSCACHE_OP_ST_PENDING;
 	if (fscache_object_is_active(object)) {
 		op->object = object;
 		object->n_ops++;
 		object->n_exclusive++;	/* reads and writes must wait */
 
-		if (object->n_ops > 1) {
+		if (object->n_in_progress > 0) {
 			atomic_inc(&op->usage);
 			list_add_tail(&op->pend_link, &object->pending_ops);
 			fscache_stat(&fscache_n_op_pend);
@@ -111,7 +116,6 @@ int fscache_submit_exclusive_op(struct fscache_object *object,
 
 		/* need to issue a new write op after this */
 		clear_bit(FSCACHE_OBJECT_PENDING_WRITE, &object->flags);
-		ret = 0;
 	} else if (object->state == FSCACHE_OBJECT_CREATING) {
 		op->object = object;
 		object->n_ops++;
@@ -119,14 +123,13 @@ int fscache_submit_exclusive_op(struct fscache_object *object,
 		atomic_inc(&op->usage);
 		list_add_tail(&op->pend_link, &object->pending_ops);
 		fscache_stat(&fscache_n_op_pend);
-		ret = 0;
 	} else {
 		/* not allowed to submit ops in any other state */
 		BUG();
 	}
 
 	spin_unlock(&object->lock);
-	return ret;
+	return 0;
 }
 
 /*
@@ -186,6 +189,7 @@ int fscache_submit_op(struct fscache_object *object,
 	_enter("{OBJ%x OP%x},{%u}",
 	       object->debug_id, op->debug_id, atomic_read(&op->usage));
 
+	ASSERTCMP(op->state, ==, FSCACHE_OP_ST_INITIALISED);
 	ASSERTCMP(atomic_read(&op->usage), >, 0);
 
 	spin_lock(&object->lock);
@@ -196,6 +200,7 @@ int fscache_submit_op(struct fscache_object *object,
 	ostate = object->state;
 	smp_rmb();
 
+	op->state = FSCACHE_OP_ST_PENDING;
 	if (fscache_object_is_active(object)) {
 		op->object = object;
 		object->n_ops++;
@@ -225,12 +230,15 @@ int fscache_submit_op(struct fscache_object *object,
 		   object->state == FSCACHE_OBJECT_LC_DYING ||
 		   object->state == FSCACHE_OBJECT_WITHDRAWING) {
 		fscache_stat(&fscache_n_op_rejected);
+		op->state = FSCACHE_OP_ST_CANCELLED;
 		ret = -ENOBUFS;
 	} else if (!test_bit(FSCACHE_IOERROR, &object->cache->flags)) {
 		fscache_report_unexpected_submission(object, op, ostate);
 		ASSERT(!fscache_object_is_active(object));
+		op->state = FSCACHE_OP_ST_CANCELLED;
 		ret = -ENOBUFS;
 	} else {
+		op->state = FSCACHE_OP_ST_CANCELLED;
 		ret = -ENOBUFS;
 	}
 
@@ -290,13 +298,18 @@ int fscache_cancel_op(struct fscache_operation *op)
 
 	_enter("OBJ%x OP%x}", op->object->debug_id, op->debug_id);
 
+	ASSERTCMP(op->state, >=, FSCACHE_OP_ST_PENDING);
+	ASSERTCMP(op->state, !=, FSCACHE_OP_ST_CANCELLED);
+	ASSERTCMP(atomic_read(&op->usage), >, 0);
+
 	spin_lock(&object->lock);
 
 	ret = -EBUSY;
-	if (!list_empty(&op->pend_link)) {
+	if (op->state == FSCACHE_OP_ST_PENDING) {
+		ASSERT(!list_empty(&op->pend_link));
 		fscache_stat(&fscache_n_op_cancelled);
 		list_del_init(&op->pend_link);
-		object->n_ops--;
+		op->state = FSCACHE_OP_ST_CANCELLED;
 		if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags))
 			object->n_exclusive--;
 		if (test_and_clear_bit(FSCACHE_OP_WAITING, &op->flags))
@@ -310,6 +323,37 @@ int fscache_cancel_op(struct fscache_operation *op)
 	return ret;
 }
 
+/*
+ * Record the completion of an in-progress operation.
+ */
+void fscache_op_complete(struct fscache_operation *op)
+{
+	struct fscache_object *object = op->object;
+
+	_enter("OBJ%x", object->debug_id);
+
+	ASSERTCMP(op->state, ==, FSCACHE_OP_ST_IN_PROGRESS);
+	ASSERTCMP(object->n_in_progress, >, 0);
+	ASSERTIFCMP(test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags),
+		    object->n_exclusive, >, 0);
+	ASSERTIFCMP(test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags),
+		    object->n_in_progress, ==, 1);
+
+	spin_lock(&object->lock);
+
+	op->state = FSCACHE_OP_ST_COMPLETE;
+
+	if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags))
+		object->n_exclusive--;
+	object->n_in_progress--;
+	if (object->n_in_progress == 0)
+		fscache_start_operations(object);
+
+	spin_unlock(&object->lock);
+	_leave("");
+}
+EXPORT_SYMBOL(fscache_op_complete);
+
 /*
  * release an operation
  * - queues pending ops if this is the last in-progress op
@@ -328,8 +372,9 @@ void fscache_put_operation(struct fscache_operation *op)
 		return;
 
 	_debug("PUT OP");
-	if (test_and_set_bit(FSCACHE_OP_DEAD, &op->flags))
-		BUG();
+	ASSERTIFCMP(op->state != FSCACHE_OP_ST_COMPLETE,
+		    op->state, ==, FSCACHE_OP_ST_CANCELLED);
+	op->state = FSCACHE_OP_ST_DEAD;
 
 	fscache_stat(&fscache_n_op_release);
 
@@ -365,16 +410,6 @@ void fscache_put_operation(struct fscache_operation *op)
 		return;
 	}
 
-	if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags)) {
-		ASSERTCMP(object->n_exclusive, >, 0);
-		object->n_exclusive--;
-	}
-
-	ASSERTCMP(object->n_in_progress, >, 0);
-	object->n_in_progress--;
-	if (object->n_in_progress == 0)
-		fscache_start_operations(object);
-
 	ASSERTCMP(object->n_ops, >, 0);
 	object->n_ops--;
 	if (object->n_ops == 0)
@@ -413,23 +448,14 @@ void fscache_operation_gc(struct work_struct *work)
 		spin_unlock(&cache->op_gc_list_lock);
 
 		object = op->object;
+		spin_lock(&object->lock);
 
 		_debug("GC DEFERRED REL OBJ%x OP%x",
 		       object->debug_id, op->debug_id);
 		fscache_stat(&fscache_n_op_gc);
 
 		ASSERTCMP(atomic_read(&op->usage), ==, 0);
-
-		spin_lock(&object->lock);
-		if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags)) {
-			ASSERTCMP(object->n_exclusive, >, 0);
-			object->n_exclusive--;
-		}
-
-		ASSERTCMP(object->n_in_progress, >, 0);
-		object->n_in_progress--;
-		if (object->n_in_progress == 0)
-			fscache_start_operations(object);
+		ASSERTCMP(op->state, ==, FSCACHE_OP_ST_DEAD);
 
 		ASSERTCMP(object->n_ops, >, 0);
 		object->n_ops--;
@@ -437,6 +463,7 @@ void fscache_operation_gc(struct work_struct *work)
 			fscache_raise_event(object, FSCACHE_OBJECT_EV_CLEARED);
 
 		spin_unlock(&object->lock);
+		kfree(op);
 
 	} while (count++ < 20);
 
diff --git a/fs/fscache/page.c b/fs/fscache/page.c
index 248a12e22532..b38b13d2a555 100644
--- a/fs/fscache/page.c
+++ b/fs/fscache/page.c
@@ -162,6 +162,7 @@ static void fscache_attr_changed_op(struct fscache_operation *op)
 			fscache_abort_object(object);
 	}
 
+	fscache_op_complete(op);
 	_leave("");
 }
 
@@ -223,6 +224,8 @@ static void fscache_release_retrieval_op(struct fscache_operation *_op)
 
 	_enter("{OP%x}", op->op.debug_id);
 
+	ASSERTCMP(op->n_pages, ==, 0);
+
 	fscache_hist(fscache_retrieval_histogram, op->start_time);
 	if (op->context)
 		fscache_put_context(op->op.object->cookie, op->context);
@@ -320,6 +323,11 @@ static int fscache_wait_for_retrieval_activation(struct fscache_object *object,
 	_debug("<<< GO");
 
 check_if_dead:
+	if (op->op.state == FSCACHE_OP_ST_CANCELLED) {
+		fscache_stat(stat_object_dead);
+		_leave(" = -ENOBUFS [cancelled]");
+		return -ENOBUFS;
+	}
 	if (unlikely(fscache_object_is_dead(object))) {
 		fscache_stat(stat_object_dead);
 		return -ENOBUFS;
@@ -364,6 +372,7 @@ int __fscache_read_or_alloc_page(struct fscache_cookie *cookie,
 		_leave(" = -ENOMEM");
 		return -ENOMEM;
 	}
+	op->n_pages = 1;
 
 	spin_lock(&cookie->lock);
 
@@ -375,10 +384,10 @@ int __fscache_read_or_alloc_page(struct fscache_cookie *cookie,
 	ASSERTCMP(object->state, >, FSCACHE_OBJECT_LOOKING_UP);
 
 	atomic_inc(&object->n_reads);
-	set_bit(FSCACHE_OP_DEC_READ_CNT, &op->op.flags);
+	__set_bit(FSCACHE_OP_DEC_READ_CNT, &op->op.flags);
 
 	if (fscache_submit_op(object, &op->op) < 0)
-		goto nobufs_unlock;
+		goto nobufs_unlock_dec;
 	spin_unlock(&cookie->lock);
 
 	fscache_stat(&fscache_n_retrieval_ops);
@@ -425,6 +434,8 @@ error:
 	_leave(" = %d", ret);
 	return ret;
 
+nobufs_unlock_dec:
+	atomic_dec(&object->n_reads);
 nobufs_unlock:
 	spin_unlock(&cookie->lock);
 	kfree(op);
@@ -482,6 +493,7 @@ int __fscache_read_or_alloc_pages(struct fscache_cookie *cookie,
 	op = fscache_alloc_retrieval(mapping, end_io_func, context);
 	if (!op)
 		return -ENOMEM;
+	op->n_pages = *nr_pages;
 
 	spin_lock(&cookie->lock);
 
@@ -491,10 +503,10 @@ int __fscache_read_or_alloc_pages(struct fscache_cookie *cookie,
 			     struct fscache_object, cookie_link);
 
 	atomic_inc(&object->n_reads);
-	set_bit(FSCACHE_OP_DEC_READ_CNT, &op->op.flags);
+	__set_bit(FSCACHE_OP_DEC_READ_CNT, &op->op.flags);
 
 	if (fscache_submit_op(object, &op->op) < 0)
-		goto nobufs_unlock;
+		goto nobufs_unlock_dec;
 	spin_unlock(&cookie->lock);
 
 	fscache_stat(&fscache_n_retrieval_ops);
@@ -541,6 +553,8 @@ error:
 	_leave(" = %d", ret);
 	return ret;
 
+nobufs_unlock_dec:
+	atomic_dec(&object->n_reads);
 nobufs_unlock:
 	spin_unlock(&cookie->lock);
 	kfree(op);
@@ -583,6 +597,7 @@ int __fscache_alloc_page(struct fscache_cookie *cookie,
 	op = fscache_alloc_retrieval(page->mapping, NULL, NULL);
 	if (!op)
 		return -ENOMEM;
+	op->n_pages = 1;
 
 	spin_lock(&cookie->lock);
 
@@ -696,6 +711,7 @@ static void fscache_write_op(struct fscache_operation *_op)
 	fscache_end_page_write(object, page);
 	if (ret < 0) {
 		fscache_abort_object(object);
+		fscache_op_complete(&op->op);
 	} else {
 		fscache_enqueue_operation(&op->op);
 	}
@@ -710,6 +726,7 @@ superseded:
 	spin_unlock(&cookie->stores_lock);
 	clear_bit(FSCACHE_OBJECT_PENDING_WRITE, &object->flags);
 	spin_unlock(&object->lock);
+	fscache_op_complete(&op->op);
 	_leave("");
 }
 
diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h
index e3d6d939d959..f5facd1d333f 100644
--- a/include/linux/fscache-cache.h
+++ b/include/linux/fscache-cache.h
@@ -75,6 +75,16 @@ extern wait_queue_head_t fscache_cache_cleared_wq;
 typedef void (*fscache_operation_release_t)(struct fscache_operation *op);
 typedef void (*fscache_operation_processor_t)(struct fscache_operation *op);
 
+enum fscache_operation_state {
+	FSCACHE_OP_ST_BLANK,		/* Op is not yet submitted */
+	FSCACHE_OP_ST_INITIALISED,	/* Op is initialised */
+	FSCACHE_OP_ST_PENDING,		/* Op is blocked from running */
+	FSCACHE_OP_ST_IN_PROGRESS,	/* Op is in progress */
+	FSCACHE_OP_ST_COMPLETE,		/* Op is complete */
+	FSCACHE_OP_ST_CANCELLED,	/* Op has been cancelled */
+	FSCACHE_OP_ST_DEAD		/* Op is now dead */
+};
+
 struct fscache_operation {
 	struct work_struct	work;		/* record for async ops */
 	struct list_head	pend_link;	/* link in object->pending_ops */
@@ -86,10 +96,10 @@ struct fscache_operation {
 #define FSCACHE_OP_MYTHREAD	0x0002	/* - processing is done be issuing thread, not pool */
 #define FSCACHE_OP_WAITING	4	/* cleared when op is woken */
 #define FSCACHE_OP_EXCLUSIVE	5	/* exclusive op, other ops must wait */
-#define FSCACHE_OP_DEAD		6	/* op is now dead */
-#define FSCACHE_OP_DEC_READ_CNT	7	/* decrement object->n_reads on destruction */
-#define FSCACHE_OP_KEEP_FLAGS	0xc0	/* flags to keep when repurposing an op */
+#define FSCACHE_OP_DEC_READ_CNT	6	/* decrement object->n_reads on destruction */
+#define FSCACHE_OP_KEEP_FLAGS	0x0070	/* flags to keep when repurposing an op */
 
+	enum fscache_operation_state state;
 	atomic_t		usage;
 	unsigned		debug_id;	/* debugging ID */
 
@@ -106,6 +116,7 @@ extern atomic_t fscache_op_debug_id;
 extern void fscache_op_work_func(struct work_struct *work);
 
 extern void fscache_enqueue_operation(struct fscache_operation *);
+extern void fscache_op_complete(struct fscache_operation *);
 extern void fscache_put_operation(struct fscache_operation *);
 
 /**
@@ -122,6 +133,7 @@ static inline void fscache_operation_init(struct fscache_operation *op,
 {
 	INIT_WORK(&op->work, fscache_op_work_func);
 	atomic_set(&op->usage, 1);
+	op->state = FSCACHE_OP_ST_INITIALISED;
 	op->debug_id = atomic_inc_return(&fscache_op_debug_id);
 	op->processor = processor;
 	op->release = release;
@@ -138,6 +150,7 @@ struct fscache_retrieval {
 	void			*context;	/* netfs read context (pinned) */
 	struct list_head	to_do;		/* list of things to be done by the backend */
 	unsigned long		start_time;	/* time at which retrieval started */
+	unsigned		n_pages;	/* number of pages to be retrieved */
 };
 
 typedef int (*fscache_page_retrieval_func_t)(struct fscache_retrieval *op,
@@ -173,9 +186,23 @@ static inline void fscache_enqueue_retrieval(struct fscache_retrieval *op)
 	fscache_enqueue_operation(&op->op);
 }
 
+/**
+ * fscache_retrieval_complete - Record (partial) completion of a retrieval
+ * @op: The retrieval operation affected
+ * @n_pages: The number of pages to account for
+ */
+static inline void fscache_retrieval_complete(struct fscache_retrieval *op,
+					      int n_pages)
+{
+	op->n_pages -= n_pages;
+	if (op->n_pages <= 0)
+		fscache_op_complete(&op->op);
+}
+
 /**
  * fscache_put_retrieval - Drop a reference to a retrieval operation
  * @op: The retrieval operation affected
+ * @n_pages: The number of pages to account for
  *
  * Drop a reference to a retrieval operation.
  */
@@ -333,10 +360,10 @@ struct fscache_object {
 
 	int			debug_id;	/* debugging ID */
 	int			n_children;	/* number of child objects */
-	int			n_ops;		/* number of ops outstanding on object */
+	int			n_ops;		/* number of extant ops on object */
 	int			n_obj_ops;	/* number of object ops outstanding on object */
 	int			n_in_progress;	/* number of ops in progress */
-	int			n_exclusive;	/* number of exclusive ops queued */
+	int			n_exclusive;	/* number of exclusive ops queued or in progress */
 	atomic_t		n_reads;	/* number of read ops in progress */
 	spinlock_t		lock;		/* state and operations lock */
 
-- 
cgit v1.2.3


From ef778e7ae67cd426c30cad43378b908f5eb0bad5 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 20 Dec 2012 21:52:36 +0000
Subject: FS-Cache: Provide proper invalidation

Provide a proper invalidation method rather than relying on the netfs retiring
the cookie it has and getting a new one.  The problem with this is that isn't
easy for the netfs to make sure that it has completed/cancelled all its
outstanding storage and retrieval operations on the cookie it is retiring.

Instead, have the cache provide an invalidation method that will cancel or wait
for all currently outstanding operations before invalidating the cache, and
will cause new operations to queue up behind that.  Whilst invalidation is in
progress, some requests will be rejected until the cache can stack a barrier on
the operation queue to cause new operations to be deferred behind it.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 Documentation/filesystems/caching/backend-api.txt | 12 ++++
 Documentation/filesystems/caching/netfs-api.txt   | 46 ++++++++++++---
 Documentation/filesystems/caching/object.txt      | 23 +++++---
 fs/fscache/cookie.c                               | 60 +++++++++++++++++++
 fs/fscache/internal.h                             | 10 ++++
 fs/fscache/object.c                               | 72 +++++++++++++++++++++++
 fs/fscache/operation.c                            | 32 ++++++++++
 fs/fscache/page.c                                 | 51 ++++++++++++++++
 fs/fscache/stats.c                                | 11 +++-
 include/linux/fscache-cache.h                     |  8 ++-
 include/linux/fscache.h                           | 38 ++++++++++++
 11 files changed, 345 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/filesystems/caching/backend-api.txt b/Documentation/filesystems/caching/backend-api.txt
index f4769b9399df..d78bab9622c6 100644
--- a/Documentation/filesystems/caching/backend-api.txt
+++ b/Documentation/filesystems/caching/backend-api.txt
@@ -308,6 +308,18 @@ performed on the denizens of the cache.  These are held in a structure of type:
      obtained by calling object->cookie->def->get_aux()/get_attr().
 
 
+ (*) Invalidate data object [mandatory]:
+
+	int (*invalidate_object)(struct fscache_operation *op)
+
+     This is called to invalidate a data object (as pointed to by op->object).
+     All the data stored for this object should be discarded and an
+     attr_changed operation should be performed.  The caller will follow up
+     with an object update operation.
+
+     fscache_op_complete() must be called on op before returning.
+
+
  (*) Discard object [mandatory]:
 
 	void (*drop_object)(struct fscache_object *object)
diff --git a/Documentation/filesystems/caching/netfs-api.txt b/Documentation/filesystems/caching/netfs-api.txt
index 7cc6bf2871eb..97e6c0ecc5ef 100644
--- a/Documentation/filesystems/caching/netfs-api.txt
+++ b/Documentation/filesystems/caching/netfs-api.txt
@@ -35,8 +35,9 @@ This document contains the following sections:
 	(12) Index and data file update
 	(13) Miscellaneous cookie operations
 	(14) Cookie unregistration
-	(15) Index and data file invalidation
-	(16) FS-Cache specific page flags.
+	(15) Index invalidation
+	(16) Data file invalidation
+	(17) FS-Cache specific page flags.
 
 
 =============================
@@ -767,13 +768,42 @@ the cookies for "child" indices, objects and pages have been relinquished
 first.
 
 
-================================
-INDEX AND DATA FILE INVALIDATION
-================================
+==================
+INDEX INVALIDATION
+==================
+
+There is no direct way to invalidate an index subtree.  To do this, the caller
+should relinquish and retire the cookie they have, and then acquire a new one.
+
+
+======================
+DATA FILE INVALIDATION
+======================
+
+Sometimes it will be necessary to invalidate an object that contains data.
+Typically this will be necessary when the server tells the netfs of a foreign
+change - at which point the netfs has to throw away all the state it had for an
+inode and reload from the server.
+
+To indicate that a cache object should be invalidated, the following function
+can be called:
+
+	void fscache_invalidate(struct fscache_cookie *cookie);
+
+This can be called with spinlocks held as it defers the work to a thread pool.
+All extant storage, retrieval and attribute change ops at this point are
+cancelled and discarded.  Some future operations will be rejected until the
+cache has had a chance to insert a barrier in the operations queue.  After
+that, operations will be queued again behind the invalidation operation.
+
+The invalidation operation will perform an attribute change operation and an
+auxiliary data update operation as it is very likely these will have changed.
+
+Using the following function, the netfs can wait for the invalidation operation
+to have reached a point at which it can start submitting ordinary operations
+once again:
 
-There is no direct way to invalidate an index subtree or a data file.  To do
-this, the caller should relinquish and retire the cookie they have, and then
-acquire a new one.
+	void fscache_wait_on_invalidate(struct fscache_cookie *cookie);
 
 
 ===========================
diff --git a/Documentation/filesystems/caching/object.txt b/Documentation/filesystems/caching/object.txt
index 58313348da87..100ff41127e4 100644
--- a/Documentation/filesystems/caching/object.txt
+++ b/Documentation/filesystems/caching/object.txt
@@ -216,7 +216,14 @@ servicing netfs requests:
      The normal running state.  In this state, requests the netfs makes will be
      passed on to the cache.
 
- (6) State FSCACHE_OBJECT_UPDATING.
+ (6) State FSCACHE_OBJECT_INVALIDATING.
+
+     The object is undergoing invalidation.  When the state comes here, it
+     discards all pending read, write and attribute change operations as it is
+     going to clear out the cache entirely and reinitialise it.  It will then
+     continue to the FSCACHE_OBJECT_UPDATING state.
+
+ (7) State FSCACHE_OBJECT_UPDATING.
 
      The state machine comes here to update the object in the cache from the
      netfs's records.  This involves updating the auxiliary data that is used
@@ -225,13 +232,13 @@ servicing netfs requests:
 And there are terminal states in which an object cleans itself up, deallocates
 memory and potentially deletes stuff from disk:
 
- (7) State FSCACHE_OBJECT_LC_DYING.
+ (8) State FSCACHE_OBJECT_LC_DYING.
 
      The object comes here if it is dying because of a lookup or creation
      error.  This would be due to a disk error or system error of some sort.
      Temporary data is cleaned up, and the parent is released.
 
- (8) State FSCACHE_OBJECT_DYING.
+ (9) State FSCACHE_OBJECT_DYING.
 
      The object comes here if it is dying due to an error, because its parent
      cookie has been relinquished by the netfs or because the cache is being
@@ -241,27 +248,27 @@ memory and potentially deletes stuff from disk:
      can destroy themselves.  This object waits for all its children to go away
      before advancing to the next state.
 
- (9) State FSCACHE_OBJECT_ABORT_INIT.
+(10) State FSCACHE_OBJECT_ABORT_INIT.
 
      The object comes to this state if it was waiting on its parent in
      FSCACHE_OBJECT_INIT, but its parent died.  The object will destroy itself
      so that the parent may proceed from the FSCACHE_OBJECT_DYING state.
 
-(10) State FSCACHE_OBJECT_RELEASING.
-(11) State FSCACHE_OBJECT_RECYCLING.
+(11) State FSCACHE_OBJECT_RELEASING.
+(12) State FSCACHE_OBJECT_RECYCLING.
 
      The object comes to one of these two states when dying once it is rid of
      all its children, if it is dying because the netfs relinquished its
      cookie.  In the first state, the cached data is expected to persist, and
      in the second it will be deleted.
 
-(12) State FSCACHE_OBJECT_WITHDRAWING.
+(13) State FSCACHE_OBJECT_WITHDRAWING.
 
      The object transits to this state if the cache decides it wants to
      withdraw the object from service, perhaps to make space, but also due to
      error or just because the whole cache is being withdrawn.
 
-(13) State FSCACHE_OBJECT_DEAD.
+(14) State FSCACHE_OBJECT_DEAD.
 
      The object transits to this state when the in-memory object record is
      ready to be deleted.  The object processor shouldn't ever see an object in
diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c
index 66be9eccede0..8dcb114758e3 100644
--- a/fs/fscache/cookie.c
+++ b/fs/fscache/cookie.c
@@ -369,6 +369,66 @@ cant_attach_object:
 	return ret;
 }
 
+/*
+ * Invalidate an object.  Callable with spinlocks held.
+ */
+void __fscache_invalidate(struct fscache_cookie *cookie)
+{
+	struct fscache_object *object;
+
+	_enter("{%s}", cookie->def->name);
+
+	fscache_stat(&fscache_n_invalidates);
+
+	/* Only permit invalidation of data files.  Invalidating an index will
+	 * require the caller to release all its attachments to the tree rooted
+	 * there, and if it's doing that, it may as well just retire the
+	 * cookie.
+	 */
+	ASSERTCMP(cookie->def->type, ==, FSCACHE_COOKIE_TYPE_DATAFILE);
+
+	/* We will be updating the cookie too. */
+	BUG_ON(!cookie->def->get_aux);
+
+	/* If there's an object, we tell the object state machine to handle the
+	 * invalidation on our behalf, otherwise there's nothing to do.
+	 */
+	if (!hlist_empty(&cookie->backing_objects)) {
+		spin_lock(&cookie->lock);
+
+		if (!hlist_empty(&cookie->backing_objects) &&
+		    !test_and_set_bit(FSCACHE_COOKIE_INVALIDATING,
+				      &cookie->flags)) {
+			object = hlist_entry(cookie->backing_objects.first,
+					     struct fscache_object,
+					     cookie_link);
+			if (object->state < FSCACHE_OBJECT_DYING)
+				fscache_raise_event(
+					object, FSCACHE_OBJECT_EV_INVALIDATE);
+		}
+
+		spin_unlock(&cookie->lock);
+	}
+
+	_leave("");
+}
+EXPORT_SYMBOL(__fscache_invalidate);
+
+/*
+ * Wait for object invalidation to complete.
+ */
+void __fscache_wait_on_invalidate(struct fscache_cookie *cookie)
+{
+	_enter("%p", cookie);
+
+	wait_on_bit(&cookie->flags, FSCACHE_COOKIE_INVALIDATING,
+		    fscache_wait_bit_interruptible,
+		    TASK_UNINTERRUPTIBLE);
+
+	_leave("");
+}
+EXPORT_SYMBOL(__fscache_wait_on_invalidate);
+
 /*
  * update the index entries backing a cookie
  */
diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h
index f6aad48d38a8..c81179303930 100644
--- a/fs/fscache/internal.h
+++ b/fs/fscache/internal.h
@@ -122,10 +122,16 @@ extern int fscache_submit_exclusive_op(struct fscache_object *,
 extern int fscache_submit_op(struct fscache_object *,
 			     struct fscache_operation *);
 extern int fscache_cancel_op(struct fscache_operation *);
+extern void fscache_cancel_all_ops(struct fscache_object *);
 extern void fscache_abort_object(struct fscache_object *);
 extern void fscache_start_operations(struct fscache_object *);
 extern void fscache_operation_gc(struct work_struct *);
 
+/*
+ * page.c
+ */
+extern void fscache_invalidate_writes(struct fscache_cookie *);
+
 /*
  * proc.c
  */
@@ -205,6 +211,9 @@ extern atomic_t fscache_n_acquires_ok;
 extern atomic_t fscache_n_acquires_nobufs;
 extern atomic_t fscache_n_acquires_oom;
 
+extern atomic_t fscache_n_invalidates;
+extern atomic_t fscache_n_invalidates_run;
+
 extern atomic_t fscache_n_updates;
 extern atomic_t fscache_n_updates_null;
 extern atomic_t fscache_n_updates_run;
@@ -237,6 +246,7 @@ extern atomic_t fscache_n_cop_alloc_object;
 extern atomic_t fscache_n_cop_lookup_object;
 extern atomic_t fscache_n_cop_lookup_complete;
 extern atomic_t fscache_n_cop_grab_object;
+extern atomic_t fscache_n_cop_invalidate_object;
 extern atomic_t fscache_n_cop_update_object;
 extern atomic_t fscache_n_cop_drop_object;
 extern atomic_t fscache_n_cop_put_object;
diff --git a/fs/fscache/object.c b/fs/fscache/object.c
index 773bc798a416..80b549141ea6 100644
--- a/fs/fscache/object.c
+++ b/fs/fscache/object.c
@@ -14,6 +14,7 @@
 
 #define FSCACHE_DEBUG_LEVEL COOKIE
 #include <linux/module.h>
+#include <linux/slab.h>
 #include "internal.h"
 
 const char *fscache_object_states[FSCACHE_OBJECT__NSTATES] = {
@@ -22,6 +23,7 @@ const char *fscache_object_states[FSCACHE_OBJECT__NSTATES] = {
 	[FSCACHE_OBJECT_CREATING]	= "OBJECT_CREATING",
 	[FSCACHE_OBJECT_AVAILABLE]	= "OBJECT_AVAILABLE",
 	[FSCACHE_OBJECT_ACTIVE]		= "OBJECT_ACTIVE",
+	[FSCACHE_OBJECT_INVALIDATING]	= "OBJECT_INVALIDATING",
 	[FSCACHE_OBJECT_UPDATING]	= "OBJECT_UPDATING",
 	[FSCACHE_OBJECT_DYING]		= "OBJECT_DYING",
 	[FSCACHE_OBJECT_LC_DYING]	= "OBJECT_LC_DYING",
@@ -39,6 +41,7 @@ const char fscache_object_states_short[FSCACHE_OBJECT__NSTATES][5] = {
 	[FSCACHE_OBJECT_CREATING]	= "CRTN",
 	[FSCACHE_OBJECT_AVAILABLE]	= "AVBL",
 	[FSCACHE_OBJECT_ACTIVE]		= "ACTV",
+	[FSCACHE_OBJECT_INVALIDATING]	= "INVL",
 	[FSCACHE_OBJECT_UPDATING]	= "UPDT",
 	[FSCACHE_OBJECT_DYING]		= "DYNG",
 	[FSCACHE_OBJECT_LC_DYING]	= "LCDY",
@@ -54,6 +57,7 @@ static void fscache_put_object(struct fscache_object *);
 static void fscache_initialise_object(struct fscache_object *);
 static void fscache_lookup_object(struct fscache_object *);
 static void fscache_object_available(struct fscache_object *);
+static void fscache_invalidate_object(struct fscache_object *);
 static void fscache_release_object(struct fscache_object *);
 static void fscache_withdraw_object(struct fscache_object *);
 static void fscache_enqueue_dependents(struct fscache_object *);
@@ -78,6 +82,15 @@ static inline void fscache_done_parent_op(struct fscache_object *object)
 	spin_unlock(&parent->lock);
 }
 
+/*
+ * Notify netfs of invalidation completion.
+ */
+static inline void fscache_invalidation_complete(struct fscache_cookie *cookie)
+{
+	if (test_and_clear_bit(FSCACHE_COOKIE_INVALIDATING, &cookie->flags))
+		wake_up_bit(&cookie->flags, FSCACHE_COOKIE_INVALIDATING);
+}
+
 /*
  * process events that have been sent to an object's state machine
  * - initiates parent lookup
@@ -125,6 +138,16 @@ static void fscache_object_state_machine(struct fscache_object *object)
 	case FSCACHE_OBJECT_ACTIVE:
 		goto active_transit;
 
+		/* Invalidate an object on disk */
+	case FSCACHE_OBJECT_INVALIDATING:
+		clear_bit(FSCACHE_OBJECT_EV_INVALIDATE, &object->events);
+		fscache_stat(&fscache_n_invalidates_run);
+		fscache_stat(&fscache_n_cop_invalidate_object);
+		fscache_invalidate_object(object);
+		fscache_stat_d(&fscache_n_cop_invalidate_object);
+		fscache_raise_event(object, FSCACHE_OBJECT_EV_UPDATE);
+		goto active_transit;
+
 		/* update the object metadata on disk */
 	case FSCACHE_OBJECT_UPDATING:
 		clear_bit(FSCACHE_OBJECT_EV_UPDATE, &object->events);
@@ -275,6 +298,9 @@ active_transit:
 	case FSCACHE_OBJECT_EV_ERROR:
 		new_state = FSCACHE_OBJECT_DYING;
 		goto change_state;
+	case FSCACHE_OBJECT_EV_INVALIDATE:
+		new_state = FSCACHE_OBJECT_INVALIDATING;
+		goto change_state;
 	case FSCACHE_OBJECT_EV_UPDATE:
 		new_state = FSCACHE_OBJECT_UPDATING;
 		goto change_state;
@@ -679,6 +705,7 @@ static void fscache_withdraw_object(struct fscache_object *object)
 		if (object->cookie == cookie) {
 			hlist_del_init(&object->cookie_link);
 			object->cookie = NULL;
+			fscache_invalidation_complete(cookie);
 			detached = true;
 		}
 		spin_unlock(&cookie->lock);
@@ -888,3 +915,48 @@ enum fscache_checkaux fscache_check_aux(struct fscache_object *object,
 	return result;
 }
 EXPORT_SYMBOL(fscache_check_aux);
+
+/*
+ * Asynchronously invalidate an object.
+ */
+static void fscache_invalidate_object(struct fscache_object *object)
+{
+	struct fscache_operation *op;
+	struct fscache_cookie *cookie = object->cookie;
+
+	_enter("{OBJ%x}", object->debug_id);
+
+	/* Reject any new read/write ops and abort any that are pending. */
+	fscache_invalidate_writes(cookie);
+	clear_bit(FSCACHE_OBJECT_PENDING_WRITE, &object->flags);
+	fscache_cancel_all_ops(object);
+
+	/* Now we have to wait for in-progress reads and writes */
+	op = kzalloc(sizeof(*op), GFP_KERNEL);
+	if (!op) {
+		fscache_raise_event(object, FSCACHE_OBJECT_EV_ERROR);
+		_leave(" [ENOMEM]");
+		return;
+	}
+
+	fscache_operation_init(op, object->cache->ops->invalidate_object, NULL);
+	op->flags = FSCACHE_OP_ASYNC | (1 << FSCACHE_OP_EXCLUSIVE);
+
+	spin_lock(&cookie->lock);
+	if (fscache_submit_exclusive_op(object, op) < 0)
+		BUG();
+	spin_unlock(&cookie->lock);
+	fscache_put_operation(op);
+
+	/* Once we've completed the invalidation, we know there will be no data
+	 * stored in the cache and thus we can reinstate the data-check-skip
+	 * optimisation.
+	 */
+	set_bit(FSCACHE_COOKIE_NO_DATA_YET, &cookie->flags);
+
+	/* We can allow read and write requests to come in once again.  They'll
+	 * queue up behind our exclusive invalidation operation.
+	 */
+	fscache_invalidation_complete(cookie);
+	_leave("");
+}
diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c
index 748f9553c2cb..c58dbe613266 100644
--- a/fs/fscache/operation.c
+++ b/fs/fscache/operation.c
@@ -323,6 +323,38 @@ int fscache_cancel_op(struct fscache_operation *op)
 	return ret;
 }
 
+/*
+ * Cancel all pending operations on an object
+ */
+void fscache_cancel_all_ops(struct fscache_object *object)
+{
+	struct fscache_operation *op;
+
+	_enter("OBJ%x", object->debug_id);
+
+	spin_lock(&object->lock);
+
+	while (!list_empty(&object->pending_ops)) {
+		op = list_entry(object->pending_ops.next,
+				struct fscache_operation, pend_link);
+		fscache_stat(&fscache_n_op_cancelled);
+		list_del_init(&op->pend_link);
+
+		ASSERTCMP(op->state, ==, FSCACHE_OP_ST_PENDING);
+		op->state = FSCACHE_OP_ST_CANCELLED;
+
+		if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags))
+			object->n_exclusive--;
+		if (test_and_clear_bit(FSCACHE_OP_WAITING, &op->flags))
+			wake_up_bit(&op->flags, FSCACHE_OP_WAITING);
+		fscache_put_operation(op);
+		cond_resched_lock(&object->lock);
+	}
+
+	spin_unlock(&object->lock);
+	_leave("");
+}
+
 /*
  * Record the completion of an in-progress operation.
  */
diff --git a/fs/fscache/page.c b/fs/fscache/page.c
index b38b13d2a555..7bf9d2557052 100644
--- a/fs/fscache/page.c
+++ b/fs/fscache/page.c
@@ -361,6 +361,11 @@ int __fscache_read_or_alloc_page(struct fscache_cookie *cookie,
 	if (hlist_empty(&cookie->backing_objects))
 		goto nobufs;
 
+	if (test_bit(FSCACHE_COOKIE_INVALIDATING, &cookie->flags)) {
+		_leave(" = -ENOBUFS [invalidating]");
+		return -ENOBUFS;
+	}
+
 	ASSERTCMP(cookie->def->type, !=, FSCACHE_COOKIE_TYPE_INDEX);
 	ASSERTCMP(page, !=, NULL);
 
@@ -483,6 +488,11 @@ int __fscache_read_or_alloc_pages(struct fscache_cookie *cookie,
 	if (hlist_empty(&cookie->backing_objects))
 		goto nobufs;
 
+	if (test_bit(FSCACHE_COOKIE_INVALIDATING, &cookie->flags)) {
+		_leave(" = -ENOBUFS [invalidating]");
+		return -ENOBUFS;
+	}
+
 	ASSERTCMP(cookie->def->type, !=, FSCACHE_COOKIE_TYPE_INDEX);
 	ASSERTCMP(*nr_pages, >, 0);
 	ASSERT(!list_empty(pages));
@@ -591,6 +601,11 @@ int __fscache_alloc_page(struct fscache_cookie *cookie,
 	ASSERTCMP(cookie->def->type, !=, FSCACHE_COOKIE_TYPE_INDEX);
 	ASSERTCMP(page, !=, NULL);
 
+	if (test_bit(FSCACHE_COOKIE_INVALIDATING, &cookie->flags)) {
+		_leave(" = -ENOBUFS [invalidating]");
+		return -ENOBUFS;
+	}
+
 	if (fscache_wait_for_deferred_lookup(cookie) < 0)
 		return -ERESTARTSYS;
 
@@ -730,6 +745,37 @@ superseded:
 	_leave("");
 }
 
+/*
+ * Clear the pages pending writing for invalidation
+ */
+void fscache_invalidate_writes(struct fscache_cookie *cookie)
+{
+	struct page *page;
+	void *results[16];
+	int n, i;
+
+	_enter("");
+
+	while (spin_lock(&cookie->stores_lock),
+	       n = radix_tree_gang_lookup_tag(&cookie->stores, results, 0,
+					      ARRAY_SIZE(results),
+					      FSCACHE_COOKIE_PENDING_TAG),
+	       n > 0) {
+		for (i = n - 1; i >= 0; i--) {
+			page = results[i];
+			radix_tree_delete(&cookie->stores, page->index);
+		}
+
+		spin_unlock(&cookie->stores_lock);
+
+		for (i = n - 1; i >= 0; i--)
+			page_cache_release(results[i]);
+	}
+
+	spin_unlock(&cookie->stores_lock);
+	_leave("");
+}
+
 /*
  * request a page be stored in the cache
  * - returns:
@@ -776,6 +822,11 @@ int __fscache_write_page(struct fscache_cookie *cookie,
 
 	fscache_stat(&fscache_n_stores);
 
+	if (test_bit(FSCACHE_COOKIE_INVALIDATING, &cookie->flags)) {
+		_leave(" = -ENOBUFS [invalidating]");
+		return -ENOBUFS;
+	}
+
 	op = kzalloc(sizeof(*op), GFP_NOIO | __GFP_NOMEMALLOC | __GFP_NORETRY);
 	if (!op)
 		goto nomem;
diff --git a/fs/fscache/stats.c b/fs/fscache/stats.c
index 4765190d537f..51cdaee14109 100644
--- a/fs/fscache/stats.c
+++ b/fs/fscache/stats.c
@@ -80,6 +80,9 @@ atomic_t fscache_n_acquires_ok;
 atomic_t fscache_n_acquires_nobufs;
 atomic_t fscache_n_acquires_oom;
 
+atomic_t fscache_n_invalidates;
+atomic_t fscache_n_invalidates_run;
+
 atomic_t fscache_n_updates;
 atomic_t fscache_n_updates_null;
 atomic_t fscache_n_updates_run;
@@ -112,6 +115,7 @@ atomic_t fscache_n_cop_alloc_object;
 atomic_t fscache_n_cop_lookup_object;
 atomic_t fscache_n_cop_lookup_complete;
 atomic_t fscache_n_cop_grab_object;
+atomic_t fscache_n_cop_invalidate_object;
 atomic_t fscache_n_cop_update_object;
 atomic_t fscache_n_cop_drop_object;
 atomic_t fscache_n_cop_put_object;
@@ -168,6 +172,10 @@ static int fscache_stats_show(struct seq_file *m, void *v)
 		   atomic_read(&fscache_n_object_created),
 		   atomic_read(&fscache_n_object_lookups_timed_out));
 
+	seq_printf(m, "Invals : n=%u run=%u\n",
+		   atomic_read(&fscache_n_invalidates),
+		   atomic_read(&fscache_n_invalidates_run));
+
 	seq_printf(m, "Updates: n=%u nul=%u run=%u\n",
 		   atomic_read(&fscache_n_updates),
 		   atomic_read(&fscache_n_updates_null),
@@ -246,7 +254,8 @@ static int fscache_stats_show(struct seq_file *m, void *v)
 		   atomic_read(&fscache_n_cop_lookup_object),
 		   atomic_read(&fscache_n_cop_lookup_complete),
 		   atomic_read(&fscache_n_cop_grab_object));
-	seq_printf(m, "CacheOp: upo=%d dro=%d pto=%d atc=%d syn=%d\n",
+	seq_printf(m, "CacheOp: inv=%d upo=%d dro=%d pto=%d atc=%d syn=%d\n",
+		   atomic_read(&fscache_n_cop_invalidate_object),
 		   atomic_read(&fscache_n_cop_update_object),
 		   atomic_read(&fscache_n_cop_drop_object),
 		   atomic_read(&fscache_n_cop_put_object),
diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h
index f5facd1d333f..1e454ad7a832 100644
--- a/include/linux/fscache-cache.h
+++ b/include/linux/fscache-cache.h
@@ -254,6 +254,9 @@ struct fscache_cache_ops {
 	/* store the updated auxiliary data on an object */
 	void (*update_object)(struct fscache_object *object);
 
+	/* Invalidate an object */
+	void (*invalidate_object)(struct fscache_operation *op);
+
 	/* discard the resources pinned by an object and effect retirement if
 	 * necessary */
 	void (*drop_object)(struct fscache_object *object);
@@ -329,6 +332,7 @@ struct fscache_cookie {
 #define FSCACHE_COOKIE_FILLING		4	/* T if filling object incrementally */
 #define FSCACHE_COOKIE_UNAVAILABLE	5	/* T if cookie is unavailable (error, etc) */
 #define FSCACHE_COOKIE_WAITING_ON_READS	6	/* T if cookie is waiting on reads */
+#define FSCACHE_COOKIE_INVALIDATING	7	/* T if cookie is being invalidated */
 };
 
 extern struct fscache_cookie fscache_fsdef_index;
@@ -345,6 +349,7 @@ struct fscache_object {
 		/* active states */
 		FSCACHE_OBJECT_AVAILABLE,	/* cleaning up object after creation */
 		FSCACHE_OBJECT_ACTIVE,		/* object is usable */
+		FSCACHE_OBJECT_INVALIDATING,	/* object is invalidating */
 		FSCACHE_OBJECT_UPDATING,	/* object is updating */
 
 		/* terminal states */
@@ -378,7 +383,8 @@ struct fscache_object {
 #define FSCACHE_OBJECT_EV_RELEASE	4	/* T if netfs requested object release */
 #define FSCACHE_OBJECT_EV_RETIRE	5	/* T if netfs requested object retirement */
 #define FSCACHE_OBJECT_EV_WITHDRAW	6	/* T if cache requested object withdrawal */
-#define FSCACHE_OBJECT_EVENTS_MASK	0x7f	/* mask of all events*/
+#define FSCACHE_OBJECT_EV_INVALIDATE	7	/* T if cache requested object invalidation */
+#define FSCACHE_OBJECT_EVENTS_MASK	0xff	/* mask of all events*/
 
 	unsigned long		flags;
 #define FSCACHE_OBJECT_LOCK		0	/* T if object is busy being processed */
diff --git a/include/linux/fscache.h b/include/linux/fscache.h
index f4b6353543bf..7a086235da4b 100644
--- a/include/linux/fscache.h
+++ b/include/linux/fscache.h
@@ -185,6 +185,8 @@ extern struct fscache_cookie *__fscache_acquire_cookie(
 extern void __fscache_relinquish_cookie(struct fscache_cookie *, int);
 extern void __fscache_update_cookie(struct fscache_cookie *);
 extern int __fscache_attr_changed(struct fscache_cookie *);
+extern void __fscache_invalidate(struct fscache_cookie *);
+extern void __fscache_wait_on_invalidate(struct fscache_cookie *);
 extern int __fscache_read_or_alloc_page(struct fscache_cookie *,
 					struct page *,
 					fscache_rw_complete_t,
@@ -389,6 +391,42 @@ int fscache_attr_changed(struct fscache_cookie *cookie)
 		return -ENOBUFS;
 }
 
+/**
+ * fscache_invalidate - Notify cache that an object needs invalidation
+ * @cookie: The cookie representing the cache object
+ *
+ * Notify the cache that an object is needs to be invalidated and that it
+ * should abort any retrievals or stores it is doing on the cache.  The object
+ * is then marked non-caching until such time as the invalidation is complete.
+ *
+ * This can be called with spinlocks held.
+ *
+ * See Documentation/filesystems/caching/netfs-api.txt for a complete
+ * description.
+ */
+static inline
+void fscache_invalidate(struct fscache_cookie *cookie)
+{
+	if (fscache_cookie_valid(cookie))
+		__fscache_invalidate(cookie);
+}
+
+/**
+ * fscache_wait_on_invalidate - Wait for invalidation to complete
+ * @cookie: The cookie representing the cache object
+ *
+ * Wait for the invalidation of an object to complete.
+ *
+ * See Documentation/filesystems/caching/netfs-api.txt for a complete
+ * description.
+ */
+static inline
+void fscache_wait_on_invalidate(struct fscache_cookie *cookie)
+{
+	if (fscache_cookie_valid(cookie))
+		__fscache_wait_on_invalidate(cookie);
+}
+
 /**
  * fscache_reserve_space - Reserve data space for a cached object
  * @cookie: The cookie representing the cache object
-- 
cgit v1.2.3


From a02de9608595c8ef649ef03ae735b0b45e3d4396 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 20 Dec 2012 21:52:36 +0000
Subject: VFS: Make more complete truncate operation available to CacheFiles

Make a more complete truncate operation available to CacheFiles (including
security checks and suchlike) so that it can use this to clear invalidated
cache files.

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/open.c          | 50 +++++++++++++++++++++++++++-----------------------
 include/linux/fs.h |  1 +
 2 files changed, 28 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/fs/open.c b/fs/open.c
index 182d8667b7bd..c819bbdab47f 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -61,33 +61,22 @@ int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs,
 	return ret;
 }
 
-static long do_sys_truncate(const char __user *pathname, loff_t length)
+long vfs_truncate(struct path *path, loff_t length)
 {
-	struct path path;
 	struct inode *inode;
-	int error;
-
-	error = -EINVAL;
-	if (length < 0)	/* sorry, but loff_t says... */
-		goto out;
+	long error;
 
-	error = user_path(pathname, &path);
-	if (error)
-		goto out;
-	inode = path.dentry->d_inode;
+	inode = path->dentry->d_inode;
 
 	/* For directories it's -EISDIR, for other non-regulars - -EINVAL */
-	error = -EISDIR;
 	if (S_ISDIR(inode->i_mode))
-		goto dput_and_out;
-
-	error = -EINVAL;
+		return -EISDIR;
 	if (!S_ISREG(inode->i_mode))
-		goto dput_and_out;
+		return -EINVAL;
 
-	error = mnt_want_write(path.mnt);
+	error = mnt_want_write(path->mnt);
 	if (error)
-		goto dput_and_out;
+		goto out;
 
 	error = inode_permission(inode, MAY_WRITE);
 	if (error)
@@ -111,19 +100,34 @@ static long do_sys_truncate(const char __user *pathname, loff_t length)
 
 	error = locks_verify_truncate(inode, NULL, length);
 	if (!error)
-		error = security_path_truncate(&path);
+		error = security_path_truncate(path);
 	if (!error)
-		error = do_truncate(path.dentry, length, 0, NULL);
+		error = do_truncate(path->dentry, length, 0, NULL);
 
 put_write_and_out:
 	put_write_access(inode);
 mnt_drop_write_and_out:
-	mnt_drop_write(path.mnt);
-dput_and_out:
-	path_put(&path);
+	mnt_drop_write(path->mnt);
 out:
 	return error;
 }
+EXPORT_SYMBOL_GPL(vfs_truncate);
+
+static long do_sys_truncate(const char __user *pathname, loff_t length)
+{
+	struct path path;
+	int error;
+
+	if (length < 0)	/* sorry, but loff_t says... */
+		return -EINVAL;
+
+	error = user_path(pathname, &path);
+	if (!error) {
+		error = vfs_truncate(&path, length);
+		path_put(&path);
+	}
+	return error;
+}
 
 SYSCALL_DEFINE2(truncate, const char __user *, path, long, length)
 {
diff --git a/include/linux/fs.h b/include/linux/fs.h
index a823d4be38e7..017a15b707e2 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1999,6 +1999,7 @@ struct filename {
 	bool			separate; /* should "name" be freed? */
 };
 
+extern long vfs_truncate(struct path *, loff_t);
 extern int do_truncate(struct dentry *, loff_t start, unsigned int time_attrs,
 		       struct file *filp);
 extern int do_fallocate(struct file *file, int mode, loff_t offset,
-- 
cgit v1.2.3


From 36a02de5d7981435931d4608ee3e510b752e072b Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 5 Dec 2012 13:34:46 +0000
Subject: FS-Cache: Convert the object event ID #defines into an enum

Convert the fscache_object event IDs from #defines into an enum.  Also add an
extra label to the enum to carry the event count and redefine the event mask
in terms of that.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 include/linux/fscache-cache.h | 26 +++++++++++++++++---------
 1 file changed, 17 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h
index 1e454ad7a832..73e68c8d5df4 100644
--- a/include/linux/fscache-cache.h
+++ b/include/linux/fscache-cache.h
@@ -337,6 +337,23 @@ struct fscache_cookie {
 
 extern struct fscache_cookie fscache_fsdef_index;
 
+/*
+ * Event list for fscache_object::{event_mask,events}
+ */
+enum {
+	FSCACHE_OBJECT_EV_REQUEUE,	/* T if object should be requeued */
+	FSCACHE_OBJECT_EV_UPDATE,	/* T if object should be updated */
+	FSCACHE_OBJECT_EV_INVALIDATE,	/* T if cache requested object invalidation */
+	FSCACHE_OBJECT_EV_CLEARED,	/* T if accessors all gone */
+	FSCACHE_OBJECT_EV_ERROR,	/* T if fatal error occurred during processing */
+	FSCACHE_OBJECT_EV_RELEASE,	/* T if netfs requested object release */
+	FSCACHE_OBJECT_EV_RETIRE,	/* T if netfs requested object retirement */
+	FSCACHE_OBJECT_EV_WITHDRAW,	/* T if cache requested object withdrawal */
+	NR_FSCACHE_OBJECT_EVENTS
+};
+
+#define FSCACHE_OBJECT_EVENTS_MASK ((1UL << NR_FSCACHE_OBJECT_EVENTS) - 1)
+
 /*
  * on-disk cache file or index handle
  */
@@ -376,15 +393,6 @@ struct fscache_object {
 	unsigned long		event_mask;	/* events this object is interested in */
 	unsigned long		events;		/* events to be processed by this object
 						 * (order is important - using fls) */
-#define FSCACHE_OBJECT_EV_REQUEUE	0	/* T if object should be requeued */
-#define FSCACHE_OBJECT_EV_UPDATE	1	/* T if object should be updated */
-#define FSCACHE_OBJECT_EV_CLEARED	2	/* T if accessors all gone */
-#define FSCACHE_OBJECT_EV_ERROR		3	/* T if fatal error occurred during processing */
-#define FSCACHE_OBJECT_EV_RELEASE	4	/* T if netfs requested object release */
-#define FSCACHE_OBJECT_EV_RETIRE	5	/* T if netfs requested object retirement */
-#define FSCACHE_OBJECT_EV_WITHDRAW	6	/* T if cache requested object withdrawal */
-#define FSCACHE_OBJECT_EV_INVALIDATE	7	/* T if cache requested object invalidation */
-#define FSCACHE_OBJECT_EVENTS_MASK	0xff	/* mask of all events*/
 
 	unsigned long		flags;
 #define FSCACHE_OBJECT_LOCK		0	/* T if object is busy being processed */
-- 
cgit v1.2.3


From 1f372dff1da37e2b36ae9085368fa46896398598 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 13 Dec 2012 20:03:13 +0000
Subject: FS-Cache: Mark cancellation of in-progress operation

Mark as cancelled an operation that is in progress rather than pending at the
time it is cancelled, and call fscache_complete_op() to cancel an operation so
that blocked ops can be started.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/cachefiles/interface.c     |  2 +-
 fs/fscache/operation.c        |  7 ++++---
 fs/fscache/page.c             | 10 +++++-----
 include/linux/fscache-cache.h |  4 ++--
 4 files changed, 12 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c
index 7a9d574b961c..746ce532e130 100644
--- a/fs/cachefiles/interface.c
+++ b/fs/cachefiles/interface.c
@@ -484,7 +484,7 @@ static void cachefiles_invalidate_object(struct fscache_operation *op)
 		}
 	}
 
-	fscache_op_complete(op);
+	fscache_op_complete(op, true);
 	_leave("");
 }
 
diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c
index 9e6b7d232bb1..36c59604130d 100644
--- a/fs/fscache/operation.c
+++ b/fs/fscache/operation.c
@@ -363,9 +363,9 @@ void fscache_cancel_all_ops(struct fscache_object *object)
 }
 
 /*
- * Record the completion of an in-progress operation.
+ * Record the completion or cancellation of an in-progress operation.
  */
-void fscache_op_complete(struct fscache_operation *op)
+void fscache_op_complete(struct fscache_operation *op, bool cancelled)
 {
 	struct fscache_object *object = op->object;
 
@@ -380,7 +380,8 @@ void fscache_op_complete(struct fscache_operation *op)
 
 	spin_lock(&object->lock);
 
-	op->state = FSCACHE_OP_ST_COMPLETE;
+	op->state = cancelled ?
+		FSCACHE_OP_ST_CANCELLED : FSCACHE_OP_ST_COMPLETE;
 
 	if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags))
 		object->n_exclusive--;
diff --git a/fs/fscache/page.c b/fs/fscache/page.c
index ef0218f5080d..8a92b9fabe83 100644
--- a/fs/fscache/page.c
+++ b/fs/fscache/page.c
@@ -171,7 +171,7 @@ static void fscache_attr_changed_op(struct fscache_operation *op)
 			fscache_abort_object(object);
 	}
 
-	fscache_op_complete(op);
+	fscache_op_complete(op, true);
 	_leave("");
 }
 
@@ -704,7 +704,7 @@ static void fscache_write_op(struct fscache_operation *_op)
 		 * exists, so we should just cancel this write operation.
 		 */
 		spin_unlock(&object->lock);
-		op->op.state = FSCACHE_OP_ST_CANCELLED;
+		fscache_op_complete(&op->op, false);
 		_leave(" [inactive]");
 		return;
 	}
@@ -717,7 +717,7 @@ static void fscache_write_op(struct fscache_operation *_op)
 		 * cancel this write operation.
 		 */
 		spin_unlock(&object->lock);
-		op->op.state = FSCACHE_OP_ST_CANCELLED;
+		fscache_op_complete(&op->op, false);
 		_leave(" [cancel] op{f=%lx s=%u} obj{s=%u f=%lx}",
 		       _op->flags, _op->state, object->state, object->flags);
 		return;
@@ -755,7 +755,7 @@ static void fscache_write_op(struct fscache_operation *_op)
 	fscache_end_page_write(object, page);
 	if (ret < 0) {
 		fscache_abort_object(object);
-		fscache_op_complete(&op->op);
+		fscache_op_complete(&op->op, true);
 	} else {
 		fscache_enqueue_operation(&op->op);
 	}
@@ -770,7 +770,7 @@ superseded:
 	spin_unlock(&cookie->stores_lock);
 	clear_bit(FSCACHE_OBJECT_PENDING_WRITE, &object->flags);
 	spin_unlock(&object->lock);
-	fscache_op_complete(&op->op);
+	fscache_op_complete(&op->op, true);
 	_leave("");
 }
 
diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h
index 73e68c8d5df4..5dfa0aa216b6 100644
--- a/include/linux/fscache-cache.h
+++ b/include/linux/fscache-cache.h
@@ -116,7 +116,7 @@ extern atomic_t fscache_op_debug_id;
 extern void fscache_op_work_func(struct work_struct *work);
 
 extern void fscache_enqueue_operation(struct fscache_operation *);
-extern void fscache_op_complete(struct fscache_operation *);
+extern void fscache_op_complete(struct fscache_operation *, bool);
 extern void fscache_put_operation(struct fscache_operation *);
 
 /**
@@ -196,7 +196,7 @@ static inline void fscache_retrieval_complete(struct fscache_retrieval *op,
 {
 	op->n_pages -= n_pages;
 	if (op->n_pages <= 0)
-		fscache_op_complete(&op->op);
+		fscache_op_complete(&op->op, true);
 }
 
 /**
-- 
cgit v1.2.3


From d30357f2f0ec0bfb67fd39f8f76d22d02d78631e Mon Sep 17 00:00:00 2001
From: Marco Stornelli <marco.stornelli@gmail.com>
Date: Sat, 15 Dec 2012 11:59:20 +0100
Subject: vfs: drop vmtruncate

Removed vmtruncate

Signed-off-by: Marco Stornelli <marco.stornelli@gmail.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/libfs.c         | 2 --
 include/linux/fs.h | 1 -
 2 files changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/fs/libfs.c b/fs/libfs.c
index 35fc6e74cd88..916da8c4158b 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -369,8 +369,6 @@ int simple_setattr(struct dentry *dentry, struct iattr *iattr)
 	struct inode *inode = dentry->d_inode;
 	int error;
 
-	WARN_ON_ONCE(inode->i_op->truncate);
-
 	error = inode_change_ok(inode, iattr);
 	if (error)
 		return error;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index a823d4be38e7..a0c5ba57ffc5 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1565,7 +1565,6 @@ struct inode_operations {
 	int (*mknod) (struct inode *,struct dentry *,umode_t,dev_t);
 	int (*rename) (struct inode *, struct dentry *,
 			struct inode *, struct dentry *);
-	void (*truncate) (struct inode *);
 	int (*setattr) (struct dentry *, struct iattr *);
 	int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *);
 	int (*setxattr) (struct dentry *, const char *,const void *,size_t,int);
-- 
cgit v1.2.3


From 7898575fc81bd707ce0844cb06874d48e39bbe09 Mon Sep 17 00:00:00 2001
From: Marco Stornelli <marco.stornelli@gmail.com>
Date: Sat, 15 Dec 2012 12:00:02 +0100
Subject: mm: drop vmtruncate

Removed vmtruncate

Signed-off-by: Marco Stornelli <marco.stornelli@gmail.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/mm.h |  1 -
 mm/truncate.c      | 23 -----------------------
 2 files changed, 24 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 7f4f906190bd..63204078f72b 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1007,7 +1007,6 @@ static inline void unmap_shared_mapping_range(struct address_space *mapping,
 
 extern void truncate_pagecache(struct inode *inode, loff_t old, loff_t new);
 extern void truncate_setsize(struct inode *inode, loff_t newsize);
-extern int vmtruncate(struct inode *inode, loff_t offset);
 void truncate_pagecache_range(struct inode *inode, loff_t offset, loff_t end);
 int truncate_inode_page(struct address_space *mapping, struct page *page);
 int generic_error_remove_page(struct address_space *mapping, struct page *page);
diff --git a/mm/truncate.c b/mm/truncate.c
index d51ce92d6e83..c75b736e54b7 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -576,29 +576,6 @@ void truncate_setsize(struct inode *inode, loff_t newsize)
 }
 EXPORT_SYMBOL(truncate_setsize);
 
-/**
- * vmtruncate - unmap mappings "freed" by truncate() syscall
- * @inode: inode of the file used
- * @newsize: file offset to start truncating
- *
- * This function is deprecated and truncate_setsize or truncate_pagecache
- * should be used instead, together with filesystem specific block truncation.
- */
-int vmtruncate(struct inode *inode, loff_t newsize)
-{
-	int error;
-
-	error = inode_newsize_ok(inode, newsize);
-	if (error)
-		return error;
-
-	truncate_setsize(inode, newsize);
-	if (inode->i_op->truncate)
-		inode->i_op->truncate(inode);
-	return 0;
-}
-EXPORT_SYMBOL(vmtruncate);
-
 /**
  * truncate_pagecache_range - unmap and remove pagecache that is hole-punched
  * @inode: inode
-- 
cgit v1.2.3


From 471667391a92bf7bf2cd4ff31a3ad88e5dec934b Mon Sep 17 00:00:00 2001
From: Alessio Igor Bogani <abogani@kernel.org>
Date: Thu, 13 Dec 2012 12:22:39 +0100
Subject: vfs: Remove useless function prototypes

Commit 8e22cc88d68ca1a46d7d582938f979eb640ed30f removes the (un)lock_super
function definitions but forgets to remove their prototypes.

Signed-off-by: Alessio Igor Bogani <abogani@kernel.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fs.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index a0c5ba57ffc5..05cd238ad941 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1445,10 +1445,6 @@ static inline void sb_start_intwrite(struct super_block *sb)
 
 extern bool inode_owner_or_capable(const struct inode *inode);
 
-/* not quite ready to be deprecated, but... */
-extern void lock_super(struct super_block *);
-extern void unlock_super(struct super_block *);
-
 /*
  * VFS helper functions..
  */
-- 
cgit v1.2.3


From b9d6ba94b875192ef5e2dab92d72beea33b83c3d Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Thu, 20 Dec 2012 14:59:40 -0500
Subject: vfs: add a retry_estale helper function to handle retries on ESTALE

This function is expected to be called from path-based syscalls to help
them decide whether to try the lookup and call again in the event that
they got an -ESTALE return back on an earier try.

Currently, we only retry the call once on an ESTALE error, but in the
event that we decide that that's not enough in the future, we should be
able to change the logic in this helper without too much effort.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/namei.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/namei.h b/include/linux/namei.h
index 4bf19d8174ed..66542b644804 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -98,4 +98,20 @@ static inline void nd_terminate_link(void *name, size_t len, size_t maxlen)
 	((char *) name)[min(len, maxlen)] = '\0';
 }
 
+/**
+ * retry_estale - determine whether the caller should retry an operation
+ * @error: the error that would currently be returned
+ * @flags: flags being used for next lookup attempt
+ *
+ * Check to see if the error code was -ESTALE, and then determine whether
+ * to retry the call based on whether "flags" already has LOOKUP_REVAL set.
+ *
+ * Returns true if the caller should try the operation again.
+ */
+static inline bool
+retry_estale(const long error, const unsigned int flags)
+{
+	return error == -ESTALE && !(flags & LOOKUP_REVAL);
+}
+
 #endif /* _LINUX_NAMEI_H */
-- 
cgit v1.2.3


From 1ac12b4b6d707937f9de6d09622823b2fd0c93ef Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Tue, 11 Dec 2012 12:10:06 -0500
Subject: vfs: turn is_dir argument to kern_path_create into a lookup_flags arg

Where we can pass in LOOKUP_DIRECTORY or LOOKUP_REVAL. Any other flags
passed in here are currently ignored.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/powerpc/platforms/cell/spufs/syscalls.c |  2 +-
 drivers/base/devtmpfs.c                      |  2 +-
 fs/namei.c                                   | 21 ++++++++++++++++-----
 include/linux/namei.h                        |  4 ++--
 4 files changed, 20 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/platforms/cell/spufs/syscalls.c b/arch/powerpc/platforms/cell/spufs/syscalls.c
index 5b7d8ffbf890..baee994fe810 100644
--- a/arch/powerpc/platforms/cell/spufs/syscalls.c
+++ b/arch/powerpc/platforms/cell/spufs/syscalls.c
@@ -66,7 +66,7 @@ static long do_spu_create(const char __user *pathname, unsigned int flags,
 	struct dentry *dentry;
 	int ret;
 
-	dentry = user_path_create(AT_FDCWD, pathname, &path, 1);
+	dentry = user_path_create(AT_FDCWD, pathname, &path, LOOKUP_DIRECTORY);
 	ret = PTR_ERR(dentry);
 	if (!IS_ERR(dentry)) {
 		ret = spufs_create(&path, dentry, flags, mode, neighbor);
diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c
index 147d1a4dd269..17cf7cad601e 100644
--- a/drivers/base/devtmpfs.c
+++ b/drivers/base/devtmpfs.c
@@ -148,7 +148,7 @@ static int dev_mkdir(const char *name, umode_t mode)
 	struct path path;
 	int err;
 
-	dentry = kern_path_create(AT_FDCWD, name, &path, 1);
+	dentry = kern_path_create(AT_FDCWD, name, &path, LOOKUP_DIRECTORY);
 	if (IS_ERR(dentry))
 		return PTR_ERR(dentry);
 
diff --git a/fs/namei.c b/fs/namei.c
index 25a41e02984b..8f8e41f6eb52 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3030,12 +3030,22 @@ struct file *do_file_open_root(struct dentry *dentry, struct vfsmount *mnt,
 	return file;
 }
 
-struct dentry *kern_path_create(int dfd, const char *pathname, struct path *path, int is_dir)
+struct dentry *kern_path_create(int dfd, const char *pathname,
+				struct path *path, unsigned int lookup_flags)
 {
 	struct dentry *dentry = ERR_PTR(-EEXIST);
 	struct nameidata nd;
 	int err2;
-	int error = do_path_lookup(dfd, pathname, LOOKUP_PARENT, &nd);
+	int error;
+	bool is_dir = (lookup_flags & LOOKUP_DIRECTORY);
+
+	/*
+	 * Note that only LOOKUP_REVAL and LOOKUP_DIRECTORY matter here. Any
+	 * other flags passed in are ignored!
+	 */
+	lookup_flags &= LOOKUP_REVAL;
+
+	error = do_path_lookup(dfd, pathname, LOOKUP_PARENT|lookup_flags, &nd);
 	if (error)
 		return ERR_PTR(error);
 
@@ -3099,13 +3109,14 @@ void done_path_create(struct path *path, struct dentry *dentry)
 }
 EXPORT_SYMBOL(done_path_create);
 
-struct dentry *user_path_create(int dfd, const char __user *pathname, struct path *path, int is_dir)
+struct dentry *user_path_create(int dfd, const char __user *pathname,
+				struct path *path, unsigned int lookup_flags)
 {
 	struct filename *tmp = getname(pathname);
 	struct dentry *res;
 	if (IS_ERR(tmp))
 		return ERR_CAST(tmp);
-	res = kern_path_create(dfd, tmp->name, path, is_dir);
+	res = kern_path_create(dfd, tmp->name, path, lookup_flags);
 	putname(tmp);
 	return res;
 }
@@ -3228,7 +3239,7 @@ SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, umode_t, mode)
 	struct path path;
 	int error;
 
-	dentry = user_path_create(dfd, pathname, &path, 1);
+	dentry = user_path_create(dfd, pathname, &path, LOOKUP_DIRECTORY);
 	if (IS_ERR(dentry))
 		return PTR_ERR(dentry);
 
diff --git a/include/linux/namei.h b/include/linux/namei.h
index 66542b644804..e998c030061d 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -65,8 +65,8 @@ extern int user_path_at_empty(int, const char __user *, unsigned, struct path *,
 
 extern int kern_path(const char *, unsigned, struct path *);
 
-extern struct dentry *kern_path_create(int, const char *, struct path *, int);
-extern struct dentry *user_path_create(int, const char __user *, struct path *, int);
+extern struct dentry *kern_path_create(int, const char *, struct path *, unsigned int);
+extern struct dentry *user_path_create(int, const char __user *, struct path *, unsigned int);
 extern void done_path_create(struct path *, struct dentry *);
 extern struct dentry *kern_path_locked(const char *, struct path *);
 extern int vfs_path_lookup(struct dentry *, struct vfsmount *,
-- 
cgit v1.2.3


From b66c5984017533316fd1951770302649baf1aa33 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Thu, 20 Dec 2012 15:05:16 -0800
Subject: exec: do not leave bprm->interp on stack

If a series of scripts are executed, each triggering module loading via
unprintable bytes in the script header, kernel stack contents can leak
into the command line.

Normally execution of binfmt_script and binfmt_misc happens recursively.
However, when modules are enabled, and unprintable bytes exist in the
bprm->buf, execution will restart after attempting to load matching
binfmt modules.  Unfortunately, the logic in binfmt_script and
binfmt_misc does not expect to get restarted.  They leave bprm->interp
pointing to their local stack.  This means on restart bprm->interp is
left pointing into unused stack memory which can then be copied into the
userspace argv areas.

After additional study, it seems that both recursion and restart remains
the desirable way to handle exec with scripts, misc, and modules.  As
such, we need to protect the changes to interp.

This changes the logic to require allocation for any changes to the
bprm->interp.  To avoid adding a new kmalloc to every exec, the default
value is left as-is.  Only when passing through binfmt_script or
binfmt_misc does an allocation take place.

For a proof of concept, see DoTest.sh from:

   http://www.halfdog.net/Security/2012/LinuxKernelBinfmtScriptStackDataDisclosure/

Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: halfdog <me@halfdog.net>
Cc: P J P <ppandit@redhat.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/binfmt_misc.c        |  5 ++++-
 fs/binfmt_script.c      |  4 +++-
 fs/exec.c               | 15 +++++++++++++++
 include/linux/binfmts.h |  1 +
 4 files changed, 23 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 9be335fb8a7c..0c8869fdd14e 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -172,7 +172,10 @@ static int load_misc_binary(struct linux_binprm *bprm)
 		goto _error;
 	bprm->argc ++;
 
-	bprm->interp = iname;	/* for binfmt_script */
+	/* Update interp in case binfmt_script needs it. */
+	retval = bprm_change_interp(iname, bprm);
+	if (retval < 0)
+		goto _error;
 
 	interp_file = open_exec (iname);
 	retval = PTR_ERR (interp_file);
diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c
index 1610a91637e5..5027a3e14922 100644
--- a/fs/binfmt_script.c
+++ b/fs/binfmt_script.c
@@ -80,7 +80,9 @@ static int load_script(struct linux_binprm *bprm)
 	retval = copy_strings_kernel(1, &i_name, bprm);
 	if (retval) return retval; 
 	bprm->argc++;
-	bprm->interp = interp;
+	retval = bprm_change_interp(interp, bprm);
+	if (retval < 0)
+		return retval;
 
 	/*
 	 * OK, now restart the process with the interpreter's dentry.
diff --git a/fs/exec.c b/fs/exec.c
index d8e1191cb112..237d5342786c 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1175,9 +1175,24 @@ void free_bprm(struct linux_binprm *bprm)
 		mutex_unlock(&current->signal->cred_guard_mutex);
 		abort_creds(bprm->cred);
 	}
+	/* If a binfmt changed the interp, free it. */
+	if (bprm->interp != bprm->filename)
+		kfree(bprm->interp);
 	kfree(bprm);
 }
 
+int bprm_change_interp(char *interp, struct linux_binprm *bprm)
+{
+	/* If a binfmt changed the interp, free it first. */
+	if (bprm->interp != bprm->filename)
+		kfree(bprm->interp);
+	bprm->interp = kstrdup(interp, GFP_KERNEL);
+	if (!bprm->interp)
+		return -ENOMEM;
+	return 0;
+}
+EXPORT_SYMBOL(bprm_change_interp);
+
 /*
  * install the new credentials for this executable
  */
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index a4c2b565c835..bdf3965f0a29 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -112,6 +112,7 @@ extern int setup_arg_pages(struct linux_binprm * bprm,
 			   unsigned long stack_top,
 			   int executable_stack);
 extern int bprm_mm_init(struct linux_binprm *bprm);
+extern int bprm_change_interp(char *interp, struct linux_binprm *bprm);
 extern int copy_strings_kernel(int argc, const char *const *argv,
 			       struct linux_binprm *bprm);
 extern int prepare_bprm_creds(struct linux_binprm *bprm);
-- 
cgit v1.2.3


From c4e18497d8fd92eef2c6e7eadcc1a107ccd115ea Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Thu, 20 Dec 2012 15:05:42 -0800
Subject: linux/kernel.h: fix DIV_ROUND_CLOSEST with unsigned divisors

Commit 263a523d18bc ("linux/kernel.h: Fix warning seen with W=1 due to
change in DIV_ROUND_CLOSEST") fixes a warning seen with W=1 due to
change in DIV_ROUND_CLOSEST.

Unfortunately, the C compiler converts divide operations with unsigned
divisors to unsigned, even if the dividend is signed and negative (for
example, -10 / 5U = 858993457).  The C standard says "If one operand has
unsigned int type, the other operand is converted to unsigned int", so
the compiler is not to blame.  As a result, DIV_ROUND_CLOSEST(0, 2U) and
similar operations now return bad values, since the automatic conversion
of expressions such as "0 - 2U/2" to unsigned was not taken into
account.

Fix by checking for the divisor variable type when deciding which
operation to perform.  This fixes DIV_ROUND_CLOSEST(0, 2U), but still
returns bad values for negative dividends divided by unsigned divisors.
Mark the latter case as unsupported.

One observed effect of this problem is that the s2c_hwmon driver reports
a value of 4198403 instead of 0 if the ADC reads 0.

Other impact is unpredictable.  Problem is seen if the divisor is an
unsigned variable or constant and the dividend is less than (divisor/2).

Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Reported-by: Juergen Beisert <jbe@pengutronix.de>
Tested-by: Juergen Beisert <jbe@pengutronix.de>
Cc: Jean Delvare <khali@linux-fr.org>
Cc: <stable@vger.kernel.org>	[3.7.x]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kernel.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index d140e8fb075f..c566927efcbd 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -77,13 +77,15 @@
 
 /*
  * Divide positive or negative dividend by positive divisor and round
- * to closest integer. Result is undefined for negative divisors.
+ * to closest integer. Result is undefined for negative divisors and
+ * for negative dividends if the divisor variable type is unsigned.
  */
 #define DIV_ROUND_CLOSEST(x, divisor)(			\
 {							\
 	typeof(x) __x = x;				\
 	typeof(divisor) __d = divisor;			\
-	(((typeof(x))-1) > 0 || (__x) > 0) ?		\
+	(((typeof(x))-1) > 0 ||				\
+	 ((typeof(divisor))-1) > 0 || (__x) > 0) ?	\
 		(((__x) + ((__d) / 2)) / (__d)) :	\
 		(((__x) - ((__d) / 2)) / (__d));	\
 }							\
-- 
cgit v1.2.3


From 80f5ab097b87c86581cb9736a8e55c5a3047d4bb Mon Sep 17 00:00:00 2001
From: Shaun Ruffell <sruffell@digium.com>
Date: Sun, 19 Aug 2012 01:11:24 -0300
Subject: edac: edac_mc no longer deals with kobjects directly

There are no more embedded kobjects in struct mem_ctl_info. Remove a header and
a comment that does not reflect the code anymore.

Signed-off-by: Shaun Ruffell <sruffell@digium.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/edac/edac_mc.c | 7 -------
 include/linux/edac.h   | 1 -
 2 files changed, 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index 281f566a5513..a641f623fffd 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -441,13 +441,6 @@ struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
 
 	mci->op_state = OP_ALLOC;
 
-	/* at this point, the root kobj is valid, and in order to
-	 * 'free' the object, then the function:
-	 *      edac_mc_unregister_sysfs_main_kobj() must be called
-	 * which will perform kobj unregistration and the actual free
-	 * will occur during the kobject callback operation
-	 */
-
 	return mci;
 
 error:
diff --git a/include/linux/edac.h b/include/linux/edac.h
index 1b8c02b36f76..4784213c819d 100644
--- a/include/linux/edac.h
+++ b/include/linux/edac.h
@@ -14,7 +14,6 @@
 
 #include <linux/atomic.h>
 #include <linux/device.h>
-#include <linux/kobject.h>
 #include <linux/completion.h>
 #include <linux/workqueue.h>
 #include <linux/debugfs.h>
-- 
cgit v1.2.3


From d54eaa5a0fde0a202e4e91f200f818edcef15bee Mon Sep 17 00:00:00 2001
From: Mike Snitzer <snitzer@redhat.com>
Date: Fri, 21 Dec 2012 20:23:36 +0000
Subject: dm: prepare to support WRITE SAME

Allow targets to opt in to WRITE SAME support by setting
'num_write_same_requests' in the dm_target structure.

A dm device will only advertise WRITE SAME support if all its
targets and all its underlying devices support it.

Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm-table.c         | 30 +++++++++++++++++++++++++++++-
 include/linux/device-mapper.h |  5 +++++
 2 files changed, 34 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index fa2955790031..6be58b696377 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -1414,6 +1414,33 @@ static bool dm_table_all_devices_attribute(struct dm_table *t,
 	return 1;
 }
 
+static int device_not_write_same_capable(struct dm_target *ti, struct dm_dev *dev,
+					 sector_t start, sector_t len, void *data)
+{
+	struct request_queue *q = bdev_get_queue(dev->bdev);
+
+	return q && !q->limits.max_write_same_sectors;
+}
+
+static bool dm_table_supports_write_same(struct dm_table *t)
+{
+	struct dm_target *ti;
+	unsigned i = 0;
+
+	while (i < dm_table_get_num_targets(t)) {
+		ti = dm_table_get_target(t, i++);
+
+		if (!ti->num_write_same_requests)
+			return false;
+
+		if (!ti->type->iterate_devices ||
+		    !ti->type->iterate_devices(ti, device_not_write_same_capable, NULL))
+			return false;
+	}
+
+	return true;
+}
+
 void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
 			       struct queue_limits *limits)
 {
@@ -1445,7 +1472,8 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
 	else
 		queue_flag_clear_unlocked(QUEUE_FLAG_NONROT, q);
 
-	q->limits.max_write_same_sectors = 0;
+	if (!dm_table_supports_write_same(t))
+		q->limits.max_write_same_sectors = 0;
 
 	dm_table_set_integrity(t);
 
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 38d27a10aa5d..d1f6cd8486f2 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -205,6 +205,11 @@ struct dm_target {
 	 */
 	unsigned num_discard_requests;
 
+	/*
+	 * The number of WRITE SAME requests that will be submitted to the target.
+	 */
+	unsigned num_write_same_requests;
+
 	/* target specific data */
 	void *private;
 
-- 
cgit v1.2.3


From c0820cf5ad09522bdd9ff68e84841a09c9f339d8 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Fri, 21 Dec 2012 20:23:38 +0000
Subject: dm: introduce per_bio_data

Introduce a field per_bio_data_size in struct dm_target.

Targets can set this field in the constructor. If a target sets this
field to a non-zero value, "per_bio_data_size" bytes of auxiliary data
are allocated for each bio submitted to the target. These data can be
used for any purpose by the target and help us improve performance by
removing some per-target mempools.

Per-bio data is accessed with dm_per_bio_data. The
argument data_size must be the same as the value per_bio_data_size in
dm_target.

If the target has a pointer to per_bio_data, it can get a pointer to
the bio with dm_bio_from_per_bio_data() function (data_size must be the
same as the value passed to dm_per_bio_data).

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm-table.c         | 11 ++++++++++-
 drivers/md/dm.c               | 33 +++++++++++++++------------------
 drivers/md/dm.h               |  2 +-
 include/linux/device-mapper.h | 30 ++++++++++++++++++++++++++++++
 4 files changed, 56 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 6be58b696377..daf25d0890b3 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -967,13 +967,22 @@ bool dm_table_request_based(struct dm_table *t)
 int dm_table_alloc_md_mempools(struct dm_table *t)
 {
 	unsigned type = dm_table_get_type(t);
+	unsigned per_bio_data_size = 0;
+	struct dm_target *tgt;
+	unsigned i;
 
 	if (unlikely(type == DM_TYPE_NONE)) {
 		DMWARN("no table type is set, can't allocate mempools");
 		return -EINVAL;
 	}
 
-	t->mempools = dm_alloc_md_mempools(type, t->integrity_supported);
+	if (type == DM_TYPE_BIO_BASED)
+		for (i = 0; i < t->num_targets; i++) {
+			tgt = t->targets + i;
+			per_bio_data_size = max(per_bio_data_size, tgt->per_bio_data_size);
+		}
+
+	t->mempools = dm_alloc_md_mempools(type, t->integrity_supported, per_bio_data_size);
 	if (!t->mempools)
 		return -ENOMEM;
 
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 5401cdce0fc5..2765cf2ba0ff 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -62,18 +62,6 @@ struct dm_io {
 	spinlock_t endio_lock;
 };
 
-/*
- * For bio-based dm.
- * One of these is allocated per target within a bio.  Hopefully
- * this will be simplified out one day.
- */
-struct dm_target_io {
-	struct dm_io *io;
-	struct dm_target *ti;
-	union map_info info;
-	struct bio clone;
-};
-
 /*
  * For request-based dm.
  * One of these is allocated per request.
@@ -1980,13 +1968,20 @@ static void free_dev(struct mapped_device *md)
 
 static void __bind_mempools(struct mapped_device *md, struct dm_table *t)
 {
-	struct dm_md_mempools *p;
+	struct dm_md_mempools *p = dm_table_get_md_mempools(t);
 
-	if (md->io_pool && (md->tio_pool || dm_table_get_type(t) == DM_TYPE_BIO_BASED) && md->bs)
-		/* the md already has necessary mempools */
+	if (md->io_pool && (md->tio_pool || dm_table_get_type(t) == DM_TYPE_BIO_BASED) && md->bs) {
+		/*
+		 * The md already has necessary mempools. Reload just the
+		 * bioset because front_pad may have changed because
+		 * a different table was loaded.
+		 */
+		bioset_free(md->bs);
+		md->bs = p->bs;
+		p->bs = NULL;
 		goto out;
+	}
 
-	p = dm_table_get_md_mempools(t);
 	BUG_ON(!p || md->io_pool || md->tio_pool || md->bs);
 
 	md->io_pool = p->io_pool;
@@ -2745,7 +2740,7 @@ int dm_noflush_suspending(struct dm_target *ti)
 }
 EXPORT_SYMBOL_GPL(dm_noflush_suspending);
 
-struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity)
+struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity, unsigned per_bio_data_size)
 {
 	struct dm_md_mempools *pools = kmalloc(sizeof(*pools), GFP_KERNEL);
 	unsigned int pool_size = (type == DM_TYPE_BIO_BASED) ? 16 : MIN_IOS;
@@ -2753,6 +2748,8 @@ struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity)
 	if (!pools)
 		return NULL;
 
+	per_bio_data_size = roundup(per_bio_data_size, __alignof__(struct dm_target_io));
+
 	pools->io_pool = (type == DM_TYPE_BIO_BASED) ?
 			 mempool_create_slab_pool(MIN_IOS, _io_cache) :
 			 mempool_create_slab_pool(MIN_IOS, _rq_bio_info_cache);
@@ -2768,7 +2765,7 @@ struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity)
 
 	pools->bs = (type == DM_TYPE_BIO_BASED) ?
 		bioset_create(pool_size,
-			      offsetof(struct dm_target_io, clone)) :
+			      per_bio_data_size + offsetof(struct dm_target_io, clone)) :
 		bioset_create(pool_size,
 			      offsetof(struct dm_rq_clone_bio_info, clone));
 	if (!pools->bs)
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index 6a99fefaa743..45b97da1bd06 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -159,7 +159,7 @@ void dm_kcopyd_exit(void);
 /*
  * Mempool operations
  */
-struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity);
+struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity, unsigned per_bio_data_size);
 void dm_free_md_mempools(struct dm_md_mempools *pools);
 
 #endif
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index d1f6cd8486f2..6f0e73b4a80d 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -210,6 +210,12 @@ struct dm_target {
 	 */
 	unsigned num_write_same_requests;
 
+	/*
+	 * The minimum number of extra bytes allocated in each bio for the
+	 * target to use.  dm_per_bio_data returns the data location.
+	 */
+	unsigned per_bio_data_size;
+
 	/* target specific data */
 	void *private;
 
@@ -246,6 +252,30 @@ struct dm_target_callbacks {
 	int (*congested_fn) (struct dm_target_callbacks *, int);
 };
 
+/*
+ * For bio-based dm.
+ * One of these is allocated for each bio.
+ * This structure shouldn't be touched directly by target drivers.
+ * It is here so that we can inline dm_per_bio_data and
+ * dm_bio_from_per_bio_data
+ */
+struct dm_target_io {
+	struct dm_io *io;
+	struct dm_target *ti;
+	union map_info info;
+	struct bio clone;
+};
+
+static inline void *dm_per_bio_data(struct bio *bio, size_t data_size)
+{
+	return (char *)bio - offsetof(struct dm_target_io, clone) - data_size;
+}
+
+static inline struct bio *dm_bio_from_per_bio_data(void *data, size_t data_size)
+{
+	return (struct bio *)((char *)data + data_size + offsetof(struct dm_target_io, clone));
+}
+
 int dm_register_target(struct target_type *t);
 void dm_unregister_target(struct target_type *t);
 
-- 
cgit v1.2.3


From ddbd658f6446a35e4d6ba84812fd71023320cae2 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Fri, 21 Dec 2012 20:23:39 +0000
Subject: dm: move target request nr to dm_target_io

This patch moves target_request_nr from map_info to dm_target_io and
makes it accessible with dm_bio_get_target_request_nr.

This patch is a preparation for the next patch that removes map_info.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm-snap.c          |  2 +-
 drivers/md/dm-stripe.c        |  4 ++--
 drivers/md/dm.c               |  3 ++-
 include/linux/device-mapper.h | 14 ++++++++++----
 4 files changed, 15 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 5e88bc437be0..b7e179cdc5af 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -1682,7 +1682,7 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio,
 	chunk_t chunk;
 
 	if (bio->bi_rw & REQ_FLUSH) {
-		if (!map_context->target_request_nr)
+		if (!dm_bio_get_target_request_nr(bio))
 			bio->bi_bdev = s->origin->bdev;
 		else
 			bio->bi_bdev = s->cow->bdev;
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index e2f876539743..4e7ba82146c0 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -279,13 +279,13 @@ static int stripe_map(struct dm_target *ti, struct bio *bio,
 	unsigned target_request_nr;
 
 	if (bio->bi_rw & REQ_FLUSH) {
-		target_request_nr = map_context->target_request_nr;
+		target_request_nr = dm_bio_get_target_request_nr(bio);
 		BUG_ON(target_request_nr >= sc->stripes);
 		bio->bi_bdev = sc->stripe[target_request_nr].dev->bdev;
 		return DM_MAPIO_REMAPPED;
 	}
 	if (unlikely(bio->bi_rw & REQ_DISCARD)) {
-		target_request_nr = map_context->target_request_nr;
+		target_request_nr = dm_bio_get_target_request_nr(bio);
 		BUG_ON(target_request_nr >= sc->stripes);
 		return stripe_map_discard(sc, bio, target_request_nr);
 	}
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 2765cf2ba0ff..5ee580b4f330 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1099,6 +1099,7 @@ static struct dm_target_io *alloc_tio(struct clone_info *ci,
 	tio->io = ci->io;
 	tio->ti = ti;
 	memset(&tio->info, 0, sizeof(tio->info));
+	tio->target_request_nr = 0;
 
 	return tio;
 }
@@ -1109,7 +1110,7 @@ static void __issue_target_request(struct clone_info *ci, struct dm_target *ti,
 	struct dm_target_io *tio = alloc_tio(ci, ti, ci->bio->bi_max_vecs);
 	struct bio *clone = &tio->clone;
 
-	tio->info.target_request_nr = request_nr;
+	tio->target_request_nr = request_nr;
 
 	/*
 	 * Discard requests require the bio's inline iovecs be initialized.
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 6f0e73b4a80d..eb96ef6fd8b7 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -23,7 +23,6 @@ typedef enum { STATUSTYPE_INFO, STATUSTYPE_TABLE } status_type_t;
 union map_info {
 	void *ptr;
 	unsigned long long ll;
-	unsigned target_request_nr;
 };
 
 /*
@@ -193,20 +192,21 @@ struct dm_target {
 	 * A number of zero-length barrier requests that will be submitted
 	 * to the target for the purpose of flushing cache.
 	 *
-	 * The request number will be placed in union map_info->target_request_nr.
+	 * The request number can be accessed with dm_bio_get_target_request_nr.
 	 * It is a responsibility of the target driver to remap these requests
 	 * to the real underlying devices.
 	 */
 	unsigned num_flush_requests;
 
 	/*
-	 * The number of discard requests that will be submitted to the
-	 * target.  map_info->request_nr is used just like num_flush_requests.
+	 * The number of discard requests that will be submitted to the target.
+	 * The request number can be accessed with dm_bio_get_target_request_nr.
 	 */
 	unsigned num_discard_requests;
 
 	/*
 	 * The number of WRITE SAME requests that will be submitted to the target.
+	 * The request number can be accessed with dm_bio_get_target_request_nr.
 	 */
 	unsigned num_write_same_requests;
 
@@ -263,6 +263,7 @@ struct dm_target_io {
 	struct dm_io *io;
 	struct dm_target *ti;
 	union map_info info;
+	unsigned target_request_nr;
 	struct bio clone;
 };
 
@@ -276,6 +277,11 @@ static inline struct bio *dm_bio_from_per_bio_data(void *data, size_t data_size)
 	return (struct bio *)((char *)data + data_size + offsetof(struct dm_target_io, clone));
 }
 
+static inline unsigned dm_bio_get_target_request_nr(const struct bio *bio)
+{
+	return container_of(bio, struct dm_target_io, clone)->target_request_nr;
+}
+
 int dm_register_target(struct target_type *t);
 void dm_unregister_target(struct target_type *t);
 
-- 
cgit v1.2.3


From 7de3ee57da4b717050e79c9313a9bf66ccc72519 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Fri, 21 Dec 2012 20:23:41 +0000
Subject: dm: remove map_info

This patch removes map_info from bio-based device mapper targets.
map_info is still used for request-based targets.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm-crypt.c         |  5 ++---
 drivers/md/dm-delay.c         |  5 ++---
 drivers/md/dm-flakey.c        |  6 ++----
 drivers/md/dm-linear.c        |  3 +--
 drivers/md/dm-raid.c          |  4 ++--
 drivers/md/dm-raid1.c         |  6 ++----
 drivers/md/dm-snap.c          | 12 ++++--------
 drivers/md/dm-stripe.c        |  6 ++----
 drivers/md/dm-target.c        |  5 ++---
 drivers/md/dm-thin.c          | 15 +++++----------
 drivers/md/dm-verity.c        |  3 +--
 drivers/md/dm-zero.c          |  5 ++---
 drivers/md/dm.c               |  4 ++--
 include/linux/device-mapper.h |  6 ++----
 14 files changed, 31 insertions(+), 54 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index bbf459bca61d..f7369f9d8595 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -1689,8 +1689,7 @@ bad:
 	return ret;
 }
 
-static int crypt_map(struct dm_target *ti, struct bio *bio,
-		     union map_info *map_context)
+static int crypt_map(struct dm_target *ti, struct bio *bio)
 {
 	struct dm_crypt_io *io;
 	struct crypt_config *cc = ti->private;
@@ -1846,7 +1845,7 @@ static int crypt_iterate_devices(struct dm_target *ti,
 
 static struct target_type crypt_target = {
 	.name   = "crypt",
-	.version = {1, 11, 0},
+	.version = {1, 12, 0},
 	.module = THIS_MODULE,
 	.ctr    = crypt_ctr,
 	.dtr    = crypt_dtr,
diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c
index f53846f9ab50..cc1bd048acb2 100644
--- a/drivers/md/dm-delay.c
+++ b/drivers/md/dm-delay.c
@@ -274,8 +274,7 @@ static void delay_resume(struct dm_target *ti)
 	atomic_set(&dc->may_delay, 1);
 }
 
-static int delay_map(struct dm_target *ti, struct bio *bio,
-		     union map_info *map_context)
+static int delay_map(struct dm_target *ti, struct bio *bio)
 {
 	struct delay_c *dc = ti->private;
 
@@ -338,7 +337,7 @@ out:
 
 static struct target_type delay_target = {
 	.name	     = "delay",
-	.version     = {1, 1, 0},
+	.version     = {1, 2, 0},
 	.module      = THIS_MODULE,
 	.ctr	     = delay_ctr,
 	.dtr	     = delay_dtr,
diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c
index 660f98167e7b..9721f2ffb1a2 100644
--- a/drivers/md/dm-flakey.c
+++ b/drivers/md/dm-flakey.c
@@ -270,8 +270,7 @@ static void corrupt_bio_data(struct bio *bio, struct flakey_c *fc)
 	}
 }
 
-static int flakey_map(struct dm_target *ti, struct bio *bio,
-		      union map_info *map_context)
+static int flakey_map(struct dm_target *ti, struct bio *bio)
 {
 	struct flakey_c *fc = ti->private;
 	unsigned elapsed;
@@ -321,8 +320,7 @@ map_bio:
 	return DM_MAPIO_REMAPPED;
 }
 
-static int flakey_end_io(struct dm_target *ti, struct bio *bio,
-			 int error, union map_info *map_context)
+static int flakey_end_io(struct dm_target *ti, struct bio *bio, int error)
 {
 	struct flakey_c *fc = ti->private;
 	struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data));
diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c
index 82222a8cf750..328cad5617ab 100644
--- a/drivers/md/dm-linear.c
+++ b/drivers/md/dm-linear.c
@@ -88,8 +88,7 @@ static void linear_map_bio(struct dm_target *ti, struct bio *bio)
 		bio->bi_sector = linear_map_sector(ti, bio->bi_sector);
 }
 
-static int linear_map(struct dm_target *ti, struct bio *bio,
-		      union map_info *map_context)
+static int linear_map(struct dm_target *ti, struct bio *bio)
 {
 	linear_map_bio(ti, bio);
 
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 4a20bf8c72da..3d8984edeff7 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -1218,7 +1218,7 @@ static void raid_dtr(struct dm_target *ti)
 	context_free(rs);
 }
 
-static int raid_map(struct dm_target *ti, struct bio *bio, union map_info *map_context)
+static int raid_map(struct dm_target *ti, struct bio *bio)
 {
 	struct raid_set *rs = ti->private;
 	struct mddev *mddev = &rs->md;
@@ -1432,7 +1432,7 @@ static void raid_resume(struct dm_target *ti)
 
 static struct target_type raid_target = {
 	.name = "raid",
-	.version = {1, 3, 1},
+	.version = {1, 4, 0},
 	.module = THIS_MODULE,
 	.ctr = raid_ctr,
 	.dtr = raid_dtr,
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index 57685cf0afa8..fa519185ebba 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -1142,8 +1142,7 @@ static void mirror_dtr(struct dm_target *ti)
 /*
  * Mirror mapping function
  */
-static int mirror_map(struct dm_target *ti, struct bio *bio,
-		      union map_info *map_context)
+static int mirror_map(struct dm_target *ti, struct bio *bio)
 {
 	int r, rw = bio_rw(bio);
 	struct mirror *m;
@@ -1192,8 +1191,7 @@ static int mirror_map(struct dm_target *ti, struct bio *bio,
 	return DM_MAPIO_REMAPPED;
 }
 
-static int mirror_end_io(struct dm_target *ti, struct bio *bio,
-			 int error, union map_info *map_context)
+static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error)
 {
 	int rw = bio_rw(bio);
 	struct mirror_set *ms = (struct mirror_set *) ti->private;
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index e1ecacf2456f..59fc18ae52c2 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -1567,8 +1567,7 @@ static void remap_exception(struct dm_snapshot *s, struct dm_exception *e,
 					  s->store->chunk_mask);
 }
 
-static int snapshot_map(struct dm_target *ti, struct bio *bio,
-			union map_info *map_context)
+static int snapshot_map(struct dm_target *ti, struct bio *bio)
 {
 	struct dm_exception *e;
 	struct dm_snapshot *s = ti->private;
@@ -1683,8 +1682,7 @@ out:
  * If merging is currently taking place on the chunk in question, the
  * I/O is deferred by adding it to s->bios_queued_during_merge.
  */
-static int snapshot_merge_map(struct dm_target *ti, struct bio *bio,
-			      union map_info *map_context)
+static int snapshot_merge_map(struct dm_target *ti, struct bio *bio)
 {
 	struct dm_exception *e;
 	struct dm_snapshot *s = ti->private;
@@ -1744,8 +1742,7 @@ out_unlock:
 	return r;
 }
 
-static int snapshot_end_io(struct dm_target *ti, struct bio *bio,
-			   int error, union map_info *map_context)
+static int snapshot_end_io(struct dm_target *ti, struct bio *bio, int error)
 {
 	struct dm_snapshot *s = ti->private;
 
@@ -2119,8 +2116,7 @@ static void origin_dtr(struct dm_target *ti)
 	dm_put_device(ti, dev);
 }
 
-static int origin_map(struct dm_target *ti, struct bio *bio,
-		      union map_info *map_context)
+static int origin_map(struct dm_target *ti, struct bio *bio)
 {
 	struct dm_dev *dev = ti->private;
 	bio->bi_bdev = dev->bdev;
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index 4e7ba82146c0..6b0e5ea38027 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -271,8 +271,7 @@ static int stripe_map_discard(struct stripe_c *sc, struct bio *bio,
 	}
 }
 
-static int stripe_map(struct dm_target *ti, struct bio *bio,
-		      union map_info *map_context)
+static int stripe_map(struct dm_target *ti, struct bio *bio)
 {
 	struct stripe_c *sc = ti->private;
 	uint32_t stripe;
@@ -342,8 +341,7 @@ static int stripe_status(struct dm_target *ti, status_type_t type,
 	return 0;
 }
 
-static int stripe_end_io(struct dm_target *ti, struct bio *bio,
-			 int error, union map_info *map_context)
+static int stripe_end_io(struct dm_target *ti, struct bio *bio, int error)
 {
 	unsigned i;
 	char major_minor[16];
diff --git a/drivers/md/dm-target.c b/drivers/md/dm-target.c
index 8da366cf381c..617d21a77256 100644
--- a/drivers/md/dm-target.c
+++ b/drivers/md/dm-target.c
@@ -126,15 +126,14 @@ static void io_err_dtr(struct dm_target *tt)
 	/* empty */
 }
 
-static int io_err_map(struct dm_target *tt, struct bio *bio,
-		      union map_info *map_context)
+static int io_err_map(struct dm_target *tt, struct bio *bio)
 {
 	return -EIO;
 }
 
 static struct target_type error_target = {
 	.name = "error",
-	.version = {1, 0, 1},
+	.version = {1, 1, 0},
 	.ctr  = io_err_ctr,
 	.dtr  = io_err_dtr,
 	.map  = io_err_map,
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index e7743c69a24c..675ae5274016 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -1371,8 +1371,7 @@ static void thin_hook_bio(struct thin_c *tc, struct bio *bio)
 /*
  * Non-blocking function called from the thin target's map function.
  */
-static int thin_bio_map(struct dm_target *ti, struct bio *bio,
-			union map_info *map_context)
+static int thin_bio_map(struct dm_target *ti, struct bio *bio)
 {
 	int r;
 	struct thin_c *tc = ti->private;
@@ -1980,8 +1979,7 @@ out_unlock:
 	return r;
 }
 
-static int pool_map(struct dm_target *ti, struct bio *bio,
-		    union map_info *map_context)
+static int pool_map(struct dm_target *ti, struct bio *bio)
 {
 	int r;
 	struct pool_c *pt = ti->private;
@@ -2626,17 +2624,14 @@ out_unlock:
 	return r;
 }
 
-static int thin_map(struct dm_target *ti, struct bio *bio,
-		    union map_info *map_context)
+static int thin_map(struct dm_target *ti, struct bio *bio)
 {
 	bio->bi_sector = dm_target_offset(ti, bio->bi_sector);
 
-	return thin_bio_map(ti, bio, map_context);
+	return thin_bio_map(ti, bio);
 }
 
-static int thin_endio(struct dm_target *ti,
-		      struct bio *bio, int err,
-		      union map_info *map_context)
+static int thin_endio(struct dm_target *ti, struct bio *bio, int err)
 {
 	unsigned long flags;
 	struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c
index 78f349894b24..52cde982164a 100644
--- a/drivers/md/dm-verity.c
+++ b/drivers/md/dm-verity.c
@@ -458,8 +458,7 @@ no_prefetch_cluster:
  * Bio map function. It allocates dm_verity_io structure and bio vector and
  * fills them. Then it issues prefetches and the I/O.
  */
-static int verity_map(struct dm_target *ti, struct bio *bio,
-		      union map_info *map_context)
+static int verity_map(struct dm_target *ti, struct bio *bio)
 {
 	struct dm_verity *v = ti->private;
 	struct dm_verity_io *io;
diff --git a/drivers/md/dm-zero.c b/drivers/md/dm-zero.c
index cc2b3cb81946..69a5c3b3b340 100644
--- a/drivers/md/dm-zero.c
+++ b/drivers/md/dm-zero.c
@@ -33,8 +33,7 @@ static int zero_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 /*
  * Return zeros only on reads
  */
-static int zero_map(struct dm_target *ti, struct bio *bio,
-		      union map_info *map_context)
+static int zero_map(struct dm_target *ti, struct bio *bio)
 {
 	switch(bio_rw(bio)) {
 	case READ:
@@ -56,7 +55,7 @@ static int zero_map(struct dm_target *ti, struct bio *bio,
 
 static struct target_type zero_target = {
 	.name   = "zero",
-	.version = {1, 0, 0},
+	.version = {1, 1, 0},
 	.module = THIS_MODULE,
 	.ctr    = zero_ctr,
 	.map    = zero_map,
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 5ee580b4f330..c72e4d5a9617 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -645,7 +645,7 @@ static void clone_endio(struct bio *bio, int error)
 		error = -EIO;
 
 	if (endio) {
-		r = endio(tio->ti, bio, error, &tio->info);
+		r = endio(tio->ti, bio, error);
 		if (r < 0 || r == DM_ENDIO_REQUEUE)
 			/*
 			 * error and requeue request are handled
@@ -1004,7 +1004,7 @@ static void __map_bio(struct dm_target *ti, struct dm_target_io *tio)
 	 */
 	atomic_inc(&tio->io->io_count);
 	sector = clone->bi_sector;
-	r = ti->type->map(ti, clone, &tio->info);
+	r = ti->type->map(ti, clone);
 	if (r == DM_MAPIO_REMAPPED) {
 		/* the bio has been remapped so dispatch it */
 
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index eb96ef6fd8b7..bf6afa2fc432 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -45,8 +45,7 @@ typedef void (*dm_dtr_fn) (struct dm_target *ti);
  * = 1: simple remap complete
  * = 2: The target wants to push back the io
  */
-typedef int (*dm_map_fn) (struct dm_target *ti, struct bio *bio,
-			  union map_info *map_context);
+typedef int (*dm_map_fn) (struct dm_target *ti, struct bio *bio);
 typedef int (*dm_map_request_fn) (struct dm_target *ti, struct request *clone,
 				  union map_info *map_context);
 
@@ -59,8 +58,7 @@ typedef int (*dm_map_request_fn) (struct dm_target *ti, struct request *clone,
  * 2   : The target wants to push back the io
  */
 typedef int (*dm_endio_fn) (struct dm_target *ti,
-			    struct bio *bio, int error,
-			    union map_info *map_context);
+			    struct bio *bio, int error);
 typedef int (*dm_request_endio_fn) (struct dm_target *ti,
 				    struct request *clone, int error,
 				    union map_info *map_context);
-- 
cgit v1.2.3


From 30e6c9fa93cf3dbc7cc6df1d748ad25e4264545a Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 20 Dec 2012 17:25:08 +0000
Subject: net: devnet_rename_seq should be a seqcount

Using a seqlock for devnet_rename_seq is not a good idea,
as device_rename() can sleep.

As we hold RTNL, we dont need a protection for writers,
and only need a seqcount so that readers can catch a change done
by a writer.

Bug added in commit c91f6df2db4972d3 (sockopt: Change getsockopt() of
SO_BINDTODEVICE to return an interface name)

Reported-by: Dave Jones <davej@redhat.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Brian Haley <brian.haley@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  2 +-
 net/core/dev.c            | 18 +++++++++---------
 net/core/sock.c           |  4 ++--
 3 files changed, 12 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 02e0f6b156c3..c599e4782d45 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1576,7 +1576,7 @@ extern int call_netdevice_notifiers(unsigned long val, struct net_device *dev);
 
 extern rwlock_t				dev_base_lock;		/* Device list lock */
 
-extern seqlock_t	devnet_rename_seq;	/* Device rename lock */
+extern seqcount_t	devnet_rename_seq;	/* Device rename seq */
 
 
 #define for_each_netdev(net, d)		\
diff --git a/net/core/dev.c b/net/core/dev.c
index d0cbc93fcf32..515473ee52cb 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -203,7 +203,7 @@ static struct list_head offload_base __read_mostly;
 DEFINE_RWLOCK(dev_base_lock);
 EXPORT_SYMBOL(dev_base_lock);
 
-DEFINE_SEQLOCK(devnet_rename_seq);
+seqcount_t devnet_rename_seq;
 
 static inline void dev_base_seq_inc(struct net *net)
 {
@@ -1093,10 +1093,10 @@ int dev_change_name(struct net_device *dev, const char *newname)
 	if (dev->flags & IFF_UP)
 		return -EBUSY;
 
-	write_seqlock(&devnet_rename_seq);
+	write_seqcount_begin(&devnet_rename_seq);
 
 	if (strncmp(newname, dev->name, IFNAMSIZ) == 0) {
-		write_sequnlock(&devnet_rename_seq);
+		write_seqcount_end(&devnet_rename_seq);
 		return 0;
 	}
 
@@ -1104,7 +1104,7 @@ int dev_change_name(struct net_device *dev, const char *newname)
 
 	err = dev_get_valid_name(net, dev, newname);
 	if (err < 0) {
-		write_sequnlock(&devnet_rename_seq);
+		write_seqcount_end(&devnet_rename_seq);
 		return err;
 	}
 
@@ -1112,11 +1112,11 @@ rollback:
 	ret = device_rename(&dev->dev, dev->name);
 	if (ret) {
 		memcpy(dev->name, oldname, IFNAMSIZ);
-		write_sequnlock(&devnet_rename_seq);
+		write_seqcount_end(&devnet_rename_seq);
 		return ret;
 	}
 
-	write_sequnlock(&devnet_rename_seq);
+	write_seqcount_end(&devnet_rename_seq);
 
 	write_lock_bh(&dev_base_lock);
 	hlist_del_rcu(&dev->name_hlist);
@@ -1135,7 +1135,7 @@ rollback:
 		/* err >= 0 after dev_alloc_name() or stores the first errno */
 		if (err >= 0) {
 			err = ret;
-			write_seqlock(&devnet_rename_seq);
+			write_seqcount_begin(&devnet_rename_seq);
 			memcpy(dev->name, oldname, IFNAMSIZ);
 			goto rollback;
 		} else {
@@ -4180,7 +4180,7 @@ static int dev_ifname(struct net *net, struct ifreq __user *arg)
 		return -EFAULT;
 
 retry:
-	seq = read_seqbegin(&devnet_rename_seq);
+	seq = read_seqcount_begin(&devnet_rename_seq);
 	rcu_read_lock();
 	dev = dev_get_by_index_rcu(net, ifr.ifr_ifindex);
 	if (!dev) {
@@ -4190,7 +4190,7 @@ retry:
 
 	strcpy(ifr.ifr_name, dev->name);
 	rcu_read_unlock();
-	if (read_seqretry(&devnet_rename_seq, seq))
+	if (read_seqcount_retry(&devnet_rename_seq, seq))
 		goto retry;
 
 	if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
diff --git a/net/core/sock.c b/net/core/sock.c
index a692ef49c9bb..bc131d419683 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -583,7 +583,7 @@ static int sock_getbindtodevice(struct sock *sk, char __user *optval,
 		goto out;
 
 retry:
-	seq = read_seqbegin(&devnet_rename_seq);
+	seq = read_seqcount_begin(&devnet_rename_seq);
 	rcu_read_lock();
 	dev = dev_get_by_index_rcu(net, sk->sk_bound_dev_if);
 	ret = -ENODEV;
@@ -594,7 +594,7 @@ retry:
 
 	strcpy(devname, dev->name);
 	rcu_read_unlock();
-	if (read_seqretry(&devnet_rename_seq, seq))
+	if (read_seqcount_retry(&devnet_rename_seq, seq))
 		goto retry;
 
 	len = strlen(devname) + 1;
-- 
cgit v1.2.3


From b59320cc5a5e6ceaa17f0895ffbe0711ebad7adf Mon Sep 17 00:00:00 2001
From: Daniel Jeong <gshark.jeong@gmail.com>
Date: Mon, 17 Dec 2012 10:24:06 +0900
Subject: regulator: lp8755: new driver for LP8755

This patch is for new lp8755 regulator dirver and
several unsed variables were deleted and then test was done.

LP8755 :
The LP8755 is a high performance power management unit.It contains
six step-down DC-DC converters which can can be filexibly bundled
together in multiphase converters as required by application.
www.ti.com

Signed-off-by: Daniel Jeong <gshark.jeong@gmail.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 drivers/regulator/Kconfig            |   9 +
 drivers/regulator/Makefile           |   1 +
 drivers/regulator/lp8755.c           | 580 +++++++++++++++++++++++++++++++++++
 include/linux/platform_data/lp8755.h |  71 +++++
 4 files changed, 661 insertions(+)
 create mode 100644 drivers/regulator/lp8755.c
 create mode 100644 include/linux/platform_data/lp8755.h

(limited to 'include/linux')

diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig
index 551a22b07538..f37a1c8bb929 100644
--- a/drivers/regulator/Kconfig
+++ b/drivers/regulator/Kconfig
@@ -277,6 +277,15 @@ config REGULATOR_LP872X
 	help
 	  This driver supports LP8720/LP8725 PMIC
 
+config REGULATOR_LP8755
+	tristate "TI LP8755 High Performance PMU driver"
+	depends on I2C
+	select REGMAP_I2C
+	help
+	  This driver supports LP8755 High Performance PMU driver. This
+	  chip contains six step-down DC/DC converters which can support
+	  9 mode multiphase configuration.
+
 config REGULATOR_LP8788
 	bool "TI LP8788 Power Regulators"
 	depends on MFD_LP8788
diff --git a/drivers/regulator/Makefile b/drivers/regulator/Makefile
index b802b0c7fb02..6e8250382def 100644
--- a/drivers/regulator/Makefile
+++ b/drivers/regulator/Makefile
@@ -30,6 +30,7 @@ obj-$(CONFIG_REGULATOR_LP3972) += lp3972.o
 obj-$(CONFIG_REGULATOR_LP872X) += lp872x.o
 obj-$(CONFIG_REGULATOR_LP8788) += lp8788-buck.o
 obj-$(CONFIG_REGULATOR_LP8788) += lp8788-ldo.o
+obj-$(CONFIG_REGULATOR_LP8755) += lp8755.o
 obj-$(CONFIG_REGULATOR_MAX1586) += max1586.o
 obj-$(CONFIG_REGULATOR_MAX8649)	+= max8649.o
 obj-$(CONFIG_REGULATOR_MAX8660) += max8660.o
diff --git a/drivers/regulator/lp8755.c b/drivers/regulator/lp8755.c
new file mode 100644
index 000000000000..dbc4d1254ca1
--- /dev/null
+++ b/drivers/regulator/lp8755.c
@@ -0,0 +1,580 @@
+/*
+ * LP8755 High Performance Power Management Unit : System Interface Driver
+ * (based on rev. 0.26)
+ * Copyright 2012 Texas Instruments
+ *
+ * Author: Daniel(Geon Si) Jeong <daniel.jeong@ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/i2c.h>
+#include <linux/err.h>
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+#include <linux/gpio.h>
+#include <linux/regmap.h>
+#include <linux/delay.h>
+#include <linux/uaccess.h>
+#include <linux/regulator/driver.h>
+#include <linux/regulator/machine.h>
+#include <linux/platform_data/lp8755.h>
+
+#define LP8755_REG_BUCK0	0x00
+#define LP8755_REG_BUCK1	0x03
+#define LP8755_REG_BUCK2	0x04
+#define LP8755_REG_BUCK3	0x01
+#define LP8755_REG_BUCK4	0x05
+#define LP8755_REG_BUCK5	0x02
+#define LP8755_REG_MAX		0xFF
+
+#define LP8755_BUCK_EN_M	BIT(7)
+#define LP8755_BUCK_LINEAR_OUT_MAX	0x76
+#define LP8755_BUCK_VOUT_M	0x7F
+
+enum bucks {
+	BUCK0 = 0,
+	BUCK1,
+	BUCK2,
+	BUCK3,
+	BUCK4,
+	BUCK5,
+};
+
+struct lp8755_mphase {
+	int nreg;
+	int buck_num[LP8755_BUCK_MAX];
+};
+
+struct lp8755_chip {
+	struct device *dev;
+	struct regmap *regmap;
+	struct lp8755_platform_data *pdata;
+
+	int irq;
+	unsigned int irqmask;
+
+	int mphase;
+	struct regulator_dev *rdev[LP8755_BUCK_MAX];
+};
+
+/**
+ *lp8755_read : read a single register value from lp8755.
+ *@pchip : device to read from
+ *@reg   : register to read from
+ *@val   : pointer to store read value
+ */
+static int lp8755_read(struct lp8755_chip *pchip, unsigned int reg,
+		       unsigned int *val)
+{
+	return regmap_read(pchip->regmap, reg, val);
+}
+
+/**
+ *lp8755_write : write a single register value to lp8755.
+ *@pchip : device to write to
+ *@reg   : register to write to
+ *@val   : value to be written
+ */
+static int lp8755_write(struct lp8755_chip *pchip, unsigned int reg,
+			unsigned int val)
+{
+	return regmap_write(pchip->regmap, reg, val);
+}
+
+/**
+ *lp8755_update_bits : set the values of bit fields in lp8755 register.
+ *@pchip : device to read from
+ *@reg   : register to update
+ *@mask  : bitmask to be changed
+ *@val   : value for bitmask
+ */
+static int lp8755_update_bits(struct lp8755_chip *pchip, unsigned int reg,
+			      unsigned int mask, unsigned int val)
+{
+	return regmap_update_bits(pchip->regmap, reg, mask, val);
+}
+
+static int lp8755_buck_enable_time(struct regulator_dev *rdev)
+{
+	int ret;
+	unsigned int regval;
+	enum lp8755_bucks id = rdev_get_id(rdev);
+	struct lp8755_chip *pchip = rdev_get_drvdata(rdev);
+
+	ret = lp8755_read(pchip, 0x12 + id, &regval);
+	if (ret < 0) {
+		dev_err(pchip->dev, "i2c acceess error %s\n", __func__);
+		return ret;
+	}
+	return (regval & 0xff) * 100;
+}
+
+static int lp8755_buck_set_mode(struct regulator_dev *rdev, unsigned int mode)
+{
+	int ret;
+	unsigned int regbval = 0x0;
+	enum lp8755_bucks id = rdev_get_id(rdev);
+	struct lp8755_chip *pchip = rdev_get_drvdata(rdev);
+
+	switch (mode) {
+	case REGULATOR_MODE_FAST:
+		/* forced pwm mode */
+		regbval = (0x01 << id);
+		break;
+	case REGULATOR_MODE_NORMAL:
+		/* enable automatic pwm/pfm mode */
+		ret = lp8755_update_bits(pchip, 0x08 + id, 0x20, 0x00);
+		if (ret < 0)
+			goto err_i2c;
+		break;
+	case REGULATOR_MODE_IDLE:
+		/* enable automatic pwm/pfm/lppfm mode */
+		ret = lp8755_update_bits(pchip, 0x08 + id, 0x20, 0x20);
+		if (ret < 0)
+			goto err_i2c;
+
+		ret = lp8755_update_bits(pchip, 0x10, 0x01, 0x01);
+		if (ret < 0)
+			goto err_i2c;
+		break;
+	default:
+		dev_err(pchip->dev, "Not supported buck mode %s\n", __func__);
+		/* forced pwm mode */
+		regbval = (0x01 << id);
+	}
+
+	ret = lp8755_update_bits(pchip, 0x06, 0x01 << id, regbval);
+	if (ret < 0)
+		goto err_i2c;
+	return ret;
+err_i2c:
+	dev_err(pchip->dev, "i2c acceess error %s\n", __func__);
+	return ret;
+}
+
+static unsigned int lp8755_buck_get_mode(struct regulator_dev *rdev)
+{
+	int ret;
+	unsigned int regval;
+	enum lp8755_bucks id = rdev_get_id(rdev);
+	struct lp8755_chip *pchip = rdev_get_drvdata(rdev);
+
+	ret = lp8755_read(pchip, 0x06, &regval);
+	if (ret < 0)
+		goto err_i2c;
+
+	/* mode fast means forced pwm mode */
+	if (regval & (0x01 << id))
+		return REGULATOR_MODE_FAST;
+
+	ret = lp8755_read(pchip, 0x08 + id, &regval);
+	if (ret < 0)
+		goto err_i2c;
+
+	/* mode idle means automatic pwm/pfm/lppfm mode */
+	if (regval & 0x20)
+		return REGULATOR_MODE_IDLE;
+
+	/* mode normal means automatic pwm/pfm mode */
+	return REGULATOR_MODE_NORMAL;
+
+err_i2c:
+	dev_err(pchip->dev, "i2c acceess error %s\n", __func__);
+	return 0;
+}
+
+static int lp8755_buck_set_ramp(struct regulator_dev *rdev, int ramp)
+{
+	int ret;
+	unsigned int regval = 0x00;
+	enum lp8755_bucks id = rdev_get_id(rdev);
+	struct lp8755_chip *pchip = rdev_get_drvdata(rdev);
+
+	/* uV/us */
+	switch (ramp) {
+	case 0 ... 230:
+		regval = 0x07;
+		break;
+	case 231 ... 470:
+		regval = 0x06;
+		break;
+	case 471 ... 940:
+		regval = 0x05;
+		break;
+	case 941 ... 1900:
+		regval = 0x04;
+		break;
+	case 1901 ... 3800:
+		regval = 0x03;
+		break;
+	case 3801 ... 7500:
+		regval = 0x02;
+		break;
+	case 7501 ... 15000:
+		regval = 0x01;
+		break;
+	case 15001 ... 30000:
+		regval = 0x00;
+		break;
+	default:
+		dev_err(pchip->dev,
+			"Not supported ramp value %d %s\n", ramp, __func__);
+		return -EINVAL;
+	}
+
+	ret = lp8755_update_bits(pchip, 0x07 + id, 0x07, regval);
+	if (ret < 0)
+		goto err_i2c;
+	return ret;
+err_i2c:
+	dev_err(pchip->dev, "i2c acceess error %s\n", __func__);
+	return ret;
+}
+
+static struct regulator_ops lp8755_buck_ops = {
+	.list_voltage = regulator_list_voltage_linear,
+	.set_voltage_sel = regulator_set_voltage_sel_regmap,
+	.get_voltage_sel = regulator_get_voltage_sel_regmap,
+	.enable = regulator_enable_regmap,
+	.disable = regulator_disable_regmap,
+	.is_enabled = regulator_is_enabled_regmap,
+	.enable_time = lp8755_buck_enable_time,
+	.set_mode = lp8755_buck_set_mode,
+	.get_mode = lp8755_buck_get_mode,
+	.set_ramp_delay = lp8755_buck_set_ramp,
+};
+
+#define lp8755_rail(_id) "lp8755_buck"#_id
+#define lp8755_buck_init(_id)\
+{\
+	.constraints = {\
+		.name = lp8755_rail(_id),\
+		.valid_ops_mask = REGULATOR_CHANGE_VOLTAGE,\
+		.min_uV = 500000,\
+		.max_uV = 1675000,\
+	},\
+}
+
+static struct regulator_init_data lp8755_reg_default[LP8755_BUCK_MAX] = {
+	[BUCK0] = lp8755_buck_init(0),
+	[BUCK1] = lp8755_buck_init(1),
+	[BUCK2] = lp8755_buck_init(2),
+	[BUCK3] = lp8755_buck_init(3),
+	[BUCK4] = lp8755_buck_init(4),
+	[BUCK5] = lp8755_buck_init(5),
+};
+
+static const struct lp8755_mphase mphase_buck[MPHASE_CONF_MAX] = {
+	{3, {BUCK0, BUCK3, BUCK5}
+	 },
+	{6, {BUCK0, BUCK1, BUCK2, BUCK3, BUCK4, BUCK5}
+	 },
+	{5, {BUCK0, BUCK2, BUCK3, BUCK4, BUCK5}
+	 },
+	{4, {BUCK0, BUCK3, BUCK4, BUCK5}
+	 },
+	{3, {BUCK0, BUCK4, BUCK5}
+	 },
+	{2, {BUCK0, BUCK5}
+	 },
+	{1, {BUCK0}
+	 },
+	{2, {BUCK0, BUCK3}
+	 },
+	{4, {BUCK0, BUCK2, BUCK3, BUCK5}
+	 },
+};
+
+static int lp8755_init_data(struct lp8755_chip *pchip)
+{
+	unsigned int regval;
+	int ret, icnt, buck_num;
+	struct lp8755_platform_data *pdata = pchip->pdata;
+
+	/* read back  muti-phase configuration */
+	ret = lp8755_read(pchip, 0x3D, &regval);
+	if (ret < 0)
+		goto out_i2c_error;
+	pchip->mphase = regval & 0x07;
+
+	/* set default data based on multi-phase config */
+	for (icnt = 0; icnt < mphase_buck[pchip->mphase].nreg; icnt++) {
+		buck_num = mphase_buck[pchip->mphase].buck_num[icnt];
+		pdata->buck_data[buck_num] = &lp8755_reg_default[buck_num];
+	}
+	return ret;
+
+out_i2c_error:
+	dev_err(pchip->dev, "i2c acceess error %s\n", __func__);
+	return ret;
+}
+
+#define lp8755_buck_desc(_id)\
+{\
+	.name = lp8755_rail(_id),\
+	.id   = LP8755_BUCK##_id,\
+	.ops  = &lp8755_buck_ops,\
+	.n_voltages = LP8755_BUCK_LINEAR_OUT_MAX+1,\
+	.uV_step = 10000,\
+	.min_uV = 500000,\
+	.type = REGULATOR_VOLTAGE,\
+	.owner = THIS_MODULE,\
+	.enable_reg = LP8755_REG_BUCK##_id,\
+	.enable_mask = LP8755_BUCK_EN_M,\
+	.vsel_reg = LP8755_REG_BUCK##_id,\
+	.vsel_mask = LP8755_BUCK_VOUT_M,\
+}
+
+static struct regulator_desc lp8755_regulators[] = {
+	lp8755_buck_desc(0),
+	lp8755_buck_desc(1),
+	lp8755_buck_desc(2),
+	lp8755_buck_desc(3),
+	lp8755_buck_desc(4),
+	lp8755_buck_desc(5),
+};
+
+static int lp8755_regulator_init(struct lp8755_chip *pchip)
+{
+	int ret, icnt, buck_num;
+	struct lp8755_platform_data *pdata = pchip->pdata;
+	struct regulator_config rconfig = { };
+
+	rconfig.regmap = pchip->regmap;
+	rconfig.dev = pchip->dev;
+	rconfig.driver_data = pchip;
+
+	for (icnt = 0; icnt < mphase_buck[pchip->mphase].nreg; icnt++) {
+		buck_num = mphase_buck[pchip->mphase].buck_num[icnt];
+		rconfig.init_data = pdata->buck_data[buck_num];
+		rconfig.of_node = pchip->dev->of_node;
+		pchip->rdev[buck_num] =
+		    regulator_register(&lp8755_regulators[buck_num], &rconfig);
+		if (IS_ERR(pchip->rdev[buck_num])) {
+			ret = PTR_ERR(pchip->rdev[buck_num]);
+			dev_err(pchip->dev, "regulator init failed: buck 0\n");
+			goto err_buck;
+		}
+	}
+
+	return 0;
+
+err_buck:
+	for (icnt = 0; icnt < LP8755_BUCK_MAX; icnt++)
+		if (pchip->rdev[icnt] != NULL)
+			regulator_unregister(pchip->rdev[icnt]);
+	return ret;
+}
+
+static irqreturn_t lp8755_irq_handler(int irq, void *data)
+{
+	int ret, icnt;
+	unsigned int flag0, flag1;
+	struct lp8755_chip *pchip = data;
+
+	/* read flag0 register */
+	ret = lp8755_read(pchip, 0x0D, &flag0);
+	if (ret < 0)
+		goto err_i2c;
+	/* clear flag register to pull up int. pin */
+	ret = lp8755_write(pchip, 0x0D, 0x00);
+	if (ret < 0)
+		goto err_i2c;
+
+	/* sent power fault detection event to specific regulator */
+	for (icnt = 0; icnt < 6; icnt++)
+		if ((flag0 & (0x4 << icnt))
+		    && (pchip->irqmask & (0x04 << icnt))
+		    && (pchip->rdev[icnt] != NULL))
+			regulator_notifier_call_chain(pchip->rdev[icnt],
+						      LP8755_EVENT_PWR_FAULT,
+						      NULL);
+
+	/* read flag1 register */
+	ret = lp8755_read(pchip, 0x0E, &flag1);
+	if (ret < 0)
+		goto err_i2c;
+	/* clear flag register to pull up int. pin */
+	ret = lp8755_write(pchip, 0x0E, 0x00);
+	if (ret < 0)
+		goto err_i2c;
+
+	/* send OCP event to all regualtor devices */
+	if ((flag1 & 0x01) && (pchip->irqmask & 0x01))
+		for (icnt = 0; icnt < LP8755_BUCK_MAX; icnt++)
+			if (pchip->rdev[icnt] != NULL)
+				regulator_notifier_call_chain(pchip->rdev[icnt],
+							      LP8755_EVENT_OCP,
+							      NULL);
+
+	/* send OVP event to all regualtor devices */
+	if ((flag1 & 0x02) && (pchip->irqmask & 0x02))
+		for (icnt = 0; icnt < LP8755_BUCK_MAX; icnt++)
+			if (pchip->rdev[icnt] != NULL)
+				regulator_notifier_call_chain(pchip->rdev[icnt],
+							      LP8755_EVENT_OVP,
+							      NULL);
+	return IRQ_HANDLED;
+
+err_i2c:
+	dev_err(pchip->dev, "i2c acceess error %s\n", __func__);
+	return IRQ_NONE;
+}
+
+static int lp8755_int_config(struct lp8755_chip *pchip)
+{
+	int ret;
+	unsigned int regval;
+
+	if (pchip->irq == 0) {
+		dev_warn(pchip->dev, "not use interrupt : %s\n", __func__);
+		return 0;
+	}
+
+	ret = lp8755_read(pchip, 0x0F, &regval);
+	if (ret < 0)
+		goto err_i2c;
+	pchip->irqmask = regval;
+	ret = request_threaded_irq(pchip->irq, NULL, lp8755_irq_handler,
+				   IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
+				   "lp8755-irq", pchip);
+	if (ret)
+		return ret;
+
+	return ret;
+
+err_i2c:
+	dev_err(pchip->dev, "i2c acceess error %s\n", __func__);
+	return ret;
+}
+
+static const struct regmap_config lp8755_regmap = {
+	.reg_bits = 8,
+	.val_bits = 8,
+	.max_register = LP8755_REG_MAX,
+};
+
+static int lp8755_probe(struct i2c_client *client,
+			const struct i2c_device_id *id)
+{
+	int ret, icnt;
+	struct lp8755_chip *pchip;
+	struct lp8755_platform_data *pdata = client->dev.platform_data;
+
+	if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) {
+		dev_err(&client->dev, "i2c functionality check fail.\n");
+		return -EOPNOTSUPP;
+	}
+
+	pchip = devm_kzalloc(&client->dev,
+			     sizeof(struct lp8755_chip), GFP_KERNEL);
+	if (!pchip)
+		return -ENOMEM;
+
+	pchip->dev = &client->dev;
+	pchip->regmap = devm_regmap_init_i2c(client, &lp8755_regmap);
+	if (IS_ERR(pchip->regmap)) {
+		ret = PTR_ERR(pchip->regmap);
+		dev_err(&client->dev, "fail to allocate regmap %d\n", ret);
+		return ret;
+	}
+	i2c_set_clientdata(client, pchip);
+
+	if (pdata != NULL) {
+		pchip->pdata = pdata;
+		pchip->mphase = pdata->mphase;
+	} else {
+		pchip->pdata = devm_kzalloc(pchip->dev,
+					    sizeof(struct lp8755_platform_data),
+					    GFP_KERNEL);
+		if (!pchip->pdata)
+			return -ENOMEM;
+		ret = lp8755_init_data(pchip);
+		if (ret < 0)
+			goto err_chip_init;
+	}
+
+	ret = lp8755_regulator_init(pchip);
+	if (ret < 0)
+		goto err_regulator;
+
+	pchip->irq = client->irq;
+	ret = lp8755_int_config(pchip);
+	if (ret < 0)
+		goto err_irq;
+
+	return ret;
+
+err_irq:
+	dev_err(&client->dev, "fail to irq config\n");
+
+	for (icnt = 0; icnt < mphase_buck[pchip->mphase].nreg; icnt++)
+		regulator_unregister(pchip->rdev[icnt]);
+
+err_regulator:
+	dev_err(&client->dev, "fail to initialize regulators\n");
+	/* output disable */
+	for (icnt = 0; icnt < 0x06; icnt++)
+		lp8755_write(pchip, icnt, 0x00);
+
+err_chip_init:
+	dev_err(&client->dev, "fail to initialize chip\n");
+	return ret;
+}
+
+static int lp8755_remove(struct i2c_client *client)
+{
+	int icnt;
+	struct lp8755_chip *pchip = i2c_get_clientdata(client);
+
+	for (icnt = 0; icnt < mphase_buck[pchip->mphase].nreg; icnt++)
+		regulator_unregister(pchip->rdev[icnt]);
+
+	for (icnt = 0; icnt < 0x06; icnt++)
+		lp8755_write(pchip, icnt, 0x00);
+
+	if (pchip->irq != 0)
+		free_irq(pchip->irq, pchip);
+
+	return 0;
+}
+
+static const struct i2c_device_id lp8755_id[] = {
+	{LP8755_NAME, 0},
+	{}
+};
+
+MODULE_DEVICE_TABLE(i2c, lp8755_id);
+
+static struct i2c_driver lp8755_i2c_driver = {
+	.driver = {
+		   .name = LP8755_NAME,
+		   },
+	.probe = lp8755_probe,
+	.remove = __devexit_p(lp8755_remove),
+	.id_table = lp8755_id,
+};
+
+static int __init lp8755_init(void)
+{
+	return i2c_add_driver(&lp8755_i2c_driver);
+}
+
+subsys_initcall(lp8755_init);
+
+static void __exit lp8755_exit(void)
+{
+	i2c_del_driver(&lp8755_i2c_driver);
+}
+
+module_exit(lp8755_exit);
+
+MODULE_DESCRIPTION("Texas Instruments lp8755 driver");
+MODULE_AUTHOR("Daniel Jeong <daniel.jeong@ti.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/platform_data/lp8755.h b/include/linux/platform_data/lp8755.h
new file mode 100644
index 000000000000..a7fd0776c9bf
--- /dev/null
+++ b/include/linux/platform_data/lp8755.h
@@ -0,0 +1,71 @@
+/*
+ * LP8755 High Performance Power Management Unit Driver:System Interface Driver
+ *
+ *			Copyright (C) 2012 Texas Instruments
+ *
+ * Author: Daniel(Geon Si) Jeong <daniel.jeong@ti.com>
+ *             G.Shark Jeong <gshark.jeong@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#ifndef _LP8755_H
+#define _LP8755_H
+
+#include <linux/regulator/consumer.h>
+
+#define LP8755_NAME "lp8755-regulator"
+/*
+ *PWR FAULT : power fault detected
+ *OCP : over current protect activated
+ *OVP : over voltage protect activated
+ *TEMP_WARN : thermal warning
+ *TEMP_SHDN : thermal shutdonw detected
+ *I_LOAD : current measured
+ */
+#define LP8755_EVENT_PWR_FAULT REGULATOR_EVENT_FAIL
+#define LP8755_EVENT_OCP REGULATOR_EVENT_OVER_CURRENT
+#define LP8755_EVENT_OVP 0x10000
+#define LP8755_EVENT_TEMP_WARN 0x2000
+#define LP8755_EVENT_TEMP_SHDN REGULATOR_EVENT_OVER_TEMP
+#define LP8755_EVENT_I_LOAD	0x40000
+
+enum lp8755_bucks {
+	LP8755_BUCK0 = 0,
+	LP8755_BUCK1,
+	LP8755_BUCK2,
+	LP8755_BUCK3,
+	LP8755_BUCK4,
+	LP8755_BUCK5,
+	LP8755_BUCK_MAX,
+};
+
+/**
+ * multiphase configuration options
+ */
+enum lp8755_mphase_config {
+	MPHASE_CONF0,
+	MPHASE_CONF1,
+	MPHASE_CONF2,
+	MPHASE_CONF3,
+	MPHASE_CONF4,
+	MPHASE_CONF5,
+	MPHASE_CONF6,
+	MPHASE_CONF7,
+	MPHASE_CONF8,
+	MPHASE_CONF_MAX
+};
+
+/**
+ * struct lp8755_platform_data
+ * @mphase_type : Multiphase Switcher Configurations.
+ * @buck_data   : buck0~6 init voltage in uV
+ */
+struct lp8755_platform_data {
+	int mphase;
+	struct regulator_init_data *buck_data[LP8755_BUCK_MAX];
+};
+#endif
-- 
cgit v1.2.3


From c8520b4c5d25eb7b8b54f1ae9ba7da71375f2b2c Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Tue, 18 Dec 2012 09:30:10 +0800
Subject: regulator: core: Allow specify apply_[reg|bit] for regmap based
 voltage_sel operations

Some DVM regulators needs to update apply_bit after setting vsel_reg to
initiate voltage change on the output.  This patch adds apply_reg and
apply_bit to struct regulator_desc and update
regulator_set_voltage_sel_regmap() to set apply_bit of apply_reg when
apply_bit is set.

Signed-off-by: Axel Lin <axel.lin@ingics.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 drivers/regulator/core.c         | 12 +++++++++++-
 include/linux/regulator/driver.h |  6 ++++++
 2 files changed, 17 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index 0f65b246cc0c..571ce0f1c06e 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -2074,10 +2074,20 @@ EXPORT_SYMBOL_GPL(regulator_get_voltage_sel_regmap);
  */
 int regulator_set_voltage_sel_regmap(struct regulator_dev *rdev, unsigned sel)
 {
+	int ret;
+
 	sel <<= ffs(rdev->desc->vsel_mask) - 1;
 
-	return regmap_update_bits(rdev->regmap, rdev->desc->vsel_reg,
+	ret = regmap_update_bits(rdev->regmap, rdev->desc->vsel_reg,
 				  rdev->desc->vsel_mask, sel);
+	if (ret)
+		return ret;
+
+	if (rdev->desc->apply_bit)
+		ret = regmap_update_bits(rdev->regmap, rdev->desc->apply_reg,
+					 rdev->desc->apply_bit,
+					 rdev->desc->apply_bit);
+	return ret;
 }
 EXPORT_SYMBOL_GPL(regulator_set_voltage_sel_regmap);
 
diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h
index d10bb0f39c5e..23070fd83872 100644
--- a/include/linux/regulator/driver.h
+++ b/include/linux/regulator/driver.h
@@ -193,6 +193,10 @@ enum regulator_type {
  *
  * @vsel_reg: Register for selector when using regulator_regmap_X_voltage_
  * @vsel_mask: Mask for register bitfield used for selector
+ * @apply_reg: Register for initiate voltage change on the output when
+ *                using regulator_set_voltage_sel_regmap
+ * @apply_bit: Register bitfield used for initiate voltage change on the
+ *                output when using regulator_set_voltage_sel_regmap
  * @enable_reg: Register for control when using regmap enable/disable ops
  * @enable_mask: Mask for control when using regmap enable/disable ops
  *
@@ -218,6 +222,8 @@ struct regulator_desc {
 
 	unsigned int vsel_reg;
 	unsigned int vsel_mask;
+	unsigned int apply_reg;
+	unsigned int apply_bit;
 	unsigned int enable_reg;
 	unsigned int enable_mask;
 	unsigned int bypass_reg;
-- 
cgit v1.2.3


From 7b1f62076bba10786d2118006ae68ac120cd6c56 Mon Sep 17 00:00:00 2001
From: Stephen Warren <swarren@nvidia.com>
Date: Wed, 7 Nov 2012 17:58:54 -0700
Subject: time: convert arch_gettimeoffset to a pointer

Currently, whenever CONFIG_ARCH_USES_GETTIMEOFFSET is enabled, each
arch core provides a single implementation of arch_gettimeoffset(). In
many cases, different sub-architectures, different machines, or
different timer providers exist, and so the arch ends up implementing
arch_gettimeoffset() as a call-through-pointer anyway. Examples are
ARM, Cris, M68K, and it's arguable that the remaining architectures,
M32R and Blackfin, should be doing this anyway.

Modify arch_gettimeoffset so that it itself is a function pointer, which
the arch initializes. This will allow later changes to move the
initialization of this function into individual machine support or timer
drivers. This is particularly useful for code in drivers/clocksource
which should rely on an arch-independant mechanism to register their
implementation of arch_gettimeoffset().

This patch also converts the Cris architecture to set arch_gettimeoffset
directly to the final implementation in time_init(), because Cris already
had separate time_init() functions per sub-architecture. M68K and ARM
are converted to set arch_gettimeoffset to the final implementation in
later patches, because they already have function pointers in place for
this purpose.

Cc: Russell King <linux@arm.linux.org.uk>
Cc: Mike Frysinger <vapier@gentoo.org>
Cc: Mikael Starvik <starvik@axis.com>
Cc: Hirokazu Takata <takata@linux-m32r.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
Acked-by: Jesper Nilsson <jesper.nilsson@axis.com>
Acked-by: John Stultz <johnstul@us.ibm.com>
Signed-off-by: Stephen Warren <swarren@nvidia.com>
---
 arch/arm/kernel/time.c           |  6 +++++-
 arch/blackfin/kernel/time.c      |  6 +++++-
 arch/cris/arch-v10/kernel/time.c |  6 ++++--
 arch/cris/kernel/time.c          | 11 -----------
 arch/m32r/kernel/time.c          |  4 +++-
 arch/m68k/kernel/time.c          | 16 ++++++++++------
 include/linux/time.h             |  4 +---
 kernel/time/timekeeping.c        | 26 ++++++++++++++++++++------
 8 files changed, 48 insertions(+), 31 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/kernel/time.c b/arch/arm/kernel/time.c
index 09be0c3c9069..b0190b41cb33 100644
--- a/arch/arm/kernel/time.c
+++ b/arch/arm/kernel/time.c
@@ -70,7 +70,7 @@ EXPORT_SYMBOL(profile_pc);
 #endif
 
 #ifdef CONFIG_ARCH_USES_GETTIMEOFFSET
-u32 arch_gettimeoffset(void)
+static u32 arm_gettimeoffset(void)
 {
 	if (system_timer->offset != NULL)
 		return system_timer->offset() * 1000;
@@ -164,6 +164,10 @@ device_initcall(timer_init_syscore_ops);
 
 void __init time_init(void)
 {
+#ifdef CONFIG_ARCH_USES_GETTIMEOFFSET
+	arch_gettimeoffset = arm_gettimeoffset;
+#endif
+
 	system_timer = machine_desc->timer;
 	system_timer->init();
 	sched_clock_postinit();
diff --git a/arch/blackfin/kernel/time.c b/arch/blackfin/kernel/time.c
index 2310b249675f..3126b920a4a5 100644
--- a/arch/blackfin/kernel/time.c
+++ b/arch/blackfin/kernel/time.c
@@ -85,7 +85,7 @@ time_sched_init(irqreturn_t(*timer_routine) (int, void *))
 /*
  * Should return useconds since last timer tick
  */
-u32 arch_gettimeoffset(void)
+static u32 blackfin_gettimeoffset(void)
 {
 	unsigned long offset;
 	unsigned long clocks_per_jiffy;
@@ -141,6 +141,10 @@ void read_persistent_clock(struct timespec *ts)
 
 void __init time_init(void)
 {
+#ifdef CONFIG_ARCH_USES_GETTIMEOFFSET
+	arch_gettimeoffset = blackfin_gettimeoffset;
+#endif
+
 #ifdef CONFIG_RTC_DRV_BFIN
 	/* [#2663] hack to filter junk RTC values that would cause
 	 * userspace to have to deal with time values greater than
diff --git a/arch/cris/arch-v10/kernel/time.c b/arch/cris/arch-v10/kernel/time.c
index 162892f24a20..fce7c541d70d 100644
--- a/arch/cris/arch-v10/kernel/time.c
+++ b/arch/cris/arch-v10/kernel/time.c
@@ -55,9 +55,9 @@ unsigned long get_ns_in_jiffie(void)
 	return ns;
 }
 
-unsigned long do_slow_gettimeoffset(void)
+static u32 cris_v10_gettimeoffset(void)
 {
-	unsigned long count;
+	u32 count;
 
 	/* The timer interrupt comes from Etrax timer 0. In order to get
 	 * better precision, we check the current value. It might have
@@ -191,6 +191,8 @@ static struct irqaction irq2  = {
 void __init
 time_init(void)
 {	
+	arch_gettimeoffset = cris_v10_gettimeoffset;
+
 	/* probe for the RTC and read it if it exists 
 	 * Before the RTC can be probed the loops_per_usec variable needs 
 	 * to be initialized to make usleep work. A better value for 
diff --git a/arch/cris/kernel/time.c b/arch/cris/kernel/time.c
index b063c9217b5c..fe6acdabbc8d 100644
--- a/arch/cris/kernel/time.c
+++ b/arch/cris/kernel/time.c
@@ -39,17 +39,6 @@
 extern unsigned long loops_per_jiffy; /* init/main.c */
 unsigned long loops_per_usec;
 
-
-#ifdef CONFIG_ARCH_USES_GETTIMEOFFSET
-extern unsigned long do_slow_gettimeoffset(void);
-static unsigned long (*do_gettimeoffset)(void) = do_slow_gettimeoffset;
-
-u32 arch_gettimeoffset(void)
-{
-	return do_gettimeoffset();
-}
-#endif
-
 int set_rtc_mmss(unsigned long nowtime)
 {
 	D(printk(KERN_DEBUG "set_rtc_mmss(%lu)\n", nowtime));
diff --git a/arch/m32r/kernel/time.c b/arch/m32r/kernel/time.c
index 84dd04048db9..1a15f81ea1bd 100644
--- a/arch/m32r/kernel/time.c
+++ b/arch/m32r/kernel/time.c
@@ -57,7 +57,7 @@ extern void smp_local_timer_interrupt(void);
 
 static unsigned long latch;
 
-u32 arch_gettimeoffset(void)
+static u32 m32r_gettimeoffset(void)
 {
 	unsigned long  elapsed_time = 0;  /* [us] */
 
@@ -165,6 +165,8 @@ void read_persistent_clock(struct timespec *ts)
 
 void __init time_init(void)
 {
+	arch_gettimeoffset = m32r_gettimeoffset;
+
 #if defined(CONFIG_CHIP_M32102) || defined(CONFIG_CHIP_XNUX2) \
 	|| defined(CONFIG_CHIP_VDEC2) || defined(CONFIG_CHIP_M32700) \
 	|| defined(CONFIG_CHIP_OPSP) || defined(CONFIG_CHIP_M32104)
diff --git a/arch/m68k/kernel/time.c b/arch/m68k/kernel/time.c
index 5d0bcaad2e55..c2994c8c30d5 100644
--- a/arch/m68k/kernel/time.c
+++ b/arch/m68k/kernel/time.c
@@ -80,14 +80,9 @@ void read_persistent_clock(struct timespec *ts)
 	}
 }
 
-void __init time_init(void)
-{
-	mach_sched_init(timer_interrupt);
-}
-
 #ifdef CONFIG_ARCH_USES_GETTIMEOFFSET
 
-u32 arch_gettimeoffset(void)
+static u32 m68k_gettimeoffset(void)
 {
 	return mach_gettimeoffset() * 1000;
 }
@@ -106,3 +101,12 @@ static int __init rtc_init(void)
 module_init(rtc_init);
 
 #endif /* CONFIG_ARCH_USES_GETTIMEOFFSET */
+
+void __init time_init(void)
+{
+#ifdef CONFIG_ARCH_USES_GETTIMEOFFSET
+	arch_gettimeoffset = m68k_gettimeoffset;
+#endif
+
+	mach_sched_init(timer_interrupt);
+}
diff --git a/include/linux/time.h b/include/linux/time.h
index 4d358e9d10f1..05e32a72103c 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -142,9 +142,7 @@ void timekeeping_inject_sleeptime(struct timespec *delta);
  * finer then tick granular time.
  */
 #ifdef CONFIG_ARCH_USES_GETTIMEOFFSET
-extern u32 arch_gettimeoffset(void);
-#else
-static inline u32 arch_gettimeoffset(void) { return 0; }
+extern u32 (*arch_gettimeoffset)(void);
 #endif
 
 extern void do_gettimeofday(struct timeval *tv);
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index cbc6acb0db3f..8ed934601587 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -135,6 +135,20 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
 }
 
 /* Timekeeper helper functions. */
+
+#ifdef CONFIG_ARCH_USES_GETTIMEOFFSET
+u32 (*arch_gettimeoffset)(void);
+
+u32 get_arch_timeoffset(void)
+{
+	if (likely(arch_gettimeoffset))
+		return arch_gettimeoffset();
+	return 0;
+}
+#else
+static inline u32 get_arch_timeoffset(void) { return 0; }
+#endif
+
 static inline s64 timekeeping_get_ns(struct timekeeper *tk)
 {
 	cycle_t cycle_now, cycle_delta;
@@ -151,8 +165,8 @@ static inline s64 timekeeping_get_ns(struct timekeeper *tk)
 	nsec = cycle_delta * tk->mult + tk->xtime_nsec;
 	nsec >>= tk->shift;
 
-	/* If arch requires, add in gettimeoffset() */
-	return nsec + arch_gettimeoffset();
+	/* If arch requires, add in get_arch_timeoffset() */
+	return nsec + get_arch_timeoffset();
 }
 
 static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk)
@@ -171,8 +185,8 @@ static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk)
 	/* convert delta to nanoseconds. */
 	nsec = clocksource_cyc2ns(cycle_delta, clock->mult, clock->shift);
 
-	/* If arch requires, add in gettimeoffset() */
-	return nsec + arch_gettimeoffset();
+	/* If arch requires, add in get_arch_timeoffset() */
+	return nsec + get_arch_timeoffset();
 }
 
 static RAW_NOTIFIER_HEAD(pvclock_gtod_chain);
@@ -254,8 +268,8 @@ static void timekeeping_forward_now(struct timekeeper *tk)
 
 	tk->xtime_nsec += cycle_delta * tk->mult;
 
-	/* If arch requires, add in gettimeoffset() */
-	tk->xtime_nsec += (u64)arch_gettimeoffset() << tk->shift;
+	/* If arch requires, add in get_arch_timeoffset() */
+	tk->xtime_nsec += (u64)get_arch_timeoffset() << tk->shift;
 
 	tk_normalize_xtime(tk);
 
-- 
cgit v1.2.3


From 6bb27d7349db51b50c40534710fe164ca0d58902 Mon Sep 17 00:00:00 2001
From: Stephen Warren <swarren@nvidia.com>
Date: Thu, 8 Nov 2012 12:40:59 -0700
Subject: ARM: delete struct sys_timer

Now that the only field in struct sys_timer is .init, delete the struct,
and replace the machine descriptor .timer field with the initialization
function itself.

This will enable moving timer drivers into drivers/clocksource without
having to place a public prototype of each struct sys_timer object into
include/linux; the intent is to create a single of_clocksource_init()
function that determines which timer driver to initialize by scanning
the device dtree, much like the proposed irqchip_init() at:
http://www.spinics.net/lists/arm-kernel/msg203686.html

Includes mach-omap2 fixes from Igor Grinberg.

Tested-by: Robert Jarzmik <robert.jarzmik@free.fr>
Signed-off-by: Stephen Warren <swarren@nvidia.com>
---
 arch/arm/include/asm/mach/arch.h               |  3 +--
 arch/arm/include/asm/mach/time.h               | 16 ----------------
 arch/arm/kernel/time.c                         |  9 +--------
 arch/arm/mach-at91/at91rm9200_time.c           |  5 -----
 arch/arm/mach-at91/at91sam926x_time.c          |  6 +-----
 arch/arm/mach-at91/at91x40_time.c              |  5 -----
 arch/arm/mach-at91/board-1arm.c                |  2 +-
 arch/arm/mach-at91/board-afeb-9260v1.c         |  2 +-
 arch/arm/mach-at91/board-cam60.c               |  2 +-
 arch/arm/mach-at91/board-carmeva.c             |  2 +-
 arch/arm/mach-at91/board-cpu9krea.c            |  2 +-
 arch/arm/mach-at91/board-cpuat91.c             |  2 +-
 arch/arm/mach-at91/board-csb337.c              |  2 +-
 arch/arm/mach-at91/board-csb637.c              |  2 +-
 arch/arm/mach-at91/board-dt.c                  |  2 +-
 arch/arm/mach-at91/board-eb01.c                |  2 +-
 arch/arm/mach-at91/board-eb9200.c              |  2 +-
 arch/arm/mach-at91/board-ecbat91.c             |  2 +-
 arch/arm/mach-at91/board-eco920.c              |  2 +-
 arch/arm/mach-at91/board-flexibity.c           |  2 +-
 arch/arm/mach-at91/board-foxg20.c              |  2 +-
 arch/arm/mach-at91/board-gsia18s.c             |  2 +-
 arch/arm/mach-at91/board-kafa.c                |  2 +-
 arch/arm/mach-at91/board-kb9202.c              |  2 +-
 arch/arm/mach-at91/board-neocore926.c          |  2 +-
 arch/arm/mach-at91/board-pcontrol-g20.c        |  2 +-
 arch/arm/mach-at91/board-picotux200.c          |  2 +-
 arch/arm/mach-at91/board-qil-a9260.c           |  2 +-
 arch/arm/mach-at91/board-rm9200dk.c            |  2 +-
 arch/arm/mach-at91/board-rm9200ek.c            |  2 +-
 arch/arm/mach-at91/board-rsi-ews.c             |  2 +-
 arch/arm/mach-at91/board-sam9-l9260.c          |  2 +-
 arch/arm/mach-at91/board-sam9260ek.c           |  2 +-
 arch/arm/mach-at91/board-sam9261ek.c           |  2 +-
 arch/arm/mach-at91/board-sam9263ek.c           |  2 +-
 arch/arm/mach-at91/board-sam9g20ek.c           |  4 ++--
 arch/arm/mach-at91/board-sam9m10g45ek.c        |  2 +-
 arch/arm/mach-at91/board-sam9rlek.c            |  2 +-
 arch/arm/mach-at91/board-snapper9260.c         |  2 +-
 arch/arm/mach-at91/board-stamp9g20.c           |  4 ++--
 arch/arm/mach-at91/board-usb-a926x.c           |  6 +++---
 arch/arm/mach-at91/board-yl-9200.c             |  2 +-
 arch/arm/mach-at91/generic.h                   |  7 +++----
 arch/arm/mach-bcm/board_bcm.c                  |  6 +-----
 arch/arm/mach-bcm2835/bcm2835.c                |  2 +-
 arch/arm/mach-clps711x/board-autcpu12.c        |  2 +-
 arch/arm/mach-clps711x/board-cdb89712.c        |  2 +-
 arch/arm/mach-clps711x/board-clep7312.c        |  2 +-
 arch/arm/mach-clps711x/board-edb7211.c         |  2 +-
 arch/arm/mach-clps711x/board-fortunet.c        |  2 +-
 arch/arm/mach-clps711x/board-p720t.c           |  2 +-
 arch/arm/mach-clps711x/common.c                |  6 +-----
 arch/arm/mach-clps711x/common.h                |  4 +---
 arch/arm/mach-cns3xxx/cns3420vb.c              |  2 +-
 arch/arm/mach-cns3xxx/core.c                   |  6 +-----
 arch/arm/mach-cns3xxx/core.h                   |  2 +-
 arch/arm/mach-davinci/board-da830-evm.c        |  2 +-
 arch/arm/mach-davinci/board-da850-evm.c        |  2 +-
 arch/arm/mach-davinci/board-dm355-evm.c        |  2 +-
 arch/arm/mach-davinci/board-dm355-leopard.c    |  2 +-
 arch/arm/mach-davinci/board-dm365-evm.c        |  2 +-
 arch/arm/mach-davinci/board-dm644x-evm.c       |  2 +-
 arch/arm/mach-davinci/board-dm646x-evm.c       |  4 ++--
 arch/arm/mach-davinci/board-mityomapl138.c     |  2 +-
 arch/arm/mach-davinci/board-neuros-osd2.c      |  2 +-
 arch/arm/mach-davinci/board-omapl138-hawk.c    |  2 +-
 arch/arm/mach-davinci/board-sffsdr.c           |  2 +-
 arch/arm/mach-davinci/board-tnetv107x-evm.c    |  2 +-
 arch/arm/mach-davinci/da8xx-dt.c               |  2 +-
 arch/arm/mach-davinci/include/mach/common.h    |  4 +---
 arch/arm/mach-davinci/time.c                   |  7 +------
 arch/arm/mach-dove/cm-a510.c                   |  2 +-
 arch/arm/mach-dove/common.c                    |  8 ++------
 arch/arm/mach-dove/common.h                    |  2 +-
 arch/arm/mach-dove/dove-db-setup.c             |  2 +-
 arch/arm/mach-ebsa110/core.c                   |  8 ++------
 arch/arm/mach-ep93xx/adssphere.c               |  2 +-
 arch/arm/mach-ep93xx/core.c                    |  6 +-----
 arch/arm/mach-ep93xx/edb93xx.c                 | 16 ++++++++--------
 arch/arm/mach-ep93xx/gesbc9312.c               |  2 +-
 arch/arm/mach-ep93xx/include/mach/platform.h   |  2 +-
 arch/arm/mach-ep93xx/micro9.c                  |  8 ++++----
 arch/arm/mach-ep93xx/simone.c                  |  2 +-
 arch/arm/mach-ep93xx/snappercl15.c             |  2 +-
 arch/arm/mach-ep93xx/ts72xx.c                  |  2 +-
 arch/arm/mach-ep93xx/vision_ep9307.c           |  2 +-
 arch/arm/mach-exynos/common.h                  |  2 +-
 arch/arm/mach-exynos/mach-armlex4210.c         |  2 +-
 arch/arm/mach-exynos/mach-exynos4-dt.c         |  2 +-
 arch/arm/mach-exynos/mach-exynos5-dt.c         |  2 +-
 arch/arm/mach-exynos/mach-nuri.c               |  2 +-
 arch/arm/mach-exynos/mach-origen.c             |  2 +-
 arch/arm/mach-exynos/mach-smdk4x12.c           |  4 ++--
 arch/arm/mach-exynos/mach-smdkv310.c           |  4 ++--
 arch/arm/mach-exynos/mach-universal_c210.c     |  2 +-
 arch/arm/mach-exynos/mct.c                     |  6 +-----
 arch/arm/mach-footbridge/cats-hw.c             |  2 +-
 arch/arm/mach-footbridge/common.h              |  4 ++--
 arch/arm/mach-footbridge/dc21285-timer.c       |  6 +-----
 arch/arm/mach-footbridge/ebsa285.c             |  2 +-
 arch/arm/mach-footbridge/isa-timer.c           |  6 +-----
 arch/arm/mach-footbridge/netwinder-hw.c        |  2 +-
 arch/arm/mach-footbridge/personal.c            |  2 +-
 arch/arm/mach-gemini/board-nas4220b.c          |  6 +-----
 arch/arm/mach-gemini/board-rut1xx.c            |  6 +-----
 arch/arm/mach-gemini/board-wbd111.c            |  6 +-----
 arch/arm/mach-gemini/board-wbd222.c            |  6 +-----
 arch/arm/mach-h720x/common.h                   |  4 ++--
 arch/arm/mach-h720x/cpu-h7201.c                |  6 +-----
 arch/arm/mach-h720x/cpu-h7202.c                |  6 +-----
 arch/arm/mach-h720x/h7201-eval.c               |  2 +-
 arch/arm/mach-h720x/h7202-eval.c               |  2 +-
 arch/arm/mach-highbank/highbank.c              |  6 +-----
 arch/arm/mach-imx/imx25-dt.c                   | 11 +----------
 arch/arm/mach-imx/imx27-dt.c                   | 16 ++++++----------
 arch/arm/mach-imx/imx31-dt.c                   | 11 +----------
 arch/arm/mach-imx/imx51-dt.c                   | 16 ++++++----------
 arch/arm/mach-imx/mach-apf9328.c               |  6 +-----
 arch/arm/mach-imx/mach-armadillo5x0.c          |  6 +-----
 arch/arm/mach-imx/mach-bug.c                   |  6 +-----
 arch/arm/mach-imx/mach-cpuimx27.c              |  6 +-----
 arch/arm/mach-imx/mach-cpuimx35.c              |  6 +-----
 arch/arm/mach-imx/mach-cpuimx51sd.c            |  6 +-----
 arch/arm/mach-imx/mach-eukrea_cpuimx25.c       |  6 +-----
 arch/arm/mach-imx/mach-imx27_visstrim_m10.c    |  6 +-----
 arch/arm/mach-imx/mach-imx27ipcam.c            |  6 +-----
 arch/arm/mach-imx/mach-imx27lite.c             |  6 +-----
 arch/arm/mach-imx/mach-imx53.c                 | 16 ++++++----------
 arch/arm/mach-imx/mach-imx6q.c                 |  6 +-----
 arch/arm/mach-imx/mach-kzm_arm11_01.c          |  6 +-----
 arch/arm/mach-imx/mach-mx1ads.c                |  8 ++------
 arch/arm/mach-imx/mach-mx21ads.c               |  6 +-----
 arch/arm/mach-imx/mach-mx25_3ds.c              |  6 +-----
 arch/arm/mach-imx/mach-mx27_3ds.c              |  6 +-----
 arch/arm/mach-imx/mach-mx27ads.c               |  6 +-----
 arch/arm/mach-imx/mach-mx31_3ds.c              |  6 +-----
 arch/arm/mach-imx/mach-mx31ads.c               |  6 +-----
 arch/arm/mach-imx/mach-mx31lilly.c             |  6 +-----
 arch/arm/mach-imx/mach-mx31lite.c              |  6 +-----
 arch/arm/mach-imx/mach-mx31moboard.c           |  6 +-----
 arch/arm/mach-imx/mach-mx35_3ds.c              |  6 +-----
 arch/arm/mach-imx/mach-mx50_rdp.c              |  6 +-----
 arch/arm/mach-imx/mach-mx51_3ds.c              |  6 +-----
 arch/arm/mach-imx/mach-mx51_babbage.c          |  6 +-----
 arch/arm/mach-imx/mach-mxt_td60.c              |  6 +-----
 arch/arm/mach-imx/mach-pca100.c                |  6 +-----
 arch/arm/mach-imx/mach-pcm037.c                |  6 +-----
 arch/arm/mach-imx/mach-pcm038.c                |  6 +-----
 arch/arm/mach-imx/mach-pcm043.c                |  6 +-----
 arch/arm/mach-imx/mach-qong.c                  |  6 +-----
 arch/arm/mach-imx/mach-scb9328.c               |  6 +-----
 arch/arm/mach-imx/mach-vpr200.c                |  6 +-----
 arch/arm/mach-integrator/integrator_ap.c       | 16 ++++------------
 arch/arm/mach-integrator/integrator_cp.c       | 16 ++++------------
 arch/arm/mach-iop13xx/iq81340mc.c              |  6 +-----
 arch/arm/mach-iop13xx/iq81340sc.c              |  6 +-----
 arch/arm/mach-iop32x/em7210.c                  |  6 +-----
 arch/arm/mach-iop32x/glantank.c                |  6 +-----
 arch/arm/mach-iop32x/iq31244.c                 |  8 ++------
 arch/arm/mach-iop32x/iq80321.c                 |  6 +-----
 arch/arm/mach-iop32x/n2100.c                   |  6 +-----
 arch/arm/mach-iop33x/iq80331.c                 |  6 +-----
 arch/arm/mach-iop33x/iq80332.c                 |  6 +-----
 arch/arm/mach-ixp4xx/avila-setup.c             |  4 ++--
 arch/arm/mach-ixp4xx/common.c                  |  4 ----
 arch/arm/mach-ixp4xx/coyote-setup.c            |  4 ++--
 arch/arm/mach-ixp4xx/dsmg600-setup.c           |  6 +-----
 arch/arm/mach-ixp4xx/fsg-setup.c               |  2 +-
 arch/arm/mach-ixp4xx/gateway7001-setup.c       |  2 +-
 arch/arm/mach-ixp4xx/goramo_mlr.c              |  2 +-
 arch/arm/mach-ixp4xx/gtwx5715-setup.c          |  2 +-
 arch/arm/mach-ixp4xx/include/mach/platform.h   |  3 ---
 arch/arm/mach-ixp4xx/ixdp425-setup.c           |  8 ++++----
 arch/arm/mach-ixp4xx/nas100d-setup.c           |  2 +-
 arch/arm/mach-ixp4xx/nslu2-setup.c             |  6 +-----
 arch/arm/mach-ixp4xx/omixp-setup.c             |  6 +++---
 arch/arm/mach-ixp4xx/vulcan-setup.c            |  2 +-
 arch/arm/mach-ixp4xx/wg302v2-setup.c           |  2 +-
 arch/arm/mach-kirkwood/board-dt.c              |  2 +-
 arch/arm/mach-kirkwood/common.c                |  6 +-----
 arch/arm/mach-kirkwood/common.h                |  2 +-
 arch/arm/mach-kirkwood/d2net_v2-setup.c        |  2 +-
 arch/arm/mach-kirkwood/db88f6281-bp-setup.c    |  2 +-
 arch/arm/mach-kirkwood/dockstar-setup.c        |  2 +-
 arch/arm/mach-kirkwood/guruplug-setup.c        |  2 +-
 arch/arm/mach-kirkwood/mv88f6281gtw_ge-setup.c |  2 +-
 arch/arm/mach-kirkwood/netspace_v2-setup.c     |  6 +++---
 arch/arm/mach-kirkwood/netxbig_v2-setup.c      |  4 ++--
 arch/arm/mach-kirkwood/openrd-setup.c          |  6 +++---
 arch/arm/mach-kirkwood/rd88f6192-nas-setup.c   |  2 +-
 arch/arm/mach-kirkwood/rd88f6281-setup.c       |  2 +-
 arch/arm/mach-kirkwood/sheevaplug-setup.c      |  4 ++--
 arch/arm/mach-kirkwood/t5325-setup.c           |  2 +-
 arch/arm/mach-kirkwood/ts219-setup.c           |  2 +-
 arch/arm/mach-kirkwood/ts41x-setup.c           |  2 +-
 arch/arm/mach-ks8695/board-acs5k.c             |  2 +-
 arch/arm/mach-ks8695/board-dsm320.c            |  2 +-
 arch/arm/mach-ks8695/board-micrel.c            |  2 +-
 arch/arm/mach-ks8695/board-og.c                | 10 +++++-----
 arch/arm/mach-ks8695/board-sg.c                |  6 +++---
 arch/arm/mach-ks8695/generic.h                 |  2 +-
 arch/arm/mach-ks8695/time.c                    |  6 +-----
 arch/arm/mach-lpc32xx/common.h                 |  2 +-
 arch/arm/mach-lpc32xx/phy3250.c                |  2 +-
 arch/arm/mach-lpc32xx/timer.c                  |  7 +------
 arch/arm/mach-mmp/aspenite.c                   |  4 ++--
 arch/arm/mach-mmp/avengers_lite.c              |  2 +-
 arch/arm/mach-mmp/brownstone.c                 |  2 +-
 arch/arm/mach-mmp/common.h                     |  2 --
 arch/arm/mach-mmp/flint.c                      |  2 +-
 arch/arm/mach-mmp/gplugd.c                     |  2 +-
 arch/arm/mach-mmp/include/mach/mmp2.h          |  4 +---
 arch/arm/mach-mmp/include/mach/pxa168.h        |  4 +---
 arch/arm/mach-mmp/include/mach/pxa910.h        |  4 +---
 arch/arm/mach-mmp/jasper.c                     |  2 +-
 arch/arm/mach-mmp/mmp-dt.c                     |  8 ++------
 arch/arm/mach-mmp/mmp2-dt.c                    |  6 +-----
 arch/arm/mach-mmp/mmp2.c                       |  6 +-----
 arch/arm/mach-mmp/pxa168.c                     |  6 +-----
 arch/arm/mach-mmp/pxa910.c                     |  6 +-----
 arch/arm/mach-mmp/tavorevb.c                   |  2 +-
 arch/arm/mach-mmp/teton_bga.c                  |  2 +-
 arch/arm/mach-mmp/ttc_dkb.c                    |  2 +-
 arch/arm/mach-msm/board-dt-8660.c              |  2 +-
 arch/arm/mach-msm/board-dt-8960.c              |  2 +-
 arch/arm/mach-msm/board-halibut.c              |  2 +-
 arch/arm/mach-msm/board-mahimahi.c             |  4 ++--
 arch/arm/mach-msm/board-msm7x30.c              |  6 +++---
 arch/arm/mach-msm/board-qsd8x50.c              |  4 ++--
 arch/arm/mach-msm/board-sapphire.c             |  4 ++--
 arch/arm/mach-msm/board-trout.c                |  2 +-
 arch/arm/mach-msm/common.h                     |  8 ++++----
 arch/arm/mach-msm/timer.c                      | 24 ++++--------------------
 arch/arm/mach-mv78xx0/buffalo-wxl-setup.c      |  2 +-
 arch/arm/mach-mv78xx0/common.c                 |  6 +-----
 arch/arm/mach-mv78xx0/common.h                 |  2 +-
 arch/arm/mach-mv78xx0/db78x00-bp-setup.c       |  2 +-
 arch/arm/mach-mv78xx0/rd78x00-masa-setup.c     |  2 +-
 arch/arm/mach-mvebu/armada-370-xp.c            |  6 +-----
 arch/arm/mach-mxs/mach-mxs.c                   | 12 ++----------
 arch/arm/mach-netx/generic.h                   |  3 +--
 arch/arm/mach-netx/nxdb500.c                   |  2 +-
 arch/arm/mach-netx/nxdkn.c                     |  2 +-
 arch/arm/mach-netx/nxeb500hmi.c                |  2 +-
 arch/arm/mach-netx/time.c                      |  6 +-----
 arch/arm/mach-nomadik/board-nhk8815.c          |  6 +-----
 arch/arm/mach-omap1/board-ams-delta.c          |  2 +-
 arch/arm/mach-omap1/board-fsample.c            |  2 +-
 arch/arm/mach-omap1/board-generic.c            |  2 +-
 arch/arm/mach-omap1/board-h2.c                 |  2 +-
 arch/arm/mach-omap1/board-h3.c                 |  2 +-
 arch/arm/mach-omap1/board-htcherald.c          |  2 +-
 arch/arm/mach-omap1/board-innovator.c          |  2 +-
 arch/arm/mach-omap1/board-nokia770.c           |  2 +-
 arch/arm/mach-omap1/board-osk.c                |  2 +-
 arch/arm/mach-omap1/board-palmte.c             |  2 +-
 arch/arm/mach-omap1/board-palmtt.c             |  2 +-
 arch/arm/mach-omap1/board-palmz71.c            |  2 +-
 arch/arm/mach-omap1/board-perseus2.c           |  2 +-
 arch/arm/mach-omap1/board-sx1.c                |  2 +-
 arch/arm/mach-omap1/board-voiceblue.c          |  2 +-
 arch/arm/mach-omap1/common.h                   |  2 +-
 arch/arm/mach-omap1/time.c                     |  6 +-----
 arch/arm/mach-omap2/board-2430sdp.c            |  2 +-
 arch/arm/mach-omap2/board-3430sdp.c            |  2 +-
 arch/arm/mach-omap2/board-3630sdp.c            |  2 +-
 arch/arm/mach-omap2/board-4430sdp.c            |  2 +-
 arch/arm/mach-omap2/board-am3517crane.c        |  2 +-
 arch/arm/mach-omap2/board-am3517evm.c          |  2 +-
 arch/arm/mach-omap2/board-apollon.c            |  2 +-
 arch/arm/mach-omap2/board-cm-t35.c             |  4 ++--
 arch/arm/mach-omap2/board-cm-t3517.c           |  2 +-
 arch/arm/mach-omap2/board-devkit8000.c         |  2 +-
 arch/arm/mach-omap2/board-generic.c            | 14 +++++++-------
 arch/arm/mach-omap2/board-h4.c                 |  2 +-
 arch/arm/mach-omap2/board-igep0020.c           |  4 ++--
 arch/arm/mach-omap2/board-ldp.c                |  2 +-
 arch/arm/mach-omap2/board-n8x0.c               |  6 +++---
 arch/arm/mach-omap2/board-omap3beagle.c        |  2 +-
 arch/arm/mach-omap2/board-omap3evm.c           |  2 +-
 arch/arm/mach-omap2/board-omap3logic.c         |  4 ++--
 arch/arm/mach-omap2/board-omap3pandora.c       |  2 +-
 arch/arm/mach-omap2/board-omap3stalker.c       |  2 +-
 arch/arm/mach-omap2/board-omap3touchbook.c     |  2 +-
 arch/arm/mach-omap2/board-omap4panda.c         |  2 +-
 arch/arm/mach-omap2/board-overo.c              |  2 +-
 arch/arm/mach-omap2/board-rm680.c              |  4 ++--
 arch/arm/mach-omap2/board-rx51.c               |  2 +-
 arch/arm/mach-omap2/board-ti8168evm.c          |  4 ++--
 arch/arm/mach-omap2/board-zoom.c               |  4 ++--
 arch/arm/mach-omap2/common.h                   | 14 +++++++-------
 arch/arm/mach-omap2/timer.c                    | 22 +++++-----------------
 arch/arm/mach-orion5x/board-dt.c               |  2 +-
 arch/arm/mach-orion5x/common.c                 |  6 +-----
 arch/arm/mach-orion5x/common.h                 |  2 +-
 arch/arm/mach-orion5x/d2net-setup.c            |  4 ++--
 arch/arm/mach-orion5x/db88f5281-setup.c        |  2 +-
 arch/arm/mach-orion5x/dns323-setup.c           |  2 +-
 arch/arm/mach-orion5x/kurobox_pro-setup.c      |  4 ++--
 arch/arm/mach-orion5x/ls-chl-setup.c           |  2 +-
 arch/arm/mach-orion5x/ls_hgl-setup.c           |  2 +-
 arch/arm/mach-orion5x/lsmini-setup.c           |  2 +-
 arch/arm/mach-orion5x/mss2-setup.c             |  2 +-
 arch/arm/mach-orion5x/mv2120-setup.c           |  2 +-
 arch/arm/mach-orion5x/net2big-setup.c          |  2 +-
 arch/arm/mach-orion5x/rd88f5181l-fxo-setup.c   |  2 +-
 arch/arm/mach-orion5x/rd88f5181l-ge-setup.c    |  2 +-
 arch/arm/mach-orion5x/rd88f5182-setup.c        |  2 +-
 arch/arm/mach-orion5x/rd88f6183ap-ge-setup.c   |  2 +-
 arch/arm/mach-orion5x/terastation_pro2-setup.c |  2 +-
 arch/arm/mach-orion5x/ts209-setup.c            |  2 +-
 arch/arm/mach-orion5x/ts409-setup.c            |  2 +-
 arch/arm/mach-orion5x/ts78xx-setup.c           |  2 +-
 arch/arm/mach-orion5x/wnr854t-setup.c          |  2 +-
 arch/arm/mach-orion5x/wrt350n-v2-setup.c       |  2 +-
 arch/arm/mach-picoxcell/common.c               |  2 +-
 arch/arm/mach-picoxcell/common.h               |  2 +-
 arch/arm/mach-prima2/common.c                  |  2 +-
 arch/arm/mach-prima2/common.h                  |  2 +-
 arch/arm/mach-prima2/timer.c                   |  8 ++------
 arch/arm/mach-pxa/balloon3.c                   |  2 +-
 arch/arm/mach-pxa/capc7117.c                   |  2 +-
 arch/arm/mach-pxa/cm-x2xx.c                    |  2 +-
 arch/arm/mach-pxa/cm-x300.c                    |  2 +-
 arch/arm/mach-pxa/colibri-pxa270.c             |  4 ++--
 arch/arm/mach-pxa/colibri-pxa300.c             |  2 +-
 arch/arm/mach-pxa/colibri-pxa320.c             |  2 +-
 arch/arm/mach-pxa/corgi.c                      |  6 +++---
 arch/arm/mach-pxa/csb726.c                     |  2 +-
 arch/arm/mach-pxa/em-x270.c                    |  4 ++--
 arch/arm/mach-pxa/eseries.c                    | 12 ++++++------
 arch/arm/mach-pxa/ezx.c                        | 12 ++++++------
 arch/arm/mach-pxa/generic.h                    |  3 +--
 arch/arm/mach-pxa/gumstix.c                    |  2 +-
 arch/arm/mach-pxa/h5000.c                      |  2 +-
 arch/arm/mach-pxa/himalaya.c                   |  2 +-
 arch/arm/mach-pxa/hx4700.c                     |  2 +-
 arch/arm/mach-pxa/icontrol.c                   |  2 +-
 arch/arm/mach-pxa/idp.c                        |  2 +-
 arch/arm/mach-pxa/littleton.c                  |  2 +-
 arch/arm/mach-pxa/lpd270.c                     |  2 +-
 arch/arm/mach-pxa/lubbock.c                    |  2 +-
 arch/arm/mach-pxa/magician.c                   |  2 +-
 arch/arm/mach-pxa/mainstone.c                  |  2 +-
 arch/arm/mach-pxa/mioa701.c                    |  2 +-
 arch/arm/mach-pxa/mp900.c                      |  2 +-
 arch/arm/mach-pxa/palmld.c                     |  2 +-
 arch/arm/mach-pxa/palmt5.c                     |  2 +-
 arch/arm/mach-pxa/palmtc.c                     |  2 +-
 arch/arm/mach-pxa/palmte2.c                    |  2 +-
 arch/arm/mach-pxa/palmtreo.c                   |  4 ++--
 arch/arm/mach-pxa/palmtx.c                     |  2 +-
 arch/arm/mach-pxa/palmz72.c                    |  2 +-
 arch/arm/mach-pxa/pcm027.c                     |  2 +-
 arch/arm/mach-pxa/poodle.c                     |  2 +-
 arch/arm/mach-pxa/pxa-dt.c                     |  2 +-
 arch/arm/mach-pxa/raumfeld.c                   |  6 +++---
 arch/arm/mach-pxa/saar.c                       |  2 +-
 arch/arm/mach-pxa/spitz.c                      |  6 +++---
 arch/arm/mach-pxa/stargate2.c                  |  4 ++--
 arch/arm/mach-pxa/tavorevb.c                   |  2 +-
 arch/arm/mach-pxa/time.c                       |  6 +-----
 arch/arm/mach-pxa/tosa.c                       |  2 +-
 arch/arm/mach-pxa/trizeps4.c                   |  4 ++--
 arch/arm/mach-pxa/viper.c                      |  2 +-
 arch/arm/mach-pxa/vpac270.c                    |  2 +-
 arch/arm/mach-pxa/xcep.c                       |  2 +-
 arch/arm/mach-pxa/z2.c                         |  2 +-
 arch/arm/mach-pxa/zeus.c                       |  2 +-
 arch/arm/mach-pxa/zylonite.c                   |  2 +-
 arch/arm/mach-realview/realview_eb.c           |  6 +-----
 arch/arm/mach-realview/realview_pb1176.c       |  6 +-----
 arch/arm/mach-realview/realview_pb11mp.c       |  6 +-----
 arch/arm/mach-realview/realview_pba8.c         |  6 +-----
 arch/arm/mach-realview/realview_pbx.c          |  6 +-----
 arch/arm/mach-rpc/riscpc.c                     |  4 ++--
 arch/arm/mach-rpc/time.c                       |  7 +------
 arch/arm/mach-s3c24xx/mach-amlm5900.c          |  2 +-
 arch/arm/mach-s3c24xx/mach-anubis.c            |  2 +-
 arch/arm/mach-s3c24xx/mach-at2440evb.c         |  2 +-
 arch/arm/mach-s3c24xx/mach-bast.c              |  2 +-
 arch/arm/mach-s3c24xx/mach-gta02.c             |  2 +-
 arch/arm/mach-s3c24xx/mach-h1940.c             |  2 +-
 arch/arm/mach-s3c24xx/mach-jive.c              |  2 +-
 arch/arm/mach-s3c24xx/mach-mini2440.c          |  2 +-
 arch/arm/mach-s3c24xx/mach-n30.c               |  4 ++--
 arch/arm/mach-s3c24xx/mach-nexcoder.c          |  2 +-
 arch/arm/mach-s3c24xx/mach-osiris.c            |  2 +-
 arch/arm/mach-s3c24xx/mach-otom.c              |  2 +-
 arch/arm/mach-s3c24xx/mach-qt2410.c            |  2 +-
 arch/arm/mach-s3c24xx/mach-rx1950.c            |  2 +-
 arch/arm/mach-s3c24xx/mach-rx3715.c            |  2 +-
 arch/arm/mach-s3c24xx/mach-smdk2410.c          |  2 +-
 arch/arm/mach-s3c24xx/mach-smdk2413.c          |  6 +++---
 arch/arm/mach-s3c24xx/mach-smdk2416.c          |  2 +-
 arch/arm/mach-s3c24xx/mach-smdk2440.c          |  2 +-
 arch/arm/mach-s3c24xx/mach-smdk2443.c          |  2 +-
 arch/arm/mach-s3c24xx/mach-tct_hammer.c        |  2 +-
 arch/arm/mach-s3c24xx/mach-vr1000.c            |  2 +-
 arch/arm/mach-s3c24xx/mach-vstms.c             |  2 +-
 arch/arm/mach-s3c64xx/mach-anw6410.c           |  2 +-
 arch/arm/mach-s3c64xx/mach-crag6410.c          |  2 +-
 arch/arm/mach-s3c64xx/mach-hmt.c               |  2 +-
 arch/arm/mach-s3c64xx/mach-mini6410.c          |  2 +-
 arch/arm/mach-s3c64xx/mach-ncp.c               |  2 +-
 arch/arm/mach-s3c64xx/mach-real6410.c          |  2 +-
 arch/arm/mach-s3c64xx/mach-smartq5.c           |  2 +-
 arch/arm/mach-s3c64xx/mach-smartq7.c           |  2 +-
 arch/arm/mach-s3c64xx/mach-smdk6400.c          |  2 +-
 arch/arm/mach-s3c64xx/mach-smdk6410.c          |  2 +-
 arch/arm/mach-s5p64x0/mach-smdk6440.c          |  2 +-
 arch/arm/mach-s5p64x0/mach-smdk6450.c          |  2 +-
 arch/arm/mach-s5pc100/mach-smdkc100.c          |  2 +-
 arch/arm/mach-s5pv210/mach-aquila.c            |  2 +-
 arch/arm/mach-s5pv210/mach-goni.c              |  2 +-
 arch/arm/mach-s5pv210/mach-smdkc110.c          |  2 +-
 arch/arm/mach-s5pv210/mach-smdkv210.c          |  2 +-
 arch/arm/mach-s5pv210/mach-torbreck.c          |  2 +-
 arch/arm/mach-sa1100/assabet.c                 |  2 +-
 arch/arm/mach-sa1100/badge4.c                  |  2 +-
 arch/arm/mach-sa1100/cerf.c                    |  2 +-
 arch/arm/mach-sa1100/collie.c                  |  2 +-
 arch/arm/mach-sa1100/generic.h                 |  4 +---
 arch/arm/mach-sa1100/h3100.c                   |  2 +-
 arch/arm/mach-sa1100/h3600.c                   |  2 +-
 arch/arm/mach-sa1100/hackkit.c                 |  2 +-
 arch/arm/mach-sa1100/jornada720.c              |  2 +-
 arch/arm/mach-sa1100/lart.c                    |  2 +-
 arch/arm/mach-sa1100/nanoengine.c              |  2 +-
 arch/arm/mach-sa1100/pleb.c                    |  2 +-
 arch/arm/mach-sa1100/shannon.c                 |  2 +-
 arch/arm/mach-sa1100/simpad.c                  |  2 +-
 arch/arm/mach-sa1100/time.c                    |  6 +-----
 arch/arm/mach-shark/core.c                     |  6 +-----
 arch/arm/mach-shmobile/board-ag5evm.c          |  2 +-
 arch/arm/mach-shmobile/board-ap4evb.c          |  2 +-
 arch/arm/mach-shmobile/board-armadillo800eva.c |  5 +----
 arch/arm/mach-shmobile/board-bonito.c          |  5 +----
 arch/arm/mach-shmobile/board-kota2.c           |  2 +-
 arch/arm/mach-shmobile/board-kzm9d.c           |  2 +-
 arch/arm/mach-shmobile/board-kzm9g.c           |  2 +-
 arch/arm/mach-shmobile/board-mackerel.c        |  2 +-
 arch/arm/mach-shmobile/board-marzen.c          |  2 +-
 arch/arm/mach-shmobile/include/mach/common.h   |  5 ++++-
 arch/arm/mach-shmobile/setup-emev2.c           |  2 +-
 arch/arm/mach-shmobile/setup-r8a7740.c         | 11 +----------
 arch/arm/mach-shmobile/setup-r8a7779.c         |  5 +----
 arch/arm/mach-shmobile/setup-sh7372.c          |  7 ++-----
 arch/arm/mach-shmobile/setup-sh73a0.c          |  5 +----
 arch/arm/mach-shmobile/timer.c                 |  6 +-----
 arch/arm/mach-socfpga/socfpga.c                |  2 +-
 arch/arm/mach-spear13xx/include/mach/generic.h |  2 +-
 arch/arm/mach-spear13xx/spear1310.c            |  2 +-
 arch/arm/mach-spear13xx/spear1340.c            |  2 +-
 arch/arm/mach-spear13xx/spear13xx.c            |  6 +-----
 arch/arm/mach-spear3xx/include/mach/generic.h  |  2 +-
 arch/arm/mach-spear3xx/spear300.c              |  2 +-
 arch/arm/mach-spear3xx/spear310.c              |  2 +-
 arch/arm/mach-spear3xx/spear320.c              |  2 +-
 arch/arm/mach-spear3xx/spear3xx.c              |  6 +-----
 arch/arm/mach-spear6xx/spear6xx.c              |  8 ++------
 arch/arm/mach-tegra/board-dt-tegra20.c         |  2 +-
 arch/arm/mach-tegra/board-dt-tegra30.c         |  2 +-
 arch/arm/mach-tegra/board.h                    |  2 +-
 arch/arm/mach-tegra/timer.c                    |  6 +-----
 arch/arm/mach-u300/core.c                      |  2 +-
 arch/arm/mach-u300/timer.c                     | 10 +---------
 arch/arm/mach-u300/timer.h                     |  2 +-
 arch/arm/mach-ux500/board-mop500.c             |  8 ++++----
 arch/arm/mach-ux500/cpu-db8500.c               |  2 +-
 arch/arm/mach-ux500/include/mach/setup.h       |  3 +--
 arch/arm/mach-ux500/timer.c                    |  6 +-----
 arch/arm/mach-versatile/core.c                 |  7 +------
 arch/arm/mach-versatile/core.h                 |  2 +-
 arch/arm/mach-versatile/versatile_ab.c         |  2 +-
 arch/arm/mach-versatile/versatile_dt.c         |  2 +-
 arch/arm/mach-versatile/versatile_pb.c         |  2 +-
 arch/arm/mach-vexpress/v2m.c                   | 12 ++----------
 arch/arm/mach-vt8500/vt8500.c                  |  6 +-----
 arch/arm/mach-w90x900/mach-nuc910evb.c         |  2 +-
 arch/arm/mach-w90x900/mach-nuc950evb.c         |  2 +-
 arch/arm/mach-w90x900/mach-nuc960evb.c         |  2 +-
 arch/arm/mach-w90x900/nuc9xx.h                 |  3 +--
 arch/arm/mach-w90x900/time.c                   |  6 +-----
 arch/arm/mach-zynq/common.c                    |  9 +--------
 arch/arm/plat-samsung/include/plat/cpu.h       |  3 +--
 arch/arm/plat-samsung/include/plat/s5p-time.h  |  2 +-
 arch/arm/plat-samsung/s5p-time.c               |  6 +-----
 arch/arm/plat-samsung/time.c                   |  6 +-----
 drivers/clocksource/bcm2835_timer.c            |  6 +-----
 drivers/clocksource/dw_apb_timer_of.c          |  6 +-----
 drivers/clocksource/sunxi_timer.c              |  6 +-----
 include/linux/bcm2835_timer.h                  |  2 +-
 include/linux/dw_apb_timer.h                   |  2 +-
 include/linux/sunxi_timer.h                    |  2 +-
 495 files changed, 629 insertions(+), 1224 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/include/asm/mach/arch.h b/arch/arm/include/asm/mach/arch.h
index 917d4fcfd9b4..308ad7d6f98b 100644
--- a/arch/arm/include/asm/mach/arch.h
+++ b/arch/arm/include/asm/mach/arch.h
@@ -12,7 +12,6 @@
 
 struct tag;
 struct meminfo;
-struct sys_timer;
 struct pt_regs;
 struct smp_operations;
 #ifdef CONFIG_SMP
@@ -48,7 +47,7 @@ struct machine_desc {
 	void			(*map_io)(void);/* IO mapping function	*/
 	void			(*init_early)(void);
 	void			(*init_irq)(void);
-	struct sys_timer	*timer;		/* system tick timer	*/
+	void			(*init_time)(void);
 	void			(*init_machine)(void);
 	void			(*init_late)(void);
 #ifdef CONFIG_MULTI_IRQ_HANDLER
diff --git a/arch/arm/include/asm/mach/time.h b/arch/arm/include/asm/mach/time.h
index d316d76ef242..90c12e1e695c 100644
--- a/arch/arm/include/asm/mach/time.h
+++ b/arch/arm/include/asm/mach/time.h
@@ -10,22 +10,6 @@
 #ifndef __ASM_ARM_MACH_TIME_H
 #define __ASM_ARM_MACH_TIME_H
 
-/*
- * This is our kernel timer structure.
- *
- * - init
- *   Initialise the kernels jiffy timer source, claim interrupt
- *   using setup_irq.  This is called early on during initialisation
- *   while interrupts are still disabled on the local CPU.
- * - offset
- *   Return the timer offset in microseconds since the last timer
- *   interrupt.  Note: this must take account of any unprocessed
- *   timer interrupt which may be pending.
- */
-struct sys_timer {
-	void			(*init)(void);
-};
-
 extern void timer_tick(void);
 
 struct timespec;
diff --git a/arch/arm/kernel/time.c b/arch/arm/kernel/time.c
index 0b51a7c88157..955d92d265e5 100644
--- a/arch/arm/kernel/time.c
+++ b/arch/arm/kernel/time.c
@@ -30,11 +30,6 @@
 #include <asm/mach/arch.h>
 #include <asm/mach/time.h>
 
-/*
- * Our system timer.
- */
-static struct sys_timer *system_timer;
-
 #if defined(CONFIG_RTC_DRV_CMOS) || defined(CONFIG_RTC_DRV_CMOS_MODULE) || \
     defined(CONFIG_NVRAM) || defined(CONFIG_NVRAM_MODULE)
 /* this needs a better home */
@@ -120,8 +115,6 @@ int __init register_persistent_clock(clock_access_fn read_boot,
 
 void __init time_init(void)
 {
-	system_timer = machine_desc->timer;
-	system_timer->init();
+	machine_desc->init_time();
 	sched_clock_postinit();
 }
-
diff --git a/arch/arm/mach-at91/at91rm9200_time.c b/arch/arm/mach-at91/at91rm9200_time.c
index cafe98836c8a..180b3024bec3 100644
--- a/arch/arm/mach-at91/at91rm9200_time.c
+++ b/arch/arm/mach-at91/at91rm9200_time.c
@@ -274,8 +274,3 @@ void __init at91rm9200_timer_init(void)
 	/* register clocksource */
 	clocksource_register_hz(&clk32k, AT91_SLOW_CLOCK);
 }
-
-struct sys_timer at91rm9200_timer = {
-	.init		= at91rm9200_timer_init,
-};
-
diff --git a/arch/arm/mach-at91/at91sam926x_time.c b/arch/arm/mach-at91/at91sam926x_time.c
index f7191e11df27..3a4bc2e1a65e 100644
--- a/arch/arm/mach-at91/at91sam926x_time.c
+++ b/arch/arm/mach-at91/at91sam926x_time.c
@@ -224,7 +224,7 @@ static int __init of_at91sam926x_pit_init(void)
 /*
  * Set up both clocksource and clockevent support.
  */
-static void __init at91sam926x_pit_init(void)
+void __init at91sam926x_pit_init(void)
 {
 	unsigned long	pit_rate;
 	unsigned	bits;
@@ -279,7 +279,3 @@ void __init at91sam926x_ioremap_pit(u32 addr)
 	if (!pit_base_addr)
 		panic("Impossible to ioremap PIT\n");
 }
-
-struct sys_timer at91sam926x_timer = {
-	.init		= at91sam926x_pit_init,
-};
diff --git a/arch/arm/mach-at91/at91x40_time.c b/arch/arm/mach-at91/at91x40_time.c
index fb3c70124ac3..0c07a4459cb2 100644
--- a/arch/arm/mach-at91/at91x40_time.c
+++ b/arch/arm/mach-at91/at91x40_time.c
@@ -82,8 +82,3 @@ void __init at91x40_timer_init(void)
 
 	at91_tc_write(AT91_TC_CLK1BASE + AT91_TC_CCR, (AT91_TC_SWTRG | AT91_TC_CLKEN));
 }
-
-struct sys_timer at91x40_timer = {
-	.init	= at91x40_timer_init,
-};
-
diff --git a/arch/arm/mach-at91/board-1arm.c b/arch/arm/mach-at91/board-1arm.c
index b99b5752cc10..35ab632bbf68 100644
--- a/arch/arm/mach-at91/board-1arm.c
+++ b/arch/arm/mach-at91/board-1arm.c
@@ -90,7 +90,7 @@ static void __init onearm_board_init(void)
 
 MACHINE_START(ONEARM, "Ajeco 1ARM single board computer")
 	/* Maintainer: Lennert Buytenhek <buytenh@wantstofly.org> */
-	.timer		= &at91rm9200_timer,
+	.init_time	= at91rm9200_timer_init,
 	.map_io		= at91_map_io,
 	.handle_irq	= at91_aic_handle_irq,
 	.init_early	= onearm_init_early,
diff --git a/arch/arm/mach-at91/board-afeb-9260v1.c b/arch/arm/mach-at91/board-afeb-9260v1.c
index 854b97974287..f95e31cda4b3 100644
--- a/arch/arm/mach-at91/board-afeb-9260v1.c
+++ b/arch/arm/mach-at91/board-afeb-9260v1.c
@@ -210,7 +210,7 @@ static void __init afeb9260_board_init(void)
 
 MACHINE_START(AFEB9260, "Custom afeb9260 board")
 	/* Maintainer: Sergey Lapin <slapin@ossfans.org> */
-	.timer		= &at91sam926x_timer,
+	.init_time	= at91sam926x_pit_init,
 	.map_io		= at91_map_io,
 	.handle_irq	= at91_aic_handle_irq,
 	.init_early	= afeb9260_init_early,
diff --git a/arch/arm/mach-at91/board-cam60.c b/arch/arm/mach-at91/board-cam60.c
index 28a18ce6d914..ade948b82662 100644
--- a/arch/arm/mach-at91/board-cam60.c
+++ b/arch/arm/mach-at91/board-cam60.c
@@ -187,7 +187,7 @@ static void __init cam60_board_init(void)
 
 MACHINE_START(CAM60, "KwikByte CAM60")
 	/* Maintainer: KwikByte */
-	.timer		= &at91sam926x_timer,
+	.init_time	= at91sam926x_pit_init,
 	.map_io		= at91_map_io,
 	.handle_irq	= at91_aic_handle_irq,
 	.init_early	= cam60_init_early,
diff --git a/arch/arm/mach-at91/board-carmeva.c b/arch/arm/mach-at91/board-carmeva.c
index c17bb533a949..92983050a9bd 100644
--- a/arch/arm/mach-at91/board-carmeva.c
+++ b/arch/arm/mach-at91/board-carmeva.c
@@ -157,7 +157,7 @@ static void __init carmeva_board_init(void)
 
 MACHINE_START(CARMEVA, "Carmeva")
 	/* Maintainer: Conitec Datasystems */
-	.timer		= &at91rm9200_timer,
+	.init_time	= at91rm9200_timer_init,
 	.map_io		= at91_map_io,
 	.handle_irq	= at91_aic_handle_irq,
 	.init_early	= carmeva_init_early,
diff --git a/arch/arm/mach-at91/board-cpu9krea.c b/arch/arm/mach-at91/board-cpu9krea.c
index 847432441ecc..008527efdbcf 100644
--- a/arch/arm/mach-at91/board-cpu9krea.c
+++ b/arch/arm/mach-at91/board-cpu9krea.c
@@ -374,7 +374,7 @@ MACHINE_START(CPUAT9260, "Eukrea CPU9260")
 MACHINE_START(CPUAT9G20, "Eukrea CPU9G20")
 #endif
 	/* Maintainer: Eric Benard - EUKREA Electromatique */
-	.timer		= &at91sam926x_timer,
+	.init_time	= at91sam926x_pit_init,
 	.map_io		= at91_map_io,
 	.handle_irq	= at91_aic_handle_irq,
 	.init_early	= cpu9krea_init_early,
diff --git a/arch/arm/mach-at91/board-cpuat91.c b/arch/arm/mach-at91/board-cpuat91.c
index 2a7af7868747..42f1353a4baf 100644
--- a/arch/arm/mach-at91/board-cpuat91.c
+++ b/arch/arm/mach-at91/board-cpuat91.c
@@ -178,7 +178,7 @@ static void __init cpuat91_board_init(void)
 
 MACHINE_START(CPUAT91, "Eukrea")
 	/* Maintainer: Eric Benard - EUKREA Electromatique */
-	.timer		= &at91rm9200_timer,
+	.init_time	= at91rm9200_timer_init,
 	.map_io		= at91_map_io,
 	.handle_irq	= at91_aic_handle_irq,
 	.init_early	= cpuat91_init_early,
diff --git a/arch/arm/mach-at91/board-csb337.c b/arch/arm/mach-at91/board-csb337.c
index 48a531e05be3..e5fde215225b 100644
--- a/arch/arm/mach-at91/board-csb337.c
+++ b/arch/arm/mach-at91/board-csb337.c
@@ -251,7 +251,7 @@ static void __init csb337_board_init(void)
 
 MACHINE_START(CSB337, "Cogent CSB337")
 	/* Maintainer: Bill Gatliff */
-	.timer		= &at91rm9200_timer,
+	.init_time	= at91rm9200_timer_init,
 	.map_io		= at91_map_io,
 	.handle_irq	= at91_aic_handle_irq,
 	.init_early	= csb337_init_early,
diff --git a/arch/arm/mach-at91/board-csb637.c b/arch/arm/mach-at91/board-csb637.c
index ec0f3abd504b..fdf11061c577 100644
--- a/arch/arm/mach-at91/board-csb637.c
+++ b/arch/arm/mach-at91/board-csb637.c
@@ -132,7 +132,7 @@ static void __init csb637_board_init(void)
 
 MACHINE_START(CSB637, "Cogent CSB637")
 	/* Maintainer: Bill Gatliff */
-	.timer		= &at91rm9200_timer,
+	.init_time	= at91rm9200_timer_init,
 	.map_io		= at91_map_io,
 	.handle_irq	= at91_aic_handle_irq,
 	.init_early	= csb637_init_early,
diff --git a/arch/arm/mach-at91/board-dt.c b/arch/arm/mach-at91/board-dt.c
index 881170ce61dd..8db30132abed 100644
--- a/arch/arm/mach-at91/board-dt.c
+++ b/arch/arm/mach-at91/board-dt.c
@@ -49,7 +49,7 @@ static const char *at91_dt_board_compat[] __initdata = {
 
 DT_MACHINE_START(at91sam_dt, "Atmel AT91SAM (Device Tree)")
 	/* Maintainer: Atmel */
-	.timer		= &at91sam926x_timer,
+	.init_time	= at91sam926x_pit_init,
 	.map_io		= at91_map_io,
 	.handle_irq	= at91_aic_handle_irq,
 	.init_early	= at91_dt_initialize,
diff --git a/arch/arm/mach-at91/board-eb01.c b/arch/arm/mach-at91/board-eb01.c
index b489388a6f84..becf0a6a289e 100644
--- a/arch/arm/mach-at91/board-eb01.c
+++ b/arch/arm/mach-at91/board-eb01.c
@@ -44,7 +44,7 @@ static void __init at91eb01_init_early(void)
 
 MACHINE_START(AT91EB01, "Atmel AT91 EB01")
 	/* Maintainer: Greg Ungerer <gerg@snapgear.com> */
-	.timer		= &at91x40_timer,
+	.init_time	= at91x40_timer_init,
 	.handle_irq	= at91_aic_handle_irq,
 	.init_early	= at91eb01_init_early,
 	.init_irq	= at91eb01_init_irq,
diff --git a/arch/arm/mach-at91/board-eb9200.c b/arch/arm/mach-at91/board-eb9200.c
index 9f5e71c95f05..f9be8161bbfa 100644
--- a/arch/arm/mach-at91/board-eb9200.c
+++ b/arch/arm/mach-at91/board-eb9200.c
@@ -116,7 +116,7 @@ static void __init eb9200_board_init(void)
 }
 
 MACHINE_START(ATEB9200, "Embest ATEB9200")
-	.timer		= &at91rm9200_timer,
+	.init_time	= at91rm9200_timer_init,
 	.map_io		= at91_map_io,
 	.handle_irq	= at91_aic_handle_irq,
 	.init_early	= eb9200_init_early,
diff --git a/arch/arm/mach-at91/board-ecbat91.c b/arch/arm/mach-at91/board-ecbat91.c
index ef69e0ebe949..b2fcd71262ba 100644
--- a/arch/arm/mach-at91/board-ecbat91.c
+++ b/arch/arm/mach-at91/board-ecbat91.c
@@ -181,7 +181,7 @@ static void __init ecb_at91board_init(void)
 
 MACHINE_START(ECBAT91, "emQbit's ECB_AT91")
 	/* Maintainer: emQbit.com */
-	.timer		= &at91rm9200_timer,
+	.init_time	= at91rm9200_timer_init,
 	.map_io		= at91_map_io,
 	.handle_irq	= at91_aic_handle_irq,
 	.init_early	= ecb_at91init_early,
diff --git a/arch/arm/mach-at91/board-eco920.c b/arch/arm/mach-at91/board-eco920.c
index 50f3d3795c05..77de410efc90 100644
--- a/arch/arm/mach-at91/board-eco920.c
+++ b/arch/arm/mach-at91/board-eco920.c
@@ -149,7 +149,7 @@ static void __init eco920_board_init(void)
 
 MACHINE_START(ECO920, "eco920")
 	/* Maintainer: Sascha Hauer */
-	.timer		= &at91rm9200_timer,
+	.init_time	= at91rm9200_timer_init,
 	.map_io		= at91_map_io,
 	.handle_irq	= at91_aic_handle_irq,
 	.init_early	= eco920_init_early,
diff --git a/arch/arm/mach-at91/board-flexibity.c b/arch/arm/mach-at91/board-flexibity.c
index 5d44eba0f20f..737c08563628 100644
--- a/arch/arm/mach-at91/board-flexibity.c
+++ b/arch/arm/mach-at91/board-flexibity.c
@@ -159,7 +159,7 @@ static void __init flexibity_board_init(void)
 
 MACHINE_START(FLEXIBITY, "Flexibity Connect")
 	/* Maintainer: Maxim Osipov */
-	.timer		= &at91sam926x_timer,
+	.init_time	= at91sam926x_pit_init,
 	.map_io		= at91_map_io,
 	.handle_irq	= at91_aic_handle_irq,
 	.init_early	= flexibity_init_early,
diff --git a/arch/arm/mach-at91/board-foxg20.c b/arch/arm/mach-at91/board-foxg20.c
index 191d37c16bab..2ea7059b840b 100644
--- a/arch/arm/mach-at91/board-foxg20.c
+++ b/arch/arm/mach-at91/board-foxg20.c
@@ -261,7 +261,7 @@ static void __init foxg20_board_init(void)
 
 MACHINE_START(ACMENETUSFOXG20, "Acme Systems srl FOX Board G20")
 	/* Maintainer: Sergio Tanzilli */
-	.timer		= &at91sam926x_timer,
+	.init_time	= at91sam926x_pit_init,
 	.map_io		= at91_map_io,
 	.handle_irq	= at91_aic_handle_irq,
 	.init_early	= foxg20_init_early,
diff --git a/arch/arm/mach-at91/board-gsia18s.c b/arch/arm/mach-at91/board-gsia18s.c
index 23a2fa17ab29..c1d61d247790 100644
--- a/arch/arm/mach-at91/board-gsia18s.c
+++ b/arch/arm/mach-at91/board-gsia18s.c
@@ -574,7 +574,7 @@ static void __init gsia18s_board_init(void)
 }
 
 MACHINE_START(GSIA18S, "GS_IA18_S")
-	.timer		= &at91sam926x_timer,
+	.init_time	= at91sam926x_pit_init,
 	.map_io		= at91_map_io,
 	.handle_irq	= at91_aic_handle_irq,
 	.init_early	= gsia18s_init_early,
diff --git a/arch/arm/mach-at91/board-kafa.c b/arch/arm/mach-at91/board-kafa.c
index 9a43d1e1a037..88e2f5d2d16d 100644
--- a/arch/arm/mach-at91/board-kafa.c
+++ b/arch/arm/mach-at91/board-kafa.c
@@ -103,7 +103,7 @@ static void __init kafa_board_init(void)
 
 MACHINE_START(KAFA, "Sperry-Sun KAFA")
 	/* Maintainer: Sergei Sharonov */
-	.timer		= &at91rm9200_timer,
+	.init_time	= at91rm9200_timer_init,
 	.map_io		= at91_map_io,
 	.handle_irq	= at91_aic_handle_irq,
 	.init_early	= kafa_init_early,
diff --git a/arch/arm/mach-at91/board-kb9202.c b/arch/arm/mach-at91/board-kb9202.c
index f168bec2369f..0c519d9ebffc 100644
--- a/arch/arm/mach-at91/board-kb9202.c
+++ b/arch/arm/mach-at91/board-kb9202.c
@@ -149,7 +149,7 @@ static void __init kb9202_board_init(void)
 
 MACHINE_START(KB9200, "KB920x")
 	/* Maintainer: KwikByte, Inc. */
-	.timer		= &at91rm9200_timer,
+	.init_time	= at91rm9200_timer_init,
 	.map_io		= at91_map_io,
 	.handle_irq	= at91_aic_handle_irq,
 	.init_early	= kb9202_init_early,
diff --git a/arch/arm/mach-at91/board-neocore926.c b/arch/arm/mach-at91/board-neocore926.c
index bc7a1c4a1f6a..5b4760fe53de 100644
--- a/arch/arm/mach-at91/board-neocore926.c
+++ b/arch/arm/mach-at91/board-neocore926.c
@@ -378,7 +378,7 @@ static void __init neocore926_board_init(void)
 
 MACHINE_START(NEOCORE926, "ADENEO NEOCORE 926")
 	/* Maintainer: ADENEO */
-	.timer		= &at91sam926x_timer,
+	.init_time	= at91sam926x_pit_init,
 	.map_io		= at91_map_io,
 	.handle_irq	= at91_aic_handle_irq,
 	.init_early	= neocore926_init_early,
diff --git a/arch/arm/mach-at91/board-pcontrol-g20.c b/arch/arm/mach-at91/board-pcontrol-g20.c
index 0299554495dd..65c0d6b5ecba 100644
--- a/arch/arm/mach-at91/board-pcontrol-g20.c
+++ b/arch/arm/mach-at91/board-pcontrol-g20.c
@@ -217,7 +217,7 @@ static void __init pcontrol_g20_board_init(void)
 
 MACHINE_START(PCONTROL_G20, "PControl G20")
 	/* Maintainer: pgsellmann@portner-elektronik.at */
-	.timer		= &at91sam926x_timer,
+	.init_time	= at91sam926x_pit_init,
 	.map_io		= at91_map_io,
 	.handle_irq	= at91_aic_handle_irq,
 	.init_early	= pcontrol_g20_init_early,
diff --git a/arch/arm/mach-at91/board-picotux200.c b/arch/arm/mach-at91/board-picotux200.c
index 4938f1cd5e13..ab2b2ec36c14 100644
--- a/arch/arm/mach-at91/board-picotux200.c
+++ b/arch/arm/mach-at91/board-picotux200.c
@@ -119,7 +119,7 @@ static void __init picotux200_board_init(void)
 
 MACHINE_START(PICOTUX2XX, "picotux 200")
 	/* Maintainer: Kleinhenz Elektronik GmbH */
-	.timer		= &at91rm9200_timer,
+	.init_time	= at91rm9200_timer_init,
 	.map_io		= at91_map_io,
 	.handle_irq	= at91_aic_handle_irq,
 	.init_early	= picotux200_init_early,
diff --git a/arch/arm/mach-at91/board-qil-a9260.c b/arch/arm/mach-at91/board-qil-a9260.c
index 33b1628467ea..aa3bc9b0f150 100644
--- a/arch/arm/mach-at91/board-qil-a9260.c
+++ b/arch/arm/mach-at91/board-qil-a9260.c
@@ -257,7 +257,7 @@ static void __init ek_board_init(void)
 
 MACHINE_START(QIL_A9260, "CALAO QIL_A9260")
 	/* Maintainer: calao-systems */
-	.timer		= &at91sam926x_timer,
+	.init_time	= at91sam926x_pit_init,
 	.map_io		= at91_map_io,
 	.handle_irq	= at91_aic_handle_irq,
 	.init_early	= ek_init_early,
diff --git a/arch/arm/mach-at91/board-rm9200dk.c b/arch/arm/mach-at91/board-rm9200dk.c
index 9e5061bef0d0..690541b18cbc 100644
--- a/arch/arm/mach-at91/board-rm9200dk.c
+++ b/arch/arm/mach-at91/board-rm9200dk.c
@@ -219,7 +219,7 @@ static void __init dk_board_init(void)
 
 MACHINE_START(AT91RM9200DK, "Atmel AT91RM9200-DK")
 	/* Maintainer: SAN People/Atmel */
-	.timer		= &at91rm9200_timer,
+	.init_time	= at91rm9200_timer_init,
 	.map_io		= at91_map_io,
 	.handle_irq	= at91_aic_handle_irq,
 	.init_early	= dk_init_early,
diff --git a/arch/arm/mach-at91/board-rm9200ek.c b/arch/arm/mach-at91/board-rm9200ek.c
index 58277dbc718f..8b17dadc1aba 100644
--- a/arch/arm/mach-at91/board-rm9200ek.c
+++ b/arch/arm/mach-at91/board-rm9200ek.c
@@ -186,7 +186,7 @@ static void __init ek_board_init(void)
 
 MACHINE_START(AT91RM9200EK, "Atmel AT91RM9200-EK")
 	/* Maintainer: SAN People/Atmel */
-	.timer		= &at91rm9200_timer,
+	.init_time	= at91rm9200_timer_init,
 	.map_io		= at91_map_io,
 	.handle_irq	= at91_aic_handle_irq,
 	.init_early	= ek_init_early,
diff --git a/arch/arm/mach-at91/board-rsi-ews.c b/arch/arm/mach-at91/board-rsi-ews.c
index 2e8b8339a206..f6d7f1958c7e 100644
--- a/arch/arm/mach-at91/board-rsi-ews.c
+++ b/arch/arm/mach-at91/board-rsi-ews.c
@@ -222,7 +222,7 @@ static void __init rsi_ews_board_init(void)
 
 MACHINE_START(RSI_EWS, "RSI EWS")
 	/* Maintainer: Josef Holzmayr <holzmayr@rsi-elektrotechnik.de> */
-	.timer		= &at91rm9200_timer,
+	.init_time	= at91rm9200_timer_init,
 	.map_io		= at91_map_io,
 	.handle_irq	= at91_aic_handle_irq,
 	.init_early	= rsi_ews_init_early,
diff --git a/arch/arm/mach-at91/board-sam9-l9260.c b/arch/arm/mach-at91/board-sam9-l9260.c
index b75fbf6003a1..43ee4dc43b50 100644
--- a/arch/arm/mach-at91/board-sam9-l9260.c
+++ b/arch/arm/mach-at91/board-sam9-l9260.c
@@ -218,7 +218,7 @@ static void __init ek_board_init(void)
 
 MACHINE_START(SAM9_L9260, "Olimex SAM9-L9260")
 	/* Maintainer: Olimex */
-	.timer		= &at91sam926x_timer,
+	.init_time	= at91sam926x_pit_init,
 	.map_io		= at91_map_io,
 	.handle_irq	= at91_aic_handle_irq,
 	.init_early	= ek_init_early,
diff --git a/arch/arm/mach-at91/board-sam9260ek.c b/arch/arm/mach-at91/board-sam9260ek.c
index f0135cd1d858..0b153c87521d 100644
--- a/arch/arm/mach-at91/board-sam9260ek.c
+++ b/arch/arm/mach-at91/board-sam9260ek.c
@@ -343,7 +343,7 @@ static void __init ek_board_init(void)
 
 MACHINE_START(AT91SAM9260EK, "Atmel AT91SAM9260-EK")
 	/* Maintainer: Atmel */
-	.timer		= &at91sam926x_timer,
+	.init_time	= at91sam926x_pit_init,
 	.map_io		= at91_map_io,
 	.handle_irq	= at91_aic_handle_irq,
 	.init_early	= ek_init_early,
diff --git a/arch/arm/mach-at91/board-sam9261ek.c b/arch/arm/mach-at91/board-sam9261ek.c
index 13ebaa8e4100..b446645c7727 100644
--- a/arch/arm/mach-at91/board-sam9261ek.c
+++ b/arch/arm/mach-at91/board-sam9261ek.c
@@ -612,7 +612,7 @@ MACHINE_START(AT91SAM9261EK, "Atmel AT91SAM9261-EK")
 MACHINE_START(AT91SAM9G10EK, "Atmel AT91SAM9G10-EK")
 #endif
 	/* Maintainer: Atmel */
-	.timer		= &at91sam926x_timer,
+	.init_time	= at91sam926x_pit_init,
 	.map_io		= at91_map_io,
 	.handle_irq	= at91_aic_handle_irq,
 	.init_early	= ek_init_early,
diff --git a/arch/arm/mach-at91/board-sam9263ek.c b/arch/arm/mach-at91/board-sam9263ek.c
index 89b9608742a7..3284df05df14 100644
--- a/arch/arm/mach-at91/board-sam9263ek.c
+++ b/arch/arm/mach-at91/board-sam9263ek.c
@@ -443,7 +443,7 @@ static void __init ek_board_init(void)
 
 MACHINE_START(AT91SAM9263EK, "Atmel AT91SAM9263-EK")
 	/* Maintainer: Atmel */
-	.timer		= &at91sam926x_timer,
+	.init_time	= at91sam926x_pit_init,
 	.map_io		= at91_map_io,
 	.handle_irq	= at91_aic_handle_irq,
 	.init_early	= ek_init_early,
diff --git a/arch/arm/mach-at91/board-sam9g20ek.c b/arch/arm/mach-at91/board-sam9g20ek.c
index 1b7dd9f688d3..f9cd1f2c7146 100644
--- a/arch/arm/mach-at91/board-sam9g20ek.c
+++ b/arch/arm/mach-at91/board-sam9g20ek.c
@@ -409,7 +409,7 @@ static void __init ek_board_init(void)
 
 MACHINE_START(AT91SAM9G20EK, "Atmel AT91SAM9G20-EK")
 	/* Maintainer: Atmel */
-	.timer		= &at91sam926x_timer,
+	.init_time	= at91sam926x_pit_init,
 	.map_io		= at91_map_io,
 	.handle_irq	= at91_aic_handle_irq,
 	.init_early	= ek_init_early,
@@ -419,7 +419,7 @@ MACHINE_END
 
 MACHINE_START(AT91SAM9G20EK_2MMC, "Atmel AT91SAM9G20-EK 2 MMC Slot Mod")
 	/* Maintainer: Atmel */
-	.timer		= &at91sam926x_timer,
+	.init_time	= at91sam926x_pit_init,
 	.map_io		= at91_map_io,
 	.handle_irq	= at91_aic_handle_irq,
 	.init_early	= ek_init_early,
diff --git a/arch/arm/mach-at91/board-sam9m10g45ek.c b/arch/arm/mach-at91/board-sam9m10g45ek.c
index e4cc375e3a32..2a94896a1375 100644
--- a/arch/arm/mach-at91/board-sam9m10g45ek.c
+++ b/arch/arm/mach-at91/board-sam9m10g45ek.c
@@ -502,7 +502,7 @@ static void __init ek_board_init(void)
 
 MACHINE_START(AT91SAM9M10G45EK, "Atmel AT91SAM9M10G45-EK")
 	/* Maintainer: Atmel */
-	.timer		= &at91sam926x_timer,
+	.init_time	= at91sam926x_pit_init,
 	.map_io		= at91_map_io,
 	.handle_irq	= at91_aic_handle_irq,
 	.init_early	= ek_init_early,
diff --git a/arch/arm/mach-at91/board-sam9rlek.c b/arch/arm/mach-at91/board-sam9rlek.c
index 377a1097afa7..aa265dcf2128 100644
--- a/arch/arm/mach-at91/board-sam9rlek.c
+++ b/arch/arm/mach-at91/board-sam9rlek.c
@@ -320,7 +320,7 @@ static void __init ek_board_init(void)
 
 MACHINE_START(AT91SAM9RLEK, "Atmel AT91SAM9RL-EK")
 	/* Maintainer: Atmel */
-	.timer		= &at91sam926x_timer,
+	.init_time	= at91sam926x_pit_init,
 	.map_io		= at91_map_io,
 	.handle_irq	= at91_aic_handle_irq,
 	.init_early	= ek_init_early,
diff --git a/arch/arm/mach-at91/board-snapper9260.c b/arch/arm/mach-at91/board-snapper9260.c
index 98771500ddb9..3aaa9784cf0e 100644
--- a/arch/arm/mach-at91/board-snapper9260.c
+++ b/arch/arm/mach-at91/board-snapper9260.c
@@ -177,7 +177,7 @@ static void __init snapper9260_board_init(void)
 }
 
 MACHINE_START(SNAPPER_9260, "Bluewater Systems Snapper 9260/9G20 module")
-	.timer		= &at91sam926x_timer,
+	.init_time	= at91sam926x_pit_init,
 	.map_io		= at91_map_io,
 	.handle_irq	= at91_aic_handle_irq,
 	.init_early	= snapper9260_init_early,
diff --git a/arch/arm/mach-at91/board-stamp9g20.c b/arch/arm/mach-at91/board-stamp9g20.c
index 48a962b61fa3..a033b8df9fb2 100644
--- a/arch/arm/mach-at91/board-stamp9g20.c
+++ b/arch/arm/mach-at91/board-stamp9g20.c
@@ -272,7 +272,7 @@ static void __init stamp9g20evb_board_init(void)
 
 MACHINE_START(PORTUXG20, "taskit PortuxG20")
 	/* Maintainer: taskit GmbH */
-	.timer		= &at91sam926x_timer,
+	.init_time	= at91sam926x_pit_init,
 	.map_io		= at91_map_io,
 	.handle_irq	= at91_aic_handle_irq,
 	.init_early	= stamp9g20_init_early,
@@ -282,7 +282,7 @@ MACHINE_END
 
 MACHINE_START(STAMP9G20, "taskit Stamp9G20")
 	/* Maintainer: taskit GmbH */
-	.timer		= &at91sam926x_timer,
+	.init_time	= at91sam926x_pit_init,
 	.map_io		= at91_map_io,
 	.handle_irq	= at91_aic_handle_irq,
 	.init_early	= stamp9g20_init_early,
diff --git a/arch/arm/mach-at91/board-usb-a926x.c b/arch/arm/mach-at91/board-usb-a926x.c
index c1060f96e589..2487d944a1bc 100644
--- a/arch/arm/mach-at91/board-usb-a926x.c
+++ b/arch/arm/mach-at91/board-usb-a926x.c
@@ -355,7 +355,7 @@ static void __init ek_board_init(void)
 
 MACHINE_START(USB_A9263, "CALAO USB_A9263")
 	/* Maintainer: calao-systems */
-	.timer		= &at91sam926x_timer,
+	.init_time	= at91sam926x_pit_init,
 	.map_io		= at91_map_io,
 	.handle_irq	= at91_aic_handle_irq,
 	.init_early	= ek_init_early,
@@ -365,7 +365,7 @@ MACHINE_END
 
 MACHINE_START(USB_A9260, "CALAO USB_A9260")
 	/* Maintainer: calao-systems */
-	.timer		= &at91sam926x_timer,
+	.init_time	= at91sam926x_pit_init,
 	.map_io		= at91_map_io,
 	.handle_irq	= at91_aic_handle_irq,
 	.init_early	= ek_init_early,
@@ -375,7 +375,7 @@ MACHINE_END
 
 MACHINE_START(USB_A9G20, "CALAO USB_A92G0")
 	/* Maintainer: Jean-Christophe PLAGNIOL-VILLARD */
-	.timer		= &at91sam926x_timer,
+	.init_time	= at91sam926x_pit_init,
 	.map_io		= at91_map_io,
 	.handle_irq	= at91_aic_handle_irq,
 	.init_early	= ek_init_early,
diff --git a/arch/arm/mach-at91/board-yl-9200.c b/arch/arm/mach-at91/board-yl-9200.c
index 8673aebcb85d..be083771df2e 100644
--- a/arch/arm/mach-at91/board-yl-9200.c
+++ b/arch/arm/mach-at91/board-yl-9200.c
@@ -587,7 +587,7 @@ static void __init yl9200_board_init(void)
 
 MACHINE_START(YL9200, "uCdragon YL-9200")
 	/* Maintainer: S.Birtles */
-	.timer		= &at91rm9200_timer,
+	.init_time	= at91rm9200_timer_init,
 	.map_io		= at91_map_io,
 	.handle_irq	= at91_aic_handle_irq,
 	.init_early	= yl9200_init_early,
diff --git a/arch/arm/mach-at91/generic.h b/arch/arm/mach-at91/generic.h
index fc593d615e7d..78ab06548658 100644
--- a/arch/arm/mach-at91/generic.h
+++ b/arch/arm/mach-at91/generic.h
@@ -36,12 +36,11 @@ extern int  __init at91_aic5_of_init(struct device_node *node,
 
 
  /* Timer */
-struct sys_timer;
 extern void at91rm9200_ioremap_st(u32 addr);
-extern struct sys_timer at91rm9200_timer;
+extern void at91rm9200_timer_init(void);
 extern void at91sam926x_ioremap_pit(u32 addr);
-extern struct sys_timer at91sam926x_timer;
-extern struct sys_timer at91x40_timer;
+extern void at91sam926x_pit_init(void);
+extern void at91x40_timer_init(void);
 
  /* Clocks */
 #ifdef CONFIG_AT91_PMC_UNIT
diff --git a/arch/arm/mach-bcm/board_bcm.c b/arch/arm/mach-bcm/board_bcm.c
index 3a62f1b1cabc..3df68030cf68 100644
--- a/arch/arm/mach-bcm/board_bcm.c
+++ b/arch/arm/mach-bcm/board_bcm.c
@@ -31,10 +31,6 @@ static void timer_init(void)
 {
 }
 
-static struct sys_timer timer = {
-	.init = timer_init,
-};
-
 static void __init init_irq(void)
 {
 	of_irq_init(irq_match);
@@ -50,7 +46,7 @@ static const char * const bcm11351_dt_compat[] = { "bcm,bcm11351", NULL, };
 
 DT_MACHINE_START(BCM11351_DT, "Broadcom Application Processor")
 	.init_irq = init_irq,
-	.timer = &timer,
+	.init_time = timer_init,
 	.init_machine = board_init,
 	.dt_compat = bcm11351_dt_compat,
 	.handle_irq = gic_handle_irq,
diff --git a/arch/arm/mach-bcm2835/bcm2835.c b/arch/arm/mach-bcm2835/bcm2835.c
index f0d739f4b7a3..176d2d24782d 100644
--- a/arch/arm/mach-bcm2835/bcm2835.c
+++ b/arch/arm/mach-bcm2835/bcm2835.c
@@ -104,7 +104,7 @@ DT_MACHINE_START(BCM2835, "BCM2835")
 	.init_irq = bcm2835_init_irq,
 	.handle_irq = bcm2835_handle_irq,
 	.init_machine = bcm2835_init,
-	.timer = &bcm2835_timer,
+	.init_time = bcm2835_timer_init,
 	.restart = bcm2835_restart,
 	.dt_compat = bcm2835_compat
 MACHINE_END
diff --git a/arch/arm/mach-clps711x/board-autcpu12.c b/arch/arm/mach-clps711x/board-autcpu12.c
index 3fbf43f72589..f38584709df7 100644
--- a/arch/arm/mach-clps711x/board-autcpu12.c
+++ b/arch/arm/mach-clps711x/board-autcpu12.c
@@ -170,7 +170,7 @@ MACHINE_START(AUTCPU12, "autronix autcpu12")
 	.nr_irqs	= CLPS711X_NR_IRQS,
 	.map_io		= clps711x_map_io,
 	.init_irq	= clps711x_init_irq,
-	.timer		= &clps711x_timer,
+	.init_time	= clps711x_timer_init,
 	.init_machine	= autcpu12_init,
 	.init_late	= autcpu12_init_late,
 	.handle_irq	= clps711x_handle_irq,
diff --git a/arch/arm/mach-clps711x/board-cdb89712.c b/arch/arm/mach-clps711x/board-cdb89712.c
index 60900ddf97c9..baab7da33c9b 100644
--- a/arch/arm/mach-clps711x/board-cdb89712.c
+++ b/arch/arm/mach-clps711x/board-cdb89712.c
@@ -140,7 +140,7 @@ MACHINE_START(CDB89712, "Cirrus-CDB89712")
 	.nr_irqs	= CLPS711X_NR_IRQS,
 	.map_io		= clps711x_map_io,
 	.init_irq	= clps711x_init_irq,
-	.timer		= &clps711x_timer,
+	.init_time	= clps711x_timer_init,
 	.init_machine	= cdb89712_init,
 	.handle_irq	= clps711x_handle_irq,
 	.restart	= clps711x_restart,
diff --git a/arch/arm/mach-clps711x/board-clep7312.c b/arch/arm/mach-clps711x/board-clep7312.c
index 0b32a487183b..014aa3c19a03 100644
--- a/arch/arm/mach-clps711x/board-clep7312.c
+++ b/arch/arm/mach-clps711x/board-clep7312.c
@@ -40,7 +40,7 @@ MACHINE_START(CLEP7212, "Cirrus Logic 7212/7312")
 	.fixup		= fixup_clep7312,
 	.map_io		= clps711x_map_io,
 	.init_irq	= clps711x_init_irq,
-	.timer		= &clps711x_timer,
+	.init_time	= clps711x_timer_init,
 	.handle_irq	= clps711x_handle_irq,
 	.restart	= clps711x_restart,
 MACHINE_END
diff --git a/arch/arm/mach-clps711x/board-edb7211.c b/arch/arm/mach-clps711x/board-edb7211.c
index 71aa5cf2c0d3..5f928e9ed2ef 100644
--- a/arch/arm/mach-clps711x/board-edb7211.c
+++ b/arch/arm/mach-clps711x/board-edb7211.c
@@ -173,7 +173,7 @@ MACHINE_START(EDB7211, "CL-EDB7211 (EP7211 eval board)")
 	.reserve	= edb7211_reserve,
 	.map_io		= edb7211_map_io,
 	.init_irq	= clps711x_init_irq,
-	.timer		= &clps711x_timer,
+	.init_time	= clps711x_timer_init,
 	.init_machine	= edb7211_init,
 	.handle_irq	= clps711x_handle_irq,
 	.restart	= clps711x_restart,
diff --git a/arch/arm/mach-clps711x/board-fortunet.c b/arch/arm/mach-clps711x/board-fortunet.c
index 7d0125580366..c5675efc8c6a 100644
--- a/arch/arm/mach-clps711x/board-fortunet.c
+++ b/arch/arm/mach-clps711x/board-fortunet.c
@@ -78,7 +78,7 @@ MACHINE_START(FORTUNET, "ARM-FortuNet")
 	.fixup		= fortunet_fixup,
 	.map_io		= clps711x_map_io,
 	.init_irq	= clps711x_init_irq,
-	.timer		= &clps711x_timer,
+	.init_time	= clps711x_timer_init,
 	.handle_irq	= clps711x_handle_irq,
 	.restart	= clps711x_restart,
 MACHINE_END
diff --git a/arch/arm/mach-clps711x/board-p720t.c b/arch/arm/mach-clps711x/board-p720t.c
index 1518fc83babd..8d3ee6771135 100644
--- a/arch/arm/mach-clps711x/board-p720t.c
+++ b/arch/arm/mach-clps711x/board-p720t.c
@@ -224,7 +224,7 @@ MACHINE_START(P720T, "ARM-Prospector720T")
 	.map_io		= p720t_map_io,
 	.init_early	= p720t_init_early,
 	.init_irq	= clps711x_init_irq,
-	.timer		= &clps711x_timer,
+	.init_time	= clps711x_timer_init,
 	.init_machine	= p720t_init,
 	.init_late	= p720t_init_late,
 	.handle_irq	= clps711x_handle_irq,
diff --git a/arch/arm/mach-clps711x/common.c b/arch/arm/mach-clps711x/common.c
index e046439573ee..20ff50f3ccf0 100644
--- a/arch/arm/mach-clps711x/common.c
+++ b/arch/arm/mach-clps711x/common.c
@@ -282,7 +282,7 @@ static void add_fixed_clk(struct clk *clk, const char *name, int rate)
 	clk_register_clkdev(clk, name, NULL);
 }
 
-static void __init clps711x_timer_init(void)
+void __init clps711x_timer_init(void)
 {
 	int osc, ext, pll, cpu, bus, timl, timh, uart, spi;
 	u32 tmp;
@@ -345,10 +345,6 @@ static void __init clps711x_timer_init(void)
 	setup_irq(IRQ_TC2OI, &clps711x_timer_irq);
 }
 
-struct sys_timer clps711x_timer = {
-	.init		= clps711x_timer_init,
-};
-
 void clps711x_restart(char mode, const char *cmd)
 {
 	soft_restart(0);
diff --git a/arch/arm/mach-clps711x/common.h b/arch/arm/mach-clps711x/common.h
index b7c0c75c90c0..f84a7292c70e 100644
--- a/arch/arm/mach-clps711x/common.h
+++ b/arch/arm/mach-clps711x/common.h
@@ -8,10 +8,8 @@
 #define CLPS711X_NR_GPIO	(4 * 8 + 3)
 #define CLPS711X_GPIO(prt, bit)	((prt) * 8 + (bit))
 
-struct sys_timer;
-
 extern void clps711x_map_io(void);
 extern void clps711x_init_irq(void);
+extern void clps711x_timer_init(void);
 extern void clps711x_handle_irq(struct pt_regs *regs);
 extern void clps711x_restart(char mode, const char *cmd);
-extern struct sys_timer clps711x_timer;
diff --git a/arch/arm/mach-cns3xxx/cns3420vb.c b/arch/arm/mach-cns3xxx/cns3420vb.c
index ae305397003c..3c86f910b647 100644
--- a/arch/arm/mach-cns3xxx/cns3420vb.c
+++ b/arch/arm/mach-cns3xxx/cns3420vb.c
@@ -250,7 +250,7 @@ MACHINE_START(CNS3420VB, "Cavium Networks CNS3420 Validation Board")
 	.atag_offset	= 0x100,
 	.map_io		= cns3420_map_io,
 	.init_irq	= cns3xxx_init_irq,
-	.timer		= &cns3xxx_timer,
+	.init_time	= cns3xxx_timer_init,
 	.handle_irq	= gic_handle_irq,
 	.init_machine	= cns3420_init,
 	.restart	= cns3xxx_restart,
diff --git a/arch/arm/mach-cns3xxx/core.c b/arch/arm/mach-cns3xxx/core.c
index 031805b1428d..1754f8f4f34c 100644
--- a/arch/arm/mach-cns3xxx/core.c
+++ b/arch/arm/mach-cns3xxx/core.c
@@ -235,17 +235,13 @@ static void __init __cns3xxx_timer_init(unsigned int timer_irq)
 	cns3xxx_clockevents_init(timer_irq);
 }
 
-static void __init cns3xxx_timer_init(void)
+void __init cns3xxx_timer_init(void)
 {
 	cns3xxx_tmr1 = IOMEM(CNS3XXX_TIMER1_2_3_BASE_VIRT);
 
 	__cns3xxx_timer_init(IRQ_CNS3XXX_TIMER0);
 }
 
-struct sys_timer cns3xxx_timer = {
-	.init = cns3xxx_timer_init,
-};
-
 #ifdef CONFIG_CACHE_L2X0
 
 void __init cns3xxx_l2x0_init(void)
diff --git a/arch/arm/mach-cns3xxx/core.h b/arch/arm/mach-cns3xxx/core.h
index 4894b8c17151..b23b17b4da10 100644
--- a/arch/arm/mach-cns3xxx/core.h
+++ b/arch/arm/mach-cns3xxx/core.h
@@ -11,7 +11,7 @@
 #ifndef __CNS3XXX_CORE_H
 #define __CNS3XXX_CORE_H
 
-extern struct sys_timer cns3xxx_timer;
+extern void cns3xxx_timer_init(void);
 
 #ifdef CONFIG_CACHE_L2X0
 void __init cns3xxx_l2x0_init(void);
diff --git a/arch/arm/mach-davinci/board-da830-evm.c b/arch/arm/mach-davinci/board-da830-evm.c
index 95b5e102ceb1..e3742716cbaa 100644
--- a/arch/arm/mach-davinci/board-da830-evm.c
+++ b/arch/arm/mach-davinci/board-da830-evm.c
@@ -679,7 +679,7 @@ MACHINE_START(DAVINCI_DA830_EVM, "DaVinci DA830/OMAP-L137/AM17x EVM")
 	.atag_offset	= 0x100,
 	.map_io		= da830_evm_map_io,
 	.init_irq	= cp_intc_init,
-	.timer		= &davinci_timer,
+	.init_time	= davinci_timer_init,
 	.init_machine	= da830_evm_init,
 	.init_late	= davinci_init_late,
 	.dma_zone_size	= SZ_128M,
diff --git a/arch/arm/mach-davinci/board-da850-evm.c b/arch/arm/mach-davinci/board-da850-evm.c
index 0299915575a8..3b3356097bb0 100644
--- a/arch/arm/mach-davinci/board-da850-evm.c
+++ b/arch/arm/mach-davinci/board-da850-evm.c
@@ -1599,7 +1599,7 @@ MACHINE_START(DAVINCI_DA850_EVM, "DaVinci DA850/OMAP-L138/AM18x EVM")
 	.atag_offset	= 0x100,
 	.map_io		= da850_evm_map_io,
 	.init_irq	= cp_intc_init,
-	.timer		= &davinci_timer,
+	.init_time	= davinci_timer_init,
 	.init_machine	= da850_evm_init,
 	.init_late	= davinci_init_late,
 	.dma_zone_size	= SZ_128M,
diff --git a/arch/arm/mach-davinci/board-dm355-evm.c b/arch/arm/mach-davinci/board-dm355-evm.c
index cdf8d0746e79..147b8e1a4407 100644
--- a/arch/arm/mach-davinci/board-dm355-evm.c
+++ b/arch/arm/mach-davinci/board-dm355-evm.c
@@ -355,7 +355,7 @@ MACHINE_START(DAVINCI_DM355_EVM, "DaVinci DM355 EVM")
 	.atag_offset  = 0x100,
 	.map_io	      = dm355_evm_map_io,
 	.init_irq     = davinci_irq_init,
-	.timer	      = &davinci_timer,
+	.init_time	= davinci_timer_init,
 	.init_machine = dm355_evm_init,
 	.init_late	= davinci_init_late,
 	.dma_zone_size	= SZ_128M,
diff --git a/arch/arm/mach-davinci/board-dm355-leopard.c b/arch/arm/mach-davinci/board-dm355-leopard.c
index d41954507fc2..dff4ddc5ef81 100644
--- a/arch/arm/mach-davinci/board-dm355-leopard.c
+++ b/arch/arm/mach-davinci/board-dm355-leopard.c
@@ -274,7 +274,7 @@ MACHINE_START(DM355_LEOPARD, "DaVinci DM355 leopard")
 	.atag_offset  = 0x100,
 	.map_io	      = dm355_leopard_map_io,
 	.init_irq     = davinci_irq_init,
-	.timer	      = &davinci_timer,
+	.init_time	= davinci_timer_init,
 	.init_machine = dm355_leopard_init,
 	.init_late	= davinci_init_late,
 	.dma_zone_size	= SZ_128M,
diff --git a/arch/arm/mach-davinci/board-dm365-evm.c b/arch/arm/mach-davinci/board-dm365-evm.c
index 5d49c75388ca..c2d4958a0cb6 100644
--- a/arch/arm/mach-davinci/board-dm365-evm.c
+++ b/arch/arm/mach-davinci/board-dm365-evm.c
@@ -616,7 +616,7 @@ MACHINE_START(DAVINCI_DM365_EVM, "DaVinci DM365 EVM")
 	.atag_offset	= 0x100,
 	.map_io		= dm365_evm_map_io,
 	.init_irq	= davinci_irq_init,
-	.timer		= &davinci_timer,
+	.init_time	= davinci_timer_init,
 	.init_machine	= dm365_evm_init,
 	.init_late	= davinci_init_late,
 	.dma_zone_size	= SZ_128M,
diff --git a/arch/arm/mach-davinci/board-dm644x-evm.c b/arch/arm/mach-davinci/board-dm644x-evm.c
index f5e018de7fa5..e4a16f98e6a2 100644
--- a/arch/arm/mach-davinci/board-dm644x-evm.c
+++ b/arch/arm/mach-davinci/board-dm644x-evm.c
@@ -825,7 +825,7 @@ MACHINE_START(DAVINCI_EVM, "DaVinci DM644x EVM")
 	.atag_offset  = 0x100,
 	.map_io	      = davinci_evm_map_io,
 	.init_irq     = davinci_irq_init,
-	.timer	      = &davinci_timer,
+	.init_time	= davinci_timer_init,
 	.init_machine = davinci_evm_init,
 	.init_late	= davinci_init_late,
 	.dma_zone_size	= SZ_128M,
diff --git a/arch/arm/mach-davinci/board-dm646x-evm.c b/arch/arm/mach-davinci/board-dm646x-evm.c
index 9211e8800c79..a9f2054c3e9b 100644
--- a/arch/arm/mach-davinci/board-dm646x-evm.c
+++ b/arch/arm/mach-davinci/board-dm646x-evm.c
@@ -818,7 +818,7 @@ MACHINE_START(DAVINCI_DM6467_EVM, "DaVinci DM646x EVM")
 	.atag_offset  = 0x100,
 	.map_io       = davinci_map_io,
 	.init_irq     = davinci_irq_init,
-	.timer        = &davinci_timer,
+	.init_time	= davinci_timer_init,
 	.init_machine = evm_init,
 	.init_late	= davinci_init_late,
 	.dma_zone_size	= SZ_128M,
@@ -829,7 +829,7 @@ MACHINE_START(DAVINCI_DM6467TEVM, "DaVinci DM6467T EVM")
 	.atag_offset  = 0x100,
 	.map_io       = davinci_map_io,
 	.init_irq     = davinci_irq_init,
-	.timer        = &davinci_timer,
+	.init_time	= davinci_timer_init,
 	.init_machine = evm_init,
 	.init_late	= davinci_init_late,
 	.dma_zone_size	= SZ_128M,
diff --git a/arch/arm/mach-davinci/board-mityomapl138.c b/arch/arm/mach-davinci/board-mityomapl138.c
index 43e4a0d663fa..b0df578bf744 100644
--- a/arch/arm/mach-davinci/board-mityomapl138.c
+++ b/arch/arm/mach-davinci/board-mityomapl138.c
@@ -570,7 +570,7 @@ MACHINE_START(MITYOMAPL138, "MityDSP-L138/MityARM-1808")
 	.atag_offset	= 0x100,
 	.map_io		= mityomapl138_map_io,
 	.init_irq	= cp_intc_init,
-	.timer		= &davinci_timer,
+	.init_time	= davinci_timer_init,
 	.init_machine	= mityomapl138_init,
 	.init_late	= davinci_init_late,
 	.dma_zone_size	= SZ_128M,
diff --git a/arch/arm/mach-davinci/board-neuros-osd2.c b/arch/arm/mach-davinci/board-neuros-osd2.c
index 3e3e3afebf88..1c98107527fa 100644
--- a/arch/arm/mach-davinci/board-neuros-osd2.c
+++ b/arch/arm/mach-davinci/board-neuros-osd2.c
@@ -237,7 +237,7 @@ MACHINE_START(NEUROS_OSD2, "Neuros OSD2")
 	.atag_offset	= 0x100,
 	.map_io		 = davinci_ntosd2_map_io,
 	.init_irq	= davinci_irq_init,
-	.timer		= &davinci_timer,
+	.init_time	= davinci_timer_init,
 	.init_machine = davinci_ntosd2_init,
 	.init_late	= davinci_init_late,
 	.dma_zone_size	= SZ_128M,
diff --git a/arch/arm/mach-davinci/board-omapl138-hawk.c b/arch/arm/mach-davinci/board-omapl138-hawk.c
index dc1208e9e664..deb3922612b9 100644
--- a/arch/arm/mach-davinci/board-omapl138-hawk.c
+++ b/arch/arm/mach-davinci/board-omapl138-hawk.c
@@ -341,7 +341,7 @@ MACHINE_START(OMAPL138_HAWKBOARD, "AM18x/OMAP-L138 Hawkboard")
 	.atag_offset	= 0x100,
 	.map_io		= omapl138_hawk_map_io,
 	.init_irq	= cp_intc_init,
-	.timer		= &davinci_timer,
+	.init_time	= davinci_timer_init,
 	.init_machine	= omapl138_hawk_init,
 	.init_late	= davinci_init_late,
 	.dma_zone_size	= SZ_128M,
diff --git a/arch/arm/mach-davinci/board-sffsdr.c b/arch/arm/mach-davinci/board-sffsdr.c
index 6957787fa7f3..739be7e738fe 100644
--- a/arch/arm/mach-davinci/board-sffsdr.c
+++ b/arch/arm/mach-davinci/board-sffsdr.c
@@ -155,7 +155,7 @@ MACHINE_START(SFFSDR, "Lyrtech SFFSDR")
 	.atag_offset  = 0x100,
 	.map_io	      = davinci_sffsdr_map_io,
 	.init_irq     = davinci_irq_init,
-	.timer	      = &davinci_timer,
+	.init_time	= davinci_timer_init,
 	.init_machine = davinci_sffsdr_init,
 	.init_late	= davinci_init_late,
 	.dma_zone_size	= SZ_128M,
diff --git a/arch/arm/mach-davinci/board-tnetv107x-evm.c b/arch/arm/mach-davinci/board-tnetv107x-evm.c
index be3099733b1f..4f416023d4e2 100644
--- a/arch/arm/mach-davinci/board-tnetv107x-evm.c
+++ b/arch/arm/mach-davinci/board-tnetv107x-evm.c
@@ -280,7 +280,7 @@ MACHINE_START(TNETV107X, "TNETV107X EVM")
 	.atag_offset	= 0x100,
 	.map_io		= tnetv107x_init,
 	.init_irq	= cp_intc_init,
-	.timer		= &davinci_timer,
+	.init_time	= davinci_timer_init,
 	.init_machine	= tnetv107x_evm_board_init,
 	.init_late	= davinci_init_late,
 	.dma_zone_size	= SZ_128M,
diff --git a/arch/arm/mach-davinci/da8xx-dt.c b/arch/arm/mach-davinci/da8xx-dt.c
index 37c27af18fa0..9a7c76efc8f8 100644
--- a/arch/arm/mach-davinci/da8xx-dt.c
+++ b/arch/arm/mach-davinci/da8xx-dt.c
@@ -56,7 +56,7 @@ static const char *da850_boards_compat[] __initdata = {
 DT_MACHINE_START(DA850_DT, "Generic DA850/OMAP-L138/AM18x")
 	.map_io		= da850_init,
 	.init_irq	= da8xx_init_irq,
-	.timer		= &davinci_timer,
+	.init_time	= davinci_timer_init,
 	.init_machine	= da850_init_machine,
 	.dt_compat	= da850_boards_compat,
 	.init_late	= davinci_init_late,
diff --git a/arch/arm/mach-davinci/include/mach/common.h b/arch/arm/mach-davinci/include/mach/common.h
index 046c7238a3d6..b124b77c90c5 100644
--- a/arch/arm/mach-davinci/include/mach/common.h
+++ b/arch/arm/mach-davinci/include/mach/common.h
@@ -15,9 +15,7 @@
 #include <linux/compiler.h>
 #include <linux/types.h>
 
-struct sys_timer;
-
-extern struct sys_timer davinci_timer;
+extern void davinci_timer_init(void);
 
 extern void davinci_irq_init(void);
 extern void __iomem *davinci_intc_base;
diff --git a/arch/arm/mach-davinci/time.c b/arch/arm/mach-davinci/time.c
index 9847938785ca..bad361ec1666 100644
--- a/arch/arm/mach-davinci/time.c
+++ b/arch/arm/mach-davinci/time.c
@@ -337,7 +337,7 @@ static struct clock_event_device clockevent_davinci = {
 };
 
 
-static void __init davinci_timer_init(void)
+void __init davinci_timer_init(void)
 {
 	struct clk *timer_clk;
 	struct davinci_soc_info *soc_info = &davinci_soc_info;
@@ -410,11 +410,6 @@ static void __init davinci_timer_init(void)
 		timer32_config(&timers[i]);
 }
 
-struct sys_timer davinci_timer = {
-	.init   = davinci_timer_init,
-};
-
-
 /* reset board using watchdog timer */
 void davinci_watchdog_reset(struct platform_device *pdev)
 {
diff --git a/arch/arm/mach-dove/cm-a510.c b/arch/arm/mach-dove/cm-a510.c
index 792b4e2e24f1..0dc39cf30fdd 100644
--- a/arch/arm/mach-dove/cm-a510.c
+++ b/arch/arm/mach-dove/cm-a510.c
@@ -92,6 +92,6 @@ MACHINE_START(CM_A510, "Compulab CM-A510 Board")
 	.map_io		= dove_map_io,
 	.init_early	= dove_init_early,
 	.init_irq	= dove_init_irq,
-	.timer		= &dove_timer,
+	.init_time	= dove_timer_init,
 	.restart	= dove_restart,
 MACHINE_END
diff --git a/arch/arm/mach-dove/common.c b/arch/arm/mach-dove/common.c
index 89f4f993cd03..0c7911b3e155 100644
--- a/arch/arm/mach-dove/common.c
+++ b/arch/arm/mach-dove/common.c
@@ -242,17 +242,13 @@ static int __init dove_find_tclk(void)
 	return 166666667;
 }
 
-static void __init dove_timer_init(void)
+void __init dove_timer_init(void)
 {
 	dove_tclk = dove_find_tclk();
 	orion_time_init(BRIDGE_VIRT_BASE, BRIDGE_INT_TIMER1_CLR,
 			IRQ_DOVE_BRIDGE, dove_tclk);
 }
 
-struct sys_timer dove_timer = {
-	.init = dove_timer_init,
-};
-
 /*****************************************************************************
  * Cryptographic Engines and Security Accelerator (CESA)
  ****************************************************************************/
@@ -454,7 +450,7 @@ DT_MACHINE_START(DOVE_DT, "Marvell Dove (Flattened Device Tree)")
 	.map_io		= dove_map_io,
 	.init_early	= dove_init_early,
 	.init_irq	= orion_dt_init_irq,
-	.timer		= &dove_timer,
+	.init_time	= dove_timer_init,
 	.init_machine	= dove_dt_init,
 	.restart	= dove_restart,
 	.dt_compat	= dove_dt_board_compat,
diff --git a/arch/arm/mach-dove/common.h b/arch/arm/mach-dove/common.h
index 1a233404b735..ee59fba4c6d1 100644
--- a/arch/arm/mach-dove/common.h
+++ b/arch/arm/mach-dove/common.h
@@ -14,7 +14,7 @@
 struct mv643xx_eth_platform_data;
 struct mv_sata_platform_data;
 
-extern struct sys_timer dove_timer;
+extern void dove_timer_init(void);
 
 /*
  * Basic Dove init functions used early by machine-setup.
diff --git a/arch/arm/mach-dove/dove-db-setup.c b/arch/arm/mach-dove/dove-db-setup.c
index bc2867f11346..76e26f949c27 100644
--- a/arch/arm/mach-dove/dove-db-setup.c
+++ b/arch/arm/mach-dove/dove-db-setup.c
@@ -98,6 +98,6 @@ MACHINE_START(DOVE_DB, "Marvell DB-MV88AP510-BP Development Board")
 	.map_io		= dove_map_io,
 	.init_early	= dove_init_early,
 	.init_irq	= dove_init_irq,
-	.timer		= &dove_timer,
+	.init_time	= dove_timer_init,
 	.restart	= dove_restart,
 MACHINE_END
diff --git a/arch/arm/mach-ebsa110/core.c b/arch/arm/mach-ebsa110/core.c
index d96dd94fb582..b13cc74114db 100644
--- a/arch/arm/mach-ebsa110/core.c
+++ b/arch/arm/mach-ebsa110/core.c
@@ -213,7 +213,7 @@ static struct irqaction ebsa110_timer_irq = {
 /*
  * Set up timer interrupt.
  */
-static void __init ebsa110_timer_init(void)
+void __init ebsa110_timer_init(void)
 {
 	arch_gettimeoffset = ebsa110_gettimeoffset;
 
@@ -227,10 +227,6 @@ static void __init ebsa110_timer_init(void)
 	setup_irq(IRQ_EBSA110_TIMER0, &ebsa110_timer_irq);
 }
 
-static struct sys_timer ebsa110_timer = {
-	.init		= ebsa110_timer_init,
-};
-
 static struct plat_serial8250_port serial_platform_data[] = {
 	{
 		.iobase		= 0x3f8,
@@ -329,6 +325,6 @@ MACHINE_START(EBSA110, "EBSA110")
 	.map_io		= ebsa110_map_io,
 	.init_early	= ebsa110_init_early,
 	.init_irq	= ebsa110_init_irq,
-	.timer		= &ebsa110_timer,
+	.init_time	= ebsa110_timer_init,
 	.restart	= ebsa110_restart,
 MACHINE_END
diff --git a/arch/arm/mach-ep93xx/adssphere.c b/arch/arm/mach-ep93xx/adssphere.c
index 41383bf03d4b..82d9c788535a 100644
--- a/arch/arm/mach-ep93xx/adssphere.c
+++ b/arch/arm/mach-ep93xx/adssphere.c
@@ -40,7 +40,7 @@ MACHINE_START(ADSSPHERE, "ADS Sphere board")
 	.map_io		= ep93xx_map_io,
 	.init_irq	= ep93xx_init_irq,
 	.handle_irq	= vic_handle_irq,
-	.timer		= &ep93xx_timer,
+	.init_time	= ep93xx_timer_init,
 	.init_machine	= adssphere_init_machine,
 	.init_late	= ep93xx_init_late,
 	.restart	= ep93xx_restart,
diff --git a/arch/arm/mach-ep93xx/core.c b/arch/arm/mach-ep93xx/core.c
index 6f48b87a1ace..ee27b4b0ab07 100644
--- a/arch/arm/mach-ep93xx/core.c
+++ b/arch/arm/mach-ep93xx/core.c
@@ -156,7 +156,7 @@ static u32 ep93xx_gettimeoffset(void)
 	return (offset + (53 * offset / 3072)) * 1000;
 }
 
-static void __init ep93xx_timer_init(void)
+void __init ep93xx_timer_init(void)
 {
 	u32 tmode = EP93XX_TIMER123_CONTROL_MODE |
 		    EP93XX_TIMER123_CONTROL_CLKSEL;
@@ -176,10 +176,6 @@ static void __init ep93xx_timer_init(void)
 	setup_irq(IRQ_EP93XX_TIMER1, &ep93xx_timer_irq);
 }
 
-struct sys_timer ep93xx_timer = {
-	.init		= ep93xx_timer_init,
-};
-
 
 /*************************************************************************
  * EP93xx IRQ handling
diff --git a/arch/arm/mach-ep93xx/edb93xx.c b/arch/arm/mach-ep93xx/edb93xx.c
index b8f53d57a299..ac260519c9e9 100644
--- a/arch/arm/mach-ep93xx/edb93xx.c
+++ b/arch/arm/mach-ep93xx/edb93xx.c
@@ -277,7 +277,7 @@ MACHINE_START(EDB9301, "Cirrus Logic EDB9301 Evaluation Board")
 	.map_io		= ep93xx_map_io,
 	.init_irq	= ep93xx_init_irq,
 	.handle_irq	= vic_handle_irq,
-	.timer		= &ep93xx_timer,
+	.init_time	= ep93xx_timer_init,
 	.init_machine	= edb93xx_init_machine,
 	.init_late	= ep93xx_init_late,
 	.restart	= ep93xx_restart,
@@ -291,7 +291,7 @@ MACHINE_START(EDB9302, "Cirrus Logic EDB9302 Evaluation Board")
 	.map_io		= ep93xx_map_io,
 	.init_irq	= ep93xx_init_irq,
 	.handle_irq	= vic_handle_irq,
-	.timer		= &ep93xx_timer,
+	.init_time	= ep93xx_timer_init,
 	.init_machine	= edb93xx_init_machine,
 	.init_late	= ep93xx_init_late,
 	.restart	= ep93xx_restart,
@@ -305,7 +305,7 @@ MACHINE_START(EDB9302A, "Cirrus Logic EDB9302A Evaluation Board")
 	.map_io		= ep93xx_map_io,
 	.init_irq	= ep93xx_init_irq,
 	.handle_irq	= vic_handle_irq,
-	.timer		= &ep93xx_timer,
+	.init_time	= ep93xx_timer_init,
 	.init_machine	= edb93xx_init_machine,
 	.init_late	= ep93xx_init_late,
 	.restart	= ep93xx_restart,
@@ -319,7 +319,7 @@ MACHINE_START(EDB9307, "Cirrus Logic EDB9307 Evaluation Board")
 	.map_io		= ep93xx_map_io,
 	.init_irq	= ep93xx_init_irq,
 	.handle_irq	= vic_handle_irq,
-	.timer		= &ep93xx_timer,
+	.init_time	= ep93xx_timer_init,
 	.init_machine	= edb93xx_init_machine,
 	.init_late	= ep93xx_init_late,
 	.restart	= ep93xx_restart,
@@ -333,7 +333,7 @@ MACHINE_START(EDB9307A, "Cirrus Logic EDB9307A Evaluation Board")
 	.map_io		= ep93xx_map_io,
 	.init_irq	= ep93xx_init_irq,
 	.handle_irq	= vic_handle_irq,
-	.timer		= &ep93xx_timer,
+	.init_time	= ep93xx_timer_init,
 	.init_machine	= edb93xx_init_machine,
 	.init_late	= ep93xx_init_late,
 	.restart	= ep93xx_restart,
@@ -347,7 +347,7 @@ MACHINE_START(EDB9312, "Cirrus Logic EDB9312 Evaluation Board")
 	.map_io		= ep93xx_map_io,
 	.init_irq	= ep93xx_init_irq,
 	.handle_irq	= vic_handle_irq,
-	.timer		= &ep93xx_timer,
+	.init_time	= ep93xx_timer_init,
 	.init_machine	= edb93xx_init_machine,
 	.init_late	= ep93xx_init_late,
 	.restart	= ep93xx_restart,
@@ -361,7 +361,7 @@ MACHINE_START(EDB9315, "Cirrus Logic EDB9315 Evaluation Board")
 	.map_io		= ep93xx_map_io,
 	.init_irq	= ep93xx_init_irq,
 	.handle_irq	= vic_handle_irq,
-	.timer		= &ep93xx_timer,
+	.init_time	= ep93xx_timer_init,
 	.init_machine	= edb93xx_init_machine,
 	.init_late	= ep93xx_init_late,
 	.restart	= ep93xx_restart,
@@ -375,7 +375,7 @@ MACHINE_START(EDB9315A, "Cirrus Logic EDB9315A Evaluation Board")
 	.map_io		= ep93xx_map_io,
 	.init_irq	= ep93xx_init_irq,
 	.handle_irq	= vic_handle_irq,
-	.timer		= &ep93xx_timer,
+	.init_time	= ep93xx_timer_init,
 	.init_machine	= edb93xx_init_machine,
 	.init_late	= ep93xx_init_late,
 	.restart	= ep93xx_restart,
diff --git a/arch/arm/mach-ep93xx/gesbc9312.c b/arch/arm/mach-ep93xx/gesbc9312.c
index 7fd705b5efe4..76c50f42bd71 100644
--- a/arch/arm/mach-ep93xx/gesbc9312.c
+++ b/arch/arm/mach-ep93xx/gesbc9312.c
@@ -40,7 +40,7 @@ MACHINE_START(GESBC9312, "Glomation GESBC-9312-sx")
 	.map_io		= ep93xx_map_io,
 	.init_irq	= ep93xx_init_irq,
 	.handle_irq	= vic_handle_irq,
-	.timer		= &ep93xx_timer,
+	.init_time	= ep93xx_timer_init,
 	.init_machine	= gesbc9312_init_machine,
 	.init_late	= ep93xx_init_late,
 	.restart	= ep93xx_restart,
diff --git a/arch/arm/mach-ep93xx/include/mach/platform.h b/arch/arm/mach-ep93xx/include/mach/platform.h
index 33a5122c6dc8..a14e1b37beff 100644
--- a/arch/arm/mach-ep93xx/include/mach/platform.h
+++ b/arch/arm/mach-ep93xx/include/mach/platform.h
@@ -53,7 +53,7 @@ int ep93xx_ide_acquire_gpio(struct platform_device *pdev);
 void ep93xx_ide_release_gpio(struct platform_device *pdev);
 
 void ep93xx_init_devices(void);
-extern struct sys_timer ep93xx_timer;
+extern void ep93xx_timer_init(void);
 
 void ep93xx_restart(char, const char *);
 void ep93xx_init_late(void);
diff --git a/arch/arm/mach-ep93xx/micro9.c b/arch/arm/mach-ep93xx/micro9.c
index 3d7cdab725b2..777cd2170f8a 100644
--- a/arch/arm/mach-ep93xx/micro9.c
+++ b/arch/arm/mach-ep93xx/micro9.c
@@ -83,7 +83,7 @@ MACHINE_START(MICRO9, "Contec Micro9-High")
 	.map_io		= ep93xx_map_io,
 	.init_irq	= ep93xx_init_irq,
 	.handle_irq	= vic_handle_irq,
-	.timer		= &ep93xx_timer,
+	.init_time	= ep93xx_timer_init,
 	.init_machine	= micro9_init_machine,
 	.init_late	= ep93xx_init_late,
 	.restart	= ep93xx_restart,
@@ -97,7 +97,7 @@ MACHINE_START(MICRO9M, "Contec Micro9-Mid")
 	.map_io		= ep93xx_map_io,
 	.init_irq	= ep93xx_init_irq,
 	.handle_irq	= vic_handle_irq,
-	.timer		= &ep93xx_timer,
+	.init_time	= ep93xx_timer_init,
 	.init_machine	= micro9_init_machine,
 	.init_late	= ep93xx_init_late,
 	.restart	= ep93xx_restart,
@@ -111,7 +111,7 @@ MACHINE_START(MICRO9L, "Contec Micro9-Lite")
 	.map_io		= ep93xx_map_io,
 	.init_irq	= ep93xx_init_irq,
 	.handle_irq	= vic_handle_irq,
-	.timer		= &ep93xx_timer,
+	.init_time	= ep93xx_timer_init,
 	.init_machine	= micro9_init_machine,
 	.init_late	= ep93xx_init_late,
 	.restart	= ep93xx_restart,
@@ -125,7 +125,7 @@ MACHINE_START(MICRO9S, "Contec Micro9-Slim")
 	.map_io		= ep93xx_map_io,
 	.init_irq	= ep93xx_init_irq,
 	.handle_irq	= vic_handle_irq,
-	.timer		= &ep93xx_timer,
+	.init_time	= ep93xx_timer_init,
 	.init_machine	= micro9_init_machine,
 	.init_late	= ep93xx_init_late,
 	.restart	= ep93xx_restart,
diff --git a/arch/arm/mach-ep93xx/simone.c b/arch/arm/mach-ep93xx/simone.c
index 0eb3f17a6fa2..6ff39ee2ad5d 100644
--- a/arch/arm/mach-ep93xx/simone.c
+++ b/arch/arm/mach-ep93xx/simone.c
@@ -84,7 +84,7 @@ MACHINE_START(SIM_ONE, "Simplemachines Sim.One Board")
 	.map_io		= ep93xx_map_io,
 	.init_irq	= ep93xx_init_irq,
 	.handle_irq	= vic_handle_irq,
-	.timer		= &ep93xx_timer,
+	.init_time	= ep93xx_timer_init,
 	.init_machine	= simone_init_machine,
 	.init_late	= ep93xx_init_late,
 	.restart	= ep93xx_restart,
diff --git a/arch/arm/mach-ep93xx/snappercl15.c b/arch/arm/mach-ep93xx/snappercl15.c
index 50043eef1cf2..6434c07dbf96 100644
--- a/arch/arm/mach-ep93xx/snappercl15.c
+++ b/arch/arm/mach-ep93xx/snappercl15.c
@@ -177,7 +177,7 @@ MACHINE_START(SNAPPER_CL15, "Bluewater Systems Snapper CL15")
 	.map_io		= ep93xx_map_io,
 	.init_irq	= ep93xx_init_irq,
 	.handle_irq	= vic_handle_irq,
-	.timer 		= &ep93xx_timer,
+	.init_time	= ep93xx_timer_init,
 	.init_machine	= snappercl15_init_machine,
 	.init_late	= ep93xx_init_late,
 	.restart	= ep93xx_restart,
diff --git a/arch/arm/mach-ep93xx/ts72xx.c b/arch/arm/mach-ep93xx/ts72xx.c
index 3c4c233391dc..e4fa0d3760a5 100644
--- a/arch/arm/mach-ep93xx/ts72xx.c
+++ b/arch/arm/mach-ep93xx/ts72xx.c
@@ -247,7 +247,7 @@ MACHINE_START(TS72XX, "Technologic Systems TS-72xx SBC")
 	.map_io		= ts72xx_map_io,
 	.init_irq	= ep93xx_init_irq,
 	.handle_irq	= vic_handle_irq,
-	.timer		= &ep93xx_timer,
+	.init_time	= ep93xx_timer_init,
 	.init_machine	= ts72xx_init_machine,
 	.init_late	= ep93xx_init_late,
 	.restart	= ep93xx_restart,
diff --git a/arch/arm/mach-ep93xx/vision_ep9307.c b/arch/arm/mach-ep93xx/vision_ep9307.c
index ba92e25e3016..8610ba293991 100644
--- a/arch/arm/mach-ep93xx/vision_ep9307.c
+++ b/arch/arm/mach-ep93xx/vision_ep9307.c
@@ -365,7 +365,7 @@ MACHINE_START(VISION_EP9307, "Vision Engraving Systems EP9307")
 	.map_io		= vision_map_io,
 	.init_irq	= ep93xx_init_irq,
 	.handle_irq	= vic_handle_irq,
-	.timer		= &ep93xx_timer,
+	.init_time	= ep93xx_timer_init,
 	.init_machine	= vision_init_machine,
 	.init_late	= ep93xx_init_late,
 	.restart	= ep93xx_restart,
diff --git a/arch/arm/mach-exynos/common.h b/arch/arm/mach-exynos/common.h
index 04744f9c120f..12f2f1117e99 100644
--- a/arch/arm/mach-exynos/common.h
+++ b/arch/arm/mach-exynos/common.h
@@ -12,7 +12,7 @@
 #ifndef __ARCH_ARM_MACH_EXYNOS_COMMON_H
 #define __ARCH_ARM_MACH_EXYNOS_COMMON_H
 
-extern struct sys_timer exynos4_timer;
+extern void exynos4_timer_init(void);
 
 struct map_desc;
 void exynos_init_io(struct map_desc *mach_desc, int size);
diff --git a/arch/arm/mach-exynos/mach-armlex4210.c b/arch/arm/mach-exynos/mach-armlex4210.c
index b938f9fc1dd1..2f18130d0d10 100644
--- a/arch/arm/mach-exynos/mach-armlex4210.c
+++ b/arch/arm/mach-exynos/mach-armlex4210.c
@@ -204,6 +204,6 @@ MACHINE_START(ARMLEX4210, "ARMLEX4210")
 	.handle_irq	= gic_handle_irq,
 	.init_machine	= armlex4210_machine_init,
 	.init_late	= exynos_init_late,
-	.timer		= &exynos4_timer,
+	.init_time	= exynos4_timer_init,
 	.restart	= exynos4_restart,
 MACHINE_END
diff --git a/arch/arm/mach-exynos/mach-exynos4-dt.c b/arch/arm/mach-exynos/mach-exynos4-dt.c
index 92757ff817ae..160030168b19 100644
--- a/arch/arm/mach-exynos/mach-exynos4-dt.c
+++ b/arch/arm/mach-exynos/mach-exynos4-dt.c
@@ -110,7 +110,7 @@ DT_MACHINE_START(EXYNOS4210_DT, "Samsung Exynos4 (Flattened Device Tree)")
 	.handle_irq	= gic_handle_irq,
 	.init_machine	= exynos4_dt_machine_init,
 	.init_late	= exynos_init_late,
-	.timer		= &exynos4_timer,
+	.init_time	= exynos4_timer_init,
 	.dt_compat	= exynos4_dt_compat,
 	.restart        = exynos4_restart,
 MACHINE_END
diff --git a/arch/arm/mach-exynos/mach-exynos5-dt.c b/arch/arm/mach-exynos/mach-exynos5-dt.c
index e99d3d8f2bcf..4e074c67cc8b 100644
--- a/arch/arm/mach-exynos/mach-exynos5-dt.c
+++ b/arch/arm/mach-exynos/mach-exynos5-dt.c
@@ -182,7 +182,7 @@ DT_MACHINE_START(EXYNOS5_DT, "SAMSUNG EXYNOS5 (Flattened Device Tree)")
 	.handle_irq	= gic_handle_irq,
 	.init_machine	= exynos5_dt_machine_init,
 	.init_late	= exynos_init_late,
-	.timer		= &exynos4_timer,
+	.init_time	= exynos4_timer_init,
 	.dt_compat	= exynos5_dt_compat,
 	.restart        = exynos5_restart,
 	.reserve	= exynos5_reserve,
diff --git a/arch/arm/mach-exynos/mach-nuri.c b/arch/arm/mach-exynos/mach-nuri.c
index 27d4ed8b116e..dccd1d16b836 100644
--- a/arch/arm/mach-exynos/mach-nuri.c
+++ b/arch/arm/mach-exynos/mach-nuri.c
@@ -1382,7 +1382,7 @@ MACHINE_START(NURI, "NURI")
 	.handle_irq	= gic_handle_irq,
 	.init_machine	= nuri_machine_init,
 	.init_late	= exynos_init_late,
-	.timer		= &exynos4_timer,
+	.init_time	= exynos4_timer_init,
 	.reserve        = &nuri_reserve,
 	.restart	= exynos4_restart,
 MACHINE_END
diff --git a/arch/arm/mach-exynos/mach-origen.c b/arch/arm/mach-exynos/mach-origen.c
index 5e34b9c16196..4e1156324562 100644
--- a/arch/arm/mach-exynos/mach-origen.c
+++ b/arch/arm/mach-exynos/mach-origen.c
@@ -817,7 +817,7 @@ MACHINE_START(ORIGEN, "ORIGEN")
 	.handle_irq	= gic_handle_irq,
 	.init_machine	= origen_machine_init,
 	.init_late	= exynos_init_late,
-	.timer		= &exynos4_timer,
+	.init_time	= exynos4_timer_init,
 	.reserve	= &origen_reserve,
 	.restart	= exynos4_restart,
 MACHINE_END
diff --git a/arch/arm/mach-exynos/mach-smdk4x12.c b/arch/arm/mach-exynos/mach-smdk4x12.c
index ae6da40c2aa9..e9c9c2995f09 100644
--- a/arch/arm/mach-exynos/mach-smdk4x12.c
+++ b/arch/arm/mach-exynos/mach-smdk4x12.c
@@ -378,7 +378,7 @@ MACHINE_START(SMDK4212, "SMDK4212")
 	.map_io		= smdk4x12_map_io,
 	.handle_irq	= gic_handle_irq,
 	.init_machine	= smdk4x12_machine_init,
-	.timer		= &exynos4_timer,
+	.init_time	= exynos4_timer_init,
 	.restart	= exynos4_restart,
 	.reserve	= &smdk4x12_reserve,
 MACHINE_END
@@ -393,7 +393,7 @@ MACHINE_START(SMDK4412, "SMDK4412")
 	.handle_irq	= gic_handle_irq,
 	.init_machine	= smdk4x12_machine_init,
 	.init_late	= exynos_init_late,
-	.timer		= &exynos4_timer,
+	.init_time	= exynos4_timer_init,
 	.restart	= exynos4_restart,
 	.reserve	= &smdk4x12_reserve,
 MACHINE_END
diff --git a/arch/arm/mach-exynos/mach-smdkv310.c b/arch/arm/mach-exynos/mach-smdkv310.c
index 35548e3c097d..b228ab9bda0b 100644
--- a/arch/arm/mach-exynos/mach-smdkv310.c
+++ b/arch/arm/mach-exynos/mach-smdkv310.c
@@ -425,7 +425,7 @@ MACHINE_START(SMDKV310, "SMDKV310")
 	.map_io		= smdkv310_map_io,
 	.handle_irq	= gic_handle_irq,
 	.init_machine	= smdkv310_machine_init,
-	.timer		= &exynos4_timer,
+	.init_time	= exynos4_timer_init,
 	.reserve	= &smdkv310_reserve,
 	.restart	= exynos4_restart,
 MACHINE_END
@@ -439,7 +439,7 @@ MACHINE_START(SMDKC210, "SMDKC210")
 	.handle_irq	= gic_handle_irq,
 	.init_machine	= smdkv310_machine_init,
 	.init_late	= exynos_init_late,
-	.timer		= &exynos4_timer,
+	.init_time	= exynos4_timer_init,
 	.reserve	= &smdkv310_reserve,
 	.restart	= exynos4_restart,
 MACHINE_END
diff --git a/arch/arm/mach-exynos/mach-universal_c210.c b/arch/arm/mach-exynos/mach-universal_c210.c
index 9e3340f18950..866f29a9beee 100644
--- a/arch/arm/mach-exynos/mach-universal_c210.c
+++ b/arch/arm/mach-exynos/mach-universal_c210.c
@@ -1154,7 +1154,7 @@ MACHINE_START(UNIVERSAL_C210, "UNIVERSAL_C210")
 	.handle_irq	= gic_handle_irq,
 	.init_machine	= universal_machine_init,
 	.init_late	= exynos_init_late,
-	.timer		= &s5p_timer,
+	.init_time	= s5p_timer_init,
 	.reserve        = &universal_reserve,
 	.restart	= exynos4_restart,
 MACHINE_END
diff --git a/arch/arm/mach-exynos/mct.c b/arch/arm/mach-exynos/mct.c
index 57668eb68e75..4a89b54fcadb 100644
--- a/arch/arm/mach-exynos/mct.c
+++ b/arch/arm/mach-exynos/mct.c
@@ -478,7 +478,7 @@ static void __init exynos4_timer_resources(void)
 #endif /* CONFIG_LOCAL_TIMERS */
 }
 
-static void __init exynos_timer_init(void)
+void __init exynos4_timer_init(void)
 {
 	if (soc_is_exynos5440()) {
 		arch_timer_of_register();
@@ -494,7 +494,3 @@ static void __init exynos_timer_init(void)
 	exynos4_clocksource_init();
 	exynos4_clockevent_init();
 }
-
-struct sys_timer exynos4_timer = {
-	.init		= exynos_timer_init,
-};
diff --git a/arch/arm/mach-footbridge/cats-hw.c b/arch/arm/mach-footbridge/cats-hw.c
index 25b453601acc..6987a09ec219 100644
--- a/arch/arm/mach-footbridge/cats-hw.c
+++ b/arch/arm/mach-footbridge/cats-hw.c
@@ -90,6 +90,6 @@ MACHINE_START(CATS, "Chalice-CATS")
 	.fixup		= fixup_cats,
 	.map_io		= footbridge_map_io,
 	.init_irq	= footbridge_init_irq,
-	.timer		= &isa_timer,
+	.init_time	= isa_timer_init,
 	.restart	= footbridge_restart,
 MACHINE_END
diff --git a/arch/arm/mach-footbridge/common.h b/arch/arm/mach-footbridge/common.h
index c9767b892cb2..a846e50a07b8 100644
--- a/arch/arm/mach-footbridge/common.h
+++ b/arch/arm/mach-footbridge/common.h
@@ -1,6 +1,6 @@
 
-extern struct sys_timer footbridge_timer;
-extern struct sys_timer isa_timer;
+extern void footbridge_timer_init(void);
+extern void isa_timer_init(void);
 
 extern void isa_rtc_init(void);
 
diff --git a/arch/arm/mach-footbridge/dc21285-timer.c b/arch/arm/mach-footbridge/dc21285-timer.c
index 3b54196447c7..9f14b1d9a0e7 100644
--- a/arch/arm/mach-footbridge/dc21285-timer.c
+++ b/arch/arm/mach-footbridge/dc21285-timer.c
@@ -93,7 +93,7 @@ static struct irqaction footbridge_timer_irq = {
 /*
  * Set up timer interrupt.
  */
-static void __init footbridge_timer_init(void)
+void __init footbridge_timer_init(void)
 {
 	struct clock_event_device *ce = &ckevt_dc21285;
 
@@ -108,7 +108,3 @@ static void __init footbridge_timer_init(void)
 
 	clockevents_register_device(ce);
 }
-
-struct sys_timer footbridge_timer = {
-	.init		= footbridge_timer_init,
-};
diff --git a/arch/arm/mach-footbridge/ebsa285.c b/arch/arm/mach-footbridge/ebsa285.c
index b09551ef89ca..b08243500e2e 100644
--- a/arch/arm/mach-footbridge/ebsa285.c
+++ b/arch/arm/mach-footbridge/ebsa285.c
@@ -101,7 +101,7 @@ MACHINE_START(EBSA285, "EBSA285")
 	.video_end	= 0x000bffff,
 	.map_io		= footbridge_map_io,
 	.init_irq	= footbridge_init_irq,
-	.timer		= &footbridge_timer,
+	.init_time	= footbridge_timer_init,
 	.restart	= footbridge_restart,
 MACHINE_END
 
diff --git a/arch/arm/mach-footbridge/isa-timer.c b/arch/arm/mach-footbridge/isa-timer.c
index c40bb415f4b5..d9301dd56354 100644
--- a/arch/arm/mach-footbridge/isa-timer.c
+++ b/arch/arm/mach-footbridge/isa-timer.c
@@ -31,14 +31,10 @@ static struct irqaction pit_timer_irq = {
 	.dev_id		= &i8253_clockevent,
 };
 
-static void __init isa_timer_init(void)
+void __init isa_timer_init(void)
 {
 	clocksource_i8253_init();
 
 	setup_irq(i8253_clockevent.irq, &pit_timer_irq);
 	clockevent_i8253_init(false);
 }
-
-struct sys_timer isa_timer = {
-	.init		= isa_timer_init,
-};
diff --git a/arch/arm/mach-footbridge/netwinder-hw.c b/arch/arm/mach-footbridge/netwinder-hw.c
index d2d14339c6c4..90ea23fdce4c 100644
--- a/arch/arm/mach-footbridge/netwinder-hw.c
+++ b/arch/arm/mach-footbridge/netwinder-hw.c
@@ -766,6 +766,6 @@ MACHINE_START(NETWINDER, "Rebel-NetWinder")
 	.fixup		= fixup_netwinder,
 	.map_io		= footbridge_map_io,
 	.init_irq	= footbridge_init_irq,
-	.timer		= &isa_timer,
+	.init_time	= isa_timer_init,
 	.restart	= netwinder_restart,
 MACHINE_END
diff --git a/arch/arm/mach-footbridge/personal.c b/arch/arm/mach-footbridge/personal.c
index e1e9990fa957..7bdeabdcd4d8 100644
--- a/arch/arm/mach-footbridge/personal.c
+++ b/arch/arm/mach-footbridge/personal.c
@@ -18,7 +18,7 @@ MACHINE_START(PERSONAL_SERVER, "Compaq-PersonalServer")
 	.atag_offset	= 0x100,
 	.map_io		= footbridge_map_io,
 	.init_irq	= footbridge_init_irq,
-	.timer		= &footbridge_timer,
+	.init_time	= footbridge_timer_init,
 	.restart	= footbridge_restart,
 MACHINE_END
 
diff --git a/arch/arm/mach-gemini/board-nas4220b.c b/arch/arm/mach-gemini/board-nas4220b.c
index 5927d3c253aa..08bd650c42f3 100644
--- a/arch/arm/mach-gemini/board-nas4220b.c
+++ b/arch/arm/mach-gemini/board-nas4220b.c
@@ -31,10 +31,6 @@
 
 #include "common.h"
 
-static struct sys_timer ib4220b_timer = {
-	.init	= gemini_timer_init,
-};
-
 static struct gpio_led ib4220b_leds[] = {
 	{
 		.name			= "nas4220b:orange:hdd",
@@ -105,6 +101,6 @@ MACHINE_START(NAS4220B, "Raidsonic NAS IB-4220-B")
 	.atag_offset	= 0x100,
 	.map_io		= gemini_map_io,
 	.init_irq	= gemini_init_irq,
-	.timer		= &ib4220b_timer,
+	.init_time	= gemini_timer_init,
 	.init_machine	= ib4220b_init,
 MACHINE_END
diff --git a/arch/arm/mach-gemini/board-rut1xx.c b/arch/arm/mach-gemini/board-rut1xx.c
index cd7437a1cea0..fa0a36337f4d 100644
--- a/arch/arm/mach-gemini/board-rut1xx.c
+++ b/arch/arm/mach-gemini/board-rut1xx.c
@@ -71,10 +71,6 @@ static struct platform_device rut1xx_leds = {
 	},
 };
 
-static struct sys_timer rut1xx_timer = {
-	.init	= gemini_timer_init,
-};
-
 static void __init rut1xx_init(void)
 {
 	gemini_gpio_init();
@@ -89,6 +85,6 @@ MACHINE_START(RUT100, "Teltonika RUT100")
 	.atag_offset	= 0x100,
 	.map_io		= gemini_map_io,
 	.init_irq	= gemini_init_irq,
-	.timer		= &rut1xx_timer,
+	.init_time	= gemini_timer_init,
 	.init_machine	= rut1xx_init,
 MACHINE_END
diff --git a/arch/arm/mach-gemini/board-wbd111.c b/arch/arm/mach-gemini/board-wbd111.c
index a367880368f1..3321cd6cc1f3 100644
--- a/arch/arm/mach-gemini/board-wbd111.c
+++ b/arch/arm/mach-gemini/board-wbd111.c
@@ -80,10 +80,6 @@ static struct platform_device wbd111_leds_device = {
 	},
 };
 
-static struct sys_timer wbd111_timer = {
-	.init	= gemini_timer_init,
-};
-
 static struct mtd_partition wbd111_partitions[] = {
 	{
 		.name		= "RedBoot",
@@ -132,6 +128,6 @@ MACHINE_START(WBD111, "Wiliboard WBD-111")
 	.atag_offset	= 0x100,
 	.map_io		= gemini_map_io,
 	.init_irq	= gemini_init_irq,
-	.timer		= &wbd111_timer,
+	.init_time	= gemini_timer_init,
 	.init_machine	= wbd111_init,
 MACHINE_END
diff --git a/arch/arm/mach-gemini/board-wbd222.c b/arch/arm/mach-gemini/board-wbd222.c
index f382811c1319..fe33c825fdaf 100644
--- a/arch/arm/mach-gemini/board-wbd222.c
+++ b/arch/arm/mach-gemini/board-wbd222.c
@@ -80,10 +80,6 @@ static struct platform_device wbd222_leds_device = {
 	},
 };
 
-static struct sys_timer wbd222_timer = {
-	.init	= gemini_timer_init,
-};
-
 static struct mtd_partition wbd222_partitions[] = {
 	{
 		.name		= "RedBoot",
@@ -132,6 +128,6 @@ MACHINE_START(WBD222, "Wiliboard WBD-222")
 	.atag_offset	= 0x100,
 	.map_io		= gemini_map_io,
 	.init_irq	= gemini_init_irq,
-	.timer		= &wbd222_timer,
+	.init_time	= gemini_timer_init,
 	.init_machine	= wbd222_init,
 MACHINE_END
diff --git a/arch/arm/mach-h720x/common.h b/arch/arm/mach-h720x/common.h
index 79cfb9706360..7e738410ca93 100644
--- a/arch/arm/mach-h720x/common.h
+++ b/arch/arm/mach-h720x/common.h
@@ -19,12 +19,12 @@ extern void __init h720x_map_io(void);
 extern void h720x_restart(char, const char *);
 
 #ifdef CONFIG_ARCH_H7202
-extern struct sys_timer h7202_timer;
+extern void h7202_timer_init(void);
 extern void __init init_hw_h7202(void);
 extern void __init h7202_init_irq(void);
 extern void __init h7202_init_time(void);
 #endif
 
 #ifdef CONFIG_ARCH_H7201
-extern struct sys_timer h7201_timer;
+extern void h7201_timer_init(void);
 #endif
diff --git a/arch/arm/mach-h720x/cpu-h7201.c b/arch/arm/mach-h720x/cpu-h7201.c
index ba349cffcd3a..13c741215387 100644
--- a/arch/arm/mach-h720x/cpu-h7201.c
+++ b/arch/arm/mach-h720x/cpu-h7201.c
@@ -44,7 +44,7 @@ static struct irqaction h7201_timer_irq = {
 /*
  * Setup TIMER0 as system timer
  */
-void __init h7201_init_time(void)
+void __init h7201_timer_init(void)
 {
 	arch_gettimeoffset = h720x_gettimeoffset;
 
@@ -55,7 +55,3 @@ void __init h7201_init_time(void)
 
 	setup_irq(IRQ_TIMER0, &h7201_timer_irq);
 }
-
-struct sys_timer h7201_timer = {
-	.init		= h7201_init_time,
-};
diff --git a/arch/arm/mach-h720x/cpu-h7202.c b/arch/arm/mach-h720x/cpu-h7202.c
index fb9ca76c7313..e2ae7e898f9d 100644
--- a/arch/arm/mach-h720x/cpu-h7202.c
+++ b/arch/arm/mach-h720x/cpu-h7202.c
@@ -178,7 +178,7 @@ static struct irqaction h7202_timer_irq = {
 /*
  * Setup TIMER0 as system timer
  */
-void __init h7202_init_time(void)
+void __init h7202_timer_init(void)
 {
 	arch_gettimeoffset = h720x_gettimeoffset;
 
@@ -190,10 +190,6 @@ void __init h7202_init_time(void)
 	setup_irq(IRQ_TIMER0, &h7202_timer_irq);
 }
 
-struct sys_timer h7202_timer = {
-	.init		= h7202_init_time,
-};
-
 void __init h7202_init_irq (void)
 {
 	int 	irq;
diff --git a/arch/arm/mach-h720x/h7201-eval.c b/arch/arm/mach-h720x/h7201-eval.c
index 5fdb20c855e2..4fdeb686c0a9 100644
--- a/arch/arm/mach-h720x/h7201-eval.c
+++ b/arch/arm/mach-h720x/h7201-eval.c
@@ -32,7 +32,7 @@ MACHINE_START(H7201, "Hynix GMS30C7201")
 	.atag_offset	= 0x1000,
 	.map_io		= h720x_map_io,
 	.init_irq	= h720x_init_irq,
-	.timer		= &h7201_timer,
+	.init_time	= h7201_timer_init,
 	.dma_zone_size	= SZ_256M,
 	.restart	= h720x_restart,
 MACHINE_END
diff --git a/arch/arm/mach-h720x/h7202-eval.c b/arch/arm/mach-h720x/h7202-eval.c
index 169673036c59..f68e967a2062 100644
--- a/arch/arm/mach-h720x/h7202-eval.c
+++ b/arch/arm/mach-h720x/h7202-eval.c
@@ -74,7 +74,7 @@ MACHINE_START(H7202, "Hynix HMS30C7202")
 	.atag_offset	= 0x100,
 	.map_io		= h720x_map_io,
 	.init_irq	= h7202_init_irq,
-	.timer		= &h7202_timer,
+	.init_time	= h7202_timer_init,
 	.init_machine	= init_eval_h7202,
 	.dma_zone_size	= SZ_256M,
 	.restart	= h720x_restart,
diff --git a/arch/arm/mach-highbank/highbank.c b/arch/arm/mach-highbank/highbank.c
index dc248167d206..f6ca285ee5c5 100644
--- a/arch/arm/mach-highbank/highbank.c
+++ b/arch/arm/mach-highbank/highbank.c
@@ -129,10 +129,6 @@ static void __init highbank_timer_init(void)
 	arch_timer_sched_clock_init();
 }
 
-static struct sys_timer highbank_timer = {
-	.init = highbank_timer_init,
-};
-
 static void highbank_power_off(void)
 {
 	hignbank_set_pwr_shutdown();
@@ -209,7 +205,7 @@ DT_MACHINE_START(HIGHBANK, "Highbank")
 	.smp		= smp_ops(highbank_smp_ops),
 	.map_io		= debug_ll_io_init,
 	.init_irq	= highbank_init_irq,
-	.timer		= &highbank_timer,
+	.init_time	= highbank_timer_init,
 	.handle_irq	= gic_handle_irq,
 	.init_machine	= highbank_init,
 	.dt_compat	= highbank_match,
diff --git a/arch/arm/mach-imx/imx25-dt.c b/arch/arm/mach-imx/imx25-dt.c
index e17dfbc42192..03b65e5ea541 100644
--- a/arch/arm/mach-imx/imx25-dt.c
+++ b/arch/arm/mach-imx/imx25-dt.c
@@ -22,15 +22,6 @@ static void __init imx25_dt_init(void)
 	of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL);
 }
 
-static void __init imx25_timer_init(void)
-{
-	mx25_clocks_init_dt();
-}
-
-static struct sys_timer imx25_timer = {
-	.init = imx25_timer_init,
-};
-
 static const char * const imx25_dt_board_compat[] __initconst = {
 	"fsl,imx25",
 	NULL
@@ -41,7 +32,7 @@ DT_MACHINE_START(IMX25_DT, "Freescale i.MX25 (Device Tree Support)")
 	.init_early	= imx25_init_early,
 	.init_irq	= mx25_init_irq,
 	.handle_irq	= imx25_handle_irq,
-	.timer		= &imx25_timer,
+	.init_time	= imx25_timer_init,
 	.init_machine	= imx25_dt_init,
 	.dt_compat	= imx25_dt_board_compat,
 	.restart	= mxc_restart,
diff --git a/arch/arm/mach-imx/imx27-dt.c b/arch/arm/mach-imx/imx27-dt.c
index ebfae96543c4..c915a490a11c 100644
--- a/arch/arm/mach-imx/imx27-dt.c
+++ b/arch/arm/mach-imx/imx27-dt.c
@@ -39,26 +39,22 @@ static void __init imx27_dt_init(void)
 			     imx27_auxdata_lookup, NULL);
 }
 
-static void __init imx27_timer_init(void)
-{
-	mx27_clocks_init_dt();
-}
-
-static struct sys_timer imx27_timer = {
-	.init = imx27_timer_init,
-};
-
 static const char * const imx27_dt_board_compat[] __initconst = {
 	"fsl,imx27",
 	NULL
 };
 
+static void __init imx27_timer_init(void)
+{
+	mx27_clocks_init_dt();
+}
+
 DT_MACHINE_START(IMX27_DT, "Freescale i.MX27 (Device Tree Support)")
 	.map_io		= mx27_map_io,
 	.init_early	= imx27_init_early,
 	.init_irq	= mx27_init_irq,
 	.handle_irq	= imx27_handle_irq,
-	.timer		= &imx27_timer,
+	.init_time	= imx27_timer_init,
 	.init_machine	= imx27_dt_init,
 	.dt_compat	= imx27_dt_board_compat,
 	.restart	= mxc_restart,
diff --git a/arch/arm/mach-imx/imx31-dt.c b/arch/arm/mach-imx/imx31-dt.c
index af476de2570e..f9a690960097 100644
--- a/arch/arm/mach-imx/imx31-dt.c
+++ b/arch/arm/mach-imx/imx31-dt.c
@@ -38,15 +38,6 @@ static void __init imx31_dt_init(void)
 			     imx31_auxdata_lookup, NULL);
 }
 
-static void __init imx31_timer_init(void)
-{
-	mx31_clocks_init_dt();
-}
-
-static struct sys_timer imx31_timer = {
-	.init = imx31_timer_init,
-};
-
 static const char *imx31_dt_board_compat[] __initdata = {
 	"fsl,imx31",
 	NULL
@@ -57,7 +48,7 @@ DT_MACHINE_START(IMX31_DT, "Freescale i.MX31 (Device Tree Support)")
 	.init_early	= imx31_init_early,
 	.init_irq	= mx31_init_irq,
 	.handle_irq	= imx31_handle_irq,
-	.timer		= &imx31_timer,
+	.init_time	= mx31_clocks_init_dt,
 	.init_machine	= imx31_dt_init,
 	.dt_compat	= imx31_dt_board_compat,
 	.restart	= mxc_restart,
diff --git a/arch/arm/mach-imx/imx51-dt.c b/arch/arm/mach-imx/imx51-dt.c
index 5ffa40c673f8..e2926a8863f8 100644
--- a/arch/arm/mach-imx/imx51-dt.c
+++ b/arch/arm/mach-imx/imx51-dt.c
@@ -24,26 +24,22 @@ static void __init imx51_dt_init(void)
 	of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL);
 }
 
-static void __init imx51_timer_init(void)
-{
-	mx51_clocks_init_dt();
-}
-
-static struct sys_timer imx51_timer = {
-	.init = imx51_timer_init,
-};
-
 static const char *imx51_dt_board_compat[] __initdata = {
 	"fsl,imx51",
 	NULL
 };
 
+static void __init imx51_timer_init(void)
+{
+	mx51_clocks_init_dt();
+}
+
 DT_MACHINE_START(IMX51_DT, "Freescale i.MX51 (Device Tree Support)")
 	.map_io		= mx51_map_io,
 	.init_early	= imx51_init_early,
 	.init_irq	= mx51_init_irq,
 	.handle_irq	= imx51_handle_irq,
-	.timer		= &imx51_timer,
+	.init_time	= imx51_timer_init,
 	.init_machine	= imx51_dt_init,
 	.init_late	= imx51_init_late,
 	.dt_compat	= imx51_dt_board_compat,
diff --git a/arch/arm/mach-imx/mach-apf9328.c b/arch/arm/mach-imx/mach-apf9328.c
index 5c9bd2c66e6d..067580b2969b 100644
--- a/arch/arm/mach-imx/mach-apf9328.c
+++ b/arch/arm/mach-imx/mach-apf9328.c
@@ -137,17 +137,13 @@ static void __init apf9328_timer_init(void)
 	mx1_clocks_init(32768);
 }
 
-static struct sys_timer apf9328_timer = {
-	.init	= apf9328_timer_init,
-};
-
 MACHINE_START(APF9328, "Armadeus APF9328")
 	/* Maintainer: Gwenhael Goavec-Merou, ARMadeus Systems */
 	.map_io       = mx1_map_io,
 	.init_early   = imx1_init_early,
 	.init_irq     = mx1_init_irq,
 	.handle_irq   = imx1_handle_irq,
-	.timer        = &apf9328_timer,
+	.init_time	= apf9328_timer_init,
 	.init_machine = apf9328_init,
 	.restart	= mxc_restart,
 MACHINE_END
diff --git a/arch/arm/mach-imx/mach-armadillo5x0.c b/arch/arm/mach-imx/mach-armadillo5x0.c
index 59bd6b06a6b5..368a6e3f5926 100644
--- a/arch/arm/mach-imx/mach-armadillo5x0.c
+++ b/arch/arm/mach-imx/mach-armadillo5x0.c
@@ -557,10 +557,6 @@ static void __init armadillo5x0_timer_init(void)
 	mx31_clocks_init(26000000);
 }
 
-static struct sys_timer armadillo5x0_timer = {
-	.init	= armadillo5x0_timer_init,
-};
-
 MACHINE_START(ARMADILLO5X0, "Armadillo-500")
 	/* Maintainer: Alberto Panizzo  */
 	.atag_offset = 0x100,
@@ -568,7 +564,7 @@ MACHINE_START(ARMADILLO5X0, "Armadillo-500")
 	.init_early = imx31_init_early,
 	.init_irq = mx31_init_irq,
 	.handle_irq = imx31_handle_irq,
-	.timer = &armadillo5x0_timer,
+	.init_time	= armadillo5x0_timer_init,
 	.init_machine = armadillo5x0_init,
 	.restart	= mxc_restart,
 MACHINE_END
diff --git a/arch/arm/mach-imx/mach-bug.c b/arch/arm/mach-imx/mach-bug.c
index 3a39d5aec07a..2d00476f7d2c 100644
--- a/arch/arm/mach-imx/mach-bug.c
+++ b/arch/arm/mach-imx/mach-bug.c
@@ -53,16 +53,12 @@ static void __init bug_timer_init(void)
 	mx31_clocks_init(26000000);
 }
 
-static struct sys_timer bug_timer = {
-	.init = bug_timer_init,
-};
-
 MACHINE_START(BUG, "BugLabs BUGBase")
 	.map_io = mx31_map_io,
 	.init_early = imx31_init_early,
 	.init_irq = mx31_init_irq,
 	.handle_irq = imx31_handle_irq,
-	.timer = &bug_timer,
+	.init_time	= bug_timer_init,
 	.init_machine = bug_board_init,
 	.restart	= mxc_restart,
 MACHINE_END
diff --git a/arch/arm/mach-imx/mach-cpuimx27.c b/arch/arm/mach-imx/mach-cpuimx27.c
index 12a370646b45..146559311bd2 100644
--- a/arch/arm/mach-imx/mach-cpuimx27.c
+++ b/arch/arm/mach-imx/mach-cpuimx27.c
@@ -309,17 +309,13 @@ static void __init eukrea_cpuimx27_timer_init(void)
 	mx27_clocks_init(26000000);
 }
 
-static struct sys_timer eukrea_cpuimx27_timer = {
-	.init = eukrea_cpuimx27_timer_init,
-};
-
 MACHINE_START(EUKREA_CPUIMX27, "EUKREA CPUIMX27")
 	.atag_offset = 0x100,
 	.map_io = mx27_map_io,
 	.init_early = imx27_init_early,
 	.init_irq = mx27_init_irq,
 	.handle_irq = imx27_handle_irq,
-	.timer = &eukrea_cpuimx27_timer,
+	.init_time	= eukrea_cpuimx27_timer_init,
 	.init_machine = eukrea_cpuimx27_init,
 	.restart	= mxc_restart,
 MACHINE_END
diff --git a/arch/arm/mach-imx/mach-cpuimx35.c b/arch/arm/mach-imx/mach-cpuimx35.c
index 5a31bf8c8f4c..771362d1fbee 100644
--- a/arch/arm/mach-imx/mach-cpuimx35.c
+++ b/arch/arm/mach-imx/mach-cpuimx35.c
@@ -193,10 +193,6 @@ static void __init eukrea_cpuimx35_timer_init(void)
 	mx35_clocks_init();
 }
 
-static struct sys_timer eukrea_cpuimx35_timer = {
-	.init	= eukrea_cpuimx35_timer_init,
-};
-
 MACHINE_START(EUKREA_CPUIMX35SD, "Eukrea CPUIMX35")
 	/* Maintainer: Eukrea Electromatique */
 	.atag_offset = 0x100,
@@ -204,7 +200,7 @@ MACHINE_START(EUKREA_CPUIMX35SD, "Eukrea CPUIMX35")
 	.init_early = imx35_init_early,
 	.init_irq = mx35_init_irq,
 	.handle_irq = imx35_handle_irq,
-	.timer = &eukrea_cpuimx35_timer,
+	.init_time	= eukrea_cpuimx35_timer_init,
 	.init_machine = eukrea_cpuimx35_init,
 	.restart	= mxc_restart,
 MACHINE_END
diff --git a/arch/arm/mach-imx/mach-cpuimx51sd.c b/arch/arm/mach-imx/mach-cpuimx51sd.c
index b727de029c8f..9b7393234f6f 100644
--- a/arch/arm/mach-imx/mach-cpuimx51sd.c
+++ b/arch/arm/mach-imx/mach-cpuimx51sd.c
@@ -355,10 +355,6 @@ static void __init eukrea_cpuimx51sd_timer_init(void)
 	mx51_clocks_init(32768, 24000000, 22579200, 0);
 }
 
-static struct sys_timer mxc_timer = {
-	.init	= eukrea_cpuimx51sd_timer_init,
-};
-
 MACHINE_START(EUKREA_CPUIMX51SD, "Eukrea CPUIMX51SD")
 	/* Maintainer: Eric Bénard <eric@eukrea.com> */
 	.atag_offset = 0x100,
@@ -366,7 +362,7 @@ MACHINE_START(EUKREA_CPUIMX51SD, "Eukrea CPUIMX51SD")
 	.init_early = imx51_init_early,
 	.init_irq = mx51_init_irq,
 	.handle_irq = imx51_handle_irq,
-	.timer = &mxc_timer,
+	.init_time	= eukrea_cpuimx51sd_timer_init,
 	.init_machine = eukrea_cpuimx51sd_init,
 	.init_late	= imx51_init_late,
 	.restart	= mxc_restart,
diff --git a/arch/arm/mach-imx/mach-eukrea_cpuimx25.c b/arch/arm/mach-imx/mach-eukrea_cpuimx25.c
index 75027a5ad8b7..4bf454424249 100644
--- a/arch/arm/mach-imx/mach-eukrea_cpuimx25.c
+++ b/arch/arm/mach-imx/mach-eukrea_cpuimx25.c
@@ -159,10 +159,6 @@ static void __init eukrea_cpuimx25_timer_init(void)
 	mx25_clocks_init();
 }
 
-static struct sys_timer eukrea_cpuimx25_timer = {
-	.init   = eukrea_cpuimx25_timer_init,
-};
-
 MACHINE_START(EUKREA_CPUIMX25SD, "Eukrea CPUIMX25")
 	/* Maintainer: Eukrea Electromatique */
 	.atag_offset = 0x100,
@@ -170,7 +166,7 @@ MACHINE_START(EUKREA_CPUIMX25SD, "Eukrea CPUIMX25")
 	.init_early = imx25_init_early,
 	.init_irq = mx25_init_irq,
 	.handle_irq = imx25_handle_irq,
-	.timer = &eukrea_cpuimx25_timer,
+	.init_time = eukrea_cpuimx25_timer_init,
 	.init_machine = eukrea_cpuimx25_init,
 	.restart	= mxc_restart,
 MACHINE_END
diff --git a/arch/arm/mach-imx/mach-imx27_visstrim_m10.c b/arch/arm/mach-imx/mach-imx27_visstrim_m10.c
index 318bd8df7fcc..29ac8ee651d2 100644
--- a/arch/arm/mach-imx/mach-imx27_visstrim_m10.c
+++ b/arch/arm/mach-imx/mach-imx27_visstrim_m10.c
@@ -598,10 +598,6 @@ static void __init visstrim_m10_timer_init(void)
 	mx27_clocks_init((unsigned long)25000000);
 }
 
-static struct sys_timer visstrim_m10_timer = {
-	.init	= visstrim_m10_timer_init,
-};
-
 MACHINE_START(IMX27_VISSTRIM_M10, "Vista Silicon Visstrim_M10")
 	.atag_offset = 0x100,
 	.reserve = visstrim_reserve,
@@ -609,7 +605,7 @@ MACHINE_START(IMX27_VISSTRIM_M10, "Vista Silicon Visstrim_M10")
 	.init_early = imx27_init_early,
 	.init_irq = mx27_init_irq,
 	.handle_irq = imx27_handle_irq,
-	.timer = &visstrim_m10_timer,
+	.init_time	= visstrim_m10_timer_init,
 	.init_machine = visstrim_m10_board_init,
 	.restart	= mxc_restart,
 MACHINE_END
diff --git a/arch/arm/mach-imx/mach-imx27ipcam.c b/arch/arm/mach-imx/mach-imx27ipcam.c
index 53a860112938..1a851aea6832 100644
--- a/arch/arm/mach-imx/mach-imx27ipcam.c
+++ b/arch/arm/mach-imx/mach-imx27ipcam.c
@@ -65,10 +65,6 @@ static void __init mx27ipcam_timer_init(void)
 	mx27_clocks_init(25000000);
 }
 
-static struct sys_timer mx27ipcam_timer = {
-	.init	= mx27ipcam_timer_init,
-};
-
 MACHINE_START(IMX27IPCAM, "Freescale IMX27IPCAM")
 	/* maintainer: Freescale Semiconductor, Inc. */
 	.atag_offset = 0x100,
@@ -76,7 +72,7 @@ MACHINE_START(IMX27IPCAM, "Freescale IMX27IPCAM")
 	.init_early = imx27_init_early,
 	.init_irq = mx27_init_irq,
 	.handle_irq = imx27_handle_irq,
-	.timer = &mx27ipcam_timer,
+	.init_time	= mx27ipcam_timer_init,
 	.init_machine = mx27ipcam_init,
 	.restart	= mxc_restart,
 MACHINE_END
diff --git a/arch/arm/mach-imx/mach-imx27lite.c b/arch/arm/mach-imx/mach-imx27lite.c
index fc8dce931378..3da2e3e44ce9 100644
--- a/arch/arm/mach-imx/mach-imx27lite.c
+++ b/arch/arm/mach-imx/mach-imx27lite.c
@@ -72,17 +72,13 @@ static void __init mx27lite_timer_init(void)
 	mx27_clocks_init(26000000);
 }
 
-static struct sys_timer mx27lite_timer = {
-	.init	= mx27lite_timer_init,
-};
-
 MACHINE_START(IMX27LITE, "LogicPD i.MX27LITE")
 	.atag_offset = 0x100,
 	.map_io = mx27_map_io,
 	.init_early = imx27_init_early,
 	.init_irq = mx27_init_irq,
 	.handle_irq = imx27_handle_irq,
-	.timer = &mx27lite_timer,
+	.init_time	= mx27lite_timer_init,
 	.init_machine = mx27lite_init,
 	.restart	= mxc_restart,
 MACHINE_END
diff --git a/arch/arm/mach-imx/mach-imx53.c b/arch/arm/mach-imx/mach-imx53.c
index 860284dea0e7..f579c616feed 100644
--- a/arch/arm/mach-imx/mach-imx53.c
+++ b/arch/arm/mach-imx/mach-imx53.c
@@ -44,26 +44,22 @@ static void __init imx53_dt_init(void)
 	of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL);
 }
 
-static void __init imx53_timer_init(void)
-{
-	mx53_clocks_init_dt();
-}
-
-static struct sys_timer imx53_timer = {
-	.init = imx53_timer_init,
-};
-
 static const char *imx53_dt_board_compat[] __initdata = {
 	"fsl,imx53",
 	NULL
 };
 
+static void __init imx53_timer_init(void)
+{
+	mx53_clocks_init_dt();
+}
+
 DT_MACHINE_START(IMX53_DT, "Freescale i.MX53 (Device Tree Support)")
 	.map_io		= mx53_map_io,
 	.init_early	= imx53_init_early,
 	.init_irq	= mx53_init_irq,
 	.handle_irq	= imx53_handle_irq,
-	.timer		= &imx53_timer,
+	.init_time	= imx53_timer_init,
 	.init_machine	= imx53_dt_init,
 	.init_late	= imx53_init_late,
 	.dt_compat	= imx53_dt_board_compat,
diff --git a/arch/arm/mach-imx/mach-imx6q.c b/arch/arm/mach-imx/mach-imx6q.c
index 4eb1b3ac794c..cd277a0f5b16 100644
--- a/arch/arm/mach-imx/mach-imx6q.c
+++ b/arch/arm/mach-imx/mach-imx6q.c
@@ -241,10 +241,6 @@ static void __init imx6q_timer_init(void)
 	imx_print_silicon_rev("i.MX6Q", imx6q_revision());
 }
 
-static struct sys_timer imx6q_timer = {
-	.init = imx6q_timer_init,
-};
-
 static const char *imx6q_dt_compat[] __initdata = {
 	"fsl,imx6q",
 	NULL,
@@ -255,7 +251,7 @@ DT_MACHINE_START(IMX6Q, "Freescale i.MX6 Quad (Device Tree)")
 	.map_io		= imx6q_map_io,
 	.init_irq	= imx6q_init_irq,
 	.handle_irq	= imx6q_handle_irq,
-	.timer		= &imx6q_timer,
+	.init_time	= imx6q_timer_init,
 	.init_machine	= imx6q_init_machine,
 	.init_late      = imx6q_init_late,
 	.dt_compat	= imx6q_dt_compat,
diff --git a/arch/arm/mach-imx/mach-kzm_arm11_01.c b/arch/arm/mach-imx/mach-kzm_arm11_01.c
index 2e536ea53444..c7bc41d6b468 100644
--- a/arch/arm/mach-imx/mach-kzm_arm11_01.c
+++ b/arch/arm/mach-imx/mach-kzm_arm11_01.c
@@ -284,17 +284,13 @@ static void __init kzm_timer_init(void)
 	mx31_clocks_init(26000000);
 }
 
-static struct sys_timer kzm_timer = {
-	.init = kzm_timer_init,
-};
-
 MACHINE_START(KZM_ARM11_01, "Kyoto Microcomputer Co., Ltd. KZM-ARM11-01")
 	.atag_offset = 0x100,
 	.map_io = kzm_map_io,
 	.init_early = imx31_init_early,
 	.init_irq = mx31_init_irq,
 	.handle_irq = imx31_handle_irq,
-	.timer = &kzm_timer,
+	.init_time	= kzm_timer_init,
 	.init_machine = kzm_board_init,
 	.restart	= mxc_restart,
 MACHINE_END
diff --git a/arch/arm/mach-imx/mach-mx1ads.c b/arch/arm/mach-imx/mach-mx1ads.c
index 06b483783e68..9f883e4d6fc9 100644
--- a/arch/arm/mach-imx/mach-mx1ads.c
+++ b/arch/arm/mach-imx/mach-mx1ads.c
@@ -132,10 +132,6 @@ static void __init mx1ads_timer_init(void)
 	mx1_clocks_init(32000);
 }
 
-static struct sys_timer mx1ads_timer = {
-	.init	= mx1ads_timer_init,
-};
-
 MACHINE_START(MX1ADS, "Freescale MX1ADS")
 	/* Maintainer: Sascha Hauer, Pengutronix */
 	.atag_offset = 0x100,
@@ -143,7 +139,7 @@ MACHINE_START(MX1ADS, "Freescale MX1ADS")
 	.init_early = imx1_init_early,
 	.init_irq = mx1_init_irq,
 	.handle_irq = imx1_handle_irq,
-	.timer = &mx1ads_timer,
+	.init_time	= mx1ads_timer_init,
 	.init_machine = mx1ads_init,
 	.restart	= mxc_restart,
 MACHINE_END
@@ -154,7 +150,7 @@ MACHINE_START(MXLADS, "Freescale MXLADS")
 	.init_early = imx1_init_early,
 	.init_irq = mx1_init_irq,
 	.handle_irq = imx1_handle_irq,
-	.timer = &mx1ads_timer,
+	.init_time	= mx1ads_timer_init,
 	.init_machine = mx1ads_init,
 	.restart	= mxc_restart,
 MACHINE_END
diff --git a/arch/arm/mach-imx/mach-mx21ads.c b/arch/arm/mach-imx/mach-mx21ads.c
index 6adb3136bb08..a06aa4dc37fc 100644
--- a/arch/arm/mach-imx/mach-mx21ads.c
+++ b/arch/arm/mach-imx/mach-mx21ads.c
@@ -318,10 +318,6 @@ static void __init mx21ads_timer_init(void)
 	mx21_clocks_init(32768, 26000000);
 }
 
-static struct sys_timer mx21ads_timer = {
-	.init	= mx21ads_timer_init,
-};
-
 MACHINE_START(MX21ADS, "Freescale i.MX21ADS")
 	/* maintainer: Freescale Semiconductor, Inc. */
 	.atag_offset = 0x100,
@@ -329,7 +325,7 @@ MACHINE_START(MX21ADS, "Freescale i.MX21ADS")
 	.init_early = imx21_init_early,
 	.init_irq = mx21_init_irq,
 	.handle_irq = imx21_handle_irq,
-	.timer = &mx21ads_timer,
+	.init_time	= mx21ads_timer_init,
 	.init_machine = mx21ads_board_init,
 	.restart	= mxc_restart,
 MACHINE_END
diff --git a/arch/arm/mach-imx/mach-mx25_3ds.c b/arch/arm/mach-imx/mach-mx25_3ds.c
index b1b03aa55bb8..8bcda688a006 100644
--- a/arch/arm/mach-imx/mach-mx25_3ds.c
+++ b/arch/arm/mach-imx/mach-mx25_3ds.c
@@ -257,10 +257,6 @@ static void __init mx25pdk_timer_init(void)
 	mx25_clocks_init();
 }
 
-static struct sys_timer mx25pdk_timer = {
-	.init   = mx25pdk_timer_init,
-};
-
 MACHINE_START(MX25_3DS, "Freescale MX25PDK (3DS)")
 	/* Maintainer: Freescale Semiconductor, Inc. */
 	.atag_offset = 0x100,
@@ -268,7 +264,7 @@ MACHINE_START(MX25_3DS, "Freescale MX25PDK (3DS)")
 	.init_early = imx25_init_early,
 	.init_irq = mx25_init_irq,
 	.handle_irq = imx25_handle_irq,
-	.timer = &mx25pdk_timer,
+	.init_time	= mx25pdk_timer_init,
 	.init_machine = mx25pdk_init,
 	.restart	= mxc_restart,
 MACHINE_END
diff --git a/arch/arm/mach-imx/mach-mx27_3ds.c b/arch/arm/mach-imx/mach-mx27_3ds.c
index d0e547fa925f..25b3e4c9bc0a 100644
--- a/arch/arm/mach-imx/mach-mx27_3ds.c
+++ b/arch/arm/mach-imx/mach-mx27_3ds.c
@@ -538,10 +538,6 @@ static void __init mx27pdk_timer_init(void)
 	mx27_clocks_init(26000000);
 }
 
-static struct sys_timer mx27pdk_timer = {
-	.init	= mx27pdk_timer_init,
-};
-
 MACHINE_START(MX27_3DS, "Freescale MX27PDK")
 	/* maintainer: Freescale Semiconductor, Inc. */
 	.atag_offset = 0x100,
@@ -549,7 +545,7 @@ MACHINE_START(MX27_3DS, "Freescale MX27PDK")
 	.init_early = imx27_init_early,
 	.init_irq = mx27_init_irq,
 	.handle_irq = imx27_handle_irq,
-	.timer = &mx27pdk_timer,
+	.init_time	= mx27pdk_timer_init,
 	.init_machine = mx27pdk_init,
 	.restart	= mxc_restart,
 MACHINE_END
diff --git a/arch/arm/mach-imx/mach-mx27ads.c b/arch/arm/mach-imx/mach-mx27ads.c
index 3d036f57f0e6..9821b824dcaf 100644
--- a/arch/arm/mach-imx/mach-mx27ads.c
+++ b/arch/arm/mach-imx/mach-mx27ads.c
@@ -323,10 +323,6 @@ static void __init mx27ads_timer_init(void)
 	mx27_clocks_init(fref);
 }
 
-static struct sys_timer mx27ads_timer = {
-	.init	= mx27ads_timer_init,
-};
-
 static struct map_desc mx27ads_io_desc[] __initdata = {
 	{
 		.virtual = PBC_BASE_ADDRESS,
@@ -349,7 +345,7 @@ MACHINE_START(MX27ADS, "Freescale i.MX27ADS")
 	.init_early = imx27_init_early,
 	.init_irq = mx27_init_irq,
 	.handle_irq = imx27_handle_irq,
-	.timer = &mx27ads_timer,
+	.init_time	= mx27ads_timer_init,
 	.init_machine = mx27ads_board_init,
 	.restart	= mxc_restart,
 MACHINE_END
diff --git a/arch/arm/mach-imx/mach-mx31_3ds.c b/arch/arm/mach-imx/mach-mx31_3ds.c
index bc301befdd06..1ed916175d41 100644
--- a/arch/arm/mach-imx/mach-mx31_3ds.c
+++ b/arch/arm/mach-imx/mach-mx31_3ds.c
@@ -762,10 +762,6 @@ static void __init mx31_3ds_timer_init(void)
 	mx31_clocks_init(26000000);
 }
 
-static struct sys_timer mx31_3ds_timer = {
-	.init	= mx31_3ds_timer_init,
-};
-
 static void __init mx31_3ds_reserve(void)
 {
 	/* reserve MX31_3DS_CAMERA_BUF_SIZE bytes for mx3-camera */
@@ -780,7 +776,7 @@ MACHINE_START(MX31_3DS, "Freescale MX31PDK (3DS)")
 	.init_early = imx31_init_early,
 	.init_irq = mx31_init_irq,
 	.handle_irq = imx31_handle_irq,
-	.timer = &mx31_3ds_timer,
+	.init_time	= mx31_3ds_timer_init,
 	.init_machine = mx31_3ds_init,
 	.reserve = mx31_3ds_reserve,
 	.restart	= mxc_restart,
diff --git a/arch/arm/mach-imx/mach-mx31ads.c b/arch/arm/mach-imx/mach-mx31ads.c
index 8b56f8883f32..daf8889125cc 100644
--- a/arch/arm/mach-imx/mach-mx31ads.c
+++ b/arch/arm/mach-imx/mach-mx31ads.c
@@ -576,10 +576,6 @@ static void __init mx31ads_timer_init(void)
 	mx31_clocks_init(26000000);
 }
 
-static struct sys_timer mx31ads_timer = {
-	.init	= mx31ads_timer_init,
-};
-
 MACHINE_START(MX31ADS, "Freescale MX31ADS")
 	/* Maintainer: Freescale Semiconductor, Inc. */
 	.atag_offset = 0x100,
@@ -587,7 +583,7 @@ MACHINE_START(MX31ADS, "Freescale MX31ADS")
 	.init_early = imx31_init_early,
 	.init_irq = mx31ads_init_irq,
 	.handle_irq = imx31_handle_irq,
-	.timer = &mx31ads_timer,
+	.init_time	= mx31ads_timer_init,
 	.init_machine = mx31ads_init,
 	.restart	= mxc_restart,
 MACHINE_END
diff --git a/arch/arm/mach-imx/mach-mx31lilly.c b/arch/arm/mach-imx/mach-mx31lilly.c
index 08b9965c8b36..832b1e2f964e 100644
--- a/arch/arm/mach-imx/mach-mx31lilly.c
+++ b/arch/arm/mach-imx/mach-mx31lilly.c
@@ -303,17 +303,13 @@ static void __init mx31lilly_timer_init(void)
 	mx31_clocks_init(26000000);
 }
 
-static struct sys_timer mx31lilly_timer = {
-	.init	= mx31lilly_timer_init,
-};
-
 MACHINE_START(LILLY1131, "INCO startec LILLY-1131")
 	.atag_offset = 0x100,
 	.map_io = mx31_map_io,
 	.init_early = imx31_init_early,
 	.init_irq = mx31_init_irq,
 	.handle_irq = imx31_handle_irq,
-	.timer = &mx31lilly_timer,
+	.init_time	= mx31lilly_timer_init,
 	.init_machine = mx31lilly_board_init,
 	.restart	= mxc_restart,
 MACHINE_END
diff --git a/arch/arm/mach-imx/mach-mx31lite.c b/arch/arm/mach-imx/mach-mx31lite.c
index bdcd92e59518..bea07299b61a 100644
--- a/arch/arm/mach-imx/mach-mx31lite.c
+++ b/arch/arm/mach-imx/mach-mx31lite.c
@@ -285,10 +285,6 @@ static void __init mx31lite_timer_init(void)
 	mx31_clocks_init(26000000);
 }
 
-static struct sys_timer mx31lite_timer = {
-	.init	= mx31lite_timer_init,
-};
-
 MACHINE_START(MX31LITE, "LogicPD i.MX31 SOM")
 	/* Maintainer: Freescale Semiconductor, Inc. */
 	.atag_offset = 0x100,
@@ -296,7 +292,7 @@ MACHINE_START(MX31LITE, "LogicPD i.MX31 SOM")
 	.init_early = imx31_init_early,
 	.init_irq = mx31_init_irq,
 	.handle_irq = imx31_handle_irq,
-	.timer = &mx31lite_timer,
+	.init_time	= mx31lite_timer_init,
 	.init_machine = mx31lite_init,
 	.restart	= mxc_restart,
 MACHINE_END
diff --git a/arch/arm/mach-imx/mach-mx31moboard.c b/arch/arm/mach-imx/mach-mx31moboard.c
index 2517cfa9f26b..dae4cd7be040 100644
--- a/arch/arm/mach-imx/mach-mx31moboard.c
+++ b/arch/arm/mach-imx/mach-mx31moboard.c
@@ -596,10 +596,6 @@ static void __init mx31moboard_timer_init(void)
 	mx31_clocks_init(26000000);
 }
 
-static struct sys_timer mx31moboard_timer = {
-	.init	= mx31moboard_timer_init,
-};
-
 static void __init mx31moboard_reserve(void)
 {
 	/* reserve 4 MiB for mx3-camera */
@@ -615,7 +611,7 @@ MACHINE_START(MX31MOBOARD, "EPFL Mobots mx31moboard")
 	.init_early = imx31_init_early,
 	.init_irq = mx31_init_irq,
 	.handle_irq = imx31_handle_irq,
-	.timer = &mx31moboard_timer,
+	.init_time	= mx31moboard_timer_init,
 	.init_machine = mx31moboard_init,
 	.restart	= mxc_restart,
 MACHINE_END
diff --git a/arch/arm/mach-imx/mach-mx35_3ds.c b/arch/arm/mach-imx/mach-mx35_3ds.c
index 5277da45d60c..a42f4f07051f 100644
--- a/arch/arm/mach-imx/mach-mx35_3ds.c
+++ b/arch/arm/mach-imx/mach-mx35_3ds.c
@@ -602,10 +602,6 @@ static void __init mx35pdk_timer_init(void)
 	mx35_clocks_init();
 }
 
-static struct sys_timer mx35pdk_timer = {
-	.init	= mx35pdk_timer_init,
-};
-
 static void __init mx35_3ds_reserve(void)
 {
 	/* reserve MX35_3DS_CAMERA_BUF_SIZE bytes for mx3-camera */
@@ -620,7 +616,7 @@ MACHINE_START(MX35_3DS, "Freescale MX35PDK")
 	.init_early = imx35_init_early,
 	.init_irq = mx35_init_irq,
 	.handle_irq = imx35_handle_irq,
-	.timer = &mx35pdk_timer,
+	.init_time	= mx35pdk_timer_init,
 	.init_machine = mx35_3ds_init,
 	.reserve = mx35_3ds_reserve,
 	.restart	= mxc_restart,
diff --git a/arch/arm/mach-imx/mach-mx50_rdp.c b/arch/arm/mach-imx/mach-mx50_rdp.c
index 0c1f88a80bdc..8937902bda5f 100644
--- a/arch/arm/mach-imx/mach-mx50_rdp.c
+++ b/arch/arm/mach-imx/mach-mx50_rdp.c
@@ -210,16 +210,12 @@ static void __init mx50_rdp_timer_init(void)
 	mx50_clocks_init(32768, 24000000, 22579200);
 }
 
-static struct sys_timer mx50_rdp_timer = {
-	.init	= mx50_rdp_timer_init,
-};
-
 MACHINE_START(MX50_RDP, "Freescale MX50 Reference Design Platform")
 	.map_io = mx50_map_io,
 	.init_early = imx50_init_early,
 	.init_irq = mx50_init_irq,
 	.handle_irq = imx50_handle_irq,
-	.timer = &mx50_rdp_timer,
+	.init_time	= mx50_rdp_timer_init,
 	.init_machine = mx50_rdp_board_init,
 	.restart	= mxc_restart,
 MACHINE_END
diff --git a/arch/arm/mach-imx/mach-mx51_3ds.c b/arch/arm/mach-imx/mach-mx51_3ds.c
index abc25bd1107b..2d2365111532 100644
--- a/arch/arm/mach-imx/mach-mx51_3ds.c
+++ b/arch/arm/mach-imx/mach-mx51_3ds.c
@@ -160,10 +160,6 @@ static void __init mx51_3ds_timer_init(void)
 	mx51_clocks_init(32768, 24000000, 22579200, 0);
 }
 
-static struct sys_timer mx51_3ds_timer = {
-	.init = mx51_3ds_timer_init,
-};
-
 MACHINE_START(MX51_3DS, "Freescale MX51 3-Stack Board")
 	/* Maintainer: Freescale Semiconductor, Inc. */
 	.atag_offset = 0x100,
@@ -171,7 +167,7 @@ MACHINE_START(MX51_3DS, "Freescale MX51 3-Stack Board")
 	.init_early = imx51_init_early,
 	.init_irq = mx51_init_irq,
 	.handle_irq = imx51_handle_irq,
-	.timer = &mx51_3ds_timer,
+	.init_time	= mx51_3ds_timer_init,
 	.init_machine = mx51_3ds_init,
 	.init_late	= imx51_init_late,
 	.restart	= mxc_restart,
diff --git a/arch/arm/mach-imx/mach-mx51_babbage.c b/arch/arm/mach-imx/mach-mx51_babbage.c
index d9a84ca2199a..6c4d7feb4520 100644
--- a/arch/arm/mach-imx/mach-mx51_babbage.c
+++ b/arch/arm/mach-imx/mach-mx51_babbage.c
@@ -418,10 +418,6 @@ static void __init mx51_babbage_timer_init(void)
 	mx51_clocks_init(32768, 24000000, 22579200, 0);
 }
 
-static struct sys_timer mx51_babbage_timer = {
-	.init = mx51_babbage_timer_init,
-};
-
 MACHINE_START(MX51_BABBAGE, "Freescale MX51 Babbage Board")
 	/* Maintainer: Amit Kucheria <amit.kucheria@canonical.com> */
 	.atag_offset = 0x100,
@@ -429,7 +425,7 @@ MACHINE_START(MX51_BABBAGE, "Freescale MX51 Babbage Board")
 	.init_early = imx51_init_early,
 	.init_irq = mx51_init_irq,
 	.handle_irq = imx51_handle_irq,
-	.timer = &mx51_babbage_timer,
+	.init_time	= mx51_babbage_timer_init,
 	.init_machine = mx51_babbage_init,
 	.init_late	= imx51_init_late,
 	.restart	= mxc_restart,
diff --git a/arch/arm/mach-imx/mach-mxt_td60.c b/arch/arm/mach-imx/mach-mxt_td60.c
index f4a8c7e108e1..a27faaba98ec 100644
--- a/arch/arm/mach-imx/mach-mxt_td60.c
+++ b/arch/arm/mach-imx/mach-mxt_td60.c
@@ -261,10 +261,6 @@ static void __init mxt_td60_timer_init(void)
 	mx27_clocks_init(26000000);
 }
 
-static struct sys_timer mxt_td60_timer = {
-	.init	= mxt_td60_timer_init,
-};
-
 MACHINE_START(MXT_TD60, "Maxtrack i-MXT TD60")
 	/* maintainer: Maxtrack Industrial */
 	.atag_offset = 0x100,
@@ -272,7 +268,7 @@ MACHINE_START(MXT_TD60, "Maxtrack i-MXT TD60")
 	.init_early = imx27_init_early,
 	.init_irq = mx27_init_irq,
 	.handle_irq = imx27_handle_irq,
-	.timer = &mxt_td60_timer,
+	.init_time	= mxt_td60_timer_init,
 	.init_machine = mxt_td60_board_init,
 	.restart	= mxc_restart,
 MACHINE_END
diff --git a/arch/arm/mach-imx/mach-pca100.c b/arch/arm/mach-imx/mach-pca100.c
index eee369fa94a2..b8b15bb1ffdf 100644
--- a/arch/arm/mach-imx/mach-pca100.c
+++ b/arch/arm/mach-imx/mach-pca100.c
@@ -416,10 +416,6 @@ static void __init pca100_timer_init(void)
 	mx27_clocks_init(26000000);
 }
 
-static struct sys_timer pca100_timer = {
-	.init = pca100_timer_init,
-};
-
 MACHINE_START(PCA100, "phyCARD-i.MX27")
 	.atag_offset = 0x100,
 	.map_io = mx27_map_io,
@@ -427,6 +423,6 @@ MACHINE_START(PCA100, "phyCARD-i.MX27")
 	.init_irq = mx27_init_irq,
 	.handle_irq = imx27_handle_irq,
 	.init_machine = pca100_init,
-	.timer = &pca100_timer,
+	.init_time	= pca100_timer_init,
 	.restart	= mxc_restart,
 MACHINE_END
diff --git a/arch/arm/mach-imx/mach-pcm037.c b/arch/arm/mach-imx/mach-pcm037.c
index 547fef133f65..bc0261e99d39 100644
--- a/arch/arm/mach-imx/mach-pcm037.c
+++ b/arch/arm/mach-imx/mach-pcm037.c
@@ -685,10 +685,6 @@ static void __init pcm037_timer_init(void)
 	mx31_clocks_init(26000000);
 }
 
-static struct sys_timer pcm037_timer = {
-	.init	= pcm037_timer_init,
-};
-
 static void __init pcm037_reserve(void)
 {
 	/* reserve 4 MiB for mx3-camera */
@@ -709,7 +705,7 @@ MACHINE_START(PCM037, "Phytec Phycore pcm037")
 	.init_early = imx31_init_early,
 	.init_irq = mx31_init_irq,
 	.handle_irq = imx31_handle_irq,
-	.timer = &pcm037_timer,
+	.init_time	= pcm037_timer_init,
 	.init_machine = pcm037_init,
 	.init_late = pcm037_init_late,
 	.restart	= mxc_restart,
diff --git a/arch/arm/mach-imx/mach-pcm038.c b/arch/arm/mach-imx/mach-pcm038.c
index 4aa0d0798605..e805ac273e9c 100644
--- a/arch/arm/mach-imx/mach-pcm038.c
+++ b/arch/arm/mach-imx/mach-pcm038.c
@@ -346,17 +346,13 @@ static void __init pcm038_timer_init(void)
 	mx27_clocks_init(26000000);
 }
 
-static struct sys_timer pcm038_timer = {
-	.init = pcm038_timer_init,
-};
-
 MACHINE_START(PCM038, "phyCORE-i.MX27")
 	.atag_offset = 0x100,
 	.map_io = mx27_map_io,
 	.init_early = imx27_init_early,
 	.init_irq = mx27_init_irq,
 	.handle_irq = imx27_handle_irq,
-	.timer = &pcm038_timer,
+	.init_time	= pcm038_timer_init,
 	.init_machine = pcm038_init,
 	.restart	= mxc_restart,
 MACHINE_END
diff --git a/arch/arm/mach-imx/mach-pcm043.c b/arch/arm/mach-imx/mach-pcm043.c
index 92445440221e..8ed533f0f8ca 100644
--- a/arch/arm/mach-imx/mach-pcm043.c
+++ b/arch/arm/mach-imx/mach-pcm043.c
@@ -394,10 +394,6 @@ static void __init pcm043_timer_init(void)
 	mx35_clocks_init();
 }
 
-static struct sys_timer pcm043_timer = {
-	.init	= pcm043_timer_init,
-};
-
 MACHINE_START(PCM043, "Phytec Phycore pcm043")
 	/* Maintainer: Pengutronix */
 	.atag_offset = 0x100,
@@ -405,7 +401,7 @@ MACHINE_START(PCM043, "Phytec Phycore pcm043")
 	.init_early = imx35_init_early,
 	.init_irq = mx35_init_irq,
 	.handle_irq = imx35_handle_irq,
-	.timer = &pcm043_timer,
+	.init_time = pcm043_timer_init,
 	.init_machine = pcm043_init,
 	.restart	= mxc_restart,
 MACHINE_END
diff --git a/arch/arm/mach-imx/mach-qong.c b/arch/arm/mach-imx/mach-qong.c
index 96d9a91f8a3b..22af27ed457e 100644
--- a/arch/arm/mach-imx/mach-qong.c
+++ b/arch/arm/mach-imx/mach-qong.c
@@ -260,10 +260,6 @@ static void __init qong_timer_init(void)
 	mx31_clocks_init(26000000);
 }
 
-static struct sys_timer qong_timer = {
-	.init	= qong_timer_init,
-};
-
 MACHINE_START(QONG, "Dave/DENX QongEVB-LITE")
 	/* Maintainer: DENX Software Engineering GmbH */
 	.atag_offset = 0x100,
@@ -271,7 +267,7 @@ MACHINE_START(QONG, "Dave/DENX QongEVB-LITE")
 	.init_early = imx31_init_early,
 	.init_irq = mx31_init_irq,
 	.handle_irq = imx31_handle_irq,
-	.timer = &qong_timer,
+	.init_time	= qong_timer_init,
 	.init_machine = qong_init,
 	.restart	= mxc_restart,
 MACHINE_END
diff --git a/arch/arm/mach-imx/mach-scb9328.c b/arch/arm/mach-imx/mach-scb9328.c
index fc970409dbaf..b0fa10dd79fe 100644
--- a/arch/arm/mach-imx/mach-scb9328.c
+++ b/arch/arm/mach-imx/mach-scb9328.c
@@ -131,10 +131,6 @@ static void __init scb9328_timer_init(void)
 	mx1_clocks_init(32000);
 }
 
-static struct sys_timer scb9328_timer = {
-	.init	= scb9328_timer_init,
-};
-
 MACHINE_START(SCB9328, "Synertronixx scb9328")
 	/* Sascha Hauer */
 	.atag_offset = 100,
@@ -142,7 +138,7 @@ MACHINE_START(SCB9328, "Synertronixx scb9328")
 	.init_early = imx1_init_early,
 	.init_irq = mx1_init_irq,
 	.handle_irq = imx1_handle_irq,
-	.timer = &scb9328_timer,
+	.init_time	= scb9328_timer_init,
 	.init_machine = scb9328_init,
 	.restart	= mxc_restart,
 MACHINE_END
diff --git a/arch/arm/mach-imx/mach-vpr200.c b/arch/arm/mach-imx/mach-vpr200.c
index 3aecf91e4289..0910761e8280 100644
--- a/arch/arm/mach-imx/mach-vpr200.c
+++ b/arch/arm/mach-imx/mach-vpr200.c
@@ -305,17 +305,13 @@ static void __init vpr200_timer_init(void)
 	mx35_clocks_init();
 }
 
-static struct sys_timer vpr200_timer = {
-	.init	= vpr200_timer_init,
-};
-
 MACHINE_START(VPR200, "VPR200")
 	/* Maintainer: Creative Product Design */
 	.map_io = mx35_map_io,
 	.init_early = imx35_init_early,
 	.init_irq = mx35_init_irq,
 	.handle_irq = imx35_handle_irq,
-	.timer = &vpr200_timer,
+	.init_time = vpr200_timer_init,
 	.init_machine = vpr200_board_init,
 	.restart	= mxc_restart,
 MACHINE_END
diff --git a/arch/arm/mach-integrator/integrator_ap.c b/arch/arm/mach-integrator/integrator_ap.c
index 11e2a4145807..78f1b3814f77 100644
--- a/arch/arm/mach-integrator/integrator_ap.c
+++ b/arch/arm/mach-integrator/integrator_ap.c
@@ -425,7 +425,7 @@ void __init ap_init_early(void)
 
 #ifdef CONFIG_OF
 
-static void __init ap_init_timer_of(void)
+static void __init ap_of_timer_init(void)
 {
 	struct device_node *node;
 	const char *path;
@@ -464,10 +464,6 @@ static void __init ap_init_timer_of(void)
 	integrator_clockevent_init(rate, base, irq);
 }
 
-static struct sys_timer ap_of_timer = {
-	.init		= ap_init_timer_of,
-};
-
 static const struct of_device_id fpga_irq_of_match[] __initconst = {
 	{ .compatible = "arm,versatile-fpga-irq", .data = fpga_irq_of_init, },
 	{ /* Sentinel */ }
@@ -586,7 +582,7 @@ DT_MACHINE_START(INTEGRATOR_AP_DT, "ARM Integrator/AP (Device Tree)")
 	.init_early	= ap_init_early,
 	.init_irq	= ap_init_irq_of,
 	.handle_irq	= fpga_handle_irq,
-	.timer		= &ap_of_timer,
+	.init_time	= ap_of_timer_init,
 	.init_machine	= ap_init_of,
 	.restart	= integrator_restart,
 	.dt_compat      = ap_dt_board_compat,
@@ -638,7 +634,7 @@ static struct platform_device cfi_flash_device = {
 	.resource	= &cfi_flash_resource,
 };
 
-static void __init ap_init_timer(void)
+static void __init ap_timer_init(void)
 {
 	struct clk *clk;
 	unsigned long rate;
@@ -657,10 +653,6 @@ static void __init ap_init_timer(void)
 				IRQ_TIMERINT1);
 }
 
-static struct sys_timer ap_timer = {
-	.init		= ap_init_timer,
-};
-
 #define INTEGRATOR_SC_VALID_INT	0x003fffff
 
 static void __init ap_init_irq(void)
@@ -716,7 +708,7 @@ MACHINE_START(INTEGRATOR, "ARM-Integrator")
 	.init_early	= ap_init_early,
 	.init_irq	= ap_init_irq,
 	.handle_irq	= fpga_handle_irq,
-	.timer		= &ap_timer,
+	.init_time	= ap_timer_init,
 	.init_machine	= ap_init,
 	.restart	= integrator_restart,
 MACHINE_END
diff --git a/arch/arm/mach-integrator/integrator_cp.c b/arch/arm/mach-integrator/integrator_cp.c
index 7322838c0447..4cef9a0ebbb9 100644
--- a/arch/arm/mach-integrator/integrator_cp.c
+++ b/arch/arm/mach-integrator/integrator_cp.c
@@ -251,7 +251,7 @@ static void __init intcp_init_early(void)
 
 #ifdef CONFIG_OF
 
-static void __init intcp_timer_init_of(void)
+static void __init cp_of_timer_init(void)
 {
 	struct device_node *node;
 	const char *path;
@@ -283,10 +283,6 @@ static void __init intcp_timer_init_of(void)
 	sp804_clockevents_init(base, irq, node->name);
 }
 
-static struct sys_timer cp_of_timer = {
-	.init		= intcp_timer_init_of,
-};
-
 static const struct of_device_id fpga_irq_of_match[] __initconst = {
 	{ .compatible = "arm,versatile-fpga-irq", .data = fpga_irq_of_init, },
 	{ /* Sentinel */ }
@@ -390,7 +386,7 @@ DT_MACHINE_START(INTEGRATOR_CP_DT, "ARM Integrator/CP (Device Tree)")
 	.init_early	= intcp_init_early,
 	.init_irq	= intcp_init_irq_of,
 	.handle_irq	= fpga_handle_irq,
-	.timer		= &cp_of_timer,
+	.init_time	= cp_of_timer_init,
 	.init_machine	= intcp_init_of,
 	.restart	= integrator_restart,
 	.dt_compat      = intcp_dt_board_compat,
@@ -512,7 +508,7 @@ static void __init intcp_init_irq(void)
 #define TIMER1_VA_BASE __io_address(INTEGRATOR_TIMER1_BASE)
 #define TIMER2_VA_BASE __io_address(INTEGRATOR_TIMER2_BASE)
 
-static void __init intcp_timer_init(void)
+static void __init cp_timer_init(void)
 {
 	writel(0, TIMER0_VA_BASE + TIMER_CTRL);
 	writel(0, TIMER1_VA_BASE + TIMER_CTRL);
@@ -522,10 +518,6 @@ static void __init intcp_timer_init(void)
 	sp804_clockevents_init(TIMER1_VA_BASE, IRQ_TIMERINT1, "timer1");
 }
 
-static struct sys_timer cp_timer = {
-	.init		= intcp_timer_init,
-};
-
 #define INTEGRATOR_CP_MMC_IRQS	{ IRQ_CP_MMCIINT0, IRQ_CP_MMCIINT1 }
 #define INTEGRATOR_CP_AACI_IRQS	{ IRQ_CP_AACIINT }
 
@@ -565,7 +557,7 @@ MACHINE_START(CINTEGRATOR, "ARM-IntegratorCP")
 	.init_early	= intcp_init_early,
 	.init_irq	= intcp_init_irq,
 	.handle_irq	= fpga_handle_irq,
-	.timer		= &cp_timer,
+	.init_time	= cp_timer_init,
 	.init_machine	= intcp_init,
 	.restart	= integrator_restart,
 MACHINE_END
diff --git a/arch/arm/mach-iop13xx/iq81340mc.c b/arch/arm/mach-iop13xx/iq81340mc.c
index e3f3e7daa79e..02a8228ac2d3 100644
--- a/arch/arm/mach-iop13xx/iq81340mc.c
+++ b/arch/arm/mach-iop13xx/iq81340mc.c
@@ -84,17 +84,13 @@ static void __init iq81340mc_timer_init(void)
 	iop_init_time(bus_freq);
 }
 
-static struct sys_timer iq81340mc_timer = {
-       .init       = iq81340mc_timer_init,
-};
-
 MACHINE_START(IQ81340MC, "Intel IQ81340MC")
 	/* Maintainer: Dan Williams <dan.j.williams@intel.com> */
 	.atag_offset    = 0x100,
 	.init_early     = iop13xx_init_early,
 	.map_io         = iop13xx_map_io,
 	.init_irq       = iop13xx_init_irq,
-	.timer          = &iq81340mc_timer,
+	.init_time	= iq81340mc_timer_init,
 	.init_machine   = iq81340mc_init,
 	.restart	= iop13xx_restart,
 MACHINE_END
diff --git a/arch/arm/mach-iop13xx/iq81340sc.c b/arch/arm/mach-iop13xx/iq81340sc.c
index e94744111634..1b80f10722b3 100644
--- a/arch/arm/mach-iop13xx/iq81340sc.c
+++ b/arch/arm/mach-iop13xx/iq81340sc.c
@@ -86,17 +86,13 @@ static void __init iq81340sc_timer_init(void)
 	iop_init_time(bus_freq);
 }
 
-static struct sys_timer iq81340sc_timer = {
-       .init       = iq81340sc_timer_init,
-};
-
 MACHINE_START(IQ81340SC, "Intel IQ81340SC")
 	/* Maintainer: Dan Williams <dan.j.williams@intel.com> */
 	.atag_offset    = 0x100,
 	.init_early     = iop13xx_init_early,
 	.map_io         = iop13xx_map_io,
 	.init_irq       = iop13xx_init_irq,
-	.timer          = &iq81340sc_timer,
+	.init_time	= iq81340sc_timer_init,
 	.init_machine   = iq81340sc_init,
 	.restart	= iop13xx_restart,
 MACHINE_END
diff --git a/arch/arm/mach-iop32x/em7210.c b/arch/arm/mach-iop32x/em7210.c
index 9f369f09c29d..31fbb6c61b25 100644
--- a/arch/arm/mach-iop32x/em7210.c
+++ b/arch/arm/mach-iop32x/em7210.c
@@ -40,10 +40,6 @@ static void __init em7210_timer_init(void)
 	iop_init_time(200000000);
 }
 
-static struct sys_timer em7210_timer = {
-	.init		= em7210_timer_init,
-};
-
 /*
  * EM7210 RTC
  */
@@ -205,7 +201,7 @@ MACHINE_START(EM7210, "Lanner EM7210")
 	.atag_offset	= 0x100,
 	.map_io		= em7210_map_io,
 	.init_irq	= iop32x_init_irq,
-	.timer		= &em7210_timer,
+	.init_time	= em7210_timer_init,
 	.init_machine	= em7210_init_machine,
 	.restart	= iop3xx_restart,
 MACHINE_END
diff --git a/arch/arm/mach-iop32x/glantank.c b/arch/arm/mach-iop32x/glantank.c
index 02e20c3912ba..ac304705fe68 100644
--- a/arch/arm/mach-iop32x/glantank.c
+++ b/arch/arm/mach-iop32x/glantank.c
@@ -44,10 +44,6 @@ static void __init glantank_timer_init(void)
 	iop_init_time(200000000);
 }
 
-static struct sys_timer glantank_timer = {
-	.init		= glantank_timer_init,
-};
-
 
 /*
  * GLAN Tank I/O.
@@ -209,7 +205,7 @@ MACHINE_START(GLANTANK, "GLAN Tank")
 	.atag_offset	= 0x100,
 	.map_io		= glantank_map_io,
 	.init_irq	= iop32x_init_irq,
-	.timer		= &glantank_timer,
+	.init_time	= glantank_timer_init,
 	.init_machine	= glantank_init_machine,
 	.restart	= iop3xx_restart,
 MACHINE_END
diff --git a/arch/arm/mach-iop32x/iq31244.c b/arch/arm/mach-iop32x/iq31244.c
index ddd1c7ecfe57..f2cd2966212d 100644
--- a/arch/arm/mach-iop32x/iq31244.c
+++ b/arch/arm/mach-iop32x/iq31244.c
@@ -75,10 +75,6 @@ static void __init iq31244_timer_init(void)
 	}
 }
 
-static struct sys_timer iq31244_timer = {
-	.init		= iq31244_timer_init,
-};
-
 
 /*
  * IQ31244 I/O.
@@ -314,7 +310,7 @@ MACHINE_START(IQ31244, "Intel IQ31244")
 	.atag_offset	= 0x100,
 	.map_io		= iq31244_map_io,
 	.init_irq	= iop32x_init_irq,
-	.timer		= &iq31244_timer,
+	.init_time	= iq31244_timer_init,
 	.init_machine	= iq31244_init_machine,
 	.restart	= iop3xx_restart,
 MACHINE_END
@@ -329,7 +325,7 @@ MACHINE_START(EP80219, "Intel EP80219")
 	.atag_offset	= 0x100,
 	.map_io		= iq31244_map_io,
 	.init_irq	= iop32x_init_irq,
-	.timer		= &iq31244_timer,
+	.init_time	= iq31244_timer_init,
 	.init_machine	= iq31244_init_machine,
 	.restart	= iop3xx_restart,
 MACHINE_END
diff --git a/arch/arm/mach-iop32x/iq80321.c b/arch/arm/mach-iop32x/iq80321.c
index bf155e6a3b45..015435de90dd 100644
--- a/arch/arm/mach-iop32x/iq80321.c
+++ b/arch/arm/mach-iop32x/iq80321.c
@@ -43,10 +43,6 @@ static void __init iq80321_timer_init(void)
 	iop_init_time(200000000);
 }
 
-static struct sys_timer iq80321_timer = {
-	.init		= iq80321_timer_init,
-};
-
 
 /*
  * IQ80321 I/O.
@@ -188,7 +184,7 @@ MACHINE_START(IQ80321, "Intel IQ80321")
 	.atag_offset	= 0x100,
 	.map_io		= iq80321_map_io,
 	.init_irq	= iop32x_init_irq,
-	.timer		= &iq80321_timer,
+	.init_time	= iq80321_timer_init,
 	.init_machine	= iq80321_init_machine,
 	.restart	= iop3xx_restart,
 MACHINE_END
diff --git a/arch/arm/mach-iop32x/n2100.c b/arch/arm/mach-iop32x/n2100.c
index 5a7ae91e8849..ea0984a7449e 100644
--- a/arch/arm/mach-iop32x/n2100.c
+++ b/arch/arm/mach-iop32x/n2100.c
@@ -50,10 +50,6 @@ static void __init n2100_timer_init(void)
 	iop_init_time(198000000);
 }
 
-static struct sys_timer n2100_timer = {
-	.init		= n2100_timer_init,
-};
-
 
 /*
  * N2100 I/O.
@@ -337,7 +333,7 @@ MACHINE_START(N2100, "Thecus N2100")
 	.atag_offset	= 0x100,
 	.map_io		= n2100_map_io,
 	.init_irq	= iop32x_init_irq,
-	.timer		= &n2100_timer,
+	.init_time	= n2100_timer_init,
 	.init_machine	= n2100_init_machine,
 	.restart	= n2100_restart,
 MACHINE_END
diff --git a/arch/arm/mach-iop33x/iq80331.c b/arch/arm/mach-iop33x/iq80331.c
index e74a7debe793..c43304a10fa7 100644
--- a/arch/arm/mach-iop33x/iq80331.c
+++ b/arch/arm/mach-iop33x/iq80331.c
@@ -45,10 +45,6 @@ static void __init iq80331_timer_init(void)
 		iop_init_time(266000000);
 }
 
-static struct sys_timer iq80331_timer = {
-	.init		= iq80331_timer_init,
-};
-
 
 /*
  * IQ80331 PCI.
@@ -143,7 +139,7 @@ MACHINE_START(IQ80331, "Intel IQ80331")
 	.atag_offset	= 0x100,
 	.map_io		= iop3xx_map_io,
 	.init_irq	= iop33x_init_irq,
-	.timer		= &iq80331_timer,
+	.init_time	= iq80331_timer_init,
 	.init_machine	= iq80331_init_machine,
 	.restart	= iop3xx_restart,
 MACHINE_END
diff --git a/arch/arm/mach-iop33x/iq80332.c b/arch/arm/mach-iop33x/iq80332.c
index e2f5beece6e8..8192987e78e5 100644
--- a/arch/arm/mach-iop33x/iq80332.c
+++ b/arch/arm/mach-iop33x/iq80332.c
@@ -45,10 +45,6 @@ static void __init iq80332_timer_init(void)
 		iop_init_time(266000000);
 }
 
-static struct sys_timer iq80332_timer = {
-	.init		= iq80332_timer_init,
-};
-
 
 /*
  * IQ80332 PCI.
@@ -143,7 +139,7 @@ MACHINE_START(IQ80332, "Intel IQ80332")
 	.atag_offset	= 0x100,
 	.map_io		= iop3xx_map_io,
 	.init_irq	= iop33x_init_irq,
-	.timer		= &iq80332_timer,
+	.init_time	= iq80332_timer_init,
 	.init_machine	= iq80332_init_machine,
 	.restart	= iop3xx_restart,
 MACHINE_END
diff --git a/arch/arm/mach-ixp4xx/avila-setup.c b/arch/arm/mach-ixp4xx/avila-setup.c
index 90e42e9982cb..6beec150c060 100644
--- a/arch/arm/mach-ixp4xx/avila-setup.c
+++ b/arch/arm/mach-ixp4xx/avila-setup.c
@@ -167,7 +167,7 @@ MACHINE_START(AVILA, "Gateworks Avila Network Platform")
 	.map_io		= ixp4xx_map_io,
 	.init_early	= ixp4xx_init_early,
 	.init_irq	= ixp4xx_init_irq,
-	.timer		= &ixp4xx_timer,
+	.init_time	= ixp4xx_timer_init,
 	.atag_offset	= 0x100,
 	.init_machine	= avila_init,
 #if defined(CONFIG_PCI)
@@ -187,7 +187,7 @@ MACHINE_START(LOFT, "Giant Shoulder Inc Loft board")
 	.map_io		= ixp4xx_map_io,
 	.init_early	= ixp4xx_init_early,
 	.init_irq	= ixp4xx_init_irq,
-	.timer		= &ixp4xx_timer,
+	.init_time	= ixp4xx_timer_init,
 	.atag_offset	= 0x100,
 	.init_machine	= avila_init,
 #if defined(CONFIG_PCI)
diff --git a/arch/arm/mach-ixp4xx/common.c b/arch/arm/mach-ixp4xx/common.c
index 8c0c0e2d0727..f6ac695ceb60 100644
--- a/arch/arm/mach-ixp4xx/common.c
+++ b/arch/arm/mach-ixp4xx/common.c
@@ -307,10 +307,6 @@ void __init ixp4xx_timer_init(void)
 	ixp4xx_clockevent_init();
 }
 
-struct sys_timer ixp4xx_timer = {
-	.init		= ixp4xx_timer_init,
-};
-
 static struct pxa2xx_udc_mach_info ixp4xx_udc_info;
 
 void __init ixp4xx_set_udc_info(struct pxa2xx_udc_mach_info *info)
diff --git a/arch/arm/mach-ixp4xx/coyote-setup.c b/arch/arm/mach-ixp4xx/coyote-setup.c
index 1b83110028d6..820cae8608fc 100644
--- a/arch/arm/mach-ixp4xx/coyote-setup.c
+++ b/arch/arm/mach-ixp4xx/coyote-setup.c
@@ -112,7 +112,7 @@ MACHINE_START(ADI_COYOTE, "ADI Engineering Coyote")
 	.map_io		= ixp4xx_map_io,
 	.init_early	= ixp4xx_init_early,
 	.init_irq	= ixp4xx_init_irq,
-	.timer		= &ixp4xx_timer,
+	.init_time	= ixp4xx_timer_init,
 	.atag_offset	= 0x100,
 	.init_machine	= coyote_init,
 #if defined(CONFIG_PCI)
@@ -132,7 +132,7 @@ MACHINE_START(IXDPG425, "Intel IXDPG425")
 	.map_io		= ixp4xx_map_io,
 	.init_early	= ixp4xx_init_early,
 	.init_irq	= ixp4xx_init_irq,
-	.timer		= &ixp4xx_timer,
+	.init_time	= ixp4xx_timer_init,
 	.atag_offset	= 0x100,
 	.init_machine	= coyote_init,
 	.restart	= ixp4xx_restart,
diff --git a/arch/arm/mach-ixp4xx/dsmg600-setup.c b/arch/arm/mach-ixp4xx/dsmg600-setup.c
index 97a0af8f1955..5d413f8c5700 100644
--- a/arch/arm/mach-ixp4xx/dsmg600-setup.c
+++ b/arch/arm/mach-ixp4xx/dsmg600-setup.c
@@ -226,10 +226,6 @@ static void __init dsmg600_timer_init(void)
     ixp4xx_timer_init();
 }
 
-static struct sys_timer dsmg600_timer = {
-    .init   = dsmg600_timer_init,
-};
-
 static void __init dsmg600_init(void)
 {
 	ixp4xx_sys_init();
@@ -282,7 +278,7 @@ MACHINE_START(DSMG600, "D-Link DSM-G600 RevA")
 	.map_io		= ixp4xx_map_io,
 	.init_early	= ixp4xx_init_early,
 	.init_irq	= ixp4xx_init_irq,
-	.timer          = &dsmg600_timer,
+	.init_time	= dsmg600_timer_init,
 	.init_machine	= dsmg600_init,
 #if defined(CONFIG_PCI)
 	.dma_zone_size	= SZ_64M,
diff --git a/arch/arm/mach-ixp4xx/fsg-setup.c b/arch/arm/mach-ixp4xx/fsg-setup.c
index 9175a25a7511..429966b756ed 100644
--- a/arch/arm/mach-ixp4xx/fsg-setup.c
+++ b/arch/arm/mach-ixp4xx/fsg-setup.c
@@ -272,7 +272,7 @@ MACHINE_START(FSG, "Freecom FSG-3")
 	.map_io		= ixp4xx_map_io,
 	.init_early	= ixp4xx_init_early,
 	.init_irq	= ixp4xx_init_irq,
-	.timer		= &ixp4xx_timer,
+	.init_time	= ixp4xx_timer_init,
 	.atag_offset	= 0x100,
 	.init_machine	= fsg_init,
 #if defined(CONFIG_PCI)
diff --git a/arch/arm/mach-ixp4xx/gateway7001-setup.c b/arch/arm/mach-ixp4xx/gateway7001-setup.c
index 033c71758953..3d24b3fcee87 100644
--- a/arch/arm/mach-ixp4xx/gateway7001-setup.c
+++ b/arch/arm/mach-ixp4xx/gateway7001-setup.c
@@ -99,7 +99,7 @@ MACHINE_START(GATEWAY7001, "Gateway 7001 AP")
 	.map_io		= ixp4xx_map_io,
 	.init_early	= ixp4xx_init_early,
 	.init_irq	= ixp4xx_init_irq,
-	.timer		= &ixp4xx_timer,
+	.init_time	= ixp4xx_timer_init,
 	.atag_offset	= 0x100,
 	.init_machine	= gateway7001_init,
 #if defined(CONFIG_PCI)
diff --git a/arch/arm/mach-ixp4xx/goramo_mlr.c b/arch/arm/mach-ixp4xx/goramo_mlr.c
index 53b8348dfcc2..e54ff491c105 100644
--- a/arch/arm/mach-ixp4xx/goramo_mlr.c
+++ b/arch/arm/mach-ixp4xx/goramo_mlr.c
@@ -498,7 +498,7 @@ MACHINE_START(GORAMO_MLR, "MultiLink")
 	.map_io		= ixp4xx_map_io,
 	.init_early	= ixp4xx_init_early,
 	.init_irq	= ixp4xx_init_irq,
-	.timer		= &ixp4xx_timer,
+	.init_time	= ixp4xx_timer_init,
 	.atag_offset	= 0x100,
 	.init_machine	= gmlr_init,
 #if defined(CONFIG_PCI)
diff --git a/arch/arm/mach-ixp4xx/gtwx5715-setup.c b/arch/arm/mach-ixp4xx/gtwx5715-setup.c
index 18ebc6be7969..16a12994fb53 100644
--- a/arch/arm/mach-ixp4xx/gtwx5715-setup.c
+++ b/arch/arm/mach-ixp4xx/gtwx5715-setup.c
@@ -167,7 +167,7 @@ MACHINE_START(GTWX5715, "Gemtek GTWX5715 (Linksys WRV54G)")
 	.map_io		= ixp4xx_map_io,
 	.init_early	= ixp4xx_init_early,
 	.init_irq	= ixp4xx_init_irq,
-	.timer		= &ixp4xx_timer,
+	.init_time	= ixp4xx_timer_init,
 	.atag_offset	= 0x100,
 	.init_machine	= gtwx5715_init,
 #if defined(CONFIG_PCI)
diff --git a/arch/arm/mach-ixp4xx/include/mach/platform.h b/arch/arm/mach-ixp4xx/include/mach/platform.h
index 5bce94aacca9..db5afb69c123 100644
--- a/arch/arm/mach-ixp4xx/include/mach/platform.h
+++ b/arch/arm/mach-ixp4xx/include/mach/platform.h
@@ -89,8 +89,6 @@ struct ixp4xx_pata_data {
 	void __iomem	*cs1;
 };
 
-struct sys_timer;
-
 #define IXP4XX_ETH_NPEA		0x00
 #define IXP4XX_ETH_NPEB		0x10
 #define IXP4XX_ETH_NPEC		0x20
@@ -125,7 +123,6 @@ extern void ixp4xx_init_early(void);
 extern void ixp4xx_init_irq(void);
 extern void ixp4xx_sys_init(void);
 extern void ixp4xx_timer_init(void);
-extern struct sys_timer ixp4xx_timer;
 extern void ixp4xx_restart(char, const char *);
 extern void ixp4xx_pci_preinit(void);
 struct pci_sys_data;
diff --git a/arch/arm/mach-ixp4xx/ixdp425-setup.c b/arch/arm/mach-ixp4xx/ixdp425-setup.c
index 108a9d3f382d..22d688b7d513 100644
--- a/arch/arm/mach-ixp4xx/ixdp425-setup.c
+++ b/arch/arm/mach-ixp4xx/ixdp425-setup.c
@@ -252,7 +252,7 @@ MACHINE_START(IXDP425, "Intel IXDP425 Development Platform")
 	.map_io		= ixp4xx_map_io,
 	.init_early	= ixp4xx_init_early,
 	.init_irq	= ixp4xx_init_irq,
-	.timer		= &ixp4xx_timer,
+	.init_time	= ixp4xx_timer_init,
 	.atag_offset	= 0x100,
 	.init_machine	= ixdp425_init,
 #if defined(CONFIG_PCI)
@@ -268,7 +268,7 @@ MACHINE_START(IXDP465, "Intel IXDP465 Development Platform")
 	.map_io		= ixp4xx_map_io,
 	.init_early	= ixp4xx_init_early,
 	.init_irq	= ixp4xx_init_irq,
-	.timer		= &ixp4xx_timer,
+	.init_time	= ixp4xx_timer_init,
 	.atag_offset	= 0x100,
 	.init_machine	= ixdp425_init,
 #if defined(CONFIG_PCI)
@@ -283,7 +283,7 @@ MACHINE_START(IXCDP1100, "Intel IXCDP1100 Development Platform")
 	.map_io		= ixp4xx_map_io,
 	.init_early	= ixp4xx_init_early,
 	.init_irq	= ixp4xx_init_irq,
-	.timer		= &ixp4xx_timer,
+	.init_time	= ixp4xx_timer_init,
 	.atag_offset	= 0x100,
 	.init_machine	= ixdp425_init,
 #if defined(CONFIG_PCI)
@@ -298,7 +298,7 @@ MACHINE_START(KIXRP435, "Intel KIXRP435 Reference Platform")
 	.map_io		= ixp4xx_map_io,
 	.init_early	= ixp4xx_init_early,
 	.init_irq	= ixp4xx_init_irq,
-	.timer		= &ixp4xx_timer,
+	.init_time	= ixp4xx_timer_init,
 	.atag_offset	= 0x100,
 	.init_machine	= ixdp425_init,
 #if defined(CONFIG_PCI)
diff --git a/arch/arm/mach-ixp4xx/nas100d-setup.c b/arch/arm/mach-ixp4xx/nas100d-setup.c
index 33cb0955b6bf..ed667ce9f576 100644
--- a/arch/arm/mach-ixp4xx/nas100d-setup.c
+++ b/arch/arm/mach-ixp4xx/nas100d-setup.c
@@ -317,7 +317,7 @@ MACHINE_START(NAS100D, "Iomega NAS 100d")
 	.map_io		= ixp4xx_map_io,
 	.init_early	= ixp4xx_init_early,
 	.init_irq	= ixp4xx_init_irq,
-	.timer          = &ixp4xx_timer,
+	.init_time	= ixp4xx_timer_init,
 	.init_machine	= nas100d_init,
 #if defined(CONFIG_PCI)
 	.dma_zone_size	= SZ_64M,
diff --git a/arch/arm/mach-ixp4xx/nslu2-setup.c b/arch/arm/mach-ixp4xx/nslu2-setup.c
index e2903faaebb3..7e55236c26ea 100644
--- a/arch/arm/mach-ixp4xx/nslu2-setup.c
+++ b/arch/arm/mach-ixp4xx/nslu2-setup.c
@@ -232,10 +232,6 @@ static void __init nslu2_timer_init(void)
     ixp4xx_timer_init();
 }
 
-static struct sys_timer nslu2_timer = {
-    .init   = nslu2_timer_init,
-};
-
 static void __init nslu2_init(void)
 {
 	uint8_t __iomem *f;
@@ -303,7 +299,7 @@ MACHINE_START(NSLU2, "Linksys NSLU2")
 	.map_io		= ixp4xx_map_io,
 	.init_early	= ixp4xx_init_early,
 	.init_irq	= ixp4xx_init_irq,
-	.timer          = &nslu2_timer,
+	.init_time	= nslu2_timer_init,
 	.init_machine	= nslu2_init,
 #if defined(CONFIG_PCI)
 	.dma_zone_size	= SZ_64M,
diff --git a/arch/arm/mach-ixp4xx/omixp-setup.c b/arch/arm/mach-ixp4xx/omixp-setup.c
index 158ddb79821d..46a89f5e8269 100644
--- a/arch/arm/mach-ixp4xx/omixp-setup.c
+++ b/arch/arm/mach-ixp4xx/omixp-setup.c
@@ -245,7 +245,7 @@ MACHINE_START(DEVIXP, "Omicron DEVIXP")
 	.map_io		= ixp4xx_map_io,
 	.init_early	= ixp4xx_init_early,
 	.init_irq	= ixp4xx_init_irq,
-	.timer          = &ixp4xx_timer,
+	.init_time	= ixp4xx_timer_init,
 	.init_machine	= omixp_init,
 	.restart	= ixp4xx_restart,
 MACHINE_END
@@ -257,7 +257,7 @@ MACHINE_START(MICCPT, "Omicron MICCPT")
 	.map_io		= ixp4xx_map_io,
 	.init_early	= ixp4xx_init_early,
 	.init_irq	= ixp4xx_init_irq,
-	.timer          = &ixp4xx_timer,
+	.init_time	= ixp4xx_timer_init,
 	.init_machine	= omixp_init,
 #if defined(CONFIG_PCI)
 	.dma_zone_size	= SZ_64M,
@@ -272,7 +272,7 @@ MACHINE_START(MIC256, "Omicron MIC256")
 	.map_io		= ixp4xx_map_io,
 	.init_early	= ixp4xx_init_early,
 	.init_irq	= ixp4xx_init_irq,
-	.timer          = &ixp4xx_timer,
+	.init_time	= ixp4xx_timer_init,
 	.init_machine	= omixp_init,
 	.restart	= ixp4xx_restart,
 MACHINE_END
diff --git a/arch/arm/mach-ixp4xx/vulcan-setup.c b/arch/arm/mach-ixp4xx/vulcan-setup.c
index 2798f435aaf4..d42730a1d4ab 100644
--- a/arch/arm/mach-ixp4xx/vulcan-setup.c
+++ b/arch/arm/mach-ixp4xx/vulcan-setup.c
@@ -239,7 +239,7 @@ MACHINE_START(ARCOM_VULCAN, "Arcom/Eurotech Vulcan")
 	.map_io		= ixp4xx_map_io,
 	.init_early	= ixp4xx_init_early,
 	.init_irq	= ixp4xx_init_irq,
-	.timer		= &ixp4xx_timer,
+	.init_time	= ixp4xx_timer_init,
 	.atag_offset	= 0x100,
 	.init_machine	= vulcan_init,
 #if defined(CONFIG_PCI)
diff --git a/arch/arm/mach-ixp4xx/wg302v2-setup.c b/arch/arm/mach-ixp4xx/wg302v2-setup.c
index a785175b115b..8f9ea2f3a9a5 100644
--- a/arch/arm/mach-ixp4xx/wg302v2-setup.c
+++ b/arch/arm/mach-ixp4xx/wg302v2-setup.c
@@ -100,7 +100,7 @@ MACHINE_START(WG302V2, "Netgear WG302 v2 / WAG302 v2")
 	.map_io		= ixp4xx_map_io,
 	.init_early	= ixp4xx_init_early,
 	.init_irq	= ixp4xx_init_irq,
-	.timer		= &ixp4xx_timer,
+	.init_time	= ixp4xx_timer_init,
 	.atag_offset	= 0x100,
 	.init_machine	= wg302v2_init,
 #if defined(CONFIG_PCI)
diff --git a/arch/arm/mach-kirkwood/board-dt.c b/arch/arm/mach-kirkwood/board-dt.c
index ff4150a2ad05..d6f57fd6f788 100644
--- a/arch/arm/mach-kirkwood/board-dt.c
+++ b/arch/arm/mach-kirkwood/board-dt.c
@@ -179,7 +179,7 @@ DT_MACHINE_START(KIRKWOOD_DT, "Marvell Kirkwood (Flattened Device Tree)")
 	.map_io		= kirkwood_map_io,
 	.init_early	= kirkwood_init_early,
 	.init_irq	= orion_dt_init_irq,
-	.timer		= &kirkwood_timer,
+	.init_time	= kirkwood_timer_init,
 	.init_machine	= kirkwood_dt_init,
 	.restart	= kirkwood_restart,
 	.dt_compat	= kirkwood_dt_board_compat,
diff --git a/arch/arm/mach-kirkwood/common.c b/arch/arm/mach-kirkwood/common.c
index bac21a554c91..b5ad4dff6b12 100644
--- a/arch/arm/mach-kirkwood/common.c
+++ b/arch/arm/mach-kirkwood/common.c
@@ -530,7 +530,7 @@ static int __init kirkwood_find_tclk(void)
 	return 166666667;
 }
 
-static void __init kirkwood_timer_init(void)
+void __init kirkwood_timer_init(void)
 {
 	kirkwood_tclk = kirkwood_find_tclk();
 
@@ -538,10 +538,6 @@ static void __init kirkwood_timer_init(void)
 			IRQ_KIRKWOOD_BRIDGE, kirkwood_tclk);
 }
 
-struct sys_timer kirkwood_timer = {
-	.init = kirkwood_timer_init,
-};
-
 /*****************************************************************************
  * Audio
  ****************************************************************************/
diff --git a/arch/arm/mach-kirkwood/common.h b/arch/arm/mach-kirkwood/common.h
index 5ffa57f08c80..283ab611e8da 100644
--- a/arch/arm/mach-kirkwood/common.h
+++ b/arch/arm/mach-kirkwood/common.h
@@ -156,7 +156,7 @@ void kirkwood_xor1_init(void);
 void kirkwood_crypto_init(void);
 
 extern int kirkwood_tclk;
-extern struct sys_timer kirkwood_timer;
+extern void kirkwood_timer_init(void);
 
 #define ARRAY_AND_SIZE(x)	(x), ARRAY_SIZE(x)
 
diff --git a/arch/arm/mach-kirkwood/d2net_v2-setup.c b/arch/arm/mach-kirkwood/d2net_v2-setup.c
index 2c1a453df201..453418063c1e 100644
--- a/arch/arm/mach-kirkwood/d2net_v2-setup.c
+++ b/arch/arm/mach-kirkwood/d2net_v2-setup.c
@@ -226,6 +226,6 @@ MACHINE_START(D2NET_V2, "LaCie d2 Network v2")
 	.map_io		= kirkwood_map_io,
 	.init_early	= kirkwood_init_early,
 	.init_irq	= kirkwood_init_irq,
-	.timer		= &kirkwood_timer,
+	.init_time	= kirkwood_timer_init,
 	.restart	= kirkwood_restart,
 MACHINE_END
diff --git a/arch/arm/mach-kirkwood/db88f6281-bp-setup.c b/arch/arm/mach-kirkwood/db88f6281-bp-setup.c
index c49b177c1523..5a369fe74754 100644
--- a/arch/arm/mach-kirkwood/db88f6281-bp-setup.c
+++ b/arch/arm/mach-kirkwood/db88f6281-bp-setup.c
@@ -103,6 +103,6 @@ MACHINE_START(DB88F6281_BP, "Marvell DB-88F6281-BP Development Board")
 	.map_io		= kirkwood_map_io,
 	.init_early	= kirkwood_init_early,
 	.init_irq	= kirkwood_init_irq,
-	.timer		= &kirkwood_timer,
+	.init_time	= kirkwood_timer_init,
 	.restart	= kirkwood_restart,
 MACHINE_END
diff --git a/arch/arm/mach-kirkwood/dockstar-setup.c b/arch/arm/mach-kirkwood/dockstar-setup.c
index 791a98fafa29..77f98f2b0416 100644
--- a/arch/arm/mach-kirkwood/dockstar-setup.c
+++ b/arch/arm/mach-kirkwood/dockstar-setup.c
@@ -107,6 +107,6 @@ MACHINE_START(DOCKSTAR, "Seagate FreeAgent DockStar")
 	.map_io		= kirkwood_map_io,
 	.init_early	= kirkwood_init_early,
 	.init_irq	= kirkwood_init_irq,
-	.timer		= &kirkwood_timer,
+	.init_time	= kirkwood_timer_init,
 	.restart	= kirkwood_restart,
 MACHINE_END
diff --git a/arch/arm/mach-kirkwood/guruplug-setup.c b/arch/arm/mach-kirkwood/guruplug-setup.c
index 7cb55f982243..1c6e736cbbf8 100644
--- a/arch/arm/mach-kirkwood/guruplug-setup.c
+++ b/arch/arm/mach-kirkwood/guruplug-setup.c
@@ -126,6 +126,6 @@ MACHINE_START(GURUPLUG, "Marvell GuruPlug Reference Board")
 	.map_io		= kirkwood_map_io,
 	.init_early	= kirkwood_init_early,
 	.init_irq	= kirkwood_init_irq,
-	.timer		= &kirkwood_timer,
+	.init_time	= kirkwood_timer_init,
 	.restart	= kirkwood_restart,
 MACHINE_END
diff --git a/arch/arm/mach-kirkwood/mv88f6281gtw_ge-setup.c b/arch/arm/mach-kirkwood/mv88f6281gtw_ge-setup.c
index 6d8364a97810..ba384b992bef 100644
--- a/arch/arm/mach-kirkwood/mv88f6281gtw_ge-setup.c
+++ b/arch/arm/mach-kirkwood/mv88f6281gtw_ge-setup.c
@@ -167,6 +167,6 @@ MACHINE_START(MV88F6281GTW_GE, "Marvell 88F6281 GTW GE Board")
 	.map_io		= kirkwood_map_io,
 	.init_early	= kirkwood_init_early,
 	.init_irq	= kirkwood_init_irq,
-	.timer		= &kirkwood_timer,
+	.init_time	= kirkwood_timer_init,
 	.restart	= kirkwood_restart,
 MACHINE_END
diff --git a/arch/arm/mach-kirkwood/netspace_v2-setup.c b/arch/arm/mach-kirkwood/netspace_v2-setup.c
index 728e86d33f0c..3b706611da8e 100644
--- a/arch/arm/mach-kirkwood/netspace_v2-setup.c
+++ b/arch/arm/mach-kirkwood/netspace_v2-setup.c
@@ -263,7 +263,7 @@ MACHINE_START(NETSPACE_V2, "LaCie Network Space v2")
 	.map_io		= kirkwood_map_io,
 	.init_early	= kirkwood_init_early,
 	.init_irq	= kirkwood_init_irq,
-	.timer		= &kirkwood_timer,
+	.init_time	= kirkwood_timer_init,
 	.restart	= kirkwood_restart,
 MACHINE_END
 #endif
@@ -275,7 +275,7 @@ MACHINE_START(INETSPACE_V2, "LaCie Internet Space v2")
 	.map_io		= kirkwood_map_io,
 	.init_early	= kirkwood_init_early,
 	.init_irq	= kirkwood_init_irq,
-	.timer		= &kirkwood_timer,
+	.init_time	= kirkwood_timer_init,
 	.restart	= kirkwood_restart,
 MACHINE_END
 #endif
@@ -287,7 +287,7 @@ MACHINE_START(NETSPACE_MAX_V2, "LaCie Network Space Max v2")
 	.map_io		= kirkwood_map_io,
 	.init_early	= kirkwood_init_early,
 	.init_irq	= kirkwood_init_irq,
-	.timer		= &kirkwood_timer,
+	.init_time	= kirkwood_timer_init,
 	.restart	= kirkwood_restart,
 MACHINE_END
 #endif
diff --git a/arch/arm/mach-kirkwood/netxbig_v2-setup.c b/arch/arm/mach-kirkwood/netxbig_v2-setup.c
index a3b091470b8a..913d032cdb19 100644
--- a/arch/arm/mach-kirkwood/netxbig_v2-setup.c
+++ b/arch/arm/mach-kirkwood/netxbig_v2-setup.c
@@ -404,7 +404,7 @@ MACHINE_START(NET2BIG_V2, "LaCie 2Big Network v2")
 	.map_io		= kirkwood_map_io,
 	.init_early	= kirkwood_init_early,
 	.init_irq	= kirkwood_init_irq,
-	.timer		= &kirkwood_timer,
+	.init_time	= kirkwood_timer_init,
 	.restart	= kirkwood_restart,
 MACHINE_END
 #endif
@@ -416,7 +416,7 @@ MACHINE_START(NET5BIG_V2, "LaCie 5Big Network v2")
 	.map_io		= kirkwood_map_io,
 	.init_early	= kirkwood_init_early,
 	.init_irq	= kirkwood_init_irq,
-	.timer		= &kirkwood_timer,
+	.init_time	= kirkwood_timer_init,
 	.restart	= kirkwood_restart,
 MACHINE_END
 #endif
diff --git a/arch/arm/mach-kirkwood/openrd-setup.c b/arch/arm/mach-kirkwood/openrd-setup.c
index 7e81e9b586bf..8ddd69fdc937 100644
--- a/arch/arm/mach-kirkwood/openrd-setup.c
+++ b/arch/arm/mach-kirkwood/openrd-setup.c
@@ -221,7 +221,7 @@ MACHINE_START(OPENRD_BASE, "Marvell OpenRD Base Board")
 	.map_io		= kirkwood_map_io,
 	.init_early	= kirkwood_init_early,
 	.init_irq	= kirkwood_init_irq,
-	.timer		= &kirkwood_timer,
+	.init_time	= kirkwood_timer_init,
 	.restart	= kirkwood_restart,
 MACHINE_END
 #endif
@@ -234,7 +234,7 @@ MACHINE_START(OPENRD_CLIENT, "Marvell OpenRD Client Board")
 	.map_io		= kirkwood_map_io,
 	.init_early	= kirkwood_init_early,
 	.init_irq	= kirkwood_init_irq,
-	.timer		= &kirkwood_timer,
+	.init_time	= kirkwood_timer_init,
 	.restart	= kirkwood_restart,
 MACHINE_END
 #endif
@@ -247,7 +247,7 @@ MACHINE_START(OPENRD_ULTIMATE, "Marvell OpenRD Ultimate Board")
 	.map_io		= kirkwood_map_io,
 	.init_early	= kirkwood_init_early,
 	.init_irq	= kirkwood_init_irq,
-	.timer		= &kirkwood_timer,
+	.init_time	= kirkwood_timer_init,
 	.restart	= kirkwood_restart,
 MACHINE_END
 #endif
diff --git a/arch/arm/mach-kirkwood/rd88f6192-nas-setup.c b/arch/arm/mach-kirkwood/rd88f6192-nas-setup.c
index 19072c84008f..e4fd3129d36f 100644
--- a/arch/arm/mach-kirkwood/rd88f6192-nas-setup.c
+++ b/arch/arm/mach-kirkwood/rd88f6192-nas-setup.c
@@ -84,6 +84,6 @@ MACHINE_START(RD88F6192_NAS, "Marvell RD-88F6192-NAS Development Board")
 	.map_io		= kirkwood_map_io,
 	.init_early	= kirkwood_init_early,
 	.init_irq	= kirkwood_init_irq,
-	.timer		= &kirkwood_timer,
+	.init_time	= kirkwood_timer_init,
 	.restart	= kirkwood_restart,
 MACHINE_END
diff --git a/arch/arm/mach-kirkwood/rd88f6281-setup.c b/arch/arm/mach-kirkwood/rd88f6281-setup.c
index 9717101a7437..c7d93b48926b 100644
--- a/arch/arm/mach-kirkwood/rd88f6281-setup.c
+++ b/arch/arm/mach-kirkwood/rd88f6281-setup.c
@@ -120,6 +120,6 @@ MACHINE_START(RD88F6281, "Marvell RD-88F6281 Reference Board")
 	.map_io		= kirkwood_map_io,
 	.init_early	= kirkwood_init_early,
 	.init_irq	= kirkwood_init_irq,
-	.timer		= &kirkwood_timer,
+	.init_time	= kirkwood_timer_init,
 	.restart	= kirkwood_restart,
 MACHINE_END
diff --git a/arch/arm/mach-kirkwood/sheevaplug-setup.c b/arch/arm/mach-kirkwood/sheevaplug-setup.c
index 8a175948b28d..55b68fa39f45 100644
--- a/arch/arm/mach-kirkwood/sheevaplug-setup.c
+++ b/arch/arm/mach-kirkwood/sheevaplug-setup.c
@@ -143,7 +143,7 @@ MACHINE_START(SHEEVAPLUG, "Marvell SheevaPlug Reference Board")
 	.map_io		= kirkwood_map_io,
 	.init_early	= kirkwood_init_early,
 	.init_irq	= kirkwood_init_irq,
-	.timer		= &kirkwood_timer,
+	.init_time	= kirkwood_timer_init,
 	.restart	= kirkwood_restart,
 MACHINE_END
 #endif
@@ -155,7 +155,7 @@ MACHINE_START(ESATA_SHEEVAPLUG, "Marvell eSATA SheevaPlug Reference Board")
 	.map_io		= kirkwood_map_io,
 	.init_early	= kirkwood_init_early,
 	.init_irq	= kirkwood_init_irq,
-	.timer		= &kirkwood_timer,
+	.init_time	= kirkwood_timer_init,
 	.restart	= kirkwood_restart,
 MACHINE_END
 #endif
diff --git a/arch/arm/mach-kirkwood/t5325-setup.c b/arch/arm/mach-kirkwood/t5325-setup.c
index f2daf711e72e..8736f8c97518 100644
--- a/arch/arm/mach-kirkwood/t5325-setup.c
+++ b/arch/arm/mach-kirkwood/t5325-setup.c
@@ -211,6 +211,6 @@ MACHINE_START(T5325, "HP t5325 Thin Client")
 	.map_io		= kirkwood_map_io,
 	.init_early	= kirkwood_init_early,
 	.init_irq	= kirkwood_init_irq,
-	.timer		= &kirkwood_timer,
+	.init_time	= kirkwood_timer_init,
 	.restart	= kirkwood_restart,
 MACHINE_END
diff --git a/arch/arm/mach-kirkwood/ts219-setup.c b/arch/arm/mach-kirkwood/ts219-setup.c
index 73e2b6ca9564..283abff90228 100644
--- a/arch/arm/mach-kirkwood/ts219-setup.c
+++ b/arch/arm/mach-kirkwood/ts219-setup.c
@@ -137,6 +137,6 @@ MACHINE_START(TS219, "QNAP TS-119/TS-219")
 	.map_io		= kirkwood_map_io,
 	.init_early	= kirkwood_init_early,
 	.init_irq	= kirkwood_init_irq,
-	.timer		= &kirkwood_timer,
+	.init_time	= kirkwood_timer_init,
 	.restart	= kirkwood_restart,
 MACHINE_END
diff --git a/arch/arm/mach-kirkwood/ts41x-setup.c b/arch/arm/mach-kirkwood/ts41x-setup.c
index e4c61279ea86..81d585806b2f 100644
--- a/arch/arm/mach-kirkwood/ts41x-setup.c
+++ b/arch/arm/mach-kirkwood/ts41x-setup.c
@@ -181,6 +181,6 @@ MACHINE_START(TS41X, "QNAP TS-41x")
 	.map_io		= kirkwood_map_io,
 	.init_early	= kirkwood_init_early,
 	.init_irq	= kirkwood_init_irq,
-	.timer		= &kirkwood_timer,
+	.init_time	= kirkwood_timer_init,
 	.restart	= kirkwood_restart,
 MACHINE_END
diff --git a/arch/arm/mach-ks8695/board-acs5k.c b/arch/arm/mach-ks8695/board-acs5k.c
index 255502ddd879..7beec9b8becd 100644
--- a/arch/arm/mach-ks8695/board-acs5k.c
+++ b/arch/arm/mach-ks8695/board-acs5k.c
@@ -227,6 +227,6 @@ MACHINE_START(ACS5K, "Brivo Systems LLC ACS-5000 Master board")
 	.map_io		= ks8695_map_io,
 	.init_irq	= ks8695_init_irq,
 	.init_machine	= acs5k_init,
-	.timer		= &ks8695_timer,
+	.init_time	= ks8695_timer_init,
 	.restart	= ks8695_restart,
 MACHINE_END
diff --git a/arch/arm/mach-ks8695/board-dsm320.c b/arch/arm/mach-ks8695/board-dsm320.c
index e0d36cef2c56..d37c218c3584 100644
--- a/arch/arm/mach-ks8695/board-dsm320.c
+++ b/arch/arm/mach-ks8695/board-dsm320.c
@@ -125,6 +125,6 @@ MACHINE_START(DSM320, "D-Link DSM-320 Wireless Media Player")
 	.map_io		= ks8695_map_io,
 	.init_irq	= ks8695_init_irq,
 	.init_machine	= dsm320_init,
-	.timer		= &ks8695_timer,
+	.init_time	= ks8695_timer_init,
 	.restart	= ks8695_restart,
 MACHINE_END
diff --git a/arch/arm/mach-ks8695/board-micrel.c b/arch/arm/mach-ks8695/board-micrel.c
index a8270725b76d..3acbdfd31391 100644
--- a/arch/arm/mach-ks8695/board-micrel.c
+++ b/arch/arm/mach-ks8695/board-micrel.c
@@ -57,6 +57,6 @@ MACHINE_START(KS8695, "KS8695 Centaur Development Board")
 	.map_io		= ks8695_map_io,
 	.init_irq	= ks8695_init_irq,
 	.init_machine	= micrel_init,
-	.timer		= &ks8695_timer,
+	.init_time	= ks8695_timer_init,
 	.restart	= ks8695_restart,
 MACHINE_END
diff --git a/arch/arm/mach-ks8695/board-og.c b/arch/arm/mach-ks8695/board-og.c
index 1623ba461e47..002bc619bb68 100644
--- a/arch/arm/mach-ks8695/board-og.c
+++ b/arch/arm/mach-ks8695/board-og.c
@@ -145,7 +145,7 @@ MACHINE_START(CM4002, "OpenGear/CM4002")
 	.map_io		= ks8695_map_io,
 	.init_irq	= ks8695_init_irq,
 	.init_machine	= og_init,
-	.timer		= &ks8695_timer,
+	.init_time	= ks8695_timer_init,
 	.restart        = ks8695_restart,
 MACHINE_END
 #endif
@@ -157,7 +157,7 @@ MACHINE_START(CM4008, "OpenGear/CM4008")
 	.map_io		= ks8695_map_io,
 	.init_irq	= ks8695_init_irq,
 	.init_machine	= og_init,
-	.timer		= &ks8695_timer,
+	.init_time	= ks8695_timer_init,
 	.restart        = ks8695_restart,
 MACHINE_END
 #endif
@@ -169,7 +169,7 @@ MACHINE_START(CM41XX, "OpenGear/CM41xx")
 	.map_io		= ks8695_map_io,
 	.init_irq	= ks8695_init_irq,
 	.init_machine	= og_init,
-	.timer		= &ks8695_timer,
+	.init_time	= ks8695_timer_init,
 	.restart        = ks8695_restart,
 MACHINE_END
 #endif
@@ -181,7 +181,7 @@ MACHINE_START(IM4004, "OpenGear/IM4004")
 	.map_io		= ks8695_map_io,
 	.init_irq	= ks8695_init_irq,
 	.init_machine	= og_init,
-	.timer		= &ks8695_timer,
+	.init_time	= ks8695_timer_init,
 	.restart        = ks8695_restart,
 MACHINE_END
 #endif
@@ -193,7 +193,7 @@ MACHINE_START(IM42XX, "OpenGear/IM42xx")
 	.map_io		= ks8695_map_io,
 	.init_irq	= ks8695_init_irq,
 	.init_machine	= og_init,
-	.timer		= &ks8695_timer,
+	.init_time	= ks8695_timer_init,
 	.restart        = ks8695_restart,
 MACHINE_END
 #endif
diff --git a/arch/arm/mach-ks8695/board-sg.c b/arch/arm/mach-ks8695/board-sg.c
index f35b98b5bf37..fdf2352d2cf8 100644
--- a/arch/arm/mach-ks8695/board-sg.c
+++ b/arch/arm/mach-ks8695/board-sg.c
@@ -91,7 +91,7 @@ MACHINE_START(LITE300, "SecureComputing/SG300")
 	.map_io		= ks8695_map_io,
 	.init_irq	= ks8695_init_irq,
 	.init_machine	= sg_init,
-	.timer		= &ks8695_timer,
+	.init_time	= ks8695_timer_init,
 	.restart	= ks8695_restart,
 MACHINE_END
 #endif
@@ -103,7 +103,7 @@ MACHINE_START(SG310, "McAfee/SG310")
 	.map_io		= ks8695_map_io,
 	.init_irq	= ks8695_init_irq,
 	.init_machine	= sg_init,
-	.timer		= &ks8695_timer,
+	.init_time	= ks8695_timer_init,
 	.restart	= ks8695_restart,
 MACHINE_END
 #endif
@@ -115,7 +115,7 @@ MACHINE_START(SE4200, "SecureComputing/SE4200")
 	.map_io		= ks8695_map_io,
 	.init_irq	= ks8695_init_irq,
 	.init_machine	= sg_init,
-	.timer		= &ks8695_timer,
+	.init_time	= ks8695_timer_init,
 	.restart	= ks8695_restart,
 MACHINE_END
 #endif
diff --git a/arch/arm/mach-ks8695/generic.h b/arch/arm/mach-ks8695/generic.h
index f8bdb11a9c33..6e97ce462d73 100644
--- a/arch/arm/mach-ks8695/generic.h
+++ b/arch/arm/mach-ks8695/generic.h
@@ -13,4 +13,4 @@
 extern __init void ks8695_map_io(void);
 extern __init void ks8695_init_irq(void);
 extern void ks8695_restart(char, const char *);
-extern struct sys_timer ks8695_timer;
+extern void ks8695_timer_init(void);
diff --git a/arch/arm/mach-ks8695/time.c b/arch/arm/mach-ks8695/time.c
index 46c84bc7792c..c272a3863d5f 100644
--- a/arch/arm/mach-ks8695/time.c
+++ b/arch/arm/mach-ks8695/time.c
@@ -146,7 +146,7 @@ static void ks8695_timer_setup(void)
 					0xFFFFFFFFU);
 }
 
-static void __init ks8695_timer_init (void)
+void __init ks8695_timer_init(void)
 {
 	ks8695_timer_setup();
 
@@ -154,10 +154,6 @@ static void __init ks8695_timer_init (void)
 	setup_irq(KS8695_IRQ_TIMER1, &ks8695_timer_irq);
 }
 
-struct sys_timer ks8695_timer = {
-	.init		= ks8695_timer_init,
-};
-
 void ks8695_restart(char mode, const char *cmd)
 {
 	unsigned int reg;
diff --git a/arch/arm/mach-lpc32xx/common.h b/arch/arm/mach-lpc32xx/common.h
index afeac3b1fae6..e0b26062a272 100644
--- a/arch/arm/mach-lpc32xx/common.h
+++ b/arch/arm/mach-lpc32xx/common.h
@@ -25,7 +25,7 @@
 /*
  * Other arch specific structures and functions
  */
-extern struct sys_timer lpc32xx_timer;
+extern void lpc32xx_timer_init(void);
 extern void __init lpc32xx_init_irq(void);
 extern void __init lpc32xx_map_io(void);
 extern void __init lpc32xx_serial_init(void);
diff --git a/arch/arm/mach-lpc32xx/phy3250.c b/arch/arm/mach-lpc32xx/phy3250.c
index e8ff4c3f0566..c1cd5a943ab1 100644
--- a/arch/arm/mach-lpc32xx/phy3250.c
+++ b/arch/arm/mach-lpc32xx/phy3250.c
@@ -263,7 +263,7 @@ DT_MACHINE_START(LPC32XX_DT, "LPC32XX SoC (Flattened Device Tree)")
 	.atag_offset	= 0x100,
 	.map_io		= lpc32xx_map_io,
 	.init_irq	= lpc32xx_init_irq,
-	.timer		= &lpc32xx_timer,
+	.init_time	= lpc32xx_timer_init,
 	.init_machine	= lpc3250_machine_init,
 	.dt_compat	= lpc32xx_dt_compat,
 	.restart	= lpc23xx_restart,
diff --git a/arch/arm/mach-lpc32xx/timer.c b/arch/arm/mach-lpc32xx/timer.c
index c40667c33161..88bd4ce3b94a 100644
--- a/arch/arm/mach-lpc32xx/timer.c
+++ b/arch/arm/mach-lpc32xx/timer.c
@@ -100,7 +100,7 @@ static struct irqaction lpc32xx_timer_irq = {
  * clocks need to be enabled here manually and then tagged as used in
  * the clock driver initialization
  */
-static void __init lpc32xx_timer_init(void)
+void __init lpc32xx_timer_init(void)
 {
 	u32 clkrate, pllreg;
 
@@ -161,8 +161,3 @@ static void __init lpc32xx_timer_init(void)
 	clocksource_mmio_init(LPC32XX_TIMER_TC(LPC32XX_TIMER1_BASE),
 		"lpc32xx_clksrc", clkrate, 300, 32, clocksource_mmio_readl_up);
 }
-
-struct sys_timer lpc32xx_timer = {
-	.init		= &lpc32xx_timer_init,
-};
-
diff --git a/arch/arm/mach-mmp/aspenite.c b/arch/arm/mach-mmp/aspenite.c
index e5dba9c5dc54..9f64d5632e07 100644
--- a/arch/arm/mach-mmp/aspenite.c
+++ b/arch/arm/mach-mmp/aspenite.c
@@ -262,7 +262,7 @@ MACHINE_START(ASPENITE, "PXA168-based Aspenite Development Platform")
 	.map_io		= mmp_map_io,
 	.nr_irqs	= MMP_NR_IRQS,
 	.init_irq       = pxa168_init_irq,
-	.timer          = &pxa168_timer,
+	.init_time	= pxa168_timer_init,
 	.init_machine   = common_init,
 	.restart	= pxa168_restart,
 MACHINE_END
@@ -271,7 +271,7 @@ MACHINE_START(ZYLONITE2, "PXA168-based Zylonite2 Development Platform")
 	.map_io		= mmp_map_io,
 	.nr_irqs	= MMP_NR_IRQS,
 	.init_irq       = pxa168_init_irq,
-	.timer          = &pxa168_timer,
+	.init_time	= pxa168_timer_init,
 	.init_machine   = common_init,
 	.restart	= pxa168_restart,
 MACHINE_END
diff --git a/arch/arm/mach-mmp/avengers_lite.c b/arch/arm/mach-mmp/avengers_lite.c
index 603542ae6fbd..1f94957b56ae 100644
--- a/arch/arm/mach-mmp/avengers_lite.c
+++ b/arch/arm/mach-mmp/avengers_lite.c
@@ -45,7 +45,7 @@ MACHINE_START(AVENGERS_LITE, "PXA168 Avengers lite Development Platform")
 	.map_io		= mmp_map_io,
 	.nr_irqs	= MMP_NR_IRQS,
 	.init_irq       = pxa168_init_irq,
-	.timer          = &pxa168_timer,
+	.init_time	= pxa168_timer_init,
 	.init_machine   = avengers_lite_init,
 	.restart	= pxa168_restart,
 MACHINE_END
diff --git a/arch/arm/mach-mmp/brownstone.c b/arch/arm/mach-mmp/brownstone.c
index 5cb769cd26d9..2358011c7d8e 100644
--- a/arch/arm/mach-mmp/brownstone.c
+++ b/arch/arm/mach-mmp/brownstone.c
@@ -218,7 +218,7 @@ MACHINE_START(BROWNSTONE, "Brownstone Development Platform")
 	.map_io		= mmp_map_io,
 	.nr_irqs	= BROWNSTONE_NR_IRQS,
 	.init_irq	= mmp2_init_irq,
-	.timer		= &mmp2_timer,
+	.init_time	= mmp2_timer_init,
 	.init_machine	= brownstone_init,
 	.restart	= mmp_restart,
 MACHINE_END
diff --git a/arch/arm/mach-mmp/common.h b/arch/arm/mach-mmp/common.h
index bd453274fca2..0bdc50b134ce 100644
--- a/arch/arm/mach-mmp/common.h
+++ b/arch/arm/mach-mmp/common.h
@@ -1,7 +1,5 @@
 #define ARRAY_AND_SIZE(x)	(x), ARRAY_SIZE(x)
 
-struct sys_timer;
-
 extern void timer_init(int irq);
 
 extern void __init icu_init_irq(void);
diff --git a/arch/arm/mach-mmp/flint.c b/arch/arm/mach-mmp/flint.c
index 8059cc0905c6..754c352dd02b 100644
--- a/arch/arm/mach-mmp/flint.c
+++ b/arch/arm/mach-mmp/flint.c
@@ -121,7 +121,7 @@ MACHINE_START(FLINT, "Flint Development Platform")
 	.map_io		= mmp_map_io,
 	.nr_irqs	= FLINT_NR_IRQS,
 	.init_irq       = mmp2_init_irq,
-	.timer          = &mmp2_timer,
+	.init_time	= mmp2_timer_init,
 	.init_machine   = flint_init,
 	.restart	= mmp_restart,
 MACHINE_END
diff --git a/arch/arm/mach-mmp/gplugd.c b/arch/arm/mach-mmp/gplugd.c
index 5c3d61ee729a..d1e2d595e79c 100644
--- a/arch/arm/mach-mmp/gplugd.c
+++ b/arch/arm/mach-mmp/gplugd.c
@@ -194,7 +194,7 @@ MACHINE_START(GPLUGD, "PXA168-based GuruPlug Display (gplugD) Platform")
 	.map_io		= mmp_map_io,
 	.nr_irqs	= MMP_NR_IRQS,
 	.init_irq       = pxa168_init_irq,
-	.timer          = &pxa168_timer,
+	.init_time	= pxa168_timer_init,
 	.init_machine   = gplugd_init,
 	.restart	= pxa168_restart,
 MACHINE_END
diff --git a/arch/arm/mach-mmp/include/mach/mmp2.h b/arch/arm/mach-mmp/include/mach/mmp2.h
index c4ca4d17194a..0764f4ecec82 100644
--- a/arch/arm/mach-mmp/include/mach/mmp2.h
+++ b/arch/arm/mach-mmp/include/mach/mmp2.h
@@ -3,9 +3,7 @@
 
 #include <linux/platform_data/pxa_sdhci.h>
 
-struct sys_timer;
-
-extern struct sys_timer mmp2_timer;
+extern void mmp2_timer_init(void);
 extern void __init mmp2_init_icu(void);
 extern void __init mmp2_init_irq(void);
 extern void mmp2_clear_pmic_int(void);
diff --git a/arch/arm/mach-mmp/include/mach/pxa168.h b/arch/arm/mach-mmp/include/mach/pxa168.h
index 37632d964d50..7ed1df21ea1c 100644
--- a/arch/arm/mach-mmp/include/mach/pxa168.h
+++ b/arch/arm/mach-mmp/include/mach/pxa168.h
@@ -1,9 +1,7 @@
 #ifndef __ASM_MACH_PXA168_H
 #define __ASM_MACH_PXA168_H
 
-struct sys_timer;
-
-extern struct sys_timer pxa168_timer;
+extern void pxa168_timer_init(void);
 extern void __init pxa168_init_irq(void);
 extern void pxa168_restart(char, const char *);
 extern void pxa168_clear_keypad_wakeup(void);
diff --git a/arch/arm/mach-mmp/include/mach/pxa910.h b/arch/arm/mach-mmp/include/mach/pxa910.h
index 3b58a3b2d7df..eff31ab6dc3b 100644
--- a/arch/arm/mach-mmp/include/mach/pxa910.h
+++ b/arch/arm/mach-mmp/include/mach/pxa910.h
@@ -1,9 +1,7 @@
 #ifndef __ASM_MACH_PXA910_H
 #define __ASM_MACH_PXA910_H
 
-struct sys_timer;
-
-extern struct sys_timer pxa910_timer;
+extern void pxa910_timer_init(void);
 extern void __init pxa910_init_irq(void);
 
 #include <linux/i2c.h>
diff --git a/arch/arm/mach-mmp/jasper.c b/arch/arm/mach-mmp/jasper.c
index ff73249884d0..66634fd0ecb0 100644
--- a/arch/arm/mach-mmp/jasper.c
+++ b/arch/arm/mach-mmp/jasper.c
@@ -174,7 +174,7 @@ MACHINE_START(MARVELL_JASPER, "Jasper Development Platform")
 	.map_io		= mmp_map_io,
 	.nr_irqs	= JASPER_NR_IRQS,
 	.init_irq       = mmp2_init_irq,
-	.timer          = &mmp2_timer,
+	.init_time	= mmp2_timer_init,
 	.init_machine   = jasper_init,
 	.restart	= mmp_restart,
 MACHINE_END
diff --git a/arch/arm/mach-mmp/mmp-dt.c b/arch/arm/mach-mmp/mmp-dt.c
index 033cc31b3c72..d063efa0a4f1 100644
--- a/arch/arm/mach-mmp/mmp-dt.c
+++ b/arch/arm/mach-mmp/mmp-dt.c
@@ -22,10 +22,6 @@
 extern void __init mmp_dt_irq_init(void);
 extern void __init mmp_dt_init_timer(void);
 
-static struct sys_timer mmp_dt_timer = {
-	.init	= mmp_dt_init_timer,
-};
-
 static const struct of_dev_auxdata pxa168_auxdata_lookup[] __initconst = {
 	OF_DEV_AUXDATA("mrvl,mmp-uart", 0xd4017000, "pxa2xx-uart.0", NULL),
 	OF_DEV_AUXDATA("mrvl,mmp-uart", 0xd4018000, "pxa2xx-uart.1", NULL),
@@ -69,7 +65,7 @@ static const char *mmp_dt_board_compat[] __initdata = {
 DT_MACHINE_START(PXA168_DT, "Marvell PXA168 (Device Tree Support)")
 	.map_io		= mmp_map_io,
 	.init_irq	= mmp_dt_irq_init,
-	.timer		= &mmp_dt_timer,
+	.init_time	= mmp_dt_init_timer,
 	.init_machine	= pxa168_dt_init,
 	.dt_compat	= mmp_dt_board_compat,
 MACHINE_END
@@ -77,7 +73,7 @@ MACHINE_END
 DT_MACHINE_START(PXA910_DT, "Marvell PXA910 (Device Tree Support)")
 	.map_io		= mmp_map_io,
 	.init_irq	= mmp_dt_irq_init,
-	.timer		= &mmp_dt_timer,
+	.init_time	= mmp_dt_init_timer,
 	.init_machine	= pxa910_dt_init,
 	.dt_compat	= mmp_dt_board_compat,
 MACHINE_END
diff --git a/arch/arm/mach-mmp/mmp2-dt.c b/arch/arm/mach-mmp/mmp2-dt.c
index 535a5ed5977b..fad431aa6e09 100644
--- a/arch/arm/mach-mmp/mmp2-dt.c
+++ b/arch/arm/mach-mmp/mmp2-dt.c
@@ -24,10 +24,6 @@
 extern void __init mmp_dt_irq_init(void);
 extern void __init mmp_dt_init_timer(void);
 
-static struct sys_timer mmp_dt_timer = {
-	.init	= mmp_dt_init_timer,
-};
-
 static const struct of_dev_auxdata mmp2_auxdata_lookup[] __initconst = {
 	OF_DEV_AUXDATA("mrvl,mmp-uart", 0xd4030000, "pxa2xx-uart.0", NULL),
 	OF_DEV_AUXDATA("mrvl,mmp-uart", 0xd4017000, "pxa2xx-uart.1", NULL),
@@ -54,7 +50,7 @@ static const char *mmp2_dt_board_compat[] __initdata = {
 DT_MACHINE_START(MMP2_DT, "Marvell MMP2 (Device Tree Support)")
 	.map_io		= mmp_map_io,
 	.init_irq	= mmp_dt_irq_init,
-	.timer		= &mmp_dt_timer,
+	.init_time	= mmp_dt_init_timer,
 	.init_machine	= mmp2_dt_init,
 	.dt_compat	= mmp2_dt_board_compat,
 MACHINE_END
diff --git a/arch/arm/mach-mmp/mmp2.c b/arch/arm/mach-mmp/mmp2.c
index 3a3768c7a191..d94d114eef7b 100644
--- a/arch/arm/mach-mmp/mmp2.c
+++ b/arch/arm/mach-mmp/mmp2.c
@@ -114,7 +114,7 @@ postcore_initcall(mmp2_init);
 
 #define APBC_TIMERS	APBC_REG(0x024)
 
-static void __init mmp2_timer_init(void)
+void __init mmp2_timer_init(void)
 {
 	unsigned long clk_rst;
 
@@ -130,10 +130,6 @@ static void __init mmp2_timer_init(void)
 	timer_init(IRQ_MMP2_TIMER1);
 }
 
-struct sys_timer mmp2_timer = {
-	.init	= mmp2_timer_init,
-};
-
 /* on-chip devices */
 MMP2_DEVICE(uart1, "pxa2xx-uart", 0, UART1, 0xd4030000, 0x30, 4, 5);
 MMP2_DEVICE(uart2, "pxa2xx-uart", 1, UART2, 0xd4017000, 0x30, 20, 21);
diff --git a/arch/arm/mach-mmp/pxa168.c b/arch/arm/mach-mmp/pxa168.c
index b7f074f15498..9bc7b86a86a7 100644
--- a/arch/arm/mach-mmp/pxa168.c
+++ b/arch/arm/mach-mmp/pxa168.c
@@ -67,7 +67,7 @@ postcore_initcall(pxa168_init);
 #define TIMER_CLK_RST	(APBC_APBCLK | APBC_FNCLK | APBC_FNCLKSEL(3))
 #define APBC_TIMERS	APBC_REG(0x34)
 
-static void __init pxa168_timer_init(void)
+void __init pxa168_timer_init(void)
 {
 	/* this is early, we have to initialize the CCU registers by
 	 * ourselves instead of using clk_* API. Clock rate is defined
@@ -81,10 +81,6 @@ static void __init pxa168_timer_init(void)
 	timer_init(IRQ_PXA168_TIMER1);
 }
 
-struct sys_timer pxa168_timer = {
-	.init	= pxa168_timer_init,
-};
-
 void pxa168_clear_keypad_wakeup(void)
 {
 	uint32_t val;
diff --git a/arch/arm/mach-mmp/pxa910.c b/arch/arm/mach-mmp/pxa910.c
index 8b1e16fbb7a5..c6a89f1eca4e 100644
--- a/arch/arm/mach-mmp/pxa910.c
+++ b/arch/arm/mach-mmp/pxa910.c
@@ -101,7 +101,7 @@ postcore_initcall(pxa910_init);
 #define TIMER_CLK_RST	(APBC_APBCLK | APBC_FNCLK | APBC_FNCLKSEL(3))
 #define APBC_TIMERS	APBC_REG(0x34)
 
-static void __init pxa910_timer_init(void)
+void __init pxa910_timer_init(void)
 {
 	/* reset and configure */
 	__raw_writel(APBC_APBCLK | APBC_RST, APBC_TIMERS);
@@ -110,10 +110,6 @@ static void __init pxa910_timer_init(void)
 	timer_init(IRQ_PXA910_AP1_TIMER1);
 }
 
-struct sys_timer pxa910_timer = {
-	.init	= pxa910_timer_init,
-};
-
 /* on-chip devices */
 
 /* NOTE: there are totally 3 UARTs on PXA910:
diff --git a/arch/arm/mach-mmp/tavorevb.c b/arch/arm/mach-mmp/tavorevb.c
index b28f9084dfff..4c127d23955d 100644
--- a/arch/arm/mach-mmp/tavorevb.c
+++ b/arch/arm/mach-mmp/tavorevb.c
@@ -103,7 +103,7 @@ MACHINE_START(TAVOREVB, "PXA910 Evaluation Board (aka TavorEVB)")
 	.map_io		= mmp_map_io,
 	.nr_irqs	= MMP_NR_IRQS,
 	.init_irq       = pxa910_init_irq,
-	.timer          = &pxa910_timer,
+	.init_time	= pxa910_timer_init,
 	.init_machine   = tavorevb_init,
 	.restart	= mmp_restart,
 MACHINE_END
diff --git a/arch/arm/mach-mmp/teton_bga.c b/arch/arm/mach-mmp/teton_bga.c
index dd30ea74785c..8609967975ed 100644
--- a/arch/arm/mach-mmp/teton_bga.c
+++ b/arch/arm/mach-mmp/teton_bga.c
@@ -86,7 +86,7 @@ MACHINE_START(TETON_BGA, "PXA168-based Teton BGA Development Platform")
 	.map_io		= mmp_map_io,
 	.nr_irqs	= MMP_NR_IRQS,
 	.init_irq       = pxa168_init_irq,
-	.timer          = &pxa168_timer,
+	.init_time	= pxa168_timer_init,
 	.init_machine   = teton_bga_init,
 	.restart	= pxa168_restart,
 MACHINE_END
diff --git a/arch/arm/mach-mmp/ttc_dkb.c b/arch/arm/mach-mmp/ttc_dkb.c
index ce55fd8821c4..6e474900b13e 100644
--- a/arch/arm/mach-mmp/ttc_dkb.c
+++ b/arch/arm/mach-mmp/ttc_dkb.c
@@ -218,7 +218,7 @@ MACHINE_START(TTC_DKB, "PXA910-based TTC_DKB Development Platform")
 	.map_io		= mmp_map_io,
 	.nr_irqs	= TTCDKB_NR_IRQS,
 	.init_irq       = pxa910_init_irq,
-	.timer          = &pxa910_timer,
+	.init_time	= pxa910_timer_init,
 	.init_machine   = ttc_dkb_init,
 	.restart	= mmp_restart,
 MACHINE_END
diff --git a/arch/arm/mach-msm/board-dt-8660.c b/arch/arm/mach-msm/board-dt-8660.c
index b5b4de2cdf9e..27c41eabfd12 100644
--- a/arch/arm/mach-msm/board-dt-8660.c
+++ b/arch/arm/mach-msm/board-dt-8660.c
@@ -59,6 +59,6 @@ DT_MACHINE_START(MSM_DT, "Qualcomm MSM (Flattened Device Tree)")
 	.handle_irq = gic_handle_irq,
 	.init_machine = msm8x60_dt_init,
 	.init_late = msm8x60_init_late,
-	.timer = &msm_dt_timer,
+	.init_time	= msm_dt_timer_init,
 	.dt_compat = msm8x60_fluid_match,
 MACHINE_END
diff --git a/arch/arm/mach-msm/board-dt-8960.c b/arch/arm/mach-msm/board-dt-8960.c
index 4490edb71c17..3226d5276962 100644
--- a/arch/arm/mach-msm/board-dt-8960.c
+++ b/arch/arm/mach-msm/board-dt-8960.c
@@ -43,7 +43,7 @@ DT_MACHINE_START(MSM8960_DT, "Qualcomm MSM (Flattened Device Tree)")
 	.smp = smp_ops(msm_smp_ops),
 	.map_io = msm_map_msm8960_io,
 	.init_irq = msm_dt_init_irq,
-	.timer = &msm_dt_timer,
+	.init_time	= msm_dt_timer_init,
 	.init_machine = msm_dt_init,
 	.dt_compat = msm8960_dt_match,
 	.handle_irq = gic_handle_irq,
diff --git a/arch/arm/mach-msm/board-halibut.c b/arch/arm/mach-msm/board-halibut.c
index 6ce542e2e21c..84d720af34ab 100644
--- a/arch/arm/mach-msm/board-halibut.c
+++ b/arch/arm/mach-msm/board-halibut.c
@@ -106,5 +106,5 @@ MACHINE_START(HALIBUT, "Halibut Board (QCT SURF7200A)")
 	.init_irq	= halibut_init_irq,
 	.init_machine	= halibut_init,
 	.init_late	= halibut_init_late,
-	.timer		= &msm7x01_timer,
+	.init_time	= msm7x01_timer_init,
 MACHINE_END
diff --git a/arch/arm/mach-msm/board-mahimahi.c b/arch/arm/mach-msm/board-mahimahi.c
index df00bc03ce74..30c3496db593 100644
--- a/arch/arm/mach-msm/board-mahimahi.c
+++ b/arch/arm/mach-msm/board-mahimahi.c
@@ -75,7 +75,7 @@ static void __init mahimahi_init_late(void)
 	smd_debugfs_init();
 }
 
-extern struct sys_timer msm_timer;
+void msm_timer_init(void);
 
 MACHINE_START(MAHIMAHI, "mahimahi")
 	.atag_offset	= 0x100,
@@ -84,5 +84,5 @@ MACHINE_START(MAHIMAHI, "mahimahi")
 	.init_irq	= msm_init_irq,
 	.init_machine	= mahimahi_init,
 	.init_late	= mahimahi_init_late,
-	.timer		= &msm_timer,
+	.init_time	= msm_timer_init,
 MACHINE_END
diff --git a/arch/arm/mach-msm/board-msm7x30.c b/arch/arm/mach-msm/board-msm7x30.c
index effa6f4336c7..7bc3f82e3ec9 100644
--- a/arch/arm/mach-msm/board-msm7x30.c
+++ b/arch/arm/mach-msm/board-msm7x30.c
@@ -131,7 +131,7 @@ MACHINE_START(MSM7X30_SURF, "QCT MSM7X30 SURF")
 	.init_irq = msm7x30_init_irq,
 	.init_machine = msm7x30_init,
 	.init_late = msm7x30_init_late,
-	.timer = &msm7x30_timer,
+	.init_time	= msm7x30_timer_init,
 MACHINE_END
 
 MACHINE_START(MSM7X30_FFA, "QCT MSM7X30 FFA")
@@ -142,7 +142,7 @@ MACHINE_START(MSM7X30_FFA, "QCT MSM7X30 FFA")
 	.init_irq = msm7x30_init_irq,
 	.init_machine = msm7x30_init,
 	.init_late = msm7x30_init_late,
-	.timer = &msm7x30_timer,
+	.init_time	= msm7x30_timer_init,
 MACHINE_END
 
 MACHINE_START(MSM7X30_FLUID, "QCT MSM7X30 FLUID")
@@ -153,5 +153,5 @@ MACHINE_START(MSM7X30_FLUID, "QCT MSM7X30 FLUID")
 	.init_irq = msm7x30_init_irq,
 	.init_machine = msm7x30_init,
 	.init_late = msm7x30_init_late,
-	.timer = &msm7x30_timer,
+	.init_time	= msm7x30_timer_init,
 MACHINE_END
diff --git a/arch/arm/mach-msm/board-qsd8x50.c b/arch/arm/mach-msm/board-qsd8x50.c
index 2448fcf09eb1..686e7949a73a 100644
--- a/arch/arm/mach-msm/board-qsd8x50.c
+++ b/arch/arm/mach-msm/board-qsd8x50.c
@@ -200,7 +200,7 @@ MACHINE_START(QSD8X50_SURF, "QCT QSD8X50 SURF")
 	.init_irq = qsd8x50_init_irq,
 	.init_machine = qsd8x50_init,
 	.init_late = qsd8x50_init_late,
-	.timer = &qsd8x50_timer,
+	.init_time	= qsd8x50_timer_init,
 MACHINE_END
 
 MACHINE_START(QSD8X50A_ST1_5, "QCT QSD8X50A ST1.5")
@@ -209,5 +209,5 @@ MACHINE_START(QSD8X50A_ST1_5, "QCT QSD8X50A ST1.5")
 	.init_irq = qsd8x50_init_irq,
 	.init_machine = qsd8x50_init,
 	.init_late = qsd8x50_init_late,
-	.timer = &qsd8x50_timer,
+	.init_time	= qsd8x50_timer_init,
 MACHINE_END
diff --git a/arch/arm/mach-msm/board-sapphire.c b/arch/arm/mach-msm/board-sapphire.c
index b7b0fc7e3278..70730111b37c 100644
--- a/arch/arm/mach-msm/board-sapphire.c
+++ b/arch/arm/mach-msm/board-sapphire.c
@@ -53,7 +53,7 @@ static struct platform_device *devices[] __initdata = {
 	&msm_device_uart3,
 };
 
-extern struct sys_timer msm_timer;
+void msm_timer_init(void);
 
 static void __init sapphire_init_irq(void)
 {
@@ -113,5 +113,5 @@ MACHINE_START(SAPPHIRE, "sapphire")
 	.init_irq       = sapphire_init_irq,
 	.init_machine   = sapphire_init,
 	.init_late      = sapphire_init_late,
-	.timer          = &msm_timer,
+	.init_time	= msm_timer_init,
 MACHINE_END
diff --git a/arch/arm/mach-msm/board-trout.c b/arch/arm/mach-msm/board-trout.c
index 4ba0800e243e..919bfa32871a 100644
--- a/arch/arm/mach-msm/board-trout.c
+++ b/arch/arm/mach-msm/board-trout.c
@@ -110,5 +110,5 @@ MACHINE_START(TROUT, "HTC Dream")
 	.init_irq	= trout_init_irq,
 	.init_machine	= trout_init,
 	.init_late	= trout_init_late,
-	.timer		= &msm7x01_timer,
+	.init_time	= msm7x01_timer_init,
 MACHINE_END
diff --git a/arch/arm/mach-msm/common.h b/arch/arm/mach-msm/common.h
index 633a7159d5ff..ce8215a269e5 100644
--- a/arch/arm/mach-msm/common.h
+++ b/arch/arm/mach-msm/common.h
@@ -12,10 +12,10 @@
 #ifndef __MACH_COMMON_H
 #define __MACH_COMMON_H
 
-extern struct sys_timer msm7x01_timer;
-extern struct sys_timer msm7x30_timer;
-extern struct sys_timer msm_dt_timer;
-extern struct sys_timer qsd8x50_timer;
+extern void msm7x01_timer_init(void);
+extern void msm7x30_timer_init(void);
+extern void msm_dt_timer_init(void);
+extern void qsd8x50_timer_init(void);
 
 extern void msm_map_common_io(void);
 extern void msm_map_msm7x30_io(void);
diff --git a/arch/arm/mach-msm/timer.c b/arch/arm/mach-msm/timer.c
index 476549a8a709..2fb5f3eec50f 100644
--- a/arch/arm/mach-msm/timer.c
+++ b/arch/arm/mach-msm/timer.c
@@ -229,7 +229,7 @@ static const struct of_device_id msm_gpt_match[] __initconst = {
 	{ },
 };
 
-static void __init msm_dt_timer_init(void)
+void __init msm_dt_timer_init(void)
 {
 	struct device_node *np;
 	u32 freq;
@@ -296,10 +296,6 @@ static void __init msm_dt_timer_init(void)
 
 	msm_timer_init(freq, 32, irq, !!percpu_offset);
 }
-
-struct sys_timer msm_dt_timer = {
-	.init = msm_dt_timer_init
-};
 #endif
 
 static int __init msm_timer_map(phys_addr_t event, phys_addr_t source)
@@ -317,7 +313,7 @@ static int __init msm_timer_map(phys_addr_t event, phys_addr_t source)
 	return 0;
 }
 
-static void __init msm7x01_timer_init(void)
+void __init msm7x01_timer_init(void)
 {
 	struct clocksource *cs = &msm_clocksource;
 
@@ -330,28 +326,16 @@ static void __init msm7x01_timer_init(void)
 			false);
 }
 
-struct sys_timer msm7x01_timer = {
-	.init = msm7x01_timer_init
-};
-
-static void __init msm7x30_timer_init(void)
+void __init msm7x30_timer_init(void)
 {
 	if (msm_timer_map(0xc0100004, 0xc0100024))
 		return;
 	msm_timer_init(24576000 / 4, 32, 1, false);
 }
 
-struct sys_timer msm7x30_timer = {
-	.init = msm7x30_timer_init
-};
-
-static void __init qsd8x50_timer_init(void)
+void __init qsd8x50_timer_init(void)
 {
 	if (msm_timer_map(0xAC100000, 0xAC100010))
 		return;
 	msm_timer_init(19200000 / 4, 32, 7, false);
 }
-
-struct sys_timer qsd8x50_timer = {
-	.init = qsd8x50_timer_init
-};
diff --git a/arch/arm/mach-mv78xx0/buffalo-wxl-setup.c b/arch/arm/mach-mv78xx0/buffalo-wxl-setup.c
index ee74ec97c141..1f2ef98b37c6 100644
--- a/arch/arm/mach-mv78xx0/buffalo-wxl-setup.c
+++ b/arch/arm/mach-mv78xx0/buffalo-wxl-setup.c
@@ -150,6 +150,6 @@ MACHINE_START(TERASTATION_WXL, "Buffalo Nas WXL")
 	.map_io		= mv78xx0_map_io,
 	.init_early	= mv78xx0_init_early,
 	.init_irq	= mv78xx0_init_irq,
-	.timer		= &mv78xx0_timer,
+	.init_time	= mv78xx0_timer_init,
 	.restart	= mv78xx0_restart,
 MACHINE_END
diff --git a/arch/arm/mach-mv78xx0/common.c b/arch/arm/mach-mv78xx0/common.c
index d0cb4857b4b3..0efa14498ebc 100644
--- a/arch/arm/mach-mv78xx0/common.c
+++ b/arch/arm/mach-mv78xx0/common.c
@@ -336,16 +336,12 @@ void __init mv78xx0_init_early(void)
 	orion_time_set_base(TIMER_VIRT_BASE);
 }
 
-static void __init_refok mv78xx0_timer_init(void)
+void __init_refok mv78xx0_timer_init(void)
 {
 	orion_time_init(BRIDGE_VIRT_BASE, BRIDGE_INT_TIMER1_CLR,
 			IRQ_MV78XX0_TIMER_1, get_tclk());
 }
 
-struct sys_timer mv78xx0_timer = {
-	.init = mv78xx0_timer_init,
-};
-
 
 /*****************************************************************************
  * General
diff --git a/arch/arm/mach-mv78xx0/common.h b/arch/arm/mach-mv78xx0/common.h
index 507c767d49e0..5e9485bad0ac 100644
--- a/arch/arm/mach-mv78xx0/common.h
+++ b/arch/arm/mach-mv78xx0/common.h
@@ -47,7 +47,7 @@ void mv78xx0_uart3_init(void);
 void mv78xx0_i2c_init(void);
 void mv78xx0_restart(char, const char *);
 
-extern struct sys_timer mv78xx0_timer;
+extern void mv78xx0_timer_init(void);
 
 
 #endif
diff --git a/arch/arm/mach-mv78xx0/db78x00-bp-setup.c b/arch/arm/mach-mv78xx0/db78x00-bp-setup.c
index 4d6d48bf51ef..4e0f22b30bc8 100644
--- a/arch/arm/mach-mv78xx0/db78x00-bp-setup.c
+++ b/arch/arm/mach-mv78xx0/db78x00-bp-setup.c
@@ -98,6 +98,6 @@ MACHINE_START(DB78X00_BP, "Marvell DB-78x00-BP Development Board")
 	.map_io		= mv78xx0_map_io,
 	.init_early	= mv78xx0_init_early,
 	.init_irq	= mv78xx0_init_irq,
-	.timer		= &mv78xx0_timer,
+	.init_time	= mv78xx0_timer_init,
 	.restart	= mv78xx0_restart,
 MACHINE_END
diff --git a/arch/arm/mach-mv78xx0/rd78x00-masa-setup.c b/arch/arm/mach-mv78xx0/rd78x00-masa-setup.c
index 9a882706e138..d2d06f3957f3 100644
--- a/arch/arm/mach-mv78xx0/rd78x00-masa-setup.c
+++ b/arch/arm/mach-mv78xx0/rd78x00-masa-setup.c
@@ -83,6 +83,6 @@ MACHINE_START(RD78X00_MASA, "Marvell RD-78x00-MASA Development Board")
 	.map_io		= mv78xx0_map_io,
 	.init_early	= mv78xx0_init_early,
 	.init_irq	= mv78xx0_init_irq,
-	.timer		= &mv78xx0_timer,
+	.init_time	= mv78xx0_timer_init,
 	.restart	= mv78xx0_restart,
 MACHINE_END
diff --git a/arch/arm/mach-mvebu/armada-370-xp.c b/arch/arm/mach-mvebu/armada-370-xp.c
index 7434b5e36197..a5ea616d6d12 100644
--- a/arch/arm/mach-mvebu/armada-370-xp.c
+++ b/arch/arm/mach-mvebu/armada-370-xp.c
@@ -56,10 +56,6 @@ void __init armada_370_xp_init_early(void)
 	init_dma_coherent_pool_size(SZ_1M);
 }
 
-struct sys_timer armada_370_xp_timer = {
-	.init		= armada_370_xp_timer_and_clk_init,
-};
-
 static void __init armada_370_xp_dt_init(void)
 {
 	of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL);
@@ -78,7 +74,7 @@ DT_MACHINE_START(ARMADA_XP_DT, "Marvell Armada 370/XP (Device Tree)")
 	.init_early	= armada_370_xp_init_early,
 	.init_irq	= armada_370_xp_init_irq,
 	.handle_irq     = armada_370_xp_handle_irq,
-	.timer		= &armada_370_xp_timer,
+	.init_time	= armada_370_xp_timer_and_clk_init,
 	.restart	= mvebu_restart,
 	.dt_compat	= armada_370_xp_dt_compat,
 MACHINE_END
diff --git a/arch/arm/mach-mxs/mach-mxs.c b/arch/arm/mach-mxs/mach-mxs.c
index c66129b5dd18..5fad7cefe8aa 100644
--- a/arch/arm/mach-mxs/mach-mxs.c
+++ b/arch/arm/mach-mxs/mach-mxs.c
@@ -163,19 +163,11 @@ static void __init imx23_timer_init(void)
 	mx23_clocks_init();
 }
 
-static struct sys_timer imx23_timer = {
-	.init = imx23_timer_init,
-};
-
 static void __init imx28_timer_init(void)
 {
 	mx28_clocks_init();
 }
 
-static struct sys_timer imx28_timer = {
-	.init = imx28_timer_init,
-};
-
 enum mac_oui {
 	OUI_FSL,
 	OUI_DENX,
@@ -446,7 +438,7 @@ DT_MACHINE_START(IMX23, "Freescale i.MX23 (Device Tree)")
 	.map_io		= mx23_map_io,
 	.init_irq	= icoll_init_irq,
 	.handle_irq	= icoll_handle_irq,
-	.timer		= &imx23_timer,
+	.init_time	= imx23_timer_init,
 	.init_machine	= mxs_machine_init,
 	.dt_compat	= imx23_dt_compat,
 	.restart	= mxs_restart,
@@ -456,7 +448,7 @@ DT_MACHINE_START(IMX28, "Freescale i.MX28 (Device Tree)")
 	.map_io		= mx28_map_io,
 	.init_irq	= icoll_init_irq,
 	.handle_irq	= icoll_handle_irq,
-	.timer		= &imx28_timer,
+	.init_time	= imx28_timer_init,
 	.init_machine	= mxs_machine_init,
 	.dt_compat	= imx28_dt_compat,
 	.restart	= mxs_restart,
diff --git a/arch/arm/mach-netx/generic.h b/arch/arm/mach-netx/generic.h
index 9b915119b8d6..768b26bbb42b 100644
--- a/arch/arm/mach-netx/generic.h
+++ b/arch/arm/mach-netx/generic.h
@@ -21,5 +21,4 @@ extern void __init netx_map_io(void);
 extern void __init netx_init_irq(void);
 extern void netx_restart(char, const char *);
 
-struct sys_timer;
-extern struct sys_timer netx_timer;
+extern void netx_timer_init(void);
diff --git a/arch/arm/mach-netx/nxdb500.c b/arch/arm/mach-netx/nxdb500.c
index 8b781ff7c9e9..241e1b9c58cb 100644
--- a/arch/arm/mach-netx/nxdb500.c
+++ b/arch/arm/mach-netx/nxdb500.c
@@ -205,7 +205,7 @@ MACHINE_START(NXDB500, "Hilscher nxdb500")
 	.map_io		= netx_map_io,
 	.init_irq	= netx_init_irq,
 	.handle_irq	= vic_handle_irq,
-	.timer		= &netx_timer,
+	.init_time	= netx_timer_init,
 	.init_machine	= nxdb500_init,
 	.restart	= netx_restart,
 MACHINE_END
diff --git a/arch/arm/mach-netx/nxdkn.c b/arch/arm/mach-netx/nxdkn.c
index b26dbce334f2..055aeecedde2 100644
--- a/arch/arm/mach-netx/nxdkn.c
+++ b/arch/arm/mach-netx/nxdkn.c
@@ -98,7 +98,7 @@ MACHINE_START(NXDKN, "Hilscher nxdkn")
 	.map_io		= netx_map_io,
 	.init_irq	= netx_init_irq,
 	.handle_irq	= vic_handle_irq,
-	.timer		= &netx_timer,
+	.init_time	= netx_timer_init,
 	.init_machine	= nxdkn_init,
 	.restart	= netx_restart,
 MACHINE_END
diff --git a/arch/arm/mach-netx/nxeb500hmi.c b/arch/arm/mach-netx/nxeb500hmi.c
index 257382efafa0..018e91c55b00 100644
--- a/arch/arm/mach-netx/nxeb500hmi.c
+++ b/arch/arm/mach-netx/nxeb500hmi.c
@@ -182,7 +182,7 @@ MACHINE_START(NXEB500HMI, "Hilscher nxeb500hmi")
 	.map_io		= netx_map_io,
 	.init_irq	= netx_init_irq,
 	.handle_irq	= vic_handle_irq,
-	.timer		= &netx_timer,
+	.init_time	= netx_timer_init,
 	.init_machine	= nxeb500hmi_init,
 	.restart	= netx_restart,
 MACHINE_END
diff --git a/arch/arm/mach-netx/time.c b/arch/arm/mach-netx/time.c
index e24c141ba489..0dee4524494f 100644
--- a/arch/arm/mach-netx/time.c
+++ b/arch/arm/mach-netx/time.c
@@ -107,7 +107,7 @@ static struct irqaction netx_timer_irq = {
 /*
  * Set up timer interrupt
  */
-static void __init netx_timer_init(void)
+void __init netx_timer_init(void)
 {
 	/* disable timer initially */
 	writel(0, NETX_GPIO_COUNTER_CTRL(0));
@@ -151,7 +151,3 @@ static void __init netx_timer_init(void)
 	netx_clockevent.cpumask = cpumask_of(0);
 	clockevents_register_device(&netx_clockevent);
 }
-
-struct sys_timer netx_timer = {
-	.init		= netx_timer_init,
-};
diff --git a/arch/arm/mach-nomadik/board-nhk8815.c b/arch/arm/mach-nomadik/board-nhk8815.c
index 98167a4319f7..ab756ed87ef0 100644
--- a/arch/arm/mach-nomadik/board-nhk8815.c
+++ b/arch/arm/mach-nomadik/board-nhk8815.c
@@ -268,10 +268,6 @@ static void __init nomadik_timer_init(void)
 	nmdk_timer_init(io_p2v(NOMADIK_MTU0_BASE), IRQ_MTU0);
 }
 
-static struct sys_timer nomadik_timer = {
-	.init	= nomadik_timer_init,
-};
-
 static struct i2c_board_info __initdata nhk8815_i2c0_devices[] = {
 	{
 		I2C_BOARD_INFO("stw4811", 0x2d),
@@ -354,7 +350,7 @@ MACHINE_START(NOMADIK, "NHK8815")
 	.map_io		= cpu8815_map_io,
 	.init_irq	= cpu8815_init_irq,
 	.handle_irq	= vic_handle_irq,
-	.timer		= &nomadik_timer,
+	.init_time	= nomadik_timer_init,
 	.init_machine	= nhk8815_platform_init,
 	.restart	= cpu8815_restart,
 MACHINE_END
diff --git a/arch/arm/mach-omap1/board-ams-delta.c b/arch/arm/mach-omap1/board-ams-delta.c
index a8fce3ccc707..3d2aa6f30192 100644
--- a/arch/arm/mach-omap1/board-ams-delta.c
+++ b/arch/arm/mach-omap1/board-ams-delta.c
@@ -628,6 +628,6 @@ MACHINE_START(AMS_DELTA, "Amstrad E3 (Delta)")
 	.init_irq	= omap1_init_irq,
 	.init_machine	= ams_delta_init,
 	.init_late	= ams_delta_init_late,
-	.timer		= &omap1_timer,
+	.init_time	= omap1_timer_init,
 	.restart	= omap1_restart,
 MACHINE_END
diff --git a/arch/arm/mach-omap1/board-fsample.c b/arch/arm/mach-omap1/board-fsample.c
index 560a7dcf0a56..702d58039cc1 100644
--- a/arch/arm/mach-omap1/board-fsample.c
+++ b/arch/arm/mach-omap1/board-fsample.c
@@ -364,6 +364,6 @@ MACHINE_START(OMAP_FSAMPLE, "OMAP730 F-Sample")
 	.init_irq	= omap1_init_irq,
 	.init_machine	= omap_fsample_init,
 	.init_late	= omap1_init_late,
-	.timer		= &omap1_timer,
+	.init_time	= omap1_timer_init,
 	.restart	= omap1_restart,
 MACHINE_END
diff --git a/arch/arm/mach-omap1/board-generic.c b/arch/arm/mach-omap1/board-generic.c
index 608e7d2a2778..e1d9171774bc 100644
--- a/arch/arm/mach-omap1/board-generic.c
+++ b/arch/arm/mach-omap1/board-generic.c
@@ -84,6 +84,6 @@ MACHINE_START(OMAP_GENERIC, "Generic OMAP1510/1610/1710")
 	.init_irq	= omap1_init_irq,
 	.init_machine	= omap_generic_init,
 	.init_late	= omap1_init_late,
-	.timer		= &omap1_timer,
+	.init_time	= omap1_timer_init,
 	.restart	= omap1_restart,
 MACHINE_END
diff --git a/arch/arm/mach-omap1/board-h2.c b/arch/arm/mach-omap1/board-h2.c
index 2274bd677efc..0dac3d239e32 100644
--- a/arch/arm/mach-omap1/board-h2.c
+++ b/arch/arm/mach-omap1/board-h2.c
@@ -461,6 +461,6 @@ MACHINE_START(OMAP_H2, "TI-H2")
 	.init_irq	= omap1_init_irq,
 	.init_machine	= h2_init,
 	.init_late	= omap1_init_late,
-	.timer		= &omap1_timer,
+	.init_time	= omap1_timer_init,
 	.restart	= omap1_restart,
 MACHINE_END
diff --git a/arch/arm/mach-omap1/board-h3.c b/arch/arm/mach-omap1/board-h3.c
index 1051935f0aac..816ecd13f81e 100644
--- a/arch/arm/mach-omap1/board-h3.c
+++ b/arch/arm/mach-omap1/board-h3.c
@@ -454,6 +454,6 @@ MACHINE_START(OMAP_H3, "TI OMAP1710 H3 board")
 	.init_irq	= omap1_init_irq,
 	.init_machine	= h3_init,
 	.init_late	= omap1_init_late,
-	.timer		= &omap1_timer,
+	.init_time	= omap1_timer_init,
 	.restart	= omap1_restart,
 MACHINE_END
diff --git a/arch/arm/mach-omap1/board-htcherald.c b/arch/arm/mach-omap1/board-htcherald.c
index 356f816c84a6..35a2379b986f 100644
--- a/arch/arm/mach-omap1/board-htcherald.c
+++ b/arch/arm/mach-omap1/board-htcherald.c
@@ -603,6 +603,6 @@ MACHINE_START(HERALD, "HTC Herald")
 	.init_irq       = omap1_init_irq,
 	.init_machine   = htcherald_init,
 	.init_late	= omap1_init_late,
-	.timer          = &omap1_timer,
+	.init_time	= omap1_timer_init,
 	.restart	= omap1_restart,
 MACHINE_END
diff --git a/arch/arm/mach-omap1/board-innovator.c b/arch/arm/mach-omap1/board-innovator.c
index f8033fab0f82..bd5f02e9c354 100644
--- a/arch/arm/mach-omap1/board-innovator.c
+++ b/arch/arm/mach-omap1/board-innovator.c
@@ -458,6 +458,6 @@ MACHINE_START(OMAP_INNOVATOR, "TI-Innovator")
 	.init_irq	= omap1_init_irq,
 	.init_machine	= innovator_init,
 	.init_late	= omap1_init_late,
-	.timer		= &omap1_timer,
+	.init_time	= omap1_timer_init,
 	.restart	= omap1_restart,
 MACHINE_END
diff --git a/arch/arm/mach-omap1/board-nokia770.c b/arch/arm/mach-omap1/board-nokia770.c
index 24d2f2df11a0..4695ca717706 100644
--- a/arch/arm/mach-omap1/board-nokia770.c
+++ b/arch/arm/mach-omap1/board-nokia770.c
@@ -242,6 +242,6 @@ MACHINE_START(NOKIA770, "Nokia 770")
 	.init_irq	= omap1_init_irq,
 	.init_machine	= omap_nokia770_init,
 	.init_late	= omap1_init_late,
-	.timer		= &omap1_timer,
+	.init_time	= omap1_timer_init,
 	.restart	= omap1_restart,
 MACHINE_END
diff --git a/arch/arm/mach-omap1/board-osk.c b/arch/arm/mach-omap1/board-osk.c
index 872ea47cd28a..a7ce69286688 100644
--- a/arch/arm/mach-omap1/board-osk.c
+++ b/arch/arm/mach-omap1/board-osk.c
@@ -609,6 +609,6 @@ MACHINE_START(OMAP_OSK, "TI-OSK")
 	.init_irq	= omap1_init_irq,
 	.init_machine	= osk_init,
 	.init_late	= omap1_init_late,
-	.timer		= &omap1_timer,
+	.init_time	= omap1_timer_init,
 	.restart	= omap1_restart,
 MACHINE_END
diff --git a/arch/arm/mach-omap1/board-palmte.c b/arch/arm/mach-omap1/board-palmte.c
index c33dceb46607..845a1a7aef95 100644
--- a/arch/arm/mach-omap1/board-palmte.c
+++ b/arch/arm/mach-omap1/board-palmte.c
@@ -268,6 +268,6 @@ MACHINE_START(OMAP_PALMTE, "OMAP310 based Palm Tungsten E")
 	.init_irq	= omap1_init_irq,
 	.init_machine	= omap_palmte_init,
 	.init_late	= omap1_init_late,
-	.timer		= &omap1_timer,
+	.init_time	= omap1_timer_init,
 	.restart	= omap1_restart,
 MACHINE_END
diff --git a/arch/arm/mach-omap1/board-palmtt.c b/arch/arm/mach-omap1/board-palmtt.c
index 2948b0ee4be8..65a4a3e357f2 100644
--- a/arch/arm/mach-omap1/board-palmtt.c
+++ b/arch/arm/mach-omap1/board-palmtt.c
@@ -314,6 +314,6 @@ MACHINE_START(OMAP_PALMTT, "OMAP1510 based Palm Tungsten|T")
 	.init_irq	= omap1_init_irq,
 	.init_machine	= omap_palmtt_init,
 	.init_late	= omap1_init_late,
-	.timer		= &omap1_timer,
+	.init_time	= omap1_timer_init,
 	.restart	= omap1_restart,
 MACHINE_END
diff --git a/arch/arm/mach-omap1/board-palmz71.c b/arch/arm/mach-omap1/board-palmz71.c
index 7a05895c0be3..01c970071fd8 100644
--- a/arch/arm/mach-omap1/board-palmz71.c
+++ b/arch/arm/mach-omap1/board-palmz71.c
@@ -330,6 +330,6 @@ MACHINE_START(OMAP_PALMZ71, "OMAP310 based Palm Zire71")
 	.init_irq	= omap1_init_irq,
 	.init_machine	= omap_palmz71_init,
 	.init_late	= omap1_init_late,
-	.timer		= &omap1_timer,
+	.init_time	= omap1_timer_init,
 	.restart	= omap1_restart,
 MACHINE_END
diff --git a/arch/arm/mach-omap1/board-perseus2.c b/arch/arm/mach-omap1/board-perseus2.c
index 27f8d12ec222..8b2f7127f716 100644
--- a/arch/arm/mach-omap1/board-perseus2.c
+++ b/arch/arm/mach-omap1/board-perseus2.c
@@ -326,6 +326,6 @@ MACHINE_START(OMAP_PERSEUS2, "OMAP730 Perseus2")
 	.init_irq	= omap1_init_irq,
 	.init_machine	= omap_perseus2_init,
 	.init_late	= omap1_init_late,
-	.timer		= &omap1_timer,
+	.init_time	= omap1_timer_init,
 	.restart	= omap1_restart,
 MACHINE_END
diff --git a/arch/arm/mach-omap1/board-sx1.c b/arch/arm/mach-omap1/board-sx1.c
index 20ed52ae1714..9732a98f3e06 100644
--- a/arch/arm/mach-omap1/board-sx1.c
+++ b/arch/arm/mach-omap1/board-sx1.c
@@ -407,6 +407,6 @@ MACHINE_START(SX1, "OMAP310 based Siemens SX1")
 	.init_irq	= omap1_init_irq,
 	.init_machine	= omap_sx1_init,
 	.init_late	= omap1_init_late,
-	.timer		= &omap1_timer,
+	.init_time	= omap1_timer_init,
 	.restart	= omap1_restart,
 MACHINE_END
diff --git a/arch/arm/mach-omap1/board-voiceblue.c b/arch/arm/mach-omap1/board-voiceblue.c
index abf705f49b19..6c116e1a4b01 100644
--- a/arch/arm/mach-omap1/board-voiceblue.c
+++ b/arch/arm/mach-omap1/board-voiceblue.c
@@ -289,6 +289,6 @@ MACHINE_START(VOICEBLUE, "VoiceBlue OMAP5910")
 	.init_irq	= omap1_init_irq,
 	.init_machine	= voiceblue_init,
 	.init_late	= omap1_init_late,
-	.timer		= &omap1_timer,
+	.init_time	= omap1_timer_init,
 	.restart	= voiceblue_restart,
 MACHINE_END
diff --git a/arch/arm/mach-omap1/common.h b/arch/arm/mach-omap1/common.h
index b53e0854422f..fb18831e88aa 100644
--- a/arch/arm/mach-omap1/common.h
+++ b/arch/arm/mach-omap1/common.h
@@ -75,7 +75,7 @@ extern void __init omap_check_revision(void);
 extern void omap1_nand_cmd_ctl(struct mtd_info *mtd, int cmd,
 			       unsigned int ctrl);
 
-extern struct sys_timer omap1_timer;
+extern void omap1_timer_init(void);
 #ifdef CONFIG_OMAP_32K_TIMER
 extern int omap_32k_timer_init(void);
 #else
diff --git a/arch/arm/mach-omap1/time.c b/arch/arm/mach-omap1/time.c
index 4d4816fd6fc9..1d4512fdc9bc 100644
--- a/arch/arm/mach-omap1/time.c
+++ b/arch/arm/mach-omap1/time.c
@@ -236,12 +236,8 @@ static inline void omap_mpu_timer_init(void)
  * Timer initialization
  * ---------------------------------------------------------------------------
  */
-static void __init omap1_timer_init(void)
+void __init omap1_timer_init(void)
 {
 	if (omap_32k_timer_init() != 0)
 		omap_mpu_timer_init();
 }
-
-struct sys_timer omap1_timer = {
-	.init		= omap1_timer_init,
-};
diff --git a/arch/arm/mach-omap2/board-2430sdp.c b/arch/arm/mach-omap2/board-2430sdp.c
index 4815ea6f8f5d..5f413968d568 100644
--- a/arch/arm/mach-omap2/board-2430sdp.c
+++ b/arch/arm/mach-omap2/board-2430sdp.c
@@ -284,6 +284,6 @@ MACHINE_START(OMAP_2430SDP, "OMAP2430 sdp2430 board")
 	.handle_irq	= omap2_intc_handle_irq,
 	.init_machine	= omap_2430sdp_init,
 	.init_late	= omap2430_init_late,
-	.timer		= &omap2_timer,
+	.init_time	= omap2_sync32k_timer_init,
 	.restart	= omap2xxx_restart,
 MACHINE_END
diff --git a/arch/arm/mach-omap2/board-3430sdp.c b/arch/arm/mach-omap2/board-3430sdp.c
index bb73afc9ac17..8e2513f6a282 100644
--- a/arch/arm/mach-omap2/board-3430sdp.c
+++ b/arch/arm/mach-omap2/board-3430sdp.c
@@ -597,6 +597,6 @@ MACHINE_START(OMAP_3430SDP, "OMAP3430 3430SDP board")
 	.handle_irq	= omap3_intc_handle_irq,
 	.init_machine	= omap_3430sdp_init,
 	.init_late	= omap3430_init_late,
-	.timer		= &omap3_timer,
+	.init_time	= omap3_sync32k_timer_init,
 	.restart	= omap3xxx_restart,
 MACHINE_END
diff --git a/arch/arm/mach-omap2/board-3630sdp.c b/arch/arm/mach-omap2/board-3630sdp.c
index 050aaa771254..33846274bb8a 100644
--- a/arch/arm/mach-omap2/board-3630sdp.c
+++ b/arch/arm/mach-omap2/board-3630sdp.c
@@ -211,6 +211,6 @@ MACHINE_START(OMAP_3630SDP, "OMAP 3630SDP board")
 	.handle_irq	= omap3_intc_handle_irq,
 	.init_machine	= omap_sdp_init,
 	.init_late	= omap3630_init_late,
-	.timer		= &omap3_timer,
+	.init_time	= omap3_sync32k_timer_init,
 	.restart	= omap3xxx_restart,
 MACHINE_END
diff --git a/arch/arm/mach-omap2/board-4430sdp.c b/arch/arm/mach-omap2/board-4430sdp.c
index 1cc6696594fd..f5d5f5941d7d 100644
--- a/arch/arm/mach-omap2/board-4430sdp.c
+++ b/arch/arm/mach-omap2/board-4430sdp.c
@@ -725,6 +725,6 @@ MACHINE_START(OMAP_4430SDP, "OMAP4430 4430SDP board")
 	.handle_irq	= gic_handle_irq,
 	.init_machine	= omap_4430sdp_init,
 	.init_late	= omap4430_init_late,
-	.timer		= &omap4_timer,
+	.init_time	= omap4_local_timer_init,
 	.restart	= omap44xx_restart,
 MACHINE_END
diff --git a/arch/arm/mach-omap2/board-am3517crane.c b/arch/arm/mach-omap2/board-am3517crane.c
index 51b96a1206d1..07f0be24a5d1 100644
--- a/arch/arm/mach-omap2/board-am3517crane.c
+++ b/arch/arm/mach-omap2/board-am3517crane.c
@@ -92,6 +92,6 @@ MACHINE_START(CRANEBOARD, "AM3517/05 CRANEBOARD")
 	.handle_irq	= omap3_intc_handle_irq,
 	.init_machine	= am3517_crane_init,
 	.init_late	= am35xx_init_late,
-	.timer		= &omap3_timer,
+	.init_time	= omap3_sync32k_timer_init,
 	.restart	= omap3xxx_restart,
 MACHINE_END
diff --git a/arch/arm/mach-omap2/board-am3517evm.c b/arch/arm/mach-omap2/board-am3517evm.c
index f81a303b87ff..6f5b2a05f4b2 100644
--- a/arch/arm/mach-omap2/board-am3517evm.c
+++ b/arch/arm/mach-omap2/board-am3517evm.c
@@ -393,6 +393,6 @@ MACHINE_START(OMAP3517EVM, "OMAP3517/AM3517 EVM")
 	.handle_irq	= omap3_intc_handle_irq,
 	.init_machine	= am3517_evm_init,
 	.init_late	= am35xx_init_late,
-	.timer		= &omap3_timer,
+	.init_time	= omap3_sync32k_timer_init,
 	.restart	= omap3xxx_restart,
 MACHINE_END
diff --git a/arch/arm/mach-omap2/board-apollon.c b/arch/arm/mach-omap2/board-apollon.c
index 5d0a61f54165..3a6ca74709ab 100644
--- a/arch/arm/mach-omap2/board-apollon.c
+++ b/arch/arm/mach-omap2/board-apollon.c
@@ -337,6 +337,6 @@ MACHINE_START(OMAP_APOLLON, "OMAP24xx Apollon")
 	.handle_irq	= omap2_intc_handle_irq,
 	.init_machine	= omap_apollon_init,
 	.init_late	= omap2420_init_late,
-	.timer		= &omap2_timer,
+	.init_time	= omap2_sync32k_timer_init,
 	.restart	= omap2xxx_restart,
 MACHINE_END
diff --git a/arch/arm/mach-omap2/board-cm-t35.c b/arch/arm/mach-omap2/board-cm-t35.c
index b3102c2f4a3c..68647c389190 100644
--- a/arch/arm/mach-omap2/board-cm-t35.c
+++ b/arch/arm/mach-omap2/board-cm-t35.c
@@ -751,7 +751,7 @@ MACHINE_START(CM_T35, "Compulab CM-T35")
 	.handle_irq	= omap3_intc_handle_irq,
 	.init_machine	= cm_t35_init,
 	.init_late	= omap35xx_init_late,
-	.timer		= &omap3_timer,
+	.init_time	= omap3_sync32k_timer_init,
 	.restart	= omap3xxx_restart,
 MACHINE_END
 
@@ -764,6 +764,6 @@ MACHINE_START(CM_T3730, "Compulab CM-T3730")
 	.handle_irq	= omap3_intc_handle_irq,
 	.init_machine	= cm_t3730_init,
 	.init_late     = omap3630_init_late,
-	.timer		= &omap3_timer,
+	.init_time	= omap3_sync32k_timer_init,
 	.restart	= omap3xxx_restart,
 MACHINE_END
diff --git a/arch/arm/mach-omap2/board-cm-t3517.c b/arch/arm/mach-omap2/board-cm-t3517.c
index ebbc2adb499e..6a9529ab95cd 100644
--- a/arch/arm/mach-omap2/board-cm-t3517.c
+++ b/arch/arm/mach-omap2/board-cm-t3517.c
@@ -297,6 +297,6 @@ MACHINE_START(CM_T3517, "Compulab CM-T3517")
 	.handle_irq	= omap3_intc_handle_irq,
 	.init_machine	= cm_t3517_init,
 	.init_late	= am35xx_init_late,
-	.timer		= &omap3_gp_timer,
+	.init_time	= omap3_gp_gptimer_timer_init,
 	.restart	= omap3xxx_restart,
 MACHINE_END
diff --git a/arch/arm/mach-omap2/board-devkit8000.c b/arch/arm/mach-omap2/board-devkit8000.c
index 12865af25d3a..0b1d8f758088 100644
--- a/arch/arm/mach-omap2/board-devkit8000.c
+++ b/arch/arm/mach-omap2/board-devkit8000.c
@@ -643,6 +643,6 @@ MACHINE_START(DEVKIT8000, "OMAP3 Devkit8000")
 	.handle_irq	= omap3_intc_handle_irq,
 	.init_machine	= devkit8000_init,
 	.init_late	= omap35xx_init_late,
-	.timer		= &omap3_secure_timer,
+	.init_time	= omap3_secure_sync32k_timer_init,
 	.restart	= omap3xxx_restart,
 MACHINE_END
diff --git a/arch/arm/mach-omap2/board-generic.c b/arch/arm/mach-omap2/board-generic.c
index 53cb380b7877..8a5f814613c6 100644
--- a/arch/arm/mach-omap2/board-generic.c
+++ b/arch/arm/mach-omap2/board-generic.c
@@ -65,7 +65,7 @@ DT_MACHINE_START(OMAP242X_DT, "Generic OMAP2420 (Flattened Device Tree)")
 	.init_irq	= omap_intc_of_init,
 	.handle_irq	= omap2_intc_handle_irq,
 	.init_machine	= omap_generic_init,
-	.timer		= &omap2_timer,
+	.init_time	= omap2_sync32k_timer_init,
 	.dt_compat	= omap242x_boards_compat,
 	.restart	= omap2xxx_restart,
 MACHINE_END
@@ -84,7 +84,7 @@ DT_MACHINE_START(OMAP243X_DT, "Generic OMAP2430 (Flattened Device Tree)")
 	.init_irq	= omap_intc_of_init,
 	.handle_irq	= omap2_intc_handle_irq,
 	.init_machine	= omap_generic_init,
-	.timer		= &omap2_timer,
+	.init_time	= omap2_sync32k_timer_init,
 	.dt_compat	= omap243x_boards_compat,
 	.restart	= omap2xxx_restart,
 MACHINE_END
@@ -103,7 +103,7 @@ DT_MACHINE_START(OMAP3_DT, "Generic OMAP3 (Flattened Device Tree)")
 	.init_irq	= omap_intc_of_init,
 	.handle_irq	= omap3_intc_handle_irq,
 	.init_machine	= omap_generic_init,
-	.timer		= &omap3_timer,
+	.init_time	= omap3_sync32k_timer_init,
 	.dt_compat	= omap3_boards_compat,
 	.restart	= omap3xxx_restart,
 MACHINE_END
@@ -120,7 +120,7 @@ DT_MACHINE_START(OMAP3_GP_DT, "Generic OMAP3-GP (Flattened Device Tree)")
 	.init_irq	= omap_intc_of_init,
 	.handle_irq	= omap3_intc_handle_irq,
 	.init_machine	= omap_generic_init,
-	.timer		= &omap3_secure_timer,
+	.init_time	= omap3_secure_sync32k_timer_init,
 	.dt_compat	= omap3_gp_boards_compat,
 	.restart	= omap3xxx_restart,
 MACHINE_END
@@ -139,7 +139,7 @@ DT_MACHINE_START(AM33XX_DT, "Generic AM33XX (Flattened Device Tree)")
 	.init_irq	= omap_intc_of_init,
 	.handle_irq	= omap3_intc_handle_irq,
 	.init_machine	= omap_generic_init,
-	.timer		= &omap3_am33xx_timer,
+	.init_time	= omap3_am33xx_gptimer_timer_init,
 	.dt_compat	= am33xx_boards_compat,
 MACHINE_END
 #endif
@@ -159,7 +159,7 @@ DT_MACHINE_START(OMAP4_DT, "Generic OMAP4 (Flattened Device Tree)")
 	.handle_irq	= gic_handle_irq,
 	.init_machine	= omap_generic_init,
 	.init_late	= omap4430_init_late,
-	.timer		= &omap4_timer,
+	.init_time	= omap4_local_timer_init,
 	.dt_compat	= omap4_boards_compat,
 	.restart	= omap44xx_restart,
 MACHINE_END
@@ -179,7 +179,7 @@ DT_MACHINE_START(OMAP5_DT, "Generic OMAP5 (Flattened Device Tree)")
 	.init_irq	= omap_gic_of_init,
 	.handle_irq	= gic_handle_irq,
 	.init_machine	= omap_generic_init,
-	.timer		= &omap5_timer,
+	.init_time	= omap5_realtime_timer_init,
 	.dt_compat	= omap5_boards_compat,
 	.restart	= omap44xx_restart,
 MACHINE_END
diff --git a/arch/arm/mach-omap2/board-h4.c b/arch/arm/mach-omap2/board-h4.c
index 3be1311f9e33..812c829fa46f 100644
--- a/arch/arm/mach-omap2/board-h4.c
+++ b/arch/arm/mach-omap2/board-h4.c
@@ -342,6 +342,6 @@ MACHINE_START(OMAP_H4, "OMAP2420 H4 board")
 	.handle_irq	= omap2_intc_handle_irq,
 	.init_machine	= omap_h4_init,
 	.init_late	= omap2420_init_late,
-	.timer		= &omap2_timer,
+	.init_time	= omap2_sync32k_timer_init,
 	.restart	= omap2xxx_restart,
 MACHINE_END
diff --git a/arch/arm/mach-omap2/board-igep0020.c b/arch/arm/mach-omap2/board-igep0020.c
index 0f24cb84ba5a..5b447649f5a0 100644
--- a/arch/arm/mach-omap2/board-igep0020.c
+++ b/arch/arm/mach-omap2/board-igep0020.c
@@ -655,7 +655,7 @@ MACHINE_START(IGEP0020, "IGEP v2 board")
 	.handle_irq	= omap3_intc_handle_irq,
 	.init_machine	= igep_init,
 	.init_late	= omap35xx_init_late,
-	.timer		= &omap3_timer,
+	.init_time	= omap3_sync32k_timer_init,
 	.restart	= omap3xxx_restart,
 MACHINE_END
 
@@ -668,6 +668,6 @@ MACHINE_START(IGEP0030, "IGEP OMAP3 module")
 	.handle_irq	= omap3_intc_handle_irq,
 	.init_machine	= igep_init,
 	.init_late	= omap35xx_init_late,
-	.timer		= &omap3_timer,
+	.init_time	= omap3_sync32k_timer_init,
 	.restart	= omap3xxx_restart,
 MACHINE_END
diff --git a/arch/arm/mach-omap2/board-ldp.c b/arch/arm/mach-omap2/board-ldp.c
index 0869f4f3d3e1..ff440c0d04dd 100644
--- a/arch/arm/mach-omap2/board-ldp.c
+++ b/arch/arm/mach-omap2/board-ldp.c
@@ -435,6 +435,6 @@ MACHINE_START(OMAP_LDP, "OMAP LDP board")
 	.handle_irq	= omap3_intc_handle_irq,
 	.init_machine	= omap_ldp_init,
 	.init_late	= omap3430_init_late,
-	.timer		= &omap3_timer,
+	.init_time	= omap3_sync32k_timer_init,
 	.restart	= omap3xxx_restart,
 MACHINE_END
diff --git a/arch/arm/mach-omap2/board-n8x0.c b/arch/arm/mach-omap2/board-n8x0.c
index 0abb30fe399c..f6eeb87e4e95 100644
--- a/arch/arm/mach-omap2/board-n8x0.c
+++ b/arch/arm/mach-omap2/board-n8x0.c
@@ -731,7 +731,7 @@ MACHINE_START(NOKIA_N800, "Nokia N800")
 	.handle_irq	= omap2_intc_handle_irq,
 	.init_machine	= n8x0_init_machine,
 	.init_late	= omap2420_init_late,
-	.timer		= &omap2_timer,
+	.init_time	= omap2_sync32k_timer_init,
 	.restart	= omap2xxx_restart,
 MACHINE_END
 
@@ -744,7 +744,7 @@ MACHINE_START(NOKIA_N810, "Nokia N810")
 	.handle_irq	= omap2_intc_handle_irq,
 	.init_machine	= n8x0_init_machine,
 	.init_late	= omap2420_init_late,
-	.timer		= &omap2_timer,
+	.init_time	= omap2_sync32k_timer_init,
 	.restart	= omap2xxx_restart,
 MACHINE_END
 
@@ -757,6 +757,6 @@ MACHINE_START(NOKIA_N810_WIMAX, "Nokia N810 WiMAX")
 	.handle_irq	= omap2_intc_handle_irq,
 	.init_machine	= n8x0_init_machine,
 	.init_late	= omap2420_init_late,
-	.timer		= &omap2_timer,
+	.init_time	= omap2_sync32k_timer_init,
 	.restart	= omap2xxx_restart,
 MACHINE_END
diff --git a/arch/arm/mach-omap2/board-omap3beagle.c b/arch/arm/mach-omap2/board-omap3beagle.c
index 22c483d5dfa8..b81b4585f46f 100644
--- a/arch/arm/mach-omap2/board-omap3beagle.c
+++ b/arch/arm/mach-omap2/board-omap3beagle.c
@@ -544,6 +544,6 @@ MACHINE_START(OMAP3_BEAGLE, "OMAP3 Beagle Board")
 	.handle_irq	= omap3_intc_handle_irq,
 	.init_machine	= omap3_beagle_init,
 	.init_late	= omap3_init_late,
-	.timer		= &omap3_secure_timer,
+	.init_time	= omap3_secure_sync32k_timer_init,
 	.restart	= omap3xxx_restart,
 MACHINE_END
diff --git a/arch/arm/mach-omap2/board-omap3evm.c b/arch/arm/mach-omap2/board-omap3evm.c
index 3985f35aee06..f2f636b19762 100644
--- a/arch/arm/mach-omap2/board-omap3evm.c
+++ b/arch/arm/mach-omap2/board-omap3evm.c
@@ -757,6 +757,6 @@ MACHINE_START(OMAP3EVM, "OMAP3 EVM")
 	.handle_irq	= omap3_intc_handle_irq,
 	.init_machine	= omap3_evm_init,
 	.init_late	= omap35xx_init_late,
-	.timer		= &omap3_timer,
+	.init_time	= omap3_sync32k_timer_init,
 	.restart	= omap3xxx_restart,
 MACHINE_END
diff --git a/arch/arm/mach-omap2/board-omap3logic.c b/arch/arm/mach-omap2/board-omap3logic.c
index 2a065ba6eb58..0fba43a9b07d 100644
--- a/arch/arm/mach-omap2/board-omap3logic.c
+++ b/arch/arm/mach-omap2/board-omap3logic.c
@@ -231,7 +231,7 @@ MACHINE_START(OMAP3_TORPEDO, "Logic OMAP3 Torpedo board")
 	.handle_irq	= omap3_intc_handle_irq,
 	.init_machine	= omap3logic_init,
 	.init_late	= omap35xx_init_late,
-	.timer		= &omap3_timer,
+	.init_time	= omap3_sync32k_timer_init,
 	.restart	= omap3xxx_restart,
 MACHINE_END
 
@@ -244,6 +244,6 @@ MACHINE_START(OMAP3530_LV_SOM, "OMAP Logic 3530 LV SOM board")
 	.handle_irq	= omap3_intc_handle_irq,
 	.init_machine	= omap3logic_init,
 	.init_late	= omap35xx_init_late,
-	.timer		= &omap3_timer,
+	.init_time	= omap3_sync32k_timer_init,
 	.restart	= omap3xxx_restart,
 MACHINE_END
diff --git a/arch/arm/mach-omap2/board-omap3pandora.c b/arch/arm/mach-omap2/board-omap3pandora.c
index a53a6683c1b8..12e181689340 100644
--- a/arch/arm/mach-omap2/board-omap3pandora.c
+++ b/arch/arm/mach-omap2/board-omap3pandora.c
@@ -618,6 +618,6 @@ MACHINE_START(OMAP3_PANDORA, "Pandora Handheld Console")
 	.handle_irq	= omap3_intc_handle_irq,
 	.init_machine	= omap3pandora_init,
 	.init_late	= omap35xx_init_late,
-	.timer		= &omap3_timer,
+	.init_time	= omap3_sync32k_timer_init,
 	.restart	= omap3xxx_restart,
 MACHINE_END
diff --git a/arch/arm/mach-omap2/board-omap3stalker.c b/arch/arm/mach-omap2/board-omap3stalker.c
index 53a6cbcf9747..13ee40545604 100644
--- a/arch/arm/mach-omap2/board-omap3stalker.c
+++ b/arch/arm/mach-omap2/board-omap3stalker.c
@@ -427,6 +427,6 @@ MACHINE_START(SBC3530, "OMAP3 STALKER")
 	.handle_irq		= omap3_intc_handle_irq,
 	.init_machine		= omap3_stalker_init,
 	.init_late		= omap35xx_init_late,
-	.timer			= &omap3_secure_timer,
+	.init_time		= omap3_secure_sync32k_timer_init,
 	.restart		= omap3xxx_restart,
 MACHINE_END
diff --git a/arch/arm/mach-omap2/board-omap3touchbook.c b/arch/arm/mach-omap2/board-omap3touchbook.c
index 263cb9cfbf37..36c455c85ed9 100644
--- a/arch/arm/mach-omap2/board-omap3touchbook.c
+++ b/arch/arm/mach-omap2/board-omap3touchbook.c
@@ -386,6 +386,6 @@ MACHINE_START(TOUCHBOOK, "OMAP3 touchbook Board")
 	.handle_irq	= omap3_intc_handle_irq,
 	.init_machine	= omap3_touchbook_init,
 	.init_late	= omap3430_init_late,
-	.timer		= &omap3_secure_timer,
+	.init_time	= omap3_secure_sync32k_timer_init,
 	.restart	= omap3xxx_restart,
 MACHINE_END
diff --git a/arch/arm/mach-omap2/board-omap4panda.c b/arch/arm/mach-omap2/board-omap4panda.c
index 5c8e9cee2c2e..ed8240c1a9b9 100644
--- a/arch/arm/mach-omap2/board-omap4panda.c
+++ b/arch/arm/mach-omap2/board-omap4panda.c
@@ -456,6 +456,6 @@ MACHINE_START(OMAP4_PANDA, "OMAP4 Panda board")
 	.handle_irq	= gic_handle_irq,
 	.init_machine	= omap4_panda_init,
 	.init_late	= omap4430_init_late,
-	.timer		= &omap4_timer,
+	.init_time	= omap4_local_timer_init,
 	.restart	= omap44xx_restart,
 MACHINE_END
diff --git a/arch/arm/mach-omap2/board-overo.c b/arch/arm/mach-omap2/board-overo.c
index c8fde3e56441..233a37d541c3 100644
--- a/arch/arm/mach-omap2/board-overo.c
+++ b/arch/arm/mach-omap2/board-overo.c
@@ -551,6 +551,6 @@ MACHINE_START(OVERO, "Gumstix Overo")
 	.handle_irq	= omap3_intc_handle_irq,
 	.init_machine	= overo_init,
 	.init_late	= omap35xx_init_late,
-	.timer		= &omap3_timer,
+	.init_time	= omap3_sync32k_timer_init,
 	.restart	= omap3xxx_restart,
 MACHINE_END
diff --git a/arch/arm/mach-omap2/board-rm680.c b/arch/arm/mach-omap2/board-rm680.c
index 0c777b75e484..386a2ddc1173 100644
--- a/arch/arm/mach-omap2/board-rm680.c
+++ b/arch/arm/mach-omap2/board-rm680.c
@@ -147,7 +147,7 @@ MACHINE_START(NOKIA_RM680, "Nokia RM-680 board")
 	.handle_irq	= omap3_intc_handle_irq,
 	.init_machine	= rm680_init,
 	.init_late	= omap3630_init_late,
-	.timer		= &omap3_timer,
+	.init_time	= omap3_sync32k_timer_init,
 	.restart	= omap3xxx_restart,
 MACHINE_END
 
@@ -160,6 +160,6 @@ MACHINE_START(NOKIA_RM696, "Nokia RM-696 board")
 	.handle_irq	= omap3_intc_handle_irq,
 	.init_machine	= rm680_init,
 	.init_late	= omap3630_init_late,
-	.timer		= &omap3_timer,
+	.init_time	= omap3_sync32k_timer_init,
 	.restart	= omap3xxx_restart,
 MACHINE_END
diff --git a/arch/arm/mach-omap2/board-rx51.c b/arch/arm/mach-omap2/board-rx51.c
index d0374ea2dfb0..f7c4616cbb60 100644
--- a/arch/arm/mach-omap2/board-rx51.c
+++ b/arch/arm/mach-omap2/board-rx51.c
@@ -123,6 +123,6 @@ MACHINE_START(NOKIA_RX51, "Nokia RX-51 board")
 	.handle_irq	= omap3_intc_handle_irq,
 	.init_machine	= rx51_init,
 	.init_late	= omap3430_init_late,
-	.timer		= &omap3_timer,
+	.init_time	= omap3_sync32k_timer_init,
 	.restart	= omap3xxx_restart,
 MACHINE_END
diff --git a/arch/arm/mach-omap2/board-ti8168evm.c b/arch/arm/mach-omap2/board-ti8168evm.c
index 1a3e056d63a7..6273c286e1d8 100644
--- a/arch/arm/mach-omap2/board-ti8168evm.c
+++ b/arch/arm/mach-omap2/board-ti8168evm.c
@@ -43,7 +43,7 @@ MACHINE_START(TI8168EVM, "ti8168evm")
 	.map_io		= ti81xx_map_io,
 	.init_early	= ti81xx_init_early,
 	.init_irq	= ti81xx_init_irq,
-	.timer		= &omap3_timer,
+	.init_time	= omap3_sync32k_timer_init,
 	.init_machine	= ti81xx_evm_init,
 	.init_late	= ti81xx_init_late,
 	.restart	= omap44xx_restart,
@@ -55,7 +55,7 @@ MACHINE_START(TI8148EVM, "ti8148evm")
 	.map_io		= ti81xx_map_io,
 	.init_early	= ti81xx_init_early,
 	.init_irq	= ti81xx_init_irq,
-	.timer		= &omap3_timer,
+	.init_time	= omap3_sync32k_timer_init,
 	.init_machine	= ti81xx_evm_init,
 	.init_late	= ti81xx_init_late,
 	.restart	= omap44xx_restart,
diff --git a/arch/arm/mach-omap2/board-zoom.c b/arch/arm/mach-omap2/board-zoom.c
index d7fa31e67238..d257cf1e0abe 100644
--- a/arch/arm/mach-omap2/board-zoom.c
+++ b/arch/arm/mach-omap2/board-zoom.c
@@ -137,7 +137,7 @@ MACHINE_START(OMAP_ZOOM2, "OMAP Zoom2 board")
 	.handle_irq	= omap3_intc_handle_irq,
 	.init_machine	= omap_zoom_init,
 	.init_late	= omap3430_init_late,
-	.timer		= &omap3_timer,
+	.init_time	= omap3_sync32k_timer_init,
 	.restart	= omap3xxx_restart,
 MACHINE_END
 
@@ -150,6 +150,6 @@ MACHINE_START(OMAP_ZOOM3, "OMAP Zoom3 board")
 	.handle_irq	= omap3_intc_handle_irq,
 	.init_machine	= omap_zoom_init,
 	.init_late	= omap3630_init_late,
-	.timer		= &omap3_timer,
+	.init_time	= omap3_sync32k_timer_init,
 	.restart	= omap3xxx_restart,
 MACHINE_END
diff --git a/arch/arm/mach-omap2/common.h b/arch/arm/mach-omap2/common.h
index 948bcaa82eb6..b4350274361b 100644
--- a/arch/arm/mach-omap2/common.h
+++ b/arch/arm/mach-omap2/common.h
@@ -79,13 +79,13 @@ static inline int omap_mux_late_init(void)
 
 extern void omap2_init_common_infrastructure(void);
 
-extern struct sys_timer omap2_timer;
-extern struct sys_timer omap3_timer;
-extern struct sys_timer omap3_secure_timer;
-extern struct sys_timer omap3_gp_timer;
-extern struct sys_timer omap3_am33xx_timer;
-extern struct sys_timer omap4_timer;
-extern struct sys_timer omap5_timer;
+extern void omap2_sync32k_timer_init(void);
+extern void omap3_sync32k_timer_init(void);
+extern void omap3_secure_sync32k_timer_init(void);
+extern void omap3_gp_gptimer_timer_init(void);
+extern void omap3_am33xx_gptimer_timer_init(void);
+extern void omap4_local_timer_init(void);
+extern void omap5_realtime_timer_init(void);
 
 void omap2420_init_early(void);
 void omap2430_init_early(void);
diff --git a/arch/arm/mach-omap2/timer.c b/arch/arm/mach-omap2/timer.c
index 691aa674665a..5975a42e16d4 100644
--- a/arch/arm/mach-omap2/timer.c
+++ b/arch/arm/mach-omap2/timer.c
@@ -556,7 +556,7 @@ static inline void __init realtime_counter_init(void)
 
 #define OMAP_SYS_GP_TIMER_INIT(name, clkev_nr, clkev_src, clkev_prop,	\
 			       clksrc_nr, clksrc_src)			\
-static void __init omap##name##_gptimer_timer_init(void)		\
+void __init omap##name##_gptimer_timer_init(void)			\
 {									\
 	omap_dmtimer_init();						\
 	omap2_gp_clockevent_init((clkev_nr), clkev_src, clkev_prop);	\
@@ -565,7 +565,7 @@ static void __init omap##name##_gptimer_timer_init(void)		\
 
 #define OMAP_SYS_32K_TIMER_INIT(name, clkev_nr, clkev_src, clkev_prop,	\
 				clksrc_nr, clksrc_src)			\
-static void __init omap##name##_sync32k_timer_init(void)		\
+void __init omap##name##_sync32k_timer_init(void)		\
 {									\
 	omap_dmtimer_init();						\
 	omap2_gp_clockevent_init((clkev_nr), clkev_src, clkev_prop);	\
@@ -576,33 +576,23 @@ static void __init omap##name##_sync32k_timer_init(void)		\
 		omap2_sync32k_clocksource_init();			\
 }
 
-#define OMAP_SYS_TIMER(name, clksrc)					\
-struct sys_timer omap##name##_timer = {					\
-	.init	= omap##name##_##clksrc##_timer_init,			\
-};
-
 #ifdef CONFIG_ARCH_OMAP2
 OMAP_SYS_32K_TIMER_INIT(2, 1, OMAP2_32K_SOURCE, "ti,timer-alwon",
 			2, OMAP2_MPU_SOURCE);
-OMAP_SYS_TIMER(2, sync32k);
 #endif /* CONFIG_ARCH_OMAP2 */
 
 #ifdef CONFIG_ARCH_OMAP3
 OMAP_SYS_32K_TIMER_INIT(3, 1, OMAP3_32K_SOURCE, "ti,timer-alwon",
 			2, OMAP3_MPU_SOURCE);
-OMAP_SYS_TIMER(3, sync32k);
 OMAP_SYS_32K_TIMER_INIT(3_secure, 12, OMAP3_32K_SOURCE, "ti,timer-secure",
 			2, OMAP3_MPU_SOURCE);
-OMAP_SYS_TIMER(3_secure, sync32k);
 OMAP_SYS_GP_TIMER_INIT(3_gp, 1, OMAP3_MPU_SOURCE, "ti,timer-alwon",
 		       2, OMAP3_MPU_SOURCE);
-OMAP_SYS_TIMER(3_gp, gptimer);
 #endif /* CONFIG_ARCH_OMAP3 */
 
 #ifdef CONFIG_SOC_AM33XX
 OMAP_SYS_GP_TIMER_INIT(3_am33xx, 1, OMAP4_MPU_SOURCE, "ti,timer-alwon",
 		       2, OMAP4_MPU_SOURCE);
-OMAP_SYS_TIMER(3_am33xx, gptimer);
 #endif /* CONFIG_SOC_AM33XX */
 
 #ifdef CONFIG_ARCH_OMAP4
@@ -610,7 +600,7 @@ OMAP_SYS_32K_TIMER_INIT(4, 1, OMAP4_32K_SOURCE, "ti,timer-alwon",
 			2, OMAP4_MPU_SOURCE);
 #ifdef CONFIG_LOCAL_TIMERS
 static DEFINE_TWD_LOCAL_TIMER(twd_local_timer, OMAP44XX_LOCAL_TWD_BASE, 29);
-static void __init omap4_local_timer_init(void)
+void __init omap4_local_timer_init(void)
 {
 	omap4_sync32k_timer_init();
 	/* Local timers are not supprted on OMAP4430 ES1.0 */
@@ -628,18 +618,17 @@ static void __init omap4_local_timer_init(void)
 	}
 }
 #else /* CONFIG_LOCAL_TIMERS */
-static void __init omap4_local_timer_init(void)
+void __init omap4_local_timer_init(void)
 {
 	omap4_sync32k_timer_init();
 }
 #endif /* CONFIG_LOCAL_TIMERS */
-OMAP_SYS_TIMER(4, local);
 #endif /* CONFIG_ARCH_OMAP4 */
 
 #ifdef CONFIG_SOC_OMAP5
 OMAP_SYS_32K_TIMER_INIT(5, 1, OMAP4_32K_SOURCE, "ti,timer-alwon",
 			2, OMAP4_MPU_SOURCE);
-static void __init omap5_realtime_timer_init(void)
+void __init omap5_realtime_timer_init(void)
 {
 	int err;
 
@@ -650,7 +639,6 @@ static void __init omap5_realtime_timer_init(void)
 	if (err)
 		pr_err("%s: arch_timer_register failed %d\n", __func__, err);
 }
-OMAP_SYS_TIMER(5, realtime);
 #endif /* CONFIG_SOC_OMAP5 */
 
 /**
diff --git a/arch/arm/mach-orion5x/board-dt.c b/arch/arm/mach-orion5x/board-dt.c
index 32e5c211a89b..35a8014529ca 100644
--- a/arch/arm/mach-orion5x/board-dt.c
+++ b/arch/arm/mach-orion5x/board-dt.c
@@ -72,7 +72,7 @@ DT_MACHINE_START(ORION5X_DT, "Marvell Orion5x (Flattened Device Tree)")
 	.map_io		= orion5x_map_io,
 	.init_early	= orion5x_init_early,
 	.init_irq	= orion_dt_init_irq,
-	.timer		= &orion5x_timer,
+	.init_time	= orion5x_timer_init,
 	.init_machine	= orion5x_dt_init,
 	.restart	= orion5x_restart,
 	.dt_compat	= orion5x_dt_compat,
diff --git a/arch/arm/mach-orion5x/common.c b/arch/arm/mach-orion5x/common.c
index 550f92320afb..d068f1431c40 100644
--- a/arch/arm/mach-orion5x/common.c
+++ b/arch/arm/mach-orion5x/common.c
@@ -217,7 +217,7 @@ int __init orion5x_find_tclk(void)
 	return 166666667;
 }
 
-static void __init orion5x_timer_init(void)
+void __init orion5x_timer_init(void)
 {
 	orion5x_tclk = orion5x_find_tclk();
 
@@ -225,10 +225,6 @@ static void __init orion5x_timer_init(void)
 			IRQ_ORION5X_BRIDGE, orion5x_tclk);
 }
 
-struct sys_timer orion5x_timer = {
-	.init = orion5x_timer_init,
-};
-
 
 /*****************************************************************************
  * General
diff --git a/arch/arm/mach-orion5x/common.h b/arch/arm/mach-orion5x/common.h
index 7db5cdd9c4b7..e60345760283 100644
--- a/arch/arm/mach-orion5x/common.h
+++ b/arch/arm/mach-orion5x/common.h
@@ -15,7 +15,7 @@ void orion5x_init(void);
 void orion5x_id(u32 *dev, u32 *rev, char **dev_name);
 void clk_init(void);
 extern int orion5x_tclk;
-extern struct sys_timer orion5x_timer;
+extern void orion5x_timer_init(void);
 
 /*
  * Enumerations and functions for Orion windows mapping. Used by Orion core
diff --git a/arch/arm/mach-orion5x/d2net-setup.c b/arch/arm/mach-orion5x/d2net-setup.c
index e3629c063df2..57d0af74874d 100644
--- a/arch/arm/mach-orion5x/d2net-setup.c
+++ b/arch/arm/mach-orion5x/d2net-setup.c
@@ -342,7 +342,7 @@ MACHINE_START(D2NET, "LaCie d2 Network")
 	.map_io		= orion5x_map_io,
 	.init_early	= orion5x_init_early,
 	.init_irq	= orion5x_init_irq,
-	.timer		= &orion5x_timer,
+	.init_time	= orion5x_timer_init,
 	.fixup		= tag_fixup_mem32,
 	.restart	= orion5x_restart,
 MACHINE_END
@@ -355,7 +355,7 @@ MACHINE_START(BIGDISK, "LaCie Big Disk Network")
 	.map_io		= orion5x_map_io,
 	.init_early	= orion5x_init_early,
 	.init_irq	= orion5x_init_irq,
-	.timer		= &orion5x_timer,
+	.init_time	= orion5x_timer_init,
 	.fixup		= tag_fixup_mem32,
 	.restart	= orion5x_restart,
 MACHINE_END
diff --git a/arch/arm/mach-orion5x/db88f5281-setup.c b/arch/arm/mach-orion5x/db88f5281-setup.c
index 41fe2b1ff47c..76665640087b 100644
--- a/arch/arm/mach-orion5x/db88f5281-setup.c
+++ b/arch/arm/mach-orion5x/db88f5281-setup.c
@@ -362,6 +362,6 @@ MACHINE_START(DB88F5281, "Marvell Orion-2 Development Board")
 	.map_io		= orion5x_map_io,
 	.init_early	= orion5x_init_early,
 	.init_irq	= orion5x_init_irq,
-	.timer		= &orion5x_timer,
+	.init_time	= orion5x_timer_init,
 	.restart	= orion5x_restart,
 MACHINE_END
diff --git a/arch/arm/mach-orion5x/dns323-setup.c b/arch/arm/mach-orion5x/dns323-setup.c
index e533588880ff..6eb1732757fd 100644
--- a/arch/arm/mach-orion5x/dns323-setup.c
+++ b/arch/arm/mach-orion5x/dns323-setup.c
@@ -714,7 +714,7 @@ MACHINE_START(DNS323, "D-Link DNS-323")
 	.map_io		= orion5x_map_io,
 	.init_early	= orion5x_init_early,
 	.init_irq	= orion5x_init_irq,
-	.timer		= &orion5x_timer,
+	.init_time	= orion5x_timer_init,
 	.fixup		= tag_fixup_mem32,
 	.restart	= orion5x_restart,
 MACHINE_END
diff --git a/arch/arm/mach-orion5x/kurobox_pro-setup.c b/arch/arm/mach-orion5x/kurobox_pro-setup.c
index f1ae10ae5bd4..b98403526218 100644
--- a/arch/arm/mach-orion5x/kurobox_pro-setup.c
+++ b/arch/arm/mach-orion5x/kurobox_pro-setup.c
@@ -383,7 +383,7 @@ MACHINE_START(KUROBOX_PRO, "Buffalo/Revogear Kurobox Pro")
 	.map_io		= orion5x_map_io,
 	.init_early	= orion5x_init_early,
 	.init_irq	= orion5x_init_irq,
-	.timer		= &orion5x_timer,
+	.init_time	= orion5x_timer_init,
 	.fixup		= tag_fixup_mem32,
 	.restart	= orion5x_restart,
 MACHINE_END
@@ -397,7 +397,7 @@ MACHINE_START(LINKSTATION_PRO, "Buffalo Linkstation Pro/Live")
 	.map_io		= orion5x_map_io,
 	.init_early	= orion5x_init_early,
 	.init_irq	= orion5x_init_irq,
-	.timer		= &orion5x_timer,
+	.init_time	= orion5x_timer_init,
 	.fixup		= tag_fixup_mem32,
 	.restart	= orion5x_restart,
 MACHINE_END
diff --git a/arch/arm/mach-orion5x/ls-chl-setup.c b/arch/arm/mach-orion5x/ls-chl-setup.c
index 0c9e413b5805..044da5b6a6ae 100644
--- a/arch/arm/mach-orion5x/ls-chl-setup.c
+++ b/arch/arm/mach-orion5x/ls-chl-setup.c
@@ -322,7 +322,7 @@ MACHINE_START(LINKSTATION_LSCHL, "Buffalo Linkstation LiveV3 (LS-CHL)")
 	.map_io		= orion5x_map_io,
 	.init_early	= orion5x_init_early,
 	.init_irq	= orion5x_init_irq,
-	.timer		= &orion5x_timer,
+	.init_time	= orion5x_timer_init,
 	.fixup		= tag_fixup_mem32,
 	.restart	= orion5x_restart,
 MACHINE_END
diff --git a/arch/arm/mach-orion5x/ls_hgl-setup.c b/arch/arm/mach-orion5x/ls_hgl-setup.c
index c1b5d8a58037..d49f93423f52 100644
--- a/arch/arm/mach-orion5x/ls_hgl-setup.c
+++ b/arch/arm/mach-orion5x/ls_hgl-setup.c
@@ -269,7 +269,7 @@ MACHINE_START(LINKSTATION_LS_HGL, "Buffalo Linkstation LS-HGL")
 	.map_io		= orion5x_map_io,
 	.init_early	= orion5x_init_early,
 	.init_irq	= orion5x_init_irq,
-	.timer		= &orion5x_timer,
+	.init_time	= orion5x_timer_init,
 	.fixup		= tag_fixup_mem32,
 	.restart	= orion5x_restart,
 MACHINE_END
diff --git a/arch/arm/mach-orion5x/lsmini-setup.c b/arch/arm/mach-orion5x/lsmini-setup.c
index 949eaa8f12e3..8e3965c6c0fe 100644
--- a/arch/arm/mach-orion5x/lsmini-setup.c
+++ b/arch/arm/mach-orion5x/lsmini-setup.c
@@ -271,7 +271,7 @@ MACHINE_START(LINKSTATION_MINI, "Buffalo Linkstation Mini")
 	.map_io		= orion5x_map_io,
 	.init_early	= orion5x_init_early,
 	.init_irq	= orion5x_init_irq,
-	.timer		= &orion5x_timer,
+	.init_time	= orion5x_timer_init,
 	.fixup		= tag_fixup_mem32,
 	.restart	= orion5x_restart,
 MACHINE_END
diff --git a/arch/arm/mach-orion5x/mss2-setup.c b/arch/arm/mach-orion5x/mss2-setup.c
index 1c16d045333e..0ec94a1f2b16 100644
--- a/arch/arm/mach-orion5x/mss2-setup.c
+++ b/arch/arm/mach-orion5x/mss2-setup.c
@@ -265,7 +265,7 @@ MACHINE_START(MSS2, "Maxtor Shared Storage II")
 	.map_io		= orion5x_map_io,
 	.init_early	= orion5x_init_early,
 	.init_irq	= orion5x_init_irq,
-	.timer		= &orion5x_timer,
+	.init_time	= orion5x_timer_init,
 	.fixup		= tag_fixup_mem32,
 	.restart	= orion5x_restart,
 MACHINE_END
diff --git a/arch/arm/mach-orion5x/mv2120-setup.c b/arch/arm/mach-orion5x/mv2120-setup.c
index c87fde4deeca..18143f2a9093 100644
--- a/arch/arm/mach-orion5x/mv2120-setup.c
+++ b/arch/arm/mach-orion5x/mv2120-setup.c
@@ -233,7 +233,7 @@ MACHINE_START(MV2120, "HP Media Vault mv2120")
 	.map_io		= orion5x_map_io,
 	.init_early	= orion5x_init_early,
 	.init_irq	= orion5x_init_irq,
-	.timer		= &orion5x_timer,
+	.init_time	= orion5x_timer_init,
 	.fixup		= tag_fixup_mem32,
 	.restart	= orion5x_restart,
 MACHINE_END
diff --git a/arch/arm/mach-orion5x/net2big-setup.c b/arch/arm/mach-orion5x/net2big-setup.c
index 3506f16c0bf2..282e503b003e 100644
--- a/arch/arm/mach-orion5x/net2big-setup.c
+++ b/arch/arm/mach-orion5x/net2big-setup.c
@@ -425,7 +425,7 @@ MACHINE_START(NET2BIG, "LaCie 2Big Network")
 	.map_io		= orion5x_map_io,
 	.init_early	= orion5x_init_early,
 	.init_irq	= orion5x_init_irq,
-	.timer		= &orion5x_timer,
+	.init_time	= orion5x_timer_init,
 	.fixup		= tag_fixup_mem32,
 	.restart	= orion5x_restart,
 MACHINE_END
diff --git a/arch/arm/mach-orion5x/rd88f5181l-fxo-setup.c b/arch/arm/mach-orion5x/rd88f5181l-fxo-setup.c
index 9b1c95310291..d6e72f672afb 100644
--- a/arch/arm/mach-orion5x/rd88f5181l-fxo-setup.c
+++ b/arch/arm/mach-orion5x/rd88f5181l-fxo-setup.c
@@ -171,7 +171,7 @@ MACHINE_START(RD88F5181L_FXO, "Marvell Orion-VoIP FXO Reference Design")
 	.map_io		= orion5x_map_io,
 	.init_early	= orion5x_init_early,
 	.init_irq	= orion5x_init_irq,
-	.timer		= &orion5x_timer,
+	.init_time	= orion5x_timer_init,
 	.fixup		= tag_fixup_mem32,
 	.restart	= orion5x_restart,
 MACHINE_END
diff --git a/arch/arm/mach-orion5x/rd88f5181l-ge-setup.c b/arch/arm/mach-orion5x/rd88f5181l-ge-setup.c
index 51ba2b81a10b..c8b7913310e5 100644
--- a/arch/arm/mach-orion5x/rd88f5181l-ge-setup.c
+++ b/arch/arm/mach-orion5x/rd88f5181l-ge-setup.c
@@ -183,7 +183,7 @@ MACHINE_START(RD88F5181L_GE, "Marvell Orion-VoIP GE Reference Design")
 	.map_io		= orion5x_map_io,
 	.init_early	= orion5x_init_early,
 	.init_irq	= orion5x_init_irq,
-	.timer		= &orion5x_timer,
+	.init_time	= orion5x_timer_init,
 	.fixup		= tag_fixup_mem32,
 	.restart	= orion5x_restart,
 MACHINE_END
diff --git a/arch/arm/mach-orion5x/rd88f5182-setup.c b/arch/arm/mach-orion5x/rd88f5182-setup.c
index 0a56b9444f1b..f9e156725d7c 100644
--- a/arch/arm/mach-orion5x/rd88f5182-setup.c
+++ b/arch/arm/mach-orion5x/rd88f5182-setup.c
@@ -281,6 +281,6 @@ MACHINE_START(RD88F5182, "Marvell Orion-NAS Reference Design")
 	.map_io		= orion5x_map_io,
 	.init_early	= orion5x_init_early,
 	.init_irq	= orion5x_init_irq,
-	.timer		= &orion5x_timer,
+	.init_time	= orion5x_timer_init,
 	.restart	= orion5x_restart,
 MACHINE_END
diff --git a/arch/arm/mach-orion5x/rd88f6183ap-ge-setup.c b/arch/arm/mach-orion5x/rd88f6183ap-ge-setup.c
index ed50910b08a4..78a1e6ab1b9d 100644
--- a/arch/arm/mach-orion5x/rd88f6183ap-ge-setup.c
+++ b/arch/arm/mach-orion5x/rd88f6183ap-ge-setup.c
@@ -123,7 +123,7 @@ MACHINE_START(RD88F6183AP_GE, "Marvell Orion-1-90 AP GE Reference Design")
 	.map_io		= orion5x_map_io,
 	.init_early	= orion5x_init_early,
 	.init_irq	= orion5x_init_irq,
-	.timer		= &orion5x_timer,
+	.init_time	= orion5x_timer_init,
 	.fixup		= tag_fixup_mem32,
 	.restart	= orion5x_restart,
 MACHINE_END
diff --git a/arch/arm/mach-orion5x/terastation_pro2-setup.c b/arch/arm/mach-orion5x/terastation_pro2-setup.c
index 90e571dc4deb..acc0877ec1c9 100644
--- a/arch/arm/mach-orion5x/terastation_pro2-setup.c
+++ b/arch/arm/mach-orion5x/terastation_pro2-setup.c
@@ -361,7 +361,7 @@ MACHINE_START(TERASTATION_PRO2, "Buffalo Terastation Pro II/Live")
 	.map_io		= orion5x_map_io,
 	.init_early	= orion5x_init_early,
 	.init_irq	= orion5x_init_irq,
-	.timer		= &orion5x_timer,
+	.init_time	= orion5x_timer_init,
 	.fixup		= tag_fixup_mem32,
 	.restart	= orion5x_restart,
 MACHINE_END
diff --git a/arch/arm/mach-orion5x/ts209-setup.c b/arch/arm/mach-orion5x/ts209-setup.c
index b184f680e0db..9c17f0c2b488 100644
--- a/arch/arm/mach-orion5x/ts209-setup.c
+++ b/arch/arm/mach-orion5x/ts209-setup.c
@@ -326,7 +326,7 @@ MACHINE_START(TS209, "QNAP TS-109/TS-209")
 	.map_io		= orion5x_map_io,
 	.init_early	= orion5x_init_early,
 	.init_irq	= orion5x_init_irq,
-	.timer		= &orion5x_timer,
+	.init_time	= orion5x_timer_init,
 	.fixup		= tag_fixup_mem32,
 	.restart	= orion5x_restart,
 MACHINE_END
diff --git a/arch/arm/mach-orion5x/ts409-setup.c b/arch/arm/mach-orion5x/ts409-setup.c
index a5c2e64c4ece..8cc5ab6c503e 100644
--- a/arch/arm/mach-orion5x/ts409-setup.c
+++ b/arch/arm/mach-orion5x/ts409-setup.c
@@ -315,7 +315,7 @@ MACHINE_START(TS409, "QNAP TS-409")
 	.map_io		= orion5x_map_io,
 	.init_early	= orion5x_init_early,
 	.init_irq	= orion5x_init_irq,
-	.timer		= &orion5x_timer,
+	.init_time	= orion5x_timer_init,
 	.fixup		= tag_fixup_mem32,
 	.restart	= orion5x_restart,
 MACHINE_END
diff --git a/arch/arm/mach-orion5x/ts78xx-setup.c b/arch/arm/mach-orion5x/ts78xx-setup.c
index b0727dcd1ef9..e960855d32ac 100644
--- a/arch/arm/mach-orion5x/ts78xx-setup.c
+++ b/arch/arm/mach-orion5x/ts78xx-setup.c
@@ -619,6 +619,6 @@ MACHINE_START(TS78XX, "Technologic Systems TS-78xx SBC")
 	.map_io		= ts78xx_map_io,
 	.init_early	= orion5x_init_early,
 	.init_irq	= orion5x_init_irq,
-	.timer		= &orion5x_timer,
+	.init_time	= orion5x_timer_init,
 	.restart	= orion5x_restart,
 MACHINE_END
diff --git a/arch/arm/mach-orion5x/wnr854t-setup.c b/arch/arm/mach-orion5x/wnr854t-setup.c
index 754c12b6abf0..66552ca7e05d 100644
--- a/arch/arm/mach-orion5x/wnr854t-setup.c
+++ b/arch/arm/mach-orion5x/wnr854t-setup.c
@@ -176,7 +176,7 @@ MACHINE_START(WNR854T, "Netgear WNR854T")
 	.map_io		= orion5x_map_io,
 	.init_early	= orion5x_init_early,
 	.init_irq	= orion5x_init_irq,
-	.timer		= &orion5x_timer,
+	.init_time	= orion5x_timer_init,
 	.fixup		= tag_fixup_mem32,
 	.restart	= orion5x_restart,
 MACHINE_END
diff --git a/arch/arm/mach-orion5x/wrt350n-v2-setup.c b/arch/arm/mach-orion5x/wrt350n-v2-setup.c
index 45c21251eb1e..2c5408e2e689 100644
--- a/arch/arm/mach-orion5x/wrt350n-v2-setup.c
+++ b/arch/arm/mach-orion5x/wrt350n-v2-setup.c
@@ -264,7 +264,7 @@ MACHINE_START(WRT350N_V2, "Linksys WRT350N v2")
 	.map_io		= orion5x_map_io,
 	.init_early	= orion5x_init_early,
 	.init_irq	= orion5x_init_irq,
-	.timer		= &orion5x_timer,
+	.init_time	= orion5x_timer_init,
 	.fixup		= tag_fixup_mem32,
 	.restart	= orion5x_restart,
 MACHINE_END
diff --git a/arch/arm/mach-picoxcell/common.c b/arch/arm/mach-picoxcell/common.c
index f6c0849af5e9..518c59bdbcda 100644
--- a/arch/arm/mach-picoxcell/common.c
+++ b/arch/arm/mach-picoxcell/common.c
@@ -99,7 +99,7 @@ DT_MACHINE_START(PICOXCELL, "Picochip picoXcell")
 	.nr_irqs	= NR_IRQS_LEGACY,
 	.init_irq	= picoxcell_init_irq,
 	.handle_irq	= vic_handle_irq,
-	.timer		= &dw_apb_timer,
+	.init_time	= dw_apb_timer_init,
 	.init_machine	= picoxcell_init_machine,
 	.dt_compat	= picoxcell_dt_match,
 	.restart	= picoxcell_wdt_restart,
diff --git a/arch/arm/mach-picoxcell/common.h b/arch/arm/mach-picoxcell/common.h
index a65cb02f84c8..481b42a4ef15 100644
--- a/arch/arm/mach-picoxcell/common.h
+++ b/arch/arm/mach-picoxcell/common.h
@@ -12,6 +12,6 @@
 
 #include <asm/mach/time.h>
 
-extern struct sys_timer dw_apb_timer;
+extern void dw_apb_timer_init(void);
 
 #endif /* __PICOXCELL_COMMON_H__ */
diff --git a/arch/arm/mach-prima2/common.c b/arch/arm/mach-prima2/common.c
index f25a54194639..ed3570e5eb8f 100644
--- a/arch/arm/mach-prima2/common.c
+++ b/arch/arm/mach-prima2/common.c
@@ -40,7 +40,7 @@ DT_MACHINE_START(PRIMA2_DT, "Generic PRIMA2 (Flattened Device Tree)")
 	/* Maintainer: Barry Song <baohua.song@csr.com> */
 	.map_io         = sirfsoc_map_lluart,
 	.init_irq	= sirfsoc_of_irq_init,
-	.timer		= &sirfsoc_timer,
+	.init_time	= sirfsoc_timer_init,
 	.dma_zone_size	= SZ_256M,
 	.init_machine	= sirfsoc_mach_init,
 	.init_late	= sirfsoc_init_late,
diff --git a/arch/arm/mach-prima2/common.h b/arch/arm/mach-prima2/common.h
index 60d826fc2185..9c75f124e3cf 100644
--- a/arch/arm/mach-prima2/common.h
+++ b/arch/arm/mach-prima2/common.h
@@ -12,7 +12,7 @@
 #include <linux/init.h>
 #include <asm/mach/time.h>
 
-extern struct sys_timer sirfsoc_timer;
+extern void sirfsoc_timer_init(void);
 
 extern void __init sirfsoc_of_irq_init(void);
 extern void __init sirfsoc_of_clk_init(void);
diff --git a/arch/arm/mach-prima2/timer.c b/arch/arm/mach-prima2/timer.c
index d95bf252f694..8c732a5beb7f 100644
--- a/arch/arm/mach-prima2/timer.c
+++ b/arch/arm/mach-prima2/timer.c
@@ -187,7 +187,7 @@ static void __init sirfsoc_clockevent_init(void)
 }
 
 /* initialize the kernel jiffy timer source */
-static void __init sirfsoc_timer_init(void)
+void __init sirfsoc_timer_init(void)
 {
 	unsigned long rate;
 	struct clk *clk;
@@ -226,7 +226,7 @@ static struct of_device_id timer_ids[] = {
 	{},
 };
 
-static void __init sirfsoc_of_timer_map(void)
+void __init sirfsoc_of_timer_map(void)
 {
 	struct device_node *np;
 	const unsigned int *intspec;
@@ -245,7 +245,3 @@ static void __init sirfsoc_of_timer_map(void)
 
 	of_node_put(np);
 }
-
-struct sys_timer sirfsoc_timer = {
-	.init = sirfsoc_timer_init,
-};
diff --git a/arch/arm/mach-pxa/balloon3.c b/arch/arm/mach-pxa/balloon3.c
index 208229342514..2f71b3fbd319 100644
--- a/arch/arm/mach-pxa/balloon3.c
+++ b/arch/arm/mach-pxa/balloon3.c
@@ -822,7 +822,7 @@ MACHINE_START(BALLOON3, "Balloon3")
 	.nr_irqs	= BALLOON3_NR_IRQS,
 	.init_irq	= balloon3_init_irq,
 	.handle_irq	= pxa27x_handle_irq,
-	.timer		= &pxa_timer,
+	.init_time	= pxa_timer_init,
 	.init_machine	= balloon3_init,
 	.atag_offset	= 0x100,
 	.restart	= pxa_restart,
diff --git a/arch/arm/mach-pxa/capc7117.c b/arch/arm/mach-pxa/capc7117.c
index 9a8760b72913..c092730749b9 100644
--- a/arch/arm/mach-pxa/capc7117.c
+++ b/arch/arm/mach-pxa/capc7117.c
@@ -153,7 +153,7 @@ MACHINE_START(CAPC7117,
 	.nr_irqs = PXA_NR_IRQS,
 	.init_irq = pxa3xx_init_irq,
 	.handle_irq = pxa3xx_handle_irq,
-	.timer = &pxa_timer,
+	.init_time	= pxa_timer_init,
 	.init_machine = capc7117_init,
 	.restart	= pxa_restart,
 MACHINE_END
diff --git a/arch/arm/mach-pxa/cm-x2xx.c b/arch/arm/mach-pxa/cm-x2xx.c
index a103c8ffea9f..bb99f59a36d8 100644
--- a/arch/arm/mach-pxa/cm-x2xx.c
+++ b/arch/arm/mach-pxa/cm-x2xx.c
@@ -520,7 +520,7 @@ MACHINE_START(ARMCORE, "Compulab CM-X2XX")
 	.init_irq	= cmx2xx_init_irq,
 	/* NOTE: pxa25x_handle_irq() works on PXA27x w/o camera support */
 	.handle_irq	= pxa25x_handle_irq,
-	.timer		= &pxa_timer,
+	.init_time	= pxa_timer_init,
 	.init_machine	= cmx2xx_init,
 #ifdef CONFIG_PCI
 	.dma_zone_size	= SZ_64M,
diff --git a/arch/arm/mach-pxa/cm-x300.c b/arch/arm/mach-pxa/cm-x300.c
index cc2b23afcaaf..8091aac89edf 100644
--- a/arch/arm/mach-pxa/cm-x300.c
+++ b/arch/arm/mach-pxa/cm-x300.c
@@ -854,7 +854,7 @@ MACHINE_START(CM_X300, "CM-X300 module")
 	.nr_irqs	= PXA_NR_IRQS,
 	.init_irq	= pxa3xx_init_irq,
 	.handle_irq	= pxa3xx_handle_irq,
-	.timer		= &pxa_timer,
+	.init_time	= pxa_timer_init,
 	.init_machine	= cm_x300_init,
 	.fixup		= cm_x300_fixup,
 	.restart	= pxa_restart,
diff --git a/arch/arm/mach-pxa/colibri-pxa270.c b/arch/arm/mach-pxa/colibri-pxa270.c
index b2f227d36125..5f9d9303b346 100644
--- a/arch/arm/mach-pxa/colibri-pxa270.c
+++ b/arch/arm/mach-pxa/colibri-pxa270.c
@@ -313,7 +313,7 @@ MACHINE_START(COLIBRI, "Toradex Colibri PXA270")
 	.nr_irqs	= PXA_NR_IRQS,
 	.init_irq	= pxa27x_init_irq,
 	.handle_irq	= pxa27x_handle_irq,
-	.timer		= &pxa_timer,
+	.init_time	= pxa_timer_init,
 	.restart	= pxa_restart,
 MACHINE_END
 
@@ -324,7 +324,7 @@ MACHINE_START(INCOME, "Income s.r.o. SH-Dmaster PXA270 SBC")
 	.nr_irqs	= PXA_NR_IRQS,
 	.init_irq	= pxa27x_init_irq,
 	.handle_irq	= pxa27x_handle_irq,
-	.timer		= &pxa_timer,
+	.init_time	= pxa_timer_init,
 	.restart	= pxa_restart,
 MACHINE_END
 
diff --git a/arch/arm/mach-pxa/colibri-pxa300.c b/arch/arm/mach-pxa/colibri-pxa300.c
index a9c9c163dd95..f1a1ac1fbd85 100644
--- a/arch/arm/mach-pxa/colibri-pxa300.c
+++ b/arch/arm/mach-pxa/colibri-pxa300.c
@@ -189,7 +189,7 @@ MACHINE_START(COLIBRI300, "Toradex Colibri PXA300")
 	.nr_irqs	= PXA_NR_IRQS,
 	.init_irq	= pxa3xx_init_irq,
 	.handle_irq	= pxa3xx_handle_irq,
-	.timer		= &pxa_timer,
+	.init_time	= pxa_timer_init,
 	.restart	= pxa_restart,
 MACHINE_END
 
diff --git a/arch/arm/mach-pxa/colibri-pxa320.c b/arch/arm/mach-pxa/colibri-pxa320.c
index 25515cd7e68f..f6cc8b0ab82f 100644
--- a/arch/arm/mach-pxa/colibri-pxa320.c
+++ b/arch/arm/mach-pxa/colibri-pxa320.c
@@ -259,7 +259,7 @@ MACHINE_START(COLIBRI320, "Toradex Colibri PXA320")
 	.nr_irqs	= PXA_NR_IRQS,
 	.init_irq	= pxa3xx_init_irq,
 	.handle_irq	= pxa3xx_handle_irq,
-	.timer		= &pxa_timer,
+	.init_time	= pxa_timer_init,
 	.restart	= pxa_restart,
 MACHINE_END
 
diff --git a/arch/arm/mach-pxa/corgi.c b/arch/arm/mach-pxa/corgi.c
index 7c83f52c549c..a5b8fead7d61 100644
--- a/arch/arm/mach-pxa/corgi.c
+++ b/arch/arm/mach-pxa/corgi.c
@@ -733,7 +733,7 @@ MACHINE_START(CORGI, "SHARP Corgi")
 	.init_irq	= pxa25x_init_irq,
 	.handle_irq	= pxa25x_handle_irq,
 	.init_machine	= corgi_init,
-	.timer		= &pxa_timer,
+	.init_time	= pxa_timer_init,
 	.restart	= corgi_restart,
 MACHINE_END
 #endif
@@ -746,7 +746,7 @@ MACHINE_START(SHEPHERD, "SHARP Shepherd")
 	.init_irq	= pxa25x_init_irq,
 	.handle_irq	= pxa25x_handle_irq,
 	.init_machine	= corgi_init,
-	.timer		= &pxa_timer,
+	.init_time	= pxa_timer_init,
 	.restart	= corgi_restart,
 MACHINE_END
 #endif
@@ -759,7 +759,7 @@ MACHINE_START(HUSKY, "SHARP Husky")
 	.init_irq	= pxa25x_init_irq,
 	.handle_irq	= pxa25x_handle_irq,
 	.init_machine	= corgi_init,
-	.timer		= &pxa_timer,
+	.init_time	= pxa_timer_init,
 	.restart	= corgi_restart,
 MACHINE_END
 #endif
diff --git a/arch/arm/mach-pxa/csb726.c b/arch/arm/mach-pxa/csb726.c
index 7039f44b3647..fadfff8feaef 100644
--- a/arch/arm/mach-pxa/csb726.c
+++ b/arch/arm/mach-pxa/csb726.c
@@ -278,6 +278,6 @@ MACHINE_START(CSB726, "Cogent CSB726")
 	.init_irq       = pxa27x_init_irq,
 	.handle_irq       = pxa27x_handle_irq,
 	.init_machine   = csb726_init,
-	.timer          = &pxa_timer,
+	.init_time	= pxa_timer_init,
 	.restart	= pxa_restart,
 MACHINE_END
diff --git a/arch/arm/mach-pxa/em-x270.c b/arch/arm/mach-pxa/em-x270.c
index 1b6411439ec8..446563a7d1ad 100644
--- a/arch/arm/mach-pxa/em-x270.c
+++ b/arch/arm/mach-pxa/em-x270.c
@@ -1298,7 +1298,7 @@ MACHINE_START(EM_X270, "Compulab EM-X270")
 	.nr_irqs	= PXA_NR_IRQS,
 	.init_irq	= pxa27x_init_irq,
 	.handle_irq	= pxa27x_handle_irq,
-	.timer		= &pxa_timer,
+	.init_time	= pxa_timer_init,
 	.init_machine	= em_x270_init,
 	.restart	= pxa_restart,
 MACHINE_END
@@ -1309,7 +1309,7 @@ MACHINE_START(EXEDA, "Compulab eXeda")
 	.nr_irqs	= PXA_NR_IRQS,
 	.init_irq	= pxa27x_init_irq,
 	.handle_irq	= pxa27x_handle_irq,
-	.timer		= &pxa_timer,
+	.init_time	= pxa_timer_init,
 	.init_machine	= em_x270_init,
 	.restart	= pxa_restart,
 MACHINE_END
diff --git a/arch/arm/mach-pxa/eseries.c b/arch/arm/mach-pxa/eseries.c
index be2ee9bf5c6e..8280ebcaab9f 100644
--- a/arch/arm/mach-pxa/eseries.c
+++ b/arch/arm/mach-pxa/eseries.c
@@ -195,7 +195,7 @@ MACHINE_START(E330, "Toshiba e330")
 	.handle_irq	= pxa25x_handle_irq,
 	.fixup		= eseries_fixup,
 	.init_machine	= e330_init,
-	.timer		= &pxa_timer,
+	.init_time	= pxa_timer_init,
 	.restart	= pxa_restart,
 MACHINE_END
 #endif
@@ -246,7 +246,7 @@ MACHINE_START(E350, "Toshiba e350")
 	.handle_irq	= pxa25x_handle_irq,
 	.fixup		= eseries_fixup,
 	.init_machine	= e350_init,
-	.timer		= &pxa_timer,
+	.init_time	= pxa_timer_init,
 	.restart	= pxa_restart,
 MACHINE_END
 #endif
@@ -370,7 +370,7 @@ MACHINE_START(E400, "Toshiba e400")
 	.handle_irq	= pxa25x_handle_irq,
 	.fixup		= eseries_fixup,
 	.init_machine	= e400_init,
-	.timer		= &pxa_timer,
+	.init_time	= pxa_timer_init,
 	.restart	= pxa_restart,
 MACHINE_END
 #endif
@@ -566,7 +566,7 @@ MACHINE_START(E740, "Toshiba e740")
 	.handle_irq	= pxa25x_handle_irq,
 	.fixup		= eseries_fixup,
 	.init_machine	= e740_init,
-	.timer		= &pxa_timer,
+	.init_time	= pxa_timer_init,
 	.restart	= pxa_restart,
 MACHINE_END
 #endif
@@ -765,7 +765,7 @@ MACHINE_START(E750, "Toshiba e750")
 	.handle_irq	= pxa25x_handle_irq,
 	.fixup		= eseries_fixup,
 	.init_machine	= e750_init,
-	.timer		= &pxa_timer,
+	.init_time	= pxa_timer_init,
 	.restart	= pxa_restart,
 MACHINE_END
 #endif
@@ -977,7 +977,7 @@ MACHINE_START(E800, "Toshiba e800")
 	.handle_irq	= pxa25x_handle_irq,
 	.fixup		= eseries_fixup,
 	.init_machine	= e800_init,
-	.timer		= &pxa_timer,
+	.init_time	= pxa_timer_init,
 	.restart	= pxa_restart,
 MACHINE_END
 #endif
diff --git a/arch/arm/mach-pxa/ezx.c b/arch/arm/mach-pxa/ezx.c
index dc58fa0edb66..dca10709be8f 100644
--- a/arch/arm/mach-pxa/ezx.c
+++ b/arch/arm/mach-pxa/ezx.c
@@ -802,7 +802,7 @@ MACHINE_START(EZX_A780, "Motorola EZX A780")
 	.nr_irqs	= EZX_NR_IRQS,
 	.init_irq       = pxa27x_init_irq,
 	.handle_irq       = pxa27x_handle_irq,
-	.timer          = &pxa_timer,
+	.init_time	= pxa_timer_init,
 	.init_machine   = a780_init,
 	.restart	= pxa_restart,
 MACHINE_END
@@ -869,7 +869,7 @@ MACHINE_START(EZX_E680, "Motorola EZX E680")
 	.nr_irqs	= EZX_NR_IRQS,
 	.init_irq       = pxa27x_init_irq,
 	.handle_irq       = pxa27x_handle_irq,
-	.timer          = &pxa_timer,
+	.init_time	= pxa_timer_init,
 	.init_machine   = e680_init,
 	.restart	= pxa_restart,
 MACHINE_END
@@ -936,7 +936,7 @@ MACHINE_START(EZX_A1200, "Motorola EZX A1200")
 	.nr_irqs	= EZX_NR_IRQS,
 	.init_irq       = pxa27x_init_irq,
 	.handle_irq       = pxa27x_handle_irq,
-	.timer          = &pxa_timer,
+	.init_time	= pxa_timer_init,
 	.init_machine   = a1200_init,
 	.restart	= pxa_restart,
 MACHINE_END
@@ -1128,7 +1128,7 @@ MACHINE_START(EZX_A910, "Motorola EZX A910")
 	.nr_irqs	= EZX_NR_IRQS,
 	.init_irq       = pxa27x_init_irq,
 	.handle_irq       = pxa27x_handle_irq,
-	.timer          = &pxa_timer,
+	.init_time	= pxa_timer_init,
 	.init_machine   = a910_init,
 	.restart	= pxa_restart,
 MACHINE_END
@@ -1195,7 +1195,7 @@ MACHINE_START(EZX_E6, "Motorola EZX E6")
 	.nr_irqs	= EZX_NR_IRQS,
 	.init_irq       = pxa27x_init_irq,
 	.handle_irq       = pxa27x_handle_irq,
-	.timer          = &pxa_timer,
+	.init_time	= pxa_timer_init,
 	.init_machine   = e6_init,
 	.restart	= pxa_restart,
 MACHINE_END
@@ -1236,7 +1236,7 @@ MACHINE_START(EZX_E2, "Motorola EZX E2")
 	.nr_irqs	= EZX_NR_IRQS,
 	.init_irq       = pxa27x_init_irq,
 	.handle_irq       = pxa27x_handle_irq,
-	.timer          = &pxa_timer,
+	.init_time	= pxa_timer_init,
 	.init_machine   = e2_init,
 	.restart	= pxa_restart,
 MACHINE_END
diff --git a/arch/arm/mach-pxa/generic.h b/arch/arm/mach-pxa/generic.h
index 42d5cca66257..fd7ea39b78c0 100644
--- a/arch/arm/mach-pxa/generic.h
+++ b/arch/arm/mach-pxa/generic.h
@@ -10,9 +10,8 @@
  */
 
 struct irq_data;
-struct sys_timer;
 
-extern struct sys_timer pxa_timer;
+extern void pxa_timer_init(void);
 
 extern void __init pxa_map_io(void);
 
diff --git a/arch/arm/mach-pxa/gumstix.c b/arch/arm/mach-pxa/gumstix.c
index 60755a6bb1c6..00b92dad7b81 100644
--- a/arch/arm/mach-pxa/gumstix.c
+++ b/arch/arm/mach-pxa/gumstix.c
@@ -238,7 +238,7 @@ MACHINE_START(GUMSTIX, "Gumstix")
 	.nr_irqs	= PXA_NR_IRQS,
 	.init_irq	= pxa25x_init_irq,
 	.handle_irq	= pxa25x_handle_irq,
-	.timer		= &pxa_timer,
+	.init_time	= pxa_timer_init,
 	.init_machine	= gumstix_init,
 	.restart	= pxa_restart,
 MACHINE_END
diff --git a/arch/arm/mach-pxa/h5000.c b/arch/arm/mach-pxa/h5000.c
index e7dec589f014..875ec3351499 100644
--- a/arch/arm/mach-pxa/h5000.c
+++ b/arch/arm/mach-pxa/h5000.c
@@ -208,7 +208,7 @@ MACHINE_START(H5400, "HP iPAQ H5000")
 	.nr_irqs = PXA_NR_IRQS,
 	.init_irq = pxa25x_init_irq,
 	.handle_irq = pxa25x_handle_irq,
-	.timer = &pxa_timer,
+	.init_time	= pxa_timer_init,
 	.init_machine = h5000_init,
 	.restart	= pxa_restart,
 MACHINE_END
diff --git a/arch/arm/mach-pxa/himalaya.c b/arch/arm/mach-pxa/himalaya.c
index 2962de898da9..7a8d749a07b8 100644
--- a/arch/arm/mach-pxa/himalaya.c
+++ b/arch/arm/mach-pxa/himalaya.c
@@ -164,6 +164,6 @@ MACHINE_START(HIMALAYA, "HTC Himalaya")
 	.init_irq = pxa25x_init_irq,
 	.handle_irq = pxa25x_handle_irq,
 	.init_machine = himalaya_init,
-	.timer = &pxa_timer,
+	.init_time	= pxa_timer_init,
 	.restart	= pxa_restart,
 MACHINE_END
diff --git a/arch/arm/mach-pxa/hx4700.c b/arch/arm/mach-pxa/hx4700.c
index e2c6391863fe..133109ec7332 100644
--- a/arch/arm/mach-pxa/hx4700.c
+++ b/arch/arm/mach-pxa/hx4700.c
@@ -900,6 +900,6 @@ MACHINE_START(H4700, "HP iPAQ HX4700")
 	.init_irq     = pxa27x_init_irq,
 	.handle_irq     = pxa27x_handle_irq,
 	.init_machine = hx4700_init,
-	.timer        = &pxa_timer,
+	.init_time	= pxa_timer_init,
 	.restart	= pxa_restart,
 MACHINE_END
diff --git a/arch/arm/mach-pxa/icontrol.c b/arch/arm/mach-pxa/icontrol.c
index 1d02eabc9c65..fe31bfcbb8df 100644
--- a/arch/arm/mach-pxa/icontrol.c
+++ b/arch/arm/mach-pxa/icontrol.c
@@ -196,7 +196,7 @@ MACHINE_START(ICONTROL, "iControl/SafeTcam boards using Embedian MXM-8x10 CoM")
 	.nr_irqs	= PXA_NR_IRQS,
 	.init_irq	= pxa3xx_init_irq,
 	.handle_irq	= pxa3xx_handle_irq,
-	.timer		= &pxa_timer,
+	.init_time	= pxa_timer_init,
 	.init_machine	= icontrol_init,
 	.restart	= pxa_restart,
 MACHINE_END
diff --git a/arch/arm/mach-pxa/idp.c b/arch/arm/mach-pxa/idp.c
index 64507cdd2e8f..343c4e3a7c5d 100644
--- a/arch/arm/mach-pxa/idp.c
+++ b/arch/arm/mach-pxa/idp.c
@@ -279,7 +279,7 @@ MACHINE_START(PXA_IDP, "Vibren PXA255 IDP")
 	.nr_irqs	= PXA_NR_IRQS,
 	.init_irq	= pxa25x_init_irq,
 	.handle_irq	= pxa25x_handle_irq,
-	.timer		= &pxa_timer,
+	.init_time	= pxa_timer_init,
 	.init_machine	= idp_init,
 	.restart	= pxa_restart,
 MACHINE_END
diff --git a/arch/arm/mach-pxa/littleton.c b/arch/arm/mach-pxa/littleton.c
index 402874f9021f..e848c4607baf 100644
--- a/arch/arm/mach-pxa/littleton.c
+++ b/arch/arm/mach-pxa/littleton.c
@@ -443,7 +443,7 @@ MACHINE_START(LITTLETON, "Marvell Form Factor Development Platform (aka Littleto
 	.nr_irqs	= LITTLETON_NR_IRQS,
 	.init_irq	= pxa3xx_init_irq,
 	.handle_irq	= pxa3xx_handle_irq,
-	.timer		= &pxa_timer,
+	.init_time	= pxa_timer_init,
 	.init_machine	= littleton_init,
 	.restart	= pxa_restart,
 MACHINE_END
diff --git a/arch/arm/mach-pxa/lpd270.c b/arch/arm/mach-pxa/lpd270.c
index 1a63eaa89867..1255ee00f3d1 100644
--- a/arch/arm/mach-pxa/lpd270.c
+++ b/arch/arm/mach-pxa/lpd270.c
@@ -503,7 +503,7 @@ MACHINE_START(LOGICPD_PXA270, "LogicPD PXA270 Card Engine")
 	.nr_irqs	= LPD270_NR_IRQS,
 	.init_irq	= lpd270_init_irq,
 	.handle_irq	= pxa27x_handle_irq,
-	.timer		= &pxa_timer,
+	.init_time	= pxa_timer_init,
 	.init_machine	= lpd270_init,
 	.restart	= pxa_restart,
 MACHINE_END
diff --git a/arch/arm/mach-pxa/lubbock.c b/arch/arm/mach-pxa/lubbock.c
index 553056d9a3c5..d8a1be619f21 100644
--- a/arch/arm/mach-pxa/lubbock.c
+++ b/arch/arm/mach-pxa/lubbock.c
@@ -650,7 +650,7 @@ MACHINE_START(LUBBOCK, "Intel DBPXA250 Development Platform (aka Lubbock)")
 	.nr_irqs	= LUBBOCK_NR_IRQS,
 	.init_irq	= lubbock_init_irq,
 	.handle_irq	= pxa25x_handle_irq,
-	.timer		= &pxa_timer,
+	.init_time	= pxa_timer_init,
 	.init_machine	= lubbock_init,
 	.restart	= pxa_restart,
 MACHINE_END
diff --git a/arch/arm/mach-pxa/magician.c b/arch/arm/mach-pxa/magician.c
index f7922404d941..f44532fc648b 100644
--- a/arch/arm/mach-pxa/magician.c
+++ b/arch/arm/mach-pxa/magician.c
@@ -774,6 +774,6 @@ MACHINE_START(MAGICIAN, "HTC Magician")
 	.init_irq = pxa27x_init_irq,
 	.handle_irq = pxa27x_handle_irq,
 	.init_machine = magician_init,
-	.timer = &pxa_timer,
+	.init_time	= pxa_timer_init,
 	.restart	= pxa_restart,
 MACHINE_END
diff --git a/arch/arm/mach-pxa/mainstone.c b/arch/arm/mach-pxa/mainstone.c
index f27a61ee7ac7..7a12c1ba90ff 100644
--- a/arch/arm/mach-pxa/mainstone.c
+++ b/arch/arm/mach-pxa/mainstone.c
@@ -714,7 +714,7 @@ MACHINE_START(MAINSTONE, "Intel HCDDBBVA0 Development Platform (aka Mainstone)")
 	.nr_irqs	= MAINSTONE_NR_IRQS,
 	.init_irq	= mainstone_init_irq,
 	.handle_irq	= pxa27x_handle_irq,
-	.timer		= &pxa_timer,
+	.init_time	= pxa_timer_init,
 	.init_machine	= mainstone_init,
 	.restart	= pxa_restart,
 MACHINE_END
diff --git a/arch/arm/mach-pxa/mioa701.c b/arch/arm/mach-pxa/mioa701.c
index 2831308dba68..f8979b943cbf 100644
--- a/arch/arm/mach-pxa/mioa701.c
+++ b/arch/arm/mach-pxa/mioa701.c
@@ -762,6 +762,6 @@ MACHINE_START(MIOA701, "MIO A701")
 	.init_irq	= &pxa27x_init_irq,
 	.handle_irq	= &pxa27x_handle_irq,
 	.init_machine	= mioa701_machine_init,
-	.timer		= &pxa_timer,
+	.init_time	= pxa_timer_init,
 	.restart	= mioa701_restart,
 MACHINE_END
diff --git a/arch/arm/mach-pxa/mp900.c b/arch/arm/mach-pxa/mp900.c
index 152efbf093f6..854f1f562d6b 100644
--- a/arch/arm/mach-pxa/mp900.c
+++ b/arch/arm/mach-pxa/mp900.c
@@ -93,7 +93,7 @@ static void __init mp900c_init(void)
 /* Maintainer - Michael Petchkovsky <mkpetch@internode.on.net> */
 MACHINE_START(NEC_MP900, "MobilePro900/C")
 	.atag_offset	= 0x220100,
-	.timer		= &pxa_timer,
+	.init_time	= pxa_timer_init,
 	.map_io		= pxa25x_map_io,
 	.nr_irqs	= PXA_NR_IRQS,
 	.init_irq	= pxa25x_init_irq,
diff --git a/arch/arm/mach-pxa/palmld.c b/arch/arm/mach-pxa/palmld.c
index 8bcc96e3b0db..909b713e5789 100644
--- a/arch/arm/mach-pxa/palmld.c
+++ b/arch/arm/mach-pxa/palmld.c
@@ -347,7 +347,7 @@ MACHINE_START(PALMLD, "Palm LifeDrive")
 	.nr_irqs	= PXA_NR_IRQS,
 	.init_irq	= pxa27x_init_irq,
 	.handle_irq	= pxa27x_handle_irq,
-	.timer		= &pxa_timer,
+	.init_time	= pxa_timer_init,
 	.init_machine	= palmld_init,
 	.restart	= pxa_restart,
 MACHINE_END
diff --git a/arch/arm/mach-pxa/palmt5.c b/arch/arm/mach-pxa/palmt5.c
index 5ca7b904a30e..5033fd07968f 100644
--- a/arch/arm/mach-pxa/palmt5.c
+++ b/arch/arm/mach-pxa/palmt5.c
@@ -208,7 +208,7 @@ MACHINE_START(PALMT5, "Palm Tungsten|T5")
 	.nr_irqs	= PXA_NR_IRQS,
 	.init_irq	= pxa27x_init_irq,
 	.handle_irq	= pxa27x_handle_irq,
-	.timer		= &pxa_timer,
+	.init_time	= pxa_timer_init,
 	.init_machine	= palmt5_init,
 	.restart	= pxa_restart,
 MACHINE_END
diff --git a/arch/arm/mach-pxa/palmtc.c b/arch/arm/mach-pxa/palmtc.c
index ca924cfedfc0..100b176f7e88 100644
--- a/arch/arm/mach-pxa/palmtc.c
+++ b/arch/arm/mach-pxa/palmtc.c
@@ -542,7 +542,7 @@ MACHINE_START(PALMTC, "Palm Tungsten|C")
 	.nr_irqs	= PXA_NR_IRQS,
 	.init_irq	= pxa25x_init_irq,
 	.handle_irq	= pxa25x_handle_irq,
-	.timer		= &pxa_timer,
+	.init_time	= pxa_timer_init,
 	.init_machine	= palmtc_init,
 	.restart	= pxa_restart,
 MACHINE_END
diff --git a/arch/arm/mach-pxa/palmte2.c b/arch/arm/mach-pxa/palmte2.c
index 32e0d7998355..0742721ced2d 100644
--- a/arch/arm/mach-pxa/palmte2.c
+++ b/arch/arm/mach-pxa/palmte2.c
@@ -363,7 +363,7 @@ MACHINE_START(PALMTE2, "Palm Tungsten|E2")
 	.nr_irqs	= PXA_NR_IRQS,
 	.init_irq	= pxa25x_init_irq,
 	.handle_irq	= pxa25x_handle_irq,
-	.timer		= &pxa_timer,
+	.init_time	= pxa_timer_init,
 	.init_machine	= palmte2_init,
 	.restart	= pxa_restart,
 MACHINE_END
diff --git a/arch/arm/mach-pxa/palmtreo.c b/arch/arm/mach-pxa/palmtreo.c
index 3f3c48f2f7ce..d17bda278782 100644
--- a/arch/arm/mach-pxa/palmtreo.c
+++ b/arch/arm/mach-pxa/palmtreo.c
@@ -451,7 +451,7 @@ MACHINE_START(TREO680, "Palm Treo 680")
 	.nr_irqs	= PXA_NR_IRQS,
 	.init_irq       = pxa27x_init_irq,
 	.handle_irq       = pxa27x_handle_irq,
-	.timer          = &pxa_timer,
+	.init_time	= pxa_timer_init,
 	.init_machine   = treo680_init,
 	.restart	= pxa_restart,
 MACHINE_END
@@ -465,7 +465,7 @@ MACHINE_START(CENTRO, "Palm Centro 685")
 	.nr_irqs	= PXA_NR_IRQS,
 	.init_irq       = pxa27x_init_irq,
 	.handle_irq       = pxa27x_handle_irq,
-	.timer          = &pxa_timer,
+	.init_time	= pxa_timer_init,
 	.init_machine	= centro_init,
 	.restart	= pxa_restart,
 MACHINE_END
diff --git a/arch/arm/mach-pxa/palmtx.c b/arch/arm/mach-pxa/palmtx.c
index 8b4366628a12..627c93a7364c 100644
--- a/arch/arm/mach-pxa/palmtx.c
+++ b/arch/arm/mach-pxa/palmtx.c
@@ -366,7 +366,7 @@ MACHINE_START(PALMTX, "Palm T|X")
 	.nr_irqs	= PXA_NR_IRQS,
 	.init_irq	= pxa27x_init_irq,
 	.handle_irq	= pxa27x_handle_irq,
-	.timer		= &pxa_timer,
+	.init_time	= pxa_timer_init,
 	.init_machine	= palmtx_init,
 	.restart	= pxa_restart,
 MACHINE_END
diff --git a/arch/arm/mach-pxa/palmz72.c b/arch/arm/mach-pxa/palmz72.c
index 8cdd4f58e253..18b7fcd98592 100644
--- a/arch/arm/mach-pxa/palmz72.c
+++ b/arch/arm/mach-pxa/palmz72.c
@@ -404,7 +404,7 @@ MACHINE_START(PALMZ72, "Palm Zire72")
 	.nr_irqs	= PXA_NR_IRQS,
 	.init_irq	= pxa27x_init_irq,
 	.handle_irq	= pxa27x_handle_irq,
-	.timer		= &pxa_timer,
+	.init_time	= pxa_timer_init,
 	.init_machine	= palmz72_init,
 	.restart	= pxa_restart,
 MACHINE_END
diff --git a/arch/arm/mach-pxa/pcm027.c b/arch/arm/mach-pxa/pcm027.c
index fe9054435b6f..69918c7e3f1f 100644
--- a/arch/arm/mach-pxa/pcm027.c
+++ b/arch/arm/mach-pxa/pcm027.c
@@ -263,7 +263,7 @@ MACHINE_START(PCM027, "Phytec Messtechnik GmbH phyCORE-PXA270")
 	.nr_irqs	= PCM027_NR_IRQS,
 	.init_irq	= pxa27x_init_irq,
 	.handle_irq	= pxa27x_handle_irq,
-	.timer		= &pxa_timer,
+	.init_time	= pxa_timer_init,
 	.init_machine	= pcm027_init,
 	.restart	= pxa_restart,
 MACHINE_END
diff --git a/arch/arm/mach-pxa/poodle.c b/arch/arm/mach-pxa/poodle.c
index 2910bb935c75..50ccd5f1d560 100644
--- a/arch/arm/mach-pxa/poodle.c
+++ b/arch/arm/mach-pxa/poodle.c
@@ -469,7 +469,7 @@ MACHINE_START(POODLE, "SHARP Poodle")
 	.nr_irqs	= POODLE_NR_IRQS,	/* 4 for LoCoMo */
 	.init_irq	= pxa25x_init_irq,
 	.handle_irq	= pxa25x_handle_irq,
-	.timer		= &pxa_timer,
+	.init_time	= pxa_timer_init,
 	.init_machine	= poodle_init,
 	.restart	= pxa_restart,
 MACHINE_END
diff --git a/arch/arm/mach-pxa/pxa-dt.c b/arch/arm/mach-pxa/pxa-dt.c
index c9192cea0033..3835979a0dd3 100644
--- a/arch/arm/mach-pxa/pxa-dt.c
+++ b/arch/arm/mach-pxa/pxa-dt.c
@@ -55,7 +55,7 @@ DT_MACHINE_START(PXA_DT, "Marvell PXA3xx (Device Tree Support)")
 	.map_io		= pxa3xx_map_io,
 	.init_irq	= pxa3xx_dt_init_irq,
 	.handle_irq	= pxa3xx_handle_irq,
-	.timer		= &pxa_timer,
+	.init_time	= pxa_timer_init,
 	.restart	= pxa_restart,
 	.init_machine	= pxa3xx_dt_init,
 	.dt_compat	= pxa3xx_dt_board_compat,
diff --git a/arch/arm/mach-pxa/raumfeld.c b/arch/arm/mach-pxa/raumfeld.c
index 25b08bfa997b..af41888acbd6 100644
--- a/arch/arm/mach-pxa/raumfeld.c
+++ b/arch/arm/mach-pxa/raumfeld.c
@@ -1095,7 +1095,7 @@ MACHINE_START(RAUMFELD_RC, "Raumfeld Controller")
 	.nr_irqs	= PXA_NR_IRQS,
 	.init_irq	= pxa3xx_init_irq,
 	.handle_irq	= pxa3xx_handle_irq,
-	.timer		= &pxa_timer,
+	.init_time	= pxa_timer_init,
 	.restart	= pxa_restart,
 MACHINE_END
 #endif
@@ -1108,7 +1108,7 @@ MACHINE_START(RAUMFELD_CONNECTOR, "Raumfeld Connector")
 	.nr_irqs	= PXA_NR_IRQS,
 	.init_irq	= pxa3xx_init_irq,
 	.handle_irq	= pxa3xx_handle_irq,
-	.timer		= &pxa_timer,
+	.init_time	= pxa_timer_init,
 	.restart	= pxa_restart,
 MACHINE_END
 #endif
@@ -1121,7 +1121,7 @@ MACHINE_START(RAUMFELD_SPEAKER, "Raumfeld Speaker")
 	.nr_irqs	= PXA_NR_IRQS,
 	.init_irq	= pxa3xx_init_irq,
 	.handle_irq	= pxa3xx_handle_irq,
-	.timer		= &pxa_timer,
+	.init_time	= pxa_timer_init,
 	.restart	= pxa_restart,
 MACHINE_END
 #endif
diff --git a/arch/arm/mach-pxa/saar.c b/arch/arm/mach-pxa/saar.c
index 08d87a5d2639..710c493eac89 100644
--- a/arch/arm/mach-pxa/saar.c
+++ b/arch/arm/mach-pxa/saar.c
@@ -601,7 +601,7 @@ MACHINE_START(SAAR, "PXA930 Handheld Platform (aka SAAR)")
 	.nr_irqs	= PXA_NR_IRQS,
 	.init_irq       = pxa3xx_init_irq,
 	.handle_irq       = pxa3xx_handle_irq,
-	.timer          = &pxa_timer,
+	.init_time	= pxa_timer_init,
 	.init_machine   = saar_init,
 	.restart	= pxa_restart,
 MACHINE_END
diff --git a/arch/arm/mach-pxa/spitz.c b/arch/arm/mach-pxa/spitz.c
index 2073f0e6db0d..f90aa27ad599 100644
--- a/arch/arm/mach-pxa/spitz.c
+++ b/arch/arm/mach-pxa/spitz.c
@@ -986,7 +986,7 @@ MACHINE_START(SPITZ, "SHARP Spitz")
 	.init_irq	= pxa27x_init_irq,
 	.handle_irq	= pxa27x_handle_irq,
 	.init_machine	= spitz_init,
-	.timer		= &pxa_timer,
+	.init_time	= pxa_timer_init,
 	.restart	= spitz_restart,
 MACHINE_END
 #endif
@@ -1000,7 +1000,7 @@ MACHINE_START(BORZOI, "SHARP Borzoi")
 	.init_irq	= pxa27x_init_irq,
 	.handle_irq	= pxa27x_handle_irq,
 	.init_machine	= spitz_init,
-	.timer		= &pxa_timer,
+	.init_time	= pxa_timer_init,
 	.restart	= spitz_restart,
 MACHINE_END
 #endif
@@ -1014,7 +1014,7 @@ MACHINE_START(AKITA, "SHARP Akita")
 	.init_irq	= pxa27x_init_irq,
 	.handle_irq	= pxa27x_handle_irq,
 	.init_machine	= spitz_init,
-	.timer		= &pxa_timer,
+	.init_time	= pxa_timer_init,
 	.restart	= spitz_restart,
 MACHINE_END
 #endif
diff --git a/arch/arm/mach-pxa/stargate2.c b/arch/arm/mach-pxa/stargate2.c
index 456560b5aad4..88fde43c948c 100644
--- a/arch/arm/mach-pxa/stargate2.c
+++ b/arch/arm/mach-pxa/stargate2.c
@@ -1006,7 +1006,7 @@ MACHINE_START(INTELMOTE2, "IMOTE 2")
 	.nr_irqs	= PXA_NR_IRQS,
 	.init_irq	= pxa27x_init_irq,
 	.handle_irq	= pxa27x_handle_irq,
-	.timer		= &pxa_timer,
+	.init_time	= pxa_timer_init,
 	.init_machine	= imote2_init,
 	.atag_offset	= 0x100,
 	.restart	= pxa_restart,
@@ -1019,7 +1019,7 @@ MACHINE_START(STARGATE2, "Stargate 2")
 	.nr_irqs = STARGATE_NR_IRQS,
 	.init_irq = pxa27x_init_irq,
 	.handle_irq = pxa27x_handle_irq,
-	.timer = &pxa_timer,
+	.init_time	= pxa_timer_init,
 	.init_machine = stargate2_init,
 	.atag_offset = 0x100,
 	.restart	= pxa_restart,
diff --git a/arch/arm/mach-pxa/tavorevb.c b/arch/arm/mach-pxa/tavorevb.c
index 1a25f8a7b0ce..f55979c09a5f 100644
--- a/arch/arm/mach-pxa/tavorevb.c
+++ b/arch/arm/mach-pxa/tavorevb.c
@@ -494,7 +494,7 @@ MACHINE_START(TAVOREVB, "PXA930 Evaluation Board (aka TavorEVB)")
 	.nr_irqs	= PXA_NR_IRQS,
 	.init_irq       = pxa3xx_init_irq,
 	.handle_irq       = pxa3xx_handle_irq,
-	.timer          = &pxa_timer,
+	.init_time	= pxa_timer_init,
 	.init_machine   = tavorevb_init,
 	.restart	= pxa_restart,
 MACHINE_END
diff --git a/arch/arm/mach-pxa/time.c b/arch/arm/mach-pxa/time.c
index ce58bc9acb14..bea19a06b2bf 100644
--- a/arch/arm/mach-pxa/time.c
+++ b/arch/arm/mach-pxa/time.c
@@ -142,7 +142,7 @@ static struct irqaction pxa_ost0_irq = {
 	.dev_id		= &ckevt_pxa_osmr0,
 };
 
-static void __init pxa_timer_init(void)
+void __init pxa_timer_init(void)
 {
 	unsigned long clock_tick_rate = get_clock_tick_rate();
 
@@ -164,7 +164,3 @@ static void __init pxa_timer_init(void)
 		clocksource_mmio_readl_up);
 	clockevents_register_device(&ckevt_pxa_osmr0);
 }
-
-struct sys_timer pxa_timer = {
-	.init		= pxa_timer_init,
-};
diff --git a/arch/arm/mach-pxa/tosa.c b/arch/arm/mach-pxa/tosa.c
index 233629edf7ee..9e7998d3635f 100644
--- a/arch/arm/mach-pxa/tosa.c
+++ b/arch/arm/mach-pxa/tosa.c
@@ -982,6 +982,6 @@ MACHINE_START(TOSA, "SHARP Tosa")
 	.init_irq       = pxa25x_init_irq,
 	.handle_irq       = pxa25x_handle_irq,
 	.init_machine   = tosa_init,
-	.timer          = &pxa_timer,
+	.init_time	= pxa_timer_init,
 	.restart	= tosa_restart,
 MACHINE_END
diff --git a/arch/arm/mach-pxa/trizeps4.c b/arch/arm/mach-pxa/trizeps4.c
index fbbcbed4d1d4..c58043462acd 100644
--- a/arch/arm/mach-pxa/trizeps4.c
+++ b/arch/arm/mach-pxa/trizeps4.c
@@ -561,7 +561,7 @@ MACHINE_START(TRIZEPS4, "Keith und Koep Trizeps IV module")
 	.nr_irqs	= PXA_NR_IRQS,
 	.init_irq	= pxa27x_init_irq,
 	.handle_irq	= pxa27x_handle_irq,
-	.timer		= &pxa_timer,
+	.init_time	= pxa_timer_init,
 	.restart	= pxa_restart,
 MACHINE_END
 
@@ -573,6 +573,6 @@ MACHINE_START(TRIZEPS4WL, "Keith und Koep Trizeps IV-WL module")
 	.nr_irqs	= PXA_NR_IRQS,
 	.init_irq	= pxa27x_init_irq,
 	.handle_irq	= pxa27x_handle_irq,
-	.timer		= &pxa_timer,
+	.init_time	= pxa_timer_init,
 	.restart	= pxa_restart,
 MACHINE_END
diff --git a/arch/arm/mach-pxa/viper.c b/arch/arm/mach-pxa/viper.c
index c773e4dded64..9c363c081d3f 100644
--- a/arch/arm/mach-pxa/viper.c
+++ b/arch/arm/mach-pxa/viper.c
@@ -997,7 +997,7 @@ MACHINE_START(VIPER, "Arcom/Eurotech VIPER SBC")
 	.nr_irqs	= PXA_NR_IRQS,
 	.init_irq	= viper_init_irq,
 	.handle_irq	= pxa25x_handle_irq,
-	.timer          = &pxa_timer,
+	.init_time	= pxa_timer_init,
 	.init_machine	= viper_init,
 	.restart	= pxa_restart,
 MACHINE_END
diff --git a/arch/arm/mach-pxa/vpac270.c b/arch/arm/mach-pxa/vpac270.c
index 491b6c9a2a9b..aa89488f961e 100644
--- a/arch/arm/mach-pxa/vpac270.c
+++ b/arch/arm/mach-pxa/vpac270.c
@@ -719,7 +719,7 @@ MACHINE_START(VPAC270, "Voipac PXA270")
 	.nr_irqs	= PXA_NR_IRQS,
 	.init_irq	= pxa27x_init_irq,
 	.handle_irq	= pxa27x_handle_irq,
-	.timer		= &pxa_timer,
+	.init_time	= pxa_timer_init,
 	.init_machine	= vpac270_init,
 	.restart	= pxa_restart,
 MACHINE_END
diff --git a/arch/arm/mach-pxa/xcep.c b/arch/arm/mach-pxa/xcep.c
index 4275713ccd10..13b1d4586d7d 100644
--- a/arch/arm/mach-pxa/xcep.c
+++ b/arch/arm/mach-pxa/xcep.c
@@ -185,7 +185,7 @@ MACHINE_START(XCEP, "Iskratel XCEP")
 	.nr_irqs	= PXA_NR_IRQS,
 	.init_irq	= pxa25x_init_irq,
 	.handle_irq	= pxa25x_handle_irq,
-	.timer		= &pxa_timer,
+	.init_time	= pxa_timer_init,
 	.restart	= pxa_restart,
 MACHINE_END
 
diff --git a/arch/arm/mach-pxa/z2.c b/arch/arm/mach-pxa/z2.c
index 97529face7aa..989903a7e467 100644
--- a/arch/arm/mach-pxa/z2.c
+++ b/arch/arm/mach-pxa/z2.c
@@ -722,7 +722,7 @@ MACHINE_START(ZIPIT2, "Zipit Z2")
 	.nr_irqs	= PXA_NR_IRQS,
 	.init_irq	= pxa27x_init_irq,
 	.handle_irq	= pxa27x_handle_irq,
-	.timer		= &pxa_timer,
+	.init_time	= pxa_timer_init,
 	.init_machine	= z2_init,
 	.restart	= pxa_restart,
 MACHINE_END
diff --git a/arch/arm/mach-pxa/zeus.c b/arch/arm/mach-pxa/zeus.c
index abd3aa145083..f5d436434566 100644
--- a/arch/arm/mach-pxa/zeus.c
+++ b/arch/arm/mach-pxa/zeus.c
@@ -910,7 +910,7 @@ MACHINE_START(ARCOM_ZEUS, "Arcom/Eurotech ZEUS")
 	.nr_irqs	= ZEUS_NR_IRQS,
 	.init_irq	= zeus_init_irq,
 	.handle_irq	= pxa27x_handle_irq,
-	.timer		= &pxa_timer,
+	.init_time	= pxa_timer_init,
 	.init_machine	= zeus_init,
 	.restart	= pxa_restart,
 MACHINE_END
diff --git a/arch/arm/mach-pxa/zylonite.c b/arch/arm/mach-pxa/zylonite.c
index 226279fac9d4..1f00d650ac27 100644
--- a/arch/arm/mach-pxa/zylonite.c
+++ b/arch/arm/mach-pxa/zylonite.c
@@ -428,7 +428,7 @@ MACHINE_START(ZYLONITE, "PXA3xx Platform Development Kit (aka Zylonite)")
 	.nr_irqs	= ZYLONITE_NR_IRQS,
 	.init_irq	= pxa3xx_init_irq,
 	.handle_irq	= pxa3xx_handle_irq,
-	.timer		= &pxa_timer,
+	.init_time	= pxa_timer_init,
 	.init_machine	= zylonite_init,
 	.restart	= pxa_restart,
 MACHINE_END
diff --git a/arch/arm/mach-realview/realview_eb.c b/arch/arm/mach-realview/realview_eb.c
index 28511d43637a..f892862fd6ae 100644
--- a/arch/arm/mach-realview/realview_eb.c
+++ b/arch/arm/mach-realview/realview_eb.c
@@ -418,10 +418,6 @@ static void __init realview_eb_timer_init(void)
 	realview_eb_twd_init();
 }
 
-static struct sys_timer realview_eb_timer = {
-	.init		= realview_eb_timer_init,
-};
-
 static void realview_eb_restart(char mode, const char *cmd)
 {
 	void __iomem *reset_ctrl = __io_address(REALVIEW_SYS_RESETCTL);
@@ -472,7 +468,7 @@ MACHINE_START(REALVIEW_EB, "ARM-RealView EB")
 	.map_io		= realview_eb_map_io,
 	.init_early	= realview_init_early,
 	.init_irq	= gic_init_irq,
-	.timer		= &realview_eb_timer,
+	.init_time	= realview_eb_timer_init,
 	.handle_irq	= gic_handle_irq,
 	.init_machine	= realview_eb_init,
 #ifdef CONFIG_ZONE_DMA
diff --git a/arch/arm/mach-realview/realview_pb1176.c b/arch/arm/mach-realview/realview_pb1176.c
index 07d6672ddae7..6a4524b93fc6 100644
--- a/arch/arm/mach-realview/realview_pb1176.c
+++ b/arch/arm/mach-realview/realview_pb1176.c
@@ -329,10 +329,6 @@ static void __init realview_pb1176_timer_init(void)
 	realview_timer_init(IRQ_DC1176_TIMER0);
 }
 
-static struct sys_timer realview_pb1176_timer = {
-	.init		= realview_pb1176_timer_init,
-};
-
 static void realview_pb1176_restart(char mode, const char *cmd)
 {
 	void __iomem *reset_ctrl = __io_address(REALVIEW_SYS_RESETCTL);
@@ -384,7 +380,7 @@ MACHINE_START(REALVIEW_PB1176, "ARM-RealView PB1176")
 	.map_io		= realview_pb1176_map_io,
 	.init_early	= realview_init_early,
 	.init_irq	= gic_init_irq,
-	.timer		= &realview_pb1176_timer,
+	.init_time	= realview_pb1176_timer_init,
 	.handle_irq	= gic_handle_irq,
 	.init_machine	= realview_pb1176_init,
 #ifdef CONFIG_ZONE_DMA
diff --git a/arch/arm/mach-realview/realview_pb11mp.c b/arch/arm/mach-realview/realview_pb11mp.c
index 7ed53d75350f..502f6e6c6913 100644
--- a/arch/arm/mach-realview/realview_pb11mp.c
+++ b/arch/arm/mach-realview/realview_pb11mp.c
@@ -316,10 +316,6 @@ static void __init realview_pb11mp_timer_init(void)
 	realview_pb11mp_twd_init();
 }
 
-static struct sys_timer realview_pb11mp_timer = {
-	.init		= realview_pb11mp_timer_init,
-};
-
 static void realview_pb11mp_restart(char mode, const char *cmd)
 {
 	void __iomem *reset_ctrl = __io_address(REALVIEW_SYS_RESETCTL);
@@ -367,7 +363,7 @@ MACHINE_START(REALVIEW_PB11MP, "ARM-RealView PB11MPCore")
 	.map_io		= realview_pb11mp_map_io,
 	.init_early	= realview_init_early,
 	.init_irq	= gic_init_irq,
-	.timer		= &realview_pb11mp_timer,
+	.init_time	= realview_pb11mp_timer_init,
 	.handle_irq	= gic_handle_irq,
 	.init_machine	= realview_pb11mp_init,
 #ifdef CONFIG_ZONE_DMA
diff --git a/arch/arm/mach-realview/realview_pba8.c b/arch/arm/mach-realview/realview_pba8.c
index 9992431b8a15..85c81aaa05e8 100644
--- a/arch/arm/mach-realview/realview_pba8.c
+++ b/arch/arm/mach-realview/realview_pba8.c
@@ -264,10 +264,6 @@ static void __init realview_pba8_timer_init(void)
 	realview_timer_init(IRQ_PBA8_TIMER0_1);
 }
 
-static struct sys_timer realview_pba8_timer = {
-	.init		= realview_pba8_timer_init,
-};
-
 static void realview_pba8_restart(char mode, const char *cmd)
 {
 	void __iomem *reset_ctrl = __io_address(REALVIEW_SYS_RESETCTL);
@@ -308,7 +304,7 @@ MACHINE_START(REALVIEW_PBA8, "ARM-RealView PB-A8")
 	.map_io		= realview_pba8_map_io,
 	.init_early	= realview_init_early,
 	.init_irq	= gic_init_irq,
-	.timer		= &realview_pba8_timer,
+	.init_time	= realview_pba8_timer_init,
 	.handle_irq	= gic_handle_irq,
 	.init_machine	= realview_pba8_init,
 #ifdef CONFIG_ZONE_DMA
diff --git a/arch/arm/mach-realview/realview_pbx.c b/arch/arm/mach-realview/realview_pbx.c
index 4f486f05108a..a15a7b0be29b 100644
--- a/arch/arm/mach-realview/realview_pbx.c
+++ b/arch/arm/mach-realview/realview_pbx.c
@@ -324,10 +324,6 @@ static void __init realview_pbx_timer_init(void)
 	realview_pbx_twd_init();
 }
 
-static struct sys_timer realview_pbx_timer = {
-	.init		= realview_pbx_timer_init,
-};
-
 static void realview_pbx_fixup(struct tag *tags, char **from,
 			       struct meminfo *meminfo)
 {
@@ -404,7 +400,7 @@ MACHINE_START(REALVIEW_PBX, "ARM-RealView PBX")
 	.map_io		= realview_pbx_map_io,
 	.init_early	= realview_init_early,
 	.init_irq	= gic_init_irq,
-	.timer		= &realview_pbx_timer,
+	.init_time	= realview_pbx_timer_init,
 	.handle_irq	= gic_handle_irq,
 	.init_machine	= realview_pbx_init,
 #ifdef CONFIG_ZONE_DMA
diff --git a/arch/arm/mach-rpc/riscpc.c b/arch/arm/mach-rpc/riscpc.c
index f3fa259ce01f..a302cf5e0fc7 100644
--- a/arch/arm/mach-rpc/riscpc.c
+++ b/arch/arm/mach-rpc/riscpc.c
@@ -211,7 +211,7 @@ static void rpc_restart(char mode, const char *cmd)
 	soft_restart(0);
 }
 
-extern struct sys_timer ioc_timer;
+void ioc_timer_init(void);
 
 MACHINE_START(RISCPC, "Acorn-RiscPC")
 	/* Maintainer: Russell King */
@@ -220,6 +220,6 @@ MACHINE_START(RISCPC, "Acorn-RiscPC")
 	.reserve_lp1	= 1,
 	.map_io		= rpc_map_io,
 	.init_irq	= rpc_init_irq,
-	.timer		= &ioc_timer,
+	.init_time	= ioc_timer_init,
 	.restart	= rpc_restart,
 MACHINE_END
diff --git a/arch/arm/mach-rpc/time.c b/arch/arm/mach-rpc/time.c
index 6ddccb0210ac..9a6def14df01 100644
--- a/arch/arm/mach-rpc/time.c
+++ b/arch/arm/mach-rpc/time.c
@@ -82,14 +82,9 @@ static struct irqaction ioc_timer_irq = {
 /*
  * Set up timer interrupt.
  */
-static void __init ioc_timer_init(void)
+void __init ioc_timer_init(void)
 {
 	arch_gettimeoffset = ioc_timer_gettimeoffset;
 	ioctime_init();
 	setup_irq(IRQ_TIMER0, &ioc_timer_irq);
 }
-
-struct sys_timer ioc_timer = {
-	.init		= ioc_timer_init,
-};
-
diff --git a/arch/arm/mach-s3c24xx/mach-amlm5900.c b/arch/arm/mach-s3c24xx/mach-amlm5900.c
index f4ad99c1e476..0e0279e79150 100644
--- a/arch/arm/mach-s3c24xx/mach-amlm5900.c
+++ b/arch/arm/mach-s3c24xx/mach-amlm5900.c
@@ -237,6 +237,6 @@ MACHINE_START(AML_M5900, "AML_M5900")
 	.map_io		= amlm5900_map_io,
 	.init_irq	= s3c24xx_init_irq,
 	.init_machine	= amlm5900_init,
-	.timer		= &s3c24xx_timer,
+	.init_time	= s3c24xx_timer_init,
 	.restart	= s3c2410_restart,
 MACHINE_END
diff --git a/arch/arm/mach-s3c24xx/mach-anubis.c b/arch/arm/mach-s3c24xx/mach-anubis.c
index 1ee8c4638743..85eefab881af 100644
--- a/arch/arm/mach-s3c24xx/mach-anubis.c
+++ b/arch/arm/mach-s3c24xx/mach-anubis.c
@@ -448,6 +448,6 @@ MACHINE_START(ANUBIS, "Simtec-Anubis")
 	.map_io		= anubis_map_io,
 	.init_machine	= anubis_init,
 	.init_irq	= s3c24xx_init_irq,
-	.timer		= &s3c24xx_timer,
+	.init_time	= s3c24xx_timer_init,
 	.restart	= s3c244x_restart,
 MACHINE_END
diff --git a/arch/arm/mach-s3c24xx/mach-at2440evb.c b/arch/arm/mach-s3c24xx/mach-at2440evb.c
index 00381fe5de32..b31c4aa724f2 100644
--- a/arch/arm/mach-s3c24xx/mach-at2440evb.c
+++ b/arch/arm/mach-s3c24xx/mach-at2440evb.c
@@ -210,6 +210,6 @@ MACHINE_START(AT2440EVB, "AT2440EVB")
 	.map_io		= at2440evb_map_io,
 	.init_machine	= at2440evb_init,
 	.init_irq	= s3c24xx_init_irq,
-	.timer		= &s3c24xx_timer,
+	.init_time	= s3c24xx_timer_init,
 	.restart	= s3c244x_restart,
 MACHINE_END
diff --git a/arch/arm/mach-s3c24xx/mach-bast.c b/arch/arm/mach-s3c24xx/mach-bast.c
index 6a30ce7e4aa7..526964c19dd8 100644
--- a/arch/arm/mach-s3c24xx/mach-bast.c
+++ b/arch/arm/mach-s3c24xx/mach-bast.c
@@ -612,6 +612,6 @@ MACHINE_START(BAST, "Simtec-BAST")
 	.map_io		= bast_map_io,
 	.init_irq	= s3c24xx_init_irq,
 	.init_machine	= bast_init,
-	.timer		= &s3c24xx_timer,
+	.init_time	= s3c24xx_timer_init,
 	.restart	= s3c2410_restart,
 MACHINE_END
diff --git a/arch/arm/mach-s3c24xx/mach-gta02.c b/arch/arm/mach-s3c24xx/mach-gta02.c
index 973b87ca87f4..fb5d3b3b53db 100644
--- a/arch/arm/mach-s3c24xx/mach-gta02.c
+++ b/arch/arm/mach-s3c24xx/mach-gta02.c
@@ -595,6 +595,6 @@ MACHINE_START(NEO1973_GTA02, "GTA02")
 	.map_io		= gta02_map_io,
 	.init_irq	= s3c24xx_init_irq,
 	.init_machine	= gta02_machine_init,
-	.timer		= &s3c24xx_timer,
+	.init_time	= s3c24xx_timer_init,
 	.restart	= s3c244x_restart,
 MACHINE_END
diff --git a/arch/arm/mach-s3c24xx/mach-h1940.c b/arch/arm/mach-s3c24xx/mach-h1940.c
index b23dd1b106e8..2eb09e27c13c 100644
--- a/arch/arm/mach-s3c24xx/mach-h1940.c
+++ b/arch/arm/mach-s3c24xx/mach-h1940.c
@@ -746,6 +746,6 @@ MACHINE_START(H1940, "IPAQ-H1940")
 	.reserve	= h1940_reserve,
 	.init_irq	= h1940_init_irq,
 	.init_machine	= h1940_init,
-	.timer		= &s3c24xx_timer,
+	.init_time	= s3c24xx_timer_init,
 	.restart	= s3c2410_restart,
 MACHINE_END
diff --git a/arch/arm/mach-s3c24xx/mach-jive.c b/arch/arm/mach-s3c24xx/mach-jive.c
index c9954e26b492..d7a172555238 100644
--- a/arch/arm/mach-s3c24xx/mach-jive.c
+++ b/arch/arm/mach-s3c24xx/mach-jive.c
@@ -661,6 +661,6 @@ MACHINE_START(JIVE, "JIVE")
 	.init_irq	= s3c24xx_init_irq,
 	.map_io		= jive_map_io,
 	.init_machine	= jive_machine_init,
-	.timer		= &s3c24xx_timer,
+	.init_time	= s3c24xx_timer_init,
 	.restart	= s3c2412_restart,
 MACHINE_END
diff --git a/arch/arm/mach-s3c24xx/mach-mini2440.c b/arch/arm/mach-s3c24xx/mach-mini2440.c
index a31d5b83e5f7..2db09ade9b50 100644
--- a/arch/arm/mach-s3c24xx/mach-mini2440.c
+++ b/arch/arm/mach-s3c24xx/mach-mini2440.c
@@ -688,6 +688,6 @@ MACHINE_START(MINI2440, "MINI2440")
 	.map_io		= mini2440_map_io,
 	.init_machine	= mini2440_init,
 	.init_irq	= s3c24xx_init_irq,
-	.timer		= &s3c24xx_timer,
+	.init_time	= s3c24xx_timer_init,
 	.restart	= s3c244x_restart,
 MACHINE_END
diff --git a/arch/arm/mach-s3c24xx/mach-n30.c b/arch/arm/mach-s3c24xx/mach-n30.c
index c53a9bfe1417..d9d04b240295 100644
--- a/arch/arm/mach-s3c24xx/mach-n30.c
+++ b/arch/arm/mach-s3c24xx/mach-n30.c
@@ -589,7 +589,7 @@ MACHINE_START(N30, "Acer-N30")
 				Ben Dooks <ben-linux@fluff.org>
 	*/
 	.atag_offset	= 0x100,
-	.timer		= &s3c24xx_timer,
+	.init_time	= s3c24xx_timer_init,
 	.init_machine	= n30_init,
 	.init_irq	= s3c24xx_init_irq,
 	.map_io		= n30_map_io,
@@ -600,7 +600,7 @@ MACHINE_START(N35, "Acer-N35")
 	/* Maintainer: Christer Weinigel <christer@weinigel.se>
 	*/
 	.atag_offset	= 0x100,
-	.timer		= &s3c24xx_timer,
+	.init_time	= s3c24xx_timer_init,
 	.init_machine	= n30_init,
 	.init_irq	= s3c24xx_init_irq,
 	.map_io		= n30_map_io,
diff --git a/arch/arm/mach-s3c24xx/mach-nexcoder.c b/arch/arm/mach-s3c24xx/mach-nexcoder.c
index a2b92b0898e2..a454e2461860 100644
--- a/arch/arm/mach-s3c24xx/mach-nexcoder.c
+++ b/arch/arm/mach-s3c24xx/mach-nexcoder.c
@@ -153,6 +153,6 @@ MACHINE_START(NEXCODER_2440, "NexVision - Nexcoder 2440")
 	.map_io		= nexcoder_map_io,
 	.init_machine	= nexcoder_init,
 	.init_irq	= s3c24xx_init_irq,
-	.timer		= &s3c24xx_timer,
+	.init_time	= s3c24xx_timer_init,
 	.restart	= s3c244x_restart,
 MACHINE_END
diff --git a/arch/arm/mach-s3c24xx/mach-osiris.c b/arch/arm/mach-s3c24xx/mach-osiris.c
index bb36d832bd3d..ba0f5b5ec19e 100644
--- a/arch/arm/mach-s3c24xx/mach-osiris.c
+++ b/arch/arm/mach-s3c24xx/mach-osiris.c
@@ -428,6 +428,6 @@ MACHINE_START(OSIRIS, "Simtec-OSIRIS")
 	.map_io		= osiris_map_io,
 	.init_irq	= s3c24xx_init_irq,
 	.init_machine	= osiris_init,
-	.timer		= &s3c24xx_timer,
+	.init_time	= s3c24xx_timer_init,
 	.restart	= s3c244x_restart,
 MACHINE_END
diff --git a/arch/arm/mach-s3c24xx/mach-otom.c b/arch/arm/mach-s3c24xx/mach-otom.c
index bca39f0232b3..e0fdae93aa7b 100644
--- a/arch/arm/mach-s3c24xx/mach-otom.c
+++ b/arch/arm/mach-s3c24xx/mach-otom.c
@@ -118,6 +118,6 @@ MACHINE_START(OTOM, "Nex Vision - Otom 1.1")
 	.map_io		= otom11_map_io,
 	.init_machine	= otom11_init,
 	.init_irq	= s3c24xx_init_irq,
-	.timer		= &s3c24xx_timer,
+	.init_time	= s3c24xx_timer_init,
 	.restart	= s3c2410_restart,
 MACHINE_END
diff --git a/arch/arm/mach-s3c24xx/mach-qt2410.c b/arch/arm/mach-s3c24xx/mach-qt2410.c
index 7b6ba13d7285..56175f0941b1 100644
--- a/arch/arm/mach-s3c24xx/mach-qt2410.c
+++ b/arch/arm/mach-s3c24xx/mach-qt2410.c
@@ -343,6 +343,6 @@ MACHINE_START(QT2410, "QT2410")
 	.map_io		= qt2410_map_io,
 	.init_irq	= s3c24xx_init_irq,
 	.init_machine	= qt2410_machine_init,
-	.timer		= &s3c24xx_timer,
+	.init_time	= s3c24xx_timer_init,
 	.restart	= s3c2410_restart,
 MACHINE_END
diff --git a/arch/arm/mach-s3c24xx/mach-rx1950.c b/arch/arm/mach-s3c24xx/mach-rx1950.c
index 0606f2faaa5c..e14ec7105a6d 100644
--- a/arch/arm/mach-s3c24xx/mach-rx1950.c
+++ b/arch/arm/mach-s3c24xx/mach-rx1950.c
@@ -814,6 +814,6 @@ MACHINE_START(RX1950, "HP iPAQ RX1950")
 	.reserve	= rx1950_reserve,
 	.init_irq = s3c24xx_init_irq,
 	.init_machine = rx1950_init_machine,
-	.timer = &s3c24xx_timer,
+	.init_time	= s3c24xx_timer_init,
 	.restart	= s3c244x_restart,
 MACHINE_END
diff --git a/arch/arm/mach-s3c24xx/mach-rx3715.c b/arch/arm/mach-s3c24xx/mach-rx3715.c
index dacbb9a2122a..d00caa8de922 100644
--- a/arch/arm/mach-s3c24xx/mach-rx3715.c
+++ b/arch/arm/mach-s3c24xx/mach-rx3715.c
@@ -212,6 +212,6 @@ MACHINE_START(RX3715, "IPAQ-RX3715")
 	.reserve	= rx3715_reserve,
 	.init_irq	= rx3715_init_irq,
 	.init_machine	= rx3715_init_machine,
-	.timer		= &s3c24xx_timer,
+	.init_time	= s3c24xx_timer_init,
 	.restart	= s3c244x_restart,
 MACHINE_END
diff --git a/arch/arm/mach-s3c24xx/mach-smdk2410.c b/arch/arm/mach-s3c24xx/mach-smdk2410.c
index 82796b97cb04..e184bfa9613a 100644
--- a/arch/arm/mach-s3c24xx/mach-smdk2410.c
+++ b/arch/arm/mach-s3c24xx/mach-smdk2410.c
@@ -117,6 +117,6 @@ MACHINE_START(SMDK2410, "SMDK2410") /* @TODO: request a new identifier and switc
 	.map_io		= smdk2410_map_io,
 	.init_irq	= s3c24xx_init_irq,
 	.init_machine	= smdk2410_init,
-	.timer		= &s3c24xx_timer,
+	.init_time	= s3c24xx_timer_init,
 	.restart	= s3c2410_restart,
 MACHINE_END
diff --git a/arch/arm/mach-s3c24xx/mach-smdk2413.c b/arch/arm/mach-s3c24xx/mach-smdk2413.c
index ce99fd8bbbc5..69f356e83790 100644
--- a/arch/arm/mach-s3c24xx/mach-smdk2413.c
+++ b/arch/arm/mach-s3c24xx/mach-smdk2413.c
@@ -133,7 +133,7 @@ MACHINE_START(S3C2413, "S3C2413")
 	.init_irq	= s3c24xx_init_irq,
 	.map_io		= smdk2413_map_io,
 	.init_machine	= smdk2413_machine_init,
-	.timer		= &s3c24xx_timer,
+	.init_time	= s3c24xx_timer_init,
 	.restart	= s3c2412_restart,
 MACHINE_END
 
@@ -145,7 +145,7 @@ MACHINE_START(SMDK2412, "SMDK2412")
 	.init_irq	= s3c24xx_init_irq,
 	.map_io		= smdk2413_map_io,
 	.init_machine	= smdk2413_machine_init,
-	.timer		= &s3c24xx_timer,
+	.init_time	= s3c24xx_timer_init,
 	.restart	= s3c2412_restart,
 MACHINE_END
 
@@ -157,6 +157,6 @@ MACHINE_START(SMDK2413, "SMDK2413")
 	.init_irq	= s3c24xx_init_irq,
 	.map_io		= smdk2413_map_io,
 	.init_machine	= smdk2413_machine_init,
-	.timer		= &s3c24xx_timer,
+	.init_time	= s3c24xx_timer_init,
 	.restart	= s3c2412_restart,
 MACHINE_END
diff --git a/arch/arm/mach-s3c24xx/mach-smdk2416.c b/arch/arm/mach-s3c24xx/mach-smdk2416.c
index f30d7fccbfee..fe160c7f4b0a 100644
--- a/arch/arm/mach-s3c24xx/mach-smdk2416.c
+++ b/arch/arm/mach-s3c24xx/mach-smdk2416.c
@@ -254,6 +254,6 @@ MACHINE_START(SMDK2416, "SMDK2416")
 	.init_irq	= s3c24xx_init_irq,
 	.map_io		= smdk2416_map_io,
 	.init_machine	= smdk2416_machine_init,
-	.timer		= &s3c24xx_timer,
+	.init_time	= s3c24xx_timer_init,
 	.restart	= s3c2416_restart,
 MACHINE_END
diff --git a/arch/arm/mach-s3c24xx/mach-smdk2440.c b/arch/arm/mach-s3c24xx/mach-smdk2440.c
index b7ff882c6ce6..a8fdafedc4c1 100644
--- a/arch/arm/mach-s3c24xx/mach-smdk2440.c
+++ b/arch/arm/mach-s3c24xx/mach-smdk2440.c
@@ -182,6 +182,6 @@ MACHINE_START(S3C2440, "SMDK2440")
 	.init_irq	= s3c24xx_init_irq,
 	.map_io		= smdk2440_map_io,
 	.init_machine	= smdk2440_machine_init,
-	.timer		= &s3c24xx_timer,
+	.init_time	= s3c24xx_timer_init,
 	.restart	= s3c244x_restart,
 MACHINE_END
diff --git a/arch/arm/mach-s3c24xx/mach-smdk2443.c b/arch/arm/mach-s3c24xx/mach-smdk2443.c
index 2568656f046f..7830d7004306 100644
--- a/arch/arm/mach-s3c24xx/mach-smdk2443.c
+++ b/arch/arm/mach-s3c24xx/mach-smdk2443.c
@@ -144,6 +144,6 @@ MACHINE_START(SMDK2443, "SMDK2443")
 	.init_irq	= s3c24xx_init_irq,
 	.map_io		= smdk2443_map_io,
 	.init_machine	= smdk2443_machine_init,
-	.timer		= &s3c24xx_timer,
+	.init_time	= s3c24xx_timer_init,
 	.restart	= s3c2443_restart,
 MACHINE_END
diff --git a/arch/arm/mach-s3c24xx/mach-tct_hammer.c b/arch/arm/mach-s3c24xx/mach-tct_hammer.c
index 495bf5cf52e9..24b3d79e7b2c 100644
--- a/arch/arm/mach-s3c24xx/mach-tct_hammer.c
+++ b/arch/arm/mach-s3c24xx/mach-tct_hammer.c
@@ -149,6 +149,6 @@ MACHINE_START(TCT_HAMMER, "TCT_HAMMER")
 	.map_io		= tct_hammer_map_io,
 	.init_irq	= s3c24xx_init_irq,
 	.init_machine	= tct_hammer_init,
-	.timer		= &s3c24xx_timer,
+	.init_time	= s3c24xx_timer_init,
 	.restart	= s3c2410_restart,
 MACHINE_END
diff --git a/arch/arm/mach-s3c24xx/mach-vr1000.c b/arch/arm/mach-s3c24xx/mach-vr1000.c
index 14d5b12e388c..dda21a01e3cc 100644
--- a/arch/arm/mach-s3c24xx/mach-vr1000.c
+++ b/arch/arm/mach-s3c24xx/mach-vr1000.c
@@ -357,6 +357,6 @@ MACHINE_START(VR1000, "Thorcom-VR1000")
 	.map_io		= vr1000_map_io,
 	.init_machine	= vr1000_init,
 	.init_irq	= s3c24xx_init_irq,
-	.timer		= &s3c24xx_timer,
+	.init_time	= s3c24xx_timer_init,
 	.restart	= s3c2410_restart,
 MACHINE_END
diff --git a/arch/arm/mach-s3c24xx/mach-vstms.c b/arch/arm/mach-s3c24xx/mach-vstms.c
index f1d44ae11833..7fe7d4f60419 100644
--- a/arch/arm/mach-s3c24xx/mach-vstms.c
+++ b/arch/arm/mach-s3c24xx/mach-vstms.c
@@ -161,6 +161,6 @@ MACHINE_START(VSTMS, "VSTMS")
 	.init_irq	= s3c24xx_init_irq,
 	.init_machine	= vstms_init,
 	.map_io		= vstms_map_io,
-	.timer		= &s3c24xx_timer,
+	.init_time	= s3c24xx_timer_init,
 	.restart	= s3c2412_restart,
 MACHINE_END
diff --git a/arch/arm/mach-s3c64xx/mach-anw6410.c b/arch/arm/mach-s3c64xx/mach-anw6410.c
index 99e82ac81b69..75cbc67f628e 100644
--- a/arch/arm/mach-s3c64xx/mach-anw6410.c
+++ b/arch/arm/mach-s3c64xx/mach-anw6410.c
@@ -234,6 +234,6 @@ MACHINE_START(ANW6410, "A&W6410")
 	.map_io		= anw6410_map_io,
 	.init_machine	= anw6410_machine_init,
 	.init_late	= s3c64xx_init_late,
-	.timer		= &s3c24xx_timer,
+	.init_time	= s3c24xx_timer_init,
 	.restart	= s3c64xx_restart,
 MACHINE_END
diff --git a/arch/arm/mach-s3c64xx/mach-crag6410.c b/arch/arm/mach-s3c64xx/mach-crag6410.c
index cdde249166b5..87aab127dd60 100644
--- a/arch/arm/mach-s3c64xx/mach-crag6410.c
+++ b/arch/arm/mach-s3c64xx/mach-crag6410.c
@@ -871,6 +871,6 @@ MACHINE_START(WLF_CRAGG_6410, "Wolfson Cragganmore 6410")
 	.map_io		= crag6410_map_io,
 	.init_machine	= crag6410_machine_init,
 	.init_late	= s3c64xx_init_late,
-	.timer		= &s3c24xx_timer,
+	.init_time	= s3c24xx_timer_init,
 	.restart	= s3c64xx_restart,
 MACHINE_END
diff --git a/arch/arm/mach-s3c64xx/mach-hmt.c b/arch/arm/mach-s3c64xx/mach-hmt.c
index 2b144893ddc4..7e8605d98142 100644
--- a/arch/arm/mach-s3c64xx/mach-hmt.c
+++ b/arch/arm/mach-s3c64xx/mach-hmt.c
@@ -277,6 +277,6 @@ MACHINE_START(HMT, "Airgoo-HMT")
 	.map_io		= hmt_map_io,
 	.init_machine	= hmt_machine_init,
 	.init_late	= s3c64xx_init_late,
-	.timer		= &s3c24xx_timer,
+	.init_time	= s3c24xx_timer_init,
 	.restart	= s3c64xx_restart,
 MACHINE_END
diff --git a/arch/arm/mach-s3c64xx/mach-mini6410.c b/arch/arm/mach-s3c64xx/mach-mini6410.c
index 07c349cca333..4f8dc7dff92b 100644
--- a/arch/arm/mach-s3c64xx/mach-mini6410.c
+++ b/arch/arm/mach-s3c64xx/mach-mini6410.c
@@ -356,6 +356,6 @@ MACHINE_START(MINI6410, "MINI6410")
 	.map_io		= mini6410_map_io,
 	.init_machine	= mini6410_machine_init,
 	.init_late	= s3c64xx_init_late,
-	.timer		= &s3c24xx_timer,
+	.init_time	= s3c24xx_timer_init,
 	.restart	= s3c64xx_restart,
 MACHINE_END
diff --git a/arch/arm/mach-s3c64xx/mach-ncp.c b/arch/arm/mach-s3c64xx/mach-ncp.c
index e5f9a79b535d..cdd7f947376c 100644
--- a/arch/arm/mach-s3c64xx/mach-ncp.c
+++ b/arch/arm/mach-s3c64xx/mach-ncp.c
@@ -105,6 +105,6 @@ MACHINE_START(NCP, "NCP")
 	.map_io		= ncp_map_io,
 	.init_machine	= ncp_machine_init,
 	.init_late	= s3c64xx_init_late,
-	.timer		= &s3c24xx_timer,
+	.init_time	= s3c24xx_timer_init,
 	.restart	= s3c64xx_restart,
 MACHINE_END
diff --git a/arch/arm/mach-s3c64xx/mach-real6410.c b/arch/arm/mach-s3c64xx/mach-real6410.c
index 7476f7c722ab..b0f61982ef56 100644
--- a/arch/arm/mach-s3c64xx/mach-real6410.c
+++ b/arch/arm/mach-s3c64xx/mach-real6410.c
@@ -335,6 +335,6 @@ MACHINE_START(REAL6410, "REAL6410")
 	.map_io		= real6410_map_io,
 	.init_machine	= real6410_machine_init,
 	.init_late	= s3c64xx_init_late,
-	.timer		= &s3c24xx_timer,
+	.init_time	= s3c24xx_timer_init,
 	.restart	= s3c64xx_restart,
 MACHINE_END
diff --git a/arch/arm/mach-s3c64xx/mach-smartq5.c b/arch/arm/mach-s3c64xx/mach-smartq5.c
index 96d6da2b6b5f..7a737614717e 100644
--- a/arch/arm/mach-s3c64xx/mach-smartq5.c
+++ b/arch/arm/mach-s3c64xx/mach-smartq5.c
@@ -157,6 +157,6 @@ MACHINE_START(SMARTQ5, "SmartQ 5")
 	.map_io		= smartq_map_io,
 	.init_machine	= smartq5_machine_init,
 	.init_late	= s3c64xx_init_late,
-	.timer		= &s3c24xx_timer,
+	.init_time	= s3c24xx_timer_init,
 	.restart	= s3c64xx_restart,
 MACHINE_END
diff --git a/arch/arm/mach-s3c64xx/mach-smartq7.c b/arch/arm/mach-s3c64xx/mach-smartq7.c
index 7d1167bdc921..889d525325c8 100644
--- a/arch/arm/mach-s3c64xx/mach-smartq7.c
+++ b/arch/arm/mach-s3c64xx/mach-smartq7.c
@@ -173,6 +173,6 @@ MACHINE_START(SMARTQ7, "SmartQ 7")
 	.map_io		= smartq_map_io,
 	.init_machine	= smartq7_machine_init,
 	.init_late	= s3c64xx_init_late,
-	.timer		= &s3c24xx_timer,
+	.init_time	= s3c24xx_timer_init,
 	.restart	= s3c64xx_restart,
 MACHINE_END
diff --git a/arch/arm/mach-s3c64xx/mach-smdk6400.c b/arch/arm/mach-s3c64xx/mach-smdk6400.c
index a928fae5694e..e31fe5bb37b6 100644
--- a/arch/arm/mach-s3c64xx/mach-smdk6400.c
+++ b/arch/arm/mach-s3c64xx/mach-smdk6400.c
@@ -94,6 +94,6 @@ MACHINE_START(SMDK6400, "SMDK6400")
 	.map_io		= smdk6400_map_io,
 	.init_machine	= smdk6400_machine_init,
 	.init_late	= s3c64xx_init_late,
-	.timer		= &s3c24xx_timer,
+	.init_time	= s3c24xx_timer_init,
 	.restart	= s3c64xx_restart,
 MACHINE_END
diff --git a/arch/arm/mach-s3c64xx/mach-smdk6410.c b/arch/arm/mach-s3c64xx/mach-smdk6410.c
index 574a9eef588d..f1b87cd9cb0e 100644
--- a/arch/arm/mach-s3c64xx/mach-smdk6410.c
+++ b/arch/arm/mach-s3c64xx/mach-smdk6410.c
@@ -704,6 +704,6 @@ MACHINE_START(SMDK6410, "SMDK6410")
 	.map_io		= smdk6410_map_io,
 	.init_machine	= smdk6410_machine_init,
 	.init_late	= s3c64xx_init_late,
-	.timer		= &s3c24xx_timer,
+	.init_time	= s3c24xx_timer_init,
 	.restart	= s3c64xx_restart,
 MACHINE_END
diff --git a/arch/arm/mach-s5p64x0/mach-smdk6440.c b/arch/arm/mach-s5p64x0/mach-smdk6440.c
index 1af823558c60..0a3146dc081a 100644
--- a/arch/arm/mach-s5p64x0/mach-smdk6440.c
+++ b/arch/arm/mach-s5p64x0/mach-smdk6440.c
@@ -275,6 +275,6 @@ MACHINE_START(SMDK6440, "SMDK6440")
 	.handle_irq	= vic_handle_irq,
 	.map_io		= smdk6440_map_io,
 	.init_machine	= smdk6440_machine_init,
-	.timer		= &s5p_timer,
+	.init_time	= s5p_timer_init,
 	.restart	= s5p64x0_restart,
 MACHINE_END
diff --git a/arch/arm/mach-s5p64x0/mach-smdk6450.c b/arch/arm/mach-s5p64x0/mach-smdk6450.c
index 62526ccf6b70..36917f2ea25a 100644
--- a/arch/arm/mach-s5p64x0/mach-smdk6450.c
+++ b/arch/arm/mach-s5p64x0/mach-smdk6450.c
@@ -294,6 +294,6 @@ MACHINE_START(SMDK6450, "SMDK6450")
 	.handle_irq	= vic_handle_irq,
 	.map_io		= smdk6450_map_io,
 	.init_machine	= smdk6450_machine_init,
-	.timer		= &s5p_timer,
+	.init_time	= s5p_timer_init,
 	.restart	= s5p64x0_restart,
 MACHINE_END
diff --git a/arch/arm/mach-s5pc100/mach-smdkc100.c b/arch/arm/mach-s5pc100/mach-smdkc100.c
index 9abe95e806ab..39a9197d1746 100644
--- a/arch/arm/mach-s5pc100/mach-smdkc100.c
+++ b/arch/arm/mach-s5pc100/mach-smdkc100.c
@@ -257,6 +257,6 @@ MACHINE_START(SMDKC100, "SMDKC100")
 	.handle_irq	= vic_handle_irq,
 	.map_io		= smdkc100_map_io,
 	.init_machine	= smdkc100_machine_init,
-	.timer		= &s3c24xx_timer,
+	.init_time	= s3c24xx_timer_init,
 	.restart	= s5pc100_restart,
 MACHINE_END
diff --git a/arch/arm/mach-s5pv210/mach-aquila.c b/arch/arm/mach-s5pv210/mach-aquila.c
index ee9fa5c2ef2c..1fb44a5ebb83 100644
--- a/arch/arm/mach-s5pv210/mach-aquila.c
+++ b/arch/arm/mach-s5pv210/mach-aquila.c
@@ -688,6 +688,6 @@ MACHINE_START(AQUILA, "Aquila")
 	.handle_irq	= vic_handle_irq,
 	.map_io		= aquila_map_io,
 	.init_machine	= aquila_machine_init,
-	.timer		= &s5p_timer,
+	.init_time	= s5p_timer_init,
 	.restart	= s5pv210_restart,
 MACHINE_END
diff --git a/arch/arm/mach-s5pv210/mach-goni.c b/arch/arm/mach-s5pv210/mach-goni.c
index c72b31078c99..ababdca2b3e4 100644
--- a/arch/arm/mach-s5pv210/mach-goni.c
+++ b/arch/arm/mach-s5pv210/mach-goni.c
@@ -975,7 +975,7 @@ MACHINE_START(GONI, "GONI")
 	.handle_irq	= vic_handle_irq,
 	.map_io		= goni_map_io,
 	.init_machine	= goni_machine_init,
-	.timer		= &s5p_timer,
+	.init_time	= s5p_timer_init,
 	.reserve	= &goni_reserve,
 	.restart	= s5pv210_restart,
 MACHINE_END
diff --git a/arch/arm/mach-s5pv210/mach-smdkc110.c b/arch/arm/mach-s5pv210/mach-smdkc110.c
index f1f3bd37ecda..acfb0ebce13b 100644
--- a/arch/arm/mach-s5pv210/mach-smdkc110.c
+++ b/arch/arm/mach-s5pv210/mach-smdkc110.c
@@ -155,7 +155,7 @@ MACHINE_START(SMDKC110, "SMDKC110")
 	.handle_irq	= vic_handle_irq,
 	.map_io		= smdkc110_map_io,
 	.init_machine	= smdkc110_machine_init,
-	.timer		= &s5p_timer,
+	.init_time	= s5p_timer_init,
 	.restart	= s5pv210_restart,
 	.reserve	= &smdkc110_reserve,
 MACHINE_END
diff --git a/arch/arm/mach-s5pv210/mach-smdkv210.c b/arch/arm/mach-s5pv210/mach-smdkv210.c
index 6bc8404bf678..e1d820f3b426 100644
--- a/arch/arm/mach-s5pv210/mach-smdkv210.c
+++ b/arch/arm/mach-s5pv210/mach-smdkv210.c
@@ -331,7 +331,7 @@ MACHINE_START(SMDKV210, "SMDKV210")
 	.handle_irq	= vic_handle_irq,
 	.map_io		= smdkv210_map_io,
 	.init_machine	= smdkv210_machine_init,
-	.timer		= &s5p_timer,
+	.init_time	= s5p_timer_init,
 	.restart	= s5pv210_restart,
 	.reserve	= &smdkv210_reserve,
 MACHINE_END
diff --git a/arch/arm/mach-s5pv210/mach-torbreck.c b/arch/arm/mach-s5pv210/mach-torbreck.c
index 18785cb5e1ef..1e6fc6eccdf3 100644
--- a/arch/arm/mach-s5pv210/mach-torbreck.c
+++ b/arch/arm/mach-s5pv210/mach-torbreck.c
@@ -132,6 +132,6 @@ MACHINE_START(TORBRECK, "TORBRECK")
 	.handle_irq	= vic_handle_irq,
 	.map_io		= torbreck_map_io,
 	.init_machine	= torbreck_machine_init,
-	.timer		= &s5p_timer,
+	.init_time	= s5p_timer_init,
 	.restart	= s5pv210_restart,
 MACHINE_END
diff --git a/arch/arm/mach-sa1100/assabet.c b/arch/arm/mach-sa1100/assabet.c
index 9a23739f7026..b38d2525d5db 100644
--- a/arch/arm/mach-sa1100/assabet.c
+++ b/arch/arm/mach-sa1100/assabet.c
@@ -621,7 +621,7 @@ MACHINE_START(ASSABET, "Intel-Assabet")
 	.map_io		= assabet_map_io,
 	.nr_irqs	= SA1100_NR_IRQS,
 	.init_irq	= sa1100_init_irq,
-	.timer		= &sa1100_timer,
+	.init_time	= sa1100_timer_init,
 	.init_machine	= assabet_init,
 	.init_late	= sa11x0_init_late,
 #ifdef CONFIG_SA1111
diff --git a/arch/arm/mach-sa1100/badge4.c b/arch/arm/mach-sa1100/badge4.c
index b2dadf3ea3df..63361b6d04e9 100644
--- a/arch/arm/mach-sa1100/badge4.c
+++ b/arch/arm/mach-sa1100/badge4.c
@@ -336,7 +336,7 @@ MACHINE_START(BADGE4, "Hewlett-Packard Laboratories BadgePAD 4")
 	.nr_irqs	= SA1100_NR_IRQS,
 	.init_irq	= sa1100_init_irq,
 	.init_late	= sa11x0_init_late,
-	.timer		= &sa1100_timer,
+	.init_time	= sa1100_timer_init,
 #ifdef CONFIG_SA1111
 	.dma_zone_size	= SZ_1M,
 #endif
diff --git a/arch/arm/mach-sa1100/cerf.c b/arch/arm/mach-sa1100/cerf.c
index 304bca4a07c0..2d25ececb415 100644
--- a/arch/arm/mach-sa1100/cerf.c
+++ b/arch/arm/mach-sa1100/cerf.c
@@ -174,7 +174,7 @@ MACHINE_START(CERF, "Intrinsyc CerfBoard/CerfCube")
 	.map_io		= cerf_map_io,
 	.nr_irqs	= SA1100_NR_IRQS,
 	.init_irq	= cerf_init_irq,
-	.timer		= &sa1100_timer,
+	.init_time	= sa1100_timer_init,
 	.init_machine	= cerf_init,
 	.init_late	= sa11x0_init_late,
 	.restart	= sa11x0_restart,
diff --git a/arch/arm/mach-sa1100/collie.c b/arch/arm/mach-sa1100/collie.c
index 45f424f5fca6..612a45689770 100644
--- a/arch/arm/mach-sa1100/collie.c
+++ b/arch/arm/mach-sa1100/collie.c
@@ -399,7 +399,7 @@ MACHINE_START(COLLIE, "Sharp-Collie")
 	.map_io		= collie_map_io,
 	.nr_irqs	= SA1100_NR_IRQS,
 	.init_irq	= sa1100_init_irq,
-	.timer		= &sa1100_timer,
+	.init_time	= sa1100_timer_init,
 	.init_machine	= collie_init,
 	.init_late	= sa11x0_init_late,
 	.restart	= sa11x0_restart,
diff --git a/arch/arm/mach-sa1100/generic.h b/arch/arm/mach-sa1100/generic.h
index a5b7c13da3e3..2abc6a1f6e86 100644
--- a/arch/arm/mach-sa1100/generic.h
+++ b/arch/arm/mach-sa1100/generic.h
@@ -4,9 +4,7 @@
  * Author: Nicolas Pitre
  */
 
-struct sys_timer;
-
-extern struct sys_timer sa1100_timer;
+extern void sa1100_timer_init(void);
 extern void __init sa1100_map_io(void);
 extern void __init sa1100_init_irq(void);
 extern void __init sa1100_init_gpio(void);
diff --git a/arch/arm/mach-sa1100/h3100.c b/arch/arm/mach-sa1100/h3100.c
index e1571eab08ae..b8f2b151539b 100644
--- a/arch/arm/mach-sa1100/h3100.c
+++ b/arch/arm/mach-sa1100/h3100.c
@@ -108,7 +108,7 @@ MACHINE_START(H3100, "Compaq iPAQ H3100")
 	.map_io		= h3100_map_io,
 	.nr_irqs	= SA1100_NR_IRQS,
 	.init_irq	= sa1100_init_irq,
-	.timer		= &sa1100_timer,
+	.init_time	= sa1100_timer_init,
 	.init_machine	= h3100_mach_init,
 	.init_late	= sa11x0_init_late,
 	.restart	= sa11x0_restart,
diff --git a/arch/arm/mach-sa1100/h3600.c b/arch/arm/mach-sa1100/h3600.c
index ba7a2901ab88..b8dc5bd22623 100644
--- a/arch/arm/mach-sa1100/h3600.c
+++ b/arch/arm/mach-sa1100/h3600.c
@@ -158,7 +158,7 @@ MACHINE_START(H3600, "Compaq iPAQ H3600")
 	.map_io		= h3600_map_io,
 	.nr_irqs	= SA1100_NR_IRQS,
 	.init_irq	= sa1100_init_irq,
-	.timer		= &sa1100_timer,
+	.init_time	= sa1100_timer_init,
 	.init_machine	= h3600_mach_init,
 	.init_late	= sa11x0_init_late,
 	.restart	= sa11x0_restart,
diff --git a/arch/arm/mach-sa1100/hackkit.c b/arch/arm/mach-sa1100/hackkit.c
index d005939c41fc..643d5f2d9af9 100644
--- a/arch/arm/mach-sa1100/hackkit.c
+++ b/arch/arm/mach-sa1100/hackkit.c
@@ -229,7 +229,7 @@ MACHINE_START(HACKKIT, "HackKit Cpu Board")
 	.map_io		= hackkit_map_io,
 	.nr_irqs	= SA1100_NR_IRQS,
 	.init_irq	= sa1100_init_irq,
-	.timer		= &sa1100_timer,
+	.init_time	= sa1100_timer_init,
 	.init_machine	= hackkit_init,
 	.init_late	= sa11x0_init_late,
 	.restart	= sa11x0_restart,
diff --git a/arch/arm/mach-sa1100/jornada720.c b/arch/arm/mach-sa1100/jornada720.c
index 35cfc428b4d4..c0b1f5bafae4 100644
--- a/arch/arm/mach-sa1100/jornada720.c
+++ b/arch/arm/mach-sa1100/jornada720.c
@@ -346,7 +346,7 @@ MACHINE_START(JORNADA720, "HP Jornada 720")
 	.map_io		= jornada720_map_io,
 	.nr_irqs	= SA1100_NR_IRQS,
 	.init_irq	= sa1100_init_irq,
-	.timer		= &sa1100_timer,
+	.init_time	= sa1100_timer_init,
 	.init_machine	= jornada720_mach_init,
 	.init_late	= sa11x0_init_late,
 #ifdef CONFIG_SA1111
diff --git a/arch/arm/mach-sa1100/lart.c b/arch/arm/mach-sa1100/lart.c
index f69f78fc3ddd..a89917653884 100644
--- a/arch/arm/mach-sa1100/lart.c
+++ b/arch/arm/mach-sa1100/lart.c
@@ -174,6 +174,6 @@ MACHINE_START(LART, "LART")
 	.init_irq	= sa1100_init_irq,
 	.init_machine	= lart_init,
 	.init_late	= sa11x0_init_late,
-	.timer		= &sa1100_timer,
+	.init_time	= sa1100_timer_init,
 	.restart	= sa11x0_restart,
 MACHINE_END
diff --git a/arch/arm/mach-sa1100/nanoengine.c b/arch/arm/mach-sa1100/nanoengine.c
index 102e08f7b109..f1cb3784d525 100644
--- a/arch/arm/mach-sa1100/nanoengine.c
+++ b/arch/arm/mach-sa1100/nanoengine.c
@@ -110,7 +110,7 @@ MACHINE_START(NANOENGINE, "BSE nanoEngine")
 	.map_io		= nanoengine_map_io,
 	.nr_irqs	= SA1100_NR_IRQS,
 	.init_irq	= sa1100_init_irq,
-	.timer		= &sa1100_timer,
+	.init_time	= sa1100_timer_init,
 	.init_machine	= nanoengine_init,
 	.init_late	= sa11x0_init_late,
 	.restart	= sa11x0_restart,
diff --git a/arch/arm/mach-sa1100/pleb.c b/arch/arm/mach-sa1100/pleb.c
index c51bb63f90fb..091261878eff 100644
--- a/arch/arm/mach-sa1100/pleb.c
+++ b/arch/arm/mach-sa1100/pleb.c
@@ -133,7 +133,7 @@ MACHINE_START(PLEB, "PLEB")
 	.map_io		= pleb_map_io,
 	.nr_irqs	= SA1100_NR_IRQS,
 	.init_irq	= sa1100_init_irq,
-	.timer		= &sa1100_timer,
+	.init_time	= sa1100_timer_init,
 	.init_machine   = pleb_init,
 	.init_late	= sa11x0_init_late,
 	.restart	= sa11x0_restart,
diff --git a/arch/arm/mach-sa1100/shannon.c b/arch/arm/mach-sa1100/shannon.c
index 6460d25fbb88..c8866bce7386 100644
--- a/arch/arm/mach-sa1100/shannon.c
+++ b/arch/arm/mach-sa1100/shannon.c
@@ -102,7 +102,7 @@ MACHINE_START(SHANNON, "Shannon (AKA: Tuxscreen)")
 	.map_io		= shannon_map_io,
 	.nr_irqs	= SA1100_NR_IRQS,
 	.init_irq	= sa1100_init_irq,
-	.timer		= &sa1100_timer,
+	.init_time	= sa1100_timer_init,
 	.init_machine	= shannon_init,
 	.init_late	= sa11x0_init_late,
 	.restart	= sa11x0_restart,
diff --git a/arch/arm/mach-sa1100/simpad.c b/arch/arm/mach-sa1100/simpad.c
index 6d65f65fcb23..bcbc94540e45 100644
--- a/arch/arm/mach-sa1100/simpad.c
+++ b/arch/arm/mach-sa1100/simpad.c
@@ -396,6 +396,6 @@ MACHINE_START(SIMPAD, "Simpad")
 	.nr_irqs	= SA1100_NR_IRQS,
 	.init_irq	= sa1100_init_irq,
 	.init_late	= sa11x0_init_late,
-	.timer		= &sa1100_timer,
+	.init_time	= sa1100_timer_init,
 	.restart	= sa11x0_restart,
 MACHINE_END
diff --git a/arch/arm/mach-sa1100/time.c b/arch/arm/mach-sa1100/time.c
index 164f8276233c..934db6385cd6 100644
--- a/arch/arm/mach-sa1100/time.c
+++ b/arch/arm/mach-sa1100/time.c
@@ -117,7 +117,7 @@ static struct irqaction sa1100_timer_irq = {
 	.dev_id		= &ckevt_sa1100_osmr0,
 };
 
-static void __init sa1100_timer_init(void)
+void __init sa1100_timer_init(void)
 {
 	writel_relaxed(0, OIER);
 	writel_relaxed(OSSR_M0 | OSSR_M1 | OSSR_M2 | OSSR_M3, OSSR);
@@ -137,7 +137,3 @@ static void __init sa1100_timer_init(void)
 		clocksource_mmio_readl_up);
 	clockevents_register_device(&ckevt_sa1100_osmr0);
 }
-
-struct sys_timer sa1100_timer = {
-	.init		= sa1100_timer_init,
-};
diff --git a/arch/arm/mach-shark/core.c b/arch/arm/mach-shark/core.c
index 9ad2e9737fb5..b63dec848195 100644
--- a/arch/arm/mach-shark/core.c
+++ b/arch/arm/mach-shark/core.c
@@ -128,10 +128,6 @@ static void __init shark_timer_init(void)
 	setup_irq(IRQ_TIMER, &shark_timer_irq);
 }
 
-static struct sys_timer shark_timer = {
-	.init		= shark_timer_init,
-};
-
 static void shark_init_early(void)
 {
 	disable_hlt();
@@ -142,7 +138,7 @@ MACHINE_START(SHARK, "Shark")
 	.atag_offset	= 0x3000,
 	.init_early	= shark_init_early,
 	.init_irq	= shark_init_irq,
-	.timer		= &shark_timer,
+	.init_time	= shark_timer_init,
 	.dma_zone_size	= SZ_4M,
 	.restart	= shark_restart,
 MACHINE_END
diff --git a/arch/arm/mach-shmobile/board-ag5evm.c b/arch/arm/mach-shmobile/board-ag5evm.c
index 032d10817e79..d81a66362b7c 100644
--- a/arch/arm/mach-shmobile/board-ag5evm.c
+++ b/arch/arm/mach-shmobile/board-ag5evm.c
@@ -671,5 +671,5 @@ MACHINE_START(AG5EVM, "ag5evm")
 	.handle_irq	= gic_handle_irq,
 	.init_machine	= ag5evm_init,
 	.init_late	= shmobile_init_late,
-	.timer		= &shmobile_timer,
+	.init_time	= sh73a0_earlytimer_init,
 MACHINE_END
diff --git a/arch/arm/mach-shmobile/board-ap4evb.c b/arch/arm/mach-shmobile/board-ap4evb.c
index 99ef190d0909..c1d4ab630214 100644
--- a/arch/arm/mach-shmobile/board-ap4evb.c
+++ b/arch/arm/mach-shmobile/board-ap4evb.c
@@ -1350,5 +1350,5 @@ MACHINE_START(AP4EVB, "ap4evb")
 	.handle_irq	= shmobile_handle_irq_intc,
 	.init_machine	= ap4evb_init,
 	.init_late	= sh7372_pm_init_late,
-	.timer		= &shmobile_timer,
+	.init_time	= sh7372_earlytimer_init,
 MACHINE_END
diff --git a/arch/arm/mach-shmobile/board-armadillo800eva.c b/arch/arm/mach-shmobile/board-armadillo800eva.c
index 5353adf6b828..e7912447ad50 100644
--- a/arch/arm/mach-shmobile/board-armadillo800eva.c
+++ b/arch/arm/mach-shmobile/board-armadillo800eva.c
@@ -1192,9 +1192,6 @@ static void __init eva_earlytimer_init(void)
 static void __init eva_add_early_devices(void)
 {
 	r8a7740_add_early_devices();
-
-	/* override timer setup with board-specific code */
-	shmobile_timer.init = eva_earlytimer_init;
 }
 
 #define RESCNT2 IOMEM(0xe6188020)
@@ -1216,7 +1213,7 @@ DT_MACHINE_START(ARMADILLO800EVA_DT, "armadillo800eva")
 	.handle_irq	= shmobile_handle_irq_intc,
 	.init_machine	= eva_init,
 	.init_late	= shmobile_init_late,
-	.timer		= &shmobile_timer,
+	.init_time	= eva_earlytimer_init,
 	.dt_compat	= eva_boards_compat_dt,
 	.restart	= eva_restart,
 MACHINE_END
diff --git a/arch/arm/mach-shmobile/board-bonito.c b/arch/arm/mach-shmobile/board-bonito.c
index cb8c994e1430..331b7ce4edd8 100644
--- a/arch/arm/mach-shmobile/board-bonito.c
+++ b/arch/arm/mach-shmobile/board-bonito.c
@@ -499,9 +499,6 @@ static void __init bonito_earlytimer_init(void)
 static void __init bonito_add_early_devices(void)
 {
 	r8a7740_add_early_devices();
-
-	/* override timer setup with board-specific code */
-	shmobile_timer.init = bonito_earlytimer_init;
 }
 
 MACHINE_START(BONITO, "bonito")
@@ -511,5 +508,5 @@ MACHINE_START(BONITO, "bonito")
 	.handle_irq	= shmobile_handle_irq_intc,
 	.init_machine	= bonito_init,
 	.init_late	= shmobile_init_late,
-	.timer		= &shmobile_timer,
+	.init_time	= bonito_earlytimer_init,
 MACHINE_END
diff --git a/arch/arm/mach-shmobile/board-kota2.c b/arch/arm/mach-shmobile/board-kota2.c
index bf88f9a8b7ac..2f24994f2ef8 100644
--- a/arch/arm/mach-shmobile/board-kota2.c
+++ b/arch/arm/mach-shmobile/board-kota2.c
@@ -553,5 +553,5 @@ MACHINE_START(KOTA2, "kota2")
 	.handle_irq	= gic_handle_irq,
 	.init_machine	= kota2_init,
 	.init_late	= shmobile_init_late,
-	.timer		= &shmobile_timer,
+	.init_time	= sh73a0_earlytimer_init,
 MACHINE_END
diff --git a/arch/arm/mach-shmobile/board-kzm9d.c b/arch/arm/mach-shmobile/board-kzm9d.c
index b52bc0d1273f..59be864f5992 100644
--- a/arch/arm/mach-shmobile/board-kzm9d.c
+++ b/arch/arm/mach-shmobile/board-kzm9d.c
@@ -92,6 +92,6 @@ DT_MACHINE_START(KZM9D_DT, "kzm9d")
 	.handle_irq	= gic_handle_irq,
 	.init_machine	= kzm9d_add_standard_devices,
 	.init_late	= shmobile_init_late,
-	.timer		= &shmobile_timer,
+	.init_time	= shmobile_timer_init,
 	.dt_compat	= kzm9d_boards_compat_dt,
 MACHINE_END
diff --git a/arch/arm/mach-shmobile/board-kzm9g.c b/arch/arm/mach-shmobile/board-kzm9g.c
index c02448d6847f..adb23ef51121 100644
--- a/arch/arm/mach-shmobile/board-kzm9g.c
+++ b/arch/arm/mach-shmobile/board-kzm9g.c
@@ -795,7 +795,7 @@ DT_MACHINE_START(KZM9G_DT, "kzm9g")
 	.handle_irq	= gic_handle_irq,
 	.init_machine	= kzm_init,
 	.init_late	= shmobile_init_late,
-	.timer		= &shmobile_timer,
+	.init_time	= sh73a0_earlytimer_init,
 	.restart	= kzm9g_restart,
 	.dt_compat	= kzm9g_boards_compat_dt,
 MACHINE_END
diff --git a/arch/arm/mach-shmobile/board-mackerel.c b/arch/arm/mach-shmobile/board-mackerel.c
index 2fed62f66045..fe4917f2c1a2 100644
--- a/arch/arm/mach-shmobile/board-mackerel.c
+++ b/arch/arm/mach-shmobile/board-mackerel.c
@@ -1593,6 +1593,6 @@ DT_MACHINE_START(MACKEREL_DT, "mackerel")
 	.handle_irq	= shmobile_handle_irq_intc,
 	.init_machine	= mackerel_init,
 	.init_late	= sh7372_pm_init_late,
-	.timer		= &shmobile_timer,
+	.init_time	= sh7372_earlytimer_init,
 	.dt_compat  = mackerel_boards_compat_dt,
 MACHINE_END
diff --git a/arch/arm/mach-shmobile/board-marzen.c b/arch/arm/mach-shmobile/board-marzen.c
index 449f9289567d..ca45a0c50afe 100644
--- a/arch/arm/mach-shmobile/board-marzen.c
+++ b/arch/arm/mach-shmobile/board-marzen.c
@@ -385,5 +385,5 @@ MACHINE_START(MARZEN, "marzen")
 	.handle_irq	= gic_handle_irq,
 	.init_machine	= marzen_init,
 	.init_late	= marzen_init_late,
-	.timer		= &shmobile_timer,
+	.init_time	= r8a7779_earlytimer_init,
 MACHINE_END
diff --git a/arch/arm/mach-shmobile/include/mach/common.h b/arch/arm/mach-shmobile/include/mach/common.h
index dfeca79e9e96..a57439eec11a 100644
--- a/arch/arm/mach-shmobile/include/mach/common.h
+++ b/arch/arm/mach-shmobile/include/mach/common.h
@@ -2,7 +2,7 @@
 #define __ARCH_MACH_COMMON_H
 
 extern void shmobile_earlytimer_init(void);
-extern struct sys_timer shmobile_timer;
+extern void shmobile_timer_init(void);
 extern void shmobile_setup_delay(unsigned int max_cpu_core_mhz,
 			 unsigned int mult, unsigned int div);
 struct twd_local_timer;
@@ -20,6 +20,7 @@ extern void shmobile_cpuidle_set_driver(struct cpuidle_driver *drv);
 
 extern void sh7372_init_irq(void);
 extern void sh7372_map_io(void);
+extern void sh7372_earlytimer_init(void);
 extern void sh7372_add_early_devices(void);
 extern void sh7372_add_standard_devices(void);
 extern void sh7372_clock_init(void);
@@ -32,6 +33,7 @@ extern struct clk sh7372_extal2_clk;
 
 extern void sh73a0_init_irq(void);
 extern void sh73a0_map_io(void);
+extern void sh73a0_earlytimer_init(void);
 extern void sh73a0_add_early_devices(void);
 extern void sh73a0_add_standard_devices(void);
 extern void sh73a0_clock_init(void);
@@ -50,6 +52,7 @@ extern void r8a7740_pinmux_init(void);
 
 extern void r8a7779_init_irq(void);
 extern void r8a7779_map_io(void);
+extern void r8a7779_earlytimer_init(void);
 extern void r8a7779_add_early_devices(void);
 extern void r8a7779_add_standard_devices(void);
 extern void r8a7779_clock_init(void);
diff --git a/arch/arm/mach-shmobile/setup-emev2.c b/arch/arm/mach-shmobile/setup-emev2.c
index a47beeb18283..ea61cb657ac3 100644
--- a/arch/arm/mach-shmobile/setup-emev2.c
+++ b/arch/arm/mach-shmobile/setup-emev2.c
@@ -467,7 +467,7 @@ DT_MACHINE_START(EMEV2_DT, "Generic Emma Mobile EV2 (Flattened Device Tree)")
 	.init_irq	= emev2_init_irq_dt,
 	.handle_irq	= gic_handle_irq,
 	.init_machine	= emev2_add_standard_devices_dt,
-	.timer		= &shmobile_timer,
+	.init_time	= shmobile_timer_init,
 	.dt_compat	= emev2_boards_compat_dt,
 MACHINE_END
 
diff --git a/arch/arm/mach-shmobile/setup-r8a7740.c b/arch/arm/mach-shmobile/setup-r8a7740.c
index 095222469d03..03c69f9979aa 100644
--- a/arch/arm/mach-shmobile/setup-r8a7740.c
+++ b/arch/arm/mach-shmobile/setup-r8a7740.c
@@ -705,12 +705,6 @@ void __init r8a7740_add_standard_devices(void)
 	rmobile_add_device_to_domain("A3SP",	&i2c1_device);
 }
 
-static void __init r8a7740_earlytimer_init(void)
-{
-	r8a7740_clock_init(0);
-	shmobile_earlytimer_init();
-}
-
 void __init r8a7740_add_early_devices(void)
 {
 	early_platform_add_devices(r8a7740_early_devices,
@@ -718,9 +712,6 @@ void __init r8a7740_add_early_devices(void)
 
 	/* setup early console here as well */
 	shmobile_setup_console();
-
-	/* override timer setup with soc-specific code */
-	shmobile_timer.init = r8a7740_earlytimer_init;
 }
 
 #ifdef CONFIG_USE_OF
@@ -763,7 +754,7 @@ DT_MACHINE_START(R8A7740_DT, "Generic R8A7740 (Flattened Device Tree)")
 	.init_irq	= r8a7740_init_irq,
 	.handle_irq	= shmobile_handle_irq_intc,
 	.init_machine	= r8a7740_add_standard_devices_dt,
-	.timer		= &shmobile_timer,
+	.init_time	= shmobile_timer_init,
 	.dt_compat	= r8a7740_boards_compat_dt,
 MACHINE_END
 
diff --git a/arch/arm/mach-shmobile/setup-r8a7779.c b/arch/arm/mach-shmobile/setup-r8a7779.c
index 7a1ad4f38539..a181ced09e45 100644
--- a/arch/arm/mach-shmobile/setup-r8a7779.c
+++ b/arch/arm/mach-shmobile/setup-r8a7779.c
@@ -339,7 +339,7 @@ void __init r8a7779_add_standard_devices(void)
 /* do nothing for !CONFIG_SMP or !CONFIG_HAVE_TWD */
 void __init __weak r8a7779_register_twd(void) { }
 
-static void __init r8a7779_earlytimer_init(void)
+void __init r8a7779_earlytimer_init(void)
 {
 	r8a7779_clock_init();
 	shmobile_earlytimer_init();
@@ -366,7 +366,4 @@ void __init r8a7779_add_early_devices(void)
 	 * As a final step pass earlyprint=sh-sci.2,115200 on the kernel
 	 * command line in case of the marzen board.
 	 */
-
-	/* override timer setup with soc-specific code */
-	shmobile_timer.init = r8a7779_earlytimer_init;
 }
diff --git a/arch/arm/mach-shmobile/setup-sh7372.c b/arch/arm/mach-shmobile/setup-sh7372.c
index c917882424a7..191ae72e21ba 100644
--- a/arch/arm/mach-shmobile/setup-sh7372.c
+++ b/arch/arm/mach-shmobile/setup-sh7372.c
@@ -1054,7 +1054,7 @@ void __init sh7372_add_standard_devices(void)
 				       ARRAY_SIZE(domain_devices));
 }
 
-static void __init sh7372_earlytimer_init(void)
+void __init sh7372_earlytimer_init(void)
 {
 	sh7372_clock_init();
 	shmobile_earlytimer_init();
@@ -1067,9 +1067,6 @@ void __init sh7372_add_early_devices(void)
 
 	/* setup early console here as well */
 	shmobile_setup_console();
-
-	/* override timer setup with soc-specific code */
-	shmobile_timer.init = sh7372_earlytimer_init;
 }
 
 #ifdef CONFIG_USE_OF
@@ -1113,7 +1110,7 @@ DT_MACHINE_START(SH7372_DT, "Generic SH7372 (Flattened Device Tree)")
 	.init_irq	= sh7372_init_irq,
 	.handle_irq	= shmobile_handle_irq_intc,
 	.init_machine	= sh7372_add_standard_devices_dt,
-	.timer		= &shmobile_timer,
+	.init_time	= shmobile_timer_init,
 	.dt_compat	= sh7372_boards_compat_dt,
 MACHINE_END
 
diff --git a/arch/arm/mach-shmobile/setup-sh73a0.c b/arch/arm/mach-shmobile/setup-sh73a0.c
index db99a4ade80c..8c2d6424f470 100644
--- a/arch/arm/mach-shmobile/setup-sh73a0.c
+++ b/arch/arm/mach-shmobile/setup-sh73a0.c
@@ -796,7 +796,7 @@ void __init sh73a0_add_standard_devices(void)
 /* do nothing for !CONFIG_SMP or !CONFIG_HAVE_TWD */
 void __init __weak sh73a0_register_twd(void) { }
 
-static void __init sh73a0_earlytimer_init(void)
+void __init sh73a0_earlytimer_init(void)
 {
 	sh73a0_clock_init();
 	shmobile_earlytimer_init();
@@ -810,7 +810,4 @@ void __init sh73a0_add_early_devices(void)
 
 	/* setup early console here as well */
 	shmobile_setup_console();
-
-	/* override timer setup with soc-specific code */
-	shmobile_timer.init = sh73a0_earlytimer_init;
 }
diff --git a/arch/arm/mach-shmobile/timer.c b/arch/arm/mach-shmobile/timer.c
index a68919727e24..fdbe54a11555 100644
--- a/arch/arm/mach-shmobile/timer.c
+++ b/arch/arm/mach-shmobile/timer.c
@@ -60,10 +60,6 @@ void __init shmobile_earlytimer_init(void)
 	late_time_init = shmobile_late_time_init;
 }
 
-static void __init shmobile_timer_init(void)
+void __init shmobile_timer_init(void)
 {
 }
-
-struct sys_timer shmobile_timer = {
-	.init		= shmobile_timer_init,
-};
diff --git a/arch/arm/mach-socfpga/socfpga.c b/arch/arm/mach-socfpga/socfpga.c
index 6732924a5fee..b54baea5f809 100644
--- a/arch/arm/mach-socfpga/socfpga.c
+++ b/arch/arm/mach-socfpga/socfpga.c
@@ -107,7 +107,7 @@ DT_MACHINE_START(SOCFPGA, "Altera SOCFPGA")
 	.map_io		= socfpga_map_io,
 	.init_irq	= gic_init_irq,
 	.handle_irq     = gic_handle_irq,
-	.timer		= &dw_apb_timer,
+	.init_time	= dw_apb_timer_init,
 	.init_machine	= socfpga_cyclone5_init,
 	.restart	= socfpga_cyclone5_restart,
 	.dt_compat	= altera_dt_match,
diff --git a/arch/arm/mach-spear13xx/include/mach/generic.h b/arch/arm/mach-spear13xx/include/mach/generic.h
index c33f4d9361bd..35e8a0074639 100644
--- a/arch/arm/mach-spear13xx/include/mach/generic.h
+++ b/arch/arm/mach-spear13xx/include/mach/generic.h
@@ -18,7 +18,7 @@
 #include <asm/mach/time.h>
 
 /* Add spear13xx structure declarations here */
-extern struct sys_timer spear13xx_timer;
+extern void spear13xx_timer_init(void);
 extern struct pl022_ssp_controller pl022_plat_data;
 extern struct dw_dma_platform_data dmac_plat_data;
 extern struct dw_dma_slave cf_dma_priv;
diff --git a/arch/arm/mach-spear13xx/spear1310.c b/arch/arm/mach-spear13xx/spear1310.c
index 02f4724bb0d4..e77d05d48082 100644
--- a/arch/arm/mach-spear13xx/spear1310.c
+++ b/arch/arm/mach-spear13xx/spear1310.c
@@ -92,7 +92,7 @@ DT_MACHINE_START(SPEAR1310_DT, "ST SPEAr1310 SoC with Flattened Device Tree")
 	.map_io		=	spear1310_map_io,
 	.init_irq	=	spear13xx_dt_init_irq,
 	.handle_irq	=	gic_handle_irq,
-	.timer		=	&spear13xx_timer,
+	.init_time	=	spear13xx_timer_init,
 	.init_machine	=	spear1310_dt_init,
 	.restart	=	spear_restart,
 	.dt_compat	=	spear1310_dt_board_compat,
diff --git a/arch/arm/mach-spear13xx/spear1340.c b/arch/arm/mach-spear13xx/spear1340.c
index 081014fb314a..ebc254779069 100644
--- a/arch/arm/mach-spear13xx/spear1340.c
+++ b/arch/arm/mach-spear13xx/spear1340.c
@@ -186,7 +186,7 @@ DT_MACHINE_START(SPEAR1340_DT, "ST SPEAr1340 SoC with Flattened Device Tree")
 	.map_io		=	spear13xx_map_io,
 	.init_irq	=	spear13xx_dt_init_irq,
 	.handle_irq	=	gic_handle_irq,
-	.timer		=	&spear13xx_timer,
+	.init_time	=	spear13xx_timer_init,
 	.init_machine	=	spear1340_dt_init,
 	.restart	=	spear_restart,
 	.dt_compat	=	spear1340_dt_board_compat,
diff --git a/arch/arm/mach-spear13xx/spear13xx.c b/arch/arm/mach-spear13xx/spear13xx.c
index c4af775a8451..7f7acf775f07 100644
--- a/arch/arm/mach-spear13xx/spear13xx.c
+++ b/arch/arm/mach-spear13xx/spear13xx.c
@@ -153,7 +153,7 @@ static void __init spear13xx_clk_init(void)
 		pr_err("%s: Unknown machine\n", __func__);
 }
 
-static void __init spear13xx_timer_init(void)
+void __init spear13xx_timer_init(void)
 {
 	char pclk_name[] = "osc_24m_clk";
 	struct clk *gpt_clk, *pclk;
@@ -183,10 +183,6 @@ static void __init spear13xx_timer_init(void)
 	twd_local_timer_of_register();
 }
 
-struct sys_timer spear13xx_timer = {
-	.init = spear13xx_timer_init,
-};
-
 static const struct of_device_id gic_of_match[] __initconst = {
 	{ .compatible = "arm,cortex-a9-gic", .data = gic_of_init },
 	{ /* Sentinel */ }
diff --git a/arch/arm/mach-spear3xx/include/mach/generic.h b/arch/arm/mach-spear3xx/include/mach/generic.h
index ce19113ca791..46b8f7e4d380 100644
--- a/arch/arm/mach-spear3xx/include/mach/generic.h
+++ b/arch/arm/mach-spear3xx/include/mach/generic.h
@@ -22,7 +22,7 @@
 #include <asm/mach/map.h>
 
 /* Add spear3xx family device structure declarations here */
-extern struct sys_timer spear3xx_timer;
+extern void spear3xx_timer_init(void);
 extern struct pl022_ssp_controller pl022_plat_data;
 extern struct pl08x_platform_data pl080_plat_data;
 
diff --git a/arch/arm/mach-spear3xx/spear300.c b/arch/arm/mach-spear3xx/spear300.c
index a69cbfdb07ee..2630efa93f5e 100644
--- a/arch/arm/mach-spear3xx/spear300.c
+++ b/arch/arm/mach-spear3xx/spear300.c
@@ -214,7 +214,7 @@ DT_MACHINE_START(SPEAR300_DT, "ST SPEAr300 SoC with Flattened Device Tree")
 	.map_io		=	spear300_map_io,
 	.init_irq	=	spear3xx_dt_init_irq,
 	.handle_irq	=	vic_handle_irq,
-	.timer		=	&spear3xx_timer,
+	.init_time	=	spear3xx_timer_init,
 	.init_machine	=	spear300_dt_init,
 	.restart	=	spear_restart,
 	.dt_compat	=	spear300_dt_board_compat,
diff --git a/arch/arm/mach-spear3xx/spear310.c b/arch/arm/mach-spear3xx/spear310.c
index b963ebb10b56..b6147eaebcde 100644
--- a/arch/arm/mach-spear3xx/spear310.c
+++ b/arch/arm/mach-spear3xx/spear310.c
@@ -256,7 +256,7 @@ DT_MACHINE_START(SPEAR310_DT, "ST SPEAr310 SoC with Flattened Device Tree")
 	.map_io		=	spear310_map_io,
 	.init_irq	=	spear3xx_dt_init_irq,
 	.handle_irq	=	vic_handle_irq,
-	.timer		=	&spear3xx_timer,
+	.init_time	=	spear3xx_timer_init,
 	.init_machine	=	spear310_dt_init,
 	.restart	=	spear_restart,
 	.dt_compat	=	spear310_dt_board_compat,
diff --git a/arch/arm/mach-spear3xx/spear320.c b/arch/arm/mach-spear3xx/spear320.c
index 66e3a0c33e75..53160f713afe 100644
--- a/arch/arm/mach-spear3xx/spear320.c
+++ b/arch/arm/mach-spear3xx/spear320.c
@@ -270,7 +270,7 @@ DT_MACHINE_START(SPEAR320_DT, "ST SPEAr320 SoC with Flattened Device Tree")
 	.map_io		=	spear320_map_io,
 	.init_irq	=	spear3xx_dt_init_irq,
 	.handle_irq	=	vic_handle_irq,
-	.timer		=	&spear3xx_timer,
+	.init_time	=	spear3xx_timer_init,
 	.init_machine	=	spear320_dt_init,
 	.restart	=	spear_restart,
 	.dt_compat	=	spear320_dt_board_compat,
diff --git a/arch/arm/mach-spear3xx/spear3xx.c b/arch/arm/mach-spear3xx/spear3xx.c
index 38fe95db31a7..89f4c58908e3 100644
--- a/arch/arm/mach-spear3xx/spear3xx.c
+++ b/arch/arm/mach-spear3xx/spear3xx.c
@@ -87,7 +87,7 @@ void __init spear3xx_map_io(void)
 	iotable_init(spear3xx_io_desc, ARRAY_SIZE(spear3xx_io_desc));
 }
 
-static void __init spear3xx_timer_init(void)
+void __init spear3xx_timer_init(void)
 {
 	char pclk_name[] = "pll3_clk";
 	struct clk *gpt_clk, *pclk;
@@ -116,10 +116,6 @@ static void __init spear3xx_timer_init(void)
 	spear_setup_of_timer();
 }
 
-struct sys_timer spear3xx_timer = {
-	.init = spear3xx_timer_init,
-};
-
 static const struct of_device_id vic_of_match[] __initconst = {
 	{ .compatible = "arm,pl190-vic", .data = vic_of_init, },
 	{ .compatible = "st,spear300-shirq", .data = spear300_shirq_of_init, },
diff --git a/arch/arm/mach-spear6xx/spear6xx.c b/arch/arm/mach-spear6xx/spear6xx.c
index 5a5a52db252b..1f85bc07c6cb 100644
--- a/arch/arm/mach-spear6xx/spear6xx.c
+++ b/arch/arm/mach-spear6xx/spear6xx.c
@@ -374,7 +374,7 @@ void __init spear6xx_map_io(void)
 	iotable_init(spear6xx_io_desc, ARRAY_SIZE(spear6xx_io_desc));
 }
 
-static void __init spear6xx_timer_init(void)
+void __init spear6xx_timer_init(void)
 {
 	char pclk_name[] = "pll3_clk";
 	struct clk *gpt_clk, *pclk;
@@ -403,10 +403,6 @@ static void __init spear6xx_timer_init(void)
 	spear_setup_of_timer();
 }
 
-struct sys_timer spear6xx_timer = {
-	.init = spear6xx_timer_init,
-};
-
 /* Add auxdata to pass platform data */
 struct of_dev_auxdata spear6xx_auxdata_lookup[] __initdata = {
 	OF_DEV_AUXDATA("arm,pl080", SPEAR6XX_ICM3_DMA_BASE, NULL,
@@ -439,7 +435,7 @@ DT_MACHINE_START(SPEAR600_DT, "ST SPEAr600 (Flattened Device Tree)")
 	.map_io		=	spear6xx_map_io,
 	.init_irq	=	spear6xx_dt_init_irq,
 	.handle_irq	=	vic_handle_irq,
-	.timer		=	&spear6xx_timer,
+	.init_time	=	spear6xx_timer_init,
 	.init_machine	=	spear600_dt_init,
 	.restart	=	spear_restart,
 	.dt_compat	=	spear600_dt_board_compat,
diff --git a/arch/arm/mach-tegra/board-dt-tegra20.c b/arch/arm/mach-tegra/board-dt-tegra20.c
index 734d9cc87f2e..3b9956aabf5a 100644
--- a/arch/arm/mach-tegra/board-dt-tegra20.c
+++ b/arch/arm/mach-tegra/board-dt-tegra20.c
@@ -203,7 +203,7 @@ DT_MACHINE_START(TEGRA_DT, "nVidia Tegra20 (Flattened Device Tree)")
 	.init_early	= tegra20_init_early,
 	.init_irq	= tegra_dt_init_irq,
 	.handle_irq	= gic_handle_irq,
-	.timer		= &tegra_sys_timer,
+	.init_time	= tegra_init_timer,
 	.init_machine	= tegra_dt_init,
 	.init_late	= tegra_dt_init_late,
 	.restart	= tegra_assert_system_reset,
diff --git a/arch/arm/mach-tegra/board-dt-tegra30.c b/arch/arm/mach-tegra/board-dt-tegra30.c
index 6497d1236b08..381b2f25f0b4 100644
--- a/arch/arm/mach-tegra/board-dt-tegra30.c
+++ b/arch/arm/mach-tegra/board-dt-tegra30.c
@@ -113,7 +113,7 @@ DT_MACHINE_START(TEGRA30_DT, "NVIDIA Tegra30 (Flattened Device Tree)")
 	.init_early	= tegra30_init_early,
 	.init_irq	= tegra_dt_init_irq,
 	.handle_irq	= gic_handle_irq,
-	.timer		= &tegra_sys_timer,
+	.init_time	= tegra_init_timer,
 	.init_machine	= tegra30_dt_init,
 	.init_late	= tegra_init_late,
 	.restart	= tegra_assert_system_reset,
diff --git a/arch/arm/mach-tegra/board.h b/arch/arm/mach-tegra/board.h
index 91fbe733a21e..744cdd246f6a 100644
--- a/arch/arm/mach-tegra/board.h
+++ b/arch/arm/mach-tegra/board.h
@@ -55,5 +55,5 @@ static inline int harmony_pcie_init(void) { return 0; }
 
 void __init tegra_paz00_wifikill_init(void);
 
-extern struct sys_timer tegra_sys_timer;
+extern void tegra_init_timer(void);
 #endif
diff --git a/arch/arm/mach-tegra/timer.c b/arch/arm/mach-tegra/timer.c
index e4863f3e9ee7..b0036e519a15 100644
--- a/arch/arm/mach-tegra/timer.c
+++ b/arch/arm/mach-tegra/timer.c
@@ -168,7 +168,7 @@ static const struct of_device_id rtc_match[] __initconst = {
 	{}
 };
 
-static void __init tegra_init_timer(void)
+void __init tegra_init_timer(void)
 {
 	struct device_node *np;
 	struct clk *clk;
@@ -273,10 +273,6 @@ static void __init tegra_init_timer(void)
 	register_persistent_clock(NULL, tegra_read_persistent_clock);
 }
 
-struct sys_timer tegra_sys_timer = {
-	.init = tegra_init_timer,
-};
-
 #ifdef CONFIG_PM
 static u32 usec_config;
 
diff --git a/arch/arm/mach-u300/core.c b/arch/arm/mach-u300/core.c
index 4ce77cdc31cc..100a8b764dad 100644
--- a/arch/arm/mach-u300/core.c
+++ b/arch/arm/mach-u300/core.c
@@ -1780,7 +1780,7 @@ MACHINE_START(U300, "Ericsson AB U335 S335/B335 Prototype Board")
 	.nr_irqs	= 0,
 	.init_irq	= u300_init_irq,
 	.handle_irq	= vic_handle_irq,
-	.timer		= &u300_timer,
+	.init_time	= u300_timer_init,
 	.init_machine	= u300_init_machine,
 	.restart	= u300_restart,
 MACHINE_END
diff --git a/arch/arm/mach-u300/timer.c b/arch/arm/mach-u300/timer.c
index 1da10e20e996..d9e73209c9b8 100644
--- a/arch/arm/mach-u300/timer.c
+++ b/arch/arm/mach-u300/timer.c
@@ -349,7 +349,7 @@ static u32 notrace u300_read_sched_clock(void)
 /*
  * This sets up the system timers, clock source and clock event.
  */
-static void __init u300_timer_init(void)
+void __init u300_timer_init(void)
 {
 	struct clk *clk;
 	unsigned long rate;
@@ -413,11 +413,3 @@ static void __init u300_timer_init(void)
 	 * used by hrtimers!
 	 */
 }
-
-/*
- * Very simple system timer that only register the clock event and
- * clock source.
- */
-struct sys_timer u300_timer = {
-	.init		= u300_timer_init,
-};
diff --git a/arch/arm/mach-u300/timer.h b/arch/arm/mach-u300/timer.h
index b5e9791762e0..d34287bc34f5 100644
--- a/arch/arm/mach-u300/timer.h
+++ b/arch/arm/mach-u300/timer.h
@@ -1 +1 @@
-extern struct sys_timer u300_timer;
+extern void u300_timer_init(void);
diff --git a/arch/arm/mach-ux500/board-mop500.c b/arch/arm/mach-ux500/board-mop500.c
index d453522edb0d..e1dfa24b4fb9 100644
--- a/arch/arm/mach-ux500/board-mop500.c
+++ b/arch/arm/mach-ux500/board-mop500.c
@@ -751,7 +751,7 @@ MACHINE_START(U8500, "ST-Ericsson MOP500 platform")
 	.map_io		= u8500_map_io,
 	.init_irq	= ux500_init_irq,
 	/* we re-use nomadik timer here */
-	.timer		= &ux500_timer,
+	.init_time	= ux500_timer_init,
 	.handle_irq	= gic_handle_irq,
 	.init_machine	= mop500_init_machine,
 	.init_late	= ux500_init_late,
@@ -761,7 +761,7 @@ MACHINE_START(U8520, "ST-Ericsson U8520 Platform HREFP520")
 	.atag_offset	= 0x100,
 	.map_io		= u8500_map_io,
 	.init_irq	= ux500_init_irq,
-	.timer		= &ux500_timer,
+	.init_time	= ux500_timer_init,
 	.handle_irq	= gic_handle_irq,
 	.init_machine	= mop500_init_machine,
 	.init_late	= ux500_init_late,
@@ -772,7 +772,7 @@ MACHINE_START(HREFV60, "ST-Ericsson U8500 Platform HREFv60+")
 	.smp		= smp_ops(ux500_smp_ops),
 	.map_io		= u8500_map_io,
 	.init_irq	= ux500_init_irq,
-	.timer		= &ux500_timer,
+	.init_time	= ux500_timer_init,
 	.handle_irq	= gic_handle_irq,
 	.init_machine	= hrefv60_init_machine,
 	.init_late	= ux500_init_late,
@@ -784,7 +784,7 @@ MACHINE_START(SNOWBALL, "Calao Systems Snowball platform")
 	.map_io		= u8500_map_io,
 	.init_irq	= ux500_init_irq,
 	/* we re-use nomadik timer here */
-	.timer		= &ux500_timer,
+	.init_time	= ux500_timer_init,
 	.handle_irq	= gic_handle_irq,
 	.init_machine	= snowball_init_machine,
 	.init_late	= NULL,
diff --git a/arch/arm/mach-ux500/cpu-db8500.c b/arch/arm/mach-ux500/cpu-db8500.c
index db0bb75e2c76..7875d3c85df1 100644
--- a/arch/arm/mach-ux500/cpu-db8500.c
+++ b/arch/arm/mach-ux500/cpu-db8500.c
@@ -340,7 +340,7 @@ DT_MACHINE_START(U8500_DT, "ST-Ericsson Ux5x0 platform (Device Tree Support)")
 	.map_io		= u8500_map_io,
 	.init_irq	= ux500_init_irq,
 	/* we re-use nomadik timer here */
-	.timer		= &ux500_timer,
+	.init_time	= ux500_timer_init,
 	.handle_irq	= gic_handle_irq,
 	.init_machine	= u8500_init_machine,
 	.init_late	= NULL,
diff --git a/arch/arm/mach-ux500/include/mach/setup.h b/arch/arm/mach-ux500/include/mach/setup.h
index 6be4c4d2ab88..bddce2b49372 100644
--- a/arch/arm/mach-ux500/include/mach/setup.h
+++ b/arch/arm/mach-ux500/include/mach/setup.h
@@ -28,8 +28,7 @@ extern struct device *ux500_soc_device_init(const char *soc_id);
 struct amba_device;
 extern void __init amba_add_devices(struct amba_device *devs[], int num);
 
-struct sys_timer;
-extern struct sys_timer ux500_timer;
+extern void ux500_timer_init(void);
 
 #define __IO_DEV_DESC(x, sz)	{		\
 	.virtual	= IO_ADDRESS(x),	\
diff --git a/arch/arm/mach-ux500/timer.c b/arch/arm/mach-ux500/timer.c
index 46a724493964..aa2a78acb59e 100644
--- a/arch/arm/mach-ux500/timer.c
+++ b/arch/arm/mach-ux500/timer.c
@@ -46,7 +46,7 @@ const static struct of_device_id prcmu_timer_of_match[] __initconst = {
 	{ },
 };
 
-static void __init ux500_timer_init(void)
+void __init ux500_timer_init(void)
 {
 	void __iomem *mtu_timer_base;
 	void __iomem *prcmu_timer_base;
@@ -99,7 +99,3 @@ dt_fail:
 	clksrc_dbx500_prcmu_init(prcmu_timer_base);
 	ux500_twd_init();
 }
-
-struct sys_timer ux500_timer = {
-	.init		= ux500_timer_init,
-};
diff --git a/arch/arm/mach-versatile/core.c b/arch/arm/mach-versatile/core.c
index 5d5929450366..d5ddc0c77f5b 100644
--- a/arch/arm/mach-versatile/core.c
+++ b/arch/arm/mach-versatile/core.c
@@ -770,7 +770,7 @@ void __init versatile_init(void)
 /*
  * Set up timer interrupt, and return the current time in seconds.
  */
-static void __init versatile_timer_init(void)
+void __init versatile_timer_init(void)
 {
 	u32 val;
 
@@ -797,8 +797,3 @@ static void __init versatile_timer_init(void)
 	sp804_clocksource_init(TIMER3_VA_BASE, "timer3");
 	sp804_clockevents_init(TIMER0_VA_BASE, IRQ_TIMERINT0_1, "timer0");
 }
-
-struct sys_timer versatile_timer = {
-	.init		= versatile_timer_init,
-};
-
diff --git a/arch/arm/mach-versatile/core.h b/arch/arm/mach-versatile/core.h
index 683e60776a85..5c1b87d1da6b 100644
--- a/arch/arm/mach-versatile/core.h
+++ b/arch/arm/mach-versatile/core.h
@@ -29,7 +29,7 @@ extern void __init versatile_init(void);
 extern void __init versatile_init_early(void);
 extern void __init versatile_init_irq(void);
 extern void __init versatile_map_io(void);
-extern struct sys_timer versatile_timer;
+extern void versatile_timer_init(void);
 extern void versatile_restart(char, const char *);
 extern unsigned int mmc_status(struct device *dev);
 #ifdef CONFIG_OF
diff --git a/arch/arm/mach-versatile/versatile_ab.c b/arch/arm/mach-versatile/versatile_ab.c
index 98f65493177a..187c1da2c4bb 100644
--- a/arch/arm/mach-versatile/versatile_ab.c
+++ b/arch/arm/mach-versatile/versatile_ab.c
@@ -40,7 +40,7 @@ MACHINE_START(VERSATILE_AB, "ARM-Versatile AB")
 	.init_early	= versatile_init_early,
 	.init_irq	= versatile_init_irq,
 	.handle_irq	= vic_handle_irq,
-	.timer		= &versatile_timer,
+	.init_time	= versatile_timer_init,
 	.init_machine	= versatile_init,
 	.restart	= versatile_restart,
 MACHINE_END
diff --git a/arch/arm/mach-versatile/versatile_dt.c b/arch/arm/mach-versatile/versatile_dt.c
index ae5ad3c8f3dd..ccf9f8a92067 100644
--- a/arch/arm/mach-versatile/versatile_dt.c
+++ b/arch/arm/mach-versatile/versatile_dt.c
@@ -47,7 +47,7 @@ DT_MACHINE_START(VERSATILE_PB, "ARM-Versatile (Device Tree Support)")
 	.init_early	= versatile_init_early,
 	.init_irq	= versatile_init_irq,
 	.handle_irq	= vic_handle_irq,
-	.timer		= &versatile_timer,
+	.init_time	= versatile_timer_init,
 	.init_machine	= versatile_dt_init,
 	.dt_compat	= versatile_dt_match,
 	.restart	= versatile_restart,
diff --git a/arch/arm/mach-versatile/versatile_pb.c b/arch/arm/mach-versatile/versatile_pb.c
index 19738331bd3d..1cabc0aa569d 100644
--- a/arch/arm/mach-versatile/versatile_pb.c
+++ b/arch/arm/mach-versatile/versatile_pb.c
@@ -108,7 +108,7 @@ MACHINE_START(VERSATILE_PB, "ARM-Versatile PB")
 	.init_early	= versatile_init_early,
 	.init_irq	= versatile_init_irq,
 	.handle_irq	= vic_handle_irq,
-	.timer		= &versatile_timer,
+	.init_time	= versatile_timer_init,
 	.init_machine	= versatile_pb_init,
 	.restart	= versatile_restart,
 MACHINE_END
diff --git a/arch/arm/mach-vexpress/v2m.c b/arch/arm/mach-vexpress/v2m.c
index 011661a6c5cb..08bd548ef144 100644
--- a/arch/arm/mach-vexpress/v2m.c
+++ b/arch/arm/mach-vexpress/v2m.c
@@ -291,10 +291,6 @@ static void __init v2m_timer_init(void)
 	v2m_sp804_init(ioremap(V2M_TIMER01, SZ_4K), IRQ_V2M_TIMER0);
 }
 
-static struct sys_timer v2m_timer = {
-	.init	= v2m_timer_init,
-};
-
 static void __init v2m_init_early(void)
 {
 	if (ct_desc->init_early)
@@ -376,7 +372,7 @@ MACHINE_START(VEXPRESS, "ARM-Versatile Express")
 	.map_io		= v2m_map_io,
 	.init_early	= v2m_init_early,
 	.init_irq	= v2m_init_irq,
-	.timer		= &v2m_timer,
+	.init_time	= v2m_timer_init,
 	.handle_irq	= gic_handle_irq,
 	.init_machine	= v2m_init,
 	.restart	= vexpress_restart,
@@ -468,10 +464,6 @@ static void __init v2m_dt_timer_init(void)
 				24000000);
 }
 
-static struct sys_timer v2m_dt_timer = {
-	.init = v2m_dt_timer_init,
-};
-
 static const struct of_device_id v2m_dt_bus_match[] __initconst = {
 	{ .compatible = "simple-bus", },
 	{ .compatible = "arm,amba-bus", },
@@ -498,7 +490,7 @@ DT_MACHINE_START(VEXPRESS_DT, "ARM-Versatile Express")
 	.map_io		= v2m_dt_map_io,
 	.init_early	= v2m_dt_init_early,
 	.init_irq	= v2m_dt_init_irq,
-	.timer		= &v2m_dt_timer,
+	.init_time	= v2m_dt_timer_init,
 	.init_machine	= v2m_dt_init,
 	.handle_irq	= gic_handle_irq,
 	.restart	= vexpress_restart,
diff --git a/arch/arm/mach-vt8500/vt8500.c b/arch/arm/mach-vt8500/vt8500.c
index 3c66d48ea082..d5b9c6689c9d 100644
--- a/arch/arm/mach-vt8500/vt8500.c
+++ b/arch/arm/mach-vt8500/vt8500.c
@@ -175,10 +175,6 @@ static void __init vt8500_init_irq(void)
 	of_irq_init(vt8500_irq_match);
 };
 
-static struct sys_timer vt8500_timer = {
-	.init = vt8500_timer_init,
-};
-
 static const char * const vt8500_dt_compat[] = {
 	"via,vt8500",
 	"wm,wm8650",
@@ -189,7 +185,7 @@ DT_MACHINE_START(WMT_DT, "VIA/Wondermedia SoC (Device Tree Support)")
 	.dt_compat	= vt8500_dt_compat,
 	.map_io		= vt8500_map_io,
 	.init_irq	= vt8500_init_irq,
-	.timer		= &vt8500_timer,
+	.init_time	= vt8500_timer_init,
 	.init_machine	= vt8500_init,
 	.restart	= vt8500_restart,
 	.handle_irq	= vt8500_handle_irq,
diff --git a/arch/arm/mach-w90x900/mach-nuc910evb.c b/arch/arm/mach-w90x900/mach-nuc910evb.c
index b4243e4f1565..92f1c978f35e 100644
--- a/arch/arm/mach-w90x900/mach-nuc910evb.c
+++ b/arch/arm/mach-w90x900/mach-nuc910evb.c
@@ -37,6 +37,6 @@ MACHINE_START(W90P910EVB, "W90P910EVB")
 	.map_io		= nuc910evb_map_io,
 	.init_irq	= nuc900_init_irq,
 	.init_machine	= nuc910evb_init,
-	.timer		= &nuc900_timer,
+	.init_time	= nuc900_timer_init,
 	.restart	= nuc9xx_restart,
 MACHINE_END
diff --git a/arch/arm/mach-w90x900/mach-nuc950evb.c b/arch/arm/mach-w90x900/mach-nuc950evb.c
index 500fe5932ce9..26f7189056e3 100644
--- a/arch/arm/mach-w90x900/mach-nuc950evb.c
+++ b/arch/arm/mach-w90x900/mach-nuc950evb.c
@@ -40,6 +40,6 @@ MACHINE_START(W90P950EVB, "W90P950EVB")
 	.map_io		= nuc950evb_map_io,
 	.init_irq	= nuc900_init_irq,
 	.init_machine	= nuc950evb_init,
-	.timer		= &nuc900_timer,
+	.init_time	= nuc900_timer_init,
 	.restart	= nuc9xx_restart,
 MACHINE_END
diff --git a/arch/arm/mach-w90x900/mach-nuc960evb.c b/arch/arm/mach-w90x900/mach-nuc960evb.c
index cbb3adc3db10..9b4e73fe10e5 100644
--- a/arch/arm/mach-w90x900/mach-nuc960evb.c
+++ b/arch/arm/mach-w90x900/mach-nuc960evb.c
@@ -37,6 +37,6 @@ MACHINE_START(W90N960EVB, "W90N960EVB")
 	.map_io		= nuc960evb_map_io,
 	.init_irq	= nuc900_init_irq,
 	.init_machine	= nuc960evb_init,
-	.timer		= &nuc900_timer,
+	.init_time	= nuc900_timer_init,
 	.restart	= nuc9xx_restart,
 MACHINE_END
diff --git a/arch/arm/mach-w90x900/nuc9xx.h b/arch/arm/mach-w90x900/nuc9xx.h
index 91acb4047793..88ef4b267089 100644
--- a/arch/arm/mach-w90x900/nuc9xx.h
+++ b/arch/arm/mach-w90x900/nuc9xx.h
@@ -15,10 +15,9 @@
  *
  */
 struct map_desc;
-struct sys_timer;
 
 /* core initialisation functions */
 
 extern void nuc900_init_irq(void);
-extern struct sys_timer nuc900_timer;
+extern void nuc900_timer_init(void);
 extern void nuc9xx_restart(char, const char *);
diff --git a/arch/arm/mach-w90x900/time.c b/arch/arm/mach-w90x900/time.c
index fa27c498ac09..d9c3d6b801c7 100644
--- a/arch/arm/mach-w90x900/time.c
+++ b/arch/arm/mach-w90x900/time.c
@@ -167,12 +167,8 @@ static void __init nuc900_clocksource_init(void)
 		TDR_SHIFT, clocksource_mmio_readl_down);
 }
 
-static void __init nuc900_timer_init(void)
+void __init nuc900_timer_init(void)
 {
 	nuc900_clocksource_init();
 	nuc900_clockevents_init();
 }
-
-struct sys_timer nuc900_timer = {
-	.init		= nuc900_timer_init,
-};
diff --git a/arch/arm/mach-zynq/common.c b/arch/arm/mach-zynq/common.c
index e16d4bed0f7a..2ae4bce652b6 100644
--- a/arch/arm/mach-zynq/common.c
+++ b/arch/arm/mach-zynq/common.c
@@ -93,13 +93,6 @@ static void __init xilinx_zynq_timer_init(void)
 	xttcpss_timer_init();
 }
 
-/*
- * Instantiate and initialize the system timer structure
- */
-static struct sys_timer xttcpss_sys_timer = {
-	.init		= xilinx_zynq_timer_init,
-};
-
 /**
  * xilinx_map_io() - Create memory mappings needed for early I/O.
  */
@@ -120,6 +113,6 @@ MACHINE_START(XILINX_EP107, "Xilinx Zynq Platform")
 	.init_irq	= xilinx_irq_init,
 	.handle_irq	= gic_handle_irq,
 	.init_machine	= xilinx_init_machine,
-	.timer		= &xttcpss_sys_timer,
+	.init_time	= xilinx_zynq_timer_init,
 	.dt_compat	= xilinx_dt_match,
 MACHINE_END
diff --git a/arch/arm/plat-samsung/include/plat/cpu.h b/arch/arm/plat-samsung/include/plat/cpu.h
index e0072ce8d6e9..e0667a1c137c 100644
--- a/arch/arm/plat-samsung/include/plat/cpu.h
+++ b/arch/arm/plat-samsung/include/plat/cpu.h
@@ -194,8 +194,7 @@ extern void s3c24xx_init_uartdevs(char *name,
 
 /* timer for 2410/2440 */
 
-struct sys_timer;
-extern struct sys_timer s3c24xx_timer;
+extern void s3c24xx_timer_init(void);
 
 extern struct syscore_ops s3c2410_pm_syscore_ops;
 extern struct syscore_ops s3c2412_pm_syscore_ops;
diff --git a/arch/arm/plat-samsung/include/plat/s5p-time.h b/arch/arm/plat-samsung/include/plat/s5p-time.h
index 3a70aebc9205..9c96f3586ce0 100644
--- a/arch/arm/plat-samsung/include/plat/s5p-time.h
+++ b/arch/arm/plat-samsung/include/plat/s5p-time.h
@@ -36,5 +36,5 @@ struct s5p_timer_source {
 
 extern void __init s5p_set_timer_source(enum s5p_timer_mode event,
 					enum s5p_timer_mode source);
-extern	struct sys_timer s5p_timer;
+extern	void s5p_timer_init(void);
 #endif /* __ASM_PLAT_S5P_TIME_H */
diff --git a/arch/arm/plat-samsung/s5p-time.c b/arch/arm/plat-samsung/s5p-time.c
index 028b6e877eb9..dabede46c0ec 100644
--- a/arch/arm/plat-samsung/s5p-time.c
+++ b/arch/arm/plat-samsung/s5p-time.c
@@ -393,13 +393,9 @@ static void __init s5p_timer_resources(void)
 	clk_enable(tin_source);
 }
 
-static void __init s5p_timer_init(void)
+void __init s5p_timer_init(void)
 {
 	s5p_timer_resources();
 	s5p_clockevent_init();
 	s5p_clocksource_init();
 }
-
-struct sys_timer s5p_timer = {
-	.init		= s5p_timer_init,
-};
diff --git a/arch/arm/plat-samsung/time.c b/arch/arm/plat-samsung/time.c
index 773745a85f0f..73defd00c3e4 100644
--- a/arch/arm/plat-samsung/time.c
+++ b/arch/arm/plat-samsung/time.c
@@ -276,7 +276,7 @@ static struct syscore_ops s3c24xx_syscore_ops = {
 	.resume		= s3c2410_timer_setup,
 };
 
-static void __init s3c2410_timer_init(void)
+void __init s3c24xx_timer_init(void)
 {
 	arch_gettimeoffset = s3c2410_gettimeoffset;
 
@@ -285,7 +285,3 @@ static void __init s3c2410_timer_init(void)
 	setup_irq(IRQ_TIMER4, &s3c2410_timer_irq);
 	register_syscore_ops(&s3c24xx_syscore_ops);
 }
-
-struct sys_timer s3c24xx_timer = {
-	.init		= s3c2410_timer_init,
-};
diff --git a/drivers/clocksource/bcm2835_timer.c b/drivers/clocksource/bcm2835_timer.c
index bc19f12c20ce..7f796d8f7505 100644
--- a/drivers/clocksource/bcm2835_timer.c
+++ b/drivers/clocksource/bcm2835_timer.c
@@ -101,7 +101,7 @@ static struct of_device_id bcm2835_time_match[] __initconst = {
 	{}
 };
 
-static void __init bcm2835_time_init(void)
+void __init bcm2835_timer_init(void)
 {
 	struct device_node *node;
 	void __iomem *base;
@@ -155,7 +155,3 @@ static void __init bcm2835_time_init(void)
 
 	pr_info("bcm2835: system timer (irq = %d)\n", irq);
 }
-
-struct sys_timer bcm2835_timer = {
-	.init = bcm2835_time_init,
-};
diff --git a/drivers/clocksource/dw_apb_timer_of.c b/drivers/clocksource/dw_apb_timer_of.c
index f7dba5b79b44..ab09ed3742ee 100644
--- a/drivers/clocksource/dw_apb_timer_of.c
+++ b/drivers/clocksource/dw_apb_timer_of.c
@@ -107,7 +107,7 @@ static const struct of_device_id osctimer_ids[] __initconst = {
 	{},
 };
 
-static void __init timer_init(void)
+void __init dw_apb_timer_init(void)
 {
 	struct device_node *event_timer, *source_timer;
 
@@ -125,7 +125,3 @@ static void __init timer_init(void)
 
 	init_sched_clock();
 }
-
-struct sys_timer dw_apb_timer = {
-	.init = timer_init,
-};
diff --git a/drivers/clocksource/sunxi_timer.c b/drivers/clocksource/sunxi_timer.c
index 3cd1bd3d7aee..6c2ed56e8b14 100644
--- a/drivers/clocksource/sunxi_timer.c
+++ b/drivers/clocksource/sunxi_timer.c
@@ -104,7 +104,7 @@ static struct of_device_id sunxi_timer_dt_ids[] = {
 	{ }
 };
 
-static void __init sunxi_timer_init(void)
+void __init sunxi_timer_init(void)
 {
 	struct device_node *node;
 	unsigned long rate = 0;
@@ -165,7 +165,3 @@ static void __init sunxi_timer_init(void)
 
 	clockevents_register_device(&sunxi_clockevent);
 }
-
-struct sys_timer sunxi_timer = {
-	.init = sunxi_timer_init,
-};
diff --git a/include/linux/bcm2835_timer.h b/include/linux/bcm2835_timer.h
index 25680fe0903c..ca17aa817006 100644
--- a/include/linux/bcm2835_timer.h
+++ b/include/linux/bcm2835_timer.h
@@ -17,6 +17,6 @@
 
 #include <asm/mach/time.h>
 
-extern struct sys_timer bcm2835_timer;
+extern void bcm2835_timer_init(void);
 
 #endif
diff --git a/include/linux/dw_apb_timer.h b/include/linux/dw_apb_timer.h
index 1148575fd134..dd755ce2a5eb 100644
--- a/include/linux/dw_apb_timer.h
+++ b/include/linux/dw_apb_timer.h
@@ -53,5 +53,5 @@ void dw_apb_clocksource_start(struct dw_apb_clocksource *dw_cs);
 cycle_t dw_apb_clocksource_read(struct dw_apb_clocksource *dw_cs);
 void dw_apb_clocksource_unregister(struct dw_apb_clocksource *dw_cs);
 
-extern struct sys_timer dw_apb_timer;
+extern void dw_apb_timer_init(void);
 #endif /* __DW_APB_TIMER_H__ */
diff --git a/include/linux/sunxi_timer.h b/include/linux/sunxi_timer.h
index b9165bba6e61..18081787e5f3 100644
--- a/include/linux/sunxi_timer.h
+++ b/include/linux/sunxi_timer.h
@@ -19,6 +19,6 @@
 
 #include <asm/mach/time.h>
 
-extern struct sys_timer sunxi_timer;
+void sunxi_timer_init(void);
 
 #endif
-- 
cgit v1.2.3


From 176eb8dc91bd91e7b1beb1761ee29f016e8c09e8 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <w.sang@pengutronix.de>
Date: Mon, 24 Dec 2012 09:49:55 -0800
Subject: Input: adxl34x - make platform_data include self contained

Since it suggests to use defines from input.h (ABS_X, ...), also include
the file to make them available.

Signed-off-by: Wolfram Sang <w.sang@pengutronix.de>
Acked-by: Michael Hennerich <michael.hennerich@analog.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 include/linux/input/adxl34x.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/input/adxl34x.h b/include/linux/input/adxl34x.h
index 57e01a7cb006..010d98175efa 100644
--- a/include/linux/input/adxl34x.h
+++ b/include/linux/input/adxl34x.h
@@ -13,6 +13,8 @@
 #ifndef __LINUX_INPUT_ADXL34X_H__
 #define __LINUX_INPUT_ADXL34X_H__
 
+#include <linux/input.h>
+
 struct adxl34x_platform_data {
 
 	/*
-- 
cgit v1.2.3


From 53e872681fed6a43047e71bf927f77d06f467988 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Tue, 25 Dec 2012 13:29:52 -0500
Subject: ext4: fix deadlock in journal_unmap_buffer()

We cannot wait for transaction commit in journal_unmap_buffer()
because we hold page lock which ranks below transaction start.  We
solve the issue by bailing out of journal_unmap_buffer() and
jbd2_journal_invalidatepage() with -EBUSY.  Caller is then responsible
for waiting for transaction commit to finish and try invalidation
again. Since the issue can happen only for page stradding i_size, it
is simple enough to manually call jbd2_journal_invalidatepage() for
such page from ext4_setattr(), check the return value and wait if
necessary.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/inode.c       | 82 ++++++++++++++++++++++++++++++++++++++++++++-------
 fs/jbd2/transaction.c | 27 +++++++++--------
 include/linux/jbd2.h  |  2 +-
 3 files changed, 86 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 12d3fbcff59f..cbfe13bf5b2a 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2894,8 +2894,8 @@ static void ext4_invalidatepage(struct page *page, unsigned long offset)
 	block_invalidatepage(page, offset);
 }
 
-static void ext4_journalled_invalidatepage(struct page *page,
-					   unsigned long offset)
+static int __ext4_journalled_invalidatepage(struct page *page,
+					    unsigned long offset)
 {
 	journal_t *journal = EXT4_JOURNAL(page->mapping->host);
 
@@ -2907,7 +2907,14 @@ static void ext4_journalled_invalidatepage(struct page *page,
 	if (offset == 0)
 		ClearPageChecked(page);
 
-	jbd2_journal_invalidatepage(journal, page, offset);
+	return jbd2_journal_invalidatepage(journal, page, offset);
+}
+
+/* Wrapper for aops... */
+static void ext4_journalled_invalidatepage(struct page *page,
+					   unsigned long offset)
+{
+	WARN_ON(__ext4_journalled_invalidatepage(page, offset) < 0);
 }
 
 static int ext4_releasepage(struct page *page, gfp_t wait)
@@ -4313,6 +4320,47 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc)
 	return err;
 }
 
+/*
+ * In data=journal mode ext4_journalled_invalidatepage() may fail to invalidate
+ * buffers that are attached to a page stradding i_size and are undergoing
+ * commit. In that case we have to wait for commit to finish and try again.
+ */
+static void ext4_wait_for_tail_page_commit(struct inode *inode)
+{
+	struct page *page;
+	unsigned offset;
+	journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
+	tid_t commit_tid = 0;
+	int ret;
+
+	offset = inode->i_size & (PAGE_CACHE_SIZE - 1);
+	/*
+	 * All buffers in the last page remain valid? Then there's nothing to
+	 * do. We do the check mainly to optimize the common PAGE_CACHE_SIZE ==
+	 * blocksize case
+	 */
+	if (offset > PAGE_CACHE_SIZE - (1 << inode->i_blkbits))
+		return;
+	while (1) {
+		page = find_lock_page(inode->i_mapping,
+				      inode->i_size >> PAGE_CACHE_SHIFT);
+		if (!page)
+			return;
+		ret = __ext4_journalled_invalidatepage(page, offset);
+		unlock_page(page);
+		page_cache_release(page);
+		if (ret != -EBUSY)
+			return;
+		commit_tid = 0;
+		read_lock(&journal->j_state_lock);
+		if (journal->j_committing_transaction)
+			commit_tid = journal->j_committing_transaction->t_tid;
+		read_unlock(&journal->j_state_lock);
+		if (commit_tid)
+			jbd2_log_wait_commit(journal, commit_tid);
+	}
+}
+
 /*
  * ext4_setattr()
  *
@@ -4426,16 +4474,28 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
 	}
 
 	if (attr->ia_valid & ATTR_SIZE) {
-		if (attr->ia_size != i_size_read(inode)) {
-			truncate_setsize(inode, attr->ia_size);
-			/* Inode size will be reduced, wait for dio in flight.
-			 * Temporarily disable dioread_nolock to prevent
-			 * livelock. */
+		if (attr->ia_size != inode->i_size) {
+			loff_t oldsize = inode->i_size;
+
+			i_size_write(inode, attr->ia_size);
+			/*
+			 * Blocks are going to be removed from the inode. Wait
+			 * for dio in flight.  Temporarily disable
+			 * dioread_nolock to prevent livelock.
+			 */
 			if (orphan) {
-				ext4_inode_block_unlocked_dio(inode);
-				inode_dio_wait(inode);
-				ext4_inode_resume_unlocked_dio(inode);
+				if (!ext4_should_journal_data(inode)) {
+					ext4_inode_block_unlocked_dio(inode);
+					inode_dio_wait(inode);
+					ext4_inode_resume_unlocked_dio(inode);
+				} else
+					ext4_wait_for_tail_page_commit(inode);
 			}
+			/*
+			 * Truncate pagecache after we've waited for commit
+			 * in data=journal mode to make pages freeable.
+			 */
+			truncate_pagecache(inode, oldsize, inode->i_size);
 		}
 		ext4_truncate(inode);
 	}
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index cd4485db42b3..ddc51a7f4508 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -1840,7 +1840,6 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh,
 
 	BUFFER_TRACE(bh, "entry");
 
-retry:
 	/*
 	 * It is safe to proceed here without the j_list_lock because the
 	 * buffers cannot be stolen by try_to_free_buffers as long as we are
@@ -1935,14 +1934,11 @@ retry:
 		 * for commit and try again.
 		 */
 		if (partial_page) {
-			tid_t tid = journal->j_committing_transaction->t_tid;
-
 			jbd2_journal_put_journal_head(jh);
 			spin_unlock(&journal->j_list_lock);
 			jbd_unlock_bh_state(bh);
 			write_unlock(&journal->j_state_lock);
-			jbd2_log_wait_commit(journal, tid);
-			goto retry;
+			return -EBUSY;
 		}
 		/*
 		 * OK, buffer won't be reachable after truncate. We just set
@@ -2003,21 +1999,23 @@ zap_buffer_unlocked:
  * @page:    page to flush
  * @offset:  length of page to invalidate.
  *
- * Reap page buffers containing data after offset in page.
- *
+ * Reap page buffers containing data after offset in page. Can return -EBUSY
+ * if buffers are part of the committing transaction and the page is straddling
+ * i_size. Caller then has to wait for current commit and try again.
  */
-void jbd2_journal_invalidatepage(journal_t *journal,
-		      struct page *page,
-		      unsigned long offset)
+int jbd2_journal_invalidatepage(journal_t *journal,
+				struct page *page,
+				unsigned long offset)
 {
 	struct buffer_head *head, *bh, *next;
 	unsigned int curr_off = 0;
 	int may_free = 1;
+	int ret = 0;
 
 	if (!PageLocked(page))
 		BUG();
 	if (!page_has_buffers(page))
-		return;
+		return 0;
 
 	/* We will potentially be playing with lists other than just the
 	 * data lists (especially for journaled data mode), so be
@@ -2031,9 +2029,11 @@ void jbd2_journal_invalidatepage(journal_t *journal,
 		if (offset <= curr_off) {
 			/* This block is wholly outside the truncation point */
 			lock_buffer(bh);
-			may_free &= journal_unmap_buffer(journal, bh,
-							 offset > 0);
+			ret = journal_unmap_buffer(journal, bh, offset > 0);
 			unlock_buffer(bh);
+			if (ret < 0)
+				return ret;
+			may_free &= ret;
 		}
 		curr_off = next_off;
 		bh = next;
@@ -2044,6 +2044,7 @@ void jbd2_journal_invalidatepage(journal_t *journal,
 		if (may_free && try_to_free_buffers(page))
 			J_ASSERT(!page_has_buffers(page));
 	}
+	return 0;
 }
 
 /*
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index 1be23d9fdacb..e30b66346942 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -1098,7 +1098,7 @@ void		 jbd2_journal_set_triggers(struct buffer_head *,
 extern int	 jbd2_journal_dirty_metadata (handle_t *, struct buffer_head *);
 extern int	 jbd2_journal_forget (handle_t *, struct buffer_head *);
 extern void	 journal_sync_buffer (struct buffer_head *);
-extern void	 jbd2_journal_invalidatepage(journal_t *,
+extern int	 jbd2_journal_invalidatepage(journal_t *,
 				struct page *, unsigned long);
 extern int	 jbd2_journal_try_to_free_buffers(journal_t *, struct page *, gfp_t);
 extern int	 jbd2_journal_stop(handle_t *);
-- 
cgit v1.2.3


From 08b60f8438879a84246d7debded31c9cb7aea6e4 Mon Sep 17 00:00:00 2001
From: Stephen Warren <swarren@wwwdotorg.org>
Date: Mon, 24 Dec 2012 11:14:58 -0700
Subject: namei.h: include errno.h

This solves:

In file included from fs/ext3/symlink.c:20:0:
include/linux/namei.h: In function 'retry_estale':
include/linux/namei.h:114:19: error: 'ESTALE' undeclared (first use in this function)

Signed-off-by: Stephen Warren <swarren@wwwdotorg.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/namei.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/namei.h b/include/linux/namei.h
index e998c030061d..5a5ff57ceed4 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -2,6 +2,7 @@
 #define _LINUX_NAMEI_H
 
 #include <linux/dcache.h>
+#include <linux/errno.h>
 #include <linux/linkage.h>
 #include <linux/path.h>
 
-- 
cgit v1.2.3


From c876ad7682155958d0c9c27afe9017925c230d64 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Fri, 21 Dec 2012 20:27:12 -0800
Subject: pidns: Stop pid allocation when init dies

Oleg pointed out that in a pid namespace the sequence.
- pid 1 becomes a zombie
- setns(thepidns), fork,...
- reaping pid 1.
- The injected processes exiting.

Can lead to processes attempting access their child reaper and
instead following a stale pointer.

That waitpid for init can return before all of the processes in
the pid namespace have exited is also unfortunate.

Avoid these problems by disabling the allocation of new pids in a pid
namespace when init dies, instead of when the last process in a pid
namespace is reaped.

Pointed-out-by:  Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/linux/pid.h           |  1 +
 include/linux/pid_namespace.h |  4 +++-
 kernel/pid.c                  | 15 ++++++++++++---
 kernel/pid_namespace.c        |  4 ++++
 4 files changed, 20 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pid.h b/include/linux/pid.h
index b152d44fb181..2381c973d897 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -121,6 +121,7 @@ int next_pidmap(struct pid_namespace *pid_ns, unsigned int last);
 
 extern struct pid *alloc_pid(struct pid_namespace *ns);
 extern void free_pid(struct pid *pid);
+extern void disable_pid_allocation(struct pid_namespace *ns);
 
 /*
  * ns_of_pid() returns the pid namespace in which the specified pid was
diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
index bf285999273a..215e5e3dda10 100644
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -21,7 +21,7 @@ struct pid_namespace {
 	struct kref kref;
 	struct pidmap pidmap[PIDMAP_ENTRIES];
 	int last_pid;
-	int nr_hashed;
+	unsigned int nr_hashed;
 	struct task_struct *child_reaper;
 	struct kmem_cache *pid_cachep;
 	unsigned int level;
@@ -42,6 +42,8 @@ struct pid_namespace {
 
 extern struct pid_namespace init_pid_ns;
 
+#define PIDNS_HASH_ADDING (1U << 31)
+
 #ifdef CONFIG_PID_NS
 static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns)
 {
diff --git a/kernel/pid.c b/kernel/pid.c
index 36aa02ff17d6..de9af600006f 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -270,7 +270,6 @@ void free_pid(struct pid *pid)
 			wake_up_process(ns->child_reaper);
 			break;
 		case 0:
-			ns->nr_hashed = -1;
 			schedule_work(&ns->proc_work);
 			break;
 		}
@@ -319,7 +318,7 @@ struct pid *alloc_pid(struct pid_namespace *ns)
 
 	upid = pid->numbers + ns->level;
 	spin_lock_irq(&pidmap_lock);
-	if (ns->nr_hashed < 0)
+	if (!(ns->nr_hashed & PIDNS_HASH_ADDING))
 		goto out_unlock;
 	for ( ; upid >= pid->numbers; --upid) {
 		hlist_add_head_rcu(&upid->pid_chain,
@@ -342,6 +341,13 @@ out_free:
 	goto out;
 }
 
+void disable_pid_allocation(struct pid_namespace *ns)
+{
+	spin_lock_irq(&pidmap_lock);
+	ns->nr_hashed &= ~PIDNS_HASH_ADDING;
+	spin_unlock_irq(&pidmap_lock);
+}
+
 struct pid *find_pid_ns(int nr, struct pid_namespace *ns)
 {
 	struct hlist_node *elem;
@@ -573,6 +579,9 @@ void __init pidhash_init(void)
 
 void __init pidmap_init(void)
 {
+	/* Veryify no one has done anything silly */
+	BUILD_BUG_ON(PID_MAX_LIMIT >= PIDNS_HASH_ADDING);
+
 	/* bump default and minimum pid_max based on number of cpus */
 	pid_max = min(pid_max_max, max_t(int, pid_max,
 				PIDS_PER_CPU_DEFAULT * num_possible_cpus()));
@@ -584,7 +593,7 @@ void __init pidmap_init(void)
 	/* Reserve PID 0. We never call free_pidmap(0) */
 	set_bit(0, init_pid_ns.pidmap[0].page);
 	atomic_dec(&init_pid_ns.pidmap[0].nr_free);
-	init_pid_ns.nr_hashed = 1;
+	init_pid_ns.nr_hashed = PIDNS_HASH_ADDING;
 
 	init_pid_ns.pid_cachep = KMEM_CACHE(pid,
 			SLAB_HWCACHE_ALIGN | SLAB_PANIC);
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index fdbd0cdf271a..c1c3dc1c6023 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -115,6 +115,7 @@ static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns
 	ns->level = level;
 	ns->parent = get_pid_ns(parent_pid_ns);
 	ns->user_ns = get_user_ns(user_ns);
+	ns->nr_hashed = PIDNS_HASH_ADDING;
 	INIT_WORK(&ns->proc_work, proc_cleanup_work);
 
 	set_bit(0, ns->pidmap[0].page);
@@ -181,6 +182,9 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
 	int rc;
 	struct task_struct *task, *me = current;
 
+	/* Don't allow any more processes into the pid namespace */
+	disable_pid_allocation(pid_ns);
+
 	/* Ignore SIGCHLD causing any terminated children to autoreap */
 	spin_lock_irq(&me->sighand->siglock);
 	me->sighand->action[SIGCHLD - 1].sa.sa_handler = SIG_IGN;
-- 
cgit v1.2.3


From 812089e01b9f65f90fc8fc670d8cce72a0e01fbb Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@amacapital.net>
Date: Sat, 1 Dec 2012 12:37:20 -0800
Subject: PCI: Reduce Ricoh 0xe822 SD card reader base clock frequency to 50MHz

Otherwise it fails like this on cards like the Transcend 16GB SDHC card:

    mmc0: new SDHC card at address b368
    mmcblk0: mmc0:b368 SDC   15.0 GiB
    mmcblk0: error -110 sending status command, retrying
    mmcblk0: error -84 transferring data, sector 0, nr 8, cmd response 0x900, card status 0xb0

Tested on my Lenovo x200 laptop.

[bhelgaas: changelog]
Signed-off-by: Andy Lutomirski <luto@amacapital.net>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Chris Ball <cjb@laptop.org>
CC: Manoj Iyer <manoj.iyer@canonical.com>
CC: stable@vger.kernel.org
---
 drivers/pci/quirks.c    | 7 +++++--
 include/linux/pci_ids.h | 1 +
 2 files changed, 6 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 8f7a6344e79e..0369fb6fc1da 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -2725,7 +2725,7 @@ static void ricoh_mmc_fixup_r5c832(struct pci_dev *dev)
 	if (PCI_FUNC(dev->devfn))
 		return;
 	/*
-	 * RICOH 0xe823 SD/MMC card reader fails to recognize
+	 * RICOH 0xe822 and 0xe823 SD/MMC card readers fail to recognize
 	 * certain types of SD/MMC cards. Lowering the SD base
 	 * clock frequency from 200Mhz to 50Mhz fixes this issue.
 	 *
@@ -2736,7 +2736,8 @@ static void ricoh_mmc_fixup_r5c832(struct pci_dev *dev)
 	 * 0xf9  - Key register for 0x150
 	 * 0xfc  - key register for 0xe1
 	 */
-	if (dev->device == PCI_DEVICE_ID_RICOH_R5CE823) {
+	if (dev->device == PCI_DEVICE_ID_RICOH_R5CE822 ||
+	    dev->device == PCI_DEVICE_ID_RICOH_R5CE823) {
 		pci_write_config_byte(dev, 0xf9, 0xfc);
 		pci_write_config_byte(dev, 0x150, 0x10);
 		pci_write_config_byte(dev, 0xf9, 0x00);
@@ -2763,6 +2764,8 @@ static void ricoh_mmc_fixup_r5c832(struct pci_dev *dev)
 }
 DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_RICOH, PCI_DEVICE_ID_RICOH_R5C832, ricoh_mmc_fixup_r5c832);
 DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_RICOH, PCI_DEVICE_ID_RICOH_R5C832, ricoh_mmc_fixup_r5c832);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_RICOH, PCI_DEVICE_ID_RICOH_R5CE822, ricoh_mmc_fixup_r5c832);
+DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_RICOH, PCI_DEVICE_ID_RICOH_R5CE822, ricoh_mmc_fixup_r5c832);
 DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_RICOH, PCI_DEVICE_ID_RICOH_R5CE823, ricoh_mmc_fixup_r5c832);
 DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_RICOH, PCI_DEVICE_ID_RICOH_R5CE823, ricoh_mmc_fixup_r5c832);
 #endif /*CONFIG_MMC_RICOH_MMC*/
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 0f8447376ddb..0eb65796bcb9 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -1568,6 +1568,7 @@
 #define PCI_DEVICE_ID_RICOH_RL5C476	0x0476
 #define PCI_DEVICE_ID_RICOH_RL5C478	0x0478
 #define PCI_DEVICE_ID_RICOH_R5C822	0x0822
+#define PCI_DEVICE_ID_RICOH_R5CE822	0xe822
 #define PCI_DEVICE_ID_RICOH_R5CE823	0xe823
 #define PCI_DEVICE_ID_RICOH_R5C832	0x0832
 #define PCI_DEVICE_ID_RICOH_R5C843	0x0843
-- 
cgit v1.2.3


From ad4b3fb7ff9940bcdb1e4cd62bd189d10fa636ba Mon Sep 17 00:00:00 2001
From: Christoffer Dall <cdall@cs.columbia.edu>
Date: Fri, 21 Dec 2012 13:03:50 -0500
Subject: mm: Fix PageHead when !CONFIG_PAGEFLAGS_EXTENDED

Unfortunately with !CONFIG_PAGEFLAGS_EXTENDED, (!PageHead) is false, and
(PageHead) is true, for tail pages.  If this is indeed the intended
behavior, which I doubt because it breaks cache cleaning on some ARM
systems, then the nomenclature is highly problematic.

This patch makes sure PageHead is only true for head pages and PageTail
is only true for tail pages, and neither is true for non-compound pages.

[ This buglet seems ancient - seems to have been introduced back in Apr
  2008 in commit 6a1e7f777f61: "pageflags: convert to the use of new
  macros".  And the reason nobody noticed is because the PageHead()
  tests are almost all about just sanity-checking, and only used on
  pages that are actual page heads.  The fact that the old code returned
  true for tail pages too was thus not really noticeable.   - Linus ]

Signed-off-by: Christoffer Dall <cdall@cs.columbia.edu>
Acked-by:  Andrea Arcangeli <aarcange@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Will Deacon <Will.Deacon@arm.com>
Cc: Steve Capper <Steve.Capper@arm.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: stable@kernel.org  # 2.6.26+
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/page-flags.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index b5d13841604e..70473da47b3f 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -362,7 +362,7 @@ static inline void ClearPageCompound(struct page *page)
  * pages on the LRU and/or pagecache.
  */
 TESTPAGEFLAG(Compound, compound)
-__PAGEFLAG(Head, compound)
+__SETPAGEFLAG(Head, compound)  __CLEARPAGEFLAG(Head, compound)
 
 /*
  * PG_reclaim is used in combination with PG_compound to mark the
@@ -374,8 +374,14 @@ __PAGEFLAG(Head, compound)
  * PG_compound & PG_reclaim	=> Tail page
  * PG_compound & ~PG_reclaim	=> Head page
  */
+#define PG_head_mask ((1L << PG_compound))
 #define PG_head_tail_mask ((1L << PG_compound) | (1L << PG_reclaim))
 
+static inline int PageHead(struct page *page)
+{
+	return ((page->flags & PG_head_tail_mask) == PG_head_mask);
+}
+
 static inline int PageTail(struct page *page)
 {
 	return ((page->flags & PG_head_tail_mask) == PG_head_tail_mask);
-- 
cgit v1.2.3


From 4bf84c35c65f36a344fb7a6cde6274df4120efb8 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@resnulli.us>
Date: Thu, 27 Dec 2012 23:49:37 +0000
Subject: net: add change_carrier netdev op

This allows a driver to register change_carrier callback which will be
called whenever user will like to change carrier state. This is useful
for devices like dummy, gre, team and so on.

Signed-off-by: Jiri Pirko <jiri@resnulli.us>
Acked-by: Flavio Leitner <fbl@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 12 ++++++++++++
 net/core/dev.c            | 19 +++++++++++++++++++
 2 files changed, 31 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index c599e4782d45..0e1b92a0c1ec 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -891,6 +891,14 @@ struct netdev_fcoe_hbainfo {
  * int (*ndo_bridge_setlink)(struct net_device *dev, struct nlmsghdr *nlh)
  * int (*ndo_bridge_getlink)(struct sk_buff *skb, u32 pid, u32 seq,
  *			     struct net_device *dev)
+ *
+ * int (*ndo_change_carrier)(struct net_device *dev, bool new_carrier);
+ *	Called to change device carrier. Soft-devices (like dummy, team, etc)
+ *	which do not represent real hardware may define this to allow their
+ *	userspace components to manage their virtual carrier state. Devices
+ *	that determine carrier state from physical hardware properties (eg
+ *	network cables) or protocol-dependent mechanisms (eg
+ *	USB_CDC_NOTIFY_NETWORK_CONNECTION) should NOT implement this function.
  */
 struct net_device_ops {
 	int			(*ndo_init)(struct net_device *dev);
@@ -1008,6 +1016,8 @@ struct net_device_ops {
 	int			(*ndo_bridge_getlink)(struct sk_buff *skb,
 						      u32 pid, u32 seq,
 						      struct net_device *dev);
+	int			(*ndo_change_carrier)(struct net_device *dev,
+						      bool new_carrier);
 };
 
 /*
@@ -2194,6 +2204,8 @@ extern int		dev_set_mtu(struct net_device *, int);
 extern void		dev_set_group(struct net_device *, int);
 extern int		dev_set_mac_address(struct net_device *,
 					    struct sockaddr *);
+extern int		dev_change_carrier(struct net_device *,
+					   bool new_carrier);
 extern int		dev_hard_start_xmit(struct sk_buff *skb,
 					    struct net_device *dev,
 					    struct netdev_queue *txq);
diff --git a/net/core/dev.c b/net/core/dev.c
index 515473ee52cb..21c5b976dcf3 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5027,6 +5027,25 @@ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
 }
 EXPORT_SYMBOL(dev_set_mac_address);
 
+/**
+ *	dev_change_carrier - Change device carrier
+ *	@dev: device
+ *	@new_carries: new value
+ *
+ *	Change device carrier
+ */
+int dev_change_carrier(struct net_device *dev, bool new_carrier)
+{
+	const struct net_device_ops *ops = dev->netdev_ops;
+
+	if (!ops->ndo_change_carrier)
+		return -EOPNOTSUPP;
+	if (!netif_device_present(dev))
+		return -ENODEV;
+	return ops->ndo_change_carrier(dev, new_carrier);
+}
+EXPORT_SYMBOL(dev_change_carrier);
+
 /*
  *	Perform the SIOCxIFxxx calls, inside rcu_read_lock()
  */
-- 
cgit v1.2.3


From 2681128f0ced8aa4e66f221197e183cc16d244fe Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sat, 29 Dec 2012 16:02:43 +0000
Subject: veth: reduce stat overhead

veth stats are a bit bloated. There is no need to account transmit
and receive stats, since they are absolutely symmetric.

Also use a per device atomic64_t for the dropped counter, as it
should never be used in fast path.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/veth.c        | 115 +++++++++++++++++++---------------------------
 include/linux/netdevice.h |   1 +
 2 files changed, 48 insertions(+), 68 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index 95814d9747ef..c048f8d27bbf 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -25,18 +25,15 @@
 #define MIN_MTU 68		/* Min L3 MTU */
 #define MAX_MTU 65535		/* Max L3 MTU (arbitrary) */
 
-struct veth_net_stats {
-	u64			rx_packets;
-	u64			rx_bytes;
-	u64			tx_packets;
-	u64			tx_bytes;
-	u64			rx_dropped;
+struct pcpu_vstats {
+	u64			packets;
+	u64			bytes;
 	struct u64_stats_sync	syncp;
 };
 
 struct veth_priv {
-	struct net_device *peer;
-	struct veth_net_stats __percpu *stats;
+	struct net_device	*peer;
+	atomic64_t		dropped;
 };
 
 /*
@@ -107,50 +104,30 @@ static const struct ethtool_ops veth_ethtool_ops = {
 	.get_ethtool_stats	= veth_get_ethtool_stats,
 };
 
-/*
- * xmit
- */
-
 static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
 {
-	struct net_device *rcv = NULL;
-	struct veth_priv *priv, *rcv_priv;
-	struct veth_net_stats *stats, *rcv_stats;
-	int length;
-
-	priv = netdev_priv(dev);
-	rcv = priv->peer;
-	rcv_priv = netdev_priv(rcv);
-
-	stats = this_cpu_ptr(priv->stats);
-	rcv_stats = this_cpu_ptr(rcv_priv->stats);
+	struct veth_priv *priv = netdev_priv(dev);
+	struct net_device *rcv = priv->peer;
+	int length = skb->len;
 
 	/* don't change ip_summed == CHECKSUM_PARTIAL, as that
-	   will cause bad checksum on forwarded packets */
+	 * will cause bad checksum on forwarded packets
+	 */
 	if (skb->ip_summed == CHECKSUM_NONE &&
 	    rcv->features & NETIF_F_RXCSUM)
 		skb->ip_summed = CHECKSUM_UNNECESSARY;
 
-	length = skb->len;
-	if (dev_forward_skb(rcv, skb) != NET_RX_SUCCESS)
-		goto rx_drop;
-
-	u64_stats_update_begin(&stats->syncp);
-	stats->tx_bytes += length;
-	stats->tx_packets++;
-	u64_stats_update_end(&stats->syncp);
+	if (likely(dev_forward_skb(rcv, skb) == NET_RX_SUCCESS)) {
+		struct pcpu_vstats *stats = this_cpu_ptr(dev->vstats);
 
-	u64_stats_update_begin(&rcv_stats->syncp);
-	rcv_stats->rx_bytes += length;
-	rcv_stats->rx_packets++;
-	u64_stats_update_end(&rcv_stats->syncp);
-
-	return NETDEV_TX_OK;
+		u64_stats_update_begin(&stats->syncp);
+		stats->bytes += length;
+		stats->packets++;
+		u64_stats_update_end(&stats->syncp);
+	} else {
+		atomic64_inc(&priv->dropped);
+	}
 
-rx_drop:
-	u64_stats_update_begin(&rcv_stats->syncp);
-	rcv_stats->rx_dropped++;
-	u64_stats_update_end(&rcv_stats->syncp);
 	return NETDEV_TX_OK;
 }
 
@@ -158,32 +135,42 @@ rx_drop:
  * general routines
  */
 
-static struct rtnl_link_stats64 *veth_get_stats64(struct net_device *dev,
-						  struct rtnl_link_stats64 *tot)
+static u64 veth_stats_one(struct pcpu_vstats *result, struct net_device *dev)
 {
 	struct veth_priv *priv = netdev_priv(dev);
 	int cpu;
 
+	result->packets = 0;
+	result->bytes = 0;
 	for_each_possible_cpu(cpu) {
-		struct veth_net_stats *stats = per_cpu_ptr(priv->stats, cpu);
-		u64 rx_packets, rx_bytes, rx_dropped;
-		u64 tx_packets, tx_bytes;
+		struct pcpu_vstats *stats = per_cpu_ptr(dev->vstats, cpu);
+		u64 packets, bytes;
 		unsigned int start;
 
 		do {
 			start = u64_stats_fetch_begin_bh(&stats->syncp);
-			rx_packets = stats->rx_packets;
-			tx_packets = stats->tx_packets;
-			rx_bytes = stats->rx_bytes;
-			tx_bytes = stats->tx_bytes;
-			rx_dropped = stats->rx_dropped;
+			packets = stats->packets;
+			bytes = stats->bytes;
 		} while (u64_stats_fetch_retry_bh(&stats->syncp, start));
-		tot->rx_packets += rx_packets;
-		tot->tx_packets += tx_packets;
-		tot->rx_bytes   += rx_bytes;
-		tot->tx_bytes   += tx_bytes;
-		tot->rx_dropped += rx_dropped;
+		result->packets += packets;
+		result->bytes += bytes;
 	}
+	return atomic64_read(&priv->dropped);
+}
+
+static struct rtnl_link_stats64 *veth_get_stats64(struct net_device *dev,
+						  struct rtnl_link_stats64 *tot)
+{
+	struct veth_priv *priv = netdev_priv(dev);
+	struct pcpu_vstats one;
+
+	tot->tx_dropped = veth_stats_one(&one, dev);
+	tot->tx_bytes = one.bytes;
+	tot->tx_packets = one.packets;
+
+	tot->rx_dropped = veth_stats_one(&one, priv->peer);
+	tot->rx_bytes = one.bytes;
+	tot->rx_packets = one.packets;
 
 	return tot;
 }
@@ -228,24 +215,16 @@ static int veth_change_mtu(struct net_device *dev, int new_mtu)
 
 static int veth_dev_init(struct net_device *dev)
 {
-	struct veth_net_stats __percpu *stats;
-	struct veth_priv *priv;
-
-	stats = alloc_percpu(struct veth_net_stats);
-	if (stats == NULL)
+	dev->vstats = alloc_percpu(struct pcpu_vstats);
+	if (!dev->vstats)
 		return -ENOMEM;
 
-	priv = netdev_priv(dev);
-	priv->stats = stats;
 	return 0;
 }
 
 static void veth_dev_free(struct net_device *dev)
 {
-	struct veth_priv *priv;
-
-	priv = netdev_priv(dev);
-	free_percpu(priv->stats);
+	free_percpu(dev->vstats);
 	free_netdev(dev);
 }
 
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 0e1b92a0c1ec..6835b5837f93 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1284,6 +1284,7 @@ struct net_device {
 		struct pcpu_lstats __percpu	*lstats; /* loopback stats */
 		struct pcpu_tstats __percpu	*tstats; /* tunnel stats */
 		struct pcpu_dstats __percpu	*dstats; /* dummy stats */
+		struct pcpu_vstats __percpu	*vstats; /* veth stats */
 	};
 	/* GARP */
 	struct garp_port __rcu	*garp_port;
-- 
cgit v1.2.3


From 2ac902ce17f9dfa0d4d1f0818be147b5d2515fb7 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Wed, 19 Dec 2012 14:51:55 +0000
Subject: regmap: flat: Add flat cache type

While for I2C and SPI devices the overhead of using rbtree for devices with
only one block of registers is negligible the same isn't always going to
be true for MMIO devices where the I/O costs are very much lower. Cater
for these devices by adding a simple flat array type for them where the
lookups are simple array accesses, taking us right back to the original
ASoC cache implementation.

Thanks to Magnus Damm for the discussion which prompted this.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 drivers/base/regmap/Makefile        |  2 +-
 drivers/base/regmap/internal.h      |  1 +
 drivers/base/regmap/regcache-flat.c | 72 +++++++++++++++++++++++++++++++++++++
 drivers/base/regmap/regcache.c      |  1 +
 include/linux/regmap.h              |  3 +-
 5 files changed, 77 insertions(+), 2 deletions(-)
 create mode 100644 drivers/base/regmap/regcache-flat.c

(limited to 'include/linux')

diff --git a/drivers/base/regmap/Makefile b/drivers/base/regmap/Makefile
index 5e75d1b683e2..cf129980abd0 100644
--- a/drivers/base/regmap/Makefile
+++ b/drivers/base/regmap/Makefile
@@ -1,5 +1,5 @@
 obj-$(CONFIG_REGMAP) += regmap.o regcache.o
-obj-$(CONFIG_REGMAP) += regcache-rbtree.o regcache-lzo.o
+obj-$(CONFIG_REGMAP) += regcache-rbtree.o regcache-lzo.o regcache-flat.o
 obj-$(CONFIG_DEBUG_FS) += regmap-debugfs.o
 obj-$(CONFIG_REGMAP_I2C) += regmap-i2c.o
 obj-$(CONFIG_REGMAP_SPI) += regmap-spi.o
diff --git a/drivers/base/regmap/internal.h b/drivers/base/regmap/internal.h
index 401d1919635a..e22bb80edff1 100644
--- a/drivers/base/regmap/internal.h
+++ b/drivers/base/regmap/internal.h
@@ -177,5 +177,6 @@ int regcache_lookup_reg(struct regmap *map, unsigned int reg);
 
 extern struct regcache_ops regcache_rbtree_ops;
 extern struct regcache_ops regcache_lzo_ops;
+extern struct regcache_ops regcache_flat_ops;
 
 #endif
diff --git a/drivers/base/regmap/regcache-flat.c b/drivers/base/regmap/regcache-flat.c
new file mode 100644
index 000000000000..d9762e41959b
--- /dev/null
+++ b/drivers/base/regmap/regcache-flat.c
@@ -0,0 +1,72 @@
+/*
+ * Register cache access API - flat caching support
+ *
+ * Copyright 2012 Wolfson Microelectronics plc
+ *
+ * Author: Mark Brown <broonie@opensource.wolfsonmicro.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/slab.h>
+#include <linux/device.h>
+#include <linux/seq_file.h>
+
+#include "internal.h"
+
+static int regcache_flat_init(struct regmap *map)
+{
+	int i;
+	unsigned int *cache;
+
+	map->cache = kzalloc(sizeof(unsigned int) * (map->max_register + 1),
+			     GFP_KERNEL);
+	if (!map->cache)
+		return -ENOMEM;
+
+	cache = map->cache;
+
+	for (i = 0; i < map->num_reg_defaults; i++)
+		cache[map->reg_defaults[i].reg] = map->reg_defaults[i].def;
+
+	return 0;
+}
+
+static int regcache_flat_exit(struct regmap *map)
+{
+	kfree(map->cache);
+	map->cache = NULL;
+
+	return 0;
+}
+
+static int regcache_flat_read(struct regmap *map,
+			      unsigned int reg, unsigned int *value)
+{
+	unsigned int *cache = map->cache;
+
+	*value = cache[reg];
+
+	return 0;
+}
+
+static int regcache_flat_write(struct regmap *map, unsigned int reg,
+			       unsigned int value)
+{
+	unsigned int *cache = map->cache;
+
+	cache[reg] = value;
+
+	return 0;
+}
+
+struct regcache_ops regcache_flat_ops = {
+	.type = REGCACHE_FLAT,
+	.name = "flat",
+	.init = regcache_flat_init,
+	.exit = regcache_flat_exit,
+	.read = regcache_flat_read,
+	.write = regcache_flat_write,
+};
diff --git a/drivers/base/regmap/regcache.c b/drivers/base/regmap/regcache.c
index 835883bda977..e69ff3e4742c 100644
--- a/drivers/base/regmap/regcache.c
+++ b/drivers/base/regmap/regcache.c
@@ -22,6 +22,7 @@
 static const struct regcache_ops *cache_types[] = {
 	&regcache_rbtree_ops,
 	&regcache_lzo_ops,
+	&regcache_flat_ops,
 };
 
 static int regcache_hw_init(struct regmap *map)
diff --git a/include/linux/regmap.h b/include/linux/regmap.h
index b7e95bf942c9..390d879d473a 100644
--- a/include/linux/regmap.h
+++ b/include/linux/regmap.h
@@ -28,7 +28,8 @@ struct regmap_range_cfg;
 enum regcache_type {
 	REGCACHE_NONE,
 	REGCACHE_RBTREE,
-	REGCACHE_COMPRESSED
+	REGCACHE_COMPRESSED,
+	REGCACHE_FLAT,
 };
 
 /**
-- 
cgit v1.2.3


From a7a88b23737095e6c18a20c5d4eef9e25ec5b829 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Wed, 2 Jan 2013 02:04:23 -0800
Subject: mempolicy: remove arg from mpol_parse_str, mpol_to_str

Remove the unused argument (formerly no_context) from mpol_parse_str()
and from mpol_to_str().

Signed-off-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/task_mmu.c        |  2 +-
 include/linux/mempolicy.h | 11 ++++-------
 mm/mempolicy.c            |  6 ++----
 mm/shmem.c                |  4 ++--
 4 files changed, 9 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 448455b7fd91..ca5ce7f9f800 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1278,7 +1278,7 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid)
 	walk.mm = mm;
 
 	pol = get_vma_policy(task, vma, vma->vm_start);
-	mpol_to_str(buffer, sizeof(buffer), pol, 0);
+	mpol_to_str(buffer, sizeof(buffer), pol);
 	mpol_cond_put(pol);
 
 	seq_printf(m, "%08lx %s", vma->vm_start, buffer);
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 9adc270de7ef..92bc9988a180 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -165,11 +165,10 @@ int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from,
 
 
 #ifdef CONFIG_TMPFS
-extern int mpol_parse_str(char *str, struct mempolicy **mpol, int no_context);
+extern int mpol_parse_str(char *str, struct mempolicy **mpol);
 #endif
 
-extern int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol,
-			int no_context);
+extern int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol);
 
 /* Check if a vma is migratable */
 static inline int vma_migratable(struct vm_area_struct *vma)
@@ -296,15 +295,13 @@ static inline void check_highest_zone(int k)
 }
 
 #ifdef CONFIG_TMPFS
-static inline int mpol_parse_str(char *str, struct mempolicy **mpol,
-				int no_context)
+static inline int mpol_parse_str(char *str, struct mempolicy **mpol)
 {
 	return 1;	/* error */
 }
 #endif
 
-static inline int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol,
-				int no_context)
+static inline int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol)
 {
 	return 0;
 }
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 02c914cca53d..1cb200af3828 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2612,14 +2612,13 @@ static const char * const policy_modes[] =
  * mpol_parse_str - parse string to mempolicy, for tmpfs mpol mount option.
  * @str:  string containing mempolicy to parse
  * @mpol:  pointer to struct mempolicy pointer, returned on success.
- * @unused:  redundant argument, to be removed later.
  *
  * Format of input:
  *	<mode>[=<flags>][:<nodelist>]
  *
  * On success, returns 0, else 1
  */
-int mpol_parse_str(char *str, struct mempolicy **mpol, int unused)
+int mpol_parse_str(char *str, struct mempolicy **mpol)
 {
 	struct mempolicy *new = NULL;
 	unsigned short mode;
@@ -2747,13 +2746,12 @@ out:
  * @buffer:  to contain formatted mempolicy string
  * @maxlen:  length of @buffer
  * @pol:  pointer to mempolicy to be formatted
- * @unused:  redundant argument, to be removed later.
  *
  * Convert a mempolicy into a string.
  * Returns the number of characters in buffer (if positive)
  * or an error (negative)
  */
-int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol, int unused)
+int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol)
 {
 	char *p = buffer;
 	int l;
diff --git a/mm/shmem.c b/mm/shmem.c
index 5c90d84c2b02..5dd56f6efdbd 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -889,7 +889,7 @@ static void shmem_show_mpol(struct seq_file *seq, struct mempolicy *mpol)
 	if (!mpol || mpol->mode == MPOL_DEFAULT)
 		return;		/* show nothing */
 
-	mpol_to_str(buffer, sizeof(buffer), mpol, 1);
+	mpol_to_str(buffer, sizeof(buffer), mpol);
 
 	seq_printf(seq, ",mpol=%s", buffer);
 }
@@ -2463,7 +2463,7 @@ static int shmem_parse_options(char *options, struct shmem_sb_info *sbinfo,
 			if (!gid_valid(sbinfo->gid))
 				goto bad_val;
 		} else if (!strcmp(this_char,"mpol")) {
-			if (mpol_parse_str(value, &sbinfo->mpol, 1))
+			if (mpol_parse_str(value, &sbinfo->mpol))
 				goto bad_val;
 		} else {
 			printk(KERN_ERR "tmpfs: Bad mount option %s\n",
-- 
cgit v1.2.3


From ae278a935f086775e8ae31a8ec9f7224ea25ea3c Mon Sep 17 00:00:00 2001
From: Stephen Warren <swarren@nvidia.com>
Date: Mon, 19 Nov 2012 16:41:20 -0700
Subject: clocksource: add common of_clksrc_init() function

It is desirable to move all clocksource drivers to drivers/clocksource,
yet each requires its own initialization function. We'd rather not
pollute <linux/> with a header for each function. Instead, create a
single of_clksrc_init() function which will determine which clocksource
driver to initialize based on device tree.

Based on a similar patch for drivers/irqchip by Thomas Petazzoni.

Signed-off-by: Stephen Warren <swarren@nvidia.com>
---
 drivers/clocksource/Kconfig       |  3 +++
 drivers/clocksource/Makefile      |  1 +
 drivers/clocksource/clksrc-of.c   | 35 +++++++++++++++++++++++++++++++++++
 include/asm-generic/vmlinux.lds.h |  9 +++++++++
 include/linux/clocksource.h       |  9 +++++++++
 5 files changed, 57 insertions(+)
 create mode 100644 drivers/clocksource/clksrc-of.c

(limited to 'include/linux')

diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig
index 7fdcbd3f4da5..a32b7a9c65d3 100644
--- a/drivers/clocksource/Kconfig
+++ b/drivers/clocksource/Kconfig
@@ -1,3 +1,6 @@
+config CLKSRC_OF
+	bool
+
 config CLKSRC_I8253
 	bool
 
diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile
index f93453d01673..a33f79240217 100644
--- a/drivers/clocksource/Makefile
+++ b/drivers/clocksource/Makefile
@@ -1,3 +1,4 @@
+obj-$(CONFIG_CLKSRC_OF)	+= clksrc-of.o
 obj-$(CONFIG_ATMEL_TCB_CLKSRC)	+= tcb_clksrc.o
 obj-$(CONFIG_X86_CYCLONE_TIMER)	+= cyclone.o
 obj-$(CONFIG_X86_PM_TIMER)	+= acpi_pm.o
diff --git a/drivers/clocksource/clksrc-of.c b/drivers/clocksource/clksrc-of.c
new file mode 100644
index 000000000000..bdabdaa8d00f
--- /dev/null
+++ b/drivers/clocksource/clksrc-of.c
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2012, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/init.h>
+#include <linux/of.h>
+
+extern struct of_device_id __clksrc_of_table[];
+
+static const struct of_device_id __clksrc_of_table_sentinel
+	__used __section(__clksrc_of_table_end);
+
+void __init clocksource_of_init(void)
+{
+	struct device_node *np;
+	const struct of_device_id *match;
+	void (*init_func)(void);
+
+	for_each_matching_node_and_match(np, __clksrc_of_table, &match) {
+		init_func = match->data;
+		init_func();
+	}
+}
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index d1ea7ce0b4cb..1e744c5a0ffe 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -149,6 +149,14 @@
 #define TRACE_SYSCALLS()
 #endif
 
+#ifdef CONFIG_CLKSRC_OF
+#define CLKSRC_OF_TABLES() . = ALIGN(8);				\
+			   VMLINUX_SYMBOL(__clksrc_of_table) = .;	\
+			   *(__clksrc_of_table)				\
+			   *(__clksrc_of_table_end)
+#else
+#define CLKSRC_OF_TABLES()
+#endif
 
 #define KERNEL_DTB()							\
 	STRUCT_ALIGN();							\
@@ -493,6 +501,7 @@
 	DEV_DISCARD(init.rodata)					\
 	CPU_DISCARD(init.rodata)					\
 	MEM_DISCARD(init.rodata)					\
+	CLKSRC_OF_TABLES()						\
 	KERNEL_DTB()
 
 #define INIT_TEXT							\
diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 4dceaf8ae152..7944f14ea947 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -332,4 +332,13 @@ extern int clocksource_mmio_init(void __iomem *, const char *,
 
 extern int clocksource_i8253_init(void);
 
+#ifdef CONFIG_CLKSRC_OF
+extern void clocksource_of_init(void);
+
+#define CLOCKSOURCE_OF_DECLARE(name, compat, fn)			\
+	static const struct of_device_id __clksrc_of_table_##name	\
+		__used __section(__clksrc_of_table)			\
+		 = { .compatible = compat, .data = fn };
+#endif
+
 #endif /* _LINUX_CLOCKSOURCE_H */
-- 
cgit v1.2.3


From 42288fe366c4f1ce7522bc9f27d0bc2a81c55264 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Fri, 21 Dec 2012 23:10:25 +0000
Subject: mm: mempolicy: Convert shared_policy mutex to spinlock

Sasha was fuzzing with trinity and reported the following problem:

  BUG: sleeping function called from invalid context at kernel/mutex.c:269
  in_atomic(): 1, irqs_disabled(): 0, pid: 6361, name: trinity-main
  2 locks held by trinity-main/6361:
   #0:  (&mm->mmap_sem){++++++}, at: [<ffffffff810aa314>] __do_page_fault+0x1e4/0x4f0
   #1:  (&(&mm->page_table_lock)->rlock){+.+...}, at: [<ffffffff8122f017>] handle_pte_fault+0x3f7/0x6a0
  Pid: 6361, comm: trinity-main Tainted: G        W
  3.7.0-rc2-next-20121024-sasha-00001-gd95ef01-dirty #74
  Call Trace:
    __might_sleep+0x1c3/0x1e0
    mutex_lock_nested+0x29/0x50
    mpol_shared_policy_lookup+0x2e/0x90
    shmem_get_policy+0x2e/0x30
    get_vma_policy+0x5a/0xa0
    mpol_misplaced+0x41/0x1d0
    handle_pte_fault+0x465/0x6a0

This was triggered by a different version of automatic NUMA balancing
but in theory the current version is vunerable to the same problem.

do_numa_page
  -> numa_migrate_prep
    -> mpol_misplaced
      -> get_vma_policy
        -> shmem_get_policy

It's very unlikely this will happen as shared pages are not marked
pte_numa -- see the page_mapcount() check in change_pte_range() -- but
it is possible.

To address this, this patch restores sp->lock as originally implemented
by Kosaki Motohiro.  In the path where get_vma_policy() is called, it
should not be calling sp_alloc() so it is not necessary to treat the PTL
specially.

Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Tested-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Mel Gorman <mgorman@suse.de>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mempolicy.h |  2 +-
 mm/mempolicy.c            | 68 +++++++++++++++++++++++++++++++++--------------
 2 files changed, 49 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 92bc9988a180..0d7df39a5885 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -123,7 +123,7 @@ struct sp_node {
 
 struct shared_policy {
 	struct rb_root root;
-	struct mutex mutex;
+	spinlock_t lock;
 };
 
 void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol);
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 1cb200af3828..e2df1c1fb41f 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2132,7 +2132,7 @@ bool __mpol_equal(struct mempolicy *a, struct mempolicy *b)
  */
 
 /* lookup first element intersecting start-end */
-/* Caller holds sp->mutex */
+/* Caller holds sp->lock */
 static struct sp_node *
 sp_lookup(struct shared_policy *sp, unsigned long start, unsigned long end)
 {
@@ -2196,13 +2196,13 @@ mpol_shared_policy_lookup(struct shared_policy *sp, unsigned long idx)
 
 	if (!sp->root.rb_node)
 		return NULL;
-	mutex_lock(&sp->mutex);
+	spin_lock(&sp->lock);
 	sn = sp_lookup(sp, idx, idx+1);
 	if (sn) {
 		mpol_get(sn->policy);
 		pol = sn->policy;
 	}
-	mutex_unlock(&sp->mutex);
+	spin_unlock(&sp->lock);
 	return pol;
 }
 
@@ -2328,6 +2328,14 @@ static void sp_delete(struct shared_policy *sp, struct sp_node *n)
 	sp_free(n);
 }
 
+static void sp_node_init(struct sp_node *node, unsigned long start,
+			unsigned long end, struct mempolicy *pol)
+{
+	node->start = start;
+	node->end = end;
+	node->policy = pol;
+}
+
 static struct sp_node *sp_alloc(unsigned long start, unsigned long end,
 				struct mempolicy *pol)
 {
@@ -2344,10 +2352,7 @@ static struct sp_node *sp_alloc(unsigned long start, unsigned long end,
 		return NULL;
 	}
 	newpol->flags |= MPOL_F_SHARED;
-
-	n->start = start;
-	n->end = end;
-	n->policy = newpol;
+	sp_node_init(n, start, end, newpol);
 
 	return n;
 }
@@ -2357,9 +2362,12 @@ static int shared_policy_replace(struct shared_policy *sp, unsigned long start,
 				 unsigned long end, struct sp_node *new)
 {
 	struct sp_node *n;
+	struct sp_node *n_new = NULL;
+	struct mempolicy *mpol_new = NULL;
 	int ret = 0;
 
-	mutex_lock(&sp->mutex);
+restart:
+	spin_lock(&sp->lock);
 	n = sp_lookup(sp, start, end);
 	/* Take care of old policies in the same range. */
 	while (n && n->start < end) {
@@ -2372,14 +2380,16 @@ static int shared_policy_replace(struct shared_policy *sp, unsigned long start,
 		} else {
 			/* Old policy spanning whole new range. */
 			if (n->end > end) {
-				struct sp_node *new2;
-				new2 = sp_alloc(end, n->end, n->policy);
-				if (!new2) {
-					ret = -ENOMEM;
-					goto out;
-				}
+				if (!n_new)
+					goto alloc_new;
+
+				*mpol_new = *n->policy;
+				atomic_set(&mpol_new->refcnt, 1);
+				sp_node_init(n_new, n->end, end, mpol_new);
+				sp_insert(sp, n_new);
 				n->end = start;
-				sp_insert(sp, new2);
+				n_new = NULL;
+				mpol_new = NULL;
 				break;
 			} else
 				n->end = start;
@@ -2390,9 +2400,27 @@ static int shared_policy_replace(struct shared_policy *sp, unsigned long start,
 	}
 	if (new)
 		sp_insert(sp, new);
-out:
-	mutex_unlock(&sp->mutex);
+	spin_unlock(&sp->lock);
+	ret = 0;
+
+err_out:
+	if (mpol_new)
+		mpol_put(mpol_new);
+	if (n_new)
+		kmem_cache_free(sn_cache, n_new);
+
 	return ret;
+
+alloc_new:
+	spin_unlock(&sp->lock);
+	ret = -ENOMEM;
+	n_new = kmem_cache_alloc(sn_cache, GFP_KERNEL);
+	if (!n_new)
+		goto err_out;
+	mpol_new = kmem_cache_alloc(policy_cache, GFP_KERNEL);
+	if (!mpol_new)
+		goto err_out;
+	goto restart;
 }
 
 /**
@@ -2410,7 +2438,7 @@ void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol)
 	int ret;
 
 	sp->root = RB_ROOT;		/* empty tree == default mempolicy */
-	mutex_init(&sp->mutex);
+	spin_lock_init(&sp->lock);
 
 	if (mpol) {
 		struct vm_area_struct pvma;
@@ -2476,14 +2504,14 @@ void mpol_free_shared_policy(struct shared_policy *p)
 
 	if (!p->root.rb_node)
 		return;
-	mutex_lock(&p->mutex);
+	spin_lock(&p->lock);
 	next = rb_first(&p->root);
 	while (next) {
 		n = rb_entry(next, struct sp_node, nd);
 		next = rb_next(&n->nd);
 		sp_delete(p, n);
 	}
-	mutex_unlock(&p->mutex);
+	spin_unlock(&p->lock);
 }
 
 #ifdef CONFIG_NUMA_BALANCING
-- 
cgit v1.2.3


From 3d33fcc11bdd11b6949cf5c406726a094395dc4f Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 2 Jan 2013 15:12:55 +0000
Subject: UAPI: Remove empty Kbuild files

Empty files can get deleted by the patch program, so remove empty Kbuild
files and their links from the parent Kbuilds.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/Kbuild            | 3 ---
 include/linux/Kbuild      | 5 -----
 include/linux/hdlc/Kbuild | 0
 include/linux/hsi/Kbuild  | 0
 include/linux/raid/Kbuild | 0
 include/linux/usb/Kbuild  | 0
 include/rdma/Kbuild       | 0
 include/sound/Kbuild      | 0
 8 files changed, 8 deletions(-)
 delete mode 100644 include/linux/Kbuild
 delete mode 100644 include/linux/hdlc/Kbuild
 delete mode 100644 include/linux/hsi/Kbuild
 delete mode 100644 include/linux/raid/Kbuild
 delete mode 100644 include/linux/usb/Kbuild
 delete mode 100644 include/rdma/Kbuild
 delete mode 100644 include/sound/Kbuild

(limited to 'include/linux')

diff --git a/include/Kbuild b/include/Kbuild
index 83256b64166a..1dfd33e8d43b 100644
--- a/include/Kbuild
+++ b/include/Kbuild
@@ -1,8 +1,5 @@
 # Top-level Makefile calls into asm-$(ARCH)
 # List only non-arch directories below
 
-header-y += linux/
-header-y += sound/
-header-y += rdma/
 header-y += video/
 header-y += scsi/
diff --git a/include/linux/Kbuild b/include/linux/Kbuild
deleted file mode 100644
index 7fe2dae251e5..000000000000
--- a/include/linux/Kbuild
+++ /dev/null
@@ -1,5 +0,0 @@
-header-y += dvb/
-header-y += hdlc/
-header-y += hsi/
-header-y += raid/
-header-y += usb/
diff --git a/include/linux/hdlc/Kbuild b/include/linux/hdlc/Kbuild
deleted file mode 100644
index e69de29bb2d1..000000000000
diff --git a/include/linux/hsi/Kbuild b/include/linux/hsi/Kbuild
deleted file mode 100644
index e69de29bb2d1..000000000000
diff --git a/include/linux/raid/Kbuild b/include/linux/raid/Kbuild
deleted file mode 100644
index e69de29bb2d1..000000000000
diff --git a/include/linux/usb/Kbuild b/include/linux/usb/Kbuild
deleted file mode 100644
index e69de29bb2d1..000000000000
diff --git a/include/rdma/Kbuild b/include/rdma/Kbuild
deleted file mode 100644
index e69de29bb2d1..000000000000
diff --git a/include/sound/Kbuild b/include/sound/Kbuild
deleted file mode 100644
index e69de29bb2d1..000000000000
-- 
cgit v1.2.3


From 4d02c58eb1f19433cb852b2bde41c44849691614 Mon Sep 17 00:00:00 2001
From: H Hartley Sweeten <hartleys@visionengravers.com>
Date: Mon, 17 Dec 2012 15:26:55 -0700
Subject: HID: introduce helper for hid_driver boilerplate

Introduce the module_hid_driver macro which is a convenience macro
for HID driver modules similar to module_usb_driver. It is intended
to be used by drivers with init/exit sections that do nothing but
register/unregister the HID driver.

Signed-off-by: H Hartley Sweeten <hsweeten@visionengravers.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 include/linux/hid.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/hid.h b/include/linux/hid.h
index 7330a0fef0c0..d6c71a674310 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -700,6 +700,18 @@ extern int __must_check __hid_register_driver(struct hid_driver *,
 
 extern void hid_unregister_driver(struct hid_driver *);
 
+/**
+ * module_hid_driver() - Helper macro for registering a HID driver
+ * @__hid_driver: hid_driver struct
+ *
+ * Helper macro for HID drivers which do not do anything special in module
+ * init/exit. This eliminates a lot of boilerplate. Each module may only
+ * use this macro once, and calling it replaces module_init() and module_exit()
+ */
+#define module_hid_driver(__hid_driver) \
+	module_driver(__hid_driver, hid_register_driver, \
+		      hid_unregister_driver)
+
 extern void hidinput_hid_event(struct hid_device *, struct hid_field *, struct hid_usage *, __s32);
 extern void hidinput_report_event(struct hid_device *hid, struct hid_report *report);
 extern int hidinput_connect(struct hid_device *hid, unsigned int force);
-- 
cgit v1.2.3


From d463f4719a2fa883bc0bb1fb67e6fea2307aa6df Mon Sep 17 00:00:00 2001
From: H Hartley Sweeten <hartleys@visionengravers.com>
Date: Mon, 17 Dec 2012 15:27:33 -0700
Subject: HID: hid.h: remove unused hid_generic_{init,exit} prototypes

These functions are not defined. Remove the extern declarations.

Signed-off-by: H Hartley Sweeten <hsweeten@visionengravers.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 include/linux/hid.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hid.h b/include/linux/hid.h
index d6c71a674310..828726c70503 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -884,9 +884,6 @@ static inline int hid_hw_power(struct hid_device *hdev, int level)
 int hid_report_raw_event(struct hid_device *hid, int type, u8 *data, int size,
 		int interrupt);
 
-extern int hid_generic_init(void);
-extern void hid_generic_exit(void);
-
 /* HID quirks API */
 u32 usbhid_lookup_quirk(const u16 idVendor, const u16 idProduct);
 int usbhid_quirks_init(char **quirks_param);
-- 
cgit v1.2.3


From 0f6dfcee2e081f47a3e97cb8984fb4d62217e6f7 Mon Sep 17 00:00:00 2001
From: Vladimir Kondratiev <qca_vkondrat@qca.qualcomm.com>
Date: Tue, 18 Dec 2012 09:55:33 +0200
Subject: wireless: more 'capability info' bits

define bits for 'capability info', as in recent spec edition
IEEE802.11-2012

Also, add mask for 2-bit field 'bss type', as it is in 802.11ad

Signed-off-by: Vladimir Kondratiev <qca_vkondrat@qca.qualcomm.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index f0859cc73861..09879eb24380 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -1311,16 +1311,21 @@ struct ieee80211_vht_operation {
 #define WLAN_CAPABILITY_SPECTRUM_MGMT	(1<<8)
 #define WLAN_CAPABILITY_QOS		(1<<9)
 #define WLAN_CAPABILITY_SHORT_SLOT_TIME	(1<<10)
+#define WLAN_CAPABILITY_APSD		(1<<11)
+#define WLAN_CAPABILITY_RADIO_MEASURE	(1<<12)
 #define WLAN_CAPABILITY_DSSS_OFDM	(1<<13)
+#define WLAN_CAPABILITY_DEL_BACK	(1<<14)
+#define WLAN_CAPABILITY_IMM_BACK	(1<<15)
 
 /* DMG (60gHz) 802.11ad */
 /* type - bits 0..1 */
+#define WLAN_CAPABILITY_DMG_TYPE_MASK		(3<<0)
 #define WLAN_CAPABILITY_DMG_TYPE_IBSS		(1<<0) /* Tx by: STA */
 #define WLAN_CAPABILITY_DMG_TYPE_PBSS		(2<<0) /* Tx by: PCP */
 #define WLAN_CAPABILITY_DMG_TYPE_AP		(3<<0) /* Tx by: AP */
 
 #define WLAN_CAPABILITY_DMG_CBAP_ONLY		(1<<2)
-#define WLAN_CAPABILITY_DMG_CBAP_SOURCE	(1<<3)
+#define WLAN_CAPABILITY_DMG_CBAP_SOURCE		(1<<3)
 #define WLAN_CAPABILITY_DMG_PRIVACY		(1<<4)
 #define WLAN_CAPABILITY_DMG_ECPAC		(1<<5)
 
-- 
cgit v1.2.3


From 598a5938e04ce30d837dca4c3c3326c69435342a Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 28 Dec 2012 12:00:40 +0100
Subject: wireless: use __packed in ieee80211.h

Use __packed instead of __attribute__((packed)).

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h | 84 +++++++++++++++++++++++------------------------
 1 file changed, 42 insertions(+), 42 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 09879eb24380..5db76ebe8810 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -180,7 +180,7 @@ struct ieee80211_hdr {
 	u8 addr3[6];
 	__le16 seq_ctrl;
 	u8 addr4[6];
-} __attribute__ ((packed));
+} __packed;
 
 struct ieee80211_hdr_3addr {
 	__le16 frame_control;
@@ -189,7 +189,7 @@ struct ieee80211_hdr_3addr {
 	u8 addr2[6];
 	u8 addr3[6];
 	__le16 seq_ctrl;
-} __attribute__ ((packed));
+} __packed;
 
 struct ieee80211_qos_hdr {
 	__le16 frame_control;
@@ -199,7 +199,7 @@ struct ieee80211_qos_hdr {
 	u8 addr3[6];
 	__le16 seq_ctrl;
 	__le16 qos_ctrl;
-} __attribute__ ((packed));
+} __packed;
 
 /**
  * ieee80211_has_tods - check if IEEE80211_FCTL_TODS is set
@@ -576,7 +576,7 @@ struct ieee80211s_hdr {
 	__le32 seqnum;
 	u8 eaddr1[6];
 	u8 eaddr2[6];
-} __attribute__ ((packed));
+} __packed;
 
 /* Mesh flags */
 #define MESH_FLAGS_AE_A4 	0x1
@@ -614,7 +614,7 @@ struct ieee80211_quiet_ie {
 	u8 period;
 	__le16 duration;
 	__le16 offset;
-} __attribute__ ((packed));
+} __packed;
 
 /**
  * struct ieee80211_msrment_ie
@@ -626,7 +626,7 @@ struct ieee80211_msrment_ie {
 	u8 mode;
 	u8 type;
 	u8 request[0];
-} __attribute__ ((packed));
+} __packed;
 
 /**
  * struct ieee80211_channel_sw_ie
@@ -637,7 +637,7 @@ struct ieee80211_channel_sw_ie {
 	u8 mode;
 	u8 new_ch_num;
 	u8 count;
-} __attribute__ ((packed));
+} __packed;
 
 /**
  * struct ieee80211_tim
@@ -650,7 +650,7 @@ struct ieee80211_tim_ie {
 	u8 bitmap_ctrl;
 	/* variable size: 1 - 251 bytes */
 	u8 virtual_map[1];
-} __attribute__ ((packed));
+} __packed;
 
 /**
  * struct ieee80211_meshconf_ie
@@ -665,7 +665,7 @@ struct ieee80211_meshconf_ie {
 	u8 meshconf_auth;
 	u8 meshconf_form;
 	u8 meshconf_cap;
-} __attribute__ ((packed));
+} __packed;
 
 /**
  * enum mesh_config_capab_flags - Mesh Configuration IE capability field flags
@@ -695,7 +695,7 @@ struct ieee80211_rann_ie {
 	__le32 rann_seq;
 	__le32 rann_interval;
 	__le32 rann_metric;
-} __attribute__ ((packed));
+} __packed;
 
 enum ieee80211_rann_flags {
 	RANN_FLAG_IS_GATE = 1 << 0,
@@ -717,33 +717,33 @@ struct ieee80211_mgmt {
 			__le16 status_code;
 			/* possibly followed by Challenge text */
 			u8 variable[0];
-		} __attribute__ ((packed)) auth;
+		} __packed auth;
 		struct {
 			__le16 reason_code;
-		} __attribute__ ((packed)) deauth;
+		} __packed deauth;
 		struct {
 			__le16 capab_info;
 			__le16 listen_interval;
 			/* followed by SSID and Supported rates */
 			u8 variable[0];
-		} __attribute__ ((packed)) assoc_req;
+		} __packed assoc_req;
 		struct {
 			__le16 capab_info;
 			__le16 status_code;
 			__le16 aid;
 			/* followed by Supported rates */
 			u8 variable[0];
-		} __attribute__ ((packed)) assoc_resp, reassoc_resp;
+		} __packed assoc_resp, reassoc_resp;
 		struct {
 			__le16 capab_info;
 			__le16 listen_interval;
 			u8 current_ap[6];
 			/* followed by SSID and Supported rates */
 			u8 variable[0];
-		} __attribute__ ((packed)) reassoc_req;
+		} __packed reassoc_req;
 		struct {
 			__le16 reason_code;
-		} __attribute__ ((packed)) disassoc;
+		} __packed disassoc;
 		struct {
 			__le64 timestamp;
 			__le16 beacon_int;
@@ -751,11 +751,11 @@ struct ieee80211_mgmt {
 			/* followed by some of SSID, Supported rates,
 			 * FH Params, DS Params, CF Params, IBSS Params, TIM */
 			u8 variable[0];
-		} __attribute__ ((packed)) beacon;
+		} __packed beacon;
 		struct {
 			/* only variable items: SSID, Supported rates */
 			u8 variable[0];
-		} __attribute__ ((packed)) probe_req;
+		} __packed probe_req;
 		struct {
 			__le64 timestamp;
 			__le16 beacon_int;
@@ -763,7 +763,7 @@ struct ieee80211_mgmt {
 			/* followed by some of SSID, Supported rates,
 			 * FH Params, DS Params, CF Params, IBSS Params */
 			u8 variable[0];
-		} __attribute__ ((packed)) probe_resp;
+		} __packed probe_resp;
 		struct {
 			u8 category;
 			union {
@@ -772,55 +772,55 @@ struct ieee80211_mgmt {
 					u8 dialog_token;
 					u8 status_code;
 					u8 variable[0];
-				} __attribute__ ((packed)) wme_action;
+				} __packed wme_action;
 				struct{
 					u8 action_code;
 					u8 element_id;
 					u8 length;
 					struct ieee80211_channel_sw_ie sw_elem;
-				} __attribute__((packed)) chan_switch;
+				} __packed chan_switch;
 				struct{
 					u8 action_code;
 					u8 dialog_token;
 					u8 element_id;
 					u8 length;
 					struct ieee80211_msrment_ie msr_elem;
-				} __attribute__((packed)) measurement;
+				} __packed measurement;
 				struct{
 					u8 action_code;
 					u8 dialog_token;
 					__le16 capab;
 					__le16 timeout;
 					__le16 start_seq_num;
-				} __attribute__((packed)) addba_req;
+				} __packed addba_req;
 				struct{
 					u8 action_code;
 					u8 dialog_token;
 					__le16 status;
 					__le16 capab;
 					__le16 timeout;
-				} __attribute__((packed)) addba_resp;
+				} __packed addba_resp;
 				struct{
 					u8 action_code;
 					__le16 params;
 					__le16 reason_code;
-				} __attribute__((packed)) delba;
+				} __packed delba;
 				struct {
 					u8 action_code;
 					u8 variable[0];
-				} __attribute__((packed)) self_prot;
+				} __packed self_prot;
 				struct{
 					u8 action_code;
 					u8 variable[0];
-				} __attribute__((packed)) mesh_action;
+				} __packed mesh_action;
 				struct {
 					u8 action;
 					u8 trans_id[WLAN_SA_QUERY_TR_ID_LEN];
-				} __attribute__ ((packed)) sa_query;
+				} __packed sa_query;
 				struct {
 					u8 action;
 					u8 smps_control;
-				} __attribute__ ((packed)) ht_smps;
+				} __packed ht_smps;
 				struct {
 					u8 action_code;
 					u8 dialog_token;
@@ -828,9 +828,9 @@ struct ieee80211_mgmt {
 					u8 variable[0];
 				} __packed tdls_discover_resp;
 			} u;
-		} __attribute__ ((packed)) action;
+		} __packed action;
 	} u;
-} __attribute__ ((packed));
+} __packed;
 
 /* Supported Rates value encodings in 802.11n-2009 7.3.2.2 */
 #define BSS_MEMBERSHIP_SELECTOR_HT_PHY	127
@@ -846,7 +846,7 @@ struct ieee80211_mmie {
 	__le16 key_id;
 	u8 sequence_number[6];
 	u8 mic[8];
-} __attribute__ ((packed));
+} __packed;
 
 struct ieee80211_vendor_ie {
 	u8 element_id;
@@ -861,20 +861,20 @@ struct ieee80211_rts {
 	__le16 duration;
 	u8 ra[6];
 	u8 ta[6];
-} __attribute__ ((packed));
+} __packed;
 
 struct ieee80211_cts {
 	__le16 frame_control;
 	__le16 duration;
 	u8 ra[6];
-} __attribute__ ((packed));
+} __packed;
 
 struct ieee80211_pspoll {
 	__le16 frame_control;
 	__le16 aid;
 	u8 bssid[6];
 	u8 ta[6];
-} __attribute__ ((packed));
+} __packed;
 
 /* TDLS */
 
@@ -967,7 +967,7 @@ struct ieee80211_bar {
 	__u8 ta[6];
 	__le16 control;
 	__le16 start_seq_num;
-} __attribute__((packed));
+} __packed;
 
 /* 802.11 BAR control masks */
 #define IEEE80211_BAR_CTRL_ACK_POLICY_NORMAL	0x0000
@@ -992,7 +992,7 @@ struct ieee80211_mcs_info {
 	__le16 rx_highest;
 	u8 tx_params;
 	u8 reserved[3];
-} __attribute__((packed));
+} __packed;
 
 /* 802.11n HT capability MSC set */
 #define IEEE80211_HT_MCS_RX_HIGHEST_MASK	0x3ff
@@ -1031,7 +1031,7 @@ struct ieee80211_ht_cap {
 	__le16 extended_ht_cap_info;
 	__le32 tx_BF_cap_info;
 	u8 antenna_selection_info;
-} __attribute__ ((packed));
+} __packed;
 
 /* 802.11n HT capabilities masks (for cap_info) */
 #define IEEE80211_HT_CAP_LDPC_CODING		0x0001
@@ -1102,7 +1102,7 @@ struct ieee80211_ht_operation {
 	__le16 operation_mode;
 	__le16 stbc_param;
 	u8 basic_set[16];
-} __attribute__ ((packed));
+} __packed;
 
 /* for ht_param */
 #define IEEE80211_HT_PARAM_CHA_SEC_OFFSET		0x03
@@ -1839,14 +1839,14 @@ struct ieee80211_country_ie_triplet {
 			u8 first_channel;
 			u8 num_channels;
 			s8 max_power;
-		} __attribute__ ((packed)) chans;
+		} __packed chans;
 		struct {
 			u8 reg_extension_id;
 			u8 reg_class;
 			u8 coverage_class;
-		} __attribute__ ((packed)) ext;
+		} __packed ext;
 	};
-} __attribute__ ((packed));
+} __packed;
 
 enum ieee80211_timeout_interval_type {
 	WLAN_TIMEOUT_REASSOC_DEADLINE = 1 /* 802.11r */,
-- 
cgit v1.2.3


From ec61cd63dd3f3bf982180b2bcc1b325160d73837 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 28 Dec 2012 12:12:10 +0100
Subject: mac80211: support HT notify channel width action

Support the HT notify channel width action frame
to update the rate scaling about the bandwidth
the peer can receive in.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h |  9 +++++++++
 net/mac80211/rx.c         | 31 ++++++++++++++++++++++++++++++-
 2 files changed, 39 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 5db76ebe8810..ccf9ee1dca8c 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -701,6 +701,11 @@ enum ieee80211_rann_flags {
 	RANN_FLAG_IS_GATE = 1 << 0,
 };
 
+enum ieee80211_ht_chanwidth_values {
+	IEEE80211_HT_CHANWIDTH_20MHZ = 0,
+	IEEE80211_HT_CHANWIDTH_ANY = 1,
+};
+
 #define WLAN_SA_QUERY_TR_ID_LEN 2
 
 struct ieee80211_mgmt {
@@ -821,6 +826,10 @@ struct ieee80211_mgmt {
 					u8 action;
 					u8 smps_control;
 				} __packed ht_smps;
+				struct {
+					u8 action_code;
+					u8 chanwidth;
+				} __packed ht_notify_cw;
 				struct {
 					u8 action_code;
 					u8 dialog_token;
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 580704eba8b8..a19089565c4b 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -2353,7 +2353,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
 		    sdata->vif.type != NL80211_IFTYPE_ADHOC)
 			break;
 
-		/* verify action & smps_control are present */
+		/* verify action & smps_control/chanwidth are present */
 		if (len < IEEE80211_MIN_ACTION_SIZE + 2)
 			goto invalid;
 
@@ -2392,6 +2392,35 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
 						 IEEE80211_RC_SMPS_CHANGED);
 			goto handled;
 		}
+		case WLAN_HT_ACTION_NOTIFY_CHANWIDTH: {
+			struct ieee80211_supported_band *sband;
+			u8 chanwidth = mgmt->u.action.u.ht_notify_cw.chanwidth;
+			bool old_40mhz, new_40mhz;
+
+			/* If it doesn't support 40 MHz it can't change ... */
+			if (!rx->sta->supports_40mhz)
+				goto handled;
+
+			old_40mhz = rx->sta->sta.ht_cap.cap &
+					IEEE80211_HT_CAP_SUP_WIDTH_20_40;
+			new_40mhz = chanwidth == IEEE80211_HT_CHANWIDTH_ANY;
+
+			if (old_40mhz == new_40mhz)
+				goto handled;
+
+			if (new_40mhz)
+				rx->sta->sta.ht_cap.cap |=
+					IEEE80211_HT_CAP_SUP_WIDTH_20_40;
+			else
+				rx->sta->sta.ht_cap.cap &=
+					~IEEE80211_HT_CAP_SUP_WIDTH_20_40;
+
+			sband = rx->local->hw.wiphy->bands[status->band];
+
+			rate_control_rate_update(local, sband, rx->sta,
+						 IEEE80211_RC_BW_CHANGED);
+			goto handled;
+		}
 		default:
 			goto invalid;
 		}
-- 
cgit v1.2.3


From d2e5f0c16ad60a7208fd371233e63b73c990ece2 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Sun, 23 Dec 2012 00:02:44 +0100
Subject: ACPI / PCI: Rework the setup and cleanup of device wakeup

Currently, the ACPI wakeup capability of PCI devices is set up
in two different places, partially in acpi_pci_bind() where
runtime wakeup is initialized and partially in
platform_pci_wakeup_init(), where system wakeup is initialized.
The cleanup is only done in acpi_pci_unbind() and it only covers
runtime wakeup.

Use the new .setup() and .cleanup() callbacks in struct acpi_bus_type
to consolidate that code and do the setup and the cleanup each in one
place.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Yinghai Lu <yinghai@kernel.org>
Acked-by: Toshi Kani <toshi.kani@hp.com>
---
 drivers/acpi/device_pm.c |  2 +-
 drivers/acpi/pci_bind.c  |  5 -----
 drivers/pci/pci-acpi.c   | 30 +++++++++++++++++++++++++++++-
 drivers/pci/pci.c        | 26 +-------------------------
 drivers/pci/pci.h        |  5 -----
 drivers/pci/probe.c      |  1 -
 include/linux/acpi.h     |  5 +++++
 7 files changed, 36 insertions(+), 38 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c
index f09dc987cf17..e4f6ac95595c 100644
--- a/drivers/acpi/device_pm.c
+++ b/drivers/acpi/device_pm.c
@@ -353,7 +353,7 @@ int acpi_pm_device_sleep_wake(struct device *dev, bool enable)
  * acpi_dev_pm_get_node - Get ACPI device node for the given physical device.
  * @dev: Device to get the ACPI node for.
  */
-static struct acpi_device *acpi_dev_pm_get_node(struct device *dev)
+struct acpi_device *acpi_dev_pm_get_node(struct device *dev)
 {
 	acpi_handle handle = DEVICE_ACPI_HANDLE(dev);
 	struct acpi_device *adev;
diff --git a/drivers/acpi/pci_bind.c b/drivers/acpi/pci_bind.c
index a1dee29beed3..bbddcc9c894f 100644
--- a/drivers/acpi/pci_bind.c
+++ b/drivers/acpi/pci_bind.c
@@ -43,8 +43,6 @@ static int acpi_pci_unbind(struct acpi_device *device)
 	if (!dev)
 		goto out;
 
-	device_set_run_wake(&dev->dev, false);
-	pci_acpi_remove_pm_notifier(device);
 	acpi_power_resource_unregister_device(&dev->dev, device->handle);
 
 	if (!dev->subordinate)
@@ -71,10 +69,7 @@ static int acpi_pci_bind(struct acpi_device *device)
 	if (!dev)
 		return 0;
 
-	pci_acpi_add_pm_notifier(device, dev);
 	acpi_power_resource_register_device(&dev->dev, device->handle);
-	if (device->wakeup.flags.run_wake)
-		device_set_run_wake(&dev->dev, true);
 
 	/*
 	 * Install the 'bind' function to facilitate callbacks for
diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c
index 1af4008182fd..b98106c110e7 100644
--- a/drivers/pci/pci-acpi.c
+++ b/drivers/pci/pci-acpi.c
@@ -283,7 +283,6 @@ static struct pci_platform_pm_ops acpi_pci_platform_pm = {
 	.is_manageable = acpi_pci_power_manageable,
 	.set_state = acpi_pci_set_power_state,
 	.choose_state = acpi_pci_choose_state,
-	.can_wakeup = acpi_pci_can_wakeup,
 	.sleep_wake = acpi_pci_sleep_wake,
 	.run_wake = acpi_pci_run_wake,
 };
@@ -321,10 +320,39 @@ static int acpi_pci_find_root_bridge(struct device *dev, acpi_handle *handle)
 	return 0;
 }
 
+static void acpi_pci_wakeup_setup(struct device *dev)
+{
+	struct acpi_device *adev = acpi_dev_pm_get_node(dev);
+	struct pci_dev *pci_dev = to_pci_dev(dev);
+
+	if (!adev || !adev->wakeup.flags.valid)
+		return;
+
+	device_set_wakeup_capable(dev, true);
+	acpi_pci_sleep_wake(pci_dev, false);
+
+	pci_acpi_add_pm_notifier(adev, pci_dev);
+	if (adev->wakeup.flags.run_wake)
+		device_set_run_wake(dev, true);
+}
+
+static void acpi_pci_wakeup_cleanup(struct device *dev)
+{
+	struct acpi_device *adev = acpi_dev_pm_get_node(dev);
+
+	if (adev && adev->wakeup.flags.valid) {
+		device_set_wakeup_capable(dev, false);
+		device_set_run_wake(dev, false);
+		pci_acpi_remove_pm_notifier(adev);
+	}
+}
+
 static struct acpi_bus_type acpi_pci_bus = {
 	.bus = &pci_bus_type,
 	.find_device = acpi_pci_find_device,
 	.find_bridge = acpi_pci_find_root_bridge,
+	.setup = acpi_pci_wakeup_setup,
+	.cleanup = acpi_pci_wakeup_cleanup,
 };
 
 static int __init acpi_pci_init(void)
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 5cb5820fae40..0c4f641b7be1 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -450,7 +450,7 @@ static struct pci_platform_pm_ops *pci_platform_pm;
 int pci_set_platform_pm(struct pci_platform_pm_ops *ops)
 {
 	if (!ops->is_manageable || !ops->set_state || !ops->choose_state
-	    || !ops->sleep_wake || !ops->can_wakeup)
+	    || !ops->sleep_wake)
 		return -EINVAL;
 	pci_platform_pm = ops;
 	return 0;
@@ -473,11 +473,6 @@ static inline pci_power_t platform_pci_choose_state(struct pci_dev *dev)
 			pci_platform_pm->choose_state(dev) : PCI_POWER_ERROR;
 }
 
-static inline bool platform_pci_can_wakeup(struct pci_dev *dev)
-{
-	return pci_platform_pm ? pci_platform_pm->can_wakeup(dev) : false;
-}
-
 static inline int platform_pci_sleep_wake(struct pci_dev *dev, bool enable)
 {
 	return pci_platform_pm ?
@@ -1985,25 +1980,6 @@ void pci_pm_init(struct pci_dev *dev)
 	}
 }
 
-/**
- * platform_pci_wakeup_init - init platform wakeup if present
- * @dev: PCI device
- *
- * Some devices don't have PCI PM caps but can still generate wakeup
- * events through platform methods (like ACPI events).  If @dev supports
- * platform wakeup events, set the device flag to indicate as much.  This
- * may be redundant if the device also supports PCI PM caps, but double
- * initialization should be safe in that case.
- */
-void platform_pci_wakeup_init(struct pci_dev *dev)
-{
-	if (!platform_pci_can_wakeup(dev))
-		return;
-
-	device_set_wakeup_capable(&dev->dev, true);
-	platform_pci_sleep_wake(dev, false);
-}
-
 static void pci_add_saved_cap(struct pci_dev *pci_dev,
 	struct pci_cap_saved_state *new_cap)
 {
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index e8518292826f..adfd172c5b9b 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -43,9 +43,6 @@ int pci_probe_reset_function(struct pci_dev *dev);
  *                platform; to be used during system-wide transitions from a
  *                sleeping state to the working state and vice versa
  *
- * @can_wakeup: returns 'true' if given device is capable of waking up the
- *              system from a sleeping state
- *
  * @sleep_wake: enables/disables the system wake up capability of given device
  *
  * @run_wake: enables/disables the platform to generate run-time wake-up events
@@ -59,7 +56,6 @@ struct pci_platform_pm_ops {
 	bool (*is_manageable)(struct pci_dev *dev);
 	int (*set_state)(struct pci_dev *dev, pci_power_t state);
 	pci_power_t (*choose_state)(struct pci_dev *dev);
-	bool (*can_wakeup)(struct pci_dev *dev);
 	int (*sleep_wake)(struct pci_dev *dev, bool enable);
 	int (*run_wake)(struct pci_dev *dev, bool enable);
 };
@@ -74,7 +70,6 @@ extern void pci_wakeup_bus(struct pci_bus *bus);
 extern void pci_config_pm_runtime_get(struct pci_dev *dev);
 extern void pci_config_pm_runtime_put(struct pci_dev *dev);
 extern void pci_pm_init(struct pci_dev *dev);
-extern void platform_pci_wakeup_init(struct pci_dev *dev);
 extern void pci_allocate_cap_save_buffers(struct pci_dev *dev);
 void pci_free_cap_save_buffers(struct pci_dev *dev);
 
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 6186f03d84f3..2dcd22d9c816 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -1280,7 +1280,6 @@ static void pci_init_capabilities(struct pci_dev *dev)
 
 	/* Power Management */
 	pci_pm_init(dev);
-	platform_pci_wakeup_init(dev);
 
 	/* Vital Product Data */
 	pci_vpd_pci22_init(dev);
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 3994d7790b23..8c1d6f2a2193 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -526,9 +526,14 @@ static inline int acpi_subsys_resume_early(struct device *dev) { return 0; }
 #endif
 
 #if defined(CONFIG_ACPI) && defined(CONFIG_PM)
+struct acpi_device *acpi_dev_pm_get_node(struct device *dev);
 int acpi_dev_pm_attach(struct device *dev, bool power_on);
 void acpi_dev_pm_detach(struct device *dev, bool power_off);
 #else
+static inline struct acpi_device *acpi_dev_pm_get_node(struct device *dev)
+{
+	return NULL;
+}
 static inline int acpi_dev_pm_attach(struct device *dev, bool power_on)
 {
 	return -ENODEV;
-- 
cgit v1.2.3


From 9a32299394d8cce79ca7d0098dc32c4f14032dcd Mon Sep 17 00:00:00 2001
From: Philippe De Muyter <phdm@macqel.be>
Date: Fri, 12 Oct 2012 17:52:45 +0200
Subject: powerpc, dma: move bestcomm driver from arch/powerpc/sysdev to
 drivers/dma

The bestcomm dma hardware, and some of its users like the FEC ethernet
component, is used in different FreeScale parts, including non-powerpc
parts like the ColdFire MCF547x & MCF548x families.  Don't keep the
driver hidden in arch/powerpc where it is inaccessible for other arches.
.c files are moved to drivers/dma/bestcomm, while .h files are moved to
include/linux/fsl/bestcomm.  Makefiles, Kconfigs and #include directives
are updated for the new file locations.

Tested by recompiling for MPC5200 with all bestcomm users enabled.

Signed-off-by: Philippe De Muyter <phdm@macqel.be>
Signed-off-by: Anatolij Gustschin <agust@denx.de>
---
 arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c      |   6 +-
 arch/powerpc/platforms/Kconfig                     |   2 -
 arch/powerpc/sysdev/Makefile                       |   1 -
 arch/powerpc/sysdev/bestcomm/Kconfig               |  36 --
 arch/powerpc/sysdev/bestcomm/Makefile              |  14 -
 arch/powerpc/sysdev/bestcomm/ata.c                 | 157 ------
 arch/powerpc/sysdev/bestcomm/ata.h                 |  30 --
 arch/powerpc/sysdev/bestcomm/bcom_ata_task.c       |  67 ---
 arch/powerpc/sysdev/bestcomm/bcom_fec_rx_task.c    |  78 ---
 arch/powerpc/sysdev/bestcomm/bcom_fec_tx_task.c    |  91 ----
 arch/powerpc/sysdev/bestcomm/bcom_gen_bd_rx_task.c |  63 ---
 arch/powerpc/sysdev/bestcomm/bcom_gen_bd_tx_task.c |  69 ---
 arch/powerpc/sysdev/bestcomm/bestcomm.c            | 532 ---------------------
 arch/powerpc/sysdev/bestcomm/bestcomm.h            | 213 ---------
 arch/powerpc/sysdev/bestcomm/bestcomm_priv.h       | 350 --------------
 arch/powerpc/sysdev/bestcomm/fec.c                 | 270 -----------
 arch/powerpc/sysdev/bestcomm/fec.h                 |  61 ---
 arch/powerpc/sysdev/bestcomm/gen_bd.c              | 354 --------------
 arch/powerpc/sysdev/bestcomm/gen_bd.h              |  53 --
 arch/powerpc/sysdev/bestcomm/sram.c                | 178 -------
 arch/powerpc/sysdev/bestcomm/sram.h                |  54 ---
 drivers/Makefile                                   |   2 +-
 drivers/ata/pata_mpc52xx.c                         |   6 +-
 drivers/dma/Kconfig                                |   2 +
 drivers/dma/Makefile                               |   1 +
 drivers/dma/bestcomm/Kconfig                       |  36 ++
 drivers/dma/bestcomm/Makefile                      |  14 +
 drivers/dma/bestcomm/ata.c                         | 157 ++++++
 drivers/dma/bestcomm/bcom_ata_task.c               |  67 +++
 drivers/dma/bestcomm/bcom_fec_rx_task.c            |  78 +++
 drivers/dma/bestcomm/bcom_fec_tx_task.c            |  91 ++++
 drivers/dma/bestcomm/bcom_gen_bd_rx_task.c         |  63 +++
 drivers/dma/bestcomm/bcom_gen_bd_tx_task.c         |  69 +++
 drivers/dma/bestcomm/bestcomm.c                    | 532 +++++++++++++++++++++
 drivers/dma/bestcomm/fec.c                         | 270 +++++++++++
 drivers/dma/bestcomm/gen_bd.c                      | 354 ++++++++++++++
 drivers/dma/bestcomm/sram.c                        | 178 +++++++
 drivers/net/ethernet/freescale/fec_mpc52xx.c       |   4 +-
 include/linux/fsl/bestcomm/ata.h                   |  30 ++
 include/linux/fsl/bestcomm/bestcomm.h              | 213 +++++++++
 include/linux/fsl/bestcomm/bestcomm_priv.h         | 350 ++++++++++++++
 include/linux/fsl/bestcomm/fec.h                   |  61 +++
 include/linux/fsl/bestcomm/gen_bd.h                |  53 ++
 include/linux/fsl/bestcomm/sram.h                  |  54 +++
 sound/soc/fsl/mpc5200_dma.c                        |   4 +-
 45 files changed, 2684 insertions(+), 2684 deletions(-)
 delete mode 100644 arch/powerpc/sysdev/bestcomm/Kconfig
 delete mode 100644 arch/powerpc/sysdev/bestcomm/Makefile
 delete mode 100644 arch/powerpc/sysdev/bestcomm/ata.c
 delete mode 100644 arch/powerpc/sysdev/bestcomm/ata.h
 delete mode 100644 arch/powerpc/sysdev/bestcomm/bcom_ata_task.c
 delete mode 100644 arch/powerpc/sysdev/bestcomm/bcom_fec_rx_task.c
 delete mode 100644 arch/powerpc/sysdev/bestcomm/bcom_fec_tx_task.c
 delete mode 100644 arch/powerpc/sysdev/bestcomm/bcom_gen_bd_rx_task.c
 delete mode 100644 arch/powerpc/sysdev/bestcomm/bcom_gen_bd_tx_task.c
 delete mode 100644 arch/powerpc/sysdev/bestcomm/bestcomm.c
 delete mode 100644 arch/powerpc/sysdev/bestcomm/bestcomm.h
 delete mode 100644 arch/powerpc/sysdev/bestcomm/bestcomm_priv.h
 delete mode 100644 arch/powerpc/sysdev/bestcomm/fec.c
 delete mode 100644 arch/powerpc/sysdev/bestcomm/fec.h
 delete mode 100644 arch/powerpc/sysdev/bestcomm/gen_bd.c
 delete mode 100644 arch/powerpc/sysdev/bestcomm/gen_bd.h
 delete mode 100644 arch/powerpc/sysdev/bestcomm/sram.c
 delete mode 100644 arch/powerpc/sysdev/bestcomm/sram.h
 create mode 100644 drivers/dma/bestcomm/Kconfig
 create mode 100644 drivers/dma/bestcomm/Makefile
 create mode 100644 drivers/dma/bestcomm/ata.c
 create mode 100644 drivers/dma/bestcomm/bcom_ata_task.c
 create mode 100644 drivers/dma/bestcomm/bcom_fec_rx_task.c
 create mode 100644 drivers/dma/bestcomm/bcom_fec_tx_task.c
 create mode 100644 drivers/dma/bestcomm/bcom_gen_bd_rx_task.c
 create mode 100644 drivers/dma/bestcomm/bcom_gen_bd_tx_task.c
 create mode 100644 drivers/dma/bestcomm/bestcomm.c
 create mode 100644 drivers/dma/bestcomm/fec.c
 create mode 100644 drivers/dma/bestcomm/gen_bd.c
 create mode 100644 drivers/dma/bestcomm/sram.c
 create mode 100644 include/linux/fsl/bestcomm/ata.h
 create mode 100644 include/linux/fsl/bestcomm/bestcomm.h
 create mode 100644 include/linux/fsl/bestcomm/bestcomm_priv.h
 create mode 100644 include/linux/fsl/bestcomm/fec.h
 create mode 100644 include/linux/fsl/bestcomm/gen_bd.h
 create mode 100644 include/linux/fsl/bestcomm/sram.h

(limited to 'include/linux')

diff --git a/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c b/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c
index 16150fa430f9..93fa645904e6 100644
--- a/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c
+++ b/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c
@@ -20,9 +20,9 @@
 #include <asm/mpc52xx.h>
 #include <asm/time.h>
 
-#include <sysdev/bestcomm/bestcomm.h>
-#include <sysdev/bestcomm/bestcomm_priv.h>
-#include <sysdev/bestcomm/gen_bd.h>
+#include <linux/fsl/bestcomm/bestcomm.h>
+#include <linux/fsl/bestcomm/bestcomm_priv.h>
+#include <linux/fsl/bestcomm/gen_bd.h>
 
 MODULE_AUTHOR("Grant Likely <grant.likely@secretlab.ca>");
 MODULE_DESCRIPTION("MPC5200 LocalPlus FIFO device driver");
diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig
index 48a920d51489..52de8bccfb30 100644
--- a/arch/powerpc/platforms/Kconfig
+++ b/arch/powerpc/platforms/Kconfig
@@ -352,8 +352,6 @@ config OF_RTC
 	  Uses information from the OF or flattened device tree to instantiate
 	  platform devices for direct mapped RTC chips like the DS1742 or DS1743.
 
-source "arch/powerpc/sysdev/bestcomm/Kconfig"
-
 config SIMPLE_GPIO
 	bool "Support for simple, memory-mapped GPIO controllers"
 	depends on PPC
diff --git a/arch/powerpc/sysdev/Makefile b/arch/powerpc/sysdev/Makefile
index a57600b3a4e3..3884776600fd 100644
--- a/arch/powerpc/sysdev/Makefile
+++ b/arch/powerpc/sysdev/Makefile
@@ -26,7 +26,6 @@ obj-$(CONFIG_SIMPLE_GPIO)	+= simple_gpio.o
 obj-$(CONFIG_FSL_RIO)		+= fsl_rio.o fsl_rmu.o
 obj-$(CONFIG_TSI108_BRIDGE)	+= tsi108_pci.o tsi108_dev.o
 obj-$(CONFIG_QUICC_ENGINE)	+= qe_lib/
-obj-$(CONFIG_PPC_BESTCOMM)	+= bestcomm/
 mv64x60-$(CONFIG_PCI)		+= mv64x60_pci.o
 obj-$(CONFIG_MV64X60)		+= $(mv64x60-y) mv64x60_pic.o mv64x60_dev.o \
 				   mv64x60_udbg.o
diff --git a/arch/powerpc/sysdev/bestcomm/Kconfig b/arch/powerpc/sysdev/bestcomm/Kconfig
deleted file mode 100644
index 29e427085efb..000000000000
--- a/arch/powerpc/sysdev/bestcomm/Kconfig
+++ /dev/null
@@ -1,36 +0,0 @@
-#
-# Kconfig options for Bestcomm
-#
-
-config PPC_BESTCOMM
-	tristate "Bestcomm DMA engine support"
-	depends on PPC_MPC52xx
-	default n
-	select PPC_LIB_RHEAP
-	help
-	  BestComm is the name of the communication coprocessor found
-	  on the Freescale MPC5200 family of processor.  Its usage is
-	  optional for some drivers (like ATA), but required for
-	  others (like FEC).
-
-	  If you want to use drivers that require DMA operations,
-	  answer Y or M. Otherwise say N.
-
-config PPC_BESTCOMM_ATA
-	tristate
-	depends on PPC_BESTCOMM
-	help
-	  This option enables the support for the ATA task.
-
-config PPC_BESTCOMM_FEC
-	tristate
-	depends on PPC_BESTCOMM
-	help
-	  This option enables the support for the FEC tasks.
-
-config PPC_BESTCOMM_GEN_BD
-	tristate
-	depends on PPC_BESTCOMM
-	help
-	  This option enables the support for the GenBD tasks.
-
diff --git a/arch/powerpc/sysdev/bestcomm/Makefile b/arch/powerpc/sysdev/bestcomm/Makefile
deleted file mode 100644
index aed2df2a6580..000000000000
--- a/arch/powerpc/sysdev/bestcomm/Makefile
+++ /dev/null
@@ -1,14 +0,0 @@
-#
-# Makefile for BestComm & co
-#
-
-bestcomm-core-objs	:= bestcomm.o sram.o
-bestcomm-ata-objs	:= ata.o bcom_ata_task.o
-bestcomm-fec-objs	:= fec.o bcom_fec_rx_task.o bcom_fec_tx_task.o
-bestcomm-gen-bd-objs	:= gen_bd.o bcom_gen_bd_rx_task.o bcom_gen_bd_tx_task.o
-
-obj-$(CONFIG_PPC_BESTCOMM)		+= bestcomm-core.o
-obj-$(CONFIG_PPC_BESTCOMM_ATA)		+= bestcomm-ata.o
-obj-$(CONFIG_PPC_BESTCOMM_FEC)		+= bestcomm-fec.o
-obj-$(CONFIG_PPC_BESTCOMM_GEN_BD)	+= bestcomm-gen-bd.o
- 
diff --git a/arch/powerpc/sysdev/bestcomm/ata.c b/arch/powerpc/sysdev/bestcomm/ata.c
deleted file mode 100644
index 901c9f91e5dd..000000000000
--- a/arch/powerpc/sysdev/bestcomm/ata.c
+++ /dev/null
@@ -1,157 +0,0 @@
-/*
- * Bestcomm ATA task driver
- *
- *
- * Patterned after bestcomm/fec.c by Dale Farnsworth <dfarnsworth@mvista.com>
- *                                   2003-2004 (c) MontaVista, Software, Inc.
- *
- * Copyright (C) 2006-2007 Sylvain Munaut <tnt@246tNt.com>
- * Copyright (C) 2006      Freescale - John Rigby
- *
- * This file is licensed under the terms of the GNU General Public License
- * version 2. This program is licensed "as is" without any warranty of any
- * kind, whether express or implied.
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/types.h>
-#include <asm/io.h>
-
-#include "bestcomm.h"
-#include "bestcomm_priv.h"
-#include "ata.h"
-
-
-/* ======================================================================== */
-/* Task image/var/inc                                                       */
-/* ======================================================================== */
-
-/* ata task image */
-extern u32 bcom_ata_task[];
-
-/* ata task vars that need to be set before enabling the task */
-struct bcom_ata_var {
-	u32 enable;		/* (u16*) address of task's control register */
-	u32 bd_base;		/* (struct bcom_bd*) beginning of ring buffer */
-	u32 bd_last;		/* (struct bcom_bd*) end of ring buffer */
-	u32 bd_start;		/* (struct bcom_bd*) current bd */
-	u32 buffer_size;	/* size of receive buffer */
-};
-
-/* ata task incs that need to be set before enabling the task */
-struct bcom_ata_inc {
-	u16 pad0;
-	s16 incr_bytes;
-	u16 pad1;
-	s16 incr_dst;
-	u16 pad2;
-	s16 incr_src;
-};
-
-
-/* ======================================================================== */
-/* Task support code                                                        */
-/* ======================================================================== */
-
-struct bcom_task *
-bcom_ata_init(int queue_len, int maxbufsize)
-{
-	struct bcom_task *tsk;
-	struct bcom_ata_var *var;
-	struct bcom_ata_inc *inc;
-
-	/* Prefetch breaks ATA DMA.  Turn it off for ATA DMA */
-	bcom_disable_prefetch();
-
-	tsk = bcom_task_alloc(queue_len, sizeof(struct bcom_ata_bd), 0);
-	if (!tsk)
-		return NULL;
-
-	tsk->flags = BCOM_FLAGS_NONE;
-
-	bcom_ata_reset_bd(tsk);
-
-	var = (struct bcom_ata_var *) bcom_task_var(tsk->tasknum);
-	inc = (struct bcom_ata_inc *) bcom_task_inc(tsk->tasknum);
-
-	if (bcom_load_image(tsk->tasknum, bcom_ata_task)) {
-		bcom_task_free(tsk);
-		return NULL;
-	}
-
-	var->enable	= bcom_eng->regs_base +
-				offsetof(struct mpc52xx_sdma, tcr[tsk->tasknum]);
-	var->bd_base	= tsk->bd_pa;
-	var->bd_last	= tsk->bd_pa + ((tsk->num_bd-1) * tsk->bd_size);
-	var->bd_start	= tsk->bd_pa;
-	var->buffer_size = maxbufsize;
-
-	/* Configure some stuff */
-	bcom_set_task_pragma(tsk->tasknum, BCOM_ATA_PRAGMA);
-	bcom_set_task_auto_start(tsk->tasknum, tsk->tasknum);
-
-	out_8(&bcom_eng->regs->ipr[BCOM_INITIATOR_ATA_RX], BCOM_IPR_ATA_RX);
-	out_8(&bcom_eng->regs->ipr[BCOM_INITIATOR_ATA_TX], BCOM_IPR_ATA_TX);
-
-	out_be32(&bcom_eng->regs->IntPend, 1<<tsk->tasknum); /* Clear ints */
-
-	return tsk;
-}
-EXPORT_SYMBOL_GPL(bcom_ata_init);
-
-void bcom_ata_rx_prepare(struct bcom_task *tsk)
-{
-	struct bcom_ata_inc *inc;
-
-	inc = (struct bcom_ata_inc *) bcom_task_inc(tsk->tasknum);
-
-	inc->incr_bytes	= -(s16)sizeof(u32);
-	inc->incr_src	= 0;
-	inc->incr_dst	= sizeof(u32);
-
-	bcom_set_initiator(tsk->tasknum, BCOM_INITIATOR_ATA_RX);
-}
-EXPORT_SYMBOL_GPL(bcom_ata_rx_prepare);
-
-void bcom_ata_tx_prepare(struct bcom_task *tsk)
-{
-	struct bcom_ata_inc *inc;
-
-	inc = (struct bcom_ata_inc *) bcom_task_inc(tsk->tasknum);
-
-	inc->incr_bytes	= -(s16)sizeof(u32);
-	inc->incr_src	= sizeof(u32);
-	inc->incr_dst	= 0;
-
-	bcom_set_initiator(tsk->tasknum, BCOM_INITIATOR_ATA_TX);
-}
-EXPORT_SYMBOL_GPL(bcom_ata_tx_prepare);
-
-void bcom_ata_reset_bd(struct bcom_task *tsk)
-{
-	struct bcom_ata_var *var;
-
-	/* Reset all BD */
-	memset(tsk->bd, 0x00, tsk->num_bd * tsk->bd_size);
-
-	tsk->index = 0;
-	tsk->outdex = 0;
-
-	var = (struct bcom_ata_var *) bcom_task_var(tsk->tasknum);
-	var->bd_start = var->bd_base;
-}
-EXPORT_SYMBOL_GPL(bcom_ata_reset_bd);
-
-void bcom_ata_release(struct bcom_task *tsk)
-{
-	/* Nothing special for the ATA tasks */
-	bcom_task_free(tsk);
-}
-EXPORT_SYMBOL_GPL(bcom_ata_release);
-
-
-MODULE_DESCRIPTION("BestComm ATA task driver");
-MODULE_AUTHOR("John Rigby");
-MODULE_LICENSE("GPL v2");
-
diff --git a/arch/powerpc/sysdev/bestcomm/ata.h b/arch/powerpc/sysdev/bestcomm/ata.h
deleted file mode 100644
index 0b2371811334..000000000000
--- a/arch/powerpc/sysdev/bestcomm/ata.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * Header for Bestcomm ATA task driver
- *
- *
- * Copyright (C) 2006 Freescale - John Rigby
- * Copyright (C) 2006 Sylvain Munaut <tnt@246tNt.com>
- *
- * This file is licensed under the terms of the GNU General Public License
- * version 2. This program is licensed "as is" without any warranty of any
- * kind, whether express or implied.
- */
-
-#ifndef __BESTCOMM_ATA_H__
-#define __BESTCOMM_ATA_H__
-
-
-struct bcom_ata_bd {
-	u32	status;
-	u32	src_pa;
-	u32	dst_pa;
-};
-
-extern struct bcom_task * bcom_ata_init(int queue_len, int maxbufsize);
-extern void bcom_ata_rx_prepare(struct bcom_task *tsk);
-extern void bcom_ata_tx_prepare(struct bcom_task *tsk);
-extern void bcom_ata_reset_bd(struct bcom_task *tsk);
-extern void bcom_ata_release(struct bcom_task *tsk);
-
-#endif /* __BESTCOMM_ATA_H__ */
-
diff --git a/arch/powerpc/sysdev/bestcomm/bcom_ata_task.c b/arch/powerpc/sysdev/bestcomm/bcom_ata_task.c
deleted file mode 100644
index cc6049a4e469..000000000000
--- a/arch/powerpc/sysdev/bestcomm/bcom_ata_task.c
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Bestcomm ATA task microcode
- *
- * Copyright (c) 2004 Freescale Semiconductor, Inc.
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
- *
- * Created based on bestcom/code_dma/image_rtos1/dma_image.hex
- */
-
-#include <asm/types.h>
-
-/*
- * The header consists of the following fields:
- *	u32	magic;
- *	u8	desc_size;
- *	u8	var_size;
- *	u8	inc_size;
- *	u8	first_var;
- *	u8	reserved[8];
- *
- * The size fields contain the number of 32-bit words.
- */
-
-u32 bcom_ata_task[] = {
-	/* header */
-	0x4243544b,
-	0x0e060709,
-	0x00000000,
-	0x00000000,
-
-	/* Task descriptors */
-	0x8198009b, /* LCD: idx0 = var3; idx0 <= var2; idx0 += inc3 */
-	0x13e00c08, /*   DRD1A: var3 = var1; FN=0 MORE init=31 WS=0 RS=0 */
-	0xb8000264, /*   LCD: idx1 = *idx0, idx2 = var0; idx1 < var9; idx1 += inc4, idx2 += inc4 */
-	0x10000f00, /*     DRD1A: var3 = idx0; FN=0 MORE init=0 WS=0 RS=0 */
-	0x60140002, /*     DRD2A: EU0=0 EU1=0 EU2=0 EU3=2 EXT init=0 WS=2 RS=2 */
-	0x0c8cfc8a, /*     DRD2B1: *idx2 = EU3(); EU3(*idx2,var10)  */
-	0xd8988240, /*   LCDEXT: idx1 = idx1; idx1 > var9; idx1 += inc0 */
-	0xf845e011, /*   LCDEXT: idx2 = *(idx0 + var00000015); ; idx2 += inc2 */
-	0xb845e00a, /*   LCD: idx3 = *(idx0 + var00000019); ; idx3 += inc1 */
-	0x0bfecf90, /*     DRD1A: *idx3 = *idx2; FN=0 TFD init=31 WS=3 RS=3 */
-	0x9898802d, /*   LCD: idx1 = idx1; idx1 once var0; idx1 += inc5 */
-	0x64000005, /*     DRD2A: EU0=0 EU1=0 EU2=0 EU3=5 INT EXT init=0 WS=0 RS=0 */
-	0x0c0cf849, /*     DRD2B1: *idx0 = EU3(); EU3(idx1,var9)  */
-	0x000001f8, /* NOP */
-
-	/* VAR[9]-VAR[14] */
-	0x40000000,
-	0x7fff7fff,
-	0x00000000,
-	0x00000000,
-	0x00000000,
-	0x00000000,
-
-	/* INC[0]-INC[6] */
-	0x40000000,
-	0xe0000000,
-	0xe0000000,
-	0xa000000c,
-	0x20000000,
-	0x00000000,
-	0x00000000,
-};
-
diff --git a/arch/powerpc/sysdev/bestcomm/bcom_fec_rx_task.c b/arch/powerpc/sysdev/bestcomm/bcom_fec_rx_task.c
deleted file mode 100644
index a1ad6a02fcef..000000000000
--- a/arch/powerpc/sysdev/bestcomm/bcom_fec_rx_task.c
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- * Bestcomm FEC RX task microcode
- *
- * Copyright (c) 2004 Freescale Semiconductor, Inc.
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
- *
- * Automatically created based on BestCommAPI-2.2/code_dma/image_rtos1/dma_image.hex
- * on Tue Mar 22 11:19:38 2005 GMT
- */
-
-#include <asm/types.h>
-
-/*
- * The header consists of the following fields:
- *	u32	magic;
- *	u8	desc_size;
- *	u8	var_size;
- *	u8	inc_size;
- *	u8	first_var;
- *	u8	reserved[8];
- *
- * The size fields contain the number of 32-bit words.
- */
-
-u32 bcom_fec_rx_task[] = {
-	/* header */
-	0x4243544b,
-	0x18060709,
-	0x00000000,
-	0x00000000,
-
-	/* Task descriptors */
-	0x808220e3, /* LCD: idx0 = var1, idx1 = var4; idx1 <= var3; idx0 += inc4, idx1 += inc3 */
-	0x10601010, /*   DRD1A: var4 = var2; FN=0 MORE init=3 WS=0 RS=0 */
-	0xb8800264, /*   LCD: idx2 = *idx1, idx3 = var0; idx2 < var9; idx2 += inc4, idx3 += inc4 */
-	0x10001308, /*     DRD1A: var4 = idx1; FN=0 MORE init=0 WS=0 RS=0 */
-	0x60140002, /*     DRD2A: EU0=0 EU1=0 EU2=0 EU3=2 EXT init=0 WS=2 RS=2 */
-	0x0cccfcca, /*     DRD2B1: *idx3 = EU3(); EU3(*idx3,var10)  */
-	0x80004000, /*   LCDEXT: idx2 = 0x00000000; ; */
-	0xb8c58029, /*   LCD: idx3 = *(idx1 + var00000015); idx3 once var0; idx3 += inc5 */
-	0x60000002, /*     DRD2A: EU0=0 EU1=0 EU2=0 EU3=2 EXT init=0 WS=0 RS=0 */
-	0x088cf8cc, /*     DRD2B1: idx2 = EU3(); EU3(idx3,var12)  */
-	0x991982f2, /*   LCD: idx2 = idx2, idx3 = idx3; idx2 > var11; idx2 += inc6, idx3 += inc2 */
-	0x006acf80, /*     DRD1A: *idx3 = *idx0; FN=0 init=3 WS=1 RS=1 */
-	0x80004000, /*   LCDEXT: idx2 = 0x00000000; ; */
-	0x9999802d, /*   LCD: idx3 = idx3; idx3 once var0; idx3 += inc5 */
-	0x70000002, /*     DRD2A: EU0=0 EU1=0 EU2=0 EU3=2 EXT MORE init=0 WS=0 RS=0 */
-	0x034cfc4e, /*     DRD2B1: var13 = EU3(); EU3(*idx1,var14)  */
-	0x00008868, /*     DRD1A: idx2 = var13; FN=0 init=0 WS=0 RS=0 */
-	0x99198341, /*   LCD: idx2 = idx2, idx3 = idx3; idx2 > var13; idx2 += inc0, idx3 += inc1 */
-	0x007ecf80, /*     DRD1A: *idx3 = *idx0; FN=0 init=3 WS=3 RS=3 */
-	0x99198272, /*   LCD: idx2 = idx2, idx3 = idx3; idx2 > var9; idx2 += inc6, idx3 += inc2 */
-	0x046acf80, /*     DRD1A: *idx3 = *idx0; FN=0 INT init=3 WS=1 RS=1 */
-	0x9819002d, /*   LCD: idx2 = idx0; idx2 once var0; idx2 += inc5 */
-	0x0060c790, /*     DRD1A: *idx1 = *idx2; FN=0 init=3 WS=0 RS=0 */
-	0x000001f8, /*   NOP */
-
-	/* VAR[9]-VAR[14] */
-	0x40000000,
-	0x7fff7fff,
-	0x00000000,
-	0x00000003,
-	0x40000008,
-	0x43ffffff,
-
-	/* INC[0]-INC[6] */
-	0x40000000,
-	0xe0000000,
-	0xe0000000,
-	0xa0000008,
-	0x20000000,
-	0x00000000,
-	0x4000ffff,
-};
-
diff --git a/arch/powerpc/sysdev/bestcomm/bcom_fec_tx_task.c b/arch/powerpc/sysdev/bestcomm/bcom_fec_tx_task.c
deleted file mode 100644
index b1c495c3a65a..000000000000
--- a/arch/powerpc/sysdev/bestcomm/bcom_fec_tx_task.c
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- * Bestcomm FEC TX task microcode
- *
- * Copyright (c) 2004 Freescale Semiconductor, Inc.
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
- *
- * Automatically created based on BestCommAPI-2.2/code_dma/image_rtos1/dma_image.hex
- * on Tue Mar 22 11:19:29 2005 GMT
- */
-
-#include <asm/types.h>
-
-/*
- * The header consists of the following fields:
- *	u32	magic;
- *	u8	desc_size;
- *	u8	var_size;
- *	u8	inc_size;
- *	u8	first_var;
- *	u8	reserved[8];
- *
- * The size fields contain the number of 32-bit words.
- */
-
-u32 bcom_fec_tx_task[] = {
-	/* header */
-	0x4243544b,
-	0x2407070d,
-	0x00000000,
-	0x00000000,
-
-	/* Task descriptors */
-	0x8018001b, /* LCD: idx0 = var0; idx0 <= var0; idx0 += inc3 */
-	0x60000005, /*   DRD2A: EU0=0 EU1=0 EU2=0 EU3=5 EXT init=0 WS=0 RS=0 */
-	0x01ccfc0d, /*   DRD2B1: var7 = EU3(); EU3(*idx0,var13)  */
-	0x8082a123, /* LCD: idx0 = var1, idx1 = var5; idx1 <= var4; idx0 += inc4, idx1 += inc3 */
-	0x10801418, /*   DRD1A: var5 = var3; FN=0 MORE init=4 WS=0 RS=0 */
-	0xf88103a4, /*   LCDEXT: idx2 = *idx1, idx3 = var2; idx2 < var14; idx2 += inc4, idx3 += inc4 */
-	0x801a6024, /*   LCD: idx4 = var0; ; idx4 += inc4 */
-	0x10001708, /*     DRD1A: var5 = idx1; FN=0 MORE init=0 WS=0 RS=0 */
-	0x60140002, /*     DRD2A: EU0=0 EU1=0 EU2=0 EU3=2 EXT init=0 WS=2 RS=2 */
-	0x0cccfccf, /*     DRD2B1: *idx3 = EU3(); EU3(*idx3,var15)  */
-	0x991a002c, /*   LCD: idx2 = idx2, idx3 = idx4; idx2 once var0; idx2 += inc5, idx3 += inc4 */
-	0x70000002, /*     DRD2A: EU0=0 EU1=0 EU2=0 EU3=2 EXT MORE init=0 WS=0 RS=0 */
-	0x024cfc4d, /*     DRD2B1: var9 = EU3(); EU3(*idx1,var13)  */
-	0x60000003, /*     DRD2A: EU0=0 EU1=0 EU2=0 EU3=3 EXT init=0 WS=0 RS=0 */
-	0x0cccf247, /*     DRD2B1: *idx3 = EU3(); EU3(var9,var7)  */
-	0x80004000, /*   LCDEXT: idx2 = 0x00000000; ; */
-	0xb8c80029, /*   LCD: idx3 = *(idx1 + var0000001a); idx3 once var0; idx3 += inc5 */
-	0x70000002, /*     DRD2A: EU0=0 EU1=0 EU2=0 EU3=2 EXT MORE init=0 WS=0 RS=0 */
-	0x088cf8d1, /*     DRD2B1: idx2 = EU3(); EU3(idx3,var17)  */
-	0x00002f10, /*     DRD1A: var11 = idx2; FN=0 init=0 WS=0 RS=0 */
-	0x99198432, /*   LCD: idx2 = idx2, idx3 = idx3; idx2 > var16; idx2 += inc6, idx3 += inc2 */
-	0x008ac398, /*     DRD1A: *idx0 = *idx3; FN=0 init=4 WS=1 RS=1 */
-	0x80004000, /*   LCDEXT: idx2 = 0x00000000; ; */
-	0x9999802d, /*   LCD: idx3 = idx3; idx3 once var0; idx3 += inc5 */
-	0x70000002, /*     DRD2A: EU0=0 EU1=0 EU2=0 EU3=2 EXT MORE init=0 WS=0 RS=0 */
-	0x048cfc53, /*     DRD2B1: var18 = EU3(); EU3(*idx1,var19)  */
-	0x60000008, /*     DRD2A: EU0=0 EU1=0 EU2=0 EU3=8 EXT init=0 WS=0 RS=0 */
-	0x088cf48b, /*     DRD2B1: idx2 = EU3(); EU3(var18,var11)  */
-	0x99198481, /*   LCD: idx2 = idx2, idx3 = idx3; idx2 > var18; idx2 += inc0, idx3 += inc1 */
-	0x009ec398, /*     DRD1A: *idx0 = *idx3; FN=0 init=4 WS=3 RS=3 */
-	0x991983b2, /*   LCD: idx2 = idx2, idx3 = idx3; idx2 > var14; idx2 += inc6, idx3 += inc2 */
-	0x088ac398, /*     DRD1A: *idx0 = *idx3; FN=0 TFD init=4 WS=1 RS=1 */
-	0x9919002d, /*   LCD: idx2 = idx2; idx2 once var0; idx2 += inc5 */
-	0x60000005, /*     DRD2A: EU0=0 EU1=0 EU2=0 EU3=5 EXT init=0 WS=0 RS=0 */
-	0x0c4cf88e, /*     DRD2B1: *idx1 = EU3(); EU3(idx2,var14)  */
-	0x000001f8, /*   NOP */
-
-	/* VAR[13]-VAR[19] */
-	0x0c000000,
-	0x40000000,
-	0x7fff7fff,
-	0x00000000,
-	0x00000003,
-	0x40000004,
-	0x43ffffff,
-
-	/* INC[0]-INC[6] */
-	0x40000000,
-	0xe0000000,
-	0xe0000000,
-	0xa0000008,
-	0x20000000,
-	0x00000000,
-	0x4000ffff,
-};
-
diff --git a/arch/powerpc/sysdev/bestcomm/bcom_gen_bd_rx_task.c b/arch/powerpc/sysdev/bestcomm/bcom_gen_bd_rx_task.c
deleted file mode 100644
index efee022b0256..000000000000
--- a/arch/powerpc/sysdev/bestcomm/bcom_gen_bd_rx_task.c
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Bestcomm GenBD RX task microcode
- *
- * Copyright (C) 2006 AppSpec Computer Technologies Corp.
- *                    Jeff Gibbons <jeff.gibbons@appspec.com>
- * Copyright (c) 2004 Freescale Semiconductor, Inc.
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
- *
- * Based on BestCommAPI-2.2/code_dma/image_rtos1/dma_image.hex
- * on Tue Mar 4 10:14:12 2006 GMT
- *
- */
-
-#include <asm/types.h>
-
-/*
- * The header consists of the following fields:
- *	u32	magic;
- *	u8	desc_size;
- *	u8	var_size;
- *	u8	inc_size;
- *	u8	first_var;
- *	u8	reserved[8];
- *
- * The size fields contain the number of 32-bit words.
- */
-
-u32 bcom_gen_bd_rx_task[] = {
-	/* header */
-	0x4243544b,
-	0x0d020409,
-	0x00000000,
-	0x00000000,
-
-	/* Task descriptors */
-	0x808220da, /* LCD: idx0 = var1, idx1 = var4; idx1 <= var3; idx0 += inc3, idx1 += inc2 */
-	0x13e01010, /*   DRD1A: var4 = var2; FN=0 MORE init=31 WS=0 RS=0 */
-	0xb880025b, /*   LCD: idx2 = *idx1, idx3 = var0; idx2 < var9; idx2 += inc3, idx3 += inc3 */
-	0x10001308, /*     DRD1A: var4 = idx1; FN=0 MORE init=0 WS=0 RS=0 */
-	0x60140002, /*     DRD2A: EU0=0 EU1=0 EU2=0 EU3=2 EXT init=0 WS=2 RS=2 */
-	0x0cccfcca, /*     DRD2B1: *idx3 = EU3(); EU3(*idx3,var10)  */
-	0xd9190240, /*   LCDEXT: idx2 = idx2; idx2 > var9; idx2 += inc0 */
-	0xb8c5e009, /*   LCD: idx3 = *(idx1 + var00000015); ; idx3 += inc1 */
-	0x07fecf80, /*     DRD1A: *idx3 = *idx0; FN=0 INT init=31 WS=3 RS=3 */
-	0x99190024, /*   LCD: idx2 = idx2; idx2 once var0; idx2 += inc4 */
-	0x60000005, /*     DRD2A: EU0=0 EU1=0 EU2=0 EU3=5 EXT init=0 WS=0 RS=0 */
-	0x0c4cf889, /*     DRD2B1: *idx1 = EU3(); EU3(idx2,var9)  */
-	0x000001f8, /*   NOP */
-
-	/* VAR[9]-VAR[10] */
-	0x40000000,
-	0x7fff7fff,
-
-	/* INC[0]-INC[3] */
-	0x40000000,
-	0xe0000000,
-	0xa0000008,
-	0x20000000,
-};
-
diff --git a/arch/powerpc/sysdev/bestcomm/bcom_gen_bd_tx_task.c b/arch/powerpc/sysdev/bestcomm/bcom_gen_bd_tx_task.c
deleted file mode 100644
index c605aa42ecbb..000000000000
--- a/arch/powerpc/sysdev/bestcomm/bcom_gen_bd_tx_task.c
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Bestcomm GenBD TX task microcode
- *
- * Copyright (C) 2006 AppSpec Computer Technologies Corp.
- *                    Jeff Gibbons <jeff.gibbons@appspec.com>
- * Copyright (c) 2004 Freescale Semiconductor, Inc.
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
- *
- * Based on BestCommAPI-2.2/code_dma/image_rtos1/dma_image.hex
- * on Tue Mar 4 10:14:12 2006 GMT
- *
- */
-
-#include <asm/types.h>
-
-/*
- * The header consists of the following fields:
- *	u32	magic;
- *	u8	desc_size;
- *	u8	var_size;
- *	u8	inc_size;
- *	u8	first_var;
- *	u8	reserved[8];
- *
- * The size fields contain the number of 32-bit words.
- */
-
-u32 bcom_gen_bd_tx_task[] = {
-	/* header */
-	0x4243544b,
-	0x0f040609,
-	0x00000000,
-	0x00000000,
-
-	/* Task descriptors */
-	0x800220e3, /* LCD: idx0 = var0, idx1 = var4; idx1 <= var3; idx0 += inc4, idx1 += inc3 */
-	0x13e01010, /*   DRD1A: var4 = var2; FN=0 MORE init=31 WS=0 RS=0 */
-	0xb8808264, /*   LCD: idx2 = *idx1, idx3 = var1; idx2 < var9; idx2 += inc4, idx3 += inc4 */
-	0x10001308, /*     DRD1A: var4 = idx1; FN=0 MORE init=0 WS=0 RS=0 */
-	0x60140002, /*     DRD2A: EU0=0 EU1=0 EU2=0 EU3=2 EXT init=0 WS=2 RS=2 */
-	0x0cccfcca, /*     DRD2B1: *idx3 = EU3(); EU3(*idx3,var10)  */
-	0xd9190300, /*   LCDEXT: idx2 = idx2; idx2 > var12; idx2 += inc0 */
-	0xb8c5e009, /*   LCD: idx3 = *(idx1 + var00000015); ; idx3 += inc1 */
-	0x03fec398, /*     DRD1A: *idx0 = *idx3; FN=0 init=31 WS=3 RS=3 */
-	0x9919826a, /*   LCD: idx2 = idx2, idx3 = idx3; idx2 > var9; idx2 += inc5, idx3 += inc2 */
-	0x0feac398, /*     DRD1A: *idx0 = *idx3; FN=0 TFD INT init=31 WS=1 RS=1 */
-	0x99190036, /*   LCD: idx2 = idx2; idx2 once var0; idx2 += inc6 */
-	0x60000005, /*     DRD2A: EU0=0 EU1=0 EU2=0 EU3=5 EXT init=0 WS=0 RS=0 */
-	0x0c4cf889, /*     DRD2B1: *idx1 = EU3(); EU3(idx2,var9)  */
-	0x000001f8, /*   NOP */
-
-	/* VAR[9]-VAR[12] */
-	0x40000000,
-	0x7fff7fff,
-	0x00000000,
-	0x40000004,
-
-	/* INC[0]-INC[5] */
-	0x40000000,
-	0xe0000000,
-	0xe0000000,
-	0xa0000008,
-	0x20000000,
-	0x4000ffff,
-};
-
diff --git a/arch/powerpc/sysdev/bestcomm/bestcomm.c b/arch/powerpc/sysdev/bestcomm/bestcomm.c
deleted file mode 100644
index b3fbb271be87..000000000000
--- a/arch/powerpc/sysdev/bestcomm/bestcomm.c
+++ /dev/null
@@ -1,532 +0,0 @@
-/*
- * Driver for MPC52xx processor BestComm peripheral controller
- *
- *
- * Copyright (C) 2006-2007 Sylvain Munaut <tnt@246tNt.com>
- * Copyright (C) 2005      Varma Electronics Oy,
- *                         ( by Andrey Volkov <avolkov@varma-el.com> )
- * Copyright (C) 2003-2004 MontaVista, Software, Inc.
- *                         ( by Dale Farnsworth <dfarnsworth@mvista.com> )
- *
- * This file is licensed under the terms of the GNU General Public License
- * version 2. This program is licensed "as is" without any warranty of any
- * kind, whether express or implied.
- */
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/slab.h>
-#include <linux/of.h>
-#include <linux/of_device.h>
-#include <linux/of_platform.h>
-#include <asm/io.h>
-#include <asm/irq.h>
-#include <asm/mpc52xx.h>
-
-#include "sram.h"
-#include "bestcomm_priv.h"
-#include "bestcomm.h"
-
-#define DRIVER_NAME "bestcomm-core"
-
-/* MPC5200 device tree match tables */
-static struct of_device_id mpc52xx_sram_ids[] __devinitdata = {
-	{ .compatible = "fsl,mpc5200-sram", },
-	{ .compatible = "mpc5200-sram", },
-	{}
-};
-
-
-struct bcom_engine *bcom_eng = NULL;
-EXPORT_SYMBOL_GPL(bcom_eng);	/* needed for inline functions */
-
-/* ======================================================================== */
-/* Public and private API                                                   */
-/* ======================================================================== */
-
-/* Private API */
-
-struct bcom_task *
-bcom_task_alloc(int bd_count, int bd_size, int priv_size)
-{
-	int i, tasknum = -1;
-	struct bcom_task *tsk;
-
-	/* Don't try to do anything if bestcomm init failed */
-	if (!bcom_eng)
-		return NULL;
-
-	/* Get and reserve a task num */
-	spin_lock(&bcom_eng->lock);
-
-	for (i=0; i<BCOM_MAX_TASKS; i++)
-		if (!bcom_eng->tdt[i].stop) {	/* we use stop as a marker */
-			bcom_eng->tdt[i].stop = 0xfffffffful; /* dummy addr */
-			tasknum = i;
-			break;
-		}
-
-	spin_unlock(&bcom_eng->lock);
-
-	if (tasknum < 0)
-		return NULL;
-
-	/* Allocate our structure */
-	tsk = kzalloc(sizeof(struct bcom_task) + priv_size, GFP_KERNEL);
-	if (!tsk)
-		goto error;
-
-	tsk->tasknum = tasknum;
-	if (priv_size)
-		tsk->priv = (void*)tsk + sizeof(struct bcom_task);
-
-	/* Get IRQ of that task */
-	tsk->irq = irq_of_parse_and_map(bcom_eng->ofnode, tsk->tasknum);
-	if (tsk->irq == NO_IRQ)
-		goto error;
-
-	/* Init the BDs, if needed */
-	if (bd_count) {
-		tsk->cookie = kmalloc(sizeof(void*) * bd_count, GFP_KERNEL);
-		if (!tsk->cookie)
-			goto error;
-
-		tsk->bd = bcom_sram_alloc(bd_count * bd_size, 4, &tsk->bd_pa);
-		if (!tsk->bd)
-			goto error;
-		memset(tsk->bd, 0x00, bd_count * bd_size);
-
-		tsk->num_bd = bd_count;
-		tsk->bd_size = bd_size;
-	}
-
-	return tsk;
-
-error:
-	if (tsk) {
-		if (tsk->irq != NO_IRQ)
-			irq_dispose_mapping(tsk->irq);
-		bcom_sram_free(tsk->bd);
-		kfree(tsk->cookie);
-		kfree(tsk);
-	}
-
-	bcom_eng->tdt[tasknum].stop = 0;
-
-	return NULL;
-}
-EXPORT_SYMBOL_GPL(bcom_task_alloc);
-
-void
-bcom_task_free(struct bcom_task *tsk)
-{
-	/* Stop the task */
-	bcom_disable_task(tsk->tasknum);
-
-	/* Clear TDT */
-	bcom_eng->tdt[tsk->tasknum].start = 0;
-	bcom_eng->tdt[tsk->tasknum].stop  = 0;
-
-	/* Free everything */
-	irq_dispose_mapping(tsk->irq);
-	bcom_sram_free(tsk->bd);
-	kfree(tsk->cookie);
-	kfree(tsk);
-}
-EXPORT_SYMBOL_GPL(bcom_task_free);
-
-int
-bcom_load_image(int task, u32 *task_image)
-{
-	struct bcom_task_header *hdr = (struct bcom_task_header *)task_image;
-	struct bcom_tdt *tdt;
-	u32 *desc, *var, *inc;
-	u32 *desc_src, *var_src, *inc_src;
-
-	/* Safety checks */
-	if (hdr->magic != BCOM_TASK_MAGIC) {
-		printk(KERN_ERR DRIVER_NAME
-			": Trying to load invalid microcode\n");
-		return -EINVAL;
-	}
-
-	if ((task < 0) || (task >= BCOM_MAX_TASKS)) {
-		printk(KERN_ERR DRIVER_NAME
-			": Trying to load invalid task %d\n", task);
-		return -EINVAL;
-	}
-
-	/* Initial load or reload */
-	tdt = &bcom_eng->tdt[task];
-
-	if (tdt->start) {
-		desc = bcom_task_desc(task);
-		if (hdr->desc_size != bcom_task_num_descs(task)) {
-			printk(KERN_ERR DRIVER_NAME
-				": Trying to reload wrong task image "
-				"(%d size %d/%d)!\n",
-				task,
-				hdr->desc_size,
-				bcom_task_num_descs(task));
-			return -EINVAL;
-		}
-	} else {
-		phys_addr_t start_pa;
-
-		desc = bcom_sram_alloc(hdr->desc_size * sizeof(u32), 4, &start_pa);
-		if (!desc)
-			return -ENOMEM;
-
-		tdt->start = start_pa;
-		tdt->stop = start_pa + ((hdr->desc_size-1) * sizeof(u32));
-	}
-
-	var = bcom_task_var(task);
-	inc = bcom_task_inc(task);
-
-	/* Clear & copy */
-	memset(var, 0x00, BCOM_VAR_SIZE);
-	memset(inc, 0x00, BCOM_INC_SIZE);
-
-	desc_src = (u32 *)(hdr + 1);
-	var_src = desc_src + hdr->desc_size;
-	inc_src = var_src + hdr->var_size;
-
-	memcpy(desc, desc_src, hdr->desc_size * sizeof(u32));
-	memcpy(var + hdr->first_var, var_src, hdr->var_size * sizeof(u32));
-	memcpy(inc, inc_src, hdr->inc_size * sizeof(u32));
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(bcom_load_image);
-
-void
-bcom_set_initiator(int task, int initiator)
-{
-	int i;
-	int num_descs;
-	u32 *desc;
-	int next_drd_has_initiator;
-
-	bcom_set_tcr_initiator(task, initiator);
-
-	/* Just setting tcr is apparently not enough due to some problem */
-	/* with it. So we just go thru all the microcode and replace in  */
-	/* the DRD directly */
-
-	desc = bcom_task_desc(task);
-	next_drd_has_initiator = 1;
-	num_descs = bcom_task_num_descs(task);
-
-	for (i=0; i<num_descs; i++, desc++) {
-		if (!bcom_desc_is_drd(*desc))
-			continue;
-		if (next_drd_has_initiator)
-			if (bcom_desc_initiator(*desc) != BCOM_INITIATOR_ALWAYS)
-				bcom_set_desc_initiator(desc, initiator);
-		next_drd_has_initiator = !bcom_drd_is_extended(*desc);
-	}
-}
-EXPORT_SYMBOL_GPL(bcom_set_initiator);
-
-
-/* Public API */
-
-void
-bcom_enable(struct bcom_task *tsk)
-{
-	bcom_enable_task(tsk->tasknum);
-}
-EXPORT_SYMBOL_GPL(bcom_enable);
-
-void
-bcom_disable(struct bcom_task *tsk)
-{
-	bcom_disable_task(tsk->tasknum);
-}
-EXPORT_SYMBOL_GPL(bcom_disable);
-
-
-/* ======================================================================== */
-/* Engine init/cleanup                                                      */
-/* ======================================================================== */
-
-/* Function Descriptor table */
-/* this will need to be updated if Freescale changes their task code FDT */
-static u32 fdt_ops[] = {
-	0xa0045670,	/* FDT[48] - load_acc()	  */
-	0x80045670,	/* FDT[49] - unload_acc() */
-	0x21800000,	/* FDT[50] - and()        */
-	0x21e00000,	/* FDT[51] - or()         */
-	0x21500000,	/* FDT[52] - xor()        */
-	0x21400000,	/* FDT[53] - andn()       */
-	0x21500000,	/* FDT[54] - not()        */
-	0x20400000,	/* FDT[55] - add()        */
-	0x20500000,	/* FDT[56] - sub()        */
-	0x20800000,	/* FDT[57] - lsh()        */
-	0x20a00000,	/* FDT[58] - rsh()        */
-	0xc0170000,	/* FDT[59] - crc8()       */
-	0xc0145670,	/* FDT[60] - crc16()      */
-	0xc0345670,	/* FDT[61] - crc32()      */
-	0xa0076540,	/* FDT[62] - endian32()   */
-	0xa0000760,	/* FDT[63] - endian16()   */
-};
-
-
-static int __devinit
-bcom_engine_init(void)
-{
-	int task;
-	phys_addr_t tdt_pa, ctx_pa, var_pa, fdt_pa;
-	unsigned int tdt_size, ctx_size, var_size, fdt_size;
-
-	/* Allocate & clear SRAM zones for FDT, TDTs, contexts and vars/incs */
-	tdt_size = BCOM_MAX_TASKS * sizeof(struct bcom_tdt);
-	ctx_size = BCOM_MAX_TASKS * BCOM_CTX_SIZE;
-	var_size = BCOM_MAX_TASKS * (BCOM_VAR_SIZE + BCOM_INC_SIZE);
-	fdt_size = BCOM_FDT_SIZE;
-
-	bcom_eng->tdt = bcom_sram_alloc(tdt_size, sizeof(u32), &tdt_pa);
-	bcom_eng->ctx = bcom_sram_alloc(ctx_size, BCOM_CTX_ALIGN, &ctx_pa);
-	bcom_eng->var = bcom_sram_alloc(var_size, BCOM_VAR_ALIGN, &var_pa);
-	bcom_eng->fdt = bcom_sram_alloc(fdt_size, BCOM_FDT_ALIGN, &fdt_pa);
-
-	if (!bcom_eng->tdt || !bcom_eng->ctx || !bcom_eng->var || !bcom_eng->fdt) {
-		printk(KERN_ERR "DMA: SRAM alloc failed in engine init !\n");
-
-		bcom_sram_free(bcom_eng->tdt);
-		bcom_sram_free(bcom_eng->ctx);
-		bcom_sram_free(bcom_eng->var);
-		bcom_sram_free(bcom_eng->fdt);
-
-		return -ENOMEM;
-	}
-
-	memset(bcom_eng->tdt, 0x00, tdt_size);
-	memset(bcom_eng->ctx, 0x00, ctx_size);
-	memset(bcom_eng->var, 0x00, var_size);
-	memset(bcom_eng->fdt, 0x00, fdt_size);
-
-	/* Copy the FDT for the EU#3 */
-	memcpy(&bcom_eng->fdt[48], fdt_ops, sizeof(fdt_ops));
-
-	/* Initialize Task base structure */
-	for (task=0; task<BCOM_MAX_TASKS; task++)
-	{
-		out_be16(&bcom_eng->regs->tcr[task], 0);
-		out_8(&bcom_eng->regs->ipr[task], 0);
-
-		bcom_eng->tdt[task].context	= ctx_pa;
-		bcom_eng->tdt[task].var	= var_pa;
-		bcom_eng->tdt[task].fdt	= fdt_pa;
-
-		var_pa += BCOM_VAR_SIZE + BCOM_INC_SIZE;
-		ctx_pa += BCOM_CTX_SIZE;
-	}
-
-	out_be32(&bcom_eng->regs->taskBar, tdt_pa);
-
-	/* Init 'always' initiator */
-	out_8(&bcom_eng->regs->ipr[BCOM_INITIATOR_ALWAYS], BCOM_IPR_ALWAYS);
-
-	/* Disable COMM Bus Prefetch on the original 5200; it's broken */
-	if ((mfspr(SPRN_SVR) & MPC5200_SVR_MASK) == MPC5200_SVR)
-		bcom_disable_prefetch();
-
-	/* Init lock */
-	spin_lock_init(&bcom_eng->lock);
-
-	return 0;
-}
-
-static void
-bcom_engine_cleanup(void)
-{
-	int task;
-
-	/* Stop all tasks */
-	for (task=0; task<BCOM_MAX_TASKS; task++)
-	{
-		out_be16(&bcom_eng->regs->tcr[task], 0);
-		out_8(&bcom_eng->regs->ipr[task], 0);
-	}
-
-	out_be32(&bcom_eng->regs->taskBar, 0ul);
-
-	/* Release the SRAM zones */
-	bcom_sram_free(bcom_eng->tdt);
-	bcom_sram_free(bcom_eng->ctx);
-	bcom_sram_free(bcom_eng->var);
-	bcom_sram_free(bcom_eng->fdt);
-}
-
-
-/* ======================================================================== */
-/* OF platform driver                                                       */
-/* ======================================================================== */
-
-static int __devinit mpc52xx_bcom_probe(struct platform_device *op)
-{
-	struct device_node *ofn_sram;
-	struct resource res_bcom;
-
-	int rv;
-
-	/* Inform user we're ok so far */
-	printk(KERN_INFO "DMA: MPC52xx BestComm driver\n");
-
-	/* Get the bestcomm node */
-	of_node_get(op->dev.of_node);
-
-	/* Prepare SRAM */
-	ofn_sram = of_find_matching_node(NULL, mpc52xx_sram_ids);
-	if (!ofn_sram) {
-		printk(KERN_ERR DRIVER_NAME ": "
-			"No SRAM found in device tree\n");
-		rv = -ENODEV;
-		goto error_ofput;
-	}
-	rv = bcom_sram_init(ofn_sram, DRIVER_NAME);
-	of_node_put(ofn_sram);
-
-	if (rv) {
-		printk(KERN_ERR DRIVER_NAME ": "
-			"Error in SRAM init\n");
-		goto error_ofput;
-	}
-
-	/* Get a clean struct */
-	bcom_eng = kzalloc(sizeof(struct bcom_engine), GFP_KERNEL);
-	if (!bcom_eng) {
-		printk(KERN_ERR DRIVER_NAME ": "
-			"Can't allocate state structure\n");
-		rv = -ENOMEM;
-		goto error_sramclean;
-	}
-
-	/* Save the node */
-	bcom_eng->ofnode = op->dev.of_node;
-
-	/* Get, reserve & map io */
-	if (of_address_to_resource(op->dev.of_node, 0, &res_bcom)) {
-		printk(KERN_ERR DRIVER_NAME ": "
-			"Can't get resource\n");
-		rv = -EINVAL;
-		goto error_sramclean;
-	}
-
-	if (!request_mem_region(res_bcom.start, sizeof(struct mpc52xx_sdma),
-				DRIVER_NAME)) {
-		printk(KERN_ERR DRIVER_NAME ": "
-			"Can't request registers region\n");
-		rv = -EBUSY;
-		goto error_sramclean;
-	}
-
-	bcom_eng->regs_base = res_bcom.start;
-	bcom_eng->regs = ioremap(res_bcom.start, sizeof(struct mpc52xx_sdma));
-	if (!bcom_eng->regs) {
-		printk(KERN_ERR DRIVER_NAME ": "
-			"Can't map registers\n");
-		rv = -ENOMEM;
-		goto error_release;
-	}
-
-	/* Now, do the real init */
-	rv = bcom_engine_init();
-	if (rv)
-		goto error_unmap;
-
-	/* Done ! */
-	printk(KERN_INFO "DMA: MPC52xx BestComm engine @%08lx ok !\n",
-		(long)bcom_eng->regs_base);
-
-	return 0;
-
-	/* Error path */
-error_unmap:
-	iounmap(bcom_eng->regs);
-error_release:
-	release_mem_region(res_bcom.start, sizeof(struct mpc52xx_sdma));
-error_sramclean:
-	kfree(bcom_eng);
-	bcom_sram_cleanup();
-error_ofput:
-	of_node_put(op->dev.of_node);
-
-	printk(KERN_ERR "DMA: MPC52xx BestComm init failed !\n");
-
-	return rv;
-}
-
-
-static int mpc52xx_bcom_remove(struct platform_device *op)
-{
-	/* Clean up the engine */
-	bcom_engine_cleanup();
-
-	/* Cleanup SRAM */
-	bcom_sram_cleanup();
-
-	/* Release regs */
-	iounmap(bcom_eng->regs);
-	release_mem_region(bcom_eng->regs_base, sizeof(struct mpc52xx_sdma));
-
-	/* Release the node */
-	of_node_put(bcom_eng->ofnode);
-
-	/* Release memory */
-	kfree(bcom_eng);
-	bcom_eng = NULL;
-
-	return 0;
-}
-
-static struct of_device_id mpc52xx_bcom_of_match[] = {
-	{ .compatible = "fsl,mpc5200-bestcomm", },
-	{ .compatible = "mpc5200-bestcomm", },
-	{},
-};
-
-MODULE_DEVICE_TABLE(of, mpc52xx_bcom_of_match);
-
-
-static struct platform_driver mpc52xx_bcom_of_platform_driver = {
-	.probe		= mpc52xx_bcom_probe,
-	.remove		= mpc52xx_bcom_remove,
-	.driver = {
-		.name = DRIVER_NAME,
-		.owner = THIS_MODULE,
-		.of_match_table = mpc52xx_bcom_of_match,
-	},
-};
-
-
-/* ======================================================================== */
-/* Module                                                                   */
-/* ======================================================================== */
-
-static int __init
-mpc52xx_bcom_init(void)
-{
-	return platform_driver_register(&mpc52xx_bcom_of_platform_driver);
-}
-
-static void __exit
-mpc52xx_bcom_exit(void)
-{
-	platform_driver_unregister(&mpc52xx_bcom_of_platform_driver);
-}
-
-/* If we're not a module, we must make sure everything is setup before  */
-/* anyone tries to use us ... that's why we use subsys_initcall instead */
-/* of module_init. */
-subsys_initcall(mpc52xx_bcom_init);
-module_exit(mpc52xx_bcom_exit);
-
-MODULE_DESCRIPTION("Freescale MPC52xx BestComm DMA");
-MODULE_AUTHOR("Sylvain Munaut <tnt@246tNt.com>");
-MODULE_AUTHOR("Andrey Volkov <avolkov@varma-el.com>");
-MODULE_AUTHOR("Dale Farnsworth <dfarnsworth@mvista.com>");
-MODULE_LICENSE("GPL v2");
-
diff --git a/arch/powerpc/sysdev/bestcomm/bestcomm.h b/arch/powerpc/sysdev/bestcomm/bestcomm.h
deleted file mode 100644
index a0e2e6b19b57..000000000000
--- a/arch/powerpc/sysdev/bestcomm/bestcomm.h
+++ /dev/null
@@ -1,213 +0,0 @@
-/*
- * Public header for the MPC52xx processor BestComm driver
- *
- *
- * Copyright (C) 2006      Sylvain Munaut <tnt@246tNt.com>
- * Copyright (C) 2005      Varma Electronics Oy,
- *                         ( by Andrey Volkov <avolkov@varma-el.com> )
- * Copyright (C) 2003-2004 MontaVista, Software, Inc.
- *                         ( by Dale Farnsworth <dfarnsworth@mvista.com> )
- *
- * This file is licensed under the terms of the GNU General Public License
- * version 2. This program is licensed "as is" without any warranty of any
- * kind, whether express or implied.
- */
-
-#ifndef __BESTCOMM_H__
-#define __BESTCOMM_H__
-
-/**
- * struct bcom_bd - Structure describing a generic BestComm buffer descriptor
- * @status: The current status of this buffer. Exact meaning depends on the
- *          task type
- * @data: An array of u32 extra data.  Size of array is task dependent.
- *
- * Note: Don't dereference a bcom_bd pointer as an array.  The size of the
- *       bcom_bd is variable.  Use bcom_get_bd() instead.
- */
-struct bcom_bd {
-	u32	status;
-	u32	data[0];	/* variable payload size */
-};
-
-/* ======================================================================== */
-/* Generic task management                                                   */
-/* ======================================================================== */
-
-/**
- * struct bcom_task - Structure describing a loaded BestComm task
- *
- * This structure is never built by the driver it self. It's built and
- * filled the intermediate layer of the BestComm API, the task dependent
- * support code.
- *
- * Most likely you don't need to poke around inside this structure. The
- * fields are exposed in the header just for the sake of inline functions
- */
-struct bcom_task {
-	unsigned int	tasknum;
-	unsigned int	flags;
-	int		irq;
-
-	struct bcom_bd	*bd;
-	phys_addr_t	bd_pa;
-	void		**cookie;
-	unsigned short	index;
-	unsigned short	outdex;
-	unsigned int	num_bd;
-	unsigned int	bd_size;
-
-	void*		priv;
-};
-
-#define BCOM_FLAGS_NONE         0x00000000ul
-#define BCOM_FLAGS_ENABLE_TASK  (1ul <<  0)
-
-/**
- * bcom_enable - Enable a BestComm task
- * @tsk: The BestComm task structure
- *
- * This function makes sure the given task is enabled and can be run
- * by the BestComm engine as needed
- */
-extern void bcom_enable(struct bcom_task *tsk);
-
-/**
- * bcom_disable - Disable a BestComm task
- * @tsk: The BestComm task structure
- *
- * This function disable a given task, making sure it's not executed
- * by the BestComm engine.
- */
-extern void bcom_disable(struct bcom_task *tsk);
-
-
-/**
- * bcom_get_task_irq - Returns the irq number of a BestComm task
- * @tsk: The BestComm task structure
- */
-static inline int
-bcom_get_task_irq(struct bcom_task *tsk) {
-	return tsk->irq;
-}
-
-/* ======================================================================== */
-/* BD based tasks helpers                                                   */
-/* ======================================================================== */
-
-#define BCOM_BD_READY	0x40000000ul
-
-/** _bcom_next_index - Get next input index.
- * @tsk: pointer to task structure
- *
- * Support function; Device drivers should not call this
- */
-static inline int
-_bcom_next_index(struct bcom_task *tsk)
-{
-	return ((tsk->index + 1) == tsk->num_bd) ? 0 : tsk->index + 1;
-}
-
-/** _bcom_next_outdex - Get next output index.
- * @tsk: pointer to task structure
- *
- * Support function; Device drivers should not call this
- */
-static inline int
-_bcom_next_outdex(struct bcom_task *tsk)
-{
-	return ((tsk->outdex + 1) == tsk->num_bd) ? 0 : tsk->outdex + 1;
-}
-
-/**
- * bcom_queue_empty - Checks if a BestComm task BD queue is empty
- * @tsk: The BestComm task structure
- */
-static inline int
-bcom_queue_empty(struct bcom_task *tsk)
-{
-	return tsk->index == tsk->outdex;
-}
-
-/**
- * bcom_queue_full - Checks if a BestComm task BD queue is full
- * @tsk: The BestComm task structure
- */
-static inline int
-bcom_queue_full(struct bcom_task *tsk)
-{
-	return tsk->outdex == _bcom_next_index(tsk);
-}
-
-/**
- * bcom_get_bd - Get a BD from the queue
- * @tsk: The BestComm task structure
- * index: Index of the BD to fetch
- */
-static inline struct bcom_bd
-*bcom_get_bd(struct bcom_task *tsk, unsigned int index)
-{
-	/* A cast to (void*) so the address can be incremented by the
-	 * real size instead of by sizeof(struct bcom_bd) */
-	return ((void *)tsk->bd) + (index * tsk->bd_size);
-}
-
-/**
- * bcom_buffer_done - Checks if a BestComm 
- * @tsk: The BestComm task structure
- */
-static inline int
-bcom_buffer_done(struct bcom_task *tsk)
-{
-	struct bcom_bd *bd;
-	if (bcom_queue_empty(tsk))
-		return 0;
-
-	bd = bcom_get_bd(tsk, tsk->outdex);
-	return !(bd->status & BCOM_BD_READY);
-}
-
-/**
- * bcom_prepare_next_buffer - clear status of next available buffer.
- * @tsk: The BestComm task structure
- *
- * Returns pointer to next buffer descriptor
- */
-static inline struct bcom_bd *
-bcom_prepare_next_buffer(struct bcom_task *tsk)
-{
-	struct bcom_bd *bd;
-
-	bd = bcom_get_bd(tsk, tsk->index);
-	bd->status = 0;	/* cleanup last status */
-	return bd;
-}
-
-static inline void
-bcom_submit_next_buffer(struct bcom_task *tsk, void *cookie)
-{
-	struct bcom_bd *bd = bcom_get_bd(tsk, tsk->index);
-
-	tsk->cookie[tsk->index] = cookie;
-	mb();	/* ensure the bd is really up-to-date */
-	bd->status |= BCOM_BD_READY;
-	tsk->index = _bcom_next_index(tsk);
-	if (tsk->flags & BCOM_FLAGS_ENABLE_TASK)
-		bcom_enable(tsk);
-}
-
-static inline void *
-bcom_retrieve_buffer(struct bcom_task *tsk, u32 *p_status, struct bcom_bd **p_bd)
-{
-	void *cookie = tsk->cookie[tsk->outdex];
-	struct bcom_bd *bd = bcom_get_bd(tsk, tsk->outdex);
-
-	if (p_status)
-		*p_status = bd->status;
-	if (p_bd)
-		*p_bd = bd;
-	tsk->outdex = _bcom_next_outdex(tsk);
-	return cookie;
-}
-
-#endif /* __BESTCOMM_H__ */
diff --git a/arch/powerpc/sysdev/bestcomm/bestcomm_priv.h b/arch/powerpc/sysdev/bestcomm/bestcomm_priv.h
deleted file mode 100644
index 3b52f3ffbdf8..000000000000
--- a/arch/powerpc/sysdev/bestcomm/bestcomm_priv.h
+++ /dev/null
@@ -1,350 +0,0 @@
-/*
- * Private header for the MPC52xx processor BestComm driver
- *
- * By private, we mean that driver should not use it directly. It's meant
- * to be used by the BestComm engine driver itself and by the intermediate
- * layer between the core and the drivers.
- *
- * Copyright (C) 2006      Sylvain Munaut <tnt@246tNt.com>
- * Copyright (C) 2005      Varma Electronics Oy,
- *                         ( by Andrey Volkov <avolkov@varma-el.com> )
- * Copyright (C) 2003-2004 MontaVista, Software, Inc.
- *                         ( by Dale Farnsworth <dfarnsworth@mvista.com> )
- *
- * This file is licensed under the terms of the GNU General Public License
- * version 2. This program is licensed "as is" without any warranty of any
- * kind, whether express or implied.
- */
-
-#ifndef __BESTCOMM_PRIV_H__
-#define __BESTCOMM_PRIV_H__
-
-#include <linux/spinlock.h>
-#include <linux/of.h>
-#include <asm/io.h>
-#include <asm/mpc52xx.h>
-
-#include "sram.h"
-
-
-/* ======================================================================== */
-/* Engine related stuff                                                     */
-/* ======================================================================== */
-
-/* Zones sizes and needed alignments */
-#define BCOM_MAX_TASKS		16
-#define BCOM_MAX_VAR		24
-#define BCOM_MAX_INC		8
-#define BCOM_MAX_FDT		64
-#define BCOM_MAX_CTX		20
-#define BCOM_CTX_SIZE		(BCOM_MAX_CTX * sizeof(u32))
-#define BCOM_CTX_ALIGN		0x100
-#define BCOM_VAR_SIZE		(BCOM_MAX_VAR * sizeof(u32))
-#define BCOM_INC_SIZE		(BCOM_MAX_INC * sizeof(u32))
-#define BCOM_VAR_ALIGN		0x80
-#define BCOM_FDT_SIZE		(BCOM_MAX_FDT * sizeof(u32))
-#define BCOM_FDT_ALIGN		0x100
-
-/**
- * struct bcom_tdt - Task Descriptor Table Entry
- *
- */
-struct bcom_tdt {
-	u32 start;
-	u32 stop;
-	u32 var;
-	u32 fdt;
-	u32 exec_status;	/* used internally by BestComm engine */
-	u32 mvtp;		/* used internally by BestComm engine */
-	u32 context;
-	u32 litbase;
-};
-
-/**
- * struct bcom_engine
- *
- * This holds all info needed globaly to handle the engine
- */
-struct bcom_engine {
-	struct device_node		*ofnode;
-	struct mpc52xx_sdma __iomem     *regs;
-	phys_addr_t                      regs_base;
-
-	struct bcom_tdt			*tdt;
-	u32				*ctx;
-	u32				*var;
-	u32				*fdt;
-
-	spinlock_t			lock;
-};
-
-extern struct bcom_engine *bcom_eng;
-
-
-/* ======================================================================== */
-/* Tasks related stuff                                                      */
-/* ======================================================================== */
-
-/* Tasks image header */
-#define BCOM_TASK_MAGIC		0x4243544B	/* 'BCTK' */
-
-struct bcom_task_header {
-	u32	magic;
-	u8	desc_size;	/* the size fields     */
-	u8	var_size;	/* are given in number */
-	u8	inc_size;	/* of 32-bits words    */
-	u8	first_var;
-	u8	reserved[8];
-};
-
-/* Descriptors structure & co */
-#define BCOM_DESC_NOP		0x000001f8
-#define BCOM_LCD_MASK		0x80000000
-#define BCOM_DRD_EXTENDED	0x40000000
-#define BCOM_DRD_INITIATOR_SHIFT	21
-
-/* Tasks pragma */
-#define BCOM_PRAGMA_BIT_RSV		7	/* reserved pragma bit */
-#define BCOM_PRAGMA_BIT_PRECISE_INC	6	/* increment 0=when possible, */
-						/*           1=iter end */
-#define BCOM_PRAGMA_BIT_RST_ERROR_NO	5	/* don't reset errors on */
-						/* task enable */
-#define BCOM_PRAGMA_BIT_PACK		4	/* pack data enable */
-#define BCOM_PRAGMA_BIT_INTEGER		3	/* data alignment */
-						/* 0=frac(msb), 1=int(lsb) */
-#define BCOM_PRAGMA_BIT_SPECREAD	2	/* XLB speculative read */
-#define BCOM_PRAGMA_BIT_CW		1	/* write line buffer enable */
-#define BCOM_PRAGMA_BIT_RL		0	/* read line buffer enable */
-
-	/* Looks like XLB speculative read generates XLB errors when a buffer
-	 * is at the end of the physical memory. i.e. when accessing the
-	 * lasts words, the engine tries to prefetch the next but there is no
-	 * next ...
-	 */
-#define BCOM_STD_PRAGMA		((0 << BCOM_PRAGMA_BIT_RSV)		| \
-				 (0 << BCOM_PRAGMA_BIT_PRECISE_INC)	| \
-				 (0 << BCOM_PRAGMA_BIT_RST_ERROR_NO)	| \
-				 (0 << BCOM_PRAGMA_BIT_PACK)		| \
-				 (0 << BCOM_PRAGMA_BIT_INTEGER)		| \
-				 (0 << BCOM_PRAGMA_BIT_SPECREAD)	| \
-				 (1 << BCOM_PRAGMA_BIT_CW)		| \
-				 (1 << BCOM_PRAGMA_BIT_RL))
-
-#define BCOM_PCI_PRAGMA		((0 << BCOM_PRAGMA_BIT_RSV)		| \
-				 (0 << BCOM_PRAGMA_BIT_PRECISE_INC)	| \
-				 (0 << BCOM_PRAGMA_BIT_RST_ERROR_NO)	| \
-				 (0 << BCOM_PRAGMA_BIT_PACK)		| \
-				 (1 << BCOM_PRAGMA_BIT_INTEGER)		| \
-				 (0 << BCOM_PRAGMA_BIT_SPECREAD)	| \
-				 (1 << BCOM_PRAGMA_BIT_CW)		| \
-				 (1 << BCOM_PRAGMA_BIT_RL))
-
-#define BCOM_ATA_PRAGMA		BCOM_STD_PRAGMA
-#define BCOM_CRC16_DP_0_PRAGMA	BCOM_STD_PRAGMA
-#define BCOM_CRC16_DP_1_PRAGMA	BCOM_STD_PRAGMA
-#define BCOM_FEC_RX_BD_PRAGMA	BCOM_STD_PRAGMA
-#define BCOM_FEC_TX_BD_PRAGMA	BCOM_STD_PRAGMA
-#define BCOM_GEN_DP_0_PRAGMA	BCOM_STD_PRAGMA
-#define BCOM_GEN_DP_1_PRAGMA	BCOM_STD_PRAGMA
-#define BCOM_GEN_DP_2_PRAGMA	BCOM_STD_PRAGMA
-#define BCOM_GEN_DP_3_PRAGMA	BCOM_STD_PRAGMA
-#define BCOM_GEN_DP_BD_0_PRAGMA	BCOM_STD_PRAGMA
-#define BCOM_GEN_DP_BD_1_PRAGMA	BCOM_STD_PRAGMA
-#define BCOM_GEN_RX_BD_PRAGMA	BCOM_STD_PRAGMA
-#define BCOM_GEN_TX_BD_PRAGMA	BCOM_STD_PRAGMA
-#define BCOM_GEN_LPC_PRAGMA	BCOM_STD_PRAGMA
-#define BCOM_PCI_RX_PRAGMA	BCOM_PCI_PRAGMA
-#define BCOM_PCI_TX_PRAGMA	BCOM_PCI_PRAGMA
-
-/* Initiators number */
-#define BCOM_INITIATOR_ALWAYS	 0
-#define BCOM_INITIATOR_SCTMR_0	 1
-#define BCOM_INITIATOR_SCTMR_1	 2
-#define BCOM_INITIATOR_FEC_RX	 3
-#define BCOM_INITIATOR_FEC_TX	 4
-#define BCOM_INITIATOR_ATA_RX	 5
-#define BCOM_INITIATOR_ATA_TX	 6
-#define BCOM_INITIATOR_SCPCI_RX	 7
-#define BCOM_INITIATOR_SCPCI_TX	 8
-#define BCOM_INITIATOR_PSC3_RX	 9
-#define BCOM_INITIATOR_PSC3_TX	10
-#define BCOM_INITIATOR_PSC2_RX	11
-#define BCOM_INITIATOR_PSC2_TX	12
-#define BCOM_INITIATOR_PSC1_RX	13
-#define BCOM_INITIATOR_PSC1_TX	14
-#define BCOM_INITIATOR_SCTMR_2	15
-#define BCOM_INITIATOR_SCLPC	16
-#define BCOM_INITIATOR_PSC5_RX	17
-#define BCOM_INITIATOR_PSC5_TX	18
-#define BCOM_INITIATOR_PSC4_RX	19
-#define BCOM_INITIATOR_PSC4_TX	20
-#define BCOM_INITIATOR_I2C2_RX	21
-#define BCOM_INITIATOR_I2C2_TX	22
-#define BCOM_INITIATOR_I2C1_RX	23
-#define BCOM_INITIATOR_I2C1_TX	24
-#define BCOM_INITIATOR_PSC6_RX	25
-#define BCOM_INITIATOR_PSC6_TX	26
-#define BCOM_INITIATOR_IRDA_RX	25
-#define BCOM_INITIATOR_IRDA_TX	26
-#define BCOM_INITIATOR_SCTMR_3	27
-#define BCOM_INITIATOR_SCTMR_4	28
-#define BCOM_INITIATOR_SCTMR_5	29
-#define BCOM_INITIATOR_SCTMR_6	30
-#define BCOM_INITIATOR_SCTMR_7	31
-
-/* Initiators priorities */
-#define BCOM_IPR_ALWAYS		7
-#define BCOM_IPR_SCTMR_0	2
-#define BCOM_IPR_SCTMR_1	2
-#define BCOM_IPR_FEC_RX		6
-#define BCOM_IPR_FEC_TX		5
-#define BCOM_IPR_ATA_RX		7
-#define BCOM_IPR_ATA_TX		7
-#define BCOM_IPR_SCPCI_RX	2
-#define BCOM_IPR_SCPCI_TX	2
-#define BCOM_IPR_PSC3_RX	2
-#define BCOM_IPR_PSC3_TX	2
-#define BCOM_IPR_PSC2_RX	2
-#define BCOM_IPR_PSC2_TX	2
-#define BCOM_IPR_PSC1_RX	2
-#define BCOM_IPR_PSC1_TX	2
-#define BCOM_IPR_SCTMR_2	2
-#define BCOM_IPR_SCLPC		2
-#define BCOM_IPR_PSC5_RX	2
-#define BCOM_IPR_PSC5_TX	2
-#define BCOM_IPR_PSC4_RX	2
-#define BCOM_IPR_PSC4_TX	2
-#define BCOM_IPR_I2C2_RX	2
-#define BCOM_IPR_I2C2_TX	2
-#define BCOM_IPR_I2C1_RX	2
-#define BCOM_IPR_I2C1_TX	2
-#define BCOM_IPR_PSC6_RX	2
-#define BCOM_IPR_PSC6_TX	2
-#define BCOM_IPR_IRDA_RX	2
-#define BCOM_IPR_IRDA_TX	2
-#define BCOM_IPR_SCTMR_3	2
-#define BCOM_IPR_SCTMR_4	2
-#define BCOM_IPR_SCTMR_5	2
-#define BCOM_IPR_SCTMR_6	2
-#define BCOM_IPR_SCTMR_7	2
-
-
-/* ======================================================================== */
-/* API                                                                      */
-/* ======================================================================== */
-
-extern struct bcom_task *bcom_task_alloc(int bd_count, int bd_size, int priv_size);
-extern void bcom_task_free(struct bcom_task *tsk);
-extern int bcom_load_image(int task, u32 *task_image);
-extern void bcom_set_initiator(int task, int initiator);
-
-
-#define TASK_ENABLE             0x8000
-
-/**
- * bcom_disable_prefetch - Hook to disable bus prefetching
- *
- * ATA DMA and the original MPC5200 need this due to silicon bugs.  At the
- * moment disabling prefetch is a one-way street.  There is no mechanism
- * in place to turn prefetch back on after it has been disabled.  There is
- * no reason it couldn't be done, it would just be more complex to implement.
- */
-static inline void bcom_disable_prefetch(void)
-{
-	u16 regval;
-
-	regval = in_be16(&bcom_eng->regs->PtdCntrl);
-	out_be16(&bcom_eng->regs->PtdCntrl, regval | 1);
-};
-
-static inline void
-bcom_enable_task(int task)
-{
-        u16 reg;
-        reg = in_be16(&bcom_eng->regs->tcr[task]);
-        out_be16(&bcom_eng->regs->tcr[task],  reg | TASK_ENABLE);
-}
-
-static inline void
-bcom_disable_task(int task)
-{
-        u16 reg = in_be16(&bcom_eng->regs->tcr[task]);
-        out_be16(&bcom_eng->regs->tcr[task], reg & ~TASK_ENABLE);
-}
-
-
-static inline u32 *
-bcom_task_desc(int task)
-{
-	return bcom_sram_pa2va(bcom_eng->tdt[task].start);
-}
-
-static inline int
-bcom_task_num_descs(int task)
-{
-	return (bcom_eng->tdt[task].stop - bcom_eng->tdt[task].start)/sizeof(u32) + 1;
-}
-
-static inline u32 *
-bcom_task_var(int task)
-{
-	return bcom_sram_pa2va(bcom_eng->tdt[task].var);
-}
-
-static inline u32 *
-bcom_task_inc(int task)
-{
-	return &bcom_task_var(task)[BCOM_MAX_VAR];
-}
-
-
-static inline int
-bcom_drd_is_extended(u32 desc)
-{
-	return (desc) & BCOM_DRD_EXTENDED;
-}
-
-static inline int
-bcom_desc_is_drd(u32 desc)
-{
-	return !(desc & BCOM_LCD_MASK) && desc != BCOM_DESC_NOP;
-}
-
-static inline int
-bcom_desc_initiator(u32 desc)
-{
-	return (desc >> BCOM_DRD_INITIATOR_SHIFT) & 0x1f;
-}
-
-static inline void
-bcom_set_desc_initiator(u32 *desc, int initiator)
-{
-	*desc = (*desc & ~(0x1f << BCOM_DRD_INITIATOR_SHIFT)) |
-			((initiator & 0x1f) << BCOM_DRD_INITIATOR_SHIFT);
-}
-
-
-static inline void
-bcom_set_task_pragma(int task, int pragma)
-{
-	u32 *fdt = &bcom_eng->tdt[task].fdt;
-	*fdt = (*fdt & ~0xff) | pragma;
-}
-
-static inline void
-bcom_set_task_auto_start(int task, int next_task)
-{
-	u16 __iomem *tcr = &bcom_eng->regs->tcr[task];
-	out_be16(tcr, (in_be16(tcr) & ~0xff) | 0x00c0 | next_task);
-}
-
-static inline void
-bcom_set_tcr_initiator(int task, int initiator)
-{
-	u16 __iomem *tcr = &bcom_eng->regs->tcr[task];
-	out_be16(tcr, (in_be16(tcr) & ~0x1f00) | ((initiator & 0x1f) << 8));
-}
-
-
-#endif /* __BESTCOMM_PRIV_H__ */
-
diff --git a/arch/powerpc/sysdev/bestcomm/fec.c b/arch/powerpc/sysdev/bestcomm/fec.c
deleted file mode 100644
index 957a988d23ea..000000000000
--- a/arch/powerpc/sysdev/bestcomm/fec.c
+++ /dev/null
@@ -1,270 +0,0 @@
-/*
- * Bestcomm FEC tasks driver
- *
- *
- * Copyright (C) 2006-2007 Sylvain Munaut <tnt@246tNt.com>
- * Copyright (C) 2003-2004 MontaVista, Software, Inc.
- *                         ( by Dale Farnsworth <dfarnsworth@mvista.com> )
- *
- * This file is licensed under the terms of the GNU General Public License
- * version 2. This program is licensed "as is" without any warranty of any
- * kind, whether express or implied.
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/types.h>
-#include <asm/io.h>
-
-#include "bestcomm.h"
-#include "bestcomm_priv.h"
-#include "fec.h"
-
-
-/* ======================================================================== */
-/* Task image/var/inc                                                       */
-/* ======================================================================== */
-
-/* fec tasks images */
-extern u32 bcom_fec_rx_task[];
-extern u32 bcom_fec_tx_task[];
-
-/* rx task vars that need to be set before enabling the task */
-struct bcom_fec_rx_var {
-	u32 enable;		/* (u16*) address of task's control register */
-	u32 fifo;		/* (u32*) address of fec's fifo */
-	u32 bd_base;		/* (struct bcom_bd*) beginning of ring buffer */
-	u32 bd_last;		/* (struct bcom_bd*) end of ring buffer */
-	u32 bd_start;		/* (struct bcom_bd*) current bd */
-	u32 buffer_size;	/* size of receive buffer */
-};
-
-/* rx task incs that need to be set before enabling the task */
-struct bcom_fec_rx_inc {
-	u16 pad0;
-	s16 incr_bytes;
-	u16 pad1;
-	s16 incr_dst;
-	u16 pad2;
-	s16 incr_dst_ma;
-};
-
-/* tx task vars that need to be set before enabling the task */
-struct bcom_fec_tx_var {
-	u32 DRD;		/* (u32*) address of self-modified DRD */
-	u32 fifo;		/* (u32*) address of fec's fifo */
-	u32 enable;		/* (u16*) address of task's control register */
-	u32 bd_base;		/* (struct bcom_bd*) beginning of ring buffer */
-	u32 bd_last;		/* (struct bcom_bd*) end of ring buffer */
-	u32 bd_start;		/* (struct bcom_bd*) current bd */
-	u32 buffer_size;	/* set by uCode for each packet */
-};
-
-/* tx task incs that need to be set before enabling the task */
-struct bcom_fec_tx_inc {
-	u16 pad0;
-	s16 incr_bytes;
-	u16 pad1;
-	s16 incr_src;
-	u16 pad2;
-	s16 incr_src_ma;
-};
-
-/* private structure in the task */
-struct bcom_fec_priv {
-	phys_addr_t	fifo;
-	int		maxbufsize;
-};
-
-
-/* ======================================================================== */
-/* Task support code                                                        */
-/* ======================================================================== */
-
-struct bcom_task *
-bcom_fec_rx_init(int queue_len, phys_addr_t fifo, int maxbufsize)
-{
-	struct bcom_task *tsk;
-	struct bcom_fec_priv *priv;
-
-	tsk = bcom_task_alloc(queue_len, sizeof(struct bcom_fec_bd),
-				sizeof(struct bcom_fec_priv));
-	if (!tsk)
-		return NULL;
-
-	tsk->flags = BCOM_FLAGS_NONE;
-
-	priv = tsk->priv;
-	priv->fifo = fifo;
-	priv->maxbufsize = maxbufsize;
-
-	if (bcom_fec_rx_reset(tsk)) {
-		bcom_task_free(tsk);
-		return NULL;
-	}
-
-	return tsk;
-}
-EXPORT_SYMBOL_GPL(bcom_fec_rx_init);
-
-int
-bcom_fec_rx_reset(struct bcom_task *tsk)
-{
-	struct bcom_fec_priv *priv = tsk->priv;
-	struct bcom_fec_rx_var *var;
-	struct bcom_fec_rx_inc *inc;
-
-	/* Shutdown the task */
-	bcom_disable_task(tsk->tasknum);
-
-	/* Reset the microcode */
-	var = (struct bcom_fec_rx_var *) bcom_task_var(tsk->tasknum);
-	inc = (struct bcom_fec_rx_inc *) bcom_task_inc(tsk->tasknum);
-
-	if (bcom_load_image(tsk->tasknum, bcom_fec_rx_task))
-		return -1;
-
-	var->enable	= bcom_eng->regs_base +
-				offsetof(struct mpc52xx_sdma, tcr[tsk->tasknum]);
-	var->fifo	= (u32) priv->fifo;
-	var->bd_base	= tsk->bd_pa;
-	var->bd_last	= tsk->bd_pa + ((tsk->num_bd-1) * tsk->bd_size);
-	var->bd_start	= tsk->bd_pa;
-	var->buffer_size = priv->maxbufsize;
-
-	inc->incr_bytes	= -(s16)sizeof(u32);	/* These should be in the   */
-	inc->incr_dst	= sizeof(u32);		/* task image, but we stick */
-	inc->incr_dst_ma= sizeof(u8);		/* to the official ones     */
-
-	/* Reset the BDs */
-	tsk->index = 0;
-	tsk->outdex = 0;
-
-	memset(tsk->bd, 0x00, tsk->num_bd * tsk->bd_size);
-
-	/* Configure some stuff */
-	bcom_set_task_pragma(tsk->tasknum, BCOM_FEC_RX_BD_PRAGMA);
-	bcom_set_task_auto_start(tsk->tasknum, tsk->tasknum);
-
-	out_8(&bcom_eng->regs->ipr[BCOM_INITIATOR_FEC_RX], BCOM_IPR_FEC_RX);
-
-	out_be32(&bcom_eng->regs->IntPend, 1<<tsk->tasknum);	/* Clear ints */
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(bcom_fec_rx_reset);
-
-void
-bcom_fec_rx_release(struct bcom_task *tsk)
-{
-	/* Nothing special for the FEC tasks */
-	bcom_task_free(tsk);
-}
-EXPORT_SYMBOL_GPL(bcom_fec_rx_release);
-
-
-
-	/* Return 2nd to last DRD */
-	/* This is an ugly hack, but at least it's only done
-	   once at initialization */
-static u32 *self_modified_drd(int tasknum)
-{
-	u32 *desc;
-	int num_descs;
-	int drd_count;
-	int i;
-
-	num_descs = bcom_task_num_descs(tasknum);
-	desc = bcom_task_desc(tasknum) + num_descs - 1;
-	drd_count = 0;
-	for (i=0; i<num_descs; i++, desc--)
-		if (bcom_desc_is_drd(*desc) && ++drd_count == 3)
-			break;
-	return desc;
-}
-
-struct bcom_task *
-bcom_fec_tx_init(int queue_len, phys_addr_t fifo)
-{
-	struct bcom_task *tsk;
-	struct bcom_fec_priv *priv;
-
-	tsk = bcom_task_alloc(queue_len, sizeof(struct bcom_fec_bd),
-				sizeof(struct bcom_fec_priv));
-	if (!tsk)
-		return NULL;
-
-	tsk->flags = BCOM_FLAGS_ENABLE_TASK;
-
-	priv = tsk->priv;
-	priv->fifo = fifo;
-
-	if (bcom_fec_tx_reset(tsk)) {
-		bcom_task_free(tsk);
-		return NULL;
-	}
-
-	return tsk;
-}
-EXPORT_SYMBOL_GPL(bcom_fec_tx_init);
-
-int
-bcom_fec_tx_reset(struct bcom_task *tsk)
-{
-	struct bcom_fec_priv *priv = tsk->priv;
-	struct bcom_fec_tx_var *var;
-	struct bcom_fec_tx_inc *inc;
-
-	/* Shutdown the task */
-	bcom_disable_task(tsk->tasknum);
-
-	/* Reset the microcode */
-	var = (struct bcom_fec_tx_var *) bcom_task_var(tsk->tasknum);
-	inc = (struct bcom_fec_tx_inc *) bcom_task_inc(tsk->tasknum);
-
-	if (bcom_load_image(tsk->tasknum, bcom_fec_tx_task))
-		return -1;
-
-	var->enable	= bcom_eng->regs_base +
-				offsetof(struct mpc52xx_sdma, tcr[tsk->tasknum]);
-	var->fifo	= (u32) priv->fifo;
-	var->DRD	= bcom_sram_va2pa(self_modified_drd(tsk->tasknum));
-	var->bd_base	= tsk->bd_pa;
-	var->bd_last	= tsk->bd_pa + ((tsk->num_bd-1) * tsk->bd_size);
-	var->bd_start	= tsk->bd_pa;
-
-	inc->incr_bytes	= -(s16)sizeof(u32);	/* These should be in the   */
-	inc->incr_src	= sizeof(u32);		/* task image, but we stick */
-	inc->incr_src_ma= sizeof(u8);		/* to the official ones     */
-
-	/* Reset the BDs */
-	tsk->index = 0;
-	tsk->outdex = 0;
-
-	memset(tsk->bd, 0x00, tsk->num_bd * tsk->bd_size);
-
-	/* Configure some stuff */
-	bcom_set_task_pragma(tsk->tasknum, BCOM_FEC_TX_BD_PRAGMA);
-	bcom_set_task_auto_start(tsk->tasknum, tsk->tasknum);
-
-	out_8(&bcom_eng->regs->ipr[BCOM_INITIATOR_FEC_TX], BCOM_IPR_FEC_TX);
-
-	out_be32(&bcom_eng->regs->IntPend, 1<<tsk->tasknum);	/* Clear ints */
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(bcom_fec_tx_reset);
-
-void
-bcom_fec_tx_release(struct bcom_task *tsk)
-{
-	/* Nothing special for the FEC tasks */
-	bcom_task_free(tsk);
-}
-EXPORT_SYMBOL_GPL(bcom_fec_tx_release);
-
-
-MODULE_DESCRIPTION("BestComm FEC tasks driver");
-MODULE_AUTHOR("Dale Farnsworth <dfarnsworth@mvista.com>");
-MODULE_LICENSE("GPL v2");
-
diff --git a/arch/powerpc/sysdev/bestcomm/fec.h b/arch/powerpc/sysdev/bestcomm/fec.h
deleted file mode 100644
index ee565d94d503..000000000000
--- a/arch/powerpc/sysdev/bestcomm/fec.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * Header for Bestcomm FEC tasks driver
- *
- *
- * Copyright (C) 2006-2007 Sylvain Munaut <tnt@246tNt.com>
- * Copyright (C) 2003-2004 MontaVista, Software, Inc.
- *                         ( by Dale Farnsworth <dfarnsworth@mvista.com> )
- *
- * This file is licensed under the terms of the GNU General Public License
- * version 2. This program is licensed "as is" without any warranty of any
- * kind, whether express or implied.
- */
-
-#ifndef __BESTCOMM_FEC_H__
-#define __BESTCOMM_FEC_H__
-
-
-struct bcom_fec_bd {
-	u32	status;
-	u32	skb_pa;
-};
-
-#define BCOM_FEC_TX_BD_TFD	0x08000000ul	/* transmit frame done */
-#define BCOM_FEC_TX_BD_TC	0x04000000ul	/* transmit CRC */
-#define BCOM_FEC_TX_BD_ABC	0x02000000ul	/* append bad CRC */
-
-#define BCOM_FEC_RX_BD_L	0x08000000ul	/* buffer is last in frame */
-#define BCOM_FEC_RX_BD_BC	0x00800000ul	/* DA is broadcast */
-#define BCOM_FEC_RX_BD_MC	0x00400000ul	/* DA is multicast and not broadcast */
-#define BCOM_FEC_RX_BD_LG	0x00200000ul	/* Rx frame length violation */
-#define BCOM_FEC_RX_BD_NO	0x00100000ul	/* Rx non-octet aligned frame */
-#define BCOM_FEC_RX_BD_CR	0x00040000ul	/* Rx CRC error */
-#define BCOM_FEC_RX_BD_OV	0x00020000ul	/* overrun */
-#define BCOM_FEC_RX_BD_TR	0x00010000ul	/* Rx frame truncated */
-#define BCOM_FEC_RX_BD_LEN_MASK	0x000007fful	/* mask for length of received frame */
-#define BCOM_FEC_RX_BD_ERRORS	(BCOM_FEC_RX_BD_LG | BCOM_FEC_RX_BD_NO | \
-		BCOM_FEC_RX_BD_CR | BCOM_FEC_RX_BD_OV | BCOM_FEC_RX_BD_TR)
-
-
-extern struct bcom_task *
-bcom_fec_rx_init(int queue_len, phys_addr_t fifo, int maxbufsize);
-
-extern int
-bcom_fec_rx_reset(struct bcom_task *tsk);
-
-extern void
-bcom_fec_rx_release(struct bcom_task *tsk);
-
-
-extern struct bcom_task *
-bcom_fec_tx_init(int queue_len, phys_addr_t fifo);
-
-extern int
-bcom_fec_tx_reset(struct bcom_task *tsk);
-
-extern void
-bcom_fec_tx_release(struct bcom_task *tsk);
-
-
-#endif /* __BESTCOMM_FEC_H__ */
-
diff --git a/arch/powerpc/sysdev/bestcomm/gen_bd.c b/arch/powerpc/sysdev/bestcomm/gen_bd.c
deleted file mode 100644
index e0a53e3147b2..000000000000
--- a/arch/powerpc/sysdev/bestcomm/gen_bd.c
+++ /dev/null
@@ -1,354 +0,0 @@
-/*
- * Driver for MPC52xx processor BestComm General Buffer Descriptor
- *
- * Copyright (C) 2007 Sylvain Munaut <tnt@246tNt.com>
- * Copyright (C) 2006 AppSpec Computer Technologies Corp.
- *                    Jeff Gibbons <jeff.gibbons@appspec.com>
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
- *
- */
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/types.h>
-#include <asm/errno.h>
-#include <asm/io.h>
-
-#include <asm/mpc52xx.h>
-#include <asm/mpc52xx_psc.h>
-
-#include "bestcomm.h"
-#include "bestcomm_priv.h"
-#include "gen_bd.h"
-
-
-/* ======================================================================== */
-/* Task image/var/inc                                                       */
-/* ======================================================================== */
-
-/* gen_bd tasks images */
-extern u32 bcom_gen_bd_rx_task[];
-extern u32 bcom_gen_bd_tx_task[];
-
-/* rx task vars that need to be set before enabling the task */
-struct bcom_gen_bd_rx_var {
-	u32 enable;		/* (u16*) address of task's control register */
-	u32 fifo;		/* (u32*) address of gen_bd's fifo */
-	u32 bd_base;		/* (struct bcom_bd*) beginning of ring buffer */
-	u32 bd_last;		/* (struct bcom_bd*) end of ring buffer */
-	u32 bd_start;		/* (struct bcom_bd*) current bd */
-	u32 buffer_size;	/* size of receive buffer */
-};
-
-/* rx task incs that need to be set before enabling the task */
-struct bcom_gen_bd_rx_inc {
-	u16 pad0;
-	s16 incr_bytes;
-	u16 pad1;
-	s16 incr_dst;
-};
-
-/* tx task vars that need to be set before enabling the task */
-struct bcom_gen_bd_tx_var {
-	u32 fifo;		/* (u32*) address of gen_bd's fifo */
-	u32 enable;		/* (u16*) address of task's control register */
-	u32 bd_base;		/* (struct bcom_bd*) beginning of ring buffer */
-	u32 bd_last;		/* (struct bcom_bd*) end of ring buffer */
-	u32 bd_start;		/* (struct bcom_bd*) current bd */
-	u32 buffer_size;	/* set by uCode for each packet */
-};
-
-/* tx task incs that need to be set before enabling the task */
-struct bcom_gen_bd_tx_inc {
-	u16 pad0;
-	s16 incr_bytes;
-	u16 pad1;
-	s16 incr_src;
-	u16 pad2;
-	s16 incr_src_ma;
-};
-
-/* private structure */
-struct bcom_gen_bd_priv {
-	phys_addr_t	fifo;
-	int		initiator;
-	int		ipr;
-	int		maxbufsize;
-};
-
-
-/* ======================================================================== */
-/* Task support code                                                        */
-/* ======================================================================== */
-
-struct bcom_task *
-bcom_gen_bd_rx_init(int queue_len, phys_addr_t fifo,
-			int initiator, int ipr, int maxbufsize)
-{
-	struct bcom_task *tsk;
-	struct bcom_gen_bd_priv *priv;
-
-	tsk = bcom_task_alloc(queue_len, sizeof(struct bcom_gen_bd),
-			sizeof(struct bcom_gen_bd_priv));
-	if (!tsk)
-		return NULL;
-
-	tsk->flags = BCOM_FLAGS_NONE;
-
-	priv = tsk->priv;
-	priv->fifo	= fifo;
-	priv->initiator	= initiator;
-	priv->ipr	= ipr;
-	priv->maxbufsize = maxbufsize;
-
-	if (bcom_gen_bd_rx_reset(tsk)) {
-		bcom_task_free(tsk);
-		return NULL;
-	}
-
-	return tsk;
-}
-EXPORT_SYMBOL_GPL(bcom_gen_bd_rx_init);
-
-int
-bcom_gen_bd_rx_reset(struct bcom_task *tsk)
-{
-	struct bcom_gen_bd_priv *priv = tsk->priv;
-	struct bcom_gen_bd_rx_var *var;
-	struct bcom_gen_bd_rx_inc *inc;
-
-	/* Shutdown the task */
-	bcom_disable_task(tsk->tasknum);
-
-	/* Reset the microcode */
-	var = (struct bcom_gen_bd_rx_var *) bcom_task_var(tsk->tasknum);
-	inc = (struct bcom_gen_bd_rx_inc *) bcom_task_inc(tsk->tasknum);
-
-	if (bcom_load_image(tsk->tasknum, bcom_gen_bd_rx_task))
-		return -1;
-
-	var->enable	= bcom_eng->regs_base +
-				offsetof(struct mpc52xx_sdma, tcr[tsk->tasknum]);
-	var->fifo	= (u32) priv->fifo;
-	var->bd_base	= tsk->bd_pa;
-	var->bd_last	= tsk->bd_pa + ((tsk->num_bd-1) * tsk->bd_size);
-	var->bd_start	= tsk->bd_pa;
-	var->buffer_size = priv->maxbufsize;
-
-	inc->incr_bytes	= -(s16)sizeof(u32);
-	inc->incr_dst	= sizeof(u32);
-
-	/* Reset the BDs */
-	tsk->index = 0;
-	tsk->outdex = 0;
-
-	memset(tsk->bd, 0x00, tsk->num_bd * tsk->bd_size);
-
-	/* Configure some stuff */
-	bcom_set_task_pragma(tsk->tasknum, BCOM_GEN_RX_BD_PRAGMA);
-	bcom_set_task_auto_start(tsk->tasknum, tsk->tasknum);
-
-	out_8(&bcom_eng->regs->ipr[priv->initiator], priv->ipr);
-	bcom_set_initiator(tsk->tasknum, priv->initiator);
-
-	out_be32(&bcom_eng->regs->IntPend, 1<<tsk->tasknum);	/* Clear ints */
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(bcom_gen_bd_rx_reset);
-
-void
-bcom_gen_bd_rx_release(struct bcom_task *tsk)
-{
-	/* Nothing special for the GenBD tasks */
-	bcom_task_free(tsk);
-}
-EXPORT_SYMBOL_GPL(bcom_gen_bd_rx_release);
-
-
-extern struct bcom_task *
-bcom_gen_bd_tx_init(int queue_len, phys_addr_t fifo,
-			int initiator, int ipr)
-{
-	struct bcom_task *tsk;
-	struct bcom_gen_bd_priv *priv;
-
-	tsk = bcom_task_alloc(queue_len, sizeof(struct bcom_gen_bd),
-			sizeof(struct bcom_gen_bd_priv));
-	if (!tsk)
-		return NULL;
-
-	tsk->flags = BCOM_FLAGS_NONE;
-
-	priv = tsk->priv;
-	priv->fifo	= fifo;
-	priv->initiator	= initiator;
-	priv->ipr	= ipr;
-
-	if (bcom_gen_bd_tx_reset(tsk)) {
-		bcom_task_free(tsk);
-		return NULL;
-	}
-
-	return tsk;
-}
-EXPORT_SYMBOL_GPL(bcom_gen_bd_tx_init);
-
-int
-bcom_gen_bd_tx_reset(struct bcom_task *tsk)
-{
-	struct bcom_gen_bd_priv *priv = tsk->priv;
-	struct bcom_gen_bd_tx_var *var;
-	struct bcom_gen_bd_tx_inc *inc;
-
-	/* Shutdown the task */
-	bcom_disable_task(tsk->tasknum);
-
-	/* Reset the microcode */
-	var = (struct bcom_gen_bd_tx_var *) bcom_task_var(tsk->tasknum);
-	inc = (struct bcom_gen_bd_tx_inc *) bcom_task_inc(tsk->tasknum);
-
-	if (bcom_load_image(tsk->tasknum, bcom_gen_bd_tx_task))
-		return -1;
-
-	var->enable	= bcom_eng->regs_base +
-				offsetof(struct mpc52xx_sdma, tcr[tsk->tasknum]);
-	var->fifo	= (u32) priv->fifo;
-	var->bd_base	= tsk->bd_pa;
-	var->bd_last	= tsk->bd_pa + ((tsk->num_bd-1) * tsk->bd_size);
-	var->bd_start	= tsk->bd_pa;
-
-	inc->incr_bytes	= -(s16)sizeof(u32);
-	inc->incr_src	= sizeof(u32);
-	inc->incr_src_ma = sizeof(u8);
-
-	/* Reset the BDs */
-	tsk->index = 0;
-	tsk->outdex = 0;
-
-	memset(tsk->bd, 0x00, tsk->num_bd * tsk->bd_size);
-
-	/* Configure some stuff */
-	bcom_set_task_pragma(tsk->tasknum, BCOM_GEN_TX_BD_PRAGMA);
-	bcom_set_task_auto_start(tsk->tasknum, tsk->tasknum);
-
-	out_8(&bcom_eng->regs->ipr[priv->initiator], priv->ipr);
-	bcom_set_initiator(tsk->tasknum, priv->initiator);
-
-	out_be32(&bcom_eng->regs->IntPend, 1<<tsk->tasknum);	/* Clear ints */
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(bcom_gen_bd_tx_reset);
-
-void
-bcom_gen_bd_tx_release(struct bcom_task *tsk)
-{
-	/* Nothing special for the GenBD tasks */
-	bcom_task_free(tsk);
-}
-EXPORT_SYMBOL_GPL(bcom_gen_bd_tx_release);
-
-/* ---------------------------------------------------------------------
- * PSC support code
- */
-
-/**
- * bcom_psc_parameters - Bestcomm initialization value table for PSC devices
- *
- * This structure is only used internally.  It is a lookup table for PSC
- * specific parameters to bestcomm tasks.
- */
-static struct bcom_psc_params {
-	int rx_initiator;
-	int rx_ipr;
-	int tx_initiator;
-	int tx_ipr;
-} bcom_psc_params[] = {
-	[0] = {
-		.rx_initiator = BCOM_INITIATOR_PSC1_RX,
-		.rx_ipr = BCOM_IPR_PSC1_RX,
-		.tx_initiator = BCOM_INITIATOR_PSC1_TX,
-		.tx_ipr = BCOM_IPR_PSC1_TX,
-	},
-	[1] = {
-		.rx_initiator = BCOM_INITIATOR_PSC2_RX,
-		.rx_ipr = BCOM_IPR_PSC2_RX,
-		.tx_initiator = BCOM_INITIATOR_PSC2_TX,
-		.tx_ipr = BCOM_IPR_PSC2_TX,
-	},
-	[2] = {
-		.rx_initiator = BCOM_INITIATOR_PSC3_RX,
-		.rx_ipr = BCOM_IPR_PSC3_RX,
-		.tx_initiator = BCOM_INITIATOR_PSC3_TX,
-		.tx_ipr = BCOM_IPR_PSC3_TX,
-	},
-	[3] = {
-		.rx_initiator = BCOM_INITIATOR_PSC4_RX,
-		.rx_ipr = BCOM_IPR_PSC4_RX,
-		.tx_initiator = BCOM_INITIATOR_PSC4_TX,
-		.tx_ipr = BCOM_IPR_PSC4_TX,
-	},
-	[4] = {
-		.rx_initiator = BCOM_INITIATOR_PSC5_RX,
-		.rx_ipr = BCOM_IPR_PSC5_RX,
-		.tx_initiator = BCOM_INITIATOR_PSC5_TX,
-		.tx_ipr = BCOM_IPR_PSC5_TX,
-	},
-	[5] = {
-		.rx_initiator = BCOM_INITIATOR_PSC6_RX,
-		.rx_ipr = BCOM_IPR_PSC6_RX,
-		.tx_initiator = BCOM_INITIATOR_PSC6_TX,
-		.tx_ipr = BCOM_IPR_PSC6_TX,
-	},
-};
-
-/**
- * bcom_psc_gen_bd_rx_init - Allocate a receive bcom_task for a PSC port
- * @psc_num:	Number of the PSC to allocate a task for
- * @queue_len:	number of buffer descriptors to allocate for the task
- * @fifo:	physical address of FIFO register
- * @maxbufsize:	Maximum receive data size in bytes.
- *
- * Allocate a bestcomm task structure for receiving data from a PSC.
- */
-struct bcom_task * bcom_psc_gen_bd_rx_init(unsigned psc_num, int queue_len,
-					   phys_addr_t fifo, int maxbufsize)
-{
-	if (psc_num >= MPC52xx_PSC_MAXNUM)
-		return NULL;
-
-	return bcom_gen_bd_rx_init(queue_len, fifo,
-				   bcom_psc_params[psc_num].rx_initiator,
-				   bcom_psc_params[psc_num].rx_ipr,
-				   maxbufsize);
-}
-EXPORT_SYMBOL_GPL(bcom_psc_gen_bd_rx_init);
-
-/**
- * bcom_psc_gen_bd_tx_init - Allocate a transmit bcom_task for a PSC port
- * @psc_num:	Number of the PSC to allocate a task for
- * @queue_len:	number of buffer descriptors to allocate for the task
- * @fifo:	physical address of FIFO register
- *
- * Allocate a bestcomm task structure for transmitting data to a PSC.
- */
-struct bcom_task *
-bcom_psc_gen_bd_tx_init(unsigned psc_num, int queue_len, phys_addr_t fifo)
-{
-	struct psc;
-	return bcom_gen_bd_tx_init(queue_len, fifo,
-				   bcom_psc_params[psc_num].tx_initiator,
-				   bcom_psc_params[psc_num].tx_ipr);
-}
-EXPORT_SYMBOL_GPL(bcom_psc_gen_bd_tx_init);
-
-
-MODULE_DESCRIPTION("BestComm General Buffer Descriptor tasks driver");
-MODULE_AUTHOR("Jeff Gibbons <jeff.gibbons@appspec.com>");
-MODULE_LICENSE("GPL v2");
-
diff --git a/arch/powerpc/sysdev/bestcomm/gen_bd.h b/arch/powerpc/sysdev/bestcomm/gen_bd.h
deleted file mode 100644
index de47260e69da..000000000000
--- a/arch/powerpc/sysdev/bestcomm/gen_bd.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Header for Bestcomm General Buffer Descriptor tasks driver
- *
- *
- * Copyright (C) 2007 Sylvain Munaut <tnt@246tNt.com>
- * Copyright (C) 2006 AppSpec Computer Technologies Corp.
- *                    Jeff Gibbons <jeff.gibbons@appspec.com>
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
- *
- *
- */
-
-#ifndef __BESTCOMM_GEN_BD_H__
-#define __BESTCOMM_GEN_BD_H__
-
-struct bcom_gen_bd {
-	u32	status;
-	u32	buf_pa;
-};
-
-
-extern struct bcom_task *
-bcom_gen_bd_rx_init(int queue_len, phys_addr_t fifo,
-			int initiator, int ipr, int maxbufsize);
-
-extern int
-bcom_gen_bd_rx_reset(struct bcom_task *tsk);
-
-extern void
-bcom_gen_bd_rx_release(struct bcom_task *tsk);
-
-
-extern struct bcom_task *
-bcom_gen_bd_tx_init(int queue_len, phys_addr_t fifo,
-			int initiator, int ipr);
-
-extern int
-bcom_gen_bd_tx_reset(struct bcom_task *tsk);
-
-extern void
-bcom_gen_bd_tx_release(struct bcom_task *tsk);
-
-
-/* PSC support utility wrappers */
-struct bcom_task * bcom_psc_gen_bd_rx_init(unsigned psc_num, int queue_len,
-					   phys_addr_t fifo, int maxbufsize);
-struct bcom_task * bcom_psc_gen_bd_tx_init(unsigned psc_num, int queue_len,
-					   phys_addr_t fifo);
-#endif  /* __BESTCOMM_GEN_BD_H__ */
-
diff --git a/arch/powerpc/sysdev/bestcomm/sram.c b/arch/powerpc/sysdev/bestcomm/sram.c
deleted file mode 100644
index b6db23e085fb..000000000000
--- a/arch/powerpc/sysdev/bestcomm/sram.c
+++ /dev/null
@@ -1,178 +0,0 @@
-/*
- * Simple memory allocator for on-board SRAM
- *
- *
- * Maintainer : Sylvain Munaut <tnt@246tNt.com>
- *
- * Copyright (C) 2005 Sylvain Munaut <tnt@246tNt.com>
- *
- * This file is licensed under the terms of the GNU General Public License
- * version 2. This program is licensed "as is" without any warranty of any
- * kind, whether express or implied.
- */
-
-#include <linux/err.h>
-#include <linux/kernel.h>
-#include <linux/export.h>
-#include <linux/slab.h>
-#include <linux/spinlock.h>
-#include <linux/string.h>
-#include <linux/ioport.h>
-#include <linux/of.h>
-
-#include <asm/io.h>
-#include <asm/mmu.h>
-
-#include "sram.h"
-
-
-/* Struct keeping our 'state' */
-struct bcom_sram *bcom_sram = NULL;
-EXPORT_SYMBOL_GPL(bcom_sram);	/* needed for inline functions */
-
-
-/* ======================================================================== */
-/* Public API                                                               */
-/* ======================================================================== */
-/* DO NOT USE in interrupts, if needed in irq handler, we should use the
-   _irqsave version of the spin_locks */
-
-int bcom_sram_init(struct device_node *sram_node, char *owner)
-{
-	int rv;
-	const u32 *regaddr_p;
-	u64 regaddr64, size64;
-	unsigned int psize;
-
-	/* Create our state struct */
-	if (bcom_sram) {
-		printk(KERN_ERR "%s: bcom_sram_init: "
-			"Already initialized !\n", owner);
-		return -EBUSY;
-	}
-
-	bcom_sram = kmalloc(sizeof(struct bcom_sram), GFP_KERNEL);
-	if (!bcom_sram) {
-		printk(KERN_ERR "%s: bcom_sram_init: "
-			"Couldn't allocate internal state !\n", owner);
-		return -ENOMEM;
-	}
-
-	/* Get address and size of the sram */
-	regaddr_p = of_get_address(sram_node, 0, &size64, NULL);
-	if (!regaddr_p) {
-		printk(KERN_ERR "%s: bcom_sram_init: "
-			"Invalid device node !\n", owner);
-		rv = -EINVAL;
-		goto error_free;
-	}
-
-	regaddr64 = of_translate_address(sram_node, regaddr_p);
-
-	bcom_sram->base_phys = (phys_addr_t) regaddr64;
-	bcom_sram->size = (unsigned int) size64;
-
-	/* Request region */
-	if (!request_mem_region(bcom_sram->base_phys, bcom_sram->size, owner)) {
-		printk(KERN_ERR "%s: bcom_sram_init: "
-			"Couldn't request region !\n", owner);
-		rv = -EBUSY;
-		goto error_free;
-	}
-
-	/* Map SRAM */
-		/* sram is not really __iomem */
-	bcom_sram->base_virt = (void*) ioremap(bcom_sram->base_phys, bcom_sram->size);
-
-	if (!bcom_sram->base_virt) {
-		printk(KERN_ERR "%s: bcom_sram_init: "
-			"Map error SRAM zone 0x%08lx (0x%0x)!\n",
-			owner, (long)bcom_sram->base_phys, bcom_sram->size );
-		rv = -ENOMEM;
-		goto error_release;
-	}
-
-	/* Create an rheap (defaults to 32 bits word alignment) */
-	bcom_sram->rh = rh_create(4);
-
-	/* Attach the free zones */
-#if 0
-	/* Currently disabled ... for future use only */
-	reg_addr_p = of_get_property(sram_node, "available", &psize);
-#else
-	regaddr_p = NULL;
-	psize = 0;
-#endif
-
-	if (!regaddr_p || !psize) {
-		/* Attach the whole zone */
-		rh_attach_region(bcom_sram->rh, 0, bcom_sram->size);
-	} else {
-		/* Attach each zone independently */
-		while (psize >= 2 * sizeof(u32)) {
-			phys_addr_t zbase = of_translate_address(sram_node, regaddr_p);
-			rh_attach_region(bcom_sram->rh, zbase - bcom_sram->base_phys, regaddr_p[1]);
-			regaddr_p += 2;
-			psize -= 2 * sizeof(u32);
-		}
-	}
-
-	/* Init our spinlock */
-	spin_lock_init(&bcom_sram->lock);
-
-	return 0;
-
-error_release:
-	release_mem_region(bcom_sram->base_phys, bcom_sram->size);
-error_free:
-	kfree(bcom_sram);
-	bcom_sram = NULL;
-
-	return rv;
-}
-EXPORT_SYMBOL_GPL(bcom_sram_init);
-
-void bcom_sram_cleanup(void)
-{
-	/* Free resources */
-	if (bcom_sram) {
-		rh_destroy(bcom_sram->rh);
-		iounmap((void __iomem *)bcom_sram->base_virt);
-		release_mem_region(bcom_sram->base_phys, bcom_sram->size);
-		kfree(bcom_sram);
-		bcom_sram = NULL;
-	}
-}
-EXPORT_SYMBOL_GPL(bcom_sram_cleanup);
-
-void* bcom_sram_alloc(int size, int align, phys_addr_t *phys)
-{
-	unsigned long offset;
-
-	spin_lock(&bcom_sram->lock);
-	offset = rh_alloc_align(bcom_sram->rh, size, align, NULL);
-	spin_unlock(&bcom_sram->lock);
-
-	if (IS_ERR_VALUE(offset))
-		return NULL;
-
-	*phys = bcom_sram->base_phys + offset;
-	return bcom_sram->base_virt + offset;
-}
-EXPORT_SYMBOL_GPL(bcom_sram_alloc);
-
-void bcom_sram_free(void *ptr)
-{
-	unsigned long offset;
-
-	if (!ptr)
-		return;
-
-	offset = ptr - bcom_sram->base_virt;
-
-	spin_lock(&bcom_sram->lock);
-	rh_free(bcom_sram->rh, offset);
-	spin_unlock(&bcom_sram->lock);
-}
-EXPORT_SYMBOL_GPL(bcom_sram_free);
-
diff --git a/arch/powerpc/sysdev/bestcomm/sram.h b/arch/powerpc/sysdev/bestcomm/sram.h
deleted file mode 100644
index b6d668963cce..000000000000
--- a/arch/powerpc/sysdev/bestcomm/sram.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Handling of a sram zone for bestcomm
- *
- *
- * Copyright (C) 2007 Sylvain Munaut <tnt@246tNt.com>
- *
- * This file is licensed under the terms of the GNU General Public License
- * version 2. This program is licensed "as is" without any warranty of any
- * kind, whether express or implied.
- */
-
-#ifndef __BESTCOMM_SRAM_H__
-#define __BESTCOMM_SRAM_H__
-
-#include <asm/rheap.h>
-#include <asm/mmu.h>
-#include <linux/spinlock.h>
-
-
-/* Structure used internally */
-	/* The internals are here for the inline functions
-	 * sake, certainly not for the user to mess with !
-	 */
-struct bcom_sram {
-	phys_addr_t		 base_phys;
-	void 			*base_virt;
-	unsigned int		 size;
-	rh_info_t		*rh;
-	spinlock_t		 lock;
-};
-
-extern struct bcom_sram *bcom_sram;
-
-
-/* Public API */
-extern int  bcom_sram_init(struct device_node *sram_node, char *owner);
-extern void bcom_sram_cleanup(void);
-
-extern void* bcom_sram_alloc(int size, int align, phys_addr_t *phys);
-extern void  bcom_sram_free(void *ptr);
-
-static inline phys_addr_t bcom_sram_va2pa(void *va) {
-	return bcom_sram->base_phys +
-		(unsigned long)(va - bcom_sram->base_virt);
-}
-
-static inline void *bcom_sram_pa2va(phys_addr_t pa) {
-	return bcom_sram->base_virt +
-		(unsigned long)(pa - bcom_sram->base_phys);
-}
-
-
-#endif  /* __BESTCOMM_SRAM_H__ */
-
diff --git a/drivers/Makefile b/drivers/Makefile
index 7863b9fee50b..d8372ab2e4cf 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -29,7 +29,7 @@ obj-$(CONFIG_PNP)		+= pnp/
 obj-y				+= amba/
 # Many drivers will want to use DMA so this has to be made available
 # really early.
-obj-$(CONFIG_DMA_ENGINE)	+= dma/
+obj-$(CONFIG_DMADEVICES)	+= dma/
 
 obj-$(CONFIG_VIRTIO)		+= virtio/
 obj-$(CONFIG_XEN)		+= xen/
diff --git a/drivers/ata/pata_mpc52xx.c b/drivers/ata/pata_mpc52xx.c
index ec67f54dc56f..0b363e9d42a5 100644
--- a/drivers/ata/pata_mpc52xx.c
+++ b/drivers/ata/pata_mpc52xx.c
@@ -26,9 +26,9 @@
 #include <asm/prom.h>
 #include <asm/mpc52xx.h>
 
-#include <sysdev/bestcomm/bestcomm.h>
-#include <sysdev/bestcomm/bestcomm_priv.h>
-#include <sysdev/bestcomm/ata.h>
+#include <linux/fsl/bestcomm/bestcomm.h>
+#include <linux/fsl/bestcomm/bestcomm_priv.h>
+#include <linux/fsl/bestcomm/ata.h>
 
 #define DRV_NAME	"mpc52xx_ata"
 
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index d4c12180c654..40179e749f08 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -125,6 +125,8 @@ config MPC512X_DMA
 	---help---
 	  Enable support for the Freescale MPC512x built-in DMA engine.
 
+source "drivers/dma/bestcomm/Kconfig"
+
 config MV_XOR
 	bool "Marvell XOR engine support"
 	depends on PLAT_ORION
diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
index 7428feaa8705..642d96736cf5 100644
--- a/drivers/dma/Makefile
+++ b/drivers/dma/Makefile
@@ -10,6 +10,7 @@ obj-$(CONFIG_INTEL_IOATDMA) += ioat/
 obj-$(CONFIG_INTEL_IOP_ADMA) += iop-adma.o
 obj-$(CONFIG_FSL_DMA) += fsldma.o
 obj-$(CONFIG_MPC512X_DMA) += mpc512x_dma.o
+obj-$(CONFIG_PPC_BESTCOMM) += bestcomm/
 obj-$(CONFIG_MV_XOR) += mv_xor.o
 obj-$(CONFIG_DW_DMAC) += dw_dmac.o
 obj-$(CONFIG_AT_HDMAC) += at_hdmac.o
diff --git a/drivers/dma/bestcomm/Kconfig b/drivers/dma/bestcomm/Kconfig
new file mode 100644
index 000000000000..29e427085efb
--- /dev/null
+++ b/drivers/dma/bestcomm/Kconfig
@@ -0,0 +1,36 @@
+#
+# Kconfig options for Bestcomm
+#
+
+config PPC_BESTCOMM
+	tristate "Bestcomm DMA engine support"
+	depends on PPC_MPC52xx
+	default n
+	select PPC_LIB_RHEAP
+	help
+	  BestComm is the name of the communication coprocessor found
+	  on the Freescale MPC5200 family of processor.  Its usage is
+	  optional for some drivers (like ATA), but required for
+	  others (like FEC).
+
+	  If you want to use drivers that require DMA operations,
+	  answer Y or M. Otherwise say N.
+
+config PPC_BESTCOMM_ATA
+	tristate
+	depends on PPC_BESTCOMM
+	help
+	  This option enables the support for the ATA task.
+
+config PPC_BESTCOMM_FEC
+	tristate
+	depends on PPC_BESTCOMM
+	help
+	  This option enables the support for the FEC tasks.
+
+config PPC_BESTCOMM_GEN_BD
+	tristate
+	depends on PPC_BESTCOMM
+	help
+	  This option enables the support for the GenBD tasks.
+
diff --git a/drivers/dma/bestcomm/Makefile b/drivers/dma/bestcomm/Makefile
new file mode 100644
index 000000000000..aed2df2a6580
--- /dev/null
+++ b/drivers/dma/bestcomm/Makefile
@@ -0,0 +1,14 @@
+#
+# Makefile for BestComm & co
+#
+
+bestcomm-core-objs	:= bestcomm.o sram.o
+bestcomm-ata-objs	:= ata.o bcom_ata_task.o
+bestcomm-fec-objs	:= fec.o bcom_fec_rx_task.o bcom_fec_tx_task.o
+bestcomm-gen-bd-objs	:= gen_bd.o bcom_gen_bd_rx_task.o bcom_gen_bd_tx_task.o
+
+obj-$(CONFIG_PPC_BESTCOMM)		+= bestcomm-core.o
+obj-$(CONFIG_PPC_BESTCOMM_ATA)		+= bestcomm-ata.o
+obj-$(CONFIG_PPC_BESTCOMM_FEC)		+= bestcomm-fec.o
+obj-$(CONFIG_PPC_BESTCOMM_GEN_BD)	+= bestcomm-gen-bd.o
+ 
diff --git a/drivers/dma/bestcomm/ata.c b/drivers/dma/bestcomm/ata.c
new file mode 100644
index 000000000000..2fd87f83cf90
--- /dev/null
+++ b/drivers/dma/bestcomm/ata.c
@@ -0,0 +1,157 @@
+/*
+ * Bestcomm ATA task driver
+ *
+ *
+ * Patterned after bestcomm/fec.c by Dale Farnsworth <dfarnsworth@mvista.com>
+ *                                   2003-2004 (c) MontaVista, Software, Inc.
+ *
+ * Copyright (C) 2006-2007 Sylvain Munaut <tnt@246tNt.com>
+ * Copyright (C) 2006      Freescale - John Rigby
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2. This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <asm/io.h>
+
+#include <linux/fsl/bestcomm/bestcomm.h>
+#include <linux/fsl/bestcomm/bestcomm_priv.h>
+#include <linux/fsl/bestcomm/ata.h>
+
+
+/* ======================================================================== */
+/* Task image/var/inc                                                       */
+/* ======================================================================== */
+
+/* ata task image */
+extern u32 bcom_ata_task[];
+
+/* ata task vars that need to be set before enabling the task */
+struct bcom_ata_var {
+	u32 enable;		/* (u16*) address of task's control register */
+	u32 bd_base;		/* (struct bcom_bd*) beginning of ring buffer */
+	u32 bd_last;		/* (struct bcom_bd*) end of ring buffer */
+	u32 bd_start;		/* (struct bcom_bd*) current bd */
+	u32 buffer_size;	/* size of receive buffer */
+};
+
+/* ata task incs that need to be set before enabling the task */
+struct bcom_ata_inc {
+	u16 pad0;
+	s16 incr_bytes;
+	u16 pad1;
+	s16 incr_dst;
+	u16 pad2;
+	s16 incr_src;
+};
+
+
+/* ======================================================================== */
+/* Task support code                                                        */
+/* ======================================================================== */
+
+struct bcom_task *
+bcom_ata_init(int queue_len, int maxbufsize)
+{
+	struct bcom_task *tsk;
+	struct bcom_ata_var *var;
+	struct bcom_ata_inc *inc;
+
+	/* Prefetch breaks ATA DMA.  Turn it off for ATA DMA */
+	bcom_disable_prefetch();
+
+	tsk = bcom_task_alloc(queue_len, sizeof(struct bcom_ata_bd), 0);
+	if (!tsk)
+		return NULL;
+
+	tsk->flags = BCOM_FLAGS_NONE;
+
+	bcom_ata_reset_bd(tsk);
+
+	var = (struct bcom_ata_var *) bcom_task_var(tsk->tasknum);
+	inc = (struct bcom_ata_inc *) bcom_task_inc(tsk->tasknum);
+
+	if (bcom_load_image(tsk->tasknum, bcom_ata_task)) {
+		bcom_task_free(tsk);
+		return NULL;
+	}
+
+	var->enable	= bcom_eng->regs_base +
+				offsetof(struct mpc52xx_sdma, tcr[tsk->tasknum]);
+	var->bd_base	= tsk->bd_pa;
+	var->bd_last	= tsk->bd_pa + ((tsk->num_bd-1) * tsk->bd_size);
+	var->bd_start	= tsk->bd_pa;
+	var->buffer_size = maxbufsize;
+
+	/* Configure some stuff */
+	bcom_set_task_pragma(tsk->tasknum, BCOM_ATA_PRAGMA);
+	bcom_set_task_auto_start(tsk->tasknum, tsk->tasknum);
+
+	out_8(&bcom_eng->regs->ipr[BCOM_INITIATOR_ATA_RX], BCOM_IPR_ATA_RX);
+	out_8(&bcom_eng->regs->ipr[BCOM_INITIATOR_ATA_TX], BCOM_IPR_ATA_TX);
+
+	out_be32(&bcom_eng->regs->IntPend, 1<<tsk->tasknum); /* Clear ints */
+
+	return tsk;
+}
+EXPORT_SYMBOL_GPL(bcom_ata_init);
+
+void bcom_ata_rx_prepare(struct bcom_task *tsk)
+{
+	struct bcom_ata_inc *inc;
+
+	inc = (struct bcom_ata_inc *) bcom_task_inc(tsk->tasknum);
+
+	inc->incr_bytes	= -(s16)sizeof(u32);
+	inc->incr_src	= 0;
+	inc->incr_dst	= sizeof(u32);
+
+	bcom_set_initiator(tsk->tasknum, BCOM_INITIATOR_ATA_RX);
+}
+EXPORT_SYMBOL_GPL(bcom_ata_rx_prepare);
+
+void bcom_ata_tx_prepare(struct bcom_task *tsk)
+{
+	struct bcom_ata_inc *inc;
+
+	inc = (struct bcom_ata_inc *) bcom_task_inc(tsk->tasknum);
+
+	inc->incr_bytes	= -(s16)sizeof(u32);
+	inc->incr_src	= sizeof(u32);
+	inc->incr_dst	= 0;
+
+	bcom_set_initiator(tsk->tasknum, BCOM_INITIATOR_ATA_TX);
+}
+EXPORT_SYMBOL_GPL(bcom_ata_tx_prepare);
+
+void bcom_ata_reset_bd(struct bcom_task *tsk)
+{
+	struct bcom_ata_var *var;
+
+	/* Reset all BD */
+	memset(tsk->bd, 0x00, tsk->num_bd * tsk->bd_size);
+
+	tsk->index = 0;
+	tsk->outdex = 0;
+
+	var = (struct bcom_ata_var *) bcom_task_var(tsk->tasknum);
+	var->bd_start = var->bd_base;
+}
+EXPORT_SYMBOL_GPL(bcom_ata_reset_bd);
+
+void bcom_ata_release(struct bcom_task *tsk)
+{
+	/* Nothing special for the ATA tasks */
+	bcom_task_free(tsk);
+}
+EXPORT_SYMBOL_GPL(bcom_ata_release);
+
+
+MODULE_DESCRIPTION("BestComm ATA task driver");
+MODULE_AUTHOR("John Rigby");
+MODULE_LICENSE("GPL v2");
+
diff --git a/drivers/dma/bestcomm/bcom_ata_task.c b/drivers/dma/bestcomm/bcom_ata_task.c
new file mode 100644
index 000000000000..cc6049a4e469
--- /dev/null
+++ b/drivers/dma/bestcomm/bcom_ata_task.c
@@ -0,0 +1,67 @@
+/*
+ * Bestcomm ATA task microcode
+ *
+ * Copyright (c) 2004 Freescale Semiconductor, Inc.
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * Created based on bestcom/code_dma/image_rtos1/dma_image.hex
+ */
+
+#include <asm/types.h>
+
+/*
+ * The header consists of the following fields:
+ *	u32	magic;
+ *	u8	desc_size;
+ *	u8	var_size;
+ *	u8	inc_size;
+ *	u8	first_var;
+ *	u8	reserved[8];
+ *
+ * The size fields contain the number of 32-bit words.
+ */
+
+u32 bcom_ata_task[] = {
+	/* header */
+	0x4243544b,
+	0x0e060709,
+	0x00000000,
+	0x00000000,
+
+	/* Task descriptors */
+	0x8198009b, /* LCD: idx0 = var3; idx0 <= var2; idx0 += inc3 */
+	0x13e00c08, /*   DRD1A: var3 = var1; FN=0 MORE init=31 WS=0 RS=0 */
+	0xb8000264, /*   LCD: idx1 = *idx0, idx2 = var0; idx1 < var9; idx1 += inc4, idx2 += inc4 */
+	0x10000f00, /*     DRD1A: var3 = idx0; FN=0 MORE init=0 WS=0 RS=0 */
+	0x60140002, /*     DRD2A: EU0=0 EU1=0 EU2=0 EU3=2 EXT init=0 WS=2 RS=2 */
+	0x0c8cfc8a, /*     DRD2B1: *idx2 = EU3(); EU3(*idx2,var10)  */
+	0xd8988240, /*   LCDEXT: idx1 = idx1; idx1 > var9; idx1 += inc0 */
+	0xf845e011, /*   LCDEXT: idx2 = *(idx0 + var00000015); ; idx2 += inc2 */
+	0xb845e00a, /*   LCD: idx3 = *(idx0 + var00000019); ; idx3 += inc1 */
+	0x0bfecf90, /*     DRD1A: *idx3 = *idx2; FN=0 TFD init=31 WS=3 RS=3 */
+	0x9898802d, /*   LCD: idx1 = idx1; idx1 once var0; idx1 += inc5 */
+	0x64000005, /*     DRD2A: EU0=0 EU1=0 EU2=0 EU3=5 INT EXT init=0 WS=0 RS=0 */
+	0x0c0cf849, /*     DRD2B1: *idx0 = EU3(); EU3(idx1,var9)  */
+	0x000001f8, /* NOP */
+
+	/* VAR[9]-VAR[14] */
+	0x40000000,
+	0x7fff7fff,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+
+	/* INC[0]-INC[6] */
+	0x40000000,
+	0xe0000000,
+	0xe0000000,
+	0xa000000c,
+	0x20000000,
+	0x00000000,
+	0x00000000,
+};
+
diff --git a/drivers/dma/bestcomm/bcom_fec_rx_task.c b/drivers/dma/bestcomm/bcom_fec_rx_task.c
new file mode 100644
index 000000000000..a1ad6a02fcef
--- /dev/null
+++ b/drivers/dma/bestcomm/bcom_fec_rx_task.c
@@ -0,0 +1,78 @@
+/*
+ * Bestcomm FEC RX task microcode
+ *
+ * Copyright (c) 2004 Freescale Semiconductor, Inc.
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * Automatically created based on BestCommAPI-2.2/code_dma/image_rtos1/dma_image.hex
+ * on Tue Mar 22 11:19:38 2005 GMT
+ */
+
+#include <asm/types.h>
+
+/*
+ * The header consists of the following fields:
+ *	u32	magic;
+ *	u8	desc_size;
+ *	u8	var_size;
+ *	u8	inc_size;
+ *	u8	first_var;
+ *	u8	reserved[8];
+ *
+ * The size fields contain the number of 32-bit words.
+ */
+
+u32 bcom_fec_rx_task[] = {
+	/* header */
+	0x4243544b,
+	0x18060709,
+	0x00000000,
+	0x00000000,
+
+	/* Task descriptors */
+	0x808220e3, /* LCD: idx0 = var1, idx1 = var4; idx1 <= var3; idx0 += inc4, idx1 += inc3 */
+	0x10601010, /*   DRD1A: var4 = var2; FN=0 MORE init=3 WS=0 RS=0 */
+	0xb8800264, /*   LCD: idx2 = *idx1, idx3 = var0; idx2 < var9; idx2 += inc4, idx3 += inc4 */
+	0x10001308, /*     DRD1A: var4 = idx1; FN=0 MORE init=0 WS=0 RS=0 */
+	0x60140002, /*     DRD2A: EU0=0 EU1=0 EU2=0 EU3=2 EXT init=0 WS=2 RS=2 */
+	0x0cccfcca, /*     DRD2B1: *idx3 = EU3(); EU3(*idx3,var10)  */
+	0x80004000, /*   LCDEXT: idx2 = 0x00000000; ; */
+	0xb8c58029, /*   LCD: idx3 = *(idx1 + var00000015); idx3 once var0; idx3 += inc5 */
+	0x60000002, /*     DRD2A: EU0=0 EU1=0 EU2=0 EU3=2 EXT init=0 WS=0 RS=0 */
+	0x088cf8cc, /*     DRD2B1: idx2 = EU3(); EU3(idx3,var12)  */
+	0x991982f2, /*   LCD: idx2 = idx2, idx3 = idx3; idx2 > var11; idx2 += inc6, idx3 += inc2 */
+	0x006acf80, /*     DRD1A: *idx3 = *idx0; FN=0 init=3 WS=1 RS=1 */
+	0x80004000, /*   LCDEXT: idx2 = 0x00000000; ; */
+	0x9999802d, /*   LCD: idx3 = idx3; idx3 once var0; idx3 += inc5 */
+	0x70000002, /*     DRD2A: EU0=0 EU1=0 EU2=0 EU3=2 EXT MORE init=0 WS=0 RS=0 */
+	0x034cfc4e, /*     DRD2B1: var13 = EU3(); EU3(*idx1,var14)  */
+	0x00008868, /*     DRD1A: idx2 = var13; FN=0 init=0 WS=0 RS=0 */
+	0x99198341, /*   LCD: idx2 = idx2, idx3 = idx3; idx2 > var13; idx2 += inc0, idx3 += inc1 */
+	0x007ecf80, /*     DRD1A: *idx3 = *idx0; FN=0 init=3 WS=3 RS=3 */
+	0x99198272, /*   LCD: idx2 = idx2, idx3 = idx3; idx2 > var9; idx2 += inc6, idx3 += inc2 */
+	0x046acf80, /*     DRD1A: *idx3 = *idx0; FN=0 INT init=3 WS=1 RS=1 */
+	0x9819002d, /*   LCD: idx2 = idx0; idx2 once var0; idx2 += inc5 */
+	0x0060c790, /*     DRD1A: *idx1 = *idx2; FN=0 init=3 WS=0 RS=0 */
+	0x000001f8, /*   NOP */
+
+	/* VAR[9]-VAR[14] */
+	0x40000000,
+	0x7fff7fff,
+	0x00000000,
+	0x00000003,
+	0x40000008,
+	0x43ffffff,
+
+	/* INC[0]-INC[6] */
+	0x40000000,
+	0xe0000000,
+	0xe0000000,
+	0xa0000008,
+	0x20000000,
+	0x00000000,
+	0x4000ffff,
+};
+
diff --git a/drivers/dma/bestcomm/bcom_fec_tx_task.c b/drivers/dma/bestcomm/bcom_fec_tx_task.c
new file mode 100644
index 000000000000..b1c495c3a65a
--- /dev/null
+++ b/drivers/dma/bestcomm/bcom_fec_tx_task.c
@@ -0,0 +1,91 @@
+/*
+ * Bestcomm FEC TX task microcode
+ *
+ * Copyright (c) 2004 Freescale Semiconductor, Inc.
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * Automatically created based on BestCommAPI-2.2/code_dma/image_rtos1/dma_image.hex
+ * on Tue Mar 22 11:19:29 2005 GMT
+ */
+
+#include <asm/types.h>
+
+/*
+ * The header consists of the following fields:
+ *	u32	magic;
+ *	u8	desc_size;
+ *	u8	var_size;
+ *	u8	inc_size;
+ *	u8	first_var;
+ *	u8	reserved[8];
+ *
+ * The size fields contain the number of 32-bit words.
+ */
+
+u32 bcom_fec_tx_task[] = {
+	/* header */
+	0x4243544b,
+	0x2407070d,
+	0x00000000,
+	0x00000000,
+
+	/* Task descriptors */
+	0x8018001b, /* LCD: idx0 = var0; idx0 <= var0; idx0 += inc3 */
+	0x60000005, /*   DRD2A: EU0=0 EU1=0 EU2=0 EU3=5 EXT init=0 WS=0 RS=0 */
+	0x01ccfc0d, /*   DRD2B1: var7 = EU3(); EU3(*idx0,var13)  */
+	0x8082a123, /* LCD: idx0 = var1, idx1 = var5; idx1 <= var4; idx0 += inc4, idx1 += inc3 */
+	0x10801418, /*   DRD1A: var5 = var3; FN=0 MORE init=4 WS=0 RS=0 */
+	0xf88103a4, /*   LCDEXT: idx2 = *idx1, idx3 = var2; idx2 < var14; idx2 += inc4, idx3 += inc4 */
+	0x801a6024, /*   LCD: idx4 = var0; ; idx4 += inc4 */
+	0x10001708, /*     DRD1A: var5 = idx1; FN=0 MORE init=0 WS=0 RS=0 */
+	0x60140002, /*     DRD2A: EU0=0 EU1=0 EU2=0 EU3=2 EXT init=0 WS=2 RS=2 */
+	0x0cccfccf, /*     DRD2B1: *idx3 = EU3(); EU3(*idx3,var15)  */
+	0x991a002c, /*   LCD: idx2 = idx2, idx3 = idx4; idx2 once var0; idx2 += inc5, idx3 += inc4 */
+	0x70000002, /*     DRD2A: EU0=0 EU1=0 EU2=0 EU3=2 EXT MORE init=0 WS=0 RS=0 */
+	0x024cfc4d, /*     DRD2B1: var9 = EU3(); EU3(*idx1,var13)  */
+	0x60000003, /*     DRD2A: EU0=0 EU1=0 EU2=0 EU3=3 EXT init=0 WS=0 RS=0 */
+	0x0cccf247, /*     DRD2B1: *idx3 = EU3(); EU3(var9,var7)  */
+	0x80004000, /*   LCDEXT: idx2 = 0x00000000; ; */
+	0xb8c80029, /*   LCD: idx3 = *(idx1 + var0000001a); idx3 once var0; idx3 += inc5 */
+	0x70000002, /*     DRD2A: EU0=0 EU1=0 EU2=0 EU3=2 EXT MORE init=0 WS=0 RS=0 */
+	0x088cf8d1, /*     DRD2B1: idx2 = EU3(); EU3(idx3,var17)  */
+	0x00002f10, /*     DRD1A: var11 = idx2; FN=0 init=0 WS=0 RS=0 */
+	0x99198432, /*   LCD: idx2 = idx2, idx3 = idx3; idx2 > var16; idx2 += inc6, idx3 += inc2 */
+	0x008ac398, /*     DRD1A: *idx0 = *idx3; FN=0 init=4 WS=1 RS=1 */
+	0x80004000, /*   LCDEXT: idx2 = 0x00000000; ; */
+	0x9999802d, /*   LCD: idx3 = idx3; idx3 once var0; idx3 += inc5 */
+	0x70000002, /*     DRD2A: EU0=0 EU1=0 EU2=0 EU3=2 EXT MORE init=0 WS=0 RS=0 */
+	0x048cfc53, /*     DRD2B1: var18 = EU3(); EU3(*idx1,var19)  */
+	0x60000008, /*     DRD2A: EU0=0 EU1=0 EU2=0 EU3=8 EXT init=0 WS=0 RS=0 */
+	0x088cf48b, /*     DRD2B1: idx2 = EU3(); EU3(var18,var11)  */
+	0x99198481, /*   LCD: idx2 = idx2, idx3 = idx3; idx2 > var18; idx2 += inc0, idx3 += inc1 */
+	0x009ec398, /*     DRD1A: *idx0 = *idx3; FN=0 init=4 WS=3 RS=3 */
+	0x991983b2, /*   LCD: idx2 = idx2, idx3 = idx3; idx2 > var14; idx2 += inc6, idx3 += inc2 */
+	0x088ac398, /*     DRD1A: *idx0 = *idx3; FN=0 TFD init=4 WS=1 RS=1 */
+	0x9919002d, /*   LCD: idx2 = idx2; idx2 once var0; idx2 += inc5 */
+	0x60000005, /*     DRD2A: EU0=0 EU1=0 EU2=0 EU3=5 EXT init=0 WS=0 RS=0 */
+	0x0c4cf88e, /*     DRD2B1: *idx1 = EU3(); EU3(idx2,var14)  */
+	0x000001f8, /*   NOP */
+
+	/* VAR[13]-VAR[19] */
+	0x0c000000,
+	0x40000000,
+	0x7fff7fff,
+	0x00000000,
+	0x00000003,
+	0x40000004,
+	0x43ffffff,
+
+	/* INC[0]-INC[6] */
+	0x40000000,
+	0xe0000000,
+	0xe0000000,
+	0xa0000008,
+	0x20000000,
+	0x00000000,
+	0x4000ffff,
+};
+
diff --git a/drivers/dma/bestcomm/bcom_gen_bd_rx_task.c b/drivers/dma/bestcomm/bcom_gen_bd_rx_task.c
new file mode 100644
index 000000000000..efee022b0256
--- /dev/null
+++ b/drivers/dma/bestcomm/bcom_gen_bd_rx_task.c
@@ -0,0 +1,63 @@
+/*
+ * Bestcomm GenBD RX task microcode
+ *
+ * Copyright (C) 2006 AppSpec Computer Technologies Corp.
+ *                    Jeff Gibbons <jeff.gibbons@appspec.com>
+ * Copyright (c) 2004 Freescale Semiconductor, Inc.
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * Based on BestCommAPI-2.2/code_dma/image_rtos1/dma_image.hex
+ * on Tue Mar 4 10:14:12 2006 GMT
+ *
+ */
+
+#include <asm/types.h>
+
+/*
+ * The header consists of the following fields:
+ *	u32	magic;
+ *	u8	desc_size;
+ *	u8	var_size;
+ *	u8	inc_size;
+ *	u8	first_var;
+ *	u8	reserved[8];
+ *
+ * The size fields contain the number of 32-bit words.
+ */
+
+u32 bcom_gen_bd_rx_task[] = {
+	/* header */
+	0x4243544b,
+	0x0d020409,
+	0x00000000,
+	0x00000000,
+
+	/* Task descriptors */
+	0x808220da, /* LCD: idx0 = var1, idx1 = var4; idx1 <= var3; idx0 += inc3, idx1 += inc2 */
+	0x13e01010, /*   DRD1A: var4 = var2; FN=0 MORE init=31 WS=0 RS=0 */
+	0xb880025b, /*   LCD: idx2 = *idx1, idx3 = var0; idx2 < var9; idx2 += inc3, idx3 += inc3 */
+	0x10001308, /*     DRD1A: var4 = idx1; FN=0 MORE init=0 WS=0 RS=0 */
+	0x60140002, /*     DRD2A: EU0=0 EU1=0 EU2=0 EU3=2 EXT init=0 WS=2 RS=2 */
+	0x0cccfcca, /*     DRD2B1: *idx3 = EU3(); EU3(*idx3,var10)  */
+	0xd9190240, /*   LCDEXT: idx2 = idx2; idx2 > var9; idx2 += inc0 */
+	0xb8c5e009, /*   LCD: idx3 = *(idx1 + var00000015); ; idx3 += inc1 */
+	0x07fecf80, /*     DRD1A: *idx3 = *idx0; FN=0 INT init=31 WS=3 RS=3 */
+	0x99190024, /*   LCD: idx2 = idx2; idx2 once var0; idx2 += inc4 */
+	0x60000005, /*     DRD2A: EU0=0 EU1=0 EU2=0 EU3=5 EXT init=0 WS=0 RS=0 */
+	0x0c4cf889, /*     DRD2B1: *idx1 = EU3(); EU3(idx2,var9)  */
+	0x000001f8, /*   NOP */
+
+	/* VAR[9]-VAR[10] */
+	0x40000000,
+	0x7fff7fff,
+
+	/* INC[0]-INC[3] */
+	0x40000000,
+	0xe0000000,
+	0xa0000008,
+	0x20000000,
+};
+
diff --git a/drivers/dma/bestcomm/bcom_gen_bd_tx_task.c b/drivers/dma/bestcomm/bcom_gen_bd_tx_task.c
new file mode 100644
index 000000000000..c605aa42ecbb
--- /dev/null
+++ b/drivers/dma/bestcomm/bcom_gen_bd_tx_task.c
@@ -0,0 +1,69 @@
+/*
+ * Bestcomm GenBD TX task microcode
+ *
+ * Copyright (C) 2006 AppSpec Computer Technologies Corp.
+ *                    Jeff Gibbons <jeff.gibbons@appspec.com>
+ * Copyright (c) 2004 Freescale Semiconductor, Inc.
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * Based on BestCommAPI-2.2/code_dma/image_rtos1/dma_image.hex
+ * on Tue Mar 4 10:14:12 2006 GMT
+ *
+ */
+
+#include <asm/types.h>
+
+/*
+ * The header consists of the following fields:
+ *	u32	magic;
+ *	u8	desc_size;
+ *	u8	var_size;
+ *	u8	inc_size;
+ *	u8	first_var;
+ *	u8	reserved[8];
+ *
+ * The size fields contain the number of 32-bit words.
+ */
+
+u32 bcom_gen_bd_tx_task[] = {
+	/* header */
+	0x4243544b,
+	0x0f040609,
+	0x00000000,
+	0x00000000,
+
+	/* Task descriptors */
+	0x800220e3, /* LCD: idx0 = var0, idx1 = var4; idx1 <= var3; idx0 += inc4, idx1 += inc3 */
+	0x13e01010, /*   DRD1A: var4 = var2; FN=0 MORE init=31 WS=0 RS=0 */
+	0xb8808264, /*   LCD: idx2 = *idx1, idx3 = var1; idx2 < var9; idx2 += inc4, idx3 += inc4 */
+	0x10001308, /*     DRD1A: var4 = idx1; FN=0 MORE init=0 WS=0 RS=0 */
+	0x60140002, /*     DRD2A: EU0=0 EU1=0 EU2=0 EU3=2 EXT init=0 WS=2 RS=2 */
+	0x0cccfcca, /*     DRD2B1: *idx3 = EU3(); EU3(*idx3,var10)  */
+	0xd9190300, /*   LCDEXT: idx2 = idx2; idx2 > var12; idx2 += inc0 */
+	0xb8c5e009, /*   LCD: idx3 = *(idx1 + var00000015); ; idx3 += inc1 */
+	0x03fec398, /*     DRD1A: *idx0 = *idx3; FN=0 init=31 WS=3 RS=3 */
+	0x9919826a, /*   LCD: idx2 = idx2, idx3 = idx3; idx2 > var9; idx2 += inc5, idx3 += inc2 */
+	0x0feac398, /*     DRD1A: *idx0 = *idx3; FN=0 TFD INT init=31 WS=1 RS=1 */
+	0x99190036, /*   LCD: idx2 = idx2; idx2 once var0; idx2 += inc6 */
+	0x60000005, /*     DRD2A: EU0=0 EU1=0 EU2=0 EU3=5 EXT init=0 WS=0 RS=0 */
+	0x0c4cf889, /*     DRD2B1: *idx1 = EU3(); EU3(idx2,var9)  */
+	0x000001f8, /*   NOP */
+
+	/* VAR[9]-VAR[12] */
+	0x40000000,
+	0x7fff7fff,
+	0x00000000,
+	0x40000004,
+
+	/* INC[0]-INC[5] */
+	0x40000000,
+	0xe0000000,
+	0xe0000000,
+	0xa0000008,
+	0x20000000,
+	0x4000ffff,
+};
+
diff --git a/drivers/dma/bestcomm/bestcomm.c b/drivers/dma/bestcomm/bestcomm.c
new file mode 100644
index 000000000000..67371fb820d3
--- /dev/null
+++ b/drivers/dma/bestcomm/bestcomm.c
@@ -0,0 +1,532 @@
+/*
+ * Driver for MPC52xx processor BestComm peripheral controller
+ *
+ *
+ * Copyright (C) 2006-2007 Sylvain Munaut <tnt@246tNt.com>
+ * Copyright (C) 2005      Varma Electronics Oy,
+ *                         ( by Andrey Volkov <avolkov@varma-el.com> )
+ * Copyright (C) 2003-2004 MontaVista, Software, Inc.
+ *                         ( by Dale Farnsworth <dfarnsworth@mvista.com> )
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2. This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_platform.h>
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/mpc52xx.h>
+
+#include <linux/fsl/bestcomm/sram.h>
+#include <linux/fsl/bestcomm/bestcomm_priv.h>
+#include "linux/fsl/bestcomm/bestcomm.h"
+
+#define DRIVER_NAME "bestcomm-core"
+
+/* MPC5200 device tree match tables */
+static struct of_device_id mpc52xx_sram_ids[] __devinitdata = {
+	{ .compatible = "fsl,mpc5200-sram", },
+	{ .compatible = "mpc5200-sram", },
+	{}
+};
+
+
+struct bcom_engine *bcom_eng = NULL;
+EXPORT_SYMBOL_GPL(bcom_eng);	/* needed for inline functions */
+
+/* ======================================================================== */
+/* Public and private API                                                   */
+/* ======================================================================== */
+
+/* Private API */
+
+struct bcom_task *
+bcom_task_alloc(int bd_count, int bd_size, int priv_size)
+{
+	int i, tasknum = -1;
+	struct bcom_task *tsk;
+
+	/* Don't try to do anything if bestcomm init failed */
+	if (!bcom_eng)
+		return NULL;
+
+	/* Get and reserve a task num */
+	spin_lock(&bcom_eng->lock);
+
+	for (i=0; i<BCOM_MAX_TASKS; i++)
+		if (!bcom_eng->tdt[i].stop) {	/* we use stop as a marker */
+			bcom_eng->tdt[i].stop = 0xfffffffful; /* dummy addr */
+			tasknum = i;
+			break;
+		}
+
+	spin_unlock(&bcom_eng->lock);
+
+	if (tasknum < 0)
+		return NULL;
+
+	/* Allocate our structure */
+	tsk = kzalloc(sizeof(struct bcom_task) + priv_size, GFP_KERNEL);
+	if (!tsk)
+		goto error;
+
+	tsk->tasknum = tasknum;
+	if (priv_size)
+		tsk->priv = (void*)tsk + sizeof(struct bcom_task);
+
+	/* Get IRQ of that task */
+	tsk->irq = irq_of_parse_and_map(bcom_eng->ofnode, tsk->tasknum);
+	if (tsk->irq == NO_IRQ)
+		goto error;
+
+	/* Init the BDs, if needed */
+	if (bd_count) {
+		tsk->cookie = kmalloc(sizeof(void*) * bd_count, GFP_KERNEL);
+		if (!tsk->cookie)
+			goto error;
+
+		tsk->bd = bcom_sram_alloc(bd_count * bd_size, 4, &tsk->bd_pa);
+		if (!tsk->bd)
+			goto error;
+		memset(tsk->bd, 0x00, bd_count * bd_size);
+
+		tsk->num_bd = bd_count;
+		tsk->bd_size = bd_size;
+	}
+
+	return tsk;
+
+error:
+	if (tsk) {
+		if (tsk->irq != NO_IRQ)
+			irq_dispose_mapping(tsk->irq);
+		bcom_sram_free(tsk->bd);
+		kfree(tsk->cookie);
+		kfree(tsk);
+	}
+
+	bcom_eng->tdt[tasknum].stop = 0;
+
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(bcom_task_alloc);
+
+void
+bcom_task_free(struct bcom_task *tsk)
+{
+	/* Stop the task */
+	bcom_disable_task(tsk->tasknum);
+
+	/* Clear TDT */
+	bcom_eng->tdt[tsk->tasknum].start = 0;
+	bcom_eng->tdt[tsk->tasknum].stop  = 0;
+
+	/* Free everything */
+	irq_dispose_mapping(tsk->irq);
+	bcom_sram_free(tsk->bd);
+	kfree(tsk->cookie);
+	kfree(tsk);
+}
+EXPORT_SYMBOL_GPL(bcom_task_free);
+
+int
+bcom_load_image(int task, u32 *task_image)
+{
+	struct bcom_task_header *hdr = (struct bcom_task_header *)task_image;
+	struct bcom_tdt *tdt;
+	u32 *desc, *var, *inc;
+	u32 *desc_src, *var_src, *inc_src;
+
+	/* Safety checks */
+	if (hdr->magic != BCOM_TASK_MAGIC) {
+		printk(KERN_ERR DRIVER_NAME
+			": Trying to load invalid microcode\n");
+		return -EINVAL;
+	}
+
+	if ((task < 0) || (task >= BCOM_MAX_TASKS)) {
+		printk(KERN_ERR DRIVER_NAME
+			": Trying to load invalid task %d\n", task);
+		return -EINVAL;
+	}
+
+	/* Initial load or reload */
+	tdt = &bcom_eng->tdt[task];
+
+	if (tdt->start) {
+		desc = bcom_task_desc(task);
+		if (hdr->desc_size != bcom_task_num_descs(task)) {
+			printk(KERN_ERR DRIVER_NAME
+				": Trying to reload wrong task image "
+				"(%d size %d/%d)!\n",
+				task,
+				hdr->desc_size,
+				bcom_task_num_descs(task));
+			return -EINVAL;
+		}
+	} else {
+		phys_addr_t start_pa;
+
+		desc = bcom_sram_alloc(hdr->desc_size * sizeof(u32), 4, &start_pa);
+		if (!desc)
+			return -ENOMEM;
+
+		tdt->start = start_pa;
+		tdt->stop = start_pa + ((hdr->desc_size-1) * sizeof(u32));
+	}
+
+	var = bcom_task_var(task);
+	inc = bcom_task_inc(task);
+
+	/* Clear & copy */
+	memset(var, 0x00, BCOM_VAR_SIZE);
+	memset(inc, 0x00, BCOM_INC_SIZE);
+
+	desc_src = (u32 *)(hdr + 1);
+	var_src = desc_src + hdr->desc_size;
+	inc_src = var_src + hdr->var_size;
+
+	memcpy(desc, desc_src, hdr->desc_size * sizeof(u32));
+	memcpy(var + hdr->first_var, var_src, hdr->var_size * sizeof(u32));
+	memcpy(inc, inc_src, hdr->inc_size * sizeof(u32));
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(bcom_load_image);
+
+void
+bcom_set_initiator(int task, int initiator)
+{
+	int i;
+	int num_descs;
+	u32 *desc;
+	int next_drd_has_initiator;
+
+	bcom_set_tcr_initiator(task, initiator);
+
+	/* Just setting tcr is apparently not enough due to some problem */
+	/* with it. So we just go thru all the microcode and replace in  */
+	/* the DRD directly */
+
+	desc = bcom_task_desc(task);
+	next_drd_has_initiator = 1;
+	num_descs = bcom_task_num_descs(task);
+
+	for (i=0; i<num_descs; i++, desc++) {
+		if (!bcom_desc_is_drd(*desc))
+			continue;
+		if (next_drd_has_initiator)
+			if (bcom_desc_initiator(*desc) != BCOM_INITIATOR_ALWAYS)
+				bcom_set_desc_initiator(desc, initiator);
+		next_drd_has_initiator = !bcom_drd_is_extended(*desc);
+	}
+}
+EXPORT_SYMBOL_GPL(bcom_set_initiator);
+
+
+/* Public API */
+
+void
+bcom_enable(struct bcom_task *tsk)
+{
+	bcom_enable_task(tsk->tasknum);
+}
+EXPORT_SYMBOL_GPL(bcom_enable);
+
+void
+bcom_disable(struct bcom_task *tsk)
+{
+	bcom_disable_task(tsk->tasknum);
+}
+EXPORT_SYMBOL_GPL(bcom_disable);
+
+
+/* ======================================================================== */
+/* Engine init/cleanup                                                      */
+/* ======================================================================== */
+
+/* Function Descriptor table */
+/* this will need to be updated if Freescale changes their task code FDT */
+static u32 fdt_ops[] = {
+	0xa0045670,	/* FDT[48] - load_acc()	  */
+	0x80045670,	/* FDT[49] - unload_acc() */
+	0x21800000,	/* FDT[50] - and()        */
+	0x21e00000,	/* FDT[51] - or()         */
+	0x21500000,	/* FDT[52] - xor()        */
+	0x21400000,	/* FDT[53] - andn()       */
+	0x21500000,	/* FDT[54] - not()        */
+	0x20400000,	/* FDT[55] - add()        */
+	0x20500000,	/* FDT[56] - sub()        */
+	0x20800000,	/* FDT[57] - lsh()        */
+	0x20a00000,	/* FDT[58] - rsh()        */
+	0xc0170000,	/* FDT[59] - crc8()       */
+	0xc0145670,	/* FDT[60] - crc16()      */
+	0xc0345670,	/* FDT[61] - crc32()      */
+	0xa0076540,	/* FDT[62] - endian32()   */
+	0xa0000760,	/* FDT[63] - endian16()   */
+};
+
+
+static int __devinit
+bcom_engine_init(void)
+{
+	int task;
+	phys_addr_t tdt_pa, ctx_pa, var_pa, fdt_pa;
+	unsigned int tdt_size, ctx_size, var_size, fdt_size;
+
+	/* Allocate & clear SRAM zones for FDT, TDTs, contexts and vars/incs */
+	tdt_size = BCOM_MAX_TASKS * sizeof(struct bcom_tdt);
+	ctx_size = BCOM_MAX_TASKS * BCOM_CTX_SIZE;
+	var_size = BCOM_MAX_TASKS * (BCOM_VAR_SIZE + BCOM_INC_SIZE);
+	fdt_size = BCOM_FDT_SIZE;
+
+	bcom_eng->tdt = bcom_sram_alloc(tdt_size, sizeof(u32), &tdt_pa);
+	bcom_eng->ctx = bcom_sram_alloc(ctx_size, BCOM_CTX_ALIGN, &ctx_pa);
+	bcom_eng->var = bcom_sram_alloc(var_size, BCOM_VAR_ALIGN, &var_pa);
+	bcom_eng->fdt = bcom_sram_alloc(fdt_size, BCOM_FDT_ALIGN, &fdt_pa);
+
+	if (!bcom_eng->tdt || !bcom_eng->ctx || !bcom_eng->var || !bcom_eng->fdt) {
+		printk(KERN_ERR "DMA: SRAM alloc failed in engine init !\n");
+
+		bcom_sram_free(bcom_eng->tdt);
+		bcom_sram_free(bcom_eng->ctx);
+		bcom_sram_free(bcom_eng->var);
+		bcom_sram_free(bcom_eng->fdt);
+
+		return -ENOMEM;
+	}
+
+	memset(bcom_eng->tdt, 0x00, tdt_size);
+	memset(bcom_eng->ctx, 0x00, ctx_size);
+	memset(bcom_eng->var, 0x00, var_size);
+	memset(bcom_eng->fdt, 0x00, fdt_size);
+
+	/* Copy the FDT for the EU#3 */
+	memcpy(&bcom_eng->fdt[48], fdt_ops, sizeof(fdt_ops));
+
+	/* Initialize Task base structure */
+	for (task=0; task<BCOM_MAX_TASKS; task++)
+	{
+		out_be16(&bcom_eng->regs->tcr[task], 0);
+		out_8(&bcom_eng->regs->ipr[task], 0);
+
+		bcom_eng->tdt[task].context	= ctx_pa;
+		bcom_eng->tdt[task].var	= var_pa;
+		bcom_eng->tdt[task].fdt	= fdt_pa;
+
+		var_pa += BCOM_VAR_SIZE + BCOM_INC_SIZE;
+		ctx_pa += BCOM_CTX_SIZE;
+	}
+
+	out_be32(&bcom_eng->regs->taskBar, tdt_pa);
+
+	/* Init 'always' initiator */
+	out_8(&bcom_eng->regs->ipr[BCOM_INITIATOR_ALWAYS], BCOM_IPR_ALWAYS);
+
+	/* Disable COMM Bus Prefetch on the original 5200; it's broken */
+	if ((mfspr(SPRN_SVR) & MPC5200_SVR_MASK) == MPC5200_SVR)
+		bcom_disable_prefetch();
+
+	/* Init lock */
+	spin_lock_init(&bcom_eng->lock);
+
+	return 0;
+}
+
+static void
+bcom_engine_cleanup(void)
+{
+	int task;
+
+	/* Stop all tasks */
+	for (task=0; task<BCOM_MAX_TASKS; task++)
+	{
+		out_be16(&bcom_eng->regs->tcr[task], 0);
+		out_8(&bcom_eng->regs->ipr[task], 0);
+	}
+
+	out_be32(&bcom_eng->regs->taskBar, 0ul);
+
+	/* Release the SRAM zones */
+	bcom_sram_free(bcom_eng->tdt);
+	bcom_sram_free(bcom_eng->ctx);
+	bcom_sram_free(bcom_eng->var);
+	bcom_sram_free(bcom_eng->fdt);
+}
+
+
+/* ======================================================================== */
+/* OF platform driver                                                       */
+/* ======================================================================== */
+
+static int __devinit mpc52xx_bcom_probe(struct platform_device *op)
+{
+	struct device_node *ofn_sram;
+	struct resource res_bcom;
+
+	int rv;
+
+	/* Inform user we're ok so far */
+	printk(KERN_INFO "DMA: MPC52xx BestComm driver\n");
+
+	/* Get the bestcomm node */
+	of_node_get(op->dev.of_node);
+
+	/* Prepare SRAM */
+	ofn_sram = of_find_matching_node(NULL, mpc52xx_sram_ids);
+	if (!ofn_sram) {
+		printk(KERN_ERR DRIVER_NAME ": "
+			"No SRAM found in device tree\n");
+		rv = -ENODEV;
+		goto error_ofput;
+	}
+	rv = bcom_sram_init(ofn_sram, DRIVER_NAME);
+	of_node_put(ofn_sram);
+
+	if (rv) {
+		printk(KERN_ERR DRIVER_NAME ": "
+			"Error in SRAM init\n");
+		goto error_ofput;
+	}
+
+	/* Get a clean struct */
+	bcom_eng = kzalloc(sizeof(struct bcom_engine), GFP_KERNEL);
+	if (!bcom_eng) {
+		printk(KERN_ERR DRIVER_NAME ": "
+			"Can't allocate state structure\n");
+		rv = -ENOMEM;
+		goto error_sramclean;
+	}
+
+	/* Save the node */
+	bcom_eng->ofnode = op->dev.of_node;
+
+	/* Get, reserve & map io */
+	if (of_address_to_resource(op->dev.of_node, 0, &res_bcom)) {
+		printk(KERN_ERR DRIVER_NAME ": "
+			"Can't get resource\n");
+		rv = -EINVAL;
+		goto error_sramclean;
+	}
+
+	if (!request_mem_region(res_bcom.start, sizeof(struct mpc52xx_sdma),
+				DRIVER_NAME)) {
+		printk(KERN_ERR DRIVER_NAME ": "
+			"Can't request registers region\n");
+		rv = -EBUSY;
+		goto error_sramclean;
+	}
+
+	bcom_eng->regs_base = res_bcom.start;
+	bcom_eng->regs = ioremap(res_bcom.start, sizeof(struct mpc52xx_sdma));
+	if (!bcom_eng->regs) {
+		printk(KERN_ERR DRIVER_NAME ": "
+			"Can't map registers\n");
+		rv = -ENOMEM;
+		goto error_release;
+	}
+
+	/* Now, do the real init */
+	rv = bcom_engine_init();
+	if (rv)
+		goto error_unmap;
+
+	/* Done ! */
+	printk(KERN_INFO "DMA: MPC52xx BestComm engine @%08lx ok !\n",
+		(long)bcom_eng->regs_base);
+
+	return 0;
+
+	/* Error path */
+error_unmap:
+	iounmap(bcom_eng->regs);
+error_release:
+	release_mem_region(res_bcom.start, sizeof(struct mpc52xx_sdma));
+error_sramclean:
+	kfree(bcom_eng);
+	bcom_sram_cleanup();
+error_ofput:
+	of_node_put(op->dev.of_node);
+
+	printk(KERN_ERR "DMA: MPC52xx BestComm init failed !\n");
+
+	return rv;
+}
+
+
+static int mpc52xx_bcom_remove(struct platform_device *op)
+{
+	/* Clean up the engine */
+	bcom_engine_cleanup();
+
+	/* Cleanup SRAM */
+	bcom_sram_cleanup();
+
+	/* Release regs */
+	iounmap(bcom_eng->regs);
+	release_mem_region(bcom_eng->regs_base, sizeof(struct mpc52xx_sdma));
+
+	/* Release the node */
+	of_node_put(bcom_eng->ofnode);
+
+	/* Release memory */
+	kfree(bcom_eng);
+	bcom_eng = NULL;
+
+	return 0;
+}
+
+static struct of_device_id mpc52xx_bcom_of_match[] = {
+	{ .compatible = "fsl,mpc5200-bestcomm", },
+	{ .compatible = "mpc5200-bestcomm", },
+	{},
+};
+
+MODULE_DEVICE_TABLE(of, mpc52xx_bcom_of_match);
+
+
+static struct platform_driver mpc52xx_bcom_of_platform_driver = {
+	.probe		= mpc52xx_bcom_probe,
+	.remove		= mpc52xx_bcom_remove,
+	.driver = {
+		.name = DRIVER_NAME,
+		.owner = THIS_MODULE,
+		.of_match_table = mpc52xx_bcom_of_match,
+	},
+};
+
+
+/* ======================================================================== */
+/* Module                                                                   */
+/* ======================================================================== */
+
+static int __init
+mpc52xx_bcom_init(void)
+{
+	return platform_driver_register(&mpc52xx_bcom_of_platform_driver);
+}
+
+static void __exit
+mpc52xx_bcom_exit(void)
+{
+	platform_driver_unregister(&mpc52xx_bcom_of_platform_driver);
+}
+
+/* If we're not a module, we must make sure everything is setup before  */
+/* anyone tries to use us ... that's why we use subsys_initcall instead */
+/* of module_init. */
+subsys_initcall(mpc52xx_bcom_init);
+module_exit(mpc52xx_bcom_exit);
+
+MODULE_DESCRIPTION("Freescale MPC52xx BestComm DMA");
+MODULE_AUTHOR("Sylvain Munaut <tnt@246tNt.com>");
+MODULE_AUTHOR("Andrey Volkov <avolkov@varma-el.com>");
+MODULE_AUTHOR("Dale Farnsworth <dfarnsworth@mvista.com>");
+MODULE_LICENSE("GPL v2");
+
diff --git a/drivers/dma/bestcomm/fec.c b/drivers/dma/bestcomm/fec.c
new file mode 100644
index 000000000000..7f1fb1c999e4
--- /dev/null
+++ b/drivers/dma/bestcomm/fec.c
@@ -0,0 +1,270 @@
+/*
+ * Bestcomm FEC tasks driver
+ *
+ *
+ * Copyright (C) 2006-2007 Sylvain Munaut <tnt@246tNt.com>
+ * Copyright (C) 2003-2004 MontaVista, Software, Inc.
+ *                         ( by Dale Farnsworth <dfarnsworth@mvista.com> )
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2. This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <asm/io.h>
+
+#include <linux/fsl/bestcomm/bestcomm.h>
+#include <linux/fsl/bestcomm/bestcomm_priv.h>
+#include <linux/fsl/bestcomm/fec.h>
+
+
+/* ======================================================================== */
+/* Task image/var/inc                                                       */
+/* ======================================================================== */
+
+/* fec tasks images */
+extern u32 bcom_fec_rx_task[];
+extern u32 bcom_fec_tx_task[];
+
+/* rx task vars that need to be set before enabling the task */
+struct bcom_fec_rx_var {
+	u32 enable;		/* (u16*) address of task's control register */
+	u32 fifo;		/* (u32*) address of fec's fifo */
+	u32 bd_base;		/* (struct bcom_bd*) beginning of ring buffer */
+	u32 bd_last;		/* (struct bcom_bd*) end of ring buffer */
+	u32 bd_start;		/* (struct bcom_bd*) current bd */
+	u32 buffer_size;	/* size of receive buffer */
+};
+
+/* rx task incs that need to be set before enabling the task */
+struct bcom_fec_rx_inc {
+	u16 pad0;
+	s16 incr_bytes;
+	u16 pad1;
+	s16 incr_dst;
+	u16 pad2;
+	s16 incr_dst_ma;
+};
+
+/* tx task vars that need to be set before enabling the task */
+struct bcom_fec_tx_var {
+	u32 DRD;		/* (u32*) address of self-modified DRD */
+	u32 fifo;		/* (u32*) address of fec's fifo */
+	u32 enable;		/* (u16*) address of task's control register */
+	u32 bd_base;		/* (struct bcom_bd*) beginning of ring buffer */
+	u32 bd_last;		/* (struct bcom_bd*) end of ring buffer */
+	u32 bd_start;		/* (struct bcom_bd*) current bd */
+	u32 buffer_size;	/* set by uCode for each packet */
+};
+
+/* tx task incs that need to be set before enabling the task */
+struct bcom_fec_tx_inc {
+	u16 pad0;
+	s16 incr_bytes;
+	u16 pad1;
+	s16 incr_src;
+	u16 pad2;
+	s16 incr_src_ma;
+};
+
+/* private structure in the task */
+struct bcom_fec_priv {
+	phys_addr_t	fifo;
+	int		maxbufsize;
+};
+
+
+/* ======================================================================== */
+/* Task support code                                                        */
+/* ======================================================================== */
+
+struct bcom_task *
+bcom_fec_rx_init(int queue_len, phys_addr_t fifo, int maxbufsize)
+{
+	struct bcom_task *tsk;
+	struct bcom_fec_priv *priv;
+
+	tsk = bcom_task_alloc(queue_len, sizeof(struct bcom_fec_bd),
+				sizeof(struct bcom_fec_priv));
+	if (!tsk)
+		return NULL;
+
+	tsk->flags = BCOM_FLAGS_NONE;
+
+	priv = tsk->priv;
+	priv->fifo = fifo;
+	priv->maxbufsize = maxbufsize;
+
+	if (bcom_fec_rx_reset(tsk)) {
+		bcom_task_free(tsk);
+		return NULL;
+	}
+
+	return tsk;
+}
+EXPORT_SYMBOL_GPL(bcom_fec_rx_init);
+
+int
+bcom_fec_rx_reset(struct bcom_task *tsk)
+{
+	struct bcom_fec_priv *priv = tsk->priv;
+	struct bcom_fec_rx_var *var;
+	struct bcom_fec_rx_inc *inc;
+
+	/* Shutdown the task */
+	bcom_disable_task(tsk->tasknum);
+
+	/* Reset the microcode */
+	var = (struct bcom_fec_rx_var *) bcom_task_var(tsk->tasknum);
+	inc = (struct bcom_fec_rx_inc *) bcom_task_inc(tsk->tasknum);
+
+	if (bcom_load_image(tsk->tasknum, bcom_fec_rx_task))
+		return -1;
+
+	var->enable	= bcom_eng->regs_base +
+				offsetof(struct mpc52xx_sdma, tcr[tsk->tasknum]);
+	var->fifo	= (u32) priv->fifo;
+	var->bd_base	= tsk->bd_pa;
+	var->bd_last	= tsk->bd_pa + ((tsk->num_bd-1) * tsk->bd_size);
+	var->bd_start	= tsk->bd_pa;
+	var->buffer_size = priv->maxbufsize;
+
+	inc->incr_bytes	= -(s16)sizeof(u32);	/* These should be in the   */
+	inc->incr_dst	= sizeof(u32);		/* task image, but we stick */
+	inc->incr_dst_ma= sizeof(u8);		/* to the official ones     */
+
+	/* Reset the BDs */
+	tsk->index = 0;
+	tsk->outdex = 0;
+
+	memset(tsk->bd, 0x00, tsk->num_bd * tsk->bd_size);
+
+	/* Configure some stuff */
+	bcom_set_task_pragma(tsk->tasknum, BCOM_FEC_RX_BD_PRAGMA);
+	bcom_set_task_auto_start(tsk->tasknum, tsk->tasknum);
+
+	out_8(&bcom_eng->regs->ipr[BCOM_INITIATOR_FEC_RX], BCOM_IPR_FEC_RX);
+
+	out_be32(&bcom_eng->regs->IntPend, 1<<tsk->tasknum);	/* Clear ints */
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(bcom_fec_rx_reset);
+
+void
+bcom_fec_rx_release(struct bcom_task *tsk)
+{
+	/* Nothing special for the FEC tasks */
+	bcom_task_free(tsk);
+}
+EXPORT_SYMBOL_GPL(bcom_fec_rx_release);
+
+
+
+	/* Return 2nd to last DRD */
+	/* This is an ugly hack, but at least it's only done
+	   once at initialization */
+static u32 *self_modified_drd(int tasknum)
+{
+	u32 *desc;
+	int num_descs;
+	int drd_count;
+	int i;
+
+	num_descs = bcom_task_num_descs(tasknum);
+	desc = bcom_task_desc(tasknum) + num_descs - 1;
+	drd_count = 0;
+	for (i=0; i<num_descs; i++, desc--)
+		if (bcom_desc_is_drd(*desc) && ++drd_count == 3)
+			break;
+	return desc;
+}
+
+struct bcom_task *
+bcom_fec_tx_init(int queue_len, phys_addr_t fifo)
+{
+	struct bcom_task *tsk;
+	struct bcom_fec_priv *priv;
+
+	tsk = bcom_task_alloc(queue_len, sizeof(struct bcom_fec_bd),
+				sizeof(struct bcom_fec_priv));
+	if (!tsk)
+		return NULL;
+
+	tsk->flags = BCOM_FLAGS_ENABLE_TASK;
+
+	priv = tsk->priv;
+	priv->fifo = fifo;
+
+	if (bcom_fec_tx_reset(tsk)) {
+		bcom_task_free(tsk);
+		return NULL;
+	}
+
+	return tsk;
+}
+EXPORT_SYMBOL_GPL(bcom_fec_tx_init);
+
+int
+bcom_fec_tx_reset(struct bcom_task *tsk)
+{
+	struct bcom_fec_priv *priv = tsk->priv;
+	struct bcom_fec_tx_var *var;
+	struct bcom_fec_tx_inc *inc;
+
+	/* Shutdown the task */
+	bcom_disable_task(tsk->tasknum);
+
+	/* Reset the microcode */
+	var = (struct bcom_fec_tx_var *) bcom_task_var(tsk->tasknum);
+	inc = (struct bcom_fec_tx_inc *) bcom_task_inc(tsk->tasknum);
+
+	if (bcom_load_image(tsk->tasknum, bcom_fec_tx_task))
+		return -1;
+
+	var->enable	= bcom_eng->regs_base +
+				offsetof(struct mpc52xx_sdma, tcr[tsk->tasknum]);
+	var->fifo	= (u32) priv->fifo;
+	var->DRD	= bcom_sram_va2pa(self_modified_drd(tsk->tasknum));
+	var->bd_base	= tsk->bd_pa;
+	var->bd_last	= tsk->bd_pa + ((tsk->num_bd-1) * tsk->bd_size);
+	var->bd_start	= tsk->bd_pa;
+
+	inc->incr_bytes	= -(s16)sizeof(u32);	/* These should be in the   */
+	inc->incr_src	= sizeof(u32);		/* task image, but we stick */
+	inc->incr_src_ma= sizeof(u8);		/* to the official ones     */
+
+	/* Reset the BDs */
+	tsk->index = 0;
+	tsk->outdex = 0;
+
+	memset(tsk->bd, 0x00, tsk->num_bd * tsk->bd_size);
+
+	/* Configure some stuff */
+	bcom_set_task_pragma(tsk->tasknum, BCOM_FEC_TX_BD_PRAGMA);
+	bcom_set_task_auto_start(tsk->tasknum, tsk->tasknum);
+
+	out_8(&bcom_eng->regs->ipr[BCOM_INITIATOR_FEC_TX], BCOM_IPR_FEC_TX);
+
+	out_be32(&bcom_eng->regs->IntPend, 1<<tsk->tasknum);	/* Clear ints */
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(bcom_fec_tx_reset);
+
+void
+bcom_fec_tx_release(struct bcom_task *tsk)
+{
+	/* Nothing special for the FEC tasks */
+	bcom_task_free(tsk);
+}
+EXPORT_SYMBOL_GPL(bcom_fec_tx_release);
+
+
+MODULE_DESCRIPTION("BestComm FEC tasks driver");
+MODULE_AUTHOR("Dale Farnsworth <dfarnsworth@mvista.com>");
+MODULE_LICENSE("GPL v2");
+
diff --git a/drivers/dma/bestcomm/gen_bd.c b/drivers/dma/bestcomm/gen_bd.c
new file mode 100644
index 000000000000..1a5b22d88127
--- /dev/null
+++ b/drivers/dma/bestcomm/gen_bd.c
@@ -0,0 +1,354 @@
+/*
+ * Driver for MPC52xx processor BestComm General Buffer Descriptor
+ *
+ * Copyright (C) 2007 Sylvain Munaut <tnt@246tNt.com>
+ * Copyright (C) 2006 AppSpec Computer Technologies Corp.
+ *                    Jeff Gibbons <jeff.gibbons@appspec.com>
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <asm/errno.h>
+#include <asm/io.h>
+
+#include <asm/mpc52xx.h>
+#include <asm/mpc52xx_psc.h>
+
+#include <linux/fsl/bestcomm/bestcomm.h>
+#include <linux/fsl/bestcomm/bestcomm_priv.h>
+#include <linux/fsl/bestcomm/gen_bd.h>
+
+
+/* ======================================================================== */
+/* Task image/var/inc                                                       */
+/* ======================================================================== */
+
+/* gen_bd tasks images */
+extern u32 bcom_gen_bd_rx_task[];
+extern u32 bcom_gen_bd_tx_task[];
+
+/* rx task vars that need to be set before enabling the task */
+struct bcom_gen_bd_rx_var {
+	u32 enable;		/* (u16*) address of task's control register */
+	u32 fifo;		/* (u32*) address of gen_bd's fifo */
+	u32 bd_base;		/* (struct bcom_bd*) beginning of ring buffer */
+	u32 bd_last;		/* (struct bcom_bd*) end of ring buffer */
+	u32 bd_start;		/* (struct bcom_bd*) current bd */
+	u32 buffer_size;	/* size of receive buffer */
+};
+
+/* rx task incs that need to be set before enabling the task */
+struct bcom_gen_bd_rx_inc {
+	u16 pad0;
+	s16 incr_bytes;
+	u16 pad1;
+	s16 incr_dst;
+};
+
+/* tx task vars that need to be set before enabling the task */
+struct bcom_gen_bd_tx_var {
+	u32 fifo;		/* (u32*) address of gen_bd's fifo */
+	u32 enable;		/* (u16*) address of task's control register */
+	u32 bd_base;		/* (struct bcom_bd*) beginning of ring buffer */
+	u32 bd_last;		/* (struct bcom_bd*) end of ring buffer */
+	u32 bd_start;		/* (struct bcom_bd*) current bd */
+	u32 buffer_size;	/* set by uCode for each packet */
+};
+
+/* tx task incs that need to be set before enabling the task */
+struct bcom_gen_bd_tx_inc {
+	u16 pad0;
+	s16 incr_bytes;
+	u16 pad1;
+	s16 incr_src;
+	u16 pad2;
+	s16 incr_src_ma;
+};
+
+/* private structure */
+struct bcom_gen_bd_priv {
+	phys_addr_t	fifo;
+	int		initiator;
+	int		ipr;
+	int		maxbufsize;
+};
+
+
+/* ======================================================================== */
+/* Task support code                                                        */
+/* ======================================================================== */
+
+struct bcom_task *
+bcom_gen_bd_rx_init(int queue_len, phys_addr_t fifo,
+			int initiator, int ipr, int maxbufsize)
+{
+	struct bcom_task *tsk;
+	struct bcom_gen_bd_priv *priv;
+
+	tsk = bcom_task_alloc(queue_len, sizeof(struct bcom_gen_bd),
+			sizeof(struct bcom_gen_bd_priv));
+	if (!tsk)
+		return NULL;
+
+	tsk->flags = BCOM_FLAGS_NONE;
+
+	priv = tsk->priv;
+	priv->fifo	= fifo;
+	priv->initiator	= initiator;
+	priv->ipr	= ipr;
+	priv->maxbufsize = maxbufsize;
+
+	if (bcom_gen_bd_rx_reset(tsk)) {
+		bcom_task_free(tsk);
+		return NULL;
+	}
+
+	return tsk;
+}
+EXPORT_SYMBOL_GPL(bcom_gen_bd_rx_init);
+
+int
+bcom_gen_bd_rx_reset(struct bcom_task *tsk)
+{
+	struct bcom_gen_bd_priv *priv = tsk->priv;
+	struct bcom_gen_bd_rx_var *var;
+	struct bcom_gen_bd_rx_inc *inc;
+
+	/* Shutdown the task */
+	bcom_disable_task(tsk->tasknum);
+
+	/* Reset the microcode */
+	var = (struct bcom_gen_bd_rx_var *) bcom_task_var(tsk->tasknum);
+	inc = (struct bcom_gen_bd_rx_inc *) bcom_task_inc(tsk->tasknum);
+
+	if (bcom_load_image(tsk->tasknum, bcom_gen_bd_rx_task))
+		return -1;
+
+	var->enable	= bcom_eng->regs_base +
+				offsetof(struct mpc52xx_sdma, tcr[tsk->tasknum]);
+	var->fifo	= (u32) priv->fifo;
+	var->bd_base	= tsk->bd_pa;
+	var->bd_last	= tsk->bd_pa + ((tsk->num_bd-1) * tsk->bd_size);
+	var->bd_start	= tsk->bd_pa;
+	var->buffer_size = priv->maxbufsize;
+
+	inc->incr_bytes	= -(s16)sizeof(u32);
+	inc->incr_dst	= sizeof(u32);
+
+	/* Reset the BDs */
+	tsk->index = 0;
+	tsk->outdex = 0;
+
+	memset(tsk->bd, 0x00, tsk->num_bd * tsk->bd_size);
+
+	/* Configure some stuff */
+	bcom_set_task_pragma(tsk->tasknum, BCOM_GEN_RX_BD_PRAGMA);
+	bcom_set_task_auto_start(tsk->tasknum, tsk->tasknum);
+
+	out_8(&bcom_eng->regs->ipr[priv->initiator], priv->ipr);
+	bcom_set_initiator(tsk->tasknum, priv->initiator);
+
+	out_be32(&bcom_eng->regs->IntPend, 1<<tsk->tasknum);	/* Clear ints */
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(bcom_gen_bd_rx_reset);
+
+void
+bcom_gen_bd_rx_release(struct bcom_task *tsk)
+{
+	/* Nothing special for the GenBD tasks */
+	bcom_task_free(tsk);
+}
+EXPORT_SYMBOL_GPL(bcom_gen_bd_rx_release);
+
+
+extern struct bcom_task *
+bcom_gen_bd_tx_init(int queue_len, phys_addr_t fifo,
+			int initiator, int ipr)
+{
+	struct bcom_task *tsk;
+	struct bcom_gen_bd_priv *priv;
+
+	tsk = bcom_task_alloc(queue_len, sizeof(struct bcom_gen_bd),
+			sizeof(struct bcom_gen_bd_priv));
+	if (!tsk)
+		return NULL;
+
+	tsk->flags = BCOM_FLAGS_NONE;
+
+	priv = tsk->priv;
+	priv->fifo	= fifo;
+	priv->initiator	= initiator;
+	priv->ipr	= ipr;
+
+	if (bcom_gen_bd_tx_reset(tsk)) {
+		bcom_task_free(tsk);
+		return NULL;
+	}
+
+	return tsk;
+}
+EXPORT_SYMBOL_GPL(bcom_gen_bd_tx_init);
+
+int
+bcom_gen_bd_tx_reset(struct bcom_task *tsk)
+{
+	struct bcom_gen_bd_priv *priv = tsk->priv;
+	struct bcom_gen_bd_tx_var *var;
+	struct bcom_gen_bd_tx_inc *inc;
+
+	/* Shutdown the task */
+	bcom_disable_task(tsk->tasknum);
+
+	/* Reset the microcode */
+	var = (struct bcom_gen_bd_tx_var *) bcom_task_var(tsk->tasknum);
+	inc = (struct bcom_gen_bd_tx_inc *) bcom_task_inc(tsk->tasknum);
+
+	if (bcom_load_image(tsk->tasknum, bcom_gen_bd_tx_task))
+		return -1;
+
+	var->enable	= bcom_eng->regs_base +
+				offsetof(struct mpc52xx_sdma, tcr[tsk->tasknum]);
+	var->fifo	= (u32) priv->fifo;
+	var->bd_base	= tsk->bd_pa;
+	var->bd_last	= tsk->bd_pa + ((tsk->num_bd-1) * tsk->bd_size);
+	var->bd_start	= tsk->bd_pa;
+
+	inc->incr_bytes	= -(s16)sizeof(u32);
+	inc->incr_src	= sizeof(u32);
+	inc->incr_src_ma = sizeof(u8);
+
+	/* Reset the BDs */
+	tsk->index = 0;
+	tsk->outdex = 0;
+
+	memset(tsk->bd, 0x00, tsk->num_bd * tsk->bd_size);
+
+	/* Configure some stuff */
+	bcom_set_task_pragma(tsk->tasknum, BCOM_GEN_TX_BD_PRAGMA);
+	bcom_set_task_auto_start(tsk->tasknum, tsk->tasknum);
+
+	out_8(&bcom_eng->regs->ipr[priv->initiator], priv->ipr);
+	bcom_set_initiator(tsk->tasknum, priv->initiator);
+
+	out_be32(&bcom_eng->regs->IntPend, 1<<tsk->tasknum);	/* Clear ints */
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(bcom_gen_bd_tx_reset);
+
+void
+bcom_gen_bd_tx_release(struct bcom_task *tsk)
+{
+	/* Nothing special for the GenBD tasks */
+	bcom_task_free(tsk);
+}
+EXPORT_SYMBOL_GPL(bcom_gen_bd_tx_release);
+
+/* ---------------------------------------------------------------------
+ * PSC support code
+ */
+
+/**
+ * bcom_psc_parameters - Bestcomm initialization value table for PSC devices
+ *
+ * This structure is only used internally.  It is a lookup table for PSC
+ * specific parameters to bestcomm tasks.
+ */
+static struct bcom_psc_params {
+	int rx_initiator;
+	int rx_ipr;
+	int tx_initiator;
+	int tx_ipr;
+} bcom_psc_params[] = {
+	[0] = {
+		.rx_initiator = BCOM_INITIATOR_PSC1_RX,
+		.rx_ipr = BCOM_IPR_PSC1_RX,
+		.tx_initiator = BCOM_INITIATOR_PSC1_TX,
+		.tx_ipr = BCOM_IPR_PSC1_TX,
+	},
+	[1] = {
+		.rx_initiator = BCOM_INITIATOR_PSC2_RX,
+		.rx_ipr = BCOM_IPR_PSC2_RX,
+		.tx_initiator = BCOM_INITIATOR_PSC2_TX,
+		.tx_ipr = BCOM_IPR_PSC2_TX,
+	},
+	[2] = {
+		.rx_initiator = BCOM_INITIATOR_PSC3_RX,
+		.rx_ipr = BCOM_IPR_PSC3_RX,
+		.tx_initiator = BCOM_INITIATOR_PSC3_TX,
+		.tx_ipr = BCOM_IPR_PSC3_TX,
+	},
+	[3] = {
+		.rx_initiator = BCOM_INITIATOR_PSC4_RX,
+		.rx_ipr = BCOM_IPR_PSC4_RX,
+		.tx_initiator = BCOM_INITIATOR_PSC4_TX,
+		.tx_ipr = BCOM_IPR_PSC4_TX,
+	},
+	[4] = {
+		.rx_initiator = BCOM_INITIATOR_PSC5_RX,
+		.rx_ipr = BCOM_IPR_PSC5_RX,
+		.tx_initiator = BCOM_INITIATOR_PSC5_TX,
+		.tx_ipr = BCOM_IPR_PSC5_TX,
+	},
+	[5] = {
+		.rx_initiator = BCOM_INITIATOR_PSC6_RX,
+		.rx_ipr = BCOM_IPR_PSC6_RX,
+		.tx_initiator = BCOM_INITIATOR_PSC6_TX,
+		.tx_ipr = BCOM_IPR_PSC6_TX,
+	},
+};
+
+/**
+ * bcom_psc_gen_bd_rx_init - Allocate a receive bcom_task for a PSC port
+ * @psc_num:	Number of the PSC to allocate a task for
+ * @queue_len:	number of buffer descriptors to allocate for the task
+ * @fifo:	physical address of FIFO register
+ * @maxbufsize:	Maximum receive data size in bytes.
+ *
+ * Allocate a bestcomm task structure for receiving data from a PSC.
+ */
+struct bcom_task * bcom_psc_gen_bd_rx_init(unsigned psc_num, int queue_len,
+					   phys_addr_t fifo, int maxbufsize)
+{
+	if (psc_num >= MPC52xx_PSC_MAXNUM)
+		return NULL;
+
+	return bcom_gen_bd_rx_init(queue_len, fifo,
+				   bcom_psc_params[psc_num].rx_initiator,
+				   bcom_psc_params[psc_num].rx_ipr,
+				   maxbufsize);
+}
+EXPORT_SYMBOL_GPL(bcom_psc_gen_bd_rx_init);
+
+/**
+ * bcom_psc_gen_bd_tx_init - Allocate a transmit bcom_task for a PSC port
+ * @psc_num:	Number of the PSC to allocate a task for
+ * @queue_len:	number of buffer descriptors to allocate for the task
+ * @fifo:	physical address of FIFO register
+ *
+ * Allocate a bestcomm task structure for transmitting data to a PSC.
+ */
+struct bcom_task *
+bcom_psc_gen_bd_tx_init(unsigned psc_num, int queue_len, phys_addr_t fifo)
+{
+	struct psc;
+	return bcom_gen_bd_tx_init(queue_len, fifo,
+				   bcom_psc_params[psc_num].tx_initiator,
+				   bcom_psc_params[psc_num].tx_ipr);
+}
+EXPORT_SYMBOL_GPL(bcom_psc_gen_bd_tx_init);
+
+
+MODULE_DESCRIPTION("BestComm General Buffer Descriptor tasks driver");
+MODULE_AUTHOR("Jeff Gibbons <jeff.gibbons@appspec.com>");
+MODULE_LICENSE("GPL v2");
+
diff --git a/drivers/dma/bestcomm/sram.c b/drivers/dma/bestcomm/sram.c
new file mode 100644
index 000000000000..5e2ed30ba2c4
--- /dev/null
+++ b/drivers/dma/bestcomm/sram.c
@@ -0,0 +1,178 @@
+/*
+ * Simple memory allocator for on-board SRAM
+ *
+ *
+ * Maintainer : Sylvain Munaut <tnt@246tNt.com>
+ *
+ * Copyright (C) 2005 Sylvain Munaut <tnt@246tNt.com>
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2. This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/ioport.h>
+#include <linux/of.h>
+
+#include <asm/io.h>
+#include <asm/mmu.h>
+
+#include <linux/fsl/bestcomm/sram.h>
+
+
+/* Struct keeping our 'state' */
+struct bcom_sram *bcom_sram = NULL;
+EXPORT_SYMBOL_GPL(bcom_sram);	/* needed for inline functions */
+
+
+/* ======================================================================== */
+/* Public API                                                               */
+/* ======================================================================== */
+/* DO NOT USE in interrupts, if needed in irq handler, we should use the
+   _irqsave version of the spin_locks */
+
+int bcom_sram_init(struct device_node *sram_node, char *owner)
+{
+	int rv;
+	const u32 *regaddr_p;
+	u64 regaddr64, size64;
+	unsigned int psize;
+
+	/* Create our state struct */
+	if (bcom_sram) {
+		printk(KERN_ERR "%s: bcom_sram_init: "
+			"Already initialized !\n", owner);
+		return -EBUSY;
+	}
+
+	bcom_sram = kmalloc(sizeof(struct bcom_sram), GFP_KERNEL);
+	if (!bcom_sram) {
+		printk(KERN_ERR "%s: bcom_sram_init: "
+			"Couldn't allocate internal state !\n", owner);
+		return -ENOMEM;
+	}
+
+	/* Get address and size of the sram */
+	regaddr_p = of_get_address(sram_node, 0, &size64, NULL);
+	if (!regaddr_p) {
+		printk(KERN_ERR "%s: bcom_sram_init: "
+			"Invalid device node !\n", owner);
+		rv = -EINVAL;
+		goto error_free;
+	}
+
+	regaddr64 = of_translate_address(sram_node, regaddr_p);
+
+	bcom_sram->base_phys = (phys_addr_t) regaddr64;
+	bcom_sram->size = (unsigned int) size64;
+
+	/* Request region */
+	if (!request_mem_region(bcom_sram->base_phys, bcom_sram->size, owner)) {
+		printk(KERN_ERR "%s: bcom_sram_init: "
+			"Couldn't request region !\n", owner);
+		rv = -EBUSY;
+		goto error_free;
+	}
+
+	/* Map SRAM */
+		/* sram is not really __iomem */
+	bcom_sram->base_virt = (void*) ioremap(bcom_sram->base_phys, bcom_sram->size);
+
+	if (!bcom_sram->base_virt) {
+		printk(KERN_ERR "%s: bcom_sram_init: "
+			"Map error SRAM zone 0x%08lx (0x%0x)!\n",
+			owner, (long)bcom_sram->base_phys, bcom_sram->size );
+		rv = -ENOMEM;
+		goto error_release;
+	}
+
+	/* Create an rheap (defaults to 32 bits word alignment) */
+	bcom_sram->rh = rh_create(4);
+
+	/* Attach the free zones */
+#if 0
+	/* Currently disabled ... for future use only */
+	reg_addr_p = of_get_property(sram_node, "available", &psize);
+#else
+	regaddr_p = NULL;
+	psize = 0;
+#endif
+
+	if (!regaddr_p || !psize) {
+		/* Attach the whole zone */
+		rh_attach_region(bcom_sram->rh, 0, bcom_sram->size);
+	} else {
+		/* Attach each zone independently */
+		while (psize >= 2 * sizeof(u32)) {
+			phys_addr_t zbase = of_translate_address(sram_node, regaddr_p);
+			rh_attach_region(bcom_sram->rh, zbase - bcom_sram->base_phys, regaddr_p[1]);
+			regaddr_p += 2;
+			psize -= 2 * sizeof(u32);
+		}
+	}
+
+	/* Init our spinlock */
+	spin_lock_init(&bcom_sram->lock);
+
+	return 0;
+
+error_release:
+	release_mem_region(bcom_sram->base_phys, bcom_sram->size);
+error_free:
+	kfree(bcom_sram);
+	bcom_sram = NULL;
+
+	return rv;
+}
+EXPORT_SYMBOL_GPL(bcom_sram_init);
+
+void bcom_sram_cleanup(void)
+{
+	/* Free resources */
+	if (bcom_sram) {
+		rh_destroy(bcom_sram->rh);
+		iounmap((void __iomem *)bcom_sram->base_virt);
+		release_mem_region(bcom_sram->base_phys, bcom_sram->size);
+		kfree(bcom_sram);
+		bcom_sram = NULL;
+	}
+}
+EXPORT_SYMBOL_GPL(bcom_sram_cleanup);
+
+void* bcom_sram_alloc(int size, int align, phys_addr_t *phys)
+{
+	unsigned long offset;
+
+	spin_lock(&bcom_sram->lock);
+	offset = rh_alloc_align(bcom_sram->rh, size, align, NULL);
+	spin_unlock(&bcom_sram->lock);
+
+	if (IS_ERR_VALUE(offset))
+		return NULL;
+
+	*phys = bcom_sram->base_phys + offset;
+	return bcom_sram->base_virt + offset;
+}
+EXPORT_SYMBOL_GPL(bcom_sram_alloc);
+
+void bcom_sram_free(void *ptr)
+{
+	unsigned long offset;
+
+	if (!ptr)
+		return;
+
+	offset = ptr - bcom_sram->base_virt;
+
+	spin_lock(&bcom_sram->lock);
+	rh_free(bcom_sram->rh, offset);
+	spin_unlock(&bcom_sram->lock);
+}
+EXPORT_SYMBOL_GPL(bcom_sram_free);
+
diff --git a/drivers/net/ethernet/freescale/fec_mpc52xx.c b/drivers/net/ethernet/freescale/fec_mpc52xx.c
index 817d081d2cd8..85e776d500a6 100644
--- a/drivers/net/ethernet/freescale/fec_mpc52xx.c
+++ b/drivers/net/ethernet/freescale/fec_mpc52xx.c
@@ -40,8 +40,8 @@
 #include <asm/delay.h>
 #include <asm/mpc52xx.h>
 
-#include <sysdev/bestcomm/bestcomm.h>
-#include <sysdev/bestcomm/fec.h>
+#include <linux/fsl/bestcomm/bestcomm.h>
+#include <linux/fsl/bestcomm/fec.h>
 
 #include "fec_mpc52xx.h"
 
diff --git a/include/linux/fsl/bestcomm/ata.h b/include/linux/fsl/bestcomm/ata.h
new file mode 100644
index 000000000000..0b2371811334
--- /dev/null
+++ b/include/linux/fsl/bestcomm/ata.h
@@ -0,0 +1,30 @@
+/*
+ * Header for Bestcomm ATA task driver
+ *
+ *
+ * Copyright (C) 2006 Freescale - John Rigby
+ * Copyright (C) 2006 Sylvain Munaut <tnt@246tNt.com>
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2. This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+#ifndef __BESTCOMM_ATA_H__
+#define __BESTCOMM_ATA_H__
+
+
+struct bcom_ata_bd {
+	u32	status;
+	u32	src_pa;
+	u32	dst_pa;
+};
+
+extern struct bcom_task * bcom_ata_init(int queue_len, int maxbufsize);
+extern void bcom_ata_rx_prepare(struct bcom_task *tsk);
+extern void bcom_ata_tx_prepare(struct bcom_task *tsk);
+extern void bcom_ata_reset_bd(struct bcom_task *tsk);
+extern void bcom_ata_release(struct bcom_task *tsk);
+
+#endif /* __BESTCOMM_ATA_H__ */
+
diff --git a/include/linux/fsl/bestcomm/bestcomm.h b/include/linux/fsl/bestcomm/bestcomm.h
new file mode 100644
index 000000000000..a0e2e6b19b57
--- /dev/null
+++ b/include/linux/fsl/bestcomm/bestcomm.h
@@ -0,0 +1,213 @@
+/*
+ * Public header for the MPC52xx processor BestComm driver
+ *
+ *
+ * Copyright (C) 2006      Sylvain Munaut <tnt@246tNt.com>
+ * Copyright (C) 2005      Varma Electronics Oy,
+ *                         ( by Andrey Volkov <avolkov@varma-el.com> )
+ * Copyright (C) 2003-2004 MontaVista, Software, Inc.
+ *                         ( by Dale Farnsworth <dfarnsworth@mvista.com> )
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2. This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+#ifndef __BESTCOMM_H__
+#define __BESTCOMM_H__
+
+/**
+ * struct bcom_bd - Structure describing a generic BestComm buffer descriptor
+ * @status: The current status of this buffer. Exact meaning depends on the
+ *          task type
+ * @data: An array of u32 extra data.  Size of array is task dependent.
+ *
+ * Note: Don't dereference a bcom_bd pointer as an array.  The size of the
+ *       bcom_bd is variable.  Use bcom_get_bd() instead.
+ */
+struct bcom_bd {
+	u32	status;
+	u32	data[0];	/* variable payload size */
+};
+
+/* ======================================================================== */
+/* Generic task management                                                   */
+/* ======================================================================== */
+
+/**
+ * struct bcom_task - Structure describing a loaded BestComm task
+ *
+ * This structure is never built by the driver it self. It's built and
+ * filled the intermediate layer of the BestComm API, the task dependent
+ * support code.
+ *
+ * Most likely you don't need to poke around inside this structure. The
+ * fields are exposed in the header just for the sake of inline functions
+ */
+struct bcom_task {
+	unsigned int	tasknum;
+	unsigned int	flags;
+	int		irq;
+
+	struct bcom_bd	*bd;
+	phys_addr_t	bd_pa;
+	void		**cookie;
+	unsigned short	index;
+	unsigned short	outdex;
+	unsigned int	num_bd;
+	unsigned int	bd_size;
+
+	void*		priv;
+};
+
+#define BCOM_FLAGS_NONE         0x00000000ul
+#define BCOM_FLAGS_ENABLE_TASK  (1ul <<  0)
+
+/**
+ * bcom_enable - Enable a BestComm task
+ * @tsk: The BestComm task structure
+ *
+ * This function makes sure the given task is enabled and can be run
+ * by the BestComm engine as needed
+ */
+extern void bcom_enable(struct bcom_task *tsk);
+
+/**
+ * bcom_disable - Disable a BestComm task
+ * @tsk: The BestComm task structure
+ *
+ * This function disable a given task, making sure it's not executed
+ * by the BestComm engine.
+ */
+extern void bcom_disable(struct bcom_task *tsk);
+
+
+/**
+ * bcom_get_task_irq - Returns the irq number of a BestComm task
+ * @tsk: The BestComm task structure
+ */
+static inline int
+bcom_get_task_irq(struct bcom_task *tsk) {
+	return tsk->irq;
+}
+
+/* ======================================================================== */
+/* BD based tasks helpers                                                   */
+/* ======================================================================== */
+
+#define BCOM_BD_READY	0x40000000ul
+
+/** _bcom_next_index - Get next input index.
+ * @tsk: pointer to task structure
+ *
+ * Support function; Device drivers should not call this
+ */
+static inline int
+_bcom_next_index(struct bcom_task *tsk)
+{
+	return ((tsk->index + 1) == tsk->num_bd) ? 0 : tsk->index + 1;
+}
+
+/** _bcom_next_outdex - Get next output index.
+ * @tsk: pointer to task structure
+ *
+ * Support function; Device drivers should not call this
+ */
+static inline int
+_bcom_next_outdex(struct bcom_task *tsk)
+{
+	return ((tsk->outdex + 1) == tsk->num_bd) ? 0 : tsk->outdex + 1;
+}
+
+/**
+ * bcom_queue_empty - Checks if a BestComm task BD queue is empty
+ * @tsk: The BestComm task structure
+ */
+static inline int
+bcom_queue_empty(struct bcom_task *tsk)
+{
+	return tsk->index == tsk->outdex;
+}
+
+/**
+ * bcom_queue_full - Checks if a BestComm task BD queue is full
+ * @tsk: The BestComm task structure
+ */
+static inline int
+bcom_queue_full(struct bcom_task *tsk)
+{
+	return tsk->outdex == _bcom_next_index(tsk);
+}
+
+/**
+ * bcom_get_bd - Get a BD from the queue
+ * @tsk: The BestComm task structure
+ * index: Index of the BD to fetch
+ */
+static inline struct bcom_bd
+*bcom_get_bd(struct bcom_task *tsk, unsigned int index)
+{
+	/* A cast to (void*) so the address can be incremented by the
+	 * real size instead of by sizeof(struct bcom_bd) */
+	return ((void *)tsk->bd) + (index * tsk->bd_size);
+}
+
+/**
+ * bcom_buffer_done - Checks if a BestComm 
+ * @tsk: The BestComm task structure
+ */
+static inline int
+bcom_buffer_done(struct bcom_task *tsk)
+{
+	struct bcom_bd *bd;
+	if (bcom_queue_empty(tsk))
+		return 0;
+
+	bd = bcom_get_bd(tsk, tsk->outdex);
+	return !(bd->status & BCOM_BD_READY);
+}
+
+/**
+ * bcom_prepare_next_buffer - clear status of next available buffer.
+ * @tsk: The BestComm task structure
+ *
+ * Returns pointer to next buffer descriptor
+ */
+static inline struct bcom_bd *
+bcom_prepare_next_buffer(struct bcom_task *tsk)
+{
+	struct bcom_bd *bd;
+
+	bd = bcom_get_bd(tsk, tsk->index);
+	bd->status = 0;	/* cleanup last status */
+	return bd;
+}
+
+static inline void
+bcom_submit_next_buffer(struct bcom_task *tsk, void *cookie)
+{
+	struct bcom_bd *bd = bcom_get_bd(tsk, tsk->index);
+
+	tsk->cookie[tsk->index] = cookie;
+	mb();	/* ensure the bd is really up-to-date */
+	bd->status |= BCOM_BD_READY;
+	tsk->index = _bcom_next_index(tsk);
+	if (tsk->flags & BCOM_FLAGS_ENABLE_TASK)
+		bcom_enable(tsk);
+}
+
+static inline void *
+bcom_retrieve_buffer(struct bcom_task *tsk, u32 *p_status, struct bcom_bd **p_bd)
+{
+	void *cookie = tsk->cookie[tsk->outdex];
+	struct bcom_bd *bd = bcom_get_bd(tsk, tsk->outdex);
+
+	if (p_status)
+		*p_status = bd->status;
+	if (p_bd)
+		*p_bd = bd;
+	tsk->outdex = _bcom_next_outdex(tsk);
+	return cookie;
+}
+
+#endif /* __BESTCOMM_H__ */
diff --git a/include/linux/fsl/bestcomm/bestcomm_priv.h b/include/linux/fsl/bestcomm/bestcomm_priv.h
new file mode 100644
index 000000000000..3b52f3ffbdf8
--- /dev/null
+++ b/include/linux/fsl/bestcomm/bestcomm_priv.h
@@ -0,0 +1,350 @@
+/*
+ * Private header for the MPC52xx processor BestComm driver
+ *
+ * By private, we mean that driver should not use it directly. It's meant
+ * to be used by the BestComm engine driver itself and by the intermediate
+ * layer between the core and the drivers.
+ *
+ * Copyright (C) 2006      Sylvain Munaut <tnt@246tNt.com>
+ * Copyright (C) 2005      Varma Electronics Oy,
+ *                         ( by Andrey Volkov <avolkov@varma-el.com> )
+ * Copyright (C) 2003-2004 MontaVista, Software, Inc.
+ *                         ( by Dale Farnsworth <dfarnsworth@mvista.com> )
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2. This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+#ifndef __BESTCOMM_PRIV_H__
+#define __BESTCOMM_PRIV_H__
+
+#include <linux/spinlock.h>
+#include <linux/of.h>
+#include <asm/io.h>
+#include <asm/mpc52xx.h>
+
+#include "sram.h"
+
+
+/* ======================================================================== */
+/* Engine related stuff                                                     */
+/* ======================================================================== */
+
+/* Zones sizes and needed alignments */
+#define BCOM_MAX_TASKS		16
+#define BCOM_MAX_VAR		24
+#define BCOM_MAX_INC		8
+#define BCOM_MAX_FDT		64
+#define BCOM_MAX_CTX		20
+#define BCOM_CTX_SIZE		(BCOM_MAX_CTX * sizeof(u32))
+#define BCOM_CTX_ALIGN		0x100
+#define BCOM_VAR_SIZE		(BCOM_MAX_VAR * sizeof(u32))
+#define BCOM_INC_SIZE		(BCOM_MAX_INC * sizeof(u32))
+#define BCOM_VAR_ALIGN		0x80
+#define BCOM_FDT_SIZE		(BCOM_MAX_FDT * sizeof(u32))
+#define BCOM_FDT_ALIGN		0x100
+
+/**
+ * struct bcom_tdt - Task Descriptor Table Entry
+ *
+ */
+struct bcom_tdt {
+	u32 start;
+	u32 stop;
+	u32 var;
+	u32 fdt;
+	u32 exec_status;	/* used internally by BestComm engine */
+	u32 mvtp;		/* used internally by BestComm engine */
+	u32 context;
+	u32 litbase;
+};
+
+/**
+ * struct bcom_engine
+ *
+ * This holds all info needed globaly to handle the engine
+ */
+struct bcom_engine {
+	struct device_node		*ofnode;
+	struct mpc52xx_sdma __iomem     *regs;
+	phys_addr_t                      regs_base;
+
+	struct bcom_tdt			*tdt;
+	u32				*ctx;
+	u32				*var;
+	u32				*fdt;
+
+	spinlock_t			lock;
+};
+
+extern struct bcom_engine *bcom_eng;
+
+
+/* ======================================================================== */
+/* Tasks related stuff                                                      */
+/* ======================================================================== */
+
+/* Tasks image header */
+#define BCOM_TASK_MAGIC		0x4243544B	/* 'BCTK' */
+
+struct bcom_task_header {
+	u32	magic;
+	u8	desc_size;	/* the size fields     */
+	u8	var_size;	/* are given in number */
+	u8	inc_size;	/* of 32-bits words    */
+	u8	first_var;
+	u8	reserved[8];
+};
+
+/* Descriptors structure & co */
+#define BCOM_DESC_NOP		0x000001f8
+#define BCOM_LCD_MASK		0x80000000
+#define BCOM_DRD_EXTENDED	0x40000000
+#define BCOM_DRD_INITIATOR_SHIFT	21
+
+/* Tasks pragma */
+#define BCOM_PRAGMA_BIT_RSV		7	/* reserved pragma bit */
+#define BCOM_PRAGMA_BIT_PRECISE_INC	6	/* increment 0=when possible, */
+						/*           1=iter end */
+#define BCOM_PRAGMA_BIT_RST_ERROR_NO	5	/* don't reset errors on */
+						/* task enable */
+#define BCOM_PRAGMA_BIT_PACK		4	/* pack data enable */
+#define BCOM_PRAGMA_BIT_INTEGER		3	/* data alignment */
+						/* 0=frac(msb), 1=int(lsb) */
+#define BCOM_PRAGMA_BIT_SPECREAD	2	/* XLB speculative read */
+#define BCOM_PRAGMA_BIT_CW		1	/* write line buffer enable */
+#define BCOM_PRAGMA_BIT_RL		0	/* read line buffer enable */
+
+	/* Looks like XLB speculative read generates XLB errors when a buffer
+	 * is at the end of the physical memory. i.e. when accessing the
+	 * lasts words, the engine tries to prefetch the next but there is no
+	 * next ...
+	 */
+#define BCOM_STD_PRAGMA		((0 << BCOM_PRAGMA_BIT_RSV)		| \
+				 (0 << BCOM_PRAGMA_BIT_PRECISE_INC)	| \
+				 (0 << BCOM_PRAGMA_BIT_RST_ERROR_NO)	| \
+				 (0 << BCOM_PRAGMA_BIT_PACK)		| \
+				 (0 << BCOM_PRAGMA_BIT_INTEGER)		| \
+				 (0 << BCOM_PRAGMA_BIT_SPECREAD)	| \
+				 (1 << BCOM_PRAGMA_BIT_CW)		| \
+				 (1 << BCOM_PRAGMA_BIT_RL))
+
+#define BCOM_PCI_PRAGMA		((0 << BCOM_PRAGMA_BIT_RSV)		| \
+				 (0 << BCOM_PRAGMA_BIT_PRECISE_INC)	| \
+				 (0 << BCOM_PRAGMA_BIT_RST_ERROR_NO)	| \
+				 (0 << BCOM_PRAGMA_BIT_PACK)		| \
+				 (1 << BCOM_PRAGMA_BIT_INTEGER)		| \
+				 (0 << BCOM_PRAGMA_BIT_SPECREAD)	| \
+				 (1 << BCOM_PRAGMA_BIT_CW)		| \
+				 (1 << BCOM_PRAGMA_BIT_RL))
+
+#define BCOM_ATA_PRAGMA		BCOM_STD_PRAGMA
+#define BCOM_CRC16_DP_0_PRAGMA	BCOM_STD_PRAGMA
+#define BCOM_CRC16_DP_1_PRAGMA	BCOM_STD_PRAGMA
+#define BCOM_FEC_RX_BD_PRAGMA	BCOM_STD_PRAGMA
+#define BCOM_FEC_TX_BD_PRAGMA	BCOM_STD_PRAGMA
+#define BCOM_GEN_DP_0_PRAGMA	BCOM_STD_PRAGMA
+#define BCOM_GEN_DP_1_PRAGMA	BCOM_STD_PRAGMA
+#define BCOM_GEN_DP_2_PRAGMA	BCOM_STD_PRAGMA
+#define BCOM_GEN_DP_3_PRAGMA	BCOM_STD_PRAGMA
+#define BCOM_GEN_DP_BD_0_PRAGMA	BCOM_STD_PRAGMA
+#define BCOM_GEN_DP_BD_1_PRAGMA	BCOM_STD_PRAGMA
+#define BCOM_GEN_RX_BD_PRAGMA	BCOM_STD_PRAGMA
+#define BCOM_GEN_TX_BD_PRAGMA	BCOM_STD_PRAGMA
+#define BCOM_GEN_LPC_PRAGMA	BCOM_STD_PRAGMA
+#define BCOM_PCI_RX_PRAGMA	BCOM_PCI_PRAGMA
+#define BCOM_PCI_TX_PRAGMA	BCOM_PCI_PRAGMA
+
+/* Initiators number */
+#define BCOM_INITIATOR_ALWAYS	 0
+#define BCOM_INITIATOR_SCTMR_0	 1
+#define BCOM_INITIATOR_SCTMR_1	 2
+#define BCOM_INITIATOR_FEC_RX	 3
+#define BCOM_INITIATOR_FEC_TX	 4
+#define BCOM_INITIATOR_ATA_RX	 5
+#define BCOM_INITIATOR_ATA_TX	 6
+#define BCOM_INITIATOR_SCPCI_RX	 7
+#define BCOM_INITIATOR_SCPCI_TX	 8
+#define BCOM_INITIATOR_PSC3_RX	 9
+#define BCOM_INITIATOR_PSC3_TX	10
+#define BCOM_INITIATOR_PSC2_RX	11
+#define BCOM_INITIATOR_PSC2_TX	12
+#define BCOM_INITIATOR_PSC1_RX	13
+#define BCOM_INITIATOR_PSC1_TX	14
+#define BCOM_INITIATOR_SCTMR_2	15
+#define BCOM_INITIATOR_SCLPC	16
+#define BCOM_INITIATOR_PSC5_RX	17
+#define BCOM_INITIATOR_PSC5_TX	18
+#define BCOM_INITIATOR_PSC4_RX	19
+#define BCOM_INITIATOR_PSC4_TX	20
+#define BCOM_INITIATOR_I2C2_RX	21
+#define BCOM_INITIATOR_I2C2_TX	22
+#define BCOM_INITIATOR_I2C1_RX	23
+#define BCOM_INITIATOR_I2C1_TX	24
+#define BCOM_INITIATOR_PSC6_RX	25
+#define BCOM_INITIATOR_PSC6_TX	26
+#define BCOM_INITIATOR_IRDA_RX	25
+#define BCOM_INITIATOR_IRDA_TX	26
+#define BCOM_INITIATOR_SCTMR_3	27
+#define BCOM_INITIATOR_SCTMR_4	28
+#define BCOM_INITIATOR_SCTMR_5	29
+#define BCOM_INITIATOR_SCTMR_6	30
+#define BCOM_INITIATOR_SCTMR_7	31
+
+/* Initiators priorities */
+#define BCOM_IPR_ALWAYS		7
+#define BCOM_IPR_SCTMR_0	2
+#define BCOM_IPR_SCTMR_1	2
+#define BCOM_IPR_FEC_RX		6
+#define BCOM_IPR_FEC_TX		5
+#define BCOM_IPR_ATA_RX		7
+#define BCOM_IPR_ATA_TX		7
+#define BCOM_IPR_SCPCI_RX	2
+#define BCOM_IPR_SCPCI_TX	2
+#define BCOM_IPR_PSC3_RX	2
+#define BCOM_IPR_PSC3_TX	2
+#define BCOM_IPR_PSC2_RX	2
+#define BCOM_IPR_PSC2_TX	2
+#define BCOM_IPR_PSC1_RX	2
+#define BCOM_IPR_PSC1_TX	2
+#define BCOM_IPR_SCTMR_2	2
+#define BCOM_IPR_SCLPC		2
+#define BCOM_IPR_PSC5_RX	2
+#define BCOM_IPR_PSC5_TX	2
+#define BCOM_IPR_PSC4_RX	2
+#define BCOM_IPR_PSC4_TX	2
+#define BCOM_IPR_I2C2_RX	2
+#define BCOM_IPR_I2C2_TX	2
+#define BCOM_IPR_I2C1_RX	2
+#define BCOM_IPR_I2C1_TX	2
+#define BCOM_IPR_PSC6_RX	2
+#define BCOM_IPR_PSC6_TX	2
+#define BCOM_IPR_IRDA_RX	2
+#define BCOM_IPR_IRDA_TX	2
+#define BCOM_IPR_SCTMR_3	2
+#define BCOM_IPR_SCTMR_4	2
+#define BCOM_IPR_SCTMR_5	2
+#define BCOM_IPR_SCTMR_6	2
+#define BCOM_IPR_SCTMR_7	2
+
+
+/* ======================================================================== */
+/* API                                                                      */
+/* ======================================================================== */
+
+extern struct bcom_task *bcom_task_alloc(int bd_count, int bd_size, int priv_size);
+extern void bcom_task_free(struct bcom_task *tsk);
+extern int bcom_load_image(int task, u32 *task_image);
+extern void bcom_set_initiator(int task, int initiator);
+
+
+#define TASK_ENABLE             0x8000
+
+/**
+ * bcom_disable_prefetch - Hook to disable bus prefetching
+ *
+ * ATA DMA and the original MPC5200 need this due to silicon bugs.  At the
+ * moment disabling prefetch is a one-way street.  There is no mechanism
+ * in place to turn prefetch back on after it has been disabled.  There is
+ * no reason it couldn't be done, it would just be more complex to implement.
+ */
+static inline void bcom_disable_prefetch(void)
+{
+	u16 regval;
+
+	regval = in_be16(&bcom_eng->regs->PtdCntrl);
+	out_be16(&bcom_eng->regs->PtdCntrl, regval | 1);
+};
+
+static inline void
+bcom_enable_task(int task)
+{
+        u16 reg;
+        reg = in_be16(&bcom_eng->regs->tcr[task]);
+        out_be16(&bcom_eng->regs->tcr[task],  reg | TASK_ENABLE);
+}
+
+static inline void
+bcom_disable_task(int task)
+{
+        u16 reg = in_be16(&bcom_eng->regs->tcr[task]);
+        out_be16(&bcom_eng->regs->tcr[task], reg & ~TASK_ENABLE);
+}
+
+
+static inline u32 *
+bcom_task_desc(int task)
+{
+	return bcom_sram_pa2va(bcom_eng->tdt[task].start);
+}
+
+static inline int
+bcom_task_num_descs(int task)
+{
+	return (bcom_eng->tdt[task].stop - bcom_eng->tdt[task].start)/sizeof(u32) + 1;
+}
+
+static inline u32 *
+bcom_task_var(int task)
+{
+	return bcom_sram_pa2va(bcom_eng->tdt[task].var);
+}
+
+static inline u32 *
+bcom_task_inc(int task)
+{
+	return &bcom_task_var(task)[BCOM_MAX_VAR];
+}
+
+
+static inline int
+bcom_drd_is_extended(u32 desc)
+{
+	return (desc) & BCOM_DRD_EXTENDED;
+}
+
+static inline int
+bcom_desc_is_drd(u32 desc)
+{
+	return !(desc & BCOM_LCD_MASK) && desc != BCOM_DESC_NOP;
+}
+
+static inline int
+bcom_desc_initiator(u32 desc)
+{
+	return (desc >> BCOM_DRD_INITIATOR_SHIFT) & 0x1f;
+}
+
+static inline void
+bcom_set_desc_initiator(u32 *desc, int initiator)
+{
+	*desc = (*desc & ~(0x1f << BCOM_DRD_INITIATOR_SHIFT)) |
+			((initiator & 0x1f) << BCOM_DRD_INITIATOR_SHIFT);
+}
+
+
+static inline void
+bcom_set_task_pragma(int task, int pragma)
+{
+	u32 *fdt = &bcom_eng->tdt[task].fdt;
+	*fdt = (*fdt & ~0xff) | pragma;
+}
+
+static inline void
+bcom_set_task_auto_start(int task, int next_task)
+{
+	u16 __iomem *tcr = &bcom_eng->regs->tcr[task];
+	out_be16(tcr, (in_be16(tcr) & ~0xff) | 0x00c0 | next_task);
+}
+
+static inline void
+bcom_set_tcr_initiator(int task, int initiator)
+{
+	u16 __iomem *tcr = &bcom_eng->regs->tcr[task];
+	out_be16(tcr, (in_be16(tcr) & ~0x1f00) | ((initiator & 0x1f) << 8));
+}
+
+
+#endif /* __BESTCOMM_PRIV_H__ */
+
diff --git a/include/linux/fsl/bestcomm/fec.h b/include/linux/fsl/bestcomm/fec.h
new file mode 100644
index 000000000000..ee565d94d503
--- /dev/null
+++ b/include/linux/fsl/bestcomm/fec.h
@@ -0,0 +1,61 @@
+/*
+ * Header for Bestcomm FEC tasks driver
+ *
+ *
+ * Copyright (C) 2006-2007 Sylvain Munaut <tnt@246tNt.com>
+ * Copyright (C) 2003-2004 MontaVista, Software, Inc.
+ *                         ( by Dale Farnsworth <dfarnsworth@mvista.com> )
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2. This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+#ifndef __BESTCOMM_FEC_H__
+#define __BESTCOMM_FEC_H__
+
+
+struct bcom_fec_bd {
+	u32	status;
+	u32	skb_pa;
+};
+
+#define BCOM_FEC_TX_BD_TFD	0x08000000ul	/* transmit frame done */
+#define BCOM_FEC_TX_BD_TC	0x04000000ul	/* transmit CRC */
+#define BCOM_FEC_TX_BD_ABC	0x02000000ul	/* append bad CRC */
+
+#define BCOM_FEC_RX_BD_L	0x08000000ul	/* buffer is last in frame */
+#define BCOM_FEC_RX_BD_BC	0x00800000ul	/* DA is broadcast */
+#define BCOM_FEC_RX_BD_MC	0x00400000ul	/* DA is multicast and not broadcast */
+#define BCOM_FEC_RX_BD_LG	0x00200000ul	/* Rx frame length violation */
+#define BCOM_FEC_RX_BD_NO	0x00100000ul	/* Rx non-octet aligned frame */
+#define BCOM_FEC_RX_BD_CR	0x00040000ul	/* Rx CRC error */
+#define BCOM_FEC_RX_BD_OV	0x00020000ul	/* overrun */
+#define BCOM_FEC_RX_BD_TR	0x00010000ul	/* Rx frame truncated */
+#define BCOM_FEC_RX_BD_LEN_MASK	0x000007fful	/* mask for length of received frame */
+#define BCOM_FEC_RX_BD_ERRORS	(BCOM_FEC_RX_BD_LG | BCOM_FEC_RX_BD_NO | \
+		BCOM_FEC_RX_BD_CR | BCOM_FEC_RX_BD_OV | BCOM_FEC_RX_BD_TR)
+
+
+extern struct bcom_task *
+bcom_fec_rx_init(int queue_len, phys_addr_t fifo, int maxbufsize);
+
+extern int
+bcom_fec_rx_reset(struct bcom_task *tsk);
+
+extern void
+bcom_fec_rx_release(struct bcom_task *tsk);
+
+
+extern struct bcom_task *
+bcom_fec_tx_init(int queue_len, phys_addr_t fifo);
+
+extern int
+bcom_fec_tx_reset(struct bcom_task *tsk);
+
+extern void
+bcom_fec_tx_release(struct bcom_task *tsk);
+
+
+#endif /* __BESTCOMM_FEC_H__ */
+
diff --git a/include/linux/fsl/bestcomm/gen_bd.h b/include/linux/fsl/bestcomm/gen_bd.h
new file mode 100644
index 000000000000..de47260e69da
--- /dev/null
+++ b/include/linux/fsl/bestcomm/gen_bd.h
@@ -0,0 +1,53 @@
+/*
+ * Header for Bestcomm General Buffer Descriptor tasks driver
+ *
+ *
+ * Copyright (C) 2007 Sylvain Munaut <tnt@246tNt.com>
+ * Copyright (C) 2006 AppSpec Computer Technologies Corp.
+ *                    Jeff Gibbons <jeff.gibbons@appspec.com>
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ *
+ */
+
+#ifndef __BESTCOMM_GEN_BD_H__
+#define __BESTCOMM_GEN_BD_H__
+
+struct bcom_gen_bd {
+	u32	status;
+	u32	buf_pa;
+};
+
+
+extern struct bcom_task *
+bcom_gen_bd_rx_init(int queue_len, phys_addr_t fifo,
+			int initiator, int ipr, int maxbufsize);
+
+extern int
+bcom_gen_bd_rx_reset(struct bcom_task *tsk);
+
+extern void
+bcom_gen_bd_rx_release(struct bcom_task *tsk);
+
+
+extern struct bcom_task *
+bcom_gen_bd_tx_init(int queue_len, phys_addr_t fifo,
+			int initiator, int ipr);
+
+extern int
+bcom_gen_bd_tx_reset(struct bcom_task *tsk);
+
+extern void
+bcom_gen_bd_tx_release(struct bcom_task *tsk);
+
+
+/* PSC support utility wrappers */
+struct bcom_task * bcom_psc_gen_bd_rx_init(unsigned psc_num, int queue_len,
+					   phys_addr_t fifo, int maxbufsize);
+struct bcom_task * bcom_psc_gen_bd_tx_init(unsigned psc_num, int queue_len,
+					   phys_addr_t fifo);
+#endif  /* __BESTCOMM_GEN_BD_H__ */
+
diff --git a/include/linux/fsl/bestcomm/sram.h b/include/linux/fsl/bestcomm/sram.h
new file mode 100644
index 000000000000..b6d668963cce
--- /dev/null
+++ b/include/linux/fsl/bestcomm/sram.h
@@ -0,0 +1,54 @@
+/*
+ * Handling of a sram zone for bestcomm
+ *
+ *
+ * Copyright (C) 2007 Sylvain Munaut <tnt@246tNt.com>
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2. This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+#ifndef __BESTCOMM_SRAM_H__
+#define __BESTCOMM_SRAM_H__
+
+#include <asm/rheap.h>
+#include <asm/mmu.h>
+#include <linux/spinlock.h>
+
+
+/* Structure used internally */
+	/* The internals are here for the inline functions
+	 * sake, certainly not for the user to mess with !
+	 */
+struct bcom_sram {
+	phys_addr_t		 base_phys;
+	void 			*base_virt;
+	unsigned int		 size;
+	rh_info_t		*rh;
+	spinlock_t		 lock;
+};
+
+extern struct bcom_sram *bcom_sram;
+
+
+/* Public API */
+extern int  bcom_sram_init(struct device_node *sram_node, char *owner);
+extern void bcom_sram_cleanup(void);
+
+extern void* bcom_sram_alloc(int size, int align, phys_addr_t *phys);
+extern void  bcom_sram_free(void *ptr);
+
+static inline phys_addr_t bcom_sram_va2pa(void *va) {
+	return bcom_sram->base_phys +
+		(unsigned long)(va - bcom_sram->base_virt);
+}
+
+static inline void *bcom_sram_pa2va(phys_addr_t pa) {
+	return bcom_sram->base_virt +
+		(unsigned long)(pa - bcom_sram->base_phys);
+}
+
+
+#endif  /* __BESTCOMM_SRAM_H__ */
+
diff --git a/sound/soc/fsl/mpc5200_dma.c b/sound/soc/fsl/mpc5200_dma.c
index 9997c039bb24..2a847ca494b5 100644
--- a/sound/soc/fsl/mpc5200_dma.c
+++ b/sound/soc/fsl/mpc5200_dma.c
@@ -14,8 +14,8 @@
 
 #include <sound/soc.h>
 
-#include <sysdev/bestcomm/bestcomm.h>
-#include <sysdev/bestcomm/gen_bd.h>
+#include <linux/fsl/bestcomm/bestcomm.h>
+#include <linux/fsl/bestcomm/gen_bd.h>
 #include <asm/mpc52xx_psc.h>
 
 #include "mpc5200_dma.h"
-- 
cgit v1.2.3


From 1d5210008bd3a26daf4b06aed9d6c330dd4c83e2 Mon Sep 17 00:00:00 2001
From: Lance Ortiz <lance.ortiz@hp.com>
Date: Thu, 3 Jan 2013 15:34:08 -0700
Subject: aerdrv: Enhanced AER logging

This patch will provide a more reliable and easy way for user-space
applications to have access to AER logs rather than reading them from the
message buffer. It also provides a way to notify user-space when an AER
event occurs.

The aer driver is updated to generate a trace event of function 'aer_event'
when a PCIe error is reported over the AER interface.  The trace event was
added to both the interrupt based aer path and the firmware first path.

Signed-off-by: Lance Ortiz <lance.ortiz@hp.com>
Acked-by: Mauro Carvalho Chehab <mchehab@redhat.com>
Acked-by: Boris Petkov <bp@alien8.de>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 drivers/acpi/apei/cper.c               | 19 ++++++++++++++++---
 drivers/pci/pcie/aer/aerdrv_errprint.c |  9 ++++++++-
 include/linux/aer.h                    |  4 ++--
 3 files changed, 26 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/apei/cper.c b/drivers/acpi/apei/cper.c
index e6defd86b424..1e5d8a40101e 100644
--- a/drivers/acpi/apei/cper.c
+++ b/drivers/acpi/apei/cper.c
@@ -29,6 +29,7 @@
 #include <linux/time.h>
 #include <linux/cper.h>
 #include <linux/acpi.h>
+#include <linux/pci.h>
 #include <linux/aer.h>
 
 /*
@@ -249,6 +250,10 @@ static const char *cper_pcie_port_type_strs[] = {
 static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie,
 			    const struct acpi_hest_generic_data *gdata)
 {
+#ifdef CONFIG_ACPI_APEI_PCIEAER
+	struct pci_dev *dev;
+#endif
+
 	if (pcie->validation_bits & CPER_PCIE_VALID_PORT_TYPE)
 		printk("%s""port_type: %d, %s\n", pfx, pcie->port_type,
 		       pcie->port_type < ARRAY_SIZE(cper_pcie_port_type_strs) ?
@@ -281,10 +286,18 @@ static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie,
 	"%s""bridge: secondary_status: 0x%04x, control: 0x%04x\n",
 	pfx, pcie->bridge.secondary_status, pcie->bridge.control);
 #ifdef CONFIG_ACPI_APEI_PCIEAER
-	if (pcie->validation_bits & CPER_PCIE_VALID_AER_INFO) {
-		struct aer_capability_regs *aer_regs = (void *)pcie->aer_info;
-		cper_print_aer(pfx, gdata->error_severity, aer_regs);
+	dev = pci_get_domain_bus_and_slot(pcie->device_id.segment,
+			pcie->device_id.bus, pcie->device_id.function);
+	if (!dev) {
+		pr_err("PCI AER Cannot get PCI device %04x:%02x:%02x.%d\n",
+			pcie->device_id.segment, pcie->device_id.bus,
+			pcie->device_id.slot, pcie->device_id.function);
+		return;
 	}
+	if (pcie->validation_bits & CPER_PCIE_VALID_AER_INFO)
+		cper_print_aer(pfx, dev, gdata->error_severity,
+				(struct aer_capability_regs *) pcie->aer_info);
+	pci_dev_put(dev);
 #endif
 }
 
diff --git a/drivers/pci/pcie/aer/aerdrv_errprint.c b/drivers/pci/pcie/aer/aerdrv_errprint.c
index 3ea51736f18d..d3e5fc5a2de9 100644
--- a/drivers/pci/pcie/aer/aerdrv_errprint.c
+++ b/drivers/pci/pcie/aer/aerdrv_errprint.c
@@ -23,6 +23,9 @@
 
 #include "aerdrv.h"
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/ras.h>
+
 #define AER_AGENT_RECEIVER		0
 #define AER_AGENT_REQUESTER		1
 #define AER_AGENT_COMPLETER		2
@@ -194,6 +197,8 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
 	if (info->id && info->error_dev_num > 1 && info->id == id)
 		printk("%s""  Error of this Agent(%04x) is reported first\n",
 			prefix, id);
+	trace_aer_event(dev_name(&dev->dev), (info->status & ~info->mask),
+			info->severity);
 }
 
 void aer_print_port_info(struct pci_dev *dev, struct aer_err_info *info)
@@ -217,7 +222,7 @@ int cper_severity_to_aer(int cper_severity)
 }
 EXPORT_SYMBOL_GPL(cper_severity_to_aer);
 
-void cper_print_aer(const char *prefix, int cper_severity,
+void cper_print_aer(const char *prefix, struct pci_dev *dev, int cper_severity,
 		    struct aer_capability_regs *aer)
 {
 	int aer_severity, layer, agent, status_strs_size, tlp_header_valid = 0;
@@ -259,5 +264,7 @@ void cper_print_aer(const char *prefix, int cper_severity,
 			*(tlp + 8), *(tlp + 15), *(tlp + 14),
 			*(tlp + 13), *(tlp + 12));
 	}
+	trace_aer_event(dev_name(&dev->dev), (status & ~mask),
+			aer_severity);
 }
 #endif
diff --git a/include/linux/aer.h b/include/linux/aer.h
index 544abdb2238c..ec10e1b24c1c 100644
--- a/include/linux/aer.h
+++ b/include/linux/aer.h
@@ -49,8 +49,8 @@ static inline int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev)
 }
 #endif
 
-extern void cper_print_aer(const char *prefix, int cper_severity,
-			   struct aer_capability_regs *aer);
+extern void cper_print_aer(const char *prefix, struct pci_dev *dev,
+			   int cper_severity, struct aer_capability_regs *aer);
 extern int cper_severity_to_aer(int cper_severity);
 extern void aer_recover_queue(int domain, unsigned int bus, unsigned int devfn,
 			      int severity);
-- 
cgit v1.2.3


From f568f6ca811fe681ecfd11c4ce78b6aa488020c0 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Fri, 21 Dec 2012 15:02:05 -0800
Subject: pstore: remove __dev* attributes.

CONFIG_HOTPLUG is going away as an option.  As a result, the __dev*
markings need to be removed.

This change removes the use of __devinit from the pstore filesystem.

Based on patches originally written by Bill Pemberton, but redone by me
in order to handle some of the coding style issues better, by hand.

Cc: Bill Pemberton <wfp5p@virginia.edu>
Cc: Anton Vorontsov <cbouatmailru@gmail.com>
Cc: Colin Cross <ccross@android.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Tony Luck <tony.luck@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/pstore/ram.c            | 14 ++++++--------
 fs/pstore/ram_core.c       |  9 ++++-----
 include/linux/pstore_ram.h |  5 ++---
 3 files changed, 12 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index f883e7e74305..7003e5266f25 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -291,9 +291,8 @@ static void ramoops_free_przs(struct ramoops_context *cxt)
 	kfree(cxt->przs);
 }
 
-static int __devinit ramoops_init_przs(struct device *dev,
-				       struct ramoops_context *cxt,
-				       phys_addr_t *paddr, size_t dump_mem_sz)
+static int ramoops_init_przs(struct device *dev, struct ramoops_context *cxt,
+			     phys_addr_t *paddr, size_t dump_mem_sz)
 {
 	int err = -ENOMEM;
 	int i;
@@ -336,10 +335,9 @@ fail_prz:
 	return err;
 }
 
-static int __devinit ramoops_init_prz(struct device *dev,
-				      struct ramoops_context *cxt,
-				      struct persistent_ram_zone **prz,
-				      phys_addr_t *paddr, size_t sz, u32 sig)
+static int ramoops_init_prz(struct device *dev, struct ramoops_context *cxt,
+			    struct persistent_ram_zone **prz,
+			    phys_addr_t *paddr, size_t sz, u32 sig)
 {
 	if (!sz)
 		return 0;
@@ -367,7 +365,7 @@ static int __devinit ramoops_init_prz(struct device *dev,
 	return 0;
 }
 
-static int __devinit ramoops_probe(struct platform_device *pdev)
+static int ramoops_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	struct ramoops_platform_data *pdata = pdev->dev.platform_data;
diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c
index eecd2a8a84dd..0306303be372 100644
--- a/fs/pstore/ram_core.c
+++ b/fs/pstore/ram_core.c
@@ -390,8 +390,8 @@ static int persistent_ram_buffer_map(phys_addr_t start, phys_addr_t size,
 	return 0;
 }
 
-static int __devinit persistent_ram_post_init(struct persistent_ram_zone *prz,
-					      u32 sig, int ecc_size)
+static int persistent_ram_post_init(struct persistent_ram_zone *prz, u32 sig,
+				    int ecc_size)
 {
 	int ret;
 
@@ -443,9 +443,8 @@ void persistent_ram_free(struct persistent_ram_zone *prz)
 	kfree(prz);
 }
 
-struct persistent_ram_zone * __devinit persistent_ram_new(phys_addr_t start,
-							  size_t size, u32 sig,
-							  int ecc_size)
+struct persistent_ram_zone *persistent_ram_new(phys_addr_t start, size_t size,
+					       u32 sig, int ecc_size)
 {
 	struct persistent_ram_zone *prz;
 	int ret = -ENOMEM;
diff --git a/include/linux/pstore_ram.h b/include/linux/pstore_ram.h
index 098d2a838296..cb6ab5feab67 100644
--- a/include/linux/pstore_ram.h
+++ b/include/linux/pstore_ram.h
@@ -46,9 +46,8 @@ struct persistent_ram_zone {
 	size_t old_log_size;
 };
 
-struct persistent_ram_zone * __devinit persistent_ram_new(phys_addr_t start,
-							  size_t size, u32 sig,
-							  int ecc_size);
+struct persistent_ram_zone *persistent_ram_new(phys_addr_t start, size_t size,
+					       u32 sig, int ecc_size);
 void persistent_ram_free(struct persistent_ram_zone *prz);
 void persistent_ram_zap(struct persistent_ram_zone *prz);
 
-- 
cgit v1.2.3


From 0f58a01ddd5e8177255705ba15e64c3b74d67993 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Fri, 21 Dec 2012 15:12:59 -0800
Subject: Drivers: bcma: remove __dev* attributes.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CONFIG_HOTPLUG is going away as an option.  As a result, the __dev*
markings need to be removed.

This change removes the use of __devinit, __devexit_p, and __devexit
from these drivers.

Based on patches originally written by Bill Pemberton, but redone by me
in order to handle some of the coding style issues better, by hand.

Cc: Bill Pemberton <wfp5p@virginia.edu>
Cc: "Rafał Miłecki" <zajec5@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/bcma/bcma_private.h               |  6 +++---
 drivers/bcma/driver_gmac_cmn.c            |  2 +-
 drivers/bcma/driver_pci.c                 |  4 ++--
 drivers/bcma/driver_pci_host.c            | 13 ++++++-------
 drivers/bcma/host_pci.c                   |  8 ++++----
 drivers/bcma/main.c                       |  2 +-
 include/linux/bcma/bcma_driver_gmac_cmn.h |  2 +-
 include/linux/bcma/bcma_driver_pci.h      |  2 +-
 8 files changed, 19 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/bcma/bcma_private.h b/drivers/bcma/bcma_private.h
index 4a2d72ec6d43..19e3fbfd5757 100644
--- a/drivers/bcma/bcma_private.h
+++ b/drivers/bcma/bcma_private.h
@@ -22,7 +22,7 @@
 struct bcma_bus;
 
 /* main.c */
-int __devinit bcma_bus_register(struct bcma_bus *bus);
+int bcma_bus_register(struct bcma_bus *bus);
 void bcma_bus_unregister(struct bcma_bus *bus);
 int __init bcma_bus_early_register(struct bcma_bus *bus,
 				   struct bcma_device *core_cc,
@@ -87,8 +87,8 @@ u32 bcma_pcie_read(struct bcma_drv_pci *pc, u32 address);
 extern int bcma_chipco_watchdog_register(struct bcma_drv_cc *cc);
 
 #ifdef CONFIG_BCMA_DRIVER_PCI_HOSTMODE
-bool __devinit bcma_core_pci_is_in_hostmode(struct bcma_drv_pci *pc);
-void __devinit bcma_core_pci_hostmode_init(struct bcma_drv_pci *pc);
+bool bcma_core_pci_is_in_hostmode(struct bcma_drv_pci *pc);
+void bcma_core_pci_hostmode_init(struct bcma_drv_pci *pc);
 #endif /* CONFIG_BCMA_DRIVER_PCI_HOSTMODE */
 
 #ifdef CONFIG_BCMA_DRIVER_GPIO
diff --git a/drivers/bcma/driver_gmac_cmn.c b/drivers/bcma/driver_gmac_cmn.c
index 834225f65e8f..dcb137926d31 100644
--- a/drivers/bcma/driver_gmac_cmn.c
+++ b/drivers/bcma/driver_gmac_cmn.c
@@ -8,7 +8,7 @@
 #include "bcma_private.h"
 #include <linux/bcma/bcma.h>
 
-void __devinit bcma_core_gmac_cmn_init(struct bcma_drv_gmac_cmn *gc)
+void bcma_core_gmac_cmn_init(struct bcma_drv_gmac_cmn *gc)
 {
 	mutex_init(&gc->phy_mutex);
 }
diff --git a/drivers/bcma/driver_pci.c b/drivers/bcma/driver_pci.c
index c39ee6d45850..cf7a476a519f 100644
--- a/drivers/bcma/driver_pci.c
+++ b/drivers/bcma/driver_pci.c
@@ -207,14 +207,14 @@ static void bcma_core_pci_config_fixup(struct bcma_drv_pci *pc)
  * Init.
  **************************************************/
 
-static void __devinit bcma_core_pci_clientmode_init(struct bcma_drv_pci *pc)
+static void bcma_core_pci_clientmode_init(struct bcma_drv_pci *pc)
 {
 	bcma_core_pci_fixcfg(pc);
 	bcma_pcicore_serdes_workaround(pc);
 	bcma_core_pci_config_fixup(pc);
 }
 
-void __devinit bcma_core_pci_init(struct bcma_drv_pci *pc)
+void bcma_core_pci_init(struct bcma_drv_pci *pc)
 {
 	if (pc->setup_done)
 		return;
diff --git a/drivers/bcma/driver_pci_host.c b/drivers/bcma/driver_pci_host.c
index e6b5c89469dc..af0c9fabee54 100644
--- a/drivers/bcma/driver_pci_host.c
+++ b/drivers/bcma/driver_pci_host.c
@@ -24,7 +24,7 @@
 #define BCMA_PCI_SLOT_MAX	16
 #define	PCI_CONFIG_SPACE_SIZE	256
 
-bool __devinit bcma_core_pci_is_in_hostmode(struct bcma_drv_pci *pc)
+bool bcma_core_pci_is_in_hostmode(struct bcma_drv_pci *pc)
 {
 	struct bcma_bus *bus = pc->core->bus;
 	u16 chipid_top;
@@ -264,10 +264,9 @@ static int bcma_core_pci_hostmode_write_config(struct pci_bus *bus,
 }
 
 /* return cap_offset if requested capability exists in the PCI config space */
-static u8 __devinit bcma_find_pci_capability(struct bcma_drv_pci *pc,
-					     unsigned int dev,
-					     unsigned int func, u8 req_cap_id,
-					     unsigned char *buf, u32 *buflen)
+static u8 bcma_find_pci_capability(struct bcma_drv_pci *pc, unsigned int dev,
+				   unsigned int func, u8 req_cap_id,
+				   unsigned char *buf, u32 *buflen)
 {
 	u8 cap_id;
 	u8 cap_ptr = 0;
@@ -334,7 +333,7 @@ static u8 __devinit bcma_find_pci_capability(struct bcma_drv_pci *pc,
  * Retry Status (CRS) Completion Status to software then
  * enable the feature.
  */
-static void __devinit bcma_core_pci_enable_crs(struct bcma_drv_pci *pc)
+static void bcma_core_pci_enable_crs(struct bcma_drv_pci *pc)
 {
 	struct bcma_bus *bus = pc->core->bus;
 	u8 cap_ptr, root_ctrl, root_cap, dev;
@@ -381,7 +380,7 @@ static void __devinit bcma_core_pci_enable_crs(struct bcma_drv_pci *pc)
 	}
 }
 
-void __devinit bcma_core_pci_hostmode_init(struct bcma_drv_pci *pc)
+void bcma_core_pci_hostmode_init(struct bcma_drv_pci *pc)
 {
 	struct bcma_bus *bus = pc->core->bus;
 	struct bcma_drv_pci_host *pc_host;
diff --git a/drivers/bcma/host_pci.c b/drivers/bcma/host_pci.c
index 98fdc3e014e7..fbf2759e7e4e 100644
--- a/drivers/bcma/host_pci.c
+++ b/drivers/bcma/host_pci.c
@@ -155,8 +155,8 @@ static const struct bcma_host_ops bcma_host_pci_ops = {
 	.awrite32	= bcma_host_pci_awrite32,
 };
 
-static int __devinit bcma_host_pci_probe(struct pci_dev *dev,
-					 const struct pci_device_id *id)
+static int bcma_host_pci_probe(struct pci_dev *dev,
+			       const struct pci_device_id *id)
 {
 	struct bcma_bus *bus;
 	int err = -ENOMEM;
@@ -226,7 +226,7 @@ err_kfree_bus:
 	return err;
 }
 
-static void __devexit bcma_host_pci_remove(struct pci_dev *dev)
+static void bcma_host_pci_remove(struct pci_dev *dev)
 {
 	struct bcma_bus *bus = pci_get_drvdata(dev);
 
@@ -284,7 +284,7 @@ static struct pci_driver bcma_pci_bridge_driver = {
 	.name = "bcma-pci-bridge",
 	.id_table = bcma_pci_bridge_tbl,
 	.probe = bcma_host_pci_probe,
-	.remove = __devexit_p(bcma_host_pci_remove),
+	.remove = bcma_host_pci_remove,
 	.driver.pm = BCMA_PM_OPS,
 };
 
diff --git a/drivers/bcma/main.c b/drivers/bcma/main.c
index 53ba20ca17e0..4a92f647b58b 100644
--- a/drivers/bcma/main.c
+++ b/drivers/bcma/main.c
@@ -192,7 +192,7 @@ static void bcma_unregister_cores(struct bcma_bus *bus)
 		platform_device_unregister(bus->drv_cc.watchdog);
 }
 
-int __devinit bcma_bus_register(struct bcma_bus *bus)
+int bcma_bus_register(struct bcma_bus *bus)
 {
 	int err;
 	struct bcma_device *core;
diff --git a/include/linux/bcma/bcma_driver_gmac_cmn.h b/include/linux/bcma/bcma_driver_gmac_cmn.h
index def894b83b0d..4dd1f33e36a2 100644
--- a/include/linux/bcma/bcma_driver_gmac_cmn.h
+++ b/include/linux/bcma/bcma_driver_gmac_cmn.h
@@ -92,7 +92,7 @@ struct bcma_drv_gmac_cmn {
 #define gmac_cmn_write32(gc, offset, val)	bcma_write32((gc)->core, offset, val)
 
 #ifdef CONFIG_BCMA_DRIVER_GMAC_CMN
-extern void __devinit bcma_core_gmac_cmn_init(struct bcma_drv_gmac_cmn *gc);
+extern void bcma_core_gmac_cmn_init(struct bcma_drv_gmac_cmn *gc);
 #else
 static inline void bcma_core_gmac_cmn_init(struct bcma_drv_gmac_cmn *gc) { }
 #endif
diff --git a/include/linux/bcma/bcma_driver_pci.h b/include/linux/bcma/bcma_driver_pci.h
index 41da581e1612..c48d98d27b77 100644
--- a/include/linux/bcma/bcma_driver_pci.h
+++ b/include/linux/bcma/bcma_driver_pci.h
@@ -214,7 +214,7 @@ struct bcma_drv_pci {
 #define pcicore_write16(pc, offset, val)	bcma_write16((pc)->core, offset, val)
 #define pcicore_write32(pc, offset, val)	bcma_write32((pc)->core, offset, val)
 
-extern void __devinit bcma_core_pci_init(struct bcma_drv_pci *pc);
+extern void bcma_core_pci_init(struct bcma_drv_pci *pc);
 extern int bcma_core_pci_irq_ctl(struct bcma_drv_pci *pc,
 				 struct bcma_device *core, bool enable);
 extern void bcma_core_pci_extend_L1timer(struct bcma_drv_pci *pc, bool extend);
-- 
cgit v1.2.3


From e389623a68622e3c9be440ab522fac1aa1ca3454 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Fri, 21 Dec 2012 15:15:49 -0800
Subject: include: remove __dev* attributes.

CONFIG_HOTPLUG is going away as an option.  As a result, the __dev*
markings need to be removed.

This change removes the use of __devinit from some include files that
were previously missed.

Based on patches originally written by Bill Pemberton, but redone by me
in order to handle some of the coding style issues better, by hand.

Cc: Bill Pemberton <wfp5p@virginia.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/asm-generic/parport.h |  4 ++--
 include/linux/ata_platform.h  | 12 ++++++------
 2 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/asm-generic/parport.h b/include/asm-generic/parport.h
index 40528cb977e8..2c9f9d4336ca 100644
--- a/include/asm-generic/parport.h
+++ b/include/asm-generic/parport.h
@@ -10,8 +10,8 @@
  * to devices on the PCI bus.
  */
 
-static int __devinit parport_pc_find_isa_ports(int autoirq, int autodma);
-static int __devinit parport_pc_find_nonpci_ports(int autoirq, int autodma)
+static int parport_pc_find_isa_ports(int autoirq, int autodma);
+static int parport_pc_find_nonpci_ports(int autoirq, int autodma)
 {
 #ifdef CONFIG_ISA
 	return parport_pc_find_isa_ports(autoirq, autodma);
diff --git a/include/linux/ata_platform.h b/include/linux/ata_platform.h
index fe9989636b62..b9fde17f767c 100644
--- a/include/linux/ata_platform.h
+++ b/include/linux/ata_platform.h
@@ -15,12 +15,12 @@ struct pata_platform_info {
 	unsigned int irq_flags;
 };
 
-extern int __devinit __pata_platform_probe(struct device *dev,
-					   struct resource *io_res,
-					   struct resource *ctl_res,
-					   struct resource *irq_res,
-					   unsigned int ioport_shift,
-					   int __pio_mask);
+extern int __pata_platform_probe(struct device *dev,
+				 struct resource *io_res,
+				 struct resource *ctl_res,
+				 struct resource *irq_res,
+				 unsigned int ioport_shift,
+				 int __pio_mask);
 
 /*
  * Marvell SATA private data
-- 
cgit v1.2.3


From e41b2d7fe7803e85e1202d0eb172717d7bf1bbaf Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@resnulli.us>
Date: Tue, 1 Jan 2013 03:30:15 +0000
Subject: net: set dev->addr_assign_type correctly

Not a bitfield, but a plain value.

Signed-off-by: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/atheros/atl1c/atl1c_main.c | 2 +-
 drivers/net/ethernet/atheros/atlx/atl1.c        | 2 +-
 drivers/net/ethernet/ethoc.c                    | 2 +-
 drivers/net/ethernet/lantiq_etop.c              | 2 +-
 include/linux/etherdevice.h                     | 2 +-
 5 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
index 56d3f697e0c7..17651c779680 100644
--- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
+++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
@@ -2540,7 +2540,7 @@ static int atl1c_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	}
 	if (atl1c_read_mac_addr(&adapter->hw)) {
 		/* got a random MAC address, set NET_ADDR_RANDOM to netdev */
-		netdev->addr_assign_type |= NET_ADDR_RANDOM;
+		netdev->addr_assign_type = NET_ADDR_RANDOM;
 	}
 	memcpy(netdev->dev_addr, adapter->hw.mac_addr, netdev->addr_len);
 	memcpy(netdev->perm_addr, adapter->hw.mac_addr, netdev->addr_len);
diff --git a/drivers/net/ethernet/atheros/atlx/atl1.c b/drivers/net/ethernet/atheros/atlx/atl1.c
index 71b3d7daa21d..5b0d9931c720 100644
--- a/drivers/net/ethernet/atheros/atlx/atl1.c
+++ b/drivers/net/ethernet/atheros/atlx/atl1.c
@@ -3053,7 +3053,7 @@ static int atl1_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	/* copy the MAC address out of the EEPROM */
 	if (atl1_read_mac_addr(&adapter->hw)) {
 		/* mark random mac */
-		netdev->addr_assign_type |= NET_ADDR_RANDOM;
+		netdev->addr_assign_type = NET_ADDR_RANDOM;
 	}
 	memcpy(netdev->dev_addr, adapter->hw.mac_addr, netdev->addr_len);
 
diff --git a/drivers/net/ethernet/ethoc.c b/drivers/net/ethernet/ethoc.c
index 8db1c06008de..f380bb7653dd 100644
--- a/drivers/net/ethernet/ethoc.c
+++ b/drivers/net/ethernet/ethoc.c
@@ -1068,7 +1068,7 @@ static int ethoc_probe(struct platform_device *pdev)
 	}
 
 	if (random_mac)
-		netdev->addr_assign_type |= NET_ADDR_RANDOM;
+		netdev->addr_assign_type = NET_ADDR_RANDOM;
 
 	/* register MII bus */
 	priv->mdio = mdiobus_alloc();
diff --git a/drivers/net/ethernet/lantiq_etop.c b/drivers/net/ethernet/lantiq_etop.c
index c124e67a1a1c..cd3d2c09cdd0 100644
--- a/drivers/net/ethernet/lantiq_etop.c
+++ b/drivers/net/ethernet/lantiq_etop.c
@@ -655,7 +655,7 @@ ltq_etop_init(struct net_device *dev)
 
 	/* Set addr_assign_type here, ltq_etop_set_mac_address would reset it. */
 	if (random_mac)
-		dev->addr_assign_type |= NET_ADDR_RANDOM;
+		dev->addr_assign_type = NET_ADDR_RANDOM;
 
 	ltq_etop_set_multicast_list(dev);
 	err = ltq_etop_mdio_init(dev);
diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
index 243eea1e33d8..1a43e1b4f7ad 100644
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -192,7 +192,7 @@ static inline void eth_zero_addr(u8 *addr)
  */
 static inline void eth_hw_addr_random(struct net_device *dev)
 {
-	dev->addr_assign_type |= NET_ADDR_RANDOM;
+	dev->addr_assign_type = NET_ADDR_RANDOM;
 	eth_random_addr(dev->dev_addr);
 }
 
-- 
cgit v1.2.3


From fbdeca2d7753aa1ab929aeb77ccc46489eed02b9 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@resnulli.us>
Date: Tue, 1 Jan 2013 03:30:16 +0000
Subject: net: add address assign type "SET"

This is the way to indicate that mac address of a device has been set by
dev_set_mac_address()

Signed-off-by: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 2 ++
 net/core/dev.c            | 1 +
 2 files changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 6835b5837f93..c5031a45e185 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -64,6 +64,8 @@ struct wireless_dev;
 #define NET_ADDR_PERM		0	/* address is permanent (default) */
 #define NET_ADDR_RANDOM		1	/* address is generated randomly */
 #define NET_ADDR_STOLEN		2	/* address is stolen from other device */
+#define NET_ADDR_SET		3	/* address is set using
+					 * dev_set_mac_address() */
 
 /* Backlog congestion levels */
 #define NET_RX_SUCCESS		0	/* keep 'em coming, baby */
diff --git a/net/core/dev.c b/net/core/dev.c
index c85e32b30f04..bddb2f2ccaa9 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5022,6 +5022,7 @@ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
 	err = ops->ndo_set_mac_address(dev, sa);
 	if (err)
 		return err;
+	dev->addr_assign_type = NET_ADDR_SET;
 	call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
 	add_device_randomness(dev->dev_addr, dev->addr_len);
 	return 0;
-- 
cgit v1.2.3


From d069015e268bcac6c8bd8997b3235f5f977d3ab6 Mon Sep 17 00:00:00 2001
From: Zhang Rui <rui.zhang@intel.com>
Date: Mon, 19 Nov 2012 15:33:51 +0800
Subject: Introduce THERMAL_TREND_RAISE_FULL and THERMAL_TREND_DROP_FULL

These two new thermal_trend types are used to tell the governor
that the temeprature is raising/dropping quickly.

Thermal cooling governors should handle this situation and make
proper decisions, e.g. set cooling state to upper/lower limit directly
instead of one step each time.

Signed-off-by: Zhang Rui <rui.zhang@intel.com>
---
 include/linux/thermal.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index fe82022478e7..883bcda7e1e4 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -74,6 +74,8 @@ enum thermal_trend {
 	THERMAL_TREND_STABLE, /* temperature is stable */
 	THERMAL_TREND_RAISING, /* temperature is raising */
 	THERMAL_TREND_DROPPING, /* temperature is dropping */
+	THERMAL_TREND_RAISE_FULL, /* apply highest cooling action */
+	THERMAL_TREND_DROP_FULL, /* apply lowest cooling action */
 };
 
 /* Events supported by Thermal Netlink */
-- 
cgit v1.2.3


From 9442490a02867088bcd5ed6231fa3b35a733c2b8 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Fri, 4 Jan 2013 16:35:07 +0000
Subject: regmap: irq: Support wake IRQ mask inversion

Support devices which have an enable rather than mask register for wake
sources.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 drivers/base/regmap/regmap-irq.c | 21 +++++++++++++++++----
 include/linux/regmap.h           |  1 +
 2 files changed, 18 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/regmap/regmap-irq.c b/drivers/base/regmap/regmap-irq.c
index be5579964be0..4706c63d0bc6 100644
--- a/drivers/base/regmap/regmap-irq.c
+++ b/drivers/base/regmap/regmap-irq.c
@@ -92,8 +92,14 @@ static void regmap_irq_sync_unlock(struct irq_data *data)
 		reg = d->chip->wake_base +
 			(i * map->reg_stride * d->irq_reg_stride);
 		if (d->wake_buf) {
-			ret = regmap_update_bits(d->map, reg,
-					 d->mask_buf_def[i], d->wake_buf[i]);
+			if (d->chip->wake_invert)
+				ret = regmap_update_bits(d->map, reg,
+							 d->mask_buf_def[i],
+							 ~d->wake_buf[i]);
+			else
+				ret = regmap_update_bits(d->map, reg,
+							 d->mask_buf_def[i],
+							 d->wake_buf[i]);
 			if (ret != 0)
 				dev_err(d->map->dev,
 					"Failed to sync wakes in %x: %d\n",
@@ -419,8 +425,15 @@ int regmap_add_irq_chip(struct regmap *map, int irq, int irq_flags,
 			d->wake_buf[i] = d->mask_buf_def[i];
 			reg = chip->wake_base +
 				(i * map->reg_stride * d->irq_reg_stride);
-			ret = regmap_update_bits(map, reg, d->wake_buf[i],
-						 d->wake_buf[i]);
+
+			if (chip->wake_invert)
+				ret = regmap_update_bits(map, reg,
+							 d->mask_buf_def[i],
+							 0);
+			else
+				ret = regmap_update_bits(map, reg,
+							 d->mask_buf_def[i],
+							 d->wake_buf[i]);
 			if (ret != 0) {
 				dev_err(map->dev, "Failed to set masks in 0x%x: %d\n",
 					reg, ret);
diff --git a/include/linux/regmap.h b/include/linux/regmap.h
index b7e95bf942c9..8c0b50df3d59 100644
--- a/include/linux/regmap.h
+++ b/include/linux/regmap.h
@@ -381,6 +381,7 @@ struct regmap_irq_chip {
 	unsigned int wake_base;
 	unsigned int irq_reg_stride;
 	unsigned int mask_invert;
+	unsigned int wake_invert;
 	bool runtime_pm;
 
 	int num_regs;
-- 
cgit v1.2.3


From 9ff162a8b96c96238773972e26288a366e403b0c Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@resnulli.us>
Date: Thu, 3 Jan 2013 22:48:49 +0000
Subject: net: introduce upper device lists

This lists are supposed to serve for storing pointers to all upper devices.
Eventually it will replace dev->master pointer which is used for
bonding, bridge, team but it cannot be used for vlan, macvlan where
there might be multiple upper present. In case the upper link is
replacement for dev->master, it is marked with "master" flag.

New upper device list resolves this limitation. Also, the information
stored in lists is used for preventing looping setups like
"bond->somethingelse->samebond"

Signed-off-by: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  14 +++
 net/core/dev.c            | 239 +++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 249 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index c5031a45e185..e324601f48e8 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1174,6 +1174,8 @@ struct net_device {
 					  * which this device is member of.
 					  */
 
+	struct list_head	upper_dev_list; /* List of upper devices */
+
 	/* Interface address info used in eth_type_trans() */
 	unsigned char		*dev_addr;	/* hw address, (before bcast
 						   because most packets are
@@ -2636,6 +2638,18 @@ extern int		netdev_max_backlog;
 extern int		netdev_tstamp_prequeue;
 extern int		weight_p;
 extern int		bpf_jit_enable;
+
+extern bool netdev_has_upper_dev(struct net_device *dev,
+				 struct net_device *upper_dev);
+extern bool netdev_has_any_upper_dev(struct net_device *dev);
+extern struct net_device *netdev_master_upper_dev_get(struct net_device *dev);
+extern struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev);
+extern int netdev_upper_dev_link(struct net_device *dev,
+				 struct net_device *upper_dev);
+extern int netdev_master_upper_dev_link(struct net_device *dev,
+					struct net_device *upper_dev);
+extern void netdev_upper_dev_unlink(struct net_device *dev,
+				    struct net_device *upper_dev);
 extern int		netdev_set_master(struct net_device *dev, struct net_device *master);
 extern int netdev_set_bond_master(struct net_device *dev,
 				  struct net_device *master);
diff --git a/net/core/dev.c b/net/core/dev.c
index bddb2f2ccaa9..53a9fefbc9af 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4600,6 +4600,232 @@ static int __init dev_proc_init(void)
 #endif	/* CONFIG_PROC_FS */
 
 
+struct netdev_upper {
+	struct net_device *dev;
+	bool master;
+	struct list_head list;
+	struct rcu_head rcu;
+	struct list_head search_list;
+};
+
+static void __append_search_uppers(struct list_head *search_list,
+				   struct net_device *dev)
+{
+	struct netdev_upper *upper;
+
+	list_for_each_entry(upper, &dev->upper_dev_list, list) {
+		/* check if this upper is not already in search list */
+		if (list_empty(&upper->search_list))
+			list_add_tail(&upper->search_list, search_list);
+	}
+}
+
+static bool __netdev_search_upper_dev(struct net_device *dev,
+				      struct net_device *upper_dev)
+{
+	LIST_HEAD(search_list);
+	struct netdev_upper *upper;
+	struct netdev_upper *tmp;
+	bool ret = false;
+
+	__append_search_uppers(&search_list, dev);
+	list_for_each_entry(upper, &search_list, search_list) {
+		if (upper->dev == upper_dev) {
+			ret = true;
+			break;
+		}
+		__append_search_uppers(&search_list, upper->dev);
+	}
+	list_for_each_entry_safe(upper, tmp, &search_list, search_list)
+		INIT_LIST_HEAD(&upper->search_list);
+	return ret;
+}
+
+static struct netdev_upper *__netdev_find_upper(struct net_device *dev,
+						struct net_device *upper_dev)
+{
+	struct netdev_upper *upper;
+
+	list_for_each_entry(upper, &dev->upper_dev_list, list) {
+		if (upper->dev == upper_dev)
+			return upper;
+	}
+	return NULL;
+}
+
+/**
+ * netdev_has_upper_dev - Check if device is linked to an upper device
+ * @dev: device
+ * @upper_dev: upper device to check
+ *
+ * Find out if a device is linked to specified upper device and return true
+ * in case it is. Note that this checks only immediate upper device,
+ * not through a complete stack of devices. The caller must hold the RTNL lock.
+ */
+bool netdev_has_upper_dev(struct net_device *dev,
+			  struct net_device *upper_dev)
+{
+	ASSERT_RTNL();
+
+	return __netdev_find_upper(dev, upper_dev);
+}
+EXPORT_SYMBOL(netdev_has_upper_dev);
+
+/**
+ * netdev_has_any_upper_dev - Check if device is linked to some device
+ * @dev: device
+ *
+ * Find out if a device is linked to an upper device and return true in case
+ * it is. The caller must hold the RTNL lock.
+ */
+bool netdev_has_any_upper_dev(struct net_device *dev)
+{
+	ASSERT_RTNL();
+
+	return !list_empty(&dev->upper_dev_list);
+}
+EXPORT_SYMBOL(netdev_has_any_upper_dev);
+
+/**
+ * netdev_master_upper_dev_get - Get master upper device
+ * @dev: device
+ *
+ * Find a master upper device and return pointer to it or NULL in case
+ * it's not there. The caller must hold the RTNL lock.
+ */
+struct net_device *netdev_master_upper_dev_get(struct net_device *dev)
+{
+	struct netdev_upper *upper;
+
+	ASSERT_RTNL();
+
+	if (list_empty(&dev->upper_dev_list))
+		return NULL;
+
+	upper = list_first_entry(&dev->upper_dev_list,
+				 struct netdev_upper, list);
+	if (likely(upper->master))
+		return upper->dev;
+	return NULL;
+}
+EXPORT_SYMBOL(netdev_master_upper_dev_get);
+
+/**
+ * netdev_master_upper_dev_get_rcu - Get master upper device
+ * @dev: device
+ *
+ * Find a master upper device and return pointer to it or NULL in case
+ * it's not there. The caller must hold the RCU read lock.
+ */
+struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev)
+{
+	struct netdev_upper *upper;
+
+	upper = list_first_or_null_rcu(&dev->upper_dev_list,
+				       struct netdev_upper, list);
+	if (upper && likely(upper->master))
+		return upper->dev;
+	return NULL;
+}
+EXPORT_SYMBOL(netdev_master_upper_dev_get_rcu);
+
+static int __netdev_upper_dev_link(struct net_device *dev,
+				   struct net_device *upper_dev, bool master)
+{
+	struct netdev_upper *upper;
+
+	ASSERT_RTNL();
+
+	if (dev == upper_dev)
+		return -EBUSY;
+
+	/* To prevent loops, check if dev is not upper device to upper_dev. */
+	if (__netdev_search_upper_dev(upper_dev, dev))
+		return -EBUSY;
+
+	if (__netdev_find_upper(dev, upper_dev))
+		return -EEXIST;
+
+	if (master && netdev_master_upper_dev_get(dev))
+		return -EBUSY;
+
+	upper = kmalloc(sizeof(*upper), GFP_KERNEL);
+	if (!upper)
+		return -ENOMEM;
+
+	upper->dev = upper_dev;
+	upper->master = master;
+	INIT_LIST_HEAD(&upper->search_list);
+
+	/* Ensure that master upper link is always the first item in list. */
+	if (master)
+		list_add_rcu(&upper->list, &dev->upper_dev_list);
+	else
+		list_add_tail_rcu(&upper->list, &dev->upper_dev_list);
+	dev_hold(upper_dev);
+
+	return 0;
+}
+
+/**
+ * netdev_upper_dev_link - Add a link to the upper device
+ * @dev: device
+ * @upper_dev: new upper device
+ *
+ * Adds a link to device which is upper to this one. The caller must hold
+ * the RTNL lock. On a failure a negative errno code is returned.
+ * On success the reference counts are adjusted and the function
+ * returns zero.
+ */
+int netdev_upper_dev_link(struct net_device *dev,
+			  struct net_device *upper_dev)
+{
+	return __netdev_upper_dev_link(dev, upper_dev, false);
+}
+EXPORT_SYMBOL(netdev_upper_dev_link);
+
+/**
+ * netdev_master_upper_dev_link - Add a master link to the upper device
+ * @dev: device
+ * @upper_dev: new upper device
+ *
+ * Adds a link to device which is upper to this one. In this case, only
+ * one master upper device can be linked, although other non-master devices
+ * might be linked as well. The caller must hold the RTNL lock.
+ * On a failure a negative errno code is returned. On success the reference
+ * counts are adjusted and the function returns zero.
+ */
+int netdev_master_upper_dev_link(struct net_device *dev,
+				 struct net_device *upper_dev)
+{
+	return __netdev_upper_dev_link(dev, upper_dev, true);
+}
+EXPORT_SYMBOL(netdev_master_upper_dev_link);
+
+/**
+ * netdev_upper_dev_unlink - Removes a link to upper device
+ * @dev: device
+ * @upper_dev: new upper device
+ *
+ * Removes a link to device which is upper to this one. The caller must hold
+ * the RTNL lock.
+ */
+void netdev_upper_dev_unlink(struct net_device *dev,
+			     struct net_device *upper_dev)
+{
+	struct netdev_upper *upper;
+
+	ASSERT_RTNL();
+
+	upper = __netdev_find_upper(dev, upper_dev);
+	if (!upper)
+		return;
+	list_del_rcu(&upper->list);
+	dev_put(upper_dev);
+	kfree_rcu(upper, rcu);
+}
+EXPORT_SYMBOL(netdev_upper_dev_unlink);
+
 /**
  *	netdev_set_master	-	set up master pointer
  *	@slave: slave device
@@ -4613,19 +4839,23 @@ static int __init dev_proc_init(void)
 int netdev_set_master(struct net_device *slave, struct net_device *master)
 {
 	struct net_device *old = slave->master;
+	int err;
 
 	ASSERT_RTNL();
 
 	if (master) {
 		if (old)
 			return -EBUSY;
-		dev_hold(master);
+		err = netdev_master_upper_dev_link(slave, master);
+		if (err)
+			return err;
 	}
 
 	slave->master = master;
 
 	if (old)
-		dev_put(old);
+		netdev_upper_dev_unlink(slave, master);
+
 	return 0;
 }
 EXPORT_SYMBOL(netdev_set_master);
@@ -5503,8 +5733,8 @@ static void rollback_registered_many(struct list_head *head)
 		if (dev->netdev_ops->ndo_uninit)
 			dev->netdev_ops->ndo_uninit(dev);
 
-		/* Notifier chain MUST detach us from master device. */
-		WARN_ON(dev->master);
+		/* Notifier chain MUST detach us all upper devices. */
+		WARN_ON(netdev_has_any_upper_dev(dev));
 
 		/* Remove entries from kobject tree */
 		netdev_unregister_kobject(dev);
@@ -6212,6 +6442,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
 	INIT_LIST_HEAD(&dev->napi_list);
 	INIT_LIST_HEAD(&dev->unreg_list);
 	INIT_LIST_HEAD(&dev->link_watch_list);
+	INIT_LIST_HEAD(&dev->upper_dev_list);
 	dev->priv_flags = IFF_XMIT_DST_RELEASE;
 	setup(dev);
 
-- 
cgit v1.2.3


From 8b98a70c28a607a02b3c3d41bc9a4c141f421052 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@resnulli.us>
Date: Thu, 3 Jan 2013 22:49:02 +0000
Subject: net: remove no longer used netdev_set_bond_master() and
 netdev_set_master()

Signed-off-by: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  6 +----
 net/core/dev.c            | 63 -----------------------------------------------
 2 files changed, 1 insertion(+), 68 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index e324601f48e8..3cad8eab02b6 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -858,8 +858,7 @@ struct netdev_fcoe_hbainfo {
  *	flow_id is a flow ID to be passed to rps_may_expire_flow() later.
  *	Return the filter ID on success, or a negative error code.
  *
- *	Slave management functions (for bridge, bonding, etc). User should
- *	call netdev_set_master() to set dev->master properly.
+ *	Slave management functions (for bridge, bonding, etc).
  * int (*ndo_add_slave)(struct net_device *dev, struct net_device *slave_dev);
  *	Called to make another netdev an underling.
  *
@@ -2650,9 +2649,6 @@ extern int netdev_master_upper_dev_link(struct net_device *dev,
 					struct net_device *upper_dev);
 extern void netdev_upper_dev_unlink(struct net_device *dev,
 				    struct net_device *upper_dev);
-extern int		netdev_set_master(struct net_device *dev, struct net_device *master);
-extern int netdev_set_bond_master(struct net_device *dev,
-				  struct net_device *master);
 extern int skb_checksum_help(struct sk_buff *skb);
 extern struct sk_buff *skb_gso_segment(struct sk_buff *skb,
 	netdev_features_t features);
diff --git a/net/core/dev.c b/net/core/dev.c
index 53a9fefbc9af..a51ccf46e8b7 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4826,69 +4826,6 @@ void netdev_upper_dev_unlink(struct net_device *dev,
 }
 EXPORT_SYMBOL(netdev_upper_dev_unlink);
 
-/**
- *	netdev_set_master	-	set up master pointer
- *	@slave: slave device
- *	@master: new master device
- *
- *	Changes the master device of the slave. Pass %NULL to break the
- *	bonding. The caller must hold the RTNL semaphore. On a failure
- *	a negative errno code is returned. On success the reference counts
- *	are adjusted and the function returns zero.
- */
-int netdev_set_master(struct net_device *slave, struct net_device *master)
-{
-	struct net_device *old = slave->master;
-	int err;
-
-	ASSERT_RTNL();
-
-	if (master) {
-		if (old)
-			return -EBUSY;
-		err = netdev_master_upper_dev_link(slave, master);
-		if (err)
-			return err;
-	}
-
-	slave->master = master;
-
-	if (old)
-		netdev_upper_dev_unlink(slave, master);
-
-	return 0;
-}
-EXPORT_SYMBOL(netdev_set_master);
-
-/**
- *	netdev_set_bond_master	-	set up bonding master/slave pair
- *	@slave: slave device
- *	@master: new master device
- *
- *	Changes the master device of the slave. Pass %NULL to break the
- *	bonding. The caller must hold the RTNL semaphore. On a failure
- *	a negative errno code is returned. On success %RTM_NEWLINK is sent
- *	to the routing socket and the function returns zero.
- */
-int netdev_set_bond_master(struct net_device *slave, struct net_device *master)
-{
-	int err;
-
-	ASSERT_RTNL();
-
-	err = netdev_set_master(slave, master);
-	if (err)
-		return err;
-	if (master)
-		slave->flags |= IFF_SLAVE;
-	else
-		slave->flags &= ~IFF_SLAVE;
-
-	rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
-	return 0;
-}
-EXPORT_SYMBOL(netdev_set_bond_master);
-
 static void dev_change_rx_flags(struct net_device *dev, int flags)
 {
 	const struct net_device_ops *ops = dev->netdev_ops;
-- 
cgit v1.2.3


From 85464ef271a0f5496f404c6a2f2dfbf1d76e1a49 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@resnulli.us>
Date: Thu, 3 Jan 2013 22:49:03 +0000
Subject: net: kill dev->master

Nobody uses this now. Remove it.

Signed-off-by: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 3cad8eab02b6..0209ac328e8a 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1169,10 +1169,6 @@ struct net_device {
 						 * avoid dirtying this cache line.
 						 */
 
-	struct net_device	*master; /* Pointer to master device of a group,
-					  * which this device is member of.
-					  */
-
 	struct list_head	upper_dev_list; /* List of upper devices */
 
 	/* Interface address info used in eth_type_trans() */
-- 
cgit v1.2.3


From 03f595668017f1a1fb971c02fc37140bc6e7bb1c Mon Sep 17 00:00:00 2001
From: Stanislav Kinsbursky <skinsbursky@parallels.com>
Date: Fri, 4 Jan 2013 15:34:50 -0800
Subject: ipc: add sysctl to specify desired next object id

Add 3 new variables and sysctls to tune them (by one "next_id" variable
for messages, semaphores and shared memory respectively).  This variable
can be used to set desired id for next allocated IPC object.  By default
it's equal to -1 and old behaviour is preserved.  If this variable is
non-negative, then desired idr will be extracted from it and used as a
start value to search for free IDR slot.

Notes:

1) this patch doesn't guarantee that the new object will have desired
   id.  So it's up to user space how to handle new object with wrong id.

2) After a sucessful id allocation attempt, "next_id" will be set back
   to -1 (if it was non-negative).

[akpm@linux-foundation.org: checkpatch fixes]
Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
Cc: Serge Hallyn <serge.hallyn@canonical.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Pavel Emelyanov <xemul@parallels.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/sysctl/kernel.txt | 19 +++++++++++++++++++
 include/linux/ipc_namespace.h   |  1 +
 ipc/ipc_sysctl.c                | 32 ++++++++++++++++++++++++++++++++
 ipc/util.c                      | 16 ++++++++++++----
 ipc/util.h                      |  1 +
 5 files changed, 65 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index 2907ba6c3607..51b953a1b149 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -38,6 +38,7 @@ show up in /proc/sys/kernel:
 - l2cr                        [ PPC only ]
 - modprobe                    ==> Documentation/debugging-modules.txt
 - modules_disabled
+- msg_next_id		      [ sysv ipc ]
 - msgmax
 - msgmnb
 - msgmni
@@ -62,7 +63,9 @@ show up in /proc/sys/kernel:
 - rtsig-max
 - rtsig-nr
 - sem
+- sem_next_id		      [ sysv ipc ]
 - sg-big-buff                 [ generic SCSI device (sg) ]
+- shm_next_id		      [ sysv ipc ]
 - shm_rmid_forced
 - shmall
 - shmmax                      [ sysv ipc ]
@@ -320,6 +323,22 @@ to false.
 
 ==============================================================
 
+msg_next_id, sem_next_id, and shm_next_id:
+
+These three toggles allows to specify desired id for next allocated IPC
+object: message, semaphore or shared memory respectively.
+
+By default they are equal to -1, which means generic allocation logic.
+Possible values to set are in range {0..INT_MAX}.
+
+Notes:
+1) kernel doesn't guarantee, that new object will have desired id. So,
+it's up to userspace, how to handle an object with "wrong" id.
+2) Toggle with non-default value will be set back to -1 by kernel after
+successful IPC object allocation.
+
+==============================================================
+
 nmi_watchdog:
 
 Enables/Disables the NMI watchdog on x86 systems. When the value is
diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index fe771978e877..ae221a7b5092 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -24,6 +24,7 @@ struct ipc_ids {
 	unsigned short seq_max;
 	struct rw_semaphore rw_mutex;
 	struct idr ipcs_idr;
+	int next_id;
 };
 
 struct ipc_namespace {
diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c
index 00fba2bab87d..130dfece27ac 100644
--- a/ipc/ipc_sysctl.c
+++ b/ipc/ipc_sysctl.c
@@ -158,6 +158,9 @@ static int proc_ipcauto_dointvec_minmax(ctl_table *table, int write,
 
 static int zero;
 static int one = 1;
+#ifdef CONFIG_CHECKPOINT_RESTORE
+static int int_max = INT_MAX;
+#endif
 
 static struct ctl_table ipc_kern_table[] = {
 	{
@@ -227,6 +230,35 @@ static struct ctl_table ipc_kern_table[] = {
 		.extra1		= &zero,
 		.extra2		= &one,
 	},
+#ifdef CONFIG_CHECKPOINT_RESTORE
+	{
+		.procname	= "sem_next_id",
+		.data		= &init_ipc_ns.ids[IPC_SEM_IDS].next_id,
+		.maxlen		= sizeof(init_ipc_ns.ids[IPC_SEM_IDS].next_id),
+		.mode		= 0644,
+		.proc_handler	= proc_ipc_dointvec_minmax,
+		.extra1		= &zero,
+		.extra2		= &int_max,
+	},
+	{
+		.procname	= "msg_next_id",
+		.data		= &init_ipc_ns.ids[IPC_MSG_IDS].next_id,
+		.maxlen		= sizeof(init_ipc_ns.ids[IPC_MSG_IDS].next_id),
+		.mode		= 0644,
+		.proc_handler	= proc_ipc_dointvec_minmax,
+		.extra1		= &zero,
+		.extra2		= &int_max,
+	},
+	{
+		.procname	= "shm_next_id",
+		.data		= &init_ipc_ns.ids[IPC_SHM_IDS].next_id,
+		.maxlen		= sizeof(init_ipc_ns.ids[IPC_SHM_IDS].next_id),
+		.mode		= 0644,
+		.proc_handler	= proc_ipc_dointvec_minmax,
+		.extra1		= &zero,
+		.extra2		= &int_max,
+	},
+#endif
 	{}
 };
 
diff --git a/ipc/util.c b/ipc/util.c
index 72fd0785ac94..74e1d9c7a98a 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -122,6 +122,7 @@ void ipc_init_ids(struct ipc_ids *ids)
 
 	ids->in_use = 0;
 	ids->seq = 0;
+	ids->next_id = -1;
 	{
 		int seq_limit = INT_MAX/SEQ_MULTIPLIER;
 		if (seq_limit > USHRT_MAX)
@@ -252,6 +253,7 @@ int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size)
 	kuid_t euid;
 	kgid_t egid;
 	int id, err;
+	int next_id = ids->next_id;
 
 	if (size > IPCMNI)
 		size = IPCMNI;
@@ -264,7 +266,8 @@ int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size)
 	rcu_read_lock();
 	spin_lock(&new->lock);
 
-	err = idr_get_new(&ids->ipcs_idr, new, &id);
+	err = idr_get_new_above(&ids->ipcs_idr, new,
+				(next_id < 0) ? 0 : ipcid_to_idx(next_id), &id);
 	if (err) {
 		spin_unlock(&new->lock);
 		rcu_read_unlock();
@@ -277,9 +280,14 @@ int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size)
 	new->cuid = new->uid = euid;
 	new->gid = new->cgid = egid;
 
-	new->seq = ids->seq++;
-	if(ids->seq > ids->seq_max)
-		ids->seq = 0;
+	if (next_id < 0) {
+		new->seq = ids->seq++;
+		if (ids->seq > ids->seq_max)
+			ids->seq = 0;
+	} else {
+		new->seq = ipcid_to_seqx(next_id);
+		ids->next_id = -1;
+	}
 
 	new->id = ipc_buildid(id, new->seq);
 	return id;
diff --git a/ipc/util.h b/ipc/util.h
index c8fe2f7631e9..a61e0ca2bffd 100644
--- a/ipc/util.h
+++ b/ipc/util.h
@@ -92,6 +92,7 @@ void __init ipc_init_proc_interface(const char *path, const char *header,
 #define IPC_SHM_IDS	2
 
 #define ipcid_to_idx(id) ((id) % SEQ_MULTIPLIER)
+#define ipcid_to_seqx(id) ((id) / SEQ_MULTIPLIER)
 
 /* must be called with ids->rw_mutex acquired for writing */
 int ipc_addid(struct ipc_ids *, struct kern_ipc_perm *, int);
-- 
cgit v1.2.3


From f9dd87f4738c7555aca2cdf8cb2b2326cafb0cad Mon Sep 17 00:00:00 2001
From: Stanislav Kinsbursky <skinsbursky@parallels.com>
Date: Fri, 4 Jan 2013 15:34:52 -0800
Subject: ipc: message queue receive cleanup

Move all message related manipulation into one function msg_fill().
Actually, two functions because of the compat one.

[akpm@linux-foundation.org: checkpatch fixes]
Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
Cc: Serge Hallyn <serge.hallyn@canonical.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Pavel Emelyanov <xemul@parallels.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/msg.h |  5 +++--
 ipc/compat.c        | 45 +++++++++++++++++++--------------------------
 ipc/msg.c           | 44 +++++++++++++++++++++++---------------------
 3 files changed, 45 insertions(+), 49 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/msg.h b/include/linux/msg.h
index 7a4b9e97d29a..fc5743a554e6 100644
--- a/include/linux/msg.h
+++ b/include/linux/msg.h
@@ -34,7 +34,8 @@ struct msg_queue {
 /* Helper routines for sys_msgsnd and sys_msgrcv */
 extern long do_msgsnd(int msqid, long mtype, void __user *mtext,
 			size_t msgsz, int msgflg);
-extern long do_msgrcv(int msqid, long *pmtype, void __user *mtext,
-			size_t msgsz, long msgtyp, int msgflg);
+extern long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp,
+		      int msgflg,
+		      long (*msg_fill)(void __user *, struct msg_msg *, size_t));
 
 #endif /* _LINUX_MSG_H */
diff --git a/ipc/compat.c b/ipc/compat.c
index ad9518eb26e0..eb3ea16d2d1d 100644
--- a/ipc/compat.c
+++ b/ipc/compat.c
@@ -306,6 +306,20 @@ static long do_compat_semctl(int first, int second, int third, u32 pad)
 	return err;
 }
 
+long compat_do_msg_fill(void __user *dest, struct msg_msg *msg, size_t bufsz)
+{
+	struct compat_msgbuf __user *msgp = dest;
+	size_t msgsz;
+
+	if (put_user(msg->m_type, &msgp->mtype))
+		return -EFAULT;
+
+	msgsz = (bufsz > msg->m_ts) ? msg->m_ts : bufsz;
+	if (store_msg(msgp->mtext, msg, msgsz))
+		return -EFAULT;
+	return msgsz;
+}
+
 #ifdef CONFIG_ARCH_WANT_OLD_COMPAT_IPC
 long compat_sys_semctl(int first, int second, int third, void __user *uptr)
 {
@@ -337,10 +351,6 @@ long compat_sys_msgsnd(int first, int second, int third, void __user *uptr)
 long compat_sys_msgrcv(int first, int second, int msgtyp, int third,
 			   int version, void __user *uptr)
 {
-	struct compat_msgbuf __user *up;
-	long type;
-	int err;
-
 	if (first < 0)
 		return -EINVAL;
 	if (second < 0)
@@ -348,23 +358,14 @@ long compat_sys_msgrcv(int first, int second, int msgtyp, int third,
 
 	if (!version) {
 		struct compat_ipc_kludge ipck;
-		err = -EINVAL;
 		if (!uptr)
-			goto out;
-		err = -EFAULT;
+			return -EINVAL;
 		if (copy_from_user (&ipck, uptr, sizeof(ipck)))
-			goto out;
+			return -EFAULT;
 		uptr = compat_ptr(ipck.msgp);
 		msgtyp = ipck.msgtyp;
 	}
-	up = uptr;
-	err = do_msgrcv(first, &type, up->mtext, second, msgtyp, third);
-	if (err < 0)
-		goto out;
-	if (put_user(type, &up->mtype))
-		err = -EFAULT;
-out:
-	return err;
+	return do_msgrcv(first, uptr, second, msgtyp, third, compat_do_msg_fill);
 }
 #else
 long compat_sys_semctl(int semid, int semnum, int cmd, int arg)
@@ -385,16 +386,8 @@ long compat_sys_msgsnd(int msqid, struct compat_msgbuf __user *msgp,
 long compat_sys_msgrcv(int msqid, struct compat_msgbuf __user *msgp,
 		       compat_ssize_t msgsz, long msgtyp, int msgflg)
 {
-	long err, mtype;
-
-	err =  do_msgrcv(msqid, &mtype, msgp->mtext, (ssize_t)msgsz, msgtyp, msgflg);
-	if (err < 0)
-		goto out;
-
-	if (put_user(mtype, &msgp->mtype))
-		err = -EFAULT;
- out:
-	return err;
+	return do_msgrcv(msqid, msgp, (ssize_t)msgsz, msgtyp, msgflg,
+			 compat_do_msg_fill);
 }
 #endif
 
diff --git a/ipc/msg.c b/ipc/msg.c
index 2f272fa76595..cefc24f46e3e 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -755,15 +755,30 @@ static inline int convert_mode(long *msgtyp, int msgflg)
 	return SEARCH_EQUAL;
 }
 
-long do_msgrcv(int msqid, long *pmtype, void __user *mtext,
-		size_t msgsz, long msgtyp, int msgflg)
+static long do_msg_fill(void __user *dest, struct msg_msg *msg, size_t bufsz)
+{
+	struct msgbuf __user *msgp = dest;
+	size_t msgsz;
+
+	if (put_user(msg->m_type, &msgp->mtype))
+		return -EFAULT;
+
+	msgsz = (bufsz > msg->m_ts) ? msg->m_ts : bufsz;
+	if (store_msg(msgp->mtext, msg, msgsz))
+		return -EFAULT;
+	return msgsz;
+}
+
+long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp,
+	       int msgflg,
+	       long (*msg_handler)(void __user *, struct msg_msg *, size_t))
 {
 	struct msg_queue *msq;
 	struct msg_msg *msg;
 	int mode;
 	struct ipc_namespace *ns;
 
-	if (msqid < 0 || (long) msgsz < 0)
+	if (msqid < 0 || (long) bufsz < 0)
 		return -EINVAL;
 	mode = convert_mode(&msgtyp, msgflg);
 	ns = current->nsproxy->ipc_ns;
@@ -804,7 +819,7 @@ long do_msgrcv(int msqid, long *pmtype, void __user *mtext,
 			 * Found a suitable message.
 			 * Unlink it from the queue.
 			 */
-			if ((msgsz < msg->m_ts) && !(msgflg & MSG_NOERROR)) {
+			if ((bufsz < msg->m_ts) && !(msgflg & MSG_NOERROR)) {
 				msg = ERR_PTR(-E2BIG);
 				goto out_unlock;
 			}
@@ -831,7 +846,7 @@ long do_msgrcv(int msqid, long *pmtype, void __user *mtext,
 		if (msgflg & MSG_NOERROR)
 			msr_d.r_maxsize = INT_MAX;
 		else
-			msr_d.r_maxsize = msgsz;
+			msr_d.r_maxsize = bufsz;
 		msr_d.r_msg = ERR_PTR(-EAGAIN);
 		current->state = TASK_INTERRUPTIBLE;
 		msg_unlock(msq);
@@ -894,29 +909,16 @@ out_unlock:
 	if (IS_ERR(msg))
 		return PTR_ERR(msg);
 
-	msgsz = (msgsz > msg->m_ts) ? msg->m_ts : msgsz;
-	*pmtype = msg->m_type;
-	if (store_msg(mtext, msg, msgsz))
-		msgsz = -EFAULT;
-
+	bufsz = msg_handler(buf, msg, bufsz);
 	free_msg(msg);
 
-	return msgsz;
+	return bufsz;
 }
 
 SYSCALL_DEFINE5(msgrcv, int, msqid, struct msgbuf __user *, msgp, size_t, msgsz,
 		long, msgtyp, int, msgflg)
 {
-	long err, mtype;
-
-	err =  do_msgrcv(msqid, &mtype, msgp->mtext, msgsz, msgtyp, msgflg);
-	if (err < 0)
-		goto out;
-
-	if (put_user(mtype, &msgp->mtype))
-		err = -EFAULT;
-out:
-	return err;
+	return do_msgrcv(msqid, msgp, msgsz, msgtyp, msgflg, do_msg_fill);
 }
 
 #ifdef CONFIG_PROC_FS
-- 
cgit v1.2.3


From 3a665531a3b7c2ad2c87903b24646be6916340e4 Mon Sep 17 00:00:00 2001
From: Stanislav Kinsbursky <skinsbursky@parallels.com>
Date: Fri, 4 Jan 2013 15:34:56 -0800
Subject: selftests: IPC message queue copy feature test

This test can be used to check wheither kernel supports IPC message queue
copy and restore features (required by CRIU project).

Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
Cc: Serge Hallyn <serge.hallyn@canonical.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Pavel Emelyanov <xemul@parallels.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/msg.h                  |   3 +-
 ipc/compat.c                         |   3 +-
 tools/testing/selftests/ipc/Makefile |  25 ++++
 tools/testing/selftests/ipc/msgque.c | 246 +++++++++++++++++++++++++++++++++++
 4 files changed, 275 insertions(+), 2 deletions(-)
 create mode 100644 tools/testing/selftests/ipc/Makefile
 create mode 100644 tools/testing/selftests/ipc/msgque.c

(limited to 'include/linux')

diff --git a/include/linux/msg.h b/include/linux/msg.h
index fc5743a554e6..391af8d11cce 100644
--- a/include/linux/msg.h
+++ b/include/linux/msg.h
@@ -36,6 +36,7 @@ extern long do_msgsnd(int msqid, long mtype, void __user *mtext,
 			size_t msgsz, int msgflg);
 extern long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp,
 		      int msgflg,
-		      long (*msg_fill)(void __user *, struct msg_msg *, size_t));
+		      long (*msg_fill)(void __user *, struct msg_msg *,
+				       size_t));
 
 #endif /* _LINUX_MSG_H */
diff --git a/ipc/compat.c b/ipc/compat.c
index eb3ea16d2d1d..2547f29dcd1b 100644
--- a/ipc/compat.c
+++ b/ipc/compat.c
@@ -365,7 +365,8 @@ long compat_sys_msgrcv(int first, int second, int msgtyp, int third,
 		uptr = compat_ptr(ipck.msgp);
 		msgtyp = ipck.msgtyp;
 	}
-	return do_msgrcv(first, uptr, second, msgtyp, third, compat_do_msg_fill);
+	return do_msgrcv(first, uptr, second, msgtyp, third,
+			 compat_do_msg_fill);
 }
 #else
 long compat_sys_semctl(int semid, int semnum, int cmd, int arg)
diff --git a/tools/testing/selftests/ipc/Makefile b/tools/testing/selftests/ipc/Makefile
new file mode 100644
index 000000000000..5386fd7c43ae
--- /dev/null
+++ b/tools/testing/selftests/ipc/Makefile
@@ -0,0 +1,25 @@
+uname_M := $(shell uname -m 2>/dev/null || echo not)
+ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/i386/)
+ifeq ($(ARCH),i386)
+        ARCH := X86
+	CFLAGS := -DCONFIG_X86_32 -D__i386__
+endif
+ifeq ($(ARCH),x86_64)
+	ARCH := X86
+	CFLAGS := -DCONFIG_X86_64 -D__x86_64__
+endif
+
+CFLAGS += -I../../../../usr/include/
+
+all:
+ifeq ($(ARCH),X86)
+	gcc $(CFLAGS) msgque.c -o msgque_test
+else
+	echo "Not an x86 target, can't build msgque selftest"
+endif
+
+run_tests: all
+	./msgque_test
+
+clean:
+	rm -fr ./msgque_test
diff --git a/tools/testing/selftests/ipc/msgque.c b/tools/testing/selftests/ipc/msgque.c
new file mode 100644
index 000000000000..d66418237d21
--- /dev/null
+++ b/tools/testing/selftests/ipc/msgque.c
@@ -0,0 +1,246 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <linux/msg.h>
+#include <fcntl.h>
+
+#define MAX_MSG_SIZE		32
+
+struct msg1 {
+	int msize;
+	long mtype;
+	char mtext[MAX_MSG_SIZE];
+};
+
+#define TEST_STRING "Test sysv5 msg"
+#define MSG_TYPE 1
+
+#define ANOTHER_TEST_STRING "Yet another test sysv5 msg"
+#define ANOTHER_MSG_TYPE 26538
+
+struct msgque_data {
+	key_t key;
+	int msq_id;
+	int qbytes;
+	int qnum;
+	int mode;
+	struct msg1 *messages;
+};
+
+int restore_queue(struct msgque_data *msgque)
+{
+	int fd, ret, id, i;
+	char buf[32];
+
+	fd = open("/proc/sys/kernel/msg_next_id", O_WRONLY);
+	if (fd == -1) {
+		printf("Failed to open /proc/sys/kernel/msg_next_id\n");
+		return -errno;
+	}
+	sprintf(buf, "%d", msgque->msq_id);
+
+	ret = write(fd, buf, strlen(buf));
+	if (ret != strlen(buf)) {
+		printf("Failed to write to /proc/sys/kernel/msg_next_id\n");
+		return -errno;
+	}
+
+	id = msgget(msgque->key, msgque->mode | IPC_CREAT | IPC_EXCL);
+	if (id == -1) {
+		printf("Failed to create queue\n");
+		return -errno;
+	}
+
+	if (id != msgque->msq_id) {
+		printf("Restored queue has wrong id (%d instead of %d)\n",
+							id, msgque->msq_id);
+		ret = -EFAULT;
+		goto destroy;
+	}
+
+	for (i = 0; i < msgque->qnum; i++) {
+		if (msgsnd(msgque->msq_id, &msgque->messages[i].mtype,
+			   msgque->messages[i].msize, IPC_NOWAIT) != 0) {
+			printf("msgsnd failed (%m)\n");
+			ret = -errno;
+			goto destroy;
+		};
+	}
+	return 0;
+
+destroy:
+	if (msgctl(id, IPC_RMID, 0))
+		printf("Failed to destroy queue: %d\n", -errno);
+	return ret;
+}
+
+int check_and_destroy_queue(struct msgque_data *msgque)
+{
+	struct msg1 message;
+	int cnt = 0, ret;
+
+	while (1) {
+		ret = msgrcv(msgque->msq_id, &message.mtype, MAX_MSG_SIZE,
+				0, IPC_NOWAIT);
+		if (ret < 0) {
+			if (errno == ENOMSG)
+				break;
+			printf("Failed to read IPC message: %m\n");
+			ret = -errno;
+			goto err;
+		}
+		if (ret != msgque->messages[cnt].msize) {
+			printf("Wrong message size: %d (expected %d)\n", ret,
+						msgque->messages[cnt].msize);
+			ret = -EINVAL;
+			goto err;
+		}
+		if (message.mtype != msgque->messages[cnt].mtype) {
+			printf("Wrong message type\n");
+			ret = -EINVAL;
+			goto err;
+		}
+		if (memcmp(message.mtext, msgque->messages[cnt].mtext, ret)) {
+			printf("Wrong message content\n");
+			ret = -EINVAL;
+			goto err;
+		}
+		cnt++;
+	}
+
+	if (cnt != msgque->qnum) {
+		printf("Wrong message number\n");
+		ret = -EINVAL;
+		goto err;
+	}
+
+	ret = 0;
+err:
+	if (msgctl(msgque->msq_id, IPC_RMID, 0)) {
+		printf("Failed to destroy queue: %d\n", -errno);
+		return -errno;
+	}
+	return ret;
+}
+
+int dump_queue(struct msgque_data *msgque)
+{
+	struct msqid64_ds ds;
+	int kern_id;
+	int i, ret;
+
+	for (kern_id = 0; kern_id < 256; kern_id++) {
+		ret = msgctl(kern_id, MSG_STAT, &ds);
+		if (ret < 0) {
+			if (errno == -EINVAL)
+				continue;
+			printf("Failed to get stats for IPC queue with id %d\n",
+					kern_id);
+			return -errno;
+		}
+
+		if (ret == msgque->msq_id)
+			break;
+	}
+
+	msgque->messages = malloc(sizeof(struct msg1) * ds.msg_qnum);
+	if (msgque->messages == NULL) {
+		printf("Failed to get stats for IPC queue\n");
+		return -ENOMEM;
+	}
+
+	msgque->qnum = ds.msg_qnum;
+	msgque->mode = ds.msg_perm.mode;
+	msgque->qbytes = ds.msg_qbytes;
+
+	for (i = 0; i < msgque->qnum; i++) {
+		ret = msgrcv(msgque->msq_id, &msgque->messages[i].mtype,
+				MAX_MSG_SIZE, i, IPC_NOWAIT | MSG_COPY);
+		if (ret < 0) {
+			printf("Failed to copy IPC message: %m (%d)\n", errno);
+			return -errno;
+		}
+		msgque->messages[i].msize = ret;
+	}
+	return 0;
+}
+
+int fill_msgque(struct msgque_data *msgque)
+{
+	struct msg1 msgbuf;
+
+	msgbuf.mtype = MSG_TYPE;
+	memcpy(msgbuf.mtext, TEST_STRING, sizeof(TEST_STRING));
+	if (msgsnd(msgque->msq_id, &msgbuf.mtype, sizeof(TEST_STRING),
+				IPC_NOWAIT) != 0) {
+		printf("First message send failed (%m)\n");
+		return -errno;
+	};
+
+	msgbuf.mtype = ANOTHER_MSG_TYPE;
+	memcpy(msgbuf.mtext, ANOTHER_TEST_STRING, sizeof(ANOTHER_TEST_STRING));
+	if (msgsnd(msgque->msq_id, &msgbuf.mtype, sizeof(ANOTHER_TEST_STRING),
+				IPC_NOWAIT) != 0) {
+		printf("Second message send failed (%m)\n");
+		return -errno;
+	};
+	return 0;
+}
+
+int main(int argc, char **argv)
+{
+	int msg, pid, err;
+	struct msgque_data msgque;
+
+	msgque.key = ftok(argv[0], 822155650);
+	if (msgque.key == -1) {
+		printf("Can't make key\n");
+		return -errno;
+	}
+
+	msgque.msq_id = msgget(msgque.key, IPC_CREAT | IPC_EXCL | 0666);
+	if (msgque.msq_id == -1) {
+		printf("Can't create queue\n");
+		goto err_out;
+	}
+
+	err = fill_msgque(&msgque);
+	if (err) {
+		printf("Failed to fill queue\n");
+		goto err_destroy;
+	}
+
+	err = dump_queue(&msgque);
+	if (err) {
+		printf("Failed to dump queue\n");
+		goto err_destroy;
+	}
+
+	err = check_and_destroy_queue(&msgque);
+	if (err) {
+		printf("Failed to check and destroy queue\n");
+		goto err_out;
+	}
+
+	err = restore_queue(&msgque);
+	if (err) {
+		printf("Failed to restore queue\n");
+		goto err_destroy;
+	}
+
+	err = check_and_destroy_queue(&msgque);
+	if (err) {
+		printf("Failed to test queue\n");
+		goto err_out;
+	}
+	return 0;
+
+err_destroy:
+	if (msgctl(msgque.msq_id, IPC_RMID, 0)) {
+		printf("Failed to destroy queue: %d\n", -errno);
+		return -errno;
+	}
+err_out:
+	return err;
+}
-- 
cgit v1.2.3


From a458431e176ddb27e8ef8b98c2a681b217337393 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Date: Fri, 4 Jan 2013 15:35:08 -0800
Subject: mm: fix zone_watermark_ok_safe() accounting of isolated pages

Commit 702d1a6e0766 ("memory-hotplug: fix kswapd looping forever
problem") added an isolated pageblocks counter (nr_pageblock_isolate in
struct zone) and used it to adjust free pages counter in
zone_watermark_ok_safe() to prevent kswapd looping forever problem.

Then later, commit 2139cbe627b8 ("cma: fix counting of isolated pages")
fixed accounting of isolated pages in global free pages counter.  It
made the previous zone_watermark_ok_safe() fix unnecessary and
potentially harmful (cause now isolated pages may be accounted twice
making free pages counter incorrect).

This patch removes the special isolated pageblocks counter altogether
which fixes zone_watermark_ok_safe() free pages check.

Reported-by: Tomasz Stanislawski <t.stanislaws@samsung.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Aaditya Kumar <aaditya.kumar.30@gmail.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Marek Szyprowski <m.szyprowski@samsung.com>
Cc: Michal Nazarewicz <mina86@mina86.com>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmzone.h |  8 --------
 mm/page_alloc.c        | 27 ---------------------------
 mm/page_isolation.c    | 26 ++------------------------
 3 files changed, 2 insertions(+), 59 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 4bec5be82cab..73b64a38b984 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -503,14 +503,6 @@ struct zone {
 	 * rarely used fields:
 	 */
 	const char		*name;
-#ifdef CONFIG_MEMORY_ISOLATION
-	/*
-	 * the number of MIGRATE_ISOLATE *pageblock*.
-	 * We need this for free page counting. Look at zone_watermark_ok_safe.
-	 * It's protected by zone->lock
-	 */
-	int		nr_pageblock_isolate;
-#endif
 } ____cacheline_internodealigned_in_smp;
 
 typedef enum {
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 4ba5e37127fc..bc6cc0e913bd 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -221,11 +221,6 @@ EXPORT_SYMBOL(nr_online_nodes);
 
 int page_group_by_mobility_disabled __read_mostly;
 
-/*
- * NOTE:
- * Don't use set_pageblock_migratetype(page, MIGRATE_ISOLATE) directly.
- * Instead, use {un}set_pageblock_isolate.
- */
 void set_pageblock_migratetype(struct page *page, int migratetype)
 {
 
@@ -1655,20 +1650,6 @@ static bool __zone_watermark_ok(struct zone *z, int order, unsigned long mark,
 	return true;
 }
 
-#ifdef CONFIG_MEMORY_ISOLATION
-static inline unsigned long nr_zone_isolate_freepages(struct zone *zone)
-{
-	if (unlikely(zone->nr_pageblock_isolate))
-		return zone->nr_pageblock_isolate * pageblock_nr_pages;
-	return 0;
-}
-#else
-static inline unsigned long nr_zone_isolate_freepages(struct zone *zone)
-{
-	return 0;
-}
-#endif
-
 bool zone_watermark_ok(struct zone *z, int order, unsigned long mark,
 		      int classzone_idx, int alloc_flags)
 {
@@ -1684,14 +1665,6 @@ bool zone_watermark_ok_safe(struct zone *z, int order, unsigned long mark,
 	if (z->percpu_drift_mark && free_pages < z->percpu_drift_mark)
 		free_pages = zone_page_state_snapshot(z, NR_FREE_PAGES);
 
-	/*
-	 * If the zone has MIGRATE_ISOLATE type free pages, we should consider
-	 * it.  nr_zone_isolate_freepages is never accurate so kswapd might not
-	 * sleep although it could do so.  But this is more desirable for memory
-	 * hotplug than sleeping which can cause a livelock in the direct
-	 * reclaim path.
-	 */
-	free_pages -= nr_zone_isolate_freepages(z);
 	return __zone_watermark_ok(z, order, mark, classzone_idx, alloc_flags,
 								free_pages);
 }
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index 9d2264ea4606..383bdbb98b04 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -8,28 +8,6 @@
 #include <linux/memory.h>
 #include "internal.h"
 
-/* called while holding zone->lock */
-static void set_pageblock_isolate(struct page *page)
-{
-	if (get_pageblock_migratetype(page) == MIGRATE_ISOLATE)
-		return;
-
-	set_pageblock_migratetype(page, MIGRATE_ISOLATE);
-	page_zone(page)->nr_pageblock_isolate++;
-}
-
-/* called while holding zone->lock */
-static void restore_pageblock_isolate(struct page *page, int migratetype)
-{
-	struct zone *zone = page_zone(page);
-	if (WARN_ON(get_pageblock_migratetype(page) != MIGRATE_ISOLATE))
-		return;
-
-	BUG_ON(zone->nr_pageblock_isolate <= 0);
-	set_pageblock_migratetype(page, migratetype);
-	zone->nr_pageblock_isolate--;
-}
-
 int set_migratetype_isolate(struct page *page, bool skip_hwpoisoned_pages)
 {
 	struct zone *zone;
@@ -80,7 +58,7 @@ out:
 		unsigned long nr_pages;
 		int migratetype = get_pageblock_migratetype(page);
 
-		set_pageblock_isolate(page);
+		set_pageblock_migratetype(page, MIGRATE_ISOLATE);
 		nr_pages = move_freepages_block(zone, page, MIGRATE_ISOLATE);
 
 		__mod_zone_freepage_state(zone, -nr_pages, migratetype);
@@ -103,7 +81,7 @@ void unset_migratetype_isolate(struct page *page, unsigned migratetype)
 		goto out;
 	nr_pages = move_freepages_block(zone, page, migratetype);
 	__mod_zone_freepage_state(zone, nr_pages, migratetype);
-	restore_pageblock_isolate(page, migratetype);
+	set_pageblock_migratetype(page, migratetype);
 out:
 	spin_unlock_irqrestore(&zone->lock, flags);
 }
-- 
cgit v1.2.3


From a05be99174edc9f258ee68140b71b9645ad977ee Mon Sep 17 00:00:00 2001
From: Ramakrishna Pallala <ramakrishna.pallala@intel.com>
Date: Fri, 30 Nov 2012 13:57:46 +0530
Subject: power_supply: Add watchdog and safety timer expiries under
 PROP_HEALTH_*

As most of the charger chips come with two kinds of safety features
related to timing:

1. Watchdog Timer (interms of seconds/mins)
2. Safety Timer (interms of hours)

This patch adds these to fault causes in POWER_SUPPLY_PROP_HEALTH_* enums
so that whenever there is either watchdog timeout or safety timer timeout
driver could notify the user space accurately about the fault and will
also be helpful for debug.

Signed-off-by: Ramakrishna Pallala <ramakrishna.pallala@intel.com>
Signed-off-by: Anton Vorontsov <anton@enomsg.org>
---
 drivers/power/power_supply_sysfs.c | 3 ++-
 include/linux/power_supply.h       | 2 ++
 2 files changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/power/power_supply_sysfs.c b/drivers/power/power_supply_sysfs.c
index 40fa3b7cae54..29178f78d73c 100644
--- a/drivers/power/power_supply_sysfs.c
+++ b/drivers/power/power_supply_sysfs.c
@@ -55,7 +55,8 @@ static ssize_t power_supply_show_property(struct device *dev,
 	};
 	static char *health_text[] = {
 		"Unknown", "Good", "Overheat", "Dead", "Over voltage",
-		"Unspecified failure", "Cold",
+		"Unspecified failure", "Cold", "Watchdog timer expire",
+		"Safety timer expire"
 	};
 	static char *technology_text[] = {
 		"Unknown", "NiMH", "Li-ion", "Li-poly", "LiFe", "NiCd",
diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h
index 1f0ab90aff00..25c0982eb9b1 100644
--- a/include/linux/power_supply.h
+++ b/include/linux/power_supply.h
@@ -54,6 +54,8 @@ enum {
 	POWER_SUPPLY_HEALTH_OVERVOLTAGE,
 	POWER_SUPPLY_HEALTH_UNSPEC_FAILURE,
 	POWER_SUPPLY_HEALTH_COLD,
+	POWER_SUPPLY_HEALTH_WATCHDOG_TIMER_EXPIRE,
+	POWER_SUPPLY_HEALTH_SAFETY_TIMER_EXPIRE,
 };
 
 enum {
-- 
cgit v1.2.3


From 80524f083e2c3e70057f5bb476db92baa14cb22b Mon Sep 17 00:00:00 2001
From: Kamal Mostafa <kamal@canonical.com>
Date: Tue, 20 Nov 2012 23:04:35 -0800
Subject: Input: increase struct ps2dev cmdbuf[] to 8 bytes

Cypress PS/2 Trackpad (drivers/input/mouse/cypress_ps2.c) needs
this larger cmdbuf[] to handle 8-byte packet responses.

Signed-off-by: Kamal Mostafa <kamal@canonical.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 include/linux/libps2.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/libps2.h b/include/linux/libps2.h
index 79603a6c356f..4ad06e824f76 100644
--- a/include/linux/libps2.h
+++ b/include/linux/libps2.h
@@ -36,7 +36,7 @@ struct ps2dev {
 	wait_queue_head_t wait;
 
 	unsigned long flags;
-	unsigned char cmdbuf[6];
+	unsigned char cmdbuf[8];
 	unsigned char cmdcnt;
 	unsigned char nak;
 };
-- 
cgit v1.2.3


From 8e3cdca26c9d9f0936c555bc67e2a5d71e61bb0d Mon Sep 17 00:00:00 2001
From: Alexander Holler <holler@ahsoftware.de>
Date: Sat, 15 Dec 2012 12:45:00 +0000
Subject: iio: Add Usage IDs for HID time sensors.

These are Usage IDs for the attributes year, month, day,
hour, minute and second, needed to read HID time sensors.

Signed-off-by: Alexander Holler <holler@ahsoftware.de>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 include/linux/hid-sensor-ids.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/hid-sensor-ids.h b/include/linux/hid-sensor-ids.h
index 55f277372fed..6f24446e7669 100644
--- a/include/linux/hid-sensor-ids.h
+++ b/include/linux/hid-sensor-ids.h
@@ -66,6 +66,15 @@
 #define HID_USAGE_SENSOR_ORIENT_MAGN_FLUX_Y_AXIS		0x200486
 #define HID_USAGE_SENSOR_ORIENT_MAGN_FLUX_Z_AXIS		0x200487
 
+/* Time (2000a0) */
+#define HID_USAGE_SENSOR_TIME					0x2000a0
+#define HID_USAGE_SENSOR_TIME_YEAR				0x200521
+#define HID_USAGE_SENSOR_TIME_MONTH				0x200522
+#define HID_USAGE_SENSOR_TIME_DAY				0x200523
+#define HID_USAGE_SENSOR_TIME_HOUR				0x200525
+#define HID_USAGE_SENSOR_TIME_MINUTE				0x200526
+#define HID_USAGE_SENSOR_TIME_SECOND				0x200527
+
 /* Units */
 #define HID_USAGE_SENSOR_UNITS_NOT_SPECIFIED			0x00
 #define HID_USAGE_SENSOR_UNITS_LUX				0x01
-- 
cgit v1.2.3


From 2974cdf293e1cb00860522624252aeaba502c8bf Mon Sep 17 00:00:00 2001
From: Alexander Holler <holler@ahsoftware.de>
Date: Sat, 15 Dec 2012 12:45:00 +0000
Subject: iio: merge hid-sensor-attributes.h into hid-sensor-hub.h

The stuff in hid-sensor-attributes.h is needed by every piece which
uses hid-sensor-hub and merging it into hid-sensor-hub.h makes it accessible
from outside the iio subdirectory.

Signed-off-by: Alexander Holler <holler@ahsoftware.de>
Acked-by: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 drivers/iio/accel/hid-sensor-accel-3d.c            |  1 -
 .../iio/common/hid-sensors/hid-sensor-attributes.c |  1 -
 .../iio/common/hid-sensors/hid-sensor-attributes.h | 57 ----------------------
 .../iio/common/hid-sensors/hid-sensor-trigger.c    |  1 -
 drivers/iio/gyro/hid-sensor-gyro-3d.c              |  1 -
 drivers/iio/light/hid-sensor-als.c                 |  1 -
 drivers/iio/magnetometer/hid-sensor-magn-3d.c      |  1 -
 include/linux/hid-sensor-hub.h                     | 38 +++++++++++++++
 8 files changed, 38 insertions(+), 63 deletions(-)
 delete mode 100644 drivers/iio/common/hid-sensors/hid-sensor-attributes.h

(limited to 'include/linux')

diff --git a/drivers/iio/accel/hid-sensor-accel-3d.c b/drivers/iio/accel/hid-sensor-accel-3d.c
index e67bb912bd19..268ca3a1030e 100644
--- a/drivers/iio/accel/hid-sensor-accel-3d.c
+++ b/drivers/iio/accel/hid-sensor-accel-3d.c
@@ -28,7 +28,6 @@
 #include <linux/iio/buffer.h>
 #include <linux/iio/trigger_consumer.h>
 #include <linux/iio/triggered_buffer.h>
-#include "../common/hid-sensors/hid-sensor-attributes.h"
 #include "../common/hid-sensors/hid-sensor-trigger.h"
 
 /*Format: HID-SENSOR-usage_id_in_hex*/
diff --git a/drivers/iio/common/hid-sensors/hid-sensor-attributes.c b/drivers/iio/common/hid-sensors/hid-sensor-attributes.c
index 75374955caba..5873f160bac9 100644
--- a/drivers/iio/common/hid-sensors/hid-sensor-attributes.c
+++ b/drivers/iio/common/hid-sensors/hid-sensor-attributes.c
@@ -25,7 +25,6 @@
 #include <linux/hid-sensor-hub.h>
 #include <linux/iio/iio.h>
 #include <linux/iio/sysfs.h>
-#include "hid-sensor-attributes.h"
 
 static int pow_10(unsigned power)
 {
diff --git a/drivers/iio/common/hid-sensors/hid-sensor-attributes.h b/drivers/iio/common/hid-sensors/hid-sensor-attributes.h
deleted file mode 100644
index a4676a0c3de5..000000000000
--- a/drivers/iio/common/hid-sensors/hid-sensor-attributes.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * HID Sensors Driver
- * Copyright (c) 2012, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
- *
- */
-#ifndef _HID_SENSORS_ATTRIBUTES_H
-#define _HID_SENSORS_ATTRIBUTES_H
-
-/* Common hid sensor iio structure */
-struct hid_sensor_iio_common {
-	struct hid_sensor_hub_device *hsdev;
-	struct platform_device *pdev;
-	unsigned usage_id;
-	bool data_ready;
-	struct hid_sensor_hub_attribute_info poll;
-	struct hid_sensor_hub_attribute_info report_state;
-	struct hid_sensor_hub_attribute_info power_state;
-	struct hid_sensor_hub_attribute_info sensitivity;
-};
-
-/*Convert from hid unit expo to regular exponent*/
-static inline int hid_sensor_convert_exponent(int unit_expo)
-{
-	if (unit_expo < 0x08)
-		return unit_expo;
-	else if (unit_expo <= 0x0f)
-		return -(0x0f-unit_expo+1);
-	else
-		return 0;
-}
-
-int hid_sensor_parse_common_attributes(struct hid_sensor_hub_device *hsdev,
-					u32 usage_id,
-					struct hid_sensor_iio_common *st);
-int hid_sensor_write_raw_hyst_value(struct hid_sensor_iio_common *st,
-					int val1, int val2);
-int hid_sensor_read_raw_hyst_value(struct hid_sensor_iio_common *st,
-					int *val1, int *val2);
-int hid_sensor_write_samp_freq_value(struct hid_sensor_iio_common *st,
-					int val1, int val2);
-int hid_sensor_read_samp_freq_value(struct hid_sensor_iio_common *st,
-					int *val1, int *val2);
-
-#endif
diff --git a/drivers/iio/common/hid-sensors/hid-sensor-trigger.c b/drivers/iio/common/hid-sensors/hid-sensor-trigger.c
index d60198a6ca29..7da73800dcdd 100644
--- a/drivers/iio/common/hid-sensors/hid-sensor-trigger.c
+++ b/drivers/iio/common/hid-sensors/hid-sensor-trigger.c
@@ -26,7 +26,6 @@
 #include <linux/iio/iio.h>
 #include <linux/iio/trigger.h>
 #include <linux/iio/sysfs.h>
-#include "hid-sensor-attributes.h"
 #include "hid-sensor-trigger.h"
 
 static int hid_sensor_data_rdy_trigger_set_state(struct iio_trigger *trig,
diff --git a/drivers/iio/gyro/hid-sensor-gyro-3d.c b/drivers/iio/gyro/hid-sensor-gyro-3d.c
index 4c8b158e40e1..8d34ee4f33b1 100644
--- a/drivers/iio/gyro/hid-sensor-gyro-3d.c
+++ b/drivers/iio/gyro/hid-sensor-gyro-3d.c
@@ -28,7 +28,6 @@
 #include <linux/iio/buffer.h>
 #include <linux/iio/trigger_consumer.h>
 #include <linux/iio/triggered_buffer.h>
-#include "../common/hid-sensors/hid-sensor-attributes.h"
 #include "../common/hid-sensors/hid-sensor-trigger.h"
 
 /*Format: HID-SENSOR-usage_id_in_hex*/
diff --git a/drivers/iio/light/hid-sensor-als.c b/drivers/iio/light/hid-sensor-als.c
index 23eeeef64e84..723f203269f2 100644
--- a/drivers/iio/light/hid-sensor-als.c
+++ b/drivers/iio/light/hid-sensor-als.c
@@ -28,7 +28,6 @@
 #include <linux/iio/buffer.h>
 #include <linux/iio/trigger_consumer.h>
 #include <linux/iio/triggered_buffer.h>
-#include "../common/hid-sensors/hid-sensor-attributes.h"
 #include "../common/hid-sensors/hid-sensor-trigger.h"
 
 /*Format: HID-SENSOR-usage_id_in_hex*/
diff --git a/drivers/iio/magnetometer/hid-sensor-magn-3d.c b/drivers/iio/magnetometer/hid-sensor-magn-3d.c
index 8e75eb76ccd9..7811c0bdc95e 100644
--- a/drivers/iio/magnetometer/hid-sensor-magn-3d.c
+++ b/drivers/iio/magnetometer/hid-sensor-magn-3d.c
@@ -28,7 +28,6 @@
 #include <linux/iio/buffer.h>
 #include <linux/iio/trigger_consumer.h>
 #include <linux/iio/triggered_buffer.h>
-#include "../common/hid-sensors/hid-sensor-attributes.h"
 #include "../common/hid-sensors/hid-sensor-trigger.h"
 
 /*Format: HID-SENSOR-usage_id_in_hex*/
diff --git a/include/linux/hid-sensor-hub.h b/include/linux/hid-sensor-hub.h
index 0aa5f4c42ae6..e6265f94d816 100644
--- a/include/linux/hid-sensor-hub.h
+++ b/include/linux/hid-sensor-hub.h
@@ -157,4 +157,42 @@ int sensor_hub_set_feature(struct hid_sensor_hub_device *hsdev, u32 report_id,
 */
 int sensor_hub_get_feature(struct hid_sensor_hub_device *hsdev, u32 report_id,
 			u32 field_index, s32 *value);
+
+/* hid-sensor-attributes */
+
+/* Common hid sensor iio structure */
+struct hid_sensor_iio_common {
+	struct hid_sensor_hub_device *hsdev;
+	struct platform_device *pdev;
+	unsigned usage_id;
+	bool data_ready;
+	struct hid_sensor_hub_attribute_info poll;
+	struct hid_sensor_hub_attribute_info report_state;
+	struct hid_sensor_hub_attribute_info power_state;
+	struct hid_sensor_hub_attribute_info sensitivity;
+};
+
+/*Convert from hid unit expo to regular exponent*/
+static inline int hid_sensor_convert_exponent(int unit_expo)
+{
+	if (unit_expo < 0x08)
+		return unit_expo;
+	else if (unit_expo <= 0x0f)
+		return -(0x0f-unit_expo+1);
+	else
+		return 0;
+}
+
+int hid_sensor_parse_common_attributes(struct hid_sensor_hub_device *hsdev,
+					u32 usage_id,
+					struct hid_sensor_iio_common *st);
+int hid_sensor_write_raw_hyst_value(struct hid_sensor_iio_common *st,
+					int val1, int val2);
+int hid_sensor_read_raw_hyst_value(struct hid_sensor_iio_common *st,
+					int *val1, int *val2);
+int hid_sensor_write_samp_freq_value(struct hid_sensor_iio_common *st,
+					int val1, int val2);
+int hid_sensor_read_samp_freq_value(struct hid_sensor_iio_common *st,
+					int *val1, int *val2);
+
 #endif
-- 
cgit v1.2.3


From e07c6d170ccec9e7377a43a2999b87b860c642e9 Mon Sep 17 00:00:00 2001
From: Alexander Holler <holler@ahsoftware.de>
Date: Sat, 15 Dec 2012 12:45:00 +0000
Subject: hid: iio: rename struct hid_sensor_iio_common to hid_sensor_common

The structure with common attributes for hid-sensors isn't specific
to the iio-subsystem, so rename it to hid_sensor_common.

Signed-off-by: Alexander Holler <holler@ahsoftware.de>
Acked-by: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 drivers/iio/accel/hid-sensor-accel-3d.c                |  2 +-
 drivers/iio/common/hid-sensors/hid-sensor-attributes.c | 10 +++++-----
 drivers/iio/common/hid-sensors/hid-sensor-trigger.c    |  4 ++--
 drivers/iio/common/hid-sensors/hid-sensor-trigger.h    |  2 +-
 drivers/iio/gyro/hid-sensor-gyro-3d.c                  |  2 +-
 drivers/iio/light/hid-sensor-als.c                     |  2 +-
 drivers/iio/magnetometer/hid-sensor-magn-3d.c          |  2 +-
 drivers/rtc/rtc-hid-sensor-time.c                      |  2 +-
 include/linux/hid-sensor-hub.h                         | 12 ++++++------
 9 files changed, 19 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/iio/accel/hid-sensor-accel-3d.c b/drivers/iio/accel/hid-sensor-accel-3d.c
index 268ca3a1030e..253684ebc572 100644
--- a/drivers/iio/accel/hid-sensor-accel-3d.c
+++ b/drivers/iio/accel/hid-sensor-accel-3d.c
@@ -43,7 +43,7 @@ enum accel_3d_channel {
 
 struct accel_3d_state {
 	struct hid_sensor_hub_callbacks callbacks;
-	struct hid_sensor_iio_common common_attributes;
+	struct hid_sensor_common common_attributes;
 	struct hid_sensor_hub_attribute_info accel[ACCEL_3D_CHANNEL_MAX];
 	u32 accel_val[ACCEL_3D_CHANNEL_MAX];
 };
diff --git a/drivers/iio/common/hid-sensors/hid-sensor-attributes.c b/drivers/iio/common/hid-sensors/hid-sensor-attributes.c
index 5873f160bac9..75b54730a963 100644
--- a/drivers/iio/common/hid-sensors/hid-sensor-attributes.c
+++ b/drivers/iio/common/hid-sensors/hid-sensor-attributes.c
@@ -113,7 +113,7 @@ static u32 convert_to_vtf_format(int size, int exp, int val1, int val2)
 	return value;
 }
 
-int hid_sensor_read_samp_freq_value(struct hid_sensor_iio_common *st,
+int hid_sensor_read_samp_freq_value(struct hid_sensor_common *st,
 				int *val1, int *val2)
 {
 	s32 value;
@@ -140,7 +140,7 @@ int hid_sensor_read_samp_freq_value(struct hid_sensor_iio_common *st,
 }
 EXPORT_SYMBOL(hid_sensor_read_samp_freq_value);
 
-int hid_sensor_write_samp_freq_value(struct hid_sensor_iio_common *st,
+int hid_sensor_write_samp_freq_value(struct hid_sensor_common *st,
 				int val1, int val2)
 {
 	s32 value;
@@ -168,7 +168,7 @@ int hid_sensor_write_samp_freq_value(struct hid_sensor_iio_common *st,
 }
 EXPORT_SYMBOL(hid_sensor_write_samp_freq_value);
 
-int hid_sensor_read_raw_hyst_value(struct hid_sensor_iio_common *st,
+int hid_sensor_read_raw_hyst_value(struct hid_sensor_common *st,
 				int *val1, int *val2)
 {
 	s32 value;
@@ -190,7 +190,7 @@ int hid_sensor_read_raw_hyst_value(struct hid_sensor_iio_common *st,
 }
 EXPORT_SYMBOL(hid_sensor_read_raw_hyst_value);
 
-int hid_sensor_write_raw_hyst_value(struct hid_sensor_iio_common *st,
+int hid_sensor_write_raw_hyst_value(struct hid_sensor_common *st,
 					int val1, int val2)
 {
 	s32 value;
@@ -211,7 +211,7 @@ EXPORT_SYMBOL(hid_sensor_write_raw_hyst_value);
 
 int hid_sensor_parse_common_attributes(struct hid_sensor_hub_device *hsdev,
 					u32 usage_id,
-					struct hid_sensor_iio_common *st)
+					struct hid_sensor_common *st)
 {
 
 	sensor_hub_input_get_attribute_info(hsdev,
diff --git a/drivers/iio/common/hid-sensors/hid-sensor-trigger.c b/drivers/iio/common/hid-sensors/hid-sensor-trigger.c
index 7da73800dcdd..7a525a91105d 100644
--- a/drivers/iio/common/hid-sensors/hid-sensor-trigger.c
+++ b/drivers/iio/common/hid-sensors/hid-sensor-trigger.c
@@ -31,7 +31,7 @@
 static int hid_sensor_data_rdy_trigger_set_state(struct iio_trigger *trig,
 						bool state)
 {
-	struct hid_sensor_iio_common *st = trig->private_data;
+	struct hid_sensor_common *st = trig->private_data;
 	int state_val;
 
 	state_val = state ? 1 : 0;
@@ -63,7 +63,7 @@ static const struct iio_trigger_ops hid_sensor_trigger_ops = {
 };
 
 int hid_sensor_setup_trigger(struct iio_dev *indio_dev, const char *name,
-				struct hid_sensor_iio_common *attrb)
+				struct hid_sensor_common *attrb)
 {
 	int ret;
 	struct iio_trigger *trig;
diff --git a/drivers/iio/common/hid-sensors/hid-sensor-trigger.h b/drivers/iio/common/hid-sensors/hid-sensor-trigger.h
index fd982971b1b8..9a8731478eda 100644
--- a/drivers/iio/common/hid-sensors/hid-sensor-trigger.h
+++ b/drivers/iio/common/hid-sensors/hid-sensor-trigger.h
@@ -20,7 +20,7 @@
 #define _HID_SENSOR_TRIGGER_H
 
 int hid_sensor_setup_trigger(struct iio_dev *indio_dev, const char *name,
-				struct hid_sensor_iio_common *attrb);
+				struct hid_sensor_common *attrb);
 void hid_sensor_remove_trigger(struct iio_dev *indio_dev);
 
 #endif
diff --git a/drivers/iio/gyro/hid-sensor-gyro-3d.c b/drivers/iio/gyro/hid-sensor-gyro-3d.c
index 8d34ee4f33b1..1a64f88d1e89 100644
--- a/drivers/iio/gyro/hid-sensor-gyro-3d.c
+++ b/drivers/iio/gyro/hid-sensor-gyro-3d.c
@@ -43,7 +43,7 @@ enum gyro_3d_channel {
 
 struct gyro_3d_state {
 	struct hid_sensor_hub_callbacks callbacks;
-	struct hid_sensor_iio_common common_attributes;
+	struct hid_sensor_common common_attributes;
 	struct hid_sensor_hub_attribute_info gyro[GYRO_3D_CHANNEL_MAX];
 	u32 gyro_val[GYRO_3D_CHANNEL_MAX];
 };
diff --git a/drivers/iio/light/hid-sensor-als.c b/drivers/iio/light/hid-sensor-als.c
index 723f203269f2..6ab987a13c79 100644
--- a/drivers/iio/light/hid-sensor-als.c
+++ b/drivers/iio/light/hid-sensor-als.c
@@ -38,7 +38,7 @@
 
 struct als_state {
 	struct hid_sensor_hub_callbacks callbacks;
-	struct hid_sensor_iio_common common_attributes;
+	struct hid_sensor_common common_attributes;
 	struct hid_sensor_hub_attribute_info als_illum;
 	u32 illum;
 };
diff --git a/drivers/iio/magnetometer/hid-sensor-magn-3d.c b/drivers/iio/magnetometer/hid-sensor-magn-3d.c
index 7811c0bdc95e..28026e2ab06b 100644
--- a/drivers/iio/magnetometer/hid-sensor-magn-3d.c
+++ b/drivers/iio/magnetometer/hid-sensor-magn-3d.c
@@ -43,7 +43,7 @@ enum magn_3d_channel {
 
 struct magn_3d_state {
 	struct hid_sensor_hub_callbacks callbacks;
-	struct hid_sensor_iio_common common_attributes;
+	struct hid_sensor_common common_attributes;
 	struct hid_sensor_hub_attribute_info magn[MAGN_3D_CHANNEL_MAX];
 	u32 magn_val[MAGN_3D_CHANNEL_MAX];
 };
diff --git a/drivers/rtc/rtc-hid-sensor-time.c b/drivers/rtc/rtc-hid-sensor-time.c
index 25cac6ec374d..0438c9ee9a3b 100644
--- a/drivers/rtc/rtc-hid-sensor-time.c
+++ b/drivers/rtc/rtc-hid-sensor-time.c
@@ -39,7 +39,7 @@ enum hid_time_channel {
 
 struct hid_time_state {
 	struct hid_sensor_hub_callbacks callbacks;
-	struct hid_sensor_iio_common common_attributes;
+	struct hid_sensor_common common_attributes;
 	struct hid_sensor_hub_attribute_info info[TIME_RTC_CHANNEL_MAX];
 	struct rtc_time last_time;
 	spinlock_t lock_last_time;
diff --git a/include/linux/hid-sensor-hub.h b/include/linux/hid-sensor-hub.h
index e6265f94d816..ecefb7311dd6 100644
--- a/include/linux/hid-sensor-hub.h
+++ b/include/linux/hid-sensor-hub.h
@@ -161,7 +161,7 @@ int sensor_hub_get_feature(struct hid_sensor_hub_device *hsdev, u32 report_id,
 /* hid-sensor-attributes */
 
 /* Common hid sensor iio structure */
-struct hid_sensor_iio_common {
+struct hid_sensor_common {
 	struct hid_sensor_hub_device *hsdev;
 	struct platform_device *pdev;
 	unsigned usage_id;
@@ -185,14 +185,14 @@ static inline int hid_sensor_convert_exponent(int unit_expo)
 
 int hid_sensor_parse_common_attributes(struct hid_sensor_hub_device *hsdev,
 					u32 usage_id,
-					struct hid_sensor_iio_common *st);
-int hid_sensor_write_raw_hyst_value(struct hid_sensor_iio_common *st,
+					struct hid_sensor_common *st);
+int hid_sensor_write_raw_hyst_value(struct hid_sensor_common *st,
 					int val1, int val2);
-int hid_sensor_read_raw_hyst_value(struct hid_sensor_iio_common *st,
+int hid_sensor_read_raw_hyst_value(struct hid_sensor_common *st,
 					int *val1, int *val2);
-int hid_sensor_write_samp_freq_value(struct hid_sensor_iio_common *st,
+int hid_sensor_write_samp_freq_value(struct hid_sensor_common *st,
 					int val1, int val2);
-int hid_sensor_read_samp_freq_value(struct hid_sensor_iio_common *st,
+int hid_sensor_read_samp_freq_value(struct hid_sensor_common *st,
 					int *val1, int *val2);
 
 #endif
-- 
cgit v1.2.3


From 9a6cecc846169159bfce511f4c0034bb96eea1ca Mon Sep 17 00:00:00 2001
From: Jon Hunter <jon-hunter@ti.com>
Date: Fri, 14 Sep 2012 17:41:57 -0500
Subject: dmaengine: add helper function to request a slave DMA channel

Currently slave DMA channels are requested by calling dma_request_channel()
and requires DMA clients to pass various filter parameters to obtain the
appropriate channel.

With device-tree being used by architectures such as arm and the addition of
device-tree helper functions to extract the relevant DMA client information
from device-tree, add a new function to request a slave DMA channel using
device-tree. This function is currently a simple wrapper that calls the
device-tree of_dma_request_slave_channel() function.

Cc: Nicolas Ferre <nicolas.ferre@atmel.com>
Cc: Benoit Cousson <b-cousson@ti.com>
Cc: Stephen Warren <swarren@nvidia.com>
Cc: Grant Likely <grant.likely@secretlab.ca>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Rob Herring <rob.herring@calxeda.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Vinod Koul <vinod.koul@intel.com>
Cc: Dan Williams <djbw@fb.com>

Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Jon Hunter <jon-hunter@ti.com>
Reviewed-by: Stephen Warren <swarren@wwwdotorg.org>
Acked-by: Rob Herring <rob.herring@calxeda.com>
Signed-off-by: Vinod Koul <vinod.koul@linux.intel.com>
---
 drivers/dma/dmaengine.c   | 16 ++++++++++++++++
 include/linux/dmaengine.h |  6 ++++++
 2 files changed, 22 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index a815d44c70a4..d37cf95987b6 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -62,6 +62,7 @@
 #include <linux/rculist.h>
 #include <linux/idr.h>
 #include <linux/slab.h>
+#include <linux/of_dma.h>
 
 static DEFINE_MUTEX(dma_list_mutex);
 static DEFINE_IDR(dma_idr);
@@ -546,6 +547,21 @@ struct dma_chan *__dma_request_channel(dma_cap_mask_t *mask, dma_filter_fn fn, v
 }
 EXPORT_SYMBOL_GPL(__dma_request_channel);
 
+/**
+ * dma_request_slave_channel - try to allocate an exclusive slave channel
+ * @dev:	pointer to client device structure
+ * @name:	slave channel name
+ */
+struct dma_chan *dma_request_slave_channel(struct device *dev, char *name)
+{
+	/* If device-tree is present get slave info from here */
+	if (dev->of_node)
+		return of_dma_request_slave_channel(dev->of_node, name);
+
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(dma_request_slave_channel);
+
 void dma_release_channel(struct dma_chan *chan)
 {
 	mutex_lock(&dma_list_mutex);
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index d3201e438d16..8cd0e2556d04 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -974,6 +974,7 @@ enum dma_status dma_sync_wait(struct dma_chan *chan, dma_cookie_t cookie);
 enum dma_status dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx);
 void dma_issue_pending_all(void);
 struct dma_chan *__dma_request_channel(dma_cap_mask_t *mask, dma_filter_fn fn, void *fn_param);
+struct dma_chan *dma_request_slave_channel(struct device *dev, char *name);
 void dma_release_channel(struct dma_chan *chan);
 #else
 static inline enum dma_status dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx)
@@ -988,6 +989,11 @@ static inline struct dma_chan *__dma_request_channel(dma_cap_mask_t *mask,
 {
 	return NULL;
 }
+static inline struct dma_chan *dma_request_slave_channel(struct device *dev,
+							 char *name)
+{
+	return NULL
+}
 static inline void dma_release_channel(struct dma_chan *chan)
 {
 }
-- 
cgit v1.2.3


From aa3da644c76d1c2083d085e453c18f7c51f19bc4 Mon Sep 17 00:00:00 2001
From: Jon Hunter <jon-hunter@ti.com>
Date: Fri, 14 Sep 2012 17:41:56 -0500
Subject: of: Add generic device tree DMA helpers

This is based upon the work by Benoit Cousson [1] and Nicolas Ferre [2]
to add some basic helpers to retrieve a DMA controller device_node and the
DMA request/channel information.

Aim of DMA helpers
- The purpose of device-tree is to describe the capabilites of the hardware.
  Thinking about DMA controllers purely from the context of the hardware to
  begin with, we can describe a device in terms of a DMA controller as
  follows ...
  	1. Number of DMA controllers
	2. Number of channels (maybe physical or logical)
	3. Mapping of DMA requests signals to DMA controller
	4. Number of DMA interrupts
	5. Mapping of DMA interrupts to channels
- With the above in mind the aim of the DT DMA helper functions is to extract
  the above information from the DT and provide to the appropriate driver.
  However, due to the vast number of DMA controllers and not all are using a
  common driver (such as DMA Engine) it has been seen that this is not a
  trivial task. In previous discussions on this topic the following concerns
  have been raised ...
	1. How does the binding support devices with multiple DMA controllers?
  	2. How to support both legacy DMA controllers not using DMA Engine as
	   well as those that support DMA Engine.
	3. When using with DMA Engine how do we support the various
	   implementations where the opaque filter function parameter differs
	   between implementations?
	4. How do we handle DMA channels that are identified with a string
	   versus a integer?
- Hence the design of the DMA helpers has to accomodate the above or align on
  an agreement what can be or should be supported.

Design of DMA helpers

1. Registering DMA controllers

   In the case of DMA controllers that are using DMA Engine, requesting a
   channel is performed by calling the following function.

	struct dma_chan *dma_request_channel(dma_cap_mask_t mask,
			dma_filter_fn filter_fn,
			void *filter_param);

   The mask variable is used to match a type of the device controller in a list
   of controllers. The filter_fn and filter_param are used to identify the
   required dma channel and return a handle to the dma channel of type dma_chan.

   From the examples I have seen, the mask and filter_fn are constant
   for a given DMA controller and therefore, we can specify these as controller
   specific data when registering the DMA controller with the device-tree DMA
   helpers.

   The filter_param variable is of an unknown type and is typically specific
   to the DMA engine implementation for a given DMA controller. To allow some
   flexibility in the type and formating of this filter_param we employ an
   xlate to translate the device-tree binding information into the appropriate
   format. The xlate function used for a DMA controller can also be specified
   when registering the DMA controller with the device-tree DMA helpers.

   Based upon the above, a function for registering the DMA controller with the
   DMA helpers now looks like the below. The data variable is used to pass a
   pointer to DMA controller specific data used by the xlate function.

	int of_dma_controller_register(struct device_node *np,
		struct dma_chan *(*of_dma_xlate)
		(struct of_phandle_args *, struct of_dma *),
		void *data)

   For example, in the case where DMA engine is used, we define the following
   structure (that stores the DMA engine capability mask and filter function)
   and pass this to the data variable in the above function.

	struct of_dma_filter_info {
		dma_cap_mask_t  dma_cap;
		dma_filter_fn   filter_fn;
	};

2. Representing and requesting channel information

   Please see the dma binding documentation included in this patch for a
   description of how DMA controllers and client information should be
   represented with device-tree. For more information on how this binding
   came about please see [3]. In addition to this, feedback received from
   the Linux kernel summit showed a consensus (among those who attended) to
   use a name to identify DMA client information [4].

   A DMA channel can be requested by calling the following function, where name
   is a required parameter used for identifying a DMA channel. This function
   has been designed to return a structure of type dma_chan to work with the
   DMA engine driver. Note that if DMA engine is used then drivers should be
   using the DMA engine API dma_request_slave_channel() (implemented in part 2
   of this series, "dmaengine: add helper function to request a slave DMA
   channel") which will in turn call the below function if device-tree is
   present. The aim being to have a common DMA engine interface regardless of
   whether device tree is being used.

	struct dma_chan *of_dma_request_slave_channel(struct device_node *np,
						      char *name)

3. Supporting legacy devices not using DMA Engine

   These devices present a problem, as there may not be a uniform way to easily
   support them with regard to device tree. Ideally, these should be migrated
   to DMA engine. However, if this is not possible, then they should still be
   able to use this binding, the only constaint imposed by this implementation
   is that when requesting a DMA channel via of_dma_request_slave_channel(), it
   will return a type of dma_chan.

This implementation has been tested on OMAP4430 using the kernel v3.6-rc5. I
have validated that MMC is working on the PANDA board with this implementation.
My development branch for testing on OMAP can be found here [5].

v6: - minor corrections in DMA binding documentation
v5: - minor update to binding documentation
    - added loop to exhaustively search for a slave channel in the case where
      there could be alternative channels available
v4: - revert the removal of xlate function from v3
    - update the proposed binding format and APIs based upon discussions [3]
v3: - avoid passing an xlate function and instead pass DMA engine parameters
    - define number of dma channels and requests in dma-controller node
v2: - remove of_dma_to_resource API
    - make property #dma-cells required (no fallback anymore)
    - another check in of_dma_xlate_onenumbercell() function

[1] http://article.gmane.org/gmane.linux.drivers.devicetree/12022
[2] http://article.gmane.org/gmane.linux.ports.arm.omap/73622
[3] http://marc.info/?l=linux-omap&m=133582085008539&w=2
[4] http://pad.linaro.org/arm-mini-summit-2012
[5] https://github.com/jonhunter/linux/tree/dev-dt-dma

Cc: Nicolas Ferre <nicolas.ferre@atmel.com>
Cc: Benoit Cousson <b-cousson@ti.com>
Cc: Stephen Warren <swarren@nvidia.com>
Cc: Grant Likely <grant.likely@secretlab.ca>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Rob Herring <rob.herring@calxeda.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Vinod Koul <vinod.koul@intel.com>
Cc: Dan Williams <djbw@fb.com>

Reviewed-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Nicolas Ferre <nicolas.ferre@atmel.com>
Signed-off-by: Jon Hunter <jon-hunter@ti.com>
Reviewed-by: Stephen Warren <swarren@wwwdotorg.org>
Acked-by: Rob Herring <rob.herring@calxeda.com>
Signed-off-by: Vinod Koul <vinod.koul@linux.intel.com>
---
 Documentation/devicetree/bindings/dma/dma.txt |  81 ++++++++++
 drivers/of/Makefile                           |   2 +-
 drivers/of/dma.c                              | 219 ++++++++++++++++++++++++++
 include/linux/of_dma.h                        |  45 ++++++
 4 files changed, 346 insertions(+), 1 deletion(-)
 create mode 100644 Documentation/devicetree/bindings/dma/dma.txt
 create mode 100644 drivers/of/dma.c
 create mode 100644 include/linux/of_dma.h

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/dma/dma.txt b/Documentation/devicetree/bindings/dma/dma.txt
new file mode 100644
index 000000000000..a4f59a5967d4
--- /dev/null
+++ b/Documentation/devicetree/bindings/dma/dma.txt
@@ -0,0 +1,81 @@
+* Generic DMA Controller and DMA request bindings
+
+Generic binding to provide a way for a driver using DMA Engine to retrieve the
+DMA request or channel information that goes from a hardware device to a DMA
+controller.
+
+
+* DMA controller
+
+Required property:
+- #dma-cells: 		Must be at least 1. Used to provide DMA controller
+			specific information. See DMA client binding below for
+			more details.
+
+Optional properties:
+- #dma-channels: 	Number of DMA channels supported by the controller.
+- #dma-requests: 	Number of DMA requests signals supported by the
+			controller.
+
+Example:
+
+	dma: dma@48000000 {
+		compatible = "ti,omap-sdma"
+		reg = <0x48000000 0x1000>;
+		interrupts = <0 12 0x4
+			      0 13 0x4
+			      0 14 0x4
+			      0 15 0x4>;
+		#dma-cells = <1>;
+		#dma-channels = <32>;
+		#dma-requests = <127>;
+	};
+
+
+* DMA client
+
+Client drivers should specify the DMA property using a phandle to the controller
+followed by DMA controller specific data.
+
+Required property:
+- dmas:			List of one or more DMA specifiers, each consisting of
+			- A phandle pointing to DMA controller node
+			- A number of integer cells, as determined by the
+			  #dma-cells property in the node referenced by phandle
+			  containing DMA controller specific information. This
+			  typically contains a DMA request line number or a
+			  channel number, but can contain any data that is used
+			  required for configuring a channel.
+- dma-names: 		Contains one identifier string for each DMA specifier in
+			the dmas property. The specific strings that can be used
+			are defined in the binding of the DMA client device.
+			Multiple DMA specifiers can be used to represent
+			alternatives and in this case the dma-names for those
+			DMA specifiers must be identical (see examples).
+
+Examples:
+
+1. A device with one DMA read channel, one DMA write channel:
+
+	i2c1: i2c@1 {
+		...
+		dmas = <&dma 2		/* read channel */
+			&dma 3>;	/* write channel */
+		dma-names = "rx", "tx"
+		...
+	};
+
+2. A single read-write channel with three alternative DMA controllers:
+
+	dmas = <&dma1 5
+		&dma2 7
+		&dma3 2>;
+	dma-names = "rx-tx", "rx-tx", "rx-tx"
+
+3. A device with three channels, one of which has two alternatives:
+
+	dmas = <&dma1 2			/* read channel */
+		&dma1 3			/* write channel */
+		&dma2 0			/* error read */
+		&dma3 0>;		/* alternative error read */
+	dma-names = "rx", "tx", "error", "error";
diff --git a/drivers/of/Makefile b/drivers/of/Makefile
index e027f444d10c..eafa107aed40 100644
--- a/drivers/of/Makefile
+++ b/drivers/of/Makefile
@@ -1,4 +1,4 @@
-obj-y = base.o
+obj-y = base.o dma.o
 obj-$(CONFIG_OF_FLATTREE) += fdt.o
 obj-$(CONFIG_OF_PROMTREE) += pdt.o
 obj-$(CONFIG_OF_ADDRESS)  += address.o
diff --git a/drivers/of/dma.c b/drivers/of/dma.c
new file mode 100644
index 000000000000..19ad37c066f5
--- /dev/null
+++ b/drivers/of/dma.c
@@ -0,0 +1,219 @@
+/*
+ * Device tree helpers for DMA request / controller
+ *
+ * Based on of_gpio.c
+ *
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/rculist.h>
+#include <linux/slab.h>
+#include <linux/of.h>
+#include <linux/of_dma.h>
+
+static LIST_HEAD(of_dma_list);
+
+/**
+ * of_dma_find_controller - Find a DMA controller in DT DMA helpers list
+ * @np:		device node of DMA controller
+ */
+static struct of_dma *of_dma_find_controller(struct device_node *np)
+{
+	struct of_dma *ofdma;
+
+	if (list_empty(&of_dma_list)) {
+		pr_err("empty DMA controller list\n");
+		return NULL;
+	}
+
+	list_for_each_entry_rcu(ofdma, &of_dma_list, of_dma_controllers)
+		if (ofdma->of_node == np)
+			return ofdma;
+
+	return NULL;
+}
+
+/**
+ * of_dma_controller_register - Register a DMA controller to DT DMA helpers
+ * @np:			device node of DMA controller
+ * @of_dma_xlate:	translation function which converts a phandle
+ *			arguments list into a dma_chan structure
+ * @data		pointer to controller specific data to be used by
+ *			translation function
+ *
+ * Returns 0 on success or appropriate errno value on error.
+ *
+ * Allocated memory should be freed with appropriate of_dma_controller_free()
+ * call.
+ */
+int of_dma_controller_register(struct device_node *np,
+				struct dma_chan *(*of_dma_xlate)
+				(struct of_phandle_args *, struct of_dma *),
+				void *data)
+{
+	struct of_dma	*ofdma;
+	int		nbcells;
+
+	if (!np || !of_dma_xlate) {
+		pr_err("%s: not enough information provided\n", __func__);
+		return -EINVAL;
+	}
+
+	ofdma = kzalloc(sizeof(*ofdma), GFP_KERNEL);
+	if (!ofdma)
+		return -ENOMEM;
+
+	nbcells = be32_to_cpup(of_get_property(np, "#dma-cells", NULL));
+	if (!nbcells) {
+		pr_err("%s: #dma-cells property is missing or invalid\n",
+		       __func__);
+		return -EINVAL;
+	}
+
+	ofdma->of_node = np;
+	ofdma->of_dma_nbcells = nbcells;
+	ofdma->of_dma_xlate = of_dma_xlate;
+	ofdma->of_dma_data = data;
+
+	/* Now queue of_dma controller structure in list */
+	list_add_tail_rcu(&ofdma->of_dma_controllers, &of_dma_list);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(of_dma_controller_register);
+
+/**
+ * of_dma_controller_free - Remove a DMA controller from DT DMA helpers list
+ * @np:		device node of DMA controller
+ *
+ * Memory allocated by of_dma_controller_register() is freed here.
+ */
+void of_dma_controller_free(struct device_node *np)
+{
+	struct of_dma *ofdma;
+
+	ofdma = of_dma_find_controller(np);
+	if (ofdma) {
+		list_del_rcu(&ofdma->of_dma_controllers);
+		kfree(ofdma);
+	}
+}
+EXPORT_SYMBOL_GPL(of_dma_controller_free);
+
+/**
+ * of_dma_find_channel - Find a DMA channel by name
+ * @np:		device node to look for DMA channels
+ * @name:	name of desired channel
+ * @dma_spec:	pointer to DMA specifier as found in the device tree
+ *
+ * Find a DMA channel by the name. Returns 0 on success or appropriate
+ * errno value on error.
+ */
+static int of_dma_find_channel(struct device_node *np, char *name,
+			       struct of_phandle_args *dma_spec)
+{
+	int count, i;
+	const char *s;
+
+	count = of_property_count_strings(np, "dma-names");
+	if (count < 0)
+		return count;
+
+	for (i = 0; i < count; i++) {
+		if (of_property_read_string_index(np, "dma-names", i, &s))
+			continue;
+
+		if (strcmp(name, s))
+			continue;
+
+		if (!of_parse_phandle_with_args(np, "dmas", "#dma-cells", i,
+						dma_spec))
+			return 0;
+	}
+
+	return -ENODEV;
+}
+
+/**
+ * of_dma_request_slave_channel - Get the DMA slave channel
+ * @np:		device node to get DMA request from
+ * @name:	name of desired channel
+ *
+ * Returns pointer to appropriate dma channel on success or NULL on error.
+ */
+struct dma_chan *of_dma_request_slave_channel(struct device_node *np,
+					      char *name)
+{
+	struct of_phandle_args	dma_spec;
+	struct of_dma		*ofdma;
+	struct dma_chan		*chan;
+	int			r;
+
+	if (!np || !name) {
+		pr_err("%s: not enough information provided\n", __func__);
+		return NULL;
+	}
+
+	do {
+		r = of_dma_find_channel(np, name, &dma_spec);
+		if (r) {
+			pr_err("%s: can't find DMA channel\n", np->full_name);
+			return NULL;
+		}
+
+		ofdma = of_dma_find_controller(dma_spec.np);
+		if (!ofdma) {
+			pr_debug("%s: can't find DMA controller %s\n",
+				 np->full_name, dma_spec.np->full_name);
+			continue;
+		}
+
+		if (dma_spec.args_count != ofdma->of_dma_nbcells) {
+			pr_debug("%s: wrong #dma-cells for %s\n", np->full_name,
+				 dma_spec.np->full_name);
+			continue;
+		}
+
+		chan = ofdma->of_dma_xlate(&dma_spec, ofdma);
+
+		of_node_put(dma_spec.np);
+
+	} while (!chan);
+
+	return chan;
+}
+
+/**
+ * of_dma_simple_xlate - Simple DMA engine translation function
+ * @dma_spec:	pointer to DMA specifier as found in the device tree
+ * @of_dma:	pointer to DMA controller data
+ *
+ * A simple translation function for devices that use a 32-bit value for the
+ * filter_param when calling the DMA engine dma_request_channel() function.
+ * Note that this translation function requires that #dma-cells is equal to 1
+ * and the argument of the dma specifier is the 32-bit filter_param. Returns
+ * pointer to appropriate dma channel on success or NULL on error.
+ */
+struct dma_chan *of_dma_simple_xlate(struct of_phandle_args *dma_spec,
+						struct of_dma *ofdma)
+{
+	int count = dma_spec->args_count;
+	struct of_dma_filter_info *info = ofdma->of_dma_data;
+
+	if (!info || !info->filter_fn)
+		return NULL;
+
+	if (count != 1)
+		return NULL;
+
+	return dma_request_channel(info->dma_cap, info->filter_fn,
+			&dma_spec->args[0]);
+}
+EXPORT_SYMBOL_GPL(of_dma_simple_xlate);
diff --git a/include/linux/of_dma.h b/include/linux/of_dma.h
new file mode 100644
index 000000000000..337823dc6b90
--- /dev/null
+++ b/include/linux/of_dma.h
@@ -0,0 +1,45 @@
+/*
+ * OF helpers for DMA request / controller
+ *
+ * Based on of_gpio.h
+ *
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __LINUX_OF_DMA_H
+#define __LINUX_OF_DMA_H
+
+#include <linux/of.h>
+#include <linux/dmaengine.h>
+
+struct device_node;
+
+struct of_dma {
+	struct list_head	of_dma_controllers;
+	struct device_node	*of_node;
+	int			of_dma_nbcells;
+	struct dma_chan		*(*of_dma_xlate)
+				(struct of_phandle_args *, struct of_dma *);
+	void			*of_dma_data;
+};
+
+struct of_dma_filter_info {
+	dma_cap_mask_t	dma_cap;
+	dma_filter_fn	filter_fn;
+};
+
+extern int of_dma_controller_register(struct device_node *np,
+		struct dma_chan *(*of_dma_xlate)
+		(struct of_phandle_args *, struct of_dma *),
+		void *data);
+extern void of_dma_controller_free(struct device_node *np);
+extern struct dma_chan *of_dma_request_slave_channel(struct device_node *np,
+						     char *name);
+extern struct dma_chan *of_dma_simple_xlate(struct of_phandle_args *dma_spec,
+		struct of_dma *ofdma);
+
+#endif /* __LINUX_OF_DMA_H */
-- 
cgit v1.2.3


From 4c26bc601d20fa67eefcb27477feda130c10790e Mon Sep 17 00:00:00 2001
From: Vinod Koul <vinod.koul@linux.intel.com>
Date: Tue, 25 Sep 2012 09:57:36 +0530
Subject: of: dma- fix build break for !CONFIG_OF

Signed-off-by: Vinod Koul <vinod.koul@linux.intel.com>
---
 include/linux/of_dma.h | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/of_dma.h b/include/linux/of_dma.h
index 337823dc6b90..67158ddd1f3e 100644
--- a/include/linux/of_dma.h
+++ b/include/linux/of_dma.h
@@ -32,6 +32,7 @@ struct of_dma_filter_info {
 	dma_filter_fn	filter_fn;
 };
 
+#ifdef CONFIG_OF
 extern int of_dma_controller_register(struct device_node *np,
 		struct dma_chan *(*of_dma_xlate)
 		(struct of_phandle_args *, struct of_dma *),
@@ -41,5 +42,31 @@ extern struct dma_chan *of_dma_request_slave_channel(struct device_node *np,
 						     char *name);
 extern struct dma_chan *of_dma_simple_xlate(struct of_phandle_args *dma_spec,
 		struct of_dma *ofdma);
+#else
+static int of_dma_controller_register(struct device_node *np,
+		struct dma_chan *(*of_dma_xlate)
+		(struct of_phandle_args *, struct of_dma *),
+		void *data)
+{
+	return -ENODEV;
+}
+
+static void of_dma_controller_free(struct device_node *np)
+{
+}
+
+static struct dma_chan *of_dma_request_slave_channel(struct device_node *np,
+						     char *name)
+{
+	return NULL;
+}
+
+static struct dma_chan *of_dma_simple_xlate(struct of_phandle_args *dma_spec,
+		struct of_dma *ofdma)
+{
+	return NULL;
+}
+
+#endif
 
 #endif /* __LINUX_OF_DMA_H */
-- 
cgit v1.2.3


From d18d5f59797009d86a0ee46feb2a56d53707455a Mon Sep 17 00:00:00 2001
From: Vinod Koul <vinod.koul@linux.intel.com>
Date: Tue, 25 Sep 2012 16:18:55 +0530
Subject: dmaengine: fix build failure due to missing semi-colon

Reported-by: Fengguang Wu <fengguang.wu@intel.com>
Signed-off-by: Vinod Koul <vinod.koul@linux.intel.com>
---
 include/linux/dmaengine.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 8cd0e2556d04..c88f302d91c9 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -992,7 +992,7 @@ static inline struct dma_chan *__dma_request_channel(dma_cap_mask_t *mask,
 static inline struct dma_chan *dma_request_slave_channel(struct device *dev,
 							 char *name)
 {
-	return NULL
+	return NULL;
 }
 static inline void dma_release_channel(struct dma_chan *chan)
 {
-- 
cgit v1.2.3


From 177d2bf5c7d3ab41bfb4ce2597dde668225958dd Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Tue, 16 Oct 2012 09:49:16 +0530
Subject: dmaengine: dw_dmac: Update documentation style comments for
 dw_dma_platform_data

Documentation style comments were missing for few fields in struct
dw_dma_platform_data. Add these.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Vinod Koul <vinod.koul@linux.intel.com>
---
 include/linux/dw_dmac.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/dw_dmac.h b/include/linux/dw_dmac.h
index e1c8c9e919ac..62a6190dee22 100644
--- a/include/linux/dw_dmac.h
+++ b/include/linux/dw_dmac.h
@@ -19,6 +19,8 @@
  * @nr_channels: Number of channels supported by hardware (max 8)
  * @is_private: The device channels should be marked as private and not for
  *	by the general purpose DMA channel allocator.
+ * @chan_allocation_order: Allocate channels starting from 0 or 7
+ * @chan_priority: Set channel priority increasing from 0 to 7 or 7 to 0.
  * @block_size: Maximum block size supported by the controller
  * @nr_masters: Number of AHB masters supported by the controller
  * @data_width: Maximum data width supported by hardware per AHB master
-- 
cgit v1.2.3


From 9f575d9741ff28b6661f639d63b4f465c19889c4 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Fri, 4 Jan 2013 10:35:06 +0100
Subject: dma: coh901318: create a proper platform data file

This extracts the platform data that we will keep generic
from the U300 platform and associates it with the COH901318
driver in <linux/platform_data/dma-coh901318.h>.

Acked-by: Vinod Koul <vinod.koul@intel.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 arch/arm/mach-u300/core.c                   |  2 +-
 arch/arm/mach-u300/dma_channels.h           | 60 ------------------------
 arch/arm/mach-u300/include/mach/coh901318.h | 16 -------
 arch/arm/mach-u300/spi.c                    |  2 +-
 drivers/dma/coh901318.c                     |  1 +
 include/linux/platform_data/dma-coh901318.h | 72 +++++++++++++++++++++++++++++
 6 files changed, 75 insertions(+), 78 deletions(-)
 delete mode 100644 arch/arm/mach-u300/dma_channels.h
 create mode 100644 include/linux/platform_data/dma-coh901318.h

(limited to 'include/linux')

diff --git a/arch/arm/mach-u300/core.c b/arch/arm/mach-u300/core.c
index 4ce77cdc31cc..0951b51f36a6 100644
--- a/arch/arm/mach-u300/core.c
+++ b/arch/arm/mach-u300/core.c
@@ -31,6 +31,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/platform_data/clk-u300.h>
 #include <linux/platform_data/pinctrl-coh901.h>
+#include <linux/platform_data/dma-coh901318.h>
 
 #include <asm/types.h>
 #include <asm/setup.h>
@@ -49,7 +50,6 @@
 #include "spi.h"
 #include "i2c.h"
 #include "u300-gpio.h"
-#include "dma_channels.h"
 
 /*
  * Static I/O mappings that are needed for booting the U300 platforms. The
diff --git a/arch/arm/mach-u300/dma_channels.h b/arch/arm/mach-u300/dma_channels.h
deleted file mode 100644
index 4e8a88fbca49..000000000000
--- a/arch/arm/mach-u300/dma_channels.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- *
- * arch/arm/mach-u300/include/mach/dma_channels.h
- *
- *
- * Copyright (C) 2007-2012 ST-Ericsson
- * License terms: GNU General Public License (GPL) version 2
- * Map file for the U300 dma driver.
- * Author: Per Friden <per.friden@stericsson.com>
- */
-
-#ifndef DMA_CHANNELS_H
-#define DMA_CHANNELS_H
-
-#define U300_DMA_MSL_TX_0             0
-#define U300_DMA_MSL_TX_1             1
-#define U300_DMA_MSL_TX_2             2
-#define U300_DMA_MSL_TX_3             3
-#define U300_DMA_MSL_TX_4             4
-#define U300_DMA_MSL_TX_5             5
-#define U300_DMA_MSL_TX_6             6
-#define U300_DMA_MSL_RX_0             7
-#define U300_DMA_MSL_RX_1             8
-#define U300_DMA_MSL_RX_2             9
-#define U300_DMA_MSL_RX_3             10
-#define U300_DMA_MSL_RX_4             11
-#define U300_DMA_MSL_RX_5             12
-#define U300_DMA_MSL_RX_6             13
-#define U300_DMA_MMCSD_RX_TX          14
-#define U300_DMA_MSPRO_TX             15
-#define U300_DMA_MSPRO_RX             16
-#define U300_DMA_UART0_TX             17
-#define U300_DMA_UART0_RX             18
-#define U300_DMA_APEX_TX              19
-#define U300_DMA_APEX_RX              20
-#define U300_DMA_PCM_I2S0_TX          21
-#define U300_DMA_PCM_I2S0_RX          22
-#define U300_DMA_PCM_I2S1_TX          23
-#define U300_DMA_PCM_I2S1_RX          24
-#define U300_DMA_XGAM_CDI             25
-#define U300_DMA_XGAM_PDI             26
-#define U300_DMA_SPI_TX               27
-#define U300_DMA_SPI_RX               28
-#define U300_DMA_GENERAL_PURPOSE_0    29
-#define U300_DMA_GENERAL_PURPOSE_1    30
-#define U300_DMA_GENERAL_PURPOSE_2    31
-#define U300_DMA_GENERAL_PURPOSE_3    32
-#define U300_DMA_GENERAL_PURPOSE_4    33
-#define U300_DMA_GENERAL_PURPOSE_5    34
-#define U300_DMA_GENERAL_PURPOSE_6    35
-#define U300_DMA_GENERAL_PURPOSE_7    36
-#define U300_DMA_GENERAL_PURPOSE_8    37
-#define U300_DMA_UART1_TX             38
-#define U300_DMA_UART1_RX             39
-
-#define U300_DMA_DEVICE_CHANNELS      32
-#define U300_DMA_CHANNELS             40
-
-
-#endif /* DMA_CHANNELS_H */
diff --git a/arch/arm/mach-u300/include/mach/coh901318.h b/arch/arm/mach-u300/include/mach/coh901318.h
index 7c3b2b2d25b6..fc47d30988fc 100644
--- a/arch/arm/mach-u300/include/mach/coh901318.h
+++ b/arch/arm/mach-u300/include/mach/coh901318.h
@@ -102,22 +102,6 @@ struct coh901318_platform {
 	const int max_channels;
 };
 
-#ifdef CONFIG_COH901318
-/**
- * coh901318_filter_id() - DMA channel filter function
- * @chan: dma channel handle
- * @chan_id: id of dma channel to be filter out
- *
- * In dma_request_channel() it specifies what channel id to be requested
- */
-bool coh901318_filter_id(struct dma_chan *chan, void *chan_id);
-#else
-static inline bool coh901318_filter_id(struct dma_chan *chan, void *chan_id)
-{
-	return false;
-}
-#endif
-
 /*
  * DMA Controller - this access the static mappings of the coh901318 dma.
  *
diff --git a/arch/arm/mach-u300/spi.c b/arch/arm/mach-u300/spi.c
index 02e6659286d5..b34567f944ca 100644
--- a/arch/arm/mach-u300/spi.c
+++ b/arch/arm/mach-u300/spi.c
@@ -10,9 +10,9 @@
 #include <linux/amba/bus.h>
 #include <linux/spi/spi.h>
 #include <linux/amba/pl022.h>
+#include <linux/platform_data/dma-coh901318.h>
 #include <linux/err.h>
 #include <mach/coh901318.h>
-#include "dma_channels.h"
 
 /*
  * The following is for the actual devices on the SSP/SPI bus
diff --git a/drivers/dma/coh901318.c b/drivers/dma/coh901318.c
index aa384e53b7ac..5fdd38bcda23 100644
--- a/drivers/dma/coh901318.c
+++ b/drivers/dma/coh901318.c
@@ -21,6 +21,7 @@
 #include <linux/io.h>
 #include <linux/uaccess.h>
 #include <linux/debugfs.h>
+#include <linux/platform_data/dma-coh901318.h>
 #include <mach/coh901318.h>
 
 #include "coh901318_lli.h"
diff --git a/include/linux/platform_data/dma-coh901318.h b/include/linux/platform_data/dma-coh901318.h
new file mode 100644
index 000000000000..c4cb9590d115
--- /dev/null
+++ b/include/linux/platform_data/dma-coh901318.h
@@ -0,0 +1,72 @@
+/*
+ * Platform data for the COH901318 DMA controller
+ * Copyright (C) 2007-2013 ST-Ericsson
+ * License terms: GNU General Public License (GPL) version 2
+ */
+
+#ifndef PLAT_COH901318_H
+#define PLAT_COH901318_H
+
+#ifdef CONFIG_COH901318
+
+/* We only support the U300 DMA channels */
+#define U300_DMA_MSL_TX_0		0
+#define U300_DMA_MSL_TX_1		1
+#define U300_DMA_MSL_TX_2		2
+#define U300_DMA_MSL_TX_3		3
+#define U300_DMA_MSL_TX_4		4
+#define U300_DMA_MSL_TX_5		5
+#define U300_DMA_MSL_TX_6		6
+#define U300_DMA_MSL_RX_0		7
+#define U300_DMA_MSL_RX_1		8
+#define U300_DMA_MSL_RX_2		9
+#define U300_DMA_MSL_RX_3		10
+#define U300_DMA_MSL_RX_4		11
+#define U300_DMA_MSL_RX_5		12
+#define U300_DMA_MSL_RX_6		13
+#define U300_DMA_MMCSD_RX_TX		14
+#define U300_DMA_MSPRO_TX		15
+#define U300_DMA_MSPRO_RX		16
+#define U300_DMA_UART0_TX		17
+#define U300_DMA_UART0_RX		18
+#define U300_DMA_APEX_TX		19
+#define U300_DMA_APEX_RX		20
+#define U300_DMA_PCM_I2S0_TX		21
+#define U300_DMA_PCM_I2S0_RX		22
+#define U300_DMA_PCM_I2S1_TX		23
+#define U300_DMA_PCM_I2S1_RX		24
+#define U300_DMA_XGAM_CDI		25
+#define U300_DMA_XGAM_PDI		26
+#define U300_DMA_SPI_TX			27
+#define U300_DMA_SPI_RX			28
+#define U300_DMA_GENERAL_PURPOSE_0	29
+#define U300_DMA_GENERAL_PURPOSE_1	30
+#define U300_DMA_GENERAL_PURPOSE_2	31
+#define U300_DMA_GENERAL_PURPOSE_3	32
+#define U300_DMA_GENERAL_PURPOSE_4	33
+#define U300_DMA_GENERAL_PURPOSE_5	34
+#define U300_DMA_GENERAL_PURPOSE_6	35
+#define U300_DMA_GENERAL_PURPOSE_7	36
+#define U300_DMA_GENERAL_PURPOSE_8	37
+#define U300_DMA_UART1_TX		38
+#define U300_DMA_UART1_RX		39
+
+#define U300_DMA_DEVICE_CHANNELS	32
+#define U300_DMA_CHANNELS		40
+
+/**
+ * coh901318_filter_id() - DMA channel filter function
+ * @chan: dma channel handle
+ * @chan_id: id of dma channel to be filter out
+ *
+ * In dma_request_channel() it specifies what channel id to be requested
+ */
+bool coh901318_filter_id(struct dma_chan *chan, void *chan_id);
+#else
+static inline bool coh901318_filter_id(struct dma_chan *chan, void *chan_id)
+{
+	return false;
+}
+#endif
+
+#endif /* PLAT_COH901318_H */
-- 
cgit v1.2.3


From d5b1fe68baa7213f198e5be8cd1a1037258ab2c8 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 28 Dec 2012 13:18:28 -0800
Subject: cgroup: remove unused dummy cgroup_fork_callbacks()

5edee61ede ("cgroup: cgroup_subsys->fork() should be called after the
task is added to css_set") removed cgroup_fork_callbacks() but forgot
to remove its dummy version for !CONFIG_CGROUPS.  Remove it.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Herton Ronaldo Krzesinski <herton.krzesinski@canonical.com>
---
 include/linux/cgroup.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 7d73905dcba2..942e68705577 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -706,7 +706,6 @@ struct cgroup_subsys_state *cgroup_css_from_dir(struct file *f, int id);
 static inline int cgroup_init_early(void) { return 0; }
 static inline int cgroup_init(void) { return 0; }
 static inline void cgroup_fork(struct task_struct *p) {}
-static inline void cgroup_fork_callbacks(struct task_struct *p) {}
 static inline void cgroup_post_fork(struct task_struct *p) {}
 static inline void cgroup_exit(struct task_struct *p, int callbacks) {}
 
-- 
cgit v1.2.3


From 12a9d2fef1d35770d3cdc2cd1faabb83c45bc0fa Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 7 Jan 2013 08:49:33 -0800
Subject: cgroup: implement cgroup_rightmost_descendant()

Implement cgroup_rightmost_descendant() which returns the right most
descendant of the specified cgroup.  This can be used to skip the
cgroup's subtree while iterating with
cgroup_for_each_descendant_pre().

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Michal Hocko <mhocko@suse.cz>
Acked-by: Li Zefan <lizefan@huawei.com>
---
 include/linux/cgroup.h |  1 +
 kernel/cgroup.c        | 26 ++++++++++++++++++++++++++
 2 files changed, 27 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 942e68705577..8118a3120378 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -558,6 +558,7 @@ static inline struct cgroup* task_cgroup(struct task_struct *task,
 
 struct cgroup *cgroup_next_descendant_pre(struct cgroup *pos,
 					  struct cgroup *cgroup);
+struct cgroup *cgroup_rightmost_descendant(struct cgroup *pos);
 
 /**
  * cgroup_for_each_descendant_pre - pre-order walk of a cgroup's descendants
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 4855892798fd..6643f7053454 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -3017,6 +3017,32 @@ struct cgroup *cgroup_next_descendant_pre(struct cgroup *pos,
 }
 EXPORT_SYMBOL_GPL(cgroup_next_descendant_pre);
 
+/**
+ * cgroup_rightmost_descendant - return the rightmost descendant of a cgroup
+ * @pos: cgroup of interest
+ *
+ * Return the rightmost descendant of @pos.  If there's no descendant,
+ * @pos is returned.  This can be used during pre-order traversal to skip
+ * subtree of @pos.
+ */
+struct cgroup *cgroup_rightmost_descendant(struct cgroup *pos)
+{
+	struct cgroup *last, *tmp;
+
+	WARN_ON_ONCE(!rcu_read_lock_held());
+
+	do {
+		last = pos;
+		/* ->prev isn't RCU safe, walk ->next till the end */
+		pos = NULL;
+		list_for_each_entry_rcu(tmp, &last->children, sibling)
+			pos = tmp;
+	} while (pos);
+
+	return last;
+}
+EXPORT_SYMBOL_GPL(cgroup_rightmost_descendant);
+
 static struct cgroup *cgroup_leftmost_descendant(struct cgroup *pos)
 {
 	struct cgroup *last;
-- 
cgit v1.2.3


From 8f9dc85348ac37ff3b6b031d22e93a5b59d81f83 Mon Sep 17 00:00:00 2001
From: Hauke Mehrtens <hauke@hauke-m.de>
Date: Fri, 4 Jan 2013 00:51:24 +0100
Subject: bcma: mips: remove assigned_irqs from structure

This member is not needed any more.

Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/bcma/driver_mips.c            | 2 --
 include/linux/bcma/bcma_driver_mips.h | 1 -
 2 files changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/bcma/driver_mips.c b/drivers/bcma/driver_mips.c
index 69815079a6dd..c6d7be33b972 100644
--- a/drivers/bcma/driver_mips.c
+++ b/drivers/bcma/driver_mips.c
@@ -263,8 +263,6 @@ void bcma_core_mips_init(struct bcma_drv_mips *mcore)
 
 	bcma_core_mips_early_init(mcore);
 
-	mcore->assigned_irqs = 1;
-
 	switch (bus->chipinfo.id) {
 	case BCMA_CHIP_ID_BCM4716:
 	case BCMA_CHIP_ID_BCM4748:
diff --git a/include/linux/bcma/bcma_driver_mips.h b/include/linux/bcma/bcma_driver_mips.h
index 0baf8a56b794..6495579e3f35 100644
--- a/include/linux/bcma/bcma_driver_mips.h
+++ b/include/linux/bcma/bcma_driver_mips.h
@@ -36,7 +36,6 @@ struct bcma_drv_mips {
 	struct bcma_device *core;
 	u8 setup_done:1;
 	u8 early_setup_done:1;
-	unsigned int assigned_irqs;
 };
 
 #ifdef CONFIG_BCMA_DRIVER_MIPS
-- 
cgit v1.2.3


From 3c449ed0075994b3f3371f8254560428ba787efc Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Sat, 3 Nov 2012 21:39:31 -0700
Subject: PCI/ACPI: Reserve firmware-allocated resources for hot-added root
 buses

Firmware may have assigned PCI BARs for hot-added devices, so reserve
those resources before trying to allocate more.

[bhelgaas: move empty weak definition here]
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/acpi/pci_root.c | 4 +++-
 drivers/pci/bus.c       | 2 ++
 include/linux/pci.h     | 1 +
 3 files changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c
index 7928d4dc7056..dcbe9660e756 100644
--- a/drivers/acpi/pci_root.c
+++ b/drivers/acpi/pci_root.c
@@ -650,8 +650,10 @@ static int acpi_pci_root_start(struct acpi_device *device)
 	struct acpi_pci_root *root = acpi_driver_data(device);
 	struct acpi_pci_driver *driver;
 
-	if (system_state != SYSTEM_BOOTING)
+	if (system_state != SYSTEM_BOOTING) {
+		pcibios_resource_survey_bus(root->bus);
 		pci_assign_unassigned_bus_resources(root->bus);
+	}
 
 	mutex_lock(&acpi_pci_root_lock);
 	list_for_each_entry(driver, &acpi_pci_drivers, node)
diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c
index ad6a8b635692..847f3ca47bb8 100644
--- a/drivers/pci/bus.c
+++ b/drivers/pci/bus.c
@@ -158,6 +158,8 @@ pci_bus_alloc_resource(struct pci_bus *bus, struct resource *res,
 	return ret;
 }
 
+void __weak pcibios_resource_survey_bus(struct pci_bus *bus) { }
+
 /**
  * pci_bus_add_device - add a single device
  * @dev: device to add
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 15472d691ee6..907b455ab603 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -674,6 +674,7 @@ extern struct list_head pci_root_buses;	/* list of all known PCI buses */
 /* Some device drivers need know if pci is initiated */
 extern int no_pci_devices(void);
 
+void pcibios_resource_survey_bus(struct pci_bus *bus);
 void pcibios_fixup_bus(struct pci_bus *);
 int __must_check pcibios_enable_device(struct pci_dev *, int mask);
 /* Architecture specific versions may override this (weak) */
-- 
cgit v1.2.3


From a9ddb575d6d6c58c39e8c44a22b84445fedb0521 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Tue, 16 Oct 2012 09:49:17 +0530
Subject: dmaengine: dw_dmac: Enhance device tree support

dw_dmac driver already supports device tree but it used to have its platform
data passed the non-DT way.

This patch does following changes:
- pass platform data via DT, non-DT way still takes precedence if both are used.
- create generic filter routine
- Earlier slave information was made available by slave specific filter routines
  in chan->private field. Now, this information would be passed from within dmac
  DT node. Slave drivers would now be required to pass bus_id (a string) as
  parameter to this generic filter(), which would be compared against the slave
  data passed from DT, by the generic filter routine.
- Update binding document

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
[Fixed __devinit usage]
Signed-off-by: Vinod Koul <vinod.koul@linux.intel.com>
---
 Documentation/devicetree/bindings/dma/snps-dma.txt |  44 +++++++
 drivers/dma/dw_dmac.c                              | 134 +++++++++++++++++++++
 drivers/dma/dw_dmac_regs.h                         |   4 +
 include/linux/dw_dmac.h                            |  43 ++++---
 4 files changed, 208 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/dma/snps-dma.txt b/Documentation/devicetree/bindings/dma/snps-dma.txt
index c0d85dbcada5..5bb3dfb6f1d8 100644
--- a/Documentation/devicetree/bindings/dma/snps-dma.txt
+++ b/Documentation/devicetree/bindings/dma/snps-dma.txt
@@ -6,6 +6,26 @@ Required properties:
 - interrupt-parent: Should be the phandle for the interrupt controller
   that services interrupts for this device
 - interrupt: Should contain the DMAC interrupt number
+- nr_channels: Number of channels supported by hardware
+- is_private: The device channels should be marked as private and not for by the
+  general purpose DMA channel allocator. False if not passed.
+- chan_allocation_order: order of allocation of channel, 0 (default): ascending,
+  1: descending
+- chan_priority: priority of channels. 0 (default): increase from chan 0->n, 1:
+  increase from chan n->0
+- block_size: Maximum block size supported by the controller
+- nr_masters: Number of AHB masters supported by the controller
+- data_width: Maximum data width supported by hardware per AHB master
+  (0 - 8bits, 1 - 16bits, ..., 5 - 256bits)
+- slave_info:
+	- bus_id: name of this device channel, not just a device name since
+	  devices may have more than one channel e.g. "foo_tx". For using the
+	  dw_generic_filter(), slave drivers must pass exactly this string as
+	  param to filter function.
+	- cfg_hi: Platform-specific initializer for the CFG_HI register
+	- cfg_lo: Platform-specific initializer for the CFG_LO register
+	- src_master: src master for transfers on allocated channel.
+	- dst_master: dest master for transfers on allocated channel.
 
 Example:
 
@@ -14,4 +34,28 @@ Example:
 		reg = <0xfc000000 0x1000>;
 		interrupt-parent = <&vic1>;
 		interrupts = <12>;
+
+		nr_channels = <8>;
+		chan_allocation_order = <1>;
+		chan_priority = <1>;
+		block_size = <0xfff>;
+		nr_masters = <2>;
+		data_width = <3 3 0 0>;
+
+		slave_info {
+			uart0-tx {
+				bus_id = "uart0-tx";
+				cfg_hi = <0x4000>;	/* 0x8 << 11 */
+				cfg_lo = <0>;
+				src_master = <0>;
+				dst_master = <1>;
+			};
+			spi0-tx {
+				bus_id = "spi0-tx";
+				cfg_hi = <0x2000>;	/* 0x4 << 11 */
+				cfg_lo = <0>;
+				src_master = <0>;
+				dst_master = <0>;
+			};
+		};
 	};
diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c
index 8f0b111af4de..27b8e1d1845e 100644
--- a/drivers/dma/dw_dmac.c
+++ b/drivers/dma/dw_dmac.c
@@ -1179,6 +1179,50 @@ static void dwc_free_chan_resources(struct dma_chan *chan)
 	dev_vdbg(chan2dev(chan), "%s: done\n", __func__);
 }
 
+bool dw_dma_generic_filter(struct dma_chan *chan, void *param)
+{
+	struct dw_dma *dw = to_dw_dma(chan->device);
+	static struct dw_dma *last_dw;
+	static char *last_bus_id;
+	int i = -1;
+
+	/*
+	 * dmaengine framework calls this routine for all channels of all dma
+	 * controller, until true is returned. If 'param' bus_id is not
+	 * registered with a dma controller (dw), then there is no need of
+	 * running below function for all channels of dw.
+	 *
+	 * This block of code does this by saving the parameters of last
+	 * failure. If dw and param are same, i.e. trying on same dw with
+	 * different channel, return false.
+	 */
+	if ((last_dw == dw) && (last_bus_id == param))
+		return false;
+	/*
+	 * Return true:
+	 * - If dw_dma's platform data is not filled with slave info, then all
+	 *   dma controllers are fine for transfer.
+	 * - Or if param is NULL
+	 */
+	if (!dw->sd || !param)
+		return true;
+
+	while (++i < dw->sd_count) {
+		if (!strcmp(dw->sd[i].bus_id, param)) {
+			chan->private = &dw->sd[i];
+			last_dw = NULL;
+			last_bus_id = NULL;
+
+			return true;
+		}
+	}
+
+	last_dw = dw;
+	last_bus_id = param;
+	return false;
+}
+EXPORT_SYMBOL(dw_dma_generic_filter);
+
 /* --------------------- Cyclic DMA API extensions -------------------- */
 
 /**
@@ -1462,6 +1506,91 @@ static void dw_dma_off(struct dw_dma *dw)
 		dw->chan[i].initialized = false;
 }
 
+#ifdef CONFIG_OF
+static struct dw_dma_platform_data *
+dw_dma_parse_dt(struct platform_device *pdev)
+{
+	struct device_node *sn, *cn, *np = pdev->dev.of_node;
+	struct dw_dma_platform_data *pdata;
+	struct dw_dma_slave *sd;
+	u32 tmp, arr[4];
+
+	if (!np) {
+		dev_err(&pdev->dev, "Missing DT data\n");
+		return NULL;
+	}
+
+	pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
+	if (!pdata)
+		return NULL;
+
+	if (of_property_read_u32(np, "nr_channels", &pdata->nr_channels))
+		return NULL;
+
+	if (of_property_read_bool(np, "is_private"))
+		pdata->is_private = true;
+
+	if (!of_property_read_u32(np, "chan_allocation_order", &tmp))
+		pdata->chan_allocation_order = (unsigned char)tmp;
+
+	if (!of_property_read_u32(np, "chan_priority", &tmp))
+		pdata->chan_priority = tmp;
+
+	if (!of_property_read_u32(np, "block_size", &tmp))
+		pdata->block_size = tmp;
+
+	if (!of_property_read_u32(np, "nr_masters", &tmp)) {
+		if (tmp > 4)
+			return NULL;
+
+		pdata->nr_masters = tmp;
+	}
+
+	if (!of_property_read_u32_array(np, "data_width", arr,
+				pdata->nr_masters))
+		for (tmp = 0; tmp < pdata->nr_masters; tmp++)
+			pdata->data_width[tmp] = arr[tmp];
+
+	/* parse slave data */
+	sn = of_find_node_by_name(np, "slave_info");
+	if (!sn)
+		return pdata;
+
+	/* calculate number of slaves */
+	tmp = of_get_child_count(sn);
+	if (!tmp)
+		return NULL;
+
+	sd = devm_kzalloc(&pdev->dev, sizeof(*sd) * tmp, GFP_KERNEL);
+	if (!sd)
+		return NULL;
+
+	pdata->sd = sd;
+	pdata->sd_count = tmp;
+
+	for_each_child_of_node(sn, cn) {
+		sd->dma_dev = &pdev->dev;
+		of_property_read_string(cn, "bus_id", &sd->bus_id);
+		of_property_read_u32(cn, "cfg_hi", &sd->cfg_hi);
+		of_property_read_u32(cn, "cfg_lo", &sd->cfg_lo);
+		if (!of_property_read_u32(cn, "src_master", &tmp))
+			sd->src_master = tmp;
+
+		if (!of_property_read_u32(cn, "dst_master", &tmp))
+			sd->dst_master = tmp;
+		sd++;
+	}
+
+	return pdata;
+}
+#else
+static inline struct dw_dma_platform_data *
+dw_dma_parse_dt(struct platform_device *pdev)
+{
+	return NULL;
+}
+#endif
+
 static int dw_probe(struct platform_device *pdev)
 {
 	struct dw_dma_platform_data *pdata;
@@ -1478,6 +1607,9 @@ static int dw_probe(struct platform_device *pdev)
 	int			i;
 
 	pdata = dev_get_platdata(&pdev->dev);
+	if (!pdata)
+		pdata = dw_dma_parse_dt(pdev);
+
 	if (!pdata || pdata->nr_channels > DW_DMA_MAX_NR_CHANNELS)
 		return -EINVAL;
 
@@ -1512,6 +1644,8 @@ static int dw_probe(struct platform_device *pdev)
 	clk_prepare_enable(dw->clk);
 
 	dw->regs = regs;
+	dw->sd = pdata->sd;
+	dw->sd_count = pdata->sd_count;
 
 	/* get hardware configuration parameters */
 	if (autocfg) {
diff --git a/drivers/dma/dw_dmac_regs.h b/drivers/dma/dw_dmac_regs.h
index 88965597b7d0..88a069f66b89 100644
--- a/drivers/dma/dw_dmac_regs.h
+++ b/drivers/dma/dw_dmac_regs.h
@@ -239,6 +239,10 @@ struct dw_dma {
 	struct tasklet_struct	tasklet;
 	struct clk		*clk;
 
+	/* slave information */
+	struct dw_dma_slave	*sd;
+	unsigned int		sd_count;
+
 	u8			all_chan_mask;
 
 	/* hardware configuration */
diff --git a/include/linux/dw_dmac.h b/include/linux/dw_dmac.h
index 62a6190dee22..41766de66e33 100644
--- a/include/linux/dw_dmac.h
+++ b/include/linux/dw_dmac.h
@@ -14,6 +14,26 @@
 
 #include <linux/dmaengine.h>
 
+/**
+ * struct dw_dma_slave - Controller-specific information about a slave
+ *
+ * @dma_dev: required DMA master device. Depricated.
+ * @bus_id: name of this device channel, not just a device name since
+ *          devices may have more than one channel e.g. "foo_tx"
+ * @cfg_hi: Platform-specific initializer for the CFG_HI register
+ * @cfg_lo: Platform-specific initializer for the CFG_LO register
+ * @src_master: src master for transfers on allocated channel.
+ * @dst_master: dest master for transfers on allocated channel.
+ */
+struct dw_dma_slave {
+	struct device		*dma_dev;
+	const char		*bus_id;
+	u32			cfg_hi;
+	u32			cfg_lo;
+	u8			src_master;
+	u8			dst_master;
+};
+
 /**
  * struct dw_dma_platform_data - Controller configuration parameters
  * @nr_channels: Number of channels supported by hardware (max 8)
@@ -25,6 +45,8 @@
  * @nr_masters: Number of AHB masters supported by the controller
  * @data_width: Maximum data width supported by hardware per AHB master
  *		(0 - 8bits, 1 - 16bits, ..., 5 - 256bits)
+ * @sd: slave specific data. Used for configuring channels
+ * @sd_count: count of slave data structures passed.
  */
 struct dw_dma_platform_data {
 	unsigned int	nr_channels;
@@ -38,6 +60,9 @@ struct dw_dma_platform_data {
 	unsigned short	block_size;
 	unsigned char	nr_masters;
 	unsigned char	data_width[4];
+
+	struct dw_dma_slave *sd;
+	unsigned int sd_count;
 };
 
 /* bursts size */
@@ -52,23 +77,6 @@ enum dw_dma_msize {
 	DW_DMA_MSIZE_256,
 };
 
-/**
- * struct dw_dma_slave - Controller-specific information about a slave
- *
- * @dma_dev: required DMA master device
- * @cfg_hi: Platform-specific initializer for the CFG_HI register
- * @cfg_lo: Platform-specific initializer for the CFG_LO register
- * @src_master: src master for transfers on allocated channel.
- * @dst_master: dest master for transfers on allocated channel.
- */
-struct dw_dma_slave {
-	struct device		*dma_dev;
-	u32			cfg_hi;
-	u32			cfg_lo;
-	u8			src_master;
-	u8			dst_master;
-};
-
 /* Platform-configurable bits in CFG_HI */
 #define DWC_CFGH_FCMODE		(1 << 0)
 #define DWC_CFGH_FIFO_MODE	(1 << 1)
@@ -106,5 +114,6 @@ void dw_dma_cyclic_stop(struct dma_chan *chan);
 dma_addr_t dw_dma_get_src_addr(struct dma_chan *chan);
 
 dma_addr_t dw_dma_get_dst_addr(struct dma_chan *chan);
+bool dw_dma_generic_filter(struct dma_chan *chan, void *param);
 
 #endif /* DW_DMAC_H */
-- 
cgit v1.2.3


From e5a087fdc1ebe5bba40bcecb53c28a0af70e3b47 Mon Sep 17 00:00:00 2001
From: Akinobu Mita <akinobu.mita@gmail.com>
Date: Fri, 26 Oct 2012 23:35:15 +0900
Subject: dmaengine: use for_each_set_bit

Use for_each_set_bit() to implement for_each_dma_cap_mask() and
remove unused first_dma_cap() and next_dma_cap().

Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Cc: Vinod Koul <vinod.koul@intel.com>
Cc: Dan Williams <djbw@fb.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Vinod Koul <vinod.koul@linux.intel.com>
---
 include/linux/dmaengine.h | 18 +-----------------
 1 file changed, 1 insertion(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index c88f302d91c9..4c8643794e0d 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -849,20 +849,6 @@ static inline bool async_tx_test_ack(struct dma_async_tx_descriptor *tx)
 	return (tx->flags & DMA_CTRL_ACK) == DMA_CTRL_ACK;
 }
 
-#define first_dma_cap(mask) __first_dma_cap(&(mask))
-static inline int __first_dma_cap(const dma_cap_mask_t *srcp)
-{
-	return min_t(int, DMA_TX_TYPE_END,
-		find_first_bit(srcp->bits, DMA_TX_TYPE_END));
-}
-
-#define next_dma_cap(n, mask) __next_dma_cap((n), &(mask))
-static inline int __next_dma_cap(int n, const dma_cap_mask_t *srcp)
-{
-	return min_t(int, DMA_TX_TYPE_END,
-		find_next_bit(srcp->bits, DMA_TX_TYPE_END, n+1));
-}
-
 #define dma_cap_set(tx, mask) __dma_cap_set((tx), &(mask))
 static inline void
 __dma_cap_set(enum dma_transaction_type tx_type, dma_cap_mask_t *dstp)
@@ -891,9 +877,7 @@ __dma_has_cap(enum dma_transaction_type tx_type, dma_cap_mask_t *srcp)
 }
 
 #define for_each_dma_cap_mask(cap, mask) \
-	for ((cap) = first_dma_cap(mask);	\
-		(cap) < DMA_TX_TYPE_END;	\
-		(cap) = next_dma_cap((cap), (mask)))
+	for_each_set_bit(cap, mask.bits, DMA_TX_TYPE_END)
 
 /**
  * dma_async_issue_pending - flush pending transactions to HW
-- 
cgit v1.2.3


From 9743a3b62dee8c9d8af1319f8d1c1ff39130267d Mon Sep 17 00:00:00 2001
From: Jon Hunter <jon-hunter@ti.com>
Date: Thu, 11 Oct 2012 14:43:01 -0500
Subject: of: dma: fix protection of DMA controller data stored by DMA helpers

In the current implementation of the OF DMA helpers, read-copy-update (RCU)
linked lists are being used for storing and accessing the DMA controller data.
This part of implementation is based upon V2 of the DMA helpers by Nicolas [1].
During a recent review of RCU, it became apparent that the code is missing the
required rcu_read_lock()/unlock() calls as well as synchronisation calls before
freeing any memory protected by RCU.

Having looked into adding the appropriate RCU calls to protect the DMA data it
became apparent that with the current DMA helper implementation, using RCU is
not as attractive as it may have been before. The main reasons being that ...

1. We need to protect the DMA data around calls to the xlate function.
2. The of_dma_simple_xlate() function calls the DMA engine function
   dma_request_channel() which employs a mutex and so could sleep.
3. The RCU read-side critical sections must not sleep and so we cannot hold
   an RCU read lock around the xlate function.

Therefore, instead of using RCU, an alternative for this use-case is to employ
a simple spinlock inconjunction with a usage count variable to keep track of
how many current users of the DMA data structure there are. With this
implementation, the DMA data cannot be freed until all current users of the
DMA data are finished.

This patch is based upon the DMA helpers fix for potential deadlock [2].

[1] http://article.gmane.org/gmane.linux.ports.arm.omap/73622
[2] http://marc.info/?l=linux-arm-kernel&m=134859982520984&w=2

Signed-off-by: Jon Hunter <jon-hunter@ti.com>
Signed-off-by: Vinod Koul <vinod.koul@linux.intel.com>
---
 drivers/of/dma.c       | 89 +++++++++++++++++++++++++++++++++++++-------------
 include/linux/of_dma.h |  5 +--
 2 files changed, 70 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/of/dma.c b/drivers/of/dma.c
index 4bed490a69e4..59631b2c4666 100644
--- a/drivers/of/dma.c
+++ b/drivers/of/dma.c
@@ -19,27 +19,60 @@
 #include <linux/of_dma.h>
 
 static LIST_HEAD(of_dma_list);
+static DEFINE_SPINLOCK(of_dma_lock);
 
 /**
- * of_dma_find_controller - Find a DMA controller in DT DMA helpers list
- * @np:		device node of DMA controller
+ * of_dma_get_controller - Get a DMA controller in DT DMA helpers list
+ * @dma_spec:	pointer to DMA specifier as found in the device tree
+ *
+ * Finds a DMA controller with matching device node and number for dma cells
+ * in a list of registered DMA controllers. If a match is found the use_count
+ * variable is increased and a valid pointer to the DMA data stored is retuned.
+ * A NULL pointer is returned if no match is found.
  */
-static struct of_dma *of_dma_find_controller(struct device_node *np)
+static struct of_dma *of_dma_get_controller(struct of_phandle_args *dma_spec)
 {
 	struct of_dma *ofdma;
 
+	spin_lock(&of_dma_lock);
+
 	if (list_empty(&of_dma_list)) {
-		pr_err("empty DMA controller list\n");
+		spin_unlock(&of_dma_lock);
 		return NULL;
 	}
 
-	list_for_each_entry_rcu(ofdma, &of_dma_list, of_dma_controllers)
-		if (ofdma->of_node == np)
+	list_for_each_entry(ofdma, &of_dma_list, of_dma_controllers)
+		if ((ofdma->of_node == dma_spec->np) &&
+		    (ofdma->of_dma_nbcells == dma_spec->args_count)) {
+			ofdma->use_count++;
+			spin_unlock(&of_dma_lock);
 			return ofdma;
+		}
+
+	spin_unlock(&of_dma_lock);
+
+	pr_debug("%s: can't find DMA controller %s\n", __func__,
+		 dma_spec->np->full_name);
 
 	return NULL;
 }
 
+/**
+ * of_dma_put_controller - Decrement use count for a registered DMA controller
+ * @of_dma:	pointer to DMA controller data
+ *
+ * Decrements the use_count variable in the DMA data structure. This function
+ * should be called only when a valid pointer is returned from
+ * of_dma_get_controller() and no further accesses to data referenced by that
+ * pointer are needed.
+ */
+static void of_dma_put_controller(struct of_dma *ofdma)
+{
+	spin_lock(&of_dma_lock);
+	ofdma->use_count--;
+	spin_unlock(&of_dma_lock);
+}
+
 /**
  * of_dma_controller_register - Register a DMA controller to DT DMA helpers
  * @np:			device node of DMA controller
@@ -81,9 +114,10 @@ int of_dma_controller_register(struct device_node *np,
 	ofdma->of_dma_nbcells = nbcells;
 	ofdma->of_dma_xlate = of_dma_xlate;
 	ofdma->of_dma_data = data;
+	ofdma->use_count = 0;
 
 	/* Now queue of_dma controller structure in list */
-	list_add_tail_rcu(&ofdma->of_dma_controllers, &of_dma_list);
+	list_add_tail(&ofdma->of_dma_controllers, &of_dma_list);
 
 	return 0;
 }
@@ -95,15 +129,32 @@ EXPORT_SYMBOL_GPL(of_dma_controller_register);
  *
  * Memory allocated by of_dma_controller_register() is freed here.
  */
-void of_dma_controller_free(struct device_node *np)
+int of_dma_controller_free(struct device_node *np)
 {
 	struct of_dma *ofdma;
 
-	ofdma = of_dma_find_controller(np);
-	if (ofdma) {
-		list_del_rcu(&ofdma->of_dma_controllers);
-		kfree(ofdma);
+	spin_lock(&of_dma_lock);
+
+	if (list_empty(&of_dma_list)) {
+		spin_unlock(&of_dma_lock);
+		return -ENODEV;
 	}
+
+	list_for_each_entry(ofdma, &of_dma_list, of_dma_controllers)
+		if (ofdma->of_node == np) {
+			if (ofdma->use_count) {
+				spin_unlock(&of_dma_lock);
+				return -EBUSY;
+			}
+
+			list_del(&ofdma->of_dma_controllers);
+			spin_unlock(&of_dma_lock);
+			kfree(ofdma);
+			return 0;
+		}
+
+	spin_unlock(&of_dma_lock);
+	return -ENODEV;
 }
 EXPORT_SYMBOL_GPL(of_dma_controller_free);
 
@@ -166,21 +217,15 @@ struct dma_chan *of_dma_request_slave_channel(struct device_node *np,
 		if (of_dma_match_channel(np, name, i, &dma_spec))
 			continue;
 
-		ofdma = of_dma_find_controller(dma_spec.np);
-		if (!ofdma) {
-			pr_debug("%s: can't find DMA controller %s\n",
-				 np->full_name, dma_spec.np->full_name);
-			continue;
-		}
+		ofdma = of_dma_get_controller(&dma_spec);
 
-		if (dma_spec.args_count != ofdma->of_dma_nbcells) {
-			pr_debug("%s: wrong #dma-cells for %s\n", np->full_name,
-				 dma_spec.np->full_name);
+		if (!ofdma)
 			continue;
-		}
 
 		chan = ofdma->of_dma_xlate(&dma_spec, ofdma);
 
+		of_dma_put_controller(ofdma);
+
 		of_node_put(dma_spec.np);
 
 		if (chan)
diff --git a/include/linux/of_dma.h b/include/linux/of_dma.h
index 67158ddd1f3e..84b64f857e23 100644
--- a/include/linux/of_dma.h
+++ b/include/linux/of_dma.h
@@ -25,6 +25,7 @@ struct of_dma {
 	struct dma_chan		*(*of_dma_xlate)
 				(struct of_phandle_args *, struct of_dma *);
 	void			*of_dma_data;
+	int			use_count;
 };
 
 struct of_dma_filter_info {
@@ -37,7 +38,7 @@ extern int of_dma_controller_register(struct device_node *np,
 		struct dma_chan *(*of_dma_xlate)
 		(struct of_phandle_args *, struct of_dma *),
 		void *data);
-extern void of_dma_controller_free(struct device_node *np);
+extern int of_dma_controller_free(struct device_node *np);
 extern struct dma_chan *of_dma_request_slave_channel(struct device_node *np,
 						     char *name);
 extern struct dma_chan *of_dma_simple_xlate(struct of_phandle_args *dma_spec,
@@ -51,7 +52,7 @@ static int of_dma_controller_register(struct device_node *np,
 	return -ENODEV;
 }
 
-static void of_dma_controller_free(struct device_node *np)
+static int of_dma_controller_free(struct device_node *np)
 {
 }
 
-- 
cgit v1.2.3


From 91f8aecc501e456c97a6f49f7ea3797e874d5d89 Mon Sep 17 00:00:00 2001
From: Vinod Koul <vinod.koul@linux.intel.com>
Date: Thu, 29 Nov 2012 12:17:22 +0530
Subject: dmaengine: fix !of_dma compilation warning

Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Reported-by: Fengguang Wu <fengguang.wu@intel.com>
Signed-off-by: Vinod Koul <vinod.koul@linux.intel.com>
---
 include/linux/of_dma.h | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/of_dma.h b/include/linux/of_dma.h
index 84b64f857e23..d15073e080dd 100644
--- a/include/linux/of_dma.h
+++ b/include/linux/of_dma.h
@@ -44,7 +44,7 @@ extern struct dma_chan *of_dma_request_slave_channel(struct device_node *np,
 extern struct dma_chan *of_dma_simple_xlate(struct of_phandle_args *dma_spec,
 		struct of_dma *ofdma);
 #else
-static int of_dma_controller_register(struct device_node *np,
+static inline int of_dma_controller_register(struct device_node *np,
 		struct dma_chan *(*of_dma_xlate)
 		(struct of_phandle_args *, struct of_dma *),
 		void *data)
@@ -52,17 +52,18 @@ static int of_dma_controller_register(struct device_node *np,
 	return -ENODEV;
 }
 
-static int of_dma_controller_free(struct device_node *np)
+static inline int of_dma_controller_free(struct device_node *np)
 {
+	return -ENODEV;
 }
 
-static struct dma_chan *of_dma_request_slave_channel(struct device_node *np,
+static inline struct dma_chan *of_dma_request_slave_channel(struct device_node *np,
 						     char *name)
 {
 	return NULL;
 }
 
-static struct dma_chan *of_dma_simple_xlate(struct of_phandle_args *dma_spec,
+static inline struct dma_chan *of_dma_simple_xlate(struct of_phandle_args *dma_spec,
 		struct of_dma *ofdma)
 {
 	return NULL;
-- 
cgit v1.2.3


From a14acb4ac2a1486f6633c55eb7f7ded07f3ec9fc Mon Sep 17 00:00:00 2001
From: Barry Song <Baohua.Song@csr.com>
Date: Tue, 6 Nov 2012 21:32:39 +0800
Subject: DMAEngine: add dmaengine_prep_interleaved_dma wrapper for interleaved
 api

commit b14dab792dee(DMAEngine: Define interleaved transfer request api) adds
interleaved request api, this patch adds the dmaengine_prep_interleaved_dma
just like we have dmaengine_prep_ for other modes to avoid drivers call:
xxx_chan->device->device_prep_interleaved_dma().

Signed-off-by: Barry Song <Baohua.Song@csr.com>
Cc: Jassi Brar <jaswinder.singh@linaro.org>
Signed-off-by: Vinod Koul <vinod.koul@linux.intel.com>
---
 include/linux/dmaengine.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 4c8643794e0d..3aa76dbed166 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -660,6 +660,13 @@ static inline struct dma_async_tx_descriptor *dmaengine_prep_dma_cyclic(
 						period_len, dir, flags, NULL);
 }
 
+static inline struct dma_async_tx_descriptor *dmaengine_prep_interleaved_dma(
+		struct dma_chan *chan, struct dma_interleaved_template *xt,
+		unsigned long flags)
+{
+	return chan->device->device_prep_interleaved_dma(chan, xt, flags);
+}
+
 static inline int dmaengine_terminate_all(struct dma_chan *chan)
 {
 	return dmaengine_device_control(chan, DMA_TERMINATE_ALL, 0);
-- 
cgit v1.2.3


From 944ea4dd38b8575e30a5699633c81945bff1864d Mon Sep 17 00:00:00 2001
From: Jon Mason <jon.mason@intel.com>
Date: Sun, 11 Nov 2012 23:03:20 +0000
Subject: dmatest: Fix NULL pointer dereference on ioat

device_control is an optional and not implemented in all DMA drivers.
Any calls to these will result in a NULL pointer dereference.  dmatest
makes two of these calls when completing the kernel thread and removing
the module.  These are corrected by calling the dmaengine_device_control
wrapper and checking for a non-existant device_control function pointer
there.

Signed-off-by: Jon Mason <jon.mason@intel.com>
CC: Vinod Koul <vinod.koul@intel.com>
CC: Dan Williams <djbw@fb.com>
Reviewed-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Vinod Koul <vinod.koul@linux.intel.com>
---
 drivers/dma/dmatest.c     | 4 ++--
 include/linux/dmaengine.h | 5 ++++-
 2 files changed, 6 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c
index 3a8047b1f108..99a75e5d66be 100644
--- a/drivers/dma/dmatest.c
+++ b/drivers/dma/dmatest.c
@@ -536,7 +536,7 @@ err_srcs:
 			thread_name, total_tests, failed_tests, ret);
 
 	/* terminate all transfers on specified channels */
-	chan->device->device_control(chan, DMA_TERMINATE_ALL, 0);
+	dmaengine_terminate_all(chan);
 	if (iterations > 0)
 		while (!kthread_should_stop()) {
 			DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wait_dmatest_exit);
@@ -561,7 +561,7 @@ static void dmatest_cleanup_channel(struct dmatest_chan *dtc)
 	}
 
 	/* terminate all transfers on specified channels */
-	dtc->chan->device->device_control(dtc->chan, DMA_TERMINATE_ALL, 0);
+	dmaengine_terminate_all(dtc->chan);
 
 	kfree(dtc);
 }
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 3aa76dbed166..be6e95395b11 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -608,7 +608,10 @@ static inline int dmaengine_device_control(struct dma_chan *chan,
 					   enum dma_ctrl_cmd cmd,
 					   unsigned long arg)
 {
-	return chan->device->device_control(chan, cmd, arg);
+	if (chan->device->device_control)
+		return chan->device->device_control(chan, cmd, arg);
+	else
+		return -ENOSYS;
 }
 
 static inline int dmaengine_slave_config(struct dma_chan *chan,
-- 
cgit v1.2.3


From b9ee86830f34737a08deead93872a79a37419a13 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Date: Thu, 8 Nov 2012 09:59:54 +0000
Subject: dmaengine: remove dma_async_memcpy_pending() macro

Just use dma_async_issue_pending() directly.

Cc: Vinod Koul <vinod.koul@intel.com>
Cc: Tomasz Figa <t.figa@samsung.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
Signed-off-by: Dan Williams <djbw@fb.com>
---
 drivers/misc/carma/carma-fpga-program.c | 2 +-
 drivers/misc/carma/carma-fpga.c         | 2 +-
 include/linux/dmaengine.h               | 2 --
 net/ipv4/tcp.c                          | 6 +++---
 4 files changed, 5 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/misc/carma/carma-fpga-program.c b/drivers/misc/carma/carma-fpga-program.c
index eaddfe9db149..736c7714f565 100644
--- a/drivers/misc/carma/carma-fpga-program.c
+++ b/drivers/misc/carma/carma-fpga-program.c
@@ -546,7 +546,7 @@ static noinline int fpga_program_dma(struct fpga_dev *priv)
 		goto out_dma_unmap;
 	}
 
-	dma_async_memcpy_issue_pending(chan);
+	dma_async_issue_pending(chan);
 
 	/* Set the total byte count */
 	fpga_set_byte_count(priv->regs, priv->bytes);
diff --git a/drivers/misc/carma/carma-fpga.c b/drivers/misc/carma/carma-fpga.c
index 6b43f8c7b3be..7508cafff103 100644
--- a/drivers/misc/carma/carma-fpga.c
+++ b/drivers/misc/carma/carma-fpga.c
@@ -751,7 +751,7 @@ static irqreturn_t data_irq(int irq, void *dev_id)
 	submitted = true;
 
 	/* Start the DMA Engine */
-	dma_async_memcpy_issue_pending(priv->chan);
+	dma_async_issue_pending(priv->chan);
 
 out:
 	/* If no DMA was submitted, re-enable interrupts */
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index be6e95395b11..cd15958d4d1d 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -901,8 +901,6 @@ static inline void dma_async_issue_pending(struct dma_chan *chan)
 	chan->device->device_issue_pending(chan);
 }
 
-#define dma_async_memcpy_issue_pending(chan) dma_async_issue_pending(chan)
-
 /**
  * dma_async_is_tx_complete - poll for transaction completion
  * @chan: DMA channel
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 1ca253635f7a..cf949a119a54 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1406,7 +1406,7 @@ static void tcp_service_net_dma(struct sock *sk, bool wait)
 		return;
 
 	last_issued = tp->ucopy.dma_cookie;
-	dma_async_memcpy_issue_pending(tp->ucopy.dma_chan);
+	dma_async_issue_pending(tp->ucopy.dma_chan);
 
 	do {
 		if (dma_async_memcpy_complete(tp->ucopy.dma_chan,
@@ -1744,7 +1744,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 				tcp_service_net_dma(sk, true);
 				tcp_cleanup_rbuf(sk, copied);
 			} else
-				dma_async_memcpy_issue_pending(tp->ucopy.dma_chan);
+				dma_async_issue_pending(tp->ucopy.dma_chan);
 		}
 #endif
 		if (copied >= target) {
@@ -1837,7 +1837,7 @@ do_prequeue:
 					break;
 				}
 
-				dma_async_memcpy_issue_pending(tp->ucopy.dma_chan);
+				dma_async_issue_pending(tp->ucopy.dma_chan);
 
 				if ((offset + used) == skb->len)
 					copied_early = true;
-- 
cgit v1.2.3


From e239345f642e6a255d0ba1e3d92c2f9ec5a44fbe Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Date: Thu, 8 Nov 2012 10:01:01 +0000
Subject: dmaengine: remove dma_async_memcpy_complete() macro

Just use dma_async_is_tx_complete() directly.

Cc: Vinod Koul <vinod.koul@intel.com>
Cc: Tomasz Figa <t.figa@samsung.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
Signed-off-by: Dan Williams <djbw@fb.com>
---
 include/linux/dmaengine.h | 5 +----
 net/ipv4/tcp.c            | 2 +-
 2 files changed, 2 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index cd15958d4d1d..4ca9cf73ad3f 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -926,16 +926,13 @@ static inline enum dma_status dma_async_is_tx_complete(struct dma_chan *chan,
 	return status;
 }
 
-#define dma_async_memcpy_complete(chan, cookie, last, used)\
-	dma_async_is_tx_complete(chan, cookie, last, used)
-
 /**
  * dma_async_is_complete - test a cookie against chan state
  * @cookie: transaction identifier to test status of
  * @last_complete: last know completed transaction
  * @last_used: last cookie value handed out
  *
- * dma_async_is_complete() is used in dma_async_memcpy_complete()
+ * dma_async_is_complete() is used in dma_async_is_tx_complete()
  * the test logic is separated for lightweight testing of multiple cookies
  */
 static inline enum dma_status dma_async_is_complete(dma_cookie_t cookie,
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index cf949a119a54..db0856ad70cb 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1409,7 +1409,7 @@ static void tcp_service_net_dma(struct sock *sk, bool wait)
 	dma_async_issue_pending(tp->ucopy.dma_chan);
 
 	do {
-		if (dma_async_memcpy_complete(tp->ucopy.dma_chan,
+		if (dma_async_is_tx_complete(tp->ucopy.dma_chan,
 					      last_issued, &done,
 					      &used) == DMA_SUCCESS) {
 			/* Safe to free early-copied skbs now */
-- 
cgit v1.2.3


From e909c682d04e55e77a3c9d158e7dc36027195493 Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Mon, 7 Jan 2013 10:57:14 +0100
Subject: [media] coda: Fix build due to iram.h rename

commit c045e3f13 (ARM: imx: include iram.h rather than mach/iram.h) changed the
location of iram.h, which causes the following build error when building the coda
driver:

drivers/media/platform/coda.c:27:23: error: mach/iram.h: No such file or directory
drivers/media/platform/coda.c: In function 'coda_probe':
drivers/media/platform/coda.c:2000: error: implicit declaration of function 'iram_alloc'
drivers/media/platform/coda.c:2001: warning: assignment makes pointer from integer without a cast
drivers/media/platform/coda.c: In function 'coda_remove':
drivers/media/platform/coda.c:2024: error: implicit declaration of function 'iram_free'

Since the content of iram.h is not imx specific, move it to
include/linux/platform_data/imx-iram.h instead. This is an intermediate solution
until the i.MX iram allocator is converted to the generic SRAM allocator.

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
Acked-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 arch/arm/mach-imx/iram.h               | 41 ----------------------------------
 arch/arm/mach-imx/iram_alloc.c         |  3 +--
 drivers/media/platform/coda.c          |  2 +-
 include/linux/platform_data/imx-iram.h | 41 ++++++++++++++++++++++++++++++++++
 4 files changed, 43 insertions(+), 44 deletions(-)
 delete mode 100644 arch/arm/mach-imx/iram.h
 create mode 100644 include/linux/platform_data/imx-iram.h

(limited to 'include/linux')

diff --git a/arch/arm/mach-imx/iram.h b/arch/arm/mach-imx/iram.h
deleted file mode 100644
index 022690c33702..000000000000
--- a/arch/arm/mach-imx/iram.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Copyright (C) 2010 Freescale Semiconductor, Inc. All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
- * MA 02110-1301, USA.
- */
-#include <linux/errno.h>
-
-#ifdef CONFIG_IRAM_ALLOC
-
-int __init iram_init(unsigned long base, unsigned long size);
-void __iomem *iram_alloc(unsigned int size, unsigned long *dma_addr);
-void iram_free(unsigned long dma_addr, unsigned int size);
-
-#else
-
-static inline int __init iram_init(unsigned long base, unsigned long size)
-{
-	return -ENOMEM;
-}
-
-static inline void __iomem *iram_alloc(unsigned int size, unsigned long *dma_addr)
-{
-	return NULL;
-}
-
-static inline void iram_free(unsigned long base, unsigned long size) {}
-
-#endif
diff --git a/arch/arm/mach-imx/iram_alloc.c b/arch/arm/mach-imx/iram_alloc.c
index 6c80424f678e..e05cf407db65 100644
--- a/arch/arm/mach-imx/iram_alloc.c
+++ b/arch/arm/mach-imx/iram_alloc.c
@@ -22,8 +22,7 @@
 #include <linux/module.h>
 #include <linux/spinlock.h>
 #include <linux/genalloc.h>
-
-#include "iram.h"
+#include "linux/platform_data/imx-iram.h"
 
 static unsigned long iram_phys_base;
 static void __iomem *iram_virt_base;
diff --git a/drivers/media/platform/coda.c b/drivers/media/platform/coda.c
index 7b8b547f2d51..afadd3aba3e0 100644
--- a/drivers/media/platform/coda.c
+++ b/drivers/media/platform/coda.c
@@ -23,8 +23,8 @@
 #include <linux/slab.h>
 #include <linux/videodev2.h>
 #include <linux/of.h>
+#include <linux/platform_data/imx-iram.h>
 
-#include <mach/iram.h>
 #include <media/v4l2-ctrls.h>
 #include <media/v4l2-device.h>
 #include <media/v4l2-ioctl.h>
diff --git a/include/linux/platform_data/imx-iram.h b/include/linux/platform_data/imx-iram.h
new file mode 100644
index 000000000000..022690c33702
--- /dev/null
+++ b/include/linux/platform_data/imx-iram.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2010 Freescale Semiconductor, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+#include <linux/errno.h>
+
+#ifdef CONFIG_IRAM_ALLOC
+
+int __init iram_init(unsigned long base, unsigned long size);
+void __iomem *iram_alloc(unsigned int size, unsigned long *dma_addr);
+void iram_free(unsigned long dma_addr, unsigned int size);
+
+#else
+
+static inline int __init iram_init(unsigned long base, unsigned long size)
+{
+	return -ENOMEM;
+}
+
+static inline void __iomem *iram_alloc(unsigned int size, unsigned long *dma_addr)
+{
+	return NULL;
+}
+
+static inline void iram_free(unsigned long base, unsigned long size) {}
+
+#endif
-- 
cgit v1.2.3


From 851bacf5902cad15f9bb789d278a1ee9608c8f25 Mon Sep 17 00:00:00 2001
From: Mika Westerberg <mika.westerberg@linux.intel.com>
Date: Mon, 7 Jan 2013 12:44:33 +0200
Subject: spi/pxa2xx: embed the ssp_device to platform data

The spi-pxa2xx-pci glue driver had to implement pxa_ssp_request()/free() in
order to support the spi-pxa2xx platform driver. Since the ACPI enabled
platforms can use the same platform driver we would need to implement
pxa_ssp_request()/free() in some central place that can be shared by the
ACPI and PCI glue code.

Instead of doing that we can make pxa_ssp_request()/free() to be available
only when CONFIG_ARCH_PXA is set. On other arches these are being stubbed
out in preference to passing the ssp_device from the platform data
directly.

We also change the SPI bus number to be taken from ssp->port_id instead of
platform device id. This way the supporting code that passes the ssp can
decide the number (or it can set it to the same as pdev->id).

Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 drivers/spi/spi-pxa2xx-pci.c   | 73 +++---------------------------------------
 drivers/spi/spi-pxa2xx.c       | 15 ++++++---
 include/linux/pxa2xx_ssp.h     |  9 ++++++
 include/linux/spi/pxa2xx_spi.h |  3 ++
 4 files changed, 28 insertions(+), 72 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/spi/spi-pxa2xx-pci.c b/drivers/spi/spi-pxa2xx-pci.c
index ece979e9c642..364964d2ed04 100644
--- a/drivers/spi/spi-pxa2xx-pci.c
+++ b/drivers/spi/spi-pxa2xx-pci.c
@@ -8,55 +8,11 @@
 #include <linux/module.h>
 #include <linux/spi/pxa2xx_spi.h>
 
-struct ce4100_info {
-	struct ssp_device ssp;
-	struct platform_device *spi_pdev;
-};
-
-static DEFINE_MUTEX(ssp_lock);
-static LIST_HEAD(ssp_list);
-
-struct ssp_device *pxa_ssp_request(int port, const char *label)
-{
-	struct ssp_device *ssp = NULL;
-
-	mutex_lock(&ssp_lock);
-
-	list_for_each_entry(ssp, &ssp_list, node) {
-		if (ssp->port_id == port && ssp->use_count == 0) {
-			ssp->use_count++;
-			ssp->label = label;
-			break;
-		}
-	}
-
-	mutex_unlock(&ssp_lock);
-
-	if (&ssp->node == &ssp_list)
-		return NULL;
-
-	return ssp;
-}
-EXPORT_SYMBOL_GPL(pxa_ssp_request);
-
-void pxa_ssp_free(struct ssp_device *ssp)
-{
-	mutex_lock(&ssp_lock);
-	if (ssp->use_count) {
-		ssp->use_count--;
-		ssp->label = NULL;
-	} else
-		dev_err(&ssp->pdev->dev, "device already free\n");
-	mutex_unlock(&ssp_lock);
-}
-EXPORT_SYMBOL_GPL(pxa_ssp_free);
-
 static int ce4100_spi_probe(struct pci_dev *dev,
 		const struct pci_device_id *ent)
 {
 	struct platform_device_info pi;
 	int ret;
-	struct ce4100_info *spi_info;
 	struct platform_device *pdev;
 	struct pxa2xx_spi_master spi_pdata;
 	struct ssp_device *ssp;
@@ -69,14 +25,10 @@ static int ce4100_spi_probe(struct pci_dev *dev,
 	if (!ret)
 		return ret;
 
-	spi_info = devm_kzalloc(&dev->dev, sizeof(*spi_info), GFP_KERNEL);
-	if (!spi_info)
-		return -ENOMEM;
-
 	memset(&spi_pdata, 0, sizeof(spi_pdata));
 	spi_pdata.num_chipselect = dev->devfn;
 
-	ssp = &spi_info->ssp;
+	ssp = &spi_pdata.ssp;
 	ssp->phys_base = pci_resource_start(dev, 0);
 	ssp->mmio_base = pcim_iomap_table(dev)[0];
 	if (!ssp->mmio_base) {
@@ -87,10 +39,6 @@ static int ce4100_spi_probe(struct pci_dev *dev,
 	ssp->port_id = dev->devfn;
 	ssp->type = PXA25x_SSP;
 
-	mutex_lock(&ssp_lock);
-	list_add(&ssp->node, &ssp_list);
-	mutex_unlock(&ssp_lock);
-
 	memset(&pi, 0, sizeof(pi));
 	pi.parent = &dev->dev;
 	pi.name = "pxa2xx-spi";
@@ -99,30 +47,19 @@ static int ce4100_spi_probe(struct pci_dev *dev,
 	pi.size_data = sizeof(spi_pdata);
 
 	pdev = platform_device_register_full(&pi);
-	if (!pdev) {
-		mutex_lock(&ssp_lock);
-		list_del(&ssp->node);
-		mutex_unlock(&ssp_lock);
-
+	if (!pdev)
 		return -ENOMEM;
-	}
 
-	spi_info->spi_pdev = pdev;
-	pci_set_drvdata(dev, spi_info);
+	pci_set_drvdata(dev, pdev);
 
 	return 0;
 }
 
 static void ce4100_spi_remove(struct pci_dev *dev)
 {
-	struct ce4100_info *spi_info = pci_get_drvdata(dev);
-	struct ssp_device *ssp = &spi_info->ssp;
-
-	platform_device_unregister(spi_info->spi_pdev);
+	struct platform_device *pdev = pci_get_drvdata(dev);
 
-	mutex_lock(&ssp_lock);
-	list_del(&ssp->node);
-	mutex_unlock(&ssp_lock);
+	platform_device_unregister(pdev);
 }
 
 static DEFINE_PCI_DEVICE_TABLE(ce4100_spi_devices) = {
diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c
index 5c8c4f5883c4..54097ad76356 100644
--- a/drivers/spi/spi-pxa2xx.c
+++ b/drivers/spi/spi-pxa2xx.c
@@ -1535,11 +1535,18 @@ static int pxa2xx_spi_probe(struct platform_device *pdev)
 	struct ssp_device *ssp;
 	int status;
 
-	platform_info = dev->platform_data;
+	platform_info = dev_get_platdata(dev);
+	if (!platform_info) {
+		dev_err(&pdev->dev, "missing platform data\n");
+		return -ENODEV;
+	}
 
 	ssp = pxa_ssp_request(pdev->id, pdev->name);
-	if (ssp == NULL) {
-		dev_err(&pdev->dev, "failed to request SSP%d\n", pdev->id);
+	if (!ssp)
+		ssp = &platform_info->ssp;
+
+	if (!ssp->mmio_base) {
+		dev_err(&pdev->dev, "failed to get ssp\n");
 		return -ENODEV;
 	}
 
@@ -1561,7 +1568,7 @@ static int pxa2xx_spi_probe(struct platform_device *pdev)
 	/* the spi->mode bits understood by this driver: */
 	master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH;
 
-	master->bus_num = pdev->id;
+	master->bus_num = ssp->port_id;
 	master->num_chipselect = platform_info->num_chipselect;
 	master->dma_alignment = DMA_ALIGNMENT;
 	master->cleanup = cleanup;
diff --git a/include/linux/pxa2xx_ssp.h b/include/linux/pxa2xx_ssp.h
index f36632061c66..065e7f6c3ad7 100644
--- a/include/linux/pxa2xx_ssp.h
+++ b/include/linux/pxa2xx_ssp.h
@@ -206,6 +206,15 @@ static inline u32 pxa_ssp_read_reg(struct ssp_device *dev, u32 reg)
 	return __raw_readl(dev->mmio_base + reg);
 }
 
+#ifdef CONFIG_ARCH_PXA
 struct ssp_device *pxa_ssp_request(int port, const char *label);
 void pxa_ssp_free(struct ssp_device *);
+#else
+static inline struct ssp_device *pxa_ssp_request(int port, const char *label)
+{
+	return NULL;
+}
+static inline void pxa_ssp_free(struct ssp_device *ssp) {}
+#endif
+
 #endif
diff --git a/include/linux/spi/pxa2xx_spi.h b/include/linux/spi/pxa2xx_spi.h
index c73d1445c77e..6b99f09b717d 100644
--- a/include/linux/spi/pxa2xx_spi.h
+++ b/include/linux/spi/pxa2xx_spi.h
@@ -28,6 +28,9 @@ struct pxa2xx_spi_master {
 	u32 clock_enable;
 	u16 num_chipselect;
 	u8 enable_dma;
+
+	/* For non-PXA arches */
+	struct ssp_device ssp;
 };
 
 /* spi_board_info.controller_data for SPI slave devices,
-- 
cgit v1.2.3


From 52494535103986dbbf689b44d8c2c7efe2132b16 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paul.mckenney@linaro.org>
Date: Wed, 14 Nov 2012 16:26:40 -0800
Subject: rcu: Reduce rcutorture tracing

Currently, rcutorture traces every read-side access.  This can be
problematic because even a two-minute rcutorture run on a two-CPU system
can generate 28,853,363 reads.  Normally, only a failing read is of
interest, so this commit traces adjusts rcutorture's tracing to only
trace failing reads.  The resulting event tracing records the time
and the ->completed value captured at the beginning of the RCU read-side
critical section, allowing correlation with other event-tracing messages.

Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
[ paulmck: Add fix to build problem located by Randy Dunlap based on
  diagnosis by Steven Rostedt. ]
---
 include/linux/rcupdate.h   | 13 ++++++++++---
 include/trace/events/rcu.h | 19 ++++++++++++++-----
 kernel/rcupdate.c          |  9 ++++++---
 kernel/rcutorture.c        | 31 ++++++++++++++++++++++++-------
 lib/Kconfig.debug          |  1 +
 5 files changed, 55 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 275aa3f1062d..7f89cea596e1 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -53,7 +53,10 @@ extern int rcutorture_runnable; /* for sysctl */
 extern void rcutorture_record_test_transition(void);
 extern void rcutorture_record_progress(unsigned long vernum);
 extern void do_trace_rcu_torture_read(char *rcutorturename,
-				      struct rcu_head *rhp);
+				      struct rcu_head *rhp,
+				      unsigned long secs,
+				      unsigned long c_old,
+				      unsigned long c);
 #else
 static inline void rcutorture_record_test_transition(void)
 {
@@ -63,9 +66,13 @@ static inline void rcutorture_record_progress(unsigned long vernum)
 }
 #ifdef CONFIG_RCU_TRACE
 extern void do_trace_rcu_torture_read(char *rcutorturename,
-				      struct rcu_head *rhp);
+				      struct rcu_head *rhp,
+				      unsigned long secs,
+				      unsigned long c_old,
+				      unsigned long c);
 #else
-#define do_trace_rcu_torture_read(rcutorturename, rhp) do { } while (0)
+#define do_trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c) \
+	do { } while (0)
 #endif
 #endif
 
diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h
index d4f559b1ec34..09af021c8e96 100644
--- a/include/trace/events/rcu.h
+++ b/include/trace/events/rcu.h
@@ -523,22 +523,30 @@ TRACE_EVENT(rcu_batch_end,
  */
 TRACE_EVENT(rcu_torture_read,
 
-	TP_PROTO(char *rcutorturename, struct rcu_head *rhp),
+	TP_PROTO(char *rcutorturename, struct rcu_head *rhp,
+		 unsigned long secs, unsigned long c_old, unsigned long c),
 
-	TP_ARGS(rcutorturename, rhp),
+	TP_ARGS(rcutorturename, rhp, secs, c_old, c),
 
 	TP_STRUCT__entry(
 		__field(char *, rcutorturename)
 		__field(struct rcu_head *, rhp)
+		__field(unsigned long, secs)
+		__field(unsigned long, c_old)
+		__field(unsigned long, c)
 	),
 
 	TP_fast_assign(
 		__entry->rcutorturename = rcutorturename;
 		__entry->rhp = rhp;
+		__entry->secs = secs;
+		__entry->c_old = c_old;
+		__entry->c = c;
 	),
 
-	TP_printk("%s torture read %p",
-		  __entry->rcutorturename, __entry->rhp)
+	TP_printk("%s torture read %p %luus c: %lu %lu",
+		  __entry->rcutorturename, __entry->rhp,
+		  __entry->secs, __entry->c_old, __entry->c)
 );
 
 /*
@@ -608,7 +616,8 @@ TRACE_EVENT(rcu_barrier,
 #define trace_rcu_invoke_kfree_callback(rcuname, rhp, offset) do { } while (0)
 #define trace_rcu_batch_end(rcuname, callbacks_invoked, cb, nr, iit, risk) \
 	do { } while (0)
-#define trace_rcu_torture_read(rcutorturename, rhp) do { } while (0)
+#define trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c) \
+	do { } while (0)
 #define trace_rcu_barrier(name, s, cpu, cnt, done) do { } while (0)
 
 #endif /* #else #ifdef CONFIG_RCU_TRACE */
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index a2cf76177b44..303359d1ca88 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -404,11 +404,14 @@ EXPORT_SYMBOL_GPL(rcuhead_debug_descr);
 #endif /* #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD */
 
 #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) || defined(CONFIG_RCU_TRACE)
-void do_trace_rcu_torture_read(char *rcutorturename, struct rcu_head *rhp)
+void do_trace_rcu_torture_read(char *rcutorturename, struct rcu_head *rhp,
+			       unsigned long secs,
+			       unsigned long c_old, unsigned long c)
 {
-	trace_rcu_torture_read(rcutorturename, rhp);
+	trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c);
 }
 EXPORT_SYMBOL_GPL(do_trace_rcu_torture_read);
 #else
-#define do_trace_rcu_torture_read(rcutorturename, rhp) do { } while (0)
+#define do_trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c) \
+	do { } while (0)
 #endif
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index 31dea01c85fd..a583f1ce713d 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -46,6 +46,7 @@
 #include <linux/stat.h>
 #include <linux/srcu.h>
 #include <linux/slab.h>
+#include <linux/trace_clock.h>
 #include <asm/byteorder.h>
 
 MODULE_LICENSE("GPL");
@@ -1028,7 +1029,6 @@ void rcutorture_trace_dump(void)
 		return;
 	if (atomic_xchg(&beenhere, 1) != 0)
 		return;
-	do_trace_rcu_torture_read(cur_ops->name, (struct rcu_head *)~0UL);
 	ftrace_dump(DUMP_ALL);
 }
 
@@ -1042,13 +1042,16 @@ static void rcu_torture_timer(unsigned long unused)
 {
 	int idx;
 	int completed;
+	int completed_end;
 	static DEFINE_RCU_RANDOM(rand);
 	static DEFINE_SPINLOCK(rand_lock);
 	struct rcu_torture *p;
 	int pipe_count;
+	unsigned long long ts;
 
 	idx = cur_ops->readlock();
 	completed = cur_ops->completed();
+	ts = trace_clock_local();
 	p = rcu_dereference_check(rcu_torture_current,
 				  rcu_read_lock_bh_held() ||
 				  rcu_read_lock_sched_held() ||
@@ -1058,7 +1061,6 @@ static void rcu_torture_timer(unsigned long unused)
 		cur_ops->readunlock(idx);
 		return;
 	}
-	do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu);
 	if (p->rtort_mbtest == 0)
 		atomic_inc(&n_rcu_torture_mberror);
 	spin_lock(&rand_lock);
@@ -1071,10 +1073,16 @@ static void rcu_torture_timer(unsigned long unused)
 		/* Should not happen, but... */
 		pipe_count = RCU_TORTURE_PIPE_LEN;
 	}
-	if (pipe_count > 1)
+	completed_end = cur_ops->completed();
+	if (pipe_count > 1) {
+		unsigned long __maybe_unused ts_rem = do_div(ts, NSEC_PER_USEC);
+
+		do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu, ts,
+					  completed, completed_end);
 		rcutorture_trace_dump();
+	}
 	__this_cpu_inc(rcu_torture_count[pipe_count]);
-	completed = cur_ops->completed() - completed;
+	completed = completed_end - completed;
 	if (completed > RCU_TORTURE_PIPE_LEN) {
 		/* Should not happen, but... */
 		completed = RCU_TORTURE_PIPE_LEN;
@@ -1094,11 +1102,13 @@ static int
 rcu_torture_reader(void *arg)
 {
 	int completed;
+	int completed_end;
 	int idx;
 	DEFINE_RCU_RANDOM(rand);
 	struct rcu_torture *p;
 	int pipe_count;
 	struct timer_list t;
+	unsigned long long ts;
 
 	VERBOSE_PRINTK_STRING("rcu_torture_reader task started");
 	set_user_nice(current, 19);
@@ -1112,6 +1122,7 @@ rcu_torture_reader(void *arg)
 		}
 		idx = cur_ops->readlock();
 		completed = cur_ops->completed();
+		ts = trace_clock_local();
 		p = rcu_dereference_check(rcu_torture_current,
 					  rcu_read_lock_bh_held() ||
 					  rcu_read_lock_sched_held() ||
@@ -1122,7 +1133,6 @@ rcu_torture_reader(void *arg)
 			schedule_timeout_interruptible(HZ);
 			continue;
 		}
-		do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu);
 		if (p->rtort_mbtest == 0)
 			atomic_inc(&n_rcu_torture_mberror);
 		cur_ops->read_delay(&rand);
@@ -1132,10 +1142,17 @@ rcu_torture_reader(void *arg)
 			/* Should not happen, but... */
 			pipe_count = RCU_TORTURE_PIPE_LEN;
 		}
-		if (pipe_count > 1)
+		completed_end = cur_ops->completed();
+		if (pipe_count > 1) {
+			unsigned long __maybe_unused ts_rem =
+					do_div(ts, NSEC_PER_USEC);
+
+			do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu,
+						  ts, completed, completed_end);
 			rcutorture_trace_dump();
+		}
 		__this_cpu_inc(rcu_torture_count[pipe_count]);
-		completed = cur_ops->completed() - completed;
+		completed = completed_end - completed;
 		if (completed > RCU_TORTURE_PIPE_LEN) {
 			/* Should not happen, but... */
 			completed = RCU_TORTURE_PIPE_LEN;
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 3a353091a903..7d83f52fbade 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1008,6 +1008,7 @@ config RCU_CPU_STALL_INFO
 config RCU_TRACE
 	bool "Enable tracing for RCU"
 	depends on DEBUG_KERNEL
+	select TRACE_CLOCK
 	help
 	  This option provides tracing in RCU which presents stats
 	  in debugfs for debugging RCU implementation.
-- 
cgit v1.2.3


From 20259849bb1ac1ffb0156eb359810e8b99cb644d Mon Sep 17 00:00:00 2001
From: George Zhang <georgezhang@vmware.com>
Date: Tue, 8 Jan 2013 15:55:59 -0800
Subject: VMCI: Some header and config files.

VMCI head config patch Adds all the necessary files to enable building of the VMCI
module with the Linux Makefiles and Kconfig systems. Also adds the header files used
for building modules against the driver.

Signed-off-by: George Zhang <georgezhang@vmware.com>
Acked-by: Andy king <acking@vmware.com>
Acked-by: Dmitry Torokhov <dtor@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/Kconfig           |   1 +
 drivers/misc/Makefile          |   2 +
 drivers/misc/vmw_vmci/Kconfig  |  16 +
 drivers/misc/vmw_vmci/Makefile |   4 +
 include/linux/vmw_vmci_api.h   |  82 ++++
 include/linux/vmw_vmci_defs.h  | 880 +++++++++++++++++++++++++++++++++++++++++
 6 files changed, 985 insertions(+)
 create mode 100644 drivers/misc/vmw_vmci/Kconfig
 create mode 100644 drivers/misc/vmw_vmci/Makefile
 create mode 100644 include/linux/vmw_vmci_api.h
 create mode 100644 include/linux/vmw_vmci_defs.h

(limited to 'include/linux')

diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index b151b7c1bd59..264e647413b5 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -507,4 +507,5 @@ source "drivers/misc/lis3lv02d/Kconfig"
 source "drivers/misc/carma/Kconfig"
 source "drivers/misc/altera-stapl/Kconfig"
 source "drivers/misc/mei/Kconfig"
+source "drivers/misc/vmw_vmci/Kconfig"
 endmenu
diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
index 2129377c0de6..5c99726dd617 100644
--- a/drivers/misc/Makefile
+++ b/drivers/misc/Makefile
@@ -49,3 +49,5 @@ obj-y				+= carma/
 obj-$(CONFIG_USB_SWITCH_FSA9480) += fsa9480.o
 obj-$(CONFIG_ALTERA_STAPL)	+=altera-stapl/
 obj-$(CONFIG_INTEL_MEI)		+= mei/
+obj-$(CONFIG_MAX8997_MUIC)	+= max8997-muic.o
+obj-$(CONFIG_VMWARE_VMCI)	+= vmw_vmci/
diff --git a/drivers/misc/vmw_vmci/Kconfig b/drivers/misc/vmw_vmci/Kconfig
new file mode 100644
index 000000000000..55015e75683c
--- /dev/null
+++ b/drivers/misc/vmw_vmci/Kconfig
@@ -0,0 +1,16 @@
+#
+# VMware VMCI device
+#
+
+config VMWARE_VMCI
+	tristate "VMware VMCI Driver"
+	depends on X86
+	help
+	  This is VMware's Virtual Machine Communication Interface.  It enables
+	  high-speed communication between host and guest in a virtual
+	  environment via the VMCI virtual device.
+
+	  If unsure, say N.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called vmw_vmci.
diff --git a/drivers/misc/vmw_vmci/Makefile b/drivers/misc/vmw_vmci/Makefile
new file mode 100644
index 000000000000..4da9893c3942
--- /dev/null
+++ b/drivers/misc/vmw_vmci/Makefile
@@ -0,0 +1,4 @@
+obj-$(CONFIG_VMWARE_VMCI) += vmw_vmci.o
+vmw_vmci-y += vmci_context.o vmci_datagram.o vmci_doorbell.o \
+	vmci_driver.o vmci_event.o vmci_guest.o vmci_handle_array.o \
+	vmci_host.o vmci_queue_pair.o vmci_resource.o vmci_route.o
diff --git a/include/linux/vmw_vmci_api.h b/include/linux/vmw_vmci_api.h
new file mode 100644
index 000000000000..023430e265fe
--- /dev/null
+++ b/include/linux/vmw_vmci_api.h
@@ -0,0 +1,82 @@
+/*
+ * VMware VMCI Driver
+ *
+ * Copyright (C) 2012 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * for more details.
+ */
+
+#ifndef __VMW_VMCI_API_H__
+#define __VMW_VMCI_API_H__
+
+#include <linux/uidgid.h>
+#include <linux/vmw_vmci_defs.h>
+
+#undef  VMCI_KERNEL_API_VERSION
+#define VMCI_KERNEL_API_VERSION_1 1
+#define VMCI_KERNEL_API_VERSION_2 2
+#define VMCI_KERNEL_API_VERSION   VMCI_KERNEL_API_VERSION_2
+
+typedef void (vmci_device_shutdown_fn) (void *device_registration,
+					void *user_data);
+
+int vmci_datagram_create_handle(u32 resource_id, u32 flags,
+				vmci_datagram_recv_cb recv_cb,
+				void *client_data,
+				struct vmci_handle *out_handle);
+int vmci_datagram_create_handle_priv(u32 resource_id, u32 flags, u32 priv_flags,
+				     vmci_datagram_recv_cb recv_cb,
+				     void *client_data,
+				     struct vmci_handle *out_handle);
+int vmci_datagram_destroy_handle(struct vmci_handle handle);
+int vmci_datagram_send(struct vmci_datagram *msg);
+int vmci_doorbell_create(struct vmci_handle *handle, u32 flags,
+			 u32 priv_flags,
+			 vmci_callback notify_cb, void *client_data);
+int vmci_doorbell_destroy(struct vmci_handle handle);
+int vmci_doorbell_notify(struct vmci_handle handle, u32 priv_flags);
+u32 vmci_get_context_id(void);
+bool vmci_is_context_owner(u32 context_id, kuid_t uid);
+
+int vmci_event_subscribe(u32 event,
+			 vmci_event_cb callback, void *callback_data,
+			 u32 *subid);
+int vmci_event_unsubscribe(u32 subid);
+u32 vmci_context_get_priv_flags(u32 context_id);
+int vmci_qpair_alloc(struct vmci_qp **qpair,
+		     struct vmci_handle *handle,
+		     u64 produce_qsize,
+		     u64 consume_qsize,
+		     u32 peer, u32 flags, u32 priv_flags);
+int vmci_qpair_detach(struct vmci_qp **qpair);
+int vmci_qpair_get_produce_indexes(const struct vmci_qp *qpair,
+				   u64 *producer_tail,
+				   u64 *consumer_head);
+int vmci_qpair_get_consume_indexes(const struct vmci_qp *qpair,
+				   u64 *consumer_tail,
+				   u64 *producer_head);
+s64 vmci_qpair_produce_free_space(const struct vmci_qp *qpair);
+s64 vmci_qpair_produce_buf_ready(const struct vmci_qp *qpair);
+s64 vmci_qpair_consume_free_space(const struct vmci_qp *qpair);
+s64 vmci_qpair_consume_buf_ready(const struct vmci_qp *qpair);
+ssize_t vmci_qpair_enqueue(struct vmci_qp *qpair,
+			   const void *buf, size_t buf_size, int mode);
+ssize_t vmci_qpair_dequeue(struct vmci_qp *qpair,
+			   void *buf, size_t buf_size, int mode);
+ssize_t vmci_qpair_peek(struct vmci_qp *qpair, void *buf, size_t buf_size,
+			int mode);
+ssize_t vmci_qpair_enquev(struct vmci_qp *qpair,
+			  void *iov, size_t iov_size, int mode);
+ssize_t vmci_qpair_dequev(struct vmci_qp *qpair,
+			  void *iov, size_t iov_size, int mode);
+ssize_t vmci_qpair_peekv(struct vmci_qp *qpair, void *iov, size_t iov_size,
+			 int mode);
+
+#endif /* !__VMW_VMCI_API_H__ */
diff --git a/include/linux/vmw_vmci_defs.h b/include/linux/vmw_vmci_defs.h
new file mode 100644
index 000000000000..65ac54c61c18
--- /dev/null
+++ b/include/linux/vmw_vmci_defs.h
@@ -0,0 +1,880 @@
+/*
+ * VMware VMCI Driver
+ *
+ * Copyright (C) 2012 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * for more details.
+ */
+
+#ifndef _VMW_VMCI_DEF_H_
+#define _VMW_VMCI_DEF_H_
+
+#include <linux/atomic.h>
+
+/* Register offsets. */
+#define VMCI_STATUS_ADDR      0x00
+#define VMCI_CONTROL_ADDR     0x04
+#define VMCI_ICR_ADDR	      0x08
+#define VMCI_IMR_ADDR         0x0c
+#define VMCI_DATA_OUT_ADDR    0x10
+#define VMCI_DATA_IN_ADDR     0x14
+#define VMCI_CAPS_ADDR        0x18
+#define VMCI_RESULT_LOW_ADDR  0x1c
+#define VMCI_RESULT_HIGH_ADDR 0x20
+
+/* Max number of devices. */
+#define VMCI_MAX_DEVICES 1
+
+/* Status register bits. */
+#define VMCI_STATUS_INT_ON     0x1
+
+/* Control register bits. */
+#define VMCI_CONTROL_RESET        0x1
+#define VMCI_CONTROL_INT_ENABLE   0x2
+#define VMCI_CONTROL_INT_DISABLE  0x4
+
+/* Capabilities register bits. */
+#define VMCI_CAPS_HYPERCALL     0x1
+#define VMCI_CAPS_GUESTCALL     0x2
+#define VMCI_CAPS_DATAGRAM      0x4
+#define VMCI_CAPS_NOTIFICATIONS 0x8
+
+/* Interrupt Cause register bits. */
+#define VMCI_ICR_DATAGRAM      0x1
+#define VMCI_ICR_NOTIFICATION  0x2
+
+/* Interrupt Mask register bits. */
+#define VMCI_IMR_DATAGRAM      0x1
+#define VMCI_IMR_NOTIFICATION  0x2
+
+/* Interrupt type. */
+enum {
+	VMCI_INTR_TYPE_INTX = 0,
+	VMCI_INTR_TYPE_MSI = 1,
+	VMCI_INTR_TYPE_MSIX = 2,
+};
+
+/* Maximum MSI/MSI-X interrupt vectors in the device. */
+#define VMCI_MAX_INTRS 2
+
+/*
+ * Supported interrupt vectors.  There is one for each ICR value above,
+ * but here they indicate the position in the vector array/message ID.
+ */
+enum {
+	VMCI_INTR_DATAGRAM = 0,
+	VMCI_INTR_NOTIFICATION = 1,
+};
+
+/*
+ * A single VMCI device has an upper limit of 128MB on the amount of
+ * memory that can be used for queue pairs.
+ */
+#define VMCI_MAX_GUEST_QP_MEMORY (128 * 1024 * 1024)
+
+/*
+ * Queues with pre-mapped data pages must be small, so that we don't pin
+ * too much kernel memory (especially on vmkernel).  We limit a queuepair to
+ * 32 KB, or 16 KB per queue for symmetrical pairs.
+ */
+#define VMCI_MAX_PINNED_QP_MEMORY (32 * 1024)
+
+/*
+ * We have a fixed set of resource IDs available in the VMX.
+ * This allows us to have a very simple implementation since we statically
+ * know how many will create datagram handles. If a new caller arrives and
+ * we have run out of slots we can manually increment the maximum size of
+ * available resource IDs.
+ *
+ * VMCI reserved hypervisor datagram resource IDs.
+ */
+enum {
+	VMCI_RESOURCES_QUERY = 0,
+	VMCI_GET_CONTEXT_ID = 1,
+	VMCI_SET_NOTIFY_BITMAP = 2,
+	VMCI_DOORBELL_LINK = 3,
+	VMCI_DOORBELL_UNLINK = 4,
+	VMCI_DOORBELL_NOTIFY = 5,
+	/*
+	 * VMCI_DATAGRAM_REQUEST_MAP and VMCI_DATAGRAM_REMOVE_MAP are
+	 * obsoleted by the removal of VM to VM communication.
+	 */
+	VMCI_DATAGRAM_REQUEST_MAP = 6,
+	VMCI_DATAGRAM_REMOVE_MAP = 7,
+	VMCI_EVENT_SUBSCRIBE = 8,
+	VMCI_EVENT_UNSUBSCRIBE = 9,
+	VMCI_QUEUEPAIR_ALLOC = 10,
+	VMCI_QUEUEPAIR_DETACH = 11,
+
+	/*
+	 * VMCI_VSOCK_VMX_LOOKUP was assigned to 12 for Fusion 3.0/3.1,
+	 * WS 7.0/7.1 and ESX 4.1
+	 */
+	VMCI_HGFS_TRANSPORT = 13,
+	VMCI_UNITY_PBRPC_REGISTER = 14,
+	VMCI_RPC_PRIVILEGED = 15,
+	VMCI_RPC_UNPRIVILEGED = 16,
+	VMCI_RESOURCE_MAX = 17,
+};
+
+/*
+ * struct vmci_handle - Ownership information structure
+ * @context:    The VMX context ID.
+ * @resource:   The resource ID (used for locating in resource hash).
+ *
+ * The vmci_handle structure is used to track resources used within
+ * vmw_vmci.
+ */
+struct vmci_handle {
+	u32 context;
+	u32 resource;
+};
+
+#define vmci_make_handle(_cid, _rid) \
+	(struct vmci_handle){ .context = _cid, .resource = _rid }
+
+static inline bool vmci_handle_is_equal(struct vmci_handle h1,
+					struct vmci_handle h2)
+{
+	return h1.context == h2.context && h1.resource == h2.resource;
+}
+
+#define VMCI_INVALID_ID ~0
+static const struct vmci_handle VMCI_INVALID_HANDLE = {
+	.context = VMCI_INVALID_ID,
+	.resource = VMCI_INVALID_ID
+};
+
+static inline bool vmci_handle_is_invalid(struct vmci_handle h)
+{
+	return vmci_handle_is_equal(h, VMCI_INVALID_HANDLE);
+}
+
+/*
+ * The below defines can be used to send anonymous requests.
+ * This also indicates that no response is expected.
+ */
+#define VMCI_ANON_SRC_CONTEXT_ID   VMCI_INVALID_ID
+#define VMCI_ANON_SRC_RESOURCE_ID  VMCI_INVALID_ID
+static const struct vmci_handle VMCI_ANON_SRC_HANDLE = {
+	.context = VMCI_ANON_SRC_CONTEXT_ID,
+	.resource = VMCI_ANON_SRC_RESOURCE_ID
+};
+
+/* The lowest 16 context ids are reserved for internal use. */
+#define VMCI_RESERVED_CID_LIMIT ((u32) 16)
+
+/*
+ * Hypervisor context id, used for calling into hypervisor
+ * supplied services from the VM.
+ */
+#define VMCI_HYPERVISOR_CONTEXT_ID 0
+
+/*
+ * Well-known context id, a logical context that contains a set of
+ * well-known services. This context ID is now obsolete.
+ */
+#define VMCI_WELL_KNOWN_CONTEXT_ID 1
+
+/*
+ * Context ID used by host endpoints.
+ */
+#define VMCI_HOST_CONTEXT_ID  2
+
+#define VMCI_CONTEXT_IS_VM(_cid) (VMCI_INVALID_ID != (_cid) &&		\
+				  (_cid) > VMCI_HOST_CONTEXT_ID)
+
+/*
+ * The VMCI_CONTEXT_RESOURCE_ID is used together with vmci_make_handle to make
+ * handles that refer to a specific context.
+ */
+#define VMCI_CONTEXT_RESOURCE_ID 0
+
+/*
+ * VMCI error codes.
+ */
+enum {
+	VMCI_SUCCESS_QUEUEPAIR_ATTACH	= 5,
+	VMCI_SUCCESS_QUEUEPAIR_CREATE	= 4,
+	VMCI_SUCCESS_LAST_DETACH	= 3,
+	VMCI_SUCCESS_ACCESS_GRANTED	= 2,
+	VMCI_SUCCESS_ENTRY_DEAD		= 1,
+	VMCI_SUCCESS			 = 0,
+	VMCI_ERROR_INVALID_RESOURCE	 = (-1),
+	VMCI_ERROR_INVALID_ARGS		 = (-2),
+	VMCI_ERROR_NO_MEM		 = (-3),
+	VMCI_ERROR_DATAGRAM_FAILED	 = (-4),
+	VMCI_ERROR_MORE_DATA		 = (-5),
+	VMCI_ERROR_NO_MORE_DATAGRAMS	 = (-6),
+	VMCI_ERROR_NO_ACCESS		 = (-7),
+	VMCI_ERROR_NO_HANDLE		 = (-8),
+	VMCI_ERROR_DUPLICATE_ENTRY	 = (-9),
+	VMCI_ERROR_DST_UNREACHABLE	 = (-10),
+	VMCI_ERROR_PAYLOAD_TOO_LARGE	 = (-11),
+	VMCI_ERROR_INVALID_PRIV		 = (-12),
+	VMCI_ERROR_GENERIC		 = (-13),
+	VMCI_ERROR_PAGE_ALREADY_SHARED	 = (-14),
+	VMCI_ERROR_CANNOT_SHARE_PAGE	 = (-15),
+	VMCI_ERROR_CANNOT_UNSHARE_PAGE	 = (-16),
+	VMCI_ERROR_NO_PROCESS		 = (-17),
+	VMCI_ERROR_NO_DATAGRAM		 = (-18),
+	VMCI_ERROR_NO_RESOURCES		 = (-19),
+	VMCI_ERROR_UNAVAILABLE		 = (-20),
+	VMCI_ERROR_NOT_FOUND		 = (-21),
+	VMCI_ERROR_ALREADY_EXISTS	 = (-22),
+	VMCI_ERROR_NOT_PAGE_ALIGNED	 = (-23),
+	VMCI_ERROR_INVALID_SIZE		 = (-24),
+	VMCI_ERROR_REGION_ALREADY_SHARED = (-25),
+	VMCI_ERROR_TIMEOUT		 = (-26),
+	VMCI_ERROR_DATAGRAM_INCOMPLETE	 = (-27),
+	VMCI_ERROR_INCORRECT_IRQL	 = (-28),
+	VMCI_ERROR_EVENT_UNKNOWN	 = (-29),
+	VMCI_ERROR_OBSOLETE		 = (-30),
+	VMCI_ERROR_QUEUEPAIR_MISMATCH	 = (-31),
+	VMCI_ERROR_QUEUEPAIR_NOTSET	 = (-32),
+	VMCI_ERROR_QUEUEPAIR_NOTOWNER	 = (-33),
+	VMCI_ERROR_QUEUEPAIR_NOTATTACHED = (-34),
+	VMCI_ERROR_QUEUEPAIR_NOSPACE	 = (-35),
+	VMCI_ERROR_QUEUEPAIR_NODATA	 = (-36),
+	VMCI_ERROR_BUSMEM_INVALIDATION	 = (-37),
+	VMCI_ERROR_MODULE_NOT_LOADED	 = (-38),
+	VMCI_ERROR_DEVICE_NOT_FOUND	 = (-39),
+	VMCI_ERROR_QUEUEPAIR_NOT_READY	 = (-40),
+	VMCI_ERROR_WOULD_BLOCK		 = (-41),
+
+	/* VMCI clients should return error code within this range */
+	VMCI_ERROR_CLIENT_MIN		 = (-500),
+	VMCI_ERROR_CLIENT_MAX		 = (-550),
+
+	/* Internal error codes. */
+	VMCI_SHAREDMEM_ERROR_BAD_CONTEXT = (-1000),
+};
+
+/* VMCI reserved events. */
+enum {
+	/* Only applicable to guest endpoints */
+	VMCI_EVENT_CTX_ID_UPDATE  = 0,
+
+	/* Applicable to guest and host */
+	VMCI_EVENT_CTX_REMOVED	  = 1,
+
+	/* Only applicable to guest endpoints */
+	VMCI_EVENT_QP_RESUMED	  = 2,
+
+	/* Applicable to guest and host */
+	VMCI_EVENT_QP_PEER_ATTACH = 3,
+
+	/* Applicable to guest and host */
+	VMCI_EVENT_QP_PEER_DETACH = 4,
+
+	/*
+	 * Applicable to VMX and vmk.  On vmk,
+	 * this event has the Context payload type.
+	 */
+	VMCI_EVENT_MEM_ACCESS_ON  = 5,
+
+	/*
+	 * Applicable to VMX and vmk.  Same as
+	 * above for the payload type.
+	 */
+	VMCI_EVENT_MEM_ACCESS_OFF = 6,
+	VMCI_EVENT_MAX		  = 7,
+};
+
+/*
+ * Of the above events, a few are reserved for use in the VMX, and
+ * other endpoints (guest and host kernel) should not use them. For
+ * the rest of the events, we allow both host and guest endpoints to
+ * subscribe to them, to maintain the same API for host and guest
+ * endpoints.
+ */
+#define VMCI_EVENT_VALID_VMX(_event) ((_event) == VMCI_EVENT_MEM_ACCESS_ON || \
+				      (_event) == VMCI_EVENT_MEM_ACCESS_OFF)
+
+#define VMCI_EVENT_VALID(_event) ((_event) < VMCI_EVENT_MAX &&		\
+				  !VMCI_EVENT_VALID_VMX(_event))
+
+/* Reserved guest datagram resource ids. */
+#define VMCI_EVENT_HANDLER 0
+
+/*
+ * VMCI coarse-grained privileges (per context or host
+ * process/endpoint. An entity with the restricted flag is only
+ * allowed to interact with the hypervisor and trusted entities.
+ */
+enum {
+	VMCI_NO_PRIVILEGE_FLAGS = 0,
+	VMCI_PRIVILEGE_FLAG_RESTRICTED = 1,
+	VMCI_PRIVILEGE_FLAG_TRUSTED = 2,
+	VMCI_PRIVILEGE_ALL_FLAGS = (VMCI_PRIVILEGE_FLAG_RESTRICTED |
+				    VMCI_PRIVILEGE_FLAG_TRUSTED),
+	VMCI_DEFAULT_PROC_PRIVILEGE_FLAGS = VMCI_NO_PRIVILEGE_FLAGS,
+	VMCI_LEAST_PRIVILEGE_FLAGS = VMCI_PRIVILEGE_FLAG_RESTRICTED,
+	VMCI_MAX_PRIVILEGE_FLAGS = VMCI_PRIVILEGE_FLAG_TRUSTED,
+};
+
+/* 0 through VMCI_RESERVED_RESOURCE_ID_MAX are reserved. */
+#define VMCI_RESERVED_RESOURCE_ID_MAX 1023
+
+/*
+ * Driver version.
+ *
+ * Increment major version when you make an incompatible change.
+ * Compatibility goes both ways (old driver with new executable
+ * as well as new driver with old executable).
+ */
+
+/* Never change VMCI_VERSION_SHIFT_WIDTH */
+#define VMCI_VERSION_SHIFT_WIDTH 16
+#define VMCI_MAKE_VERSION(_major, _minor)			\
+	((_major) << VMCI_VERSION_SHIFT_WIDTH | (u16) (_minor))
+
+#define VMCI_VERSION_MAJOR(v)  ((u32) (v) >> VMCI_VERSION_SHIFT_WIDTH)
+#define VMCI_VERSION_MINOR(v)  ((u16) (v))
+
+/*
+ * VMCI_VERSION is always the current version.  Subsequently listed
+ * versions are ways of detecting previous versions of the connecting
+ * application (i.e., VMX).
+ *
+ * VMCI_VERSION_NOVMVM: This version removed support for VM to VM
+ * communication.
+ *
+ * VMCI_VERSION_NOTIFY: This version introduced doorbell notification
+ * support.
+ *
+ * VMCI_VERSION_HOSTQP: This version introduced host end point support
+ * for hosted products.
+ *
+ * VMCI_VERSION_PREHOSTQP: This is the version prior to the adoption of
+ * support for host end-points.
+ *
+ * VMCI_VERSION_PREVERS2: This fictional version number is intended to
+ * represent the version of a VMX which doesn't call into the driver
+ * with ioctl VERSION2 and thus doesn't establish its version with the
+ * driver.
+ */
+
+#define VMCI_VERSION                VMCI_VERSION_NOVMVM
+#define VMCI_VERSION_NOVMVM         VMCI_MAKE_VERSION(11, 0)
+#define VMCI_VERSION_NOTIFY         VMCI_MAKE_VERSION(10, 0)
+#define VMCI_VERSION_HOSTQP         VMCI_MAKE_VERSION(9, 0)
+#define VMCI_VERSION_PREHOSTQP      VMCI_MAKE_VERSION(8, 0)
+#define VMCI_VERSION_PREVERS2       VMCI_MAKE_VERSION(1, 0)
+
+#define VMCI_SOCKETS_MAKE_VERSION(_p)					\
+	((((_p)[0] & 0xFF) << 24) | (((_p)[1] & 0xFF) << 16) | ((_p)[2]))
+
+/*
+ * The VMCI IOCTLs.  We use identity code 7, as noted in ioctl-number.h, and
+ * we start at sequence 9f.  This gives us the same values that our shipping
+ * products use, starting at 1951, provided we leave out the direction and
+ * structure size.  Note that VMMon occupies the block following us, starting
+ * at 2001.
+ */
+#define IOCTL_VMCI_VERSION			_IO(7, 0x9f)	/* 1951 */
+#define IOCTL_VMCI_INIT_CONTEXT			_IO(7, 0xa0)
+#define IOCTL_VMCI_QUEUEPAIR_SETVA		_IO(7, 0xa4)
+#define IOCTL_VMCI_NOTIFY_RESOURCE		_IO(7, 0xa5)
+#define IOCTL_VMCI_NOTIFICATIONS_RECEIVE	_IO(7, 0xa6)
+#define IOCTL_VMCI_VERSION2			_IO(7, 0xa7)
+#define IOCTL_VMCI_QUEUEPAIR_ALLOC		_IO(7, 0xa8)
+#define IOCTL_VMCI_QUEUEPAIR_SETPAGEFILE	_IO(7, 0xa9)
+#define IOCTL_VMCI_QUEUEPAIR_DETACH		_IO(7, 0xaa)
+#define IOCTL_VMCI_DATAGRAM_SEND		_IO(7, 0xab)
+#define IOCTL_VMCI_DATAGRAM_RECEIVE		_IO(7, 0xac)
+#define IOCTL_VMCI_CTX_ADD_NOTIFICATION		_IO(7, 0xaf)
+#define IOCTL_VMCI_CTX_REMOVE_NOTIFICATION	_IO(7, 0xb0)
+#define IOCTL_VMCI_CTX_GET_CPT_STATE		_IO(7, 0xb1)
+#define IOCTL_VMCI_CTX_SET_CPT_STATE		_IO(7, 0xb2)
+#define IOCTL_VMCI_GET_CONTEXT_ID		_IO(7, 0xb3)
+#define IOCTL_VMCI_SOCKETS_VERSION		_IO(7, 0xb4)
+#define IOCTL_VMCI_SOCKETS_GET_AF_VALUE		_IO(7, 0xb8)
+#define IOCTL_VMCI_SOCKETS_GET_LOCAL_CID	_IO(7, 0xb9)
+#define IOCTL_VMCI_SET_NOTIFY			_IO(7, 0xcb)	/* 1995 */
+/*IOCTL_VMMON_START				_IO(7, 0xd1)*/	/* 2001 */
+
+/*
+ * struct vmci_queue_header - VMCI Queue Header information.
+ *
+ * A Queue cannot stand by itself as designed.  Each Queue's header
+ * contains a pointer into itself (the producer_tail) and into its peer
+ * (consumer_head).  The reason for the separation is one of
+ * accessibility: Each end-point can modify two things: where the next
+ * location to enqueue is within its produce_q (producer_tail); and
+ * where the next dequeue location is in its consume_q (consumer_head).
+ *
+ * An end-point cannot modify the pointers of its peer (guest to
+ * guest; NOTE that in the host both queue headers are mapped r/w).
+ * But, each end-point needs read access to both Queue header
+ * structures in order to determine how much space is used (or left)
+ * in the Queue.  This is because for an end-point to know how full
+ * its produce_q is, it needs to use the consumer_head that points into
+ * the produce_q but -that- consumer_head is in the Queue header for
+ * that end-points consume_q.
+ *
+ * Thoroughly confused?  Sorry.
+ *
+ * producer_tail: the point to enqueue new entrants.  When you approach
+ * a line in a store, for example, you walk up to the tail.
+ *
+ * consumer_head: the point in the queue from which the next element is
+ * dequeued.  In other words, who is next in line is he who is at the
+ * head of the line.
+ *
+ * Also, producer_tail points to an empty byte in the Queue, whereas
+ * consumer_head points to a valid byte of data (unless producer_tail ==
+ * consumer_head in which case consumer_head does not point to a valid
+ * byte of data).
+ *
+ * For a queue of buffer 'size' bytes, the tail and head pointers will be in
+ * the range [0, size-1].
+ *
+ * If produce_q_header->producer_tail == consume_q_header->consumer_head
+ * then the produce_q is empty.
+ */
+struct vmci_queue_header {
+	/* All fields are 64bit and aligned. */
+	struct vmci_handle handle;	/* Identifier. */
+	atomic64_t producer_tail;	/* Offset in this queue. */
+	atomic64_t consumer_head;	/* Offset in peer queue. */
+};
+
+/*
+ * struct vmci_datagram - Base struct for vmci datagrams.
+ * @dst:        A vmci_handle that tracks the destination of the datagram.
+ * @src:        A vmci_handle that tracks the source of the datagram.
+ * @payload_size:       The size of the payload.
+ *
+ * vmci_datagram structs are used when sending vmci datagrams.  They include
+ * the necessary source and destination information to properly route
+ * the information along with the size of the package.
+ */
+struct vmci_datagram {
+	struct vmci_handle dst;
+	struct vmci_handle src;
+	u64 payload_size;
+};
+
+/*
+ * Second flag is for creating a well-known handle instead of a per context
+ * handle.  Next flag is for deferring datagram delivery, so that the
+ * datagram callback is invoked in a delayed context (not interrupt context).
+ */
+#define VMCI_FLAG_DG_NONE          0
+#define VMCI_FLAG_WELLKNOWN_DG_HND 0x1
+#define VMCI_FLAG_ANYCID_DG_HND    0x2
+#define VMCI_FLAG_DG_DELAYED_CB    0x4
+
+/*
+ * Maximum supported size of a VMCI datagram for routable datagrams.
+ * Datagrams going to the hypervisor are allowed to be larger.
+ */
+#define VMCI_MAX_DG_SIZE (17 * 4096)
+#define VMCI_MAX_DG_PAYLOAD_SIZE (VMCI_MAX_DG_SIZE - \
+				  sizeof(struct vmci_datagram))
+#define VMCI_DG_PAYLOAD(_dg) (void *)((char *)(_dg) +			\
+				      sizeof(struct vmci_datagram))
+#define VMCI_DG_HEADERSIZE sizeof(struct vmci_datagram)
+#define VMCI_DG_SIZE(_dg) (VMCI_DG_HEADERSIZE + (size_t)(_dg)->payload_size)
+#define VMCI_DG_SIZE_ALIGNED(_dg) ((VMCI_DG_SIZE(_dg) + 7) & (~((size_t) 0x7)))
+#define VMCI_MAX_DATAGRAM_QUEUE_SIZE (VMCI_MAX_DG_SIZE * 2)
+
+struct vmci_event_payload_qp {
+	struct vmci_handle handle;  /* queue_pair handle. */
+	u32 peer_id;		    /* Context id of attaching/detaching VM. */
+	u32 _pad;
+};
+
+/* Flags for VMCI queue_pair API. */
+enum {
+	/* Fail alloc if QP not created by peer. */
+	VMCI_QPFLAG_ATTACH_ONLY = 1 << 0,
+
+	/* Only allow attaches from local context. */
+	VMCI_QPFLAG_LOCAL = 1 << 1,
+
+	/* Host won't block when guest is quiesced. */
+	VMCI_QPFLAG_NONBLOCK = 1 << 2,
+
+	/* Pin data pages in ESX.  Used with NONBLOCK */
+	VMCI_QPFLAG_PINNED = 1 << 3,
+
+	/* Update the following flag when adding new flags. */
+	VMCI_QP_ALL_FLAGS = (VMCI_QPFLAG_ATTACH_ONLY | VMCI_QPFLAG_LOCAL |
+			     VMCI_QPFLAG_NONBLOCK | VMCI_QPFLAG_PINNED),
+
+	/* Convenience flags */
+	VMCI_QP_ASYMM = (VMCI_QPFLAG_NONBLOCK | VMCI_QPFLAG_PINNED),
+	VMCI_QP_ASYMM_PEER = (VMCI_QPFLAG_ATTACH_ONLY | VMCI_QP_ASYMM),
+};
+
+/*
+ * We allow at least 1024 more event datagrams from the hypervisor past the
+ * normally allowed datagrams pending for a given context.  We define this
+ * limit on event datagrams from the hypervisor to guard against DoS attack
+ * from a malicious VM which could repeatedly attach to and detach from a queue
+ * pair, causing events to be queued at the destination VM.  However, the rate
+ * at which such events can be generated is small since it requires a VM exit
+ * and handling of queue pair attach/detach call at the hypervisor.  Event
+ * datagrams may be queued up at the destination VM if it has interrupts
+ * disabled or if it is not draining events for some other reason.  1024
+ * datagrams is a grossly conservative estimate of the time for which
+ * interrupts may be disabled in the destination VM, but at the same time does
+ * not exacerbate the memory pressure problem on the host by much (size of each
+ * event datagram is small).
+ */
+#define VMCI_MAX_DATAGRAM_AND_EVENT_QUEUE_SIZE				\
+	(VMCI_MAX_DATAGRAM_QUEUE_SIZE +					\
+	 1024 * (sizeof(struct vmci_datagram) +				\
+		 sizeof(struct vmci_event_data_max)))
+
+/*
+ * Struct used for querying, via VMCI_RESOURCES_QUERY, the availability of
+ * hypervisor resources.  Struct size is 16 bytes. All fields in struct are
+ * aligned to their natural alignment.
+ */
+struct vmci_resource_query_hdr {
+	struct vmci_datagram hdr;
+	u32 num_resources;
+	u32 _padding;
+};
+
+/*
+ * Convenience struct for negotiating vectors. Must match layout of
+ * VMCIResourceQueryHdr minus the struct vmci_datagram header.
+ */
+struct vmci_resource_query_msg {
+	u32 num_resources;
+	u32 _padding;
+	u32 resources[1];
+};
+
+/*
+ * The maximum number of resources that can be queried using
+ * VMCI_RESOURCE_QUERY is 31, as the result is encoded in the lower 31
+ * bits of a positive return value. Negative values are reserved for
+ * errors.
+ */
+#define VMCI_RESOURCE_QUERY_MAX_NUM 31
+
+/* Maximum size for the VMCI_RESOURCE_QUERY request. */
+#define VMCI_RESOURCE_QUERY_MAX_SIZE				\
+	(sizeof(struct vmci_resource_query_hdr) +		\
+	 sizeof(u32) * VMCI_RESOURCE_QUERY_MAX_NUM)
+
+/*
+ * Struct used for setting the notification bitmap.  All fields in
+ * struct are aligned to their natural alignment.
+ */
+struct vmci_notify_bm_set_msg {
+	struct vmci_datagram hdr;
+	u32 bitmap_ppn;
+	u32 _pad;
+};
+
+/*
+ * Struct used for linking a doorbell handle with an index in the
+ * notify bitmap. All fields in struct are aligned to their natural
+ * alignment.
+ */
+struct vmci_doorbell_link_msg {
+	struct vmci_datagram hdr;
+	struct vmci_handle handle;
+	u64 notify_idx;
+};
+
+/*
+ * Struct used for unlinking a doorbell handle from an index in the
+ * notify bitmap. All fields in struct are aligned to their natural
+ * alignment.
+ */
+struct vmci_doorbell_unlink_msg {
+	struct vmci_datagram hdr;
+	struct vmci_handle handle;
+};
+
+/*
+ * Struct used for generating a notification on a doorbell handle. All
+ * fields in struct are aligned to their natural alignment.
+ */
+struct vmci_doorbell_notify_msg {
+	struct vmci_datagram hdr;
+	struct vmci_handle handle;
+};
+
+/*
+ * This struct is used to contain data for events.  Size of this struct is a
+ * multiple of 8 bytes, and all fields are aligned to their natural alignment.
+ */
+struct vmci_event_data {
+	u32 event;		/* 4 bytes. */
+	u32 _pad;
+	/* Event payload is put here. */
+};
+
+/*
+ * Define the different VMCI_EVENT payload data types here.  All structs must
+ * be a multiple of 8 bytes, and fields must be aligned to their natural
+ * alignment.
+ */
+struct vmci_event_payld_ctx {
+	u32 context_id;	/* 4 bytes. */
+	u32 _pad;
+};
+
+struct vmci_event_payld_qp {
+	struct vmci_handle handle;  /* queue_pair handle. */
+	u32 peer_id;	    /* Context id of attaching/detaching VM. */
+	u32 _pad;
+};
+
+/*
+ * We define the following struct to get the size of the maximum event
+ * data the hypervisor may send to the guest.  If adding a new event
+ * payload type above, add it to the following struct too (inside the
+ * union).
+ */
+struct vmci_event_data_max {
+	struct vmci_event_data event_data;
+	union {
+		struct vmci_event_payld_ctx context_payload;
+		struct vmci_event_payld_qp qp_payload;
+	} ev_data_payload;
+};
+
+/*
+ * Struct used for VMCI_EVENT_SUBSCRIBE/UNSUBSCRIBE and
+ * VMCI_EVENT_HANDLER messages.  Struct size is 32 bytes.  All fields
+ * in struct are aligned to their natural alignment.
+ */
+struct vmci_event_msg {
+	struct vmci_datagram hdr;
+
+	/* Has event type and payload. */
+	struct vmci_event_data event_data;
+
+	/* Payload gets put here. */
+};
+
+/* Event with context payload. */
+struct vmci_event_ctx {
+	struct vmci_event_msg msg;
+	struct vmci_event_payld_ctx payload;
+};
+
+/* Event with QP payload. */
+struct vmci_event_qp {
+	struct vmci_event_msg msg;
+	struct vmci_event_payld_qp payload;
+};
+
+/*
+ * Structs used for queue_pair alloc and detach messages.  We align fields of
+ * these structs to 64bit boundaries.
+ */
+struct vmci_qp_alloc_msg {
+	struct vmci_datagram hdr;
+	struct vmci_handle handle;
+	u32 peer;
+	u32 flags;
+	u64 produce_size;
+	u64 consume_size;
+	u64 num_ppns;
+
+	/* List of PPNs placed here. */
+};
+
+struct vmci_qp_detach_msg {
+	struct vmci_datagram hdr;
+	struct vmci_handle handle;
+};
+
+/* VMCI Doorbell API. */
+#define VMCI_FLAG_DELAYED_CB 0x01
+
+typedef void (*vmci_callback) (void *client_data);
+
+/*
+ * struct vmci_qp - A vmw_vmci queue pair handle.
+ *
+ * This structure is used as a handle to a queue pair created by
+ * VMCI.  It is intentionally left opaque to clients.
+ */
+struct vmci_qp;
+
+/* Callback needed for correctly waiting on events. */
+typedef int (*vmci_datagram_recv_cb) (void *client_data,
+				      struct vmci_datagram *msg);
+
+/* VMCI Event API. */
+typedef void (*vmci_event_cb) (u32 sub_id, const struct vmci_event_data *ed,
+			       void *client_data);
+
+/*
+ * We use the following inline function to access the payload data
+ * associated with an event data.
+ */
+static inline const void *
+vmci_event_data_const_payload(const struct vmci_event_data *ev_data)
+{
+	return (const char *)ev_data + sizeof(*ev_data);
+}
+
+static inline void *vmci_event_data_payload(struct vmci_event_data *ev_data)
+{
+	return (void *)vmci_event_data_const_payload(ev_data);
+}
+
+/*
+ * Helper to add a given offset to a head or tail pointer. Wraps the
+ * value of the pointer around the max size of the queue.
+ */
+static inline void vmci_qp_add_pointer(atomic64_t *var,
+				       size_t add,
+				       u64 size)
+{
+	u64 new_val = atomic64_read(var);
+
+	if (new_val >= size - add)
+		new_val -= size;
+
+	new_val += add;
+
+	atomic64_set(var, new_val);
+}
+
+/*
+ * Helper routine to get the Producer Tail from the supplied queue.
+ */
+static inline u64
+vmci_q_header_producer_tail(const struct vmci_queue_header *q_header)
+{
+	struct vmci_queue_header *qh = (struct vmci_queue_header *)q_header;
+	return atomic64_read(&qh->producer_tail);
+}
+
+/*
+ * Helper routine to get the Consumer Head from the supplied queue.
+ */
+static inline u64
+vmci_q_header_consumer_head(const struct vmci_queue_header *q_header)
+{
+	struct vmci_queue_header *qh = (struct vmci_queue_header *)q_header;
+	return atomic64_read(&qh->consumer_head);
+}
+
+/*
+ * Helper routine to increment the Producer Tail.  Fundamentally,
+ * vmci_qp_add_pointer() is used to manipulate the tail itself.
+ */
+static inline void
+vmci_q_header_add_producer_tail(struct vmci_queue_header *q_header,
+				size_t add,
+				u64 queue_size)
+{
+	vmci_qp_add_pointer(&q_header->producer_tail, add, queue_size);
+}
+
+/*
+ * Helper routine to increment the Consumer Head.  Fundamentally,
+ * vmci_qp_add_pointer() is used to manipulate the head itself.
+ */
+static inline void
+vmci_q_header_add_consumer_head(struct vmci_queue_header *q_header,
+				size_t add,
+				u64 queue_size)
+{
+	vmci_qp_add_pointer(&q_header->consumer_head, add, queue_size);
+}
+
+/*
+ * Helper routine for getting the head and the tail pointer for a queue.
+ * Both the VMCIQueues are needed to get both the pointers for one queue.
+ */
+static inline void
+vmci_q_header_get_pointers(const struct vmci_queue_header *produce_q_header,
+			   const struct vmci_queue_header *consume_q_header,
+			   u64 *producer_tail,
+			   u64 *consumer_head)
+{
+	if (producer_tail)
+		*producer_tail = vmci_q_header_producer_tail(produce_q_header);
+
+	if (consumer_head)
+		*consumer_head = vmci_q_header_consumer_head(consume_q_header);
+}
+
+static inline void vmci_q_header_init(struct vmci_queue_header *q_header,
+				      const struct vmci_handle handle)
+{
+	q_header->handle = handle;
+	atomic64_set(&q_header->producer_tail, 0);
+	atomic64_set(&q_header->consumer_head, 0);
+}
+
+/*
+ * Finds available free space in a produce queue to enqueue more
+ * data or reports an error if queue pair corruption is detected.
+ */
+static s64
+vmci_q_header_free_space(const struct vmci_queue_header *produce_q_header,
+			 const struct vmci_queue_header *consume_q_header,
+			 const u64 produce_q_size)
+{
+	u64 tail;
+	u64 head;
+	u64 free_space;
+
+	tail = vmci_q_header_producer_tail(produce_q_header);
+	head = vmci_q_header_consumer_head(consume_q_header);
+
+	if (tail >= produce_q_size || head >= produce_q_size)
+		return VMCI_ERROR_INVALID_SIZE;
+
+	/*
+	 * Deduct 1 to avoid tail becoming equal to head which causes
+	 * ambiguity. If head and tail are equal it means that the
+	 * queue is empty.
+	 */
+	if (tail >= head)
+		free_space = produce_q_size - (tail - head) - 1;
+	else
+		free_space = head - tail - 1;
+
+	return free_space;
+}
+
+/*
+ * vmci_q_header_free_space() does all the heavy lifting of
+ * determing the number of free bytes in a Queue.  This routine,
+ * then subtracts that size from the full size of the Queue so
+ * the caller knows how many bytes are ready to be dequeued.
+ * Results:
+ * On success, available data size in bytes (up to MAX_INT64).
+ * On failure, appropriate error code.
+ */
+static inline s64
+vmci_q_header_buf_ready(const struct vmci_queue_header *consume_q_header,
+			const struct vmci_queue_header *produce_q_header,
+			const u64 consume_q_size)
+{
+	s64 free_space;
+
+	free_space = vmci_q_header_free_space(consume_q_header,
+					      produce_q_header, consume_q_size);
+	if (free_space < VMCI_SUCCESS)
+		return free_space;
+
+	return consume_q_size - free_space - 1;
+}
+
+
+#endif /* _VMW_VMCI_DEF_H_ */
-- 
cgit v1.2.3


From fda55eca5a33f33ffcd4192c6b2d75179714a52c Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 7 Jan 2013 09:28:21 +0000
Subject: net: introduce skb_transport_header_was_set()

We have skb_mac_header_was_set() helper to tell if mac_header
was set on a skb. We would like the same for transport_header.

__netif_receive_skb() doesn't reset the transport header if already
set by GRO layer.

Note that network stacks usually reset the transport header anyway,
after pulling the network header, so this change only allows
a followup patch to have more precise qdisc pkt_len computation
for GSO packets at ingress side.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 10 ++++++++++
 net/core/dev.c         |  3 ++-
 net/core/skbuff.c      |  2 ++
 3 files changed, 14 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 320e976d5ab8..8b2256e880e0 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1492,6 +1492,11 @@ static inline void skb_set_inner_network_header(struct sk_buff *skb,
 	skb->inner_network_header += offset;
 }
 
+static inline bool skb_transport_header_was_set(const struct sk_buff *skb)
+{
+	return skb->transport_header != ~0U;
+}
+
 static inline unsigned char *skb_transport_header(const struct sk_buff *skb)
 {
 	return skb->head + skb->transport_header;
@@ -1580,6 +1585,11 @@ static inline void skb_set_inner_network_header(struct sk_buff *skb,
 	skb->inner_network_header = skb->data + offset;
 }
 
+static inline bool skb_transport_header_was_set(const struct sk_buff *skb)
+{
+	return skb->transport_header != NULL;
+}
+
 static inline unsigned char *skb_transport_header(const struct sk_buff *skb)
 {
 	return skb->transport_header;
diff --git a/net/core/dev.c b/net/core/dev.c
index a51ccf46e8b7..2e2448201a76 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3352,7 +3352,8 @@ static int __netif_receive_skb(struct sk_buff *skb)
 	orig_dev = skb->dev;
 
 	skb_reset_network_header(skb);
-	skb_reset_transport_header(skb);
+	if (!skb_transport_header_was_set(skb))
+		skb_reset_transport_header(skb);
 	skb_reset_mac_len(skb);
 
 	pt_prev = NULL;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index b03fc0c6a952..1e1b9ea0296d 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -260,6 +260,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 	skb->end = skb->tail + size;
 #ifdef NET_SKBUFF_DATA_USES_OFFSET
 	skb->mac_header = ~0U;
+	skb->transport_header = ~0U;
 #endif
 
 	/* make sure we initialize shinfo sequentially */
@@ -328,6 +329,7 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size)
 	skb->end = skb->tail + size;
 #ifdef NET_SKBUFF_DATA_USES_OFFSET
 	skb->mac_header = ~0U;
+	skb->transport_header = ~0U;
 #endif
 
 	/* make sure we initialize shinfo sequentially */
-- 
cgit v1.2.3


From b7394d2429c198b1da3d46ac39192e891029ec0f Mon Sep 17 00:00:00 2001
From: Cong Wang <amwang@redhat.com>
Date: Mon, 7 Jan 2013 20:52:39 +0000
Subject: netpoll: prepare for ipv6

This patch adjusts some struct and functions, to prepare
for supporting IPv6.

Cc: David S. Miller <davem@davemloft.net>
Signed-off-by: Cong Wang <amwang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/netconsole.c |  12 +-
 include/linux/netpoll.h  |  13 +-
 net/core/netpoll.c       | 402 ++++++++++++++++++++++++++---------------------
 3 files changed, 243 insertions(+), 184 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c
index 6989ebe2bc79..998fa0257a92 100644
--- a/drivers/net/netconsole.c
+++ b/drivers/net/netconsole.c
@@ -269,12 +269,14 @@ static ssize_t show_remote_port(struct netconsole_target *nt, char *buf)
 
 static ssize_t show_local_ip(struct netconsole_target *nt, char *buf)
 {
-	return snprintf(buf, PAGE_SIZE, "%pI4\n", &nt->np.local_ip);
+	if (!nt->np.ipv6)
+		return snprintf(buf, PAGE_SIZE, "%pI4\n", &nt->np.local_ip);
 }
 
 static ssize_t show_remote_ip(struct netconsole_target *nt, char *buf)
 {
-	return snprintf(buf, PAGE_SIZE, "%pI4\n", &nt->np.remote_ip);
+	if (!nt->np.ipv6)
+		return snprintf(buf, PAGE_SIZE, "%pI4\n", &nt->np.remote_ip);
 }
 
 static ssize_t show_local_mac(struct netconsole_target *nt, char *buf)
@@ -410,7 +412,8 @@ static ssize_t store_local_ip(struct netconsole_target *nt,
 		return -EINVAL;
 	}
 
-	nt->np.local_ip = in_aton(buf);
+	if (!strnchr(buf, count, ':'))
+		nt->np.local_ip.ip = in_aton(buf);
 
 	return strnlen(buf, count);
 }
@@ -426,7 +429,8 @@ static ssize_t store_remote_ip(struct netconsole_target *nt,
 		return -EINVAL;
 	}
 
-	nt->np.remote_ip = in_aton(buf);
+	if (!strnchr(buf, count, ':'))
+		nt->np.remote_ip.ip = in_aton(buf);
 
 	return strnlen(buf, count);
 }
diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h
index 66d5379c305e..f54c3bb6a22b 100644
--- a/include/linux/netpoll.h
+++ b/include/linux/netpoll.h
@@ -12,13 +12,22 @@
 #include <linux/rcupdate.h>
 #include <linux/list.h>
 
+union inet_addr {
+	__u32		all[4];
+	__be32		ip;
+	__be32		ip6[4];
+	struct in_addr	in;
+	struct in6_addr	in6;
+};
+
 struct netpoll {
 	struct net_device *dev;
 	char dev_name[IFNAMSIZ];
 	const char *name;
 	void (*rx_hook)(struct netpoll *, int, char *, int);
 
-	__be32 local_ip, remote_ip;
+	union inet_addr local_ip, remote_ip;
+	bool ipv6;
 	u16 local_port, remote_port;
 	u8 remote_mac[ETH_ALEN];
 
@@ -33,7 +42,7 @@ struct netpoll_info {
 	spinlock_t rx_lock;
 	struct list_head rx_np; /* netpolls that registered an rx_hook */
 
-	struct sk_buff_head arp_tx; /* list of arp requests to reply to */
+	struct sk_buff_head neigh_tx; /* list of neigh requests to reply to */
 	struct sk_buff_head txq;
 
 	struct delayed_work tx_work;
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index d2bda8eb08ec..6bd073688f68 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -55,7 +55,7 @@ static atomic_t trapped;
 	 MAX_UDP_CHUNK)
 
 static void zap_completion_queue(void);
-static void netpoll_arp_reply(struct sk_buff *skb, struct netpoll_info *npinfo);
+static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo);
 
 static unsigned int carrier_timeout = 4;
 module_param(carrier_timeout, uint, 0644);
@@ -181,13 +181,13 @@ static void poll_napi(struct net_device *dev)
 	}
 }
 
-static void service_arp_queue(struct netpoll_info *npi)
+static void service_neigh_queue(struct netpoll_info *npi)
 {
 	if (npi) {
 		struct sk_buff *skb;
 
-		while ((skb = skb_dequeue(&npi->arp_tx)))
-			netpoll_arp_reply(skb, npi);
+		while ((skb = skb_dequeue(&npi->neigh_tx)))
+			netpoll_neigh_reply(skb, npi);
 	}
 }
 
@@ -216,14 +216,14 @@ static void netpoll_poll_dev(struct net_device *dev)
 
 			bond_dev = netdev_master_upper_dev_get_rcu(dev);
 			bond_ni = rcu_dereference_bh(bond_dev->npinfo);
-			while ((skb = skb_dequeue(&ni->arp_tx))) {
+			while ((skb = skb_dequeue(&ni->neigh_tx))) {
 				skb->dev = bond_dev;
-				skb_queue_tail(&bond_ni->arp_tx, skb);
+				skb_queue_tail(&bond_ni->neigh_tx, skb);
 			}
 		}
 	}
 
-	service_arp_queue(ni);
+	service_neigh_queue(ni);
 
 	zap_completion_queue();
 }
@@ -386,7 +386,9 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
 	static atomic_t ip_ident;
 
 	udp_len = len + sizeof(*udph);
-	ip_len = udp_len + sizeof(*iph);
+	if (!np->ipv6)
+		ip_len = udp_len + sizeof(*iph);
+
 	total_len = ip_len + LL_RESERVED_SPACE(np->dev);
 
 	skb = find_skb(np, total_len + np->dev->needed_tailroom,
@@ -403,34 +405,38 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
 	udph->source = htons(np->local_port);
 	udph->dest = htons(np->remote_port);
 	udph->len = htons(udp_len);
-	udph->check = 0;
-	udph->check = csum_tcpudp_magic(np->local_ip,
-					np->remote_ip,
-					udp_len, IPPROTO_UDP,
-					csum_partial(udph, udp_len, 0));
-	if (udph->check == 0)
-		udph->check = CSUM_MANGLED_0;
-
-	skb_push(skb, sizeof(*iph));
-	skb_reset_network_header(skb);
-	iph = ip_hdr(skb);
-
-	/* iph->version = 4; iph->ihl = 5; */
-	put_unaligned(0x45, (unsigned char *)iph);
-	iph->tos      = 0;
-	put_unaligned(htons(ip_len), &(iph->tot_len));
-	iph->id       = htons(atomic_inc_return(&ip_ident));
-	iph->frag_off = 0;
-	iph->ttl      = 64;
-	iph->protocol = IPPROTO_UDP;
-	iph->check    = 0;
-	put_unaligned(np->local_ip, &(iph->saddr));
-	put_unaligned(np->remote_ip, &(iph->daddr));
-	iph->check    = ip_fast_csum((unsigned char *)iph, iph->ihl);
-
-	eth = (struct ethhdr *) skb_push(skb, ETH_HLEN);
-	skb_reset_mac_header(skb);
-	skb->protocol = eth->h_proto = htons(ETH_P_IP);
+
+	if (!np->ipv6) {
+		udph->check = 0;
+		udph->check = csum_tcpudp_magic(np->local_ip.ip,
+						np->remote_ip.ip,
+						udp_len, IPPROTO_UDP,
+						csum_partial(udph, udp_len, 0));
+		if (udph->check == 0)
+			udph->check = CSUM_MANGLED_0;
+
+		skb_push(skb, sizeof(*iph));
+		skb_reset_network_header(skb);
+		iph = ip_hdr(skb);
+
+		/* iph->version = 4; iph->ihl = 5; */
+		put_unaligned(0x45, (unsigned char *)iph);
+		iph->tos      = 0;
+		put_unaligned(htons(ip_len), &(iph->tot_len));
+		iph->id       = htons(atomic_inc_return(&ip_ident));
+		iph->frag_off = 0;
+		iph->ttl      = 64;
+		iph->protocol = IPPROTO_UDP;
+		iph->check    = 0;
+		put_unaligned(np->local_ip.ip, &(iph->saddr));
+		put_unaligned(np->remote_ip.ip, &(iph->daddr));
+		iph->check    = ip_fast_csum((unsigned char *)iph, iph->ihl);
+
+		eth = (struct ethhdr *) skb_push(skb, ETH_HLEN);
+		skb_reset_mac_header(skb);
+		skb->protocol = eth->h_proto = htons(ETH_P_IP);
+	}
+
 	memcpy(eth->h_source, np->dev->dev_addr, ETH_ALEN);
 	memcpy(eth->h_dest, np->remote_mac, ETH_ALEN);
 
@@ -440,7 +446,7 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
 }
 EXPORT_SYMBOL(netpoll_send_udp);
 
-static void netpoll_arp_reply(struct sk_buff *skb, struct netpoll_info *npinfo)
+static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo)
 {
 	struct arphdr *arp;
 	unsigned char *arp_ptr;
@@ -451,7 +457,7 @@ static void netpoll_arp_reply(struct sk_buff *skb, struct netpoll_info *npinfo)
 	struct netpoll *np, *tmp;
 	unsigned long flags;
 	int hlen, tlen;
-	int hits = 0;
+	int hits = 0, proto;
 
 	if (list_empty(&npinfo->rx_np))
 		return;
@@ -469,94 +475,97 @@ static void netpoll_arp_reply(struct sk_buff *skb, struct netpoll_info *npinfo)
 	if (!hits)
 		return;
 
-	/* No arp on this interface */
-	if (skb->dev->flags & IFF_NOARP)
-		return;
-
-	if (!pskb_may_pull(skb, arp_hdr_len(skb->dev)))
-		return;
+	proto = ntohs(eth_hdr(skb)->h_proto);
+	if (proto == ETH_P_IP) {
+		/* No arp on this interface */
+		if (skb->dev->flags & IFF_NOARP)
+			return;
 
-	skb_reset_network_header(skb);
-	skb_reset_transport_header(skb);
-	arp = arp_hdr(skb);
+		if (!pskb_may_pull(skb, arp_hdr_len(skb->dev)))
+			return;
 
-	if ((arp->ar_hrd != htons(ARPHRD_ETHER) &&
-	     arp->ar_hrd != htons(ARPHRD_IEEE802)) ||
-	    arp->ar_pro != htons(ETH_P_IP) ||
-	    arp->ar_op != htons(ARPOP_REQUEST))
-		return;
+		skb_reset_network_header(skb);
+		skb_reset_transport_header(skb);
+		arp = arp_hdr(skb);
 
-	arp_ptr = (unsigned char *)(arp+1);
-	/* save the location of the src hw addr */
-	sha = arp_ptr;
-	arp_ptr += skb->dev->addr_len;
-	memcpy(&sip, arp_ptr, 4);
-	arp_ptr += 4;
-	/* If we actually cared about dst hw addr,
-	   it would get copied here */
-	arp_ptr += skb->dev->addr_len;
-	memcpy(&tip, arp_ptr, 4);
-
-	/* Should we ignore arp? */
-	if (ipv4_is_loopback(tip) || ipv4_is_multicast(tip))
-		return;
+		if ((arp->ar_hrd != htons(ARPHRD_ETHER) &&
+		     arp->ar_hrd != htons(ARPHRD_IEEE802)) ||
+		    arp->ar_pro != htons(ETH_P_IP) ||
+		    arp->ar_op != htons(ARPOP_REQUEST))
+			return;
 
-	size = arp_hdr_len(skb->dev);
+		arp_ptr = (unsigned char *)(arp+1);
+		/* save the location of the src hw addr */
+		sha = arp_ptr;
+		arp_ptr += skb->dev->addr_len;
+		memcpy(&sip, arp_ptr, 4);
+		arp_ptr += 4;
+		/* If we actually cared about dst hw addr,
+		   it would get copied here */
+		arp_ptr += skb->dev->addr_len;
+		memcpy(&tip, arp_ptr, 4);
 
-	spin_lock_irqsave(&npinfo->rx_lock, flags);
-	list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
-		if (tip != np->local_ip)
-			continue;
+		/* Should we ignore arp? */
+		if (ipv4_is_loopback(tip) || ipv4_is_multicast(tip))
+			return;
 
-		hlen = LL_RESERVED_SPACE(np->dev);
-		tlen = np->dev->needed_tailroom;
-		send_skb = find_skb(np, size + hlen + tlen, hlen);
-		if (!send_skb)
-			continue;
+		size = arp_hdr_len(skb->dev);
 
-		skb_reset_network_header(send_skb);
-		arp = (struct arphdr *) skb_put(send_skb, size);
-		send_skb->dev = skb->dev;
-		send_skb->protocol = htons(ETH_P_ARP);
+		spin_lock_irqsave(&npinfo->rx_lock, flags);
+		list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
+			if (tip != np->local_ip.ip)
+				continue;
+
+			hlen = LL_RESERVED_SPACE(np->dev);
+			tlen = np->dev->needed_tailroom;
+			send_skb = find_skb(np, size + hlen + tlen, hlen);
+			if (!send_skb)
+				continue;
+
+			skb_reset_network_header(send_skb);
+			arp = (struct arphdr *) skb_put(send_skb, size);
+			send_skb->dev = skb->dev;
+			send_skb->protocol = htons(ETH_P_ARP);
+
+			/* Fill the device header for the ARP frame */
+			if (dev_hard_header(send_skb, skb->dev, ptype,
+					    sha, np->dev->dev_addr,
+					    send_skb->len) < 0) {
+				kfree_skb(send_skb);
+				continue;
+			}
 
-		/* Fill the device header for the ARP frame */
-		if (dev_hard_header(send_skb, skb->dev, ptype,
-				    sha, np->dev->dev_addr,
-				    send_skb->len) < 0) {
-			kfree_skb(send_skb);
-			continue;
+			/*
+			 * Fill out the arp protocol part.
+			 *
+			 * we only support ethernet device type,
+			 * which (according to RFC 1390) should
+			 * always equal 1 (Ethernet).
+			 */
+
+			arp->ar_hrd = htons(np->dev->type);
+			arp->ar_pro = htons(ETH_P_IP);
+			arp->ar_hln = np->dev->addr_len;
+			arp->ar_pln = 4;
+			arp->ar_op = htons(type);
+
+			arp_ptr = (unsigned char *)(arp + 1);
+			memcpy(arp_ptr, np->dev->dev_addr, np->dev->addr_len);
+			arp_ptr += np->dev->addr_len;
+			memcpy(arp_ptr, &tip, 4);
+			arp_ptr += 4;
+			memcpy(arp_ptr, sha, np->dev->addr_len);
+			arp_ptr += np->dev->addr_len;
+			memcpy(arp_ptr, &sip, 4);
+
+			netpoll_send_skb(np, send_skb);
+
+			/* If there are several rx_hooks for the same address,
+			   we're fine by sending a single reply */
+			break;
 		}
-
-		/*
-		 * Fill out the arp protocol part.
-		 *
-		 * we only support ethernet device type,
-		 * which (according to RFC 1390) should
-		 * always equal 1 (Ethernet).
-		 */
-
-		arp->ar_hrd = htons(np->dev->type);
-		arp->ar_pro = htons(ETH_P_IP);
-		arp->ar_hln = np->dev->addr_len;
-		arp->ar_pln = 4;
-		arp->ar_op = htons(type);
-
-		arp_ptr = (unsigned char *)(arp + 1);
-		memcpy(arp_ptr, np->dev->dev_addr, np->dev->addr_len);
-		arp_ptr += np->dev->addr_len;
-		memcpy(arp_ptr, &tip, 4);
-		arp_ptr += 4;
-		memcpy(arp_ptr, sha, np->dev->addr_len);
-		arp_ptr += np->dev->addr_len;
-		memcpy(arp_ptr, &sip, 4);
-
-		netpoll_send_skb(np, send_skb);
-
-		/* If there are several rx_hooks for the same address,
-		   we're fine by sending a single reply */
-		break;
+		spin_unlock_irqrestore(&npinfo->rx_lock, flags);
 	}
-	spin_unlock_irqrestore(&npinfo->rx_lock, flags);
 }
 
 int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo)
@@ -576,7 +585,7 @@ int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo)
 	/* check if netpoll clients need ARP */
 	if (skb->protocol == htons(ETH_P_ARP) &&
 	    atomic_read(&trapped)) {
-		skb_queue_tail(&npinfo->arp_tx, skb);
+		skb_queue_tail(&npinfo->neigh_tx, skb);
 		return 1;
 	}
 
@@ -587,60 +596,61 @@ int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo)
 	}
 
 	proto = ntohs(eth_hdr(skb)->h_proto);
-	if (proto != ETH_P_IP)
+	if (proto != ETH_P_IP && proto != ETH_P_IPV6)
 		goto out;
 	if (skb->pkt_type == PACKET_OTHERHOST)
 		goto out;
 	if (skb_shared(skb))
 		goto out;
 
-	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
-		goto out;
-	iph = (struct iphdr *)skb->data;
-	if (iph->ihl < 5 || iph->version != 4)
-		goto out;
-	if (!pskb_may_pull(skb, iph->ihl*4))
-		goto out;
-	iph = (struct iphdr *)skb->data;
-	if (ip_fast_csum((u8 *)iph, iph->ihl) != 0)
-		goto out;
-
-	len = ntohs(iph->tot_len);
-	if (skb->len < len || len < iph->ihl*4)
-		goto out;
-
-	/*
-	 * Our transport medium may have padded the buffer out.
-	 * Now We trim to the true length of the frame.
-	 */
-	if (pskb_trim_rcsum(skb, len))
-		goto out;
+	if (proto == ETH_P_IP) {
+		if (!pskb_may_pull(skb, sizeof(struct iphdr)))
+			goto out;
+		iph = (struct iphdr *)skb->data;
+		if (iph->ihl < 5 || iph->version != 4)
+			goto out;
+		if (!pskb_may_pull(skb, iph->ihl*4))
+			goto out;
+		iph = (struct iphdr *)skb->data;
+		if (ip_fast_csum((u8 *)iph, iph->ihl) != 0)
+			goto out;
 
-	iph = (struct iphdr *)skb->data;
-	if (iph->protocol != IPPROTO_UDP)
-		goto out;
+		len = ntohs(iph->tot_len);
+		if (skb->len < len || len < iph->ihl*4)
+			goto out;
 
-	len -= iph->ihl*4;
-	uh = (struct udphdr *)(((char *)iph) + iph->ihl*4);
-	ulen = ntohs(uh->len);
+		/*
+		 * Our transport medium may have padded the buffer out.
+		 * Now We trim to the true length of the frame.
+		 */
+		if (pskb_trim_rcsum(skb, len))
+			goto out;
 
-	if (ulen != len)
-		goto out;
-	if (checksum_udp(skb, uh, ulen, iph->saddr, iph->daddr))
-		goto out;
+		iph = (struct iphdr *)skb->data;
+		if (iph->protocol != IPPROTO_UDP)
+			goto out;
 
-	list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
-		if (np->local_ip && np->local_ip != iph->daddr)
-			continue;
-		if (np->remote_ip && np->remote_ip != iph->saddr)
-			continue;
-		if (np->local_port && np->local_port != ntohs(uh->dest))
-			continue;
+		len -= iph->ihl*4;
+		uh = (struct udphdr *)(((char *)iph) + iph->ihl*4);
+		ulen = ntohs(uh->len);
 
-		np->rx_hook(np, ntohs(uh->source),
-			       (char *)(uh+1),
-			       ulen - sizeof(struct udphdr));
-		hits++;
+		if (ulen != len)
+			goto out;
+		if (checksum_udp(skb, uh, ulen, iph->saddr, iph->daddr))
+			goto out;
+		list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
+			if (np->local_ip.ip && np->local_ip.ip != iph->daddr)
+				continue;
+			if (np->remote_ip.ip && np->remote_ip.ip != iph->saddr)
+				continue;
+			if (np->local_port && np->local_port != ntohs(uh->dest))
+				continue;
+
+			np->rx_hook(np, ntohs(uh->source),
+				       (char *)(uh+1),
+				       ulen - sizeof(struct udphdr));
+			hits++;
+		}
 	}
 
 	if (!hits)
@@ -661,17 +671,40 @@ out:
 void netpoll_print_options(struct netpoll *np)
 {
 	np_info(np, "local port %d\n", np->local_port);
-	np_info(np, "local IP %pI4\n", &np->local_ip);
+	if (!np->ipv6)
+		np_info(np, "local IPv4 address %pI4\n", &np->local_ip.ip);
 	np_info(np, "interface '%s'\n", np->dev_name);
 	np_info(np, "remote port %d\n", np->remote_port);
-	np_info(np, "remote IP %pI4\n", &np->remote_ip);
+	if (!np->ipv6)
+		np_info(np, "remote IPv4 address %pI4\n", &np->remote_ip.ip);
 	np_info(np, "remote ethernet address %pM\n", np->remote_mac);
 }
 EXPORT_SYMBOL(netpoll_print_options);
 
+static int netpoll_parse_ip_addr(const char *str, union inet_addr *addr)
+{
+	const char *end;
+
+	if (!strchr(str, ':') &&
+	    in4_pton(str, -1, (void *)addr, -1, &end) > 0) {
+		if (!*end)
+			return 0;
+	}
+	if (in6_pton(str, -1, addr->in6.s6_addr, -1, &end) > 0) {
+#if IS_ENABLED(CONFIG_IPV6)
+		if (!*end)
+			return 1;
+#else
+		return -1;
+#endif
+	}
+	return -1;
+}
+
 int netpoll_parse_options(struct netpoll *np, char *opt)
 {
 	char *cur=opt, *delim;
+	int ipv6;
 
 	if (*cur != '@') {
 		if ((delim = strchr(cur, '@')) == NULL)
@@ -687,7 +720,11 @@ int netpoll_parse_options(struct netpoll *np, char *opt)
 		if ((delim = strchr(cur, '/')) == NULL)
 			goto parse_failed;
 		*delim = 0;
-		np->local_ip = in_aton(cur);
+		ipv6 = netpoll_parse_ip_addr(cur, &np->local_ip);
+		if (ipv6 < 0)
+			goto parse_failed;
+		else
+			np->ipv6 = (bool)ipv6;
 		cur = delim;
 	}
 	cur++;
@@ -719,7 +756,13 @@ int netpoll_parse_options(struct netpoll *np, char *opt)
 	if ((delim = strchr(cur, '/')) == NULL)
 		goto parse_failed;
 	*delim = 0;
-	np->remote_ip = in_aton(cur);
+	ipv6 = netpoll_parse_ip_addr(cur, &np->remote_ip);
+	if (ipv6 < 0)
+		goto parse_failed;
+	else if (np->ipv6 != (bool)ipv6)
+		goto parse_failed;
+	else
+		np->ipv6 = (bool)ipv6;
 	cur = delim + 1;
 
 	if (*cur != 0) {
@@ -767,7 +810,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp)
 		INIT_LIST_HEAD(&npinfo->rx_np);
 
 		spin_lock_init(&npinfo->rx_lock);
-		skb_queue_head_init(&npinfo->arp_tx);
+		skb_queue_head_init(&npinfo->neigh_tx);
 		skb_queue_head_init(&npinfo->txq);
 		INIT_DELAYED_WORK(&npinfo->tx_work, queue_process);
 
@@ -859,21 +902,24 @@ int netpoll_setup(struct netpoll *np)
 		}
 	}
 
-	if (!np->local_ip) {
-		rcu_read_lock();
-		in_dev = __in_dev_get_rcu(ndev);
+	if (!np->local_ip.ip) {
+		if (!np->ipv6) {
+			rcu_read_lock();
+			in_dev = __in_dev_get_rcu(ndev);
 
-		if (!in_dev || !in_dev->ifa_list) {
+
+			if (!in_dev || !in_dev->ifa_list) {
+				rcu_read_unlock();
+				np_err(np, "no IP address for %s, aborting\n",
+				       np->dev_name);
+				err = -EDESTADDRREQ;
+				goto put;
+			}
+
+			np->local_ip.ip = in_dev->ifa_list->ifa_local;
 			rcu_read_unlock();
-			np_err(np, "no IP address for %s, aborting\n",
-			       np->dev_name);
-			err = -EDESTADDRREQ;
-			goto put;
+			np_info(np, "local IP %pI4\n", &np->local_ip.ip);
 		}
-
-		np->local_ip = in_dev->ifa_list->ifa_local;
-		rcu_read_unlock();
-		np_info(np, "local IP %pI4\n", &np->local_ip);
 	}
 
 	/* fill up the skb queue */
@@ -906,7 +952,7 @@ static void rcu_cleanup_netpoll_info(struct rcu_head *rcu_head)
 	struct netpoll_info *npinfo =
 			container_of(rcu_head, struct netpoll_info, rcu);
 
-	skb_queue_purge(&npinfo->arp_tx);
+	skb_queue_purge(&npinfo->neigh_tx);
 	skb_queue_purge(&npinfo->txq);
 
 	/* we can't call cancel_delayed_work_sync here, as we are in softirq */
-- 
cgit v1.2.3


From 8a168ca7074b463d0e19a9e9c15946db1afbddc4 Mon Sep 17 00:00:00 2001
From: Masanari Iida <standby24x7@gmail.com>
Date: Sat, 29 Dec 2012 02:00:09 +0900
Subject: treewide: Fix typo in various drivers

Correct spelling typo in printk within various drivers.

Signed-off-by: Masanari Iida <standby24x7@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/bluetooth/ath3k.c                        | 2 +-
 drivers/devfreq/exynos4_bus.c                    | 2 +-
 drivers/firewire/ohci.c                          | 2 +-
 drivers/gpu/drm/i915/i915_debugfs.c              | 2 +-
 drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c | 2 +-
 drivers/rpmsg/virtio_rpmsg_bus.c                 | 2 +-
 drivers/target/sbp/sbp_target.c                  | 2 +-
 fs/cifs/link.c                                   | 2 +-
 fs/hpfs/inode.c                                  | 2 +-
 fs/ocfs2/cluster/tcp.c                           | 2 +-
 fs/qnx6/inode.c                                  | 2 +-
 include/linux/dma-buf.h                          | 2 +-
 net/caif/cfcnfg.c                                | 2 +-
 scripts/basic/fixdep.c                           | 2 +-
 14 files changed, 14 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/bluetooth/ath3k.c b/drivers/bluetooth/ath3k.c
index fc2de5528dcc..9cd3cb8f888a 100644
--- a/drivers/bluetooth/ath3k.c
+++ b/drivers/bluetooth/ath3k.c
@@ -338,7 +338,7 @@ static int ath3k_load_syscfg(struct usb_device *udev)
 
 	ret = ath3k_get_state(udev, &fw_state);
 	if (ret < 0) {
-		BT_ERR("Can't get state to change to load configration err");
+		BT_ERR("Can't get state to change to load configuration err");
 		return -EBUSY;
 	}
 
diff --git a/drivers/devfreq/exynos4_bus.c b/drivers/devfreq/exynos4_bus.c
index 88ddc77a9bb1..e1ac076c2917 100644
--- a/drivers/devfreq/exynos4_bus.c
+++ b/drivers/devfreq/exynos4_bus.c
@@ -636,7 +636,7 @@ static int exynos4_bus_target(struct device *dev, unsigned long *_freq,
 	if (old_freq == freq)
 		return 0;
 
-	dev_dbg(dev, "targetting %lukHz %luuV\n", freq, opp_get_voltage(opp));
+	dev_dbg(dev, "targeting %lukHz %luuV\n", freq, opp_get_voltage(opp));
 
 	mutex_lock(&data->lock);
 
diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c
index 961e4398664b..638eea51376b 100644
--- a/drivers/firewire/ohci.c
+++ b/drivers/firewire/ohci.c
@@ -329,7 +329,7 @@ module_param_named(quirks, param_quirks, int, 0644);
 MODULE_PARM_DESC(quirks, "Chip quirks (default = 0"
 	", nonatomic cycle timer = "	__stringify(QUIRK_CYCLE_TIMER)
 	", reset packet generation = "	__stringify(QUIRK_RESET_PACKET)
-	", AR/selfID endianess = "	__stringify(QUIRK_BE_HEADERS)
+	", AR/selfID endianness = "	__stringify(QUIRK_BE_HEADERS)
 	", no 1394a enhancements = "	__stringify(QUIRK_NO_1394A)
 	", disable MSI = "		__stringify(QUIRK_NO_MSI)
 	", TI SLLZ059 erratum = "	__stringify(QUIRK_TI_SLLZ059)
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index dde8b505bf7f..6b39e4861133 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1449,7 +1449,7 @@ static const char *swizzle_string(unsigned swizzle)
 	case I915_BIT_6_SWIZZLE_9_10_17:
 		return "bit9/bit10/bit17";
 	case I915_BIT_6_SWIZZLE_UNKNOWN:
-		return "unkown";
+		return "unknown";
 	}
 
 	return "bug";
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
index 24ad17ec7fcd..4c3b67c13047 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
@@ -982,7 +982,7 @@ static int qlcnic_check_npar_opertional(struct qlcnic_adapter *adapter)
 	}
 	if (!npar_opt_timeo) {
 		dev_err(&adapter->pdev->dev,
-			"Waiting for NPAR state to opertional timeout\n");
+			"Waiting for NPAR state to operational timeout\n");
 		return -EIO;
 	}
 	return 0;
diff --git a/drivers/rpmsg/virtio_rpmsg_bus.c b/drivers/rpmsg/virtio_rpmsg_bus.c
index 1859f71372e2..a3c4c030007d 100644
--- a/drivers/rpmsg/virtio_rpmsg_bus.c
+++ b/drivers/rpmsg/virtio_rpmsg_bus.c
@@ -841,7 +841,7 @@ static void rpmsg_recv_done(struct virtqueue *rvq)
 		/* farewell, ept, we don't need you anymore */
 		kref_put(&ept->refcount, __ept_release);
 	} else
-		dev_warn(dev, "msg received with no recepient\n");
+		dev_warn(dev, "msg received with no recipient\n");
 
 	/* publish the real size of the buffer */
 	sg_init_one(&sg, msg, RPMSG_BUF_SIZE);
diff --git a/drivers/target/sbp/sbp_target.c b/drivers/target/sbp/sbp_target.c
index 0d6d7c1f025e..be793883413d 100644
--- a/drivers/target/sbp/sbp_target.c
+++ b/drivers/target/sbp/sbp_target.c
@@ -1718,7 +1718,7 @@ static struct se_node_acl *sbp_alloc_fabric_acl(struct se_portal_group *se_tpg)
 
 	nacl = kzalloc(sizeof(struct sbp_nacl), GFP_KERNEL);
 	if (!nacl) {
-		pr_err("Unable to alocate struct sbp_nacl\n");
+		pr_err("Unable to allocate struct sbp_nacl\n");
 		return NULL;
 	}
 
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index 51dc2fb6e854..9f6c4c45d21e 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -76,7 +76,7 @@ symlink_hash(unsigned int link_len, const char *link_str, u8 *md5_hash)
 	}
 	rc = crypto_shash_update(&sdescmd5->shash, link_str, link_len);
 	if (rc) {
-		cERROR(1, "%s: Could not update iwth link_str", __func__);
+		cERROR(1, "%s: Could not update with link_str", __func__);
 		goto symlink_hash_err;
 	}
 	rc = crypto_shash_final(&sdescmd5->shash, md5_hash);
diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c
index 804a9a842cbc..405ab77db39c 100644
--- a/fs/hpfs/inode.c
+++ b/fs/hpfs/inode.c
@@ -147,7 +147,7 @@ static void hpfs_write_inode_ea(struct inode *i, struct fnode *fnode)
 	/*if (le32_to_cpu(fnode->acl_size_l) || le16_to_cpu(fnode->acl_size_s)) {
 		   Some unknown structures like ACL may be in fnode,
 		   we'd better not overwrite them
-		hpfs_error(i->i_sb, "fnode %08x has some unknown HPFS386 stuctures", i->i_ino);
+		hpfs_error(i->i_sb, "fnode %08x has some unknown HPFS386 structures", i->i_ino);
 	} else*/ if (hpfs_sb(i->i_sb)->sb_eas >= 2) {
 		__le32 ea;
 		if (!uid_eq(i->i_uid, hpfs_sb(i->i_sb)->sb_uid) || hpfs_inode->i_ea_uid) {
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 1bfe8802cc1e..a1d83c58b296 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -870,7 +870,7 @@ int o2net_register_handler(u32 msg_type, u32 key, u32 max_len,
 		/* we've had some trouble with handlers seemingly vanishing. */
 		mlog_bug_on_msg(o2net_handler_tree_lookup(msg_type, key, &p,
 							  &parent) == NULL,
-			        "couldn't find handler we *just* registerd "
+			        "couldn't find handler we *just* registered "
 				"for type %u key %08x\n", msg_type, key);
 	}
 	write_unlock(&o2net_handler_lock);
diff --git a/fs/qnx6/inode.c b/fs/qnx6/inode.c
index b6addf560483..57199a52a351 100644
--- a/fs/qnx6/inode.c
+++ b/fs/qnx6/inode.c
@@ -285,7 +285,7 @@ static struct buffer_head *qnx6_check_first_superblock(struct super_block *s,
 		if (fs32_to_cpu(sbi, sb->sb_magic) == QNX6_SUPER_MAGIC) {
 			/* we got a big endian fs */
 			QNX6DEBUG((KERN_INFO "qnx6: fs got different"
-					" endianess.\n"));
+					" endianness.\n"));
 			return bh;
 		} else
 			sbi->s_bytesex = BYTESEX_LE;
diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
index eb48f3816df9..139e673a44f8 100644
--- a/include/linux/dma-buf.h
+++ b/include/linux/dma-buf.h
@@ -53,7 +53,7 @@ struct dma_buf_attachment;
  * @begin_cpu_access: [optional] called before cpu access to invalidate cpu
  * 		      caches and allocate backing storage (if not yet done)
  * 		      respectively pin the objet into memory.
- * @end_cpu_access: [optional] called after cpu access to flush cashes.
+ * @end_cpu_access: [optional] called after cpu access to flush caches.
  * @kmap_atomic: maps a page from the buffer into kernel address
  * 		 space, users may not block until the subsequent unmap call.
  * 		 This callback must not sleep.
diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c
index ba9cfd47778a..f1dbddb95a6c 100644
--- a/net/caif/cfcnfg.c
+++ b/net/caif/cfcnfg.c
@@ -402,7 +402,7 @@ cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id, enum cfctrl_srv serv,
 
 	phyinfo = cfcnfg_get_phyinfo_rcu(cnfg, phyid);
 	if (phyinfo == NULL) {
-		pr_err("ERROR: Link Layer Device dissapeared"
+		pr_err("ERROR: Link Layer Device disappeared"
 				"while connecting\n");
 		goto unlock;
 	}
diff --git a/scripts/basic/fixdep.c b/scripts/basic/fixdep.c
index cb1f50cf12e3..7f6425e24ce3 100644
--- a/scripts/basic/fixdep.c
+++ b/scripts/basic/fixdep.c
@@ -409,7 +409,7 @@ static void traps(void)
 	int *p = (int *)test;
 
 	if (*p != INT_CONF) {
-		fprintf(stderr, "fixdep: sizeof(int) != 4 or wrong endianess? %#x\n",
+		fprintf(stderr, "fixdep: sizeof(int) != 4 or wrong endianness? %#x\n",
 			*p);
 		exit(2);
 	}
-- 
cgit v1.2.3


From 548bc8e1b38e48653a90f48f636f8d253504f8a2 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 9 Jan 2013 08:05:13 -0800
Subject: block: RCU free request_queue

RCU free request_queue so that blkcg_gq->q can be dereferenced under
RCU lock.  This will be used to implement hierarchical stats.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Vivek Goyal <vgoyal@redhat.com>
---
 block/blk-sysfs.c      | 9 ++++++++-
 include/linux/blkdev.h | 2 ++
 2 files changed, 10 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 788147797a79..6206a934eb8c 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -497,6 +497,13 @@ queue_attr_store(struct kobject *kobj, struct attribute *attr,
 	return res;
 }
 
+static void blk_free_queue_rcu(struct rcu_head *rcu_head)
+{
+	struct request_queue *q = container_of(rcu_head, struct request_queue,
+					       rcu_head);
+	kmem_cache_free(blk_requestq_cachep, q);
+}
+
 /**
  * blk_release_queue: - release a &struct request_queue when it is no longer needed
  * @kobj:    the kobj belonging to the request queue to be released
@@ -538,7 +545,7 @@ static void blk_release_queue(struct kobject *kobj)
 	bdi_destroy(&q->backing_dev_info);
 
 	ida_simple_remove(&blk_queue_ida, q->id);
-	kmem_cache_free(blk_requestq_cachep, q);
+	call_rcu(&q->rcu_head, blk_free_queue_rcu);
 }
 
 static const struct sysfs_ops queue_sysfs_ops = {
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index f94bc83011ed..406343c43cda 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -19,6 +19,7 @@
 #include <linux/gfp.h>
 #include <linux/bsg.h>
 #include <linux/smp.h>
+#include <linux/rcupdate.h>
 
 #include <asm/scatterlist.h>
 
@@ -437,6 +438,7 @@ struct request_queue {
 	/* Throttle data */
 	struct throtl_data *td;
 #endif
+	struct rcu_head		rcu_head;
 };
 
 #define QUEUE_FLAG_QUEUED	1	/* uses generic tag queueing */
-- 
cgit v1.2.3


From 08c097fc3bb283299a6915a6a3795edab85979b1 Mon Sep 17 00:00:00 2001
From: Marc Dionne <marc.c.dionne@gmail.com>
Date: Wed, 9 Jan 2013 14:16:30 +0000
Subject: cred: Remove tgcred pointer from struct cred

Commit 3a50597de863 ("KEYS: Make the session and process keyrings
per-thread") removed the definition of the thread_group_cred structure,
but left a now unused pointer in struct cred.

Signed-off-by: Marc Dionne <marc.c.dionne@gmail.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/cred.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/cred.h b/include/linux/cred.h
index abb2cd50f6b2..04421e825365 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -128,7 +128,6 @@ struct cred {
 	struct key	*process_keyring; /* keyring private to this process */
 	struct key	*thread_keyring; /* keyring private to this thread */
 	struct key	*request_key_auth; /* assumed request_key authority */
-	struct thread_group_cred *tgcred; /* thread-group shared credentials */
 #endif
 #ifdef CONFIG_SECURITY
 	void		*security;	/* subjective LSM security */
-- 
cgit v1.2.3


From dd4544f05469aaaeee891d7dc54d66430344321e Mon Sep 17 00:00:00 2001
From: Rafał Miłecki <zajec5@gmail.com>
Date: Tue, 8 Jan 2013 20:06:23 +0000
Subject: bgmac: driver for GBit MAC core on BCMA bus
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

BCMA is a Broadcom specific bus with devices AKA cores. All recent BCMA
based SoCs have gigabit ethernet provided by the GBit MAC core. This
patch adds driver for such a cores registering itself as a netdev. It
has been tested on a BCM4706 and BCM4718 chipsets.

In the kernel tree there is already b44 driver which has some common
things with bgmac, however there are many differences that has led to
the decision or writing a new driver:
1) GBit MAC cores appear on BCMA bus (not SSB as in case of b44)
2) There is 64bit DMA engine which differs from 32bit one
3) There is no CAM (Content Addressable Memory) in GBit MAC
4) We have 4 TX queues on GBit MAC devices (instead of 1)
5) Many registers have different addresses/values
6) RX header flags are also different

The driver in it's state is functional how, however there is of course
place for improvements:
1) Supporting more net_device_ops
2) SUpporting more ethtool_ops
3) Unaligned addressing in DMA
4) Writing separated PHY driver

Signed-off-by: Rafał Miłecki <zajec5@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/bcma/driver_chipcommon_pmu.c        |    3 +-
 drivers/net/ethernet/broadcom/Kconfig       |    9 +
 drivers/net/ethernet/broadcom/Makefile      |    1 +
 drivers/net/ethernet/broadcom/bgmac.c       | 1422 +++++++++++++++++++++++++++
 drivers/net/ethernet/broadcom/bgmac.h       |  456 +++++++++
 include/linux/bcma/bcma_driver_chipcommon.h |    2 +
 6 files changed, 1892 insertions(+), 1 deletion(-)
 create mode 100644 drivers/net/ethernet/broadcom/bgmac.c
 create mode 100644 drivers/net/ethernet/broadcom/bgmac.h

(limited to 'include/linux')

diff --git a/drivers/bcma/driver_chipcommon_pmu.c b/drivers/bcma/driver_chipcommon_pmu.c
index c62c788b3289..932b101dee36 100644
--- a/drivers/bcma/driver_chipcommon_pmu.c
+++ b/drivers/bcma/driver_chipcommon_pmu.c
@@ -264,7 +264,7 @@ static u32 bcma_pmu_pll_clock_bcm4706(struct bcma_drv_cc *cc, u32 pll0, u32 m)
 }
 
 /* query bus clock frequency for PMU-enabled chipcommon */
-static u32 bcma_pmu_get_bus_clock(struct bcma_drv_cc *cc)
+u32 bcma_pmu_get_bus_clock(struct bcma_drv_cc *cc)
 {
 	struct bcma_bus *bus = cc->core->bus;
 
@@ -293,6 +293,7 @@ static u32 bcma_pmu_get_bus_clock(struct bcma_drv_cc *cc)
 	}
 	return BCMA_CC_PMU_HT_CLOCK;
 }
+EXPORT_SYMBOL_GPL(bcma_pmu_get_bus_clock);
 
 /* query cpu clock frequency for PMU-enabled chipcommon */
 u32 bcma_pmu_get_cpu_clock(struct bcma_drv_cc *cc)
diff --git a/drivers/net/ethernet/broadcom/Kconfig b/drivers/net/ethernet/broadcom/Kconfig
index 3b3bf0dd0f1a..3e69b3f88099 100644
--- a/drivers/net/ethernet/broadcom/Kconfig
+++ b/drivers/net/ethernet/broadcom/Kconfig
@@ -130,4 +130,13 @@ config BNX2X_SRIOV
 	  Virtualization support in the 578xx and 57712 products. This
 	  allows for virtual function acceleration in virtual environments.
 
+config BGMAC
+	tristate "BCMA bus GBit core support"
+	depends on BCMA_HOST_SOC && HAS_DMA
+	---help---
+	  This driver supports GBit MAC and BCM4706 GBit MAC cores on BCMA bus.
+	  They can be found on BCM47xx SoCs and provide gigabit ethernet.
+	  In case of using this driver on BCM4706 it's also requires to enable
+	  BCMA_DRIVER_GMAC_CMN to make it work.
+
 endif # NET_VENDOR_BROADCOM
diff --git a/drivers/net/ethernet/broadcom/Makefile b/drivers/net/ethernet/broadcom/Makefile
index b7896051d54e..68efa1a3fb88 100644
--- a/drivers/net/ethernet/broadcom/Makefile
+++ b/drivers/net/ethernet/broadcom/Makefile
@@ -9,3 +9,4 @@ obj-$(CONFIG_CNIC) += cnic.o
 obj-$(CONFIG_BNX2X) += bnx2x/
 obj-$(CONFIG_SB1250_MAC) += sb1250-mac.o
 obj-$(CONFIG_TIGON3) += tg3.o
+obj-$(CONFIG_BGMAC) += bgmac.o
diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c
new file mode 100644
index 000000000000..9bd33db7fddd
--- /dev/null
+++ b/drivers/net/ethernet/broadcom/bgmac.c
@@ -0,0 +1,1422 @@
+/*
+ * Driver for (BCM4706)? GBit MAC core on BCMA bus.
+ *
+ * Copyright (C) 2012 Rafał Miłecki <zajec5@gmail.com>
+ *
+ * Licensed under the GNU/GPL. See COPYING for details.
+ */
+
+#include "bgmac.h"
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/etherdevice.h>
+#include <linux/mii.h>
+#include <linux/interrupt.h>
+#include <linux/dma-mapping.h>
+#include <asm/mach-bcm47xx/nvram.h>
+
+static const struct bcma_device_id bgmac_bcma_tbl[] = {
+	BCMA_CORE(BCMA_MANUF_BCM, BCMA_CORE_4706_MAC_GBIT, BCMA_ANY_REV, BCMA_ANY_CLASS),
+	BCMA_CORE(BCMA_MANUF_BCM, BCMA_CORE_MAC_GBIT, BCMA_ANY_REV, BCMA_ANY_CLASS),
+	BCMA_CORETABLE_END
+};
+MODULE_DEVICE_TABLE(bcma, bgmac_bcma_tbl);
+
+static bool bgmac_wait_value(struct bcma_device *core, u16 reg, u32 mask,
+			     u32 value, int timeout)
+{
+	u32 val;
+	int i;
+
+	for (i = 0; i < timeout / 10; i++) {
+		val = bcma_read32(core, reg);
+		if ((val & mask) == value)
+			return true;
+		udelay(10);
+	}
+	pr_err("Timeout waiting for reg 0x%X\n", reg);
+	return false;
+}
+
+/**************************************************
+ * DMA
+ **************************************************/
+
+static void bgmac_dma_tx_reset(struct bgmac *bgmac, struct bgmac_dma_ring *ring)
+{
+	u32 val;
+	int i;
+
+	if (!ring->mmio_base)
+		return;
+
+	/* Suspend DMA TX ring first.
+	 * bgmac_wait_value doesn't support waiting for any of few values, so
+	 * implement whole loop here.
+	 */
+	bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_TX_CTL,
+		    BGMAC_DMA_TX_SUSPEND);
+	for (i = 0; i < 10000 / 10; i++) {
+		val = bgmac_read(bgmac, ring->mmio_base + BGMAC_DMA_TX_STATUS);
+		val &= BGMAC_DMA_TX_STAT;
+		if (val == BGMAC_DMA_TX_STAT_DISABLED ||
+		    val == BGMAC_DMA_TX_STAT_IDLEWAIT ||
+		    val == BGMAC_DMA_TX_STAT_STOPPED) {
+			i = 0;
+			break;
+		}
+		udelay(10);
+	}
+	if (i)
+		bgmac_err(bgmac, "Timeout suspending DMA TX ring 0x%X (BGMAC_DMA_TX_STAT: 0x%08X)\n",
+			  ring->mmio_base, val);
+
+	/* Remove SUSPEND bit */
+	bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_TX_CTL, 0);
+	if (!bgmac_wait_value(bgmac->core,
+			      ring->mmio_base + BGMAC_DMA_TX_STATUS,
+			      BGMAC_DMA_TX_STAT, BGMAC_DMA_TX_STAT_DISABLED,
+			      10000)) {
+		bgmac_warn(bgmac, "DMA TX ring 0x%X wasn't disabled on time, waiting additional 300us\n",
+			   ring->mmio_base);
+		udelay(300);
+		val = bgmac_read(bgmac, ring->mmio_base + BGMAC_DMA_TX_STATUS);
+		if ((val & BGMAC_DMA_TX_STAT) != BGMAC_DMA_TX_STAT_DISABLED)
+			bgmac_err(bgmac, "Reset of DMA TX ring 0x%X failed\n",
+				  ring->mmio_base);
+	}
+}
+
+static void bgmac_dma_tx_enable(struct bgmac *bgmac,
+				struct bgmac_dma_ring *ring)
+{
+	u32 ctl;
+
+	ctl = bgmac_read(bgmac, ring->mmio_base + BGMAC_DMA_TX_CTL);
+	ctl |= BGMAC_DMA_TX_ENABLE;
+	ctl |= BGMAC_DMA_TX_PARITY_DISABLE;
+	bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_TX_CTL, ctl);
+}
+
+static netdev_tx_t bgmac_dma_tx_add(struct bgmac *bgmac,
+				    struct bgmac_dma_ring *ring,
+				    struct sk_buff *skb)
+{
+	struct device *dma_dev = bgmac->core->dma_dev;
+	struct net_device *net_dev = bgmac->net_dev;
+	struct bgmac_dma_desc *dma_desc;
+	struct bgmac_slot_info *slot;
+	u32 ctl0, ctl1;
+	int free_slots;
+
+	if (skb->len > BGMAC_DESC_CTL1_LEN) {
+		bgmac_err(bgmac, "Too long skb (%d)\n", skb->len);
+		goto err_stop_drop;
+	}
+
+	if (ring->start <= ring->end)
+		free_slots = ring->start - ring->end + BGMAC_TX_RING_SLOTS;
+	else
+		free_slots = ring->start - ring->end;
+	if (free_slots == 1) {
+		bgmac_err(bgmac, "TX ring is full, queue should be stopped!\n");
+		netif_stop_queue(net_dev);
+		return NETDEV_TX_BUSY;
+	}
+
+	slot = &ring->slots[ring->end];
+	slot->skb = skb;
+	slot->dma_addr = dma_map_single(dma_dev, skb->data, skb->len,
+					DMA_TO_DEVICE);
+	if (dma_mapping_error(dma_dev, slot->dma_addr)) {
+		bgmac_err(bgmac, "Mapping error of skb on ring 0x%X\n",
+			  ring->mmio_base);
+		goto err_stop_drop;
+	}
+
+	ctl0 = BGMAC_DESC_CTL0_IOC | BGMAC_DESC_CTL0_SOF | BGMAC_DESC_CTL0_EOF;
+	if (ring->end == ring->num_slots - 1)
+		ctl0 |= BGMAC_DESC_CTL0_EOT;
+	ctl1 = skb->len & BGMAC_DESC_CTL1_LEN;
+
+	dma_desc = ring->cpu_base;
+	dma_desc += ring->end;
+	dma_desc->addr_low = cpu_to_le32(lower_32_bits(slot->dma_addr));
+	dma_desc->addr_high = cpu_to_le32(upper_32_bits(slot->dma_addr));
+	dma_desc->ctl0 = cpu_to_le32(ctl0);
+	dma_desc->ctl1 = cpu_to_le32(ctl1);
+
+	wmb();
+
+	/* Increase ring->end to point empty slot. We tell hardware the first
+	 * slot it should *not* read.
+	 */
+	if (++ring->end >= BGMAC_TX_RING_SLOTS)
+		ring->end = 0;
+	bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_TX_INDEX,
+		    ring->end * sizeof(struct bgmac_dma_desc));
+
+	/* Always keep one slot free to allow detecting bugged calls. */
+	if (--free_slots == 1)
+		netif_stop_queue(net_dev);
+
+	return NETDEV_TX_OK;
+
+err_stop_drop:
+	netif_stop_queue(net_dev);
+	dev_kfree_skb(skb);
+	return NETDEV_TX_OK;
+}
+
+/* Free transmitted packets */
+static void bgmac_dma_tx_free(struct bgmac *bgmac, struct bgmac_dma_ring *ring)
+{
+	struct device *dma_dev = bgmac->core->dma_dev;
+	int empty_slot;
+	bool freed = false;
+
+	/* The last slot that hardware didn't consume yet */
+	empty_slot = bgmac_read(bgmac, ring->mmio_base + BGMAC_DMA_TX_STATUS);
+	empty_slot &= BGMAC_DMA_TX_STATDPTR;
+	empty_slot /= sizeof(struct bgmac_dma_desc);
+
+	while (ring->start != empty_slot) {
+		struct bgmac_slot_info *slot = &ring->slots[ring->start];
+
+		if (slot->skb) {
+			/* Unmap no longer used buffer */
+			dma_unmap_single(dma_dev, slot->dma_addr,
+					 slot->skb->len, DMA_TO_DEVICE);
+			slot->dma_addr = 0;
+
+			/* Free memory! :) */
+			dev_kfree_skb(slot->skb);
+			slot->skb = NULL;
+		} else {
+			bgmac_err(bgmac, "Hardware reported transmission for empty TX ring slot %d! End of ring: %d\n",
+				  ring->start, ring->end);
+		}
+
+		if (++ring->start >= BGMAC_TX_RING_SLOTS)
+			ring->start = 0;
+		freed = true;
+	}
+
+	if (freed && netif_queue_stopped(bgmac->net_dev))
+		netif_wake_queue(bgmac->net_dev);
+}
+
+static void bgmac_dma_rx_reset(struct bgmac *bgmac, struct bgmac_dma_ring *ring)
+{
+	if (!ring->mmio_base)
+		return;
+
+	bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_RX_CTL, 0);
+	if (!bgmac_wait_value(bgmac->core,
+			      ring->mmio_base + BGMAC_DMA_RX_STATUS,
+			      BGMAC_DMA_RX_STAT, BGMAC_DMA_RX_STAT_DISABLED,
+			      10000))
+		bgmac_err(bgmac, "Reset of ring 0x%X RX failed\n",
+			  ring->mmio_base);
+}
+
+static void bgmac_dma_rx_enable(struct bgmac *bgmac,
+				struct bgmac_dma_ring *ring)
+{
+	u32 ctl;
+
+	ctl = bgmac_read(bgmac, ring->mmio_base + BGMAC_DMA_RX_CTL);
+	ctl &= BGMAC_DMA_RX_ADDREXT_MASK;
+	ctl |= BGMAC_DMA_RX_ENABLE;
+	ctl |= BGMAC_DMA_RX_PARITY_DISABLE;
+	ctl |= BGMAC_DMA_RX_OVERFLOW_CONT;
+	ctl |= BGMAC_RX_FRAME_OFFSET << BGMAC_DMA_RX_FRAME_OFFSET_SHIFT;
+	bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_RX_CTL, ctl);
+}
+
+static int bgmac_dma_rx_skb_for_slot(struct bgmac *bgmac,
+				     struct bgmac_slot_info *slot)
+{
+	struct device *dma_dev = bgmac->core->dma_dev;
+	struct bgmac_rx_header *rx;
+
+	/* Alloc skb */
+	slot->skb = netdev_alloc_skb(bgmac->net_dev, BGMAC_RX_BUF_SIZE);
+	if (!slot->skb) {
+		bgmac_err(bgmac, "Allocation of skb failed!\n");
+		return -ENOMEM;
+	}
+
+	/* Poison - if everything goes fine, hardware will overwrite it */
+	rx = (struct bgmac_rx_header *)slot->skb->data;
+	rx->len = cpu_to_le16(0xdead);
+	rx->flags = cpu_to_le16(0xbeef);
+
+	/* Map skb for the DMA */
+	slot->dma_addr = dma_map_single(dma_dev, slot->skb->data,
+					BGMAC_RX_BUF_SIZE, DMA_FROM_DEVICE);
+	if (dma_mapping_error(dma_dev, slot->dma_addr)) {
+		bgmac_err(bgmac, "DMA mapping error\n");
+		return -ENOMEM;
+	}
+	if (slot->dma_addr & 0xC0000000)
+		bgmac_warn(bgmac, "DMA address using 0xC0000000 bit(s), it may need translation trick\n");
+
+	return 0;
+}
+
+static int bgmac_dma_rx_read(struct bgmac *bgmac, struct bgmac_dma_ring *ring,
+			     int weight)
+{
+	u32 end_slot;
+	int handled = 0;
+
+	end_slot = bgmac_read(bgmac, ring->mmio_base + BGMAC_DMA_RX_STATUS);
+	end_slot &= BGMAC_DMA_RX_STATDPTR;
+	end_slot /= sizeof(struct bgmac_dma_desc);
+
+	ring->end = end_slot;
+
+	while (ring->start != ring->end) {
+		struct device *dma_dev = bgmac->core->dma_dev;
+		struct bgmac_slot_info *slot = &ring->slots[ring->start];
+		struct sk_buff *skb = slot->skb;
+		struct sk_buff *new_skb;
+		struct bgmac_rx_header *rx;
+		u16 len, flags;
+
+		/* Unmap buffer to make it accessible to the CPU */
+		dma_sync_single_for_cpu(dma_dev, slot->dma_addr,
+					BGMAC_RX_BUF_SIZE, DMA_FROM_DEVICE);
+
+		/* Get info from the header */
+		rx = (struct bgmac_rx_header *)skb->data;
+		len = le16_to_cpu(rx->len);
+		flags = le16_to_cpu(rx->flags);
+
+		/* Check for poison and drop or pass the packet */
+		if (len == 0xdead && flags == 0xbeef) {
+			bgmac_err(bgmac, "Found poisoned packet at slot %d, DMA issue!\n",
+				  ring->start);
+		} else {
+			new_skb = netdev_alloc_skb(bgmac->net_dev, len);
+			if (new_skb) {
+				skb_put(new_skb, len);
+				skb_copy_from_linear_data_offset(skb, BGMAC_RX_FRAME_OFFSET,
+								 new_skb->data,
+								 len);
+				new_skb->protocol =
+					eth_type_trans(new_skb, bgmac->net_dev);
+				netif_receive_skb(new_skb);
+				handled++;
+			} else {
+				bgmac->net_dev->stats.rx_dropped++;
+				bgmac_err(bgmac, "Allocation of skb for copying packet failed!\n");
+			}
+
+			/* Poison the old skb */
+			rx->len = cpu_to_le16(0xdead);
+			rx->flags = cpu_to_le16(0xbeef);
+		}
+
+		/* Make it back accessible to the hardware */
+		dma_sync_single_for_device(dma_dev, slot->dma_addr,
+					   BGMAC_RX_BUF_SIZE, DMA_FROM_DEVICE);
+
+		if (++ring->start >= BGMAC_RX_RING_SLOTS)
+			ring->start = 0;
+
+		if (handled >= weight) /* Should never be greater */
+			break;
+	}
+
+	return handled;
+}
+
+/* Does ring support unaligned addressing? */
+static bool bgmac_dma_unaligned(struct bgmac *bgmac,
+				struct bgmac_dma_ring *ring,
+				enum bgmac_dma_ring_type ring_type)
+{
+	switch (ring_type) {
+	case BGMAC_DMA_RING_TX:
+		bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_TX_RINGLO,
+			    0xff0);
+		if (bgmac_read(bgmac, ring->mmio_base + BGMAC_DMA_TX_RINGLO))
+			return true;
+		break;
+	case BGMAC_DMA_RING_RX:
+		bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_RX_RINGLO,
+			    0xff0);
+		if (bgmac_read(bgmac, ring->mmio_base + BGMAC_DMA_RX_RINGLO))
+			return true;
+		break;
+	}
+	return false;
+}
+
+static void bgmac_dma_ring_free(struct bgmac *bgmac,
+				struct bgmac_dma_ring *ring)
+{
+	struct device *dma_dev = bgmac->core->dma_dev;
+	struct bgmac_slot_info *slot;
+	int size;
+	int i;
+
+	for (i = 0; i < ring->num_slots; i++) {
+		slot = &ring->slots[i];
+		if (slot->skb) {
+			if (slot->dma_addr)
+				dma_unmap_single(dma_dev, slot->dma_addr,
+						 slot->skb->len, DMA_TO_DEVICE);
+			dev_kfree_skb(slot->skb);
+		}
+	}
+
+	if (ring->cpu_base) {
+		/* Free ring of descriptors */
+		size = ring->num_slots * sizeof(struct bgmac_dma_desc);
+		dma_free_coherent(dma_dev, size, ring->cpu_base,
+				  ring->dma_base);
+	}
+}
+
+static void bgmac_dma_free(struct bgmac *bgmac)
+{
+	int i;
+
+	for (i = 0; i < BGMAC_MAX_TX_RINGS; i++)
+		bgmac_dma_ring_free(bgmac, &bgmac->tx_ring[i]);
+	for (i = 0; i < BGMAC_MAX_RX_RINGS; i++)
+		bgmac_dma_ring_free(bgmac, &bgmac->rx_ring[i]);
+}
+
+static int bgmac_dma_alloc(struct bgmac *bgmac)
+{
+	struct device *dma_dev = bgmac->core->dma_dev;
+	struct bgmac_dma_ring *ring;
+	static const u16 ring_base[] = { BGMAC_DMA_BASE0, BGMAC_DMA_BASE1,
+					 BGMAC_DMA_BASE2, BGMAC_DMA_BASE3, };
+	int size; /* ring size: different for Tx and Rx */
+	int err;
+	int i;
+
+	BUILD_BUG_ON(BGMAC_MAX_TX_RINGS > ARRAY_SIZE(ring_base));
+	BUILD_BUG_ON(BGMAC_MAX_RX_RINGS > ARRAY_SIZE(ring_base));
+
+	if (!(bcma_aread32(bgmac->core, BCMA_IOST) & BCMA_IOST_DMA64)) {
+		bgmac_err(bgmac, "Core does not report 64-bit DMA\n");
+		return -ENOTSUPP;
+	}
+
+	for (i = 0; i < BGMAC_MAX_TX_RINGS; i++) {
+		ring = &bgmac->tx_ring[i];
+		ring->num_slots = BGMAC_TX_RING_SLOTS;
+		ring->mmio_base = ring_base[i];
+		if (bgmac_dma_unaligned(bgmac, ring, BGMAC_DMA_RING_TX))
+			bgmac_warn(bgmac, "TX on ring 0x%X supports unaligned addressing but this feature is not implemented\n",
+				   ring->mmio_base);
+
+		/* Alloc ring of descriptors */
+		size = ring->num_slots * sizeof(struct bgmac_dma_desc);
+		ring->cpu_base = dma_zalloc_coherent(dma_dev, size,
+						     &ring->dma_base,
+						     GFP_KERNEL);
+		if (!ring->cpu_base) {
+			bgmac_err(bgmac, "Allocation of TX ring 0x%X failed\n",
+				  ring->mmio_base);
+			goto err_dma_free;
+		}
+		if (ring->dma_base & 0xC0000000)
+			bgmac_warn(bgmac, "DMA address using 0xC0000000 bit(s), it may need translation trick\n");
+
+		/* No need to alloc TX slots yet */
+	}
+
+	for (i = 0; i < BGMAC_MAX_RX_RINGS; i++) {
+		ring = &bgmac->rx_ring[i];
+		ring->num_slots = BGMAC_RX_RING_SLOTS;
+		ring->mmio_base = ring_base[i];
+		if (bgmac_dma_unaligned(bgmac, ring, BGMAC_DMA_RING_RX))
+			bgmac_warn(bgmac, "RX on ring 0x%X supports unaligned addressing but this feature is not implemented\n",
+				   ring->mmio_base);
+
+		/* Alloc ring of descriptors */
+		size = ring->num_slots * sizeof(struct bgmac_dma_desc);
+		ring->cpu_base = dma_zalloc_coherent(dma_dev, size,
+						     &ring->dma_base,
+						     GFP_KERNEL);
+		if (!ring->cpu_base) {
+			bgmac_err(bgmac, "Allocation of RX ring 0x%X failed\n",
+				  ring->mmio_base);
+			err = -ENOMEM;
+			goto err_dma_free;
+		}
+		if (ring->dma_base & 0xC0000000)
+			bgmac_warn(bgmac, "DMA address using 0xC0000000 bit(s), it may need translation trick\n");
+
+		/* Alloc RX slots */
+		for (i = 0; i < ring->num_slots; i++) {
+			err = bgmac_dma_rx_skb_for_slot(bgmac, &ring->slots[i]);
+			if (err) {
+				bgmac_err(bgmac, "Can't allocate skb for slot in RX ring\n");
+				goto err_dma_free;
+			}
+		}
+	}
+
+	return 0;
+
+err_dma_free:
+	bgmac_dma_free(bgmac);
+	return -ENOMEM;
+}
+
+static void bgmac_dma_init(struct bgmac *bgmac)
+{
+	struct bgmac_dma_ring *ring;
+	struct bgmac_dma_desc *dma_desc;
+	u32 ctl0, ctl1;
+	int i;
+
+	for (i = 0; i < BGMAC_MAX_TX_RINGS; i++) {
+		ring = &bgmac->tx_ring[i];
+
+		/* We don't implement unaligned addressing, so enable first */
+		bgmac_dma_tx_enable(bgmac, ring);
+		bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_TX_RINGLO,
+			    lower_32_bits(ring->dma_base));
+		bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_TX_RINGHI,
+			    upper_32_bits(ring->dma_base));
+
+		ring->start = 0;
+		ring->end = 0;	/* Points the slot that should *not* be read */
+	}
+
+	for (i = 0; i < BGMAC_MAX_RX_RINGS; i++) {
+		ring = &bgmac->rx_ring[i];
+
+		/* We don't implement unaligned addressing, so enable first */
+		bgmac_dma_rx_enable(bgmac, ring);
+		bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_RX_RINGLO,
+			    lower_32_bits(ring->dma_base));
+		bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_RX_RINGHI,
+			    upper_32_bits(ring->dma_base));
+
+		for (i = 0, dma_desc = ring->cpu_base; i < ring->num_slots;
+		     i++, dma_desc++) {
+			ctl0 = ctl1 = 0;
+
+			if (i == ring->num_slots - 1)
+				ctl0 |= BGMAC_DESC_CTL0_EOT;
+			ctl1 |= BGMAC_RX_BUF_SIZE & BGMAC_DESC_CTL1_LEN;
+			/* Is there any BGMAC device that requires extension? */
+			/* ctl1 |= (addrext << B43_DMA64_DCTL1_ADDREXT_SHIFT) &
+			 * B43_DMA64_DCTL1_ADDREXT_MASK;
+			 */
+
+			dma_desc->addr_low = cpu_to_le32(lower_32_bits(ring->slots[i].dma_addr));
+			dma_desc->addr_high = cpu_to_le32(upper_32_bits(ring->slots[i].dma_addr));
+			dma_desc->ctl0 = cpu_to_le32(ctl0);
+			dma_desc->ctl1 = cpu_to_le32(ctl1);
+		}
+
+		bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_RX_INDEX,
+			    ring->num_slots * sizeof(struct bgmac_dma_desc));
+
+		ring->start = 0;
+		ring->end = 0;
+	}
+}
+
+/**************************************************
+ * PHY ops
+ **************************************************/
+
+u16 bgmac_phy_read(struct bgmac *bgmac, u8 phyaddr, u8 reg)
+{
+	struct bcma_device *core;
+	u16 phy_access_addr;
+	u16 phy_ctl_addr;
+	u32 tmp;
+
+	BUILD_BUG_ON(BGMAC_PA_DATA_MASK != BCMA_GMAC_CMN_PA_DATA_MASK);
+	BUILD_BUG_ON(BGMAC_PA_ADDR_MASK != BCMA_GMAC_CMN_PA_ADDR_MASK);
+	BUILD_BUG_ON(BGMAC_PA_ADDR_SHIFT != BCMA_GMAC_CMN_PA_ADDR_SHIFT);
+	BUILD_BUG_ON(BGMAC_PA_REG_MASK != BCMA_GMAC_CMN_PA_REG_MASK);
+	BUILD_BUG_ON(BGMAC_PA_REG_SHIFT != BCMA_GMAC_CMN_PA_REG_SHIFT);
+	BUILD_BUG_ON(BGMAC_PA_WRITE != BCMA_GMAC_CMN_PA_WRITE);
+	BUILD_BUG_ON(BGMAC_PA_START != BCMA_GMAC_CMN_PA_START);
+	BUILD_BUG_ON(BGMAC_PC_EPA_MASK != BCMA_GMAC_CMN_PC_EPA_MASK);
+	BUILD_BUG_ON(BGMAC_PC_MCT_MASK != BCMA_GMAC_CMN_PC_MCT_MASK);
+	BUILD_BUG_ON(BGMAC_PC_MCT_SHIFT != BCMA_GMAC_CMN_PC_MCT_SHIFT);
+	BUILD_BUG_ON(BGMAC_PC_MTE != BCMA_GMAC_CMN_PC_MTE);
+
+	if (bgmac->core->id.id == BCMA_CORE_4706_MAC_GBIT) {
+		core = bgmac->core->bus->drv_gmac_cmn.core;
+		phy_access_addr = BCMA_GMAC_CMN_PHY_ACCESS;
+		phy_ctl_addr = BCMA_GMAC_CMN_PHY_CTL;
+	} else {
+		core = bgmac->core;
+		phy_access_addr = BGMAC_PHY_ACCESS;
+		phy_ctl_addr = BGMAC_PHY_CNTL;
+	}
+
+	tmp = bcma_read32(core, phy_ctl_addr);
+	tmp &= ~BGMAC_PC_EPA_MASK;
+	tmp |= phyaddr;
+	bcma_write32(core, phy_ctl_addr, tmp);
+
+	tmp = BGMAC_PA_START;
+	tmp |= phyaddr << BGMAC_PA_ADDR_SHIFT;
+	tmp |= reg << BGMAC_PA_REG_SHIFT;
+	bcma_write32(core, phy_access_addr, tmp);
+
+	if (!bgmac_wait_value(core, phy_access_addr, BGMAC_PA_START, 0, 1000)) {
+		bgmac_err(bgmac, "Reading PHY %d register 0x%X failed\n",
+			  phyaddr, reg);
+		return 0xffff;
+	}
+
+	return bcma_read32(core, phy_access_addr) & BGMAC_PA_DATA_MASK;
+}
+
+/* http://bcm-v4.sipsolutions.net/mac-gbit/gmac/chipphywr */
+void bgmac_phy_write(struct bgmac *bgmac, u8 phyaddr, u8 reg, u16 value)
+{
+	struct bcma_device *core;
+	u16 phy_access_addr;
+	u16 phy_ctl_addr;
+	u32 tmp;
+
+	if (bgmac->core->id.id == BCMA_CORE_4706_MAC_GBIT) {
+		core = bgmac->core->bus->drv_gmac_cmn.core;
+		phy_access_addr = BCMA_GMAC_CMN_PHY_ACCESS;
+		phy_ctl_addr = BCMA_GMAC_CMN_PHY_CTL;
+	} else {
+		core = bgmac->core;
+		phy_access_addr = BGMAC_PHY_ACCESS;
+		phy_ctl_addr = BGMAC_PHY_CNTL;
+	}
+
+	tmp = bcma_read32(core, phy_ctl_addr);
+	tmp &= ~BGMAC_PC_EPA_MASK;
+	tmp |= phyaddr;
+	bcma_write32(core, phy_ctl_addr, tmp);
+
+	bgmac_write(bgmac, BGMAC_INT_STATUS, BGMAC_IS_MDIO);
+	if (bgmac_read(bgmac, BGMAC_INT_STATUS) & BGMAC_IS_MDIO)
+		bgmac_warn(bgmac, "Error setting MDIO int\n");
+
+	tmp = BGMAC_PA_START;
+	tmp |= BGMAC_PA_WRITE;
+	tmp |= phyaddr << BGMAC_PA_ADDR_SHIFT;
+	tmp |= reg << BGMAC_PA_REG_SHIFT;
+	tmp |= value;
+	bcma_write32(core, phy_access_addr, tmp);
+
+	if (!bgmac_wait_value(core, phy_access_addr, BGMAC_PA_START, 0, 1000))
+		bgmac_err(bgmac, "Writing to PHY %d register 0x%X failed\n",
+			  phyaddr, reg);
+}
+
+/* http://bcm-v4.sipsolutions.net/mac-gbit/gmac/chipphyforce */
+static void bgmac_phy_force(struct bgmac *bgmac)
+{
+	u16 ctl;
+	u16 mask = ~(BGMAC_PHY_CTL_SPEED | BGMAC_PHY_CTL_SPEED_MSB |
+		     BGMAC_PHY_CTL_ANENAB | BGMAC_PHY_CTL_DUPLEX);
+
+	if (bgmac->phyaddr == BGMAC_PHY_NOREGS)
+		return;
+
+	if (bgmac->autoneg)
+		return;
+
+	ctl = bgmac_phy_read(bgmac, bgmac->phyaddr, BGMAC_PHY_CTL);
+	ctl &= mask;
+	if (bgmac->full_duplex)
+		ctl |= BGMAC_PHY_CTL_DUPLEX;
+	if (bgmac->speed == BGMAC_SPEED_100)
+		ctl |= BGMAC_PHY_CTL_SPEED_100;
+	else if (bgmac->speed == BGMAC_SPEED_1000)
+		ctl |= BGMAC_PHY_CTL_SPEED_1000;
+	bgmac_phy_write(bgmac, bgmac->phyaddr, BGMAC_PHY_CTL, ctl);
+}
+
+/* http://bcm-v4.sipsolutions.net/mac-gbit/gmac/chipphyadvertise */
+static void bgmac_phy_advertise(struct bgmac *bgmac)
+{
+	u16 adv;
+
+	if (bgmac->phyaddr == BGMAC_PHY_NOREGS)
+		return;
+
+	if (!bgmac->autoneg)
+		return;
+
+	/* Adv selected 10/100 speeds */
+	adv = bgmac_phy_read(bgmac, bgmac->phyaddr, BGMAC_PHY_ADV);
+	adv &= ~(BGMAC_PHY_ADV_10HALF | BGMAC_PHY_ADV_10FULL |
+		 BGMAC_PHY_ADV_100HALF | BGMAC_PHY_ADV_100FULL);
+	if (!bgmac->full_duplex && bgmac->speed & BGMAC_SPEED_10)
+		adv |= BGMAC_PHY_ADV_10HALF;
+	if (!bgmac->full_duplex && bgmac->speed & BGMAC_SPEED_100)
+		adv |= BGMAC_PHY_ADV_100HALF;
+	if (bgmac->full_duplex && bgmac->speed & BGMAC_SPEED_10)
+		adv |= BGMAC_PHY_ADV_10FULL;
+	if (bgmac->full_duplex && bgmac->speed & BGMAC_SPEED_100)
+		adv |= BGMAC_PHY_ADV_100FULL;
+	bgmac_phy_write(bgmac, bgmac->phyaddr, BGMAC_PHY_ADV, adv);
+
+	/* Adv selected 1000 speeds */
+	adv = bgmac_phy_read(bgmac, bgmac->phyaddr, BGMAC_PHY_ADV2);
+	adv &= ~(BGMAC_PHY_ADV2_1000HALF | BGMAC_PHY_ADV2_1000FULL);
+	if (!bgmac->full_duplex && bgmac->speed & BGMAC_SPEED_1000)
+		adv |= BGMAC_PHY_ADV2_1000HALF;
+	if (bgmac->full_duplex && bgmac->speed & BGMAC_SPEED_1000)
+		adv |= BGMAC_PHY_ADV2_1000FULL;
+	bgmac_phy_write(bgmac, bgmac->phyaddr, BGMAC_PHY_ADV2, adv);
+
+	/* Restart */
+	bgmac_phy_write(bgmac, bgmac->phyaddr, BGMAC_PHY_CTL,
+			bgmac_phy_read(bgmac, bgmac->phyaddr, BGMAC_PHY_CTL) |
+			BGMAC_PHY_CTL_RESTART);
+}
+
+/* http://bcm-v4.sipsolutions.net/mac-gbit/gmac/chipphyinit */
+static void bgmac_phy_init(struct bgmac *bgmac)
+{
+	struct bcma_chipinfo *ci = &bgmac->core->bus->chipinfo;
+	struct bcma_drv_cc *cc = &bgmac->core->bus->drv_cc;
+	u8 i;
+
+	if (ci->id == BCMA_CHIP_ID_BCM5356) {
+		for (i = 0; i < 5; i++) {
+			bgmac_phy_write(bgmac, i, 0x1f, 0x008b);
+			bgmac_phy_write(bgmac, i, 0x15, 0x0100);
+			bgmac_phy_write(bgmac, i, 0x1f, 0x000f);
+			bgmac_phy_write(bgmac, i, 0x12, 0x2aaa);
+			bgmac_phy_write(bgmac, i, 0x1f, 0x000b);
+		}
+	}
+	if ((ci->id == BCMA_CHIP_ID_BCM5357 && ci->pkg != 10) ||
+	    (ci->id == BCMA_CHIP_ID_BCM4749 && ci->pkg != 10) ||
+	    (ci->id == BCMA_CHIP_ID_BCM53572 && ci->pkg != 9)) {
+		bcma_chipco_chipctl_maskset(cc, 2, ~0xc0000000, 0);
+		bcma_chipco_chipctl_maskset(cc, 4, ~0x80000000, 0);
+		for (i = 0; i < 5; i++) {
+			bgmac_phy_write(bgmac, i, 0x1f, 0x000f);
+			bgmac_phy_write(bgmac, i, 0x16, 0x5284);
+			bgmac_phy_write(bgmac, i, 0x1f, 0x000b);
+			bgmac_phy_write(bgmac, i, 0x17, 0x0010);
+			bgmac_phy_write(bgmac, i, 0x1f, 0x000f);
+			bgmac_phy_write(bgmac, i, 0x16, 0x5296);
+			bgmac_phy_write(bgmac, i, 0x17, 0x1073);
+			bgmac_phy_write(bgmac, i, 0x17, 0x9073);
+			bgmac_phy_write(bgmac, i, 0x16, 0x52b6);
+			bgmac_phy_write(bgmac, i, 0x17, 0x9273);
+			bgmac_phy_write(bgmac, i, 0x1f, 0x000b);
+		}
+	}
+}
+
+/* http://bcm-v4.sipsolutions.net/mac-gbit/gmac/chipphyreset */
+static void bgmac_phy_reset(struct bgmac *bgmac)
+{
+	if (bgmac->phyaddr == BGMAC_PHY_NOREGS)
+		return;
+
+	bgmac_phy_write(bgmac, bgmac->phyaddr, BGMAC_PHY_CTL,
+			BGMAC_PHY_CTL_RESET);
+	udelay(100);
+	if (bgmac_phy_read(bgmac, bgmac->phyaddr, BGMAC_PHY_CTL) &
+	    BGMAC_PHY_CTL_RESET)
+		bgmac_err(bgmac, "PHY reset failed\n");
+	bgmac_phy_init(bgmac);
+}
+
+/**************************************************
+ * Chip ops
+ **************************************************/
+
+/* TODO: can we just drop @force? Can we don't reset MAC at all if there is
+ * nothing to change? Try if after stabilizng driver.
+ */
+static void bgmac_cmdcfg_maskset(struct bgmac *bgmac, u32 mask, u32 set,
+				 bool force)
+{
+	u32 cmdcfg = bgmac_read(bgmac, BGMAC_CMDCFG);
+	u32 new_val = (cmdcfg & mask) | set;
+
+	bgmac_set(bgmac, BGMAC_CMDCFG, BGMAC_CMDCFG_SR);
+	udelay(2);
+
+	if (new_val != cmdcfg || force)
+		bgmac_write(bgmac, BGMAC_CMDCFG, new_val);
+
+	bgmac_mask(bgmac, BGMAC_CMDCFG, ~BGMAC_CMDCFG_SR);
+	udelay(2);
+}
+
+#if 0 /* We don't use that regs yet */
+static void bgmac_chip_stats_update(struct bgmac *bgmac)
+{
+	int i;
+
+	if (bgmac->core->id.id != BCMA_CORE_4706_MAC_GBIT) {
+		for (i = 0; i < BGMAC_NUM_MIB_TX_REGS; i++)
+			bgmac->mib_tx_regs[i] =
+				bgmac_read(bgmac,
+					   BGMAC_TX_GOOD_OCTETS + (i * 4));
+		for (i = 0; i < BGMAC_NUM_MIB_RX_REGS; i++)
+			bgmac->mib_rx_regs[i] =
+				bgmac_read(bgmac,
+					   BGMAC_RX_GOOD_OCTETS + (i * 4));
+	}
+
+	/* TODO: what else? how to handle BCM4706? Specs are needed */
+}
+#endif
+
+static void bgmac_clear_mib(struct bgmac *bgmac)
+{
+	int i;
+
+	if (bgmac->core->id.id == BCMA_CORE_4706_MAC_GBIT)
+		return;
+
+	bgmac_set(bgmac, BGMAC_DEV_CTL, BGMAC_DC_MROR);
+	for (i = 0; i < BGMAC_NUM_MIB_TX_REGS; i++)
+		bgmac_read(bgmac, BGMAC_TX_GOOD_OCTETS + (i * 4));
+	for (i = 0; i < BGMAC_NUM_MIB_RX_REGS; i++)
+		bgmac_read(bgmac, BGMAC_RX_GOOD_OCTETS + (i * 4));
+}
+
+/* http://bcm-v4.sipsolutions.net/mac-gbit/gmac/gmac_speed */
+static void bgmac_speed(struct bgmac *bgmac, int speed)
+{
+	u32 mask = ~(BGMAC_CMDCFG_ES_MASK | BGMAC_CMDCFG_HD);
+	u32 set = 0;
+
+	if (speed & BGMAC_SPEED_10)
+		set |= BGMAC_CMDCFG_ES_10;
+	if (speed & BGMAC_SPEED_100)
+		set |= BGMAC_CMDCFG_ES_100;
+	if (speed & BGMAC_SPEED_1000)
+		set |= BGMAC_CMDCFG_ES_1000;
+	if (!bgmac->full_duplex)
+		set |= BGMAC_CMDCFG_HD;
+	bgmac_cmdcfg_maskset(bgmac, mask, set, true);
+}
+
+static void bgmac_miiconfig(struct bgmac *bgmac)
+{
+	u8 imode = (bgmac_read(bgmac, BGMAC_DEV_STATUS) & BGMAC_DS_MM_MASK) >>
+			BGMAC_DS_MM_SHIFT;
+	if (imode == 0 || imode == 1) {
+		if (bgmac->autoneg)
+			bgmac_speed(bgmac, BGMAC_SPEED_100);
+		else
+			bgmac_speed(bgmac, bgmac->speed);
+	}
+}
+
+/* http://bcm-v4.sipsolutions.net/mac-gbit/gmac/chipreset */
+static void bgmac_chip_reset(struct bgmac *bgmac)
+{
+	struct bcma_device *core = bgmac->core;
+	struct bcma_bus *bus = core->bus;
+	struct bcma_chipinfo *ci = &bus->chipinfo;
+	u32 flags = 0;
+	u32 iost;
+	int i;
+
+	if (bcma_core_is_enabled(core)) {
+		if (!bgmac->stats_grabbed) {
+			/* bgmac_chip_stats_update(bgmac); */
+			bgmac->stats_grabbed = true;
+		}
+
+		for (i = 0; i < BGMAC_MAX_TX_RINGS; i++)
+			bgmac_dma_tx_reset(bgmac, &bgmac->tx_ring[i]);
+
+		bgmac_cmdcfg_maskset(bgmac, ~0, BGMAC_CMDCFG_ML, false);
+		udelay(1);
+
+		for (i = 0; i < BGMAC_MAX_RX_RINGS; i++)
+			bgmac_dma_rx_reset(bgmac, &bgmac->rx_ring[i]);
+
+		/* TODO: Clear software multicast filter list */
+	}
+
+	iost = bcma_aread32(core, BCMA_IOST);
+	if ((ci->id == BCMA_CHIP_ID_BCM5357 && ci->pkg == 10) ||
+	    (ci->id == BCMA_CHIP_ID_BCM4749 && ci->pkg == 10) ||
+	    (ci->id == BCMA_CHIP_ID_BCM53572 && ci->pkg == 9))
+		iost &= ~BGMAC_BCMA_IOST_ATTACHED;
+
+	if (iost & BGMAC_BCMA_IOST_ATTACHED) {
+		flags = BGMAC_BCMA_IOCTL_SW_CLKEN;
+		if (!bgmac->has_robosw)
+			flags |= BGMAC_BCMA_IOCTL_SW_RESET;
+	}
+
+	bcma_core_enable(core, flags);
+
+	if (core->id.rev > 2) {
+		bgmac_set(bgmac, BCMA_CLKCTLST, 1 << 8);
+		bgmac_wait_value(bgmac->core, BCMA_CLKCTLST, 1 << 24, 1 << 24,
+				 1000);
+	}
+
+	if (ci->id == BCMA_CHIP_ID_BCM5357 || ci->id == BCMA_CHIP_ID_BCM4749 ||
+	    ci->id == BCMA_CHIP_ID_BCM53572) {
+		struct bcma_drv_cc *cc = &bgmac->core->bus->drv_cc;
+		u8 et_swtype = 0;
+		u8 sw_type = BGMAC_CHIPCTL_1_SW_TYPE_EPHY |
+			     BGMAC_CHIPCTL_1_IF_TYPE_RMII;
+		char buf[2];
+
+		if (nvram_getenv("et_swtype", buf, 1) > 0) {
+			if (kstrtou8(buf, 0, &et_swtype))
+				bgmac_err(bgmac, "Failed to parse et_swtype (%s)\n",
+					  buf);
+			et_swtype &= 0x0f;
+			et_swtype <<= 4;
+			sw_type = et_swtype;
+		} else if (ci->id == BCMA_CHIP_ID_BCM5357 && ci->pkg == 9) {
+			sw_type = BGMAC_CHIPCTL_1_SW_TYPE_EPHYRMII;
+		} else if (0) {
+			/* TODO */
+		}
+		bcma_chipco_chipctl_maskset(cc, 1,
+					    ~(BGMAC_CHIPCTL_1_IF_TYPE_MASK |
+					      BGMAC_CHIPCTL_1_SW_TYPE_MASK),
+					    sw_type);
+	}
+
+	if (iost & BGMAC_BCMA_IOST_ATTACHED && !bgmac->has_robosw)
+		bcma_awrite32(core, BCMA_IOCTL,
+			      bcma_aread32(core, BCMA_IOCTL) &
+			      ~BGMAC_BCMA_IOCTL_SW_RESET);
+
+	/* http://bcm-v4.sipsolutions.net/mac-gbit/gmac/gmac_reset
+	 * Specs don't say about using BGMAC_CMDCFG_SR, but in this routine
+	 * BGMAC_CMDCFG is read _after_ putting chip in a reset. So it has to
+	 * be keps until taking MAC out of the reset.
+	 */
+	bgmac_cmdcfg_maskset(bgmac,
+			     ~(BGMAC_CMDCFG_TE |
+			       BGMAC_CMDCFG_RE |
+			       BGMAC_CMDCFG_RPI |
+			       BGMAC_CMDCFG_TAI |
+			       BGMAC_CMDCFG_HD |
+			       BGMAC_CMDCFG_ML |
+			       BGMAC_CMDCFG_CFE |
+			       BGMAC_CMDCFG_RL |
+			       BGMAC_CMDCFG_RED |
+			       BGMAC_CMDCFG_PE |
+			       BGMAC_CMDCFG_TPI |
+			       BGMAC_CMDCFG_PAD_EN |
+			       BGMAC_CMDCFG_PF),
+			     BGMAC_CMDCFG_PROM |
+			     BGMAC_CMDCFG_NLC |
+			     BGMAC_CMDCFG_CFE |
+			     BGMAC_CMDCFG_SR,
+			     false);
+
+	bgmac_clear_mib(bgmac);
+	if (core->id.id == BCMA_CORE_4706_MAC_GBIT)
+		bcma_maskset32(bgmac->cmn, BCMA_GMAC_CMN_PHY_CTL, ~0,
+			       BCMA_GMAC_CMN_PC_MTE);
+	else
+		bgmac_set(bgmac, BGMAC_PHY_CNTL, BGMAC_PC_MTE);
+	bgmac_miiconfig(bgmac);
+	bgmac_phy_init(bgmac);
+
+	bgmac->int_status = 0;
+}
+
+static void bgmac_chip_intrs_on(struct bgmac *bgmac)
+{
+	bgmac_write(bgmac, BGMAC_INT_MASK, bgmac->int_mask);
+}
+
+static void bgmac_chip_intrs_off(struct bgmac *bgmac)
+{
+	bgmac_write(bgmac, BGMAC_INT_MASK, 0);
+}
+
+/* http://bcm-v4.sipsolutions.net/mac-gbit/gmac/gmac_enable */
+static void bgmac_enable(struct bgmac *bgmac)
+{
+	struct bcma_chipinfo *ci = &bgmac->core->bus->chipinfo;
+	u32 cmdcfg;
+	u32 mode;
+	u32 rxq_ctl;
+	u32 fl_ctl;
+	u16 bp_clk;
+	u8 mdp;
+
+	cmdcfg = bgmac_read(bgmac, BGMAC_CMDCFG);
+	bgmac_cmdcfg_maskset(bgmac, ~(BGMAC_CMDCFG_TE | BGMAC_CMDCFG_RE),
+			     BGMAC_CMDCFG_SR, true);
+	udelay(2);
+	cmdcfg |= BGMAC_CMDCFG_TE | BGMAC_CMDCFG_RE;
+	bgmac_write(bgmac, BGMAC_CMDCFG, cmdcfg);
+
+	mode = (bgmac_read(bgmac, BGMAC_DEV_STATUS) & BGMAC_DS_MM_MASK) >>
+		BGMAC_DS_MM_SHIFT;
+	if (ci->id != BCMA_CHIP_ID_BCM47162 || mode != 0)
+		bgmac_set(bgmac, BCMA_CLKCTLST, BCMA_CLKCTLST_FORCEHT);
+	if (ci->id == BCMA_CHIP_ID_BCM47162 && mode == 2)
+		bcma_chipco_chipctl_maskset(&bgmac->core->bus->drv_cc, 1, ~0,
+					    BGMAC_CHIPCTL_1_RXC_DLL_BYPASS);
+
+	switch (ci->id) {
+	case BCMA_CHIP_ID_BCM5357:
+	case BCMA_CHIP_ID_BCM4749:
+	case BCMA_CHIP_ID_BCM53572:
+	case BCMA_CHIP_ID_BCM4716:
+	case BCMA_CHIP_ID_BCM47162:
+		fl_ctl = 0x03cb04cb;
+		if (ci->id == BCMA_CHIP_ID_BCM5357 ||
+		    ci->id == BCMA_CHIP_ID_BCM4749 ||
+		    ci->id == BCMA_CHIP_ID_BCM53572)
+			fl_ctl = 0x2300e1;
+		bgmac_write(bgmac, BGMAC_FLOW_CTL_THRESH, fl_ctl);
+		bgmac_write(bgmac, BGMAC_PAUSE_CTL, 0x27fff);
+		break;
+	}
+
+	rxq_ctl = bgmac_read(bgmac, BGMAC_RXQ_CTL);
+	rxq_ctl &= ~BGMAC_RXQ_CTL_MDP_MASK;
+	bp_clk = bcma_pmu_get_bus_clock(&bgmac->core->bus->drv_cc) / 1000000;
+	mdp = (bp_clk * 128 / 1000) - 3;
+	rxq_ctl |= (mdp << BGMAC_RXQ_CTL_MDP_SHIFT);
+	bgmac_write(bgmac, BGMAC_RXQ_CTL, rxq_ctl);
+}
+
+/* http://bcm-v4.sipsolutions.net/mac-gbit/gmac/chipinit */
+static void bgmac_chip_init(struct bgmac *bgmac, bool full_init)
+{
+	struct bgmac_dma_ring *ring;
+	u8 *mac = bgmac->net_dev->dev_addr;
+	u32 tmp;
+	int i;
+
+	/* 1 interrupt per received frame */
+	bgmac_write(bgmac, BGMAC_INT_RECV_LAZY, 1 << BGMAC_IRL_FC_SHIFT);
+
+	/* Enable 802.3x tx flow control (honor received PAUSE frames) */
+	bgmac_cmdcfg_maskset(bgmac, ~BGMAC_CMDCFG_RPI, 0, true);
+
+	if (bgmac->net_dev->flags & IFF_PROMISC)
+		bgmac_cmdcfg_maskset(bgmac, ~0, BGMAC_CMDCFG_PROM, false);
+	else
+		bgmac_cmdcfg_maskset(bgmac, ~BGMAC_CMDCFG_PROM, 0, false);
+
+	/* Set MAC addr */
+	tmp = (mac[0] << 24) | (mac[1] << 16) | (mac[2] << 8) | mac[3];
+	bgmac_write(bgmac, BGMAC_MACADDR_HIGH, tmp);
+	tmp = (mac[4] << 8) | mac[5];
+	bgmac_write(bgmac, BGMAC_MACADDR_LOW, tmp);
+
+	if (bgmac->loopback)
+		bgmac_cmdcfg_maskset(bgmac, ~0, BGMAC_CMDCFG_ML, true);
+	else
+		bgmac_cmdcfg_maskset(bgmac, ~BGMAC_CMDCFG_ML, 0, true);
+
+	bgmac_write(bgmac, BGMAC_RXMAX_LENGTH, 32 + ETHER_MAX_LEN);
+
+	if (!bgmac->autoneg) {
+		bgmac_speed(bgmac, bgmac->speed);
+		bgmac_phy_force(bgmac);
+	} else if (bgmac->speed) { /* if there is anything to adv */
+		bgmac_phy_advertise(bgmac);
+	}
+
+	if (full_init) {
+		bgmac_dma_init(bgmac);
+		if (1) /* FIXME: is there any case we don't want IRQs? */
+			bgmac_chip_intrs_on(bgmac);
+	} else {
+		for (i = 0; i < BGMAC_MAX_RX_RINGS; i++) {
+			ring = &bgmac->rx_ring[i];
+			bgmac_dma_rx_enable(bgmac, ring);
+		}
+	}
+
+	bgmac_enable(bgmac);
+}
+
+static irqreturn_t bgmac_interrupt(int irq, void *dev_id)
+{
+	struct bgmac *bgmac = netdev_priv(dev_id);
+
+	u32 int_status = bgmac_read(bgmac, BGMAC_INT_STATUS);
+	int_status &= bgmac->int_mask;
+
+	if (!int_status)
+		return IRQ_NONE;
+
+	/* Ack */
+	bgmac_write(bgmac, BGMAC_INT_STATUS, int_status);
+
+	/* Disable new interrupts until handling existing ones */
+	bgmac_chip_intrs_off(bgmac);
+
+	bgmac->int_status = int_status;
+
+	napi_schedule(&bgmac->napi);
+
+	return IRQ_HANDLED;
+}
+
+static int bgmac_poll(struct napi_struct *napi, int weight)
+{
+	struct bgmac *bgmac = container_of(napi, struct bgmac, napi);
+	struct bgmac_dma_ring *ring;
+	int handled = 0;
+
+	if (bgmac->int_status & BGMAC_IS_TX0) {
+		ring = &bgmac->tx_ring[0];
+		bgmac_dma_tx_free(bgmac, ring);
+		bgmac->int_status &= ~BGMAC_IS_TX0;
+	}
+
+	if (bgmac->int_status & BGMAC_IS_RX) {
+		ring = &bgmac->rx_ring[0];
+		handled += bgmac_dma_rx_read(bgmac, ring, weight);
+		bgmac->int_status &= ~BGMAC_IS_RX;
+	}
+
+	if (bgmac->int_status) {
+		bgmac_err(bgmac, "Unknown IRQs: 0x%08X\n", bgmac->int_status);
+		bgmac->int_status = 0;
+	}
+
+	if (handled < weight)
+		napi_complete(napi);
+
+	bgmac_chip_intrs_on(bgmac);
+
+	return handled;
+}
+
+/**************************************************
+ * net_device_ops
+ **************************************************/
+
+static int bgmac_open(struct net_device *net_dev)
+{
+	struct bgmac *bgmac = netdev_priv(net_dev);
+	int err = 0;
+
+	bgmac_chip_reset(bgmac);
+	/* Specs say about reclaiming rings here, but we do that in DMA init */
+	bgmac_chip_init(bgmac, true);
+
+	err = request_irq(bgmac->core->irq, bgmac_interrupt, IRQF_SHARED,
+			  KBUILD_MODNAME, net_dev);
+	if (err < 0) {
+		bgmac_err(bgmac, "IRQ request error: %d!\n", err);
+		goto err_out;
+	}
+	napi_enable(&bgmac->napi);
+
+	netif_carrier_on(net_dev);
+
+err_out:
+	return err;
+}
+
+static int bgmac_stop(struct net_device *net_dev)
+{
+	struct bgmac *bgmac = netdev_priv(net_dev);
+
+	netif_carrier_off(net_dev);
+
+	napi_disable(&bgmac->napi);
+	bgmac_chip_intrs_off(bgmac);
+	free_irq(bgmac->core->irq, net_dev);
+
+	bgmac_chip_reset(bgmac);
+
+	return 0;
+}
+
+static netdev_tx_t bgmac_start_xmit(struct sk_buff *skb,
+				    struct net_device *net_dev)
+{
+	struct bgmac *bgmac = netdev_priv(net_dev);
+	struct bgmac_dma_ring *ring;
+
+	/* No QOS support yet */
+	ring = &bgmac->tx_ring[0];
+	return bgmac_dma_tx_add(bgmac, ring, skb);
+}
+
+static int bgmac_ioctl(struct net_device *net_dev, struct ifreq *ifr, int cmd)
+{
+	struct bgmac *bgmac = netdev_priv(net_dev);
+	struct mii_ioctl_data *data = if_mii(ifr);
+
+	switch (cmd) {
+	case SIOCGMIIPHY:
+		data->phy_id = bgmac->phyaddr;
+		/* fallthru */
+	case SIOCGMIIREG:
+		if (!netif_running(net_dev))
+			return -EAGAIN;
+		data->val_out = bgmac_phy_read(bgmac, data->phy_id,
+					       data->reg_num & 0x1f);
+		return 0;
+	case SIOCSMIIREG:
+		if (!netif_running(net_dev))
+			return -EAGAIN;
+		bgmac_phy_write(bgmac, data->phy_id, data->reg_num & 0x1f,
+				data->val_in);
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static const struct net_device_ops bgmac_netdev_ops = {
+	.ndo_open		= bgmac_open,
+	.ndo_stop		= bgmac_stop,
+	.ndo_start_xmit		= bgmac_start_xmit,
+	.ndo_set_mac_address	= eth_mac_addr, /* generic, sets dev_addr */
+	.ndo_do_ioctl           = bgmac_ioctl,
+};
+
+/**************************************************
+ * ethtool_ops
+ **************************************************/
+
+static int bgmac_get_settings(struct net_device *net_dev,
+			      struct ethtool_cmd *cmd)
+{
+	struct bgmac *bgmac = netdev_priv(net_dev);
+
+	cmd->supported = SUPPORTED_10baseT_Half |
+			 SUPPORTED_10baseT_Full |
+			 SUPPORTED_100baseT_Half |
+			 SUPPORTED_100baseT_Full |
+			 SUPPORTED_1000baseT_Half |
+			 SUPPORTED_1000baseT_Full |
+			 SUPPORTED_Autoneg;
+
+	if (bgmac->autoneg) {
+		WARN_ON(cmd->advertising);
+		if (bgmac->full_duplex) {
+			if (bgmac->speed & BGMAC_SPEED_10)
+				cmd->advertising |= ADVERTISED_10baseT_Full;
+			if (bgmac->speed & BGMAC_SPEED_100)
+				cmd->advertising |= ADVERTISED_100baseT_Full;
+			if (bgmac->speed & BGMAC_SPEED_1000)
+				cmd->advertising |= ADVERTISED_1000baseT_Full;
+		} else {
+			if (bgmac->speed & BGMAC_SPEED_10)
+				cmd->advertising |= ADVERTISED_10baseT_Half;
+			if (bgmac->speed & BGMAC_SPEED_100)
+				cmd->advertising |= ADVERTISED_100baseT_Half;
+			if (bgmac->speed & BGMAC_SPEED_1000)
+				cmd->advertising |= ADVERTISED_1000baseT_Half;
+		}
+	} else {
+		switch (bgmac->speed) {
+		case BGMAC_SPEED_10:
+			ethtool_cmd_speed_set(cmd, SPEED_10);
+			break;
+		case BGMAC_SPEED_100:
+			ethtool_cmd_speed_set(cmd, SPEED_100);
+			break;
+		case BGMAC_SPEED_1000:
+			ethtool_cmd_speed_set(cmd, SPEED_1000);
+			break;
+		}
+	}
+
+	cmd->duplex = bgmac->full_duplex ? DUPLEX_FULL : DUPLEX_HALF;
+
+	cmd->autoneg = bgmac->autoneg;
+
+	return 0;
+}
+
+#if 0
+static int bgmac_set_settings(struct net_device *net_dev,
+			      struct ethtool_cmd *cmd)
+{
+	struct bgmac *bgmac = netdev_priv(net_dev);
+
+	return -1;
+}
+#endif
+
+static void bgmac_get_drvinfo(struct net_device *net_dev,
+			      struct ethtool_drvinfo *info)
+{
+	strlcpy(info->driver, KBUILD_MODNAME, sizeof(info->driver));
+	strlcpy(info->bus_info, "BCMA", sizeof(info->bus_info));
+}
+
+static const struct ethtool_ops bgmac_ethtool_ops = {
+	.get_settings		= bgmac_get_settings,
+	.get_drvinfo		= bgmac_get_drvinfo,
+};
+
+/**************************************************
+ * BCMA bus ops
+ **************************************************/
+
+/* http://bcm-v4.sipsolutions.net/mac-gbit/gmac/chipattach */
+static int bgmac_probe(struct bcma_device *core)
+{
+	struct net_device *net_dev;
+	struct bgmac *bgmac;
+	struct ssb_sprom *sprom = &core->bus->sprom;
+	u8 *mac = core->core_unit ? sprom->et1mac : sprom->et0mac;
+	int err;
+
+	/* We don't support 2nd, 3rd, ... units, SPROM has to be adjusted */
+	if (core->core_unit > 1) {
+		pr_err("Unsupported core_unit %d\n", core->core_unit);
+		return -ENOTSUPP;
+	}
+
+	/* Allocation and references */
+	net_dev = alloc_etherdev(sizeof(*bgmac));
+	if (!net_dev)
+		return -ENOMEM;
+	net_dev->netdev_ops = &bgmac_netdev_ops;
+	net_dev->irq = core->irq;
+	SET_ETHTOOL_OPS(net_dev, &bgmac_ethtool_ops);
+	bgmac = netdev_priv(net_dev);
+	bgmac->net_dev = net_dev;
+	bgmac->core = core;
+	bcma_set_drvdata(core, bgmac);
+
+	/* Defaults */
+	bgmac->autoneg = true;
+	bgmac->full_duplex = true;
+	bgmac->speed = BGMAC_SPEED_10 | BGMAC_SPEED_100 | BGMAC_SPEED_1000;
+	memcpy(bgmac->net_dev->dev_addr, mac, ETH_ALEN);
+
+	/* On BCM4706 we need common core to access PHY */
+	if (core->id.id == BCMA_CORE_4706_MAC_GBIT &&
+	    !core->bus->drv_gmac_cmn.core) {
+		bgmac_err(bgmac, "GMAC CMN core not found (required for BCM4706)\n");
+		err = -ENODEV;
+		goto err_netdev_free;
+	}
+	bgmac->cmn = core->bus->drv_gmac_cmn.core;
+
+	bgmac->phyaddr = core->core_unit ? sprom->et1phyaddr :
+			 sprom->et0phyaddr;
+	bgmac->phyaddr &= BGMAC_PHY_MASK;
+	if (bgmac->phyaddr == BGMAC_PHY_MASK) {
+		bgmac_err(bgmac, "No PHY found\n");
+		err = -ENODEV;
+		goto err_netdev_free;
+	}
+	bgmac_info(bgmac, "Found PHY addr: %d%s\n", bgmac->phyaddr,
+		   bgmac->phyaddr == BGMAC_PHY_NOREGS ? " (NOREGS)" : "");
+
+	if (core->bus->hosttype == BCMA_HOSTTYPE_PCI) {
+		bgmac_err(bgmac, "PCI setup not implemented\n");
+		err = -ENOTSUPP;
+		goto err_netdev_free;
+	}
+
+	bgmac_chip_reset(bgmac);
+
+	err = bgmac_dma_alloc(bgmac);
+	if (err) {
+		bgmac_err(bgmac, "Unable to alloc memory for DMA\n");
+		goto err_netdev_free;
+	}
+
+	bgmac->int_mask = BGMAC_IS_ERRMASK | BGMAC_IS_RX | BGMAC_IS_TX_MASK;
+	if (nvram_getenv("et0_no_txint", NULL, 0) == 0)
+		bgmac->int_mask &= ~BGMAC_IS_TX_MASK;
+
+	/* TODO: reset the external phy. Specs are needed */
+	bgmac_phy_reset(bgmac);
+
+	bgmac->has_robosw = !!(core->bus->sprom.boardflags_lo &
+			       BGMAC_BFL_ENETROBO);
+	if (bgmac->has_robosw)
+		bgmac_warn(bgmac, "Support for Roboswitch not implemented\n");
+
+	if (core->bus->sprom.boardflags_lo & BGMAC_BFL_ENETADM)
+		bgmac_warn(bgmac, "Support for ADMtek ethernet switch not implemented\n");
+
+	err = register_netdev(bgmac->net_dev);
+	if (err) {
+		bgmac_err(bgmac, "Cannot register net device\n");
+		err = -ENOTSUPP;
+		goto err_dma_free;
+	}
+
+	netif_carrier_off(net_dev);
+
+	netif_napi_add(net_dev, &bgmac->napi, bgmac_poll, BGMAC_WEIGHT);
+
+	return 0;
+
+err_dma_free:
+	bgmac_dma_free(bgmac);
+
+err_netdev_free:
+	bcma_set_drvdata(core, NULL);
+	free_netdev(net_dev);
+
+	return err;
+}
+
+static void bgmac_remove(struct bcma_device *core)
+{
+	struct bgmac *bgmac = bcma_get_drvdata(core);
+
+	netif_napi_del(&bgmac->napi);
+	unregister_netdev(bgmac->net_dev);
+	bgmac_dma_free(bgmac);
+	bcma_set_drvdata(core, NULL);
+	free_netdev(bgmac->net_dev);
+}
+
+static struct bcma_driver bgmac_bcma_driver = {
+	.name		= KBUILD_MODNAME,
+	.id_table	= bgmac_bcma_tbl,
+	.probe		= bgmac_probe,
+	.remove		= bgmac_remove,
+};
+
+static int __init bgmac_init(void)
+{
+	int err;
+
+	err = bcma_driver_register(&bgmac_bcma_driver);
+	if (err)
+		return err;
+	pr_info("Broadcom 47xx GBit MAC driver loaded\n");
+
+	return 0;
+}
+
+static void __exit bgmac_exit(void)
+{
+	bcma_driver_unregister(&bgmac_bcma_driver);
+}
+
+module_init(bgmac_init)
+module_exit(bgmac_exit)
+
+MODULE_AUTHOR("Rafał Miłecki");
+MODULE_LICENSE("GPL");
diff --git a/drivers/net/ethernet/broadcom/bgmac.h b/drivers/net/ethernet/broadcom/bgmac.h
new file mode 100644
index 000000000000..129947017041
--- /dev/null
+++ b/drivers/net/ethernet/broadcom/bgmac.h
@@ -0,0 +1,456 @@
+#ifndef _BGMAC_H
+#define _BGMAC_H
+
+#define pr_fmt(fmt)		KBUILD_MODNAME ": " fmt
+
+#define bgmac_err(bgmac, fmt, ...) \
+	dev_err(&(bgmac)->core->dev, fmt, ##__VA_ARGS__)
+#define bgmac_warn(bgmac, fmt, ...) \
+	dev_warn(&(bgmac)->core->dev, fmt,  ##__VA_ARGS__)
+#define bgmac_info(bgmac, fmt, ...) \
+	dev_info(&(bgmac)->core->dev, fmt,  ##__VA_ARGS__)
+#define bgmac_dbg(bgmac, fmt, ...) \
+	dev_dbg(&(bgmac)->core->dev, fmt, ##__VA_ARGS__)
+
+#include <linux/bcma/bcma.h>
+#include <linux/netdevice.h>
+
+#define BGMAC_DEV_CTL				0x000
+#define  BGMAC_DC_TSM				0x00000002
+#define  BGMAC_DC_CFCO				0x00000004
+#define  BGMAC_DC_RLSS				0x00000008
+#define  BGMAC_DC_MROR				0x00000010
+#define  BGMAC_DC_FCM_MASK			0x00000060
+#define  BGMAC_DC_FCM_SHIFT			5
+#define  BGMAC_DC_NAE				0x00000080
+#define  BGMAC_DC_TF				0x00000100
+#define  BGMAC_DC_RDS_MASK			0x00030000
+#define  BGMAC_DC_RDS_SHIFT			16
+#define  BGMAC_DC_TDS_MASK			0x000c0000
+#define  BGMAC_DC_TDS_SHIFT			18
+#define BGMAC_DEV_STATUS			0x004		/* Configuration of the interface */
+#define  BGMAC_DS_RBF				0x00000001
+#define  BGMAC_DS_RDF				0x00000002
+#define  BGMAC_DS_RIF				0x00000004
+#define  BGMAC_DS_TBF				0x00000008
+#define  BGMAC_DS_TDF				0x00000010
+#define  BGMAC_DS_TIF				0x00000020
+#define  BGMAC_DS_PO				0x00000040
+#define  BGMAC_DS_MM_MASK			0x00000300	/* Mode of the interface */
+#define  BGMAC_DS_MM_SHIFT			8
+#define BGMAC_BIST_STATUS			0x00c
+#define BGMAC_INT_STATUS			0x020		/* Interrupt status */
+#define  BGMAC_IS_MRO				0x00000001
+#define  BGMAC_IS_MTO				0x00000002
+#define  BGMAC_IS_TFD				0x00000004
+#define  BGMAC_IS_LS				0x00000008
+#define  BGMAC_IS_MDIO				0x00000010
+#define  BGMAC_IS_MR				0x00000020
+#define  BGMAC_IS_MT				0x00000040
+#define  BGMAC_IS_TO				0x00000080
+#define  BGMAC_IS_DESC_ERR			0x00000400	/* Descriptor error */
+#define  BGMAC_IS_DATA_ERR			0x00000800	/* Data error */
+#define  BGMAC_IS_DESC_PROT_ERR			0x00001000	/* Descriptor protocol error */
+#define  BGMAC_IS_RX_DESC_UNDERF		0x00002000	/* Receive descriptor underflow */
+#define  BGMAC_IS_RX_F_OVERF			0x00004000	/* Receive FIFO overflow */
+#define  BGMAC_IS_TX_F_UNDERF			0x00008000	/* Transmit FIFO underflow */
+#define  BGMAC_IS_RX				0x00010000	/* Interrupt for RX queue 0 */
+#define  BGMAC_IS_TX0				0x01000000	/* Interrupt for TX queue 0 */
+#define  BGMAC_IS_TX1				0x02000000	/* Interrupt for TX queue 1 */
+#define  BGMAC_IS_TX2				0x04000000	/* Interrupt for TX queue 2 */
+#define  BGMAC_IS_TX3				0x08000000	/* Interrupt for TX queue 3 */
+#define  BGMAC_IS_TX_MASK			0x0f000000
+#define  BGMAC_IS_INTMASK			0x0f01fcff
+#define  BGMAC_IS_ERRMASK			0x0000fc00
+#define BGMAC_INT_MASK				0x024		/* Interrupt mask */
+#define BGMAC_GP_TIMER				0x028
+#define BGMAC_INT_RECV_LAZY			0x100
+#define  BGMAC_IRL_TO_MASK			0x00ffffff
+#define  BGMAC_IRL_FC_MASK			0xff000000
+#define  BGMAC_IRL_FC_SHIFT			24		/* Shift the number of interrupts triggered per received frame */
+#define BGMAC_FLOW_CTL_THRESH			0x104		/* Flow control thresholds */
+#define BGMAC_WRRTHRESH				0x108
+#define BGMAC_GMAC_IDLE_CNT_THRESH		0x10c
+#define BGMAC_PHY_ACCESS			0x180		/* PHY access address */
+#define  BGMAC_PA_DATA_MASK			0x0000ffff
+#define  BGMAC_PA_ADDR_MASK			0x001f0000
+#define  BGMAC_PA_ADDR_SHIFT			16
+#define  BGMAC_PA_REG_MASK			0x1f000000
+#define  BGMAC_PA_REG_SHIFT			24
+#define  BGMAC_PA_WRITE				0x20000000
+#define  BGMAC_PA_START				0x40000000
+#define BGMAC_PHY_CNTL				0x188		/* PHY control address */
+#define  BGMAC_PC_EPA_MASK			0x0000001f
+#define  BGMAC_PC_MCT_MASK			0x007f0000
+#define  BGMAC_PC_MCT_SHIFT			16
+#define  BGMAC_PC_MTE				0x00800000
+#define BGMAC_TXQ_CTL				0x18c
+#define  BGMAC_TXQ_CTL_DBT_MASK			0x00000fff
+#define  BGMAC_TXQ_CTL_DBT_SHIFT		0
+#define BGMAC_RXQ_CTL				0x190
+#define  BGMAC_RXQ_CTL_DBT_MASK			0x00000fff
+#define  BGMAC_RXQ_CTL_DBT_SHIFT		0
+#define  BGMAC_RXQ_CTL_PTE			0x00001000
+#define  BGMAC_RXQ_CTL_MDP_MASK			0x3f000000
+#define  BGMAC_RXQ_CTL_MDP_SHIFT		24
+#define BGMAC_GPIO_SELECT			0x194
+#define BGMAC_GPIO_OUTPUT_EN			0x198
+/* For 0x1e0 see BCMA_CLKCTLST */
+#define BGMAC_HW_WAR				0x1e4
+#define BGMAC_PWR_CTL				0x1e8
+#define BGMAC_DMA_BASE0				0x200		/* Tx and Rx controller */
+#define BGMAC_DMA_BASE1				0x240		/* Tx controller only */
+#define BGMAC_DMA_BASE2				0x280		/* Tx controller only */
+#define BGMAC_DMA_BASE3				0x2C0		/* Tx controller only */
+#define BGMAC_TX_GOOD_OCTETS			0x300
+#define BGMAC_TX_GOOD_OCTETS_HIGH		0x304
+#define BGMAC_TX_GOOD_PKTS			0x308
+#define BGMAC_TX_OCTETS				0x30c
+#define BGMAC_TX_OCTETS_HIGH			0x310
+#define BGMAC_TX_PKTS				0x314
+#define BGMAC_TX_BROADCAST_PKTS			0x318
+#define BGMAC_TX_MULTICAST_PKTS			0x31c
+#define BGMAC_TX_LEN_64				0x320
+#define BGMAC_TX_LEN_65_TO_127			0x324
+#define BGMAC_TX_LEN_128_TO_255			0x328
+#define BGMAC_TX_LEN_256_TO_511			0x32c
+#define BGMAC_TX_LEN_512_TO_1023		0x330
+#define BGMAC_TX_LEN_1024_TO_1522		0x334
+#define BGMAC_TX_LEN_1523_TO_2047		0x338
+#define BGMAC_TX_LEN_2048_TO_4095		0x33c
+#define BGMAC_TX_LEN_4095_TO_8191		0x340
+#define BGMAC_TX_LEN_8192_TO_MAX		0x344
+#define BGMAC_TX_JABBER_PKTS			0x348		/* Error */
+#define BGMAC_TX_OVERSIZE_PKTS			0x34c		/* Error */
+#define BGMAC_TX_FRAGMENT_PKTS			0x350
+#define BGMAC_TX_UNDERRUNS			0x354		/* Error */
+#define BGMAC_TX_TOTAL_COLS			0x358
+#define BGMAC_TX_SINGLE_COLS			0x35c
+#define BGMAC_TX_MULTIPLE_COLS			0x360
+#define BGMAC_TX_EXCESSIVE_COLS			0x364		/* Error */
+#define BGMAC_TX_LATE_COLS			0x368		/* Error */
+#define BGMAC_TX_DEFERED			0x36c
+#define BGMAC_TX_CARRIER_LOST			0x370
+#define BGMAC_TX_PAUSE_PKTS			0x374
+#define BGMAC_TX_UNI_PKTS			0x378
+#define BGMAC_TX_Q0_PKTS			0x37c
+#define BGMAC_TX_Q0_OCTETS			0x380
+#define BGMAC_TX_Q0_OCTETS_HIGH			0x384
+#define BGMAC_TX_Q1_PKTS			0x388
+#define BGMAC_TX_Q1_OCTETS			0x38c
+#define BGMAC_TX_Q1_OCTETS_HIGH			0x390
+#define BGMAC_TX_Q2_PKTS			0x394
+#define BGMAC_TX_Q2_OCTETS			0x398
+#define BGMAC_TX_Q2_OCTETS_HIGH			0x39c
+#define BGMAC_TX_Q3_PKTS			0x3a0
+#define BGMAC_TX_Q3_OCTETS			0x3a4
+#define BGMAC_TX_Q3_OCTETS_HIGH			0x3a8
+#define BGMAC_RX_GOOD_OCTETS			0x3b0
+#define BGMAC_RX_GOOD_OCTETS_HIGH		0x3b4
+#define BGMAC_RX_GOOD_PKTS			0x3b8
+#define BGMAC_RX_OCTETS				0x3bc
+#define BGMAC_RX_OCTETS_HIGH			0x3c0
+#define BGMAC_RX_PKTS				0x3c4
+#define BGMAC_RX_BROADCAST_PKTS			0x3c8
+#define BGMAC_RX_MULTICAST_PKTS			0x3cc
+#define BGMAC_RX_LEN_64				0x3d0
+#define BGMAC_RX_LEN_65_TO_127			0x3d4
+#define BGMAC_RX_LEN_128_TO_255			0x3d8
+#define BGMAC_RX_LEN_256_TO_511			0x3dc
+#define BGMAC_RX_LEN_512_TO_1023		0x3e0
+#define BGMAC_RX_LEN_1024_TO_1522		0x3e4
+#define BGMAC_RX_LEN_1523_TO_2047		0x3e8
+#define BGMAC_RX_LEN_2048_TO_4095		0x3ec
+#define BGMAC_RX_LEN_4095_TO_8191		0x3f0
+#define BGMAC_RX_LEN_8192_TO_MAX		0x3f4
+#define BGMAC_RX_JABBER_PKTS			0x3f8		/* Error */
+#define BGMAC_RX_OVERSIZE_PKTS			0x3fc		/* Error */
+#define BGMAC_RX_FRAGMENT_PKTS			0x400
+#define BGMAC_RX_MISSED_PKTS			0x404		/* Error */
+#define BGMAC_RX_CRC_ALIGN_ERRS			0x408		/* Error */
+#define BGMAC_RX_UNDERSIZE			0x40c		/* Error */
+#define BGMAC_RX_CRC_ERRS			0x410		/* Error */
+#define BGMAC_RX_ALIGN_ERRS			0x414		/* Error */
+#define BGMAC_RX_SYMBOL_ERRS			0x418		/* Error */
+#define BGMAC_RX_PAUSE_PKTS			0x41c
+#define BGMAC_RX_NONPAUSE_PKTS			0x420
+#define BGMAC_RX_SACHANGES			0x424
+#define BGMAC_RX_UNI_PKTS			0x428
+#define BGMAC_UNIMAC_VERSION			0x800
+#define BGMAC_HDBKP_CTL				0x804
+#define BGMAC_CMDCFG				0x808		/* Configuration */
+#define  BGMAC_CMDCFG_TE			0x00000001	/* Set to activate TX */
+#define  BGMAC_CMDCFG_RE			0x00000002	/* Set to activate RX */
+#define  BGMAC_CMDCFG_ES_MASK			0x0000000c	/* Ethernet speed see gmac_speed */
+#define   BGMAC_CMDCFG_ES_10			0x00000000
+#define   BGMAC_CMDCFG_ES_100			0x00000004
+#define   BGMAC_CMDCFG_ES_1000			0x00000008
+#define  BGMAC_CMDCFG_PROM			0x00000010	/* Set to activate promiscuous mode */
+#define  BGMAC_CMDCFG_PAD_EN			0x00000020
+#define  BGMAC_CMDCFG_CF			0x00000040
+#define  BGMAC_CMDCFG_PF			0x00000080
+#define  BGMAC_CMDCFG_RPI			0x00000100	/* Unset to enable 802.3x tx flow control */
+#define  BGMAC_CMDCFG_TAI			0x00000200
+#define  BGMAC_CMDCFG_HD			0x00000400	/* Set if in half duplex mode */
+#define  BGMAC_CMDCFG_HD_SHIFT			10
+#define  BGMAC_CMDCFG_SR			0x00000800	/* Set to reset mode */
+#define  BGMAC_CMDCFG_ML			0x00008000	/* Set to activate mac loopback mode */
+#define  BGMAC_CMDCFG_AE			0x00400000
+#define  BGMAC_CMDCFG_CFE			0x00800000
+#define  BGMAC_CMDCFG_NLC			0x01000000
+#define  BGMAC_CMDCFG_RL			0x02000000
+#define  BGMAC_CMDCFG_RED			0x04000000
+#define  BGMAC_CMDCFG_PE			0x08000000
+#define  BGMAC_CMDCFG_TPI			0x10000000
+#define  BGMAC_CMDCFG_AT			0x20000000
+#define BGMAC_MACADDR_HIGH			0x80c		/* High 4 octets of own mac address */
+#define BGMAC_MACADDR_LOW			0x810		/* Low 2 octets of own mac address */
+#define BGMAC_RXMAX_LENGTH			0x814		/* Max receive frame length with vlan tag */
+#define BGMAC_PAUSEQUANTA			0x818
+#define BGMAC_MAC_MODE				0x844
+#define BGMAC_OUTERTAG				0x848
+#define BGMAC_INNERTAG				0x84c
+#define BGMAC_TXIPG				0x85c
+#define BGMAC_PAUSE_CTL				0xb30
+#define BGMAC_TX_FLUSH				0xb34
+#define BGMAC_RX_STATUS				0xb38
+#define BGMAC_TX_STATUS				0xb3c
+
+#define BGMAC_PHY_CTL				0x00
+#define  BGMAC_PHY_CTL_SPEED_MSB		0x0040
+#define  BGMAC_PHY_CTL_DUPLEX			0x0100		/* duplex mode */
+#define  BGMAC_PHY_CTL_RESTART			0x0200		/* restart autonegotiation */
+#define  BGMAC_PHY_CTL_ANENAB			0x1000		/* enable autonegotiation */
+#define  BGMAC_PHY_CTL_SPEED			0x2000
+#define  BGMAC_PHY_CTL_LOOP			0x4000		/* loopback */
+#define  BGMAC_PHY_CTL_RESET			0x8000		/* reset */
+/* Helpers */
+#define  BGMAC_PHY_CTL_SPEED_10			0
+#define  BGMAC_PHY_CTL_SPEED_100		BGMAC_PHY_CTL_SPEED
+#define  BGMAC_PHY_CTL_SPEED_1000		BGMAC_PHY_CTL_SPEED_MSB
+#define BGMAC_PHY_ADV				0x04
+#define  BGMAC_PHY_ADV_10HALF			0x0020		/* advertise 10MBits/s half duplex */
+#define  BGMAC_PHY_ADV_10FULL			0x0040		/* advertise 10MBits/s full duplex */
+#define  BGMAC_PHY_ADV_100HALF			0x0080		/* advertise 100MBits/s half duplex */
+#define  BGMAC_PHY_ADV_100FULL			0x0100		/* advertise 100MBits/s full duplex */
+#define BGMAC_PHY_ADV2				0x09
+#define  BGMAC_PHY_ADV2_1000HALF		0x0100		/* advertise 1000MBits/s half duplex */
+#define  BGMAC_PHY_ADV2_1000FULL		0x0200		/* advertise 1000MBits/s full duplex */
+
+/* BCMA GMAC core specific IO Control (BCMA_IOCTL) flags */
+#define BGMAC_BCMA_IOCTL_SW_CLKEN		0x00000004	/* PHY Clock Enable */
+#define BGMAC_BCMA_IOCTL_SW_RESET		0x00000008	/* PHY Reset */
+
+/* BCMA GMAC core specific IO status (BCMA_IOST) flags */
+#define BGMAC_BCMA_IOST_ATTACHED		0x00000800
+
+#define BGMAC_NUM_MIB_TX_REGS	\
+		(((BGMAC_TX_Q3_OCTETS_HIGH - BGMAC_TX_GOOD_OCTETS) / 4) + 1)
+#define BGMAC_NUM_MIB_RX_REGS	\
+		(((BGMAC_RX_UNI_PKTS - BGMAC_RX_GOOD_OCTETS) / 4) + 1)
+
+#define BGMAC_DMA_TX_CTL			0x00
+#define  BGMAC_DMA_TX_ENABLE			0x00000001
+#define  BGMAC_DMA_TX_SUSPEND			0x00000002
+#define  BGMAC_DMA_TX_LOOPBACK			0x00000004
+#define  BGMAC_DMA_TX_FLUSH			0x00000010
+#define  BGMAC_DMA_TX_PARITY_DISABLE		0x00000800
+#define  BGMAC_DMA_TX_ADDREXT_MASK		0x00030000
+#define  BGMAC_DMA_TX_ADDREXT_SHIFT		16
+#define BGMAC_DMA_TX_INDEX			0x04
+#define BGMAC_DMA_TX_RINGLO			0x08
+#define BGMAC_DMA_TX_RINGHI			0x0C
+#define BGMAC_DMA_TX_STATUS			0x10
+#define  BGMAC_DMA_TX_STATDPTR			0x00001FFF
+#define  BGMAC_DMA_TX_STAT			0xF0000000
+#define   BGMAC_DMA_TX_STAT_DISABLED		0x00000000
+#define   BGMAC_DMA_TX_STAT_ACTIVE		0x10000000
+#define   BGMAC_DMA_TX_STAT_IDLEWAIT		0x20000000
+#define   BGMAC_DMA_TX_STAT_STOPPED		0x30000000
+#define   BGMAC_DMA_TX_STAT_SUSP		0x40000000
+#define BGMAC_DMA_TX_ERROR			0x14
+#define  BGMAC_DMA_TX_ERRDPTR			0x0001FFFF
+#define  BGMAC_DMA_TX_ERR			0xF0000000
+#define   BGMAC_DMA_TX_ERR_NOERR		0x00000000
+#define   BGMAC_DMA_TX_ERR_PROT			0x10000000
+#define   BGMAC_DMA_TX_ERR_UNDERRUN		0x20000000
+#define   BGMAC_DMA_TX_ERR_TRANSFER		0x30000000
+#define   BGMAC_DMA_TX_ERR_DESCREAD		0x40000000
+#define   BGMAC_DMA_TX_ERR_CORE			0x50000000
+#define BGMAC_DMA_RX_CTL			0x20
+#define  BGMAC_DMA_RX_ENABLE			0x00000001
+#define  BGMAC_DMA_RX_FRAME_OFFSET_MASK		0x000000FE
+#define  BGMAC_DMA_RX_FRAME_OFFSET_SHIFT	1
+#define  BGMAC_DMA_RX_DIRECT_FIFO		0x00000100
+#define  BGMAC_DMA_RX_OVERFLOW_CONT		0x00000400
+#define  BGMAC_DMA_RX_PARITY_DISABLE		0x00000800
+#define  BGMAC_DMA_RX_ADDREXT_MASK		0x00030000
+#define  BGMAC_DMA_RX_ADDREXT_SHIFT		16
+#define BGMAC_DMA_RX_INDEX			0x24
+#define BGMAC_DMA_RX_RINGLO			0x28
+#define BGMAC_DMA_RX_RINGHI			0x2C
+#define BGMAC_DMA_RX_STATUS			0x30
+#define  BGMAC_DMA_RX_STATDPTR			0x00001FFF
+#define  BGMAC_DMA_RX_STAT			0xF0000000
+#define   BGMAC_DMA_RX_STAT_DISABLED		0x00000000
+#define   BGMAC_DMA_RX_STAT_ACTIVE		0x10000000
+#define   BGMAC_DMA_RX_STAT_IDLEWAIT		0x20000000
+#define   BGMAC_DMA_RX_STAT_STOPPED		0x30000000
+#define   BGMAC_DMA_RX_STAT_SUSP		0x40000000
+#define BGMAC_DMA_RX_ERROR			0x34
+#define  BGMAC_DMA_RX_ERRDPTR			0x0001FFFF
+#define  BGMAC_DMA_RX_ERR			0xF0000000
+#define   BGMAC_DMA_RX_ERR_NOERR		0x00000000
+#define   BGMAC_DMA_RX_ERR_PROT			0x10000000
+#define   BGMAC_DMA_RX_ERR_UNDERRUN		0x20000000
+#define   BGMAC_DMA_RX_ERR_TRANSFER		0x30000000
+#define   BGMAC_DMA_RX_ERR_DESCREAD		0x40000000
+#define   BGMAC_DMA_RX_ERR_CORE			0x50000000
+
+#define BGMAC_DESC_CTL0_EOT			0x10000000	/* End of ring */
+#define BGMAC_DESC_CTL0_IOC			0x20000000	/* IRQ on complete */
+#define BGMAC_DESC_CTL0_SOF			0x40000000	/* Start of frame */
+#define BGMAC_DESC_CTL0_EOF			0x80000000	/* End of frame */
+#define BGMAC_DESC_CTL1_LEN			0x00001FFF
+
+#define BGMAC_PHY_NOREGS			0x1E
+#define BGMAC_PHY_MASK				0x1F
+
+#define BGMAC_MAX_TX_RINGS			4
+#define BGMAC_MAX_RX_RINGS			1
+
+#define BGMAC_TX_RING_SLOTS			128
+#define BGMAC_RX_RING_SLOTS			512 - 1		/* Why -1? Well, Broadcom does that... */
+
+#define BGMAC_RX_HEADER_LEN			28		/* Last 24 bytes are unused. Well... */
+#define BGMAC_RX_FRAME_OFFSET			30		/* There are 2 unused bytes between header and real data */
+#define BGMAC_RX_MAX_FRAME_SIZE			1536		/* Copied from b44/tg3 */
+#define BGMAC_RX_BUF_SIZE			(BGMAC_RX_FRAME_OFFSET + BGMAC_RX_MAX_FRAME_SIZE)
+
+#define BGMAC_BFL_ENETROBO			0x0010		/* has ephy roboswitch spi */
+#define BGMAC_BFL_ENETADM			0x0080		/* has ADMtek switch */
+#define BGMAC_BFL_ENETVLAN			0x0100		/* can do vlan */
+
+#define BGMAC_CHIPCTL_1_IF_TYPE_MASK		0x00000030
+#define BGMAC_CHIPCTL_1_IF_TYPE_RMII		0x00000000
+#define BGMAC_CHIPCTL_1_IF_TYPE_MI		0x00000010
+#define BGMAC_CHIPCTL_1_IF_TYPE_RGMII		0x00000020
+#define BGMAC_CHIPCTL_1_SW_TYPE_MASK		0x000000C0
+#define BGMAC_CHIPCTL_1_SW_TYPE_EPHY		0x00000000
+#define BGMAC_CHIPCTL_1_SW_TYPE_EPHYMII		0x00000040
+#define BGMAC_CHIPCTL_1_SW_TYPE_EPHYRMII	0x00000080
+#define BGMAC_CHIPCTL_1_SW_TYPE_RGMI		0x000000C0
+#define BGMAC_CHIPCTL_1_RXC_DLL_BYPASS		0x00010000
+
+#define BGMAC_SPEED_10				0x0001
+#define BGMAC_SPEED_100				0x0002
+#define BGMAC_SPEED_1000			0x0004
+
+#define BGMAC_WEIGHT	64
+
+#define ETHER_MAX_LEN   1518
+
+struct bgmac_slot_info {
+	struct sk_buff *skb;
+	dma_addr_t dma_addr;
+};
+
+struct bgmac_dma_desc {
+	__le32 ctl0;
+	__le32 ctl1;
+	__le32 addr_low;
+	__le32 addr_high;
+} __packed;
+
+enum bgmac_dma_ring_type {
+	BGMAC_DMA_RING_TX,
+	BGMAC_DMA_RING_RX,
+};
+
+/**
+ * bgmac_dma_ring - contains info about DMA ring (either TX or RX one)
+ * @start: index of the first slot containing data
+ * @end: index of a slot that can *not* be read (yet)
+ *
+ * Be really aware of the specific @end meaning. It's an index of a slot *after*
+ * the one containing data that can be read. If @start equals @end the ring is
+ * empty.
+ */
+struct bgmac_dma_ring {
+	u16 num_slots;
+	u16 start;
+	u16 end;
+
+	u16 mmio_base;
+	struct bgmac_dma_desc *cpu_base;
+	dma_addr_t dma_base;
+
+	struct bgmac_slot_info slots[BGMAC_RX_RING_SLOTS];
+};
+
+struct bgmac_rx_header {
+	__le16 len;
+	__le16 flags;
+	__le16 pad[12];
+};
+
+struct bgmac {
+	struct bcma_device *core;
+	struct bcma_device *cmn; /* Reference to CMN core for BCM4706 */
+	struct net_device *net_dev;
+	struct napi_struct napi;
+
+	/* DMA */
+	struct bgmac_dma_ring tx_ring[BGMAC_MAX_TX_RINGS];
+	struct bgmac_dma_ring rx_ring[BGMAC_MAX_RX_RINGS];
+
+	/* Stats */
+	bool stats_grabbed;
+	u32 mib_tx_regs[BGMAC_NUM_MIB_TX_REGS];
+	u32 mib_rx_regs[BGMAC_NUM_MIB_RX_REGS];
+
+	/* Int */
+	u32 int_mask;
+	u32 int_status;
+
+	/* Speed-related */
+	int speed;
+	bool autoneg;
+	bool full_duplex;
+
+	u8 phyaddr;
+	bool has_robosw;
+
+	bool loopback;
+};
+
+static inline u32 bgmac_read(struct bgmac *bgmac, u16 offset)
+{
+	return bcma_read32(bgmac->core, offset);
+}
+
+static inline void bgmac_write(struct bgmac *bgmac, u16 offset, u32 value)
+{
+	bcma_write32(bgmac->core, offset, value);
+}
+
+static inline void bgmac_maskset(struct bgmac *bgmac, u16 offset, u32 mask,
+				   u32 set)
+{
+	bgmac_write(bgmac, offset, (bgmac_read(bgmac, offset) & mask) | set);
+}
+
+static inline void bgmac_mask(struct bgmac *bgmac, u16 offset, u32 mask)
+{
+	bgmac_maskset(bgmac, offset, mask, 0);
+}
+
+static inline void bgmac_set(struct bgmac *bgmac, u16 offset, u32 set)
+{
+	bgmac_maskset(bgmac, offset, ~0, set);
+}
+
+u16 bgmac_phy_read(struct bgmac *bgmac, u8 phyaddr, u8 reg);
+void bgmac_phy_write(struct bgmac *bgmac, u8 phyaddr, u8 reg, u16 value);
+
+#endif /* _BGMAC_H */
diff --git a/include/linux/bcma/bcma_driver_chipcommon.h b/include/linux/bcma/bcma_driver_chipcommon.h
index 9a0e3fa3ca95..ee332fab825b 100644
--- a/include/linux/bcma/bcma_driver_chipcommon.h
+++ b/include/linux/bcma/bcma_driver_chipcommon.h
@@ -634,4 +634,6 @@ extern void bcma_chipco_regctl_maskset(struct bcma_drv_cc *cc,
 				       u32 offset, u32 mask, u32 set);
 extern void bcma_pmu_spuravoid_pllupdate(struct bcma_drv_cc *cc, int spuravoid);
 
+extern u32 bcma_pmu_get_bus_clock(struct bcma_drv_cc *cc);
+
 #endif /* LINUX_BCMA_DRIVER_CC_H_ */
-- 
cgit v1.2.3


From 8f900a9a6e2691441ad763952d640ac44220e5dc Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Mon, 3 Dec 2012 20:07:05 +0100
Subject: usb: gadget: consider link speed for bMaxPower

The USB 2.0 specification says that bMaxPower is the maximum power
consumption expressed in 2 mA units and the USB 3.0 specification says
that it is expressed in 8 mA units.

This patch renames bMaxPower to MaxPower and the various /2 and *2 are
removed. Before reporting the config descriptor, the proper value is
computer based on the speed, all in-tree users are updated. MaxPower is
also increased to u16 so we can store the nokia gadget value which is
larger than the max value allowed for u8.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 drivers/usb/gadget/composite.c | 27 +++++++++++++++++++++++----
 drivers/usb/gadget/gmidi.c     |  2 +-
 drivers/usb/gadget/nokia.c     |  4 ++--
 drivers/usb/gadget/webcam.c    |  2 +-
 include/linux/usb/composite.h  |  5 +++--
 5 files changed, 30 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c
index 2a6bfe759c29..71475b6d8568 100644
--- a/drivers/usb/gadget/composite.c
+++ b/drivers/usb/gadget/composite.c
@@ -320,6 +320,25 @@ int usb_interface_id(struct usb_configuration *config,
 }
 EXPORT_SYMBOL_GPL(usb_interface_id);
 
+static u8 encode_bMaxPower(enum usb_device_speed speed,
+		struct usb_configuration *c)
+{
+	unsigned val;
+
+	if (c->MaxPower)
+		val = c->MaxPower;
+	else
+		val = CONFIG_USB_GADGET_VBUS_DRAW;
+	if (!val)
+		return 0;
+	switch (speed) {
+	case USB_SPEED_SUPER:
+		return DIV_ROUND_UP(val, 8);
+	default:
+		return DIV_ROUND_UP(val, 2);
+	};
+}
+
 static int config_buf(struct usb_configuration *config,
 		enum usb_device_speed speed, void *buf, u8 type)
 {
@@ -339,7 +358,7 @@ static int config_buf(struct usb_configuration *config,
 	c->bConfigurationValue = config->bConfigurationValue;
 	c->iConfiguration = config->iConfiguration;
 	c->bmAttributes = USB_CONFIG_ATT_ONE | config->bmAttributes;
-	c->bMaxPower = config->bMaxPower ? : (CONFIG_USB_GADGET_VBUS_DRAW / 2);
+	c->bMaxPower = encode_bMaxPower(speed, config);
 
 	/* There may be e.g. OTG descriptors */
 	if (config->descriptors) {
@@ -656,7 +675,7 @@ static int set_config(struct usb_composite_dev *cdev,
 	}
 
 	/* when we return, be sure our power usage is valid */
-	power = c->bMaxPower ? (2 * c->bMaxPower) : CONFIG_USB_GADGET_VBUS_DRAW;
+	power = c->MaxPower ? c->MaxPower : CONFIG_USB_GADGET_VBUS_DRAW;
 done:
 	usb_gadget_vbus_draw(gadget, power);
 	if (result >= 0 && cdev->delayed_status)
@@ -1518,10 +1537,10 @@ composite_resume(struct usb_gadget *gadget)
 				f->resume(f);
 		}
 
-		maxpower = cdev->config->bMaxPower;
+		maxpower = cdev->config->MaxPower;
 
 		usb_gadget_vbus_draw(gadget, maxpower ?
-			(2 * maxpower) : CONFIG_USB_GADGET_VBUS_DRAW);
+			maxpower : CONFIG_USB_GADGET_VBUS_DRAW);
 	}
 
 	cdev->suspended = 0;
diff --git a/drivers/usb/gadget/gmidi.c b/drivers/usb/gadget/gmidi.c
index 881aab86ae99..e879e2c9f461 100644
--- a/drivers/usb/gadget/gmidi.c
+++ b/drivers/usb/gadget/gmidi.c
@@ -125,7 +125,7 @@ static struct usb_configuration midi_config = {
 	.bConfigurationValue = 1,
 	/* .iConfiguration = DYNAMIC */
 	.bmAttributes	= USB_CONFIG_ATT_ONE,
-	.bMaxPower	= CONFIG_USB_GADGET_VBUS_DRAW / 2,
+	.MaxPower	= CONFIG_USB_GADGET_VBUS_DRAW,
 };
 
 static int __init midi_bind_config(struct usb_configuration *c)
diff --git a/drivers/usb/gadget/nokia.c b/drivers/usb/gadget/nokia.c
index 661600abace8..1475d663b527 100644
--- a/drivers/usb/gadget/nokia.c
+++ b/drivers/usb/gadget/nokia.c
@@ -133,7 +133,7 @@ static struct usb_configuration nokia_config_500ma_driver = {
 	.bConfigurationValue = 1,
 	/* .iConfiguration = DYNAMIC */
 	.bmAttributes	= USB_CONFIG_ATT_ONE,
-	.bMaxPower	= 250, /* 500mA */
+	.MaxPower	= 500,
 };
 
 static struct usb_configuration nokia_config_100ma_driver = {
@@ -141,7 +141,7 @@ static struct usb_configuration nokia_config_100ma_driver = {
 	.bConfigurationValue = 2,
 	/* .iConfiguration = DYNAMIC */
 	.bmAttributes	= USB_CONFIG_ATT_ONE | USB_CONFIG_ATT_SELFPOWER,
-	.bMaxPower	= 50, /* 100 mA */
+	.MaxPower	= 100,
 };
 
 static int __init nokia_bind(struct usb_composite_dev *cdev)
diff --git a/drivers/usb/gadget/webcam.c b/drivers/usb/gadget/webcam.c
index 69cf5c2cd335..8cef1e658c29 100644
--- a/drivers/usb/gadget/webcam.c
+++ b/drivers/usb/gadget/webcam.c
@@ -336,7 +336,7 @@ static struct usb_configuration webcam_config_driver = {
 	.bConfigurationValue	= 1,
 	.iConfiguration		= 0, /* dynamic */
 	.bmAttributes		= USB_CONFIG_ATT_SELFPOWER,
-	.bMaxPower		= CONFIG_USB_GADGET_VBUS_DRAW / 2,
+	.MaxPower		= CONFIG_USB_GADGET_VBUS_DRAW,
 };
 
 static int /* __init_or_exit */
diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h
index b09c37e04a91..dc512c9432d7 100644
--- a/include/linux/usb/composite.h
+++ b/include/linux/usb/composite.h
@@ -184,7 +184,8 @@ int config_ep_by_speed(struct usb_gadget *g, struct usb_function *f,
  * @bConfigurationValue: Copied into configuration descriptor.
  * @iConfiguration: Copied into configuration descriptor.
  * @bmAttributes: Copied into configuration descriptor.
- * @bMaxPower: Copied into configuration descriptor.
+ * @MaxPower: Power consumtion in mA. Used to compute bMaxPower in the
+ *	configuration descriptor after considering the bus speed.
  * @cdev: assigned by @usb_add_config() before calling @bind(); this is
  *	the device associated with this configuration.
  *
@@ -230,7 +231,7 @@ struct usb_configuration {
 	u8			bConfigurationValue;
 	u8			iConfiguration;
 	u8			bmAttributes;
-	u8			bMaxPower;
+	u16			MaxPower;
 
 	struct usb_composite_dev	*cdev;
 
-- 
cgit v1.2.3


From f6e916b82022cba67bdd0ec7df84e2bce2ef3f73 Mon Sep 17 00:00:00 2001
From: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Date: Tue, 20 Nov 2012 23:00:52 +0100
Subject: irqchip: add basic infrastructure

With the recent creation of the drivers/irqchip/ directory, it is
desirable to move irq controller drivers here. At the moment, the only
driver here is irq-bcm2835, the driver for the irq controller found in
the ARM BCM2835 SoC, present in Rasberry Pi systems. This irq
controller driver was exporting its initialization function and its
irq handling function through a header file in
<linux/irqchip/bcm2835.h>.

When proposing to also move another irq controller driver in
drivers/irqchip, Rob Herring raised the very valid point that moving
things to drivers/irqchip was good in order to remove more stuff from
arch/arm, but if it means adding gazillions of headers files in
include/linux/irqchip/, it would not be very nice.

So, upon the suggestion of Rob Herring and Arnd Bergmann, this commit
introduces a small infrastructure that defines a central
irqchip_init() function in drivers/irqchip/irqchip.c, which is meant
to be called as the ->init_irq() callback of ARM platforms. This
function calls of_irq_init() with an array of match strings and init
functions generated from a special linker section.

Note that the irq controller driver initialization function is
responsible for setting the global handle_arch_irq() variable, so that
ARM platforms no longer have to define the ->handle_irq field in their
DT_MACHINE structure.

A global header, <linux/irqchip.h> is also added to expose the single
irqchip_init() function to the reset of the kernel.

A further commit moves the BCM2835 irq controller driver to this new
small infrastructure, therefore removing the include/linux/irqchip/
directory.

Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Reviewed-by: Stephen Warren <swarren@wwwdotorg.org>
Reviewed-by: Rob Herring <rob.herring@calxeda.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
[rob.herring: reword commit message to reflect use of linker sections.]
Signed-off-by: Rob Herring <rob.herring@calxeda.com>
---
 drivers/irqchip/Kconfig           |  4 ++++
 drivers/irqchip/Makefile          |  2 ++
 drivers/irqchip/irqchip.c         | 30 ++++++++++++++++++++++++++++++
 drivers/irqchip/irqchip.h         | 29 +++++++++++++++++++++++++++++
 include/asm-generic/vmlinux.lds.h | 12 +++++++++++-
 include/linux/irqchip.h           | 16 ++++++++++++++++
 6 files changed, 92 insertions(+), 1 deletion(-)
 create mode 100644 drivers/irqchip/irqchip.c
 create mode 100644 drivers/irqchip/irqchip.h
 create mode 100644 include/linux/irqchip.h

(limited to 'include/linux')

diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig
index 62ca575701d3..93dfd8fa66c7 100644
--- a/drivers/irqchip/Kconfig
+++ b/drivers/irqchip/Kconfig
@@ -1,3 +1,7 @@
+config IRQCHIP
+	def_bool y
+	depends on OF_IRQ
+
 config VERSATILE_FPGA_IRQ
 	bool
 	select IRQ_DOMAIN
diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile
index bf4609a5bd9d..29b78c9449c8 100644
--- a/drivers/irqchip/Makefile
+++ b/drivers/irqchip/Makefile
@@ -1,3 +1,5 @@
+obj-$(CONFIG_IRQCHIP)			+= irqchip.o
+
 obj-$(CONFIG_ARCH_BCM2835)		+= irq-bcm2835.o
 obj-$(CONFIG_ARCH_SUNXI)		+= irq-sunxi.o
 obj-$(CONFIG_VERSATILE_FPGA_IRQ)	+= irq-versatile-fpga.o
diff --git a/drivers/irqchip/irqchip.c b/drivers/irqchip/irqchip.c
new file mode 100644
index 000000000000..f496afce29de
--- /dev/null
+++ b/drivers/irqchip/irqchip.c
@@ -0,0 +1,30 @@
+/*
+ * Copyright (C) 2012 Thomas Petazzoni
+ *
+ * Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2.  This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#include <linux/init.h>
+#include <linux/of_irq.h>
+
+#include "irqchip.h"
+
+/*
+ * This special of_device_id is the sentinel at the end of the
+ * of_device_id[] array of all irqchips. It is automatically placed at
+ * the end of the array by the linker, thanks to being part of a
+ * special section.
+ */
+static const struct of_device_id
+irqchip_of_match_end __used __section(__irqchip_of_end);
+
+extern struct of_device_id __irqchip_begin[];
+
+void __init irqchip_init(void)
+{
+	of_irq_init(__irqchip_begin);
+}
diff --git a/drivers/irqchip/irqchip.h b/drivers/irqchip/irqchip.h
new file mode 100644
index 000000000000..e445ba2d6add
--- /dev/null
+++ b/drivers/irqchip/irqchip.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2012 Thomas Petazzoni
+ *
+ * Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2.  This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#ifndef _IRQCHIP_H
+#define _IRQCHIP_H
+
+/*
+ * This macro must be used by the different irqchip drivers to declare
+ * the association between their DT compatible string and their
+ * initialization function.
+ *
+ * @name: name that must be unique accross all IRQCHIP_DECLARE of the
+ * same file.
+ * @compstr: compatible string of the irqchip driver
+ * @fn: initialization function
+ */
+#define IRQCHIP_DECLARE(name,compstr,fn)				\
+	static const struct of_device_id irqchip_of_match_##name	\
+	__used __section(__irqchip_of_table)				\
+	= { .compatible = compstr, .data = fn }
+
+#endif
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index d1ea7ce0b4cb..c80c599897b9 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -149,6 +149,15 @@
 #define TRACE_SYSCALLS()
 #endif
 
+#ifdef CONFIG_IRQCHIP
+#define IRQCHIP_OF_MATCH_TABLE()					\
+	. = ALIGN(8);							\
+	VMLINUX_SYMBOL(__irqchip_begin) = .;				\
+	*(__irqchip_of_table)		  				\
+	*(__irqchip_of_end)
+#else
+#define IRQCHIP_OF_MATCH_TABLE()
+#endif
 
 #define KERNEL_DTB()							\
 	STRUCT_ALIGN();							\
@@ -493,7 +502,8 @@
 	DEV_DISCARD(init.rodata)					\
 	CPU_DISCARD(init.rodata)					\
 	MEM_DISCARD(init.rodata)					\
-	KERNEL_DTB()
+	KERNEL_DTB()							\
+	IRQCHIP_OF_MATCH_TABLE()
 
 #define INIT_TEXT							\
 	*(.init.text)							\
diff --git a/include/linux/irqchip.h b/include/linux/irqchip.h
new file mode 100644
index 000000000000..e0006f1d35a0
--- /dev/null
+++ b/include/linux/irqchip.h
@@ -0,0 +1,16 @@
+/*
+ * Copyright (C) 2012 Thomas Petazzoni
+ *
+ * Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2.  This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#ifndef _LINUX_IRQCHIP_H
+#define _LINUX_IRQCHIP_H
+
+void irqchip_init(void);
+
+#endif
-- 
cgit v1.2.3


From 54b956b903607f8f8878754dd4352da6a54a1da2 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Thu, 10 Jan 2013 10:57:01 -0800
Subject: Remove __dev* markings from init.h

Now that all in-kernel users of __dev* are gone, let's remove them from
init.h to keep them from popping up again and again.

Thanks to Bill Pemberton for doing all of the hard work to make removal
of this possible.

Cc: Bill Pemberton <wfp5p@virginia.edu>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/init.h | 20 --------------------
 1 file changed, 20 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/init.h b/include/linux/init.h
index a799273714ac..10ed4f436458 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -93,14 +93,6 @@
 
 #define __exit          __section(.exit.text) __exitused __cold notrace
 
-/* Used for HOTPLUG, but that is always enabled now, so just make them noops */
-#define __devinit
-#define __devinitdata
-#define __devinitconst
-#define __devexit
-#define __devexitdata
-#define __devexitconst
-
 /* Used for HOTPLUG_CPU */
 #define __cpuinit        __section(.cpuinit.text) __cold notrace
 #define __cpuinitdata    __section(.cpuinit.data)
@@ -337,18 +329,6 @@ void __init parse_early_options(char *cmdline);
 #define __INITRODATA_OR_MODULE __INITRODATA
 #endif /*CONFIG_MODULES*/
 
-/* Functions marked as __devexit may be discarded at kernel link time, depending
-   on config options.  Newer versions of binutils detect references from
-   retained sections to discarded sections and flag an error.  Pointers to
-   __devexit functions must use __devexit_p(function_name), the wrapper will
-   insert either the function_name or NULL, depending on the config options.
- */
-#if defined(MODULE) || defined(CONFIG_HOTPLUG)
-#define __devexit_p(x) x
-#else
-#define __devexit_p(x) NULL
-#endif
-
 #ifdef MODULE
 #define __exit_p(x) x
 #else
-- 
cgit v1.2.3


From 416186fbf8c5b4e4465a10c6ac7a45b6c47144b2 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Thu, 10 Jan 2013 08:56:51 +0000
Subject: net: Split core bits of netdev_pick_tx into __netdev_pick_tx

This change splits the core bits of netdev_pick_tx into a separate function.
The main idea behind this is to make this code accessible to select queue
functions when they decide to process the standard path instead of their
own custom path in their select queue routine.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  1 +
 net/core/dev.c            | 57 ++++++++++++++++++++++++++---------------------
 2 files changed, 33 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 0209ac328e8a..608c3ac4d045 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1403,6 +1403,7 @@ static inline void netdev_for_each_tx_queue(struct net_device *dev,
 
 extern struct netdev_queue *netdev_pick_tx(struct net_device *dev,
 					   struct sk_buff *skb);
+extern u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb);
 
 /*
  * Net namespace inlines
diff --git a/net/core/dev.c b/net/core/dev.c
index 4794cae84939..81ff67149f62 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2495,37 +2495,44 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
 #endif
 }
 
-struct netdev_queue *netdev_pick_tx(struct net_device *dev,
-				    struct sk_buff *skb)
+u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb)
 {
-	int queue_index;
-	const struct net_device_ops *ops = dev->netdev_ops;
-
-	if (dev->real_num_tx_queues == 1)
-		queue_index = 0;
-	else if (ops->ndo_select_queue) {
-		queue_index = ops->ndo_select_queue(dev, skb);
-		queue_index = dev_cap_txqueue(dev, queue_index);
-	} else {
-		struct sock *sk = skb->sk;
-		queue_index = sk_tx_queue_get(sk);
+	struct sock *sk = skb->sk;
+	int queue_index = sk_tx_queue_get(sk);
 
-		if (queue_index < 0 || skb->ooo_okay ||
-		    queue_index >= dev->real_num_tx_queues) {
-			int old_index = queue_index;
+	if (queue_index < 0 || skb->ooo_okay ||
+	    queue_index >= dev->real_num_tx_queues) {
+		int new_index = get_xps_queue(dev, skb);
+		if (new_index < 0)
+			new_index = skb_tx_hash(dev, skb);
 
-			queue_index = get_xps_queue(dev, skb);
-			if (queue_index < 0)
-				queue_index = skb_tx_hash(dev, skb);
-
-			if (queue_index != old_index && sk) {
-				struct dst_entry *dst =
+		if (queue_index != new_index && sk) {
+			struct dst_entry *dst =
 				    rcu_dereference_check(sk->sk_dst_cache, 1);
 
-				if (dst && skb_dst(skb) == dst)
-					sk_tx_queue_set(sk, queue_index);
-			}
+			if (dst && skb_dst(skb) == dst)
+				sk_tx_queue_set(sk, queue_index);
+
 		}
+
+		queue_index = new_index;
+	}
+
+	return queue_index;
+}
+
+struct netdev_queue *netdev_pick_tx(struct net_device *dev,
+				    struct sk_buff *skb)
+{
+	int queue_index = 0;
+
+	if (dev->real_num_tx_queues != 1) {
+		const struct net_device_ops *ops = dev->netdev_ops;
+		if (ops->ndo_select_queue)
+			queue_index = ops->ndo_select_queue(dev, skb);
+		else
+			queue_index = __netdev_pick_tx(dev, skb);
+		queue_index = dev_cap_txqueue(dev, queue_index);
 	}
 
 	skb_set_queue_mapping(skb, queue_index);
-- 
cgit v1.2.3


From 537c00de1c9ba9876b91d869e84caceefe2b8bf9 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Thu, 10 Jan 2013 08:57:02 +0000
Subject: net: Add functions netif_reset_xps_queue and netif_set_xps_queue

This patch adds two functions, netif_reset_xps_queue and
netif_set_xps_queue.  The main idea behind these two functions is to
provide a mechanism through which drivers can update their defaults in
regards to XPS.

Currently no such mechanism exists and as a result we cannot use XPS for
things such as ATR which would require a basic configuration to start in
which the Tx queues are mapped to CPUs via a 1:1 mapping.  With this change
I am making it possible for drivers such as ixgbe to be able to use the XPS
feature by controlling the default configuration.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  13 ++++
 net/core/dev.c            | 155 ++++++++++++++++++++++++++++++++++++++++++++++
 net/core/net-sysfs.c      | 148 ++-----------------------------------------
 3 files changed, 173 insertions(+), 143 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 608c3ac4d045..59fe9da4e315 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2103,6 +2103,19 @@ static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index)
 		__netif_schedule(txq->qdisc);
 }
 
+#ifdef CONFIG_XPS
+extern void netif_reset_xps_queue(struct net_device *dev, u16 index);
+extern int netif_set_xps_queue(struct net_device *dev, struct cpumask *mask,
+			       u16 index);
+#else
+static inline int netif_set_xps_queue(struct net_device *dev,
+				      struct cpumask *mask,
+				      u16 index)
+{
+	return 0;
+}
+#endif
+
 /*
  * Returns a Tx hash for the given packet when dev->real_num_tx_queues is used
  * as a distribution range limit for the returned value.
diff --git a/net/core/dev.c b/net/core/dev.c
index 81ff67149f62..257b29516f69 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1857,6 +1857,161 @@ static void netif_setup_tc(struct net_device *dev, unsigned int txq)
 	}
 }
 
+#ifdef CONFIG_XPS
+static DEFINE_MUTEX(xps_map_mutex);
+#define xmap_dereference(P)		\
+	rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex))
+
+void netif_reset_xps_queue(struct net_device *dev, u16 index)
+{
+	struct xps_dev_maps *dev_maps;
+	struct xps_map *map;
+	int i, pos, nonempty = 0;
+
+	mutex_lock(&xps_map_mutex);
+	dev_maps = xmap_dereference(dev->xps_maps);
+
+	if (!dev_maps)
+		goto out_no_maps;
+
+	for_each_possible_cpu(i) {
+		map = xmap_dereference(dev_maps->cpu_map[i]);
+		if (!map)
+			continue;
+
+		for (pos = 0; pos < map->len; pos++)
+			if (map->queues[pos] == index)
+				break;
+
+		if (pos < map->len) {
+			if (map->len > 1) {
+				map->queues[pos] = map->queues[--map->len];
+			} else {
+				RCU_INIT_POINTER(dev_maps->cpu_map[i], NULL);
+				kfree_rcu(map, rcu);
+				map = NULL;
+			}
+		}
+		if (map)
+			nonempty = 1;
+	}
+
+	if (!nonempty) {
+		RCU_INIT_POINTER(dev->xps_maps, NULL);
+		kfree_rcu(dev_maps, rcu);
+	}
+
+out_no_maps:
+	mutex_unlock(&xps_map_mutex);
+}
+
+int netif_set_xps_queue(struct net_device *dev, struct cpumask *mask, u16 index)
+{
+	int i, cpu, pos, map_len, alloc_len, need_set;
+	struct xps_map *map, *new_map;
+	struct xps_dev_maps *dev_maps, *new_dev_maps;
+	int nonempty = 0;
+	int numa_node_id = -2;
+	int maps_sz = max_t(unsigned int, XPS_DEV_MAPS_SIZE, L1_CACHE_BYTES);
+
+	new_dev_maps = kzalloc(maps_sz, GFP_KERNEL);
+	if (!new_dev_maps)
+		return -ENOMEM;
+
+	mutex_lock(&xps_map_mutex);
+
+	dev_maps = xmap_dereference(dev->xps_maps);
+
+	for_each_possible_cpu(cpu) {
+		map = dev_maps ?
+			xmap_dereference(dev_maps->cpu_map[cpu]) : NULL;
+		new_map = map;
+		if (map) {
+			for (pos = 0; pos < map->len; pos++)
+				if (map->queues[pos] == index)
+					break;
+			map_len = map->len;
+			alloc_len = map->alloc_len;
+		} else
+			pos = map_len = alloc_len = 0;
+
+		need_set = cpumask_test_cpu(cpu, mask) && cpu_online(cpu);
+#ifdef CONFIG_NUMA
+		if (need_set) {
+			if (numa_node_id == -2)
+				numa_node_id = cpu_to_node(cpu);
+			else if (numa_node_id != cpu_to_node(cpu))
+				numa_node_id = -1;
+		}
+#endif
+		if (need_set && pos >= map_len) {
+			/* Need to add queue to this CPU's map */
+			if (map_len >= alloc_len) {
+				alloc_len = alloc_len ?
+				    2 * alloc_len : XPS_MIN_MAP_ALLOC;
+				new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len),
+						       GFP_KERNEL,
+						       cpu_to_node(cpu));
+				if (!new_map)
+					goto error;
+				new_map->alloc_len = alloc_len;
+				for (i = 0; i < map_len; i++)
+					new_map->queues[i] = map->queues[i];
+				new_map->len = map_len;
+			}
+			new_map->queues[new_map->len++] = index;
+		} else if (!need_set && pos < map_len) {
+			/* Need to remove queue from this CPU's map */
+			if (map_len > 1)
+				new_map->queues[pos] =
+				    new_map->queues[--new_map->len];
+			else
+				new_map = NULL;
+		}
+		RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], new_map);
+	}
+
+	/* Cleanup old maps */
+	for_each_possible_cpu(cpu) {
+		map = dev_maps ?
+			xmap_dereference(dev_maps->cpu_map[cpu]) : NULL;
+		if (map && xmap_dereference(new_dev_maps->cpu_map[cpu]) != map)
+			kfree_rcu(map, rcu);
+		if (new_dev_maps->cpu_map[cpu])
+			nonempty = 1;
+	}
+
+	if (nonempty) {
+		rcu_assign_pointer(dev->xps_maps, new_dev_maps);
+	} else {
+		kfree(new_dev_maps);
+		RCU_INIT_POINTER(dev->xps_maps, NULL);
+	}
+
+	if (dev_maps)
+		kfree_rcu(dev_maps, rcu);
+
+	netdev_queue_numa_node_write(netdev_get_tx_queue(dev, index),
+				     (numa_node_id >= 0) ? numa_node_id :
+				     NUMA_NO_NODE);
+
+	mutex_unlock(&xps_map_mutex);
+
+	return 0;
+error:
+	mutex_unlock(&xps_map_mutex);
+
+	if (new_dev_maps)
+		for_each_possible_cpu(i)
+			kfree(rcu_dereference_protected(
+				new_dev_maps->cpu_map[i],
+				1));
+	kfree(new_dev_maps);
+	return -ENOMEM;
+}
+EXPORT_SYMBOL(netif_set_xps_queue);
+
+#endif
 /*
  * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues
  * greater then real_num_tx_queues stale skbs on the qdisc must be flushed.
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 29c884a74c38..5ad489d5d062 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -1002,54 +1002,14 @@ static ssize_t show_xps_map(struct netdev_queue *queue,
 	return len;
 }
 
-static DEFINE_MUTEX(xps_map_mutex);
-#define xmap_dereference(P)		\
-	rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex))
-
 static void xps_queue_release(struct netdev_queue *queue)
 {
 	struct net_device *dev = queue->dev;
-	struct xps_dev_maps *dev_maps;
-	struct xps_map *map;
 	unsigned long index;
-	int i, pos, nonempty = 0;
 
 	index = get_netdev_queue_index(queue);
 
-	mutex_lock(&xps_map_mutex);
-	dev_maps = xmap_dereference(dev->xps_maps);
-
-	if (dev_maps) {
-		for_each_possible_cpu(i) {
-			map = xmap_dereference(dev_maps->cpu_map[i]);
-			if (!map)
-				continue;
-
-			for (pos = 0; pos < map->len; pos++)
-				if (map->queues[pos] == index)
-					break;
-
-			if (pos < map->len) {
-				if (map->len > 1)
-					map->queues[pos] =
-					    map->queues[--map->len];
-				else {
-					RCU_INIT_POINTER(dev_maps->cpu_map[i],
-					    NULL);
-					kfree_rcu(map, rcu);
-					map = NULL;
-				}
-			}
-			if (map)
-				nonempty = 1;
-		}
-
-		if (!nonempty) {
-			RCU_INIT_POINTER(dev->xps_maps, NULL);
-			kfree_rcu(dev_maps, rcu);
-		}
-	}
-	mutex_unlock(&xps_map_mutex);
+	netif_reset_xps_queue(dev, index);
 }
 
 static ssize_t store_xps_map(struct netdev_queue *queue,
@@ -1057,13 +1017,9 @@ static ssize_t store_xps_map(struct netdev_queue *queue,
 		      const char *buf, size_t len)
 {
 	struct net_device *dev = queue->dev;
-	cpumask_var_t mask;
-	int err, i, cpu, pos, map_len, alloc_len, need_set;
 	unsigned long index;
-	struct xps_map *map, *new_map;
-	struct xps_dev_maps *dev_maps, *new_dev_maps;
-	int nonempty = 0;
-	int numa_node_id = -2;
+	cpumask_var_t mask;
+	int err;
 
 	if (!capable(CAP_NET_ADMIN))
 		return -EPERM;
@@ -1079,105 +1035,11 @@ static ssize_t store_xps_map(struct netdev_queue *queue,
 		return err;
 	}
 
-	new_dev_maps = kzalloc(max_t(unsigned int,
-	    XPS_DEV_MAPS_SIZE, L1_CACHE_BYTES), GFP_KERNEL);
-	if (!new_dev_maps) {
-		free_cpumask_var(mask);
-		return -ENOMEM;
-	}
-
-	mutex_lock(&xps_map_mutex);
-
-	dev_maps = xmap_dereference(dev->xps_maps);
-
-	for_each_possible_cpu(cpu) {
-		map = dev_maps ?
-			xmap_dereference(dev_maps->cpu_map[cpu]) : NULL;
-		new_map = map;
-		if (map) {
-			for (pos = 0; pos < map->len; pos++)
-				if (map->queues[pos] == index)
-					break;
-			map_len = map->len;
-			alloc_len = map->alloc_len;
-		} else
-			pos = map_len = alloc_len = 0;
-
-		need_set = cpumask_test_cpu(cpu, mask) && cpu_online(cpu);
-#ifdef CONFIG_NUMA
-		if (need_set) {
-			if (numa_node_id == -2)
-				numa_node_id = cpu_to_node(cpu);
-			else if (numa_node_id != cpu_to_node(cpu))
-				numa_node_id = -1;
-		}
-#endif
-		if (need_set && pos >= map_len) {
-			/* Need to add queue to this CPU's map */
-			if (map_len >= alloc_len) {
-				alloc_len = alloc_len ?
-				    2 * alloc_len : XPS_MIN_MAP_ALLOC;
-				new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len),
-						       GFP_KERNEL,
-						       cpu_to_node(cpu));
-				if (!new_map)
-					goto error;
-				new_map->alloc_len = alloc_len;
-				for (i = 0; i < map_len; i++)
-					new_map->queues[i] = map->queues[i];
-				new_map->len = map_len;
-			}
-			new_map->queues[new_map->len++] = index;
-		} else if (!need_set && pos < map_len) {
-			/* Need to remove queue from this CPU's map */
-			if (map_len > 1)
-				new_map->queues[pos] =
-				    new_map->queues[--new_map->len];
-			else
-				new_map = NULL;
-		}
-		RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], new_map);
-	}
-
-	/* Cleanup old maps */
-	for_each_possible_cpu(cpu) {
-		map = dev_maps ?
-			xmap_dereference(dev_maps->cpu_map[cpu]) : NULL;
-		if (map && xmap_dereference(new_dev_maps->cpu_map[cpu]) != map)
-			kfree_rcu(map, rcu);
-		if (new_dev_maps->cpu_map[cpu])
-			nonempty = 1;
-	}
-
-	if (nonempty) {
-		rcu_assign_pointer(dev->xps_maps, new_dev_maps);
-	} else {
-		kfree(new_dev_maps);
-		RCU_INIT_POINTER(dev->xps_maps, NULL);
-	}
-
-	if (dev_maps)
-		kfree_rcu(dev_maps, rcu);
-
-	netdev_queue_numa_node_write(queue, (numa_node_id >= 0) ? numa_node_id :
-					    NUMA_NO_NODE);
-
-	mutex_unlock(&xps_map_mutex);
+	err = netif_set_xps_queue(dev, mask, index);
 
 	free_cpumask_var(mask);
-	return len;
 
-error:
-	mutex_unlock(&xps_map_mutex);
-
-	if (new_dev_maps)
-		for_each_possible_cpu(i)
-			kfree(rcu_dereference_protected(
-				new_dev_maps->cpu_map[i],
-				1));
-	kfree(new_dev_maps);
-	free_cpumask_var(mask);
-	return -ENOMEM;
+	return err ? : len;
 }
 
 static struct netdev_queue_attribute xps_cpus_attribute =
-- 
cgit v1.2.3


From 024e9679a2daaa67642693366fb63a6b8c61b9f3 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Thu, 10 Jan 2013 08:57:46 +0000
Subject: net: Add support for XPS without sysfs being defined

This patch makes it so that we can support transmit packet steering without
sysfs needing to be enabled.  The reason for making this change is to make
it so that a driver can make use of the XPS even while the sysfs portion of
the interface is not present.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  1 -
 net/Kconfig               |  2 +-
 net/core/dev.c            | 26 ++++++++++++++++++++------
 net/core/net-sysfs.c      | 14 --------------
 4 files changed, 21 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 59fe9da4e315..aa7ad8a96e70 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2104,7 +2104,6 @@ static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index)
 }
 
 #ifdef CONFIG_XPS
-extern void netif_reset_xps_queue(struct net_device *dev, u16 index);
 extern int netif_set_xps_queue(struct net_device *dev, struct cpumask *mask,
 			       u16 index);
 #else
diff --git a/net/Kconfig b/net/Kconfig
index 30b48f523135..3cc5be0fe420 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -232,7 +232,7 @@ config RFS_ACCEL
 
 config XPS
 	boolean
-	depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS
+	depends on SMP && USE_GENERIC_SMP_HELPERS
 	default y
 
 config NETPRIO_CGROUP
diff --git a/net/core/dev.c b/net/core/dev.c
index 41d5120df469..95de4c011808 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1887,10 +1887,10 @@ static struct xps_map *remove_xps_queue(struct xps_dev_maps *dev_maps,
 	return map;
 }
 
-void netif_reset_xps_queue(struct net_device *dev, u16 index)
+static void netif_reset_xps_queues_gt(struct net_device *dev, u16 index)
 {
 	struct xps_dev_maps *dev_maps;
-	int cpu;
+	int cpu, i;
 	bool active = false;
 
 	mutex_lock(&xps_map_mutex);
@@ -1900,7 +1900,11 @@ void netif_reset_xps_queue(struct net_device *dev, u16 index)
 		goto out_no_maps;
 
 	for_each_possible_cpu(cpu) {
-		if (remove_xps_queue(dev_maps, cpu, index))
+		for (i = index; i < dev->num_tx_queues; i++) {
+			if (!remove_xps_queue(dev_maps, cpu, i))
+				break;
+		}
+		if (i == dev->num_tx_queues)
 			active = true;
 	}
 
@@ -1909,8 +1913,10 @@ void netif_reset_xps_queue(struct net_device *dev, u16 index)
 		kfree_rcu(dev_maps, rcu);
 	}
 
-	netdev_queue_numa_node_write(netdev_get_tx_queue(dev, index),
-				     NUMA_NO_NODE);
+	for (i = index; i < dev->num_tx_queues; i++)
+		netdev_queue_numa_node_write(netdev_get_tx_queue(dev, i),
+					     NUMA_NO_NODE);
+
 out_no_maps:
 	mutex_unlock(&xps_map_mutex);
 }
@@ -2096,8 +2102,12 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
 		if (dev->num_tc)
 			netif_setup_tc(dev, txq);
 
-		if (txq < dev->real_num_tx_queues)
+		if (txq < dev->real_num_tx_queues) {
 			qdisc_reset_all_tx_gt(dev, txq);
+#ifdef CONFIG_XPS
+			netif_reset_xps_queues_gt(dev, txq);
+#endif
+		}
 	}
 
 	dev->real_num_tx_queues = txq;
@@ -5919,6 +5929,10 @@ static void rollback_registered_many(struct list_head *head)
 
 		/* Remove entries from kobject tree */
 		netdev_unregister_kobject(dev);
+#ifdef CONFIG_XPS
+		/* Remove XPS queueing entries */
+		netif_reset_xps_queues_gt(dev, 0);
+#endif
 	}
 
 	synchronize_net();
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 5ad489d5d062..a5b89a6fec6d 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -1002,16 +1002,6 @@ static ssize_t show_xps_map(struct netdev_queue *queue,
 	return len;
 }
 
-static void xps_queue_release(struct netdev_queue *queue)
-{
-	struct net_device *dev = queue->dev;
-	unsigned long index;
-
-	index = get_netdev_queue_index(queue);
-
-	netif_reset_xps_queue(dev, index);
-}
-
 static ssize_t store_xps_map(struct netdev_queue *queue,
 		      struct netdev_queue_attribute *attribute,
 		      const char *buf, size_t len)
@@ -1058,10 +1048,6 @@ static void netdev_queue_release(struct kobject *kobj)
 {
 	struct netdev_queue *queue = to_netdev_queue(kobj);
 
-#ifdef CONFIG_XPS
-	xps_queue_release(queue);
-#endif
-
 	memset(kobj, 0, sizeof(*kobj));
 	dev_put(queue->dev);
 }
-- 
cgit v1.2.3


From b43e1065cab4b5be90c016b2f076086b70cd1556 Mon Sep 17 00:00:00 2001
From: Lv Zheng <lv.zheng@intel.com>
Date: Sat, 12 Jan 2013 15:29:38 +0000
Subject: ACPICA: Cleanup table handler naming conflicts.

This is a cosmetic patch only. Comparison of the resulting binary showed
only line number differences.

This patch does not affect the generation of the Linux binary.
This patch decreases 44 lines of 20121114 divergence.diff.

There are naming conflicts between Linux and ACPICA on table handlers. This
patch cleans up this conflicts to reduce the source code diff between Linux
and ACPICA.

Signed-off-by: Lv Zheng <lv.zheng@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/acpica/acglobal.h |  2 +-
 drivers/acpi/acpica/tbxface.c  |  4 ++--
 drivers/acpi/numa.c            |  2 +-
 drivers/acpi/tables.c          |  6 +++---
 include/acpi/acpixf.h          |  4 ++--
 include/acpi/actypes.h         |  2 +-
 include/linux/acpi.h           | 15 ++++++++++-----
 7 files changed, 20 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/acpica/acglobal.h b/drivers/acpi/acpica/acglobal.h
index 864806e1ac54..585d364fb7e5 100644
--- a/drivers/acpi/acpica/acglobal.h
+++ b/drivers/acpi/acpica/acglobal.h
@@ -252,7 +252,7 @@ ACPI_EXTERN acpi_cache_t *acpi_gbl_operand_cache;
 ACPI_EXTERN struct acpi_global_notify_handler acpi_gbl_global_notify[2];
 ACPI_EXTERN acpi_exception_handler acpi_gbl_exception_handler;
 ACPI_EXTERN acpi_init_handler acpi_gbl_init_handler;
-ACPI_EXTERN acpi_tbl_handler acpi_gbl_table_handler;
+ACPI_EXTERN acpi_table_handler acpi_gbl_table_handler;
 ACPI_EXTERN void *acpi_gbl_table_handler_context;
 ACPI_EXTERN struct acpi_walk_state *acpi_gbl_breakpoint_walk;
 ACPI_EXTERN acpi_interface_handler acpi_gbl_interface_handler;
diff --git a/drivers/acpi/acpica/tbxface.c b/drivers/acpi/acpica/tbxface.c
index d102fe7f709b..2115f2242a29 100644
--- a/drivers/acpi/acpica/tbxface.c
+++ b/drivers/acpi/acpica/tbxface.c
@@ -436,7 +436,7 @@ ACPI_EXPORT_SYMBOL(acpi_get_table_by_index)
  *
  ******************************************************************************/
 acpi_status
-acpi_install_table_handler(acpi_tbl_handler handler, void *context)
+acpi_install_table_handler(acpi_table_handler handler, void *context)
 {
 	acpi_status status;
 
@@ -482,7 +482,7 @@ ACPI_EXPORT_SYMBOL(acpi_install_table_handler)
  * DESCRIPTION: Remove table event handler
  *
  ******************************************************************************/
-acpi_status acpi_remove_table_handler(acpi_tbl_handler handler)
+acpi_status acpi_remove_table_handler(acpi_table_handler handler)
 {
 	acpi_status status;
 
diff --git a/drivers/acpi/numa.c b/drivers/acpi/numa.c
index cb31298ca684..5ddbc65e0f6e 100644
--- a/drivers/acpi/numa.c
+++ b/drivers/acpi/numa.c
@@ -273,7 +273,7 @@ static int __init acpi_parse_srat(struct acpi_table_header *table)
 
 static int __init
 acpi_table_parse_srat(enum acpi_srat_type id,
-		      acpi_table_entry_handler handler, unsigned int max_entries)
+		      acpi_tbl_entry_handler handler, unsigned int max_entries)
 {
 	return acpi_table_parse_entries(ACPI_SIG_SRAT,
 					    sizeof(struct acpi_table_srat), id,
diff --git a/drivers/acpi/tables.c b/drivers/acpi/tables.c
index 2572d9715bda..d67a1fe07f0e 100644
--- a/drivers/acpi/tables.c
+++ b/drivers/acpi/tables.c
@@ -204,7 +204,7 @@ int __init
 acpi_table_parse_entries(char *id,
 			     unsigned long table_size,
 			     int entry_id,
-			     acpi_table_entry_handler handler,
+			     acpi_tbl_entry_handler handler,
 			     unsigned int max_entries)
 {
 	struct acpi_table_header *table_header = NULL;
@@ -269,7 +269,7 @@ err:
 
 int __init
 acpi_table_parse_madt(enum acpi_madt_type id,
-		      acpi_table_entry_handler handler, unsigned int max_entries)
+		      acpi_tbl_entry_handler handler, unsigned int max_entries)
 {
 	return acpi_table_parse_entries(ACPI_SIG_MADT,
 					    sizeof(struct acpi_table_madt), id,
@@ -285,7 +285,7 @@ acpi_table_parse_madt(enum acpi_madt_type id,
  * Scan the ACPI System Descriptor Table (STD) for a table matching @id,
  * run @handler on it.  Return 0 if table found, return on if not.
  */
-int __init acpi_table_parse(char *id, acpi_table_handler handler)
+int __init acpi_table_parse(char *id, acpi_tbl_table_handler handler)
 {
 	struct acpi_table_header *table = NULL;
 	acpi_size tbl_size;
diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h
index b86364d48645..d8b2ea673fc6 100644
--- a/include/acpi/acpixf.h
+++ b/include/acpi/acpixf.h
@@ -197,9 +197,9 @@ acpi_status
 acpi_get_table_by_index(u32 table_index, struct acpi_table_header **out_table);
 
 acpi_status
-acpi_install_table_handler(acpi_tbl_handler handler, void *context);
+acpi_install_table_handler(acpi_table_handler handler, void *context);
 
-acpi_status acpi_remove_table_handler(acpi_tbl_handler handler);
+acpi_status acpi_remove_table_handler(acpi_table_handler handler);
 
 /*
  * Namespace and name interfaces
diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h
index cd89810a4387..3d4e09c60e2b 100644
--- a/include/acpi/actypes.h
+++ b/include/acpi/actypes.h
@@ -984,7 +984,7 @@ acpi_status(*acpi_exception_handler) (acpi_status aml_status,
 /* Table Event handler (Load, load_table, etc.) and types */
 
 typedef
-acpi_status(*acpi_tbl_handler) (u32 event, void *table, void *context);
+acpi_status(*acpi_table_handler) (u32 event, void *table, void *context);
 
 #define ACPI_TABLE_LOAD             0x0
 #define ACPI_TABLE_UNLOAD           0x1
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 3994d7790b23..6b795bd36383 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -74,9 +74,10 @@ enum acpi_address_range_id {
 
 /* Table Handlers */
 
-typedef int (*acpi_table_handler) (struct acpi_table_header *table);
+typedef int (*acpi_tbl_table_handler)(struct acpi_table_header *table);
 
-typedef int (*acpi_table_entry_handler) (struct acpi_subtable_header *header, const unsigned long end);
+typedef int (*acpi_tbl_entry_handler)(struct acpi_subtable_header *header,
+				      const unsigned long end);
 
 #ifdef CONFIG_ACPI_INITRD_TABLE_OVERRIDE
 void acpi_initrd_override(void *data, size_t size);
@@ -95,10 +96,14 @@ int acpi_mps_check (void);
 int acpi_numa_init (void);
 
 int acpi_table_init (void);
-int acpi_table_parse (char *id, acpi_table_handler handler);
+int acpi_table_parse(char *id, acpi_tbl_table_handler handler);
 int __init acpi_table_parse_entries(char *id, unsigned long table_size,
-	int entry_id, acpi_table_entry_handler handler, unsigned int max_entries);
-int acpi_table_parse_madt (enum acpi_madt_type id, acpi_table_entry_handler handler, unsigned int max_entries);
+				    int entry_id,
+				    acpi_tbl_entry_handler handler,
+				    unsigned int max_entries);
+int acpi_table_parse_madt(enum acpi_madt_type id,
+			  acpi_tbl_entry_handler handler,
+			  unsigned int max_entries);
 int acpi_parse_mcfg (struct acpi_table_header *header);
 void acpi_table_print_madt_entry (struct acpi_subtable_header *madt);
 
-- 
cgit v1.2.3


From 242d98f077ac0ab80920219769eb095503b93f61 Mon Sep 17 00:00:00 2001
From: Sasha Levin <sasha.levin@oracle.com>
Date: Mon, 17 Dec 2012 10:01:27 -0500
Subject: block,elevator: use new hashtable implementation

Switch elevator to use the new hashtable implementation. This reduces the
amount of generic unrelated code in the elevator.

This also removes the dymanic allocation of the hash table. The size of the table is
constant so there's no point in paying the price of an extra dereference when accessing
it.

This patch depends on d9b482c ("hashtable: introduce a small and naive
hashtable") which was merged in v3.6.

Signed-off-by: Sasha Levin <sasha.levin@oracle.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk.h              |  2 +-
 block/elevator.c         | 23 ++++-------------------
 include/linux/elevator.h |  5 ++++-
 3 files changed, 9 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk.h b/block/blk.h
index 47fdfdd41520..e837b8f619b7 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -61,7 +61,7 @@ static inline void blk_clear_rq_complete(struct request *rq)
 /*
  * Internal elevator interface
  */
-#define ELV_ON_HASH(rq)		(!hlist_unhashed(&(rq)->hash))
+#define ELV_ON_HASH(rq) hash_hashed(&(rq)->hash)
 
 void blk_insert_flush(struct request *rq);
 void blk_abort_flushes(struct request_queue *q);
diff --git a/block/elevator.c b/block/elevator.c
index 9edba1b8323e..11683bb10b7b 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -46,11 +46,6 @@ static LIST_HEAD(elv_list);
 /*
  * Merge hash stuff.
  */
-static const int elv_hash_shift = 6;
-#define ELV_HASH_BLOCK(sec)	((sec) >> 3)
-#define ELV_HASH_FN(sec)	\
-		(hash_long(ELV_HASH_BLOCK((sec)), elv_hash_shift))
-#define ELV_HASH_ENTRIES	(1 << elv_hash_shift)
 #define rq_hash_key(rq)		(blk_rq_pos(rq) + blk_rq_sectors(rq))
 
 /*
@@ -142,7 +137,6 @@ static struct elevator_queue *elevator_alloc(struct request_queue *q,
 				  struct elevator_type *e)
 {
 	struct elevator_queue *eq;
-	int i;
 
 	eq = kmalloc_node(sizeof(*eq), GFP_KERNEL | __GFP_ZERO, q->node);
 	if (unlikely(!eq))
@@ -151,14 +145,7 @@ static struct elevator_queue *elevator_alloc(struct request_queue *q,
 	eq->type = e;
 	kobject_init(&eq->kobj, &elv_ktype);
 	mutex_init(&eq->sysfs_lock);
-
-	eq->hash = kmalloc_node(sizeof(struct hlist_head) * ELV_HASH_ENTRIES,
-					GFP_KERNEL, q->node);
-	if (!eq->hash)
-		goto err;
-
-	for (i = 0; i < ELV_HASH_ENTRIES; i++)
-		INIT_HLIST_HEAD(&eq->hash[i]);
+	hash_init(eq->hash);
 
 	return eq;
 err:
@@ -173,7 +160,6 @@ static void elevator_release(struct kobject *kobj)
 
 	e = container_of(kobj, struct elevator_queue, kobj);
 	elevator_put(e->type);
-	kfree(e->hash);
 	kfree(e);
 }
 
@@ -240,7 +226,7 @@ EXPORT_SYMBOL(elevator_exit);
 
 static inline void __elv_rqhash_del(struct request *rq)
 {
-	hlist_del_init(&rq->hash);
+	hash_del(&rq->hash);
 }
 
 static void elv_rqhash_del(struct request_queue *q, struct request *rq)
@@ -254,7 +240,7 @@ static void elv_rqhash_add(struct request_queue *q, struct request *rq)
 	struct elevator_queue *e = q->elevator;
 
 	BUG_ON(ELV_ON_HASH(rq));
-	hlist_add_head(&rq->hash, &e->hash[ELV_HASH_FN(rq_hash_key(rq))]);
+	hash_add(e->hash, &rq->hash, rq_hash_key(rq));
 }
 
 static void elv_rqhash_reposition(struct request_queue *q, struct request *rq)
@@ -266,11 +252,10 @@ static void elv_rqhash_reposition(struct request_queue *q, struct request *rq)
 static struct request *elv_rqhash_find(struct request_queue *q, sector_t offset)
 {
 	struct elevator_queue *e = q->elevator;
-	struct hlist_head *hash_list = &e->hash[ELV_HASH_FN(offset)];
 	struct hlist_node *entry, *next;
 	struct request *rq;
 
-	hlist_for_each_entry_safe(rq, entry, next, hash_list, hash) {
+	hash_for_each_possible_safe(e->hash, rq, entry, next, hash, offset) {
 		BUG_ON(!ELV_ON_HASH(rq));
 
 		if (unlikely(!rq_mergeable(rq))) {
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index c03af7687bb4..7c5a7c9789ee 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -2,6 +2,7 @@
 #define _LINUX_ELEVATOR_H
 
 #include <linux/percpu.h>
+#include <linux/hashtable.h>
 
 #ifdef CONFIG_BLOCK
 
@@ -96,6 +97,8 @@ struct elevator_type
 	struct list_head list;
 };
 
+#define ELV_HASH_BITS 6
+
 /*
  * each queue has an elevator_queue associated with it
  */
@@ -105,8 +108,8 @@ struct elevator_queue
 	void *elevator_data;
 	struct kobject kobj;
 	struct mutex sysfs_lock;
-	struct hlist_head *hash;
 	unsigned int registered:1;
+	DECLARE_HASHTABLE(hash, ELV_HASH_BITS);
 };
 
 /*
-- 
cgit v1.2.3


From 422765c2638924da10ff363b5eed77924911bdc7 Mon Sep 17 00:00:00 2001
From: Jianpeng Ma <majianpeng@gmail.com>
Date: Fri, 11 Jan 2013 14:46:09 +0100
Subject: block: Remove should_sort judgement when flush blk_plug

In commit 975927b942c932,it add blk_rq_pos to sort rq when flushing.
Although this commit was used for the situation which blk_plug handled
multi devices on the same time like md device.
I think there must be some situations like this but only single
device.
So remove the should_sort judgement.
Because the parameter should_sort is only for this purpose,it can delete
should_sort from blk_plug.

CC: Shaohua Li <shli@kernel.org>
Signed-off-by: Jianpeng Ma <majianpeng@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c       | 13 +------------
 include/linux/blkdev.h |  1 -
 2 files changed, 1 insertion(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-core.c b/block/blk-core.c
index c973249d68cd..aca5d82ff13c 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1548,13 +1548,6 @@ get_rq:
 		if (list_empty(&plug->list))
 			trace_block_plug(q);
 		else {
-			if (!plug->should_sort) {
-				struct request *__rq;
-
-				__rq = list_entry_rq(plug->list.prev);
-				if (__rq->q != q)
-					plug->should_sort = 1;
-			}
 			if (request_count >= BLK_MAX_REQUEST_COUNT) {
 				blk_flush_plug_list(plug, false);
 				trace_block_plug(q);
@@ -2888,7 +2881,6 @@ void blk_start_plug(struct blk_plug *plug)
 	plug->magic = PLUG_MAGIC;
 	INIT_LIST_HEAD(&plug->list);
 	INIT_LIST_HEAD(&plug->cb_list);
-	plug->should_sort = 0;
 
 	/*
 	 * If this is a nested plug, don't actually assign it. It will be
@@ -2990,10 +2982,7 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
 
 	list_splice_init(&plug->list, &list);
 
-	if (plug->should_sort) {
-		list_sort(NULL, &list, plug_rq_cmp);
-		plug->should_sort = 0;
-	}
+	list_sort(NULL, &list, plug_rq_cmp);
 
 	q = NULL;
 	depth = 0;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index f94bc83011ed..dbe74279f3d6 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -974,7 +974,6 @@ struct blk_plug {
 	unsigned long magic; /* detect uninitialized use-cases */
 	struct list_head list; /* requests */
 	struct list_head cb_list; /* md requires an unplug callback */
-	unsigned int should_sort; /* list to be sorted before flushing? */
 };
 #define BLK_MAX_REQUEST_COUNT 16
 
-- 
cgit v1.2.3


From 9f244e9cfd70c7c0f82d3c92ce772ab2a92d9f64 Mon Sep 17 00:00:00 2001
From: Seiji Aguchi <seiji.aguchi@hds.com>
Date: Fri, 11 Jan 2013 18:09:41 +0000
Subject: pstore: Avoid deadlock in panic and emergency-restart path

[Issue]

When pstore is in panic and emergency-restart paths, it may be blocked
in those paths because it simply takes spin_lock.

This is an example scenario which pstore may hang up in a panic path:

 - cpuA grabs psinfo->buf_lock
 - cpuB panics and calls smp_send_stop
 - smp_send_stop sends IRQ to cpuA
 - after 1 second, cpuB gives up on cpuA and sends an NMI instead
 - cpuA is now in an NMI handler while still holding buf_lock
 - cpuB is deadlocked

This case may happen if a firmware has a bug and
cpuA is stuck talking with it more than one second.

Also, this is a similar scenario in an emergency-restart path:

 - cpuA grabs psinfo->buf_lock and stucks in a firmware
 - cpuB kicks emergency-restart via either sysrq-b or hangcheck timer.
   And then, cpuB is deadlocked by taking psinfo->buf_lock again.

[Solution]

This patch avoids the deadlocking issues in both panic and emergency_restart
paths by introducing a function, is_non_blocking_path(), to check if a cpu
can be blocked in current path.

With this patch, pstore is not blocked even if another cpu has
taken a spin_lock, in those paths by changing from spin_lock_irqsave
to spin_trylock_irqsave.

In addition, according to a comment of emergency_restart() in kernel/sys.c,
spin_lock shouldn't be taken in an emergency_restart path to avoid
deadlock. This patch fits the comment below.

<snip>
/**
 *      emergency_restart - reboot the system
 *
 *      Without shutting down any hardware or taking any locks
 *      reboot the system.  This is called when we know we are in
 *      trouble so this is our best effort to reboot.  This is
 *      safe to call in interrupt context.
 */
void emergency_restart(void)
<snip>

Signed-off-by: Seiji Aguchi <seiji.aguchi@hds.com>
Acked-by: Don Zickus <dzickus@redhat.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 fs/pstore/platform.c   | 35 +++++++++++++++++++++++++++++------
 include/linux/pstore.h |  6 ++++++
 2 files changed, 35 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index 5ea2e77ff023..86d1038b5a12 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -96,6 +96,27 @@ static const char *get_reason_str(enum kmsg_dump_reason reason)
 	}
 }
 
+bool pstore_cannot_block_path(enum kmsg_dump_reason reason)
+{
+	/*
+	 * In case of NMI path, pstore shouldn't be blocked
+	 * regardless of reason.
+	 */
+	if (in_nmi())
+		return true;
+
+	switch (reason) {
+	/* In panic case, other cpus are stopped by smp_send_stop(). */
+	case KMSG_DUMP_PANIC:
+	/* Emergency restart shouldn't be blocked by spin lock. */
+	case KMSG_DUMP_EMERG:
+		return true;
+	default:
+		return false;
+	}
+}
+EXPORT_SYMBOL_GPL(pstore_cannot_block_path);
+
 /*
  * callback from kmsg_dump. (s2,l2) has the most recently
  * written bytes, older bytes are in (s1,l1). Save as much
@@ -114,10 +135,12 @@ static void pstore_dump(struct kmsg_dumper *dumper,
 
 	why = get_reason_str(reason);
 
-	if (in_nmi()) {
-		is_locked = spin_trylock(&psinfo->buf_lock);
-		if (!is_locked)
-			pr_err("pstore dump routine blocked in NMI, may corrupt error record\n");
+	if (pstore_cannot_block_path(reason)) {
+		is_locked = spin_trylock_irqsave(&psinfo->buf_lock, flags);
+		if (!is_locked) {
+			pr_err("pstore dump routine blocked in %s path, may corrupt error record\n"
+				       , in_nmi() ? "NMI" : why);
+		}
 	} else
 		spin_lock_irqsave(&psinfo->buf_lock, flags);
 	oopscount++;
@@ -143,9 +166,9 @@ static void pstore_dump(struct kmsg_dumper *dumper,
 		total += hsize + len;
 		part++;
 	}
-	if (in_nmi()) {
+	if (pstore_cannot_block_path(reason)) {
 		if (is_locked)
-			spin_unlock(&psinfo->buf_lock);
+			spin_unlock_irqrestore(&psinfo->buf_lock, flags);
 	} else
 		spin_unlock_irqrestore(&psinfo->buf_lock, flags);
 }
diff --git a/include/linux/pstore.h b/include/linux/pstore.h
index 1788909d9a99..75d01760c911 100644
--- a/include/linux/pstore.h
+++ b/include/linux/pstore.h
@@ -68,12 +68,18 @@ struct pstore_info {
 
 #ifdef CONFIG_PSTORE
 extern int pstore_register(struct pstore_info *);
+extern bool pstore_cannot_block_path(enum kmsg_dump_reason reason);
 #else
 static inline int
 pstore_register(struct pstore_info *psi)
 {
 	return -ENODEV;
 }
+static inline bool
+pstore_cannot_block_path(enum kmsg_dump_reason reason)
+{
+	return false;
+}
 #endif
 
 #endif /*_LINUX_PSTORE_H*/
-- 
cgit v1.2.3


From e2aa19fadd718d7dd920a3994118863861a4b61e Mon Sep 17 00:00:00 2001
From: Nathan Hintz <nlhintz@hotmail.com>
Date: Thu, 10 Jan 2013 17:54:09 +0100
Subject: bcma: return the mips irq number in bcma_core_irq

The irq signal numbers that are send by the cpu are increased by 2 from
the number programmed into the mips core by bcma.
Return the irq number on which the irqs are send in bcma_core_irq() now.

Signed-off-by: Nathan Hintz <nlhintz@hotmail.com>
Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 arch/mips/bcm47xx/serial.c            |  2 +-
 drivers/bcma/driver_chipcommon.c      |  2 +-
 drivers/bcma/driver_mips.c            | 12 +++++++++---
 drivers/bcma/driver_pci_host.c        |  4 ++--
 include/linux/bcma/bcma_driver_mips.h |  2 +-
 5 files changed, 14 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/arch/mips/bcm47xx/serial.c b/arch/mips/bcm47xx/serial.c
index 57981e4fe2bc..b8ef965705cf 100644
--- a/arch/mips/bcm47xx/serial.c
+++ b/arch/mips/bcm47xx/serial.c
@@ -62,7 +62,7 @@ static int __init uart8250_init_bcma(void)
 
 		p->mapbase = (unsigned int) bcma_port->regs;
 		p->membase = (void *) bcma_port->regs;
-		p->irq = bcma_port->irq + 2;
+		p->irq = bcma_port->irq;
 		p->uartclk = bcma_port->baud_base;
 		p->regshift = bcma_port->reg_shift;
 		p->iotype = UPIO_MEM;
diff --git a/drivers/bcma/driver_chipcommon.c b/drivers/bcma/driver_chipcommon.c
index e461ad25fda4..28fa50ad87be 100644
--- a/drivers/bcma/driver_chipcommon.c
+++ b/drivers/bcma/driver_chipcommon.c
@@ -329,7 +329,7 @@ void bcma_chipco_serial_init(struct bcma_drv_cc *cc)
 		return;
 	}
 
-	irq = bcma_core_mips_irq(cc->core);
+	irq = bcma_core_irq(cc->core);
 
 	/* Determine the registers of the UARTs */
 	cc->nr_serial_ports = (cc->capabilities & BCMA_CC_CAP_NRUART);
diff --git a/drivers/bcma/driver_mips.c b/drivers/bcma/driver_mips.c
index 90f20a247725..a808404db4b1 100644
--- a/drivers/bcma/driver_mips.c
+++ b/drivers/bcma/driver_mips.c
@@ -85,7 +85,7 @@ static u32 bcma_core_mips_irqflag(struct bcma_device *dev)
  * If disabled, 5 is returned.
  * If not supported, 6 is returned.
  */
-unsigned int bcma_core_mips_irq(struct bcma_device *dev)
+static unsigned int bcma_core_mips_irq(struct bcma_device *dev)
 {
 	struct bcma_device *mdev = dev->bus->drv_mips.core;
 	u32 irqflag;
@@ -102,7 +102,13 @@ unsigned int bcma_core_mips_irq(struct bcma_device *dev)
 
 	return 5;
 }
-EXPORT_SYMBOL(bcma_core_mips_irq);
+
+unsigned int bcma_core_irq(struct bcma_device *dev)
+{
+	unsigned int mips_irq = bcma_core_mips_irq(dev);
+	return mips_irq <= 4 ? mips_irq + 2 : 0;
+}
+EXPORT_SYMBOL(bcma_core_irq);
 
 static void bcma_core_mips_set_irq(struct bcma_device *dev, unsigned int irq)
 {
@@ -299,7 +305,7 @@ void bcma_core_mips_init(struct bcma_drv_mips *mcore)
 		break;
 	default:
 		list_for_each_entry(core, &bus->cores, list) {
-			core->irq = bcma_core_mips_irq(core) + 2;
+			core->irq = bcma_core_irq(core);
 		}
 		bcma_err(bus,
 			 "Unknown device (0x%x) found, can not configure IRQs\n",
diff --git a/drivers/bcma/driver_pci_host.c b/drivers/bcma/driver_pci_host.c
index e6b5c89469dc..ef9f0938da77 100644
--- a/drivers/bcma/driver_pci_host.c
+++ b/drivers/bcma/driver_pci_host.c
@@ -577,7 +577,7 @@ int bcma_core_pci_plat_dev_init(struct pci_dev *dev)
 	pr_info("PCI: Fixing up device %s\n", pci_name(dev));
 
 	/* Fix up interrupt lines */
-	dev->irq = bcma_core_mips_irq(pc_host->pdev->core) + 2;
+	dev->irq = bcma_core_irq(pc_host->pdev->core);
 	pci_write_config_byte(dev, PCI_INTERRUPT_LINE, dev->irq);
 
 	return 0;
@@ -596,6 +596,6 @@ int bcma_core_pci_pcibios_map_irq(const struct pci_dev *dev)
 
 	pc_host = container_of(dev->bus->ops, struct bcma_drv_pci_host,
 			       pci_ops);
-	return bcma_core_mips_irq(pc_host->pdev->core) + 2;
+	return bcma_core_irq(pc_host->pdev->core);
 }
 EXPORT_SYMBOL(bcma_core_pci_pcibios_map_irq);
diff --git a/include/linux/bcma/bcma_driver_mips.h b/include/linux/bcma/bcma_driver_mips.h
index 6495579e3f35..73c7f4b882cc 100644
--- a/include/linux/bcma/bcma_driver_mips.h
+++ b/include/linux/bcma/bcma_driver_mips.h
@@ -48,6 +48,6 @@ static inline void bcma_core_mips_early_init(struct bcma_drv_mips *mcore) { }
 
 extern u32 bcma_cpu_clock(struct bcma_drv_mips *mcore);
 
-extern unsigned int bcma_core_mips_irq(struct bcma_device *dev);
+extern unsigned int bcma_core_irq(struct bcma_device *core);
 
 #endif /* LINUX_BCMA_DRIVER_MIPS_H_ */
-- 
cgit v1.2.3


From 990debe2ca8379863709721926550a55f47f3880 Mon Sep 17 00:00:00 2001
From: Nathan Hintz <nlhintz@hotmail.com>
Date: Thu, 10 Jan 2013 22:24:03 -0800
Subject: bcma: update pci configuration for bcm4706/bcm4716

Update the PCI configuration for BCM4706 and BCM4716 per the 2011
Broadcom SDK.

Signed-off-by: Nathan Hintz <nlhintz@hotmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/bcma/driver_pci_host.c       | 13 ++++++++++++-
 include/linux/bcma/bcma_driver_pci.h |  2 ++
 2 files changed, 14 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/bcma/driver_pci_host.c b/drivers/bcma/driver_pci_host.c
index ef9f0938da77..37d1777dcd47 100644
--- a/drivers/bcma/driver_pci_host.c
+++ b/drivers/bcma/driver_pci_host.c
@@ -427,7 +427,7 @@ void __devinit bcma_core_pci_hostmode_init(struct bcma_drv_pci *pc)
 	/* Reset RC */
 	usleep_range(3000, 5000);
 	pcicore_write32(pc, BCMA_CORE_PCI_CTL, BCMA_CORE_PCI_CTL_RST_OE);
-	usleep_range(1000, 2000);
+	msleep(50);
 	pcicore_write32(pc, BCMA_CORE_PCI_CTL, BCMA_CORE_PCI_CTL_RST |
 			BCMA_CORE_PCI_CTL_RST_OE);
 
@@ -489,6 +489,17 @@ void __devinit bcma_core_pci_hostmode_init(struct bcma_drv_pci *pc)
 
 	bcma_core_pci_enable_crs(pc);
 
+	if (bus->chipinfo.id == BCMA_CHIP_ID_BCM4706 ||
+	    bus->chipinfo.id == BCMA_CHIP_ID_BCM4716) {
+		u16 val16;
+		bcma_extpci_read_config(pc, 0, 0, BCMA_CORE_PCI_CFG_DEVCTRL,
+					&val16, sizeof(val16));
+		val16 |= (2 << 5);	/* Max payload size of 512 */
+		val16 |= (2 << 12);	/* MRRS 512 */
+		bcma_extpci_write_config(pc, 0, 0, BCMA_CORE_PCI_CFG_DEVCTRL,
+					 &val16, sizeof(val16));
+	}
+
 	/* Enable PCI bridge BAR0 memory & master access */
 	tmp = PCI_COMMAND_MASTER | PCI_COMMAND_MEMORY;
 	bcma_extpci_write_config(pc, 0, 0, PCI_COMMAND, &tmp, sizeof(tmp));
diff --git a/include/linux/bcma/bcma_driver_pci.h b/include/linux/bcma/bcma_driver_pci.h
index 41da581e1612..31232247a1ee 100644
--- a/include/linux/bcma/bcma_driver_pci.h
+++ b/include/linux/bcma/bcma_driver_pci.h
@@ -179,6 +179,8 @@ struct pci_dev;
 #define BCMA_CORE_PCI_CFG_FUN_MASK		7	/* Function mask */
 #define BCMA_CORE_PCI_CFG_OFF_MASK		0xfff	/* Register mask */
 
+#define BCMA_CORE_PCI_CFG_DEVCTRL		0xd8
+
 /* PCIE Root Capability Register bits (Host mode only) */
 #define BCMA_CORE_PCI_RC_CRS_VISIBILITY		0x0001
 
-- 
cgit v1.2.3


From 6a099c63650e50ebf7d1259b859a3d230aec4207 Mon Sep 17 00:00:00 2001
From: Dongjin Kim <tobetter@gmail.com>
Date: Sat, 8 Dec 2012 05:18:44 +0900
Subject: USB: misc: Add USB3503 High-Speed Hub Controller

This patch adds new driver of SMSC USB3503 USB 2.0 hub controller with HSIC
upstream connectivity and three USB 2.0 downstream ports. The specification
can be found from 'http://www.smsc.com/index.php?tid=295&pid=325'.

The current version have been tested very basic features switching the modes,
HUB-MODE and STANDBY-MODE.

Signed-off-by: Dongjin Kim <tobetter@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/misc/Kconfig              |   6 +
 drivers/usb/misc/Makefile             |   1 +
 drivers/usb/misc/usb3503.c            | 303 ++++++++++++++++++++++++++++++++++
 include/linux/platform_data/usb3503.h |  19 +++
 4 files changed, 329 insertions(+)
 create mode 100644 drivers/usb/misc/usb3503.c
 create mode 100644 include/linux/platform_data/usb3503.h

(limited to 'include/linux')

diff --git a/drivers/usb/misc/Kconfig b/drivers/usb/misc/Kconfig
index fecde69bfa7d..3b1a3f4ec5e9 100644
--- a/drivers/usb/misc/Kconfig
+++ b/drivers/usb/misc/Kconfig
@@ -250,3 +250,9 @@ config USB_EZUSB_FX2
 	help
 	  Say Y here if you need EZUSB device support.
 	  (Cypress FX/FX2/FX2LP microcontrollers)
+
+config USB_HSIC_USB3503
+       tristate "USB3503 HSIC to USB20 Driver"
+       depends on I2C
+       help
+         This option enables support for SMSC USB3503 HSIC to USB 2.0 Driver.
diff --git a/drivers/usb/misc/Makefile b/drivers/usb/misc/Makefile
index 3e99a643294b..3e1bd70b06ea 100644
--- a/drivers/usb/misc/Makefile
+++ b/drivers/usb/misc/Makefile
@@ -26,5 +26,6 @@ obj-$(CONFIG_USB_TRANCEVIBRATOR)	+= trancevibrator.o
 obj-$(CONFIG_USB_USS720)		+= uss720.o
 obj-$(CONFIG_USB_SEVSEG)		+= usbsevseg.o
 obj-$(CONFIG_USB_YUREX)			+= yurex.o
+obj-$(CONFIG_USB_HSIC_USB3503)		+= usb3503.o
 
 obj-$(CONFIG_USB_SISUSBVGA)		+= sisusbvga/
diff --git a/drivers/usb/misc/usb3503.c b/drivers/usb/misc/usb3503.c
new file mode 100644
index 000000000000..796d58c95b63
--- /dev/null
+++ b/drivers/usb/misc/usb3503.c
@@ -0,0 +1,303 @@
+/*
+ * Driver for SMSC USB3503 USB 2.0 hub controller driver
+ *
+ * Copyright (c) 2012-2013 Dongjin Kim (tobetter@gmail.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <linux/i2c.h>
+#include <linux/gpio.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/platform_data/usb3503.h>
+
+#define USB3503_VIDL		0x00
+#define USB3503_VIDM		0x01
+#define USB3503_PIDL		0x02
+#define USB3503_PIDM		0x03
+#define USB3503_DIDL		0x04
+#define USB3503_DIDM		0x05
+
+#define USB3503_CFG1		0x06
+#define USB3503_SELF_BUS_PWR	(1 << 7)
+
+#define USB3503_CFG2		0x07
+#define USB3503_CFG3		0x08
+#define USB3503_NRD		0x09
+
+#define USB3503_PDS		0x0a
+#define USB3503_PORT1		(1 << 1)
+#define USB3503_PORT2		(1 << 2)
+#define USB3503_PORT3		(1 << 3)
+
+#define USB3503_SP_ILOCK	0xe7
+#define USB3503_SPILOCK_CONNECT	(1 << 1)
+#define USB3503_SPILOCK_CONFIG	(1 << 0)
+
+#define USB3503_CFGP		0xee
+#define USB3503_CLKSUSP		(1 << 7)
+
+struct usb3503 {
+	enum usb3503_mode	mode;
+	struct i2c_client	*client;
+	int	gpio_intn;
+	int	gpio_reset;
+	int	gpio_connect;
+};
+
+static int usb3503_write_register(struct i2c_client *client,
+		char reg, char data)
+{
+	return i2c_smbus_write_byte_data(client, reg, data);
+}
+
+static int usb3503_read_register(struct i2c_client *client, char reg)
+{
+	return i2c_smbus_read_byte_data(client, reg);
+}
+
+static int usb3503_set_bits(struct i2c_client *client, char reg, char req)
+{
+	int err;
+
+	err = usb3503_read_register(client, reg);
+	if (err < 0)
+		return err;
+
+	err = usb3503_write_register(client, reg, err | req);
+	if (err < 0)
+		return err;
+
+	return 0;
+}
+
+static int usb3503_clear_bits(struct i2c_client *client, char reg, char req)
+{
+	int err;
+
+	err = usb3503_read_register(client, reg);
+	if (err < 0)
+		return err;
+
+	err = usb3503_write_register(client, reg, err & ~req);
+	if (err < 0)
+		return err;
+
+	return 0;
+}
+
+static int usb3503_reset(int gpio_reset, int state)
+{
+	if (gpio_is_valid(gpio_reset))
+		gpio_set_value(gpio_reset, state);
+
+	/* Wait RefClk when RESET_N is released, otherwise Hub will
+	 * not transition to Hub Communication Stage.
+	 */
+	if (state)
+		msleep(100);
+
+	return 0;
+}
+
+static int usb3503_switch_mode(struct usb3503 *hub, enum usb3503_mode mode)
+{
+	struct i2c_client *i2c = hub->client;
+	int err = 0;
+
+	switch (mode) {
+	case USB3503_MODE_HUB:
+		usb3503_reset(hub->gpio_reset, 1);
+
+		/* SP_ILOCK: set connect_n, config_n for config */
+		err = usb3503_write_register(i2c, USB3503_SP_ILOCK,
+				(USB3503_SPILOCK_CONNECT
+				 | USB3503_SPILOCK_CONFIG));
+		if (err < 0) {
+			dev_err(&i2c->dev, "SP_ILOCK failed (%d)\n", err);
+			goto err_hubmode;
+		}
+
+		/* PDS : Port2,3 Disable For Self Powered Operation */
+		err = usb3503_set_bits(i2c, USB3503_PDS,
+				(USB3503_PORT2 | USB3503_PORT3));
+		if (err < 0) {
+			dev_err(&i2c->dev, "PDS failed (%d)\n", err);
+			goto err_hubmode;
+		}
+
+		/* CFG1 : SELF_BUS_PWR -> Self-Powerd operation */
+		err = usb3503_set_bits(i2c, USB3503_CFG1, USB3503_SELF_BUS_PWR);
+		if (err < 0) {
+			dev_err(&i2c->dev, "CFG1 failed (%d)\n", err);
+			goto err_hubmode;
+		}
+
+		/* SP_LOCK: clear connect_n, config_n for hub connect */
+		err = usb3503_clear_bits(i2c, USB3503_SP_ILOCK,
+				(USB3503_SPILOCK_CONNECT
+				 | USB3503_SPILOCK_CONFIG));
+		if (err < 0) {
+			dev_err(&i2c->dev, "SP_ILOCK failed (%d)\n", err);
+			goto err_hubmode;
+		}
+
+		hub->mode = mode;
+		dev_info(&i2c->dev, "switched to HUB mode\n");
+		break;
+
+	case USB3503_MODE_STANDBY:
+		usb3503_reset(hub->gpio_reset, 0);
+
+		hub->mode = mode;
+		dev_info(&i2c->dev, "switched to STANDBY mode\n");
+		break;
+
+	default:
+		dev_err(&i2c->dev, "unknown mode is request\n");
+		err = -EINVAL;
+		break;
+	}
+
+err_hubmode:
+	return err;
+}
+
+int usb3503_probe(struct i2c_client *i2c, const struct i2c_device_id *id)
+{
+	struct usb3503_platform_data *pdata = i2c->dev.platform_data;
+	struct usb3503 *hub;
+	int err;
+
+	hub = kzalloc(sizeof(struct usb3503), GFP_KERNEL);
+	if (!hub) {
+		dev_err(&i2c->dev, "private data alloc fail\n");
+		return -ENOMEM;
+	}
+
+	i2c_set_clientdata(i2c, hub);
+	hub->client = i2c;
+
+	if (!pdata) {
+		dev_dbg(&i2c->dev, "missing platform data\n");
+	} else {
+		hub->gpio_intn		= pdata->gpio_intn;
+		hub->gpio_connect	= pdata->gpio_connect;
+		hub->gpio_reset		= pdata->gpio_reset;
+		hub->mode		= pdata->initial_mode;
+	}
+
+	if (gpio_is_valid(hub->gpio_intn)) {
+		err = gpio_request_one(hub->gpio_intn,
+				GPIOF_OUT_INIT_HIGH, "usb3503 intn");
+		if (err) {
+			dev_err(&i2c->dev,
+					"unable to request GPIO %d as connect pin (%d)\n",
+					hub->gpio_intn, err);
+			goto err_gpio_intn;
+		}
+	}
+
+	if (gpio_is_valid(hub->gpio_connect)) {
+		err = gpio_request_one(hub->gpio_connect,
+				GPIOF_OUT_INIT_HIGH, "usb3503 connect");
+		if (err) {
+			dev_err(&i2c->dev,
+					"unable to request GPIO %d as connect pin (%d)\n",
+					hub->gpio_connect, err);
+			goto err_gpio_connect;
+		}
+	}
+
+	if (gpio_is_valid(hub->gpio_reset)) {
+		err = gpio_request_one(hub->gpio_reset,
+				GPIOF_OUT_INIT_LOW, "usb3503 reset");
+		if (err) {
+			dev_err(&i2c->dev,
+					"unable to request GPIO %d as reset pin (%d)\n",
+					hub->gpio_reset, err);
+			goto err_gpio_reset;
+		}
+	}
+
+	usb3503_switch_mode(hub, pdata->initial_mode);
+
+	dev_info(&i2c->dev, "%s: probed on  %s mode\n", __func__,
+			(hub->mode == USB3503_MODE_HUB) ? "hub" : "standby");
+
+	return 0;
+
+err_gpio_reset:
+	if (gpio_is_valid(hub->gpio_connect))
+		gpio_free(hub->gpio_connect);
+err_gpio_connect:
+	if (gpio_is_valid(hub->gpio_intn))
+		gpio_free(hub->gpio_intn);
+err_gpio_intn:
+	kfree(hub);
+
+	return err;
+}
+
+static int usb3503_remove(struct i2c_client *i2c)
+{
+	struct usb3503 *hub = i2c_get_clientdata(i2c);
+
+	if (gpio_is_valid(hub->gpio_intn))
+		gpio_free(hub->gpio_intn);
+	if (gpio_is_valid(hub->gpio_connect))
+		gpio_free(hub->gpio_connect);
+	if (gpio_is_valid(hub->gpio_reset))
+		gpio_free(hub->gpio_reset);
+
+	kfree(hub);
+
+	return 0;
+}
+
+static const struct i2c_device_id usb3503_id[] = {
+	{ USB3503_I2C_NAME, 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, usb3503_id);
+
+static struct i2c_driver usb3503_driver = {
+	.driver = {
+		.name = USB3503_I2C_NAME,
+	},
+	.probe		= usb3503_probe,
+	.remove		= usb3503_remove,
+	.id_table	= usb3503_id,
+};
+
+static int __init usb3503_init(void)
+{
+	return i2c_add_driver(&usb3503_driver);
+}
+
+static void __exit usb3503_exit(void)
+{
+	i2c_del_driver(&usb3503_driver);
+}
+
+module_init(usb3503_init);
+module_exit(usb3503_exit);
+
+MODULE_AUTHOR("Dongjin Kim <tobetter@gmail.com>");
+MODULE_DESCRIPTION("USB3503 USB HUB driver");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/platform_data/usb3503.h b/include/linux/platform_data/usb3503.h
new file mode 100644
index 000000000000..85dcc709f7e9
--- /dev/null
+++ b/include/linux/platform_data/usb3503.h
@@ -0,0 +1,19 @@
+#ifndef __USB3503_H__
+#define __USB3503_H__
+
+#define USB3503_I2C_NAME	"usb3503"
+
+enum usb3503_mode {
+	USB3503_MODE_UNKNOWN,
+	USB3503_MODE_HUB,
+	USB3503_MODE_STANDBY,
+};
+
+struct usb3503_platform_data {
+	enum usb3503_mode	initial_mode;
+	int	gpio_intn;
+	int	gpio_connect;
+	int	gpio_reset;
+};
+
+#endif
-- 
cgit v1.2.3


From 483f33f63c1cb3c6becb465bac7b75d7ff5e3b8f Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Fri, 4 Jan 2013 17:57:40 +0100
Subject: pinctrl: add pinconf-generic defines for output

This adds a definition of a generic output configuration
for a certain pin when using the generic pin configuration
library. Whereas driving pins low/high is usually a GPIO
business, you may want to set up pins into a default state
using hogs, and never touch them again. This helps out
with that scenario.

Based on a patch from Patrice Chotard.

Signed-off-by: Patrice Chotard <patrice.chotard@stericsson.com>
Reviewed-by: Stephen Warren <swarren@nvidia.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/pinctrl/pinconf-generic.c       | 1 +
 include/linux/pinctrl/pinconf-generic.h | 3 +++
 2 files changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/pinctrl/pinconf-generic.c b/drivers/pinctrl/pinconf-generic.c
index 833a36458157..bcf8157ceea7 100644
--- a/drivers/pinctrl/pinconf-generic.c
+++ b/drivers/pinctrl/pinconf-generic.c
@@ -46,6 +46,7 @@ struct pin_config_item conf_items[] = {
 	PCONFDUMP(PIN_CONFIG_INPUT_DEBOUNCE, "input debounce", "time units"),
 	PCONFDUMP(PIN_CONFIG_POWER_SOURCE, "pin power source", "selector"),
 	PCONFDUMP(PIN_CONFIG_LOW_POWER_MODE, "pin low power", "mode"),
+	PCONFDUMP(PIN_CONFIG_OUTPUT, "pin output", "level"),
 };
 
 void pinconf_generic_dump_pin(struct pinctrl_dev *pctldev,
diff --git a/include/linux/pinctrl/pinconf-generic.h b/include/linux/pinctrl/pinconf-generic.h
index 47a1bdd88878..b23d99da3b4b 100644
--- a/include/linux/pinctrl/pinconf-generic.h
+++ b/include/linux/pinctrl/pinconf-generic.h
@@ -62,6 +62,8 @@
  *	operation, if several modes of operation are supported these can be
  *	passed in the argument on a custom form, else just use argument 1
  *	to indicate low power mode, argument 0 turns low power mode off.
+ * @PIN_CONFIG_OUTPUT: this will configure the pin in output, use argument
+ *	1 to indicate high level, argument 0 to indicate low level.
  * @PIN_CONFIG_END: this is the last enumerator for pin configurations, if
  *	you need to pass in custom configurations to the pin controller, use
  *	PIN_CONFIG_END+1 as the base offset.
@@ -79,6 +81,7 @@ enum pin_config_param {
 	PIN_CONFIG_INPUT_DEBOUNCE,
 	PIN_CONFIG_POWER_SOURCE,
 	PIN_CONFIG_LOW_POWER_MODE,
+	PIN_CONFIG_OUTPUT,
 	PIN_CONFIG_END = 0x7FFF,
 };
 
-- 
cgit v1.2.3


From 896f97ea95c1d29c0520ee0766b66b7f64cb967c Mon Sep 17 00:00:00 2001
From: David Decotigny <decot@googlers.com>
Date: Fri, 11 Jan 2013 14:31:36 -0800
Subject: lib: cpu_rmap: avoid flushing all workqueues

In some cases, free_irq_cpu_rmap() is called while holding a lock (eg
rtnl).  This can lead to deadlocks, because it invokes
flush_scheduled_work() which ends up waiting for whole system workqueue
to flush, but some pending works might try to acquire the lock we are
already holding.

This commit uses reference-counting to replace
irq_run_affinity_notifiers().  It also removes
irq_run_affinity_notifiers() altogether.

[akpm@linux-foundation.org: eliminate free_cpu_rmap, rename cpu_rmap_reclaim() to cpu_rmap_release(), propagate kref_put() retval from cpu_rmap_put()]
Signed-off-by: David Decotigny <decot@googlers.com>
Reviewed-by: Ben Hutchings <bhutchings@solarflare.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Or Gerlitz <ogerlitz@mellanox.com>
Acked-by: Amir Vadai <amirv@mellanox.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/cpu_rmap.h  | 13 ++++--------
 include/linux/interrupt.h |  5 -----
 lib/cpu_rmap.c            | 54 ++++++++++++++++++++++++++++++++++++++++++-----
 3 files changed, 53 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cpu_rmap.h b/include/linux/cpu_rmap.h
index ac3bbb5b9502..1739510d8994 100644
--- a/include/linux/cpu_rmap.h
+++ b/include/linux/cpu_rmap.h
@@ -13,9 +13,11 @@
 #include <linux/cpumask.h>
 #include <linux/gfp.h>
 #include <linux/slab.h>
+#include <linux/kref.h>
 
 /**
  * struct cpu_rmap - CPU affinity reverse-map
+ * @refcount: kref for object
  * @size: Number of objects to be reverse-mapped
  * @used: Number of objects added
  * @obj: Pointer to array of object pointers
@@ -23,6 +25,7 @@
  *      based on affinity masks
  */
 struct cpu_rmap {
+	struct kref	refcount;
 	u16		size, used;
 	void		**obj;
 	struct {
@@ -33,15 +36,7 @@ struct cpu_rmap {
 #define CPU_RMAP_DIST_INF 0xffff
 
 extern struct cpu_rmap *alloc_cpu_rmap(unsigned int size, gfp_t flags);
-
-/**
- * free_cpu_rmap - free CPU affinity reverse-map
- * @rmap: Reverse-map allocated with alloc_cpu_rmap(), or %NULL
- */
-static inline void free_cpu_rmap(struct cpu_rmap *rmap)
-{
-	kfree(rmap);
-}
+extern int cpu_rmap_put(struct cpu_rmap *rmap);
 
 extern int cpu_rmap_add(struct cpu_rmap *rmap, void *obj);
 extern int cpu_rmap_update(struct cpu_rmap *rmap, u16 index,
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 5e4e6170f43a..5fa5afeeb759 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -268,11 +268,6 @@ struct irq_affinity_notify {
 extern int
 irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify);
 
-static inline void irq_run_affinity_notifiers(void)
-{
-	flush_scheduled_work();
-}
-
 #else /* CONFIG_SMP */
 
 static inline int irq_set_affinity(unsigned int irq, const struct cpumask *m)
diff --git a/lib/cpu_rmap.c b/lib/cpu_rmap.c
index 145dec5267c9..5fbed5caba6e 100644
--- a/lib/cpu_rmap.c
+++ b/lib/cpu_rmap.c
@@ -45,6 +45,7 @@ struct cpu_rmap *alloc_cpu_rmap(unsigned int size, gfp_t flags)
 	if (!rmap)
 		return NULL;
 
+	kref_init(&rmap->refcount);
 	rmap->obj = (void **)((char *)rmap + obj_offset);
 
 	/* Initially assign CPUs to objects on a rota, since we have
@@ -63,6 +64,35 @@ struct cpu_rmap *alloc_cpu_rmap(unsigned int size, gfp_t flags)
 }
 EXPORT_SYMBOL(alloc_cpu_rmap);
 
+/**
+ * cpu_rmap_release - internal reclaiming helper called from kref_put
+ * @ref: kref to struct cpu_rmap
+ */
+static void cpu_rmap_release(struct kref *ref)
+{
+	struct cpu_rmap *rmap = container_of(ref, struct cpu_rmap, refcount);
+	kfree(rmap);
+}
+
+/**
+ * cpu_rmap_get - internal helper to get new ref on a cpu_rmap
+ * @rmap: reverse-map allocated with alloc_cpu_rmap()
+ */
+static inline void cpu_rmap_get(struct cpu_rmap *rmap)
+{
+	kref_get(&rmap->refcount);
+}
+
+/**
+ * cpu_rmap_put - release ref on a cpu_rmap
+ * @rmap: reverse-map allocated with alloc_cpu_rmap()
+ */
+int cpu_rmap_put(struct cpu_rmap *rmap)
+{
+	return kref_put(&rmap->refcount, cpu_rmap_release);
+}
+EXPORT_SYMBOL(cpu_rmap_put);
+
 /* Reevaluate nearest object for given CPU, comparing with the given
  * neighbours at the given distance.
  */
@@ -197,8 +227,7 @@ struct irq_glue {
  * free_irq_cpu_rmap - free a CPU affinity reverse-map used for IRQs
  * @rmap: Reverse-map allocated with alloc_irq_cpu_map(), or %NULL
  *
- * Must be called in process context, before freeing the IRQs, and
- * without holding any locks required by global workqueue items.
+ * Must be called in process context, before freeing the IRQs.
  */
 void free_irq_cpu_rmap(struct cpu_rmap *rmap)
 {
@@ -212,12 +241,18 @@ void free_irq_cpu_rmap(struct cpu_rmap *rmap)
 		glue = rmap->obj[index];
 		irq_set_affinity_notifier(glue->notify.irq, NULL);
 	}
-	irq_run_affinity_notifiers();
 
-	kfree(rmap);
+	cpu_rmap_put(rmap);
 }
 EXPORT_SYMBOL(free_irq_cpu_rmap);
 
+/**
+ * irq_cpu_rmap_notify - callback for IRQ subsystem when IRQ affinity updated
+ * @notify: struct irq_affinity_notify passed by irq/manage.c
+ * @mask: cpu mask for new SMP affinity
+ *
+ * This is executed in workqueue context.
+ */
 static void
 irq_cpu_rmap_notify(struct irq_affinity_notify *notify, const cpumask_t *mask)
 {
@@ -230,10 +265,16 @@ irq_cpu_rmap_notify(struct irq_affinity_notify *notify, const cpumask_t *mask)
 		pr_warning("irq_cpu_rmap_notify: update failed: %d\n", rc);
 }
 
+/**
+ * irq_cpu_rmap_release - reclaiming callback for IRQ subsystem
+ * @ref: kref to struct irq_affinity_notify passed by irq/manage.c
+ */
 static void irq_cpu_rmap_release(struct kref *ref)
 {
 	struct irq_glue *glue =
 		container_of(ref, struct irq_glue, notify.kref);
+
+	cpu_rmap_put(glue->rmap);
 	kfree(glue);
 }
 
@@ -258,10 +299,13 @@ int irq_cpu_rmap_add(struct cpu_rmap *rmap, int irq)
 	glue->notify.notify = irq_cpu_rmap_notify;
 	glue->notify.release = irq_cpu_rmap_release;
 	glue->rmap = rmap;
+	cpu_rmap_get(rmap);
 	glue->index = cpu_rmap_add(rmap, glue);
 	rc = irq_set_affinity_notifier(irq, &glue->notify);
-	if (rc)
+	if (rc) {
+		cpu_rmap_put(glue->rmap);
 		kfree(glue);
+	}
 	return rc;
 }
 EXPORT_SYMBOL(irq_cpu_rmap_add);
-- 
cgit v1.2.3


From 1b963c81b14509e330e0fe3218b645ece2738dc5 Mon Sep 17 00:00:00 2001
From: Jiri Kosina <jkosina@suse.cz>
Date: Fri, 11 Jan 2013 14:31:56 -0800
Subject: lockdep, rwsem: provide down_write_nest_lock()

down_write_nest_lock() provides a means to annotate locking scenario
where an outer lock is guaranteed to serialize the order nested locks
are being acquired.

This is analogoue to already existing mutex_lock_nest_lock() and
spin_lock_nest_lock().

Signed-off-by: Jiri Kosina <jkosina@suse.cz>
Cc: Rik van Riel <riel@redhat.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mel Gorman <mel@csn.ul.ie>
Tested-by: Sedat Dilek <sedat.dilek@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/lockdep.h |  3 +++
 include/linux/rwsem.h   |  9 +++++++++
 kernel/rwsem.c          | 10 ++++++++++
 3 files changed, 22 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 00e46376e28f..2bca44b0893c 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -524,14 +524,17 @@ static inline void print_irqtrace_events(struct task_struct *curr)
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 # ifdef CONFIG_PROVE_LOCKING
 #  define rwsem_acquire(l, s, t, i)		lock_acquire(l, s, t, 0, 2, NULL, i)
+#  define rwsem_acquire_nest(l, s, t, n, i)	lock_acquire(l, s, t, 0, 2, n, i)
 #  define rwsem_acquire_read(l, s, t, i)	lock_acquire(l, s, t, 1, 2, NULL, i)
 # else
 #  define rwsem_acquire(l, s, t, i)		lock_acquire(l, s, t, 0, 1, NULL, i)
+#  define rwsem_acquire_nest(l, s, t, n, i)	lock_acquire(l, s, t, 0, 1, n, i)
 #  define rwsem_acquire_read(l, s, t, i)	lock_acquire(l, s, t, 1, 1, NULL, i)
 # endif
 # define rwsem_release(l, n, i)			lock_release(l, n, i)
 #else
 # define rwsem_acquire(l, s, t, i)		do { } while (0)
+# define rwsem_acquire_nest(l, s, t, n, i)	do { } while (0)
 # define rwsem_acquire_read(l, s, t, i)		do { } while (0)
 # define rwsem_release(l, n, i)			do { } while (0)
 #endif
diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h
index 54bd7cd7ecbd..413cc11e414a 100644
--- a/include/linux/rwsem.h
+++ b/include/linux/rwsem.h
@@ -125,8 +125,17 @@ extern void downgrade_write(struct rw_semaphore *sem);
  */
 extern void down_read_nested(struct rw_semaphore *sem, int subclass);
 extern void down_write_nested(struct rw_semaphore *sem, int subclass);
+extern void _down_write_nest_lock(struct rw_semaphore *sem, struct lockdep_map *nest_lock);
+
+# define down_write_nest_lock(sem, nest_lock)			\
+do {								\
+	typecheck(struct lockdep_map *, &(nest_lock)->dep_map);	\
+	_down_write_nest_lock(sem, &(nest_lock)->dep_map);	\
+} while (0);
+
 #else
 # define down_read_nested(sem, subclass)		down_read(sem)
+# define down_write_nest_lock(sem, nest_lock)	down_read(sem)
 # define down_write_nested(sem, subclass)	down_write(sem)
 #endif
 
diff --git a/kernel/rwsem.c b/kernel/rwsem.c
index 6850f53e02d8..b3c6c3fcd847 100644
--- a/kernel/rwsem.c
+++ b/kernel/rwsem.c
@@ -116,6 +116,16 @@ void down_read_nested(struct rw_semaphore *sem, int subclass)
 
 EXPORT_SYMBOL(down_read_nested);
 
+void _down_write_nest_lock(struct rw_semaphore *sem, struct lockdep_map *nest)
+{
+	might_sleep();
+	rwsem_acquire_nest(&sem->dep_map, 0, 0, nest, _RET_IP_);
+
+	LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
+}
+
+EXPORT_SYMBOL(_down_write_nest_lock);
+
 void down_write_nested(struct rw_semaphore *sem, int subclass)
 {
 	might_sleep();
-- 
cgit v1.2.3


From 7b9205bd775afc4439ed86d617f9042ee9e76a71 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Fri, 11 Jan 2013 14:32:05 -0800
Subject: audit: create explicit AUDIT_SECCOMP event type

The seccomp path was using AUDIT_ANOM_ABEND from when seccomp mode 1
could only kill a process.  While we still want to make sure an audit
record is forced on a kill, this should use a separate record type since
seccomp mode 2 introduces other behaviors.

In the case of "handled" behaviors (process wasn't killed), only emit a
record if the process is under inspection.  This change also fixes
userspace examination of seccomp audit events, since it was considered
malformed due to missing fields of the AUDIT_ANOM_ABEND event type.

Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Eric Paris <eparis@redhat.com>
Cc: Jeff Layton <jlayton@redhat.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Julien Tinnes <jln@google.com>
Acked-by: Will Drewry <wad@chromium.org>
Acked-by: Steve Grubb <sgrubb@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/audit.h      |  3 ++-
 include/uapi/linux/audit.h |  1 +
 kernel/auditsc.c           | 14 +++++++++++---
 3 files changed, 14 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index bce729afbcf9..9d5104d7aba9 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -157,7 +157,8 @@ void audit_core_dumps(long signr);
 
 static inline void audit_seccomp(unsigned long syscall, long signr, int code)
 {
-	if (unlikely(!audit_dummy_context()))
+	/* Force a record to be reported if a signal was delivered. */
+	if (signr || unlikely(!audit_dummy_context()))
 		__audit_seccomp(syscall, signr, code);
 }
 
diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
index 76352ac45f24..09a2d94ab113 100644
--- a/include/uapi/linux/audit.h
+++ b/include/uapi/linux/audit.h
@@ -106,6 +106,7 @@
 #define AUDIT_MMAP		1323	/* Record showing descriptor and flags in mmap */
 #define AUDIT_NETFILTER_PKT	1324	/* Packets traversing netfilter chains */
 #define AUDIT_NETFILTER_CFG	1325	/* Netfilter chain modifications */
+#define AUDIT_SECCOMP		1326	/* Secure Computing event */
 
 #define AUDIT_AVC		1400	/* SE Linux avc denial or grant */
 #define AUDIT_SELINUX_ERR	1401	/* Internal SE Linux Errors */
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index e37e6a12c5e3..3e46d1dec613 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -2675,7 +2675,7 @@ void __audit_mmap_fd(int fd, int flags)
 	context->type = AUDIT_MMAP;
 }
 
-static void audit_log_abend(struct audit_buffer *ab, char *reason, long signr)
+static void audit_log_task(struct audit_buffer *ab)
 {
 	kuid_t auid, uid;
 	kgid_t gid;
@@ -2693,6 +2693,11 @@ static void audit_log_abend(struct audit_buffer *ab, char *reason, long signr)
 	audit_log_task_context(ab);
 	audit_log_format(ab, " pid=%d comm=", current->pid);
 	audit_log_untrustedstring(ab, current->comm);
+}
+
+static void audit_log_abend(struct audit_buffer *ab, char *reason, long signr)
+{
+	audit_log_task(ab);
 	audit_log_format(ab, " reason=");
 	audit_log_string(ab, reason);
 	audit_log_format(ab, " sig=%ld", signr);
@@ -2723,8 +2728,11 @@ void __audit_seccomp(unsigned long syscall, long signr, int code)
 {
 	struct audit_buffer *ab;
 
-	ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_ANOM_ABEND);
-	audit_log_abend(ab, "seccomp", signr);
+	ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_SECCOMP);
+	if (unlikely(!ab))
+		return;
+	audit_log_task(ab);
+	audit_log_format(ab, " sig=%ld", signr);
 	audit_log_format(ab, " syscall=%ld", syscall);
 	audit_log_format(ab, " compat=%d", is_compat_task());
 	audit_log_format(ab, " ip=0x%lx", KSTK_EIP(current));
-- 
cgit v1.2.3


From c0a3a20b6c4b5229ef5d26fd9b1c4b1957632aa7 Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Fri, 11 Jan 2013 14:32:13 -0800
Subject: linux/audit.h: move ptrace.h include to kernel header

While the kernel internals want pt_regs (and so it includes
linux/ptrace.h), the user version of audit.h does not need it.  So move
the include out of the uapi version.

This avoids issues where people want the audit defines and userland
ptrace api.  Including both the kernel ptrace and the userland ptrace
headers can easily lead to failure.

Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Cc: Eric Paris <eparis@redhat.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/audit.h      | 1 +
 include/uapi/linux/audit.h | 1 -
 2 files changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 9d5104d7aba9..5a6d718adf34 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -24,6 +24,7 @@
 #define _LINUX_AUDIT_H_
 
 #include <linux/sched.h>
+#include <linux/ptrace.h>
 #include <uapi/linux/audit.h>
 
 struct audit_sig_info {
diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
index 09a2d94ab113..9f096f1c0907 100644
--- a/include/uapi/linux/audit.h
+++ b/include/uapi/linux/audit.h
@@ -26,7 +26,6 @@
 
 #include <linux/types.h>
 #include <linux/elf-em.h>
-#include <linux/ptrace.h>
 
 /* The netlink messages for the audit system is divided into blocks:
  * 1000 - 1099 are for commanding the audit system
-- 
cgit v1.2.3


From 8fb74b9fb2b182d54beee592350d9ea1f325917a Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Fri, 11 Jan 2013 14:32:16 -0800
Subject: mm: compaction: partially revert capture of suitable high-order page

Eric Wong reported on 3.7 and 3.8-rc2 that ppoll() got stuck when
waiting for POLLIN on a local TCP socket.  It was easier to trigger if
there was disk IO and dirty pages at the same time and he bisected it to
commit 1fb3f8ca0e92 ("mm: compaction: capture a suitable high-order page
immediately when it is made available").

The intention of that patch was to improve high-order allocations under
memory pressure after changes made to reclaim in 3.6 drastically hurt
THP allocations but the approach was flawed.  For Eric, the problem was
that page->pfmemalloc was not being cleared for captured pages leading
to a poor interaction with swap-over-NFS support causing the packets to
be dropped.  However, I identified a few more problems with the patch
including the fact that it can increase contention on zone->lock in some
cases which could result in async direct compaction being aborted early.

In retrospect the capture patch took the wrong approach.  What it should
have done is mark the pageblock being migrated as MIGRATE_ISOLATE if it
was allocating for THP and avoided races that way.  While the patch was
showing to improve allocation success rates at the time, the benefit is
marginal given the relative complexity and it should be revisited from
scratch in the context of the other reclaim-related changes that have
taken place since the patch was first written and tested.  This patch
partially reverts commit 1fb3f8ca0e92 ("mm: compaction: capture a
suitable high-order page immediately when it is made available").

Reported-and-tested-by: Eric Wong <normalperson@yhbt.net>
Tested-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Mel Gorman <mgorman@suse.de>
Cc: David Miller <davem@davemloft.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/compaction.h |  4 +-
 include/linux/mm.h         |  1 -
 mm/compaction.c            | 92 +++++++---------------------------------------
 mm/internal.h              |  1 -
 mm/page_alloc.c            | 35 ++++--------------
 5 files changed, 23 insertions(+), 110 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index 6ecb6dc2f303..cc7bddeaf553 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -22,7 +22,7 @@ extern int sysctl_extfrag_handler(struct ctl_table *table, int write,
 extern int fragmentation_index(struct zone *zone, unsigned int order);
 extern unsigned long try_to_compact_pages(struct zonelist *zonelist,
 			int order, gfp_t gfp_mask, nodemask_t *mask,
-			bool sync, bool *contended, struct page **page);
+			bool sync, bool *contended);
 extern int compact_pgdat(pg_data_t *pgdat, int order);
 extern void reset_isolation_suitable(pg_data_t *pgdat);
 extern unsigned long compaction_suitable(struct zone *zone, int order);
@@ -75,7 +75,7 @@ static inline bool compaction_restarting(struct zone *zone, int order)
 #else
 static inline unsigned long try_to_compact_pages(struct zonelist *zonelist,
 			int order, gfp_t gfp_mask, nodemask_t *nodemask,
-			bool sync, bool *contended, struct page **page)
+			bool sync, bool *contended)
 {
 	return COMPACT_CONTINUE;
 }
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 63204078f72b..66e2f7c61e5c 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -455,7 +455,6 @@ void put_pages_list(struct list_head *pages);
 
 void split_page(struct page *page, unsigned int order);
 int split_free_page(struct page *page);
-int capture_free_page(struct page *page, int alloc_order, int migratetype);
 
 /*
  * Compound pages have a destructor function.  Provide a
diff --git a/mm/compaction.c b/mm/compaction.c
index f8f5c111b7d7..c62bd063d766 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -816,6 +816,7 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
 static int compact_finished(struct zone *zone,
 			    struct compact_control *cc)
 {
+	unsigned int order;
 	unsigned long watermark;
 
 	if (fatal_signal_pending(current))
@@ -850,22 +851,16 @@ static int compact_finished(struct zone *zone,
 		return COMPACT_CONTINUE;
 
 	/* Direct compactor: Is a suitable page free? */
-	if (cc->page) {
-		/* Was a suitable page captured? */
-		if (*cc->page)
+	for (order = cc->order; order < MAX_ORDER; order++) {
+		struct free_area *area = &zone->free_area[order];
+
+		/* Job done if page is free of the right migratetype */
+		if (!list_empty(&area->free_list[cc->migratetype]))
+			return COMPACT_PARTIAL;
+
+		/* Job done if allocation would set block type */
+		if (cc->order >= pageblock_order && area->nr_free)
 			return COMPACT_PARTIAL;
-	} else {
-		unsigned int order;
-		for (order = cc->order; order < MAX_ORDER; order++) {
-			struct free_area *area = &zone->free_area[cc->order];
-			/* Job done if page is free of the right migratetype */
-			if (!list_empty(&area->free_list[cc->migratetype]))
-				return COMPACT_PARTIAL;
-
-			/* Job done if allocation would set block type */
-			if (cc->order >= pageblock_order && area->nr_free)
-				return COMPACT_PARTIAL;
-		}
 	}
 
 	return COMPACT_CONTINUE;
@@ -921,60 +916,6 @@ unsigned long compaction_suitable(struct zone *zone, int order)
 	return COMPACT_CONTINUE;
 }
 
-static void compact_capture_page(struct compact_control *cc)
-{
-	unsigned long flags;
-	int mtype, mtype_low, mtype_high;
-
-	if (!cc->page || *cc->page)
-		return;
-
-	/*
-	 * For MIGRATE_MOVABLE allocations we capture a suitable page ASAP
-	 * regardless of the migratetype of the freelist is is captured from.
-	 * This is fine because the order for a high-order MIGRATE_MOVABLE
-	 * allocation is typically at least a pageblock size and overall
-	 * fragmentation is not impaired. Other allocation types must
-	 * capture pages from their own migratelist because otherwise they
-	 * could pollute other pageblocks like MIGRATE_MOVABLE with
-	 * difficult to move pages and making fragmentation worse overall.
-	 */
-	if (cc->migratetype == MIGRATE_MOVABLE) {
-		mtype_low = 0;
-		mtype_high = MIGRATE_PCPTYPES;
-	} else {
-		mtype_low = cc->migratetype;
-		mtype_high = cc->migratetype + 1;
-	}
-
-	/* Speculatively examine the free lists without zone lock */
-	for (mtype = mtype_low; mtype < mtype_high; mtype++) {
-		int order;
-		for (order = cc->order; order < MAX_ORDER; order++) {
-			struct page *page;
-			struct free_area *area;
-			area = &(cc->zone->free_area[order]);
-			if (list_empty(&area->free_list[mtype]))
-				continue;
-
-			/* Take the lock and attempt capture of the page */
-			if (!compact_trylock_irqsave(&cc->zone->lock, &flags, cc))
-				return;
-			if (!list_empty(&area->free_list[mtype])) {
-				page = list_entry(area->free_list[mtype].next,
-							struct page, lru);
-				if (capture_free_page(page, cc->order, mtype)) {
-					spin_unlock_irqrestore(&cc->zone->lock,
-									flags);
-					*cc->page = page;
-					return;
-				}
-			}
-			spin_unlock_irqrestore(&cc->zone->lock, flags);
-		}
-	}
-}
-
 static int compact_zone(struct zone *zone, struct compact_control *cc)
 {
 	int ret;
@@ -1054,9 +995,6 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
 				goto out;
 			}
 		}
-
-		/* Capture a page now if it is a suitable size */
-		compact_capture_page(cc);
 	}
 
 out:
@@ -1069,8 +1007,7 @@ out:
 
 static unsigned long compact_zone_order(struct zone *zone,
 				 int order, gfp_t gfp_mask,
-				 bool sync, bool *contended,
-				 struct page **page)
+				 bool sync, bool *contended)
 {
 	unsigned long ret;
 	struct compact_control cc = {
@@ -1080,7 +1017,6 @@ static unsigned long compact_zone_order(struct zone *zone,
 		.migratetype = allocflags_to_migratetype(gfp_mask),
 		.zone = zone,
 		.sync = sync,
-		.page = page,
 	};
 	INIT_LIST_HEAD(&cc.freepages);
 	INIT_LIST_HEAD(&cc.migratepages);
@@ -1110,7 +1046,7 @@ int sysctl_extfrag_threshold = 500;
  */
 unsigned long try_to_compact_pages(struct zonelist *zonelist,
 			int order, gfp_t gfp_mask, nodemask_t *nodemask,
-			bool sync, bool *contended, struct page **page)
+			bool sync, bool *contended)
 {
 	enum zone_type high_zoneidx = gfp_zone(gfp_mask);
 	int may_enter_fs = gfp_mask & __GFP_FS;
@@ -1136,7 +1072,7 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
 		int status;
 
 		status = compact_zone_order(zone, order, gfp_mask, sync,
-						contended, page);
+						contended);
 		rc = max(status, rc);
 
 		/* If a normal allocation would succeed, stop compacting */
@@ -1192,7 +1128,6 @@ int compact_pgdat(pg_data_t *pgdat, int order)
 	struct compact_control cc = {
 		.order = order,
 		.sync = false,
-		.page = NULL,
 	};
 
 	return __compact_pgdat(pgdat, &cc);
@@ -1203,7 +1138,6 @@ static int compact_node(int nid)
 	struct compact_control cc = {
 		.order = -1,
 		.sync = true,
-		.page = NULL,
 	};
 
 	return __compact_pgdat(NODE_DATA(nid), &cc);
diff --git a/mm/internal.h b/mm/internal.h
index d597f94cc205..9ba21100ebf3 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -135,7 +135,6 @@ struct compact_control {
 	int migratetype;		/* MOVABLE, RECLAIMABLE etc */
 	struct zone *zone;
 	bool contended;			/* True if a lock was contended */
-	struct page **page;		/* Page captured of requested size */
 };
 
 unsigned long
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index c957805a7f0e..df2022ff0c8a 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1384,14 +1384,8 @@ void split_page(struct page *page, unsigned int order)
 		set_page_refcounted(page + i);
 }
 
-/*
- * Similar to the split_page family of functions except that the page
- * required at the given order and being isolated now to prevent races
- * with parallel allocators
- */
-int capture_free_page(struct page *page, int alloc_order, int migratetype)
+static int __isolate_free_page(struct page *page, unsigned int order)
 {
-	unsigned int order;
 	unsigned long watermark;
 	struct zone *zone;
 	int mt;
@@ -1399,7 +1393,6 @@ int capture_free_page(struct page *page, int alloc_order, int migratetype)
 	BUG_ON(!PageBuddy(page));
 
 	zone = page_zone(page);
-	order = page_order(page);
 	mt = get_pageblock_migratetype(page);
 
 	if (mt != MIGRATE_ISOLATE) {
@@ -1408,7 +1401,7 @@ int capture_free_page(struct page *page, int alloc_order, int migratetype)
 		if (!zone_watermark_ok(zone, 0, watermark, 0, 0))
 			return 0;
 
-		__mod_zone_freepage_state(zone, -(1UL << alloc_order), mt);
+		__mod_zone_freepage_state(zone, -(1UL << order), mt);
 	}
 
 	/* Remove page from free list */
@@ -1416,11 +1409,7 @@ int capture_free_page(struct page *page, int alloc_order, int migratetype)
 	zone->free_area[order].nr_free--;
 	rmv_page_order(page);
 
-	if (alloc_order != order)
-		expand(zone, page, alloc_order, order,
-			&zone->free_area[order], migratetype);
-
-	/* Set the pageblock if the captured page is at least a pageblock */
+	/* Set the pageblock if the isolated page is at least a pageblock */
 	if (order >= pageblock_order - 1) {
 		struct page *endpage = page + (1 << order) - 1;
 		for (; page < endpage; page += pageblock_nr_pages) {
@@ -1431,7 +1420,7 @@ int capture_free_page(struct page *page, int alloc_order, int migratetype)
 		}
 	}
 
-	return 1UL << alloc_order;
+	return 1UL << order;
 }
 
 /*
@@ -1449,10 +1438,9 @@ int split_free_page(struct page *page)
 	unsigned int order;
 	int nr_pages;
 
-	BUG_ON(!PageBuddy(page));
 	order = page_order(page);
 
-	nr_pages = capture_free_page(page, order, 0);
+	nr_pages = __isolate_free_page(page, order);
 	if (!nr_pages)
 		return 0;
 
@@ -2136,8 +2124,6 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 	bool *contended_compaction, bool *deferred_compaction,
 	unsigned long *did_some_progress)
 {
-	struct page *page = NULL;
-
 	if (!order)
 		return NULL;
 
@@ -2149,16 +2135,12 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 	current->flags |= PF_MEMALLOC;
 	*did_some_progress = try_to_compact_pages(zonelist, order, gfp_mask,
 						nodemask, sync_migration,
-						contended_compaction, &page);
+						contended_compaction);
 	current->flags &= ~PF_MEMALLOC;
 
-	/* If compaction captured a page, prep and use it */
-	if (page) {
-		prep_new_page(page, order, gfp_mask);
-		goto got_page;
-	}
-
 	if (*did_some_progress != COMPACT_SKIPPED) {
+		struct page *page;
+
 		/* Page migration frees to the PCP lists but we want merging */
 		drain_pages(get_cpu());
 		put_cpu();
@@ -2168,7 +2150,6 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 				alloc_flags & ~ALLOC_NO_WATERMARKS,
 				preferred_zone, migratetype);
 		if (page) {
-got_page:
 			preferred_zone->compact_blockskip_flush = false;
 			preferred_zone->compact_considered = 0;
 			preferred_zone->compact_defer_shift = 0;
-- 
cgit v1.2.3


From 3cb7a56344ca45ee56d71c5f8fe9f922306bff1f Mon Sep 17 00:00:00 2001
From: Michel Lespinasse <walken@google.com>
Date: Fri, 11 Jan 2013 14:32:20 -0800
Subject: lib/rbtree.c: avoid the use of non-static __always_inline

lib/rbtree.c declared __rb_erase_color() as __always_inline void, and
then exported it with EXPORT_SYMBOL.

This was because __rb_erase_color() must be exported for augmented
rbtree users, but it must also be inlined into rb_erase() so that the
dummy callback can get optimized out of that call site.

(Actually with a modern compiler, none of the dummy callback functions
should even be generated as separate text functions).

The above usage is legal C, but it was unusual enough for some compilers
to warn about it.  This change makes things more explicit, with a static
__always_inline ____rb_erase_color function for use in rb_erase(), and a
separate non-inline __rb_erase_color function for use in
rb_erase_augmented call sites.

Signed-off-by: Michel Lespinasse <walken@google.com>
Reported-by: Wu Fengguang <fengguang.wu@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/rbtree_augmented.h | 14 +++++++++++---
 lib/rbtree.c                     | 20 +++++++++++++++++---
 2 files changed, 28 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rbtree_augmented.h b/include/linux/rbtree_augmented.h
index 2ac60c9cf644..fea49b5da12a 100644
--- a/include/linux/rbtree_augmented.h
+++ b/include/linux/rbtree_augmented.h
@@ -123,9 +123,9 @@ __rb_change_child(struct rb_node *old, struct rb_node *new,
 extern void __rb_erase_color(struct rb_node *parent, struct rb_root *root,
 	void (*augment_rotate)(struct rb_node *old, struct rb_node *new));
 
-static __always_inline void
-rb_erase_augmented(struct rb_node *node, struct rb_root *root,
-		   const struct rb_augment_callbacks *augment)
+static __always_inline struct rb_node *
+__rb_erase_augmented(struct rb_node *node, struct rb_root *root,
+		     const struct rb_augment_callbacks *augment)
 {
 	struct rb_node *child = node->rb_right, *tmp = node->rb_left;
 	struct rb_node *parent, *rebalance;
@@ -217,6 +217,14 @@ rb_erase_augmented(struct rb_node *node, struct rb_root *root,
 	}
 
 	augment->propagate(tmp, NULL);
+	return rebalance;
+}
+
+static __always_inline void
+rb_erase_augmented(struct rb_node *node, struct rb_root *root,
+		   const struct rb_augment_callbacks *augment)
+{
+	struct rb_node *rebalance = __rb_erase_augmented(node, root, augment);
 	if (rebalance)
 		__rb_erase_color(rebalance, root, augment->rotate);
 }
diff --git a/lib/rbtree.c b/lib/rbtree.c
index 4f56a11d67fa..c0e31fe2fabf 100644
--- a/lib/rbtree.c
+++ b/lib/rbtree.c
@@ -194,8 +194,12 @@ __rb_insert(struct rb_node *node, struct rb_root *root,
 	}
 }
 
-__always_inline void
-__rb_erase_color(struct rb_node *parent, struct rb_root *root,
+/*
+ * Inline version for rb_erase() use - we want to be able to inline
+ * and eliminate the dummy_rotate callback there
+ */
+static __always_inline void
+____rb_erase_color(struct rb_node *parent, struct rb_root *root,
 	void (*augment_rotate)(struct rb_node *old, struct rb_node *new))
 {
 	struct rb_node *node = NULL, *sibling, *tmp1, *tmp2;
@@ -355,6 +359,13 @@ __rb_erase_color(struct rb_node *parent, struct rb_root *root,
 		}
 	}
 }
+
+/* Non-inline version for rb_erase_augmented() use */
+void __rb_erase_color(struct rb_node *parent, struct rb_root *root,
+	void (*augment_rotate)(struct rb_node *old, struct rb_node *new))
+{
+	____rb_erase_color(parent, root, augment_rotate);
+}
 EXPORT_SYMBOL(__rb_erase_color);
 
 /*
@@ -380,7 +391,10 @@ EXPORT_SYMBOL(rb_insert_color);
 
 void rb_erase(struct rb_node *node, struct rb_root *root)
 {
-	rb_erase_augmented(node, root, &dummy_callbacks);
+	struct rb_node *rebalance;
+	rebalance = __rb_erase_augmented(node, root, &dummy_callbacks);
+	if (rebalance)
+		____rb_erase_color(rebalance, root, dummy_rotate);
 }
 EXPORT_SYMBOL(rb_erase);
 
-- 
cgit v1.2.3


From d07d7507bfb4e23735c9b83e397c43e1e8a173e8 Mon Sep 17 00:00:00 2001
From: Stanislaw Gruszka <sgruszka@redhat.com>
Date: Thu, 10 Jan 2013 23:19:10 +0000
Subject: net, wireless: overwrite default_ethtool_ops

Since:

commit 2c60db037034d27f8c636403355d52872da92f81
Author: Eric Dumazet <edumazet@google.com>
Date:   Sun Sep 16 09:17:26 2012 +0000

    net: provide a default dev->ethtool_ops

wireless core does not correctly assign ethtool_ops.

After alloc_netdev*() call, some cfg80211 drivers provide they own
ethtool_ops, but some do not. For them, wireless core provide generic
cfg80211_ethtool_ops, which is assigned in NETDEV_REGISTER notify call:

        if (!dev->ethtool_ops)
                dev->ethtool_ops = &cfg80211_ethtool_ops;

But after Eric's commit, dev->ethtool_ops is no longer NULL (on cfg80211
drivers without custom ethtool_ops), but points to &default_ethtool_ops.

In order to fix the problem, provide function which will overwrite
default_ethtool_ops and use it by wireless core.

Signed-off-by: Stanislaw Gruszka <sgruszka@redhat.com>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Acked-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 3 +++
 net/core/dev.c            | 8 ++++++++
 net/wireless/core.c       | 3 +--
 3 files changed, 12 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index c599e4782d45..9ef07d0868b6 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -60,6 +60,9 @@ struct wireless_dev;
 #define SET_ETHTOOL_OPS(netdev,ops) \
 	( (netdev)->ethtool_ops = (ops) )
 
+extern void netdev_set_default_ethtool_ops(struct net_device *dev,
+					   const struct ethtool_ops *ops);
+
 /* hardware address assignment types */
 #define NET_ADDR_PERM		0	/* address is permanent (default) */
 #define NET_ADDR_RANDOM		1	/* address is generated randomly */
diff --git a/net/core/dev.c b/net/core/dev.c
index 515473ee52cb..f64e439b4a00 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6121,6 +6121,14 @@ struct netdev_queue *dev_ingress_queue_create(struct net_device *dev)
 
 static const struct ethtool_ops default_ethtool_ops;
 
+void netdev_set_default_ethtool_ops(struct net_device *dev,
+				    const struct ethtool_ops *ops)
+{
+	if (dev->ethtool_ops == &default_ethtool_ops)
+		dev->ethtool_ops = ops;
+}
+EXPORT_SYMBOL_GPL(netdev_set_default_ethtool_ops);
+
 /**
  *	alloc_netdev_mqs - allocate network device
  *	@sizeof_priv:	size of private data to allocate space for
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 14d990400354..b677eab55b68 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -866,8 +866,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb,
 		/* allow mac80211 to determine the timeout */
 		wdev->ps_timeout = -1;
 
-		if (!dev->ethtool_ops)
-			dev->ethtool_ops = &cfg80211_ethtool_ops;
+		netdev_set_default_ethtool_ops(dev, &cfg80211_ethtool_ops);
 
 		if ((wdev->iftype == NL80211_IFTYPE_STATION ||
 		     wdev->iftype == NL80211_IFTYPE_P2P_CLIENT ||
-- 
cgit v1.2.3


From 10ee27a06cc8eb57f83342a8eabcb75deb872d52 Mon Sep 17 00:00:00 2001
From: Miao Xie <miaox@cn.fujitsu.com>
Date: Thu, 10 Jan 2013 13:47:57 +0800
Subject: vfs: re-implement writeback_inodes_sb(_nr)_if_idle() and rename them

writeback_inodes_sb(_nr)_if_idle() is re-implemented by replacing down_read()
with down_read_trylock() because

- If ->s_umount is write locked, then the sb is not idle. That is
  writeback_inodes_sb(_nr)_if_idle() needn't wait for the lock.

- writeback_inodes_sb(_nr)_if_idle() grabs s_umount lock when it want to start
  writeback, it may bring us deadlock problem when doing umount. In order to
  fix the problem, ext4 and btrfs implemented their own writeback functions
  instead of writeback_inodes_sb(_nr)_if_idle(), but it introduced the redundant
  code, it is better to implement a new writeback_inodes_sb(_nr)_if_idle().

The name of these two functions is cumbersome, so rename them to
try_to_writeback_inodes_sb(_nr).

This idea came from Christoph Hellwig.
Some code is from the patch of Kamal Mostafa.

Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
Signed-off-by: Fengguang Wu <fengguang.wu@intel.com>
---
 fs/btrfs/extent-tree.c    | 20 +++-----------------
 fs/ext4/inode.c           |  8 ++------
 fs/fs-writeback.c         | 44 ++++++++++++++++++++------------------------
 include/linux/writeback.h |  6 +++---
 4 files changed, 28 insertions(+), 50 deletions(-)

(limited to 'include/linux')

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 521e9d4424f6..f31abb14e06f 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3689,20 +3689,6 @@ static int can_overcommit(struct btrfs_root *root,
 	return 0;
 }
 
-static int writeback_inodes_sb_nr_if_idle_safe(struct super_block *sb,
-					       unsigned long nr_pages,
-					       enum wb_reason reason)
-{
-	if (!writeback_in_progress(sb->s_bdi) &&
-	    down_read_trylock(&sb->s_umount)) {
-		writeback_inodes_sb_nr(sb, nr_pages, reason);
-		up_read(&sb->s_umount);
-		return 1;
-	}
-
-	return 0;
-}
-
 /*
  * shrink metadata reservation for delalloc
  */
@@ -3735,9 +3721,9 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
 	while (delalloc_bytes && loops < 3) {
 		max_reclaim = min(delalloc_bytes, to_reclaim);
 		nr_pages = max_reclaim >> PAGE_CACHE_SHIFT;
-		writeback_inodes_sb_nr_if_idle_safe(root->fs_info->sb,
-						    nr_pages,
-						    WB_REASON_FS_FREE_SPACE);
+		try_to_writeback_inodes_sb_nr(root->fs_info->sb,
+					      nr_pages,
+					      WB_REASON_FS_FREE_SPACE);
 
 		/*
 		 * We need to wait for the async pages to actually start before
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index cbfe13bf5b2a..5f6eef71ff21 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2512,12 +2512,8 @@ static int ext4_nonda_switch(struct super_block *sb)
 	/*
 	 * Start pushing delalloc when 1/2 of free blocks are dirty.
 	 */
-	if (dirty_blocks && (free_blocks < 2 * dirty_blocks) &&
-	    !writeback_in_progress(sb->s_bdi) &&
-	    down_read_trylock(&sb->s_umount)) {
-		writeback_inodes_sb(sb, WB_REASON_FS_FREE_SPACE);
-		up_read(&sb->s_umount);
-	}
+	if (dirty_blocks && (free_blocks < 2 * dirty_blocks))
+		try_to_writeback_inodes_sb(sb, WB_REASON_FS_FREE_SPACE);
 
 	if (2 * free_blocks < 3 * dirty_blocks ||
 		free_blocks < (dirty_blocks + EXT4_FREECLUSTERS_WATERMARK)) {
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 310972b72a66..ad3cc46a743a 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -1332,47 +1332,43 @@ void writeback_inodes_sb(struct super_block *sb, enum wb_reason reason)
 EXPORT_SYMBOL(writeback_inodes_sb);
 
 /**
- * writeback_inodes_sb_if_idle	-	start writeback if none underway
+ * try_to_writeback_inodes_sb_nr - try to start writeback if none underway
  * @sb: the superblock
- * @reason: reason why some writeback work was initiated
+ * @nr: the number of pages to write
+ * @reason: the reason of writeback
  *
- * Invoke writeback_inodes_sb if no writeback is currently underway.
+ * Invoke writeback_inodes_sb_nr if no writeback is currently underway.
  * Returns 1 if writeback was started, 0 if not.
  */
-int writeback_inodes_sb_if_idle(struct super_block *sb, enum wb_reason reason)
+int try_to_writeback_inodes_sb_nr(struct super_block *sb,
+				  unsigned long nr,
+				  enum wb_reason reason)
 {
-	if (!writeback_in_progress(sb->s_bdi)) {
-		down_read(&sb->s_umount);
-		writeback_inodes_sb(sb, reason);
-		up_read(&sb->s_umount);
+	if (writeback_in_progress(sb->s_bdi))
 		return 1;
-	} else
+
+	if (!down_read_trylock(&sb->s_umount))
 		return 0;
+
+	writeback_inodes_sb_nr(sb, nr, reason);
+	up_read(&sb->s_umount);
+	return 1;
 }
-EXPORT_SYMBOL(writeback_inodes_sb_if_idle);
+EXPORT_SYMBOL(try_to_writeback_inodes_sb_nr);
 
 /**
- * writeback_inodes_sb_nr_if_idle	-	start writeback if none underway
+ * try_to_writeback_inodes_sb - try to start writeback if none underway
  * @sb: the superblock
- * @nr: the number of pages to write
  * @reason: reason why some writeback work was initiated
  *
- * Invoke writeback_inodes_sb if no writeback is currently underway.
+ * Implement by try_to_writeback_inodes_sb_nr()
  * Returns 1 if writeback was started, 0 if not.
  */
-int writeback_inodes_sb_nr_if_idle(struct super_block *sb,
-				   unsigned long nr,
-				   enum wb_reason reason)
+int try_to_writeback_inodes_sb(struct super_block *sb, enum wb_reason reason)
 {
-	if (!writeback_in_progress(sb->s_bdi)) {
-		down_read(&sb->s_umount);
-		writeback_inodes_sb_nr(sb, nr, reason);
-		up_read(&sb->s_umount);
-		return 1;
-	} else
-		return 0;
+	return try_to_writeback_inodes_sb_nr(sb, get_nr_dirty_pages(), reason);
 }
-EXPORT_SYMBOL(writeback_inodes_sb_nr_if_idle);
+EXPORT_SYMBOL(try_to_writeback_inodes_sb);
 
 /**
  * sync_inodes_sb	-	sync sb inode pages
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index b82a83aba311..9a9367c0c076 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -87,9 +87,9 @@ int inode_wait(void *);
 void writeback_inodes_sb(struct super_block *, enum wb_reason reason);
 void writeback_inodes_sb_nr(struct super_block *, unsigned long nr,
 							enum wb_reason reason);
-int writeback_inodes_sb_if_idle(struct super_block *, enum wb_reason reason);
-int writeback_inodes_sb_nr_if_idle(struct super_block *, unsigned long nr,
-							enum wb_reason reason);
+int try_to_writeback_inodes_sb(struct super_block *, enum wb_reason reason);
+int try_to_writeback_inodes_sb_nr(struct super_block *, unsigned long nr,
+				  enum wb_reason reason);
 void sync_inodes_sb(struct super_block *);
 long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages,
 				enum wb_reason reason);
-- 
cgit v1.2.3


From 0d21b0e3477395e7ff2acc269f15df6e6a8d356d Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Sat, 12 Jan 2013 11:38:44 +1030
Subject: module: add new state MODULE_STATE_UNFORMED.

You should never look at such a module, so it's excised from all paths
which traverse the modules list.

We add the state at the end, to avoid gratuitous ABI break (ksplice).

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 include/linux/module.h      | 10 ++++----
 kernel/debug/kdb/kdb_main.c |  2 ++
 kernel/module.c             | 57 +++++++++++++++++++++++++++++++++++++++++----
 3 files changed, 59 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/module.h b/include/linux/module.h
index 7760c6d344a3..1375ee3f03aa 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -199,11 +199,11 @@ struct module_use {
 	struct module *source, *target;
 };
 
-enum module_state
-{
-	MODULE_STATE_LIVE,
-	MODULE_STATE_COMING,
-	MODULE_STATE_GOING,
+enum module_state {
+	MODULE_STATE_LIVE,	/* Normal state. */
+	MODULE_STATE_COMING,	/* Full formed, running module_init. */
+	MODULE_STATE_GOING,	/* Going away. */
+	MODULE_STATE_UNFORMED,	/* Still setting it up. */
 };
 
 /**
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index 4d5f8d5612f3..8875254120b6 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -1970,6 +1970,8 @@ static int kdb_lsmod(int argc, const char **argv)
 
 	kdb_printf("Module                  Size  modstruct     Used by\n");
 	list_for_each_entry(mod, kdb_modules, list) {
+		if (mod->state == MODULE_STATE_UNFORMED)
+			continue;
 
 		kdb_printf("%-20s%8u  0x%p ", mod->name,
 			   mod->core_size, (void *)mod);
diff --git a/kernel/module.c b/kernel/module.c
index 41bc1189b061..c3a2ee8e3679 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -188,6 +188,7 @@ struct load_info {
    ongoing or failed initialization etc. */
 static inline int strong_try_module_get(struct module *mod)
 {
+	BUG_ON(mod && mod->state == MODULE_STATE_UNFORMED);
 	if (mod && mod->state == MODULE_STATE_COMING)
 		return -EBUSY;
 	if (try_module_get(mod))
@@ -343,6 +344,9 @@ bool each_symbol_section(bool (*fn)(const struct symsearch *arr,
 #endif
 		};
 
+		if (mod->state == MODULE_STATE_UNFORMED)
+			continue;
+
 		if (each_symbol_in_section(arr, ARRAY_SIZE(arr), mod, fn, data))
 			return true;
 	}
@@ -450,16 +454,24 @@ const struct kernel_symbol *find_symbol(const char *name,
 EXPORT_SYMBOL_GPL(find_symbol);
 
 /* Search for module by name: must hold module_mutex. */
-struct module *find_module(const char *name)
+static struct module *find_module_all(const char *name,
+				      bool even_unformed)
 {
 	struct module *mod;
 
 	list_for_each_entry(mod, &modules, list) {
+		if (!even_unformed && mod->state == MODULE_STATE_UNFORMED)
+			continue;
 		if (strcmp(mod->name, name) == 0)
 			return mod;
 	}
 	return NULL;
 }
+
+struct module *find_module(const char *name)
+{
+	return find_module_all(name, false);
+}
 EXPORT_SYMBOL_GPL(find_module);
 
 #ifdef CONFIG_SMP
@@ -525,6 +537,8 @@ bool is_module_percpu_address(unsigned long addr)
 	preempt_disable();
 
 	list_for_each_entry_rcu(mod, &modules, list) {
+		if (mod->state == MODULE_STATE_UNFORMED)
+			continue;
 		if (!mod->percpu_size)
 			continue;
 		for_each_possible_cpu(cpu) {
@@ -1048,6 +1062,8 @@ static ssize_t show_initstate(struct module_attribute *mattr,
 	case MODULE_STATE_GOING:
 		state = "going";
 		break;
+	default:
+		BUG();
 	}
 	return sprintf(buffer, "%s\n", state);
 }
@@ -1786,6 +1802,8 @@ void set_all_modules_text_rw(void)
 
 	mutex_lock(&module_mutex);
 	list_for_each_entry_rcu(mod, &modules, list) {
+		if (mod->state == MODULE_STATE_UNFORMED)
+			continue;
 		if ((mod->module_core) && (mod->core_text_size)) {
 			set_page_attributes(mod->module_core,
 						mod->module_core + mod->core_text_size,
@@ -1807,6 +1825,8 @@ void set_all_modules_text_ro(void)
 
 	mutex_lock(&module_mutex);
 	list_for_each_entry_rcu(mod, &modules, list) {
+		if (mod->state == MODULE_STATE_UNFORMED)
+			continue;
 		if ((mod->module_core) && (mod->core_text_size)) {
 			set_page_attributes(mod->module_core,
 						mod->module_core + mod->core_text_size,
@@ -2998,7 +3018,8 @@ static bool finished_loading(const char *name)
 
 	mutex_lock(&module_mutex);
 	mod = find_module(name);
-	ret = !mod || mod->state != MODULE_STATE_COMING;
+	ret = !mod || mod->state == MODULE_STATE_LIVE
+		|| mod->state == MODULE_STATE_GOING;
 	mutex_unlock(&module_mutex);
 
 	return ret;
@@ -3361,6 +3382,8 @@ const char *module_address_lookup(unsigned long addr,
 
 	preempt_disable();
 	list_for_each_entry_rcu(mod, &modules, list) {
+		if (mod->state == MODULE_STATE_UNFORMED)
+			continue;
 		if (within_module_init(addr, mod) ||
 		    within_module_core(addr, mod)) {
 			if (modname)
@@ -3384,6 +3407,8 @@ int lookup_module_symbol_name(unsigned long addr, char *symname)
 
 	preempt_disable();
 	list_for_each_entry_rcu(mod, &modules, list) {
+		if (mod->state == MODULE_STATE_UNFORMED)
+			continue;
 		if (within_module_init(addr, mod) ||
 		    within_module_core(addr, mod)) {
 			const char *sym;
@@ -3408,6 +3433,8 @@ int lookup_module_symbol_attrs(unsigned long addr, unsigned long *size,
 
 	preempt_disable();
 	list_for_each_entry_rcu(mod, &modules, list) {
+		if (mod->state == MODULE_STATE_UNFORMED)
+			continue;
 		if (within_module_init(addr, mod) ||
 		    within_module_core(addr, mod)) {
 			const char *sym;
@@ -3435,6 +3462,8 @@ int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
 
 	preempt_disable();
 	list_for_each_entry_rcu(mod, &modules, list) {
+		if (mod->state == MODULE_STATE_UNFORMED)
+			continue;
 		if (symnum < mod->num_symtab) {
 			*value = mod->symtab[symnum].st_value;
 			*type = mod->symtab[symnum].st_info;
@@ -3477,9 +3506,12 @@ unsigned long module_kallsyms_lookup_name(const char *name)
 			ret = mod_find_symname(mod, colon+1);
 		*colon = ':';
 	} else {
-		list_for_each_entry_rcu(mod, &modules, list)
+		list_for_each_entry_rcu(mod, &modules, list) {
+			if (mod->state == MODULE_STATE_UNFORMED)
+				continue;
 			if ((ret = mod_find_symname(mod, name)) != 0)
 				break;
+		}
 	}
 	preempt_enable();
 	return ret;
@@ -3494,6 +3526,8 @@ int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *,
 	int ret;
 
 	list_for_each_entry(mod, &modules, list) {
+		if (mod->state == MODULE_STATE_UNFORMED)
+			continue;
 		for (i = 0; i < mod->num_symtab; i++) {
 			ret = fn(data, mod->strtab + mod->symtab[i].st_name,
 				 mod, mod->symtab[i].st_value);
@@ -3509,6 +3543,7 @@ static char *module_flags(struct module *mod, char *buf)
 {
 	int bx = 0;
 
+	BUG_ON(mod->state == MODULE_STATE_UNFORMED);
 	if (mod->taints ||
 	    mod->state == MODULE_STATE_GOING ||
 	    mod->state == MODULE_STATE_COMING) {
@@ -3550,6 +3585,10 @@ static int m_show(struct seq_file *m, void *p)
 	struct module *mod = list_entry(p, struct module, list);
 	char buf[8];
 
+	/* We always ignore unformed modules. */
+	if (mod->state == MODULE_STATE_UNFORMED)
+		return 0;
+
 	seq_printf(m, "%s %u",
 		   mod->name, mod->init_size + mod->core_size);
 	print_unload_info(m, mod);
@@ -3610,6 +3649,8 @@ const struct exception_table_entry *search_module_extables(unsigned long addr)
 
 	preempt_disable();
 	list_for_each_entry_rcu(mod, &modules, list) {
+		if (mod->state == MODULE_STATE_UNFORMED)
+			continue;
 		if (mod->num_exentries == 0)
 			continue;
 
@@ -3658,10 +3699,13 @@ struct module *__module_address(unsigned long addr)
 	if (addr < module_addr_min || addr > module_addr_max)
 		return NULL;
 
-	list_for_each_entry_rcu(mod, &modules, list)
+	list_for_each_entry_rcu(mod, &modules, list) {
+		if (mod->state == MODULE_STATE_UNFORMED)
+			continue;
 		if (within_module_core(addr, mod)
 		    || within_module_init(addr, mod))
 			return mod;
+	}
 	return NULL;
 }
 EXPORT_SYMBOL_GPL(__module_address);
@@ -3714,8 +3758,11 @@ void print_modules(void)
 	printk(KERN_DEFAULT "Modules linked in:");
 	/* Most callers should already have preempt disabled, but make sure */
 	preempt_disable();
-	list_for_each_entry_rcu(mod, &modules, list)
+	list_for_each_entry_rcu(mod, &modules, list) {
+		if (mod->state == MODULE_STATE_UNFORMED)
+			continue;
 		printk(" %s%s", mod->name, module_flags(mod, buf));
+	}
 	preempt_enable();
 	if (last_unloaded_module[0])
 		printk(" [last unloaded: %s]", last_unloaded_module);
-- 
cgit v1.2.3


From 61cc13a51bcff737ce02d2047834171c0365b00d Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Thu, 10 Jan 2013 10:52:56 +0200
Subject: dmaengine: introduce is_slave_direction function

This function helps to distinguish the slave type of transfer by checking the
direction parameter.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Cc: Nicolas Ferre <nicolas.ferre@atmel.com>
Cc: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 include/linux/dmaengine.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 4ca9cf73ad3f..bfcdecb5d87a 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -621,6 +621,11 @@ static inline int dmaengine_slave_config(struct dma_chan *chan,
 			(unsigned long)config);
 }
 
+static inline bool is_slave_direction(enum dma_transfer_direction direction)
+{
+	return (direction == DMA_MEM_TO_DEV) || (direction == DMA_DEV_TO_MEM);
+}
+
 static inline struct dma_async_tx_descriptor *dmaengine_prep_slave_single(
 	struct dma_chan *chan, dma_addr_t buf, size_t len,
 	enum dma_transfer_direction dir, unsigned long flags)
-- 
cgit v1.2.3


From 520f7bd73354f003a9a59937b28e4903d985c420 Mon Sep 17 00:00:00 2001
From: Rob Herring <rob.herring@calxeda.com>
Date: Thu, 27 Dec 2012 13:10:24 -0600
Subject: irqchip: Move ARM gic.h to include/linux/irqchip/arm-gic.h

Now that we have GIC moved to drivers/irqchip and all GIC DT init for
platforms using irqchip_init, move gic.h and update the remaining
includes.

Signed-off-by: Rob Herring <rob.herring@calxeda.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Anton Vorontsov <avorontsov@mvista.com>
Cc: Kukjin Kim <kgene.kim@samsung.com>
Cc: Sascha Hauer <kernel@pengutronix.de>
Cc: David Brown <davidb@codeaurora.org>
Cc: Daniel Walker <dwalker@fifo99.com>
Cc: Bryan Huntsman <bryanh@codeaurora.org>
Cc: Tony Lindgren <tony@atomide.com>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: Magnus Damm <magnus.damm@gmail.com>
Cc: Viresh Kumar <viresh.linux@gmail.com>
Cc: Shiraz Hashim <shiraz.hashim@st.com>
Cc: Stephen Warren <swarren@wwwdotorg.org>
Cc: Srinidhi Kasagar <srinidhi.kasagar@stericsson.com>
Cc: Linus Walleij <linus.walleij@linaro.org>
Cc: Samuel Ortiz <sameo@linux.intel.com>
---
 arch/arm/include/asm/hardware/gic.h          | 48 ----------------------------
 arch/arm/mach-cns3xxx/core.c                 |  2 +-
 arch/arm/mach-exynos/common.c                |  2 +-
 arch/arm/mach-exynos/include/mach/regs-irq.h |  2 +-
 arch/arm/mach-exynos/platsmp.c               |  2 +-
 arch/arm/mach-highbank/platsmp.c             |  2 +-
 arch/arm/mach-imx/gpc.c                      |  2 +-
 arch/arm/mach-imx/platsmp.c                  |  2 +-
 arch/arm/mach-msm/platsmp.c                  |  2 +-
 arch/arm/mach-omap2/board-4430sdp.c          |  2 +-
 arch/arm/mach-omap2/board-omap4panda.c       |  2 +-
 arch/arm/mach-omap2/omap-smp.c               |  2 +-
 arch/arm/mach-omap2/omap-wakeupgen.c         |  3 +-
 arch/arm/mach-omap2/omap4-common.c           |  2 +-
 arch/arm/mach-realview/realview_eb.c         |  2 +-
 arch/arm/mach-realview/realview_pb1176.c     |  2 +-
 arch/arm/mach-realview/realview_pb11mp.c     |  2 +-
 arch/arm/mach-realview/realview_pba8.c       |  2 +-
 arch/arm/mach-realview/realview_pbx.c        |  2 +-
 arch/arm/mach-shmobile/board-ag5evm.c        |  2 +-
 arch/arm/mach-shmobile/board-kota2.c         |  2 +-
 arch/arm/mach-shmobile/board-kzm9g.c         |  2 +-
 arch/arm/mach-shmobile/intc-r8a7779.c        |  2 +-
 arch/arm/mach-shmobile/intc-sh73a0.c         |  2 +-
 arch/arm/mach-shmobile/setup-emev2.c         |  2 +-
 arch/arm/mach-shmobile/smp-emev2.c           |  2 +-
 arch/arm/mach-shmobile/smp-r8a7779.c         |  2 +-
 arch/arm/mach-shmobile/smp-sh73a0.c          |  2 +-
 arch/arm/mach-socfpga/platsmp.c              |  2 +-
 arch/arm/mach-spear13xx/platsmp.c            |  2 +-
 arch/arm/mach-tegra/irq.c                    |  3 +-
 arch/arm/mach-tegra/platsmp.c                |  2 +-
 arch/arm/mach-ux500/cpu.c                    |  2 +-
 arch/arm/mach-ux500/platsmp.c                |  2 +-
 arch/arm/mach-vexpress/ct-ca9x4.c            |  2 +-
 arch/arm/plat-versatile/platsmp.c            |  2 +-
 drivers/irqchip/irq-gic.c                    |  2 +-
 drivers/mfd/db8500-prcmu.c                   |  2 +-
 include/linux/irqchip/arm-gic.h              | 48 ++++++++++++++++++++++++++++
 39 files changed, 85 insertions(+), 87 deletions(-)
 delete mode 100644 arch/arm/include/asm/hardware/gic.h
 create mode 100644 include/linux/irqchip/arm-gic.h

(limited to 'include/linux')

diff --git a/arch/arm/include/asm/hardware/gic.h b/arch/arm/include/asm/hardware/gic.h
deleted file mode 100644
index 5c14398d72d2..000000000000
--- a/arch/arm/include/asm/hardware/gic.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- *  arch/arm/include/asm/hardware/gic.h
- *
- *  Copyright (C) 2002 ARM Limited, All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#ifndef __ASM_ARM_HARDWARE_GIC_H
-#define __ASM_ARM_HARDWARE_GIC_H
-
-#define GIC_CPU_CTRL			0x00
-#define GIC_CPU_PRIMASK			0x04
-#define GIC_CPU_BINPOINT		0x08
-#define GIC_CPU_INTACK			0x0c
-#define GIC_CPU_EOI			0x10
-#define GIC_CPU_RUNNINGPRI		0x14
-#define GIC_CPU_HIGHPRI			0x18
-
-#define GIC_DIST_CTRL			0x000
-#define GIC_DIST_CTR			0x004
-#define GIC_DIST_ENABLE_SET		0x100
-#define GIC_DIST_ENABLE_CLEAR		0x180
-#define GIC_DIST_PENDING_SET		0x200
-#define GIC_DIST_PENDING_CLEAR		0x280
-#define GIC_DIST_ACTIVE_BIT		0x300
-#define GIC_DIST_PRI			0x400
-#define GIC_DIST_TARGET			0x800
-#define GIC_DIST_CONFIG			0xc00
-#define GIC_DIST_SOFTINT		0xf00
-
-struct device_node;
-
-extern struct irq_chip gic_arch_extn;
-
-void gic_init_bases(unsigned int, int, void __iomem *, void __iomem *,
-		    u32 offset, struct device_node *);
-void gic_secondary_init(unsigned int);
-void gic_cascade_irq(unsigned int gic_nr, unsigned int irq);
-
-static inline void gic_init(unsigned int nr, int start,
-			    void __iomem *dist , void __iomem *cpu)
-{
-	gic_init_bases(nr, start, dist, cpu, 0, NULL);
-}
-
-#endif
diff --git a/arch/arm/mach-cns3xxx/core.c b/arch/arm/mach-cns3xxx/core.c
index 031805b1428d..db3806c00f4b 100644
--- a/arch/arm/mach-cns3xxx/core.c
+++ b/arch/arm/mach-cns3xxx/core.c
@@ -12,10 +12,10 @@
 #include <linux/interrupt.h>
 #include <linux/clockchips.h>
 #include <linux/io.h>
+#include <linux/irqchip/arm-gic.h>
 #include <asm/mach/map.h>
 #include <asm/mach/time.h>
 #include <asm/mach/irq.h>
-#include <asm/hardware/gic.h>
 #include <asm/hardware/cache-l2x0.h>
 #include <mach/cns3xxx.h>
 #include "core.h"
diff --git a/arch/arm/mach-exynos/common.c b/arch/arm/mach-exynos/common.c
index 3a3250fda93a..4ea80bc4ef9b 100644
--- a/arch/arm/mach-exynos/common.c
+++ b/arch/arm/mach-exynos/common.c
@@ -24,11 +24,11 @@
 #include <linux/irqdomain.h>
 #include <linux/irqchip.h>
 #include <linux/of_address.h>
+#include <linux/irqchip/arm-gic.h>
 
 #include <asm/proc-fns.h>
 #include <asm/exception.h>
 #include <asm/hardware/cache-l2x0.h>
-#include <asm/hardware/gic.h>
 #include <asm/mach/map.h>
 #include <asm/mach/irq.h>
 #include <asm/cacheflush.h>
diff --git a/arch/arm/mach-exynos/include/mach/regs-irq.h b/arch/arm/mach-exynos/include/mach/regs-irq.h
index 9c7b4bfd546f..f2b50506b9f6 100644
--- a/arch/arm/mach-exynos/include/mach/regs-irq.h
+++ b/arch/arm/mach-exynos/include/mach/regs-irq.h
@@ -13,7 +13,7 @@
 #ifndef __ASM_ARCH_REGS_IRQ_H
 #define __ASM_ARCH_REGS_IRQ_H __FILE__
 
-#include <asm/hardware/gic.h>
+#include <linux/irqchip/arm-gic.h>
 #include <mach/map.h>
 
 #endif /* __ASM_ARCH_REGS_IRQ_H */
diff --git a/arch/arm/mach-exynos/platsmp.c b/arch/arm/mach-exynos/platsmp.c
index 5898f826b070..60f7c5be057d 100644
--- a/arch/arm/mach-exynos/platsmp.c
+++ b/arch/arm/mach-exynos/platsmp.c
@@ -20,9 +20,9 @@
 #include <linux/jiffies.h>
 #include <linux/smp.h>
 #include <linux/io.h>
+#include <linux/irqchip/arm-gic.h>
 
 #include <asm/cacheflush.h>
-#include <asm/hardware/gic.h>
 #include <asm/smp_plat.h>
 #include <asm/smp_scu.h>
 
diff --git a/arch/arm/mach-highbank/platsmp.c b/arch/arm/mach-highbank/platsmp.c
index e8d3c5f0171f..8797a7001720 100644
--- a/arch/arm/mach-highbank/platsmp.c
+++ b/arch/arm/mach-highbank/platsmp.c
@@ -17,9 +17,9 @@
 #include <linux/init.h>
 #include <linux/smp.h>
 #include <linux/io.h>
+#include <linux/irqchip/arm-gic.h>
 
 #include <asm/smp_scu.h>
-#include <asm/hardware/gic.h>
 
 #include "core.h"
 
diff --git a/arch/arm/mach-imx/gpc.c b/arch/arm/mach-imx/gpc.c
index e1537f9e45b8..ff24920699e4 100644
--- a/arch/arm/mach-imx/gpc.c
+++ b/arch/arm/mach-imx/gpc.c
@@ -15,7 +15,7 @@
 #include <linux/of.h>
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
-#include <asm/hardware/gic.h>
+#include <linux/irqchip/arm-gic.h>
 
 #define GPC_IMR1		0x008
 #define GPC_PGC_CPU_PDN		0x2a0
diff --git a/arch/arm/mach-imx/platsmp.c b/arch/arm/mach-imx/platsmp.c
index 8e720574fe9f..d35693117991 100644
--- a/arch/arm/mach-imx/platsmp.c
+++ b/arch/arm/mach-imx/platsmp.c
@@ -12,9 +12,9 @@
 
 #include <linux/init.h>
 #include <linux/smp.h>
+#include <linux/irqchip/arm-gic.h>
 #include <asm/page.h>
 #include <asm/smp_scu.h>
-#include <asm/hardware/gic.h>
 #include <asm/mach/map.h>
 
 #include "common.h"
diff --git a/arch/arm/mach-msm/platsmp.c b/arch/arm/mach-msm/platsmp.c
index e27e57b12274..42932865416a 100644
--- a/arch/arm/mach-msm/platsmp.c
+++ b/arch/arm/mach-msm/platsmp.c
@@ -15,8 +15,8 @@
 #include <linux/jiffies.h>
 #include <linux/smp.h>
 #include <linux/io.h>
+#include <linux/irqchip/arm-gic.h>
 
-#include <asm/hardware/gic.h>
 #include <asm/cacheflush.h>
 #include <asm/cputype.h>
 #include <asm/mach-types.h>
diff --git a/arch/arm/mach-omap2/board-4430sdp.c b/arch/arm/mach-omap2/board-4430sdp.c
index 3de62a97c5d2..cb43253dfa86 100644
--- a/arch/arm/mach-omap2/board-4430sdp.c
+++ b/arch/arm/mach-omap2/board-4430sdp.c
@@ -26,10 +26,10 @@
 #include <linux/regulator/fixed.h>
 #include <linux/leds.h>
 #include <linux/leds_pwm.h>
+#include <linux/irqchip/arm-gic.h>
 #include <linux/platform_data/omap4-keypad.h>
 #include <linux/usb/musb.h>
 
-#include <asm/hardware/gic.h>
 #include <asm/mach-types.h>
 #include <asm/mach/arch.h>
 #include <asm/mach/map.h>
diff --git a/arch/arm/mach-omap2/board-omap4panda.c b/arch/arm/mach-omap2/board-omap4panda.c
index 4df66c907f28..37495bc9a59c 100644
--- a/arch/arm/mach-omap2/board-omap4panda.c
+++ b/arch/arm/mach-omap2/board-omap4panda.c
@@ -31,9 +31,9 @@
 #include <linux/ti_wilink_st.h>
 #include <linux/usb/musb.h>
 #include <linux/wl12xx.h>
+#include <linux/irqchip/arm-gic.h>
 #include <linux/platform_data/omap-abe-twl6040.h>
 
-#include <asm/hardware/gic.h>
 #include <asm/mach-types.h>
 #include <asm/mach/arch.h>
 #include <asm/mach/map.h>
diff --git a/arch/arm/mach-omap2/omap-smp.c b/arch/arm/mach-omap2/omap-smp.c
index 668172a5f9ea..361677983af0 100644
--- a/arch/arm/mach-omap2/omap-smp.c
+++ b/arch/arm/mach-omap2/omap-smp.c
@@ -19,9 +19,9 @@
 #include <linux/device.h>
 #include <linux/smp.h>
 #include <linux/io.h>
+#include <linux/irqchip/arm-gic.h>
 
 #include <asm/cacheflush.h>
-#include <asm/hardware/gic.h>
 #include <asm/smp_scu.h>
 
 #include "omap-secure.h"
diff --git a/arch/arm/mach-omap2/omap-wakeupgen.c b/arch/arm/mach-omap2/omap-wakeupgen.c
index 5d3b4f4f81ae..8c5b5e3e3541 100644
--- a/arch/arm/mach-omap2/omap-wakeupgen.c
+++ b/arch/arm/mach-omap2/omap-wakeupgen.c
@@ -24,8 +24,7 @@
 #include <linux/cpu.h>
 #include <linux/notifier.h>
 #include <linux/cpu_pm.h>
-
-#include <asm/hardware/gic.h>
+#include <linux/irqchip/arm-gic.h>
 
 #include "omap-wakeupgen.h"
 #include "omap-secure.h"
diff --git a/arch/arm/mach-omap2/omap4-common.c b/arch/arm/mach-omap2/omap4-common.c
index addd16106d68..547094883606 100644
--- a/arch/arm/mach-omap2/omap4-common.c
+++ b/arch/arm/mach-omap2/omap4-common.c
@@ -21,8 +21,8 @@
 #include <linux/of_irq.h>
 #include <linux/of_platform.h>
 #include <linux/export.h>
+#include <linux/irqchip/arm-gic.h>
 
-#include <asm/hardware/gic.h>
 #include <asm/hardware/cache-l2x0.h>
 #include <asm/mach/map.h>
 #include <asm/memblock.h>
diff --git a/arch/arm/mach-realview/realview_eb.c b/arch/arm/mach-realview/realview_eb.c
index 3a7375805898..728587c60770 100644
--- a/arch/arm/mach-realview/realview_eb.c
+++ b/arch/arm/mach-realview/realview_eb.c
@@ -27,13 +27,13 @@
 #include <linux/amba/mmci.h>
 #include <linux/amba/pl022.h>
 #include <linux/io.h>
+#include <linux/irqchip/arm-gic.h>
 #include <linux/platform_data/clk-realview.h>
 
 #include <mach/hardware.h>
 #include <asm/irq.h>
 #include <asm/mach-types.h>
 #include <asm/pgtable.h>
-#include <asm/hardware/gic.h>
 #include <asm/hardware/cache-l2x0.h>
 #include <asm/smp_twd.h>
 
diff --git a/arch/arm/mach-realview/realview_pb1176.c b/arch/arm/mach-realview/realview_pb1176.c
index afa967bb153a..3d116c44c0d4 100644
--- a/arch/arm/mach-realview/realview_pb1176.c
+++ b/arch/arm/mach-realview/realview_pb1176.c
@@ -29,13 +29,13 @@
 #include <linux/mtd/physmap.h>
 #include <linux/mtd/partitions.h>
 #include <linux/io.h>
+#include <linux/irqchip/arm-gic.h>
 #include <linux/platform_data/clk-realview.h>
 
 #include <mach/hardware.h>
 #include <asm/irq.h>
 #include <asm/mach-types.h>
 #include <asm/pgtable.h>
-#include <asm/hardware/gic.h>
 #include <asm/hardware/cache-l2x0.h>
 
 #include <asm/mach/arch.h>
diff --git a/arch/arm/mach-realview/realview_pb11mp.c b/arch/arm/mach-realview/realview_pb11mp.c
index d8a25b4f1031..e1b8efdf35b6 100644
--- a/arch/arm/mach-realview/realview_pb11mp.c
+++ b/arch/arm/mach-realview/realview_pb11mp.c
@@ -27,13 +27,13 @@
 #include <linux/amba/mmci.h>
 #include <linux/amba/pl022.h>
 #include <linux/io.h>
+#include <linux/irqchip/arm-gic.h>
 #include <linux/platform_data/clk-realview.h>
 
 #include <mach/hardware.h>
 #include <asm/irq.h>
 #include <asm/mach-types.h>
 #include <asm/pgtable.h>
-#include <asm/hardware/gic.h>
 #include <asm/hardware/cache-l2x0.h>
 #include <asm/smp_twd.h>
 
diff --git a/arch/arm/mach-realview/realview_pba8.c b/arch/arm/mach-realview/realview_pba8.c
index 5391d0a91aa3..0b037f23b0f6 100644
--- a/arch/arm/mach-realview/realview_pba8.c
+++ b/arch/arm/mach-realview/realview_pba8.c
@@ -27,12 +27,12 @@
 #include <linux/amba/mmci.h>
 #include <linux/amba/pl022.h>
 #include <linux/io.h>
+#include <linux/irqchip/arm-gic.h>
 #include <linux/platform_data/clk-realview.h>
 
 #include <asm/irq.h>
 #include <asm/mach-types.h>
 #include <asm/pgtable.h>
-#include <asm/hardware/gic.h>
 
 #include <asm/mach/arch.h>
 #include <asm/mach/map.h>
diff --git a/arch/arm/mach-realview/realview_pbx.c b/arch/arm/mach-realview/realview_pbx.c
index bfb834b24588..a1f1ef557c67 100644
--- a/arch/arm/mach-realview/realview_pbx.c
+++ b/arch/arm/mach-realview/realview_pbx.c
@@ -26,13 +26,13 @@
 #include <linux/amba/mmci.h>
 #include <linux/amba/pl022.h>
 #include <linux/io.h>
+#include <linux/irqchip/arm-gic.h>
 #include <linux/platform_data/clk-realview.h>
 
 #include <asm/irq.h>
 #include <asm/mach-types.h>
 #include <asm/smp_twd.h>
 #include <asm/pgtable.h>
-#include <asm/hardware/gic.h>
 #include <asm/hardware/cache-l2x0.h>
 
 #include <asm/mach/arch.h>
diff --git a/arch/arm/mach-shmobile/board-ag5evm.c b/arch/arm/mach-shmobile/board-ag5evm.c
index 81b31f548c34..fd28358e2abe 100644
--- a/arch/arm/mach-shmobile/board-ag5evm.c
+++ b/arch/arm/mach-shmobile/board-ag5evm.c
@@ -40,6 +40,7 @@
 #include <linux/mmc/sh_mobile_sdhi.h>
 #include <linux/mfd/tmio.h>
 #include <linux/sh_clk.h>
+#include <linux/irqchip/arm-gic.h>
 #include <video/sh_mobile_lcdc.h>
 #include <video/sh_mipi_dsi.h>
 #include <sound/sh_fsi.h>
@@ -49,7 +50,6 @@
 #include <mach/common.h>
 #include <asm/mach-types.h>
 #include <asm/mach/arch.h>
-#include <asm/hardware/gic.h>
 #include <asm/hardware/cache-l2x0.h>
 #include <asm/traps.h>
 
diff --git a/arch/arm/mach-shmobile/board-kota2.c b/arch/arm/mach-shmobile/board-kota2.c
index 2df8e600d0fb..a1d315fc329a 100644
--- a/arch/arm/mach-shmobile/board-kota2.c
+++ b/arch/arm/mach-shmobile/board-kota2.c
@@ -35,6 +35,7 @@
 #include <linux/input/sh_keysc.h>
 #include <linux/gpio_keys.h>
 #include <linux/leds.h>
+#include <linux/irqchip/arm-gic.h>
 #include <linux/platform_data/leds-renesas-tpu.h>
 #include <linux/mmc/host.h>
 #include <linux/mmc/sh_mmcif.h>
@@ -47,7 +48,6 @@
 #include <asm/mach-types.h>
 #include <asm/mach/arch.h>
 #include <asm/mach/time.h>
-#include <asm/hardware/gic.h>
 #include <asm/hardware/cache-l2x0.h>
 #include <asm/traps.h>
 
diff --git a/arch/arm/mach-shmobile/board-kzm9g.c b/arch/arm/mach-shmobile/board-kzm9g.c
index d1607586af7b..278c8f26f22e 100644
--- a/arch/arm/mach-shmobile/board-kzm9g.c
+++ b/arch/arm/mach-shmobile/board-kzm9g.c
@@ -25,6 +25,7 @@
 #include <linux/i2c.h>
 #include <linux/i2c/pcf857x.h>
 #include <linux/input.h>
+#include <linux/irqchip/arm-gic.h>
 #include <linux/mmc/host.h>
 #include <linux/mmc/sh_mmcif.h>
 #include <linux/mmc/sh_mobile_sdhi.h>
@@ -42,7 +43,6 @@
 #include <mach/sh73a0.h>
 #include <mach/common.h>
 #include <asm/hardware/cache-l2x0.h>
-#include <asm/hardware/gic.h>
 #include <asm/mach-types.h>
 #include <asm/mach/arch.h>
 #include <video/sh_mobile_lcdc.h>
diff --git a/arch/arm/mach-shmobile/intc-r8a7779.c b/arch/arm/mach-shmobile/intc-r8a7779.c
index ef66f1a8aa2e..8807c27f71f9 100644
--- a/arch/arm/mach-shmobile/intc-r8a7779.c
+++ b/arch/arm/mach-shmobile/intc-r8a7779.c
@@ -22,10 +22,10 @@
 #include <linux/interrupt.h>
 #include <linux/irq.h>
 #include <linux/io.h>
+#include <linux/irqchip/arm-gic.h>
 #include <mach/common.h>
 #include <mach/intc.h>
 #include <mach/r8a7779.h>
-#include <asm/hardware/gic.h>
 #include <asm/mach-types.h>
 #include <asm/mach/arch.h>
 
diff --git a/arch/arm/mach-shmobile/intc-sh73a0.c b/arch/arm/mach-shmobile/intc-sh73a0.c
index f0c5e5190601..978369973be4 100644
--- a/arch/arm/mach-shmobile/intc-sh73a0.c
+++ b/arch/arm/mach-shmobile/intc-sh73a0.c
@@ -23,10 +23,10 @@
 #include <linux/irq.h>
 #include <linux/io.h>
 #include <linux/sh_intc.h>
+#include <linux/irqchip/arm-gic.h>
 #include <mach/intc.h>
 #include <mach/irqs.h>
 #include <mach/sh73a0.h>
-#include <asm/hardware/gic.h>
 #include <asm/mach-types.h>
 #include <asm/mach/arch.h>
 
diff --git a/arch/arm/mach-shmobile/setup-emev2.c b/arch/arm/mach-shmobile/setup-emev2.c
index 50040344bf28..646d61b22153 100644
--- a/arch/arm/mach-shmobile/setup-emev2.c
+++ b/arch/arm/mach-shmobile/setup-emev2.c
@@ -27,6 +27,7 @@
 #include <linux/delay.h>
 #include <linux/input.h>
 #include <linux/io.h>
+#include <linux/irqchip/arm-gic.h>
 #include <mach/hardware.h>
 #include <mach/common.h>
 #include <mach/emev2.h>
@@ -35,7 +36,6 @@
 #include <asm/mach/arch.h>
 #include <asm/mach/map.h>
 #include <asm/mach/time.h>
-#include <asm/hardware/gic.h>
 
 static struct map_desc emev2_io_desc[] __initdata = {
 #ifdef CONFIG_SMP
diff --git a/arch/arm/mach-shmobile/smp-emev2.c b/arch/arm/mach-shmobile/smp-emev2.c
index 6262d6776f5d..953eb1f9388d 100644
--- a/arch/arm/mach-shmobile/smp-emev2.c
+++ b/arch/arm/mach-shmobile/smp-emev2.c
@@ -23,11 +23,11 @@
 #include <linux/spinlock.h>
 #include <linux/io.h>
 #include <linux/delay.h>
+#include <linux/irqchip/arm-gic.h>
 #include <mach/common.h>
 #include <mach/emev2.h>
 #include <asm/smp_plat.h>
 #include <asm/smp_scu.h>
-#include <asm/hardware/gic.h>
 #include <asm/cacheflush.h>
 
 #define EMEV2_SCU_BASE 0x1e000000
diff --git a/arch/arm/mach-shmobile/smp-r8a7779.c b/arch/arm/mach-shmobile/smp-r8a7779.c
index 2ce6af9a6a37..3a4acf23edcf 100644
--- a/arch/arm/mach-shmobile/smp-r8a7779.c
+++ b/arch/arm/mach-shmobile/smp-r8a7779.c
@@ -23,12 +23,12 @@
 #include <linux/spinlock.h>
 #include <linux/io.h>
 #include <linux/delay.h>
+#include <linux/irqchip/arm-gic.h>
 #include <mach/common.h>
 #include <mach/r8a7779.h>
 #include <asm/smp_plat.h>
 #include <asm/smp_scu.h>
 #include <asm/smp_twd.h>
-#include <asm/hardware/gic.h>
 
 #define AVECR IOMEM(0xfe700040)
 
diff --git a/arch/arm/mach-shmobile/smp-sh73a0.c b/arch/arm/mach-shmobile/smp-sh73a0.c
index 624f00f70abf..5c5bcb595350 100644
--- a/arch/arm/mach-shmobile/smp-sh73a0.c
+++ b/arch/arm/mach-shmobile/smp-sh73a0.c
@@ -23,12 +23,12 @@
 #include <linux/spinlock.h>
 #include <linux/io.h>
 #include <linux/delay.h>
+#include <linux/irqchip/arm-gic.h>
 #include <mach/common.h>
 #include <asm/smp_plat.h>
 #include <mach/sh73a0.h>
 #include <asm/smp_scu.h>
 #include <asm/smp_twd.h>
-#include <asm/hardware/gic.h>
 
 #define WUPCR		IOMEM(0xe6151010)
 #define SRESCR		IOMEM(0xe6151018)
diff --git a/arch/arm/mach-socfpga/platsmp.c b/arch/arm/mach-socfpga/platsmp.c
index 98a22200104f..4e9e69d9e7de 100644
--- a/arch/arm/mach-socfpga/platsmp.c
+++ b/arch/arm/mach-socfpga/platsmp.c
@@ -22,9 +22,9 @@
 #include <linux/io.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
+#include <linux/irqchip/arm-gic.h>
 
 #include <asm/cacheflush.h>
-#include <asm/hardware/gic.h>
 #include <asm/smp_scu.h>
 #include <asm/smp_plat.h>
 
diff --git a/arch/arm/mach-spear13xx/platsmp.c b/arch/arm/mach-spear13xx/platsmp.c
index 27e3f69c6274..af4ade61cd95 100644
--- a/arch/arm/mach-spear13xx/platsmp.c
+++ b/arch/arm/mach-spear13xx/platsmp.c
@@ -15,8 +15,8 @@
 #include <linux/jiffies.h>
 #include <linux/io.h>
 #include <linux/smp.h>
+#include <linux/irqchip/arm-gic.h>
 #include <asm/cacheflush.h>
-#include <asm/hardware/gic.h>
 #include <asm/smp_scu.h>
 #include <mach/spear.h>
 #include <mach/generic.h>
diff --git a/arch/arm/mach-tegra/irq.c b/arch/arm/mach-tegra/irq.c
index b7886f183511..2ff2128cb9d8 100644
--- a/arch/arm/mach-tegra/irq.c
+++ b/arch/arm/mach-tegra/irq.c
@@ -22,8 +22,7 @@
 #include <linux/irq.h>
 #include <linux/io.h>
 #include <linux/of.h>
-
-#include <asm/hardware/gic.h>
+#include <linux/irqchip/arm-gic.h>
 
 #include "board.h"
 #include "iomap.h"
diff --git a/arch/arm/mach-tegra/platsmp.c b/arch/arm/mach-tegra/platsmp.c
index d8e6754e77a9..18d7290cf93b 100644
--- a/arch/arm/mach-tegra/platsmp.c
+++ b/arch/arm/mach-tegra/platsmp.c
@@ -18,9 +18,9 @@
 #include <linux/jiffies.h>
 #include <linux/smp.h>
 #include <linux/io.h>
+#include <linux/irqchip/arm-gic.h>
 
 #include <asm/cacheflush.h>
-#include <asm/hardware/gic.h>
 #include <asm/mach-types.h>
 #include <asm/smp_scu.h>
 
diff --git a/arch/arm/mach-ux500/cpu.c b/arch/arm/mach-ux500/cpu.c
index a5931f736658..5dd90d31ffc3 100644
--- a/arch/arm/mach-ux500/cpu.c
+++ b/arch/arm/mach-ux500/cpu.c
@@ -18,9 +18,9 @@
 #include <linux/of_irq.h>
 #include <linux/irq.h>
 #include <linux/irqchip.h>
+#include <linux/irqchip/arm-gic.h>
 #include <linux/platform_data/clk-ux500.h>
 
-#include <asm/hardware/gic.h>
 #include <asm/mach/map.h>
 
 #include <mach/hardware.h>
diff --git a/arch/arm/mach-ux500/platsmp.c b/arch/arm/mach-ux500/platsmp.c
index fa07d4de6d85..b8adac93421f 100644
--- a/arch/arm/mach-ux500/platsmp.c
+++ b/arch/arm/mach-ux500/platsmp.c
@@ -16,9 +16,9 @@
 #include <linux/device.h>
 #include <linux/smp.h>
 #include <linux/io.h>
+#include <linux/irqchip/arm-gic.h>
 
 #include <asm/cacheflush.h>
-#include <asm/hardware/gic.h>
 #include <asm/smp_plat.h>
 #include <asm/smp_scu.h>
 #include <mach/hardware.h>
diff --git a/arch/arm/mach-vexpress/ct-ca9x4.c b/arch/arm/mach-vexpress/ct-ca9x4.c
index 0ad050f5c652..6f34497a4245 100644
--- a/arch/arm/mach-vexpress/ct-ca9x4.c
+++ b/arch/arm/mach-vexpress/ct-ca9x4.c
@@ -10,10 +10,10 @@
 #include <linux/amba/clcd.h>
 #include <linux/clkdev.h>
 #include <linux/vexpress.h>
+#include <linux/irqchip/arm-gic.h>
 
 #include <asm/hardware/arm_timer.h>
 #include <asm/hardware/cache-l2x0.h>
-#include <asm/hardware/gic.h>
 #include <asm/smp_scu.h>
 #include <asm/smp_twd.h>
 
diff --git a/arch/arm/plat-versatile/platsmp.c b/arch/arm/plat-versatile/platsmp.c
index 2336024d1b76..f2ac15561778 100644
--- a/arch/arm/plat-versatile/platsmp.c
+++ b/arch/arm/plat-versatile/platsmp.c
@@ -14,10 +14,10 @@
 #include <linux/device.h>
 #include <linux/jiffies.h>
 #include <linux/smp.h>
+#include <linux/irqchip/arm-gic.h>
 
 #include <asm/cacheflush.h>
 #include <asm/smp_plat.h>
-#include <asm/hardware/gic.h>
 
 /*
  * Write pen_release in a way that is guaranteed to be visible to all
diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index dc511a4a0757..69d9a395d54c 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -38,12 +38,12 @@
 #include <linux/interrupt.h>
 #include <linux/percpu.h>
 #include <linux/slab.h>
+#include <linux/irqchip/arm-gic.h>
 
 #include <asm/irq.h>
 #include <asm/exception.h>
 #include <asm/smp_plat.h>
 #include <asm/mach/irq.h>
-#include <asm/hardware/gic.h>
 
 #include "irqchip.h"
 
diff --git a/drivers/mfd/db8500-prcmu.c b/drivers/mfd/db8500-prcmu.c
index dc8826d8d69d..13f4ccf2612d 100644
--- a/drivers/mfd/db8500-prcmu.c
+++ b/drivers/mfd/db8500-prcmu.c
@@ -26,13 +26,13 @@
 #include <linux/fs.h>
 #include <linux/platform_device.h>
 #include <linux/uaccess.h>
+#include <linux/irqchip/arm-gic.h>
 #include <linux/mfd/core.h>
 #include <linux/mfd/dbx500-prcmu.h>
 #include <linux/mfd/abx500/ab8500.h>
 #include <linux/regulator/db8500-prcmu.h>
 #include <linux/regulator/machine.h>
 #include <linux/cpufreq.h>
-#include <asm/hardware/gic.h>
 #include <mach/hardware.h>
 #include <mach/irqs.h>
 #include <mach/db8500-regs.h>
diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h
new file mode 100644
index 000000000000..a67ca55e6f4e
--- /dev/null
+++ b/include/linux/irqchip/arm-gic.h
@@ -0,0 +1,48 @@
+/*
+ *  include/linux/irqchip/arm-gic.h
+ *
+ *  Copyright (C) 2002 ARM Limited, All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __LINUX_IRQCHIP_ARM_GIC_H
+#define __LINUX_IRQCHIP_ARM_GIC_H
+
+#define GIC_CPU_CTRL			0x00
+#define GIC_CPU_PRIMASK			0x04
+#define GIC_CPU_BINPOINT		0x08
+#define GIC_CPU_INTACK			0x0c
+#define GIC_CPU_EOI			0x10
+#define GIC_CPU_RUNNINGPRI		0x14
+#define GIC_CPU_HIGHPRI			0x18
+
+#define GIC_DIST_CTRL			0x000
+#define GIC_DIST_CTR			0x004
+#define GIC_DIST_ENABLE_SET		0x100
+#define GIC_DIST_ENABLE_CLEAR		0x180
+#define GIC_DIST_PENDING_SET		0x200
+#define GIC_DIST_PENDING_CLEAR		0x280
+#define GIC_DIST_ACTIVE_BIT		0x300
+#define GIC_DIST_PRI			0x400
+#define GIC_DIST_TARGET			0x800
+#define GIC_DIST_CONFIG			0xc00
+#define GIC_DIST_SOFTINT		0xf00
+
+struct device_node;
+
+extern struct irq_chip gic_arch_extn;
+
+void gic_init_bases(unsigned int, int, void __iomem *, void __iomem *,
+		    u32 offset, struct device_node *);
+void gic_secondary_init(unsigned int);
+void gic_cascade_irq(unsigned int gic_nr, unsigned int irq);
+
+static inline void gic_init(unsigned int nr, int start,
+			    void __iomem *dist , void __iomem *cpu)
+{
+	gic_init_bases(nr, start, dist, cpu, 0, NULL);
+}
+
+#endif
-- 
cgit v1.2.3


From 9e47b8bf9815523a5816f2f83e73b13812d74014 Mon Sep 17 00:00:00 2001
From: Rob Herring <rob.herring@calxeda.com>
Date: Mon, 7 Jan 2013 09:45:59 -0600
Subject: irqchip: Move ARM vic.h to include/linux/irqchip/arm-vic.h

Now that we have VIC moved to drivers/irqchip and all VIC DT init for
platforms using irqchip_init, move gic.h and update the remaining
includes.

Signed-off-by: Rob Herring <rob.herring@calxeda.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Hartley Sweeten <hsweeten@visionengravers.com>
Cc: Ryan Mallon <rmallon@gmail.com>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Alessandro Rubini <rubini@unipv.it>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Cc: STEricsson <STEricsson_nomadik_linux@list.st.com>
Cc: Ben Dooks <ben-linux@fluff.org>
Cc: Kukjin Kim <kgene.kim@samsung.com>
Cc: linux-samsung-soc@vger.kernel.org
---
 arch/arm/include/asm/hardware/vic.h           | 37 ---------------------------
 arch/arm/mach-ep93xx/core.c                   |  3 +--
 arch/arm/mach-netx/generic.c                  |  2 +-
 arch/arm/mach-nomadik/cpu-8815.c              |  2 +-
 arch/arm/mach-s3c64xx/common.c                |  2 +-
 arch/arm/mach-s3c64xx/include/mach/regs-irq.h |  1 -
 arch/arm/mach-s3c64xx/include/mach/tick.h     |  2 ++
 arch/arm/mach-s5pc100/include/mach/regs-irq.h |  1 -
 arch/arm/mach-s5pc100/include/mach/tick.h     |  2 ++
 arch/arm/mach-u300/core.c                     |  2 +-
 arch/arm/mach-versatile/core.c                |  2 +-
 arch/arm/plat-samsung/s5p-irq-eint.c          |  3 +--
 arch/arm/plat-samsung/s5p-irq.c               |  3 +--
 drivers/irqchip/irq-vic.c                     |  2 +-
 include/linux/irqchip/arm-vic.h               | 36 ++++++++++++++++++++++++++
 15 files changed, 49 insertions(+), 51 deletions(-)
 delete mode 100644 arch/arm/include/asm/hardware/vic.h
 create mode 100644 include/linux/irqchip/arm-vic.h

(limited to 'include/linux')

diff --git a/arch/arm/include/asm/hardware/vic.h b/arch/arm/include/asm/hardware/vic.h
deleted file mode 100644
index de7aad1e255c..000000000000
--- a/arch/arm/include/asm/hardware/vic.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- *  arch/arm/include/asm/hardware/vic.h
- *
- *  Copyright (c) ARM Limited 2003.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
-#ifndef __ASM_ARM_HARDWARE_VIC_H
-#define __ASM_ARM_HARDWARE_VIC_H
-
-#include <linux/types.h>
-
-#define VIC_RAW_STATUS			0x08
-#define VIC_INT_ENABLE			0x10	/* 1 = enable, 0 = disable */
-#define VIC_INT_ENABLE_CLEAR		0x14
-
-struct device_node;
-struct pt_regs;
-
-void __vic_init(void __iomem *base, int irq_start, u32 vic_sources,
-		u32 resume_sources, struct device_node *node);
-void vic_init(void __iomem *base, unsigned int irq_start, u32 vic_sources, u32 resume_sources);
-int vic_of_init(struct device_node *node, struct device_node *parent);
-
-#endif
diff --git a/arch/arm/mach-ep93xx/core.c b/arch/arm/mach-ep93xx/core.c
index e85bf17f2d2a..c533f37aad2f 100644
--- a/arch/arm/mach-ep93xx/core.c
+++ b/arch/arm/mach-ep93xx/core.c
@@ -34,6 +34,7 @@
 #include <linux/i2c-gpio.h>
 #include <linux/spi/spi.h>
 #include <linux/export.h>
+#include <linux/irqchip/arm-vic.h>
 
 #include <mach/hardware.h>
 #include <linux/platform_data/video-ep93xx.h>
@@ -44,8 +45,6 @@
 #include <asm/mach/map.h>
 #include <asm/mach/time.h>
 
-#include <asm/hardware/vic.h>
-
 #include "soc.h"
 
 /*************************************************************************
diff --git a/arch/arm/mach-netx/generic.c b/arch/arm/mach-netx/generic.c
index aa627465d914..27c2cb7ab813 100644
--- a/arch/arm/mach-netx/generic.c
+++ b/arch/arm/mach-netx/generic.c
@@ -23,9 +23,9 @@
 #include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/io.h>
+#include <linux/irqchip/arm-vic.h>
 #include <mach/hardware.h>
 #include <asm/mach/map.h>
-#include <asm/hardware/vic.h>
 #include <mach/netx-regs.h>
 #include <asm/mach/irq.h>
 
diff --git a/arch/arm/mach-nomadik/cpu-8815.c b/arch/arm/mach-nomadik/cpu-8815.c
index 1273931303fb..351404673f6c 100644
--- a/arch/arm/mach-nomadik/cpu-8815.c
+++ b/arch/arm/mach-nomadik/cpu-8815.c
@@ -25,13 +25,13 @@
 #include <linux/slab.h>
 #include <linux/irq.h>
 #include <linux/dma-mapping.h>
+#include <linux/irqchip/arm-vic.h>
 #include <linux/platform_data/clk-nomadik.h>
 #include <linux/platform_data/pinctrl-nomadik.h>
 
 #include <mach/hardware.h>
 #include <mach/irqs.h>
 #include <asm/mach/map.h>
-#include <asm/hardware/vic.h>
 
 #include <asm/cacheflush.h>
 #include <asm/hardware/cache-l2x0.h>
diff --git a/arch/arm/mach-s3c64xx/common.c b/arch/arm/mach-s3c64xx/common.c
index aef303b8997e..0b9c0ba44834 100644
--- a/arch/arm/mach-s3c64xx/common.c
+++ b/arch/arm/mach-s3c64xx/common.c
@@ -25,10 +25,10 @@
 #include <linux/dma-mapping.h>
 #include <linux/irq.h>
 #include <linux/gpio.h>
+#include <linux/irqchip/arm-vic.h>
 
 #include <asm/mach/arch.h>
 #include <asm/mach/map.h>
-#include <asm/hardware/vic.h>
 #include <asm/system_misc.h>
 
 #include <mach/map.h>
diff --git a/arch/arm/mach-s3c64xx/include/mach/regs-irq.h b/arch/arm/mach-s3c64xx/include/mach/regs-irq.h
index bcce68a0bb75..6a1127891c87 100644
--- a/arch/arm/mach-s3c64xx/include/mach/regs-irq.h
+++ b/arch/arm/mach-s3c64xx/include/mach/regs-irq.h
@@ -15,6 +15,5 @@
 #ifndef __ASM_ARCH_REGS_IRQ_H
 #define __ASM_ARCH_REGS_IRQ_H __FILE__
 
-#include <asm/hardware/vic.h>
 
 #endif /* __ASM_ARCH_6400_REGS_IRQ_H */
diff --git a/arch/arm/mach-s3c64xx/include/mach/tick.h b/arch/arm/mach-s3c64xx/include/mach/tick.h
index ebe18a9469b8..db9c1b1d56a4 100644
--- a/arch/arm/mach-s3c64xx/include/mach/tick.h
+++ b/arch/arm/mach-s3c64xx/include/mach/tick.h
@@ -15,6 +15,8 @@
 #ifndef __ASM_ARCH_TICK_H
 #define __ASM_ARCH_TICK_H __FILE__
 
+#include <linux/irqchip/arm-vic.h>
+
 /* note, the timer interrutps turn up in 2 places, the vic and then
  * the timer block. We take the VIC as the base at the moment.
  */
diff --git a/arch/arm/mach-s5pc100/include/mach/regs-irq.h b/arch/arm/mach-s5pc100/include/mach/regs-irq.h
index 4d9036d0f288..761627897f30 100644
--- a/arch/arm/mach-s5pc100/include/mach/regs-irq.h
+++ b/arch/arm/mach-s5pc100/include/mach/regs-irq.h
@@ -14,6 +14,5 @@
 #define __ASM_ARCH_REGS_IRQ_H __FILE__
 
 #include <mach/map.h>
-#include <asm/hardware/vic.h>
 
 #endif /* __ASM_ARCH_REGS_IRQ_H */
diff --git a/arch/arm/mach-s5pc100/include/mach/tick.h b/arch/arm/mach-s5pc100/include/mach/tick.h
index 20f68730ed18..0af8e41230ed 100644
--- a/arch/arm/mach-s5pc100/include/mach/tick.h
+++ b/arch/arm/mach-s5pc100/include/mach/tick.h
@@ -15,6 +15,8 @@
 #ifndef __ASM_ARCH_TICK_H
 #define __ASM_ARCH_TICK_H __FILE__
 
+#include <linux/irqchip/arm-vic.h>
+
 /* note, the timer interrutps turn up in 2 places, the vic and then
  * the timer block. We take the VIC as the base at the moment.
  */
diff --git a/arch/arm/mach-u300/core.c b/arch/arm/mach-u300/core.c
index a9d3bcfa5c7e..26b48fa9ea62 100644
--- a/arch/arm/mach-u300/core.c
+++ b/arch/arm/mach-u300/core.c
@@ -31,11 +31,11 @@
 #include <linux/dma-mapping.h>
 #include <linux/platform_data/clk-u300.h>
 #include <linux/platform_data/pinctrl-coh901.h>
+#include <linux/irqchip/arm-vic.h>
 
 #include <asm/types.h>
 #include <asm/setup.h>
 #include <asm/memory.h>
-#include <asm/hardware/vic.h>
 #include <asm/mach/map.h>
 #include <asm/mach-types.h>
 #include <asm/mach/arch.h>
diff --git a/arch/arm/mach-versatile/core.c b/arch/arm/mach-versatile/core.c
index 5d5929450366..49ac384d2814 100644
--- a/arch/arm/mach-versatile/core.c
+++ b/arch/arm/mach-versatile/core.c
@@ -32,6 +32,7 @@
 #include <linux/amba/mmci.h>
 #include <linux/amba/pl022.h>
 #include <linux/io.h>
+#include <linux/irqchip/arm-vic.h>
 #include <linux/irqchip/versatile-fpga.h>
 #include <linux/gfp.h>
 #include <linux/clkdev.h>
@@ -40,7 +41,6 @@
 #include <asm/irq.h>
 #include <asm/hardware/arm_timer.h>
 #include <asm/hardware/icst.h>
-#include <asm/hardware/vic.h>
 #include <asm/mach-types.h>
 
 #include <asm/mach/arch.h>
diff --git a/arch/arm/plat-samsung/s5p-irq-eint.c b/arch/arm/plat-samsung/s5p-irq-eint.c
index 33bd3f3d20f5..faa651602780 100644
--- a/arch/arm/plat-samsung/s5p-irq-eint.c
+++ b/arch/arm/plat-samsung/s5p-irq-eint.c
@@ -15,8 +15,7 @@
 #include <linux/io.h>
 #include <linux/device.h>
 #include <linux/gpio.h>
-
-#include <asm/hardware/vic.h>
+#include <linux/irqchip/arm-vic.h>
 
 #include <plat/regs-irqtype.h>
 
diff --git a/arch/arm/plat-samsung/s5p-irq.c b/arch/arm/plat-samsung/s5p-irq.c
index dfb47d638f03..103e371f5e35 100644
--- a/arch/arm/plat-samsung/s5p-irq.c
+++ b/arch/arm/plat-samsung/s5p-irq.c
@@ -13,8 +13,7 @@
 #include <linux/interrupt.h>
 #include <linux/irq.h>
 #include <linux/io.h>
-
-#include <asm/hardware/vic.h>
+#include <linux/irqchip/arm-vic.h>
 
 #include <mach/map.h>
 #include <plat/regs-timer.h>
diff --git a/drivers/irqchip/irq-vic.c b/drivers/irqchip/irq-vic.c
index e9a8a7d2fec7..3cf97aaebe40 100644
--- a/drivers/irqchip/irq-vic.c
+++ b/drivers/irqchip/irq-vic.c
@@ -30,10 +30,10 @@
 #include <linux/syscore_ops.h>
 #include <linux/device.h>
 #include <linux/amba/bus.h>
+#include <linux/irqchip/arm-vic.h>
 
 #include <asm/exception.h>
 #include <asm/mach/irq.h>
-#include <asm/hardware/vic.h>
 
 #include "irqchip.h"
 
diff --git a/include/linux/irqchip/arm-vic.h b/include/linux/irqchip/arm-vic.h
new file mode 100644
index 000000000000..e3c82dc95756
--- /dev/null
+++ b/include/linux/irqchip/arm-vic.h
@@ -0,0 +1,36 @@
+/*
+ *  arch/arm/include/asm/hardware/vic.h
+ *
+ *  Copyright (c) ARM Limited 2003.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+#ifndef __ASM_ARM_HARDWARE_VIC_H
+#define __ASM_ARM_HARDWARE_VIC_H
+
+#include <linux/types.h>
+
+#define VIC_RAW_STATUS			0x08
+#define VIC_INT_ENABLE			0x10	/* 1 = enable, 0 = disable */
+#define VIC_INT_ENABLE_CLEAR		0x14
+
+struct device_node;
+struct pt_regs;
+
+void __vic_init(void __iomem *base, int irq_start, u32 vic_sources,
+		u32 resume_sources, struct device_node *node);
+void vic_init(void __iomem *base, unsigned int irq_start, u32 vic_sources, u32 resume_sources);
+
+#endif
-- 
cgit v1.2.3


From 3a95b9fbba893ebfa9b83de105707539e0228e0c Mon Sep 17 00:00:00 2001
From: Alessandro Rubini <rubini@gnudd.com>
Date: Sat, 24 Nov 2012 00:22:56 +0000
Subject: pl080.h: moved from arm/include/asm/hardware to include/linux/amba/

The header is used by drivers/dma/amba-pl08x.c, which can be compiled
under x86, where PL080 exists under a PCI-to-AMBA bridge. This patche
moves it where it can be accessed by other architectures, and fixes
all users.

Signed-off-by: Alessandro Rubini <rubini@gnudd.com>
Acked-by: Giancarlo Asnaghi <giancarlo.asnaghi@st.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 arch/arm/include/asm/hardware/pl080.h | 146 ----------------------------------
 arch/arm/mach-s3c64xx/dma.c           |   2 +-
 arch/arm/mach-spear3xx/spear3xx.c     |   1 -
 arch/arm/mach-spear6xx/spear6xx.c     |   2 +-
 drivers/dma/amba-pl08x.c              |   2 +-
 include/linux/amba/pl080.h            | 146 ++++++++++++++++++++++++++++++++++
 6 files changed, 149 insertions(+), 150 deletions(-)
 delete mode 100644 arch/arm/include/asm/hardware/pl080.h
 create mode 100644 include/linux/amba/pl080.h

(limited to 'include/linux')

diff --git a/arch/arm/include/asm/hardware/pl080.h b/arch/arm/include/asm/hardware/pl080.h
deleted file mode 100644
index 4eea2107214b..000000000000
--- a/arch/arm/include/asm/hardware/pl080.h
+++ /dev/null
@@ -1,146 +0,0 @@
-/* arch/arm/include/asm/hardware/pl080.h
- *
- * Copyright 2008 Openmoko, Inc.
- * Copyright 2008 Simtec Electronics
- *      http://armlinux.simtec.co.uk/
- *      Ben Dooks <ben@simtec.co.uk>
- *
- * ARM PrimeCell PL080 DMA controller
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
-*/
-
-/* Note, there are some Samsung updates to this controller block which
- * make it not entierly compatible with the PL080 specification from
- * ARM. When in doubt, check the Samsung documentation first.
- *
- * The Samsung defines are PL080S, and add an extra control register,
- * the ability to move more than 2^11 counts of data and some extra
- * OneNAND features.
-*/
-
-#ifndef ASM_PL080_H
-#define ASM_PL080_H
-
-#define PL080_INT_STATUS			(0x00)
-#define PL080_TC_STATUS				(0x04)
-#define PL080_TC_CLEAR				(0x08)
-#define PL080_ERR_STATUS			(0x0C)
-#define PL080_ERR_CLEAR				(0x10)
-#define PL080_RAW_TC_STATUS			(0x14)
-#define PL080_RAW_ERR_STATUS			(0x18)
-#define PL080_EN_CHAN				(0x1c)
-#define PL080_SOFT_BREQ				(0x20)
-#define PL080_SOFT_SREQ				(0x24)
-#define PL080_SOFT_LBREQ			(0x28)
-#define PL080_SOFT_LSREQ			(0x2C)
-
-#define PL080_CONFIG				(0x30)
-#define PL080_CONFIG_M2_BE			(1 << 2)
-#define PL080_CONFIG_M1_BE			(1 << 1)
-#define PL080_CONFIG_ENABLE			(1 << 0)
-
-#define PL080_SYNC				(0x34)
-
-/* Per channel configuration registers */
-
-#define PL080_Cx_STRIDE				(0x20)
-#define PL080_Cx_BASE(x)			((0x100 + (x * 0x20)))
-#define PL080_Cx_SRC_ADDR(x)			((0x100 + (x * 0x20)))
-#define PL080_Cx_DST_ADDR(x)			((0x104 + (x * 0x20)))
-#define PL080_Cx_LLI(x)				((0x108 + (x * 0x20)))
-#define PL080_Cx_CONTROL(x)			((0x10C + (x * 0x20)))
-#define PL080_Cx_CONFIG(x)			((0x110 + (x * 0x20)))
-#define PL080S_Cx_CONTROL2(x)			((0x110 + (x * 0x20)))
-#define PL080S_Cx_CONFIG(x)			((0x114 + (x * 0x20)))
-
-#define PL080_CH_SRC_ADDR			(0x00)
-#define PL080_CH_DST_ADDR			(0x04)
-#define PL080_CH_LLI				(0x08)
-#define PL080_CH_CONTROL			(0x0C)
-#define PL080_CH_CONFIG				(0x10)
-#define PL080S_CH_CONTROL2			(0x10)
-#define PL080S_CH_CONFIG			(0x14)
-
-#define PL080_LLI_ADDR_MASK			(0x3fffffff << 2)
-#define PL080_LLI_ADDR_SHIFT			(2)
-#define PL080_LLI_LM_AHB2			(1 << 0)
-
-#define PL080_CONTROL_TC_IRQ_EN			(1 << 31)
-#define PL080_CONTROL_PROT_MASK			(0x7 << 28)
-#define PL080_CONTROL_PROT_SHIFT		(28)
-#define PL080_CONTROL_PROT_CACHE		(1 << 30)
-#define PL080_CONTROL_PROT_BUFF			(1 << 29)
-#define PL080_CONTROL_PROT_SYS			(1 << 28)
-#define PL080_CONTROL_DST_INCR			(1 << 27)
-#define PL080_CONTROL_SRC_INCR			(1 << 26)
-#define PL080_CONTROL_DST_AHB2			(1 << 25)
-#define PL080_CONTROL_SRC_AHB2			(1 << 24)
-#define PL080_CONTROL_DWIDTH_MASK		(0x7 << 21)
-#define PL080_CONTROL_DWIDTH_SHIFT		(21)
-#define PL080_CONTROL_SWIDTH_MASK		(0x7 << 18)
-#define PL080_CONTROL_SWIDTH_SHIFT		(18)
-#define PL080_CONTROL_DB_SIZE_MASK		(0x7 << 15)
-#define PL080_CONTROL_DB_SIZE_SHIFT		(15)
-#define PL080_CONTROL_SB_SIZE_MASK		(0x7 << 12)
-#define PL080_CONTROL_SB_SIZE_SHIFT		(12)
-#define PL080_CONTROL_TRANSFER_SIZE_MASK	(0xfff << 0)
-#define PL080_CONTROL_TRANSFER_SIZE_SHIFT	(0)
-
-#define PL080_BSIZE_1				(0x0)
-#define PL080_BSIZE_4				(0x1)
-#define PL080_BSIZE_8				(0x2)
-#define PL080_BSIZE_16				(0x3)
-#define PL080_BSIZE_32				(0x4)
-#define PL080_BSIZE_64				(0x5)
-#define PL080_BSIZE_128				(0x6)
-#define PL080_BSIZE_256				(0x7)
-
-#define PL080_WIDTH_8BIT			(0x0)
-#define PL080_WIDTH_16BIT			(0x1)
-#define PL080_WIDTH_32BIT			(0x2)
-
-#define PL080N_CONFIG_ITPROT			(1 << 20)
-#define PL080N_CONFIG_SECPROT			(1 << 19)
-#define PL080_CONFIG_HALT			(1 << 18)
-#define PL080_CONFIG_ACTIVE			(1 << 17)  /* RO */
-#define PL080_CONFIG_LOCK			(1 << 16)
-#define PL080_CONFIG_TC_IRQ_MASK		(1 << 15)
-#define PL080_CONFIG_ERR_IRQ_MASK		(1 << 14)
-#define PL080_CONFIG_FLOW_CONTROL_MASK		(0x7 << 11)
-#define PL080_CONFIG_FLOW_CONTROL_SHIFT		(11)
-#define PL080_CONFIG_DST_SEL_MASK		(0xf << 6)
-#define PL080_CONFIG_DST_SEL_SHIFT		(6)
-#define PL080_CONFIG_SRC_SEL_MASK		(0xf << 1)
-#define PL080_CONFIG_SRC_SEL_SHIFT		(1)
-#define PL080_CONFIG_ENABLE			(1 << 0)
-
-#define PL080_FLOW_MEM2MEM			(0x0)
-#define PL080_FLOW_MEM2PER			(0x1)
-#define PL080_FLOW_PER2MEM			(0x2)
-#define PL080_FLOW_SRC2DST			(0x3)
-#define PL080_FLOW_SRC2DST_DST			(0x4)
-#define PL080_FLOW_MEM2PER_PER			(0x5)
-#define PL080_FLOW_PER2MEM_PER			(0x6)
-#define PL080_FLOW_SRC2DST_SRC			(0x7)
-
-/* DMA linked list chain structure */
-
-struct pl080_lli {
-	u32	src_addr;
-	u32	dst_addr;
-	u32	next_lli;
-	u32	control0;
-};
-
-struct pl080s_lli {
-	u32	src_addr;
-	u32	dst_addr;
-	u32	next_lli;
-	u32	control0;
-	u32	control1;
-};
-
-#endif /* ASM_PL080_H */
diff --git a/arch/arm/mach-s3c64xx/dma.c b/arch/arm/mach-s3c64xx/dma.c
index f2a7a1725596..a77f5214bbe8 100644
--- a/arch/arm/mach-s3c64xx/dma.c
+++ b/arch/arm/mach-s3c64xx/dma.c
@@ -23,6 +23,7 @@
 #include <linux/clk.h>
 #include <linux/err.h>
 #include <linux/io.h>
+#include <linux/amba/pl080.h>
 
 #include <mach/dma.h>
 #include <mach/map.h>
@@ -30,7 +31,6 @@
 
 #include <mach/regs-sys.h>
 
-#include <asm/hardware/pl080.h>
 
 /* dma channel state information */
 
diff --git a/arch/arm/mach-spear3xx/spear3xx.c b/arch/arm/mach-spear3xx/spear3xx.c
index 38fe95db31a7..3d9b1b5e8ed9 100644
--- a/arch/arm/mach-spear3xx/spear3xx.c
+++ b/arch/arm/mach-spear3xx/spear3xx.c
@@ -18,7 +18,6 @@
 #include <linux/irqchip/spear-shirq.h>
 #include <linux/of_irq.h>
 #include <linux/io.h>
-#include <asm/hardware/pl080.h>
 #include <asm/hardware/vic.h>
 #include <plat/pl080.h>
 #include <mach/generic.h>
diff --git a/arch/arm/mach-spear6xx/spear6xx.c b/arch/arm/mach-spear6xx/spear6xx.c
index 5a5a52db252b..8ce65a23b06e 100644
--- a/arch/arm/mach-spear6xx/spear6xx.c
+++ b/arch/arm/mach-spear6xx/spear6xx.c
@@ -20,7 +20,7 @@
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
 #include <linux/of_platform.h>
-#include <asm/hardware/pl080.h>
+#include <linux/amba/pl080.h>
 #include <asm/hardware/vic.h>
 #include <asm/mach/arch.h>
 #include <asm/mach/time.h>
diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c
index 6eb6a5c210bb..8bad254a498d 100644
--- a/drivers/dma/amba-pl08x.c
+++ b/drivers/dma/amba-pl08x.c
@@ -83,7 +83,7 @@
 #include <linux/pm_runtime.h>
 #include <linux/seq_file.h>
 #include <linux/slab.h>
-#include <asm/hardware/pl080.h>
+#include <linux/amba/pl080.h>
 
 #include "dmaengine.h"
 #include "virt-dma.h"
diff --git a/include/linux/amba/pl080.h b/include/linux/amba/pl080.h
new file mode 100644
index 000000000000..3e7b62fbefbd
--- /dev/null
+++ b/include/linux/amba/pl080.h
@@ -0,0 +1,146 @@
+/* include/linux/amba/pl080.h
+ *
+ * Copyright 2008 Openmoko, Inc.
+ * Copyright 2008 Simtec Electronics
+ *      http://armlinux.simtec.co.uk/
+ *      Ben Dooks <ben@simtec.co.uk>
+ *
+ * ARM PrimeCell PL080 DMA controller
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+*/
+
+/* Note, there are some Samsung updates to this controller block which
+ * make it not entierly compatible with the PL080 specification from
+ * ARM. When in doubt, check the Samsung documentation first.
+ *
+ * The Samsung defines are PL080S, and add an extra control register,
+ * the ability to move more than 2^11 counts of data and some extra
+ * OneNAND features.
+*/
+
+#ifndef ASM_PL080_H
+#define ASM_PL080_H
+
+#define PL080_INT_STATUS			(0x00)
+#define PL080_TC_STATUS				(0x04)
+#define PL080_TC_CLEAR				(0x08)
+#define PL080_ERR_STATUS			(0x0C)
+#define PL080_ERR_CLEAR				(0x10)
+#define PL080_RAW_TC_STATUS			(0x14)
+#define PL080_RAW_ERR_STATUS			(0x18)
+#define PL080_EN_CHAN				(0x1c)
+#define PL080_SOFT_BREQ				(0x20)
+#define PL080_SOFT_SREQ				(0x24)
+#define PL080_SOFT_LBREQ			(0x28)
+#define PL080_SOFT_LSREQ			(0x2C)
+
+#define PL080_CONFIG				(0x30)
+#define PL080_CONFIG_M2_BE			(1 << 2)
+#define PL080_CONFIG_M1_BE			(1 << 1)
+#define PL080_CONFIG_ENABLE			(1 << 0)
+
+#define PL080_SYNC				(0x34)
+
+/* Per channel configuration registers */
+
+#define PL080_Cx_STRIDE				(0x20)
+#define PL080_Cx_BASE(x)			((0x100 + (x * 0x20)))
+#define PL080_Cx_SRC_ADDR(x)			((0x100 + (x * 0x20)))
+#define PL080_Cx_DST_ADDR(x)			((0x104 + (x * 0x20)))
+#define PL080_Cx_LLI(x)				((0x108 + (x * 0x20)))
+#define PL080_Cx_CONTROL(x)			((0x10C + (x * 0x20)))
+#define PL080_Cx_CONFIG(x)			((0x110 + (x * 0x20)))
+#define PL080S_Cx_CONTROL2(x)			((0x110 + (x * 0x20)))
+#define PL080S_Cx_CONFIG(x)			((0x114 + (x * 0x20)))
+
+#define PL080_CH_SRC_ADDR			(0x00)
+#define PL080_CH_DST_ADDR			(0x04)
+#define PL080_CH_LLI				(0x08)
+#define PL080_CH_CONTROL			(0x0C)
+#define PL080_CH_CONFIG				(0x10)
+#define PL080S_CH_CONTROL2			(0x10)
+#define PL080S_CH_CONFIG			(0x14)
+
+#define PL080_LLI_ADDR_MASK			(0x3fffffff << 2)
+#define PL080_LLI_ADDR_SHIFT			(2)
+#define PL080_LLI_LM_AHB2			(1 << 0)
+
+#define PL080_CONTROL_TC_IRQ_EN			(1 << 31)
+#define PL080_CONTROL_PROT_MASK			(0x7 << 28)
+#define PL080_CONTROL_PROT_SHIFT		(28)
+#define PL080_CONTROL_PROT_CACHE		(1 << 30)
+#define PL080_CONTROL_PROT_BUFF			(1 << 29)
+#define PL080_CONTROL_PROT_SYS			(1 << 28)
+#define PL080_CONTROL_DST_INCR			(1 << 27)
+#define PL080_CONTROL_SRC_INCR			(1 << 26)
+#define PL080_CONTROL_DST_AHB2			(1 << 25)
+#define PL080_CONTROL_SRC_AHB2			(1 << 24)
+#define PL080_CONTROL_DWIDTH_MASK		(0x7 << 21)
+#define PL080_CONTROL_DWIDTH_SHIFT		(21)
+#define PL080_CONTROL_SWIDTH_MASK		(0x7 << 18)
+#define PL080_CONTROL_SWIDTH_SHIFT		(18)
+#define PL080_CONTROL_DB_SIZE_MASK		(0x7 << 15)
+#define PL080_CONTROL_DB_SIZE_SHIFT		(15)
+#define PL080_CONTROL_SB_SIZE_MASK		(0x7 << 12)
+#define PL080_CONTROL_SB_SIZE_SHIFT		(12)
+#define PL080_CONTROL_TRANSFER_SIZE_MASK	(0xfff << 0)
+#define PL080_CONTROL_TRANSFER_SIZE_SHIFT	(0)
+
+#define PL080_BSIZE_1				(0x0)
+#define PL080_BSIZE_4				(0x1)
+#define PL080_BSIZE_8				(0x2)
+#define PL080_BSIZE_16				(0x3)
+#define PL080_BSIZE_32				(0x4)
+#define PL080_BSIZE_64				(0x5)
+#define PL080_BSIZE_128				(0x6)
+#define PL080_BSIZE_256				(0x7)
+
+#define PL080_WIDTH_8BIT			(0x0)
+#define PL080_WIDTH_16BIT			(0x1)
+#define PL080_WIDTH_32BIT			(0x2)
+
+#define PL080N_CONFIG_ITPROT			(1 << 20)
+#define PL080N_CONFIG_SECPROT			(1 << 19)
+#define PL080_CONFIG_HALT			(1 << 18)
+#define PL080_CONFIG_ACTIVE			(1 << 17)  /* RO */
+#define PL080_CONFIG_LOCK			(1 << 16)
+#define PL080_CONFIG_TC_IRQ_MASK		(1 << 15)
+#define PL080_CONFIG_ERR_IRQ_MASK		(1 << 14)
+#define PL080_CONFIG_FLOW_CONTROL_MASK		(0x7 << 11)
+#define PL080_CONFIG_FLOW_CONTROL_SHIFT		(11)
+#define PL080_CONFIG_DST_SEL_MASK		(0xf << 6)
+#define PL080_CONFIG_DST_SEL_SHIFT		(6)
+#define PL080_CONFIG_SRC_SEL_MASK		(0xf << 1)
+#define PL080_CONFIG_SRC_SEL_SHIFT		(1)
+#define PL080_CONFIG_ENABLE			(1 << 0)
+
+#define PL080_FLOW_MEM2MEM			(0x0)
+#define PL080_FLOW_MEM2PER			(0x1)
+#define PL080_FLOW_PER2MEM			(0x2)
+#define PL080_FLOW_SRC2DST			(0x3)
+#define PL080_FLOW_SRC2DST_DST			(0x4)
+#define PL080_FLOW_MEM2PER_PER			(0x5)
+#define PL080_FLOW_PER2MEM_PER			(0x6)
+#define PL080_FLOW_SRC2DST_SRC			(0x7)
+
+/* DMA linked list chain structure */
+
+struct pl080_lli {
+	u32	src_addr;
+	u32	dst_addr;
+	u32	next_lli;
+	u32	control0;
+};
+
+struct pl080s_lli {
+	u32	src_addr;
+	u32	dst_addr;
+	u32	next_lli;
+	u32	control0;
+	u32	control1;
+};
+
+#endif /* ASM_PL080_H */
-- 
cgit v1.2.3


From 6c0cc950ae670403a362bdcbf3cde0df33744928 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Wed, 9 Jan 2013 22:33:37 +0100
Subject: ACPI / PCI: Set root bridge ACPI handle in advance

The ACPI handles of PCI root bridges need to be known to
acpi_bind_one(), so that it can create the appropriate
"firmware_node" and "physical_node" files for them, but currently
the way it gets to know those handles is not exactly straightforward
(to put it lightly).

This is how it works, roughly:

  1. acpi_bus_scan() finds the handle of a PCI root bridge,
     creates a struct acpi_device object for it and passes that
     object to acpi_pci_root_add().

  2. acpi_pci_root_add() creates a struct acpi_pci_root object,
     populates its "device" field with its argument's address
     (device->handle is the ACPI handle found in step 1).

  3. The struct acpi_pci_root object created in step 2 is passed
     to pci_acpi_scan_root() and used to get resources that are
     passed to pci_create_root_bus().

  4. pci_create_root_bus() creates a struct pci_host_bridge object
     and passes its "dev" member to device_register().

  5. platform_notify(), which for systems with ACPI is set to
     acpi_platform_notify(), is called.

So far, so good.  Now it starts to be "interesting".

  6. acpi_find_bridge_device() is used to find the ACPI handle of
     the given device (which is the PCI root bridge) and executes
     acpi_pci_find_root_bridge(), among other things, for the
     given device object.

  7. acpi_pci_find_root_bridge() uses the name (sic!) of the given
     device object to extract the segment and bus numbers of the PCI
     root bridge and passes them to acpi_get_pci_rootbridge_handle().

  8. acpi_get_pci_rootbridge_handle() browses the list of ACPI PCI
     root bridges and finds the one that matches the given segment
     and bus numbers.  Its handle is then used to initialize the
     ACPI handle of the PCI root bridge's device object by
     acpi_bind_one().  However, this is *exactly* the ACPI handle we
     started with in step 1.

Needless to say, this is quite embarassing, but it may be avoided
thanks to commit f3fd0c8 (ACPI: Allow ACPI handles of devices to be
initialized in advance), which makes it possible to initialize the
ACPI handle of a device before passing it to device_register().

Accordingly, add a new __weak routine, pcibios_root_bridge_prepare(),
defaulting to an empty implementation that can be replaced by the
interested architecutres (x86 and ia64 at the moment) with functions
that will set the root bridge's ACPI handle before its dev member is
passed to device_register().  Make both x86 and ia64 provide such
implementations of pcibios_root_bridge_prepare() and remove
acpi_pci_find_root_bridge() and acpi_get_pci_rootbridge_handle() that
aren't necessary any more.

Included is a fix for breakage on systems with non-ACPI PCI host
bridges from Bjorn Helgaas.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 arch/ia64/pci/pci.c        |  8 ++++++++
 arch/x86/include/asm/pci.h |  3 +++
 arch/x86/pci/acpi.c        |  9 +++++++++
 drivers/acpi/pci_root.c    | 18 ------------------
 drivers/pci/pci-acpi.c     | 19 -------------------
 drivers/pci/probe.c        | 16 ++++++++++++++++
 include/acpi/acpi_bus.h    |  1 -
 include/linux/pci.h        |  2 ++
 8 files changed, 38 insertions(+), 38 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c
index 5faa66c5c2a8..00e59c7ad3c0 100644
--- a/arch/ia64/pci/pci.c
+++ b/arch/ia64/pci/pci.c
@@ -396,6 +396,14 @@ out1:
 	return NULL;
 }
 
+int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge)
+{
+	struct pci_controller *controller = bridge->bus->sysdata;
+
+	ACPI_HANDLE_SET(&bridge->dev, controller->acpi_handle);
+	return 0;
+}
+
 static int __devinit is_valid_resource(struct pci_dev *dev, int idx)
 {
 	unsigned int i, type_mask = IORESOURCE_IO | IORESOURCE_MEM;
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index dba7805176bf..9f437e97e9e8 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -14,6 +14,9 @@
 struct pci_sysdata {
 	int		domain;		/* PCI domain */
 	int		node;		/* NUMA node */
+#ifdef CONFIG_ACPI
+	void		*acpi;		/* ACPI-specific data */
+#endif
 #ifdef CONFIG_X86_64
 	void		*iommu;		/* IOMMU private data */
 #endif
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index 0c01261fe5a8..3d49094ed3e8 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -522,6 +522,7 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_pci_root *root)
 	sd = &info->sd;
 	sd->domain = domain;
 	sd->node = node;
+	sd->acpi = device->handle;
 	/*
 	 * Maybe the desired pci bus has been already scanned. In such case
 	 * it is unnecessary to scan the pci bus with the given domain,busnum.
@@ -593,6 +594,14 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_pci_root *root)
 	return bus;
 }
 
+int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge)
+{
+	struct pci_sysdata *sd = bridge->bus->sysdata;
+
+	ACPI_HANDLE_SET(&bridge->dev, sd->acpi);
+	return 0;
+}
+
 int __init pci_acpi_init(void)
 {
 	struct pci_dev *dev = NULL;
diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c
index 471b2dcb1c67..bf5108ad4d63 100644
--- a/drivers/acpi/pci_root.c
+++ b/drivers/acpi/pci_root.c
@@ -107,24 +107,6 @@ void acpi_pci_unregister_driver(struct acpi_pci_driver *driver)
 }
 EXPORT_SYMBOL(acpi_pci_unregister_driver);
 
-acpi_handle acpi_get_pci_rootbridge_handle(unsigned int seg, unsigned int bus)
-{
-	struct acpi_pci_root *root;
-	acpi_handle handle = NULL;
-	
-	mutex_lock(&acpi_pci_root_lock);
-	list_for_each_entry(root, &acpi_pci_roots, node)
-		if ((root->segment == (u16) seg) &&
-		    (root->secondary.start == (u16) bus)) {
-			handle = root->device->handle;
-			break;
-		}
-	mutex_unlock(&acpi_pci_root_lock);
-	return handle;
-}
-
-EXPORT_SYMBOL_GPL(acpi_get_pci_rootbridge_handle);
-
 /**
  * acpi_is_root_bridge - determine whether an ACPI CA node is a PCI root bridge
  * @handle - the ACPI CA node in question.
diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c
index 42736e213f25..1c2587c40299 100644
--- a/drivers/pci/pci-acpi.c
+++ b/drivers/pci/pci-acpi.c
@@ -302,24 +302,6 @@ static int acpi_pci_find_device(struct device *dev, acpi_handle *handle)
 	return 0;
 }
 
-static int acpi_pci_find_root_bridge(struct device *dev, acpi_handle *handle)
-{
-	int num;
-	unsigned int seg, bus;
-
-	/*
-	 * The string should be the same as root bridge's name
-	 * Please look at 'pci_scan_bus_parented'
-	 */
-	num = sscanf(dev_name(dev), "pci%04x:%02x", &seg, &bus);
-	if (num != 2)
-		return -ENODEV;
-	*handle = acpi_get_pci_rootbridge_handle(seg, bus);
-	if (!*handle)
-		return -ENODEV;
-	return 0;
-}
-
 static void pci_acpi_setup(struct device *dev)
 {
 	struct pci_dev *pci_dev = to_pci_dev(dev);
@@ -378,7 +360,6 @@ static void pci_acpi_cleanup(struct device *dev)
 static struct acpi_bus_type acpi_pci_bus = {
 	.bus = &pci_bus_type,
 	.find_device = acpi_pci_find_device,
-	.find_bridge = acpi_pci_find_root_bridge,
 	.setup = pci_acpi_setup,
 	.cleanup = pci_acpi_cleanup,
 };
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 2dcd22d9c816..bbe4be7fc685 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -1632,6 +1632,18 @@ unsigned int pci_scan_child_bus(struct pci_bus *bus)
 	return max;
 }
 
+/**
+ * pcibios_root_bridge_prepare - Platform-specific host bridge setup.
+ * @bridge: Host bridge to set up.
+ *
+ * Default empty implementation.  Replace with an architecture-specific setup
+ * routine, if necessary.
+ */
+int __weak pcibios_root_bridge_prepare(struct pci_host_bridge *bridge)
+{
+	return 0;
+}
+
 struct pci_bus *pci_create_root_bus(struct device *parent, int bus,
 		struct pci_ops *ops, void *sysdata, struct list_head *resources)
 {
@@ -1665,6 +1677,10 @@ struct pci_bus *pci_create_root_bus(struct device *parent, int bus,
 	bridge->dev.parent = parent;
 	bridge->dev.release = pci_release_bus_bridge_dev;
 	dev_set_name(&bridge->dev, "pci%04x:%02x", pci_domain_nr(b), bus);
+	error = pcibios_root_bridge_prepare(bridge);
+	if (error)
+		goto bridge_dev_reg_err;
+
 	error = device_register(&bridge->dev);
 	if (error)
 		goto bridge_dev_reg_err;
diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index a9e1421cd007..796ccc3247df 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -402,7 +402,6 @@ struct acpi_pci_root {
 /* helper */
 acpi_handle acpi_get_child(acpi_handle, u64);
 int acpi_is_root_bridge(acpi_handle);
-acpi_handle acpi_get_pci_rootbridge_handle(unsigned int, unsigned int);
 struct acpi_pci_root *acpi_pci_find_root(acpi_handle handle);
 #define DEVICE_ACPI_HANDLE(dev) ((acpi_handle)ACPI_HANDLE(dev))
 
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 907b455ab603..6860f4dec997 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -378,6 +378,8 @@ void pci_set_host_bridge_release(struct pci_host_bridge *bridge,
 		     void (*release_fn)(struct pci_host_bridge *),
 		     void *release_data);
 
+int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge);
+
 /*
  * The first PCI_BRIDGE_RESOURCE_NUM PCI bus resources (those that correspond
  * to P2P or CardBus bridge windows) go in a table.  Additional ones (for
-- 
cgit v1.2.3


From e7219858ac1f98213a4714d0e24e7a003e1bf6a2 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki / 吉藤英明 <yoshfuji@linux-ipv6.org>
Date: Sun, 13 Jan 2013 05:02:01 +0000
Subject: ipv6: Use ipv6_get_dsfield() instead of ipv6_tclass().

Commit 7a3198a8 ("ipv6: helper function to get tclass") introduced
ipv6_tclass(), but similar function is already available as
ipv6_get_dsfield().

We might be able to call ipv6_tclass() from ipv6_get_dsfield(),
but it is confusing to have two versions.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ipv6.h | 5 -----
 net/ipv6/datagram.c  | 3 ++-
 net/ipv6/tcp_ipv6.c  | 6 +++---
 3 files changed, 5 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index faed1e357dd6..304a9f46b578 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -77,11 +77,6 @@ static inline struct ipv6hdr *ipipv6_hdr(const struct sk_buff *skb)
 	return (struct ipv6hdr *)skb_transport_header(skb);
 }
 
-static inline __u8 ipv6_tclass(const struct ipv6hdr *iph)
-{
-	return (ntohl(*(__be32 *)iph) >> 20) & 0xff;
-}
-
 /* 
    This structure contains results of exthdrs parsing
    as offsets from skb->nh.
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 7f65df41c396..33be36398a78 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -30,6 +30,7 @@
 #include <net/transp_v6.h>
 #include <net/ip6_route.h>
 #include <net/tcp_states.h>
+#include <net/dsfield.h>
 
 #include <linux/errqueue.h>
 #include <asm/uaccess.h>
@@ -487,7 +488,7 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
 	}
 
 	if (np->rxopt.bits.rxtclass) {
-		int tclass = ipv6_tclass(ipv6_hdr(skb));
+		int tclass = ipv6_get_dsfield(ipv6_hdr(skb));
 		put_cmsg(msg, SOL_IPV6, IPV6_TCLASS, sizeof(tclass), &tclass);
 	}
 
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 3164ad272a74..3701c3c6e2eb 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1163,7 +1163,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 		newnp->opt	   = NULL;
 		newnp->mcast_oif   = inet6_iif(skb);
 		newnp->mcast_hops  = ipv6_hdr(skb)->hop_limit;
-		newnp->rcv_tclass  = ipv6_tclass(ipv6_hdr(skb));
+		newnp->rcv_tclass  = ipv6_get_dsfield(ipv6_hdr(skb));
 
 		/*
 		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
@@ -1243,7 +1243,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 	newnp->opt	  = NULL;
 	newnp->mcast_oif  = inet6_iif(skb);
 	newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
-	newnp->rcv_tclass = ipv6_tclass(ipv6_hdr(skb));
+	newnp->rcv_tclass = ipv6_get_dsfield(ipv6_hdr(skb));
 
 	/* Clone native IPv6 options from listening socket (if any)
 
@@ -1456,7 +1456,7 @@ ipv6_pktoptions:
 		if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
 			np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
 		if (np->rxopt.bits.rxtclass)
-			np->rcv_tclass = ipv6_tclass(ipv6_hdr(skb));
+			np->rcv_tclass = ipv6_get_dsfield(ipv6_hdr(skb));
 		if (ipv6_opt_accepted(sk, opt_skb)) {
 			skb_set_owner_r(opt_skb, sk);
 			opt_skb = xchg(&np->pktoptions, opt_skb);
-- 
cgit v1.2.3


From dd3332bfcb2223458f553f341d3388cb84040e6a Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki / 吉藤英明 <yoshfuji@linux-ipv6.org>
Date: Sun, 13 Jan 2013 05:02:45 +0000
Subject: ipv6: Store Router Alert option in IP6CB directly.

Router Alert option is very small and we can store the value
itself in the skb.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ipv6.h      | 3 ++-
 include/uapi/linux/ipv6.h | 2 ++
 net/ipv6/exthdrs.c        | 3 ++-
 net/ipv6/ip6_input.c      | 5 ++---
 4 files changed, 8 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 304a9f46b578..e971e3742172 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -84,7 +84,7 @@ static inline struct ipv6hdr *ipipv6_hdr(const struct sk_buff *skb)
 
 struct inet6_skb_parm {
 	int			iif;
-	__u16			ra;
+	__be16			ra;
 	__u16			hop;
 	__u16			dst0;
 	__u16			srcrt;
@@ -100,6 +100,7 @@ struct inet6_skb_parm {
 #define IP6SKB_XFRM_TRANSFORMED	1
 #define IP6SKB_FORWARDED	2
 #define IP6SKB_REROUTED		4
+#define IP6SKB_ROUTERALERT	8
 };
 
 #define IP6CB(skb)	((struct inet6_skb_parm*)((skb)->cb))
diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h
index 5a2991cf0251..4bda4cf5b0f5 100644
--- a/include/uapi/linux/ipv6.h
+++ b/include/uapi/linux/ipv6.h
@@ -63,6 +63,8 @@ struct ipv6_opt_hdr {
 #define ipv6_destopt_hdr ipv6_opt_hdr
 #define ipv6_hopopt_hdr  ipv6_opt_hdr
 
+/* Router Alert option values (RFC2711) */
+#define IPV6_OPT_ROUTERALERT_MLD	0x0000	/* MLD(RFC2710) */
 
 /*
  *	routing header type 0 (used in cmsghdr struct)
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 473f628f9f20..07a7d65a7cb6 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -553,7 +553,8 @@ static bool ipv6_hop_ra(struct sk_buff *skb, int optoff)
 	const unsigned char *nh = skb_network_header(skb);
 
 	if (nh[optoff + 1] == 2) {
-		IP6CB(skb)->ra = optoff;
+		IP6CB(skb)->flags |= IP6SKB_ROUTERALERT;
+		memcpy(&IP6CB(skb)->ra, nh + optoff + 2, sizeof(IP6CB(skb)->ra));
 		return true;
 	}
 	LIMIT_NETDEBUG(KERN_DEBUG "ipv6_hop_ra: wrong RA length %d\n",
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 2ccd35ec3628..4ac5bf30e16a 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -280,9 +280,8 @@ int ip6_mc_input(struct sk_buff *skb)
 		struct inet6_skb_parm *opt = IP6CB(skb);
 
 		/* Check for MLD */
-		if (unlikely(opt->ra)) {
+		if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
 			/* Check if this is a mld message */
-			u8 *ptr = skb_network_header(skb) + opt->ra;
 			u8 nexthdr = hdr->nexthdr;
 			__be16 frag_off;
 			int offset;
@@ -290,7 +289,7 @@ int ip6_mc_input(struct sk_buff *skb)
 			/* Check if the value of Router Alert
 			 * is for MLD (0x0000).
 			 */
-			if ((ptr[2] | ptr[3]) == 0) {
+			if (opt->ra == htons(IPV6_OPT_ROUTERALERT_MLD)) {
 				deliver = false;
 
 				if (!ipv6_ext_hdr(nexthdr)) {
-- 
cgit v1.2.3


From 25d46f43a911b08c5aa8c8fd4fe7fa9b36445068 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki / 吉藤英明 <yoshfuji@linux-ipv6.org>
Date: Sun, 13 Jan 2013 16:02:06 +0000
Subject: ipv6: Move comment to right place.

IN6ADDR_* and in6addr_* are not exported to userspace, and are defined
in include/linux/in6.h.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/in6.h      | 4 ++++
 include/uapi/linux/in6.h | 5 -----
 2 files changed, 4 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/in6.h b/include/linux/in6.h
index 9e2ae26fb598..a16e19349ec0 100644
--- a/include/linux/in6.h
+++ b/include/linux/in6.h
@@ -22,6 +22,10 @@
 
 #include <uapi/linux/in6.h>
 
+/* IPv6 Wildcard Address (::) and Loopback Address (::1) defined in RFC2553
+ * NOTE: Be aware the IN6ADDR_* constants and in6addr_* externals are defined
+ * in network byte order, not in host byte order as are the IPv4 equivalents
+ */
 extern const struct in6_addr in6addr_any;
 #define IN6ADDR_ANY_INIT { { { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } } }
 extern const struct in6_addr in6addr_loopback;
diff --git a/include/uapi/linux/in6.h b/include/uapi/linux/in6.h
index f79c3721da6e..5673b97dcf54 100644
--- a/include/uapi/linux/in6.h
+++ b/include/uapi/linux/in6.h
@@ -38,11 +38,6 @@ struct in6_addr {
 #define s6_addr32		in6_u.u6_addr32
 };
 
-/* IPv6 Wildcard Address (::) and Loopback Address (::1) defined in RFC2553
- * NOTE: Be aware the IN6ADDR_* constants and in6addr_* externals are defined
- * in network byte order, not in host byte order as are the IPv4 equivalents
- */
-
 struct sockaddr_in6 {
 	unsigned short int	sin6_family;    /* AF_INET6 */
 	__be16			sin6_port;      /* Transport layer port # */
-- 
cgit v1.2.3


From ff7ec345f0ece9ddbb28538b70ba0c7f0acc17dc Mon Sep 17 00:00:00 2001
From: Tony Prisk <linux@prisktech.co.nz>
Date: Mon, 14 Jan 2013 17:58:21 +1300
Subject: timer: vt8500: Move timer code to drivers/clocksource

This patch moves arch-vt8500/timer.c into drivers/clocksource and
updates the necessary Kconfig/Makefile options.

Signed-off-by: Tony Prisk <linux@prisktech.co.nz>
---
 arch/arm/mach-vt8500/Kconfig       |   1 +
 arch/arm/mach-vt8500/Makefile      |   2 +-
 arch/arm/mach-vt8500/common.h      |   1 -
 arch/arm/mach-vt8500/timer.c       | 184 -------------------------------------
 arch/arm/mach-vt8500/vt8500.c      |   1 +
 drivers/clocksource/Kconfig        |   3 +
 drivers/clocksource/Makefile       |   1 +
 drivers/clocksource/vt8500_timer.c | 184 +++++++++++++++++++++++++++++++++++++
 include/linux/vt8500_timer.h       |  22 +++++
 9 files changed, 213 insertions(+), 186 deletions(-)
 delete mode 100644 arch/arm/mach-vt8500/timer.c
 create mode 100644 drivers/clocksource/vt8500_timer.c
 create mode 100644 include/linux/vt8500_timer.h

(limited to 'include/linux')

diff --git a/arch/arm/mach-vt8500/Kconfig b/arch/arm/mach-vt8500/Kconfig
index 2ed0b7d95db6..570a801fb862 100644
--- a/arch/arm/mach-vt8500/Kconfig
+++ b/arch/arm/mach-vt8500/Kconfig
@@ -8,5 +8,6 @@ config ARCH_VT8500
 	select GENERIC_CLOCKEVENTS
 	select GENERIC_GPIO
 	select HAVE_CLK
+	select VT8500_TIMER
 	help
 	  Support for VIA/WonderMedia VT8500/WM85xx System-on-Chip.
diff --git a/arch/arm/mach-vt8500/Makefile b/arch/arm/mach-vt8500/Makefile
index e035251cda48..92ceb2436b60 100644
--- a/arch/arm/mach-vt8500/Makefile
+++ b/arch/arm/mach-vt8500/Makefile
@@ -1 +1 @@
-obj-$(CONFIG_ARCH_VT8500) += irq.o timer.o vt8500.o
+obj-$(CONFIG_ARCH_VT8500) += irq.o vt8500.o
diff --git a/arch/arm/mach-vt8500/common.h b/arch/arm/mach-vt8500/common.h
index 6f2b843115db..77611a6968d6 100644
--- a/arch/arm/mach-vt8500/common.h
+++ b/arch/arm/mach-vt8500/common.h
@@ -18,7 +18,6 @@
 
 #include <linux/of.h>
 
-void __init vt8500_timer_init(void);
 int __init vt8500_irq_init(struct device_node *node,
 				struct device_node *parent);
 
diff --git a/arch/arm/mach-vt8500/timer.c b/arch/arm/mach-vt8500/timer.c
deleted file mode 100644
index 3dd21a47881f..000000000000
--- a/arch/arm/mach-vt8500/timer.c
+++ /dev/null
@@ -1,184 +0,0 @@
-/*
- *  arch/arm/mach-vt8500/timer.c
- *
- *  Copyright (C) 2012 Tony Prisk <linux@prisktech.co.nz>
- *  Copyright (C) 2010 Alexey Charkov <alchark@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
-
-/*
- * This file is copied and modified from the original timer.c provided by
- * Alexey Charkov. Minor changes have been made for Device Tree Support.
- */
-
-#include <linux/io.h>
-#include <linux/irq.h>
-#include <linux/interrupt.h>
-#include <linux/clocksource.h>
-#include <linux/clockchips.h>
-#include <linux/delay.h>
-#include <asm/mach/time.h>
-
-#include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/of_irq.h>
-
-#define VT8500_TIMER_OFFSET	0x0100
-#define VT8500_TIMER_HZ		3000000
-#define TIMER_MATCH_VAL		0x0000
-#define TIMER_COUNT_VAL		0x0010
-#define TIMER_STATUS_VAL	0x0014
-#define TIMER_IER_VAL		0x001c		/* interrupt enable */
-#define TIMER_CTRL_VAL		0x0020
-#define TIMER_AS_VAL		0x0024		/* access status */
-#define TIMER_COUNT_R_ACTIVE	(1 << 5)	/* not ready for read */
-#define TIMER_COUNT_W_ACTIVE	(1 << 4)	/* not ready for write */
-#define TIMER_MATCH_W_ACTIVE	(1 << 0)	/* not ready for write */
-
-#define msecs_to_loops(t) (loops_per_jiffy / 1000 * HZ * t)
-
-static void __iomem *regbase;
-
-static cycle_t vt8500_timer_read(struct clocksource *cs)
-{
-	int loops = msecs_to_loops(10);
-	writel(3, regbase + TIMER_CTRL_VAL);
-	while ((readl((regbase + TIMER_AS_VAL)) & TIMER_COUNT_R_ACTIVE)
-						&& --loops)
-		cpu_relax();
-	return readl(regbase + TIMER_COUNT_VAL);
-}
-
-static struct clocksource clocksource = {
-	.name           = "vt8500_timer",
-	.rating         = 200,
-	.read           = vt8500_timer_read,
-	.mask           = CLOCKSOURCE_MASK(32),
-	.flags          = CLOCK_SOURCE_IS_CONTINUOUS,
-};
-
-static int vt8500_timer_set_next_event(unsigned long cycles,
-				    struct clock_event_device *evt)
-{
-	int loops = msecs_to_loops(10);
-	cycle_t alarm = clocksource.read(&clocksource) + cycles;
-	while ((readl(regbase + TIMER_AS_VAL) & TIMER_MATCH_W_ACTIVE)
-						&& --loops)
-		cpu_relax();
-	writel((unsigned long)alarm, regbase + TIMER_MATCH_VAL);
-
-	if ((signed)(alarm - clocksource.read(&clocksource)) <= 16)
-		return -ETIME;
-
-	writel(1, regbase + TIMER_IER_VAL);
-
-	return 0;
-}
-
-static void vt8500_timer_set_mode(enum clock_event_mode mode,
-			      struct clock_event_device *evt)
-{
-	switch (mode) {
-	case CLOCK_EVT_MODE_RESUME:
-	case CLOCK_EVT_MODE_PERIODIC:
-		break;
-	case CLOCK_EVT_MODE_ONESHOT:
-	case CLOCK_EVT_MODE_UNUSED:
-	case CLOCK_EVT_MODE_SHUTDOWN:
-		writel(readl(regbase + TIMER_CTRL_VAL) | 1,
-			regbase + TIMER_CTRL_VAL);
-		writel(0, regbase + TIMER_IER_VAL);
-		break;
-	}
-}
-
-static struct clock_event_device clockevent = {
-	.name           = "vt8500_timer",
-	.features       = CLOCK_EVT_FEAT_ONESHOT,
-	.rating         = 200,
-	.set_next_event = vt8500_timer_set_next_event,
-	.set_mode       = vt8500_timer_set_mode,
-};
-
-static irqreturn_t vt8500_timer_interrupt(int irq, void *dev_id)
-{
-	struct clock_event_device *evt = dev_id;
-	writel(0xf, regbase + TIMER_STATUS_VAL);
-	evt->event_handler(evt);
-
-	return IRQ_HANDLED;
-}
-
-static struct irqaction irq = {
-	.name    = "vt8500_timer",
-	.flags   = IRQF_DISABLED | IRQF_TIMER | IRQF_IRQPOLL,
-	.handler = vt8500_timer_interrupt,
-	.dev_id  = &clockevent,
-};
-
-static struct of_device_id vt8500_timer_ids[] = {
-	{ .compatible = "via,vt8500-timer" },
-	{ }
-};
-
-void __init vt8500_timer_init(void)
-{
-	struct device_node *np;
-	int timer_irq;
-
-	np = of_find_matching_node(NULL, vt8500_timer_ids);
-	if (!np) {
-		pr_err("%s: Timer description missing from Device Tree\n",
-								__func__);
-		return;
-	}
-	regbase = of_iomap(np, 0);
-	if (!regbase) {
-		pr_err("%s: Missing iobase description in Device Tree\n",
-								__func__);
-		of_node_put(np);
-		return;
-	}
-	timer_irq = irq_of_parse_and_map(np, 0);
-	if (!timer_irq) {
-		pr_err("%s: Missing irq description in Device Tree\n",
-								__func__);
-		of_node_put(np);
-		return;
-	}
-
-	writel(1, regbase + TIMER_CTRL_VAL);
-	writel(0xf, regbase + TIMER_STATUS_VAL);
-	writel(~0, regbase + TIMER_MATCH_VAL);
-
-	if (clocksource_register_hz(&clocksource, VT8500_TIMER_HZ))
-		pr_err("%s: vt8500_timer_init: clocksource_register failed for %s\n",
-					__func__, clocksource.name);
-
-	clockevents_calc_mult_shift(&clockevent, VT8500_TIMER_HZ, 4);
-
-	/* copy-pasted from mach-msm; no idea */
-	clockevent.max_delta_ns =
-		clockevent_delta2ns(0xf0000000, &clockevent);
-	clockevent.min_delta_ns = clockevent_delta2ns(4, &clockevent);
-	clockevent.cpumask = cpumask_of(0);
-
-	if (setup_irq(timer_irq, &irq))
-		pr_err("%s: setup_irq failed for %s\n", __func__,
-							clockevent.name);
-	clockevents_register_device(&clockevent);
-}
-
diff --git a/arch/arm/mach-vt8500/vt8500.c b/arch/arm/mach-vt8500/vt8500.c
index d5b9c6689c9d..b9fd9d3cbfb3 100644
--- a/arch/arm/mach-vt8500/vt8500.c
+++ b/arch/arm/mach-vt8500/vt8500.c
@@ -20,6 +20,7 @@
 
 #include <linux/io.h>
 #include <linux/pm.h>
+#include <linux/vt8500_timer.h>
 
 #include <asm/mach-types.h>
 #include <asm/mach/arch.h>
diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig
index a32b7a9c65d3..7d978c1bd528 100644
--- a/drivers/clocksource/Kconfig
+++ b/drivers/clocksource/Kconfig
@@ -28,6 +28,9 @@ config ARMADA_370_XP_TIMER
 config SUNXI_TIMER
 	bool
 
+config VT8500_TIMER
+	bool
+
 config CLKSRC_NOMADIK_MTU
 	bool
 	depends on (ARCH_NOMADIK || ARCH_U8500)
diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile
index a33f79240217..440449c1ca21 100644
--- a/drivers/clocksource/Makefile
+++ b/drivers/clocksource/Makefile
@@ -17,5 +17,6 @@ obj-$(CONFIG_CLKSRC_DBX500_PRCMU)	+= clksrc-dbx500-prcmu.o
 obj-$(CONFIG_ARMADA_370_XP_TIMER)	+= time-armada-370-xp.o
 obj-$(CONFIG_ARCH_BCM2835)	+= bcm2835_timer.o
 obj-$(CONFIG_SUNXI_TIMER)	+= sunxi_timer.o
+obj-$(CONFIG_VT8500_TIMER)	+= vt8500_timer.o
 
 obj-$(CONFIG_CLKSRC_ARM_GENERIC)	+= arm_generic.o
diff --git a/drivers/clocksource/vt8500_timer.c b/drivers/clocksource/vt8500_timer.c
new file mode 100644
index 000000000000..3dd21a47881f
--- /dev/null
+++ b/drivers/clocksource/vt8500_timer.c
@@ -0,0 +1,184 @@
+/*
+ *  arch/arm/mach-vt8500/timer.c
+ *
+ *  Copyright (C) 2012 Tony Prisk <linux@prisktech.co.nz>
+ *  Copyright (C) 2010 Alexey Charkov <alchark@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+/*
+ * This file is copied and modified from the original timer.c provided by
+ * Alexey Charkov. Minor changes have been made for Device Tree Support.
+ */
+
+#include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+#include <linux/clocksource.h>
+#include <linux/clockchips.h>
+#include <linux/delay.h>
+#include <asm/mach/time.h>
+
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+
+#define VT8500_TIMER_OFFSET	0x0100
+#define VT8500_TIMER_HZ		3000000
+#define TIMER_MATCH_VAL		0x0000
+#define TIMER_COUNT_VAL		0x0010
+#define TIMER_STATUS_VAL	0x0014
+#define TIMER_IER_VAL		0x001c		/* interrupt enable */
+#define TIMER_CTRL_VAL		0x0020
+#define TIMER_AS_VAL		0x0024		/* access status */
+#define TIMER_COUNT_R_ACTIVE	(1 << 5)	/* not ready for read */
+#define TIMER_COUNT_W_ACTIVE	(1 << 4)	/* not ready for write */
+#define TIMER_MATCH_W_ACTIVE	(1 << 0)	/* not ready for write */
+
+#define msecs_to_loops(t) (loops_per_jiffy / 1000 * HZ * t)
+
+static void __iomem *regbase;
+
+static cycle_t vt8500_timer_read(struct clocksource *cs)
+{
+	int loops = msecs_to_loops(10);
+	writel(3, regbase + TIMER_CTRL_VAL);
+	while ((readl((regbase + TIMER_AS_VAL)) & TIMER_COUNT_R_ACTIVE)
+						&& --loops)
+		cpu_relax();
+	return readl(regbase + TIMER_COUNT_VAL);
+}
+
+static struct clocksource clocksource = {
+	.name           = "vt8500_timer",
+	.rating         = 200,
+	.read           = vt8500_timer_read,
+	.mask           = CLOCKSOURCE_MASK(32),
+	.flags          = CLOCK_SOURCE_IS_CONTINUOUS,
+};
+
+static int vt8500_timer_set_next_event(unsigned long cycles,
+				    struct clock_event_device *evt)
+{
+	int loops = msecs_to_loops(10);
+	cycle_t alarm = clocksource.read(&clocksource) + cycles;
+	while ((readl(regbase + TIMER_AS_VAL) & TIMER_MATCH_W_ACTIVE)
+						&& --loops)
+		cpu_relax();
+	writel((unsigned long)alarm, regbase + TIMER_MATCH_VAL);
+
+	if ((signed)(alarm - clocksource.read(&clocksource)) <= 16)
+		return -ETIME;
+
+	writel(1, regbase + TIMER_IER_VAL);
+
+	return 0;
+}
+
+static void vt8500_timer_set_mode(enum clock_event_mode mode,
+			      struct clock_event_device *evt)
+{
+	switch (mode) {
+	case CLOCK_EVT_MODE_RESUME:
+	case CLOCK_EVT_MODE_PERIODIC:
+		break;
+	case CLOCK_EVT_MODE_ONESHOT:
+	case CLOCK_EVT_MODE_UNUSED:
+	case CLOCK_EVT_MODE_SHUTDOWN:
+		writel(readl(regbase + TIMER_CTRL_VAL) | 1,
+			regbase + TIMER_CTRL_VAL);
+		writel(0, regbase + TIMER_IER_VAL);
+		break;
+	}
+}
+
+static struct clock_event_device clockevent = {
+	.name           = "vt8500_timer",
+	.features       = CLOCK_EVT_FEAT_ONESHOT,
+	.rating         = 200,
+	.set_next_event = vt8500_timer_set_next_event,
+	.set_mode       = vt8500_timer_set_mode,
+};
+
+static irqreturn_t vt8500_timer_interrupt(int irq, void *dev_id)
+{
+	struct clock_event_device *evt = dev_id;
+	writel(0xf, regbase + TIMER_STATUS_VAL);
+	evt->event_handler(evt);
+
+	return IRQ_HANDLED;
+}
+
+static struct irqaction irq = {
+	.name    = "vt8500_timer",
+	.flags   = IRQF_DISABLED | IRQF_TIMER | IRQF_IRQPOLL,
+	.handler = vt8500_timer_interrupt,
+	.dev_id  = &clockevent,
+};
+
+static struct of_device_id vt8500_timer_ids[] = {
+	{ .compatible = "via,vt8500-timer" },
+	{ }
+};
+
+void __init vt8500_timer_init(void)
+{
+	struct device_node *np;
+	int timer_irq;
+
+	np = of_find_matching_node(NULL, vt8500_timer_ids);
+	if (!np) {
+		pr_err("%s: Timer description missing from Device Tree\n",
+								__func__);
+		return;
+	}
+	regbase = of_iomap(np, 0);
+	if (!regbase) {
+		pr_err("%s: Missing iobase description in Device Tree\n",
+								__func__);
+		of_node_put(np);
+		return;
+	}
+	timer_irq = irq_of_parse_and_map(np, 0);
+	if (!timer_irq) {
+		pr_err("%s: Missing irq description in Device Tree\n",
+								__func__);
+		of_node_put(np);
+		return;
+	}
+
+	writel(1, regbase + TIMER_CTRL_VAL);
+	writel(0xf, regbase + TIMER_STATUS_VAL);
+	writel(~0, regbase + TIMER_MATCH_VAL);
+
+	if (clocksource_register_hz(&clocksource, VT8500_TIMER_HZ))
+		pr_err("%s: vt8500_timer_init: clocksource_register failed for %s\n",
+					__func__, clocksource.name);
+
+	clockevents_calc_mult_shift(&clockevent, VT8500_TIMER_HZ, 4);
+
+	/* copy-pasted from mach-msm; no idea */
+	clockevent.max_delta_ns =
+		clockevent_delta2ns(0xf0000000, &clockevent);
+	clockevent.min_delta_ns = clockevent_delta2ns(4, &clockevent);
+	clockevent.cpumask = cpumask_of(0);
+
+	if (setup_irq(timer_irq, &irq))
+		pr_err("%s: setup_irq failed for %s\n", __func__,
+							clockevent.name);
+	clockevents_register_device(&clockevent);
+}
+
diff --git a/include/linux/vt8500_timer.h b/include/linux/vt8500_timer.h
new file mode 100644
index 000000000000..33b0ee87d083
--- /dev/null
+++ b/include/linux/vt8500_timer.h
@@ -0,0 +1,22 @@
+/*
+ * Copyright 2012 Tony Prisk <linux@prisktech.co.nz>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __VT8500_TIMER_H
+#define __VT8500_TIMER_H
+
+#include <asm/mach/time.h>
+
+void vt8500_timer_init(void);
+
+#endif
-- 
cgit v1.2.3


From 47db92f4a63499b1605b4c66f9347ba5479e7b19 Mon Sep 17 00:00:00 2001
From: Gerald Baeza <gerald.baeza@stericsson.com>
Date: Fri, 21 Sep 2012 21:21:37 +0200
Subject: dmaengine: ste_dma40: physical channels number correction

DMAC_ICFG[0:2]=SCHNB only allows to count 'multiple of 4' physical
channels so it was ok with platforms having 8 channels but cannot be
used for next versions (with 10 or 14 channels).  This patch allows to
provide the number of physical channels for a DMA device via
platform_data, or still rely on SCHNB if platform_data announces 0
channel.

Signed-off-by: Gerald Baeza <gerald.baeza@stericsson.com>
Reviewed-by: Per Forlin <per.forlin@stericsson.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Acked-by: Vinod Koul <vinod.koul@intel.com>
Signed-off-by: Fabio Baltieri <fabio.baltieri@linaro.org>
---
 drivers/dma/ste_dma40.c                     | 15 ++++++++++-----
 include/linux/platform_data/dma-ste-dma40.h |  4 ++++
 2 files changed, 14 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c
index 5feab7db9449..ca18117def0a 100644
--- a/drivers/dma/ste_dma40.c
+++ b/drivers/dma/ste_dma40.c
@@ -3004,14 +3004,21 @@ static struct d40_base * __init d40_hw_detect_init(struct platform_device *pdev)
 	 * ? has revision 1
 	 * DB8500v1 has revision 2
 	 * DB8500v2 has revision 3
+	 * AP9540v1 has revision 4
+	 * DB8540v1 has revision 4
 	 */
 	rev = AMBA_REV_BITS(pid);
 
+	plat_data = pdev->dev.platform_data;
+
 	/* The number of physical channels on this HW */
-	num_phy_chans = 4 * (readl(virtbase + D40_DREG_ICFG) & 0x7) + 4;
+	if (plat_data->num_of_phy_chans)
+		num_phy_chans = plat_data->num_of_phy_chans;
+	else
+		num_phy_chans = 4 * (readl(virtbase + D40_DREG_ICFG) & 0x7) + 4;
 
-	dev_info(&pdev->dev, "hardware revision: %d @ 0x%x\n",
-		 rev, res->start);
+	dev_info(&pdev->dev, "hardware revision: %d @ 0x%x with %d physical channels\n",
+		 rev, res->start, num_phy_chans);
 
 	if (rev < 2) {
 		d40_err(&pdev->dev, "hardware revision: %d is not supported",
@@ -3019,8 +3026,6 @@ static struct d40_base * __init d40_hw_detect_init(struct platform_device *pdev)
 		goto failure;
 	}
 
-	plat_data = pdev->dev.platform_data;
-
 	/* Count the number of logical channels in use */
 	for (i = 0; i < plat_data->dev_len; i++)
 		if (plat_data->dev_rx[i] != 0)
diff --git a/include/linux/platform_data/dma-ste-dma40.h b/include/linux/platform_data/dma-ste-dma40.h
index 9ff93b065686..833cb959f3df 100644
--- a/include/linux/platform_data/dma-ste-dma40.h
+++ b/include/linux/platform_data/dma-ste-dma40.h
@@ -147,6 +147,9 @@ struct stedma40_chan_cfg {
  * @memcpy_conf_log: default configuration of logical channel memcpy
  * @disabled_channels: A vector, ending with -1, that marks physical channels
  * that are for different reasons not available for the driver.
+ * @num_of_phy_chans: The number of physical channels implemented in HW.
+ * 0 means reading the number of channels from DMA HW but this is only valid
+ * for 'multiple of 4' channels, like 8.
  */
 struct stedma40_platform_data {
 	u32				 dev_len;
@@ -158,6 +161,7 @@ struct stedma40_platform_data {
 	struct stedma40_chan_cfg	*memcpy_conf_log;
 	int				 disabled_channels[STEDMA40_MAX_PHYS];
 	bool				 use_esram_lcla;
+	int				 num_of_phy_chans;
 };
 
 #ifdef CONFIG_STE_DMA40
-- 
cgit v1.2.3


From 762eb33fdebed34d98943d85ee1425663d7cceaa Mon Sep 17 00:00:00 2001
From: Fabio Baltieri <fabio.baltieri@linaro.org>
Date: Thu, 13 Dec 2012 11:38:39 +0100
Subject: dmaengine: ste_dma40: add missing kernel-doc entry

Acked-by: Linus Walleij <linus.walleij@linaro.org>
Acked-by: Vinod Koul <vinod.koul@intel.com>
Signed-off-by: Fabio Baltieri <fabio.baltieri@linaro.org>
---
 include/linux/platform_data/dma-ste-dma40.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/platform_data/dma-ste-dma40.h b/include/linux/platform_data/dma-ste-dma40.h
index 833cb959f3df..b99024ba94ec 100644
--- a/include/linux/platform_data/dma-ste-dma40.h
+++ b/include/linux/platform_data/dma-ste-dma40.h
@@ -147,6 +147,7 @@ struct stedma40_chan_cfg {
  * @memcpy_conf_log: default configuration of logical channel memcpy
  * @disabled_channels: A vector, ending with -1, that marks physical channels
  * that are for different reasons not available for the driver.
+ * @use_esram_lcla: flag for mapping the lcla into esram region
  * @num_of_phy_chans: The number of physical channels implemented in HW.
  * 0 means reading the number of channels from DMA HW but this is only valid
  * for 'multiple of 4' channels, like 8.
-- 
cgit v1.2.3


From 7407048bec896268b50e3c43c1d012a4764dc210 Mon Sep 17 00:00:00 2001
From: Fabio Baltieri <fabio.baltieri@linaro.org>
Date: Tue, 18 Dec 2012 12:25:14 +0100
Subject: dmaengine: ste_dma40: add software lli support

This patch add support to manage LLI by SW for select phy channels.

There is a HW issue in certain controllers due to which on certain
occassions HW LLI cannot be used on some physical channels.  To avoid
the HW issue on a specific phy channel, the phy channel number can be
added to the list of soft_lli_channels and there after all the transfers
on that channel will use software LLI, for peripheral to memory
transfers.

SoftLLI introduces relink overhead, that could impact performace for
certain use cases.

This is based on a previous patch of Narayanan Gopalakrishnan.

Cc: Shreshtha Kumar Sahu <shreshthakumar.sahu@stericsson.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Acked-by: Vinod Koul <vinod.koul@intel.com>
Signed-off-by: Fabio Baltieri <fabio.baltieri@linaro.org>
---
 drivers/dma/ste_dma40.c                     | 20 +++++++++++++++++++-
 include/linux/platform_data/dma-ste-dma40.h |  8 ++++++++
 2 files changed, 27 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c
index e317debdbe5a..2ecefb7113b9 100644
--- a/drivers/dma/ste_dma40.c
+++ b/drivers/dma/ste_dma40.c
@@ -355,6 +355,7 @@ struct d40_lcla_pool {
  * @allocated_dst: Same as for src but is dst.
  * allocated_dst and allocated_src uses the D40_ALLOC* defines as well as
  * event line number.
+ * @use_soft_lli: To mark if the linked lists of channel are managed by SW.
  */
 struct d40_phy_res {
 	spinlock_t lock;
@@ -362,6 +363,7 @@ struct d40_phy_res {
 	int	   num;
 	u32	   allocated_src;
 	u32	   allocated_dst;
+	bool	   use_soft_lli;
 };
 
 struct d40_base;
@@ -783,7 +785,16 @@ static void d40_log_lli_to_lcxa(struct d40_chan *chan, struct d40_desc *desc)
 	 * can't link back to the one in LCPA space
 	 */
 	if (linkback || (lli_len - lli_current > 1)) {
-		curr_lcla = d40_lcla_alloc_one(chan, desc);
+		/*
+		 * If the channel is expected to use only soft_lli don't
+		 * allocate a lcla. This is to avoid a HW issue that exists
+		 * in some controller during a peripheral to memory transfer
+		 * that uses linked lists.
+		 */
+		if (!(chan->phy_chan->use_soft_lli &&
+			chan->dma_cfg.dir == STEDMA40_PERIPH_TO_MEM))
+			curr_lcla = d40_lcla_alloc_one(chan, desc);
+
 		first_lcla = curr_lcla;
 	}
 
@@ -3063,6 +3074,13 @@ static int __init d40_phy_res_init(struct d40_base *base)
 		num_phy_chans_avail--;
 	}
 
+	/* Mark soft_lli channels */
+	for (i = 0; i < base->plat_data->num_of_soft_lli_chans; i++) {
+		int chan = base->plat_data->soft_lli_chans[i];
+
+		base->phy_res[chan].use_soft_lli = true;
+	}
+
 	dev_info(base->dev, "%d of %d physical DMA channels available\n",
 		 num_phy_chans_avail, base->num_phy_chans);
 
diff --git a/include/linux/platform_data/dma-ste-dma40.h b/include/linux/platform_data/dma-ste-dma40.h
index b99024ba94ec..4b781014b0a0 100644
--- a/include/linux/platform_data/dma-ste-dma40.h
+++ b/include/linux/platform_data/dma-ste-dma40.h
@@ -147,6 +147,12 @@ struct stedma40_chan_cfg {
  * @memcpy_conf_log: default configuration of logical channel memcpy
  * @disabled_channels: A vector, ending with -1, that marks physical channels
  * that are for different reasons not available for the driver.
+ * @soft_lli_chans: A vector, that marks physical channels will use LLI by SW
+ * which avoids HW bug that exists in some versions of the controller.
+ * SoftLLI introduces relink overhead that could impact performace for
+ * certain use cases.
+ * @num_of_soft_lli_chans: The number of channels that needs to be configured
+ * to use SoftLLI.
  * @use_esram_lcla: flag for mapping the lcla into esram region
  * @num_of_phy_chans: The number of physical channels implemented in HW.
  * 0 means reading the number of channels from DMA HW but this is only valid
@@ -161,6 +167,8 @@ struct stedma40_platform_data {
 	struct stedma40_chan_cfg	*memcpy_conf_phy;
 	struct stedma40_chan_cfg	*memcpy_conf_log;
 	int				 disabled_channels[STEDMA40_MAX_PHYS];
+	int				*soft_lli_chans;
+	int				 num_of_soft_lli_chans;
 	bool				 use_esram_lcla;
 	int				 num_of_phy_chans;
 };
-- 
cgit v1.2.3


From 3a366e614d0837d9fc23f78cdb1a1186ebc3387f Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 11 Jan 2013 13:06:33 -0800
Subject: block: add missing block_bio_complete() tracepoint

bio completion didn't kick block_bio_complete TP.  Only dm was
explicitly triggering the TP on IO completion.  This makes
block_bio_complete TP useless for tracers which want to know about
bios, and all other bio based drivers skip generating blktrace
completion events.

This patch makes all bio completions via bio_endio() generate
block_bio_complete TP.

* Explicit trace_block_bio_complete() invocation removed from dm and
  the trace point is unexported.

* @rq dropped from trace_block_bio_complete().  bios may fly around
  w/o queue associated.  Verifying and accessing the assocaited queue
  belongs to TP probes.

* blktrace now gets both request and bio completions.  Make it ignore
  bio completions if request completion path is happening.

This makes all bio based drivers generate blktrace completion events
properly and makes the block_bio_complete TP actually useful.

v2: With this change, block_bio_complete TP could be invoked on sg
    commands which have bio's with %NULL bi_bdev.  Update TP
    assignment code to check whether bio->bi_bdev is %NULL before
    dereferencing.

Signed-off-by: Tejun Heo <tj@kernel.org>
Original-patch-by: Namhyung Kim <namhyung@gmail.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Alasdair Kergon <agk@redhat.com>
Cc: dm-devel@redhat.com
Cc: Neil Brown <neilb@suse.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c             |  1 -
 drivers/md/dm.c              |  1 -
 drivers/md/raid5.c           | 11 +----------
 fs/bio.c                     |  2 ++
 include/linux/blktrace_api.h |  1 +
 include/trace/events/block.h |  8 ++++----
 kernel/trace/blktrace.c      | 26 +++++++++++++++++++++++---
 7 files changed, 31 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-core.c b/block/blk-core.c
index aca5d82ff13c..4f5aec708be6 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -39,7 +39,6 @@
 
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
-EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete);
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_unplug);
 
 DEFINE_IDA(blk_queue_ida);
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index c72e4d5a9617..650ec2866e34 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -627,7 +627,6 @@ static void dec_pending(struct dm_io *io, int error)
 			queue_io(md, bio);
 		} else {
 			/* done with normal IO or empty flush */
-			trace_block_bio_complete(md->queue, bio, io_error);
 			bio_endio(bio, io_error);
 		}
 	}
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 19d77a026639..9ab506df42da 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -184,8 +184,6 @@ static void return_io(struct bio *return_bi)
 		return_bi = bi->bi_next;
 		bi->bi_next = NULL;
 		bi->bi_size = 0;
-		trace_block_bio_complete(bdev_get_queue(bi->bi_bdev),
-					 bi, 0);
 		bio_endio(bi, 0);
 		bi = return_bi;
 	}
@@ -3917,8 +3915,6 @@ static void raid5_align_endio(struct bio *bi, int error)
 	rdev_dec_pending(rdev, conf->mddev);
 
 	if (!error && uptodate) {
-		trace_block_bio_complete(bdev_get_queue(raid_bi->bi_bdev),
-					 raid_bi, 0);
 		bio_endio(raid_bi, 0);
 		if (atomic_dec_and_test(&conf->active_aligned_reads))
 			wake_up(&conf->wait_for_stripe);
@@ -4377,8 +4373,6 @@ static void make_request(struct mddev *mddev, struct bio * bi)
 		if ( rw == WRITE )
 			md_write_end(mddev);
 
-		trace_block_bio_complete(bdev_get_queue(bi->bi_bdev),
-					 bi, 0);
 		bio_endio(bi, 0);
 	}
 }
@@ -4755,11 +4749,8 @@ static int  retry_aligned_read(struct r5conf *conf, struct bio *raid_bio)
 		handled++;
 	}
 	remaining = raid5_dec_bi_active_stripes(raid_bio);
-	if (remaining == 0) {
-		trace_block_bio_complete(bdev_get_queue(raid_bio->bi_bdev),
-					 raid_bio, 0);
+	if (remaining == 0)
 		bio_endio(raid_bio, 0);
-	}
 	if (atomic_dec_and_test(&conf->active_aligned_reads))
 		wake_up(&conf->wait_for_stripe);
 	return handled;
diff --git a/fs/bio.c b/fs/bio.c
index b96fc6ce4855..bb5768f59b32 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -1428,6 +1428,8 @@ void bio_endio(struct bio *bio, int error)
 	else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
 		error = -EIO;
 
+	trace_block_bio_complete(bio, error);
+
 	if (bio->bi_end_io)
 		bio->bi_end_io(bio, error);
 }
diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
index 7c2e030e72f1..0ea61e07a91c 100644
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h
@@ -12,6 +12,7 @@
 
 struct blk_trace {
 	int trace_state;
+	bool rq_based;
 	struct rchan *rchan;
 	unsigned long __percpu *sequence;
 	unsigned char __percpu *msg_data;
diff --git a/include/trace/events/block.h b/include/trace/events/block.h
index 05c5e61f0a7c..8a168db9a645 100644
--- a/include/trace/events/block.h
+++ b/include/trace/events/block.h
@@ -206,7 +206,6 @@ TRACE_EVENT(block_bio_bounce,
 
 /**
  * block_bio_complete - completed all work on the block operation
- * @q: queue holding the block operation
  * @bio: block operation completed
  * @error: io error value
  *
@@ -215,9 +214,9 @@ TRACE_EVENT(block_bio_bounce,
  */
 TRACE_EVENT(block_bio_complete,
 
-	TP_PROTO(struct request_queue *q, struct bio *bio, int error),
+	TP_PROTO(struct bio *bio, int error),
 
-	TP_ARGS(q, bio, error),
+	TP_ARGS(bio, error),
 
 	TP_STRUCT__entry(
 		__field( dev_t,		dev		)
@@ -228,7 +227,8 @@ TRACE_EVENT(block_bio_complete,
 	),
 
 	TP_fast_assign(
-		__entry->dev		= bio->bi_bdev->bd_dev;
+		__entry->dev		= bio->bi_bdev ?
+					  bio->bi_bdev->bd_dev : 0;
 		__entry->sector		= bio->bi_sector;
 		__entry->nr_sector	= bio->bi_size >> 9;
 		__entry->error		= error;
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index c0bd0308741c..190d98fbed27 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -739,6 +739,12 @@ static void blk_add_trace_rq_complete(void *ignore,
 				      struct request_queue *q,
 				      struct request *rq)
 {
+	struct blk_trace *bt = q->blk_trace;
+
+	/* if control ever passes through here, it's a request based driver */
+	if (unlikely(bt && !bt->rq_based))
+		bt->rq_based = true;
+
 	blk_add_trace_rq(q, rq, BLK_TA_COMPLETE);
 }
 
@@ -774,10 +780,24 @@ static void blk_add_trace_bio_bounce(void *ignore,
 	blk_add_trace_bio(q, bio, BLK_TA_BOUNCE, 0);
 }
 
-static void blk_add_trace_bio_complete(void *ignore,
-				       struct request_queue *q, struct bio *bio,
-				       int error)
+static void blk_add_trace_bio_complete(void *ignore, struct bio *bio, int error)
 {
+	struct request_queue *q;
+	struct blk_trace *bt;
+
+	if (!bio->bi_bdev)
+		return;
+
+	q = bdev_get_queue(bio->bi_bdev);
+	bt = q->blk_trace;
+
+	/*
+	 * Request based drivers will generate both rq and bio completions.
+	 * Ignore bio ones.
+	 */
+	if (likely(!bt) || bt->rq_based)
+		return;
+
 	blk_add_trace_bio(q, bio, BLK_TA_COMPLETE, error);
 }
 
-- 
cgit v1.2.3


From f0059afd3e6e7aa1a0ffc23468b74c43d47660b8 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 11 Jan 2013 13:06:35 -0800
Subject: buffer: make touch_buffer() an exported function

We want to add a trace point to touch_buffer() but macros and inline
functions defined in header files can't have tracing points.  Move
touch_buffer() to fs/buffer.c and make it a proper function.

The new exported function is also declared inline.  As most uses of
touch_buffer() are inside buffer.c with nilfs2 as the only other user,
the effect of this change should be negligible.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/buffer.c                 | 6 ++++++
 include/linux/buffer_head.h | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/fs/buffer.c b/fs/buffer.c
index c017a2dfb909..a8c2dfb68dcd 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -53,6 +53,12 @@ void init_buffer(struct buffer_head *bh, bh_end_io_t *handler, void *private)
 }
 EXPORT_SYMBOL(init_buffer);
 
+inline void touch_buffer(struct buffer_head *bh)
+{
+	mark_page_accessed(bh->b_page);
+}
+EXPORT_SYMBOL(touch_buffer);
+
 static int sleep_on_buffer(void *word)
 {
 	io_schedule();
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 458f497738a4..5afc4f94d110 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -126,7 +126,6 @@ BUFFER_FNS(Write_EIO, write_io_error)
 BUFFER_FNS(Unwritten, unwritten)
 
 #define bh_offset(bh)		((unsigned long)(bh)->b_data & ~PAGE_MASK)
-#define touch_buffer(bh)	mark_page_accessed(bh->b_page)
 
 /* If we *know* page->private refers to buffer_heads */
 #define page_buffers(page)					\
@@ -142,6 +141,7 @@ BUFFER_FNS(Unwritten, unwritten)
 
 void mark_buffer_dirty(struct buffer_head *bh);
 void init_buffer(struct buffer_head *, bh_end_io_t *, void *);
+void touch_buffer(struct buffer_head *bh);
 void set_bh_page(struct buffer_head *bh,
 		struct page *page, unsigned long offset);
 int try_to_free_buffers(struct page *);
-- 
cgit v1.2.3


From ccf04c51004d0b973a688a91c879e2d91780d03c Mon Sep 17 00:00:00 2001
From: Daniel Mack <zonque@gmail.com>
Date: Fri, 14 Dec 2012 11:36:41 +0100
Subject: mtd: omap-nand: pass device_node in platform data

Pass an optional device_node pointer in the platform data, which in turn
will be put into a mtd_part_parser_data. This way, code that sets up the
platform devices can pass along the node from DT so that the partitions
can be parsed.

For non-DT boards, this change has no effect.

Signed-off-by: Daniel Mack <zonque@gmail.com>
Acked-by: Grant Likely <grant.likely@secretlab.ca>
Acked-by: Artem Bityutskiy <dedekind1@gmail.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 drivers/mtd/nand/omap2.c                     | 4 +++-
 include/linux/platform_data/mtd-nand-omap2.h | 4 +++-
 2 files changed, 6 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/omap2.c b/drivers/mtd/nand/omap2.c
index 0002d5e94f0d..1d333497cfcb 100644
--- a/drivers/mtd/nand/omap2.c
+++ b/drivers/mtd/nand/omap2.c
@@ -1332,6 +1332,7 @@ static int omap_nand_probe(struct platform_device *pdev)
 	dma_cap_mask_t mask;
 	unsigned sig;
 	struct resource			*res;
+	struct mtd_part_parser_data	ppdata = {};
 
 	pdata = pdev->dev.platform_data;
 	if (pdata == NULL) {
@@ -1557,7 +1558,8 @@ static int omap_nand_probe(struct platform_device *pdev)
 		goto out_release_mem_region;
 	}
 
-	mtd_device_parse_register(&info->mtd, NULL, NULL, pdata->parts,
+	ppdata.of_node = pdata->of_node;
+	mtd_device_parse_register(&info->mtd, NULL, &ppdata, pdata->parts,
 				  pdata->nr_parts);
 
 	platform_set_drvdata(pdev, &info->mtd);
diff --git a/include/linux/platform_data/mtd-nand-omap2.h b/include/linux/platform_data/mtd-nand-omap2.h
index 24d32ca34bef..6bf9ef43ddb1 100644
--- a/include/linux/platform_data/mtd-nand-omap2.h
+++ b/include/linux/platform_data/mtd-nand-omap2.h
@@ -60,6 +60,8 @@ struct omap_nand_platform_data {
 	int			devsize;
 	enum omap_ecc           ecc_opt;
 	struct gpmc_nand_regs	reg;
-};
 
+	/* for passing the partitions */
+	struct device_node	*of_node;
+};
 #endif
-- 
cgit v1.2.3


From 803739d25c2343da6d2f95eebdcbc08bf67097d4 Mon Sep 17 00:00:00 2001
From: Shane Huang <shane.huang@amd.com>
Date: Mon, 17 Dec 2012 23:18:59 +0800
Subject: [libata] replace sata_settings with devslp_timing

NCQ capability was used to check availability of SATA Settings page
from Identify Device Data Log, which contains DevSlp timing variables.
It does not work on some HDDs and leads to error messages.

IDENTIFY word 78 bit 5(Hardware Feature Control) can't work either
because it is only the sufficient condition of Identify Device data
log, not the necessary condition.

This patch replaced ata_device->sata_settings with ->devslp_timing
to only save DevSlp timing variables(8 bytes), instead of the whole
SATA Settings page(512 bytes).

Addresses https://bugzilla.kernel.org/show_bug.cgi?id=51881

Reported-by: Borislav Petkov <bp@alien8.de>
Signed-off-by: Shane Huang <shane.huang@amd.com>
Cc: stable@vger.kernel.org
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 drivers/ata/libahci.c     |  6 +++---
 drivers/ata/libata-core.c | 22 +++++++++++++---------
 include/linux/ata.h       |  8 +++++---
 include/linux/libata.h    |  4 ++--
 4 files changed, 23 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c
index 320712a7b9ea..6cd7805e47ca 100644
--- a/drivers/ata/libahci.c
+++ b/drivers/ata/libahci.c
@@ -1951,13 +1951,13 @@ static void ahci_set_aggressive_devslp(struct ata_port *ap, bool sleep)
 	/* Use the nominal value 10 ms if the read MDAT is zero,
 	 * the nominal value of DETO is 20 ms.
 	 */
-	if (dev->sata_settings[ATA_LOG_DEVSLP_VALID] &
+	if (dev->devslp_timing[ATA_LOG_DEVSLP_VALID] &
 	    ATA_LOG_DEVSLP_VALID_MASK) {
-		mdat = dev->sata_settings[ATA_LOG_DEVSLP_MDAT] &
+		mdat = dev->devslp_timing[ATA_LOG_DEVSLP_MDAT] &
 		       ATA_LOG_DEVSLP_MDAT_MASK;
 		if (!mdat)
 			mdat = 10;
-		deto = dev->sata_settings[ATA_LOG_DEVSLP_DETO];
+		deto = dev->devslp_timing[ATA_LOG_DEVSLP_DETO];
 		if (!deto)
 			deto = 20;
 	} else {
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 9e8b99af400d..46cd3f4c6aaa 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -2325,24 +2325,28 @@ int ata_dev_configure(struct ata_device *dev)
 			}
 		}
 
-		/* check and mark DevSlp capability */
-		if (ata_id_has_devslp(dev->id))
-			dev->flags |= ATA_DFLAG_DEVSLP;
-
-		/* Obtain SATA Settings page from Identify Device Data Log,
-		 * which contains DevSlp timing variables etc.
-		 * Exclude old devices with ata_id_has_ncq()
+		/* Check and mark DevSlp capability. Get DevSlp timing variables
+		 * from SATA Settings page of Identify Device Data Log.
 		 */
-		if (ata_id_has_ncq(dev->id)) {
+		if (ata_id_has_devslp(dev->id)) {
+			u8 sata_setting[ATA_SECT_SIZE];
+			int i, j;
+
+			dev->flags |= ATA_DFLAG_DEVSLP;
 			err_mask = ata_read_log_page(dev,
 						     ATA_LOG_SATA_ID_DEV_DATA,
 						     ATA_LOG_SATA_SETTINGS,
-						     dev->sata_settings,
+						     sata_setting,
 						     1);
 			if (err_mask)
 				ata_dev_dbg(dev,
 					    "failed to get Identify Device Data, Emask 0x%x\n",
 					    err_mask);
+			else
+				for (i = 0; i < ATA_LOG_DEVSLP_SIZE; i++) {
+					j = ATA_LOG_DEVSLP_OFFSET + i;
+					dev->devslp_timing[i] = sata_setting[j];
+				}
 		}
 
 		dev->cdb_len = 16;
diff --git a/include/linux/ata.h b/include/linux/ata.h
index 408da9502177..8f7a3d68371a 100644
--- a/include/linux/ata.h
+++ b/include/linux/ata.h
@@ -297,10 +297,12 @@ enum {
 	ATA_LOG_SATA_NCQ	= 0x10,
 	ATA_LOG_SATA_ID_DEV_DATA  = 0x30,
 	ATA_LOG_SATA_SETTINGS	  = 0x08,
-	ATA_LOG_DEVSLP_MDAT	  = 0x30,
+	ATA_LOG_DEVSLP_OFFSET	  = 0x30,
+	ATA_LOG_DEVSLP_SIZE	  = 0x08,
+	ATA_LOG_DEVSLP_MDAT	  = 0x00,
 	ATA_LOG_DEVSLP_MDAT_MASK  = 0x1F,
-	ATA_LOG_DEVSLP_DETO	  = 0x31,
-	ATA_LOG_DEVSLP_VALID	  = 0x37,
+	ATA_LOG_DEVSLP_DETO	  = 0x01,
+	ATA_LOG_DEVSLP_VALID	  = 0x07,
 	ATA_LOG_DEVSLP_VALID_MASK = 0x80,
 
 	/* READ/WRITE LONG (obsolete) */
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 83ba0ab2c915..649e5f86b5f0 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -652,8 +652,8 @@ struct ata_device {
 		u32		gscr[SATA_PMP_GSCR_DWORDS]; /* PMP GSCR block */
 	};
 
-	/* Identify Device Data Log (30h), SATA Settings (page 08h) */
-	u8			sata_settings[ATA_SECT_SIZE];
+	/* DEVSLP Timing Variables from Identify Device Data Log */
+	u8			devslp_timing[ATA_LOG_DEVSLP_SIZE];
 
 	/* error history */
 	int			spdn_cnt;
-- 
cgit v1.2.3


From 1757d902b029a29dfcef63609964385cf8865b5a Mon Sep 17 00:00:00 2001
From: David Milburn <dmilburn@redhat.com>
Date: Mon, 14 Jan 2013 09:59:30 -0600
Subject: libata: export host controller number thru /sys

As low-level drivers register their host controller(s), keep track
of the number of controllers and export thru /sys in a <host.port>
format so that udev can better match up port numbers with a
specific controller.

# pwd
/sys/devices/pci0000:00
# find . -name 'ata*' -print

(2nd controller with port multiplier attached)

./0000:00:1e.0/0000:05:01.0/ata2.7
./0000:00:1e.0/0000:05:01.0/ata2.7/link7/dev7.0/ata_device
./0000:00:1e.0/0000:05:01.0/ata2.7/link7/ata_link
./0000:00:1e.0/0000:05:01.0/ata2.7/link7.0/dev7.0.0/ata_device
./0000:00:1e.0/0000:05:01.0/ata2.7/link7.0/ata_link
./0000:00:1e.0/0000:05:01.0/ata2.7/link7.1/dev7.1.0/ata_device
./0000:00:1e.0/0000:05:01.0/ata2.7/link7.1/ata_link
./0000:00:1e.0/0000:05:01.0/ata2.7/link7.2/dev7.2.0/ata_device
./0000:00:1e.0/0000:05:01.0/ata2.7/link7.2/ata_link
./0000:00:1e.0/0000:05:01.0/ata2.7/link7.3/dev7.3.0/ata_device
./0000:00:1e.0/0000:05:01.0/ata2.7/link7.3/ata_link
./0000:00:1e.0/0000:05:01.0/ata2.7/link7.4/dev7.4.0/ata_device
./0000:00:1e.0/0000:05:01.0/ata2.7/link7.4/ata_link
./0000:00:1e.0/0000:05:01.0/ata2.7/link7.5/dev7.5.0/ata_device
./0000:00:1e.0/0000:05:01.0/ata2.7/link7.5/ata_link
./0000:00:1e.0/0000:05:01.0/ata2.7/link7.6/dev7.6.0/ata_device
./0000:00:1e.0/0000:05:01.0/ata2.7/link7.6/ata_link
./0000:00:1e.0/0000:05:01.0/ata2.7/link7.7/dev7.7.0/ata_device
./0000:00:1e.0/0000:05:01.0/ata2.7/link7.7/ata_link
./0000:00:1e.0/0000:05:01.0/ata2.7/link7.8/dev7.8.0/ata_device
./0000:00:1e.0/0000:05:01.0/ata2.7/link7.8/ata_link
./0000:00:1e.0/0000:05:01.0/ata2.7/link7.9/dev7.9.0/ata_device
./0000:00:1e.0/0000:05:01.0/ata2.7/link7.9/ata_link
./0000:00:1e.0/0000:05:01.0/ata2.7/ata_port
./0000:00:1e.0/0000:05:01.0/ata2.7/ata_port/ata2.7
./0000:00:1e.0/0000:05:01.0/ata2.7/link7.10/dev7.10.0/ata_device
./0000:00:1e.0/0000:05:01.0/ata2.7/link7.10/ata_link
./0000:00:1e.0/0000:05:01.0/ata2.7/link7.11/dev7.11.0/ata_device
./0000:00:1e.0/0000:05:01.0/ata2.7/link7.11/ata_link
./0000:00:1e.0/0000:05:01.0/ata2.7/link7.12/dev7.12.0/ata_device
./0000:00:1e.0/0000:05:01.0/ata2.7/link7.12/ata_link
./0000:00:1e.0/0000:05:01.0/ata2.7/link7.13/dev7.13.0/ata_device
./0000:00:1e.0/0000:05:01.0/ata2.7/link7.13/ata_link
./0000:00:1e.0/0000:05:01.0/ata2.7/link7.14/dev7.14.0/ata_device
./0000:00:1e.0/0000:05:01.0/ata2.7/link7.14/ata_link
./0000:00:1e.0/0000:05:01.0/ata2.8
./0000:00:1e.0/0000:05:01.0/ata2.8/link8/dev8.0/ata_device
./0000:00:1e.0/0000:05:01.0/ata2.8/link8/ata_link
./0000:00:1e.0/0000:05:01.0/ata2.8/ata_port
./0000:00:1e.0/0000:05:01.0/ata2.8/ata_port/ata2.8

(1st controller)

./0000:00:1f.2/ata1.1
./0000:00:1f.2/ata1.1/link1/dev1.0/ata_device
./0000:00:1f.2/ata1.1/link1/ata_link
./0000:00:1f.2/ata1.1/ata_port
./0000:00:1f.2/ata1.1/ata_port/ata1.1
./0000:00:1f.2/ata1.2
./0000:00:1f.2/ata1.2/link2/dev2.0/ata_device
./0000:00:1f.2/ata1.2/link2/ata_link
./0000:00:1f.2/ata1.2/ata_port
./0000:00:1f.2/ata1.2/ata_port/ata1.2
./0000:00:1f.2/ata1.3
./0000:00:1f.2/ata1.3/link3/dev3.0/ata_device
./0000:00:1f.2/ata1.3/link3/ata_link
./0000:00:1f.2/ata1.3/ata_port
./0000:00:1f.2/ata1.3/ata_port/ata1.3
./0000:00:1f.2/ata1.4
./0000:00:1f.2/ata1.4/link4/dev4.0/ata_device
./0000:00:1f.2/ata1.4/link4/ata_link
./0000:00:1f.2/ata1.4/ata_port
./0000:00:1f.2/ata1.4/ata_port/ata1.4
./0000:00:1f.2/ata1.5
./0000:00:1f.2/ata1.5/link5/dev5.0/ata_device
./0000:00:1f.2/ata1.5/link5/ata_link
./0000:00:1f.2/ata1.5/ata_port
./0000:00:1f.2/ata1.5/ata_port/ata1.5
./0000:00:1f.2/ata1.6
./0000:00:1f.2/ata1.6/link6/dev6.0/ata_device
./0000:00:1f.2/ata1.6/link6/ata_link
./0000:00:1f.2/ata1.6/ata_port
./0000:00:1f.2/ata1.6/ata_port/ata1.6

Signed-off-by: David Milburn <dmilburn@redhat.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 drivers/ata/libata-core.c      | 4 ++++
 drivers/ata/libata-transport.c | 2 +-
 include/linux/libata.h         | 1 +
 3 files changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 46cd3f4c6aaa..275941b576a8 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -99,6 +99,7 @@ static void ata_dev_xfermask(struct ata_device *dev);
 static unsigned long ata_dev_blacklisted(const struct ata_device *dev);
 
 atomic_t ata_print_id = ATOMIC_INIT(0);
+atomic_t host_print_id = ATOMIC_INIT(0);
 
 struct ata_force_param {
 	const char	*name;
@@ -6097,6 +6098,9 @@ int ata_host_register(struct ata_host *host, struct scsi_host_template *sht)
 	for (i = host->n_ports; host->ports[i]; i++)
 		kfree(host->ports[i]);
 
+	/* track host controller */
+	host->host_id = atomic_inc_return(&host_print_id);
+
 	/* give ports names and add SCSI hosts */
 	for (i = 0; i < host->n_ports; i++)
 		host->ports[i]->print_id = atomic_inc_return(&ata_print_id);
diff --git a/drivers/ata/libata-transport.c b/drivers/ata/libata-transport.c
index c04d393d20c1..61dca7a20a31 100644
--- a/drivers/ata/libata-transport.c
+++ b/drivers/ata/libata-transport.c
@@ -284,7 +284,7 @@ int ata_tport_add(struct device *parent,
 
 	dev->parent = get_device(parent);
 	dev->release = ata_tport_release;
-	dev_set_name(dev, "ata%d", ap->print_id);
+	dev_set_name(dev, "ata%d.%d", ap->host->host_id, ap->print_id);
 	transport_setup_device(dev);
 	error = device_add(dev);
 	if (error) {
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 649e5f86b5f0..7ae207eb29a0 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -546,6 +546,7 @@ struct ata_host {
 	void			*private_data;
 	struct ata_port_operations *ops;
 	unsigned long		flags;
+	unsigned int            host_id; /* user visible host ID */
 
 	struct mutex		eh_mutex;
 	struct task_struct	*eh_owner;
-- 
cgit v1.2.3


From 6bf2e5461479c4511f59946a7378db576b04dbc5 Mon Sep 17 00:00:00 2001
From: Nathan Hintz <nlhintz@hotmail.com>
Date: Fri, 11 Jan 2013 22:07:22 -0800
Subject: bcma: fix bcm4716/bcm4748 i2s irqflag

The default irqflag assignment for the I2S core on some Broadcom
4716/4748 devices is invalid and needs to be corrected (from the
Broadcom SDK).

Signed-off-by: Nathan Hintz <nlhintz@hotmail.com>
Acked-by: Hauke Mehrtens <hauke@hauke-m.de>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/bcma/driver_mips.c            | 28 ++++++++++++++++++++++++++++
 include/linux/bcma/bcma_driver_mips.h |  1 +
 2 files changed, 29 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/bcma/driver_mips.c b/drivers/bcma/driver_mips.c
index a808404db4b1..9fe86ee16c66 100644
--- a/drivers/bcma/driver_mips.c
+++ b/drivers/bcma/driver_mips.c
@@ -256,6 +256,32 @@ void bcma_core_mips_early_init(struct bcma_drv_mips *mcore)
 	mcore->early_setup_done = true;
 }
 
+static void bcma_fix_i2s_irqflag(struct bcma_bus *bus)
+{
+	struct bcma_device *cpu, *pcie, *i2s;
+
+	/* Fixup the interrupts in 4716/4748 for i2s core (2010 Broadcom SDK)
+	 * (IRQ flags > 7 are ignored when setting the interrupt masks)
+	 */
+	if (bus->chipinfo.id != BCMA_CHIP_ID_BCM4716 &&
+	    bus->chipinfo.id != BCMA_CHIP_ID_BCM4748)
+		return;
+
+	cpu = bcma_find_core(bus, BCMA_CORE_MIPS_74K);
+	pcie = bcma_find_core(bus, BCMA_CORE_PCIE);
+	i2s = bcma_find_core(bus, BCMA_CORE_I2S);
+	if (cpu && pcie && i2s &&
+	    bcma_aread32(cpu, BCMA_MIPS_OOBSELINA74) == 0x08060504 &&
+	    bcma_aread32(pcie, BCMA_MIPS_OOBSELINA74) == 0x08060504 &&
+	    bcma_aread32(i2s, BCMA_MIPS_OOBSELOUTA30) == 0x88) {
+		bcma_awrite32(cpu, BCMA_MIPS_OOBSELINA74, 0x07060504);
+		bcma_awrite32(pcie, BCMA_MIPS_OOBSELINA74, 0x07060504);
+		bcma_awrite32(i2s, BCMA_MIPS_OOBSELOUTA30, 0x87);
+		bcma_debug(bus,
+			   "Moved i2s interrupt to oob line 7 instead of 8\n");
+	}
+}
+
 void bcma_core_mips_init(struct bcma_drv_mips *mcore)
 {
 	struct bcma_bus *bus;
@@ -269,6 +295,8 @@ void bcma_core_mips_init(struct bcma_drv_mips *mcore)
 
 	bcma_core_mips_early_init(mcore);
 
+	bcma_fix_i2s_irqflag(bus);
+
 	switch (bus->chipinfo.id) {
 	case BCMA_CHIP_ID_BCM4716:
 	case BCMA_CHIP_ID_BCM4748:
diff --git a/include/linux/bcma/bcma_driver_mips.h b/include/linux/bcma/bcma_driver_mips.h
index 73c7f4b882cc..0d1ea297851a 100644
--- a/include/linux/bcma/bcma_driver_mips.h
+++ b/include/linux/bcma/bcma_driver_mips.h
@@ -28,6 +28,7 @@
 #define BCMA_MIPS_MIPS74K_GPIOEN	0x0048
 #define BCMA_MIPS_MIPS74K_CLKCTLST	0x01E0
 
+#define BCMA_MIPS_OOBSELINA74		0x004
 #define BCMA_MIPS_OOBSELOUTA30		0x100
 
 struct bcma_device;
-- 
cgit v1.2.3


From f9a8f83b04e0c362a2fc660dbad980d24af209fc Mon Sep 17 00:00:00 2001
From: Florian Fainelli <florian@openwrt.org>
Date: Mon, 14 Jan 2013 00:52:52 +0000
Subject: net: phy: remove flags argument from phy_{attach, connect,
 connect_direct}

The flags argument of the phy_{attach,connect,connect_direct} functions
is then used to assign a struct phy_device dev_flags with its value.
All callers but the tg3 driver pass the flag 0, which results in the
underlying PHY drivers in drivers/net/phy/ not being able to actually
use any of the flags they would set in dev_flags. This patch gets rid of
the flags argument, and passes phydev->dev_flags to the internal PHY
library call phy_attach_direct() such that drivers which actually modify
a phy device dev_flags get the value preserved for use by the underlying
phy driver.

Acked-by: Kosta Zertsekel <konszert@marvell.com>
Signed-off-by: Florian Fainelli <florian@openwrt.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/phy.txt                  | 11 ++++++-----
 drivers/net/ethernet/8390/ax88796.c               |  2 +-
 drivers/net/ethernet/adi/bfin_mac.c               |  4 ++--
 drivers/net/ethernet/aeroflex/greth.c             |  4 +---
 drivers/net/ethernet/amd/au1000_eth.c             |  4 ++--
 drivers/net/ethernet/broadcom/bcm63xx_enet.c      |  2 +-
 drivers/net/ethernet/broadcom/sb1250-mac.c        |  2 +-
 drivers/net/ethernet/broadcom/tg3.c               |  4 ++--
 drivers/net/ethernet/cadence/macb.c               |  2 +-
 drivers/net/ethernet/dnet.c                       |  4 ++--
 drivers/net/ethernet/ethoc.c                      |  4 ++--
 drivers/net/ethernet/faraday/ftgmac100.c          |  3 +--
 drivers/net/ethernet/freescale/fec.c              |  2 +-
 drivers/net/ethernet/lantiq_etop.c                |  4 ++--
 drivers/net/ethernet/marvell/mv643xx_eth.c        |  2 +-
 drivers/net/ethernet/marvell/pxa168_eth.c         |  2 +-
 drivers/net/ethernet/nxp/lpc_eth.c                |  2 +-
 drivers/net/ethernet/rdc/r6040.c                  |  2 +-
 drivers/net/ethernet/renesas/sh_eth.c             |  2 +-
 drivers/net/ethernet/s6gmac.c                     |  2 +-
 drivers/net/ethernet/smsc/smsc911x.c              |  5 ++---
 drivers/net/ethernet/smsc/smsc9420.c              |  2 +-
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c |  3 +--
 drivers/net/ethernet/ti/cpmac.c                   |  4 ++--
 drivers/net/ethernet/ti/cpsw.c                    |  2 +-
 drivers/net/ethernet/ti/davinci_emac.c            |  2 +-
 drivers/net/ethernet/toshiba/tc35815.c            |  5 ++---
 drivers/net/ethernet/xscale/ixp4xx_eth.c          |  2 +-
 drivers/net/phy/phy_device.c                      | 15 ++++++---------
 drivers/net/usb/ax88172a.c                        |  2 +-
 drivers/of/of_mdio.c                              |  4 ++--
 drivers/staging/et131x/et131x.c                   |  2 +-
 include/linux/phy.h                               |  6 +++---
 net/dsa/slave.c                                   |  2 +-
 34 files changed, 56 insertions(+), 64 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/networking/phy.txt b/Documentation/networking/phy.txt
index 95e5f5985a2a..d5b1a3935245 100644
--- a/Documentation/networking/phy.txt
+++ b/Documentation/networking/phy.txt
@@ -103,7 +103,7 @@ Letting the PHY Abstraction Layer do Everything
  
  Now, to connect, just call this function:
  
-   phydev = phy_connect(dev, phy_name, &adjust_link, flags, interface);
+   phydev = phy_connect(dev, phy_name, &adjust_link, interface);
 
  phydev is a pointer to the phy_device structure which represents the PHY.  If
  phy_connect is successful, it will return the pointer.  dev, here, is the
@@ -113,7 +113,9 @@ Letting the PHY Abstraction Layer do Everything
  current state, though the PHY will not yet be truly operational at this
  point.
 
- flags is a u32 which can optionally contain phy-specific flags.
+ PHY-specific flags should be set in phydev->dev_flags prior to the call
+ to phy_connect() such that the underlying PHY driver can check for flags
+ and perform specific operations based on them.
  This is useful if the system has put hardware restrictions on
  the PHY/controller, of which the PHY needs to be aware.
 
@@ -185,11 +187,10 @@ Doing it all yourself
    start, or disables then frees them for stop.
 
  struct phy_device * phy_attach(struct net_device *dev, const char *phy_id,
-		 u32 flags, phy_interface_t interface);
+		 phy_interface_t interface);
 
    Attaches a network device to a particular PHY, binding the PHY to a generic
-   driver if none was found during bus initialization.  Passes in
-   any phy-specific flags as needed.
+   driver if none was found during bus initialization.
 
  int phy_start_aneg(struct phy_device *phydev);
    
diff --git a/drivers/net/ethernet/8390/ax88796.c b/drivers/net/ethernet/8390/ax88796.c
index 7eeddf01307f..cab306a9888e 100644
--- a/drivers/net/ethernet/8390/ax88796.c
+++ b/drivers/net/ethernet/8390/ax88796.c
@@ -358,7 +358,7 @@ static int ax_mii_probe(struct net_device *dev)
 		return -ENODEV;
 	}
 
-	ret = phy_connect_direct(dev, phy_dev, ax_handle_link_change, 0,
+	ret = phy_connect_direct(dev, phy_dev, ax_handle_link_change,
 				 PHY_INTERFACE_MODE_MII);
 	if (ret) {
 		netdev_err(dev, "Could not attach to PHY\n");
diff --git a/drivers/net/ethernet/adi/bfin_mac.c b/drivers/net/ethernet/adi/bfin_mac.c
index c7a83f6f2382..a175d0be1ae1 100644
--- a/drivers/net/ethernet/adi/bfin_mac.c
+++ b/drivers/net/ethernet/adi/bfin_mac.c
@@ -425,8 +425,8 @@ static int mii_probe(struct net_device *dev, int phy_mode)
 		return -EINVAL;
 	}
 
-	phydev = phy_connect(dev, dev_name(&phydev->dev), &bfin_mac_adjust_link,
-			0, phy_mode);
+	phydev = phy_connect(dev, dev_name(&phydev->dev),
+			     &bfin_mac_adjust_link, phy_mode);
 
 	if (IS_ERR(phydev)) {
 		netdev_err(dev, "could not attach PHY\n");
diff --git a/drivers/net/ethernet/aeroflex/greth.c b/drivers/net/ethernet/aeroflex/greth.c
index 480662ba5227..0be2195e5034 100644
--- a/drivers/net/ethernet/aeroflex/greth.c
+++ b/drivers/net/ethernet/aeroflex/greth.c
@@ -1288,9 +1288,7 @@ static int greth_mdio_probe(struct net_device *dev)
 	}
 
 	ret = phy_connect_direct(dev, phy, &greth_link_change,
-			0, greth->gbit_mac ?
-			PHY_INTERFACE_MODE_GMII :
-			PHY_INTERFACE_MODE_MII);
+				 greth->gbit_mac ? PHY_INTERFACE_MODE_GMII : PHY_INTERFACE_MODE_MII);
 	if (ret) {
 		if (netif_msg_ifup(greth))
 			dev_err(&dev->dev, "could not attach to PHY\n");
diff --git a/drivers/net/ethernet/amd/au1000_eth.c b/drivers/net/ethernet/amd/au1000_eth.c
index 65b865a0cc78..de774d419144 100644
--- a/drivers/net/ethernet/amd/au1000_eth.c
+++ b/drivers/net/ethernet/amd/au1000_eth.c
@@ -437,8 +437,8 @@ static int au1000_mii_probe(struct net_device *dev)
 	/* now we are supposed to have a proper phydev, to attach to... */
 	BUG_ON(phydev->attached_dev);
 
-	phydev = phy_connect(dev, dev_name(&phydev->dev), &au1000_adjust_link,
-			0, PHY_INTERFACE_MODE_MII);
+	phydev = phy_connect(dev, dev_name(&phydev->dev),
+			     &au1000_adjust_link, PHY_INTERFACE_MODE_MII);
 
 	if (IS_ERR(phydev)) {
 		netdev_err(dev, "Could not attach to PHY\n");
diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c b/drivers/net/ethernet/broadcom/bcm63xx_enet.c
index d8a151046728..f5b6b4715d45 100644
--- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c
+++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c
@@ -799,7 +799,7 @@ static int bcm_enet_open(struct net_device *dev)
 		snprintf(phy_id, sizeof(phy_id), PHY_ID_FMT,
 			 priv->mii_bus->id, priv->phy_id);
 
-		phydev = phy_connect(dev, phy_id, bcm_enet_adjust_phy_link, 0,
+		phydev = phy_connect(dev, phy_id, bcm_enet_adjust_phy_link,
 				     PHY_INTERFACE_MODE_MII);
 
 		if (IS_ERR(phydev)) {
diff --git a/drivers/net/ethernet/broadcom/sb1250-mac.c b/drivers/net/ethernet/broadcom/sb1250-mac.c
index 3a1c8a3cf7c9..e9b35da375cb 100644
--- a/drivers/net/ethernet/broadcom/sb1250-mac.c
+++ b/drivers/net/ethernet/broadcom/sb1250-mac.c
@@ -2385,7 +2385,7 @@ static int sbmac_mii_probe(struct net_device *dev)
 		return -ENXIO;
 	}
 
-	phy_dev = phy_connect(dev, dev_name(&phy_dev->dev), &sbmac_mii_poll, 0,
+	phy_dev = phy_connect(dev, dev_name(&phy_dev->dev), &sbmac_mii_poll,
 			      PHY_INTERFACE_MODE_GMII);
 	if (IS_ERR(phy_dev)) {
 		printk(KERN_ERR "%s: could not attach to PHY\n", dev->name);
diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index 88f2d41c009b..227749107789 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -2004,8 +2004,8 @@ static int tg3_phy_init(struct tg3 *tp)
 	phydev = tp->mdio_bus->phy_map[TG3_PHY_MII_ADDR];
 
 	/* Attach the MAC to the PHY. */
-	phydev = phy_connect(tp->dev, dev_name(&phydev->dev), tg3_adjust_link,
-			     phydev->dev_flags, phydev->interface);
+	phydev = phy_connect(tp->dev, dev_name(&phydev->dev),
+			     tg3_adjust_link, phydev->interface);
 	if (IS_ERR(phydev)) {
 		dev_err(&tp->pdev->dev, "Could not attach to PHY\n");
 		return PTR_ERR(phydev);
diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c
index a9b0830fb39d..352190b9ebe7 100644
--- a/drivers/net/ethernet/cadence/macb.c
+++ b/drivers/net/ethernet/cadence/macb.c
@@ -287,7 +287,7 @@ static int macb_mii_probe(struct net_device *dev)
 	}
 
 	/* attach the mac to the phy */
-	ret = phy_connect_direct(dev, phydev, &macb_handle_link_change, 0,
+	ret = phy_connect_direct(dev, phydev, &macb_handle_link_change,
 				 bp->phy_interface);
 	if (ret) {
 		netdev_err(dev, "Could not attach to PHY\n");
diff --git a/drivers/net/ethernet/dnet.c b/drivers/net/ethernet/dnet.c
index 2c177b329c8b..f3d60eb13c3a 100644
--- a/drivers/net/ethernet/dnet.c
+++ b/drivers/net/ethernet/dnet.c
@@ -281,11 +281,11 @@ static int dnet_mii_probe(struct net_device *dev)
 	/* attach the mac to the phy */
 	if (bp->capabilities & DNET_HAS_RMII) {
 		phydev = phy_connect(dev, dev_name(&phydev->dev),
-				     &dnet_handle_link_change, 0,
+				     &dnet_handle_link_change,
 				     PHY_INTERFACE_MODE_RMII);
 	} else {
 		phydev = phy_connect(dev, dev_name(&phydev->dev),
-				     &dnet_handle_link_change, 0,
+				     &dnet_handle_link_change,
 				     PHY_INTERFACE_MODE_MII);
 	}
 
diff --git a/drivers/net/ethernet/ethoc.c b/drivers/net/ethernet/ethoc.c
index b51c81ac0b6f..aa47ef9689a8 100644
--- a/drivers/net/ethernet/ethoc.c
+++ b/drivers/net/ethernet/ethoc.c
@@ -682,8 +682,8 @@ static int ethoc_mdio_probe(struct net_device *dev)
 		return -ENXIO;
 	}
 
-	err = phy_connect_direct(dev, phy, ethoc_mdio_poll, 0,
-			PHY_INTERFACE_MODE_GMII);
+	err = phy_connect_direct(dev, phy, ethoc_mdio_poll,
+				 PHY_INTERFACE_MODE_GMII);
 	if (err) {
 		dev_err(&dev->dev, "could not attach to PHY\n");
 		return err;
diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c
index 96454b5fca63..7c361d1db94c 100644
--- a/drivers/net/ethernet/faraday/ftgmac100.c
+++ b/drivers/net/ethernet/faraday/ftgmac100.c
@@ -858,8 +858,7 @@ static int ftgmac100_mii_probe(struct ftgmac100 *priv)
 	}
 
 	phydev = phy_connect(netdev, dev_name(&phydev->dev),
-			     &ftgmac100_adjust_link, 0,
-			     PHY_INTERFACE_MODE_GMII);
+			     &ftgmac100_adjust_link, PHY_INTERFACE_MODE_GMII);
 
 	if (IS_ERR(phydev)) {
 		netdev_err(netdev, "%s: Could not attach to PHY\n", netdev->name);
diff --git a/drivers/net/ethernet/freescale/fec.c b/drivers/net/ethernet/freescale/fec.c
index 5f2b4acf4836..1b7684a8851e 100644
--- a/drivers/net/ethernet/freescale/fec.c
+++ b/drivers/net/ethernet/freescale/fec.c
@@ -1008,7 +1008,7 @@ static int fec_enet_mii_probe(struct net_device *ndev)
 	}
 
 	snprintf(phy_name, sizeof(phy_name), PHY_ID_FMT, mdio_bus_id, phy_id);
-	phy_dev = phy_connect(ndev, phy_name, &fec_enet_adjust_link, 0,
+	phy_dev = phy_connect(ndev, phy_name, &fec_enet_adjust_link,
 			      fep->phy_interface);
 	if (IS_ERR(phy_dev)) {
 		printk(KERN_ERR "%s: could not attach to PHY\n", ndev->name);
diff --git a/drivers/net/ethernet/lantiq_etop.c b/drivers/net/ethernet/lantiq_etop.c
index 8ead46adc21e..6a2127489af7 100644
--- a/drivers/net/ethernet/lantiq_etop.c
+++ b/drivers/net/ethernet/lantiq_etop.c
@@ -393,8 +393,8 @@ ltq_etop_mdio_probe(struct net_device *dev)
 		return -ENODEV;
 	}
 
-	phydev = phy_connect(dev, dev_name(&phydev->dev), &ltq_etop_mdio_link,
-			0, priv->pldata->mii_mode);
+	phydev = phy_connect(dev, dev_name(&phydev->dev),
+			     &ltq_etop_mdio_link, priv->pldata->mii_mode);
 
 	if (IS_ERR(phydev)) {
 		netdev_err(dev, "Could not attach to PHY\n");
diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c
index 84c13263c514..c27b23d8f4fc 100644
--- a/drivers/net/ethernet/marvell/mv643xx_eth.c
+++ b/drivers/net/ethernet/marvell/mv643xx_eth.c
@@ -2789,7 +2789,7 @@ static void phy_init(struct mv643xx_eth_private *mp, int speed, int duplex)
 
 	phy_reset(mp);
 
-	phy_attach(mp->dev, dev_name(&phy->dev), 0, PHY_INTERFACE_MODE_GMII);
+	phy_attach(mp->dev, dev_name(&phy->dev), PHY_INTERFACE_MODE_GMII);
 
 	if (speed == 0) {
 		phy->autoneg = AUTONEG_ENABLE;
diff --git a/drivers/net/ethernet/marvell/pxa168_eth.c b/drivers/net/ethernet/marvell/pxa168_eth.c
index c7f2fa60fe6f..037ed866c22f 100644
--- a/drivers/net/ethernet/marvell/pxa168_eth.c
+++ b/drivers/net/ethernet/marvell/pxa168_eth.c
@@ -1390,7 +1390,7 @@ static void phy_init(struct pxa168_eth_private *pep, int speed, int duplex)
 	struct phy_device *phy = pep->phy;
 	ethernet_phy_reset(pep);
 
-	phy_attach(pep->dev, dev_name(&phy->dev), 0, PHY_INTERFACE_MODE_MII);
+	phy_attach(pep->dev, dev_name(&phy->dev), PHY_INTERFACE_MODE_MII);
 
 	if (speed == 0) {
 		phy->autoneg = AUTONEG_ENABLE;
diff --git a/drivers/net/ethernet/nxp/lpc_eth.c b/drivers/net/ethernet/nxp/lpc_eth.c
index 6fda51ebcc76..c4122c86f829 100644
--- a/drivers/net/ethernet/nxp/lpc_eth.c
+++ b/drivers/net/ethernet/nxp/lpc_eth.c
@@ -800,7 +800,7 @@ static int lpc_mii_probe(struct net_device *ndev)
 	else
 		netdev_info(ndev, "using RMII interface\n");
 	phydev = phy_connect(ndev, dev_name(&phydev->dev),
-			     &lpc_handle_link_change, 0,
+			     &lpc_handle_link_change,
 			     lpc_phy_interface_mode(&pldat->pdev->dev));
 
 	if (IS_ERR(phydev)) {
diff --git a/drivers/net/ethernet/rdc/r6040.c b/drivers/net/ethernet/rdc/r6040.c
index be3616d060d9..34f76e99dc8a 100644
--- a/drivers/net/ethernet/rdc/r6040.c
+++ b/drivers/net/ethernet/rdc/r6040.c
@@ -1042,7 +1042,7 @@ static int r6040_mii_probe(struct net_device *dev)
 	}
 
 	phydev = phy_connect(dev, dev_name(&phydev->dev), &r6040_adjust_link,
-				0, PHY_INTERFACE_MODE_MII);
+			     PHY_INTERFACE_MODE_MII);
 
 	if (IS_ERR(phydev)) {
 		dev_err(&lp->pdev->dev, "could not attach to PHY\n");
diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index 3d705862bd7d..e195c1e89d61 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c
@@ -1422,7 +1422,7 @@ static int sh_eth_phy_init(struct net_device *ndev)
 
 	/* Try connect to PHY */
 	phydev = phy_connect(ndev, phy_id, sh_eth_adjust_link,
-				0, mdp->phy_interface);
+			     mdp->phy_interface);
 	if (IS_ERR(phydev)) {
 		dev_err(&ndev->dev, "phy_connect failed\n");
 		return PTR_ERR(phydev);
diff --git a/drivers/net/ethernet/s6gmac.c b/drivers/net/ethernet/s6gmac.c
index 72fc57dd084d..21683e2b1ff4 100644
--- a/drivers/net/ethernet/s6gmac.c
+++ b/drivers/net/ethernet/s6gmac.c
@@ -795,7 +795,7 @@ static inline int s6gmac_phy_start(struct net_device *dev)
 	struct phy_device *p = NULL;
 	while ((i < PHY_MAX_ADDR) && (!(p = pd->mii.bus->phy_map[i])))
 		i++;
-	p = phy_connect(dev, dev_name(&p->dev), &s6gmac_adjust_link, 0,
+	p = phy_connect(dev, dev_name(&p->dev), &s6gmac_adjust_link,
 			PHY_INTERFACE_MODE_RGMII);
 	if (IS_ERR(p)) {
 		printk(KERN_ERR "%s: Could not attach to PHY\n", dev->name);
diff --git a/drivers/net/ethernet/smsc/smsc911x.c b/drivers/net/ethernet/smsc/smsc911x.c
index 04ff63cb6544..da5cc9a3b34c 100644
--- a/drivers/net/ethernet/smsc/smsc911x.c
+++ b/drivers/net/ethernet/smsc/smsc911x.c
@@ -997,9 +997,8 @@ static int smsc911x_mii_probe(struct net_device *dev)
 	SMSC_TRACE(pdata, probe, "PHY: addr %d, phy_id 0x%08X",
 		   phydev->addr, phydev->phy_id);
 
-	ret = phy_connect_direct(dev, phydev,
-			&smsc911x_phy_adjust_link, 0,
-			pdata->config.phy_interface);
+	ret = phy_connect_direct(dev, phydev, &smsc911x_phy_adjust_link,
+				 pdata->config.phy_interface);
 
 	if (ret) {
 		netdev_err(dev, "Could not attach to PHY\n");
diff --git a/drivers/net/ethernet/smsc/smsc9420.c b/drivers/net/ethernet/smsc/smsc9420.c
index 3c586585e1b3..ecfb43614d7b 100644
--- a/drivers/net/ethernet/smsc/smsc9420.c
+++ b/drivers/net/ethernet/smsc/smsc9420.c
@@ -1179,7 +1179,7 @@ static int smsc9420_mii_probe(struct net_device *dev)
 		phydev->phy_id);
 
 	phydev = phy_connect(dev, dev_name(&phydev->dev),
-		smsc9420_phy_adjust_link, 0, PHY_INTERFACE_MODE_MII);
+			     smsc9420_phy_adjust_link, PHY_INTERFACE_MODE_MII);
 
 	if (IS_ERR(phydev)) {
 		pr_err("%s: Could not attach to PHY\n", dev->name);
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index f07c0612abf6..8c657294ce56 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -428,8 +428,7 @@ static int stmmac_init_phy(struct net_device *dev)
 		 priv->plat->phy_addr);
 	pr_debug("stmmac_init_phy:  trying to attach to %s\n", phy_id_fmt);
 
-	phydev = phy_connect(dev, phy_id_fmt, &stmmac_adjust_link, 0,
-			     interface);
+	phydev = phy_connect(dev, phy_id_fmt, &stmmac_adjust_link, interface);
 
 	if (IS_ERR(phydev)) {
 		pr_err("%s: Could not attach to PHY\n", dev->name);
diff --git a/drivers/net/ethernet/ti/cpmac.c b/drivers/net/ethernet/ti/cpmac.c
index 70d1920cac97..31bbbca341a7 100644
--- a/drivers/net/ethernet/ti/cpmac.c
+++ b/drivers/net/ethernet/ti/cpmac.c
@@ -1172,8 +1172,8 @@ static int cpmac_probe(struct platform_device *pdev)
 	snprintf(priv->phy_name, MII_BUS_ID_SIZE, PHY_ID_FMT,
 						mdio_bus_id, phy_id);
 
-	priv->phy = phy_connect(dev, priv->phy_name, cpmac_adjust_link, 0,
-						PHY_INTERFACE_MODE_MII);
+	priv->phy = phy_connect(dev, priv->phy_name, cpmac_adjust_link,
+				PHY_INTERFACE_MODE_MII);
 
 	if (IS_ERR(priv->phy)) {
 		if (netif_msg_drv(priv))
diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index bea736b8c3ec..3772804fb697 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -592,7 +592,7 @@ static void cpsw_slave_open(struct cpsw_slave *slave, struct cpsw_priv *priv)
 			   1 << slave_port, 0, ALE_MCAST_FWD_2);
 
 	slave->phy = phy_connect(priv->ndev, slave->data->phy_id,
-				 &cpsw_adjust_link, 0, slave->data->phy_if);
+				 &cpsw_adjust_link, slave->data->phy_if);
 	if (IS_ERR(slave->phy)) {
 		dev_err(priv->dev, "phy %s not found on slave %d\n",
 			slave->data->phy_id, slave->slave_num);
diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c
index 6621ae3a98d9..8478d98c1092 100644
--- a/drivers/net/ethernet/ti/davinci_emac.c
+++ b/drivers/net/ethernet/ti/davinci_emac.c
@@ -1599,7 +1599,7 @@ static int emac_dev_open(struct net_device *ndev)
 
 	if (priv->phy_id && *priv->phy_id) {
 		priv->phydev = phy_connect(ndev, priv->phy_id,
-					   &emac_adjust_link, 0,
+					   &emac_adjust_link,
 					   PHY_INTERFACE_MODE_MII);
 
 		if (IS_ERR(priv->phydev)) {
diff --git a/drivers/net/ethernet/toshiba/tc35815.c b/drivers/net/ethernet/toshiba/tc35815.c
index f16410e599f4..fe256094db35 100644
--- a/drivers/net/ethernet/toshiba/tc35815.c
+++ b/drivers/net/ethernet/toshiba/tc35815.c
@@ -633,9 +633,8 @@ static int tc_mii_probe(struct net_device *dev)
 
 	/* attach the mac to the phy */
 	phydev = phy_connect(dev, dev_name(&phydev->dev),
-			     &tc_handle_link_change, 0,
-			     lp->chiptype == TC35815_TX4939 ?
-			     PHY_INTERFACE_MODE_RMII : PHY_INTERFACE_MODE_MII);
+			     &tc_handle_link_change,
+			     lp->chiptype == TC35815_TX4939 ? PHY_INTERFACE_MODE_RMII : PHY_INTERFACE_MODE_MII);
 	if (IS_ERR(phydev)) {
 		printk(KERN_ERR "%s: Could not attach to PHY\n", dev->name);
 		return PTR_ERR(phydev);
diff --git a/drivers/net/ethernet/xscale/ixp4xx_eth.c b/drivers/net/ethernet/xscale/ixp4xx_eth.c
index a4be1ad886c5..6958a5e87703 100644
--- a/drivers/net/ethernet/xscale/ixp4xx_eth.c
+++ b/drivers/net/ethernet/xscale/ixp4xx_eth.c
@@ -1451,7 +1451,7 @@ static int eth_init_one(struct platform_device *pdev)
 
 	snprintf(phy_id, MII_BUS_ID_SIZE + 3, PHY_ID_FMT,
 		mdio_bus->id, plat->phy);
-	port->phydev = phy_connect(dev, phy_id, &ixp4xx_adjust_link, 0,
+	port->phydev = phy_connect(dev, phy_id, &ixp4xx_adjust_link,
 				   PHY_INTERFACE_MODE_MII);
 	if (IS_ERR(port->phydev)) {
 		err = PTR_ERR(port->phydev);
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 8af46e88a181..9930f9999561 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -416,16 +416,15 @@ static void phy_prepare_link(struct phy_device *phydev,
  * @dev: the network device to connect
  * @phydev: the pointer to the phy device
  * @handler: callback function for state change notifications
- * @flags: PHY device's dev_flags
  * @interface: PHY device's interface
  */
 int phy_connect_direct(struct net_device *dev, struct phy_device *phydev,
-		       void (*handler)(struct net_device *), u32 flags,
+		       void (*handler)(struct net_device *),
 		       phy_interface_t interface)
 {
 	int rc;
 
-	rc = phy_attach_direct(dev, phydev, flags, interface);
+	rc = phy_attach_direct(dev, phydev, phydev->dev_flags, interface);
 	if (rc)
 		return rc;
 
@@ -443,7 +442,6 @@ EXPORT_SYMBOL(phy_connect_direct);
  * @dev: the network device to connect
  * @bus_id: the id string of the PHY device to connect
  * @handler: callback function for state change notifications
- * @flags: PHY device's dev_flags
  * @interface: PHY device's interface
  *
  * Description: Convenience function for connecting ethernet
@@ -455,7 +453,7 @@ EXPORT_SYMBOL(phy_connect_direct);
  *   the desired functionality.
  */
 struct phy_device * phy_connect(struct net_device *dev, const char *bus_id,
-		void (*handler)(struct net_device *), u32 flags,
+		void (*handler)(struct net_device *),
 		phy_interface_t interface)
 {
 	struct phy_device *phydev;
@@ -471,7 +469,7 @@ struct phy_device * phy_connect(struct net_device *dev, const char *bus_id,
 	}
 	phydev = to_phy_device(d);
 
-	rc = phy_connect_direct(dev, phydev, handler, flags, interface);
+	rc = phy_connect_direct(dev, phydev, handler, interface);
 	if (rc)
 		return ERR_PTR(rc);
 
@@ -576,14 +574,13 @@ static int phy_attach_direct(struct net_device *dev, struct phy_device *phydev,
  * phy_attach - attach a network device to a particular PHY device
  * @dev: network device to attach
  * @bus_id: Bus ID of PHY device to attach
- * @flags: PHY device's dev_flags
  * @interface: PHY device's interface
  *
  * Description: Same as phy_attach_direct() except that a PHY bus_id
  *     string is passed instead of a pointer to a struct phy_device.
  */
 struct phy_device *phy_attach(struct net_device *dev,
-		const char *bus_id, u32 flags, phy_interface_t interface)
+		const char *bus_id, phy_interface_t interface)
 {
 	struct bus_type *bus = &mdio_bus_type;
 	struct phy_device *phydev;
@@ -599,7 +596,7 @@ struct phy_device *phy_attach(struct net_device *dev,
 	}
 	phydev = to_phy_device(d);
 
-	rc = phy_attach_direct(dev, phydev, flags, interface);
+	rc = phy_attach_direct(dev, phydev, phydev->dev_flags, interface);
 	if (rc)
 		return ERR_PTR(rc);
 
diff --git a/drivers/net/usb/ax88172a.c b/drivers/net/usb/ax88172a.c
index c8e0aa85fb8e..fdbab72926bd 100644
--- a/drivers/net/usb/ax88172a.c
+++ b/drivers/net/usb/ax88172a.c
@@ -377,7 +377,7 @@ static int ax88172a_reset(struct usbnet *dev)
 
 	priv->phydev = phy_connect(dev->net, priv->phy_name,
 				   &ax88172a_adjust_link,
-				   0, PHY_INTERFACE_MODE_MII);
+				   PHY_INTERFACE_MODE_MII);
 	if (IS_ERR(priv->phydev)) {
 		netdev_err(dev->net, "Could not connect to PHY device %s\n",
 			   priv->phy_name);
diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c
index 83ca06f4312b..e3a8b22ef9dd 100644
--- a/drivers/of/of_mdio.c
+++ b/drivers/of/of_mdio.c
@@ -157,7 +157,7 @@ struct phy_device *of_phy_connect(struct net_device *dev,
 	if (!phy)
 		return NULL;
 
-	return phy_connect_direct(dev, phy, hndlr, flags, iface) ? NULL : phy;
+	return phy_connect_direct(dev, phy, hndlr, iface) ? NULL : phy;
 }
 EXPORT_SYMBOL(of_phy_connect);
 
@@ -194,7 +194,7 @@ struct phy_device *of_phy_connect_fixed_link(struct net_device *dev,
 
 	sprintf(bus_id, PHY_ID_FMT, "fixed-0", be32_to_cpu(phy_id[0]));
 
-	phy = phy_connect(dev, bus_id, hndlr, 0, iface);
+	phy = phy_connect(dev, bus_id, hndlr, iface);
 	return IS_ERR(phy) ? NULL : phy;
 }
 EXPORT_SYMBOL(of_phy_connect_fixed_link);
diff --git a/drivers/staging/et131x/et131x.c b/drivers/staging/et131x/et131x.c
index f15059ca3781..a0a30b3f2dcd 100644
--- a/drivers/staging/et131x/et131x.c
+++ b/drivers/staging/et131x/et131x.c
@@ -3917,7 +3917,7 @@ static int et131x_mii_probe(struct net_device *netdev)
 	}
 
 	phydev = phy_connect(netdev, dev_name(&phydev->dev),
-			&et131x_adjust_link, 0, PHY_INTERFACE_MODE_MII);
+			     &et131x_adjust_link, PHY_INTERFACE_MODE_MII);
 
 	if (IS_ERR(phydev)) {
 		dev_err(&adapter->pdev->dev, "Could not attach to PHY\n");
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 93b3cf77f564..33999adbf8c8 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -506,13 +506,13 @@ struct phy_device *get_phy_device(struct mii_bus *bus, int addr, bool is_c45);
 int phy_device_register(struct phy_device *phy);
 int phy_init_hw(struct phy_device *phydev);
 struct phy_device * phy_attach(struct net_device *dev,
-		const char *bus_id, u32 flags, phy_interface_t interface);
+		const char *bus_id, phy_interface_t interface);
 struct phy_device *phy_find_first(struct mii_bus *bus);
 int phy_connect_direct(struct net_device *dev, struct phy_device *phydev,
-		void (*handler)(struct net_device *), u32 flags,
+		void (*handler)(struct net_device *),
 		phy_interface_t interface);
 struct phy_device * phy_connect(struct net_device *dev, const char *bus_id,
-		void (*handler)(struct net_device *), u32 flags,
+		void (*handler)(struct net_device *),
 		phy_interface_t interface);
 void phy_disconnect(struct phy_device *phydev);
 void phy_detach(struct phy_device *phydev);
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index f795b0ca7ee6..f4345582a6b9 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -391,7 +391,7 @@ dsa_slave_create(struct dsa_switch *ds, struct device *parent,
 
 	if (p->phy != NULL) {
 		phy_attach(slave_dev, dev_name(&p->phy->dev),
-			   0, PHY_INTERFACE_MODE_GMII);
+			   PHY_INTERFACE_MODE_GMII);
 
 		p->phy->autoneg = AUTONEG_ENABLE;
 		p->phy->speed = 0;
-- 
cgit v1.2.3


From 5dbbaf2de89613d19a9286d4db0a535ca2735d26 Mon Sep 17 00:00:00 2001
From: Paul Moore <pmoore@redhat.com>
Date: Mon, 14 Jan 2013 07:12:19 +0000
Subject: tun: fix LSM/SELinux labeling of tun/tap devices

This patch corrects some problems with LSM/SELinux that were introduced
with the multiqueue patchset.  The problem stems from the fact that the
multiqueue work changed the relationship between the tun device and its
associated socket; before the socket persisted for the life of the
device, however after the multiqueue changes the socket only persisted
for the life of the userspace connection (fd open).  For non-persistent
devices this is not an issue, but for persistent devices this can cause
the tun device to lose its SELinux label.

We correct this problem by adding an opaque LSM security blob to the
tun device struct which allows us to have the LSM security state, e.g.
SELinux labeling information, persist for the lifetime of the tun
device.  In the process we tweak the LSM hooks to work with this new
approach to TUN device/socket labeling and introduce a new LSM hook,
security_tun_dev_attach_queue(), to approve requests to attach to a
TUN queue via TUNSETQUEUE.

The SELinux code has been adjusted to match the new LSM hooks, the
other LSMs do not make use of the LSM TUN controls.  This patch makes
use of the recently added "tun_socket:attach_queue" permission to
restrict access to the TUNSETQUEUE operation.  On older SELinux
policies which do not define the "tun_socket:attach_queue" permission
the access control decision for TUNSETQUEUE will be handled according
to the SELinux policy's unknown permission setting.

Signed-off-by: Paul Moore <pmoore@redhat.com>
Acked-by: Eric Paris <eparis@parisplace.org>
Tested-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/tun.c                 | 23 +++++++++++----
 include/linux/security.h          | 59 ++++++++++++++++++++++++++++++---------
 security/capability.c             | 24 ++++++++++++++--
 security/security.c               | 28 +++++++++++++++----
 security/selinux/hooks.c          | 50 +++++++++++++++++++++++++--------
 security/selinux/include/objsec.h |  4 +++
 6 files changed, 151 insertions(+), 37 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index af372d0957fe..c81680dc10eb 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -185,6 +185,7 @@ struct tun_struct {
 	unsigned long ageing_time;
 	unsigned int numdisabled;
 	struct list_head disabled;
+	void *security;
 };
 
 static inline u32 tun_hashfn(u32 rxhash)
@@ -490,6 +491,10 @@ static int tun_attach(struct tun_struct *tun, struct file *file)
 	struct tun_file *tfile = file->private_data;
 	int err;
 
+	err = security_tun_dev_attach(tfile->socket.sk, tun->security);
+	if (err < 0)
+		goto out;
+
 	err = -EINVAL;
 	if (rtnl_dereference(tfile->tun))
 		goto out;
@@ -1373,6 +1378,7 @@ static void tun_free_netdev(struct net_device *dev)
 
 	BUG_ON(!(list_empty(&tun->disabled)));
 	tun_flow_uninit(tun);
+	security_tun_dev_free_security(tun->security);
 	free_netdev(dev);
 }
 
@@ -1562,7 +1568,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
 
 		if (tun_not_capable(tun))
 			return -EPERM;
-		err = security_tun_dev_attach(tfile->socket.sk);
+		err = security_tun_dev_open(tun->security);
 		if (err < 0)
 			return err;
 
@@ -1619,7 +1625,9 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
 
 		spin_lock_init(&tun->lock);
 
-		security_tun_dev_post_create(&tfile->sk);
+		err = security_tun_dev_alloc_security(&tun->security);
+		if (err < 0)
+			goto err_free_dev;
 
 		tun_net_init(dev);
 
@@ -1789,10 +1797,14 @@ static int tun_set_queue(struct file *file, struct ifreq *ifr)
 
 	if (ifr->ifr_flags & IFF_ATTACH_QUEUE) {
 		tun = tfile->detached;
-		if (!tun)
+		if (!tun) {
 			ret = -EINVAL;
-		else
-			ret = tun_attach(tun, file);
+			goto unlock;
+		}
+		ret = security_tun_dev_attach_queue(tun->security);
+		if (ret < 0)
+			goto unlock;
+		ret = tun_attach(tun, file);
 	} else if (ifr->ifr_flags & IFF_DETACH_QUEUE) {
 		tun = rtnl_dereference(tfile->tun);
 		if (!tun || !(tun->flags & TUN_TAP_MQ))
@@ -1802,6 +1814,7 @@ static int tun_set_queue(struct file *file, struct ifreq *ifr)
 	} else
 		ret = -EINVAL;
 
+unlock:
 	rtnl_unlock();
 	return ret;
 }
diff --git a/include/linux/security.h b/include/linux/security.h
index 0f6afc657f77..eee7478cda70 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -989,17 +989,29 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *	tells the LSM to decrement the number of secmark labeling rules loaded
  * @req_classify_flow:
  *	Sets the flow's sid to the openreq sid.
+ * @tun_dev_alloc_security:
+ *	This hook allows a module to allocate a security structure for a TUN
+ *	device.
+ *	@security pointer to a security structure pointer.
+ *	Returns a zero on success, negative values on failure.
+ * @tun_dev_free_security:
+ *	This hook allows a module to free the security structure for a TUN
+ *	device.
+ *	@security pointer to the TUN device's security structure
  * @tun_dev_create:
  *	Check permissions prior to creating a new TUN device.
- * @tun_dev_post_create:
- *	This hook allows a module to update or allocate a per-socket security
- *	structure.
- *	@sk contains the newly created sock structure.
+ * @tun_dev_attach_queue:
+ *	Check permissions prior to attaching to a TUN device queue.
+ *	@security pointer to the TUN device's security structure.
  * @tun_dev_attach:
- *	Check permissions prior to attaching to a persistent TUN device.  This
- *	hook can also be used by the module to update any security state
+ *	This hook can be used by the module to update any security state
  *	associated with the TUN device's sock structure.
  *	@sk contains the existing sock structure.
+ *	@security pointer to the TUN device's security structure.
+ * @tun_dev_open:
+ *	This hook can be used by the module to update any security state
+ *	associated with the TUN device's security structure.
+ *	@security pointer to the TUN devices's security structure.
  *
  * Security hooks for XFRM operations.
  *
@@ -1620,9 +1632,12 @@ struct security_operations {
 	void (*secmark_refcount_inc) (void);
 	void (*secmark_refcount_dec) (void);
 	void (*req_classify_flow) (const struct request_sock *req, struct flowi *fl);
-	int (*tun_dev_create)(void);
-	void (*tun_dev_post_create)(struct sock *sk);
-	int (*tun_dev_attach)(struct sock *sk);
+	int (*tun_dev_alloc_security) (void **security);
+	void (*tun_dev_free_security) (void *security);
+	int (*tun_dev_create) (void);
+	int (*tun_dev_attach_queue) (void *security);
+	int (*tun_dev_attach) (struct sock *sk, void *security);
+	int (*tun_dev_open) (void *security);
 #endif	/* CONFIG_SECURITY_NETWORK */
 
 #ifdef CONFIG_SECURITY_NETWORK_XFRM
@@ -2566,9 +2581,12 @@ void security_inet_conn_established(struct sock *sk,
 int security_secmark_relabel_packet(u32 secid);
 void security_secmark_refcount_inc(void);
 void security_secmark_refcount_dec(void);
+int security_tun_dev_alloc_security(void **security);
+void security_tun_dev_free_security(void *security);
 int security_tun_dev_create(void);
-void security_tun_dev_post_create(struct sock *sk);
-int security_tun_dev_attach(struct sock *sk);
+int security_tun_dev_attach_queue(void *security);
+int security_tun_dev_attach(struct sock *sk, void *security);
+int security_tun_dev_open(void *security);
 
 #else	/* CONFIG_SECURITY_NETWORK */
 static inline int security_unix_stream_connect(struct sock *sock,
@@ -2733,16 +2751,31 @@ static inline void security_secmark_refcount_dec(void)
 {
 }
 
+static inline int security_tun_dev_alloc_security(void **security)
+{
+	return 0;
+}
+
+static inline void security_tun_dev_free_security(void *security)
+{
+}
+
 static inline int security_tun_dev_create(void)
 {
 	return 0;
 }
 
-static inline void security_tun_dev_post_create(struct sock *sk)
+static inline int security_tun_dev_attach_queue(void *security)
+{
+	return 0;
+}
+
+static inline int security_tun_dev_attach(struct sock *sk, void *security)
 {
+	return 0;
 }
 
-static inline int security_tun_dev_attach(struct sock *sk)
+static inline int security_tun_dev_open(void *security)
 {
 	return 0;
 }
diff --git a/security/capability.c b/security/capability.c
index 0fe5a026aef8..579775088967 100644
--- a/security/capability.c
+++ b/security/capability.c
@@ -709,16 +709,31 @@ static void cap_req_classify_flow(const struct request_sock *req,
 {
 }
 
+static int cap_tun_dev_alloc_security(void **security)
+{
+	return 0;
+}
+
+static void cap_tun_dev_free_security(void *security)
+{
+}
+
 static int cap_tun_dev_create(void)
 {
 	return 0;
 }
 
-static void cap_tun_dev_post_create(struct sock *sk)
+static int cap_tun_dev_attach_queue(void *security)
+{
+	return 0;
+}
+
+static int cap_tun_dev_attach(struct sock *sk, void *security)
 {
+	return 0;
 }
 
-static int cap_tun_dev_attach(struct sock *sk)
+static int cap_tun_dev_open(void *security)
 {
 	return 0;
 }
@@ -1050,8 +1065,11 @@ void __init security_fixup_ops(struct security_operations *ops)
 	set_to_cap_if_null(ops, secmark_refcount_inc);
 	set_to_cap_if_null(ops, secmark_refcount_dec);
 	set_to_cap_if_null(ops, req_classify_flow);
+	set_to_cap_if_null(ops, tun_dev_alloc_security);
+	set_to_cap_if_null(ops, tun_dev_free_security);
 	set_to_cap_if_null(ops, tun_dev_create);
-	set_to_cap_if_null(ops, tun_dev_post_create);
+	set_to_cap_if_null(ops, tun_dev_open);
+	set_to_cap_if_null(ops, tun_dev_attach_queue);
 	set_to_cap_if_null(ops, tun_dev_attach);
 #endif	/* CONFIG_SECURITY_NETWORK */
 #ifdef CONFIG_SECURITY_NETWORK_XFRM
diff --git a/security/security.c b/security/security.c
index daa97f4ac9d1..7b88c6aeaed4 100644
--- a/security/security.c
+++ b/security/security.c
@@ -1254,24 +1254,42 @@ void security_secmark_refcount_dec(void)
 }
 EXPORT_SYMBOL(security_secmark_refcount_dec);
 
+int security_tun_dev_alloc_security(void **security)
+{
+	return security_ops->tun_dev_alloc_security(security);
+}
+EXPORT_SYMBOL(security_tun_dev_alloc_security);
+
+void security_tun_dev_free_security(void *security)
+{
+	security_ops->tun_dev_free_security(security);
+}
+EXPORT_SYMBOL(security_tun_dev_free_security);
+
 int security_tun_dev_create(void)
 {
 	return security_ops->tun_dev_create();
 }
 EXPORT_SYMBOL(security_tun_dev_create);
 
-void security_tun_dev_post_create(struct sock *sk)
+int security_tun_dev_attach_queue(void *security)
 {
-	return security_ops->tun_dev_post_create(sk);
+	return security_ops->tun_dev_attach_queue(security);
 }
-EXPORT_SYMBOL(security_tun_dev_post_create);
+EXPORT_SYMBOL(security_tun_dev_attach_queue);
 
-int security_tun_dev_attach(struct sock *sk)
+int security_tun_dev_attach(struct sock *sk, void *security)
 {
-	return security_ops->tun_dev_attach(sk);
+	return security_ops->tun_dev_attach(sk, security);
 }
 EXPORT_SYMBOL(security_tun_dev_attach);
 
+int security_tun_dev_open(void *security)
+{
+	return security_ops->tun_dev_open(security);
+}
+EXPORT_SYMBOL(security_tun_dev_open);
+
 #endif	/* CONFIG_SECURITY_NETWORK */
 
 #ifdef CONFIG_SECURITY_NETWORK_XFRM
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 61a53367d029..ef26e9611ffb 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -4399,6 +4399,24 @@ static void selinux_req_classify_flow(const struct request_sock *req,
 	fl->flowi_secid = req->secid;
 }
 
+static int selinux_tun_dev_alloc_security(void **security)
+{
+	struct tun_security_struct *tunsec;
+
+	tunsec = kzalloc(sizeof(*tunsec), GFP_KERNEL);
+	if (!tunsec)
+		return -ENOMEM;
+	tunsec->sid = current_sid();
+
+	*security = tunsec;
+	return 0;
+}
+
+static void selinux_tun_dev_free_security(void *security)
+{
+	kfree(security);
+}
+
 static int selinux_tun_dev_create(void)
 {
 	u32 sid = current_sid();
@@ -4414,8 +4432,17 @@ static int selinux_tun_dev_create(void)
 			    NULL);
 }
 
-static void selinux_tun_dev_post_create(struct sock *sk)
+static int selinux_tun_dev_attach_queue(void *security)
 {
+	struct tun_security_struct *tunsec = security;
+
+	return avc_has_perm(current_sid(), tunsec->sid, SECCLASS_TUN_SOCKET,
+			    TUN_SOCKET__ATTACH_QUEUE, NULL);
+}
+
+static int selinux_tun_dev_attach(struct sock *sk, void *security)
+{
+	struct tun_security_struct *tunsec = security;
 	struct sk_security_struct *sksec = sk->sk_security;
 
 	/* we don't currently perform any NetLabel based labeling here and it
@@ -4425,20 +4452,19 @@ static void selinux_tun_dev_post_create(struct sock *sk)
 	 * cause confusion to the TUN user that had no idea network labeling
 	 * protocols were being used */
 
-	/* see the comments in selinux_tun_dev_create() about why we don't use
-	 * the sockcreate SID here */
-
-	sksec->sid = current_sid();
+	sksec->sid = tunsec->sid;
 	sksec->sclass = SECCLASS_TUN_SOCKET;
+
+	return 0;
 }
 
-static int selinux_tun_dev_attach(struct sock *sk)
+static int selinux_tun_dev_open(void *security)
 {
-	struct sk_security_struct *sksec = sk->sk_security;
+	struct tun_security_struct *tunsec = security;
 	u32 sid = current_sid();
 	int err;
 
-	err = avc_has_perm(sid, sksec->sid, SECCLASS_TUN_SOCKET,
+	err = avc_has_perm(sid, tunsec->sid, SECCLASS_TUN_SOCKET,
 			   TUN_SOCKET__RELABELFROM, NULL);
 	if (err)
 		return err;
@@ -4446,8 +4472,7 @@ static int selinux_tun_dev_attach(struct sock *sk)
 			   TUN_SOCKET__RELABELTO, NULL);
 	if (err)
 		return err;
-
-	sksec->sid = sid;
+	tunsec->sid = sid;
 
 	return 0;
 }
@@ -5642,9 +5667,12 @@ static struct security_operations selinux_ops = {
 	.secmark_refcount_inc =		selinux_secmark_refcount_inc,
 	.secmark_refcount_dec =		selinux_secmark_refcount_dec,
 	.req_classify_flow =		selinux_req_classify_flow,
+	.tun_dev_alloc_security =	selinux_tun_dev_alloc_security,
+	.tun_dev_free_security =	selinux_tun_dev_free_security,
 	.tun_dev_create =		selinux_tun_dev_create,
-	.tun_dev_post_create = 		selinux_tun_dev_post_create,
+	.tun_dev_attach_queue =		selinux_tun_dev_attach_queue,
 	.tun_dev_attach =		selinux_tun_dev_attach,
+	.tun_dev_open =			selinux_tun_dev_open,
 
 #ifdef CONFIG_SECURITY_NETWORK_XFRM
 	.xfrm_policy_alloc_security =	selinux_xfrm_policy_alloc,
diff --git a/security/selinux/include/objsec.h b/security/selinux/include/objsec.h
index 26c7eee1c309..aa47bcabb5f6 100644
--- a/security/selinux/include/objsec.h
+++ b/security/selinux/include/objsec.h
@@ -110,6 +110,10 @@ struct sk_security_struct {
 	u16 sclass;			/* sock security class */
 };
 
+struct tun_security_struct {
+	u32 sid;			/* SID for the tun device sockets */
+};
+
 struct key_security_struct {
 	u32 sid;	/* SID of key */
 };
-- 
cgit v1.2.3


From c1b724f6659a7e9e32f8fcf6409d053e1b7bccad Mon Sep 17 00:00:00 2001
From: Stephen Warren <swarren@wwwdotorg.org>
Date: Thu, 3 Jan 2013 20:23:13 -0700
Subject: ARM: bcm2835: make use of CLKSRC_OF

Using CLKSRC_OF enables deletion of the SoC-specific header
bcm2835_timer.h, replacing the custom function bcm2835_timer_init() with
the standardized automatic clocksource_of_init().

Signed-off-by: Stephen Warren <swarren@wwwdotorg.org>
---
 arch/arm/Kconfig                    |  1 +
 arch/arm/mach-bcm2835/bcm2835.c     |  4 ++--
 drivers/clocksource/bcm2835_timer.c |  5 +++--
 include/linux/bcm2835_timer.h       | 22 ----------------------
 4 files changed, 6 insertions(+), 26 deletions(-)
 delete mode 100644 include/linux/bcm2835_timer.h

(limited to 'include/linux')

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 67874b82a4ed..670cf44c35c8 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -344,6 +344,7 @@ config ARCH_BCM2835
 	select ARM_ERRATA_411920
 	select ARM_TIMER_SP804
 	select CLKDEV_LOOKUP
+	select CLKSRC_OF
 	select COMMON_CLK
 	select CPU_V6
 	select GENERIC_CLOCKEVENTS
diff --git a/arch/arm/mach-bcm2835/bcm2835.c b/arch/arm/mach-bcm2835/bcm2835.c
index 176d2d24782d..d615a61e902c 100644
--- a/arch/arm/mach-bcm2835/bcm2835.c
+++ b/arch/arm/mach-bcm2835/bcm2835.c
@@ -17,8 +17,8 @@
 #include <linux/irqchip/bcm2835.h>
 #include <linux/of_address.h>
 #include <linux/of_platform.h>
-#include <linux/bcm2835_timer.h>
 #include <linux/clk/bcm2835.h>
+#include <linux/clocksource.h>
 
 #include <asm/mach/arch.h>
 #include <asm/mach/map.h>
@@ -104,7 +104,7 @@ DT_MACHINE_START(BCM2835, "BCM2835")
 	.init_irq = bcm2835_init_irq,
 	.handle_irq = bcm2835_handle_irq,
 	.init_machine = bcm2835_init,
-	.init_time = bcm2835_timer_init,
+	.init_time = clocksource_of_init,
 	.restart = bcm2835_restart,
 	.dt_compat = bcm2835_compat
 MACHINE_END
diff --git a/drivers/clocksource/bcm2835_timer.c b/drivers/clocksource/bcm2835_timer.c
index 7f796d8f7505..50c68fef944b 100644
--- a/drivers/clocksource/bcm2835_timer.c
+++ b/drivers/clocksource/bcm2835_timer.c
@@ -16,7 +16,6 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
 
-#include <linux/bcm2835_timer.h>
 #include <linux/bitops.h>
 #include <linux/clockchips.h>
 #include <linux/clocksource.h>
@@ -101,7 +100,7 @@ static struct of_device_id bcm2835_time_match[] __initconst = {
 	{}
 };
 
-void __init bcm2835_timer_init(void)
+static void __init bcm2835_timer_init(void)
 {
 	struct device_node *node;
 	void __iomem *base;
@@ -155,3 +154,5 @@ void __init bcm2835_timer_init(void)
 
 	pr_info("bcm2835: system timer (irq = %d)\n", irq);
 }
+CLOCKSOURCE_OF_DECLARE(bcm2835, "brcm,bcm2835-system-timer",
+			bcm2835_timer_init);
diff --git a/include/linux/bcm2835_timer.h b/include/linux/bcm2835_timer.h
deleted file mode 100644
index ca17aa817006..000000000000
--- a/include/linux/bcm2835_timer.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Copyright 2012 Simon Arlott
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- */
-
-#ifndef __BCM2835_TIMER_H
-#define __BCM2835_TIMER_H
-
-#include <asm/mach/time.h>
-
-extern void bcm2835_timer_init(void);
-
-#endif
-- 
cgit v1.2.3


From 154f757fd315270e42bd17f4a68d84bd070e5758 Mon Sep 17 00:00:00 2001
From: Chanwoo Choi <cw00.choi@samsung.com>
Date: Tue, 27 Nov 2012 09:40:32 +0900
Subject: extcon: max77693: Remove duplicate code by making function

This patch make max77693-muic_get_cable_type() function to remove
duplicate code because almost internal function need to read
adc/adc1k/adclow/chg_type value of MUIC register. Also, this patch
add description of internal function move field constant of muic device
from extcon-max77693 driver to max77693 header file because of it
is needed for masking some interrupt through platform data.

Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
Signed-off-by: Myungjoo Ham <myungjoo.ham@samsung.com>
---
 drivers/extcon/extcon-max77693.c     | 440 +++++++++++++++++++----------------
 include/linux/mfd/max77693-private.h |  86 +++++++
 2 files changed, 327 insertions(+), 199 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/extcon/extcon-max77693.c b/drivers/extcon/extcon-max77693.c
index 8c17b65eb74d..e84d5dc06798 100644
--- a/drivers/extcon/extcon-max77693.c
+++ b/drivers/extcon/extcon-max77693.c
@@ -30,92 +30,6 @@
 
 #define	DEV_NAME			"max77693-muic"
 
-/* MAX77693 MUIC - STATUS1~3 Register */
-#define STATUS1_ADC_SHIFT		(0)
-#define STATUS1_ADCLOW_SHIFT		(5)
-#define STATUS1_ADCERR_SHIFT		(6)
-#define STATUS1_ADC1K_SHIFT		(7)
-#define STATUS1_ADC_MASK		(0x1f << STATUS1_ADC_SHIFT)
-#define STATUS1_ADCLOW_MASK		(0x1 << STATUS1_ADCLOW_SHIFT)
-#define STATUS1_ADCERR_MASK		(0x1 << STATUS1_ADCERR_SHIFT)
-#define STATUS1_ADC1K_MASK		(0x1 << STATUS1_ADC1K_SHIFT)
-
-#define STATUS2_CHGTYP_SHIFT		(0)
-#define STATUS2_CHGDETRUN_SHIFT		(3)
-#define STATUS2_DCDTMR_SHIFT		(4)
-#define STATUS2_DXOVP_SHIFT		(5)
-#define STATUS2_VBVOLT_SHIFT		(6)
-#define STATUS2_VIDRM_SHIFT		(7)
-#define STATUS2_CHGTYP_MASK		(0x7 << STATUS2_CHGTYP_SHIFT)
-#define STATUS2_CHGDETRUN_MASK		(0x1 << STATUS2_CHGDETRUN_SHIFT)
-#define STATUS2_DCDTMR_MASK		(0x1 << STATUS2_DCDTMR_SHIFT)
-#define STATUS2_DXOVP_MASK		(0x1 << STATUS2_DXOVP_SHIFT)
-#define STATUS2_VBVOLT_MASK		(0x1 << STATUS2_VBVOLT_SHIFT)
-#define STATUS2_VIDRM_MASK		(0x1 << STATUS2_VIDRM_SHIFT)
-
-#define STATUS3_OVP_SHIFT		(2)
-#define STATUS3_OVP_MASK		(0x1 << STATUS3_OVP_SHIFT)
-
-/* MAX77693 CDETCTRL1~2 register */
-#define CDETCTRL1_CHGDETEN_SHIFT	(0)
-#define CDETCTRL1_CHGTYPMAN_SHIFT	(1)
-#define CDETCTRL1_DCDEN_SHIFT		(2)
-#define CDETCTRL1_DCD2SCT_SHIFT		(3)
-#define CDETCTRL1_CDDELAY_SHIFT		(4)
-#define CDETCTRL1_DCDCPL_SHIFT		(5)
-#define CDETCTRL1_CDPDET_SHIFT		(7)
-#define CDETCTRL1_CHGDETEN_MASK		(0x1 << CDETCTRL1_CHGDETEN_SHIFT)
-#define CDETCTRL1_CHGTYPMAN_MASK	(0x1 << CDETCTRL1_CHGTYPMAN_SHIFT)
-#define CDETCTRL1_DCDEN_MASK		(0x1 << CDETCTRL1_DCDEN_SHIFT)
-#define CDETCTRL1_DCD2SCT_MASK		(0x1 << CDETCTRL1_DCD2SCT_SHIFT)
-#define CDETCTRL1_CDDELAY_MASK		(0x1 << CDETCTRL1_CDDELAY_SHIFT)
-#define CDETCTRL1_DCDCPL_MASK		(0x1 << CDETCTRL1_DCDCPL_SHIFT)
-#define CDETCTRL1_CDPDET_MASK		(0x1 << CDETCTRL1_CDPDET_SHIFT)
-
-#define CDETCTRL2_VIDRMEN_SHIFT		(1)
-#define CDETCTRL2_DXOVPEN_SHIFT		(3)
-#define CDETCTRL2_VIDRMEN_MASK		(0x1 << CDETCTRL2_VIDRMEN_SHIFT)
-#define CDETCTRL2_DXOVPEN_MASK		(0x1 << CDETCTRL2_DXOVPEN_SHIFT)
-
-/* MAX77693 MUIC - CONTROL1~3 register */
-#define COMN1SW_SHIFT			(0)
-#define COMP2SW_SHIFT			(3)
-#define COMN1SW_MASK			(0x7 << COMN1SW_SHIFT)
-#define COMP2SW_MASK			(0x7 << COMP2SW_SHIFT)
-#define COMP_SW_MASK			(COMP2SW_MASK | COMN1SW_MASK)
-#define CONTROL1_SW_USB			((1 << COMP2SW_SHIFT) \
-						| (1 << COMN1SW_SHIFT))
-#define CONTROL1_SW_AUDIO		((2 << COMP2SW_SHIFT) \
-						| (2 << COMN1SW_SHIFT))
-#define CONTROL1_SW_UART		((3 << COMP2SW_SHIFT) \
-						| (3 << COMN1SW_SHIFT))
-#define CONTROL1_SW_OPEN		((0 << COMP2SW_SHIFT) \
-						| (0 << COMN1SW_SHIFT))
-
-#define CONTROL2_LOWPWR_SHIFT		(0)
-#define CONTROL2_ADCEN_SHIFT		(1)
-#define CONTROL2_CPEN_SHIFT		(2)
-#define CONTROL2_SFOUTASRT_SHIFT	(3)
-#define CONTROL2_SFOUTORD_SHIFT		(4)
-#define CONTROL2_ACCDET_SHIFT		(5)
-#define CONTROL2_USBCPINT_SHIFT		(6)
-#define CONTROL2_RCPS_SHIFT		(7)
-#define CONTROL2_LOWPWR_MASK		(0x1 << CONTROL2_LOWPWR_SHIFT)
-#define CONTROL2_ADCEN_MASK		(0x1 << CONTROL2_ADCEN_SHIFT)
-#define CONTROL2_CPEN_MASK		(0x1 << CONTROL2_CPEN_SHIFT)
-#define CONTROL2_SFOUTASRT_MASK		(0x1 << CONTROL2_SFOUTASRT_SHIFT)
-#define CONTROL2_SFOUTORD_MASK		(0x1 << CONTROL2_SFOUTORD_SHIFT)
-#define CONTROL2_ACCDET_MASK		(0x1 << CONTROL2_ACCDET_SHIFT)
-#define CONTROL2_USBCPINT_MASK		(0x1 << CONTROL2_USBCPINT_SHIFT)
-#define CONTROL2_RCPS_MASK		(0x1 << CONTROL2_RCPS_SHIFT)
-
-#define CONTROL3_JIGSET_SHIFT		(0)
-#define CONTROL3_BTLDSET_SHIFT		(2)
-#define CONTROL3_ADCDBSET_SHIFT		(4)
-#define CONTROL3_JIGSET_MASK		(0x3 << CONTROL3_JIGSET_SHIFT)
-#define CONTROL3_BTLDSET_MASK		(0x3 << CONTROL3_BTLDSET_SHIFT)
-#define CONTROL3_ADCDBSET_MASK		(0x3 << CONTROL3_ADCDBSET_SHIFT)
-
 enum max77693_muic_adc_debounce_time {
 	ADC_DEBOUNCE_TIME_5MS = 0,
 	ADC_DEBOUNCE_TIME_10MS,
@@ -127,8 +41,8 @@ struct max77693_muic_info {
 	struct device *dev;
 	struct max77693_dev *max77693;
 	struct extcon_dev *edev;
-	int prev_adc;
-	int prev_adc_gnd;
+	int prev_cable_type;
+	int prev_cable_type_gnd;
 	int prev_chg_type;
 	u8 status[2];
 
@@ -137,6 +51,13 @@ struct max77693_muic_info {
 	struct mutex mutex;
 };
 
+enum max77693_muic_cable_group {
+	MAX77693_CABLE_GROUP_ADC = 0,
+	MAX77693_CABLE_GROUP_ADC_GND,
+	MAX77693_CABLE_GROUP_CHG,
+	MAX77693_CABLE_GROUP_VBVOLT,
+};
+
 enum max77693_muic_charger_type {
 	MAX77693_CHARGER_TYPE_NONE = 0,
 	MAX77693_CHARGER_TYPE_USB,
@@ -221,21 +142,40 @@ enum max77693_muic_acc_type {
 };
 
 /* MAX77693 MUIC device support below list of accessories(external connector) */
-const char *max77693_extcon_cable[] = {
-	[0] = "USB",
-	[1] = "USB-Host",
-	[2] = "TA",
-	[3] = "Fast-charger",
-	[4] = "Slow-charger",
-	[5] = "Charge-downstream",
-	[6] = "MHL",
-	[7] = "Audio-video-load",
-	[8] = "Audio-video-noload",
-	[9] = "JIG",
+enum {
+	EXTCON_CABLE_USB = 0,
+	EXTCON_CABLE_USB_HOST,
+	EXTCON_CABLE_TA,
+	EXTCON_CABLE_FAST_CHARGER,
+	EXTCON_CABLE_SLOW_CHARGER,
+	EXTCON_CABLE_CHARGE_DOWNSTREAM,
+	EXTCON_CABLE_MHL,
+	EXTCON_CABLE_AUDIO_VIDEO_LOAD,
+	EXTCON_CABLE_AUDIO_VIDEO_NOLOAD,
+	EXTCON_CABLE_JIG,
+
+	_EXTCON_CABLE_NUM,
+};
 
+const char *max77693_extcon_cable[] = {
+	[EXTCON_CABLE_USB]			= "USB",
+	[EXTCON_CABLE_USB_HOST]			= "USB-Host",
+	[EXTCON_CABLE_TA]			= "TA",
+	[EXTCON_CABLE_FAST_CHARGER]		= "Fast-charger",
+	[EXTCON_CABLE_SLOW_CHARGER]		= "Slow-charger",
+	[EXTCON_CABLE_CHARGE_DOWNSTREAM]	= "Charge-downstream",
+	[EXTCON_CABLE_MHL]			= "MHL",
+	[EXTCON_CABLE_AUDIO_VIDEO_LOAD]		= "Audio-video-load",
+	[EXTCON_CABLE_AUDIO_VIDEO_NOLOAD]	= "Audio-video-noload",
+	[EXTCON_CABLE_JIG]			= "JIG",
 	NULL,
 };
 
+/*
+ * max77693_muic_set_debounce_time - Set the debounce time of ADC
+ * @info: the instance including private data of max77693 MUIC
+ * @time: the debounce time of ADC
+ */
 static int max77693_muic_set_debounce_time(struct max77693_muic_info *info,
 		enum max77693_muic_adc_debounce_time time)
 {
@@ -262,6 +202,16 @@ static int max77693_muic_set_debounce_time(struct max77693_muic_info *info,
 	return ret;
 };
 
+/*
+ * max77693_muic_set_path - Set hardware line according to attached cable
+ * @info: the instance including private data of max77693 MUIC
+ * @value: the path according to attached cable
+ * @attached: the state of cable (true:attached, false:detached)
+ *
+ * The max77693 MUIC device share outside H/W line among a varity of cables
+ * so, this function set internal path of H/W line according to the type of
+ * attached cable.
+ */
 static int max77693_muic_set_path(struct max77693_muic_info *info,
 		u8 val, bool attached)
 {
@@ -300,36 +250,169 @@ out:
 	return ret;
 }
 
-static int max77693_muic_adc_ground_handler(struct max77693_muic_info *info,
-		bool attached)
+/*
+ * max77693_muic_get_cable_type - Return cable type and check cable state
+ * @info: the instance including private data of max77693 MUIC
+ * @group: the path according to attached cable
+ * @attached: store cable state and return
+ *
+ * This function check the cable state either attached or detached,
+ * and then divide precise type of cable according to cable group.
+ *	- MAX77693_CABLE_GROUP_ADC
+ *	- MAX77693_CABLE_GROUP_ADC_GND
+ *	- MAX77693_CABLE_GROUP_CHG
+ *	- MAX77693_CABLE_GROUP_VBVOLT
+ */
+static int max77693_muic_get_cable_type(struct max77693_muic_info *info,
+		enum max77693_muic_cable_group group, bool *attached)
 {
-	int ret = 0;
-	int type;
-	int adc, adc1k, adclow;
+	int cable_type = 0;
+	int adc;
+	int adc1k;
+	int adclow;
+	int vbvolt;
+	int chg_type;
+
+	switch (group) {
+	case MAX77693_CABLE_GROUP_ADC:
+		/*
+		 * Read ADC value to check cable type and decide cable state
+		 * according to cable type
+		 */
+		adc = info->status[0] & STATUS1_ADC_MASK;
+		adc >>= STATUS1_ADC_SHIFT;
+
+		/*
+		 * Check current cable state/cable type and store cable type
+		 * (info->prev_cable_type) for handling cable when cable is
+		 * detached.
+		 */
+		if (adc == MAX77693_MUIC_ADC_OPEN) {
+			*attached = false;
+
+			cable_type = info->prev_cable_type;
+			info->prev_cable_type = MAX77693_MUIC_ADC_OPEN;
+		} else {
+			*attached = true;
+
+			cable_type = info->prev_cable_type = adc;
+		}
+		break;
+	case MAX77693_CABLE_GROUP_ADC_GND:
+		/*
+		 * Read ADC value to check cable type and decide cable state
+		 * according to cable type
+		 */
+		adc = info->status[0] & STATUS1_ADC_MASK;
+		adc >>= STATUS1_ADC_SHIFT;
 
-	if (attached) {
+		/*
+		 * Check current cable state/cable type and store cable type
+		 * (info->prev_cable_type/_gnd) for handling cable when cable
+		 * is detached.
+		 */
+		if (adc == MAX77693_MUIC_ADC_OPEN) {
+			*attached = false;
+
+			cable_type = info->prev_cable_type_gnd;
+			info->prev_cable_type_gnd = MAX77693_MUIC_ADC_OPEN;
+		} else {
+			*attached = true;
+
+			adclow = info->status[0] & STATUS1_ADCLOW_MASK;
+			adclow >>= STATUS1_ADCLOW_SHIFT;
+			adc1k = info->status[0] & STATUS1_ADC1K_MASK;
+			adc1k >>= STATUS1_ADC1K_SHIFT;
+
+			vbvolt = info->status[1] & STATUS2_VBVOLT_MASK;
+			vbvolt >>= STATUS2_VBVOLT_SHIFT;
+
+			/**
+			 * [0x1][VBVolt][ADCLow][ADC1K]
+			 * [0x1    0	   0       0  ]	: USB_OTG
+			 * [0x1    0       1       0  ] : Audio Video Cable with load
+			 * [0x1    0       1       1  ] : MHL without charging connector
+			 * [0x1    1       1       1  ] : MHL with charging connector
+			 */
+			cable_type = ((0x1 << 8)
+					| (vbvolt << 2)
+					| (adclow << 1)
+					| adc1k);
+
+			info->prev_cable_type = adc;
+			info->prev_cable_type_gnd = cable_type;
+		}
+
+		break;
+	case MAX77693_CABLE_GROUP_CHG:
+		/*
+		 * Read charger type to check cable type and decide cable state
+		 * according to type of charger cable.
+		 */
+		chg_type = info->status[1] & STATUS2_CHGTYP_MASK;
+		chg_type >>= STATUS2_CHGTYP_SHIFT;
+
+		if (chg_type == MAX77693_CHARGER_TYPE_NONE) {
+			*attached = false;
+
+			cable_type = info->prev_chg_type;
+			info->prev_chg_type = MAX77693_CHARGER_TYPE_NONE;
+		} else {
+			*attached = true;
+
+			/*
+			 * Check current cable state/cable type and store cable
+			 * type(info->prev_chg_type) for handling cable when
+			 * charger cable is detached.
+			 */
+			cable_type = info->prev_chg_type = chg_type;
+		}
+
+		break;
+	case MAX77693_CABLE_GROUP_VBVOLT:
+		/*
+		 * Read ADC value to check cable type and decide cable state
+		 * according to cable type
+		 */
 		adc = info->status[0] & STATUS1_ADC_MASK;
-		adclow = info->status[0] & STATUS1_ADCLOW_MASK;
-		adclow >>= STATUS1_ADCLOW_SHIFT;
-		adc1k = info->status[0] & STATUS1_ADC1K_MASK;
-		adc1k >>= STATUS1_ADC1K_SHIFT;
-
-		/**
-		 * [0x1][ADCLow][ADC1K]
-		 * [0x1    0       0  ]	: USB_OTG
-		 * [0x1    1       0  ] : Audio Video Cable with load
-		 * [0x1    1       1  ] : MHL
+		adc >>= STATUS1_ADC_SHIFT;
+		chg_type = info->status[1] & STATUS2_CHGTYP_MASK;
+		chg_type >>= STATUS2_CHGTYP_SHIFT;
+
+		if (adc == MAX77693_MUIC_ADC_OPEN
+				&& chg_type == MAX77693_CHARGER_TYPE_NONE)
+			*attached = false;
+		else
+			*attached = true;
+
+		/*
+		 * Read vbvolt field, if vbvolt is 1,
+		 * this cable is used for charging.
 		 */
-		type = ((0x1 << 8) | (adclow << 1) | adc1k);
+		vbvolt = info->status[1] & STATUS2_VBVOLT_MASK;
+		vbvolt >>= STATUS2_VBVOLT_SHIFT;
+
+		cable_type = vbvolt;
+		break;
+	default:
+		dev_err(info->dev, "Unknown cable group (%d)\n", group);
+		cable_type = -EINVAL;
+		break;
+	}
+
+	return cable_type;
+}
+
+static int max77693_muic_adc_ground_handler(struct max77693_muic_info *info)
+{
+	int cable_type_gnd;
+	int ret = 0;
+	bool attached;
 
-		/* Store previous ADC value to handle accessory
-		   when accessory will be detached */
-		info->prev_adc = adc;
-		info->prev_adc_gnd = type;
-	} else
-		type = info->prev_adc_gnd;
+	cable_type_gnd = max77693_muic_get_cable_type(info,
+				MAX77693_CABLE_GROUP_ADC_GND, &attached);
 
-	switch (type) {
+	switch (cable_type_gnd) {
 	case MAX77693_MUIC_GND_USB_OTG:
 		/* USB_OTG */
 		ret = max77693_muic_set_path(info, CONTROL1_SW_USB, attached);
@@ -352,8 +435,6 @@ static int max77693_muic_adc_ground_handler(struct max77693_muic_info *info,
 	default:
 		dev_err(info->dev, "failed to detect %s accessory\n",
 			attached ? "attached" : "detached");
-		dev_err(info->dev, "- adc:0x%x, adclow:0x%x, adc1k:0x%x\n",
-			adc, adclow, adc1k);
 		ret = -EINVAL;
 		break;
 	}
@@ -362,45 +443,36 @@ out:
 	return ret;
 }
 
-static int max77693_muic_adc_handler(struct max77693_muic_info *info,
-		int curr_adc, bool attached)
+static int max77693_muic_adc_handler(struct max77693_muic_info *info)
 {
+	int cable_type;
+	bool attached;
 	int ret = 0;
-	int adc;
 
-	if (attached) {
-		/* Store ADC value to handle accessory
-		   when accessory will be detached */
-		info->prev_adc = curr_adc;
-		adc = curr_adc;
-	} else
-		adc = info->prev_adc;
+	/* Check accessory state which is either detached or attached */
+	cable_type = max77693_muic_get_cable_type(info,
+				MAX77693_CABLE_GROUP_ADC, &attached);
 
 	dev_info(info->dev,
 		"external connector is %s (adc:0x%02x, prev_adc:0x%x)\n",
-		attached ? "attached" : "detached", curr_adc, info->prev_adc);
+		attached ? "attached" : "detached", cable_type,
+		info->prev_cable_type);
 
-	switch (adc) {
+	switch (cable_type) {
 	case MAX77693_MUIC_ADC_GROUND:
 		/* USB_OTG/MHL/Audio */
-		max77693_muic_adc_ground_handler(info, attached);
+		max77693_muic_adc_ground_handler(info);
 		break;
 	case MAX77693_MUIC_ADC_FACTORY_MODE_USB_OFF:
 	case MAX77693_MUIC_ADC_FACTORY_MODE_USB_ON:
-		/* USB */
-		ret = max77693_muic_set_path(info, CONTROL1_SW_USB, attached);
-		if (ret < 0)
-			goto out;
-		extcon_set_cable_state(info->edev, "USB", attached);
-		break;
 	case MAX77693_MUIC_ADC_FACTORY_MODE_UART_OFF:
-	case MAX77693_MUIC_ADC_FACTORY_MODE_UART_ON:
 		/* JIG */
 		ret = max77693_muic_set_path(info, CONTROL1_SW_UART, attached);
 		if (ret < 0)
 			goto out;
 		extcon_set_cable_state(info->edev, "JIG", attached);
 		break;
+	case MAX77693_MUIC_ADC_FACTORY_MODE_UART_ON:
 	case MAX77693_MUIC_ADC_AUDIO_MODE_REMOTE:
 		/* Audio Video cable with no-load */
 		ret = max77693_muic_set_path(info, CONTROL1_SW_AUDIO, attached);
@@ -439,12 +511,12 @@ static int max77693_muic_adc_handler(struct max77693_muic_info *info,
 		   proper operation when this accessory is attached/detached. */
 		dev_info(info->dev,
 			"accessory is %s but it isn't used (adc:0x%x)\n",
-			attached ? "attached" : "detached", adc);
+			attached ? "attached" : "detached", cable_type);
 		goto out;
 	default:
 		dev_err(info->dev,
 			"failed to detect %s accessory (adc:0x%x)\n",
-			attached ? "attached" : "detached", adc);
+			attached ? "attached" : "detached", cable_type);
 		ret = -EINVAL;
 		goto out;
 	}
@@ -453,24 +525,19 @@ out:
 	return ret;
 }
 
-static int max77693_muic_chg_handler(struct max77693_muic_info *info,
-		int curr_chg_type, bool attached)
+static int max77693_muic_chg_handler(struct max77693_muic_info *info)
 {
-	int ret = 0;
 	int chg_type;
+	bool attached;
+	int ret = 0;
 
-	if (attached) {
-		/* Store previous charger type to control
-		   when charger accessory will be detached */
-		info->prev_chg_type = curr_chg_type;
-		chg_type = curr_chg_type;
-	} else
-		chg_type = info->prev_chg_type;
+	chg_type = max77693_muic_get_cable_type(info,
+				MAX77693_CABLE_GROUP_CHG, &attached);
 
 	dev_info(info->dev,
 		"external connector is %s(chg_type:0x%x, prev_chg_type:0x%x)\n",
 			attached ? "attached" : "detached",
-			curr_chg_type, info->prev_chg_type);
+			chg_type, info->prev_chg_type);
 
 	switch (chg_type) {
 	case MAX77693_CHARGER_TYPE_USB:
@@ -510,10 +577,8 @@ static void max77693_muic_irq_work(struct work_struct *work)
 {
 	struct max77693_muic_info *info = container_of(work,
 			struct max77693_muic_info, irq_work);
-	int curr_adc, curr_chg_type;
 	int irq_type = -1;
 	int i, ret = 0;
-	bool attached = true;
 
 	if (!info->edev)
 		return;
@@ -539,14 +604,7 @@ static void max77693_muic_irq_work(struct work_struct *work)
 	case MAX77693_MUIC_IRQ_INT1_ADC1K:
 		/* Handle all of accessory except for
 		   type of charger accessory */
-		curr_adc = info->status[0] & STATUS1_ADC_MASK;
-		curr_adc >>= STATUS1_ADC_SHIFT;
-
-		/* Check accessory state which is either detached or attached */
-		if (curr_adc == MAX77693_MUIC_ADC_OPEN)
-			attached = false;
-
-		ret = max77693_muic_adc_handler(info, curr_adc, attached);
+		ret = max77693_muic_adc_handler(info);
 		break;
 	case MAX77693_MUIC_IRQ_INT2_CHGTYP:
 	case MAX77693_MUIC_IRQ_INT2_CHGDETREUN:
@@ -555,15 +613,7 @@ static void max77693_muic_irq_work(struct work_struct *work)
 	case MAX77693_MUIC_IRQ_INT2_VBVOLT:
 	case MAX77693_MUIC_IRQ_INT2_VIDRM:
 		/* Handle charger accessory */
-		curr_chg_type = info->status[1] & STATUS2_CHGTYP_MASK;
-		curr_chg_type >>= STATUS2_CHGTYP_SHIFT;
-
-		/* Check charger accessory state which
-		   is either detached or attached */
-		if (curr_chg_type == MAX77693_CHARGER_TYPE_NONE)
-			attached = false;
-
-		ret = max77693_muic_chg_handler(info, curr_chg_type, attached);
+		ret = max77693_muic_chg_handler(info);
 		break;
 	case MAX77693_MUIC_IRQ_INT3_EOC:
 	case MAX77693_MUIC_IRQ_INT3_CGMBC:
@@ -604,7 +654,9 @@ static struct regmap_config max77693_muic_regmap_config = {
 static int max77693_muic_detect_accessory(struct max77693_muic_info *info)
 {
 	int ret = 0;
-	int adc, chg_type;
+	int adc;
+	int chg_type;
+	bool attached;
 
 	mutex_lock(&info->mutex);
 
@@ -617,34 +669,24 @@ static int max77693_muic_detect_accessory(struct max77693_muic_info *info)
 		return -EINVAL;
 	}
 
-	adc = info->status[0] & STATUS1_ADC_MASK;
-	adc >>= STATUS1_ADC_SHIFT;
-
-	if (adc != MAX77693_MUIC_ADC_OPEN) {
-		dev_info(info->dev,
-			"external connector is attached (adc:0x%02x)\n", adc);
-
-		ret = max77693_muic_adc_handler(info, adc, true);
+	adc = max77693_muic_get_cable_type(info, MAX77693_CABLE_GROUP_ADC,
+					&attached);
+	if (attached && adc != MAX77693_MUIC_ADC_OPEN) {
+		ret = max77693_muic_adc_handler(info);
 		if (ret < 0)
-			dev_err(info->dev, "failed to detect accessory\n");
-		goto out;
+			dev_err(info->dev, "Cannot detect accessory\n");
 	}
 
-	chg_type = info->status[1] & STATUS2_CHGTYP_MASK;
-	chg_type >>= STATUS2_CHGTYP_SHIFT;
-
-	if (chg_type != MAX77693_CHARGER_TYPE_NONE) {
-		dev_info(info->dev,
-			"external connector is attached (chg_type:0x%x)\n",
-			chg_type);
-
-		max77693_muic_chg_handler(info, chg_type, true);
+	chg_type = max77693_muic_get_cable_type(info, MAX77693_CABLE_GROUP_CHG,
+					&attached);
+	if (attached && chg_type != MAX77693_CHARGER_TYPE_NONE) {
+		ret = max77693_muic_chg_handler(info);
 		if (ret < 0)
-			dev_err(info->dev, "failed to detect charger accessory\n");
+			dev_err(info->dev, "Cannot detect charger accessory\n");
 	}
 
-out:
 	mutex_unlock(&info->mutex);
+
 	return ret;
 }
 
diff --git a/include/linux/mfd/max77693-private.h b/include/linux/mfd/max77693-private.h
index 1eeae5c07915..5b18ecde69b5 100644
--- a/include/linux/mfd/max77693-private.h
+++ b/include/linux/mfd/max77693-private.h
@@ -106,6 +106,92 @@ enum max77693_muic_reg {
 	MAX77693_MUIC_REG_END,
 };
 
+/* MAX77693 MUIC - STATUS1~3 Register */
+#define STATUS1_ADC_SHIFT		(0)
+#define STATUS1_ADCLOW_SHIFT		(5)
+#define STATUS1_ADCERR_SHIFT		(6)
+#define STATUS1_ADC1K_SHIFT		(7)
+#define STATUS1_ADC_MASK		(0x1f << STATUS1_ADC_SHIFT)
+#define STATUS1_ADCLOW_MASK		(0x1 << STATUS1_ADCLOW_SHIFT)
+#define STATUS1_ADCERR_MASK		(0x1 << STATUS1_ADCERR_SHIFT)
+#define STATUS1_ADC1K_MASK		(0x1 << STATUS1_ADC1K_SHIFT)
+
+#define STATUS2_CHGTYP_SHIFT		(0)
+#define STATUS2_CHGDETRUN_SHIFT		(3)
+#define STATUS2_DCDTMR_SHIFT		(4)
+#define STATUS2_DXOVP_SHIFT		(5)
+#define STATUS2_VBVOLT_SHIFT		(6)
+#define STATUS2_VIDRM_SHIFT		(7)
+#define STATUS2_CHGTYP_MASK		(0x7 << STATUS2_CHGTYP_SHIFT)
+#define STATUS2_CHGDETRUN_MASK		(0x1 << STATUS2_CHGDETRUN_SHIFT)
+#define STATUS2_DCDTMR_MASK		(0x1 << STATUS2_DCDTMR_SHIFT)
+#define STATUS2_DXOVP_MASK		(0x1 << STATUS2_DXOVP_SHIFT)
+#define STATUS2_VBVOLT_MASK		(0x1 << STATUS2_VBVOLT_SHIFT)
+#define STATUS2_VIDRM_MASK		(0x1 << STATUS2_VIDRM_SHIFT)
+
+#define STATUS3_OVP_SHIFT		(2)
+#define STATUS3_OVP_MASK		(0x1 << STATUS3_OVP_SHIFT)
+
+/* MAX77693 CDETCTRL1~2 register */
+#define CDETCTRL1_CHGDETEN_SHIFT	(0)
+#define CDETCTRL1_CHGTYPMAN_SHIFT	(1)
+#define CDETCTRL1_DCDEN_SHIFT		(2)
+#define CDETCTRL1_DCD2SCT_SHIFT		(3)
+#define CDETCTRL1_CDDELAY_SHIFT		(4)
+#define CDETCTRL1_DCDCPL_SHIFT		(5)
+#define CDETCTRL1_CDPDET_SHIFT		(7)
+#define CDETCTRL1_CHGDETEN_MASK		(0x1 << CDETCTRL1_CHGDETEN_SHIFT)
+#define CDETCTRL1_CHGTYPMAN_MASK	(0x1 << CDETCTRL1_CHGTYPMAN_SHIFT)
+#define CDETCTRL1_DCDEN_MASK		(0x1 << CDETCTRL1_DCDEN_SHIFT)
+#define CDETCTRL1_DCD2SCT_MASK		(0x1 << CDETCTRL1_DCD2SCT_SHIFT)
+#define CDETCTRL1_CDDELAY_MASK		(0x1 << CDETCTRL1_CDDELAY_SHIFT)
+#define CDETCTRL1_DCDCPL_MASK		(0x1 << CDETCTRL1_DCDCPL_SHIFT)
+#define CDETCTRL1_CDPDET_MASK		(0x1 << CDETCTRL1_CDPDET_SHIFT)
+
+#define CDETCTRL2_VIDRMEN_SHIFT		(1)
+#define CDETCTRL2_DXOVPEN_SHIFT		(3)
+#define CDETCTRL2_VIDRMEN_MASK		(0x1 << CDETCTRL2_VIDRMEN_SHIFT)
+#define CDETCTRL2_DXOVPEN_MASK		(0x1 << CDETCTRL2_DXOVPEN_SHIFT)
+
+/* MAX77693 MUIC - CONTROL1~3 register */
+#define COMN1SW_SHIFT			(0)
+#define COMP2SW_SHIFT			(3)
+#define COMN1SW_MASK			(0x7 << COMN1SW_SHIFT)
+#define COMP2SW_MASK			(0x7 << COMP2SW_SHIFT)
+#define COMP_SW_MASK			(COMP2SW_MASK | COMN1SW_MASK)
+#define CONTROL1_SW_USB			((1 << COMP2SW_SHIFT) \
+						| (1 << COMN1SW_SHIFT))
+#define CONTROL1_SW_AUDIO		((2 << COMP2SW_SHIFT) \
+						| (2 << COMN1SW_SHIFT))
+#define CONTROL1_SW_UART		((3 << COMP2SW_SHIFT) \
+						| (3 << COMN1SW_SHIFT))
+#define CONTROL1_SW_OPEN		((0 << COMP2SW_SHIFT) \
+						| (0 << COMN1SW_SHIFT))
+
+#define CONTROL2_LOWPWR_SHIFT		(0)
+#define CONTROL2_ADCEN_SHIFT		(1)
+#define CONTROL2_CPEN_SHIFT		(2)
+#define CONTROL2_SFOUTASRT_SHIFT	(3)
+#define CONTROL2_SFOUTORD_SHIFT		(4)
+#define CONTROL2_ACCDET_SHIFT		(5)
+#define CONTROL2_USBCPINT_SHIFT		(6)
+#define CONTROL2_RCPS_SHIFT		(7)
+#define CONTROL2_LOWPWR_MASK		(0x1 << CONTROL2_LOWPWR_SHIFT)
+#define CONTROL2_ADCEN_MASK		(0x1 << CONTROL2_ADCEN_SHIFT)
+#define CONTROL2_CPEN_MASK		(0x1 << CONTROL2_CPEN_SHIFT)
+#define CONTROL2_SFOUTASRT_MASK		(0x1 << CONTROL2_SFOUTASRT_SHIFT)
+#define CONTROL2_SFOUTORD_MASK		(0x1 << CONTROL2_SFOUTORD_SHIFT)
+#define CONTROL2_ACCDET_MASK		(0x1 << CONTROL2_ACCDET_SHIFT)
+#define CONTROL2_USBCPINT_MASK		(0x1 << CONTROL2_USBCPINT_SHIFT)
+#define CONTROL2_RCPS_MASK		(0x1 << CONTROL2_RCPS_SHIFT)
+
+#define CONTROL3_JIGSET_SHIFT		(0)
+#define CONTROL3_BTLDSET_SHIFT		(2)
+#define CONTROL3_ADCDBSET_SHIFT		(4)
+#define CONTROL3_JIGSET_MASK		(0x3 << CONTROL3_JIGSET_SHIFT)
+#define CONTROL3_BTLDSET_MASK		(0x3 << CONTROL3_BTLDSET_SHIFT)
+#define CONTROL3_ADCDBSET_MASK		(0x3 << CONTROL3_ADCDBSET_SHIFT)
+
 /* Slave addr = 0x90: Haptic */
 enum max77693_haptic_reg {
 	MAX77693_HAPTIC_REG_STATUS		= 0x00,
-- 
cgit v1.2.3


From 297620fd1e14edf5fefa1736f873b9228336eee1 Mon Sep 17 00:00:00 2001
From: Chanwoo Choi <cw00.choi@samsung.com>
Date: Wed, 26 Dec 2012 13:10:11 +0900
Subject: extcon: max77693: Check the state/type of cable after boot completed

This patch check the state/type of cable after completing the initialization
of platform and notify platform of cable state/type through extcon. If extcon
provider driver notify the state/type of cable before completing platform boot,
this uevent is unused and ignored.

Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
Signed-off-by: Myungjoo Ham <myungjoo.ham@samsung.com>
---
 drivers/extcon/extcon-max77693.c | 37 ++++++++++++++++++++++++++++++++++---
 include/linux/mfd/max77693.h     |  2 ++
 2 files changed, 36 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/extcon/extcon-max77693.c b/drivers/extcon/extcon-max77693.c
index 07ea96bfd0cb..10f41f3d5be4 100644
--- a/drivers/extcon/extcon-max77693.c
+++ b/drivers/extcon/extcon-max77693.c
@@ -30,6 +30,7 @@
 #include <linux/irqdomain.h>
 
 #define	DEV_NAME			"max77693-muic"
+#define	DELAY_MS_DEFAULT		20000		/* unit: millisecond */
 
 enum max77693_muic_adc_debounce_time {
 	ADC_DEBOUNCE_TIME_5MS = 0,
@@ -52,6 +53,14 @@ struct max77693_muic_info {
 	struct work_struct irq_work;
 	struct mutex mutex;
 
+	/*
+	 * Use delayed workqueue to detect cable state and then
+	 * notify cable state to notifiee/platform through uevent.
+	 * After completing the booting of platform, the extcon provider
+	 * driver should notify cable state to upper layer.
+	 */
+	struct delayed_work wq_detcable;
+
 	/* Button of dock device */
 	struct input_dev *dock;
 };
@@ -912,13 +921,23 @@ static int max77693_muic_detect_accessory(struct max77693_muic_info *info)
 	return ret;
 }
 
+static void max77693_muic_detect_cable_wq(struct work_struct *work)
+{
+	struct max77693_muic_info *info = container_of(to_delayed_work(work),
+				struct max77693_muic_info, wq_detcable);
+
+	max77693_muic_detect_accessory(info);
+}
+
 static int max77693_muic_probe(struct platform_device *pdev)
 {
 	struct max77693_dev *max77693 = dev_get_drvdata(pdev->dev.parent);
 	struct max77693_platform_data *pdata = dev_get_platdata(max77693->dev);
 	struct max77693_muic_platform_data *muic_pdata = pdata->muic_data;
 	struct max77693_muic_info *info;
-	int ret, i;
+	int delay_jiffies;
+	int ret;
+	int i;
 	u8 id;
 
 	info = devm_kzalloc(&pdev->dev, sizeof(struct max77693_muic_info),
@@ -1051,8 +1070,20 @@ static int max77693_muic_probe(struct platform_device *pdev)
 	/* Set ADC debounce time */
 	max77693_muic_set_debounce_time(info, ADC_DEBOUNCE_TIME_25MS);
 
-	/* Detect accessory on boot */
-	max77693_muic_detect_accessory(info);
+	/*
+	 * Detect accessory after completing the initialization of platform
+	 *
+	 * - Use delayed workqueue to detect cable state and then
+	 * notify cable state to notifiee/platform through uevent.
+	 * After completing the booting of platform, the extcon provider
+	 * driver should notify cable state to upper layer.
+	 */
+	INIT_DELAYED_WORK(&info->wq_detcable, max77693_muic_detect_cable_wq);
+	if (muic_pdata->detcable_delay_ms)
+		delay_jiffies = msecs_to_jiffies(muic_pdata->detcable_delay_ms);
+	else
+		delay_jiffies = msecs_to_jiffies(DELAY_MS_DEFAULT);
+	schedule_delayed_work(&info->wq_detcable, delay_jiffies);
 
 	return ret;
 
diff --git a/include/linux/mfd/max77693.h b/include/linux/mfd/max77693.h
index fe03b2d35d4f..9aa9c203f28b 100644
--- a/include/linux/mfd/max77693.h
+++ b/include/linux/mfd/max77693.h
@@ -38,6 +38,8 @@ struct max77693_reg_data {
 struct max77693_muic_platform_data {
 	struct max77693_reg_data *init_data;
 	int num_init_data;
+
+	int detcable_delay_ms;
 };
 
 struct max77693_platform_data {
-- 
cgit v1.2.3


From 2b75799f5ae9f31ea9778003591fd295d3bc3159 Mon Sep 17 00:00:00 2001
From: Chanwoo Choi <cw00.choi@samsung.com>
Date: Thu, 6 Dec 2012 21:27:56 +0900
Subject: extcon: max77693: Set default uart/usb path by using platform data

This patch determine default uart/usb path by using platform data.
The MAX77693 MUIC device can possibliy set USB/UART/AUDIO/USB_AUX
/UART_AUX to internal h/w path of MUIC device. So, drvier should
determine default uart/usb path.

Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
Signed-off-by: Myungjoo Ham <myungjoo.ham@samsung.com>
---
 drivers/extcon/extcon-max77693.c | 24 ++++++++++++++++++++++++
 include/linux/mfd/max77693.h     |  7 +++++++
 2 files changed, 31 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/extcon/extcon-max77693.c b/drivers/extcon/extcon-max77693.c
index 7b7f1a2a0846..abab068adc35 100644
--- a/drivers/extcon/extcon-max77693.c
+++ b/drivers/extcon/extcon-max77693.c
@@ -63,6 +63,13 @@ struct max77693_muic_info {
 
 	/* Button of dock device */
 	struct input_dev *dock;
+
+	/*
+	 * Default usb/uart path whether UART/USB or AUX_UART/AUX_USB
+	 * h/w path of COMP2/COMN1 on CONTROL1 register.
+	 */
+	int path_usb;
+	int path_uart;
 };
 
 enum max77693_muic_cable_group {
@@ -1058,6 +1065,23 @@ static int max77693_muic_probe(struct platform_device *pdev)
 				= muic_pdata->init_data[i].data;
 	}
 
+	/*
+	 * Default usb/uart path whether UART/USB or AUX_UART/AUX_USB
+	 * h/w path of COMP2/COMN1 on CONTROL1 register.
+	 */
+	if (muic_pdata->path_uart)
+		info->path_uart = muic_pdata->path_uart;
+	else
+		info->path_uart = CONTROL1_SW_UART;
+
+	if (muic_pdata->path_usb)
+		info->path_usb = muic_pdata->path_usb;
+	else
+		info->path_usb = CONTROL1_SW_USB;
+
+	/* Set initial path for UART */
+	 max77693_muic_set_path(info, info->path_uart, true);
+
 	/* Check revision number of MUIC device*/
 	ret = max77693_read_reg(info->max77693->regmap_muic,
 			MAX77693_MUIC_REG_ID, &id);
diff --git a/include/linux/mfd/max77693.h b/include/linux/mfd/max77693.h
index 9aa9c203f28b..3109a6c5c948 100644
--- a/include/linux/mfd/max77693.h
+++ b/include/linux/mfd/max77693.h
@@ -40,6 +40,13 @@ struct max77693_muic_platform_data {
 	int num_init_data;
 
 	int detcable_delay_ms;
+
+	/*
+	 * Default usb/uart path whether UART/USB or AUX_UART/AUX_USB
+	 * h/w path of COMP2/COMN1 on CONTROL1 register.
+	 */
+	int path_usb;
+	int path_uart;
 };
 
 struct max77693_platform_data {
-- 
cgit v1.2.3


From b17e54625cff1b06aec9df505b46c9bcdcd4823d Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Fri, 11 Jan 2013 08:55:24 +0900
Subject: extcon: arizona: Allow configuration of MICBIAS rise time

Allow configuration of the rise time for MICBIAS via platform data, the
delay required depends on things like the external component selection.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
Acked-by: MyungJoo Ham <myungjoo.ham@samsung.com>
---
 drivers/extcon/extcon-arizona.c   | 6 ++++++
 include/linux/mfd/arizona/pdata.h | 3 +++
 2 files changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/extcon/extcon-arizona.c b/drivers/extcon/extcon-arizona.c
index 9e1d104803e7..635b7078ce5e 100644
--- a/drivers/extcon/extcon-arizona.c
+++ b/drivers/extcon/extcon-arizona.c
@@ -417,6 +417,12 @@ static int arizona_extcon_probe(struct platform_device *pdev)
 		}
 	}
 
+	if (arizona->pdata.micd_bias_start_time)
+		regmap_update_bits(arizona->regmap, ARIZONA_MIC_DETECT_1,
+				   ARIZONA_MICD_BIAS_STARTTIME_MASK,
+				   arizona->pdata.micd_bias_start_time
+				   << ARIZONA_MICD_BIAS_STARTTIME_SHIFT);
+
 	arizona_extcon_set_mode(info, 0);
 
 	info->input = devm_input_allocate_device(&pdev->dev);
diff --git a/include/linux/mfd/arizona/pdata.h b/include/linux/mfd/arizona/pdata.h
index 8b1d1daaae16..5b0508853290 100644
--- a/include/linux/mfd/arizona/pdata.h
+++ b/include/linux/mfd/arizona/pdata.h
@@ -99,6 +99,9 @@ struct arizona_pdata {
 	/** GPIO for mic detection polarity */
 	int micd_pol_gpio;
 
+	/** Mic detect ramp rate */
+	int micd_bias_start_time;
+
 	/** Headset polarity configurations */
 	struct arizona_micd_config *micd_configs;
 	int num_micd_configs;
-- 
cgit v1.2.3


From dab63eb25ced7539a51b8f4218f7b6b56d34df22 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Fri, 11 Jan 2013 08:55:36 +0900
Subject: extcon: arizona: Use microphone clamp function if available

Newer Arizona devices include a microphone clamp function which is tied to
jack detect. Activate this feature when present in order to ensure best
performance of the subsystem.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
Signed-off-by: Myungjoo Ham <myungjoo.ham@samsung.com>
---
 drivers/extcon/extcon-arizona.c       | 19 +++++++++++++++++++
 include/linux/mfd/arizona/core.h      |  4 +++-
 include/linux/mfd/arizona/registers.h | 28 ++++++++++++++++++++++++++++
 3 files changed, 50 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/extcon/extcon-arizona.c b/drivers/extcon/extcon-arizona.c
index 635b7078ce5e..3ef3bee7d1e6 100644
--- a/drivers/extcon/extcon-arizona.c
+++ b/drivers/extcon/extcon-arizona.c
@@ -45,6 +45,7 @@ struct arizona_extcon_info {
 	int micd_num_modes;
 
 	bool micd_reva;
+	bool micd_clamp;
 
 	bool mic;
 	bool detecting;
@@ -375,6 +376,7 @@ static int arizona_extcon_probe(struct platform_device *pdev)
 			info->micd_reva = true;
 			break;
 		default:
+			info->micd_clamp = true;
 			break;
 		}
 		break;
@@ -423,6 +425,19 @@ static int arizona_extcon_probe(struct platform_device *pdev)
 				   arizona->pdata.micd_bias_start_time
 				   << ARIZONA_MICD_BIAS_STARTTIME_SHIFT);
 
+	/*
+	 * If we have a clamp use it.
+	 */
+	if (info->micd_clamp) {
+		regmap_update_bits(arizona->regmap,
+				   ARIZONA_MICD_CLAMP_CONTROL,
+				   ARIZONA_MICD_CLAMP_MODE_MASK, 4);
+		regmap_update_bits(arizona->regmap,
+				   ARIZONA_JACK_DETECT_DEBOUNCE,
+				   ARIZONA_MICD_CLAMP_DB,
+				   ARIZONA_MICD_CLAMP_DB);
+	}
+
 	arizona_extcon_set_mode(info, 0);
 
 	info->input = devm_input_allocate_device(&pdev->dev);
@@ -529,6 +544,10 @@ static int arizona_extcon_remove(struct platform_device *pdev)
 
 	pm_runtime_disable(&pdev->dev);
 
+	regmap_update_bits(arizona->regmap,
+			   ARIZONA_MICD_CLAMP_CONTROL,
+			   ARIZONA_MICD_CLAMP_MODE_MASK, 0);
+
 	arizona_set_irq_wake(arizona, ARIZONA_IRQ_JD_RISE, 0);
 	arizona_set_irq_wake(arizona, ARIZONA_IRQ_JD_FALL, 0);
 	arizona_free_irq(arizona, ARIZONA_IRQ_MICDET, info);
diff --git a/include/linux/mfd/arizona/core.h b/include/linux/mfd/arizona/core.h
index a580363a7d29..a710255528d7 100644
--- a/include/linux/mfd/arizona/core.h
+++ b/include/linux/mfd/arizona/core.h
@@ -75,8 +75,10 @@ enum arizona_type {
 #define ARIZONA_IRQ_DCS_HP_DONE           47
 #define ARIZONA_IRQ_FLL2_CLOCK_OK         48
 #define ARIZONA_IRQ_FLL1_CLOCK_OK         49
+#define ARIZONA_IRQ_MICD_CLAMP_RISE	  50
+#define ARIZONA_IRQ_MICD_CLAMP_FALL	  51
 
-#define ARIZONA_NUM_IRQ                   50
+#define ARIZONA_NUM_IRQ                   52
 
 struct snd_soc_dapm_context;
 
diff --git a/include/linux/mfd/arizona/registers.h b/include/linux/mfd/arizona/registers.h
index 1f6fe31a4d5c..f3211f06cec6 100644
--- a/include/linux/mfd/arizona/registers.h
+++ b/include/linux/mfd/arizona/registers.h
@@ -119,6 +119,7 @@
 #define ARIZONA_ACCESSORY_DETECT_MODE_1          0x293
 #define ARIZONA_HEADPHONE_DETECT_1               0x29B
 #define ARIZONA_HEADPHONE_DETECT_2               0x29C
+#define ARIZONA_MICD_CLAMP_CONTROL               0x2A2
 #define ARIZONA_MIC_DETECT_1                     0x2A3
 #define ARIZONA_MIC_DETECT_2                     0x2A4
 #define ARIZONA_MIC_DETECT_3                     0x2A5
@@ -2069,6 +2070,13 @@
 #define ARIZONA_HP_LVL_SHIFT                          0  /* HP_LVL - [6:0] */
 #define ARIZONA_HP_LVL_WIDTH                          7  /* HP_LVL - [6:0] */
 
+/*
+ * R674 (0x2A2) - MICD clamp control
+ */
+#define ARIZONA_MICD_CLAMP_MODE_MASK             0x000F  /* MICD_CLAMP_MODE - [3:0] */
+#define ARIZONA_MICD_CLAMP_MODE_SHIFT                 0  /* MICD_CLAMP_MODE - [3:0] */
+#define ARIZONA_MICD_CLAMP_MODE_WIDTH                 4  /* MICD_CLAMP_MODE - [3:0] */
+
 /*
  * R675 (0x2A3) - Mic Detect 1
  */
@@ -5267,6 +5275,12 @@
 /*
  * R3409 (0xD51) - AOD IRQ1
  */
+#define ARIZONA_MICD_CLAMP_FALL_EINT1            0x0080  /* MICD_CLAMP_FALL_EINT1 */
+#define ARIZONA_MICD_CLAMP_FALL_EINT1_MASK       0x0080  /* MICD_CLAMP_FALL_EINT1 */
+#define ARIZONA_MICD_CLAMP_FALL_EINT1_SHIFT           7  /* MICD_CLAMP_FALL_EINT1 */
+#define ARIZONA_MICD_CLAMP_RISE_EINT1            0x0040  /* MICD_CLAMP_RISE_EINT1 */
+#define ARIZONA_MICD_CLAMP_RISE_EINT1_MASK       0x0040  /* MICD_CLAMP_RISE_EINT1 */
+#define ARIZONA_MICD_CLAMP_RISE_EINT1_SHIFT           6  /* MICD_CLAMP_RISE_EINT1 */
 #define ARIZONA_GP5_FALL_EINT1                   0x0020  /* GP5_FALL_EINT1 */
 #define ARIZONA_GP5_FALL_EINT1_MASK              0x0020  /* GP5_FALL_EINT1 */
 #define ARIZONA_GP5_FALL_EINT1_SHIFT                  5  /* GP5_FALL_EINT1 */
@@ -5295,6 +5309,12 @@
 /*
  * R3410 (0xD52) - AOD IRQ2
  */
+#define ARIZONA_MICD_CLAMP_FALL_EINT2            0x0080  /* MICD_CLAMP_FALL_EINT2 */
+#define ARIZONA_MICD_CLAMP_FALL_EINT2_MASK       0x0080  /* MICD_CLAMP_FALL_EINT2 */
+#define ARIZONA_MICD_CLAMP_FALL_EINT2_SHIFT           7  /* MICD_CLAMP_FALL_EINT2 */
+#define ARIZONA_MICD_CLAMP_RISE_EINT2            0x0040  /* MICD_CLAMP_RISE_EINT2 */
+#define ARIZONA_MICD_CLAMP_RISE_EINT2_MASK       0x0040  /* MICD_CLAMP_RISE_EINT2 */
+#define ARIZONA_MICD_CLAMP_RISE_EINT2_SHIFT           6  /* MICD_CLAMP_RISE_EINT2 */
 #define ARIZONA_GP5_FALL_EINT2                   0x0020  /* GP5_FALL_EINT2 */
 #define ARIZONA_GP5_FALL_EINT2_MASK              0x0020  /* GP5_FALL_EINT2 */
 #define ARIZONA_GP5_FALL_EINT2_SHIFT                  5  /* GP5_FALL_EINT2 */
@@ -5379,6 +5399,10 @@
 /*
  * R3413 (0xD55) - AOD IRQ Raw Status
  */
+#define ARIZONA_MICD_CLAMP_STS                   0x0008  /* MICD_CLAMP_STS */
+#define ARIZONA_MICD_CLAMP_STS_MASK              0x0008  /* MICD_CLAMP_STS */
+#define ARIZONA_MICD_CLAMP_STS_SHIFT                  3  /* MICD_CLAMP_STS */
+#define ARIZONA_MICD_CLAMP_STS_WIDTH                  1  /* MICD_CLAMP_STS */
 #define ARIZONA_GP5_STS                          0x0004  /* GP5_STS */
 #define ARIZONA_GP5_STS_MASK                     0x0004  /* GP5_STS */
 #define ARIZONA_GP5_STS_SHIFT                         2  /* GP5_STS */
@@ -5395,6 +5419,10 @@
 /*
  * R3414 (0xD56) - Jack detect debounce
  */
+#define ARIZONA_MICD_CLAMP_DB                    0x0008  /* MICD_CLAMP_DB */
+#define ARIZONA_MICD_CLAMP_DB_MASK               0x0008  /* MICD_CLAMP_DB */
+#define ARIZONA_MICD_CLAMP_DB_SHIFT                   3  /* MICD_CLAMP_DB */
+#define ARIZONA_MICD_CLAMP_DB_WIDTH                   1  /* MICD_CLAMP_DB */
 #define ARIZONA_JD2_DB                           0x0002  /* JD2_DB */
 #define ARIZONA_JD2_DB_MASK                      0x0002  /* JD2_DB */
 #define ARIZONA_JD2_DB_SHIFT                          1  /* JD2_DB */
-- 
cgit v1.2.3


From 92a49871b378b1e523a95da569cd38efdd06eee5 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Fri, 11 Jan 2013 08:55:39 +0900
Subject: extcon: arizona: Support use of GPIO5 as an input to jack detection

Some system designs provide an input on GPIO5 which in conjunction with
the jack detection feature indicates the presence of an accessory.
Support such systems, using the microphone clamp feature to minimise
wakeups of the processor.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
Signed-off-by: Myungjoo Ham <myungjoo.ham@samsung.com>
---
 drivers/extcon/extcon-arizona.c   | 76 +++++++++++++++++++++++++++++----------
 include/linux/mfd/arizona/pdata.h |  3 ++
 2 files changed, 61 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/extcon/extcon-arizona.c b/drivers/extcon/extcon-arizona.c
index 3ef3bee7d1e6..b5190a811601 100644
--- a/drivers/extcon/extcon-arizona.c
+++ b/drivers/extcon/extcon-arizona.c
@@ -290,13 +290,21 @@ static irqreturn_t arizona_jackdet(int irq, void *data)
 {
 	struct arizona_extcon_info *info = data;
 	struct arizona *arizona = info->arizona;
-	unsigned int val;
+	unsigned int val, present, mask;
 	int ret, i;
 
 	pm_runtime_get_sync(info->dev);
 
 	mutex_lock(&info->lock);
 
+	if (arizona->pdata.jd_gpio5) {
+		mask = ARIZONA_MICD_CLAMP_STS;
+		present = 0;
+	} else {
+		mask = ARIZONA_JD1_STS;
+		present = ARIZONA_JD1_STS;
+	}
+
 	ret = regmap_read(arizona->regmap, ARIZONA_AOD_IRQ_RAW_STATUS, &val);
 	if (ret != 0) {
 		dev_err(arizona->dev, "Failed to read jackdet status: %d\n",
@@ -306,7 +314,7 @@ static irqreturn_t arizona_jackdet(int irq, void *data)
 		return IRQ_NONE;
 	}
 
-	if (val & ARIZONA_JD1_STS) {
+	if ((val & mask) == present) {
 		dev_dbg(arizona->dev, "Detected jack\n");
 		ret = extcon_set_cable_state_(&info->edev,
 					      ARIZONA_CABLE_MECHANICAL, true);
@@ -321,6 +329,7 @@ static irqreturn_t arizona_jackdet(int irq, void *data)
 
 		arizona_stop_mic(info);
 
+
 		for (i = 0; i < ARIZONA_NUM_BUTTONS; i++)
 			input_report_key(info->input,
 					 arizona_lvl_to_key[i].report, 0);
@@ -345,6 +354,7 @@ static int arizona_extcon_probe(struct platform_device *pdev)
 	struct arizona *arizona = dev_get_drvdata(pdev->dev.parent);
 	struct arizona_pdata *pdata;
 	struct arizona_extcon_info *info;
+	int jack_irq_fall, jack_irq_rise;
 	int ret, mode, i;
 
 	pdata = dev_get_platdata(arizona->dev);
@@ -426,12 +436,24 @@ static int arizona_extcon_probe(struct platform_device *pdev)
 				   << ARIZONA_MICD_BIAS_STARTTIME_SHIFT);
 
 	/*
-	 * If we have a clamp use it.
+	 * If we have a clamp use it, activating in conjunction with
+	 * GPIO5 if that is connected for jack detect operation.
 	 */
 	if (info->micd_clamp) {
-		regmap_update_bits(arizona->regmap,
-				   ARIZONA_MICD_CLAMP_CONTROL,
-				   ARIZONA_MICD_CLAMP_MODE_MASK, 4);
+		if (arizona->pdata.jd_gpio5) {
+			/* Put the GPIO into input mode */
+			regmap_write(arizona->regmap, ARIZONA_GPIO5_CTRL,
+				     0xc101);
+
+			regmap_update_bits(arizona->regmap,
+					   ARIZONA_MICD_CLAMP_CONTROL,
+					   ARIZONA_MICD_CLAMP_MODE_MASK, 0x9);
+		} else {
+			regmap_update_bits(arizona->regmap,
+					   ARIZONA_MICD_CLAMP_CONTROL,
+					   ARIZONA_MICD_CLAMP_MODE_MASK, 0x4);
+		}
+
 		regmap_update_bits(arizona->regmap,
 				   ARIZONA_JACK_DETECT_DEBOUNCE,
 				   ARIZONA_MICD_CLAMP_DB,
@@ -458,7 +480,15 @@ static int arizona_extcon_probe(struct platform_device *pdev)
 	pm_runtime_idle(&pdev->dev);
 	pm_runtime_get_sync(&pdev->dev);
 
-	ret = arizona_request_irq(arizona, ARIZONA_IRQ_JD_RISE,
+	if (arizona->pdata.jd_gpio5) {
+		jack_irq_rise = ARIZONA_IRQ_MICD_CLAMP_RISE;
+		jack_irq_fall = ARIZONA_IRQ_MICD_CLAMP_FALL;
+	} else {
+		jack_irq_rise = ARIZONA_IRQ_JD_RISE;
+		jack_irq_fall = ARIZONA_IRQ_JD_FALL;
+	}
+
+	ret = arizona_request_irq(arizona, jack_irq_rise,
 				  "JACKDET rise", arizona_jackdet, info);
 	if (ret != 0) {
 		dev_err(&pdev->dev, "Failed to get JACKDET rise IRQ: %d\n",
@@ -466,21 +496,21 @@ static int arizona_extcon_probe(struct platform_device *pdev)
 		goto err_input;
 	}
 
-	ret = arizona_set_irq_wake(arizona, ARIZONA_IRQ_JD_RISE, 1);
+	ret = arizona_set_irq_wake(arizona, jack_irq_rise, 1);
 	if (ret != 0) {
 		dev_err(&pdev->dev, "Failed to set JD rise IRQ wake: %d\n",
 			ret);
 		goto err_rise;
 	}
 
-	ret = arizona_request_irq(arizona, ARIZONA_IRQ_JD_FALL,
+	ret = arizona_request_irq(arizona, jack_irq_fall,
 				  "JACKDET fall", arizona_jackdet, info);
 	if (ret != 0) {
 		dev_err(&pdev->dev, "Failed to get JD fall IRQ: %d\n", ret);
 		goto err_rise_wake;
 	}
 
-	ret = arizona_set_irq_wake(arizona, ARIZONA_IRQ_JD_FALL, 1);
+	ret = arizona_set_irq_wake(arizona, jack_irq_fall, 1);
 	if (ret != 0) {
 		dev_err(&pdev->dev, "Failed to set JD fall IRQ wake: %d\n",
 			ret);
@@ -522,13 +552,13 @@ static int arizona_extcon_probe(struct platform_device *pdev)
 err_micdet:
 	arizona_free_irq(arizona, ARIZONA_IRQ_MICDET, info);
 err_fall_wake:
-	arizona_set_irq_wake(arizona, ARIZONA_IRQ_JD_FALL, 0);
+	arizona_set_irq_wake(arizona, jack_irq_fall, 0);
 err_fall:
-	arizona_free_irq(arizona, ARIZONA_IRQ_JD_FALL, info);
+	arizona_free_irq(arizona, jack_irq_fall, info);
 err_rise_wake:
-	arizona_set_irq_wake(arizona, ARIZONA_IRQ_JD_RISE, 0);
+	arizona_set_irq_wake(arizona, jack_irq_rise, 0);
 err_rise:
-	arizona_free_irq(arizona, ARIZONA_IRQ_JD_RISE, info);
+	arizona_free_irq(arizona, jack_irq_rise, info);
 err_input:
 err_register:
 	pm_runtime_disable(&pdev->dev);
@@ -541,6 +571,7 @@ static int arizona_extcon_remove(struct platform_device *pdev)
 {
 	struct arizona_extcon_info *info = platform_get_drvdata(pdev);
 	struct arizona *arizona = info->arizona;
+	int jack_irq_rise, jack_irq_fall;
 
 	pm_runtime_disable(&pdev->dev);
 
@@ -548,11 +579,20 @@ static int arizona_extcon_remove(struct platform_device *pdev)
 			   ARIZONA_MICD_CLAMP_CONTROL,
 			   ARIZONA_MICD_CLAMP_MODE_MASK, 0);
 
-	arizona_set_irq_wake(arizona, ARIZONA_IRQ_JD_RISE, 0);
-	arizona_set_irq_wake(arizona, ARIZONA_IRQ_JD_FALL, 0);
+	if (arizona->pdata.jd_gpio5) {
+		jack_irq_rise = ARIZONA_IRQ_MICD_CLAMP_RISE;
+		jack_irq_fall = ARIZONA_IRQ_MICD_CLAMP_FALL;
+	} else {
+		jack_irq_rise = ARIZONA_IRQ_JD_RISE;
+		jack_irq_fall = ARIZONA_IRQ_JD_FALL;
+	}
+
+	arizona_set_irq_wake(arizona, jack_irq_rise, 0);
+	arizona_set_irq_wake(arizona, jack_irq_fall, 0);
+	arizona_free_irq(arizona, ARIZONA_IRQ_HPDET, info);
 	arizona_free_irq(arizona, ARIZONA_IRQ_MICDET, info);
-	arizona_free_irq(arizona, ARIZONA_IRQ_JD_RISE, info);
-	arizona_free_irq(arizona, ARIZONA_IRQ_JD_FALL, info);
+	arizona_free_irq(arizona, jack_irq_rise, info);
+	arizona_free_irq(arizona, jack_irq_fall, info);
 	regmap_update_bits(arizona->regmap, ARIZONA_JACK_DETECT_ANALOGUE,
 			   ARIZONA_JD1_ENA, 0);
 	arizona_clk32k_disable(arizona);
diff --git a/include/linux/mfd/arizona/pdata.h b/include/linux/mfd/arizona/pdata.h
index 5b0508853290..0fc26a480be3 100644
--- a/include/linux/mfd/arizona/pdata.h
+++ b/include/linux/mfd/arizona/pdata.h
@@ -96,6 +96,9 @@ struct arizona_pdata {
 	/** Pin state for GPIO pins */
 	int gpio_defaults[ARIZONA_MAX_GPIO];
 
+	/** GPIO5 is used for jack detection */
+	bool jd_gpio5;
+
 	/** GPIO for mic detection polarity */
 	int micd_pol_gpio;
 
-- 
cgit v1.2.3


From 4f340333822de79b3439bddccdbf3846f9794e42 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Fri, 11 Jan 2013 08:55:43 +0900
Subject: extcon: arizona: Enable basic headphone identification

Use the headphone detection to identify if the accessory is a headphone or
line load. There are two different revisions of the IP with different
register layouts, support both.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
Signed-off-by: Myungjoo Ham <myungjoo.ham@samsung.com>
---
 drivers/extcon/extcon-arizona.c       | 341 +++++++++++++++++++++++++++++++---
 drivers/mfd/wm5102-tables.c           |   3 +
 include/linux/mfd/arizona/registers.h |  12 ++
 3 files changed, 335 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/extcon/extcon-arizona.c b/drivers/extcon/extcon-arizona.c
index b5190a811601..cc258a8842ef 100644
--- a/drivers/extcon/extcon-arizona.c
+++ b/drivers/extcon/extcon-arizona.c
@@ -31,8 +31,14 @@
 #include <linux/mfd/arizona/pdata.h>
 #include <linux/mfd/arizona/registers.h>
 
+#define ARIZONA_DEFAULT_HP 32
+
 #define ARIZONA_NUM_BUTTONS 6
 
+#define ARIZONA_ACCDET_MODE_MIC 0
+#define ARIZONA_ACCDET_MODE_HPL 1
+#define ARIZONA_ACCDET_MODE_HPR 2
+
 struct arizona_extcon_info {
 	struct device *dev;
 	struct arizona *arizona;
@@ -47,10 +53,14 @@ struct arizona_extcon_info {
 	bool micd_reva;
 	bool micd_clamp;
 
+	bool hpdet_active;
+
 	bool mic;
 	bool detecting;
 	int jack_flips;
 
+	int hpdet_ip;
+
 	struct extcon_dev edev;
 };
 
@@ -74,11 +84,13 @@ static struct {
 #define ARIZONA_CABLE_MECHANICAL 0
 #define ARIZONA_CABLE_MICROPHONE 1
 #define ARIZONA_CABLE_HEADPHONE  2
+#define ARIZONA_CABLE_LINEOUT    3
 
 static const char *arizona_cable[] = {
 	"Mechanical",
 	"Microphone",
 	"Headphone",
+	"Line-out",
 	NULL,
 };
 
@@ -100,6 +112,290 @@ static void arizona_extcon_set_mode(struct arizona_extcon_info *info, int mode)
 	dev_dbg(arizona->dev, "Set jack polarity to %d\n", mode);
 }
 
+static struct {
+	unsigned int factor_a;
+	unsigned int factor_b;
+} arizona_hpdet_b_ranges[] = {
+	{  5528,   362464 },
+	{ 11084,  6186851 },
+	{ 11065, 65460395 },
+};
+
+static struct {
+	int min;
+	int max;
+} arizona_hpdet_c_ranges[] = {
+	{ 0,       30 },
+	{ 8,      100 },
+	{ 100,   1000 },
+	{ 1000, 10000 },
+};
+
+static int arizona_hpdet_read(struct arizona_extcon_info *info)
+{
+	struct arizona *arizona = info->arizona;
+	unsigned int val, range;
+	int ret;
+
+	ret = regmap_read(arizona->regmap, ARIZONA_HEADPHONE_DETECT_2, &val);
+	if (ret != 0) {
+		dev_err(arizona->dev, "Failed to read HPDET status: %d\n",
+			ret);
+		return ret;
+	}
+
+	switch (info->hpdet_ip) {
+	case 0:
+		if (!(val & ARIZONA_HP_DONE)) {
+			dev_err(arizona->dev, "HPDET did not complete: %x\n",
+				val);
+			val = ARIZONA_DEFAULT_HP;
+		}
+
+		val &= ARIZONA_HP_LVL_MASK;
+		break;
+
+	case 1:
+		if (!(val & ARIZONA_HP_DONE_B)) {
+			dev_err(arizona->dev, "HPDET did not complete: %x\n",
+				val);
+			return ARIZONA_DEFAULT_HP;
+		}
+
+		ret = regmap_read(arizona->regmap, ARIZONA_HP_DACVAL, &val);
+		if (ret != 0) {
+			dev_err(arizona->dev, "Failed to read HP value: %d\n",
+				ret);
+			return ARIZONA_DEFAULT_HP;
+		}
+
+		regmap_read(arizona->regmap, ARIZONA_HEADPHONE_DETECT_1,
+			    &range);
+		range = (range & ARIZONA_HP_IMPEDANCE_RANGE_MASK)
+			   >> ARIZONA_HP_IMPEDANCE_RANGE_SHIFT;
+
+		if (range < ARRAY_SIZE(arizona_hpdet_b_ranges) - 1 &&
+		    (val < 100 || val > 0x3fb)) {
+			range++;
+			dev_dbg(arizona->dev, "Moving to HPDET range %d\n",
+				range);
+			regmap_update_bits(arizona->regmap,
+					   ARIZONA_HEADPHONE_DETECT_1,
+					   ARIZONA_HP_IMPEDANCE_RANGE_MASK,
+					   range <<
+					   ARIZONA_HP_IMPEDANCE_RANGE_SHIFT);
+			return -EAGAIN;
+		}
+
+		/* If we go out of range report top of range */
+		if (val < 100 || val > 0x3fb) {
+			dev_dbg(arizona->dev, "Measurement out of range\n");
+			return 10000;
+		}
+
+		dev_dbg(arizona->dev, "HPDET read %d in range %d\n",
+			val, range);
+
+		val = arizona_hpdet_b_ranges[range].factor_b
+			/ ((val * 100) -
+			   arizona_hpdet_b_ranges[range].factor_a);
+		break;
+
+	default:
+		dev_warn(arizona->dev, "Unknown HPDET IP revision %d\n",
+			 info->hpdet_ip);
+	case 2:
+		if (!(val & ARIZONA_HP_DONE_B)) {
+			dev_err(arizona->dev, "HPDET did not complete: %x\n",
+				val);
+			return ARIZONA_DEFAULT_HP;
+		}
+
+		val &= ARIZONA_HP_LVL_B_MASK;
+
+		regmap_read(arizona->regmap, ARIZONA_HEADPHONE_DETECT_1,
+			    &range);
+		range = (range & ARIZONA_HP_IMPEDANCE_RANGE_MASK)
+			   >> ARIZONA_HP_IMPEDANCE_RANGE_SHIFT;
+
+		/* Skip up or down a range? */
+		if (range && (val < arizona_hpdet_c_ranges[range].min)) {
+			range--;
+			dev_dbg(arizona->dev, "Moving to HPDET range %d-%d\n",
+				arizona_hpdet_c_ranges[range].min,
+				arizona_hpdet_c_ranges[range].max);
+			regmap_update_bits(arizona->regmap,
+					   ARIZONA_HEADPHONE_DETECT_1,
+					   ARIZONA_HP_IMPEDANCE_RANGE_MASK,
+					   range <<
+					   ARIZONA_HP_IMPEDANCE_RANGE_SHIFT);
+			return -EAGAIN;
+		}
+
+		if (range < ARRAY_SIZE(arizona_hpdet_c_ranges) - 1 &&
+		    (val >= arizona_hpdet_c_ranges[range].max)) {
+			range++;
+			dev_dbg(arizona->dev, "Moving to HPDET range %d-%d\n",
+				arizona_hpdet_c_ranges[range].min,
+				arizona_hpdet_c_ranges[range].max);
+			regmap_update_bits(arizona->regmap,
+					   ARIZONA_HEADPHONE_DETECT_1,
+					   ARIZONA_HP_IMPEDANCE_RANGE_MASK,
+					   range <<
+					   ARIZONA_HP_IMPEDANCE_RANGE_SHIFT);
+			return -EAGAIN;
+		}
+	}
+
+	dev_dbg(arizona->dev, "HP impedance %d ohms\n", val);
+	return val;
+}
+
+static irqreturn_t arizona_hpdet_irq(int irq, void *data)
+{
+	struct arizona_extcon_info *info = data;
+	struct arizona *arizona = info->arizona;
+	int report = ARIZONA_CABLE_HEADPHONE;
+	int ret;
+
+	mutex_lock(&info->lock);
+
+	/* If we got a spurious IRQ for some reason then ignore it */
+	if (!info->hpdet_active) {
+		dev_warn(arizona->dev, "Spurious HPDET IRQ\n");
+		mutex_unlock(&info->lock);
+		return IRQ_NONE;
+	}
+
+	/* If the cable was removed while measuring ignore the result */
+	ret = extcon_get_cable_state_(&info->edev, ARIZONA_CABLE_MECHANICAL);
+	if (ret < 0) {
+		dev_err(arizona->dev, "Failed to check cable state: %d\n",
+			ret);
+		goto out;
+	} else if (!ret) {
+		dev_dbg(arizona->dev, "Ignoring HPDET for removed cable\n");
+		goto done;
+	}
+
+	ret = arizona_hpdet_read(info);
+	if (ret == -EAGAIN) {
+		goto out;
+	} else if (ret < 0) {
+		goto done;
+	}
+
+	/* Reset back to starting range */
+	regmap_update_bits(arizona->regmap,
+			   ARIZONA_HEADPHONE_DETECT_1,
+			   ARIZONA_HP_IMPEDANCE_RANGE_MASK, 0);
+
+	/* Report high impedence cables as line outputs */
+	if (ret >= 5000)
+		report = ARIZONA_CABLE_LINEOUT;
+	else
+		report = ARIZONA_CABLE_HEADPHONE;
+
+	ret = extcon_set_cable_state_(&info->edev, report, true);
+	if (ret != 0)
+		dev_err(arizona->dev, "Failed to report HP/line: %d\n",
+			ret);
+
+	ret = regmap_update_bits(arizona->regmap, 0x225, 0x4000, 0);
+	if (ret != 0)
+		dev_warn(arizona->dev, "Failed to undo magic: %d\n", ret);
+
+	ret = regmap_update_bits(arizona->regmap, 0x226, 0x4000, 0);
+	if (ret != 0)
+		dev_warn(arizona->dev, "Failed to undo magic: %d\n", ret);
+
+done:
+	regmap_update_bits(arizona->regmap, ARIZONA_HEADPHONE_DETECT_1,
+			   ARIZONA_HP_POLL, 0);
+
+	/* Revert back to MICDET mode */
+	regmap_update_bits(arizona->regmap,
+			   ARIZONA_ACCESSORY_DETECT_MODE_1,
+			   ARIZONA_ACCDET_MODE_MASK, ARIZONA_ACCDET_MODE_MIC);
+
+	/* If we have a mic then reenable MICDET */
+	if (info->mic)
+		arizona_start_mic(info);
+
+	if (info->hpdet_active) {
+		pm_runtime_put_autosuspend(info->dev);
+		info->hpdet_active = false;
+	}
+
+out:
+	mutex_unlock(&info->lock);
+
+	return IRQ_HANDLED;
+}
+
+static void arizona_identify_headphone(struct arizona_extcon_info *info)
+{
+	struct arizona *arizona = info->arizona;
+	int ret;
+
+	dev_dbg(arizona->dev, "Starting HPDET\n");
+
+	/* Make sure we keep the device enabled during the measurement */
+	pm_runtime_get(info->dev);
+
+	info->hpdet_active = true;
+
+	if (info->mic)
+		arizona_stop_mic(info);
+
+	ret = regmap_update_bits(arizona->regmap, 0x225, 0x4000, 0x4000);
+	if (ret != 0)
+		dev_warn(arizona->dev, "Failed to do magic: %d\n", ret);
+
+	ret = regmap_update_bits(arizona->regmap, 0x226, 0x4000, 0x4000);
+	if (ret != 0)
+		dev_warn(arizona->dev, "Failed to do magic: %d\n", ret);
+
+	ret = regmap_update_bits(arizona->regmap,
+				 ARIZONA_ACCESSORY_DETECT_MODE_1,
+				 ARIZONA_ACCDET_MODE_MASK,
+				 ARIZONA_ACCDET_MODE_HPL);
+	if (ret != 0) {
+		dev_err(arizona->dev, "Failed to set HPDETL mode: %d\n", ret);
+		goto err;
+	}
+
+	ret = regmap_update_bits(arizona->regmap, ARIZONA_HEADPHONE_DETECT_1,
+				 ARIZONA_HP_POLL, ARIZONA_HP_POLL);
+	if (ret != 0) {
+		dev_err(arizona->dev, "Can't start HPDETL measurement: %d\n",
+			ret);
+		goto err;
+	}
+
+	return;
+
+err:
+	regmap_update_bits(arizona->regmap, ARIZONA_ACCESSORY_DETECT_MODE_1,
+			   ARIZONA_ACCDET_MODE_MASK, ARIZONA_ACCDET_MODE_MIC);
+
+	/* Just report headphone */
+	ret = extcon_update_state(&info->edev,
+				  1 << ARIZONA_CABLE_HEADPHONE,
+				  1 << ARIZONA_CABLE_HEADPHONE);
+	if (ret != 0)
+		dev_err(arizona->dev, "Failed to report headphone: %d\n", ret);
+
+	if (info->mic)
+		arizona_start_mic(info);
+
+	info->hpdet_active = false;
+}
+	}
+
+	info->hpdet_active = false;
+}
+
 static void arizona_start_mic(struct arizona_extcon_info *info)
 {
 	struct arizona *arizona = info->arizona;
@@ -125,6 +421,10 @@ static void arizona_start_mic(struct arizona_extcon_info *info)
 		regmap_write(arizona->regmap, 0x80, 0x0);
 	}
 
+	regmap_update_bits(arizona->regmap,
+			   ARIZONA_ACCESSORY_DETECT_MODE_1,
+			   ARIZONA_ACCDET_MODE_MASK, ARIZONA_ACCDET_MODE_MIC);
+
 	regmap_update_bits_check(arizona->regmap, ARIZONA_MIC_DETECT_1,
 				 ARIZONA_MICD_ENA, ARIZONA_MICD_ENA,
 				 &change);
@@ -189,11 +489,11 @@ static irqreturn_t arizona_micdet(int irq, void *data)
 
 	/* If we got a high impedence we should have a headset, report it. */
 	if (info->detecting && (val & 0x400)) {
+		arizona_identify_headphone(info);
+
 		ret = extcon_update_state(&info->edev,
-					  1 << ARIZONA_CABLE_MICROPHONE |
-					  1 << ARIZONA_CABLE_HEADPHONE,
-					  1 << ARIZONA_CABLE_MICROPHONE |
-					  1 << ARIZONA_CABLE_HEADPHONE);
+					  1 << ARIZONA_CABLE_MICROPHONE,
+					  1 << ARIZONA_CABLE_MICROPHONE);
 
 		if (ret != 0)
 			dev_err(arizona->dev, "Headset report failed: %d\n",
@@ -214,17 +514,12 @@ static irqreturn_t arizona_micdet(int irq, void *data)
 		info->jack_flips++;
 
 		if (info->jack_flips >= info->micd_num_modes) {
-			dev_dbg(arizona->dev, "Detected headphone\n");
+			dev_dbg(arizona->dev, "Detected HP/line\n");
+			arizona_identify_headphone(info);
+
 			info->detecting = false;
-			arizona_stop_mic(info);
 
-			ret = extcon_set_cable_state_(&info->edev,
-						      ARIZONA_CABLE_HEADPHONE,
-						      true);
-			if (ret != 0)
-				dev_err(arizona->dev,
-					"Headphone report failed: %d\n",
-				ret);
+			arizona_stop_mic(info);
 		} else {
 			info->micd_mode++;
 			if (info->micd_mode == info->micd_num_modes)
@@ -260,13 +555,7 @@ static irqreturn_t arizona_micdet(int irq, void *data)
 			info->detecting = false;
 			arizona_stop_mic(info);
 
-			ret = extcon_set_cable_state_(&info->edev,
-						      ARIZONA_CABLE_HEADPHONE,
-						      true);
-			if (ret != 0)
-				dev_err(arizona->dev,
-					"Headphone report failed: %d\n",
-				ret);
+			arizona_identify_headphone(info);
 		} else {
 			dev_warn(arizona->dev, "Button with no mic: %x\n",
 				 val);
@@ -387,6 +676,7 @@ static int arizona_extcon_probe(struct platform_device *pdev)
 			break;
 		default:
 			info->micd_clamp = true;
+			info->hpdet_ip = 1;
 			break;
 		}
 		break;
@@ -524,6 +814,13 @@ static int arizona_extcon_probe(struct platform_device *pdev)
 		goto err_fall_wake;
 	}
 
+	ret = arizona_request_irq(arizona, ARIZONA_IRQ_HPDET,
+				  "HPDET", arizona_hpdet_irq, info);
+	if (ret != 0) {
+		dev_err(&pdev->dev, "Failed to get HPDET IRQ: %d\n", ret);
+		goto err_micdet;
+	}
+
 	regmap_update_bits(arizona->regmap, ARIZONA_MIC_DETECT_1,
 			   ARIZONA_MICD_RATE_MASK,
 			   8 << ARIZONA_MICD_RATE_SHIFT);
@@ -544,11 +841,13 @@ static int arizona_extcon_probe(struct platform_device *pdev)
 	ret = input_register_device(info->input);
 	if (ret) {
 		dev_err(&pdev->dev, "Can't register input device: %d\n", ret);
-		goto err_micdet;
+		goto err_hpdet;
 	}
 
 	return 0;
 
+err_hpdet:
+	arizona_free_irq(arizona, ARIZONA_IRQ_HPDET, info);
 err_micdet:
 	arizona_free_irq(arizona, ARIZONA_IRQ_MICDET, info);
 err_fall_wake:
diff --git a/drivers/mfd/wm5102-tables.c b/drivers/mfd/wm5102-tables.c
index 088872ab6338..311cc3657b0b 100644
--- a/drivers/mfd/wm5102-tables.c
+++ b/drivers/mfd/wm5102-tables.c
@@ -1106,6 +1106,8 @@ static bool wm5102_readable_register(struct device *dev, unsigned int reg)
 	case ARIZONA_ACCESSORY_DETECT_MODE_1:
 	case ARIZONA_HEADPHONE_DETECT_1:
 	case ARIZONA_HEADPHONE_DETECT_2:
+	case ARIZONA_HP_DACVAL:
+	case ARIZONA_MICD_CLAMP_CONTROL:
 	case ARIZONA_MIC_DETECT_1:
 	case ARIZONA_MIC_DETECT_2:
 	case ARIZONA_MIC_DETECT_3:
@@ -1875,6 +1877,7 @@ static bool wm5102_volatile_register(struct device *dev, unsigned int reg)
 	case ARIZONA_DSP1_STATUS_2:
 	case ARIZONA_DSP1_STATUS_3:
 	case ARIZONA_HEADPHONE_DETECT_2:
+	case ARIZONA_HP_DACVAL:
 	case ARIZONA_MIC_DETECT_3:
 		return true;
 	default:
diff --git a/include/linux/mfd/arizona/registers.h b/include/linux/mfd/arizona/registers.h
index f3211f06cec6..e9150533e701 100644
--- a/include/linux/mfd/arizona/registers.h
+++ b/include/linux/mfd/arizona/registers.h
@@ -119,6 +119,7 @@
 #define ARIZONA_ACCESSORY_DETECT_MODE_1          0x293
 #define ARIZONA_HEADPHONE_DETECT_1               0x29B
 #define ARIZONA_HEADPHONE_DETECT_2               0x29C
+#define ARIZONA_HP_DACVAL			 0x29F
 #define ARIZONA_MICD_CLAMP_CONTROL               0x2A2
 #define ARIZONA_MIC_DETECT_1                     0x2A3
 #define ARIZONA_MIC_DETECT_2                     0x2A4
@@ -2036,6 +2037,9 @@
 /*
  * R667 (0x29B) - Headphone Detect 1
  */
+#define ARIZONA_HP_IMPEDANCE_RANGE_MASK          0x0600  /* HP_IMPEDANCE_RANGE - [10:9] */
+#define ARIZONA_HP_IMPEDANCE_RANGE_SHIFT              9  /* HP_IMPEDANCE_RANGE - [10:9] */
+#define ARIZONA_HP_IMPEDANCE_RANGE_WIDTH              2  /* HP_IMPEDANCE_RANGE - [10:9] */
 #define ARIZONA_HP_STEP_SIZE                     0x0100  /* HP_STEP_SIZE */
 #define ARIZONA_HP_STEP_SIZE_MASK                0x0100  /* HP_STEP_SIZE */
 #define ARIZONA_HP_STEP_SIZE_SHIFT                    8  /* HP_STEP_SIZE */
@@ -2070,6 +2074,14 @@
 #define ARIZONA_HP_LVL_SHIFT                          0  /* HP_LVL - [6:0] */
 #define ARIZONA_HP_LVL_WIDTH                          7  /* HP_LVL - [6:0] */
 
+#define ARIZONA_HP_DONE_B                        0x8000  /* HP_DONE */
+#define ARIZONA_HP_DONE_B_MASK                   0x8000  /* HP_DONE */
+#define ARIZONA_HP_DONE_B_SHIFT                      15  /* HP_DONE */
+#define ARIZONA_HP_DONE_B_WIDTH                       1  /* HP_DONE */
+#define ARIZONA_HP_LVL_B_MASK                    0x7FFF  /* HP_LVL - [14:0] */
+#define ARIZONA_HP_LVL_B_SHIFT                        0  /* HP_LVL - [14:0] */
+#define ARIZONA_HP_LVL_B_WIDTH                       15  /* HP_LVL - [14:0] */
+
 /*
  * R674 (0x2A2) - MICD clamp control
  */
-- 
cgit v1.2.3


From dd235eea4ed75b1599dd9a53bb618fe5befeb731 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Fri, 11 Jan 2013 08:55:51 +0900
Subject: extcon: arizona: Support HPDET based accessory identification

The accessory detection functionality in Arizona devices is flexible and
supports several system designs in addition to the default one implemented
by the existing driver. One such design uses the HPDET feature to determine
what kind of accessory is present by comparing measurements taken with the
two headphone grounds available on the device, implement that if selected
by platform data.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
Signed-off-by: Myungjoo Ham <myungjoo.ham@samsung.com>
---
 drivers/extcon/extcon-arizona.c   | 159 +++++++++++++++++++++++++++++++++++---
 include/linux/mfd/arizona/pdata.h |   3 +
 2 files changed, 150 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/extcon/extcon-arizona.c b/drivers/extcon/extcon-arizona.c
index dab4584a4ad5..ba9525ee4706 100644
--- a/drivers/extcon/extcon-arizona.c
+++ b/drivers/extcon/extcon-arizona.c
@@ -55,6 +55,9 @@ struct arizona_extcon_info {
 
 	bool hpdet_active;
 
+	int num_hpdet_res;
+	unsigned int hpdet_res[2];
+
 	bool mic;
 	bool detecting;
 	int jack_flips;
@@ -65,8 +68,8 @@ struct arizona_extcon_info {
 };
 
 static const struct arizona_micd_config micd_default_modes[] = {
-	{ ARIZONA_ACCDET_SRC, 1 << ARIZONA_MICD_BIAS_SRC_SHIFT, 0 },
 	{ 0,                  2 << ARIZONA_MICD_BIAS_SRC_SHIFT, 1 },
+	{ ARIZONA_ACCDET_SRC, 1 << ARIZONA_MICD_BIAS_SRC_SHIFT, 0 },
 };
 
 static struct {
@@ -118,10 +121,6 @@ static void arizona_start_mic(struct arizona_extcon_info *info)
 	bool change;
 	int ret;
 
-	info->detecting = true;
-	info->mic = false;
-	info->jack_flips = 0;
-
 	/* Microphone detection can't use idle mode */
 	pm_runtime_get(info->dev);
 
@@ -311,12 +310,80 @@ static int arizona_hpdet_read(struct arizona_extcon_info *info)
 	return val;
 }
 
+static int arizona_hpdet_do_id(struct arizona_extcon_info *info, int *reading)
+{
+	struct arizona *arizona = info->arizona;
+	int ret;
+
+	/*
+	 * If we're using HPDET for accessory identification we need
+	 * to take multiple measurements, step through them in sequence.
+	 */
+	if (arizona->pdata.hpdet_acc_id) {
+		info->hpdet_res[info->num_hpdet_res++] = *reading;
+
+		/*
+		 * If the impedence is too high don't measure the
+		 * second ground.
+		 */
+		if (info->num_hpdet_res == 1 && *reading >= 45) {
+			dev_dbg(arizona->dev, "Skipping ground flip\n");
+			info->hpdet_res[info->num_hpdet_res++] = *reading;
+		}
+
+		if (info->num_hpdet_res == 1) {
+			dev_dbg(arizona->dev, "Flipping ground\n");
+
+			regmap_update_bits(arizona->regmap,
+					   ARIZONA_ACCESSORY_DETECT_MODE_1,
+					   ARIZONA_ACCDET_SRC,
+					   ~info->micd_modes[0].src);
+			regmap_update_bits(arizona->regmap,
+					   ARIZONA_HEADPHONE_DETECT_1,
+					   ARIZONA_HP_POLL, 0);
+			regmap_update_bits(arizona->regmap,
+					   ARIZONA_HEADPHONE_DETECT_1,
+					   ARIZONA_HP_POLL, ARIZONA_HP_POLL);
+			return -EAGAIN;
+		}
+
+		/* OK, got both.  Now, compare... */
+		dev_dbg(arizona->dev, "HPDET measured %d %d\n",
+			info->hpdet_res[0], info->hpdet_res[1]);
+
+		if (info->hpdet_res[0] > info->hpdet_res[1] * 2) {
+			dev_dbg(arizona->dev, "Detected mic\n");
+			info->mic = true;
+			ret = extcon_set_cable_state_(&info->edev,
+						      ARIZONA_CABLE_MICROPHONE,
+						      true);
+			if (ret != 0) {
+				dev_err(arizona->dev,
+					"Failed to report mic: %d\n", ret);
+			}
+
+			/* Take the headphone impedance for the main report */
+			*reading = info->hpdet_res[1];
+		} else {
+			dev_dbg(arizona->dev, "Detected headphone\n");
+		}
+
+		/* Make sure everything is reset back to the real polarity */
+		regmap_update_bits(arizona->regmap,
+				   ARIZONA_ACCESSORY_DETECT_MODE_1,
+				   ARIZONA_ACCDET_SRC,
+				   info->micd_modes[0].src);
+	}
+
+	return 0;
+}
+
 static irqreturn_t arizona_hpdet_irq(int irq, void *data)
 {
 	struct arizona_extcon_info *info = data;
 	struct arizona *arizona = info->arizona;
 	int report = ARIZONA_CABLE_HEADPHONE;
-	int ret;
+	int ret, reading;
 
 	mutex_lock(&info->lock);
 
@@ -344,14 +411,23 @@ static irqreturn_t arizona_hpdet_irq(int irq, void *data)
 	} else if (ret < 0) {
 		goto done;
 	}
+	reading = ret;
 
 	/* Reset back to starting range */
 	regmap_update_bits(arizona->regmap,
 			   ARIZONA_HEADPHONE_DETECT_1,
-			   ARIZONA_HP_IMPEDANCE_RANGE_MASK, 0);
+			   ARIZONA_HP_IMPEDANCE_RANGE_MASK | ARIZONA_HP_POLL,
+			   0);
+
+	ret = arizona_hpdet_do_id(info, &reading);
+	if (ret == -EAGAIN) {
+		goto out;
+	} else if (ret < 0) {
+		goto done;
+	}
 
 	/* Report high impedence cables as line outputs */
-	if (ret >= 5000)
+	if (reading >= 5000)
 		report = ARIZONA_CABLE_LINEOUT;
 	else
 		report = ARIZONA_CABLE_HEADPHONE;
@@ -370,8 +446,6 @@ static irqreturn_t arizona_hpdet_irq(int irq, void *data)
 		dev_warn(arizona->dev, "Failed to undo magic: %d\n", ret);
 
 done:
-	regmap_update_bits(arizona->regmap, ARIZONA_HEADPHONE_DETECT_1,
-			   ARIZONA_HP_POLL, 0);
 
 	/* Revert back to MICDET mode */
 	regmap_update_bits(arizona->regmap,
@@ -451,8 +525,58 @@ err:
 
 	info->hpdet_active = false;
 }
+
+static void arizona_start_hpdet_acc_id(struct arizona_extcon_info *info)
+{
+	struct arizona *arizona = info->arizona;
+	int ret;
+
+	dev_dbg(arizona->dev, "Starting identification via HPDET\n");
+
+	/* Make sure we keep the device enabled during the measurement */
+	pm_runtime_get(info->dev);
+
+	info->hpdet_active = true;
+
+	ret = regmap_update_bits(arizona->regmap, 0x225, 0x4000, 0x4000);
+	if (ret != 0)
+		dev_warn(arizona->dev, "Failed to do magic: %d\n", ret);
+
+	ret = regmap_update_bits(arizona->regmap, 0x226, 0x4000, 0x4000);
+	if (ret != 0)
+		dev_warn(arizona->dev, "Failed to do magic: %d\n", ret);
+
+	ret = regmap_update_bits(arizona->regmap,
+				 ARIZONA_ACCESSORY_DETECT_MODE_1,
+				 ARIZONA_ACCDET_SRC | ARIZONA_ACCDET_MODE_MASK,
+				 info->micd_modes[0].src |
+				 ARIZONA_ACCDET_MODE_HPL);
+	if (ret != 0) {
+		dev_err(arizona->dev, "Failed to set HPDETL mode: %d\n", ret);
+		goto err;
 	}
 
+	ret = regmap_update_bits(arizona->regmap, ARIZONA_HEADPHONE_DETECT_1,
+				 ARIZONA_HP_POLL, ARIZONA_HP_POLL);
+	if (ret != 0) {
+		dev_err(arizona->dev, "Can't start HPDETL measurement: %d\n",
+			ret);
+		goto err;
+	}
+
+	return;
+
+err:
+	regmap_update_bits(arizona->regmap, ARIZONA_ACCESSORY_DETECT_MODE_1,
+			   ARIZONA_ACCDET_MODE_MASK, ARIZONA_ACCDET_MODE_MIC);
+
+	/* Just report headphone */
+	ret = extcon_update_state(&info->edev,
+				  1 << ARIZONA_CABLE_HEADPHONE,
+				  1 << ARIZONA_CABLE_HEADPHONE);
+	if (ret != 0)
+		dev_err(arizona->dev, "Failed to report headphone: %d\n", ret);
+
 	info->hpdet_active = false;
 }
 
@@ -612,12 +736,24 @@ static irqreturn_t arizona_jackdet(int irq, void *data)
 			dev_err(arizona->dev, "Mechanical report failed: %d\n",
 				ret);
 
-		arizona_start_mic(info);
+		if (!arizona->pdata.hpdet_acc_id) {
+			info->detecting = true;
+			info->mic = false;
+			info->jack_flips = 0;
+
+			arizona_start_mic(info);
+		} else {
+			arizona_start_hpdet_acc_id(info);
+		}
 	} else {
 		dev_dbg(arizona->dev, "Detected jack removal\n");
 
 		arizona_stop_mic(info);
 
+		info->num_hpdet_res = 0;
+		for (i = 0; i < ARRAY_SIZE(info->hpdet_res); i++)
+			info->hpdet_res[i] = 0;
+		info->mic = false;
 
 		for (i = 0; i < ARIZONA_NUM_BUTTONS; i++)
 			input_report_key(info->input,
@@ -665,7 +801,6 @@ static int arizona_extcon_probe(struct platform_device *pdev)
 	mutex_init(&info->lock);
 	info->arizona = arizona;
 	info->dev = &pdev->dev;
-	info->detecting = true;
 	platform_set_drvdata(pdev, info);
 
 	switch (arizona->type) {
diff --git a/include/linux/mfd/arizona/pdata.h b/include/linux/mfd/arizona/pdata.h
index 0fc26a480be3..7c0878778357 100644
--- a/include/linux/mfd/arizona/pdata.h
+++ b/include/linux/mfd/arizona/pdata.h
@@ -99,6 +99,9 @@ struct arizona_pdata {
 	/** GPIO5 is used for jack detection */
 	bool jd_gpio5;
 
+	/** Use the headphone detect circuit to identify the accessory */
+	bool hpdet_acc_id;
+
 	/** GPIO for mic detection polarity */
 	int micd_pol_gpio;
 
-- 
cgit v1.2.3


From 1eda6aa7ce101b59dfd91abef2c8b0e51e96e199 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Fri, 11 Jan 2013 08:55:54 +0900
Subject: extcon: arizona: Support direct microphone measurement via HPDET

With some GPIO control it is possible to detect microphones in a wider
range of configurations by directly measuring the microphone impedance
when the HPDET method cannot distinguish between the behaviour of the
two grounds. Allow a GPIO to be provided in platform data and use it to
implement this behaviour.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
Signed-off-by: Myungjoo Ham <myungjoo.ham@samsung.com>
---
 drivers/extcon/extcon-arizona.c   | 50 +++++++++++++++++++++++++++++++++++----
 include/linux/mfd/arizona/pdata.h |  3 +++
 2 files changed, 48 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/extcon/extcon-arizona.c b/drivers/extcon/extcon-arizona.c
index ba9525ee4706..de141f72c8e5 100644
--- a/drivers/extcon/extcon-arizona.c
+++ b/drivers/extcon/extcon-arizona.c
@@ -56,7 +56,7 @@ struct arizona_extcon_info {
 	bool hpdet_active;
 
 	int num_hpdet_res;
-	unsigned int hpdet_res[2];
+	unsigned int hpdet_res[3];
 
 	bool mic;
 	bool detecting;
@@ -313,6 +313,7 @@ static int arizona_hpdet_read(struct arizona_extcon_info *info)
 static int arizona_hpdet_do_id(struct arizona_extcon_info *info, int *reading)
 {
 	struct arizona *arizona = info->arizona;
+	int id_gpio = arizona->pdata.hpdet_id_gpio;
 	int ret;
 
 	/*
@@ -338,9 +339,27 @@ static int arizona_hpdet_do_id(struct arizona_extcon_info *info, int *reading)
 					   ARIZONA_ACCESSORY_DETECT_MODE_1,
 					   ARIZONA_ACCDET_SRC,
 					   ~info->micd_modes[0].src);
+
 			regmap_update_bits(arizona->regmap,
 					   ARIZONA_HEADPHONE_DETECT_1,
-					   ARIZONA_HP_POLL, 0);
+					   ARIZONA_HP_POLL, ARIZONA_HP_POLL);
+			return -EAGAIN;
+		}
+
+		/* Only check the mic directly if we didn't already ID it */
+		if (id_gpio && info->num_hpdet_res == 2 &&
+		    !((info->hpdet_res[0] > info->hpdet_res[1] * 2))) {
+			dev_dbg(arizona->dev, "Measuring mic\n");
+
+			regmap_update_bits(arizona->regmap,
+					   ARIZONA_ACCESSORY_DETECT_MODE_1,
+					   ARIZONA_ACCDET_MODE_MASK |
+					   ARIZONA_ACCDET_SRC,
+					   ARIZONA_ACCDET_MODE_HPR |
+					   info->micd_modes[0].src);
+
+			gpio_set_value_cansleep(id_gpio, 1);
+
 			regmap_update_bits(arizona->regmap,
 					   ARIZONA_HEADPHONE_DETECT_1,
 					   ARIZONA_HP_POLL, ARIZONA_HP_POLL);
@@ -348,10 +367,16 @@ static int arizona_hpdet_do_id(struct arizona_extcon_info *info, int *reading)
 		}
 
 		/* OK, got both.  Now, compare... */
-		dev_dbg(arizona->dev, "HPDET measured %d %d\n",
-			info->hpdet_res[0], info->hpdet_res[1]);
+		dev_dbg(arizona->dev, "HPDET measured %d %d %d\n",
+			info->hpdet_res[0], info->hpdet_res[1],
+			info->hpdet_res[2]);
 
-		if (info->hpdet_res[0] > info->hpdet_res[1] * 2) {
+		/*
+		 * Either the two grounds measure differently or we
+		 * measure the mic as high impedance.
+		 */
+		if ((info->hpdet_res[0] > info->hpdet_res[1] * 2) ||
+		    (id_gpio && info->hpdet_res[2] > 10)) {
 			dev_dbg(arizona->dev, "Detected mic\n");
 			info->mic = true;
 			ret = extcon_set_cable_state_(&info->edev,
@@ -382,6 +407,7 @@ static irqreturn_t arizona_hpdet_irq(int irq, void *data)
 {
 	struct arizona_extcon_info *info = data;
 	struct arizona *arizona = info->arizona;
+	int id_gpio = arizona->pdata.hpdet_id_gpio;
 	int report = ARIZONA_CABLE_HEADPHONE;
 	int ret, reading;
 
@@ -446,6 +472,8 @@ static irqreturn_t arizona_hpdet_irq(int irq, void *data)
 		dev_warn(arizona->dev, "Failed to undo magic: %d\n", ret);
 
 done:
+	if (id_gpio)
+		gpio_set_value_cansleep(id_gpio, 0);
 
 	/* Revert back to MICDET mode */
 	regmap_update_bits(arizona->regmap,
@@ -854,6 +882,18 @@ static int arizona_extcon_probe(struct platform_device *pdev)
 		}
 	}
 
+	if (arizona->pdata.hpdet_id_gpio > 0) {
+		ret = devm_gpio_request_one(&pdev->dev,
+					    arizona->pdata.hpdet_id_gpio,
+					    GPIOF_OUT_INIT_LOW,
+					    "HPDET");
+		if (ret != 0) {
+			dev_err(arizona->dev, "Failed to request GPIO%d: %d\n",
+				arizona->pdata.hpdet_id_gpio, ret);
+			goto err_register;
+		}
+	}
+
 	if (arizona->pdata.micd_bias_start_time)
 		regmap_update_bits(arizona->regmap, ARIZONA_MIC_DETECT_1,
 				   ARIZONA_MICD_BIAS_STARTTIME_MASK,
diff --git a/include/linux/mfd/arizona/pdata.h b/include/linux/mfd/arizona/pdata.h
index 7c0878778357..bcbe4fda87cb 100644
--- a/include/linux/mfd/arizona/pdata.h
+++ b/include/linux/mfd/arizona/pdata.h
@@ -102,6 +102,9 @@ struct arizona_pdata {
 	/** Use the headphone detect circuit to identify the accessory */
 	bool hpdet_acc_id;
 
+	/** GPIO used for mic isolation with HPDET */
+	int hpdet_id_gpio;
+
 	/** GPIO for mic detection polarity */
 	int micd_pol_gpio;
 
-- 
cgit v1.2.3


From 689557d3c7045320958d5bc4141088f7b4dff7ba Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Fri, 11 Jan 2013 08:55:57 +0900
Subject: mfd: wm5102: Add microphone clamp control registers

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
Signed-off-by: Myungjoo Ham <myungjoo.ham@samsung.com>
---
 drivers/mfd/wm5102-tables.c           | 7 +++++++
 include/linux/mfd/arizona/registers.h | 8 ++++++++
 2 files changed, 15 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mfd/wm5102-tables.c b/drivers/mfd/wm5102-tables.c
index 311cc3657b0b..a396a3ae2045 100644
--- a/drivers/mfd/wm5102-tables.c
+++ b/drivers/mfd/wm5102-tables.c
@@ -84,6 +84,12 @@ int wm5102_patch(struct arizona *arizona)
 }
 
 static const struct regmap_irq wm5102_aod_irqs[ARIZONA_NUM_IRQ] = {
+	[ARIZONA_IRQ_MICD_CLAMP_FALL] = {
+		.mask = ARIZONA_MICD_CLAMP_FALL_EINT1
+	},
+	[ARIZONA_IRQ_MICD_CLAMP_RISE] = {
+		.mask = ARIZONA_MICD_CLAMP_RISE_EINT1
+	},
 	[ARIZONA_IRQ_GP5_FALL] = { .mask = ARIZONA_GP5_FALL_EINT1 },
 	[ARIZONA_IRQ_GP5_RISE] = { .mask = ARIZONA_GP5_RISE_EINT1 },
 	[ARIZONA_IRQ_JD_FALL] = { .mask = ARIZONA_JD1_FALL_EINT1 },
@@ -312,6 +318,7 @@ static const struct reg_default wm5102_reg_default[] = {
 	{ 0x0000021A, 0x01A6 },   /* R538   - Mic Bias Ctrl 3 */ 
 	{ 0x00000293, 0x0000 },   /* R659   - Accessory Detect Mode 1 */ 
 	{ 0x0000029B, 0x0020 },   /* R667   - Headphone Detect 1 */ 
+	{ 0x000002A2, 0x0000 },   /* R674   - Micd clamp control */
 	{ 0x000002A3, 0x1102 },   /* R675   - Mic Detect 1 */ 
 	{ 0x000002A4, 0x009F },   /* R676   - Mic Detect 2 */ 
 	{ 0x000002A5, 0x0000 },   /* R677   - Mic Detect 3 */ 
diff --git a/include/linux/mfd/arizona/registers.h b/include/linux/mfd/arizona/registers.h
index e9150533e701..79e9dd4073d8 100644
--- a/include/linux/mfd/arizona/registers.h
+++ b/include/linux/mfd/arizona/registers.h
@@ -1196,6 +1196,14 @@
 /*
  * R64 (0x40) - Wake control
  */
+#define ARIZONA_WKUP_MICD_CLAMP_FALL             0x0080  /* WKUP_MICD_CLAMP_FALL */
+#define ARIZONA_WKUP_MICD_CLAMP_FALL_MASK        0x0080  /* WKUP_MICD_CLAMP_FALL */
+#define ARIZONA_WKUP_MICD_CLAMP_FALL_SHIFT            7  /* WKUP_MICD_CLAMP_FALL */
+#define ARIZONA_WKUP_MICD_CLAMP_FALL_WIDTH            1  /* WKUP_MICD_CLAMP_FALL */
+#define ARIZONA_WKUP_MICD_CLAMP_RISE             0x0040  /* WKUP_MICD_CLAMP_RISE */
+#define ARIZONA_WKUP_MICD_CLAMP_RISE_MASK        0x0040  /* WKUP_MICD_CLAMP_RISE */
+#define ARIZONA_WKUP_MICD_CLAMP_RISE_SHIFT            6  /* WKUP_MICD_CLAMP_RISE */
+#define ARIZONA_WKUP_MICD_CLAMP_RISE_WIDTH            1  /* WKUP_MICD_CLAMP_RISE */
 #define ARIZONA_WKUP_GP5_FALL                    0x0020  /* WKUP_GP5_FALL */
 #define ARIZONA_WKUP_GP5_FALL_MASK               0x0020  /* WKUP_GP5_FALL */
 #define ARIZONA_WKUP_GP5_FALL_SHIFT                   5  /* WKUP_GP5_FALL */
-- 
cgit v1.2.3


From 8aef33a7cf40ca9da188e8578b2abe7267a38c52 Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <daniel.lezcano@linaro.org>
Date: Tue, 15 Jan 2013 14:18:04 +0100
Subject: cpuidle: remove the power_specified field in the driver

We realized that the power usage field is never filled and when it
is filled for tegra, the power_specified flag is not set causing all
of these values to be reset when the driver is initialized with
set_power_state().

However, the power_specified flag can be simply removed under the
assumption that the states are always backward sorted, which is the
case with the current code.

This change allows the menu governor select function and the
cpuidle_play_dead() to be simplified.  Moreover, the
set_power_states() function can removed as it does not make sense
any more.

Drop the power_specified flag from struct cpuidle_driver and make
the related changes as described above.

As a consequence, this also fixes the bug where on the dynamic
C-states system, the power fields are not initialized.

[rjw: Changelog]
References: https://bugzilla.kernel.org/show_bug.cgi?id=42870
References: https://bugzilla.kernel.org/show_bug.cgi?id=43349
References: https://lkml.org/lkml/2012/10/16/518
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpuidle/cpuidle.c        | 17 ++++-------------
 drivers/cpuidle/driver.c         | 25 -------------------------
 drivers/cpuidle/governors/menu.c |  8 ++------
 include/linux/cpuidle.h          |  2 +-
 4 files changed, 7 insertions(+), 45 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index fb4a7dd57f94..e1f6860e069c 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -69,24 +69,15 @@ int cpuidle_play_dead(void)
 {
 	struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices);
 	struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
-	int i, dead_state = -1;
-	int power_usage = INT_MAX;
+	int i;
 
 	if (!drv)
 		return -ENODEV;
 
 	/* Find lowest-power state that supports long-term idle */
-	for (i = CPUIDLE_DRIVER_STATE_START; i < drv->state_count; i++) {
-		struct cpuidle_state *s = &drv->states[i];
-
-		if (s->power_usage < power_usage && s->enter_dead) {
-			power_usage = s->power_usage;
-			dead_state = i;
-		}
-	}
-
-	if (dead_state != -1)
-		return drv->states[dead_state].enter_dead(dev, dead_state);
+	for (i = drv->state_count - 1; i >= CPUIDLE_DRIVER_STATE_START; i--)
+		if (drv->states[i].enter_dead)
+			return drv->states[i].enter_dead(dev, i);
 
 	return -ENODEV;
 }
diff --git a/drivers/cpuidle/driver.c b/drivers/cpuidle/driver.c
index c2b281afe0ed..422c7b69ba7c 100644
--- a/drivers/cpuidle/driver.c
+++ b/drivers/cpuidle/driver.c
@@ -19,34 +19,9 @@ DEFINE_SPINLOCK(cpuidle_driver_lock);
 static void __cpuidle_set_cpu_driver(struct cpuidle_driver *drv, int cpu);
 static struct cpuidle_driver * __cpuidle_get_cpu_driver(int cpu);
 
-static void set_power_states(struct cpuidle_driver *drv)
-{
-	int i;
-
-	/*
-	 * cpuidle driver should set the drv->power_specified bit
-	 * before registering if the driver provides
-	 * power_usage numbers.
-	 *
-	 * If power_specified is not set,
-	 * we fill in power_usage with decreasing values as the
-	 * cpuidle code has an implicit assumption that state Cn
-	 * uses less power than C(n-1).
-	 *
-	 * With CONFIG_ARCH_HAS_CPU_RELAX, C0 is already assigned
-	 * an power value of -1.  So we use -2, -3, etc, for other
-	 * c-states.
-	 */
-	for (i = CPUIDLE_DRIVER_STATE_START; i < drv->state_count; i++)
-		drv->states[i].power_usage = -1 - i;
-}
-
 static void __cpuidle_driver_init(struct cpuidle_driver *drv)
 {
 	drv->refcnt = 0;
-
-	if (!drv->power_specified)
-		set_power_states(drv);
 }
 
 static int __cpuidle_register_driver(struct cpuidle_driver *drv, int cpu)
diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
index 20ea33afdda1..fe343a06b7da 100644
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -312,7 +312,6 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 {
 	struct menu_device *data = &__get_cpu_var(menu_devices);
 	int latency_req = pm_qos_request(PM_QOS_CPU_DMA_LATENCY);
-	int power_usage = INT_MAX;
 	int i;
 	int multiplier;
 	struct timespec t;
@@ -383,11 +382,8 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 		if (s->exit_latency * multiplier > data->predicted_us)
 			continue;
 
-		if (s->power_usage < power_usage) {
-			power_usage = s->power_usage;
-			data->last_state_idx = i;
-			data->exit_us = s->exit_latency;
-		}
+		data->last_state_idx = i;
+		data->exit_us = s->exit_latency;
 	}
 
 	/* not deepest C-state chosen for low predicted residency */
diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index 3711b34dc4f9..24cd1037b6d6 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -126,9 +126,9 @@ struct cpuidle_driver {
 	struct module 		*owner;
 	int                     refcnt;
 
-	unsigned int		power_specified:1;
 	/* set to 1 to use the core cpuidle time keeping (for all states). */
 	unsigned int		en_core_tk_irqen:1;
+	/* states array must be ordered in decreasing power consumption */
 	struct cpuidle_state	states[CPUIDLE_STATE_MAX];
 	int			state_count;
 	int			safe_state_index;
-- 
cgit v1.2.3


From 41d16512ebea6938b95c5e4f2ae008914e91abc1 Mon Sep 17 00:00:00 2001
From: Tony Prisk <linux@prisktech.co.nz>
Date: Tue, 15 Jan 2013 19:50:49 +1300
Subject: timer: vt8500: Convert vt8500 to use CLKSRC_OF

This patch converts arch-vt8500 to make use of CLKSRC_OF. Doing so
removes the need for include/linux/vt8500_timer.h as vt8500_timer_init
no longer needs to be visible outside vt8500_timer.c

Signed-off-by: Tony Prisk <linux@prisktech.co.nz>
Reviewed-by: Stephen Warren <swarren@nvidia.com>
Signed-off-by: Olof Johansson <olof@lixom.net>
---
 arch/arm/mach-vt8500/Kconfig       |  1 +
 arch/arm/mach-vt8500/vt8500.c      |  4 ++--
 drivers/clocksource/vt8500_timer.c |  3 ++-
 include/linux/vt8500_timer.h       | 22 ----------------------
 4 files changed, 5 insertions(+), 25 deletions(-)
 delete mode 100644 include/linux/vt8500_timer.h

(limited to 'include/linux')

diff --git a/arch/arm/mach-vt8500/Kconfig b/arch/arm/mach-vt8500/Kconfig
index 570a801fb862..d1cbda66dc9c 100644
--- a/arch/arm/mach-vt8500/Kconfig
+++ b/arch/arm/mach-vt8500/Kconfig
@@ -4,6 +4,7 @@ config ARCH_VT8500
 	select ARCH_HAS_CPUFREQ
 	select ARCH_REQUIRE_GPIOLIB
 	select CLKDEV_LOOKUP
+	select CLKSRC_OF
 	select CPU_ARM926T
 	select GENERIC_CLOCKEVENTS
 	select GENERIC_GPIO
diff --git a/arch/arm/mach-vt8500/vt8500.c b/arch/arm/mach-vt8500/vt8500.c
index b9fd9d3cbfb3..fe99b709f11f 100644
--- a/arch/arm/mach-vt8500/vt8500.c
+++ b/arch/arm/mach-vt8500/vt8500.c
@@ -18,9 +18,9 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
 
+#include <linux/clocksource.h>
 #include <linux/io.h>
 #include <linux/pm.h>
-#include <linux/vt8500_timer.h>
 
 #include <asm/mach-types.h>
 #include <asm/mach/arch.h>
@@ -186,8 +186,8 @@ DT_MACHINE_START(WMT_DT, "VIA/Wondermedia SoC (Device Tree Support)")
 	.dt_compat	= vt8500_dt_compat,
 	.map_io		= vt8500_map_io,
 	.init_irq	= vt8500_init_irq,
-	.init_time	= vt8500_timer_init,
 	.init_machine	= vt8500_init,
+	.init_time	= clocksource_of_init,
 	.restart	= vt8500_restart,
 	.handle_irq	= vt8500_handle_irq,
 MACHINE_END
diff --git a/drivers/clocksource/vt8500_timer.c b/drivers/clocksource/vt8500_timer.c
index 3dd21a47881f..b75b8e31dab4 100644
--- a/drivers/clocksource/vt8500_timer.c
+++ b/drivers/clocksource/vt8500_timer.c
@@ -134,7 +134,7 @@ static struct of_device_id vt8500_timer_ids[] = {
 	{ }
 };
 
-void __init vt8500_timer_init(void)
+static void __init vt8500_timer_init(void)
 {
 	struct device_node *np;
 	int timer_irq;
@@ -182,3 +182,4 @@ void __init vt8500_timer_init(void)
 	clockevents_register_device(&clockevent);
 }
 
+CLOCKSOURCE_OF_DECLARE(vt8500, "via,vt8500-timer", vt8500_timer_init)
diff --git a/include/linux/vt8500_timer.h b/include/linux/vt8500_timer.h
deleted file mode 100644
index 33b0ee87d083..000000000000
--- a/include/linux/vt8500_timer.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Copyright 2012 Tony Prisk <linux@prisktech.co.nz>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- */
-
-#ifndef __VT8500_TIMER_H
-#define __VT8500_TIMER_H
-
-#include <asm/mach/time.h>
-
-void vt8500_timer_init(void);
-
-#endif
-- 
cgit v1.2.3


From 3988a4df3499e604a3f2ae979372d27fc5664f77 Mon Sep 17 00:00:00 2001
From: Johan Bjornstedt <johan.bjornstedt@stericsson.com>
Date: Fri, 11 Jan 2013 13:12:50 +0000
Subject: ab8500_bm: Skip first CCEOC irq for instant current

When enabling the CCEOC irq we might get false interrupt
from ab8500-driver due to the latched value will be saved
and interpreted as an IRQ when enabled

Signed-off-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Johan Bjornstedt <johan.bjornstedt@stericsson.com>
Signed-off-by: Henrik Solver <henrik.solver@stericsson.com>
Reviewed-by: Karl KOMIEROWSKI <karl.komierowski@stericsson.com>
Signed-off-by: Anton Vorontsov <anton@enomsg.org>
---
 drivers/power/ab8500_btemp.c         | 11 +++----
 drivers/power/ab8500_fg.c            | 63 +++++++++++++++++++++++++++++++-----
 include/linux/mfd/abx500/ab8500-bm.h |  7 ++++
 3 files changed, 66 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/power/ab8500_btemp.c b/drivers/power/ab8500_btemp.c
index e1d28039ce7b..4a570b6c9e47 100644
--- a/drivers/power/ab8500_btemp.c
+++ b/drivers/power/ab8500_btemp.c
@@ -374,13 +374,10 @@ static int ab8500_btemp_get_batctrl_res(struct ab8500_btemp *di)
 		return ret;
 	}
 
-	/*
-	 * Since there is no interrupt when current measurement is done,
-	 * loop for over 250ms (250ms is one sample conversion time
-	 * with 32.768 Khz RTC clock). Note that a stop time must be set
-	 * since the ab8500_btemp_read_batctrl_voltage call can block and
-	 * take an unknown amount of time to complete.
-	 */
+	do {
+		msleep(20);
+	} while (!ab8500_fg_inst_curr_started(di->fg));
+
 	i = 0;
 
 	do {
diff --git a/drivers/power/ab8500_fg.c b/drivers/power/ab8500_fg.c
index 3d05c73813c8..0f40c968286c 100644
--- a/drivers/power/ab8500_fg.c
+++ b/drivers/power/ab8500_fg.c
@@ -160,6 +160,7 @@ struct inst_curr_result_list {
  * @recovery_cnt:	Counter for recovery mode
  * @high_curr_cnt:	Counter for high current mode
  * @init_cnt:		Counter for init mode
+ * @nbr_cceoc_irq_cnt	Counter for number of CCEOC irqs received since enabled
  * @recovery_needed:	Indicate if recovery is needed
  * @high_curr_mode:	Indicate if we're in high current mode
  * @init_capacity:	Indicate if initial capacity measuring should be done
@@ -167,6 +168,7 @@ struct inst_curr_result_list {
  * @calib_state		State during offset calibration
  * @discharge_state:	Current discharge state
  * @charge_state:	Current charge state
+ * @ab8500_fg_started	Completion struct used for the instant current start
  * @ab8500_fg_complete	Completion struct used for the instant current reading
  * @flags:		Structure for information about events triggered
  * @bat_cap:		Structure for battery capacity specific parameters
@@ -199,6 +201,7 @@ struct ab8500_fg {
 	int recovery_cnt;
 	int high_curr_cnt;
 	int init_cnt;
+	int nbr_cceoc_irq_cnt;
 	bool recovery_needed;
 	bool high_curr_mode;
 	bool init_capacity;
@@ -206,6 +209,7 @@ struct ab8500_fg {
 	enum ab8500_fg_calibration_state calib_state;
 	enum ab8500_fg_discharge_state discharge_state;
 	enum ab8500_fg_charge_state charge_state;
+	struct completion ab8500_fg_started;
 	struct completion ab8500_fg_complete;
 	struct ab8500_fg_flags flags;
 	struct ab8500_fg_battery_capacity bat_cap;
@@ -524,13 +528,14 @@ cc_err:
  * Note: This is part "one" and has to be called before
  * ab8500_fg_inst_curr_finalize()
  */
- int ab8500_fg_inst_curr_start(struct ab8500_fg *di)
+int ab8500_fg_inst_curr_start(struct ab8500_fg *di)
 {
 	u8 reg_val;
 	int ret;
 
 	mutex_lock(&di->cc_lock);
 
+	di->nbr_cceoc_irq_cnt = 0;
 	ret = abx500_get_register_interruptible(di->dev, AB8500_RTC,
 		AB8500_RTC_CC_CONF_REG, &reg_val);
 	if (ret < 0)
@@ -558,6 +563,7 @@ cc_err:
 	}
 
 	/* Return and WFI */
+	INIT_COMPLETION(di->ab8500_fg_started);
 	INIT_COMPLETION(di->ab8500_fg_complete);
 	enable_irq(di->irq);
 
@@ -568,6 +574,17 @@ fail:
 	return ret;
 }
 
+/**
+ * ab8500_fg_inst_curr_started() - check if fg conversion has started
+ * @di:         pointer to the ab8500_fg structure
+ *
+ * Returns 1 if conversion started, 0 if still waiting
+ */
+int ab8500_fg_inst_curr_started(struct ab8500_fg *di)
+{
+	return completion_done(&di->ab8500_fg_started);
+}
+
 /**
  * ab8500_fg_inst_curr_done() - check if fg conversion is done
  * @di:         pointer to the ab8500_fg structure
@@ -596,13 +613,15 @@ int ab8500_fg_inst_curr_finalize(struct ab8500_fg *di, int *res)
 	int timeout;
 
 	if (!completion_done(&di->ab8500_fg_complete)) {
-		timeout = wait_for_completion_timeout(&di->ab8500_fg_complete,
+		timeout = wait_for_completion_timeout(
+			&di->ab8500_fg_complete,
 			INS_CURR_TIMEOUT);
 		dev_dbg(di->dev, "Finalize time: %d ms\n",
 			((INS_CURR_TIMEOUT - timeout) * 1000) / HZ);
 		if (!timeout) {
 			ret = -ETIME;
 			disable_irq(di->irq);
+			di->nbr_cceoc_irq_cnt = 0;
 			dev_err(di->dev, "completion timed out [%d]\n",
 				__LINE__);
 			goto fail;
@@ -610,6 +629,7 @@ int ab8500_fg_inst_curr_finalize(struct ab8500_fg *di, int *res)
 	}
 
 	disable_irq(di->irq);
+	di->nbr_cceoc_irq_cnt = 0;
 
 	ret = abx500_mask_and_set_register_interruptible(di->dev,
 			AB8500_GAS_GAUGE, AB8500_GASG_CC_CTRL_REG,
@@ -684,6 +704,7 @@ fail:
 int ab8500_fg_inst_curr_blocking(struct ab8500_fg *di)
 {
 	int ret;
+	int timeout;
 	int res = 0;
 
 	ret = ab8500_fg_inst_curr_start(di);
@@ -692,13 +713,32 @@ int ab8500_fg_inst_curr_blocking(struct ab8500_fg *di)
 		return 0;
 	}
 
+	/* Wait for CC to actually start */
+	if (!completion_done(&di->ab8500_fg_started)) {
+		timeout = wait_for_completion_timeout(
+			&di->ab8500_fg_started,
+			INS_CURR_TIMEOUT);
+		dev_dbg(di->dev, "Start time: %d ms\n",
+			((INS_CURR_TIMEOUT - timeout) * 1000) / HZ);
+		if (!timeout) {
+			ret = -ETIME;
+			dev_err(di->dev, "completion timed out [%d]\n",
+				__LINE__);
+			goto fail;
+		}
+	}
+
 	ret = ab8500_fg_inst_curr_finalize(di, &res);
 	if (ret) {
 		dev_err(di->dev, "Failed to finalize fg_inst\n");
 		return 0;
 	}
 
+	dev_dbg(di->dev, "%s instant current: %d", __func__, res);
 	return res;
+fail:
+	mutex_unlock(&di->cc_lock);
+	return ret;
 }
 
 /**
@@ -1523,8 +1563,6 @@ static void ab8500_fg_algorithm_discharging(struct ab8500_fg *di)
 
 	case AB8500_FG_DISCHARGE_WAKEUP:
 		ab8500_fg_coulomb_counter(di, true);
-		di->inst_curr = ab8500_fg_inst_curr_blocking(di);
-
 		ab8500_fg_calc_cap_discharge_voltage(di, true);
 
 		di->fg_samples = SEC_TO_SAMPLE(
@@ -1641,8 +1679,6 @@ static void ab8500_fg_periodic_work(struct work_struct *work)
 		fg_periodic_work.work);
 
 	if (di->init_capacity) {
-		/* A dummy read that will return 0 */
-		di->inst_curr = ab8500_fg_inst_curr_blocking(di);
 		/* Get an initial capacity calculation */
 		ab8500_fg_calc_cap_discharge_voltage(di, true);
 		ab8500_fg_check_capacity_limits(di, true);
@@ -1828,7 +1864,13 @@ static void ab8500_fg_instant_work(struct work_struct *work)
 static irqreturn_t ab8500_fg_cc_data_end_handler(int irq, void *_di)
 {
 	struct ab8500_fg *di = _di;
-	complete(&di->ab8500_fg_complete);
+	if (!di->nbr_cceoc_irq_cnt) {
+		di->nbr_cceoc_irq_cnt++;
+		complete(&di->ab8500_fg_started);
+	} else {
+		di->nbr_cceoc_irq_cnt = 0;
+		complete(&di->ab8500_fg_complete);
+	}
 	return IRQ_HANDLED;
 }
 
@@ -2551,7 +2593,11 @@ static int ab8500_fg_probe(struct platform_device *pdev)
 	di->fg_samples = SEC_TO_SAMPLE(di->bm->fg_params->init_timer);
 	ab8500_fg_coulomb_counter(di, true);
 
-	/* Initialize completion used to notify completion of inst current */
+	/*
+	 * Initialize completion used to notify completion and start
+	 * of inst current
+	 */
+	init_completion(&di->ab8500_fg_started);
 	init_completion(&di->ab8500_fg_complete);
 
 	/* Register interrupts */
@@ -2571,6 +2617,7 @@ static int ab8500_fg_probe(struct platform_device *pdev)
 	}
 	di->irq = platform_get_irq_byname(pdev, "CCEOC");
 	disable_irq(di->irq);
+	di->nbr_cceoc_irq_cnt = 0;
 
 	platform_set_drvdata(pdev, di);
 
diff --git a/include/linux/mfd/abx500/ab8500-bm.h b/include/linux/mfd/abx500/ab8500-bm.h
index 44310c98ee6e..6c6a02e53cd9 100644
--- a/include/linux/mfd/abx500/ab8500-bm.h
+++ b/include/linux/mfd/abx500/ab8500-bm.h
@@ -431,11 +431,18 @@ struct ab8500_fg *ab8500_fg_get(void);
 int ab8500_fg_inst_curr_blocking(struct ab8500_fg *dev);
 int ab8500_fg_inst_curr_start(struct ab8500_fg *di);
 int ab8500_fg_inst_curr_finalize(struct ab8500_fg *di, int *res);
+int ab8500_fg_inst_curr_started(struct ab8500_fg *di);
 int ab8500_fg_inst_curr_done(struct ab8500_fg *di);
 
 #else
+int ab8500_fg_inst_curr_started(struct ab8500_fg *di)
+{
+	return 0;
+}
+
 int ab8500_fg_inst_curr_done(struct ab8500_fg *di)
 {
+	return 0;
 }
 static void ab8500_fg_reinit(void)
 {
-- 
cgit v1.2.3


From ea4024017831d61874351defe8f8c58ae73f8009 Mon Sep 17 00:00:00 2001
From: Marcus Cooper <marcus.xm.cooper@stericsson.com>
Date: Fri, 11 Jan 2013 13:12:54 +0000
Subject: ab8500_bm: Recharge condition not optimal for battery

Today the battery recharge is determined with a voltage threshold. This
voltage threshold is only valid when the battery is relaxed. In charging
algorithm the voltage read is the loaded battery voltage and no
compensation is done to get the relaxed voltage. When maintenance
charging is not selected, this makes the recharging condition to almost
immediately activate when there is a discharge present on the battery.

Depending on which vendor the battery comes from this behavior can wear
out the battery much faster than normal.

The fuelgauge driver is responsible to monitor the actual battery
capacity and is able to estimate the remaining capacity. It is better to
use the remaining capacity as a limit to determine when battery should
be recharged.

Signed-off-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Marcus Cooper <marcus.xm.cooper@stericsson.com>
Reviewed-by: Hakan BERG <hakan.berg@stericsson.com>
Reviewed-by: Jonas ABERG <jonas.aberg@stericsson.com>
Signed-off-by: Anton Vorontsov <anton@enomsg.org>
---
 drivers/power/ab8500_bmdata.c        |  19 ++---
 drivers/power/ab8500_fg.c            | 146 ++++++++++++++++++++++++++++++++---
 drivers/power/abx500_chargalg.c      |  30 +++----
 include/linux/mfd/abx500.h           |   6 +-
 include/linux/mfd/abx500/ab8500-bm.h |   2 +
 5 files changed, 168 insertions(+), 35 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/power/ab8500_bmdata.c b/drivers/power/ab8500_bmdata.c
index 0bf52369d6a5..20c157b4fe99 100644
--- a/drivers/power/ab8500_bmdata.c
+++ b/drivers/power/ab8500_bmdata.c
@@ -192,7 +192,7 @@ static struct abx500_battery_type bat_type_thermistor[] = {
 		.nominal_voltage = 3700,
 		.termination_vol = 4050,
 		.termination_curr = 200,
-		.recharge_vol = 3990,
+		.recharge_cap = 95,
 		.normal_cur_lvl = 400,
 		.normal_vol_lvl = 4100,
 		.maint_a_cur_lvl = 400,
@@ -219,7 +219,7 @@ static struct abx500_battery_type bat_type_thermistor[] = {
 		.nominal_voltage = 3600,
 		.termination_vol = 4150,
 		.termination_curr = 80,
-		.recharge_vol = 4130,
+		.recharge_cap = 95,
 		.normal_cur_lvl = 700,
 		.normal_vol_lvl = 4200,
 		.maint_a_cur_lvl = 600,
@@ -247,7 +247,7 @@ static struct abx500_battery_type bat_type_thermistor[] = {
 		.nominal_voltage = 3600,
 		.termination_vol = 4150,
 		.termination_curr = 80,
-		.recharge_vol = 4130,
+		.recharge_cap = 95,
 		.normal_cur_lvl = 700,
 		.normal_vol_lvl = 4200,
 		.maint_a_cur_lvl = 600,
@@ -278,7 +278,7 @@ static struct abx500_battery_type bat_type_ext_thermistor[] = {
 		.nominal_voltage = 3700,
 		.termination_vol = 4050,
 		.termination_curr = 200,
-		.recharge_vol = 3990,
+		.recharge_cap = 95,
 		.normal_cur_lvl = 400,
 		.normal_vol_lvl = 4100,
 		.maint_a_cur_lvl = 400,
@@ -310,7 +310,7 @@ static struct abx500_battery_type bat_type_ext_thermistor[] = {
 		.nominal_voltage = 3700,
 		.termination_vol = 4150,
 		.termination_curr = 100,
-		.recharge_vol = 4130,
+		.recharge_cap = 95,
 		.normal_cur_lvl = 700,
 		.normal_vol_lvl = 4200,
 		.maint_a_cur_lvl = 600,
@@ -337,7 +337,7 @@ static struct abx500_battery_type bat_type_ext_thermistor[] = {
 		.nominal_voltage = 3700,
 		.termination_vol = 4150,
 		.termination_curr = 100,
-		.recharge_vol = 4130,
+		.recharge_cap = 95,
 		.normal_cur_lvl = 700,
 		.normal_vol_lvl = 4200,
 		.maint_a_cur_lvl = 600,
@@ -364,7 +364,7 @@ static struct abx500_battery_type bat_type_ext_thermistor[] = {
 		.nominal_voltage = 3700,
 		.termination_vol = 4150,
 		.termination_curr = 100,
-		.recharge_vol = 4130,
+		.recharge_cap = 95,
 		.normal_cur_lvl = 700,
 		.normal_vol_lvl = 4200,
 		.maint_a_cur_lvl = 600,
@@ -405,8 +405,8 @@ static const struct abx500_fg_parameters fg = {
 	.lowbat_threshold = 3100,
 	.battok_falling_th_sel0 = 2860,
 	.battok_raising_th_sel1 = 2860,
+	.maint_thres = 95,
 	.user_cap_limit = 15,
-	.maint_thres = 97,
 };
 
 static const struct abx500_maxim_parameters maxi_params = {
@@ -435,6 +435,7 @@ struct abx500_bm_data ab8500_bm_data = {
 	.bkup_bat_v             = BUP_VCH_SEL_2P6V,
 	.bkup_bat_i             = BUP_ICH_SEL_150UA,
 	.no_maintenance         = false,
+	.capacity_scaling       = false,
 	.adc_therm              = ABx500_ADC_THERM_BATCTRL,
 	.chg_unknown_bat        = false,
 	.enable_overshoot       = false,
@@ -479,7 +480,7 @@ int __devinit ab8500_bm_of_probe(struct device *dev,
 		bm->chg_unknown_bat = true;
 		bm->bat_type[BATTERY_UNKNOWN].charge_full_design = 2600;
 		bm->bat_type[BATTERY_UNKNOWN].termination_vol    = 4150;
-		bm->bat_type[BATTERY_UNKNOWN].recharge_vol       = 4130;
+		bm->bat_type[BATTERY_UNKNOWN].recharge_cap       = 95;
 		bm->bat_type[BATTERY_UNKNOWN].normal_cur_lvl     = 520;
 		bm->bat_type[BATTERY_UNKNOWN].normal_vol_lvl     = 4200;
 	}
diff --git a/drivers/power/ab8500_fg.c b/drivers/power/ab8500_fg.c
index f3dbba8b3925..76fab6b5c541 100644
--- a/drivers/power/ab8500_fg.c
+++ b/drivers/power/ab8500_fg.c
@@ -113,6 +113,13 @@ struct ab8500_fg_avg_cap {
 	int sum;
 };
 
+struct ab8500_fg_cap_scaling {
+	bool enable;
+	int cap_to_scale[2];
+	int disable_cap_level;
+	int scaled_cap;
+};
+
 struct ab8500_fg_battery_capacity {
 	int max_mah_design;
 	int max_mah;
@@ -123,6 +130,7 @@ struct ab8500_fg_battery_capacity {
 	int prev_percent;
 	int prev_level;
 	int user_mah;
+	struct ab8500_fg_cap_scaling cap_scale;
 };
 
 struct ab8500_fg_flags {
@@ -1166,6 +1174,99 @@ static int ab8500_fg_capacity_level(struct ab8500_fg *di)
 	return ret;
 }
 
+/**
+ * ab8500_fg_calculate_scaled_capacity() - Capacity scaling
+ * @di:		pointer to the ab8500_fg structure
+ *
+ * Calculates the capacity to be shown to upper layers. Scales the capacity
+ * to have 100% as a reference from the actual capacity upon removal of charger
+ * when charging is in maintenance mode.
+ */
+static int ab8500_fg_calculate_scaled_capacity(struct ab8500_fg *di)
+{
+	struct ab8500_fg_cap_scaling *cs = &di->bat_cap.cap_scale;
+	int capacity = di->bat_cap.prev_percent;
+
+	if (!cs->enable)
+		return capacity;
+
+	/*
+	 * As long as we are in fully charge mode scale the capacity
+	 * to show 100%.
+	 */
+	if (di->flags.fully_charged) {
+		cs->cap_to_scale[0] = 100;
+		cs->cap_to_scale[1] =
+			max(capacity, di->bm->fg_params->maint_thres);
+		dev_dbg(di->dev, "Scale cap with %d/%d\n",
+			 cs->cap_to_scale[0], cs->cap_to_scale[1]);
+	}
+
+	/* Calculates the scaled capacity. */
+	if ((cs->cap_to_scale[0] != cs->cap_to_scale[1])
+					&& (cs->cap_to_scale[1] > 0))
+		capacity = min(100,
+				 DIV_ROUND_CLOSEST(di->bat_cap.prev_percent *
+						 cs->cap_to_scale[0],
+						 cs->cap_to_scale[1]));
+
+	if (di->flags.charging) {
+		if (capacity < cs->disable_cap_level) {
+			cs->disable_cap_level = capacity;
+			dev_dbg(di->dev, "Cap to stop scale lowered %d%%\n",
+				cs->disable_cap_level);
+		} else if (!di->flags.fully_charged) {
+			if (di->bat_cap.prev_percent >=
+			    cs->disable_cap_level) {
+				dev_dbg(di->dev, "Disabling scaled capacity\n");
+				cs->enable = false;
+				capacity = di->bat_cap.prev_percent;
+			} else {
+				dev_dbg(di->dev,
+					"Waiting in cap to level %d%%\n",
+					cs->disable_cap_level);
+				capacity = cs->disable_cap_level;
+			}
+		}
+	}
+
+	return capacity;
+}
+
+/**
+ * ab8500_fg_update_cap_scalers() - Capacity scaling
+ * @di:		pointer to the ab8500_fg structure
+ *
+ * To be called when state change from charge<->discharge to update
+ * the capacity scalers.
+ */
+static void ab8500_fg_update_cap_scalers(struct ab8500_fg *di)
+{
+	struct ab8500_fg_cap_scaling *cs = &di->bat_cap.cap_scale;
+
+	if (!cs->enable)
+		return;
+	if (di->flags.charging) {
+		di->bat_cap.cap_scale.disable_cap_level =
+			di->bat_cap.cap_scale.scaled_cap;
+		dev_dbg(di->dev, "Cap to stop scale at charge %d%%\n",
+				di->bat_cap.cap_scale.disable_cap_level);
+	} else {
+		if (cs->scaled_cap != 100) {
+			cs->cap_to_scale[0] = cs->scaled_cap;
+			cs->cap_to_scale[1] = di->bat_cap.prev_percent;
+		} else {
+			cs->cap_to_scale[0] = 100;
+			cs->cap_to_scale[1] =
+				max(di->bat_cap.prev_percent,
+				    di->bm->fg_params->maint_thres);
+		}
+
+		dev_dbg(di->dev, "Cap to scale at discharge %d/%d\n",
+				cs->cap_to_scale[0], cs->cap_to_scale[1]);
+	}
+}
+
 /**
  * ab8500_fg_check_capacity_limits() - Check if capacity has changed
  * @di:		pointer to the ab8500_fg structure
@@ -1214,16 +1315,24 @@ static void ab8500_fg_check_capacity_limits(struct ab8500_fg *di, bool init)
 	} else if (di->flags.fully_charged) {
 		/*
 		 * We report 100% if algorithm reported fully charged
-		 * unless capacity drops too much
+		 * and show 100% during maintenance charging (scaling).
 		 */
 		if (di->flags.force_full) {
 			di->bat_cap.prev_percent = di->bat_cap.permille / 10;
 			di->bat_cap.prev_mah = di->bat_cap.mah;
-		} else if (!di->flags.force_full &&
-			di->bat_cap.prev_percent !=
-			(di->bat_cap.permille) / 10 &&
-			(di->bat_cap.permille / 10) <
-			di->bm->fg_params->maint_thres) {
+
+			changed = true;
+
+			if (!di->bat_cap.cap_scale.enable &&
+						di->bm->capacity_scaling) {
+				di->bat_cap.cap_scale.enable = true;
+				di->bat_cap.cap_scale.cap_to_scale[0] = 100;
+				di->bat_cap.cap_scale.cap_to_scale[1] =
+						di->bat_cap.prev_percent;
+				di->bat_cap.cap_scale.disable_cap_level = 100;
+			}
+		} else if ( di->bat_cap.prev_percent !=
+			(di->bat_cap.permille) / 10) {
 			dev_dbg(di->dev,
 				"battery reported full "
 				"but capacity dropping: %d\n",
@@ -1272,6 +1381,14 @@ static void ab8500_fg_check_capacity_limits(struct ab8500_fg *di, bool init)
 	}
 
 	if (changed) {
+		if (di->bm->capacity_scaling) {
+			di->bat_cap.cap_scale.scaled_cap =
+				ab8500_fg_calculate_scaled_capacity(di);
+
+			dev_info(di->dev, "capacity=%d (%d)\n",
+				di->bat_cap.prev_percent,
+				di->bat_cap.cap_scale.scaled_cap);
+		}
 		power_supply_changed(&di->fg_psy);
 		if (di->flags.fully_charged && di->flags.force_full) {
 			dev_dbg(di->dev, "Battery full, notifying.\n");
@@ -1337,7 +1454,7 @@ static void ab8500_fg_algorithm_charging(struct ab8500_fg *di)
 		 * Read the FG and calculate the new capacity
 		 */
 		mutex_lock(&di->cc_lock);
-		if (!di->flags.conv_done) {
+		if (!di->flags.conv_done && !di->flags.force_full) {
 			/* Wasn't the CC IRQ that got us here */
 			mutex_unlock(&di->cc_lock);
 			dev_dbg(di->dev, "%s CC conv not done\n",
@@ -2027,7 +2144,9 @@ static int ab8500_fg_get_property(struct power_supply *psy,
 			val->intval = di->bat_cap.prev_mah;
 		break;
 	case POWER_SUPPLY_PROP_CAPACITY:
-		if (di->flags.batt_unknown && !di->bm->chg_unknown_bat &&
+		if (di->bm->capacity_scaling)
+			val->intval = di->bat_cap.cap_scale.scaled_cap;
+		else if (di->flags.batt_unknown && !di->bm->chg_unknown_bat &&
 				di->flags.batt_id_received)
 			val->intval = 100;
 		else
@@ -2091,6 +2210,8 @@ static int ab8500_fg_get_ext_psy_data(struct device *dev, void *data)
 						break;
 					di->flags.charging = false;
 					di->flags.fully_charged = false;
+					if (di->bm->capacity_scaling)
+						ab8500_fg_update_cap_scalers(di);
 					queue_work(di->fg_wq, &di->fg_work);
 					break;
 				case POWER_SUPPLY_STATUS_FULL:
@@ -2103,10 +2224,13 @@ static int ab8500_fg_get_ext_psy_data(struct device *dev, void *data)
 					queue_work(di->fg_wq, &di->fg_work);
 					break;
 				case POWER_SUPPLY_STATUS_CHARGING:
-					if (di->flags.charging)
+					if (di->flags.charging &&
+						!di->flags.fully_charged)
 						break;
 					di->flags.charging = true;
 					di->flags.fully_charged = false;
+					if (di->bm->capacity_scaling)
+						ab8500_fg_update_cap_scalers(di);
 					queue_work(di->fg_wq, &di->fg_work);
 					break;
 				};
@@ -2146,8 +2270,8 @@ static int ab8500_fg_get_ext_psy_data(struct device *dev, void *data)
 		case POWER_SUPPLY_PROP_TEMP:
 			switch (ext->type) {
 			case POWER_SUPPLY_TYPE_BATTERY:
-			    if (di->flags.batt_id_received)
-				di->bat_temp = ret.intval;
+				if (di->flags.batt_id_received)
+					di->bat_temp = ret.intval;
 				break;
 			default:
 				break;
diff --git a/drivers/power/abx500_chargalg.c b/drivers/power/abx500_chargalg.c
index 8b69da0ae5af..78b623572b52 100644
--- a/drivers/power/abx500_chargalg.c
+++ b/drivers/power/abx500_chargalg.c
@@ -33,9 +33,6 @@
 /* End-of-charge criteria counter */
 #define EOC_COND_CNT			10
 
-/* Recharge criteria counter */
-#define RCH_COND_CNT			3
-
 #define to_abx500_chargalg_device_info(x) container_of((x), \
 	struct abx500_chargalg, chargalg_psy);
 
@@ -196,7 +193,6 @@ enum maxim_ret {
  * @dev:		pointer to the structure device
  * @charge_status:	battery operating status
  * @eoc_cnt:		counter used to determine end-of_charge
- * @rch_cnt:		counter used to determine start of recharge
  * @maintenance_chg:	indicate if maintenance charge is active
  * @t_hyst_norm		temperature hysteresis when the temperature has been
  *			over or under normal limits
@@ -223,7 +219,6 @@ struct abx500_chargalg {
 	struct device *dev;
 	int charge_status;
 	int eoc_cnt;
-	int rch_cnt;
 	bool maintenance_chg;
 	int t_hyst_norm;
 	int t_hyst_lowhigh;
@@ -858,6 +853,7 @@ static int abx500_chargalg_get_ext_psy_data(struct device *dev, void *data)
 	union power_supply_propval ret;
 	int i, j;
 	bool psy_found = false;
+	bool capacity_updated = false;
 
 	psy = (struct power_supply *)data;
 	ext = dev_get_drvdata(dev);
@@ -870,6 +866,16 @@ static int abx500_chargalg_get_ext_psy_data(struct device *dev, void *data)
 	if (!psy_found)
 		return 0;
 
+	/*
+	 *  If external is not registering 'POWER_SUPPLY_PROP_CAPACITY' to its
+	 * property because of handling that sysfs entry on its own, this is
+	 * the place to get the battery capacity.
+	 */
+	if (!ext->get_property(ext, POWER_SUPPLY_PROP_CAPACITY, &ret)) {
+		di->batt_data.percent = ret.intval;
+		capacity_updated = true;
+	}
+
 	/* Go through all properties for the psy */
 	for (j = 0; j < ext->num_properties; j++) {
 		enum power_supply_property prop;
@@ -1154,7 +1160,8 @@ static int abx500_chargalg_get_ext_psy_data(struct device *dev, void *data)
 			}
 			break;
 		case POWER_SUPPLY_PROP_CAPACITY:
-			di->batt_data.percent = ret.intval;
+			if (!capacity_updated)
+				di->batt_data.percent = ret.intval;
 			break;
 		default:
 			break;
@@ -1424,16 +1431,13 @@ static void abx500_chargalg_algorithm(struct abx500_chargalg *di)
 	case STATE_WAIT_FOR_RECHARGE_INIT:
 		abx500_chargalg_hold_charging(di);
 		abx500_chargalg_state_to(di, STATE_WAIT_FOR_RECHARGE);
-		di->rch_cnt = RCH_COND_CNT;
 		/* Intentional fallthrough */
 
 	case STATE_WAIT_FOR_RECHARGE:
-		if (di->batt_data.volt <=
-			di->bm->bat_type[di->bm->batt_id].recharge_vol) {
-			if (di->rch_cnt-- == 0)
-				abx500_chargalg_state_to(di, STATE_NORMAL_INIT);
-		} else
-			di->rch_cnt = RCH_COND_CNT;
+		if (di->batt_data.percent <=
+		    di->bm->bat_type[di->bm->batt_id].
+		    recharge_cap)
+			abx500_chargalg_state_to(di, STATE_NORMAL_INIT);
 		break;
 
 	case STATE_MAINTENANCE_A_INIT:
diff --git a/include/linux/mfd/abx500.h b/include/linux/mfd/abx500.h
index 4906b1842d2f..0e6e90badfca 100644
--- a/include/linux/mfd/abx500.h
+++ b/include/linux/mfd/abx500.h
@@ -131,7 +131,7 @@ struct abx500_maxim_parameters {
  * @nominal_voltage:		Nominal voltage of the battery in mV
  * @termination_vol:		max voltage upto which battery can be charged
  * @termination_curr		battery charging termination current in mA
- * @recharge_vol		battery voltage limit that will trigger a new
+ * @recharge_cap		battery capacity limit that will trigger a new
  *				full charging cycle in the case where maintenan-
  *				-ce charging has been disabled
  * @normal_cur_lvl:		charger current in normal state in mA
@@ -160,7 +160,7 @@ struct abx500_battery_type {
 	int nominal_voltage;
 	int termination_vol;
 	int termination_curr;
-	int recharge_vol;
+	int recharge_cap;
 	int normal_cur_lvl;
 	int normal_vol_lvl;
 	int maint_a_cur_lvl;
@@ -224,6 +224,7 @@ struct abx500_bm_charger_parameters {
  * @bkup_bat_v		voltage which we charge the backup battery with
  * @bkup_bat_i		current which we charge the backup battery with
  * @no_maintenance	indicates that maintenance charging is disabled
+ * @capacity_scaling    indicates whether capacity scaling is to be used
  * @abx500_adc_therm	placement of thermistor, batctrl or battemp adc
  * @chg_unknown_bat	flag to enable charging of unknown batteries
  * @enable_overshoot	flag to enable VBAT overshoot control
@@ -254,6 +255,7 @@ struct abx500_bm_data {
 	int bkup_bat_v;
 	int bkup_bat_i;
 	bool no_maintenance;
+	bool capacity_scaling;
 	bool chg_unknown_bat;
 	bool enable_overshoot;
 	bool auto_trig;
diff --git a/include/linux/mfd/abx500/ab8500-bm.h b/include/linux/mfd/abx500/ab8500-bm.h
index 6c6a02e53cd9..fac684a6f346 100644
--- a/include/linux/mfd/abx500/ab8500-bm.h
+++ b/include/linux/mfd/abx500/ab8500-bm.h
@@ -355,6 +355,7 @@ struct ab8500_bm_charger_parameters {
  * @bkup_bat_v		voltage which we charge the backup battery with
  * @bkup_bat_i		current which we charge the backup battery with
  * @no_maintenance	indicates that maintenance charging is disabled
+ * @capacity_scaling    indicates whether capacity scaling is to be used
  * @adc_therm		placement of thermistor, batctrl or battemp adc
  * @chg_unknown_bat	flag to enable charging of unknown batteries
  * @enable_overshoot	flag to enable VBAT overshoot control
@@ -383,6 +384,7 @@ struct ab8500_bm_data {
 	int bkup_bat_v;
 	int bkup_bat_i;
 	bool no_maintenance;
+	bool capacity_scaling;
 	bool chg_unknown_bat;
 	bool enable_overshoot;
 	enum abx500_adc_therm adc_therm;
-- 
cgit v1.2.3


From d36e3e6d50ccdb5cdef6da0a01dedddd317f23fc Mon Sep 17 00:00:00 2001
From: Hakan Berg <hakan.berg@stericsson.com>
Date: Fri, 11 Jan 2013 13:12:57 +0000
Subject: ab8500_btemp: Ignore false btemp low interrupt

Ignore the low btemp interrupts for ab8500 3.0 and 3.3

Signed-off-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Hakan Berg <hakan.berg@stericsson.com>
Reviewed-by: Jonas ABERG <jonas.aberg@stericsson.com>
Tested-by: Marcus COOPER <marcus.xm.cooper@stericsson.com>
Signed-off-by: Anton Vorontsov <anton@enomsg.org>
---
 drivers/power/ab8500_btemp.c      | 8 ++++----
 include/linux/mfd/abx500/ab8500.h | 5 +++++
 2 files changed, 9 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/power/ab8500_btemp.c b/drivers/power/ab8500_btemp.c
index adebf6c6d146..b3f6467da7f3 100644
--- a/drivers/power/ab8500_btemp.c
+++ b/drivers/power/ab8500_btemp.c
@@ -622,9 +622,9 @@ static irqreturn_t ab8500_btemp_templow_handler(int irq, void *_di)
 {
 	struct ab8500_btemp *di = _di;
 
-	if (is_ab8500_2p0_or_earlier(di->parent)) {
+	if (is_ab8500_3p3_or_earlier(di->parent)) {
 		dev_dbg(di->dev, "Ignore false btemp low irq"
-			" for ABB cut 1.0, 1.1 and 2.0\n");
+			" for ABB cut 1.0, 1.1, 2.0 and 3.3\n");
 	} else {
 		dev_crit(di->dev, "Battery temperature lower than -10deg c\n");
 
@@ -738,10 +738,10 @@ static int ab8500_btemp_get_temp(struct ab8500_btemp *di)
 	int temp = 0;
 
 	/*
-	 * The BTEMP events are not reliabe on AB8500 cut2.0
+	 * The BTEMP events are not reliabe on AB8500 cut3.3
 	 * and prior versions
 	 */
-	if (is_ab8500_2p0_or_earlier(di->parent)) {
+	if (is_ab8500_3p3_or_earlier(di->parent)) {
 		temp = di->bat_temp * 10;
 	} else {
 		if (di->events.btemp_low) {
diff --git a/include/linux/mfd/abx500/ab8500.h b/include/linux/mfd/abx500/ab8500.h
index 1cb5698b4d76..9dd9b99099df 100644
--- a/include/linux/mfd/abx500/ab8500.h
+++ b/include/linux/mfd/abx500/ab8500.h
@@ -335,6 +335,11 @@ static inline int is_ab8500_2p0_or_earlier(struct ab8500 *ab)
 	return (is_ab8500(ab) && (ab->chip_id <= AB8500_CUT2P0));
 }
 
+static inline int is_ab8500_3p3_or_earlier(struct ab8500 *ab)
+{
+	return (is_ab8500(ab) && (ab->chip_id <= AB8500_CUT3P3));
+}
+
 /* exclude also ab8505, ab9540... */
 static inline int is_ab8500_2p0(struct ab8500 *ab)
 {
-- 
cgit v1.2.3


From 74a8e349b1c882e34419877207ae850ed87dddf7 Mon Sep 17 00:00:00 2001
From: Hakan Berg <hakan.berg@stericsson.com>
Date: Fri, 11 Jan 2013 13:12:58 +0000
Subject: ab8500_bm: Adds support for Car/Travel Adapters

The Travel and Carkit adapter should be handled directly by
the charger driver.

Signed-off-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Marcus Cooper <marcus.xm.cooper@stericsson.com>
Reviewed-by: Jonas ABERG <jonas.aberg@stericsson.com>
Signed-off-by: Anton Vorontsov <anton@enomsg.org>
---
 drivers/power/ab8500_charger.c       | 92 ++++++++++++++++++++++++++----------
 include/linux/mfd/abx500/ab8500-bm.h |  1 +
 2 files changed, 69 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/power/ab8500_charger.c b/drivers/power/ab8500_charger.c
index 126b325cddc9..a0aeef2d2e05 100644
--- a/drivers/power/ab8500_charger.c
+++ b/drivers/power/ab8500_charger.c
@@ -108,6 +108,18 @@ enum ab8500_charger_link_status {
 	USB_STAT_HM_IDGND,
 	USB_STAT_RESERVED,
 	USB_STAT_NOT_VALID_LINK,
+	USB_STAT_PHY_EN,
+	USB_STAT_SUP_NO_IDGND_VBUS,
+	USB_STAT_SUP_IDGND_VBUS,
+	USB_STAT_CHARGER_LINE_1,
+	USB_STAT_CARKIT_1,
+	USB_STAT_CARKIT_2,
+	USB_STAT_ACA_DOCK_CHARGER,
+	USB_STAT_SAMSUNG_USB_PHY_DIS,
+	USB_STAT_SAMSUNG_USB_PHY_ENA,
+	USB_STAT_SAMSUNG_UART_PHY_DIS,
+	USB_STAT_SAMSUNG_UART_PHY_ENA,
+	USB_STAT_MOTOROLA_USB_PHY_ENA,
 };
 
 enum ab8500_usb_state {
@@ -586,7 +598,7 @@ static int ab8500_charger_detect_chargers(struct ab8500_charger *di)
  * Returns error code in case of failure else 0 on success
  */
 static int ab8500_charger_max_usb_curr(struct ab8500_charger *di,
-	enum ab8500_charger_link_status link_status)
+		enum ab8500_charger_link_status link_status)
 {
 	int ret = 0;
 
@@ -595,15 +607,19 @@ static int ab8500_charger_max_usb_curr(struct ab8500_charger *di,
 	case USB_STAT_STD_HOST_C_NS:
 	case USB_STAT_STD_HOST_C_S:
 		dev_dbg(di->dev, "USB Type - Standard host is "
-			"detected through USB driver\n");
+				"detected through USB driver\n");
 		di->max_usb_in_curr = USB_CH_IP_CUR_LVL_0P09;
 		break;
 	case USB_STAT_HOST_CHG_HS_CHIRP:
 		di->max_usb_in_curr = USB_CH_IP_CUR_LVL_0P5;
+		dev_dbg(di->dev, "USB Type - 0x%02x MaxCurr: %d", link_status,
+				di->max_usb_in_curr);
 		break;
 	case USB_STAT_HOST_CHG_HS:
 	case USB_STAT_ACA_RID_C_HS:
 		di->max_usb_in_curr = USB_CH_IP_CUR_LVL_0P9;
+		dev_dbg(di->dev, "USB Type - 0x%02x MaxCurr: %d", link_status,
+				di->max_usb_in_curr);
 		break;
 	case USB_STAT_ACA_RID_A:
 		/*
@@ -611,6 +627,8 @@ static int ab8500_charger_max_usb_curr(struct ab8500_charger *di,
 		 * can consume (300mA). Closest level is 1100mA
 		 */
 		di->max_usb_in_curr = USB_CH_IP_CUR_LVL_1P1;
+		dev_dbg(di->dev, "USB Type - 0x%02x MaxCurr: %d", link_status,
+				di->max_usb_in_curr);
 		break;
 	case USB_STAT_ACA_RID_B:
 		/*
@@ -618,34 +636,50 @@ static int ab8500_charger_max_usb_curr(struct ab8500_charger *di,
 		 * 100mA for potential accessory). Closest level is 1300mA
 		 */
 		di->max_usb_in_curr = USB_CH_IP_CUR_LVL_1P3;
+		dev_dbg(di->dev, "USB Type - 0x%02x MaxCurr: %d", link_status,
+				di->max_usb_in_curr);
 		break;
-	case USB_STAT_DEDICATED_CHG:
 	case USB_STAT_HOST_CHG_NM:
-	case USB_STAT_ACA_RID_C_HS_CHIRP:
+	case USB_STAT_DEDICATED_CHG:
 	case USB_STAT_ACA_RID_C_NM:
+	case USB_STAT_ACA_RID_C_HS_CHIRP:
 		di->max_usb_in_curr = USB_CH_IP_CUR_LVL_1P5;
-		break;
-	case USB_STAT_RESERVED:
-		/*
-		 * This state is used to indicate that VBUS has dropped below
-		 * the detection level 4 times in a row. This is due to the
-		 * charger output current is set to high making the charger
-		 * voltage collapse. This have to be propagated through to
-		 * chargalg. This is done using the property
-		 * POWER_SUPPLY_PROP_CURRENT_AVG = 1
-		 */
-		di->flags.vbus_collapse = true;
-		dev_dbg(di->dev, "USB Type - USB_STAT_RESERVED "
-			"VBUS has collapsed\n");
-		ret = -1;
+		dev_dbg(di->dev, "USB Type - 0x%02x MaxCurr: %d", link_status,
+				di->max_usb_in_curr);
 		break;
 	case USB_STAT_HM_IDGND:
-	case USB_STAT_NOT_CONFIGURED:
 	case USB_STAT_NOT_VALID_LINK:
+	case USB_STAT_NOT_CONFIGURED:
 		dev_err(di->dev, "USB Type - Charging not allowed\n");
 		di->max_usb_in_curr = USB_CH_IP_CUR_LVL_0P05;
 		ret = -ENXIO;
 		break;
+	case USB_STAT_RESERVED:
+		if (is_ab8500(di->parent)) {
+			di->flags.vbus_collapse = true;
+			dev_err(di->dev, "USB Type - USB_STAT_RESERVED "
+						"VBUS has collapsed\n");
+			ret = -ENXIO;
+			break;
+		}
+		if (is_ab9540(di->parent) || is_ab8505(di->parent)) {
+			dev_dbg(di->dev, "USB Type - Charging not allowed\n");
+			di->max_usb_in_curr = USB_CH_IP_CUR_LVL_0P05;
+			dev_dbg(di->dev, "USB Type - 0x%02x MaxCurr: %d",
+					link_status, di->max_usb_in_curr);
+			ret = -ENXIO;
+			break;
+		}
+		break;
+	case USB_STAT_CARKIT_1:
+	case USB_STAT_CARKIT_2:
+	case USB_STAT_ACA_DOCK_CHARGER:
+	case USB_STAT_CHARGER_LINE_1:
+		di->max_usb_in_curr = USB_CH_IP_CUR_LVL_0P5;
+		dev_dbg(di->dev, "USB Type - 0x%02x MaxCurr: %d", link_status,
+				di->max_usb_in_curr);
+		break;
+
 	default:
 		dev_err(di->dev, "USB Type - Unknown\n");
 		di->max_usb_in_curr = USB_CH_IP_CUR_LVL_0P05;
@@ -677,8 +711,14 @@ static int ab8500_charger_read_usb_type(struct ab8500_charger *di)
 		dev_err(di->dev, "%s ab8500 read failed\n", __func__);
 		return ret;
 	}
-	ret = abx500_get_register_interruptible(di->dev, AB8500_USB,
-		AB8500_USB_LINE_STAT_REG, &val);
+	if (is_ab8500(di->parent)) {
+		ret = abx500_get_register_interruptible(di->dev, AB8500_USB,
+				AB8500_USB_LINE_STAT_REG, &val);
+	} else {
+		if (is_ab9540(di->parent) || is_ab8505(di->parent))
+			ret = abx500_get_register_interruptible(di->dev,
+				AB8500_USB, AB8500_USB_LINK1_STAT_REG, &val);
+	}
 	if (ret < 0) {
 		dev_err(di->dev, "%s ab8500 read failed\n", __func__);
 		return ret;
@@ -718,8 +758,13 @@ static int ab8500_charger_detect_usb_type(struct ab8500_charger *di)
 			dev_err(di->dev, "%s ab8500 read failed\n", __func__);
 			return ret;
 		}
-		ret = abx500_get_register_interruptible(di->dev, AB8500_USB,
-			AB8500_USB_LINE_STAT_REG, &val);
+
+		if (is_ab8500(di->parent))
+			ret = abx500_get_register_interruptible(di->dev,
+				AB8500_USB, AB8500_USB_LINE_STAT_REG, &val);
+		else
+			ret = abx500_get_register_interruptible(di->dev,
+				AB8500_USB, AB8500_USB_LINK1_STAT_REG, &val);
 		if (ret < 0) {
 			dev_err(di->dev, "%s ab8500 read failed\n", __func__);
 			return ret;
@@ -2944,7 +2989,6 @@ static int ab8500_charger_probe(struct platform_device *pdev)
 	}
 
 	if (charger_status & USB_PW_CONN) {
-		dev_dbg(di->dev, "VBUS Detect during startup\n");
 		di->vbus_detected = true;
 		di->vbus_detected_start = true;
 		queue_work(di->charger_wq,
diff --git a/include/linux/mfd/abx500/ab8500-bm.h b/include/linux/mfd/abx500/ab8500-bm.h
index fac684a6f346..e2a1e6d84f72 100644
--- a/include/linux/mfd/abx500/ab8500-bm.h
+++ b/include/linux/mfd/abx500/ab8500-bm.h
@@ -23,6 +23,7 @@
  * Bank : 0x5
  */
 #define AB8500_USB_LINE_STAT_REG	0x80
+#define AB8500_USB_LINK1_STAT_REG	0x94
 
 /*
  * Charger / status register offfsets
-- 
cgit v1.2.3


From 1e817fb62cd185a2232ad4302579491805609489 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Mon, 19 Nov 2012 10:26:16 -0800
Subject: time: create __getnstimeofday for WARNless calls

The pstore RAM backend can get called during resume, and must be defensive
against a suspended time source. Expose getnstimeofday logic that returns
an error instead of a WARN. This can be detected and the timestamp can
be zeroed out.

Reported-by: Doug Anderson <dianders@chromium.org>
Cc: John Stultz <johnstul@us.ibm.com>
Cc: Anton Vorontsov <anton.vorontsov@linaro.org>
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 fs/pstore/ram.c           | 10 +++++++---
 include/linux/time.h      |  1 +
 kernel/time/timekeeping.c | 29 ++++++++++++++++++++++++-----
 3 files changed, 32 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index 1a4f6da58eab..dacfe78aee7e 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -168,12 +168,16 @@ static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type,
 static size_t ramoops_write_kmsg_hdr(struct persistent_ram_zone *prz)
 {
 	char *hdr;
-	struct timeval timestamp;
+	struct timespec timestamp;
 	size_t len;
 
-	do_gettimeofday(&timestamp);
+	/* Report zeroed timestamp if called before timekeeping has resumed. */
+	if (__getnstimeofday(&timestamp)) {
+		timestamp.tv_sec = 0;
+		timestamp.tv_nsec = 0;
+	}
 	hdr = kasprintf(GFP_ATOMIC, RAMOOPS_KERNMSG_HDR "%lu.%lu\n",
-		(long)timestamp.tv_sec, (long)timestamp.tv_usec);
+		(long)timestamp.tv_sec, (long)(timestamp.tv_nsec / 1000));
 	WARN_ON_ONCE(!hdr);
 	len = hdr ? strlen(hdr) : 0;
 	persistent_ram_write(prz, hdr, len);
diff --git a/include/linux/time.h b/include/linux/time.h
index 4d358e9d10f1..0015aea4c4a7 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -158,6 +158,7 @@ extern int do_setitimer(int which, struct itimerval *value,
 			struct itimerval *ovalue);
 extern unsigned int alarm_setitimer(unsigned int seconds);
 extern int do_getitimer(int which, struct itimerval *value);
+extern int __getnstimeofday(struct timespec *tv);
 extern void getnstimeofday(struct timespec *tv);
 extern void getrawmonotonic(struct timespec *ts);
 extern void getnstime_raw_and_real(struct timespec *ts_raw,
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 4c7de02eacdc..dfc7f87eb332 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -214,19 +214,18 @@ static void timekeeping_forward_now(struct timekeeper *tk)
 }
 
 /**
- * getnstimeofday - Returns the time of day in a timespec
+ * __getnstimeofday - Returns the time of day in a timespec.
  * @ts:		pointer to the timespec to be set
  *
- * Returns the time of day in a timespec.
+ * Updates the time of day in the timespec.
+ * Returns 0 on success, or -ve when suspended (timespec will be undefined).
  */
-void getnstimeofday(struct timespec *ts)
+int __getnstimeofday(struct timespec *ts)
 {
 	struct timekeeper *tk = &timekeeper;
 	unsigned long seq;
 	s64 nsecs = 0;
 
-	WARN_ON(timekeeping_suspended);
-
 	do {
 		seq = read_seqbegin(&tk->lock);
 
@@ -237,6 +236,26 @@ void getnstimeofday(struct timespec *ts)
 
 	ts->tv_nsec = 0;
 	timespec_add_ns(ts, nsecs);
+
+	/*
+	 * Do not bail out early, in case there were callers still using
+	 * the value, even in the face of the WARN_ON.
+	 */
+	if (unlikely(timekeeping_suspended))
+		return -EAGAIN;
+	return 0;
+}
+EXPORT_SYMBOL(__getnstimeofday);
+
+/**
+ * getnstimeofday - Returns the time of day in a timespec.
+ * @ts:		pointer to the timespec to be set
+ *
+ * Returns the time of day in a timespec (WARN if suspended).
+ */
+void getnstimeofday(struct timespec *ts)
+{
+	WARN_ON(__getnstimeofday(ts));
 }
 EXPORT_SYMBOL(getnstimeofday);
 
-- 
cgit v1.2.3


From 023f333a99cee9b5cd3268ff87298eb01a31f78e Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Date: Mon, 17 Dec 2012 14:30:53 -0700
Subject: NTP: Add a CONFIG_RTC_SYSTOHC configuration

The purpose of this option is to allow ARM/etc systems that rely on the
class RTC subsystem to have the same kind of automatic NTP based
synchronization that we have on PC platforms. Today ARM does not
implement update_persistent_clock and makes extensive use of the class
RTC system.

When enabled CONFIG_RTC_SYSTOHC will provide a generic
rtc_update_persistent_clock that stores the current time in the RTC and
is intended complement the existing CONFIG_RTC_HCTOSYS option that loads
the RTC at boot.

Like with RTC_HCTOSYS the platform's update_persistent_clock is used
first, if it works. Platforms with mixed class RTC and non-RTC drivers
need to return ENODEV when class RTC should be used. Such an update for
PPC is included in this patch.

Long term, implementations of update_persistent_clock should migrate to
proper class RTC drivers and use CONFIG_RTC_SYSTOHC instead.

Tested on ARM kirkwood and PPC405

Signed-off-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 arch/powerpc/kernel/time.c |  2 +-
 drivers/rtc/Kconfig        | 10 +++++++++-
 drivers/rtc/Makefile       |  1 +
 drivers/rtc/systohc.c      | 44 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/rtc.h        |  1 +
 kernel/time/ntp.c          | 16 ++++++++++++----
 6 files changed, 68 insertions(+), 6 deletions(-)
 create mode 100644 drivers/rtc/systohc.c

(limited to 'include/linux')

diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index ce4cb772dc78..bc844a857e05 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -667,7 +667,7 @@ int update_persistent_clock(struct timespec now)
 	struct rtc_time tm;
 
 	if (!ppc_md.set_rtc_time)
-		return 0;
+		return -ENODEV;
 
 	to_tm(now.tv_sec + 1 + timezone_offset, &tm);
 	tm.tm_year -= 1900;
diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index 19c03ab2bdcb..b377e9672497 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -25,9 +25,17 @@ config RTC_HCTOSYS
 	  the value read from a specified RTC device. This is useful to avoid
 	  unnecessary fsck runs at boot time, and to network better.
 
+config RTC_SYSTOHC
+	bool "Set the RTC time based on NTP synchronization"
+	default y
+	help
+	  If you say yes here, the system time (wall clock) will be stored
+	  in the RTC specified by RTC_HCTOSYS_DEVICE approximately every 11
+	  minutes if userspace reports synchronized NTP status.
+
 config RTC_HCTOSYS_DEVICE
 	string "RTC used to set the system time"
-	depends on RTC_HCTOSYS = y
+	depends on RTC_HCTOSYS = y || RTC_SYSTOHC = y
 	default "rtc0"
 	help
 	  The RTC device that will be used to (re)initialize the system
diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile
index 56297f0fd388..69d11f1d76e4 100644
--- a/drivers/rtc/Makefile
+++ b/drivers/rtc/Makefile
@@ -6,6 +6,7 @@ ccflags-$(CONFIG_RTC_DEBUG)	:= -DDEBUG
 
 obj-$(CONFIG_RTC_LIB)		+= rtc-lib.o
 obj-$(CONFIG_RTC_HCTOSYS)	+= hctosys.o
+obj-$(CONFIG_RTC_SYSTOHC)	+= systohc.o
 obj-$(CONFIG_RTC_CLASS)		+= rtc-core.o
 rtc-core-y			:= class.o interface.o
 
diff --git a/drivers/rtc/systohc.c b/drivers/rtc/systohc.c
new file mode 100644
index 000000000000..bf3e242ccc5c
--- /dev/null
+++ b/drivers/rtc/systohc.c
@@ -0,0 +1,44 @@
+/*
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ */
+#include <linux/rtc.h>
+#include <linux/time.h>
+
+/**
+ * rtc_set_ntp_time - Save NTP synchronized time to the RTC
+ * @now: Current time of day
+ *
+ * Replacement for the NTP platform function update_persistent_clock
+ * that stores time for later retrieval by rtc_hctosys.
+ *
+ * Returns 0 on successful RTC update, -ENODEV if a RTC update is not
+ * possible at all, and various other -errno for specific temporary failure
+ * cases.
+ *
+ * If temporary failure is indicated the caller should try again 'soon'
+ */
+int rtc_set_ntp_time(struct timespec now)
+{
+	struct rtc_device *rtc;
+	struct rtc_time tm;
+	int err = -ENODEV;
+
+	if (now.tv_nsec < (NSEC_PER_SEC >> 1))
+		rtc_time_to_tm(now.tv_sec, &tm);
+	else
+		rtc_time_to_tm(now.tv_sec + 1, &tm);
+
+	rtc = rtc_class_open(CONFIG_RTC_HCTOSYS_DEVICE);
+	if (rtc) {
+		/* rtc_hctosys exclusively uses UTC, so we call set_time here,
+		 * not set_mmss. */
+		if (rtc->ops && (rtc->ops->set_time || rtc->ops->set_mmss))
+			err = rtc_set_time(rtc, &tm);
+		rtc_class_close(rtc);
+	}
+
+	return err;
+}
diff --git a/include/linux/rtc.h b/include/linux/rtc.h
index 9531845c419f..11d05f9fe8b6 100644
--- a/include/linux/rtc.h
+++ b/include/linux/rtc.h
@@ -138,6 +138,7 @@ extern void rtc_device_unregister(struct rtc_device *rtc);
 extern int rtc_read_time(struct rtc_device *rtc, struct rtc_time *tm);
 extern int rtc_set_time(struct rtc_device *rtc, struct rtc_time *tm);
 extern int rtc_set_mmss(struct rtc_device *rtc, unsigned long secs);
+extern int rtc_set_ntp_time(struct timespec now);
 int __rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm);
 extern int rtc_read_alarm(struct rtc_device *rtc,
 			struct rtc_wkalrm *alrm);
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 24174b4d669b..313b161504b7 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -15,6 +15,7 @@
 #include <linux/time.h>
 #include <linux/mm.h>
 #include <linux/module.h>
+#include <linux/rtc.h>
 
 #include "tick-internal.h"
 
@@ -483,8 +484,7 @@ out:
 	return leap;
 }
 
-#ifdef CONFIG_GENERIC_CMOS_UPDATE
-
+#if defined(CONFIG_GENERIC_CMOS_UPDATE) || defined(CONFIG_RTC_SYSTOHC)
 static void sync_cmos_clock(struct work_struct *work);
 
 static DECLARE_DELAYED_WORK(sync_cmos_work, sync_cmos_clock);
@@ -510,14 +510,22 @@ static void sync_cmos_clock(struct work_struct *work)
 	}
 
 	getnstimeofday(&now);
-	if (abs(now.tv_nsec - (NSEC_PER_SEC / 2)) <= tick_nsec / 2)
+	if (abs(now.tv_nsec - (NSEC_PER_SEC / 2)) <= tick_nsec / 2) {
+		fail = -ENODEV;
+#ifdef CONFIG_GENERIC_CMOS_UPDATE
 		fail = update_persistent_clock(now);
+#endif
+#ifdef CONFIG_RTC_SYSTOHC
+		if (fail == -ENODEV)
+			fail = rtc_set_ntp_time(now);
+#endif
+	}
 
 	next.tv_nsec = (NSEC_PER_SEC / 2) - now.tv_nsec - (TICK_NSEC / 2);
 	if (next.tv_nsec <= 0)
 		next.tv_nsec += NSEC_PER_SEC;
 
-	if (!fail)
+	if (!fail || fail == -ENODEV)
 		next.tv_sec = 659;
 	else
 		next.tv_sec = 0;
-- 
cgit v1.2.3


From 31ade30692dc9680bfc95700d794818fa3f754ac Mon Sep 17 00:00:00 2001
From: Feng Tang <feng.tang@intel.com>
Date: Wed, 16 Jan 2013 00:09:47 +0800
Subject: timekeeping: Add persistent_clock_exist flag

In current kernel, there are several places which need to check
whether there is a persistent clock for the platform. Current check
is done by calling the read_persistent_clock() and validating its
return value.

So one optimization is to do the check only once in timekeeping_init(),
and use a flag persistent_clock_exist to record it.

v2: Add a has_persistent_clock() helper function, as suggested by John.

Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: John Stultz <john.stultz@linaro.org>
Signed-off-by: Feng Tang <feng.tang@intel.com>
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 include/linux/time.h      |  6 ++++++
 kernel/time/timekeeping.c | 16 +++++++++++-----
 2 files changed, 17 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/time.h b/include/linux/time.h
index 0015aea4c4a7..dfbc4e82e8ba 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -115,6 +115,12 @@ static inline bool timespec_valid_strict(const struct timespec *ts)
 	return true;
 }
 
+extern bool persistent_clock_exist;
+static inline bool has_persistent_clock(void)
+{
+	return persistent_clock_exist;
+}
+
 extern void read_persistent_clock(struct timespec *ts);
 extern void read_boot_clock(struct timespec *ts);
 extern int update_persistent_clock(struct timespec now);
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index dfc7f87eb332..b7a584177618 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -28,6 +28,9 @@ static struct timekeeper timekeeper;
 /* flag for if timekeeping is suspended */
 int __read_mostly timekeeping_suspended;
 
+/* Flag for if there is a persistent clock on this platform */
+bool __read_mostly persistent_clock_exist = false;
+
 static inline void tk_normalize_xtime(struct timekeeper *tk)
 {
 	while (tk->xtime_nsec >= ((u64)NSEC_PER_SEC << tk->shift)) {
@@ -609,12 +612,14 @@ void __init timekeeping_init(void)
 	struct timespec now, boot, tmp;
 
 	read_persistent_clock(&now);
+
 	if (!timespec_valid_strict(&now)) {
 		pr_warn("WARNING: Persistent clock returned invalid value!\n"
 			"         Check your CMOS/BIOS settings.\n");
 		now.tv_sec = 0;
 		now.tv_nsec = 0;
-	}
+	} else if (now.tv_sec || now.tv_nsec)
+		persistent_clock_exist = true;
 
 	read_boot_clock(&boot);
 	if (!timespec_valid_strict(&boot)) {
@@ -687,11 +692,12 @@ void timekeeping_inject_sleeptime(struct timespec *delta)
 {
 	struct timekeeper *tk = &timekeeper;
 	unsigned long flags;
-	struct timespec ts;
 
-	/* Make sure we don't set the clock twice */
-	read_persistent_clock(&ts);
-	if (!(ts.tv_sec == 0 && ts.tv_nsec == 0))
+	/*
+	 * Make sure we don't set the clock twice, as timekeeping_resume()
+	 * already did it
+	 */
+	if (has_persistent_clock())
 		return;
 
 	write_seqlock_irqsave(&tk->lock, flags);
-- 
cgit v1.2.3


From 05ad717c77b1b8e98a1dd768c3700036d634629e Mon Sep 17 00:00:00 2001
From: Feng Tang <feng.tang@intel.com>
Date: Wed, 16 Jan 2013 00:09:49 +0800
Subject: timekeeping: Add CONFIG_HAS_PERSISTENT_CLOCK option

Make the persistent clock check a kernel config option, so that some
platform can explicitely select it, also make CONFIG_RTC_HCTOSYS and
RTC_SYSTOHC depend on its non-existence, which could prevent the
persistent clock and RTC code from doing similar thing twice during
system's init/suspend/resume phases.

If the CONFIG_HAS_PERSISTENT_CLOCK=n, then no change happens for kernel
which still does the persistent clock check in timekeeping_init().

Cc: Thomas Gleixner <tglx@linutronix.de>
Suggested-by: John Stultz <john.stultz@linaro.org>
Signed-off-by: Feng Tang <feng.tang@intel.com>
[jstultz: Added dependency for RTC_SYSTOHC as well]
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 drivers/rtc/Kconfig  | 2 ++
 include/linux/time.h | 5 +++++
 kernel/time/Kconfig  | 5 +++++
 3 files changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index b377e9672497..05761de7929b 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -20,6 +20,7 @@ if RTC_CLASS
 config RTC_HCTOSYS
 	bool "Set system time from RTC on startup and resume"
 	default y
+	depends on !HAS_PERSISTENT_CLOCK
 	help
 	  If you say yes here, the system time (wall clock) will be set using
 	  the value read from a specified RTC device. This is useful to avoid
@@ -28,6 +29,7 @@ config RTC_HCTOSYS
 config RTC_SYSTOHC
 	bool "Set the RTC time based on NTP synchronization"
 	default y
+	depends on !HAS_PERSISTENT_CLOCK
 	help
 	  If you say yes here, the system time (wall clock) will be stored
 	  in the RTC specified by RTC_HCTOSYS_DEVICE approximately every 11
diff --git a/include/linux/time.h b/include/linux/time.h
index dfbc4e82e8ba..369b6e3b87d8 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -116,10 +116,15 @@ static inline bool timespec_valid_strict(const struct timespec *ts)
 }
 
 extern bool persistent_clock_exist;
+
+#ifdef CONFIG_HAS_PERSISTENT_CLOCK
+#define has_persistent_clock()	true
+#else
 static inline bool has_persistent_clock(void)
 {
 	return persistent_clock_exist;
 }
+#endif
 
 extern void read_persistent_clock(struct timespec *ts);
 extern void read_boot_clock(struct timespec *ts);
diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig
index 8601f0db1261..f7e45b9b142b 100644
--- a/kernel/time/Kconfig
+++ b/kernel/time/Kconfig
@@ -12,6 +12,11 @@ config CLOCKSOURCE_WATCHDOG
 config ARCH_CLOCKSOURCE_DATA
 	bool
 
+# Platforms has a persistent clock
+config HAS_PERSISTENT_CLOCK
+	bool
+	default n
+
 # Timekeeping vsyscall support
 config GENERIC_TIME_VSYSCALL
 	bool
-- 
cgit v1.2.3


From 8ab3e6a08a98f7ff18c6814065eb30ba2e000233 Mon Sep 17 00:00:00 2001
From: Eduardo Valentin <eduardo.valentin@ti.com>
Date: Wed, 2 Jan 2013 15:29:39 +0000
Subject: thermal: Use thermal zone device id in netlink messages

This patch changes the function thermal_generate_netlink_event
to receive a thermal zone device instead of a originator id.

This way, the messages will always be bound to a thermal zone.

Signed-off-by: Eduardo Valentin <eduardo.valentin@ti.com>
Reviewed-by: Durgadoss R <durgadoss.r@intel.com>
Signed-off-by: Zhang Rui <rui.zhang@intel.com>
---
 Documentation/thermal/sysfs-api.txt | 5 +++--
 drivers/thermal/thermal_sys.c       | 8 ++++++--
 include/linux/thermal.h             | 6 ++++--
 3 files changed, 13 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/thermal/sysfs-api.txt b/Documentation/thermal/sysfs-api.txt
index 88c02334e356..526d4b90d6c1 100644
--- a/Documentation/thermal/sysfs-api.txt
+++ b/Documentation/thermal/sysfs-api.txt
@@ -329,8 +329,9 @@ The framework includes a simple notification mechanism, in the form of a
 netlink event. Netlink socket initialization is done during the _init_
 of the framework. Drivers which intend to use the notification mechanism
 just need to call thermal_generate_netlink_event() with two arguments viz
-(originator, event). Typically the originator will be an integer assigned
-to a thermal_zone_device when it registers itself with the framework. The
+(originator, event). The originator is a pointer to struct thermal_zone_device
+from where the event has been originated. An integer which represents the
+thermal zone device will be used in the message to identify the zone. The
 event will be one of:{THERMAL_AUX0, THERMAL_AUX1, THERMAL_CRITICAL,
 THERMAL_DEV_FAULT}. Notification can be sent when the current temperature
 crosses any of the configured thresholds.
diff --git a/drivers/thermal/thermal_sys.c b/drivers/thermal/thermal_sys.c
index 8c8ce806180f..d85f51f433be 100644
--- a/drivers/thermal/thermal_sys.c
+++ b/drivers/thermal/thermal_sys.c
@@ -1711,7 +1711,8 @@ static struct genl_multicast_group thermal_event_mcgrp = {
 	.name = THERMAL_GENL_MCAST_GROUP_NAME,
 };
 
-int thermal_generate_netlink_event(u32 orig, enum events event)
+int thermal_generate_netlink_event(struct thermal_zone_device *tz,
+					enum events event)
 {
 	struct sk_buff *skb;
 	struct nlattr *attr;
@@ -1721,6 +1722,9 @@ int thermal_generate_netlink_event(u32 orig, enum events event)
 	int result;
 	static unsigned int thermal_event_seqnum;
 
+	if (!tz)
+		return -EINVAL;
+
 	/* allocate memory */
 	size = nla_total_size(sizeof(struct thermal_genl_event)) +
 	       nla_total_size(0);
@@ -1755,7 +1759,7 @@ int thermal_generate_netlink_event(u32 orig, enum events event)
 
 	memset(thermal_event, 0, sizeof(struct thermal_genl_event));
 
-	thermal_event->orig = orig;
+	thermal_event->orig = tz->id;
 	thermal_event->event = event;
 
 	/* send multicast genetlink message */
diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index 883bcda7e1e4..9b78f8c6f773 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -246,9 +246,11 @@ int thermal_register_governor(struct thermal_governor *);
 void thermal_unregister_governor(struct thermal_governor *);
 
 #ifdef CONFIG_NET
-extern int thermal_generate_netlink_event(u32 orig, enum events event);
+extern int thermal_generate_netlink_event(struct thermal_zone_device *tz,
+						enum events event);
 #else
-static inline int thermal_generate_netlink_event(u32 orig, enum events event)
+static int thermal_generate_netlink_event(struct thermal_zone_device *tz,
+						enum events event)
 {
 	return 0;
 }
-- 
cgit v1.2.3


From 55c7c0fdc5aae43ca7eda3adb22ab782634fcb24 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@linux.intel.com>
Date: Thu, 29 Nov 2012 09:03:00 +1030
Subject: serial: quatech: add the other serial identifiers and preliminary
 control code

Jonathan Woithe posted an out of tree enabler/control module for these
cards.  Lift the relevant identifiers and put them in the 8250_pci driver
along with code used to control custom registers on these cards.

Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Jonathan Woithe <jwoithe@just42.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/8250/8250_pci.c | 309 +++++++++++++++++++++++++++++++++++++
 include/linux/pci_ids.h            |  15 ++
 2 files changed, 324 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c
index 26b9dc012ed0..89060ffb6b00 100644
--- a/drivers/tty/serial/8250/8250_pci.c
+++ b/drivers/tty/serial/8250/8250_pci.c
@@ -1040,6 +1040,253 @@ static int pci_asix_setup(struct serial_private *priv,
 	return pci_default_setup(priv, board, port, idx);
 }
 
+/* Quatech devices have their own extra interface features */
+
+struct quatech_feature {
+	u16 devid;
+	bool amcc;
+};
+
+#define QPCR_TEST_FOR1		0x3F
+#define QPCR_TEST_GET1		0x00
+#define QPCR_TEST_FOR2		0x40
+#define QPCR_TEST_GET2		0x40
+#define QPCR_TEST_FOR3		0x80
+#define QPCR_TEST_GET3		0x40
+#define QPCR_TEST_FOR4		0xC0
+#define QPCR_TEST_GET4		0x80
+
+#define QOPR_CLOCK_X1		0x0000
+#define QOPR_CLOCK_X2		0x0001
+#define QOPR_CLOCK_X4		0x0002
+#define QOPR_CLOCK_X8		0x0003
+#define QOPR_CLOCK_RATE_MASK	0x0003
+
+
+static struct quatech_feature quatech_cards[] = {
+	{ PCI_DEVICE_ID_QUATECH_QSC100,   1 },
+	{ PCI_DEVICE_ID_QUATECH_DSC100,   1 },
+	{ PCI_DEVICE_ID_QUATECH_DSC100E,  0 },
+	{ PCI_DEVICE_ID_QUATECH_DSC200,   1 },
+	{ PCI_DEVICE_ID_QUATECH_DSC200E,  0 },
+	{ PCI_DEVICE_ID_QUATECH_ESC100D,  1 },
+	{ PCI_DEVICE_ID_QUATECH_ESC100M,  1 },
+	{ PCI_DEVICE_ID_QUATECH_QSCP100,  1 },
+	{ PCI_DEVICE_ID_QUATECH_DSCP100,  1 },
+	{ PCI_DEVICE_ID_QUATECH_QSCP200,  1 },
+	{ PCI_DEVICE_ID_QUATECH_DSCP200,  1 },
+	{ PCI_DEVICE_ID_QUATECH_ESCLP100, 0 },
+	{ PCI_DEVICE_ID_QUATECH_QSCLP100, 0 },
+	{ PCI_DEVICE_ID_QUATECH_DSCLP100, 0 },
+	{ PCI_DEVICE_ID_QUATECH_SSCLP100, 0 },
+	{ PCI_DEVICE_ID_QUATECH_QSCLP200, 0 },
+	{ PCI_DEVICE_ID_QUATECH_DSCLP200, 0 },
+	{ PCI_DEVICE_ID_QUATECH_SSCLP200, 0 },
+	{ PCI_DEVICE_ID_QUATECH_SPPXP_100, 0 },
+	{ 0, }
+};
+
+static int pci_quatech_amcc(u16 devid)
+{
+	struct quatech_feature *qf = &quatech_cards[0];
+	while (qf->devid) {
+		if (qf->devid == devid)
+			return qf->amcc;
+		qf++;
+	}
+	pr_err("quatech: unknown port type '0x%04X'.\n", devid);
+	return 0;
+};
+
+static int pci_quatech_rqopr(struct uart_8250_port *port)
+{
+	unsigned long base = port->port.iobase;
+	u8 LCR, val;
+
+	LCR = inb(base + UART_LCR);
+	outb(0xBF, base + UART_LCR);
+	val = inb(base + UART_SCR);
+	outb(LCR, base + UART_LCR);
+	return val;
+}
+
+static void pci_quatech_wqopr(struct uart_8250_port *port, u8 qopr)
+{
+	unsigned long base = port->port.iobase;
+	u8 LCR, val;
+
+	LCR = inb(base + UART_LCR);
+	outb(0xBF, base + UART_LCR);
+	val = inb(base + UART_SCR);
+	outb(qopr, base + UART_SCR);
+	outb(LCR, base + UART_LCR);
+}
+
+static int pci_quatech_rqmcr(struct uart_8250_port *port)
+{
+	unsigned long base = port->port.iobase;
+	u8 LCR, val, qmcr;
+
+	LCR = inb(base + UART_LCR);
+	outb(0xBF, base + UART_LCR);
+	val = inb(base + UART_SCR);
+	outb(val | 0x10, base + UART_SCR);
+	qmcr = inb(base + UART_MCR);
+	outb(val, base + UART_SCR);
+	outb(LCR, base + UART_LCR);
+
+	return qmcr;
+}
+
+static void pci_quatech_wqmcr(struct uart_8250_port *port, u8 qmcr)
+{
+	unsigned long base = port->port.iobase;
+	u8 LCR, val;
+
+	LCR = inb(base + UART_LCR);
+	outb(0xBF, base + UART_LCR);
+	val = inb(base + UART_SCR);
+	outb(val | 0x10, base + UART_SCR);
+	outb(qmcr, base + UART_MCR);
+	outb(val, base + UART_SCR);
+	outb(LCR, base + UART_LCR);
+}
+
+static int pci_quatech_has_qmcr(struct uart_8250_port *port)
+{
+	unsigned long base = port->port.iobase;
+	u8 LCR, val;
+
+	LCR = inb(base + UART_LCR);
+	outb(0xBF, base + UART_LCR);
+	val = inb(base + UART_SCR);
+	if (val & 0x20) {
+		outb(0x80, UART_LCR);
+		if (!(inb(UART_SCR) & 0x20)) {
+			outb(LCR, base + UART_LCR);
+			return 1;
+		}
+	}
+	return 0;
+}
+
+static int pci_quatech_test(struct uart_8250_port *port)
+{
+	u8 reg;
+	u8 qopr = pci_quatech_rqopr(port);
+	pci_quatech_wqopr(port, qopr & QPCR_TEST_FOR1);
+	reg = pci_quatech_rqopr(port) & 0xC0;
+	if (reg != QPCR_TEST_GET1)
+		return -EINVAL;
+	pci_quatech_wqopr(port, (qopr & QPCR_TEST_FOR1)|QPCR_TEST_FOR2);
+	reg = pci_quatech_rqopr(port) & 0xC0;
+	if (reg != QPCR_TEST_GET2)
+		return -EINVAL;
+	pci_quatech_wqopr(port, (qopr & QPCR_TEST_FOR1)|QPCR_TEST_FOR3);
+	reg = pci_quatech_rqopr(port) & 0xC0;
+	if (reg != QPCR_TEST_GET3)
+		return -EINVAL;
+	pci_quatech_wqopr(port, (qopr & QPCR_TEST_FOR1)|QPCR_TEST_FOR4);
+	reg = pci_quatech_rqopr(port) & 0xC0;
+	if (reg != QPCR_TEST_GET4)
+		return -EINVAL;
+
+	pci_quatech_wqopr(port, qopr);
+	return 0;
+}
+
+static int pci_quatech_clock(struct uart_8250_port *port)
+{
+	u8 qopr, reg, set;
+	unsigned long clock;
+
+	if (pci_quatech_test(port) < 0)
+		return 1843200;
+
+	qopr = pci_quatech_rqopr(port);
+
+	pci_quatech_wqopr(port, qopr & ~QOPR_CLOCK_X8);
+	reg = pci_quatech_rqopr(port);
+	if (reg & QOPR_CLOCK_X8) {
+		clock = 1843200;
+		goto out;
+	}
+	pci_quatech_wqopr(port, qopr | QOPR_CLOCK_X8);
+	reg = pci_quatech_rqopr(port);
+	if (!(reg & QOPR_CLOCK_X8)) {
+		clock = 1843200;
+		goto out;
+	}
+	reg &= QOPR_CLOCK_X8;
+	if (reg == QOPR_CLOCK_X2) {
+		clock =  3685400;
+		set = QOPR_CLOCK_X2;
+	} else if (reg == QOPR_CLOCK_X4) {
+		clock = 7372800;
+		set = QOPR_CLOCK_X4;
+	} else if (reg == QOPR_CLOCK_X8) {
+		clock = 14745600;
+		set = QOPR_CLOCK_X8;
+	} else {
+		clock = 1843200;
+		set = QOPR_CLOCK_X1;
+	}
+	qopr &= ~QOPR_CLOCK_RATE_MASK;
+	qopr |= set;
+
+out:
+	pci_quatech_wqopr(port, qopr);
+	return clock;
+}
+
+static int pci_quatech_rs422(struct uart_8250_port *port)
+{
+	u8 qmcr;
+	int rs422 = 0;
+
+	if (!pci_quatech_has_qmcr(port))
+		return 0;
+	qmcr = pci_quatech_rqmcr(port);
+	pci_quatech_wqmcr(port, 0xFF);
+	if (pci_quatech_rqmcr(port))
+		rs422 = 1;
+	pci_quatech_wqmcr(port, qmcr);
+	return rs422;
+}
+
+static int pci_quatech_init(struct pci_dev *dev)
+{
+	if (pci_quatech_amcc(dev->device)) {
+		unsigned long base = pci_resource_start(dev, 0);
+		if (base) {
+			u32 tmp;
+			outl(inl(base + 0x38), base + 0x38);
+			tmp = inl(base + 0x3c);
+			outl(tmp | 0x01000000, base + 0x3c);
+			outl(tmp, base + 0x3c);
+		}
+	}
+	return 0;
+}
+
+static int pci_quatech_setup(struct serial_private *priv,
+		  const struct pciserial_board *board,
+		  struct uart_8250_port *port, int idx)
+{
+	/* Needed by pci_quatech calls below */
+	port->port.iobase = pci_resource_start(priv->dev, FL_GET_BASE(board->flags));
+	/* Set up the clocking */
+	port->port.uartclk = pci_quatech_clock(port);
+	/* For now just warn about RS422 */
+	if (pci_quatech_rs422(port))
+		pr_warn("quatech: software control of RS422 features not currently supported.\n");
+	return pci_default_setup(priv, board, port, idx);
+}
+
+static void __devexit pci_quatech_exit(struct pci_dev *dev)
+{
+}
+
 static int pci_default_setup(struct serial_private *priv,
 		  const struct pciserial_board *board,
 		  struct uart_8250_port *port, int idx)
@@ -1528,6 +1775,16 @@ static struct pci_serial_quirk pci_serial_quirks[] __refdata = {
 		.setup		= pci_ni8430_setup,
 		.exit		= pci_ni8430_exit,
 	},
+	/* Quatech */
+	{
+		.vendor		= PCI_VENDOR_ID_QUATECH,
+		.device		= PCI_ANY_ID,
+		.subvendor	= PCI_ANY_ID,
+		.subdevice	= PCI_ANY_ID,
+		.init		= pci_quatech_init,
+		.setup		= pci_quatech_setup,
+		.exit		= __devexit_p(pci_quatech_exit),
+	},
 	/*
 	 * Panacom
 	 */
@@ -3475,18 +3732,70 @@ static struct pci_device_id serial_pci_tbl[] = {
 	{	PCI_VENDOR_ID_PLX, PCI_DEVICE_ID_PLX_ROMULUS,
 		0x10b5, 0x106a, 0, 0,
 		pbn_plx_romulus },
+	/*
+	 * Quatech cards. These actually have configurable clocks but for
+	 * now we just use the default.
+	 *
+	 * 100 series are RS232, 200 series RS422,
+	 */
 	{	PCI_VENDOR_ID_QUATECH, PCI_DEVICE_ID_QUATECH_QSC100,
 		PCI_ANY_ID, PCI_ANY_ID, 0, 0,
 		pbn_b1_4_115200 },
 	{	PCI_VENDOR_ID_QUATECH, PCI_DEVICE_ID_QUATECH_DSC100,
 		PCI_ANY_ID, PCI_ANY_ID, 0, 0,
 		pbn_b1_2_115200 },
+	{	PCI_VENDOR_ID_QUATECH, PCI_DEVICE_ID_QUATECH_DSC100E,
+		PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+		pbn_b2_2_115200 },
+	{	PCI_VENDOR_ID_QUATECH, PCI_DEVICE_ID_QUATECH_DSC200,
+		PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+		pbn_b1_2_115200 },
+	{	PCI_VENDOR_ID_QUATECH, PCI_DEVICE_ID_QUATECH_DSC200E,
+		PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+		pbn_b2_2_115200 },
+	{	PCI_VENDOR_ID_QUATECH, PCI_DEVICE_ID_QUATECH_QSC200,
+		PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+		pbn_b1_4_115200 },
 	{	PCI_VENDOR_ID_QUATECH, PCI_DEVICE_ID_QUATECH_ESC100D,
 		PCI_ANY_ID, PCI_ANY_ID, 0, 0,
 		pbn_b1_8_115200 },
 	{	PCI_VENDOR_ID_QUATECH, PCI_DEVICE_ID_QUATECH_ESC100M,
 		PCI_ANY_ID, PCI_ANY_ID, 0, 0,
 		pbn_b1_8_115200 },
+	{	PCI_VENDOR_ID_QUATECH, PCI_DEVICE_ID_QUATECH_QSCP100,
+		PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+		pbn_b1_4_115200 },
+	{	PCI_VENDOR_ID_QUATECH, PCI_DEVICE_ID_QUATECH_DSCP100,
+		PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+		pbn_b1_2_115200 },
+	{	PCI_VENDOR_ID_QUATECH, PCI_DEVICE_ID_QUATECH_QSCP200,
+		PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+		pbn_b1_4_115200 },
+	{	PCI_VENDOR_ID_QUATECH, PCI_DEVICE_ID_QUATECH_DSCP200,
+		PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+		pbn_b1_2_115200 },
+	{	PCI_VENDOR_ID_QUATECH, PCI_DEVICE_ID_QUATECH_QSCLP100,
+		PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+		pbn_b2_4_115200 },
+	{	PCI_VENDOR_ID_QUATECH, PCI_DEVICE_ID_QUATECH_DSCLP100,
+		PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+		pbn_b2_2_115200 },
+	{	PCI_VENDOR_ID_QUATECH, PCI_DEVICE_ID_QUATECH_SSCLP100,
+		PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+		pbn_b2_1_115200 },
+	{	PCI_VENDOR_ID_QUATECH, PCI_DEVICE_ID_QUATECH_QSCLP200,
+		PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+		pbn_b2_4_115200 },
+	{	PCI_VENDOR_ID_QUATECH, PCI_DEVICE_ID_QUATECH_DSCLP200,
+		PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+		pbn_b2_2_115200 },
+	{	PCI_VENDOR_ID_QUATECH, PCI_DEVICE_ID_QUATECH_SSCLP200,
+		PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+		pbn_b2_1_115200 },
+	{	PCI_VENDOR_ID_QUATECH, PCI_DEVICE_ID_QUATECH_ESCLP100,
+		PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+		pbn_b0_8_115200 },
+
 	{	PCI_VENDOR_ID_SPECIALIX, PCI_DEVICE_ID_OXSEMI_16PCI954,
 		PCI_VENDOR_ID_SPECIALIX, PCI_SUBDEVICE_ID_SPECIALIX_SPEED4,
 		0, 0,
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 0eb65796bcb9..19e8d7a42b34 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -1868,8 +1868,23 @@
 #define PCI_VENDOR_ID_QUATECH		0x135C
 #define PCI_DEVICE_ID_QUATECH_QSC100	0x0010
 #define PCI_DEVICE_ID_QUATECH_DSC100	0x0020
+#define PCI_DEVICE_ID_QUATECH_DSC200	0x0030
+#define PCI_DEVICE_ID_QUATECH_QSC200	0x0040
 #define PCI_DEVICE_ID_QUATECH_ESC100D	0x0050
 #define PCI_DEVICE_ID_QUATECH_ESC100M	0x0060
+#define PCI_DEVICE_ID_QUATECH_QSCP100	0x0120
+#define PCI_DEVICE_ID_QUATECH_DSCP100	0x0130
+#define PCI_DEVICE_ID_QUATECH_QSCP200	0x0140
+#define PCI_DEVICE_ID_QUATECH_DSCP200	0x0150
+#define PCI_DEVICE_ID_QUATECH_QSCLP100	0x0170
+#define PCI_DEVICE_ID_QUATECH_DSCLP100	0x0180
+#define PCI_DEVICE_ID_QUATECH_DSC100E	0x0181
+#define PCI_DEVICE_ID_QUATECH_SSCLP100	0x0190
+#define PCI_DEVICE_ID_QUATECH_QSCLP200	0x01A0
+#define PCI_DEVICE_ID_QUATECH_DSCLP200	0x01B0
+#define PCI_DEVICE_ID_QUATECH_DSC200E	0x01B1
+#define PCI_DEVICE_ID_QUATECH_SSCLP200	0x01C0
+#define PCI_DEVICE_ID_QUATECH_ESCLP100	0x01E0
 #define PCI_DEVICE_ID_QUATECH_SPPXP_100 0x0278
 
 #define PCI_VENDOR_ID_SEALEVEL		0x135e
-- 
cgit v1.2.3


From 6f538fe31c1d453b7e7fc4f6e7c6a9bdead4a6f2 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Fri, 7 Dec 2012 11:36:08 +0100
Subject: tty: serial core: decouple pm states from ACPI

The serial core is using power states lifted from ACPI for no
good reason. Remove this reference from the documentation and
alter all users to use an enum specific to the serial core
instead, and define it in <linux/serial_core.h>.

Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Acked-by: Alan Cox <alan@linux.intel.com>
Reviewed-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/serial/driver      |  5 ++---
 drivers/tty/serial/serial_core.c | 30 ++++++++++++++++--------------
 include/linux/serial_core.h      | 14 +++++++++++++-
 3 files changed, 31 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/serial/driver b/Documentation/serial/driver
index 0a25a9191864..a6ef8dc436f1 100644
--- a/Documentation/serial/driver
+++ b/Documentation/serial/driver
@@ -242,9 +242,8 @@ hardware.
 
   pm(port,state,oldstate)
 	Perform any power management related activities on the specified
-	port.  State indicates the new state (defined by ACPI D0-D3),
-	oldstate indicates the previous state.  Essentially, D0 means
-	fully on, D3 means powered down.
+	port.  State indicates the new state (defined by
+	enum uart_pm_state), oldstate indicates the previous state.
 
 	This function should not be used to grab any resources.
 
diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c
index 2c7230aaefd4..82d7ce8c9409 100644
--- a/drivers/tty/serial/serial_core.c
+++ b/drivers/tty/serial/serial_core.c
@@ -59,7 +59,8 @@ static struct lock_class_key port_lock_key;
 static void uart_change_speed(struct tty_struct *tty, struct uart_state *state,
 					struct ktermios *old_termios);
 static void uart_wait_until_sent(struct tty_struct *tty, int timeout);
-static void uart_change_pm(struct uart_state *state, int pm_state);
+static void uart_change_pm(struct uart_state *state,
+			   enum uart_pm_state pm_state);
 
 static void uart_port_shutdown(struct tty_port *port);
 
@@ -1365,7 +1366,7 @@ static void uart_close(struct tty_struct *tty, struct file *filp)
 		spin_lock_irqsave(&port->lock, flags);
 	} else if (!uart_console(uport)) {
 		spin_unlock_irqrestore(&port->lock, flags);
-		uart_change_pm(state, 3);
+		uart_change_pm(state, UART_PM_STATE_OFF);
 		spin_lock_irqsave(&port->lock, flags);
 	}
 
@@ -1579,7 +1580,7 @@ static int uart_open(struct tty_struct *tty, struct file *filp)
 	 * Make sure the device is in D0 state.
 	 */
 	if (port->count == 1)
-		uart_change_pm(state, 0);
+		uart_change_pm(state, UART_PM_STATE_ON);
 
 	/*
 	 * Start up the serial port.
@@ -1620,7 +1621,7 @@ static void uart_line_info(struct seq_file *m, struct uart_driver *drv, int i)
 {
 	struct uart_state *state = drv->state + i;
 	struct tty_port *port = &state->port;
-	int pm_state;
+	enum uart_pm_state pm_state;
 	struct uart_port *uport = state->uart_port;
 	char stat_buf[32];
 	unsigned int status;
@@ -1645,12 +1646,12 @@ static void uart_line_info(struct seq_file *m, struct uart_driver *drv, int i)
 	if (capable(CAP_SYS_ADMIN)) {
 		mutex_lock(&port->mutex);
 		pm_state = state->pm_state;
-		if (pm_state)
-			uart_change_pm(state, 0);
+		if (pm_state != UART_PM_STATE_ON)
+			uart_change_pm(state, UART_PM_STATE_ON);
 		spin_lock_irq(&uport->lock);
 		status = uport->ops->get_mctrl(uport);
 		spin_unlock_irq(&uport->lock);
-		if (pm_state)
+		if (pm_state != UART_PM_STATE_ON)
 			uart_change_pm(state, pm_state);
 		mutex_unlock(&port->mutex);
 
@@ -1897,7 +1898,8 @@ EXPORT_SYMBOL_GPL(uart_set_options);
  *
  * Locking: port->mutex has to be held
  */
-static void uart_change_pm(struct uart_state *state, int pm_state)
+static void uart_change_pm(struct uart_state *state,
+			   enum uart_pm_state pm_state)
 {
 	struct uart_port *port = state->uart_port;
 
@@ -1982,7 +1984,7 @@ int uart_suspend_port(struct uart_driver *drv, struct uart_port *uport)
 		console_stop(uport->cons);
 
 	if (console_suspend_enabled || !uart_console(uport))
-		uart_change_pm(state, 3);
+		uart_change_pm(state, UART_PM_STATE_OFF);
 
 	mutex_unlock(&port->mutex);
 
@@ -2027,7 +2029,7 @@ int uart_resume_port(struct uart_driver *drv, struct uart_port *uport)
 			termios = port->tty->termios;
 
 		if (console_suspend_enabled)
-			uart_change_pm(state, 0);
+			uart_change_pm(state, UART_PM_STATE_ON);
 		uport->ops->set_termios(uport, &termios, NULL);
 		if (console_suspend_enabled)
 			console_start(uport->cons);
@@ -2037,7 +2039,7 @@ int uart_resume_port(struct uart_driver *drv, struct uart_port *uport)
 		const struct uart_ops *ops = uport->ops;
 		int ret;
 
-		uart_change_pm(state, 0);
+		uart_change_pm(state, UART_PM_STATE_ON);
 		spin_lock_irq(&uport->lock);
 		ops->set_mctrl(uport, 0);
 		spin_unlock_irq(&uport->lock);
@@ -2137,7 +2139,7 @@ uart_configure_port(struct uart_driver *drv, struct uart_state *state,
 		uart_report_port(drv, port);
 
 		/* Power up port for set_mctrl() */
-		uart_change_pm(state, 0);
+		uart_change_pm(state, UART_PM_STATE_ON);
 
 		/*
 		 * Ensure that the modem control lines are de-activated.
@@ -2161,7 +2163,7 @@ uart_configure_port(struct uart_driver *drv, struct uart_state *state,
 		 * console if we have one.
 		 */
 		if (!uart_console(port))
-			uart_change_pm(state, 3);
+			uart_change_pm(state, UART_PM_STATE_OFF);
 	}
 }
 
@@ -2588,7 +2590,7 @@ int uart_add_one_port(struct uart_driver *drv, struct uart_port *uport)
 	}
 
 	state->uart_port = uport;
-	state->pm_state = -1;
+	state->pm_state = UART_PM_STATE_UNDEFINED;
 
 	uport->cons = drv->cons;
 	uport->state = state;
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index c6690a2a27fb..a116daa13113 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -208,13 +208,25 @@ static inline void serial_port_out(struct uart_port *up, int offset, int value)
 	up->serial_out(up, offset, value);
 }
 
+/**
+ * enum uart_pm_state - power states for UARTs
+ * @UART_PM_STATE_ON: UART is powered, up and operational
+ * @UART_PM_STATE_OFF: UART is powered off
+ * @UART_PM_STATE_UNDEFINED: sentinel
+ */
+enum uart_pm_state {
+	UART_PM_STATE_ON = 0,
+	UART_PM_STATE_OFF = 3, /* number taken from ACPI */
+	UART_PM_STATE_UNDEFINED,
+};
+
 /*
  * This is the state information which is persistent across opens.
  */
 struct uart_state {
 	struct tty_port		port;
 
-	int			pm_state;
+	enum uart_pm_state	pm_state;
 	struct circ_buf		xmit;
 
 	struct uart_port	*uart_port;
-- 
cgit v1.2.3


From a205a56dc24811a2879572e52c902a38425a4473 Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Tue, 18 Dec 2012 11:41:14 +0100
Subject: serial: Remove RM9000 series serial driver.

Now that support for RM9000 and platforms based on it has been removed,
remove the serial driver for it as well.  It's really only been a quirk
for an almost 8250 compatible UART anyway.

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>

 drivers/tty/serial/8250/8250.c  | 70 +----------------------------------------
 drivers/tty/serial/8250/Kconfig |  9 ------
 include/linux/serial_core.h     |  1 -
 3 files changed, 1 insertion(+), 79 deletions(-)
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/8250/8250.c  | 70 +----------------------------------------
 drivers/tty/serial/8250/Kconfig |  9 ------
 include/linux/serial_core.h     |  1 -
 3 files changed, 1 insertion(+), 79 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/tty/serial/8250/8250.c b/drivers/tty/serial/8250/8250.c
index d085e3a8ec06..5fb6577b94dc 100644
--- a/drivers/tty/serial/8250/8250.c
+++ b/drivers/tty/serial/8250/8250.c
@@ -239,13 +239,6 @@ static const struct serial8250_config uart_config[] = {
 		.fcr		= UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10,
 		.flags		= UART_CAP_FIFO | UART_CAP_UUE | UART_CAP_RTOIE,
 	},
-	[PORT_RM9000] = {
-		.name		= "RM9000",
-		.fifo_size	= 16,
-		.tx_loadsz	= 16,
-		.fcr		= UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10,
-		.flags		= UART_CAP_FIFO,
-	},
 	[PORT_OCTEON] = {
 		.name		= "OCTEON",
 		.fifo_size	= 64,
@@ -364,56 +357,6 @@ static void au_serial_dl_write(struct uart_8250_port *up, int value)
 
 #endif
 
-#ifdef CONFIG_SERIAL_8250_RM9K
-
-static const u8
-	regmap_in[8] = {
-		[UART_RX]	= 0x00,
-		[UART_IER]	= 0x0c,
-		[UART_IIR]	= 0x14,
-		[UART_LCR]	= 0x1c,
-		[UART_MCR]	= 0x20,
-		[UART_LSR]	= 0x24,
-		[UART_MSR]	= 0x28,
-		[UART_SCR]	= 0x2c
-	},
-	regmap_out[8] = {
-		[UART_TX] 	= 0x04,
-		[UART_IER]	= 0x0c,
-		[UART_FCR]	= 0x18,
-		[UART_LCR]	= 0x1c,
-		[UART_MCR]	= 0x20,
-		[UART_LSR]	= 0x24,
-		[UART_MSR]	= 0x28,
-		[UART_SCR]	= 0x2c
-	};
-
-static unsigned int rm9k_serial_in(struct uart_port *p, int offset)
-{
-	offset = regmap_in[offset] << p->regshift;
-	return readl(p->membase + offset);
-}
-
-static void rm9k_serial_out(struct uart_port *p, int offset, int value)
-{
-	offset = regmap_out[offset] << p->regshift;
-	writel(value, p->membase + offset);
-}
-
-static int rm9k_serial_dl_read(struct uart_8250_port *up)
-{
-	return ((__raw_readl(up->port.membase + 0x10) << 8) |
-		(__raw_readl(up->port.membase + 0x08) & 0xff)) & 0xffff;
-}
-
-static void rm9k_serial_dl_write(struct uart_8250_port *up, int value)
-{
-	__raw_writel(value, up->port.membase + 0x08);
-	__raw_writel(value >> 8, up->port.membase + 0x10);
-}
-
-#endif
-
 static unsigned int hub6_serial_in(struct uart_port *p, int offset)
 {
 	offset = offset << p->regshift;
@@ -491,15 +434,6 @@ static void set_io_from_upio(struct uart_port *p)
 		p->serial_out = mem32_serial_out;
 		break;
 
-#ifdef CONFIG_SERIAL_8250_RM9K
-	case UPIO_RM9000:
-		p->serial_in = rm9k_serial_in;
-		p->serial_out = rm9k_serial_out;
-		up->dl_read = rm9k_serial_dl_read;
-		up->dl_write = rm9k_serial_dl_write;
-		break;
-#endif
-
 #ifdef CONFIG_MIPS_ALCHEMY
 	case UPIO_AU:
 		p->serial_in = au_serial_in;
@@ -1343,9 +1277,7 @@ static void serial8250_start_tx(struct uart_port *port)
 			unsigned char lsr;
 			lsr = serial_in(up, UART_LSR);
 			up->lsr_saved_flags |= lsr & LSR_SAVE_FLAGS;
-			if ((port->type == PORT_RM9000) ?
-				(lsr & UART_LSR_THRE) :
-				(lsr & UART_LSR_TEMT))
+			if (lsr & UART_LSR_TEMT)
 				serial8250_tx_chars(up);
 		}
 	}
diff --git a/drivers/tty/serial/8250/Kconfig b/drivers/tty/serial/8250/Kconfig
index c31133a6ea8e..d79208f47237 100644
--- a/drivers/tty/serial/8250/Kconfig
+++ b/drivers/tty/serial/8250/Kconfig
@@ -249,15 +249,6 @@ config SERIAL_8250_ACORN
 	  system, say Y to this option.  The driver can handle 1, 2, or 3 port
 	  cards.  If unsure, say N.
 
-config SERIAL_8250_RM9K
-	bool "Support for MIPS RM9xxx integrated serial port"
-	depends on SERIAL_8250 != n && SERIAL_RM9000
-	select SERIAL_8250_SHARE_IRQ
-	help
-	  Selecting this option will add support for the integrated serial
-	  port hardware found on MIPS RM9122 and similar processors.
-	  If unsure, say N.
-
 config SERIAL_8250_FSL
 	bool
 	depends on SERIAL_8250_CONSOLE && PPC_UDBG_16550
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index a116daa13113..ec5df74c4506 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -136,7 +136,6 @@ struct uart_port {
 #define UPIO_MEM32		(3)
 #define UPIO_AU			(4)			/* Au1x00 type IO */
 #define UPIO_TSI		(5)			/* Tsi108/109 type IO */
-#define UPIO_RM9000		(6)			/* RM9000 type IO */
 
 	unsigned int		read_status_mask;	/* driver specific */
 	unsigned int		ignore_status_mask;	/* driver specific */
-- 
cgit v1.2.3


From e759d7c53b39a43fc908425bf9985b8b7d930550 Mon Sep 17 00:00:00 2001
From: Kevin Cernekee <cernekee@gmail.com>
Date: Wed, 26 Dec 2012 20:43:42 -0800
Subject: tty: Update serial core API documentation

Signed-off-by: Kevin Cernekee <cernekee@gmail.com>
Acked-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/serial/driver | 39 +++++++++++++++++++++++++++++++++++++++
 include/linux/serial_core.h |  8 ++++----
 2 files changed, 43 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/serial/driver b/Documentation/serial/driver
index a6ef8dc436f1..067c47d46917 100644
--- a/Documentation/serial/driver
+++ b/Documentation/serial/driver
@@ -133,6 +133,16 @@ hardware.
 	Interrupts: locally disabled.
 	This call must not sleep
 
+  send_xchar(port,ch)
+	Transmit a high priority character, even if the port is stopped.
+	This is used to implement XON/XOFF flow control and tcflow().  If
+	the serial driver does not implement this function, the tty core
+	will append the character to the circular buffer and then call
+	start_tx() / stop_tx() to flush the data out.
+
+	Locking: none.
+	Interrupts: caller dependent.
+
   stop_rx(port)
 	Stop receiving characters; the port is in the process of
 	being closed.
@@ -254,6 +264,10 @@ hardware.
 	Locking: none.
 	Interrupts: caller dependent.
 
+  set_wake(port,state)
+	Enable/disable power management wakeup on serial activity.  Not
+	currently implemented.
+
   type(port)
 	Return a pointer to a string constant describing the specified
 	port, or return NULL, in which case the string 'unknown' is
@@ -306,6 +320,31 @@ hardware.
 	Locking: none.
 	Interrupts: caller dependent.
 
+  poll_init(port)
+	Called by kgdb to perform the minimal hardware initialization needed
+	to support poll_put_char() and poll_get_char().  Unlike ->startup()
+	this should not request interrupts.
+
+	Locking: tty_mutex and tty_port->mutex taken.
+	Interrupts: n/a.
+
+  poll_put_char(port,ch)
+	Called by kgdb to write a single character directly to the serial
+	port.  It can and should block until there is space in the TX FIFO.
+
+	Locking: none.
+	Interrupts: caller dependent.
+	This call must not sleep
+
+  poll_get_char(port)
+	Called by kgdb to read a single character directly from the serial
+	port.  If data is available, it should be returned; otherwise
+	the function should return NO_POLL_CHAR immediately.
+
+	Locking: none.
+	Interrupts: caller dependent.
+	This call must not sleep
+
 Other functions
 ---------------
 
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index ec5df74c4506..82aebc8ff77f 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -37,8 +37,8 @@ struct serial_struct;
 struct device;
 
 /*
- * This structure describes all the operations that can be
- * done on the physical hardware.
+ * This structure describes all the operations that can be done on the
+ * physical hardware.  See Documentation/serial/driver for details.
  */
 struct uart_ops {
 	unsigned int	(*tx_empty)(struct uart_port *);
@@ -65,7 +65,7 @@ struct uart_ops {
 	/*
 	 * Return a string describing the type of the port
 	 */
-	const char *(*type)(struct uart_port *);
+	const char	*(*type)(struct uart_port *);
 
 	/*
 	 * Release IO and memory resources used by the port.
@@ -83,7 +83,7 @@ struct uart_ops {
 	int		(*ioctl)(struct uart_port *, unsigned int, unsigned long);
 #ifdef CONFIG_CONSOLE_POLL
 	int		(*poll_init)(struct uart_port *);
-	void	(*poll_put_char)(struct uart_port *, unsigned char);
+	void		(*poll_put_char)(struct uart_port *, unsigned char);
 	int		(*poll_get_char)(struct uart_port *);
 #endif
 };
-- 
cgit v1.2.3


From 227434f8986c3827a1faedd1feb437acd6285315 Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jslaby@suse.cz>
Date: Thu, 3 Jan 2013 15:53:01 +0100
Subject: TTY: switch tty_buffer_request_room to tty_port

Now, we start converting tty buffer functions to actually use
tty_port. This will allow us to get rid of the need of tty pointer in
many call sites. Only tty_port will be needed and hence no more
tty_port_tty_get calls in those paths.

Here we start with tty_buffer_request_room.

Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/mn10300/kernel/mn10300-serial.c        |  5 +++--
 arch/um/drivers/chan_kern.c                 |  3 ++-
 drivers/char/pcmcia/synclink_cs.c           |  3 ++-
 drivers/isdn/i4l/isdn_common.c              |  3 ++-
 drivers/isdn/i4l/isdn_tty.c                 | 12 +++++++-----
 drivers/staging/dgrp/dgrp_net_ops.c         |  4 ++--
 drivers/tty/cyclades.c                      | 13 ++++++++-----
 drivers/tty/ehv_bytechan.c                  |  2 +-
 drivers/tty/hvc/hvc_console.c               |  2 +-
 drivers/tty/hvc/hvcs.c                      |  2 +-
 drivers/tty/serial/cpm_uart/cpm_uart_core.c |  5 +++--
 drivers/tty/serial/ioc4_serial.c            |  2 +-
 drivers/tty/serial/jsm/jsm_tty.c            |  6 ++++--
 drivers/tty/serial/mpsc.c                   |  6 +++---
 drivers/tty/serial/mrst_max3110.c           |  6 ++++--
 drivers/tty/serial/pch_uart.c               |  5 +++--
 drivers/tty/serial/sh-sci.c                 |  8 +++++---
 drivers/tty/serial/ucc_uart.c               |  5 +++--
 drivers/tty/tty_buffer.c                    |  5 ++---
 drivers/usb/serial/quatech2.c               |  4 ++--
 include/linux/tty_flip.h                    |  2 +-
 21 files changed, 60 insertions(+), 43 deletions(-)

(limited to 'include/linux')

diff --git a/arch/mn10300/kernel/mn10300-serial.c b/arch/mn10300/kernel/mn10300-serial.c
index 81d5cb9b6569..9b2232a78ff9 100644
--- a/arch/mn10300/kernel/mn10300-serial.c
+++ b/arch/mn10300/kernel/mn10300-serial.c
@@ -524,7 +524,8 @@ static int mask_test_and_clear(volatile u8 *ptr, u8 mask)
 static void mn10300_serial_receive_interrupt(struct mn10300_serial_port *port)
 {
 	struct uart_icount *icount = &port->uart.icount;
-	struct tty_struct *tty = port->uart.state->port.tty;
+	struct tty_port *port = &port->uart.state->port;
+	struct tty_struct *tty = port->tty;
 	unsigned ix;
 	int count;
 	u8 st, ch, push, status, overrun;
@@ -534,7 +535,7 @@ static void mn10300_serial_receive_interrupt(struct mn10300_serial_port *port)
 	push = 0;
 
 	count = CIRC_CNT(port->rx_inp, port->rx_outp, MNSC_BUFFER_SIZE);
-	count = tty_buffer_request_room(tty, count);
+	count = tty_buffer_request_room(port, count);
 	if (count == 0) {
 		if (!tty->low_latency)
 			tty_flip_buffer_push(tty);
diff --git a/arch/um/drivers/chan_kern.c b/arch/um/drivers/chan_kern.c
index e9a0abc6a32f..4ff2503a1bb8 100644
--- a/arch/um/drivers/chan_kern.c
+++ b/arch/um/drivers/chan_kern.c
@@ -554,6 +554,7 @@ int parse_chan_pair(char *str, struct line *line, int device,
 
 void chan_interrupt(struct line *line, struct tty_struct *tty, int irq)
 {
+	struct tty_port *port = &line->port;
 	struct chan *chan = line->chan_in;
 	int err;
 	char c;
@@ -562,7 +563,7 @@ void chan_interrupt(struct line *line, struct tty_struct *tty, int irq)
 		goto out;
 
 	do {
-		if (tty && !tty_buffer_request_room(tty, 1)) {
+		if (!tty_buffer_request_room(port, 1)) {
 			schedule_delayed_work(&line->task, 1);
 			goto out;
 		}
diff --git a/drivers/char/pcmcia/synclink_cs.c b/drivers/char/pcmcia/synclink_cs.c
index b2f35d786025..0b1de715f097 100644
--- a/drivers/char/pcmcia/synclink_cs.c
+++ b/drivers/char/pcmcia/synclink_cs.c
@@ -888,6 +888,7 @@ static void rx_ready_hdlc(MGSLPC_INFO *info, int eom)
 
 static void rx_ready_async(MGSLPC_INFO *info, int tcd, struct tty_struct *tty)
 {
+	struct tty_port *port = &info->port;
 	unsigned char data, status, flag;
 	int fifo_count;
 	int work = 0;
@@ -913,7 +914,7 @@ static void rx_ready_async(MGSLPC_INFO *info, int tcd, struct tty_struct *tty)
 	} else
 		fifo_count = 32;
 
-	tty_buffer_request_room(tty, fifo_count);
+	tty_buffer_request_room(port, fifo_count);
 	/* Flush received async data to receive data buffer. */
 	while (fifo_count) {
 		data   = read_reg(info, CHA + RXFIFO);
diff --git a/drivers/isdn/i4l/isdn_common.c b/drivers/isdn/i4l/isdn_common.c
index e2a945ee9f05..7093169ee0c9 100644
--- a/drivers/isdn/i4l/isdn_common.c
+++ b/drivers/isdn/i4l/isdn_common.c
@@ -878,6 +878,7 @@ isdn_readbchan(int di, int channel, u_char *buf, u_char *fp, int len, wait_queue
 int
 isdn_readbchan_tty(int di, int channel, struct tty_struct *tty, int cisco_hack)
 {
+	struct tty_port *port = tty->port;
 	int count;
 	int count_pull;
 	int count_put;
@@ -891,7 +892,7 @@ isdn_readbchan_tty(int di, int channel, struct tty_struct *tty, int cisco_hack)
 	if (skb_queue_empty(&dev->drv[di]->rpqueue[channel]))
 		return 0;
 
-	len = tty_buffer_request_room(tty, dev->drv[di]->rcvcount[channel]);
+	len = tty_buffer_request_room(port, dev->drv[di]->rcvcount[channel]);
 	if (len == 0)
 		return len;
 
diff --git a/drivers/isdn/i4l/isdn_tty.c b/drivers/isdn/i4l/isdn_tty.c
index e09dc8a5e743..4f5bcee7cf32 100644
--- a/drivers/isdn/i4l/isdn_tty.c
+++ b/drivers/isdn/i4l/isdn_tty.c
@@ -60,6 +60,7 @@ static int si2bit[8] =
 static int
 isdn_tty_try_read(modem_info *info, struct sk_buff *skb)
 {
+	struct tty_port *port = &info->port;
 	int c;
 	int len;
 	struct tty_struct *tty;
@@ -68,7 +69,7 @@ isdn_tty_try_read(modem_info *info, struct sk_buff *skb)
 	if (!info->online)
 		return 0;
 
-	tty = info->port.tty;
+	tty = port->tty;
 	if (!tty)
 		return 0;
 
@@ -81,7 +82,7 @@ isdn_tty_try_read(modem_info *info, struct sk_buff *skb)
 #endif
 		;
 
-	c = tty_buffer_request_room(tty, len);
+	c = tty_buffer_request_room(port, len);
 	if (c < len)
 		return 0;
 
@@ -2230,6 +2231,7 @@ void
 isdn_tty_at_cout(char *msg, modem_info *info)
 {
 	struct tty_struct *tty;
+	struct tty_port *port = &info->port;
 	atemu *m = &info->emu;
 	char *p;
 	char c;
@@ -2246,15 +2248,15 @@ isdn_tty_at_cout(char *msg, modem_info *info)
 	l = strlen(msg);
 
 	spin_lock_irqsave(&info->readlock, flags);
-	tty = info->port.tty;
-	if ((info->port.flags & ASYNC_CLOSING) || (!tty)) {
+	tty = port->tty;
+	if ((port->flags & ASYNC_CLOSING) || (!tty)) {
 		spin_unlock_irqrestore(&info->readlock, flags);
 		return;
 	}
 
 	/* use queue instead of direct, if online and */
 	/* data is in queue or buffer is full */
-	if (info->online && ((tty_buffer_request_room(tty, l) < l) ||
+	if (info->online && ((tty_buffer_request_room(port, l) < l) ||
 			     !skb_queue_empty(&dev->drv[info->isdn_driver]->rpqueue[info->isdn_channel]))) {
 		skb = alloc_skb(l, GFP_ATOMIC);
 		if (!skb) {
diff --git a/drivers/staging/dgrp/dgrp_net_ops.c b/drivers/staging/dgrp/dgrp_net_ops.c
index 2d1bbfd5b67c..c0aeaaa094f1 100644
--- a/drivers/staging/dgrp/dgrp_net_ops.c
+++ b/drivers/staging/dgrp/dgrp_net_ops.c
@@ -211,7 +211,7 @@ static void dgrp_input(struct ch_struct *ch)
 	data_len = (ch->ch_rin - ch->ch_rout) & RBUF_MASK;
 
 	/* len is the amount of data we are going to transfer here */
-	len = tty_buffer_request_room(tty, data_len);
+	len = tty_buffer_request_room(&ch->port, data_len);
 
 	/* Check DPA flow control */
 	if ((nd->nd_dpa_debug) &&
@@ -2956,7 +2956,7 @@ check_query:
 			    I_BRKINT(ch->ch_tun.un_tty) &&
 			    !(I_IGNBRK(ch->ch_tun.un_tty))) {
 
-				tty_buffer_request_room(ch->ch_tun.un_tty, 1);
+				tty_buffer_request_room(&ch->port, 1);
 				tty_insert_flip_char(ch->ch_tun.un_tty, 0, TTY_BREAK);
 				tty_flip_buffer_push(ch->ch_tun.un_tty);
 
diff --git a/drivers/tty/cyclades.c b/drivers/tty/cyclades.c
index b09c8d1f9a66..b85acc74eb0d 100644
--- a/drivers/tty/cyclades.c
+++ b/drivers/tty/cyclades.c
@@ -442,6 +442,7 @@ static void cyy_chip_rx(struct cyclades_card *cinfo, int chip,
 {
 	struct cyclades_port *info;
 	struct tty_struct *tty;
+	struct tty_port *port;
 	int len, index = cinfo->bus_index;
 	u8 ivr, save_xir, channel, save_car, data, char_count;
 
@@ -452,11 +453,12 @@ static void cyy_chip_rx(struct cyclades_card *cinfo, int chip,
 	save_xir = readb(base_addr + (CyRIR << index));
 	channel = save_xir & CyIRChannel;
 	info = &cinfo->ports[channel + chip * 4];
+	port = &info->port;
 	save_car = cyy_readb(info, CyCAR);
 	cyy_writeb(info, CyCAR, save_xir);
 	ivr = cyy_readb(info, CyRIVR) & CyIVRMask;
 
-	tty = tty_port_tty_get(&info->port);
+	tty = tty_port_tty_get(port);
 	/* if there is nowhere to put the data, discard it */
 	if (tty == NULL) {
 		if (ivr == CyIVRRxEx) {	/* exception */
@@ -487,14 +489,14 @@ static void cyy_chip_rx(struct cyclades_card *cinfo, int chip,
 			tty_kref_put(tty);
 			return;
 		}
-		if (tty_buffer_request_room(tty, 1)) {
+		if (tty_buffer_request_room(port, 1)) {
 			if (data & info->read_status_mask) {
 				if (data & CyBREAK) {
 					tty_insert_flip_char(tty,
 						cyy_readb(info, CyRDSR),
 						TTY_BREAK);
 					info->icount.rx++;
-					if (info->port.flags & ASYNC_SAK)
+					if (port->flags & ASYNC_SAK)
 						do_SAK(tty);
 				} else if (data & CyFRAME) {
 					tty_insert_flip_char(tty,
@@ -552,7 +554,7 @@ static void cyy_chip_rx(struct cyclades_card *cinfo, int chip,
 			info->mon.char_max = char_count;
 		info->mon.char_last = char_count;
 #endif
-		len = tty_buffer_request_room(tty, char_count);
+		len = tty_buffer_request_room(port, char_count);
 		while (len--) {
 			data = cyy_readb(info, CyRDSR);
 			tty_insert_flip_char(tty, data, TTY_NORMAL);
@@ -928,6 +930,7 @@ static void cyz_handle_rx(struct cyclades_port *info, struct tty_struct *tty)
 {
 	struct BUF_CTRL __iomem *buf_ctrl = info->u.cyz.buf_ctrl;
 	struct cyclades_card *cinfo = info->card;
+	struct tty_port *port = &info->port;
 	unsigned int char_count;
 	int len;
 #ifdef BLOCKMOVE
@@ -983,7 +986,7 @@ static void cyz_handle_rx(struct cyclades_port *info, struct tty_struct *tty)
 				info->idle_stats.recv_bytes += len;
 			}
 #else
-			len = tty_buffer_request_room(tty, char_count);
+			len = tty_buffer_request_room(port, char_count);
 			while (len--) {
 				data = readb(cinfo->base_addr + rx_bufaddr +
 						new_rx_get);
diff --git a/drivers/tty/ehv_bytechan.c b/drivers/tty/ehv_bytechan.c
index c117d775a22f..af97e39d641c 100644
--- a/drivers/tty/ehv_bytechan.c
+++ b/drivers/tty/ehv_bytechan.c
@@ -386,7 +386,7 @@ static irqreturn_t ehv_bc_tty_rx_isr(int irq, void *data)
 	 * read from the byte channel will be accepted by the TTY layer.
 	 */
 	ev_byte_channel_poll(bc->handle, &rx_count, &tx_count);
-	count = tty_buffer_request_room(ttys, rx_count);
+	count = tty_buffer_request_room(&bc->port, rx_count);
 
 	/* 'count' is the maximum amount of data the TTY layer can accept at
 	 * this time.  However, during testing, I was never able to get 'count'
diff --git a/drivers/tty/hvc/hvc_console.c b/drivers/tty/hvc/hvc_console.c
index 13ee53bd0bf6..3d2ea92b8505 100644
--- a/drivers/tty/hvc/hvc_console.c
+++ b/drivers/tty/hvc/hvc_console.c
@@ -629,7 +629,7 @@ int hvc_poll(struct hvc_struct *hp)
 
 	/* Read data if any */
 	for (;;) {
-		int count = tty_buffer_request_room(tty, N_INBUF);
+		int count = tty_buffer_request_room(&hp->port, N_INBUF);
 
 		/* If flip is full, just reschedule a later read */
 		if (count == 0) {
diff --git a/drivers/tty/hvc/hvcs.c b/drivers/tty/hvc/hvcs.c
index 877635733952..4a0ab98e9a63 100644
--- a/drivers/tty/hvc/hvcs.c
+++ b/drivers/tty/hvc/hvcs.c
@@ -609,7 +609,7 @@ static int hvcs_io(struct hvcs_struct *hvcsd)
 	/* remove the read masks */
 	hvcsd->todo_mask &= ~(HVCS_READ_MASK);
 
-	if (tty_buffer_request_room(tty, HVCS_BUFF_LEN) >= HVCS_BUFF_LEN) {
+	if (tty_buffer_request_room(&hvcsd->port, HVCS_BUFF_LEN) >= HVCS_BUFF_LEN) {
 		got = hvc_get_chars(unit_address,
 				&buf[0],
 				HVCS_BUFF_LEN);
diff --git a/drivers/tty/serial/cpm_uart/cpm_uart_core.c b/drivers/tty/serial/cpm_uart/cpm_uart_core.c
index ad0caf176808..42d5eb0125b3 100644
--- a/drivers/tty/serial/cpm_uart/cpm_uart_core.c
+++ b/drivers/tty/serial/cpm_uart/cpm_uart_core.c
@@ -245,7 +245,8 @@ static void cpm_uart_int_rx(struct uart_port *port)
 	int i;
 	unsigned char ch;
 	u8 *cp;
-	struct tty_struct *tty = port->state->port.tty;
+	struct tty_port *tport = &port->state->port;
+	struct tty_struct *tty = tport->tty;
 	struct uart_cpm_port *pinfo = (struct uart_cpm_port *)port;
 	cbd_t __iomem *bdp;
 	u16 status;
@@ -276,7 +277,7 @@ static void cpm_uart_int_rx(struct uart_port *port)
 		/* If we have not enough room in tty flip buffer, then we try
 		 * later, which will be the next rx-interrupt or a timeout
 		 */
-		if(tty_buffer_request_room(tty, i) < i) {
+		if (tty_buffer_request_room(tport, i) < i) {
 			printk(KERN_WARNING "No room in flip buffer\n");
 			return;
 		}
diff --git a/drivers/tty/serial/ioc4_serial.c b/drivers/tty/serial/ioc4_serial.c
index c4e30b841f94..710ce87ffbeb 100644
--- a/drivers/tty/serial/ioc4_serial.c
+++ b/drivers/tty/serial/ioc4_serial.c
@@ -2356,7 +2356,7 @@ static void receive_chars(struct uart_port *the_port)
 	spin_lock_irqsave(&the_port->lock, pflags);
 	tty = state->port.tty;
 
-	request_count = tty_buffer_request_room(tty, IOC4_MAX_CHARS);
+	request_count = tty_buffer_request_room(&state->port, IOC4_MAX_CHARS);
 
 	if (request_count > 0) {
 		icount = &the_port->icount;
diff --git a/drivers/tty/serial/jsm/jsm_tty.c b/drivers/tty/serial/jsm/jsm_tty.c
index 4c00c5550b1a..3969e54744cc 100644
--- a/drivers/tty/serial/jsm/jsm_tty.c
+++ b/drivers/tty/serial/jsm/jsm_tty.c
@@ -521,6 +521,7 @@ void jsm_input(struct jsm_channel *ch)
 {
 	struct jsm_board *bd;
 	struct tty_struct *tp;
+	struct tty_port *port;
 	u32 rmask;
 	u16 head;
 	u16 tail;
@@ -536,7 +537,8 @@ void jsm_input(struct jsm_channel *ch)
 	if (!ch)
 		return;
 
-	tp = ch->uart_port.state->port.tty;
+	port = &ch->uart_port.state->port;
+	tp = port->tty;
 
 	bd = ch->ch_bd;
 	if(!bd)
@@ -600,7 +602,7 @@ void jsm_input(struct jsm_channel *ch)
 		return;
 	}
 
-	len = tty_buffer_request_room(tp, data_len);
+	len = tty_buffer_request_room(port, data_len);
 	n = len;
 
 	/*
diff --git a/drivers/tty/serial/mpsc.c b/drivers/tty/serial/mpsc.c
index 6a9c6605666a..50366863cfa3 100644
--- a/drivers/tty/serial/mpsc.c
+++ b/drivers/tty/serial/mpsc.c
@@ -937,7 +937,8 @@ static int serial_polled;
 static int mpsc_rx_intr(struct mpsc_port_info *pi)
 {
 	struct mpsc_rx_desc *rxre;
-	struct tty_struct *tty = pi->port.state->port.tty;
+	struct tty_port *port = &pi->port.state->port;
+	struct tty_struct *tty = port->tty;
 	u32	cmdstat, bytes_in, i;
 	int	rc = 0;
 	u8	*bp;
@@ -968,8 +969,7 @@ static int mpsc_rx_intr(struct mpsc_port_info *pi)
 		}
 #endif
 		/* Following use of tty struct directly is deprecated */
-		if (unlikely(tty_buffer_request_room(tty, bytes_in)
-					< bytes_in)) {
+		if (tty_buffer_request_room(port, bytes_in) < bytes_in) {
 			if (tty->low_latency)
 				tty_flip_buffer_push(tty);
 			/*
diff --git a/drivers/tty/serial/mrst_max3110.c b/drivers/tty/serial/mrst_max3110.c
index 58734d7e746d..776431ff0190 100644
--- a/drivers/tty/serial/mrst_max3110.c
+++ b/drivers/tty/serial/mrst_max3110.c
@@ -339,6 +339,7 @@ static int
 receive_chars(struct uart_max3110 *max, unsigned short *str, int len)
 {
 	struct uart_port *port = &max->port;
+	struct tty_port *tport;
 	struct tty_struct *tty;
 	char buf[M3110_RX_FIFO_DEPTH];
 	int r, w, usable;
@@ -347,7 +348,8 @@ receive_chars(struct uart_max3110 *max, unsigned short *str, int len)
 	if (!port->state)
 		return 0;
 
-	tty = tty_port_tty_get(&port->state->port);
+	tport = &port->state->port;
+	tty = tty_port_tty_get(tport);
 	if (!tty)
 		return 0;
 
@@ -370,7 +372,7 @@ receive_chars(struct uart_max3110 *max, unsigned short *str, int len)
 	}
 
 	for (r = 0; w; r += usable, w -= usable) {
-		usable = tty_buffer_request_room(tty, w);
+		usable = tty_buffer_request_room(tport, w);
 		if (usable) {
 			tty_insert_flip_string(tty, buf + r, usable);
 			port->icount.rx += usable;
diff --git a/drivers/tty/serial/pch_uart.c b/drivers/tty/serial/pch_uart.c
index 8318925fbf6b..4f1774be2a8c 100644
--- a/drivers/tty/serial/pch_uart.c
+++ b/drivers/tty/serial/pch_uart.c
@@ -629,15 +629,16 @@ static int dma_push_rx(struct eg20t_port *priv, int size)
 	struct tty_struct *tty;
 	int room;
 	struct uart_port *port = &priv->port;
+	struct tty_port *tport = &port->state->port;
 
 	port = &priv->port;
-	tty = tty_port_tty_get(&port->state->port);
+	tty = tty_port_tty_get(tport);
 	if (!tty) {
 		dev_dbg(priv->port.dev, "%s:tty is busy now", __func__);
 		return 0;
 	}
 
-	room = tty_buffer_request_room(tty, size);
+	room = tty_buffer_request_room(tport, size);
 
 	if (room < size)
 		dev_warn(port->dev, "Rx overrun: dropping %u bytes\n",
diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c
index 61477567423f..cf96314770fb 100644
--- a/drivers/tty/serial/sh-sci.c
+++ b/drivers/tty/serial/sh-sci.c
@@ -596,7 +596,8 @@ static void sci_transmit_chars(struct uart_port *port)
 static void sci_receive_chars(struct uart_port *port)
 {
 	struct sci_port *sci_port = to_sci_port(port);
-	struct tty_struct *tty = port->state->port.tty;
+	struct tty_port *tport = &port->state->port;
+	struct tty_struct *tty = tport->tty;
 	int i, count, copied = 0;
 	unsigned short status;
 	unsigned char flag;
@@ -607,7 +608,7 @@ static void sci_receive_chars(struct uart_port *port)
 
 	while (1) {
 		/* Don't copy more bytes than there is room for in the buffer */
-		count = tty_buffer_request_room(tty, sci_rxfill(port));
+		count = tty_buffer_request_room(tport, sci_rxfill(port));
 
 		/* If for any reason we can't copy more data, we're done! */
 		if (count == 0)
@@ -1263,9 +1264,10 @@ static int sci_dma_rx_push(struct sci_port *s, struct tty_struct *tty,
 			   size_t count)
 {
 	struct uart_port *port = &s->port;
+	struct tty_port *tport = &port->state->port;
 	int i, active, room;
 
-	room = tty_buffer_request_room(tty, count);
+	room = tty_buffer_request_room(tport, count);
 
 	if (s->active_rx == s->cookie_rx[0]) {
 		active = 0;
diff --git a/drivers/tty/serial/ucc_uart.c b/drivers/tty/serial/ucc_uart.c
index f99b0c965f85..ed047d9ab1e2 100644
--- a/drivers/tty/serial/ucc_uart.c
+++ b/drivers/tty/serial/ucc_uart.c
@@ -469,7 +469,8 @@ static void qe_uart_int_rx(struct uart_qe_port *qe_port)
 	int i;
 	unsigned char ch, *cp;
 	struct uart_port *port = &qe_port->port;
-	struct tty_struct *tty = port->state->port.tty;
+	struct tty_port *tport = &port->state->port;
+	struct tty_struct *tty = tport->tty;
 	struct qe_bd *bdp;
 	u16 status;
 	unsigned int flg;
@@ -491,7 +492,7 @@ static void qe_uart_int_rx(struct uart_qe_port *qe_port)
 		/* If we don't have enough room in RX buffer for the entire BD,
 		 * then we try later, which will be the next RX interrupt.
 		 */
-		if (tty_buffer_request_room(tty, i) < i) {
+		if (tty_buffer_request_room(tport, i) < i) {
 			dev_dbg(port->dev, "ucc-uart: no room in RX buffer\n");
 			return;
 		}
diff --git a/drivers/tty/tty_buffer.c b/drivers/tty/tty_buffer.c
index 45d916198f78..f897332fb4ee 100644
--- a/drivers/tty/tty_buffer.c
+++ b/drivers/tty/tty_buffer.c
@@ -235,7 +235,7 @@ static int __tty_buffer_request_room(struct tty_port *port, size_t size)
 
 /**
  *	tty_buffer_request_room		-	grow tty buffer if needed
- *	@tty: tty structure
+ *	@port: tty port structure
  *	@size: size desired
  *
  *	Make at least size bytes of linear space available for the tty
@@ -243,9 +243,8 @@ static int __tty_buffer_request_room(struct tty_port *port, size_t size)
  *
  *	Locking: Takes port->buf.lock
  */
-int tty_buffer_request_room(struct tty_struct *tty, size_t size)
+int tty_buffer_request_room(struct tty_port *port, size_t size)
 {
-	struct tty_port *port = tty->port;
 	unsigned long flags;
 	int length;
 
diff --git a/drivers/usb/serial/quatech2.c b/drivers/usb/serial/quatech2.c
index d152be97d041..1e67fd89e346 100644
--- a/drivers/usb/serial/quatech2.c
+++ b/drivers/usb/serial/quatech2.c
@@ -697,7 +697,7 @@ void qt2_process_read_urb(struct urb *urb)
 				escapeflag = true;
 				break;
 			case QT2_CONTROL_ESCAPE:
-				tty_buffer_request_room(tty, 2);
+				tty_buffer_request_room(&port->port, 2);
 				tty_insert_flip_string(tty, ch, 2);
 				i += 2;
 				escapeflag = true;
@@ -713,7 +713,7 @@ void qt2_process_read_urb(struct urb *urb)
 		}
 
 		if (tty) {
-			tty_buffer_request_room(tty, 1);
+			tty_buffer_request_room(&port->port, 1);
 			tty_insert_flip_string(tty, ch, 1);
 		}
 	}
diff --git a/include/linux/tty_flip.h b/include/linux/tty_flip.h
index 2002344ed36a..78207ca8fab1 100644
--- a/include/linux/tty_flip.h
+++ b/include/linux/tty_flip.h
@@ -1,7 +1,7 @@
 #ifndef _LINUX_TTY_FLIP_H
 #define _LINUX_TTY_FLIP_H
 
-extern int tty_buffer_request_room(struct tty_struct *tty, size_t size);
+extern int tty_buffer_request_room(struct tty_port *port, size_t size);
 extern int tty_insert_flip_string_flags(struct tty_struct *tty, const unsigned char *chars, const char *flags, size_t size);
 extern int tty_insert_flip_string_fixed_flag(struct tty_struct *tty, const unsigned char *chars, char flag, size_t size);
 extern int tty_prepare_flip_string(struct tty_struct *tty, unsigned char **chars, size_t size);
-- 
cgit v1.2.3


From 2f69335710884ae6112fc8196ebe29b5cda7b79b Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jslaby@suse.cz>
Date: Thu, 3 Jan 2013 15:53:02 +0100
Subject: TTY: convert more flipping functions

Now, we start converting tty buffer functions to actually use
tty_port. This will allow us to get rid of the need of tty pointer in
many call sites. Only tty_port will be needed and hence no more
tty_port_tty_get calls in those paths.

Now 4 string flipping ones are on turn:
* tty_insert_flip_string_flags
* tty_insert_flip_string_fixed_flag
* tty_prepare_flip_string
* tty_prepare_flip_string_flags

Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/dgrp/dgrp_net_ops.c  |  2 +-
 drivers/staging/fwserial/fwserial.c  |  8 +++++---
 drivers/tty/cyclades.c               |  2 +-
 drivers/tty/isicom.c                 |  4 ++--
 drivers/tty/moxa.c                   |  4 ++--
 drivers/tty/rocket.c                 |  2 +-
 drivers/tty/serial/msm_smd_tty.c     |  2 +-
 drivers/tty/tty_buffer.c             | 32 ++++++++++++++++----------------
 drivers/usb/serial/ark3116.c         |  2 +-
 drivers/usb/serial/belkin_sa.c       |  2 +-
 drivers/usb/serial/cypress_m8.c      |  2 +-
 drivers/usb/serial/digi_acceleport.c |  4 ++--
 drivers/usb/serial/f81232.c          |  2 +-
 drivers/usb/serial/ftdi_sio.c        |  2 +-
 drivers/usb/serial/pl2303.c          |  2 +-
 drivers/usb/serial/spcp8x5.c         |  2 +-
 drivers/usb/serial/ssu100.c          |  2 +-
 include/linux/tty_flip.h             | 16 ++++++++++------
 18 files changed, 49 insertions(+), 43 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/staging/dgrp/dgrp_net_ops.c b/drivers/staging/dgrp/dgrp_net_ops.c
index c0aeaaa094f1..df3ebcdf7ed8 100644
--- a/drivers/staging/dgrp/dgrp_net_ops.c
+++ b/drivers/staging/dgrp/dgrp_net_ops.c
@@ -232,7 +232,7 @@ static void dgrp_input(struct ch_struct *ch)
 		    (nd->nd_dpa_port == PORT_NUM(MINOR(tty_devnum(tty)))))
 			dgrp_dpa_data(nd, 1, myflipbuf, len);
 
-		tty_insert_flip_string_flags(tty, myflipbuf,
+		tty_insert_flip_string_flags(&ch->port, myflipbuf,
 					     myflipflagbuf, len);
 		tty_flip_buffer_push(tty);
 
diff --git a/drivers/staging/fwserial/fwserial.c b/drivers/staging/fwserial/fwserial.c
index 61ee29083b26..85dbdc1eccec 100644
--- a/drivers/staging/fwserial/fwserial.c
+++ b/drivers/staging/fwserial/fwserial.c
@@ -507,7 +507,8 @@ static void fwtty_emit_breaks(struct work_struct *work)
 
 	while (n) {
 		t = min(n, 16);
-		c = tty_insert_flip_string_fixed_flag(tty, buf, TTY_BREAK, t);
+		c = tty_insert_flip_string_fixed_flag(&port->port, buf,
+				TTY_BREAK, t);
 		n -= c;
 		brk += c;
 		if (c < t)
@@ -535,7 +536,7 @@ static void fwtty_pushrx(struct work_struct *work)
 
 	spin_lock_bh(&port->lock);
 	list_for_each_entry_safe(buf, next, &port->buf_list, list) {
-		n = tty_insert_flip_string_fixed_flag(tty, buf->data,
+		n = tty_insert_flip_string_fixed_flag(&port->port, buf->data,
 						      TTY_NORMAL, buf->n);
 		c += n;
 		port->buffered -= n;
@@ -630,7 +631,8 @@ static int fwtty_rx(struct fwtty_port *port, unsigned char *data, size_t len)
 	}
 
 	if (!test_bit(BUFFERING_RX, &port->flags)) {
-		c = tty_insert_flip_string_fixed_flag(tty, data, TTY_NORMAL, n);
+		c = tty_insert_flip_string_fixed_flag(&port->port, data,
+				TTY_NORMAL, n);
 		if (c > 0)
 			tty_flip_buffer_push(tty);
 		n -= c;
diff --git a/drivers/tty/cyclades.c b/drivers/tty/cyclades.c
index b85acc74eb0d..0b7573dbf439 100644
--- a/drivers/tty/cyclades.c
+++ b/drivers/tty/cyclades.c
@@ -968,7 +968,7 @@ static void cyz_handle_rx(struct cyclades_port *info, struct tty_struct *tty)
 		   for performance, but because of buffer boundaries, there
 		   may be several steps to the operation */
 			while (1) {
-				len = tty_prepare_flip_string(tty, &buf,
+				len = tty_prepare_flip_string(port, &buf,
 						char_count);
 				if (!len)
 					break;
diff --git a/drivers/tty/isicom.c b/drivers/tty/isicom.c
index 3205b2e9090b..e9fc15f00f48 100644
--- a/drivers/tty/isicom.c
+++ b/drivers/tty/isicom.c
@@ -650,8 +650,8 @@ static irqreturn_t isicom_interrupt(int irq, void *dev_id)
 			break;
 		}
 	} else {				/* Data   Packet */
-
-		count = tty_prepare_flip_string(tty, &rp, byte_count & ~1);
+		count = tty_prepare_flip_string(&port->port, &rp,
+				byte_count & ~1);
 		pr_debug("%s: Can rx %d of %d bytes.\n",
 			 __func__, count, byte_count);
 		word_count = count >> 1;
diff --git a/drivers/tty/moxa.c b/drivers/tty/moxa.c
index f9d28503bdec..fcaac4870d5f 100644
--- a/drivers/tty/moxa.c
+++ b/drivers/tty/moxa.c
@@ -1966,7 +1966,7 @@ static int MoxaPortReadData(struct moxa_port *port)
 			ofs = baseAddr + DynPage_addr + bufhead + head;
 			len = (tail >= head) ? (tail - head) :
 					(rx_mask + 1 - head);
-			len = tty_prepare_flip_string(tty, &dst,
+			len = tty_prepare_flip_string(&port->port, &dst,
 					min(len, count));
 			memcpy_fromio(dst, ofs, len);
 			head = (head + len) & rx_mask;
@@ -1978,7 +1978,7 @@ static int MoxaPortReadData(struct moxa_port *port)
 		while (count > 0) {
 			writew(pageno, baseAddr + Control_reg);
 			ofs = baseAddr + DynPage_addr + pageofs;
-			len = tty_prepare_flip_string(tty, &dst,
+			len = tty_prepare_flip_string(&port->port, &dst,
 					min(Page_size - pageofs, count));
 			memcpy_fromio(dst, ofs, len);
 
diff --git a/drivers/tty/rocket.c b/drivers/tty/rocket.c
index e42009a00529..77d7bc94afaa 100644
--- a/drivers/tty/rocket.c
+++ b/drivers/tty/rocket.c
@@ -399,7 +399,7 @@ static void rp_do_receive(struct r_port *info,
 		 * characters at time by doing repeated word IO
 		 * transfer.
 		 */
-		space = tty_prepare_flip_string(tty, &cbuf, ToRecv);
+		space = tty_prepare_flip_string(&info->port, &cbuf, ToRecv);
 		if (space < ToRecv) {
 #ifdef ROCKET_DEBUG_RECEIVE
 			printk(KERN_INFO "rp_do_receive:insufficient space ToRecv=%d space=%d\n", ToRecv, space);
diff --git a/drivers/tty/serial/msm_smd_tty.c b/drivers/tty/serial/msm_smd_tty.c
index 925d1fa153db..b43b4ec39269 100644
--- a/drivers/tty/serial/msm_smd_tty.c
+++ b/drivers/tty/serial/msm_smd_tty.c
@@ -70,7 +70,7 @@ static void smd_tty_notify(void *priv, unsigned event)
 		if (avail == 0)
 			break;
 
-		avail = tty_prepare_flip_string(tty, &ptr, avail);
+		avail = tty_prepare_flip_string(&info->port, &ptr, avail);
 
 		if (smd_read(info->ch, ptr, avail) != avail) {
 			/* shouldn't be possible since we're in interrupt
diff --git a/drivers/tty/tty_buffer.c b/drivers/tty/tty_buffer.c
index f897332fb4ee..31873e42602a 100644
--- a/drivers/tty/tty_buffer.c
+++ b/drivers/tty/tty_buffer.c
@@ -257,7 +257,7 @@ EXPORT_SYMBOL_GPL(tty_buffer_request_room);
 
 /**
  *	tty_insert_flip_string_fixed_flag - Add characters to the tty buffer
- *	@tty: tty structure
+ *	@port: tty port
  *	@chars: characters
  *	@flag: flag value for each character
  *	@size: size
@@ -268,10 +268,10 @@ EXPORT_SYMBOL_GPL(tty_buffer_request_room);
  *	Locking: Called functions may take port->buf.lock
  */
 
-int tty_insert_flip_string_fixed_flag(struct tty_struct *tty,
+int tty_insert_flip_string_fixed_flag(struct tty_port *port,
 		const unsigned char *chars, char flag, size_t size)
 {
-	struct tty_bufhead *buf = &tty->port->buf;
+	struct tty_bufhead *buf = &port->buf;
 	int copied = 0;
 	do {
 		int goal = min_t(size_t, size - copied, TTY_BUFFER_PAGE);
@@ -280,7 +280,7 @@ int tty_insert_flip_string_fixed_flag(struct tty_struct *tty,
 		struct tty_buffer *tb;
 
 		spin_lock_irqsave(&buf->lock, flags);
-		space = __tty_buffer_request_room(tty->port, goal);
+		space = __tty_buffer_request_room(port, goal);
 		tb = buf->tail;
 		/* If there is no space then tb may be NULL */
 		if (unlikely(space == 0)) {
@@ -302,7 +302,7 @@ EXPORT_SYMBOL(tty_insert_flip_string_fixed_flag);
 
 /**
  *	tty_insert_flip_string_flags	-	Add characters to the tty buffer
- *	@tty: tty structure
+ *	@port: tty port
  *	@chars: characters
  *	@flags: flag bytes
  *	@size: size
@@ -314,10 +314,10 @@ EXPORT_SYMBOL(tty_insert_flip_string_fixed_flag);
  *	Locking: Called functions may take port->buf.lock
  */
 
-int tty_insert_flip_string_flags(struct tty_struct *tty,
+int tty_insert_flip_string_flags(struct tty_port *port,
 		const unsigned char *chars, const char *flags, size_t size)
 {
-	struct tty_bufhead *buf = &tty->port->buf;
+	struct tty_bufhead *buf = &port->buf;
 	int copied = 0;
 	do {
 		int goal = min_t(size_t, size - copied, TTY_BUFFER_PAGE);
@@ -326,7 +326,7 @@ int tty_insert_flip_string_flags(struct tty_struct *tty,
 		struct tty_buffer *tb;
 
 		spin_lock_irqsave(&buf->lock, __flags);
-		space = __tty_buffer_request_room(tty->port, goal);
+		space = __tty_buffer_request_room(port, goal);
 		tb = buf->tail;
 		/* If there is no space then tb may be NULL */
 		if (unlikely(space == 0)) {
@@ -376,7 +376,7 @@ EXPORT_SYMBOL(tty_schedule_flip);
 
 /**
  *	tty_prepare_flip_string		-	make room for characters
- *	@tty: tty
+ *	@port: tty port
  *	@chars: return pointer for character write area
  *	@size: desired size
  *
@@ -389,16 +389,16 @@ EXPORT_SYMBOL(tty_schedule_flip);
  *	Locking: May call functions taking port->buf.lock
  */
 
-int tty_prepare_flip_string(struct tty_struct *tty, unsigned char **chars,
+int tty_prepare_flip_string(struct tty_port *port, unsigned char **chars,
 		size_t size)
 {
-	struct tty_bufhead *buf = &tty->port->buf;
+	struct tty_bufhead *buf = &port->buf;
 	int space;
 	unsigned long flags;
 	struct tty_buffer *tb;
 
 	spin_lock_irqsave(&buf->lock, flags);
-	space = __tty_buffer_request_room(tty->port, size);
+	space = __tty_buffer_request_room(port, size);
 
 	tb = buf->tail;
 	if (likely(space)) {
@@ -413,7 +413,7 @@ EXPORT_SYMBOL_GPL(tty_prepare_flip_string);
 
 /**
  *	tty_prepare_flip_string_flags	-	make room for characters
- *	@tty: tty
+ *	@port: tty port
  *	@chars: return pointer for character write area
  *	@flags: return pointer for status flag write area
  *	@size: desired size
@@ -427,16 +427,16 @@ EXPORT_SYMBOL_GPL(tty_prepare_flip_string);
  *	Locking: May call functions taking port->buf.lock
  */
 
-int tty_prepare_flip_string_flags(struct tty_struct *tty,
+int tty_prepare_flip_string_flags(struct tty_port *port,
 			unsigned char **chars, char **flags, size_t size)
 {
-	struct tty_bufhead *buf = &tty->port->buf;
+	struct tty_bufhead *buf = &port->buf;
 	int space;
 	unsigned long __flags;
 	struct tty_buffer *tb;
 
 	spin_lock_irqsave(&buf->lock, __flags);
-	space = __tty_buffer_request_room(tty->port, size);
+	space = __tty_buffer_request_room(port, size);
 
 	tb = buf->tail;
 	if (likely(space)) {
diff --git a/drivers/usb/serial/ark3116.c b/drivers/usb/serial/ark3116.c
index a88882c0e237..e2d653d00ea2 100644
--- a/drivers/usb/serial/ark3116.c
+++ b/drivers/usb/serial/ark3116.c
@@ -705,7 +705,7 @@ static void ark3116_process_read_urb(struct urb *urb)
 		if (lsr & UART_LSR_OE)
 			tty_insert_flip_char(tty, 0, TTY_OVERRUN);
 	}
-	tty_insert_flip_string_fixed_flag(tty, data, tty_flag,
+	tty_insert_flip_string_fixed_flag(&port->port, data, tty_flag,
 							urb->actual_length);
 	tty_flip_buffer_push(tty);
 	tty_kref_put(tty);
diff --git a/drivers/usb/serial/belkin_sa.c b/drivers/usb/serial/belkin_sa.c
index b72a4c166705..a213d1be9462 100644
--- a/drivers/usb/serial/belkin_sa.c
+++ b/drivers/usb/serial/belkin_sa.c
@@ -279,7 +279,7 @@ static void belkin_sa_process_read_urb(struct urb *urb)
 			tty_insert_flip_char(tty, 0, TTY_OVERRUN);
 	}
 
-	tty_insert_flip_string_fixed_flag(tty, data, tty_flag,
+	tty_insert_flip_string_fixed_flag(&port->port, data, tty_flag,
 							urb->actual_length);
 	tty_flip_buffer_push(tty);
 	tty_kref_put(tty);
diff --git a/drivers/usb/serial/cypress_m8.c b/drivers/usb/serial/cypress_m8.c
index fd8c35fd452e..ac14e3eb95ea 100644
--- a/drivers/usb/serial/cypress_m8.c
+++ b/drivers/usb/serial/cypress_m8.c
@@ -1215,7 +1215,7 @@ static void cypress_read_int_callback(struct urb *urb)
 
 	/* process read if there is data other than line status */
 	if (tty && bytes > i) {
-		tty_insert_flip_string_fixed_flag(tty, data + i,
+		tty_insert_flip_string_fixed_flag(&port->port, data + i,
 				tty_flag, bytes - i);
 		tty_flip_buffer_push(tty);
 	}
diff --git a/drivers/usb/serial/digi_acceleport.c b/drivers/usb/serial/digi_acceleport.c
index 45d4af62967f..efbc4035410c 100644
--- a/drivers/usb/serial/digi_acceleport.c
+++ b/drivers/usb/serial/digi_acceleport.c
@@ -1455,8 +1455,8 @@ static int digi_read_inb_callback(struct urb *urb)
 		/* data length is len-1 (one byte of len is port_status) */
 		--len;
 		if (len > 0) {
-			tty_insert_flip_string_fixed_flag(tty, data, flag,
-									len);
+			tty_insert_flip_string_fixed_flag(&port->port, data,
+					flag, len);
 			tty_flip_buffer_push(tty);
 		}
 	}
diff --git a/drivers/usb/serial/f81232.c b/drivers/usb/serial/f81232.c
index 6e4eb57d0177..a8c6430bf1b3 100644
--- a/drivers/usb/serial/f81232.c
+++ b/drivers/usb/serial/f81232.c
@@ -140,7 +140,7 @@ static void f81232_process_read_urb(struct urb *urb)
 			if (!usb_serial_handle_sysrq_char(port, data[i]))
 				tty_insert_flip_char(tty, data[i], tty_flag);
 	} else {
-		tty_insert_flip_string_fixed_flag(tty, data, tty_flag,
+		tty_insert_flip_string_fixed_flag(&port->port, data, tty_flag,
 							urb->actual_length);
 	}
 
diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c
index 0a373b3ae96a..0416d952a448 100644
--- a/drivers/usb/serial/ftdi_sio.c
+++ b/drivers/usb/serial/ftdi_sio.c
@@ -2032,7 +2032,7 @@ static int ftdi_process_packet(struct tty_struct *tty,
 				tty_insert_flip_char(tty, *ch, flag);
 		}
 	} else {
-		tty_insert_flip_string_fixed_flag(tty, ch, flag, len);
+		tty_insert_flip_string_fixed_flag(&port->port, ch, flag, len);
 	}
 
 	return len;
diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c
index 600241901361..86789b0477c5 100644
--- a/drivers/usb/serial/pl2303.c
+++ b/drivers/usb/serial/pl2303.c
@@ -812,7 +812,7 @@ static void pl2303_process_read_urb(struct urb *urb)
 			if (!usb_serial_handle_sysrq_char(port, data[i]))
 				tty_insert_flip_char(tty, data[i], tty_flag);
 	} else {
-		tty_insert_flip_string_fixed_flag(tty, data, tty_flag,
+		tty_insert_flip_string_fixed_flag(&port->port, data, tty_flag,
 							urb->actual_length);
 	}
 
diff --git a/drivers/usb/serial/spcp8x5.c b/drivers/usb/serial/spcp8x5.c
index a42536af1256..fa42f1b907d0 100644
--- a/drivers/usb/serial/spcp8x5.c
+++ b/drivers/usb/serial/spcp8x5.c
@@ -505,7 +505,7 @@ static void spcp8x5_process_read_urb(struct urb *urb)
 				   priv->line_status & MSR_STATUS_LINE_DCD);
 	}
 
-	tty_insert_flip_string_fixed_flag(tty, data, tty_flag,
+	tty_insert_flip_string_fixed_flag(&port->port, data, tty_flag,
 							urb->actual_length);
 	tty_flip_buffer_push(tty);
 	tty_kref_put(tty);
diff --git a/drivers/usb/serial/ssu100.c b/drivers/usb/serial/ssu100.c
index 4543ea350229..37476c6240c2 100644
--- a/drivers/usb/serial/ssu100.c
+++ b/drivers/usb/serial/ssu100.c
@@ -617,7 +617,7 @@ static int ssu100_process_packet(struct urb *urb,
 				tty_insert_flip_char(tty, *ch, flag);
 		}
 	} else
-		tty_insert_flip_string_fixed_flag(tty, ch, flag, len);
+		tty_insert_flip_string_fixed_flag(&port->port, ch, flag, len);
 
 	return len;
 }
diff --git a/include/linux/tty_flip.h b/include/linux/tty_flip.h
index 78207ca8fab1..743f1d076467 100644
--- a/include/linux/tty_flip.h
+++ b/include/linux/tty_flip.h
@@ -2,10 +2,14 @@
 #define _LINUX_TTY_FLIP_H
 
 extern int tty_buffer_request_room(struct tty_port *port, size_t size);
-extern int tty_insert_flip_string_flags(struct tty_struct *tty, const unsigned char *chars, const char *flags, size_t size);
-extern int tty_insert_flip_string_fixed_flag(struct tty_struct *tty, const unsigned char *chars, char flag, size_t size);
-extern int tty_prepare_flip_string(struct tty_struct *tty, unsigned char **chars, size_t size);
-extern int tty_prepare_flip_string_flags(struct tty_struct *tty, unsigned char **chars, char **flags, size_t size);
+extern int tty_insert_flip_string_flags(struct tty_port *port,
+		const unsigned char *chars, const char *flags, size_t size);
+extern int tty_insert_flip_string_fixed_flag(struct tty_port *port,
+		const unsigned char *chars, char flag, size_t size);
+extern int tty_prepare_flip_string(struct tty_port *port,
+		unsigned char **chars, size_t size);
+extern int tty_prepare_flip_string_flags(struct tty_port *port,
+		unsigned char **chars, char **flags, size_t size);
 void tty_schedule_flip(struct tty_struct *tty);
 
 static inline int tty_insert_flip_char(struct tty_struct *tty,
@@ -17,12 +21,12 @@ static inline int tty_insert_flip_char(struct tty_struct *tty,
 		tb->char_buf_ptr[tb->used++] = ch;
 		return 1;
 	}
-	return tty_insert_flip_string_flags(tty, &ch, &flag, 1);
+	return tty_insert_flip_string_flags(tty->port, &ch, &flag, 1);
 }
 
 static inline int tty_insert_flip_string(struct tty_struct *tty, const unsigned char *chars, size_t size)
 {
-	return tty_insert_flip_string_fixed_flag(tty, chars, TTY_NORMAL, size);
+	return tty_insert_flip_string_fixed_flag(tty->port, chars, TTY_NORMAL, size);
 }
 
 #endif /* _LINUX_TTY_FLIP_H */
-- 
cgit v1.2.3


From 92a19f9cec9a80ad93c06e115822deb729e2c6ad Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jslaby@suse.cz>
Date: Thu, 3 Jan 2013 15:53:03 +0100
Subject: TTY: switch tty_insert_flip_char

Now, we start converting tty buffer functions to actually use
tty_port. This will allow us to get rid of the need of tty in many
call sites. Only tty_port will needed and hence no more
tty_port_tty_get in those paths.

tty_insert_flip_char is the next one to proceed. This one is used all
over the code, so the patch is huge.

Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/alpha/kernel/srmcons.c                 |  3 ++-
 arch/ia64/hp/sim/simserial.c                |  3 ++-
 arch/mn10300/kernel/mn10300-serial.c        |  4 +--
 arch/parisc/kernel/pdc_cons.c               |  2 +-
 arch/um/drivers/chan_kern.c                 |  8 +-----
 arch/xtensa/platforms/iss/console.c         |  3 ++-
 drivers/char/pcmcia/synclink_cs.c           |  2 +-
 drivers/ipack/devices/ipoctal.c             |  5 ++--
 drivers/isdn/i4l/isdn_common.c              |  8 +++---
 drivers/isdn/i4l/isdn_tty.c                 | 12 ++++-----
 drivers/mmc/card/sdio_uart.c                |  5 ++--
 drivers/s390/char/con3215.c                 |  3 ++-
 drivers/s390/char/keyboard.h                |  4 +--
 drivers/s390/char/sclp_tty.c                |  4 +--
 drivers/staging/dgrp/dgrp_net_ops.c         |  2 +-
 drivers/staging/fwserial/fwserial.c         |  2 +-
 drivers/staging/serqt_usb2/serqt_usb2.c     | 13 +++++----
 drivers/tty/amiserial.c                     |  4 +--
 drivers/tty/cyclades.c                      | 24 ++++++++---------
 drivers/tty/hvc/hvc_console.c               |  2 +-
 drivers/tty/hvc/hvsi.c                      | 22 +++++++--------
 drivers/tty/isicom.c                        |  4 +--
 drivers/tty/moxa.c                          |  2 +-
 drivers/tty/mxser.c                         |  4 +--
 drivers/tty/n_gsm.c                         | 24 ++++++++++-------
 drivers/tty/nozomi.c                        |  2 +-
 drivers/tty/rocket.c                        |  3 ++-
 drivers/tty/serial/68328serial.c            |  2 +-
 drivers/tty/serial/ar933x_uart.c            | 10 +++----
 drivers/tty/serial/bcm63xx_uart.c           |  7 ++---
 drivers/tty/serial/bfin_sport_uart.c        | 10 ++++---
 drivers/tty/serial/cpm_uart/cpm_uart_core.c |  2 +-
 drivers/tty/serial/crisv10.c                | 10 +++----
 drivers/tty/serial/efm32-uart.c             | 15 +++++------
 drivers/tty/serial/icom.c                   |  7 ++---
 drivers/tty/serial/imx.c                    |  4 ++-
 drivers/tty/serial/jsm/jsm_tty.c            |  8 +++---
 drivers/tty/serial/kgdb_nmi.c               |  2 +-
 drivers/tty/serial/lantiq.c                 |  7 ++---
 drivers/tty/serial/lpc32xx_hs.c             | 12 +++++----
 drivers/tty/serial/m32r_sio.c               |  7 ++---
 drivers/tty/serial/mpc52xx_uart.c           |  7 ++---
 drivers/tty/serial/mpsc.c                   |  4 +--
 drivers/tty/serial/msm_serial.c             | 12 +++++----
 drivers/tty/serial/msm_serial_hs.c          | 12 ++++-----
 drivers/tty/serial/mux.c                    |  5 ++--
 drivers/tty/serial/nwpserial.c              |  5 ++--
 drivers/tty/serial/pmac_zilog.c             |  8 +++---
 drivers/tty/serial/sc26xx.c                 | 10 ++++---
 drivers/tty/serial/serial_core.c            |  6 ++---
 drivers/tty/serial/sh-sci.c                 | 34 ++++++++++++-----------
 drivers/tty/serial/sn_console.c             |  6 +++--
 drivers/tty/serial/sunhv.c                  |  4 +--
 drivers/tty/serial/sunsab.c                 | 16 +++++------
 drivers/tty/serial/sunsu.c                  |  7 ++---
 drivers/tty/serial/sunzilog.c               | 17 +++++-------
 drivers/tty/serial/timbuart.c               |  4 +--
 drivers/tty/serial/uartlite.c               |  8 +++---
 drivers/tty/serial/ucc_uart.c               |  4 +--
 drivers/tty/serial/vt8500_serial.c          |  7 ++---
 drivers/tty/synclink.c                      |  4 +--
 drivers/tty/synclink_gt.c                   |  8 +++---
 drivers/tty/synclinkmp.c                    | 42 +++++++++++++----------------
 drivers/tty/vt/keyboard.c                   |  6 ++---
 drivers/tty/vt/vt.c                         |  2 +-
 drivers/usb/serial/ark3116.c                |  2 +-
 drivers/usb/serial/belkin_sa.c              |  2 +-
 drivers/usb/serial/digi_acceleport.c        |  2 +-
 drivers/usb/serial/f81232.c                 |  5 ++--
 drivers/usb/serial/ftdi_sio.c               | 11 ++++----
 drivers/usb/serial/generic.c                |  2 +-
 drivers/usb/serial/keyspan.c                | 21 ++++++++-------
 drivers/usb/serial/pl2303.c                 |  5 ++--
 drivers/usb/serial/spcp8x5.c                |  2 +-
 drivers/usb/serial/ssu100.c                 | 10 +++----
 include/linux/tty_flip.h                    |  6 ++---
 76 files changed, 297 insertions(+), 286 deletions(-)

(limited to 'include/linux')

diff --git a/arch/alpha/kernel/srmcons.c b/arch/alpha/kernel/srmcons.c
index 59b7bbad8394..21b57a66e809 100644
--- a/arch/alpha/kernel/srmcons.c
+++ b/arch/alpha/kernel/srmcons.c
@@ -46,13 +46,14 @@ typedef union _srmcons_result {
 static int
 srmcons_do_receive_chars(struct tty_struct *tty)
 {
+	struct tty_port *port = tty->port;
 	srmcons_result result;
 	int count = 0, loops = 0;
 
 	do {
 		result.as_long = callback_getc(0);
 		if (result.bits.status < 2) {
-			tty_insert_flip_char(tty, (char)result.bits.c, 0);
+			tty_insert_flip_char(port, (char)result.bits.c, 0);
 			count++;
 		}
 	} while((result.bits.status & 1) && (++loops < 10));
diff --git a/arch/ia64/hp/sim/simserial.c b/arch/ia64/hp/sim/simserial.c
index fc3924d18c1f..f8ae5d8eb106 100644
--- a/arch/ia64/hp/sim/simserial.c
+++ b/arch/ia64/hp/sim/simserial.c
@@ -55,6 +55,7 @@ static struct console *console;
 
 static void receive_chars(struct tty_struct *tty)
 {
+	struct tty_port *port = tty->port;
 	unsigned char ch;
 	static unsigned char seen_esc = 0;
 
@@ -81,7 +82,7 @@ static void receive_chars(struct tty_struct *tty)
 		}
 		seen_esc = 0;
 
-		if (tty_insert_flip_char(tty, ch, TTY_NORMAL) == 0)
+		if (tty_insert_flip_char(port, ch, TTY_NORMAL) == 0)
 			break;
 	}
 	tty_flip_buffer_push(tty);
diff --git a/arch/mn10300/kernel/mn10300-serial.c b/arch/mn10300/kernel/mn10300-serial.c
index 9b2232a78ff9..54ef40ceaaed 100644
--- a/arch/mn10300/kernel/mn10300-serial.c
+++ b/arch/mn10300/kernel/mn10300-serial.c
@@ -667,14 +667,14 @@ insert:
 		else
 			flag = TTY_NORMAL;
 
-		tty_insert_flip_char(tty, ch, flag);
+		tty_insert_flip_char(port, ch, flag);
 	}
 
 	/* overrun is special, since it's reported immediately, and doesn't
 	 * affect the current character
 	 */
 	if (overrun)
-		tty_insert_flip_char(tty, 0, TTY_OVERRUN);
+		tty_insert_flip_char(port, 0, TTY_OVERRUN);
 
 	count--;
 	if (count <= 0) {
diff --git a/arch/parisc/kernel/pdc_cons.c b/arch/parisc/kernel/pdc_cons.c
index efc5e7d30530..4d92a379eb21 100644
--- a/arch/parisc/kernel/pdc_cons.c
+++ b/arch/parisc/kernel/pdc_cons.c
@@ -147,7 +147,7 @@ static void pdc_console_poll(unsigned long unused)
 		data = pdc_console_poll_key(NULL);
 		if (data == -1)
 			break;
-		tty_insert_flip_char(tty, data & 0xFF, TTY_NORMAL);
+		tty_insert_flip_char(&tty_port, data & 0xFF, TTY_NORMAL);
 		count ++;
 	}
 
diff --git a/arch/um/drivers/chan_kern.c b/arch/um/drivers/chan_kern.c
index 4ff2503a1bb8..795bd8102205 100644
--- a/arch/um/drivers/chan_kern.c
+++ b/arch/um/drivers/chan_kern.c
@@ -81,12 +81,6 @@ static const struct chan_ops not_configged_ops = {
 };
 #endif /* CONFIG_NOCONFIG_CHAN */
 
-static void tty_receive_char(struct tty_struct *tty, char ch)
-{
-	if (tty)
-		tty_insert_flip_char(tty, ch, TTY_NORMAL);
-}
-
 static int open_one_chan(struct chan *chan)
 {
 	int fd, err;
@@ -569,7 +563,7 @@ void chan_interrupt(struct line *line, struct tty_struct *tty, int irq)
 		}
 		err = chan->ops->read(chan->fd, &c, chan->data);
 		if (err > 0)
-			tty_receive_char(tty, c);
+			tty_insert_flip_char(port, c, TTY_NORMAL);
 	} while (err > 0);
 
 	if (err == 0)
diff --git a/arch/xtensa/platforms/iss/console.c b/arch/xtensa/platforms/iss/console.c
index 8207a119eee9..62447d63890c 100644
--- a/arch/xtensa/platforms/iss/console.c
+++ b/arch/xtensa/platforms/iss/console.c
@@ -98,6 +98,7 @@ static int rs_write(struct tty_struct * tty,
 static void rs_poll(unsigned long priv)
 {
 	struct tty_struct* tty = (struct tty_struct*) priv;
+	struct tty_port *port = tty->port;
 
 	struct timeval tv = { .tv_sec = 0, .tv_usec = 0 };
 	int i = 0;
@@ -107,7 +108,7 @@ static void rs_poll(unsigned long priv)
 
 	while (__simc(SYS_select_one, 0, XTISS_SELECT_ONE_READ, (int)&tv,0,0)){
 		__simc (SYS_read, 0, (unsigned long)&c, 1, 0, 0);
-		tty_insert_flip_char(tty, c, TTY_NORMAL);
+		tty_insert_flip_char(port, c, TTY_NORMAL);
 		i++;
 	}
 
diff --git a/drivers/char/pcmcia/synclink_cs.c b/drivers/char/pcmcia/synclink_cs.c
index 0b1de715f097..9bdfe27b2413 100644
--- a/drivers/char/pcmcia/synclink_cs.c
+++ b/drivers/char/pcmcia/synclink_cs.c
@@ -945,7 +945,7 @@ static void rx_ready_async(MGSLPC_INFO *info, int tcd, struct tty_struct *tty)
 			else if (status & BIT6)
 				flag = TTY_FRAME;
 		}
-		work += tty_insert_flip_char(tty, data, flag);
+		work += tty_insert_flip_char(port, data, flag);
 	}
 	issue_command(info, CHA, CMD_RXFIFO);
 
diff --git a/drivers/ipack/devices/ipoctal.c b/drivers/ipack/devices/ipoctal.c
index 576d53d92677..8e0ed663ba9b 100644
--- a/drivers/ipack/devices/ipoctal.c
+++ b/drivers/ipack/devices/ipoctal.c
@@ -136,6 +136,7 @@ static int ipoctal_get_icount(struct tty_struct *tty,
 static void ipoctal_irq_rx(struct ipoctal_channel *channel,
 			   struct tty_struct *tty, u8 sr)
 {
+	struct tty_port *port = &channel->tty_port;
 	unsigned char value;
 	unsigned char flag = TTY_NORMAL;
 	u8 isr;
@@ -149,7 +150,7 @@ static void ipoctal_irq_rx(struct ipoctal_channel *channel,
 			if (sr & SR_OVERRUN_ERROR) {
 				channel->stats.overrun_err++;
 				/* Overrun doesn't affect the current character*/
-				tty_insert_flip_char(tty, 0, TTY_OVERRUN);
+				tty_insert_flip_char(port, 0, TTY_OVERRUN);
 			}
 			if (sr & SR_PARITY_ERROR) {
 				channel->stats.parity_err++;
@@ -165,7 +166,7 @@ static void ipoctal_irq_rx(struct ipoctal_channel *channel,
 				flag = TTY_BREAK;
 			}
 		}
-		tty_insert_flip_char(tty, value, flag);
+		tty_insert_flip_char(port, value, flag);
 
 		/* Check if there are more characters in RX FIFO
 		 * If there are more, the isr register for this channel
diff --git a/drivers/isdn/i4l/isdn_common.c b/drivers/isdn/i4l/isdn_common.c
index 7093169ee0c9..4a387ec021ad 100644
--- a/drivers/isdn/i4l/isdn_common.c
+++ b/drivers/isdn/i4l/isdn_common.c
@@ -913,7 +913,7 @@ isdn_readbchan_tty(int di, int channel, struct tty_struct *tty, int cisco_hack)
 			while ((count_pull < skb->len) && (len > 0)) {
 				/* push every character but the last to the tty buffer directly */
 				if (count_put)
-					tty_insert_flip_char(tty, last, TTY_NORMAL);
+					tty_insert_flip_char(port, last, TTY_NORMAL);
 				len--;
 				if (dev->drv[di]->DLEflag & DLEmask) {
 					last = DLE;
@@ -953,16 +953,16 @@ isdn_readbchan_tty(int di, int channel, struct tty_struct *tty, int cisco_hack)
 			 * Now we can dequeue it.
 			 */
 			if (cisco_hack)
-				tty_insert_flip_char(tty, last, 0xFF);
+				tty_insert_flip_char(port, last, 0xFF);
 			else
-				tty_insert_flip_char(tty, last, TTY_NORMAL);
+				tty_insert_flip_char(port, last, TTY_NORMAL);
 #ifdef CONFIG_ISDN_AUDIO
 			ISDN_AUDIO_SKB_LOCK(skb) = 0;
 #endif
 			skb = skb_dequeue(&dev->drv[di]->rpqueue[channel]);
 			dev_kfree_skb(skb);
 		} else {
-			tty_insert_flip_char(tty, last, TTY_NORMAL);
+			tty_insert_flip_char(port, last, TTY_NORMAL);
 			/* Not yet emptied this buff, so it
 			 * must stay in the queue, for further calls
 			 * but we pull off the data we got until now.
diff --git a/drivers/isdn/i4l/isdn_tty.c b/drivers/isdn/i4l/isdn_tty.c
index 4f5bcee7cf32..32d65d4bc848 100644
--- a/drivers/isdn/i4l/isdn_tty.c
+++ b/drivers/isdn/i4l/isdn_tty.c
@@ -92,11 +92,11 @@ isdn_tty_try_read(modem_info *info, struct sk_buff *skb)
 		unsigned char *dp = skb->data;
 		while (--l) {
 			if (*dp == DLE)
-				tty_insert_flip_char(tty, DLE, 0);
-			tty_insert_flip_char(tty, *dp++, 0);
+				tty_insert_flip_char(port, DLE, 0);
+			tty_insert_flip_char(port, *dp++, 0);
 		}
 		if (*dp == DLE)
-			tty_insert_flip_char(tty, DLE, 0);
+			tty_insert_flip_char(port, DLE, 0);
 		last = *dp;
 	} else {
 #endif
@@ -107,9 +107,9 @@ isdn_tty_try_read(modem_info *info, struct sk_buff *skb)
 	}
 #endif
 	if (info->emu.mdmreg[REG_CPPP] & BIT_CPPP)
-		tty_insert_flip_char(tty, last, 0xFF);
+		tty_insert_flip_char(port, last, 0xFF);
 	else
-		tty_insert_flip_char(tty, last, TTY_NORMAL);
+		tty_insert_flip_char(port, last, TTY_NORMAL);
 	tty_flip_buffer_push(tty);
 	kfree_skb(skb);
 
@@ -2287,7 +2287,7 @@ isdn_tty_at_cout(char *msg, modem_info *info)
 		if (skb) {
 			*sp++ = c;
 		} else {
-			if (tty_insert_flip_char(tty, c, TTY_NORMAL) == 0)
+			if (tty_insert_flip_char(port, c, TTY_NORMAL) == 0)
 				break;
 		}
 	}
diff --git a/drivers/mmc/card/sdio_uart.c b/drivers/mmc/card/sdio_uart.c
index bd57a11acc79..894078be0b96 100644
--- a/drivers/mmc/card/sdio_uart.c
+++ b/drivers/mmc/card/sdio_uart.c
@@ -419,7 +419,7 @@ static void sdio_uart_receive_chars(struct sdio_uart_port *port,
 
 		if ((*status & port->ignore_status_mask & ~UART_LSR_OE) == 0)
 			if (tty)
-				tty_insert_flip_char(tty, ch, flag);
+				tty_insert_flip_char(&port->port, ch, flag);
 
 		/*
 		 * Overrun is special.  Since it's reported immediately,
@@ -427,7 +427,8 @@ static void sdio_uart_receive_chars(struct sdio_uart_port *port,
 		 */
 		if (*status & ~port->ignore_status_mask & UART_LSR_OE)
 			if (tty)
-				tty_insert_flip_char(tty, 0, TTY_OVERRUN);
+				tty_insert_flip_char(&port->port, 0,
+						TTY_OVERRUN);
 
 		*status = sdio_in(port, UART_LSR);
 	} while ((*status & UART_LSR_DR) && (max_count-- > 0));
diff --git a/drivers/s390/char/con3215.c b/drivers/s390/char/con3215.c
index 40084501c31b..7c7294590880 100644
--- a/drivers/s390/char/con3215.c
+++ b/drivers/s390/char/con3215.c
@@ -411,7 +411,8 @@ static void raw3215_irq(struct ccw_device *cdev, unsigned long intparm,
 				break;
 
 			case CTRLCHAR_CTRL:
-				tty_insert_flip_char(tty, cchar, TTY_NORMAL);
+				tty_insert_flip_char(&raw->port, cchar,
+						TTY_NORMAL);
 				tty_flip_buffer_push(tty);
 				break;
 
diff --git a/drivers/s390/char/keyboard.h b/drivers/s390/char/keyboard.h
index d0ae2be58191..acab28d4f06b 100644
--- a/drivers/s390/char/keyboard.h
+++ b/drivers/s390/char/keyboard.h
@@ -46,7 +46,7 @@ kbd_put_queue(struct tty_port *port, int ch)
 	struct tty_struct *tty = tty_port_tty_get(port);
 	if (!tty)
 		return;
-	tty_insert_flip_char(tty, ch, 0);
+	tty_insert_flip_char(port, ch, 0);
 	tty_schedule_flip(tty);
 	tty_kref_put(tty);
 }
@@ -58,7 +58,7 @@ kbd_puts_queue(struct tty_port *port, char *cp)
 	if (!tty)
 		return;
 	while (*cp)
-		tty_insert_flip_char(tty, *cp++, 0);
+		tty_insert_flip_char(port, *cp++, 0);
 	tty_schedule_flip(tty);
 	tty_kref_put(tty);
 }
diff --git a/drivers/s390/char/sclp_tty.c b/drivers/s390/char/sclp_tty.c
index 877fbc37c1e7..c03863a7d455 100644
--- a/drivers/s390/char/sclp_tty.c
+++ b/drivers/s390/char/sclp_tty.c
@@ -342,7 +342,7 @@ sclp_tty_input(unsigned char* buf, unsigned int count)
 	case CTRLCHAR_SYSRQ:
 		break;
 	case CTRLCHAR_CTRL:
-		tty_insert_flip_char(tty, cchar, TTY_NORMAL);
+		tty_insert_flip_char(&sclp_port, cchar, TTY_NORMAL);
 		tty_flip_buffer_push(tty);
 		break;
 	case CTRLCHAR_NONE:
@@ -352,7 +352,7 @@ sclp_tty_input(unsigned char* buf, unsigned int count)
 		     strncmp((const char *) buf + count - 2, "\252n", 2))) {
 			/* add the auto \n */
 			tty_insert_flip_string(tty, buf, count);
-			tty_insert_flip_char(tty, '\n', TTY_NORMAL);
+			tty_insert_flip_char(&sclp_port, '\n', TTY_NORMAL);
 		} else
 			tty_insert_flip_string(tty, buf, count - 2);
 		tty_flip_buffer_push(tty);
diff --git a/drivers/staging/dgrp/dgrp_net_ops.c b/drivers/staging/dgrp/dgrp_net_ops.c
index df3ebcdf7ed8..e618a667d84c 100644
--- a/drivers/staging/dgrp/dgrp_net_ops.c
+++ b/drivers/staging/dgrp/dgrp_net_ops.c
@@ -2957,7 +2957,7 @@ check_query:
 			    !(I_IGNBRK(ch->ch_tun.un_tty))) {
 
 				tty_buffer_request_room(&ch->port, 1);
-				tty_insert_flip_char(ch->ch_tun.un_tty, 0, TTY_BREAK);
+				tty_insert_flip_char(&ch->port, 0, TTY_BREAK);
 				tty_flip_buffer_push(ch->ch_tun.un_tty);
 
 			}
diff --git a/drivers/staging/fwserial/fwserial.c b/drivers/staging/fwserial/fwserial.c
index 85dbdc1eccec..a2a0c43dec1c 100644
--- a/drivers/staging/fwserial/fwserial.c
+++ b/drivers/staging/fwserial/fwserial.c
@@ -617,7 +617,7 @@ static int fwtty_rx(struct fwtty_port *port, unsigned char *data, size_t len)
 
 	lsr &= port->status_mask;
 	if (lsr & ~port->ignore_mask & UART_LSR_OE) {
-		if (!tty_insert_flip_char(tty, 0, TTY_OVERRUN)) {
+		if (!tty_insert_flip_char(&port->port, 0, TTY_OVERRUN)) {
 			err = -EIO;
 			goto out;
 		}
diff --git a/drivers/staging/serqt_usb2/serqt_usb2.c b/drivers/staging/serqt_usb2/serqt_usb2.c
index 1b3e995d3a27..14965662d09c 100644
--- a/drivers/staging/serqt_usb2/serqt_usb2.c
+++ b/drivers/staging/serqt_usb2/serqt_usb2.c
@@ -255,12 +255,11 @@ static void ProcessModemStatus(struct quatech_port *qt_port,
 	wake_up_interruptible(&qt_port->wait);
 }
 
-static void ProcessRxChar(struct tty_struct *tty, struct usb_serial_port *port,
-						unsigned char data)
+static void ProcessRxChar(struct usb_serial_port *port, unsigned char data)
 {
 	struct urb *urb = port->read_urb;
 	if (urb->actual_length)
-		tty_insert_flip_char(tty, data, TTY_NORMAL);
+		tty_insert_flip_char(&port->port, data, TTY_NORMAL);
 }
 
 static void qt_write_bulk_callback(struct urb *urb)
@@ -335,8 +334,8 @@ static void qt_status_change_check(struct tty_struct *tty,
 			case 0xff:
 				dev_dbg(&port->dev, "No status sequence.\n");
 
-				ProcessRxChar(tty, port, data[i]);
-				ProcessRxChar(tty, port, data[i + 1]);
+				ProcessRxChar(port, data[i]);
+				ProcessRxChar(port, data[i + 1]);
 
 				i += 2;
 				break;
@@ -345,8 +344,8 @@ static void qt_status_change_check(struct tty_struct *tty,
 				continue;
 		}
 
-		if (tty && urb->actual_length)
-			tty_insert_flip_char(tty, data[i], TTY_NORMAL);
+		if (urb->actual_length)
+			tty_insert_flip_char(&port->port, data[i], TTY_NORMAL);
 
 	}
 	tty_flip_buffer_push(tty);
diff --git a/drivers/tty/amiserial.c b/drivers/tty/amiserial.c
index 9d7d00cdfecb..2e670d0c5366 100644
--- a/drivers/tty/amiserial.c
+++ b/drivers/tty/amiserial.c
@@ -328,9 +328,9 @@ static void receive_chars(struct serial_state *info)
 	     oe = 1;
 	  }
 	}
-	tty_insert_flip_char(tty, ch, flag);
+	tty_insert_flip_char(&info->tport, ch, flag);
 	if (oe == 1)
-		tty_insert_flip_char(tty, 0, TTY_OVERRUN);
+		tty_insert_flip_char(&info->tport, 0, TTY_OVERRUN);
 	tty_flip_buffer_push(tty);
 out:
 	return;
diff --git a/drivers/tty/cyclades.c b/drivers/tty/cyclades.c
index 0b7573dbf439..d1fe9a1f8475 100644
--- a/drivers/tty/cyclades.c
+++ b/drivers/tty/cyclades.c
@@ -492,34 +492,34 @@ static void cyy_chip_rx(struct cyclades_card *cinfo, int chip,
 		if (tty_buffer_request_room(port, 1)) {
 			if (data & info->read_status_mask) {
 				if (data & CyBREAK) {
-					tty_insert_flip_char(tty,
+					tty_insert_flip_char(port,
 						cyy_readb(info, CyRDSR),
 						TTY_BREAK);
 					info->icount.rx++;
 					if (port->flags & ASYNC_SAK)
 						do_SAK(tty);
 				} else if (data & CyFRAME) {
-					tty_insert_flip_char(tty,
+					tty_insert_flip_char(port,
 						cyy_readb(info, CyRDSR),
 						TTY_FRAME);
 					info->icount.rx++;
 					info->idle_stats.frame_errs++;
 				} else if (data & CyPARITY) {
 					/* Pieces of seven... */
-					tty_insert_flip_char(tty,
+					tty_insert_flip_char(port,
 						cyy_readb(info, CyRDSR),
 						TTY_PARITY);
 					info->icount.rx++;
 					info->idle_stats.parity_errs++;
 				} else if (data & CyOVERRUN) {
-					tty_insert_flip_char(tty, 0,
+					tty_insert_flip_char(port, 0,
 							TTY_OVERRUN);
 					info->icount.rx++;
 					/* If the flip buffer itself is
 					   overflowing, we still lose
 					   the next incoming character.
 					 */
-					tty_insert_flip_char(tty,
+					tty_insert_flip_char(port,
 						cyy_readb(info, CyRDSR),
 						TTY_FRAME);
 					info->icount.rx++;
@@ -529,12 +529,12 @@ static void cyy_chip_rx(struct cyclades_card *cinfo, int chip,
 				/* } else if(data & CyTIMEOUT) { */
 				/* } else if(data & CySPECHAR) { */
 				} else {
-					tty_insert_flip_char(tty, 0,
+					tty_insert_flip_char(port, 0,
 							TTY_NORMAL);
 					info->icount.rx++;
 				}
 			} else {
-				tty_insert_flip_char(tty, 0, TTY_NORMAL);
+				tty_insert_flip_char(port, 0, TTY_NORMAL);
 				info->icount.rx++;
 			}
 		} else {
@@ -557,7 +557,7 @@ static void cyy_chip_rx(struct cyclades_card *cinfo, int chip,
 		len = tty_buffer_request_room(port, char_count);
 		while (len--) {
 			data = cyy_readb(info, CyRDSR);
-			tty_insert_flip_char(tty, data, TTY_NORMAL);
+			tty_insert_flip_char(port, data, TTY_NORMAL);
 			info->idle_stats.recv_bytes++;
 			info->icount.rx++;
 #ifdef CY_16Y_HACK
@@ -992,7 +992,7 @@ static void cyz_handle_rx(struct cyclades_port *info, struct tty_struct *tty)
 						new_rx_get);
 				new_rx_get = (new_rx_get + 1) &
 							(rx_bufsize - 1);
-				tty_insert_flip_char(tty, data, TTY_NORMAL);
+				tty_insert_flip_char(port, data, TTY_NORMAL);
 				info->idle_stats.recv_bytes++;
 				info->icount.rx++;
 			}
@@ -1117,17 +1117,17 @@ static void cyz_handle_cmd(struct cyclades_card *cinfo)
 
 		switch (cmd) {
 		case C_CM_PR_ERROR:
-			tty_insert_flip_char(tty, 0, TTY_PARITY);
+			tty_insert_flip_char(&info->port, 0, TTY_PARITY);
 			info->icount.rx++;
 			special_count++;
 			break;
 		case C_CM_FR_ERROR:
-			tty_insert_flip_char(tty, 0, TTY_FRAME);
+			tty_insert_flip_char(&info->port, 0, TTY_FRAME);
 			info->icount.rx++;
 			special_count++;
 			break;
 		case C_CM_RXBRK:
-			tty_insert_flip_char(tty, 0, TTY_BREAK);
+			tty_insert_flip_char(&info->port, 0, TTY_BREAK);
 			info->icount.rx++;
 			special_count++;
 			break;
diff --git a/drivers/tty/hvc/hvc_console.c b/drivers/tty/hvc/hvc_console.c
index 3d2ea92b8505..8c2fe3a0e091 100644
--- a/drivers/tty/hvc/hvc_console.c
+++ b/drivers/tty/hvc/hvc_console.c
@@ -672,7 +672,7 @@ int hvc_poll(struct hvc_struct *hp)
 				}
 			}
 #endif /* CONFIG_MAGIC_SYSRQ */
-			tty_insert_flip_char(tty, buf[i], 0);
+			tty_insert_flip_char(&hp->port, buf[i], 0);
 		}
 
 		read_total += n;
diff --git a/drivers/tty/hvc/hvsi.c b/drivers/tty/hvc/hvsi.c
index 68357a6e4de9..1f528b8ebf5f 100644
--- a/drivers/tty/hvc/hvsi.c
+++ b/drivers/tty/hvc/hvsi.c
@@ -329,8 +329,7 @@ static void hvsi_recv_query(struct hvsi_struct *hp, uint8_t *packet)
 	}
 }
 
-static void hvsi_insert_chars(struct hvsi_struct *hp, struct tty_struct *tty,
-		const char *buf, int len)
+static void hvsi_insert_chars(struct hvsi_struct *hp, const char *buf, int len)
 {
 	int i;
 
@@ -346,7 +345,7 @@ static void hvsi_insert_chars(struct hvsi_struct *hp, struct tty_struct *tty,
 			continue;
 		}
 #endif /* CONFIG_MAGIC_SYSRQ */
-		tty_insert_flip_char(tty, c, 0);
+		tty_insert_flip_char(&hp->port, c, 0);
 	}
 }
 
@@ -359,8 +358,7 @@ static void hvsi_insert_chars(struct hvsi_struct *hp, struct tty_struct *tty,
  * revisited.
  */
 #define TTY_THRESHOLD_THROTTLE 128
-static bool hvsi_recv_data(struct hvsi_struct *hp, struct tty_struct *tty,
-		const uint8_t *packet)
+static bool hvsi_recv_data(struct hvsi_struct *hp, const uint8_t *packet)
 {
 	const struct hvsi_header *header = (const struct hvsi_header *)packet;
 	const uint8_t *data = packet + sizeof(struct hvsi_header);
@@ -377,7 +375,7 @@ static bool hvsi_recv_data(struct hvsi_struct *hp, struct tty_struct *tty,
 		datalen = TTY_THRESHOLD_THROTTLE;
 	}
 
-	hvsi_insert_chars(hp, tty, data, datalen);
+	hvsi_insert_chars(hp, data, datalen);
 
 	if (overflow > 0) {
 		/*
@@ -438,9 +436,7 @@ static int hvsi_load_chunk(struct hvsi_struct *hp, struct tty_struct *tty,
 			case VS_DATA_PACKET_HEADER:
 				if (!is_open(hp))
 					break;
-				if (tty == NULL)
-					break; /* no tty buffer to put data in */
-				flip = hvsi_recv_data(hp, tty, packet);
+				flip = hvsi_recv_data(hp, packet);
 				break;
 			case VS_CONTROL_PACKET_HEADER:
 				hvsi_recv_control(hp, packet, tty, handshake);
@@ -474,12 +470,12 @@ static int hvsi_load_chunk(struct hvsi_struct *hp, struct tty_struct *tty,
 	return 1;
 }
 
-static void hvsi_send_overflow(struct hvsi_struct *hp, struct tty_struct *tty)
+static void hvsi_send_overflow(struct hvsi_struct *hp)
 {
 	pr_debug("%s: delivering %i bytes overflow\n", __func__,
 			hp->n_throttle);
 
-	hvsi_insert_chars(hp, tty, hp->throttle_buf, hp->n_throttle);
+	hvsi_insert_chars(hp, hp->throttle_buf, hp->n_throttle);
 	hp->n_throttle = 0;
 }
 
@@ -514,7 +510,7 @@ static irqreturn_t hvsi_interrupt(int irq, void *arg)
 	if (tty && hp->n_throttle && !test_bit(TTY_THROTTLED, &tty->flags)) {
 		/* we weren't hung up and we weren't throttled, so we can
 		 * deliver the rest now */
-		hvsi_send_overflow(hp, tty);
+		hvsi_send_overflow(hp);
 		tty_flip_buffer_push(tty);
 	}
 	spin_unlock_irqrestore(&hp->lock, flags);
@@ -1001,7 +997,7 @@ static void hvsi_unthrottle(struct tty_struct *tty)
 
 	spin_lock_irqsave(&hp->lock, flags);
 	if (hp->n_throttle) {
-		hvsi_send_overflow(hp, tty);
+		hvsi_send_overflow(hp);
 		tty_flip_buffer_push(tty);
 	}
 	spin_unlock_irqrestore(&hp->lock, flags);
diff --git a/drivers/tty/isicom.c b/drivers/tty/isicom.c
index e9fc15f00f48..c70144f55fc0 100644
--- a/drivers/tty/isicom.c
+++ b/drivers/tty/isicom.c
@@ -634,7 +634,7 @@ static irqreturn_t isicom_interrupt(int irq, void *dev_id)
 			break;
 
 		case 1:	/* Received Break !!! */
-			tty_insert_flip_char(tty, 0, TTY_BREAK);
+			tty_insert_flip_char(&port->port, 0, TTY_BREAK);
 			if (port->port.flags & ASYNC_SAK)
 				do_SAK(tty);
 			tty_flip_buffer_push(tty);
@@ -658,7 +658,7 @@ static irqreturn_t isicom_interrupt(int irq, void *dev_id)
 		insw(base, rp, word_count);
 		byte_count -= (word_count << 1);
 		if (count & 0x0001) {
-			tty_insert_flip_char(tty,  inw(base) & 0xff,
+			tty_insert_flip_char(&port->port, inw(base) & 0xff,
 				TTY_NORMAL);
 			byte_count -= 2;
 		}
diff --git a/drivers/tty/moxa.c b/drivers/tty/moxa.c
index fcaac4870d5f..f42492db31c9 100644
--- a/drivers/tty/moxa.c
+++ b/drivers/tty/moxa.c
@@ -1429,7 +1429,7 @@ static int moxa_poll_port(struct moxa_port *p, unsigned int handle,
 		goto put;
 
 	if (tty && (intr & IntrBreak) && !I_IGNBRK(tty)) { /* BREAK */
-		tty_insert_flip_char(tty, 0, TTY_BREAK);
+		tty_insert_flip_char(&p->port, 0, TTY_BREAK);
 		tty_schedule_flip(tty);
 	}
 
diff --git a/drivers/tty/mxser.c b/drivers/tty/mxser.c
index 40113868bec2..450c4507cb5b 100644
--- a/drivers/tty/mxser.c
+++ b/drivers/tty/mxser.c
@@ -2079,7 +2079,7 @@ static void mxser_receive_chars(struct tty_struct *tty,
 		}
 		while (gdl--) {
 			ch = inb(port->ioaddr + UART_RX);
-			tty_insert_flip_char(tty, ch, 0);
+			tty_insert_flip_char(&port->port, ch, 0);
 			cnt++;
 		}
 		goto end_intr;
@@ -2118,7 +2118,7 @@ intr_old:
 				} else
 					flag = TTY_BREAK;
 			}
-			tty_insert_flip_char(tty, ch, flag);
+			tty_insert_flip_char(&port->port, ch, flag);
 			cnt++;
 			if (cnt >= recv_room) {
 				if (!port->ldisc_stop_rx)
diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c
index 4572117988f8..769016504c88 100644
--- a/drivers/tty/n_gsm.c
+++ b/drivers/tty/n_gsm.c
@@ -1070,9 +1070,9 @@ static void gsm_process_modem(struct tty_struct *tty, struct gsm_dlci *dlci,
 		if ((mlines & TIOCM_CD) == 0 && (dlci->modem_rx & TIOCM_CD))
 			if (!(tty->termios.c_cflag & CLOCAL))
 				tty_hangup(tty);
-		if (brk & 0x01)
-			tty_insert_flip_char(tty, 0, TTY_BREAK);
 	}
+	if (brk & 0x01)
+		tty_insert_flip_char(&dlci->port, 0, TTY_BREAK);
 	dlci->modem_rx = mlines;
 }
 
@@ -1140,6 +1140,7 @@ static void gsm_control_modem(struct gsm_mux *gsm, u8 *data, int clen)
 
 static void gsm_control_rls(struct gsm_mux *gsm, u8 *data, int clen)
 {
+	struct tty_port *port;
 	struct tty_struct *tty;
 	unsigned int addr = 0 ;
 	u8 bits;
@@ -1163,16 +1164,19 @@ static void gsm_control_rls(struct gsm_mux *gsm, u8 *data, int clen)
 	bits = *dp;
 	if ((bits & 1) == 0)
 		return;
-	/* See if we have an uplink tty */
-	tty = tty_port_tty_get(&gsm->dlci[addr]->port);
 
+	port = &gsm->dlci[addr]->port;
+
+	if (bits & 2)
+		tty_insert_flip_char(port, 0, TTY_OVERRUN);
+	if (bits & 4)
+		tty_insert_flip_char(port, 0, TTY_PARITY);
+	if (bits & 8)
+		tty_insert_flip_char(port, 0, TTY_FRAME);
+
+	/* See if we have an uplink tty */
+	tty = tty_port_tty_get(port);
 	if (tty) {
-		if (bits & 2)
-			tty_insert_flip_char(tty, 0, TTY_OVERRUN);
-		if (bits & 4)
-			tty_insert_flip_char(tty, 0, TTY_PARITY);
-		if (bits & 8)
-			tty_insert_flip_char(tty, 0, TTY_FRAME);
 		tty_flip_buffer_push(tty);
 		tty_kref_put(tty);
 	}
diff --git a/drivers/tty/nozomi.c b/drivers/tty/nozomi.c
index a0c69ab04399..437a6366fb7b 100644
--- a/drivers/tty/nozomi.c
+++ b/drivers/tty/nozomi.c
@@ -855,7 +855,7 @@ static int receive_data(enum port_type index, struct nozomi *dc)
 		read_mem32((u32 *) buf, addr + offset, RECEIVE_BUF_MAX);
 
 		if (size == 1) {
-			tty_insert_flip_char(tty, buf[0], TTY_NORMAL);
+			tty_insert_flip_char(&port->port, buf[0], TTY_NORMAL);
 			size = 0;
 		} else if (size < RECEIVE_BUF_MAX) {
 			size -= tty_insert_flip_string(tty, (char *) buf, size);
diff --git a/drivers/tty/rocket.c b/drivers/tty/rocket.c
index 77d7bc94afaa..5848a767001a 100644
--- a/drivers/tty/rocket.c
+++ b/drivers/tty/rocket.c
@@ -379,7 +379,8 @@ static void rp_do_receive(struct r_port *info,
 				flag = TTY_OVERRUN;
 			else
 				flag = TTY_NORMAL;
-			tty_insert_flip_char(tty, CharNStat & 0xff, flag);
+			tty_insert_flip_char(&info->port, CharNStat & 0xff,
+					flag);
 			ToRecv--;
 		}
 
diff --git a/drivers/tty/serial/68328serial.c b/drivers/tty/serial/68328serial.c
index f99a84526f82..3719273cf0be 100644
--- a/drivers/tty/serial/68328serial.c
+++ b/drivers/tty/serial/68328serial.c
@@ -305,7 +305,7 @@ static void receive_chars(struct m68k_serial *info, struct tty_struct *tty,
 		else if (rx & URX_FRAME_ERROR)
 			flag = TTY_FRAME;
 
-		tty_insert_flip_char(tty, ch, flag);
+		tty_insert_flip_char(&info->tport, ch, flag);
 #ifndef CONFIG_XCOPILOT_BUGS
 	} while((rx = uart->urx.w) & URX_DATA_READY);
 #endif
diff --git a/drivers/tty/serial/ar933x_uart.c b/drivers/tty/serial/ar933x_uart.c
index 505c490c0b44..6ca5dd615f9e 100644
--- a/drivers/tty/serial/ar933x_uart.c
+++ b/drivers/tty/serial/ar933x_uart.c
@@ -297,10 +297,11 @@ static void ar933x_uart_set_termios(struct uart_port *port,
 
 static void ar933x_uart_rx_chars(struct ar933x_uart_port *up)
 {
+	struct tty_port *port = &up->port.state->port;
 	struct tty_struct *tty;
 	int max_count = 256;
 
-	tty = tty_port_tty_get(&up->port.state->port);
+	tty = tty_port_tty_get(port);
 	do {
 		unsigned int rdata;
 		unsigned char ch;
@@ -313,11 +314,6 @@ static void ar933x_uart_rx_chars(struct ar933x_uart_port *up)
 		ar933x_uart_write(up, AR933X_UART_DATA_REG,
 				  AR933X_UART_DATA_RX_CSR);
 
-		if (!tty) {
-			/* discard the data if no tty available */
-			continue;
-		}
-
 		up->port.icount.rx++;
 		ch = rdata & AR933X_UART_DATA_TX_RX_MASK;
 
@@ -325,7 +321,7 @@ static void ar933x_uart_rx_chars(struct ar933x_uart_port *up)
 			continue;
 
 		if ((up->port.ignore_status_mask & AR933X_DUMMY_STATUS_RD) == 0)
-			tty_insert_flip_char(tty, ch, TTY_NORMAL);
+			tty_insert_flip_char(port, ch, TTY_NORMAL);
 	} while (max_count-- > 0);
 
 	if (tty) {
diff --git a/drivers/tty/serial/bcm63xx_uart.c b/drivers/tty/serial/bcm63xx_uart.c
index c76a226080f2..de30b1909728 100644
--- a/drivers/tty/serial/bcm63xx_uart.c
+++ b/drivers/tty/serial/bcm63xx_uart.c
@@ -235,6 +235,7 @@ static const char *bcm_uart_type(struct uart_port *port)
  */
 static void bcm_uart_do_rx(struct uart_port *port)
 {
+	struct tty_port *port = &port->state->port;
 	struct tty_struct *tty;
 	unsigned int max_count;
 
@@ -242,7 +243,7 @@ static void bcm_uart_do_rx(struct uart_port *port)
 	 * higher than fifo size anyway since we're much faster than
 	 * serial port */
 	max_count = 32;
-	tty = port->state->port.tty;
+	tty = port->tty;
 	do {
 		unsigned int iestat, c, cstat;
 		char flag;
@@ -261,7 +262,7 @@ static void bcm_uart_do_rx(struct uart_port *port)
 			bcm_uart_writel(port, val, UART_CTL_REG);
 
 			port->icount.overrun++;
-			tty_insert_flip_char(tty, 0, TTY_OVERRUN);
+			tty_insert_flip_char(port, 0, TTY_OVERRUN);
 		}
 
 		if (!(iestat & UART_IR_STAT(UART_IR_RXNOTEMPTY)))
@@ -300,7 +301,7 @@ static void bcm_uart_do_rx(struct uart_port *port)
 
 
 		if ((cstat & port->ignore_status_mask) == 0)
-			tty_insert_flip_char(tty, c, flag);
+			tty_insert_flip_char(port, c, flag);
 
 	} while (--max_count);
 
diff --git a/drivers/tty/serial/bfin_sport_uart.c b/drivers/tty/serial/bfin_sport_uart.c
index f5d117379b60..e4d3ac2e8992 100644
--- a/drivers/tty/serial/bfin_sport_uart.c
+++ b/drivers/tty/serial/bfin_sport_uart.c
@@ -149,7 +149,8 @@ static int sport_uart_setup(struct sport_uart_port *up, int size, int baud_rate)
 static irqreturn_t sport_uart_rx_irq(int irq, void *dev_id)
 {
 	struct sport_uart_port *up = dev_id;
-	struct tty_struct *tty = up->port.state->port.tty;
+	struct tty_port *port = &up->port.state->port;
+	struct tty_struct *tty = tport->tty;
 	unsigned int ch;
 
 	spin_lock(&up->port.lock);
@@ -159,7 +160,7 @@ static irqreturn_t sport_uart_rx_irq(int irq, void *dev_id)
 		up->port.icount.rx++;
 
 		if (!uart_handle_sysrq_char(&up->port, ch))
-			tty_insert_flip_char(tty, ch, TTY_NORMAL);
+			tty_insert_flip_char(port, ch, TTY_NORMAL);
 	}
 	tty_flip_buffer_push(tty);
 
@@ -182,7 +183,6 @@ static irqreturn_t sport_uart_tx_irq(int irq, void *dev_id)
 static irqreturn_t sport_uart_err_irq(int irq, void *dev_id)
 {
 	struct sport_uart_port *up = dev_id;
-	struct tty_struct *tty = up->port.state->port.tty;
 	unsigned int stat = SPORT_GET_STAT(up);
 
 	spin_lock(&up->port.lock);
@@ -190,7 +190,7 @@ static irqreturn_t sport_uart_err_irq(int irq, void *dev_id)
 	/* Overflow in RX FIFO */
 	if (stat & ROVF) {
 		up->port.icount.overrun++;
-		tty_insert_flip_char(tty, 0, TTY_OVERRUN);
+		tty_insert_flip_char(&up->port.state->port, 0, TTY_OVERRUN);
 		SPORT_PUT_STAT(up, ROVF); /* Clear ROVF bit */
 	}
 	/* These should not happen */
@@ -205,6 +205,8 @@ static irqreturn_t sport_uart_err_irq(int irq, void *dev_id)
 	SSYNC();
 
 	spin_unlock(&up->port.lock);
+	/* XXX we don't push the overrun bit to TTY? */
+
 	return IRQ_HANDLED;
 }
 
diff --git a/drivers/tty/serial/cpm_uart/cpm_uart_core.c b/drivers/tty/serial/cpm_uart/cpm_uart_core.c
index 42d5eb0125b3..108122f8f3c2 100644
--- a/drivers/tty/serial/cpm_uart/cpm_uart_core.c
+++ b/drivers/tty/serial/cpm_uart/cpm_uart_core.c
@@ -303,7 +303,7 @@ static void cpm_uart_int_rx(struct uart_port *port)
 			}
 #endif
 		      error_return:
-			tty_insert_flip_char(tty, ch, flg);
+			tty_insert_flip_char(tport, ch, flg);
 
 		}		/* End while (i--) */
 
diff --git a/drivers/tty/serial/crisv10.c b/drivers/tty/serial/crisv10.c
index 35ee6a2c6877..d12306625458 100644
--- a/drivers/tty/serial/crisv10.c
+++ b/drivers/tty/serial/crisv10.c
@@ -1760,8 +1760,7 @@ add_char_and_flag(struct e100_serial *info, unsigned char data, unsigned char fl
 
 		info->icount.rx++;
 	} else {
-		struct tty_struct *tty = info->port.tty;
-		tty_insert_flip_char(tty, data, flag);
+		tty_insert_flip_char(&info->port, data, flag);
 		info->icount.rx++;
 	}
 
@@ -2338,8 +2337,7 @@ more_data:
 					data_in, data_read);
 				char flag = TTY_NORMAL;
 				if (info->errorcode == ERRCODE_INSERT_BREAK) {
-					struct tty_struct *tty = info->port.tty;
-					tty_insert_flip_char(tty, 0, flag);
+					tty_insert_flip_char(&info->port, 0, flag);
 					info->icount.rx++;
 				}
 
@@ -2353,7 +2351,7 @@ more_data:
 					info->icount.frame++;
 					flag = TTY_FRAME;
 				}
-				tty_insert_flip_char(tty, data, flag);
+				tty_insert_flip_char(&info->port, data, flag);
 				info->errorcode = 0;
 			}
 			info->break_detected_cnt = 0;
@@ -2369,7 +2367,7 @@ more_data:
 			log_int(rdpc(), 0, 0);
 		}
 		);
-		tty_insert_flip_char(tty,
+		tty_insert_flip_char(&info->port,
 			IO_EXTRACT(R_SERIAL0_READ, data_in, data_read),
 			TTY_NORMAL);
 	} else {
diff --git a/drivers/tty/serial/efm32-uart.c b/drivers/tty/serial/efm32-uart.c
index a8cbb2670521..bdf67b0cb8b6 100644
--- a/drivers/tty/serial/efm32-uart.c
+++ b/drivers/tty/serial/efm32-uart.c
@@ -194,8 +194,7 @@ static void efm32_uart_break_ctl(struct uart_port *port, int ctl)
 	/* not possible without fiddling with gpios */
 }
 
-static void efm32_uart_rx_chars(struct efm32_uart_port *efm_port,
-		struct tty_struct *tty)
+static void efm32_uart_rx_chars(struct efm32_uart_port *efm_port)
 {
 	struct uart_port *port = &efm_port->port;
 
@@ -237,8 +236,8 @@ static void efm32_uart_rx_chars(struct efm32_uart_port *efm_port,
 					rxdata & UARTn_RXDATAX_RXDATA__MASK))
 			continue;
 
-		if (tty && (rxdata & port->ignore_status_mask) == 0)
-			tty_insert_flip_char(tty,
+		if ((rxdata & port->ignore_status_mask) == 0)
+			tty_insert_flip_char(&port->state->port,
 					rxdata & UARTn_RXDATAX_RXDATA__MASK, flag);
 	}
 }
@@ -249,15 +248,16 @@ static irqreturn_t efm32_uart_rxirq(int irq, void *data)
 	u32 irqflag = efm32_uart_read32(efm_port, UARTn_IF);
 	int handled = IRQ_NONE;
 	struct uart_port *port = &efm_port->port;
+	struct tty_port *tport = &port->state->port;
 	struct tty_struct *tty;
 
 	spin_lock(&port->lock);
 
-	tty = tty_kref_get(port->state->port.tty);
+	tty = tty_kref_get(tport->tty);
 
 	if (irqflag & UARTn_IF_RXDATAV) {
 		efm32_uart_write32(efm_port, UARTn_IF_RXDATAV, UARTn_IFC);
-		efm32_uart_rx_chars(efm_port, tty);
+		efm32_uart_rx_chars(efm_port);
 
 		handled = IRQ_HANDLED;
 	}
@@ -265,8 +265,7 @@ static irqreturn_t efm32_uart_rxirq(int irq, void *data)
 	if (irqflag & UARTn_IF_RXOF) {
 		efm32_uart_write32(efm_port, UARTn_IF_RXOF, UARTn_IFC);
 		port->icount.overrun++;
-		if (tty)
-			tty_insert_flip_char(tty, 0, TTY_OVERRUN);
+		tty_insert_flip_char(tport, 0, TTY_OVERRUN);
 
 		handled = IRQ_HANDLED;
 	}
diff --git a/drivers/tty/serial/icom.c b/drivers/tty/serial/icom.c
index 72b6334bcf1a..2b0b60ff7f01 100644
--- a/drivers/tty/serial/icom.c
+++ b/drivers/tty/serial/icom.c
@@ -734,7 +734,8 @@ static void xmit_interrupt(u16 port_int_reg, struct icom_port *icom_port)
 static void recv_interrupt(u16 port_int_reg, struct icom_port *icom_port)
 {
 	short int count, rcv_buff;
-	struct tty_struct *tty = icom_port->uart_port.state->port.tty;
+	struct tty_port *port = &icom_port->uart_port.state->port;
+	struct tty_struct *tty = port->tty;
 	unsigned short int status;
 	struct uart_icount *icount;
 	unsigned long offset;
@@ -812,7 +813,7 @@ static void recv_interrupt(u16 port_int_reg, struct icom_port *icom_port)
 
 		}
 
-		tty_insert_flip_char(tty, *(icom_port->recv_buf + offset + count - 1), flag);
+		tty_insert_flip_char(port, *(icom_port->recv_buf + offset + count - 1), flag);
 
 		if (status & SA_FLAGS_OVERRUN)
 			/*
@@ -820,7 +821,7 @@ static void recv_interrupt(u16 port_int_reg, struct icom_port *icom_port)
 			 * reported immediately, and doesn't
 			 * affect the current character
 			 */
-			tty_insert_flip_char(tty, 0, TTY_OVERRUN);
+			tty_insert_flip_char(port, 0, TTY_OVERRUN);
 ignore_char:
 		icom_port->statStg->rcv[rcv_buff].flags = 0;
 		icom_port->statStg->rcv[rcv_buff].leLength = 0;
diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c
index 1a2488495f69..f60c4028b6e1 100644
--- a/drivers/tty/serial/imx.c
+++ b/drivers/tty/serial/imx.c
@@ -517,6 +517,8 @@ static irqreturn_t imx_rxint(int irq, void *dev_id)
 	struct imx_port *sport = dev_id;
 	unsigned int rx, flg, ignored = 0;
 	struct tty_struct *tty = sport->port.state->port.tty;
+	struct tty_port *port = &sport->port.state->port;
+	struct tty_struct *tty = port->tty;
 	unsigned long flags, temp;
 
 	spin_lock_irqsave(&sport->port.lock, flags);
@@ -569,7 +571,7 @@ static irqreturn_t imx_rxint(int irq, void *dev_id)
 #endif
 		}
 
-		tty_insert_flip_char(tty, rx, flg);
+		tty_insert_flip_char(port, rx, flg);
 	}
 
 out:
diff --git a/drivers/tty/serial/jsm/jsm_tty.c b/drivers/tty/serial/jsm/jsm_tty.c
index 3969e54744cc..ac1d36cb2032 100644
--- a/drivers/tty/serial/jsm/jsm_tty.c
+++ b/drivers/tty/serial/jsm/jsm_tty.c
@@ -631,13 +631,13 @@ void jsm_input(struct jsm_channel *ch)
 				 * format it likes.
 				 */
 				if (*(ch->ch_equeue +tail +i) & UART_LSR_BI)
-					tty_insert_flip_char(tp, *(ch->ch_rqueue +tail +i),  TTY_BREAK);
+					tty_insert_flip_char(port, *(ch->ch_rqueue +tail +i),  TTY_BREAK);
 				else if (*(ch->ch_equeue +tail +i) & UART_LSR_PE)
-					tty_insert_flip_char(tp, *(ch->ch_rqueue +tail +i), TTY_PARITY);
+					tty_insert_flip_char(port, *(ch->ch_rqueue +tail +i), TTY_PARITY);
 				else if (*(ch->ch_equeue +tail +i) & UART_LSR_FE)
-					tty_insert_flip_char(tp, *(ch->ch_rqueue +tail +i), TTY_FRAME);
+					tty_insert_flip_char(port, *(ch->ch_rqueue +tail +i), TTY_FRAME);
 				else
-					tty_insert_flip_char(tp, *(ch->ch_rqueue +tail +i), TTY_NORMAL);
+					tty_insert_flip_char(port, *(ch->ch_rqueue +tail +i), TTY_NORMAL);
 			}
 		} else {
 			tty_insert_flip_string(tp, ch->ch_rqueue + tail, s) ;
diff --git a/drivers/tty/serial/kgdb_nmi.c b/drivers/tty/serial/kgdb_nmi.c
index 6ac2b797a764..ba2ef627d9c6 100644
--- a/drivers/tty/serial/kgdb_nmi.c
+++ b/drivers/tty/serial/kgdb_nmi.c
@@ -216,7 +216,7 @@ static void kgdb_nmi_tty_receiver(unsigned long data)
 		return;
 
 	while (kfifo_out(&priv->fifo, &ch, 1))
-		tty_insert_flip_char(priv->port.tty, ch, TTY_NORMAL);
+		tty_insert_flip_char(&priv->port, ch, TTY_NORMAL);
 	tty_flip_buffer_push(priv->port.tty);
 
 	tty_kref_put(tty);
diff --git a/drivers/tty/serial/lantiq.c b/drivers/tty/serial/lantiq.c
index 02da071fe1e7..1933fe3c98dd 100644
--- a/drivers/tty/serial/lantiq.c
+++ b/drivers/tty/serial/lantiq.c
@@ -162,7 +162,8 @@ lqasc_enable_ms(struct uart_port *port)
 static int
 lqasc_rx_chars(struct uart_port *port)
 {
-	struct tty_struct *tty = tty_port_tty_get(&port->state->port);
+	struct tty_port *tport = &port->state->port;
+	struct tty_struct *tty = tty_port_tty_get(tport);
 	unsigned int ch = 0, rsr = 0, fifocnt;
 
 	if (!tty) {
@@ -208,7 +209,7 @@ lqasc_rx_chars(struct uart_port *port)
 		}
 
 		if ((rsr & port->ignore_status_mask) == 0)
-			tty_insert_flip_char(tty, ch, flag);
+			tty_insert_flip_char(tport, ch, flag);
 
 		if (rsr & ASCSTATE_ROE)
 			/*
@@ -216,7 +217,7 @@ lqasc_rx_chars(struct uart_port *port)
 			 * immediately, and doesn't affect the current
 			 * character
 			 */
-			tty_insert_flip_char(tty, 0, TTY_OVERRUN);
+			tty_insert_flip_char(tport, 0, TTY_OVERRUN);
 	}
 	if (ch != 0)
 		tty_flip_buffer_push(tty);
diff --git a/drivers/tty/serial/lpc32xx_hs.c b/drivers/tty/serial/lpc32xx_hs.c
index 0e86bff3fe2a..5cd180564c03 100644
--- a/drivers/tty/serial/lpc32xx_hs.c
+++ b/drivers/tty/serial/lpc32xx_hs.c
@@ -257,8 +257,9 @@ static void __serial_uart_flush(struct uart_port *port)
 
 static void __serial_lpc32xx_rx(struct uart_port *port)
 {
+	struct tty_port *tport = &port->state->port;
 	unsigned int tmp, flag;
-	struct tty_struct *tty = tty_port_tty_get(&port->state->port);
+	struct tty_struct *tty = tty_port_tty_get(tport);
 
 	if (!tty) {
 		/* Discard data: no tty available */
@@ -281,10 +282,10 @@ static void __serial_lpc32xx_rx(struct uart_port *port)
 			       LPC32XX_HSUART_IIR(port->membase));
 			port->icount.frame++;
 			flag = TTY_FRAME;
-			tty_insert_flip_char(tty, 0, TTY_FRAME);
+			tty_insert_flip_char(tport, 0, TTY_FRAME);
 		}
 
-		tty_insert_flip_char(tty, (tmp & 0xFF), flag);
+		tty_insert_flip_char(tport, (tmp & 0xFF), flag);
 
 		tmp = readl(LPC32XX_HSUART_FIFO(port->membase));
 	}
@@ -332,7 +333,8 @@ exit_tx:
 static irqreturn_t serial_lpc32xx_interrupt(int irq, void *dev_id)
 {
 	struct uart_port *port = dev_id;
-	struct tty_struct *tty = tty_port_tty_get(&port->state->port);
+	struct tty_port *port = &port->state->port;
+	struct tty_struct *tty = tty_port_tty_get(tport);
 	u32 status;
 
 	spin_lock(&port->lock);
@@ -356,8 +358,8 @@ static irqreturn_t serial_lpc32xx_interrupt(int irq, void *dev_id)
 		writel(LPC32XX_HSU_RX_OE_INT,
 		       LPC32XX_HSUART_IIR(port->membase));
 		port->icount.overrun++;
+		tty_insert_flip_char(tport, 0, TTY_OVERRUN);
 		if (tty) {
-			tty_insert_flip_char(tty, 0, TTY_OVERRUN);
 			tty_schedule_flip(tty);
 		}
 	}
diff --git a/drivers/tty/serial/m32r_sio.c b/drivers/tty/serial/m32r_sio.c
index b13949ad3408..2e9a390f2ac4 100644
--- a/drivers/tty/serial/m32r_sio.c
+++ b/drivers/tty/serial/m32r_sio.c
@@ -300,7 +300,8 @@ static void m32r_sio_enable_ms(struct uart_port *port)
 
 static void receive_chars(struct uart_sio_port *up, int *status)
 {
-	struct tty_struct *tty = up->port.state->port.tty;
+	struct tty_port *port = &up->port.state->port;
+	struct tty_struct *tty = tport->tty;
 	unsigned char ch;
 	unsigned char flag;
 	int max_count = 256;
@@ -355,7 +356,7 @@ static void receive_chars(struct uart_sio_port *up, int *status)
 		if (uart_handle_sysrq_char(&up->port, ch))
 			goto ignore_char;
 		if ((*status & up->port.ignore_status_mask) == 0)
-			tty_insert_flip_char(tty, ch, flag);
+			tty_insert_flip_char(port, ch, flag);
 
 		if (*status & UART_LSR_OE) {
 			/*
@@ -363,7 +364,7 @@ static void receive_chars(struct uart_sio_port *up, int *status)
 			 * immediately, and doesn't affect the current
 			 * character.
 			 */
-			tty_insert_flip_char(tty, 0, TTY_OVERRUN);
+			tty_insert_flip_char(port, 0, TTY_OVERRUN);
 		}
 	ignore_char:
 		*status = serial_in(up, UART_LSR);
diff --git a/drivers/tty/serial/mpc52xx_uart.c b/drivers/tty/serial/mpc52xx_uart.c
index 7c23c4f4c58d..0145aeb7721c 100644
--- a/drivers/tty/serial/mpc52xx_uart.c
+++ b/drivers/tty/serial/mpc52xx_uart.c
@@ -941,7 +941,8 @@ static struct uart_ops mpc52xx_uart_ops = {
 static inline int
 mpc52xx_uart_int_rx_chars(struct uart_port *port)
 {
-	struct tty_struct *tty = port->state->port.tty;
+	struct tty_port *tport = &port->state->port;
+	struct tty_struct *tty = tport->tty;
 	unsigned char ch, flag;
 	unsigned short status;
 
@@ -986,14 +987,14 @@ mpc52xx_uart_int_rx_chars(struct uart_port *port)
 			out_8(&PSC(port)->command, MPC52xx_PSC_RST_ERR_STAT);
 
 		}
-		tty_insert_flip_char(tty, ch, flag);
+		tty_insert_flip_char(tport, ch, flag);
 		if (status & MPC52xx_PSC_SR_OE) {
 			/*
 			 * Overrun is special, since it's
 			 * reported immediately, and doesn't
 			 * affect the current character
 			 */
-			tty_insert_flip_char(tty, 0, TTY_OVERRUN);
+			tty_insert_flip_char(tport, 0, TTY_OVERRUN);
 			port->icount.overrun++;
 		}
 	}
diff --git a/drivers/tty/serial/mpsc.c b/drivers/tty/serial/mpsc.c
index 50366863cfa3..4bcbc66c48c4 100644
--- a/drivers/tty/serial/mpsc.c
+++ b/drivers/tty/serial/mpsc.c
@@ -1040,10 +1040,10 @@ static int mpsc_rx_intr(struct mpsc_port_info *pi)
 						| SDMA_DESC_CMDSTAT_FR
 						| SDMA_DESC_CMDSTAT_OR)))
 				&& !(cmdstat & pi->port.ignore_status_mask)) {
-			tty_insert_flip_char(tty, *bp, flag);
+			tty_insert_flip_char(port, *bp, flag);
 		} else {
 			for (i=0; i<bytes_in; i++)
-				tty_insert_flip_char(tty, *bp++, TTY_NORMAL);
+				tty_insert_flip_char(port, *bp++, TTY_NORMAL);
 
 			pi->port.icount.rx += bytes_in;
 		}
diff --git a/drivers/tty/serial/msm_serial.c b/drivers/tty/serial/msm_serial.c
index 95fd39be2934..e4eb81a12793 100644
--- a/drivers/tty/serial/msm_serial.c
+++ b/drivers/tty/serial/msm_serial.c
@@ -91,14 +91,15 @@ static void msm_enable_ms(struct uart_port *port)
 
 static void handle_rx_dm(struct uart_port *port, unsigned int misr)
 {
-	struct tty_struct *tty = port->state->port.tty;
+	struct tty_port *tport = &port->state->port;
+	struct tty_struct *tty = tport->tty;
 	unsigned int sr;
 	int count = 0;
 	struct msm_port *msm_port = UART_TO_MSM(port);
 
 	if ((msm_read(port, UART_SR) & UART_SR_OVERRUN)) {
 		port->icount.overrun++;
-		tty_insert_flip_char(tty, 0, TTY_OVERRUN);
+		tty_insert_flip_char(tport, 0, TTY_OVERRUN);
 		msm_write(port, UART_CR_CMD_RESET_ERR, UART_CR);
 	}
 
@@ -146,7 +147,8 @@ static void handle_rx_dm(struct uart_port *port, unsigned int misr)
 
 static void handle_rx(struct uart_port *port)
 {
-	struct tty_struct *tty = port->state->port.tty;
+	struct tty_port *tport = &port->state->port;
+	struct tty_struct *tty = tport->tty;
 	unsigned int sr;
 
 	/*
@@ -155,7 +157,7 @@ static void handle_rx(struct uart_port *port)
 	 */
 	if ((msm_read(port, UART_SR) & UART_SR_OVERRUN)) {
 		port->icount.overrun++;
-		tty_insert_flip_char(tty, 0, TTY_OVERRUN);
+		tty_insert_flip_char(tport, 0, TTY_OVERRUN);
 		msm_write(port, UART_CR_CMD_RESET_ERR, UART_CR);
 	}
 
@@ -186,7 +188,7 @@ static void handle_rx(struct uart_port *port)
 		}
 
 		if (!uart_handle_sysrq_char(port, c))
-			tty_insert_flip_char(tty, c, flag);
+			tty_insert_flip_char(tport, c, flag);
 	}
 
 	tty_flip_buffer_push(tty);
diff --git a/drivers/tty/serial/msm_serial_hs.c b/drivers/tty/serial/msm_serial_hs.c
index 1fa92284ade0..6aa9d470ef54 100644
--- a/drivers/tty/serial/msm_serial_hs.c
+++ b/drivers/tty/serial/msm_serial_hs.c
@@ -908,6 +908,7 @@ static void msm_hs_dmov_rx_callback(struct msm_dmov_cmd *cmd_ptr,
 	unsigned long flags;
 	unsigned int flush;
 	struct tty_struct *tty;
+	struct tty_port *port;
 	struct uart_port *uport;
 	struct msm_hs_port *msm_uport;
 
@@ -917,7 +918,8 @@ static void msm_hs_dmov_rx_callback(struct msm_dmov_cmd *cmd_ptr,
 	spin_lock_irqsave(&uport->lock, flags);
 	clk_enable(msm_uport->clk);
 
-	tty = uport->state->port.tty;
+	port = &uport->state->port;
+	tty = port->tty;
 
 	msm_hs_write(uport, UARTDM_CR_ADDR, STALE_EVENT_DISABLE);
 
@@ -926,7 +928,7 @@ static void msm_hs_dmov_rx_callback(struct msm_dmov_cmd *cmd_ptr,
 	/* overflow is not connect to data in a FIFO */
 	if (unlikely((status & UARTDM_SR_OVERRUN_BMSK) &&
 		     (uport->read_status_mask & CREAD))) {
-		tty_insert_flip_char(tty, 0, TTY_OVERRUN);
+		tty_insert_flip_char(port, 0, TTY_OVERRUN);
 		uport->icount.buf_overrun++;
 		error_f = 1;
 	}
@@ -939,7 +941,7 @@ static void msm_hs_dmov_rx_callback(struct msm_dmov_cmd *cmd_ptr,
 		uport->icount.parity++;
 		error_f = 1;
 		if (uport->ignore_status_mask & IGNPAR)
-			tty_insert_flip_char(tty, 0, TTY_PARITY);
+			tty_insert_flip_char(port, 0, TTY_PARITY);
 	}
 
 	if (error_f)
@@ -1344,7 +1346,6 @@ static irqreturn_t msm_hs_rx_wakeup_isr(int irq, void *dev)
 	unsigned long flags;
 	struct msm_hs_port *msm_uport = dev;
 	struct uart_port *uport = &msm_uport->uport;
-	struct tty_struct *tty = NULL;
 
 	spin_lock_irqsave(&uport->lock, flags);
 	if (msm_uport->clk_state == MSM_HS_CLK_OFF) {
@@ -1361,8 +1362,7 @@ static irqreturn_t msm_hs_rx_wakeup_isr(int irq, void *dev)
 		 * optionally inject char into tty rx */
 		msm_hs_request_clock_on_locked(uport);
 		if (msm_uport->rx_wakeup.inject_rx) {
-			tty = uport->state->port.tty;
-			tty_insert_flip_char(tty,
+			tty_insert_flip_char(&uport->state->port,
 					     msm_uport->rx_wakeup.rx_to_inject,
 					     TTY_NORMAL);
 			queue_work(msm_hs_workqueue, &msm_uport->rx.tty_work);
diff --git a/drivers/tty/serial/mux.c b/drivers/tty/serial/mux.c
index e2775b6df5a5..83b21686020e 100644
--- a/drivers/tty/serial/mux.c
+++ b/drivers/tty/serial/mux.c
@@ -242,8 +242,9 @@ static void mux_write(struct uart_port *port)
  */
 static void mux_read(struct uart_port *port)
 {
+	struct tty_port *tport = &port->state->port;
 	int data;
-	struct tty_struct *tty = port->state->port.tty;
+	struct tty_struct *tty = tport->tty;
 	__u32 start_count = port->icount.rx;
 
 	while(1) {
@@ -266,7 +267,7 @@ static void mux_read(struct uart_port *port)
 		if (uart_handle_sysrq_char(port, data & 0xffu))
 			continue;
 
-		tty_insert_flip_char(tty, data & 0xFF, TTY_NORMAL);
+		tty_insert_flip_char(tport, data & 0xFF, TTY_NORMAL);
 	}
 	
 	if (start_count != port->icount.rx) {
diff --git a/drivers/tty/serial/nwpserial.c b/drivers/tty/serial/nwpserial.c
index dd4c31d1aee5..10d64a3697fb 100644
--- a/drivers/tty/serial/nwpserial.c
+++ b/drivers/tty/serial/nwpserial.c
@@ -128,7 +128,8 @@ static void nwpserial_config_port(struct uart_port *port, int flags)
 static irqreturn_t nwpserial_interrupt(int irq, void *dev_id)
 {
 	struct nwpserial_port *up = dev_id;
-	struct tty_struct *tty = up->port.state->port.tty;
+	struct tty_port *port = &up->port.state->port;
+	struct tty_struct *tty = port->tty;
 	irqreturn_t ret;
 	unsigned int iir;
 	unsigned char ch;
@@ -146,7 +147,7 @@ static irqreturn_t nwpserial_interrupt(int irq, void *dev_id)
 		up->port.icount.rx++;
 		ch = dcr_read(up->dcr_host, UART_RX);
 		if (up->port.ignore_status_mask != NWPSERIAL_STATUS_RXVALID)
-			tty_insert_flip_char(tty, ch, TTY_NORMAL);
+			tty_insert_flip_char(port, ch, TTY_NORMAL);
 	} while (dcr_read(up->dcr_host, UART_LSR) & UART_LSR_DR);
 
 	tty_flip_buffer_push(tty);
diff --git a/drivers/tty/serial/pmac_zilog.c b/drivers/tty/serial/pmac_zilog.c
index 333c8d012b0e..73a3f295e7c4 100644
--- a/drivers/tty/serial/pmac_zilog.c
+++ b/drivers/tty/serial/pmac_zilog.c
@@ -229,6 +229,7 @@ static void pmz_interrupt_control(struct uart_pmac_port *uap, int enable)
 
 static struct tty_struct *pmz_receive_chars(struct uart_pmac_port *uap)
 {
+	struct tty_port *port;
 	struct tty_struct *tty = NULL;
 	unsigned char ch, r1, drop, error, flag;
 	int loops = 0;
@@ -239,7 +240,8 @@ static struct tty_struct *pmz_receive_chars(struct uart_pmac_port *uap)
 		(void)read_zsdata(uap);
 		return NULL;
 	}
-	tty = uap->port.state->port.tty;
+	port = &uap->port.state->port;
+	tty = port->tty; /* TOCTOU above */
 
 	while (1) {
 		error = 0;
@@ -309,10 +311,10 @@ static struct tty_struct *pmz_receive_chars(struct uart_pmac_port *uap)
 
 		if (uap->port.ignore_status_mask == 0xff ||
 		    (r1 & uap->port.ignore_status_mask) == 0) {
-			tty_insert_flip_char(tty, ch, flag);
+			tty_insert_flip_char(port, ch, flag);
 		}
 		if (r1 & Rx_OVR)
-			tty_insert_flip_char(tty, 0, TTY_OVERRUN);
+			tty_insert_flip_char(port, 0, TTY_OVERRUN);
 	next_char:
 		/* We can get stuck in an infinite loop getting char 0 when the
 		 * line is in a wrong HW state, we break that here.
diff --git a/drivers/tty/serial/sc26xx.c b/drivers/tty/serial/sc26xx.c
index aced1dd923d8..0cd0e4ac12a6 100644
--- a/drivers/tty/serial/sc26xx.c
+++ b/drivers/tty/serial/sc26xx.c
@@ -138,14 +138,18 @@ static void sc26xx_disable_irq(struct uart_port *port, int mask)
 
 static struct tty_struct *receive_chars(struct uart_port *port)
 {
+	struct tty_port *tport = NULL;
 	struct tty_struct *tty = NULL;
 	int limit = 10000;
 	unsigned char ch;
 	char flag;
 	u8 status;
 
-	if (port->state != NULL)		/* Unopened serial console */
-		tty = port->state->port.tty;
+	/* FIXME what is this trying to achieve? */
+	if (port->state != NULL) {		/* Unopened serial console */
+		tport = &port->state->port;
+		tty = tport->tty;
+	}
 
 	while (limit-- > 0) {
 		status = READ_SC_PORT(port, SR);
@@ -185,7 +189,7 @@ static struct tty_struct *receive_chars(struct uart_port *port)
 		if (status & port->ignore_status_mask)
 			continue;
 
-		tty_insert_flip_char(tty, ch, flag);
+		tty_insert_flip_char(tport, ch, flag);
 	}
 	return tty;
 }
diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c
index ba7863bbbb4d..675343a20f24 100644
--- a/drivers/tty/serial/serial_core.c
+++ b/drivers/tty/serial/serial_core.c
@@ -2792,10 +2792,10 @@ EXPORT_SYMBOL_GPL(uart_handle_cts_change);
 void uart_insert_char(struct uart_port *port, unsigned int status,
 		 unsigned int overrun, unsigned int ch, unsigned int flag)
 {
-	struct tty_struct *tty = port->state->port.tty;
+	struct tty_port *tport = &port->state->port;
 
 	if ((status & port->ignore_status_mask & ~overrun) == 0)
-		if (tty_insert_flip_char(tty, ch, flag) == 0)
+		if (tty_insert_flip_char(tport, ch, flag) == 0)
 			++port->icount.buf_overrun;
 
 	/*
@@ -2803,7 +2803,7 @@ void uart_insert_char(struct uart_port *port, unsigned int status,
 	 * it doesn't affect the current character.
 	 */
 	if (status & ~port->ignore_status_mask & overrun)
-		if (tty_insert_flip_char(tty, 0, TTY_OVERRUN) == 0)
+		if (tty_insert_flip_char(tport, 0, TTY_OVERRUN) == 0)
 			++port->icount.buf_overrun;
 }
 EXPORT_SYMBOL_GPL(uart_insert_char);
diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c
index cf96314770fb..ecef748f5385 100644
--- a/drivers/tty/serial/sh-sci.c
+++ b/drivers/tty/serial/sh-sci.c
@@ -620,7 +620,7 @@ static void sci_receive_chars(struct uart_port *port)
 			    sci_port->break_flag)
 				count = 0;
 			else
-				tty_insert_flip_char(tty, c, TTY_NORMAL);
+				tty_insert_flip_char(tport, c, TTY_NORMAL);
 		} else {
 			for (i = 0; i < count; i++) {
 				char c = serial_port_in(port, SCxRDR);
@@ -662,7 +662,7 @@ static void sci_receive_chars(struct uart_port *port)
 				} else
 					flag = TTY_NORMAL;
 
-				tty_insert_flip_char(tty, c, flag);
+				tty_insert_flip_char(tport, c, flag);
 			}
 		}
 
@@ -721,7 +721,8 @@ static int sci_handle_errors(struct uart_port *port)
 {
 	int copied = 0;
 	unsigned short status = serial_port_in(port, SCxSR);
-	struct tty_struct *tty = port->state->port.tty;
+	struct tty_port *tport = &port->state->port;
+	struct tty_struct *tty = tport->tty;
 	struct sci_port *s = to_sci_port(port);
 
 	/*
@@ -732,7 +733,7 @@ static int sci_handle_errors(struct uart_port *port)
 			port->icount.overrun++;
 
 			/* overrun error */
-			if (tty_insert_flip_char(tty, 0, TTY_OVERRUN))
+			if (tty_insert_flip_char(tport, 0, TTY_OVERRUN))
 				copied++;
 
 			dev_notice(port->dev, "overrun error");
@@ -756,7 +757,7 @@ static int sci_handle_errors(struct uart_port *port)
 
 				dev_dbg(port->dev, "BREAK detected\n");
 
-				if (tty_insert_flip_char(tty, 0, TTY_BREAK))
+				if (tty_insert_flip_char(tport, 0, TTY_BREAK))
 					copied++;
 			}
 
@@ -764,7 +765,7 @@ static int sci_handle_errors(struct uart_port *port)
 			/* frame error */
 			port->icount.frame++;
 
-			if (tty_insert_flip_char(tty, 0, TTY_FRAME))
+			if (tty_insert_flip_char(tport, 0, TTY_FRAME))
 				copied++;
 
 			dev_notice(port->dev, "frame error\n");
@@ -775,7 +776,7 @@ static int sci_handle_errors(struct uart_port *port)
 		/* parity error */
 		port->icount.parity++;
 
-		if (tty_insert_flip_char(tty, 0, TTY_PARITY))
+		if (tty_insert_flip_char(tport, 0, TTY_PARITY))
 			copied++;
 
 		dev_notice(port->dev, "parity error");
@@ -789,7 +790,8 @@ static int sci_handle_errors(struct uart_port *port)
 
 static int sci_handle_fifo_overrun(struct uart_port *port)
 {
-	struct tty_struct *tty = port->state->port.tty;
+	struct tty_port *tport = &port->state->port;
+	struct tty_struct *tty = tport->tty;
 	struct sci_port *s = to_sci_port(port);
 	struct plat_sci_reg *reg;
 	int copied = 0;
@@ -803,7 +805,7 @@ static int sci_handle_fifo_overrun(struct uart_port *port)
 
 		port->icount.overrun++;
 
-		tty_insert_flip_char(tty, 0, TTY_OVERRUN);
+		tty_insert_flip_char(tport, 0, TTY_OVERRUN);
 		tty_flip_buffer_push(tty);
 
 		dev_notice(port->dev, "overrun error\n");
@@ -817,7 +819,8 @@ static int sci_handle_breaks(struct uart_port *port)
 {
 	int copied = 0;
 	unsigned short status = serial_port_in(port, SCxSR);
-	struct tty_struct *tty = port->state->port.tty;
+	struct tty_port *tport = &port->state->port;
+	struct tty_struct *tty = tport->tty;
 	struct sci_port *s = to_sci_port(port);
 
 	if (uart_handle_break(port))
@@ -832,7 +835,7 @@ static int sci_handle_breaks(struct uart_port *port)
 		port->icount.brk++;
 
 		/* Notify of BREAK */
-		if (tty_insert_flip_char(tty, 0, TTY_BREAK))
+		if (tty_insert_flip_char(tport, 0, TTY_BREAK))
 			copied++;
 
 		dev_dbg(port->dev, "BREAK detected\n");
@@ -1260,8 +1263,7 @@ static void sci_dma_tx_complete(void *arg)
 }
 
 /* Locking: called with port lock held */
-static int sci_dma_rx_push(struct sci_port *s, struct tty_struct *tty,
-			   size_t count)
+static int sci_dma_rx_push(struct sci_port *s, size_t count)
 {
 	struct uart_port *port = &s->port;
 	struct tty_port *tport = &port->state->port;
@@ -1285,7 +1287,7 @@ static int sci_dma_rx_push(struct sci_port *s, struct tty_struct *tty,
 		return room;
 
 	for (i = 0; i < room; i++)
-		tty_insert_flip_char(tty, ((u8 *)sg_virt(&s->sg_rx[active]))[i],
+		tty_insert_flip_char(tport, ((u8 *)sg_virt(&s->sg_rx[active]))[i],
 				     TTY_NORMAL);
 
 	port->icount.rx += room;
@@ -1305,7 +1307,7 @@ static void sci_dma_rx_complete(void *arg)
 
 	spin_lock_irqsave(&port->lock, flags);
 
-	count = sci_dma_rx_push(s, tty, s->buf_len_rx);
+	count = sci_dma_rx_push(s, s->buf_len_rx);
 
 	mod_timer(&s->rx_timer, jiffies + s->rx_timeout);
 
@@ -1418,7 +1420,7 @@ static void work_fn_rx(struct work_struct *work)
 			sh_desc->partial, sh_desc->cookie);
 
 		spin_lock_irqsave(&port->lock, flags);
-		count = sci_dma_rx_push(s, tty, sh_desc->partial);
+		count = sci_dma_rx_push(s, sh_desc->partial);
 		spin_unlock_irqrestore(&port->lock, flags);
 
 		if (count)
diff --git a/drivers/tty/serial/sn_console.c b/drivers/tty/serial/sn_console.c
index 1c6de9f58699..283232c64656 100644
--- a/drivers/tty/serial/sn_console.c
+++ b/drivers/tty/serial/sn_console.c
@@ -457,6 +457,7 @@ static int sn_debug_printf(const char *fmt, ...)
 static void
 sn_receive_chars(struct sn_cons_port *port, unsigned long flags)
 {
+	struct tty_port *tport = NULL;
 	int ch;
 	struct tty_struct *tty;
 
@@ -472,7 +473,8 @@ sn_receive_chars(struct sn_cons_port *port, unsigned long flags)
 
 	if (port->sc_port.state) {
 		/* The serial_core stuffs are initialized, use them */
-		tty = port->sc_port.state->port.tty;
+		tport = &port->sc_port.state->port;
+		tty = tport->tty;
 	}
 	else {
 		/* Not registered yet - can't pass to tty layer.  */
@@ -517,7 +519,7 @@ sn_receive_chars(struct sn_cons_port *port, unsigned long flags)
 
 		/* record the character to pass up to the tty layer */
 		if (tty) {
-			if(tty_insert_flip_char(tty, ch, TTY_NORMAL) == 0)
+			if (tty_insert_flip_char(tport, ch, TTY_NORMAL) == 0)
 				break;
 		}
 		port->sc_port.icount.rx++;
diff --git a/drivers/tty/serial/sunhv.c b/drivers/tty/serial/sunhv.c
index b9bf9c53f7fd..bbb102e3c035 100644
--- a/drivers/tty/serial/sunhv.c
+++ b/drivers/tty/serial/sunhv.c
@@ -99,7 +99,7 @@ static int receive_chars_getchar(struct uart_port *port, struct tty_struct *tty)
 			uart_handle_dcd_change(port, 1);
 		}
 
-		if (tty == NULL) {
+		if (port->state == NULL) {
 			uart_handle_sysrq_char(port, c);
 			continue;
 		}
@@ -109,7 +109,7 @@ static int receive_chars_getchar(struct uart_port *port, struct tty_struct *tty)
 		if (uart_handle_sysrq_char(port, c))
 			continue;
 
-		tty_insert_flip_char(tty, c, TTY_NORMAL);
+		tty_insert_flip_char(&port->state->port, c, TTY_NORMAL);
 	}
 
 	return saw_console_brk;
diff --git a/drivers/tty/serial/sunsab.c b/drivers/tty/serial/sunsab.c
index bd8b3b634103..4abc4d43a8e8 100644
--- a/drivers/tty/serial/sunsab.c
+++ b/drivers/tty/serial/sunsab.c
@@ -111,6 +111,7 @@ static struct tty_struct *
 receive_chars(struct uart_sunsab_port *up,
 	      union sab82532_irq_status *stat)
 {
+	struct tty_port *port = NULL;
 	struct tty_struct *tty = NULL;
 	unsigned char buf[32];
 	int saw_console_brk = 0;
@@ -118,8 +119,10 @@ receive_chars(struct uart_sunsab_port *up,
 	int count = 0;
 	int i;
 
-	if (up->port.state != NULL)		/* Unopened serial console */
-		tty = up->port.state->port.tty;
+	if (up->port.state != NULL) {		/* Unopened serial console */
+		port = &up->port.state->port;
+		tty = port->tty;
+	}
 
 	/* Read number of BYTES (Character + Status) available. */
 	if (stat->sreg.isr0 & SAB82532_ISR0_RPF) {
@@ -160,11 +163,6 @@ receive_chars(struct uart_sunsab_port *up,
 	for (i = 0; i < count; i++) {
 		unsigned char ch = buf[i], flag;
 
-		if (tty == NULL) {
-			uart_handle_sysrq_char(&up->port, ch);
-			continue;
-		}
-
 		flag = TTY_NORMAL;
 		up->port.icount.rx++;
 
@@ -213,9 +211,9 @@ receive_chars(struct uart_sunsab_port *up,
 
 		if ((stat->sreg.isr0 & (up->port.ignore_status_mask & 0xff)) == 0 &&
 		    (stat->sreg.isr1 & ((up->port.ignore_status_mask >> 8) & 0xff)) == 0)
-			tty_insert_flip_char(tty, ch, flag);
+			tty_insert_flip_char(port, ch, flag);
 		if (stat->sreg.isr0 & SAB82532_ISR0_RFO)
-			tty_insert_flip_char(tty, 0, TTY_OVERRUN);
+			tty_insert_flip_char(port, 0, TTY_OVERRUN);
 	}
 
 	if (saw_console_brk)
diff --git a/drivers/tty/serial/sunsu.c b/drivers/tty/serial/sunsu.c
index 220da3f9724f..52325968b06c 100644
--- a/drivers/tty/serial/sunsu.c
+++ b/drivers/tty/serial/sunsu.c
@@ -318,7 +318,8 @@ static void sunsu_enable_ms(struct uart_port *port)
 static struct tty_struct *
 receive_chars(struct uart_sunsu_port *up, unsigned char *status)
 {
-	struct tty_struct *tty = up->port.state->port.tty;
+	struct tty_port *port = &up->port.state->port;
+	struct tty_struct *tty = port->tty;
 	unsigned char ch, flag;
 	int max_count = 256;
 	int saw_console_brk = 0;
@@ -376,14 +377,14 @@ receive_chars(struct uart_sunsu_port *up, unsigned char *status)
 		if (uart_handle_sysrq_char(&up->port, ch))
 			goto ignore_char;
 		if ((*status & up->port.ignore_status_mask) == 0)
-			tty_insert_flip_char(tty, ch, flag);
+			tty_insert_flip_char(port, ch, flag);
 		if (*status & UART_LSR_OE)
 			/*
 			 * Overrun is special, since it's reported
 			 * immediately, and doesn't affect the current
 			 * character.
 			 */
-			 tty_insert_flip_char(tty, 0, TTY_OVERRUN);
+			 tty_insert_flip_char(port, 0, TTY_OVERRUN);
 	ignore_char:
 		*status = serial_inp(up, UART_LSR);
 	} while ((*status & UART_LSR_DR) && (max_count-- > 0));
diff --git a/drivers/tty/serial/sunzilog.c b/drivers/tty/serial/sunzilog.c
index aef4fab957c3..4a11be3849f6 100644
--- a/drivers/tty/serial/sunzilog.c
+++ b/drivers/tty/serial/sunzilog.c
@@ -327,13 +327,15 @@ static struct tty_struct *
 sunzilog_receive_chars(struct uart_sunzilog_port *up,
 		       struct zilog_channel __iomem *channel)
 {
+	struct tty_port *port = NULL;
 	struct tty_struct *tty;
 	unsigned char ch, r1, flag;
 
 	tty = NULL;
-	if (up->port.state != NULL &&		/* Unopened serial console */
-	    up->port.state->port.tty != NULL)	/* Keyboard || mouse */
-		tty = up->port.state->port.tty;
+	if (up->port.state != NULL) {		/* Unopened serial console */
+		port = &up->port.state->port;
+		tty = port->tty;		/* mouse => tty is NULL */
+	}
 
 	for (;;) {
 
@@ -366,11 +368,6 @@ sunzilog_receive_chars(struct uart_sunzilog_port *up,
 			continue;
 		}
 
-		if (tty == NULL) {
-			uart_handle_sysrq_char(&up->port, ch);
-			continue;
-		}
-
 		/* A real serial line, record the character and status.  */
 		flag = TTY_NORMAL;
 		up->port.icount.rx++;
@@ -400,10 +397,10 @@ sunzilog_receive_chars(struct uart_sunzilog_port *up,
 
 		if (up->port.ignore_status_mask == 0xff ||
 		    (r1 & up->port.ignore_status_mask) == 0) {
-		    	tty_insert_flip_char(tty, ch, flag);
+		    	tty_insert_flip_char(port, ch, flag);
 		}
 		if (r1 & Rx_OVR)
-			tty_insert_flip_char(tty, 0, TTY_OVERRUN);
+			tty_insert_flip_char(port, 0, TTY_OVERRUN);
 	}
 
 	return tty;
diff --git a/drivers/tty/serial/timbuart.c b/drivers/tty/serial/timbuart.c
index 5be0d68feceb..f40c634f7528 100644
--- a/drivers/tty/serial/timbuart.c
+++ b/drivers/tty/serial/timbuart.c
@@ -91,12 +91,12 @@ static void timbuart_flush_buffer(struct uart_port *port)
 
 static void timbuart_rx_chars(struct uart_port *port)
 {
-	struct tty_struct *tty = port->state->port.tty;
+	struct tty_port *tport = &port->state->port;
 
 	while (ioread32(port->membase + TIMBUART_ISR) & RXDP) {
 		u8 ch = ioread8(port->membase + TIMBUART_RXFIFO);
 		port->icount.rx++;
-		tty_insert_flip_char(tty, ch, TTY_NORMAL);
+		tty_insert_flip_char(tport, ch, TTY_NORMAL);
 	}
 
 	spin_unlock(&port->lock);
diff --git a/drivers/tty/serial/uartlite.c b/drivers/tty/serial/uartlite.c
index 89eee43c4e2d..5caf1f0ebc82 100644
--- a/drivers/tty/serial/uartlite.c
+++ b/drivers/tty/serial/uartlite.c
@@ -66,7 +66,7 @@ static struct uart_port ulite_ports[ULITE_NR_UARTS];
 
 static int ulite_receive(struct uart_port *port, int stat)
 {
-	struct tty_struct *tty = port->state->port.tty;
+	struct tty_port *tport = &port->state->port;
 	unsigned char ch = 0;
 	char flag = TTY_NORMAL;
 
@@ -103,13 +103,13 @@ static int ulite_receive(struct uart_port *port, int stat)
 	stat &= ~port->ignore_status_mask;
 
 	if (stat & ULITE_STATUS_RXVALID)
-		tty_insert_flip_char(tty, ch, flag);
+		tty_insert_flip_char(tport, ch, flag);
 
 	if (stat & ULITE_STATUS_FRAME)
-		tty_insert_flip_char(tty, 0, TTY_FRAME);
+		tty_insert_flip_char(tport, 0, TTY_FRAME);
 
 	if (stat & ULITE_STATUS_OVERRUN)
-		tty_insert_flip_char(tty, 0, TTY_OVERRUN);
+		tty_insert_flip_char(tport, 0, TTY_OVERRUN);
 
 	return 1;
 }
diff --git a/drivers/tty/serial/ucc_uart.c b/drivers/tty/serial/ucc_uart.c
index ed047d9ab1e2..7a2378627fa5 100644
--- a/drivers/tty/serial/ucc_uart.c
+++ b/drivers/tty/serial/ucc_uart.c
@@ -513,7 +513,7 @@ static void qe_uart_int_rx(struct uart_qe_port *qe_port)
 				continue;
 
 error_return:
-			tty_insert_flip_char(tty, ch, flg);
+			tty_insert_flip_char(tport, ch, flg);
 
 		}
 
@@ -561,7 +561,7 @@ handle_error:
 
 	/* Overrun does not affect the current character ! */
 	if (status & BD_SC_OV)
-		tty_insert_flip_char(tty, 0, TTY_OVERRUN);
+		tty_insert_flip_char(tport, 0, TTY_OVERRUN);
 #ifdef SUPPORT_SYSRQ
 	port->sysrq = 0;
 #endif
diff --git a/drivers/tty/serial/vt8500_serial.c b/drivers/tty/serial/vt8500_serial.c
index 8fd181436a6b..7f4112423f3d 100644
--- a/drivers/tty/serial/vt8500_serial.c
+++ b/drivers/tty/serial/vt8500_serial.c
@@ -136,7 +136,8 @@ static void vt8500_enable_ms(struct uart_port *port)
 
 static void handle_rx(struct uart_port *port)
 {
-	struct tty_struct *tty = tty_port_tty_get(&port->state->port);
+	struct tty_port *tport = &port->state->port;
+	struct tty_struct *tty = tty_port_tty_get(tport);
 	if (!tty) {
 		/* Discard data: no tty available */
 		int count = (vt8500_read(port, VT8500_URFIDX) & 0x1f00) >> 8;
@@ -151,7 +152,7 @@ static void handle_rx(struct uart_port *port)
 	 */
 	if ((vt8500_read(port, VT8500_URISR) & RXOVER)) {
 		port->icount.overrun++;
-		tty_insert_flip_char(tty, 0, TTY_OVERRUN);
+		tty_insert_flip_char(tport, 0, TTY_OVERRUN);
 	}
 
 	/* and now the main RX loop */
@@ -174,7 +175,7 @@ static void handle_rx(struct uart_port *port)
 		port->icount.rx++;
 
 		if (!uart_handle_sysrq_char(port, c))
-			tty_insert_flip_char(tty, c, flag);
+			tty_insert_flip_char(tport, c, flag);
 	}
 
 	tty_flip_buffer_push(tty);
diff --git a/drivers/tty/synclink.c b/drivers/tty/synclink.c
index d42b66195a49..33656b35db05 100644
--- a/drivers/tty/synclink.c
+++ b/drivers/tty/synclink.c
@@ -1507,13 +1507,13 @@ static void mgsl_isr_receive_data( struct mgsl_struct *info )
 			else if (status & RXSTATUS_FRAMING_ERROR)
 				flag = TTY_FRAME;
 		}	/* end of if (error) */
-		tty_insert_flip_char(tty, DataByte, flag);
+		tty_insert_flip_char(&info->port, DataByte, flag);
 		if (status & RXSTATUS_OVERRUN) {
 			/* Overrun is special, since it's
 			 * reported immediately, and doesn't
 			 * affect the current character
 			 */
-			work += tty_insert_flip_char(tty, 0, TTY_OVERRUN);
+			work += tty_insert_flip_char(&info->port, 0, TTY_OVERRUN);
 		}
 	}
 
diff --git a/drivers/tty/synclink_gt.c b/drivers/tty/synclink_gt.c
index 62a0db7ead07..473d7406db83 100644
--- a/drivers/tty/synclink_gt.c
+++ b/drivers/tty/synclink_gt.c
@@ -1893,10 +1893,8 @@ static void rx_async(struct slgt_info *info)
 				else if (status & BIT0)
 					stat = TTY_FRAME;
 			}
-			if (tty) {
-				tty_insert_flip_char(tty, ch, stat);
-				chars++;
-			}
+			tty_insert_flip_char(&info->port, ch, stat);
+			chars++;
 		}
 
 		if (i < count) {
@@ -2182,7 +2180,7 @@ static void isr_serial(struct slgt_info *info)
 			if (info->port.tty) {
 				if (!(status & info->ignore_status_mask)) {
 					if (info->read_status_mask & MASK_BREAK) {
-						tty_insert_flip_char(info->port.tty, 0, TTY_BREAK);
+						tty_insert_flip_char(&info->port, 0, TTY_BREAK);
 						if (info->port.flags & ASYNC_SAK)
 							do_SAK(info->port.tty);
 					}
diff --git a/drivers/tty/synclinkmp.c b/drivers/tty/synclinkmp.c
index 33b7314cc6c7..f5794f3d840f 100644
--- a/drivers/tty/synclinkmp.c
+++ b/drivers/tty/synclinkmp.c
@@ -2131,13 +2131,11 @@ static void isr_rxint(SLMP_INFO * info)
 			/* process break detection if tty control
 			 * is not set to ignore it
 			 */
-			if ( tty ) {
-				if (!(status & info->ignore_status_mask1)) {
-					if (info->read_status_mask1 & BRKD) {
-						tty_insert_flip_char(tty, 0, TTY_BREAK);
-						if (info->port.flags & ASYNC_SAK)
-							do_SAK(tty);
-					}
+			if (!(status & info->ignore_status_mask1)) {
+				if (info->read_status_mask1 & BRKD) {
+					tty_insert_flip_char(&info->port, 0, TTY_BREAK);
+					if (tty && (info->port.flags & ASYNC_SAK))
+						do_SAK(tty);
 				}
 			}
 		}
@@ -2202,26 +2200,22 @@ static void isr_rxrdy(SLMP_INFO * info)
 
 			status &= info->read_status_mask2;
 
-			if ( tty ) {
-				if (status & PE)
-					flag = TTY_PARITY;
-				else if (status & FRME)
-					flag = TTY_FRAME;
-				if (status & OVRN) {
-					/* Overrun is special, since it's
-					 * reported immediately, and doesn't
-					 * affect the current character
-					 */
-					over = true;
-				}
+			if (status & PE)
+				flag = TTY_PARITY;
+			else if (status & FRME)
+				flag = TTY_FRAME;
+			if (status & OVRN) {
+				/* Overrun is special, since it's
+				 * reported immediately, and doesn't
+				 * affect the current character
+				 */
+				over = true;
 			}
 		}	/* end of if (error) */
 
-		if ( tty ) {
-			tty_insert_flip_char(tty, DataByte, flag);
-			if (over)
-				tty_insert_flip_char(tty, 0, TTY_OVERRUN);
-		}
+		tty_insert_flip_char(&info->port, DataByte, flag);
+		if (over)
+			tty_insert_flip_char(&info->port, 0, TTY_OVERRUN);
 	}
 
 	if ( debug_level >= DEBUG_LEVEL_ISR ) {
diff --git a/drivers/tty/vt/keyboard.c b/drivers/tty/vt/keyboard.c
index 681765baef69..5aace4d47cb6 100644
--- a/drivers/tty/vt/keyboard.c
+++ b/drivers/tty/vt/keyboard.c
@@ -309,8 +309,8 @@ static void put_queue(struct vc_data *vc, int ch)
 {
 	struct tty_struct *tty = vc->port.tty;
 
+	tty_insert_flip_char(&vc->port, ch, 0);
 	if (tty) {
-		tty_insert_flip_char(tty, ch, 0);
 		tty_schedule_flip(tty);
 	}
 }
@@ -323,7 +323,7 @@ static void puts_queue(struct vc_data *vc, char *cp)
 		return;
 
 	while (*cp) {
-		tty_insert_flip_char(tty, *cp, 0);
+		tty_insert_flip_char(&vc->port, *cp, 0);
 		cp++;
 	}
 	tty_schedule_flip(tty);
@@ -586,7 +586,7 @@ static void fn_send_intr(struct vc_data *vc)
 
 	if (!tty)
 		return;
-	tty_insert_flip_char(tty, 0, TTY_BREAK);
+	tty_insert_flip_char(&vc->port, 0, TTY_BREAK);
 	tty_schedule_flip(tty);
 }
 
diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c
index 8fd89687d068..811f2505e9ee 100644
--- a/drivers/tty/vt/vt.c
+++ b/drivers/tty/vt/vt.c
@@ -1336,7 +1336,7 @@ static void csi_m(struct vc_data *vc)
 static void respond_string(const char *p, struct tty_struct *tty)
 {
 	while (*p) {
-		tty_insert_flip_char(tty, *p, 0);
+		tty_insert_flip_char(tty->port, *p, 0);
 		p++;
 	}
 	tty_schedule_flip(tty);
diff --git a/drivers/usb/serial/ark3116.c b/drivers/usb/serial/ark3116.c
index e2d653d00ea2..1614feb6a76e 100644
--- a/drivers/usb/serial/ark3116.c
+++ b/drivers/usb/serial/ark3116.c
@@ -703,7 +703,7 @@ static void ark3116_process_read_urb(struct urb *urb)
 
 		/* overrun is special, not associated with a char */
 		if (lsr & UART_LSR_OE)
-			tty_insert_flip_char(tty, 0, TTY_OVERRUN);
+			tty_insert_flip_char(&port->port, 0, TTY_OVERRUN);
 	}
 	tty_insert_flip_string_fixed_flag(&port->port, data, tty_flag,
 							urb->actual_length);
diff --git a/drivers/usb/serial/belkin_sa.c b/drivers/usb/serial/belkin_sa.c
index a213d1be9462..7ba2c0bdcec9 100644
--- a/drivers/usb/serial/belkin_sa.c
+++ b/drivers/usb/serial/belkin_sa.c
@@ -276,7 +276,7 @@ static void belkin_sa_process_read_urb(struct urb *urb)
 
 		/* Overrun is special, not associated with a char. */
 		if (status & BELKIN_SA_LSR_OE)
-			tty_insert_flip_char(tty, 0, TTY_OVERRUN);
+			tty_insert_flip_char(&port->port, 0, TTY_OVERRUN);
 	}
 
 	tty_insert_flip_string_fixed_flag(&port->port, data, tty_flag,
diff --git a/drivers/usb/serial/digi_acceleport.c b/drivers/usb/serial/digi_acceleport.c
index efbc4035410c..b5fa738512ca 100644
--- a/drivers/usb/serial/digi_acceleport.c
+++ b/drivers/usb/serial/digi_acceleport.c
@@ -1441,7 +1441,7 @@ static int digi_read_inb_callback(struct urb *urb)
 
 		/* overrun is special, not associated with a char */
 		if (port_status & DIGI_OVERRUN_ERROR)
-			tty_insert_flip_char(tty, 0, TTY_OVERRUN);
+			tty_insert_flip_char(&port->port, 0, TTY_OVERRUN);
 
 		/* break takes precedence over parity, */
 		/* which takes precedence over framing errors */
diff --git a/drivers/usb/serial/f81232.c b/drivers/usb/serial/f81232.c
index a8c6430bf1b3..6b880c33d258 100644
--- a/drivers/usb/serial/f81232.c
+++ b/drivers/usb/serial/f81232.c
@@ -133,12 +133,13 @@ static void f81232_process_read_urb(struct urb *urb)
 
 	/* overrun is special, not associated with a char */
 	if (line_status & UART_OVERRUN_ERROR)
-		tty_insert_flip_char(tty, 0, TTY_OVERRUN);
+		tty_insert_flip_char(&port->port, 0, TTY_OVERRUN);
 
 	if (port->port.console && port->sysrq) {
 		for (i = 0; i < urb->actual_length; ++i)
 			if (!usb_serial_handle_sysrq_char(port, data[i]))
-				tty_insert_flip_char(tty, data[i], tty_flag);
+				tty_insert_flip_char(&port->port, data[i],
+						tty_flag);
 	} else {
 		tty_insert_flip_string_fixed_flag(&port->port, data, tty_flag,
 							urb->actual_length);
diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c
index 0416d952a448..eb59ba3789ad 100644
--- a/drivers/usb/serial/ftdi_sio.c
+++ b/drivers/usb/serial/ftdi_sio.c
@@ -1958,9 +1958,8 @@ static int ftdi_prepare_write_buffer(struct usb_serial_port *port,
 
 #define FTDI_RS_ERR_MASK (FTDI_RS_BI | FTDI_RS_PE | FTDI_RS_FE | FTDI_RS_OE)
 
-static int ftdi_process_packet(struct tty_struct *tty,
-		struct usb_serial_port *port, struct ftdi_private *priv,
-		char *packet, int len)
+static int ftdi_process_packet(struct usb_serial_port *port,
+		struct ftdi_private *priv, char *packet, int len)
 {
 	int i;
 	char status;
@@ -2010,7 +2009,7 @@ static int ftdi_process_packet(struct tty_struct *tty,
 		/* Overrun is special, not associated with a char */
 		if (packet[1] & FTDI_RS_OE) {
 			priv->icount.overrun++;
-			tty_insert_flip_char(tty, 0, TTY_OVERRUN);
+			tty_insert_flip_char(&port->port, 0, TTY_OVERRUN);
 		}
 	}
 
@@ -2029,7 +2028,7 @@ static int ftdi_process_packet(struct tty_struct *tty,
 	if (port->port.console && port->sysrq) {
 		for (i = 0; i < len; i++, ch++) {
 			if (!usb_serial_handle_sysrq_char(port, *ch))
-				tty_insert_flip_char(tty, *ch, flag);
+				tty_insert_flip_char(&port->port, *ch, flag);
 		}
 	} else {
 		tty_insert_flip_string_fixed_flag(&port->port, ch, flag, len);
@@ -2054,7 +2053,7 @@ static void ftdi_process_read_urb(struct urb *urb)
 
 	for (i = 0; i < urb->actual_length; i += priv->max_packet_size) {
 		len = min_t(int, urb->actual_length - i, priv->max_packet_size);
-		count += ftdi_process_packet(tty, port, priv, &data[i], len);
+		count += ftdi_process_packet(port, priv, &data[i], len);
 	}
 
 	if (count)
diff --git a/drivers/usb/serial/generic.c b/drivers/usb/serial/generic.c
index 2ea70a631996..b00110cd5689 100644
--- a/drivers/usb/serial/generic.c
+++ b/drivers/usb/serial/generic.c
@@ -332,7 +332,7 @@ void usb_serial_generic_process_read_urb(struct urb *urb)
 	else {
 		for (i = 0; i < urb->actual_length; i++, ch++) {
 			if (!usb_serial_handle_sysrq_char(port, *ch))
-				tty_insert_flip_char(tty, *ch, TTY_NORMAL);
+				tty_insert_flip_char(&port->port, *ch, TTY_NORMAL);
 		}
 	}
 	tty_flip_buffer_push(tty);
diff --git a/drivers/usb/serial/keyspan.c b/drivers/usb/serial/keyspan.c
index 97bc49f68efd..a4f5caebda43 100644
--- a/drivers/usb/serial/keyspan.c
+++ b/drivers/usb/serial/keyspan.c
@@ -315,7 +315,7 @@ static void	usa26_indat_callback(struct urb *urb)
 			else
 				err = 0;
 			for (i = 1; i < urb->actual_length ; ++i)
-				tty_insert_flip_char(tty, data[i], err);
+				tty_insert_flip_char(&port->port, data[i], err);
 		} else {
 			/* some bytes had errors, every byte has status */
 			dev_dbg(&port->dev, "%s - RX error!!!!\n", __func__);
@@ -328,7 +328,8 @@ static void	usa26_indat_callback(struct urb *urb)
 				if (stat & RXERROR_PARITY)
 					flag |= TTY_PARITY;
 				/* XXX should handle break (0x10) */
-				tty_insert_flip_char(tty, data[i+1], flag);
+				tty_insert_flip_char(&port->port, data[i+1],
+						flag);
 			}
 		}
 		tty_flip_buffer_push(tty);
@@ -700,7 +701,8 @@ static void	usa49_indat_callback(struct urb *urb)
 				if (stat & RXERROR_PARITY)
 					flag |= TTY_PARITY;
 				/* XXX should handle break (0x10) */
-				tty_insert_flip_char(tty, data[i+1], flag);
+				tty_insert_flip_char(&port->port, data[i+1],
+						flag);
 			}
 		}
 		tty_flip_buffer_push(tty);
@@ -751,7 +753,8 @@ static void usa49wg_indat_callback(struct urb *urb)
 				/* no error on any byte */
 				i++;
 				for (x = 1; x < len ; ++x)
-					tty_insert_flip_char(tty, data[i++], 0);
+					tty_insert_flip_char(&port->port,
+							data[i++], 0);
 			} else {
 				/*
 				 * some bytes had errors, every byte has status
@@ -765,7 +768,7 @@ static void usa49wg_indat_callback(struct urb *urb)
 					if (stat & RXERROR_PARITY)
 						flag |= TTY_PARITY;
 					/* XXX should handle break (0x10) */
-					tty_insert_flip_char(tty,
+					tty_insert_flip_char(&port->port,
 							data[i+1], flag);
 					i += 2;
 				}
@@ -824,8 +827,8 @@ static void usa90_indat_callback(struct urb *urb)
 				else
 					err = 0;
 				for (i = 1; i < urb->actual_length ; ++i)
-					tty_insert_flip_char(tty, data[i],
-									err);
+					tty_insert_flip_char(&port->port,
+							data[i], err);
 			}  else {
 			/* some bytes had errors, every byte has status */
 				dev_dbg(&port->dev, "%s - RX error!!!!\n", __func__);
@@ -838,8 +841,8 @@ static void usa90_indat_callback(struct urb *urb)
 					if (stat & RXERROR_PARITY)
 						flag |= TTY_PARITY;
 					/* XXX should handle break (0x10) */
-					tty_insert_flip_char(tty, data[i+1],
-									flag);
+					tty_insert_flip_char(&port->port,
+							data[i+1], flag);
 				}
 			}
 		}
diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c
index 86789b0477c5..00047f3c7293 100644
--- a/drivers/usb/serial/pl2303.c
+++ b/drivers/usb/serial/pl2303.c
@@ -805,12 +805,13 @@ static void pl2303_process_read_urb(struct urb *urb)
 
 	/* overrun is special, not associated with a char */
 	if (line_status & UART_OVERRUN_ERROR)
-		tty_insert_flip_char(tty, 0, TTY_OVERRUN);
+		tty_insert_flip_char(&port->port, 0, TTY_OVERRUN);
 
 	if (port->port.console && port->sysrq) {
 		for (i = 0; i < urb->actual_length; ++i)
 			if (!usb_serial_handle_sysrq_char(port, data[i]))
-				tty_insert_flip_char(tty, data[i], tty_flag);
+				tty_insert_flip_char(&port->port, data[i],
+						tty_flag);
 	} else {
 		tty_insert_flip_string_fixed_flag(&port->port, data, tty_flag,
 							urb->actual_length);
diff --git a/drivers/usb/serial/spcp8x5.c b/drivers/usb/serial/spcp8x5.c
index fa42f1b907d0..04e373152724 100644
--- a/drivers/usb/serial/spcp8x5.c
+++ b/drivers/usb/serial/spcp8x5.c
@@ -498,7 +498,7 @@ static void spcp8x5_process_read_urb(struct urb *urb)
 
 		/* overrun is special, not associated with a char */
 		if (status & UART_OVERRUN_ERROR)
-			tty_insert_flip_char(tty, 0, TTY_OVERRUN);
+			tty_insert_flip_char(&port->port, 0, TTY_OVERRUN);
 
 		if (status & UART_DCD)
 			usb_serial_handle_dcd_change(port, tty,
diff --git a/drivers/usb/serial/ssu100.c b/drivers/usb/serial/ssu100.c
index 37476c6240c2..38713156e957 100644
--- a/drivers/usb/serial/ssu100.c
+++ b/drivers/usb/serial/ssu100.c
@@ -582,8 +582,7 @@ static void ssu100_update_lsr(struct usb_serial_port *port, u8 lsr,
 
 }
 
-static int ssu100_process_packet(struct urb *urb,
-				 struct tty_struct *tty)
+static int ssu100_process_packet(struct urb *urb)
 {
 	struct usb_serial_port *port = urb->context;
 	char *packet = (char *)urb->transfer_buffer;
@@ -598,7 +597,8 @@ static int ssu100_process_packet(struct urb *urb,
 		if (packet[2] == 0x00) {
 			ssu100_update_lsr(port, packet[3], &flag);
 			if (flag == TTY_OVERRUN)
-				tty_insert_flip_char(tty, 0, TTY_OVERRUN);
+				tty_insert_flip_char(&port->port, 0,
+						TTY_OVERRUN);
 		}
 		if (packet[2] == 0x01)
 			ssu100_update_msr(port, packet[3]);
@@ -614,7 +614,7 @@ static int ssu100_process_packet(struct urb *urb,
 	if (port->port.console && port->sysrq) {
 		for (i = 0; i < len; i++, ch++) {
 			if (!usb_serial_handle_sysrq_char(port, *ch))
-				tty_insert_flip_char(tty, *ch, flag);
+				tty_insert_flip_char(&port->port, *ch, flag);
 		}
 	} else
 		tty_insert_flip_string_fixed_flag(&port->port, ch, flag, len);
@@ -632,7 +632,7 @@ static void ssu100_process_read_urb(struct urb *urb)
 	if (!tty)
 		return;
 
-	count = ssu100_process_packet(urb, tty);
+	count = ssu100_process_packet(urb);
 
 	if (count)
 		tty_flip_buffer_push(tty);
diff --git a/include/linux/tty_flip.h b/include/linux/tty_flip.h
index 743f1d076467..f9acb578c6ad 100644
--- a/include/linux/tty_flip.h
+++ b/include/linux/tty_flip.h
@@ -12,16 +12,16 @@ extern int tty_prepare_flip_string_flags(struct tty_port *port,
 		unsigned char **chars, char **flags, size_t size);
 void tty_schedule_flip(struct tty_struct *tty);
 
-static inline int tty_insert_flip_char(struct tty_struct *tty,
+static inline int tty_insert_flip_char(struct tty_port *port,
 					unsigned char ch, char flag)
 {
-	struct tty_buffer *tb = tty->port->buf.tail;
+	struct tty_buffer *tb = port->buf.tail;
 	if (tb && tb->used < tb->size) {
 		tb->flag_buf_ptr[tb->used] = flag;
 		tb->char_buf_ptr[tb->used++] = ch;
 		return 1;
 	}
-	return tty_insert_flip_string_flags(tty->port, &ch, &flag, 1);
+	return tty_insert_flip_string_flags(port, &ch, &flag, 1);
 }
 
 static inline int tty_insert_flip_string(struct tty_struct *tty, const unsigned char *chars, size_t size)
-- 
cgit v1.2.3


From 05c7cd39907184328f48d3e7899f9cdd653ad336 Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jslaby@suse.cz>
Date: Thu, 3 Jan 2013 15:53:04 +0100
Subject: TTY: switch tty_insert_flip_string

Now, we start converting tty buffer functions to actually use
tty_port. This will allow us to get rid of the need of tty in many
call sites. Only tty_port will needed and hence no more
tty_port_tty_get in those paths.

tty_insert_flip_string this time.

Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/isdn/gigaset/interface.c      |  2 +-
 drivers/isdn/i4l/isdn_common.c        |  5 ++---
 drivers/isdn/i4l/isdn_common.h        |  2 +-
 drivers/isdn/i4l/isdn_tty.c           |  6 +++---
 drivers/net/usb/hso.c                 |  7 +++----
 drivers/s390/char/con3215.c           |  3 ++-
 drivers/s390/char/sclp_tty.c          |  4 ++--
 drivers/s390/char/sclp_vt220.c        |  2 +-
 drivers/staging/ccg/u_serial.c        |  2 +-
 drivers/tty/bfin_jtag_comm.c          |  2 +-
 drivers/tty/ehv_bytechan.c            |  2 +-
 drivers/tty/hvc/hvcs.c                |  2 +-
 drivers/tty/ipwireless/tty.c          |  2 +-
 drivers/tty/n_gsm.c                   |  2 +-
 drivers/tty/nozomi.c                  | 14 +++++---------
 drivers/tty/pty.c                     |  2 +-
 drivers/tty/serial/amba-pl011.c       |  6 +++---
 drivers/tty/serial/atmel_serial.c     |  6 ++++--
 drivers/tty/serial/crisv10.c          |  2 +-
 drivers/tty/serial/icom.c             |  2 +-
 drivers/tty/serial/ifx6x60.c          |  2 +-
 drivers/tty/serial/ioc3_serial.c      |  3 ++-
 drivers/tty/serial/ioc4_serial.c      |  2 +-
 drivers/tty/serial/jsm/jsm_tty.c      |  2 +-
 drivers/tty/serial/mfd.c              |  5 +++--
 drivers/tty/serial/mrst_max3110.c     |  2 +-
 drivers/tty/serial/msm_serial.c       |  2 +-
 drivers/tty/serial/msm_serial_hs.c    |  2 +-
 drivers/tty/serial/mxs-auart.c        |  5 +++--
 drivers/tty/serial/pch_uart.c         | 10 +++++-----
 drivers/tty/serial/sunhv.c            | 13 +++++++------
 drivers/usb/class/cdc-acm.c           |  3 ++-
 drivers/usb/gadget/u_serial.c         | 11 ++++-------
 drivers/usb/serial/aircable.c         |  9 ++++-----
 drivers/usb/serial/cyberjack.c        |  2 +-
 drivers/usb/serial/garmin_gps.c       |  2 +-
 drivers/usb/serial/generic.c          |  2 +-
 drivers/usb/serial/io_edgeport.c      | 12 ++++++------
 drivers/usb/serial/io_ti.c            | 13 +++++++------
 drivers/usb/serial/ir-usb.c           |  2 +-
 drivers/usb/serial/iuu_phoenix.c      |  2 +-
 drivers/usb/serial/keyspan.c          |  8 +++++---
 drivers/usb/serial/keyspan_pda.c      |  2 +-
 drivers/usb/serial/kl5kusb105.c       |  2 +-
 drivers/usb/serial/kobil_sct.c        |  2 +-
 drivers/usb/serial/mct_u232.c         |  2 +-
 drivers/usb/serial/metro-usb.c        |  2 +-
 drivers/usb/serial/mos7720.c          |  2 +-
 drivers/usb/serial/mos7840.c          |  5 +++--
 drivers/usb/serial/navman.c           |  2 +-
 drivers/usb/serial/omninet.c          |  5 +++--
 drivers/usb/serial/opticon.c          |  2 +-
 drivers/usb/serial/oti6858.c          |  2 +-
 drivers/usb/serial/quatech2.c         |  8 +++-----
 drivers/usb/serial/safe_serial.c      |  2 +-
 drivers/usb/serial/sierra.c           |  2 +-
 drivers/usb/serial/symbolserial.c     |  3 ++-
 drivers/usb/serial/ti_usb_3410_5052.c | 11 +++++------
 drivers/usb/serial/usb_wwan.c         |  2 +-
 include/linux/tty_flip.h              |  5 +++--
 net/bluetooth/rfcomm/tty.c            |  5 +++--
 net/irda/ircomm/ircomm_tty.c          |  2 +-
 62 files changed, 130 insertions(+), 127 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/isdn/gigaset/interface.c b/drivers/isdn/gigaset/interface.c
index 67abf3ff45e8..dd2a57eca2e7 100644
--- a/drivers/isdn/gigaset/interface.c
+++ b/drivers/isdn/gigaset/interface.c
@@ -569,7 +569,7 @@ void gigaset_if_receive(struct cardstate *cs,
 		return;
 	}
 
-	tty_insert_flip_string(tty, buffer, len);
+	tty_insert_flip_string(&cs->port, buffer, len);
 	tty_flip_buffer_push(tty);
 	tty_kref_put(tty);
 }
diff --git a/drivers/isdn/i4l/isdn_common.c b/drivers/isdn/i4l/isdn_common.c
index 4a387ec021ad..b87d9e577be2 100644
--- a/drivers/isdn/i4l/isdn_common.c
+++ b/drivers/isdn/i4l/isdn_common.c
@@ -876,9 +876,8 @@ isdn_readbchan(int di, int channel, u_char *buf, u_char *fp, int len, wait_queue
  * of the mapping (di,ch)<->minor, happen during the sleep? --he
  */
 int
-isdn_readbchan_tty(int di, int channel, struct tty_struct *tty, int cisco_hack)
+isdn_readbchan_tty(int di, int channel, struct tty_port *port, int cisco_hack)
 {
-	struct tty_port *port = tty->port;
 	int count;
 	int count_pull;
 	int count_put;
@@ -941,7 +940,7 @@ isdn_readbchan_tty(int di, int channel, struct tty_struct *tty, int cisco_hack)
 			}
 			count_put = count_pull;
 			if (count_put > 1)
-				tty_insert_flip_string(tty, skb->data, count_put - 1);
+				tty_insert_flip_string(port, skb->data, count_put - 1);
 			last = skb->data[count_put - 1];
 			len -= count_put;
 #ifdef CONFIG_ISDN_AUDIO
diff --git a/drivers/isdn/i4l/isdn_common.h b/drivers/isdn/i4l/isdn_common.h
index 9a471f62e1d4..2260ef07ab9c 100644
--- a/drivers/isdn/i4l/isdn_common.h
+++ b/drivers/isdn/i4l/isdn_common.h
@@ -37,7 +37,7 @@ extern void isdn_timer_ctrl(int tf, int onoff);
 extern void isdn_unexclusive_channel(int di, int ch);
 extern int isdn_getnum(char **);
 extern int isdn_readbchan(int, int, u_char *, u_char *, int, wait_queue_head_t *);
-extern int isdn_readbchan_tty(int, int, struct tty_struct *, int);
+extern int isdn_readbchan_tty(int, int, struct tty_port *, int);
 extern int isdn_get_free_channel(int, int, int, int, int, char *);
 extern int isdn_writebuf_skb_stub(int, int, int, struct sk_buff *);
 extern int register_isdn(isdn_if *i);
diff --git a/drivers/isdn/i4l/isdn_tty.c b/drivers/isdn/i4l/isdn_tty.c
index 32d65d4bc848..9bb9986659e4 100644
--- a/drivers/isdn/i4l/isdn_tty.c
+++ b/drivers/isdn/i4l/isdn_tty.c
@@ -101,7 +101,7 @@ isdn_tty_try_read(modem_info *info, struct sk_buff *skb)
 	} else {
 #endif
 		if (len > 1)
-			tty_insert_flip_string(tty, skb->data, len - 1);
+			tty_insert_flip_string(port, skb->data, len - 1);
 		last = skb->data[len - 1];
 #ifdef CONFIG_ISDN_AUDIO
 	}
@@ -150,9 +150,9 @@ isdn_tty_readmodem(void)
 			if (info->mcr & UART_MCR_RTS) {
 				/* CISCO AsyncPPP Hack */
 				if (!(info->emu.mdmreg[REG_CPPP] & BIT_CPPP))
-					r = isdn_readbchan_tty(info->isdn_driver, info->isdn_channel, tty, 0);
+					r = isdn_readbchan_tty(info->isdn_driver, info->isdn_channel, &info->port, 0);
 				else
-					r = isdn_readbchan_tty(info->isdn_driver, info->isdn_channel, tty, 1);
+					r = isdn_readbchan_tty(info->isdn_driver, info->isdn_channel, &info->port, 1);
 				if (r)
 					tty_flip_buffer_push(tty);
 			} else
diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c
index cd8ccb240f4b..d235ca07548f 100644
--- a/drivers/net/usb/hso.c
+++ b/drivers/net/usb/hso.c
@@ -2044,10 +2044,9 @@ static int put_rxbuf_data(struct urb *urb, struct hso_serial *serial)
 				tty_kref_put(tty);
 				return -1;
 			}
-			curr_write_len =  tty_insert_flip_string
-				(tty, urb->transfer_buffer +
-				 serial->curr_rx_urb_offset,
-				 write_length_remaining);
+			curr_write_len = tty_insert_flip_string(&serial->port,
+				urb->transfer_buffer + serial->curr_rx_urb_offset,
+				write_length_remaining);
 			serial->curr_rx_urb_offset += curr_write_len;
 			write_length_remaining -= curr_write_len;
 			tty_flip_buffer_push(tty);
diff --git a/drivers/s390/char/con3215.c b/drivers/s390/char/con3215.c
index 7c7294590880..4c6743dd5357 100644
--- a/drivers/s390/char/con3215.c
+++ b/drivers/s390/char/con3215.c
@@ -425,7 +425,8 @@ static void raw3215_irq(struct ccw_device *cdev, unsigned long intparm,
 					count++;
 				} else
 					count -= 2;
-				tty_insert_flip_string(tty, raw->inbuf, count);
+				tty_insert_flip_string(&raw->port, raw->inbuf,
+						count);
 				tty_flip_buffer_push(tty);
 				break;
 			}
diff --git a/drivers/s390/char/sclp_tty.c b/drivers/s390/char/sclp_tty.c
index c03863a7d455..3ffddbb53d2f 100644
--- a/drivers/s390/char/sclp_tty.c
+++ b/drivers/s390/char/sclp_tty.c
@@ -351,10 +351,10 @@ sclp_tty_input(unsigned char* buf, unsigned int count)
 		    (strncmp((const char *) buf + count - 2, "^n", 2) &&
 		     strncmp((const char *) buf + count - 2, "\252n", 2))) {
 			/* add the auto \n */
-			tty_insert_flip_string(tty, buf, count);
+			tty_insert_flip_string(&sclp_port, buf, count);
 			tty_insert_flip_char(&sclp_port, '\n', TTY_NORMAL);
 		} else
-			tty_insert_flip_string(tty, buf, count - 2);
+			tty_insert_flip_string(&sclp_port, buf, count - 2);
 		tty_flip_buffer_push(tty);
 		break;
 	}
diff --git a/drivers/s390/char/sclp_vt220.c b/drivers/s390/char/sclp_vt220.c
index effcc8756e0a..b5507f199b24 100644
--- a/drivers/s390/char/sclp_vt220.c
+++ b/drivers/s390/char/sclp_vt220.c
@@ -480,7 +480,7 @@ sclp_vt220_receiver_fn(struct evbuf_header *evbuf)
 		/* Send input to line discipline */
 		buffer++;
 		count--;
-		tty_insert_flip_string(tty, buffer, count);
+		tty_insert_flip_string(&sclp_vt220_port, buffer, count);
 		tty_flip_buffer_push(tty);
 		break;
 	}
diff --git a/drivers/staging/ccg/u_serial.c b/drivers/staging/ccg/u_serial.c
index 373c40656b52..7df2c02d1137 100644
--- a/drivers/staging/ccg/u_serial.c
+++ b/drivers/staging/ccg/u_serial.c
@@ -529,7 +529,7 @@ static void gs_rx_push(unsigned long _port)
 				size -= n;
 			}
 
-			count = tty_insert_flip_string(tty, packet, size);
+			count = tty_insert_flip_string(&port->port, packet, size);
 			if (count)
 				do_push = true;
 			if (count != size) {
diff --git a/drivers/tty/bfin_jtag_comm.c b/drivers/tty/bfin_jtag_comm.c
index 1cfcdbf1d0cc..143c38579cb5 100644
--- a/drivers/tty/bfin_jtag_comm.c
+++ b/drivers/tty/bfin_jtag_comm.c
@@ -104,7 +104,7 @@ bfin_jc_emudat_manager(void *arg)
 					size_t num_chars = (4 <= inbound_len ? 4 : inbound_len);
 					pr_debug("  incoming data: 0x%08x (pushing %zu)\n", emudat, num_chars);
 					inbound_len -= num_chars;
-					tty_insert_flip_string(tty, (unsigned char *)&emudat, num_chars);
+					tty_insert_flip_string(&port, (unsigned char *)&emudat, num_chars);
 					tty_flip_buffer_push(tty);
 				}
 			}
diff --git a/drivers/tty/ehv_bytechan.c b/drivers/tty/ehv_bytechan.c
index af97e39d641c..5164f9a57017 100644
--- a/drivers/tty/ehv_bytechan.c
+++ b/drivers/tty/ehv_bytechan.c
@@ -407,7 +407,7 @@ static irqreturn_t ehv_bc_tty_rx_isr(int irq, void *data)
 		 */
 
 		/* Pass the received data to the tty layer. */
-		ret = tty_insert_flip_string(ttys, buffer, len);
+		ret = tty_insert_flip_string(&bc->port, buffer, len);
 
 		/* 'ret' is the number of bytes that the TTY layer accepted.
 		 * If it's not equal to 'len', then it means the buffer is
diff --git a/drivers/tty/hvc/hvcs.c b/drivers/tty/hvc/hvcs.c
index 4a0ab98e9a63..7bfc0a924b2f 100644
--- a/drivers/tty/hvc/hvcs.c
+++ b/drivers/tty/hvc/hvcs.c
@@ -613,7 +613,7 @@ static int hvcs_io(struct hvcs_struct *hvcsd)
 		got = hvc_get_chars(unit_address,
 				&buf[0],
 				HVCS_BUFF_LEN);
-		tty_insert_flip_string(tty, buf, got);
+		tty_insert_flip_string(&hvcsd->port, buf, got);
 	}
 
 	/* Give the TTY time to process the data we just sent. */
diff --git a/drivers/tty/ipwireless/tty.c b/drivers/tty/ipwireless/tty.c
index 2cde13ddf9fc..a3ad5e14cbef 100644
--- a/drivers/tty/ipwireless/tty.c
+++ b/drivers/tty/ipwireless/tty.c
@@ -176,7 +176,7 @@ void ipwireless_tty_received(struct ipw_tty *tty, unsigned char *data,
 	}
 	mutex_unlock(&tty->ipw_tty_mutex);
 
-	work = tty_insert_flip_string(linux_tty, data, length);
+	work = tty_insert_flip_string(&tty->port, data, length);
 
 	if (work != length)
 		printk(KERN_DEBUG IPWIRELESS_PCCARD_NAME
diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c
index 769016504c88..4a3342d21c8f 100644
--- a/drivers/tty/n_gsm.c
+++ b/drivers/tty/n_gsm.c
@@ -1578,7 +1578,7 @@ static void gsm_dlci_data(struct gsm_dlci *dlci, u8 *data, int clen)
 		/* Line state will go via DLCI 0 controls only */
 		case 1:
 		default:
-			tty_insert_flip_string(tty, data, len);
+			tty_insert_flip_string(port, data, len);
 			tty_flip_buffer_push(tty);
 		}
 		tty_kref_put(tty);
diff --git a/drivers/tty/nozomi.c b/drivers/tty/nozomi.c
index 437a6366fb7b..941fe8060ea5 100644
--- a/drivers/tty/nozomi.c
+++ b/drivers/tty/nozomi.c
@@ -827,15 +827,10 @@ static int receive_data(enum port_type index, struct nozomi *dc)
 	struct tty_struct *tty = tty_port_tty_get(&port->port);
 	int i, ret;
 
-	if (unlikely(!tty)) {
-		DBG1("tty not open for port: %d?", index);
-		return 1;
-	}
-
 	read_mem32((u32 *) &size, addr, 4);
 	/*  DBG1( "%d bytes port: %d", size, index); */
 
-	if (test_bit(TTY_THROTTLED, &tty->flags)) {
+	if (tty && test_bit(TTY_THROTTLED, &tty->flags)) {
 		DBG1("No room in tty, don't read data, don't ack interrupt, "
 			"disable interrupt");
 
@@ -858,10 +853,11 @@ static int receive_data(enum port_type index, struct nozomi *dc)
 			tty_insert_flip_char(&port->port, buf[0], TTY_NORMAL);
 			size = 0;
 		} else if (size < RECEIVE_BUF_MAX) {
-			size -= tty_insert_flip_string(tty, (char *) buf, size);
+			size -= tty_insert_flip_string(&port->port,
+					(char *)buf, size);
 		} else {
-			i = tty_insert_flip_string(tty, \
-						(char *) buf, RECEIVE_BUF_MAX);
+			i = tty_insert_flip_string(&port->port,
+					(char *)buf, RECEIVE_BUF_MAX);
 			size -= i;
 			offset += i;
 		}
diff --git a/drivers/tty/pty.c b/drivers/tty/pty.c
index be6a373601b7..3c285d398f38 100644
--- a/drivers/tty/pty.c
+++ b/drivers/tty/pty.c
@@ -120,7 +120,7 @@ static int pty_write(struct tty_struct *tty, const unsigned char *buf, int c)
 
 	if (c > 0) {
 		/* Stuff the data into the input queue of the other end */
-		c = tty_insert_flip_string(to, buf, c);
+		c = tty_insert_flip_string(to->port, buf, c);
 		/* And shovel */
 		if (c) {
 			tty_flip_buffer_push(to);
diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c
index 7fca4022a8b2..e1257d17f5f0 100644
--- a/drivers/tty/serial/amba-pl011.c
+++ b/drivers/tty/serial/amba-pl011.c
@@ -698,7 +698,8 @@ static void pl011_dma_rx_chars(struct uart_amba_port *uap,
 			       u32 pending, bool use_buf_b,
 			       bool readfifo)
 {
-	struct tty_struct *tty = uap->port.state->port.tty;
+	struct tty_port *port = &uap->port.state->port;
+	struct tty_struct *tty = port->tty;
 	struct pl011_sgbuf *sgbuf = use_buf_b ?
 		&uap->dmarx.sgbuf_b : &uap->dmarx.sgbuf_a;
 	struct device *dev = uap->dmarx.chan->device->dev;
@@ -715,8 +716,7 @@ static void pl011_dma_rx_chars(struct uart_amba_port *uap,
 		 * Note that tty_insert_flip_buf() tries to take as many chars
 		 * as it can.
 		 */
-		dma_count = tty_insert_flip_string(uap->port.state->port.tty,
-						   sgbuf->buf, pending);
+		dma_count = tty_insert_flip_string(port, sgbuf->buf, pending);
 
 		/* Return buffer to device */
 		dma_sync_sg_for_device(dev, &sgbuf->sg, 1, DMA_FROM_DEVICE);
diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c
index 922e85aeb63a..929567038c5a 100644
--- a/drivers/tty/serial/atmel_serial.c
+++ b/drivers/tty/serial/atmel_serial.c
@@ -781,7 +781,8 @@ static void atmel_rx_from_ring(struct uart_port *port)
 static void atmel_rx_from_dma(struct uart_port *port)
 {
 	struct atmel_uart_port *atmel_port = to_atmel_uart_port(port);
-	struct tty_struct *tty = port->state->port.tty;
+	struct tty_port *tport = &port->state->port;
+	struct tty_struct *tty = tport->tty;
 	struct atmel_dma_buffer *pdc;
 	int rx_idx = atmel_port->pdc_rx_idx;
 	unsigned int head;
@@ -820,7 +821,8 @@ static void atmel_rx_from_dma(struct uart_port *port)
 			 */
 			count = head - tail;
 
-			tty_insert_flip_string(tty, pdc->buf + pdc->ofs, count);
+			tty_insert_flip_string(tport, pdc->buf + pdc->ofs,
+						count);
 
 			dma_sync_single_for_device(port->dev, pdc->dma_addr,
 					pdc->dma_size, DMA_FROM_DEVICE);
diff --git a/drivers/tty/serial/crisv10.c b/drivers/tty/serial/crisv10.c
index d12306625458..c82601d4dc53 100644
--- a/drivers/tty/serial/crisv10.c
+++ b/drivers/tty/serial/crisv10.c
@@ -2119,7 +2119,7 @@ static void flush_to_flip_buffer(struct e100_serial *info)
 	while ((buffer = info->first_recv_buffer) != NULL) {
 		unsigned int count = buffer->length;
 
-		tty_insert_flip_string(tty, buffer->buffer, count);
+		tty_insert_flip_string(&info->port, buffer->buffer, count);
 		info->recv_cnt -= count;
 
 		if (count == buffer->length) {
diff --git a/drivers/tty/serial/icom.c b/drivers/tty/serial/icom.c
index 2b0b60ff7f01..54903ee5e5ab 100644
--- a/drivers/tty/serial/icom.c
+++ b/drivers/tty/serial/icom.c
@@ -762,7 +762,7 @@ static void recv_interrupt(u16 port_int_reg, struct icom_port *icom_port)
 		/* Block copy all but the last byte as this may have status */
 		if (count > 0) {
 			first = icom_port->recv_buf[offset];
-			tty_insert_flip_string(tty, icom_port->recv_buf + offset, count - 1);
+			tty_insert_flip_string(port, icom_port->recv_buf + offset, count - 1);
 		}
 
 		icount = &icom_port->uart_port.icount;
diff --git a/drivers/tty/serial/ifx6x60.c b/drivers/tty/serial/ifx6x60.c
index 675d94ab0aff..bfb634ea8145 100644
--- a/drivers/tty/serial/ifx6x60.c
+++ b/drivers/tty/serial/ifx6x60.c
@@ -672,7 +672,7 @@ static void ifx_spi_insert_flip_string(struct ifx_spi_device *ifx_dev,
 	struct tty_struct *tty = tty_port_tty_get(&ifx_dev->tty_port);
 	if (!tty)
 		return;
-	tty_insert_flip_string(tty, chars, size);
+	tty_insert_flip_string(&ifx_dev->tty_port, chars, size);
 	tty_flip_buffer_push(tty);
 	tty_kref_put(tty);
 }
diff --git a/drivers/tty/serial/ioc3_serial.c b/drivers/tty/serial/ioc3_serial.c
index d8f1d1d54471..0f25ce49c7f9 100644
--- a/drivers/tty/serial/ioc3_serial.c
+++ b/drivers/tty/serial/ioc3_serial.c
@@ -1415,7 +1415,8 @@ static int receive_chars(struct uart_port *the_port)
 	read_count = do_read(the_port, ch, MAX_CHARS);
 	if (read_count > 0) {
 		flip = 1;
-		read_room = tty_insert_flip_string(tty, ch, read_count);
+		read_room = tty_insert_flip_string(&state->port, ch,
+				read_count);
 		the_port->icount.rx += read_count;
 	}
 	spin_unlock_irqrestore(&the_port->lock, pflags);
diff --git a/drivers/tty/serial/ioc4_serial.c b/drivers/tty/serial/ioc4_serial.c
index 710ce87ffbeb..3b021b03ae56 100644
--- a/drivers/tty/serial/ioc4_serial.c
+++ b/drivers/tty/serial/ioc4_serial.c
@@ -2362,7 +2362,7 @@ static void receive_chars(struct uart_port *the_port)
 		icount = &the_port->icount;
 		read_count = do_read(the_port, ch, request_count);
 		if (read_count > 0) {
-			tty_insert_flip_string(tty, ch, read_count);
+			tty_insert_flip_string(&state->port, ch, read_count);
 			icount->rx += read_count;
 		}
 	}
diff --git a/drivers/tty/serial/jsm/jsm_tty.c b/drivers/tty/serial/jsm/jsm_tty.c
index ac1d36cb2032..c9ce00dd1f8a 100644
--- a/drivers/tty/serial/jsm/jsm_tty.c
+++ b/drivers/tty/serial/jsm/jsm_tty.c
@@ -640,7 +640,7 @@ void jsm_input(struct jsm_channel *ch)
 					tty_insert_flip_char(port, *(ch->ch_rqueue +tail +i), TTY_NORMAL);
 			}
 		} else {
-			tty_insert_flip_string(tp, ch->ch_rqueue + tail, s) ;
+			tty_insert_flip_string(port, ch->ch_rqueue + tail, s);
 		}
 		tail += s;
 		n -= s;
diff --git a/drivers/tty/serial/mfd.c b/drivers/tty/serial/mfd.c
index 2c01344dc332..60d585ab4870 100644
--- a/drivers/tty/serial/mfd.c
+++ b/drivers/tty/serial/mfd.c
@@ -387,7 +387,8 @@ void hsu_dma_rx(struct uart_hsu_port *up, u32 int_sts)
 	struct hsu_dma_buffer *dbuf = &up->rxbuf;
 	struct hsu_dma_chan *chan = up->rxc;
 	struct uart_port *port = &up->port;
-	struct tty_struct *tty = port->state->port.tty;
+	struct tty_port *tport = &port->state->port;
+	struct tty_struct *tty = tport->tty;
 	int count;
 
 	if (!tty)
@@ -423,7 +424,7 @@ void hsu_dma_rx(struct uart_hsu_port *up, u32 int_sts)
 	 * explicitly set tail to 0. So head will
 	 * always be greater than tail.
 	 */
-	tty_insert_flip_string(tty, dbuf->buf, count);
+	tty_insert_flip_string(tport, dbuf->buf, count);
 	port->icount.rx += count;
 
 	dma_sync_single_for_device(up->port.dev, dbuf->dma_addr,
diff --git a/drivers/tty/serial/mrst_max3110.c b/drivers/tty/serial/mrst_max3110.c
index 776431ff0190..3b8df7b93b73 100644
--- a/drivers/tty/serial/mrst_max3110.c
+++ b/drivers/tty/serial/mrst_max3110.c
@@ -374,7 +374,7 @@ receive_chars(struct uart_max3110 *max, unsigned short *str, int len)
 	for (r = 0; w; r += usable, w -= usable) {
 		usable = tty_buffer_request_room(tport, w);
 		if (usable) {
-			tty_insert_flip_string(tty, buf + r, usable);
+			tty_insert_flip_string(tport, buf + r, usable);
 			port->icount.rx += usable;
 		}
 	}
diff --git a/drivers/tty/serial/msm_serial.c b/drivers/tty/serial/msm_serial.c
index e4eb81a12793..cb787c0e279a 100644
--- a/drivers/tty/serial/msm_serial.c
+++ b/drivers/tty/serial/msm_serial.c
@@ -133,7 +133,7 @@ static void handle_rx_dm(struct uart_port *port, unsigned int misr)
 			port->icount.frame++;
 
 		/* TODO: handle sysrq */
-		tty_insert_flip_string(tty, (char *) &c,
+		tty_insert_flip_string(tport, (char *)&c,
 				       (count > 4) ? 4 : count);
 		count -= 4;
 	}
diff --git a/drivers/tty/serial/msm_serial_hs.c b/drivers/tty/serial/msm_serial_hs.c
index 6aa9d470ef54..11b7f5b2eb5f 100644
--- a/drivers/tty/serial/msm_serial_hs.c
+++ b/drivers/tty/serial/msm_serial_hs.c
@@ -961,7 +961,7 @@ static void msm_hs_dmov_rx_callback(struct msm_dmov_cmd *cmd_ptr,
 	rx_count = msm_hs_read(uport, UARTDM_RX_TOTAL_SNAP_ADDR);
 
 	if (0 != (uport->read_status_mask & CREAD)) {
-		retval = tty_insert_flip_string(tty, msm_uport->rx.buffer,
+		retval = tty_insert_flip_string(port, msm_uport->rx.buffer,
 						rx_count);
 		BUG_ON(retval != rx_count);
 	}
diff --git a/drivers/tty/serial/mxs-auart.c b/drivers/tty/serial/mxs-auart.c
index fa31bc38b105..37a0046ef531 100644
--- a/drivers/tty/serial/mxs-auart.c
+++ b/drivers/tty/serial/mxs-auart.c
@@ -457,7 +457,8 @@ static int mxs_auart_dma_prep_rx(struct mxs_auart_port *s);
 static void dma_rx_callback(void *arg)
 {
 	struct mxs_auart_port *s = (struct mxs_auart_port *) arg;
-	struct tty_struct *tty = s->port.state->port.tty;
+	struct tty_port *port = &s->port.state->port;
+	struct tty_struct *tty = port->tty;
 	int count;
 	u32 stat;
 
@@ -468,7 +469,7 @@ static void dma_rx_callback(void *arg)
 			AUART_STAT_PERR | AUART_STAT_FERR);
 
 	count = stat & AUART_STAT_RXCOUNT_MASK;
-	tty_insert_flip_string(tty, s->rx_dma_buf, count);
+	tty_insert_flip_string(port, s->rx_dma_buf, count);
 
 	writel(stat, s->port.membase + AUART_STAT);
 	tty_flip_buffer_push(tty);
diff --git a/drivers/tty/serial/pch_uart.c b/drivers/tty/serial/pch_uart.c
index 4f1774be2a8c..967f1cb311f3 100644
--- a/drivers/tty/serial/pch_uart.c
+++ b/drivers/tty/serial/pch_uart.c
@@ -591,17 +591,17 @@ static void pch_uart_hal_set_break(struct eg20t_port *priv, int on)
 static int push_rx(struct eg20t_port *priv, const unsigned char *buf,
 		   int size)
 {
-	struct uart_port *port;
+	struct uart_port *port = &priv->port;
+	struct tty_port *tport = &port->state->port;
 	struct tty_struct *tty;
 
-	port = &priv->port;
-	tty = tty_port_tty_get(&port->state->port);
+	tty = tty_port_tty_get(tport);
 	if (!tty) {
 		dev_dbg(priv->port.dev, "%s:tty is busy now", __func__);
 		return -EBUSY;
 	}
 
-	tty_insert_flip_string(tty, buf, size);
+	tty_insert_flip_string(tport, buf, size);
 	tty_flip_buffer_push(tty);
 	tty_kref_put(tty);
 
@@ -646,7 +646,7 @@ static int dma_push_rx(struct eg20t_port *priv, int size)
 	if (!room)
 		return room;
 
-	tty_insert_flip_string(tty, sg_virt(&priv->sg_rx), size);
+	tty_insert_flip_string(tport, sg_virt(&priv->sg_rx), size);
 
 	port->icount.rx += room;
 	tty_kref_put(tty);
diff --git a/drivers/tty/serial/sunhv.c b/drivers/tty/serial/sunhv.c
index bbb102e3c035..defe92b19e16 100644
--- a/drivers/tty/serial/sunhv.c
+++ b/drivers/tty/serial/sunhv.c
@@ -72,7 +72,7 @@ static void transmit_chars_write(struct uart_port *port, struct circ_buf *xmit)
 	}
 }
 
-static int receive_chars_getchar(struct uart_port *port, struct tty_struct *tty)
+static int receive_chars_getchar(struct uart_port *port)
 {
 	int saw_console_brk = 0;
 	int limit = 10000;
@@ -115,7 +115,7 @@ static int receive_chars_getchar(struct uart_port *port, struct tty_struct *tty)
 	return saw_console_brk;
 }
 
-static int receive_chars_read(struct uart_port *port, struct tty_struct *tty)
+static int receive_chars_read(struct uart_port *port)
 {
 	int saw_console_brk = 0;
 	int limit = 10000;
@@ -152,12 +152,13 @@ static int receive_chars_read(struct uart_port *port, struct tty_struct *tty)
 		for (i = 0; i < bytes_read; i++)
 			uart_handle_sysrq_char(port, con_read_page[i]);
 
-		if (tty == NULL)
+		if (port->state == NULL)
 			continue;
 
 		port->icount.rx += bytes_read;
 
-		tty_insert_flip_string(tty, con_read_page, bytes_read);
+		tty_insert_flip_string(&port->state->port, con_read_page,
+				bytes_read);
 	}
 
 	return saw_console_brk;
@@ -165,7 +166,7 @@ static int receive_chars_read(struct uart_port *port, struct tty_struct *tty)
 
 struct sunhv_ops {
 	void (*transmit_chars)(struct uart_port *port, struct circ_buf *xmit);
-	int (*receive_chars)(struct uart_port *port, struct tty_struct *tty);
+	int (*receive_chars)(struct uart_port *port);
 };
 
 static struct sunhv_ops bychar_ops = {
@@ -187,7 +188,7 @@ static struct tty_struct *receive_chars(struct uart_port *port)
 	if (port->state != NULL)		/* Unopened serial console */
 		tty = port->state->port.tty;
 
-	if (sunhv_ops->receive_chars(port, tty))
+	if (sunhv_ops->receive_chars(port))
 		sun_do_break();
 
 	return tty;
diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c
index 8d809a811e16..20dc2add27ba 100644
--- a/drivers/usb/class/cdc-acm.c
+++ b/drivers/usb/class/cdc-acm.c
@@ -419,7 +419,8 @@ static void acm_process_read_urb(struct acm *acm, struct urb *urb)
 	if (!tty)
 		return;
 
-	tty_insert_flip_string(tty, urb->transfer_buffer, urb->actual_length);
+	tty_insert_flip_string(&acm->port, urb->transfer_buffer,
+			urb->actual_length);
 	tty_flip_buffer_push(tty);
 
 	tty_kref_put(tty);
diff --git a/drivers/usb/gadget/u_serial.c b/drivers/usb/gadget/u_serial.c
index d0f95482f40e..3560799d530a 100644
--- a/drivers/usb/gadget/u_serial.c
+++ b/drivers/usb/gadget/u_serial.c
@@ -495,12 +495,8 @@ static void gs_rx_push(unsigned long _port)
 
 		req = list_first_entry(queue, struct usb_request, list);
 
-		/* discard data if tty was closed */
-		if (!tty)
-			goto recycle;
-
 		/* leave data queued if tty was rx throttled */
-		if (test_bit(TTY_THROTTLED, &tty->flags))
+		if (tty && test_bit(TTY_THROTTLED, &tty->flags))
 			break;
 
 		switch (req->status) {
@@ -533,7 +529,8 @@ static void gs_rx_push(unsigned long _port)
 				size -= n;
 			}
 
-			count = tty_insert_flip_string(tty, packet, size);
+			count = tty_insert_flip_string(&port->port, packet,
+					size);
 			if (count)
 				do_push = true;
 			if (count != size) {
@@ -546,7 +543,7 @@ static void gs_rx_push(unsigned long _port)
 			}
 			port->n_read = 0;
 		}
-recycle:
+
 		list_move(&req->list, &port->read_pool);
 		port->read_started--;
 	}
diff --git a/drivers/usb/serial/aircable.c b/drivers/usb/serial/aircable.c
index 6d110a3bc7e7..3bb1f8f11fc8 100644
--- a/drivers/usb/serial/aircable.c
+++ b/drivers/usb/serial/aircable.c
@@ -119,9 +119,8 @@ static int aircable_probe(struct usb_serial *serial,
 	return 0;
 }
 
-static int aircable_process_packet(struct tty_struct *tty,
-			struct usb_serial_port *port, int has_headers,
-			char *packet, int len)
+static int aircable_process_packet(struct usb_serial_port *port,
+		int has_headers, char *packet, int len)
 {
 	if (has_headers) {
 		len -= HCI_HEADER_LENGTH;
@@ -132,7 +131,7 @@ static int aircable_process_packet(struct tty_struct *tty,
 		return 0;
 	}
 
-	tty_insert_flip_string(tty, packet, len);
+	tty_insert_flip_string(&port->port, packet, len);
 
 	return len;
 }
@@ -156,7 +155,7 @@ static void aircable_process_read_urb(struct urb *urb)
 	count = 0;
 	for (i = 0; i < urb->actual_length; i += HCI_COMPLETE_FRAME) {
 		len = min_t(int, urb->actual_length - i, HCI_COMPLETE_FRAME);
-		count += aircable_process_packet(tty, port, has_headers,
+		count += aircable_process_packet(port, has_headers,
 								&data[i], len);
 	}
 
diff --git a/drivers/usb/serial/cyberjack.c b/drivers/usb/serial/cyberjack.c
index 69a4fa1cee25..e6976a974472 100644
--- a/drivers/usb/serial/cyberjack.c
+++ b/drivers/usb/serial/cyberjack.c
@@ -343,7 +343,7 @@ static void cyberjack_read_bulk_callback(struct urb *urb)
 		return;
 	}
 	if (urb->actual_length) {
-		tty_insert_flip_string(tty, data, urb->actual_length);
+		tty_insert_flip_string(&port->port, data, urb->actual_length);
 		tty_flip_buffer_push(tty);
 	}
 	tty_kref_put(tty);
diff --git a/drivers/usb/serial/garmin_gps.c b/drivers/usb/serial/garmin_gps.c
index 203358d7e7bc..498b5f0da639 100644
--- a/drivers/usb/serial/garmin_gps.c
+++ b/drivers/usb/serial/garmin_gps.c
@@ -256,7 +256,7 @@ static void send_to_tty(struct usb_serial_port *port,
 
 	if (tty && actual_length) {
 		usb_serial_debug_data(&port->dev, __func__, actual_length, data);
-		tty_insert_flip_string(tty, data, actual_length);
+		tty_insert_flip_string(&port->port, data, actual_length);
 		tty_flip_buffer_push(tty);
 	}
 	tty_kref_put(tty);
diff --git a/drivers/usb/serial/generic.c b/drivers/usb/serial/generic.c
index b00110cd5689..3780f6a501b3 100644
--- a/drivers/usb/serial/generic.c
+++ b/drivers/usb/serial/generic.c
@@ -328,7 +328,7 @@ void usb_serial_generic_process_read_urb(struct urb *urb)
 	   stuff like 3G modems, so shortcircuit it in the 99.9999999% of cases
 	   where the USB serial is not a console anyway */
 	if (!port->port.console || !port->sysrq)
-		tty_insert_flip_string(tty, ch, urb->actual_length);
+		tty_insert_flip_string(&port->port, ch, urb->actual_length);
 	else {
 		for (i = 0; i < urb->actual_length; i++, ch++) {
 			if (!usb_serial_handle_sysrq_char(port, *ch))
diff --git a/drivers/usb/serial/io_edgeport.c b/drivers/usb/serial/io_edgeport.c
index 7b770c7f8b11..f96b91da964f 100644
--- a/drivers/usb/serial/io_edgeport.c
+++ b/drivers/usb/serial/io_edgeport.c
@@ -232,7 +232,7 @@ static void  process_rcvd_data(struct edgeport_serial *edge_serial,
 				unsigned char *buffer, __u16 bufferLength);
 static void process_rcvd_status(struct edgeport_serial *edge_serial,
 				__u8 byte2, __u8 byte3);
-static void edge_tty_recv(struct device *dev, struct tty_struct *tty,
+static void edge_tty_recv(struct usb_serial_port *port, struct tty_struct *tty,
 				unsigned char *data, int length);
 static void handle_new_msr(struct edgeport_port *edge_port, __u8 newMsr);
 static void handle_new_lsr(struct edgeport_port *edge_port, __u8 lsrData,
@@ -1865,7 +1865,7 @@ static void process_rcvd_data(struct edgeport_serial *edge_serial,
 					if (tty) {
 						dev_dbg(dev, "%s - Sending %d bytes to TTY for port %d\n",
 							__func__, rxLen, edge_serial->rxPort);
-						edge_tty_recv(&edge_serial->serial->dev->dev, tty, buffer, rxLen);
+						edge_tty_recv(edge_port->port, tty, buffer, rxLen);
 						tty_kref_put(tty);
 					}
 					edge_port->icount.rx += rxLen;
@@ -2017,14 +2017,14 @@ static void process_rcvd_status(struct edgeport_serial *edge_serial,
  * edge_tty_recv
  *	this function passes data on to the tty flip buffer
  *****************************************************************************/
-static void edge_tty_recv(struct device *dev, struct tty_struct *tty,
+static void edge_tty_recv(struct usb_serial_port *port, struct tty_struct *tty,
 					unsigned char *data, int length)
 {
 	int cnt;
 
-	cnt = tty_insert_flip_string(tty, data, length);
+	cnt = tty_insert_flip_string(&port->port, data, length);
 	if (cnt < length) {
-		dev_err(dev, "%s - dropping data, %d bytes lost\n",
+		dev_err(&port->dev, "%s - dropping data, %d bytes lost\n",
 				__func__, length - cnt);
 	}
 	data += cnt;
@@ -2090,7 +2090,7 @@ static void handle_new_lsr(struct edgeport_port *edge_port, __u8 lsrData,
 		struct tty_struct *tty =
 				tty_port_tty_get(&edge_port->port->port);
 		if (tty) {
-			edge_tty_recv(&edge_port->port->dev, tty, &data, 1);
+			edge_tty_recv(edge_port->port, tty, &data, 1);
 			tty_kref_put(tty);
 		}
 	}
diff --git a/drivers/usb/serial/io_ti.c b/drivers/usb/serial/io_ti.c
index 58184f3de686..1286a0b2e2b7 100644
--- a/drivers/usb/serial/io_ti.c
+++ b/drivers/usb/serial/io_ti.c
@@ -201,7 +201,7 @@ static int closing_wait = EDGE_CLOSING_WAIT;
 static bool ignore_cpu_rev;
 static int default_uart_mode;		/* RS232 */
 
-static void edge_tty_recv(struct device *dev, struct tty_struct *tty,
+static void edge_tty_recv(struct usb_serial_port *port, struct tty_struct *tty,
 			  unsigned char *data, int length);
 
 static void stop_read(struct edgeport_port *edge_port);
@@ -1557,7 +1557,7 @@ static void handle_new_lsr(struct edgeport_port *edge_port, int lsr_data,
 	if (lsr_data) {
 		tty = tty_port_tty_get(&edge_port->port->port);
 		if (tty) {
-			edge_tty_recv(&edge_port->port->dev, tty, &data, 1);
+			edge_tty_recv(edge_port->port, tty, &data, 1);
 			tty_kref_put(tty);
 		}
 	}
@@ -1722,7 +1722,8 @@ static void edge_bulk_in_callback(struct urb *urb)
 			dev_dbg(dev, "%s - close pending, dropping data on the floor\n",
 								__func__);
 		else
-			edge_tty_recv(dev, tty, data, urb->actual_length);
+			edge_tty_recv(edge_port->port, tty, data,
+					urb->actual_length);
 		edge_port->icount.rx += urb->actual_length;
 	}
 	tty_kref_put(tty);
@@ -1740,14 +1741,14 @@ exit:
 		dev_err(dev, "%s - usb_submit_urb failed with result %d\n", __func__, retval);
 }
 
-static void edge_tty_recv(struct device *dev, struct tty_struct *tty,
+static void edge_tty_recv(struct usb_serial_port *port, struct tty_struct *tty,
 					unsigned char *data, int length)
 {
 	int queued;
 
-	queued = tty_insert_flip_string(tty, data, length);
+	queued = tty_insert_flip_string(&port->port, data, length);
 	if (queued < length)
-		dev_err(dev, "%s - dropping data, %d bytes lost\n",
+		dev_err(&port->dev, "%s - dropping data, %d bytes lost\n",
 			__func__, length - queued);
 	tty_flip_buffer_push(tty);
 }
diff --git a/drivers/usb/serial/ir-usb.c b/drivers/usb/serial/ir-usb.c
index e24e2d4f4c1b..171dae1f4a62 100644
--- a/drivers/usb/serial/ir-usb.c
+++ b/drivers/usb/serial/ir-usb.c
@@ -305,7 +305,7 @@ static void ir_process_read_urb(struct urb *urb)
 	tty = tty_port_tty_get(&port->port);
 	if (!tty)
 		return;
-	tty_insert_flip_string(tty, data + 1, urb->actual_length - 1);
+	tty_insert_flip_string(&port->port, data + 1, urb->actual_length - 1);
 	tty_flip_buffer_push(tty);
 	tty_kref_put(tty);
 }
diff --git a/drivers/usb/serial/iuu_phoenix.c b/drivers/usb/serial/iuu_phoenix.c
index 1e1fbed65ef2..dd0d910730c7 100644
--- a/drivers/usb/serial/iuu_phoenix.c
+++ b/drivers/usb/serial/iuu_phoenix.c
@@ -596,7 +596,7 @@ static void read_buf_callback(struct urb *urb)
 	if (data == NULL)
 		dev_dbg(&port->dev, "%s - data is NULL !!!\n", __func__);
 	if (tty && urb->actual_length && data) {
-		tty_insert_flip_string(tty, data, urb->actual_length);
+		tty_insert_flip_string(&port->port, data, urb->actual_length);
 		tty_flip_buffer_push(tty);
 	}
 	tty_kref_put(tty);
diff --git a/drivers/usb/serial/keyspan.c b/drivers/usb/serial/keyspan.c
index a4f5caebda43..14a219ba4ee6 100644
--- a/drivers/usb/serial/keyspan.c
+++ b/drivers/usb/serial/keyspan.c
@@ -472,7 +472,8 @@ static void usa28_indat_callback(struct urb *urb)
 
 		tty = tty_port_tty_get(&port->port);
 		if (tty && urb->actual_length) {
-			tty_insert_flip_string(tty, data, urb->actual_length);
+			tty_insert_flip_string(&port->port, data,
+					urb->actual_length);
 			tty_flip_buffer_push(tty);
 		}
 		tty_kref_put(tty);
@@ -688,7 +689,7 @@ static void	usa49_indat_callback(struct urb *urb)
 		/* 0x80 bit is error flag */
 		if ((data[0] & 0x80) == 0) {
 			/* no error on any byte */
-			tty_insert_flip_string(tty, data + 1,
+			tty_insert_flip_string(&port->port, data + 1,
 						urb->actual_length - 1);
 		} else {
 			/* some bytes had errors, every byte has status */
@@ -816,7 +817,8 @@ static void usa90_indat_callback(struct urb *urb)
 		   otherwise looks like usa26 data format */
 
 		if (p_priv->baud > 57600)
-			tty_insert_flip_string(tty, data, urb->actual_length);
+			tty_insert_flip_string(&port->port, data,
+					urb->actual_length);
 		else {
 			/* 0x80 bit is error flag */
 			if ((data[0] & 0x80) == 0) {
diff --git a/drivers/usb/serial/keyspan_pda.c b/drivers/usb/serial/keyspan_pda.c
index 41b01092af07..334b1a295c6b 100644
--- a/drivers/usb/serial/keyspan_pda.c
+++ b/drivers/usb/serial/keyspan_pda.c
@@ -166,7 +166,7 @@ static void keyspan_pda_rx_interrupt(struct urb *urb)
 		tty = tty_port_tty_get(&port->port);
 		 /* rest of message is rx data */
 		if (tty && urb->actual_length) {
-			tty_insert_flip_string(tty, data + 1,
+			tty_insert_flip_string(&port->port, data + 1,
 						urb->actual_length - 1);
 			tty_flip_buffer_push(tty);
 		}
diff --git a/drivers/usb/serial/kl5kusb105.c b/drivers/usb/serial/kl5kusb105.c
index fc9e14a1e9b3..8ee0825ad700 100644
--- a/drivers/usb/serial/kl5kusb105.c
+++ b/drivers/usb/serial/kl5kusb105.c
@@ -411,7 +411,7 @@ static void klsi_105_process_read_urb(struct urb *urb)
 		len = urb->actual_length - KLSI_HDR_LEN;
 	}
 
-	tty_insert_flip_string(tty, data + KLSI_HDR_LEN, len);
+	tty_insert_flip_string(&port->port, data + KLSI_HDR_LEN, len);
 	tty_flip_buffer_push(tty);
 	tty_kref_put(tty);
 }
diff --git a/drivers/usb/serial/kobil_sct.c b/drivers/usb/serial/kobil_sct.c
index b747ba615d0b..135c8b4b26f7 100644
--- a/drivers/usb/serial/kobil_sct.c
+++ b/drivers/usb/serial/kobil_sct.c
@@ -353,7 +353,7 @@ static void kobil_read_int_callback(struct urb *urb)
 		*/
 		/* END DEBUG */
 
-		tty_insert_flip_string(tty, data, urb->actual_length);
+		tty_insert_flip_string(&port->port, data, urb->actual_length);
 		tty_flip_buffer_push(tty);
 	}
 	tty_kref_put(tty);
diff --git a/drivers/usb/serial/mct_u232.c b/drivers/usb/serial/mct_u232.c
index b6911757c855..ba20bb037b28 100644
--- a/drivers/usb/serial/mct_u232.c
+++ b/drivers/usb/serial/mct_u232.c
@@ -563,7 +563,7 @@ static void mct_u232_read_int_callback(struct urb *urb)
 		if (urb->actual_length) {
 			tty = tty_port_tty_get(&port->port);
 			if (tty) {
-				tty_insert_flip_string(tty, data,
+				tty_insert_flip_string(&port->port, data,
 						urb->actual_length);
 				tty_flip_buffer_push(tty);
 			}
diff --git a/drivers/usb/serial/metro-usb.c b/drivers/usb/serial/metro-usb.c
index 3d258448c29a..6264f3974ea7 100644
--- a/drivers/usb/serial/metro-usb.c
+++ b/drivers/usb/serial/metro-usb.c
@@ -127,7 +127,7 @@ static void metrousb_read_int_callback(struct urb *urb)
 	tty = tty_port_tty_get(&port->port);
 	if (tty && urb->actual_length) {
 		/* Loop through the data copying each byte to the tty layer. */
-		tty_insert_flip_string(tty, data, urb->actual_length);
+		tty_insert_flip_string(&port->port, data, urb->actual_length);
 
 		/* Force the data to the tty layer. */
 		tty_flip_buffer_push(tty);
diff --git a/drivers/usb/serial/mos7720.c b/drivers/usb/serial/mos7720.c
index f57a6b1fe787..22818fb765e0 100644
--- a/drivers/usb/serial/mos7720.c
+++ b/drivers/usb/serial/mos7720.c
@@ -915,7 +915,7 @@ static void mos7720_bulk_in_callback(struct urb *urb)
 
 	tty = tty_port_tty_get(&port->port);
 	if (tty && urb->actual_length) {
-		tty_insert_flip_string(tty, data, urb->actual_length);
+		tty_insert_flip_string(&port->port, data, urb->actual_length);
 		tty_flip_buffer_push(tty);
 	}
 	tty_kref_put(tty);
diff --git a/drivers/usb/serial/mos7840.c b/drivers/usb/serial/mos7840.c
index 66d9e088d9d9..3ddd7a1f7ff3 100644
--- a/drivers/usb/serial/mos7840.c
+++ b/drivers/usb/serial/mos7840.c
@@ -773,9 +773,10 @@ static void mos7840_bulk_in_callback(struct urb *urb)
 	usb_serial_debug_data(&port->dev, __func__, urb->actual_length, data);
 
 	if (urb->actual_length) {
-		tty = tty_port_tty_get(&mos7840_port->port->port);
+		struct tty_port *tport = &mos7840_port->port->port;
+		tty = tty_port_tty_get(tport);
 		if (tty) {
-			tty_insert_flip_string(tty, data, urb->actual_length);
+			tty_insert_flip_string(tport, data, urb->actual_length);
 			tty_flip_buffer_push(tty);
 			tty_kref_put(tty);
 		}
diff --git a/drivers/usb/serial/navman.c b/drivers/usb/serial/navman.c
index 1566f8f500ae..0d96a1a7b9e5 100644
--- a/drivers/usb/serial/navman.c
+++ b/drivers/usb/serial/navman.c
@@ -57,7 +57,7 @@ static void navman_read_int_callback(struct urb *urb)
 
 	tty = tty_port_tty_get(&port->port);
 	if (tty && urb->actual_length) {
-		tty_insert_flip_string(tty, data, urb->actual_length);
+		tty_insert_flip_string(&port->port, data, urb->actual_length);
 		tty_flip_buffer_push(tty);
 	}
 	tty_kref_put(tty);
diff --git a/drivers/usb/serial/omninet.c b/drivers/usb/serial/omninet.c
index 7818af931a48..338191bae5a3 100644
--- a/drivers/usb/serial/omninet.c
+++ b/drivers/usb/serial/omninet.c
@@ -176,8 +176,9 @@ static void omninet_read_bulk_callback(struct urb *urb)
 	if (urb->actual_length && header->oh_len) {
 		struct tty_struct *tty = tty_port_tty_get(&port->port);
 		if (tty) {
-			tty_insert_flip_string(tty, data + OMNINET_DATAOFFSET,
-							header->oh_len);
+			tty_insert_flip_string(&port->port,
+					data + OMNINET_DATAOFFSET,
+					header->oh_len);
 			tty_flip_buffer_push(tty);
 			tty_kref_put(tty);
 		}
diff --git a/drivers/usb/serial/opticon.c b/drivers/usb/serial/opticon.c
index c6bfb83efb1e..d3b74e50aff1 100644
--- a/drivers/usb/serial/opticon.c
+++ b/drivers/usb/serial/opticon.c
@@ -57,7 +57,7 @@ static void opticon_process_data_packet(struct usb_serial_port *port,
 	if (!tty)
 		return;
 
-	tty_insert_flip_string(tty, buf, len);
+	tty_insert_flip_string(&port->port, buf, len);
 	tty_flip_buffer_push(tty);
 	tty_kref_put(tty);
 }
diff --git a/drivers/usb/serial/oti6858.c b/drivers/usb/serial/oti6858.c
index d217fd6ee43f..7a53fe9f3af3 100644
--- a/drivers/usb/serial/oti6858.c
+++ b/drivers/usb/serial/oti6858.c
@@ -837,7 +837,7 @@ static void oti6858_read_bulk_callback(struct urb *urb)
 
 	tty = tty_port_tty_get(&port->port);
 	if (tty != NULL && urb->actual_length > 0) {
-		tty_insert_flip_string(tty, data, urb->actual_length);
+		tty_insert_flip_string(&port->port, data, urb->actual_length);
 		tty_flip_buffer_push(tty);
 	}
 	tty_kref_put(tty);
diff --git a/drivers/usb/serial/quatech2.c b/drivers/usb/serial/quatech2.c
index 1e67fd89e346..5dccc4f957df 100644
--- a/drivers/usb/serial/quatech2.c
+++ b/drivers/usb/serial/quatech2.c
@@ -698,7 +698,7 @@ void qt2_process_read_urb(struct urb *urb)
 				break;
 			case QT2_CONTROL_ESCAPE:
 				tty_buffer_request_room(&port->port, 2);
-				tty_insert_flip_string(tty, ch, 2);
+				tty_insert_flip_string(&port->port, ch, 2);
 				i += 2;
 				escapeflag = true;
 				break;
@@ -712,10 +712,8 @@ void qt2_process_read_urb(struct urb *urb)
 				continue;
 		}
 
-		if (tty) {
-			tty_buffer_request_room(&port->port, 1);
-			tty_insert_flip_string(tty, ch, 1);
-		}
+		tty_buffer_request_room(&port->port, 1);
+		tty_insert_flip_string(&port->port, ch, 1);
 	}
 
 	if (tty) {
diff --git a/drivers/usb/serial/safe_serial.c b/drivers/usb/serial/safe_serial.c
index c949ce6ef0c6..ad12e9e2c7ee 100644
--- a/drivers/usb/serial/safe_serial.c
+++ b/drivers/usb/serial/safe_serial.c
@@ -235,7 +235,7 @@ static void safe_process_read_urb(struct urb *urb)
 	dev_info(&urb->dev->dev, "%s - actual: %d\n", __func__, actual_length);
 	length = actual_length;
 out:
-	tty_insert_flip_string(tty, data, length);
+	tty_insert_flip_string(&port->port, data, length);
 	tty_flip_buffer_push(tty);
 err:
 	tty_kref_put(tty);
diff --git a/drivers/usb/serial/sierra.c b/drivers/usb/serial/sierra.c
index af06f2f5f38b..64e53fda149b 100644
--- a/drivers/usb/serial/sierra.c
+++ b/drivers/usb/serial/sierra.c
@@ -583,7 +583,7 @@ static void sierra_indat_callback(struct urb *urb)
 		if (urb->actual_length) {
 			tty = tty_port_tty_get(&port->port);
 			if (tty) {
-				tty_insert_flip_string(tty, data,
+				tty_insert_flip_string(&port->port, data,
 					urb->actual_length);
 				tty_flip_buffer_push(tty);
 
diff --git a/drivers/usb/serial/symbolserial.c b/drivers/usb/serial/symbolserial.c
index 701fffa8431f..2ffa6ae3b5ed 100644
--- a/drivers/usb/serial/symbolserial.c
+++ b/drivers/usb/serial/symbolserial.c
@@ -84,7 +84,8 @@ static void symbol_int_callback(struct urb *urb)
 		 */
 		tty = tty_port_tty_get(&port->port);
 		if (tty) {
-			tty_insert_flip_string(tty, &data[1], data_length);
+			tty_insert_flip_string(&port->port, &data[1],
+					data_length);
 			tty_flip_buffer_push(tty);
 			tty_kref_put(tty);
 		}
diff --git a/drivers/usb/serial/ti_usb_3410_5052.c b/drivers/usb/serial/ti_usb_3410_5052.c
index f2530d2ef3c4..05077e3c7631 100644
--- a/drivers/usb/serial/ti_usb_3410_5052.c
+++ b/drivers/usb/serial/ti_usb_3410_5052.c
@@ -121,7 +121,7 @@ static void ti_interrupt_callback(struct urb *urb);
 static void ti_bulk_in_callback(struct urb *urb);
 static void ti_bulk_out_callback(struct urb *urb);
 
-static void ti_recv(struct device *dev, struct tty_struct *tty,
+static void ti_recv(struct usb_serial_port *port, struct tty_struct *tty,
 	unsigned char *data, int length);
 static void ti_send(struct ti_port *tport);
 static int ti_set_mcr(struct ti_port *tport, unsigned int mcr);
@@ -1155,8 +1155,7 @@ static void ti_bulk_in_callback(struct urb *urb)
 				dev_dbg(dev, "%s - port closed, dropping data\n",
 					__func__);
 			else
-				ti_recv(&urb->dev->dev, tty,
-						urb->transfer_buffer,
+				ti_recv(port, tty, urb->transfer_buffer,
 						urb->actual_length);
 			spin_lock(&tport->tp_lock);
 			tport->tp_icount.rx += urb->actual_length;
@@ -1210,15 +1209,15 @@ static void ti_bulk_out_callback(struct urb *urb)
 }
 
 
-static void ti_recv(struct device *dev, struct tty_struct *tty,
+static void ti_recv(struct usb_serial_port *port, struct tty_struct *tty,
 	unsigned char *data, int length)
 {
 	int cnt;
 
 	do {
-		cnt = tty_insert_flip_string(tty, data, length);
+		cnt = tty_insert_flip_string(&port->port, data, length);
 		if (cnt < length) {
-			dev_err(dev, "%s - dropping data, %d bytes lost\n",
+			dev_err(&port->dev, "%s - dropping data, %d bytes lost\n",
 						__func__, length - cnt);
 			if (cnt == 0)
 				break;
diff --git a/drivers/usb/serial/usb_wwan.c b/drivers/usb/serial/usb_wwan.c
index 01c94aada56c..293b460030cb 100644
--- a/drivers/usb/serial/usb_wwan.c
+++ b/drivers/usb/serial/usb_wwan.c
@@ -291,7 +291,7 @@ static void usb_wwan_indat_callback(struct urb *urb)
 		tty = tty_port_tty_get(&port->port);
 		if (tty) {
 			if (urb->actual_length) {
-				tty_insert_flip_string(tty, data,
+				tty_insert_flip_string(&port->port, data,
 						urb->actual_length);
 				tty_flip_buffer_push(tty);
 			} else
diff --git a/include/linux/tty_flip.h b/include/linux/tty_flip.h
index f9acb578c6ad..5cb694aba322 100644
--- a/include/linux/tty_flip.h
+++ b/include/linux/tty_flip.h
@@ -24,9 +24,10 @@ static inline int tty_insert_flip_char(struct tty_port *port,
 	return tty_insert_flip_string_flags(port, &ch, &flag, 1);
 }
 
-static inline int tty_insert_flip_string(struct tty_struct *tty, const unsigned char *chars, size_t size)
+static inline int tty_insert_flip_string(struct tty_port *port,
+		const unsigned char *chars, size_t size)
 {
-	return tty_insert_flip_string_fixed_flag(tty->port, chars, TTY_NORMAL, size);
+	return tty_insert_flip_string_fixed_flag(port, chars, TTY_NORMAL, size);
 }
 
 #endif /* _LINUX_TTY_FLIP_H */
diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c
index bd6fd0f43d2b..cbec3b642871 100644
--- a/net/bluetooth/rfcomm/tty.c
+++ b/net/bluetooth/rfcomm/tty.c
@@ -556,7 +556,7 @@ static void rfcomm_dev_data_ready(struct rfcomm_dlc *dlc, struct sk_buff *skb)
 
 	BT_DBG("dlc %p tty %p len %d", dlc, tty, skb->len);
 
-	tty_insert_flip_string(tty, skb->data, skb->len);
+	tty_insert_flip_string(&dev->port, skb->data, skb->len);
 	tty_flip_buffer_push(tty);
 
 	kfree_skb(skb);
@@ -633,7 +633,8 @@ static void rfcomm_tty_copy_pending(struct rfcomm_dev *dev)
 	rfcomm_dlc_lock(dev->dlc);
 
 	while ((skb = skb_dequeue(&dev->pending))) {
-		inserted += tty_insert_flip_string(tty, skb->data, skb->len);
+		inserted += tty_insert_flip_string(&dev->port, skb->data,
+				skb->len);
 		kfree_skb(skb);
 	}
 
diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c
index a68c88cdec6e..14b08e376f9f 100644
--- a/net/irda/ircomm/ircomm_tty.c
+++ b/net/irda/ircomm/ircomm_tty.c
@@ -1141,7 +1141,7 @@ static int ircomm_tty_data_indication(void *instance, void *sap,
 	 * Use flip buffer functions since the code may be called from interrupt
 	 * context
 	 */
-	tty_insert_flip_string(tty, skb->data, skb->len);
+	tty_insert_flip_string(&self->port, skb->data, skb->len);
 	tty_flip_buffer_push(tty);
 	tty_kref_put(tty);
 
-- 
cgit v1.2.3


From d6c53c0e9bd0a83f9f9ddbc9fd80141a54d83896 Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jslaby@suse.cz>
Date: Thu, 3 Jan 2013 15:53:05 +0100
Subject: TTY: move low_latency to tty_port

One point is to have less places where we actually need tty pointer.
The other is that low_latency is bound to buffer processing and
buffers are now in tty_port. So it makes sense to move low_latency to
tty_port too.

Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/ia64/hp/sim/simserial.c                | 2 +-
 arch/mn10300/kernel/mn10300-serial.c        | 6 +++---
 drivers/char/pcmcia/synclink_cs.c           | 2 +-
 drivers/isdn/gigaset/interface.c            | 2 +-
 drivers/net/caif/caif_serial.c              | 2 +-
 drivers/net/irda/irtty-sir.c                | 2 +-
 drivers/s390/char/con3215.c                 | 2 +-
 drivers/s390/char/sclp_tty.c                | 2 +-
 drivers/s390/char/sclp_vt220.c              | 2 +-
 drivers/s390/char/tty3270.c                 | 4 ++--
 drivers/tty/amiserial.c                     | 4 ++--
 drivers/tty/ipwireless/tty.c                | 2 +-
 drivers/tty/mxser.c                         | 2 +-
 drivers/tty/serial/cpm_uart/cpm_uart_core.c | 2 +-
 drivers/tty/serial/crisv10.c                | 4 ++--
 drivers/tty/serial/ifx6x60.c                | 2 +-
 drivers/tty/serial/ioc3_serial.c            | 2 +-
 drivers/tty/serial/ioc4_serial.c            | 2 +-
 drivers/tty/serial/max3100.c                | 2 +-
 drivers/tty/serial/mpsc.c                   | 2 +-
 drivers/tty/serial/mrst_max3110.c           | 2 +-
 drivers/tty/serial/msm_serial_hs.c          | 2 +-
 drivers/tty/serial/serial_core.c            | 7 +++----
 drivers/tty/synclink.c                      | 2 +-
 drivers/tty/synclink_gt.c                   | 2 +-
 drivers/tty/synclinkmp.c                    | 2 +-
 drivers/tty/tty_buffer.c                    | 9 +++++----
 include/linux/tty.h                         | 5 +++--
 net/irda/ircomm/ircomm_tty.c                | 2 +-
 29 files changed, 42 insertions(+), 41 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/hp/sim/simserial.c b/arch/ia64/hp/sim/simserial.c
index f8ae5d8eb106..942022a5bc86 100644
--- a/arch/ia64/hp/sim/simserial.c
+++ b/arch/ia64/hp/sim/simserial.c
@@ -436,7 +436,7 @@ static int rs_open(struct tty_struct *tty, struct file * filp)
 	struct tty_port *port = &info->port;
 
 	tty->driver_data = info;
-	tty->low_latency = (port->flags & ASYNC_LOW_LATENCY) ? 1 : 0;
+	port->low_latency = (port->flags & ASYNC_LOW_LATENCY) ? 1 : 0;
 
 	/*
 	 * figure out which console to use (should be one already)
diff --git a/arch/mn10300/kernel/mn10300-serial.c b/arch/mn10300/kernel/mn10300-serial.c
index 54ef40ceaaed..ae61bd692b4b 100644
--- a/arch/mn10300/kernel/mn10300-serial.c
+++ b/arch/mn10300/kernel/mn10300-serial.c
@@ -537,7 +537,7 @@ static void mn10300_serial_receive_interrupt(struct mn10300_serial_port *port)
 	count = CIRC_CNT(port->rx_inp, port->rx_outp, MNSC_BUFFER_SIZE);
 	count = tty_buffer_request_room(port, count);
 	if (count == 0) {
-		if (!tty->low_latency)
+		if (!port->low_latency)
 			tty_flip_buffer_push(tty);
 		return;
 	}
@@ -546,7 +546,7 @@ try_again:
 	/* pull chars out of the hat */
 	ix = ACCESS_ONCE(port->rx_outp);
 	if (CIRC_CNT(port->rx_inp, ix, MNSC_BUFFER_SIZE) == 0) {
-		if (push && !tty->low_latency)
+		if (push && !port->low_latency)
 			tty_flip_buffer_push(tty);
 		return;
 	}
@@ -678,7 +678,7 @@ insert:
 
 	count--;
 	if (count <= 0) {
-		if (!tty->low_latency)
+		if (!port->low_latency)
 			tty_flip_buffer_push(tty);
 		return;
 	}
diff --git a/drivers/char/pcmcia/synclink_cs.c b/drivers/char/pcmcia/synclink_cs.c
index 9bdfe27b2413..92dc7327c7aa 100644
--- a/drivers/char/pcmcia/synclink_cs.c
+++ b/drivers/char/pcmcia/synclink_cs.c
@@ -2522,7 +2522,7 @@ static int mgslpc_open(struct tty_struct *tty, struct file * filp)
 		goto cleanup;
 	}
 
-	tty->low_latency = (port->flags & ASYNC_LOW_LATENCY) ? 1 : 0;
+	port->low_latency = (port->flags & ASYNC_LOW_LATENCY) ? 1 : 0;
 
 	spin_lock_irqsave(&info->netlock, flags);
 	if (info->netcount) {
diff --git a/drivers/isdn/gigaset/interface.c b/drivers/isdn/gigaset/interface.c
index dd2a57eca2e7..6dcecd40a819 100644
--- a/drivers/isdn/gigaset/interface.c
+++ b/drivers/isdn/gigaset/interface.c
@@ -164,7 +164,7 @@ static int if_open(struct tty_struct *tty, struct file *filp)
 
 	if (cs->port.count == 1) {
 		tty_port_tty_set(&cs->port, tty);
-		tty->low_latency = 1;
+		cs->port.low_latency = 1;
 	}
 
 	mutex_unlock(&cs->mutex);
diff --git a/drivers/net/caif/caif_serial.c b/drivers/net/caif/caif_serial.c
index 5de74e762021..666891a9a248 100644
--- a/drivers/net/caif/caif_serial.c
+++ b/drivers/net/caif/caif_serial.c
@@ -91,7 +91,7 @@ static inline void update_tty_status(struct ser_device *ser)
 		ser->tty->hw_stopped << 4 |
 		ser->tty->flow_stopped << 3 |
 		ser->tty->packet << 2 |
-		ser->tty->low_latency << 1 |
+		ser->tty->port->low_latency << 1 |
 		ser->tty->warned;
 }
 static inline void debugfs_init(struct ser_device *ser, struct tty_struct *tty)
diff --git a/drivers/net/irda/irtty-sir.c b/drivers/net/irda/irtty-sir.c
index 6e4d4b62c9a8..a41267197839 100644
--- a/drivers/net/irda/irtty-sir.c
+++ b/drivers/net/irda/irtty-sir.c
@@ -210,7 +210,7 @@ static int irtty_do_write(struct sir_dev *dev, const unsigned char *ptr, size_t
  *    been received, which can now be decapsulated and delivered for
  *    further processing 
  *
- * calling context depends on underlying driver and tty->low_latency!
+ * calling context depends on underlying driver and tty->port->low_latency!
  * for example (low_latency: 1 / 0):
  * serial.c:	uart-interrupt / softint
  * usbserial:	urb-complete-interrupt / softint
diff --git a/drivers/s390/char/con3215.c b/drivers/s390/char/con3215.c
index 4c6743dd5357..41b75c5ae0d5 100644
--- a/drivers/s390/char/con3215.c
+++ b/drivers/s390/char/con3215.c
@@ -968,7 +968,7 @@ static int tty3215_open(struct tty_struct *tty, struct file * filp)
 
 	tty_port_tty_set(&raw->port, tty);
 
-	tty->low_latency = 0;  /* don't use bottom half for pushing chars */
+	raw->port.low_latency = 0; /* don't use bottom half for pushing chars */
 	/*
 	 * Start up 3215 device
 	 */
diff --git a/drivers/s390/char/sclp_tty.c b/drivers/s390/char/sclp_tty.c
index 3ffddbb53d2f..19b7c516c07d 100644
--- a/drivers/s390/char/sclp_tty.c
+++ b/drivers/s390/char/sclp_tty.c
@@ -65,7 +65,7 @@ sclp_tty_open(struct tty_struct *tty, struct file *filp)
 {
 	tty_port_tty_set(&sclp_port, tty);
 	tty->driver_data = NULL;
-	tty->low_latency = 0;
+	sclp_port.low_latency = 0;
 	return 0;
 }
 
diff --git a/drivers/s390/char/sclp_vt220.c b/drivers/s390/char/sclp_vt220.c
index b5507f199b24..0eca99b98712 100644
--- a/drivers/s390/char/sclp_vt220.c
+++ b/drivers/s390/char/sclp_vt220.c
@@ -495,7 +495,7 @@ sclp_vt220_open(struct tty_struct *tty, struct file *filp)
 {
 	if (tty->count == 1) {
 		tty_port_tty_set(&sclp_vt220_port, tty);
-		tty->low_latency = 0;
+		sclp_vt220_port.low_latency = 0;
 		if (!tty->winsize.ws_row && !tty->winsize.ws_col) {
 			tty->winsize.ws_row = 24;
 			tty->winsize.ws_col = 80;
diff --git a/drivers/s390/char/tty3270.c b/drivers/s390/char/tty3270.c
index 43ea0593bdb0..3860e796b65f 100644
--- a/drivers/s390/char/tty3270.c
+++ b/drivers/s390/char/tty3270.c
@@ -860,7 +860,7 @@ static int tty3270_install(struct tty_driver *driver, struct tty_struct *tty)
 		tty->driver_data = tp;
 		tty->winsize.ws_row = tp->view.rows - 2;
 		tty->winsize.ws_col = tp->view.cols;
-		tty->low_latency = 0;
+		tp->port.low_latency = 0;
 		/* why to reassign? */
 		tty_port_tty_set(&tp->port, tty);
 		tp->inattr = TF_INPUT;
@@ -893,7 +893,7 @@ static int tty3270_install(struct tty_driver *driver, struct tty_struct *tty)
 	}
 
 	tty_port_tty_set(&tp->port, tty);
-	tty->low_latency = 0;
+	tp->port.low_latency = 0;
 	tty->winsize.ws_row = tp->view.rows - 2;
 	tty->winsize.ws_col = tp->view.cols;
 
diff --git a/drivers/tty/amiserial.c b/drivers/tty/amiserial.c
index 2e670d0c5366..2d1357acbc23 100644
--- a/drivers/tty/amiserial.c
+++ b/drivers/tty/amiserial.c
@@ -1099,7 +1099,7 @@ static int set_serial_info(struct tty_struct *tty, struct serial_state *state,
 	state->custom_divisor = new_serial.custom_divisor;
 	port->close_delay = new_serial.close_delay * HZ/100;
 	port->closing_wait = new_serial.closing_wait * HZ/100;
-	tty->low_latency = (port->flags & ASYNC_LOW_LATENCY) ? 1 : 0;
+	port->low_latency = (port->flags & ASYNC_LOW_LATENCY) ? 1 : 0;
 
 check_and_exit:
 	if (port->flags & ASYNC_INITIALIZED) {
@@ -1528,7 +1528,7 @@ static int rs_open(struct tty_struct *tty, struct file * filp)
 	if (serial_paranoia_check(info, tty->name, "rs_open"))
 		return -ENODEV;
 
-	tty->low_latency = (port->flags & ASYNC_LOW_LATENCY) ? 1 : 0;
+	port->low_latency = (port->flags & ASYNC_LOW_LATENCY) ? 1 : 0;
 
 	retval = startup(tty, info);
 	if (retval) {
diff --git a/drivers/tty/ipwireless/tty.c b/drivers/tty/ipwireless/tty.c
index a3ad5e14cbef..c43da7445432 100644
--- a/drivers/tty/ipwireless/tty.c
+++ b/drivers/tty/ipwireless/tty.c
@@ -106,7 +106,7 @@ static int ipw_open(struct tty_struct *linux_tty, struct file *filp)
 
 	tty->port.tty = linux_tty;
 	linux_tty->driver_data = tty;
-	linux_tty->low_latency = 1;
+	tty->port.low_latency = 1;
 
 	if (tty->tty_type == TTYTYPE_MODEM)
 		ipwireless_ppp_open(tty->network);
diff --git a/drivers/tty/mxser.c b/drivers/tty/mxser.c
index 450c4507cb5b..e9cdfdfe06e9 100644
--- a/drivers/tty/mxser.c
+++ b/drivers/tty/mxser.c
@@ -1264,7 +1264,7 @@ static int mxser_set_serial_info(struct tty_struct *tty,
 				(new_serial.flags & ASYNC_FLAGS));
 		port->close_delay = new_serial.close_delay * HZ / 100;
 		port->closing_wait = new_serial.closing_wait * HZ / 100;
-		tty->low_latency = (port->flags & ASYNC_LOW_LATENCY) ? 1 : 0;
+		port->low_latency = (port->flags & ASYNC_LOW_LATENCY) ? 1 : 0;
 		if ((port->flags & ASYNC_SPD_MASK) == ASYNC_SPD_CUST &&
 				(new_serial.baud_base != info->baud_base ||
 				new_serial.custom_divisor !=
diff --git a/drivers/tty/serial/cpm_uart/cpm_uart_core.c b/drivers/tty/serial/cpm_uart/cpm_uart_core.c
index 108122f8f3c2..0bb24378a3c0 100644
--- a/drivers/tty/serial/cpm_uart/cpm_uart_core.c
+++ b/drivers/tty/serial/cpm_uart/cpm_uart_core.c
@@ -508,7 +508,7 @@ static void cpm_uart_set_termios(struct uart_port *port,
 
 	baud = uart_get_baud_rate(port, termios, old, 0, port->uartclk / 16);
 	if (baud < HW_BUF_SPD_THRESHOLD ||
-	    (pinfo->port.state && pinfo->port.state->port.tty->low_latency))
+	    (pinfo->port.state && pinfo->port.state->port.low_latency))
 		pinfo->rx_fifosize = 1;
 	else
 		pinfo->rx_fifosize = RX_BUF_SIZE;
diff --git a/drivers/tty/serial/crisv10.c b/drivers/tty/serial/crisv10.c
index c82601d4dc53..52449adc09ac 100644
--- a/drivers/tty/serial/crisv10.c
+++ b/drivers/tty/serial/crisv10.c
@@ -3462,7 +3462,7 @@ set_serial_info(struct e100_serial *info,
 	info->type = new_serial.type;
 	info->close_delay = new_serial.close_delay;
 	info->closing_wait = new_serial.closing_wait;
-	info->port.tty->low_latency = (info->flags & ASYNC_LOW_LATENCY) ? 1 : 0;
+	info->port.low_latency = (info->flags & ASYNC_LOW_LATENCY) ? 1 : 0;
 
  check_and_exit:
 	if (info->flags & ASYNC_INITIALIZED) {
@@ -4106,7 +4106,7 @@ rs_open(struct tty_struct *tty, struct file * filp)
 	tty->driver_data = info;
 	info->port.tty = tty;
 
-	tty->low_latency = !!(info->flags & ASYNC_LOW_LATENCY);
+	info->port.low_latency = !!(info->flags & ASYNC_LOW_LATENCY);
 
 	/*
 	 * If the port is in the middle of closing, bail out now
diff --git a/drivers/tty/serial/ifx6x60.c b/drivers/tty/serial/ifx6x60.c
index bfb634ea8145..4bc6e47890b4 100644
--- a/drivers/tty/serial/ifx6x60.c
+++ b/drivers/tty/serial/ifx6x60.c
@@ -615,7 +615,7 @@ static int ifx_port_activate(struct tty_port *port, struct tty_struct *tty)
 	tty->driver_data = ifx_dev;
 
 	/* allows flip string push from int context */
-	tty->low_latency = 1;
+	port->low_latency = 1;
 
 	/* set flag to allows data transfer */
 	set_bit(IFX_SPI_STATE_IO_AVAILABLE, &ifx_dev->flags);
diff --git a/drivers/tty/serial/ioc3_serial.c b/drivers/tty/serial/ioc3_serial.c
index 0f25ce49c7f9..edbdc4e45075 100644
--- a/drivers/tty/serial/ioc3_serial.c
+++ b/drivers/tty/serial/ioc3_serial.c
@@ -1000,7 +1000,7 @@ ioc3_change_speed(struct uart_port *the_port,
 
 	the_port->ignore_status_mask = N_ALL_INPUT;
 
-	state->port.tty->low_latency = 1;
+	state->port.low_latency = 1;
 
 	if (iflag & IGNPAR)
 		the_port->ignore_status_mask &= ~(N_PARITY_ERROR
diff --git a/drivers/tty/serial/ioc4_serial.c b/drivers/tty/serial/ioc4_serial.c
index 3b021b03ae56..86f64ed89b45 100644
--- a/drivers/tty/serial/ioc4_serial.c
+++ b/drivers/tty/serial/ioc4_serial.c
@@ -1740,7 +1740,7 @@ ioc4_change_speed(struct uart_port *the_port,
 
 	the_port->ignore_status_mask = N_ALL_INPUT;
 
-	state->port.tty->low_latency = 1;
+	state->port.low_latency = 1;
 
 	if (iflag & IGNPAR)
 		the_port->ignore_status_mask &= ~(N_PARITY_ERROR
diff --git a/drivers/tty/serial/max3100.c b/drivers/tty/serial/max3100.c
index 7ce3197087bb..e238e80cd981 100644
--- a/drivers/tty/serial/max3100.c
+++ b/drivers/tty/serial/max3100.c
@@ -530,7 +530,7 @@ max3100_set_termios(struct uart_port *port, struct ktermios *termios,
 			MAX3100_STATUS_OE;
 
 	/* we are sending char from a workqueue so enable */
-	s->port.state->port.tty->low_latency = 1;
+	s->port.state->port.low_latency = 1;
 
 	if (s->poll_time > 0)
 		del_timer_sync(&s->timer);
diff --git a/drivers/tty/serial/mpsc.c b/drivers/tty/serial/mpsc.c
index 4bcbc66c48c4..6f2d2ceb326a 100644
--- a/drivers/tty/serial/mpsc.c
+++ b/drivers/tty/serial/mpsc.c
@@ -970,7 +970,7 @@ static int mpsc_rx_intr(struct mpsc_port_info *pi)
 #endif
 		/* Following use of tty struct directly is deprecated */
 		if (tty_buffer_request_room(port, bytes_in) < bytes_in) {
-			if (tty->low_latency)
+			if (port->low_latency)
 				tty_flip_buffer_push(tty);
 			/*
 			 * If this failed then we will throw away the bytes
diff --git a/drivers/tty/serial/mrst_max3110.c b/drivers/tty/serial/mrst_max3110.c
index 3b8df7b93b73..4632db7a24b7 100644
--- a/drivers/tty/serial/mrst_max3110.c
+++ b/drivers/tty/serial/mrst_max3110.c
@@ -495,7 +495,7 @@ static int serial_m3110_startup(struct uart_port *port)
 			| WC_BAUD_DR2;
 
 	/* as we use thread to handle tx/rx, need set low latency */
-	port->state->port.tty->low_latency = 1;
+	port->state->port.low_latency = 1;
 
 	if (max->irq) {
 		max->read_thread = NULL;
diff --git a/drivers/tty/serial/msm_serial_hs.c b/drivers/tty/serial/msm_serial_hs.c
index 11b7f5b2eb5f..c356ffff3c71 100644
--- a/drivers/tty/serial/msm_serial_hs.c
+++ b/drivers/tty/serial/msm_serial_hs.c
@@ -1400,7 +1400,7 @@ static int msm_hs_startup(struct uart_port *uport)
 
 	/* do not let tty layer execute RX in global workqueue, use a
 	 * dedicated workqueue managed by this driver */
-	uport->state->port.tty->low_latency = 1;
+	uport->state->port.low_latency = 1;
 
 	/* turn on uart clk */
 	ret = msm_hs_init_clk_locked(uport);
diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c
index 675343a20f24..b5c4e64f2990 100644
--- a/drivers/tty/serial/serial_core.c
+++ b/drivers/tty/serial/serial_core.c
@@ -867,9 +867,7 @@ static int uart_set_info(struct tty_struct *tty, struct tty_port *port,
 	port->closing_wait    = closing_wait;
 	if (new_info->xmit_fifo_size)
 		uport->fifosize = new_info->xmit_fifo_size;
-	if (port->tty)
-		port->tty->low_latency =
-			(uport->flags & UPF_LOW_LATENCY) ? 1 : 0;
+	port->low_latency = (uport->flags & UPF_LOW_LATENCY) ? 1 : 0;
 
  check_and_exit:
 	retval = 0;
@@ -1565,7 +1563,8 @@ static int uart_open(struct tty_struct *tty, struct file *filp)
 	 */
 	tty->driver_data = state;
 	state->uart_port->state = state;
-	tty->low_latency = (state->uart_port->flags & UPF_LOW_LATENCY) ? 1 : 0;
+	state->port.low_latency =
+		(state->uart_port->flags & UPF_LOW_LATENCY) ? 1 : 0;
 	tty_port_tty_set(port, tty);
 
 	/*
diff --git a/drivers/tty/synclink.c b/drivers/tty/synclink.c
index 33656b35db05..2f6967d61a80 100644
--- a/drivers/tty/synclink.c
+++ b/drivers/tty/synclink.c
@@ -3415,7 +3415,7 @@ static int mgsl_open(struct tty_struct *tty, struct file * filp)
 		goto cleanup;
 	}
 	
-	info->port.tty->low_latency = (info->port.flags & ASYNC_LOW_LATENCY) ? 1 : 0;
+	info->port.low_latency = (info->port.flags & ASYNC_LOW_LATENCY) ? 1 : 0;
 
 	spin_lock_irqsave(&info->netlock, flags);
 	if (info->netcount) {
diff --git a/drivers/tty/synclink_gt.c b/drivers/tty/synclink_gt.c
index 473d7406db83..9a0358a1e0dd 100644
--- a/drivers/tty/synclink_gt.c
+++ b/drivers/tty/synclink_gt.c
@@ -682,7 +682,7 @@ static int open(struct tty_struct *tty, struct file *filp)
 	}
 
 	mutex_lock(&info->port.mutex);
-	info->port.tty->low_latency = (info->port.flags & ASYNC_LOW_LATENCY) ? 1 : 0;
+	info->port.low_latency = (info->port.flags & ASYNC_LOW_LATENCY) ? 1 : 0;
 
 	spin_lock_irqsave(&info->netlock, flags);
 	if (info->netcount) {
diff --git a/drivers/tty/synclinkmp.c b/drivers/tty/synclinkmp.c
index f5794f3d840f..419f58ff4a13 100644
--- a/drivers/tty/synclinkmp.c
+++ b/drivers/tty/synclinkmp.c
@@ -761,7 +761,7 @@ static int open(struct tty_struct *tty, struct file *filp)
 		goto cleanup;
 	}
 
-	info->port.tty->low_latency = (info->port.flags & ASYNC_LOW_LATENCY) ? 1 : 0;
+	info->port.low_latency = (info->port.flags & ASYNC_LOW_LATENCY) ? 1 : 0;
 
 	spin_lock_irqsave(&info->netlock, flags);
 	if (info->netcount) {
diff --git a/drivers/tty/tty_buffer.c b/drivers/tty/tty_buffer.c
index 31873e42602a..1bfe97a8e2eb 100644
--- a/drivers/tty/tty_buffer.c
+++ b/drivers/tty/tty_buffer.c
@@ -364,7 +364,7 @@ void tty_schedule_flip(struct tty_struct *tty)
 {
 	struct tty_bufhead *buf = &tty->port->buf;
 	unsigned long flags;
-	WARN_ON(tty->low_latency);
+	WARN_ON(tty->port->low_latency);
 
 	spin_lock_irqsave(&buf->lock, flags);
 	if (buf->tail != NULL)
@@ -538,7 +538,7 @@ static void flush_to_ldisc(struct work_struct *work)
  */
 void tty_flush_to_ldisc(struct tty_struct *tty)
 {
-	if (!tty->low_latency)
+	if (!tty->port->low_latency)
 		flush_work(&tty->port->buf.work);
 }
 
@@ -547,7 +547,8 @@ void tty_flush_to_ldisc(struct tty_struct *tty)
  *	@tty: tty to push
  *
  *	Queue a push of the terminal flip buffers to the line discipline. This
- *	function must not be called from IRQ context if tty->low_latency is set.
+ *	function must not be called from IRQ context if port->low_latency is
+ *	set.
  *
  *	In the event of the queue being busy for flipping the work will be
  *	held off and retried later.
@@ -565,7 +566,7 @@ void tty_flip_buffer_push(struct tty_struct *tty)
 		buf->tail->commit = buf->tail->used;
 	spin_unlock_irqrestore(&buf->lock, flags);
 
-	if (tty->low_latency)
+	if (tty->port->low_latency)
 		flush_to_ldisc(&buf->work);
 	else
 		schedule_work(&buf->work);
diff --git a/include/linux/tty.h b/include/linux/tty.h
index 8db1b569c37a..f16a47a13a09 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -202,7 +202,8 @@ struct tty_port {
 	unsigned long		iflags;		/* TTYP_ internal flags */
 #define TTYP_FLUSHING			1  /* Flushing to ldisc in progress */
 #define TTYP_FLUSHPENDING		2  /* Queued buffer flush pending */
-	unsigned char		console:1;	/* port is a console */
+	unsigned char		console:1,	/* port is a console */
+				low_latency:1;	/* direct buffer flush */
 	struct mutex		mutex;		/* Locking */
 	struct mutex		buf_mutex;	/* Buffer alloc lock */
 	unsigned char		*xmit_buf;	/* Optional buffer */
@@ -254,7 +255,7 @@ struct tty_struct {
 	int count;
 	struct winsize winsize;		/* termios mutex */
 	unsigned char stopped:1, hw_stopped:1, flow_stopped:1, packet:1;
-	unsigned char low_latency:1, warned:1;
+	unsigned char warned:1;
 	unsigned char ctrl_status;	/* ctrl_lock */
 	unsigned int receive_room;	/* Bytes free for queue */
 
diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c
index 14b08e376f9f..2491f6f53871 100644
--- a/net/irda/ircomm/ircomm_tty.c
+++ b/net/irda/ircomm/ircomm_tty.c
@@ -452,7 +452,7 @@ static int ircomm_tty_open(struct tty_struct *tty, struct file *filp)
 		   self->line, self->port.count);
 
 	/* Not really used by us, but lets do it anyway */
-	tty->low_latency = (self->port.flags & ASYNC_LOW_LATENCY) ? 1 : 0;
+	self->port.low_latency = (self->port.flags & ASYNC_LOW_LATENCY) ? 1 : 0;
 
 	/*
 	 * If the port is the middle of closing, bail out now
-- 
cgit v1.2.3


From 2e124b4a390ca85325fae75764bef92f0547fa25 Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jslaby@suse.cz>
Date: Thu, 3 Jan 2013 15:53:06 +0100
Subject: TTY: switch tty_flip_buffer_push

Now, we start converting tty buffer functions to actually use
tty_port. This will allow us to get rid of the need of tty in many
call sites. Only tty_port will needed and hence no more
tty_port_tty_get in those paths.

Now, the one where most of tty_port_tty_get gets removed:
tty_flip_buffer_push.

IOW we also closed all the races in drivers not using tty_port_tty_get
at all yet.

Also we move tty_flip_buffer_push declaration from include/linux/tty.h
to include/linux/tty_flip.h to all others while we are changing it
anyway.

Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/ia64/hp/sim/simserial.c                | 18 ++-------
 arch/mn10300/kernel/mn10300-serial.c        |  7 ++--
 arch/parisc/kernel/pdc_cons.c               |  8 +---
 arch/um/drivers/chan.h                      |  3 +-
 arch/um/drivers/chan_kern.c                 | 14 +++----
 arch/um/drivers/line.c                      |  7 ++--
 arch/xtensa/platforms/iss/console.c         |  9 ++---
 drivers/char/pcmcia/synclink_cs.c           | 14 ++-----
 drivers/ipack/devices/ipoctal.c             | 14 ++-----
 drivers/isdn/gigaset/interface.c            | 10 +----
 drivers/isdn/i4l/isdn_tty.c                 | 39 ++++++++-----------
 drivers/mmc/card/sdio_uart.c                | 14 ++-----
 drivers/net/usb/hso.c                       | 31 ++++++++-------
 drivers/s390/char/con3215.c                 |  4 +-
 drivers/s390/char/sclp_tty.c                |  4 +-
 drivers/s390/char/sclp_vt220.c              |  8 +---
 drivers/staging/ccg/u_serial.c              | 11 ++----
 drivers/staging/dgrp/dgrp_net_ops.c         |  4 +-
 drivers/staging/fwserial/fwserial.c         | 41 ++++++++------------
 drivers/staging/serqt_usb2/serqt_usb2.c     |  9 ++---
 drivers/tty/amiserial.c                     |  5 +--
 drivers/tty/bfin_jtag_comm.c                | 22 +++++------
 drivers/tty/ehv_bytechan.c                  |  9 +----
 drivers/tty/hvc/hvc_console.c               |  2 +-
 drivers/tty/hvc/hvcs.c                      |  2 +-
 drivers/tty/hvc/hvsi.c                      |  6 +--
 drivers/tty/ipwireless/tty.c                |  8 +---
 drivers/tty/isicom.c                        |  4 +-
 drivers/tty/mxser.c                         |  2 +-
 drivers/tty/n_gsm.c                         | 58 ++++++++++++++---------------
 drivers/tty/nozomi.c                        | 14 +++----
 drivers/tty/pty.c                           |  2 +-
 drivers/tty/rocket.c                        | 25 ++++++-------
 drivers/tty/serial/21285.c                  |  3 +-
 drivers/tty/serial/8250/8250.c              |  3 +-
 drivers/tty/serial/altera_jtaguart.c        |  2 +-
 drivers/tty/serial/altera_uart.c            |  2 +-
 drivers/tty/serial/amba-pl010.c             |  3 +-
 drivers/tty/serial/amba-pl011.c             |  7 +---
 drivers/tty/serial/apbuart.c                |  3 +-
 drivers/tty/serial/ar933x_uart.c            |  7 +---
 drivers/tty/serial/arc_uart.c               |  8 +---
 drivers/tty/serial/atmel_serial.c           |  5 +--
 drivers/tty/serial/bcm63xx_uart.c           |  4 +-
 drivers/tty/serial/bfin_sport_uart.c        |  4 +-
 drivers/tty/serial/bfin_uart.c              | 10 ++---
 drivers/tty/serial/clps711x.c               |  8 +---
 drivers/tty/serial/cpm_uart/cpm_uart_core.c |  3 +-
 drivers/tty/serial/crisv10.c                | 17 +--------
 drivers/tty/serial/dz.c                     |  4 +-
 drivers/tty/serial/efm32-uart.c             |  8 +---
 drivers/tty/serial/icom.c                   |  3 +-
 drivers/tty/serial/ifx6x60.c                |  6 +--
 drivers/tty/serial/imx.c                    |  3 +-
 drivers/tty/serial/ioc3_serial.c            |  6 +--
 drivers/tty/serial/ioc4_serial.c            |  6 +--
 drivers/tty/serial/jsm/jsm_tty.c            |  2 +-
 drivers/tty/serial/kgdb_nmi.c               | 10 +----
 drivers/tty/serial/lantiq.c                 | 15 +++-----
 drivers/tty/serial/lpc32xx_hs.c             | 16 +-------
 drivers/tty/serial/m32r_sio.c               |  3 +-
 drivers/tty/serial/max3100.c                |  8 ++--
 drivers/tty/serial/max310x.c                |  8 +---
 drivers/tty/serial/mcf.c                    |  2 +-
 drivers/tty/serial/mfd.c                    | 12 +-----
 drivers/tty/serial/mpc52xx_uart.c           |  3 +-
 drivers/tty/serial/mpsc.c                   |  5 +--
 drivers/tty/serial/mrst_max3110.c           | 11 +-----
 drivers/tty/serial/msm_serial.c             |  6 +--
 drivers/tty/serial/msm_serial_hs.c          |  3 +-
 drivers/tty/serial/msm_smd_tty.c            |  2 +-
 drivers/tty/serial/mux.c                    |  6 +--
 drivers/tty/serial/mxs-auart.c              |  6 +--
 drivers/tty/serial/netx-serial.c            |  4 +-
 drivers/tty/serial/nwpserial.c              |  3 +-
 drivers/tty/serial/omap-serial.c            |  3 +-
 drivers/tty/serial/pch_uart.c               | 19 +---------
 drivers/tty/serial/pmac_zilog.c             | 30 +++++++--------
 drivers/tty/serial/pnx8xxx_uart.c           |  3 +-
 drivers/tty/serial/pxa.c                    |  3 +-
 drivers/tty/serial/sa1100.c                 |  3 +-
 drivers/tty/serial/samsung.c                |  3 +-
 drivers/tty/serial/sb1250-duart.c           |  2 +-
 drivers/tty/serial/sc26xx.c                 | 27 ++++++--------
 drivers/tty/serial/sccnxp.c                 |  8 +---
 drivers/tty/serial/serial_ks8695.c          |  3 +-
 drivers/tty/serial/serial_txx9.c            |  3 +-
 drivers/tty/serial/sh-sci.c                 | 18 +++------
 drivers/tty/serial/sirfsoc_uart.c           |  8 +---
 drivers/tty/serial/sn_console.c             | 12 ++----
 drivers/tty/serial/sunhv.c                  | 16 ++++----
 drivers/tty/serial/sunsab.c                 | 20 ++++------
 drivers/tty/serial/sunsu.c                  | 13 ++-----
 drivers/tty/serial/sunzilog.c               | 28 ++++++--------
 drivers/tty/serial/timbuart.c               |  2 +-
 drivers/tty/serial/uartlite.c               |  2 +-
 drivers/tty/serial/ucc_uart.c               |  3 +-
 drivers/tty/serial/vr41xx_siu.c             |  4 +-
 drivers/tty/serial/vt8500_serial.c          | 12 +-----
 drivers/tty/serial/xilinx_uartps.c          | 14 ++-----
 drivers/tty/serial/zs.c                     |  2 +-
 drivers/tty/synclink.c                      |  5 +--
 drivers/tty/synclink_gt.c                   |  5 +--
 drivers/tty/synclinkmp.c                    |  4 +-
 drivers/tty/tty_buffer.c                    |  8 ++--
 drivers/usb/class/cdc-acm.c                 | 10 +----
 drivers/usb/gadget/u_serial.c               |  4 +-
 drivers/usb/serial/aircable.c               |  8 +---
 drivers/usb/serial/ark3116.c                |  8 +---
 drivers/usb/serial/belkin_sa.c              |  8 +---
 drivers/usb/serial/cyberjack.c              |  9 +----
 drivers/usb/serial/cypress_m8.c             |  4 +-
 drivers/usb/serial/digi_acceleport.c        |  8 +---
 drivers/usb/serial/f81232.c                 |  8 +---
 drivers/usb/serial/ftdi_sio.c               |  8 +---
 drivers/usb/serial/garmin_gps.c             |  7 +---
 drivers/usb/serial/generic.c                |  8 +---
 drivers/usb/serial/io_edgeport.c            | 35 +++++++----------
 drivers/usb/serial/io_ti.c                  | 27 +++++---------
 drivers/usb/serial/ir-usb.c                 |  7 +---
 drivers/usb/serial/iuu_phoenix.c            |  7 +---
 drivers/usb/serial/keyspan.c                | 31 ++++-----------
 drivers/usb/serial/keyspan_pda.c            |  7 +---
 drivers/usb/serial/kl5kusb105.c             |  8 +---
 drivers/usb/serial/kobil_sct.c              |  7 +---
 drivers/usb/serial/mct_u232.c               | 11 ++----
 drivers/usb/serial/metro-usb.c              |  7 +---
 drivers/usb/serial/mos7720.c                |  7 +---
 drivers/usb/serial/mos7840.c                |  9 +----
 drivers/usb/serial/navman.c                 |  7 +---
 drivers/usb/serial/omninet.c                | 11 ++----
 drivers/usb/serial/opticon.c                |  9 +----
 drivers/usb/serial/oti6858.c                |  7 +---
 drivers/usb/serial/pl2303.c                 |  8 +---
 drivers/usb/serial/quatech2.c               | 19 +---------
 drivers/usb/serial/safe_serial.c            | 13 ++-----
 drivers/usb/serial/sierra.c                 | 17 +++------
 drivers/usb/serial/spcp8x5.c                | 18 ++++-----
 drivers/usb/serial/ssu100.c                 | 23 ++----------
 drivers/usb/serial/symbolserial.c           | 10 +----
 drivers/usb/serial/ti_usb_3410_5052.c       | 39 ++++++++-----------
 drivers/usb/serial/usb_wwan.c               | 17 +++------
 include/linux/tty.h                         |  1 -
 include/linux/tty_flip.h                    |  1 +
 net/bluetooth/rfcomm/tty.c                  | 16 +++-----
 net/irda/ircomm/ircomm_tty.c                |  4 +-
 146 files changed, 446 insertions(+), 988 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/hp/sim/simserial.c b/arch/ia64/hp/sim/simserial.c
index 942022a5bc86..da2f319fb71d 100644
--- a/arch/ia64/hp/sim/simserial.c
+++ b/arch/ia64/hp/sim/simserial.c
@@ -53,9 +53,8 @@ struct tty_driver *hp_simserial_driver;
 
 static struct console *console;
 
-static void receive_chars(struct tty_struct *tty)
+static void receive_chars(struct tty_port *port)
 {
-	struct tty_port *port = tty->port;
 	unsigned char ch;
 	static unsigned char seen_esc = 0;
 
@@ -85,7 +84,7 @@ static void receive_chars(struct tty_struct *tty)
 		if (tty_insert_flip_char(port, ch, TTY_NORMAL) == 0)
 			break;
 	}
-	tty_flip_buffer_push(tty);
+	tty_flip_buffer_push(port);
 }
 
 /*
@@ -94,18 +93,9 @@ static void receive_chars(struct tty_struct *tty)
 static irqreturn_t rs_interrupt_single(int irq, void *dev_id)
 {
 	struct serial_state *info = dev_id;
-	struct tty_struct *tty = tty_port_tty_get(&info->port);
 
-	if (!tty) {
-		printk(KERN_INFO "%s: tty=0 problem\n", __func__);
-		return IRQ_NONE;
-	}
-	/*
-	 * pretty simple in our case, because we only get interrupts
-	 * on inbound traffic
-	 */
-	receive_chars(tty);
-	tty_kref_put(tty);
+	receive_chars(&info->port);
+
 	return IRQ_HANDLED;
 }
 
diff --git a/arch/mn10300/kernel/mn10300-serial.c b/arch/mn10300/kernel/mn10300-serial.c
index ae61bd692b4b..1dd20dbfd098 100644
--- a/arch/mn10300/kernel/mn10300-serial.c
+++ b/arch/mn10300/kernel/mn10300-serial.c
@@ -525,7 +525,6 @@ static void mn10300_serial_receive_interrupt(struct mn10300_serial_port *port)
 {
 	struct uart_icount *icount = &port->uart.icount;
 	struct tty_port *port = &port->uart.state->port;
-	struct tty_struct *tty = port->tty;
 	unsigned ix;
 	int count;
 	u8 st, ch, push, status, overrun;
@@ -538,7 +537,7 @@ static void mn10300_serial_receive_interrupt(struct mn10300_serial_port *port)
 	count = tty_buffer_request_room(port, count);
 	if (count == 0) {
 		if (!port->low_latency)
-			tty_flip_buffer_push(tty);
+			tty_flip_buffer_push(port);
 		return;
 	}
 
@@ -547,7 +546,7 @@ try_again:
 	ix = ACCESS_ONCE(port->rx_outp);
 	if (CIRC_CNT(port->rx_inp, ix, MNSC_BUFFER_SIZE) == 0) {
 		if (push && !port->low_latency)
-			tty_flip_buffer_push(tty);
+			tty_flip_buffer_push(port);
 		return;
 	}
 
@@ -679,7 +678,7 @@ insert:
 	count--;
 	if (count <= 0) {
 		if (!port->low_latency)
-			tty_flip_buffer_push(tty);
+			tty_flip_buffer_push(port);
 		return;
 	}
 
diff --git a/arch/parisc/kernel/pdc_cons.c b/arch/parisc/kernel/pdc_cons.c
index 4d92a379eb21..d5cae55195ec 100644
--- a/arch/parisc/kernel/pdc_cons.c
+++ b/arch/parisc/kernel/pdc_cons.c
@@ -138,10 +138,6 @@ static const struct tty_operations pdc_console_tty_ops = {
 static void pdc_console_poll(unsigned long unused)
 {
 	int data, count = 0;
-	struct tty_struct *tty = tty_port_tty_get(&tty_port);
-
-	if (!tty)
-		return;
 
 	while (1) {
 		data = pdc_console_poll_key(NULL);
@@ -152,9 +148,7 @@ static void pdc_console_poll(unsigned long unused)
 	}
 
 	if (count)
-		tty_flip_buffer_push(tty);
-
-	tty_kref_put(tty);
+		tty_flip_buffer_push(&tty_port);
 
 	if (pdc_cons.flags & CON_ENABLED)
 		mod_timer(&pdc_console_timer, jiffies + PDC_CONS_POLL_DELAY);
diff --git a/arch/um/drivers/chan.h b/arch/um/drivers/chan.h
index 02b5a76e98d9..78f1b8999964 100644
--- a/arch/um/drivers/chan.h
+++ b/arch/um/drivers/chan.h
@@ -27,8 +27,7 @@ struct chan {
 	void *data;
 };
 
-extern void chan_interrupt(struct line *line,
-			   struct tty_struct *tty, int irq);
+extern void chan_interrupt(struct line *line, int irq);
 extern int parse_chan_pair(char *str, struct line *line, int device,
 			   const struct chan_opts *opts, char **error_out);
 extern int write_chan(struct chan *chan, const char *buf, int len,
diff --git a/arch/um/drivers/chan_kern.c b/arch/um/drivers/chan_kern.c
index 795bd8102205..15c553c239a1 100644
--- a/arch/um/drivers/chan_kern.c
+++ b/arch/um/drivers/chan_kern.c
@@ -131,11 +131,9 @@ void chan_enable_winch(struct chan *chan, struct tty_struct *tty)
 static void line_timer_cb(struct work_struct *work)
 {
 	struct line *line = container_of(work, struct line, task.work);
-	struct tty_struct *tty = tty_port_tty_get(&line->port);
 
 	if (!line->throttled)
-		chan_interrupt(line, tty, line->driver->read_irq);
-	tty_kref_put(tty);
+		chan_interrupt(line, line->driver->read_irq);
 }
 
 int enable_chan(struct line *line)
@@ -546,7 +544,7 @@ int parse_chan_pair(char *str, struct line *line, int device,
 	return 0;
 }
 
-void chan_interrupt(struct line *line, struct tty_struct *tty, int irq)
+void chan_interrupt(struct line *line, int irq)
 {
 	struct tty_port *port = &line->port;
 	struct chan *chan = line->chan_in;
@@ -570,8 +568,11 @@ void chan_interrupt(struct line *line, struct tty_struct *tty, int irq)
 		reactivate_fd(chan->fd, irq);
 	if (err == -EIO) {
 		if (chan->primary) {
-			if (tty != NULL)
+			struct tty_struct *tty = tty_port_tty_get(&line->port);
+			if (tty != NULL) {
 				tty_hangup(tty);
+				tty_kref_put(tty);
+			}
 			if (line->chan_out != chan)
 				close_one_chan(line->chan_out, 1);
 		}
@@ -580,6 +581,5 @@ void chan_interrupt(struct line *line, struct tty_struct *tty, int irq)
 			return;
 	}
  out:
-	if (tty)
-		tty_flip_buffer_push(tty);
+	tty_flip_buffer_push(port);
 }
diff --git a/arch/um/drivers/line.c b/arch/um/drivers/line.c
index 9ffc28bd4b7a..f1b38571f94e 100644
--- a/arch/um/drivers/line.c
+++ b/arch/um/drivers/line.c
@@ -19,11 +19,10 @@ static irqreturn_t line_interrupt(int irq, void *data)
 {
 	struct chan *chan = data;
 	struct line *line = chan->line;
-	struct tty_struct *tty = tty_port_tty_get(&line->port);
 
 	if (line)
-		chan_interrupt(line, tty, irq);
-	tty_kref_put(tty);
+		chan_interrupt(line, irq);
+
 	return IRQ_HANDLED;
 }
 
@@ -234,7 +233,7 @@ void line_unthrottle(struct tty_struct *tty)
 	struct line *line = tty->driver_data;
 
 	line->throttled = 0;
-	chan_interrupt(line, tty, line->driver->read_irq);
+	chan_interrupt(line, line->driver->read_irq);
 
 	/*
 	 * Maybe there is enough stuff pending that calling the interrupt
diff --git a/arch/xtensa/platforms/iss/console.c b/arch/xtensa/platforms/iss/console.c
index 62447d63890c..da9866f7fecf 100644
--- a/arch/xtensa/platforms/iss/console.c
+++ b/arch/xtensa/platforms/iss/console.c
@@ -58,7 +58,8 @@ static int rs_open(struct tty_struct *tty, struct file * filp)
 	tty->port = &serial_port;
 	spin_lock(&timer_lock);
 	if (tty->count == 1) {
-		setup_timer(&serial_timer, rs_poll, (unsigned long)tty);
+		setup_timer(&serial_timer, rs_poll,
+				(unsigned long)&serial_port);
 		mod_timer(&serial_timer, jiffies + SERIAL_TIMER_VALUE);
 	}
 	spin_unlock(&timer_lock);
@@ -97,9 +98,7 @@ static int rs_write(struct tty_struct * tty,
 
 static void rs_poll(unsigned long priv)
 {
-	struct tty_struct* tty = (struct tty_struct*) priv;
-	struct tty_port *port = tty->port;
-
+	struct tty_port *port = (struct tty_port *)priv;
 	struct timeval tv = { .tv_sec = 0, .tv_usec = 0 };
 	int i = 0;
 	unsigned char c;
@@ -113,7 +112,7 @@ static void rs_poll(unsigned long priv)
 	}
 
 	if (i)
-		tty_flip_buffer_push(tty);
+		tty_flip_buffer_push(port);
 
 
 	mod_timer(&serial_timer, jiffies + SERIAL_TIMER_VALUE);
diff --git a/drivers/char/pcmcia/synclink_cs.c b/drivers/char/pcmcia/synclink_cs.c
index 92dc7327c7aa..f334aec65fc7 100644
--- a/drivers/char/pcmcia/synclink_cs.c
+++ b/drivers/char/pcmcia/synclink_cs.c
@@ -886,7 +886,7 @@ static void rx_ready_hdlc(MGSLPC_INFO *info, int eom)
 	issue_command(info, CHA, CMD_RXFIFO);
 }
 
-static void rx_ready_async(MGSLPC_INFO *info, int tcd, struct tty_struct *tty)
+static void rx_ready_async(MGSLPC_INFO *info, int tcd)
 {
 	struct tty_port *port = &info->port;
 	unsigned char data, status, flag;
@@ -894,14 +894,6 @@ static void rx_ready_async(MGSLPC_INFO *info, int tcd, struct tty_struct *tty)
 	int work = 0;
  	struct mgsl_icount *icount = &info->icount;
 
-	if (!tty) {
-		/* tty is not available anymore */
-		issue_command(info, CHA, CMD_RXRESET);
-		if (debug_level >= DEBUG_LEVEL_ISR)
-			printk("%s(%d):rx_ready_async(tty=NULL)\n",__FILE__,__LINE__);
-		return;
-	}
-
 	if (tcd) {
 		/* early termination, get FIFO count from RBCL register */
 		fifo_count = (unsigned char)(read_reg(info, CHA+RBCL) & 0x1f);
@@ -958,7 +950,7 @@ static void rx_ready_async(MGSLPC_INFO *info, int tcd, struct tty_struct *tty)
 	}
 
 	if (work)
-		tty_flip_buffer_push(tty);
+		tty_flip_buffer_push(port);
 }
 
 
@@ -1218,7 +1210,7 @@ static irqreturn_t mgslpc_isr(int dummy, void *dev_id)
 				if (info->params.mode == MGSL_MODE_HDLC)
 					rx_ready_hdlc(info, isr & IRQ_RXEOM);
 				else
-					rx_ready_async(info, isr & IRQ_RXEOM, tty);
+					rx_ready_async(info, isr & IRQ_RXEOM);
 			}
 
 			/* transmit IRQs */
diff --git a/drivers/ipack/devices/ipoctal.c b/drivers/ipack/devices/ipoctal.c
index 8e0ed663ba9b..ab20a0851dd2 100644
--- a/drivers/ipack/devices/ipoctal.c
+++ b/drivers/ipack/devices/ipoctal.c
@@ -133,8 +133,7 @@ static int ipoctal_get_icount(struct tty_struct *tty,
 	return 0;
 }
 
-static void ipoctal_irq_rx(struct ipoctal_channel *channel,
-			   struct tty_struct *tty, u8 sr)
+static void ipoctal_irq_rx(struct ipoctal_channel *channel, u8 sr)
 {
 	struct tty_port *port = &channel->tty_port;
 	unsigned char value;
@@ -176,7 +175,7 @@ static void ipoctal_irq_rx(struct ipoctal_channel *channel,
 		sr = ioread8(&channel->regs->r.sr);
 	} while (isr & channel->isr_rx_rdy_mask);
 
-	tty_flip_buffer_push(tty);
+	tty_flip_buffer_push(port);
 }
 
 static void ipoctal_irq_tx(struct ipoctal_channel *channel)
@@ -209,15 +208,11 @@ static void ipoctal_irq_tx(struct ipoctal_channel *channel)
 static void ipoctal_irq_channel(struct ipoctal_channel *channel)
 {
 	u8 isr, sr;
-	struct tty_struct *tty;
 
 	/* If there is no client, skip the check */
 	if (!atomic_read(&channel->open))
 		return;
 
-	tty = tty_port_tty_get(&channel->tty_port);
-	if (!tty)
-		return;
 	/* The HW is organized in pair of channels.  See which register we need
 	 * to read from */
 	isr = ioread8(&channel->block_regs->r.isr);
@@ -236,14 +231,13 @@ static void ipoctal_irq_channel(struct ipoctal_channel *channel)
 
 	/* RX data */
 	if ((isr & channel->isr_rx_rdy_mask) && (sr & SR_RX_READY))
-		ipoctal_irq_rx(channel, tty, sr);
+		ipoctal_irq_rx(channel, sr);
 
 	/* TX of each character */
 	if ((isr & channel->isr_tx_rdy_mask) && (sr & SR_TX_READY))
 		ipoctal_irq_tx(channel);
 
-	tty_flip_buffer_push(tty);
-	tty_kref_put(tty);
+	tty_flip_buffer_push(&channel->tty_port);
 }
 
 static irqreturn_t ipoctal_irq_handler(void *arg)
diff --git a/drivers/isdn/gigaset/interface.c b/drivers/isdn/gigaset/interface.c
index 6dcecd40a819..0fbf4f215d86 100644
--- a/drivers/isdn/gigaset/interface.c
+++ b/drivers/isdn/gigaset/interface.c
@@ -562,16 +562,8 @@ void gigaset_if_free(struct cardstate *cs)
 void gigaset_if_receive(struct cardstate *cs,
 			unsigned char *buffer, size_t len)
 {
-	struct tty_struct *tty = tty_port_tty_get(&cs->port);
-
-	if (tty == NULL) {
-		gig_dbg(DEBUG_IF, "receive on closed device");
-		return;
-	}
-
 	tty_insert_flip_string(&cs->port, buffer, len);
-	tty_flip_buffer_push(tty);
-	tty_kref_put(tty);
+	tty_flip_buffer_push(&cs->port);
 }
 EXPORT_SYMBOL_GPL(gigaset_if_receive);
 
diff --git a/drivers/isdn/i4l/isdn_tty.c b/drivers/isdn/i4l/isdn_tty.c
index 9bb9986659e4..d8a7d8323414 100644
--- a/drivers/isdn/i4l/isdn_tty.c
+++ b/drivers/isdn/i4l/isdn_tty.c
@@ -63,16 +63,11 @@ isdn_tty_try_read(modem_info *info, struct sk_buff *skb)
 	struct tty_port *port = &info->port;
 	int c;
 	int len;
-	struct tty_struct *tty;
 	char last;
 
 	if (!info->online)
 		return 0;
 
-	tty = port->tty;
-	if (!tty)
-		return 0;
-
 	if (!(info->mcr & UART_MCR_RTS))
 		return 0;
 
@@ -110,7 +105,7 @@ isdn_tty_try_read(modem_info *info, struct sk_buff *skb)
 		tty_insert_flip_char(port, last, 0xFF);
 	else
 		tty_insert_flip_char(port, last, TTY_NORMAL);
-	tty_flip_buffer_push(tty);
+	tty_flip_buffer_push(port);
 	kfree_skb(skb);
 
 	return 1;
@@ -127,7 +122,6 @@ isdn_tty_readmodem(void)
 	int midx;
 	int i;
 	int r;
-	struct tty_struct *tty;
 	modem_info *info;
 
 	for (i = 0; i < ISDN_MAX_CHANNELS; i++) {
@@ -145,20 +139,21 @@ isdn_tty_readmodem(void)
 		if ((info->vonline & 1) && (info->emu.vpar[1]))
 			isdn_audio_eval_silence(info);
 #endif
-		tty = info->port.tty;
-		if (tty) {
-			if (info->mcr & UART_MCR_RTS) {
-				/* CISCO AsyncPPP Hack */
-				if (!(info->emu.mdmreg[REG_CPPP] & BIT_CPPP))
-					r = isdn_readbchan_tty(info->isdn_driver, info->isdn_channel, &info->port, 0);
-				else
-					r = isdn_readbchan_tty(info->isdn_driver, info->isdn_channel, &info->port, 1);
-				if (r)
-					tty_flip_buffer_push(tty);
-			} else
-				r = 1;
+		if (info->mcr & UART_MCR_RTS) {
+			/* CISCO AsyncPPP Hack */
+			if (!(info->emu.mdmreg[REG_CPPP] & BIT_CPPP))
+				r = isdn_readbchan_tty(info->isdn_driver,
+						info->isdn_channel,
+						&info->port, 0);
+			else
+				r = isdn_readbchan_tty(info->isdn_driver,
+						info->isdn_channel,
+						&info->port, 1);
+			if (r)
+				tty_flip_buffer_push(&info->port);
 		} else
 			r = 1;
+
 		if (r) {
 			info->rcvsched = 0;
 			resched = 1;
@@ -2230,7 +2225,6 @@ isdn_tty_stat_callback(int i, isdn_ctrl *c)
 void
 isdn_tty_at_cout(char *msg, modem_info *info)
 {
-	struct tty_struct *tty;
 	struct tty_port *port = &info->port;
 	atemu *m = &info->emu;
 	char *p;
@@ -2248,8 +2242,7 @@ isdn_tty_at_cout(char *msg, modem_info *info)
 	l = strlen(msg);
 
 	spin_lock_irqsave(&info->readlock, flags);
-	tty = port->tty;
-	if ((port->flags & ASYNC_CLOSING) || (!tty)) {
+	if (port->flags & ASYNC_CLOSING) {
 		spin_unlock_irqrestore(&info->readlock, flags);
 		return;
 	}
@@ -2301,7 +2294,7 @@ isdn_tty_at_cout(char *msg, modem_info *info)
 
 	} else {
 		spin_unlock_irqrestore(&info->readlock, flags);
-		tty_flip_buffer_push(tty);
+		tty_flip_buffer_push(port);
 	}
 }
 
diff --git a/drivers/mmc/card/sdio_uart.c b/drivers/mmc/card/sdio_uart.c
index 894078be0b96..c931dfe6a59c 100644
--- a/drivers/mmc/card/sdio_uart.c
+++ b/drivers/mmc/card/sdio_uart.c
@@ -381,7 +381,6 @@ static void sdio_uart_stop_rx(struct sdio_uart_port *port)
 static void sdio_uart_receive_chars(struct sdio_uart_port *port,
 				    unsigned int *status)
 {
-	struct tty_struct *tty = tty_port_tty_get(&port->port);
 	unsigned int ch, flag;
 	int max_count = 256;
 
@@ -418,24 +417,19 @@ static void sdio_uart_receive_chars(struct sdio_uart_port *port,
 		}
 
 		if ((*status & port->ignore_status_mask & ~UART_LSR_OE) == 0)
-			if (tty)
-				tty_insert_flip_char(&port->port, ch, flag);
+			tty_insert_flip_char(&port->port, ch, flag);
 
 		/*
 		 * Overrun is special.  Since it's reported immediately,
 		 * it doesn't affect the current character.
 		 */
 		if (*status & ~port->ignore_status_mask & UART_LSR_OE)
-			if (tty)
-				tty_insert_flip_char(&port->port, 0,
-						TTY_OVERRUN);
+			tty_insert_flip_char(&port->port, 0, TTY_OVERRUN);
 
 		*status = sdio_in(port, UART_LSR);
 	} while ((*status & UART_LSR_DR) && (max_count-- > 0));
-	if (tty) {
-		tty_flip_buffer_push(tty);
-		tty_kref_put(tty);
-	}
+
+	tty_flip_buffer_push(&port->port);
 }
 
 static void sdio_uart_transmit_chars(struct sdio_uart_port *port)
diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c
index d235ca07548f..f902a14da88c 100644
--- a/drivers/net/usb/hso.c
+++ b/drivers/net/usb/hso.c
@@ -2035,24 +2035,23 @@ static int put_rxbuf_data(struct urb *urb, struct hso_serial *serial)
 	tty = tty_port_tty_get(&serial->port);
 
 	/* Push data to tty */
-	if (tty) {
-		write_length_remaining = urb->actual_length -
-			serial->curr_rx_urb_offset;
-		D1("data to push to tty");
-		while (write_length_remaining) {
-			if (test_bit(TTY_THROTTLED, &tty->flags)) {
-				tty_kref_put(tty);
-				return -1;
-			}
-			curr_write_len = tty_insert_flip_string(&serial->port,
-				urb->transfer_buffer + serial->curr_rx_urb_offset,
-				write_length_remaining);
-			serial->curr_rx_urb_offset += curr_write_len;
-			write_length_remaining -= curr_write_len;
-			tty_flip_buffer_push(tty);
+	write_length_remaining = urb->actual_length -
+		serial->curr_rx_urb_offset;
+	D1("data to push to tty");
+	while (write_length_remaining) {
+		if (tty && test_bit(TTY_THROTTLED, &tty->flags)) {
+			tty_kref_put(tty);
+			return -1;
 		}
-		tty_kref_put(tty);
+		curr_write_len = tty_insert_flip_string(&serial->port,
+			urb->transfer_buffer + serial->curr_rx_urb_offset,
+			write_length_remaining);
+		serial->curr_rx_urb_offset += curr_write_len;
+		write_length_remaining -= curr_write_len;
+		tty_flip_buffer_push(&serial->port);
 	}
+	tty_kref_put(tty);
+
 	if (write_length_remaining == 0) {
 		serial->curr_rx_urb_offset = 0;
 		serial->rx_urb_filled[hso_urb_to_index(serial, urb)] = 0;
diff --git a/drivers/s390/char/con3215.c b/drivers/s390/char/con3215.c
index 41b75c5ae0d5..2f58e9fde156 100644
--- a/drivers/s390/char/con3215.c
+++ b/drivers/s390/char/con3215.c
@@ -413,7 +413,7 @@ static void raw3215_irq(struct ccw_device *cdev, unsigned long intparm,
 			case CTRLCHAR_CTRL:
 				tty_insert_flip_char(&raw->port, cchar,
 						TTY_NORMAL);
-				tty_flip_buffer_push(tty);
+				tty_flip_buffer_push(&raw->port);
 				break;
 
 			case CTRLCHAR_NONE:
@@ -427,7 +427,7 @@ static void raw3215_irq(struct ccw_device *cdev, unsigned long intparm,
 					count -= 2;
 				tty_insert_flip_string(&raw->port, raw->inbuf,
 						count);
-				tty_flip_buffer_push(tty);
+				tty_flip_buffer_push(&raw->port);
 				break;
 			}
 		} else if (req->type == RAW3215_WRITE) {
diff --git a/drivers/s390/char/sclp_tty.c b/drivers/s390/char/sclp_tty.c
index 19b7c516c07d..14b4cb8abcc8 100644
--- a/drivers/s390/char/sclp_tty.c
+++ b/drivers/s390/char/sclp_tty.c
@@ -343,7 +343,7 @@ sclp_tty_input(unsigned char* buf, unsigned int count)
 		break;
 	case CTRLCHAR_CTRL:
 		tty_insert_flip_char(&sclp_port, cchar, TTY_NORMAL);
-		tty_flip_buffer_push(tty);
+		tty_flip_buffer_push(&sclp_port);
 		break;
 	case CTRLCHAR_NONE:
 		/* send (normal) input to line discipline */
@@ -355,7 +355,7 @@ sclp_tty_input(unsigned char* buf, unsigned int count)
 			tty_insert_flip_char(&sclp_port, '\n', TTY_NORMAL);
 		} else
 			tty_insert_flip_string(&sclp_port, buf, count - 2);
-		tty_flip_buffer_push(tty);
+		tty_flip_buffer_push(&sclp_port);
 		break;
 	}
 	tty_kref_put(tty);
diff --git a/drivers/s390/char/sclp_vt220.c b/drivers/s390/char/sclp_vt220.c
index 0eca99b98712..6c92f62623be 100644
--- a/drivers/s390/char/sclp_vt220.c
+++ b/drivers/s390/char/sclp_vt220.c
@@ -461,14 +461,9 @@ sclp_vt220_write(struct tty_struct *tty, const unsigned char *buf, int count)
 static void
 sclp_vt220_receiver_fn(struct evbuf_header *evbuf)
 {
-	struct tty_struct *tty = tty_port_tty_get(&sclp_vt220_port);
 	char *buffer;
 	unsigned int count;
 
-	/* Ignore input if device is not open */
-	if (tty == NULL)
-		return;
-
 	buffer = (char *) ((addr_t) evbuf + sizeof(struct evbuf_header));
 	count = evbuf->length - sizeof(struct evbuf_header);
 
@@ -481,10 +476,9 @@ sclp_vt220_receiver_fn(struct evbuf_header *evbuf)
 		buffer++;
 		count--;
 		tty_insert_flip_string(&sclp_vt220_port, buffer, count);
-		tty_flip_buffer_push(tty);
+		tty_flip_buffer_push(&sclp_vt220_port);
 		break;
 	}
-	tty_kref_put(tty);
 }
 
 /*
diff --git a/drivers/staging/ccg/u_serial.c b/drivers/staging/ccg/u_serial.c
index 7df2c02d1137..b10947ae0ac5 100644
--- a/drivers/staging/ccg/u_serial.c
+++ b/drivers/staging/ccg/u_serial.c
@@ -491,12 +491,8 @@ static void gs_rx_push(unsigned long _port)
 
 		req = list_first_entry(queue, struct usb_request, list);
 
-		/* discard data if tty was closed */
-		if (!tty)
-			goto recycle;
-
 		/* leave data queued if tty was rx throttled */
-		if (test_bit(TTY_THROTTLED, &tty->flags))
+		if (tty && test_bit(TTY_THROTTLED, &tty->flags))
 			break;
 
 		switch (req->status) {
@@ -542,7 +538,6 @@ static void gs_rx_push(unsigned long _port)
 			}
 			port->n_read = 0;
 		}
-recycle:
 		list_move(&req->list, &port->read_pool);
 		port->read_started--;
 	}
@@ -550,8 +545,8 @@ recycle:
 	/* Push from tty to ldisc; without low_latency set this is handled by
 	 * a workqueue, so we won't get callbacks and can hold port_lock
 	 */
-	if (tty && do_push)
-		tty_flip_buffer_push(tty);
+	if (do_push)
+		tty_flip_buffer_push(&port->port);
 
 
 	/* We want our data queue to become empty ASAP, keeping data
diff --git a/drivers/staging/dgrp/dgrp_net_ops.c b/drivers/staging/dgrp/dgrp_net_ops.c
index e618a667d84c..4c7abfabf197 100644
--- a/drivers/staging/dgrp/dgrp_net_ops.c
+++ b/drivers/staging/dgrp/dgrp_net_ops.c
@@ -234,7 +234,7 @@ static void dgrp_input(struct ch_struct *ch)
 
 		tty_insert_flip_string_flags(&ch->port, myflipbuf,
 					     myflipflagbuf, len);
-		tty_flip_buffer_push(tty);
+		tty_flip_buffer_push(&ch->port);
 
 		ch->ch_rxcount += len;
 	}
@@ -2958,7 +2958,7 @@ check_query:
 
 				tty_buffer_request_room(&ch->port, 1);
 				tty_insert_flip_char(&ch->port, 0, TTY_BREAK);
-				tty_flip_buffer_push(ch->ch_tun.un_tty);
+				tty_flip_buffer_push(&ch->port);
 
 			}
 
diff --git a/drivers/staging/fwserial/fwserial.c b/drivers/staging/fwserial/fwserial.c
index a2a0c43dec1c..b403393c49c3 100644
--- a/drivers/staging/fwserial/fwserial.c
+++ b/drivers/staging/fwserial/fwserial.c
@@ -489,16 +489,11 @@ static void fwtty_do_hangup(struct work_struct *work)
 static void fwtty_emit_breaks(struct work_struct *work)
 {
 	struct fwtty_port *port = to_port(to_delayed_work(work), emit_breaks);
-	struct tty_struct *tty;
 	static const char buf[16];
 	unsigned long now = jiffies;
 	unsigned long elapsed = now - port->break_last;
 	int n, t, c, brk = 0;
 
-	tty = tty_port_tty_get(&port->port);
-	if (!tty)
-		return;
-
 	/* generate breaks at the line rate (but at least 1) */
 	n = (elapsed * port->cps) / HZ + 1;
 	port->break_last = now;
@@ -514,9 +509,7 @@ static void fwtty_emit_breaks(struct work_struct *work)
 		if (c < t)
 			break;
 	}
-	tty_flip_buffer_push(tty);
-
-	tty_kref_put(tty);
+	tty_flip_buffer_push(&port->port);
 
 	if (port->mstatus & (UART_LSR_BI << 24))
 		schedule_delayed_work(&port->emit_breaks, FREQ_BREAKS);
@@ -530,10 +523,6 @@ static void fwtty_pushrx(struct work_struct *work)
 	struct buffered_rx *buf, *next;
 	int n, c = 0;
 
-	tty = tty_port_tty_get(&port->port);
-	if (!tty)
-		return;
-
 	spin_lock_bh(&port->lock);
 	list_for_each_entry_safe(buf, next, &port->buf_list, list) {
 		n = tty_insert_flip_string_fixed_flag(&port->port, buf->data,
@@ -545,7 +534,11 @@ static void fwtty_pushrx(struct work_struct *work)
 				memmove(buf->data, buf->data + n, buf->n - n);
 				buf->n -= n;
 			}
-			__fwtty_throttle(port, tty);
+			tty = tty_port_tty_get(&port->port);
+			if (tty) {
+				__fwtty_throttle(port, tty);
+				tty_kref_put(tty);
+			}
 			break;
 		} else {
 			list_del(&buf->list);
@@ -553,13 +546,11 @@ static void fwtty_pushrx(struct work_struct *work)
 		}
 	}
 	if (c > 0)
-		tty_flip_buffer_push(tty);
+		tty_flip_buffer_push(&port->port);
 
 	if (list_empty(&port->buf_list))
 		clear_bit(BUFFERING_RX, &port->flags);
 	spin_unlock_bh(&port->lock);
-
-	tty_kref_put(tty);
 }
 
 static int fwtty_buffer_rx(struct fwtty_port *port, unsigned char *d, size_t n)
@@ -594,10 +585,6 @@ static int fwtty_rx(struct fwtty_port *port, unsigned char *data, size_t len)
 	unsigned lsr;
 	int err = 0;
 
-	tty = tty_port_tty_get(&port->port);
-	if (!tty)
-		return -ENOENT;
-
 	fwtty_dbg(port, "%d", n);
 	profile_size_distrib(port->stats.reads, n);
 
@@ -634,16 +621,20 @@ static int fwtty_rx(struct fwtty_port *port, unsigned char *data, size_t len)
 		c = tty_insert_flip_string_fixed_flag(&port->port, data,
 				TTY_NORMAL, n);
 		if (c > 0)
-			tty_flip_buffer_push(tty);
+			tty_flip_buffer_push(&port->port);
 		n -= c;
 
 		if (n) {
 			/* start buffering and throttling */
 			n -= fwtty_buffer_rx(port, &data[c], n);
 
-			spin_lock_bh(&port->lock);
-			__fwtty_throttle(port, tty);
-			spin_unlock_bh(&port->lock);
+			tty = tty_port_tty_get(&port->port);
+			if (tty) {
+				spin_lock_bh(&port->lock);
+				__fwtty_throttle(port, tty);
+				spin_unlock_bh(&port->lock);
+				tty_kref_put(tty);
+			}
 		}
 	} else
 		n -= fwtty_buffer_rx(port, data, n);
@@ -654,8 +645,6 @@ static int fwtty_rx(struct fwtty_port *port, unsigned char *data, size_t len)
 	}
 
 out:
-	tty_kref_put(tty);
-
 	port->icount.rx += len;
 	port->stats.lost += n;
 	return err;
diff --git a/drivers/staging/serqt_usb2/serqt_usb2.c b/drivers/staging/serqt_usb2/serqt_usb2.c
index 14965662d09c..df29a3de29f2 100644
--- a/drivers/staging/serqt_usb2/serqt_usb2.c
+++ b/drivers/staging/serqt_usb2/serqt_usb2.c
@@ -290,8 +290,7 @@ static void qt_interrupt_callback(struct urb *urb)
 	/* FIXME */
 }
 
-static void qt_status_change_check(struct tty_struct *tty,
-				   struct urb *urb,
+static void qt_status_change_check(struct urb *urb,
 				   struct quatech_port *qt_port,
 				   struct usb_serial_port *port)
 {
@@ -348,7 +347,7 @@ static void qt_status_change_check(struct tty_struct *tty,
 			tty_insert_flip_char(&port->port, data[i], TTY_NORMAL);
 
 	}
-	tty_flip_buffer_push(tty);
+	tty_flip_buffer_push(&port->port);
 }
 
 static void qt_read_bulk_callback(struct urb *urb)
@@ -411,7 +410,7 @@ static void qt_read_bulk_callback(struct urb *urb)
 	}
 
 	if (urb->actual_length)
-		qt_status_change_check(tty, urb, qt_port, port);
+		qt_status_change_check(urb, qt_port, port);
 
 	/* Continue trying to always read  */
 	usb_fill_bulk_urb(port->read_urb, serial->dev,
@@ -427,7 +426,7 @@ static void qt_read_bulk_callback(struct urb *urb)
 			__func__, result);
 	else {
 		if (urb->actual_length) {
-			tty_flip_buffer_push(tty);
+			tty_flip_buffer_push(&port->port);
 			tty_schedule_flip(tty);
 		}
 	}
diff --git a/drivers/tty/amiserial.c b/drivers/tty/amiserial.c
index 2d1357acbc23..4c7d70172193 100644
--- a/drivers/tty/amiserial.c
+++ b/drivers/tty/amiserial.c
@@ -251,7 +251,6 @@ static void receive_chars(struct serial_state *info)
 {
         int status;
 	int serdatr;
-	struct tty_struct *tty = info->tport.tty;
 	unsigned char ch, flag;
 	struct	async_icount *icount;
 	int oe = 0;
@@ -314,7 +313,7 @@ static void receive_chars(struct serial_state *info)
 #endif
 	    flag = TTY_BREAK;
 	    if (info->tport.flags & ASYNC_SAK)
-	      do_SAK(tty);
+	      do_SAK(info->tport.tty);
 	  } else if (status & UART_LSR_PE)
 	    flag = TTY_PARITY;
 	  else if (status & UART_LSR_FE)
@@ -331,7 +330,7 @@ static void receive_chars(struct serial_state *info)
 	tty_insert_flip_char(&info->tport, ch, flag);
 	if (oe == 1)
 		tty_insert_flip_char(&info->tport, 0, TTY_OVERRUN);
-	tty_flip_buffer_push(tty);
+	tty_flip_buffer_push(&info->tport);
 out:
 	return;
 }
diff --git a/drivers/tty/bfin_jtag_comm.c b/drivers/tty/bfin_jtag_comm.c
index 143c38579cb5..a93a424873fa 100644
--- a/drivers/tty/bfin_jtag_comm.c
+++ b/drivers/tty/bfin_jtag_comm.c
@@ -95,18 +95,16 @@ bfin_jc_emudat_manager(void *arg)
 
 		/* if incoming data is ready, eat it */
 		if (bfin_read_DBGSTAT() & EMUDIF) {
-			if (tty != NULL) {
-				uint32_t emudat = bfin_read_emudat();
-				if (inbound_len == 0) {
-					pr_debug("incoming length: 0x%08x\n", emudat);
-					inbound_len = emudat;
-				} else {
-					size_t num_chars = (4 <= inbound_len ? 4 : inbound_len);
-					pr_debug("  incoming data: 0x%08x (pushing %zu)\n", emudat, num_chars);
-					inbound_len -= num_chars;
-					tty_insert_flip_string(&port, (unsigned char *)&emudat, num_chars);
-					tty_flip_buffer_push(tty);
-				}
+			uint32_t emudat = bfin_read_emudat();
+			if (inbound_len == 0) {
+				pr_debug("incoming length: 0x%08x\n", emudat);
+				inbound_len = emudat;
+			} else {
+				size_t num_chars = (4 <= inbound_len ? 4 : inbound_len);
+				pr_debug("  incoming data: 0x%08x (pushing %zu)\n", emudat, num_chars);
+				inbound_len -= num_chars;
+				tty_insert_flip_string(&port, (unsigned char *)&emudat, num_chars);
+				tty_flip_buffer_push(&port);
 			}
 		}
 
diff --git a/drivers/tty/ehv_bytechan.c b/drivers/tty/ehv_bytechan.c
index 5164f9a57017..ed92622b8949 100644
--- a/drivers/tty/ehv_bytechan.c
+++ b/drivers/tty/ehv_bytechan.c
@@ -371,16 +371,11 @@ console_initcall(ehv_bc_console_init);
 static irqreturn_t ehv_bc_tty_rx_isr(int irq, void *data)
 {
 	struct ehv_bc_data *bc = data;
-	struct tty_struct *ttys = tty_port_tty_get(&bc->port);
 	unsigned int rx_count, tx_count, len;
 	int count;
 	char buffer[EV_BYTE_CHANNEL_MAX_BYTES];
 	int ret;
 
-	/* ttys could be NULL during a hangup */
-	if (!ttys)
-		return IRQ_HANDLED;
-
 	/* Find out how much data needs to be read, and then ask the TTY layer
 	 * if it can handle that much.  We want to ensure that every byte we
 	 * read from the byte channel will be accepted by the TTY layer.
@@ -422,9 +417,7 @@ static irqreturn_t ehv_bc_tty_rx_isr(int irq, void *data)
 	}
 
 	/* Tell the tty layer that we're done. */
-	tty_flip_buffer_push(ttys);
-
-	tty_kref_put(ttys);
+	tty_flip_buffer_push(&bc->port);
 
 	return IRQ_HANDLED;
 }
diff --git a/drivers/tty/hvc/hvc_console.c b/drivers/tty/hvc/hvc_console.c
index 8c2fe3a0e091..eb255e807c06 100644
--- a/drivers/tty/hvc/hvc_console.c
+++ b/drivers/tty/hvc/hvc_console.c
@@ -691,7 +691,7 @@ int hvc_poll(struct hvc_struct *hp)
 		   a minimum for performance. */
 		timeout = MIN_TIMEOUT;
 
-		tty_flip_buffer_push(tty);
+		tty_flip_buffer_push(&hp->port);
 	}
 	tty_kref_put(tty);
 
diff --git a/drivers/tty/hvc/hvcs.c b/drivers/tty/hvc/hvcs.c
index 7bfc0a924b2f..1956593ee89d 100644
--- a/drivers/tty/hvc/hvcs.c
+++ b/drivers/tty/hvc/hvcs.c
@@ -623,7 +623,7 @@ static int hvcs_io(struct hvcs_struct *hvcsd)
 	spin_unlock_irqrestore(&hvcsd->lock, flags);
 	/* This is synch because tty->low_latency == 1 */
 	if(got)
-		tty_flip_buffer_push(tty);
+		tty_flip_buffer_push(&hvcsd->port);
 
 	if (!got) {
 		/* Do this _after_ the flip_buffer_push */
diff --git a/drivers/tty/hvc/hvsi.c b/drivers/tty/hvc/hvsi.c
index 1f528b8ebf5f..dc591290120b 100644
--- a/drivers/tty/hvc/hvsi.c
+++ b/drivers/tty/hvc/hvsi.c
@@ -465,7 +465,7 @@ static int hvsi_load_chunk(struct hvsi_struct *hp, struct tty_struct *tty,
 	compact_inbuf(hp, packet);
 
 	if (flip)
-		tty_flip_buffer_push(tty);
+		tty_flip_buffer_push(&hp->port);
 
 	return 1;
 }
@@ -511,7 +511,7 @@ static irqreturn_t hvsi_interrupt(int irq, void *arg)
 		/* we weren't hung up and we weren't throttled, so we can
 		 * deliver the rest now */
 		hvsi_send_overflow(hp);
-		tty_flip_buffer_push(tty);
+		tty_flip_buffer_push(&hp->port);
 	}
 	spin_unlock_irqrestore(&hp->lock, flags);
 
@@ -998,7 +998,7 @@ static void hvsi_unthrottle(struct tty_struct *tty)
 	spin_lock_irqsave(&hp->lock, flags);
 	if (hp->n_throttle) {
 		hvsi_send_overflow(hp);
-		tty_flip_buffer_push(tty);
+		tty_flip_buffer_push(&hp->port);
 	}
 	spin_unlock_irqrestore(&hp->lock, flags);
 
diff --git a/drivers/tty/ipwireless/tty.c b/drivers/tty/ipwireless/tty.c
index c43da7445432..8fd72ff9436e 100644
--- a/drivers/tty/ipwireless/tty.c
+++ b/drivers/tty/ipwireless/tty.c
@@ -160,15 +160,9 @@ static void ipw_close(struct tty_struct *linux_tty, struct file *filp)
 void ipwireless_tty_received(struct ipw_tty *tty, unsigned char *data,
 			unsigned int length)
 {
-	struct tty_struct *linux_tty;
 	int work = 0;
 
 	mutex_lock(&tty->ipw_tty_mutex);
-	linux_tty = tty->port.tty;
-	if (linux_tty == NULL) {
-		mutex_unlock(&tty->ipw_tty_mutex);
-		return;
-	}
 
 	if (!tty->port.count) {
 		mutex_unlock(&tty->ipw_tty_mutex);
@@ -187,7 +181,7 @@ void ipwireless_tty_received(struct ipw_tty *tty, unsigned char *data,
 	 * This may sleep if ->low_latency is set
 	 */
 	if (work)
-		tty_flip_buffer_push(linux_tty);
+		tty_flip_buffer_push(&tty->port);
 }
 
 static void ipw_write_packet_sent_callback(void *callback_data,
diff --git a/drivers/tty/isicom.c b/drivers/tty/isicom.c
index c70144f55fc0..858291ca889c 100644
--- a/drivers/tty/isicom.c
+++ b/drivers/tty/isicom.c
@@ -637,7 +637,7 @@ static irqreturn_t isicom_interrupt(int irq, void *dev_id)
 			tty_insert_flip_char(&port->port, 0, TTY_BREAK);
 			if (port->port.flags & ASYNC_SAK)
 				do_SAK(tty);
-			tty_flip_buffer_push(tty);
+			tty_flip_buffer_push(&port->port);
 			break;
 
 		case 2:	/* Statistics		 */
@@ -671,7 +671,7 @@ static irqreturn_t isicom_interrupt(int irq, void *dev_id)
 				byte_count -= 2;
 			}
 		}
-		tty_flip_buffer_push(tty);
+		tty_flip_buffer_push(&port->port);
 	}
 	outw(0x0000, base+0x04); /* enable interrupts */
 	spin_unlock(&card->card_lock);
diff --git a/drivers/tty/mxser.c b/drivers/tty/mxser.c
index e9cdfdfe06e9..ad34a202a34d 100644
--- a/drivers/tty/mxser.c
+++ b/drivers/tty/mxser.c
@@ -2145,7 +2145,7 @@ end_intr:
 	 * recursive locking.
 	 */
 	spin_unlock(&port->slock);
-	tty_flip_buffer_push(tty);
+	tty_flip_buffer_push(&port->port);
 	spin_lock(&port->slock);
 }
 
diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c
index 4a3342d21c8f..d84dcfeadce3 100644
--- a/drivers/tty/n_gsm.c
+++ b/drivers/tty/n_gsm.c
@@ -1141,7 +1141,6 @@ static void gsm_control_modem(struct gsm_mux *gsm, u8 *data, int clen)
 static void gsm_control_rls(struct gsm_mux *gsm, u8 *data, int clen)
 {
 	struct tty_port *port;
-	struct tty_struct *tty;
 	unsigned int addr = 0 ;
 	u8 bits;
 	int len = clen;
@@ -1174,12 +1173,8 @@ static void gsm_control_rls(struct gsm_mux *gsm, u8 *data, int clen)
 	if (bits & 8)
 		tty_insert_flip_char(port, 0, TTY_FRAME);
 
-	/* See if we have an uplink tty */
-	tty = tty_port_tty_get(port);
-	if (tty) {
-		tty_flip_buffer_push(tty);
-		tty_kref_put(tty);
-	}
+	tty_flip_buffer_push(port);
+
 	gsm_control_reply(gsm, CMD_RLS, data, clen);
 }
 
@@ -1552,36 +1547,37 @@ static void gsm_dlci_data(struct gsm_dlci *dlci, u8 *data, int clen)
 {
 	/* krefs .. */
 	struct tty_port *port = &dlci->port;
-	struct tty_struct *tty = tty_port_tty_get(port);
+	struct tty_struct *tty;
 	unsigned int modem = 0;
 	int len = clen;
 
 	if (debug & 16)
-		pr_debug("%d bytes for tty %p\n", len, tty);
-	if (tty) {
-		switch (dlci->adaption)  {
-		/* Unsupported types */
-		/* Packetised interruptible data */
-		case 4:
-			break;
-		/* Packetised uininterruptible voice/data */
-		case 3:
-			break;
-		/* Asynchronous serial with line state in each frame */
-		case 2:
-			while (gsm_read_ea(&modem, *data++) == 0) {
-				len--;
-				if (len == 0)
-					return;
-			}
+		pr_debug("%d bytes for tty\n", len);
+	switch (dlci->adaption)  {
+	/* Unsupported types */
+	/* Packetised interruptible data */
+	case 4:
+		break;
+	/* Packetised uininterruptible voice/data */
+	case 3:
+		break;
+	/* Asynchronous serial with line state in each frame */
+	case 2:
+		while (gsm_read_ea(&modem, *data++) == 0) {
+			len--;
+			if (len == 0)
+				return;
+		}
+		tty = tty_port_tty_get(port);
+		if (tty) {
 			gsm_process_modem(tty, dlci, modem, clen);
-		/* Line state will go via DLCI 0 controls only */
-		case 1:
-		default:
-			tty_insert_flip_string(port, data, len);
-			tty_flip_buffer_push(tty);
+			tty_kref_put(tty);
 		}
-		tty_kref_put(tty);
+	/* Line state will go via DLCI 0 controls only */
+	case 1:
+	default:
+		tty_insert_flip_string(port, data, len);
+		tty_flip_buffer_push(port);
 	}
 }
 
diff --git a/drivers/tty/nozomi.c b/drivers/tty/nozomi.c
index 941fe8060ea5..afdd7732d925 100644
--- a/drivers/tty/nozomi.c
+++ b/drivers/tty/nozomi.c
@@ -1272,15 +1272,11 @@ static irqreturn_t interrupt_handler(int irq, void *dev_id)
 
 exit_handler:
 	spin_unlock(&dc->spin_mutex);
-	for (a = 0; a < NOZOMI_MAX_PORTS; a++) {
-		struct tty_struct *tty;
-		if (test_and_clear_bit(a, &dc->flip)) {
-			tty = tty_port_tty_get(&dc->port[a].port);
-			if (tty)
-				tty_flip_buffer_push(tty);
-			tty_kref_put(tty);
-		}
-	}
+
+	for (a = 0; a < NOZOMI_MAX_PORTS; a++)
+		if (test_and_clear_bit(a, &dc->flip))
+			tty_flip_buffer_push(&dc->port[a].port);
+
 	return IRQ_HANDLED;
 none:
 	spin_unlock(&dc->spin_mutex);
diff --git a/drivers/tty/pty.c b/drivers/tty/pty.c
index 3c285d398f38..32d027c303aa 100644
--- a/drivers/tty/pty.c
+++ b/drivers/tty/pty.c
@@ -123,7 +123,7 @@ static int pty_write(struct tty_struct *tty, const unsigned char *buf, int c)
 		c = tty_insert_flip_string(to->port, buf, c);
 		/* And shovel */
 		if (c) {
-			tty_flip_buffer_push(to);
+			tty_flip_buffer_push(to->port);
 			tty_wakeup(tty);
 		}
 	}
diff --git a/drivers/tty/rocket.c b/drivers/tty/rocket.c
index 5848a767001a..8073cc0dff59 100644
--- a/drivers/tty/rocket.c
+++ b/drivers/tty/rocket.c
@@ -315,9 +315,8 @@ static inline int rocket_paranoia_check(struct r_port *info,
  *  that receive data is present on a serial port.  Pulls data from FIFO, moves it into the 
  *  tty layer.  
  */
-static void rp_do_receive(struct r_port *info,
-			  struct tty_struct *tty,
-			  CHANNEL_t * cp, unsigned int ChanStatus)
+static void rp_do_receive(struct r_port *info, CHANNEL_t *cp,
+		unsigned int ChanStatus)
 {
 	unsigned int CharNStat;
 	int ToRecv, wRecv, space;
@@ -416,7 +415,7 @@ static void rp_do_receive(struct r_port *info,
 			cbuf[ToRecv - 1] = sInB(sGetTxRxDataIO(cp));
 	}
 	/*  Push the data up to the tty layer */
-	tty_flip_buffer_push(tty);
+	tty_flip_buffer_push(&info->port);
 }
 
 /*
@@ -495,7 +494,6 @@ static void rp_do_transmit(struct r_port *info)
 static void rp_handle_port(struct r_port *info)
 {
 	CHANNEL_t *cp;
-	struct tty_struct *tty;
 	unsigned int IntMask, ChanStatus;
 
 	if (!info)
@@ -506,12 +504,7 @@ static void rp_handle_port(struct r_port *info)
 				"info->flags & NOT_INIT\n");
 		return;
 	}
-	tty = tty_port_tty_get(&info->port);
-	if (!tty) {
-		printk(KERN_WARNING "rp: WARNING: rp_handle_port called with "
-				"tty==NULL\n");
-		return;
-	}
+
 	cp = &info->channel;
 
 	IntMask = sGetChanIntID(cp) & info->intmask;
@@ -520,7 +513,7 @@ static void rp_handle_port(struct r_port *info)
 #endif
 	ChanStatus = sGetChanStatus(cp);
 	if (IntMask & RXF_TRIG) {	/* Rx FIFO trigger level */
-		rp_do_receive(info, tty, cp, ChanStatus);
+		rp_do_receive(info, cp, ChanStatus);
 	}
 	if (IntMask & DELTA_CD) {	/* CD change  */
 #if (defined(ROCKET_DEBUG_OPEN) || defined(ROCKET_DEBUG_INTR) || defined(ROCKET_DEBUG_HANGUP))
@@ -528,10 +521,15 @@ static void rp_handle_port(struct r_port *info)
 		       (ChanStatus & CD_ACT) ? "on" : "off");
 #endif
 		if (!(ChanStatus & CD_ACT) && info->cd_status) {
+			struct tty_struct *tty;
 #ifdef ROCKET_DEBUG_HANGUP
 			printk(KERN_INFO "CD drop, calling hangup.\n");
 #endif
-			tty_hangup(tty);
+			tty = tty_port_tty_get(&info->port);
+			if (tty) {
+				tty_hangup(tty);
+				tty_kref_put(tty);
+			}
 		}
 		info->cd_status = (ChanStatus & CD_ACT) ? 1 : 0;
 		wake_up_interruptible(&info->port.open_wait);
@@ -544,7 +542,6 @@ static void rp_handle_port(struct r_port *info)
 		printk(KERN_INFO "DSR change...\n");
 	}
 #endif
-	tty_kref_put(tty);
 }
 
 /*
diff --git a/drivers/tty/serial/21285.c b/drivers/tty/serial/21285.c
index a44345a2dbb4..c7e8b60b6177 100644
--- a/drivers/tty/serial/21285.c
+++ b/drivers/tty/serial/21285.c
@@ -85,7 +85,6 @@ static void serial21285_enable_ms(struct uart_port *port)
 static irqreturn_t serial21285_rx_chars(int irq, void *dev_id)
 {
 	struct uart_port *port = dev_id;
-	struct tty_struct *tty = port->state->port.tty;
 	unsigned int status, ch, flag, rxs, max_count = 256;
 
 	status = *CSR_UARTFLG;
@@ -115,7 +114,7 @@ static irqreturn_t serial21285_rx_chars(int irq, void *dev_id)
 
 		status = *CSR_UARTFLG;
 	}
-	tty_flip_buffer_push(tty);
+	tty_flip_buffer_push(&port->state->port);
 
 	return IRQ_HANDLED;
 }
diff --git a/drivers/tty/serial/8250/8250.c b/drivers/tty/serial/8250/8250.c
index 5fb6577b94dc..0d771ec16750 100644
--- a/drivers/tty/serial/8250/8250.c
+++ b/drivers/tty/serial/8250/8250.c
@@ -1323,7 +1323,6 @@ unsigned char
 serial8250_rx_chars(struct uart_8250_port *up, unsigned char lsr)
 {
 	struct uart_port *port = &up->port;
-	struct tty_struct *tty = port->state->port.tty;
 	unsigned char ch;
 	int max_count = 256;
 	char flag;
@@ -1388,7 +1387,7 @@ ignore_char:
 		lsr = serial_in(up, UART_LSR);
 	} while ((lsr & (UART_LSR_DR | UART_LSR_BI)) && (max_count-- > 0));
 	spin_unlock(&port->lock);
-	tty_flip_buffer_push(tty);
+	tty_flip_buffer_push(&port->state->port);
 	spin_lock(&port->lock);
 	return lsr;
 }
diff --git a/drivers/tty/serial/altera_jtaguart.c b/drivers/tty/serial/altera_jtaguart.c
index 872f14ae43d2..84b90fd48063 100644
--- a/drivers/tty/serial/altera_jtaguart.c
+++ b/drivers/tty/serial/altera_jtaguart.c
@@ -139,7 +139,7 @@ static void altera_jtaguart_rx_chars(struct altera_jtaguart *pp)
 		uart_insert_char(port, 0, 0, ch, flag);
 	}
 
-	tty_flip_buffer_push(port->state->port.tty);
+	tty_flip_buffer_push(&port->state->port);
 }
 
 static void altera_jtaguart_tx_chars(struct altera_jtaguart *pp)
diff --git a/drivers/tty/serial/altera_uart.c b/drivers/tty/serial/altera_uart.c
index 684a0808e1c7..e133c8814bb5 100644
--- a/drivers/tty/serial/altera_uart.c
+++ b/drivers/tty/serial/altera_uart.c
@@ -231,7 +231,7 @@ static void altera_uart_rx_chars(struct altera_uart *pp)
 				 flag);
 	}
 
-	tty_flip_buffer_push(port->state->port.tty);
+	tty_flip_buffer_push(&port->state->port);
 }
 
 static void altera_uart_tx_chars(struct altera_uart *pp)
diff --git a/drivers/tty/serial/amba-pl010.c b/drivers/tty/serial/amba-pl010.c
index 22317dd16474..c36840519527 100644
--- a/drivers/tty/serial/amba-pl010.c
+++ b/drivers/tty/serial/amba-pl010.c
@@ -116,7 +116,6 @@ static void pl010_enable_ms(struct uart_port *port)
 
 static void pl010_rx_chars(struct uart_amba_port *uap)
 {
-	struct tty_struct *tty = uap->port.state->port.tty;
 	unsigned int status, ch, flag, rsr, max_count = 256;
 
 	status = readb(uap->port.membase + UART01x_FR);
@@ -165,7 +164,7 @@ static void pl010_rx_chars(struct uart_amba_port *uap)
 		status = readb(uap->port.membase + UART01x_FR);
 	}
 	spin_unlock(&uap->port.lock);
-	tty_flip_buffer_push(tty);
+	tty_flip_buffer_push(&uap->port.state->port);
 	spin_lock(&uap->port.lock);
 }
 
diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c
index e1257d17f5f0..3ea5408fcbeb 100644
--- a/drivers/tty/serial/amba-pl011.c
+++ b/drivers/tty/serial/amba-pl011.c
@@ -699,7 +699,6 @@ static void pl011_dma_rx_chars(struct uart_amba_port *uap,
 			       bool readfifo)
 {
 	struct tty_port *port = &uap->port.state->port;
-	struct tty_struct *tty = port->tty;
 	struct pl011_sgbuf *sgbuf = use_buf_b ?
 		&uap->dmarx.sgbuf_b : &uap->dmarx.sgbuf_a;
 	struct device *dev = uap->dmarx.chan->device->dev;
@@ -754,7 +753,7 @@ static void pl011_dma_rx_chars(struct uart_amba_port *uap,
 	dev_vdbg(uap->port.dev,
 		 "Took %d chars from DMA buffer and %d chars from the FIFO\n",
 		 dma_count, fifotaken);
-	tty_flip_buffer_push(tty);
+	tty_flip_buffer_push(port);
 	spin_lock(&uap->port.lock);
 }
 
@@ -1076,12 +1075,10 @@ static void pl011_enable_ms(struct uart_port *port)
 
 static void pl011_rx_chars(struct uart_amba_port *uap)
 {
-	struct tty_struct *tty = uap->port.state->port.tty;
-
 	pl011_fifo_to_tty(uap);
 
 	spin_unlock(&uap->port.lock);
-	tty_flip_buffer_push(tty);
+	tty_flip_buffer_push(&uap->port.state->port);
 	/*
 	 * If we were temporarily out of DMA mode for a while,
 	 * attempt to switch back to DMA mode again.
diff --git a/drivers/tty/serial/apbuart.c b/drivers/tty/serial/apbuart.c
index 59ae2b53e765..6331464d9101 100644
--- a/drivers/tty/serial/apbuart.c
+++ b/drivers/tty/serial/apbuart.c
@@ -78,7 +78,6 @@ static void apbuart_enable_ms(struct uart_port *port)
 
 static void apbuart_rx_chars(struct uart_port *port)
 {
-	struct tty_struct *tty = port->state->port.tty;
 	unsigned int status, ch, rsr, flag;
 	unsigned int max_chars = port->fifosize;
 
@@ -126,7 +125,7 @@ static void apbuart_rx_chars(struct uart_port *port)
 		status = UART_GET_STATUS(port);
 	}
 
-	tty_flip_buffer_push(tty);
+	tty_flip_buffer_push(&port->state->port);
 }
 
 static void apbuart_tx_chars(struct uart_port *port)
diff --git a/drivers/tty/serial/ar933x_uart.c b/drivers/tty/serial/ar933x_uart.c
index 6ca5dd615f9e..27f20c57abed 100644
--- a/drivers/tty/serial/ar933x_uart.c
+++ b/drivers/tty/serial/ar933x_uart.c
@@ -298,10 +298,8 @@ static void ar933x_uart_set_termios(struct uart_port *port,
 static void ar933x_uart_rx_chars(struct ar933x_uart_port *up)
 {
 	struct tty_port *port = &up->port.state->port;
-	struct tty_struct *tty;
 	int max_count = 256;
 
-	tty = tty_port_tty_get(port);
 	do {
 		unsigned int rdata;
 		unsigned char ch;
@@ -324,10 +322,7 @@ static void ar933x_uart_rx_chars(struct ar933x_uart_port *up)
 			tty_insert_flip_char(port, ch, TTY_NORMAL);
 	} while (max_count-- > 0);
 
-	if (tty) {
-		tty_flip_buffer_push(tty);
-		tty_kref_put(tty);
-	}
+	tty_flip_buffer_push(port);
 }
 
 static void ar933x_uart_tx_chars(struct ar933x_uart_port *up)
diff --git a/drivers/tty/serial/arc_uart.c b/drivers/tty/serial/arc_uart.c
index b46860104312..da734222e537 100644
--- a/drivers/tty/serial/arc_uart.c
+++ b/drivers/tty/serial/arc_uart.c
@@ -211,12 +211,8 @@ static void arc_serial_start_tx(struct uart_port *port)
 
 static void arc_serial_rx_chars(struct arc_uart_port *uart)
 {
-	struct tty_struct *tty = tty_port_tty_get(&uart->port.state->port);
 	unsigned int status, ch, flg = 0;
 
-	if (!tty)
-		return;
-
 	/*
 	 * UART has 4 deep RX-FIFO. Driver's recongnition of this fact
 	 * is very subtle. Here's how ...
@@ -252,10 +248,8 @@ static void arc_serial_rx_chars(struct arc_uart_port *uart)
 		uart_insert_char(&uart->port, status, RXOERR, ch, flg);
 
 done:
-		tty_flip_buffer_push(tty);
+		tty_flip_buffer_push(&uart->port.state->port);
 	}
-
-	tty_kref_put(tty);
 }
 
 /*
diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c
index 929567038c5a..d4a7c241b751 100644
--- a/drivers/tty/serial/atmel_serial.c
+++ b/drivers/tty/serial/atmel_serial.c
@@ -774,7 +774,7 @@ static void atmel_rx_from_ring(struct uart_port *port)
 	 * uart_start(), which takes the lock.
 	 */
 	spin_unlock(&port->lock);
-	tty_flip_buffer_push(port->state->port.tty);
+	tty_flip_buffer_push(&port->state->port);
 	spin_lock(&port->lock);
 }
 
@@ -782,7 +782,6 @@ static void atmel_rx_from_dma(struct uart_port *port)
 {
 	struct atmel_uart_port *atmel_port = to_atmel_uart_port(port);
 	struct tty_port *tport = &port->state->port;
-	struct tty_struct *tty = tport->tty;
 	struct atmel_dma_buffer *pdc;
 	int rx_idx = atmel_port->pdc_rx_idx;
 	unsigned int head;
@@ -850,7 +849,7 @@ static void atmel_rx_from_dma(struct uart_port *port)
 	 * uart_start(), which takes the lock.
 	 */
 	spin_unlock(&port->lock);
-	tty_flip_buffer_push(tty);
+	tty_flip_buffer_push(tport);
 	spin_lock(&port->lock);
 
 	UART_PUT_IER(port, ATMEL_US_ENDRX | ATMEL_US_TIMEOUT);
diff --git a/drivers/tty/serial/bcm63xx_uart.c b/drivers/tty/serial/bcm63xx_uart.c
index de30b1909728..719594e5fc21 100644
--- a/drivers/tty/serial/bcm63xx_uart.c
+++ b/drivers/tty/serial/bcm63xx_uart.c
@@ -236,14 +236,12 @@ static const char *bcm_uart_type(struct uart_port *port)
 static void bcm_uart_do_rx(struct uart_port *port)
 {
 	struct tty_port *port = &port->state->port;
-	struct tty_struct *tty;
 	unsigned int max_count;
 
 	/* limit number of char read in interrupt, should not be
 	 * higher than fifo size anyway since we're much faster than
 	 * serial port */
 	max_count = 32;
-	tty = port->tty;
 	do {
 		unsigned int iestat, c, cstat;
 		char flag;
@@ -305,7 +303,7 @@ static void bcm_uart_do_rx(struct uart_port *port)
 
 	} while (--max_count);
 
-	tty_flip_buffer_push(tty);
+	tty_flip_buffer_push(port);
 }
 
 /*
diff --git a/drivers/tty/serial/bfin_sport_uart.c b/drivers/tty/serial/bfin_sport_uart.c
index e4d3ac2e8992..487c173b0f72 100644
--- a/drivers/tty/serial/bfin_sport_uart.c
+++ b/drivers/tty/serial/bfin_sport_uart.c
@@ -150,7 +150,6 @@ static irqreturn_t sport_uart_rx_irq(int irq, void *dev_id)
 {
 	struct sport_uart_port *up = dev_id;
 	struct tty_port *port = &up->port.state->port;
-	struct tty_struct *tty = tport->tty;
 	unsigned int ch;
 
 	spin_lock(&up->port.lock);
@@ -162,7 +161,8 @@ static irqreturn_t sport_uart_rx_irq(int irq, void *dev_id)
 		if (!uart_handle_sysrq_char(&up->port, ch))
 			tty_insert_flip_char(port, ch, TTY_NORMAL);
 	}
-	tty_flip_buffer_push(tty);
+	/* XXX this won't deadlock with lowlat? */
+	tty_flip_buffer_push(port);
 
 	spin_unlock(&up->port.lock);
 
diff --git a/drivers/tty/serial/bfin_uart.c b/drivers/tty/serial/bfin_uart.c
index 2e2b2c1cb722..12dceda9db33 100644
--- a/drivers/tty/serial/bfin_uart.c
+++ b/drivers/tty/serial/bfin_uart.c
@@ -223,7 +223,6 @@ static void bfin_serial_enable_ms(struct uart_port *port)
 #ifdef CONFIG_SERIAL_BFIN_PIO
 static void bfin_serial_rx_chars(struct bfin_serial_port *uart)
 {
-	struct tty_struct *tty = NULL;
 	unsigned int status, ch, flg;
 	static struct timeval anomaly_start = { .tv_sec = 0 };
 
@@ -242,11 +241,9 @@ static void bfin_serial_rx_chars(struct bfin_serial_port *uart)
 			return;
 		}
 
-	if (!uart->port.state || !uart->port.state->port.tty)
+	if (!uart->port.state)
 		return;
 #endif
-	tty = uart->port.state->port.tty;
-
 	if (ANOMALY_05000363) {
 		/* The BF533 (and BF561) family of processors have a nice anomaly
 		 * where they continuously generate characters for a "single" break.
@@ -325,7 +322,7 @@ static void bfin_serial_rx_chars(struct bfin_serial_port *uart)
 	uart_insert_char(&uart->port, status, OE, ch, flg);
 
  ignore_char:
-	tty_flip_buffer_push(tty);
+	tty_flip_buffer_push(&uart->port.state->port);
 }
 
 static void bfin_serial_tx_chars(struct bfin_serial_port *uart)
@@ -426,7 +423,6 @@ static void bfin_serial_dma_tx_chars(struct bfin_serial_port *uart)
 
 static void bfin_serial_dma_rx_chars(struct bfin_serial_port *uart)
 {
-	struct tty_struct *tty = uart->port.state->port.tty;
 	int i, flg, status;
 
 	status = UART_GET_LSR(uart);
@@ -471,7 +467,7 @@ static void bfin_serial_dma_rx_chars(struct bfin_serial_port *uart)
 	}
 
  dma_ignore_char:
-	tty_flip_buffer_push(tty);
+	tty_flip_buffer_push(&uart->port.state->port);
 }
 
 void bfin_serial_rx_dma_timeout(struct bfin_serial_port *uart)
diff --git a/drivers/tty/serial/clps711x.c b/drivers/tty/serial/clps711x.c
index 3fd2526d121e..bfb17968c8db 100644
--- a/drivers/tty/serial/clps711x.c
+++ b/drivers/tty/serial/clps711x.c
@@ -85,12 +85,8 @@ static void uart_clps711x_enable_ms(struct uart_port *port)
 static irqreturn_t uart_clps711x_int_rx(int irq, void *dev_id)
 {
 	struct uart_port *port = dev_id;
-	struct tty_struct *tty = tty_port_tty_get(&port->state->port);
 	unsigned int status, ch, flg;
 
-	if (!tty)
-		return IRQ_HANDLED;
-
 	for (;;) {
 		status = clps_readl(SYSFLG(port));
 		if (status & SYSFLG_URXFE)
@@ -130,9 +126,7 @@ static irqreturn_t uart_clps711x_int_rx(int irq, void *dev_id)
 		uart_insert_char(port, status, UARTDR_OVERR, ch, flg);
 	}
 
-	tty_flip_buffer_push(tty);
-
-	tty_kref_put(tty);
+	tty_flip_buffer_push(&port->state->port);
 
 	return IRQ_HANDLED;
 }
diff --git a/drivers/tty/serial/cpm_uart/cpm_uart_core.c b/drivers/tty/serial/cpm_uart/cpm_uart_core.c
index 0bb24378a3c0..97f4e1858649 100644
--- a/drivers/tty/serial/cpm_uart/cpm_uart_core.c
+++ b/drivers/tty/serial/cpm_uart/cpm_uart_core.c
@@ -246,7 +246,6 @@ static void cpm_uart_int_rx(struct uart_port *port)
 	unsigned char ch;
 	u8 *cp;
 	struct tty_port *tport = &port->state->port;
-	struct tty_struct *tty = tport->tty;
 	struct uart_cpm_port *pinfo = (struct uart_cpm_port *)port;
 	cbd_t __iomem *bdp;
 	u16 status;
@@ -323,7 +322,7 @@ static void cpm_uart_int_rx(struct uart_port *port)
 	pinfo->rx_cur = bdp;
 
 	/* activate BH processing */
-	tty_flip_buffer_push(tty);
+	tty_flip_buffer_push(tport);
 
 	return;
 
diff --git a/drivers/tty/serial/crisv10.c b/drivers/tty/serial/crisv10.c
index 52449adc09ac..45acf103433e 100644
--- a/drivers/tty/serial/crisv10.c
+++ b/drivers/tty/serial/crisv10.c
@@ -2104,17 +2104,10 @@ static int force_eop_if_needed(struct e100_serial *info)
 
 static void flush_to_flip_buffer(struct e100_serial *info)
 {
-	struct tty_struct *tty;
 	struct etrax_recv_buffer *buffer;
 	unsigned long flags;
 
 	local_irq_save(flags);
-	tty = info->port.tty;
-
-	if (!tty) {
-		local_irq_restore(flags);
-		return;
-	}
 
 	while ((buffer = info->first_recv_buffer) != NULL) {
 		unsigned int count = buffer->length;
@@ -2138,7 +2131,7 @@ static void flush_to_flip_buffer(struct e100_serial *info)
 	local_irq_restore(flags);
 
 	/* This includes a check for low-latency */
-	tty_flip_buffer_push(tty);
+	tty_flip_buffer_push(&info->port);
 }
 
 static void check_flush_timeout(struct e100_serial *info)
@@ -2274,12 +2267,6 @@ static
 struct e100_serial * handle_ser_rx_interrupt_no_dma(struct e100_serial *info)
 {
 	unsigned long data_read;
-	struct tty_struct *tty = info->port.tty;
-
-	if (!tty) {
-		printk("!NO TTY!\n");
-		return info;
-	}
 
 	/* Read data and status at the same time */
 	data_read = *((unsigned long *)&info->ioport[REG_DATA_STATUS32]);
@@ -2382,7 +2369,7 @@ more_data:
 		goto more_data;
 	}
 
-	tty_flip_buffer_push(info->port.tty);
+	tty_flip_buffer_push(&info->port);
 	return info;
 }
 
diff --git a/drivers/tty/serial/dz.c b/drivers/tty/serial/dz.c
index 6491b8644a7f..2f2b2e538a54 100644
--- a/drivers/tty/serial/dz.c
+++ b/drivers/tty/serial/dz.c
@@ -187,7 +187,6 @@ static inline void dz_receive_chars(struct dz_mux *mux)
 {
 	struct uart_port *uport;
 	struct dz_port *dport = &mux->dport[0];
-	struct tty_struct *tty = NULL;
 	struct uart_icount *icount;
 	int lines_rx[DZ_NB_PORT] = { [0 ... DZ_NB_PORT - 1] = 0 };
 	unsigned char ch, flag;
@@ -197,7 +196,6 @@ static inline void dz_receive_chars(struct dz_mux *mux)
 	while ((status = dz_in(dport, DZ_RBUF)) & DZ_DVAL) {
 		dport = &mux->dport[LINE(status)];
 		uport = &dport->port;
-		tty = uport->state->port.tty;	/* point to the proper dev */
 
 		ch = UCHAR(status);		/* grab the char */
 		flag = TTY_NORMAL;
@@ -249,7 +247,7 @@ static inline void dz_receive_chars(struct dz_mux *mux)
 	}
 	for (i = 0; i < DZ_NB_PORT; i++)
 		if (lines_rx[i])
-			tty_flip_buffer_push(mux->dport[i].port.state->port.tty);
+			tty_flip_buffer_push(&mux->dport[i].port.state->port);
 }
 
 /*
diff --git a/drivers/tty/serial/efm32-uart.c b/drivers/tty/serial/efm32-uart.c
index bdf67b0cb8b6..de14bd7dce10 100644
--- a/drivers/tty/serial/efm32-uart.c
+++ b/drivers/tty/serial/efm32-uart.c
@@ -249,12 +249,9 @@ static irqreturn_t efm32_uart_rxirq(int irq, void *data)
 	int handled = IRQ_NONE;
 	struct uart_port *port = &efm_port->port;
 	struct tty_port *tport = &port->state->port;
-	struct tty_struct *tty;
 
 	spin_lock(&port->lock);
 
-	tty = tty_kref_get(tport->tty);
-
 	if (irqflag & UARTn_IF_RXDATAV) {
 		efm32_uart_write32(efm_port, UARTn_IF_RXDATAV, UARTn_IFC);
 		efm32_uart_rx_chars(efm_port);
@@ -270,10 +267,7 @@ static irqreturn_t efm32_uart_rxirq(int irq, void *data)
 		handled = IRQ_HANDLED;
 	}
 
-	if (tty) {
-		tty_flip_buffer_push(tty);
-		tty_kref_put(tty);
-	}
+	tty_flip_buffer_push(tport);
 
 	spin_unlock(&port->lock);
 
diff --git a/drivers/tty/serial/icom.c b/drivers/tty/serial/icom.c
index 54903ee5e5ab..bc9e6b017b05 100644
--- a/drivers/tty/serial/icom.c
+++ b/drivers/tty/serial/icom.c
@@ -735,7 +735,6 @@ static void recv_interrupt(u16 port_int_reg, struct icom_port *icom_port)
 {
 	short int count, rcv_buff;
 	struct tty_port *port = &icom_port->uart_port.state->port;
-	struct tty_struct *tty = port->tty;
 	unsigned short int status;
 	struct uart_icount *icount;
 	unsigned long offset;
@@ -835,7 +834,7 @@ ignore_char:
 		status = cpu_to_le16(icom_port->statStg->rcv[rcv_buff].flags);
 	}
 	icom_port->next_rcv = rcv_buff;
-	tty_flip_buffer_push(tty);
+	tty_flip_buffer_push(port);
 }
 
 static void process_interrupt(u16 port_int_reg,
diff --git a/drivers/tty/serial/ifx6x60.c b/drivers/tty/serial/ifx6x60.c
index 4bc6e47890b4..6a6668bbb330 100644
--- a/drivers/tty/serial/ifx6x60.c
+++ b/drivers/tty/serial/ifx6x60.c
@@ -669,12 +669,8 @@ static const struct tty_operations ifx_spi_serial_ops = {
 static void ifx_spi_insert_flip_string(struct ifx_spi_device *ifx_dev,
 				    unsigned char *chars, size_t size)
 {
-	struct tty_struct *tty = tty_port_tty_get(&ifx_dev->tty_port);
-	if (!tty)
-		return;
 	tty_insert_flip_string(&ifx_dev->tty_port, chars, size);
-	tty_flip_buffer_push(tty);
-	tty_kref_put(tty);
+	tty_flip_buffer_push(&ifx_dev->tty_port);
 }
 
 /**
diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c
index f60c4028b6e1..be26345bf6a4 100644
--- a/drivers/tty/serial/imx.c
+++ b/drivers/tty/serial/imx.c
@@ -518,7 +518,6 @@ static irqreturn_t imx_rxint(int irq, void *dev_id)
 	unsigned int rx, flg, ignored = 0;
 	struct tty_struct *tty = sport->port.state->port.tty;
 	struct tty_port *port = &sport->port.state->port;
-	struct tty_struct *tty = port->tty;
 	unsigned long flags, temp;
 
 	spin_lock_irqsave(&sport->port.lock, flags);
@@ -576,7 +575,7 @@ static irqreturn_t imx_rxint(int irq, void *dev_id)
 
 out:
 	spin_unlock_irqrestore(&sport->port.lock, flags);
-	tty_flip_buffer_push(tty);
+	tty_flip_buffer_push(port);
 	return IRQ_HANDLED;
 }
 
diff --git a/drivers/tty/serial/ioc3_serial.c b/drivers/tty/serial/ioc3_serial.c
index edbdc4e45075..6e4c715c5d26 100644
--- a/drivers/tty/serial/ioc3_serial.c
+++ b/drivers/tty/serial/ioc3_serial.c
@@ -1393,7 +1393,6 @@ static inline int do_read(struct uart_port *the_port, char *buf, int len)
  */
 static int receive_chars(struct uart_port *the_port)
 {
-	struct tty_struct *tty;
 	unsigned char ch[MAX_CHARS];
 	int read_count = 0, read_room, flip = 0;
 	struct uart_state *state = the_port->state;
@@ -1403,14 +1402,11 @@ static int receive_chars(struct uart_port *the_port)
 	/* Make sure all the pointers are "good" ones */
 	if (!state)
 		return 0;
-	if (!state->port.tty)
-		return 0;
 
 	if (!(port->ip_flags & INPUT_ENABLE))
 		return 0;
 
 	spin_lock_irqsave(&the_port->lock, pflags);
-	tty = state->port.tty;
 
 	read_count = do_read(the_port, ch, MAX_CHARS);
 	if (read_count > 0) {
@@ -1422,7 +1418,7 @@ static int receive_chars(struct uart_port *the_port)
 	spin_unlock_irqrestore(&the_port->lock, pflags);
 
 	if (flip)
-		tty_flip_buffer_push(tty);
+		tty_flip_buffer_push(&state->port);
 
 	return read_count;
 }
diff --git a/drivers/tty/serial/ioc4_serial.c b/drivers/tty/serial/ioc4_serial.c
index 86f64ed89b45..e2520abcb1c4 100644
--- a/drivers/tty/serial/ioc4_serial.c
+++ b/drivers/tty/serial/ioc4_serial.c
@@ -2340,7 +2340,6 @@ static inline int do_read(struct uart_port *the_port, unsigned char *buf,
  */
 static void receive_chars(struct uart_port *the_port)
 {
-	struct tty_struct *tty;
 	unsigned char ch[IOC4_MAX_CHARS];
 	int read_count, request_count = IOC4_MAX_CHARS;
 	struct uart_icount *icount;
@@ -2350,11 +2349,8 @@ static void receive_chars(struct uart_port *the_port)
 	/* Make sure all the pointers are "good" ones */
 	if (!state)
 		return;
-	if (!state->port.tty)
-		return;
 
 	spin_lock_irqsave(&the_port->lock, pflags);
-	tty = state->port.tty;
 
 	request_count = tty_buffer_request_room(&state->port, IOC4_MAX_CHARS);
 
@@ -2369,7 +2365,7 @@ static void receive_chars(struct uart_port *the_port)
 
 	spin_unlock_irqrestore(&the_port->lock, pflags);
 
-	tty_flip_buffer_push(tty);
+	tty_flip_buffer_push(&state->port);
 }
 
 /**
diff --git a/drivers/tty/serial/jsm/jsm_tty.c b/drivers/tty/serial/jsm/jsm_tty.c
index c9ce00dd1f8a..00f250ae14c5 100644
--- a/drivers/tty/serial/jsm/jsm_tty.c
+++ b/drivers/tty/serial/jsm/jsm_tty.c
@@ -654,7 +654,7 @@ void jsm_input(struct jsm_channel *ch)
 	spin_unlock_irqrestore(&ch->ch_lock, lock_flags);
 
 	/* Tell the tty layer its okay to "eat" the data now */
-	tty_flip_buffer_push(tp);
+	tty_flip_buffer_push(port);
 
 	jsm_dbg(IOCTL, &ch->ch_bd->pci_dev, "finish\n");
 }
diff --git a/drivers/tty/serial/kgdb_nmi.c b/drivers/tty/serial/kgdb_nmi.c
index ba2ef627d9c6..26a50b0c868b 100644
--- a/drivers/tty/serial/kgdb_nmi.c
+++ b/drivers/tty/serial/kgdb_nmi.c
@@ -202,7 +202,6 @@ bool kgdb_nmi_poll_knock(void)
 static void kgdb_nmi_tty_receiver(unsigned long data)
 {
 	struct kgdb_nmi_tty_priv *priv = (void *)data;
-	struct tty_struct *tty;
 	char ch;
 
 	tasklet_schedule(&priv->tlet);
@@ -210,16 +209,9 @@ static void kgdb_nmi_tty_receiver(unsigned long data)
 	if (likely(!kgdb_nmi_tty_enabled || !kfifo_len(&priv->fifo)))
 		return;
 
-	/* Port is there, but tty might be hung up, check. */
-	tty = tty_port_tty_get(kgdb_nmi_port);
-	if (!tty)
-		return;
-
 	while (kfifo_out(&priv->fifo, &ch, 1))
 		tty_insert_flip_char(&priv->port, ch, TTY_NORMAL);
-	tty_flip_buffer_push(priv->port.tty);
-
-	tty_kref_put(tty);
+	tty_flip_buffer_push(&priv->port);
 }
 
 static int kgdb_nmi_tty_activate(struct tty_port *port, struct tty_struct *tty)
diff --git a/drivers/tty/serial/lantiq.c b/drivers/tty/serial/lantiq.c
index 1933fe3c98dd..15733da757c6 100644
--- a/drivers/tty/serial/lantiq.c
+++ b/drivers/tty/serial/lantiq.c
@@ -163,21 +163,15 @@ static int
 lqasc_rx_chars(struct uart_port *port)
 {
 	struct tty_port *tport = &port->state->port;
-	struct tty_struct *tty = tty_port_tty_get(tport);
 	unsigned int ch = 0, rsr = 0, fifocnt;
 
-	if (!tty) {
-		dev_dbg(port->dev, "%s:tty is busy now", __func__);
-		return -EBUSY;
-	}
-	fifocnt =
-		ltq_r32(port->membase + LTQ_ASC_FSTAT) & ASCFSTAT_RXFFLMASK;
+	fifocnt = ltq_r32(port->membase + LTQ_ASC_FSTAT) & ASCFSTAT_RXFFLMASK;
 	while (fifocnt--) {
 		u8 flag = TTY_NORMAL;
 		ch = ltq_r8(port->membase + LTQ_ASC_RBUF);
 		rsr = (ltq_r32(port->membase + LTQ_ASC_STATE)
 			& ASCSTATE_ANY) | UART_DUMMY_UER_RX;
-		tty_flip_buffer_push(tty);
+		tty_flip_buffer_push(tport);
 		port->icount.rx++;
 
 		/*
@@ -219,9 +213,10 @@ lqasc_rx_chars(struct uart_port *port)
 			 */
 			tty_insert_flip_char(tport, 0, TTY_OVERRUN);
 	}
+
 	if (ch != 0)
-		tty_flip_buffer_push(tty);
-	tty_kref_put(tty);
+		tty_flip_buffer_push(tport);
+
 	return 0;
 }
 
diff --git a/drivers/tty/serial/lpc32xx_hs.c b/drivers/tty/serial/lpc32xx_hs.c
index 5cd180564c03..c8448e6f52e5 100644
--- a/drivers/tty/serial/lpc32xx_hs.c
+++ b/drivers/tty/serial/lpc32xx_hs.c
@@ -259,16 +259,6 @@ static void __serial_lpc32xx_rx(struct uart_port *port)
 {
 	struct tty_port *tport = &port->state->port;
 	unsigned int tmp, flag;
-	struct tty_struct *tty = tty_port_tty_get(tport);
-
-	if (!tty) {
-		/* Discard data: no tty available */
-		while (!(readl(LPC32XX_HSUART_FIFO(port->membase)) &
-			 LPC32XX_HSU_RX_EMPTY))
-			;
-
-		return;
-	}
 
 	/* Read data from FIFO and push into terminal */
 	tmp = readl(LPC32XX_HSUART_FIFO(port->membase));
@@ -289,8 +279,7 @@ static void __serial_lpc32xx_rx(struct uart_port *port)
 
 		tmp = readl(LPC32XX_HSUART_FIFO(port->membase));
 	}
-	tty_flip_buffer_push(tty);
-	tty_kref_put(tty);
+	tty_flip_buffer_push(tport);
 }
 
 static void __serial_lpc32xx_tx(struct uart_port *port)
@@ -367,8 +356,7 @@ static irqreturn_t serial_lpc32xx_interrupt(int irq, void *dev_id)
 	/* Data received? */
 	if (status & (LPC32XX_HSU_RX_TIMEOUT_INT | LPC32XX_HSU_RX_TRIG_INT)) {
 		__serial_lpc32xx_rx(port);
-		if (tty)
-			tty_flip_buffer_push(tty);
+		tty_flip_buffer_push(tport);
 	}
 
 	/* Transmit data request? */
diff --git a/drivers/tty/serial/m32r_sio.c b/drivers/tty/serial/m32r_sio.c
index 2e9a390f2ac4..bb1afa0922e1 100644
--- a/drivers/tty/serial/m32r_sio.c
+++ b/drivers/tty/serial/m32r_sio.c
@@ -301,7 +301,6 @@ static void m32r_sio_enable_ms(struct uart_port *port)
 static void receive_chars(struct uart_sio_port *up, int *status)
 {
 	struct tty_port *port = &up->port.state->port;
-	struct tty_struct *tty = tport->tty;
 	unsigned char ch;
 	unsigned char flag;
 	int max_count = 256;
@@ -369,7 +368,7 @@ static void receive_chars(struct uart_sio_port *up, int *status)
 	ignore_char:
 		*status = serial_in(up, UART_LSR);
 	} while ((*status & UART_LSR_DR) && (max_count-- > 0));
-	tty_flip_buffer_push(tty);
+	tty_flip_buffer_push(port);
 }
 
 static void transmit_chars(struct uart_sio_port *up)
diff --git a/drivers/tty/serial/max3100.c b/drivers/tty/serial/max3100.c
index e238e80cd981..791e1dfb8b11 100644
--- a/drivers/tty/serial/max3100.c
+++ b/drivers/tty/serial/max3100.c
@@ -311,8 +311,8 @@ static void max3100_work(struct work_struct *w)
 			}
 		}
 
-		if (rxchars > 16 && s->port.state->port.tty != NULL) {
-			tty_flip_buffer_push(s->port.state->port.tty);
+		if (rxchars > 16) {
+			tty_flip_buffer_push(&s->port.state->port);
 			rxchars = 0;
 		}
 		if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
@@ -324,8 +324,8 @@ static void max3100_work(struct work_struct *w)
 		  (!uart_circ_empty(xmit) &&
 		   !uart_tx_stopped(&s->port))));
 
-	if (rxchars > 0 && s->port.state->port.tty != NULL)
-		tty_flip_buffer_push(s->port.state->port.tty);
+	if (rxchars > 0)
+		tty_flip_buffer_push(&s->port.state->port);
 }
 
 static irqreturn_t max3100_irq(int irqno, void *dev_id)
diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c
index a801f6872cad..0c2422cb04ea 100644
--- a/drivers/tty/serial/max310x.c
+++ b/drivers/tty/serial/max310x.c
@@ -460,10 +460,6 @@ static int max310x_set_ref_clk(struct max310x_port *s)
 static void max310x_handle_rx(struct max310x_port *s, unsigned int rxlen)
 {
 	unsigned int sts = 0, ch = 0, flag;
-	struct tty_struct *tty = tty_port_tty_get(&s->port.state->port);
-
-	if (!tty)
-		return;
 
 	if (unlikely(rxlen >= MAX310X_FIFO_SIZE)) {
 		dev_warn(s->port.dev, "Possible RX FIFO overrun %d\n", rxlen);
@@ -516,9 +512,7 @@ static void max310x_handle_rx(struct max310x_port *s, unsigned int rxlen)
 				 ch, flag);
 	}
 
-	tty_flip_buffer_push(tty);
-
-	tty_kref_put(tty);
+	tty_flip_buffer_push(&s->port.state->port);
 }
 
 static void max310x_handle_tx(struct max310x_port *s)
diff --git a/drivers/tty/serial/mcf.c b/drivers/tty/serial/mcf.c
index fcd56ab6053f..7ed99274572f 100644
--- a/drivers/tty/serial/mcf.c
+++ b/drivers/tty/serial/mcf.c
@@ -310,7 +310,7 @@ static void mcf_rx_chars(struct mcf_uart *pp)
 		uart_insert_char(port, status, MCFUART_USR_RXOVERRUN, ch, flag);
 	}
 
-	tty_flip_buffer_push(port->state->port.tty);
+	tty_flip_buffer_push(&port->state->port);
 }
 
 /****************************************************************************/
diff --git a/drivers/tty/serial/mfd.c b/drivers/tty/serial/mfd.c
index 60d585ab4870..5f4765a7a5c5 100644
--- a/drivers/tty/serial/mfd.c
+++ b/drivers/tty/serial/mfd.c
@@ -388,12 +388,8 @@ void hsu_dma_rx(struct uart_hsu_port *up, u32 int_sts)
 	struct hsu_dma_chan *chan = up->rxc;
 	struct uart_port *port = &up->port;
 	struct tty_port *tport = &port->state->port;
-	struct tty_struct *tty = tport->tty;
 	int count;
 
-	if (!tty)
-		return;
-
 	/*
 	 * First need to know how many is already transferred,
 	 * then check if its a timeout DMA irq, and return
@@ -438,7 +434,7 @@ void hsu_dma_rx(struct uart_hsu_port *up, u32 int_sts)
 					 | (0x1 << 16)
 					 | (0x1 << 24)	/* timeout bit, see HSU Errata 1 */
 					 );
-	tty_flip_buffer_push(tty);
+	tty_flip_buffer_push(tport);
 
 	chan_writel(chan, HSU_CH_CR, 0x3);
 
@@ -461,13 +457,9 @@ static void serial_hsu_stop_rx(struct uart_port *port)
 
 static inline void receive_chars(struct uart_hsu_port *up, int *status)
 {
-	struct tty_struct *tty = up->port.state->port.tty;
 	unsigned int ch, flag;
 	unsigned int max_count = 256;
 
-	if (!tty)
-		return;
-
 	do {
 		ch = serial_in(up, UART_RX);
 		flag = TTY_NORMAL;
@@ -523,7 +515,7 @@ static inline void receive_chars(struct uart_hsu_port *up, int *status)
 	ignore_char:
 		*status = serial_in(up, UART_LSR);
 	} while ((*status & UART_LSR_DR) && max_count--);
-	tty_flip_buffer_push(tty);
+	tty_flip_buffer_push(&up->port.state->port);
 }
 
 static void transmit_chars(struct uart_hsu_port *up)
diff --git a/drivers/tty/serial/mpc52xx_uart.c b/drivers/tty/serial/mpc52xx_uart.c
index 0145aeb7721c..c0e1fad51be7 100644
--- a/drivers/tty/serial/mpc52xx_uart.c
+++ b/drivers/tty/serial/mpc52xx_uart.c
@@ -942,7 +942,6 @@ static inline int
 mpc52xx_uart_int_rx_chars(struct uart_port *port)
 {
 	struct tty_port *tport = &port->state->port;
-	struct tty_struct *tty = tport->tty;
 	unsigned char ch, flag;
 	unsigned short status;
 
@@ -1000,7 +999,7 @@ mpc52xx_uart_int_rx_chars(struct uart_port *port)
 	}
 
 	spin_unlock(&port->lock);
-	tty_flip_buffer_push(tty);
+	tty_flip_buffer_push(tport);
 	spin_lock(&port->lock);
 
 	return psc_ops->raw_rx_rdy(port);
diff --git a/drivers/tty/serial/mpsc.c b/drivers/tty/serial/mpsc.c
index 6f2d2ceb326a..bc24f4931670 100644
--- a/drivers/tty/serial/mpsc.c
+++ b/drivers/tty/serial/mpsc.c
@@ -938,7 +938,6 @@ static int mpsc_rx_intr(struct mpsc_port_info *pi)
 {
 	struct mpsc_rx_desc *rxre;
 	struct tty_port *port = &pi->port.state->port;
-	struct tty_struct *tty = port->tty;
 	u32	cmdstat, bytes_in, i;
 	int	rc = 0;
 	u8	*bp;
@@ -971,7 +970,7 @@ static int mpsc_rx_intr(struct mpsc_port_info *pi)
 		/* Following use of tty struct directly is deprecated */
 		if (tty_buffer_request_room(port, bytes_in) < bytes_in) {
 			if (port->low_latency)
-				tty_flip_buffer_push(tty);
+				tty_flip_buffer_push(port);
 			/*
 			 * If this failed then we will throw away the bytes
 			 * but must do so to clear interrupts.
@@ -1081,7 +1080,7 @@ next_frame:
 	if ((readl(pi->sdma_base + SDMA_SDCM) & SDMA_SDCM_ERD) == 0)
 		mpsc_start_rx(pi);
 
-	tty_flip_buffer_push(tty);
+	tty_flip_buffer_push(port);
 	return rc;
 }
 
diff --git a/drivers/tty/serial/mrst_max3110.c b/drivers/tty/serial/mrst_max3110.c
index 4632db7a24b7..f641c232beca 100644
--- a/drivers/tty/serial/mrst_max3110.c
+++ b/drivers/tty/serial/mrst_max3110.c
@@ -340,7 +340,6 @@ receive_chars(struct uart_max3110 *max, unsigned short *str, int len)
 {
 	struct uart_port *port = &max->port;
 	struct tty_port *tport;
-	struct tty_struct *tty;
 	char buf[M3110_RX_FIFO_DEPTH];
 	int r, w, usable;
 
@@ -349,9 +348,6 @@ receive_chars(struct uart_max3110 *max, unsigned short *str, int len)
 		return 0;
 
 	tport = &port->state->port;
-	tty = tty_port_tty_get(tport);
-	if (!tty)
-		return 0;
 
 	for (r = 0, w = 0; r < len; r++) {
 		if (str[r] & MAX3110_BREAK &&
@@ -366,10 +362,8 @@ receive_chars(struct uart_max3110 *max, unsigned short *str, int len)
 		}
 	}
 
-	if (!w) {
-		tty_kref_put(tty);
+	if (!w)
 		return 0;
-	}
 
 	for (r = 0; w; r += usable, w -= usable) {
 		usable = tty_buffer_request_room(tport, w);
@@ -378,8 +372,7 @@ receive_chars(struct uart_max3110 *max, unsigned short *str, int len)
 			port->icount.rx += usable;
 		}
 	}
-	tty_flip_buffer_push(tty);
-	tty_kref_put(tty);
+	tty_flip_buffer_push(tport);
 
 	return r;
 }
diff --git a/drivers/tty/serial/msm_serial.c b/drivers/tty/serial/msm_serial.c
index cb787c0e279a..b11e99797fd8 100644
--- a/drivers/tty/serial/msm_serial.c
+++ b/drivers/tty/serial/msm_serial.c
@@ -92,7 +92,6 @@ static void msm_enable_ms(struct uart_port *port)
 static void handle_rx_dm(struct uart_port *port, unsigned int misr)
 {
 	struct tty_port *tport = &port->state->port;
-	struct tty_struct *tty = tport->tty;
 	unsigned int sr;
 	int count = 0;
 	struct msm_port *msm_port = UART_TO_MSM(port);
@@ -138,7 +137,7 @@ static void handle_rx_dm(struct uart_port *port, unsigned int misr)
 		count -= 4;
 	}
 
-	tty_flip_buffer_push(tty);
+	tty_flip_buffer_push(tport);
 	if (misr & (UART_IMR_RXSTALE))
 		msm_write(port, UART_CR_CMD_RESET_STALE_INT, UART_CR);
 	msm_write(port, 0xFFFFFF, UARTDM_DMRX);
@@ -148,7 +147,6 @@ static void handle_rx_dm(struct uart_port *port, unsigned int misr)
 static void handle_rx(struct uart_port *port)
 {
 	struct tty_port *tport = &port->state->port;
-	struct tty_struct *tty = tport->tty;
 	unsigned int sr;
 
 	/*
@@ -191,7 +189,7 @@ static void handle_rx(struct uart_port *port)
 			tty_insert_flip_char(tport, c, flag);
 	}
 
-	tty_flip_buffer_push(tty);
+	tty_flip_buffer_push(tport);
 }
 
 static void reset_dm_count(struct uart_port *port)
diff --git a/drivers/tty/serial/msm_serial_hs.c b/drivers/tty/serial/msm_serial_hs.c
index c356ffff3c71..4a942c78347e 100644
--- a/drivers/tty/serial/msm_serial_hs.c
+++ b/drivers/tty/serial/msm_serial_hs.c
@@ -981,9 +981,8 @@ static void msm_hs_tty_flip_buffer_work(struct work_struct *work)
 {
 	struct msm_hs_port *msm_uport =
 			container_of(work, struct msm_hs_port, rx.tty_work);
-	struct tty_struct *tty = msm_uport->uport.state->port.tty;
 
-	tty_flip_buffer_push(tty);
+	tty_flip_buffer_push(&msm_uport->uport.state->port);
 }
 
 /*
diff --git a/drivers/tty/serial/msm_smd_tty.c b/drivers/tty/serial/msm_smd_tty.c
index b43b4ec39269..e722ff163d91 100644
--- a/drivers/tty/serial/msm_smd_tty.c
+++ b/drivers/tty/serial/msm_smd_tty.c
@@ -80,7 +80,7 @@ static void smd_tty_notify(void *priv, unsigned event)
 			pr_err("OOPS - smd_tty_buffer mismatch?!");
 		}
 
-		tty_flip_buffer_push(tty);
+		tty_flip_buffer_push(&info->port);
 	}
 
 	/* XXX only when writable and necessary */
diff --git a/drivers/tty/serial/mux.c b/drivers/tty/serial/mux.c
index 83b21686020e..7fd6aaaacd8e 100644
--- a/drivers/tty/serial/mux.c
+++ b/drivers/tty/serial/mux.c
@@ -244,7 +244,6 @@ static void mux_read(struct uart_port *port)
 {
 	struct tty_port *tport = &port->state->port;
 	int data;
-	struct tty_struct *tty = tport->tty;
 	__u32 start_count = port->icount.rx;
 
 	while(1) {
@@ -270,9 +269,8 @@ static void mux_read(struct uart_port *port)
 		tty_insert_flip_char(tport, data & 0xFF, TTY_NORMAL);
 	}
 	
-	if (start_count != port->icount.rx) {
-		tty_flip_buffer_push(tty);
-	}
+	if (start_count != port->icount.rx)
+		tty_flip_buffer_push(tport);
 }
 
 /**
diff --git a/drivers/tty/serial/mxs-auart.c b/drivers/tty/serial/mxs-auart.c
index 37a0046ef531..df0ba32f88ad 100644
--- a/drivers/tty/serial/mxs-auart.c
+++ b/drivers/tty/serial/mxs-auart.c
@@ -364,7 +364,6 @@ out:
 
 static void mxs_auart_rx_chars(struct mxs_auart_port *s)
 {
-	struct tty_struct *tty = s->port.state->port.tty;
 	u32 stat = 0;
 
 	for (;;) {
@@ -375,7 +374,7 @@ static void mxs_auart_rx_chars(struct mxs_auart_port *s)
 	}
 
 	writel(stat, s->port.membase + AUART_STAT);
-	tty_flip_buffer_push(tty);
+	tty_flip_buffer_push(&s->port.state->port);
 }
 
 static int mxs_auart_request_port(struct uart_port *u)
@@ -458,7 +457,6 @@ static void dma_rx_callback(void *arg)
 {
 	struct mxs_auart_port *s = (struct mxs_auart_port *) arg;
 	struct tty_port *port = &s->port.state->port;
-	struct tty_struct *tty = port->tty;
 	int count;
 	u32 stat;
 
@@ -472,7 +470,7 @@ static void dma_rx_callback(void *arg)
 	tty_insert_flip_string(port, s->rx_dma_buf, count);
 
 	writel(stat, s->port.membase + AUART_STAT);
-	tty_flip_buffer_push(tty);
+	tty_flip_buffer_push(port);
 
 	/* start the next DMA for RX. */
 	mxs_auart_dma_prep_rx(s);
diff --git a/drivers/tty/serial/netx-serial.c b/drivers/tty/serial/netx-serial.c
index d40da78e7c85..b9a40ed70be2 100644
--- a/drivers/tty/serial/netx-serial.c
+++ b/drivers/tty/serial/netx-serial.c
@@ -199,7 +199,6 @@ static void netx_txint(struct uart_port *port)
 static void netx_rxint(struct uart_port *port)
 {
 	unsigned char rx, flg, status;
-	struct tty_struct *tty = port->state->port.tty;
 
 	while (!(readl(port->membase + UART_FR) & FR_RXFE)) {
 		rx = readl(port->membase + UART_DR);
@@ -237,8 +236,7 @@ static void netx_rxint(struct uart_port *port)
 		uart_insert_char(port, status, SR_OE, rx, flg);
 	}
 
-	tty_flip_buffer_push(tty);
-	return;
+	tty_flip_buffer_push(&port->state->port);
 }
 
 static irqreturn_t netx_int(int irq, void *dev_id)
diff --git a/drivers/tty/serial/nwpserial.c b/drivers/tty/serial/nwpserial.c
index 10d64a3697fb..77287c54f331 100644
--- a/drivers/tty/serial/nwpserial.c
+++ b/drivers/tty/serial/nwpserial.c
@@ -129,7 +129,6 @@ static irqreturn_t nwpserial_interrupt(int irq, void *dev_id)
 {
 	struct nwpserial_port *up = dev_id;
 	struct tty_port *port = &up->port.state->port;
-	struct tty_struct *tty = port->tty;
 	irqreturn_t ret;
 	unsigned int iir;
 	unsigned char ch;
@@ -150,7 +149,7 @@ static irqreturn_t nwpserial_interrupt(int irq, void *dev_id)
 			tty_insert_flip_char(port, ch, TTY_NORMAL);
 	} while (dcr_read(up->dcr_host, UART_LSR) & UART_LSR_DR);
 
-	tty_flip_buffer_push(tty);
+	tty_flip_buffer_push(port);
 	ret = IRQ_HANDLED;
 
 	/* clear interrupt */
diff --git a/drivers/tty/serial/omap-serial.c b/drivers/tty/serial/omap-serial.c
index ec90651d661a..6f3dbf740f05 100644
--- a/drivers/tty/serial/omap-serial.c
+++ b/drivers/tty/serial/omap-serial.c
@@ -483,7 +483,6 @@ static void serial_omap_rdi(struct uart_omap_port *up, unsigned int lsr)
 static irqreturn_t serial_omap_irq(int irq, void *dev_id)
 {
 	struct uart_omap_port *up = dev_id;
-	struct tty_struct *tty = up->port.state->port.tty;
 	unsigned int iir, lsr;
 	unsigned int type;
 	irqreturn_t ret = IRQ_NONE;
@@ -530,7 +529,7 @@ static irqreturn_t serial_omap_irq(int irq, void *dev_id)
 
 	spin_unlock(&up->port.lock);
 
-	tty_flip_buffer_push(tty);
+	tty_flip_buffer_push(&up->port.state->port);
 
 	pm_runtime_mark_last_busy(up->dev);
 	pm_runtime_put_autosuspend(up->dev);
diff --git a/drivers/tty/serial/pch_uart.c b/drivers/tty/serial/pch_uart.c
index 967f1cb311f3..8b40a1fc9681 100644
--- a/drivers/tty/serial/pch_uart.c
+++ b/drivers/tty/serial/pch_uart.c
@@ -593,17 +593,9 @@ static int push_rx(struct eg20t_port *priv, const unsigned char *buf,
 {
 	struct uart_port *port = &priv->port;
 	struct tty_port *tport = &port->state->port;
-	struct tty_struct *tty;
-
-	tty = tty_port_tty_get(tport);
-	if (!tty) {
-		dev_dbg(priv->port.dev, "%s:tty is busy now", __func__);
-		return -EBUSY;
-	}
 
 	tty_insert_flip_string(tport, buf, size);
-	tty_flip_buffer_push(tty);
-	tty_kref_put(tty);
+	tty_flip_buffer_push(tport);
 
 	return 0;
 }
@@ -744,19 +736,12 @@ static void pch_dma_rx_complete(void *arg)
 {
 	struct eg20t_port *priv = arg;
 	struct uart_port *port = &priv->port;
-	struct tty_struct *tty = tty_port_tty_get(&port->state->port);
 	int count;
 
-	if (!tty) {
-		dev_dbg(priv->port.dev, "%s:tty is busy now", __func__);
-		return;
-	}
-
 	dma_sync_sg_for_cpu(port->dev, &priv->sg_rx, 1, DMA_FROM_DEVICE);
 	count = dma_push_rx(priv, priv->trigger_level);
 	if (count)
-		tty_flip_buffer_push(tty);
-	tty_kref_put(tty);
+		tty_flip_buffer_push(&port->state->port);
 	async_tx_ack(priv->desc_rx);
 	pch_uart_hal_enable_interrupt(priv, PCH_UART_HAL_RX_INT |
 					    PCH_UART_HAL_RX_ERR_INT);
diff --git a/drivers/tty/serial/pmac_zilog.c b/drivers/tty/serial/pmac_zilog.c
index 73a3f295e7c4..b1785f58b6e3 100644
--- a/drivers/tty/serial/pmac_zilog.c
+++ b/drivers/tty/serial/pmac_zilog.c
@@ -227,21 +227,19 @@ static void pmz_interrupt_control(struct uart_pmac_port *uap, int enable)
 	write_zsreg(uap, R1, uap->curregs[1]);
 }
 
-static struct tty_struct *pmz_receive_chars(struct uart_pmac_port *uap)
+static bool pmz_receive_chars(struct uart_pmac_port *uap)
 {
 	struct tty_port *port;
-	struct tty_struct *tty = NULL;
 	unsigned char ch, r1, drop, error, flag;
 	int loops = 0;
 
 	/* Sanity check, make sure the old bug is no longer happening */
-	if (uap->port.state == NULL || uap->port.state->port.tty == NULL) {
+	if (uap->port.state == NULL) {
 		WARN_ON(1);
 		(void)read_zsdata(uap);
-		return NULL;
+		return false;
 	}
 	port = &uap->port.state->port;
-	tty = port->tty; /* TOCTOU above */
 
 	while (1) {
 		error = 0;
@@ -330,11 +328,11 @@ static struct tty_struct *pmz_receive_chars(struct uart_pmac_port *uap)
 			break;
 	}
 
-	return tty;
+	return true;
  flood:
 	pmz_interrupt_control(uap, 0);
 	pmz_error("pmz: rx irq flood !\n");
-	return tty;
+	return true;
 }
 
 static void pmz_status_handle(struct uart_pmac_port *uap)
@@ -455,7 +453,7 @@ static irqreturn_t pmz_interrupt(int irq, void *dev_id)
 	struct uart_pmac_port *uap_a;
 	struct uart_pmac_port *uap_b;
 	int rc = IRQ_NONE;
-	struct tty_struct *tty;
+	bool push;
 	u8 r3;
 
 	uap_a = pmz_get_port_A(uap);
@@ -468,7 +466,7 @@ static irqreturn_t pmz_interrupt(int irq, void *dev_id)
 	pmz_debug("irq, r3: %x\n", r3);
 #endif
 	/* Channel A */
-	tty = NULL;
+	push = false;
 	if (r3 & (CHAEXT | CHATxIP | CHARxIP)) {
 		if (!ZS_IS_OPEN(uap_a)) {
 			pmz_debug("ChanA interrupt while not open !\n");
@@ -479,21 +477,21 @@ static irqreturn_t pmz_interrupt(int irq, void *dev_id)
 		if (r3 & CHAEXT)
 			pmz_status_handle(uap_a);
 		if (r3 & CHARxIP)
-			tty = pmz_receive_chars(uap_a);
+			push = pmz_receive_chars(uap_a);
 		if (r3 & CHATxIP)
 			pmz_transmit_chars(uap_a);
 		rc = IRQ_HANDLED;
 	}
  skip_a:
 	spin_unlock(&uap_a->port.lock);
-	if (tty != NULL)
-		tty_flip_buffer_push(tty);
+	if (push)
+		tty_flip_buffer_push(&uap->port.state->port);
 
 	if (!uap_b)
 		goto out;
 
 	spin_lock(&uap_b->port.lock);
-	tty = NULL;
+	push = false;
 	if (r3 & (CHBEXT | CHBTxIP | CHBRxIP)) {
 		if (!ZS_IS_OPEN(uap_b)) {
 			pmz_debug("ChanB interrupt while not open !\n");
@@ -504,15 +502,15 @@ static irqreturn_t pmz_interrupt(int irq, void *dev_id)
 		if (r3 & CHBEXT)
 			pmz_status_handle(uap_b);
 		if (r3 & CHBRxIP)
-			tty = pmz_receive_chars(uap_b);
+			push = pmz_receive_chars(uap_b);
 		if (r3 & CHBTxIP)
 			pmz_transmit_chars(uap_b);
 		rc = IRQ_HANDLED;
 	}
  skip_b:
 	spin_unlock(&uap_b->port.lock);
-	if (tty != NULL)
-		tty_flip_buffer_push(tty);
+	if (push)
+		tty_flip_buffer_push(&uap->port.state->port);
 
  out:
 	return rc;
diff --git a/drivers/tty/serial/pnx8xxx_uart.c b/drivers/tty/serial/pnx8xxx_uart.c
index 0aa75a97531c..7e277a5384a7 100644
--- a/drivers/tty/serial/pnx8xxx_uart.c
+++ b/drivers/tty/serial/pnx8xxx_uart.c
@@ -181,7 +181,6 @@ static void pnx8xxx_enable_ms(struct uart_port *port)
 
 static void pnx8xxx_rx_chars(struct pnx8xxx_port *sport)
 {
-	struct tty_struct *tty = sport->port.state->port.tty;
 	unsigned int status, ch, flg;
 
 	status = FIFO_TO_SM(serial_in(sport, PNX8XXX_FIFO)) |
@@ -238,7 +237,7 @@ static void pnx8xxx_rx_chars(struct pnx8xxx_port *sport)
 		status = FIFO_TO_SM(serial_in(sport, PNX8XXX_FIFO)) |
 			 ISTAT_TO_SM(serial_in(sport, PNX8XXX_ISTAT));
 	}
-	tty_flip_buffer_push(tty);
+	tty_flip_buffer_push(&sport->port.state->port);
 }
 
 static void pnx8xxx_tx_chars(struct pnx8xxx_port *sport)
diff --git a/drivers/tty/serial/pxa.c b/drivers/tty/serial/pxa.c
index 2764828251f5..3b671bc3f966 100644
--- a/drivers/tty/serial/pxa.c
+++ b/drivers/tty/serial/pxa.c
@@ -98,7 +98,6 @@ static void serial_pxa_stop_rx(struct uart_port *port)
 
 static inline void receive_chars(struct uart_pxa_port *up, int *status)
 {
-	struct tty_struct *tty = up->port.state->port.tty;
 	unsigned int ch, flag;
 	int max_count = 256;
 
@@ -168,7 +167,7 @@ static inline void receive_chars(struct uart_pxa_port *up, int *status)
 	ignore_char:
 		*status = serial_in(up, UART_LSR);
 	} while ((*status & UART_LSR_DR) && (max_count-- > 0));
-	tty_flip_buffer_push(tty);
+	tty_flip_buffer_push(&up->port.state->port);
 
 	/* work around Errata #20 according to
 	 * Intel(R) PXA27x Processor Family
diff --git a/drivers/tty/serial/sa1100.c b/drivers/tty/serial/sa1100.c
index 5d4b9b449b4a..af6b3e3ad24d 100644
--- a/drivers/tty/serial/sa1100.c
+++ b/drivers/tty/serial/sa1100.c
@@ -188,7 +188,6 @@ static void sa1100_enable_ms(struct uart_port *port)
 static void
 sa1100_rx_chars(struct sa1100_port *sport)
 {
-	struct tty_struct *tty = sport->port.state->port.tty;
 	unsigned int status, ch, flg;
 
 	status = UTSR1_TO_SM(UART_GET_UTSR1(sport)) |
@@ -233,7 +232,7 @@ sa1100_rx_chars(struct sa1100_port *sport)
 		status = UTSR1_TO_SM(UART_GET_UTSR1(sport)) |
 			 UTSR0_TO_SM(UART_GET_UTSR0(sport));
 	}
-	tty_flip_buffer_push(tty);
+	tty_flip_buffer_push(&sport->port.state->port);
 }
 
 static void sa1100_tx_chars(struct sa1100_port *sport)
diff --git a/drivers/tty/serial/samsung.c b/drivers/tty/serial/samsung.c
index 0684529eb2ad..3aa3c4c83f8b 100644
--- a/drivers/tty/serial/samsung.c
+++ b/drivers/tty/serial/samsung.c
@@ -220,7 +220,6 @@ s3c24xx_serial_rx_chars(int irq, void *dev_id)
 {
 	struct s3c24xx_uart_port *ourport = dev_id;
 	struct uart_port *port = &ourport->port;
-	struct tty_struct *tty = port->state->port.tty;
 	unsigned int ufcon, ch, flag, ufstat, uerstat;
 	unsigned long flags;
 	int max_count = 64;
@@ -298,7 +297,7 @@ s3c24xx_serial_rx_chars(int irq, void *dev_id)
  ignore_char:
 		continue;
 	}
-	tty_flip_buffer_push(tty);
+	tty_flip_buffer_push(&port->state->port);
 
  out:
 	spin_unlock_irqrestore(&port->lock, flags);
diff --git a/drivers/tty/serial/sb1250-duart.c b/drivers/tty/serial/sb1250-duart.c
index f76b1688c5c8..a7cdec2962dd 100644
--- a/drivers/tty/serial/sb1250-duart.c
+++ b/drivers/tty/serial/sb1250-duart.c
@@ -384,7 +384,7 @@ static void sbd_receive_chars(struct sbd_port *sport)
 		uart_insert_char(uport, status, M_DUART_OVRUN_ERR, ch, flag);
 	}
 
-	tty_flip_buffer_push(uport->state->port.tty);
+	tty_flip_buffer_push(&uport->state->port);
 }
 
 static void sbd_transmit_chars(struct sbd_port *sport)
diff --git a/drivers/tty/serial/sc26xx.c b/drivers/tty/serial/sc26xx.c
index 0cd0e4ac12a6..c9735680762d 100644
--- a/drivers/tty/serial/sc26xx.c
+++ b/drivers/tty/serial/sc26xx.c
@@ -136,20 +136,17 @@ static void sc26xx_disable_irq(struct uart_port *port, int mask)
 	WRITE_SC(port, IMR, up->imr);
 }
 
-static struct tty_struct *receive_chars(struct uart_port *port)
+static bool receive_chars(struct uart_port *port)
 {
 	struct tty_port *tport = NULL;
-	struct tty_struct *tty = NULL;
 	int limit = 10000;
 	unsigned char ch;
 	char flag;
 	u8 status;
 
 	/* FIXME what is this trying to achieve? */
-	if (port->state != NULL) {		/* Unopened serial console */
+	if (port->state != NULL)		/* Unopened serial console */
 		tport = &port->state->port;
-		tty = tport->tty;
-	}
 
 	while (limit-- > 0) {
 		status = READ_SC_PORT(port, SR);
@@ -191,7 +188,7 @@ static struct tty_struct *receive_chars(struct uart_port *port)
 
 		tty_insert_flip_char(tport, ch, flag);
 	}
-	return tty;
+	return !!tport;
 }
 
 static void transmit_chars(struct uart_port *port)
@@ -221,36 +218,36 @@ static void transmit_chars(struct uart_port *port)
 static irqreturn_t sc26xx_interrupt(int irq, void *dev_id)
 {
 	struct uart_sc26xx_port *up = dev_id;
-	struct tty_struct *tty;
 	unsigned long flags;
+	bool push;
 	u8 isr;
 
 	spin_lock_irqsave(&up->port[0].lock, flags);
 
-	tty = NULL;
+	push = false;
 	isr = READ_SC(&up->port[0], ISR);
 	if (isr & ISR_TXRDYA)
 	    transmit_chars(&up->port[0]);
 	if (isr & ISR_RXRDYA)
-	    tty = receive_chars(&up->port[0]);
+	    push = receive_chars(&up->port[0]);
 
 	spin_unlock(&up->port[0].lock);
 
-	if (tty)
-		tty_flip_buffer_push(tty);
+	if (push)
+		tty_flip_buffer_push(&up->port[0].state->port);
 
 	spin_lock(&up->port[1].lock);
 
-	tty = NULL;
+	push = false;
 	if (isr & ISR_TXRDYB)
 	    transmit_chars(&up->port[1]);
 	if (isr & ISR_RXRDYB)
-	    tty = receive_chars(&up->port[1]);
+	    push = receive_chars(&up->port[1]);
 
 	spin_unlock_irqrestore(&up->port[1].lock, flags);
 
-	if (tty)
-		tty_flip_buffer_push(tty);
+	if (push)
+		tty_flip_buffer_push(&up->port[1].state->port);
 
 	return IRQ_HANDLED;
 }
diff --git a/drivers/tty/serial/sccnxp.c b/drivers/tty/serial/sccnxp.c
index 418b495e3233..2ced871becff 100644
--- a/drivers/tty/serial/sccnxp.c
+++ b/drivers/tty/serial/sccnxp.c
@@ -285,10 +285,6 @@ static void sccnxp_handle_rx(struct uart_port *port)
 {
 	u8 sr;
 	unsigned int ch, flag;
-	struct tty_struct *tty = tty_port_tty_get(&port->state->port);
-
-	if (!tty)
-		return;
 
 	for (;;) {
 		sr = sccnxp_port_read(port, SCCNXP_SR_REG);
@@ -333,9 +329,7 @@ static void sccnxp_handle_rx(struct uart_port *port)
 		uart_insert_char(port, sr, SR_OVR, ch, flag);
 	}
 
-	tty_flip_buffer_push(tty);
-
-	tty_kref_put(tty);
+	tty_flip_buffer_push(&port->state->port);
 }
 
 static void sccnxp_handle_tx(struct uart_port *port)
diff --git a/drivers/tty/serial/serial_ks8695.c b/drivers/tty/serial/serial_ks8695.c
index 9bd004f9da89..e1caa99e3d3b 100644
--- a/drivers/tty/serial/serial_ks8695.c
+++ b/drivers/tty/serial/serial_ks8695.c
@@ -153,7 +153,6 @@ static void ks8695uart_disable_ms(struct uart_port *port)
 static irqreturn_t ks8695uart_rx_chars(int irq, void *dev_id)
 {
 	struct uart_port *port = dev_id;
-	struct tty_struct *tty = port->state->port.tty;
 	unsigned int status, ch, lsr, flg, max_count = 256;
 
 	status = UART_GET_LSR(port);		/* clears pending LSR interrupts */
@@ -200,7 +199,7 @@ static irqreturn_t ks8695uart_rx_chars(int irq, void *dev_id)
 ignore_char:
 		status = UART_GET_LSR(port);
 	}
-	tty_flip_buffer_push(tty);
+	tty_flip_buffer_push(&port->state->port);
 
 	return IRQ_HANDLED;
 }
diff --git a/drivers/tty/serial/serial_txx9.c b/drivers/tty/serial/serial_txx9.c
index b52b21aeb250..fe48a0c2b4ca 100644
--- a/drivers/tty/serial/serial_txx9.c
+++ b/drivers/tty/serial/serial_txx9.c
@@ -277,7 +277,6 @@ static void serial_txx9_initialize(struct uart_port *port)
 static inline void
 receive_chars(struct uart_txx9_port *up, unsigned int *status)
 {
-	struct tty_struct *tty = up->port.state->port.tty;
 	unsigned char ch;
 	unsigned int disr = *status;
 	int max_count = 256;
@@ -346,7 +345,7 @@ receive_chars(struct uart_txx9_port *up, unsigned int *status)
 		disr = sio_in(up, TXX9_SIDISR);
 	} while (!(disr & TXX9_SIDISR_UVALID) && (max_count-- > 0));
 	spin_unlock(&up->port.lock);
-	tty_flip_buffer_push(tty);
+	tty_flip_buffer_push(&up->port.state->port);
 	spin_lock(&up->port.lock);
 	*status = disr;
 }
diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c
index ecef748f5385..156418619949 100644
--- a/drivers/tty/serial/sh-sci.c
+++ b/drivers/tty/serial/sh-sci.c
@@ -597,7 +597,6 @@ static void sci_receive_chars(struct uart_port *port)
 {
 	struct sci_port *sci_port = to_sci_port(port);
 	struct tty_port *tport = &port->state->port;
-	struct tty_struct *tty = tport->tty;
 	int i, count, copied = 0;
 	unsigned short status;
 	unsigned char flag;
@@ -675,7 +674,7 @@ static void sci_receive_chars(struct uart_port *port)
 
 	if (copied) {
 		/* Tell the rest of the system the news. New characters! */
-		tty_flip_buffer_push(tty);
+		tty_flip_buffer_push(tport);
 	} else {
 		serial_port_in(port, SCxSR); /* dummy read */
 		serial_port_out(port, SCxSR, SCxSR_RDxF_CLEAR(port));
@@ -722,7 +721,6 @@ static int sci_handle_errors(struct uart_port *port)
 	int copied = 0;
 	unsigned short status = serial_port_in(port, SCxSR);
 	struct tty_port *tport = &port->state->port;
-	struct tty_struct *tty = tport->tty;
 	struct sci_port *s = to_sci_port(port);
 
 	/*
@@ -783,7 +781,7 @@ static int sci_handle_errors(struct uart_port *port)
 	}
 
 	if (copied)
-		tty_flip_buffer_push(tty);
+		tty_flip_buffer_push(tport);
 
 	return copied;
 }
@@ -791,7 +789,6 @@ static int sci_handle_errors(struct uart_port *port)
 static int sci_handle_fifo_overrun(struct uart_port *port)
 {
 	struct tty_port *tport = &port->state->port;
-	struct tty_struct *tty = tport->tty;
 	struct sci_port *s = to_sci_port(port);
 	struct plat_sci_reg *reg;
 	int copied = 0;
@@ -806,7 +803,7 @@ static int sci_handle_fifo_overrun(struct uart_port *port)
 		port->icount.overrun++;
 
 		tty_insert_flip_char(tport, 0, TTY_OVERRUN);
-		tty_flip_buffer_push(tty);
+		tty_flip_buffer_push(tport);
 
 		dev_notice(port->dev, "overrun error\n");
 		copied++;
@@ -820,7 +817,6 @@ static int sci_handle_breaks(struct uart_port *port)
 	int copied = 0;
 	unsigned short status = serial_port_in(port, SCxSR);
 	struct tty_port *tport = &port->state->port;
-	struct tty_struct *tty = tport->tty;
 	struct sci_port *s = to_sci_port(port);
 
 	if (uart_handle_break(port))
@@ -842,7 +838,7 @@ static int sci_handle_breaks(struct uart_port *port)
 	}
 
 	if (copied)
-		tty_flip_buffer_push(tty);
+		tty_flip_buffer_push(tport);
 
 	copied += sci_handle_fifo_overrun(port);
 
@@ -1299,7 +1295,6 @@ static void sci_dma_rx_complete(void *arg)
 {
 	struct sci_port *s = arg;
 	struct uart_port *port = &s->port;
-	struct tty_struct *tty = port->state->port.tty;
 	unsigned long flags;
 	int count;
 
@@ -1314,7 +1309,7 @@ static void sci_dma_rx_complete(void *arg)
 	spin_unlock_irqrestore(&port->lock, flags);
 
 	if (count)
-		tty_flip_buffer_push(tty);
+		tty_flip_buffer_push(&port->state->port);
 
 	schedule_work(&s->work_rx);
 }
@@ -1408,7 +1403,6 @@ static void work_fn_rx(struct work_struct *work)
 	if (dma_async_is_tx_complete(s->chan_rx, s->active_rx, NULL, NULL) !=
 	    DMA_SUCCESS) {
 		/* Handle incomplete DMA receive */
-		struct tty_struct *tty = port->state->port.tty;
 		struct dma_chan *chan = s->chan_rx;
 		struct shdma_desc *sh_desc = container_of(desc,
 					struct shdma_desc, async_tx);
@@ -1424,7 +1418,7 @@ static void work_fn_rx(struct work_struct *work)
 		spin_unlock_irqrestore(&port->lock, flags);
 
 		if (count)
-			tty_flip_buffer_push(tty);
+			tty_flip_buffer_push(&port->state->port);
 
 		sci_submit_rx(s);
 
diff --git a/drivers/tty/serial/sirfsoc_uart.c b/drivers/tty/serial/sirfsoc_uart.c
index 142217cd01f4..8f3d6c091acc 100644
--- a/drivers/tty/serial/sirfsoc_uart.c
+++ b/drivers/tty/serial/sirfsoc_uart.c
@@ -206,11 +206,6 @@ static unsigned int
 sirfsoc_uart_pio_rx_chars(struct uart_port *port, unsigned int max_rx_count)
 {
 	unsigned int ch, rx_count = 0;
-	struct tty_struct *tty;
-
-	tty = tty_port_tty_get(&port->state->port);
-	if (!tty)
-		return -ENODEV;
 
 	while (!(rd_regl(port, SIRFUART_RX_FIFO_STATUS) &
 					SIRFUART_FIFOEMPTY_MASK(port))) {
@@ -224,8 +219,7 @@ sirfsoc_uart_pio_rx_chars(struct uart_port *port, unsigned int max_rx_count)
 	}
 
 	port->icount.rx += rx_count;
-	tty_flip_buffer_push(tty);
-	tty_kref_put(tty);
+	tty_flip_buffer_push(&port->state->port);
 
 	return rx_count;
 }
diff --git a/drivers/tty/serial/sn_console.c b/drivers/tty/serial/sn_console.c
index 283232c64656..f51ffdc696fd 100644
--- a/drivers/tty/serial/sn_console.c
+++ b/drivers/tty/serial/sn_console.c
@@ -459,7 +459,6 @@ sn_receive_chars(struct sn_cons_port *port, unsigned long flags)
 {
 	struct tty_port *tport = NULL;
 	int ch;
-	struct tty_struct *tty;
 
 	if (!port) {
 		printk(KERN_ERR "sn_receive_chars - port NULL so can't receive\n");
@@ -474,11 +473,6 @@ sn_receive_chars(struct sn_cons_port *port, unsigned long flags)
 	if (port->sc_port.state) {
 		/* The serial_core stuffs are initialized, use them */
 		tport = &port->sc_port.state->port;
-		tty = tport->tty;
-	}
-	else {
-		/* Not registered yet - can't pass to tty layer.  */
-		tty = NULL;
 	}
 
 	while (port->sc_ops->sal_input_pending()) {
@@ -518,15 +512,15 @@ sn_receive_chars(struct sn_cons_port *port, unsigned long flags)
 #endif /* CONFIG_MAGIC_SYSRQ */
 
 		/* record the character to pass up to the tty layer */
-		if (tty) {
+		if (tport) {
 			if (tty_insert_flip_char(tport, ch, TTY_NORMAL) == 0)
 				break;
 		}
 		port->sc_port.icount.rx++;
 	}
 
-	if (tty)
-		tty_flip_buffer_push(tty);
+	if (tport)
+		tty_flip_buffer_push(tport);
 }
 
 /**
diff --git a/drivers/tty/serial/sunhv.c b/drivers/tty/serial/sunhv.c
index defe92b19e16..ba60708053e0 100644
--- a/drivers/tty/serial/sunhv.c
+++ b/drivers/tty/serial/sunhv.c
@@ -181,17 +181,17 @@ static struct sunhv_ops bywrite_ops = {
 
 static struct sunhv_ops *sunhv_ops = &bychar_ops;
 
-static struct tty_struct *receive_chars(struct uart_port *port)
+static struct tty_port *receive_chars(struct uart_port *port)
 {
-	struct tty_struct *tty = NULL;
+	struct tty_port *tport = NULL;
 
 	if (port->state != NULL)		/* Unopened serial console */
-		tty = port->state->port.tty;
+		tport = &port->state->port;
 
 	if (sunhv_ops->receive_chars(port))
 		sun_do_break();
 
-	return tty;
+	return tport;
 }
 
 static void transmit_chars(struct uart_port *port)
@@ -214,16 +214,16 @@ static void transmit_chars(struct uart_port *port)
 static irqreturn_t sunhv_interrupt(int irq, void *dev_id)
 {
 	struct uart_port *port = dev_id;
-	struct tty_struct *tty;
+	struct tty_port *tport;
 	unsigned long flags;
 
 	spin_lock_irqsave(&port->lock, flags);
-	tty = receive_chars(port);
+	tport = receive_chars(port);
 	transmit_chars(port);
 	spin_unlock_irqrestore(&port->lock, flags);
 
-	if (tty)
-		tty_flip_buffer_push(tty);
+	if (tport)
+		tty_flip_buffer_push(tport);
 
 	return IRQ_HANDLED;
 }
diff --git a/drivers/tty/serial/sunsab.c b/drivers/tty/serial/sunsab.c
index 4abc4d43a8e8..8de2213664e0 100644
--- a/drivers/tty/serial/sunsab.c
+++ b/drivers/tty/serial/sunsab.c
@@ -107,22 +107,19 @@ static __inline__ void sunsab_cec_wait(struct uart_sunsab_port *up)
 		udelay(1);
 }
 
-static struct tty_struct *
+static struct tty_port *
 receive_chars(struct uart_sunsab_port *up,
 	      union sab82532_irq_status *stat)
 {
 	struct tty_port *port = NULL;
-	struct tty_struct *tty = NULL;
 	unsigned char buf[32];
 	int saw_console_brk = 0;
 	int free_fifo = 0;
 	int count = 0;
 	int i;
 
-	if (up->port.state != NULL) {		/* Unopened serial console */
+	if (up->port.state != NULL)		/* Unopened serial console */
 		port = &up->port.state->port;
-		tty = port->tty;
-	}
 
 	/* Read number of BYTES (Character + Status) available. */
 	if (stat->sreg.isr0 & SAB82532_ISR0_RPF) {
@@ -139,7 +136,7 @@ receive_chars(struct uart_sunsab_port *up,
 	if (stat->sreg.isr0 & SAB82532_ISR0_TIME) {
 		sunsab_cec_wait(up);
 		writeb(SAB82532_CMDR_RFRD, &up->regs->w.cmdr);
-		return tty;
+		return port;
 	}
 
 	if (stat->sreg.isr0 & SAB82532_ISR0_RFO)
@@ -219,7 +216,7 @@ receive_chars(struct uart_sunsab_port *up,
 	if (saw_console_brk)
 		sun_do_break();
 
-	return tty;
+	return port;
 }
 
 static void sunsab_stop_tx(struct uart_port *);
@@ -302,7 +299,7 @@ static void check_status(struct uart_sunsab_port *up,
 static irqreturn_t sunsab_interrupt(int irq, void *dev_id)
 {
 	struct uart_sunsab_port *up = dev_id;
-	struct tty_struct *tty;
+	struct tty_port *port = NULL;
 	union sab82532_irq_status status;
 	unsigned long flags;
 	unsigned char gis;
@@ -316,12 +313,11 @@ static irqreturn_t sunsab_interrupt(int irq, void *dev_id)
 	if (gis & 2)
 		status.sreg.isr1 = readb(&up->regs->r.isr1);
 
-	tty = NULL;
 	if (status.stat) {
 		if ((status.sreg.isr0 & (SAB82532_ISR0_TCD | SAB82532_ISR0_TIME |
 					 SAB82532_ISR0_RFO | SAB82532_ISR0_RPF)) ||
 		    (status.sreg.isr1 & SAB82532_ISR1_BRK))
-			tty = receive_chars(up, &status);
+			port = receive_chars(up, &status);
 		if ((status.sreg.isr0 & SAB82532_ISR0_CDSC) ||
 		    (status.sreg.isr1 & SAB82532_ISR1_CSC))
 			check_status(up, &status);
@@ -331,8 +327,8 @@ static irqreturn_t sunsab_interrupt(int irq, void *dev_id)
 
 	spin_unlock_irqrestore(&up->port.lock, flags);
 
-	if (tty)
-		tty_flip_buffer_push(tty);
+	if (port)
+		tty_flip_buffer_push(port);
 
 	return IRQ_HANDLED;
 }
diff --git a/drivers/tty/serial/sunsu.c b/drivers/tty/serial/sunsu.c
index 52325968b06c..e343d6670854 100644
--- a/drivers/tty/serial/sunsu.c
+++ b/drivers/tty/serial/sunsu.c
@@ -315,11 +315,10 @@ static void sunsu_enable_ms(struct uart_port *port)
 	spin_unlock_irqrestore(&up->port.lock, flags);
 }
 
-static struct tty_struct *
+static void
 receive_chars(struct uart_sunsu_port *up, unsigned char *status)
 {
 	struct tty_port *port = &up->port.state->port;
-	struct tty_struct *tty = port->tty;
 	unsigned char ch, flag;
 	int max_count = 256;
 	int saw_console_brk = 0;
@@ -391,8 +390,6 @@ receive_chars(struct uart_sunsu_port *up, unsigned char *status)
 
 	if (saw_console_brk)
 		sun_do_break();
-
-	return tty;
 }
 
 static void transmit_chars(struct uart_sunsu_port *up)
@@ -461,20 +458,16 @@ static irqreturn_t sunsu_serial_interrupt(int irq, void *dev_id)
 	spin_lock_irqsave(&up->port.lock, flags);
 
 	do {
-		struct tty_struct *tty;
-
 		status = serial_inp(up, UART_LSR);
-		tty = NULL;
 		if (status & UART_LSR_DR)
-			tty = receive_chars(up, &status);
+			receive_chars(up, &status);
 		check_modem_status(up);
 		if (status & UART_LSR_THRE)
 			transmit_chars(up);
 
 		spin_unlock_irqrestore(&up->port.lock, flags);
 
-		if (tty)
-			tty_flip_buffer_push(tty);
+		tty_flip_buffer_push(&up->port.state->port);
 
 		spin_lock_irqsave(&up->port.lock, flags);
 
diff --git a/drivers/tty/serial/sunzilog.c b/drivers/tty/serial/sunzilog.c
index 4a11be3849f6..27669ff3d446 100644
--- a/drivers/tty/serial/sunzilog.c
+++ b/drivers/tty/serial/sunzilog.c
@@ -323,19 +323,15 @@ static void sunzilog_kbdms_receive_chars(struct uart_sunzilog_port *up,
 	}
 }
 
-static struct tty_struct *
+static struct tty_port *
 sunzilog_receive_chars(struct uart_sunzilog_port *up,
 		       struct zilog_channel __iomem *channel)
 {
 	struct tty_port *port = NULL;
-	struct tty_struct *tty;
 	unsigned char ch, r1, flag;
 
-	tty = NULL;
-	if (up->port.state != NULL) {		/* Unopened serial console */
+	if (up->port.state != NULL)		/* Unopened serial console */
 		port = &up->port.state->port;
-		tty = port->tty;		/* mouse => tty is NULL */
-	}
 
 	for (;;) {
 
@@ -403,7 +399,7 @@ sunzilog_receive_chars(struct uart_sunzilog_port *up,
 			tty_insert_flip_char(port, 0, TTY_OVERRUN);
 	}
 
-	return tty;
+	return port;
 }
 
 static void sunzilog_status_handle(struct uart_sunzilog_port *up,
@@ -536,21 +532,21 @@ static irqreturn_t sunzilog_interrupt(int irq, void *dev_id)
 	while (up) {
 		struct zilog_channel __iomem *channel
 			= ZILOG_CHANNEL_FROM_PORT(&up->port);
-		struct tty_struct *tty;
+		struct tty_port *port;
 		unsigned char r3;
 
 		spin_lock(&up->port.lock);
 		r3 = read_zsreg(channel, R3);
 
 		/* Channel A */
-		tty = NULL;
+		port = NULL;
 		if (r3 & (CHAEXT | CHATxIP | CHARxIP)) {
 			writeb(RES_H_IUS, &channel->control);
 			ZSDELAY();
 			ZS_WSYNC(channel);
 
 			if (r3 & CHARxIP)
-				tty = sunzilog_receive_chars(up, channel);
+				port = sunzilog_receive_chars(up, channel);
 			if (r3 & CHAEXT)
 				sunzilog_status_handle(up, channel);
 			if (r3 & CHATxIP)
@@ -558,22 +554,22 @@ static irqreturn_t sunzilog_interrupt(int irq, void *dev_id)
 		}
 		spin_unlock(&up->port.lock);
 
-		if (tty)
-			tty_flip_buffer_push(tty);
+		if (port)
+			tty_flip_buffer_push(port);
 
 		/* Channel B */
 		up = up->next;
 		channel = ZILOG_CHANNEL_FROM_PORT(&up->port);
 
 		spin_lock(&up->port.lock);
-		tty = NULL;
+		port = NULL;
 		if (r3 & (CHBEXT | CHBTxIP | CHBRxIP)) {
 			writeb(RES_H_IUS, &channel->control);
 			ZSDELAY();
 			ZS_WSYNC(channel);
 
 			if (r3 & CHBRxIP)
-				tty = sunzilog_receive_chars(up, channel);
+				port = sunzilog_receive_chars(up, channel);
 			if (r3 & CHBEXT)
 				sunzilog_status_handle(up, channel);
 			if (r3 & CHBTxIP)
@@ -581,8 +577,8 @@ static irqreturn_t sunzilog_interrupt(int irq, void *dev_id)
 		}
 		spin_unlock(&up->port.lock);
 
-		if (tty)
-			tty_flip_buffer_push(tty);
+		if (port)
+			tty_flip_buffer_push(port);
 
 		up = up->next;
 	}
diff --git a/drivers/tty/serial/timbuart.c b/drivers/tty/serial/timbuart.c
index f40c634f7528..6818410a2bea 100644
--- a/drivers/tty/serial/timbuart.c
+++ b/drivers/tty/serial/timbuart.c
@@ -100,7 +100,7 @@ static void timbuart_rx_chars(struct uart_port *port)
 	}
 
 	spin_unlock(&port->lock);
-	tty_flip_buffer_push(port->state->port.tty);
+	tty_flip_buffer_push(tport);
 	spin_lock(&port->lock);
 
 	dev_dbg(port->dev, "%s - total read %d bytes\n",
diff --git a/drivers/tty/serial/uartlite.c b/drivers/tty/serial/uartlite.c
index 5caf1f0ebc82..5486505e87c7 100644
--- a/drivers/tty/serial/uartlite.c
+++ b/drivers/tty/serial/uartlite.c
@@ -156,7 +156,7 @@ static irqreturn_t ulite_isr(int irq, void *dev_id)
 
 	/* work done? */
 	if (n > 1) {
-		tty_flip_buffer_push(port->state->port.tty);
+		tty_flip_buffer_push(&port->state->port);
 		return IRQ_HANDLED;
 	} else {
 		return IRQ_NONE;
diff --git a/drivers/tty/serial/ucc_uart.c b/drivers/tty/serial/ucc_uart.c
index 7a2378627fa5..7355303dad99 100644
--- a/drivers/tty/serial/ucc_uart.c
+++ b/drivers/tty/serial/ucc_uart.c
@@ -470,7 +470,6 @@ static void qe_uart_int_rx(struct uart_qe_port *qe_port)
 	unsigned char ch, *cp;
 	struct uart_port *port = &qe_port->port;
 	struct tty_port *tport = &port->state->port;
-	struct tty_struct *tty = tport->tty;
 	struct qe_bd *bdp;
 	u16 status;
 	unsigned int flg;
@@ -531,7 +530,7 @@ error_return:
 	qe_port->rx_cur = bdp;
 
 	/* Activate BH processing */
-	tty_flip_buffer_push(tty);
+	tty_flip_buffer_push(tport);
 
 	return;
 
diff --git a/drivers/tty/serial/vr41xx_siu.c b/drivers/tty/serial/vr41xx_siu.c
index 62ee0166bc65..f655997f44af 100644
--- a/drivers/tty/serial/vr41xx_siu.c
+++ b/drivers/tty/serial/vr41xx_siu.c
@@ -313,12 +313,10 @@ static void siu_break_ctl(struct uart_port *port, int ctl)
 
 static inline void receive_chars(struct uart_port *port, uint8_t *status)
 {
-	struct tty_struct *tty;
 	uint8_t lsr, ch;
 	char flag;
 	int max_count = RX_MAX_COUNT;
 
-	tty = port->state->port.tty;
 	lsr = *status;
 
 	do {
@@ -365,7 +363,7 @@ static inline void receive_chars(struct uart_port *port, uint8_t *status)
 		lsr = siu_read(port, UART_LSR);
 	} while ((lsr & UART_LSR_DR) && (max_count-- > 0));
 
-	tty_flip_buffer_push(tty);
+	tty_flip_buffer_push(&port->state->port);
 
 	*status = lsr;
 }
diff --git a/drivers/tty/serial/vt8500_serial.c b/drivers/tty/serial/vt8500_serial.c
index 7f4112423f3d..f1a398c672fa 100644
--- a/drivers/tty/serial/vt8500_serial.c
+++ b/drivers/tty/serial/vt8500_serial.c
@@ -137,15 +137,6 @@ static void vt8500_enable_ms(struct uart_port *port)
 static void handle_rx(struct uart_port *port)
 {
 	struct tty_port *tport = &port->state->port;
-	struct tty_struct *tty = tty_port_tty_get(tport);
-	if (!tty) {
-		/* Discard data: no tty available */
-		int count = (vt8500_read(port, VT8500_URFIDX) & 0x1f00) >> 8;
-		u16 ch;
-		while (count--)
-			ch = readw(port->membase + VT8500_RXFIFO);
-		return;
-	}
 
 	/*
 	 * Handle overrun
@@ -178,8 +169,7 @@ static void handle_rx(struct uart_port *port)
 			tty_insert_flip_char(tport, c, flag);
 	}
 
-	tty_flip_buffer_push(tty);
-	tty_kref_put(tty);
+	tty_flip_buffer_push(tport);
 }
 
 static void handle_tx(struct uart_port *port)
diff --git a/drivers/tty/serial/xilinx_uartps.c b/drivers/tty/serial/xilinx_uartps.c
index 9ab910370c56..82a3151e393c 100644
--- a/drivers/tty/serial/xilinx_uartps.c
+++ b/drivers/tty/serial/xilinx_uartps.c
@@ -147,15 +147,11 @@
 static irqreturn_t xuartps_isr(int irq, void *dev_id)
 {
 	struct uart_port *port = (struct uart_port *)dev_id;
-	struct tty_struct *tty;
 	unsigned long flags;
 	unsigned int isrstatus, numbytes;
 	unsigned int data;
 	char status = TTY_NORMAL;
 
-	/* Get the tty which could be NULL so don't assume it's valid */
-	tty = tty_port_tty_get(&port->state->port);
-
 	spin_lock_irqsave(&port->lock, flags);
 
 	/* Read the interrupt status register to determine which
@@ -187,14 +183,11 @@ static irqreturn_t xuartps_isr(int irq, void *dev_id)
 			} else if (isrstatus & XUARTPS_IXR_OVERRUN)
 				port->icount.overrun++;
 
-			if (tty)
-				uart_insert_char(port, isrstatus,
-						XUARTPS_IXR_OVERRUN, data,
-						status);
+			uart_insert_char(port, isrstatus, XUARTPS_IXR_OVERRUN,
+					data, status);
 		}
 		spin_unlock(&port->lock);
-		if (tty)
-			tty_flip_buffer_push(tty);
+		tty_flip_buffer_push(&port->state->port);
 		spin_lock(&port->lock);
 	}
 
@@ -237,7 +230,6 @@ static irqreturn_t xuartps_isr(int irq, void *dev_id)
 
 	/* be sure to release the lock and tty before leaving */
 	spin_unlock_irqrestore(&port->lock, flags);
-	tty_kref_put(tty);
 
 	return IRQ_HANDLED;
 }
diff --git a/drivers/tty/serial/zs.c b/drivers/tty/serial/zs.c
index 92c00b24d0df..6a169877109b 100644
--- a/drivers/tty/serial/zs.c
+++ b/drivers/tty/serial/zs.c
@@ -603,7 +603,7 @@ static void zs_receive_chars(struct zs_port *zport)
 		uart_insert_char(uport, status, Rx_OVR, ch, flag);
 	}
 
-	tty_flip_buffer_push(uport->state->port.tty);
+	tty_flip_buffer_push(&uport->state->port);
 }
 
 static void zs_raw_transmit_chars(struct zs_port *zport)
diff --git a/drivers/tty/synclink.c b/drivers/tty/synclink.c
index 2f6967d61a80..555fdc0ed0f1 100644
--- a/drivers/tty/synclink.c
+++ b/drivers/tty/synclink.c
@@ -1439,7 +1439,6 @@ static void mgsl_isr_receive_data( struct mgsl_struct *info )
 	u16 status;
 	int work = 0;
 	unsigned char DataByte;
- 	struct tty_struct *tty = info->port.tty;
  	struct	mgsl_icount *icount = &info->icount;
 	
 	if ( debug_level >= DEBUG_LEVEL_ISR )	
@@ -1501,7 +1500,7 @@ static void mgsl_isr_receive_data( struct mgsl_struct *info )
 			if (status & RXSTATUS_BREAK_RECEIVED) {
 				flag = TTY_BREAK;
 				if (info->port.flags & ASYNC_SAK)
-					do_SAK(tty);
+					do_SAK(info->port.tty);
 			} else if (status & RXSTATUS_PARITY_ERROR)
 				flag = TTY_PARITY;
 			else if (status & RXSTATUS_FRAMING_ERROR)
@@ -1524,7 +1523,7 @@ static void mgsl_isr_receive_data( struct mgsl_struct *info )
 	}
 			
 	if(work)
-		tty_flip_buffer_push(tty);
+		tty_flip_buffer_push(&info->port);
 }
 
 /* mgsl_isr_misc()
diff --git a/drivers/tty/synclink_gt.c b/drivers/tty/synclink_gt.c
index 9a0358a1e0dd..fced6acc74ee 100644
--- a/drivers/tty/synclink_gt.c
+++ b/drivers/tty/synclink_gt.c
@@ -1854,7 +1854,6 @@ static void hdlcdev_exit(struct slgt_info *info)
  */
 static void rx_async(struct slgt_info *info)
 {
- 	struct tty_struct *tty = info->port.tty;
  	struct mgsl_icount *icount = &info->icount;
 	unsigned int start, end;
 	unsigned char *p;
@@ -1915,8 +1914,8 @@ static void rx_async(struct slgt_info *info)
 			break;
 	}
 
-	if (tty && chars)
-		tty_flip_buffer_push(tty);
+	if (chars)
+		tty_flip_buffer_push(&info->port);
 }
 
 /*
diff --git a/drivers/tty/synclinkmp.c b/drivers/tty/synclinkmp.c
index 419f58ff4a13..e4a2904af565 100644
--- a/drivers/tty/synclinkmp.c
+++ b/drivers/tty/synclinkmp.c
@@ -2167,7 +2167,6 @@ static void isr_rxrdy(SLMP_INFO * info)
 {
 	u16 status;
 	unsigned char DataByte;
- 	struct tty_struct *tty = info->port.tty;
  	struct	mgsl_icount *icount = &info->icount;
 
 	if ( debug_level >= DEBUG_LEVEL_ISR )
@@ -2225,8 +2224,7 @@ static void isr_rxrdy(SLMP_INFO * info)
 			icount->frame,icount->overrun);
 	}
 
-	if ( tty )
-		tty_flip_buffer_push(tty);
+	tty_flip_buffer_push(&info->port);
 }
 
 static void isr_txeom(SLMP_INFO * info, unsigned char status)
diff --git a/drivers/tty/tty_buffer.c b/drivers/tty/tty_buffer.c
index 1bfe97a8e2eb..b6efacadf23b 100644
--- a/drivers/tty/tty_buffer.c
+++ b/drivers/tty/tty_buffer.c
@@ -544,7 +544,7 @@ void tty_flush_to_ldisc(struct tty_struct *tty)
 
 /**
  *	tty_flip_buffer_push	-	terminal
- *	@tty: tty to push
+ *	@port: tty port to push
  *
  *	Queue a push of the terminal flip buffers to the line discipline. This
  *	function must not be called from IRQ context if port->low_latency is
@@ -556,9 +556,9 @@ void tty_flush_to_ldisc(struct tty_struct *tty)
  *	Locking: tty buffer lock. Driver locks in low latency mode.
  */
 
-void tty_flip_buffer_push(struct tty_struct *tty)
+void tty_flip_buffer_push(struct tty_port *port)
 {
-	struct tty_bufhead *buf = &tty->port->buf;
+	struct tty_bufhead *buf = &port->buf;
 	unsigned long flags;
 
 	spin_lock_irqsave(&buf->lock, flags);
@@ -566,7 +566,7 @@ void tty_flip_buffer_push(struct tty_struct *tty)
 		buf->tail->commit = buf->tail->used;
 	spin_unlock_irqrestore(&buf->lock, flags);
 
-	if (tty->port->low_latency)
+	if (port->low_latency)
 		flush_to_ldisc(&buf->work);
 	else
 		schedule_work(&buf->work);
diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c
index 20dc2add27ba..15b36e2efa85 100644
--- a/drivers/usb/class/cdc-acm.c
+++ b/drivers/usb/class/cdc-acm.c
@@ -410,20 +410,12 @@ static int acm_submit_read_urbs(struct acm *acm, gfp_t mem_flags)
 
 static void acm_process_read_urb(struct acm *acm, struct urb *urb)
 {
-	struct tty_struct *tty;
-
 	if (!urb->actual_length)
 		return;
 
-	tty = tty_port_tty_get(&acm->port);
-	if (!tty)
-		return;
-
 	tty_insert_flip_string(&acm->port, urb->transfer_buffer,
 			urb->actual_length);
-	tty_flip_buffer_push(tty);
-
-	tty_kref_put(tty);
+	tty_flip_buffer_push(&acm->port);
 }
 
 static void acm_read_bulk_callback(struct urb *urb)
diff --git a/drivers/usb/gadget/u_serial.c b/drivers/usb/gadget/u_serial.c
index 3560799d530a..ca4fc3d3e7ff 100644
--- a/drivers/usb/gadget/u_serial.c
+++ b/drivers/usb/gadget/u_serial.c
@@ -551,8 +551,8 @@ static void gs_rx_push(unsigned long _port)
 	/* Push from tty to ldisc; without low_latency set this is handled by
 	 * a workqueue, so we won't get callbacks and can hold port_lock
 	 */
-	if (tty && do_push)
-		tty_flip_buffer_push(tty);
+	if (do_push)
+		tty_flip_buffer_push(&port->port);
 
 
 	/* We want our data queue to become empty ASAP, keeping data
diff --git a/drivers/usb/serial/aircable.c b/drivers/usb/serial/aircable.c
index 3bb1f8f11fc8..6e320cec397d 100644
--- a/drivers/usb/serial/aircable.c
+++ b/drivers/usb/serial/aircable.c
@@ -140,16 +140,11 @@ static void aircable_process_read_urb(struct urb *urb)
 {
 	struct usb_serial_port *port = urb->context;
 	char *data = (char *)urb->transfer_buffer;
-	struct tty_struct *tty;
 	int has_headers;
 	int count;
 	int len;
 	int i;
 
-	tty = tty_port_tty_get(&port->port);
-	if (!tty)
-		return;
-
 	has_headers = (urb->actual_length > 2 && data[0] == RX_HEADER_0);
 
 	count = 0;
@@ -160,8 +155,7 @@ static void aircable_process_read_urb(struct urb *urb)
 	}
 
 	if (count)
-		tty_flip_buffer_push(tty);
-	tty_kref_put(tty);
+		tty_flip_buffer_push(&port->port);
 }
 
 static struct usb_serial_driver aircable_device = {
diff --git a/drivers/usb/serial/ark3116.c b/drivers/usb/serial/ark3116.c
index 1614feb6a76e..cbd904b8fba5 100644
--- a/drivers/usb/serial/ark3116.c
+++ b/drivers/usb/serial/ark3116.c
@@ -674,7 +674,6 @@ static void ark3116_process_read_urb(struct urb *urb)
 {
 	struct usb_serial_port *port = urb->context;
 	struct ark3116_private *priv = usb_get_serial_port_data(port);
-	struct tty_struct *tty;
 	unsigned char *data = urb->transfer_buffer;
 	char tty_flag = TTY_NORMAL;
 	unsigned long flags;
@@ -689,10 +688,6 @@ static void ark3116_process_read_urb(struct urb *urb)
 	if (!urb->actual_length)
 		return;
 
-	tty = tty_port_tty_get(&port->port);
-	if (!tty)
-		return;
-
 	if (lsr & UART_LSR_BRK_ERROR_BITS) {
 		if (lsr & UART_LSR_BI)
 			tty_flag = TTY_BREAK;
@@ -707,8 +702,7 @@ static void ark3116_process_read_urb(struct urb *urb)
 	}
 	tty_insert_flip_string_fixed_flag(&port->port, data, tty_flag,
 							urb->actual_length);
-	tty_flip_buffer_push(tty);
-	tty_kref_put(tty);
+	tty_flip_buffer_push(&port->port);
 }
 
 static struct usb_serial_driver ark3116_device = {
diff --git a/drivers/usb/serial/belkin_sa.c b/drivers/usb/serial/belkin_sa.c
index 7ba2c0bdcec9..84217e78ded4 100644
--- a/drivers/usb/serial/belkin_sa.c
+++ b/drivers/usb/serial/belkin_sa.c
@@ -242,7 +242,6 @@ static void belkin_sa_process_read_urb(struct urb *urb)
 {
 	struct usb_serial_port *port = urb->context;
 	struct belkin_sa_private *priv = usb_get_serial_port_data(port);
-	struct tty_struct *tty;
 	unsigned char *data = urb->transfer_buffer;
 	unsigned long flags;
 	unsigned char status;
@@ -259,10 +258,6 @@ static void belkin_sa_process_read_urb(struct urb *urb)
 	if (!urb->actual_length)
 		return;
 
-	tty = tty_port_tty_get(&port->port);
-	if (!tty)
-		return;
-
 	if (status & BELKIN_SA_LSR_ERR) {
 		/* Break takes precedence over parity, which takes precedence
 		 * over framing errors. */
@@ -281,8 +276,7 @@ static void belkin_sa_process_read_urb(struct urb *urb)
 
 	tty_insert_flip_string_fixed_flag(&port->port, data, tty_flag,
 							urb->actual_length);
-	tty_flip_buffer_push(tty);
-	tty_kref_put(tty);
+	tty_flip_buffer_push(&port->port);
 }
 
 static void belkin_sa_set_termios(struct tty_struct *tty,
diff --git a/drivers/usb/serial/cyberjack.c b/drivers/usb/serial/cyberjack.c
index e6976a974472..629bd2894506 100644
--- a/drivers/usb/serial/cyberjack.c
+++ b/drivers/usb/serial/cyberjack.c
@@ -324,7 +324,6 @@ static void cyberjack_read_bulk_callback(struct urb *urb)
 	struct usb_serial_port *port = urb->context;
 	struct cyberjack_private *priv = usb_get_serial_port_data(port);
 	struct device *dev = &port->dev;
-	struct tty_struct *tty;
 	unsigned char *data = urb->transfer_buffer;
 	short todo;
 	int result;
@@ -337,16 +336,10 @@ static void cyberjack_read_bulk_callback(struct urb *urb)
 		return;
 	}
 
-	tty = tty_port_tty_get(&port->port);
-	if (!tty) {
-		dev_dbg(dev, "%s - ignoring since device not open\n", __func__);
-		return;
-	}
 	if (urb->actual_length) {
 		tty_insert_flip_string(&port->port, data, urb->actual_length);
-		tty_flip_buffer_push(tty);
+		tty_flip_buffer_push(&port->port);
 	}
-	tty_kref_put(tty);
 
 	spin_lock(&priv->lock);
 
diff --git a/drivers/usb/serial/cypress_m8.c b/drivers/usb/serial/cypress_m8.c
index ac14e3eb95ea..8efa19d0e9fb 100644
--- a/drivers/usb/serial/cypress_m8.c
+++ b/drivers/usb/serial/cypress_m8.c
@@ -1214,10 +1214,10 @@ static void cypress_read_int_callback(struct urb *urb)
 		spin_unlock_irqrestore(&priv->lock, flags);
 
 	/* process read if there is data other than line status */
-	if (tty && bytes > i) {
+	if (bytes > i) {
 		tty_insert_flip_string_fixed_flag(&port->port, data + i,
 				tty_flag, bytes - i);
-		tty_flip_buffer_push(tty);
+		tty_flip_buffer_push(&port->port);
 	}
 
 	spin_lock_irqsave(&priv->lock, flags);
diff --git a/drivers/usb/serial/digi_acceleport.c b/drivers/usb/serial/digi_acceleport.c
index b5fa738512ca..ebe45fa0ed50 100644
--- a/drivers/usb/serial/digi_acceleport.c
+++ b/drivers/usb/serial/digi_acceleport.c
@@ -1399,9 +1399,7 @@ static void digi_read_bulk_callback(struct urb *urb)
 
 static int digi_read_inb_callback(struct urb *urb)
 {
-
 	struct usb_serial_port *port = urb->context;
-	struct tty_struct *tty;
 	struct digi_port *priv = usb_get_serial_port_data(port);
 	int opcode = ((unsigned char *)urb->transfer_buffer)[0];
 	int len = ((unsigned char *)urb->transfer_buffer)[1];
@@ -1425,7 +1423,6 @@ static int digi_read_inb_callback(struct urb *urb)
 		return -1;
 	}
 
-	tty = tty_port_tty_get(&port->port);
 	spin_lock(&priv->dp_port_lock);
 
 	/* check for throttle; if set, do not resubmit read urb */
@@ -1435,7 +1432,7 @@ static int digi_read_inb_callback(struct urb *urb)
 		priv->dp_throttle_restart = 1;
 
 	/* receive data */
-	if (tty && opcode == DIGI_CMD_RECEIVE_DATA) {
+	if (opcode == DIGI_CMD_RECEIVE_DATA) {
 		/* get flag from port_status */
 		flag = 0;
 
@@ -1457,11 +1454,10 @@ static int digi_read_inb_callback(struct urb *urb)
 		if (len > 0) {
 			tty_insert_flip_string_fixed_flag(&port->port, data,
 					flag, len);
-			tty_flip_buffer_push(tty);
+			tty_flip_buffer_push(&port->port);
 		}
 	}
 	spin_unlock(&priv->dp_port_lock);
-	tty_kref_put(tty);
 
 	if (opcode == DIGI_CMD_RECEIVE_DISABLE)
 		dev_dbg(&port->dev, "%s: got RECEIVE_DISABLE\n", __func__);
diff --git a/drivers/usb/serial/f81232.c b/drivers/usb/serial/f81232.c
index 6b880c33d258..b1b2dc64b50b 100644
--- a/drivers/usb/serial/f81232.c
+++ b/drivers/usb/serial/f81232.c
@@ -100,7 +100,6 @@ static void f81232_process_read_urb(struct urb *urb)
 {
 	struct usb_serial_port *port = urb->context;
 	struct f81232_private *priv = usb_get_serial_port_data(port);
-	struct tty_struct *tty;
 	unsigned char *data = urb->transfer_buffer;
 	char tty_flag = TTY_NORMAL;
 	unsigned long flags;
@@ -117,10 +116,6 @@ static void f81232_process_read_urb(struct urb *urb)
 	if (!urb->actual_length)
 		return;
 
-	tty = tty_port_tty_get(&port->port);
-	if (!tty)
-		return;
-
 	/* break takes precedence over parity, */
 	/* which takes precedence over framing errors */
 	if (line_status & UART_BREAK_ERROR)
@@ -145,8 +140,7 @@ static void f81232_process_read_urb(struct urb *urb)
 							urb->actual_length);
 	}
 
-	tty_flip_buffer_push(tty);
-	tty_kref_put(tty);
+	tty_flip_buffer_push(&port->port);
 }
 
 static int set_control_lines(struct usb_device *dev, u8 value)
diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c
index eb59ba3789ad..a96083b7fabc 100644
--- a/drivers/usb/serial/ftdi_sio.c
+++ b/drivers/usb/serial/ftdi_sio.c
@@ -2040,25 +2040,19 @@ static int ftdi_process_packet(struct usb_serial_port *port,
 static void ftdi_process_read_urb(struct urb *urb)
 {
 	struct usb_serial_port *port = urb->context;
-	struct tty_struct *tty;
 	struct ftdi_private *priv = usb_get_serial_port_data(port);
 	char *data = (char *)urb->transfer_buffer;
 	int i;
 	int len;
 	int count = 0;
 
-	tty = tty_port_tty_get(&port->port);
-	if (!tty)
-		return;
-
 	for (i = 0; i < urb->actual_length; i += priv->max_packet_size) {
 		len = min_t(int, urb->actual_length - i, priv->max_packet_size);
 		count += ftdi_process_packet(port, priv, &data[i], len);
 	}
 
 	if (count)
-		tty_flip_buffer_push(tty);
-	tty_kref_put(tty);
+		tty_flip_buffer_push(&port->port);
 }
 
 static void ftdi_break_ctl(struct tty_struct *tty, int break_state)
diff --git a/drivers/usb/serial/garmin_gps.c b/drivers/usb/serial/garmin_gps.c
index 498b5f0da639..1a07b12ef341 100644
--- a/drivers/usb/serial/garmin_gps.c
+++ b/drivers/usb/serial/garmin_gps.c
@@ -252,14 +252,11 @@ static inline int isAbortTrfCmnd(const unsigned char *buf)
 static void send_to_tty(struct usb_serial_port *port,
 			char *data, unsigned int actual_length)
 {
-	struct tty_struct *tty = tty_port_tty_get(&port->port);
-
-	if (tty && actual_length) {
+	if (actual_length) {
 		usb_serial_debug_data(&port->dev, __func__, actual_length, data);
 		tty_insert_flip_string(&port->port, data, actual_length);
-		tty_flip_buffer_push(tty);
+		tty_flip_buffer_push(&port->port);
 	}
-	tty_kref_put(tty);
 }
 
 
diff --git a/drivers/usb/serial/generic.c b/drivers/usb/serial/generic.c
index 3780f6a501b3..4c5c23f1cae5 100644
--- a/drivers/usb/serial/generic.c
+++ b/drivers/usb/serial/generic.c
@@ -313,17 +313,12 @@ EXPORT_SYMBOL_GPL(usb_serial_generic_submit_read_urbs);
 void usb_serial_generic_process_read_urb(struct urb *urb)
 {
 	struct usb_serial_port *port = urb->context;
-	struct tty_struct *tty;
 	char *ch = (char *)urb->transfer_buffer;
 	int i;
 
 	if (!urb->actual_length)
 		return;
 
-	tty = tty_port_tty_get(&port->port);
-	if (!tty)
-		return;
-
 	/* The per character mucking around with sysrq path it too slow for
 	   stuff like 3G modems, so shortcircuit it in the 99.9999999% of cases
 	   where the USB serial is not a console anyway */
@@ -335,8 +330,7 @@ void usb_serial_generic_process_read_urb(struct urb *urb)
 				tty_insert_flip_char(&port->port, *ch, TTY_NORMAL);
 		}
 	}
-	tty_flip_buffer_push(tty);
-	tty_kref_put(tty);
+	tty_flip_buffer_push(&port->port);
 }
 EXPORT_SYMBOL_GPL(usb_serial_generic_process_read_urb);
 
diff --git a/drivers/usb/serial/io_edgeport.c b/drivers/usb/serial/io_edgeport.c
index f96b91da964f..b00e5cbf741f 100644
--- a/drivers/usb/serial/io_edgeport.c
+++ b/drivers/usb/serial/io_edgeport.c
@@ -232,8 +232,8 @@ static void  process_rcvd_data(struct edgeport_serial *edge_serial,
 				unsigned char *buffer, __u16 bufferLength);
 static void process_rcvd_status(struct edgeport_serial *edge_serial,
 				__u8 byte2, __u8 byte3);
-static void edge_tty_recv(struct usb_serial_port *port, struct tty_struct *tty,
-				unsigned char *data, int length);
+static void edge_tty_recv(struct usb_serial_port *port, unsigned char *data,
+		int length);
 static void handle_new_msr(struct edgeport_port *edge_port, __u8 newMsr);
 static void handle_new_lsr(struct edgeport_port *edge_port, __u8 lsrData,
 				__u8 lsr, __u8 data);
@@ -1752,7 +1752,6 @@ static void process_rcvd_data(struct edgeport_serial *edge_serial,
 	struct device *dev = &edge_serial->serial->dev->dev;
 	struct usb_serial_port *port;
 	struct edgeport_port *edge_port;
-	struct tty_struct *tty;
 	__u16 lastBufferLength;
 	__u16 rxLen;
 
@@ -1860,14 +1859,11 @@ static void process_rcvd_data(struct edgeport_serial *edge_serial,
 							edge_serial->rxPort];
 				edge_port = usb_get_serial_port_data(port);
 				if (edge_port->open) {
-					tty = tty_port_tty_get(
-						&edge_port->port->port);
-					if (tty) {
-						dev_dbg(dev, "%s - Sending %d bytes to TTY for port %d\n",
-							__func__, rxLen, edge_serial->rxPort);
-						edge_tty_recv(edge_port->port, tty, buffer, rxLen);
-						tty_kref_put(tty);
-					}
+					dev_dbg(dev, "%s - Sending %d bytes to TTY for port %d\n",
+						__func__, rxLen,
+						edge_serial->rxPort);
+					edge_tty_recv(edge_port->port, buffer,
+							rxLen);
 					edge_port->icount.rx += rxLen;
 				}
 				buffer += rxLen;
@@ -2017,8 +2013,8 @@ static void process_rcvd_status(struct edgeport_serial *edge_serial,
  * edge_tty_recv
  *	this function passes data on to the tty flip buffer
  *****************************************************************************/
-static void edge_tty_recv(struct usb_serial_port *port, struct tty_struct *tty,
-					unsigned char *data, int length)
+static void edge_tty_recv(struct usb_serial_port *port, unsigned char *data,
+		int length)
 {
 	int cnt;
 
@@ -2030,7 +2026,7 @@ static void edge_tty_recv(struct usb_serial_port *port, struct tty_struct *tty,
 	data += cnt;
 	length -= cnt;
 
-	tty_flip_buffer_push(tty);
+	tty_flip_buffer_push(&port->port);
 }
 
 
@@ -2086,14 +2082,9 @@ static void handle_new_lsr(struct edgeport_port *edge_port, __u8 lsrData,
 	}
 
 	/* Place LSR data byte into Rx buffer */
-	if (lsrData) {
-		struct tty_struct *tty =
-				tty_port_tty_get(&edge_port->port->port);
-		if (tty) {
-			edge_tty_recv(edge_port->port, tty, &data, 1);
-			tty_kref_put(tty);
-		}
-	}
+	if (lsrData)
+		edge_tty_recv(edge_port->port, &data, 1);
+
 	/* update input line counters */
 	icount = &edge_port->icount;
 	if (newLsr & LSR_BREAK)
diff --git a/drivers/usb/serial/io_ti.c b/drivers/usb/serial/io_ti.c
index 1286a0b2e2b7..d6485be49ebf 100644
--- a/drivers/usb/serial/io_ti.c
+++ b/drivers/usb/serial/io_ti.c
@@ -201,8 +201,8 @@ static int closing_wait = EDGE_CLOSING_WAIT;
 static bool ignore_cpu_rev;
 static int default_uart_mode;		/* RS232 */
 
-static void edge_tty_recv(struct usb_serial_port *port, struct tty_struct *tty,
-			  unsigned char *data, int length);
+static void edge_tty_recv(struct usb_serial_port *port, unsigned char *data,
+		int length);
 
 static void stop_read(struct edgeport_port *edge_port);
 static int restart_read(struct edgeport_port *edge_port);
@@ -1540,7 +1540,6 @@ static void handle_new_lsr(struct edgeport_port *edge_port, int lsr_data,
 	struct async_icount *icount;
 	__u8 new_lsr = (__u8)(lsr & (__u8)(LSR_OVER_ERR | LSR_PAR_ERR |
 						LSR_FRM_ERR | LSR_BREAK));
-	struct tty_struct *tty;
 
 	dev_dbg(&edge_port->port->dev, "%s - %02x\n", __func__, new_lsr);
 
@@ -1554,13 +1553,8 @@ static void handle_new_lsr(struct edgeport_port *edge_port, int lsr_data,
 		new_lsr &= (__u8)(LSR_OVER_ERR | LSR_BREAK);
 
 	/* Place LSR data byte into Rx buffer */
-	if (lsr_data) {
-		tty = tty_port_tty_get(&edge_port->port->port);
-		if (tty) {
-			edge_tty_recv(edge_port->port, tty, &data, 1);
-			tty_kref_put(tty);
-		}
-	}
+	if (lsr_data)
+		edge_tty_recv(edge_port->port, &data, 1);
 
 	/* update input line counters */
 	icount = &edge_port->icount;
@@ -1676,7 +1670,6 @@ static void edge_bulk_in_callback(struct urb *urb)
 	struct edgeport_port *edge_port = urb->context;
 	struct device *dev = &edge_port->port->dev;
 	unsigned char *data = urb->transfer_buffer;
-	struct tty_struct *tty;
 	int retval = 0;
 	int port_number;
 	int status = urb->status;
@@ -1715,18 +1708,16 @@ static void edge_bulk_in_callback(struct urb *urb)
 		++data;
 	}
 
-	tty = tty_port_tty_get(&edge_port->port->port);
-	if (tty && urb->actual_length) {
+	if (urb->actual_length) {
 		usb_serial_debug_data(dev, __func__, urb->actual_length, data);
 		if (edge_port->close_pending)
 			dev_dbg(dev, "%s - close pending, dropping data on the floor\n",
 								__func__);
 		else
-			edge_tty_recv(edge_port->port, tty, data,
+			edge_tty_recv(edge_port->port, data,
 					urb->actual_length);
 		edge_port->icount.rx += urb->actual_length;
 	}
-	tty_kref_put(tty);
 
 exit:
 	/* continue read unless stopped */
@@ -1741,8 +1732,8 @@ exit:
 		dev_err(dev, "%s - usb_submit_urb failed with result %d\n", __func__, retval);
 }
 
-static void edge_tty_recv(struct usb_serial_port *port, struct tty_struct *tty,
-					unsigned char *data, int length)
+static void edge_tty_recv(struct usb_serial_port *port, unsigned char *data,
+		int length)
 {
 	int queued;
 
@@ -1750,7 +1741,7 @@ static void edge_tty_recv(struct usb_serial_port *port, struct tty_struct *tty,
 	if (queued < length)
 		dev_err(&port->dev, "%s - dropping data, %d bytes lost\n",
 			__func__, length - queued);
-	tty_flip_buffer_push(tty);
+	tty_flip_buffer_push(&port->port);
 }
 
 static void edge_bulk_out_callback(struct urb *urb)
diff --git a/drivers/usb/serial/ir-usb.c b/drivers/usb/serial/ir-usb.c
index 171dae1f4a62..716930ab1bb1 100644
--- a/drivers/usb/serial/ir-usb.c
+++ b/drivers/usb/serial/ir-usb.c
@@ -287,7 +287,6 @@ static void ir_process_read_urb(struct urb *urb)
 {
 	struct usb_serial_port *port = urb->context;
 	unsigned char *data = urb->transfer_buffer;
-	struct tty_struct *tty;
 
 	if (!urb->actual_length)
 		return;
@@ -302,12 +301,8 @@ static void ir_process_read_urb(struct urb *urb)
 	if (urb->actual_length == 1)
 		return;
 
-	tty = tty_port_tty_get(&port->port);
-	if (!tty)
-		return;
 	tty_insert_flip_string(&port->port, data + 1, urb->actual_length - 1);
-	tty_flip_buffer_push(tty);
-	tty_kref_put(tty);
+	tty_flip_buffer_push(&port->port);
 }
 
 static void ir_set_termios_callback(struct urb *urb)
diff --git a/drivers/usb/serial/iuu_phoenix.c b/drivers/usb/serial/iuu_phoenix.c
index dd0d910730c7..ff77027160aa 100644
--- a/drivers/usb/serial/iuu_phoenix.c
+++ b/drivers/usb/serial/iuu_phoenix.c
@@ -581,7 +581,6 @@ static void read_buf_callback(struct urb *urb)
 {
 	struct usb_serial_port *port = urb->context;
 	unsigned char *data = urb->transfer_buffer;
-	struct tty_struct *tty;
 	int status = urb->status;
 
 	if (status) {
@@ -592,14 +591,12 @@ static void read_buf_callback(struct urb *urb)
 	}
 
 	dev_dbg(&port->dev, "%s - %i chars to write\n", __func__, urb->actual_length);
-	tty = tty_port_tty_get(&port->port);
 	if (data == NULL)
 		dev_dbg(&port->dev, "%s - data is NULL !!!\n", __func__);
-	if (tty && urb->actual_length && data) {
+	if (urb->actual_length && data) {
 		tty_insert_flip_string(&port->port, data, urb->actual_length);
-		tty_flip_buffer_push(tty);
+		tty_flip_buffer_push(&port->port);
 	}
-	tty_kref_put(tty);
 	iuu_led_activity_on(urb);
 }
 
diff --git a/drivers/usb/serial/keyspan.c b/drivers/usb/serial/keyspan.c
index 14a219ba4ee6..f6d7f68fa43c 100644
--- a/drivers/usb/serial/keyspan.c
+++ b/drivers/usb/serial/keyspan.c
@@ -291,7 +291,6 @@ static void	usa26_indat_callback(struct urb *urb)
 	int			i, err;
 	int			endpoint;
 	struct usb_serial_port	*port;
-	struct tty_struct	*tty;
 	unsigned char 		*data = urb->transfer_buffer;
 	int status = urb->status;
 
@@ -304,8 +303,7 @@ static void	usa26_indat_callback(struct urb *urb)
 	}
 
 	port =  urb->context;
-	tty = tty_port_tty_get(&port->port);
-	if (tty && urb->actual_length) {
+	if (urb->actual_length) {
 		/* 0x80 bit is error flag */
 		if ((data[0] & 0x80) == 0) {
 			/* no errors on individual bytes, only
@@ -332,9 +330,8 @@ static void	usa26_indat_callback(struct urb *urb)
 						flag);
 			}
 		}
-		tty_flip_buffer_push(tty);
+		tty_flip_buffer_push(&port->port);
 	}
-	tty_kref_put(tty);
 
 	/* Resubmit urb so we continue receiving */
 	err = usb_submit_urb(urb, GFP_ATOMIC);
@@ -447,7 +444,6 @@ static void usa28_indat_callback(struct urb *urb)
 {
 	int                     err;
 	struct usb_serial_port  *port;
-	struct tty_struct       *tty;
 	unsigned char           *data;
 	struct keyspan_port_private             *p_priv;
 	int status = urb->status;
@@ -470,13 +466,11 @@ static void usa28_indat_callback(struct urb *urb)
 		p_priv = usb_get_serial_port_data(port);
 		data = urb->transfer_buffer;
 
-		tty = tty_port_tty_get(&port->port);
-		if (tty && urb->actual_length) {
+		if (urb->actual_length) {
 			tty_insert_flip_string(&port->port, data,
 					urb->actual_length);
-			tty_flip_buffer_push(tty);
+			tty_flip_buffer_push(&port->port);
 		}
-		tty_kref_put(tty);
 
 		/* Resubmit urb so we continue receiving */
 		err = usb_submit_urb(urb, GFP_ATOMIC);
@@ -671,7 +665,6 @@ static void	usa49_indat_callback(struct urb *urb)
 	int			i, err;
 	int			endpoint;
 	struct usb_serial_port	*port;
-	struct tty_struct	*tty;
 	unsigned char 		*data = urb->transfer_buffer;
 	int status = urb->status;
 
@@ -684,8 +677,7 @@ static void	usa49_indat_callback(struct urb *urb)
 	}
 
 	port =  urb->context;
-	tty = tty_port_tty_get(&port->port);
-	if (tty && urb->actual_length) {
+	if (urb->actual_length) {
 		/* 0x80 bit is error flag */
 		if ((data[0] & 0x80) == 0) {
 			/* no error on any byte */
@@ -706,9 +698,8 @@ static void	usa49_indat_callback(struct urb *urb)
 						flag);
 			}
 		}
-		tty_flip_buffer_push(tty);
+		tty_flip_buffer_push(&port->port);
 	}
-	tty_kref_put(tty);
 
 	/* Resubmit urb so we continue receiving */
 	err = usb_submit_urb(urb, GFP_ATOMIC);
@@ -721,7 +712,6 @@ static void usa49wg_indat_callback(struct urb *urb)
 	int			i, len, x, err;
 	struct usb_serial	*serial;
 	struct usb_serial_port	*port;
-	struct tty_struct	*tty;
 	unsigned char 		*data = urb->transfer_buffer;
 	int status = urb->status;
 
@@ -746,7 +736,6 @@ static void usa49wg_indat_callback(struct urb *urb)
 				return;
 			}
 			port = serial->port[data[i++]];
-			tty = tty_port_tty_get(&port->port);
 			len = data[i++];
 
 			/* 0x80 bit is error flag */
@@ -774,8 +763,7 @@ static void usa49wg_indat_callback(struct urb *urb)
 					i += 2;
 				}
 			}
-			tty_flip_buffer_push(tty);
-			tty_kref_put(tty);
+			tty_flip_buffer_push(&port->port);
 		}
 	}
 
@@ -796,7 +784,6 @@ static void usa90_indat_callback(struct urb *urb)
 	int			endpoint;
 	struct usb_serial_port	*port;
 	struct keyspan_port_private	 	*p_priv;
-	struct tty_struct	*tty;
 	unsigned char 		*data = urb->transfer_buffer;
 	int status = urb->status;
 
@@ -812,7 +799,6 @@ static void usa90_indat_callback(struct urb *urb)
 	p_priv = usb_get_serial_port_data(port);
 
 	if (urb->actual_length) {
-		tty = tty_port_tty_get(&port->port);
 		/* if current mode is DMA, looks like usa28 format
 		   otherwise looks like usa26 data format */
 
@@ -848,8 +834,7 @@ static void usa90_indat_callback(struct urb *urb)
 				}
 			}
 		}
-		tty_flip_buffer_push(tty);
-		tty_kref_put(tty);
+		tty_flip_buffer_push(&port->port);
 	}
 
 	/* Resubmit urb so we continue receiving */
diff --git a/drivers/usb/serial/keyspan_pda.c b/drivers/usb/serial/keyspan_pda.c
index 334b1a295c6b..3b17d5d13dc8 100644
--- a/drivers/usb/serial/keyspan_pda.c
+++ b/drivers/usb/serial/keyspan_pda.c
@@ -138,7 +138,6 @@ static void keyspan_pda_request_unthrottle(struct work_struct *work)
 static void keyspan_pda_rx_interrupt(struct urb *urb)
 {
 	struct usb_serial_port *port = urb->context;
-	struct tty_struct *tty;
 	unsigned char *data = urb->transfer_buffer;
 	int retval;
 	int status = urb->status;
@@ -163,14 +162,12 @@ static void keyspan_pda_rx_interrupt(struct urb *urb)
 	/* see if the message is data or a status interrupt */
 	switch (data[0]) {
 	case 0:
-		tty = tty_port_tty_get(&port->port);
 		 /* rest of message is rx data */
-		if (tty && urb->actual_length) {
+		if (urb->actual_length) {
 			tty_insert_flip_string(&port->port, data + 1,
 						urb->actual_length - 1);
-			tty_flip_buffer_push(tty);
+			tty_flip_buffer_push(&port->port);
 		}
-		tty_kref_put(tty);
 		break;
 	case 1:
 		/* status interrupt */
diff --git a/drivers/usb/serial/kl5kusb105.c b/drivers/usb/serial/kl5kusb105.c
index 8ee0825ad700..769d910ae0a5 100644
--- a/drivers/usb/serial/kl5kusb105.c
+++ b/drivers/usb/serial/kl5kusb105.c
@@ -389,7 +389,6 @@ static void klsi_105_process_read_urb(struct urb *urb)
 {
 	struct usb_serial_port *port = urb->context;
 	unsigned char *data = urb->transfer_buffer;
-	struct tty_struct *tty;
 	unsigned len;
 
 	/* empty urbs seem to happen, we ignore them */
@@ -401,10 +400,6 @@ static void klsi_105_process_read_urb(struct urb *urb)
 		return;
 	}
 
-	tty = tty_port_tty_get(&port->port);
-	if (!tty)
-		return;
-
 	len = get_unaligned_le16(data);
 	if (len > urb->actual_length - KLSI_HDR_LEN) {
 		dev_dbg(&port->dev, "%s - packet length mismatch\n", __func__);
@@ -412,8 +407,7 @@ static void klsi_105_process_read_urb(struct urb *urb)
 	}
 
 	tty_insert_flip_string(&port->port, data + KLSI_HDR_LEN, len);
-	tty_flip_buffer_push(tty);
-	tty_kref_put(tty);
+	tty_flip_buffer_push(&port->port);
 }
 
 static void klsi_105_set_termios(struct tty_struct *tty,
diff --git a/drivers/usb/serial/kobil_sct.c b/drivers/usb/serial/kobil_sct.c
index 135c8b4b26f7..903d938e174b 100644
--- a/drivers/usb/serial/kobil_sct.c
+++ b/drivers/usb/serial/kobil_sct.c
@@ -324,7 +324,6 @@ static void kobil_read_int_callback(struct urb *urb)
 {
 	int result;
 	struct usb_serial_port *port = urb->context;
-	struct tty_struct *tty;
 	unsigned char *data = urb->transfer_buffer;
 	int status = urb->status;
 
@@ -333,8 +332,7 @@ static void kobil_read_int_callback(struct urb *urb)
 		return;
 	}
 
-	tty = tty_port_tty_get(&port->port);
-	if (tty && urb->actual_length) {
+	if (urb->actual_length) {
 
 		/* BEGIN DEBUG */
 		/*
@@ -354,9 +352,8 @@ static void kobil_read_int_callback(struct urb *urb)
 		/* END DEBUG */
 
 		tty_insert_flip_string(&port->port, data, urb->actual_length);
-		tty_flip_buffer_push(tty);
+		tty_flip_buffer_push(&port->port);
 	}
-	tty_kref_put(tty);
 
 	result = usb_submit_urb(port->interrupt_in_urb, GFP_ATOMIC);
 	dev_dbg(&port->dev, "%s - Send read URB returns: %i\n", __func__, result);
diff --git a/drivers/usb/serial/mct_u232.c b/drivers/usb/serial/mct_u232.c
index ba20bb037b28..f42528e05d7b 100644
--- a/drivers/usb/serial/mct_u232.c
+++ b/drivers/usb/serial/mct_u232.c
@@ -531,7 +531,6 @@ static void mct_u232_read_int_callback(struct urb *urb)
 {
 	struct usb_serial_port *port = urb->context;
 	struct mct_u232_private *priv = usb_get_serial_port_data(port);
-	struct tty_struct *tty;
 	unsigned char *data = urb->transfer_buffer;
 	int retval;
 	int status = urb->status;
@@ -561,13 +560,9 @@ static void mct_u232_read_int_callback(struct urb *urb)
 	 */
 	if (urb->transfer_buffer_length > 2) {
 		if (urb->actual_length) {
-			tty = tty_port_tty_get(&port->port);
-			if (tty) {
-				tty_insert_flip_string(&port->port, data,
-						urb->actual_length);
-				tty_flip_buffer_push(tty);
-			}
-			tty_kref_put(tty);
+			tty_insert_flip_string(&port->port, data,
+					urb->actual_length);
+			tty_flip_buffer_push(&port->port);
 		}
 		goto exit;
 	}
diff --git a/drivers/usb/serial/metro-usb.c b/drivers/usb/serial/metro-usb.c
index 6264f3974ea7..bf3c7a23553e 100644
--- a/drivers/usb/serial/metro-usb.c
+++ b/drivers/usb/serial/metro-usb.c
@@ -95,7 +95,6 @@ static void metrousb_read_int_callback(struct urb *urb)
 {
 	struct usb_serial_port *port = urb->context;
 	struct metrousb_private *metro_priv = usb_get_serial_port_data(port);
-	struct tty_struct *tty;
 	unsigned char *data = urb->transfer_buffer;
 	int throttled = 0;
 	int result = 0;
@@ -124,15 +123,13 @@ static void metrousb_read_int_callback(struct urb *urb)
 
 
 	/* Set the data read from the usb port into the serial port buffer. */
-	tty = tty_port_tty_get(&port->port);
-	if (tty && urb->actual_length) {
+	if (urb->actual_length) {
 		/* Loop through the data copying each byte to the tty layer. */
 		tty_insert_flip_string(&port->port, data, urb->actual_length);
 
 		/* Force the data to the tty layer. */
-		tty_flip_buffer_push(tty);
+		tty_flip_buffer_push(&port->port);
 	}
-	tty_kref_put(tty);
 
 	/* Set any port variables. */
 	spin_lock_irqsave(&metro_priv->lock, flags);
diff --git a/drivers/usb/serial/mos7720.c b/drivers/usb/serial/mos7720.c
index 22818fb765e0..e0ebec3b5d6a 100644
--- a/drivers/usb/serial/mos7720.c
+++ b/drivers/usb/serial/mos7720.c
@@ -899,7 +899,6 @@ static void mos7720_bulk_in_callback(struct urb *urb)
 	int retval;
 	unsigned char *data ;
 	struct usb_serial_port *port;
-	struct tty_struct *tty;
 	int status = urb->status;
 
 	if (status) {
@@ -913,12 +912,10 @@ static void mos7720_bulk_in_callback(struct urb *urb)
 
 	data = urb->transfer_buffer;
 
-	tty = tty_port_tty_get(&port->port);
-	if (tty && urb->actual_length) {
+	if (urb->actual_length) {
 		tty_insert_flip_string(&port->port, data, urb->actual_length);
-		tty_flip_buffer_push(tty);
+		tty_flip_buffer_push(&port->port);
 	}
-	tty_kref_put(tty);
 
 	if (port->read_urb->status != -EINPROGRESS) {
 		retval = usb_submit_urb(port->read_urb, GFP_ATOMIC);
diff --git a/drivers/usb/serial/mos7840.c b/drivers/usb/serial/mos7840.c
index 3ddd7a1f7ff3..809fb329eca5 100644
--- a/drivers/usb/serial/mos7840.c
+++ b/drivers/usb/serial/mos7840.c
@@ -744,7 +744,6 @@ static void mos7840_bulk_in_callback(struct urb *urb)
 	struct usb_serial *serial;
 	struct usb_serial_port *port;
 	struct moschip_port *mos7840_port;
-	struct tty_struct *tty;
 	int status = urb->status;
 
 	mos7840_port = urb->context;
@@ -774,12 +773,8 @@ static void mos7840_bulk_in_callback(struct urb *urb)
 
 	if (urb->actual_length) {
 		struct tty_port *tport = &mos7840_port->port->port;
-		tty = tty_port_tty_get(tport);
-		if (tty) {
-			tty_insert_flip_string(tport, data, urb->actual_length);
-			tty_flip_buffer_push(tty);
-			tty_kref_put(tty);
-		}
+		tty_insert_flip_string(tport, data, urb->actual_length);
+		tty_flip_buffer_push(tport);
 		mos7840_port->icount.rx += urb->actual_length;
 		smp_wmb();
 		dev_dbg(&port->dev, "mos7840_port->icount.rx is %d:\n", mos7840_port->icount.rx);
diff --git a/drivers/usb/serial/navman.c b/drivers/usb/serial/navman.c
index 0d96a1a7b9e5..38725fc8c2c8 100644
--- a/drivers/usb/serial/navman.c
+++ b/drivers/usb/serial/navman.c
@@ -32,7 +32,6 @@ static void navman_read_int_callback(struct urb *urb)
 {
 	struct usb_serial_port *port = urb->context;
 	unsigned char *data = urb->transfer_buffer;
-	struct tty_struct *tty;
 	int status = urb->status;
 	int result;
 
@@ -55,12 +54,10 @@ static void navman_read_int_callback(struct urb *urb)
 
 	usb_serial_debug_data(&port->dev, __func__, urb->actual_length, data);
 
-	tty = tty_port_tty_get(&port->port);
-	if (tty && urb->actual_length) {
+	if (urb->actual_length) {
 		tty_insert_flip_string(&port->port, data, urb->actual_length);
-		tty_flip_buffer_push(tty);
+		tty_flip_buffer_push(&port->port);
 	}
-	tty_kref_put(tty);
 
 exit:
 	result = usb_submit_urb(urb, GFP_ATOMIC);
diff --git a/drivers/usb/serial/omninet.c b/drivers/usb/serial/omninet.c
index 338191bae5a3..1e1cafe287e4 100644
--- a/drivers/usb/serial/omninet.c
+++ b/drivers/usb/serial/omninet.c
@@ -174,14 +174,9 @@ static void omninet_read_bulk_callback(struct urb *urb)
 	}
 
 	if (urb->actual_length && header->oh_len) {
-		struct tty_struct *tty = tty_port_tty_get(&port->port);
-		if (tty) {
-			tty_insert_flip_string(&port->port,
-					data + OMNINET_DATAOFFSET,
-					header->oh_len);
-			tty_flip_buffer_push(tty);
-			tty_kref_put(tty);
-		}
+		tty_insert_flip_string(&port->port, data + OMNINET_DATAOFFSET,
+				header->oh_len);
+		tty_flip_buffer_push(&port->port);
 	}
 
 	/* Continue trying to always read  */
diff --git a/drivers/usb/serial/opticon.c b/drivers/usb/serial/opticon.c
index d3b74e50aff1..e13e1a4d3e1e 100644
--- a/drivers/usb/serial/opticon.c
+++ b/drivers/usb/serial/opticon.c
@@ -51,15 +51,8 @@ struct opticon_private {
 static void opticon_process_data_packet(struct usb_serial_port *port,
 					const unsigned char *buf, size_t len)
 {
-	struct tty_struct *tty;
-
-	tty = tty_port_tty_get(&port->port);
-	if (!tty)
-		return;
-
 	tty_insert_flip_string(&port->port, buf, len);
-	tty_flip_buffer_push(tty);
-	tty_kref_put(tty);
+	tty_flip_buffer_push(&port->port);
 }
 
 static void opticon_process_status_packet(struct usb_serial_port *port,
diff --git a/drivers/usb/serial/oti6858.c b/drivers/usb/serial/oti6858.c
index 7a53fe9f3af3..a958fd41b5b3 100644
--- a/drivers/usb/serial/oti6858.c
+++ b/drivers/usb/serial/oti6858.c
@@ -820,7 +820,6 @@ static void oti6858_read_bulk_callback(struct urb *urb)
 {
 	struct usb_serial_port *port =  urb->context;
 	struct oti6858_private *priv = usb_get_serial_port_data(port);
-	struct tty_struct *tty;
 	unsigned char *data = urb->transfer_buffer;
 	unsigned long flags;
 	int status = urb->status;
@@ -835,12 +834,10 @@ static void oti6858_read_bulk_callback(struct urb *urb)
 		return;
 	}
 
-	tty = tty_port_tty_get(&port->port);
-	if (tty != NULL && urb->actual_length > 0) {
+	if (urb->actual_length > 0) {
 		tty_insert_flip_string(&port->port, data, urb->actual_length);
-		tty_flip_buffer_push(tty);
+		tty_flip_buffer_push(&port->port);
 	}
-	tty_kref_put(tty);
 
 	/* schedule the interrupt urb */
 	result = usb_submit_urb(port->interrupt_in_urb, GFP_ATOMIC);
diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c
index 00047f3c7293..54adc9125e5c 100644
--- a/drivers/usb/serial/pl2303.c
+++ b/drivers/usb/serial/pl2303.c
@@ -772,7 +772,6 @@ static void pl2303_process_read_urb(struct urb *urb)
 {
 	struct usb_serial_port *port = urb->context;
 	struct pl2303_private *priv = usb_get_serial_port_data(port);
-	struct tty_struct *tty;
 	unsigned char *data = urb->transfer_buffer;
 	char tty_flag = TTY_NORMAL;
 	unsigned long flags;
@@ -789,10 +788,6 @@ static void pl2303_process_read_urb(struct urb *urb)
 	if (!urb->actual_length)
 		return;
 
-	tty = tty_port_tty_get(&port->port);
-	if (!tty)
-		return;
-
 	/* break takes precedence over parity, */
 	/* which takes precedence over framing errors */
 	if (line_status & UART_BREAK_ERROR)
@@ -817,8 +812,7 @@ static void pl2303_process_read_urb(struct urb *urb)
 							urb->actual_length);
 	}
 
-	tty_flip_buffer_push(tty);
-	tty_kref_put(tty);
+	tty_flip_buffer_push(&port->port);
 }
 
 /* All of the device info needed for the PL2303 SIO serial converter */
diff --git a/drivers/usb/serial/quatech2.c b/drivers/usb/serial/quatech2.c
index 5dccc4f957df..6850745808c3 100644
--- a/drivers/usb/serial/quatech2.c
+++ b/drivers/usb/serial/quatech2.c
@@ -609,7 +609,6 @@ void qt2_process_read_urb(struct urb *urb)
 	struct qt2_serial_private *serial_priv;
 	struct usb_serial_port *port;
 	struct qt2_port_private *port_priv;
-	struct tty_struct *tty;
 	bool escapeflag;
 	unsigned char *ch;
 	int i;
@@ -620,15 +619,11 @@ void qt2_process_read_urb(struct urb *urb)
 		return;
 
 	ch = urb->transfer_buffer;
-	tty = NULL;
 	serial = urb->context;
 	serial_priv = usb_get_serial_data(serial);
 	port = serial->port[serial_priv->current_port];
 	port_priv = usb_get_serial_port_data(port);
 
-	if (port_priv->is_open)
-		tty = tty_port_tty_get(&port->port);
-
 	for (i = 0; i < urb->actual_length; i++) {
 		ch = (unsigned char *)urb->transfer_buffer + i;
 		if ((i <= (len - 3)) &&
@@ -666,10 +661,7 @@ void qt2_process_read_urb(struct urb *urb)
 						 __func__);
 					break;
 				}
-				if (tty) {
-					tty_flip_buffer_push(tty);
-					tty_kref_put(tty);
-				}
+				tty_flip_buffer_push(&port->port);
 
 				newport = *(ch + 3);
 
@@ -683,10 +675,6 @@ void qt2_process_read_urb(struct urb *urb)
 				serial_priv->current_port = newport;
 				port = serial->port[serial_priv->current_port];
 				port_priv = usb_get_serial_port_data(port);
-				if (port_priv->is_open)
-					tty = tty_port_tty_get(&port->port);
-				else
-					tty = NULL;
 				i += 3;
 				escapeflag = true;
 				break;
@@ -716,10 +704,7 @@ void qt2_process_read_urb(struct urb *urb)
 		tty_insert_flip_string(&port->port, ch, 1);
 	}
 
-	if (tty) {
-		tty_flip_buffer_push(tty);
-		tty_kref_put(tty);
-	}
+	tty_flip_buffer_push(&port->port);
 }
 
 static void qt2_write_bulk_callback(struct urb *urb)
diff --git a/drivers/usb/serial/safe_serial.c b/drivers/usb/serial/safe_serial.c
index ad12e9e2c7ee..21cd7bf2a8cc 100644
--- a/drivers/usb/serial/safe_serial.c
+++ b/drivers/usb/serial/safe_serial.c
@@ -207,38 +207,31 @@ static void safe_process_read_urb(struct urb *urb)
 	unsigned char *data = urb->transfer_buffer;
 	unsigned char length = urb->actual_length;
 	int actual_length;
-	struct tty_struct *tty;
 	__u16 fcs;
 
 	if (!length)
 		return;
 
-	tty = tty_port_tty_get(&port->port);
-	if (!tty)
-		return;
-
 	if (!safe)
 		goto out;
 
 	fcs = fcs_compute10(data, length, CRC10_INITFCS);
 	if (fcs) {
 		dev_err(&port->dev, "%s - bad CRC %x\n", __func__, fcs);
-		goto err;
+		return;
 	}
 
 	actual_length = data[length - 2] >> 2;
 	if (actual_length > (length - 2)) {
 		dev_err(&port->dev, "%s - inconsistent lengths %d:%d\n",
 				__func__, actual_length, length);
-		goto err;
+		return;
 	}
 	dev_info(&urb->dev->dev, "%s - actual: %d\n", __func__, actual_length);
 	length = actual_length;
 out:
 	tty_insert_flip_string(&port->port, data, length);
-	tty_flip_buffer_push(tty);
-err:
-	tty_kref_put(tty);
+	tty_flip_buffer_push(&port->port);
 }
 
 static int safe_prepare_write_buffer(struct usb_serial_port *port,
diff --git a/drivers/usb/serial/sierra.c b/drivers/usb/serial/sierra.c
index 64e53fda149b..70aee8d59f23 100644
--- a/drivers/usb/serial/sierra.c
+++ b/drivers/usb/serial/sierra.c
@@ -569,7 +569,6 @@ static void sierra_indat_callback(struct urb *urb)
 	int err;
 	int endpoint;
 	struct usb_serial_port *port;
-	struct tty_struct *tty;
 	unsigned char *data = urb->transfer_buffer;
 	int status = urb->status;
 
@@ -581,16 +580,12 @@ static void sierra_indat_callback(struct urb *urb)
 			" endpoint %02x\n", __func__, status, endpoint);
 	} else {
 		if (urb->actual_length) {
-			tty = tty_port_tty_get(&port->port);
-			if (tty) {
-				tty_insert_flip_string(&port->port, data,
-					urb->actual_length);
-				tty_flip_buffer_push(tty);
-
-				tty_kref_put(tty);
-				usb_serial_debug_data(&port->dev, __func__,
-						      urb->actual_length, data);
-			}
+			tty_insert_flip_string(&port->port, data,
+				urb->actual_length);
+			tty_flip_buffer_push(&port->port);
+
+			usb_serial_debug_data(&port->dev, __func__,
+					      urb->actual_length, data);
 		} else {
 			dev_dbg(&port->dev, "%s: empty read urb"
 				" received\n", __func__);
diff --git a/drivers/usb/serial/spcp8x5.c b/drivers/usb/serial/spcp8x5.c
index 04e373152724..91ff8e3bddbd 100644
--- a/drivers/usb/serial/spcp8x5.c
+++ b/drivers/usb/serial/spcp8x5.c
@@ -462,7 +462,6 @@ static void spcp8x5_process_read_urb(struct urb *urb)
 {
 	struct usb_serial_port *port = urb->context;
 	struct spcp8x5_private *priv = usb_get_serial_port_data(port);
-	struct tty_struct *tty;
 	unsigned char *data = urb->transfer_buffer;
 	unsigned long flags;
 	u8 status;
@@ -481,9 +480,6 @@ static void spcp8x5_process_read_urb(struct urb *urb)
 	if (!urb->actual_length)
 		return;
 
-	tty = tty_port_tty_get(&port->port);
-	if (!tty)
-		return;
 
 	if (status & UART_STATE_TRANSIENT_MASK) {
 		/* break takes precedence over parity, which takes precedence
@@ -500,15 +496,19 @@ static void spcp8x5_process_read_urb(struct urb *urb)
 		if (status & UART_OVERRUN_ERROR)
 			tty_insert_flip_char(&port->port, 0, TTY_OVERRUN);
 
-		if (status & UART_DCD)
-			usb_serial_handle_dcd_change(port, tty,
-				   priv->line_status & MSR_STATUS_LINE_DCD);
+		if (status & UART_DCD) {
+			struct tty_struct *tty = tty_port_tty_get(&port->port);
+			if (tty) {
+				usb_serial_handle_dcd_change(port, tty,
+				       priv->line_status & MSR_STATUS_LINE_DCD);
+				tty_kref_put(tty);
+			}
+		}
 	}
 
 	tty_insert_flip_string_fixed_flag(&port->port, data, tty_flag,
 							urb->actual_length);
-	tty_flip_buffer_push(tty);
-	tty_kref_put(tty);
+	tty_flip_buffer_push(&port->port);
 }
 
 static int spcp8x5_wait_modem_info(struct usb_serial_port *port,
diff --git a/drivers/usb/serial/ssu100.c b/drivers/usb/serial/ssu100.c
index 38713156e957..58bc7e793524 100644
--- a/drivers/usb/serial/ssu100.c
+++ b/drivers/usb/serial/ssu100.c
@@ -582,7 +582,7 @@ static void ssu100_update_lsr(struct usb_serial_port *port, u8 lsr,
 
 }
 
-static int ssu100_process_packet(struct urb *urb)
+static void ssu100_process_read_urb(struct urb *urb)
 {
 	struct usb_serial_port *port = urb->context;
 	char *packet = (char *)urb->transfer_buffer;
@@ -609,7 +609,7 @@ static int ssu100_process_packet(struct urb *urb)
 		ch = packet;
 
 	if (!len)
-		return 0;	/* status only */
+		return;	/* status only */
 
 	if (port->port.console && port->sysrq) {
 		for (i = 0; i < len; i++, ch++) {
@@ -619,24 +619,7 @@ static int ssu100_process_packet(struct urb *urb)
 	} else
 		tty_insert_flip_string_fixed_flag(&port->port, ch, flag, len);
 
-	return len;
-}
-
-static void ssu100_process_read_urb(struct urb *urb)
-{
-	struct usb_serial_port *port = urb->context;
-	struct tty_struct *tty;
-	int count;
-
-	tty = tty_port_tty_get(&port->port);
-	if (!tty)
-		return;
-
-	count = ssu100_process_packet(urb);
-
-	if (count)
-		tty_flip_buffer_push(tty);
-	tty_kref_put(tty);
+	tty_flip_buffer_push(&port->port);
 }
 
 static struct usb_serial_driver ssu100_device = {
diff --git a/drivers/usb/serial/symbolserial.c b/drivers/usb/serial/symbolserial.c
index 2ffa6ae3b5ed..be05e6caf9a3 100644
--- a/drivers/usb/serial/symbolserial.c
+++ b/drivers/usb/serial/symbolserial.c
@@ -48,7 +48,6 @@ static void symbol_int_callback(struct urb *urb)
 	unsigned char *data = urb->transfer_buffer;
 	struct usb_serial_port *port = priv->port;
 	int status = urb->status;
-	struct tty_struct *tty;
 	int result;
 	int data_length;
 
@@ -82,13 +81,8 @@ static void symbol_int_callback(struct urb *urb)
 		 * we pretty much just ignore the size and send everything
 		 * else to the tty layer.
 		 */
-		tty = tty_port_tty_get(&port->port);
-		if (tty) {
-			tty_insert_flip_string(&port->port, &data[1],
-					data_length);
-			tty_flip_buffer_push(tty);
-			tty_kref_put(tty);
-		}
+		tty_insert_flip_string(&port->port, &data[1], data_length);
+		tty_flip_buffer_push(&port->port);
 	} else {
 		dev_dbg(&priv->udev->dev,
 			"Improper amount of data received from the device, "
diff --git a/drivers/usb/serial/ti_usb_3410_5052.c b/drivers/usb/serial/ti_usb_3410_5052.c
index 05077e3c7631..39cb9b807c3c 100644
--- a/drivers/usb/serial/ti_usb_3410_5052.c
+++ b/drivers/usb/serial/ti_usb_3410_5052.c
@@ -121,8 +121,8 @@ static void ti_interrupt_callback(struct urb *urb);
 static void ti_bulk_in_callback(struct urb *urb);
 static void ti_bulk_out_callback(struct urb *urb);
 
-static void ti_recv(struct usb_serial_port *port, struct tty_struct *tty,
-	unsigned char *data, int length);
+static void ti_recv(struct usb_serial_port *port, unsigned char *data,
+		int length);
 static void ti_send(struct ti_port *tport);
 static int ti_set_mcr(struct ti_port *tport, unsigned int mcr);
 static int ti_get_lsr(struct ti_port *tport);
@@ -1118,7 +1118,6 @@ static void ti_bulk_in_callback(struct urb *urb)
 	struct device *dev = &urb->dev->dev;
 	int status = urb->status;
 	int retval = 0;
-	struct tty_struct *tty;
 
 	switch (status) {
 	case 0:
@@ -1145,23 +1144,18 @@ static void ti_bulk_in_callback(struct urb *urb)
 		return;
 	}
 
-	tty = tty_port_tty_get(&port->port);
-	if (tty) {
-		if (urb->actual_length) {
-			usb_serial_debug_data(dev, __func__, urb->actual_length,
-					      urb->transfer_buffer);
+	if (urb->actual_length) {
+		usb_serial_debug_data(dev, __func__, urb->actual_length,
+				      urb->transfer_buffer);
 
-			if (!tport->tp_is_open)
-				dev_dbg(dev, "%s - port closed, dropping data\n",
-					__func__);
-			else
-				ti_recv(port, tty, urb->transfer_buffer,
-						urb->actual_length);
-			spin_lock(&tport->tp_lock);
-			tport->tp_icount.rx += urb->actual_length;
-			spin_unlock(&tport->tp_lock);
-		}
-		tty_kref_put(tty);
+		if (!tport->tp_is_open)
+			dev_dbg(dev, "%s - port closed, dropping data\n",
+				__func__);
+		else
+			ti_recv(port, urb->transfer_buffer, urb->actual_length);
+		spin_lock(&tport->tp_lock);
+		tport->tp_icount.rx += urb->actual_length;
+		spin_unlock(&tport->tp_lock);
 	}
 
 exit:
@@ -1209,8 +1203,8 @@ static void ti_bulk_out_callback(struct urb *urb)
 }
 
 
-static void ti_recv(struct usb_serial_port *port, struct tty_struct *tty,
-	unsigned char *data, int length)
+static void ti_recv(struct usb_serial_port *port, unsigned char *data,
+		int length)
 {
 	int cnt;
 
@@ -1222,11 +1216,10 @@ static void ti_recv(struct usb_serial_port *port, struct tty_struct *tty,
 			if (cnt == 0)
 				break;
 		}
-		tty_flip_buffer_push(tty);
+		tty_flip_buffer_push(&port->port);
 		data += cnt;
 		length -= cnt;
 	} while (length > 0);
-
 }
 
 
diff --git a/drivers/usb/serial/usb_wwan.c b/drivers/usb/serial/usb_wwan.c
index 293b460030cb..a547c91e3c05 100644
--- a/drivers/usb/serial/usb_wwan.c
+++ b/drivers/usb/serial/usb_wwan.c
@@ -275,7 +275,6 @@ static void usb_wwan_indat_callback(struct urb *urb)
 	int err;
 	int endpoint;
 	struct usb_serial_port *port;
-	struct tty_struct *tty;
 	struct device *dev;
 	unsigned char *data = urb->transfer_buffer;
 	int status = urb->status;
@@ -288,16 +287,12 @@ static void usb_wwan_indat_callback(struct urb *urb)
 		dev_dbg(dev, "%s: nonzero status: %d on endpoint %02x.\n",
 			__func__, status, endpoint);
 	} else {
-		tty = tty_port_tty_get(&port->port);
-		if (tty) {
-			if (urb->actual_length) {
-				tty_insert_flip_string(&port->port, data,
-						urb->actual_length);
-				tty_flip_buffer_push(tty);
-			} else
-				dev_dbg(dev, "%s: empty read urb received\n", __func__);
-			tty_kref_put(tty);
-		}
+		if (urb->actual_length) {
+			tty_insert_flip_string(&port->port, data,
+					urb->actual_length);
+			tty_flip_buffer_push(&port->port);
+		} else
+			dev_dbg(dev, "%s: empty read urb received\n", __func__);
 
 		/* Resubmit urb so we continue receiving */
 		err = usb_submit_urb(urb, GFP_ATOMIC);
diff --git a/include/linux/tty.h b/include/linux/tty.h
index f16a47a13a09..f89acd1ed6d3 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -387,7 +387,6 @@ extern void do_SAK(struct tty_struct *tty);
 extern void __do_SAK(struct tty_struct *tty);
 extern void disassociate_ctty(int priv);
 extern void no_tty(void);
-extern void tty_flip_buffer_push(struct tty_struct *tty);
 extern void tty_flush_to_ldisc(struct tty_struct *tty);
 extern void tty_buffer_free_all(struct tty_port *port);
 extern void tty_buffer_flush(struct tty_struct *tty);
diff --git a/include/linux/tty_flip.h b/include/linux/tty_flip.h
index 5cb694aba322..c5572807633a 100644
--- a/include/linux/tty_flip.h
+++ b/include/linux/tty_flip.h
@@ -10,6 +10,7 @@ extern int tty_prepare_flip_string(struct tty_port *port,
 		unsigned char **chars, size_t size);
 extern int tty_prepare_flip_string_flags(struct tty_port *port,
 		unsigned char **chars, char **flags, size_t size);
+extern void tty_flip_buffer_push(struct tty_port *port);
 void tty_schedule_flip(struct tty_struct *tty);
 
 static inline int tty_insert_flip_char(struct tty_port *port,
diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c
index cbec3b642871..b6e44ad6cca6 100644
--- a/net/bluetooth/rfcomm/tty.c
+++ b/net/bluetooth/rfcomm/tty.c
@@ -541,23 +541,21 @@ int rfcomm_dev_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
 static void rfcomm_dev_data_ready(struct rfcomm_dlc *dlc, struct sk_buff *skb)
 {
 	struct rfcomm_dev *dev = dlc->owner;
-	struct tty_struct *tty;
 
 	if (!dev) {
 		kfree_skb(skb);
 		return;
 	}
 
-	tty = dev->port.tty;
-	if (!tty || !skb_queue_empty(&dev->pending)) {
+	if (!skb_queue_empty(&dev->pending)) {
 		skb_queue_tail(&dev->pending, skb);
 		return;
 	}
 
-	BT_DBG("dlc %p tty %p len %d", dlc, tty, skb->len);
+	BT_DBG("dlc %p len %d", dlc, skb->len);
 
 	tty_insert_flip_string(&dev->port, skb->data, skb->len);
-	tty_flip_buffer_push(tty);
+	tty_flip_buffer_push(&dev->port);
 
 	kfree_skb(skb);
 }
@@ -621,14 +619,10 @@ static void rfcomm_dev_modem_status(struct rfcomm_dlc *dlc, u8 v24_sig)
 /* ---- TTY functions ---- */
 static void rfcomm_tty_copy_pending(struct rfcomm_dev *dev)
 {
-	struct tty_struct *tty = dev->port.tty;
 	struct sk_buff *skb;
 	int inserted = 0;
 
-	if (!tty)
-		return;
-
-	BT_DBG("dev %p tty %p", dev, tty);
+	BT_DBG("dev %p", dev);
 
 	rfcomm_dlc_lock(dev->dlc);
 
@@ -641,7 +635,7 @@ static void rfcomm_tty_copy_pending(struct rfcomm_dev *dev)
 	rfcomm_dlc_unlock(dev->dlc);
 
 	if (inserted > 0)
-		tty_flip_buffer_push(tty);
+		tty_flip_buffer_push(&dev->port);
 }
 
 static int rfcomm_tty_open(struct tty_struct *tty, struct file *filp)
diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c
index 2491f6f53871..9a5fd3c3e530 100644
--- a/net/irda/ircomm/ircomm_tty.c
+++ b/net/irda/ircomm/ircomm_tty.c
@@ -1136,14 +1136,14 @@ static int ircomm_tty_data_indication(void *instance, void *sap,
 		ircomm_tty_send_initial_parameters(self);
 		ircomm_tty_link_established(self);
 	}
+	tty_kref_put(tty);
 
 	/*
 	 * Use flip buffer functions since the code may be called from interrupt
 	 * context
 	 */
 	tty_insert_flip_string(&self->port, skb->data, skb->len);
-	tty_flip_buffer_push(tty);
-	tty_kref_put(tty);
+	tty_flip_buffer_push(&self->port);
 
 	/* No need to kfree_skb - see ircomm_ttp_data_indication() */
 
-- 
cgit v1.2.3


From 6732c8bb8671acbdac6cdc93dd72ddd581dd5e25 Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jslaby@suse.cz>
Date: Thu, 3 Jan 2013 15:53:07 +0100
Subject: TTY: switch tty_schedule_flip

Now, we start converting tty buffer functions to actually use
tty_port. This will allow us to get rid of the need of tty in many
call sites. Only tty_port will needed and hence no more
tty_port_tty_get in those paths.

This is the last one: tty_schedule_flip

Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/alpha/kernel/srmcons.c             | 17 ++++++++---------
 drivers/s390/char/keyboard.h            | 12 ++----------
 drivers/staging/serqt_usb2/serqt_usb2.c | 19 ++++++-------------
 drivers/tty/cyclades.c                  | 33 ++++++++++++---------------------
 drivers/tty/moxa.c                      |  4 ++--
 drivers/tty/serial/68328serial.c        | 15 ++++-----------
 drivers/tty/serial/lpc32xx_hs.c         |  6 +-----
 drivers/tty/tty_buffer.c                |  8 ++++----
 drivers/tty/vt/keyboard.c               | 19 +++----------------
 drivers/tty/vt/vt.c                     | 16 ++++++++--------
 include/linux/tty_flip.h                |  2 +-
 11 files changed, 51 insertions(+), 100 deletions(-)

(limited to 'include/linux')

diff --git a/arch/alpha/kernel/srmcons.c b/arch/alpha/kernel/srmcons.c
index 21b57a66e809..6f01d9ad7b81 100644
--- a/arch/alpha/kernel/srmcons.c
+++ b/arch/alpha/kernel/srmcons.c
@@ -44,9 +44,8 @@ typedef union _srmcons_result {
 
 /* called with callback_lock held */
 static int
-srmcons_do_receive_chars(struct tty_struct *tty)
+srmcons_do_receive_chars(struct tty_port *port)
 {
-	struct tty_port *port = tty->port;
 	srmcons_result result;
 	int count = 0, loops = 0;
 
@@ -59,7 +58,7 @@ srmcons_do_receive_chars(struct tty_struct *tty)
 	} while((result.bits.status & 1) && (++loops < 10));
 
 	if (count)
-		tty_schedule_flip(tty);
+		tty_schedule_flip(port);
 
 	return count;
 }
@@ -74,7 +73,7 @@ srmcons_receive_chars(unsigned long data)
 
 	local_irq_save(flags);
 	if (spin_trylock(&srmcons_callback_lock)) {
-		if (!srmcons_do_receive_chars(port->tty))
+		if (!srmcons_do_receive_chars(port))
 			incr = 100;
 		spin_unlock(&srmcons_callback_lock);
 	} 
@@ -89,7 +88,7 @@ srmcons_receive_chars(unsigned long data)
 
 /* called with callback_lock held */
 static int
-srmcons_do_write(struct tty_struct *tty, const char *buf, int count)
+srmcons_do_write(struct tty_port *port, const char *buf, int count)
 {
 	static char str_cr[1] = "\r";
 	long c, remaining = count;
@@ -114,10 +113,10 @@ srmcons_do_write(struct tty_struct *tty, const char *buf, int count)
 			cur += result.bits.c;
 
 			/*
-			 * Check for pending input iff a tty was provided
+			 * Check for pending input iff a tty port was provided
 			 */
-			if (tty)
-				srmcons_do_receive_chars(tty);
+			if (port)
+				srmcons_do_receive_chars(port);
 		}
 
 		while (need_cr) {
@@ -136,7 +135,7 @@ srmcons_write(struct tty_struct *tty,
 	unsigned long flags;
 
 	spin_lock_irqsave(&srmcons_callback_lock, flags);
-	srmcons_do_write(tty, (const char *) buf, count);
+	srmcons_do_write(tty->port, (const char *) buf, count);
 	spin_unlock_irqrestore(&srmcons_callback_lock, flags);
 
 	return count;
diff --git a/drivers/s390/char/keyboard.h b/drivers/s390/char/keyboard.h
index acab28d4f06b..a31f339211d5 100644
--- a/drivers/s390/char/keyboard.h
+++ b/drivers/s390/char/keyboard.h
@@ -43,22 +43,14 @@ int kbd_ioctl(struct kbd_data *, unsigned int, unsigned long);
 static inline void
 kbd_put_queue(struct tty_port *port, int ch)
 {
-	struct tty_struct *tty = tty_port_tty_get(port);
-	if (!tty)
-		return;
 	tty_insert_flip_char(port, ch, 0);
-	tty_schedule_flip(tty);
-	tty_kref_put(tty);
+	tty_schedule_flip(port);
 }
 
 static inline void
 kbd_puts_queue(struct tty_port *port, char *cp)
 {
-	struct tty_struct *tty = tty_port_tty_get(port);
-	if (!tty)
-		return;
 	while (*cp)
 		tty_insert_flip_char(port, *cp++, 0);
-	tty_schedule_flip(tty);
-	tty_kref_put(tty);
+	tty_schedule_flip(port);
 }
diff --git a/drivers/staging/serqt_usb2/serqt_usb2.c b/drivers/staging/serqt_usb2/serqt_usb2.c
index df29a3de29f2..b1bb1a6abe81 100644
--- a/drivers/staging/serqt_usb2/serqt_usb2.c
+++ b/drivers/staging/serqt_usb2/serqt_usb2.c
@@ -356,7 +356,6 @@ static void qt_read_bulk_callback(struct urb *urb)
 	struct usb_serial_port *port = urb->context;
 	struct usb_serial *serial = get_usb_serial(port, __func__);
 	struct quatech_port *qt_port = qt_get_port_private(port);
-	struct tty_struct *tty;
 	int result;
 
 	if (urb->status) {
@@ -367,27 +366,23 @@ static void qt_read_bulk_callback(struct urb *urb)
 		return;
 	}
 
-	tty = tty_port_tty_get(&port->port);
-	if (!tty)
-		return;
-
 	dev_dbg(&port->dev,
 		"%s - port->RxHolding = %d\n", __func__, qt_port->RxHolding);
 
 	if (port_paranoia_check(port, __func__) != 0) {
 		qt_port->ReadBulkStopped = 1;
-		goto exit;
+		return;
 	}
 
 	if (!serial)
-		goto exit;
+		return;
 
 	if (qt_port->closePending == 1) {
 		/* Were closing , stop reading */
 		dev_dbg(&port->dev,
 			"%s - (qt_port->closepending == 1\n", __func__);
 		qt_port->ReadBulkStopped = 1;
-		goto exit;
+		return;
 	}
 
 	/*
@@ -397,7 +392,7 @@ static void qt_read_bulk_callback(struct urb *urb)
 	 */
 	if (qt_port->RxHolding == 1) {
 		qt_port->ReadBulkStopped = 1;
-		goto exit;
+		return;
 	}
 
 	if (urb->status) {
@@ -406,7 +401,7 @@ static void qt_read_bulk_callback(struct urb *urb)
 		dev_dbg(&port->dev,
 			"%s - nonzero read bulk status received: %d\n",
 			__func__, urb->status);
-		goto exit;
+		return;
 	}
 
 	if (urb->actual_length)
@@ -427,13 +422,11 @@ static void qt_read_bulk_callback(struct urb *urb)
 	else {
 		if (urb->actual_length) {
 			tty_flip_buffer_push(&port->port);
-			tty_schedule_flip(tty);
+			tty_schedule_flip(&port->port);
 		}
 	}
 
 	schedule_work(&port->work);
-exit:
-	tty_kref_put(tty);
 }
 
 /*
diff --git a/drivers/tty/cyclades.c b/drivers/tty/cyclades.c
index d1fe9a1f8475..42a329b8af9f 100644
--- a/drivers/tty/cyclades.c
+++ b/drivers/tty/cyclades.c
@@ -441,7 +441,6 @@ static void cyy_chip_rx(struct cyclades_card *cinfo, int chip,
 		void __iomem *base_addr)
 {
 	struct cyclades_port *info;
-	struct tty_struct *tty;
 	struct tty_port *port;
 	int len, index = cinfo->bus_index;
 	u8 ivr, save_xir, channel, save_car, data, char_count;
@@ -458,18 +457,6 @@ static void cyy_chip_rx(struct cyclades_card *cinfo, int chip,
 	cyy_writeb(info, CyCAR, save_xir);
 	ivr = cyy_readb(info, CyRIVR) & CyIVRMask;
 
-	tty = tty_port_tty_get(port);
-	/* if there is nowhere to put the data, discard it */
-	if (tty == NULL) {
-		if (ivr == CyIVRRxEx) {	/* exception */
-			data = cyy_readb(info, CyRDSR);
-		} else {	/* normal character reception */
-			char_count = cyy_readb(info, CyRDCR);
-			while (char_count--)
-				data = cyy_readb(info, CyRDSR);
-		}
-		goto end;
-	}
 	/* there is an open port for this data */
 	if (ivr == CyIVRRxEx) {	/* exception */
 		data = cyy_readb(info, CyRDSR);
@@ -486,7 +473,6 @@ static void cyy_chip_rx(struct cyclades_card *cinfo, int chip,
 
 		if (data & info->ignore_status_mask) {
 			info->icount.rx++;
-			tty_kref_put(tty);
 			return;
 		}
 		if (tty_buffer_request_room(port, 1)) {
@@ -496,8 +482,14 @@ static void cyy_chip_rx(struct cyclades_card *cinfo, int chip,
 						cyy_readb(info, CyRDSR),
 						TTY_BREAK);
 					info->icount.rx++;
-					if (port->flags & ASYNC_SAK)
-						do_SAK(tty);
+					if (port->flags & ASYNC_SAK) {
+						struct tty_struct *tty =
+							tty_port_tty_get(port);
+						if (tty) {
+							do_SAK(tty);
+							tty_kref_put(tty);
+						}
+					}
 				} else if (data & CyFRAME) {
 					tty_insert_flip_char(port,
 						cyy_readb(info, CyRDSR),
@@ -566,9 +558,8 @@ static void cyy_chip_rx(struct cyclades_card *cinfo, int chip,
 		}
 		info->idle_stats.recv_idle = jiffies;
 	}
-	tty_schedule_flip(tty);
-	tty_kref_put(tty);
-end:
+	tty_schedule_flip(port);
+
 	/* end of service */
 	cyy_writeb(info, CyRIR, save_xir & 0x3f);
 	cyy_writeb(info, CyCAR, save_car);
@@ -1012,7 +1003,7 @@ static void cyz_handle_rx(struct cyclades_port *info, struct tty_struct *tty)
 						jiffies + 1);
 #endif
 			info->idle_stats.recv_idle = jiffies;
-			tty_schedule_flip(tty);
+			tty_schedule_flip(&info->port);
 		}
 		/* Update rx_get */
 		cy_writel(&buf_ctrl->rx_get, new_rx_get);
@@ -1191,7 +1182,7 @@ static void cyz_handle_cmd(struct cyclades_card *cinfo)
 		if (delta_count)
 			wake_up_interruptible(&info->port.delta_msr_wait);
 		if (special_count)
-			tty_schedule_flip(tty);
+			tty_schedule_flip(&info->port);
 		tty_kref_put(tty);
 	}
 }
diff --git a/drivers/tty/moxa.c b/drivers/tty/moxa.c
index f42492db31c9..adeac255e526 100644
--- a/drivers/tty/moxa.c
+++ b/drivers/tty/moxa.c
@@ -1405,7 +1405,7 @@ static int moxa_poll_port(struct moxa_port *p, unsigned int handle,
 		if (inited && !test_bit(TTY_THROTTLED, &tty->flags) &&
 				MoxaPortRxQueue(p) > 0) { /* RX */
 			MoxaPortReadData(p);
-			tty_schedule_flip(tty);
+			tty_schedule_flip(&p->port);
 		}
 	} else {
 		clear_bit(EMPTYWAIT, &p->statusflags);
@@ -1430,7 +1430,7 @@ static int moxa_poll_port(struct moxa_port *p, unsigned int handle,
 
 	if (tty && (intr & IntrBreak) && !I_IGNBRK(tty)) { /* BREAK */
 		tty_insert_flip_char(&p->port, 0, TTY_BREAK);
-		tty_schedule_flip(tty);
+		tty_schedule_flip(&p->port);
 	}
 
 	if (intr & IntrLine)
diff --git a/drivers/tty/serial/68328serial.c b/drivers/tty/serial/68328serial.c
index 3719273cf0be..641a5a4d73d9 100644
--- a/drivers/tty/serial/68328serial.c
+++ b/drivers/tty/serial/68328serial.c
@@ -262,8 +262,7 @@ static void rs_start(struct tty_struct *tty)
 	local_irq_restore(flags);
 }
 
-static void receive_chars(struct m68k_serial *info, struct tty_struct *tty,
-		unsigned short rx)
+static void receive_chars(struct m68k_serial *info, unsigned short rx)
 {
 	m68328_uart *uart = &uart_addr[info->line];
 	unsigned char ch, flag;
@@ -293,9 +292,6 @@ static void receive_chars(struct m68k_serial *info, struct tty_struct *tty,
 			}
 		}
 
-		if(!tty)
-			goto clear_and_exit;
-		
 		flag = TTY_NORMAL;
 
 		if (rx & URX_PARITY_ERROR)
@@ -310,10 +306,7 @@ static void receive_chars(struct m68k_serial *info, struct tty_struct *tty,
 	} while((rx = uart->urx.w) & URX_DATA_READY);
 #endif
 
-	tty_schedule_flip(tty);
-
-clear_and_exit:
-	return;
+	tty_schedule_flip(&info->tport);
 }
 
 static void transmit_chars(struct m68k_serial *info, struct tty_struct *tty)
@@ -367,11 +360,11 @@ irqreturn_t rs_interrupt(int irq, void *dev_id)
 	tx = uart->utx.w;
 
 	if (rx & URX_DATA_READY)
-		receive_chars(info, tty, rx);
+		receive_chars(info, rx);
 	if (tx & UTX_TX_AVAIL)
 		transmit_chars(info, tty);
 #else
-	receive_chars(info, tty, rx);
+	receive_chars(info, rx);
 #endif
 	tty_kref_put(tty);
 
diff --git a/drivers/tty/serial/lpc32xx_hs.c b/drivers/tty/serial/lpc32xx_hs.c
index c8448e6f52e5..c01b58f3729c 100644
--- a/drivers/tty/serial/lpc32xx_hs.c
+++ b/drivers/tty/serial/lpc32xx_hs.c
@@ -323,7 +323,6 @@ static irqreturn_t serial_lpc32xx_interrupt(int irq, void *dev_id)
 {
 	struct uart_port *port = dev_id;
 	struct tty_port *port = &port->state->port;
-	struct tty_struct *tty = tty_port_tty_get(tport);
 	u32 status;
 
 	spin_lock(&port->lock);
@@ -348,9 +347,7 @@ static irqreturn_t serial_lpc32xx_interrupt(int irq, void *dev_id)
 		       LPC32XX_HSUART_IIR(port->membase));
 		port->icount.overrun++;
 		tty_insert_flip_char(tport, 0, TTY_OVERRUN);
-		if (tty) {
-			tty_schedule_flip(tty);
-		}
+		tty_schedule_flip(tport);
 	}
 
 	/* Data received? */
@@ -366,7 +363,6 @@ static irqreturn_t serial_lpc32xx_interrupt(int irq, void *dev_id)
 	}
 
 	spin_unlock(&port->lock);
-	tty_kref_put(tty);
 
 	return IRQ_HANDLED;
 }
diff --git a/drivers/tty/tty_buffer.c b/drivers/tty/tty_buffer.c
index b6efacadf23b..d6969f6e4c43 100644
--- a/drivers/tty/tty_buffer.c
+++ b/drivers/tty/tty_buffer.c
@@ -349,7 +349,7 @@ EXPORT_SYMBOL(tty_insert_flip_string_flags);
 
 /**
  *	tty_schedule_flip	-	push characters to ldisc
- *	@tty: tty to push from
+ *	@port: tty port to push from
  *
  *	Takes any pending buffers and transfers their ownership to the
  *	ldisc side of the queue. It then schedules those characters for
@@ -360,11 +360,11 @@ EXPORT_SYMBOL(tty_insert_flip_string_flags);
  *	Locking: Takes port->buf.lock
  */
 
-void tty_schedule_flip(struct tty_struct *tty)
+void tty_schedule_flip(struct tty_port *port)
 {
-	struct tty_bufhead *buf = &tty->port->buf;
+	struct tty_bufhead *buf = &port->buf;
 	unsigned long flags;
-	WARN_ON(tty->port->low_latency);
+	WARN_ON(port->low_latency);
 
 	spin_lock_irqsave(&buf->lock, flags);
 	if (buf->tail != NULL)
diff --git a/drivers/tty/vt/keyboard.c b/drivers/tty/vt/keyboard.c
index 5aace4d47cb6..a9af1b9ae160 100644
--- a/drivers/tty/vt/keyboard.c
+++ b/drivers/tty/vt/keyboard.c
@@ -307,26 +307,17 @@ int kbd_rate(struct kbd_repeat *rep)
  */
 static void put_queue(struct vc_data *vc, int ch)
 {
-	struct tty_struct *tty = vc->port.tty;
-
 	tty_insert_flip_char(&vc->port, ch, 0);
-	if (tty) {
-		tty_schedule_flip(tty);
-	}
+	tty_schedule_flip(&vc->port);
 }
 
 static void puts_queue(struct vc_data *vc, char *cp)
 {
-	struct tty_struct *tty = vc->port.tty;
-
-	if (!tty)
-		return;
-
 	while (*cp) {
 		tty_insert_flip_char(&vc->port, *cp, 0);
 		cp++;
 	}
-	tty_schedule_flip(tty);
+	tty_schedule_flip(&vc->port);
 }
 
 static void applkey(struct vc_data *vc, int key, char mode)
@@ -582,12 +573,8 @@ static void fn_inc_console(struct vc_data *vc)
 
 static void fn_send_intr(struct vc_data *vc)
 {
-	struct tty_struct *tty = vc->port.tty;
-
-	if (!tty)
-		return;
 	tty_insert_flip_char(&vc->port, 0, TTY_BREAK);
-	tty_schedule_flip(tty);
+	tty_schedule_flip(&vc->port);
 }
 
 static void fn_scroll_forw(struct vc_data *vc)
diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c
index 811f2505e9ee..1a2728034599 100644
--- a/drivers/tty/vt/vt.c
+++ b/drivers/tty/vt/vt.c
@@ -1333,13 +1333,13 @@ static void csi_m(struct vc_data *vc)
 	update_attr(vc);
 }
 
-static void respond_string(const char *p, struct tty_struct *tty)
+static void respond_string(const char *p, struct tty_port *port)
 {
 	while (*p) {
-		tty_insert_flip_char(tty->port, *p, 0);
+		tty_insert_flip_char(port, *p, 0);
 		p++;
 	}
-	tty_schedule_flip(tty);
+	tty_schedule_flip(port);
 }
 
 static void cursor_report(struct vc_data *vc, struct tty_struct *tty)
@@ -1347,17 +1347,17 @@ static void cursor_report(struct vc_data *vc, struct tty_struct *tty)
 	char buf[40];
 
 	sprintf(buf, "\033[%d;%dR", vc->vc_y + (vc->vc_decom ? vc->vc_top + 1 : 1), vc->vc_x + 1);
-	respond_string(buf, tty);
+	respond_string(buf, tty->port);
 }
 
 static inline void status_report(struct tty_struct *tty)
 {
-	respond_string("\033[0n", tty);	/* Terminal ok */
+	respond_string("\033[0n", tty->port);	/* Terminal ok */
 }
 
-static inline void respond_ID(struct tty_struct * tty)
+static inline void respond_ID(struct tty_struct *tty)
 {
-	respond_string(VT102ID, tty);
+	respond_string(VT102ID, tty->port);
 }
 
 void mouse_report(struct tty_struct *tty, int butt, int mrx, int mry)
@@ -1366,7 +1366,7 @@ void mouse_report(struct tty_struct *tty, int butt, int mrx, int mry)
 
 	sprintf(buf, "\033[M%c%c%c", (char)(' ' + butt), (char)('!' + mrx),
 		(char)('!' + mry));
-	respond_string(buf, tty);
+	respond_string(buf, tty->port);
 }
 
 /* invoked via ioctl(TIOCLINUX) and through set_selection */
diff --git a/include/linux/tty_flip.h b/include/linux/tty_flip.h
index c5572807633a..e0f252633b47 100644
--- a/include/linux/tty_flip.h
+++ b/include/linux/tty_flip.h
@@ -11,7 +11,7 @@ extern int tty_prepare_flip_string(struct tty_port *port,
 extern int tty_prepare_flip_string_flags(struct tty_port *port,
 		unsigned char **chars, char **flags, size_t size);
 extern void tty_flip_buffer_push(struct tty_port *port);
-void tty_schedule_flip(struct tty_struct *tty);
+void tty_schedule_flip(struct tty_port *port);
 
 static inline int tty_insert_flip_char(struct tty_port *port,
 					unsigned char ch, char flag)
-- 
cgit v1.2.3


From ec063899b7b308019afa9f5eb32f0a58a6c6ee53 Mon Sep 17 00:00:00 2001
From: Alexander Shiyan <shc_work@mail.ru>
Date: Mon, 3 Dec 2012 22:23:31 +0400
Subject: serial: sccnxp: Implement polling mode

This patch adds support for polling work mode, i.e. system is perform
periodical check chip status when IRQ-line is not connected to CPU.

Signed-off-by: Alexander Shiyan <shc_work@mail.ru>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/sccnxp.c          | 148 ++++++++++++++++++++++++-----------
 include/linux/platform_data/sccnxp.h |   2 +
 2 files changed, 105 insertions(+), 45 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/tty/serial/sccnxp.c b/drivers/tty/serial/sccnxp.c
index 2ced871becff..3a4c57e6ea1e 100644
--- a/drivers/tty/serial/sccnxp.c
+++ b/drivers/tty/serial/sccnxp.c
@@ -23,6 +23,7 @@
 #include <linux/io.h>
 #include <linux/tty.h>
 #include <linux/tty_flip.h>
+#include <linux/spinlock.h>
 #include <linux/platform_device.h>
 #include <linux/platform_data/sccnxp.h>
 
@@ -106,6 +107,7 @@ enum {
 struct sccnxp_port {
 	struct uart_driver	uart;
 	struct uart_port	port[SCCNXP_MAX_UARTS];
+	bool			opened[SCCNXP_MAX_UARTS];
 
 	const char		*name;
 	int			irq;
@@ -122,7 +124,10 @@ struct sccnxp_port {
 	struct console		console;
 #endif
 
-	struct mutex		sccnxp_mutex;
+	spinlock_t		lock;
+
+	bool			poll;
+	struct timer_list	timer;
 
 	struct sccnxp_pdata	pdata;
 };
@@ -371,31 +376,48 @@ static void sccnxp_handle_tx(struct uart_port *port)
 		uart_write_wakeup(port);
 }
 
-static irqreturn_t sccnxp_ist(int irq, void *dev_id)
+static void sccnxp_handle_events(struct sccnxp_port *s)
 {
 	int i;
 	u8 isr;
-	struct sccnxp_port *s = (struct sccnxp_port *)dev_id;
-
-	mutex_lock(&s->sccnxp_mutex);
 
-	for (;;) {
+	do {
 		isr = sccnxp_read(&s->port[0], SCCNXP_ISR_REG);
 		isr &= s->imr;
 		if (!isr)
 			break;
 
-		dev_dbg(s->port[0].dev, "IRQ status: 0x%02x\n", isr);
-
 		for (i = 0; i < s->uart.nr; i++) {
-			if (isr & ISR_RXRDY(i))
+			if (s->opened[i] && (isr & ISR_RXRDY(i)))
 				sccnxp_handle_rx(&s->port[i]);
-			if (isr & ISR_TXRDY(i))
+			if (s->opened[i] && (isr & ISR_TXRDY(i)))
 				sccnxp_handle_tx(&s->port[i]);
 		}
-	}
+	} while (1);
+}
+
+static void sccnxp_timer(unsigned long data)
+{
+	struct sccnxp_port *s = (struct sccnxp_port *)data;
+	unsigned long flags;
 
-	mutex_unlock(&s->sccnxp_mutex);
+	spin_lock_irqsave(&s->lock, flags);
+	sccnxp_handle_events(s);
+	spin_unlock_irqrestore(&s->lock, flags);
+
+	if (!timer_pending(&s->timer))
+		mod_timer(&s->timer, jiffies +
+			  usecs_to_jiffies(s->pdata.poll_time_us));
+}
+
+static irqreturn_t sccnxp_ist(int irq, void *dev_id)
+{
+	struct sccnxp_port *s = (struct sccnxp_port *)dev_id;
+	unsigned long flags;
+
+	spin_lock_irqsave(&s->lock, flags);
+	sccnxp_handle_events(s);
+	spin_unlock_irqrestore(&s->lock, flags);
 
 	return IRQ_HANDLED;
 }
@@ -403,8 +425,9 @@ static irqreturn_t sccnxp_ist(int irq, void *dev_id)
 static void sccnxp_start_tx(struct uart_port *port)
 {
 	struct sccnxp_port *s = dev_get_drvdata(port->dev);
+	unsigned long flags;
 
-	mutex_lock(&s->sccnxp_mutex);
+	spin_lock_irqsave(&s->lock, flags);
 
 	/* Set direction to output */
 	if (s->flags & SCCNXP_HAVE_IO)
@@ -412,7 +435,7 @@ static void sccnxp_start_tx(struct uart_port *port)
 
 	sccnxp_enable_irq(port, IMR_TXRDY);
 
-	mutex_unlock(&s->sccnxp_mutex);
+	spin_unlock_irqrestore(&s->lock, flags);
 }
 
 static void sccnxp_stop_tx(struct uart_port *port)
@@ -423,20 +446,22 @@ static void sccnxp_stop_tx(struct uart_port *port)
 static void sccnxp_stop_rx(struct uart_port *port)
 {
 	struct sccnxp_port *s = dev_get_drvdata(port->dev);
+	unsigned long flags;
 
-	mutex_lock(&s->sccnxp_mutex);
+	spin_lock_irqsave(&s->lock, flags);
 	sccnxp_port_write(port, SCCNXP_CR_REG, CR_RX_DISABLE);
-	mutex_unlock(&s->sccnxp_mutex);
+	spin_unlock_irqrestore(&s->lock, flags);
 }
 
 static unsigned int sccnxp_tx_empty(struct uart_port *port)
 {
 	u8 val;
+	unsigned long flags;
 	struct sccnxp_port *s = dev_get_drvdata(port->dev);
 
-	mutex_lock(&s->sccnxp_mutex);
+	spin_lock_irqsave(&s->lock, flags);
 	val = sccnxp_port_read(port, SCCNXP_SR_REG);
-	mutex_unlock(&s->sccnxp_mutex);
+	spin_unlock_irqrestore(&s->lock, flags);
 
 	return (val & SR_TXEMT) ? TIOCSER_TEMT : 0;
 }
@@ -449,28 +474,30 @@ static void sccnxp_enable_ms(struct uart_port *port)
 static void sccnxp_set_mctrl(struct uart_port *port, unsigned int mctrl)
 {
 	struct sccnxp_port *s = dev_get_drvdata(port->dev);
+	unsigned long flags;
 
 	if (!(s->flags & SCCNXP_HAVE_IO))
 		return;
 
-	mutex_lock(&s->sccnxp_mutex);
+	spin_lock_irqsave(&s->lock, flags);
 
 	sccnxp_set_bit(port, DTR_OP, mctrl & TIOCM_DTR);
 	sccnxp_set_bit(port, RTS_OP, mctrl & TIOCM_RTS);
 
-	mutex_unlock(&s->sccnxp_mutex);
+	spin_unlock_irqrestore(&s->lock, flags);
 }
 
 static unsigned int sccnxp_get_mctrl(struct uart_port *port)
 {
 	u8 bitmask, ipr;
+	unsigned long flags;
 	struct sccnxp_port *s = dev_get_drvdata(port->dev);
 	unsigned int mctrl = TIOCM_DSR | TIOCM_CTS | TIOCM_CAR;
 
 	if (!(s->flags & SCCNXP_HAVE_IO))
 		return mctrl;
 
-	mutex_lock(&s->sccnxp_mutex);
+	spin_lock_irqsave(&s->lock, flags);
 
 	ipr = ~sccnxp_read(port, SCCNXP_IPCR_REG);
 
@@ -499,7 +526,7 @@ static unsigned int sccnxp_get_mctrl(struct uart_port *port)
 		mctrl |= (ipr & bitmask) ? TIOCM_RNG : 0;
 	}
 
-	mutex_unlock(&s->sccnxp_mutex);
+	spin_unlock_irqrestore(&s->lock, flags);
 
 	return mctrl;
 }
@@ -507,21 +534,23 @@ static unsigned int sccnxp_get_mctrl(struct uart_port *port)
 static void sccnxp_break_ctl(struct uart_port *port, int break_state)
 {
 	struct sccnxp_port *s = dev_get_drvdata(port->dev);
+	unsigned long flags;
 
-	mutex_lock(&s->sccnxp_mutex);
+	spin_lock_irqsave(&s->lock, flags);
 	sccnxp_port_write(port, SCCNXP_CR_REG, break_state ?
 			  CR_CMD_START_BREAK : CR_CMD_STOP_BREAK);
-	mutex_unlock(&s->sccnxp_mutex);
+	spin_unlock_irqrestore(&s->lock, flags);
 }
 
 static void sccnxp_set_termios(struct uart_port *port,
 			       struct ktermios *termios, struct ktermios *old)
 {
 	struct sccnxp_port *s = dev_get_drvdata(port->dev);
+	unsigned long flags;
 	u8 mr1, mr2;
 	int baud;
 
-	mutex_lock(&s->sccnxp_mutex);
+	spin_lock_irqsave(&s->lock, flags);
 
 	/* Mask termios capabilities we don't support */
 	termios->c_cflag &= ~CMSPAR;
@@ -588,20 +617,22 @@ static void sccnxp_set_termios(struct uart_port *port,
 	/* Update timeout according to new baud rate */
 	uart_update_timeout(port, termios->c_cflag, baud);
 
+	/* Report actual baudrate back to core */
 	if (tty_termios_baud_rate(termios))
 		tty_termios_encode_baud_rate(termios, baud, baud);
 
 	/* Enable RX & TX */
 	sccnxp_port_write(port, SCCNXP_CR_REG, CR_RX_ENABLE | CR_TX_ENABLE);
 
-	mutex_unlock(&s->sccnxp_mutex);
+	spin_unlock_irqrestore(&s->lock, flags);
 }
 
 static int sccnxp_startup(struct uart_port *port)
 {
 	struct sccnxp_port *s = dev_get_drvdata(port->dev);
+	unsigned long flags;
 
-	mutex_lock(&s->sccnxp_mutex);
+	spin_lock_irqsave(&s->lock, flags);
 
 	if (s->flags & SCCNXP_HAVE_IO) {
 		/* Outputs are controlled manually */
@@ -620,7 +651,9 @@ static int sccnxp_startup(struct uart_port *port)
 	/* Enable RX interrupt */
 	sccnxp_enable_irq(port, IMR_RXRDY);
 
-	mutex_unlock(&s->sccnxp_mutex);
+	s->opened[port->line] = 1;
+
+	spin_unlock_irqrestore(&s->lock, flags);
 
 	return 0;
 }
@@ -628,8 +661,11 @@ static int sccnxp_startup(struct uart_port *port)
 static void sccnxp_shutdown(struct uart_port *port)
 {
 	struct sccnxp_port *s = dev_get_drvdata(port->dev);
+	unsigned long flags;
+
+	spin_lock_irqsave(&s->lock, flags);
 
-	mutex_lock(&s->sccnxp_mutex);
+	s->opened[port->line] = 0;
 
 	/* Disable interrupts */
 	sccnxp_disable_irq(port, IMR_TXRDY | IMR_RXRDY);
@@ -641,7 +677,7 @@ static void sccnxp_shutdown(struct uart_port *port)
 	if (s->flags & SCCNXP_HAVE_IO)
 		sccnxp_set_bit(port, DIR_OP, 0);
 
-	mutex_unlock(&s->sccnxp_mutex);
+	spin_unlock_irqrestore(&s->lock, flags);
 }
 
 static const char *sccnxp_type(struct uart_port *port)
@@ -715,10 +751,11 @@ static void sccnxp_console_write(struct console *co, const char *c, unsigned n)
 {
 	struct sccnxp_port *s = (struct sccnxp_port *)co->data;
 	struct uart_port *port = &s->port[co->index];
+	unsigned long flags;
 
-	mutex_lock(&s->sccnxp_mutex);
+	spin_lock_irqsave(&s->lock, flags);
 	uart_console_write(port, c, n, sccnxp_console_putchar);
-	mutex_unlock(&s->sccnxp_mutex);
+	spin_unlock_irqrestore(&s->lock, flags);
 }
 
 static int sccnxp_console_setup(struct console *co, char *options)
@@ -757,7 +794,7 @@ static int sccnxp_probe(struct platform_device *pdev)
 	}
 	platform_set_drvdata(pdev, s);
 
-	mutex_init(&s->sccnxp_mutex);
+	spin_lock_init(&s->lock);
 
 	/* Individual chip settings */
 	switch (chiptype) {
@@ -854,11 +891,19 @@ static int sccnxp_probe(struct platform_device *pdev)
 	} else
 		memcpy(&s->pdata, pdata, sizeof(struct sccnxp_pdata));
 
-	s->irq = platform_get_irq(pdev, 0);
-	if (s->irq <= 0) {
-		dev_err(&pdev->dev, "Missing irq resource data\n");
-		ret = -ENXIO;
-		goto err_out;
+	if (pdata->poll_time_us) {
+		dev_info(&pdev->dev, "Using poll mode, resolution %u usecs\n",
+			 pdata->poll_time_us);
+		s->poll = 1;
+	}
+
+	if (!s->poll) {
+		s->irq = platform_get_irq(pdev, 0);
+		if (s->irq < 0) {
+			dev_err(&pdev->dev, "Missing irq resource data\n");
+			ret = -ENXIO;
+			goto err_out;
+		}
 	}
 
 	/* Check input frequency */
@@ -923,13 +968,23 @@ static int sccnxp_probe(struct platform_device *pdev)
 	if (s->pdata.init)
 		s->pdata.init();
 
-	ret = devm_request_threaded_irq(&pdev->dev, s->irq, NULL, sccnxp_ist,
-					IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
-					dev_name(&pdev->dev), s);
-	if (!ret)
+	if (!s->poll) {
+		ret = devm_request_threaded_irq(&pdev->dev, s->irq, NULL,
+						sccnxp_ist,
+						IRQF_TRIGGER_FALLING |
+						IRQF_ONESHOT,
+						dev_name(&pdev->dev), s);
+		if (!ret)
+			return 0;
+
+		dev_err(&pdev->dev, "Unable to reguest IRQ %i\n", s->irq);
+	} else {
+		init_timer(&s->timer);
+		setup_timer(&s->timer, sccnxp_timer, (unsigned long)s);
+		mod_timer(&s->timer, jiffies +
+			  usecs_to_jiffies(s->pdata.poll_time_us));
 		return 0;
-
-	dev_err(&pdev->dev, "Unable to reguest IRQ %i\n", s->irq);
+	}
 
 err_out:
 	platform_set_drvdata(pdev, NULL);
@@ -942,7 +997,10 @@ static int sccnxp_remove(struct platform_device *pdev)
 	int i;
 	struct sccnxp_port *s = platform_get_drvdata(pdev);
 
-	devm_free_irq(&pdev->dev, s->irq, s);
+	if (!s->poll)
+		devm_free_irq(&pdev->dev, s->irq, s);
+	else
+		del_timer_sync(&s->timer);
 
 	for (i = 0; i < s->uart.nr; i++)
 		uart_remove_one_port(&s->uart, &s->port[i]);
diff --git a/include/linux/platform_data/sccnxp.h b/include/linux/platform_data/sccnxp.h
index 7311ccd3217f..096de90cf848 100644
--- a/include/linux/platform_data/sccnxp.h
+++ b/include/linux/platform_data/sccnxp.h
@@ -84,6 +84,8 @@ struct sccnxp_pdata {
 	const u8		reg_shift;
 	/* Modem control lines configuration */
 	const u32		mctrl_cfg[SCCNXP_MAX_UARTS];
+	/* Timer value for polling mode (usecs) */
+	const unsigned int	poll_time_us;
 	/* Called during startup */
 	void (*init)(void);
 	/* Called before finish */
-- 
cgit v1.2.3


From 463dcc42e4832150eb3de804682b924f9b73a91a Mon Sep 17 00:00:00 2001
From: Alexander Shiyan <shc_work@mail.ru>
Date: Mon, 3 Dec 2012 22:23:32 +0400
Subject: serial: sccnxp: Rename header file to match functionality

Signed-off-by: Alexander Shiyan <shc_work@mail.ru>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/mips/sni/a20r.c                        |  2 +-
 drivers/tty/serial/sccnxp.c                 |  2 +-
 include/linux/platform_data/sccnxp.h        | 95 -----------------------------
 include/linux/platform_data/serial-sccnxp.h | 95 +++++++++++++++++++++++++++++
 4 files changed, 97 insertions(+), 97 deletions(-)
 delete mode 100644 include/linux/platform_data/sccnxp.h
 create mode 100644 include/linux/platform_data/serial-sccnxp.h

(limited to 'include/linux')

diff --git a/arch/mips/sni/a20r.c b/arch/mips/sni/a20r.c
index 9cb9d43a3a0e..e05ad4da44d9 100644
--- a/arch/mips/sni/a20r.c
+++ b/arch/mips/sni/a20r.c
@@ -118,7 +118,7 @@ static struct resource sc26xx_rsrc[] = {
 	}
 };
 
-#include <linux/platform_data/sccnxp.h>
+#include <linux/platform_data/serial-sccnxp.h>
 
 static struct sccnxp_pdata sccnxp_data = {
 	.reg_shift	= 2,
diff --git a/drivers/tty/serial/sccnxp.c b/drivers/tty/serial/sccnxp.c
index 3a4c57e6ea1e..c864353352c5 100644
--- a/drivers/tty/serial/sccnxp.c
+++ b/drivers/tty/serial/sccnxp.c
@@ -25,7 +25,7 @@
 #include <linux/tty_flip.h>
 #include <linux/spinlock.h>
 #include <linux/platform_device.h>
-#include <linux/platform_data/sccnxp.h>
+#include <linux/platform_data/serial-sccnxp.h>
 
 #define SCCNXP_NAME			"uart-sccnxp"
 #define SCCNXP_MAJOR			204
diff --git a/include/linux/platform_data/sccnxp.h b/include/linux/platform_data/sccnxp.h
deleted file mode 100644
index 096de90cf848..000000000000
--- a/include/linux/platform_data/sccnxp.h
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- *  NXP (Philips) SCC+++(SCN+++) serial driver
- *
- *  Copyright (C) 2012 Alexander Shiyan <shc_work@mail.ru>
- *
- *  Based on sc26xx.c, by Thomas Bogendörfer (tsbogend@alpha.franken.de)
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
- */
-
-#ifndef __SCCNXP_H
-#define __SCCNXP_H
-
-#define SCCNXP_MAX_UARTS	2
-
-/* Output lines */
-#define LINE_OP0		1
-#define LINE_OP1		2
-#define LINE_OP2		3
-#define LINE_OP3		4
-#define LINE_OP4		5
-#define LINE_OP5		6
-#define LINE_OP6		7
-#define LINE_OP7		8
-
-/* Input lines */
-#define LINE_IP0		9
-#define LINE_IP1		10
-#define LINE_IP2		11
-#define LINE_IP3		12
-#define LINE_IP4		13
-#define LINE_IP5		14
-#define LINE_IP6		15
-
-/* Signals */
-#define DTR_OP			0	/* DTR */
-#define RTS_OP			4	/* RTS */
-#define DSR_IP			8	/* DSR */
-#define CTS_IP			12	/* CTS */
-#define DCD_IP			16	/* DCD */
-#define RNG_IP			20	/* RNG */
-
-#define DIR_OP			24	/* Special signal for control RS-485.
-					 * Goes high when transmit,
-					 * then goes low.
-					 */
-
-/* Routing control signal 'sig' to line 'line' */
-#define MCTRL_SIG(sig, line)	((line) << (sig))
-
-/*
- * Example board initialization data:
- *
- * static struct resource sc2892_resources[] = {
- *	DEFINE_RES_MEM(UART_PHYS_START, 0x10),
- *	DEFINE_RES_IRQ(IRQ_EXT2),
- * };
- *
- * static struct sccnxp_pdata sc2892_info = {
- *	.frequency	= 3686400,
- *	.mctrl_cfg[0]	= MCTRL_SIG(DIR_OP, LINE_OP0),
- *	.mctrl_cfg[1]	= MCTRL_SIG(DIR_OP, LINE_OP1),
- * };
- *
- * static struct platform_device sc2892 = {
- *	.name		= "sc2892",
- *	.id		= -1,
- *	.resource	= sc2892_resources,
- *	.num_resources	= ARRAY_SIZE(sc2892_resources),
- *	.dev = {
- *		.platform_data	= &sc2892_info,
- *	},
- * };
- */
-
-/* SCCNXP platform data structure */
-struct sccnxp_pdata {
-	/* Frequency (extrenal clock or crystal) */
-	int			frequency;
-	/* Shift for A0 line */
-	const u8		reg_shift;
-	/* Modem control lines configuration */
-	const u32		mctrl_cfg[SCCNXP_MAX_UARTS];
-	/* Timer value for polling mode (usecs) */
-	const unsigned int	poll_time_us;
-	/* Called during startup */
-	void (*init)(void);
-	/* Called before finish */
-	void (*exit)(void);
-};
-
-#endif
diff --git a/include/linux/platform_data/serial-sccnxp.h b/include/linux/platform_data/serial-sccnxp.h
new file mode 100644
index 000000000000..215574d1e81d
--- /dev/null
+++ b/include/linux/platform_data/serial-sccnxp.h
@@ -0,0 +1,95 @@
+/*
+ *  NXP (Philips) SCC+++(SCN+++) serial driver
+ *
+ *  Copyright (C) 2012 Alexander Shiyan <shc_work@mail.ru>
+ *
+ *  Based on sc26xx.c, by Thomas Bogendörfer (tsbogend@alpha.franken.de)
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ */
+
+#ifndef _PLATFORM_DATA_SERIAL_SCCNXP_H_
+#define _PLATFORM_DATA_SERIAL_SCCNXP_H_
+
+#define SCCNXP_MAX_UARTS	2
+
+/* Output lines */
+#define LINE_OP0		1
+#define LINE_OP1		2
+#define LINE_OP2		3
+#define LINE_OP3		4
+#define LINE_OP4		5
+#define LINE_OP5		6
+#define LINE_OP6		7
+#define LINE_OP7		8
+
+/* Input lines */
+#define LINE_IP0		9
+#define LINE_IP1		10
+#define LINE_IP2		11
+#define LINE_IP3		12
+#define LINE_IP4		13
+#define LINE_IP5		14
+#define LINE_IP6		15
+
+/* Signals */
+#define DTR_OP			0	/* DTR */
+#define RTS_OP			4	/* RTS */
+#define DSR_IP			8	/* DSR */
+#define CTS_IP			12	/* CTS */
+#define DCD_IP			16	/* DCD */
+#define RNG_IP			20	/* RNG */
+
+#define DIR_OP			24	/* Special signal for control RS-485.
+					 * Goes high when transmit,
+					 * then goes low.
+					 */
+
+/* Routing control signal 'sig' to line 'line' */
+#define MCTRL_SIG(sig, line)	((line) << (sig))
+
+/*
+ * Example board initialization data:
+ *
+ * static struct resource sc2892_resources[] = {
+ *	DEFINE_RES_MEM(UART_PHYS_START, 0x10),
+ *	DEFINE_RES_IRQ(IRQ_EXT2),
+ * };
+ *
+ * static struct sccnxp_pdata sc2892_info = {
+ *	.frequency	= 3686400,
+ *	.mctrl_cfg[0]	= MCTRL_SIG(DIR_OP, LINE_OP0),
+ *	.mctrl_cfg[1]	= MCTRL_SIG(DIR_OP, LINE_OP1),
+ * };
+ *
+ * static struct platform_device sc2892 = {
+ *	.name		= "sc2892",
+ *	.id		= -1,
+ *	.resource	= sc2892_resources,
+ *	.num_resources	= ARRAY_SIZE(sc2892_resources),
+ *	.dev = {
+ *		.platform_data	= &sc2892_info,
+ *	},
+ * };
+ */
+
+/* SCCNXP platform data structure */
+struct sccnxp_pdata {
+	/* Frequency (extrenal clock or crystal) */
+	int			frequency;
+	/* Shift for A0 line */
+	const u8		reg_shift;
+	/* Modem control lines configuration */
+	const u32		mctrl_cfg[SCCNXP_MAX_UARTS];
+	/* Timer value for polling mode (usecs) */
+	const unsigned int	poll_time_us;
+	/* Called during startup */
+	void (*init)(void);
+	/* Called before finish */
+	void (*exit)(void);
+};
+
+#endif
-- 
cgit v1.2.3


From 9ee4b83e51f741a645c43e61b9f3f8075ca0fdf4 Mon Sep 17 00:00:00 2001
From: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Date: Thu, 10 Jan 2013 11:25:11 +0200
Subject: serial: 8250: Add support for dmaengine

Add support for dmaengine API. The drivers can implement the
struct uart_8250_dma member in struct uart_8250_port and
8250.c can take care of the rest.

Signed-off-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/8250/8250.c     |  31 +++++-
 drivers/tty/serial/8250/8250.h     |  50 +++++++++
 drivers/tty/serial/8250/8250_dma.c | 213 +++++++++++++++++++++++++++++++++++++
 drivers/tty/serial/8250/Kconfig    |   8 ++
 drivers/tty/serial/8250/Makefile   |   1 +
 include/linux/serial_8250.h        |   4 +
 6 files changed, 304 insertions(+), 3 deletions(-)
 create mode 100644 drivers/tty/serial/8250/8250_dma.c

(limited to 'include/linux')

diff --git a/drivers/tty/serial/8250/8250.c b/drivers/tty/serial/8250/8250.c
index e2ac25a037ef..cb7db5a8ae92 100644
--- a/drivers/tty/serial/8250/8250.c
+++ b/drivers/tty/serial/8250/8250.c
@@ -1269,7 +1269,9 @@ static void serial8250_start_tx(struct uart_port *port)
 	struct uart_8250_port *up =
 		container_of(port, struct uart_8250_port, port);
 
-	if (!(up->ier & UART_IER_THRI)) {
+	if (up->dma && !serial8250_tx_dma(up)) {
+		return;
+	} else if (!(up->ier & UART_IER_THRI)) {
 		up->ier |= UART_IER_THRI;
 		serial_port_out(port, UART_IER, up->ier);
 
@@ -1467,6 +1469,7 @@ int serial8250_handle_irq(struct uart_port *port, unsigned int iir)
 	unsigned long flags;
 	struct uart_8250_port *up =
 		container_of(port, struct uart_8250_port, port);
+	int dma_err = 0;
 
 	if (iir & UART_IIR_NO_INT)
 		return 0;
@@ -1477,8 +1480,13 @@ int serial8250_handle_irq(struct uart_port *port, unsigned int iir)
 
 	DEBUG_INTR("status = %x...", status);
 
-	if (status & (UART_LSR_DR | UART_LSR_BI))
-		status = serial8250_rx_chars(up, status);
+	if (status & (UART_LSR_DR | UART_LSR_BI)) {
+		if (up->dma)
+			dma_err = serial8250_rx_dma(up, iir);
+
+		if (!up->dma || dma_err)
+			status = serial8250_rx_chars(up, status);
+	}
 	serial8250_modem_status(up);
 	if (status & UART_LSR_THRE)
 		serial8250_tx_chars(up);
@@ -2120,6 +2128,18 @@ dont_test_tx_en:
 	up->lsr_saved_flags = 0;
 	up->msr_saved_flags = 0;
 
+	/*
+	 * Request DMA channels for both RX and TX.
+	 */
+	if (up->dma) {
+		retval = serial8250_request_dma(up);
+		if (retval) {
+			pr_warn_ratelimited("ttyS%d - failed to request DMA\n",
+					    serial_index(port));
+			up->dma = NULL;
+		}
+	}
+
 	/*
 	 * Finally, enable interrupts.  Note: Modem status interrupts
 	 * are set via set_termios(), which will be occurring imminently
@@ -2153,6 +2173,9 @@ static void serial8250_shutdown(struct uart_port *port)
 	up->ier = 0;
 	serial_port_out(port, UART_IER, 0);
 
+	if (up->dma)
+		serial8250_release_dma(up);
+
 	spin_lock_irqsave(&port->lock, flags);
 	if (port->flags & UPF_FOURPORT) {
 		/* reset interrupts on the AST Fourport board */
@@ -3217,6 +3240,8 @@ int serial8250_register_8250_port(struct uart_8250_port *up)
 			uart->dl_read = up->dl_read;
 		if (up->dl_write)
 			uart->dl_write = up->dl_write;
+		if (up->dma)
+			uart->dma = up->dma;
 
 		if (serial8250_isa_config != NULL)
 			serial8250_isa_config(0, &uart->port,
diff --git a/drivers/tty/serial/8250/8250.h b/drivers/tty/serial/8250/8250.h
index 3b4ea84898c2..363d549d2e0d 100644
--- a/drivers/tty/serial/8250/8250.h
+++ b/drivers/tty/serial/8250/8250.h
@@ -12,6 +12,35 @@
  */
 
 #include <linux/serial_8250.h>
+#include <linux/dmaengine.h>
+
+struct uart_8250_dma {
+	dma_filter_fn		fn;
+	void			*rx_param;
+	void			*tx_param;
+
+	int			rx_chan_id;
+	int			tx_chan_id;
+
+	struct dma_slave_config	rxconf;
+	struct dma_slave_config	txconf;
+
+	struct dma_chan		*rxchan;
+	struct dma_chan		*txchan;
+
+	dma_addr_t		rx_addr;
+	dma_addr_t		tx_addr;
+
+	dma_cookie_t		rx_cookie;
+	dma_cookie_t		tx_cookie;
+
+	void			*rx_buf;
+
+	size_t			rx_size;
+	size_t			tx_size;
+
+	unsigned char		tx_running:1;
+};
 
 struct old_serial_port {
 	unsigned int uart;
@@ -142,3 +171,24 @@ static inline int is_omap1510_8250(struct uart_8250_port *pt)
 	return 0;
 }
 #endif
+
+#ifdef CONFIG_SERIAL_8250_DMA
+extern int serial8250_tx_dma(struct uart_8250_port *);
+extern int serial8250_rx_dma(struct uart_8250_port *, unsigned int iir);
+extern int serial8250_request_dma(struct uart_8250_port *);
+extern void serial8250_release_dma(struct uart_8250_port *);
+#else
+static inline int serial8250_tx_dma(struct uart_8250_port *p)
+{
+	return -1;
+}
+static inline int serial8250_rx_dma(struct uart_8250_port *p, unsigned int iir)
+{
+	return -1;
+}
+static inline int serial8250_request_dma(struct uart_8250_port *p)
+{
+	return -1;
+}
+static inline void serial8250_release_dma(struct uart_8250_port *p) { }
+#endif
diff --git a/drivers/tty/serial/8250/8250_dma.c b/drivers/tty/serial/8250/8250_dma.c
new file mode 100644
index 000000000000..95516a15a441
--- /dev/null
+++ b/drivers/tty/serial/8250/8250_dma.c
@@ -0,0 +1,213 @@
+/*
+ * 8250_dma.c - DMA Engine API support for 8250.c
+ *
+ * Copyright (C) 2013 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+#include <linux/tty.h>
+#include <linux/tty_flip.h>
+#include <linux/serial_reg.h>
+#include <linux/dma-mapping.h>
+
+#include "8250.h"
+
+static void __dma_tx_complete(void *param)
+{
+	struct uart_8250_port	*p = param;
+	struct uart_8250_dma	*dma = p->dma;
+	struct circ_buf		*xmit = &p->port.state->xmit;
+
+	dma->tx_running = 0;
+
+	dma_sync_single_for_cpu(dma->txchan->device->dev, dma->tx_addr,
+				UART_XMIT_SIZE, DMA_TO_DEVICE);
+
+	xmit->tail += dma->tx_size;
+	xmit->tail &= UART_XMIT_SIZE - 1;
+	p->port.icount.tx += dma->tx_size;
+
+	if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
+		uart_write_wakeup(&p->port);
+
+	if (!uart_circ_empty(xmit) && !uart_tx_stopped(&p->port)) {
+		serial8250_tx_dma(p);
+		uart_write_wakeup(&p->port);
+	}
+}
+
+static void __dma_rx_complete(void *param)
+{
+	struct uart_8250_port	*p = param;
+	struct uart_8250_dma	*dma = p->dma;
+	struct tty_struct	*tty = p->port.state->port.tty;
+	struct dma_tx_state	state;
+
+	dma_sync_single_for_cpu(dma->rxchan->device->dev, dma->rx_addr,
+				dma->rx_size, DMA_FROM_DEVICE);
+
+	dmaengine_tx_status(dma->rxchan, dma->rx_cookie, &state);
+	dmaengine_terminate_all(dma->rxchan);
+
+	tty_insert_flip_string(tty, dma->rx_buf, dma->rx_size - state.residue);
+	p->port.icount.rx += dma->rx_size - state.residue;
+
+	tty_flip_buffer_push(tty);
+}
+
+int serial8250_tx_dma(struct uart_8250_port *p)
+{
+	struct uart_8250_dma		*dma = p->dma;
+	struct circ_buf			*xmit = &p->port.state->xmit;
+	struct dma_async_tx_descriptor	*desc;
+
+	if (dma->tx_running) {
+		uart_write_wakeup(&p->port);
+		return -EBUSY;
+	}
+
+	dma->tx_size = CIRC_CNT_TO_END(xmit->head, xmit->tail, UART_XMIT_SIZE);
+
+	desc = dmaengine_prep_slave_single(dma->txchan,
+					   dma->tx_addr + xmit->tail,
+					   dma->tx_size, DMA_MEM_TO_DEV,
+					   DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+	if (!desc)
+		return -EBUSY;
+
+	dma->tx_running = 1;
+
+	desc->callback = __dma_tx_complete;
+	desc->callback_param = p;
+
+	dma->tx_cookie = dmaengine_submit(desc);
+
+	dma_sync_single_for_device(dma->txchan->device->dev, dma->tx_addr,
+				   UART_XMIT_SIZE, DMA_TO_DEVICE);
+
+	dma_async_issue_pending(dma->txchan);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(serial8250_tx_dma);
+
+int serial8250_rx_dma(struct uart_8250_port *p, unsigned int iir)
+{
+	struct uart_8250_dma		*dma = p->dma;
+	struct dma_async_tx_descriptor	*desc;
+	struct dma_tx_state		state;
+	int				dma_status;
+
+	/*
+	 * If RCVR FIFO trigger level was not reached, complete the transfer and
+	 * let 8250.c copy the remaining data.
+	 */
+	if ((iir & 0x3f) == UART_IIR_RX_TIMEOUT) {
+		dma_status = dmaengine_tx_status(dma->rxchan, dma->rx_cookie,
+						&state);
+		if (dma_status == DMA_IN_PROGRESS) {
+			dmaengine_pause(dma->rxchan);
+			__dma_rx_complete(p);
+		}
+		return -ETIMEDOUT;
+	}
+
+	desc = dmaengine_prep_slave_single(dma->rxchan, dma->rx_addr,
+					   dma->rx_size, DMA_DEV_TO_MEM,
+					   DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+	if (!desc)
+		return -EBUSY;
+
+	desc->callback = __dma_rx_complete;
+	desc->callback_param = p;
+
+	dma->rx_cookie = dmaengine_submit(desc);
+
+	dma_sync_single_for_device(dma->rxchan->device->dev, dma->rx_addr,
+				   dma->rx_size, DMA_FROM_DEVICE);
+
+	dma_async_issue_pending(dma->rxchan);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(serial8250_rx_dma);
+
+int serial8250_request_dma(struct uart_8250_port *p)
+{
+	struct uart_8250_dma	*dma = p->dma;
+	dma_cap_mask_t		mask;
+
+	dma->rxconf.src_addr = p->port.mapbase + UART_RX;
+	dma->txconf.dst_addr = p->port.mapbase + UART_TX;
+
+	dma_cap_zero(mask);
+	dma_cap_set(DMA_SLAVE, mask);
+
+	/* Get a channel for RX */
+	dma->rxchan = dma_request_channel(mask, dma->fn, dma->rx_param);
+	if (!dma->rxchan)
+		return -ENODEV;
+
+	dmaengine_slave_config(dma->rxchan, &dma->rxconf);
+
+	/* Get a channel for TX */
+	dma->txchan = dma_request_channel(mask, dma->fn, dma->tx_param);
+	if (!dma->txchan) {
+		dma_release_channel(dma->rxchan);
+		return -ENODEV;
+	}
+
+	dmaengine_slave_config(dma->txchan, &dma->txconf);
+
+	/* RX buffer */
+	if (!dma->rx_size)
+		dma->rx_size = PAGE_SIZE;
+
+	dma->rx_buf = dma_alloc_coherent(dma->rxchan->device->dev, dma->rx_size,
+					&dma->rx_addr, GFP_KERNEL);
+	if (!dma->rx_buf) {
+		dma_release_channel(dma->rxchan);
+		dma_release_channel(dma->txchan);
+		return -ENOMEM;
+	}
+
+	/* TX buffer */
+	dma->tx_addr = dma_map_single(dma->txchan->device->dev,
+					p->port.state->xmit.buf,
+					UART_XMIT_SIZE,
+					DMA_TO_DEVICE);
+
+	dev_dbg_ratelimited(p->port.dev, "got both dma channels\n");
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(serial8250_request_dma);
+
+void serial8250_release_dma(struct uart_8250_port *p)
+{
+	struct uart_8250_dma *dma = p->dma;
+
+	if (!dma)
+		return;
+
+	/* Release RX resources */
+	dmaengine_terminate_all(dma->rxchan);
+	dma_free_coherent(dma->rxchan->device->dev, dma->rx_size, dma->rx_buf,
+			  dma->rx_addr);
+	dma_release_channel(dma->rxchan);
+	dma->rxchan = NULL;
+
+	/* Release TX resources */
+	dmaengine_terminate_all(dma->txchan);
+	dma_unmap_single(dma->txchan->device->dev, dma->tx_addr,
+			 UART_XMIT_SIZE, DMA_TO_DEVICE);
+	dma_release_channel(dma->txchan);
+	dma->txchan = NULL;
+	dma->tx_running = 0;
+
+	dev_dbg_ratelimited(p->port.dev, "dma channels released\n");
+}
+EXPORT_SYMBOL_GPL(serial8250_release_dma);
diff --git a/drivers/tty/serial/8250/Kconfig b/drivers/tty/serial/8250/Kconfig
index 3ce3e7533e4c..d31f4c63d4ba 100644
--- a/drivers/tty/serial/8250/Kconfig
+++ b/drivers/tty/serial/8250/Kconfig
@@ -84,6 +84,14 @@ config SERIAL_8250_GSC
 	depends on SERIAL_8250 && GSC
 	default SERIAL_8250
 
+config SERIAL_8250_DMA
+	bool "DMA support for 16550 compatible UART controllers" if EXPERT
+	depends on SERIAL_8250 && DMADEVICES=y
+	default SERIAL_8250
+	help
+	  This builds DMA support that can be used with 8250/16650
+	  compatible UART controllers that support DMA signaling.
+
 config SERIAL_8250_PCI
 	tristate "8250/16550 PCI device support" if EXPERT
 	depends on SERIAL_8250 && PCI
diff --git a/drivers/tty/serial/8250/Makefile b/drivers/tty/serial/8250/Makefile
index 108fe7fe13e2..a23838a4d535 100644
--- a/drivers/tty/serial/8250/Makefile
+++ b/drivers/tty/serial/8250/Makefile
@@ -5,6 +5,7 @@
 obj-$(CONFIG_SERIAL_8250)		+= 8250_core.o
 8250_core-y				:= 8250.o
 8250_core-$(CONFIG_SERIAL_8250_PNP)	+= 8250_pnp.o
+8250_core-$(CONFIG_SERIAL_8250_DMA)	+= 8250_dma.o
 obj-$(CONFIG_SERIAL_8250_GSC)		+= 8250_gsc.o
 obj-$(CONFIG_SERIAL_8250_PCI)		+= 8250_pci.o
 obj-$(CONFIG_SERIAL_8250_HP300)		+= 8250_hp300.o
diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h
index c490d20b3fb8..af47a8af6024 100644
--- a/include/linux/serial_8250.h
+++ b/include/linux/serial_8250.h
@@ -59,6 +59,8 @@ enum {
 	PLAT8250_DEV_SM501,
 };
 
+struct uart_8250_dma;
+
 /*
  * This should be used by drivers which want to register
  * their own 8250 ports without registering their own
@@ -91,6 +93,8 @@ struct uart_8250_port {
 #define MSR_SAVE_FLAGS UART_MSR_ANY_DELTA
 	unsigned char		msr_saved_flags;
 
+	struct uart_8250_dma	*dma;
+
 	/* 8250 specific callbacks */
 	int			(*dl_read)(struct uart_8250_port *);
 	void			(*dl_write)(struct uart_8250_port *, int);
-- 
cgit v1.2.3


From 774a1221e862b343388347bac9b318767336b20b Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 15 Jan 2013 18:52:51 -0800
Subject: module, async: async_synchronize_full() on module init iff async is
 used

If the default iosched is built as module, the kernel may deadlock
while trying to load the iosched module on device probe if the probing
was running off async.  This is because async_synchronize_full() at
the end of module init ends up waiting for the async job which
initiated the module loading.

 async A				modprobe

 1. finds a device
 2. registers the block device
 3. request_module(default iosched)
					4. modprobe in userland
					5. load and init module
					6. async_synchronize_full()

Async A waits for modprobe to finish in request_module() and modprobe
waits for async A to finish in async_synchronize_full().

Because there's no easy to track dependency once control goes out to
userland, implementing properly nested flushing is difficult.  For
now, make module init perform async_synchronize_full() iff module init
has queued async jobs as suggested by Linus.

This avoids the described deadlock because iosched module doesn't use
async and thus wouldn't invoke async_synchronize_full().  This is
hacky and incomplete.  It will deadlock if async module loading nests;
however, this works around the known problem case and seems to be the
best of bad options.

For more details, please refer to the following thread.

  http://thread.gmane.org/gmane.linux.kernel/1420814

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Alex Riesen <raa.lkml@gmail.com>
Tested-by: Ming Lei <ming.lei@canonical.com>
Tested-by: Alex Riesen <raa.lkml@gmail.com>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched.h |  1 +
 kernel/async.c        |  3 +++
 kernel/module.c       | 27 +++++++++++++++++++++++++--
 3 files changed, 29 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 206bb089c06b..6fc8f45de4e9 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1810,6 +1810,7 @@ extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut,
 #define PF_MEMALLOC	0x00000800	/* Allocating memory */
 #define PF_NPROC_EXCEEDED 0x00001000	/* set_user noticed that RLIMIT_NPROC was exceeded */
 #define PF_USED_MATH	0x00002000	/* if unset the fpu must be initialized before use */
+#define PF_USED_ASYNC	0x00004000	/* used async_schedule*(), used by module init */
 #define PF_NOFREEZE	0x00008000	/* this thread should not be frozen */
 #define PF_FROZEN	0x00010000	/* frozen for system suspend */
 #define PF_FSTRANS	0x00020000	/* inside a filesystem transaction */
diff --git a/kernel/async.c b/kernel/async.c
index 9d3118384858..a1d585c351d6 100644
--- a/kernel/async.c
+++ b/kernel/async.c
@@ -196,6 +196,9 @@ static async_cookie_t __async_schedule(async_func_ptr *ptr, void *data, struct a
 	atomic_inc(&entry_count);
 	spin_unlock_irqrestore(&async_lock, flags);
 
+	/* mark that this task has queued an async job, used by module init */
+	current->flags |= PF_USED_ASYNC;
+
 	/* schedule for execution */
 	queue_work(system_unbound_wq, &entry->work);
 
diff --git a/kernel/module.c b/kernel/module.c
index 250092c1d57d..b10b048367e1 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -3013,6 +3013,12 @@ static int do_init_module(struct module *mod)
 {
 	int ret = 0;
 
+	/*
+	 * We want to find out whether @mod uses async during init.  Clear
+	 * PF_USED_ASYNC.  async_schedule*() will set it.
+	 */
+	current->flags &= ~PF_USED_ASYNC;
+
 	blocking_notifier_call_chain(&module_notify_list,
 			MODULE_STATE_COMING, mod);
 
@@ -3058,8 +3064,25 @@ static int do_init_module(struct module *mod)
 	blocking_notifier_call_chain(&module_notify_list,
 				     MODULE_STATE_LIVE, mod);
 
-	/* We need to finish all async code before the module init sequence is done */
-	async_synchronize_full();
+	/*
+	 * We need to finish all async code before the module init sequence
+	 * is done.  This has potential to deadlock.  For example, a newly
+	 * detected block device can trigger request_module() of the
+	 * default iosched from async probing task.  Once userland helper
+	 * reaches here, async_synchronize_full() will wait on the async
+	 * task waiting on request_module() and deadlock.
+	 *
+	 * This deadlock is avoided by perfomring async_synchronize_full()
+	 * iff module init queued any async jobs.  This isn't a full
+	 * solution as it will deadlock the same if module loading from
+	 * async jobs nests more than once; however, due to the various
+	 * constraints, this hack seems to be the best option for now.
+	 * Please refer to the following thread for details.
+	 *
+	 * http://thread.gmane.org/gmane.linux.kernel/1420814
+	 */
+	if (current->flags & PF_USED_ASYNC)
+		async_synchronize_full();
 
 	mutex_lock(&module_mutex);
 	/* Drop initial reference. */
-- 
cgit v1.2.3


From e65b9ad222c280c031bc8d3642cc38dd3026fe06 Mon Sep 17 00:00:00 2001
From: Jiri Kosina <jkosina@suse.cz>
Date: Tue, 15 Jan 2013 20:12:37 +0100
Subject: lockdep, rwsem: fix down_write_nest_lock() if
 !CONFIG_DEBUG_LOCK_ALLOC

Commit 1b963c81b145 ("lockdep, rwsem: provide down_write_nest_lock()")
contains a bug in a codepath when CONFIG_DEBUG_LOCK_ALLOC is disabled,
which causes down_read() to be called instead of down_write() by mistake
on such configurations.  Fix that.

Reported-and-tested-by: Andrew Clayton <andrew@digital-domain.net>
Reported-and-tested-by: Zlatko Calusic <zlatko.calusic@iskon.hr>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
Reviewed-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/rwsem.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h
index 413cc11e414a..8da67d625e13 100644
--- a/include/linux/rwsem.h
+++ b/include/linux/rwsem.h
@@ -135,7 +135,7 @@ do {								\
 
 #else
 # define down_read_nested(sem, subclass)		down_read(sem)
-# define down_write_nest_lock(sem, nest_lock)	down_read(sem)
+# define down_write_nest_lock(sem, nest_lock)	down_write(sem)
 # define down_write_nested(sem, subclass)	down_write(sem)
 #endif
 
-- 
cgit v1.2.3


From 6e8575faa8fa680d59404a4d58d12190667be815 Mon Sep 17 00:00:00 2001
From: Sam Lang <sam.lang@inktank.com>
Date: Fri, 28 Dec 2012 09:56:46 -0800
Subject: ceph: Check for created flag in response from mds

The mds now sends back a created inode if the create request
performed the create.  If the file already existed, no inode is
returned in the reply.  This allows ceph to set the created flag
in atomic_open so that permissions are properly checked in the case
that the file wasn't created by the create call to the mds.

To ensure compability with previous kernels, a feature for sending
back the inode in the create reply was added, so that the mds will
only send back the inode if the client indicates it supports the
feature.

Signed-off-by: Sam Lang <sam.lang@inktank.com>
Reviewed-by: Sage Weil <sage@inktank.com>
---
 fs/ceph/file.c                     |  3 +++
 fs/ceph/mds_client.c               | 33 +++++++++++++++++++++++++++++++--
 fs/ceph/mds_client.h               |  6 ++++++
 include/linux/ceph/ceph_features.h |  5 ++++-
 4 files changed, 44 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 2c71cbd78332..22b5b71b5401 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -266,6 +266,9 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
 		err = finish_no_open(file, dn);
 	} else {
 		dout("atomic_open finish_open on dn %p\n", dn);
+		if (req->r_op == CEPH_MDS_OP_CREATE && req->r_reply_info.has_create_ino) {
+			*opened |= FILE_CREATED;
+		}
 		err = finish_open(file, dentry, ceph_open, opened);
 	}
 
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 9165eb8309eb..d95842036c8b 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -232,6 +232,30 @@ bad:
 	return -EIO;
 }
 
+/*
+ * parse create results
+ */
+static int parse_reply_info_create(void **p, void *end,
+				  struct ceph_mds_reply_info_parsed *info,
+				  int features)
+{
+	if (features & CEPH_FEATURE_REPLY_CREATE_INODE) {
+		if (*p == end) {
+			info->has_create_ino = false;
+		} else {
+			info->has_create_ino = true;
+			info->ino = ceph_decode_64(p);
+		}
+	}
+
+	if (unlikely(*p != end))
+		goto bad;
+	return 0;
+
+bad:
+	return -EIO;
+}
+
 /*
  * parse extra results
  */
@@ -241,8 +265,12 @@ static int parse_reply_info_extra(void **p, void *end,
 {
 	if (info->head->op == CEPH_MDS_OP_GETFILELOCK)
 		return parse_reply_info_filelock(p, end, info, features);
-	else
+	else if (info->head->op == CEPH_MDS_OP_READDIR)
 		return parse_reply_info_dir(p, end, info, features);
+	else if (info->head->op == CEPH_MDS_OP_CREATE)
+		return parse_reply_info_create(p, end, info, features);
+	else
+		return -EIO;
 }
 
 /*
@@ -2170,7 +2198,8 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
 	mutex_lock(&req->r_fill_mutex);
 	err = ceph_fill_trace(mdsc->fsc->sb, req, req->r_session);
 	if (err == 0) {
-		if (result == 0 && req->r_op != CEPH_MDS_OP_GETFILELOCK &&
+		if (result == 0 && (req->r_op == CEPH_MDS_OP_READDIR ||
+				    req->r_op == CEPH_MDS_OP_LSSNAP) &&
 		    rinfo->dir_nr)
 			ceph_readdir_prepopulate(req, req->r_session);
 		ceph_unreserve_caps(mdsc, &req->r_caps_reservation);
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index dd26846dd71d..567f7c60354e 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -74,6 +74,12 @@ struct ceph_mds_reply_info_parsed {
 			struct ceph_mds_reply_info_in *dir_in;
 			u8                            dir_complete, dir_end;
 		};
+
+		/* for create results */
+		struct {
+			bool has_create_ino;
+			u64 ino;
+		};
 	};
 
 	/* encoded blob describing snapshot contexts for certain
diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h
index dad579b0c0e6..6b7c6acbb3bf 100644
--- a/include/linux/ceph/ceph_features.h
+++ b/include/linux/ceph/ceph_features.h
@@ -14,13 +14,16 @@
 #define CEPH_FEATURE_DIRLAYOUTHASH  (1<<7)
 /* bits 8-17 defined by user-space; not supported yet here */
 #define CEPH_FEATURE_CRUSH_TUNABLES (1<<18)
+/* bits 19-25 defined by user-space; not supported yet here */
+#define CEPH_FEATURE_REPLY_CREATE_INODE (1<<27)
 
 /*
  * Features supported.
  */
 #define CEPH_FEATURES_SUPPORTED_DEFAULT  \
 	(CEPH_FEATURE_NOSRCADDR |	 \
-	 CEPH_FEATURE_CRUSH_TUNABLES)
+	 CEPH_FEATURE_CRUSH_TUNABLES |   \
+	 CEPH_FEATURE_REPLY_CREATE_INODE)
 
 #define CEPH_FEATURES_REQUIRED_DEFAULT   \
 	(CEPH_FEATURE_NOSRCADDR)
-- 
cgit v1.2.3


From 1604f488ac2dcce33c8218e75a000e8c5fb57e61 Mon Sep 17 00:00:00 2001
From: Jim Schutt <jaschut@sandia.gov>
Date: Fri, 30 Nov 2012 09:15:25 -0700
Subject: libceph: for chooseleaf rules, retry CRUSH map descent from root if
 leaf is failed

Add libceph support for a new CRUSH tunable recently added to Ceph servers.

Consider the CRUSH rule
  step chooseleaf firstn 0 type <node_type>

This rule means that <n> replicas will be chosen in a manner such that
each chosen leaf's branch will contain a unique instance of <node_type>.

When an object is re-replicated after a leaf failure, if the CRUSH map uses
a chooseleaf rule the remapped replica ends up under the <node_type> bucket
that held the failed leaf.  This causes uneven data distribution across the
storage cluster, to the point that when all the leaves but one fail under a
particular <node_type> bucket, that remaining leaf holds all the data from
its failed peers.

This behavior also limits the number of peers that can participate in the
re-replication of the data held by the failed leaf, which increases the
time required to re-replicate after a failure.

For a chooseleaf CRUSH rule, the tree descent has two steps: call them the
inner and outer descents.

If the tree descent down to <node_type> is the outer descent, and the descent
from <node_type> down to a leaf is the inner descent, the issue is that a
down leaf is detected on the inner descent, so only the inner descent is
retried.

In order to disperse re-replicated data as widely as possible across a
storage cluster after a failure, we want to retry the outer descent. So,
fix up crush_choose() to allow the inner descent to return immediately on
choosing a failed leaf.  Wire this up as a new CRUSH tunable.

Note that after this change, for a chooseleaf rule, if the primary OSD
in a placement group has failed, choosing a replacement may result in
one of the other OSDs in the PG colliding with the new primary.  This
requires that OSD's data for that PG to need moving as well.  This
seems unavoidable but should be relatively rare.

This corresponds to ceph.git commit 88f218181a9e6d2292e2697fc93797d0f6d6e5dc.

Signed-off-by: Jim Schutt <jaschut@sandia.gov>
Reviewed-by: Sage Weil <sage@inktank.com>
---
 include/linux/ceph/ceph_features.h |  7 +++++--
 include/linux/crush/crush.h        |  2 ++
 net/ceph/crush/mapper.c            | 13 ++++++++++---
 net/ceph/osdmap.c                  |  6 ++++++
 4 files changed, 23 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h
index 6b7c6acbb3bf..2160aab482f6 100644
--- a/include/linux/ceph/ceph_features.h
+++ b/include/linux/ceph/ceph_features.h
@@ -14,7 +14,9 @@
 #define CEPH_FEATURE_DIRLAYOUTHASH  (1<<7)
 /* bits 8-17 defined by user-space; not supported yet here */
 #define CEPH_FEATURE_CRUSH_TUNABLES (1<<18)
-/* bits 19-25 defined by user-space; not supported yet here */
+/* bits 19-24 defined by user-space; not supported yet here */
+#define CEPH_FEATURE_CRUSH_TUNABLES2 (1<<25)
+/* bit 26 defined by user-space; not supported yet here */
 #define CEPH_FEATURE_REPLY_CREATE_INODE (1<<27)
 
 /*
@@ -22,7 +24,8 @@
  */
 #define CEPH_FEATURES_SUPPORTED_DEFAULT  \
 	(CEPH_FEATURE_NOSRCADDR |	 \
-	 CEPH_FEATURE_CRUSH_TUNABLES |   \
+	 CEPH_FEATURE_CRUSH_TUNABLES |	  \
+	 CEPH_FEATURE_CRUSH_TUNABLES2 |   \
 	 CEPH_FEATURE_REPLY_CREATE_INODE)
 
 #define CEPH_FEATURES_REQUIRED_DEFAULT   \
diff --git a/include/linux/crush/crush.h b/include/linux/crush/crush.h
index 25baa287cff7..6a1101f24cfb 100644
--- a/include/linux/crush/crush.h
+++ b/include/linux/crush/crush.h
@@ -162,6 +162,8 @@ struct crush_map {
 	__u32 choose_local_fallback_tries;
 	/* choose attempts before giving up */ 
 	__u32 choose_total_tries;
+	/* attempt chooseleaf inner descent once; on failure retry outer descent */
+	__u32 chooseleaf_descend_once;
 };
 
 
diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c
index 35fce755ce10..96c8a58937db 100644
--- a/net/ceph/crush/mapper.c
+++ b/net/ceph/crush/mapper.c
@@ -287,6 +287,7 @@ static int is_out(const struct crush_map *map, const __u32 *weight, int item, in
  * @outpos: our position in that vector
  * @firstn: true if choosing "first n" items, false if choosing "indep"
  * @recurse_to_leaf: true if we want one device under each item of given type
+ * @descend_once: true if we should only try one descent before giving up
  * @out2: second output vector for leaf items (if @recurse_to_leaf)
  */
 static int crush_choose(const struct crush_map *map,
@@ -295,7 +296,7 @@ static int crush_choose(const struct crush_map *map,
 			int x, int numrep, int type,
 			int *out, int outpos,
 			int firstn, int recurse_to_leaf,
-			int *out2)
+			int descend_once, int *out2)
 {
 	int rep;
 	unsigned int ftotal, flocal;
@@ -399,6 +400,7 @@ static int crush_choose(const struct crush_map *map,
 							 x, outpos+1, 0,
 							 out2, outpos,
 							 firstn, 0,
+							 map->chooseleaf_descend_once,
 							 NULL) <= outpos)
 							/* didn't get leaf */
 							reject = 1;
@@ -422,7 +424,10 @@ reject:
 					ftotal++;
 					flocal++;
 
-					if (collide && flocal <= map->choose_local_tries)
+					if (reject && descend_once)
+						/* let outer call try again */
+						skip_rep = 1;
+					else if (collide && flocal <= map->choose_local_tries)
 						/* retry locally a few times */
 						retry_bucket = 1;
 					else if (map->choose_local_fallback_tries > 0 &&
@@ -485,6 +490,7 @@ int crush_do_rule(const struct crush_map *map,
 	int i, j;
 	int numrep;
 	int firstn;
+	const int descend_once = 0;
 
 	if ((__u32)ruleno >= map->max_rules) {
 		dprintk(" bad ruleno %d\n", ruleno);
@@ -544,7 +550,8 @@ int crush_do_rule(const struct crush_map *map,
 						      curstep->arg2,
 						      o+osize, j,
 						      firstn,
-						      recurse_to_leaf, c+osize);
+						      recurse_to_leaf,
+						      descend_once, c+osize);
 			}
 
 			if (recurse_to_leaf)
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index de73214b5d26..ca05871635bc 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -170,6 +170,7 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
         c->choose_local_tries = 2;
         c->choose_local_fallback_tries = 5;
         c->choose_total_tries = 19;
+	c->chooseleaf_descend_once = 0;
 
 	ceph_decode_need(p, end, 4*sizeof(u32), bad);
 	magic = ceph_decode_32(p);
@@ -336,6 +337,11 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
         dout("crush decode tunable choose_total_tries = %d",
              c->choose_total_tries);
 
+	ceph_decode_need(p, end, sizeof(u32), done);
+	c->chooseleaf_descend_once = ceph_decode_32(p);
+	dout("crush decode tunable chooseleaf_descend_once = %d",
+	     c->chooseleaf_descend_once);
+
 done:
 	dout("crush_decode success\n");
 	return c;
-- 
cgit v1.2.3


From 132368bd0b286457f83f56d0bbdecd85999562dc Mon Sep 17 00:00:00 2001
From: "K. Y. Srinivasan" <kys@microsoft.com>
Date: Sat, 1 Dec 2012 06:46:33 -0800
Subject: Drivers: hv: Add state to manage batched reading

For the "read" side signaling optimization, the reader has to completely
drain the queue before exiting. Add state to manage this "batched"
reading.

Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hv/channel_mgmt.c |  7 +++++++
 include/linux/hyperv.h    | 20 ++++++++++++++++++++
 2 files changed, 27 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index 2f84c5cff8d4..7bf59177aaf8 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -275,6 +275,13 @@ static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
 		return;
 	}
 
+	/*
+	 * By default we setup state to enable batched
+	 * reading. A specific service can choose to
+	 * disable this prior to opening the channel.
+	 */
+	newchannel->batched_reading = true;
+
 	memcpy(&newchannel->offermsg, offer,
 	       sizeof(struct vmbus_channel_offer_channel));
 	newchannel->monitor_grp = (u8)offer->monitorid / 32;
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index e73b852156b1..1ffe84de6c55 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -882,8 +882,28 @@ struct vmbus_channel {
 
 	void (*onchannel_callback)(void *context);
 	void *channel_callback_context;
+
+	/*
+	 * A channel can be marked for efficient (batched)
+	 * reading:
+	 * If batched_reading is set to "true", we read until the
+	 * channel is empty and hold off interrupts from the host
+	 * during the entire read process.
+	 * If batched_reading is set to "false", the client is not
+	 * going to perform batched reading.
+	 *
+	 * By default we will enable batched reading; specific
+	 * drivers that don't want this behavior can turn it off.
+	 */
+
+	bool batched_reading;
 };
 
+static inline void set_channel_read_state(struct vmbus_channel *c, bool state)
+{
+	c->batched_reading = state;
+}
+
 void vmbus_onmessage(void *context);
 
 int vmbus_request_offers(void);
-- 
cgit v1.2.3


From 610071c38463998d5a66388ff9956aaeb24b49a8 Mon Sep 17 00:00:00 2001
From: "K. Y. Srinivasan" <kys@microsoft.com>
Date: Sat, 1 Dec 2012 06:46:38 -0800
Subject: Drivers: hv: Support handling multiple VMBUS versions

The current code hard coded the vmbus version independent of the host
it was running on. Add code to dynamically negotiate the most appropriate
version.

Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hv/connection.c | 165 ++++++++++++++++++++++++++++++++----------------
 include/linux/hyperv.h  |   6 --
 2 files changed, 111 insertions(+), 60 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
index d1019a770ad7..2b56a3f47b30 100644
--- a/drivers/hv/connection.c
+++ b/drivers/hv/connection.c
@@ -39,16 +39,112 @@ struct vmbus_connection vmbus_connection = {
 	.next_gpadl_handle	= ATOMIC_INIT(0xE1E10),
 };
 
+/*
+ * VMBUS version is 32 bit entity broken up into
+ * two 16 bit quantities: major_number. minor_number.
+ *
+ * 0 . 13 (Windows Server 2008)
+ * 1 . 1  (Windows 7)
+ * 2 . 4  (Windows 8)
+ */
+
+#define VERSION_WS2008	((0 << 16) | (13))
+#define VERSION_WIN7	((1 << 16) | (1))
+#define VERSION_WIN8	((2 << 16) | (4))
+
+#define VERSION_INVAL -1
+
+static __u32 vmbus_get_next_version(__u32 current_version)
+{
+	switch (current_version) {
+	case (VERSION_WIN7):
+		return VERSION_WS2008;
+
+	case (VERSION_WIN8):
+		return VERSION_WIN7;
+
+	case (VERSION_WS2008):
+	default:
+		return VERSION_INVAL;
+	}
+}
+
+static int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo,
+					__u32 version)
+{
+	int ret = 0;
+	struct vmbus_channel_initiate_contact *msg;
+	unsigned long flags;
+	int t;
+
+	init_completion(&msginfo->waitevent);
+
+	msg = (struct vmbus_channel_initiate_contact *)msginfo->msg;
+
+	msg->header.msgtype = CHANNELMSG_INITIATE_CONTACT;
+	msg->vmbus_version_requested = version;
+	msg->interrupt_page = virt_to_phys(vmbus_connection.int_page);
+	msg->monitor_page1 = virt_to_phys(vmbus_connection.monitor_pages);
+	msg->monitor_page2 = virt_to_phys(
+			(void *)((unsigned long)vmbus_connection.monitor_pages +
+				 PAGE_SIZE));
+
+	/*
+	 * Add to list before we send the request since we may
+	 * receive the response before returning from this routine
+	 */
+	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
+	list_add_tail(&msginfo->msglistentry,
+		      &vmbus_connection.chn_msg_list);
+
+	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
+
+	ret = vmbus_post_msg(msg,
+			       sizeof(struct vmbus_channel_initiate_contact));
+	if (ret != 0) {
+		spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
+		list_del(&msginfo->msglistentry);
+		spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock,
+					flags);
+		return ret;
+	}
+
+	/* Wait for the connection response */
+	t =  wait_for_completion_timeout(&msginfo->waitevent, 5*HZ);
+	if (t == 0) {
+		spin_lock_irqsave(&vmbus_connection.channelmsg_lock,
+				flags);
+		list_del(&msginfo->msglistentry);
+		spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock,
+					flags);
+		return -ETIMEDOUT;
+	}
+
+	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
+	list_del(&msginfo->msglistentry);
+	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
+
+	/* Check if successful */
+	if (msginfo->response.version_response.version_supported) {
+		vmbus_connection.conn_state = CONNECTED;
+	} else {
+		pr_err("Unable to connect, "
+			"Version %d not supported by Hyper-V\n",
+			version);
+		return -ECONNREFUSED;
+	}
+
+	return ret;
+}
+
 /*
  * vmbus_connect - Sends a connect request on the partition service connection
  */
 int vmbus_connect(void)
 {
 	int ret = 0;
-	int t;
 	struct vmbus_channel_msginfo *msginfo = NULL;
-	struct vmbus_channel_initiate_contact *msg;
-	unsigned long flags;
+	__u32 version;
 
 	/* Initialize the vmbus connection */
 	vmbus_connection.conn_state = CONNECTING;
@@ -99,64 +195,25 @@ int vmbus_connect(void)
 		goto cleanup;
 	}
 
-	init_completion(&msginfo->waitevent);
-
-	msg = (struct vmbus_channel_initiate_contact *)msginfo->msg;
-
-	msg->header.msgtype = CHANNELMSG_INITIATE_CONTACT;
-	msg->vmbus_version_requested = VMBUS_REVISION_NUMBER;
-	msg->interrupt_page = virt_to_phys(vmbus_connection.int_page);
-	msg->monitor_page1 = virt_to_phys(vmbus_connection.monitor_pages);
-	msg->monitor_page2 = virt_to_phys(
-			(void *)((unsigned long)vmbus_connection.monitor_pages +
-				 PAGE_SIZE));
-
 	/*
-	 * Add to list before we send the request since we may
-	 * receive the response before returning from this routine
+	 * Negotiate a compatible VMBUS version number with the
+	 * host. We start with the highest number we can support
+	 * and work our way down until we negotiate a compatible
+	 * version.
 	 */
-	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
-	list_add_tail(&msginfo->msglistentry,
-		      &vmbus_connection.chn_msg_list);
-
-	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
 
-	ret = vmbus_post_msg(msg,
-			       sizeof(struct vmbus_channel_initiate_contact));
-	if (ret != 0) {
-		spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
-		list_del(&msginfo->msglistentry);
-		spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock,
-					flags);
-		goto cleanup;
-	}
+	version = VERSION_WS2008;
 
-	/* Wait for the connection response */
-	t =  wait_for_completion_timeout(&msginfo->waitevent, 5*HZ);
-	if (t == 0) {
-		spin_lock_irqsave(&vmbus_connection.channelmsg_lock,
-				flags);
-		list_del(&msginfo->msglistentry);
-		spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock,
-					flags);
-		ret = -ETIMEDOUT;
-		goto cleanup;
-	}
+	do {
+		ret = vmbus_negotiate_version(msginfo, version);
+		if (ret == 0)
+			break;
 
-	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
-	list_del(&msginfo->msglistentry);
-	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
+		version = vmbus_get_next_version(version);
+	} while (version != VERSION_INVAL);
 
-	/* Check if successful */
-	if (msginfo->response.version_response.version_supported) {
-		vmbus_connection.conn_state = CONNECTED;
-	} else {
-		pr_err("Unable to connect, "
-			"Version %d not supported by Hyper-V\n",
-			VMBUS_REVISION_NUMBER);
-		ret = -ECONNREFUSED;
+	if (version == VERSION_INVAL)
 		goto cleanup;
-	}
 
 	kfree(msginfo);
 	return 0;
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 1ffe84de6c55..b097bf9d9328 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -406,12 +406,6 @@ hv_get_ringbuffer_availbytes(struct hv_ring_buffer_info *rbi,
 #define HV_DRV_VERSION           "3.1"
 
 
-/*
- * A revision number of vmbus that is used for ensuring both ends on a
- * partition are using compatible versions.
- */
-#define VMBUS_REVISION_NUMBER		13
-
 /* Make maximum size of pipe payload of 16K */
 #define MAX_PIPE_DATA_PAYLOAD		(sizeof(u8) * 16384)
 
-- 
cgit v1.2.3


From 2416603ef1e12955850ade0d0cdecbef1fc59fa3 Mon Sep 17 00:00:00 2001
From: "K. Y. Srinivasan" <kys@microsoft.com>
Date: Sat, 1 Dec 2012 06:46:39 -0800
Subject: Drivers: hv: Update the ring buffer structure to match win8
 functionality

Update the ringbuffer structure to support win8 functionality.

Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/hyperv.h | 28 +++++++++++++++++++++-------
 1 file changed, 21 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index b097bf9d9328..2b5480126394 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -325,14 +325,28 @@ struct hv_ring_buffer {
 
 	u32 interrupt_mask;
 
-	/* Pad it to PAGE_SIZE so that data starts on page boundary */
-	u8	reserved[4084];
-
-	/* NOTE:
-	 * The interrupt_mask field is used only for channels but since our
-	 * vmbus connection also uses this data structure and its data starts
-	 * here, we commented out this field.
+	/*
+	 * Win8 uses some of the reserved bits to implement
+	 * interrupt driven flow management. On the send side
+	 * we can request that the receiver interrupt the sender
+	 * when the ring transitions from being full to being able
+	 * to handle a message of size "pending_send_sz".
+	 *
+	 * Add necessary state for this enhancement.
 	 */
+	u32 pending_send_sz;
+
+	u32 reserved1[12];
+
+	union {
+		struct {
+			u32 feat_pending_send_sz:1;
+		};
+		u32 value;
+	} feature_bits;
+
+	/* Pad it to PAGE_SIZE so that data starts on page boundary */
+	u8	reserved2[4028];
 
 	/*
 	 * Ring data starts here + RingDataStartOffset
-- 
cgit v1.2.3


From 29423b7e51a8ea4d687cf98c3a50f33c554be194 Mon Sep 17 00:00:00 2001
From: "K. Y. Srinivasan" <kys@microsoft.com>
Date: Sat, 1 Dec 2012 06:46:40 -0800
Subject: Drivers: hv: Extend/modify vmbus_channel_offer_channel for win7 and
 beyond

The "offfer" message sent by the host has been extended in win7 (ws2008 R2).
Add/modify state to reflect this extension. All these changes are backward
compatible.

Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/hyperv.h | 36 +++++++++++++++++++++++++++++++-----
 1 file changed, 31 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 2b5480126394..bee559ada3bb 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -440,9 +440,13 @@ hv_get_ringbuffer_availbytes(struct hv_ring_buffer_info *rbi,
 struct vmbus_channel_offer {
 	uuid_le if_type;
 	uuid_le if_instance;
-	u64 int_latency; /* in 100ns units */
-	u32 if_revision;
-	u32 server_ctx_size;	/* in bytes */
+
+	/*
+	 * These two fields are not currently used.
+	 */
+	u64 reserved1;
+	u64 reserved2;
+
 	u16 chn_flags;
 	u16 mmio_megabytes;		/* in bytes * 1024 * 1024 */
 
@@ -464,7 +468,11 @@ struct vmbus_channel_offer {
 			unsigned char user_def[MAX_PIPE_USER_DEFINED_BYTES];
 		} pipe;
 	} u;
-	u32 padding;
+	/*
+	 * The sub_channel_index is defined in win8.
+	 */
+	u16 sub_channel_index;
+	u16 reserved3;
 } __packed;
 
 /* Server Flags */
@@ -660,7 +668,25 @@ struct vmbus_channel_offer_channel {
 	struct vmbus_channel_offer offer;
 	u32 child_relid;
 	u8 monitorid;
-	u8 monitor_allocated;
+	/*
+	 * win7 and beyond splits this field into a bit field.
+	 */
+	u8 monitor_allocated:1;
+	u8 reserved:7;
+	/*
+	 * These are new fields added in win7 and later.
+	 * Do not access these fields without checking the
+	 * negotiated protocol.
+	 *
+	 * If "is_dedicated_interrupt" is set, we must not set the
+	 * associated bit in the channel bitmap while sending the
+	 * interrupt to the host.
+	 *
+	 * connection_id is to be used in signaling the host.
+	 */
+	u16 is_dedicated_interrupt:1;
+	u16 reserved1:15;
+	u32 connection_id;
 } __packed;
 
 /* Rescind Offer parameters */
-- 
cgit v1.2.3


From 37f7278b81a9ab9b76cf4d716ba420444ca4a616 Mon Sep 17 00:00:00 2001
From: "K. Y. Srinivasan" <kys@microsoft.com>
Date: Sat, 1 Dec 2012 06:46:41 -0800
Subject: Drivers: hv: Save and export negotiated vmbus version

Export the negotiated vmbus version as this may be useful for
individual drivers.

Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hv/connection.c | 9 +++++++++
 include/linux/hyperv.h  | 6 ++++++
 2 files changed, 15 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
index 2b56a3f47b30..70ea5d1fc16c 100644
--- a/drivers/hv/connection.c
+++ b/drivers/hv/connection.c
@@ -30,6 +30,7 @@
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
 #include <linux/hyperv.h>
+#include <linux/export.h>
 #include <asm/hyperv.h>
 #include "hyperv_vmbus.h"
 
@@ -54,6 +55,12 @@ struct vmbus_connection vmbus_connection = {
 
 #define VERSION_INVAL -1
 
+/*
+ * Negotiated protocol version with the host.
+ */
+__u32 vmbus_proto_version;
+EXPORT_SYMBOL_GPL(vmbus_proto_version);
+
 static __u32 vmbus_get_next_version(__u32 current_version)
 {
 	switch (current_version) {
@@ -215,6 +222,8 @@ int vmbus_connect(void)
 	if (version == VERSION_INVAL)
 		goto cleanup;
 
+	vmbus_proto_version = version;
+	pr_info("Negotiated host information %d\n", version);
 	kfree(msginfo);
 	return 0;
 
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index bee559ada3bb..134a2022a7a3 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -1204,5 +1204,11 @@ int hv_kvp_init(struct hv_util_service *);
 void hv_kvp_deinit(void);
 void hv_kvp_onchannelcallback(void *);
 
+/*
+ * Negotiated version with the Host.
+ */
+
+extern __u32 vmbus_proto_version;
+
 #endif /* __KERNEL__ */
 #endif /* _HYPERV_H */
-- 
cgit v1.2.3


From eafa7072e7cd806dff42b705284ca26189e527a4 Mon Sep 17 00:00:00 2001
From: "K. Y. Srinivasan" <kys@microsoft.com>
Date: Sat, 1 Dec 2012 06:46:44 -0800
Subject: Drivers: hv: Move vmbus version definitions to hyperv.h

To support version specific optimization in various vmbus drivers,
move the vmbus definitions to the public header file.

Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hv/connection.c | 15 ---------------
 include/linux/hyperv.h  | 15 +++++++++++++++
 2 files changed, 15 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
index 0d8b13290e93..56b14e57bdd8 100644
--- a/drivers/hv/connection.c
+++ b/drivers/hv/connection.c
@@ -40,21 +40,6 @@ struct vmbus_connection vmbus_connection = {
 	.next_gpadl_handle	= ATOMIC_INIT(0xE1E10),
 };
 
-/*
- * VMBUS version is 32 bit entity broken up into
- * two 16 bit quantities: major_number. minor_number.
- *
- * 0 . 13 (Windows Server 2008)
- * 1 . 1  (Windows 7)
- * 2 . 4  (Windows 8)
- */
-
-#define VERSION_WS2008	((0 << 16) | (13))
-#define VERSION_WIN7	((1 << 16) | (1))
-#define VERSION_WIN8	((2 << 16) | (4))
-
-#define VERSION_INVAL -1
-
 /*
  * Negotiated protocol version with the host.
  */
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 134a2022a7a3..e72502689cdc 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -419,6 +419,21 @@ hv_get_ringbuffer_availbytes(struct hv_ring_buffer_info *rbi,
  */
 #define HV_DRV_VERSION           "3.1"
 
+/*
+ * VMBUS version is 32 bit entity broken up into
+ * two 16 bit quantities: major_number. minor_number.
+ *
+ * 0 . 13 (Windows Server 2008)
+ * 1 . 1  (Windows 7)
+ * 2 . 4  (Windows 8)
+ */
+
+#define VERSION_WS2008  ((0 << 16) | (13))
+#define VERSION_WIN7    ((1 << 16) | (1))
+#define VERSION_WIN8    ((2 << 16) | (4))
+
+#define VERSION_INVAL -1
+
 
 /* Make maximum size of pipe payload of 16K */
 #define MAX_PIPE_DATA_PAYLOAD		(sizeof(u8) * 16384)
-- 
cgit v1.2.3


From b3bf60c7b4665d40b8eae2217b54c4745f49f470 Mon Sep 17 00:00:00 2001
From: "K. Y. Srinivasan" <kys@microsoft.com>
Date: Sat, 1 Dec 2012 06:46:45 -0800
Subject: Drivers: hv: Manage signaling state on a per-connection basis

The current code has a global handle for supporting signaling of the host
from guest. Make this a per-channel attribute as on some versions of the
host we can signal on per-channel handle.

Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hv/channel_mgmt.c | 20 ++++++++++++++++++++
 drivers/hv/hyperv_vmbus.h | 21 ---------------------
 include/linux/hyperv.h    | 25 +++++++++++++++++++++++++
 3 files changed, 45 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index 7bf59177aaf8..f4d990285d99 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -282,6 +282,26 @@ static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
 	 */
 	newchannel->batched_reading = true;
 
+	/*
+	 * Setup state for signalling the host.
+	 */
+	newchannel->sig_event = (struct hv_input_signal_event *)
+				(ALIGN((unsigned long)
+				&newchannel->sig_buf,
+				HV_HYPERCALL_PARAM_ALIGN));
+
+	newchannel->sig_event->connectionid.asu32 = 0;
+	newchannel->sig_event->connectionid.u.id = VMBUS_EVENT_CONNECTION_ID;
+	newchannel->sig_event->flag_number = 0;
+	newchannel->sig_event->rsvdz = 0;
+
+	if (vmbus_proto_version != VERSION_WS2008) {
+		newchannel->is_dedicated_interrupt =
+				(offer->is_dedicated_interrupt != 0);
+		newchannel->sig_event->connectionid.u.id =
+				offer->connection_id;
+	}
+
 	memcpy(&newchannel->offermsg, offer,
 	       sizeof(struct vmbus_channel_offer_channel));
 	newchannel->monitor_grp = (u8)offer->monitorid / 32;
diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
index cd48ac331708..1bc7500fb84c 100644
--- a/drivers/hv/hyperv_vmbus.h
+++ b/drivers/hv/hyperv_vmbus.h
@@ -101,15 +101,6 @@ enum hv_message_type {
 /* Define invalid partition identifier. */
 #define HV_PARTITION_ID_INVALID		((u64)0x0)
 
-/* Define connection identifier type. */
-union hv_connection_id {
-	u32 asu32;
-	struct {
-		u32 id:24;
-		u32 reserved:8;
-	} u;
-};
-
 /* Define port identifier type. */
 union hv_port_id {
 	u32 asu32;
@@ -338,13 +329,6 @@ struct hv_input_post_message {
 	u64 payload[HV_MESSAGE_PAYLOAD_QWORD_COUNT];
 };
 
-/* Definition of the hv_signal_event hypercall input structure. */
-struct hv_input_signal_event {
-	union hv_connection_id connectionid;
-	u16 flag_number;
-	u16 rsvdz;
-};
-
 /*
  * Versioning definitions used for guests reporting themselves to the
  * hypervisor, and visa versa.
@@ -498,11 +482,6 @@ static const uuid_le VMBUS_SERVICE_ID = {
 
 
-struct hv_input_signal_event_buffer {
-	u64 align8;
-	struct hv_input_signal_event event;
-};
-
 struct hv_context {
 	/* We only support running on top of Hyper-V
 	* So at this point this really can only contain the Hyper-V ID
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index e72502689cdc..c6e2c44a1be9 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -897,6 +897,27 @@ struct vmbus_close_msg {
 	struct vmbus_channel_close_channel msg;
 };
 
+/* Define connection identifier type. */
+union hv_connection_id {
+	u32 asu32;
+	struct {
+		u32 id:24;
+		u32 reserved:8;
+	} u;
+};
+
+/* Definition of the hv_signal_event hypercall input structure. */
+struct hv_input_signal_event {
+	union hv_connection_id connectionid;
+	u16 flag_number;
+	u16 rsvdz;
+};
+
+struct hv_input_signal_event_buffer {
+	u64 align8;
+	struct hv_input_signal_event event;
+};
+
 struct vmbus_channel {
 	struct list_head listentry;
 
@@ -946,6 +967,10 @@ struct vmbus_channel {
 	 */
 
 	bool batched_reading;
+
+	bool is_dedicated_interrupt;
+	struct hv_input_signal_event_buffer sig_buf;
+	struct hv_input_signal_event *sig_event;
 };
 
 static inline void set_channel_read_state(struct vmbus_channel *c, bool state)
-- 
cgit v1.2.3


From abbf3b2aa090b4a6bf22c935924b6467990266da Mon Sep 17 00:00:00 2001
From: "K. Y. Srinivasan" <kys@microsoft.com>
Date: Sat, 1 Dec 2012 06:46:48 -0800
Subject: Drivers: hv: Add state to manage incoming channel interrupt load

Add state to bind a channel to a specific VCPU. This will help us better
distribute incoming interrupt load.

Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hv/channel.c      |  2 +-
 drivers/hv/channel_mgmt.c |  2 ++
 include/linux/hyperv.h    | 21 +++++++++++++++++++--
 3 files changed, 22 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index 70a34daa04c1..9303252b2e19 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -181,7 +181,7 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size,
 	open_msg->ringbuffer_gpadlhandle = newchannel->ringbuffer_gpadlhandle;
 	open_msg->downstream_ringbuffer_pageoffset = send_ringbuffer_size >>
 						  PAGE_SHIFT;
-	open_msg->server_contextarea_gpadlhandle = 0;
+	open_msg->target_vp = newchannel->target_vp;
 
 	if (userdatalen > MAX_USER_DEFINED_BYTES) {
 		err = -EINVAL;
diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index f4d990285d99..56ed45c74d01 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -302,6 +302,8 @@ static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
 				offer->connection_id;
 	}
 
+	newchannel->target_vp = 0;
+
 	memcpy(&newchannel->offermsg, offer,
 	       sizeof(struct vmbus_channel_offer_channel));
 	newchannel->monitor_grp = (u8)offer->monitorid / 32;
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index c6e2c44a1be9..8c3cb1fc34d4 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -732,8 +732,15 @@ struct vmbus_channel_open_channel {
 	/* GPADL for the channel's ring buffer. */
 	u32 ringbuffer_gpadlhandle;
 
-	/* GPADL for the channel's server context save area. */
-	u32 server_contextarea_gpadlhandle;
+	/*
+	 * Starting with win8, this field will be used to specify
+	 * the target virtual processor on which to deliver the interrupt for
+	 * the host to guest communication.
+	 * Prior to win8, incoming channel interrupts would only
+	 * be delivered on cpu 0. Setting this value to 0 would
+	 * preserve the earlier behavior.
+	 */
+	u32 target_vp;
 
 	/*
 	* The upstream ring buffer begins at offset zero in the memory
@@ -971,6 +978,16 @@ struct vmbus_channel {
 	bool is_dedicated_interrupt;
 	struct hv_input_signal_event_buffer sig_buf;
 	struct hv_input_signal_event *sig_event;
+
+	/*
+	 * Starting with win8, this field will be used to specify
+	 * the target virtual processor on which to deliver the interrupt for
+	 * the host to guest communication.
+	 * Prior to win8, incoming channel interrupts would only
+	 * be delivered on cpu 0. Setting this value to 0 would
+	 * preserve the earlier behavior.
+	 */
+	u32 target_vp;
 };
 
 static inline void set_channel_read_state(struct vmbus_channel *c, bool state)
-- 
cgit v1.2.3


From 2a5c43a821b3b26e6af1cdb987b4daeba6f13a6f Mon Sep 17 00:00:00 2001
From: "K. Y. Srinivasan" <kys@microsoft.com>
Date: Sat, 1 Dec 2012 06:46:56 -0800
Subject: Drivers: hv: Enable protocol negotiation with win8 hosts

Now that we have implemented all of the Win8 (WS2012) functionality, negotiate
Win8 protocol with the host.

Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hv/connection.c | 2 +-
 include/linux/hyperv.h  | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
index ac71653abb77..3965537a659d 100644
--- a/drivers/hv/connection.c
+++ b/drivers/hv/connection.c
@@ -194,7 +194,7 @@ int vmbus_connect(void)
 	 * version.
 	 */
 
-	version = VERSION_WS2008;
+	version = VERSION_CURRENT;
 
 	do {
 		ret = vmbus_negotiate_version(msginfo, version);
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 8c3cb1fc34d4..5095b066df94 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -434,6 +434,7 @@ hv_get_ringbuffer_availbytes(struct hv_ring_buffer_info *rbi,
 
 #define VERSION_INVAL -1
 
+#define VERSION_CURRENT VERSION_WIN8
 
 /* Make maximum size of pipe payload of 16K */
 #define MAX_PIPE_DATA_PAYLOAD		(sizeof(u8) * 16384)
-- 
cgit v1.2.3


From dd5f049dbdf973d9bceebef1fd73647a5ede6732 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@inktank.com>
Date: Thu, 1 Nov 2012 08:39:27 -0500
Subject: ceph: define ceph_encode_8_safe()

It's kind of a silly macro, but ceph_encode_8_safe() is the only one
missing from an otherwise pretty complete set.  It's not used, but
neither are a couple of the others in this set.

While in there, insert some whitespace to tidy up the alignment of
the line-terminating backslashes in some of the macro definitions.

Signed-off-by: Alex Elder <elder@inktank.com>
Reviewed-by: Dan Mick <dan.mick@inktank.com>
---
 include/linux/ceph/decode.h | 29 +++++++++++++++++------------
 1 file changed, 17 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ceph/decode.h b/include/linux/ceph/decode.h
index 4bbf2db45f46..cd679f2d348b 100644
--- a/include/linux/ceph/decode.h
+++ b/include/linux/ceph/decode.h
@@ -52,10 +52,10 @@ static inline int ceph_has_room(void **p, void *end, size_t n)
 	return end >= *p && n <= end - *p;
 }
 
-#define ceph_decode_need(p, end, n, bad)		\
-	do {						\
-		if (!likely(ceph_has_room(p, end, n)))	\
-			goto bad;			\
+#define ceph_decode_need(p, end, n, bad)			\
+	do {							\
+		if (!likely(ceph_has_room(p, end, n)))		\
+			goto bad;				\
 	} while (0)
 
 #define ceph_decode_64_safe(p, end, v, bad)			\
@@ -99,8 +99,8 @@ static inline int ceph_has_room(void **p, void *end, size_t n)
  *
  * There are two possible failures:
  *   - converting the string would require accessing memory at or
- *     beyond the "end" pointer provided (-E
- *   - memory could not be allocated for the result
+ *     beyond the "end" pointer provided (-ERANGE)
+ *   - memory could not be allocated for the result (-ENOMEM)
  */
 static inline char *ceph_extract_encoded_string(void **p, void *end,
 						size_t *lenp, gfp_t gfp)
@@ -217,10 +217,10 @@ static inline void ceph_encode_string(void **p, void *end,
 	*p += len;
 }
 
-#define ceph_encode_need(p, end, n, bad)		\
-	do {						\
-		if (!likely(ceph_has_room(p, end, n)))	\
-			goto bad;			\
+#define ceph_encode_need(p, end, n, bad)			\
+	do {							\
+		if (!likely(ceph_has_room(p, end, n)))		\
+			goto bad;				\
 	} while (0)
 
 #define ceph_encode_64_safe(p, end, v, bad)			\
@@ -231,12 +231,17 @@ static inline void ceph_encode_string(void **p, void *end,
 #define ceph_encode_32_safe(p, end, v, bad)			\
 	do {							\
 		ceph_encode_need(p, end, sizeof(u32), bad);	\
-		ceph_encode_32(p, v);			\
+		ceph_encode_32(p, v);				\
 	} while (0)
 #define ceph_encode_16_safe(p, end, v, bad)			\
 	do {							\
 		ceph_encode_need(p, end, sizeof(u16), bad);	\
-		ceph_encode_16(p, v);			\
+		ceph_encode_16(p, v);				\
+	} while (0)
+#define ceph_encode_8_safe(p, end, v, bad)			\
+	do {							\
+		ceph_encode_need(p, end, sizeof(u8), bad);	\
+		ceph_encode_8(p, v);				\
 	} while (0)
 
 #define ceph_encode_copy_safe(p, end, pv, n, bad)		\
-- 
cgit v1.2.3


From 7266507d89991fa1e989283e4e032c6d9357fe26 Mon Sep 17 00:00:00 2001
From: Kevin Cernekee <cernekee@gmail.com>
Date: Mon, 17 Dec 2012 18:33:58 +0000
Subject: netfilter: nf_ct_sip: support Cisco 7941/7945 IP phones

Most SIP devices use a source port of 5060/udp on SIP requests, so the
response automatically comes back to port 5060:

    phone_ip:5060 -> proxy_ip:5060   REGISTER
    proxy_ip:5060 -> phone_ip:5060   100 Trying

The newer Cisco IP phones, however, use a randomly chosen high source
port for the SIP request but expect the response on port 5060:

    phone_ip:49173 -> proxy_ip:5060  REGISTER
    proxy_ip:5060 -> phone_ip:5060   100 Trying

Standard Linux NAT, with or without nf_nat_sip, will send the reply back
to port 49173, not 5060:

    phone_ip:49173 -> proxy_ip:5060  REGISTER
    proxy_ip:5060 -> phone_ip:49173  100 Trying

But the phone is not listening on 49173, so it will never see the reply.

This patch modifies nf_*_sip to work around this quirk by extracting
the SIP response port from the Via: header, iff the source IP in the
packet header matches the source IP in the SIP request.

Signed-off-by: Kevin Cernekee <cernekee@gmail.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Patrick McHardy <kaber@trash.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/nf_conntrack_sip.h |  3 +++
 net/netfilter/nf_conntrack_sip.c           | 17 +++++++++++++++++
 net/netfilter/nf_nat_sip.c                 | 27 ++++++++++++++++++++++++---
 3 files changed, 44 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/nf_conntrack_sip.h b/include/linux/netfilter/nf_conntrack_sip.h
index 387bdd02945d..ba7f571a2b1c 100644
--- a/include/linux/netfilter/nf_conntrack_sip.h
+++ b/include/linux/netfilter/nf_conntrack_sip.h
@@ -4,12 +4,15 @@
 
 #include <net/netfilter/nf_conntrack_expect.h>
 
+#include <linux/types.h>
+
 #define SIP_PORT	5060
 #define SIP_TIMEOUT	3600
 
 struct nf_ct_sip_master {
 	unsigned int	register_cseq;
 	unsigned int	invite_cseq;
+	__be16		forced_dport;
 };
 
 enum sip_expectation_classes {
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index df8f4f284481..72a67bbe3518 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -1440,8 +1440,25 @@ static int process_sip_request(struct sk_buff *skb, unsigned int protoff,
 {
 	enum ip_conntrack_info ctinfo;
 	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+	struct nf_ct_sip_master *ct_sip_info = nfct_help_data(ct);
+	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
 	unsigned int matchoff, matchlen;
 	unsigned int cseq, i;
+	union nf_inet_addr addr;
+	__be16 port;
+
+	/* Many Cisco IP phones use a high source port for SIP requests, but
+	 * listen for the response on port 5060.  If we are the local
+	 * router for one of these phones, save the port number from the
+	 * Via: header so that nf_nat_sip can redirect the responses to
+	 * the correct port.
+	 */
+	if (ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen,
+				    SIP_HDR_VIA_UDP, NULL, &matchoff,
+				    &matchlen, &addr, &port) > 0 &&
+	    port != ct->tuplehash[dir].tuple.src.u.udp.port &&
+	    nf_inet_addr_cmp(&addr, &ct->tuplehash[dir].tuple.src.u3))
+		ct_sip_info->forced_dport = port;
 
 	for (i = 0; i < ARRAY_SIZE(sip_handlers); i++) {
 		const struct sip_handler *handler;
diff --git a/net/netfilter/nf_nat_sip.c b/net/netfilter/nf_nat_sip.c
index 16303c752213..5951146e7688 100644
--- a/net/netfilter/nf_nat_sip.c
+++ b/net/netfilter/nf_nat_sip.c
@@ -95,6 +95,7 @@ static int map_addr(struct sk_buff *skb, unsigned int protoff,
 	enum ip_conntrack_info ctinfo;
 	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
 	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+	struct nf_ct_sip_master *ct_sip_info = nfct_help_data(ct);
 	char buffer[INET6_ADDRSTRLEN + sizeof("[]:nnnnn")];
 	unsigned int buflen;
 	union nf_inet_addr newaddr;
@@ -107,7 +108,8 @@ static int map_addr(struct sk_buff *skb, unsigned int protoff,
 	} else if (nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3, addr) &&
 		   ct->tuplehash[dir].tuple.dst.u.udp.port == port) {
 		newaddr = ct->tuplehash[!dir].tuple.src.u3;
-		newport = ct->tuplehash[!dir].tuple.src.u.udp.port;
+		newport = ct_sip_info->forced_dport ? :
+			  ct->tuplehash[!dir].tuple.src.u.udp.port;
 	} else
 		return 1;
 
@@ -144,6 +146,7 @@ static unsigned int nf_nat_sip(struct sk_buff *skb, unsigned int protoff,
 	enum ip_conntrack_info ctinfo;
 	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
 	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+	struct nf_ct_sip_master *ct_sip_info = nfct_help_data(ct);
 	unsigned int coff, matchoff, matchlen;
 	enum sip_header_types hdr;
 	union nf_inet_addr addr;
@@ -258,6 +261,21 @@ next:
 	    !map_sip_addr(skb, protoff, dataoff, dptr, datalen, SIP_HDR_TO))
 		return NF_DROP;
 
+	/* Mangle destination port for Cisco phones, then fix up checksums */
+	if (dir == IP_CT_DIR_REPLY && ct_sip_info->forced_dport) {
+		struct udphdr *uh;
+
+		if (!skb_make_writable(skb, skb->len))
+			return NF_DROP;
+
+		uh = (void *)skb->data + protoff;
+		uh->dest = ct_sip_info->forced_dport;
+
+		if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo, protoff,
+					      0, 0, NULL, 0))
+			return NF_DROP;
+	}
+
 	return NF_ACCEPT;
 }
 
@@ -311,8 +329,10 @@ static unsigned int nf_nat_sip_expect(struct sk_buff *skb, unsigned int protoff,
 	enum ip_conntrack_info ctinfo;
 	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
 	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+	struct nf_ct_sip_master *ct_sip_info = nfct_help_data(ct);
 	union nf_inet_addr newaddr;
 	u_int16_t port;
+	__be16 srcport;
 	char buffer[INET6_ADDRSTRLEN + sizeof("[]:nnnnn")];
 	unsigned int buflen;
 
@@ -326,8 +346,9 @@ static unsigned int nf_nat_sip_expect(struct sk_buff *skb, unsigned int protoff,
 	/* If the signalling port matches the connection's source port in the
 	 * original direction, try to use the destination port in the opposite
 	 * direction. */
-	if (exp->tuple.dst.u.udp.port ==
-	    ct->tuplehash[dir].tuple.src.u.udp.port)
+	srcport = ct_sip_info->forced_dport ? :
+		  ct->tuplehash[dir].tuple.src.u.udp.port;
+	if (exp->tuple.dst.u.udp.port == srcport)
 		port = ntohs(ct->tuplehash[!dir].tuple.dst.u.udp.port);
 	else
 		port = ntohs(exp->tuple.dst.u.udp.port);
-- 
cgit v1.2.3


From 7a555613eb77c69eb6e48b61bc5f72dd42fa1780 Mon Sep 17 00:00:00 2001
From: Vladimir Kondratiev <qca_vkondrat@qca.qualcomm.com>
Date: Wed, 5 Dec 2012 16:48:27 -0500
Subject: dynamic_debug: dynamic hex dump

Introduce print_hex_dump_debug() that can be dynamically controlled, similar to
pr_debug.

Also, make print_hex_dump_bytes() dynamically controlled

Implement only 'p' flag (_DPRINTK_FLAGS_PRINT) to keep it simple since hex dump prints
multiple lines and long prefix would impact readability.
To provide line/file etc. information, use pr_debug or similar
before/after print_hex_dump_debug()

Signed-off-by: Vladimir Kondratiev <qca_vkondrat@qca.qualcomm.com>
Signed-off-by: Jason Baron <jbaron@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/dynamic-debug-howto.txt | 15 +++++++++++++--
 include/linux/dynamic_debug.h         | 11 +++++++++++
 include/linux/printk.h                | 17 +++++++++++++++++
 lib/hexdump.c                         |  4 +++-
 4 files changed, 44 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/dynamic-debug-howto.txt b/Documentation/dynamic-debug-howto.txt
index 6e1684981da2..72322c6d7352 100644
--- a/Documentation/dynamic-debug-howto.txt
+++ b/Documentation/dynamic-debug-howto.txt
@@ -6,8 +6,16 @@ This document describes how to use the dynamic debug (dyndbg) feature.
 
 Dynamic debug is designed to allow you to dynamically enable/disable
 kernel code to obtain additional kernel information.  Currently, if
-CONFIG_DYNAMIC_DEBUG is set, then all pr_debug()/dev_dbg() calls can
-be dynamically enabled per-callsite.
+CONFIG_DYNAMIC_DEBUG is set, then all pr_debug()/dev_dbg() and
+print_hex_dump_debug()/print_hex_dump_bytes() calls can be dynamically
+enabled per-callsite.
+
+If CONFIG_DYNAMIC_DEBUG is not set, print_hex_dump_debug() is just
+shortcut for print_hex_dump(KERN_DEBUG).
+
+For print_hex_dump_debug()/print_hex_dump_bytes(), format string is
+its 'prefix_str' argument, if it is constant string; or "hexdump"
+in case 'prefix_str' is build dynamically.
 
 Dynamic debug has even more useful features:
 
@@ -202,6 +210,9 @@ The flags are:
   t    Include thread ID in messages not generated from interrupt context
   _    No flags are set. (Or'd with others on input)
 
+For print_hex_dump_debug() and print_hex_dump_bytes(), only 'p' flag
+have meaning, other flags ignored.
+
 For display, the flags are preceded by '='
 (mnemonic: what the flags are currently equal to).
 
diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h
index 6dd4787a798a..2fe93b26b42f 100644
--- a/include/linux/dynamic_debug.h
+++ b/include/linux/dynamic_debug.h
@@ -95,6 +95,17 @@ do {								\
 				     ##__VA_ARGS__);		\
 } while (0)
 
+#define dynamic_hex_dump(prefix_str, prefix_type, rowsize,	\
+			 groupsize, buf, len, ascii)		\
+do {								\
+	DEFINE_DYNAMIC_DEBUG_METADATA(descriptor,		\
+		__builtin_constant_p(prefix_str) ? prefix_str : "hexdump");\
+	if (unlikely(descriptor.flags & _DPRINTK_FLAGS_PRINT))	\
+		print_hex_dump(KERN_DEBUG, prefix_str,		\
+			       prefix_type, rowsize, groupsize,	\
+			       buf, len, ascii);		\
+} while (0)
+
 #else
 
 #include <linux/string.h>
diff --git a/include/linux/printk.h b/include/linux/printk.h
index 9afc01e5a0a6..02c95cf872cd 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -321,8 +321,13 @@ extern void hex_dump_to_buffer(const void *buf, size_t len,
 extern void print_hex_dump(const char *level, const char *prefix_str,
 			   int prefix_type, int rowsize, int groupsize,
 			   const void *buf, size_t len, bool ascii);
+#if defined(CONFIG_DYNAMIC_DEBUG)
+#define print_hex_dump_bytes(prefix_str, prefix_type, buf, len)	\
+	dynamic_hex_dump(prefix_str, prefix_type, 16, 1, buf, len, true)
+#else
 extern void print_hex_dump_bytes(const char *prefix_str, int prefix_type,
 				 const void *buf, size_t len);
+#endif /* defined(CONFIG_DYNAMIC_DEBUG) */
 #else
 static inline void print_hex_dump(const char *level, const char *prefix_str,
 				  int prefix_type, int rowsize, int groupsize,
@@ -336,4 +341,16 @@ static inline void print_hex_dump_bytes(const char *prefix_str, int prefix_type,
 
 #endif
 
+#if defined(CONFIG_DYNAMIC_DEBUG)
+#define print_hex_dump_debug(prefix_str, prefix_type, rowsize,	\
+			     groupsize, buf, len, ascii)	\
+	dynamic_hex_dump(prefix_str, prefix_type, rowsize,	\
+			 groupsize, buf, len, ascii)
+#else
+#define print_hex_dump_debug(prefix_str, prefix_type, rowsize,		\
+			     groupsize, buf, len, ascii)		\
+	print_hex_dump(KERN_DEBUG, prefix_str, prefix_type, rowsize,	\
+		       groupsize, buf, len, ascii)
+#endif /* defined(CONFIG_DYNAMIC_DEBUG) */
+
 #endif
diff --git a/lib/hexdump.c b/lib/hexdump.c
index 6540d657dca4..3f0494c9d57a 100644
--- a/lib/hexdump.c
+++ b/lib/hexdump.c
@@ -227,6 +227,7 @@ void print_hex_dump(const char *level, const char *prefix_str, int prefix_type,
 }
 EXPORT_SYMBOL(print_hex_dump);
 
+#if !defined(CONFIG_DYNAMIC_DEBUG)
 /**
  * print_hex_dump_bytes - shorthand form of print_hex_dump() with default params
  * @prefix_str: string to prefix each line with;
@@ -246,4 +247,5 @@ void print_hex_dump_bytes(const char *prefix_str, int prefix_type,
 		       buf, len, true);
 }
 EXPORT_SYMBOL(print_hex_dump_bytes);
-#endif
+#endif /* !defined(CONFIG_DYNAMIC_DEBUG) */
+#endif /* defined(CONFIG_PRINTK) */
-- 
cgit v1.2.3


From af77f26caa35a95af09d1dac5c513b3901de7e37 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@inktank.com>
Date: Fri, 9 Nov 2012 08:43:15 -0600
Subject: rbd: drop oid parameters from ceph_osdc_build_request()

The last two parameters to ceph_osd_build_request() describe the
object id, but the values passed always come from the osd request
structure whose address is also provided.  Get rid of those last
two parameters.

Signed-off-by: Alex Elder <elder@inktank.com>
Reviewed-by: Josh Durgin <josh.durgin@inktank.com>
---
 drivers/block/rbd.c             |  6 +-----
 include/linux/ceph/osd_client.h |  4 +---
 net/ceph/osd_client.c           | 13 +++++--------
 3 files changed, 7 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index d4e93a28fb6a..9a701effa0ef 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -1176,11 +1176,7 @@ static int rbd_do_request(struct request *rq,
 				snapid, ofs, &len, &bno, osd_req, ops);
 	rbd_assert(ret == 0);
 
-	ceph_osdc_build_request(osd_req, ofs, &len,
-				ops,
-				snapc,
-				&mtime,
-				osd_req->r_oid, osd_req->r_oid_len);
+	ceph_osdc_build_request(osd_req, ofs, &len, ops, snapc, &mtime);
 
 	if (linger_req) {
 		ceph_osdc_set_request_linger(osdc, osd_req);
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index d9b880e977e6..f2e5d2cdca06 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -227,9 +227,7 @@ extern void ceph_osdc_build_request(struct ceph_osd_request *req,
 				    u64 off, u64 *plen,
 				    struct ceph_osd_req_op *src_ops,
 				    struct ceph_snap_context *snapc,
-				    struct timespec *mtime,
-				    const char *oid,
-				    int oid_len);
+				    struct timespec *mtime);
 
 extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *,
 				      struct ceph_file_layout *layout,
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index eade41bb7102..7d38327a8e89 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -376,9 +376,7 @@ void ceph_osdc_build_request(struct ceph_osd_request *req,
 			     u64 off, u64 *plen,
 			     struct ceph_osd_req_op *src_ops,
 			     struct ceph_snap_context *snapc,
-			     struct timespec *mtime,
-			     const char *oid,
-			     int oid_len)
+			     struct timespec *mtime)
 {
 	struct ceph_msg *msg = req->r_request;
 	struct ceph_osd_request_head *head;
@@ -405,9 +403,9 @@ void ceph_osdc_build_request(struct ceph_osd_request *req,
 
 
 	/* fill in oid */
-	head->object_len = cpu_to_le32(oid_len);
-	memcpy(p, oid, oid_len);
-	p += oid_len;
+	head->object_len = cpu_to_le32(req->r_oid_len);
+	memcpy(p, req->r_oid, req->r_oid_len);
+	p += req->r_oid_len;
 
 	src_op = src_ops;
 	while (src_op->op) {
@@ -506,8 +504,7 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
 
 	ceph_osdc_build_request(req, off, plen, ops,
 				snapc,
-				mtime,
-				req->r_oid, req->r_oid_len);
+				mtime);
 
 	return req;
 }
-- 
cgit v1.2.3


From c885837f7d4f8c4f5cb2a744cc6929bc078e9dc0 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@inktank.com>
Date: Tue, 13 Nov 2012 21:11:15 -0600
Subject: libceph: always allow trail in osd request

An osd request structure contains an optional trail portion, which
if present will contain data to be passed in the payload portion of
the message containing the request.  The trail field is a
ceph_pagelist pointer, and if null it indicates there is no trail.

A ceph_pagelist structure contains a length field, and it can
legitimately hold value 0.  Make use of this to change the
interpretation of the "trail" of an osd request so that every osd
request has trailing data, it just might have length 0.

This means we change the r_trail field in a ceph_osd_request
structure from a pointer to a structure that is always initialized.

Note that in ceph_osdc_start_request(), the trail pointer (or now
address of that structure) is assigned to a ceph message's trail
field.  Here's why that's still OK (looking at net/ceph/messenger.c):
    - What would have resulted in a null pointer previously will now
      refer to a 0-length page list.  That message trail pointer
      is used in two functions, write_partial_msg_pages() and
      out_msg_pos_next().
    - In write_partial_msg_pages(), a null page list pointer is
      handled the same as a message with 0-length trail, and both
      result in a "in_trail" variable set to false.  The trail
      pointer is only used if in_trail is true.
    - The only other place the message trail pointer is used is
      out_msg_pos_next().  That function is only called by
      write_partial_msg_pages() and only touches the trail pointer
      if the in_trail value it is passed is true.
Therefore a null ceph_msg->trail pointer is equivalent to a non-null
pointer referring to a 0-length page list structure.

Signed-off-by: Alex Elder <elder@inktank.com>
Reviewed-by: Josh Durgin <josh.durgin@inktank.com>
---
 include/linux/ceph/osd_client.h |  4 ++--
 net/ceph/osd_client.c           | 43 ++++++++++++-----------------------------
 2 files changed, 14 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index f2e5d2cdca06..61562c792855 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -10,6 +10,7 @@
 #include <linux/ceph/osdmap.h>
 #include <linux/ceph/messenger.h>
 #include <linux/ceph/auth.h>
+#include <linux/ceph/pagelist.h>
 
 /* 
  * Maximum object name size 
@@ -22,7 +23,6 @@ struct ceph_snap_context;
 struct ceph_osd_request;
 struct ceph_osd_client;
 struct ceph_authorizer;
-struct ceph_pagelist;
 
 /*
  * completion callback for async writepages
@@ -95,7 +95,7 @@ struct ceph_osd_request {
 	struct bio       *r_bio;	      /* instead of pages */
 #endif
 
-	struct ceph_pagelist *r_trail;	      /* trailing part of the data */
+	struct ceph_pagelist r_trail;	      /* trailing part of the data */
 };
 
 struct ceph_osd_event {
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 7d38327a8e89..2be50d82ccbc 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -171,10 +171,7 @@ void ceph_osdc_release_request(struct kref *kref)
 		bio_put(req->r_bio);
 #endif
 	ceph_put_snap_context(req->r_snapc);
-	if (req->r_trail) {
-		ceph_pagelist_release(req->r_trail);
-		kfree(req->r_trail);
-	}
+	ceph_pagelist_release(&req->r_trail);
 	if (req->r_mempool)
 		mempool_free(req, req->r_osdc->req_mempool);
 	else
@@ -208,8 +205,7 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
 {
 	struct ceph_osd_request *req;
 	struct ceph_msg *msg;
-	int needs_trail;
-	int num_op = get_num_ops(ops, &needs_trail);
+	int num_op = get_num_ops(ops, NULL);
 	size_t msg_size = sizeof(struct ceph_osd_request_head);
 
 	msg_size += num_op*sizeof(struct ceph_osd_op);
@@ -252,15 +248,7 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
 	}
 	req->r_reply = msg;
 
-	/* allocate space for the trailing data */
-	if (needs_trail) {
-		req->r_trail = kmalloc(sizeof(struct ceph_pagelist), gfp_flags);
-		if (!req->r_trail) {
-			ceph_osdc_put_request(req);
-			return NULL;
-		}
-		ceph_pagelist_init(req->r_trail);
-	}
+	ceph_pagelist_init(&req->r_trail);
 
 	/* create request message; allow space for oid */
 	msg_size += MAX_OBJ_NAME_SIZE;
@@ -312,29 +300,25 @@ static void osd_req_encode_op(struct ceph_osd_request *req,
 	case CEPH_OSD_OP_GETXATTR:
 	case CEPH_OSD_OP_SETXATTR:
 	case CEPH_OSD_OP_CMPXATTR:
-		BUG_ON(!req->r_trail);
-
 		dst->xattr.name_len = cpu_to_le32(src->xattr.name_len);
 		dst->xattr.value_len = cpu_to_le32(src->xattr.value_len);
 		dst->xattr.cmp_op = src->xattr.cmp_op;
 		dst->xattr.cmp_mode = src->xattr.cmp_mode;
-		ceph_pagelist_append(req->r_trail, src->xattr.name,
+		ceph_pagelist_append(&req->r_trail, src->xattr.name,
 				     src->xattr.name_len);
-		ceph_pagelist_append(req->r_trail, src->xattr.val,
+		ceph_pagelist_append(&req->r_trail, src->xattr.val,
 				     src->xattr.value_len);
 		break;
 	case CEPH_OSD_OP_CALL:
-		BUG_ON(!req->r_trail);
-
 		dst->cls.class_len = src->cls.class_len;
 		dst->cls.method_len = src->cls.method_len;
 		dst->cls.indata_len = cpu_to_le32(src->cls.indata_len);
 
-		ceph_pagelist_append(req->r_trail, src->cls.class_name,
+		ceph_pagelist_append(&req->r_trail, src->cls.class_name,
 				     src->cls.class_len);
-		ceph_pagelist_append(req->r_trail, src->cls.method_name,
+		ceph_pagelist_append(&req->r_trail, src->cls.method_name,
 				     src->cls.method_len);
-		ceph_pagelist_append(req->r_trail, src->cls.indata,
+		ceph_pagelist_append(&req->r_trail, src->cls.indata,
 				     src->cls.indata_len);
 		break;
 	case CEPH_OSD_OP_ROLLBACK:
@@ -347,11 +331,9 @@ static void osd_req_encode_op(struct ceph_osd_request *req,
 			__le32 prot_ver = cpu_to_le32(src->watch.prot_ver);
 			__le32 timeout = cpu_to_le32(src->watch.timeout);
 
-			BUG_ON(!req->r_trail);
-
-			ceph_pagelist_append(req->r_trail,
+			ceph_pagelist_append(&req->r_trail,
 						&prot_ver, sizeof(prot_ver));
-			ceph_pagelist_append(req->r_trail,
+			ceph_pagelist_append(&req->r_trail,
 						&timeout, sizeof(timeout));
 		}
 	case CEPH_OSD_OP_NOTIFY_ACK:
@@ -414,8 +396,7 @@ void ceph_osdc_build_request(struct ceph_osd_request *req,
 		op++;
 	}
 
-	if (req->r_trail)
-		data_len += req->r_trail->length;
+	data_len += req->r_trail.length;
 
 	if (snapc) {
 		head->snap_seq = cpu_to_le64(snapc->seq);
@@ -1715,7 +1696,7 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc,
 #ifdef CONFIG_BLOCK
 	req->r_request->bio = req->r_bio;
 #endif
-	req->r_request->trail = req->r_trail;
+	req->r_request->trail = &req->r_trail;
 
 	register_request(osdc, req);
 
-- 
cgit v1.2.3


From 0120be3c60d46d6d55f4bf7a3d654cc705eb0c54 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@inktank.com>
Date: Wed, 14 Nov 2012 09:38:19 -0600
Subject: libceph: pass length to ceph_osdc_build_request()

The len argument to ceph_osdc_build_request() is set up to be
passed by address, but that function never updates its value
so there's no need to do this.  Tighten up the interface by
passing the length directly.

Signed-off-by: Alex Elder <elder@inktank.com>
Reviewed-by: Josh Durgin <josh.durgin@inktank.com>
---
 drivers/block/rbd.c             | 2 +-
 include/linux/ceph/osd_client.h | 2 +-
 net/ceph/osd_client.c           | 6 +++---
 3 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 54bd9fc3ef7c..c1b135b6cb97 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -1172,7 +1172,7 @@ static int rbd_do_request(struct request *rq,
 				snapid, ofs, &len, &bno, osd_req, ops);
 	rbd_assert(ret == 0);
 
-	ceph_osdc_build_request(osd_req, ofs, &len, ops, snapc, &mtime);
+	ceph_osdc_build_request(osd_req, ofs, len, ops, snapc, &mtime);
 
 	if (linger_req) {
 		ceph_osdc_set_request_linger(osdc, osd_req);
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index 61562c792855..4bfb4582439a 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -224,7 +224,7 @@ extern struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *
 					       struct bio *bio);
 
 extern void ceph_osdc_build_request(struct ceph_osd_request *req,
-				    u64 off, u64 *plen,
+				    u64 off, u64 len,
 				    struct ceph_osd_req_op *src_ops,
 				    struct ceph_snap_context *snapc,
 				    struct timespec *mtime);
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 37d43d5b828c..e29a3ed92958 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -336,7 +336,7 @@ static void osd_req_encode_op(struct ceph_osd_request *req,
  *
  */
 void ceph_osdc_build_request(struct ceph_osd_request *req,
-			     u64 off, u64 *plen,
+			     u64 off, u64 len,
 			     struct ceph_osd_req_op *src_ops,
 			     struct ceph_snap_context *snapc,
 			     struct timespec *mtime)
@@ -390,7 +390,7 @@ void ceph_osdc_build_request(struct ceph_osd_request *req,
 
 	if (flags & CEPH_OSD_FLAG_WRITE) {
 		req->r_request->hdr.data_off = cpu_to_le16(off);
-		req->r_request->hdr.data_len = cpu_to_le32(*plen + data_len);
+		req->r_request->hdr.data_len = cpu_to_le32(len + data_len);
 	} else if (data_len) {
 		req->r_request->hdr.data_off = 0;
 		req->r_request->hdr.data_len = cpu_to_le32(data_len);
@@ -464,7 +464,7 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
 	req->r_num_pages = calc_pages_for(page_align, *plen);
 	req->r_page_alignment = page_align;
 
-	ceph_osdc_build_request(req, off, plen, ops,
+	ceph_osdc_build_request(req, off, *plen, ops,
 				snapc,
 				mtime);
 
-- 
cgit v1.2.3


From e8afad656cbcd06d02a7bacd4b318fa0e2907de0 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@inktank.com>
Date: Wed, 14 Nov 2012 09:38:19 -0600
Subject: libceph: pass length to ceph_calc_file_object_mapping()

ceph_calc_file_object_mapping() takes (among other things) a "file"
offset and length, and based on the layout, determines the object
number ("bno") backing the affected portion of the file's data and
the offset into that object where the desired range begins.  It also
computes the size that should be used for the request--either the
amount requested or something less if that would exceed the end of
the object.

This patch changes the input length parameter in this function so it
is used only for input.  That is, the argument will be passed by
value rather than by address, so the value provided won't get
updated by the function.

The value would only get updated if the length would surpass the
current object, and in that case the value it got updated to would
be exactly that returned in *oxlen.

Only one of the two callers is affected by this change.  Update
ceph_calc_raw_layout() so it records any updated value.

Signed-off-by: Alex Elder <elder@inktank.com>
Reviewed-by: Josh Durgin <josh.durgin@inktank.com>
---
 fs/ceph/ioctl.c             | 2 +-
 include/linux/ceph/osdmap.h | 2 +-
 net/ceph/osd_client.c       | 6 ++++--
 net/ceph/osdmap.c           | 9 ++++-----
 4 files changed, 10 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c
index 36549a46e311..3b22150d3e19 100644
--- a/fs/ceph/ioctl.c
+++ b/fs/ceph/ioctl.c
@@ -194,7 +194,7 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg)
 		return -EFAULT;
 
 	down_read(&osdc->map_sem);
-	r = ceph_calc_file_object_mapping(&ci->i_layout, dl.file_offset, &len,
+	r = ceph_calc_file_object_mapping(&ci->i_layout, dl.file_offset, len,
 					  &dl.object_no, &dl.object_offset,
 					  &olen);
 	if (r < 0)
diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h
index 5ea57ba69320..1f653e2ff5cc 100644
--- a/include/linux/ceph/osdmap.h
+++ b/include/linux/ceph/osdmap.h
@@ -110,7 +110,7 @@ extern void ceph_osdmap_destroy(struct ceph_osdmap *map);
 
 /* calculate mapping of a file extent to an object */
 extern int ceph_calc_file_object_mapping(struct ceph_file_layout *layout,
-					 u64 off, u64 *plen,
+					 u64 off, u64 len,
 					 u64 *bno, u64 *oxoff, u64 *oxlen);
 
 /* calculate mapping of object to a placement group */
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index e29a3ed92958..47e5f5b1f94c 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -53,13 +53,15 @@ int ceph_calc_raw_layout(struct ceph_osd_client *osdc,
 	reqhead->snapid = cpu_to_le64(snapid);
 
 	/* object extent? */
-	r = ceph_calc_file_object_mapping(layout, off, plen, bno,
+	r = ceph_calc_file_object_mapping(layout, off, orig_len, bno,
 					  &objoff, &objlen);
 	if (r < 0)
 		return r;
-	if (*plen < orig_len)
+	if (objlen < orig_len) {
+		*plen = objlen;
 		dout(" skipping last %llu, final file extent %llu~%llu\n",
 		     orig_len - *plen, off, *plen);
+	}
 
 	if (op_has_extent(op->op)) {
 		u32 osize = le32_to_cpu(layout->fl_object_size);
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index ca05871635bc..369f03ba9ee5 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -1016,7 +1016,7 @@ bad:
  * pass a stride back to the caller.
  */
 int ceph_calc_file_object_mapping(struct ceph_file_layout *layout,
-				   u64 off, u64 *plen,
+				   u64 off, u64 len,
 				   u64 *ono,
 				   u64 *oxoff, u64 *oxlen)
 {
@@ -1027,7 +1027,7 @@ int ceph_calc_file_object_mapping(struct ceph_file_layout *layout,
 	u32 su_per_object;
 	u64 t, su_offset;
 
-	dout("mapping %llu~%llu  osize %u fl_su %u\n", off, *plen,
+	dout("mapping %llu~%llu  osize %u fl_su %u\n", off, len,
 	     osize, su);
 	if (su == 0 || sc == 0)
 		goto invalid;
@@ -1060,11 +1060,10 @@ int ceph_calc_file_object_mapping(struct ceph_file_layout *layout,
 
 	/*
 	 * Calculate the length of the extent being written to the selected
-	 * object. This is the minimum of the full length requested (plen) or
+	 * object. This is the minimum of the full length requested (len) or
 	 * the remainder of the current stripe being written to.
 	 */
-	*oxlen = min_t(u64, *plen, su - su_offset);
-	*plen = *oxlen;
+	*oxlen = min_t(u64, len, su - su_offset);
 
 	dout(" obj extent %llu~%llu\n", *oxoff, *oxlen);
 	return 0;
-- 
cgit v1.2.3


From 4d6b250bf18d44571d69a0f4afec4b6a1969729f Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@inktank.com>
Date: Tue, 13 Nov 2012 21:11:15 -0600
Subject: libceph: drop snapid in ceph_calc_raw_layout()

A snapshot id must be provided to ceph_calc_raw_layout() even though
it is not needed at all for calculating the layout.

Where the snapshot id *is* needed is when building the request
message for an osd operation.

Drop the snapid parameter from ceph_calc_raw_layout() and pass
that value instead in ceph_osdc_build_request().

Signed-off-by: Alex Elder <elder@inktank.com>
Reviewed-by: Josh Durgin <josh.durgin@inktank.com>
---
 drivers/block/rbd.c             |  4 ++--
 include/linux/ceph/osd_client.h |  2 +-
 net/ceph/osd_client.c           | 14 ++++----------
 3 files changed, 7 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index c1b135b6cb97..fa371868e9b0 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -1169,10 +1169,10 @@ static int rbd_do_request(struct request *rq,
 
 	rbd_layout_init(&osd_req->r_file_layout, rbd_dev->spec->pool_id);
 	ret = ceph_calc_raw_layout(osdc, &osd_req->r_file_layout,
-				snapid, ofs, &len, &bno, osd_req, ops);
+				ofs, &len, &bno, osd_req, ops);
 	rbd_assert(ret == 0);
 
-	ceph_osdc_build_request(osd_req, ofs, len, ops, snapc, &mtime);
+	ceph_osdc_build_request(osd_req, ofs, len, ops, snapc, snapid, &mtime);
 
 	if (linger_req) {
 		ceph_osdc_set_request_linger(osdc, osd_req);
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index 4bfb4582439a..0e82a0a967ef 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -209,7 +209,6 @@ extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc,
 
 extern int ceph_calc_raw_layout(struct ceph_osd_client *osdc,
 			struct ceph_file_layout *layout,
-			u64 snapid,
 			u64 off, u64 *plen, u64 *bno,
 			struct ceph_osd_request *req,
 			struct ceph_osd_req_op *op);
@@ -227,6 +226,7 @@ extern void ceph_osdc_build_request(struct ceph_osd_request *req,
 				    u64 off, u64 len,
 				    struct ceph_osd_req_op *src_ops,
 				    struct ceph_snap_context *snapc,
+				    u64 snap_id,
 				    struct timespec *mtime);
 
 extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *,
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 47e5f5b1f94c..b5a4b2875e8a 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -40,18 +40,14 @@ static int op_has_extent(int op)
 
 int ceph_calc_raw_layout(struct ceph_osd_client *osdc,
 			struct ceph_file_layout *layout,
-			u64 snapid,
 			u64 off, u64 *plen, u64 *bno,
 			struct ceph_osd_request *req,
 			struct ceph_osd_req_op *op)
 {
-	struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base;
 	u64 orig_len = *plen;
 	u64 objoff, objlen;    /* extent in object */
 	int r;
 
-	reqhead->snapid = cpu_to_le64(snapid);
-
 	/* object extent? */
 	r = ceph_calc_file_object_mapping(layout, off, orig_len, bno,
 					  &objoff, &objlen);
@@ -121,8 +117,7 @@ static int calc_layout(struct ceph_osd_client *osdc,
 	u64 bno;
 	int r;
 
-	r = ceph_calc_raw_layout(osdc, layout, vino.snap, off,
-				 plen, &bno, req, op);
+	r = ceph_calc_raw_layout(osdc, layout, off, plen, &bno, req, op);
 	if (r < 0)
 		return r;
 
@@ -340,7 +335,7 @@ static void osd_req_encode_op(struct ceph_osd_request *req,
 void ceph_osdc_build_request(struct ceph_osd_request *req,
 			     u64 off, u64 len,
 			     struct ceph_osd_req_op *src_ops,
-			     struct ceph_snap_context *snapc,
+			     struct ceph_snap_context *snapc, u64 snap_id,
 			     struct timespec *mtime)
 {
 	struct ceph_msg *msg = req->r_request;
@@ -355,6 +350,7 @@ void ceph_osdc_build_request(struct ceph_osd_request *req,
 	int i;
 
 	head = msg->front.iov_base;
+	head->snapid = cpu_to_le64(snap_id);
 	op = (void *)(head + 1);
 	p = (void *)(op + num_op);
 
@@ -466,9 +462,7 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
 	req->r_num_pages = calc_pages_for(page_align, *plen);
 	req->r_page_alignment = page_align;
 
-	ceph_osdc_build_request(req, off, *plen, ops,
-				snapc,
-				mtime);
+	ceph_osdc_build_request(req, off, *plen, ops, snapc, vino.snap, mtime);
 
 	return req;
 }
-- 
cgit v1.2.3


From e75b45cf36565fd8ba206a9d80f670a86e61ba2f Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@inktank.com>
Date: Tue, 13 Nov 2012 21:11:14 -0600
Subject: libceph: drop osdc from ceph_calc_raw_layout()

The osdc parameter to ceph_calc_raw_layout() is not used, so get rid
of it.  Consequently, the corresponding parameter in calc_layout()
becomes unused, so get rid of that as well.

Signed-off-by: Alex Elder <elder@inktank.com>
Reviewed-by: Josh Durgin <josh.durgin@inktank.com>
---
 drivers/block/rbd.c             |  2 +-
 include/linux/ceph/osd_client.h |  3 +--
 net/ceph/osd_client.c           | 10 ++++------
 3 files changed, 6 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index fa371868e9b0..ac8fd3856509 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -1168,7 +1168,7 @@ static int rbd_do_request(struct request *rq,
 	osd_req->r_oid_len = strlen(osd_req->r_oid);
 
 	rbd_layout_init(&osd_req->r_file_layout, rbd_dev->spec->pool_id);
-	ret = ceph_calc_raw_layout(osdc, &osd_req->r_file_layout,
+	ret = ceph_calc_raw_layout(&osd_req->r_file_layout,
 				ofs, &len, &bno, osd_req, ops);
 	rbd_assert(ret == 0);
 
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index 0e82a0a967ef..fe3a6e8db1f9 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -207,8 +207,7 @@ extern void ceph_osdc_handle_reply(struct ceph_osd_client *osdc,
 extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc,
 				 struct ceph_msg *msg);
 
-extern int ceph_calc_raw_layout(struct ceph_osd_client *osdc,
-			struct ceph_file_layout *layout,
+extern int ceph_calc_raw_layout(struct ceph_file_layout *layout,
 			u64 off, u64 *plen, u64 *bno,
 			struct ceph_osd_request *req,
 			struct ceph_osd_req_op *op);
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index b5a4b2875e8a..cd9c28387de3 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -38,8 +38,7 @@ static int op_has_extent(int op)
 		op == CEPH_OSD_OP_WRITE);
 }
 
-int ceph_calc_raw_layout(struct ceph_osd_client *osdc,
-			struct ceph_file_layout *layout,
+int ceph_calc_raw_layout(struct ceph_file_layout *layout,
 			u64 off, u64 *plen, u64 *bno,
 			struct ceph_osd_request *req,
 			struct ceph_osd_req_op *op)
@@ -107,8 +106,7 @@ EXPORT_SYMBOL(ceph_calc_raw_layout);
  *
  * fill osd op in request message.
  */
-static int calc_layout(struct ceph_osd_client *osdc,
-		       struct ceph_vino vino,
+static int calc_layout(struct ceph_vino vino,
 		       struct ceph_file_layout *layout,
 		       u64 off, u64 *plen,
 		       struct ceph_osd_request *req,
@@ -117,7 +115,7 @@ static int calc_layout(struct ceph_osd_client *osdc,
 	u64 bno;
 	int r;
 
-	r = ceph_calc_raw_layout(osdc, layout, off, plen, &bno, req, op);
+	r = ceph_calc_raw_layout(layout, off, plen, &bno, req, op);
 	if (r < 0)
 		return r;
 
@@ -452,7 +450,7 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
 		return ERR_PTR(-ENOMEM);
 
 	/* calculate max write size */
-	r = calc_layout(osdc, vino, layout, off, plen, req, ops);
+	r = calc_layout(vino, layout, off, plen, req, ops);
 	if (r < 0)
 		return ERR_PTR(r);
 	req->r_file_layout = *layout;  /* keep a copy */
-- 
cgit v1.2.3


From d178a9e74006e80f568d87e29f2a68f14fc7cbb1 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@inktank.com>
Date: Tue, 13 Nov 2012 21:11:15 -0600
Subject: libceph: don't set flags in ceph_osdc_alloc_request()

The only thing ceph_osdc_alloc_request() really does with the
flags value it is passed is assign it to the newly-created
osd request structure.  Do that in the caller instead.

Both callers subsequently call ceph_osdc_build_request(), so have
that function (instead of ceph_osdc_alloc_request()) issue a warning
if a request comes through with neither the read nor write flags set.

Signed-off-by: Alex Elder <elder@inktank.com>
Reviewed-by: Josh Durgin <josh.durgin@inktank.com>
---
 drivers/block/rbd.c             |  3 ++-
 include/linux/ceph/osd_client.h |  1 -
 net/ceph/osd_client.c           | 11 ++++-------
 3 files changed, 6 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index ac8fd3856509..bdbaa4cfd9d3 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -1148,13 +1148,14 @@ static int rbd_do_request(struct request *rq,
 		(unsigned long long) len, coll, coll_index);
 
 	osdc = &rbd_dev->rbd_client->client->osdc;
-	osd_req = ceph_osdc_alloc_request(osdc, flags, snapc, ops,
+	osd_req = ceph_osdc_alloc_request(osdc, snapc, ops,
 					false, GFP_NOIO, pages, bio);
 	if (!osd_req) {
 		ret = -ENOMEM;
 		goto done_pages;
 	}
 
+	osd_req->r_flags = flags;
 	osd_req->r_callback = rbd_cb;
 
 	rbd_req->rq = rq;
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index fe3a6e8db1f9..6ddda5bbd1a6 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -213,7 +213,6 @@ extern int ceph_calc_raw_layout(struct ceph_file_layout *layout,
 			struct ceph_osd_req_op *op);
 
 extern struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
-					       int flags,
 					       struct ceph_snap_context *snapc,
 					       struct ceph_osd_req_op *ops,
 					       bool use_mempool,
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index cd9c28387de3..77ce1edaa07d 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -171,7 +171,6 @@ static int get_num_ops(struct ceph_osd_req_op *ops)
 }
 
 struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
-					       int flags,
 					       struct ceph_snap_context *snapc,
 					       struct ceph_osd_req_op *ops,
 					       bool use_mempool,
@@ -208,10 +207,6 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
 	INIT_LIST_HEAD(&req->r_req_lru_item);
 	INIT_LIST_HEAD(&req->r_osd_item);
 
-	req->r_flags = flags;
-
-	WARN_ON((flags & (CEPH_OSD_FLAG_READ|CEPH_OSD_FLAG_WRITE)) == 0);
-
 	/* create reply message */
 	if (use_mempool)
 		msg = ceph_msgpool_get(&osdc->msgpool_op_reply, 0);
@@ -347,6 +342,8 @@ void ceph_osdc_build_request(struct ceph_osd_request *req,
 	u64 data_len = 0;
 	int i;
 
+	WARN_ON((flags & (CEPH_OSD_FLAG_READ|CEPH_OSD_FLAG_WRITE)) == 0);
+
 	head = msg->front.iov_base;
 	head->snapid = cpu_to_le64(snap_id);
 	op = (void *)(head + 1);
@@ -442,12 +439,12 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
 	} else
 		ops[1].op = 0;
 
-	req = ceph_osdc_alloc_request(osdc, flags,
-					 snapc, ops,
+	req = ceph_osdc_alloc_request(osdc, snapc, ops,
 					 use_mempool,
 					 GFP_NOFS, NULL, NULL);
 	if (!req)
 		return ERR_PTR(-ENOMEM);
+	req->r_flags = flags;
 
 	/* calculate max write size */
 	r = calc_layout(vino, layout, off, plen, req, ops);
-- 
cgit v1.2.3


From 54a5400721da7fa5a16cea151aade5bdfee74111 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@inktank.com>
Date: Tue, 13 Nov 2012 21:11:15 -0600
Subject: libceph: don't set pages or bio in ceph_osdc_alloc_request()

Only one of the two callers of ceph_osdc_alloc_request() provides
page or bio data for its payload.  And essentially all that function
was doing with those arguments was assigning them to fields in the
osd request structure.

Simplify ceph_osdc_alloc_request() by having the caller take care of
making those assignments

Signed-off-by: Alex Elder <elder@inktank.com>
Reviewed-by: Josh Durgin <josh.durgin@inktank.com>
---
 drivers/block/rbd.c             |  8 ++++++--
 include/linux/ceph/osd_client.h |  4 +---
 net/ceph/osd_client.c           | 15 ++-------------
 3 files changed, 9 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index bdbaa4cfd9d3..d1445df6398a 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -1148,14 +1148,18 @@ static int rbd_do_request(struct request *rq,
 		(unsigned long long) len, coll, coll_index);
 
 	osdc = &rbd_dev->rbd_client->client->osdc;
-	osd_req = ceph_osdc_alloc_request(osdc, snapc, ops,
-					false, GFP_NOIO, pages, bio);
+	osd_req = ceph_osdc_alloc_request(osdc, snapc, ops, false, GFP_NOIO);
 	if (!osd_req) {
 		ret = -ENOMEM;
 		goto done_pages;
 	}
 
 	osd_req->r_flags = flags;
+	osd_req->r_pages = pages;
+	if (bio) {
+		osd_req->r_bio = bio;
+		bio_get(osd_req->r_bio);
+	}
 	osd_req->r_callback = rbd_cb;
 
 	rbd_req->rq = rq;
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index 6ddda5bbd1a6..75f56d372d44 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -216,9 +216,7 @@ extern struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *
 					       struct ceph_snap_context *snapc,
 					       struct ceph_osd_req_op *ops,
 					       bool use_mempool,
-					       gfp_t gfp_flags,
-					       struct page **pages,
-					       struct bio *bio);
+					       gfp_t gfp_flags);
 
 extern void ceph_osdc_build_request(struct ceph_osd_request *req,
 				    u64 off, u64 len,
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 77ce1edaa07d..bdc3bb12bfd5 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -174,9 +174,7 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
 					       struct ceph_snap_context *snapc,
 					       struct ceph_osd_req_op *ops,
 					       bool use_mempool,
-					       gfp_t gfp_flags,
-					       struct page **pages,
-					       struct bio *bio)
+					       gfp_t gfp_flags)
 {
 	struct ceph_osd_request *req;
 	struct ceph_msg *msg;
@@ -237,13 +235,6 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
 	memset(msg->front.iov_base, 0, msg->front.iov_len);
 
 	req->r_request = msg;
-	req->r_pages = pages;
-#ifdef CONFIG_BLOCK
-	if (bio) {
-		req->r_bio = bio;
-		bio_get(req->r_bio);
-	}
-#endif
 
 	return req;
 }
@@ -439,9 +430,7 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
 	} else
 		ops[1].op = 0;
 
-	req = ceph_osdc_alloc_request(osdc, snapc, ops,
-					 use_mempool,
-					 GFP_NOFS, NULL, NULL);
+	req = ceph_osdc_alloc_request(osdc, snapc, ops, use_mempool, GFP_NOFS);
 	if (!req)
 		return ERR_PTR(-ENOMEM);
 	req->r_flags = flags;
-- 
cgit v1.2.3


From ae7ca4a35b1f5df86e2c32b2cfc01a8d528c7b8c Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@inktank.com>
Date: Tue, 13 Nov 2012 21:11:15 -0600
Subject: libceph: pass num_op with ops

Both ceph_osdc_alloc_request() and ceph_osdc_build_request() are
provided an array of ceph osd request operations.  Rather than just
passing the number of operations in the array, the caller is
required append an additional zeroed operation structure to signal
the end of the array.

All callers know the number of operations at the time these
functions are called, so drop the silly zero entry and supply that
number directly.  As a result, get_num_ops() is no longer needed.
This also means that ceph_osdc_alloc_request() never uses its ops
argument, so that can be dropped.

Also rbd_create_rw_ops() no longer needs to add one to reserve room
for the additional op.

Signed-off-by: Alex Elder <elder@inktank.com>
Reviewed-by: Josh Durgin <josh.durgin@inktank.com>
---
 drivers/block/rbd.c             |  9 +++++----
 include/linux/ceph/osd_client.h |  3 ++-
 net/ceph/osd_client.c           | 43 ++++++++++++++---------------------------
 3 files changed, 22 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index b6872d3cb04c..88de8ccb29bd 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -1026,12 +1026,12 @@ out_err:
 /*
  * helpers for osd request op vectors.
  */
-static struct ceph_osd_req_op *rbd_create_rw_ops(int num_ops,
+static struct ceph_osd_req_op *rbd_create_rw_ops(int num_op,
 					int opcode, u32 payload_len)
 {
 	struct ceph_osd_req_op *ops;
 
-	ops = kzalloc(sizeof (*ops) * (num_ops + 1), GFP_NOIO);
+	ops = kzalloc(num_op * sizeof (*ops), GFP_NOIO);
 	if (!ops)
 		return NULL;
 
@@ -1149,7 +1149,7 @@ static int rbd_do_request(struct request *rq,
 		(unsigned long long) len, coll, coll_index);
 
 	osdc = &rbd_dev->rbd_client->client->osdc;
-	osd_req = ceph_osdc_alloc_request(osdc, snapc, ops, false, GFP_NOIO);
+	osd_req = ceph_osdc_alloc_request(osdc, snapc, num_op, false, GFP_NOIO);
 	if (!osd_req) {
 		ret = -ENOMEM;
 		goto done_pages;
@@ -1178,7 +1178,8 @@ static int rbd_do_request(struct request *rq,
 				ofs, &len, &bno, osd_req, ops);
 	rbd_assert(ret == 0);
 
-	ceph_osdc_build_request(osd_req, ofs, len, ops, snapc, snapid, &mtime);
+	ceph_osdc_build_request(osd_req, ofs, len, num_op, ops,
+				snapc, snapid, &mtime);
 
 	if (linger_req) {
 		ceph_osdc_set_request_linger(osdc, osd_req);
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index 75f56d372d44..2b04d054e09d 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -214,12 +214,13 @@ extern int ceph_calc_raw_layout(struct ceph_file_layout *layout,
 
 extern struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
 					       struct ceph_snap_context *snapc,
-					       struct ceph_osd_req_op *ops,
+					       unsigned int num_op,
 					       bool use_mempool,
 					       gfp_t gfp_flags);
 
 extern void ceph_osdc_build_request(struct ceph_osd_request *req,
 				    u64 off, u64 len,
+				    unsigned int num_op,
 				    struct ceph_osd_req_op *src_ops,
 				    struct ceph_snap_context *snapc,
 				    u64 snap_id,
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index bdc3bb12bfd5..500ae8b49321 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -160,25 +160,14 @@ void ceph_osdc_release_request(struct kref *kref)
 }
 EXPORT_SYMBOL(ceph_osdc_release_request);
 
-static int get_num_ops(struct ceph_osd_req_op *ops)
-{
-	int i = 0;
-
-	while (ops[i].op)
-		i++;
-
-	return i;
-}
-
 struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
 					       struct ceph_snap_context *snapc,
-					       struct ceph_osd_req_op *ops,
+					       unsigned int num_op,
 					       bool use_mempool,
 					       gfp_t gfp_flags)
 {
 	struct ceph_osd_request *req;
 	struct ceph_msg *msg;
-	int num_op = get_num_ops(ops);
 	size_t msg_size = sizeof(struct ceph_osd_request_head);
 
 	msg_size += num_op*sizeof(struct ceph_osd_op);
@@ -317,7 +306,7 @@ static void osd_req_encode_op(struct ceph_osd_request *req,
  *
  */
 void ceph_osdc_build_request(struct ceph_osd_request *req,
-			     u64 off, u64 len,
+			     u64 off, u64 len, unsigned int num_op,
 			     struct ceph_osd_req_op *src_ops,
 			     struct ceph_snap_context *snapc, u64 snap_id,
 			     struct timespec *mtime)
@@ -327,7 +316,6 @@ void ceph_osdc_build_request(struct ceph_osd_request *req,
 	struct ceph_osd_req_op *src_op;
 	struct ceph_osd_op *op;
 	void *p;
-	int num_op = get_num_ops(src_ops);
 	size_t msg_size = sizeof(*head) + num_op*sizeof(*op);
 	int flags = req->r_flags;
 	u64 data_len = 0;
@@ -346,20 +334,17 @@ void ceph_osdc_build_request(struct ceph_osd_request *req,
 	head->flags = cpu_to_le32(flags);
 	if (flags & CEPH_OSD_FLAG_WRITE)
 		ceph_encode_timespec(&head->mtime, mtime);
+	BUG_ON(num_op > (unsigned int) ((u16) -1));
 	head->num_ops = cpu_to_le16(num_op);
 
-
 	/* fill in oid */
 	head->object_len = cpu_to_le32(req->r_oid_len);
 	memcpy(p, req->r_oid, req->r_oid_len);
 	p += req->r_oid_len;
 
 	src_op = src_ops;
-	while (src_op->op) {
-		osd_req_encode_op(req, op, src_op);
-		src_op++;
-		op++;
-	}
+	while (num_op--)
+		osd_req_encode_op(req, op++, src_op++);
 
 	data_len += req->r_trail.length;
 
@@ -414,23 +399,24 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
 					       bool use_mempool, int num_reply,
 					       int page_align)
 {
-	struct ceph_osd_req_op ops[3];
+	struct ceph_osd_req_op ops[2];
 	struct ceph_osd_request *req;
+	unsigned int num_op = 1;
 	int r;
 
+	memset(&ops, 0, sizeof ops);
+
 	ops[0].op = opcode;
 	ops[0].extent.truncate_seq = truncate_seq;
 	ops[0].extent.truncate_size = truncate_size;
-	ops[0].payload_len = 0;
 
 	if (do_sync) {
 		ops[1].op = CEPH_OSD_OP_STARTSYNC;
-		ops[1].payload_len = 0;
-		ops[2].op = 0;
-	} else
-		ops[1].op = 0;
+		num_op++;
+	}
 
-	req = ceph_osdc_alloc_request(osdc, snapc, ops, use_mempool, GFP_NOFS);
+	req = ceph_osdc_alloc_request(osdc, snapc, num_op, use_mempool,
+					GFP_NOFS);
 	if (!req)
 		return ERR_PTR(-ENOMEM);
 	req->r_flags = flags;
@@ -446,7 +432,8 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
 	req->r_num_pages = calc_pages_for(page_align, *plen);
 	req->r_page_alignment = page_align;
 
-	ceph_osdc_build_request(req, off, *plen, ops, snapc, vino.snap, mtime);
+	ceph_osdc_build_request(req, off, *plen, num_op, ops,
+				snapc, vino.snap, mtime);
 
 	return req;
 }
-- 
cgit v1.2.3


From 2b5fc648af5eec2f4fe984cb6b926214e02c5cf4 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@inktank.com>
Date: Wed, 14 Nov 2012 09:38:20 -0600
Subject: rbd: kill ceph_osd_req_op->flags

The flags field of struct ceph_osd_req_op is never used, so just get
rid of it.

Signed-off-by: Alex Elder <elder@inktank.com>
Reviewed-by: Josh Durgin <josh.durgin@inktank.com>
---
 include/linux/ceph/osd_client.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index 2b04d054e09d..69287ccfe68a 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -157,7 +157,6 @@ struct ceph_osd_client {
 
 struct ceph_osd_req_op {
 	u16 op;           /* CEPH_OSD_OP_* */
-	u32 flags;        /* CEPH_OSD_FLAG_* */
 	union {
 		struct {
 			u64 offset, length;
-- 
cgit v1.2.3


From bab734fcdf2c690371e6749ce484b17653b3cb0e Mon Sep 17 00:00:00 2001
From: Fabio Porcedda <fabio.porcedda@gmail.com>
Date: Wed, 9 Jan 2013 12:15:26 +0100
Subject: driver core: add helper macro for platform_driver_probe() boilerplate

For simple modules that contain a single platform_driver without any
additional setup code then ends up being a block of duplicated
boilerplate.  This patch adds a new macro,
module_platform_driver_probe(), which replaces the
module_init()/module_exit() registrations with template functions.

This macro use the same idea of module_platform_driver().

This macro is useful to stop the misuse of module_platform_driver() for
removing the platform_driver_probe() boilerplate.

Signed-off-by: Fabio Porcedda <fabio.porcedda@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/platform_device.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h
index a9ded9a3c175..c082c71f7225 100644
--- a/include/linux/platform_device.h
+++ b/include/linux/platform_device.h
@@ -204,6 +204,24 @@ static inline void platform_set_drvdata(struct platform_device *pdev, void *data
 	module_driver(__platform_driver, platform_driver_register, \
 			platform_driver_unregister)
 
+/* module_platform_driver_probe() - Helper macro for drivers that don't do
+ * anything special in module init/exit.  This eliminates a lot of
+ * boilerplate.  Each module may only use this macro once, and
+ * calling it replaces module_init() and module_exit()
+ */
+#define module_platform_driver_probe(__platform_driver, __platform_probe) \
+static int __init __platform_driver##_init(void) \
+{ \
+	return platform_driver_probe(&(__platform_driver), \
+				     __platform_probe);    \
+} \
+module_init(__platform_driver##_init); \
+static void __exit __platform_driver##_exit(void) \
+{ \
+	platform_driver_unregister(&(__platform_driver)); \
+} \
+module_exit(__platform_driver##_exit);
+
 extern struct platform_device *platform_create_bundle(struct platform_driver *driver,
 					int (*probe)(struct platform_device *),
 					struct resource *res, unsigned int n_res,
-- 
cgit v1.2.3


From fce8a7bb5b4bfb8a27324703fd5b002ee9247e90 Mon Sep 17 00:00:00 2001
From: Jon Mason <jon.mason@intel.com>
Date: Fri, 16 Nov 2012 19:27:12 -0700
Subject: PCI-Express Non-Transparent Bridge Support

A PCI-Express non-transparent bridge (NTB) is a point-to-point PCIe bus
connecting 2 systems, providing electrical isolation between the two subsystems.
A non-transparent bridge is functionally similar to a transparent bridge except
that both sides of the bridge have their own independent address domains.  The
host on one side of the bridge will not have the visibility of the complete
memory or I/O space on the other side of the bridge.  To communicate across the
non-transparent bridge, each NTB endpoint has one (or more) apertures exposed to
the local system.  Writes to these apertures are mirrored to memory on the
remote system.  Communications can also occur through the use of doorbell
registers that initiate interrupts to the alternate domain, and scratch-pad
registers accessible from both sides.

The NTB device driver is needed to configure these memory windows, doorbell, and
scratch-pad registers as well as use them in such a way as they can be turned
into a viable communication channel to the remote system.  ntb_hw.[ch]
determines the usage model (NTB to NTB or NTB to Root Port) and abstracts away
the underlying hardware to provide access and a common interface to the doorbell
registers, scratch pads, and memory windows.  These hardware interfaces are
exported so that other, non-mainlined kernel drivers can access these.
ntb_transport.[ch] also uses the exported interfaces in ntb_hw.[ch] to setup a
communication channel(s) and provide a reliable way of transferring data from
one side to the other, which it then exports so that "client" drivers can access
them.  These client drivers are used to provide a standard kernel interface
(i.e., Ethernet device) to NTB, such that Linux can transfer data from one
system to the other in a standard way.

Signed-off-by: Jon Mason <jon.mason@intel.com>
Reviewed-by: Nicholas Bellinger <nab@linux-iscsi.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 MAINTAINERS                 |    6 +
 drivers/Kconfig             |    2 +
 drivers/Makefile            |    1 +
 drivers/ntb/Kconfig         |   13 +
 drivers/ntb/Makefile        |    3 +
 drivers/ntb/ntb_hw.c        | 1157 +++++++++++++++++++++++++++++++++++
 drivers/ntb/ntb_hw.h        |  181 ++++++
 drivers/ntb/ntb_regs.h      |  139 +++++
 drivers/ntb/ntb_transport.c | 1427 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/ntb.h         |   83 +++
 10 files changed, 3012 insertions(+)
 create mode 100644 drivers/ntb/Kconfig
 create mode 100644 drivers/ntb/Makefile
 create mode 100644 drivers/ntb/ntb_hw.c
 create mode 100644 drivers/ntb/ntb_hw.h
 create mode 100644 drivers/ntb/ntb_regs.h
 create mode 100644 drivers/ntb/ntb_transport.c
 create mode 100644 include/linux/ntb.h

(limited to 'include/linux')

diff --git a/MAINTAINERS b/MAINTAINERS
index 915564eda145..c5d675a8c53d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5394,6 +5394,12 @@ S:	Maintained
 F:	Documentation/scsi/NinjaSCSI.txt
 F:	drivers/scsi/nsp32*
 
+NTB DRIVER
+M:	Jon Mason <jon.mason@intel.com>
+S:	Supported
+F:	drivers/ntb/
+F:	include/linux/ntb.h
+
 NTFS FILESYSTEM
 M:	Anton Altaparmakov <anton@tuxera.com>
 L:	linux-ntfs-dev@lists.sourceforge.net
diff --git a/drivers/Kconfig b/drivers/Kconfig
index f5fb0722a63a..c13044cb8aa9 100644
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@@ -150,6 +150,8 @@ source "drivers/memory/Kconfig"
 
 source "drivers/iio/Kconfig"
 
+source "drivers/ntb/Kconfig"
+
 source "drivers/vme/Kconfig"
 
 source "drivers/pwm/Kconfig"
diff --git a/drivers/Makefile b/drivers/Makefile
index 7863b9fee50b..3d92e1269672 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -146,3 +146,4 @@ obj-$(CONFIG_MEMORY)		+= memory/
 obj-$(CONFIG_IIO)		+= iio/
 obj-$(CONFIG_VME_BUS)		+= vme/
 obj-$(CONFIG_IPACK_BUS)		+= ipack/
+obj-$(CONFIG_NTB)		+= ntb/
diff --git a/drivers/ntb/Kconfig b/drivers/ntb/Kconfig
new file mode 100644
index 000000000000..f69df793dbe2
--- /dev/null
+++ b/drivers/ntb/Kconfig
@@ -0,0 +1,13 @@
+config NTB
+       tristate "Intel Non-Transparent Bridge support"
+       depends on PCI
+       depends on X86
+       help
+        The PCI-E Non-transparent bridge hardware is a point-to-point PCI-E bus
+        connecting 2 systems.  When configured, writes to the device's PCI
+        mapped memory will be mirrored to a buffer on the remote system.  The
+        ntb Linux driver uses this point-to-point communication as a method to
+        transfer data from one system to the other.
+
+        If unsure, say N.
+
diff --git a/drivers/ntb/Makefile b/drivers/ntb/Makefile
new file mode 100644
index 000000000000..15cb59fd354e
--- /dev/null
+++ b/drivers/ntb/Makefile
@@ -0,0 +1,3 @@
+obj-$(CONFIG_NTB) += ntb.o
+
+ntb-objs := ntb_hw.o ntb_transport.o
diff --git a/drivers/ntb/ntb_hw.c b/drivers/ntb/ntb_hw.c
new file mode 100644
index 000000000000..facad51fbc7a
--- /dev/null
+++ b/drivers/ntb/ntb_hw.c
@@ -0,0 +1,1157 @@
+/*
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ *   redistributing this file, you may do so under either license.
+ *
+ *   GPL LICENSE SUMMARY
+ *
+ *   Copyright(c) 2012 Intel Corporation. All rights reserved.
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of version 2 of the GNU General Public License as
+ *   published by the Free Software Foundation.
+ *
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2012 Intel Corporation. All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copy
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Intel PCIe NTB Linux driver
+ *
+ * Contact Information:
+ * Jon Mason <jon.mason@intel.com>
+ */
+#include <linux/debugfs.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include "ntb_hw.h"
+#include "ntb_regs.h"
+
+#define NTB_NAME	"Intel(R) PCI-E Non-Transparent Bridge Driver"
+#define NTB_VER		"0.24"
+
+MODULE_DESCRIPTION(NTB_NAME);
+MODULE_VERSION(NTB_VER);
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("Intel Corporation");
+
+enum {
+	NTB_CONN_CLASSIC = 0,
+	NTB_CONN_B2B,
+	NTB_CONN_RP,
+};
+
+enum {
+	NTB_DEV_USD = 0,
+	NTB_DEV_DSD,
+};
+
+enum {
+	SNB_HW = 0,
+	BWD_HW,
+};
+
+/* Translate memory window 0,1 to BAR 2,4 */
+#define MW_TO_BAR(mw)	(mw * 2 + 2)
+
+static DEFINE_PCI_DEVICE_TABLE(ntb_pci_tbl) = {
+	{PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_BWD)},
+	{PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_JSF)},
+	{PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_CLASSIC_JSF)},
+	{PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_RP_JSF)},
+	{PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_RP_SNB)},
+	{PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_SNB)},
+	{PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_CLASSIC_SNB)},
+	{0}
+};
+MODULE_DEVICE_TABLE(pci, ntb_pci_tbl);
+
+/**
+ * ntb_register_event_callback() - register event callback
+ * @ndev: pointer to ntb_device instance
+ * @func: callback function to register
+ *
+ * This function registers a callback for any HW driver events such as link
+ * up/down, power management notices and etc.
+ *
+ * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
+ */
+int ntb_register_event_callback(struct ntb_device *ndev,
+				void (*func)(void *handle, unsigned int event))
+{
+	if (ndev->event_cb)
+		return -EINVAL;
+
+	ndev->event_cb = func;
+
+	return 0;
+}
+
+/**
+ * ntb_unregister_event_callback() - unregisters the event callback
+ * @ndev: pointer to ntb_device instance
+ *
+ * This function unregisters the existing callback from transport
+ */
+void ntb_unregister_event_callback(struct ntb_device *ndev)
+{
+	ndev->event_cb = NULL;
+}
+
+/**
+ * ntb_register_db_callback() - register a callback for doorbell interrupt
+ * @ndev: pointer to ntb_device instance
+ * @idx: doorbell index to register callback, zero based
+ * @func: callback function to register
+ *
+ * This function registers a callback function for the doorbell interrupt
+ * on the primary side. The function will unmask the doorbell as well to
+ * allow interrupt.
+ *
+ * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
+ */
+int ntb_register_db_callback(struct ntb_device *ndev, unsigned int idx,
+			     void *data, void (*func)(void *data, int db_num))
+{
+	unsigned long mask;
+
+	if (idx >= ndev->max_cbs || ndev->db_cb[idx].callback) {
+		dev_warn(&ndev->pdev->dev, "Invalid Index.\n");
+		return -EINVAL;
+	}
+
+	ndev->db_cb[idx].callback = func;
+	ndev->db_cb[idx].data = data;
+
+	/* unmask interrupt */
+	mask = readw(ndev->reg_ofs.pdb_mask);
+	clear_bit(idx * ndev->bits_per_vector, &mask);
+	writew(mask, ndev->reg_ofs.pdb_mask);
+
+	return 0;
+}
+
+/**
+ * ntb_unregister_db_callback() - unregister a callback for doorbell interrupt
+ * @ndev: pointer to ntb_device instance
+ * @idx: doorbell index to register callback, zero based
+ *
+ * This function unregisters a callback function for the doorbell interrupt
+ * on the primary side. The function will also mask the said doorbell.
+ */
+void ntb_unregister_db_callback(struct ntb_device *ndev, unsigned int idx)
+{
+	unsigned long mask;
+
+	if (idx >= ndev->max_cbs || !ndev->db_cb[idx].callback)
+		return;
+
+	mask = readw(ndev->reg_ofs.pdb_mask);
+	set_bit(idx * ndev->bits_per_vector, &mask);
+	writew(mask, ndev->reg_ofs.pdb_mask);
+
+	ndev->db_cb[idx].callback = NULL;
+}
+
+/**
+ * ntb_find_transport() - find the transport pointer
+ * @transport: pointer to pci device
+ *
+ * Given the pci device pointer, return the transport pointer passed in when
+ * the transport attached when it was inited.
+ *
+ * RETURNS: pointer to transport.
+ */
+void *ntb_find_transport(struct pci_dev *pdev)
+{
+	struct ntb_device *ndev = pci_get_drvdata(pdev);
+	return ndev->ntb_transport;
+}
+
+/**
+ * ntb_register_transport() - Register NTB transport with NTB HW driver
+ * @transport: transport identifier
+ *
+ * This function allows a transport to reserve the hardware driver for
+ * NTB usage.
+ *
+ * RETURNS: pointer to ntb_device, NULL on error.
+ */
+struct ntb_device *ntb_register_transport(struct pci_dev *pdev, void *transport)
+{
+	struct ntb_device *ndev = pci_get_drvdata(pdev);
+
+	if (ndev->ntb_transport)
+		return NULL;
+
+	ndev->ntb_transport = transport;
+	return ndev;
+}
+
+/**
+ * ntb_unregister_transport() - Unregister the transport with the NTB HW driver
+ * @ndev - ntb_device of the transport to be freed
+ *
+ * This function unregisters the transport from the HW driver and performs any
+ * necessary cleanups.
+ */
+void ntb_unregister_transport(struct ntb_device *ndev)
+{
+	int i;
+
+	if (!ndev->ntb_transport)
+		return;
+
+	for (i = 0; i < ndev->max_cbs; i++)
+		ntb_unregister_db_callback(ndev, i);
+
+	ntb_unregister_event_callback(ndev);
+	ndev->ntb_transport = NULL;
+}
+
+/**
+ * ntb_get_max_spads() - get the total scratch regs usable
+ * @ndev: pointer to ntb_device instance
+ *
+ * This function returns the max 32bit scratchpad registers usable by the
+ * upper layer.
+ *
+ * RETURNS: total number of scratch pad registers available
+ */
+int ntb_get_max_spads(struct ntb_device *ndev)
+{
+	return ndev->limits.max_spads;
+}
+
+/**
+ * ntb_write_local_spad() - write to the secondary scratchpad register
+ * @ndev: pointer to ntb_device instance
+ * @idx: index to the scratchpad register, 0 based
+ * @val: the data value to put into the register
+ *
+ * This function allows writing of a 32bit value to the indexed scratchpad
+ * register. This writes over the data mirrored to the local scratchpad register
+ * by the remote system.
+ *
+ * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
+ */
+int ntb_write_local_spad(struct ntb_device *ndev, unsigned int idx, u32 val)
+{
+	if (idx >= ndev->limits.max_spads)
+		return -EINVAL;
+
+	dev_dbg(&ndev->pdev->dev, "Writing %x to local scratch pad index %d\n",
+		val, idx);
+	writel(val, ndev->reg_ofs.spad_read + idx * 4);
+
+	return 0;
+}
+
+/**
+ * ntb_read_local_spad() - read from the primary scratchpad register
+ * @ndev: pointer to ntb_device instance
+ * @idx: index to scratchpad register, 0 based
+ * @val: pointer to 32bit integer for storing the register value
+ *
+ * This function allows reading of the 32bit scratchpad register on
+ * the primary (internal) side.  This allows the local system to read data
+ * written and mirrored to the scratchpad register by the remote system.
+ *
+ * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
+ */
+int ntb_read_local_spad(struct ntb_device *ndev, unsigned int idx, u32 *val)
+{
+	if (idx >= ndev->limits.max_spads)
+		return -EINVAL;
+
+	*val = readl(ndev->reg_ofs.spad_write + idx * 4);
+	dev_dbg(&ndev->pdev->dev,
+		"Reading %x from local scratch pad index %d\n", *val, idx);
+
+	return 0;
+}
+
+/**
+ * ntb_write_remote_spad() - write to the secondary scratchpad register
+ * @ndev: pointer to ntb_device instance
+ * @idx: index to the scratchpad register, 0 based
+ * @val: the data value to put into the register
+ *
+ * This function allows writing of a 32bit value to the indexed scratchpad
+ * register. The register resides on the secondary (external) side.  This allows
+ * the local system to write data to be mirrored to the remote systems
+ * scratchpad register.
+ *
+ * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
+ */
+int ntb_write_remote_spad(struct ntb_device *ndev, unsigned int idx, u32 val)
+{
+	if (idx >= ndev->limits.max_spads)
+		return -EINVAL;
+
+	dev_dbg(&ndev->pdev->dev, "Writing %x to remote scratch pad index %d\n",
+		val, idx);
+	writel(val, ndev->reg_ofs.spad_write + idx * 4);
+
+	return 0;
+}
+
+/**
+ * ntb_read_remote_spad() - read from the primary scratchpad register
+ * @ndev: pointer to ntb_device instance
+ * @idx: index to scratchpad register, 0 based
+ * @val: pointer to 32bit integer for storing the register value
+ *
+ * This function allows reading of the 32bit scratchpad register on
+ * the primary (internal) side.  This alloows the local system to read the data
+ * it wrote to be mirrored on the remote system.
+ *
+ * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
+ */
+int ntb_read_remote_spad(struct ntb_device *ndev, unsigned int idx, u32 *val)
+{
+	if (idx >= ndev->limits.max_spads)
+		return -EINVAL;
+
+	*val = readl(ndev->reg_ofs.spad_read + idx * 4);
+	dev_dbg(&ndev->pdev->dev,
+		"Reading %x from remote scratch pad index %d\n", *val, idx);
+
+	return 0;
+}
+
+/**
+ * ntb_get_mw_vbase() - get virtual addr for the NTB memory window
+ * @ndev: pointer to ntb_device instance
+ * @mw: memory window number
+ *
+ * This function provides the base virtual address of the memory window
+ * specified.
+ *
+ * RETURNS: pointer to virtual address, or NULL on error.
+ */
+void *ntb_get_mw_vbase(struct ntb_device *ndev, unsigned int mw)
+{
+	if (mw > NTB_NUM_MW)
+		return NULL;
+
+	return ndev->mw[mw].vbase;
+}
+
+/**
+ * ntb_get_mw_size() - return size of NTB memory window
+ * @ndev: pointer to ntb_device instance
+ * @mw: memory window number
+ *
+ * This function provides the physical size of the memory window specified
+ *
+ * RETURNS: the size of the memory window or zero on error
+ */
+resource_size_t ntb_get_mw_size(struct ntb_device *ndev, unsigned int mw)
+{
+	if (mw > NTB_NUM_MW)
+		return 0;
+
+	return ndev->mw[mw].bar_sz;
+}
+
+/**
+ * ntb_set_mw_addr - set the memory window address
+ * @ndev: pointer to ntb_device instance
+ * @mw: memory window number
+ * @addr: base address for data
+ *
+ * This function sets the base physical address of the memory window.  This
+ * memory address is where data from the remote system will be transfered into
+ * or out of depending on how the transport is configured.
+ */
+void ntb_set_mw_addr(struct ntb_device *ndev, unsigned int mw, u64 addr)
+{
+	if (mw > NTB_NUM_MW)
+		return;
+
+	dev_dbg(&ndev->pdev->dev, "Writing addr %Lx to BAR %d\n", addr,
+		MW_TO_BAR(mw));
+
+	ndev->mw[mw].phys_addr = addr;
+
+	switch (MW_TO_BAR(mw)) {
+	case NTB_BAR_23:
+		writeq(addr, ndev->reg_ofs.sbar2_xlat);
+		break;
+	case NTB_BAR_45:
+		writeq(addr, ndev->reg_ofs.sbar4_xlat);
+		break;
+	}
+}
+
+/**
+ * ntb_ring_sdb() - Set the doorbell on the secondary/external side
+ * @ndev: pointer to ntb_device instance
+ * @db: doorbell to ring
+ *
+ * This function allows triggering of a doorbell on the secondary/external
+ * side that will initiate an interrupt on the remote host
+ *
+ * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
+ */
+void ntb_ring_sdb(struct ntb_device *ndev, unsigned int db)
+{
+	dev_dbg(&ndev->pdev->dev, "%s: ringing doorbell %d\n", __func__, db);
+
+	if (ndev->hw_type == BWD_HW)
+		writeq((u64) 1 << db, ndev->reg_ofs.sdb);
+	else
+		writew(((1 << ndev->bits_per_vector) - 1) <<
+		       (db * ndev->bits_per_vector), ndev->reg_ofs.sdb);
+}
+
+static void ntb_link_event(struct ntb_device *ndev, int link_state)
+{
+	unsigned int event;
+
+	if (ndev->link_status == link_state)
+		return;
+
+	if (link_state == NTB_LINK_UP) {
+		u16 status;
+
+		dev_info(&ndev->pdev->dev, "Link Up\n");
+		ndev->link_status = NTB_LINK_UP;
+		event = NTB_EVENT_HW_LINK_UP;
+
+		if (ndev->hw_type == BWD_HW)
+			status = readw(ndev->reg_ofs.lnk_stat);
+		else {
+			int rc = pci_read_config_word(ndev->pdev,
+						      SNB_LINK_STATUS_OFFSET,
+						      &status);
+			if (rc)
+				return;
+		}
+		dev_info(&ndev->pdev->dev, "Link Width %d, Link Speed %d\n",
+			 (status & NTB_LINK_WIDTH_MASK) >> 4,
+			 (status & NTB_LINK_SPEED_MASK));
+	} else {
+		dev_info(&ndev->pdev->dev, "Link Down\n");
+		ndev->link_status = NTB_LINK_DOWN;
+		event = NTB_EVENT_HW_LINK_DOWN;
+	}
+
+	/* notify the upper layer if we have an event change */
+	if (ndev->event_cb)
+		ndev->event_cb(ndev->ntb_transport, event);
+}
+
+static int ntb_link_status(struct ntb_device *ndev)
+{
+	int link_state;
+
+	if (ndev->hw_type == BWD_HW) {
+		u32 ntb_cntl;
+
+		ntb_cntl = readl(ndev->reg_ofs.lnk_cntl);
+		if (ntb_cntl & BWD_CNTL_LINK_DOWN)
+			link_state = NTB_LINK_DOWN;
+		else
+			link_state = NTB_LINK_UP;
+	} else {
+		u16 status;
+		int rc;
+
+		rc = pci_read_config_word(ndev->pdev, SNB_LINK_STATUS_OFFSET,
+					  &status);
+		if (rc)
+			return rc;
+
+		if (status & NTB_LINK_STATUS_ACTIVE)
+			link_state = NTB_LINK_UP;
+		else
+			link_state = NTB_LINK_DOWN;
+	}
+
+	ntb_link_event(ndev, link_state);
+
+	return 0;
+}
+
+/* BWD doesn't have link status interrupt, poll on that platform */
+static void bwd_link_poll(struct work_struct *work)
+{
+	struct ntb_device *ndev = container_of(work, struct ntb_device,
+					       hb_timer.work);
+	unsigned long ts = jiffies;
+
+	/* If we haven't gotten an interrupt in a while, check the BWD link
+	 * status bit
+	 */
+	if (ts > ndev->last_ts + NTB_HB_TIMEOUT) {
+		int rc = ntb_link_status(ndev);
+		if (rc)
+			dev_err(&ndev->pdev->dev,
+				"Error determining link status\n");
+	}
+
+	schedule_delayed_work(&ndev->hb_timer, NTB_HB_TIMEOUT);
+}
+
+static int ntb_xeon_setup(struct ntb_device *ndev)
+{
+	int rc;
+	u8 val;
+
+	ndev->hw_type = SNB_HW;
+
+	rc = pci_read_config_byte(ndev->pdev, NTB_PPD_OFFSET, &val);
+	if (rc)
+		return rc;
+
+	switch (val & SNB_PPD_CONN_TYPE) {
+	case NTB_CONN_B2B:
+		ndev->conn_type = NTB_CONN_B2B;
+		break;
+	case NTB_CONN_CLASSIC:
+	case NTB_CONN_RP:
+	default:
+		dev_err(&ndev->pdev->dev, "Only B2B supported at this time\n");
+		return -EINVAL;
+	}
+
+	if (val & SNB_PPD_DEV_TYPE)
+		ndev->dev_type = NTB_DEV_DSD;
+	else
+		ndev->dev_type = NTB_DEV_USD;
+
+	ndev->reg_ofs.pdb = ndev->reg_base + SNB_PDOORBELL_OFFSET;
+	ndev->reg_ofs.pdb_mask = ndev->reg_base + SNB_PDBMSK_OFFSET;
+	ndev->reg_ofs.sbar2_xlat = ndev->reg_base + SNB_SBAR2XLAT_OFFSET;
+	ndev->reg_ofs.sbar4_xlat = ndev->reg_base + SNB_SBAR4XLAT_OFFSET;
+	ndev->reg_ofs.lnk_cntl = ndev->reg_base + SNB_NTBCNTL_OFFSET;
+	ndev->reg_ofs.lnk_stat = ndev->reg_base + SNB_LINK_STATUS_OFFSET;
+	ndev->reg_ofs.spad_read = ndev->reg_base + SNB_SPAD_OFFSET;
+	ndev->reg_ofs.spci_cmd = ndev->reg_base + SNB_PCICMD_OFFSET;
+
+	if (ndev->conn_type == NTB_CONN_B2B) {
+		ndev->reg_ofs.sdb = ndev->reg_base + SNB_B2B_DOORBELL_OFFSET;
+		ndev->reg_ofs.spad_write = ndev->reg_base + SNB_B2B_SPAD_OFFSET;
+		ndev->limits.max_spads = SNB_MAX_SPADS;
+	} else {
+		ndev->reg_ofs.sdb = ndev->reg_base + SNB_SDOORBELL_OFFSET;
+		ndev->reg_ofs.spad_write = ndev->reg_base + SNB_SPAD_OFFSET;
+		ndev->limits.max_spads = SNB_MAX_COMPAT_SPADS;
+	}
+
+	ndev->limits.max_db_bits = SNB_MAX_DB_BITS;
+	ndev->limits.msix_cnt = SNB_MSIX_CNT;
+	ndev->bits_per_vector = SNB_DB_BITS_PER_VEC;
+
+	return 0;
+}
+
+static int ntb_bwd_setup(struct ntb_device *ndev)
+{
+	int rc;
+	u32 val;
+
+	ndev->hw_type = BWD_HW;
+
+	rc = pci_read_config_dword(ndev->pdev, NTB_PPD_OFFSET, &val);
+	if (rc)
+		return rc;
+
+	switch ((val & BWD_PPD_CONN_TYPE) >> 8) {
+	case NTB_CONN_B2B:
+		ndev->conn_type = NTB_CONN_B2B;
+		break;
+	case NTB_CONN_RP:
+	default:
+		dev_err(&ndev->pdev->dev, "Only B2B supported at this time\n");
+		return -EINVAL;
+	}
+
+	if (val & BWD_PPD_DEV_TYPE)
+		ndev->dev_type = NTB_DEV_DSD;
+	else
+		ndev->dev_type = NTB_DEV_USD;
+
+	/* Initiate PCI-E link training */
+	rc = pci_write_config_dword(ndev->pdev, NTB_PPD_OFFSET,
+				    val | BWD_PPD_INIT_LINK);
+	if (rc)
+		return rc;
+
+	ndev->reg_ofs.pdb = ndev->reg_base + BWD_PDOORBELL_OFFSET;
+	ndev->reg_ofs.pdb_mask = ndev->reg_base + BWD_PDBMSK_OFFSET;
+	ndev->reg_ofs.sbar2_xlat = ndev->reg_base + BWD_SBAR2XLAT_OFFSET;
+	ndev->reg_ofs.sbar4_xlat = ndev->reg_base + BWD_SBAR4XLAT_OFFSET;
+	ndev->reg_ofs.lnk_cntl = ndev->reg_base + BWD_NTBCNTL_OFFSET;
+	ndev->reg_ofs.lnk_stat = ndev->reg_base + BWD_LINK_STATUS_OFFSET;
+	ndev->reg_ofs.spad_read = ndev->reg_base + BWD_SPAD_OFFSET;
+	ndev->reg_ofs.spci_cmd = ndev->reg_base + BWD_PCICMD_OFFSET;
+
+	if (ndev->conn_type == NTB_CONN_B2B) {
+		ndev->reg_ofs.sdb = ndev->reg_base + BWD_B2B_DOORBELL_OFFSET;
+		ndev->reg_ofs.spad_write = ndev->reg_base + BWD_B2B_SPAD_OFFSET;
+		ndev->limits.max_spads = BWD_MAX_SPADS;
+	} else {
+		ndev->reg_ofs.sdb = ndev->reg_base + BWD_PDOORBELL_OFFSET;
+		ndev->reg_ofs.spad_write = ndev->reg_base + BWD_SPAD_OFFSET;
+		ndev->limits.max_spads = BWD_MAX_COMPAT_SPADS;
+	}
+
+	ndev->limits.max_db_bits = BWD_MAX_DB_BITS;
+	ndev->limits.msix_cnt = BWD_MSIX_CNT;
+	ndev->bits_per_vector = BWD_DB_BITS_PER_VEC;
+
+	/* Since bwd doesn't have a link interrupt, setup a poll timer */
+	INIT_DELAYED_WORK(&ndev->hb_timer, bwd_link_poll);
+	schedule_delayed_work(&ndev->hb_timer, NTB_HB_TIMEOUT);
+
+	return 0;
+}
+
+static int __devinit ntb_device_setup(struct ntb_device *ndev)
+{
+	int rc;
+
+	switch (ndev->pdev->device) {
+	case PCI_DEVICE_ID_INTEL_NTB_2ND_SNB:
+	case PCI_DEVICE_ID_INTEL_NTB_RP_JSF:
+	case PCI_DEVICE_ID_INTEL_NTB_RP_SNB:
+	case PCI_DEVICE_ID_INTEL_NTB_CLASSIC_JSF:
+	case PCI_DEVICE_ID_INTEL_NTB_CLASSIC_SNB:
+	case PCI_DEVICE_ID_INTEL_NTB_B2B_JSF:
+	case PCI_DEVICE_ID_INTEL_NTB_B2B_SNB:
+		rc = ntb_xeon_setup(ndev);
+		break;
+	case PCI_DEVICE_ID_INTEL_NTB_B2B_BWD:
+		rc = ntb_bwd_setup(ndev);
+		break;
+	default:
+		rc = -ENODEV;
+	}
+
+	/* Enable Bus Master and Memory Space on the secondary side */
+	writew(PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER, ndev->reg_ofs.spci_cmd);
+
+	return rc;
+}
+
+static void ntb_device_free(struct ntb_device *ndev)
+{
+	if (ndev->hw_type == BWD_HW)
+		cancel_delayed_work_sync(&ndev->hb_timer);
+}
+
+static irqreturn_t bwd_callback_msix_irq(int irq, void *data)
+{
+	struct ntb_db_cb *db_cb = data;
+	struct ntb_device *ndev = db_cb->ndev;
+
+	dev_dbg(&ndev->pdev->dev, "MSI-X irq %d received for DB %d\n", irq,
+		db_cb->db_num);
+
+	if (db_cb->callback)
+		db_cb->callback(db_cb->data, db_cb->db_num);
+
+	/* No need to check for the specific HB irq, any interrupt means
+	 * we're connected.
+	 */
+	ndev->last_ts = jiffies;
+
+	writeq((u64) 1 << db_cb->db_num, ndev->reg_ofs.pdb);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t xeon_callback_msix_irq(int irq, void *data)
+{
+	struct ntb_db_cb *db_cb = data;
+	struct ntb_device *ndev = db_cb->ndev;
+
+	dev_dbg(&ndev->pdev->dev, "MSI-X irq %d received for DB %d\n", irq,
+		db_cb->db_num);
+
+	if (db_cb->callback)
+		db_cb->callback(db_cb->data, db_cb->db_num);
+
+	/* On Sandybridge, there are 16 bits in the interrupt register
+	 * but only 4 vectors.  So, 5 bits are assigned to the first 3
+	 * vectors, with the 4th having a single bit for link
+	 * interrupts.
+	 */
+	writew(((1 << ndev->bits_per_vector) - 1) <<
+	       (db_cb->db_num * ndev->bits_per_vector), ndev->reg_ofs.pdb);
+
+	return IRQ_HANDLED;
+}
+
+/* Since we do not have a HW doorbell in BWD, this is only used in JF/JT */
+static irqreturn_t xeon_event_msix_irq(int irq, void *dev)
+{
+	struct ntb_device *ndev = dev;
+	int rc;
+
+	dev_dbg(&ndev->pdev->dev, "MSI-X irq %d received for Events\n", irq);
+
+	rc = ntb_link_status(ndev);
+	if (rc)
+		dev_err(&ndev->pdev->dev, "Error determining link status\n");
+
+	/* bit 15 is always the link bit */
+	writew(1 << ndev->limits.max_db_bits, ndev->reg_ofs.pdb);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t ntb_interrupt(int irq, void *dev)
+{
+	struct ntb_device *ndev = dev;
+	unsigned int i = 0;
+
+	if (ndev->hw_type == BWD_HW) {
+		u64 pdb = readq(ndev->reg_ofs.pdb);
+
+		dev_dbg(&ndev->pdev->dev, "irq %d - pdb = %Lx\n", irq, pdb);
+
+		while (pdb) {
+			i = __ffs(pdb);
+			pdb &= pdb - 1;
+			bwd_callback_msix_irq(irq, &ndev->db_cb[i]);
+		}
+	} else {
+		u16 pdb = readw(ndev->reg_ofs.pdb);
+
+		dev_dbg(&ndev->pdev->dev, "irq %d - pdb = %x sdb %x\n", irq,
+			pdb, readw(ndev->reg_ofs.sdb));
+
+		if (pdb & SNB_DB_HW_LINK) {
+			xeon_event_msix_irq(irq, dev);
+			pdb &= ~SNB_DB_HW_LINK;
+		}
+
+		while (pdb) {
+			i = __ffs(pdb);
+			pdb &= pdb - 1;
+			xeon_callback_msix_irq(irq, &ndev->db_cb[i]);
+		}
+	}
+
+	return IRQ_HANDLED;
+}
+
+static int ntb_setup_msix(struct ntb_device *ndev)
+{
+	struct pci_dev *pdev = ndev->pdev;
+	struct msix_entry *msix;
+	int msix_entries;
+	int rc, i, pos;
+	u16 val;
+
+	pos = pci_find_capability(pdev, PCI_CAP_ID_MSIX);
+	if (!pos) {
+		rc = -EIO;
+		goto err;
+	}
+
+	rc = pci_read_config_word(pdev, pos + PCI_MSIX_FLAGS, &val);
+	if (rc)
+		goto err;
+
+	msix_entries = msix_table_size(val);
+	if (msix_entries > ndev->limits.msix_cnt) {
+		rc = -EINVAL;
+		goto err;
+	}
+
+	ndev->msix_entries = kmalloc(sizeof(struct msix_entry) * msix_entries,
+				     GFP_KERNEL);
+	if (!ndev->msix_entries) {
+		rc = -ENOMEM;
+		goto err;
+	}
+
+	for (i = 0; i < msix_entries; i++)
+		ndev->msix_entries[i].entry = i;
+
+	rc = pci_enable_msix(pdev, ndev->msix_entries, msix_entries);
+	if (rc < 0)
+		goto err1;
+	if (rc > 0) {
+		/* On SNB, the link interrupt is always tied to 4th vector.  If
+		 * we can't get all 4, then we can't use MSI-X.
+		 */
+		if (ndev->hw_type != BWD_HW) {
+			rc = -EIO;
+			goto err1;
+		}
+
+		dev_warn(&pdev->dev,
+			 "Only %d MSI-X vectors.  Limiting the number of queues to that number.\n",
+			 rc);
+		msix_entries = rc;
+	}
+
+	for (i = 0; i < msix_entries; i++) {
+		msix = &ndev->msix_entries[i];
+		WARN_ON(!msix->vector);
+
+		/* Use the last MSI-X vector for Link status */
+		if (ndev->hw_type == BWD_HW) {
+			rc = request_irq(msix->vector, bwd_callback_msix_irq, 0,
+					 "ntb-callback-msix", &ndev->db_cb[i]);
+			if (rc)
+				goto err2;
+		} else {
+			if (i == msix_entries - 1) {
+				rc = request_irq(msix->vector,
+						 xeon_event_msix_irq, 0,
+						 "ntb-event-msix", ndev);
+				if (rc)
+					goto err2;
+			} else {
+				rc = request_irq(msix->vector,
+						 xeon_callback_msix_irq, 0,
+						 "ntb-callback-msix",
+						 &ndev->db_cb[i]);
+				if (rc)
+					goto err2;
+			}
+		}
+	}
+
+	ndev->num_msix = msix_entries;
+	if (ndev->hw_type == BWD_HW)
+		ndev->max_cbs = msix_entries;
+	else
+		ndev->max_cbs = msix_entries - 1;
+
+	return 0;
+
+err2:
+	while (--i >= 0) {
+		msix = &ndev->msix_entries[i];
+		if (ndev->hw_type != BWD_HW && i == ndev->num_msix - 1)
+			free_irq(msix->vector, ndev);
+		else
+			free_irq(msix->vector, &ndev->db_cb[i]);
+	}
+	pci_disable_msix(pdev);
+err1:
+	kfree(ndev->msix_entries);
+	dev_err(&pdev->dev, "Error allocating MSI-X interrupt\n");
+err:
+	ndev->num_msix = 0;
+	return rc;
+}
+
+static int ntb_setup_msi(struct ntb_device *ndev)
+{
+	struct pci_dev *pdev = ndev->pdev;
+	int rc;
+
+	rc = pci_enable_msi(pdev);
+	if (rc)
+		return rc;
+
+	rc = request_irq(pdev->irq, ntb_interrupt, 0, "ntb-msi", ndev);
+	if (rc) {
+		pci_disable_msi(pdev);
+		dev_err(&pdev->dev, "Error allocating MSI interrupt\n");
+		return rc;
+	}
+
+	return 0;
+}
+
+static int ntb_setup_intx(struct ntb_device *ndev)
+{
+	struct pci_dev *pdev = ndev->pdev;
+	int rc;
+
+	pci_msi_off(pdev);
+
+	/* Verify intx is enabled */
+	pci_intx(pdev, 1);
+
+	rc = request_irq(pdev->irq, ntb_interrupt, IRQF_SHARED, "ntb-intx",
+			 ndev);
+	if (rc)
+		return rc;
+
+	return 0;
+}
+
+static int __devinit ntb_setup_interrupts(struct ntb_device *ndev)
+{
+	int rc;
+
+	/* On BWD, disable all interrupts.  On SNB, disable all but Link
+	 * Interrupt.  The rest will be unmasked as callbacks are registered.
+	 */
+	if (ndev->hw_type == BWD_HW)
+		writeq(~0, ndev->reg_ofs.pdb_mask);
+	else
+		writew(~(1 << ndev->limits.max_db_bits),
+		       ndev->reg_ofs.pdb_mask);
+
+	rc = ntb_setup_msix(ndev);
+	if (!rc)
+		goto done;
+
+	ndev->bits_per_vector = 1;
+	ndev->max_cbs = ndev->limits.max_db_bits;
+
+	rc = ntb_setup_msi(ndev);
+	if (!rc)
+		goto done;
+
+	rc = ntb_setup_intx(ndev);
+	if (rc) {
+		dev_err(&ndev->pdev->dev, "no usable interrupts\n");
+		return rc;
+	}
+
+done:
+	return 0;
+}
+
+static void __devexit ntb_free_interrupts(struct ntb_device *ndev)
+{
+	struct pci_dev *pdev = ndev->pdev;
+
+	/* mask interrupts */
+	if (ndev->hw_type == BWD_HW)
+		writeq(~0, ndev->reg_ofs.pdb_mask);
+	else
+		writew(~0, ndev->reg_ofs.pdb_mask);
+
+	if (ndev->num_msix) {
+		struct msix_entry *msix;
+		u32 i;
+
+		for (i = 0; i < ndev->num_msix; i++) {
+			msix = &ndev->msix_entries[i];
+			if (ndev->hw_type != BWD_HW && i == ndev->num_msix - 1)
+				free_irq(msix->vector, ndev);
+			else
+				free_irq(msix->vector, &ndev->db_cb[i]);
+		}
+		pci_disable_msix(pdev);
+	} else {
+		free_irq(pdev->irq, ndev);
+
+		if (pci_dev_msi_enabled(pdev))
+			pci_disable_msi(pdev);
+	}
+}
+
+static int __devinit ntb_create_callbacks(struct ntb_device *ndev)
+{
+	int i;
+
+	/* Checken-egg issue.  We won't know how many callbacks are necessary
+	 * until we see how many MSI-X vectors we get, but these pointers need
+	 * to be passed into the MSI-X register fucntion.  So, we allocate the
+	 * max, knowing that they might not all be used, to work around this.
+	 */
+	ndev->db_cb = kcalloc(ndev->limits.max_db_bits,
+			      sizeof(struct ntb_db_cb),
+			      GFP_KERNEL);
+	if (!ndev->db_cb)
+		return -ENOMEM;
+
+	for (i = 0; i < ndev->limits.max_db_bits; i++) {
+		ndev->db_cb[i].db_num = i;
+		ndev->db_cb[i].ndev = ndev;
+	}
+
+	return 0;
+}
+
+static void ntb_free_callbacks(struct ntb_device *ndev)
+{
+	int i;
+
+	for (i = 0; i < ndev->limits.max_db_bits; i++)
+		ntb_unregister_db_callback(ndev, i);
+
+	kfree(ndev->db_cb);
+}
+
+static int __devinit
+ntb_pci_probe(struct pci_dev *pdev,
+	      __attribute__((unused)) const struct pci_device_id *id)
+{
+	struct ntb_device *ndev;
+	int rc, i;
+
+	ndev = kzalloc(sizeof(struct ntb_device), GFP_KERNEL);
+	if (!ndev)
+		return -ENOMEM;
+
+	ndev->pdev = pdev;
+	ndev->link_status = NTB_LINK_DOWN;
+	pci_set_drvdata(pdev, ndev);
+
+	rc = pci_enable_device(pdev);
+	if (rc)
+		goto err;
+
+	pci_set_master(ndev->pdev);
+
+	rc = pci_request_selected_regions(pdev, NTB_BAR_MASK, KBUILD_MODNAME);
+	if (rc)
+		goto err1;
+
+	ndev->reg_base = pci_ioremap_bar(pdev, NTB_BAR_MMIO);
+	if (!ndev->reg_base) {
+		dev_warn(&pdev->dev, "Cannot remap BAR 0\n");
+		rc = -EIO;
+		goto err2;
+	}
+
+	for (i = 0; i < NTB_NUM_MW; i++) {
+		ndev->mw[i].bar_sz = pci_resource_len(pdev, MW_TO_BAR(i));
+		ndev->mw[i].vbase =
+		    ioremap_wc(pci_resource_start(pdev, MW_TO_BAR(i)),
+			       ndev->mw[i].bar_sz);
+		dev_info(&pdev->dev, "MW %d size %d\n", i,
+			 (u32) pci_resource_len(pdev, MW_TO_BAR(i)));
+		if (!ndev->mw[i].vbase) {
+			dev_warn(&pdev->dev, "Cannot remap BAR %d\n",
+				 MW_TO_BAR(i));
+			rc = -EIO;
+			goto err3;
+		}
+	}
+
+	rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
+	if (rc) {
+		rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+		if (rc)
+			goto err3;
+
+		dev_warn(&pdev->dev, "Cannot DMA highmem\n");
+	}
+
+	rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
+	if (rc) {
+		rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
+		if (rc)
+			goto err3;
+
+		dev_warn(&pdev->dev, "Cannot DMA consistent highmem\n");
+	}
+
+	rc = ntb_device_setup(ndev);
+	if (rc)
+		goto err3;
+
+	rc = ntb_create_callbacks(ndev);
+	if (rc)
+		goto err4;
+
+	rc = ntb_setup_interrupts(ndev);
+	if (rc)
+		goto err5;
+
+	/* The scratchpad registers keep the values between rmmod/insmod,
+	 * blast them now
+	 */
+	for (i = 0; i < ndev->limits.max_spads; i++) {
+		ntb_write_local_spad(ndev, i, 0);
+		ntb_write_remote_spad(ndev, i, 0);
+	}
+
+	rc = ntb_transport_init(pdev);
+	if (rc)
+		goto err6;
+
+	/* Let's bring the NTB link up */
+	writel(NTB_CNTL_BAR23_SNOOP | NTB_CNTL_BAR45_SNOOP,
+	       ndev->reg_ofs.lnk_cntl);
+
+	return 0;
+
+err6:
+	ntb_free_interrupts(ndev);
+err5:
+	ntb_free_callbacks(ndev);
+err4:
+	ntb_device_free(ndev);
+err3:
+	for (i--; i >= 0; i--)
+		iounmap(ndev->mw[i].vbase);
+	iounmap(ndev->reg_base);
+err2:
+	pci_release_selected_regions(pdev, NTB_BAR_MASK);
+err1:
+	pci_disable_device(pdev);
+err:
+	kfree(ndev);
+
+	dev_err(&pdev->dev, "Error loading %s module\n", KBUILD_MODNAME);
+	return rc;
+}
+
+static void __devexit ntb_pci_remove(struct pci_dev *pdev)
+{
+	struct ntb_device *ndev = pci_get_drvdata(pdev);
+	int i;
+	u32 ntb_cntl;
+
+	/* Bring NTB link down */
+	ntb_cntl = readl(ndev->reg_ofs.lnk_cntl);
+	ntb_cntl |= NTB_LINK_DISABLE;
+	writel(ntb_cntl, ndev->reg_ofs.lnk_cntl);
+
+	ntb_transport_free(ndev->ntb_transport);
+
+	ntb_free_interrupts(ndev);
+	ntb_free_callbacks(ndev);
+	ntb_device_free(ndev);
+
+	for (i = 0; i < NTB_NUM_MW; i++)
+		iounmap(ndev->mw[i].vbase);
+
+	iounmap(ndev->reg_base);
+	pci_release_selected_regions(pdev, NTB_BAR_MASK);
+	pci_disable_device(pdev);
+	kfree(ndev);
+}
+
+static struct pci_driver ntb_pci_driver = {
+	.name = KBUILD_MODNAME,
+	.id_table = ntb_pci_tbl,
+	.probe = ntb_pci_probe,
+	.remove = __devexit_p(ntb_pci_remove),
+};
+module_pci_driver(ntb_pci_driver);
diff --git a/drivers/ntb/ntb_hw.h b/drivers/ntb/ntb_hw.h
new file mode 100644
index 000000000000..5e009519c64f
--- /dev/null
+++ b/drivers/ntb/ntb_hw.h
@@ -0,0 +1,181 @@
+/*
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ *   redistributing this file, you may do so under either license.
+ *
+ *   GPL LICENSE SUMMARY
+ *
+ *   Copyright(c) 2012 Intel Corporation. All rights reserved.
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of version 2 of the GNU General Public License as
+ *   published by the Free Software Foundation.
+ *
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2012 Intel Corporation. All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copy
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Intel PCIe NTB Linux driver
+ *
+ * Contact Information:
+ * Jon Mason <jon.mason@intel.com>
+ */
+
+#define PCI_DEVICE_ID_INTEL_NTB_B2B_JSF		0x3725
+#define PCI_DEVICE_ID_INTEL_NTB_CLASSIC_JSF	0x3726
+#define PCI_DEVICE_ID_INTEL_NTB_RP_JSF		0x3727
+#define PCI_DEVICE_ID_INTEL_NTB_RP_SNB		0x3C08
+#define PCI_DEVICE_ID_INTEL_NTB_B2B_SNB		0x3C0D
+#define PCI_DEVICE_ID_INTEL_NTB_CLASSIC_SNB	0x3C0E
+#define PCI_DEVICE_ID_INTEL_NTB_2ND_SNB		0x3C0F
+#define PCI_DEVICE_ID_INTEL_NTB_B2B_BWD		0x0C4E
+
+#define msix_table_size(control)	((control & PCI_MSIX_FLAGS_QSIZE)+1)
+
+#define NTB_BAR_MMIO		0
+#define NTB_BAR_23		2
+#define NTB_BAR_45		4
+#define NTB_BAR_MASK		((1 << NTB_BAR_MMIO) | (1 << NTB_BAR_23) |\
+				 (1 << NTB_BAR_45))
+
+#define NTB_LINK_DOWN		0
+#define NTB_LINK_UP		1
+
+#define NTB_HB_TIMEOUT		msecs_to_jiffies(1000)
+
+#define NTB_NUM_MW		2
+
+enum ntb_hw_event {
+	NTB_EVENT_SW_EVENT0 = 0,
+	NTB_EVENT_SW_EVENT1,
+	NTB_EVENT_SW_EVENT2,
+	NTB_EVENT_HW_ERROR,
+	NTB_EVENT_HW_LINK_UP,
+	NTB_EVENT_HW_LINK_DOWN,
+};
+
+struct ntb_mw {
+	dma_addr_t phys_addr;
+	void __iomem *vbase;
+	resource_size_t bar_sz;
+};
+
+struct ntb_db_cb {
+	void (*callback) (void *data, int db_num);
+	unsigned int db_num;
+	void *data;
+	struct ntb_device *ndev;
+};
+
+struct ntb_device {
+	struct pci_dev *pdev;
+	struct msix_entry *msix_entries;
+	void __iomem *reg_base;
+	struct ntb_mw mw[NTB_NUM_MW];
+	struct {
+		unsigned int max_spads;
+		unsigned int max_db_bits;
+		unsigned int msix_cnt;
+	} limits;
+	struct {
+		void __iomem *pdb;
+		void __iomem *pdb_mask;
+		void __iomem *sdb;
+		void __iomem *sbar2_xlat;
+		void __iomem *sbar4_xlat;
+		void __iomem *spad_write;
+		void __iomem *spad_read;
+		void __iomem *lnk_cntl;
+		void __iomem *lnk_stat;
+		void __iomem *spci_cmd;
+	} reg_ofs;
+	struct ntb_transport *ntb_transport;
+	void (*event_cb)(void *handle, enum ntb_hw_event event);
+
+	struct ntb_db_cb *db_cb;
+	unsigned char hw_type;
+	unsigned char conn_type;
+	unsigned char dev_type;
+	unsigned char num_msix;
+	unsigned char bits_per_vector;
+	unsigned char max_cbs;
+	unsigned char link_status;
+	struct delayed_work hb_timer;
+	unsigned long last_ts;
+};
+
+/**
+ * ntb_hw_link_status() - return the hardware link status
+ * @ndev: pointer to ntb_device instance
+ *
+ * Returns true if the hardware is connected to the remote system
+ *
+ * RETURNS: true or false based on the hardware link state
+ */
+static inline bool ntb_hw_link_status(struct ntb_device *ndev)
+{
+	return ndev->link_status == NTB_LINK_UP;
+}
+
+/**
+ * ntb_query_pdev() - return the pci_dev pointer
+ * @ndev: pointer to ntb_device instance
+ *
+ * Given the ntb pointer return the pci_dev pointerfor the NTB hardware device
+ *
+ * RETURNS: a pointer to the ntb pci_dev
+ */
+static inline struct pci_dev *ntb_query_pdev(struct ntb_device *ndev)
+{
+	return ndev->pdev;
+}
+
+struct ntb_device *ntb_register_transport(struct pci_dev *pdev,
+					  void *transport);
+void ntb_unregister_transport(struct ntb_device *ndev);
+void ntb_set_mw_addr(struct ntb_device *ndev, unsigned int mw, u64 addr);
+int ntb_register_db_callback(struct ntb_device *ndev, unsigned int idx,
+			     void *data, void (*db_cb_func) (void *data,
+							     int db_num));
+void ntb_unregister_db_callback(struct ntb_device *ndev, unsigned int idx);
+int ntb_register_event_callback(struct ntb_device *ndev,
+				void (*event_cb_func) (void *handle,
+						       unsigned int event));
+void ntb_unregister_event_callback(struct ntb_device *ndev);
+int ntb_get_max_spads(struct ntb_device *ndev);
+int ntb_write_local_spad(struct ntb_device *ndev, unsigned int idx, u32 val);
+int ntb_read_local_spad(struct ntb_device *ndev, unsigned int idx, u32 *val);
+int ntb_write_remote_spad(struct ntb_device *ndev, unsigned int idx, u32 val);
+int ntb_read_remote_spad(struct ntb_device *ndev, unsigned int idx, u32 *val);
+void *ntb_get_mw_vbase(struct ntb_device *ndev, unsigned int mw);
+resource_size_t ntb_get_mw_size(struct ntb_device *ndev, unsigned int mw);
+void ntb_ring_sdb(struct ntb_device *ndev, unsigned int idx);
+void *ntb_find_transport(struct pci_dev *pdev);
+
+int ntb_transport_init(struct pci_dev *pdev);
+void ntb_transport_free(void *transport);
diff --git a/drivers/ntb/ntb_regs.h b/drivers/ntb/ntb_regs.h
new file mode 100644
index 000000000000..5bfa8c06c059
--- /dev/null
+++ b/drivers/ntb/ntb_regs.h
@@ -0,0 +1,139 @@
+/*
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ *   redistributing this file, you may do so under either license.
+ *
+ *   GPL LICENSE SUMMARY
+ *
+ *   Copyright(c) 2012 Intel Corporation. All rights reserved.
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of version 2 of the GNU General Public License as
+ *   published by the Free Software Foundation.
+ *
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2012 Intel Corporation. All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copy
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Intel PCIe NTB Linux driver
+ *
+ * Contact Information:
+ * Jon Mason <jon.mason@intel.com>
+ */
+
+#define NTB_LINK_ENABLE		0x0000
+#define NTB_LINK_DISABLE	0x0002
+#define NTB_LINK_STATUS_ACTIVE	0x2000
+#define NTB_LINK_SPEED_MASK	0x000f
+#define NTB_LINK_WIDTH_MASK	0x03f0
+
+#define SNB_MSIX_CNT		4
+#define SNB_MAX_SPADS		16
+#define SNB_MAX_COMPAT_SPADS	8
+/* Reserve the uppermost bit for link interrupt */
+#define SNB_MAX_DB_BITS		15
+#define SNB_DB_BITS_PER_VEC	5
+
+#define SNB_DB_HW_LINK		0x8000
+
+#define SNB_PCICMD_OFFSET	0x0504
+#define SNB_DEVCTRL_OFFSET	0x0598
+#define SNB_LINK_STATUS_OFFSET	0x01A2
+
+#define SNB_PBAR2LMT_OFFSET	0x0000
+#define SNB_PBAR4LMT_OFFSET	0x0008
+#define SNB_PBAR2XLAT_OFFSET	0x0010
+#define SNB_PBAR4XLAT_OFFSET	0x0018
+#define SNB_SBAR2LMT_OFFSET	0x0020
+#define SNB_SBAR4LMT_OFFSET	0x0028
+#define SNB_SBAR2XLAT_OFFSET	0x0030
+#define SNB_SBAR4XLAT_OFFSET	0x0038
+#define SNB_SBAR0BASE_OFFSET	0x0040
+#define SNB_SBAR2BASE_OFFSET	0x0048
+#define SNB_SBAR4BASE_OFFSET	0x0050
+#define SNB_NTBCNTL_OFFSET	0x0058
+#define SNB_SBDF_OFFSET		0x005C
+#define SNB_PDOORBELL_OFFSET	0x0060
+#define SNB_PDBMSK_OFFSET	0x0062
+#define SNB_SDOORBELL_OFFSET	0x0064
+#define SNB_SDBMSK_OFFSET	0x0066
+#define SNB_USMEMMISS		0x0070
+#define SNB_SPAD_OFFSET		0x0080
+#define SNB_SPADSEMA4_OFFSET	0x00c0
+#define SNB_WCCNTRL_OFFSET	0x00e0
+#define SNB_B2B_SPAD_OFFSET	0x0100
+#define SNB_B2B_DOORBELL_OFFSET	0x0140
+#define SNB_B2B_XLAT_OFFSET	0x0144
+
+#define BWD_MSIX_CNT		34
+#define BWD_MAX_SPADS		16
+#define BWD_MAX_COMPAT_SPADS	16
+#define BWD_MAX_DB_BITS		34
+#define BWD_DB_BITS_PER_VEC	1
+
+#define BWD_PCICMD_OFFSET	0xb004
+#define BWD_MBAR23_OFFSET	0xb018
+#define BWD_MBAR45_OFFSET	0xb020
+#define BWD_DEVCTRL_OFFSET	0xb048
+#define BWD_LINK_STATUS_OFFSET	0xb052
+
+#define BWD_SBAR2XLAT_OFFSET	0x0008
+#define BWD_SBAR4XLAT_OFFSET	0x0010
+#define BWD_PDOORBELL_OFFSET	0x0020
+#define BWD_PDBMSK_OFFSET	0x0028
+#define BWD_NTBCNTL_OFFSET	0x0060
+#define BWD_EBDF_OFFSET		0x0064
+#define BWD_SPAD_OFFSET		0x0080
+#define BWD_SPADSEMA_OFFSET	0x00c0
+#define BWD_STKYSPAD_OFFSET	0x00c4
+#define BWD_PBAR2XLAT_OFFSET	0x8008
+#define BWD_PBAR4XLAT_OFFSET	0x8010
+#define BWD_B2B_DOORBELL_OFFSET	0x8020
+#define BWD_B2B_SPAD_OFFSET	0x8080
+#define BWD_B2B_SPADSEMA_OFFSET	0x80c0
+#define BWD_B2B_STKYSPAD_OFFSET	0x80c4
+
+#define NTB_CNTL_BAR23_SNOOP	(1 << 2)
+#define NTB_CNTL_BAR45_SNOOP	(1 << 6)
+#define BWD_CNTL_LINK_DOWN	(1 << 16)
+
+#define NTB_PPD_OFFSET		0x00D4
+#define SNB_PPD_CONN_TYPE	0x0003
+#define SNB_PPD_DEV_TYPE	0x0010
+#define BWD_PPD_INIT_LINK	0x0008
+#define BWD_PPD_CONN_TYPE	0x0300
+#define BWD_PPD_DEV_TYPE	0x1000
+
+#define BWD_PBAR2XLAT_USD_ADDR	0x0000004000000000
+#define BWD_PBAR4XLAT_USD_ADDR	0x0000008000000000
+#define BWD_MBAR23_USD_ADDR	0x000000410000000C
+#define BWD_MBAR45_USD_ADDR	0x000000810000000C
+#define BWD_PBAR2XLAT_DSD_ADDR	0x0000004100000000
+#define BWD_PBAR4XLAT_DSD_ADDR	0x0000008100000000
+#define BWD_MBAR23_DSD_ADDR	0x000000400000000C
+#define BWD_MBAR45_DSD_ADDR	0x000000800000000C
diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c
new file mode 100644
index 000000000000..c907e0773532
--- /dev/null
+++ b/drivers/ntb/ntb_transport.c
@@ -0,0 +1,1427 @@
+/*
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ *   redistributing this file, you may do so under either license.
+ *
+ *   GPL LICENSE SUMMARY
+ *
+ *   Copyright(c) 2012 Intel Corporation. All rights reserved.
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of version 2 of the GNU General Public License as
+ *   published by the Free Software Foundation.
+ *
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2012 Intel Corporation. All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copy
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Intel PCIe NTB Linux driver
+ *
+ * Contact Information:
+ * Jon Mason <jon.mason@intel.com>
+ */
+#include <linux/debugfs.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/errno.h>
+#include <linux/export.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/ntb.h>
+#include "ntb_hw.h"
+
+#define NTB_TRANSPORT_VERSION	1
+
+static int transport_mtu = 0x401E;
+module_param(transport_mtu, uint, 0644);
+MODULE_PARM_DESC(transport_mtu, "Maximum size of NTB transport packets");
+
+static unsigned char max_num_clients = 2;
+module_param(max_num_clients, byte, 0644);
+MODULE_PARM_DESC(max_num_clients, "Maximum number of NTB transport clients");
+
+struct ntb_queue_entry {
+	/* ntb_queue list reference */
+	struct list_head entry;
+	/* pointers to data to be transfered */
+	void *cb_data;
+	void *buf;
+	unsigned int len;
+	unsigned int flags;
+};
+
+struct ntb_transport_qp {
+	struct ntb_transport *transport;
+	struct ntb_device *ndev;
+	void *cb_data;
+
+	bool client_ready;
+	bool qp_link;
+	u8 qp_num;	/* Only 64 QP's are allowed.  0-63 */
+
+	void (*tx_handler) (struct ntb_transport_qp *qp, void *qp_data,
+			    void *data, int len);
+	struct list_head tx_free_q;
+	spinlock_t ntb_tx_free_q_lock;
+	void *tx_mw_begin;
+	void *tx_mw_end;
+	void *tx_offset;
+
+	void (*rx_handler) (struct ntb_transport_qp *qp, void *qp_data,
+			    void *data, int len);
+	struct tasklet_struct rx_work;
+	struct list_head rx_pend_q;
+	struct list_head rx_free_q;
+	spinlock_t ntb_rx_pend_q_lock;
+	spinlock_t ntb_rx_free_q_lock;
+	void *rx_buff_begin;
+	void *rx_buff_end;
+	void *rx_offset;
+
+	void (*event_handler) (void *data, int status);
+	struct delayed_work link_work;
+
+	struct dentry *debugfs_dir;
+	struct dentry *debugfs_stats;
+
+	/* Stats */
+	u64 rx_bytes;
+	u64 rx_pkts;
+	u64 rx_ring_empty;
+	u64 rx_err_no_buf;
+	u64 rx_err_oflow;
+	u64 rx_err_ver;
+	u64 tx_bytes;
+	u64 tx_pkts;
+	u64 tx_ring_full;
+};
+
+struct ntb_transport_mw {
+	size_t size;
+	void *virt_addr;
+	dma_addr_t dma_addr;
+};
+
+struct ntb_transport_client_dev {
+	struct list_head entry;
+	struct device dev;
+};
+
+struct ntb_transport {
+	struct list_head entry;
+	struct list_head client_devs;
+
+	struct ntb_device *ndev;
+	struct ntb_transport_mw mw[NTB_NUM_MW];
+	struct ntb_transport_qp *qps;
+	unsigned int max_qps;
+	unsigned long qp_bitmap;
+	bool transport_link;
+	struct delayed_work link_work;
+	struct dentry *debugfs_dir;
+};
+
+enum {
+	DESC_DONE_FLAG = 1 << 0,
+	LINK_DOWN_FLAG = 1 << 1,
+};
+
+struct ntb_payload_header {
+	u64 ver;
+	unsigned int len;
+	unsigned int flags;
+};
+
+enum {
+	VERSION = 0,
+	MW0_SZ,
+	MW1_SZ,
+	NUM_QPS,
+	QP_LINKS,
+	MAX_SPAD,
+};
+
+#define QP_TO_MW(qp)		((qp) % NTB_NUM_MW)
+#define NTB_QP_DEF_NUM_ENTRIES	100
+#define NTB_LINK_DOWN_TIMEOUT	10
+
+static int ntb_match_bus(struct device *dev, struct device_driver *drv)
+{
+	return !strncmp(dev_name(dev), drv->name, strlen(drv->name));
+}
+
+static int ntb_client_probe(struct device *dev)
+{
+	const struct ntb_client *drv = container_of(dev->driver,
+						    struct ntb_client, driver);
+	struct pci_dev *pdev = container_of(dev->parent, struct pci_dev, dev);
+	int rc = -EINVAL;
+
+	get_device(dev);
+	if (drv && drv->probe)
+		rc = drv->probe(pdev);
+	if (rc)
+		put_device(dev);
+
+	return rc;
+}
+
+static int ntb_client_remove(struct device *dev)
+{
+	const struct ntb_client *drv = container_of(dev->driver,
+						    struct ntb_client, driver);
+	struct pci_dev *pdev = container_of(dev->parent, struct pci_dev, dev);
+
+	if (drv && drv->remove)
+		drv->remove(pdev);
+
+	put_device(dev);
+
+	return 0;
+}
+
+struct bus_type ntb_bus_type = {
+	.name = "ntb_bus",
+	.match = ntb_match_bus,
+	.probe = ntb_client_probe,
+	.remove = ntb_client_remove,
+};
+
+static LIST_HEAD(ntb_transport_list);
+
+static int __devinit ntb_bus_init(struct ntb_transport *nt)
+{
+	if (list_empty(&ntb_transport_list)) {
+		int rc = bus_register(&ntb_bus_type);
+		if (rc)
+			return rc;
+	}
+
+	list_add(&nt->entry, &ntb_transport_list);
+
+	return 0;
+}
+
+static void __devexit ntb_bus_remove(struct ntb_transport *nt)
+{
+	struct ntb_transport_client_dev *client_dev, *cd;
+
+	list_for_each_entry_safe(client_dev, cd, &nt->client_devs, entry) {
+		dev_err(client_dev->dev.parent, "%s still attached to bus, removing\n",
+			dev_name(&client_dev->dev));
+		list_del(&client_dev->entry);
+		device_unregister(&client_dev->dev);
+	}
+
+	list_del(&nt->entry);
+
+	if (list_empty(&ntb_transport_list))
+		bus_unregister(&ntb_bus_type);
+}
+
+static void ntb_client_release(struct device *dev)
+{
+	struct ntb_transport_client_dev *client_dev;
+	client_dev = container_of(dev, struct ntb_transport_client_dev, dev);
+
+	kfree(client_dev);
+}
+
+/**
+ * ntb_unregister_client_dev - Unregister NTB client device
+ * @device_name: Name of NTB client device
+ *
+ * Unregister an NTB client device with the NTB transport layer
+ */
+void ntb_unregister_client_dev(char *device_name)
+{
+	struct ntb_transport_client_dev *client, *cd;
+	struct ntb_transport *nt;
+
+	list_for_each_entry(nt, &ntb_transport_list, entry)
+		list_for_each_entry_safe(client, cd, &nt->client_devs, entry)
+			if (!strncmp(dev_name(&client->dev), device_name,
+				     strlen(device_name))) {
+				list_del(&client->entry);
+				device_unregister(&client->dev);
+			}
+}
+EXPORT_SYMBOL_GPL(ntb_unregister_client_dev);
+
+/**
+ * ntb_register_client_dev - Register NTB client device
+ * @device_name: Name of NTB client device
+ *
+ * Register an NTB client device with the NTB transport layer
+ */
+int ntb_register_client_dev(char *device_name)
+{
+	struct ntb_transport_client_dev *client_dev;
+	struct ntb_transport *nt;
+	int rc;
+
+	list_for_each_entry(nt, &ntb_transport_list, entry) {
+		struct device *dev;
+
+		client_dev = kzalloc(sizeof(struct ntb_transport_client_dev),
+				     GFP_KERNEL);
+		if (!client_dev) {
+			rc = -ENOMEM;
+			goto err;
+		}
+
+		dev = &client_dev->dev;
+
+		/* setup and register client devices */
+		dev_set_name(dev, "%s", device_name);
+		dev->bus = &ntb_bus_type;
+		dev->release = ntb_client_release;
+		dev->parent = &ntb_query_pdev(nt->ndev)->dev;
+
+		rc = device_register(dev);
+		if (rc) {
+			kfree(client_dev);
+			goto err;
+		}
+
+		list_add_tail(&client_dev->entry, &nt->client_devs);
+	}
+
+	return 0;
+
+err:
+	ntb_unregister_client_dev(device_name);
+
+	return rc;
+}
+EXPORT_SYMBOL_GPL(ntb_register_client_dev);
+
+/**
+ * ntb_register_client - Register NTB client driver
+ * @drv: NTB client driver to be registered
+ *
+ * Register an NTB client driver with the NTB transport layer
+ *
+ * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
+ */
+int ntb_register_client(struct ntb_client *drv)
+{
+	drv->driver.bus = &ntb_bus_type;
+
+	return driver_register(&drv->driver);
+}
+EXPORT_SYMBOL_GPL(ntb_register_client);
+
+/**
+ * ntb_unregister_client - Unregister NTB client driver
+ * @drv: NTB client driver to be unregistered
+ *
+ * Unregister an NTB client driver with the NTB transport layer
+ *
+ * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
+ */
+void ntb_unregister_client(struct ntb_client *drv)
+{
+	driver_unregister(&drv->driver);
+}
+EXPORT_SYMBOL_GPL(ntb_unregister_client);
+
+static int debugfs_open(struct inode *inode, struct file *filp)
+{
+	filp->private_data = inode->i_private;
+	return 0;
+}
+
+static ssize_t debugfs_read(struct file *filp, char __user *ubuf, size_t count,
+			    loff_t *offp)
+{
+	struct ntb_transport_qp *qp;
+	char buf[1024];
+	ssize_t ret, out_offset, out_count;
+
+	out_count = 1024;
+
+	qp = filp->private_data;
+	out_offset = 0;
+	out_offset += snprintf(buf + out_offset, out_count - out_offset,
+			       "NTB QP stats\n");
+	out_offset += snprintf(buf + out_offset, out_count - out_offset,
+			       "rx_bytes - \t%llu\n", qp->rx_bytes);
+	out_offset += snprintf(buf + out_offset, out_count - out_offset,
+			       "rx_pkts - \t%llu\n", qp->rx_pkts);
+	out_offset += snprintf(buf + out_offset, out_count - out_offset,
+			       "rx_ring_empty - %llu\n", qp->rx_ring_empty);
+	out_offset += snprintf(buf + out_offset, out_count - out_offset,
+			       "rx_err_no_buf - %llu\n", qp->rx_err_no_buf);
+	out_offset += snprintf(buf + out_offset, out_count - out_offset,
+			       "rx_err_oflow - \t%llu\n", qp->rx_err_oflow);
+	out_offset += snprintf(buf + out_offset, out_count - out_offset,
+			       "rx_err_ver - \t%llu\n", qp->rx_err_ver);
+	out_offset += snprintf(buf + out_offset, out_count - out_offset,
+			       "rx_buff_begin - %p\n", qp->rx_buff_begin);
+	out_offset += snprintf(buf + out_offset, out_count - out_offset,
+			       "rx_offset - \t%p\n", qp->rx_offset);
+	out_offset += snprintf(buf + out_offset, out_count - out_offset,
+			       "rx_buff_end - \t%p\n", qp->rx_buff_end);
+
+	out_offset += snprintf(buf + out_offset, out_count - out_offset,
+			       "tx_bytes - \t%llu\n", qp->tx_bytes);
+	out_offset += snprintf(buf + out_offset, out_count - out_offset,
+			       "tx_pkts - \t%llu\n", qp->tx_pkts);
+	out_offset += snprintf(buf + out_offset, out_count - out_offset,
+			       "tx_ring_full - \t%llu\n", qp->tx_ring_full);
+	out_offset += snprintf(buf + out_offset, out_count - out_offset,
+			       "tx_mw_begin - \t%p\n", qp->tx_mw_begin);
+	out_offset += snprintf(buf + out_offset, out_count - out_offset,
+			       "tx_offset - \t%p\n", qp->tx_offset);
+	out_offset += snprintf(buf + out_offset, out_count - out_offset,
+			       "tx_mw_end - \t%p\n", qp->tx_mw_end);
+
+	out_offset += snprintf(buf + out_offset, out_count - out_offset,
+			       "QP Link %s\n", (qp->qp_link == NTB_LINK_UP) ?
+			       "Up" : "Down");
+
+	ret = simple_read_from_buffer(ubuf, count, offp, buf, out_offset);
+	return ret;
+}
+
+static const struct file_operations ntb_qp_debugfs_stats = {
+	.owner = THIS_MODULE,
+	.open = debugfs_open,
+	.read = debugfs_read,
+};
+
+static void ntb_list_add(spinlock_t *lock, struct list_head *entry,
+			 struct list_head *list)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(lock, flags);
+	list_add_tail(entry, list);
+	spin_unlock_irqrestore(lock, flags);
+}
+
+static struct ntb_queue_entry *ntb_list_rm(spinlock_t *lock,
+						struct list_head *list)
+{
+	struct ntb_queue_entry *entry;
+	unsigned long flags;
+
+	spin_lock_irqsave(lock, flags);
+	if (list_empty(list)) {
+		entry = NULL;
+		goto out;
+	}
+	entry = list_first_entry(list, struct ntb_queue_entry, entry);
+	list_del(&entry->entry);
+out:
+	spin_unlock_irqrestore(lock, flags);
+
+	return entry;
+}
+
+static void ntb_transport_setup_qp_mw(struct ntb_transport *nt,
+				      unsigned int qp_num)
+{
+	struct ntb_transport_qp *qp = &nt->qps[qp_num];
+	unsigned int size, num_qps_mw;
+	u8 mw_num = QP_TO_MW(qp_num);
+
+	WARN_ON(nt->mw[mw_num].virt_addr == 0);
+
+	if (nt->max_qps % NTB_NUM_MW && !mw_num)
+		num_qps_mw = nt->max_qps / NTB_NUM_MW +
+			     (nt->max_qps % NTB_NUM_MW - mw_num);
+	else
+		num_qps_mw = nt->max_qps / NTB_NUM_MW;
+
+	size = nt->mw[mw_num].size / num_qps_mw;
+
+	qp->rx_buff_begin = nt->mw[mw_num].virt_addr +
+			    (qp_num / NTB_NUM_MW * size);
+	qp->rx_buff_end = qp->rx_buff_begin + size;
+	qp->rx_offset = qp->rx_buff_begin;
+
+	qp->tx_mw_begin = ntb_get_mw_vbase(nt->ndev, mw_num) +
+			  (qp_num / NTB_NUM_MW * size);
+	qp->tx_mw_end = qp->tx_mw_begin + size;
+	qp->tx_offset = qp->tx_mw_begin;
+
+	qp->rx_pkts = 0;
+	qp->tx_pkts = 0;
+}
+
+static int ntb_set_mw(struct ntb_transport *nt, int num_mw, unsigned int size)
+{
+	struct ntb_transport_mw *mw = &nt->mw[num_mw];
+	struct pci_dev *pdev = ntb_query_pdev(nt->ndev);
+	void *offset;
+
+	/* Alloc memory for receiving data.  Must be 4k aligned */
+	mw->size = ALIGN(size, 4096);
+
+	mw->virt_addr = dma_alloc_coherent(&pdev->dev, mw->size, &mw->dma_addr,
+					   GFP_KERNEL);
+	if (!mw->virt_addr) {
+		dev_err(&pdev->dev, "Unable to allocate MW buffer of size %d\n",
+		       (int) mw->size);
+		return -ENOMEM;
+	}
+
+	/* setup the hdr offsets with 0's */
+	for (offset = mw->virt_addr + transport_mtu -
+		      sizeof(struct ntb_payload_header);
+	     offset < mw->virt_addr + size; offset += transport_mtu)
+		memset(offset, 0, sizeof(struct ntb_payload_header));
+
+	/* Notify HW the memory location of the receive buffer */
+	ntb_set_mw_addr(nt->ndev, num_mw, mw->dma_addr);
+
+	return 0;
+}
+
+static void ntb_qp_link_down(struct ntb_transport_qp *qp)
+{
+	struct ntb_transport *nt = qp->transport;
+	struct pci_dev *pdev = ntb_query_pdev(nt->ndev);
+
+	if (qp->qp_link == NTB_LINK_DOWN) {
+		cancel_delayed_work_sync(&qp->link_work);
+		return;
+	}
+
+	if (qp->event_handler)
+		qp->event_handler(qp->cb_data, NTB_LINK_DOWN);
+
+	dev_info(&pdev->dev, "qp %d: Link Down\n", qp->qp_num);
+	qp->qp_link = NTB_LINK_DOWN;
+
+	if (nt->transport_link == NTB_LINK_UP)
+		schedule_delayed_work(&qp->link_work,
+				      msecs_to_jiffies(NTB_LINK_DOWN_TIMEOUT));
+}
+
+static void ntb_transport_conn_down(struct ntb_transport *nt)
+{
+	int i;
+
+	if (nt->transport_link == NTB_LINK_DOWN)
+		cancel_delayed_work_sync(&nt->link_work);
+	else
+		nt->transport_link = NTB_LINK_DOWN;
+
+	/* Pass along the info to any clients */
+	for (i = 0; i < nt->max_qps; i++)
+		if (!test_bit(i, &nt->qp_bitmap))
+			ntb_qp_link_down(&nt->qps[i]);
+
+	/* The scratchpad registers keep the values if the remote side
+	 * goes down, blast them now to give them a sane value the next
+	 * time they are accessed
+	 */
+	for (i = 0; i < MAX_SPAD; i++)
+		ntb_write_local_spad(nt->ndev, i, 0);
+}
+
+static void ntb_transport_event_callback(void *data, enum ntb_hw_event event)
+{
+	struct ntb_transport *nt = data;
+
+	switch (event) {
+	case NTB_EVENT_HW_LINK_UP:
+		schedule_delayed_work(&nt->link_work, 0);
+		break;
+	case NTB_EVENT_HW_LINK_DOWN:
+		ntb_transport_conn_down(nt);
+		break;
+	default:
+		BUG();
+	}
+}
+
+static void ntb_transport_link_work(struct work_struct *work)
+{
+	struct ntb_transport *nt = container_of(work, struct ntb_transport,
+						link_work.work);
+	struct ntb_device *ndev = nt->ndev;
+	struct pci_dev *pdev = ntb_query_pdev(ndev);
+	u32 val;
+	int rc, i;
+
+	/* send the local info */
+	rc = ntb_write_remote_spad(ndev, VERSION, NTB_TRANSPORT_VERSION);
+	if (rc) {
+		dev_err(&pdev->dev, "Error writing %x to remote spad %d\n",
+			0, VERSION);
+		goto out;
+	}
+
+	rc = ntb_write_remote_spad(ndev, MW0_SZ, ntb_get_mw_size(ndev, 0));
+	if (rc) {
+		dev_err(&pdev->dev, "Error writing %x to remote spad %d\n",
+			(u32) ntb_get_mw_size(ndev, 0), MW0_SZ);
+		goto out;
+	}
+
+	rc = ntb_write_remote_spad(ndev, MW1_SZ, ntb_get_mw_size(ndev, 1));
+	if (rc) {
+		dev_err(&pdev->dev, "Error writing %x to remote spad %d\n",
+			(u32) ntb_get_mw_size(ndev, 1), MW1_SZ);
+		goto out;
+	}
+
+	rc = ntb_write_remote_spad(ndev, NUM_QPS, nt->max_qps);
+	if (rc) {
+		dev_err(&pdev->dev, "Error writing %x to remote spad %d\n",
+			nt->max_qps, NUM_QPS);
+		goto out;
+	}
+
+	rc = ntb_read_local_spad(nt->ndev, QP_LINKS, &val);
+	if (rc) {
+		dev_err(&pdev->dev, "Error reading spad %d\n", QP_LINKS);
+		goto out;
+	}
+
+	rc = ntb_write_remote_spad(ndev, QP_LINKS, val);
+	if (rc) {
+		dev_err(&pdev->dev, "Error writing %x to remote spad %d\n",
+			val, QP_LINKS);
+		goto out;
+	}
+
+	/* Query the remote side for its info */
+	rc = ntb_read_remote_spad(ndev, VERSION, &val);
+	if (rc) {
+		dev_err(&pdev->dev, "Error reading remote spad %d\n", VERSION);
+		goto out;
+	}
+
+	if (val != NTB_TRANSPORT_VERSION)
+		goto out;
+	dev_dbg(&pdev->dev, "Remote version = %d\n", val);
+
+	rc = ntb_read_remote_spad(ndev, NUM_QPS, &val);
+	if (rc) {
+		dev_err(&pdev->dev, "Error reading remote spad %d\n", NUM_QPS);
+		goto out;
+	}
+
+	if (val != nt->max_qps)
+		goto out;
+	dev_dbg(&pdev->dev, "Remote max number of qps = %d\n", val);
+
+	rc = ntb_read_remote_spad(ndev, MW0_SZ, &val);
+	if (rc) {
+		dev_err(&pdev->dev, "Error reading remote spad %d\n", MW0_SZ);
+		goto out;
+	}
+
+	if (!val)
+		goto out;
+	dev_dbg(&pdev->dev, "Remote MW0 size = %d\n", val);
+
+	rc = ntb_set_mw(nt, 0, val);
+	if (rc)
+		goto out;
+
+	rc = ntb_read_remote_spad(ndev, MW1_SZ, &val);
+	if (rc) {
+		dev_err(&pdev->dev, "Error reading remote spad %d\n", MW1_SZ);
+		goto out;
+	}
+
+	if (!val)
+		goto out;
+	dev_dbg(&pdev->dev, "Remote MW1 size = %d\n", val);
+
+	rc = ntb_set_mw(nt, 1, val);
+	if (rc)
+		goto out;
+
+	nt->transport_link = NTB_LINK_UP;
+
+	for (i = 0; i < nt->max_qps; i++) {
+		struct ntb_transport_qp *qp = &nt->qps[i];
+
+		ntb_transport_setup_qp_mw(nt, i);
+
+		if (qp->client_ready == NTB_LINK_UP)
+			schedule_delayed_work(&qp->link_work, 0);
+	}
+
+	return;
+
+out:
+	if (ntb_hw_link_status(ndev))
+		schedule_delayed_work(&nt->link_work,
+				      msecs_to_jiffies(NTB_LINK_DOWN_TIMEOUT));
+}
+
+static void ntb_qp_link_work(struct work_struct *work)
+{
+	struct ntb_transport_qp *qp = container_of(work,
+						   struct ntb_transport_qp,
+						   link_work.work);
+	struct pci_dev *pdev = ntb_query_pdev(qp->ndev);
+	struct ntb_transport *nt = qp->transport;
+	int rc, val;
+
+	WARN_ON(nt->transport_link != NTB_LINK_UP);
+
+	rc = ntb_read_local_spad(nt->ndev, QP_LINKS, &val);
+	if (rc) {
+		dev_err(&pdev->dev, "Error reading spad %d\n", QP_LINKS);
+		return;
+	}
+
+	rc = ntb_write_remote_spad(nt->ndev, QP_LINKS, val | 1 << qp->qp_num);
+	if (rc)
+		dev_err(&pdev->dev, "Error writing %x to remote spad %d\n",
+			val | 1 << qp->qp_num, QP_LINKS);
+
+	/* query remote spad for qp ready bits */
+	rc = ntb_read_remote_spad(nt->ndev, QP_LINKS, &val);
+	if (rc)
+		dev_err(&pdev->dev, "Error reading remote spad %d\n", QP_LINKS);
+
+	dev_dbg(&pdev->dev, "Remote QP link status = %x\n", val);
+
+	/* See if the remote side is up */
+	if (1 << qp->qp_num & val) {
+		qp->qp_link = NTB_LINK_UP;
+
+		dev_info(&pdev->dev, "qp %d: Link Up\n", qp->qp_num);
+		if (qp->event_handler)
+			qp->event_handler(qp->cb_data, NTB_LINK_UP);
+	} else if (nt->transport_link == NTB_LINK_UP)
+		schedule_delayed_work(&qp->link_work,
+				      msecs_to_jiffies(NTB_LINK_DOWN_TIMEOUT));
+}
+
+static void ntb_transport_init_queue(struct ntb_transport *nt,
+				     unsigned int qp_num)
+{
+	struct ntb_transport_qp *qp;
+
+	qp = &nt->qps[qp_num];
+	qp->qp_num = qp_num;
+	qp->transport = nt;
+	qp->ndev = nt->ndev;
+	qp->qp_link = NTB_LINK_DOWN;
+	qp->client_ready = NTB_LINK_DOWN;
+	qp->event_handler = NULL;
+
+	if (nt->debugfs_dir) {
+		char debugfs_name[4];
+
+		snprintf(debugfs_name, 4, "qp%d", qp_num);
+		qp->debugfs_dir = debugfs_create_dir(debugfs_name,
+						     nt->debugfs_dir);
+
+		qp->debugfs_stats = debugfs_create_file("stats", S_IRUSR,
+							qp->debugfs_dir, qp,
+							&ntb_qp_debugfs_stats);
+	}
+
+	INIT_DELAYED_WORK(&qp->link_work, ntb_qp_link_work);
+
+	spin_lock_init(&qp->ntb_rx_pend_q_lock);
+	spin_lock_init(&qp->ntb_rx_free_q_lock);
+	spin_lock_init(&qp->ntb_tx_free_q_lock);
+
+	INIT_LIST_HEAD(&qp->rx_pend_q);
+	INIT_LIST_HEAD(&qp->rx_free_q);
+	INIT_LIST_HEAD(&qp->tx_free_q);
+}
+
+int ntb_transport_init(struct pci_dev *pdev)
+{
+	struct ntb_transport *nt;
+	int rc, i;
+
+	nt = kzalloc(sizeof(struct ntb_transport), GFP_KERNEL);
+	if (!nt)
+		return -ENOMEM;
+
+	if (debugfs_initialized())
+		nt->debugfs_dir = debugfs_create_dir(KBUILD_MODNAME, NULL);
+	else
+		nt->debugfs_dir = NULL;
+
+	nt->ndev = ntb_register_transport(pdev, nt);
+	if (!nt->ndev) {
+		rc = -EIO;
+		goto err;
+	}
+
+	nt->max_qps = min(nt->ndev->max_cbs, max_num_clients);
+
+	nt->qps = kcalloc(nt->max_qps, sizeof(struct ntb_transport_qp),
+			  GFP_KERNEL);
+	if (!nt->qps) {
+		rc = -ENOMEM;
+		goto err1;
+	}
+
+	nt->qp_bitmap = ((u64) 1 << nt->max_qps) - 1;
+
+	for (i = 0; i < nt->max_qps; i++)
+		ntb_transport_init_queue(nt, i);
+
+	INIT_DELAYED_WORK(&nt->link_work, ntb_transport_link_work);
+
+	rc = ntb_register_event_callback(nt->ndev,
+					 ntb_transport_event_callback);
+	if (rc)
+		goto err2;
+
+	INIT_LIST_HEAD(&nt->client_devs);
+	rc = ntb_bus_init(nt);
+	if (rc)
+		goto err3;
+
+	if (ntb_hw_link_status(nt->ndev))
+		schedule_delayed_work(&nt->link_work, 0);
+
+	return 0;
+
+err3:
+	ntb_unregister_event_callback(nt->ndev);
+err2:
+	kfree(nt->qps);
+err1:
+	ntb_unregister_transport(nt->ndev);
+err:
+	debugfs_remove_recursive(nt->debugfs_dir);
+	kfree(nt);
+	return rc;
+}
+
+void ntb_transport_free(void *transport)
+{
+	struct ntb_transport *nt = transport;
+	struct pci_dev *pdev;
+	int i;
+
+	nt->transport_link = NTB_LINK_DOWN;
+
+	/* verify that all the qp's are freed */
+	for (i = 0; i < nt->max_qps; i++)
+		if (!test_bit(i, &nt->qp_bitmap))
+			ntb_transport_free_queue(&nt->qps[i]);
+
+	ntb_bus_remove(nt);
+
+	cancel_delayed_work_sync(&nt->link_work);
+
+	debugfs_remove_recursive(nt->debugfs_dir);
+
+	ntb_unregister_event_callback(nt->ndev);
+
+	pdev = ntb_query_pdev(nt->ndev);
+
+	for (i = 0; i < NTB_NUM_MW; i++)
+		if (nt->mw[i].virt_addr)
+			dma_free_coherent(&pdev->dev, nt->mw[i].size,
+					  nt->mw[i].virt_addr,
+					  nt->mw[i].dma_addr);
+
+	kfree(nt->qps);
+	ntb_unregister_transport(nt->ndev);
+	kfree(nt);
+}
+
+static void ntb_rx_copy_task(struct ntb_transport_qp *qp,
+			     struct ntb_queue_entry *entry, void *offset)
+{
+
+	struct ntb_payload_header *hdr;
+
+	BUG_ON(offset < qp->rx_buff_begin ||
+	       offset + transport_mtu >= qp->rx_buff_end);
+
+	hdr = offset + transport_mtu - sizeof(struct ntb_payload_header);
+	entry->len = hdr->len;
+
+	memcpy(entry->buf, offset, entry->len);
+
+	/* Ensure that the data is fully copied out before clearing the flag */
+	wmb();
+	hdr->flags = 0;
+
+	if (qp->rx_handler && qp->client_ready == NTB_LINK_UP)
+		qp->rx_handler(qp, qp->cb_data, entry->cb_data, entry->len);
+
+	ntb_list_add(&qp->ntb_rx_free_q_lock, &entry->entry, &qp->rx_free_q);
+}
+
+static int ntb_process_rxc(struct ntb_transport_qp *qp)
+{
+	struct ntb_payload_header *hdr;
+	struct ntb_queue_entry *entry;
+	void *offset;
+
+	entry = ntb_list_rm(&qp->ntb_rx_pend_q_lock, &qp->rx_pend_q);
+	if (!entry) {
+		hdr = offset + transport_mtu -
+		      sizeof(struct ntb_payload_header);
+		dev_dbg(&ntb_query_pdev(qp->ndev)->dev,
+			"no buffer - HDR ver %llu, len %d, flags %x\n",
+			hdr->ver, hdr->len, hdr->flags);
+		qp->rx_err_no_buf++;
+		return -ENOMEM;
+	}
+
+	offset = qp->rx_offset;
+	hdr = offset + transport_mtu - sizeof(struct ntb_payload_header);
+
+	if (!(hdr->flags & DESC_DONE_FLAG)) {
+		ntb_list_add(&qp->ntb_rx_pend_q_lock, &entry->entry,
+				  &qp->rx_pend_q);
+		qp->rx_ring_empty++;
+		return -EAGAIN;
+	}
+
+	if (hdr->ver != qp->rx_pkts) {
+		dev_dbg(&ntb_query_pdev(qp->ndev)->dev,
+			"qp %d: version mismatch, expected %llu - got %llu\n",
+			qp->qp_num, qp->rx_pkts, hdr->ver);
+		ntb_list_add(&qp->ntb_rx_pend_q_lock, &entry->entry,
+				  &qp->rx_pend_q);
+		qp->rx_err_ver++;
+		return -EIO;
+	}
+
+	if (hdr->flags & LINK_DOWN_FLAG) {
+		ntb_qp_link_down(qp);
+
+		ntb_list_add(&qp->ntb_rx_pend_q_lock, &entry->entry,
+				  &qp->rx_pend_q);
+
+		/* Ensure that the data is fully copied out before clearing the
+		 * done flag
+		 */
+		wmb();
+		hdr->flags = 0;
+		goto out;
+	}
+
+	dev_dbg(&ntb_query_pdev(qp->ndev)->dev,
+		"rx offset %p, ver %llu - %d payload received, buf size %d\n",
+		qp->rx_offset, hdr->ver, hdr->len, entry->len);
+
+	if (hdr->len <= entry->len)
+		ntb_rx_copy_task(qp, entry, offset);
+	else {
+		ntb_list_add(&qp->ntb_rx_pend_q_lock, &entry->entry,
+				  &qp->rx_pend_q);
+
+		/* Ensure that the data is fully copied out before clearing the
+		 * done flag
+		 */
+		wmb();
+		hdr->flags = 0;
+		qp->rx_err_oflow++;
+		dev_dbg(&ntb_query_pdev(qp->ndev)->dev,
+			"RX overflow! Wanted %d got %d\n",
+			hdr->len, entry->len);
+	}
+
+	qp->rx_bytes += hdr->len;
+	qp->rx_pkts++;
+
+out:
+	qp->rx_offset += transport_mtu;
+	if (qp->rx_offset + transport_mtu >= qp->rx_buff_end)
+		qp->rx_offset = qp->rx_buff_begin;
+
+	return 0;
+}
+
+static void ntb_transport_rx(unsigned long data)
+{
+	struct ntb_transport_qp *qp = (struct ntb_transport_qp *)data;
+	int rc;
+
+	do {
+		rc = ntb_process_rxc(qp);
+	} while (!rc);
+}
+
+static void ntb_transport_rxc_db(void *data, int db_num)
+{
+	struct ntb_transport_qp *qp = data;
+
+	dev_dbg(&ntb_query_pdev(qp->ndev)->dev, "%s: doorbell %d received\n",
+		__func__, db_num);
+
+	tasklet_schedule(&qp->rx_work);
+}
+
+static void ntb_tx_copy_task(struct ntb_transport_qp *qp,
+			     struct ntb_queue_entry *entry,
+			     void *offset)
+{
+	struct ntb_payload_header *hdr;
+
+	BUG_ON(offset < qp->tx_mw_begin ||
+	       offset + transport_mtu >= qp->tx_mw_end);
+
+	memcpy_toio(offset, entry->buf, entry->len);
+
+	hdr = offset + transport_mtu - sizeof(struct ntb_payload_header);
+	hdr->len = entry->len;
+	hdr->ver = qp->tx_pkts;
+
+	/* Ensure that the data is fully copied out before setting the flag */
+	mmiowb();
+	hdr->flags = entry->flags | DESC_DONE_FLAG;
+
+	ntb_ring_sdb(qp->ndev, qp->qp_num);
+
+	/* The entry length can only be zero if the packet is intended to be a
+	 * "link down" or similar.  Since no payload is being sent in these
+	 * cases, there is nothing to add to the completion queue.
+	 */
+	if (entry->len > 0) {
+		qp->tx_bytes += entry->len;
+
+		if (qp->tx_handler)
+			qp->tx_handler(qp, qp->cb_data, entry->cb_data,
+				       entry->len);
+	}
+
+	ntb_list_add(&qp->ntb_tx_free_q_lock, &entry->entry, &qp->tx_free_q);
+}
+
+static int ntb_process_tx(struct ntb_transport_qp *qp,
+			  struct ntb_queue_entry *entry)
+{
+	struct ntb_payload_header *hdr;
+	void *offset;
+
+	offset = qp->tx_offset;
+	hdr = offset + transport_mtu - sizeof(struct ntb_payload_header);
+
+	dev_dbg(&ntb_query_pdev(qp->ndev)->dev, "%lld - offset %p, tx %p, entry len %d flags %x buff %p\n",
+		 qp->tx_pkts, offset, qp->tx_offset, entry->len, entry->flags,
+		 entry->buf);
+	if (hdr->flags) {
+		qp->tx_ring_full++;
+		return -EAGAIN;
+	}
+
+	if (entry->len > transport_mtu - sizeof(struct ntb_payload_header)) {
+		if (qp->tx_handler)
+			qp->tx_handler(qp->cb_data, qp, NULL, -EIO);
+
+		ntb_list_add(&qp->ntb_tx_free_q_lock, &entry->entry,
+			     &qp->tx_free_q);
+		return 0;
+	}
+
+	ntb_tx_copy_task(qp, entry, offset);
+
+	qp->tx_offset += transport_mtu;
+	if (qp->tx_offset + transport_mtu >= qp->tx_mw_end)
+		qp->tx_offset = qp->tx_mw_begin;
+
+	qp->tx_pkts++;
+
+	return 0;
+}
+
+static void ntb_send_link_down(struct ntb_transport_qp *qp)
+{
+	struct pci_dev *pdev = ntb_query_pdev(qp->ndev);
+	struct ntb_queue_entry *entry;
+	int i, rc;
+
+	if (qp->qp_link == NTB_LINK_DOWN)
+		return;
+
+	qp->qp_link = NTB_LINK_DOWN;
+	dev_info(&pdev->dev, "qp %d: Link Down\n", qp->qp_num);
+
+	for (i = 0; i < NTB_LINK_DOWN_TIMEOUT; i++) {
+		entry = ntb_list_rm(&qp->ntb_tx_free_q_lock,
+					 &qp->tx_free_q);
+		if (entry)
+			break;
+		msleep(100);
+	}
+
+	if (!entry)
+		return;
+
+	entry->cb_data = NULL;
+	entry->buf = NULL;
+	entry->len = 0;
+	entry->flags = LINK_DOWN_FLAG;
+
+	rc = ntb_process_tx(qp, entry);
+	if (rc)
+		dev_err(&pdev->dev, "ntb: QP%d unable to send linkdown msg\n",
+			qp->qp_num);
+}
+
+/**
+ * ntb_transport_create_queue - Create a new NTB transport layer queue
+ * @rx_handler: receive callback function
+ * @tx_handler: transmit callback function
+ * @event_handler: event callback function
+ *
+ * Create a new NTB transport layer queue and provide the queue with a callback
+ * routine for both transmit and receive.  The receive callback routine will be
+ * used to pass up data when the transport has received it on the queue.   The
+ * transmit callback routine will be called when the transport has completed the
+ * transmission of the data on the queue and the data is ready to be freed.
+ *
+ * RETURNS: pointer to newly created ntb_queue, NULL on error.
+ */
+struct ntb_transport_qp *
+ntb_transport_create_queue(void *data, struct pci_dev *pdev,
+			   const struct ntb_queue_handlers *handlers)
+{
+	struct ntb_queue_entry *entry;
+	struct ntb_transport_qp *qp;
+	struct ntb_transport *nt;
+	unsigned int free_queue;
+	int rc, i;
+
+	nt = ntb_find_transport(pdev);
+	if (!nt)
+		goto err;
+
+	free_queue = ffs(nt->qp_bitmap);
+	if (!free_queue)
+		goto err;
+
+	/* decrement free_queue to make it zero based */
+	free_queue--;
+
+	clear_bit(free_queue, &nt->qp_bitmap);
+
+	qp = &nt->qps[free_queue];
+	qp->cb_data = data;
+	qp->rx_handler = handlers->rx_handler;
+	qp->tx_handler = handlers->tx_handler;
+	qp->event_handler = handlers->event_handler;
+
+	for (i = 0; i < NTB_QP_DEF_NUM_ENTRIES; i++) {
+		entry = kzalloc(sizeof(struct ntb_queue_entry), GFP_ATOMIC);
+		if (!entry)
+			goto err1;
+
+		ntb_list_add(&qp->ntb_rx_free_q_lock, &entry->entry,
+				  &qp->rx_free_q);
+	}
+
+	for (i = 0; i < NTB_QP_DEF_NUM_ENTRIES; i++) {
+		entry = kzalloc(sizeof(struct ntb_queue_entry), GFP_ATOMIC);
+		if (!entry)
+			goto err2;
+
+		ntb_list_add(&qp->ntb_tx_free_q_lock, &entry->entry,
+				  &qp->tx_free_q);
+	}
+
+	tasklet_init(&qp->rx_work, ntb_transport_rx, (unsigned long) qp);
+
+	rc = ntb_register_db_callback(qp->ndev, free_queue, qp,
+				      ntb_transport_rxc_db);
+	if (rc)
+		goto err3;
+
+	dev_info(&pdev->dev, "NTB Transport QP %d created\n", qp->qp_num);
+
+	return qp;
+
+err3:
+	tasklet_disable(&qp->rx_work);
+err2:
+	while ((entry =
+		ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q)))
+		kfree(entry);
+err1:
+	while ((entry =
+		ntb_list_rm(&qp->ntb_rx_free_q_lock, &qp->rx_free_q)))
+		kfree(entry);
+	set_bit(free_queue, &nt->qp_bitmap);
+err:
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(ntb_transport_create_queue);
+
+/**
+ * ntb_transport_free_queue - Frees NTB transport queue
+ * @qp: NTB queue to be freed
+ *
+ * Frees NTB transport queue
+ */
+void ntb_transport_free_queue(struct ntb_transport_qp *qp)
+{
+	struct pci_dev *pdev = ntb_query_pdev(qp->ndev);
+	struct ntb_queue_entry *entry;
+
+	if (!qp)
+		return;
+
+	cancel_delayed_work_sync(&qp->link_work);
+
+	ntb_unregister_db_callback(qp->ndev, qp->qp_num);
+	tasklet_disable(&qp->rx_work);
+
+	while ((entry =
+		ntb_list_rm(&qp->ntb_rx_free_q_lock, &qp->rx_free_q)))
+		kfree(entry);
+
+	while ((entry =
+		ntb_list_rm(&qp->ntb_rx_pend_q_lock, &qp->rx_pend_q))) {
+		dev_warn(&pdev->dev, "Freeing item from a non-empty queue\n");
+		kfree(entry);
+	}
+
+	while ((entry =
+		ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q)))
+		kfree(entry);
+
+	set_bit(qp->qp_num, &qp->transport->qp_bitmap);
+
+	dev_info(&pdev->dev, "NTB Transport QP %d freed\n", qp->qp_num);
+}
+EXPORT_SYMBOL_GPL(ntb_transport_free_queue);
+
+/**
+ * ntb_transport_rx_remove - Dequeues enqueued rx packet
+ * @qp: NTB queue to be freed
+ * @len: pointer to variable to write enqueued buffers length
+ *
+ * Dequeues unused buffers from receive queue.  Should only be used during
+ * shutdown of qp.
+ *
+ * RETURNS: NULL error value on error, or void* for success.
+ */
+void *ntb_transport_rx_remove(struct ntb_transport_qp *qp, unsigned int *len)
+{
+	struct ntb_queue_entry *entry;
+	void *buf;
+
+	if (!qp || qp->client_ready == NTB_LINK_UP)
+		return NULL;
+
+	entry = ntb_list_rm(&qp->ntb_rx_pend_q_lock, &qp->rx_pend_q);
+	if (!entry)
+		return NULL;
+
+	buf = entry->cb_data;
+	*len = entry->len;
+
+	ntb_list_add(&qp->ntb_rx_free_q_lock, &entry->entry,
+			  &qp->rx_free_q);
+
+	return buf;
+}
+EXPORT_SYMBOL_GPL(ntb_transport_rx_remove);
+
+/**
+ * ntb_transport_rx_enqueue - Enqueue a new NTB queue entry
+ * @qp: NTB transport layer queue the entry is to be enqueued on
+ * @cb: per buffer pointer for callback function to use
+ * @data: pointer to data buffer that incoming packets will be copied into
+ * @len: length of the data buffer
+ *
+ * Enqueue a new receive buffer onto the transport queue into which a NTB
+ * payload can be received into.
+ *
+ * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
+ */
+int ntb_transport_rx_enqueue(struct ntb_transport_qp *qp, void *cb, void *data,
+			     unsigned int len)
+{
+	struct ntb_queue_entry *entry;
+
+	if (!qp)
+		return -EINVAL;
+
+	entry = ntb_list_rm(&qp->ntb_rx_free_q_lock, &qp->rx_free_q);
+	if (!entry)
+		return -ENOMEM;
+
+	entry->cb_data = cb;
+	entry->buf = data;
+	entry->len = len;
+
+	ntb_list_add(&qp->ntb_rx_pend_q_lock, &entry->entry,
+			  &qp->rx_pend_q);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ntb_transport_rx_enqueue);
+
+/**
+ * ntb_transport_tx_enqueue - Enqueue a new NTB queue entry
+ * @qp: NTB transport layer queue the entry is to be enqueued on
+ * @cb: per buffer pointer for callback function to use
+ * @data: pointer to data buffer that will be sent
+ * @len: length of the data buffer
+ *
+ * Enqueue a new transmit buffer onto the transport queue from which a NTB
+ * payload will be transmitted.  This assumes that a lock is behing held to
+ * serialize access to the qp.
+ *
+ * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
+ */
+int ntb_transport_tx_enqueue(struct ntb_transport_qp *qp, void *cb, void *data,
+			     unsigned int len)
+{
+	struct ntb_queue_entry *entry;
+	int rc;
+
+	if (!qp || qp->qp_link != NTB_LINK_UP || !len)
+		return -EINVAL;
+
+	entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q);
+	if (!entry)
+		return -ENOMEM;
+
+	entry->cb_data = cb;
+	entry->buf = data;
+	entry->len = len;
+	entry->flags = 0;
+
+	rc = ntb_process_tx(qp, entry);
+	if (rc)
+		ntb_list_add(&qp->ntb_tx_free_q_lock, &entry->entry,
+			     &qp->tx_free_q);
+
+	return rc;
+}
+EXPORT_SYMBOL_GPL(ntb_transport_tx_enqueue);
+
+/**
+ * ntb_transport_link_up - Notify NTB transport of client readiness to use queue
+ * @qp: NTB transport layer queue to be enabled
+ *
+ * Notify NTB transport layer of client readiness to use queue
+ */
+void ntb_transport_link_up(struct ntb_transport_qp *qp)
+{
+	if (!qp)
+		return;
+
+	qp->client_ready = NTB_LINK_UP;
+
+	if (qp->transport->transport_link == NTB_LINK_UP)
+		schedule_delayed_work(&qp->link_work, 0);
+}
+EXPORT_SYMBOL_GPL(ntb_transport_link_up);
+
+/**
+ * ntb_transport_link_down - Notify NTB transport to no longer enqueue data
+ * @qp: NTB transport layer queue to be disabled
+ *
+ * Notify NTB transport layer of client's desire to no longer receive data on
+ * transport queue specified.  It is the client's responsibility to ensure all
+ * entries on queue are purged or otherwise handled appropraitely.
+ */
+void ntb_transport_link_down(struct ntb_transport_qp *qp)
+{
+	struct pci_dev *pdev = ntb_query_pdev(qp->ndev);
+	int rc, val;
+
+	if (!qp)
+		return;
+
+	qp->client_ready = NTB_LINK_DOWN;
+
+	rc = ntb_read_local_spad(qp->ndev, QP_LINKS, &val);
+	if (rc) {
+		dev_err(&pdev->dev, "Error reading spad %d\n", QP_LINKS);
+		return;
+	}
+
+	rc = ntb_write_remote_spad(qp->ndev, QP_LINKS,
+				   val & ~(1 << qp->qp_num));
+	if (rc)
+		dev_err(&pdev->dev, "Error writing %x to remote spad %d\n",
+			val & ~(1 << qp->qp_num), QP_LINKS);
+
+	if (qp->qp_link == NTB_LINK_UP)
+		ntb_send_link_down(qp);
+	else
+		cancel_delayed_work_sync(&qp->link_work);
+}
+EXPORT_SYMBOL_GPL(ntb_transport_link_down);
+
+/**
+ * ntb_transport_link_query - Query transport link state
+ * @qp: NTB transport layer queue to be queried
+ *
+ * Query connectivity to the remote system of the NTB transport queue
+ *
+ * RETURNS: true for link up or false for link down
+ */
+bool ntb_transport_link_query(struct ntb_transport_qp *qp)
+{
+	return qp->qp_link == NTB_LINK_UP;
+}
+EXPORT_SYMBOL_GPL(ntb_transport_link_query);
+
+/**
+ * ntb_transport_qp_num - Query the qp number
+ * @qp: NTB transport layer queue to be queried
+ *
+ * Query qp number of the NTB transport queue
+ *
+ * RETURNS: a zero based number specifying the qp number
+ */
+unsigned char ntb_transport_qp_num(struct ntb_transport_qp *qp)
+{
+	return qp->qp_num;
+}
+EXPORT_SYMBOL_GPL(ntb_transport_qp_num);
+
+/**
+ * ntb_transport_max_size - Query the max payload size of a qp
+ * @qp: NTB transport layer queue to be queried
+ *
+ * Query the maximum payload size permissible on the given qp
+ *
+ * RETURNS: the max payload size of a qp
+ */
+unsigned int
+ntb_transport_max_size(__attribute__((unused)) struct ntb_transport_qp *qp)
+{
+	return transport_mtu - sizeof(struct ntb_payload_header);
+}
+EXPORT_SYMBOL_GPL(ntb_transport_max_size);
diff --git a/include/linux/ntb.h b/include/linux/ntb.h
new file mode 100644
index 000000000000..f6a15205853b
--- /dev/null
+++ b/include/linux/ntb.h
@@ -0,0 +1,83 @@
+/*
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ *   redistributing this file, you may do so under either license.
+ *
+ *   GPL LICENSE SUMMARY
+ *
+ *   Copyright(c) 2012 Intel Corporation. All rights reserved.
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of version 2 of the GNU General Public License as
+ *   published by the Free Software Foundation.
+ *
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2012 Intel Corporation. All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copy
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Intel PCIe NTB Linux driver
+ *
+ * Contact Information:
+ * Jon Mason <jon.mason@intel.com>
+ */
+
+struct ntb_transport_qp;
+
+struct ntb_client {
+	struct device_driver driver;
+	int (*probe) (struct pci_dev *pdev);
+	void (*remove) (struct pci_dev *pdev);
+};
+
+int ntb_register_client(struct ntb_client *drvr);
+void ntb_unregister_client(struct ntb_client *drvr);
+int ntb_register_client_dev(char *device_name);
+void ntb_unregister_client_dev(char *device_name);
+
+struct ntb_queue_handlers {
+	void (*rx_handler) (struct ntb_transport_qp *qp, void *qp_data,
+			    void *data, int len);
+	void (*tx_handler) (struct ntb_transport_qp *qp, void *qp_data,
+			    void *data, int len);
+	void (*event_handler) (void *data, int status);
+};
+
+unsigned char ntb_transport_qp_num(struct ntb_transport_qp *qp);
+unsigned int ntb_transport_max_size(struct ntb_transport_qp *qp);
+struct ntb_transport_qp *
+ntb_transport_create_queue(void *data, struct pci_dev *pdev,
+			   const struct ntb_queue_handlers *handlers);
+void ntb_transport_free_queue(struct ntb_transport_qp *qp);
+int ntb_transport_rx_enqueue(struct ntb_transport_qp *qp, void *cb, void *data,
+			     unsigned int len);
+int ntb_transport_tx_enqueue(struct ntb_transport_qp *qp, void *cb, void *data,
+			     unsigned int len);
+void *ntb_transport_rx_remove(struct ntb_transport_qp *qp, unsigned int *len);
+void ntb_transport_link_up(struct ntb_transport_qp *qp);
+void ntb_transport_link_down(struct ntb_transport_qp *qp);
+bool ntb_transport_link_query(struct ntb_transport_qp *qp);
-- 
cgit v1.2.3


From 337dc3a7684cf6577b5595b9bb96e1af06baec41 Mon Sep 17 00:00:00 2001
From: Praveen Paneri <p.paneri@samsung.com>
Date: Fri, 23 Nov 2012 16:03:06 +0530
Subject: usb: phy: samsung: Introducing usb phy driver for hsotg

This driver uses usb_phy interface to interact with s3c-hsotg. Supports
phy_init and phy_shutdown functions to enable/disable usb phy. Support
will be extended to host controllers and more Samsung SoCs.

Signed-off-by: Praveen Paneri <p.paneri@samsung.com>
Acked-by: Heiko Stuebner <heiko@sntech.de>
Acked-by: Kyungmin Park <kyungmin.park@samsung.com>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 .../devicetree/bindings/usb/samsung-usbphy.txt     |  11 +
 drivers/usb/phy/Kconfig                            |   8 +
 drivers/usb/phy/Makefile                           |   1 +
 drivers/usb/phy/samsung-usbphy.c                   | 354 +++++++++++++++++++++
 include/linux/platform_data/samsung-usbphy.h       |  27 ++
 5 files changed, 401 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/usb/samsung-usbphy.txt
 create mode 100644 drivers/usb/phy/samsung-usbphy.c
 create mode 100644 include/linux/platform_data/samsung-usbphy.h

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/usb/samsung-usbphy.txt b/Documentation/devicetree/bindings/usb/samsung-usbphy.txt
new file mode 100644
index 000000000000..7b26e2d6ea04
--- /dev/null
+++ b/Documentation/devicetree/bindings/usb/samsung-usbphy.txt
@@ -0,0 +1,11 @@
+* Samsung's usb phy transceiver
+
+The Samsung's phy transceiver is used for controlling usb otg phy for
+s3c-hsotg usb device controller.
+TODO: Adding the PHY binding with controller(s) according to the under
+developement generic PHY driver.
+
+Required properties:
+- compatible : should be "samsung,exynos4210-usbphy"
+- reg : base physical address of the phy registers and length of memory mapped
+	region.
diff --git a/drivers/usb/phy/Kconfig b/drivers/usb/phy/Kconfig
index 5de6e7f39f9c..36a85b675429 100644
--- a/drivers/usb/phy/Kconfig
+++ b/drivers/usb/phy/Kconfig
@@ -45,3 +45,11 @@ config USB_RCAR_PHY
 
 	  To compile this driver as a module, choose M here: the
 	  module will be called rcar-phy.
+
+config SAMSUNG_USBPHY
+	bool "Samsung USB PHY controller Driver"
+	depends on USB_S3C_HSOTG
+	select USB_OTG_UTILS
+	help
+	  Enable this to support Samsung USB phy controller for samsung
+	  SoCs.
diff --git a/drivers/usb/phy/Makefile b/drivers/usb/phy/Makefile
index 1a579a860a03..ec304f642402 100644
--- a/drivers/usb/phy/Makefile
+++ b/drivers/usb/phy/Makefile
@@ -9,3 +9,4 @@ obj-$(CONFIG_USB_ISP1301)		+= isp1301.o
 obj-$(CONFIG_MV_U3D_PHY)		+= mv_u3d_phy.o
 obj-$(CONFIG_USB_EHCI_TEGRA)	+= tegra_usb_phy.o
 obj-$(CONFIG_USB_RCAR_PHY)		+= rcar-phy.o
+obj-$(CONFIG_SAMSUNG_USBPHY)		+= samsung-usbphy.o
diff --git a/drivers/usb/phy/samsung-usbphy.c b/drivers/usb/phy/samsung-usbphy.c
new file mode 100644
index 000000000000..5c5e1bb5de7b
--- /dev/null
+++ b/drivers/usb/phy/samsung-usbphy.c
@@ -0,0 +1,354 @@
+/* linux/drivers/usb/phy/samsung-usbphy.c
+ *
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd.
+ *              http://www.samsung.com
+ *
+ * Author: Praveen Paneri <p.paneri@samsung.com>
+ *
+ * Samsung USB2.0 High-speed OTG transceiver, talks to S3C HS OTG controller
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/usb/otg.h>
+#include <linux/platform_data/samsung-usbphy.h>
+
+/* Register definitions */
+
+#define SAMSUNG_PHYPWR				(0x00)
+
+#define PHYPWR_NORMAL_MASK			(0x19 << 0)
+#define PHYPWR_OTG_DISABLE			(0x1 << 4)
+#define PHYPWR_ANALOG_POWERDOWN			(0x1 << 3)
+#define PHYPWR_FORCE_SUSPEND			(0x1 << 1)
+/* For Exynos4 */
+#define PHYPWR_NORMAL_MASK_PHY0			(0x39 << 0)
+#define PHYPWR_SLEEP_PHY0			(0x1 << 5)
+
+#define SAMSUNG_PHYCLK				(0x04)
+
+#define PHYCLK_MODE_USB11			(0x1 << 6)
+#define PHYCLK_EXT_OSC				(0x1 << 5)
+#define PHYCLK_COMMON_ON_N			(0x1 << 4)
+#define PHYCLK_ID_PULL				(0x1 << 2)
+#define PHYCLK_CLKSEL_MASK			(0x3 << 0)
+#define PHYCLK_CLKSEL_48M			(0x0 << 0)
+#define PHYCLK_CLKSEL_12M			(0x2 << 0)
+#define PHYCLK_CLKSEL_24M			(0x3 << 0)
+
+#define SAMSUNG_RSTCON				(0x08)
+
+#define RSTCON_PHYLINK_SWRST			(0x1 << 2)
+#define RSTCON_HLINK_SWRST			(0x1 << 1)
+#define RSTCON_SWRST				(0x1 << 0)
+
+#ifndef MHZ
+#define MHZ (1000*1000)
+#endif
+
+enum samsung_cpu_type {
+	TYPE_S3C64XX,
+	TYPE_EXYNOS4210,
+};
+
+/*
+ * struct samsung_usbphy - transceiver driver state
+ * @phy: transceiver structure
+ * @plat: platform data
+ * @dev: The parent device supplied to the probe function
+ * @clk: usb phy clock
+ * @regs: usb phy register memory base
+ * @ref_clk_freq: reference clock frequency selection
+ * @cpu_type: machine identifier
+ */
+struct samsung_usbphy {
+	struct usb_phy	phy;
+	struct samsung_usbphy_data *plat;
+	struct device	*dev;
+	struct clk	*clk;
+	void __iomem	*regs;
+	int		ref_clk_freq;
+	int		cpu_type;
+};
+
+#define phy_to_sphy(x)		container_of((x), struct samsung_usbphy, phy)
+
+/*
+ * Returns reference clock frequency selection value
+ */
+static int samsung_usbphy_get_refclk_freq(struct samsung_usbphy *sphy)
+{
+	struct clk *ref_clk;
+	int refclk_freq = 0;
+
+	ref_clk = clk_get(sphy->dev, "xusbxti");
+	if (IS_ERR(ref_clk)) {
+		dev_err(sphy->dev, "Failed to get reference clock\n");
+		return PTR_ERR(ref_clk);
+	}
+
+	switch (clk_get_rate(ref_clk)) {
+	case 12 * MHZ:
+		refclk_freq = PHYCLK_CLKSEL_12M;
+		break;
+	case 24 * MHZ:
+		refclk_freq = PHYCLK_CLKSEL_24M;
+		break;
+	case 48 * MHZ:
+		refclk_freq = PHYCLK_CLKSEL_48M;
+		break;
+	default:
+		if (sphy->cpu_type == TYPE_S3C64XX)
+			refclk_freq = PHYCLK_CLKSEL_48M;
+		else
+			refclk_freq = PHYCLK_CLKSEL_24M;
+		break;
+	}
+	clk_put(ref_clk);
+
+	return refclk_freq;
+}
+
+static void samsung_usbphy_enable(struct samsung_usbphy *sphy)
+{
+	void __iomem *regs = sphy->regs;
+	u32 phypwr;
+	u32 phyclk;
+	u32 rstcon;
+
+	/* set clock frequency for PLL */
+	phyclk = sphy->ref_clk_freq;
+	phypwr = readl(regs + SAMSUNG_PHYPWR);
+	rstcon = readl(regs + SAMSUNG_RSTCON);
+
+	switch (sphy->cpu_type) {
+	case TYPE_S3C64XX:
+		phyclk &= ~PHYCLK_COMMON_ON_N;
+		phypwr &= ~PHYPWR_NORMAL_MASK;
+		rstcon |= RSTCON_SWRST;
+		break;
+	case TYPE_EXYNOS4210:
+		phypwr &= ~PHYPWR_NORMAL_MASK_PHY0;
+		rstcon |= RSTCON_SWRST;
+	default:
+		break;
+	}
+
+	writel(phyclk, regs + SAMSUNG_PHYCLK);
+	/* Configure PHY0 for normal operation*/
+	writel(phypwr, regs + SAMSUNG_PHYPWR);
+	/* reset all ports of PHY and Link */
+	writel(rstcon, regs + SAMSUNG_RSTCON);
+	udelay(10);
+	rstcon &= ~RSTCON_SWRST;
+	writel(rstcon, regs + SAMSUNG_RSTCON);
+}
+
+static void samsung_usbphy_disable(struct samsung_usbphy *sphy)
+{
+	void __iomem *regs = sphy->regs;
+	u32 phypwr;
+
+	phypwr = readl(regs + SAMSUNG_PHYPWR);
+
+	switch (sphy->cpu_type) {
+	case TYPE_S3C64XX:
+		phypwr |= PHYPWR_NORMAL_MASK;
+		break;
+	case TYPE_EXYNOS4210:
+		phypwr |= PHYPWR_NORMAL_MASK_PHY0;
+	default:
+		break;
+	}
+
+	/* Disable analog and otg block power */
+	writel(phypwr, regs + SAMSUNG_PHYPWR);
+}
+
+/*
+ * The function passed to the usb driver for phy initialization
+ */
+static int samsung_usbphy_init(struct usb_phy *phy)
+{
+	struct samsung_usbphy *sphy;
+	int ret = 0;
+
+	sphy = phy_to_sphy(phy);
+
+	/* Enable the phy clock */
+	ret = clk_prepare_enable(sphy->clk);
+	if (ret) {
+		dev_err(sphy->dev, "%s: clk_prepare_enable failed\n", __func__);
+		return ret;
+	}
+
+	/* Disable phy isolation */
+	if (sphy->plat && sphy->plat->pmu_isolation)
+		sphy->plat->pmu_isolation(false);
+
+	/* Initialize usb phy registers */
+	samsung_usbphy_enable(sphy);
+
+	/* Disable the phy clock */
+	clk_disable_unprepare(sphy->clk);
+	return ret;
+}
+
+/*
+ * The function passed to the usb driver for phy shutdown
+ */
+static void samsung_usbphy_shutdown(struct usb_phy *phy)
+{
+	struct samsung_usbphy *sphy;
+
+	sphy = phy_to_sphy(phy);
+
+	if (clk_prepare_enable(sphy->clk)) {
+		dev_err(sphy->dev, "%s: clk_prepare_enable failed\n", __func__);
+		return;
+	}
+
+	/* De-initialize usb phy registers */
+	samsung_usbphy_disable(sphy);
+
+	/* Enable phy isolation */
+	if (sphy->plat && sphy->plat->pmu_isolation)
+		sphy->plat->pmu_isolation(true);
+
+	clk_disable_unprepare(sphy->clk);
+}
+
+static const struct of_device_id samsung_usbphy_dt_match[];
+
+static inline int samsung_usbphy_get_driver_data(struct platform_device *pdev)
+{
+	if (pdev->dev.of_node) {
+		const struct of_device_id *match;
+		match = of_match_node(samsung_usbphy_dt_match,
+							pdev->dev.of_node);
+		return (int) match->data;
+	}
+
+	return platform_get_device_id(pdev)->driver_data;
+}
+
+static int __devinit samsung_usbphy_probe(struct platform_device *pdev)
+{
+	struct samsung_usbphy *sphy;
+	struct samsung_usbphy_data *pdata;
+	struct device *dev = &pdev->dev;
+	struct resource *phy_mem;
+	void __iomem	*phy_base;
+	struct clk *clk;
+
+	pdata = pdev->dev.platform_data;
+	if (!pdata) {
+		dev_err(&pdev->dev, "%s: no platform data defined\n", __func__);
+		return -EINVAL;
+	}
+
+	phy_mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!phy_mem) {
+		dev_err(dev, "%s: missing mem resource\n", __func__);
+		return -ENODEV;
+	}
+
+	phy_base = devm_request_and_ioremap(dev, phy_mem);
+	if (!phy_base) {
+		dev_err(dev, "%s: register mapping failed\n", __func__);
+		return -ENXIO;
+	}
+
+	sphy = devm_kzalloc(dev, sizeof(*sphy), GFP_KERNEL);
+	if (!sphy)
+		return -ENOMEM;
+
+	clk = devm_clk_get(dev, "otg");
+	if (IS_ERR(clk)) {
+		dev_err(dev, "Failed to get otg clock\n");
+		return PTR_ERR(clk);
+	}
+
+	sphy->dev		= &pdev->dev;
+	sphy->plat		= pdata;
+	sphy->regs		= phy_base;
+	sphy->clk		= clk;
+	sphy->phy.dev		= sphy->dev;
+	sphy->phy.label		= "samsung-usbphy";
+	sphy->phy.init		= samsung_usbphy_init;
+	sphy->phy.shutdown	= samsung_usbphy_shutdown;
+	sphy->cpu_type		= samsung_usbphy_get_driver_data(pdev);
+	sphy->ref_clk_freq	= samsung_usbphy_get_refclk_freq(sphy);
+
+	platform_set_drvdata(pdev, sphy);
+
+	return usb_add_phy(&sphy->phy, USB_PHY_TYPE_USB2);
+}
+
+static int __exit samsung_usbphy_remove(struct platform_device *pdev)
+{
+	struct samsung_usbphy *sphy = platform_get_drvdata(pdev);
+
+	usb_remove_phy(&sphy->phy);
+
+	return 0;
+}
+
+#ifdef CONFIG_OF
+static const struct of_device_id samsung_usbphy_dt_match[] = {
+	{
+		.compatible = "samsung,s3c64xx-usbphy",
+		.data = (void *)TYPE_S3C64XX,
+	}, {
+		.compatible = "samsung,exynos4210-usbphy",
+		.data = (void *)TYPE_EXYNOS4210,
+	},
+	{},
+};
+MODULE_DEVICE_TABLE(of, samsung_usbphy_dt_match);
+#endif
+
+static struct platform_device_id samsung_usbphy_driver_ids[] = {
+	{
+		.name		= "s3c64xx-usbphy",
+		.driver_data	= TYPE_S3C64XX,
+	}, {
+		.name		= "exynos4210-usbphy",
+		.driver_data	= TYPE_EXYNOS4210,
+	},
+	{},
+};
+
+MODULE_DEVICE_TABLE(platform, samsung_usbphy_driver_ids);
+
+static struct platform_driver samsung_usbphy_driver = {
+	.probe		= samsung_usbphy_probe,
+	.remove		= __devexit_p(samsung_usbphy_remove),
+	.id_table	= samsung_usbphy_driver_ids,
+	.driver		= {
+		.name	= "samsung-usbphy",
+		.owner	= THIS_MODULE,
+		.of_match_table = of_match_ptr(samsung_usbphy_dt_match),
+	},
+};
+
+module_platform_driver(samsung_usbphy_driver);
+
+MODULE_DESCRIPTION("Samsung USB phy controller");
+MODULE_AUTHOR("Praveen Paneri <p.paneri@samsung.com>");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:samsung-usbphy");
diff --git a/include/linux/platform_data/samsung-usbphy.h b/include/linux/platform_data/samsung-usbphy.h
new file mode 100644
index 000000000000..1bd24cba982b
--- /dev/null
+++ b/include/linux/platform_data/samsung-usbphy.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright (C) 2012 Samsung Electronics Co.Ltd
+ *		http://www.samsung.com/
+ * Author: Praveen Paneri <p.paneri@samsung.com>
+ *
+ * Defines platform data for samsung usb phy driver.
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+#ifndef __SAMSUNG_USBPHY_PLATFORM_H
+#define __SAMSUNG_USBPHY_PLATFORM_H
+
+/**
+ * samsung_usbphy_data - Platform data for USB PHY driver.
+ * @pmu_isolation: Function to control usb phy isolation in PMU.
+ */
+struct samsung_usbphy_data {
+	void (*pmu_isolation)(int on);
+};
+
+extern void samsung_usbphy_set_pdata(struct samsung_usbphy_data *pd);
+
+#endif /* __SAMSUNG_USBPHY_PLATFORM_H */
-- 
cgit v1.2.3


From f868ef995b32f0f08d7b6ce700bbe25de3ad65ac Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime.ripard@free-electrons.com>
Date: Tue, 8 Jan 2013 22:43:12 +0100
Subject: pinctrl: pinconf-generic: add drive strength parameter

Some pin configurations IP allows to set the current output to the pin.
This patch adds such a parameter to the pinconf-generic mechanism.

This parameter takes as argument the drive strength in mA.

Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/pinctrl/pinconf-generic.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pinctrl/pinconf-generic.h b/include/linux/pinctrl/pinconf-generic.h
index b23d99da3b4b..40d7bb9c7562 100644
--- a/include/linux/pinctrl/pinconf-generic.h
+++ b/include/linux/pinctrl/pinconf-generic.h
@@ -46,6 +46,8 @@
  * @PIN_CONFIG_DRIVE_OPEN_SOURCE: the pin will be driven with open source
  *	(open emitter). Sending this config will enabale open drain mode, the
  *	argument is ignored.
+ * @PIN_CONFIG_DRIVE_STRENGTH: the pin will output the current passed as
+ * 	argument. The argument is in mA.
  * @PIN_CONFIG_INPUT_SCHMITT_DISABLE: disable schmitt-trigger mode on the pin.
  * @PIN_CONFIG_INPUT_SCHMITT: this will configure an input pin to run in
  *	schmitt-trigger mode. If the schmitt-trigger has adjustable hysteresis,
@@ -76,6 +78,7 @@ enum pin_config_param {
 	PIN_CONFIG_DRIVE_PUSH_PULL,
 	PIN_CONFIG_DRIVE_OPEN_DRAIN,
 	PIN_CONFIG_DRIVE_OPEN_SOURCE,
+	PIN_CONFIG_DRIVE_STRENGTH,
 	PIN_CONFIG_INPUT_SCHMITT_DISABLE,
 	PIN_CONFIG_INPUT_SCHMITT,
 	PIN_CONFIG_INPUT_DEBOUNCE,
-- 
cgit v1.2.3


From bb813f4c933ae9f887a014483690d5f8b8ec05e1 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 18 Jan 2013 14:05:56 -0800
Subject: init, block: try to load default elevator module early during boot

This patch adds default module loading and uses it to load the default
block elevator.  During boot, it's called right after initramfs or
initrd is made available and right before control is passed to
userland.  This ensures that as long as the modules are available in
the usual places in initramfs, initrd or the root filesystem, the
default modules are loaded as soon as possible.

This will replace the on-demand elevator module loading from elevator
init path.

v2: Fixed build breakage when !CONFIG_BLOCK.  Reported by kbuild test
    robot.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Alex Riesen <raa.lkml@gmail.com>
Cc: Fengguang We <fengguang.wu@intel.com>
---
 block/elevator.c         | 16 ++++++++++++++++
 include/linux/elevator.h |  5 +++++
 include/linux/init.h     |  1 +
 init/do_mounts_initrd.c  |  3 +++
 init/initramfs.c         |  8 +++++++-
 init/main.c              | 16 ++++++++++++++++
 6 files changed, 48 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/block/elevator.c b/block/elevator.c
index 9edba1b8323e..c2d61d56e0b7 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -136,6 +136,22 @@ static int __init elevator_setup(char *str)
 
 __setup("elevator=", elevator_setup);
 
+/* called during boot to load the elevator chosen by the elevator param */
+void __init load_default_elevator_module(void)
+{
+	struct elevator_type *e;
+
+	if (!chosen_elevator[0])
+		return;
+
+	spin_lock(&elv_list_lock);
+	e = elevator_find(chosen_elevator);
+	spin_unlock(&elv_list_lock);
+
+	if (!e)
+		request_module("%s-iosched", chosen_elevator);
+}
+
 static struct kobj_type elv_ktype;
 
 static struct elevator_queue *elevator_alloc(struct request_queue *q,
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index c03af7687bb4..186620631750 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -138,6 +138,7 @@ extern void elv_drain_elevator(struct request_queue *);
 /*
  * io scheduler registration
  */
+extern void __init load_default_elevator_module(void);
 extern int elv_register(struct elevator_type *);
 extern void elv_unregister(struct elevator_type *);
 
@@ -206,5 +207,9 @@ enum {
 	INIT_LIST_HEAD(&(rq)->csd.list);	\
 	} while (0)
 
+#else /* CONFIG_BLOCK */
+
+static inline void load_default_elevator_module(void) { }
+
 #endif /* CONFIG_BLOCK */
 #endif
diff --git a/include/linux/init.h b/include/linux/init.h
index a799273714ac..9230c9408d8b 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -161,6 +161,7 @@ extern unsigned int reset_devices;
 /* used by init/main.c */
 void setup_arch(char **);
 void prepare_namespace(void);
+void __init load_default_modules(void);
 
 extern void (*late_time_init)(void);
 
diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c
index 5e4ded51788e..dfe606a7dd61 100644
--- a/init/do_mounts_initrd.c
+++ b/init/do_mounts_initrd.c
@@ -57,6 +57,9 @@ static void __init handle_initrd(void)
 	sys_mkdir("/old", 0700);
 	sys_chdir("/old");
 
+	/* try loading default modules from initrd */
+	load_default_modules();
+
 	/*
 	 * In case that a resume from disk is carried out by linuxrc or one of
 	 * its children, we need to tell the freezer not to wait for us.
diff --git a/init/initramfs.c b/init/initramfs.c
index 84c6bf111300..a67ef9dbda9d 100644
--- a/init/initramfs.c
+++ b/init/initramfs.c
@@ -592,7 +592,7 @@ static int __init populate_rootfs(void)
 			initrd_end - initrd_start);
 		if (!err) {
 			free_initrd();
-			return 0;
+			goto done;
 		} else {
 			clean_rootfs();
 			unpack_to_rootfs(__initramfs_start, __initramfs_size);
@@ -607,6 +607,7 @@ static int __init populate_rootfs(void)
 			sys_close(fd);
 			free_initrd();
 		}
+	done:
 #else
 		printk(KERN_INFO "Unpacking initramfs...\n");
 		err = unpack_to_rootfs((char *)initrd_start,
@@ -615,6 +616,11 @@ static int __init populate_rootfs(void)
 			printk(KERN_EMERG "Initramfs unpacking failed: %s\n", err);
 		free_initrd();
 #endif
+		/*
+		 * Try loading default modules from initramfs.  This gives
+		 * us a chance to load before device_initcalls.
+		 */
+		load_default_modules();
 	}
 	return 0;
 }
diff --git a/init/main.c b/init/main.c
index baf1f0f5c461..18efadb11cf6 100644
--- a/init/main.c
+++ b/init/main.c
@@ -70,6 +70,8 @@
 #include <linux/perf_event.h>
 #include <linux/file.h>
 #include <linux/ptrace.h>
+#include <linux/blkdev.h>
+#include <linux/elevator.h>
 
 #include <asm/io.h>
 #include <asm/bugs.h>
@@ -794,6 +796,17 @@ static void __init do_pre_smp_initcalls(void)
 		do_one_initcall(*fn);
 }
 
+/*
+ * This function requests modules which should be loaded by default and is
+ * called twice right after initrd is mounted and right before init is
+ * exec'd.  If such modules are on either initrd or rootfs, they will be
+ * loaded before control is passed to userland.
+ */
+void __init load_default_modules(void)
+{
+	load_default_elevator_module();
+}
+
 static int run_init_process(const char *init_filename)
 {
 	argv_init[0] = init_filename;
@@ -898,4 +911,7 @@ static void __init kernel_init_freeable(void)
 	 * we're essentially up and running. Get rid of the
 	 * initmem segments and start the user-mode stuff..
 	 */
+
+	/* rootfs is available now, try loading default modules */
+	load_default_modules();
 }
-- 
cgit v1.2.3


From 84b233adcca3cacd5cfa8013a5feda7a3db4a9af Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 18 Jan 2013 14:05:56 -0800
Subject: workqueue: implement current_is_async()

This function queries whether %current is an async worker executing an
async item.  This will be used to implement warning on synchronous
request_module() from async workers.

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/async.h       |  1 +
 kernel/async.c              | 14 ++++++++++++++
 kernel/workqueue_internal.h | 11 +++++++++++
 3 files changed, 26 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/async.h b/include/linux/async.h
index 7a24fe9b44b4..345169cfa304 100644
--- a/include/linux/async.h
+++ b/include/linux/async.h
@@ -52,4 +52,5 @@ extern void async_synchronize_full_domain(struct async_domain *domain);
 extern void async_synchronize_cookie(async_cookie_t cookie);
 extern void async_synchronize_cookie_domain(async_cookie_t cookie,
 					    struct async_domain *domain);
+extern bool current_is_async(void);
 #endif
diff --git a/kernel/async.c b/kernel/async.c
index 9d3118384858..d9bf2a9b5cee 100644
--- a/kernel/async.c
+++ b/kernel/async.c
@@ -57,6 +57,8 @@ asynchronous and synchronous parts of the kernel.
 #include <linux/slab.h>
 #include <linux/workqueue.h>
 
+#include "workqueue_internal.h"
+
 static async_cookie_t next_cookie = 1;
 
 #define MAX_WORK	32768
@@ -337,3 +339,15 @@ void async_synchronize_cookie(async_cookie_t cookie)
 	async_synchronize_cookie_domain(cookie, &async_running);
 }
 EXPORT_SYMBOL_GPL(async_synchronize_cookie);
+
+/**
+ * current_is_async - is %current an async worker task?
+ *
+ * Returns %true if %current is an async worker task.
+ */
+bool current_is_async(void)
+{
+	struct worker *worker = current_wq_worker();
+
+	return worker && worker->current_func == async_run_entry_fn;
+}
diff --git a/kernel/workqueue_internal.h b/kernel/workqueue_internal.h
index 02549fa04587..cc35e7e62091 100644
--- a/kernel/workqueue_internal.h
+++ b/kernel/workqueue_internal.h
@@ -8,6 +8,7 @@
 #define _KERNEL_WORKQUEUE_INTERNAL_H
 
 #include <linux/workqueue.h>
+#include <linux/kthread.h>
 
 struct global_cwq;
 struct worker_pool;
@@ -44,6 +45,16 @@ struct worker {
 	struct workqueue_struct	*rescue_wq;	/* I: the workqueue to rescue */
 };
 
+/**
+ * current_wq_worker - return struct worker if %current is a workqueue worker
+ */
+static inline struct worker *current_wq_worker(void)
+{
+	if (current->flags & PF_WQ_WORKER)
+		return kthread_data(current);
+	return NULL;
+}
+
 /*
  * Scheduler hooks for concurrency managed workqueue.  Only to be used from
  * sched.c and workqueue.c.
-- 
cgit v1.2.3


From 833d6e01b4cfbc033a915ae02744247bbf7c619d Mon Sep 17 00:00:00 2001
From: Felipe Balbi <balbi@ti.com>
Date: Fri, 18 Jan 2013 22:40:32 +0200
Subject: debugfs: regset32: make struct debugfs_reg32 pointer const

no user of that should ever change that pointer,
so let's mark it const to prevent that from
happening.

Signed-off-by: Felipe Balbi <balbi@ti.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/debugfs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h
index 66c434f5dd1e..63f2465807d4 100644
--- a/include/linux/debugfs.h
+++ b/include/linux/debugfs.h
@@ -33,7 +33,7 @@ struct debugfs_reg32 {
 };
 
 struct debugfs_regset32 {
-	struct debugfs_reg32 *regs;
+	const struct debugfs_reg32 *regs;
 	int nregs;
 	void __iomem *base;
 };
-- 
cgit v1.2.3


From 4f73bc4dd3e8563ef4109f293a092820dff66d92 Mon Sep 17 00:00:00 2001
From: Joe Millenbach <jmillenbach@gmail.com>
Date: Thu, 17 Jan 2013 22:44:22 -0800
Subject: tty: Added a CONFIG_TTY option to allow removal of TTY

The option allows you to remove TTY and compile without errors. This
saves space on systems that won't support TTY interfaces anyway.
bloat-o-meter output is below.

The bulk of this patch consists of Kconfig changes adding "depends on
TTY" to various serial devices and similar drivers that require the TTY
layer.  Ideally, these dependencies would occur on a common intermediate
symbol such as SERIO, but most drivers "select SERIO" rather than
"depends on SERIO", and "select" does not respect dependencies.

bloat-o-meter output comparing our previous minimal to new minimal by
removing TTY.  The list is filtered to not show removed entries with awk
'$3 != "-"' as the list was very long.

add/remove: 0/226 grow/shrink: 2/14 up/down: 6/-35356 (-35350)
function                                     old     new   delta
chr_dev_init                                 166     170      +4
allow_signal                                  80      82      +2
static.__warned                              143     142      -1
disallow_signal                               63      62      -1
__set_special_pids                            95      94      -1
unregister_console                           126     121      -5
start_kernel                                 546     541      -5
register_console                             593     588      -5
copy_from_user                                45      40      -5
sys_setsid                                   128     120      -8
sys_vhangup                                   32      19     -13
do_exit                                     1543    1526     -17
bitmap_zero                                   60      40     -20
arch_local_irq_save                          137     117     -20
release_task                                 674     652     -22
static.spin_unlock_irqrestore                308     260     -48

Signed-off-by: Joe Millenbach <jmillenbach@gmail.com>
Reviewed-by: Jamey Sharp <jamey@minilop.net>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/alpha/Kconfig                  |  2 ++
 arch/ia64/hp/sim/Kconfig            |  1 +
 arch/m68k/Kconfig.devices           |  2 +-
 arch/parisc/Kconfig                 |  1 +
 arch/tile/Kconfig                   |  1 +
 arch/um/Kconfig.common              |  1 +
 arch/xtensa/Kconfig                 |  1 +
 drivers/bluetooth/Kconfig           |  1 +
 drivers/char/Kconfig                |  7 +++---
 drivers/char/pcmcia/Kconfig         |  4 ++--
 drivers/i2c/busses/Kconfig          |  2 +-
 drivers/input/joystick/Kconfig      |  4 ++++
 drivers/input/keyboard/Kconfig      | 10 ++++++++-
 drivers/input/mouse/Kconfig         |  3 +++
 drivers/input/serio/Kconfig         |  1 +
 drivers/input/touchscreen/Kconfig   | 22 +++++++++++++++++++
 drivers/ipack/devices/Kconfig       |  2 +-
 drivers/isdn/Kconfig                |  1 +
 drivers/isdn/capi/Kconfig           |  1 +
 drivers/isdn/gigaset/Kconfig        |  1 +
 drivers/isdn/hardware/mISDN/Kconfig |  1 +
 drivers/lguest/Kconfig              |  2 +-
 drivers/media/radio/wl128x/Kconfig  |  2 +-
 drivers/misc/Kconfig                |  2 +-
 drivers/misc/ti-st/Kconfig          |  2 +-
 drivers/mmc/card/Kconfig            |  1 +
 drivers/net/caif/Kconfig            |  2 +-
 drivers/net/can/Kconfig             |  2 +-
 drivers/net/hamradio/Kconfig        |  4 ++--
 drivers/net/irda/Kconfig            |  2 +-
 drivers/net/ppp/Kconfig             |  3 +++
 drivers/net/slip/Kconfig            |  1 +
 drivers/net/usb/Kconfig             |  4 ++--
 drivers/net/wan/Kconfig             |  2 +-
 drivers/pps/clients/Kconfig         |  2 +-
 drivers/s390/char/Kconfig           |  8 +++----
 drivers/staging/ccg/Kconfig         |  2 +-
 drivers/staging/dgrp/Kconfig        |  2 +-
 drivers/staging/fwserial/Kconfig    |  2 +-
 drivers/staging/sb105x/Kconfig      |  2 +-
 drivers/tty/Kconfig                 | 13 +++++++++++
 drivers/tty/Makefile                |  2 +-
 drivers/tty/hvc/Kconfig             |  3 +++
 drivers/tty/serial/Kconfig          |  4 ++++
 drivers/usb/class/Kconfig           |  2 +-
 drivers/usb/gadget/Kconfig          |  6 +++++
 drivers/usb/serial/Kconfig          |  2 +-
 fs/proc/Makefile                    |  3 ++-
 include/linux/console.h             |  5 +++++
 include/linux/proc_fs.h             |  5 +++++
 include/linux/tty.h                 | 44 +++++++++++++++++++++++++++----------
 lib/Kconfig.kgdb                    |  1 +
 net/bluetooth/rfcomm/Kconfig        |  1 +
 net/irda/ircomm/Kconfig             |  2 +-
 sound/soc/codecs/Kconfig            |  3 ++-
 55 files changed, 165 insertions(+), 47 deletions(-)

(limited to 'include/linux')

diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index 9d5904cc7712..1ef196ddadeb 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -125,6 +125,7 @@ choice
 
 config ALPHA_GENERIC
 	bool "Generic"
+	depends on TTY
 	help
 	  A generic kernel will run on all supported Alpha hardware.
 
@@ -491,6 +492,7 @@ config VGA_HOSE
 
 config ALPHA_SRM
 	bool "Use SRM as bootloader" if ALPHA_CABRIOLET || ALPHA_AVANTI_CH || ALPHA_EB64P || ALPHA_PC164 || ALPHA_TAKARA || ALPHA_EB164 || ALPHA_ALCOR || ALPHA_MIATA || ALPHA_LX164 || ALPHA_SX164 || ALPHA_NAUTILUS || ALPHA_NONAME
+	depends on TTY
 	default y if ALPHA_JENSEN || ALPHA_MIKASA || ALPHA_SABLE || ALPHA_LYNX || ALPHA_NORITAKE || ALPHA_DP264 || ALPHA_RAWHIDE || ALPHA_EIGER || ALPHA_WILDFIRE || ALPHA_TITAN || ALPHA_SHARK || ALPHA_MARVEL
 	---help---
 	  There are two different types of booting firmware on Alphas: SRM,
diff --git a/arch/ia64/hp/sim/Kconfig b/arch/ia64/hp/sim/Kconfig
index 8d513a8c5266..d84707d55203 100644
--- a/arch/ia64/hp/sim/Kconfig
+++ b/arch/ia64/hp/sim/Kconfig
@@ -8,6 +8,7 @@ config HP_SIMETH
 
 config HP_SIMSERIAL
 	bool "Simulated serial driver support"
+	depends on TTY
 
 config HP_SIMSERIAL_CONSOLE
 	bool "Console for HP simulator"
diff --git a/arch/m68k/Kconfig.devices b/arch/m68k/Kconfig.devices
index c4cdfe444c64..4bc945dfe467 100644
--- a/arch/m68k/Kconfig.devices
+++ b/arch/m68k/Kconfig.devices
@@ -41,7 +41,7 @@ config NFBLOCK
 
 config NFCON
 	tristate "NatFeat console driver"
-	depends on NATFEAT
+	depends on TTY && NATFEAT
 	help
 	  Say Y to include support for the ARAnyM NatFeat console driver
 	  which allows the console output to be redirected to the stderr
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index b77feffbadea..df5beb639760 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -23,6 +23,7 @@ config PARISC
 	select HAVE_MOD_ARCH_SPECIFIC
 	select MODULES_USE_ELF_RELA
 	select CLONE_BACKWARDS
+	select TTY # Needed for pdc_cons.c
 
 	help
 	  The PA-RISC microprocessor is designed by Hewlett-Packard and used
diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
index 875d008828b8..ae8a7ca67fa4 100644
--- a/arch/tile/Kconfig
+++ b/arch/tile/Kconfig
@@ -121,6 +121,7 @@ config DEBUG_COPY_FROM_USER
 	def_bool n
 
 config HVC_TILE
+	depends on TTY
 	select HVC_DRIVER
 	def_bool y
 
diff --git a/arch/um/Kconfig.common b/arch/um/Kconfig.common
index 648121b037d5..bceee6623b00 100644
--- a/arch/um/Kconfig.common
+++ b/arch/um/Kconfig.common
@@ -12,6 +12,7 @@ config UML
 	select GENERIC_CPU_DEVICES
 	select GENERIC_IO
 	select GENERIC_CLOCKEVENTS
+	select TTY # Needed for line.c
 
 config MMU
 	bool
diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
index 5aab1acabf1c..ad64c73b8675 100644
--- a/arch/xtensa/Kconfig
+++ b/arch/xtensa/Kconfig
@@ -132,6 +132,7 @@ choice
 
 config XTENSA_PLATFORM_ISS
 	bool "ISS"
+	depends on TTY
 	select XTENSA_CALIBRATE_CCOUNT
 	select SERIAL_CONSOLE
 	select XTENSA_ISS_NETWORK
diff --git a/drivers/bluetooth/Kconfig b/drivers/bluetooth/Kconfig
index e9f203eadb1f..fdfd61a2d523 100644
--- a/drivers/bluetooth/Kconfig
+++ b/drivers/bluetooth/Kconfig
@@ -26,6 +26,7 @@ config BT_HCIBTSDIO
 
 config BT_HCIUART
 	tristate "HCI UART driver"
+	depends on TTY
 	help
 	  Bluetooth HCI UART driver.
 	  This driver is required if you want to use Bluetooth devices with
diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig
index 72bedad6bf8c..3bb6fa3930be 100644
--- a/drivers/char/Kconfig
+++ b/drivers/char/Kconfig
@@ -53,7 +53,7 @@ source "drivers/tty/serial/Kconfig"
 
 config TTY_PRINTK
 	bool "TTY driver to output user messages via printk"
-	depends on EXPERT
+	depends on EXPERT && TTY
 	default n
 	---help---
 	  If you say Y here, the support for writing user messages (i.e.
@@ -159,7 +159,7 @@ source "drivers/tty/hvc/Kconfig"
 
 config VIRTIO_CONSOLE
 	tristate "Virtio console"
-	depends on VIRTIO
+	depends on VIRTIO && TTY
 	select HVC_DRIVER
 	help
 	  Virtio console for use with lguest and other hypervisors.
@@ -392,6 +392,7 @@ config XILINX_HWICAP
 
 config R3964
 	tristate "Siemens R3964 line discipline"
+	depends on TTY
 	---help---
 	  This driver allows synchronous communication with devices using the
 	  Siemens R3964 packet protocol. Unless you are dealing with special
@@ -439,7 +440,7 @@ source "drivers/char/pcmcia/Kconfig"
 
 config MWAVE
 	tristate "ACP Modem (Mwave) support"
-	depends on X86
+	depends on X86 && TTY
 	select SERIAL_8250
 	---help---
 	  The ACP modem (Mwave) for Linux is a WinModem. It is composed of a
diff --git a/drivers/char/pcmcia/Kconfig b/drivers/char/pcmcia/Kconfig
index 6614416a8623..2a166d56738a 100644
--- a/drivers/char/pcmcia/Kconfig
+++ b/drivers/char/pcmcia/Kconfig
@@ -7,7 +7,7 @@ menu "PCMCIA character devices"
 
 config SYNCLINK_CS
 	tristate "SyncLink PC Card support"
-	depends on PCMCIA
+	depends on PCMCIA && TTY
 	help
 	  Enable support for the SyncLink PC Card serial adapter, running
 	  asynchronous and HDLC communications up to 512Kbps. The port is
@@ -45,7 +45,7 @@ config CARDMAN_4040
 
 config IPWIRELESS
 	tristate "IPWireless 3G UMTS PCMCIA card support"
-	depends on PCMCIA && NETDEVICES
+	depends on PCMCIA && NETDEVICES && TTY
 	select PPP
 	help
 	  This is a driver for 3G UMTS PCMCIA card from IPWireless company. In
diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig
index bdca5111eb9d..cf474b2df4ae 100644
--- a/drivers/i2c/busses/Kconfig
+++ b/drivers/i2c/busses/Kconfig
@@ -803,7 +803,7 @@ config I2C_PARPORT_LIGHT
 
 config I2C_TAOS_EVM
 	tristate "TAOS evaluation module"
-	depends on EXPERIMENTAL
+	depends on EXPERIMENTAL && TTY
 	select SERIO
 	select SERIO_SERPORT
 	default n
diff --git a/drivers/input/joystick/Kconfig b/drivers/input/joystick/Kconfig
index 56eb471b5576..055bcaba774c 100644
--- a/drivers/input/joystick/Kconfig
+++ b/drivers/input/joystick/Kconfig
@@ -132,6 +132,8 @@ config JOYSTICK_TMDC
 
 source "drivers/input/joystick/iforce/Kconfig"
 
+if TTY
+
 config JOYSTICK_WARRIOR
 	tristate "Logitech WingMan Warrior joystick"
 	select SERIO
@@ -205,6 +207,8 @@ config JOYSTICK_ZHENHUA
 	  To compile this driver as a module, choose M here: the
 	  module will be called zhenhua.
 
+endif # TTY
+
 config JOYSTICK_DB9
 	tristate "Multisystem, Sega Genesis, Saturn joysticks and gamepads"
 	depends on PARPORT
diff --git a/drivers/input/keyboard/Kconfig b/drivers/input/keyboard/Kconfig
index 5a240c60342d..008f96aaf19e 100644
--- a/drivers/input/keyboard/Kconfig
+++ b/drivers/input/keyboard/Kconfig
@@ -69,6 +69,7 @@ config KEYBOARD_ATARI
 config KEYBOARD_ATKBD
 	tristate "AT keyboard" if EXPERT || !X86
 	default y
+	depends on TTY
 	select SERIO
 	select SERIO_LIBPS2
 	select SERIO_I8042 if X86
@@ -153,6 +154,7 @@ config KEYBOARD_BFIN
 
 config KEYBOARD_LKKBD
 	tristate "DECstation/VAXstation LK201/LK401 keyboard"
+	depends on TTY
 	select SERIO
 	help
 	  Say Y here if you want to use a LK201 or LK401 style serial
@@ -268,7 +270,7 @@ config KEYBOARD_HIL_OLD
 
 config KEYBOARD_HIL
 	tristate "HP HIL keyboard/pointer support"
-	depends on GSC || HP300
+	depends on (GSC || HP300) && TTY
 	default y
 	select HP_SDC
 	select HIL_MLC
@@ -400,6 +402,7 @@ config KEYBOARD_IMX
 
 config KEYBOARD_NEWTON
 	tristate "Newton keyboard"
+	depends on TTY
 	select SERIO
 	help
 	  Say Y here if you have a Newton keyboard on a serial port.
@@ -479,6 +482,8 @@ config KEYBOARD_SAMSUNG
 	  To compile this driver as a module, choose M here: the
 	  module will be called samsung-keypad.
 
+if TTY
+
 config KEYBOARD_STOWAWAY
 	tristate "Stowaway keyboard"
 	select SERIO
@@ -501,6 +506,8 @@ config KEYBOARD_SUNKBD
 	  To compile this driver as a module, choose M here: the
 	  module will be called sunkbd.
 
+endif # TTY
+
 config KEYBOARD_SH_KEYSC
 	tristate "SuperH KEYSC keypad support"
 	depends on SUPERH || ARCH_SHMOBILE
@@ -597,6 +604,7 @@ config KEYBOARD_TWL4030
 
 config KEYBOARD_XTKBD
 	tristate "XT keyboard"
+	depends on TTY
 	select SERIO
 	help
 	  Say Y here if you want to use the old IBM PC/XT keyboard (or
diff --git a/drivers/input/mouse/Kconfig b/drivers/input/mouse/Kconfig
index cd6268cf7cd5..fc160f72dc4e 100644
--- a/drivers/input/mouse/Kconfig
+++ b/drivers/input/mouse/Kconfig
@@ -14,6 +14,7 @@ if INPUT_MOUSE
 
 config MOUSE_PS2
 	tristate "PS/2 mouse"
+	depends on TTY
 	default y
 	select SERIO
 	select SERIO_LIBPS2
@@ -138,6 +139,7 @@ config MOUSE_PS2_OLPC
 
 config MOUSE_SERIAL
 	tristate "Serial mouse"
+	depends on TTY
 	select SERIO
 	help
 	  Say Y here if you have a serial (RS-232, COM port) mouse connected
@@ -262,6 +264,7 @@ config MOUSE_RISCPC
 
 config MOUSE_VSXXXAA
 	tristate "DEC VSXXX-AA/GA mouse and VSXXX-AB tablet"
+	depends on TTY
 	select SERIO
 	help
 	  Say Y (or M) if you want to use a DEC VSXXX-AA (hockey
diff --git a/drivers/input/serio/Kconfig b/drivers/input/serio/Kconfig
index 4a4e182c33e7..81ee7551148e 100644
--- a/drivers/input/serio/Kconfig
+++ b/drivers/input/serio/Kconfig
@@ -4,6 +4,7 @@
 config SERIO
 	tristate "Serial I/O support" if EXPERT || !X86
 	default y
+	depends on TTY
 	help
 	  Say Yes here if you have any input device that uses serial I/O to
 	  communicate with the system. This includes the
diff --git a/drivers/input/touchscreen/Kconfig b/drivers/input/touchscreen/Kconfig
index 515cfe790543..3d6f548dd3d4 100644
--- a/drivers/input/touchscreen/Kconfig
+++ b/drivers/input/touchscreen/Kconfig
@@ -192,6 +192,8 @@ config TOUCHSCREEN_DA9052
 	  To compile this driver as a module, choose M here: the
 	  module will be called da9052_tsi.
 
+if TTY
+
 config TOUCHSCREEN_DYNAPRO
 	tristate "Dynapro serial touchscreen"
 	select SERIO
@@ -216,6 +218,8 @@ config TOUCHSCREEN_HAMPSHIRE
 	  To compile this driver as a module, choose M here: the
 	  module will be called hampshire.
 
+endif # TTY
+
 config TOUCHSCREEN_EETI
 	tristate "EETI touchscreen panel support"
 	depends on I2C
@@ -237,6 +241,7 @@ config TOUCHSCREEN_EGALAX
 
 config TOUCHSCREEN_FUJITSU
 	tristate "Fujitsu serial touchscreen"
+	depends on TTY
 	select SERIO
 	help
 	  Say Y here if you have the Fujitsu touchscreen (such as one
@@ -275,6 +280,8 @@ config TOUCHSCREEN_S3C2410
 	  To compile this driver as a module, choose M here: the
 	  module will be called s3c2410_ts.
 
+if TTY
+
 config TOUCHSCREEN_GUNZE
 	tristate "Gunze AHL-51S touchscreen"
 	select SERIO
@@ -311,6 +318,8 @@ config TOUCHSCREEN_WACOM_W8001
 	  To compile this driver as a module, choose M here: the
 	  module will be called wacom_w8001.
 
+endif # TTY
+
 config TOUCHSCREEN_WACOM_I2C
 	tristate "Wacom Tablet support (I2C)"
 	depends on I2C
@@ -369,6 +378,8 @@ config TOUCHSCREEN_MMS114
 	  To compile this driver as a module, choose M here: the
 	  module will be called mms114.
 
+if TTY
+
 config TOUCHSCREEN_MTOUCH
 	tristate "MicroTouch serial touchscreens"
 	select SERIO
@@ -393,6 +404,8 @@ config TOUCHSCREEN_INEXIO
 	  To compile this driver as a module, choose M here: the
 	  module will be called inexio.
 
+endif # TTY
+
 config TOUCHSCREEN_INTEL_MID
 	tristate "Intel MID platform resistive touchscreen"
 	depends on INTEL_SCU_IPC
@@ -450,6 +463,7 @@ config TOUCHSCREEN_HTCPEN
 
 config TOUCHSCREEN_PENMOUNT
 	tristate "Penmount serial touchscreen"
+	depends on TTY
 	select SERIO
 	help
 	  Say Y here if you have a Penmount serial touchscreen connected to
@@ -493,6 +507,8 @@ config TOUCHSCREEN_TNETV107X
 	  To compile this driver as a module, choose M here: the
 	  module will be called tnetv107x-ts.
 
+if TTY
+
 config TOUCHSCREEN_TOUCHRIGHT
 	tristate "Touchright serial touchscreen"
 	select SERIO
@@ -517,6 +533,8 @@ config TOUCHSCREEN_TOUCHWIN
 	  To compile this driver as a module, choose M here: the
 	  module will be called touchwin.
 
+endif # TTY
+
 config TOUCHSCREEN_TI_AM335X_TSC
 	tristate "TI Touchscreen Interface"
 	depends on MFD_TI_AM335X_TSCADC
@@ -790,6 +808,8 @@ config TOUCHSCREEN_USB_EASYTOUCH
 	  Say Y here if you have an EasyTouch USB Touch controller.
 	  If unsure, say N.
 
+if TTY
+
 config TOUCHSCREEN_TOUCHIT213
 	tristate "Sahara TouchIT-213 touchscreen"
 	select SERIO
@@ -813,6 +833,8 @@ config TOUCHSCREEN_TSC_SERIO
 	  To compile this driver as a module, choose M here: the
 	  module will be called tsc40.
 
+endif # TTY
+
 config TOUCHSCREEN_TSC2005
         tristate "TSC2005 based touchscreens"
         depends on SPI_MASTER && GENERIC_HARDIRQS
diff --git a/drivers/ipack/devices/Kconfig b/drivers/ipack/devices/Kconfig
index 0b82fdc198c0..907a8cb48f2a 100644
--- a/drivers/ipack/devices/Kconfig
+++ b/drivers/ipack/devices/Kconfig
@@ -1,6 +1,6 @@
 config SERIAL_IPOCTAL
 	tristate "IndustryPack IP-OCTAL uart support"
-	depends on IPACK_BUS
+	depends on IPACK_BUS && TTY
 	help
 	  This driver supports the IPOCTAL serial port device for the IndustryPack bus.
 	default n
diff --git a/drivers/isdn/Kconfig b/drivers/isdn/Kconfig
index 86cd75a0e84d..ef661acdda17 100644
--- a/drivers/isdn/Kconfig
+++ b/drivers/isdn/Kconfig
@@ -22,6 +22,7 @@ if ISDN
 
 menuconfig ISDN_I4L
 	tristate "Old ISDN4Linux (deprecated)"
+	depends on TTY
 	---help---
 	  This driver allows you to use an ISDN adapter for networking
 	  connections and as dialin/out device.  The isdn-tty's have a built
diff --git a/drivers/isdn/capi/Kconfig b/drivers/isdn/capi/Kconfig
index 15c3ffd9d860..f04686580040 100644
--- a/drivers/isdn/capi/Kconfig
+++ b/drivers/isdn/capi/Kconfig
@@ -18,6 +18,7 @@ config CAPI_TRACE
 
 config ISDN_CAPI_MIDDLEWARE
 	bool "CAPI2.0 Middleware support"
+	depends on TTY
 	help
 	  This option will enhance the capabilities of the /dev/capi20
 	  interface.  It will provide a means of moving a data connection,
diff --git a/drivers/isdn/gigaset/Kconfig b/drivers/isdn/gigaset/Kconfig
index b18a92c32184..dde5e09e6267 100644
--- a/drivers/isdn/gigaset/Kconfig
+++ b/drivers/isdn/gigaset/Kconfig
@@ -1,5 +1,6 @@
 menuconfig ISDN_DRV_GIGASET
 	tristate "Siemens Gigaset support"
+	depends on TTY
 	select CRC_CCITT
 	select BITREVERSE
 	help
diff --git a/drivers/isdn/hardware/mISDN/Kconfig b/drivers/isdn/hardware/mISDN/Kconfig
index eadc1cd34a20..b8611e3e5e74 100644
--- a/drivers/isdn/hardware/mISDN/Kconfig
+++ b/drivers/isdn/hardware/mISDN/Kconfig
@@ -76,6 +76,7 @@ config MISDN_NETJET
 	tristate "Support for NETJet cards"
 	depends on MISDN
 	depends on PCI
+	depends on TTY
 	select MISDN_IPAC
 	select ISDN_HDLC
 	select ISDN_I4L
diff --git a/drivers/lguest/Kconfig b/drivers/lguest/Kconfig
index 34ae49dc557c..f9c43145a611 100644
--- a/drivers/lguest/Kconfig
+++ b/drivers/lguest/Kconfig
@@ -1,6 +1,6 @@
 config LGUEST
 	tristate "Linux hypervisor example code"
-	depends on X86_32 && EXPERIMENTAL && EVENTFD
+	depends on X86_32 && EXPERIMENTAL && EVENTFD && TTY
 	select HVC_DRIVER
 	---help---
 	  This is a very simple module which allows you to run
diff --git a/drivers/media/radio/wl128x/Kconfig b/drivers/media/radio/wl128x/Kconfig
index ea1e6545df36..f359be7e9dd9 100644
--- a/drivers/media/radio/wl128x/Kconfig
+++ b/drivers/media/radio/wl128x/Kconfig
@@ -4,7 +4,7 @@
 menu "Texas Instruments WL128x FM driver (ST based)"
 config RADIO_WL128X
 	tristate "Texas Instruments WL128x FM Radio"
-	depends on VIDEO_V4L2 && RFKILL && GPIOLIB
+	depends on VIDEO_V4L2 && RFKILL && GPIOLIB && TTY
 	select TI_ST if NET
 	help
 	Choose Y here if you have this FM radio chip.
diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index b151b7c1bd59..4b2bb939dde1 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -127,7 +127,7 @@ config PHANTOM
 
 config INTEL_MID_PTI
 	tristate "Parallel Trace Interface for MIPI P1149.7 cJTAG standard"
-	depends on PCI
+	depends on PCI && TTY
 	default n
 	help
 	  The PTI (Parallel Trace Interface) driver directs
diff --git a/drivers/misc/ti-st/Kconfig b/drivers/misc/ti-st/Kconfig
index abb5de1afce3..f34dcc514730 100644
--- a/drivers/misc/ti-st/Kconfig
+++ b/drivers/misc/ti-st/Kconfig
@@ -5,7 +5,7 @@
 menu "Texas Instruments shared transport line discipline"
 config TI_ST
 	tristate "Shared transport core driver"
-	depends on NET && GPIOLIB
+	depends on NET && GPIOLIB && TTY
 	select FW_LOADER
 	help
 	  This enables the shared transport core driver for TI
diff --git a/drivers/mmc/card/Kconfig b/drivers/mmc/card/Kconfig
index 3b1f783bf924..5562308699bc 100644
--- a/drivers/mmc/card/Kconfig
+++ b/drivers/mmc/card/Kconfig
@@ -52,6 +52,7 @@ config MMC_BLOCK_BOUNCE
 
 config SDIO_UART
 	tristate "SDIO UART/GPS class support"
+	depends on TTY
 	help
 	  SDIO function driver for SDIO cards that implements the UART
 	  class, as well as the GPS class which appears like a UART.
diff --git a/drivers/net/caif/Kconfig b/drivers/net/caif/Kconfig
index abf4d7a9dcce..60c2142373c9 100644
--- a/drivers/net/caif/Kconfig
+++ b/drivers/net/caif/Kconfig
@@ -6,7 +6,7 @@ comment "CAIF transport drivers"
 
 config CAIF_TTY
 	tristate "CAIF TTY transport driver"
-	depends on CAIF
+	depends on CAIF && TTY
 	default n
 	---help---
 	The CAIF TTY transport driver is a Line Discipline (ldisc)
diff --git a/drivers/net/can/Kconfig b/drivers/net/can/Kconfig
index b56bd9e80957..72df3a306a08 100644
--- a/drivers/net/can/Kconfig
+++ b/drivers/net/can/Kconfig
@@ -13,7 +13,7 @@ config CAN_VCAN
 
 config CAN_SLCAN
 	tristate "Serial / USB serial CAN Adaptors (slcan)"
-	depends on CAN
+	depends on CAN && TTY
 	---help---
 	  CAN driver for several 'low cost' CAN interfaces that are attached
 	  via serial lines or via USB-to-serial adapters using the LAWICEL
diff --git a/drivers/net/hamradio/Kconfig b/drivers/net/hamradio/Kconfig
index 95dbcfdf131d..bf5e59687680 100644
--- a/drivers/net/hamradio/Kconfig
+++ b/drivers/net/hamradio/Kconfig
@@ -1,6 +1,6 @@
 config MKISS
 	tristate "Serial port KISS driver"
-	depends on AX25
+	depends on AX25 && TTY
 	select CRC16
 	---help---
 	  KISS is a protocol used for the exchange of data between a computer
@@ -18,7 +18,7 @@ config MKISS
 
 config 6PACK
 	tristate "Serial port 6PACK driver"
-	depends on AX25
+	depends on AX25 && TTY
 	---help---
 	  6pack is a transmission protocol for the data exchange between your
 	  PC and your TNC (the Terminal Node Controller acts as a kind of
diff --git a/drivers/net/irda/Kconfig b/drivers/net/irda/Kconfig
index 595205406d73..e1454cdec14b 100644
--- a/drivers/net/irda/Kconfig
+++ b/drivers/net/irda/Kconfig
@@ -5,7 +5,7 @@ comment "SIR device drivers"
 
 config IRTTY_SIR
 	tristate "IrTTY (uses Linux serial driver)"
-	depends on IRDA
+	depends on IRDA && TTY
 	help
 	  Say Y here if you want to build support for the IrTTY line
 	  discipline.  To compile it as a module, choose M here: the module
diff --git a/drivers/net/ppp/Kconfig b/drivers/net/ppp/Kconfig
index 872df3ef07a6..3d9ef4f1e600 100644
--- a/drivers/net/ppp/Kconfig
+++ b/drivers/net/ppp/Kconfig
@@ -147,6 +147,7 @@ config PPPOL2TP
 	  Support for PPP-over-L2TP socket family. L2TP is a protocol
 	  used by ISPs and enterprises to tunnel PPP traffic over UDP
 	  tunnels. L2TP is replacing PPTP for VPN uses.
+if TTY
 
 config PPP_ASYNC
 	tristate "PPP support for async serial ports"
@@ -172,4 +173,6 @@ config PPP_SYNC_TTY
 
 	  To compile this driver as a module, choose M here.
 
+endif # TTY
+
 endif # PPP
diff --git a/drivers/net/slip/Kconfig b/drivers/net/slip/Kconfig
index 211b160e4e9c..48e68714eef3 100644
--- a/drivers/net/slip/Kconfig
+++ b/drivers/net/slip/Kconfig
@@ -4,6 +4,7 @@
 
 config SLIP
 	tristate "SLIP (serial line) support"
+	depends on TTY
 	---help---
 	  Say Y if you intend to use SLIP or CSLIP (compressed SLIP) to
 	  connect to your Internet service provider or to connect to some
diff --git a/drivers/net/usb/Kconfig b/drivers/net/usb/Kconfig
index ef976215b649..bd33153261ce 100644
--- a/drivers/net/usb/Kconfig
+++ b/drivers/net/usb/Kconfig
@@ -445,7 +445,7 @@ config USB_NET_QMI_WWAN
 
 config USB_HSO
 	tristate "Option USB High Speed Mobile Devices"
-	depends on USB && RFKILL
+	depends on USB && RFKILL && TTY
 	default n
 	help
 	  Choose this option if you have an Option HSDPA/HSUPA card.
@@ -493,7 +493,7 @@ config USB_SIERRA_NET
 
 config USB_VL600
 	tristate "LG VL600 modem dongle"
-	depends on USB_NET_CDCETHER
+	depends on USB_NET_CDCETHER && TTY
 	select USB_ACM
 	help
 	  Select this if you want to use an LG Electronics 4G/LTE usb modem
diff --git a/drivers/net/wan/Kconfig b/drivers/net/wan/Kconfig
index d58431e99f73..61eb8994b34f 100644
--- a/drivers/net/wan/Kconfig
+++ b/drivers/net/wan/Kconfig
@@ -429,7 +429,7 @@ config LAPBETHER
 
 config X25_ASY
 	tristate "X.25 async driver (EXPERIMENTAL)"
-	depends on LAPB && X25
+	depends on LAPB && X25 && TTY
 	---help---
 	  Send and receive X.25 frames over regular asynchronous serial
 	  lines such as telephone lines equipped with ordinary modems.
diff --git a/drivers/pps/clients/Kconfig b/drivers/pps/clients/Kconfig
index 445197d4a8c4..6efd9b60d8ff 100644
--- a/drivers/pps/clients/Kconfig
+++ b/drivers/pps/clients/Kconfig
@@ -17,7 +17,7 @@ config PPS_CLIENT_KTIMER
 
 config PPS_CLIENT_LDISC
 	tristate "PPS line discipline"
-	depends on PPS
+	depends on PPS && TTY
 	help
 	  If you say yes here you get support for a PPS source connected
 	  with the CD (Carrier Detect) pin of your serial port.
diff --git a/drivers/s390/char/Kconfig b/drivers/s390/char/Kconfig
index 2c9a776bd63c..71bf959732fe 100644
--- a/drivers/s390/char/Kconfig
+++ b/drivers/s390/char/Kconfig
@@ -11,7 +11,7 @@ config TN3270
 config TN3270_TTY
 	def_tristate y
 	prompt "Support for tty input/output on 3270 terminals"
-	depends on TN3270
+	depends on TN3270 && TTY
 	help
 	  Include support for using an IBM 3270 terminal as a Linux tty.
 
@@ -33,7 +33,7 @@ config TN3270_CONSOLE
 config TN3215
 	def_bool y
 	prompt "Support for 3215 line mode terminal"
-	depends on CCW
+	depends on CCW && TTY
 	help
 	  Include support for IBM 3215 line-mode terminals.
 
@@ -51,7 +51,7 @@ config CCW_CONSOLE
 config SCLP_TTY
 	def_bool y
 	prompt "Support for SCLP line mode terminal"
-	depends on S390
+	depends on S390 && TTY
 	help
 	  Include support for IBM SCLP line-mode terminals.
 
@@ -66,7 +66,7 @@ config SCLP_CONSOLE
 config SCLP_VT220_TTY
 	def_bool y
 	prompt "Support for SCLP VT220-compatible terminal"
-	depends on S390
+	depends on S390 && TTY
 	help
 	  Include support for an IBM SCLP VT220-compatible terminal.
 
diff --git a/drivers/staging/ccg/Kconfig b/drivers/staging/ccg/Kconfig
index 8997a8c757aa..7ed5bc6caadb 100644
--- a/drivers/staging/ccg/Kconfig
+++ b/drivers/staging/ccg/Kconfig
@@ -2,7 +2,7 @@ if USB_GADGET
 
 config USB_G_CCG
 	tristate "Configurable Composite Gadget (STAGING)"
-	depends on STAGING && BLOCK && NET && !USB_ZERO && !USB_ZERO_HNPTEST && !USB_AUDIO && !GADGET_UAC1 && !USB_ETH && !USB_ETH_RNDIS && !USB_ETH_EEM && !USB_G_NCM && !USB_GADGETFS && !USB_FUNCTIONFS && !USB_FUNCTIONFS_ETH && !USB_FUNCTIONFS_RNDIS && !USB_FUNCTIONFS_GENERIC && !USB_FILE_STORAGE && !USB_FILE_STORAGE_TEST && !USB_MASS_STORAGE && !USB_G_SERIAL && !USB_MIDI_GADGET && !USB_G_PRINTER && !USB_CDC_COMPOSITE && !USB_G_NOKIA && !USB_G_ACM_MS && !USB_G_MULTI && !USB_G_MULTI_RNDIS && !USB_G_MULTI_CDC && !USB_G_HID && !USB_G_DBGP && !USB_G_WEBCAM
+	depends on STAGING && BLOCK && NET && !USB_ZERO && !USB_ZERO_HNPTEST && !USB_AUDIO && !GADGET_UAC1 && !USB_ETH && !USB_ETH_RNDIS && !USB_ETH_EEM && !USB_G_NCM && !USB_GADGETFS && !USB_FUNCTIONFS && !USB_FUNCTIONFS_ETH && !USB_FUNCTIONFS_RNDIS && !USB_FUNCTIONFS_GENERIC && !USB_FILE_STORAGE && !USB_FILE_STORAGE_TEST && !USB_MASS_STORAGE && !USB_G_SERIAL && !USB_MIDI_GADGET && !USB_G_PRINTER && !USB_CDC_COMPOSITE && !USB_G_NOKIA && !USB_G_ACM_MS && !USB_G_MULTI && !USB_G_MULTI_RNDIS && !USB_G_MULTI_CDC && !USB_G_HID && !USB_G_DBGP && !USB_G_WEBCAM && TTY
 	help
 	  The Configurable Composite Gadget supports multiple USB
 	  functions: acm, mass storage, rndis and FunctionFS.
diff --git a/drivers/staging/dgrp/Kconfig b/drivers/staging/dgrp/Kconfig
index 39f4bb65ec83..e4c41552923a 100644
--- a/drivers/staging/dgrp/Kconfig
+++ b/drivers/staging/dgrp/Kconfig
@@ -1,7 +1,7 @@
 config DGRP
        tristate "Digi Realport driver"
        default n
-       depends on SYSFS
+       depends on SYSFS && TTY
        ---help---
        Support for Digi Realport devices.  These devices allow you to
        access remote serial ports as if they are local tty devices.  This
diff --git a/drivers/staging/fwserial/Kconfig b/drivers/staging/fwserial/Kconfig
index 580406cb1808..9cdb3cdc4b66 100644
--- a/drivers/staging/fwserial/Kconfig
+++ b/drivers/staging/fwserial/Kconfig
@@ -1,6 +1,6 @@
 config FIREWIRE_SERIAL
        tristate "TTY over Firewire"
-       depends on FIREWIRE
+       depends on FIREWIRE && TTY
        help
           This enables TTY over IEEE 1394, providing high-speed serial
 	  connectivity to cabled peers.
diff --git a/drivers/staging/sb105x/Kconfig b/drivers/staging/sb105x/Kconfig
index 3d0d0eb95b8c..6a2e1b78e844 100644
--- a/drivers/staging/sb105x/Kconfig
+++ b/drivers/staging/sb105x/Kconfig
@@ -1,7 +1,7 @@
 config SB105X
 	tristate "SystemBase PCI Multiport UART"
 	select SERIAL_CORE
-	depends on PCI && BROKEN
+	depends on PCI && TTY && BROKEN
 	help
 	  A driver for the SystemBase Multi-2/PCI serial card
 
diff --git a/drivers/tty/Kconfig b/drivers/tty/Kconfig
index 0ecf22b6a38e..a79fd8ebd56e 100644
--- a/drivers/tty/Kconfig
+++ b/drivers/tty/Kconfig
@@ -1,3 +1,14 @@
+config TTY
+	bool "Enable TTY" if EXPERT
+	default y
+	---help---
+	  Allows you to remove TTY support which can save space, and
+	  blocks features that require TTY from inclusion in the kernel.
+	  TTY is required for any text terminals or serial port
+	  communication. Most users should leave this enabled.
+
+if TTY
+
 config VT
 	bool "Virtual terminal" if EXPERT
 	depends on !S390 && !UML
@@ -388,3 +399,5 @@ config PPC_EARLY_DEBUG_EHV_BC_HANDLE
 	  If the number you specify is not a valid byte channel handle, then
 	  there simply will be no early console output.  This is true also
 	  if you don't boot under a hypervisor at all.
+
+endif # TTY
diff --git a/drivers/tty/Makefile b/drivers/tty/Makefile
index 2953059530e4..df5663d0d55e 100644
--- a/drivers/tty/Makefile
+++ b/drivers/tty/Makefile
@@ -1,4 +1,4 @@
-obj-y				+= tty_io.o n_tty.o tty_ioctl.o tty_ldisc.o \
+obj-$(CONFIG_TTY)		+= tty_io.o n_tty.o tty_ioctl.o tty_ldisc.o \
 				   tty_buffer.o tty_port.o tty_mutex.o
 obj-$(CONFIG_LEGACY_PTYS)	+= pty.o
 obj-$(CONFIG_UNIX98_PTYS)	+= pty.o
diff --git a/drivers/tty/hvc/Kconfig b/drivers/tty/hvc/Kconfig
index f47b734c6a7a..8902f9b4df71 100644
--- a/drivers/tty/hvc/Kconfig
+++ b/drivers/tty/hvc/Kconfig
@@ -1,3 +1,5 @@
+if TTY
+
 config HVC_DRIVER
 	bool
 	help
@@ -119,3 +121,4 @@ config HVCS
 	  which will also be compiled when this driver is built as a
 	  module.
 
+endif # TTY
diff --git a/drivers/tty/serial/Kconfig b/drivers/tty/serial/Kconfig
index 2dc429357fe3..e9aeccdfbe35 100644
--- a/drivers/tty/serial/Kconfig
+++ b/drivers/tty/serial/Kconfig
@@ -2,6 +2,8 @@
 # Serial device configuration
 #
 
+if TTY
+
 menu "Serial drivers"
 	depends on HAS_IOMEM
 
@@ -1483,3 +1485,5 @@ config SERIAL_RP2_NR_UARTS
 	  need to be increased.
 
 endmenu
+
+endif # TTY
diff --git a/drivers/usb/class/Kconfig b/drivers/usb/class/Kconfig
index 2519e320098f..316aac8e4ca1 100644
--- a/drivers/usb/class/Kconfig
+++ b/drivers/usb/class/Kconfig
@@ -6,7 +6,7 @@ comment "USB Device Class drivers"
 
 config USB_ACM
 	tristate "USB Modem (CDC ACM) support"
-	depends on USB
+	depends on USB && TTY
 	---help---
 	  This driver supports USB modems and ISDN adapters which support the
 	  Communication Device Class Abstract Control Model interface.
diff --git a/drivers/usb/gadget/Kconfig b/drivers/usb/gadget/Kconfig
index 14625fd2cecd..a1bd951f9cb7 100644
--- a/drivers/usb/gadget/Kconfig
+++ b/drivers/usb/gadget/Kconfig
@@ -750,6 +750,7 @@ config USB_GADGET_TARGET
 
 config USB_G_SERIAL
 	tristate "Serial Gadget (with CDC ACM and CDC OBEX support)"
+	depends on TTY
 	select USB_LIBCOMPOSITE
 	help
 	  The Serial Gadget talks to the Linux-USB generic serial driver.
@@ -799,6 +800,8 @@ config USB_G_PRINTER
 	  For more information, see Documentation/usb/gadget_printer.txt
 	  which includes sample code for accessing the device file.
 
+if TTY
+
 config USB_CDC_COMPOSITE
 	tristate "CDC Composite Device (Ethernet and ACM)"
 	depends on NET
@@ -879,6 +882,8 @@ config USB_G_MULTI_CDC
 
 	  If unsure, say "y".
 
+endif # TTY
+
 config USB_G_HID
 	tristate "HID Gadget"
 	select USB_LIBCOMPOSITE
@@ -895,6 +900,7 @@ config USB_G_HID
 # Standalone / single function gadgets
 config USB_G_DBGP
 	tristate "EHCI Debug Device Gadget"
+	depends on TTY
 	select USB_LIBCOMPOSITE
 	help
 	  This gadget emulates an EHCI Debug device. This is useful when you want
diff --git a/drivers/usb/serial/Kconfig b/drivers/usb/serial/Kconfig
index 76f462241738..d8e35fe30b0c 100644
--- a/drivers/usb/serial/Kconfig
+++ b/drivers/usb/serial/Kconfig
@@ -4,7 +4,7 @@
 
 menuconfig USB_SERIAL
 	tristate "USB Serial Converter support"
-	depends on USB
+	depends on USB && TTY
 	---help---
 	  Say Y here if you have a USB device that provides normal serial
 	  ports, or acts like a serial device, and you want to connect it to
diff --git a/fs/proc/Makefile b/fs/proc/Makefile
index 981b05601931..712f24db9600 100644
--- a/fs/proc/Makefile
+++ b/fs/proc/Makefile
@@ -8,7 +8,8 @@ proc-y			:= nommu.o task_nommu.o
 proc-$(CONFIG_MMU)	:= mmu.o task_mmu.o
 
 proc-y       += inode.o root.o base.o generic.o array.o \
-		proc_tty.o fd.o
+		fd.o
+proc-$(CONFIG_TTY)      += proc_tty.o
 proc-y	+= cmdline.o
 proc-y	+= consoles.o
 proc-y	+= cpuinfo.o
diff --git a/include/linux/console.h b/include/linux/console.h
index dedb082fe50f..3b709da1786e 100644
--- a/include/linux/console.h
+++ b/include/linux/console.h
@@ -157,7 +157,12 @@ extern int is_console_locked(void);
 extern int braille_register_console(struct console *, int index,
 		char *console_options, char *braille_options);
 extern int braille_unregister_console(struct console *);
+#ifdef CONFIG_TTY
 extern void console_sysfs_notify(void);
+#else
+static inline void console_sysfs_notify(void)
+{ }
+#endif
 extern bool console_suspend_enabled;
 
 /* Suspend and resume console messages over PM events */
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 32676b35d2f5..3c22538aab66 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -127,7 +127,12 @@ extern void pid_ns_release_proc(struct pid_namespace *ns);
  * proc_tty.c
  */
 struct tty_driver;
+#ifdef CONFIG_TTY
 extern void proc_tty_init(void);
+#else
+static inline void proc_tty_init(void)
+{ }
+#endif
 extern void proc_tty_register_driver(struct tty_driver *driver);
 extern void proc_tty_unregister_driver(struct tty_driver *driver);
 
diff --git a/include/linux/tty.h b/include/linux/tty.h
index f89acd1ed6d3..c75d886b0307 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -318,11 +318,43 @@ struct tty_file_private {
 
 #define TTY_WRITE_FLUSH(tty) tty_write_flush((tty))
 
+#ifdef CONFIG_TTY
+extern void console_init(void);
+extern void tty_kref_put(struct tty_struct *tty);
+extern struct pid *tty_get_pgrp(struct tty_struct *tty);
+extern void tty_vhangup_self(void);
+extern void disassociate_ctty(int priv);
+extern dev_t tty_devnum(struct tty_struct *tty);
+extern void proc_clear_tty(struct task_struct *p);
+extern struct tty_struct *get_current_tty(void);
+/* tty_io.c */
+extern int __init tty_init(void);
+#else
+static inline void console_init(void)
+{ }
+static inline void tty_kref_put(struct tty_struct *tty)
+{ }
+static inline struct pid *tty_get_pgrp(struct tty_struct *tty)
+{ return NULL; }
+static inline void tty_vhangup_self(void)
+{ }
+static inline void disassociate_ctty(int priv)
+{ }
+static inline dev_t tty_devnum(struct tty_struct *tty)
+{ return 0; }
+static inline void proc_clear_tty(struct task_struct *p)
+{ }
+static inline struct tty_struct *get_current_tty(void)
+{ return NULL; }
+/* tty_io.c */
+static inline int __init tty_init(void)
+{ return 0; }
+#endif
+
 extern void tty_write_flush(struct tty_struct *);
 
 extern struct ktermios tty_std_termios;
 
-extern void console_init(void);
 extern int vcs_init(void);
 
 extern struct class *tty_class;
@@ -342,7 +374,6 @@ static inline struct tty_struct *tty_kref_get(struct tty_struct *tty)
 		kref_get(&tty->kref);
 	return tty;
 }
-extern void tty_kref_put(struct tty_struct *tty);
 
 extern int tty_paranoia_check(struct tty_struct *tty, struct inode *inode,
 			      const char *routine);
@@ -374,18 +405,15 @@ extern void tty_driver_remove_tty(struct tty_driver *driver,
 				  struct tty_struct *tty);
 extern void tty_free_termios(struct tty_struct *tty);
 extern int is_current_pgrp_orphaned(void);
-extern struct pid *tty_get_pgrp(struct tty_struct *tty);
 extern int is_ignored(int sig);
 extern int tty_signal(int sig, struct tty_struct *tty);
 extern void tty_hangup(struct tty_struct *tty);
 extern void tty_vhangup(struct tty_struct *tty);
 extern void tty_vhangup_locked(struct tty_struct *tty);
-extern void tty_vhangup_self(void);
 extern void tty_unhangup(struct file *filp);
 extern int tty_hung_up_p(struct file *filp);
 extern void do_SAK(struct tty_struct *tty);
 extern void __do_SAK(struct tty_struct *tty);
-extern void disassociate_ctty(int priv);
 extern void no_tty(void);
 extern void tty_flush_to_ldisc(struct tty_struct *tty);
 extern void tty_buffer_free_all(struct tty_port *port);
@@ -415,9 +443,6 @@ extern long tty_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
 extern int tty_mode_ioctl(struct tty_struct *tty, struct file *file,
 			unsigned int cmd, unsigned long arg);
 extern int tty_perform_flush(struct tty_struct *tty, unsigned long arg);
-extern dev_t tty_devnum(struct tty_struct *tty);
-extern void proc_clear_tty(struct task_struct *p);
-extern struct tty_struct *get_current_tty(void);
 extern void tty_default_fops(struct file_operations *fops);
 extern struct tty_struct *alloc_tty_struct(void);
 extern int tty_alloc_file(struct file *file);
@@ -543,9 +568,6 @@ static inline int tty_audit_push_task(struct task_struct *tsk,
 }
 #endif
 
-/* tty_io.c */
-extern int __init tty_init(void);
-
 /* tty_ioctl.c */
 extern int n_tty_ioctl_helper(struct tty_struct *tty, struct file *file,
 		       unsigned int cmd, unsigned long arg);
diff --git a/lib/Kconfig.kgdb b/lib/Kconfig.kgdb
index 43cb93fa2651..30894fab84d6 100644
--- a/lib/Kconfig.kgdb
+++ b/lib/Kconfig.kgdb
@@ -22,6 +22,7 @@ config KGDB_SERIAL_CONSOLE
 	tristate "KGDB: use kgdb over the serial console"
 	select CONSOLE_POLL
 	select MAGIC_SYSRQ
+	depends on TTY
 	default y
 	help
 	  Share a serial console with kgdb. Sysrq-g must be used
diff --git a/net/bluetooth/rfcomm/Kconfig b/net/bluetooth/rfcomm/Kconfig
index 22e718b554e4..18d352ea2bc7 100644
--- a/net/bluetooth/rfcomm/Kconfig
+++ b/net/bluetooth/rfcomm/Kconfig
@@ -12,6 +12,7 @@ config BT_RFCOMM
 config BT_RFCOMM_TTY
 	bool "RFCOMM TTY support"
 	depends on BT_RFCOMM
+	depends on TTY
 	help
 	  This option enables TTY emulation support for RFCOMM channels.
 
diff --git a/net/irda/ircomm/Kconfig b/net/irda/ircomm/Kconfig
index 2d4c6b4a78d6..19492c1707b7 100644
--- a/net/irda/ircomm/Kconfig
+++ b/net/irda/ircomm/Kconfig
@@ -1,6 +1,6 @@
 config IRCOMM
 	tristate "IrCOMM protocol"
-	depends on IRDA
+	depends on IRDA && TTY
 	help
 	  Say Y here if you want to build support for the IrCOMM protocol.
 	  To compile it as modules, choose M here: the modules will be
diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig
index 3a847828932a..298822c0ad65 100644
--- a/sound/soc/codecs/Kconfig
+++ b/sound/soc/codecs/Kconfig
@@ -34,7 +34,7 @@ config SND_SOC_ALL_CODECS
 	select SND_SOC_CS42L73 if I2C
 	select SND_SOC_CS4270 if I2C
 	select SND_SOC_CS4271 if SND_SOC_I2C_AND_SPI
-	select SND_SOC_CX20442
+	select SND_SOC_CX20442 if TTY
 	select SND_SOC_DA7210 if I2C
 	select SND_SOC_DA732X if I2C
 	select SND_SOC_DA9055 if I2C
@@ -236,6 +236,7 @@ config SND_SOC_CS4271
 
 config SND_SOC_CX20442
 	tristate
+	depends on TTY
 
 config SND_SOC_JZ4740_CODEC
 	select REGMAP_MMIO
-- 
cgit v1.2.3


From d79beb39922e41083e8bbbb3de084a6ca958e25f Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Sat, 19 Jan 2013 14:29:31 +0100
Subject: ACPI / PM: Fix build for unusual combination of Kconfig options

CONFIG_PM_SLEEP may be set even if CONFIG_ACPI_SLEEP is unset,
although that is unusual.  For this reason, make the headers of
functions built for both CONFIG_ACPI and CONFIG_PM_SLEEP set
simultaneously depend on that combination of Kconfig options
instead of CONFIG_ACPI_SLEEP.

This fixes a build problem reported by Randy Dunlap.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/acpi.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 8c1d6f2a2193..5f2be833aaf8 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -511,7 +511,7 @@ static inline int acpi_subsys_runtime_suspend(struct device *dev) { return 0; }
 static inline int acpi_subsys_runtime_resume(struct device *dev) { return 0; }
 #endif
 
-#ifdef CONFIG_ACPI_SLEEP
+#if defined(CONFIG_ACPI) && defined(CONFIG_PM_SLEEP)
 int acpi_dev_suspend_late(struct device *dev);
 int acpi_dev_resume_early(struct device *dev);
 int acpi_subsys_prepare(struct device *dev);
-- 
cgit v1.2.3


From edea0d03ee5f0ae0051b6adb6681ebdf976b1ca4 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Sun, 20 Jan 2013 20:25:47 +0100
Subject: ia64: kill thread_matches(), unexport ptrace_check_attach()

The ia64 function "thread_matches()" has no users since commit
e868a55c2a8c ("[IA64] remove find_thread_for_addr()").  Remove it.

This allows us to make ptrace_check_attach() static to kernel/ptrace.c,
which is good since we'll need to change the semantics of it and fix up
all the callers.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/ia64/kernel/ptrace.c | 27 ---------------------------
 include/linux/ptrace.h    |  1 -
 kernel/ptrace.c           |  2 +-
 3 files changed, 1 insertion(+), 29 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c
index 4265ff64219b..b7a5fffe0924 100644
--- a/arch/ia64/kernel/ptrace.c
+++ b/arch/ia64/kernel/ptrace.c
@@ -672,33 +672,6 @@ ptrace_attach_sync_user_rbs (struct task_struct *child)
 	read_unlock(&tasklist_lock);
 }
 
-static inline int
-thread_matches (struct task_struct *thread, unsigned long addr)
-{
-	unsigned long thread_rbs_end;
-	struct pt_regs *thread_regs;
-
-	if (ptrace_check_attach(thread, 0) < 0)
-		/*
-		 * If the thread is not in an attachable state, we'll
-		 * ignore it.  The net effect is that if ADDR happens
-		 * to overlap with the portion of the thread's
-		 * register backing store that is currently residing
-		 * on the thread's kernel stack, then ptrace() may end
-		 * up accessing a stale value.  But if the thread
-		 * isn't stopped, that's a problem anyhow, so we're
-		 * doing as well as we can...
-		 */
-		return 0;
-
-	thread_regs = task_pt_regs(thread);
-	thread_rbs_end = ia64_get_user_rbs_end(thread, thread_regs, NULL);
-	if (!on_kernel_rbs(addr, thread_regs->ar_bspstore, thread_rbs_end))
-		return 0;
-
-	return 1;	/* looks like we've got a winner */
-}
-
 /*
  * Write f32-f127 back to task->thread.fph if it has been modified.
  */
diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index 1693775ecfe8..89573a33ab3c 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -45,7 +45,6 @@ extern long arch_ptrace(struct task_struct *child, long request,
 extern int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst, int len);
 extern int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long dst, int len);
 extern void ptrace_disable(struct task_struct *);
-extern int ptrace_check_attach(struct task_struct *task, bool ignore_state);
 extern int ptrace_request(struct task_struct *child, long request,
 			  unsigned long addr, unsigned long data);
 extern void ptrace_notify(int exit_code);
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 1599157336a6..612a56126851 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -139,7 +139,7 @@ void __ptrace_unlink(struct task_struct *child)
  * RETURNS:
  * 0 on success, -ESRCH if %child is not ready.
  */
-int ptrace_check_attach(struct task_struct *child, bool ignore_state)
+static int ptrace_check_attach(struct task_struct *child, bool ignore_state)
 {
 	int ret = -ESRCH;
 
-- 
cgit v1.2.3


From 373d4d099761cb1f637bed488ab3871945882273 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Mon, 21 Jan 2013 17:17:39 +1030
Subject: taint: add explicit flag to show whether lock dep is still OK.

Fix up all callers as they were before, with make one change: an
unsigned module taints the kernel, but doesn't turn off lockdep.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 arch/alpha/kernel/traps.c            |  2 +-
 arch/arm/kernel/traps.c              |  2 +-
 arch/arm64/kernel/traps.c            |  2 +-
 arch/avr32/kernel/traps.c            |  2 +-
 arch/hexagon/kernel/traps.c          |  2 +-
 arch/ia64/kernel/traps.c             |  2 +-
 arch/m68k/kernel/traps.c             |  2 +-
 arch/mips/kernel/traps.c             |  2 +-
 arch/parisc/kernel/traps.c           |  2 +-
 arch/powerpc/kernel/traps.c          |  2 +-
 arch/s390/kernel/traps.c             |  2 +-
 arch/sh/kernel/traps.c               |  2 +-
 arch/sparc/kernel/setup_64.c         |  2 +-
 arch/sparc/kernel/traps_32.c         |  2 +-
 arch/sparc/kernel/traps_64.c         |  2 +-
 arch/unicore32/kernel/traps.c        |  2 +-
 arch/x86/kernel/cpu/amd.c            |  3 +--
 arch/x86/kernel/cpu/mcheck/mce.c     |  2 +-
 arch/x86/kernel/cpu/mcheck/p5.c      |  2 +-
 arch/x86/kernel/cpu/mcheck/winchip.c |  2 +-
 arch/x86/kernel/cpu/mtrr/generic.c   |  2 +-
 arch/x86/kernel/dumpstack.c          |  2 +-
 arch/xtensa/kernel/traps.c           |  2 +-
 drivers/acpi/custom_method.c         |  2 +-
 drivers/acpi/osl.c                   |  2 +-
 drivers/base/regmap/regmap-debugfs.c |  2 +-
 include/linux/kernel.h               |  6 +++++-
 kernel/module.c                      | 26 +++++++++++++++-----------
 kernel/panic.c                       | 34 ++++++++++++++--------------------
 kernel/sched/core.c                  |  2 +-
 kernel/sysctl.c                      |  2 +-
 lib/bug.c                            |  3 ++-
 mm/memory.c                          |  2 +-
 mm/page_alloc.c                      |  2 +-
 mm/slab.c                            |  2 +-
 mm/slub.c                            |  2 +-
 sound/soc/soc-core.c                 |  2 +-
 37 files changed, 69 insertions(+), 67 deletions(-)

(limited to 'include/linux')

diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c
index 272666d006df..4037461a6493 100644
--- a/arch/alpha/kernel/traps.c
+++ b/arch/alpha/kernel/traps.c
@@ -186,7 +186,7 @@ die_if_kernel(char * str, struct pt_regs *regs, long err, unsigned long *r9_15)
 #endif
 	printk("%s(%d): %s %ld\n", current->comm, task_pid_nr(current), str, err);
 	dik_show_regs(regs, r9_15);
-	add_taint(TAINT_DIE);
+	add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
 	dik_show_trace((unsigned long *)(regs+1));
 	dik_show_code((unsigned int *)regs->pc);
 
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index b0179b89a04c..1c089119b2d7 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -296,7 +296,7 @@ static void oops_end(unsigned long flags, struct pt_regs *regs, int signr)
 
 	bust_spinlocks(0);
 	die_owner = -1;
-	add_taint(TAINT_DIE);
+	add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
 	die_nest_count--;
 	if (!die_nest_count)
 		/* Nest count reaches zero, release the lock. */
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 3883f842434f..b3c5f628bdb4 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -242,7 +242,7 @@ void die(const char *str, struct pt_regs *regs, int err)
 		crash_kexec(regs);
 
 	bust_spinlocks(0);
-	add_taint(TAINT_DIE);
+	add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
 	raw_spin_unlock_irq(&die_lock);
 	oops_exit();
 
diff --git a/arch/avr32/kernel/traps.c b/arch/avr32/kernel/traps.c
index 3d760c06f024..682b2478691a 100644
--- a/arch/avr32/kernel/traps.c
+++ b/arch/avr32/kernel/traps.c
@@ -61,7 +61,7 @@ void die(const char *str, struct pt_regs *regs, long err)
 	show_regs_log_lvl(regs, KERN_EMERG);
 	show_stack_log_lvl(current, regs->sp, regs, KERN_EMERG);
 	bust_spinlocks(0);
-	add_taint(TAINT_DIE);
+	add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
 	spin_unlock_irq(&die_lock);
 
 	if (in_interrupt())
diff --git a/arch/hexagon/kernel/traps.c b/arch/hexagon/kernel/traps.c
index a41eeb8eeaa1..be5e2dd9c9d3 100644
--- a/arch/hexagon/kernel/traps.c
+++ b/arch/hexagon/kernel/traps.c
@@ -225,7 +225,7 @@ int die(const char *str, struct pt_regs *regs, long err)
 	do_show_stack(current, &regs->r30, pt_elr(regs));
 
 	bust_spinlocks(0);
-	add_taint(TAINT_DIE);
+	add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
 
 	spin_unlock_irq(&die.lock);
 
diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c
index bd42b76000d1..f7f9f9c6caf0 100644
--- a/arch/ia64/kernel/traps.c
+++ b/arch/ia64/kernel/traps.c
@@ -72,7 +72,7 @@ die (const char *str, struct pt_regs *regs, long err)
 
 	bust_spinlocks(0);
 	die.lock_owner = -1;
-	add_taint(TAINT_DIE);
+	add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
 	spin_unlock_irq(&die.lock);
 
 	if (!regs)
diff --git a/arch/m68k/kernel/traps.c b/arch/m68k/kernel/traps.c
index cbc624af4494..f32ab22e7ed3 100644
--- a/arch/m68k/kernel/traps.c
+++ b/arch/m68k/kernel/traps.c
@@ -1176,7 +1176,7 @@ void die_if_kernel (char *str, struct pt_regs *fp, int nr)
 	console_verbose();
 	printk("%s: %08x\n",str,nr);
 	show_registers(fp);
-	add_taint(TAINT_DIE);
+	add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
 	do_exit(SIGSEGV);
 }
 
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index cf7ac5483f53..9007966d56d4 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -396,7 +396,7 @@ void __noreturn die(const char *str, struct pt_regs *regs)
 
 	printk("%s[#%d]:\n", str, ++die_counter);
 	show_registers(regs);
-	add_taint(TAINT_DIE);
+	add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
 	raw_spin_unlock_irq(&die_lock);
 
 	oops_exit();
diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c
index 45ba99f5080b..aeb8f8f2c07a 100644
--- a/arch/parisc/kernel/traps.c
+++ b/arch/parisc/kernel/traps.c
@@ -282,7 +282,7 @@ void die_if_kernel(char *str, struct pt_regs *regs, long err)
 
 	show_regs(regs);
 	dump_stack();
-	add_taint(TAINT_DIE);
+	add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
 
 	if (in_interrupt())
 		panic("Fatal exception in interrupt");
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 32518401af68..c579db859388 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -138,7 +138,7 @@ static void __kprobes oops_end(unsigned long flags, struct pt_regs *regs,
 {
 	bust_spinlocks(0);
 	die_owner = -1;
-	add_taint(TAINT_DIE);
+	add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
 	die_nest_count--;
 	oops_exit();
 	printk("\n");
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
index 70ecfc5fe8f0..13dd63fba367 100644
--- a/arch/s390/kernel/traps.c
+++ b/arch/s390/kernel/traps.c
@@ -271,7 +271,7 @@ void die(struct pt_regs *regs, const char *str)
 	print_modules();
 	show_regs(regs);
 	bust_spinlocks(0);
-	add_taint(TAINT_DIE);
+	add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
 	spin_unlock_irq(&die_lock);
 	if (in_interrupt())
 		panic("Fatal exception in interrupt");
diff --git a/arch/sh/kernel/traps.c b/arch/sh/kernel/traps.c
index 72246bc06884..dfdad72c61ca 100644
--- a/arch/sh/kernel/traps.c
+++ b/arch/sh/kernel/traps.c
@@ -38,7 +38,7 @@ void die(const char *str, struct pt_regs *regs, long err)
 	notify_die(DIE_OOPS, str, regs, err, 255, SIGSEGV);
 
 	bust_spinlocks(0);
-	add_taint(TAINT_DIE);
+	add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
 	spin_unlock_irq(&die_lock);
 	oops_exit();
 
diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c
index 0eaf0059aaef..88a127b9c69e 100644
--- a/arch/sparc/kernel/setup_64.c
+++ b/arch/sparc/kernel/setup_64.c
@@ -115,7 +115,7 @@ static void __init process_switch(char c)
 			break;
 		}
 		cheetah_pcache_forced_on = 1;
-		add_taint(TAINT_MACHINE_CHECK);
+		add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
 		cheetah_enable_pcache();
 		break;
 
diff --git a/arch/sparc/kernel/traps_32.c b/arch/sparc/kernel/traps_32.c
index a5785ea2a85d..662982946a89 100644
--- a/arch/sparc/kernel/traps_32.c
+++ b/arch/sparc/kernel/traps_32.c
@@ -58,7 +58,7 @@ void die_if_kernel(char *str, struct pt_regs *regs)
 
 	printk("%s(%d): %s [#%d]\n", current->comm, task_pid_nr(current), str, ++die_counter);
 	show_regs(regs);
-	add_taint(TAINT_DIE);
+	add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
 
 	__SAVE; __SAVE; __SAVE; __SAVE;
 	__SAVE; __SAVE; __SAVE; __SAVE;
diff --git a/arch/sparc/kernel/traps_64.c b/arch/sparc/kernel/traps_64.c
index e7ecf1507d90..8d38ca97aa23 100644
--- a/arch/sparc/kernel/traps_64.c
+++ b/arch/sparc/kernel/traps_64.c
@@ -2383,7 +2383,7 @@ void die_if_kernel(char *str, struct pt_regs *regs)
 	notify_die(DIE_OOPS, str, regs, 0, 255, SIGSEGV);
 	__asm__ __volatile__("flushw");
 	show_regs(regs);
-	add_taint(TAINT_DIE);
+	add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
 	if (regs->tstate & TSTATE_PRIV) {
 		struct thread_info *tp = current_thread_info();
 		struct reg_window *rw = (struct reg_window *)
diff --git a/arch/unicore32/kernel/traps.c b/arch/unicore32/kernel/traps.c
index 2054f0d4db13..0870b68d2ad9 100644
--- a/arch/unicore32/kernel/traps.c
+++ b/arch/unicore32/kernel/traps.c
@@ -231,7 +231,7 @@ void die(const char *str, struct pt_regs *regs, int err)
 	ret = __die(str, err, thread, regs);
 
 	bust_spinlocks(0);
-	add_taint(TAINT_DIE);
+	add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
 	spin_unlock_irq(&die_lock);
 	oops_exit();
 
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 15239fffd6fe..5853e57523e5 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -220,8 +220,7 @@ static void __cpuinit amd_k7_smp_check(struct cpuinfo_x86 *c)
 	 */
 	WARN_ONCE(1, "WARNING: This combination of AMD"
 		" processors is not suitable for SMP.\n");
-	if (!test_taint(TAINT_UNSAFE_SMP))
-		add_taint(TAINT_UNSAFE_SMP);
+	add_taint(TAINT_UNSAFE_SMP, LOCKDEP_NOW_UNRELIABLE);
 
 valid_k7:
 	;
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 80dbda84f1c3..6bc15edbc8cd 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1085,7 +1085,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 		/*
 		 * Set taint even when machine check was not enabled.
 		 */
-		add_taint(TAINT_MACHINE_CHECK);
+		add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
 
 		severity = mce_severity(&m, cfg->tolerant, NULL);
 
diff --git a/arch/x86/kernel/cpu/mcheck/p5.c b/arch/x86/kernel/cpu/mcheck/p5.c
index 2d5454cd2c4f..1c044b1ccc59 100644
--- a/arch/x86/kernel/cpu/mcheck/p5.c
+++ b/arch/x86/kernel/cpu/mcheck/p5.c
@@ -33,7 +33,7 @@ static void pentium_machine_check(struct pt_regs *regs, long error_code)
 			smp_processor_id());
 	}
 
-	add_taint(TAINT_MACHINE_CHECK);
+	add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
 }
 
 /* Set up machine check reporting for processors with Intel style MCE: */
diff --git a/arch/x86/kernel/cpu/mcheck/winchip.c b/arch/x86/kernel/cpu/mcheck/winchip.c
index 2d7998fb628c..e9a701aecaa1 100644
--- a/arch/x86/kernel/cpu/mcheck/winchip.c
+++ b/arch/x86/kernel/cpu/mcheck/winchip.c
@@ -15,7 +15,7 @@
 static void winchip_machine_check(struct pt_regs *regs, long error_code)
 {
 	printk(KERN_EMERG "CPU0: Machine Check Exception.\n");
-	add_taint(TAINT_MACHINE_CHECK);
+	add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
 }
 
 /* Set up machine check reporting on the Winchip C6 series */
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index e9fe907cd249..fa72a39e5d46 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -542,7 +542,7 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base,
 
 		if (tmp != mask_lo) {
 			printk(KERN_WARNING "mtrr: your BIOS has configured an incorrect mask, fixing it.\n");
-			add_taint(TAINT_FIRMWARE_WORKAROUND);
+			add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
 			mask_lo = tmp;
 		}
 	}
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index ae42418bc50f..c8797d55b245 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -232,7 +232,7 @@ void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
 
 	bust_spinlocks(0);
 	die_owner = -1;
-	add_taint(TAINT_DIE);
+	add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
 	die_nest_count--;
 	if (!die_nest_count)
 		/* Nest count reaches zero, release the lock. */
diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c
index 01e0111bf787..ded955d45155 100644
--- a/arch/xtensa/kernel/traps.c
+++ b/arch/xtensa/kernel/traps.c
@@ -524,7 +524,7 @@ void die(const char * str, struct pt_regs * regs, long err)
 	if (!user_mode(regs))
 		show_stack(NULL, (unsigned long*)regs->areg[1]);
 
-	add_taint(TAINT_DIE);
+	add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
 	spin_unlock_irq(&die_lock);
 
 	if (in_interrupt())
diff --git a/drivers/acpi/custom_method.c b/drivers/acpi/custom_method.c
index 5d42c2414ae5..cd9a68e2fea9 100644
--- a/drivers/acpi/custom_method.c
+++ b/drivers/acpi/custom_method.c
@@ -66,7 +66,7 @@ static ssize_t cm_write(struct file *file, const char __user * user_buf,
 		buf = NULL;
 		if (ACPI_FAILURE(status))
 			return -EINVAL;
-		add_taint(TAINT_OVERRIDDEN_ACPI_TABLE);
+		add_taint(TAINT_OVERRIDDEN_ACPI_TABLE, LOCKDEP_NOW_UNRELIABLE);
 	}
 
 	return count;
diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c
index 3ff267861541..535e888bad78 100644
--- a/drivers/acpi/osl.c
+++ b/drivers/acpi/osl.c
@@ -661,7 +661,7 @@ static void acpi_table_taint(struct acpi_table_header *table)
 	pr_warn(PREFIX
 		"Override [%4.4s-%8.8s], this is unsafe: tainting kernel\n",
 		table->signature, table->oem_table_id);
-	add_taint(TAINT_OVERRIDDEN_ACPI_TABLE);
+	add_taint(TAINT_OVERRIDDEN_ACPI_TABLE, LOCKDEP_NOW_UNRELIABLE);
 }
 
 
diff --git a/drivers/base/regmap/regmap-debugfs.c b/drivers/base/regmap/regmap-debugfs.c
index 46a213a596e2..f63c830083ec 100644
--- a/drivers/base/regmap/regmap-debugfs.c
+++ b/drivers/base/regmap/regmap-debugfs.c
@@ -267,7 +267,7 @@ static ssize_t regmap_map_write_file(struct file *file,
 		return -EINVAL;
 
 	/* Userspace has been fiddling around behind the kernel's back */
-	add_taint(TAINT_USER);
+	add_taint(TAINT_USER, LOCKDEP_NOW_UNRELIABLE);
 
 	regmap_write(map, reg, value);
 	return buf_size;
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index c566927efcbd..80d36874689b 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -398,7 +398,11 @@ extern int panic_on_unrecovered_nmi;
 extern int panic_on_io_nmi;
 extern int sysctl_panic_on_stackoverflow;
 extern const char *print_tainted(void);
-extern void add_taint(unsigned flag);
+enum lockdep_ok {
+	LOCKDEP_STILL_OK,
+	LOCKDEP_NOW_UNRELIABLE
+};
+extern void add_taint(unsigned flag, enum lockdep_ok);
 extern int test_taint(unsigned flag);
 extern unsigned long get_taint(void);
 extern int root_mountflags;
diff --git a/kernel/module.c b/kernel/module.c
index e69a5a68766f..cc000dd6e4a8 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -197,9 +197,10 @@ static inline int strong_try_module_get(struct module *mod)
 		return -ENOENT;
 }
 
-static inline void add_taint_module(struct module *mod, unsigned flag)
+static inline void add_taint_module(struct module *mod, unsigned flag,
+				    enum lockdep_ok lockdep_ok)
 {
-	add_taint(flag);
+	add_taint(flag, lockdep_ok);
 	mod->taints |= (1U << flag);
 }
 
@@ -727,7 +728,7 @@ static inline int try_force_unload(unsigned int flags)
 {
 	int ret = (flags & O_TRUNC);
 	if (ret)
-		add_taint(TAINT_FORCED_RMMOD);
+		add_taint(TAINT_FORCED_RMMOD, LOCKDEP_NOW_UNRELIABLE);
 	return ret;
 }
 #else
@@ -1138,7 +1139,7 @@ static int try_to_force_load(struct module *mod, const char *reason)
 	if (!test_taint(TAINT_FORCED_MODULE))
 		printk(KERN_WARNING "%s: %s: kernel tainted.\n",
 		       mod->name, reason);
-	add_taint_module(mod, TAINT_FORCED_MODULE);
+	add_taint_module(mod, TAINT_FORCED_MODULE, LOCKDEP_NOW_UNRELIABLE);
 	return 0;
 #else
 	return -ENOEXEC;
@@ -2147,7 +2148,8 @@ static void set_license(struct module *mod, const char *license)
 		if (!test_taint(TAINT_PROPRIETARY_MODULE))
 			printk(KERN_WARNING "%s: module license '%s' taints "
 				"kernel.\n", mod->name, license);
-		add_taint_module(mod, TAINT_PROPRIETARY_MODULE);
+		add_taint_module(mod, TAINT_PROPRIETARY_MODULE,
+				 LOCKDEP_NOW_UNRELIABLE);
 	}
 }
 
@@ -2700,10 +2702,10 @@ static int check_modinfo(struct module *mod, struct load_info *info, int flags)
 	}
 
 	if (!get_modinfo(info, "intree"))
-		add_taint_module(mod, TAINT_OOT_MODULE);
+		add_taint_module(mod, TAINT_OOT_MODULE, LOCKDEP_STILL_OK);
 
 	if (get_modinfo(info, "staging")) {
-		add_taint_module(mod, TAINT_CRAP);
+		add_taint_module(mod, TAINT_CRAP, LOCKDEP_STILL_OK);
 		printk(KERN_WARNING "%s: module is from the staging directory,"
 		       " the quality is unknown, you have been warned.\n",
 		       mod->name);
@@ -2869,15 +2871,17 @@ static int check_module_license_and_versions(struct module *mod)
 	 * using GPL-only symbols it needs.
 	 */
 	if (strcmp(mod->name, "ndiswrapper") == 0)
-		add_taint(TAINT_PROPRIETARY_MODULE);
+		add_taint(TAINT_PROPRIETARY_MODULE, LOCKDEP_NOW_UNRELIABLE);
 
 	/* driverloader was caught wrongly pretending to be under GPL */
 	if (strcmp(mod->name, "driverloader") == 0)
-		add_taint_module(mod, TAINT_PROPRIETARY_MODULE);
+		add_taint_module(mod, TAINT_PROPRIETARY_MODULE,
+				 LOCKDEP_NOW_UNRELIABLE);
 
 	/* lve claims to be GPL but upstream won't provide source */
 	if (strcmp(mod->name, "lve") == 0)
-		add_taint_module(mod, TAINT_PROPRIETARY_MODULE);
+		add_taint_module(mod, TAINT_PROPRIETARY_MODULE,
+				 LOCKDEP_NOW_UNRELIABLE);
 
 #ifdef CONFIG_MODVERSIONS
 	if ((mod->num_syms && !mod->crcs)
@@ -3197,7 +3201,7 @@ again:
 			    "%s: module verification failed: signature and/or"
 			    " required key missing - tainting kernel\n",
 			    mod->name);
-		add_taint_module(mod, TAINT_FORCED_MODULE);
+		add_taint_module(mod, TAINT_FORCED_MODULE, LOCKDEP_STILL_OK);
 	}
 #endif
 
diff --git a/kernel/panic.c b/kernel/panic.c
index e1b2822fff97..7c57cc9eee2c 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -259,26 +259,19 @@ unsigned long get_taint(void)
 	return tainted_mask;
 }
 
-void add_taint(unsigned flag)
+/**
+ * add_taint: add a taint flag if not already set.
+ * @flag: one of the TAINT_* constants.
+ * @lockdep_ok: whether lock debugging is still OK.
+ *
+ * If something bad has gone wrong, you'll want @lockdebug_ok = false, but for
+ * some notewortht-but-not-corrupting cases, it can be set to true.
+ */
+void add_taint(unsigned flag, enum lockdep_ok lockdep_ok)
 {
-	/*
-	 * Can't trust the integrity of the kernel anymore.
-	 * We don't call directly debug_locks_off() because the issue
-	 * is not necessarily serious enough to set oops_in_progress to 1
-	 * Also we want to keep up lockdep for staging/out-of-tree
-	 * development and post-warning case.
-	 */
-	switch (flag) {
-	case TAINT_CRAP:
-	case TAINT_OOT_MODULE:
-	case TAINT_WARN:
-	case TAINT_FIRMWARE_WORKAROUND:
-		break;
-
-	default:
-		if (__debug_locks_off())
-			printk(KERN_WARNING "Disabling lock debugging due to kernel taint\n");
-	}
+	if (lockdep_ok == LOCKDEP_NOW_UNRELIABLE && __debug_locks_off())
+		printk(KERN_WARNING
+		       "Disabling lock debugging due to kernel taint\n");
 
 	set_bit(flag, &tainted_mask);
 }
@@ -421,7 +414,8 @@ static void warn_slowpath_common(const char *file, int line, void *caller,
 	print_modules();
 	dump_stack();
 	print_oops_end_marker();
-	add_taint(taint);
+	/* Just a warning, don't kill lockdep. */
+	add_taint(taint, LOCKDEP_STILL_OK);
 }
 
 void warn_slowpath_fmt(const char *file, int line, const char *fmt, ...)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 257002c13bb0..662f3d512183 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2785,7 +2785,7 @@ static noinline void __schedule_bug(struct task_struct *prev)
 	if (irqs_disabled())
 		print_irqtrace_events(prev);
 	dump_stack();
-	add_taint(TAINT_WARN);
+	add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
 }
 
 /*
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index c88878db491e..f97f9d75cde8 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -2006,7 +2006,7 @@ static int proc_taint(struct ctl_table *table, int write,
 		int i;
 		for (i = 0; i < BITS_PER_LONG && tmptaint >> i; i++) {
 			if ((tmptaint >> i) & 1)
-				add_taint(i);
+				add_taint(i, LOCKDEP_STILL_OK);
 		}
 	}
 
diff --git a/lib/bug.c b/lib/bug.c
index d0cdf14c651a..168603477f02 100644
--- a/lib/bug.c
+++ b/lib/bug.c
@@ -166,7 +166,8 @@ enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs)
 		print_modules();
 		show_regs(regs);
 		print_oops_end_marker();
-		add_taint(BUG_GET_TAINT(bug));
+		/* Just a warning, don't kill lockdep. */
+		add_taint(BUG_GET_TAINT(bug), LOCKDEP_STILL_OK);
 		return BUG_TRAP_TYPE_WARN;
 	}
 
diff --git a/mm/memory.c b/mm/memory.c
index bb1369f7b9b4..bc8bec762db7 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -716,7 +716,7 @@ static void print_bad_pte(struct vm_area_struct *vma, unsigned long addr,
 		print_symbol(KERN_ALERT "vma->vm_file->f_op->mmap: %s\n",
 				(unsigned long)vma->vm_file->f_op->mmap);
 	dump_stack();
-	add_taint(TAINT_BAD_PAGE);
+	add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
 }
 
 static inline bool is_cow_mapping(vm_flags_t flags)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index df2022ff0c8a..4c99cb7e276a 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -320,7 +320,7 @@ static void bad_page(struct page *page)
 out:
 	/* Leave bad fields for debug, except PageBuddy could make trouble */
 	reset_page_mapcount(page); /* remove PageBuddy */
-	add_taint(TAINT_BAD_PAGE);
+	add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
 }
 
 /*
diff --git a/mm/slab.c b/mm/slab.c
index e7667a3584bc..856e4a192d25 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -812,7 +812,7 @@ static void __slab_error(const char *function, struct kmem_cache *cachep,
 	printk(KERN_ERR "slab error in %s(): cache `%s': %s\n",
 	       function, cachep->name, msg);
 	dump_stack();
-	add_taint(TAINT_BAD_PAGE);
+	add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
 }
 #endif
 
diff --git a/mm/slub.c b/mm/slub.c
index ba2ca53f6c3a..7ec3041bdd0d 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -562,7 +562,7 @@ static void slab_bug(struct kmem_cache *s, char *fmt, ...)
 	printk(KERN_ERR "----------------------------------------"
 			"-------------------------------------\n\n");
 
-	add_taint(TAINT_BAD_PAGE);
+	add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
 }
 
 static void slab_fix(struct kmem_cache *s, char *fmt, ...)
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index 2370063b5824..4c6526c0db03 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -251,7 +251,7 @@ static ssize_t codec_reg_write_file(struct file *file,
 		return -EINVAL;
 
 	/* Userspace has been fiddling around behind the kernel's back */
-	add_taint(TAINT_USER);
+	add_taint(TAINT_USER, LOCKDEP_NOW_UNRELIABLE);
 
 	snd_soc_write(codec, reg, value);
 	return buf_size;
-- 
cgit v1.2.3


From 93843b3764170819aeb07fc867a0499b33a875ca Mon Sep 17 00:00:00 2001
From: Sasha Levin <sasha.levin@oracle.com>
Date: Mon, 21 Jan 2013 17:17:39 +1030
Subject: module: constify within_module_*

These helper functions just check a set intersection with a range, and
don't actually modify struct module.

Signed-off-by: Sasha Levin <sasha.levin@oracle.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 include/linux/module.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/module.h b/include/linux/module.h
index 1375ee3f03aa..ead1b5719a12 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -396,13 +396,13 @@ bool is_module_address(unsigned long addr);
 bool is_module_percpu_address(unsigned long addr);
 bool is_module_text_address(unsigned long addr);
 
-static inline int within_module_core(unsigned long addr, struct module *mod)
+static inline int within_module_core(unsigned long addr, const struct module *mod)
 {
 	return (unsigned long)mod->module_core <= addr &&
 	       addr < (unsigned long)mod->module_core + mod->core_size;
 }
 
-static inline int within_module_init(unsigned long addr, struct module *mod)
+static inline int within_module_init(unsigned long addr, const struct module *mod)
 {
 	return (unsigned long)mod->module_init <= addr &&
 	       addr < (unsigned long)mod->module_init + mod->init_size;
-- 
cgit v1.2.3


From c94aa30edac4d328674e9c127918317009d30c1a Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Thu, 17 Jan 2013 16:35:14 +0900
Subject: ASoC: arizona: Allow number of channels clocked to be restricted

Place a cap on the number of channels clocks are generated for. This is
intended for use with systems which have the WM5102 master an I2S bus with
multiple data lines.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 include/linux/mfd/arizona/pdata.h |  9 +++++++++
 sound/soc/codecs/arizona.c        | 14 ++++++++++++--
 2 files changed, 21 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/arizona/pdata.h b/include/linux/mfd/arizona/pdata.h
index 8b1d1daaae16..ec3e2a2a6d77 100644
--- a/include/linux/mfd/arizona/pdata.h
+++ b/include/linux/mfd/arizona/pdata.h
@@ -62,6 +62,8 @@
 
 #define ARIZONA_MAX_OUTPUT 6
 
+#define ARIZONA_MAX_AIF 3
+
 #define ARIZONA_HAP_ACT_ERM 0
 #define ARIZONA_HAP_ACT_LRA 2
 
@@ -96,6 +98,13 @@ struct arizona_pdata {
 	/** Pin state for GPIO pins */
 	int gpio_defaults[ARIZONA_MAX_GPIO];
 
+	/**
+	 * Maximum number of channels clocks will be generated for,
+	 * useful for systems where and I2S bus with multiple data
+	 * lines is mastered.
+	 */
+	int max_channels_clocked[ARIZONA_MAX_AIF];
+
 	/** GPIO for mic detection polarity */
 	int micd_pol_gpio;
 
diff --git a/sound/soc/codecs/arizona.c b/sound/soc/codecs/arizona.c
index 845d25630ba2..d855a6c098d4 100644
--- a/sound/soc/codecs/arizona.c
+++ b/sound/soc/codecs/arizona.c
@@ -762,18 +762,28 @@ static int arizona_hw_params(struct snd_pcm_substream *substream,
 			     struct snd_soc_dai *dai)
 {
 	struct snd_soc_codec *codec = dai->codec;
+	struct arizona_priv *priv = snd_soc_codec_get_drvdata(codec);
+	struct arizona *arizona = priv->arizona;
 	int base = dai->driver->base;
 	const int *rates;
 	int i, ret;
-	int bclk, lrclk, wl, frame;
+	int chan_limit = arizona->pdata.max_channels_clocked[dai->id - 1];
+	int bclk, lrclk, wl, frame, bclk_target;
 
 	if (params_rate(params) % 8000)
 		rates = &arizona_44k1_bclk_rates[0];
 	else
 		rates = &arizona_48k_bclk_rates[0];
 
+	bclk_target = snd_soc_params_to_bclk(params);
+	if (chan_limit && chan_limit < params_channels(params)) {
+		arizona_aif_dbg(dai, "Limiting to %d channels\n", chan_limit);
+		bclk_target /= params_channels(params);
+		bclk_target *= chan_limit;
+	}
+
 	for (i = 0; i < ARRAY_SIZE(arizona_44k1_bclk_rates); i++) {
-		if (rates[i] >= snd_soc_params_to_bclk(params) &&
+		if (rates[i] >= bclk_target &&
 		    rates[i] % params_rate(params) == 0) {
 			bclk = i;
 			break;
-- 
cgit v1.2.3


From d6e99abb424a916ecbb127dba065a379b460a062 Mon Sep 17 00:00:00 2001
From: Haojian Zhuang <haojian.zhuang@linaro.org>
Date: Fri, 18 Jan 2013 15:31:06 +0800
Subject: pinctrl: core: get devname from pinctrl_dev

Add new function to get devname from pinctrl_dev. pinctrl_dev_get_name()
can only get pinctrl description name. If we want to use gpio driver to
find pinctrl device node, we need to fetch the pinctrl device name.

Signed-off-by: Haojian Zhuang <haojian.zhuang@linaro.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/pinctrl/core.c          | 6 ++++++
 include/linux/pinctrl/pinctrl.h | 1 +
 2 files changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/pinctrl/core.c b/drivers/pinctrl/core.c
index 361175372598..e6373f30aeeb 100644
--- a/drivers/pinctrl/core.c
+++ b/drivers/pinctrl/core.c
@@ -83,6 +83,12 @@ const char *pinctrl_dev_get_name(struct pinctrl_dev *pctldev)
 }
 EXPORT_SYMBOL_GPL(pinctrl_dev_get_name);
 
+const char *pinctrl_dev_get_devname(struct pinctrl_dev *pctldev)
+{
+	return dev_name(pctldev->dev);
+}
+EXPORT_SYMBOL_GPL(pinctrl_dev_get_devname);
+
 void *pinctrl_dev_get_drvdata(struct pinctrl_dev *pctldev)
 {
 	return pctldev->driver_data;
diff --git a/include/linux/pinctrl/pinctrl.h b/include/linux/pinctrl/pinctrl.h
index 04d6700d99af..778804df293f 100644
--- a/include/linux/pinctrl/pinctrl.h
+++ b/include/linux/pinctrl/pinctrl.h
@@ -154,6 +154,7 @@ struct pinctrl_dev *of_pinctrl_get(struct device_node *np)
 #endif /* CONFIG_OF */
 
 extern const char *pinctrl_dev_get_name(struct pinctrl_dev *pctldev);
+extern const char *pinctrl_dev_get_devname(struct pinctrl_dev *pctldev);
 extern void *pinctrl_dev_get_drvdata(struct pinctrl_dev *pctldev);
 #else
 
-- 
cgit v1.2.3


From 684697cbbcd076b8fde78d8863e341700533b542 Mon Sep 17 00:00:00 2001
From: Haojian Zhuang <haojian.zhuang@linaro.org>
Date: Fri, 18 Jan 2013 15:31:15 +0800
Subject: pinctrl: generic: add slew rate config parameter

Add PIN_CONFIG_SLEW_RATE parameter into pinconf-generic driver.

Signed-off-by: Haojian Zhuang <haojian.zhuang@linaro.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/pinctrl/pinconf-generic.c       | 1 +
 include/linux/pinctrl/pinconf-generic.h | 4 ++++
 2 files changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/pinctrl/pinconf-generic.c b/drivers/pinctrl/pinconf-generic.c
index bcf8157ceea7..e5948f8172af 100644
--- a/drivers/pinctrl/pinconf-generic.c
+++ b/drivers/pinctrl/pinconf-generic.c
@@ -45,6 +45,7 @@ struct pin_config_item conf_items[] = {
 	PCONFDUMP(PIN_CONFIG_INPUT_SCHMITT, "input schmitt trigger", NULL),
 	PCONFDUMP(PIN_CONFIG_INPUT_DEBOUNCE, "input debounce", "time units"),
 	PCONFDUMP(PIN_CONFIG_POWER_SOURCE, "pin power source", "selector"),
+	PCONFDUMP(PIN_CONFIG_SLEW_RATE, "slew rate", NULL),
 	PCONFDUMP(PIN_CONFIG_LOW_POWER_MODE, "pin low power", "mode"),
 	PCONFDUMP(PIN_CONFIG_OUTPUT, "pin output", "level"),
 };
diff --git a/include/linux/pinctrl/pinconf-generic.h b/include/linux/pinctrl/pinconf-generic.h
index 40d7bb9c7562..3e7909aa5c03 100644
--- a/include/linux/pinctrl/pinconf-generic.h
+++ b/include/linux/pinctrl/pinconf-generic.h
@@ -60,6 +60,9 @@
  * @PIN_CONFIG_POWER_SOURCE: if the pin can select between different power
  *	supplies, the argument to this parameter (on a custom format) tells
  *	the driver which alternative power source to use.
+ * @PIN_CONFIG_SLEW_RATE: if the pin can select slew rate, the argument to
+ * 	this parameter (on a custom format) tells the driver which alternative
+ * 	slew rate to use.
  * @PIN_CONFIG_LOW_POWER_MODE: this will configure the pin for low power
  *	operation, if several modes of operation are supported these can be
  *	passed in the argument on a custom form, else just use argument 1
@@ -83,6 +86,7 @@ enum pin_config_param {
 	PIN_CONFIG_INPUT_SCHMITT,
 	PIN_CONFIG_INPUT_DEBOUNCE,
 	PIN_CONFIG_POWER_SOURCE,
+	PIN_CONFIG_SLEW_RATE,
 	PIN_CONFIG_LOW_POWER_MODE,
 	PIN_CONFIG_OUTPUT,
 	PIN_CONFIG_END = 0x7FFF,
-- 
cgit v1.2.3


From 418c59e49ddc77fcb7054f2c8d52c9d47403b43e Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Fri, 14 Dec 2012 13:15:08 -0500
Subject: tracing: Fix sparse warning with is_signed_type() macro

Sparse complains when is_signed_type() is used on a pointer.
This macro is needed for the format output used for ftrace
and perf, to know if a binary field is a signed type or not.
The is_signed_type() macro is used against all fields that are
recorded by events to automate the operation.

The problem sparse has is with the current way is_signed_type()
works:

  ((type)-1 < 0)

If "type" is a poiner, than sparse does not like it being compared
to an integer (zero). The simple fix is to just give zero the
same type. The runtime result stays the same.

Reported-by: Robert Jarzmik <robert.jarzmik@free.fr>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace_event.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index a3d489531d83..43ef8b6cce7f 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -272,7 +272,7 @@ extern int trace_define_field(struct ftrace_event_call *call, const char *type,
 extern int trace_add_event_call(struct ftrace_event_call *call);
 extern void trace_remove_event_call(struct ftrace_event_call *call);
 
-#define is_signed_type(type)	(((type)(-1)) < 0)
+#define is_signed_type(type)	(((type)(-1)) < (type)0)
 
 int trace_set_clr_event(const char *system, const char *event, int set);
 
-- 
cgit v1.2.3


From 0f1ac8fd254b6c3e77950a1c4ee67be5dc88f7e0 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Tue, 15 Jan 2013 22:11:19 -0500
Subject: tracing/lockdep: Disable lockdep first in entering NMI

When function tracing with either debug locks enabled or tracing
preempt disabled, the add_preempt_count() is traced. This is an
issue with lockdep and function tracing. As function tracing
can disable interrupts, and lockdep records that change,
lockdep may not be able to handle this recursion if it happens from
an NMI context.

The first thing that an NMI does is:

 #define nmi_enter()					\
	do {							\
		ftrace_nmi_enter();				\
		BUG_ON(in_nmi());				\
		add_preempt_count(NMI_OFFSET + HARDIRQ_OFFSET);	\
		lockdep_off();					\
		rcu_nmi_enter();				\
		trace_hardirq_enter();				\
	} while (0)

When the add_preempt_count() is traced, and the tracing callback
disables interrupts, it will jump into the lockdep code. There's
some places in lockdep that can't handle this re-entrance, and
causes lockdep to fail.

As the lockdep_off() (and lockdep_on) is a simple:

void lockdep_off(void)
{
	current->lockdep_recursion++;
}

and is never traced, it can be called first in nmi_enter()
and lockdep_on() last in nmi_exit().

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/hardirq.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index 624ef3f45c8e..57bfdce8fb90 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -180,10 +180,10 @@ extern void irq_exit(void);
 
 #define nmi_enter()						\
 	do {							\
+		lockdep_off();					\
 		ftrace_nmi_enter();				\
 		BUG_ON(in_nmi());				\
 		add_preempt_count(NMI_OFFSET + HARDIRQ_OFFSET);	\
-		lockdep_off();					\
 		rcu_nmi_enter();				\
 		trace_hardirq_enter();				\
 	} while (0)
@@ -192,10 +192,10 @@ extern void irq_exit(void);
 	do {							\
 		trace_hardirq_exit();				\
 		rcu_nmi_exit();					\
-		lockdep_on();					\
 		BUG_ON(!in_nmi());				\
 		sub_preempt_count(NMI_OFFSET + HARDIRQ_OFFSET);	\
 		ftrace_nmi_exit();				\
+		lockdep_on();					\
 	} while (0)
 
 #endif /* LINUX_HARDIRQ_H */
-- 
cgit v1.2.3


From 06aeaaeabf69da4a3e86df532425640f51b01cef Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Date: Fri, 28 Sep 2012 17:15:17 +0900
Subject: ftrace: Move ARCH_SUPPORTS_FTRACE_SAVE_REGS in Kconfig

Move SAVE_REGS support flag into Kconfig and rename
it to CONFIG_DYNAMIC_FTRACE_WITH_REGS. This also introduces
CONFIG_HAVE_DYNAMIC_FTRACE_WITH_REGS which indicates
the architecture depending part of ftrace has a code
that saves full registers.
On the other hand, CONFIG_DYNAMIC_FTRACE_WITH_REGS indicates
the code is enabled.

Link: http://lkml.kernel.org/r/20120928081516.3560.72534.stgit@ltc138.sdl.hitachi.co.jp

Cc: Ingo Molnar <mingo@elte.hu>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 arch/x86/Kconfig              | 1 +
 arch/x86/include/asm/ftrace.h | 1 -
 include/linux/ftrace.h        | 6 +++---
 kernel/trace/Kconfig          | 8 ++++++++
 kernel/trace/ftrace.c         | 6 +++---
 kernel/trace/trace_selftest.c | 2 +-
 6 files changed, 16 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 79795af59810..996ccecc694c 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -44,6 +44,7 @@ config X86
 	select HAVE_FENTRY if X86_64
 	select HAVE_C_RECORDMCOUNT
 	select HAVE_DYNAMIC_FTRACE
+	select HAVE_DYNAMIC_FTRACE_WITH_REGS
 	select HAVE_FUNCTION_TRACER
 	select HAVE_FUNCTION_GRAPH_TRACER
 	select HAVE_FUNCTION_GRAPH_FP_TEST
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index 9a25b522d377..86cb51e1ca96 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -44,7 +44,6 @@
 
 #ifdef CONFIG_DYNAMIC_FTRACE
 #define ARCH_SUPPORTS_FTRACE_OPS 1
-#define ARCH_SUPPORTS_FTRACE_SAVE_REGS
 #endif
 
 #ifndef __ASSEMBLY__
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 92691d85c320..e5ca8ef50e9b 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -74,7 +74,7 @@ typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip,
  * SAVE_REGS - The ftrace_ops wants regs saved at each function called
  *            and passed to the callback. If this flag is set, but the
  *            architecture does not support passing regs
- *            (ARCH_SUPPORTS_FTRACE_SAVE_REGS is not defined), then the
+ *            (CONFIG_DYNAMIC_FTRACE_WITH_REGS is not defined), then the
  *            ftrace_ops will fail to register, unless the next flag
  *            is set.
  * SAVE_REGS_IF_SUPPORTED - This is the same as SAVE_REGS, but if the
@@ -418,7 +418,7 @@ void ftrace_modify_all_code(int command);
 #endif
 
 #ifndef FTRACE_REGS_ADDR
-#ifdef ARCH_SUPPORTS_FTRACE_SAVE_REGS
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
 # define FTRACE_REGS_ADDR ((unsigned long)ftrace_regs_caller)
 #else
 # define FTRACE_REGS_ADDR FTRACE_ADDR
@@ -480,7 +480,7 @@ extern int ftrace_make_nop(struct module *mod,
  */
 extern int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr);
 
-#ifdef ARCH_SUPPORTS_FTRACE_SAVE_REGS
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
 /**
  * ftrace_modify_call - convert from one addr to another (no nop)
  * @rec: the mcount call site record
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 5d89335a485f..cdc9d284d24e 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -39,6 +39,9 @@ config HAVE_DYNAMIC_FTRACE
 	help
 	  See Documentation/trace/ftrace-design.txt
 
+config HAVE_DYNAMIC_FTRACE_WITH_REGS
+	bool
+
 config HAVE_FTRACE_MCOUNT_RECORD
 	bool
 	help
@@ -434,6 +437,11 @@ config DYNAMIC_FTRACE
 	  were made. If so, it runs stop_machine (stops all CPUS)
 	  and modifies the code to jump over the call to ftrace.
 
+config DYNAMIC_FTRACE_WITH_REGS
+	def_bool y
+	depends on DYNAMIC_FTRACE
+	depends on HAVE_DYNAMIC_FTRACE_WITH_REGS
+
 config FUNCTION_PROFILER
 	bool "Kernel function profiler"
 	depends on FUNCTION_TRACER
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 41473b4ad7a4..6e34dc162fe1 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -337,7 +337,7 @@ static int __register_ftrace_function(struct ftrace_ops *ops)
 	if ((ops->flags & FL_GLOBAL_CONTROL_MASK) == FL_GLOBAL_CONTROL_MASK)
 		return -EINVAL;
 
-#ifndef ARCH_SUPPORTS_FTRACE_SAVE_REGS
+#ifndef CONFIG_DYNAMIC_FTRACE_WITH_REGS
 	/*
 	 * If the ftrace_ops specifies SAVE_REGS, then it only can be used
 	 * if the arch supports it, or SAVE_REGS_IF_SUPPORTED is also set.
@@ -4143,8 +4143,8 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
  * Archs are to support both the regs and ftrace_ops at the same time.
  * If they support ftrace_ops, it is assumed they support regs.
  * If call backs want to use regs, they must either check for regs
- * being NULL, or ARCH_SUPPORTS_FTRACE_SAVE_REGS.
- * Note, ARCH_SUPPORT_SAVE_REGS expects a full regs to be saved.
+ * being NULL, or CONFIG_DYNAMIC_FTRACE_WITH_REGS.
+ * Note, CONFIG_DYNAMIC_FTRACE_WITH_REGS expects a full regs to be saved.
  * An architecture can pass partial regs with ftrace_ops and still
  * set the ARCH_SUPPORT_FTARCE_OPS.
  */
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 47623169a815..6c62d58d8e87 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -568,7 +568,7 @@ trace_selftest_function_regs(void)
 	int ret;
 	int supported = 0;
 
-#ifdef ARCH_SUPPORTS_FTRACE_SAVE_REGS
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
 	supported = 1;
 #endif
 
-- 
cgit v1.2.3


From e7dbfe349d12eabb7783b117e0c115f6f3d9ef9e Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Date: Fri, 28 Sep 2012 17:15:20 +0900
Subject: kprobes/x86: Move ftrace-based kprobe code into kprobes-ftrace.c

Split ftrace-based kprobes code from kprobes, and introduce
CONFIG_(HAVE_)KPROBES_ON_FTRACE Kconfig flags.
For the cleanup reason, this also moves kprobe_ftrace check
into skip_singlestep.

Link: http://lkml.kernel.org/r/20120928081520.3560.25624.stgit@ltc138.sdl.hitachi.co.jp

Cc: Ingo Molnar <mingo@elte.hu>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 arch/Kconfig                     | 12 ++++++
 arch/x86/Kconfig                 |  1 +
 arch/x86/kernel/Makefile         |  1 +
 arch/x86/kernel/kprobes-common.h | 11 +++++
 arch/x86/kernel/kprobes-ftrace.c | 93 ++++++++++++++++++++++++++++++++++++++++
 arch/x86/kernel/kprobes.c        | 70 +-----------------------------
 include/linux/kprobes.h          | 12 +-----
 kernel/kprobes.c                 |  8 ++--
 8 files changed, 125 insertions(+), 83 deletions(-)
 create mode 100644 arch/x86/kernel/kprobes-ftrace.c

(limited to 'include/linux')

diff --git a/arch/Kconfig b/arch/Kconfig
index 7f8f281f2585..97fb7d0365d1 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -76,6 +76,15 @@ config OPTPROBES
 	depends on KPROBES && HAVE_OPTPROBES
 	depends on !PREEMPT
 
+config KPROBES_ON_FTRACE
+	def_bool y
+	depends on KPROBES && HAVE_KPROBES_ON_FTRACE
+	depends on DYNAMIC_FTRACE_WITH_REGS
+	help
+	 If function tracer is enabled and the arch supports full
+	 passing of pt_regs to function tracing, then kprobes can
+	 optimize on top of function tracing.
+
 config UPROBES
 	bool "Transparent user-space probes (EXPERIMENTAL)"
 	depends on UPROBE_EVENT && PERF_EVENTS
@@ -158,6 +167,9 @@ config HAVE_KRETPROBES
 config HAVE_OPTPROBES
 	bool
 
+config HAVE_KPROBES_ON_FTRACE
+	bool
+
 config HAVE_NMI_WATCHDOG
 	bool
 #
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 996ccecc694c..be8b2b3ab979 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -40,6 +40,7 @@ config X86
 	select HAVE_DMA_CONTIGUOUS if !SWIOTLB
 	select HAVE_KRETPROBES
 	select HAVE_OPTPROBES
+	select HAVE_KPROBES_ON_FTRACE
 	select HAVE_FTRACE_MCOUNT_RECORD
 	select HAVE_FENTRY if X86_64
 	select HAVE_C_RECORDMCOUNT
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 34e923a53762..cc5d31f8830c 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -67,6 +67,7 @@ obj-$(CONFIG_KEXEC)		+= relocate_kernel_$(BITS).o crash.o
 obj-$(CONFIG_CRASH_DUMP)	+= crash_dump_$(BITS).o
 obj-$(CONFIG_KPROBES)		+= kprobes.o
 obj-$(CONFIG_OPTPROBES)		+= kprobes-opt.o
+obj-$(CONFIG_KPROBES_ON_FTRACE)	+= kprobes-ftrace.o
 obj-$(CONFIG_MODULES)		+= module.o
 obj-$(CONFIG_DOUBLEFAULT) 	+= doublefault_32.o
 obj-$(CONFIG_KGDB)		+= kgdb.o
diff --git a/arch/x86/kernel/kprobes-common.h b/arch/x86/kernel/kprobes-common.h
index 3230b68ef29a..2e9d4b5af036 100644
--- a/arch/x86/kernel/kprobes-common.h
+++ b/arch/x86/kernel/kprobes-common.h
@@ -99,4 +99,15 @@ static inline unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsig
 	return addr;
 }
 #endif
+
+#ifdef CONFIG_KPROBES_ON_FTRACE
+extern int skip_singlestep(struct kprobe *p, struct pt_regs *regs,
+			   struct kprobe_ctlblk *kcb);
+#else
+static inline int skip_singlestep(struct kprobe *p, struct pt_regs *regs,
+				  struct kprobe_ctlblk *kcb)
+{
+	return 0;
+}
+#endif
 #endif
diff --git a/arch/x86/kernel/kprobes-ftrace.c b/arch/x86/kernel/kprobes-ftrace.c
new file mode 100644
index 000000000000..70a81c7aa0a7
--- /dev/null
+++ b/arch/x86/kernel/kprobes-ftrace.c
@@ -0,0 +1,93 @@
+/*
+ * Dynamic Ftrace based Kprobes Optimization
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) Hitachi Ltd., 2012
+ */
+#include <linux/kprobes.h>
+#include <linux/ptrace.h>
+#include <linux/hardirq.h>
+#include <linux/preempt.h>
+#include <linux/ftrace.h>
+
+#include "kprobes-common.h"
+
+static int __skip_singlestep(struct kprobe *p, struct pt_regs *regs,
+			     struct kprobe_ctlblk *kcb)
+{
+	/*
+	 * Emulate singlestep (and also recover regs->ip)
+	 * as if there is a 5byte nop
+	 */
+	regs->ip = (unsigned long)p->addr + MCOUNT_INSN_SIZE;
+	if (unlikely(p->post_handler)) {
+		kcb->kprobe_status = KPROBE_HIT_SSDONE;
+		p->post_handler(p, regs, 0);
+	}
+	__this_cpu_write(current_kprobe, NULL);
+	return 1;
+}
+
+int __kprobes skip_singlestep(struct kprobe *p, struct pt_regs *regs,
+			      struct kprobe_ctlblk *kcb)
+{
+	if (kprobe_ftrace(p))
+		return __skip_singlestep(p, regs, kcb);
+	else
+		return 0;
+}
+
+/* Ftrace callback handler for kprobes */
+void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
+				     struct ftrace_ops *ops, struct pt_regs *regs)
+{
+	struct kprobe *p;
+	struct kprobe_ctlblk *kcb;
+	unsigned long flags;
+
+	/* Disable irq for emulating a breakpoint and avoiding preempt */
+	local_irq_save(flags);
+
+	p = get_kprobe((kprobe_opcode_t *)ip);
+	if (unlikely(!p) || kprobe_disabled(p))
+		goto end;
+
+	kcb = get_kprobe_ctlblk();
+	if (kprobe_running()) {
+		kprobes_inc_nmissed_count(p);
+	} else {
+		/* Kprobe handler expects regs->ip = ip + 1 as breakpoint hit */
+		regs->ip = ip + sizeof(kprobe_opcode_t);
+
+		__this_cpu_write(current_kprobe, p);
+		kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+		if (!p->pre_handler || !p->pre_handler(p, regs))
+			__skip_singlestep(p, regs, kcb);
+		/*
+		 * If pre_handler returns !0, it sets regs->ip and
+		 * resets current kprobe.
+		 */
+	}
+end:
+	local_irq_restore(flags);
+}
+
+int __kprobes arch_prepare_kprobe_ftrace(struct kprobe *p)
+{
+	p->ainsn.insn = NULL;
+	p->ainsn.boostable = -1;
+	return 0;
+}
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 57916c0d3cf6..18114bfb10f3 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -541,23 +541,6 @@ reenter_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb
 	return 1;
 }
 
-#ifdef KPROBES_CAN_USE_FTRACE
-static void __kprobes skip_singlestep(struct kprobe *p, struct pt_regs *regs,
-				      struct kprobe_ctlblk *kcb)
-{
-	/*
-	 * Emulate singlestep (and also recover regs->ip)
-	 * as if there is a 5byte nop
-	 */
-	regs->ip = (unsigned long)p->addr + MCOUNT_INSN_SIZE;
-	if (unlikely(p->post_handler)) {
-		kcb->kprobe_status = KPROBE_HIT_SSDONE;
-		p->post_handler(p, regs, 0);
-	}
-	__this_cpu_write(current_kprobe, NULL);
-}
-#endif
-
 /*
  * Interrupts are disabled on entry as trap3 is an interrupt gate and they
  * remain disabled throughout this function.
@@ -616,13 +599,8 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
 	} else if (kprobe_running()) {
 		p = __this_cpu_read(current_kprobe);
 		if (p->break_handler && p->break_handler(p, regs)) {
-#ifdef KPROBES_CAN_USE_FTRACE
-			if (kprobe_ftrace(p)) {
-				skip_singlestep(p, regs, kcb);
-				return 1;
-			}
-#endif
-			setup_singlestep(p, regs, kcb, 0);
+			if (!skip_singlestep(p, regs, kcb))
+				setup_singlestep(p, regs, kcb, 0);
 			return 1;
 		}
 	} /* else: not a kprobe fault; let the kernel handle it */
@@ -1075,50 +1053,6 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
 	return 0;
 }
 
-#ifdef KPROBES_CAN_USE_FTRACE
-/* Ftrace callback handler for kprobes */
-void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
-				     struct ftrace_ops *ops, struct pt_regs *regs)
-{
-	struct kprobe *p;
-	struct kprobe_ctlblk *kcb;
-	unsigned long flags;
-
-	/* Disable irq for emulating a breakpoint and avoiding preempt */
-	local_irq_save(flags);
-
-	p = get_kprobe((kprobe_opcode_t *)ip);
-	if (unlikely(!p) || kprobe_disabled(p))
-		goto end;
-
-	kcb = get_kprobe_ctlblk();
-	if (kprobe_running()) {
-		kprobes_inc_nmissed_count(p);
-	} else {
-		/* Kprobe handler expects regs->ip = ip + 1 as breakpoint hit */
-		regs->ip = ip + sizeof(kprobe_opcode_t);
-
-		__this_cpu_write(current_kprobe, p);
-		kcb->kprobe_status = KPROBE_HIT_ACTIVE;
-		if (!p->pre_handler || !p->pre_handler(p, regs))
-			skip_singlestep(p, regs, kcb);
-		/*
-		 * If pre_handler returns !0, it sets regs->ip and
-		 * resets current kprobe.
-		 */
-	}
-end:
-	local_irq_restore(flags);
-}
-
-int __kprobes arch_prepare_kprobe_ftrace(struct kprobe *p)
-{
-	p->ainsn.insn = NULL;
-	p->ainsn.boostable = -1;
-	return 0;
-}
-#endif
-
 int __init arch_init_kprobes(void)
 {
 	return arch_init_optprobes();
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 23755ba42abc..4b6ef4d33cc2 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -49,16 +49,6 @@
 #define KPROBE_REENTER		0x00000004
 #define KPROBE_HIT_SSDONE	0x00000008
 
-/*
- * If function tracer is enabled and the arch supports full
- * passing of pt_regs to function tracing, then kprobes can
- * optimize on top of function tracing.
- */
-#if defined(CONFIG_FUNCTION_TRACER) && defined(ARCH_SUPPORTS_FTRACE_SAVE_REGS) \
-	&& defined(ARCH_SUPPORTS_KPROBES_ON_FTRACE)
-# define KPROBES_CAN_USE_FTRACE
-#endif
-
 /* Attach to insert probes on any functions which should be ignored*/
 #define __kprobes	__attribute__((__section__(".kprobes.text")))
 
@@ -316,7 +306,7 @@ extern int proc_kprobes_optimization_handler(struct ctl_table *table,
 #endif
 
 #endif /* CONFIG_OPTPROBES */
-#ifdef KPROBES_CAN_USE_FTRACE
+#ifdef CONFIG_KPROBES_ON_FTRACE
 extern void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
 				  struct ftrace_ops *ops, struct pt_regs *regs);
 extern int arch_prepare_kprobe_ftrace(struct kprobe *p);
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 098f396aa409..f423c3ef4a82 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -919,7 +919,7 @@ static __kprobes struct kprobe *alloc_aggr_kprobe(struct kprobe *p)
 }
 #endif /* CONFIG_OPTPROBES */
 
-#ifdef KPROBES_CAN_USE_FTRACE
+#ifdef CONFIG_KPROBES_ON_FTRACE
 static struct ftrace_ops kprobe_ftrace_ops __read_mostly = {
 	.func = kprobe_ftrace_handler,
 	.flags = FTRACE_OPS_FL_SAVE_REGS,
@@ -964,7 +964,7 @@ static void __kprobes disarm_kprobe_ftrace(struct kprobe *p)
 			   (unsigned long)p->addr, 1, 0);
 	WARN(ret < 0, "Failed to disarm kprobe-ftrace at %p (%d)\n", p->addr, ret);
 }
-#else	/* !KPROBES_CAN_USE_FTRACE */
+#else	/* !CONFIG_KPROBES_ON_FTRACE */
 #define prepare_kprobe(p)	arch_prepare_kprobe(p)
 #define arm_kprobe_ftrace(p)	do {} while (0)
 #define disarm_kprobe_ftrace(p)	do {} while (0)
@@ -1414,12 +1414,12 @@ static __kprobes int check_kprobe_address_safe(struct kprobe *p,
 	 */
 	ftrace_addr = ftrace_location((unsigned long)p->addr);
 	if (ftrace_addr) {
-#ifdef KPROBES_CAN_USE_FTRACE
+#ifdef CONFIG_KPROBES_ON_FTRACE
 		/* Given address is not on the instruction boundary */
 		if ((unsigned long)p->addr != ftrace_addr)
 			return -EILSEQ;
 		p->flags |= KPROBE_FLAG_FTRACE;
-#else	/* !KPROBES_CAN_USE_FTRACE */
+#else	/* !CONFIG_KPROBES_ON_FTRACE */
 		return -EINVAL;
 #endif
 	}
-- 
cgit v1.2.3


From de53c25447117eae6b3f8952f663f08a09e0dbb7 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Sun, 23 Dec 2012 21:10:00 +0100
Subject: usb: gadget: add some infracture to register/unregister functions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch provides an infrastructure to register & unregister a USB
function. This allows to turn a function into a module and avoid the
'#include "f_.*.c"' magic and we get a clear API / cut between the bare
gadget and its functions.
The concept is simple:
Each function defines the DECLARE_USB_FUNCTION_INIT macro whith an unique
name of the function and two allocation functions.
- one to create an "instance". The instance holds the current configuration
  set. In case there are two usb_configudations with one function there will
  be one instance and two usb_functions
- one to create an "function" from the instance.

The name of the instance is used to automaticaly load the module if it the
instance is not yet available.
The usb_function callbacks are slightly modified and extended:
- usb_get_function()
  creates a struct usb_function inclunding all pointers (bind,
  unbind,…). It uses the "instance" to map its configuration. So we can
  have _two_ struct usb_function, one for each usb_configuration.
- ->unbind()
  Since the struct usb_function was not allocated in ->bind() it should
  not kfree()d here. This function should only reverse what happens in
  ->bind() that is request cleanup and the cleanup of allocated
  descriptors.
- ->free_func()
  a simple kfree() of the struct usb_function

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 drivers/usb/gadget/Makefile    |  2 +-
 drivers/usb/gadget/composite.c | 47 ++++++++++++++++++++++++--------------
 include/linux/usb/composite.h  | 52 ++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 83 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/gadget/Makefile b/drivers/usb/gadget/Makefile
index 8b4acfd92aa3..fef41f5d6e8d 100644
--- a/drivers/usb/gadget/Makefile
+++ b/drivers/usb/gadget/Makefile
@@ -6,7 +6,7 @@ ccflags-$(CONFIG_USB_GADGET_DEBUG) := -DDEBUG
 obj-$(CONFIG_USB_GADGET)	+= udc-core.o
 obj-$(CONFIG_USB_LIBCOMPOSITE)	+= libcomposite.o
 libcomposite-y			:= usbstring.o config.o epautoconf.o
-libcomposite-y			+= composite.o
+libcomposite-y			+= composite.o functions.o
 obj-$(CONFIG_USB_DUMMY_HCD)	+= dummy_hcd.o
 obj-$(CONFIG_USB_NET2272)	+= net2272.o
 obj-$(CONFIG_USB_NET2280)	+= net2280.o
diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c
index 9db000013f5d..4aa0e4652228 100644
--- a/drivers/usb/gadget/composite.c
+++ b/drivers/usb/gadget/composite.c
@@ -683,6 +683,31 @@ done:
 	return result;
 }
 
+int usb_add_config_only(struct usb_composite_dev *cdev,
+		struct usb_configuration *config)
+{
+	struct usb_configuration *c;
+
+	if (!config->bConfigurationValue)
+		return -EINVAL;
+
+	/* Prevent duplicate configuration identifiers */
+	list_for_each_entry(c, &cdev->configs, list) {
+		if (c->bConfigurationValue == config->bConfigurationValue)
+			return -EBUSY;
+	}
+
+	config->cdev = cdev;
+	list_add_tail(&config->list, &cdev->configs);
+
+	INIT_LIST_HEAD(&config->functions);
+	config->next_interface_id = 0;
+	memset(config->interface, 0, sizeof(config->interface));
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(usb_add_config_only);
+
 /**
  * usb_add_config() - add a configuration to a device.
  * @cdev: wraps the USB gadget
@@ -703,30 +728,18 @@ int usb_add_config(struct usb_composite_dev *cdev,
 		int (*bind)(struct usb_configuration *))
 {
 	int				status = -EINVAL;
-	struct usb_configuration	*c;
+
+	if (!bind)
+		goto done;
 
 	DBG(cdev, "adding config #%u '%s'/%p\n",
 			config->bConfigurationValue,
 			config->label, config);
 
-	if (!config->bConfigurationValue || !bind)
+	status = usb_add_config_only(cdev, config);
+	if (status)
 		goto done;
 
-	/* Prevent duplicate configuration identifiers */
-	list_for_each_entry(c, &cdev->configs, list) {
-		if (c->bConfigurationValue == config->bConfigurationValue) {
-			status = -EBUSY;
-			goto done;
-		}
-	}
-
-	config->cdev = cdev;
-	list_add_tail(&config->list, &cdev->configs);
-
-	INIT_LIST_HEAD(&config->functions);
-	config->next_interface_id = 0;
-	memset(config->interface, 0, sizeof(config->interface));
-
 	status = bind(config);
 	if (status < 0) {
 		while (!list_empty(&config->functions)) {
diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h
index dc512c9432d7..3834e3330b23 100644
--- a/include/linux/usb/composite.h
+++ b/include/linux/usb/composite.h
@@ -77,6 +77,8 @@ struct usb_configuration;
  *	in interface or class descriptors; endpoints; I/O buffers; and so on.
  * @unbind: Reverses @bind; called as a side effect of unregistering the
  *	driver which added this function.
+ * @free_func: free the struct usb_function.
+ * @mod: (internal) points to the module that created this structure.
  * @set_alt: (REQUIRED) Reconfigures altsettings; function drivers may
  *	initialize usb_ep.driver data at this time (when it is used).
  *	Note that setting an interface to its current altsetting resets
@@ -116,6 +118,7 @@ struct usb_configuration;
  * two or more distinct instances within the same configuration, providing
  * several independent logical data links to a USB host.
  */
+
 struct usb_function {
 	const char			*name;
 	struct usb_gadget_strings	**strings;
@@ -136,6 +139,8 @@ struct usb_function {
 					struct usb_function *);
 	void			(*unbind)(struct usb_configuration *,
 					struct usb_function *);
+	void			(*free_func)(struct usb_function *f);
+	struct module		*mod;
 
 	/* runtime state management */
 	int			(*set_alt)(struct usb_function *,
@@ -432,6 +437,53 @@ static inline u16 get_default_bcdDevice(void)
 	return bcdDevice;
 }
 
+struct usb_function_driver {
+	const char *name;
+	struct module *mod;
+	struct list_head list;
+	struct usb_function_instance *(*alloc_inst)(void);
+	struct usb_function *(*alloc_func)(struct usb_function_instance *inst);
+};
+
+struct usb_function_instance {
+	struct usb_function_driver *fd;
+	void (*free_func_inst)(struct usb_function_instance *inst);
+};
+
+void usb_function_unregister(struct usb_function_driver *f);
+int usb_function_register(struct usb_function_driver *newf);
+void usb_put_function_instance(struct usb_function_instance *fi);
+void usb_put_function(struct usb_function *f);
+struct usb_function_instance *usb_get_function_instance(const char *name);
+struct usb_function *usb_get_function(struct usb_function_instance *fi);
+
+struct usb_configuration *usb_get_config(struct usb_composite_dev *cdev,
+		int val);
+int usb_add_config_only(struct usb_composite_dev *cdev,
+		struct usb_configuration *config);
+
+#define DECLARE_USB_FUNCTION(_name, _inst_alloc, _func_alloc)		\
+	static struct usb_function_driver _name ## usb_func = {		\
+		.name = __stringify(_name),				\
+		.mod  = THIS_MODULE,					\
+		.alloc_inst = _inst_alloc,				\
+		.alloc_func = _func_alloc,				\
+	};								\
+	MODULE_ALIAS("usbfunc:"__stringify(_name));
+
+#define DECLARE_USB_FUNCTION_INIT(_name, _inst_alloc, _func_alloc)	\
+	DECLARE_USB_FUNCTION(_name, _inst_alloc, _func_alloc)		\
+	static int __init _name ## mod_init(void)			\
+	{								\
+		return usb_function_register(&_name ## usb_func);	\
+	}								\
+	static void __exit _name ## mod_exit(void)			\
+	{								\
+		usb_function_unregister(&_name ## usb_func);		\
+	}								\
+	module_init(_name ## mod_init);					\
+	module_exit(_name ## mod_exit)
+
 /* messaging utils */
 #define DBG(d, fmt, args...) \
 	dev_dbg(&(d)->gadget->dev , fmt , ## args)
-- 
cgit v1.2.3


From b473577854fea63055ff9ab84f0f52a3e8aed15e Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Sun, 23 Dec 2012 21:10:05 +0100
Subject: usb: gadget: composite: add usb_remove_function()

This will be used to remove a single function from a given config. Right
now "ignore" that an error at ->bind() time and cleanup later during
composite_unbind() / remove_config().

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 drivers/usb/gadget/composite.c | 12 ++++++++++++
 include/linux/usb/composite.h  |  1 +
 2 files changed, 13 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c
index 4aa0e4652228..366facccf4f6 100644
--- a/drivers/usb/gadget/composite.c
+++ b/drivers/usb/gadget/composite.c
@@ -215,6 +215,18 @@ done:
 }
 EXPORT_SYMBOL_GPL(usb_add_function);
 
+void usb_remove_function(struct usb_configuration *c, struct usb_function *f)
+{
+	if (f->disable)
+		f->disable(f);
+
+	bitmap_zero(f->endpoints, 32);
+	list_del(&f->list);
+	if (f->unbind)
+		f->unbind(c, f);
+}
+EXPORT_SYMBOL_GPL(usb_remove_function);
+
 /**
  * usb_function_deactivate - prevent function and gadget enumeration
  * @function: the function that isn't yet ready to respond
diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h
index 3834e3330b23..8c7a6295ae78 100644
--- a/include/linux/usb/composite.h
+++ b/include/linux/usb/composite.h
@@ -461,6 +461,7 @@ struct usb_configuration *usb_get_config(struct usb_composite_dev *cdev,
 		int val);
 int usb_add_config_only(struct usb_composite_dev *cdev,
 		struct usb_configuration *config);
+void usb_remove_function(struct usb_configuration *c, struct usb_function *f);
 
 #define DECLARE_USB_FUNCTION(_name, _inst_alloc, _func_alloc)		\
 	static struct usb_function_driver _name ## usb_func = {		\
-- 
cgit v1.2.3


From 0062f6e56f70bd2230ba1ebd1667d1b32a1af3b2 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Sun, 23 Dec 2012 21:10:15 +0100
Subject: usb: gadget: add a forward pointer from usb_function to its
 "instance"

We can have multiple usb_functions which origin is the same "instance".
Within one USB configuration there should be only one function of an
instance. This back pointer helps configfs to recoginze to which
instance a given usb_function belongs.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 drivers/usb/gadget/functions.c | 116 +++++++++++++++++++++++++++++++++++++++++
 include/linux/usb/composite.h  |   1 +
 2 files changed, 117 insertions(+)
 create mode 100644 drivers/usb/gadget/functions.c

(limited to 'include/linux')

diff --git a/drivers/usb/gadget/functions.c b/drivers/usb/gadget/functions.c
new file mode 100644
index 000000000000..b13f839e7368
--- /dev/null
+++ b/drivers/usb/gadget/functions.c
@@ -0,0 +1,116 @@
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/err.h>
+
+#include <linux/usb/composite.h>
+
+static LIST_HEAD(func_list);
+static DEFINE_MUTEX(func_lock);
+
+static struct usb_function_instance *try_get_usb_function_instance(const char *name)
+{
+	struct usb_function_driver *fd;
+	struct usb_function_instance *fi;
+
+	fi = ERR_PTR(-ENOENT);
+	mutex_lock(&func_lock);
+	list_for_each_entry(fd, &func_list, list) {
+
+		if (strcmp(name, fd->name))
+			continue;
+
+		if (!try_module_get(fd->mod)) {
+			fi = ERR_PTR(-EBUSY);
+			break;
+		}
+		fi = fd->alloc_inst();
+		if (IS_ERR(fi))
+			module_put(fd->mod);
+		else
+			fi->fd = fd;
+		break;
+	}
+	mutex_unlock(&func_lock);
+	return fi;
+}
+
+struct usb_function_instance *usb_get_function_instance(const char *name)
+{
+	struct usb_function_instance *fi;
+	int ret;
+
+	fi = try_get_usb_function_instance(name);
+	if (!IS_ERR(fi))
+		return fi;
+	ret = PTR_ERR(fi);
+	if (ret != -ENOENT)
+		return fi;
+	ret = request_module("usbfunc:%s", name);
+	if (ret < 0)
+		return ERR_PTR(ret);
+	return try_get_usb_function_instance(name);
+}
+EXPORT_SYMBOL_GPL(usb_get_function_instance);
+
+struct usb_function *usb_get_function(struct usb_function_instance *fi)
+{
+	struct usb_function *f;
+
+	f = fi->fd->alloc_func(fi);
+	if (IS_ERR(f))
+		return f;
+	f->fi = fi;
+	return f;
+}
+EXPORT_SYMBOL_GPL(usb_get_function);
+
+void usb_put_function_instance(struct usb_function_instance *fi)
+{
+	struct module *mod;
+
+	if (!fi)
+		return;
+
+	mod = fi->fd->mod;
+	fi->free_func_inst(fi);
+	module_put(mod);
+}
+EXPORT_SYMBOL_GPL(usb_put_function_instance);
+
+void usb_put_function(struct usb_function *f)
+{
+	if (!f)
+		return;
+
+	f->free_func(f);
+}
+EXPORT_SYMBOL_GPL(usb_put_function);
+
+int usb_function_register(struct usb_function_driver *newf)
+{
+	struct usb_function_driver *fd;
+	int ret;
+
+	ret = -EEXIST;
+
+	mutex_lock(&func_lock);
+	list_for_each_entry(fd, &func_list, list) {
+		if (!strcmp(fd->name, newf->name))
+			goto out;
+	}
+	ret = 0;
+	list_add_tail(&newf->list, &func_list);
+out:
+	mutex_unlock(&func_lock);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(usb_function_register);
+
+void usb_function_unregister(struct usb_function_driver *fd)
+{
+	mutex_lock(&func_lock);
+	list_del(&fd->list);
+	mutex_unlock(&func_lock);
+}
+EXPORT_SYMBOL_GPL(usb_function_unregister);
diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h
index 8c7a6295ae78..771de7acf8dd 100644
--- a/include/linux/usb/composite.h
+++ b/include/linux/usb/composite.h
@@ -161,6 +161,7 @@ struct usb_function {
 	/* internals */
 	struct list_head		list;
 	DECLARE_BITMAP(endpoints, 32);
+	const struct usb_function_instance *fi;
 };
 
 int usb_add_function(struct usb_configuration *, struct usb_function *);
-- 
cgit v1.2.3


From 4c49a5f0ef1bc61395329ea7a9fce2893e97eaa6 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Sun, 23 Dec 2012 21:10:19 +0100
Subject: usb: gadget: udc-core: introduce UDC binding by name

This patch adds udc_attach_driver() which allows to bind an UDC which is
specified by name to a driver. The name of available UDCs can be
obtained from /sys/class/udc. This interface is intended for configfs
interface.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 drivers/usb/gadget/udc-core.c | 72 ++++++++++++++++++++++++++++++-------------
 include/linux/usb/gadget.h    |  2 ++
 2 files changed, 53 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/gadget/udc-core.c b/drivers/usb/gadget/udc-core.c
index 4d90a800063c..e7c591621a3b 100644
--- a/drivers/usb/gadget/udc-core.c
+++ b/drivers/usb/gadget/udc-core.c
@@ -311,26 +311,10 @@ EXPORT_SYMBOL_GPL(usb_del_gadget_udc);
 
 /* ------------------------------------------------------------------------- */
 
-int usb_gadget_probe_driver(struct usb_gadget_driver *driver)
+static int udc_bind_to_driver(struct usb_udc *udc, struct usb_gadget_driver *driver)
 {
-	struct usb_udc		*udc = NULL;
-	int			ret;
-
-	if (!driver || !driver->bind || !driver->setup)
-		return -EINVAL;
+	int ret;
 
-	mutex_lock(&udc_lock);
-	list_for_each_entry(udc, &udc_list, list) {
-		/* For now we take the first one */
-		if (!udc->driver)
-			goto found;
-	}
-
-	pr_debug("couldn't find an available UDC\n");
-	mutex_unlock(&udc_lock);
-	return -ENODEV;
-
-found:
 	dev_dbg(&udc->dev, "registering UDC driver [%s]\n",
 			driver->function);
 
@@ -352,18 +336,64 @@ found:
 		ret = usb_gadget_start(udc->gadget, driver, driver->bind);
 		if (ret)
 			goto err1;
-
 	}
 
 	kobject_uevent(&udc->dev.kobj, KOBJ_CHANGE);
-	mutex_unlock(&udc_lock);
 	return 0;
-
 err1:
 	dev_err(&udc->dev, "failed to start %s: %d\n",
 			udc->driver->function, ret);
 	udc->driver = NULL;
 	udc->dev.driver = NULL;
+	return ret;
+}
+
+int udc_attach_driver(const char *name, struct usb_gadget_driver *driver)
+{
+	struct usb_udc *udc = NULL;
+	int ret = -ENODEV;
+
+	mutex_lock(&udc_lock);
+	list_for_each_entry(udc, &udc_list, list) {
+		ret = strcmp(name, dev_name(&udc->dev));
+		if (!ret)
+			break;
+	}
+	if (ret) {
+		ret = -ENODEV;
+		goto out;
+	}
+	if (udc->driver) {
+		ret = -EBUSY;
+		goto out;
+	}
+	ret = udc_bind_to_driver(udc, driver);
+out:
+	mutex_unlock(&udc_lock);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(udc_attach_driver);
+
+int usb_gadget_probe_driver(struct usb_gadget_driver *driver)
+{
+	struct usb_udc		*udc = NULL;
+	int			ret;
+
+	if (!driver || !driver->bind || !driver->setup)
+		return -EINVAL;
+
+	mutex_lock(&udc_lock);
+	list_for_each_entry(udc, &udc_list, list) {
+		/* For now we take the first one */
+		if (!udc->driver)
+			goto found;
+	}
+
+	pr_debug("couldn't find an available UDC\n");
+	mutex_unlock(&udc_lock);
+	return -ENODEV;
+found:
+	ret = udc_bind_to_driver(udc, driver);
 	mutex_unlock(&udc_lock);
 	return ret;
 }
diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h
index 0af6569b8cc6..62156701e4f1 100644
--- a/include/linux/usb/gadget.h
+++ b/include/linux/usb/gadget.h
@@ -880,6 +880,8 @@ int usb_gadget_unregister_driver(struct usb_gadget_driver *driver);
 
 extern int usb_add_gadget_udc(struct device *parent, struct usb_gadget *gadget);
 extern void usb_del_gadget_udc(struct usb_gadget *gadget);
+extern int udc_attach_driver(const char *name,
+		struct usb_gadget_driver *driver);
 
 /*-------------------------------------------------------------------------*/
 
-- 
cgit v1.2.3


From a59233407aed54b8a9121cea75d9c6a2a470d8d3 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Sun, 23 Dec 2012 21:10:20 +0100
Subject: usb: gadget: factor out two helper functions from composite_bind()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch factors out two helper functions from composite_bind()
that is composite_dev_prepare() and its counterpart
composite_dev_cleanup().

This will be used by the configfs which requries a slightly different
bind/setup code because part of its configurations (i.e. config
descripts, cdev, …) are setup in advance and VID/PID and so one should
not be overwritten. Also the setup of ep0 endpoint can be delayed until
the UDC is assigned.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 drivers/usb/gadget/composite.c | 82 ++++++++++++++++++++++++++----------------
 include/linux/usb/composite.h  |  8 +++++
 2 files changed, 59 insertions(+), 31 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c
index 366facccf4f6..9083ec93f38e 100644
--- a/drivers/usb/gadget/composite.c
+++ b/drivers/usb/gadget/composite.c
@@ -1394,11 +1394,8 @@ static void __composite_unbind(struct usb_gadget *gadget, bool unbind_driver)
 	if (cdev->driver->unbind && unbind_driver)
 		cdev->driver->unbind(cdev);
 
-	if (cdev->req) {
-		kfree(cdev->req->buf);
-		usb_ep_free_request(gadget->ep0, cdev->req);
-	}
-	device_remove_file(&gadget->dev, &dev_attr_suspended);
+	composite_dev_cleanup(cdev);
+
 	kfree(cdev->def_manufacturer);
 	kfree(cdev);
 	set_gadget_data(gadget, NULL);
@@ -1447,34 +1444,25 @@ static void update_unchanged_dev_desc(struct usb_device_descriptor *new,
 		new->iProduct = iProduct;
 }
 
-static struct usb_composite_driver *to_cdriver(struct usb_gadget_driver *gdrv)
+int composite_dev_prepare(struct usb_composite_driver *composite,
+		struct usb_composite_dev *cdev)
 {
-	return container_of(gdrv, struct usb_composite_driver, gadget_driver);
-}
-
-static int composite_bind(struct usb_gadget *gadget,
-		struct usb_gadget_driver *gdriver)
-{
-	struct usb_composite_dev	*cdev;
-	struct usb_composite_driver	*composite = to_cdriver(gdriver);
-	int				status = -ENOMEM;
-
-	cdev = kzalloc(sizeof *cdev, GFP_KERNEL);
-	if (!cdev)
-		return status;
-
-	spin_lock_init(&cdev->lock);
-	cdev->gadget = gadget;
-	set_gadget_data(gadget, cdev);
-	INIT_LIST_HEAD(&cdev->configs);
+	struct usb_gadget *gadget = cdev->gadget;
+	int ret = -ENOMEM;
 
 	/* preallocate control response and buffer */
 	cdev->req = usb_ep_alloc_request(gadget->ep0, GFP_KERNEL);
 	if (!cdev->req)
-		goto fail;
+		return -ENOMEM;
+
 	cdev->req->buf = kmalloc(USB_COMP_EP0_BUFSIZ, GFP_KERNEL);
 	if (!cdev->req->buf)
 		goto fail;
+
+	ret = device_create_file(&gadget->dev, &dev_attr_suspended);
+	if (ret)
+		goto fail_dev;
+
 	cdev->req->complete = composite_setup_complete;
 	gadget->ep0->driver_data = cdev;
 
@@ -1492,7 +1480,44 @@ static int composite_bind(struct usb_gadget *gadget,
 	 * we force endpoints to start unassigned; few controller
 	 * drivers will zero ep->driver_data.
 	 */
-	usb_ep_autoconfig_reset(cdev->gadget);
+	usb_ep_autoconfig_reset(gadget);
+	return 0;
+fail_dev:
+	kfree(cdev->req->buf);
+fail:
+	usb_ep_free_request(gadget->ep0, cdev->req);
+	cdev->req = NULL;
+	return ret;
+}
+
+void composite_dev_cleanup(struct usb_composite_dev *cdev)
+{
+	if (cdev->req) {
+		kfree(cdev->req->buf);
+		usb_ep_free_request(cdev->gadget->ep0, cdev->req);
+	}
+	device_remove_file(&cdev->gadget->dev, &dev_attr_suspended);
+}
+
+static int composite_bind(struct usb_gadget *gadget,
+		struct usb_gadget_driver *gdriver)
+{
+	struct usb_composite_dev	*cdev;
+	struct usb_composite_driver	*composite = to_cdriver(gdriver);
+	int				status = -ENOMEM;
+
+	cdev = kzalloc(sizeof *cdev, GFP_KERNEL);
+	if (!cdev)
+		return status;
+
+	spin_lock_init(&cdev->lock);
+	cdev->gadget = gadget;
+	set_gadget_data(gadget, cdev);
+	INIT_LIST_HEAD(&cdev->configs);
+
+	status = composite_dev_prepare(composite, cdev);
+	if (status)
+		goto fail;
 
 	/* composite gadget needs to assign strings for whole device (like
 	 * serial number), register function drivers, potentially update
@@ -1508,11 +1533,6 @@ static int composite_bind(struct usb_gadget *gadget,
 	if (composite->needs_serial && !cdev->desc.iSerialNumber)
 		WARNING(cdev, "userspace failed to provide iSerialNumber\n");
 
-	/* finish up */
-	status = device_create_file(&gadget->dev, &dev_attr_suspended);
-	if (status)
-		goto fail;
-
 	INFO(cdev, "%s ready\n", composite->name);
 	return 0;
 
diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h
index 771de7acf8dd..bd6d857c12f4 100644
--- a/include/linux/usb/composite.h
+++ b/include/linux/usb/composite.h
@@ -323,7 +323,15 @@ struct usb_composite_driver {
 extern int usb_composite_probe(struct usb_composite_driver *driver);
 extern void usb_composite_unregister(struct usb_composite_driver *driver);
 extern void usb_composite_setup_continue(struct usb_composite_dev *cdev);
+extern int composite_dev_prepare(struct usb_composite_driver *composite,
+		struct usb_composite_dev *cdev);
+void composite_dev_cleanup(struct usb_composite_dev *cdev);
 
+static inline struct usb_composite_driver *to_cdriver(
+		struct usb_gadget_driver *gdrv)
+{
+	return container_of(gdrv, struct usb_composite_driver, gadget_driver);
+}
 
 /**
  * struct usb_composite_device - represents one composite usb gadget
-- 
cgit v1.2.3


From 2d5a88990260d226a69acddf22c04f47c267b33a Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Sun, 23 Dec 2012 21:10:21 +0100
Subject: usb: gadget: export composite's setup & disconnect function

The configfs can't use all of composite's hooks because ->bind() and
->unbind() has to be done a little differently. ->disconnect() and
->setup() on the hand can be recycled.
This patch exports them both so configfs can use them.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 drivers/usb/gadget/composite.c | 4 ++--
 include/linux/usb/composite.h  | 4 ++++
 2 files changed, 6 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c
index 9083ec93f38e..8a1c3752f75f 100644
--- a/drivers/usb/gadget/composite.c
+++ b/drivers/usb/gadget/composite.c
@@ -1077,7 +1077,7 @@ static void composite_setup_complete(struct usb_ep *ep, struct usb_request *req)
  * housekeeping for the gadget function we're implementing.  Most of
  * the work is in config and function specific setup.
  */
-static int
+int
 composite_setup(struct usb_gadget *gadget, const struct usb_ctrlrequest *ctrl)
 {
 	struct usb_composite_dev	*cdev = get_gadget_data(gadget);
@@ -1344,7 +1344,7 @@ done:
 	return value;
 }
 
-static void composite_disconnect(struct usb_gadget *gadget)
+void composite_disconnect(struct usb_gadget *gadget)
 {
 	struct usb_composite_dev	*cdev = get_gadget_data(gadget);
 	unsigned long			flags;
diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h
index bd6d857c12f4..a212ec3e9d69 100644
--- a/include/linux/usb/composite.h
+++ b/include/linux/usb/composite.h
@@ -398,6 +398,10 @@ extern int usb_string_ids_tab(struct usb_composite_dev *c,
 			      struct usb_string *str);
 extern int usb_string_ids_n(struct usb_composite_dev *c, unsigned n);
 
+extern void composite_disconnect(struct usb_gadget *gadget);
+extern int composite_setup(struct usb_gadget *gadget,
+		const struct usb_ctrlrequest *ctrl);
+
 /*
  * Some systems will need runtime overrides for the  product identifiers
  * published in the device descriptor, either numbers or strings or both.
-- 
cgit v1.2.3


From 9bb2859f8a8dbc9b42f3100641dd0ae80cfbe86a Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Sun, 23 Dec 2012 21:10:22 +0100
Subject: usb: gadget: composite: introduce usb_gstrings_attach()

The USB strings don't (yet) fully work in multiple configs/gadget
environment. The string id is assigned to the descriptor and the struct
usb_strings. We create a copy of the individual descriptor so we don't
clash if we use a function more than once. However, we have only one
struct usb_string for each string.

Currently each function which is used multiple times checks for
"id != 0" and only assigns string ids if it did not happen yet. This
works well if we use the same function multiple times as long as we do
it within the "one" gadget we have. Trouble starts once we use the same
function in a second gadget.

In order to solve this I introduce usb_gstrings_attach(). This function will
crate a copy all structs except for the strings which are not copied.
After the copy it will assign USB ids and attach it to cdev.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 drivers/usb/gadget/composite.c | 135 +++++++++++++++++++++++++++++++++++++++++
 include/linux/usb/composite.h  |   4 ++
 include/linux/usb/gadget.h     |   5 ++
 3 files changed, 144 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c
index 8a1c3752f75f..9d7a1fabc482 100644
--- a/drivers/usb/gadget/composite.c
+++ b/drivers/usb/gadget/composite.c
@@ -28,6 +28,12 @@
  * with the relevant device-wide data.
  */
 
+static struct usb_gadget_strings **get_containers_gs(
+		struct usb_gadget_string_container *uc)
+{
+	return (struct usb_gadget_strings **)uc->stash;
+}
+
 /**
  * next_ep_desc() - advance to the next EP descriptor
  * @t: currect pointer within descriptor array
@@ -904,6 +910,7 @@ static int get_string(struct usb_composite_dev *cdev,
 		void *buf, u16 language, int id)
 {
 	struct usb_composite_driver	*composite = cdev->driver;
+	struct usb_gadget_string_container *uc;
 	struct usb_configuration	*c;
 	struct usb_function		*f;
 	int				len;
@@ -946,6 +953,15 @@ static int get_string(struct usb_composite_dev *cdev,
 		return s->bLength;
 	}
 
+	list_for_each_entry(uc, &cdev->gstrings, list) {
+		struct usb_gadget_strings **sp;
+
+		sp = get_containers_gs(uc);
+		len = lookup_string(sp, buf, language, id);
+		if (len > 0)
+			return len;
+	}
+
 	/* String IDs are device-scoped, so we look up each string
 	 * table we're told about.  These lookups are infrequent;
 	 * simpler-is-better here.
@@ -1031,6 +1047,119 @@ int usb_string_ids_tab(struct usb_composite_dev *cdev, struct usb_string *str)
 }
 EXPORT_SYMBOL_GPL(usb_string_ids_tab);
 
+static struct usb_gadget_string_container *copy_gadget_strings(
+		struct usb_gadget_strings **sp, unsigned n_gstrings,
+		unsigned n_strings)
+{
+	struct usb_gadget_string_container *uc;
+	struct usb_gadget_strings **gs_array;
+	struct usb_gadget_strings *gs;
+	struct usb_string *s;
+	unsigned mem;
+	unsigned n_gs;
+	unsigned n_s;
+	void *stash;
+
+	mem = sizeof(*uc);
+	mem += sizeof(void *) * (n_gstrings + 1);
+	mem += sizeof(struct usb_gadget_strings) * n_gstrings;
+	mem += sizeof(struct usb_string) * (n_strings + 1) * (n_gstrings);
+	uc = kmalloc(mem, GFP_KERNEL);
+	if (!uc)
+		return ERR_PTR(-ENOMEM);
+	gs_array = get_containers_gs(uc);
+	stash = uc->stash;
+	stash += sizeof(void *) * (n_gstrings + 1);
+	for (n_gs = 0; n_gs < n_gstrings; n_gs++) {
+		struct usb_string *org_s;
+
+		gs_array[n_gs] = stash;
+		gs = gs_array[n_gs];
+		stash += sizeof(struct usb_gadget_strings);
+		gs->language = sp[n_gs]->language;
+		gs->strings = stash;
+		org_s = sp[n_gs]->strings;
+
+		for (n_s = 0; n_s < n_strings; n_s++) {
+			s = stash;
+			stash += sizeof(struct usb_string);
+			if (org_s->s)
+				s->s = org_s->s;
+			else
+				s->s = "";
+			org_s++;
+		}
+		s = stash;
+		s->s = NULL;
+		stash += sizeof(struct usb_string);
+
+	}
+	gs_array[n_gs] = NULL;
+	return uc;
+}
+
+/**
+ * usb_gstrings_attach() - attach gadget strings to a cdev and assign ids
+ * @cdev: the device whose string descriptor IDs are being allocated
+ * and attached.
+ * @sp: an array of usb_gadget_strings to attach.
+ * @n_strings: number of entries in each usb_strings array (sp[]->strings)
+ *
+ * This function will create a deep copy of usb_gadget_strings and usb_string
+ * and attach it to the cdev. The actual string (usb_string.s) will not be
+ * copied but only a referenced will be made. The struct usb_gadget_strings
+ * array may contain multiple languges and should be NULL terminated.
+ * The ->language pointer of each struct usb_gadget_strings has to contain the
+ * same amount of entries.
+ * For instance: sp[0] is en-US, sp[1] is es-ES. It is expected that the first
+ * usb_string entry of es-ES containts the translation of the first usb_string
+ * entry of en-US. Therefore both entries become the same id assign.
+ */
+struct usb_string *usb_gstrings_attach(struct usb_composite_dev *cdev,
+		struct usb_gadget_strings **sp, unsigned n_strings)
+{
+	struct usb_gadget_string_container *uc;
+	struct usb_gadget_strings **n_gs;
+	unsigned n_gstrings = 0;
+	unsigned i;
+	int ret;
+
+	for (i = 0; sp[i]; i++)
+		n_gstrings++;
+
+	if (!n_gstrings)
+		return ERR_PTR(-EINVAL);
+
+	uc = copy_gadget_strings(sp, n_gstrings, n_strings);
+	if (IS_ERR(uc))
+		return ERR_PTR(PTR_ERR(uc));
+
+	n_gs = get_containers_gs(uc);
+	ret = usb_string_ids_tab(cdev, n_gs[0]->strings);
+	if (ret)
+		goto err;
+
+	for (i = 1; i < n_gstrings; i++) {
+		struct usb_string *m_s;
+		struct usb_string *s;
+		unsigned n;
+
+		m_s = n_gs[0]->strings;
+		s = n_gs[i]->strings;
+		for (n = 0; n < n_strings; n++) {
+			s->id = m_s->id;
+			s++;
+			m_s++;
+		}
+	}
+	list_add_tail(&uc->list, &cdev->gstrings);
+	return n_gs[0]->strings;
+err:
+	kfree(uc);
+	return ERR_PTR(ret);
+}
+EXPORT_SYMBOL_GPL(usb_gstrings_attach);
+
 /**
  * usb_string_ids_n() - allocate unused string IDs in batch
  * @c: the device whose string descriptor IDs are being allocated
@@ -1377,6 +1506,7 @@ static DEVICE_ATTR(suspended, 0444, composite_show_suspended, NULL);
 static void __composite_unbind(struct usb_gadget *gadget, bool unbind_driver)
 {
 	struct usb_composite_dev	*cdev = get_gadget_data(gadget);
+	struct usb_gadget_string_container *uc, *tmp;
 
 	/* composite_disconnect() must already have been called
 	 * by the underlying peripheral controller driver!
@@ -1391,6 +1521,10 @@ static void __composite_unbind(struct usb_gadget *gadget, bool unbind_driver)
 				struct usb_configuration, list);
 		remove_config(cdev, c);
 	}
+	list_for_each_entry_safe(uc, tmp, &cdev->gstrings, list) {
+		list_del(&uc->list);
+		kfree(uc);
+	}
 	if (cdev->driver->unbind && unbind_driver)
 		cdev->driver->unbind(cdev);
 
@@ -1514,6 +1648,7 @@ static int composite_bind(struct usb_gadget *gadget,
 	cdev->gadget = gadget;
 	set_gadget_data(gadget, cdev);
 	INIT_LIST_HEAD(&cdev->configs);
+	INIT_LIST_HEAD(&cdev->gstrings);
 
 	status = composite_dev_prepare(composite, cdev);
 	if (status)
diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h
index a212ec3e9d69..3c671c1b37f6 100644
--- a/include/linux/usb/composite.h
+++ b/include/linux/usb/composite.h
@@ -375,6 +375,7 @@ struct usb_composite_dev {
 	unsigned int			suspended:1;
 	struct usb_device_descriptor	desc;
 	struct list_head		configs;
+	struct list_head		gstrings;
 	struct usb_composite_driver	*driver;
 	u8				next_string_id;
 	char				*def_manufacturer;
@@ -396,6 +397,9 @@ struct usb_composite_dev {
 extern int usb_string_id(struct usb_composite_dev *c);
 extern int usb_string_ids_tab(struct usb_composite_dev *c,
 			      struct usb_string *str);
+extern struct usb_string *usb_gstrings_attach(struct usb_composite_dev *cdev,
+		struct usb_gadget_strings **sp, unsigned n_strings);
+
 extern int usb_string_ids_n(struct usb_composite_dev *c, unsigned n);
 
 extern void composite_disconnect(struct usb_gadget *gadget);
diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h
index 62156701e4f1..e4c119ee4ebe 100644
--- a/include/linux/usb/gadget.h
+++ b/include/linux/usb/gadget.h
@@ -913,6 +913,11 @@ struct usb_gadget_strings {
 	struct usb_string	*strings;
 };
 
+struct usb_gadget_string_container {
+	struct list_head        list;
+	u8                      *stash[0];
+};
+
 /* put descriptor for string with that id into buf (buflen >= 256) */
 int usb_gadget_get_string(struct usb_gadget_strings *table, int id, u8 *buf);
 
-- 
cgit v1.2.3


From bbb923a4c2d17ebd5ec34755fe19a33914cbd86f Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Mon, 21 Jan 2013 06:00:25 +0000
Subject: mcast: define and use MRT[6]_MAX in ip[6]_mroute_opt()

This will ease further addition of new MRT[6]_* values and avoid to update
in6.h each time.
Note that we reduce the maximum value from 210 to 209, but 210 does not match
any known value in ip[6]_mroute_setsockopt().

Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Acked-by: David L Stevens <dlstevens@us.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mroute.h       |  2 +-
 include/linux/mroute6.h      |  2 +-
 include/uapi/linux/in6.h     | 15 ++++-----------
 include/uapi/linux/mroute.h  |  1 +
 include/uapi/linux/mroute6.h |  1 +
 5 files changed, 8 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mroute.h b/include/linux/mroute.h
index ea00d9162ee5..79aaa9fc1a15 100644
--- a/include/linux/mroute.h
+++ b/include/linux/mroute.h
@@ -9,7 +9,7 @@
 #ifdef CONFIG_IP_MROUTE
 static inline int ip_mroute_opt(int opt)
 {
-	return (opt >= MRT_BASE) && (opt <= MRT_BASE + 10);
+	return (opt >= MRT_BASE) && (opt <= MRT_MAX);
 }
 #else
 static inline int ip_mroute_opt(int opt)
diff --git a/include/linux/mroute6.h b/include/linux/mroute6.h
index a223561ba12e..66982e764051 100644
--- a/include/linux/mroute6.h
+++ b/include/linux/mroute6.h
@@ -10,7 +10,7 @@
 #ifdef CONFIG_IPV6_MROUTE
 static inline int ip6_mroute_opt(int opt)
 {
-	return (opt >= MRT6_BASE) && (opt <= MRT6_BASE + 10);
+	return (opt >= MRT6_BASE) && (opt <= MRT6_MAX);
 }
 #else
 static inline int ip6_mroute_opt(int opt)
diff --git a/include/uapi/linux/in6.h b/include/uapi/linux/in6.h
index 5673b97dcf54..53b1d56a6e7f 100644
--- a/include/uapi/linux/in6.h
+++ b/include/uapi/linux/in6.h
@@ -259,17 +259,10 @@ struct in6_flowlabel_req {
 
 /*
  * Multicast Routing:
- * see include/linux/mroute6.h.
+ * see include/uapi/linux/mroute6.h.
  *
- * MRT6_INIT			200
- * MRT6_DONE			201
- * MRT6_ADD_MIF			202
- * MRT6_DEL_MIF			203
- * MRT6_ADD_MFC			204
- * MRT6_DEL_MFC			205
- * MRT6_VERSION			206
- * MRT6_ASSERT			207
- * MRT6_PIM			208
- * (reserved)			209
+ * MRT6_BASE			200
+ * ...
+ * MRT6_MAX
  */
 #endif /* _UAPI_LINUX_IN6_H */
diff --git a/include/uapi/linux/mroute.h b/include/uapi/linux/mroute.h
index 16929993acc4..1c11004af5db 100644
--- a/include/uapi/linux/mroute.h
+++ b/include/uapi/linux/mroute.h
@@ -26,6 +26,7 @@
 #define MRT_ASSERT	(MRT_BASE+7)	/* Activate PIM assert mode		*/
 #define MRT_PIM		(MRT_BASE+8)	/* enable PIM code			*/
 #define MRT_TABLE	(MRT_BASE+9)	/* Specify mroute table ID		*/
+#define MRT_MAX		(MRT_BASE+9)
 
 #define SIOCGETVIFCNT	SIOCPROTOPRIVATE	/* IP protocol privates */
 #define SIOCGETSGCNT	(SIOCPROTOPRIVATE+1)
diff --git a/include/uapi/linux/mroute6.h b/include/uapi/linux/mroute6.h
index 3e89b5e7f9e3..c206ae3a2327 100644
--- a/include/uapi/linux/mroute6.h
+++ b/include/uapi/linux/mroute6.h
@@ -26,6 +26,7 @@
 #define MRT6_ASSERT	(MRT6_BASE+7)	/* Activate PIM assert mode		*/
 #define MRT6_PIM	(MRT6_BASE+8)	/* enable PIM code			*/
 #define MRT6_TABLE	(MRT6_BASE+9)	/* Specify mroute table ID		*/
+#define MRT6_MAX	(MRT6_BASE+9)
 
 #define SIOCGETMIFCNT_IN6	SIOCPROTOPRIVATE	/* IP protocol privates */
 #define SIOCGETSGCNT_IN6	(SIOCPROTOPRIVATE+1)
-- 
cgit v1.2.3


From fa0879e37b59e8e3f130a30a9e6fa515717c5bdd Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@gmail.com>
Date: Mon, 21 Jan 2013 01:17:22 +0000
Subject: net: split eth_mac_addr for better error handling

When we set mac address, software mac address in system and hardware mac
address all need to be updated. Current eth_mac_addr() doesn't allow
callers to implement error handling nicely.

This patch split eth_mac_addr() to prepare part and real commit part,
then we can prepare first, and try to change hardware address, then do
the real commit if hardware address is set successfully.

Signed-off-by: Stefan Hajnoczi <stefanha@gmail.com>
Signed-off-by: Amos Kong <akong@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/etherdevice.h |  2 ++
 net/ethernet/eth.c          | 41 +++++++++++++++++++++++++++++++++++------
 2 files changed, 37 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
index 1a43e1b4f7ad..c623861964e4 100644
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -40,6 +40,8 @@ extern int eth_header_cache(const struct neighbour *neigh, struct hh_cache *hh,
 extern void eth_header_cache_update(struct hh_cache *hh,
 				    const struct net_device *dev,
 				    const unsigned char *haddr);
+extern int eth_prepare_mac_addr_change(struct net_device *dev, void *p);
+extern void eth_commit_mac_addr_change(struct net_device *dev, void *p);
 extern int eth_mac_addr(struct net_device *dev, void *p);
 extern int eth_change_mtu(struct net_device *dev, int new_mtu);
 extern int eth_validate_addr(struct net_device *dev);
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index bc39c8c8f589..a36c85eab5b4 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -271,6 +271,36 @@ void eth_header_cache_update(struct hh_cache *hh,
 }
 EXPORT_SYMBOL(eth_header_cache_update);
 
+/**
+ * eth_prepare_mac_addr_change - prepare for mac change
+ * @dev: network device
+ * @p: socket address
+ */
+int eth_prepare_mac_addr_change(struct net_device *dev, void *p)
+{
+	struct sockaddr *addr = p;
+
+	if (!(dev->priv_flags & IFF_LIVE_ADDR_CHANGE) && netif_running(dev))
+		return -EBUSY;
+	if (!is_valid_ether_addr(addr->sa_data))
+		return -EADDRNOTAVAIL;
+	return 0;
+}
+EXPORT_SYMBOL(eth_prepare_mac_addr_change);
+
+/**
+ * eth_commit_mac_addr_change - commit mac change
+ * @dev: network device
+ * @p: socket address
+ */
+void eth_commit_mac_addr_change(struct net_device *dev, void *p)
+{
+	struct sockaddr *addr = p;
+
+	memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN);
+}
+EXPORT_SYMBOL(eth_commit_mac_addr_change);
+
 /**
  * eth_mac_addr - set new Ethernet hardware address
  * @dev: network device
@@ -283,13 +313,12 @@ EXPORT_SYMBOL(eth_header_cache_update);
  */
 int eth_mac_addr(struct net_device *dev, void *p)
 {
-	struct sockaddr *addr = p;
+	int ret;
 
-	if (!(dev->priv_flags & IFF_LIVE_ADDR_CHANGE) && netif_running(dev))
-		return -EBUSY;
-	if (!is_valid_ether_addr(addr->sa_data))
-		return -EADDRNOTAVAIL;
-	memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN);
+	ret = eth_prepare_mac_addr_change(dev, p);
+	if (ret < 0)
+		return ret;
+	eth_commit_mac_addr_change(dev, p);
 	return 0;
 }
 EXPORT_SYMBOL(eth_mac_addr);
-- 
cgit v1.2.3


From 6509141f9c2ba74df6cc72ec35cd1865276ae3a4 Mon Sep 17 00:00:00 2001
From: Wei Shuai <cpuwolf@gmail.com>
Date: Mon, 21 Jan 2013 06:00:31 +0000
Subject: usbnet: add new flag FLAG_NOARP for usb net devices

We do have some USB net devices, which cannot do ARP.
so we can introduce a new flag FLAG_NOARP, then client drivers
can easily handle this kind of devices

Signed-off-by: Wei Shuai <cpuwolf@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/usbnet.c   | 4 ++++
 include/linux/usb/usbnet.h | 1 +
 2 files changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c
index 3d4bf01641b4..f34b2ebee815 100644
--- a/drivers/net/usb/usbnet.c
+++ b/drivers/net/usb/usbnet.c
@@ -1448,6 +1448,10 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod)
 		if ((dev->driver_info->flags & FLAG_WWAN) != 0)
 			strcpy(net->name, "wwan%d");
 
+		/* devices that cannot do ARP */
+		if ((dev->driver_info->flags & FLAG_NOARP) != 0)
+			net->flags |= IFF_NOARP;
+
 		/* maybe the remote can't receive an Ethernet MTU */
 		if (net->mtu > (dev->hard_mtu - net->hard_header_len))
 			net->mtu = dev->hard_mtu - net->hard_header_len;
diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h
index bd45eb7bedc8..5de7a220e986 100644
--- a/include/linux/usb/usbnet.h
+++ b/include/linux/usb/usbnet.h
@@ -100,6 +100,7 @@ struct driver_info {
 #define FLAG_LINK_INTR	0x0800		/* updates link (carrier) status */
 
 #define FLAG_POINTTOPOINT 0x1000	/* possibly use "usb%d" names */
+#define FLAG_NOARP	0x2000		/* device can't do ARP */
 
 /*
  * Indicates to usbnet, that USB driver accumulates multiple IP packets.
-- 
cgit v1.2.3


From afe759511808cd5bb508b598007cf0c7b0ca8e08 Mon Sep 17 00:00:00 2001
From: Aaron Lu <aaron.lu@intel.com>
Date: Tue, 15 Jan 2013 17:20:58 +0800
Subject: libata: identify and init ZPODD devices

The ODD can be enabled for ZPODD if the following three conditions are
satisfied:
1 The ODD supports device attention;
2 The platform can runtime power off the ODD through ACPI;
3 The ODD is either slot type or drawer type.
For such ODDs, zpodd_init is called and a new structure is allocated for
it to store ZPODD related stuffs.

And the zpodd_dev_enabled function is used to test if ZPODD is currently
enabled for this ODD.

A new config CONFIG_SATA_ZPODD is added to selectively build ZPODD code.

Signed-off-by: Aaron Lu <aaron.lu@intel.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 drivers/ata/Kconfig        |  13 ++++++
 drivers/ata/Makefile       |   1 +
 drivers/ata/libata-core.c  |   4 +-
 drivers/ata/libata-scsi.c  |   2 +
 drivers/ata/libata-zpodd.c | 100 +++++++++++++++++++++++++++++++++++++++++++++
 drivers/ata/libata.h       |  14 +++++++
 include/linux/libata.h     |   3 ++
 include/uapi/linux/cdrom.h |  34 +++++++++++++++
 8 files changed, 170 insertions(+), 1 deletion(-)
 create mode 100644 drivers/ata/libata-zpodd.c

(limited to 'include/linux')

diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig
index e08d322d01d7..996d16c9c6e5 100644
--- a/drivers/ata/Kconfig
+++ b/drivers/ata/Kconfig
@@ -58,6 +58,19 @@ config ATA_ACPI
 	  You can disable this at kernel boot time by using the
 	  option libata.noacpi=1
 
+config SATA_ZPODD
+	bool "SATA Zero Power ODD Support"
+	depends on ATA_ACPI
+	default n
+	help
+	  This option adds support for SATA ZPODD. It requires both
+	  ODD and the platform support, and if enabled, will automatically
+	  power on/off the ODD when certain condition is satisfied. This
+	  does not impact user's experience of the ODD, only power is saved
+	  when ODD is not in use(i.e. no disc inside).
+
+	  If unsure, say N.
+
 config SATA_PMP
 	bool "SATA Port Multiplier support"
 	default y
diff --git a/drivers/ata/Makefile b/drivers/ata/Makefile
index 9329dafba91b..85e3de463ed1 100644
--- a/drivers/ata/Makefile
+++ b/drivers/ata/Makefile
@@ -107,3 +107,4 @@ libata-y	:= libata-core.o libata-scsi.o libata-eh.o libata-transport.o
 libata-$(CONFIG_ATA_SFF)	+= libata-sff.o
 libata-$(CONFIG_SATA_PMP)	+= libata-pmp.o
 libata-$(CONFIG_ATA_ACPI)	+= libata-acpi.o
+libata-$(CONFIG_SATA_ZPODD)	+= libata-zpodd.o
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 275941b576a8..c7ecd8492f1e 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -2401,8 +2401,10 @@ int ata_dev_configure(struct ata_device *dev)
 			dma_dir_string = ", DMADIR";
 		}
 
-		if (ata_id_has_da(dev->id))
+		if (ata_id_has_da(dev->id)) {
 			dev->flags |= ATA_DFLAG_DA;
+			zpodd_init(dev);
+		}
 
 		/* print device info to dmesg */
 		if (ata_msg_drv(ap) && print_info)
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index 7c337e754dab..1ff018525e3b 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -3755,6 +3755,8 @@ static void ata_scsi_remove_dev(struct ata_device *dev)
 	mutex_lock(&ap->scsi_host->scan_mutex);
 	spin_lock_irqsave(ap->lock, flags);
 
+	if (zpodd_dev_enabled(dev))
+		zpodd_exit(dev);
 	ata_acpi_unbind(dev);
 
 	/* clearing dev->sdev is protected by host lock */
diff --git a/drivers/ata/libata-zpodd.c b/drivers/ata/libata-zpodd.c
new file mode 100644
index 000000000000..27eed2f09a8a
--- /dev/null
+++ b/drivers/ata/libata-zpodd.c
@@ -0,0 +1,100 @@
+#include <linux/libata.h>
+#include <linux/cdrom.h>
+
+#include "libata.h"
+
+enum odd_mech_type {
+	ODD_MECH_TYPE_SLOT,
+	ODD_MECH_TYPE_DRAWER,
+	ODD_MECH_TYPE_UNSUPPORTED,
+};
+
+struct zpodd {
+	enum odd_mech_type	mech_type; /* init during probe, RO afterwards */
+	struct ata_device	*dev;
+};
+
+/* Per the spec, only slot type and drawer type ODD can be supported */
+static enum odd_mech_type zpodd_get_mech_type(struct ata_device *dev)
+{
+	char buf[16];
+	unsigned int ret;
+	struct rm_feature_desc *desc = (void *)(buf + 8);
+	struct ata_taskfile tf = {};
+
+	char cdb[] = {  GPCMD_GET_CONFIGURATION,
+			2,      /* only 1 feature descriptor requested */
+			0, 3,   /* 3, removable medium feature */
+			0, 0, 0,/* reserved */
+			0, sizeof(buf),
+			0, 0, 0,
+	};
+
+	tf.flags = ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
+	tf.command = ATA_CMD_PACKET;
+	tf.protocol = ATAPI_PROT_PIO;
+	tf.lbam = sizeof(buf);
+
+	ret = ata_exec_internal(dev, &tf, cdb, DMA_FROM_DEVICE,
+				buf, sizeof(buf), 0);
+	if (ret)
+		return ODD_MECH_TYPE_UNSUPPORTED;
+
+	if (be16_to_cpu(desc->feature_code) != 3)
+		return ODD_MECH_TYPE_UNSUPPORTED;
+
+	if (desc->mech_type == 0 && desc->load == 0 && desc->eject == 1)
+		return ODD_MECH_TYPE_SLOT;
+	else if (desc->mech_type == 1 && desc->load == 0 && desc->eject == 1)
+		return ODD_MECH_TYPE_DRAWER;
+	else
+		return ODD_MECH_TYPE_UNSUPPORTED;
+}
+
+static bool odd_can_poweroff(struct ata_device *ata_dev)
+{
+	acpi_handle handle;
+	acpi_status status;
+	struct acpi_device *acpi_dev;
+
+	handle = ata_dev_acpi_handle(ata_dev);
+	if (!handle)
+		return false;
+
+	status = acpi_bus_get_device(handle, &acpi_dev);
+	if (ACPI_FAILURE(status))
+		return false;
+
+	return acpi_device_can_poweroff(acpi_dev);
+}
+
+void zpodd_init(struct ata_device *dev)
+{
+	enum odd_mech_type mech_type;
+	struct zpodd *zpodd;
+
+	if (dev->zpodd)
+		return;
+
+	if (!odd_can_poweroff(dev))
+		return;
+
+	mech_type = zpodd_get_mech_type(dev);
+	if (mech_type == ODD_MECH_TYPE_UNSUPPORTED)
+		return;
+
+	zpodd = kzalloc(sizeof(struct zpodd), GFP_KERNEL);
+	if (!zpodd)
+		return;
+
+	zpodd->mech_type = mech_type;
+
+	zpodd->dev = dev;
+	dev->zpodd = zpodd;
+}
+
+void zpodd_exit(struct ata_device *dev)
+{
+	kfree(dev->zpodd);
+	dev->zpodd = NULL;
+}
diff --git a/drivers/ata/libata.h b/drivers/ata/libata.h
index 7148a58020b9..a21740b4ee11 100644
--- a/drivers/ata/libata.h
+++ b/drivers/ata/libata.h
@@ -230,4 +230,18 @@ static inline void ata_sff_exit(void)
 { }
 #endif /* CONFIG_ATA_SFF */
 
+/* libata-zpodd.c */
+#ifdef CONFIG_SATA_ZPODD
+void zpodd_init(struct ata_device *dev);
+void zpodd_exit(struct ata_device *dev);
+static inline bool zpodd_dev_enabled(struct ata_device *dev)
+{
+	return dev->zpodd != NULL;
+}
+#else /* CONFIG_SATA_ZPODD */
+static inline void zpodd_init(struct ata_device *dev) {}
+static inline void zpodd_exit(struct ata_device *dev) {}
+static inline bool zpodd_dev_enabled(struct ata_device *dev) { return false; }
+#endif /* CONFIG_SATA_ZPODD */
+
 #endif /* __LIBATA_H__ */
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 7ae207eb29a0..65ff67e34b77 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -620,6 +620,9 @@ struct ata_device {
 #ifdef CONFIG_ATA_ACPI
 	union acpi_object	*gtf_cache;
 	unsigned int		gtf_filter;
+#endif
+#ifdef CONFIG_SATA_ZPODD
+	void			*zpodd;
 #endif
 	struct device		tdev;
 	/* n_sector is CLEAR_BEGIN, read comment above CLEAR_BEGIN */
diff --git a/include/uapi/linux/cdrom.h b/include/uapi/linux/cdrom.h
index 898b866b300c..bd17ad5aa06d 100644
--- a/include/uapi/linux/cdrom.h
+++ b/include/uapi/linux/cdrom.h
@@ -908,5 +908,39 @@ struct mode_page_header {
 	__be16 desc_length;
 };
 
+/* removable medium feature descriptor */
+struct rm_feature_desc {
+	__be16 feature_code;
+#if defined(__BIG_ENDIAN_BITFIELD)
+	__u8 reserved1:2;
+	__u8 feature_version:4;
+	__u8 persistent:1;
+	__u8 curr:1;
+#elif defined(__LITTLE_ENDIAN_BITFIELD)
+	__u8 curr:1;
+	__u8 persistent:1;
+	__u8 feature_version:4;
+	__u8 reserved1:2;
+#endif
+	__u8 add_len;
+#if defined(__BIG_ENDIAN_BITFIELD)
+	__u8 mech_type:3;
+	__u8 load:1;
+	__u8 eject:1;
+	__u8 pvnt_jmpr:1;
+	__u8 dbml:1;
+	__u8 lock:1;
+#elif defined(__LITTLE_ENDIAN_BITFIELD)
+	__u8 lock:1;
+	__u8 dbml:1;
+	__u8 pvnt_jmpr:1;
+	__u8 eject:1;
+	__u8 load:1;
+	__u8 mech_type:3;
+#endif
+	__u8 reserved2;
+	__u8 reserved3;
+	__u8 reserved4;
+};
 
 #endif /* _UAPI_LINUX_CDROM_H */
-- 
cgit v1.2.3


From b000c8065a92b0fe0e1694f41b2c8d8ba7b7b1ec Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Fri, 18 Jan 2013 10:31:20 -0500
Subject: tracing: Remove the extra 4 bytes of padding in events

Due to a userspace issue with PowerTop v2beta, which hardcoded
the offset of event fields that it was using, it broke when
we removed the Big Kernel Lock counter from the event header.

 (commit e6e1e2593 "tracing: Remove lock_depth from event entry")

Because this broke userspace, it was determined that we must
keep those 4 bytes around.

 (commit a3a4a5acd "Regression: partial revert "tracing: Remove lock_depth from event entry"")

This unfortunately wastes space in the ring buffer. 4 bytes per
event, where a lot of events are just 24 bytes. That's 16% of the
buffer wasted. A million events will add 4 megs of white space
into the buffer.

It was later noticed that PowerTop v2beta could not work on systems
where the kernel was 64 bit but the userspace was 32 bits.
The reason was because the offsets are different between the
two and the hard coded offset of one would not work with the other.

With PowerTop v2 final, it implemented the same interface that both
perf and trace-cmd use. That is, it reads the format file of
the event to find the offsets of the fields it needs. This fixes
the problem with running powertop on a 32 bit userspace running
on a 64 bit kernel. It also no longer requires the 4 byte padding.

As PowerTop v2 has been out for a while, and is included in all
major distributions, it is time that we can safely remove the
4 bytes of padding. Users of PowerTop v2beta should upgrade to
PowerTop v2 final.

Cc: Linus Torvalds <torvalds@linux-foundation.org>
Acked-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace_event.h | 1 -
 kernel/trace/trace.c         | 1 -
 kernel/trace/trace_events.c  | 1 -
 3 files changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 43ef8b6cce7f..6f8d0b77006b 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -49,7 +49,6 @@ struct trace_entry {
 	unsigned char		flags;
 	unsigned char		preempt_count;
 	int			pid;
-	int			padding;
 };
 
 #define FTRACE_MAX_EVENT						\
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index d62248dfda76..a387bd271e71 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1173,7 +1173,6 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
 
 	entry->preempt_count		= pc & 0xff;
 	entry->pid			= (tsk) ? tsk->pid : 0;
-	entry->padding			= 0;
 	entry->flags =
 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
 		(irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 880073d0b946..57e9b284250c 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -116,7 +116,6 @@ static int trace_define_common_fields(void)
 	__common_field(unsigned char, flags);
 	__common_field(unsigned char, preempt_count);
 	__common_field(int, pid);
-	__common_field(int, padding);
 
 	return ret;
 }
-- 
cgit v1.2.3


From 6a89a314ab107a12af08c71420c19a37a30fc2d3 Mon Sep 17 00:00:00 2001
From: Shawn Guo <shawn.guo@linaro.org>
Date: Fri, 18 Jan 2013 15:57:46 +0800
Subject: gpio: devm_gpio_* support should not depend on GPIOLIB

Some architectures (e.g. blackfin) provide gpio API without requiring
GPIOLIB support (ARCH_WANT_OPTIONAL_GPIOLIB).  devm_gpio_* functions
should also work for these architectures, since they do not really
depend on GPIOLIB.

Add a new option GPIO_DEVRES (enabled by default) to control the build
of devres.c.  It also removes the empty version of devm_gpio_*
functions for !GENERIC_GPIO build from linux/gpio.h, and moves the
function declarations from asm-generic/gpio.h into linux/gpio.h.

Signed-off-by: Shawn Guo <shawn.guo@linaro.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/gpio/Kconfig       |  3 +++
 drivers/gpio/Makefile      |  3 ++-
 include/asm-generic/gpio.h |  6 ------
 include/linux/gpio.h       | 28 ++++++++--------------------
 4 files changed, 13 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index 682de754d63f..d9729322b1c7 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -30,6 +30,9 @@ config ARCH_REQUIRE_GPIOLIB
 	  Selecting this from the architecture code will cause the gpiolib
 	  code to always get built in.
 
+config GPIO_DEVRES
+	def_bool y
+	depends on HAS_IOMEM
 
 
 menuconfig GPIOLIB
diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile
index c5aebd008dde..36ca605ff038 100644
--- a/drivers/gpio/Makefile
+++ b/drivers/gpio/Makefile
@@ -2,7 +2,8 @@
 
 ccflags-$(CONFIG_DEBUG_GPIO)	+= -DDEBUG
 
-obj-$(CONFIG_GPIOLIB)		+= gpiolib.o devres.o
+obj-$(CONFIG_GPIO_DEVRES)	+= devres.o
+obj-$(CONFIG_GPIOLIB)		+= gpiolib.o
 obj-$(CONFIG_OF_GPIO)		+= gpiolib-of.o
 obj-$(CONFIG_GPIO_ACPI)		+= gpiolib-acpi.o
 
diff --git a/include/asm-generic/gpio.h b/include/asm-generic/gpio.h
index 23410147555f..45b8916805f3 100644
--- a/include/asm-generic/gpio.h
+++ b/include/asm-generic/gpio.h
@@ -192,12 +192,6 @@ extern int gpio_request_one(unsigned gpio, unsigned long flags, const char *labe
 extern int gpio_request_array(const struct gpio *array, size_t num);
 extern void gpio_free_array(const struct gpio *array, size_t num);
 
-/* bindings for managed devices that want to request gpios */
-int devm_gpio_request(struct device *dev, unsigned gpio, const char *label);
-int devm_gpio_request_one(struct device *dev, unsigned gpio,
-			  unsigned long flags, const char *label);
-void devm_gpio_free(struct device *dev, unsigned int gpio);
-
 #ifdef CONFIG_GPIO_SYSFS
 
 /*
diff --git a/include/linux/gpio.h b/include/linux/gpio.h
index bfe665621536..f6c7ae3e223b 100644
--- a/include/linux/gpio.h
+++ b/include/linux/gpio.h
@@ -94,24 +94,12 @@ static inline int gpio_request(unsigned gpio, const char *label)
 	return -ENOSYS;
 }
 
-static inline int devm_gpio_request(struct device *dev, unsigned gpio,
-				    const char *label)
-{
-	return -ENOSYS;
-}
-
 static inline int gpio_request_one(unsigned gpio,
 					unsigned long flags, const char *label)
 {
 	return -ENOSYS;
 }
 
-static inline int devm_gpio_request_one(struct device *dev, unsigned gpio,
-					unsigned long flags, const char *label)
-{
-	return -ENOSYS;
-}
-
 static inline int gpio_request_array(const struct gpio *array, size_t num)
 {
 	return -ENOSYS;
@@ -125,14 +113,6 @@ static inline void gpio_free(unsigned gpio)
 	WARN_ON(1);
 }
 
-static inline void devm_gpio_free(struct device *dev, unsigned gpio)
-{
-	might_sleep();
-
-	/* GPIO can never have been requested */
-	WARN_ON(1);
-}
-
 static inline void gpio_free_array(const struct gpio *array, size_t num)
 {
 	might_sleep();
@@ -248,4 +228,12 @@ gpiochip_remove_pin_ranges(struct gpio_chip *chip)
 
 #endif /* ! CONFIG_GENERIC_GPIO */
 
+struct device;
+
+/* bindings for managed devices that want to request gpios */
+int devm_gpio_request(struct device *dev, unsigned gpio, const char *label);
+int devm_gpio_request_one(struct device *dev, unsigned gpio,
+			  unsigned long flags, const char *label);
+void devm_gpio_free(struct device *dev, unsigned int gpio);
+
 #endif /* __LINUX_GPIO_H */
-- 
cgit v1.2.3


From 00441b5e6b98ad6a50b5cb7f88d473e3ea1e0d75 Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Wed, 9 Jan 2013 10:06:03 +0000
Subject: mfd: Fix compile errors and warnings when !CONFIG_AB8500_BM
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

drivers/mfd/ab8500-core.c:1015:21: error: ‘ab8500_bm_data’ undeclared here

include/linux/mfd/abx500/ab8500-bm.h:445:13: warning: ‘ab8500_fg_reinit’ defined but not used
include/linux/mfd/abx500/ab8500-bm.h:448:13: warning: ‘ab8500_charger_usb_state_changed’ defined but not used
include/linux/mfd/abx500/ab8500-bm.h:451:29: warning: ‘ab8500_btemp_get’ defined but not used
include/linux/mfd/abx500/ab8500-bm.h:455:12: warning: ‘ab8500_btemp_get_batctrl_temp’ defined but not used
include/linux/mfd/abx500/ab8500-bm.h:463:12: warning: ‘ab8500_fg_inst_curr_blocking’ defined but not used
include/linux/mfd/abx500/ab8500-bm.h:442:12: warning: ‘ab8500_fg_inst_curr_done’ defined but not used
include/linux/mfd/abx500/ab8500-bm.h:447:26: warning: ‘ab8500_fg_get’ defined but not used

Signed-off-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/ab8500-core.c            |  1 +
 include/linux/mfd/abx500.h           |  2 --
 include/linux/mfd/abx500/ab8500-bm.h | 29 ++++-------------------------
 3 files changed, 5 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/ab8500-core.c b/drivers/mfd/ab8500-core.c
index e1650badd106..4778bb124efe 100644
--- a/drivers/mfd/ab8500-core.c
+++ b/drivers/mfd/ab8500-core.c
@@ -19,6 +19,7 @@
 #include <linux/mfd/core.h>
 #include <linux/mfd/abx500.h>
 #include <linux/mfd/abx500/ab8500.h>
+#include <linux/mfd/abx500/ab8500-bm.h>
 #include <linux/mfd/dbx500-prcmu.h>
 #include <linux/regulator/ab8500.h>
 #include <linux/of.h>
diff --git a/include/linux/mfd/abx500.h b/include/linux/mfd/abx500.h
index 2138bd33021a..e53dcfeaee69 100644
--- a/include/linux/mfd/abx500.h
+++ b/include/linux/mfd/abx500.h
@@ -272,8 +272,6 @@ struct abx500_bm_data {
 	const struct abx500_fg_parameters *fg_params;
 };
 
-extern struct abx500_bm_data ab8500_bm_data;
-
 enum {
 	NTC_EXTERNAL = 0,
 	NTC_INTERNAL,
diff --git a/include/linux/mfd/abx500/ab8500-bm.h b/include/linux/mfd/abx500/ab8500-bm.h
index 44310c98ee6e..9bd037df97d9 100644
--- a/include/linux/mfd/abx500/ab8500-bm.h
+++ b/include/linux/mfd/abx500/ab8500-bm.h
@@ -422,7 +422,10 @@ struct ab8500_chargalg_platform_data {
 struct ab8500_btemp;
 struct ab8500_gpadc;
 struct ab8500_fg;
+
 #ifdef CONFIG_AB8500_BM
+extern struct abx500_bm_data ab8500_bm_data;
+
 void ab8500_fg_reinit(void);
 void ab8500_charger_usb_state_changed(u8 bm_usb_state, u16 mA);
 struct ab8500_btemp *ab8500_btemp_get(void);
@@ -434,31 +437,7 @@ int ab8500_fg_inst_curr_finalize(struct ab8500_fg *di, int *res);
 int ab8500_fg_inst_curr_done(struct ab8500_fg *di);
 
 #else
-int ab8500_fg_inst_curr_done(struct ab8500_fg *di)
-{
-}
-static void ab8500_fg_reinit(void)
-{
-}
-static void ab8500_charger_usb_state_changed(u8 bm_usb_state, u16 mA)
-{
-}
-static struct ab8500_btemp *ab8500_btemp_get(void)
-{
-	return NULL;
-}
-static int ab8500_btemp_get_batctrl_temp(struct ab8500_btemp *btemp)
-{
-	return 0;
-}
-struct ab8500_fg *ab8500_fg_get(void)
-{
-	return NULL;
-}
-static int ab8500_fg_inst_curr_blocking(struct ab8500_fg *dev)
-{
-	return -ENODEV;
-}
+static struct abx500_bm_data ab8500_bm_data;
 
 static inline int ab8500_fg_inst_curr_start(struct ab8500_fg *di)
 {
-- 
cgit v1.2.3


From 910ffdb18a6408e14febbb6e4b6840fd2c928c82 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Mon, 21 Jan 2013 20:47:41 +0100
Subject: ptrace: introduce signal_wake_up_state() and ptrace_signal_wake_up()

Cleanup and preparation for the next change.

signal_wake_up(resume => true) is overused. None of ptrace/jctl callers
actually want to wakeup a TASK_WAKEKILL task, but they can't specify the
necessary mask.

Turn signal_wake_up() into signal_wake_up_state(state), reintroduce
signal_wake_up() as a trivial helper, and add ptrace_signal_wake_up()
which adds __TASK_TRACED.

This way ptrace_signal_wake_up() can work "inside" ptrace_request()
even if the tracee doesn't have the TASK_WAKEKILL bit set.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched.h | 11 ++++++++++-
 kernel/ptrace.c       |  8 ++++----
 kernel/signal.c       | 14 ++++----------
 3 files changed, 18 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 6fc8f45de4e9..d2112477ff5e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2714,7 +2714,16 @@ static inline void thread_group_cputime_init(struct signal_struct *sig)
 extern void recalc_sigpending_and_wake(struct task_struct *t);
 extern void recalc_sigpending(void);
 
-extern void signal_wake_up(struct task_struct *t, int resume_stopped);
+extern void signal_wake_up_state(struct task_struct *t, unsigned int state);
+
+static inline void signal_wake_up(struct task_struct *t, bool resume)
+{
+	signal_wake_up_state(t, resume ? TASK_WAKEKILL : 0);
+}
+static inline void ptrace_signal_wake_up(struct task_struct *t, bool resume)
+{
+	signal_wake_up_state(t, resume ? __TASK_TRACED : 0);
+}
 
 /*
  * Wrappers for p->thread_info->cpu access. No-op on UP.
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 612a56126851..62f7c2774b16 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -117,7 +117,7 @@ void __ptrace_unlink(struct task_struct *child)
 	 * TASK_KILLABLE sleeps.
 	 */
 	if (child->jobctl & JOBCTL_STOP_PENDING || task_is_traced(child))
-		signal_wake_up(child, task_is_traced(child));
+		ptrace_signal_wake_up(child, true);
 
 	spin_unlock(&child->sighand->siglock);
 }
@@ -317,7 +317,7 @@ static int ptrace_attach(struct task_struct *task, long request,
 	 */
 	if (task_is_stopped(task) &&
 	    task_set_jobctl_pending(task, JOBCTL_TRAP_STOP | JOBCTL_TRAPPING))
-		signal_wake_up(task, 1);
+		signal_wake_up_state(task, __TASK_STOPPED);
 
 	spin_unlock(&task->sighand->siglock);
 
@@ -737,7 +737,7 @@ int ptrace_request(struct task_struct *child, long request,
 		 * tracee into STOP.
 		 */
 		if (likely(task_set_jobctl_pending(child, JOBCTL_TRAP_STOP)))
-			signal_wake_up(child, child->jobctl & JOBCTL_LISTENING);
+			ptrace_signal_wake_up(child, child->jobctl & JOBCTL_LISTENING);
 
 		unlock_task_sighand(child, &flags);
 		ret = 0;
@@ -763,7 +763,7 @@ int ptrace_request(struct task_struct *child, long request,
 			 * start of this trap and now.  Trigger re-trap.
 			 */
 			if (child->jobctl & JOBCTL_TRAP_NOTIFY)
-				signal_wake_up(child, true);
+				ptrace_signal_wake_up(child, true);
 			ret = 0;
 		}
 		unlock_task_sighand(child, &flags);
diff --git a/kernel/signal.c b/kernel/signal.c
index 53cd5c4d1172..6e97aa6fa32c 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -680,23 +680,17 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info)
  * No need to set need_resched since signal event passing
  * goes through ->blocked
  */
-void signal_wake_up(struct task_struct *t, int resume)
+void signal_wake_up_state(struct task_struct *t, unsigned int state)
 {
-	unsigned int mask;
-
 	set_tsk_thread_flag(t, TIF_SIGPENDING);
-
 	/*
-	 * For SIGKILL, we want to wake it up in the stopped/traced/killable
+	 * TASK_WAKEKILL also means wake it up in the stopped/traced/killable
 	 * case. We don't check t->state here because there is a race with it
 	 * executing another processor and just now entering stopped state.
 	 * By using wake_up_state, we ensure the process will wake up and
 	 * handle its death signal.
 	 */
-	mask = TASK_INTERRUPTIBLE;
-	if (resume)
-		mask |= TASK_WAKEKILL;
-	if (!wake_up_state(t, mask))
+	if (!wake_up_state(t, state | TASK_INTERRUPTIBLE))
 		kick_process(t);
 }
 
@@ -844,7 +838,7 @@ static void ptrace_trap_notify(struct task_struct *t)
 	assert_spin_locked(&t->sighand->siglock);
 
 	task_set_jobctl_pending(t, JOBCTL_TRAP_NOTIFY);
-	signal_wake_up(t, t->jobctl & JOBCTL_LISTENING);
+	ptrace_signal_wake_up(t, t->jobctl & JOBCTL_LISTENING);
 }
 
 /*
-- 
cgit v1.2.3


From 75096579c3ac39ddc2f8b0d9a8924eba31f4d920 Mon Sep 17 00:00:00 2001
From: Thierry Reding <thierry.reding@avionic-design.de>
Date: Mon, 21 Jan 2013 11:08:54 +0100
Subject: lib: devres: Introduce devm_ioremap_resource()

The devm_request_and_ioremap() function is very useful and helps avoid a
whole lot of boilerplate. However, one issue that keeps popping up is
its lack of a specific error code to determine which of the steps that
it performs failed. Furthermore, while the function gives an example and
suggests what error code to return on failure, a wide variety of error
codes are used throughout the tree.

In an attempt to fix these problems, this patch adds a new function that
drivers can transition to. The devm_ioremap_resource() returns a pointer
to the remapped I/O memory on success or an ERR_PTR() encoded error code
on failure. Callers can check for failure using IS_ERR() and determine
its cause by extracting the error code using PTR_ERR().

devm_request_and_ioremap() is implemented as a wrapper around the new
API and return NULL on failure as before. This ensures that backwards
compatibility is maintained until all users have been converted to the
new API, at which point the old devm_request_and_ioremap() function
should be removed.

A semantic patch is included which can be used to convert from the old
devm_request_and_ioremap() API to the new devm_ioremap_resource() API.
Some non-trivial cases may require manual intervention, though.

Signed-off-by: Thierry Reding <thierry.reding@avionic-design.de>
Cc: Arnd Bergmann <arnd@arndb.de>
Acked-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/driver-model/devres.txt              |  3 +-
 include/linux/device.h                             |  1 +
 lib/devres.c                                       | 57 ++++++++++----
 scripts/coccinelle/api/devm_ioremap_resource.cocci | 90 ++++++++++++++++++++++
 4 files changed, 137 insertions(+), 14 deletions(-)
 create mode 100644 scripts/coccinelle/api/devm_ioremap_resource.cocci

(limited to 'include/linux')

diff --git a/Documentation/driver-model/devres.txt b/Documentation/driver-model/devres.txt
index db5e4448e6cd..b4671459857f 100644
--- a/Documentation/driver-model/devres.txt
+++ b/Documentation/driver-model/devres.txt
@@ -266,7 +266,8 @@ IOMAP
   devm_ioremap()
   devm_ioremap_nocache()
   devm_iounmap()
-  devm_request_and_ioremap() : checks resource, requests region, ioremaps
+  devm_ioremap_resource() : checks resource, requests memory region, ioremaps
+  devm_request_and_ioremap() : obsoleted by devm_ioremap_resource()
   pcim_iomap()
   pcim_iounmap()
   pcim_iomap_table()	: array of mapped addresses indexed by BAR
diff --git a/include/linux/device.h b/include/linux/device.h
index 43dcda937ddf..251f33b21ef9 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -573,6 +573,7 @@ extern int devres_release_group(struct device *dev, void *id);
 extern void *devm_kzalloc(struct device *dev, size_t size, gfp_t gfp);
 extern void devm_kfree(struct device *dev, void *p);
 
+void __iomem *devm_ioremap_resource(struct device *dev, struct resource *res);
 void __iomem *devm_request_and_ioremap(struct device *dev,
 			struct resource *res);
 
diff --git a/lib/devres.c b/lib/devres.c
index 80b9c76d436a..9c76b3a9cc72 100644
--- a/lib/devres.c
+++ b/lib/devres.c
@@ -86,22 +86,24 @@ void devm_iounmap(struct device *dev, void __iomem *addr)
 EXPORT_SYMBOL(devm_iounmap);
 
 /**
- * devm_request_and_ioremap() - Check, request region, and ioremap resource
- * @dev: Generic device to handle the resource for
+ * devm_ioremap_resource() - check, request region, and ioremap resource
+ * @dev: generic device to handle the resource for
  * @res: resource to be handled
  *
- * Takes all necessary steps to ioremap a mem resource. Uses managed device, so
- * everything is undone on driver detach. Checks arguments, so you can feed
- * it the result from e.g. platform_get_resource() directly. Returns the
- * remapped pointer or NULL on error. Usage example:
+ * Checks that a resource is a valid memory region, requests the memory region
+ * and ioremaps it either as cacheable or as non-cacheable memory depending on
+ * the resource's flags. All operations are managed and will be undone on
+ * driver detach.
+ *
+ * Returns a pointer to the remapped memory or an ERR_PTR() encoded error code
+ * on failure. Usage example:
  *
  *	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- *	base = devm_request_and_ioremap(&pdev->dev, res);
- *	if (!base)
- *		return -EADDRNOTAVAIL;
+ *	base = devm_ioremap_resource(&pdev->dev, res);
+ *	if (IS_ERR(base))
+ *		return PTR_ERR(base);
  */
-void __iomem *devm_request_and_ioremap(struct device *dev,
-			struct resource *res)
+void __iomem *devm_ioremap_resource(struct device *dev, struct resource *res)
 {
 	resource_size_t size;
 	const char *name;
@@ -111,7 +113,7 @@ void __iomem *devm_request_and_ioremap(struct device *dev,
 
 	if (!res || resource_type(res) != IORESOURCE_MEM) {
 		dev_err(dev, "invalid resource\n");
-		return NULL;
+		return ERR_PTR(-EINVAL);
 	}
 
 	size = resource_size(res);
@@ -119,7 +121,7 @@ void __iomem *devm_request_and_ioremap(struct device *dev,
 
 	if (!devm_request_mem_region(dev, res->start, size, name)) {
 		dev_err(dev, "can't request region for resource %pR\n", res);
-		return NULL;
+		return ERR_PTR(-EBUSY);
 	}
 
 	if (res->flags & IORESOURCE_CACHEABLE)
@@ -130,10 +132,39 @@ void __iomem *devm_request_and_ioremap(struct device *dev,
 	if (!dest_ptr) {
 		dev_err(dev, "ioremap failed for resource %pR\n", res);
 		devm_release_mem_region(dev, res->start, size);
+		dest_ptr = ERR_PTR(-ENOMEM);
 	}
 
 	return dest_ptr;
 }
+EXPORT_SYMBOL(devm_ioremap_resource);
+
+/**
+ * devm_request_and_ioremap() - Check, request region, and ioremap resource
+ * @dev: Generic device to handle the resource for
+ * @res: resource to be handled
+ *
+ * Takes all necessary steps to ioremap a mem resource. Uses managed device, so
+ * everything is undone on driver detach. Checks arguments, so you can feed
+ * it the result from e.g. platform_get_resource() directly. Returns the
+ * remapped pointer or NULL on error. Usage example:
+ *
+ *	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ *	base = devm_request_and_ioremap(&pdev->dev, res);
+ *	if (!base)
+ *		return -EADDRNOTAVAIL;
+ */
+void __iomem *devm_request_and_ioremap(struct device *device,
+				       struct resource *res)
+{
+	void __iomem *dest_ptr;
+
+	dest_ptr = devm_ioremap_resource(device, res);
+	if (IS_ERR(dest_ptr))
+		return NULL;
+
+	return dest_ptr;
+}
 EXPORT_SYMBOL(devm_request_and_ioremap);
 
 #ifdef CONFIG_HAS_IOPORT
diff --git a/scripts/coccinelle/api/devm_ioremap_resource.cocci b/scripts/coccinelle/api/devm_ioremap_resource.cocci
new file mode 100644
index 000000000000..495daa3dbf77
--- /dev/null
+++ b/scripts/coccinelle/api/devm_ioremap_resource.cocci
@@ -0,0 +1,90 @@
+virtual patch
+virtual report
+
+@depends on patch@
+expression base, dev, res;
+@@
+
+-base = devm_request_and_ioremap(dev, res);
++base = devm_ioremap_resource(dev, res);
+ ...
+ if (
+-base == NULL
++IS_ERR(base)
+ || ...) {
+<...
+-	return ...;
++	return PTR_ERR(base);
+...>
+ }
+
+@depends on patch@
+expression e, E, ret;
+identifier l;
+@@
+
+ e = devm_ioremap_resource(...);
+ ...
+ if (IS_ERR(e) || ...) {
+ 	... when any
+-	ret = E;
++	ret = PTR_ERR(e);
+ 	...
+(
+ 	return ret;
+|
+ 	goto l;
+)
+ }
+
+@depends on patch@
+expression e;
+@@
+
+ e = devm_ioremap_resource(...);
+ ...
+ if (IS_ERR(e) || ...) {
+ 	...
+-	\(dev_dbg\|dev_err\|pr_debug\|pr_err\|DRM_ERROR\)(...);
+ 	...
+ }
+
+@depends on patch@
+expression e;
+identifier l;
+@@
+
+ e = devm_ioremap_resource(...);
+ ...
+ if (IS_ERR(e) || ...)
+-{
+(
+ 	return ...;
+|
+ 	goto l;
+)
+-}
+
+@r depends on report@
+expression e;
+identifier l;
+position p1;
+@@
+
+*e = devm_request_and_ioremap@p1(...);
+ ...
+ if (e == NULL || ...) {
+ 	...
+(
+ 	return ...;
+|
+ 	goto l;
+)
+ }
+
+@script:python depends on r@
+p1 << r.p1;
+@@
+
+msg = "ERROR: deprecated devm_request_and_ioremap() API used on line %s" % (p1[0].line)
+coccilib.report.print_report(p1[0], msg)
-- 
cgit v1.2.3


From 01ec8c5423901c4fe8d97f786ed9a0c31215b53a Mon Sep 17 00:00:00 2001
From: Michel JAOUEN <michel.jaouen@stericsson.com>
Date: Thu, 26 Apr 2012 10:00:04 +0200
Subject: pm2301: Provide u9540 support for the pm2301 charger

AC charger driver for the DB9540 based platforms.

Signed-off-by: Rajkumar Kasirajan <rajkumar.kasirajan@stericsson.com>
Signed-off-by: Loic Pallardy <loic.pallardy@stericsson.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Reviewed-by: Michel JAOUEN <michel.jaouen@stericsson.com>
Tested-by: Michel JAOUEN <michel.jaouen@stericsson.com>
---
 drivers/power/Kconfig                     |    7 +
 drivers/power/Makefile                    |    1 +
 drivers/power/ab8500_charger.c            |   36 +-
 drivers/power/pm2301_charger.c            | 1455 +++++++++++++++++++++++++++++
 include/linux/mfd/abx500/ab8500-bm.h      |    2 +
 include/linux/mfd/abx500/ux500_chargalg.h |    2 +
 include/linux/pm2301_charger.h            |   60 ++
 7 files changed, 1550 insertions(+), 13 deletions(-)
 create mode 100644 drivers/power/pm2301_charger.c
 create mode 100644 include/linux/pm2301_charger.h

(limited to 'include/linux')

diff --git a/drivers/power/Kconfig b/drivers/power/Kconfig
index 9f45e2f77d53..4811b59ba730 100644
--- a/drivers/power/Kconfig
+++ b/drivers/power/Kconfig
@@ -346,6 +346,13 @@ config AB8500_BM
 	help
 	  Say Y to include support for AB8500 battery management.
 
+config CHARGER_PM2301
+	bool "PM2301 Battery Charger Driver"
+	depends on AB8500_BM
+	help
+	  Say Y to include support for PM2301 charger driver.
+	  Depends on AB8500 battery management core.
+
 source "drivers/power/reset/Kconfig"
 
 endif # POWER_SUPPLY
diff --git a/drivers/power/Makefile b/drivers/power/Makefile
index b11e0c7ea0f1..aa966e806834 100644
--- a/drivers/power/Makefile
+++ b/drivers/power/Makefile
@@ -46,6 +46,7 @@ obj-$(CONFIG_CHARGER_LP8727)	+= lp8727_charger.o
 obj-$(CONFIG_CHARGER_LP8788)	+= lp8788-charger.o
 obj-$(CONFIG_CHARGER_GPIO)	+= gpio-charger.o
 obj-$(CONFIG_CHARGER_MANAGER)	+= charger-manager.o
+obj-$(CONFIG_CHARGER_PM2301)	+= pm2301_charger.o
 obj-$(CONFIG_CHARGER_MAX8997)	+= max8997_charger.o
 obj-$(CONFIG_CHARGER_MAX8998)	+= max8998_charger.o
 obj-$(CONFIG_CHARGER_BQ2415X)	+= bq2415x_charger.o
diff --git a/drivers/power/ab8500_charger.c b/drivers/power/ab8500_charger.c
index d5a8bdadb49a..43ec82ba4275 100644
--- a/drivers/power/ab8500_charger.c
+++ b/drivers/power/ab8500_charger.c
@@ -2830,8 +2830,11 @@ static int ab8500_charger_remove(struct platform_device *pdev)
 	destroy_workqueue(di->charger_wq);
 
 	flush_scheduled_work();
-	power_supply_unregister(&di->usb_chg.psy);
-	power_supply_unregister(&di->ac_chg.psy);
+	if(di->usb_chg.enabled)
+		power_supply_unregister(&di->usb_chg.psy);
+	if(di->ac_chg.enabled)
+		power_supply_unregister(&di->ac_chg.psy);
+
 	platform_set_drvdata(pdev, NULL);
 
 	return 0;
@@ -2899,6 +2902,7 @@ static int ab8500_charger_probe(struct platform_device *pdev)
 		ARRAY_SIZE(ab8500_charger_voltage_map) - 1];
 	di->ac_chg.max_out_curr = ab8500_charger_current_map[
 		ARRAY_SIZE(ab8500_charger_current_map) - 1];
+	di->ac_chg.enabled = di->pdata->ac_enabled;
 
 	/* USB supply */
 	/* power_supply base class */
@@ -2917,7 +2921,7 @@ static int ab8500_charger_probe(struct platform_device *pdev)
 		ARRAY_SIZE(ab8500_charger_voltage_map) - 1];
 	di->usb_chg.max_out_curr = ab8500_charger_current_map[
 		ARRAY_SIZE(ab8500_charger_current_map) - 1];
-
+	di->usb_chg.enabled = di->pdata->usb_enabled;
 
 	/* Create a work queue for the charger */
 	di->charger_wq =
@@ -2995,17 +2999,21 @@ static int ab8500_charger_probe(struct platform_device *pdev)
 	}
 
 	/* Register AC charger class */
-	ret = power_supply_register(di->dev, &di->ac_chg.psy);
-	if (ret) {
-		dev_err(di->dev, "failed to register AC charger\n");
-		goto free_charger_wq;
+	if(di->ac_chg.enabled) {
+		ret = power_supply_register(di->dev, &di->ac_chg.psy);
+		if (ret) {
+			dev_err(di->dev, "failed to register AC charger\n");
+			goto free_charger_wq;
+		}
 	}
 
 	/* Register USB charger class */
-	ret = power_supply_register(di->dev, &di->usb_chg.psy);
-	if (ret) {
-		dev_err(di->dev, "failed to register USB charger\n");
-		goto free_ac;
+	if(di->usb_chg.enabled) {
+		ret = power_supply_register(di->dev, &di->usb_chg.psy);
+		if (ret) {
+			dev_err(di->dev, "failed to register USB charger\n");
+			goto free_ac;
+		}
 	}
 
 	di->usb_phy = usb_get_phy(USB_PHY_TYPE_USB2);
@@ -3085,9 +3093,11 @@ free_irq:
 put_usb_phy:
 	usb_put_phy(di->usb_phy);
 free_usb:
-	power_supply_unregister(&di->usb_chg.psy);
+	if(di->usb_chg.enabled)
+		power_supply_unregister(&di->usb_chg.psy);
 free_ac:
-	power_supply_unregister(&di->ac_chg.psy);
+	if(di->ac_chg.enabled)
+		power_supply_unregister(&di->ac_chg.psy);
 free_charger_wq:
 	destroy_workqueue(di->charger_wq);
 	return ret;
diff --git a/drivers/power/pm2301_charger.c b/drivers/power/pm2301_charger.c
new file mode 100644
index 000000000000..60a30323151e
--- /dev/null
+++ b/drivers/power/pm2301_charger.c
@@ -0,0 +1,1455 @@
+/*
+ * Power supply driver for ST Ericsson pm2xxx_charger charger
+ *
+ * Copyright 2012 ST Ericsson.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+#include <linux/platform_device.h>
+#include <linux/power_supply.h>
+#include <linux/completion.h>
+#include <linux/regulator/consumer.h>
+#include <linux/err.h>
+#include <linux/i2c.h>
+#include <linux/workqueue.h>
+#include <linux/kobject.h>
+#include <linux/mfd/abx500.h>
+#include <linux/mfd/abx500/ab8500.h>
+#include <linux/mfd/abx500/ab8500-bm.h>
+#include <linux/mfd/abx500/ab8500-gpadc.h>
+#include <linux/mfd/abx500/ux500_chargalg.h>
+#include <linux/pm2301_charger.h>
+
+#define MAIN_WDOG_ENA			0x01
+#define MAIN_WDOG_KICK			0x02
+#define MAIN_WDOG_DIS			0x00
+#define CHARG_WD_KICK			0x01
+#define MAIN_CH_ENA			0x01
+#define MAIN_CH_NO_OVERSHOOT_ENA_N	0x02
+#define MAIN_CH_DET			0x01
+#define MAIN_CH_CV_ON			0x04
+#define OTP_ENABLE_WD			0x01
+
+#define MAIN_CH_INPUT_CURR_SHIFT	4
+
+#define LED_INDICATOR_PWM_ENA		0x01
+#define LED_INDICATOR_PWM_DIS		0x00
+#define LED_IND_CUR_5MA			0x04
+#define LED_INDICATOR_PWM_DUTY_252_256	0xBF
+
+/* HW failure constants */
+#define MAIN_CH_TH_PROT			0x02
+#define MAIN_CH_NOK			0x01
+
+/* Watchdog timeout constant */
+#define WD_TIMER			0x30 /* 4min */
+#define WD_KICK_INTERVAL		(60 * HZ)
+
+/* Constant voltage/current */
+#define PM2XXX_CONST_CURR		0x0
+#define PM2XXX_CONST_VOLT		0x1
+
+/* Lowest charger voltage is 3.39V -> 0x4E */
+#define LOW_VOLT_REG			0x4E
+
+#define PM2XXX_BATT_CTRL_REG1		0x00
+#define PM2XXX_BATT_CTRL_REG2		0x01
+#define PM2XXX_BATT_CTRL_REG3		0x02
+#define PM2XXX_BATT_CTRL_REG4		0x03
+#define PM2XXX_BATT_CTRL_REG5		0x04
+#define PM2XXX_BATT_CTRL_REG6		0x05
+#define PM2XXX_BATT_CTRL_REG7		0x06
+#define PM2XXX_BATT_CTRL_REG8		0x07
+#define PM2XXX_NTC_CTRL_REG1		0x08
+#define PM2XXX_NTC_CTRL_REG2		0x09
+#define PM2XXX_BATT_CTRL_REG9		0x0A
+#define PM2XXX_BATT_STAT_REG1		0x0B
+#define PM2XXX_INP_VOLT_VPWR2		0x11
+#define PM2XXX_INP_DROP_VPWR2		0x13
+#define PM2XXX_INP_VOLT_VPWR1		0x15
+#define PM2XXX_INP_DROP_VPWR1		0x17
+#define PM2XXX_INP_MODE_VPWR		0x18
+#define PM2XXX_BATT_WD_KICK		0x70
+#define PM2XXX_DEV_VER_STAT		0x0C
+#define PM2XXX_THERM_WARN_CTRL_REG	0x20
+#define PM2XXX_BATT_DISC_REG		0x21
+#define PM2XXX_BATT_LOW_LEV_COMP_REG	0x22
+#define PM2XXX_BATT_LOW_LEV_VAL_REG	0x23
+#define PM2XXX_I2C_PAD_CTRL_REG		0x24
+#define PM2XXX_SW_CTRL_REG		0x26
+#define PM2XXX_LED_CTRL_REG		0x28
+
+#define PM2XXX_REG_INT1		0x40
+#define PM2XXX_MASK_REG_INT1	0x50
+#define PM2XXX_SRCE_REG_INT1	0x60
+#define PM2XXX_REG_INT2		0x41
+#define PM2XXX_MASK_REG_INT2	0x51
+#define PM2XXX_SRCE_REG_INT2	0x61
+#define PM2XXX_REG_INT3		0x42
+#define PM2XXX_MASK_REG_INT3	0x52
+#define PM2XXX_SRCE_REG_INT3	0x62
+#define PM2XXX_REG_INT4		0x43
+#define PM2XXX_MASK_REG_INT4	0x53
+#define PM2XXX_SRCE_REG_INT4	0x63
+#define PM2XXX_REG_INT5		0x44
+#define PM2XXX_MASK_REG_INT5	0x54
+#define PM2XXX_SRCE_REG_INT5	0x64
+#define PM2XXX_REG_INT6		0x45
+#define PM2XXX_MASK_REG_INT6	0x55
+#define PM2XXX_SRCE_REG_INT6	0x65
+
+#define VPWR_OVV 0x0
+#define VSYSTEM_OVV 0x1
+
+/* control Reg 1 */
+#define PM2XXX_CH_RESUME_EN	     0x1
+#define PM2XXX_CH_RESUME_DIS		0x0
+
+/* control Reg 2 */
+#define PM2XXX_CH_AUTO_RESUME_EN	0X2
+#define PM2XXX_CH_AUTO_RESUME_DIS	0X0
+#define PM2XXX_CHARGER_ENA		0x4
+#define PM2XXX_CHARGER_DIS		0x0
+
+/* control Reg 3 */
+#define PM2XXX_CH_WD_CC_PHASE_OFF	0x0
+#define PM2XXX_CH_WD_CC_PHASE_5MIN	0x1
+#define PM2XXX_CH_WD_CC_PHASE_10MIN	0x2
+#define PM2XXX_CH_WD_CC_PHASE_30MIN	0x3
+#define PM2XXX_CH_WD_CC_PHASE_60MIN	0x4
+#define PM2XXX_CH_WD_CC_PHASE_120MIN	0x5
+#define PM2XXX_CH_WD_CC_PHASE_240MIN	0x6
+#define PM2XXX_CH_WD_CC_PHASE_360MIN	0x7
+
+#define PM2XXX_CH_WD_CV_PHASE_OFF	(0x0<<3)
+#define PM2XXX_CH_WD_CV_PHASE_5MIN	(0x1<<3)
+#define PM2XXX_CH_WD_CV_PHASE_10MIN	(0x2<<3)
+#define PM2XXX_CH_WD_CV_PHASE_30MIN	(0x3<<3)
+#define PM2XXX_CH_WD_CV_PHASE_60MIN	(0x4<<3)
+#define PM2XXX_CH_WD_CV_PHASE_120MIN	(0x5<<3)
+#define PM2XXX_CH_WD_CV_PHASE_240MIN	(0x6<<3)
+#define PM2XXX_CH_WD_CV_PHASE_360MIN	(0x7<<3)
+
+/* control Reg 4 */
+#define PM2XXX_CH_WD_PRECH_PHASE_OFF	0x0
+#define PM2XXX_CH_WD_PRECH_PHASE_1MIN	0x1
+#define PM2XXX_CH_WD_PRECH_PHASE_5MIN	0x2
+#define PM2XXX_CH_WD_PRECH_PHASE_10MIN	0x3
+#define PM2XXX_CH_WD_PRECH_PHASE_30MIN	0x4
+#define PM2XXX_CH_WD_PRECH_PHASE_60MIN	0x5
+#define PM2XXX_CH_WD_PRECH_PHASE_120MIN	0x6
+#define PM2XXX_CH_WD_PRECH_PHASE_240MIN	0x7
+
+/* control Reg 5 */
+#define PM2XXX_CH_WD_AUTO_TIMEOUT_NONE	0x0
+#define PM2XXX_CH_WD_AUTO_TIMEOUT_20MIN	0x1
+
+/* control Reg 6 */
+#define PM2XXX_DIR_CH_CC_CURRENT_MASK	0x0F
+#define PM2XXX_DIR_CH_CC_CURRENT_200MA	0x0
+#define PM2XXX_DIR_CH_CC_CURRENT_400MA	0x2
+#define PM2XXX_DIR_CH_CC_CURRENT_600MA	0x3
+#define PM2XXX_DIR_CH_CC_CURRENT_800MA	0x4
+#define PM2XXX_DIR_CH_CC_CURRENT_1000MA	0x5
+#define PM2XXX_DIR_CH_CC_CURRENT_1200MA	0x6
+#define PM2XXX_DIR_CH_CC_CURRENT_1400MA	0x7
+#define PM2XXX_DIR_CH_CC_CURRENT_1600MA	0x8
+#define PM2XXX_DIR_CH_CC_CURRENT_1800MA	0x9
+#define PM2XXX_DIR_CH_CC_CURRENT_2000MA	0xA
+#define PM2XXX_DIR_CH_CC_CURRENT_2200MA	0xB
+#define PM2XXX_DIR_CH_CC_CURRENT_2400MA	0xC
+#define PM2XXX_DIR_CH_CC_CURRENT_2600MA	0xD
+#define PM2XXX_DIR_CH_CC_CURRENT_2800MA	0xE
+#define PM2XXX_DIR_CH_CC_CURRENT_3000MA	0xF
+
+#define PM2XXX_CH_PRECH_CURRENT_MASK	0x30
+#define PM2XXX_CH_PRECH_CURRENT_25MA	(0x0<<4)
+#define PM2XXX_CH_PRECH_CURRENT_50MA	(0x1<<4)
+#define PM2XXX_CH_PRECH_CURRENT_75MA	(0x2<<4)
+#define PM2XXX_CH_PRECH_CURRENT_100MA	(0x3<<4)
+
+#define PM2XXX_CH_EOC_CURRENT_MASK	0xC0
+#define PM2XXX_CH_EOC_CURRENT_100MA	(0x0<<6)
+#define PM2XXX_CH_EOC_CURRENT_150MA	(0x1<<6)
+#define PM2XXX_CH_EOC_CURRENT_300MA	(0x2<<6)
+#define PM2XXX_CH_EOC_CURRENT_400MA	(0x3<<6)
+
+/* control Reg 7 */
+#define PM2XXX_CH_PRECH_VOL_2_5		0x0
+#define PM2XXX_CH_PRECH_VOL_2_7		0x1
+#define PM2XXX_CH_PRECH_VOL_2_9		0x2
+#define PM2XXX_CH_PRECH_VOL_3_1		0x3
+
+#define PM2XXX_CH_VRESUME_VOL_3_2	(0x0<<2)
+#define PM2XXX_CH_VRESUME_VOL_3_4	(0x1<<2)
+#define PM2XXX_CH_VRESUME_VOL_3_6	(0x2<<2)
+#define PM2XXX_CH_VRESUME_VOL_3_8	(0x3<<2)
+
+/* control Reg 8 */
+#define PM2XXX_CH_VOLT_MASK		0x3F
+#define PM2XXX_CH_VOLT_3_5		0x0
+#define PM2XXX_CH_VOLT_3_5225		0x1
+#define PM2XXX_CH_VOLT_3_6		0x4
+#define PM2XXX_CH_VOLT_3_7		0x8
+#define PM2XXX_CH_VOLT_4_0		0x14
+#define PM2XXX_CH_VOLT_4_175		0x1B
+#define PM2XXX_CH_VOLT_4_2		0x1C
+#define PM2XXX_CH_VOLT_4_275		0x1F
+#define PM2XXX_CH_VOLT_4_3		0x20
+
+/*NTC control register 1*/
+#define PM2XXX_BTEMP_HIGH_TH_45		0x0
+#define PM2XXX_BTEMP_HIGH_TH_50		0x1
+#define PM2XXX_BTEMP_HIGH_TH_55		0x2
+#define PM2XXX_BTEMP_HIGH_TH_60		0x3
+#define PM2XXX_BTEMP_HIGH_TH_65		0x4
+
+#define PM2XXX_BTEMP_LOW_TH_N5		(0x0<<3)
+#define PM2XXX_BTEMP_LOW_TH_0		(0x1<<3)
+#define PM2XXX_BTEMP_LOW_TH_5		(0x2<<3)
+#define PM2XXX_BTEMP_LOW_TH_10		(0x3<<3)
+
+/*NTC control register 2*/
+#define PM2XXX_NTC_BETA_COEFF_3477	0x0
+#define PM2XXX_NTC_BETA_COEFF_3964	0x1
+
+#define PM2XXX_NTC_RES_10K		(0x0<<2)
+#define PM2XXX_NTC_RES_47K		(0x1<<2)
+#define PM2XXX_NTC_RES_100K		(0x2<<2)
+#define PM2XXX_NTC_RES_NO_NTC		(0x3<<2)
+
+/* control Reg 9 */
+#define PM2XXX_CH_CC_MODEDROP_EN	1
+#define PM2XXX_CH_CC_MODEDROP_DIS	0
+
+#define PM2XXX_CH_CC_REDUCED_CURRENT_100MA	(0x0<<1)
+#define PM2XXX_CH_CC_REDUCED_CURRENT_200MA	(0x1<<1)
+#define PM2XXX_CH_CC_REDUCED_CURRENT_400MA	(0x2<<1)
+#define PM2XXX_CH_CC_REDUCED_CURRENT_IDENT	(0x3<<1)
+
+#define PM2XXX_CHARCHING_INFO_DIS	(0<<3)
+#define PM2XXX_CHARCHING_INFO_EN	(1<<3)
+
+#define PM2XXX_CH_150MV_DROP_300MV	(0<<4)
+#define PM2XXX_CH_150MV_DROP_150MV	(1<<4)
+
+
+/* charger status register */
+#define PM2XXX_CHG_STATUS_OFF		0x0
+#define PM2XXX_CHG_STATUS_ON		0x1
+#define PM2XXX_CHG_STATUS_FULL		0x2
+#define PM2XXX_CHG_STATUS_ERR		0x3
+#define PM2XXX_CHG_STATUS_WAIT		0x4
+#define PM2XXX_CHG_STATUS_NOBAT		0x5
+
+/* Input charger voltage VPWR2 */
+#define PM2XXX_VPWR2_OVV_6_0		0x0
+#define PM2XXX_VPWR2_OVV_6_3		0x1
+#define PM2XXX_VPWR2_OVV_10		0x2
+#define PM2XXX_VPWR2_OVV_NONE		0x3
+
+/* Input charger voltage VPWR1 */
+#define PM2XXX_VPWR1_OVV_6_0		0x0
+#define PM2XXX_VPWR1_OVV_6_3		0x1
+#define PM2XXX_VPWR1_OVV_10		0x2
+#define PM2XXX_VPWR1_OVV_NONE		0x3
+
+/* Battery low level comparator control register */
+#define PM2XXX_VBAT_LOW_MONITORING_DIS	0x0
+#define PM2XXX_VBAT_LOW_MONITORING_ENA	0x1
+
+/* Battery low level value control register */
+#define PM2XXX_VBAT_LOW_LEVEL_2_3	0x0
+#define PM2XXX_VBAT_LOW_LEVEL_2_4	0x1
+#define PM2XXX_VBAT_LOW_LEVEL_2_5	0x2
+#define PM2XXX_VBAT_LOW_LEVEL_2_6	0x3
+#define PM2XXX_VBAT_LOW_LEVEL_2_7	0x4
+#define PM2XXX_VBAT_LOW_LEVEL_2_8	0x5
+#define PM2XXX_VBAT_LOW_LEVEL_2_9	0x6
+#define PM2XXX_VBAT_LOW_LEVEL_3_0	0x7
+#define PM2XXX_VBAT_LOW_LEVEL_3_1	0x8
+#define PM2XXX_VBAT_LOW_LEVEL_3_2	0x9
+#define PM2XXX_VBAT_LOW_LEVEL_3_3	0xA
+#define PM2XXX_VBAT_LOW_LEVEL_3_4	0xB
+#define PM2XXX_VBAT_LOW_LEVEL_3_5	0xC
+#define PM2XXX_VBAT_LOW_LEVEL_3_6	0xD
+#define PM2XXX_VBAT_LOW_LEVEL_3_7	0xE
+#define PM2XXX_VBAT_LOW_LEVEL_3_8	0xF
+#define PM2XXX_VBAT_LOW_LEVEL_3_9	0x10
+#define PM2XXX_VBAT_LOW_LEVEL_4_0	0x11
+#define PM2XXX_VBAT_LOW_LEVEL_4_1	0x12
+#define PM2XXX_VBAT_LOW_LEVEL_4_2	0x13
+
+/* SW CTRL */
+#define PM2XXX_SWCTRL_HW		0x0
+#define PM2XXX_SWCTRL_SW		0x1
+
+
+/* LED Driver Control */
+#define PM2XXX_LED_CURRENT_MASK		0x0C
+#define PM2XXX_LED_CURRENT_2_5MA	(0X0<<2)
+#define PM2XXX_LED_CURRENT_1MA		(0X1<<2)
+#define PM2XXX_LED_CURRENT_5MA		(0X2<<2)
+#define PM2XXX_LED_CURRENT_10MA		(0X3<<2)
+
+#define PM2XXX_LED_SELECT_MASK		0x02
+#define PM2XXX_LED_SELECT_EN		(0X0<<1)
+#define PM2XXX_LED_SELECT_DIS		(0X1<<1)
+
+#define PM2XXX_ANTI_OVERSHOOT_MASK	0x01
+#define PM2XXX_ANTI_OVERSHOOT_DIS	0X0
+#define PM2XXX_ANTI_OVERSHOOT_EN	0X1
+
+#define to_pm2xxx_charger_ac_device_info(x) container_of((x), \
+		struct pm2xxx_charger, ac_chg)
+
+static int pm2xxx_interrupt_registers[] = {
+	PM2XXX_REG_INT1,
+	PM2XXX_REG_INT2,
+	PM2XXX_REG_INT3,
+	PM2XXX_REG_INT4,
+	PM2XXX_REG_INT5,
+	PM2XXX_REG_INT6,
+};
+
+enum pm2xxx_reg_int1 {
+	PM2XXX_INT1_ITVBATDISCONNECT	= 0x02,
+	PM2XXX_INT1_ITVBATLOWR		= 0x04,
+	PM2XXX_INT1_ITVBATLOWF		= 0x08,
+};
+
+enum pm2xxx_mask_reg_int1 {
+	PM2XXX_INT1_M_ITVBATDISCONNECT	= 0x02,
+	PM2XXX_INT1_M_ITVBATLOWR	= 0x04,
+	PM2XXX_INT1_M_ITVBATLOWF	= 0x08,
+};
+
+enum pm2xxx_source_reg_int1 {
+	PM2XXX_INT1_S_ITVBATDISCONNECT	= 0x02,
+	PM2XXX_INT1_S_ITVBATLOWR	= 0x04,
+	PM2XXX_INT1_S_ITVBATLOWF	= 0x08,
+};
+
+enum pm2xxx_reg_int2 {
+	PM2XXX_INT2_ITVPWR2PLUG		= 0x01,
+	PM2XXX_INT2_ITVPWR2UNPLUG	= 0x02,
+	PM2XXX_INT2_ITVPWR1PLUG		= 0x04,
+	PM2XXX_INT2_ITVPWR1UNPLUG	= 0x08,
+};
+
+enum pm2xxx_mask_reg_int2 {
+	PM2XXX_INT2_M_ITVPWR2PLUG	= 0x01,
+	PM2XXX_INT2_M_ITVPWR2UNPLUG	= 0x02,
+	PM2XXX_INT2_M_ITVPWR1PLUG	= 0x04,
+	PM2XXX_INT2_M_ITVPWR1UNPLUG	= 0x08,
+};
+
+enum pm2xxx_source_reg_int2 {
+	PM2XXX_INT2_S_ITVPWR2PLUG	= 0x03,
+	PM2XXX_INT2_S_ITVPWR1PLUG	= 0x0c,
+};
+
+enum pm2xxx_reg_int3 {
+	PM2XXX_INT3_ITCHPRECHARGEWD	= 0x01,
+	PM2XXX_INT3_ITCHCCWD		= 0x02,
+	PM2XXX_INT3_ITCHCVWD		= 0x04,
+	PM2XXX_INT3_ITAUTOTIMEOUTWD	= 0x08,
+};
+
+enum pm2xxx_mask_reg_int3 {
+	PM2XXX_INT3_M_ITCHPRECHARGEWD	= 0x01,
+	PM2XXX_INT3_M_ITCHCCWD		= 0x02,
+	PM2XXX_INT3_M_ITCHCVWD		= 0x04,
+	PM2XXX_INT3_M_ITAUTOTIMEOUTWD	= 0x08,
+};
+
+enum pm2xxx_source_reg_int3 {
+	PM2XXX_INT3_S_ITCHPRECHARGEWD	= 0x01,
+	PM2XXX_INT3_S_ITCHCCWD		= 0x02,
+	PM2XXX_INT3_S_ITCHCVWD		= 0x04,
+	PM2XXX_INT3_S_ITAUTOTIMEOUTWD	= 0x08,
+};
+
+enum pm2xxx_reg_int4 {
+	PM2XXX_INT4_ITBATTEMPCOLD	= 0x01,
+	PM2XXX_INT4_ITBATTEMPHOT	= 0x02,
+	PM2XXX_INT4_ITVPWR2OVV		= 0x04,
+	PM2XXX_INT4_ITVPWR1OVV		= 0x08,
+	PM2XXX_INT4_ITCHARGINGON	= 0x10,
+	PM2XXX_INT4_ITVRESUME		= 0x20,
+	PM2XXX_INT4_ITBATTFULL		= 0x40,
+	PM2XXX_INT4_ITCVPHASE		= 0x80,
+};
+
+enum pm2xxx_mask_reg_int4 {
+	PM2XXX_INT4_M_ITBATTEMPCOLD	= 0x01,
+	PM2XXX_INT4_M_ITBATTEMPHOT	= 0x02,
+	PM2XXX_INT4_M_ITVPWR2OVV	= 0x04,
+	PM2XXX_INT4_M_ITVPWR1OVV	= 0x08,
+	PM2XXX_INT4_M_ITCHARGINGON	= 0x10,
+	PM2XXX_INT4_M_ITVRESUME		= 0x20,
+	PM2XXX_INT4_M_ITBATTFULL	= 0x40,
+	PM2XXX_INT4_M_ITCVPHASE		= 0x80,
+};
+
+enum pm2xxx_source_reg_int4 {
+	PM2XXX_INT4_S_ITBATTEMPCOLD	= 0x01,
+	PM2XXX_INT4_S_ITBATTEMPHOT	= 0x02,
+	PM2XXX_INT4_S_ITVPWR2OVV	= 0x04,
+	PM2XXX_INT4_S_ITVPWR1OVV	= 0x08,
+	PM2XXX_INT4_S_ITCHARGINGON	= 0x10,
+	PM2XXX_INT4_S_ITVRESUME		= 0x20,
+	PM2XXX_INT4_S_ITBATTFULL	= 0x40,
+	PM2XXX_INT4_S_ITCVPHASE		= 0x80,
+};
+
+enum pm2xxx_reg_int5 {
+	PM2XXX_INT5_ITTHERMALSHUTDOWNRISE	= 0x01,
+	PM2XXX_INT5_ITTHERMALSHUTDOWNFALL	= 0x02,
+	PM2XXX_INT5_ITTHERMALWARNINGRISE	= 0x04,
+	PM2XXX_INT5_ITTHERMALWARNINGFALL	= 0x08,
+	PM2XXX_INT5_ITVSYSTEMOVV		= 0x10,
+};
+
+enum pm2xxx_mask_reg_int5 {
+	PM2XXX_INT5_M_ITTHERMALSHUTDOWNRISE	= 0x01,
+	PM2XXX_INT5_M_ITTHERMALSHUTDOWNFALL	= 0x02,
+	PM2XXX_INT5_M_ITTHERMALWARNINGRISE	= 0x04,
+	PM2XXX_INT5_M_ITTHERMALWARNINGFALL	= 0x08,
+	PM2XXX_INT5_M_ITVSYSTEMOVV		= 0x10,
+};
+
+enum pm2xxx_source_reg_int5 {
+	PM2XXX_INT5_S_ITTHERMALSHUTDOWNRISE	= 0x01,
+	PM2XXX_INT5_S_ITTHERMALSHUTDOWNFALL	= 0x02,
+	PM2XXX_INT5_S_ITTHERMALWARNINGRISE	= 0x04,
+	PM2XXX_INT5_S_ITTHERMALWARNINGFALL	= 0x08,
+	PM2XXX_INT5_S_ITVSYSTEMOVV		= 0x10,
+};
+
+enum pm2xxx_reg_int6 {
+	PM2XXX_INT6_ITVPWR2DROP		= 0x01,
+	PM2XXX_INT6_ITVPWR1DROP		= 0x02,
+	PM2XXX_INT6_ITVPWR2VALIDRISE	= 0x04,
+	PM2XXX_INT6_ITVPWR2VALIDFALL	= 0x08,
+	PM2XXX_INT6_ITVPWR1VALIDRISE	= 0x10,
+	PM2XXX_INT6_ITVPWR1VALIDFALL	= 0x20,
+};
+
+enum pm2xxx_mask_reg_int6 {
+	PM2XXX_INT6_M_ITVPWR2DROP	= 0x01,
+	PM2XXX_INT6_M_ITVPWR1DROP	= 0x02,
+	PM2XXX_INT6_M_ITVPWR2VALIDRISE	= 0x04,
+	PM2XXX_INT6_M_ITVPWR2VALIDFALL	= 0x08,
+	PM2XXX_INT6_M_ITVPWR1VALIDRISE	= 0x10,
+	PM2XXX_INT6_M_ITVPWR1VALIDFALL	= 0x20,
+};
+
+enum pm2xxx_source_reg_int6 {
+	PM2XXX_INT6_S_ITVPWR2DROP	= 0x01,
+	PM2XXX_INT6_S_ITVPWR1DROP	= 0x02,
+	PM2XXX_INT6_S_ITVPWR2VALIDRISE	= 0x04,
+	PM2XXX_INT6_S_ITVPWR2VALIDFALL	= 0x08,
+	PM2XXX_INT6_S_ITVPWR1VALIDRISE	= 0x10,
+	PM2XXX_INT6_S_ITVPWR1VALIDFALL	= 0x20,
+};
+
+static enum power_supply_property pm2xxx_charger_ac_props[] = {
+	POWER_SUPPLY_PROP_HEALTH,
+	POWER_SUPPLY_PROP_PRESENT,
+	POWER_SUPPLY_PROP_ONLINE,
+	POWER_SUPPLY_PROP_VOLTAGE_NOW,
+	POWER_SUPPLY_PROP_VOLTAGE_AVG,
+	POWER_SUPPLY_PROP_CURRENT_NOW,
+};
+
+static int pm2xxx_charger_voltage_map[] = {
+	3500,
+	3525,
+	3550,
+	3575,
+	3600,
+	3625,
+	3650,
+	3675,
+	3700,
+	3725,
+	3750,
+	3775,
+	3800,
+	3825,
+	3850,
+	3875,
+	3900,
+	3925,
+	3950,
+	3975,
+	4000,
+	4025,
+	4050,
+	4075,
+	4100,
+	4125,
+	4150,
+	4175,
+	4200,
+	4225,
+	4250,
+	4275,
+	4300,
+};
+
+static int pm2xxx_charger_current_map[] = {
+	200,
+	200,
+	400,
+	600,
+	800,
+	1000,
+	1200,
+	1400,
+	1600,
+	1800,
+	2000,
+	2200,
+	2400,
+	2600,
+	2800,
+	3000,
+};
+
+struct pm2xxx_irq {
+	char *name;
+	irqreturn_t (*isr)(int irq, void *data);
+};
+
+struct pm2xxx_charger_info {
+	int charger_connected;
+	int charger_online;
+	int charger_voltage;
+	int cv_active;
+	bool wd_expired;
+};
+
+struct pm2xxx_charger_event_flags {
+	bool mainextchnotok;
+	bool main_thermal_prot;
+	bool ovv;
+	bool chgwdexp;
+};
+
+struct pm2xxx_config {
+	struct i2c_client *pm2xxx_i2c;
+	struct i2c_device_id *pm2xxx_id;
+};
+
+struct pm2xxx_charger {
+	struct device *dev;
+	u8 chip_id;
+	bool vddadc_en_ac;
+	struct pm2xxx_config config;
+	bool ac_conn;
+	unsigned int gpio_irq;
+	int vbat;
+	int old_vbat;
+	int failure_case;
+	int failure_input_ovv;
+	u8 pm2_int[6];
+	struct ab8500_gpadc *gpadc;
+	struct regulator *regu;
+	struct pm2xxx_bm_data *bat;
+	struct mutex lock;
+	struct ab8500 *parent;
+	struct pm2xxx_charger_info ac;
+	struct pm2xxx_charger_platform_data *pdata;
+	struct workqueue_struct *charger_wq;
+	struct delayed_work check_vbat_work;
+	struct work_struct ac_work;
+	struct work_struct check_main_thermal_prot_work;
+	struct ux500_charger ac_chg;
+	struct pm2xxx_charger_event_flags flags;
+};
+
+static const struct i2c_device_id pm2xxx_ident[] = {
+	{ "pm2301", 0 },
+	{ }
+};
+
+static int pm2xxx_reg_read(struct pm2xxx_charger *pm2, int reg, u8 *val)
+{
+	int ret;
+
+	ret = i2c_smbus_read_i2c_block_data(pm2->config.pm2xxx_i2c, reg,
+				1, val);
+	if (ret < 0)
+		dev_err(pm2->dev, "Error reading register at 0x%x\n", reg);
+
+	return ret;
+}
+
+static int pm2xxx_reg_write(struct pm2xxx_charger *pm2, int reg, u8 val)
+{
+	int ret;
+
+	ret = i2c_smbus_write_i2c_block_data(pm2->config.pm2xxx_i2c, reg,
+				1, &val);
+	if (ret < 0)
+		dev_err(pm2->dev, "Error writing register at 0x%x\n", reg);
+
+	return ret;
+}
+
+static int pm2xxx_charging_enable_mngt(struct pm2xxx_charger *pm2)
+{
+	int ret;
+
+	/* Enable charging */
+	ret = pm2xxx_reg_write(pm2, PM2XXX_BATT_CTRL_REG2,
+			(PM2XXX_CH_AUTO_RESUME_EN | PM2XXX_CHARGER_ENA));
+
+	return ret;
+}
+
+static int pm2xxx_charging_disable_mngt(struct pm2xxx_charger *pm2)
+{
+	int ret;
+
+	/* Disable charging */
+	ret = pm2xxx_reg_write(pm2, PM2XXX_BATT_CTRL_REG2,
+			(PM2XXX_CH_AUTO_RESUME_DIS | PM2XXX_CHARGER_DIS));
+
+	return ret;
+}
+
+static int pm2xxx_charger_batt_therm_mngt(struct pm2xxx_charger *pm2, int val)
+{
+	queue_work(pm2->charger_wq, &pm2->check_main_thermal_prot_work);
+
+	return 0;
+}
+
+
+int pm2xxx_charger_die_therm_mngt(struct pm2xxx_charger *pm2, int val)
+{
+	queue_work(pm2->charger_wq, &pm2->check_main_thermal_prot_work);
+
+	return 0;
+}
+
+static int pm2xxx_charger_ovv_mngt(struct pm2xxx_charger *pm2, int val)
+{
+	int ret = 0;
+
+	pm2->failure_input_ovv++;
+	if (pm2->failure_input_ovv < 4) {
+		ret = pm2xxx_charging_enable_mngt(pm2);
+		goto out;
+	} else {
+		pm2->failure_input_ovv = 0;
+		dev_err(pm2->dev, "Overvoltage detected\n");
+		pm2->flags.ovv = true;
+		power_supply_changed(&pm2->ac_chg.psy);
+	}
+
+out:
+	return ret;
+}
+
+static int pm2xxx_charger_wd_exp_mngt(struct pm2xxx_charger *pm2, int val)
+{
+	dev_dbg(pm2->dev , "20 minutes watchdog occured\n");
+
+	pm2->ac.wd_expired = true;
+	power_supply_changed(&pm2->ac_chg.psy);
+
+	return 0;
+}
+
+static int pm2xxx_charger_vbat_lsig_mngt(struct pm2xxx_charger *pm2, int val)
+{
+	switch (val) {
+	case PM2XXX_INT1_ITVBATLOWR:
+		dev_dbg(pm2->dev, "VBAT grows above VBAT_LOW level\n");
+		break;
+
+	case PM2XXX_INT1_ITVBATLOWF:
+		dev_dbg(pm2->dev, "VBAT drops below VBAT_LOW level\n");
+		break;
+
+	default:
+		dev_err(pm2->dev, "Unknown VBAT level\n");
+	}
+
+	return 0;
+}
+
+static int pm2xxx_charger_bat_disc_mngt(struct pm2xxx_charger *pm2, int val)
+{
+	dev_dbg(pm2->dev, "battery disconnected\n");
+
+	return (pm2xxx_charging_disable_mngt(pm2));
+}
+
+static int pm2xxx_charger_detection(struct pm2xxx_charger *pm2, u8 *val)
+{
+	int ret = 0;
+
+	ret = pm2xxx_reg_read(pm2, PM2XXX_SRCE_REG_INT2, val);
+
+	if (ret < 0) {
+		dev_err(pm2->dev, "Charger detection failed\n");
+		goto out;
+	}
+
+	*val &= (PM2XXX_INT2_S_ITVPWR1PLUG | PM2XXX_INT2_S_ITVPWR2PLUG);
+out:
+	return ret;
+}
+
+static int pm2xxx_charger_itv_pwr_plug_mngt(struct pm2xxx_charger *pm2, int val)
+{
+
+	int ret;
+	u8 read_val;
+
+	/*
+	 * Since we can't be sure that the events are received
+	 * synchronously, we have the check if the main charger is
+	 * connected by reading the interrupt source register.
+	 */
+	ret = pm2xxx_charger_detection(pm2, &read_val);
+
+	if ((ret == 0) && read_val) {
+		pm2->ac.charger_connected = 1;
+		pm2->ac_conn = true;
+		queue_work(pm2->charger_wq, &pm2->ac_work);
+	}
+
+
+	return ret;
+}
+
+static int pm2xxx_charger_itv_pwr_unplug_mngt(struct pm2xxx_charger *pm2,
+								int val)
+{
+	pm2->ac.charger_connected = 0;
+	queue_work(pm2->charger_wq, &pm2->ac_work);
+
+	return 0;
+}
+
+static int pm2_int_reg0(struct pm2xxx_charger *pm2)
+{
+	int ret = 0;
+
+	if (pm2->pm2_int[0] &
+			(PM2XXX_INT1_ITVBATLOWR | PM2XXX_INT1_ITVBATLOWF)) {
+		ret = pm2xxx_charger_vbat_lsig_mngt(pm2, pm2->pm2_int[0] &
+			(PM2XXX_INT1_ITVBATLOWR | PM2XXX_INT1_ITVBATLOWF));
+	}
+
+	if (pm2->pm2_int[0] & PM2XXX_INT1_ITVBATDISCONNECT) {
+		ret = pm2xxx_charger_bat_disc_mngt(pm2,
+				PM2XXX_INT1_ITVBATDISCONNECT);
+	}
+
+	return ret;
+}
+
+static int pm2_int_reg1(struct pm2xxx_charger *pm2)
+{
+	int ret = 0;
+
+	if (pm2->pm2_int[1] &
+		(PM2XXX_INT2_ITVPWR1PLUG | PM2XXX_INT2_ITVPWR2PLUG)) {
+		dev_dbg(pm2->dev , "Main charger plugged\n");
+		ret = pm2xxx_charger_itv_pwr_plug_mngt(pm2, pm2->pm2_int[1] &
+			(PM2XXX_INT2_ITVPWR1PLUG | PM2XXX_INT2_ITVPWR2PLUG));
+	}
+
+	if (pm2->pm2_int[1] &
+		(PM2XXX_INT2_ITVPWR1UNPLUG | PM2XXX_INT2_ITVPWR2UNPLUG)) {
+		dev_dbg(pm2->dev , "Main charger unplugged\n");
+		ret = pm2xxx_charger_itv_pwr_unplug_mngt(pm2, pm2->pm2_int[1] &
+						(PM2XXX_INT2_ITVPWR1UNPLUG |
+						PM2XXX_INT2_ITVPWR2UNPLUG));
+	}
+
+	return ret;
+}
+
+static int pm2_int_reg2(struct pm2xxx_charger *pm2)
+{
+	int ret = 0;
+
+	if (pm2->pm2_int[2] & PM2XXX_INT3_ITAUTOTIMEOUTWD)
+		ret = pm2xxx_charger_wd_exp_mngt(pm2, pm2->pm2_int[2]);
+
+	if (pm2->pm2_int[2] & (PM2XXX_INT3_ITCHPRECHARGEWD |
+				PM2XXX_INT3_ITCHCCWD | PM2XXX_INT3_ITCHCVWD)) {
+		dev_dbg(pm2->dev,
+			"Watchdog occured for precharge, CC and CV charge\n");
+	}
+
+	return ret;
+}
+
+static int pm2_int_reg3(struct pm2xxx_charger *pm2)
+{
+	int ret = 0;
+
+	if (pm2->pm2_int[3] & (PM2XXX_INT4_ITCHARGINGON)) {
+		dev_dbg(pm2->dev ,
+			"chargind operation has started\n");
+	}
+
+	if (pm2->pm2_int[3] & (PM2XXX_INT4_ITVRESUME)) {
+		dev_dbg(pm2->dev,
+			"battery discharged down to VResume threshold\n");
+	}
+
+	if (pm2->pm2_int[3] & (PM2XXX_INT4_ITBATTFULL)) {
+		dev_dbg(pm2->dev , "battery fully detected\n");
+	}
+
+	if (pm2->pm2_int[3] & (PM2XXX_INT4_ITCVPHASE)) {
+		dev_dbg(pm2->dev, "CV phase enter with 0.5C charging\n");
+	}
+
+	if (pm2->pm2_int[3] &
+			(PM2XXX_INT4_ITVPWR2OVV | PM2XXX_INT4_ITVPWR1OVV)) {
+		pm2->failure_case = VPWR_OVV;
+		ret = pm2xxx_charger_ovv_mngt(pm2, pm2->pm2_int[3] &
+			(PM2XXX_INT4_ITVPWR2OVV | PM2XXX_INT4_ITVPWR1OVV));
+		dev_dbg(pm2->dev, "VPWR/VSYSTEM overvoltage detected\n");
+	}
+
+	if (pm2->pm2_int[3] & (PM2XXX_INT4_S_ITBATTEMPCOLD |
+				PM2XXX_INT4_S_ITBATTEMPHOT)) {
+		ret = pm2xxx_charger_batt_therm_mngt(pm2,
+			pm2->pm2_int[3] & (PM2XXX_INT4_S_ITBATTEMPCOLD |
+					PM2XXX_INT4_S_ITBATTEMPHOT));
+		dev_dbg(pm2->dev, "BTEMP is too Low/High\n");
+	}
+
+	return ret;
+}
+
+static int pm2_int_reg4(struct pm2xxx_charger *pm2)
+{
+	int ret = 0;
+
+	if (pm2->pm2_int[4] & PM2XXX_INT5_ITVSYSTEMOVV) {
+		pm2->failure_case = VSYSTEM_OVV;
+		ret = pm2xxx_charger_ovv_mngt(pm2, pm2->pm2_int[4] &
+						PM2XXX_INT5_ITVSYSTEMOVV);
+		dev_dbg(pm2->dev, "VSYSTEM overvoltage detected\n");
+	}
+
+	if (pm2->pm2_int[4] & (PM2XXX_INT5_ITTHERMALWARNINGFALL |
+				PM2XXX_INT5_ITTHERMALWARNINGRISE |
+				PM2XXX_INT5_ITTHERMALSHUTDOWNFALL |
+				PM2XXX_INT5_ITTHERMALSHUTDOWNRISE)) {
+		dev_dbg(pm2->dev, "BTEMP die temperature is too Low/High\n");
+		ret = pm2xxx_charger_die_therm_mngt(pm2, pm2->pm2_int[4] &
+			(PM2XXX_INT5_ITTHERMALWARNINGFALL |
+			PM2XXX_INT5_ITTHERMALWARNINGRISE |
+			PM2XXX_INT5_ITTHERMALSHUTDOWNFALL |
+			PM2XXX_INT5_ITTHERMALSHUTDOWNRISE));
+	}
+
+	return ret;
+}
+
+static int pm2_int_reg5(struct pm2xxx_charger *pm2)
+{
+
+	if (pm2->pm2_int[5]
+		& (PM2XXX_INT6_ITVPWR2DROP | PM2XXX_INT6_ITVPWR1DROP)) {
+		dev_dbg(pm2->dev, "VMPWR drop to VBAT level\n");
+	}
+
+	if (pm2->pm2_int[5] & (PM2XXX_INT6_ITVPWR2VALIDRISE |
+				PM2XXX_INT6_ITVPWR1VALIDRISE |
+				PM2XXX_INT6_ITVPWR2VALIDFALL |
+				PM2XXX_INT6_ITVPWR1VALIDFALL)) {
+		dev_dbg(pm2->dev, "Falling/Rising edge on WPWR1/2\n");
+	}
+
+	return 0;
+}
+
+static irqreturn_t  pm2xxx_irq_int(int irq, void *data)
+{
+	struct pm2xxx_charger *pm2 = data;
+	int ret, i;
+
+	for (i = 0; i < ARRAY_SIZE(pm2->pm2_int); i++) {
+		ret = pm2xxx_reg_read(pm2, pm2xxx_interrupt_registers[i],
+				&(pm2->pm2_int[i]));
+	}
+
+	pm2_int_reg0(pm2);
+	pm2_int_reg1(pm2);
+	pm2_int_reg2(pm2);
+	pm2_int_reg3(pm2);
+	pm2_int_reg4(pm2);
+	pm2_int_reg5(pm2);
+
+	return IRQ_HANDLED;
+}
+
+static int pm2xxx_charger_get_ac_voltage(struct pm2xxx_charger *pm2)
+{
+	int vch = 0;
+
+	if (pm2->ac.charger_connected) {
+		vch = ab8500_gpadc_convert(pm2->gpadc, MAIN_CHARGER_V);
+		if (vch < 0)
+			dev_err(pm2->dev, "%s gpadc conv failed,\n", __func__);
+	}
+
+	return vch;
+}
+
+static int pm2xxx_charger_get_ac_cv(struct pm2xxx_charger *pm2)
+{
+	int ret = 0;
+	u8 val;
+
+	if (pm2->ac.charger_connected && pm2->ac.charger_online) {
+
+		ret = pm2xxx_reg_read(pm2, PM2XXX_SRCE_REG_INT4, &val);
+		if (ret < 0) {
+			dev_err(pm2->dev, "%s pm2xxx read failed\n", __func__);
+			goto out;
+		}
+
+		if (val & PM2XXX_INT4_S_ITCVPHASE)
+			ret = PM2XXX_CONST_VOLT;
+		else
+			ret = PM2XXX_CONST_CURR;
+	}
+out:
+	return ret;
+}
+
+static int pm2xxx_charger_get_ac_current(struct pm2xxx_charger *pm2)
+{
+	int ich = 0;
+
+	if (pm2->ac.charger_online) {
+		ich = ab8500_gpadc_convert(pm2->gpadc, MAIN_CHARGER_C);
+		if (ich < 0)
+			dev_err(pm2->dev, "%s gpadc conv failed\n", __func__);
+	}
+
+	return ich;
+}
+
+static int pm2xxx_current_to_regval(int curr)
+{
+	int i;
+
+	if (curr < pm2xxx_charger_current_map[0])
+		return 0;
+
+	for (i = 1; i < ARRAY_SIZE(pm2xxx_charger_current_map); i++) {
+		if (curr < pm2xxx_charger_current_map[i])
+			return (i - 1);
+	}
+
+	i = ARRAY_SIZE(pm2xxx_charger_current_map) - 1;
+	if (curr == pm2xxx_charger_current_map[i])
+		return i;
+	else
+		return -EINVAL;
+}
+
+static int pm2xxx_voltage_to_regval(int curr)
+{
+	int i;
+
+	if (curr < pm2xxx_charger_voltage_map[0])
+		return 0;
+
+	for (i = 1; i < ARRAY_SIZE(pm2xxx_charger_voltage_map); i++) {
+		if (curr < pm2xxx_charger_voltage_map[i])
+			return i - 1;
+	}
+
+	i = ARRAY_SIZE(pm2xxx_charger_voltage_map) - 1;
+	if (curr == pm2xxx_charger_voltage_map[i])
+		return i;
+	else
+		return -EINVAL;
+}
+
+static int pm2xxx_charger_update_charger_current(struct ux500_charger *charger,
+		int ich_out)
+{
+	int ret;
+	int curr_index;
+	struct pm2xxx_charger *pm2;
+	u8 val;
+
+	if (charger->psy.type == POWER_SUPPLY_TYPE_MAINS)
+		pm2 = to_pm2xxx_charger_ac_device_info(charger);
+	else
+		return -ENXIO;
+
+	curr_index = pm2xxx_current_to_regval(ich_out);
+	if (curr_index < 0) {
+		dev_err(pm2->dev,
+			"Charger current too high: charging not started\n");
+		return -ENXIO;
+	}
+
+	ret = pm2xxx_reg_read(pm2, PM2XXX_BATT_CTRL_REG6, &val);
+	if (ret >= 0) {
+		val &= ~PM2XXX_DIR_CH_CC_CURRENT_MASK;
+		val |= curr_index;
+		ret = pm2xxx_reg_write(pm2, PM2XXX_BATT_CTRL_REG6, val);
+		if (ret < 0) {
+			dev_err(pm2->dev,
+				"%s write failed\n", __func__);
+		}
+	}
+	else
+		dev_err(pm2->dev, "%s read failed\n", __func__);
+
+	return ret;
+}
+
+static int pm2xxx_charger_ac_get_property(struct power_supply *psy,
+	enum power_supply_property psp,
+	union power_supply_propval *val)
+{
+	struct pm2xxx_charger *pm2;
+
+	pm2 = to_pm2xxx_charger_ac_device_info(psy_to_ux500_charger(psy));
+
+	switch (psp) {
+	case POWER_SUPPLY_PROP_HEALTH:
+		if (pm2->flags.mainextchnotok)
+			val->intval = POWER_SUPPLY_HEALTH_UNSPEC_FAILURE;
+		else if (pm2->ac.wd_expired)
+			val->intval = POWER_SUPPLY_HEALTH_DEAD;
+		else if (pm2->flags.main_thermal_prot)
+			val->intval = POWER_SUPPLY_HEALTH_OVERHEAT;
+		else
+			val->intval = POWER_SUPPLY_HEALTH_GOOD;
+		break;
+	case POWER_SUPPLY_PROP_ONLINE:
+		val->intval = pm2->ac.charger_online;
+		break;
+	case POWER_SUPPLY_PROP_PRESENT:
+		val->intval = pm2->ac.charger_connected;
+		break;
+	case POWER_SUPPLY_PROP_VOLTAGE_NOW:
+		pm2->ac.charger_voltage = pm2xxx_charger_get_ac_voltage(pm2);
+		val->intval = pm2->ac.charger_voltage * 1000;
+		break;
+	case POWER_SUPPLY_PROP_VOLTAGE_AVG:
+		pm2->ac.cv_active = pm2xxx_charger_get_ac_cv(pm2);
+		val->intval = pm2->ac.cv_active;
+		break;
+	case POWER_SUPPLY_PROP_CURRENT_NOW:
+		val->intval = pm2xxx_charger_get_ac_current(pm2) * 1000;
+		break;
+	default:
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static int pm2xxx_charging_init(struct pm2xxx_charger *pm2)
+{
+	int ret = 0;
+
+	/* enable CC and CV watchdog */
+	ret = pm2xxx_reg_write(pm2, PM2XXX_BATT_CTRL_REG3,
+		(PM2XXX_CH_WD_CV_PHASE_60MIN | PM2XXX_CH_WD_CC_PHASE_60MIN));
+	if( ret < 0)
+		return ret;
+
+	/* enable precharge watchdog */
+	ret = pm2xxx_reg_write(pm2, PM2XXX_BATT_CTRL_REG4,
+					PM2XXX_CH_WD_PRECH_PHASE_60MIN);
+
+	return ret;
+}
+
+static int pm2xxx_charger_ac_en(struct ux500_charger *charger,
+	int enable, int vset, int iset)
+{
+	int ret;
+	int volt_index;
+	int curr_index;
+	u8 val;
+
+	struct pm2xxx_charger *pm2 = to_pm2xxx_charger_ac_device_info(charger);
+
+	if (enable) {
+		if (!pm2->ac.charger_connected) {
+			dev_dbg(pm2->dev, "AC charger not connected\n");
+			return -ENXIO;
+		}
+
+		dev_dbg(pm2->dev, "Enable AC: %dmV %dmA\n", vset, iset);
+		if (!pm2->vddadc_en_ac) {
+			regulator_enable(pm2->regu);
+			pm2->vddadc_en_ac = true;
+		}
+
+		ret = pm2xxx_charging_init(pm2);
+		if (ret < 0) {
+			dev_err(pm2->dev, "%s charging init failed\n",
+					__func__);
+			goto error_occured;
+		}
+
+		volt_index = pm2xxx_voltage_to_regval(vset);
+		curr_index = pm2xxx_current_to_regval(iset);
+
+		if (volt_index < 0 || curr_index < 0) {
+			dev_err(pm2->dev,
+				"Charger voltage or current too high, "
+				"charging not started\n");
+			return -ENXIO;
+		}
+
+		ret = pm2xxx_reg_read(pm2, PM2XXX_BATT_CTRL_REG8, &val);
+		if (ret >= 0) {
+			val &= ~PM2XXX_CH_VOLT_MASK;
+			val |= volt_index;
+			ret = pm2xxx_reg_write(pm2, PM2XXX_BATT_CTRL_REG8, val);
+
+			if (ret < 0) {
+				dev_err(pm2->dev,
+					"%s write failed\n", __func__);
+				goto error_occured;
+			}
+		else
+			dev_err(pm2->dev, "%s read failed\n", __func__);
+		}
+
+		ret = pm2xxx_reg_read(pm2, PM2XXX_BATT_CTRL_REG6, &val);
+		if (ret >= 0) {
+			val &= ~PM2XXX_DIR_CH_CC_CURRENT_MASK;
+			val |= curr_index;
+			ret = pm2xxx_reg_write(pm2, PM2XXX_BATT_CTRL_REG6, val);
+			if (ret < 0) {
+				dev_err(pm2->dev,
+					"%s write failed\n", __func__);
+				goto error_occured;
+			}
+		else
+			dev_err(pm2->dev, "%s read failed\n", __func__);
+		}
+
+		if (!pm2->bat->enable_overshoot) {
+			ret = pm2xxx_reg_read(pm2, PM2XXX_LED_CTRL_REG, &val);
+			if (ret >= 0) {
+				val |= PM2XXX_ANTI_OVERSHOOT_EN;
+				ret = pm2xxx_reg_write(pm2, PM2XXX_LED_CTRL_REG,
+							val);
+				if (ret < 0){
+					dev_err(pm2->dev, "%s write failed\n",
+							__func__);
+					goto error_occured;
+				}
+			}
+		else
+			dev_err(pm2->dev, "%s read failed\n", __func__);
+		}
+
+		ret = pm2xxx_charging_enable_mngt(pm2);
+		if (ret) {
+			dev_err(pm2->dev, "%s write failed\n", __func__);
+			goto error_occured;
+		}
+
+		pm2->ac.charger_online = 1;
+	} else {
+		pm2->ac.charger_online = 0;
+		pm2->ac.wd_expired = false;
+
+		/* Disable regulator if enabled */
+		if (pm2->vddadc_en_ac) {
+			regulator_disable(pm2->regu);
+			pm2->vddadc_en_ac = false;
+		}
+
+		ret = pm2xxx_charging_disable_mngt(pm2);
+		if (ret) {
+			dev_err(pm2->dev, "%s write failed\n", __func__);
+			return ret;
+		}
+
+		dev_dbg(pm2->dev, "PM2301: " "Disabled AC charging\n");
+	}
+	power_supply_changed(&pm2->ac_chg.psy);
+
+error_occured:
+	return ret;
+}
+
+static int pm2xxx_charger_watchdog_kick(struct ux500_charger *charger)
+{
+	int ret;
+	struct pm2xxx_charger *pm2;
+
+	if (charger->psy.type == POWER_SUPPLY_TYPE_MAINS)
+		pm2 = to_pm2xxx_charger_ac_device_info(charger);
+	else
+		return -ENXIO;
+
+	ret = pm2xxx_reg_write(pm2, PM2XXX_BATT_WD_KICK, WD_TIMER);
+	if (ret)
+		dev_err(pm2->dev, "Failed to kick WD!\n");
+
+	return ret;
+}
+
+static void pm2xxx_charger_ac_work(struct work_struct *work)
+{
+	struct pm2xxx_charger *pm2 = container_of(work,
+		struct pm2xxx_charger, ac_work);
+
+
+	power_supply_changed(&pm2->ac_chg.psy);
+	sysfs_notify(&pm2->ac_chg.psy.dev->kobj, NULL, "present");
+};
+
+static void pm2xxx_charger_check_main_thermal_prot_work(
+	struct work_struct *work)
+{
+};
+
+static struct pm2xxx_irq pm2xxx_charger_irq[] = {
+	{"PM2XXX_IRQ_INT", pm2xxx_irq_int},
+};
+
+static int pm2xxx_wall_charger_resume(struct i2c_client *i2c_client)
+{
+	return 0;
+}
+
+static int pm2xxx_wall_charger_suspend(struct i2c_client *i2c_client,
+	pm_message_t state)
+{
+	return 0;
+}
+
+static int __devinit pm2xxx_wall_charger_probe(struct i2c_client *i2c_client,
+		const struct i2c_device_id *id)
+{
+	struct pm2xxx_platform_data *pl_data = i2c_client->dev.platform_data;
+	struct pm2xxx_charger *pm2;
+	int ret = 0;
+	u8 val;
+
+	pm2 = kzalloc(sizeof(struct pm2xxx_charger), GFP_KERNEL);
+	if (!pm2) {
+		dev_err(pm2->dev, "pm2xxx_charger allocation failed\n");
+		return -ENOMEM;
+	}
+
+	/* get parent data */
+	pm2->dev = &i2c_client->dev;
+	pm2->gpadc = ab8500_gpadc_get("ab8500-gpadc.0");
+
+	/* get charger spcific platform data */
+	if (!pl_data->wall_charger) {
+		dev_err(pm2->dev, "no charger platform data supplied\n");
+		ret = -EINVAL;
+		goto free_device_info;
+	}
+
+	pm2->pdata = pl_data->wall_charger;
+
+	/* get battery specific platform data */
+	if (!pl_data->battery) {
+		dev_err(pm2->dev, "no battery platform data supplied\n");
+		ret = -EINVAL;
+		goto free_device_info;
+	}
+
+	pm2->bat = pl_data->battery;
+
+	if (!i2c_check_functionality(i2c_client->adapter,
+			I2C_FUNC_SMBUS_BYTE_DATA |
+			I2C_FUNC_SMBUS_READ_WORD_DATA)) {
+		ret = -ENODEV;
+		dev_info(pm2->dev, "pm2301 i2c_check_functionality failed\n");
+		goto free_device_info;
+	}
+
+	pm2->config.pm2xxx_i2c = i2c_client;
+	pm2->config.pm2xxx_id = (struct i2c_device_id *) id;
+	i2c_set_clientdata(i2c_client, pm2);
+
+	/* AC supply */
+	/* power_supply base class */
+	pm2->ac_chg.psy.name = pm2->pdata->label;
+	pm2->ac_chg.psy.type = POWER_SUPPLY_TYPE_MAINS;
+	pm2->ac_chg.psy.properties = pm2xxx_charger_ac_props;
+	pm2->ac_chg.psy.num_properties = ARRAY_SIZE(pm2xxx_charger_ac_props);
+	pm2->ac_chg.psy.get_property = pm2xxx_charger_ac_get_property;
+	pm2->ac_chg.psy.supplied_to = pm2->pdata->supplied_to;
+	pm2->ac_chg.psy.num_supplicants = pm2->pdata->num_supplicants;
+	/* pm2xxx_charger sub-class */
+	pm2->ac_chg.ops.enable = &pm2xxx_charger_ac_en;
+	pm2->ac_chg.ops.kick_wd = &pm2xxx_charger_watchdog_kick;
+	pm2->ac_chg.ops.update_curr = &pm2xxx_charger_update_charger_current;
+	pm2->ac_chg.max_out_volt = pm2xxx_charger_voltage_map[
+		ARRAY_SIZE(pm2xxx_charger_voltage_map) - 1];
+	pm2->ac_chg.max_out_curr = pm2xxx_charger_current_map[
+		ARRAY_SIZE(pm2xxx_charger_current_map) - 1];
+
+	/* Create a work queue for the charger */
+	pm2->charger_wq =
+		create_singlethread_workqueue("pm2xxx_charger_wq");
+	if (pm2->charger_wq == NULL) {
+		dev_err(pm2->dev, "failed to create work queue\n");
+		goto free_device_info;
+	}
+
+	/* Init work for charger detection */
+	INIT_WORK(&pm2->ac_work, pm2xxx_charger_ac_work);
+
+	/* Init work for checking HW status */
+	INIT_WORK(&pm2->check_main_thermal_prot_work,
+		pm2xxx_charger_check_main_thermal_prot_work);
+
+	/*
+	 * VDD ADC supply needs to be enabled from this driver when there
+	 * is a charger connected to avoid erroneous BTEMP_HIGH/LOW
+	 * interrupts during charging
+	 */
+	pm2->regu = regulator_get(pm2->dev, "vddadc");
+	if (IS_ERR(pm2->regu)) {
+		ret = PTR_ERR(pm2->regu);
+		dev_err(pm2->dev, "failed to get vddadc regulator\n");
+		goto free_charger_wq;
+	}
+
+	/* Register AC charger class */
+	ret = power_supply_register(pm2->dev, &pm2->ac_chg.psy);
+	if (ret) {
+		dev_err(pm2->dev, "failed to register AC charger\n");
+		goto free_regulator;
+	}
+
+	/* Register interrupts */
+	ret = request_threaded_irq(pm2->pdata->irq_number, NULL,
+				pm2xxx_charger_irq[0].isr,
+				pm2->pdata->irq_type,
+				pm2xxx_charger_irq[0].name, pm2);
+
+	if (ret != 0) {
+		dev_err(pm2->dev, "failed to request %s IRQ %d: %d\n",
+		pm2xxx_charger_irq[0].name, pm2->pdata->irq_number, ret);
+		goto unregister_pm2xxx_charger;
+	}
+
+	/*
+	 * I2C Read/Write will fail, if AC adaptor is not connected.
+	 * fix the charger detection mechanism.
+	 */
+	ret = pm2xxx_charger_detection(pm2, &val);
+
+	if ((ret == 0) && val) {
+		pm2->ac.charger_connected = 1;
+		pm2->ac_conn = true;
+		power_supply_changed(&pm2->ac_chg.psy);
+		sysfs_notify(&pm2->ac_chg.psy.dev->kobj, NULL, "present");
+	}
+
+	return 0;
+
+unregister_pm2xxx_charger:
+	/* unregister power supply */
+	power_supply_unregister(&pm2->ac_chg.psy);
+free_regulator:
+	/* disable the regulator */
+	regulator_put(pm2->regu);
+free_charger_wq:
+	destroy_workqueue(pm2->charger_wq);
+free_device_info:
+	kfree(pm2);
+	return ret;
+}
+
+static int __devexit pm2xxx_wall_charger_remove(struct i2c_client *i2c_client)
+{
+	struct pm2xxx_charger *pm2 = i2c_get_clientdata(i2c_client);
+
+	/* Disable AC charging */
+	pm2xxx_charger_ac_en(&pm2->ac_chg, false, 0, 0);
+
+	/* Disable interrupts */
+	free_irq(pm2->pdata->irq_number, pm2);
+
+	/* Delete the work queue */
+	destroy_workqueue(pm2->charger_wq);
+
+	flush_scheduled_work();
+
+	/* disable the regulator */
+	regulator_put(pm2->regu);
+
+	power_supply_unregister(&pm2->ac_chg.psy);
+
+	kfree(pm2);
+
+	return 0;
+}
+
+static const struct i2c_device_id pm2xxx_id[] = {
+	{ "pm2301", 0 },
+	{ }
+};
+
+MODULE_DEVICE_TABLE(i2c, pm2xxx_id);
+
+static struct i2c_driver pm2xxx_charger_driver = {
+	.probe = pm2xxx_wall_charger_probe,
+	.remove = __devexit_p(pm2xxx_wall_charger_remove),
+	.suspend = pm2xxx_wall_charger_suspend,
+	.resume = pm2xxx_wall_charger_resume,
+	.driver = {
+		.name = "pm2xxx-wall_charger",
+		.owner = THIS_MODULE,
+	},
+	.id_table = pm2xxx_id,
+};
+
+static int __init pm2xxx_charger_init(void)
+{
+	return i2c_add_driver(&pm2xxx_charger_driver);
+}
+
+static void __exit pm2xxx_charger_exit(void)
+{
+	i2c_del_driver(&pm2xxx_charger_driver);
+}
+
+subsys_initcall_sync(pm2xxx_charger_init);
+module_exit(pm2xxx_charger_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Rajkumar kasirajan, Olivier Launay");
+MODULE_ALIAS("platform:pm2xxx-charger");
+MODULE_DESCRIPTION("PM2xxx charger management driver");
+
diff --git a/include/linux/mfd/abx500/ab8500-bm.h b/include/linux/mfd/abx500/ab8500-bm.h
index e2a1e6d84f72..a03d4fdf9c2c 100644
--- a/include/linux/mfd/abx500/ab8500-bm.h
+++ b/include/linux/mfd/abx500/ab8500-bm.h
@@ -406,6 +406,8 @@ struct ab8500_charger_platform_data {
 	char **supplied_to;
 	size_t num_supplicants;
 	bool autopower_cfg;
+	bool ac_enabled;
+	bool usb_enabled;
 };
 
 struct ab8500_btemp_platform_data {
diff --git a/include/linux/mfd/abx500/ux500_chargalg.h b/include/linux/mfd/abx500/ux500_chargalg.h
index 9b07725750c9..5b77a610c6b6 100644
--- a/include/linux/mfd/abx500/ux500_chargalg.h
+++ b/include/linux/mfd/abx500/ux500_chargalg.h
@@ -27,12 +27,14 @@ struct ux500_charger_ops {
  * @ops			ux500 charger operations
  * @max_out_volt	maximum output charger voltage in mV
  * @max_out_curr	maximum output charger current in mA
+ * @enabled		indicates if this charger is used or not
  */
 struct ux500_charger {
 	struct power_supply psy;
 	struct ux500_charger_ops ops;
 	int max_out_volt;
 	int max_out_curr;
+	bool enabled;
 };
 
 #endif
diff --git a/include/linux/pm2301_charger.h b/include/linux/pm2301_charger.h
new file mode 100644
index 000000000000..16bb1d34b9d5
--- /dev/null
+++ b/include/linux/pm2301_charger.h
@@ -0,0 +1,60 @@
+/*
+ * PM2301 charger driver.
+ *
+ * Copyright (C) 2012 ST Ericsson Corporation
+ *
+ * Contact: Olivier LAUNAY (olivier.launay@stericsson.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ */
+
+#ifndef __LINUX_PM2301_H
+#define __LINUX_PM2301_H
+
+/**
+ * struct pm2xxx_bm_charger_parameters - Charger specific parameters
+ * @ac_volt_max:	maximum allowed AC charger voltage in mV
+ * @ac_curr_max:	maximum allowed AC charger current in mA
+ */
+struct pm2xxx_bm_charger_parameters {
+	int ac_volt_max;
+	int ac_curr_max;
+};
+
+/**
+ * struct pm2xxx_bm_data - pm2xxx battery management data
+ * @enable_overshoot    flag to enable VBAT overshoot control
+ * @chg_params	  charger parameters
+ */
+struct pm2xxx_bm_data {
+	bool enable_overshoot;
+	const struct pm2xxx_bm_charger_parameters *chg_params;
+};
+
+struct pm2xxx_charger_platform_data {
+	char **supplied_to;
+	size_t num_supplicants;
+	int i2c_bus;
+	const char *label;
+	int irq_number;
+	int irq_type;
+};
+
+struct pm2xxx_platform_data {
+	struct pm2xxx_charger_platform_data *wall_charger;
+	struct pm2xxx_bm_data *battery;
+};
+
+#endif /* __LINUX_PM2301_H */
-- 
cgit v1.2.3


From e6aac611b86956bdd981f30e8862ee2ac8aaf664 Mon Sep 17 00:00:00 2001
From: Michel JAOUEN <michel.jaouen@stericsson.com>
Date: Tue, 22 May 2012 15:46:46 +0200
Subject: ab8500-btemp: Adaptation to AB8505 and AB9540 platforms

Add AB9540 and AB8505 support to ABx500 BTEMP driver.

Signed-off-by: Rajkumar Kasirajan <rajkumar.kasirajan@stericsson.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Reviewed-by: Michel JAOUEN <michel.jaouen@stericsson.com>
Reviewed-by: Marcus COOPER <marcus.xm.cooper@stericsson.com>
Reviewed-by: Jonas ABERG <jonas.aberg@stericsson.com>
Tested-by: Michel JAOUEN <michel.jaouen@stericsson.com>
Tested-by: Jonas ABERG <jonas.aberg@stericsson.com>
---
 drivers/power/ab8500_btemp.c         | 67 ++++++++++++++++++++++++++++--------
 include/linux/mfd/abx500/ab8500-bm.h |  2 ++
 2 files changed, 55 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/power/ab8500_btemp.c b/drivers/power/ab8500_btemp.c
index 8ccf3590fd76..301a9ad3474d 100644
--- a/drivers/power/ab8500_btemp.c
+++ b/drivers/power/ab8500_btemp.c
@@ -39,6 +39,9 @@
 #define BTEMP_BATCTRL_CURR_SRC_7UA	7
 #define BTEMP_BATCTRL_CURR_SRC_20UA	20
 
+#define BTEMP_BATCTRL_CURR_SRC_16UA	16
+#define BTEMP_BATCTRL_CURR_SRC_18UA	18
+
 #define to_ab8500_btemp_device_info(x) container_of((x), \
 	struct ab8500_btemp, btemp_psy);
 
@@ -212,10 +215,18 @@ static int ab8500_btemp_curr_source_enable(struct ab8500_btemp *di,
 
 	/* Only do this for batteries with internal NTC */
 	if (di->bm->adc_therm == ABx500_ADC_THERM_BATCTRL && enable) {
-		if (di->curr_source == BTEMP_BATCTRL_CURR_SRC_7UA)
-			curr = BAT_CTRL_7U_ENA;
-		else
-			curr = BAT_CTRL_20U_ENA;
+
+		if (is_ab9540(di->parent) || is_ab8505(di->parent)) {
+			if (di->curr_source == BTEMP_BATCTRL_CURR_SRC_16UA)
+				curr = BAT_CTRL_16U_ENA;
+			else
+				curr = BAT_CTRL_18U_ENA;
+		} else {
+			if (di->curr_source == BTEMP_BATCTRL_CURR_SRC_7UA)
+				curr = BAT_CTRL_7U_ENA;
+			else
+				curr = BAT_CTRL_20U_ENA;
+		}
 
 		dev_dbg(di->dev, "Set BATCTRL %duA\n", di->curr_source);
 
@@ -246,11 +257,22 @@ static int ab8500_btemp_curr_source_enable(struct ab8500_btemp *di,
 	} else if (di->bm->adc_therm == ABx500_ADC_THERM_BATCTRL && !enable) {
 		dev_dbg(di->dev, "Disable BATCTRL curr source\n");
 
-		/* Write 0 to the curr bits */
-		ret = abx500_mask_and_set_register_interruptible(di->dev,
-			AB8500_CHARGER, AB8500_BAT_CTRL_CURRENT_SOURCE,
-			BAT_CTRL_7U_ENA | BAT_CTRL_20U_ENA,
-			~(BAT_CTRL_7U_ENA | BAT_CTRL_20U_ENA));
+		if (is_ab9540(di->parent) || is_ab8505(di->parent)) {
+			/* Write 0 to the curr bits */
+			ret = abx500_mask_and_set_register_interruptible(
+				di->dev,
+				AB8500_CHARGER, AB8500_BAT_CTRL_CURRENT_SOURCE,
+				BAT_CTRL_16U_ENA | BAT_CTRL_18U_ENA,
+				~(BAT_CTRL_16U_ENA | BAT_CTRL_18U_ENA));
+		} else {
+			/* Write 0 to the curr bits */
+			ret = abx500_mask_and_set_register_interruptible(
+				di->dev,
+				AB8500_CHARGER, AB8500_BAT_CTRL_CURRENT_SOURCE,
+				BAT_CTRL_7U_ENA | BAT_CTRL_20U_ENA,
+				~(BAT_CTRL_7U_ENA | BAT_CTRL_20U_ENA));
+		}
+
 		if (ret) {
 			dev_err(di->dev, "%s failed disabling current source\n",
 				__func__);
@@ -292,11 +314,20 @@ static int ab8500_btemp_curr_source_enable(struct ab8500_btemp *di,
 	 * if we got an error above
 	 */
 disable_curr_source:
-	/* Write 0 to the curr bits */
-	ret = abx500_mask_and_set_register_interruptible(di->dev,
+	if (is_ab9540(di->parent) || is_ab8505(di->parent)) {
+		/* Write 0 to the curr bits */
+		ret = abx500_mask_and_set_register_interruptible(di->dev,
+			AB8500_CHARGER, AB8500_BAT_CTRL_CURRENT_SOURCE,
+			BAT_CTRL_16U_ENA | BAT_CTRL_18U_ENA,
+			~(BAT_CTRL_16U_ENA | BAT_CTRL_18U_ENA));
+	} else {
+		/* Write 0 to the curr bits */
+		ret = abx500_mask_and_set_register_interruptible(di->dev,
 			AB8500_CHARGER, AB8500_BAT_CTRL_CURRENT_SOURCE,
 			BAT_CTRL_7U_ENA | BAT_CTRL_20U_ENA,
 			~(BAT_CTRL_7U_ENA | BAT_CTRL_20U_ENA));
+	}
+
 	if (ret) {
 		dev_err(di->dev, "%s failed disabling current source\n",
 			__func__);
@@ -510,8 +541,11 @@ static int ab8500_btemp_id(struct ab8500_btemp *di)
 {
 	int res;
 	u8 i;
+	if (is_ab9540(di->parent) || is_ab8505(di->parent))
+		di->curr_source = BTEMP_BATCTRL_CURR_SRC_16UA;
+	else
+		di->curr_source = BTEMP_BATCTRL_CURR_SRC_7UA;
 
-	di->curr_source = BTEMP_BATCTRL_CURR_SRC_7UA;
 	di->bm->batt_id = BATTERY_UNKNOWN;
 
 	res =  ab8500_btemp_get_batctrl_res(di);
@@ -549,8 +583,13 @@ static int ab8500_btemp_id(struct ab8500_btemp *di)
 	 */
 	if (di->bm->adc_therm == ABx500_ADC_THERM_BATCTRL &&
 			di->bm->batt_id == 1) {
-		dev_dbg(di->dev, "Set BATCTRL current source to 20uA\n");
-		di->curr_source = BTEMP_BATCTRL_CURR_SRC_20UA;
+		if (is_ab9540(di->parent) || is_ab8505(di->parent)) {
+			dev_dbg(di->dev, "Set BATCTRL current source to 16uA\n");
+			di->curr_source = BTEMP_BATCTRL_CURR_SRC_16UA;
+		} else {
+			dev_dbg(di->dev, "Set BATCTRL current source to 20uA\n");
+			di->curr_source = BTEMP_BATCTRL_CURR_SRC_20UA;
+		}
 	}
 
 	return di->bm->batt_id;
diff --git a/include/linux/mfd/abx500/ab8500-bm.h b/include/linux/mfd/abx500/ab8500-bm.h
index a03d4fdf9c2c..ec796c700e4c 100644
--- a/include/linux/mfd/abx500/ab8500-bm.h
+++ b/include/linux/mfd/abx500/ab8500-bm.h
@@ -226,6 +226,8 @@
 /* BatCtrl Current Source Constants */
 #define BAT_CTRL_7U_ENA			0x01
 #define BAT_CTRL_20U_ENA		0x02
+#define BAT_CTRL_18U_ENA		0x01
+#define BAT_CTRL_16U_ENA		0x02
 #define BAT_CTRL_CMP_ENA		0x04
 #define FORCE_BAT_CTRL_CMP_HIGH		0x08
 #define BAT_CTRL_PULL_UP_ENA		0x10
-- 
cgit v1.2.3


From b506eebc504caaf863b5c5a68a1e1d304d610482 Mon Sep 17 00:00:00 2001
From: Vivek Gautam <gautam.vivek@samsung.com>
Date: Tue, 22 Jan 2013 18:30:40 +0530
Subject: ARM: EXYNOS: Update & move usb-phy types to generic include layer

Updating the names of usb-phy types to more generic names:
USB_PHY_TYPE_DEIVCE & USB_PHY_TYPE_HOST; and further update
its dependencies.

Signed-off-by: Praveen Paneri <p.paneri@samsung.com>
Signed-off-by: Vivek Gautam <gautam.vivek@samsung.com>
Acked-by: Kukjin Kim <kgene.kim@samsung.com>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 drivers/usb/host/ehci-s5p.c         |  9 +++++----
 drivers/usb/host/ohci-exynos.c      |  9 +++++----
 include/linux/usb/samsung_usb_phy.h | 16 ++++++++++++++++
 3 files changed, 26 insertions(+), 8 deletions(-)
 create mode 100644 include/linux/usb/samsung_usb_phy.h

(limited to 'include/linux')

diff --git a/drivers/usb/host/ehci-s5p.c b/drivers/usb/host/ehci-s5p.c
index 319dcfaa8735..46ca5efac82b 100644
--- a/drivers/usb/host/ehci-s5p.c
+++ b/drivers/usb/host/ehci-s5p.c
@@ -17,6 +17,7 @@
 #include <linux/platform_device.h>
 #include <linux/of_gpio.h>
 #include <linux/platform_data/usb-ehci-s5p.h>
+#include <linux/usb/samsung_usb_phy.h>
 #include <plat/usb-phy.h>
 
 #define EHCI_INSNREG00(base)			(base + 0x90)
@@ -164,7 +165,7 @@ static int s5p_ehci_probe(struct platform_device *pdev)
 	}
 
 	if (pdata->phy_init)
-		pdata->phy_init(pdev, S5P_USB_PHY_HOST);
+		pdata->phy_init(pdev, USB_PHY_TYPE_HOST);
 
 	ehci = hcd_to_ehci(hcd);
 	ehci->caps = hcd->regs;
@@ -198,7 +199,7 @@ static int s5p_ehci_remove(struct platform_device *pdev)
 	usb_remove_hcd(hcd);
 
 	if (pdata && pdata->phy_exit)
-		pdata->phy_exit(pdev, S5P_USB_PHY_HOST);
+		pdata->phy_exit(pdev, USB_PHY_TYPE_HOST);
 
 	clk_disable_unprepare(s5p_ehci->clk);
 
@@ -229,7 +230,7 @@ static int s5p_ehci_suspend(struct device *dev)
 	rc = ehci_suspend(hcd, do_wakeup);
 
 	if (pdata && pdata->phy_exit)
-		pdata->phy_exit(pdev, S5P_USB_PHY_HOST);
+		pdata->phy_exit(pdev, USB_PHY_TYPE_HOST);
 
 	clk_disable_unprepare(s5p_ehci->clk);
 
@@ -246,7 +247,7 @@ static int s5p_ehci_resume(struct device *dev)
 	clk_prepare_enable(s5p_ehci->clk);
 
 	if (pdata && pdata->phy_init)
-		pdata->phy_init(pdev, S5P_USB_PHY_HOST);
+		pdata->phy_init(pdev, USB_PHY_TYPE_HOST);
 
 	/* DMA burst Enable */
 	writel(EHCI_INSNREG00_ENABLE_DMA_BURST, EHCI_INSNREG00(hcd->regs));
diff --git a/drivers/usb/host/ohci-exynos.c b/drivers/usb/host/ohci-exynos.c
index aa3b8844bb9f..804fb62a888c 100644
--- a/drivers/usb/host/ohci-exynos.c
+++ b/drivers/usb/host/ohci-exynos.c
@@ -15,6 +15,7 @@
 #include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/platform_data/usb-exynos.h>
+#include <linux/usb/samsung_usb_phy.h>
 #include <plat/usb-phy.h>
 
 struct exynos_ohci_hcd {
@@ -153,7 +154,7 @@ static int exynos_ohci_probe(struct platform_device *pdev)
 	}
 
 	if (pdata->phy_init)
-		pdata->phy_init(pdev, S5P_USB_PHY_HOST);
+		pdata->phy_init(pdev, USB_PHY_TYPE_HOST);
 
 	ohci = hcd_to_ohci(hcd);
 	ohci_hcd_init(ohci);
@@ -184,7 +185,7 @@ static int exynos_ohci_remove(struct platform_device *pdev)
 	usb_remove_hcd(hcd);
 
 	if (pdata && pdata->phy_exit)
-		pdata->phy_exit(pdev, S5P_USB_PHY_HOST);
+		pdata->phy_exit(pdev, USB_PHY_TYPE_HOST);
 
 	clk_disable_unprepare(exynos_ohci->clk);
 
@@ -229,7 +230,7 @@ static int exynos_ohci_suspend(struct device *dev)
 	clear_bit(HCD_FLAG_HW_ACCESSIBLE, &hcd->flags);
 
 	if (pdata && pdata->phy_exit)
-		pdata->phy_exit(pdev, S5P_USB_PHY_HOST);
+		pdata->phy_exit(pdev, USB_PHY_TYPE_HOST);
 
 	clk_disable_unprepare(exynos_ohci->clk);
 
@@ -249,7 +250,7 @@ static int exynos_ohci_resume(struct device *dev)
 	clk_prepare_enable(exynos_ohci->clk);
 
 	if (pdata && pdata->phy_init)
-		pdata->phy_init(pdev, S5P_USB_PHY_HOST);
+		pdata->phy_init(pdev, USB_PHY_TYPE_HOST);
 
 	ohci_resume(hcd, false);
 
diff --git a/include/linux/usb/samsung_usb_phy.h b/include/linux/usb/samsung_usb_phy.h
new file mode 100644
index 000000000000..916782699f1c
--- /dev/null
+++ b/include/linux/usb/samsung_usb_phy.h
@@ -0,0 +1,16 @@
+/*
+ * Copyright (C) 2012 Samsung Electronics Co.Ltd
+ *		http://www.samsung.com/
+ *
+ * Defines phy types for samsung usb phy controllers - HOST or DEIVCE.
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+enum samsung_usb_phy_type {
+	USB_PHY_TYPE_DEVICE,
+	USB_PHY_TYPE_HOST,
+};
-- 
cgit v1.2.3


From e07a56453b14b929cf01bf032cc3e3220094609c Mon Sep 17 00:00:00 2001
From: Loic Pallardy <loic.pallardy@stericsson.com>
Date: Thu, 10 May 2012 15:33:56 +0200
Subject: pm2301: Update watchdog for pm2xxx support

AB and PMxxx doesn't have same watchdog refresh period. Add watchdog
to refresh period parameters in x500 charger structure, this should
kick watchdog every 30sec. The AC charging should also kick both
pm2xxx and the AB charger watchdog.

Signed-off-by: Rajkumar Kasirajan <rajkumar.kasirajan@stericsson.com>
Signed-off-by: Loic Pallardy <loic.pallardy@stericsson.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Reviewed-by: Michel JAOUEN <michel.jaouen@stericsson.com>
Reviewed-by: Marcus COOPER <marcus.xm.cooper@stericsson.com>
Reviewed-by: Jonas ABERG <jonas.aberg@stericsson.com>
Tested-by: Michel JAOUEN <michel.jaouen@stericsson.com>
Tested-by: Jonas ABERG <jonas.aberg@stericsson.com>
---
 drivers/power/ab8500_charger.c            |  6 ++++++
 drivers/power/abx500_chargalg.c           | 12 +++++++++++-
 drivers/power/pm2301_charger.c            |  2 ++
 drivers/power/pm2301_charger.h            |  2 +-
 include/linux/mfd/abx500/ux500_chargalg.h |  3 +++
 5 files changed, 23 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/power/ab8500_charger.c b/drivers/power/ab8500_charger.c
index 4c66172aaa64..0483e7cb2268 100644
--- a/drivers/power/ab8500_charger.c
+++ b/drivers/power/ab8500_charger.c
@@ -93,6 +93,8 @@
 
 #define CHARGER_STATUS_POLL 10 /* in ms */
 
+#define CHG_WD_INTERVAL			(60 * HZ)
+
 /* UsbLineStatus register - usb types */
 enum ab8500_charger_link_status {
 	USB_STAT_NOT_CONFIGURED,
@@ -2953,7 +2955,9 @@ static int ab8500_charger_probe(struct platform_device *pdev)
 		ARRAY_SIZE(ab8500_charger_voltage_map) - 1];
 	di->ac_chg.max_out_curr = ab8500_charger_current_map[
 		ARRAY_SIZE(ab8500_charger_current_map) - 1];
+	di->ac_chg.wdt_refresh = CHG_WD_INTERVAL;
 	di->ac_chg.enabled = di->pdata->ac_enabled;
+	di->ac_chg.external = false;
 
 	/* USB supply */
 	/* power_supply base class */
@@ -2972,7 +2976,9 @@ static int ab8500_charger_probe(struct platform_device *pdev)
 		ARRAY_SIZE(ab8500_charger_voltage_map) - 1];
 	di->usb_chg.max_out_curr = ab8500_charger_current_map[
 		ARRAY_SIZE(ab8500_charger_current_map) - 1];
+	di->usb_chg.wdt_refresh = CHG_WD_INTERVAL;
 	di->usb_chg.enabled = di->pdata->usb_enabled;
+	di->usb_chg.external = false;
 
 	/* Create a work queue for the charger */
 	di->charger_wq =
diff --git a/drivers/power/abx500_chargalg.c b/drivers/power/abx500_chargalg.c
index f59bc025ca58..7defb3e91d59 100644
--- a/drivers/power/abx500_chargalg.c
+++ b/drivers/power/abx500_chargalg.c
@@ -445,8 +445,18 @@ static int abx500_chargalg_kick_watchdog(struct abx500_chargalg *di)
 {
 	/* Check if charger exists and kick watchdog if charging */
 	if (di->ac_chg && di->ac_chg->ops.kick_wd &&
-			di->chg_info.online_chg & AC_CHG)
+	    di->chg_info.online_chg & AC_CHG) {
+		/*
+		 * If AB charger watchdog expired, pm2xxx charging
+		 * gets disabled. To be safe, kick both AB charger watchdog
+		 * and pm2xxx watchdog.
+		 */
+		if (di->ac_chg->external &&
+		    di->usb_chg && di->usb_chg->ops.kick_wd)
+			di->usb_chg->ops.kick_wd(di->usb_chg);
+
 		return di->ac_chg->ops.kick_wd(di->ac_chg);
+	}
 	else if (di->usb_chg && di->usb_chg->ops.kick_wd &&
 			di->chg_info.online_chg & USB_CHG)
 		return di->usb_chg->ops.kick_wd(di->usb_chg);
diff --git a/drivers/power/pm2301_charger.c b/drivers/power/pm2301_charger.c
index 085e468d8a2b..8c1dd5628d16 100644
--- a/drivers/power/pm2301_charger.c
+++ b/drivers/power/pm2301_charger.c
@@ -875,7 +875,9 @@ static int __devinit pm2xxx_wall_charger_probe(struct i2c_client *i2c_client,
 		ARRAY_SIZE(pm2xxx_charger_voltage_map) - 1];
 	pm2->ac_chg.max_out_curr = pm2xxx_charger_current_map[
 		ARRAY_SIZE(pm2xxx_charger_current_map) - 1];
+	pm2->ac_chg.wdt_refresh = WD_KICK_INTERVAL;
 	pm2->ac_chg.enabled = true;
+	pm2->ac_chg.external = true;
 
 	/* Create a work queue for the charger */
 	pm2->charger_wq =
diff --git a/drivers/power/pm2301_charger.h b/drivers/power/pm2301_charger.h
index da804716aa82..3531cc5a9056 100644
--- a/drivers/power/pm2301_charger.h
+++ b/drivers/power/pm2301_charger.h
@@ -32,7 +32,7 @@
 
 /* Watchdog timeout constant */
 #define WD_TIMER			0x30 /* 4min */
-#define WD_KICK_INTERVAL		(60 * HZ)
+#define WD_KICK_INTERVAL		(30 * HZ)
 
 #define PM2XXX_NUM_INT_REG		0x6
 
diff --git a/include/linux/mfd/abx500/ux500_chargalg.h b/include/linux/mfd/abx500/ux500_chargalg.h
index 5b77a610c6b6..d43ac0f35526 100644
--- a/include/linux/mfd/abx500/ux500_chargalg.h
+++ b/include/linux/mfd/abx500/ux500_chargalg.h
@@ -28,13 +28,16 @@ struct ux500_charger_ops {
  * @max_out_volt	maximum output charger voltage in mV
  * @max_out_curr	maximum output charger current in mA
  * @enabled		indicates if this charger is used or not
+ * @external		external charger unit (pm2xxx)
  */
 struct ux500_charger {
 	struct power_supply psy;
 	struct ux500_charger_ops ops;
 	int max_out_volt;
 	int max_out_curr;
+	int wdt_refresh;
 	bool enabled;
+	bool external;
 };
 
 #endif
-- 
cgit v1.2.3


From 3988043b0ee1104d4cca7c57bbc23b16ea798b6f Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Wed, 23 Jan 2013 14:38:15 +0000
Subject: pm2301: LPN mode control support

The AC charger plug-in detection while booting causes I2C read
failure if AC charger is not connected. Now the LPN pin is enabled
for every PM2301 register access, which solves the issue.

Signed-off-by: Rupesh Kumar <rupesh.kumar@stericsson.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Reviewed-by: Marcus COOPER <marcus.xm.cooper@stericsson.com>
Reviewed-by: Vijaya Kumar K-1 <vijay.kilari@stericsson.com>
Reviewed-by: Rabin VINCENT <rabin.vincent@stericsson.com>
Reviewed-by: Jonas ABERG <jonas.aberg@stericsson.com>
Tested-by: Jonas ABERG <jonas.aberg@stericsson.com>
---
 drivers/power/pm2301_charger.c | 71 ++++++++++++++++++++++++++++++++++++++++--
 drivers/power/pm2301_charger.h |  1 +
 include/linux/pm2301_charger.h |  1 +
 3 files changed, 71 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/power/pm2301_charger.c b/drivers/power/pm2301_charger.c
index 9b2e8943f944..ed48d75bb786 100644
--- a/drivers/power/pm2301_charger.c
+++ b/drivers/power/pm2301_charger.c
@@ -28,6 +28,7 @@
 #include <linux/mfd/abx500/ab8500-gpadc.h>
 #include <linux/mfd/abx500/ux500_chargalg.h>
 #include <linux/pm2301_charger.h>
+#include <linux/gpio.h>
 
 #include "pm2301_charger.h"
 
@@ -110,9 +111,35 @@ static const struct i2c_device_id pm2xxx_ident[] = {
 	{ }
 };
 
+static void set_lpn_pin(struct pm2xxx_charger *pm2)
+{
+	if (pm2->ac.charger_connected)
+		return;
+	gpio_set_value(pm2->lpn_pin, 1);
+
+	return;
+}
+
+static void clear_lpn_pin(struct pm2xxx_charger *pm2)
+{
+	if (pm2->ac.charger_connected)
+		return;
+	gpio_set_value(pm2->lpn_pin, 0);
+
+	return;
+}
+
 static int pm2xxx_reg_read(struct pm2xxx_charger *pm2, int reg, u8 *val)
 {
 	int ret;
+	/*
+	 * When AC adaptor is unplugged, the host
+	 * must put LPN high to be able to
+	 * communicate by I2C with PM2301
+	 * and receive I2C "acknowledge" from PM2301.
+	 */
+	mutex_lock(&pm2->lock);
+	set_lpn_pin(pm2);
 
 	ret = i2c_smbus_read_i2c_block_data(pm2->config.pm2xxx_i2c, reg,
 				1, val);
@@ -120,6 +147,8 @@ static int pm2xxx_reg_read(struct pm2xxx_charger *pm2, int reg, u8 *val)
 		dev_err(pm2->dev, "Error reading register at 0x%x\n", reg);
 	else
 		ret = 0;
+	clear_lpn_pin(pm2);
+	mutex_unlock(&pm2->lock);
 
 	return ret;
 }
@@ -127,6 +156,14 @@ static int pm2xxx_reg_read(struct pm2xxx_charger *pm2, int reg, u8 *val)
 static int pm2xxx_reg_write(struct pm2xxx_charger *pm2, int reg, u8 val)
 {
 	int ret;
+	/*
+	 * When AC adaptor is unplugged, the host
+	 * must put LPN high to be able to
+	 * communicate by I2C with PM2301
+	 * and receive I2C "acknowledge" from PM2301.
+	 */
+	mutex_lock(&pm2->lock);
+	set_lpn_pin(pm2);
 
 	ret = i2c_smbus_write_i2c_block_data(pm2->config.pm2xxx_i2c, reg,
 				1, &val);
@@ -134,6 +171,8 @@ static int pm2xxx_reg_write(struct pm2xxx_charger *pm2, int reg, u8 val)
 		dev_err(pm2->dev, "Error writing register at 0x%x\n", reg);
 	else
 		ret = 0;
+	clear_lpn_pin(pm2);
+	mutex_unlock(&pm2->lock);
 
 	return ret;
 }
@@ -850,6 +889,14 @@ static int __devinit pm2xxx_wall_charger_probe(struct i2c_client *i2c_client,
 
 	pm2->bat = pl_data->battery;
 
+	/*get lpn GPIO from platform data*/
+	if (!pm2->pdata->lpn_gpio) {
+		dev_err(pm2->dev, "no lpn gpio data supplied\n");
+		ret = -EINVAL;
+		goto free_device_info;
+	}
+	pm2->lpn_pin = pm2->pdata->lpn_gpio;
+
 	if (!i2c_check_functionality(i2c_client->adapter,
 			I2C_FUNC_SMBUS_BYTE_DATA |
 			I2C_FUNC_SMBUS_READ_WORD_DATA)) {
@@ -929,10 +976,25 @@ static int __devinit pm2xxx_wall_charger_probe(struct i2c_client *i2c_client,
 		goto unregister_pm2xxx_charger;
 	}
 
+	/*Initialize lock*/
+	mutex_init(&pm2->lock);
+
 	/*
-	 * I2C Read/Write will fail, if AC adaptor is not connected.
-	 * fix the charger detection mechanism.
+	 * Charger detection mechanism requires pulling up the LPN pin
+	 * while i2c communication if Charger is not connected
+	 * LPN pin of PM2301 is GPIO60 of AB9540
 	 */
+	ret = gpio_request(pm2->lpn_pin, "pm2301_lpm_gpio");
+	if (ret < 0) {
+		dev_err(pm2->dev, "pm2301_lpm_gpio request failed\n");
+		goto unregister_pm2xxx_charger;
+	}
+	ret = gpio_direction_output(pm2->lpn_pin, 0);
+	if (ret < 0) {
+		dev_err(pm2->dev, "pm2301_lpm_gpio direction failed\n");
+		goto free_gpio;
+	}
+
 	ret = pm2xxx_charger_detection(pm2, &val);
 
 	if ((ret == 0) && val) {
@@ -944,6 +1006,8 @@ static int __devinit pm2xxx_wall_charger_probe(struct i2c_client *i2c_client,
 
 	return 0;
 
+free_gpio:
+	gpio_free(pm2->lpn_pin);
 unregister_pm2xxx_charger:
 	/* unregister power supply */
 	power_supply_unregister(&pm2->ac_chg.psy);
@@ -977,6 +1041,9 @@ static int __devexit pm2xxx_wall_charger_remove(struct i2c_client *i2c_client)
 
 	power_supply_unregister(&pm2->ac_chg.psy);
 
+	/*Free GPIO60*/
+	gpio_free(pm2->lpn_pin);
+
 	kfree(pm2);
 
 	return 0;
diff --git a/drivers/power/pm2301_charger.h b/drivers/power/pm2301_charger.h
index 3531cc5a9056..e6319cdbc94f 100644
--- a/drivers/power/pm2301_charger.h
+++ b/drivers/power/pm2301_charger.h
@@ -493,6 +493,7 @@ struct pm2xxx_charger {
 	int old_vbat;
 	int failure_case;
 	int failure_input_ovv;
+	unsigned int lpn_pin;
 	struct pm2xxx_interrupts *pm2_int;
 	struct ab8500_gpadc *gpadc;
 	struct regulator *regu;
diff --git a/include/linux/pm2301_charger.h b/include/linux/pm2301_charger.h
index 16bb1d34b9d5..fc3f026922ae 100644
--- a/include/linux/pm2301_charger.h
+++ b/include/linux/pm2301_charger.h
@@ -49,6 +49,7 @@ struct pm2xxx_charger_platform_data {
 	int i2c_bus;
 	const char *label;
 	int irq_number;
+	unsigned int lpn_gpio;
 	int irq_type;
 };
 
-- 
cgit v1.2.3


From 0ed5107fa86013c91b1752230d44b79dffee0cda Mon Sep 17 00:00:00 2001
From: Jonas Aaberg <jonas.aberg@stericsson.com>
Date: Fri, 11 May 2012 12:42:25 +0200
Subject: ab8500-charger: Do not touch VBUSOVV bits

Do not touch the VBUSOVV in USBCHTRL2 when running on AB8505.

Signed-off-by: Jonas Aaberg <jonas.aberg@stericsson.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Reviewed-by: Marcus COOPER <marcus.xm.cooper@stericsson.com>
Tested-by: Mian Yousaf KAUKAB <mian.yousaf.kaukab@stericsson.com>
---
 drivers/power/ab8500_charger.c    | 22 ++++++++++++++++------
 include/linux/mfd/abx500/ab8500.h | 19 +++++++++++++++++++
 2 files changed, 35 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/power/ab8500_charger.c b/drivers/power/ab8500_charger.c
index a632b94e38b9..871bf5a7c42a 100644
--- a/drivers/power/ab8500_charger.c
+++ b/drivers/power/ab8500_charger.c
@@ -2671,13 +2671,23 @@ static int ab8500_charger_init_hw_registers(struct ab8500_charger *di)
 		}
 	}
 
-	/* VBUS OVV set to 6.3V and enable automatic current limitiation */
-	ret = abx500_set_register_interruptible(di->dev,
-		AB8500_CHARGER,
-		AB8500_USBCH_CTRL2_REG,
-		VBUS_OVV_SELECT_6P3V | VBUS_AUTO_IN_CURR_LIM_ENA);
+	if (is_ab9540_2p0(di->parent) || is_ab8505_2p0(di->parent))
+		ret = abx500_mask_and_set_register_interruptible(di->dev,
+			AB8500_CHARGER,
+			AB8500_USBCH_CTRL2_REG,
+			VBUS_AUTO_IN_CURR_LIM_ENA,
+			VBUS_AUTO_IN_CURR_LIM_ENA);
+	else
+		/*
+		 * VBUS OVV set to 6.3V and enable automatic current limitation
+		 */
+		ret = abx500_set_register_interruptible(di->dev,
+			AB8500_CHARGER,
+			AB8500_USBCH_CTRL2_REG,
+			VBUS_OVV_SELECT_6P3V | VBUS_AUTO_IN_CURR_LIM_ENA);
 	if (ret) {
-		dev_err(di->dev, "failed to set VBUS OVV\n");
+		dev_err(di->dev,
+			"failed to set automatic current limitation\n");
 		goto out;
 	}
 
diff --git a/include/linux/mfd/abx500/ab8500.h b/include/linux/mfd/abx500/ab8500.h
index 9dd9b99099df..b9a6a847ff61 100644
--- a/include/linux/mfd/abx500/ab8500.h
+++ b/include/linux/mfd/abx500/ab8500.h
@@ -346,4 +346,23 @@ static inline int is_ab8500_2p0(struct ab8500 *ab)
 	return (is_ab8500(ab) && (ab->chip_id == AB8500_CUT2P0));
 }
 
+static inline int is_ab8505_1p0_or_earlier(struct ab8500 *ab)
+{
+	return (is_ab8505(ab) && (ab->chip_id <= AB8500_CUT1P0));
+}
+
+static inline int is_ab8505_2p0(struct ab8500 *ab)
+{
+	return (is_ab8505(ab) && (ab->chip_id == AB8500_CUT2P0));
+}
+
+static inline int is_ab9540_1p0_or_earlier(struct ab8500 *ab)
+{
+	return (is_ab9540(ab) && (ab->chip_id <= AB8500_CUT1P0));
+}
+
+static inline int is_ab9540_2p0(struct ab8500 *ab)
+{
+	return (is_ab9540(ab) && (ab->chip_id == AB8500_CUT2P0));
+}
 #endif /* MFD_AB8500_H */
-- 
cgit v1.2.3


From 97034a1e042d4316a83a3f68d61edf6c42e3f265 Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Thu, 17 Jan 2013 16:08:42 +0000
Subject: ab8500-bm: Remove individual [charger|btemp|fg|chargalg] pdata
 structures

None of the aforementioned components have their own dedicated
platform data structures anymore. Instead they have all been
merged into one big Battery Management container. Let's remove
them and place all the nice newly added attributes into the core
container.

Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/power/ab8500_charger.c       |  4 ++--
 include/linux/mfd/abx500.h           |  3 +++
 include/linux/mfd/abx500/ab8500-bm.h | 22 ----------------------
 3 files changed, 5 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/power/ab8500_charger.c b/drivers/power/ab8500_charger.c
index 871bf5a7c42a..432f6bc48764 100644
--- a/drivers/power/ab8500_charger.c
+++ b/drivers/power/ab8500_charger.c
@@ -2998,7 +2998,7 @@ static int ab8500_charger_probe(struct platform_device *pdev)
 	di->ac_chg.max_out_curr = ab8500_charger_current_map[
 		ARRAY_SIZE(ab8500_charger_current_map) - 1];
 	di->ac_chg.wdt_refresh = CHG_WD_INTERVAL;
-	di->ac_chg.enabled = di->pdata->ac_enabled;
+	di->ac_chg.enabled = di->bm->ac_enabled;
 	di->ac_chg.external = false;
 
 	/* USB supply */
@@ -3019,7 +3019,7 @@ static int ab8500_charger_probe(struct platform_device *pdev)
 	di->usb_chg.max_out_curr = ab8500_charger_current_map[
 		ARRAY_SIZE(ab8500_charger_current_map) - 1];
 	di->usb_chg.wdt_refresh = CHG_WD_INTERVAL;
-	di->usb_chg.enabled = di->pdata->usb_enabled;
+	di->usb_chg.enabled = di->bm->usb_enabled;
 	di->usb_chg.external = false;
 
 	/* Create a work queue for the charger */
diff --git a/include/linux/mfd/abx500.h b/include/linux/mfd/abx500.h
index 0e6e90badfca..1beaa056f195 100644
--- a/include/linux/mfd/abx500.h
+++ b/include/linux/mfd/abx500.h
@@ -254,6 +254,9 @@ struct abx500_bm_data {
 	int usb_safety_tmr_h;
 	int bkup_bat_v;
 	int bkup_bat_i;
+	bool autopower_cfg;
+	bool ac_enabled;
+	bool usb_enabled;
 	bool no_maintenance;
 	bool capacity_scaling;
 	bool chg_unknown_bat;
diff --git a/include/linux/mfd/abx500/ab8500-bm.h b/include/linux/mfd/abx500/ab8500-bm.h
index ec796c700e4c..345bc159f978 100644
--- a/include/linux/mfd/abx500/ab8500-bm.h
+++ b/include/linux/mfd/abx500/ab8500-bm.h
@@ -404,28 +404,6 @@ struct ab8500_bm_data {
 	const struct ab8500_fg_parameters *fg_params;
 };
 
-struct ab8500_charger_platform_data {
-	char **supplied_to;
-	size_t num_supplicants;
-	bool autopower_cfg;
-	bool ac_enabled;
-	bool usb_enabled;
-};
-
-struct ab8500_btemp_platform_data {
-	char **supplied_to;
-	size_t num_supplicants;
-};
-
-struct ab8500_fg_platform_data {
-	char **supplied_to;
-	size_t num_supplicants;
-};
-
-struct ab8500_chargalg_platform_data {
-	char **supplied_to;
-	size_t num_supplicants;
-};
 struct ab8500_btemp;
 struct ab8500_gpadc;
 struct ab8500_fg;
-- 
cgit v1.2.3


From ab78029ecc347debbd737f06688d788bd9d60c1d Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Tue, 22 Jan 2013 10:56:14 -0700
Subject: drivers/pinctrl: grab default handles from device core

This makes the device core auto-grab the pinctrl handle and set
the "default" (PINCTRL_STATE_DEFAULT) state for every device
that is present in the device model right before probe. This will
account for the lion's share of embedded silicon devcies.

A modification of the semantics for pinctrl_get() is also done:
previously if the pinctrl handle for a certain device was already
taken, the pinctrl core would return an error. Now, since the
core may have already default-grabbed the handle and set its
state to "default", if the handle was already taken, this will
be disregarded and the located, previously instanitated handle
will be returned to the caller.

This way all code in drivers explicitly requesting their pinctrl
handlers will still be functional, and drivers that want to
explicitly retrieve and switch their handles can still do that.
But if the desired functionality is just boilerplate of this
type in the probe() function:

struct pinctrl  *p;

p = devm_pinctrl_get_select_default(&dev);
if (IS_ERR(p)) {
   if (PTR_ERR(p) == -EPROBE_DEFER)
        return -EPROBE_DEFER;
        dev_warn(&dev, "no pinctrl handle\n");
}

The discussion began with the addition of such boilerplate
to the omap4 keypad driver:
http://marc.info/?l=linux-input&m=135091157719300&w=2

A previous approach using notifiers was discussed:
http://marc.info/?l=linux-kernel&m=135263661110528&w=2
This failed because it could not handle deferred probes.

This patch alone does not solve the entire dilemma faced:
whether code should be distributed into the drivers or
if it should be centralized to e.g. a PM domain. But it
solves the immediate issue of the addition of boilerplate
to a lot of drivers that just want to grab the default
state. As mentioned, they can later explicitly retrieve
the handle and set different states, and this could as
well be done by e.g. PM domains as it is only related
to a certain struct device * pointer.

ChangeLog v4->v5 (Stephen):
- Simplified the devicecore grab code.
- Deleted a piece of documentation recommending that pins
  be mapped to a device rather than hogged.
ChangeLog v3->v4 (Linus):
- Drop overzealous NULL checks.
- Move kref initialization to pinctrl_create().
- Seeking Tested-by from Stephen Warren so we do not disturb
  the Tegra platform.
- Seeking ACK on this from Greg (and others who like it) so I
  can merge it through the pinctrl subsystem.
ChangeLog v2->v3 (Linus):
- Abstain from using IS_ERR_OR_NULL() in the driver core,
  Russell recently sent a patch to remove it. Handle the
  NULL case explicitly even though it's a bogus case.
- Make sure we handle probe deferral correctly in the device
  core file. devm_kfree() the container on error so we don't
  waste memory for devices without pinctrl handles.
- Introduce reference counting into the pinctrl core using
  <linux/kref.h> so that we don't release pinctrl handles
  that have been obtained for two or more places.
ChangeLog v1->v2 (Linus):
- Only store a pointer in the device struct, and only allocate
  this if it's really used by the device.

Cc: Felipe Balbi <balbi@ti.com>
Cc: Benoit Cousson <b-cousson@ti.com>
Cc: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Cc: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Cc: Mitch Bradley <wmb@firmworks.com>
Cc: Ulf Hansson <ulf.hansson@linaro.org>
Cc: Rafael J. Wysocki <rjw@sisk.pl>
Cc: Jean-Christophe PLAGNIOL-VILLARD <plagnioj@jcrosoft.com>
Cc: Rickard Andersson <rickard.andersson@stericsson.com>
Cc: Russell King <linux@arm.linux.org.uk>
Reviewed-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
[swarren: fixed and simplified error-handling in pinctrl_bind_pins(), to
correctly handle deferred probe. Removed admonition from docs not to use
pinctrl hogs for devices]
Signed-off-by: Stephen Warren <swarren@nvidia.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 Documentation/pinctrl.txt       | 18 +++++++++--
 drivers/base/Makefile           |  1 +
 drivers/base/dd.c               |  7 +++++
 drivers/base/pinctrl.c          | 69 +++++++++++++++++++++++++++++++++++++++++
 drivers/pinctrl/core.c          | 30 +++++++++++++++---
 drivers/pinctrl/core.h          |  3 ++
 include/linux/device.h          |  7 +++++
 include/linux/pinctrl/devinfo.h | 45 +++++++++++++++++++++++++++
 8 files changed, 173 insertions(+), 7 deletions(-)
 create mode 100644 drivers/base/pinctrl.c
 create mode 100644 include/linux/pinctrl/devinfo.h

(limited to 'include/linux')

diff --git a/Documentation/pinctrl.txt b/Documentation/pinctrl.txt
index da40efbef6ec..a2b57e0a1db0 100644
--- a/Documentation/pinctrl.txt
+++ b/Documentation/pinctrl.txt
@@ -972,6 +972,18 @@ pinmux core.
 Pin control requests from drivers
 =================================
 
+When a device driver is about to probe the device core will automatically
+attempt to issue pinctrl_get_select_default() on these devices.
+This way driver writers do not need to add any of the boilerplate code
+of the type found below. However when doing fine-grained state selection
+and not using the "default" state, you may have to do some device driver
+handling of the pinctrl handles and states.
+
+So if you just want to put the pins for a certain device into the default
+state and be done with it, there is nothing you need to do besides
+providing the proper mapping table. The device core will take care of
+the rest.
+
 Generally it is discouraged to let individual drivers get and enable pin
 control. So if possible, handle the pin control in platform code or some other
 place where you have access to all the affected struct device * pointers. In
@@ -1097,9 +1109,9 @@ situations that can be electrically unpleasant, you will certainly want to
 mux in and bias pins in a certain way before the GPIO subsystems starts to
 deal with them.
 
-The above can be hidden: using pinctrl hogs, the pin control driver may be
-setting up the config and muxing for the pins when it is probing,
-nevertheless orthogonal to the GPIO subsystem.
+The above can be hidden: using the device core, the pinctrl core may be
+setting up the config and muxing for the pins right before the device is
+probing, nevertheless orthogonal to the GPIO subsystem.
 
 But there are also situations where it makes sense for the GPIO subsystem
 to communicate directly with with the pinctrl subsystem, using the latter
diff --git a/drivers/base/Makefile b/drivers/base/Makefile
index 5aa2d703d19f..4e22ce3ed73d 100644
--- a/drivers/base/Makefile
+++ b/drivers/base/Makefile
@@ -21,6 +21,7 @@ endif
 obj-$(CONFIG_SYS_HYPERVISOR) += hypervisor.o
 obj-$(CONFIG_REGMAP)	+= regmap/
 obj-$(CONFIG_SOC_BUS) += soc.o
+obj-$(CONFIG_PINCTRL) += pinctrl.o
 
 ccflags-$(CONFIG_DEBUG_DRIVER) := -DDEBUG
 
diff --git a/drivers/base/dd.c b/drivers/base/dd.c
index e3bbed8a617c..656310156dde 100644
--- a/drivers/base/dd.c
+++ b/drivers/base/dd.c
@@ -24,6 +24,7 @@
 #include <linux/wait.h>
 #include <linux/async.h>
 #include <linux/pm_runtime.h>
+#include <linux/pinctrl/devinfo.h>
 
 #include "base.h"
 #include "power/power.h"
@@ -269,6 +270,12 @@ static int really_probe(struct device *dev, struct device_driver *drv)
 	WARN_ON(!list_empty(&dev->devres_head));
 
 	dev->driver = drv;
+
+	/* If using pinctrl, bind pins now before probing */
+	ret = pinctrl_bind_pins(dev);
+	if (ret)
+		goto probe_failed;
+
 	if (driver_sysfs_add(dev)) {
 		printk(KERN_ERR "%s: driver_sysfs_add(%s) failed\n",
 			__func__, dev_name(dev));
diff --git a/drivers/base/pinctrl.c b/drivers/base/pinctrl.c
new file mode 100644
index 000000000000..67a274e86727
--- /dev/null
+++ b/drivers/base/pinctrl.c
@@ -0,0 +1,69 @@
+/*
+ * Driver core interface to the pinctrl subsystem.
+ *
+ * Copyright (C) 2012 ST-Ericsson SA
+ * Written on behalf of Linaro for ST-Ericsson
+ * Based on bits of regulator core, gpio core and clk core
+ *
+ * Author: Linus Walleij <linus.walleij@linaro.org>
+ *
+ * License terms: GNU General Public License (GPL) version 2
+ */
+
+#include <linux/device.h>
+#include <linux/pinctrl/devinfo.h>
+#include <linux/pinctrl/consumer.h>
+#include <linux/slab.h>
+
+/**
+ * pinctrl_bind_pins() - called by the device core before probe
+ * @dev: the device that is just about to probe
+ */
+int pinctrl_bind_pins(struct device *dev)
+{
+	int ret;
+
+	dev->pins = devm_kzalloc(dev, sizeof(*(dev->pins)), GFP_KERNEL);
+	if (!dev->pins)
+		return -ENOMEM;
+
+	dev->pins->p = devm_pinctrl_get(dev);
+	if (IS_ERR(dev->pins->p)) {
+		dev_dbg(dev, "no pinctrl handle\n");
+		ret = PTR_ERR(dev->pins->p);
+		goto cleanup_alloc;
+	}
+
+	dev->pins->default_state = pinctrl_lookup_state(dev->pins->p,
+					PINCTRL_STATE_DEFAULT);
+	if (IS_ERR(dev->pins->default_state)) {
+		dev_dbg(dev, "no default pinctrl state\n");
+		ret = 0;
+		goto cleanup_get;
+	}
+
+	ret = pinctrl_select_state(dev->pins->p, dev->pins->default_state);
+	if (ret) {
+		dev_dbg(dev, "failed to activate default pinctrl state\n");
+		goto cleanup_get;
+	}
+
+	return 0;
+
+	/*
+	 * If no pinctrl handle or default state was found for this device,
+	 * let's explicitly free the pin container in the device, there is
+	 * no point in keeping it around.
+	 */
+cleanup_get:
+	devm_pinctrl_put(dev->pins->p);
+cleanup_alloc:
+	devm_kfree(dev, dev->pins);
+	dev->pins = NULL;
+
+	/* Only return deferrals */
+	if (ret != -EPROBE_DEFER)
+		ret = 0;
+
+	return ret;
+}
diff --git a/drivers/pinctrl/core.c b/drivers/pinctrl/core.c
index e6373f30aeeb..5a2fe9aae20f 100644
--- a/drivers/pinctrl/core.c
+++ b/drivers/pinctrl/core.c
@@ -14,6 +14,7 @@
 #define pr_fmt(fmt) "pinctrl core: " fmt
 
 #include <linux/kernel.h>
+#include <linux/kref.h>
 #include <linux/export.h>
 #include <linux/init.h>
 #include <linux/device.h>
@@ -727,6 +728,8 @@ static struct pinctrl *create_pinctrl(struct device *dev)
 		return ERR_PTR(ret);
 	}
 
+	kref_init(&p->users);
+
 	/* Add the pinctrl handle to the global list */
 	list_add_tail(&p->node, &pinctrl_list);
 
@@ -740,9 +743,17 @@ static struct pinctrl *pinctrl_get_locked(struct device *dev)
 	if (WARN_ON(!dev))
 		return ERR_PTR(-EINVAL);
 
+	/*
+	 * See if somebody else (such as the device core) has already
+	 * obtained a handle to the pinctrl for this device. In that case,
+	 * return another pointer to it.
+	 */
 	p = find_pinctrl(dev);
-	if (p != NULL)
-		return ERR_PTR(-EBUSY);
+	if (p != NULL) {
+		dev_dbg(dev, "obtain a copy of previously claimed pinctrl\n");
+		kref_get(&p->users);
+		return p;
+	}
 
 	return create_pinctrl(dev);
 }
@@ -798,13 +809,24 @@ static void pinctrl_put_locked(struct pinctrl *p, bool inlist)
 }
 
 /**
- * pinctrl_put() - release a previously claimed pinctrl handle
+ * pinctrl_release() - release the pinctrl handle
+ * @kref: the kref in the pinctrl being released
+ */
+void pinctrl_release(struct kref *kref)
+{
+	struct pinctrl *p = container_of(kref, struct pinctrl, users);
+
+	pinctrl_put_locked(p, true);
+}
+
+/**
+ * pinctrl_put() - decrease use count on a previously claimed pinctrl handle
  * @p: the pinctrl handle to release
  */
 void pinctrl_put(struct pinctrl *p)
 {
 	mutex_lock(&pinctrl_mutex);
-	pinctrl_put_locked(p, true);
+	kref_put(&p->users, pinctrl_release);
 	mutex_unlock(&pinctrl_mutex);
 }
 EXPORT_SYMBOL_GPL(pinctrl_put);
diff --git a/drivers/pinctrl/core.h b/drivers/pinctrl/core.h
index 232a9f6db4aa..fdd350d639f6 100644
--- a/drivers/pinctrl/core.h
+++ b/drivers/pinctrl/core.h
@@ -9,6 +9,7 @@
  * License terms: GNU General Public License (GPL) version 2
  */
 
+#include <linux/kref.h>
 #include <linux/mutex.h>
 #include <linux/radix-tree.h>
 #include <linux/pinctrl/pinconf.h>
@@ -58,6 +59,7 @@ struct pinctrl_dev {
  * @state: the current state
  * @dt_maps: the mapping table chunks dynamically parsed from device tree for
  *	this device, if any
+ * @users: reference count
  */
 struct pinctrl {
 	struct list_head node;
@@ -65,6 +67,7 @@ struct pinctrl {
 	struct list_head states;
 	struct pinctrl_state *state;
 	struct list_head dt_maps;
+	struct kref users;
 };
 
 /**
diff --git a/include/linux/device.h b/include/linux/device.h
index 43dcda937ddf..001f6637aa47 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -21,6 +21,7 @@
 #include <linux/compiler.h>
 #include <linux/types.h>
 #include <linux/mutex.h>
+#include <linux/pinctrl/devinfo.h>
 #include <linux/pm.h>
 #include <linux/atomic.h>
 #include <linux/ratelimit.h>
@@ -620,6 +621,8 @@ struct acpi_dev_node {
  * @pm_domain:	Provide callbacks that are executed during system suspend,
  * 		hibernation, system resume and during runtime PM transitions
  * 		along with subsystem-level and driver-level callbacks.
+ * @pins:	For device pin management.
+ *		See Documentation/pinctrl.txt for details.
  * @numa_node:	NUMA node this device is close to.
  * @dma_mask:	Dma mask (if dma'ble device).
  * @coherent_dma_mask: Like dma_mask, but for alloc_coherent mapping as not all
@@ -672,6 +675,10 @@ struct device {
 	struct dev_pm_info	power;
 	struct dev_pm_domain	*pm_domain;
 
+#ifdef CONFIG_PINCTRL
+	struct dev_pin_info	*pins;
+#endif
+
 #ifdef CONFIG_NUMA
 	int		numa_node;	/* NUMA node this device is close to */
 #endif
diff --git a/include/linux/pinctrl/devinfo.h b/include/linux/pinctrl/devinfo.h
new file mode 100644
index 000000000000..6e5f8a985ea7
--- /dev/null
+++ b/include/linux/pinctrl/devinfo.h
@@ -0,0 +1,45 @@
+/*
+ * Per-device information from the pin control system.
+ * This is the stuff that get included into the device
+ * core.
+ *
+ * Copyright (C) 2012 ST-Ericsson SA
+ * Written on behalf of Linaro for ST-Ericsson
+ * This interface is used in the core to keep track of pins.
+ *
+ * Author: Linus Walleij <linus.walleij@linaro.org>
+ *
+ * License terms: GNU General Public License (GPL) version 2
+ */
+
+#ifndef PINCTRL_DEVINFO_H
+#define PINCTRL_DEVINFO_H
+
+#ifdef CONFIG_PINCTRL
+
+/* The device core acts as a consumer toward pinctrl */
+#include <linux/pinctrl/consumer.h>
+
+/**
+ * struct dev_pin_info - pin state container for devices
+ * @p: pinctrl handle for the containing device
+ * @default_state: the default state for the handle, if found
+ */
+struct dev_pin_info {
+	struct pinctrl *p;
+	struct pinctrl_state *default_state;
+};
+
+extern int pinctrl_bind_pins(struct device *dev);
+
+#else
+
+/* Stubs if we're not using pinctrl */
+
+static inline int pinctrl_bind_pins(struct device *dev)
+{
+	return 0;
+}
+
+#endif /* CONFIG_PINCTRL */
+#endif /* PINCTRL_DEVINFO_H */
-- 
cgit v1.2.3


From 8723d5037cafea09c7242303c6c8e5d7058cec61 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 23 Jan 2013 09:32:30 -0800
Subject: async: bring sanity to the use of words domain and running

In the beginning, running lists were literal struct list_heads.  Later
on, struct async_domain was added.  For some reason, while the
conversion substituted list_heads with async_domains, the variable
names weren't fully converted.  In more places, "running" was used for
struct async_domain while other places adopted new "domain" name.

The situation is made much worse by having async_domain's running list
named "domain" and async_entry's field pointing to async_domain named
"running".

So, we end up with mix of "running" and "domain" for variable names
for async_domain, with the field names of async_domain and async_entry
swapped between "running" and "domain".

It feels almost intentionally made to be as confusing as possible.
Bring some sanity by

* Renaming all async_domain variables "domain".

* s/async_running/async_dfl_domain/

* s/async_domain->domain/async_domain->running/

* s/async_entry->running/async_entry->domain/

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Dan Williams <djbw@fb.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/async.h |  6 ++---
 kernel/async.c        | 68 +++++++++++++++++++++++++--------------------------
 2 files changed, 37 insertions(+), 37 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/async.h b/include/linux/async.h
index 345169cfa304..34ff5c610e0e 100644
--- a/include/linux/async.h
+++ b/include/linux/async.h
@@ -19,7 +19,7 @@ typedef u64 async_cookie_t;
 typedef void (async_func_ptr) (void *data, async_cookie_t cookie);
 struct async_domain {
 	struct list_head node;
-	struct list_head domain;
+	struct list_head running;
 	int count;
 	unsigned registered:1;
 };
@@ -29,7 +29,7 @@ struct async_domain {
  */
 #define ASYNC_DOMAIN(_name) \
 	struct async_domain _name = { .node = LIST_HEAD_INIT(_name.node), \
-				      .domain = LIST_HEAD_INIT(_name.domain), \
+				      .running = LIST_HEAD_INIT(_name.running), \
 				      .count = 0, \
 				      .registered = 1 }
 
@@ -39,7 +39,7 @@ struct async_domain {
  */
 #define ASYNC_DOMAIN_EXCLUSIVE(_name) \
 	struct async_domain _name = { .node = LIST_HEAD_INIT(_name.node), \
-				      .domain = LIST_HEAD_INIT(_name.domain), \
+				      .running = LIST_HEAD_INIT(_name.running), \
 				      .count = 0, \
 				      .registered = 0 }
 
diff --git a/kernel/async.c b/kernel/async.c
index 6c68fc3fae7b..29d51d483bee 100644
--- a/kernel/async.c
+++ b/kernel/async.c
@@ -64,7 +64,7 @@ static async_cookie_t next_cookie = 1;
 #define MAX_WORK	32768
 
 static LIST_HEAD(async_pending);
-static ASYNC_DOMAIN(async_running);
+static ASYNC_DOMAIN(async_dfl_domain);
 static LIST_HEAD(async_domains);
 static DEFINE_SPINLOCK(async_lock);
 static DEFINE_MUTEX(async_register_mutex);
@@ -75,7 +75,7 @@ struct async_entry {
 	async_cookie_t		cookie;
 	async_func_ptr		*func;
 	void			*data;
-	struct async_domain	*running;
+	struct async_domain	*domain;
 };
 
 static DECLARE_WAIT_QUEUE_HEAD(async_done);
@@ -86,7 +86,7 @@ static atomic_t entry_count;
 /*
  * MUST be called with the lock held!
  */
-static async_cookie_t  __lowest_in_progress(struct async_domain *running)
+static async_cookie_t __lowest_in_progress(struct async_domain *domain)
 {
 	async_cookie_t first_running = next_cookie;	/* infinity value */
 	async_cookie_t first_pending = next_cookie;	/* ditto */
@@ -96,13 +96,13 @@ static async_cookie_t  __lowest_in_progress(struct async_domain *running)
 	 * Both running and pending lists are sorted but not disjoint.
 	 * Take the first cookies from both and return the min.
 	 */
-	if (!list_empty(&running->domain)) {
-		entry = list_first_entry(&running->domain, typeof(*entry), list);
+	if (!list_empty(&domain->running)) {
+		entry = list_first_entry(&domain->running, typeof(*entry), list);
 		first_running = entry->cookie;
 	}
 
 	list_for_each_entry(entry, &async_pending, list) {
-		if (entry->running == running) {
+		if (entry->domain == domain) {
 			first_pending = entry->cookie;
 			break;
 		}
@@ -111,13 +111,13 @@ static async_cookie_t  __lowest_in_progress(struct async_domain *running)
 	return min(first_running, first_pending);
 }
 
-static async_cookie_t  lowest_in_progress(struct async_domain *running)
+static async_cookie_t lowest_in_progress(struct async_domain *domain)
 {
 	unsigned long flags;
 	async_cookie_t ret;
 
 	spin_lock_irqsave(&async_lock, flags);
-	ret = __lowest_in_progress(running);
+	ret = __lowest_in_progress(domain);
 	spin_unlock_irqrestore(&async_lock, flags);
 	return ret;
 }
@@ -132,11 +132,11 @@ static void async_run_entry_fn(struct work_struct *work)
 	struct async_entry *pos;
 	unsigned long flags;
 	ktime_t uninitialized_var(calltime), delta, rettime;
-	struct async_domain *running = entry->running;
+	struct async_domain *domain = entry->domain;
 
 	/* 1) move self to the running queue, make sure it stays sorted */
 	spin_lock_irqsave(&async_lock, flags);
-	list_for_each_entry_reverse(pos, &running->domain, list)
+	list_for_each_entry_reverse(pos, &domain->running, list)
 		if (entry->cookie < pos->cookie)
 			break;
 	list_move_tail(&entry->list, &pos->list);
@@ -162,8 +162,8 @@ static void async_run_entry_fn(struct work_struct *work)
 	/* 3) remove self from the running queue */
 	spin_lock_irqsave(&async_lock, flags);
 	list_del(&entry->list);
-	if (running->registered && --running->count == 0)
-		list_del_init(&running->node);
+	if (domain->registered && --domain->count == 0)
+		list_del_init(&domain->node);
 
 	/* 4) free the entry */
 	kfree(entry);
@@ -175,7 +175,7 @@ static void async_run_entry_fn(struct work_struct *work)
 	wake_up(&async_done);
 }
 
-static async_cookie_t __async_schedule(async_func_ptr *ptr, void *data, struct async_domain *running)
+static async_cookie_t __async_schedule(async_func_ptr *ptr, void *data, struct async_domain *domain)
 {
 	struct async_entry *entry;
 	unsigned long flags;
@@ -201,13 +201,13 @@ static async_cookie_t __async_schedule(async_func_ptr *ptr, void *data, struct a
 	INIT_WORK(&entry->work, async_run_entry_fn);
 	entry->func = ptr;
 	entry->data = data;
-	entry->running = running;
+	entry->domain = domain;
 
 	spin_lock_irqsave(&async_lock, flags);
 	newcookie = entry->cookie = next_cookie++;
 	list_add_tail(&entry->list, &async_pending);
-	if (running->registered && running->count++ == 0)
-		list_add_tail(&running->node, &async_domains);
+	if (domain->registered && domain->count++ == 0)
+		list_add_tail(&domain->node, &async_domains);
 	atomic_inc(&entry_count);
 	spin_unlock_irqrestore(&async_lock, flags);
 
@@ -230,7 +230,7 @@ static async_cookie_t __async_schedule(async_func_ptr *ptr, void *data, struct a
  */
 async_cookie_t async_schedule(async_func_ptr *ptr, void *data)
 {
-	return __async_schedule(ptr, data, &async_running);
+	return __async_schedule(ptr, data, &async_dfl_domain);
 }
 EXPORT_SYMBOL_GPL(async_schedule);
 
@@ -238,18 +238,18 @@ EXPORT_SYMBOL_GPL(async_schedule);
  * async_schedule_domain - schedule a function for asynchronous execution within a certain domain
  * @ptr: function to execute asynchronously
  * @data: data pointer to pass to the function
- * @running: running list for the domain
+ * @domain: the domain
  *
  * Returns an async_cookie_t that may be used for checkpointing later.
- * @running may be used in the async_synchronize_*_domain() functions
- * to wait within a certain synchronization domain rather than globally.
- * A synchronization domain is specified via the running queue @running to use.
- * Note: This function may be called from atomic or non-atomic contexts.
+ * @domain may be used in the async_synchronize_*_domain() functions to
+ * wait within a certain synchronization domain rather than globally.  A
+ * synchronization domain is specified via @domain.  Note: This function
+ * may be called from atomic or non-atomic contexts.
  */
 async_cookie_t async_schedule_domain(async_func_ptr *ptr, void *data,
-				     struct async_domain *running)
+				     struct async_domain *domain)
 {
-	return __async_schedule(ptr, data, running);
+	return __async_schedule(ptr, data, domain);
 }
 EXPORT_SYMBOL_GPL(async_schedule_domain);
 
@@ -289,7 +289,7 @@ void async_unregister_domain(struct async_domain *domain)
 	mutex_lock(&async_register_mutex);
 	spin_lock_irq(&async_lock);
 	WARN_ON(!domain->registered || !list_empty(&domain->node) ||
-		!list_empty(&domain->domain));
+		!list_empty(&domain->running));
 	domain->registered = 0;
 	spin_unlock_irq(&async_lock);
 	mutex_unlock(&async_register_mutex);
@@ -298,10 +298,10 @@ EXPORT_SYMBOL_GPL(async_unregister_domain);
 
 /**
  * async_synchronize_full_domain - synchronize all asynchronous function within a certain domain
- * @domain: running list to synchronize on
+ * @domain: the domain to synchronize
  *
  * This function waits until all asynchronous function calls for the
- * synchronization domain specified by the running list @domain have been done.
+ * synchronization domain specified by @domain have been done.
  */
 void async_synchronize_full_domain(struct async_domain *domain)
 {
@@ -312,17 +312,17 @@ EXPORT_SYMBOL_GPL(async_synchronize_full_domain);
 /**
  * async_synchronize_cookie_domain - synchronize asynchronous function calls within a certain domain with cookie checkpointing
  * @cookie: async_cookie_t to use as checkpoint
- * @running: running list to synchronize on
+ * @domain: the domain to synchronize
  *
  * This function waits until all asynchronous function calls for the
- * synchronization domain specified by running list @running submitted
- * prior to @cookie have been done.
+ * synchronization domain specified by @domain submitted prior to @cookie
+ * have been done.
  */
-void async_synchronize_cookie_domain(async_cookie_t cookie, struct async_domain *running)
+void async_synchronize_cookie_domain(async_cookie_t cookie, struct async_domain *domain)
 {
 	ktime_t uninitialized_var(starttime), delta, endtime;
 
-	if (!running)
+	if (!domain)
 		return;
 
 	if (initcall_debug && system_state == SYSTEM_BOOTING) {
@@ -330,7 +330,7 @@ void async_synchronize_cookie_domain(async_cookie_t cookie, struct async_domain
 		starttime = ktime_get();
 	}
 
-	wait_event(async_done, lowest_in_progress(running) >= cookie);
+	wait_event(async_done, lowest_in_progress(domain) >= cookie);
 
 	if (initcall_debug && system_state == SYSTEM_BOOTING) {
 		endtime = ktime_get();
@@ -352,7 +352,7 @@ EXPORT_SYMBOL_GPL(async_synchronize_cookie_domain);
  */
 void async_synchronize_cookie(async_cookie_t cookie)
 {
-	async_synchronize_cookie_domain(cookie, &async_running);
+	async_synchronize_cookie_domain(cookie, &async_dfl_domain);
 }
 EXPORT_SYMBOL_GPL(async_synchronize_cookie);
 
-- 
cgit v1.2.3


From 52722794d6a48162fd8906d54618ae60a4abdb21 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 23 Jan 2013 09:32:30 -0800
Subject: async: keep pending tasks on async_domain and remove async_pending

Async kept single global pending list and per-domain running lists.
When an async item is queued, it's put on the global pending list.
The item is moved to the per-domain running list when its execution
starts.

At this point, this design complicates execution and synchronization
without bringing any benefit.  The list only matters for
synchronization which doesn't care whether a given async item is
pending or executing.  Also, global synchronization is done by
iterating through all active registered async_domains, so the global
async_pending list doesn't help anything either.

Rename async_domain->running to async_domain->pending and put async
items directly there and remove when execution completes.  This
simplifies lowest_in_progress() a lot - the first item on the pending
list is the one with the lowest cookie, and async_run_entry_fn()
doesn't have to mess with moving the item from pending to running.

After the change, whether a domain is empty or not can be trivially
determined by looking at async_domain->pending.  Remove
async_domain->count and use list_empty() on pending instead.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Dan Williams <djbw@fb.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/async.h |  9 +++-----
 kernel/async.c        | 63 ++++++++++++---------------------------------------
 2 files changed, 17 insertions(+), 55 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/async.h b/include/linux/async.h
index 34ff5c610e0e..a2e3f18b2ad6 100644
--- a/include/linux/async.h
+++ b/include/linux/async.h
@@ -19,8 +19,7 @@ typedef u64 async_cookie_t;
 typedef void (async_func_ptr) (void *data, async_cookie_t cookie);
 struct async_domain {
 	struct list_head node;
-	struct list_head running;
-	int count;
+	struct list_head pending;
 	unsigned registered:1;
 };
 
@@ -29,8 +28,7 @@ struct async_domain {
  */
 #define ASYNC_DOMAIN(_name) \
 	struct async_domain _name = { .node = LIST_HEAD_INIT(_name.node), \
-				      .running = LIST_HEAD_INIT(_name.running), \
-				      .count = 0, \
+				      .pending = LIST_HEAD_INIT(_name.pending), \
 				      .registered = 1 }
 
 /*
@@ -39,8 +37,7 @@ struct async_domain {
  */
 #define ASYNC_DOMAIN_EXCLUSIVE(_name) \
 	struct async_domain _name = { .node = LIST_HEAD_INIT(_name.node), \
-				      .running = LIST_HEAD_INIT(_name.running), \
-				      .count = 0, \
+				      .pending = LIST_HEAD_INIT(_name.pending), \
 				      .registered = 0 }
 
 extern async_cookie_t async_schedule(async_func_ptr *ptr, void *data);
diff --git a/kernel/async.c b/kernel/async.c
index a4c1a9e63b2e..7c9f50f436d6 100644
--- a/kernel/async.c
+++ b/kernel/async.c
@@ -64,7 +64,6 @@ static async_cookie_t next_cookie = 1;
 #define MAX_WORK		32768
 #define ASYNC_COOKIE_MAX	ULLONG_MAX	/* infinity cookie */
 
-static LIST_HEAD(async_pending);
 static ASYNC_DOMAIN(async_dfl_domain);
 static LIST_HEAD(async_domains);
 static DEFINE_SPINLOCK(async_lock);
@@ -83,42 +82,17 @@ static DECLARE_WAIT_QUEUE_HEAD(async_done);
 
 static atomic_t entry_count;
 
-
-/*
- * MUST be called with the lock held!
- */
-static async_cookie_t __lowest_in_progress(struct async_domain *domain)
-{
-	async_cookie_t first_running = ASYNC_COOKIE_MAX;
-	async_cookie_t first_pending = ASYNC_COOKIE_MAX;
-	struct async_entry *entry;
-
-	/*
-	 * Both running and pending lists are sorted but not disjoint.
-	 * Take the first cookies from both and return the min.
-	 */
-	if (!list_empty(&domain->running)) {
-		entry = list_first_entry(&domain->running, typeof(*entry), list);
-		first_running = entry->cookie;
-	}
-
-	list_for_each_entry(entry, &async_pending, list) {
-		if (entry->domain == domain) {
-			first_pending = entry->cookie;
-			break;
-		}
-	}
-
-	return min(first_running, first_pending);
-}
-
 static async_cookie_t lowest_in_progress(struct async_domain *domain)
 {
+	async_cookie_t ret = ASYNC_COOKIE_MAX;
 	unsigned long flags;
-	async_cookie_t ret;
 
 	spin_lock_irqsave(&async_lock, flags);
-	ret = __lowest_in_progress(domain);
+	if (!list_empty(&domain->pending)) {
+		struct async_entry *first = list_first_entry(&domain->pending,
+						struct async_entry, list);
+		ret = first->cookie;
+	}
 	spin_unlock_irqrestore(&async_lock, flags);
 	return ret;
 }
@@ -130,20 +104,11 @@ static void async_run_entry_fn(struct work_struct *work)
 {
 	struct async_entry *entry =
 		container_of(work, struct async_entry, work);
-	struct async_entry *pos;
 	unsigned long flags;
 	ktime_t uninitialized_var(calltime), delta, rettime;
 	struct async_domain *domain = entry->domain;
 
-	/* 1) move self to the running queue, make sure it stays sorted */
-	spin_lock_irqsave(&async_lock, flags);
-	list_for_each_entry_reverse(pos, &domain->running, list)
-		if (entry->cookie < pos->cookie)
-			break;
-	list_move_tail(&entry->list, &pos->list);
-	spin_unlock_irqrestore(&async_lock, flags);
-
-	/* 2) run (and print duration) */
+	/* 1) run (and print duration) */
 	if (initcall_debug && system_state == SYSTEM_BOOTING) {
 		printk(KERN_DEBUG "calling  %lli_%pF @ %i\n",
 			(long long)entry->cookie,
@@ -160,19 +125,19 @@ static void async_run_entry_fn(struct work_struct *work)
 			(long long)ktime_to_ns(delta) >> 10);
 	}
 
-	/* 3) remove self from the running queue */
+	/* 2) remove self from the pending queues */
 	spin_lock_irqsave(&async_lock, flags);
 	list_del(&entry->list);
-	if (domain->registered && --domain->count == 0)
+	if (domain->registered && list_empty(&domain->pending))
 		list_del_init(&domain->node);
 
-	/* 4) free the entry */
+	/* 3) free the entry */
 	kfree(entry);
 	atomic_dec(&entry_count);
 
 	spin_unlock_irqrestore(&async_lock, flags);
 
-	/* 5) wake up any waiters */
+	/* 4) wake up any waiters */
 	wake_up(&async_done);
 }
 
@@ -206,9 +171,9 @@ static async_cookie_t __async_schedule(async_func_ptr *ptr, void *data, struct a
 
 	spin_lock_irqsave(&async_lock, flags);
 	newcookie = entry->cookie = next_cookie++;
-	list_add_tail(&entry->list, &async_pending);
-	if (domain->registered && domain->count++ == 0)
+	if (domain->registered && list_empty(&domain->pending))
 		list_add_tail(&domain->node, &async_domains);
+	list_add_tail(&entry->list, &domain->pending);
 	atomic_inc(&entry_count);
 	spin_unlock_irqrestore(&async_lock, flags);
 
@@ -290,7 +255,7 @@ void async_unregister_domain(struct async_domain *domain)
 	mutex_lock(&async_register_mutex);
 	spin_lock_irq(&async_lock);
 	WARN_ON(!domain->registered || !list_empty(&domain->node) ||
-		!list_empty(&domain->running));
+		!list_empty(&domain->pending));
 	domain->registered = 0;
 	spin_unlock_irq(&async_lock);
 	mutex_unlock(&async_register_mutex);
-- 
cgit v1.2.3


From 055dc21a1d1d219608cd4baac7d0683fb2cbbe8a Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Tue, 22 Jan 2013 09:49:50 +0000
Subject: soreuseport: infrastructure

Definitions and macros for implementing soreusport.

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/alpha/include/uapi/asm/socket.h   | 2 +-
 arch/avr32/include/uapi/asm/socket.h   | 2 +-
 arch/cris/include/uapi/asm/socket.h    | 2 +-
 arch/frv/include/uapi/asm/socket.h     | 2 +-
 arch/h8300/include/uapi/asm/socket.h   | 2 +-
 arch/ia64/include/uapi/asm/socket.h    | 2 +-
 arch/m32r/include/uapi/asm/socket.h    | 2 +-
 arch/mips/include/uapi/asm/socket.h    | 3 +--
 arch/mn10300/include/uapi/asm/socket.h | 2 +-
 arch/parisc/include/uapi/asm/socket.h  | 2 +-
 arch/powerpc/include/uapi/asm/socket.h | 2 +-
 arch/s390/include/uapi/asm/socket.h    | 2 +-
 arch/sparc/include/uapi/asm/socket.h   | 2 +-
 arch/xtensa/include/uapi/asm/socket.h  | 2 +-
 include/linux/random.h                 | 6 ++++++
 include/net/sock.h                     | 5 ++++-
 include/uapi/asm-generic/socket.h      | 3 +--
 net/core/sock.c                        | 7 +++++++
 18 files changed, 32 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h
index 755702eefd9c..c5195524d1ef 100644
--- a/arch/alpha/include/uapi/asm/socket.h
+++ b/arch/alpha/include/uapi/asm/socket.h
@@ -19,7 +19,7 @@
 #define SO_BROADCAST	0x0020
 #define SO_LINGER	0x0080
 #define SO_OOBINLINE	0x0100
-/* To add :#define SO_REUSEPORT 0x0200 */
+#define SO_REUSEPORT	0x0200
 
 #define SO_TYPE		0x1008
 #define SO_ERROR	0x1007
diff --git a/arch/avr32/include/uapi/asm/socket.h b/arch/avr32/include/uapi/asm/socket.h
index f3f38a0e2ef9..51c6401582ea 100644
--- a/arch/avr32/include/uapi/asm/socket.h
+++ b/arch/avr32/include/uapi/asm/socket.h
@@ -22,7 +22,7 @@
 #define SO_PRIORITY	12
 #define SO_LINGER	13
 #define SO_BSDCOMPAT	14
-/* To add :#define SO_REUSEPORT 15 */
+#define SO_REUSEPORT	15
 #define SO_PASSCRED	16
 #define SO_PEERCRED	17
 #define SO_RCVLOWAT	18
diff --git a/arch/cris/include/uapi/asm/socket.h b/arch/cris/include/uapi/asm/socket.h
index 406b5838defd..50692b738c75 100644
--- a/arch/cris/include/uapi/asm/socket.h
+++ b/arch/cris/include/uapi/asm/socket.h
@@ -24,7 +24,7 @@
 #define SO_PRIORITY	12
 #define SO_LINGER	13
 #define SO_BSDCOMPAT	14
-/* To add :#define SO_REUSEPORT 15 */
+#define SO_REUSEPORT	15
 #define SO_PASSCRED	16
 #define SO_PEERCRED	17
 #define SO_RCVLOWAT	18
diff --git a/arch/frv/include/uapi/asm/socket.h b/arch/frv/include/uapi/asm/socket.h
index d8e1132a1ab6..595391f0f98c 100644
--- a/arch/frv/include/uapi/asm/socket.h
+++ b/arch/frv/include/uapi/asm/socket.h
@@ -22,7 +22,7 @@
 #define SO_PRIORITY	12
 #define SO_LINGER	13
 #define SO_BSDCOMPAT	14
-/* To add :#define SO_REUSEPORT 15 */
+#define SO_REUSEPORT	15
 #define SO_PASSCRED	16
 #define SO_PEERCRED	17
 #define SO_RCVLOWAT	18
diff --git a/arch/h8300/include/uapi/asm/socket.h b/arch/h8300/include/uapi/asm/socket.h
index c8b87a828206..43e32621da7d 100644
--- a/arch/h8300/include/uapi/asm/socket.h
+++ b/arch/h8300/include/uapi/asm/socket.h
@@ -22,7 +22,7 @@
 #define SO_PRIORITY	12
 #define SO_LINGER	13
 #define SO_BSDCOMPAT	14
-/* To add :#define SO_REUSEPORT 15 */
+#define SO_REUSEPORT	15
 #define SO_PASSCRED	16
 #define SO_PEERCRED	17
 #define SO_RCVLOWAT	18
diff --git a/arch/ia64/include/uapi/asm/socket.h b/arch/ia64/include/uapi/asm/socket.h
index f390896c3104..c567adc8bea5 100644
--- a/arch/ia64/include/uapi/asm/socket.h
+++ b/arch/ia64/include/uapi/asm/socket.h
@@ -31,7 +31,7 @@
 #define SO_PRIORITY	12
 #define SO_LINGER	13
 #define SO_BSDCOMPAT	14
-/* To add :#define SO_REUSEPORT 15 */
+#define SO_REUSEPORT	15
 #define SO_PASSCRED	16
 #define SO_PEERCRED	17
 #define SO_RCVLOWAT	18
diff --git a/arch/m32r/include/uapi/asm/socket.h b/arch/m32r/include/uapi/asm/socket.h
index 6a895155e7a3..519afa2755db 100644
--- a/arch/m32r/include/uapi/asm/socket.h
+++ b/arch/m32r/include/uapi/asm/socket.h
@@ -22,7 +22,7 @@
 #define SO_PRIORITY	12
 #define SO_LINGER	13
 #define SO_BSDCOMPAT	14
-/* To add :#define SO_REUSEPORT 15 */
+#define SO_REUSEPORT	15
 #define SO_PASSCRED	16
 #define SO_PEERCRED	17
 #define SO_RCVLOWAT	18
diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h
index 9d11a7713923..7e2723637b35 100644
--- a/arch/mips/include/uapi/asm/socket.h
+++ b/arch/mips/include/uapi/asm/socket.h
@@ -28,8 +28,7 @@
 #define SO_LINGER	0x0080	/* Block on close of a reliable
 				   socket to transmit pending data.  */
 #define SO_OOBINLINE 0x0100	/* Receive out-of-band data in-band.  */
-#if 0
-To add: #define SO_REUSEPORT 0x0200	/* Allow local address and port reuse.  */
+#define SO_REUSEPORT 0x0200	/* Allow local address and port reuse.  */
 #endif
 
 #define SO_TYPE		0x1008	/* Compatible name for SO_STYLE.  */
diff --git a/arch/mn10300/include/uapi/asm/socket.h b/arch/mn10300/include/uapi/asm/socket.h
index ab702c40b30e..5c7c7c988544 100644
--- a/arch/mn10300/include/uapi/asm/socket.h
+++ b/arch/mn10300/include/uapi/asm/socket.h
@@ -22,7 +22,7 @@
 #define SO_PRIORITY	12
 #define SO_LINGER	13
 #define SO_BSDCOMPAT	14
-/* To add :#define SO_REUSEPORT 15 */
+#define SO_REUSEPORT	15
 #define SO_PASSCRED	16
 #define SO_PEERCRED	17
 #define SO_RCVLOWAT	18
diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h
index da2c8d3c209e..526e4b9aece0 100644
--- a/arch/parisc/include/uapi/asm/socket.h
+++ b/arch/parisc/include/uapi/asm/socket.h
@@ -13,7 +13,7 @@
 #define SO_BROADCAST	0x0020
 #define SO_LINGER	0x0080
 #define SO_OOBINLINE	0x0100
-/* To add :#define SO_REUSEPORT 0x0200 */
+#define SO_REUSEPORT	0x0200
 #define SO_SNDBUF	0x1001
 #define SO_RCVBUF	0x1002
 #define SO_SNDBUFFORCE	0x100a
diff --git a/arch/powerpc/include/uapi/asm/socket.h b/arch/powerpc/include/uapi/asm/socket.h
index e6ca31816cc9..a26dcaece509 100644
--- a/arch/powerpc/include/uapi/asm/socket.h
+++ b/arch/powerpc/include/uapi/asm/socket.h
@@ -29,7 +29,7 @@
 #define SO_PRIORITY	12
 #define SO_LINGER	13
 #define SO_BSDCOMPAT	14
-/* To add :#define SO_REUSEPORT 15 */
+#define SO_REUSEPORT	15
 #define SO_RCVLOWAT	16
 #define SO_SNDLOWAT	17
 #define SO_RCVTIMEO	18
diff --git a/arch/s390/include/uapi/asm/socket.h b/arch/s390/include/uapi/asm/socket.h
index 9ce60b68f070..f99eea7fff0f 100644
--- a/arch/s390/include/uapi/asm/socket.h
+++ b/arch/s390/include/uapi/asm/socket.h
@@ -28,7 +28,7 @@
 #define SO_PRIORITY	12
 #define SO_LINGER	13
 #define SO_BSDCOMPAT	14
-/* To add :#define SO_REUSEPORT 15 */
+#define SO_REUSEPORT	15
 #define SO_PASSCRED	16
 #define SO_PEERCRED	17
 #define SO_RCVLOWAT	18
diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h
index fbbba57547d1..cbbad74b2e06 100644
--- a/arch/sparc/include/uapi/asm/socket.h
+++ b/arch/sparc/include/uapi/asm/socket.h
@@ -15,7 +15,7 @@
 #define SO_PEERCRED	0x0040
 #define SO_LINGER	0x0080
 #define SO_OOBINLINE	0x0100
-/* To add :#define SO_REUSEPORT 0x0200 */
+#define SO_REUSEPORT	0x0200
 #define SO_BSDCOMPAT    0x0400
 #define SO_RCVLOWAT     0x0800
 #define SO_SNDLOWAT     0x1000
diff --git a/arch/xtensa/include/uapi/asm/socket.h b/arch/xtensa/include/uapi/asm/socket.h
index dbf316487b51..35905cb6e419 100644
--- a/arch/xtensa/include/uapi/asm/socket.h
+++ b/arch/xtensa/include/uapi/asm/socket.h
@@ -32,7 +32,7 @@
 #define SO_PRIORITY	12
 #define SO_LINGER	13
 #define SO_BSDCOMPAT	14
-/* To add :#define SO_REUSEPORT 15 */
+#define SO_REUSEPORT	15
 #define SO_PASSCRED	16
 #define SO_PEERCRED	17
 #define SO_RCVLOWAT	18
diff --git a/include/linux/random.h b/include/linux/random.h
index d9846088c2c5..347ce553a306 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -74,4 +74,10 @@ static inline int arch_get_random_int(unsigned int *v)
 }
 #endif
 
+/* Pseudo random number generator from numerical recipes. */
+static inline u32 next_pseudo_random32(u32 seed)
+{
+	return seed * 1664525 + 1013904223;
+}
+
 #endif /* _LINUX_RANDOM_H */
diff --git a/include/net/sock.h b/include/net/sock.h
index 5a34e2f03657..581dc6bd7dc6 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -140,6 +140,7 @@ typedef __u64 __bitwise __addrpair;
  *	@skc_family: network address family
  *	@skc_state: Connection state
  *	@skc_reuse: %SO_REUSEADDR setting
+ *	@skc_reuseport: %SO_REUSEPORT setting
  *	@skc_bound_dev_if: bound device index if != 0
  *	@skc_bind_node: bind hash linkage for various protocol lookup tables
  *	@skc_portaddr_node: second hash linkage for UDP/UDP-Lite protocol
@@ -179,7 +180,8 @@ struct sock_common {
 
 	unsigned short		skc_family;
 	volatile unsigned char	skc_state;
-	unsigned char		skc_reuse;
+	unsigned char		skc_reuse:4;
+	unsigned char		skc_reuseport:4;
 	int			skc_bound_dev_if;
 	union {
 		struct hlist_node	skc_bind_node;
@@ -297,6 +299,7 @@ struct sock {
 #define sk_family		__sk_common.skc_family
 #define sk_state		__sk_common.skc_state
 #define sk_reuse		__sk_common.skc_reuse
+#define sk_reuseport		__sk_common.skc_reuseport
 #define sk_bound_dev_if		__sk_common.skc_bound_dev_if
 #define sk_bind_node		__sk_common.skc_bind_node
 #define sk_prot			__sk_common.skc_prot
diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h
index 3f6a99201410..4ef3acbba5da 100644
--- a/include/uapi/asm-generic/socket.h
+++ b/include/uapi/asm-generic/socket.h
@@ -22,8 +22,7 @@
 #define SO_PRIORITY	12
 #define SO_LINGER	13
 #define SO_BSDCOMPAT	14
-/* To add :#define SO_REUSEPORT 15 */
-
+#define SO_REUSEPORT	15
 #ifndef SO_PASSCRED /* powerpc only differs in these */
 #define SO_PASSCRED	16
 #define SO_PEERCRED	17
diff --git a/net/core/sock.c b/net/core/sock.c
index 8258fb741e9a..235fb89e8973 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -665,6 +665,9 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
 	case SO_REUSEADDR:
 		sk->sk_reuse = (valbool ? SK_CAN_REUSE : SK_NO_REUSE);
 		break;
+	case SO_REUSEPORT:
+		sk->sk_reuseport = valbool;
+		break;
 	case SO_TYPE:
 	case SO_PROTOCOL:
 	case SO_DOMAIN:
@@ -972,6 +975,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
 		v.val = sk->sk_reuse;
 		break;
 
+	case SO_REUSEPORT:
+		v.val = sk->sk_reuseport;
+		break;
+
 	case SO_KEEPALIVE:
 		v.val = sock_flag(sk, SOCK_KEEPOPEN);
 		break;
-- 
cgit v1.2.3


From 35525b79786b2ba58ef13822198ce22c497bc7a2 Mon Sep 17 00:00:00 2001
From: Andriy Skulysh <andriy_skulysh@xyratex.com>
Date: Mon, 7 Jan 2013 00:12:15 +0200
Subject: sunrpc: Fix lockd sleeping until timeout

There is a race in enqueueing thread to a pool and
waking up a thread.
lockd doesn't wake up on reception of lock granted callback
if svc_wake_up() is called before lockd's thread is added
to a pool.

Signed-off-by: Andriy Skulysh <Andriy_Skulysh@xyratex.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/svc.h | 1 +
 net/sunrpc/svc_xprt.c      | 9 ++++++++-
 2 files changed, 9 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 676ddf53b3ee..1f0216b9a6c9 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -50,6 +50,7 @@ struct svc_pool {
 	unsigned int		sp_nrthreads;	/* # of threads in pool */
 	struct list_head	sp_all_threads;	/* all server threads */
 	struct svc_pool_stats	sp_stats;	/* statistics on pool operation */
+	int			sp_task_pending;/* has pending task */
 } ____cacheline_aligned_in_smp;
 
 /*
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index b8e47fac7315..5a9d40c5a9f3 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -499,7 +499,8 @@ void svc_wake_up(struct svc_serv *serv)
 			rqstp->rq_xprt = NULL;
 			 */
 			wake_up(&rqstp->rq_wait);
-		}
+		} else
+			pool->sp_task_pending = 1;
 		spin_unlock_bh(&pool->sp_lock);
 	}
 }
@@ -634,7 +635,13 @@ struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
 		 * long for cache updates.
 		 */
 		rqstp->rq_chandle.thread_wait = 1*HZ;
+		pool->sp_task_pending = 0;
 	} else {
+		if (pool->sp_task_pending) {
+			pool->sp_task_pending = 0;
+			spin_unlock_bh(&pool->sp_lock);
+			return ERR_PTR(-EAGAIN);
+		}
 		/* No data pending. Go to sleep */
 		svc_thread_enqueue(pool, rqstp);
 
-- 
cgit v1.2.3


From 2db54c72395298a58f29c75ae880be9e478fdbbd Mon Sep 17 00:00:00 2001
From: Steffen Trumtrar <s.trumtrar@pengutronix.de>
Date: Wed, 14 Nov 2012 10:55:04 +0100
Subject: fbmon: add videomode helpers

Add a function to convert from the generic videomode to a fb_videomode.

Signed-off-by: Steffen Trumtrar <s.trumtrar@pengutronix.de>
Reviewed-by: Thierry Reding <thierry.reding@avionic-design.de>
Acked-by: Thierry Reding <thierry.reding@avionic-design.de>
Tested-by: Thierry Reding <thierry.reding@avionic-design.de>
Tested-by: Philipp Zabel <p.zabel@pengutronix.de>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Acked-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Tested-by: Afzal Mohammed <Afzal@ti.com>
Tested-by: Rob Clark <robclark@gmail.com>
Tested-by: Leela Krishna Amudala <leelakrishna.a@gmail.com>
---
 drivers/video/fbmon.c | 52 +++++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/fb.h    |  4 ++++
 2 files changed, 56 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/video/fbmon.c b/drivers/video/fbmon.c
index cef65574db6c..17ce135a18fd 100644
--- a/drivers/video/fbmon.c
+++ b/drivers/video/fbmon.c
@@ -31,6 +31,7 @@
 #include <linux/pci.h>
 #include <linux/slab.h>
 #include <video/edid.h>
+#include <video/videomode.h>
 #ifdef CONFIG_PPC_OF
 #include <asm/prom.h>
 #include <asm/pci-bridge.h>
@@ -1373,6 +1374,57 @@ int fb_get_mode(int flags, u32 val, struct fb_var_screeninfo *var, struct fb_inf
 	kfree(timings);
 	return err;
 }
+
+#if IS_ENABLED(CONFIG_VIDEOMODE)
+int fb_videomode_from_videomode(const struct videomode *vm,
+				struct fb_videomode *fbmode)
+{
+	unsigned int htotal, vtotal;
+
+	fbmode->xres = vm->hactive;
+	fbmode->left_margin = vm->hback_porch;
+	fbmode->right_margin = vm->hfront_porch;
+	fbmode->hsync_len = vm->hsync_len;
+
+	fbmode->yres = vm->vactive;
+	fbmode->upper_margin = vm->vback_porch;
+	fbmode->lower_margin = vm->vfront_porch;
+	fbmode->vsync_len = vm->vsync_len;
+
+	/* prevent division by zero in KHZ2PICOS macro */
+	fbmode->pixclock = vm->pixelclock ?
+			KHZ2PICOS(vm->pixelclock / 1000) : 0;
+
+	fbmode->sync = 0;
+	fbmode->vmode = 0;
+	if (vm->dmt_flags & VESA_DMT_HSYNC_HIGH)
+		fbmode->sync |= FB_SYNC_HOR_HIGH_ACT;
+	if (vm->dmt_flags & VESA_DMT_HSYNC_HIGH)
+		fbmode->sync |= FB_SYNC_VERT_HIGH_ACT;
+	if (vm->data_flags & DISPLAY_FLAGS_INTERLACED)
+		fbmode->vmode |= FB_VMODE_INTERLACED;
+	if (vm->data_flags & DISPLAY_FLAGS_DOUBLESCAN)
+		fbmode->vmode |= FB_VMODE_DOUBLE;
+	fbmode->flag = 0;
+
+	htotal = vm->hactive + vm->hfront_porch + vm->hback_porch +
+		 vm->hsync_len;
+	vtotal = vm->vactive + vm->vfront_porch + vm->vback_porch +
+		 vm->vsync_len;
+	/* prevent division by zero */
+	if (htotal && vtotal) {
+		fbmode->refresh = vm->pixelclock / (htotal * vtotal);
+	/* a mode must have htotal and vtotal != 0 or it is invalid */
+	} else {
+		fbmode->refresh = 0;
+		return -EINVAL;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(fb_videomode_from_videomode);
+#endif
+
 #else
 int fb_parse_edid(unsigned char *edid, struct fb_var_screeninfo *var)
 {
diff --git a/include/linux/fb.h b/include/linux/fb.h
index c7a95714b1fe..100a1765b787 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -19,6 +19,7 @@ struct vm_area_struct;
 struct fb_info;
 struct device;
 struct file;
+struct videomode;
 
 /* Definitions below are used in the parsed monitor specs */
 #define FB_DPMS_ACTIVE_OFF	1
@@ -714,6 +715,9 @@ extern void fb_destroy_modedb(struct fb_videomode *modedb);
 extern int fb_find_mode_cvt(struct fb_videomode *mode, int margins, int rb);
 extern unsigned char *fb_ddc_read(struct i2c_adapter *adapter);
 
+extern int fb_videomode_from_videomode(const struct videomode *vm,
+				       struct fb_videomode *fbmode);
+
 /* drivers/video/modedb.c */
 #define VESA_MODEDB_SIZE 34
 extern void fb_var_to_videomode(struct fb_videomode *mode,
-- 
cgit v1.2.3


From 790890444f591145d3a00623af461c9006e17d51 Mon Sep 17 00:00:00 2001
From: Steffen Trumtrar <s.trumtrar@pengutronix.de>
Date: Wed, 14 Nov 2012 11:12:38 +0100
Subject: fbmon: add of_videomode helpers

Add helper to get fb_videomode from devicetree.

Signed-off-by: Steffen Trumtrar <s.trumtrar@pengutronix.de>
Reviewed-by: Thierry Reding <thierry.reding@avionic-design.de>
Acked-by: Thierry Reding <thierry.reding@avionic-design.de>
Tested-by: Thierry Reding <thierry.reding@avionic-design.de>
Tested-by: Philipp Zabel <p.zabel@pengutronix.de>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Acked-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Tested-by: Afzal Mohammed <Afzal@ti.com>
Tested-by: Rob Clark <robclark@gmail.com>
Tested-by: Leela Krishna Amudala <leelakrishna.a@gmail.com>
---
 drivers/video/fbmon.c | 42 ++++++++++++++++++++++++++++++++++++++++++
 include/linux/fb.h    |  4 ++++
 2 files changed, 46 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/video/fbmon.c b/drivers/video/fbmon.c
index 17ce135a18fd..94ad0f71383c 100644
--- a/drivers/video/fbmon.c
+++ b/drivers/video/fbmon.c
@@ -31,6 +31,7 @@
 #include <linux/pci.h>
 #include <linux/slab.h>
 #include <video/edid.h>
+#include <video/of_videomode.h>
 #include <video/videomode.h>
 #ifdef CONFIG_PPC_OF
 #include <asm/prom.h>
@@ -1425,6 +1426,47 @@ int fb_videomode_from_videomode(const struct videomode *vm,
 EXPORT_SYMBOL_GPL(fb_videomode_from_videomode);
 #endif
 
+#if IS_ENABLED(CONFIG_OF_VIDEOMODE)
+static inline void dump_fb_videomode(const struct fb_videomode *m)
+{
+	pr_debug("fb_videomode = %ux%u@%uHz (%ukHz) %u %u %u %u %u %u %u %u %u\n",
+		 m->xres, m->yres, m->refresh, m->pixclock, m->left_margin,
+		 m->right_margin, m->upper_margin, m->lower_margin,
+		 m->hsync_len, m->vsync_len, m->sync, m->vmode, m->flag);
+}
+
+/**
+ * of_get_fb_videomode - get a fb_videomode from devicetree
+ * @np: device_node with the timing specification
+ * @fb: will be set to the return value
+ * @index: index into the list of display timings in devicetree
+ *
+ * DESCRIPTION:
+ * This function is expensive and should only be used, if only one mode is to be
+ * read from DT. To get multiple modes start with of_get_display_timings ond
+ * work with that instead.
+ */
+int of_get_fb_videomode(struct device_node *np, struct fb_videomode *fb,
+			int index)
+{
+	struct videomode vm;
+	int ret;
+
+	ret = of_get_videomode(np, &vm, index);
+	if (ret)
+		return ret;
+
+	fb_videomode_from_videomode(&vm, fb);
+
+	pr_debug("%s: got %dx%d display mode from %s\n",
+		of_node_full_name(np), vm.hactive, vm.vactive, np->name);
+	dump_fb_videomode(fb);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(of_get_fb_videomode);
+#endif
+
 #else
 int fb_parse_edid(unsigned char *edid, struct fb_var_screeninfo *var)
 {
diff --git a/include/linux/fb.h b/include/linux/fb.h
index 100a1765b787..58b98606ac26 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -20,6 +20,7 @@ struct fb_info;
 struct device;
 struct file;
 struct videomode;
+struct device_node;
 
 /* Definitions below are used in the parsed monitor specs */
 #define FB_DPMS_ACTIVE_OFF	1
@@ -715,6 +716,9 @@ extern void fb_destroy_modedb(struct fb_videomode *modedb);
 extern int fb_find_mode_cvt(struct fb_videomode *mode, int margins, int rb);
 extern unsigned char *fb_ddc_read(struct i2c_adapter *adapter);
 
+extern int of_get_fb_videomode(struct device_node *np,
+			       struct fb_videomode *fb,
+			       int index);
 extern int fb_videomode_from_videomode(const struct videomode *vm,
 				       struct fb_videomode *fbmode);
 
-- 
cgit v1.2.3


From ba6fdda46b377034c782c0b89c8f1090b31eabd8 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Sat, 22 Dec 2012 16:59:51 +0100
Subject: profiling: Remove unused timer hook

The last remaining user was oprofile and its use has been
removed a while ago in commit bc078e4eab65f11bba
("oprofile: convert oprofile from timer_hook to hrtimer").

There doesn't seem to be any upstream user of this hook
for about two years now. And I'm not even aware of any out of
tree user.

Let's remove it.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Alessio Igor Bogani <abogani@kernel.org>
Cc: Avi Kivity <avi@redhat.com>
Cc: Chris Metcalf <cmetcalf@tilera.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Geoff Levand <geoff@infradead.org>
Cc: Gilad Ben Yossef <gilad@benyossef.com>
Cc: Hakan Akkan <hakanakkan@gmail.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/1356191991-2251-1-git-send-email-fweisbec@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/profile.h | 13 -------------
 kernel/profile.c        | 24 ------------------------
 2 files changed, 37 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/profile.h b/include/linux/profile.h
index a0fc32279fc0..21123902366d 100644
--- a/include/linux/profile.h
+++ b/include/linux/profile.h
@@ -82,9 +82,6 @@ int task_handoff_unregister(struct notifier_block * n);
 int profile_event_register(enum profile_type, struct notifier_block * n);
 int profile_event_unregister(enum profile_type, struct notifier_block * n);
 
-int register_timer_hook(int (*hook)(struct pt_regs *));
-void unregister_timer_hook(int (*hook)(struct pt_regs *));
-
 struct pt_regs;
 
 #else
@@ -135,16 +132,6 @@ static inline int profile_event_unregister(enum profile_type t, struct notifier_
 #define profile_handoff_task(a) (0)
 #define profile_munmap(a) do { } while (0)
 
-static inline int register_timer_hook(int (*hook)(struct pt_regs *))
-{
-	return -ENOSYS;
-}
-
-static inline void unregister_timer_hook(int (*hook)(struct pt_regs *))
-{
-	return;
-}
-
 #endif /* CONFIG_PROFILING */
 
 #endif /* _LINUX_PROFILE_H */
diff --git a/kernel/profile.c b/kernel/profile.c
index 1f391819c42f..dc3384ee874e 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -37,9 +37,6 @@ struct profile_hit {
 #define NR_PROFILE_HIT		(PAGE_SIZE/sizeof(struct profile_hit))
 #define NR_PROFILE_GRP		(NR_PROFILE_HIT/PROFILE_GRPSZ)
 
-/* Oprofile timer tick hook */
-static int (*timer_hook)(struct pt_regs *) __read_mostly;
-
 static atomic_t *prof_buffer;
 static unsigned long prof_len, prof_shift;
 
@@ -208,25 +205,6 @@ int profile_event_unregister(enum profile_type type, struct notifier_block *n)
 }
 EXPORT_SYMBOL_GPL(profile_event_unregister);
 
-int register_timer_hook(int (*hook)(struct pt_regs *))
-{
-	if (timer_hook)
-		return -EBUSY;
-	timer_hook = hook;
-	return 0;
-}
-EXPORT_SYMBOL_GPL(register_timer_hook);
-
-void unregister_timer_hook(int (*hook)(struct pt_regs *))
-{
-	WARN_ON(hook != timer_hook);
-	timer_hook = NULL;
-	/* make sure all CPUs see the NULL hook */
-	synchronize_sched();  /* Allow ongoing interrupts to complete. */
-}
-EXPORT_SYMBOL_GPL(unregister_timer_hook);
-
-
 #ifdef CONFIG_SMP
 /*
  * Each cpu has a pair of open-addressed hashtables for pending
@@ -436,8 +414,6 @@ void profile_tick(int type)
 {
 	struct pt_regs *regs = get_irq_regs();
 
-	if (type == CPU_PROFILING && timer_hook)
-		timer_hook(regs);
 	if (!user_mode(regs) && prof_cpu_mask != NULL &&
 	    cpumask_test_cpu(smp_processor_id(), prof_cpu_mask))
 		profile_hit(type, (void *)profile_pc(regs));
-- 
cgit v1.2.3


From d437c86baacf265a640dfc462c75941d02c0e153 Mon Sep 17 00:00:00 2001
From: Bing Zhao <bzhao@marvell.com>
Date: Wed, 23 Jan 2013 20:33:58 -0800
Subject: ieee80211: define AKM suite selectors type 5, 6 and 7

Reference: IEEE 802.11-2012 8.4.2.27.3 "AKM suites"

Signed-off-by: Bing Zhao <bzhao@marvell.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index ccf9ee1dca8c..11c8bc87fdcb 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -1898,7 +1898,10 @@ enum ieee80211_sa_query_action {
 /* AKM suite selectors */
 #define WLAN_AKM_SUITE_8021X		0x000FAC01
 #define WLAN_AKM_SUITE_PSK		0x000FAC02
-#define WLAN_AKM_SUITE_SAE			0x000FAC08
+#define WLAN_AKM_SUITE_8021X_SHA256	0x000FAC05
+#define WLAN_AKM_SUITE_PSK_SHA256	0x000FAC06
+#define WLAN_AKM_SUITE_TDLS		0x000FAC07
+#define WLAN_AKM_SUITE_SAE		0x000FAC08
 #define WLAN_AKM_SUITE_FT_OVER_SAE	0x000FAC09
 
 #define WLAN_MAX_KEY_LEN		32
-- 
cgit v1.2.3


From d28574e043e8b7cb35482de6e9a553118a32803d Mon Sep 17 00:00:00 2001
From: Maxim Patlasov <mpatlasov@parallels.com>
Date: Fri, 26 Oct 2012 19:50:04 +0400
Subject: mm: minor cleanup of iov_iter_single_seg_count()

The function does not modify iov_iter which 'i' points to.

Signed-off-by: Maxim Patlasov <mpatlasov@parallels.com>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
 include/linux/fs.h | 2 +-
 mm/filemap.c       | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 7617ee04f066..7d2e893ec3d1 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -301,7 +301,7 @@ size_t iov_iter_copy_from_user(struct page *page,
 		struct iov_iter *i, unsigned long offset, size_t bytes);
 void iov_iter_advance(struct iov_iter *i, size_t bytes);
 int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes);
-size_t iov_iter_single_seg_count(struct iov_iter *i);
+size_t iov_iter_single_seg_count(const struct iov_iter *i);
 
 static inline void iov_iter_init(struct iov_iter *i,
 			const struct iovec *iov, unsigned long nr_segs,
diff --git a/mm/filemap.c b/mm/filemap.c
index 83efee76a5c0..24a7ea583f0c 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2056,7 +2056,7 @@ EXPORT_SYMBOL(iov_iter_fault_in_readable);
 /*
  * Return the count of just the current iov_iter segment.
  */
-size_t iov_iter_single_seg_count(struct iov_iter *i)
+size_t iov_iter_single_seg_count(const struct iov_iter *i)
 {
 	const struct iovec *iov = i->iov;
 	if (i->nr_segs == 1)
-- 
cgit v1.2.3


From 51906e779f2b13b38f8153774c4c7163d412ffd9 Mon Sep 17 00:00:00 2001
From: Alexander Gordeev <agordeev@redhat.com>
Date: Mon, 19 Nov 2012 16:01:29 +0100
Subject: x86/MSI: Support multiple MSIs in presense of IRQ remapping

The MSI specification has several constraints in comparison with
MSI-X, most notable of them is the inability to configure MSIs
independently. As a result, it is impossible to dispatch
interrupts from different queues to different CPUs. This is
largely devalues the support of multiple MSIs in SMP systems.

Also, a necessity to allocate a contiguous block of vector
numbers for devices capable of multiple MSIs might cause a
considerable pressure on x86 interrupt vector allocator and
could lead to fragmentation of the interrupt vectors space.

This patch overcomes both drawbacks in presense of IRQ remapping
and lets devices take advantage of multiple queues and per-IRQ
affinity assignments.

Signed-off-by: Alexander Gordeev <agordeev@redhat.com>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Matthew Wilcox <willy@linux.intel.com>
Cc: Jeff Garzik <jgarzik@pobox.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/c8bd86ff56b5fc118257436768aaa04489ac0a4c.1353324359.git.agordeev@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/apic/io_apic.c | 165 +++++++++++++++++++++++++++++++++--------
 include/linux/irq.h            |   5 ++
 kernel/irq/chip.c              |  30 ++++++--
 3 files changed, 160 insertions(+), 40 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index b739d398bb29..2016f9dabd72 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -300,9 +300,9 @@ static struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node)
 	return cfg;
 }
 
-static int alloc_irq_from(unsigned int from, int node)
+static int alloc_irqs_from(unsigned int from, unsigned int count, int node)
 {
-	return irq_alloc_desc_from(from, node);
+	return irq_alloc_descs_from(from, count, node);
 }
 
 static void free_irq_at(unsigned int at, struct irq_cfg *cfg)
@@ -2982,37 +2982,58 @@ device_initcall(ioapic_init_ops);
 /*
  * Dynamic irq allocate and deallocation
  */
-unsigned int create_irq_nr(unsigned int from, int node)
+unsigned int __create_irqs(unsigned int from, unsigned int count, int node)
 {
-	struct irq_cfg *cfg;
+	struct irq_cfg **cfg;
 	unsigned long flags;
-	unsigned int ret = 0;
-	int irq;
+	int irq, i;
 
 	if (from < nr_irqs_gsi)
 		from = nr_irqs_gsi;
 
-	irq = alloc_irq_from(from, node);
-	if (irq < 0)
-		return 0;
-	cfg = alloc_irq_cfg(irq, node);
-	if (!cfg) {
-		free_irq_at(irq, NULL);
+	cfg = kzalloc_node(count * sizeof(cfg[0]), GFP_KERNEL, node);
+	if (!cfg)
 		return 0;
+
+	irq = alloc_irqs_from(from, count, node);
+	if (irq < 0)
+		goto out_cfgs;
+
+	for (i = 0; i < count; i++) {
+		cfg[i] = alloc_irq_cfg(irq + i, node);
+		if (!cfg[i])
+			goto out_irqs;
 	}
 
 	raw_spin_lock_irqsave(&vector_lock, flags);
-	if (!__assign_irq_vector(irq, cfg, apic->target_cpus()))
-		ret = irq;
+	for (i = 0; i < count; i++)
+		if (__assign_irq_vector(irq + i, cfg[i], apic->target_cpus()))
+			goto out_vecs;
 	raw_spin_unlock_irqrestore(&vector_lock, flags);
 
-	if (ret) {
-		irq_set_chip_data(irq, cfg);
-		irq_clear_status_flags(irq, IRQ_NOREQUEST);
-	} else {
-		free_irq_at(irq, cfg);
+	for (i = 0; i < count; i++) {
+		irq_set_chip_data(irq + i, cfg[i]);
+		irq_clear_status_flags(irq + i, IRQ_NOREQUEST);
 	}
-	return ret;
+
+	kfree(cfg);
+	return irq;
+
+out_vecs:
+	for (i--; i >= 0; i--)
+		__clear_irq_vector(irq + i, cfg[i]);
+	raw_spin_unlock_irqrestore(&vector_lock, flags);
+out_irqs:
+	for (i = 0; i < count; i++)
+		free_irq_at(irq + i, cfg[i]);
+out_cfgs:
+	kfree(cfg);
+	return 0;
+}
+
+unsigned int create_irq_nr(unsigned int from, int node)
+{
+	return __create_irqs(from, 1, node);
 }
 
 int create_irq(void)
@@ -3045,6 +3066,14 @@ void destroy_irq(unsigned int irq)
 	free_irq_at(irq, cfg);
 }
 
+static inline void destroy_irqs(unsigned int irq, unsigned int count)
+{
+	unsigned int i;
+
+	for (i = 0; i < count; i++)
+		destroy_irq(irq + i);
+}
+
 /*
  * MSI message composition
  */
@@ -3071,7 +3100,7 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq,
 
 	if (irq_remapped(cfg)) {
 		compose_remapped_msi_msg(pdev, irq, dest, msg, hpet_id);
-		return err;
+		return 0;
 	}
 
 	if (x2apic_enabled())
@@ -3098,7 +3127,7 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq,
 			MSI_DATA_DELIVERY_LOWPRI) |
 		MSI_DATA_VECTOR(cfg->vector);
 
-	return err;
+	return 0;
 }
 
 static int
@@ -3136,18 +3165,26 @@ static struct irq_chip msi_chip = {
 	.irq_retrigger		= ioapic_retrigger_irq,
 };
 
-static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
+static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc,
+			 unsigned int irq_base, unsigned int irq_offset)
 {
 	struct irq_chip *chip = &msi_chip;
 	struct msi_msg msg;
+	unsigned int irq = irq_base + irq_offset;
 	int ret;
 
 	ret = msi_compose_msg(dev, irq, &msg, -1);
 	if (ret < 0)
 		return ret;
 
-	irq_set_msi_desc(irq, msidesc);
-	write_msi_msg(irq, &msg);
+	irq_set_msi_desc_off(irq_base, irq_offset, msidesc);
+
+	/*
+	 * MSI-X message is written per-IRQ, the offset is always 0.
+	 * MSI message denotes a contiguous group of IRQs, written for 0th IRQ.
+	 */
+	if (!irq_offset)
+		write_msi_msg(irq, &msg);
 
 	if (irq_remapped(irq_get_chip_data(irq))) {
 		irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
@@ -3161,23 +3198,19 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
 	return 0;
 }
 
-int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
+int setup_msix_irqs(struct pci_dev *dev, int nvec)
 {
 	int node, ret, sub_handle, index = 0;
 	unsigned int irq, irq_want;
 	struct msi_desc *msidesc;
 
-	/* x86 doesn't support multiple MSI yet */
-	if (type == PCI_CAP_ID_MSI && nvec > 1)
-		return 1;
-
 	node = dev_to_node(&dev->dev);
 	irq_want = nr_irqs_gsi;
 	sub_handle = 0;
 	list_for_each_entry(msidesc, &dev->msi_list, list) {
 		irq = create_irq_nr(irq_want, node);
 		if (irq == 0)
-			return -1;
+			return -ENOSPC;
 		irq_want = irq + 1;
 		if (!irq_remapping_enabled)
 			goto no_ir;
@@ -3199,7 +3232,7 @@ int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
 				goto error;
 		}
 no_ir:
-		ret = setup_msi_irq(dev, msidesc, irq);
+		ret = setup_msi_irq(dev, msidesc, irq, 0);
 		if (ret < 0)
 			goto error;
 		sub_handle++;
@@ -3211,6 +3244,74 @@ error:
 	return ret;
 }
 
+int setup_msi_irqs(struct pci_dev *dev, int nvec)
+{
+	int node, ret, sub_handle, index = 0;
+	unsigned int irq;
+	struct msi_desc *msidesc;
+
+	if (nvec > 1 && !irq_remapping_enabled)
+		return 1;
+
+	nvec = __roundup_pow_of_two(nvec);
+
+	WARN_ON(!list_is_singular(&dev->msi_list));
+	msidesc = list_entry(dev->msi_list.next, struct msi_desc, list);
+	WARN_ON(msidesc->irq);
+	WARN_ON(msidesc->msi_attrib.multiple);
+
+	node = dev_to_node(&dev->dev);
+	irq = __create_irqs(nr_irqs_gsi, nvec, node);
+	if (irq == 0)
+		return -ENOSPC;
+
+	if (!irq_remapping_enabled) {
+		ret = setup_msi_irq(dev, msidesc, irq, 0);
+		if (ret < 0)
+			goto error;
+		return 0;
+	}
+
+	msidesc->msi_attrib.multiple = ilog2(nvec);
+	for (sub_handle = 0; sub_handle < nvec; sub_handle++) {
+		if (!sub_handle) {
+			index = msi_alloc_remapped_irq(dev, irq, nvec);
+			if (index < 0) {
+				ret = index;
+				goto error;
+			}
+		} else {
+			ret = msi_setup_remapped_irq(dev, irq + sub_handle,
+						     index, sub_handle);
+			if (ret < 0)
+				goto error;
+		}
+		ret = setup_msi_irq(dev, msidesc, irq, sub_handle);
+		if (ret < 0)
+			goto error;
+	}
+	return 0;
+
+error:
+	destroy_irqs(irq, nvec);
+
+	/*
+	 * Restore altered MSI descriptor fields and prevent just destroyed
+	 * IRQs from tearing down again in default_teardown_msi_irqs()
+	 */
+	msidesc->irq = 0;
+	msidesc->msi_attrib.multiple = 0;
+
+	return ret;
+}
+
+int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
+{
+	if (type == PCI_CAP_ID_MSI)
+		return setup_msi_irqs(dev, nvec);
+	return setup_msix_irqs(dev, nvec);
+}
+
 void native_teardown_msi_irq(unsigned int irq)
 {
 	destroy_irq(irq);
diff --git a/include/linux/irq.h b/include/linux/irq.h
index fdf2c4a238cc..1eab99111e94 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -528,6 +528,8 @@ extern int irq_set_handler_data(unsigned int irq, void *data);
 extern int irq_set_chip_data(unsigned int irq, void *data);
 extern int irq_set_irq_type(unsigned int irq, unsigned int type);
 extern int irq_set_msi_desc(unsigned int irq, struct msi_desc *entry);
+extern int irq_set_msi_desc_off(unsigned int irq_base, unsigned int irq_offset,
+				struct msi_desc *entry);
 extern struct irq_data *irq_get_irq_data(unsigned int irq);
 
 static inline struct irq_chip *irq_get_chip(unsigned int irq)
@@ -590,6 +592,9 @@ int __irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node,
 #define irq_alloc_desc_from(from, node)		\
 	irq_alloc_descs(-1, from, 1, node)
 
+#define irq_alloc_descs_from(from, cnt, node)	\
+	irq_alloc_descs(-1, from, cnt, node)
+
 void irq_free_descs(unsigned int irq, unsigned int cnt);
 int irq_reserve_irqs(unsigned int from, unsigned int cnt);
 
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 3aca9f29d30e..cbd97ce0b000 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -90,26 +90,40 @@ int irq_set_handler_data(unsigned int irq, void *data)
 EXPORT_SYMBOL(irq_set_handler_data);
 
 /**
- *	irq_set_msi_desc - set MSI descriptor data for an irq
- *	@irq:	Interrupt number
- *	@entry:	Pointer to MSI descriptor data
+ *	irq_set_msi_desc_off - set MSI descriptor data for an irq at offset
+ *	@irq_base:	Interrupt number base
+ *	@irq_offset:	Interrupt number offset
+ *	@entry:		Pointer to MSI descriptor data
  *
- *	Set the MSI descriptor entry for an irq
+ *	Set the MSI descriptor entry for an irq at offset
  */
-int irq_set_msi_desc(unsigned int irq, struct msi_desc *entry)
+int irq_set_msi_desc_off(unsigned int irq_base, unsigned int irq_offset,
+			 struct msi_desc *entry)
 {
 	unsigned long flags;
-	struct irq_desc *desc = irq_get_desc_lock(irq, &flags, IRQ_GET_DESC_CHECK_GLOBAL);
+	struct irq_desc *desc = irq_get_desc_lock(irq_base + irq_offset, &flags, IRQ_GET_DESC_CHECK_GLOBAL);
 
 	if (!desc)
 		return -EINVAL;
 	desc->irq_data.msi_desc = entry;
-	if (entry)
-		entry->irq = irq;
+	if (entry && !irq_offset)
+		entry->irq = irq_base;
 	irq_put_desc_unlock(desc, flags);
 	return 0;
 }
 
+/**
+ *	irq_set_msi_desc - set MSI descriptor data for an irq
+ *	@irq:	Interrupt number
+ *	@entry:	Pointer to MSI descriptor data
+ *
+ *	Set the MSI descriptor entry for an irq
+ */
+int irq_set_msi_desc(unsigned int irq, struct msi_desc *entry)
+{
+	return irq_set_msi_desc_off(irq, 0, entry);
+}
+
 /**
  *	irq_set_chip_data - set irq chip data for an irq
  *	@irq:	Interrupt number
-- 
cgit v1.2.3


From 08261d87f7d1b6253ab3223756625a5c74532293 Mon Sep 17 00:00:00 2001
From: Alexander Gordeev <agordeev@redhat.com>
Date: Mon, 19 Nov 2012 16:02:10 +0100
Subject: PCI/MSI: Enable multiple MSIs with pci_enable_msi_block_auto()

The new function pci_enable_msi_block_auto() tries to allocate
maximum possible number of MSIs up to the number the device
supports. It generalizes a pattern when pci_enable_msi_block()
is contiguously called until it succeeds or fails.

Opposite to pci_enable_msi_block() which takes the number of
MSIs to allocate as a input parameter,
pci_enable_msi_block_auto() could be used by device drivers to
obtain the number of assigned MSIs and the number of MSIs the
device supports.

Signed-off-by: Alexander Gordeev <agordeev@redhat.com>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Cc: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Matthew Wilcox <willy@linux.intel.com>
Cc: Jeff Garzik <jgarzik@pobox.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/c3de2419df94a0f95ca1a6f755afc421486455e6.1353324359.git.agordeev@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 Documentation/PCI/MSI-HOWTO.txt | 37 ++++++++++++++++++++++++++++++++-----
 drivers/pci/msi.c               | 26 ++++++++++++++++++++++++++
 include/linux/pci.h             |  7 +++++++
 3 files changed, 65 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/PCI/MSI-HOWTO.txt b/Documentation/PCI/MSI-HOWTO.txt
index 53e6fca146d7..a09178086c30 100644
--- a/Documentation/PCI/MSI-HOWTO.txt
+++ b/Documentation/PCI/MSI-HOWTO.txt
@@ -127,15 +127,42 @@ on the number of vectors that can be allocated; pci_enable_msi_block()
 returns as soon as it finds any constraint that doesn't allow the
 call to succeed.
 
-4.2.3 pci_disable_msi
+4.2.3 pci_enable_msi_block_auto
+
+int pci_enable_msi_block_auto(struct pci_dev *dev, unsigned int *count)
+
+This variation on pci_enable_msi() call allows a device driver to request
+the maximum possible number of MSIs.  The MSI specification only allows
+interrupts to be allocated in powers of two, up to a maximum of 2^5 (32).
+
+If this function returns a positive number, it indicates that it has
+succeeded and the returned value is the number of allocated interrupts. In
+this case, the function enables MSI on this device and updates dev->irq to
+be the lowest of the new interrupts assigned to it.  The other interrupts
+assigned to the device are in the range dev->irq to dev->irq + returned
+value - 1.
+
+If this function returns a negative number, it indicates an error and
+the driver should not attempt to request any more MSI interrupts for
+this device.
+
+If the device driver needs to know the number of interrupts the device
+supports it can pass the pointer count where that number is stored. The
+device driver must decide what action to take if pci_enable_msi_block_auto()
+succeeds, but returns a value less than the number of interrupts supported.
+If the device driver does not need to know the number of interrupts
+supported, it can set the pointer count to NULL.
+
+4.2.4 pci_disable_msi
 
 void pci_disable_msi(struct pci_dev *dev)
 
 This function should be used to undo the effect of pci_enable_msi() or
-pci_enable_msi_block().  Calling it restores dev->irq to the pin-based
-interrupt number and frees the previously allocated message signaled
-interrupt(s).  The interrupt may subsequently be assigned to another
-device, so drivers should not cache the value of dev->irq.
+pci_enable_msi_block() or pci_enable_msi_block_auto().  Calling it restores
+dev->irq to the pin-based interrupt number and frees the previously
+allocated message signaled interrupt(s).  The interrupt may subsequently be
+assigned to another device, so drivers should not cache the value of
+dev->irq.
 
 Before calling this function, a device driver must always call free_irq()
 on any interrupt for which it previously called request_irq().
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index 5099636a6e5f..00cc78c7aa04 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -845,6 +845,32 @@ int pci_enable_msi_block(struct pci_dev *dev, unsigned int nvec)
 }
 EXPORT_SYMBOL(pci_enable_msi_block);
 
+int pci_enable_msi_block_auto(struct pci_dev *dev, unsigned int *maxvec)
+{
+	int ret, pos, nvec;
+	u16 msgctl;
+
+	pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
+	if (!pos)
+		return -EINVAL;
+
+	pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &msgctl);
+	ret = 1 << ((msgctl & PCI_MSI_FLAGS_QMASK) >> 1);
+
+	if (maxvec)
+		*maxvec = ret;
+
+	do {
+		nvec = ret;
+		ret = pci_enable_msi_block(dev, nvec);
+	} while (ret > 0);
+
+	if (ret < 0)
+		return ret;
+	return nvec;
+}
+EXPORT_SYMBOL(pci_enable_msi_block_auto);
+
 void pci_msi_shutdown(struct pci_dev *dev)
 {
 	struct msi_desc *desc;
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 15472d691ee6..6fa4dd2a3b9e 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1101,6 +1101,12 @@ static inline int pci_enable_msi_block(struct pci_dev *dev, unsigned int nvec)
 	return -1;
 }
 
+static inline int
+pci_enable_msi_block_auto(struct pci_dev *dev, unsigned int *maxvec)
+{
+	return -1;
+}
+
 static inline void pci_msi_shutdown(struct pci_dev *dev)
 { }
 static inline void pci_disable_msi(struct pci_dev *dev)
@@ -1132,6 +1138,7 @@ static inline int pci_msi_enabled(void)
 }
 #else
 extern int pci_enable_msi_block(struct pci_dev *dev, unsigned int nvec);
+extern int pci_enable_msi_block_auto(struct pci_dev *dev, unsigned int *maxvec);
 extern void pci_msi_shutdown(struct pci_dev *dev);
 extern void pci_disable_msi(struct pci_dev *dev);
 extern int pci_msix_table_size(struct pci_dev *dev);
-- 
cgit v1.2.3


From e2905b29122173b72b612c962b138e3fa07476b8 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 24 Jan 2013 11:01:32 -0800
Subject: workqueue: unexport work_cpu()

This function no longer has any external users.  Unexport it.  It will
be removed later on.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Lai Jiangshan <laijs@cn.fujitsu.com>
---
 include/linux/workqueue.h | 1 -
 kernel/workqueue.c        | 4 ++--
 2 files changed, 2 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 2b58905d3504..ff68b1d95b41 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -426,7 +426,6 @@ extern bool cancel_delayed_work_sync(struct delayed_work *dwork);
 extern void workqueue_set_max_active(struct workqueue_struct *wq,
 				     int max_active);
 extern bool workqueue_congested(unsigned int cpu, struct workqueue_struct *wq);
-extern unsigned int work_cpu(struct work_struct *work);
 extern unsigned int work_busy(struct work_struct *work);
 
 /*
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 2ffa240052fa..fe0745f54fcd 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -449,6 +449,7 @@ static atomic_t unbound_pool_nr_running[NR_WORKER_POOLS] = {
 };
 
 static int worker_thread(void *__worker);
+static unsigned int work_cpu(struct work_struct *work);
 
 static int worker_pool_pri(struct worker_pool *pool)
 {
@@ -3419,13 +3420,12 @@ EXPORT_SYMBOL_GPL(workqueue_congested);
  * RETURNS:
  * CPU number if @work was ever queued.  WORK_CPU_NONE otherwise.
  */
-unsigned int work_cpu(struct work_struct *work)
+static unsigned int work_cpu(struct work_struct *work)
 {
 	struct global_cwq *gcwq = get_work_gcwq(work);
 
 	return gcwq ? gcwq->cpu : WORK_CPU_NONE;
 }
-EXPORT_SYMBOL_GPL(work_cpu);
 
 /**
  * work_busy - test whether a work is currently pending or running
-- 
cgit v1.2.3


From 715b06b864c99a18cb8368dfb187da4f569788cd Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 24 Jan 2013 11:01:33 -0800
Subject: workqueue: introduce WORK_OFFQ_CPU_NONE

Currently, when a work item is off queue, high bits of its data
encodes the last CPU it was on.  This is scheduled to be changed to
pool ID, which will make it impossible to use WORK_CPU_NONE to
indicate no association.

This patch limits the number of bits which are used for off-queue cpu
number to 31 (so that the max fits in an int) and uses the highest
possible value - WORK_OFFQ_CPU_NONE - to indicate no association.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Lai Jiangshan <laijs@cn.fujitsu.com>
---
 include/linux/workqueue.h | 10 +++++++++-
 kernel/workqueue.c        |  4 ++--
 2 files changed, 11 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index ff68b1d95b41..f8b35763e55f 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -73,13 +73,21 @@ enum {
 
 	WORK_OFFQ_CANCELING	= (1 << WORK_OFFQ_FLAG_BASE),
 
+	/*
+	 * When a work item is off queue, its high bits point to the last
+	 * cpu it was on.  Cap at 31 bits and use the highest number to
+	 * indicate that no cpu is associated.
+	 */
 	WORK_OFFQ_FLAG_BITS	= 1,
 	WORK_OFFQ_CPU_SHIFT	= WORK_OFFQ_FLAG_BASE + WORK_OFFQ_FLAG_BITS,
+	WORK_OFFQ_LEFT		= BITS_PER_LONG - WORK_OFFQ_CPU_SHIFT,
+	WORK_OFFQ_CPU_BITS	= WORK_OFFQ_LEFT <= 31 ? WORK_OFFQ_LEFT : 31,
+	WORK_OFFQ_CPU_NONE	= (1LU << WORK_OFFQ_CPU_BITS) - 1,
 
 	/* convenience constants */
 	WORK_STRUCT_FLAG_MASK	= (1UL << WORK_STRUCT_FLAG_BITS) - 1,
 	WORK_STRUCT_WQ_DATA_MASK = ~WORK_STRUCT_FLAG_MASK,
-	WORK_STRUCT_NO_CPU	= (unsigned long)WORK_CPU_NONE << WORK_OFFQ_CPU_SHIFT,
+	WORK_STRUCT_NO_CPU	= (unsigned long)WORK_OFFQ_CPU_NONE << WORK_OFFQ_CPU_SHIFT,
 
 	/* bit mask for work_busy() return values */
 	WORK_BUSY_PENDING	= 1 << 0,
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 1a686e481132..405282d30046 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -573,7 +573,7 @@ static struct global_cwq *get_work_gcwq(struct work_struct *work)
 			(data & WORK_STRUCT_WQ_DATA_MASK))->pool->gcwq;
 
 	cpu = data >> WORK_OFFQ_CPU_SHIFT;
-	if (cpu == WORK_CPU_NONE)
+	if (cpu == WORK_OFFQ_CPU_NONE)
 		return NULL;
 
 	BUG_ON(cpu >= nr_cpu_ids && cpu != WORK_CPU_UNBOUND);
@@ -583,7 +583,7 @@ static struct global_cwq *get_work_gcwq(struct work_struct *work)
 static void mark_work_canceling(struct work_struct *work)
 {
 	struct global_cwq *gcwq = get_work_gcwq(work);
-	unsigned long cpu = gcwq ? gcwq->cpu : WORK_CPU_NONE;
+	unsigned long cpu = gcwq ? gcwq->cpu : WORK_OFFQ_CPU_NONE;
 
 	set_work_data(work, (cpu << WORK_OFFQ_CPU_SHIFT) | WORK_OFFQ_CANCELING,
 		      WORK_STRUCT_PENDING);
-- 
cgit v1.2.3


From 7c3eed5cd60d0f736516e6ade77d90c6255860bd Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 24 Jan 2013 11:01:33 -0800
Subject: workqueue: record pool ID instead of CPU in work->data when off-queue

Currently, when a work item is off-queue, work->data records the CPU
it was last on, which is used to locate the last executing instance
for non-reentrance, flushing, etc.

We're in the process of removing global_cwq and making worker_pool the
top level abstraction.  This patch makes work->data point to the pool
it was last associated with instead of CPU.

After the previous WORK_OFFQ_POOL_CPU and worker_poo->id additions,
the conversion is fairly straight-forward.  WORK_OFFQ constants and
functions are modified to record and read back pool ID instead.
worker_pool_by_id() is added to allow looking up pool from ID.
get_work_pool() replaces get_work_gcwq(), which is reimplemented using
get_work_pool().  get_work_pool_id() replaces work_cpu().

This patch shouldn't introduce any observable behavior changes.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Lai Jiangshan <laijs@cn.fujitsu.com>
---
 include/linux/workqueue.h |  18 ++++----
 kernel/workqueue.c        | 111 ++++++++++++++++++++++++++++------------------
 2 files changed, 76 insertions(+), 53 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index f8b35763e55f..a94e4e84e7b1 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -75,19 +75,19 @@ enum {
 
 	/*
 	 * When a work item is off queue, its high bits point to the last
-	 * cpu it was on.  Cap at 31 bits and use the highest number to
-	 * indicate that no cpu is associated.
+	 * pool it was on.  Cap at 31 bits and use the highest number to
+	 * indicate that no pool is associated.
 	 */
 	WORK_OFFQ_FLAG_BITS	= 1,
-	WORK_OFFQ_CPU_SHIFT	= WORK_OFFQ_FLAG_BASE + WORK_OFFQ_FLAG_BITS,
-	WORK_OFFQ_LEFT		= BITS_PER_LONG - WORK_OFFQ_CPU_SHIFT,
-	WORK_OFFQ_CPU_BITS	= WORK_OFFQ_LEFT <= 31 ? WORK_OFFQ_LEFT : 31,
-	WORK_OFFQ_CPU_NONE	= (1LU << WORK_OFFQ_CPU_BITS) - 1,
+	WORK_OFFQ_POOL_SHIFT	= WORK_OFFQ_FLAG_BASE + WORK_OFFQ_FLAG_BITS,
+	WORK_OFFQ_LEFT		= BITS_PER_LONG - WORK_OFFQ_POOL_SHIFT,
+	WORK_OFFQ_POOL_BITS	= WORK_OFFQ_LEFT <= 31 ? WORK_OFFQ_LEFT : 31,
+	WORK_OFFQ_POOL_NONE	= (1LU << WORK_OFFQ_POOL_BITS) - 1,
 
 	/* convenience constants */
 	WORK_STRUCT_FLAG_MASK	= (1UL << WORK_STRUCT_FLAG_BITS) - 1,
 	WORK_STRUCT_WQ_DATA_MASK = ~WORK_STRUCT_FLAG_MASK,
-	WORK_STRUCT_NO_CPU	= (unsigned long)WORK_OFFQ_CPU_NONE << WORK_OFFQ_CPU_SHIFT,
+	WORK_STRUCT_NO_POOL	= (unsigned long)WORK_OFFQ_POOL_NONE << WORK_OFFQ_POOL_SHIFT,
 
 	/* bit mask for work_busy() return values */
 	WORK_BUSY_PENDING	= 1 << 0,
@@ -103,9 +103,9 @@ struct work_struct {
 #endif
 };
 
-#define WORK_DATA_INIT()	ATOMIC_LONG_INIT(WORK_STRUCT_NO_CPU)
+#define WORK_DATA_INIT()	ATOMIC_LONG_INIT(WORK_STRUCT_NO_POOL)
 #define WORK_DATA_STATIC_INIT()	\
-	ATOMIC_LONG_INIT(WORK_STRUCT_NO_CPU | WORK_STRUCT_STATIC)
+	ATOMIC_LONG_INIT(WORK_STRUCT_NO_POOL | WORK_STRUCT_STATIC)
 
 struct delayed_work {
 	struct work_struct work;
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 9c6ad974bb9e..a4d7e3f0a874 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -451,7 +451,6 @@ static DEFINE_MUTEX(worker_pool_idr_mutex);
 static DEFINE_IDR(worker_pool_idr);
 
 static int worker_thread(void *__worker);
-static unsigned int work_cpu(struct work_struct *work);
 
 static int std_worker_pool_pri(struct worker_pool *pool)
 {
@@ -479,6 +478,15 @@ static int worker_pool_assign_id(struct worker_pool *pool)
 	return ret;
 }
 
+/*
+ * Lookup worker_pool by id.  The idr currently is built during boot and
+ * never modified.  Don't worry about locking for now.
+ */
+static struct worker_pool *worker_pool_by_id(int pool_id)
+{
+	return idr_find(&worker_pool_idr, pool_id);
+}
+
 static atomic_t *get_pool_nr_running(struct worker_pool *pool)
 {
 	int cpu = pool->gcwq->cpu;
@@ -520,17 +528,17 @@ static int work_next_color(int color)
 /*
  * While queued, %WORK_STRUCT_CWQ is set and non flag bits of a work's data
  * contain the pointer to the queued cwq.  Once execution starts, the flag
- * is cleared and the high bits contain OFFQ flags and CPU number.
+ * is cleared and the high bits contain OFFQ flags and pool ID.
  *
- * set_work_cwq(), set_work_cpu_and_clear_pending(), mark_work_canceling()
- * and clear_work_data() can be used to set the cwq, cpu or clear
+ * set_work_cwq(), set_work_pool_and_clear_pending(), mark_work_canceling()
+ * and clear_work_data() can be used to set the cwq, pool or clear
  * work->data.  These functions should only be called while the work is
  * owned - ie. while the PENDING bit is set.
  *
- * get_work_[g]cwq() can be used to obtain the gcwq or cwq corresponding to
- * a work.  gcwq is available once the work has been queued anywhere after
- * initialization until it is sync canceled.  cwq is available only while
- * the work item is queued.
+ * get_work_pool() and get_work_cwq() can be used to obtain the pool or cwq
+ * corresponding to a work.  Pool is available once the work has been
+ * queued anywhere after initialization until it is sync canceled.  cwq is
+ * available only while the work item is queued.
  *
  * %WORK_OFFQ_CANCELING is used to mark a work item which is being
  * canceled.  While being canceled, a work item may have its PENDING set
@@ -552,8 +560,8 @@ static void set_work_cwq(struct work_struct *work,
 		      WORK_STRUCT_PENDING | WORK_STRUCT_CWQ | extra_flags);
 }
 
-static void set_work_cpu_and_clear_pending(struct work_struct *work,
-					   unsigned int cpu)
+static void set_work_pool_and_clear_pending(struct work_struct *work,
+					    int pool_id)
 {
 	/*
 	 * The following wmb is paired with the implied mb in
@@ -562,13 +570,13 @@ static void set_work_cpu_and_clear_pending(struct work_struct *work,
 	 * owner.
 	 */
 	smp_wmb();
-	set_work_data(work, (unsigned long)cpu << WORK_OFFQ_CPU_SHIFT, 0);
+	set_work_data(work, (unsigned long)pool_id << WORK_OFFQ_POOL_SHIFT, 0);
 }
 
 static void clear_work_data(struct work_struct *work)
 {
-	smp_wmb();	/* see set_work_cpu_and_clear_pending() */
-	set_work_data(work, WORK_STRUCT_NO_CPU, 0);
+	smp_wmb();	/* see set_work_pool_and_clear_pending() */
+	set_work_data(work, WORK_STRUCT_NO_POOL, 0);
 }
 
 static struct cpu_workqueue_struct *get_work_cwq(struct work_struct *work)
@@ -581,30 +589,58 @@ static struct cpu_workqueue_struct *get_work_cwq(struct work_struct *work)
 		return NULL;
 }
 
-static struct global_cwq *get_work_gcwq(struct work_struct *work)
+/**
+ * get_work_pool - return the worker_pool a given work was associated with
+ * @work: the work item of interest
+ *
+ * Return the worker_pool @work was last associated with.  %NULL if none.
+ */
+static struct worker_pool *get_work_pool(struct work_struct *work)
 {
 	unsigned long data = atomic_long_read(&work->data);
-	unsigned int cpu;
+	struct worker_pool *pool;
+	int pool_id;
 
 	if (data & WORK_STRUCT_CWQ)
 		return ((struct cpu_workqueue_struct *)
-			(data & WORK_STRUCT_WQ_DATA_MASK))->pool->gcwq;
+			(data & WORK_STRUCT_WQ_DATA_MASK))->pool;
 
-	cpu = data >> WORK_OFFQ_CPU_SHIFT;
-	if (cpu == WORK_OFFQ_CPU_NONE)
+	pool_id = data >> WORK_OFFQ_POOL_SHIFT;
+	if (pool_id == WORK_OFFQ_POOL_NONE)
 		return NULL;
 
-	BUG_ON(cpu >= nr_cpu_ids && cpu != WORK_CPU_UNBOUND);
-	return get_gcwq(cpu);
+	pool = worker_pool_by_id(pool_id);
+	WARN_ON_ONCE(!pool);
+	return pool;
+}
+
+/**
+ * get_work_pool_id - return the worker pool ID a given work is associated with
+ * @work: the work item of interest
+ *
+ * Return the worker_pool ID @work was last associated with.
+ * %WORK_OFFQ_POOL_NONE if none.
+ */
+static int get_work_pool_id(struct work_struct *work)
+{
+	struct worker_pool *pool = get_work_pool(work);
+
+	return pool ? pool->id : WORK_OFFQ_POOL_NONE;
+}
+
+static struct global_cwq *get_work_gcwq(struct work_struct *work)
+{
+	struct worker_pool *pool = get_work_pool(work);
+
+	return pool ? pool->gcwq : NULL;
 }
 
 static void mark_work_canceling(struct work_struct *work)
 {
-	struct global_cwq *gcwq = get_work_gcwq(work);
-	unsigned long cpu = gcwq ? gcwq->cpu : WORK_OFFQ_CPU_NONE;
+	unsigned long pool_id = get_work_pool_id(work);
 
-	set_work_data(work, (cpu << WORK_OFFQ_CPU_SHIFT) | WORK_OFFQ_CANCELING,
-		      WORK_STRUCT_PENDING);
+	pool_id <<= WORK_OFFQ_POOL_SHIFT;
+	set_work_data(work, pool_id | WORK_OFFQ_CANCELING, WORK_STRUCT_PENDING);
 }
 
 static bool work_is_canceling(struct work_struct *work)
@@ -2192,12 +2228,12 @@ __acquires(&gcwq->lock)
 		wake_up_worker(pool);
 
 	/*
-	 * Record the last CPU and clear PENDING which should be the last
+	 * Record the last pool and clear PENDING which should be the last
 	 * update to @work.  Also, do this inside @gcwq->lock so that
 	 * PENDING and queued state changes happen together while IRQ is
 	 * disabled.
 	 */
-	set_work_cpu_and_clear_pending(work, gcwq->cpu);
+	set_work_pool_and_clear_pending(work, pool->id);
 
 	spin_unlock_irq(&gcwq->lock);
 
@@ -2967,7 +3003,8 @@ bool cancel_delayed_work(struct delayed_work *dwork)
 	if (unlikely(ret < 0))
 		return false;
 
-	set_work_cpu_and_clear_pending(&dwork->work, work_cpu(&dwork->work));
+	set_work_pool_and_clear_pending(&dwork->work,
+					get_work_pool_id(&dwork->work));
 	local_irq_restore(flags);
 	return ret;
 }
@@ -3430,20 +3467,6 @@ bool workqueue_congested(unsigned int cpu, struct workqueue_struct *wq)
 }
 EXPORT_SYMBOL_GPL(workqueue_congested);
 
-/**
- * work_cpu - return the last known associated cpu for @work
- * @work: the work of interest
- *
- * RETURNS:
- * CPU number if @work was ever queued.  WORK_CPU_NONE otherwise.
- */
-static unsigned int work_cpu(struct work_struct *work)
-{
-	struct global_cwq *gcwq = get_work_gcwq(work);
-
-	return gcwq ? gcwq->cpu : WORK_CPU_NONE;
-}
-
 /**
  * work_busy - test whether a work is currently pending or running
  * @work: the work to be tested
@@ -3816,9 +3839,9 @@ static int __init init_workqueues(void)
 {
 	unsigned int cpu;
 
-	/* make sure we have enough bits for OFFQ CPU number */
-	BUILD_BUG_ON((1LU << (BITS_PER_LONG - WORK_OFFQ_CPU_SHIFT)) <
-		     WORK_CPU_LAST);
+	/* make sure we have enough bits for OFFQ pool ID */
+	BUILD_BUG_ON((1LU << (BITS_PER_LONG - WORK_OFFQ_POOL_SHIFT)) <
+		     WORK_CPU_LAST * NR_STD_WORKER_POOLS);
 
 	cpu_notifier(workqueue_cpu_up_callback, CPU_PRI_WORKQUEUE_UP);
 	hotcpu_notifier(workqueue_cpu_down_callback, CPU_PRI_WORKQUEUE_DOWN);
-- 
cgit v1.2.3


From f2f6c2556dcc432e50003bc8fa4d62d95906f149 Mon Sep 17 00:00:00 2001
From: Prashant Gaikwad <pgaikwad@nvidia.com>
Date: Fri, 4 Jan 2013 12:30:52 +0530
Subject: clk: add common of_clk_init() function

Modify of_clk_init function so that it will determine which
driver to initialize based on device tree instead of each driver
registering to it.

Based on a similar patch for drivers/irqchip by Thomas Petazzoni and
drivers/clocksource by Stephen Warren.

Signed-off-by: Prashant Gaikwad <pgaikwad@nvidia.com>
Tested-by: Tony Prisk <linux@prisktech.co.nz>
Tested-by: Pawel Moll <pawel.moll@arm.com>
Tested-by: Rob Herring <rob.herring@calxeda.com>
Tested-by: Josh Cartwright <josh.cartwright@ni.com>
Reviewed-by: Josh Cartwright <josh.cartwright@ni.com>
Acked-by: Maxime Ripard <maxime.ripard@anandra.org>
Signed-off-by: Mike Turquette <mturquette@linaro.org>
[mturquette@linaro.org: merge conflict from missing CLKSRC_OF_TABLES()]

Signed-off-by: Mike Turquette <mturquette@linaro.org>
---
 drivers/clk/clk-fixed-rate.c      |  1 +
 drivers/clk/clk.c                 |  9 +++++++++
 include/asm-generic/vmlinux.lds.h | 10 ++++++++++
 include/linux/clk-provider.h      |  6 ++++++
 4 files changed, 26 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/clk/clk-fixed-rate.c b/drivers/clk/clk-fixed-rate.c
index a53b53be3d65..dc58fbd8516f 100644
--- a/drivers/clk/clk-fixed-rate.c
+++ b/drivers/clk/clk-fixed-rate.c
@@ -101,4 +101,5 @@ void of_fixed_clk_setup(struct device_node *node)
 		of_clk_add_provider(node, of_clk_src_simple_get, clk);
 }
 EXPORT_SYMBOL_GPL(of_fixed_clk_setup);
+CLK_OF_DECLARE(fixed_clk, "fixed-clock", of_fixed_clk_setup);
 #endif
diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c
index ad2ac94fede8..fabbfe1a9253 100644
--- a/drivers/clk/clk.c
+++ b/drivers/clk/clk.c
@@ -18,6 +18,7 @@
 #include <linux/slab.h>
 #include <linux/of.h>
 #include <linux/device.h>
+#include <linux/init.h>
 
 static DEFINE_SPINLOCK(enable_lock);
 static DEFINE_MUTEX(prepare_lock);
@@ -1803,6 +1804,11 @@ struct of_clk_provider {
 	void *data;
 };
 
+extern struct of_device_id __clk_of_table[];
+
+static const struct of_device_id __clk_of_table_sentinel
+	__used __section(__clk_of_table_end);
+
 static LIST_HEAD(of_clk_providers);
 static DEFINE_MUTEX(of_clk_lock);
 
@@ -1931,6 +1937,9 @@ void __init of_clk_init(const struct of_device_id *matches)
 {
 	struct device_node *np;
 
+	if (!matches)
+		matches = __clk_of_table;
+
 	for_each_matching_node(np, matches) {
 		const struct of_device_id *match = of_match_node(matches, np);
 		of_clk_init_cb_t clk_init_cb = match->data;
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index d1ea7ce0b4cb..c1fe60ad1540 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -150,6 +150,15 @@
 #endif
 
 
+#ifdef CONFIG_COMMON_CLK
+#define CLK_OF_TABLES() . = ALIGN(8);				\
+			VMLINUX_SYMBOL(__clk_of_table) = .;	\
+			*(__clk_of_table)			\
+			*(__clk_of_table_end)
+#else
+#define CLK_OF_TABLES()
+#endif
+
 #define KERNEL_DTB()							\
 	STRUCT_ALIGN();							\
 	VMLINUX_SYMBOL(__dtb_start) = .;				\
@@ -493,6 +502,7 @@
 	DEV_DISCARD(init.rodata)					\
 	CPU_DISCARD(init.rodata)					\
 	MEM_DISCARD(init.rodata)					\
+	CLK_OF_TABLES()							\
 	KERNEL_DTB()
 
 #define INIT_TEXT							\
diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h
index 4989b8a7bed1..7f197d7addb0 100644
--- a/include/linux/clk-provider.h
+++ b/include/linux/clk-provider.h
@@ -379,7 +379,13 @@ struct clk_onecell_data {
 };
 struct clk *of_clk_src_onecell_get(struct of_phandle_args *clkspec, void *data);
 const char *of_clk_get_parent_name(struct device_node *np, int index);
+
 void of_clk_init(const struct of_device_id *matches);
 
+#define CLK_OF_DECLARE(name, compat, fn)			\
+	static const struct of_device_id __clk_of_table_##name	\
+		__used __section(__clk_of_table)		\
+		= { .compatible = compat, .data = fn };
+
 #endif /* CONFIG_COMMON_CLK */
 #endif /* CLK_PROVIDER_H */
-- 
cgit v1.2.3


From 2d7ebbb0946e9e13285eee348df1dbc48f0580e0 Mon Sep 17 00:00:00 2001
From: Felipe Balbi <balbi@ti.com>
Date: Thu, 24 Jan 2013 11:00:15 +0200
Subject: usb: gadget: completely remove ->start/->stop

Those have been deprecated for a long time and
previous patches just converted all remaining
users of those.

Since there are no in-tree users and we don't
want any new users for them, let's obliterate
every piece of code related to those calls.

Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 drivers/usb/gadget/udc-core.c | 89 +++++++------------------------------------
 include/linux/usb/gadget.h    |  6 ---
 2 files changed, 14 insertions(+), 81 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/gadget/udc-core.c b/drivers/usb/gadget/udc-core.c
index e7c591621a3b..2a9cd369f71c 100644
--- a/drivers/usb/gadget/udc-core.c
+++ b/drivers/usb/gadget/udc-core.c
@@ -101,28 +101,6 @@ EXPORT_SYMBOL_GPL(usb_gadget_unmap_request);
 
 /* ------------------------------------------------------------------------- */
 
-/**
- * usb_gadget_start - tells usb device controller to start up
- * @gadget: The gadget we want to get started
- * @driver: The driver we want to bind to @gadget
- * @bind: The bind function for @driver
- *
- * This call is issued by the UDC Class driver when it's about
- * to register a gadget driver to the device controller, before
- * calling gadget driver's bind() method.
- *
- * It allows the controller to be powered off until strictly
- * necessary to have it powered on.
- *
- * Returns zero on success, else negative errno.
- */
-static inline int usb_gadget_start(struct usb_gadget *gadget,
-		struct usb_gadget_driver *driver,
-		int (*bind)(struct usb_gadget *, struct usb_gadget_driver *))
-{
-	return gadget->ops->start(driver, bind);
-}
-
 /**
  * usb_gadget_udc_start - tells usb device controller to start up
  * @gadget: The gadget we want to get started
@@ -143,24 +121,6 @@ static inline int usb_gadget_udc_start(struct usb_gadget *gadget,
 	return gadget->ops->udc_start(gadget, driver);
 }
 
-/**
- * usb_gadget_stop - tells usb device controller we don't need it anymore
- * @gadget: The device we want to stop activity
- * @driver: The driver to unbind from @gadget
- *
- * This call is issued by the UDC Class driver after calling
- * gadget driver's unbind() method.
- *
- * The details are implementation specific, but it can go as
- * far as powering off UDC completely and disable its data
- * line pullups.
- */
-static inline void usb_gadget_stop(struct usb_gadget *gadget,
-		struct usb_gadget_driver *driver)
-{
-	gadget->ops->stop(driver);
-}
-
 /**
  * usb_gadget_udc_stop - tells usb device controller we don't need it anymore
  * @gadget: The device we want to stop activity
@@ -246,14 +206,6 @@ err1:
 }
 EXPORT_SYMBOL_GPL(usb_add_gadget_udc);
 
-static int udc_is_newstyle(struct usb_udc *udc)
-{
-	if (udc->gadget->ops->udc_start && udc->gadget->ops->udc_stop)
-		return 1;
-	return 0;
-}
-
-
 static void usb_gadget_remove_driver(struct usb_udc *udc)
 {
 	dev_dbg(&udc->dev, "unregistering UDC driver [%s]\n",
@@ -261,14 +213,10 @@ static void usb_gadget_remove_driver(struct usb_udc *udc)
 
 	kobject_uevent(&udc->dev.kobj, KOBJ_CHANGE);
 
-	if (udc_is_newstyle(udc)) {
-		usb_gadget_disconnect(udc->gadget);
-		udc->driver->disconnect(udc->gadget);
-		udc->driver->unbind(udc->gadget);
-		usb_gadget_udc_stop(udc->gadget, udc->driver);
-	} else {
-		usb_gadget_stop(udc->gadget, udc->driver);
-	}
+	usb_gadget_disconnect(udc->gadget);
+	udc->driver->disconnect(udc->gadget);
+	udc->driver->unbind(udc->gadget);
+	usb_gadget_udc_stop(udc->gadget, udc->driver);
 
 	udc->driver = NULL;
 	udc->dev.driver = NULL;
@@ -321,22 +269,15 @@ static int udc_bind_to_driver(struct usb_udc *udc, struct usb_gadget_driver *dri
 	udc->driver = driver;
 	udc->dev.driver = &driver->driver;
 
-	if (udc_is_newstyle(udc)) {
-		ret = driver->bind(udc->gadget, driver);
-		if (ret)
-			goto err1;
-		ret = usb_gadget_udc_start(udc->gadget, driver);
-		if (ret) {
-			driver->unbind(udc->gadget);
-			goto err1;
-		}
-		usb_gadget_connect(udc->gadget);
-	} else {
-
-		ret = usb_gadget_start(udc->gadget, driver, driver->bind);
-		if (ret)
-			goto err1;
+	ret = driver->bind(udc->gadget, driver);
+	if (ret)
+		goto err1;
+	ret = usb_gadget_udc_start(udc->gadget, driver);
+	if (ret) {
+		driver->unbind(udc->gadget);
+		goto err1;
 	}
+	usb_gadget_connect(udc->gadget);
 
 	kobject_uevent(&udc->dev.kobj, KOBJ_CHANGE);
 	return 0;
@@ -440,13 +381,11 @@ static ssize_t usb_udc_softconn_store(struct device *dev,
 	struct usb_udc		*udc = container_of(dev, struct usb_udc, dev);
 
 	if (sysfs_streq(buf, "connect")) {
-		if (udc_is_newstyle(udc))
-			usb_gadget_udc_start(udc->gadget, udc->driver);
+		usb_gadget_udc_start(udc->gadget, udc->driver);
 		usb_gadget_connect(udc->gadget);
 	} else if (sysfs_streq(buf, "disconnect")) {
 		usb_gadget_disconnect(udc->gadget);
-		if (udc_is_newstyle(udc))
-			usb_gadget_udc_stop(udc->gadget, udc->driver);
+		usb_gadget_udc_stop(udc->gadget, udc->driver);
 	} else {
 		dev_err(dev, "unsupported command '%s'\n", buf);
 		return -EINVAL;
diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h
index e4c119ee4ebe..2e297e80d59a 100644
--- a/include/linux/usb/gadget.h
+++ b/include/linux/usb/gadget.h
@@ -471,12 +471,6 @@ struct usb_gadget_ops {
 			struct usb_gadget_driver *);
 	int	(*udc_stop)(struct usb_gadget *,
 			struct usb_gadget_driver *);
-
-	/* Those two are deprecated */
-	int	(*start)(struct usb_gadget_driver *,
-			int (*bind)(struct usb_gadget *,
-				struct usb_gadget_driver *driver));
-	int	(*stop)(struct usb_gadget_driver *);
 };
 
 /**
-- 
cgit v1.2.3


From 85a181986c9cf8bbd2c4f2fb6f2add7ac5db1f76 Mon Sep 17 00:00:00 2001
From: Prashant Gaikwad <pgaikwad@nvidia.com>
Date: Fri, 4 Jan 2013 12:30:54 +0530
Subject: clk: sunxi: Use common of_clk_init() function

Use common of_clk_init() function to initialize clocks.

Signed-off-by: Prashant Gaikwad <pgaikwad@nvidia.com>
Acked-by: Maxime Ripard <maxime.ripard@anandra.org>
Signed-off-by: Mike Turquette <mturquette@linaro.org>
---
 drivers/clk/clk-sunxi.c           | 30 ------------------------------
 drivers/clocksource/sunxi_timer.c |  4 ++--
 include/linux/clk/sunxi.h         | 22 ----------------------
 3 files changed, 2 insertions(+), 54 deletions(-)
 delete mode 100644 drivers/clk/clk-sunxi.c
 delete mode 100644 include/linux/clk/sunxi.h

(limited to 'include/linux')

diff --git a/drivers/clk/clk-sunxi.c b/drivers/clk/clk-sunxi.c
deleted file mode 100644
index 0e831b584ba7..000000000000
--- a/drivers/clk/clk-sunxi.c
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * Copyright 2012 Maxime Ripard
- *
- * Maxime Ripard <maxime.ripard@free-electrons.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- */
-
-#include <linux/clk-provider.h>
-#include <linux/clkdev.h>
-#include <linux/clk/sunxi.h>
-#include <linux/of.h>
-
-static const __initconst struct of_device_id clk_match[] = {
-	{ .compatible = "fixed-clock", .data = of_fixed_clk_setup, },
-	{}
-};
-
-void __init sunxi_init_clocks(void)
-{
-	of_clk_init(clk_match);
-}
diff --git a/drivers/clocksource/sunxi_timer.c b/drivers/clocksource/sunxi_timer.c
index 3cd1bd3d7aee..93d09d0e009f 100644
--- a/drivers/clocksource/sunxi_timer.c
+++ b/drivers/clocksource/sunxi_timer.c
@@ -23,7 +23,7 @@
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
 #include <linux/sunxi_timer.h>
-#include <linux/clk/sunxi.h>
+#include <linux/clk-provider.h>
 
 #define TIMER_CTL_REG		0x00
 #define TIMER_CTL_ENABLE		(1 << 0)
@@ -124,7 +124,7 @@ static void __init sunxi_timer_init(void)
 	if (irq <= 0)
 		panic("Can't parse IRQ");
 
-	sunxi_init_clocks();
+	of_clk_init(NULL);
 
 	clk = of_clk_get(node, 0);
 	if (IS_ERR(clk))
diff --git a/include/linux/clk/sunxi.h b/include/linux/clk/sunxi.h
deleted file mode 100644
index e074fdd5a236..000000000000
--- a/include/linux/clk/sunxi.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Copyright 2012 Maxime Ripard
- *
- * Maxime Ripard <maxime.ripard@free-electrons.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- */
-
-#ifndef __LINUX_CLK_SUNXI_H_
-#define __LINUX_CLK_SUNXI_H_
-
-void __init sunxi_init_clocks(void);
-
-#endif
-- 
cgit v1.2.3


From ace783b9bbfa2182b4a561498db3f09a0c56bc79 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizefan@huawei.com>
Date: Thu, 24 Jan 2013 14:30:48 +0800
Subject: sched: split out css_online/css_offline from tg creation/destruction

This is a preparaton for later patches.

- What do we gain from cpu_cgroup_css_online():

After ss->css_alloc() and before ss->css_online(), there's a small
window that tg->css.cgroup is NULL. With this change, tg won't be seen
before ss->css_online(), where it's added to the global list, so we're
guaranteed we'll never see NULL tg->css.cgroup.

- What do we gain from cpu_cgroup_css_offline():

tg is freed via RCU, so is cgroup. Without this change, This is how
synchronization works:

cgroup_rmdir()
  no ss->css_offline()
diput()
  syncornize_rcu()
  ss->css_free()       <-- unregister tg, and free it via call_rcu()
  kfree_rcu(cgroup)    <-- wait possible refs to cgroup, and free cgroup

We can't just kfree(cgroup), because tg might access tg->css.cgroup.

With this change:

cgroup_rmdir()
  ss->css_offline()    <-- unregister tg
diput()
  synchronize_rcu()    <-- wait possible refs to tg and cgroup
  ss->css_free()       <-- free tg
  kfree_rcu(cgroup)    <-- free cgroup

As you see, kfree_rcu() is redundant now.

Signed-off-by: Li Zefan <lizefan@huawei.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h     |  3 +++
 kernel/sched/auto_group.c |  3 +++
 kernel/sched/core.c       | 49 +++++++++++++++++++++++++++++++++++++----------
 3 files changed, 45 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 206bb089c06b..577eb973de7a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2750,7 +2750,10 @@ extern void normalize_rt_tasks(void);
 extern struct task_group root_task_group;
 
 extern struct task_group *sched_create_group(struct task_group *parent);
+extern void sched_online_group(struct task_group *tg,
+			       struct task_group *parent);
 extern void sched_destroy_group(struct task_group *tg);
+extern void sched_offline_group(struct task_group *tg);
 extern void sched_move_task(struct task_struct *tsk);
 #ifdef CONFIG_FAIR_GROUP_SCHED
 extern int sched_group_set_shares(struct task_group *tg, unsigned long shares);
diff --git a/kernel/sched/auto_group.c b/kernel/sched/auto_group.c
index 0984a21076a3..64de5f8b0c9e 100644
--- a/kernel/sched/auto_group.c
+++ b/kernel/sched/auto_group.c
@@ -35,6 +35,7 @@ static inline void autogroup_destroy(struct kref *kref)
 	ag->tg->rt_se = NULL;
 	ag->tg->rt_rq = NULL;
 #endif
+	sched_offline_group(ag->tg);
 	sched_destroy_group(ag->tg);
 }
 
@@ -76,6 +77,8 @@ static inline struct autogroup *autogroup_create(void)
 	if (IS_ERR(tg))
 		goto out_free;
 
+	sched_online_group(tg, &root_task_group);
+
 	kref_init(&ag->kref);
 	init_rwsem(&ag->lock);
 	ag->id = atomic_inc_return(&autogroup_seq_nr);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 257002c13bb0..106167243d68 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7159,7 +7159,6 @@ static void free_sched_group(struct task_group *tg)
 struct task_group *sched_create_group(struct task_group *parent)
 {
 	struct task_group *tg;
-	unsigned long flags;
 
 	tg = kzalloc(sizeof(*tg), GFP_KERNEL);
 	if (!tg)
@@ -7171,6 +7170,17 @@ struct task_group *sched_create_group(struct task_group *parent)
 	if (!alloc_rt_sched_group(tg, parent))
 		goto err;
 
+	return tg;
+
+err:
+	free_sched_group(tg);
+	return ERR_PTR(-ENOMEM);
+}
+
+void sched_online_group(struct task_group *tg, struct task_group *parent)
+{
+	unsigned long flags;
+
 	spin_lock_irqsave(&task_group_lock, flags);
 	list_add_rcu(&tg->list, &task_groups);
 
@@ -7180,12 +7190,6 @@ struct task_group *sched_create_group(struct task_group *parent)
 	INIT_LIST_HEAD(&tg->children);
 	list_add_rcu(&tg->siblings, &parent->children);
 	spin_unlock_irqrestore(&task_group_lock, flags);
-
-	return tg;
-
-err:
-	free_sched_group(tg);
-	return ERR_PTR(-ENOMEM);
 }
 
 /* rcu callback to free various structures associated with a task group */
@@ -7197,6 +7201,12 @@ static void free_sched_group_rcu(struct rcu_head *rhp)
 
 /* Destroy runqueue etc associated with a task group */
 void sched_destroy_group(struct task_group *tg)
+{
+	/* wait for possible concurrent references to cfs_rqs complete */
+	call_rcu(&tg->rcu, free_sched_group_rcu);
+}
+
+void sched_offline_group(struct task_group *tg)
 {
 	unsigned long flags;
 	int i;
@@ -7209,9 +7219,6 @@ void sched_destroy_group(struct task_group *tg)
 	list_del_rcu(&tg->list);
 	list_del_rcu(&tg->siblings);
 	spin_unlock_irqrestore(&task_group_lock, flags);
-
-	/* wait for possible concurrent references to cfs_rqs complete */
-	call_rcu(&tg->rcu, free_sched_group_rcu);
 }
 
 /* change task's runqueue when it moves between groups.
@@ -7563,6 +7570,19 @@ static struct cgroup_subsys_state *cpu_cgroup_css_alloc(struct cgroup *cgrp)
 	return &tg->css;
 }
 
+static int cpu_cgroup_css_online(struct cgroup *cgrp)
+{
+	struct task_group *tg = cgroup_tg(cgrp);
+	struct task_group *parent;
+
+	if (!cgrp->parent)
+		return 0;
+
+	parent = cgroup_tg(cgrp->parent);
+	sched_online_group(tg, parent);
+	return 0;
+}
+
 static void cpu_cgroup_css_free(struct cgroup *cgrp)
 {
 	struct task_group *tg = cgroup_tg(cgrp);
@@ -7570,6 +7590,13 @@ static void cpu_cgroup_css_free(struct cgroup *cgrp)
 	sched_destroy_group(tg);
 }
 
+static void cpu_cgroup_css_offline(struct cgroup *cgrp)
+{
+	struct task_group *tg = cgroup_tg(cgrp);
+
+	sched_offline_group(tg);
+}
+
 static int cpu_cgroup_can_attach(struct cgroup *cgrp,
 				 struct cgroup_taskset *tset)
 {
@@ -7925,6 +7952,8 @@ struct cgroup_subsys cpu_cgroup_subsys = {
 	.name		= "cpu",
 	.css_alloc	= cpu_cgroup_css_alloc,
 	.css_free	= cpu_cgroup_css_free,
+	.css_online	= cpu_cgroup_css_online,
+	.css_offline	= cpu_cgroup_css_offline,
 	.can_attach	= cpu_cgroup_can_attach,
 	.attach		= cpu_cgroup_attach,
 	.exit		= cpu_cgroup_exit,
-- 
cgit v1.2.3


From be44562613851235d801d41d5b3976dc4333f622 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizefan@huawei.com>
Date: Thu, 24 Jan 2013 14:31:42 +0800
Subject: cgroup: remove synchronize_rcu() from cgroup_diput()

Free cgroup via call_rcu(). The actual work is done through
workqueue.

Signed-off-by: Li Zefan <lizefan@huawei.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/cgroup.h |  1 +
 kernel/cgroup.c        | 72 ++++++++++++++++++++++++++++++--------------------
 2 files changed, 44 insertions(+), 29 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 8118a3120378..900af5964f55 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -203,6 +203,7 @@ struct cgroup {
 
 	/* For RCU-protected deletion */
 	struct rcu_head rcu_head;
+	struct work_struct free_work;
 
 	/* List of events which userspace want to receive */
 	struct list_head event_list;
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index af993919aa04..02e4f201472e 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -852,12 +852,52 @@ static struct inode *cgroup_new_inode(umode_t mode, struct super_block *sb)
 	return inode;
 }
 
+static void cgroup_free_fn(struct work_struct *work)
+{
+	struct cgroup *cgrp = container_of(work, struct cgroup, free_work);
+	struct cgroup_subsys *ss;
+
+	mutex_lock(&cgroup_mutex);
+	/*
+	 * Release the subsystem state objects.
+	 */
+	for_each_subsys(cgrp->root, ss)
+		ss->css_free(cgrp);
+
+	cgrp->root->number_of_cgroups--;
+	mutex_unlock(&cgroup_mutex);
+
+	/*
+	 * Drop the active superblock reference that we took when we
+	 * created the cgroup
+	 */
+	deactivate_super(cgrp->root->sb);
+
+	/*
+	 * if we're getting rid of the cgroup, refcount should ensure
+	 * that there are no pidlists left.
+	 */
+	BUG_ON(!list_empty(&cgrp->pidlists));
+
+	simple_xattrs_free(&cgrp->xattrs);
+
+	ida_simple_remove(&cgrp->root->cgroup_ida, cgrp->id);
+	kfree(cgrp);
+}
+
+static void cgroup_free_rcu(struct rcu_head *head)
+{
+	struct cgroup *cgrp = container_of(head, struct cgroup, rcu_head);
+
+	schedule_work(&cgrp->free_work);
+}
+
 static void cgroup_diput(struct dentry *dentry, struct inode *inode)
 {
 	/* is dentry a directory ? if so, kfree() associated cgroup */
 	if (S_ISDIR(inode->i_mode)) {
 		struct cgroup *cgrp = dentry->d_fsdata;
-		struct cgroup_subsys *ss;
+
 		BUG_ON(!(cgroup_is_removed(cgrp)));
 		/* It's possible for external users to be holding css
 		 * reference counts on a cgroup; css_put() needs to
@@ -865,34 +905,7 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode)
 		 * the reference count in order to know if it needs to
 		 * queue the cgroup to be handled by the release
 		 * agent */
-		synchronize_rcu();
-
-		mutex_lock(&cgroup_mutex);
-		/*
-		 * Release the subsystem state objects.
-		 */
-		for_each_subsys(cgrp->root, ss)
-			ss->css_free(cgrp);
-
-		cgrp->root->number_of_cgroups--;
-		mutex_unlock(&cgroup_mutex);
-
-		/*
-		 * Drop the active superblock reference that we took when we
-		 * created the cgroup
-		 */
-		deactivate_super(cgrp->root->sb);
-
-		/*
-		 * if we're getting rid of the cgroup, refcount should ensure
-		 * that there are no pidlists left.
-		 */
-		BUG_ON(!list_empty(&cgrp->pidlists));
-
-		simple_xattrs_free(&cgrp->xattrs);
-
-		ida_simple_remove(&cgrp->root->cgroup_ida, cgrp->id);
-		kfree(cgrp);
+		call_rcu(&cgrp->rcu_head, cgroup_free_rcu);
 	} else {
 		struct cfent *cfe = __d_cfe(dentry);
 		struct cgroup *cgrp = dentry->d_parent->d_fsdata;
@@ -1391,6 +1404,7 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
 	INIT_LIST_HEAD(&cgrp->allcg_node);
 	INIT_LIST_HEAD(&cgrp->release_list);
 	INIT_LIST_HEAD(&cgrp->pidlists);
+	INIT_WORK(&cgrp->free_work, cgroup_free_fn);
 	mutex_init(&cgrp->pidlist_mutex);
 	INIT_LIST_HEAD(&cgrp->event_list);
 	spin_lock_init(&cgrp->event_list_lock);
-- 
cgit v1.2.3


From 6543becf26fff612cdadeed7250ccc8d49f67f27 Mon Sep 17 00:00:00 2001
From: Andreas Schwab <schwab@linux-m68k.org>
Date: Sun, 20 Jan 2013 17:58:47 +0100
Subject: mod/file2alias: make modalias generation safe for cross compiling

Use the target compiler to compute the offsets for the fields of the
device_id structures, so that it won't be broken by different alignments
between the host and target ABIs.

This also fixes missing endian corrections for some modaliases.

Signed-off-by: Andreas Schwab <schwab@linux-m68k.org>
Signed-off-by: Michal Marek <mmarek@suse.cz>
---
 Makefile                          |   1 -
 include/linux/mod_devicetable.h   |  58 +---
 scripts/Makefile.modpost          |   7 +-
 scripts/mod/.gitignore            |   1 +
 scripts/mod/Makefile              |  35 ++
 scripts/mod/devicetable-offsets.c | 178 ++++++++++
 scripts/mod/file2alias.c          | 674 +++++++++++++++++++++-----------------
 scripts/mod/modpost.c             |   5 +-
 8 files changed, 591 insertions(+), 368 deletions(-)
 create mode 100644 scripts/mod/devicetable-offsets.c

(limited to 'include/linux')

diff --git a/Makefile b/Makefile
index 275b9567382c..5812ec9a56b7 100644
--- a/Makefile
+++ b/Makefile
@@ -191,7 +191,6 @@ SUBARCH := $(shell uname -m | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ \
 # "make" in the configured kernel build directory always uses that.
 # Default value for CROSS_COMPILE is not to prefix executables
 # Note: Some architectures assign CROSS_COMPILE in their arch/*/Makefile
-export KBUILD_BUILDHOST := $(SUBARCH)
 ARCH		?= $(SUBARCH)
 CROSS_COMPILE	?= $(CONFIG_CROSS_COMPILE:"%"=%)
 
diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h
index fed3def62818..779cf7c4a3d1 100644
--- a/include/linux/mod_devicetable.h
+++ b/include/linux/mod_devicetable.h
@@ -33,8 +33,7 @@ struct ieee1394_device_id {
 	__u32 model_id;
 	__u32 specifier_id;
 	__u32 version;
-	kernel_ulong_t driver_data
-		__attribute__((aligned(sizeof(kernel_ulong_t))));
+	kernel_ulong_t driver_data;
 };
 
 
@@ -148,8 +147,7 @@ struct hid_device_id {
 	__u16 group;
 	__u32 vendor;
 	__u32 product;
-	kernel_ulong_t driver_data
-		__attribute__((aligned(sizeof(kernel_ulong_t))));
+	kernel_ulong_t driver_data;
 };
 
 /* s390 CCW devices */
@@ -173,8 +171,6 @@ struct ccw_device_id {
 struct ap_device_id {
 	__u16 match_flags;	/* which fields to match against */
 	__u8 dev_type;		/* device type */
-	__u8 pad1;
-	__u32 pad2;
 	kernel_ulong_t driver_info;
 };
 
@@ -184,13 +180,10 @@ struct ap_device_id {
 struct css_device_id {
 	__u8 match_flags;
 	__u8 type; /* subchannel type */
-	__u16 pad2;
-	__u32 pad3;
 	kernel_ulong_t driver_data;
 };
 
-#define ACPI_ID_LEN	16 /* only 9 bytes needed here, 16 bytes are used */
-			   /* to workaround crosscompile issues */
+#define ACPI_ID_LEN	9
 
 struct acpi_device_id {
 	__u8 id[ACPI_ID_LEN];
@@ -231,11 +224,7 @@ struct of_device_id
 	char	name[32];
 	char	type[32];
 	char	compatible[128];
-#ifdef __KERNEL__
 	const void *data;
-#else
-	kernel_ulong_t data;
-#endif
 };
 
 /* VIO */
@@ -260,24 +249,14 @@ struct pcmcia_device_id {
 	/* for pseudo multi-function devices */
 	__u8  		device_no;
 
-	__u32 		prod_id_hash[4]
-		__attribute__((aligned(sizeof(__u32))));
+	__u32 		prod_id_hash[4];
 
 	/* not matched against in kernelspace*/
-#ifdef __KERNEL__
 	const char *	prod_id[4];
-#else
-	kernel_ulong_t	prod_id[4]
-		__attribute__((aligned(sizeof(kernel_ulong_t))));
-#endif
 
 	/* not matched against */
 	kernel_ulong_t	driver_info;
-#ifdef __KERNEL__
 	char *		cisfile;
-#else
-	kernel_ulong_t	cisfile;
-#endif
 };
 
 #define PCMCIA_DEV_ID_MATCH_MANF_ID	0x0001
@@ -373,8 +352,7 @@ struct sdio_device_id {
 	__u8	class;			/* Standard interface or SDIO_ANY_ID */
 	__u16	vendor;			/* Vendor or SDIO_ANY_ID */
 	__u16	device;			/* Device ID or SDIO_ANY_ID */
-	kernel_ulong_t driver_data	/* Data private to the driver */
-		__attribute__((aligned(sizeof(kernel_ulong_t))));
+	kernel_ulong_t driver_data;	/* Data private to the driver */
 };
 
 /* SSB core, see drivers/ssb/ */
@@ -420,8 +398,7 @@ struct virtio_device_id {
  */
 struct hv_vmbus_device_id {
 	__u8 guid[16];
-	kernel_ulong_t driver_data	/* Data private to the driver */
-			__attribute__((aligned(sizeof(kernel_ulong_t))));
+	kernel_ulong_t driver_data;	/* Data private to the driver */
 };
 
 /* rpmsg */
@@ -440,8 +417,7 @@ struct rpmsg_device_id {
 
 struct i2c_device_id {
 	char name[I2C_NAME_SIZE];
-	kernel_ulong_t driver_data	/* Data private to the driver */
-			__attribute__((aligned(sizeof(kernel_ulong_t))));
+	kernel_ulong_t driver_data;	/* Data private to the driver */
 };
 
 /* spi */
@@ -451,8 +427,7 @@ struct i2c_device_id {
 
 struct spi_device_id {
 	char name[SPI_NAME_SIZE];
-	kernel_ulong_t driver_data	/* Data private to the driver */
-			__attribute__((aligned(sizeof(kernel_ulong_t))));
+	kernel_ulong_t driver_data;	/* Data private to the driver */
 };
 
 /* dmi */
@@ -484,15 +459,6 @@ struct dmi_strmatch {
 	char substr[79];
 };
 
-#ifndef __KERNEL__
-struct dmi_system_id {
-	kernel_ulong_t callback;
-	kernel_ulong_t ident;
-	struct dmi_strmatch matches[4];
-	kernel_ulong_t driver_data
-			__attribute__((aligned(sizeof(kernel_ulong_t))));
-};
-#else
 struct dmi_system_id {
 	int (*callback)(const struct dmi_system_id *);
 	const char *ident;
@@ -506,7 +472,6 @@ struct dmi_system_id {
  *	error: storage size of '__mod_dmi_device_table' isn't known
  */
 #define dmi_device_id dmi_system_id
-#endif
 
 #define DMI_MATCH(a, b)	{ a, b }
 
@@ -515,8 +480,7 @@ struct dmi_system_id {
 
 struct platform_device_id {
 	char name[PLATFORM_NAME_SIZE];
-	kernel_ulong_t driver_data
-			__attribute__((aligned(sizeof(kernel_ulong_t))));
+	kernel_ulong_t driver_data;
 };
 
 #define MDIO_MODULE_PREFIX	"mdio:"
@@ -572,11 +536,7 @@ struct isapnp_device_id {
 struct amba_id {
 	unsigned int		id;
 	unsigned int		mask;
-#ifndef __KERNEL__
-	kernel_ulong_t		data;
-#else
 	void			*data;
-#endif
 };
 
 /*
diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost
index a1cb0222ebe6..cf82c832458f 100644
--- a/scripts/Makefile.modpost
+++ b/scripts/Makefile.modpost
@@ -66,10 +66,6 @@ modules   := $(patsubst %.o,%.ko, $(wildcard $(__modules:.ko=.o)))
 # Stop after building .o files if NOFINAL is set. Makes compile tests quicker
 _modpost: $(if $(KBUILD_MODPOST_NOFINAL), $(modules:.ko:.o),$(modules))
 
-ifneq ($(KBUILD_BUILDHOST),$(ARCH))
-        cross_build := 1
-endif
-
 # Step 2), invoke modpost
 #  Includes step 3,4
 modpost = scripts/mod/modpost                    \
@@ -80,8 +76,7 @@ modpost = scripts/mod/modpost                    \
  $(if $(KBUILD_EXTRA_SYMBOLS), $(patsubst %, -e %,$(KBUILD_EXTRA_SYMBOLS))) \
  $(if $(KBUILD_EXTMOD),-o $(modulesymfile))      \
  $(if $(CONFIG_DEBUG_SECTION_MISMATCH),,-S)      \
- $(if $(KBUILD_EXTMOD)$(KBUILD_MODPOST_WARN),-w) \
- $(if $(cross_build),-c)
+ $(if $(KBUILD_EXTMOD)$(KBUILD_MODPOST_WARN),-w)
 
 quiet_cmd_modpost = MODPOST $(words $(filter-out vmlinux FORCE, $^)) modules
       cmd_modpost = $(modpost) -s
diff --git a/scripts/mod/.gitignore b/scripts/mod/.gitignore
index e9b7abe7b95b..33bae0df4de5 100644
--- a/scripts/mod/.gitignore
+++ b/scripts/mod/.gitignore
@@ -1,4 +1,5 @@
 elfconfig.h
 mk_elfconfig
 modpost
+devicetable-offsets.h
 
diff --git a/scripts/mod/Makefile b/scripts/mod/Makefile
index ff954f8168c1..9415b5663364 100644
--- a/scripts/mod/Makefile
+++ b/scripts/mod/Makefile
@@ -3,9 +3,44 @@ always		:= $(hostprogs-y) empty.o
 
 modpost-objs	:= modpost.o file2alias.o sumversion.o
 
+devicetable-offsets-file := devicetable-offsets.h
+
+define sed-y
+	"/^->/{s:->#\(.*\):/* \1 */:; \
+	s:^->\([^ ]*\) [\$$#]*\([-0-9]*\) \(.*\):#define \1 \2 /* \3 */:; \
+	s:^->\([^ ]*\) [\$$#]*\([^ ]*\) \(.*\):#define \1 \2 /* \3 */:; \
+	s:->::; p;}"
+endef
+
+quiet_cmd_offsets = GEN     $@
+define cmd_offsets
+	(set -e; \
+	 echo "#ifndef __DEVICEVTABLE_OFFSETS_H__"; \
+	 echo "#define __DEVICEVTABLE_OFFSETS_H__"; \
+	 echo "/*"; \
+	 echo " * DO NOT MODIFY."; \
+	 echo " *"; \
+	 echo " * This file was generated by Kbuild"; \
+	 echo " *"; \
+	 echo " */"; \
+	 echo ""; \
+	 sed -ne $(sed-y) $<; \
+	 echo ""; \
+	 echo "#endif" ) > $@
+endef
+
+# We use internal kbuild rules to avoid the "is up to date" message from make
+scripts/mod/devicetable-offsets.s: scripts/mod/devicetable-offsets.c FORCE
+	$(Q)mkdir -p $(dir $@)
+	$(call if_changed_dep,cc_s_c)
+
+$(obj)/$(devicetable-offsets-file): scripts/mod/devicetable-offsets.s
+	$(call cmd,offsets)
+
 # dependencies on generated files need to be listed explicitly
 
 $(obj)/modpost.o $(obj)/file2alias.o $(obj)/sumversion.o: $(obj)/elfconfig.h
+$(obj)/file2alias.o: $(obj)/$(devicetable-offsets-file)
 
 quiet_cmd_elfconfig = MKELF   $@
       cmd_elfconfig = $(obj)/mk_elfconfig < $< > $@
diff --git a/scripts/mod/devicetable-offsets.c b/scripts/mod/devicetable-offsets.c
new file mode 100644
index 000000000000..b45260bfeaa0
--- /dev/null
+++ b/scripts/mod/devicetable-offsets.c
@@ -0,0 +1,178 @@
+#include <linux/kbuild.h>
+#include <linux/mod_devicetable.h>
+
+#define DEVID(devid) DEFINE(SIZE_##devid, sizeof(struct devid))
+#define DEVID_FIELD(devid, field) \
+	DEFINE(OFF_##devid##_##field, offsetof(struct devid, field))
+
+int main(void)
+{
+	DEVID(usb_device_id);
+	DEVID_FIELD(usb_device_id, match_flags);
+	DEVID_FIELD(usb_device_id, idVendor);
+	DEVID_FIELD(usb_device_id, idProduct);
+	DEVID_FIELD(usb_device_id, bcdDevice_lo);
+	DEVID_FIELD(usb_device_id, bcdDevice_hi);
+	DEVID_FIELD(usb_device_id, bDeviceClass);
+	DEVID_FIELD(usb_device_id, bDeviceSubClass);
+	DEVID_FIELD(usb_device_id, bDeviceProtocol);
+	DEVID_FIELD(usb_device_id, bInterfaceClass);
+	DEVID_FIELD(usb_device_id, bInterfaceSubClass);
+	DEVID_FIELD(usb_device_id, bInterfaceProtocol);
+	DEVID_FIELD(usb_device_id, bInterfaceNumber);
+
+	DEVID(hid_device_id);
+	DEVID_FIELD(hid_device_id, bus);
+	DEVID_FIELD(hid_device_id, group);
+	DEVID_FIELD(hid_device_id, vendor);
+	DEVID_FIELD(hid_device_id, product);
+
+	DEVID(ieee1394_device_id);
+	DEVID_FIELD(ieee1394_device_id, match_flags);
+	DEVID_FIELD(ieee1394_device_id, vendor_id);
+	DEVID_FIELD(ieee1394_device_id, model_id);
+	DEVID_FIELD(ieee1394_device_id, specifier_id);
+	DEVID_FIELD(ieee1394_device_id, version);
+
+	DEVID(pci_device_id);
+	DEVID_FIELD(pci_device_id, vendor);
+	DEVID_FIELD(pci_device_id, device);
+	DEVID_FIELD(pci_device_id, subvendor);
+	DEVID_FIELD(pci_device_id, subdevice);
+	DEVID_FIELD(pci_device_id, class);
+	DEVID_FIELD(pci_device_id, class_mask);
+
+	DEVID(ccw_device_id);
+	DEVID_FIELD(ccw_device_id, match_flags);
+	DEVID_FIELD(ccw_device_id, cu_type);
+	DEVID_FIELD(ccw_device_id, cu_model);
+	DEVID_FIELD(ccw_device_id, dev_type);
+	DEVID_FIELD(ccw_device_id, dev_model);
+
+	DEVID(ap_device_id);
+	DEVID_FIELD(ap_device_id, dev_type);
+
+	DEVID(css_device_id);
+	DEVID_FIELD(css_device_id, type);
+
+	DEVID(serio_device_id);
+	DEVID_FIELD(serio_device_id, type);
+	DEVID_FIELD(serio_device_id, proto);
+	DEVID_FIELD(serio_device_id, id);
+	DEVID_FIELD(serio_device_id, extra);
+
+	DEVID(acpi_device_id);
+	DEVID_FIELD(acpi_device_id, id);
+
+	DEVID(pnp_device_id);
+	DEVID_FIELD(pnp_device_id, id);
+
+	DEVID(pnp_card_device_id);
+	DEVID_FIELD(pnp_card_device_id, devs);
+
+	DEVID(pcmcia_device_id);
+	DEVID_FIELD(pcmcia_device_id, match_flags);
+	DEVID_FIELD(pcmcia_device_id, manf_id);
+	DEVID_FIELD(pcmcia_device_id, card_id);
+	DEVID_FIELD(pcmcia_device_id, func_id);
+	DEVID_FIELD(pcmcia_device_id, function);
+	DEVID_FIELD(pcmcia_device_id, device_no);
+	DEVID_FIELD(pcmcia_device_id, prod_id_hash);
+
+	DEVID(of_device_id);
+	DEVID_FIELD(of_device_id, name);
+	DEVID_FIELD(of_device_id, type);
+	DEVID_FIELD(of_device_id, compatible);
+
+	DEVID(vio_device_id);
+	DEVID_FIELD(vio_device_id, type);
+	DEVID_FIELD(vio_device_id, compat);
+
+	DEVID(input_device_id);
+	DEVID_FIELD(input_device_id, flags);
+	DEVID_FIELD(input_device_id, bustype);
+	DEVID_FIELD(input_device_id, vendor);
+	DEVID_FIELD(input_device_id, product);
+	DEVID_FIELD(input_device_id, version);
+	DEVID_FIELD(input_device_id, evbit);
+	DEVID_FIELD(input_device_id, keybit);
+	DEVID_FIELD(input_device_id, relbit);
+	DEVID_FIELD(input_device_id, absbit);
+	DEVID_FIELD(input_device_id, mscbit);
+	DEVID_FIELD(input_device_id, ledbit);
+	DEVID_FIELD(input_device_id, sndbit);
+	DEVID_FIELD(input_device_id, ffbit);
+	DEVID_FIELD(input_device_id, swbit);
+
+	DEVID(eisa_device_id);
+	DEVID_FIELD(eisa_device_id, sig);
+
+	DEVID(parisc_device_id);
+	DEVID_FIELD(parisc_device_id, hw_type);
+	DEVID_FIELD(parisc_device_id, hversion);
+	DEVID_FIELD(parisc_device_id, hversion_rev);
+	DEVID_FIELD(parisc_device_id, sversion);
+
+	DEVID(sdio_device_id);
+	DEVID_FIELD(sdio_device_id, class);
+	DEVID_FIELD(sdio_device_id, vendor);
+	DEVID_FIELD(sdio_device_id, device);
+
+	DEVID(ssb_device_id);
+	DEVID_FIELD(ssb_device_id, vendor);
+	DEVID_FIELD(ssb_device_id, coreid);
+	DEVID_FIELD(ssb_device_id, revision);
+
+	DEVID(bcma_device_id);
+	DEVID_FIELD(bcma_device_id, manuf);
+	DEVID_FIELD(bcma_device_id, id);
+	DEVID_FIELD(bcma_device_id, rev);
+	DEVID_FIELD(bcma_device_id, class);
+
+	DEVID(virtio_device_id);
+	DEVID_FIELD(virtio_device_id, device);
+	DEVID_FIELD(virtio_device_id, vendor);
+
+	DEVID(hv_vmbus_device_id);
+	DEVID_FIELD(hv_vmbus_device_id, guid);
+
+	DEVID(i2c_device_id);
+	DEVID_FIELD(i2c_device_id, name);
+
+	DEVID(spi_device_id);
+	DEVID_FIELD(spi_device_id, name);
+
+	DEVID(dmi_system_id);
+	DEVID_FIELD(dmi_system_id, matches);
+
+	DEVID(platform_device_id);
+	DEVID_FIELD(platform_device_id, name);
+
+	DEVID(mdio_device_id);
+	DEVID_FIELD(mdio_device_id, phy_id);
+	DEVID_FIELD(mdio_device_id, phy_id_mask);
+
+	DEVID(zorro_device_id);
+	DEVID_FIELD(zorro_device_id, id);
+
+	DEVID(isapnp_device_id);
+	DEVID_FIELD(isapnp_device_id, vendor);
+	DEVID_FIELD(isapnp_device_id, function);
+
+	DEVID(ipack_device_id);
+	DEVID_FIELD(ipack_device_id, format);
+	DEVID_FIELD(ipack_device_id, vendor);
+	DEVID_FIELD(ipack_device_id, device);
+
+	DEVID(amba_id);
+	DEVID_FIELD(amba_id, id);
+	DEVID_FIELD(amba_id, mask);
+
+	DEVID(x86_cpu_id);
+	DEVID_FIELD(x86_cpu_id, feature);
+	DEVID_FIELD(x86_cpu_id, family);
+	DEVID_FIELD(x86_cpu_id, model);
+	DEVID_FIELD(x86_cpu_id, vendor);
+
+	return 0;
+}
diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c
index df4fc23dd836..771ac17f635d 100644
--- a/scripts/mod/file2alias.c
+++ b/scripts/mod/file2alias.c
@@ -11,6 +11,7 @@
  */
 
 #include "modpost.h"
+#include "devicetable-offsets.h"
 
 /* We use the ELF typedefs for kernel_ulong_t but bite the bullet and
  * use either stdint.h or inttypes.h for the rest. */
@@ -84,13 +85,25 @@ extern struct devtable *__start___devtable[], *__stop___devtable[];
 # define __used			__attribute__((__used__))
 #endif
 
+/* Define a variable f that holds the value of field f of struct devid
+ * based at address m.
+ */
+#define DEF_FIELD(m, devid, f) \
+	typeof(((struct devid *)0)->f) f = TO_NATIVE(*(typeof(f) *)((m) + OFF_##devid##_##f))
+/* Define a variable f that holds the address of field f of struct devid
+ * based at address m.  Due to the way typeof works, for a field of type
+ * T[N] the variable has type T(*)[N], _not_ T*.
+ */
+#define DEF_FIELD_ADDR(m, devid, f) \
+	typeof(((struct devid *)0)->f) *f = ((m) + OFF_##devid##_##f)
+
 /* Add a table entry.  We test function type matches while we're here. */
 #define ADD_TO_DEVTABLE(device_id, type, function) \
 	static struct devtable __cat(devtable,__LINE__) = {	\
 		device_id + 0*sizeof((function)((const char *)NULL,	\
-						(type *)NULL,		\
+						(void *)NULL,		\
 						(char *)NULL)),		\
-		sizeof(type), (function) };				\
+		SIZE_##type, (function) };				\
 	static struct devtable *SECTION(__devtable) __used \
 		__cat(devtable_ptr,__LINE__) = &__cat(devtable,__LINE__)
 
@@ -116,7 +129,6 @@ static inline void add_wildcard(char *str)
 		strcat(str + len, "*");
 }
 
-unsigned int cross_build = 0;
 /**
  * Check that sizeof(device_id type) are consistent with size of section
  * in .o file. If in-consistent then userspace and kernel does not agree
@@ -131,8 +143,6 @@ static void device_id_check(const char *modname, const char *device_id,
 	int i;
 
 	if (size % id_size || size < id_size) {
-		if (cross_build != 0)
-			return;
 		fatal("%s: sizeof(struct %s_device_id)=%lu is not a modulo "
 		      "of the size of section __mod_%s_device_table=%lu.\n"
 		      "Fix definition of struct %s_device_id "
@@ -157,17 +167,29 @@ static void device_id_check(const char *modname, const char *device_id,
 
 /* USB is special because the bcdDevice can be matched against a numeric range */
 /* Looks like "usb:vNpNdNdcNdscNdpNicNiscNipNinN" */
-static void do_usb_entry(struct usb_device_id *id,
+static void do_usb_entry(void *symval,
 			 unsigned int bcdDevice_initial, int bcdDevice_initial_digits,
 			 unsigned char range_lo, unsigned char range_hi,
 			 unsigned char max, struct module *mod)
 {
 	char alias[500];
+	DEF_FIELD(symval, usb_device_id, match_flags);
+	DEF_FIELD(symval, usb_device_id, idVendor);
+	DEF_FIELD(symval, usb_device_id, idProduct);
+	DEF_FIELD(symval, usb_device_id, bcdDevice_lo);
+	DEF_FIELD(symval, usb_device_id, bDeviceClass);
+	DEF_FIELD(symval, usb_device_id, bDeviceSubClass);
+	DEF_FIELD(symval, usb_device_id, bDeviceProtocol);
+	DEF_FIELD(symval, usb_device_id, bInterfaceClass);
+	DEF_FIELD(symval, usb_device_id, bInterfaceSubClass);
+	DEF_FIELD(symval, usb_device_id, bInterfaceProtocol);
+	DEF_FIELD(symval, usb_device_id, bInterfaceNumber);
+
 	strcpy(alias, "usb:");
-	ADD(alias, "v", id->match_flags&USB_DEVICE_ID_MATCH_VENDOR,
-	    id->idVendor);
-	ADD(alias, "p", id->match_flags&USB_DEVICE_ID_MATCH_PRODUCT,
-	    id->idProduct);
+	ADD(alias, "v", match_flags&USB_DEVICE_ID_MATCH_VENDOR,
+	    idVendor);
+	ADD(alias, "p", match_flags&USB_DEVICE_ID_MATCH_PRODUCT,
+	    idProduct);
 
 	strcat(alias, "d");
 	if (bcdDevice_initial_digits)
@@ -190,29 +212,23 @@ static void do_usb_entry(struct usb_device_id *id,
 				range_lo);
 		}
 	}
-	if (bcdDevice_initial_digits < (sizeof(id->bcdDevice_lo) * 2 - 1))
+	if (bcdDevice_initial_digits < (sizeof(bcdDevice_lo) * 2 - 1))
 		strcat(alias, "*");
 
-	ADD(alias, "dc", id->match_flags&USB_DEVICE_ID_MATCH_DEV_CLASS,
-	    id->bDeviceClass);
-	ADD(alias, "dsc",
-	    id->match_flags&USB_DEVICE_ID_MATCH_DEV_SUBCLASS,
-	    id->bDeviceSubClass);
-	ADD(alias, "dp",
-	    id->match_flags&USB_DEVICE_ID_MATCH_DEV_PROTOCOL,
-	    id->bDeviceProtocol);
-	ADD(alias, "ic",
-	    id->match_flags&USB_DEVICE_ID_MATCH_INT_CLASS,
-	    id->bInterfaceClass);
-	ADD(alias, "isc",
-	    id->match_flags&USB_DEVICE_ID_MATCH_INT_SUBCLASS,
-	    id->bInterfaceSubClass);
-	ADD(alias, "ip",
-	    id->match_flags&USB_DEVICE_ID_MATCH_INT_PROTOCOL,
-	    id->bInterfaceProtocol);
-	ADD(alias, "in",
-	    id->match_flags&USB_DEVICE_ID_MATCH_INT_NUMBER,
-	    id->bInterfaceNumber);
+	ADD(alias, "dc", match_flags&USB_DEVICE_ID_MATCH_DEV_CLASS,
+	    bDeviceClass);
+	ADD(alias, "dsc", match_flags&USB_DEVICE_ID_MATCH_DEV_SUBCLASS,
+	    bDeviceSubClass);
+	ADD(alias, "dp", match_flags&USB_DEVICE_ID_MATCH_DEV_PROTOCOL,
+	    bDeviceProtocol);
+	ADD(alias, "ic", match_flags&USB_DEVICE_ID_MATCH_INT_CLASS,
+	    bInterfaceClass);
+	ADD(alias, "isc", match_flags&USB_DEVICE_ID_MATCH_INT_SUBCLASS,
+	    bInterfaceSubClass);
+	ADD(alias, "ip", match_flags&USB_DEVICE_ID_MATCH_INT_PROTOCOL,
+	    bInterfaceProtocol);
+	ADD(alias, "in", match_flags&USB_DEVICE_ID_MATCH_INT_NUMBER,
+	    bInterfaceNumber);
 
 	add_wildcard(alias);
 	buf_printf(&mod->dev_table_buf,
@@ -258,24 +274,28 @@ static unsigned int incbcd(unsigned int *bcd,
 	return init;
 }
 
-static void do_usb_entry_multi(struct usb_device_id *id, struct module *mod)
+static void do_usb_entry_multi(void *symval, struct module *mod)
 {
 	unsigned int devlo, devhi;
 	unsigned char chi, clo, max;
 	int ndigits;
 
-	id->match_flags = TO_NATIVE(id->match_flags);
-	id->idVendor = TO_NATIVE(id->idVendor);
-	id->idProduct = TO_NATIVE(id->idProduct);
+	DEF_FIELD(symval, usb_device_id, match_flags);
+	DEF_FIELD(symval, usb_device_id, idVendor);
+	DEF_FIELD(symval, usb_device_id, idProduct);
+	DEF_FIELD(symval, usb_device_id, bcdDevice_lo);
+	DEF_FIELD(symval, usb_device_id, bcdDevice_hi);
+	DEF_FIELD(symval, usb_device_id, bDeviceClass);
+	DEF_FIELD(symval, usb_device_id, bInterfaceClass);
 
-	devlo = id->match_flags & USB_DEVICE_ID_MATCH_DEV_LO ?
-		TO_NATIVE(id->bcdDevice_lo) : 0x0U;
-	devhi = id->match_flags & USB_DEVICE_ID_MATCH_DEV_HI ?
-		TO_NATIVE(id->bcdDevice_hi) : ~0x0U;
+	devlo = match_flags & USB_DEVICE_ID_MATCH_DEV_LO ?
+		bcdDevice_lo : 0x0U;
+	devhi = match_flags & USB_DEVICE_ID_MATCH_DEV_HI ?
+		bcdDevice_hi : ~0x0U;
 
 	/* Figure out if this entry is in bcd or hex format */
 	max = 0x9; /* Default to decimal format */
-	for (ndigits = 0 ; ndigits < sizeof(id->bcdDevice_lo) * 2 ; ndigits++) {
+	for (ndigits = 0 ; ndigits < sizeof(bcdDevice_lo) * 2 ; ndigits++) {
 		clo = (devlo >> (ndigits << 2)) & 0xf;
 		chi = ((devhi > 0x9999 ? 0x9999 : devhi) >> (ndigits << 2)) & 0xf;
 		if (clo > max || chi > max) {
@@ -288,11 +308,11 @@ static void do_usb_entry_multi(struct usb_device_id *id, struct module *mod)
 	 * Some modules (visor) have empty slots as placeholder for
 	 * run-time specification that results in catch-all alias
 	 */
-	if (!(id->idVendor | id->idProduct | id->bDeviceClass | id->bInterfaceClass))
+	if (!(idVendor | idProduct | bDeviceClass | bInterfaceClass))
 		return;
 
 	/* Convert numeric bcdDevice range into fnmatch-able pattern(s) */
-	for (ndigits = sizeof(id->bcdDevice_lo) * 2 - 1; devlo <= devhi; ndigits--) {
+	for (ndigits = sizeof(bcdDevice_lo) * 2 - 1; devlo <= devhi; ndigits--) {
 		clo = devlo & 0xf;
 		chi = devhi & 0xf;
 		if (chi > max)	/* If we are in bcd mode, truncate if necessary */
@@ -301,20 +321,20 @@ static void do_usb_entry_multi(struct usb_device_id *id, struct module *mod)
 		devhi >>= 4;
 
 		if (devlo == devhi || !ndigits) {
-			do_usb_entry(id, devlo, ndigits, clo, chi, max, mod);
+			do_usb_entry(symval, devlo, ndigits, clo, chi, max, mod);
 			break;
 		}
 
 		if (clo > 0x0)
-			do_usb_entry(id,
+			do_usb_entry(symval,
 				     incbcd(&devlo, 1, max,
-					    sizeof(id->bcdDevice_lo) * 2),
+					    sizeof(bcdDevice_lo) * 2),
 				     ndigits, clo, max, max, mod);
 
 		if (chi < max)
-			do_usb_entry(id,
+			do_usb_entry(symval,
 				     incbcd(&devhi, -1, max,
-					    sizeof(id->bcdDevice_lo) * 2),
+					    sizeof(bcdDevice_lo) * 2),
 				     ndigits, 0x0, chi, max, mod);
 	}
 }
@@ -323,7 +343,7 @@ static void do_usb_table(void *symval, unsigned long size,
 			 struct module *mod)
 {
 	unsigned int i;
-	const unsigned long id_size = sizeof(struct usb_device_id);
+	const unsigned long id_size = SIZE_usb_device_id;
 
 	device_id_check(mod->name, "usb", size, id_size, symval);
 
@@ -336,81 +356,81 @@ static void do_usb_table(void *symval, unsigned long size,
 
 /* Looks like: hid:bNvNpN */
 static int do_hid_entry(const char *filename,
-			     struct hid_device_id *id, char *alias)
+			     void *symval, char *alias)
 {
-	id->bus = TO_NATIVE(id->bus);
-	id->group = TO_NATIVE(id->group);
-	id->vendor = TO_NATIVE(id->vendor);
-	id->product = TO_NATIVE(id->product);
+	DEF_FIELD(symval, hid_device_id, bus);
+	DEF_FIELD(symval, hid_device_id, group);
+	DEF_FIELD(symval, hid_device_id, vendor);
+	DEF_FIELD(symval, hid_device_id, product);
 
 	sprintf(alias, "hid:");
-	ADD(alias, "b", id->bus != HID_BUS_ANY, id->bus);
-	ADD(alias, "g", id->group != HID_GROUP_ANY, id->group);
-	ADD(alias, "v", id->vendor != HID_ANY_ID, id->vendor);
-	ADD(alias, "p", id->product != HID_ANY_ID, id->product);
+	ADD(alias, "b", bus != HID_BUS_ANY, bus);
+	ADD(alias, "g", group != HID_GROUP_ANY, group);
+	ADD(alias, "v", vendor != HID_ANY_ID, vendor);
+	ADD(alias, "p", product != HID_ANY_ID, product);
 
 	return 1;
 }
-ADD_TO_DEVTABLE("hid", struct hid_device_id, do_hid_entry);
+ADD_TO_DEVTABLE("hid", hid_device_id, do_hid_entry);
 
 /* Looks like: ieee1394:venNmoNspNverN */
 static int do_ieee1394_entry(const char *filename,
-			     struct ieee1394_device_id *id, char *alias)
+			     void *symval, char *alias)
 {
-	id->match_flags = TO_NATIVE(id->match_flags);
-	id->vendor_id = TO_NATIVE(id->vendor_id);
-	id->model_id = TO_NATIVE(id->model_id);
-	id->specifier_id = TO_NATIVE(id->specifier_id);
-	id->version = TO_NATIVE(id->version);
+	DEF_FIELD(symval, ieee1394_device_id, match_flags);
+	DEF_FIELD(symval, ieee1394_device_id, vendor_id);
+	DEF_FIELD(symval, ieee1394_device_id, model_id);
+	DEF_FIELD(symval, ieee1394_device_id, specifier_id);
+	DEF_FIELD(symval, ieee1394_device_id, version);
 
 	strcpy(alias, "ieee1394:");
-	ADD(alias, "ven", id->match_flags & IEEE1394_MATCH_VENDOR_ID,
-	    id->vendor_id);
-	ADD(alias, "mo", id->match_flags & IEEE1394_MATCH_MODEL_ID,
-	    id->model_id);
-	ADD(alias, "sp", id->match_flags & IEEE1394_MATCH_SPECIFIER_ID,
-	    id->specifier_id);
-	ADD(alias, "ver", id->match_flags & IEEE1394_MATCH_VERSION,
-	    id->version);
+	ADD(alias, "ven", match_flags & IEEE1394_MATCH_VENDOR_ID,
+	    vendor_id);
+	ADD(alias, "mo", match_flags & IEEE1394_MATCH_MODEL_ID,
+	    model_id);
+	ADD(alias, "sp", match_flags & IEEE1394_MATCH_SPECIFIER_ID,
+	    specifier_id);
+	ADD(alias, "ver", match_flags & IEEE1394_MATCH_VERSION,
+	    version);
 
 	add_wildcard(alias);
 	return 1;
 }
-ADD_TO_DEVTABLE("ieee1394", struct ieee1394_device_id, do_ieee1394_entry);
+ADD_TO_DEVTABLE("ieee1394", ieee1394_device_id, do_ieee1394_entry);
 
 /* Looks like: pci:vNdNsvNsdNbcNscNiN. */
 static int do_pci_entry(const char *filename,
-			struct pci_device_id *id, char *alias)
+			void *symval, char *alias)
 {
 	/* Class field can be divided into these three. */
 	unsigned char baseclass, subclass, interface,
 		baseclass_mask, subclass_mask, interface_mask;
 
-	id->vendor = TO_NATIVE(id->vendor);
-	id->device = TO_NATIVE(id->device);
-	id->subvendor = TO_NATIVE(id->subvendor);
-	id->subdevice = TO_NATIVE(id->subdevice);
-	id->class = TO_NATIVE(id->class);
-	id->class_mask = TO_NATIVE(id->class_mask);
+	DEF_FIELD(symval, pci_device_id, vendor);
+	DEF_FIELD(symval, pci_device_id, device);
+	DEF_FIELD(symval, pci_device_id, subvendor);
+	DEF_FIELD(symval, pci_device_id, subdevice);
+	DEF_FIELD(symval, pci_device_id, class);
+	DEF_FIELD(symval, pci_device_id, class_mask);
 
 	strcpy(alias, "pci:");
-	ADD(alias, "v", id->vendor != PCI_ANY_ID, id->vendor);
-	ADD(alias, "d", id->device != PCI_ANY_ID, id->device);
-	ADD(alias, "sv", id->subvendor != PCI_ANY_ID, id->subvendor);
-	ADD(alias, "sd", id->subdevice != PCI_ANY_ID, id->subdevice);
-
-	baseclass = (id->class) >> 16;
-	baseclass_mask = (id->class_mask) >> 16;
-	subclass = (id->class) >> 8;
-	subclass_mask = (id->class_mask) >> 8;
-	interface = id->class;
-	interface_mask = id->class_mask;
+	ADD(alias, "v", vendor != PCI_ANY_ID, vendor);
+	ADD(alias, "d", device != PCI_ANY_ID, device);
+	ADD(alias, "sv", subvendor != PCI_ANY_ID, subvendor);
+	ADD(alias, "sd", subdevice != PCI_ANY_ID, subdevice);
+
+	baseclass = (class) >> 16;
+	baseclass_mask = (class_mask) >> 16;
+	subclass = (class) >> 8;
+	subclass_mask = (class_mask) >> 8;
+	interface = class;
+	interface_mask = class_mask;
 
 	if ((baseclass_mask != 0 && baseclass_mask != 0xFF)
 	    || (subclass_mask != 0 && subclass_mask != 0xFF)
 	    || (interface_mask != 0 && interface_mask != 0xFF)) {
 		warn("Can't handle masks in %s:%04X\n",
-		     filename, id->class_mask);
+		     filename, class_mask);
 		return 0;
 	}
 
@@ -420,101 +440,105 @@ static int do_pci_entry(const char *filename,
 	add_wildcard(alias);
 	return 1;
 }
-ADD_TO_DEVTABLE("pci", struct pci_device_id, do_pci_entry);
+ADD_TO_DEVTABLE("pci", pci_device_id, do_pci_entry);
 
 /* looks like: "ccw:tNmNdtNdmN" */
 static int do_ccw_entry(const char *filename,
-			struct ccw_device_id *id, char *alias)
+			void *symval, char *alias)
 {
-	id->match_flags = TO_NATIVE(id->match_flags);
-	id->cu_type = TO_NATIVE(id->cu_type);
-	id->cu_model = TO_NATIVE(id->cu_model);
-	id->dev_type = TO_NATIVE(id->dev_type);
-	id->dev_model = TO_NATIVE(id->dev_model);
+	DEF_FIELD(symval, ccw_device_id, match_flags);
+	DEF_FIELD(symval, ccw_device_id, cu_type);
+	DEF_FIELD(symval, ccw_device_id, cu_model);
+	DEF_FIELD(symval, ccw_device_id, dev_type);
+	DEF_FIELD(symval, ccw_device_id, dev_model);
 
 	strcpy(alias, "ccw:");
-	ADD(alias, "t", id->match_flags&CCW_DEVICE_ID_MATCH_CU_TYPE,
-	    id->cu_type);
-	ADD(alias, "m", id->match_flags&CCW_DEVICE_ID_MATCH_CU_MODEL,
-	    id->cu_model);
-	ADD(alias, "dt", id->match_flags&CCW_DEVICE_ID_MATCH_DEVICE_TYPE,
-	    id->dev_type);
-	ADD(alias, "dm", id->match_flags&CCW_DEVICE_ID_MATCH_DEVICE_MODEL,
-	    id->dev_model);
+	ADD(alias, "t", match_flags&CCW_DEVICE_ID_MATCH_CU_TYPE,
+	    cu_type);
+	ADD(alias, "m", match_flags&CCW_DEVICE_ID_MATCH_CU_MODEL,
+	    cu_model);
+	ADD(alias, "dt", match_flags&CCW_DEVICE_ID_MATCH_DEVICE_TYPE,
+	    dev_type);
+	ADD(alias, "dm", match_flags&CCW_DEVICE_ID_MATCH_DEVICE_MODEL,
+	    dev_model);
 	add_wildcard(alias);
 	return 1;
 }
-ADD_TO_DEVTABLE("ccw", struct ccw_device_id, do_ccw_entry);
+ADD_TO_DEVTABLE("ccw", ccw_device_id, do_ccw_entry);
 
 /* looks like: "ap:tN" */
 static int do_ap_entry(const char *filename,
-		       struct ap_device_id *id, char *alias)
+		       void *symval, char *alias)
 {
-	sprintf(alias, "ap:t%02X*", id->dev_type);
+	DEF_FIELD(symval, ap_device_id, dev_type);
+
+	sprintf(alias, "ap:t%02X*", dev_type);
 	return 1;
 }
-ADD_TO_DEVTABLE("ap", struct ap_device_id, do_ap_entry);
+ADD_TO_DEVTABLE("ap", ap_device_id, do_ap_entry);
 
 /* looks like: "css:tN" */
 static int do_css_entry(const char *filename,
-			struct css_device_id *id, char *alias)
+			void *symval, char *alias)
 {
-	sprintf(alias, "css:t%01X", id->type);
+	DEF_FIELD(symval, css_device_id, type);
+
+	sprintf(alias, "css:t%01X", type);
 	return 1;
 }
-ADD_TO_DEVTABLE("css", struct css_device_id, do_css_entry);
+ADD_TO_DEVTABLE("css", css_device_id, do_css_entry);
 
 /* Looks like: "serio:tyNprNidNexN" */
 static int do_serio_entry(const char *filename,
-			  struct serio_device_id *id, char *alias)
+			  void *symval, char *alias)
 {
-	id->type = TO_NATIVE(id->type);
-	id->proto = TO_NATIVE(id->proto);
-	id->id = TO_NATIVE(id->id);
-	id->extra = TO_NATIVE(id->extra);
+	DEF_FIELD(symval, serio_device_id, type);
+	DEF_FIELD(symval, serio_device_id, proto);
+	DEF_FIELD(symval, serio_device_id, id);
+	DEF_FIELD(symval, serio_device_id, extra);
 
 	strcpy(alias, "serio:");
-	ADD(alias, "ty", id->type != SERIO_ANY, id->type);
-	ADD(alias, "pr", id->proto != SERIO_ANY, id->proto);
-	ADD(alias, "id", id->id != SERIO_ANY, id->id);
-	ADD(alias, "ex", id->extra != SERIO_ANY, id->extra);
+	ADD(alias, "ty", type != SERIO_ANY, type);
+	ADD(alias, "pr", proto != SERIO_ANY, proto);
+	ADD(alias, "id", id != SERIO_ANY, id);
+	ADD(alias, "ex", extra != SERIO_ANY, extra);
 
 	add_wildcard(alias);
 	return 1;
 }
-ADD_TO_DEVTABLE("serio", struct serio_device_id, do_serio_entry);
+ADD_TO_DEVTABLE("serio", serio_device_id, do_serio_entry);
 
 /* looks like: "acpi:ACPI0003 or acpi:PNP0C0B" or "acpi:LNXVIDEO" */
 static int do_acpi_entry(const char *filename,
-			struct acpi_device_id *id, char *alias)
+			void *symval, char *alias)
 {
-	sprintf(alias, "acpi*:%s:*", id->id);
+	DEF_FIELD_ADDR(symval, acpi_device_id, id);
+	sprintf(alias, "acpi*:%s:*", *id);
 	return 1;
 }
-ADD_TO_DEVTABLE("acpi", struct acpi_device_id, do_acpi_entry);
+ADD_TO_DEVTABLE("acpi", acpi_device_id, do_acpi_entry);
 
 /* looks like: "pnp:dD" */
 static void do_pnp_device_entry(void *symval, unsigned long size,
 				struct module *mod)
 {
-	const unsigned long id_size = sizeof(struct pnp_device_id);
+	const unsigned long id_size = SIZE_pnp_device_id;
 	const unsigned int count = (size / id_size)-1;
-	const struct pnp_device_id *devs = symval;
 	unsigned int i;
 
 	device_id_check(mod->name, "pnp", size, id_size, symval);
 
 	for (i = 0; i < count; i++) {
-		const char *id = (char *)devs[i].id;
-		char acpi_id[sizeof(devs[0].id)];
+		DEF_FIELD_ADDR(symval + i*id_size, pnp_device_id, id);
+		char acpi_id[sizeof(*id)];
 		int j;
 
 		buf_printf(&mod->dev_table_buf,
-			   "MODULE_ALIAS(\"pnp:d%s*\");\n", id);
+			   "MODULE_ALIAS(\"pnp:d%s*\");\n", *id);
 
 		/* fix broken pnp bus lowercasing */
 		for (j = 0; j < sizeof(acpi_id); j++)
-			acpi_id[j] = toupper(id[j]);
+			acpi_id[j] = toupper((*id)[j]);
 		buf_printf(&mod->dev_table_buf,
 			   "MODULE_ALIAS(\"acpi*:%s:*\");\n", acpi_id);
 	}
@@ -524,19 +548,18 @@ static void do_pnp_device_entry(void *symval, unsigned long size,
 static void do_pnp_card_entries(void *symval, unsigned long size,
 				struct module *mod)
 {
-	const unsigned long id_size = sizeof(struct pnp_card_device_id);
+	const unsigned long id_size = SIZE_pnp_card_device_id;
 	const unsigned int count = (size / id_size)-1;
-	const struct pnp_card_device_id *cards = symval;
 	unsigned int i;
 
 	device_id_check(mod->name, "pnp", size, id_size, symval);
 
 	for (i = 0; i < count; i++) {
 		unsigned int j;
-		const struct pnp_card_device_id *card = &cards[i];
+		DEF_FIELD_ADDR(symval + i*id_size, pnp_card_device_id, devs);
 
 		for (j = 0; j < PNP_MAX_DEVICES; j++) {
-			const char *id = (char *)card->devs[j].id;
+			const char *id = (char *)(*devs)[j].id;
 			int i2, j2;
 			int dup = 0;
 
@@ -545,10 +568,10 @@ static void do_pnp_card_entries(void *symval, unsigned long size,
 
 			/* find duplicate, already added value */
 			for (i2 = 0; i2 < i && !dup; i2++) {
-				const struct pnp_card_device_id *card2 = &cards[i2];
+				DEF_FIELD_ADDR(symval + i2*id_size, pnp_card_device_id, devs);
 
 				for (j2 = 0; j2 < PNP_MAX_DEVICES; j2++) {
-					const char *id2 = (char *)card2->devs[j2].id;
+					const char *id2 = (char *)(*devs)[j2].id;
 
 					if (!id2[0])
 						break;
@@ -562,7 +585,7 @@ static void do_pnp_card_entries(void *symval, unsigned long size,
 
 			/* add an individual alias for every device entry */
 			if (!dup) {
-				char acpi_id[sizeof(card->devs[0].id)];
+				char acpi_id[PNP_ID_LEN];
 				int k;
 
 				buf_printf(&mod->dev_table_buf,
@@ -580,54 +603,58 @@ static void do_pnp_card_entries(void *symval, unsigned long size,
 
 /* Looks like: pcmcia:mNcNfNfnNpfnNvaNvbNvcNvdN. */
 static int do_pcmcia_entry(const char *filename,
-			   struct pcmcia_device_id *id, char *alias)
+			   void *symval, char *alias)
 {
 	unsigned int i;
-
-	id->match_flags = TO_NATIVE(id->match_flags);
-	id->manf_id = TO_NATIVE(id->manf_id);
-	id->card_id = TO_NATIVE(id->card_id);
-	id->func_id = TO_NATIVE(id->func_id);
-	id->function = TO_NATIVE(id->function);
-	id->device_no = TO_NATIVE(id->device_no);
+	DEF_FIELD(symval, pcmcia_device_id, match_flags);
+	DEF_FIELD(symval, pcmcia_device_id, manf_id);
+	DEF_FIELD(symval, pcmcia_device_id, card_id);
+	DEF_FIELD(symval, pcmcia_device_id, func_id);
+	DEF_FIELD(symval, pcmcia_device_id, function);
+	DEF_FIELD(symval, pcmcia_device_id, device_no);
+	DEF_FIELD_ADDR(symval, pcmcia_device_id, prod_id_hash);
 
 	for (i=0; i<4; i++) {
-		id->prod_id_hash[i] = TO_NATIVE(id->prod_id_hash[i]);
-       }
-
-       strcpy(alias, "pcmcia:");
-       ADD(alias, "m", id->match_flags & PCMCIA_DEV_ID_MATCH_MANF_ID,
-	   id->manf_id);
-       ADD(alias, "c", id->match_flags & PCMCIA_DEV_ID_MATCH_CARD_ID,
-	   id->card_id);
-       ADD(alias, "f", id->match_flags & PCMCIA_DEV_ID_MATCH_FUNC_ID,
-	   id->func_id);
-       ADD(alias, "fn", id->match_flags & PCMCIA_DEV_ID_MATCH_FUNCTION,
-	   id->function);
-       ADD(alias, "pfn", id->match_flags & PCMCIA_DEV_ID_MATCH_DEVICE_NO,
-	   id->device_no);
-       ADD(alias, "pa", id->match_flags & PCMCIA_DEV_ID_MATCH_PROD_ID1, id->prod_id_hash[0]);
-       ADD(alias, "pb", id->match_flags & PCMCIA_DEV_ID_MATCH_PROD_ID2, id->prod_id_hash[1]);
-       ADD(alias, "pc", id->match_flags & PCMCIA_DEV_ID_MATCH_PROD_ID3, id->prod_id_hash[2]);
-       ADD(alias, "pd", id->match_flags & PCMCIA_DEV_ID_MATCH_PROD_ID4, id->prod_id_hash[3]);
+		(*prod_id_hash)[i] = TO_NATIVE((*prod_id_hash)[i]);
+	}
+
+	strcpy(alias, "pcmcia:");
+	ADD(alias, "m", match_flags & PCMCIA_DEV_ID_MATCH_MANF_ID,
+	    manf_id);
+	ADD(alias, "c", match_flags & PCMCIA_DEV_ID_MATCH_CARD_ID,
+	    card_id);
+	ADD(alias, "f", match_flags & PCMCIA_DEV_ID_MATCH_FUNC_ID,
+	    func_id);
+	ADD(alias, "fn", match_flags & PCMCIA_DEV_ID_MATCH_FUNCTION,
+	    function);
+	ADD(alias, "pfn", match_flags & PCMCIA_DEV_ID_MATCH_DEVICE_NO,
+	    device_no);
+	ADD(alias, "pa", match_flags & PCMCIA_DEV_ID_MATCH_PROD_ID1, (*prod_id_hash)[0]);
+	ADD(alias, "pb", match_flags & PCMCIA_DEV_ID_MATCH_PROD_ID2, (*prod_id_hash)[1]);
+	ADD(alias, "pc", match_flags & PCMCIA_DEV_ID_MATCH_PROD_ID3, (*prod_id_hash)[2]);
+	ADD(alias, "pd", match_flags & PCMCIA_DEV_ID_MATCH_PROD_ID4, (*prod_id_hash)[3]);
 
 	add_wildcard(alias);
-       return 1;
+	return 1;
 }
-ADD_TO_DEVTABLE("pcmcia", struct pcmcia_device_id, do_pcmcia_entry);
+ADD_TO_DEVTABLE("pcmcia", pcmcia_device_id, do_pcmcia_entry);
 
-static int do_of_entry (const char *filename, struct of_device_id *of, char *alias)
+static int do_of_entry (const char *filename, void *symval, char *alias)
 {
     int len;
     char *tmp;
+    DEF_FIELD_ADDR(symval, of_device_id, name);
+    DEF_FIELD_ADDR(symval, of_device_id, type);
+    DEF_FIELD_ADDR(symval, of_device_id, compatible);
+
     len = sprintf (alias, "of:N%sT%s",
-                    of->name[0] ? of->name : "*",
-                    of->type[0] ? of->type : "*");
+                    (*name)[0] ? *name : "*",
+                    (*type)[0] ? *type : "*");
 
-    if (of->compatible[0])
+    if (compatible[0])
         sprintf (&alias[len], "%sC%s",
-                     of->type[0] ? "*" : "",
-                     of->compatible);
+                     (*type)[0] ? "*" : "",
+                     *compatible);
 
     /* Replace all whitespace with underscores */
     for (tmp = alias; tmp && *tmp; tmp++)
@@ -637,15 +664,17 @@ static int do_of_entry (const char *filename, struct of_device_id *of, char *ali
     add_wildcard(alias);
     return 1;
 }
-ADD_TO_DEVTABLE("of", struct of_device_id, do_of_entry);
+ADD_TO_DEVTABLE("of", of_device_id, do_of_entry);
 
-static int do_vio_entry(const char *filename, struct vio_device_id *vio,
+static int do_vio_entry(const char *filename, void *symval,
 		char *alias)
 {
 	char *tmp;
+	DEF_FIELD_ADDR(symval, vio_device_id, type);
+	DEF_FIELD_ADDR(symval, vio_device_id, compat);
 
-	sprintf(alias, "vio:T%sS%s", vio->type[0] ? vio->type : "*",
-			vio->compat[0] ? vio->compat : "*");
+	sprintf(alias, "vio:T%sS%s", (*type)[0] ? *type : "*",
+			(*compat)[0] ? *compat : "*");
 
 	/* Replace all whitespace with underscores */
 	for (tmp = alias; tmp && *tmp; tmp++)
@@ -655,7 +684,7 @@ static int do_vio_entry(const char *filename, struct vio_device_id *vio,
 	add_wildcard(alias);
 	return 1;
 }
-ADD_TO_DEVTABLE("vio", struct vio_device_id, do_vio_entry);
+ADD_TO_DEVTABLE("vio", vio_device_id, do_vio_entry);
 
 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
 
@@ -664,154 +693,172 @@ static void do_input(char *alias,
 {
 	unsigned int i;
 
+	for (i = min / BITS_PER_LONG; i < max / BITS_PER_LONG + 1; i++)
+		arr[i] = TO_NATIVE(arr[i]);
 	for (i = min; i < max; i++)
 		if (arr[i / BITS_PER_LONG] & (1L << (i%BITS_PER_LONG)))
 			sprintf(alias + strlen(alias), "%X,*", i);
 }
 
 /* input:b0v0p0e0-eXkXrXaXmXlXsXfXwX where X is comma-separated %02X. */
-static int do_input_entry(const char *filename, struct input_device_id *id,
+static int do_input_entry(const char *filename, void *symval,
 			  char *alias)
 {
+	DEF_FIELD(symval, input_device_id, flags);
+	DEF_FIELD(symval, input_device_id, bustype);
+	DEF_FIELD(symval, input_device_id, vendor);
+	DEF_FIELD(symval, input_device_id, product);
+	DEF_FIELD(symval, input_device_id, version);
+	DEF_FIELD_ADDR(symval, input_device_id, evbit);
+	DEF_FIELD_ADDR(symval, input_device_id, keybit);
+	DEF_FIELD_ADDR(symval, input_device_id, relbit);
+	DEF_FIELD_ADDR(symval, input_device_id, absbit);
+	DEF_FIELD_ADDR(symval, input_device_id, mscbit);
+	DEF_FIELD_ADDR(symval, input_device_id, ledbit);
+	DEF_FIELD_ADDR(symval, input_device_id, sndbit);
+	DEF_FIELD_ADDR(symval, input_device_id, ffbit);
+	DEF_FIELD_ADDR(symval, input_device_id, swbit);
+
 	sprintf(alias, "input:");
 
-	ADD(alias, "b", id->flags & INPUT_DEVICE_ID_MATCH_BUS, id->bustype);
-	ADD(alias, "v", id->flags & INPUT_DEVICE_ID_MATCH_VENDOR, id->vendor);
-	ADD(alias, "p", id->flags & INPUT_DEVICE_ID_MATCH_PRODUCT, id->product);
-	ADD(alias, "e", id->flags & INPUT_DEVICE_ID_MATCH_VERSION, id->version);
+	ADD(alias, "b", flags & INPUT_DEVICE_ID_MATCH_BUS, bustype);
+	ADD(alias, "v", flags & INPUT_DEVICE_ID_MATCH_VENDOR, vendor);
+	ADD(alias, "p", flags & INPUT_DEVICE_ID_MATCH_PRODUCT, product);
+	ADD(alias, "e", flags & INPUT_DEVICE_ID_MATCH_VERSION, version);
 
 	sprintf(alias + strlen(alias), "-e*");
-	if (id->flags & INPUT_DEVICE_ID_MATCH_EVBIT)
-		do_input(alias, id->evbit, 0, INPUT_DEVICE_ID_EV_MAX);
+	if (flags & INPUT_DEVICE_ID_MATCH_EVBIT)
+		do_input(alias, *evbit, 0, INPUT_DEVICE_ID_EV_MAX);
 	sprintf(alias + strlen(alias), "k*");
-	if (id->flags & INPUT_DEVICE_ID_MATCH_KEYBIT)
-		do_input(alias, id->keybit,
+	if (flags & INPUT_DEVICE_ID_MATCH_KEYBIT)
+		do_input(alias, *keybit,
 			 INPUT_DEVICE_ID_KEY_MIN_INTERESTING,
 			 INPUT_DEVICE_ID_KEY_MAX);
 	sprintf(alias + strlen(alias), "r*");
-	if (id->flags & INPUT_DEVICE_ID_MATCH_RELBIT)
-		do_input(alias, id->relbit, 0, INPUT_DEVICE_ID_REL_MAX);
+	if (flags & INPUT_DEVICE_ID_MATCH_RELBIT)
+		do_input(alias, *relbit, 0, INPUT_DEVICE_ID_REL_MAX);
 	sprintf(alias + strlen(alias), "a*");
-	if (id->flags & INPUT_DEVICE_ID_MATCH_ABSBIT)
-		do_input(alias, id->absbit, 0, INPUT_DEVICE_ID_ABS_MAX);
+	if (flags & INPUT_DEVICE_ID_MATCH_ABSBIT)
+		do_input(alias, *absbit, 0, INPUT_DEVICE_ID_ABS_MAX);
 	sprintf(alias + strlen(alias), "m*");
-	if (id->flags & INPUT_DEVICE_ID_MATCH_MSCIT)
-		do_input(alias, id->mscbit, 0, INPUT_DEVICE_ID_MSC_MAX);
+	if (flags & INPUT_DEVICE_ID_MATCH_MSCIT)
+		do_input(alias, *mscbit, 0, INPUT_DEVICE_ID_MSC_MAX);
 	sprintf(alias + strlen(alias), "l*");
-	if (id->flags & INPUT_DEVICE_ID_MATCH_LEDBIT)
-		do_input(alias, id->ledbit, 0, INPUT_DEVICE_ID_LED_MAX);
+	if (flags & INPUT_DEVICE_ID_MATCH_LEDBIT)
+		do_input(alias, *ledbit, 0, INPUT_DEVICE_ID_LED_MAX);
 	sprintf(alias + strlen(alias), "s*");
-	if (id->flags & INPUT_DEVICE_ID_MATCH_SNDBIT)
-		do_input(alias, id->sndbit, 0, INPUT_DEVICE_ID_SND_MAX);
+	if (flags & INPUT_DEVICE_ID_MATCH_SNDBIT)
+		do_input(alias, *sndbit, 0, INPUT_DEVICE_ID_SND_MAX);
 	sprintf(alias + strlen(alias), "f*");
-	if (id->flags & INPUT_DEVICE_ID_MATCH_FFBIT)
-		do_input(alias, id->ffbit, 0, INPUT_DEVICE_ID_FF_MAX);
+	if (flags & INPUT_DEVICE_ID_MATCH_FFBIT)
+		do_input(alias, *ffbit, 0, INPUT_DEVICE_ID_FF_MAX);
 	sprintf(alias + strlen(alias), "w*");
-	if (id->flags & INPUT_DEVICE_ID_MATCH_SWBIT)
-		do_input(alias, id->swbit, 0, INPUT_DEVICE_ID_SW_MAX);
+	if (flags & INPUT_DEVICE_ID_MATCH_SWBIT)
+		do_input(alias, *swbit, 0, INPUT_DEVICE_ID_SW_MAX);
 	return 1;
 }
-ADD_TO_DEVTABLE("input", struct input_device_id, do_input_entry);
+ADD_TO_DEVTABLE("input", input_device_id, do_input_entry);
 
-static int do_eisa_entry(const char *filename, struct eisa_device_id *eisa,
+static int do_eisa_entry(const char *filename, void *symval,
 		char *alias)
 {
-	if (eisa->sig[0])
-		sprintf(alias, EISA_DEVICE_MODALIAS_FMT "*", eisa->sig);
+	DEF_FIELD_ADDR(symval, eisa_device_id, sig);
+	if (sig[0])
+		sprintf(alias, EISA_DEVICE_MODALIAS_FMT "*", *sig);
 	else
 		strcat(alias, "*");
 	return 1;
 }
-ADD_TO_DEVTABLE("eisa", struct eisa_device_id, do_eisa_entry);
+ADD_TO_DEVTABLE("eisa", eisa_device_id, do_eisa_entry);
 
 /* Looks like: parisc:tNhvNrevNsvN */
-static int do_parisc_entry(const char *filename, struct parisc_device_id *id,
+static int do_parisc_entry(const char *filename, void *symval,
 		char *alias)
 {
-	id->hw_type = TO_NATIVE(id->hw_type);
-	id->hversion = TO_NATIVE(id->hversion);
-	id->hversion_rev = TO_NATIVE(id->hversion_rev);
-	id->sversion = TO_NATIVE(id->sversion);
+	DEF_FIELD(symval, parisc_device_id, hw_type);
+	DEF_FIELD(symval, parisc_device_id, hversion);
+	DEF_FIELD(symval, parisc_device_id, hversion_rev);
+	DEF_FIELD(symval, parisc_device_id, sversion);
 
 	strcpy(alias, "parisc:");
-	ADD(alias, "t", id->hw_type != PA_HWTYPE_ANY_ID, id->hw_type);
-	ADD(alias, "hv", id->hversion != PA_HVERSION_ANY_ID, id->hversion);
-	ADD(alias, "rev", id->hversion_rev != PA_HVERSION_REV_ANY_ID, id->hversion_rev);
-	ADD(alias, "sv", id->sversion != PA_SVERSION_ANY_ID, id->sversion);
+	ADD(alias, "t", hw_type != PA_HWTYPE_ANY_ID, hw_type);
+	ADD(alias, "hv", hversion != PA_HVERSION_ANY_ID, hversion);
+	ADD(alias, "rev", hversion_rev != PA_HVERSION_REV_ANY_ID, hversion_rev);
+	ADD(alias, "sv", sversion != PA_SVERSION_ANY_ID, sversion);
 
 	add_wildcard(alias);
 	return 1;
 }
-ADD_TO_DEVTABLE("parisc", struct parisc_device_id, do_parisc_entry);
+ADD_TO_DEVTABLE("parisc", parisc_device_id, do_parisc_entry);
 
 /* Looks like: sdio:cNvNdN. */
 static int do_sdio_entry(const char *filename,
-			struct sdio_device_id *id, char *alias)
+			void *symval, char *alias)
 {
-	id->class = TO_NATIVE(id->class);
-	id->vendor = TO_NATIVE(id->vendor);
-	id->device = TO_NATIVE(id->device);
+	DEF_FIELD(symval, sdio_device_id, class);
+	DEF_FIELD(symval, sdio_device_id, vendor);
+	DEF_FIELD(symval, sdio_device_id, device);
 
 	strcpy(alias, "sdio:");
-	ADD(alias, "c", id->class != (__u8)SDIO_ANY_ID, id->class);
-	ADD(alias, "v", id->vendor != (__u16)SDIO_ANY_ID, id->vendor);
-	ADD(alias, "d", id->device != (__u16)SDIO_ANY_ID, id->device);
+	ADD(alias, "c", class != (__u8)SDIO_ANY_ID, class);
+	ADD(alias, "v", vendor != (__u16)SDIO_ANY_ID, vendor);
+	ADD(alias, "d", device != (__u16)SDIO_ANY_ID, device);
 	add_wildcard(alias);
 	return 1;
 }
-ADD_TO_DEVTABLE("sdio", struct sdio_device_id, do_sdio_entry);
+ADD_TO_DEVTABLE("sdio", sdio_device_id, do_sdio_entry);
 
 /* Looks like: ssb:vNidNrevN. */
 static int do_ssb_entry(const char *filename,
-			struct ssb_device_id *id, char *alias)
+			void *symval, char *alias)
 {
-	id->vendor = TO_NATIVE(id->vendor);
-	id->coreid = TO_NATIVE(id->coreid);
-	id->revision = TO_NATIVE(id->revision);
+	DEF_FIELD(symval, ssb_device_id, vendor);
+	DEF_FIELD(symval, ssb_device_id, coreid);
+	DEF_FIELD(symval, ssb_device_id, revision);
 
 	strcpy(alias, "ssb:");
-	ADD(alias, "v", id->vendor != SSB_ANY_VENDOR, id->vendor);
-	ADD(alias, "id", id->coreid != SSB_ANY_ID, id->coreid);
-	ADD(alias, "rev", id->revision != SSB_ANY_REV, id->revision);
+	ADD(alias, "v", vendor != SSB_ANY_VENDOR, vendor);
+	ADD(alias, "id", coreid != SSB_ANY_ID, coreid);
+	ADD(alias, "rev", revision != SSB_ANY_REV, revision);
 	add_wildcard(alias);
 	return 1;
 }
-ADD_TO_DEVTABLE("ssb", struct ssb_device_id, do_ssb_entry);
+ADD_TO_DEVTABLE("ssb", ssb_device_id, do_ssb_entry);
 
 /* Looks like: bcma:mNidNrevNclN. */
 static int do_bcma_entry(const char *filename,
-			 struct bcma_device_id *id, char *alias)
+			 void *symval, char *alias)
 {
-	id->manuf = TO_NATIVE(id->manuf);
-	id->id = TO_NATIVE(id->id);
-	id->rev = TO_NATIVE(id->rev);
-	id->class = TO_NATIVE(id->class);
+	DEF_FIELD(symval, bcma_device_id, manuf);
+	DEF_FIELD(symval, bcma_device_id, id);
+	DEF_FIELD(symval, bcma_device_id, rev);
+	DEF_FIELD(symval, bcma_device_id, class);
 
 	strcpy(alias, "bcma:");
-	ADD(alias, "m", id->manuf != BCMA_ANY_MANUF, id->manuf);
-	ADD(alias, "id", id->id != BCMA_ANY_ID, id->id);
-	ADD(alias, "rev", id->rev != BCMA_ANY_REV, id->rev);
-	ADD(alias, "cl", id->class != BCMA_ANY_CLASS, id->class);
+	ADD(alias, "m", manuf != BCMA_ANY_MANUF, manuf);
+	ADD(alias, "id", id != BCMA_ANY_ID, id);
+	ADD(alias, "rev", rev != BCMA_ANY_REV, rev);
+	ADD(alias, "cl", class != BCMA_ANY_CLASS, class);
 	add_wildcard(alias);
 	return 1;
 }
-ADD_TO_DEVTABLE("bcma", struct bcma_device_id, do_bcma_entry);
+ADD_TO_DEVTABLE("bcma", bcma_device_id, do_bcma_entry);
 
 /* Looks like: virtio:dNvN */
-static int do_virtio_entry(const char *filename, struct virtio_device_id *id,
+static int do_virtio_entry(const char *filename, void *symval,
 			   char *alias)
 {
-	id->device = TO_NATIVE(id->device);
-	id->vendor = TO_NATIVE(id->vendor);
+	DEF_FIELD(symval, virtio_device_id, device);
+	DEF_FIELD(symval, virtio_device_id, vendor);
 
 	strcpy(alias, "virtio:");
-	ADD(alias, "d", id->device != VIRTIO_DEV_ANY_ID, id->device);
-	ADD(alias, "v", id->vendor != VIRTIO_DEV_ANY_ID, id->vendor);
+	ADD(alias, "d", device != VIRTIO_DEV_ANY_ID, device);
+	ADD(alias, "v", vendor != VIRTIO_DEV_ANY_ID, vendor);
 
 	add_wildcard(alias);
 	return 1;
 }
-ADD_TO_DEVTABLE("virtio", struct virtio_device_id, do_virtio_entry);
+ADD_TO_DEVTABLE("virtio", virtio_device_id, do_virtio_entry);
 
 /*
  * Looks like: vmbus:guid
@@ -819,41 +866,44 @@ ADD_TO_DEVTABLE("virtio", struct virtio_device_id, do_virtio_entry);
  * in the name.
  */
 
-static int do_vmbus_entry(const char *filename, struct hv_vmbus_device_id *id,
+static int do_vmbus_entry(const char *filename, void *symval,
 			  char *alias)
 {
 	int i;
-	char guid_name[((sizeof(id->guid) + 1)) * 2];
+	DEF_FIELD_ADDR(symval, hv_vmbus_device_id, guid);
+	char guid_name[(sizeof(*guid) + 1) * 2];
 
-	for (i = 0; i < (sizeof(id->guid) * 2); i += 2)
-		sprintf(&guid_name[i], "%02x", id->guid[i/2]);
+	for (i = 0; i < (sizeof(*guid) * 2); i += 2)
+		sprintf(&guid_name[i], "%02x", TO_NATIVE((*guid)[i/2]));
 
 	strcpy(alias, "vmbus:");
 	strcat(alias, guid_name);
 
 	return 1;
 }
-ADD_TO_DEVTABLE("vmbus", struct hv_vmbus_device_id, do_vmbus_entry);
+ADD_TO_DEVTABLE("vmbus", hv_vmbus_device_id, do_vmbus_entry);
 
 /* Looks like: i2c:S */
-static int do_i2c_entry(const char *filename, struct i2c_device_id *id,
+static int do_i2c_entry(const char *filename, void *symval,
 			char *alias)
 {
-	sprintf(alias, I2C_MODULE_PREFIX "%s", id->name);
+	DEF_FIELD_ADDR(symval, i2c_device_id, name);
+	sprintf(alias, I2C_MODULE_PREFIX "%s", *name);
 
 	return 1;
 }
-ADD_TO_DEVTABLE("i2c", struct i2c_device_id, do_i2c_entry);
+ADD_TO_DEVTABLE("i2c", i2c_device_id, do_i2c_entry);
 
 /* Looks like: spi:S */
-static int do_spi_entry(const char *filename, struct spi_device_id *id,
+static int do_spi_entry(const char *filename, void *symval,
 			char *alias)
 {
-	sprintf(alias, SPI_MODULE_PREFIX "%s", id->name);
+	DEF_FIELD_ADDR(symval, spi_device_id, name);
+	sprintf(alias, SPI_MODULE_PREFIX "%s", *name);
 
 	return 1;
 }
-ADD_TO_DEVTABLE("spi", struct spi_device_id, do_spi_entry);
+ADD_TO_DEVTABLE("spi", spi_device_id, do_spi_entry);
 
 static const struct dmifield {
 	const char *prefix;
@@ -885,21 +935,21 @@ static void dmi_ascii_filter(char *d, const char *s)
 }
 
 
-static int do_dmi_entry(const char *filename, struct dmi_system_id *id,
+static int do_dmi_entry(const char *filename, void *symval,
 			char *alias)
 {
 	int i, j;
-
+	DEF_FIELD_ADDR(symval, dmi_system_id, matches);
 	sprintf(alias, "dmi*");
 
 	for (i = 0; i < ARRAY_SIZE(dmi_fields); i++) {
 		for (j = 0; j < 4; j++) {
-			if (id->matches[j].slot &&
-			    id->matches[j].slot == dmi_fields[i].field) {
+			if ((*matches)[j].slot &&
+			    (*matches)[j].slot == dmi_fields[i].field) {
 				sprintf(alias + strlen(alias), ":%s*",
 					dmi_fields[i].prefix);
 				dmi_ascii_filter(alias + strlen(alias),
-						 id->matches[j].substr);
+						 (*matches)[j].substr);
 				strcat(alias, "*");
 			}
 		}
@@ -908,27 +958,30 @@ static int do_dmi_entry(const char *filename, struct dmi_system_id *id,
 	strcat(alias, ":");
 	return 1;
 }
-ADD_TO_DEVTABLE("dmi", struct dmi_system_id, do_dmi_entry);
+ADD_TO_DEVTABLE("dmi", dmi_system_id, do_dmi_entry);
 
 static int do_platform_entry(const char *filename,
-			     struct platform_device_id *id, char *alias)
+			     void *symval, char *alias)
 {
-	sprintf(alias, PLATFORM_MODULE_PREFIX "%s", id->name);
+	DEF_FIELD_ADDR(symval, platform_device_id, name);
+	sprintf(alias, PLATFORM_MODULE_PREFIX "%s", *name);
 	return 1;
 }
-ADD_TO_DEVTABLE("platform", struct platform_device_id, do_platform_entry);
+ADD_TO_DEVTABLE("platform", platform_device_id, do_platform_entry);
 
 static int do_mdio_entry(const char *filename,
-			 struct mdio_device_id *id, char *alias)
+			 void *symval, char *alias)
 {
 	int i;
+	DEF_FIELD(symval, mdio_device_id, phy_id);
+	DEF_FIELD(symval, mdio_device_id, phy_id_mask);
 
 	alias += sprintf(alias, MDIO_MODULE_PREFIX);
 
 	for (i = 0; i < 32; i++) {
-		if (!((id->phy_id_mask >> (31-i)) & 1))
+		if (!((phy_id_mask >> (31-i)) & 1))
 			*(alias++) = '?';
-		else if ((id->phy_id >> (31-i)) & 1)
+		else if ((phy_id >> (31-i)) & 1)
 			*(alias++) = '1';
 		else
 			*(alias++) = '0';
@@ -939,47 +992,50 @@ static int do_mdio_entry(const char *filename,
 
 	return 1;
 }
-ADD_TO_DEVTABLE("mdio", struct mdio_device_id, do_mdio_entry);
+ADD_TO_DEVTABLE("mdio", mdio_device_id, do_mdio_entry);
 
 /* Looks like: zorro:iN. */
-static int do_zorro_entry(const char *filename, struct zorro_device_id *id,
+static int do_zorro_entry(const char *filename, void *symval,
 			  char *alias)
 {
-	id->id = TO_NATIVE(id->id);
+	DEF_FIELD(symval, zorro_device_id, id);
 	strcpy(alias, "zorro:");
-	ADD(alias, "i", id->id != ZORRO_WILDCARD, id->id);
+	ADD(alias, "i", id != ZORRO_WILDCARD, id);
 	return 1;
 }
-ADD_TO_DEVTABLE("zorro", struct zorro_device_id, do_zorro_entry);
+ADD_TO_DEVTABLE("zorro", zorro_device_id, do_zorro_entry);
 
 /* looks like: "pnp:dD" */
 static int do_isapnp_entry(const char *filename,
-			   struct isapnp_device_id *id, char *alias)
+			   void *symval, char *alias)
 {
+	DEF_FIELD(symval, isapnp_device_id, vendor);
+	DEF_FIELD(symval, isapnp_device_id, function);
 	sprintf(alias, "pnp:d%c%c%c%x%x%x%x*",
-		'A' + ((id->vendor >> 2) & 0x3f) - 1,
-		'A' + (((id->vendor & 3) << 3) | ((id->vendor >> 13) & 7)) - 1,
-		'A' + ((id->vendor >> 8) & 0x1f) - 1,
-		(id->function >> 4) & 0x0f, id->function & 0x0f,
-		(id->function >> 12) & 0x0f, (id->function >> 8) & 0x0f);
+		'A' + ((vendor >> 2) & 0x3f) - 1,
+		'A' + (((vendor & 3) << 3) | ((vendor >> 13) & 7)) - 1,
+		'A' + ((vendor >> 8) & 0x1f) - 1,
+		(function >> 4) & 0x0f, function & 0x0f,
+		(function >> 12) & 0x0f, (function >> 8) & 0x0f);
 	return 1;
 }
-ADD_TO_DEVTABLE("isapnp", struct isapnp_device_id, do_isapnp_entry);
+ADD_TO_DEVTABLE("isapnp", isapnp_device_id, do_isapnp_entry);
 
 /* Looks like: "ipack:fNvNdN". */
 static int do_ipack_entry(const char *filename,
-			  struct ipack_device_id *id, char *alias)
+			  void *symval, char *alias)
 {
-	id->vendor = TO_NATIVE(id->vendor);
-	id->device = TO_NATIVE(id->device);
+	DEF_FIELD(symval, ipack_device_id, format);
+	DEF_FIELD(symval, ipack_device_id, vendor);
+	DEF_FIELD(symval, ipack_device_id, device);
 	strcpy(alias, "ipack:");
-	ADD(alias, "f", id->format != IPACK_ANY_FORMAT, id->format);
-	ADD(alias, "v", id->vendor != IPACK_ANY_ID, id->vendor);
-	ADD(alias, "d", id->device != IPACK_ANY_ID, id->device);
+	ADD(alias, "f", format != IPACK_ANY_FORMAT, format);
+	ADD(alias, "v", vendor != IPACK_ANY_ID, vendor);
+	ADD(alias, "d", device != IPACK_ANY_ID, device);
 	add_wildcard(alias);
 	return 1;
 }
-ADD_TO_DEVTABLE("ipack", struct ipack_device_id, do_ipack_entry);
+ADD_TO_DEVTABLE("ipack", ipack_device_id, do_ipack_entry);
 
 /*
  * Append a match expression for a single masked hex digit.
@@ -1030,25 +1086,27 @@ static void append_nibble_mask(char **outp,
  *	a ? or [] pattern matching exactly one digit.
  */
 static int do_amba_entry(const char *filename,
-			 struct amba_id *id, char *alias)
+			 void *symval, char *alias)
 {
 	unsigned int digit;
 	char *p = alias;
+	DEF_FIELD(symval, amba_id, id);
+	DEF_FIELD(symval, amba_id, mask);
 
-	if ((id->id & id->mask) != id->id)
+	if ((id & mask) != id)
 		fatal("%s: Masked-off bit(s) of AMBA device ID are non-zero: "
 		      "id=0x%08X, mask=0x%08X.  Please fix this driver.\n",
-		      filename, id->id, id->mask);
+		      filename, id, mask);
 
 	p += sprintf(alias, "amba:d");
 	for (digit = 0; digit < 8; digit++)
 		append_nibble_mask(&p,
-				   (id->id >> (4 * (7 - digit))) & 0xf,
-				   (id->mask >> (4 * (7 - digit))) & 0xf);
+				   (id >> (4 * (7 - digit))) & 0xf,
+				   (mask >> (4 * (7 - digit))) & 0xf);
 
 	return 1;
 }
-ADD_TO_DEVTABLE("amba", struct amba_id, do_amba_entry);
+ADD_TO_DEVTABLE("amba", amba_id, do_amba_entry);
 
 /* LOOKS like x86cpu:vendor:VVVV:family:FFFF:model:MMMM:feature:*,FEAT,*
  * All fields are numbers. It would be nicer to use strings for vendor
@@ -1056,24 +1114,24 @@ ADD_TO_DEVTABLE("amba", struct amba_id, do_amba_entry);
  * complicated.
  */
 
-static int do_x86cpu_entry(const char *filename, struct x86_cpu_id *id,
+static int do_x86cpu_entry(const char *filename, void *symval,
 			   char *alias)
 {
-	id->feature = TO_NATIVE(id->feature);
-	id->family = TO_NATIVE(id->family);
-	id->model = TO_NATIVE(id->model);
-	id->vendor = TO_NATIVE(id->vendor);
+	DEF_FIELD(symval, x86_cpu_id, feature);
+	DEF_FIELD(symval, x86_cpu_id, family);
+	DEF_FIELD(symval, x86_cpu_id, model);
+	DEF_FIELD(symval, x86_cpu_id, vendor);
 
 	strcpy(alias, "x86cpu:");
-	ADD(alias, "vendor:",  id->vendor != X86_VENDOR_ANY, id->vendor);
-	ADD(alias, ":family:", id->family != X86_FAMILY_ANY, id->family);
-	ADD(alias, ":model:",  id->model  != X86_MODEL_ANY,  id->model);
+	ADD(alias, "vendor:",  vendor != X86_VENDOR_ANY, vendor);
+	ADD(alias, ":family:", family != X86_FAMILY_ANY, family);
+	ADD(alias, ":model:",  model  != X86_MODEL_ANY,  model);
 	strcat(alias, ":feature:*");
-	if (id->feature != X86_FEATURE_ANY)
-		sprintf(alias + strlen(alias), "%04X*", id->feature);
+	if (feature != X86_FEATURE_ANY)
+		sprintf(alias + strlen(alias), "%04X*", feature);
 	return 1;
 }
-ADD_TO_DEVTABLE("x86cpu", struct x86_cpu_id, do_x86cpu_entry);
+ADD_TO_DEVTABLE("x86cpu", x86_cpu_id, do_x86cpu_entry);
 
 /* Does namelen bytes of name exactly match the symbol? */
 static bool sym_is(const char *name, unsigned namelen, const char *symbol)
diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index ff36c508a10e..11a8c3010ed9 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -2126,7 +2126,7 @@ int main(int argc, char **argv)
 	struct ext_sym_list *extsym_iter;
 	struct ext_sym_list *extsym_start = NULL;
 
-	while ((opt = getopt(argc, argv, "i:I:e:cmsSo:awM:K:")) != -1) {
+	while ((opt = getopt(argc, argv, "i:I:e:msSo:awM:K:")) != -1) {
 		switch (opt) {
 		case 'i':
 			kernel_read = optarg;
@@ -2135,9 +2135,6 @@ int main(int argc, char **argv)
 			module_read = optarg;
 			external_module = 1;
 			break;
-		case 'c':
-			cross_build = 1;
-			break;
 		case 'e':
 			external_module = 1;
 			extsym_iter =
-- 
cgit v1.2.3


From d4e62d0094e1b0f69946c3f16ce8ec882302a461 Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Date: Sat, 15 Dec 2012 23:50:43 +0100
Subject: sh-pfc: Split platform data from the sh_pfc structure

Create a sh_pfc_platform_data structure to store platform data and
reference it from the core sh_pfc structure.

Signed-off-by: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Acked-by: Paul Mundt <lethal@linux-sh.org>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Simon Horman <horms+renesas@verge.net.au>
---
 drivers/sh/pfc/core.c    | 89 +++++++++++++++++++++++-------------------------
 drivers/sh/pfc/gpio.c    | 19 ++++++-----
 drivers/sh/pfc/pinctrl.c | 27 ++++++++-------
 include/linux/sh_pfc.h   | 20 ++++++-----
 4 files changed, 79 insertions(+), 76 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/sh/pfc/core.c b/drivers/sh/pfc/core.c
index 68169373c98b..ecbe51d9f563 100644
--- a/drivers/sh/pfc/core.c
+++ b/drivers/sh/pfc/core.c
@@ -21,18 +21,13 @@
 #include <linux/ioport.h>
 #include <linux/pinctrl/machine.h>
 
-static struct sh_pfc *sh_pfc __read_mostly;
-
-static inline bool sh_pfc_initialized(void)
-{
-	return !!sh_pfc;
-}
+static struct sh_pfc sh_pfc __read_mostly;
 
 static void pfc_iounmap(struct sh_pfc *pfc)
 {
 	int k;
 
-	for (k = 0; k < pfc->num_resources; k++)
+	for (k = 0; k < pfc->pdata->num_resources; k++)
 		if (pfc->window[k].virt)
 			iounmap(pfc->window[k].virt);
 
@@ -45,16 +40,16 @@ static int pfc_ioremap(struct sh_pfc *pfc)
 	struct resource *res;
 	int k;
 
-	if (!pfc->num_resources)
+	if (!pfc->pdata->num_resources)
 		return 0;
 
-	pfc->window = kzalloc(pfc->num_resources * sizeof(*pfc->window),
+	pfc->window = kzalloc(pfc->pdata->num_resources * sizeof(*pfc->window),
 			      GFP_NOWAIT);
 	if (!pfc->window)
 		goto err1;
 
-	for (k = 0; k < pfc->num_resources; k++) {
-		res = pfc->resource + k;
+	for (k = 0; k < pfc->pdata->num_resources; k++) {
+		res = pfc->pdata->resource + k;
 		WARN_ON(resource_type(res) != IORESOURCE_MEM);
 		pfc->window[k].phys = res->start;
 		pfc->window[k].size = resource_size(res);
@@ -79,7 +74,7 @@ static void __iomem *pfc_phys_to_virt(struct sh_pfc *pfc,
 	int k;
 
 	/* scan through physical windows and convert address */
-	for (k = 0; k < pfc->num_resources; k++) {
+	for (k = 0; k < pfc->pdata->num_resources; k++) {
 		window = pfc->window + k;
 
 		if (address < window->phys)
@@ -232,8 +227,8 @@ static void write_config_reg(struct sh_pfc *pfc,
 	data &= mask;
 	data |= value;
 
-	if (pfc->unlock_reg)
-		gpio_write_raw_reg(pfc_phys_to_virt(pfc, pfc->unlock_reg),
+	if (pfc->pdata->unlock_reg)
+		gpio_write_raw_reg(pfc_phys_to_virt(pfc, pfc->pdata->unlock_reg),
 				   32, ~data);
 
 	gpio_write_raw_reg(mapped_reg, crp->reg_width, data);
@@ -241,16 +236,16 @@ static void write_config_reg(struct sh_pfc *pfc,
 
 static int setup_data_reg(struct sh_pfc *pfc, unsigned gpio)
 {
-	struct pinmux_gpio *gpiop = &pfc->gpios[gpio];
+	struct pinmux_gpio *gpiop = &pfc->pdata->gpios[gpio];
 	struct pinmux_data_reg *data_reg;
 	int k, n;
 
-	if (!enum_in_range(gpiop->enum_id, &pfc->data))
+	if (!enum_in_range(gpiop->enum_id, &pfc->pdata->data))
 		return -1;
 
 	k = 0;
 	while (1) {
-		data_reg = pfc->data_regs + k;
+		data_reg = pfc->pdata->data_regs + k;
 
 		if (!data_reg->reg_width)
 			break;
@@ -279,12 +274,12 @@ static void setup_data_regs(struct sh_pfc *pfc)
 	struct pinmux_data_reg *drp;
 	int k;
 
-	for (k = pfc->first_gpio; k <= pfc->last_gpio; k++)
+	for (k = pfc->pdata->first_gpio; k <= pfc->pdata->last_gpio; k++)
 		setup_data_reg(pfc, k);
 
 	k = 0;
 	while (1) {
-		drp = pfc->data_regs + k;
+		drp = pfc->pdata->data_regs + k;
 
 		if (!drp->reg_width)
 			break;
@@ -298,15 +293,15 @@ static void setup_data_regs(struct sh_pfc *pfc)
 int sh_pfc_get_data_reg(struct sh_pfc *pfc, unsigned gpio,
 			struct pinmux_data_reg **drp, int *bitp)
 {
-	struct pinmux_gpio *gpiop = &pfc->gpios[gpio];
+	struct pinmux_gpio *gpiop = &pfc->pdata->gpios[gpio];
 	int k, n;
 
-	if (!enum_in_range(gpiop->enum_id, &pfc->data))
+	if (!enum_in_range(gpiop->enum_id, &pfc->pdata->data))
 		return -1;
 
 	k = (gpiop->flags & PINMUX_FLAG_DREG) >> PINMUX_FLAG_DREG_SHIFT;
 	n = (gpiop->flags & PINMUX_FLAG_DBIT) >> PINMUX_FLAG_DBIT_SHIFT;
-	*drp = pfc->data_regs + k;
+	*drp = pfc->pdata->data_regs + k;
 	*bitp = n;
 	return 0;
 }
@@ -323,7 +318,7 @@ static int get_config_reg(struct sh_pfc *pfc, pinmux_enum_t enum_id,
 
 	k = 0;
 	while (1) {
-		config_reg = pfc->cfg_regs + k;
+		config_reg = pfc->pdata->cfg_regs + k;
 
 		r_width = config_reg->reg_width;
 		f_width = config_reg->field_width;
@@ -361,12 +356,12 @@ static int get_config_reg(struct sh_pfc *pfc, pinmux_enum_t enum_id,
 int sh_pfc_gpio_to_enum(struct sh_pfc *pfc, unsigned gpio, int pos,
 			pinmux_enum_t *enum_idp)
 {
-	pinmux_enum_t enum_id = pfc->gpios[gpio].enum_id;
-	pinmux_enum_t *data = pfc->gpio_data;
+	pinmux_enum_t enum_id = pfc->pdata->gpios[gpio].enum_id;
+	pinmux_enum_t *data = pfc->pdata->gpio_data;
 	int k;
 
-	if (!enum_in_range(enum_id, &pfc->data)) {
-		if (!enum_in_range(enum_id, &pfc->mark)) {
+	if (!enum_in_range(enum_id, &pfc->pdata->data)) {
+		if (!enum_in_range(enum_id, &pfc->pdata->mark)) {
 			pr_err("non data/mark enum_id for gpio %d\n", gpio);
 			return -1;
 		}
@@ -377,7 +372,7 @@ int sh_pfc_gpio_to_enum(struct sh_pfc *pfc, unsigned gpio, int pos,
 		return pos + 1;
 	}
 
-	for (k = 0; k < pfc->gpio_data_size; k++) {
+	for (k = 0; k < pfc->pdata->gpio_data_size; k++) {
 		if (data[k] == enum_id) {
 			*enum_idp = data[k + 1];
 			return k + 1;
@@ -405,19 +400,19 @@ int sh_pfc_config_gpio(struct sh_pfc *pfc, unsigned gpio, int pinmux_type,
 		break;
 
 	case PINMUX_TYPE_OUTPUT:
-		range = &pfc->output;
+		range = &pfc->pdata->output;
 		break;
 
 	case PINMUX_TYPE_INPUT:
-		range = &pfc->input;
+		range = &pfc->pdata->input;
 		break;
 
 	case PINMUX_TYPE_INPUT_PULLUP:
-		range = &pfc->input_pu;
+		range = &pfc->pdata->input_pu;
 		break;
 
 	case PINMUX_TYPE_INPUT_PULLDOWN:
-		range = &pfc->input_pd;
+		range = &pfc->pdata->input_pd;
 		break;
 
 	default:
@@ -437,7 +432,7 @@ int sh_pfc_config_gpio(struct sh_pfc *pfc, unsigned gpio, int pinmux_type,
 			break;
 
 		/* first check if this is a function enum */
-		in_range = enum_in_range(enum_id, &pfc->function);
+		in_range = enum_in_range(enum_id, &pfc->pdata->function);
 		if (!in_range) {
 			/* not a function enum */
 			if (range) {
@@ -502,7 +497,7 @@ int sh_pfc_config_gpio(struct sh_pfc *pfc, unsigned gpio, int pinmux_type,
 }
 EXPORT_SYMBOL_GPL(sh_pfc_config_gpio);
 
-int register_sh_pfc(struct sh_pfc *pfc)
+int register_sh_pfc(struct sh_pfc_platform_data *pdata)
 {
 	int (*initroutine)(struct sh_pfc *) = NULL;
 	int ret;
@@ -512,26 +507,28 @@ int register_sh_pfc(struct sh_pfc *pfc)
 	 */
 	BUILD_BUG_ON(PINMUX_FLAG_TYPE > ((1 << PINMUX_FLAG_DBIT_SHIFT) - 1));
 
-	if (sh_pfc)
+	if (sh_pfc.pdata)
 		return -EBUSY;
 
-	ret = pfc_ioremap(pfc);
-	if (unlikely(ret < 0))
+	sh_pfc.pdata = pdata;
+
+	ret = pfc_ioremap(&sh_pfc);
+	if (unlikely(ret < 0)) {
+		sh_pfc.pdata = NULL;
 		return ret;
+	}
 
-	spin_lock_init(&pfc->lock);
+	spin_lock_init(&sh_pfc.lock);
 
 	pinctrl_provide_dummies();
-	setup_data_regs(pfc);
-
-	sh_pfc = pfc;
+	setup_data_regs(&sh_pfc);
 
 	/*
 	 * Initialize pinctrl bindings first
 	 */
 	initroutine = symbol_request(sh_pfc_register_pinctrl);
 	if (initroutine) {
-		ret = (*initroutine)(pfc);
+		ret = (*initroutine)(&sh_pfc);
 		symbol_put_addr(initroutine);
 
 		if (unlikely(ret != 0))
@@ -546,7 +543,7 @@ int register_sh_pfc(struct sh_pfc *pfc)
 	 */
 	initroutine = symbol_request(sh_pfc_register_gpiochip);
 	if (initroutine) {
-		ret = (*initroutine)(pfc);
+		ret = (*initroutine)(&sh_pfc);
 		symbol_put_addr(initroutine);
 
 		/*
@@ -560,13 +557,13 @@ int register_sh_pfc(struct sh_pfc *pfc)
 		}
 	}
 
-	pr_info("%s support registered\n", pfc->name);
+	pr_info("%s support registered\n", sh_pfc.pdata->name);
 
 	return 0;
 
 err:
-	pfc_iounmap(pfc);
-	sh_pfc = NULL;
+	pfc_iounmap(&sh_pfc);
+	sh_pfc.pdata = NULL;
 
 	return ret;
 }
diff --git a/drivers/sh/pfc/gpio.c b/drivers/sh/pfc/gpio.c
index 6a24f07c2013..7597a024fac8 100644
--- a/drivers/sh/pfc/gpio.c
+++ b/drivers/sh/pfc/gpio.c
@@ -103,11 +103,11 @@ static int sh_gpio_to_irq(struct gpio_chip *gc, unsigned offset)
 		if (pos <= 0 || !enum_id)
 			break;
 
-		for (i = 0; i < pfc->gpio_irq_size; i++) {
-			enum_ids = pfc->gpio_irq[i].enum_ids;
+		for (i = 0; i < pfc->pdata->gpio_irq_size; i++) {
+			enum_ids = pfc->pdata->gpio_irq[i].enum_ids;
 			for (k = 0; enum_ids[k]; k++) {
 				if (enum_ids[k] == enum_id)
-					return pfc->gpio_irq[i].irq;
+					return pfc->pdata->gpio_irq[i].irq;
 			}
 		}
 	}
@@ -128,12 +128,12 @@ static void sh_pfc_gpio_setup(struct sh_pfc_chip *chip)
 	gc->set = sh_gpio_set;
 	gc->to_irq = sh_gpio_to_irq;
 
-	WARN_ON(pfc->first_gpio != 0); /* needs testing */
+	WARN_ON(pfc->pdata->first_gpio != 0); /* needs testing */
 
-	gc->label = pfc->name;
+	gc->label = pfc->pdata->name;
 	gc->owner = THIS_MODULE;
-	gc->base = pfc->first_gpio;
-	gc->ngpio = (pfc->last_gpio - pfc->first_gpio) + 1;
+	gc->base = pfc->pdata->first_gpio;
+	gc->ngpio = (pfc->pdata->last_gpio - pfc->pdata->first_gpio) + 1;
 }
 
 int sh_pfc_register_gpiochip(struct sh_pfc *pfc)
@@ -154,7 +154,8 @@ int sh_pfc_register_gpiochip(struct sh_pfc *pfc)
 		kfree(chip);
 
 	pr_info("%s handling gpio %d -> %d\n",
-		pfc->name, pfc->first_gpio, pfc->last_gpio);
+		pfc->pdata->name, pfc->pdata->first_gpio,
+		pfc->pdata->last_gpio);
 
 	return ret;
 }
@@ -179,7 +180,7 @@ static int sh_pfc_gpio_probe(struct platform_device *pdev)
 	chip = gpio_to_pfc_chip(gc);
 	platform_set_drvdata(pdev, chip);
 
-	pr_info("attaching to GPIO chip %s\n", chip->pfc->name);
+	pr_info("attaching to GPIO chip %s\n", chip->pfc->pdata->name);
 
 	return 0;
 }
diff --git a/drivers/sh/pfc/pinctrl.c b/drivers/sh/pfc/pinctrl.c
index 4109b769eac0..3a2c77d3248f 100644
--- a/drivers/sh/pfc/pinctrl.c
+++ b/drivers/sh/pfc/pinctrl.c
@@ -140,7 +140,7 @@ static int sh_pfc_reconfig_pin(struct sh_pfc *pfc, unsigned offset,
 
 	spin_lock_irqsave(&pfc->lock, flags);
 
-	pinmux_type = pfc->gpios[offset].flags & PINMUX_FLAG_TYPE;
+	pinmux_type = pfc->pdata->gpios[offset].flags & PINMUX_FLAG_TYPE;
 
 	/*
 	 * See if the present config needs to first be de-configured.
@@ -172,8 +172,8 @@ static int sh_pfc_reconfig_pin(struct sh_pfc *pfc, unsigned offset,
 			       GPIO_CFG_REQ) != 0)
 		goto err;
 
-	pfc->gpios[offset].flags &= ~PINMUX_FLAG_TYPE;
-	pfc->gpios[offset].flags |= new_type;
+	pfc->pdata->gpios[offset].flags &= ~PINMUX_FLAG_TYPE;
+	pfc->pdata->gpios[offset].flags |= new_type;
 
 	ret = 0;
 
@@ -195,7 +195,7 @@ static int sh_pfc_gpio_request_enable(struct pinctrl_dev *pctldev,
 
 	spin_lock_irqsave(&pfc->lock, flags);
 
-	pinmux_type = pfc->gpios[offset].flags & PINMUX_FLAG_TYPE;
+	pinmux_type = pfc->pdata->gpios[offset].flags & PINMUX_FLAG_TYPE;
 
 	switch (pinmux_type) {
 	case PINMUX_TYPE_FUNCTION:
@@ -236,7 +236,7 @@ static void sh_pfc_gpio_disable_free(struct pinctrl_dev *pctldev,
 
 	spin_lock_irqsave(&pfc->lock, flags);
 
-	pinmux_type = pfc->gpios[offset].flags & PINMUX_FLAG_TYPE;
+	pinmux_type = pfc->pdata->gpios[offset].flags & PINMUX_FLAG_TYPE;
 
 	sh_pfc_config_gpio(pfc, offset, pinmux_type, GPIO_CFG_FREE);
 
@@ -270,7 +270,7 @@ static int sh_pfc_pinconf_get(struct pinctrl_dev *pctldev, unsigned pin,
 	struct sh_pfc_pinctrl *pmx = pinctrl_dev_get_drvdata(pctldev);
 	struct sh_pfc *pfc = pmx->pfc;
 
-	*config = pfc->gpios[pin].flags & PINMUX_FLAG_TYPE;
+	*config = pfc->pdata->gpios[pin].flags & PINMUX_FLAG_TYPE;
 
 	return 0;
 }
@@ -356,7 +356,7 @@ static int sh_pfc_map_gpios(struct sh_pfc *pfc, struct sh_pfc_pinctrl *pmx)
 	unsigned long flags;
 	int i;
 
-	pmx->nr_pads = pfc->last_gpio - pfc->first_gpio + 1;
+	pmx->nr_pads = pfc->pdata->last_gpio - pfc->pdata->first_gpio + 1;
 
 	pmx->pads = kmalloc(sizeof(struct pinctrl_pin_desc) * pmx->nr_pads,
 			    GFP_KERNEL);
@@ -375,9 +375,9 @@ static int sh_pfc_map_gpios(struct sh_pfc *pfc, struct sh_pfc_pinctrl *pmx)
 	 */
 	for (i = 0; i < pmx->nr_pads; i++) {
 		struct pinctrl_pin_desc *pin = pmx->pads + i;
-		struct pinmux_gpio *gpio = pfc->gpios + i;
+		struct pinmux_gpio *gpio = pfc->pdata->gpios + i;
 
-		pin->number = pfc->first_gpio + i;
+		pin->number = pfc->pdata->first_gpio + i;
 		pin->name = gpio->name;
 
 		/* XXX */
@@ -408,7 +408,7 @@ static int sh_pfc_map_functions(struct sh_pfc *pfc, struct sh_pfc_pinctrl *pmx)
 	spin_lock_irqsave(&pmx->lock, flags);
 
 	for (i = fn = 0; i < pmx->nr_pads; i++) {
-		struct pinmux_gpio *gpio = pfc->gpios + i;
+		struct pinmux_gpio *gpio = pfc->pdata->gpios + i;
 
 		if ((gpio->flags & PINMUX_FLAG_TYPE) == PINMUX_TYPE_FUNCTION)
 			pmx->functions[fn++] = gpio;
@@ -444,9 +444,10 @@ static int sh_pfc_pinctrl_probe(struct platform_device *pdev)
 		goto free_functions;
 	}
 
-	sh_pfc_gpio_range.npins = pfc->last_gpio - pfc->first_gpio + 1;
-	sh_pfc_gpio_range.base = pfc->first_gpio;
-	sh_pfc_gpio_range.pin_base = pfc->first_gpio;
+	sh_pfc_gpio_range.npins = pfc->pdata->last_gpio
+				- pfc->pdata->first_gpio + 1;
+	sh_pfc_gpio_range.base = pfc->pdata->first_gpio;
+	sh_pfc_gpio_range.pin_base = pfc->pdata->first_gpio;
 
 	pinctrl_add_gpio_range(sh_pfc_pmx->pctl, &sh_pfc_gpio_range);
 
diff --git a/include/linux/sh_pfc.h b/include/linux/sh_pfc.h
index c19a0925829a..58587f94d67a 100644
--- a/include/linux/sh_pfc.h
+++ b/include/linux/sh_pfc.h
@@ -94,7 +94,7 @@ struct pfc_window {
 	unsigned long size;
 };
 
-struct sh_pfc {
+struct sh_pfc_platform_data {
 	char *name;
 	pinmux_enum_t reserved_id;
 	struct pinmux_range data;
@@ -117,17 +117,21 @@ struct sh_pfc {
 	struct pinmux_irq *gpio_irq;
 	unsigned int gpio_irq_size;
 
-	spinlock_t lock;
-
 	struct resource *resource;
 	unsigned int num_resources;
-	struct pfc_window *window;
 
 	unsigned long unlock_reg;
 };
 
+struct sh_pfc {
+	struct sh_pfc_platform_data *pdata;
+	spinlock_t lock;
+
+	struct pfc_window *window;
+};
+
 /* XXX compat for now */
-#define pinmux_info sh_pfc
+#define pinmux_info sh_pfc_platform_data
 
 /* drivers/sh/pfc/gpio.c */
 int sh_pfc_register_gpiochip(struct sh_pfc *pfc);
@@ -136,7 +140,7 @@ int sh_pfc_register_gpiochip(struct sh_pfc *pfc);
 int sh_pfc_register_pinctrl(struct sh_pfc *pfc);
 
 /* drivers/sh/pfc/core.c */
-int register_sh_pfc(struct sh_pfc *pfc);
+int register_sh_pfc(struct sh_pfc_platform_data *pfc);
 
 int sh_pfc_read_bit(struct pinmux_data_reg *dr, unsigned long in_pos);
 void sh_pfc_write_bit(struct pinmux_data_reg *dr, unsigned long in_pos,
@@ -151,8 +155,8 @@ int sh_pfc_config_gpio(struct sh_pfc *pfc, unsigned gpio, int pinmux_type,
 /* xxx */
 static inline int register_pinmux(struct pinmux_info *pip)
 {
-	struct sh_pfc *pfc = pip;
-	return register_sh_pfc(pfc);
+	struct sh_pfc_platform_data *pdata = pip;
+	return register_sh_pfc(pdata);
 }
 
 enum { GPIO_CFG_DRYRUN, GPIO_CFG_REQ, GPIO_CFG_FREE };
-- 
cgit v1.2.3


From f9165132c5ee681235068857e4f86c7ecc5a4617 Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Date: Sat, 15 Dec 2012 23:50:44 +0100
Subject: sh-pfc: Move private definitions and declarations to private header

Move all private structure definitions and function declarations from
include/linux/sh_pfc.h to drivers/sh/pfc/core.h.

Signed-off-by: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Acked-by: Paul Mundt <lethal@linux-sh.org>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Simon Horman <horms+renesas@verge.net.au>
---
 drivers/sh/pfc/core.c    |  2 ++
 drivers/sh/pfc/core.h    | 44 ++++++++++++++++++++++++++++++++++++++++++++
 drivers/sh/pfc/gpio.c    |  2 ++
 drivers/sh/pfc/pinctrl.c |  2 ++
 include/linux/sh_pfc.h   | 29 -----------------------------
 5 files changed, 50 insertions(+), 29 deletions(-)
 create mode 100644 drivers/sh/pfc/core.h

(limited to 'include/linux')

diff --git a/drivers/sh/pfc/core.c b/drivers/sh/pfc/core.c
index ecbe51d9f563..72421a4a883d 100644
--- a/drivers/sh/pfc/core.c
+++ b/drivers/sh/pfc/core.c
@@ -21,6 +21,8 @@
 #include <linux/ioport.h>
 #include <linux/pinctrl/machine.h>
 
+#include "core.h"
+
 static struct sh_pfc sh_pfc __read_mostly;
 
 static void pfc_iounmap(struct sh_pfc *pfc)
diff --git a/drivers/sh/pfc/core.h b/drivers/sh/pfc/core.h
new file mode 100644
index 000000000000..b07ae259c0eb
--- /dev/null
+++ b/drivers/sh/pfc/core.h
@@ -0,0 +1,44 @@
+/*
+ * SuperH Pin Function Controller support.
+ *
+ * Copyright (C) 2012  Renesas Solutions Corp.
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+#ifndef __SH_PFC_CORE_H__
+#define __SH_PFC_CORE_H__
+
+#include <linux/compiler.h>
+#include <linux/sh_pfc.h>
+#include <linux/types.h>
+
+struct pfc_window {
+	phys_addr_t phys;
+	void __iomem *virt;
+	unsigned long size;
+};
+
+struct sh_pfc {
+	struct sh_pfc_platform_data *pdata;
+	spinlock_t lock;
+
+	struct pfc_window *window;
+};
+
+int sh_pfc_register_gpiochip(struct sh_pfc *pfc);
+
+int sh_pfc_register_pinctrl(struct sh_pfc *pfc);
+
+int sh_pfc_read_bit(struct pinmux_data_reg *dr, unsigned long in_pos);
+void sh_pfc_write_bit(struct pinmux_data_reg *dr, unsigned long in_pos,
+		      unsigned long value);
+int sh_pfc_get_data_reg(struct sh_pfc *pfc, unsigned gpio,
+			struct pinmux_data_reg **drp, int *bitp);
+int sh_pfc_gpio_to_enum(struct sh_pfc *pfc, unsigned gpio, int pos,
+			pinmux_enum_t *enum_idp);
+int sh_pfc_config_gpio(struct sh_pfc *pfc, unsigned gpio, int pinmux_type,
+		       int cfg_mode);
+
+#endif /* __SH_PFC_CORE_H__ */
diff --git a/drivers/sh/pfc/gpio.c b/drivers/sh/pfc/gpio.c
index 7597a024fac8..565b366c909f 100644
--- a/drivers/sh/pfc/gpio.c
+++ b/drivers/sh/pfc/gpio.c
@@ -19,6 +19,8 @@
 #include <linux/pinctrl/consumer.h>
 #include <linux/sh_pfc.h>
 
+#include "core.h"
+
 struct sh_pfc_chip {
 	struct sh_pfc		*pfc;
 	struct gpio_chip	gpio_chip;
diff --git a/drivers/sh/pfc/pinctrl.c b/drivers/sh/pfc/pinctrl.c
index 3a2c77d3248f..5801a5686148 100644
--- a/drivers/sh/pfc/pinctrl.c
+++ b/drivers/sh/pfc/pinctrl.c
@@ -24,6 +24,8 @@
 #include <linux/pinctrl/pinmux.h>
 #include <linux/pinctrl/pinconf-generic.h>
 
+#include "core.h"
+
 struct sh_pfc_pinctrl {
 	struct pinctrl_dev *pctl;
 	struct sh_pfc *pfc;
diff --git a/include/linux/sh_pfc.h b/include/linux/sh_pfc.h
index 58587f94d67a..f7f01b231b66 100644
--- a/include/linux/sh_pfc.h
+++ b/include/linux/sh_pfc.h
@@ -88,12 +88,6 @@ struct pinmux_range {
 	pinmux_enum_t force;
 };
 
-struct pfc_window {
-	phys_addr_t phys;
-	void __iomem *virt;
-	unsigned long size;
-};
-
 struct sh_pfc_platform_data {
 	char *name;
 	pinmux_enum_t reserved_id;
@@ -123,35 +117,12 @@ struct sh_pfc_platform_data {
 	unsigned long unlock_reg;
 };
 
-struct sh_pfc {
-	struct sh_pfc_platform_data *pdata;
-	spinlock_t lock;
-
-	struct pfc_window *window;
-};
-
 /* XXX compat for now */
 #define pinmux_info sh_pfc_platform_data
 
-/* drivers/sh/pfc/gpio.c */
-int sh_pfc_register_gpiochip(struct sh_pfc *pfc);
-
-/* drivers/sh/pfc/pinctrl.c */
-int sh_pfc_register_pinctrl(struct sh_pfc *pfc);
-
 /* drivers/sh/pfc/core.c */
 int register_sh_pfc(struct sh_pfc_platform_data *pfc);
 
-int sh_pfc_read_bit(struct pinmux_data_reg *dr, unsigned long in_pos);
-void sh_pfc_write_bit(struct pinmux_data_reg *dr, unsigned long in_pos,
-		      unsigned long value);
-int sh_pfc_get_data_reg(struct sh_pfc *pfc, unsigned gpio,
-			struct pinmux_data_reg **drp, int *bitp);
-int sh_pfc_gpio_to_enum(struct sh_pfc *pfc, unsigned gpio, int pos,
-			pinmux_enum_t *enum_idp);
-int sh_pfc_config_gpio(struct sh_pfc *pfc, unsigned gpio, int pinmux_type,
-		       int cfg_mode);
-
 /* xxx */
 static inline int register_pinmux(struct pinmux_info *pip)
 {
-- 
cgit v1.2.3


From 8682b3c522c639f3a0de31c86bb91f2f9a6c76cb Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Date: Sat, 15 Dec 2012 23:51:17 +0100
Subject: sh-pfc: Remove platform device registration

The PFC platform device is now registered by arch code, remove the
legacy registration mechanism.

Signed-off-by: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Acked-by: Paul Mundt <lethal@linux-sh.org>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Simon Horman <horms+renesas@verge.net.au>
---
 drivers/sh/pfc/core.c  | 12 ------------
 include/linux/sh_pfc.h | 10 ----------
 2 files changed, 22 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/sh/pfc/core.c b/drivers/sh/pfc/core.c
index b2e121d927a0..9456c70a2da7 100644
--- a/drivers/sh/pfc/core.c
+++ b/drivers/sh/pfc/core.c
@@ -578,18 +578,6 @@ static struct platform_driver sh_pfc_driver = {
 	},
 };
 
-static struct platform_device sh_pfc_device = {
-	.name		= DRV_NAME,
-	.id		= -1,
-};
-
-int __init register_sh_pfc(struct sh_pfc_platform_data *pdata)
-{
-	sh_pfc_device.dev.platform_data = pdata;
-
-	return platform_device_register(&sh_pfc_device);
-}
-
 static int __init sh_pfc_init(void)
 {
 	return platform_driver_register(&sh_pfc_driver);
diff --git a/include/linux/sh_pfc.h b/include/linux/sh_pfc.h
index f7f01b231b66..fa1fec084229 100644
--- a/include/linux/sh_pfc.h
+++ b/include/linux/sh_pfc.h
@@ -120,16 +120,6 @@ struct sh_pfc_platform_data {
 /* XXX compat for now */
 #define pinmux_info sh_pfc_platform_data
 
-/* drivers/sh/pfc/core.c */
-int register_sh_pfc(struct sh_pfc_platform_data *pfc);
-
-/* xxx */
-static inline int register_pinmux(struct pinmux_info *pip)
-{
-	struct sh_pfc_platform_data *pdata = pip;
-	return register_sh_pfc(pdata);
-}
-
 enum { GPIO_CFG_DRYRUN, GPIO_CFG_REQ, GPIO_CFG_FREE };
 
 /* helper macro for port */
-- 
cgit v1.2.3


From 56dc04af3b5e54be330e18630301d2bda5d365eb Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Date: Sat, 15 Dec 2012 23:51:18 +0100
Subject: sh-pfc: Remove unused resource and num_resources platform data fields

The fields are now unused, remove them.

Signed-off-by: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Acked-by: Paul Mundt <lethal@linux-sh.org>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Simon Horman <horms+renesas@verge.net.au>
---
 drivers/sh/pfc/core.c  | 17 ++++-------------
 include/linux/sh_pfc.h |  3 ---
 2 files changed, 4 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/sh/pfc/core.c b/drivers/sh/pfc/core.c
index 9456c70a2da7..8e7818bccb29 100644
--- a/drivers/sh/pfc/core.c
+++ b/drivers/sh/pfc/core.c
@@ -28,31 +28,22 @@
 
 static int sh_pfc_ioremap(struct sh_pfc *pfc, struct platform_device *pdev)
 {
-	unsigned int num_resources;
 	struct resource *res;
 	int k;
 
-	if (pdev->num_resources) {
-		num_resources = pdev->num_resources;
-		res = pdev->resource;
-	} else {
-		num_resources = pfc->pdata->num_resources;
-		res = pfc->pdata->resource;
-	}
-
-	if (num_resources == 0) {
+	if (pdev->num_resources == 0) {
 		pfc->num_windows = 0;
 		return 0;
 	}
 
-	pfc->window = devm_kzalloc(pfc->dev, num_resources *
+	pfc->window = devm_kzalloc(pfc->dev, pdev->num_resources *
 				   sizeof(*pfc->window), GFP_NOWAIT);
 	if (!pfc->window)
 		return -ENOMEM;
 
-	pfc->num_windows = num_resources;
+	pfc->num_windows = pdev->num_resources;
 
-	for (k = 0; k < num_resources; k++, res++) {
+	for (k = 0, res = pdev->resource; k < pdev->num_resources; k++, res++) {
 		WARN_ON(resource_type(res) != IORESOURCE_MEM);
 		pfc->window[k].phys = res->start;
 		pfc->window[k].size = resource_size(res);
diff --git a/include/linux/sh_pfc.h b/include/linux/sh_pfc.h
index fa1fec084229..4f942337d77e 100644
--- a/include/linux/sh_pfc.h
+++ b/include/linux/sh_pfc.h
@@ -111,9 +111,6 @@ struct sh_pfc_platform_data {
 	struct pinmux_irq *gpio_irq;
 	unsigned int gpio_irq_size;
 
-	struct resource *resource;
-	unsigned int num_resources;
-
 	unsigned long unlock_reg;
 };
 
-- 
cgit v1.2.3


From 19bb7fe36950ff74ce322cc29f6f4e025999f1f0 Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Date: Sat, 15 Dec 2012 23:51:20 +0100
Subject: sh-pfc: Support pinmux info in driver data instead of platform data

Pinmux information should be provided by the pinmux driver, not arch
code. Make it possible to do so by supporting pinmux information passed
through the driver_data field in the platform ID table. Platform data
will remain supported until all arch code has been converted.

Rename the sh_pfc_platform_data structure to sh_pfc_soc_info to reflect
this.

Signed-off-by: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Acked-by: Paul Mundt <lethal@linux-sh.org>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Simon Horman <horms+renesas@verge.net.au>
---
 drivers/pinctrl/sh-pfc/core.c    | 52 +++++++++++++++++++++-------------------
 drivers/pinctrl/sh-pfc/core.h    |  2 +-
 drivers/pinctrl/sh-pfc/gpio.c    | 18 +++++++-------
 drivers/pinctrl/sh-pfc/pinctrl.c | 28 +++++++++++-----------
 include/linux/sh_pfc.h           |  4 ++--
 5 files changed, 53 insertions(+), 51 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pinctrl/sh-pfc/core.c b/drivers/pinctrl/sh-pfc/core.c
index 8e7818bccb29..95cd10f83e18 100644
--- a/drivers/pinctrl/sh-pfc/core.c
+++ b/drivers/pinctrl/sh-pfc/core.c
@@ -213,9 +213,9 @@ static void sh_pfc_write_config_reg(struct sh_pfc *pfc,
 	data &= mask;
 	data |= value;
 
-	if (pfc->pdata->unlock_reg)
+	if (pfc->info->unlock_reg)
 		sh_pfc_write_raw_reg(
-			sh_pfc_phys_to_virt(pfc, pfc->pdata->unlock_reg), 32,
+			sh_pfc_phys_to_virt(pfc, pfc->info->unlock_reg), 32,
 			~data);
 
 	sh_pfc_write_raw_reg(mapped_reg, crp->reg_width, data);
@@ -223,16 +223,16 @@ static void sh_pfc_write_config_reg(struct sh_pfc *pfc,
 
 static int sh_pfc_setup_data_reg(struct sh_pfc *pfc, unsigned gpio)
 {
-	struct pinmux_gpio *gpiop = &pfc->pdata->gpios[gpio];
+	struct pinmux_gpio *gpiop = &pfc->info->gpios[gpio];
 	struct pinmux_data_reg *data_reg;
 	int k, n;
 
-	if (!sh_pfc_enum_in_range(gpiop->enum_id, &pfc->pdata->data))
+	if (!sh_pfc_enum_in_range(gpiop->enum_id, &pfc->info->data))
 		return -1;
 
 	k = 0;
 	while (1) {
-		data_reg = pfc->pdata->data_regs + k;
+		data_reg = pfc->info->data_regs + k;
 
 		if (!data_reg->reg_width)
 			break;
@@ -261,12 +261,12 @@ static void sh_pfc_setup_data_regs(struct sh_pfc *pfc)
 	struct pinmux_data_reg *drp;
 	int k;
 
-	for (k = pfc->pdata->first_gpio; k <= pfc->pdata->last_gpio; k++)
+	for (k = pfc->info->first_gpio; k <= pfc->info->last_gpio; k++)
 		sh_pfc_setup_data_reg(pfc, k);
 
 	k = 0;
 	while (1) {
-		drp = pfc->pdata->data_regs + k;
+		drp = pfc->info->data_regs + k;
 
 		if (!drp->reg_width)
 			break;
@@ -280,15 +280,15 @@ static void sh_pfc_setup_data_regs(struct sh_pfc *pfc)
 int sh_pfc_get_data_reg(struct sh_pfc *pfc, unsigned gpio,
 			struct pinmux_data_reg **drp, int *bitp)
 {
-	struct pinmux_gpio *gpiop = &pfc->pdata->gpios[gpio];
+	struct pinmux_gpio *gpiop = &pfc->info->gpios[gpio];
 	int k, n;
 
-	if (!sh_pfc_enum_in_range(gpiop->enum_id, &pfc->pdata->data))
+	if (!sh_pfc_enum_in_range(gpiop->enum_id, &pfc->info->data))
 		return -1;
 
 	k = (gpiop->flags & PINMUX_FLAG_DREG) >> PINMUX_FLAG_DREG_SHIFT;
 	n = (gpiop->flags & PINMUX_FLAG_DBIT) >> PINMUX_FLAG_DBIT_SHIFT;
-	*drp = pfc->pdata->data_regs + k;
+	*drp = pfc->info->data_regs + k;
 	*bitp = n;
 	return 0;
 }
@@ -303,7 +303,7 @@ static int sh_pfc_get_config_reg(struct sh_pfc *pfc, pinmux_enum_t enum_id,
 
 	k = 0;
 	while (1) {
-		config_reg = pfc->pdata->cfg_regs + k;
+		config_reg = pfc->info->cfg_regs + k;
 
 		r_width = config_reg->reg_width;
 		f_width = config_reg->field_width;
@@ -341,12 +341,12 @@ static int sh_pfc_get_config_reg(struct sh_pfc *pfc, pinmux_enum_t enum_id,
 int sh_pfc_gpio_to_enum(struct sh_pfc *pfc, unsigned gpio, int pos,
 			pinmux_enum_t *enum_idp)
 {
-	pinmux_enum_t enum_id = pfc->pdata->gpios[gpio].enum_id;
-	pinmux_enum_t *data = pfc->pdata->gpio_data;
+	pinmux_enum_t enum_id = pfc->info->gpios[gpio].enum_id;
+	pinmux_enum_t *data = pfc->info->gpio_data;
 	int k;
 
-	if (!sh_pfc_enum_in_range(enum_id, &pfc->pdata->data)) {
-		if (!sh_pfc_enum_in_range(enum_id, &pfc->pdata->mark)) {
+	if (!sh_pfc_enum_in_range(enum_id, &pfc->info->data)) {
+		if (!sh_pfc_enum_in_range(enum_id, &pfc->info->mark)) {
 			pr_err("non data/mark enum_id for gpio %d\n", gpio);
 			return -1;
 		}
@@ -357,7 +357,7 @@ int sh_pfc_gpio_to_enum(struct sh_pfc *pfc, unsigned gpio, int pos,
 		return pos + 1;
 	}
 
-	for (k = 0; k < pfc->pdata->gpio_data_size; k++) {
+	for (k = 0; k < pfc->info->gpio_data_size; k++) {
 		if (data[k] == enum_id) {
 			*enum_idp = data[k + 1];
 			return k + 1;
@@ -384,19 +384,19 @@ int sh_pfc_config_gpio(struct sh_pfc *pfc, unsigned gpio, int pinmux_type,
 		break;
 
 	case PINMUX_TYPE_OUTPUT:
-		range = &pfc->pdata->output;
+		range = &pfc->info->output;
 		break;
 
 	case PINMUX_TYPE_INPUT:
-		range = &pfc->pdata->input;
+		range = &pfc->info->input;
 		break;
 
 	case PINMUX_TYPE_INPUT_PULLUP:
-		range = &pfc->pdata->input_pu;
+		range = &pfc->info->input_pu;
 		break;
 
 	case PINMUX_TYPE_INPUT_PULLDOWN:
-		range = &pfc->pdata->input_pd;
+		range = &pfc->info->input_pd;
 		break;
 
 	default:
@@ -416,7 +416,7 @@ int sh_pfc_config_gpio(struct sh_pfc *pfc, unsigned gpio, int pinmux_type,
 			break;
 
 		/* first check if this is a function enum */
-		in_range = sh_pfc_enum_in_range(enum_id, &pfc->pdata->function);
+		in_range = sh_pfc_enum_in_range(enum_id, &pfc->info->function);
 		if (!in_range) {
 			/* not a function enum */
 			if (range) {
@@ -482,7 +482,7 @@ int sh_pfc_config_gpio(struct sh_pfc *pfc, unsigned gpio, int pinmux_type,
 
 static int sh_pfc_probe(struct platform_device *pdev)
 {
-	struct sh_pfc_platform_data *pdata = pdev->dev.platform_data;
+	struct sh_pfc_soc_info *info;
 	struct sh_pfc *pfc;
 	int ret;
 
@@ -491,14 +491,16 @@ static int sh_pfc_probe(struct platform_device *pdev)
 	 */
 	BUILD_BUG_ON(PINMUX_FLAG_TYPE > ((1 << PINMUX_FLAG_DBIT_SHIFT) - 1));
 
-	if (pdata == NULL)
+	info = pdev->id_entry->driver_data
+	      ? (void *)pdev->id_entry->driver_data : pdev->dev.platform_data;
+	if (info == NULL)
 		return -ENODEV;
 
 	pfc = devm_kzalloc(&pdev->dev, sizeof(pfc), GFP_KERNEL);
 	if (pfc == NULL)
 		return -ENOMEM;
 
-	pfc->pdata = pdata;
+	pfc->info = info;
 	pfc->dev = &pdev->dev;
 
 	ret = sh_pfc_ioremap(pfc, pdev);
@@ -534,7 +536,7 @@ static int sh_pfc_probe(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, pfc);
 
-	pr_info("%s support registered\n", pdata->name);
+	pr_info("%s support registered\n", info->name);
 
 	return 0;
 }
diff --git a/drivers/pinctrl/sh-pfc/core.h b/drivers/pinctrl/sh-pfc/core.h
index 87ae5fd2a201..2c82e9df6f8a 100644
--- a/drivers/pinctrl/sh-pfc/core.h
+++ b/drivers/pinctrl/sh-pfc/core.h
@@ -25,7 +25,7 @@ struct sh_pfc_pinctrl;
 
 struct sh_pfc {
 	struct device *dev;
-	struct sh_pfc_platform_data *pdata;
+	struct sh_pfc_soc_info *info;
 	spinlock_t lock;
 
 	unsigned int num_windows;
diff --git a/drivers/pinctrl/sh-pfc/gpio.c b/drivers/pinctrl/sh-pfc/gpio.c
index a0454f321710..3cbdfea1dec0 100644
--- a/drivers/pinctrl/sh-pfc/gpio.c
+++ b/drivers/pinctrl/sh-pfc/gpio.c
@@ -106,11 +106,11 @@ static int sh_gpio_to_irq(struct gpio_chip *gc, unsigned offset)
 		if (pos <= 0 || !enum_id)
 			break;
 
-		for (i = 0; i < pfc->pdata->gpio_irq_size; i++) {
-			enum_ids = pfc->pdata->gpio_irq[i].enum_ids;
+		for (i = 0; i < pfc->info->gpio_irq_size; i++) {
+			enum_ids = pfc->info->gpio_irq[i].enum_ids;
 			for (k = 0; enum_ids[k]; k++) {
 				if (enum_ids[k] == enum_id)
-					return pfc->pdata->gpio_irq[i].irq;
+					return pfc->info->gpio_irq[i].irq;
 			}
 		}
 	}
@@ -131,12 +131,12 @@ static void sh_pfc_gpio_setup(struct sh_pfc_chip *chip)
 	gc->set = sh_gpio_set;
 	gc->to_irq = sh_gpio_to_irq;
 
-	WARN_ON(pfc->pdata->first_gpio != 0); /* needs testing */
+	WARN_ON(pfc->info->first_gpio != 0); /* needs testing */
 
-	gc->label = pfc->pdata->name;
+	gc->label = pfc->info->name;
 	gc->owner = THIS_MODULE;
-	gc->base = pfc->pdata->first_gpio;
-	gc->ngpio = (pfc->pdata->last_gpio - pfc->pdata->first_gpio) + 1;
+	gc->base = pfc->info->first_gpio;
+	gc->ngpio = (pfc->info->last_gpio - pfc->info->first_gpio) + 1;
 }
 
 int sh_pfc_register_gpiochip(struct sh_pfc *pfc)
@@ -159,8 +159,8 @@ int sh_pfc_register_gpiochip(struct sh_pfc *pfc)
 	pfc->gpio = chip;
 
 	pr_info("%s handling gpio %d -> %d\n",
-		pfc->pdata->name, pfc->pdata->first_gpio,
-		pfc->pdata->last_gpio);
+		pfc->info->name, pfc->info->first_gpio,
+		pfc->info->last_gpio);
 
 	return 0;
 }
diff --git a/drivers/pinctrl/sh-pfc/pinctrl.c b/drivers/pinctrl/sh-pfc/pinctrl.c
index 221bde03913a..fdfe7bea1502 100644
--- a/drivers/pinctrl/sh-pfc/pinctrl.c
+++ b/drivers/pinctrl/sh-pfc/pinctrl.c
@@ -140,7 +140,7 @@ static int sh_pfc_reconfig_pin(struct sh_pfc *pfc, unsigned offset,
 
 	spin_lock_irqsave(&pfc->lock, flags);
 
-	pinmux_type = pfc->pdata->gpios[offset].flags & PINMUX_FLAG_TYPE;
+	pinmux_type = pfc->info->gpios[offset].flags & PINMUX_FLAG_TYPE;
 
 	/*
 	 * See if the present config needs to first be de-configured.
@@ -172,8 +172,8 @@ static int sh_pfc_reconfig_pin(struct sh_pfc *pfc, unsigned offset,
 			       GPIO_CFG_REQ) != 0)
 		goto err;
 
-	pfc->pdata->gpios[offset].flags &= ~PINMUX_FLAG_TYPE;
-	pfc->pdata->gpios[offset].flags |= new_type;
+	pfc->info->gpios[offset].flags &= ~PINMUX_FLAG_TYPE;
+	pfc->info->gpios[offset].flags |= new_type;
 
 	ret = 0;
 
@@ -195,7 +195,7 @@ static int sh_pfc_gpio_request_enable(struct pinctrl_dev *pctldev,
 
 	spin_lock_irqsave(&pfc->lock, flags);
 
-	pinmux_type = pfc->pdata->gpios[offset].flags & PINMUX_FLAG_TYPE;
+	pinmux_type = pfc->info->gpios[offset].flags & PINMUX_FLAG_TYPE;
 
 	switch (pinmux_type) {
 	case PINMUX_TYPE_FUNCTION:
@@ -236,7 +236,7 @@ static void sh_pfc_gpio_disable_free(struct pinctrl_dev *pctldev,
 
 	spin_lock_irqsave(&pfc->lock, flags);
 
-	pinmux_type = pfc->pdata->gpios[offset].flags & PINMUX_FLAG_TYPE;
+	pinmux_type = pfc->info->gpios[offset].flags & PINMUX_FLAG_TYPE;
 
 	sh_pfc_config_gpio(pfc, offset, pinmux_type, GPIO_CFG_FREE);
 
@@ -270,7 +270,7 @@ static int sh_pfc_pinconf_get(struct pinctrl_dev *pctldev, unsigned pin,
 	struct sh_pfc_pinctrl *pmx = pinctrl_dev_get_drvdata(pctldev);
 	struct sh_pfc *pfc = pmx->pfc;
 
-	*config = pfc->pdata->gpios[pin].flags & PINMUX_FLAG_TYPE;
+	*config = pfc->info->gpios[pin].flags & PINMUX_FLAG_TYPE;
 
 	return 0;
 }
@@ -354,7 +354,7 @@ static int sh_pfc_map_gpios(struct sh_pfc *pfc, struct sh_pfc_pinctrl *pmx)
 	unsigned long flags;
 	int i;
 
-	pmx->nr_pads = pfc->pdata->last_gpio - pfc->pdata->first_gpio + 1;
+	pmx->nr_pads = pfc->info->last_gpio - pfc->info->first_gpio + 1;
 
 	pmx->pads = devm_kzalloc(pfc->dev, sizeof(*pmx->pads) * pmx->nr_pads,
 				 GFP_KERNEL);
@@ -373,9 +373,9 @@ static int sh_pfc_map_gpios(struct sh_pfc *pfc, struct sh_pfc_pinctrl *pmx)
 	 */
 	for (i = 0; i < pmx->nr_pads; i++) {
 		struct pinctrl_pin_desc *pin = pmx->pads + i;
-		struct pinmux_gpio *gpio = pfc->pdata->gpios + i;
+		struct pinmux_gpio *gpio = pfc->info->gpios + i;
 
-		pin->number = pfc->pdata->first_gpio + i;
+		pin->number = pfc->info->first_gpio + i;
 		pin->name = gpio->name;
 
 		/* XXX */
@@ -406,7 +406,7 @@ static int sh_pfc_map_functions(struct sh_pfc *pfc, struct sh_pfc_pinctrl *pmx)
 	spin_lock_irqsave(&pmx->lock, flags);
 
 	for (i = fn = 0; i < pmx->nr_pads; i++) {
-		struct pinmux_gpio *gpio = pfc->pdata->gpios + i;
+		struct pinmux_gpio *gpio = pfc->info->gpios + i;
 
 		if ((gpio->flags & PINMUX_FLAG_TYPE) == PINMUX_TYPE_FUNCTION)
 			pmx->functions[fn++] = gpio;
@@ -443,10 +443,10 @@ int sh_pfc_register_pinctrl(struct sh_pfc *pfc)
 	if (IS_ERR(pmx->pctl))
 		return PTR_ERR(pmx->pctl);
 
-	sh_pfc_gpio_range.npins = pfc->pdata->last_gpio
-				- pfc->pdata->first_gpio + 1;
-	sh_pfc_gpio_range.base = pfc->pdata->first_gpio;
-	sh_pfc_gpio_range.pin_base = pfc->pdata->first_gpio;
+	sh_pfc_gpio_range.npins = pfc->info->last_gpio
+				- pfc->info->first_gpio + 1;
+	sh_pfc_gpio_range.base = pfc->info->first_gpio;
+	sh_pfc_gpio_range.pin_base = pfc->info->first_gpio;
 
 	pinctrl_add_gpio_range(pmx->pctl, &sh_pfc_gpio_range);
 
diff --git a/include/linux/sh_pfc.h b/include/linux/sh_pfc.h
index 4f942337d77e..9fd62282c625 100644
--- a/include/linux/sh_pfc.h
+++ b/include/linux/sh_pfc.h
@@ -88,7 +88,7 @@ struct pinmux_range {
 	pinmux_enum_t force;
 };
 
-struct sh_pfc_platform_data {
+struct sh_pfc_soc_info {
 	char *name;
 	pinmux_enum_t reserved_id;
 	struct pinmux_range data;
@@ -115,7 +115,7 @@ struct sh_pfc_platform_data {
 };
 
 /* XXX compat for now */
-#define pinmux_info sh_pfc_platform_data
+#define pinmux_info sh_pfc_soc_info
 
 enum { GPIO_CFG_DRYRUN, GPIO_CFG_REQ, GPIO_CFG_FREE };
 
-- 
cgit v1.2.3


From b93911e3d59bcd665a810fdf8ec7040a74eb4ff4 Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Date: Sat, 15 Dec 2012 23:51:54 +0100
Subject: sh-pfc: Remove pinmux_info definition

The pinmux_info alias to sh_pfc_soc_info isn't needed anymore, remove
it.

Signed-off-by: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Acked-by: Paul Mundt <lethal@linux-sh.org>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Simon Horman <horms+renesas@verge.net.au>
---
 include/linux/sh_pfc.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sh_pfc.h b/include/linux/sh_pfc.h
index 9fd62282c625..13049c4c8d30 100644
--- a/include/linux/sh_pfc.h
+++ b/include/linux/sh_pfc.h
@@ -114,9 +114,6 @@ struct sh_pfc_soc_info {
 	unsigned long unlock_reg;
 };
 
-/* XXX compat for now */
-#define pinmux_info sh_pfc_soc_info
-
 enum { GPIO_CFG_DRYRUN, GPIO_CFG_REQ, GPIO_CFG_FREE };
 
 /* helper macro for port */
-- 
cgit v1.2.3


From c3323806a67c0c656e27956b7340e37ba6c6968b Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Date: Sat, 15 Dec 2012 23:51:55 +0100
Subject: sh-pfc: Move sh_pfc.h from include/linux/ to driver directory

The header file isn't used by arch code anymore. Make it private to the
driver.

Signed-off-by: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Acked-by: Paul Mundt <lethal@linux-sh.org>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Simon Horman <horms+renesas@verge.net.au>
---
 drivers/pinctrl/sh-pfc/core.c        |   1 -
 drivers/pinctrl/sh-pfc/core.h        |   3 +-
 drivers/pinctrl/sh-pfc/gpio.c        |   1 -
 drivers/pinctrl/sh-pfc/pfc-r8a7740.c |   3 +-
 drivers/pinctrl/sh-pfc/pfc-r8a7779.c |   3 +-
 drivers/pinctrl/sh-pfc/pfc-sh7203.c  |   3 +-
 drivers/pinctrl/sh-pfc/pfc-sh7264.c  |   3 +-
 drivers/pinctrl/sh-pfc/pfc-sh7269.c  |   3 +-
 drivers/pinctrl/sh-pfc/pfc-sh7372.c  |   3 +-
 drivers/pinctrl/sh-pfc/pfc-sh73a0.c  |   3 +-
 drivers/pinctrl/sh-pfc/pfc-sh7720.c  |   3 +-
 drivers/pinctrl/sh-pfc/pfc-sh7722.c  |   3 +-
 drivers/pinctrl/sh-pfc/pfc-sh7723.c  |   3 +-
 drivers/pinctrl/sh-pfc/pfc-sh7724.c  |   3 +-
 drivers/pinctrl/sh-pfc/pfc-sh7734.c  |   3 +-
 drivers/pinctrl/sh-pfc/pfc-sh7757.c  |   3 +-
 drivers/pinctrl/sh-pfc/pfc-sh7785.c  |   3 +-
 drivers/pinctrl/sh-pfc/pfc-sh7786.c  |   3 +-
 drivers/pinctrl/sh-pfc/pfc-shx3.c    |   3 +-
 drivers/pinctrl/sh-pfc/pinctrl.c     |   1 -
 drivers/pinctrl/sh-pfc/sh_pfc.h      | 195 +++++++++++++++++++++++++++++++++++
 include/linux/sh_pfc.h               | 195 -----------------------------------
 22 files changed, 229 insertions(+), 215 deletions(-)
 create mode 100644 drivers/pinctrl/sh-pfc/sh_pfc.h
 delete mode 100644 include/linux/sh_pfc.h

(limited to 'include/linux')

diff --git a/drivers/pinctrl/sh-pfc/core.c b/drivers/pinctrl/sh-pfc/core.c
index 33181f4b0731..d323c24fffaf 100644
--- a/drivers/pinctrl/sh-pfc/core.c
+++ b/drivers/pinctrl/sh-pfc/core.c
@@ -21,7 +21,6 @@
 #include <linux/module.h>
 #include <linux/pinctrl/machine.h>
 #include <linux/platform_device.h>
-#include <linux/sh_pfc.h>
 #include <linux/slab.h>
 
 #include "core.h"
diff --git a/drivers/pinctrl/sh-pfc/core.h b/drivers/pinctrl/sh-pfc/core.h
index 804082678a5c..ba7c33c33599 100644
--- a/drivers/pinctrl/sh-pfc/core.h
+++ b/drivers/pinctrl/sh-pfc/core.h
@@ -11,9 +11,10 @@
 #define __SH_PFC_CORE_H__
 
 #include <linux/compiler.h>
-#include <linux/sh_pfc.h>
 #include <linux/types.h>
 
+#include "sh_pfc.h"
+
 struct sh_pfc_window {
 	phys_addr_t phys;
 	void __iomem *virt;
diff --git a/drivers/pinctrl/sh-pfc/gpio.c b/drivers/pinctrl/sh-pfc/gpio.c
index 3cbdfea1dec0..a535075c8b69 100644
--- a/drivers/pinctrl/sh-pfc/gpio.c
+++ b/drivers/pinctrl/sh-pfc/gpio.c
@@ -16,7 +16,6 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/pinctrl/consumer.h>
-#include <linux/sh_pfc.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 
diff --git a/drivers/pinctrl/sh-pfc/pfc-r8a7740.c b/drivers/pinctrl/sh-pfc/pfc-r8a7740.c
index 0ab4cb6dc09b..214788c4a606 100644
--- a/drivers/pinctrl/sh-pfc/pfc-r8a7740.c
+++ b/drivers/pinctrl/sh-pfc/pfc-r8a7740.c
@@ -19,10 +19,11 @@
  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #include <linux/kernel.h>
-#include <linux/sh_pfc.h>
 #include <mach/r8a7740.h>
 #include <mach/irqs.h>
 
+#include "sh_pfc.h"
+
 #define CPU_ALL_PORT(fn, pfx, sfx)					\
 	PORT_10(fn, pfx, sfx),		PORT_90(fn, pfx, sfx),		\
 	PORT_10(fn, pfx##10, sfx),	PORT_90(fn, pfx##1, sfx),	\
diff --git a/drivers/pinctrl/sh-pfc/pfc-r8a7779.c b/drivers/pinctrl/sh-pfc/pfc-r8a7779.c
index 81471722682f..13feaa0c0eb7 100644
--- a/drivers/pinctrl/sh-pfc/pfc-r8a7779.c
+++ b/drivers/pinctrl/sh-pfc/pfc-r8a7779.c
@@ -19,9 +19,10 @@
  */
 
 #include <linux/kernel.h>
-#include <linux/sh_pfc.h>
 #include <mach/r8a7779.h>
 
+#include "sh_pfc.h"
+
 #define CPU_32_PORT(fn, pfx, sfx)				\
 	PORT_10(fn, pfx, sfx), PORT_10(fn, pfx##1, sfx),	\
 	PORT_10(fn, pfx##2, sfx), PORT_1(fn, pfx##30, sfx),	\
diff --git a/drivers/pinctrl/sh-pfc/pfc-sh7203.c b/drivers/pinctrl/sh-pfc/pfc-sh7203.c
index 62bd17329d29..01b425dfd162 100644
--- a/drivers/pinctrl/sh-pfc/pfc-sh7203.c
+++ b/drivers/pinctrl/sh-pfc/pfc-sh7203.c
@@ -10,9 +10,10 @@
 
 #include <linux/kernel.h>
 #include <linux/gpio.h>
-#include <linux/sh_pfc.h>
 #include <cpu/sh7203.h>
 
+#include "sh_pfc.h"
+
 enum {
 	PINMUX_RESERVED = 0,
 
diff --git a/drivers/pinctrl/sh-pfc/pfc-sh7264.c b/drivers/pinctrl/sh-pfc/pfc-sh7264.c
index 03d3f9c51373..2ba5639dcf34 100644
--- a/drivers/pinctrl/sh-pfc/pfc-sh7264.c
+++ b/drivers/pinctrl/sh-pfc/pfc-sh7264.c
@@ -10,9 +10,10 @@
 
 #include <linux/kernel.h>
 #include <linux/gpio.h>
-#include <linux/sh_pfc.h>
 #include <cpu/sh7264.h>
 
+#include "sh_pfc.h"
+
 enum {
 	PINMUX_RESERVED = 0,
 
diff --git a/drivers/pinctrl/sh-pfc/pfc-sh7269.c b/drivers/pinctrl/sh-pfc/pfc-sh7269.c
index dab04d45644f..b1b5d6d4ad76 100644
--- a/drivers/pinctrl/sh-pfc/pfc-sh7269.c
+++ b/drivers/pinctrl/sh-pfc/pfc-sh7269.c
@@ -11,9 +11,10 @@
 
 #include <linux/kernel.h>
 #include <linux/gpio.h>
-#include <linux/sh_pfc.h>
 #include <cpu/sh7269.h>
 
+#include "sh_pfc.h"
+
 enum {
 	PINMUX_RESERVED = 0,
 
diff --git a/drivers/pinctrl/sh-pfc/pfc-sh7372.c b/drivers/pinctrl/sh-pfc/pfc-sh7372.c
index a52fabe90f38..d44e7f02069b 100644
--- a/drivers/pinctrl/sh-pfc/pfc-sh7372.c
+++ b/drivers/pinctrl/sh-pfc/pfc-sh7372.c
@@ -21,10 +21,11 @@
  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #include <linux/kernel.h>
-#include <linux/sh_pfc.h>
 #include <mach/irqs.h>
 #include <mach/sh7372.h>
 
+#include "sh_pfc.h"
+
 #define CPU_ALL_PORT(fn, pfx, sfx) \
 	PORT_10(fn, pfx, sfx),		PORT_90(fn, pfx, sfx), \
 	PORT_10(fn, pfx##10, sfx),	PORT_10(fn, pfx##11, sfx), \
diff --git a/drivers/pinctrl/sh-pfc/pfc-sh73a0.c b/drivers/pinctrl/sh-pfc/pfc-sh73a0.c
index 8a0eee95e025..709008e94124 100644
--- a/drivers/pinctrl/sh-pfc/pfc-sh73a0.c
+++ b/drivers/pinctrl/sh-pfc/pfc-sh73a0.c
@@ -19,10 +19,11 @@
  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #include <linux/kernel.h>
-#include <linux/sh_pfc.h>
 #include <mach/sh73a0.h>
 #include <mach/irqs.h>
 
+#include "sh_pfc.h"
+
 #define CPU_ALL_PORT(fn, pfx, sfx)				\
 	PORT_10(fn, pfx,    sfx), PORT_10(fn, pfx##1, sfx),	\
 	PORT_10(fn, pfx##2, sfx), PORT_10(fn, pfx##3, sfx),	\
diff --git a/drivers/pinctrl/sh-pfc/pfc-sh7720.c b/drivers/pinctrl/sh-pfc/pfc-sh7720.c
index 20d05776d124..10872ed688a6 100644
--- a/drivers/pinctrl/sh-pfc/pfc-sh7720.c
+++ b/drivers/pinctrl/sh-pfc/pfc-sh7720.c
@@ -10,9 +10,10 @@
 
 #include <linux/kernel.h>
 #include <linux/gpio.h>
-#include <linux/sh_pfc.h>
 #include <cpu/sh7720.h>
 
+#include "sh_pfc.h"
+
 enum {
 	PINMUX_RESERVED = 0,
 
diff --git a/drivers/pinctrl/sh-pfc/pfc-sh7722.c b/drivers/pinctrl/sh-pfc/pfc-sh7722.c
index f58f00948454..2de0929315e6 100644
--- a/drivers/pinctrl/sh-pfc/pfc-sh7722.c
+++ b/drivers/pinctrl/sh-pfc/pfc-sh7722.c
@@ -1,9 +1,10 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/gpio.h>
-#include <linux/sh_pfc.h>
 #include <cpu/sh7722.h>
 
+#include "sh_pfc.h"
+
 enum {
 	PINMUX_RESERVED = 0,
 
diff --git a/drivers/pinctrl/sh-pfc/pfc-sh7723.c b/drivers/pinctrl/sh-pfc/pfc-sh7723.c
index fa3d36bba192..609673d3d70e 100644
--- a/drivers/pinctrl/sh-pfc/pfc-sh7723.c
+++ b/drivers/pinctrl/sh-pfc/pfc-sh7723.c
@@ -10,9 +10,10 @@
 
 #include <linux/init.h>
 #include <linux/kernel.h>
-#include <linux/sh_pfc.h>
 #include <cpu/sh7723.h>
 
+#include "sh_pfc.h"
+
 enum {
 	PINMUX_RESERVED = 0,
 
diff --git a/drivers/pinctrl/sh-pfc/pfc-sh7724.c b/drivers/pinctrl/sh-pfc/pfc-sh7724.c
index 7ad72c20209d..233fbf750b39 100644
--- a/drivers/pinctrl/sh-pfc/pfc-sh7724.c
+++ b/drivers/pinctrl/sh-pfc/pfc-sh7724.c
@@ -15,9 +15,10 @@
 
 #include <linux/init.h>
 #include <linux/kernel.h>
-#include <linux/sh_pfc.h>
 #include <cpu/sh7724.h>
 
+#include "sh_pfc.h"
+
 enum {
 	PINMUX_RESERVED = 0,
 
diff --git a/drivers/pinctrl/sh-pfc/pfc-sh7734.c b/drivers/pinctrl/sh-pfc/pfc-sh7734.c
index cbb5f4c5f4b0..23d76d262c32 100644
--- a/drivers/pinctrl/sh-pfc/pfc-sh7734.c
+++ b/drivers/pinctrl/sh-pfc/pfc-sh7734.c
@@ -10,9 +10,10 @@
  */
 #include <linux/init.h>
 #include <linux/kernel.h>
-#include <linux/sh_pfc.h>
 #include <cpu/sh7734.h>
 
+#include "sh_pfc.h"
+
 #define CPU_32_PORT(fn, pfx, sfx)				\
 	PORT_10(fn, pfx, sfx), PORT_10(fn, pfx##1, sfx),	\
 	PORT_10(fn, pfx##2, sfx), PORT_1(fn, pfx##30, sfx),	\
diff --git a/drivers/pinctrl/sh-pfc/pfc-sh7757.c b/drivers/pinctrl/sh-pfc/pfc-sh7757.c
index 6f707a0dffee..5ed74cd0ba99 100644
--- a/drivers/pinctrl/sh-pfc/pfc-sh7757.c
+++ b/drivers/pinctrl/sh-pfc/pfc-sh7757.c
@@ -15,9 +15,10 @@
 
 #include <linux/init.h>
 #include <linux/kernel.h>
-#include <linux/sh_pfc.h>
 #include <cpu/sh7757.h>
 
+#include "sh_pfc.h"
+
 enum {
 	PINMUX_RESERVED = 0,
 
diff --git a/drivers/pinctrl/sh-pfc/pfc-sh7785.c b/drivers/pinctrl/sh-pfc/pfc-sh7785.c
index 23f2c316412c..3b1825d925bb 100644
--- a/drivers/pinctrl/sh-pfc/pfc-sh7785.c
+++ b/drivers/pinctrl/sh-pfc/pfc-sh7785.c
@@ -10,9 +10,10 @@
 
 #include <linux/init.h>
 #include <linux/kernel.h>
-#include <linux/sh_pfc.h>
 #include <cpu/sh7785.h>
 
+#include "sh_pfc.h"
+
 enum {
 	PINMUX_RESERVED = 0,
 
diff --git a/drivers/pinctrl/sh-pfc/pfc-sh7786.c b/drivers/pinctrl/sh-pfc/pfc-sh7786.c
index e1453cf7ed22..1e18b58f9e5f 100644
--- a/drivers/pinctrl/sh-pfc/pfc-sh7786.c
+++ b/drivers/pinctrl/sh-pfc/pfc-sh7786.c
@@ -15,9 +15,10 @@
 
 #include <linux/init.h>
 #include <linux/kernel.h>
-#include <linux/sh_pfc.h>
 #include <cpu/sh7786.h>
 
+#include "sh_pfc.h"
+
 enum {
 	PINMUX_RESERVED = 0,
 
diff --git a/drivers/pinctrl/sh-pfc/pfc-shx3.c b/drivers/pinctrl/sh-pfc/pfc-shx3.c
index bd54042a1066..ccf6918b03c6 100644
--- a/drivers/pinctrl/sh-pfc/pfc-shx3.c
+++ b/drivers/pinctrl/sh-pfc/pfc-shx3.c
@@ -9,9 +9,10 @@
  */
 #include <linux/init.h>
 #include <linux/kernel.h>
-#include <linux/sh_pfc.h>
 #include <cpu/shx3.h>
 
+#include "sh_pfc.h"
+
 enum {
 	PINMUX_RESERVED = 0,
 
diff --git a/drivers/pinctrl/sh-pfc/pinctrl.c b/drivers/pinctrl/sh-pfc/pinctrl.c
index fdfe7bea1502..11e0e1374d65 100644
--- a/drivers/pinctrl/sh-pfc/pinctrl.c
+++ b/drivers/pinctrl/sh-pfc/pinctrl.c
@@ -20,7 +20,6 @@
 #include <linux/pinctrl/pinconf-generic.h>
 #include <linux/pinctrl/pinctrl.h>
 #include <linux/pinctrl/pinmux.h>
-#include <linux/sh_pfc.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 
diff --git a/drivers/pinctrl/sh-pfc/sh_pfc.h b/drivers/pinctrl/sh-pfc/sh_pfc.h
new file mode 100644
index 000000000000..13049c4c8d30
--- /dev/null
+++ b/drivers/pinctrl/sh-pfc/sh_pfc.h
@@ -0,0 +1,195 @@
+/*
+ * SuperH Pin Function Controller Support
+ *
+ * Copyright (c) 2008 Magnus Damm
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+
+#ifndef __SH_PFC_H
+#define __SH_PFC_H
+
+#include <linux/stringify.h>
+#include <asm-generic/gpio.h>
+
+typedef unsigned short pinmux_enum_t;
+typedef unsigned short pinmux_flag_t;
+
+enum {
+	PINMUX_TYPE_NONE,
+
+	PINMUX_TYPE_FUNCTION,
+	PINMUX_TYPE_GPIO,
+	PINMUX_TYPE_OUTPUT,
+	PINMUX_TYPE_INPUT,
+	PINMUX_TYPE_INPUT_PULLUP,
+	PINMUX_TYPE_INPUT_PULLDOWN,
+
+	PINMUX_FLAG_TYPE,	/* must be last */
+};
+
+#define PINMUX_FLAG_DBIT_SHIFT      5
+#define PINMUX_FLAG_DBIT            (0x1f << PINMUX_FLAG_DBIT_SHIFT)
+#define PINMUX_FLAG_DREG_SHIFT      10
+#define PINMUX_FLAG_DREG            (0x3f << PINMUX_FLAG_DREG_SHIFT)
+
+struct pinmux_gpio {
+	pinmux_enum_t enum_id;
+	pinmux_flag_t flags;
+	const char *name;
+};
+
+#define PINMUX_GPIO(gpio, data_or_mark) \
+	[gpio] = { .name = __stringify(gpio), .enum_id = data_or_mark, .flags = PINMUX_TYPE_NONE }
+
+#define PINMUX_DATA(data_or_mark, ids...) data_or_mark, ids, 0
+
+struct pinmux_cfg_reg {
+	unsigned long reg, reg_width, field_width;
+	unsigned long *cnt;
+	pinmux_enum_t *enum_ids;
+	unsigned long *var_field_width;
+};
+
+#define PINMUX_CFG_REG(name, r, r_width, f_width) \
+	.reg = r, .reg_width = r_width, .field_width = f_width,		\
+	.cnt = (unsigned long [r_width / f_width]) {}, \
+	.enum_ids = (pinmux_enum_t [(r_width / f_width) * (1 << f_width)])
+
+#define PINMUX_CFG_REG_VAR(name, r, r_width, var_fw0, var_fwn...) \
+	.reg = r, .reg_width = r_width,	\
+	.cnt = (unsigned long [r_width]) {}, \
+	.var_field_width = (unsigned long [r_width]) { var_fw0, var_fwn, 0 }, \
+	.enum_ids = (pinmux_enum_t [])
+
+struct pinmux_data_reg {
+	unsigned long reg, reg_width, reg_shadow;
+	pinmux_enum_t *enum_ids;
+	void __iomem *mapped_reg;
+};
+
+#define PINMUX_DATA_REG(name, r, r_width) \
+	.reg = r, .reg_width = r_width,	\
+	.enum_ids = (pinmux_enum_t [r_width]) \
+
+struct pinmux_irq {
+	int irq;
+	pinmux_enum_t *enum_ids;
+};
+
+#define PINMUX_IRQ(irq_nr, ids...)			   \
+	{ .irq = irq_nr, .enum_ids = (pinmux_enum_t []) { ids, 0 } }	\
+
+struct pinmux_range {
+	pinmux_enum_t begin;
+	pinmux_enum_t end;
+	pinmux_enum_t force;
+};
+
+struct sh_pfc_soc_info {
+	char *name;
+	pinmux_enum_t reserved_id;
+	struct pinmux_range data;
+	struct pinmux_range input;
+	struct pinmux_range input_pd;
+	struct pinmux_range input_pu;
+	struct pinmux_range output;
+	struct pinmux_range mark;
+	struct pinmux_range function;
+
+	unsigned first_gpio, last_gpio;
+
+	struct pinmux_gpio *gpios;
+	struct pinmux_cfg_reg *cfg_regs;
+	struct pinmux_data_reg *data_regs;
+
+	pinmux_enum_t *gpio_data;
+	unsigned int gpio_data_size;
+
+	struct pinmux_irq *gpio_irq;
+	unsigned int gpio_irq_size;
+
+	unsigned long unlock_reg;
+};
+
+enum { GPIO_CFG_DRYRUN, GPIO_CFG_REQ, GPIO_CFG_FREE };
+
+/* helper macro for port */
+#define PORT_1(fn, pfx, sfx) fn(pfx, sfx)
+
+#define PORT_10(fn, pfx, sfx) \
+	PORT_1(fn, pfx##0, sfx), PORT_1(fn, pfx##1, sfx),	\
+	PORT_1(fn, pfx##2, sfx), PORT_1(fn, pfx##3, sfx),	\
+	PORT_1(fn, pfx##4, sfx), PORT_1(fn, pfx##5, sfx),	\
+	PORT_1(fn, pfx##6, sfx), PORT_1(fn, pfx##7, sfx),	\
+	PORT_1(fn, pfx##8, sfx), PORT_1(fn, pfx##9, sfx)
+
+#define PORT_90(fn, pfx, sfx) \
+	PORT_10(fn, pfx##1, sfx), PORT_10(fn, pfx##2, sfx),	\
+	PORT_10(fn, pfx##3, sfx), PORT_10(fn, pfx##4, sfx),	\
+	PORT_10(fn, pfx##5, sfx), PORT_10(fn, pfx##6, sfx),	\
+	PORT_10(fn, pfx##7, sfx), PORT_10(fn, pfx##8, sfx),	\
+	PORT_10(fn, pfx##9, sfx)
+
+#define _PORT_ALL(pfx, sfx) pfx##_##sfx
+#define _GPIO_PORT(pfx, sfx) PINMUX_GPIO(GPIO_PORT##pfx, PORT##pfx##_DATA)
+#define PORT_ALL(str)	CPU_ALL_PORT(_PORT_ALL, PORT, str)
+#define GPIO_PORT_ALL()	CPU_ALL_PORT(_GPIO_PORT, , unused)
+#define GPIO_FN(str) PINMUX_GPIO(GPIO_FN_##str, str##_MARK)
+
+/* helper macro for pinmux_enum_t */
+#define PORT_DATA_I(nr)	\
+	PINMUX_DATA(PORT##nr##_DATA, PORT##nr##_FN0, PORT##nr##_IN)
+
+#define PORT_DATA_I_PD(nr)	\
+	PINMUX_DATA(PORT##nr##_DATA, PORT##nr##_FN0,	\
+		    PORT##nr##_IN, PORT##nr##_IN_PD)
+
+#define PORT_DATA_I_PU(nr)	\
+	PINMUX_DATA(PORT##nr##_DATA, PORT##nr##_FN0,	\
+		    PORT##nr##_IN, PORT##nr##_IN_PU)
+
+#define PORT_DATA_I_PU_PD(nr)	\
+	PINMUX_DATA(PORT##nr##_DATA, PORT##nr##_FN0,			\
+		    PORT##nr##_IN, PORT##nr##_IN_PD, PORT##nr##_IN_PU)
+
+#define PORT_DATA_O(nr)		\
+	PINMUX_DATA(PORT##nr##_DATA, PORT##nr##_FN0, PORT##nr##_OUT)
+
+#define PORT_DATA_IO(nr)	\
+	PINMUX_DATA(PORT##nr##_DATA, PORT##nr##_FN0, PORT##nr##_OUT,	\
+		    PORT##nr##_IN)
+
+#define PORT_DATA_IO_PD(nr)	\
+	PINMUX_DATA(PORT##nr##_DATA, PORT##nr##_FN0, PORT##nr##_OUT,	\
+		    PORT##nr##_IN, PORT##nr##_IN_PD)
+
+#define PORT_DATA_IO_PU(nr)	\
+	PINMUX_DATA(PORT##nr##_DATA, PORT##nr##_FN0, PORT##nr##_OUT,	\
+		    PORT##nr##_IN, PORT##nr##_IN_PU)
+
+#define PORT_DATA_IO_PU_PD(nr)	\
+	PINMUX_DATA(PORT##nr##_DATA, PORT##nr##_FN0, PORT##nr##_OUT,	\
+		    PORT##nr##_IN, PORT##nr##_IN_PD, PORT##nr##_IN_PU)
+
+/* helper macro for top 4 bits in PORTnCR */
+#define _PCRH(in, in_pd, in_pu, out)	\
+	0, (out), (in), 0,		\
+	0, 0, 0, 0,			\
+	0, 0, (in_pd), 0,		\
+	0, 0, (in_pu), 0
+
+#define PORTCR(nr, reg)							\
+	{								\
+		PINMUX_CFG_REG("PORT" nr "CR", reg, 8, 4) {		\
+			_PCRH(PORT##nr##_IN, PORT##nr##_IN_PD,		\
+			      PORT##nr##_IN_PU, PORT##nr##_OUT),	\
+				PORT##nr##_FN0, PORT##nr##_FN1,		\
+				PORT##nr##_FN2, PORT##nr##_FN3,		\
+				PORT##nr##_FN4, PORT##nr##_FN5,		\
+				PORT##nr##_FN6, PORT##nr##_FN7 }	\
+	}
+
+#endif /* __SH_PFC_H */
diff --git a/include/linux/sh_pfc.h b/include/linux/sh_pfc.h
deleted file mode 100644
index 13049c4c8d30..000000000000
--- a/include/linux/sh_pfc.h
+++ /dev/null
@@ -1,195 +0,0 @@
-/*
- * SuperH Pin Function Controller Support
- *
- * Copyright (c) 2008 Magnus Damm
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- */
-
-#ifndef __SH_PFC_H
-#define __SH_PFC_H
-
-#include <linux/stringify.h>
-#include <asm-generic/gpio.h>
-
-typedef unsigned short pinmux_enum_t;
-typedef unsigned short pinmux_flag_t;
-
-enum {
-	PINMUX_TYPE_NONE,
-
-	PINMUX_TYPE_FUNCTION,
-	PINMUX_TYPE_GPIO,
-	PINMUX_TYPE_OUTPUT,
-	PINMUX_TYPE_INPUT,
-	PINMUX_TYPE_INPUT_PULLUP,
-	PINMUX_TYPE_INPUT_PULLDOWN,
-
-	PINMUX_FLAG_TYPE,	/* must be last */
-};
-
-#define PINMUX_FLAG_DBIT_SHIFT      5
-#define PINMUX_FLAG_DBIT            (0x1f << PINMUX_FLAG_DBIT_SHIFT)
-#define PINMUX_FLAG_DREG_SHIFT      10
-#define PINMUX_FLAG_DREG            (0x3f << PINMUX_FLAG_DREG_SHIFT)
-
-struct pinmux_gpio {
-	pinmux_enum_t enum_id;
-	pinmux_flag_t flags;
-	const char *name;
-};
-
-#define PINMUX_GPIO(gpio, data_or_mark) \
-	[gpio] = { .name = __stringify(gpio), .enum_id = data_or_mark, .flags = PINMUX_TYPE_NONE }
-
-#define PINMUX_DATA(data_or_mark, ids...) data_or_mark, ids, 0
-
-struct pinmux_cfg_reg {
-	unsigned long reg, reg_width, field_width;
-	unsigned long *cnt;
-	pinmux_enum_t *enum_ids;
-	unsigned long *var_field_width;
-};
-
-#define PINMUX_CFG_REG(name, r, r_width, f_width) \
-	.reg = r, .reg_width = r_width, .field_width = f_width,		\
-	.cnt = (unsigned long [r_width / f_width]) {}, \
-	.enum_ids = (pinmux_enum_t [(r_width / f_width) * (1 << f_width)])
-
-#define PINMUX_CFG_REG_VAR(name, r, r_width, var_fw0, var_fwn...) \
-	.reg = r, .reg_width = r_width,	\
-	.cnt = (unsigned long [r_width]) {}, \
-	.var_field_width = (unsigned long [r_width]) { var_fw0, var_fwn, 0 }, \
-	.enum_ids = (pinmux_enum_t [])
-
-struct pinmux_data_reg {
-	unsigned long reg, reg_width, reg_shadow;
-	pinmux_enum_t *enum_ids;
-	void __iomem *mapped_reg;
-};
-
-#define PINMUX_DATA_REG(name, r, r_width) \
-	.reg = r, .reg_width = r_width,	\
-	.enum_ids = (pinmux_enum_t [r_width]) \
-
-struct pinmux_irq {
-	int irq;
-	pinmux_enum_t *enum_ids;
-};
-
-#define PINMUX_IRQ(irq_nr, ids...)			   \
-	{ .irq = irq_nr, .enum_ids = (pinmux_enum_t []) { ids, 0 } }	\
-
-struct pinmux_range {
-	pinmux_enum_t begin;
-	pinmux_enum_t end;
-	pinmux_enum_t force;
-};
-
-struct sh_pfc_soc_info {
-	char *name;
-	pinmux_enum_t reserved_id;
-	struct pinmux_range data;
-	struct pinmux_range input;
-	struct pinmux_range input_pd;
-	struct pinmux_range input_pu;
-	struct pinmux_range output;
-	struct pinmux_range mark;
-	struct pinmux_range function;
-
-	unsigned first_gpio, last_gpio;
-
-	struct pinmux_gpio *gpios;
-	struct pinmux_cfg_reg *cfg_regs;
-	struct pinmux_data_reg *data_regs;
-
-	pinmux_enum_t *gpio_data;
-	unsigned int gpio_data_size;
-
-	struct pinmux_irq *gpio_irq;
-	unsigned int gpio_irq_size;
-
-	unsigned long unlock_reg;
-};
-
-enum { GPIO_CFG_DRYRUN, GPIO_CFG_REQ, GPIO_CFG_FREE };
-
-/* helper macro for port */
-#define PORT_1(fn, pfx, sfx) fn(pfx, sfx)
-
-#define PORT_10(fn, pfx, sfx) \
-	PORT_1(fn, pfx##0, sfx), PORT_1(fn, pfx##1, sfx),	\
-	PORT_1(fn, pfx##2, sfx), PORT_1(fn, pfx##3, sfx),	\
-	PORT_1(fn, pfx##4, sfx), PORT_1(fn, pfx##5, sfx),	\
-	PORT_1(fn, pfx##6, sfx), PORT_1(fn, pfx##7, sfx),	\
-	PORT_1(fn, pfx##8, sfx), PORT_1(fn, pfx##9, sfx)
-
-#define PORT_90(fn, pfx, sfx) \
-	PORT_10(fn, pfx##1, sfx), PORT_10(fn, pfx##2, sfx),	\
-	PORT_10(fn, pfx##3, sfx), PORT_10(fn, pfx##4, sfx),	\
-	PORT_10(fn, pfx##5, sfx), PORT_10(fn, pfx##6, sfx),	\
-	PORT_10(fn, pfx##7, sfx), PORT_10(fn, pfx##8, sfx),	\
-	PORT_10(fn, pfx##9, sfx)
-
-#define _PORT_ALL(pfx, sfx) pfx##_##sfx
-#define _GPIO_PORT(pfx, sfx) PINMUX_GPIO(GPIO_PORT##pfx, PORT##pfx##_DATA)
-#define PORT_ALL(str)	CPU_ALL_PORT(_PORT_ALL, PORT, str)
-#define GPIO_PORT_ALL()	CPU_ALL_PORT(_GPIO_PORT, , unused)
-#define GPIO_FN(str) PINMUX_GPIO(GPIO_FN_##str, str##_MARK)
-
-/* helper macro for pinmux_enum_t */
-#define PORT_DATA_I(nr)	\
-	PINMUX_DATA(PORT##nr##_DATA, PORT##nr##_FN0, PORT##nr##_IN)
-
-#define PORT_DATA_I_PD(nr)	\
-	PINMUX_DATA(PORT##nr##_DATA, PORT##nr##_FN0,	\
-		    PORT##nr##_IN, PORT##nr##_IN_PD)
-
-#define PORT_DATA_I_PU(nr)	\
-	PINMUX_DATA(PORT##nr##_DATA, PORT##nr##_FN0,	\
-		    PORT##nr##_IN, PORT##nr##_IN_PU)
-
-#define PORT_DATA_I_PU_PD(nr)	\
-	PINMUX_DATA(PORT##nr##_DATA, PORT##nr##_FN0,			\
-		    PORT##nr##_IN, PORT##nr##_IN_PD, PORT##nr##_IN_PU)
-
-#define PORT_DATA_O(nr)		\
-	PINMUX_DATA(PORT##nr##_DATA, PORT##nr##_FN0, PORT##nr##_OUT)
-
-#define PORT_DATA_IO(nr)	\
-	PINMUX_DATA(PORT##nr##_DATA, PORT##nr##_FN0, PORT##nr##_OUT,	\
-		    PORT##nr##_IN)
-
-#define PORT_DATA_IO_PD(nr)	\
-	PINMUX_DATA(PORT##nr##_DATA, PORT##nr##_FN0, PORT##nr##_OUT,	\
-		    PORT##nr##_IN, PORT##nr##_IN_PD)
-
-#define PORT_DATA_IO_PU(nr)	\
-	PINMUX_DATA(PORT##nr##_DATA, PORT##nr##_FN0, PORT##nr##_OUT,	\
-		    PORT##nr##_IN, PORT##nr##_IN_PU)
-
-#define PORT_DATA_IO_PU_PD(nr)	\
-	PINMUX_DATA(PORT##nr##_DATA, PORT##nr##_FN0, PORT##nr##_OUT,	\
-		    PORT##nr##_IN, PORT##nr##_IN_PD, PORT##nr##_IN_PU)
-
-/* helper macro for top 4 bits in PORTnCR */
-#define _PCRH(in, in_pd, in_pu, out)	\
-	0, (out), (in), 0,		\
-	0, 0, 0, 0,			\
-	0, 0, (in_pd), 0,		\
-	0, 0, (in_pu), 0
-
-#define PORTCR(nr, reg)							\
-	{								\
-		PINMUX_CFG_REG("PORT" nr "CR", reg, 8, 4) {		\
-			_PCRH(PORT##nr##_IN, PORT##nr##_IN_PD,		\
-			      PORT##nr##_IN_PU, PORT##nr##_OUT),	\
-				PORT##nr##_FN0, PORT##nr##_FN1,		\
-				PORT##nr##_FN2, PORT##nr##_FN3,		\
-				PORT##nr##_FN4, PORT##nr##_FN5,		\
-				PORT##nr##_FN6, PORT##nr##_FN7 }	\
-	}
-
-#endif /* __SH_PFC_H */
-- 
cgit v1.2.3


From c3ad83d9efdfe6a86efd44945a781f00c879b7b4 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Thu, 24 Jan 2013 23:24:56 -0500
Subject: quota: autoload the quota_v2 module for QFMT_VFS_V1 quota format

Otherwise, ext4 file systems with the quota featured enable will get a
very confusing "No such process" error message if the quota code is
built as a module and the quota_v2 module has not been loaded.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
Acked-by: Jan Kara <jack@suse.cz>
Cc: stable@vger.kernel.org
---
 include/linux/quota.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/quota.h b/include/linux/quota.h
index 58fdef125252..d13371134c59 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -405,6 +405,7 @@ struct quota_module_name {
 #define INIT_QUOTA_MODULE_NAMES {\
 	{QFMT_VFS_OLD, "quota_v1"},\
 	{QFMT_VFS_V0, "quota_v2"},\
+	{QFMT_VFS_V1, "quota_v2"},\
 	{0, NULL}}
 
 #endif /* _QUOTA_ */
-- 
cgit v1.2.3


From 7e41bba94617b7e4f77d3531a63fbfacdf6842a6 Mon Sep 17 00:00:00 2001
From: Kishon Vijay Abraham I <kishon@ti.com>
Date: Fri, 25 Jan 2013 08:30:49 +0530
Subject: usb: dwc3: omap: Add an API to write to dwc mailbox

Add an API in the omap glue layer to write to the mailbox register which
can be used by comparator driver(twl). To pass the detection of the attached
device (signified by VBUS, ID) to the dwc3 core, dwc3 core has to write
to the mailbox regiter.

Signed-off-by: Kishon Vijay Abraham I <kishon@ti.com>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 drivers/usb/dwc3/dwc3-omap.c  | 60 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/usb/dwc3-omap.h | 30 ++++++++++++++++++++++
 2 files changed, 90 insertions(+)
 create mode 100644 include/linux/usb/dwc3-omap.h

(limited to 'include/linux')

diff --git a/drivers/usb/dwc3/dwc3-omap.c b/drivers/usb/dwc3/dwc3-omap.c
index f85ae5e6129d..831b75fa4386 100644
--- a/drivers/usb/dwc3/dwc3-omap.c
+++ b/drivers/usb/dwc3/dwc3-omap.c
@@ -43,6 +43,7 @@
 #include <linux/spinlock.h>
 #include <linux/platform_device.h>
 #include <linux/platform_data/dwc3-omap.h>
+#include <linux/usb/dwc3-omap.h>
 #include <linux/pm_runtime.h>
 #include <linux/dma-mapping.h>
 #include <linux/ioport.h>
@@ -131,6 +132,8 @@ struct dwc3_omap {
 	u32			dma_status:1;
 };
 
+struct dwc3_omap		*_omap;
+
 static inline u32 dwc3_omap_readl(void __iomem *base, u32 offset)
 {
 	return readl(base + offset);
@@ -141,6 +144,57 @@ static inline void dwc3_omap_writel(void __iomem *base, u32 offset, u32 value)
 	writel(value, base + offset);
 }
 
+void dwc3_omap_mailbox(enum omap_dwc3_vbus_id_status status)
+{
+	u32			val;
+	struct dwc3_omap	*omap = _omap;
+
+	switch (status) {
+	case OMAP_DWC3_ID_GROUND:
+		dev_dbg(omap->dev, "ID GND\n");
+
+		val = dwc3_omap_readl(omap->base, USBOTGSS_UTMI_OTG_STATUS);
+		val &= ~(USBOTGSS_UTMI_OTG_STATUS_IDDIG
+				| USBOTGSS_UTMI_OTG_STATUS_VBUSVALID
+				| USBOTGSS_UTMI_OTG_STATUS_SESSEND);
+		val |= USBOTGSS_UTMI_OTG_STATUS_SESSVALID
+				| USBOTGSS_UTMI_OTG_STATUS_POWERPRESENT;
+		dwc3_omap_writel(omap->base, USBOTGSS_UTMI_OTG_STATUS, val);
+		break;
+
+	case OMAP_DWC3_VBUS_VALID:
+		dev_dbg(omap->dev, "VBUS Connect\n");
+
+		val = dwc3_omap_readl(omap->base, USBOTGSS_UTMI_OTG_STATUS);
+		val &= ~USBOTGSS_UTMI_OTG_STATUS_SESSEND;
+		val |= USBOTGSS_UTMI_OTG_STATUS_IDDIG
+				| USBOTGSS_UTMI_OTG_STATUS_VBUSVALID
+				| USBOTGSS_UTMI_OTG_STATUS_SESSVALID
+				| USBOTGSS_UTMI_OTG_STATUS_POWERPRESENT;
+		dwc3_omap_writel(omap->base, USBOTGSS_UTMI_OTG_STATUS, val);
+		break;
+
+	case OMAP_DWC3_ID_FLOAT:
+	case OMAP_DWC3_VBUS_OFF:
+		dev_dbg(omap->dev, "VBUS Disconnect\n");
+
+		val = dwc3_omap_readl(omap->base, USBOTGSS_UTMI_OTG_STATUS);
+		val &= ~(USBOTGSS_UTMI_OTG_STATUS_SESSVALID
+				| USBOTGSS_UTMI_OTG_STATUS_VBUSVALID
+				| USBOTGSS_UTMI_OTG_STATUS_POWERPRESENT);
+		val |= USBOTGSS_UTMI_OTG_STATUS_SESSEND
+				| USBOTGSS_UTMI_OTG_STATUS_IDDIG;
+		dwc3_omap_writel(omap->base, USBOTGSS_UTMI_OTG_STATUS, val);
+		break;
+
+	default:
+		dev_dbg(omap->dev, "ID float\n");
+	}
+
+	return;
+}
+EXPORT_SYMBOL_GPL(dwc3_omap_mailbox);
+
 static int dwc3_omap_register_phys(struct dwc3_omap *omap)
 {
 	struct nop_usb_xceiv_platform_data pdata;
@@ -320,6 +374,12 @@ static int dwc3_omap_probe(struct platform_device *pdev)
 	omap->irq	= irq;
 	omap->base	= base;
 
+	/*
+	 * REVISIT if we ever have two instances of the wrapper, we will be
+	 * in big trouble
+	 */
+	_omap	= omap;
+
 	pm_runtime_enable(dev);
 	ret = pm_runtime_get_sync(dev);
 	if (ret < 0) {
diff --git a/include/linux/usb/dwc3-omap.h b/include/linux/usb/dwc3-omap.h
new file mode 100644
index 000000000000..51eae14477f7
--- /dev/null
+++ b/include/linux/usb/dwc3-omap.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (C) 2013 by Texas Instruments
+ *
+ * The Inventra Controller Driver for Linux is free software; you
+ * can redistribute it and/or modify it under the terms of the GNU
+ * General Public License version 2 as published by the Free Software
+ * Foundation.
+ */
+
+#ifndef __DWC3_OMAP_H__
+#define __DWC3_OMAP_H__
+
+enum omap_dwc3_vbus_id_status {
+	OMAP_DWC3_UNKNOWN = 0,
+	OMAP_DWC3_ID_GROUND,
+	OMAP_DWC3_ID_FLOAT,
+	OMAP_DWC3_VBUS_VALID,
+	OMAP_DWC3_VBUS_OFF,
+};
+
+#if (defined(CONFIG_USB_DWC3) || defined(CONFIG_USB_DWC3_MODULE))
+extern void dwc3_omap_mailbox(enum omap_dwc3_vbus_id_status status);
+#else
+static inline void dwc3_omap_mailbox(enum omap_dwc3_vbus_id_status status)
+{
+	return;
+}
+#endif
+
+#endif	/* __DWC3_OMAP_H__ */
-- 
cgit v1.2.3


From 57d2aa00dcec67afa52478730f2b524521af14fb Mon Sep 17 00:00:00 2001
From: Ying Xue <ying.xue@windriver.com>
Date: Tue, 17 Jul 2012 15:03:43 +0800
Subject: sched/rt: Avoid updating RT entry timeout twice within one tick
 period

The issue below was found in 2.6.34-rt rather than mainline rt
kernel, but the issue still exists upstream as well.

So please let me describe how it was noticed on 2.6.34-rt:

On this version, each softirq has its own thread, it means there
is at least one RT FIFO task per cpu. The priority of these
tasks is set to 49 by default. If user launches an RT FIFO task
with priority lower than 49 of softirq RT tasks, it's possible
there are two RT FIFO tasks enqueued one cpu runqueue at one
moment. By current strategy of balancing RT tasks, when it comes
to RT tasks, we really need to put them off to a CPU that they
can run on as soon as possible. Even if it means a bit of cache
line flushing, we want RT tasks to be run with the least latency.

When the user RT FIFO task which just launched before is
running, the sched timer tick of the current cpu happens. In this
tick period, the timeout value of the user RT task will be
updated once. Subsequently, we try to wake up one softirq RT
task on its local cpu. As the priority of current user RT task
is lower than the softirq RT task, the current task will be
preempted by the higher priority softirq RT task. Before
preemption, we check to see if current can readily move to a
different cpu. If so, we will reschedule to allow the RT push logic
to try to move current somewhere else. Whenever the woken
softirq RT task runs, it first tries to migrate the user FIFO RT
task over to a cpu that is running a task of lesser priority. If
migration is done, it will send a reschedule request to the found
cpu by IPI interrupt. Once the target cpu responds the IPI
interrupt, it will pick the migrated user RT task to preempt its
current task. When the user RT task is running on the new cpu,
the sched timer tick of the cpu fires. So it will tick the user
RT task again. This also means the RT task timeout value will be
updated again. As the migration may be done in one tick period,
it means the user RT task timeout value will be updated twice
within one tick.

If we set a limit on the amount of cpu time for the user RT task
by setrlimit(RLIMIT_RTTIME), the SIGXCPU signal should be posted
upon reaching the soft limit.

But exactly when the SIGXCPU signal should be sent depends on the
RT task timeout value. In fact the timeout mechanism of sending
the SIGXCPU signal assumes the RT task timeout is increased once
every tick.

However, currently the timeout value may be added twice per
tick. So it results in the SIGXCPU signal being sent earlier
than expected.

To solve this issue, we prevent the timeout value from increasing
twice within one tick time by remembering the jiffies value of
last updating the timeout. As long as the RT task's jiffies is
different with the global jiffies value, we allow its timeout to
be updated.

Signed-off-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Fan Du <fan.du@windriver.com>
Reviewed-by: Yong Zhang <yong.zhang0@gmail.com>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Cc: <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1342508623-2887-1-git-send-email-ying.xue@windriver.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 1 +
 kernel/sched/rt.c     | 6 +++++-
 2 files changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index d2112477ff5e..924e42a8df58 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1208,6 +1208,7 @@ struct sched_entity {
 struct sched_rt_entity {
 	struct list_head run_list;
 	unsigned long timeout;
+	unsigned long watchdog_stamp;
 	unsigned int time_slice;
 
 	struct sched_rt_entity *back;
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 29bda5bdf2a5..2f69ca997826 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1988,7 +1988,11 @@ static void watchdog(struct rq *rq, struct task_struct *p)
 	if (soft != RLIM_INFINITY) {
 		unsigned long next;
 
-		p->rt.timeout++;
+		if (p->rt.watchdog_stamp != jiffies) {
+			p->rt.timeout++;
+			p->rt.watchdog_stamp = jiffies;
+		}
+
 		next = DIV_ROUND_UP(min(soft, hard), USEC_PER_SEC/HZ);
 		if (p->rt.timeout > next)
 			p->cputime_expires.sched_exp = p->se.sum_exec_runtime;
-- 
cgit v1.2.3


From b4a83e4df1bc864e89d3bb90e97f9caab656545d Mon Sep 17 00:00:00 2001
From: Kishon Vijay Abraham I <kishon@ti.com>
Date: Fri, 25 Jan 2013 08:03:21 +0530
Subject: usb: otg: add an api to bind the usb controller and phy

In order to support platforms which has multiple PHY's (of same type) and
which has multiple USB controllers, a new design is adopted wherin the binding
information (between the PHY and the USB controller) should be passed to the
PHY library from platform specific file (board file).
So added a new API to pass the binding information.

Signed-off-by: Kishon Vijay Abraham I <kishon@ti.com>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 drivers/usb/otg/otg.c   | 37 +++++++++++++++++++++++++++++++++++++
 include/linux/usb/phy.h | 22 ++++++++++++++++++++++
 2 files changed, 59 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/usb/otg/otg.c b/drivers/usb/otg/otg.c
index a30c04115115..8e756d95a28f 100644
--- a/drivers/usb/otg/otg.c
+++ b/drivers/usb/otg/otg.c
@@ -18,6 +18,7 @@
 #include <linux/usb/otg.h>
 
 static LIST_HEAD(phy_list);
+static LIST_HEAD(phy_bind_list);
 static DEFINE_SPINLOCK(phy_lock);
 
 static struct usb_phy *__usb_find_phy(struct list_head *list,
@@ -201,6 +202,42 @@ void usb_remove_phy(struct usb_phy *x)
 }
 EXPORT_SYMBOL(usb_remove_phy);
 
+/**
+ * usb_bind_phy - bind the phy and the controller that uses the phy
+ * @dev_name: the device name of the device that will bind to the phy
+ * @index: index to specify the port number
+ * @phy_dev_name: the device name of the phy
+ *
+ * Fills the phy_bind structure with the dev_name and phy_dev_name. This will
+ * be used when the phy driver registers the phy and when the controller
+ * requests this phy.
+ *
+ * To be used by platform specific initialization code.
+ */
+int __init usb_bind_phy(const char *dev_name, u8 index,
+				const char *phy_dev_name)
+{
+	struct usb_phy_bind *phy_bind;
+	unsigned long flags;
+
+	phy_bind = kzalloc(sizeof(*phy_bind), GFP_KERNEL);
+	if (!phy_bind) {
+		pr_err("phy_bind(): No memory for phy_bind");
+		return -ENOMEM;
+	}
+
+	phy_bind->dev_name = dev_name;
+	phy_bind->phy_dev_name = phy_dev_name;
+	phy_bind->index = index;
+
+	spin_lock_irqsave(&phy_lock, flags);
+	list_add_tail(&phy_bind->list, &phy_bind_list);
+	spin_unlock_irqrestore(&phy_lock, flags);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(usb_bind_phy);
+
 const char *otg_state_string(enum usb_otg_state state)
 {
 	switch (state) {
diff --git a/include/linux/usb/phy.h b/include/linux/usb/phy.h
index a29ae1eb9346..a812ed5a1691 100644
--- a/include/linux/usb/phy.h
+++ b/include/linux/usb/phy.h
@@ -106,6 +106,21 @@ struct usb_phy {
 			enum usb_device_speed speed);
 };
 
+/**
+ * struct usb_phy_bind - represent the binding for the phy
+ * @dev_name: the device name of the device that will bind to the phy
+ * @phy_dev_name: the device name of the phy
+ * @index: used if a single controller uses multiple phys
+ * @phy: reference to the phy
+ * @list: to maintain a linked list of the binding information
+ */
+struct usb_phy_bind {
+	const char	*dev_name;
+	const char	*phy_dev_name;
+	u8		index;
+	struct usb_phy	*phy;
+	struct list_head list;
+};
 
 /* for board-specific init logic */
 extern int usb_add_phy(struct usb_phy *, enum usb_phy_type type);
@@ -151,6 +166,8 @@ extern struct usb_phy *devm_usb_get_phy(struct device *dev,
 	enum usb_phy_type type);
 extern void usb_put_phy(struct usb_phy *);
 extern void devm_usb_put_phy(struct device *dev, struct usb_phy *x);
+extern int usb_bind_phy(const char *dev_name, u8 index,
+				const char *phy_dev_name);
 #else
 static inline struct usb_phy *usb_get_phy(enum usb_phy_type type)
 {
@@ -171,6 +188,11 @@ static inline void devm_usb_put_phy(struct device *dev, struct usb_phy *x)
 {
 }
 
+static inline int usb_bind_phy(const char *dev_name, u8 index,
+				const char *phy_dev_name)
+{
+	return -EOPNOTSUPP;
+}
 #endif
 
 static inline int
-- 
cgit v1.2.3


From 0fa4fab4ee46470ccd463c83be95434936942e05 Mon Sep 17 00:00:00 2001
From: Kishon Vijay Abraham I <kishon@ti.com>
Date: Fri, 25 Jan 2013 08:03:22 +0530
Subject: usb: otg: utils: add facilities in phy lib to support multiple PHYs
 of same type

In order to add support for multipe PHY's of the same type, new API's
for adding PHY and getting PHY has been added. Now the binding
information for the PHY and controller should be done in platform file
using usb_bind_phy API. And for getting a PHY, the device pointer of the
USB controller and an index should be passed. Based on the binding
information that is added in the platform file, usb_get_phy_dev will return the
appropriate PHY.
Already existing API's to add and get phy by type is not removed. These
API's are deprecated and will be removed once all the platforms start to
use the new API.

Signed-off-by: Kishon Vijay Abraham I <kishon@ti.com>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 drivers/usb/otg/otg.c   | 118 +++++++++++++++++++++++++++++++++++++++++++++++-
 include/linux/usb/phy.h |  13 ++++++
 2 files changed, 130 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/usb/otg/otg.c b/drivers/usb/otg/otg.c
index 8e756d95a28f..4bb4333c9653 100644
--- a/drivers/usb/otg/otg.c
+++ b/drivers/usb/otg/otg.c
@@ -36,6 +36,24 @@ static struct usb_phy *__usb_find_phy(struct list_head *list,
 	return ERR_PTR(-ENODEV);
 }
 
+static struct usb_phy *__usb_find_phy_dev(struct device *dev,
+	struct list_head *list, u8 index)
+{
+	struct usb_phy_bind *phy_bind = NULL;
+
+	list_for_each_entry(phy_bind, list, list) {
+		if (!(strcmp(phy_bind->dev_name, dev_name(dev))) &&
+				phy_bind->index == index) {
+			if (phy_bind->phy)
+				return phy_bind->phy;
+			else
+				return ERR_PTR(-EPROBE_DEFER);
+		}
+	}
+
+	return ERR_PTR(-ENODEV);
+}
+
 static void devm_usb_phy_release(struct device *dev, void *res)
 {
 	struct usb_phy *phy = *(struct usb_phy **)res;
@@ -111,6 +129,69 @@ err0:
 }
 EXPORT_SYMBOL(usb_get_phy);
 
+/**
+ * usb_get_phy_dev - find the USB PHY
+ * @dev - device that requests this phy
+ * @index - the index of the phy
+ *
+ * Returns the phy driver, after getting a refcount to it; or
+ * -ENODEV if there is no such phy.  The caller is responsible for
+ * calling usb_put_phy() to release that count.
+ *
+ * For use by USB host and peripheral drivers.
+ */
+struct usb_phy *usb_get_phy_dev(struct device *dev, u8 index)
+{
+	struct usb_phy	*phy = NULL;
+	unsigned long	flags;
+
+	spin_lock_irqsave(&phy_lock, flags);
+
+	phy = __usb_find_phy_dev(dev, &phy_bind_list, index);
+	if (IS_ERR(phy)) {
+		pr_err("unable to find transceiver\n");
+		goto err0;
+	}
+
+	get_device(phy->dev);
+
+err0:
+	spin_unlock_irqrestore(&phy_lock, flags);
+
+	return phy;
+}
+EXPORT_SYMBOL(usb_get_phy_dev);
+
+/**
+ * devm_usb_get_phy_dev - find the USB PHY using device ptr and index
+ * @dev - device that requests this phy
+ * @index - the index of the phy
+ *
+ * Gets the phy using usb_get_phy_dev(), and associates a device with it using
+ * devres. On driver detach, release function is invoked on the devres data,
+ * then, devres data is freed.
+ *
+ * For use by USB host and peripheral drivers.
+ */
+struct usb_phy *devm_usb_get_phy_dev(struct device *dev, u8 index)
+{
+	struct usb_phy **ptr, *phy;
+
+	ptr = devres_alloc(devm_usb_phy_release, sizeof(*ptr), GFP_KERNEL);
+	if (!ptr)
+		return NULL;
+
+	phy = usb_get_phy_dev(dev, index);
+	if (!IS_ERR(phy)) {
+		*ptr = phy;
+		devres_add(dev, ptr);
+	} else
+		devres_free(ptr);
+
+	return phy;
+}
+EXPORT_SYMBOL(devm_usb_get_phy_dev);
+
 /**
  * devm_usb_put_phy - release the USB PHY
  * @dev - device that wants to release this phy
@@ -185,6 +266,36 @@ out:
 }
 EXPORT_SYMBOL(usb_add_phy);
 
+/**
+ * usb_add_phy_dev - declare the USB PHY
+ * @x: the USB phy to be used; or NULL
+ *
+ * This call is exclusively for use by phy drivers, which
+ * coordinate the activities of drivers for host and peripheral
+ * controllers, and in some cases for VBUS current regulation.
+ */
+int usb_add_phy_dev(struct usb_phy *x)
+{
+	struct usb_phy_bind *phy_bind;
+	unsigned long flags;
+
+	if (!x->dev) {
+		dev_err(x->dev, "no device provided for PHY\n");
+		return -EINVAL;
+	}
+
+	spin_lock_irqsave(&phy_lock, flags);
+	list_for_each_entry(phy_bind, &phy_bind_list, list)
+		if (!(strcmp(phy_bind->phy_dev_name, dev_name(x->dev))))
+			phy_bind->phy = x;
+
+	list_add_tail(&x->head, &phy_list);
+
+	spin_unlock_irqrestore(&phy_lock, flags);
+	return 0;
+}
+EXPORT_SYMBOL(usb_add_phy_dev);
+
 /**
  * usb_remove_phy - remove the OTG PHY
  * @x: the USB OTG PHY to be removed;
@@ -194,10 +305,15 @@ EXPORT_SYMBOL(usb_add_phy);
 void usb_remove_phy(struct usb_phy *x)
 {
 	unsigned long	flags;
+	struct usb_phy_bind *phy_bind;
 
 	spin_lock_irqsave(&phy_lock, flags);
-	if (x)
+	if (x) {
+		list_for_each_entry(phy_bind, &phy_bind_list, list)
+			if (phy_bind->phy == x)
+				phy_bind->phy = NULL;
 		list_del(&x->head);
+	}
 	spin_unlock_irqrestore(&phy_lock, flags);
 }
 EXPORT_SYMBOL(usb_remove_phy);
diff --git a/include/linux/usb/phy.h b/include/linux/usb/phy.h
index a812ed5a1691..359db7de61e4 100644
--- a/include/linux/usb/phy.h
+++ b/include/linux/usb/phy.h
@@ -124,6 +124,7 @@ struct usb_phy_bind {
 
 /* for board-specific init logic */
 extern int usb_add_phy(struct usb_phy *, enum usb_phy_type type);
+extern int usb_add_phy_dev(struct usb_phy *);
 extern void usb_remove_phy(struct usb_phy *);
 
 /* helpers for direct access thru low-level io interface */
@@ -164,6 +165,8 @@ usb_phy_shutdown(struct usb_phy *x)
 extern struct usb_phy *usb_get_phy(enum usb_phy_type type);
 extern struct usb_phy *devm_usb_get_phy(struct device *dev,
 	enum usb_phy_type type);
+extern struct usb_phy *usb_get_phy_dev(struct device *dev, u8 index);
+extern struct usb_phy *devm_usb_get_phy_dev(struct device *dev, u8 index);
 extern void usb_put_phy(struct usb_phy *);
 extern void devm_usb_put_phy(struct device *dev, struct usb_phy *x);
 extern int usb_bind_phy(const char *dev_name, u8 index,
@@ -180,6 +183,16 @@ static inline struct usb_phy *devm_usb_get_phy(struct device *dev,
 	return NULL;
 }
 
+static inline struct usb_phy *usb_get_phy_dev(struct device *dev, u8 index)
+{
+	return NULL;
+}
+
+static inline struct usb_phy *devm_usb_get_phy_dev(struct device *dev, u8 index)
+{
+	return NULL;
+}
+
 static inline void usb_put_phy(struct usb_phy *x)
 {
 }
-- 
cgit v1.2.3


From 5d3c28b5a42df5ceaa854901ba2cccb76883c77e Mon Sep 17 00:00:00 2001
From: Kishon Vijay Abraham I <kishon@ti.com>
Date: Fri, 25 Jan 2013 08:03:25 +0530
Subject: usb: otg: add device tree support to otg library

Added an API devm_usb_get_phy_by_phandle(), to get usb phy by passing a
device node phandle value. This function will return a pointer to
the phy on success, -EPROBE_DEFER if there is a device_node for the phandle,
but the phy has not been added, or a ERR_PTR() otherwise.

Cc: Marc Kleine-Budde <mkl@pengutronix.de>
Signed-off-by: Kishon Vijay Abraham I <kishon@ti.com>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 drivers/usb/otg/otg.c   | 80 +++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/usb/phy.h |  8 +++++
 2 files changed, 88 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/usb/otg/otg.c b/drivers/usb/otg/otg.c
index 4bb4333c9653..e1814397ca3a 100644
--- a/drivers/usb/otg/otg.c
+++ b/drivers/usb/otg/otg.c
@@ -13,7 +13,9 @@
 #include <linux/export.h>
 #include <linux/err.h>
 #include <linux/device.h>
+#include <linux/module.h>
 #include <linux/slab.h>
+#include <linux/of.h>
 
 #include <linux/usb/otg.h>
 
@@ -54,6 +56,20 @@ static struct usb_phy *__usb_find_phy_dev(struct device *dev,
 	return ERR_PTR(-ENODEV);
 }
 
+static struct usb_phy *__of_usb_find_phy(struct device_node *node)
+{
+	struct usb_phy  *phy;
+
+	list_for_each_entry(phy, &phy_list, head) {
+		if (node != phy->dev->of_node)
+			continue;
+
+		return phy;
+	}
+
+	return ERR_PTR(-ENODEV);
+}
+
 static void devm_usb_phy_release(struct device *dev, void *res)
 {
 	struct usb_phy *phy = *(struct usb_phy **)res;
@@ -129,6 +145,70 @@ err0:
 }
 EXPORT_SYMBOL(usb_get_phy);
 
+ /**
+ * devm_usb_get_phy_by_phandle - find the USB PHY by phandle
+ * @dev - device that requests this phy
+ * @phandle - name of the property holding the phy phandle value
+ * @index - the index of the phy
+ *
+ * Returns the phy driver associated with the given phandle value,
+ * after getting a refcount to it, -ENODEV if there is no such phy or
+ * -EPROBE_DEFER if there is a phandle to the phy, but the device is
+ * not yet loaded. While at that, it also associates the device with
+ * the phy using devres. On driver detach, release function is invoked
+ * on the devres data, then, devres data is freed.
+ *
+ * For use by USB host and peripheral drivers.
+ */
+struct usb_phy *devm_usb_get_phy_by_phandle(struct device *dev,
+	const char *phandle, u8 index)
+{
+	struct usb_phy	*phy = ERR_PTR(-ENOMEM), **ptr;
+	unsigned long	flags;
+	struct device_node *node;
+
+	if (!dev->of_node) {
+		dev_dbg(dev, "device does not have a device node entry\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	node = of_parse_phandle(dev->of_node, phandle, index);
+	if (!node) {
+		dev_dbg(dev, "failed to get %s phandle in %s node\n", phandle,
+			dev->of_node->full_name);
+		return ERR_PTR(-ENODEV);
+	}
+
+	ptr = devres_alloc(devm_usb_phy_release, sizeof(*ptr), GFP_KERNEL);
+	if (!ptr) {
+		dev_dbg(dev, "failed to allocate memory for devres\n");
+		goto err0;
+	}
+
+	spin_lock_irqsave(&phy_lock, flags);
+
+	phy = __of_usb_find_phy(node);
+	if (IS_ERR(phy) || !try_module_get(phy->dev->driver->owner)) {
+		phy = ERR_PTR(-EPROBE_DEFER);
+		devres_free(ptr);
+		goto err1;
+	}
+
+	*ptr = phy;
+	devres_add(dev, ptr);
+
+	get_device(phy->dev);
+
+err1:
+	spin_unlock_irqrestore(&phy_lock, flags);
+
+err0:
+	of_node_put(node);
+
+	return phy;
+}
+EXPORT_SYMBOL(devm_usb_get_phy_by_phandle);
+
 /**
  * usb_get_phy_dev - find the USB PHY
  * @dev - device that requests this phy
diff --git a/include/linux/usb/phy.h b/include/linux/usb/phy.h
index 359db7de61e4..15847cbdb512 100644
--- a/include/linux/usb/phy.h
+++ b/include/linux/usb/phy.h
@@ -167,6 +167,8 @@ extern struct usb_phy *devm_usb_get_phy(struct device *dev,
 	enum usb_phy_type type);
 extern struct usb_phy *usb_get_phy_dev(struct device *dev, u8 index);
 extern struct usb_phy *devm_usb_get_phy_dev(struct device *dev, u8 index);
+extern struct usb_phy *devm_usb_get_phy_by_phandle(struct device *dev,
+	const char *phandle, u8 index);
 extern void usb_put_phy(struct usb_phy *);
 extern void devm_usb_put_phy(struct device *dev, struct usb_phy *x);
 extern int usb_bind_phy(const char *dev_name, u8 index,
@@ -193,6 +195,12 @@ static inline struct usb_phy *devm_usb_get_phy_dev(struct device *dev, u8 index)
 	return NULL;
 }
 
+static inline struct usb_phy *devm_usb_get_phy_by_phandle(struct device *dev,
+	const char *phandle, u8 index)
+{
+	return NULL;
+}
+
 static inline void usb_put_phy(struct usb_phy *x)
 {
 }
-- 
cgit v1.2.3


From 01658f0f8d1322dbf94f289aa610731d539bf888 Mon Sep 17 00:00:00 2001
From: Kishon Vijay Abraham I <kishon@ti.com>
Date: Fri, 25 Jan 2013 15:53:57 +0530
Subject: usb: phy: add a new driver for usb part of control module

Added a new driver for the usb part of control module. This has an API
to power on the USB2 phy and an API to write to the mailbox depending on
whether MUSB has to act in host mode or in device mode.

Writing to control module registers for doing the above task which was
previously done in omap glue and in omap-usb2 phy will be removed.

Signed-off-by: Kishon Vijay Abraham I <kishon@ti.com>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 Documentation/devicetree/bindings/usb/omap-usb.txt |  30 ++-
 Documentation/devicetree/bindings/usb/usb-phy.txt  |   5 +
 drivers/usb/phy/Kconfig                            |  10 +
 drivers/usb/phy/Makefile                           |   1 +
 drivers/usb/phy/omap-control-usb.c                 | 295 +++++++++++++++++++++
 include/linux/usb/omap_control_usb.h               |  92 +++++++
 6 files changed, 432 insertions(+), 1 deletion(-)
 create mode 100644 drivers/usb/phy/omap-control-usb.c
 create mode 100644 include/linux/usb/omap_control_usb.h

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/usb/omap-usb.txt b/Documentation/devicetree/bindings/usb/omap-usb.txt
index 29a043ecda52..3d78cc2b486e 100644
--- a/Documentation/devicetree/bindings/usb/omap-usb.txt
+++ b/Documentation/devicetree/bindings/usb/omap-usb.txt
@@ -1,4 +1,4 @@
-OMAP GLUE
+OMAP GLUE AND OTHER OMAP SPECIFIC COMPONENTS
 
 OMAP MUSB GLUE
  - compatible : Should be "ti,omap4-musb" or "ti,omap3-musb"
@@ -16,6 +16,10 @@ OMAP MUSB GLUE
  - power : Should be "50". This signifies the controller can supply upto
    100mA when operating in host mode.
 
+Optional properties:
+ - ctrl-module : phandle of the control module this glue uses to write to
+   mailbox
+
 SOC specific device node entry
 usb_otg_hs: usb_otg_hs@4a0ab000 {
 	compatible = "ti,omap4-musb";
@@ -23,6 +27,7 @@ usb_otg_hs: usb_otg_hs@4a0ab000 {
 	multipoint = <1>;
 	num_eps = <16>;
 	ram_bits = <12>;
+	ctrl-module = <&omap_control_usb>;
 };
 
 Board specific device node entry
@@ -31,3 +36,26 @@ Board specific device node entry
 	mode = <3>;
 	power = <50>;
 };
+
+OMAP CONTROL USB
+
+Required properties:
+ - compatible: Should be "ti,omap-control-usb"
+ - reg : Address and length of the register set for the device. It contains
+   the address of "control_dev_conf" and "otghs_control" or "phy_power_usb"
+   depending upon omap4 or omap5.
+ - reg-names: The names of the register addresses corresponding to the registers
+   filled in "reg".
+ - ti,type: This is used to differentiate whether the control module has
+   usb mailbox or usb3 phy power. omap4 has usb mailbox in control module to
+   notify events to the musb core and omap5 has usb3 phy power register to
+   power on usb3 phy. Should be "1" if it has mailbox and "2" if it has usb3
+   phy power.
+
+omap_control_usb: omap-control-usb@4a002300 {
+	compatible = "ti,omap-control-usb";
+	reg = <0x4a002300 0x4>,
+	      <0x4a00233c 0x4>;
+	reg-names = "control_dev_conf", "otghs_control";
+	ti,type = <1>;
+};
diff --git a/Documentation/devicetree/bindings/usb/usb-phy.txt b/Documentation/devicetree/bindings/usb/usb-phy.txt
index 80d4148cb661..4234105302db 100644
--- a/Documentation/devicetree/bindings/usb/usb-phy.txt
+++ b/Documentation/devicetree/bindings/usb/usb-phy.txt
@@ -8,10 +8,15 @@ Required properties:
 add the address of control module dev conf register until a driver for
 control module is added
 
+Optional properties:
+ - ctrl-module : phandle of the control module used by PHY driver to power on
+   the PHY.
+
 This is usually a subnode of ocp2scp to which it is connected.
 
 usb2phy@4a0ad080 {
 	compatible = "ti,omap-usb2";
 	reg = <0x4a0ad080 0x58>,
 	      <0x4a002300 0x4>;
+	ctrl-module = <&omap_control_usb>;
 };
diff --git a/drivers/usb/phy/Kconfig b/drivers/usb/phy/Kconfig
index fae4d08c0ddd..053696a2b6ef 100644
--- a/drivers/usb/phy/Kconfig
+++ b/drivers/usb/phy/Kconfig
@@ -14,6 +14,16 @@ config OMAP_USB2
 	  The USB OTG controller communicates with the comparator using this
 	  driver.
 
+config OMAP_CONTROL_USB
+	tristate "OMAP CONTROL USB Driver"
+	depends on ARCH_OMAP2PLUS
+	help
+	  Enable this to add support for the USB part present in the control
+	  module. This driver has API to power on the USB2 PHY and to write to
+	  the mailbox. The mailbox is present only in omap4 and the register to
+	  power on the USB2 PHY is present in OMAP4 and OMAP5. OMAP5 has an
+	  additional register to power on USB3 PHY.
+
 config USB_ISP1301
 	tristate "NXP ISP1301 USB transceiver support"
 	depends on USB || USB_GADGET
diff --git a/drivers/usb/phy/Makefile b/drivers/usb/phy/Makefile
index ec304f642402..ee977671f530 100644
--- a/drivers/usb/phy/Makefile
+++ b/drivers/usb/phy/Makefile
@@ -5,6 +5,7 @@
 ccflags-$(CONFIG_USB_DEBUG) := -DDEBUG
 
 obj-$(CONFIG_OMAP_USB2)			+= omap-usb2.o
+obj-$(CONFIG_OMAP_CONTROL_USB)		+= omap-control-usb.o
 obj-$(CONFIG_USB_ISP1301)		+= isp1301.o
 obj-$(CONFIG_MV_U3D_PHY)		+= mv_u3d_phy.o
 obj-$(CONFIG_USB_EHCI_TEGRA)	+= tegra_usb_phy.o
diff --git a/drivers/usb/phy/omap-control-usb.c b/drivers/usb/phy/omap-control-usb.c
new file mode 100644
index 000000000000..5323b71c3521
--- /dev/null
+++ b/drivers/usb/phy/omap-control-usb.c
@@ -0,0 +1,295 @@
+/*
+ * omap-control-usb.c - The USB part of control module.
+ *
+ * Copyright (C) 2013 Texas Instruments Incorporated - http://www.ti.com
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Author: Kishon Vijay Abraham I <kishon@ti.com>
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/of.h>
+#include <linux/err.h>
+#include <linux/io.h>
+#include <linux/clk.h>
+#include <linux/usb/omap_control_usb.h>
+
+static struct omap_control_usb *control_usb;
+
+/**
+ * omap_get_control_dev - returns the device pointer for this control device
+ *
+ * This API should be called to get the device pointer for this control
+ * module device. This device pointer should be used for called other
+ * exported API's in this driver.
+ *
+ * To be used by PHY driver and glue driver.
+ */
+struct device *omap_get_control_dev(void)
+{
+	if (!control_usb)
+		return ERR_PTR(-ENODEV);
+
+	return control_usb->dev;
+}
+EXPORT_SYMBOL_GPL(omap_get_control_dev);
+
+/**
+ * omap_control_usb3_phy_power - power on/off the serializer using control
+ *	module
+ * @dev: the control module device
+ * @on: 0 to off and 1 to on based on powering on or off the PHY
+ *
+ * usb3 PHY driver should call this API to power on or off the PHY.
+ */
+void omap_control_usb3_phy_power(struct device *dev, bool on)
+{
+	u32 val;
+	unsigned long rate;
+	struct omap_control_usb	*control_usb = dev_get_drvdata(dev);
+
+	if (control_usb->type != OMAP_CTRL_DEV_TYPE2)
+		return;
+
+	rate = clk_get_rate(control_usb->sys_clk);
+	rate = rate/1000000;
+
+	val = readl(control_usb->phy_power);
+
+	if (on) {
+		val &= ~(OMAP_CTRL_USB_PWRCTL_CLK_CMD_MASK |
+			OMAP_CTRL_USB_PWRCTL_CLK_FREQ_MASK);
+		val |= OMAP_CTRL_USB3_PHY_TX_RX_POWERON <<
+			OMAP_CTRL_USB_PWRCTL_CLK_CMD_SHIFT;
+		val |= rate << OMAP_CTRL_USB_PWRCTL_CLK_FREQ_SHIFT;
+	} else {
+		val &= ~OMAP_CTRL_USB_PWRCTL_CLK_CMD_MASK;
+		val |= OMAP_CTRL_USB3_PHY_TX_RX_POWEROFF <<
+			OMAP_CTRL_USB_PWRCTL_CLK_CMD_SHIFT;
+	}
+
+	writel(val, control_usb->phy_power);
+}
+EXPORT_SYMBOL_GPL(omap_control_usb3_phy_power);
+
+/**
+ * omap_control_usb_phy_power - power on/off the phy using control module reg
+ * @dev: the control module device
+ * @on: 0 or 1, based on powering on or off the PHY
+ */
+void omap_control_usb_phy_power(struct device *dev, int on)
+{
+	u32 val;
+	struct omap_control_usb	*control_usb = dev_get_drvdata(dev);
+
+	val = readl(control_usb->dev_conf);
+
+	if (on)
+		val &= ~OMAP_CTRL_DEV_PHY_PD;
+	else
+		val |= OMAP_CTRL_DEV_PHY_PD;
+
+	writel(val, control_usb->dev_conf);
+}
+EXPORT_SYMBOL_GPL(omap_control_usb_phy_power);
+
+/**
+ * omap_control_usb_host_mode - set AVALID, VBUSVALID and ID pin in grounded
+ * @ctrl_usb: struct omap_control_usb *
+ *
+ * Writes to the mailbox register to notify the usb core that a usb
+ * device has been connected.
+ */
+static void omap_control_usb_host_mode(struct omap_control_usb *ctrl_usb)
+{
+	u32 val;
+
+	val = readl(ctrl_usb->otghs_control);
+	val &= ~(OMAP_CTRL_DEV_IDDIG | OMAP_CTRL_DEV_SESSEND);
+	val |= OMAP_CTRL_DEV_AVALID | OMAP_CTRL_DEV_VBUSVALID;
+	writel(val, ctrl_usb->otghs_control);
+}
+
+/**
+ * omap_control_usb_device_mode - set AVALID, VBUSVALID and ID pin in high
+ * impedance
+ * @ctrl_usb: struct omap_control_usb *
+ *
+ * Writes to the mailbox register to notify the usb core that it has been
+ * connected to a usb host.
+ */
+static void omap_control_usb_device_mode(struct omap_control_usb *ctrl_usb)
+{
+	u32 val;
+
+	val = readl(ctrl_usb->otghs_control);
+	val &= ~OMAP_CTRL_DEV_SESSEND;
+	val |= OMAP_CTRL_DEV_IDDIG | OMAP_CTRL_DEV_AVALID |
+		OMAP_CTRL_DEV_VBUSVALID;
+	writel(val, ctrl_usb->otghs_control);
+}
+
+/**
+ * omap_control_usb_set_sessionend - Enable SESSIONEND and IDIG to high
+ * impedance
+ * @ctrl_usb: struct omap_control_usb *
+ *
+ * Writes to the mailbox register to notify the usb core it's now in
+ * disconnected state.
+ */
+static void omap_control_usb_set_sessionend(struct omap_control_usb *ctrl_usb)
+{
+	u32 val;
+
+	val = readl(ctrl_usb->otghs_control);
+	val &= ~(OMAP_CTRL_DEV_AVALID | OMAP_CTRL_DEV_VBUSVALID);
+	val |= OMAP_CTRL_DEV_IDDIG | OMAP_CTRL_DEV_SESSEND;
+	writel(val, ctrl_usb->otghs_control);
+}
+
+/**
+ * omap_control_usb_set_mode - Calls to functions to set USB in one of host mode
+ * or device mode or to denote disconnected state
+ * @dev: the control module device
+ * @mode: The mode to which usb should be configured
+ *
+ * This is an API to write to the mailbox register to notify the usb core that
+ * a usb device has been connected.
+ */
+void omap_control_usb_set_mode(struct device *dev,
+	enum omap_control_usb_mode mode)
+{
+	struct omap_control_usb	*ctrl_usb;
+
+	if (IS_ERR(dev) || control_usb->type != OMAP_CTRL_DEV_TYPE1)
+		return;
+
+	ctrl_usb = dev_get_drvdata(dev);
+
+	switch (mode) {
+	case USB_MODE_HOST:
+		omap_control_usb_host_mode(ctrl_usb);
+		break;
+	case USB_MODE_DEVICE:
+		omap_control_usb_device_mode(ctrl_usb);
+		break;
+	case USB_MODE_DISCONNECT:
+		omap_control_usb_set_sessionend(ctrl_usb);
+		break;
+	default:
+		dev_vdbg(dev, "invalid omap control usb mode\n");
+	}
+}
+EXPORT_SYMBOL_GPL(omap_control_usb_set_mode);
+
+static int omap_control_usb_probe(struct platform_device *pdev)
+{
+	struct resource	*res;
+	struct device_node *np = pdev->dev.of_node;
+	struct omap_control_usb_platform_data *pdata = pdev->dev.platform_data;
+
+	control_usb = devm_kzalloc(&pdev->dev, sizeof(*control_usb),
+		GFP_KERNEL);
+	if (!control_usb) {
+		dev_err(&pdev->dev, "unable to alloc memory for control usb\n");
+		return -ENOMEM;
+	}
+
+	if (np) {
+		of_property_read_u32(np, "ti,type", &control_usb->type);
+	} else if (pdata) {
+		control_usb->type = pdata->type;
+	} else {
+		dev_err(&pdev->dev, "no pdata present\n");
+		return -EINVAL;
+	}
+
+	control_usb->dev	= &pdev->dev;
+
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
+		"control_dev_conf");
+	control_usb->dev_conf = devm_request_and_ioremap(&pdev->dev, res);
+	if (!control_usb->dev_conf) {
+		dev_err(&pdev->dev, "Failed to obtain io memory\n");
+		return -EADDRNOTAVAIL;
+	}
+
+	if (control_usb->type == OMAP_CTRL_DEV_TYPE1) {
+		res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
+			"otghs_control");
+		control_usb->otghs_control = devm_request_and_ioremap(
+			&pdev->dev, res);
+		if (!control_usb->otghs_control) {
+			dev_err(&pdev->dev, "Failed to obtain io memory\n");
+			return -EADDRNOTAVAIL;
+		}
+	}
+
+	if (control_usb->type == OMAP_CTRL_DEV_TYPE2) {
+		res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
+			"phy_power_usb");
+		control_usb->phy_power = devm_request_and_ioremap(
+			&pdev->dev, res);
+		if (!control_usb->phy_power) {
+			dev_dbg(&pdev->dev, "Failed to obtain io memory\n");
+			return -EADDRNOTAVAIL;
+		}
+
+		control_usb->sys_clk = devm_clk_get(control_usb->dev,
+			"sys_clkin");
+		if (IS_ERR(control_usb->sys_clk)) {
+			pr_err("%s: unable to get sys_clkin\n", __func__);
+			return -EINVAL;
+		}
+	}
+
+
+	dev_set_drvdata(control_usb->dev, control_usb);
+
+	return 0;
+}
+
+#ifdef CONFIG_OF
+static const struct of_device_id omap_control_usb_id_table[] = {
+	{ .compatible = "ti,omap-control-usb" },
+	{}
+};
+MODULE_DEVICE_TABLE(of, omap_control_usb_id_table);
+#endif
+
+static struct platform_driver omap_control_usb_driver = {
+	.probe		= omap_control_usb_probe,
+	.driver		= {
+		.name	= "omap-control-usb",
+		.owner	= THIS_MODULE,
+		.of_match_table = of_match_ptr(omap_control_usb_id_table),
+	},
+};
+
+static int __init omap_control_usb_init(void)
+{
+	return platform_driver_register(&omap_control_usb_driver);
+}
+subsys_initcall(omap_control_usb_init);
+
+static void __exit omap_control_usb_exit(void)
+{
+	platform_driver_unregister(&omap_control_usb_driver);
+}
+module_exit(omap_control_usb_exit);
+
+MODULE_ALIAS("platform: omap_control_usb");
+MODULE_AUTHOR("Texas Instruments Inc.");
+MODULE_DESCRIPTION("OMAP Control Module USB Driver");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/usb/omap_control_usb.h b/include/linux/usb/omap_control_usb.h
new file mode 100644
index 000000000000..f306db7149ca
--- /dev/null
+++ b/include/linux/usb/omap_control_usb.h
@@ -0,0 +1,92 @@
+/*
+ * omap_control_usb.h - Header file for the USB part of control module.
+ *
+ * Copyright (C) 2013 Texas Instruments Incorporated - http://www.ti.com
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Author: Kishon Vijay Abraham I <kishon@ti.com>
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef __OMAP_CONTROL_USB_H__
+#define __OMAP_CONTROL_USB_H__
+
+struct omap_control_usb {
+	struct device *dev;
+
+	u32 __iomem *dev_conf;
+	u32 __iomem *otghs_control;
+	u32 __iomem *phy_power;
+
+	struct clk *sys_clk;
+
+	u32 type;
+};
+
+struct omap_control_usb_platform_data {
+	u8 type;
+};
+
+enum omap_control_usb_mode {
+	USB_MODE_UNDEFINED = 0,
+	USB_MODE_HOST,
+	USB_MODE_DEVICE,
+	USB_MODE_DISCONNECT,
+};
+
+/* To differentiate ctrl module IP having either mailbox or USB3 PHY power */
+#define	OMAP_CTRL_DEV_TYPE1		0x1
+#define	OMAP_CTRL_DEV_TYPE2		0x2
+
+#define	OMAP_CTRL_DEV_PHY_PD		BIT(0)
+
+#define	OMAP_CTRL_DEV_AVALID		BIT(0)
+#define	OMAP_CTRL_DEV_BVALID		BIT(1)
+#define	OMAP_CTRL_DEV_VBUSVALID		BIT(2)
+#define	OMAP_CTRL_DEV_SESSEND		BIT(3)
+#define	OMAP_CTRL_DEV_IDDIG		BIT(4)
+
+#define	OMAP_CTRL_USB_PWRCTL_CLK_CMD_MASK	0x003FC000
+#define	OMAP_CTRL_USB_PWRCTL_CLK_CMD_SHIFT	0xE
+
+#define	OMAP_CTRL_USB_PWRCTL_CLK_FREQ_MASK	0xFFC00000
+#define	OMAP_CTRL_USB_PWRCTL_CLK_FREQ_SHIFT	0x16
+
+#define	OMAP_CTRL_USB3_PHY_TX_RX_POWERON	0x3
+#define	OMAP_CTRL_USB3_PHY_TX_RX_POWEROFF	0x0
+
+#if IS_ENABLED(CONFIG_OMAP_CONTROL_USB)
+extern struct device *omap_get_control_dev(void);
+extern void omap_control_usb_phy_power(struct device *dev, int on);
+extern void omap_control_usb3_phy_power(struct device *dev, bool on);
+extern void omap_control_usb_set_mode(struct device *dev,
+	enum omap_control_usb_mode mode);
+#else
+static inline struct device *omap_get_control_dev()
+{
+	return ERR_PTR(-ENODEV);
+}
+
+static inline void omap_control_usb_phy_power(struct device *dev, int on)
+{
+}
+
+static inline void omap_control_usb3_phy_power(struct device *dev, int on)
+{
+}
+
+static inline void omap_control_usb_set_mode(struct device *dev,
+	enum omap_control_usb_mode mode)
+{
+}
+#endif
+
+#endif	/* __OMAP_CONTROL_USB_H__ */
-- 
cgit v1.2.3


From ca784be36cc725bca9b526eba342de7550329731 Mon Sep 17 00:00:00 2001
From: Kishon Vijay Abraham I <kishon@ti.com>
Date: Fri, 25 Jan 2013 15:54:00 +0530
Subject: usb: start using the control module driver

Start using the control module driver for powering on the PHY and for
writing to the mailbox instead of writing to the control module
registers on their own.

Signed-off-by: Kishon Vijay Abraham I <kishon@ti.com>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 Documentation/devicetree/bindings/usb/omap-usb.txt |  4 ++
 Documentation/devicetree/bindings/usb/usb-phy.txt  |  7 +--
 drivers/usb/musb/Kconfig                           |  1 +
 drivers/usb/musb/omap2430.c                        | 68 ++++++++--------------
 drivers/usb/musb/omap2430.h                        |  9 ---
 drivers/usb/phy/Kconfig                            |  1 +
 drivers/usb/phy/omap-usb2.c                        | 41 +++----------
 include/linux/usb/omap_usb.h                       |  4 +-
 8 files changed, 40 insertions(+), 95 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/usb/omap-usb.txt b/Documentation/devicetree/bindings/usb/omap-usb.txt
index 3d78cc2b486e..1ef0ce71f8fa 100644
--- a/Documentation/devicetree/bindings/usb/omap-usb.txt
+++ b/Documentation/devicetree/bindings/usb/omap-usb.txt
@@ -3,6 +3,9 @@ OMAP GLUE AND OTHER OMAP SPECIFIC COMPONENTS
 OMAP MUSB GLUE
  - compatible : Should be "ti,omap4-musb" or "ti,omap3-musb"
  - ti,hwmods : must be "usb_otg_hs"
+ - ti,has-mailbox : to specify that omap uses an external mailbox
+   (in control module) to communicate with the musb core during device connect
+   and disconnect.
  - multipoint : Should be "1" indicating the musb controller supports
    multipoint. This is a MUSB configuration-specific setting.
  - num_eps : Specifies the number of endpoints. This is also a
@@ -24,6 +27,7 @@ SOC specific device node entry
 usb_otg_hs: usb_otg_hs@4a0ab000 {
 	compatible = "ti,omap4-musb";
 	ti,hwmods = "usb_otg_hs";
+	ti,has-mailbox;
 	multipoint = <1>;
 	num_eps = <16>;
 	ram_bits = <12>;
diff --git a/Documentation/devicetree/bindings/usb/usb-phy.txt b/Documentation/devicetree/bindings/usb/usb-phy.txt
index 4234105302db..b4b86bb831b2 100644
--- a/Documentation/devicetree/bindings/usb/usb-phy.txt
+++ b/Documentation/devicetree/bindings/usb/usb-phy.txt
@@ -4,9 +4,7 @@ OMAP USB2 PHY
 
 Required properties:
  - compatible: Should be "ti,omap-usb2"
- - reg : Address and length of the register set for the device. Also
-add the address of control module dev conf register until a driver for
-control module is added
+ - reg : Address and length of the register set for the device.
 
 Optional properties:
  - ctrl-module : phandle of the control module used by PHY driver to power on
@@ -16,7 +14,6 @@ This is usually a subnode of ocp2scp to which it is connected.
 
 usb2phy@4a0ad080 {
 	compatible = "ti,omap-usb2";
-	reg = <0x4a0ad080 0x58>,
-	      <0x4a002300 0x4>;
+	reg = <0x4a0ad080 0x58>;
 	ctrl-module = <&omap_control_usb>;
 };
diff --git a/drivers/usb/musb/Kconfig b/drivers/usb/musb/Kconfig
index 23a0b7f0892d..de6e5ce26316 100644
--- a/drivers/usb/musb/Kconfig
+++ b/drivers/usb/musb/Kconfig
@@ -11,6 +11,7 @@ config USB_MUSB_HDRC
 	select NOP_USB_XCEIV if (SOC_TI81XX || SOC_AM33XX)
 	select TWL4030_USB if MACH_OMAP_3430SDP
 	select TWL6030_USB if MACH_OMAP_4430SDP || MACH_OMAP4_PANDA
+	select OMAP_CONTROL_USB if MACH_OMAP_4430SDP || MACH_OMAP4_PANDA
 	select USB_OTG_UTILS
 	help
 	  Say Y here if your system has a dual role high speed USB
diff --git a/drivers/usb/musb/omap2430.c b/drivers/usb/musb/omap2430.c
index da00af460794..779862d24590 100644
--- a/drivers/usb/musb/omap2430.c
+++ b/drivers/usb/musb/omap2430.c
@@ -37,6 +37,7 @@
 #include <linux/err.h>
 #include <linux/delay.h>
 #include <linux/usb/musb-omap.h>
+#include <linux/usb/omap_control_usb.h>
 
 #include "musb_core.h"
 #include "omap2430.h"
@@ -46,7 +47,7 @@ struct omap2430_glue {
 	struct platform_device	*musb;
 	enum omap_musb_vbus_id_status status;
 	struct work_struct	omap_musb_mailbox_work;
-	u32 __iomem		*control_otghs;
+	struct device		*control_otghs;
 };
 #define glue_to_musb(g)		platform_get_drvdata(g->musb)
 
@@ -54,26 +55,6 @@ struct omap2430_glue		*_glue;
 
 static struct timer_list musb_idle_timer;
 
-/**
- * omap4_usb_phy_mailbox - write to usb otg mailbox
- * @glue: struct omap2430_glue *
- * @val: the value to be written to the mailbox
- *
- * On detection of a device (ID pin is grounded), this API should be called
- * to set AVALID, VBUSVALID and ID pin is grounded.
- *
- * When OMAP is connected to a host (OMAP in device mode), this API
- * is called to set AVALID, VBUSVALID and ID pin in high impedance.
- *
- * XXX: This function will be removed once we have a seperate driver for
- * control module
- */
-static void omap4_usb_phy_mailbox(struct omap2430_glue *glue, u32 val)
-{
-	if (glue->control_otghs)
-		writel(val, glue->control_otghs);
-}
-
 static void musb_do_idle(unsigned long _musb)
 {
 	struct musb	*musb = (void *)_musb;
@@ -269,7 +250,6 @@ EXPORT_SYMBOL_GPL(omap_musb_mailbox);
 
 static void omap_musb_set_mailbox(struct omap2430_glue *glue)
 {
-	u32 val;
 	struct musb *musb = glue_to_musb(glue);
 	struct device *dev = musb->controller;
 	struct musb_hdrc_platform_data *pdata = dev->platform_data;
@@ -285,8 +265,8 @@ static void omap_musb_set_mailbox(struct omap2430_glue *glue)
 		musb->xceiv->last_event = USB_EVENT_ID;
 		if (musb->gadget_driver) {
 			pm_runtime_get_sync(dev);
-			val = AVALID | VBUSVALID;
-			omap4_usb_phy_mailbox(glue, val);
+			omap_control_usb_set_mode(glue->control_otghs,
+				USB_MODE_HOST);
 			omap2430_musb_set_vbus(musb, 1);
 		}
 		break;
@@ -299,8 +279,7 @@ static void omap_musb_set_mailbox(struct omap2430_glue *glue)
 		musb->xceiv->last_event = USB_EVENT_VBUS;
 		if (musb->gadget_driver)
 			pm_runtime_get_sync(dev);
-		val = IDDIG | AVALID | VBUSVALID;
-		omap4_usb_phy_mailbox(glue, val);
+		omap_control_usb_set_mode(glue->control_otghs, USB_MODE_DEVICE);
 		break;
 
 	case OMAP_MUSB_ID_FLOAT:
@@ -317,8 +296,8 @@ static void omap_musb_set_mailbox(struct omap2430_glue *glue)
 			if (musb->xceiv->otg->set_vbus)
 				otg_set_vbus(musb->xceiv->otg, 0);
 		}
-		val = SESSEND | IDDIG;
-		omap4_usb_phy_mailbox(glue, val);
+		omap_control_usb_set_mode(glue->control_otghs,
+			USB_MODE_DISCONNECT);
 		break;
 	default:
 		dev_dbg(dev, "ID float\n");
@@ -415,7 +394,6 @@ err1:
 static void omap2430_musb_enable(struct musb *musb)
 {
 	u8		devctl;
-	u32		val;
 	unsigned long timeout = jiffies + msecs_to_jiffies(1000);
 	struct device *dev = musb->controller;
 	struct omap2430_glue *glue = dev_get_drvdata(dev->parent);
@@ -425,8 +403,7 @@ static void omap2430_musb_enable(struct musb *musb)
 	switch (glue->status) {
 
 	case OMAP_MUSB_ID_GROUND:
-		val = AVALID | VBUSVALID;
-		omap4_usb_phy_mailbox(glue, val);
+		omap_control_usb_set_mode(glue->control_otghs, USB_MODE_HOST);
 		if (data->interface_type != MUSB_INTERFACE_UTMI)
 			break;
 		devctl = musb_readb(musb->mregs, MUSB_DEVCTL);
@@ -445,8 +422,7 @@ static void omap2430_musb_enable(struct musb *musb)
 		break;
 
 	case OMAP_MUSB_VBUS_VALID:
-		val = IDDIG | AVALID | VBUSVALID;
-		omap4_usb_phy_mailbox(glue, val);
+		omap_control_usb_set_mode(glue->control_otghs, USB_MODE_DEVICE);
 		break;
 
 	default:
@@ -456,14 +432,12 @@ static void omap2430_musb_enable(struct musb *musb)
 
 static void omap2430_musb_disable(struct musb *musb)
 {
-	u32 val;
 	struct device *dev = musb->controller;
 	struct omap2430_glue *glue = dev_get_drvdata(dev->parent);
 
-	if (glue->status != OMAP_MUSB_UNKNOWN) {
-		val = SESSEND | IDDIG;
-		omap4_usb_phy_mailbox(glue, val);
-	}
+	if (glue->status != OMAP_MUSB_UNKNOWN)
+		omap_control_usb_set_mode(glue->control_otghs,
+			USB_MODE_DISCONNECT);
 }
 
 static int omap2430_musb_exit(struct musb *musb)
@@ -498,7 +472,6 @@ static int omap2430_probe(struct platform_device *pdev)
 	struct omap2430_glue		*glue;
 	struct device_node		*np = pdev->dev.of_node;
 	struct musb_hdrc_config		*config;
-	struct resource			*res;
 	int				ret = -ENOMEM;
 
 	glue = devm_kzalloc(&pdev->dev, sizeof(*glue), GFP_KERNEL);
@@ -521,12 +494,6 @@ static int omap2430_probe(struct platform_device *pdev)
 	glue->musb			= musb;
 	glue->status			= OMAP_MUSB_UNKNOWN;
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
-
-	glue->control_otghs = devm_request_and_ioremap(&pdev->dev, res);
-	if (glue->control_otghs == NULL)
-		dev_dbg(&pdev->dev, "Failed to obtain control memory\n");
-
 	if (np) {
 		pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
 		if (!pdata) {
@@ -558,11 +525,22 @@ static int omap2430_probe(struct platform_device *pdev)
 		of_property_read_u32(np, "ram_bits", (u32 *)&config->ram_bits);
 		of_property_read_u32(np, "power", (u32 *)&pdata->power);
 		config->multipoint = of_property_read_bool(np, "multipoint");
+		pdata->has_mailbox = of_property_read_bool(np,
+		    "ti,has-mailbox");
 
 		pdata->board_data	= data;
 		pdata->config		= config;
 	}
 
+	if (pdata->has_mailbox) {
+		glue->control_otghs = omap_get_control_dev();
+		if (IS_ERR(glue->control_otghs)) {
+			dev_vdbg(&pdev->dev, "Failed to get control device\n");
+			return -ENODEV;
+		}
+	} else {
+		glue->control_otghs = ERR_PTR(-ENODEV);
+	}
 	pdata->platform_ops		= &omap2430_ops;
 
 	platform_set_drvdata(pdev, glue);
diff --git a/drivers/usb/musb/omap2430.h b/drivers/usb/musb/omap2430.h
index 8ef656659fcb..1b5e83a9840e 100644
--- a/drivers/usb/musb/omap2430.h
+++ b/drivers/usb/musb/omap2430.h
@@ -49,13 +49,4 @@
 #define OTG_FORCESTDBY		0x414
 #	define	ENABLEFORCE		(1 << 0)
 
-/*
- * Control Module bit definitions
- * XXX: Will be removed once we have a driver for control module.
- */
-#define	AVALID				BIT(0)
-#define	BVALID				BIT(1)
-#define	VBUSVALID			BIT(2)
-#define	SESSEND				BIT(3)
-#define	IDDIG				BIT(4)
 #endif	/* __MUSB_OMAP243X_H__ */
diff --git a/drivers/usb/phy/Kconfig b/drivers/usb/phy/Kconfig
index 053696a2b6ef..f989511f13da 100644
--- a/drivers/usb/phy/Kconfig
+++ b/drivers/usb/phy/Kconfig
@@ -8,6 +8,7 @@ config OMAP_USB2
 	tristate "OMAP USB2 PHY Driver"
 	depends on ARCH_OMAP2PLUS
 	select USB_OTG_UTILS
+	select OMAP_CONTROL_USB
 	help
 	  Enable this to support the transceiver that is part of SOC. This
 	  driver takes care of all the PHY functionality apart from comparator.
diff --git a/drivers/usb/phy/omap-usb2.c b/drivers/usb/phy/omap-usb2.c
index 26ae8f49225c..c2b4c8e6f3c6 100644
--- a/drivers/usb/phy/omap-usb2.c
+++ b/drivers/usb/phy/omap-usb2.c
@@ -27,6 +27,7 @@
 #include <linux/err.h>
 #include <linux/pm_runtime.h>
 #include <linux/delay.h>
+#include <linux/usb/omap_control_usb.h>
 
 /**
  * omap_usb2_set_comparator - links the comparator present in the sytem with
@@ -52,29 +53,6 @@ int omap_usb2_set_comparator(struct phy_companion *comparator)
 }
 EXPORT_SYMBOL_GPL(omap_usb2_set_comparator);
 
-/**
- * omap_usb_phy_power - power on/off the phy using control module reg
- * @phy: struct omap_usb *
- * @on: 0 or 1, based on powering on or off the PHY
- *
- * XXX: Remove this function once control module driver gets merged
- */
-static void omap_usb_phy_power(struct omap_usb *phy, int on)
-{
-	u32 val;
-
-	if (on) {
-		val = readl(phy->control_dev);
-		if (val & PHY_PD) {
-			writel(~PHY_PD, phy->control_dev);
-			/* XXX: add proper documentation for this delay */
-			mdelay(200);
-		}
-	} else {
-		writel(PHY_PD, phy->control_dev);
-	}
-}
-
 static int omap_usb_set_vbus(struct usb_otg *otg, bool enabled)
 {
 	struct omap_usb *phy = phy_to_omapusb(otg->phy);
@@ -124,7 +102,7 @@ static int omap_usb2_suspend(struct usb_phy *x, int suspend)
 	struct omap_usb *phy = phy_to_omapusb(x);
 
 	if (suspend && !phy->is_suspended) {
-		omap_usb_phy_power(phy, 0);
+		omap_control_usb_phy_power(phy->control_dev, 0);
 		pm_runtime_put_sync(phy->dev);
 		phy->is_suspended = 1;
 	} else if (!suspend && phy->is_suspended) {
@@ -134,7 +112,7 @@ static int omap_usb2_suspend(struct usb_phy *x, int suspend)
 									ret);
 			return ret;
 		}
-		omap_usb_phy_power(phy, 1);
+		omap_control_usb_phy_power(phy->control_dev, 1);
 		phy->is_suspended = 0;
 	}
 
@@ -145,7 +123,6 @@ static int omap_usb2_probe(struct platform_device *pdev)
 {
 	struct omap_usb			*phy;
 	struct usb_otg			*otg;
-	struct resource			*res;
 
 	phy = devm_kzalloc(&pdev->dev, sizeof(*phy), GFP_KERNEL);
 	if (!phy) {
@@ -166,16 +143,14 @@ static int omap_usb2_probe(struct platform_device *pdev)
 	phy->phy.set_suspend	= omap_usb2_suspend;
 	phy->phy.otg		= otg;
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
-
-	phy->control_dev = devm_request_and_ioremap(&pdev->dev, res);
-	if (phy->control_dev == NULL) {
-		dev_err(&pdev->dev, "Failed to obtain io memory\n");
-		return -ENXIO;
+	phy->control_dev = omap_get_control_dev();
+	if (IS_ERR(phy->control_dev)) {
+		dev_dbg(&pdev->dev, "Failed to get control device\n");
+		return -ENODEV;
 	}
 
 	phy->is_suspended	= 1;
-	omap_usb_phy_power(phy, 0);
+	omap_control_usb_phy_power(phy->control_dev, 0);
 
 	otg->set_host		= omap_usb_set_host;
 	otg->set_peripheral	= omap_usb_set_peripheral;
diff --git a/include/linux/usb/omap_usb.h b/include/linux/usb/omap_usb.h
index 0ea17f8ae820..3db9b5316b10 100644
--- a/include/linux/usb/omap_usb.h
+++ b/include/linux/usb/omap_usb.h
@@ -25,13 +25,11 @@ struct omap_usb {
 	struct usb_phy		phy;
 	struct phy_companion	*comparator;
 	struct device		*dev;
-	u32 __iomem		*control_dev;
+	struct device		*control_dev;
 	struct clk		*wkupclk;
 	u8			is_suspended:1;
 };
 
-#define	PHY_PD	0x1
-
 #define	phy_to_omapusb(x)	container_of((x), struct omap_usb, phy)
 
 #if defined(CONFIG_OMAP_USB2) || defined(CONFIG_OMAP_USB2_MODULE)
-- 
cgit v1.2.3


From 57f6ce072e35770a63be0c5d5e82f90d8da7d665 Mon Sep 17 00:00:00 2001
From: Kishon Vijay Abraham I <kishon@ti.com>
Date: Fri, 25 Jan 2013 08:21:48 +0530
Subject: usb: phy: add a new driver for usb3 phy

Added a driver for usb3 phy that handles the interaction between usb phy
device and dwc3 controller.

This also includes device tree support for usb3 phy driver and
the documentation with device tree binding information is updated.

Currently writing to control module register is taken care in this
driver which will be removed once the control module driver is in place.

Signed-off-by: Kishon Vijay Abraham I <kishon@ti.com>
Signed-off-by: Felipe Balbi <balbi@ti.com>
Signed-off-by: Moiz Sonasath <m-sonasath@ti.com>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 Documentation/devicetree/bindings/usb/usb-phy.txt |  23 ++
 drivers/usb/phy/Kconfig                           |  10 +
 drivers/usb/phy/Makefile                          |   1 +
 drivers/usb/phy/omap-usb3.c                       | 355 ++++++++++++++++++++++
 include/linux/usb/omap_usb.h                      |  23 ++
 5 files changed, 412 insertions(+)
 create mode 100644 drivers/usb/phy/omap-usb3.c

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/usb/usb-phy.txt b/Documentation/devicetree/bindings/usb/usb-phy.txt
index b4b86bb831b2..61496f5cb095 100644
--- a/Documentation/devicetree/bindings/usb/usb-phy.txt
+++ b/Documentation/devicetree/bindings/usb/usb-phy.txt
@@ -17,3 +17,26 @@ usb2phy@4a0ad080 {
 	reg = <0x4a0ad080 0x58>;
 	ctrl-module = <&omap_control_usb>;
 };
+
+OMAP USB3 PHY
+
+Required properties:
+ - compatible: Should be "ti,omap-usb3"
+ - reg : Address and length of the register set for the device.
+ - reg-names: The names of the register addresses corresponding to the registers
+   filled in "reg".
+
+Optional properties:
+ - ctrl-module : phandle of the control module used by PHY driver to power on
+   the PHY.
+
+This is usually a subnode of ocp2scp to which it is connected.
+
+usb3phy@4a084400 {
+	compatible = "ti,omap-usb3";
+	reg = <0x4a084400 0x80>,
+	      <0x4a084800 0x64>,
+	      <0x4a084c00 0x40>;
+	reg-names = "phy_rx", "phy_tx", "pll_ctrl";
+	ctrl-module = <&omap_control_usb>;
+};
diff --git a/drivers/usb/phy/Kconfig b/drivers/usb/phy/Kconfig
index f989511f13da..9bbedecb3371 100644
--- a/drivers/usb/phy/Kconfig
+++ b/drivers/usb/phy/Kconfig
@@ -15,6 +15,16 @@ config OMAP_USB2
 	  The USB OTG controller communicates with the comparator using this
 	  driver.
 
+config OMAP_USB3
+	tristate "OMAP USB3 PHY Driver"
+	select USB_OTG_UTILS
+	select OMAP_CONTROL_USB
+	help
+	  Enable this to support the USB3 PHY that is part of SOC. This
+	  driver takes care of all the PHY functionality apart from comparator.
+	  This driver interacts with the "OMAP Control USB Driver" to power
+	  on/off the PHY.
+
 config OMAP_CONTROL_USB
 	tristate "OMAP CONTROL USB Driver"
 	depends on ARCH_OMAP2PLUS
diff --git a/drivers/usb/phy/Makefile b/drivers/usb/phy/Makefile
index ee977671f530..b13faa193e0c 100644
--- a/drivers/usb/phy/Makefile
+++ b/drivers/usb/phy/Makefile
@@ -5,6 +5,7 @@
 ccflags-$(CONFIG_USB_DEBUG) := -DDEBUG
 
 obj-$(CONFIG_OMAP_USB2)			+= omap-usb2.o
+obj-$(CONFIG_OMAP_USB3)			+= omap-usb3.o
 obj-$(CONFIG_OMAP_CONTROL_USB)		+= omap-control-usb.o
 obj-$(CONFIG_USB_ISP1301)		+= isp1301.o
 obj-$(CONFIG_MV_U3D_PHY)		+= mv_u3d_phy.o
diff --git a/drivers/usb/phy/omap-usb3.c b/drivers/usb/phy/omap-usb3.c
new file mode 100644
index 000000000000..fadc0c2b65bb
--- /dev/null
+++ b/drivers/usb/phy/omap-usb3.c
@@ -0,0 +1,355 @@
+/*
+ * omap-usb3 - USB PHY, talking to dwc3 controller in OMAP.
+ *
+ * Copyright (C) 2013 Texas Instruments Incorporated - http://www.ti.com
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Author: Kishon Vijay Abraham I <kishon@ti.com>
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/usb/omap_usb.h>
+#include <linux/of.h>
+#include <linux/clk.h>
+#include <linux/err.h>
+#include <linux/pm_runtime.h>
+#include <linux/delay.h>
+#include <linux/usb/omap_control_usb.h>
+
+#define	NUM_SYS_CLKS		5
+#define	PLL_STATUS		0x00000004
+#define	PLL_GO			0x00000008
+#define	PLL_CONFIGURATION1	0x0000000C
+#define	PLL_CONFIGURATION2	0x00000010
+#define	PLL_CONFIGURATION3	0x00000014
+#define	PLL_CONFIGURATION4	0x00000020
+
+#define	PLL_REGM_MASK		0x001FFE00
+#define	PLL_REGM_SHIFT		0x9
+#define	PLL_REGM_F_MASK		0x0003FFFF
+#define	PLL_REGM_F_SHIFT	0x0
+#define	PLL_REGN_MASK		0x000001FE
+#define	PLL_REGN_SHIFT		0x1
+#define	PLL_SELFREQDCO_MASK	0x0000000E
+#define	PLL_SELFREQDCO_SHIFT	0x1
+#define	PLL_SD_MASK		0x0003FC00
+#define	PLL_SD_SHIFT		0x9
+#define	SET_PLL_GO		0x1
+#define	PLL_TICOPWDN		0x10000
+#define	PLL_LOCK		0x2
+#define	PLL_IDLE		0x1
+
+/*
+ * This is an Empirical value that works, need to confirm the actual
+ * value required for the USB3PHY_PLL_CONFIGURATION2.PLL_IDLE status
+ * to be correctly reflected in the USB3PHY_PLL_STATUS register.
+ */
+# define PLL_IDLE_TIME  100;
+
+enum sys_clk_rate {
+	CLK_RATE_UNDEFINED = -1,
+	CLK_RATE_12MHZ,
+	CLK_RATE_16MHZ,
+	CLK_RATE_19MHZ,
+	CLK_RATE_26MHZ,
+	CLK_RATE_38MHZ
+};
+
+static struct usb_dpll_params omap_usb3_dpll_params[NUM_SYS_CLKS] = {
+	{1250, 5, 4, 20, 0},		/* 12 MHz */
+	{3125, 20, 4, 20, 0},		/* 16.8 MHz */
+	{1172, 8, 4, 20, 65537},	/* 19.2 MHz */
+	{1250, 12, 4, 20, 0},		/* 26 MHz */
+	{3125, 47, 4, 20, 92843},	/* 38.4 MHz */
+};
+
+static int omap_usb3_suspend(struct usb_phy *x, int suspend)
+{
+	struct omap_usb *phy = phy_to_omapusb(x);
+	int	val;
+	int timeout = PLL_IDLE_TIME;
+
+	if (suspend && !phy->is_suspended) {
+		val = omap_usb_readl(phy->pll_ctrl_base, PLL_CONFIGURATION2);
+		val |= PLL_IDLE;
+		omap_usb_writel(phy->pll_ctrl_base, PLL_CONFIGURATION2, val);
+
+		do {
+			val = omap_usb_readl(phy->pll_ctrl_base, PLL_STATUS);
+			if (val & PLL_TICOPWDN)
+				break;
+			udelay(1);
+		} while (--timeout);
+
+		omap_control_usb3_phy_power(phy->control_dev, 0);
+
+		phy->is_suspended	= 1;
+	} else if (!suspend && phy->is_suspended) {
+		phy->is_suspended	= 0;
+
+		val = omap_usb_readl(phy->pll_ctrl_base, PLL_CONFIGURATION2);
+		val &= ~PLL_IDLE;
+		omap_usb_writel(phy->pll_ctrl_base, PLL_CONFIGURATION2, val);
+
+		do {
+			val = omap_usb_readl(phy->pll_ctrl_base, PLL_STATUS);
+			if (!(val & PLL_TICOPWDN))
+				break;
+			udelay(1);
+		} while (--timeout);
+	}
+
+	return 0;
+}
+
+static inline enum sys_clk_rate __get_sys_clk_index(unsigned long rate)
+{
+	switch (rate) {
+	case 12000000:
+		return CLK_RATE_12MHZ;
+	case 16800000:
+		return CLK_RATE_16MHZ;
+	case 19200000:
+		return CLK_RATE_19MHZ;
+	case 26000000:
+		return CLK_RATE_26MHZ;
+	case 38400000:
+		return CLK_RATE_38MHZ;
+	default:
+		return CLK_RATE_UNDEFINED;
+	}
+}
+
+static void omap_usb_dpll_relock(struct omap_usb *phy)
+{
+	u32		val;
+	unsigned long	timeout;
+
+	omap_usb_writel(phy->pll_ctrl_base, PLL_GO, SET_PLL_GO);
+
+	timeout = jiffies + msecs_to_jiffies(20);
+	do {
+		val = omap_usb_readl(phy->pll_ctrl_base, PLL_STATUS);
+		if (val & PLL_LOCK)
+			break;
+	} while (!WARN_ON(time_after(jiffies, timeout)));
+}
+
+static int omap_usb_dpll_lock(struct omap_usb *phy)
+{
+	u32			val;
+	unsigned long		rate;
+	enum sys_clk_rate	clk_index;
+
+	rate		= clk_get_rate(phy->sys_clk);
+	clk_index	= __get_sys_clk_index(rate);
+
+	if (clk_index == CLK_RATE_UNDEFINED) {
+		pr_err("dpll cannot be locked for sys clk freq:%luHz\n", rate);
+		return -EINVAL;
+	}
+
+	val = omap_usb_readl(phy->pll_ctrl_base, PLL_CONFIGURATION1);
+	val &= ~PLL_REGN_MASK;
+	val |= omap_usb3_dpll_params[clk_index].n << PLL_REGN_SHIFT;
+	omap_usb_writel(phy->pll_ctrl_base, PLL_CONFIGURATION1, val);
+
+	val = omap_usb_readl(phy->pll_ctrl_base, PLL_CONFIGURATION2);
+	val &= ~PLL_SELFREQDCO_MASK;
+	val |= omap_usb3_dpll_params[clk_index].freq << PLL_SELFREQDCO_SHIFT;
+	omap_usb_writel(phy->pll_ctrl_base, PLL_CONFIGURATION2, val);
+
+	val = omap_usb_readl(phy->pll_ctrl_base, PLL_CONFIGURATION1);
+	val &= ~PLL_REGM_MASK;
+	val |= omap_usb3_dpll_params[clk_index].m << PLL_REGM_SHIFT;
+	omap_usb_writel(phy->pll_ctrl_base, PLL_CONFIGURATION1, val);
+
+	val = omap_usb_readl(phy->pll_ctrl_base, PLL_CONFIGURATION4);
+	val &= ~PLL_REGM_F_MASK;
+	val |= omap_usb3_dpll_params[clk_index].mf << PLL_REGM_F_SHIFT;
+	omap_usb_writel(phy->pll_ctrl_base, PLL_CONFIGURATION4, val);
+
+	val = omap_usb_readl(phy->pll_ctrl_base, PLL_CONFIGURATION3);
+	val &= ~PLL_SD_MASK;
+	val |= omap_usb3_dpll_params[clk_index].sd << PLL_SD_SHIFT;
+	omap_usb_writel(phy->pll_ctrl_base, PLL_CONFIGURATION3, val);
+
+	omap_usb_dpll_relock(phy);
+
+	return 0;
+}
+
+static int omap_usb3_init(struct usb_phy *x)
+{
+	struct omap_usb	*phy = phy_to_omapusb(x);
+
+	omap_usb_dpll_lock(phy);
+	omap_control_usb3_phy_power(phy->control_dev, 1);
+
+	return 0;
+}
+
+static int omap_usb3_probe(struct platform_device *pdev)
+{
+	struct omap_usb			*phy;
+	struct resource			*res;
+
+	phy = devm_kzalloc(&pdev->dev, sizeof(*phy), GFP_KERNEL);
+	if (!phy) {
+		dev_err(&pdev->dev, "unable to alloc mem for OMAP USB3 PHY\n");
+		return -ENOMEM;
+	}
+
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "pll_ctrl");
+	phy->pll_ctrl_base = devm_request_and_ioremap(&pdev->dev, res);
+	if (!phy->pll_ctrl_base) {
+		dev_err(&pdev->dev, "ioremap of pll_ctrl failed\n");
+		return -ENOMEM;
+	}
+
+	phy->dev		= &pdev->dev;
+
+	phy->phy.dev		= phy->dev;
+	phy->phy.label		= "omap-usb3";
+	phy->phy.init		= omap_usb3_init;
+	phy->phy.set_suspend	= omap_usb3_suspend;
+	phy->phy.type		= USB_PHY_TYPE_USB3;
+
+	phy->is_suspended	= 1;
+	phy->wkupclk = devm_clk_get(phy->dev, "usb_phy_cm_clk32k");
+	if (IS_ERR(phy->wkupclk)) {
+		dev_err(&pdev->dev, "unable to get usb_phy_cm_clk32k\n");
+		return PTR_ERR(phy->wkupclk);
+	}
+	clk_prepare(phy->wkupclk);
+
+	phy->optclk = devm_clk_get(phy->dev, "usb_otg_ss_refclk960m");
+	if (IS_ERR(phy->optclk)) {
+		dev_err(&pdev->dev, "unable to get usb_otg_ss_refclk960m\n");
+		return PTR_ERR(phy->optclk);
+	}
+	clk_prepare(phy->optclk);
+
+	phy->sys_clk = devm_clk_get(phy->dev, "sys_clkin");
+	if (IS_ERR(phy->sys_clk)) {
+		pr_err("%s: unable to get sys_clkin\n", __func__);
+		return -EINVAL;
+	}
+
+	phy->control_dev = omap_get_control_dev();
+	if (IS_ERR(phy->control_dev)) {
+		dev_dbg(&pdev->dev, "Failed to get control device\n");
+		return -ENODEV;
+	}
+
+	omap_control_usb3_phy_power(phy->control_dev, 0);
+	usb_add_phy_dev(&phy->phy);
+
+	platform_set_drvdata(pdev, phy);
+
+	pm_runtime_enable(phy->dev);
+	pm_runtime_get(&pdev->dev);
+
+	return 0;
+}
+
+static int omap_usb3_remove(struct platform_device *pdev)
+{
+	struct omap_usb *phy = platform_get_drvdata(pdev);
+
+	clk_unprepare(phy->wkupclk);
+	clk_unprepare(phy->optclk);
+	usb_remove_phy(&phy->phy);
+	if (!pm_runtime_suspended(&pdev->dev))
+		pm_runtime_put(&pdev->dev);
+	pm_runtime_disable(&pdev->dev);
+
+	return 0;
+}
+
+#ifdef CONFIG_PM_RUNTIME
+
+static int omap_usb3_runtime_suspend(struct device *dev)
+{
+	struct platform_device	*pdev = to_platform_device(dev);
+	struct omap_usb	*phy = platform_get_drvdata(pdev);
+
+	clk_disable(phy->wkupclk);
+	clk_disable(phy->optclk);
+
+	return 0;
+}
+
+static int omap_usb3_runtime_resume(struct device *dev)
+{
+	u32 ret = 0;
+	struct platform_device	*pdev = to_platform_device(dev);
+	struct omap_usb	*phy = platform_get_drvdata(pdev);
+
+	ret = clk_enable(phy->optclk);
+	if (ret) {
+		dev_err(phy->dev, "Failed to enable optclk %d\n", ret);
+		goto err1;
+	}
+
+	ret = clk_enable(phy->wkupclk);
+	if (ret) {
+		dev_err(phy->dev, "Failed to enable wkupclk %d\n", ret);
+		goto err2;
+	}
+
+	return 0;
+
+err2:
+	clk_disable(phy->optclk);
+
+err1:
+	return ret;
+}
+
+static const struct dev_pm_ops omap_usb3_pm_ops = {
+	SET_RUNTIME_PM_OPS(omap_usb3_runtime_suspend, omap_usb3_runtime_resume,
+		NULL)
+};
+
+#define DEV_PM_OPS     (&omap_usb3_pm_ops)
+#else
+#define DEV_PM_OPS     NULL
+#endif
+
+#ifdef CONFIG_OF
+static const struct of_device_id omap_usb3_id_table[] = {
+	{ .compatible = "ti,omap-usb3" },
+	{}
+};
+MODULE_DEVICE_TABLE(of, omap_usb3_id_table);
+#endif
+
+static struct platform_driver omap_usb3_driver = {
+	.probe		= omap_usb3_probe,
+	.remove		= omap_usb3_remove,
+	.driver		= {
+		.name	= "omap-usb3",
+		.owner	= THIS_MODULE,
+		.pm	= DEV_PM_OPS,
+		.of_match_table = of_match_ptr(omap_usb3_id_table),
+	},
+};
+
+module_platform_driver(omap_usb3_driver);
+
+MODULE_ALIAS("platform: omap_usb3");
+MODULE_AUTHOR("Texas Instruments Inc.");
+MODULE_DESCRIPTION("OMAP USB3 phy driver");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/usb/omap_usb.h b/include/linux/usb/omap_usb.h
index 3db9b5316b10..6ae29360e1d2 100644
--- a/include/linux/usb/omap_usb.h
+++ b/include/linux/usb/omap_usb.h
@@ -19,14 +19,26 @@
 #ifndef __DRIVERS_OMAP_USB2_H
 #define __DRIVERS_OMAP_USB2_H
 
+#include <linux/io.h>
 #include <linux/usb/otg.h>
 
+struct usb_dpll_params {
+	u16	m;
+	u8	n;
+	u8	freq:3;
+	u8	sd;
+	u32	mf;
+};
+
 struct omap_usb {
 	struct usb_phy		phy;
 	struct phy_companion	*comparator;
+	void __iomem		*pll_ctrl_base;
 	struct device		*dev;
 	struct device		*control_dev;
 	struct clk		*wkupclk;
+	struct clk		*sys_clk;
+	struct clk		*optclk;
 	u8			is_suspended:1;
 };
 
@@ -41,4 +53,15 @@ static inline int omap_usb2_set_comparator(struct phy_companion *comparator)
 }
 #endif
 
+static inline u32 omap_usb_readl(void __iomem *addr, unsigned offset)
+{
+	return __raw_readl(addr + offset);
+}
+
+static inline void omap_usb_writel(void __iomem *addr, unsigned offset,
+	u32 data)
+{
+	__raw_writel(data, addr + offset);
+}
+
 #endif /* __DRIVERS_OMAP_USB_H */
-- 
cgit v1.2.3


From 7fb96565e3e18ad41857ca6ffdaa9a26ae92df5a Mon Sep 17 00:00:00 2001
From: "K. Y. Srinivasan" <kys@microsoft.com>
Date: Wed, 23 Jan 2013 17:42:40 -0800
Subject: Drivers: hv: vmbus: Consolidate all offer GUID definitions in
 hyperv.h

Consolidate all GUID definitions in hyperv.h and use these definitions in implementing
channel bindings (as far as interrupt delivery goes).

Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hv/channel_mgmt.c | 31 +++-------------
 include/linux/hyperv.h    | 94 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 100 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index 7478ef914b9a..53a8600162a5 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -265,36 +265,17 @@ enum {
 };
 
 /*
- * This is an array of channels (devices) that are performance critical.
+ * This is an array of device_ids (device types) that are performance critical.
  * We attempt to distribute the interrupt load for these devices across
  * all available CPUs.
  */
-static const uuid_le hp_devs[] = {
-	/* {32412632-86cb-44a2-9b5c-50d1417354f5} */
+static const struct hv_vmbus_device_id hp_devs[] = {
 	/* IDE */
-	{
-		.b = {
-			0x32, 0x26, 0x41, 0x32, 0xcb, 0x86, 0xa2, 0x44,
-			0x9b, 0x5c, 0x50, 0xd1, 0x41, 0x73, 0x54, 0xf5
-		}
-	},
-	/* {ba6163d9-04a1-4d29-b605-72e2ffb1dc7f} */
+	{ HV_IDE_GUID, },
 	/* Storage - SCSI */
-	{
-		.b  = {
-			0xd9, 0x63, 0x61, 0xba, 0xa1, 0x04, 0x29, 0x4d,
-			0xb6, 0x05, 0x72, 0xe2, 0xff, 0xb1, 0xdc, 0x7f
-		}
-	},
-	/* {F8615163-DF3E-46c5-913F-F2D2F965ED0E} */
+	{ HV_SCSI_GUID, },
 	/* Network */
-	{
-		.b = {
-			0x63, 0x51, 0x61, 0xF8, 0x3E, 0xDF, 0xc5, 0x46,
-			0x91, 0x3F, 0xF2, 0xD2, 0xF9, 0x65, 0xED, 0x0E
-		}
-	},
-
+	{ HV_NIC_GUID, },
 };
 
 
@@ -320,7 +301,7 @@ static u32 get_vp_index(uuid_le *type_guid)
 	u32 max_cpus = num_online_cpus();
 
 	for (i = IDE; i < MAX_PERF_CHN; i++) {
-		if (!memcmp(type_guid->b, hp_devs[i].b,
+		if (!memcmp(type_guid->b, hp_devs[i].guid,
 				 sizeof(uuid_le))) {
 			perf_chn = true;
 			break;
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 5095b066df94..df77ba9a8166 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -1158,6 +1158,100 @@ void vmbus_driver_unregister(struct hv_driver *hv_driver);
 	.guid = { g0, g1, g2, g3, g4, g5, g6, g7,	\
 		  g8, g9, ga, gb, gc, gd, ge, gf },
 
+/*
+ * GUID definitions of various offer types - services offered to the guest.
+ */
+
+/*
+ * Network GUID
+ * {f8615163-df3e-46c5-913f-f2d2f965ed0e}
+ */
+#define HV_NIC_GUID \
+	.guid = { \
+			0x63, 0x51, 0x61, 0xf8, 0x3e, 0xdf, 0xc5, 0x46, \
+			0x91, 0x3f, 0xf2, 0xd2, 0xf9, 0x65, 0xed, 0x0e \
+		}
+
+/*
+ * IDE GUID
+ * {32412632-86cb-44a2-9b5c-50d1417354f5}
+ */
+#define HV_IDE_GUID \
+	.guid = { \
+			0x32, 0x26, 0x41, 0x32, 0xcb, 0x86, 0xa2, 0x44, \
+			0x9b, 0x5c, 0x50, 0xd1, 0x41, 0x73, 0x54, 0xf5 \
+		}
+
+/*
+ * SCSI GUID
+ * {ba6163d9-04a1-4d29-b605-72e2ffb1dc7f}
+ */
+#define HV_SCSI_GUID \
+	.guid = { \
+			0xd9, 0x63, 0x61, 0xba, 0xa1, 0x04, 0x29, 0x4d, \
+			0xb6, 0x05, 0x72, 0xe2, 0xff, 0xb1, 0xdc, 0x7f \
+		}
+
+/*
+ * Shutdown GUID
+ * {0e0b6031-5213-4934-818b-38d90ced39db}
+ */
+#define HV_SHUTDOWN_GUID \
+	.guid = { \
+			0x31, 0x60, 0x0b, 0x0e, 0x13, 0x52, 0x34, 0x49, \
+			0x81, 0x8b, 0x38, 0xd9, 0x0c, 0xed, 0x39, 0xdb \
+		}
+
+/*
+ * Time Synch GUID
+ * {9527E630-D0AE-497b-ADCE-E80AB0175CAF}
+ */
+#define HV_TS_GUID \
+	.guid = { \
+			0x30, 0xe6, 0x27, 0x95, 0xae, 0xd0, 0x7b, 0x49, \
+			0xad, 0xce, 0xe8, 0x0a, 0xb0, 0x17, 0x5c, 0xaf \
+		}
+
+/*
+ * Heartbeat GUID
+ * {57164f39-9115-4e78-ab55-382f3bd5422d}
+ */
+#define HV_HEART_BEAT_GUID \
+	.guid = { \
+			0x39, 0x4f, 0x16, 0x57, 0x15, 0x91, 0x78, 0x4e, \
+			0xab, 0x55, 0x38, 0x2f, 0x3b, 0xd5, 0x42, 0x2d \
+		}
+
+/*
+ * KVP GUID
+ * {a9a0f4e7-5a45-4d96-b827-8a841e8c03e6}
+ */
+#define HV_KVP_GUID \
+	.guid = { \
+			0xe7, 0xf4, 0xa0, 0xa9, 0x45, 0x5a, 0x96, 0x4d, \
+			0xb8, 0x27, 0x8a, 0x84, 0x1e, 0x8c, 0x3,  0xe6 \
+		}
+
+/*
+ * Dynamic memory GUID
+ * {525074dc-8985-46e2-8057-a307dc18a502}
+ */
+#define HV_DM_GUID \
+	.guid = { \
+			0xdc, 0x74, 0x50, 0X52, 0x85, 0x89, 0xe2, 0x46, \
+			0x80, 0x57, 0xa3, 0x07, 0xdc, 0x18, 0xa5, 0x02 \
+		}
+
+/*
+ * Mouse GUID
+ * {cfa8b69e-5b4a-4cc0-b98b-8ba1a1f3f95a}
+ */
+#define HV_MOUSE_GUID \
+	.guid = { \
+			0x9e, 0xb6, 0xa8, 0xcf, 0x4a, 0x5b, 0xc0, 0x4c, \
+			0xb9, 0x8b, 0x8b, 0xa1, 0xa1, 0xf3, 0xf9, 0x5a \
+		}
+
 /*
  * Common header for Hyper-V ICs
  */
-- 
cgit v1.2.3


From e175435ed281d06ffd56d4edc8a2dd56f8672f07 Mon Sep 17 00:00:00 2001
From: Jeff Garzik <jeff@garzik.org>
Date: Fri, 25 Jan 2013 15:30:23 -0500
Subject: Revert "libata: export host controller number thru /sys"

This reverts commit 1757d902b029a29dfcef63609964385cf8865b5a.

Discussion continues upstream.
---
 drivers/ata/libata-core.c      | 4 ----
 drivers/ata/libata-transport.c | 2 +-
 include/linux/libata.h         | 1 -
 3 files changed, 1 insertion(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index b7eed827daae..4b6fb48eb831 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -99,7 +99,6 @@ static void ata_dev_xfermask(struct ata_device *dev);
 static unsigned long ata_dev_blacklisted(const struct ata_device *dev);
 
 atomic_t ata_print_id = ATOMIC_INIT(0);
-atomic_t host_print_id = ATOMIC_INIT(0);
 
 struct ata_force_param {
 	const char	*name;
@@ -6119,9 +6118,6 @@ int ata_host_register(struct ata_host *host, struct scsi_host_template *sht)
 	for (i = host->n_ports; host->ports[i]; i++)
 		kfree(host->ports[i]);
 
-	/* track host controller */
-	host->host_id = atomic_inc_return(&host_print_id);
-
 	/* give ports names and add SCSI hosts */
 	for (i = 0; i < host->n_ports; i++)
 		host->ports[i]->print_id = atomic_inc_return(&ata_print_id);
diff --git a/drivers/ata/libata-transport.c b/drivers/ata/libata-transport.c
index 61dca7a20a31..c04d393d20c1 100644
--- a/drivers/ata/libata-transport.c
+++ b/drivers/ata/libata-transport.c
@@ -284,7 +284,7 @@ int ata_tport_add(struct device *parent,
 
 	dev->parent = get_device(parent);
 	dev->release = ata_tport_release;
-	dev_set_name(dev, "ata%d.%d", ap->host->host_id, ap->print_id);
+	dev_set_name(dev, "ata%d", ap->print_id);
 	transport_setup_device(dev);
 	error = device_add(dev);
 	if (error) {
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 65ff67e34b77..91c9d109e5f1 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -546,7 +546,6 @@ struct ata_host {
 	void			*private_data;
 	struct ata_port_operations *ops;
 	unsigned long		flags;
-	unsigned int            host_id; /* user visible host ID */
 
 	struct mutex		eh_mutex;
 	struct task_struct	*eh_owner;
-- 
cgit v1.2.3


From 0bb8f3d6ae621945e6fa2102aa894f72b76a023e Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Fri, 25 Jan 2013 21:51:13 +0100
Subject: sysfs: Functions for adding/removing symlinks to/from attribute
 groups

The most convenient way to expose ACPI power resources lists of a
device is to put symbolic links to sysfs directories representing
those resources into special attribute groups in the device's sysfs
directory.  For this purpose, it is necessary to be able to add
symbolic links to attribute groups.

For this reason, add sysfs helper functions for adding/removing
symbolic links to/from attribute groups, sysfs_add_link_to_group()
and sysfs_remove_link_from_group(), respectively.

This change set includes a build fix from David Rientjes.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/sysfs/group.c      | 42 ++++++++++++++++++++++++++++++++++++++++++
 fs/sysfs/symlink.c    | 45 ++++++++++++++++++++++++++++++++-------------
 fs/sysfs/sysfs.h      |  2 ++
 include/linux/sysfs.h | 16 ++++++++++++++++
 4 files changed, 92 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index 2df555c66d57..aec3d5c98c94 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -205,6 +205,48 @@ void sysfs_unmerge_group(struct kobject *kobj,
 }
 EXPORT_SYMBOL_GPL(sysfs_unmerge_group);
 
+/**
+ * sysfs_add_link_to_group - add a symlink to an attribute group.
+ * @kobj:	The kobject containing the group.
+ * @group_name:	The name of the group.
+ * @target:	The target kobject of the symlink to create.
+ * @link_name:	The name of the symlink to create.
+ */
+int sysfs_add_link_to_group(struct kobject *kobj, const char *group_name,
+			    struct kobject *target, const char *link_name)
+{
+	struct sysfs_dirent *dir_sd;
+	int error = 0;
+
+	dir_sd = sysfs_get_dirent(kobj->sd, NULL, group_name);
+	if (!dir_sd)
+		return -ENOENT;
+
+	error = sysfs_create_link_sd(dir_sd, target, link_name);
+	sysfs_put(dir_sd);
+
+	return error;
+}
+EXPORT_SYMBOL_GPL(sysfs_add_link_to_group);
+
+/**
+ * sysfs_remove_link_from_group - remove a symlink from an attribute group.
+ * @kobj:	The kobject containing the group.
+ * @group_name:	The name of the group.
+ * @link_name:	The name of the symlink to remove.
+ */
+void sysfs_remove_link_from_group(struct kobject *kobj, const char *group_name,
+				  const char *link_name)
+{
+	struct sysfs_dirent *dir_sd;
+
+	dir_sd = sysfs_get_dirent(kobj->sd, NULL, group_name);
+	if (dir_sd) {
+		sysfs_hash_and_remove(dir_sd, NULL, link_name);
+		sysfs_put(dir_sd);
+	}
+}
+EXPORT_SYMBOL_GPL(sysfs_remove_link_from_group);
 
 EXPORT_SYMBOL_GPL(sysfs_create_group);
 EXPORT_SYMBOL_GPL(sysfs_update_group);
diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c
index 3c9eb5624f5e..8c940df97a52 100644
--- a/fs/sysfs/symlink.c
+++ b/fs/sysfs/symlink.c
@@ -21,26 +21,17 @@
 
 #include "sysfs.h"
 
-static int sysfs_do_create_link(struct kobject *kobj, struct kobject *target,
-				const char *name, int warn)
+static int sysfs_do_create_link_sd(struct sysfs_dirent *parent_sd,
+				   struct kobject *target,
+				   const char *name, int warn)
 {
-	struct sysfs_dirent *parent_sd = NULL;
 	struct sysfs_dirent *target_sd = NULL;
 	struct sysfs_dirent *sd = NULL;
 	struct sysfs_addrm_cxt acxt;
 	enum kobj_ns_type ns_type;
 	int error;
 
-	BUG_ON(!name);
-
-	if (!kobj)
-		parent_sd = &sysfs_root;
-	else
-		parent_sd = kobj->sd;
-
-	error = -EFAULT;
-	if (!parent_sd)
-		goto out_put;
+	BUG_ON(!name || !parent_sd);
 
 	/* target->sd can go away beneath us but is protected with
 	 * sysfs_assoc_lock.  Fetch target_sd from it.
@@ -95,6 +86,34 @@ static int sysfs_do_create_link(struct kobject *kobj, struct kobject *target,
 	return error;
 }
 
+/**
+ *	sysfs_create_link_sd - create symlink to a given object.
+ *	@sd:		directory we're creating the link in.
+ *	@target:	object we're pointing to.
+ *	@name:		name of the symlink.
+ */
+int sysfs_create_link_sd(struct sysfs_dirent *sd, struct kobject *target,
+			 const char *name)
+{
+	return sysfs_do_create_link_sd(sd, target, name, 1);
+}
+
+static int sysfs_do_create_link(struct kobject *kobj, struct kobject *target,
+				const char *name, int warn)
+{
+	struct sysfs_dirent *parent_sd = NULL;
+
+	if (!kobj)
+		parent_sd = &sysfs_root;
+	else
+		parent_sd = kobj->sd;
+
+	if (!parent_sd)
+		return -EFAULT;
+
+	return sysfs_do_create_link_sd(parent_sd, target, name, warn);
+}
+
 /**
  *	sysfs_create_link - create symlink between two objects.
  *	@kobj:	object whose directory we're creating the link in.
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index d73c0932bbd6..d1e4043eb0c3 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -240,3 +240,5 @@ void unmap_bin_file(struct sysfs_dirent *attr_sd);
  * symlink.c
  */
 extern const struct inode_operations sysfs_symlink_inode_operations;
+int sysfs_create_link_sd(struct sysfs_dirent *sd, struct kobject *target,
+			 const char *name);
diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index 381f06db2fe5..e2cee22f578a 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -181,6 +181,10 @@ int sysfs_merge_group(struct kobject *kobj,
 		       const struct attribute_group *grp);
 void sysfs_unmerge_group(struct kobject *kobj,
 		       const struct attribute_group *grp);
+int sysfs_add_link_to_group(struct kobject *kobj, const char *group_name,
+			    struct kobject *target, const char *link_name);
+void sysfs_remove_link_from_group(struct kobject *kobj, const char *group_name,
+				  const char *link_name);
 
 void sysfs_notify(struct kobject *kobj, const char *dir, const char *attr);
 void sysfs_notify_dirent(struct sysfs_dirent *sd);
@@ -326,6 +330,18 @@ static inline void sysfs_unmerge_group(struct kobject *kobj,
 {
 }
 
+static inline int sysfs_add_link_to_group(struct kobject *kobj,
+		const char *group_name, struct kobject *target,
+		const char *link_name)
+{
+	return 0;
+}
+
+static inline void sysfs_remove_link_from_group(struct kobject *kobj,
+		const char *group_name, const char *link_name)
+{
+}
+
 static inline void sysfs_notify(struct kobject *kobj, const char *dir,
 				const char *attr)
 {
-- 
cgit v1.2.3


From 58d9a38f6facb28e935ec2747f6d9e9bf4684118 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Mon, 21 Jan 2013 13:20:51 -0800
Subject: PCI: Skip attaching driver in device_add()

We want to add PCI devices to the device tree as early as possible but
delay attaching drivers.

device_add() adds a device to the device hierarchy and (via
device_attach()) attaches a matching driver and calls its .probe() method.
We want to separate adding the device to the hierarchy from attaching the
driver.

This patch does that by adding "match_driver" in struct pci_dev.  When
false, we return failure from pci_bus_match(), which makes device_attach()
believe there's no matching driver.

Later, we set "match_driver = true" and call device_attach() again, which
now attaches the driver and calls its .probe() method.

[bhelgaas: changelog, explicitly init dev->match_driver,
fold device_attach() call into pci_bus_add_device()]
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/pci/bus.c        | 5 +++++
 drivers/pci/pci-driver.c | 6 +++++-
 include/linux/pci.h      | 1 +
 3 files changed, 11 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c
index 847f3ca47bb8..c8709c6fdb7c 100644
--- a/drivers/pci/bus.c
+++ b/drivers/pci/bus.c
@@ -177,10 +177,15 @@ int pci_bus_add_device(struct pci_dev *dev)
 	if (retval)
 		return retval;
 
+	dev->match_driver = false;
 	retval = device_add(&dev->dev);
 	if (retval)
 		return retval;
 
+	dev->match_driver = true;
+	retval = device_attach(&dev->dev);
+	WARN_ON(retval < 0);
+
 	dev->is_added = 1;
 	pci_proc_attach_device(dev);
 	pci_create_sysfs_dev_files(dev);
diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index f79cbcd3944b..acdcc3c6ecdd 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -1186,9 +1186,13 @@ pci_dev_driver(const struct pci_dev *dev)
 static int pci_bus_match(struct device *dev, struct device_driver *drv)
 {
 	struct pci_dev *pci_dev = to_pci_dev(dev);
-	struct pci_driver *pci_drv = to_pci_driver(drv);
+	struct pci_driver *pci_drv;
 	const struct pci_device_id *found_id;
 
+	if (!pci_dev->match_driver)
+		return 0;
+
+	pci_drv = to_pci_driver(drv);
 	found_id = pci_match_device(pci_drv, pci_dev);
 	if (found_id)
 		return 1;
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 907b455ab603..8ee7e4e46539 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -286,6 +286,7 @@ struct pci_dev {
 	unsigned int	irq;
 	struct resource resource[DEVICE_COUNT_RESOURCE]; /* I/O and memory regions + expansion ROMs */
 
+	bool match_driver;		/* Skip attaching driver */
 	/* These fields are used by common fixups */
 	unsigned int	transparent:1;	/* Transparent PCI bridge */
 	unsigned int	multifunction:1;/* Part of multi-function device */
-- 
cgit v1.2.3


From da0aa7169b97d90f4af39a9dc84d58bbe19d7e78 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Fri, 25 Jan 2013 17:09:42 -0500
Subject: USB: add usb_hcd_{start,end}_port_resume

This patch (as1649) adds a mechanism for host controller drivers to
inform usbcore when they have begun or ended resume signalling on a
particular root-hub port.  The core will then make sure that the root
hub does not get runtime-suspended while the port resume is going on.

Since commit 596d789a211d134dc5f94d1e5957248c204ef850 (USB: set hub's
default autosuspend delay as 0), the system tries to suspend hubs
whenever they aren't in use.  While a root-hub port is being resumed,
the root hub does not appear to be in use.  Attempted runtime suspends
fail because of the ongoing port resume, but the PM core just keeps on
trying over and over again.  We want to prevent this wasteful effort.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Tested-by: Ming Lei <ming.lei@canonical.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/core/hcd.c  | 44 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/usb.h     |  2 ++
 include/linux/usb/hcd.h |  3 +++
 3 files changed, 49 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
index 4225d5e72131..8e64adf8e4d5 100644
--- a/drivers/usb/core/hcd.c
+++ b/drivers/usb/core/hcd.c
@@ -39,6 +39,7 @@
 #include <asm/unaligned.h>
 #include <linux/platform_device.h>
 #include <linux/workqueue.h>
+#include <linux/pm_runtime.h>
 
 #include <linux/usb.h>
 #include <linux/usb/hcd.h>
@@ -1025,6 +1026,49 @@ static int register_root_hub(struct usb_hcd *hcd)
 	return retval;
 }
 
+/*
+ * usb_hcd_start_port_resume - a root-hub port is sending a resume signal
+ * @bus: the bus which the root hub belongs to
+ * @portnum: the port which is being resumed
+ *
+ * HCDs should call this function when they know that a resume signal is
+ * being sent to a root-hub port.  The root hub will be prevented from
+ * going into autosuspend until usb_hcd_end_port_resume() is called.
+ *
+ * The bus's private lock must be held by the caller.
+ */
+void usb_hcd_start_port_resume(struct usb_bus *bus, int portnum)
+{
+	unsigned bit = 1 << portnum;
+
+	if (!(bus->resuming_ports & bit)) {
+		bus->resuming_ports |= bit;
+		pm_runtime_get_noresume(&bus->root_hub->dev);
+	}
+}
+EXPORT_SYMBOL_GPL(usb_hcd_start_port_resume);
+
+/*
+ * usb_hcd_end_port_resume - a root-hub port has stopped sending a resume signal
+ * @bus: the bus which the root hub belongs to
+ * @portnum: the port which is being resumed
+ *
+ * HCDs should call this function when they know that a resume signal has
+ * stopped being sent to a root-hub port.  The root hub will be allowed to
+ * autosuspend again.
+ *
+ * The bus's private lock must be held by the caller.
+ */
+void usb_hcd_end_port_resume(struct usb_bus *bus, int portnum)
+{
+	unsigned bit = 1 << portnum;
+
+	if (bus->resuming_ports & bit) {
+		bus->resuming_ports &= ~bit;
+		pm_runtime_put_noidle(&bus->root_hub->dev);
+	}
+}
+EXPORT_SYMBOL_GPL(usb_hcd_end_port_resume);
 
 /*-------------------------------------------------------------------------*/
 
diff --git a/include/linux/usb.h b/include/linux/usb.h
index 689b14b26c8d..4d22d0f6167a 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -357,6 +357,8 @@ struct usb_bus {
 	int bandwidth_int_reqs;		/* number of Interrupt requests */
 	int bandwidth_isoc_reqs;	/* number of Isoc. requests */
 
+	unsigned resuming_ports;	/* bit array: resuming root-hub ports */
+
 #if defined(CONFIG_USB_MON) || defined(CONFIG_USB_MON_MODULE)
 	struct mon_bus *mon_bus;	/* non-null when associated */
 	int monitored;			/* non-zero when monitored */
diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h
index 608050b2545f..0a78df5f6cfd 100644
--- a/include/linux/usb/hcd.h
+++ b/include/linux/usb/hcd.h
@@ -430,6 +430,9 @@ extern void usb_hcd_poll_rh_status(struct usb_hcd *hcd);
 extern void usb_wakeup_notification(struct usb_device *hdev,
 		unsigned int portnum);
 
+extern void usb_hcd_start_port_resume(struct usb_bus *bus, int portnum);
+extern void usb_hcd_end_port_resume(struct usb_bus *bus, int portnum);
+
 /* The D0/D1 toggle bits ... USE WITH CAUTION (they're almost hcd-internal) */
 #define usb_gettoggle(dev, ep, out) (((dev)->toggle[out] >> (ep)) & 1)
 #define	usb_dotoggle(dev, ep, out)  ((dev)->toggle[out] ^= (1 << (ep)))
-- 
cgit v1.2.3


From fbadc58dd3a52c330c8f3926aa93011bf9d91fa0 Mon Sep 17 00:00:00 2001
From: ShuoX Liu <shuox.liu@intel.com>
Date: Wed, 23 Jan 2013 21:49:37 +0100
Subject: PM / Runtime: Add new helper function: pm_runtime_active()

This boolean function simply returns whether or not the runtime
status of the device is 'active'. The typical scenario is driver
calls pm_runtime_get firstly, then check pm_runtime_active in
atomic environment.

Also add entry to Documentation/power/runtime.txt

Signed-off-by: Yanmin Zhang <yanmin.zhang@intel.com>
Signed-off-by: ShuoX Liu <shuox.liu@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 Documentation/power/runtime_pm.txt | 4 ++++
 include/linux/pm_runtime.h         | 7 +++++++
 2 files changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/power/runtime_pm.txt b/Documentation/power/runtime_pm.txt
index 03591a750f99..6c9f5d9aa115 100644
--- a/Documentation/power/runtime_pm.txt
+++ b/Documentation/power/runtime_pm.txt
@@ -426,6 +426,10 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h:
       'power.runtime_error' is set or 'power.disable_depth' is greater than
       zero)
 
+  bool pm_runtime_active(struct device *dev);
+    - return true if the device's runtime PM status is 'active' or its
+      'power.disable_depth' field is not equal to zero, or false otherwise
+
   bool pm_runtime_suspended(struct device *dev);
     - return true if the device's runtime PM status is 'suspended' and its
       'power.disable_depth' field is equal to zero, or false otherwise
diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
index f271860c78d5..c785c215abfc 100644
--- a/include/linux/pm_runtime.h
+++ b/include/linux/pm_runtime.h
@@ -80,6 +80,12 @@ static inline bool pm_runtime_suspended(struct device *dev)
 		&& !dev->power.disable_depth;
 }
 
+static inline bool pm_runtime_active(struct device *dev)
+{
+	return dev->power.runtime_status == RPM_ACTIVE
+		|| dev->power.disable_depth;
+}
+
 static inline bool pm_runtime_status_suspended(struct device *dev)
 {
 	return dev->power.runtime_status == RPM_SUSPENDED;
@@ -132,6 +138,7 @@ static inline void pm_runtime_put_noidle(struct device *dev) {}
 static inline bool device_run_wake(struct device *dev) { return false; }
 static inline void device_set_run_wake(struct device *dev, bool enable) {}
 static inline bool pm_runtime_suspended(struct device *dev) { return false; }
+static inline bool pm_runtime_active(struct device *dev) { return true; }
 static inline bool pm_runtime_status_suspended(struct device *dev) { return false; }
 static inline bool pm_runtime_enabled(struct device *dev) { return false; }
 
-- 
cgit v1.2.3


From c25fb816298138a4b12c606f0aaa018bdd3cc09c Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Wed, 9 Jan 2013 08:12:46 -0800
Subject: hwmon: Retire SENSORS_LIMIT

SENSORS_LIMIT and clamp_val have the same functionality, so retire SENSORS_LIMIT
as it is no longer needed.

Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Acked-by: Jean Delvare <khali@linux-fr.org>
---
 Documentation/hwmon/sysfs-interface |  8 ++++----
 include/linux/hwmon.h               | 12 ------------
 2 files changed, 4 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/hwmon/sysfs-interface b/Documentation/hwmon/sysfs-interface
index 1f4dd855a299..79f8257dd790 100644
--- a/Documentation/hwmon/sysfs-interface
+++ b/Documentation/hwmon/sysfs-interface
@@ -722,14 +722,14 @@ add/subtract if it has been divided before the add/subtract.
 What to do if a value is found to be invalid, depends on the type of the
 sysfs attribute that is being set. If it is a continuous setting like a
 tempX_max or inX_max attribute, then the value should be clamped to its
-limits using SENSORS_LIMIT(value, min_limit, max_limit). If it is not
-continuous like for example a tempX_type, then when an invalid value is
-written, -EINVAL should be returned.
+limits using clamp_val(value, min_limit, max_limit). If it is not continuous
+like for example a tempX_type, then when an invalid value is written,
+-EINVAL should be returned.
 
 Example1, temp1_max, register is a signed 8 bit value (-128 - 127 degrees):
 
 	long v = simple_strtol(buf, NULL, 10) / 1000;
-	v = SENSORS_LIMIT(v, -128, 127);
+	v = clamp_val(v, -128, 127);
 	/* write v to register */
 
 Example2, fan divider setting, valid values 2, 4 and 8:
diff --git a/include/linux/hwmon.h b/include/linux/hwmon.h
index 82b29ae6ebb0..b2514f70d591 100644
--- a/include/linux/hwmon.h
+++ b/include/linux/hwmon.h
@@ -20,16 +20,4 @@ struct device *hwmon_device_register(struct device *dev);
 
 void hwmon_device_unregister(struct device *dev);
 
-/* Scale user input to sensible values */
-static inline int SENSORS_LIMIT(long value, long low, long high)
-{
-	if (value < low)
-		return low;
-	else if (value > high)
-		return high;
-	else
-		return value;
-}
-
 #endif
-
-- 
cgit v1.2.3


From 3343b7a6d2cd0a980d0c4a0ce02ef48b6bfcc12a Mon Sep 17 00:00:00 2001
From: Mika Westerberg <mika.westerberg@linux.intel.com>
Date: Tue, 22 Jan 2013 12:26:27 +0200
Subject: spi/pxa2xx: convert to the common clk framework

Convert clk_enable() to clk_prepare_enable() and clk_disable() to
clk_disable_unprepare() respectively in order to support the common clk
framework. Otherwise we get warnings on the console as the clock is not
prepared before it is enabled.

In addition we must cache the maximum clock rate to drv_data->max_clk_rate
at probe time because clk_get_rate() cannot be called in tasklet context.

Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 drivers/spi/spi-pxa2xx.c       | 33 ++++++++++++++++++++-------------
 include/linux/spi/pxa2xx_spi.h | 18 ------------------
 2 files changed, 20 insertions(+), 31 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c
index a241891355db..304cf6eb50e6 100644
--- a/drivers/spi/spi-pxa2xx.c
+++ b/drivers/spi/spi-pxa2xx.c
@@ -30,6 +30,7 @@
 #include <linux/delay.h>
 #include <linux/gpio.h>
 #include <linux/slab.h>
+#include <linux/clk.h>
 
 #include <asm/io.h>
 #include <asm/irq.h>
@@ -114,6 +115,9 @@ struct driver_data {
 	u32 clear_sr;
 	u32 mask_sr;
 
+	/* Maximun clock rate */
+	unsigned long max_clk_rate;
+
 	/* Message Transfer pump */
 	struct tasklet_struct pump_transfers;
 
@@ -891,9 +895,12 @@ static int set_dma_burst_and_threshold(struct chip_data *chip,
 	return retval;
 }
 
-static unsigned int ssp_get_clk_div(struct ssp_device *ssp, int rate)
+static unsigned int ssp_get_clk_div(struct driver_data *drv_data, int rate)
 {
-	unsigned long ssp_clk = clk_get_rate(ssp->clk);
+	unsigned long ssp_clk = drv_data->max_clk_rate;
+	const struct ssp_device *ssp = drv_data->ssp;
+
+	rate = min_t(int, ssp_clk, rate);
 
 	if (ssp->type == PXA25x_SSP || ssp->type == CE4100_SSP)
 		return ((ssp_clk / (2 * rate) - 1) & 0xff) << 8;
@@ -908,7 +915,6 @@ static void pump_transfers(unsigned long data)
 	struct spi_transfer *transfer = NULL;
 	struct spi_transfer *previous = NULL;
 	struct chip_data *chip = NULL;
-	struct ssp_device *ssp = drv_data->ssp;
 	void __iomem *reg = drv_data->ioaddr;
 	u32 clk_div = 0;
 	u8 bits = 0;
@@ -1005,7 +1011,7 @@ static void pump_transfers(unsigned long data)
 		if (transfer->bits_per_word)
 			bits = transfer->bits_per_word;
 
-		clk_div = ssp_get_clk_div(ssp, speed);
+		clk_div = ssp_get_clk_div(drv_data, speed);
 
 		if (bits <= 8) {
 			drv_data->n_bytes = 1;
@@ -1214,7 +1220,6 @@ static int setup(struct spi_device *spi)
 	struct pxa2xx_spi_chip *chip_info = NULL;
 	struct chip_data *chip;
 	struct driver_data *drv_data = spi_master_get_devdata(spi->master);
-	struct ssp_device *ssp = drv_data->ssp;
 	unsigned int clk_div;
 	uint tx_thres = TX_THRESH_DFLT;
 	uint rx_thres = RX_THRESH_DFLT;
@@ -1296,7 +1301,7 @@ static int setup(struct spi_device *spi)
 		}
 	}
 
-	clk_div = ssp_get_clk_div(ssp, spi->max_speed_hz);
+	clk_div = ssp_get_clk_div(drv_data, spi->max_speed_hz);
 	chip->speed_hz = spi->max_speed_hz;
 
 	chip->cr0 = clk_div
@@ -1312,12 +1317,12 @@ static int setup(struct spi_device *spi)
 	/* NOTE:  PXA25x_SSP _could_ use external clocking ... */
 	if (!pxa25x_ssp_comp(drv_data))
 		dev_dbg(&spi->dev, "%ld Hz actual, %s\n",
-			clk_get_rate(ssp->clk)
+			drv_data->max_clk_rate
 				/ (1 + ((chip->cr0 & SSCR0_SCR(0xfff)) >> 8)),
 			chip->enable_dma ? "DMA" : "PIO");
 	else
 		dev_dbg(&spi->dev, "%ld Hz actual, %s\n",
-			clk_get_rate(ssp->clk) / 2
+			drv_data->max_clk_rate / 2
 				/ (1 + ((chip->cr0 & SSCR0_SCR(0x0ff)) >> 8)),
 			chip->enable_dma ? "DMA" : "PIO");
 
@@ -1470,7 +1475,9 @@ static int pxa2xx_spi_probe(struct platform_device *pdev)
 	}
 
 	/* Enable SOC clock */
-	clk_enable(ssp->clk);
+	clk_prepare_enable(ssp->clk);
+
+	drv_data->max_clk_rate = clk_get_rate(ssp->clk);
 
 	/* Load default SSP configuration */
 	write_SSCR0(0, drv_data->ioaddr);
@@ -1499,7 +1506,7 @@ static int pxa2xx_spi_probe(struct platform_device *pdev)
 	return status;
 
 out_error_clock_enabled:
-	clk_disable(ssp->clk);
+	clk_disable_unprepare(ssp->clk);
 
 out_error_dma_alloc:
 	if (drv_data->tx_channel != -1)
@@ -1527,7 +1534,7 @@ static int pxa2xx_spi_remove(struct platform_device *pdev)
 
 	/* Disable the SSP at the peripheral and SOC level */
 	write_SSCR0(0, drv_data->ioaddr);
-	clk_disable(ssp->clk);
+	clk_disable_unprepare(ssp->clk);
 
 	/* Release DMA */
 	if (drv_data->master_info->enable_dma) {
@@ -1571,7 +1578,7 @@ static int pxa2xx_spi_suspend(struct device *dev)
 	if (status != 0)
 		return status;
 	write_SSCR0(0, drv_data->ioaddr);
-	clk_disable(ssp->clk);
+	clk_disable_unprepare(ssp->clk);
 
 	return 0;
 }
@@ -1590,7 +1597,7 @@ static int pxa2xx_spi_resume(struct device *dev)
 			DRCMR_MAPVLD | drv_data->tx_channel;
 
 	/* Enable the SSP clock */
-	clk_enable(ssp->clk);
+	clk_prepare_enable(ssp->clk);
 
 	/* Start the queue running */
 	status = spi_master_resume(drv_data->master);
diff --git a/include/linux/spi/pxa2xx_spi.h b/include/linux/spi/pxa2xx_spi.h
index 6b99f09b717d..053b5ba51b25 100644
--- a/include/linux/spi/pxa2xx_spi.h
+++ b/include/linux/spi/pxa2xx_spi.h
@@ -133,23 +133,5 @@ static inline void pxa_free_dma(int dma_ch)
 {
 }
 
-/*
- * The CE4100 does not have the clk framework implemented and SPI clock can
- * not be switched on/off or the divider changed.
- */
-static inline void clk_disable(struct clk *clk)
-{
-}
-
-static inline int clk_enable(struct clk *clk)
-{
-	return 0;
-}
-
-static inline unsigned long clk_get_rate(struct clk *clk)
-{
-	return 3686400;
-}
-
 #endif
 #endif
-- 
cgit v1.2.3


From 5cbc7ca987fb3f293203dc14a6c53b91b7c978a5 Mon Sep 17 00:00:00 2001
From: Matthias Brugger <matthias.bgg@gmail.com>
Date: Thu, 24 Jan 2013 13:40:41 +0100
Subject: spi: spi-omap2-mcspi.c: Toggle CS after each word

This patch allows the board code to define SPI devices which needs to
toggle the chip select after every word send. This is needed to get a
better resolution reading e.g. an ADC data stream.
Apart from that, as in the normal code CS is controlled by software,
a transfer is done much faster.

Signed-off-by: Matthias Brugger <matthias.bgg@gmail.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 drivers/spi/spi-omap2-mcspi.c                 | 18 ++++++++++++++++++
 include/linux/platform_data/spi-omap2-mcspi.h |  3 +++
 2 files changed, 21 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/spi/spi-omap2-mcspi.c b/drivers/spi/spi-omap2-mcspi.c
index 31f6e842ea86..12789fcfa980 100644
--- a/drivers/spi/spi-omap2-mcspi.c
+++ b/drivers/spi/spi-omap2-mcspi.c
@@ -927,6 +927,7 @@ static void omap2_mcspi_work(struct omap2_mcspi *mcspi, struct spi_message *m)
 
 	struct spi_device		*spi;
 	struct spi_transfer		*t = NULL;
+	struct spi_master		*master;
 	int				cs_active = 0;
 	struct omap2_mcspi_cs		*cs;
 	struct omap2_mcspi_device_config *cd;
@@ -935,6 +936,7 @@ static void omap2_mcspi_work(struct omap2_mcspi *mcspi, struct spi_message *m)
 	u32				chconf;
 
 	spi = m->spi;
+	master = spi->master;
 	cs = spi->controller_state;
 	cd = spi->controller_data;
 
@@ -952,6 +954,14 @@ static void omap2_mcspi_work(struct omap2_mcspi *mcspi, struct spi_message *m)
 			if (!t->speed_hz && !t->bits_per_word)
 				par_override = 0;
 		}
+		if (cd && cd->cs_per_word) {
+			chconf = mcspi->ctx.modulctrl;
+			chconf &= ~OMAP2_MCSPI_MODULCTRL_SINGLE;
+			mcspi_write_reg(master, OMAP2_MCSPI_MODULCTRL, chconf);
+			mcspi->ctx.modulctrl =
+				mcspi_read_cs_reg(spi, OMAP2_MCSPI_MODULCTRL);
+		}
+
 
 		if (!cs_active) {
 			omap2_mcspi_force_cs(spi, 1);
@@ -1013,6 +1023,14 @@ static void omap2_mcspi_work(struct omap2_mcspi *mcspi, struct spi_message *m)
 	if (cs_active)
 		omap2_mcspi_force_cs(spi, 0);
 
+	if (cd && cd->cs_per_word) {
+		chconf = mcspi->ctx.modulctrl;
+		chconf |= OMAP2_MCSPI_MODULCTRL_SINGLE;
+		mcspi_write_reg(master, OMAP2_MCSPI_MODULCTRL, chconf);
+		mcspi->ctx.modulctrl =
+			mcspi_read_cs_reg(spi, OMAP2_MCSPI_MODULCTRL);
+	}
+
 	omap2_mcspi_set_enable(spi, 0);
 
 	m->status = status;
diff --git a/include/linux/platform_data/spi-omap2-mcspi.h b/include/linux/platform_data/spi-omap2-mcspi.h
index a65572d53211..c100456eab17 100644
--- a/include/linux/platform_data/spi-omap2-mcspi.h
+++ b/include/linux/platform_data/spi-omap2-mcspi.h
@@ -22,6 +22,9 @@ struct omap2_mcspi_dev_attr {
 
 struct omap2_mcspi_device_config {
 	unsigned turbo_mode:1;
+
+	/* toggle chip select after every word */
+	unsigned cs_per_word:1;
 };
 
 #endif
-- 
cgit v1.2.3


From 996a953de02ffb852c9ac736f4e892008ed68884 Mon Sep 17 00:00:00 2001
From: Fabio Baltieri <fabio.baltieri@gmail.com>
Date: Tue, 18 Dec 2012 18:50:55 +0100
Subject: can: add tx/rx LED trigger support

This patch implements the functions to add two LED triggers, named
<ifname>-tx and <ifname>-rx, to a canbus device driver.

Triggers are called from specific handlers by each CAN device driver and
can be disabled altogether with a Kconfig option.

The implementation keeps the LED on when the interface is UP and blinks
the LED on network activity at a configurable rate.

This only supports can-dev based drivers, as it uses some support field
in the can_priv structure.

Supported drivers should call devm_can_led_init() and can_led_event() as
needed.

Cleanup is handled automatically by devres, so no *_exit function is
needed.

Supported events are:
- CAN_LED_EVENT_OPEN: turn on tx/rx LEDs
- CAN_LED_EVENT_STOP: turn off tx/rx LEDs
- CAN_LED_EVENT_TX: trigger tx LED blink
- CAN_LED_EVENT_RX: trigger tx LED blink

Cc: Wolfgang Grandegger <wg@grandegger.com>
Cc: Marc Kleine-Budde <mkl@pengutronix.de>
Signed-off-by: Fabio Baltieri <fabio.baltieri@gmail.com>
Acked-by: Oliver Hartkopp <socketcan@hartkopp.net>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/Kconfig  | 11 +++++++
 drivers/net/can/Makefile |  2 ++
 drivers/net/can/led.c    | 86 ++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/can/dev.h  |  8 +++++
 include/linux/can/led.h  | 42 +++++++++++++++++++++++
 5 files changed, 149 insertions(+)
 create mode 100644 drivers/net/can/led.c
 create mode 100644 include/linux/can/led.h

(limited to 'include/linux')

diff --git a/drivers/net/can/Kconfig b/drivers/net/can/Kconfig
index 0c5a65682d01..1cca19f1c490 100644
--- a/drivers/net/can/Kconfig
+++ b/drivers/net/can/Kconfig
@@ -51,6 +51,17 @@ config CAN_CALC_BITTIMING
 	  arguments "tq", "prop_seg", "phase_seg1", "phase_seg2" and "sjw".
 	  If unsure, say Y.
 
+config CAN_LEDS
+	bool "Enable LED triggers for Netlink based drivers"
+	depends on LEDS_CLASS
+	select LEDS_TRIGGERS
+	---help---
+	  This option adds two LED triggers for packet receive and transmit
+	  events on each supported CAN device.
+
+	  Say Y here if you are working on a system with led-class supported
+	  LEDs and you want to use them as canbus activity indicators.
+
 config CAN_AT91
 	tristate "Atmel AT91 onchip CAN controller"
 	depends on ARCH_AT91SAM9263 || ARCH_AT91SAM9X5
diff --git a/drivers/net/can/Makefile b/drivers/net/can/Makefile
index 7de59862bbe9..c7440392adbb 100644
--- a/drivers/net/can/Makefile
+++ b/drivers/net/can/Makefile
@@ -8,6 +8,8 @@ obj-$(CONFIG_CAN_SLCAN)		+= slcan.o
 obj-$(CONFIG_CAN_DEV)		+= can-dev.o
 can-dev-y			:= dev.o
 
+can-dev-$(CONFIG_CAN_LEDS)	+= led.o
+
 obj-y				+= usb/
 obj-y				+= softing/
 
diff --git a/drivers/net/can/led.c b/drivers/net/can/led.c
new file mode 100644
index 000000000000..c50a0d741c57
--- /dev/null
+++ b/drivers/net/can/led.c
@@ -0,0 +1,86 @@
+/*
+ * Copyright 2012, Fabio Baltieri <fabio.baltieri@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/netdevice.h>
+#include <linux/can/dev.h>
+
+#include <linux/can/led.h>
+
+static unsigned long led_delay = 50;
+module_param(led_delay, ulong, 0644);
+MODULE_PARM_DESC(led_delay,
+		"blink delay time for activity leds (msecs, default: 50).");
+
+/* Trigger a LED event in response to a CAN device event */
+void can_led_event(struct net_device *netdev, enum can_led_event event)
+{
+	struct can_priv *priv = netdev_priv(netdev);
+
+	switch (event) {
+	case CAN_LED_EVENT_OPEN:
+		led_trigger_event(priv->tx_led_trig, LED_FULL);
+		led_trigger_event(priv->rx_led_trig, LED_FULL);
+		break;
+	case CAN_LED_EVENT_STOP:
+		led_trigger_event(priv->tx_led_trig, LED_OFF);
+		led_trigger_event(priv->rx_led_trig, LED_OFF);
+		break;
+	case CAN_LED_EVENT_TX:
+		if (led_delay)
+			led_trigger_blink_oneshot(priv->tx_led_trig,
+						  &led_delay, &led_delay, 1);
+		break;
+	case CAN_LED_EVENT_RX:
+		if (led_delay)
+			led_trigger_blink_oneshot(priv->rx_led_trig,
+						  &led_delay, &led_delay, 1);
+		break;
+	}
+}
+EXPORT_SYMBOL_GPL(can_led_event);
+
+static void can_led_release(struct device *gendev, void *res)
+{
+	struct can_priv *priv = netdev_priv(to_net_dev(gendev));
+
+	led_trigger_unregister_simple(priv->tx_led_trig);
+	led_trigger_unregister_simple(priv->rx_led_trig);
+}
+
+/* Register CAN LED triggers for a CAN device
+ *
+ * This is normally called from a driver's probe function
+ */
+void devm_can_led_init(struct net_device *netdev)
+{
+	struct can_priv *priv = netdev_priv(netdev);
+	void *res;
+
+	res = devres_alloc(can_led_release, 0, GFP_KERNEL);
+	if (!res) {
+		netdev_err(netdev, "cannot register LED triggers\n");
+		return;
+	}
+
+	snprintf(priv->tx_led_trig_name, sizeof(priv->tx_led_trig_name),
+		 "%s-tx", netdev->name);
+	snprintf(priv->rx_led_trig_name, sizeof(priv->rx_led_trig_name),
+		 "%s-rx", netdev->name);
+
+	led_trigger_register_simple(priv->tx_led_trig_name,
+				    &priv->tx_led_trig);
+	led_trigger_register_simple(priv->rx_led_trig_name,
+				    &priv->rx_led_trig);
+
+	devres_add(&netdev->dev, res);
+}
+EXPORT_SYMBOL_GPL(devm_can_led_init);
diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h
index 2b2fc345afca..7747d9bcdc84 100644
--- a/include/linux/can/dev.h
+++ b/include/linux/can/dev.h
@@ -16,6 +16,7 @@
 #include <linux/can.h>
 #include <linux/can/netlink.h>
 #include <linux/can/error.h>
+#include <linux/can/led.h>
 
 /*
  * CAN mode
@@ -52,6 +53,13 @@ struct can_priv {
 
 	unsigned int echo_skb_max;
 	struct sk_buff **echo_skb;
+
+#ifdef CONFIG_CAN_LEDS
+	struct led_trigger *tx_led_trig;
+	char tx_led_trig_name[CAN_LED_NAME_SZ];
+	struct led_trigger *rx_led_trig;
+	char rx_led_trig_name[CAN_LED_NAME_SZ];
+#endif
 };
 
 /*
diff --git a/include/linux/can/led.h b/include/linux/can/led.h
new file mode 100644
index 000000000000..12d5549abb95
--- /dev/null
+++ b/include/linux/can/led.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright 2012, Fabio Baltieri <fabio.baltieri@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef CAN_LED_H
+#define CAN_LED_H
+
+#include <linux/if.h>
+#include <linux/leds.h>
+
+enum can_led_event {
+	CAN_LED_EVENT_OPEN,
+	CAN_LED_EVENT_STOP,
+	CAN_LED_EVENT_TX,
+	CAN_LED_EVENT_RX,
+};
+
+#ifdef CONFIG_CAN_LEDS
+
+/* keep space for interface name + "-tx"/"-rx" suffix and null terminator */
+#define CAN_LED_NAME_SZ (IFNAMSIZ + 4)
+
+void can_led_event(struct net_device *netdev, enum can_led_event event);
+void devm_can_led_init(struct net_device *netdev);
+
+#else
+
+static inline void can_led_event(struct net_device *netdev,
+				 enum can_led_event event)
+{
+}
+static inline void devm_can_led_init(struct net_device *netdev)
+{
+}
+
+#endif
+
+#endif
-- 
cgit v1.2.3


From bf03a5379cd3492fbeca42111340581ba9dee0b8 Mon Sep 17 00:00:00 2001
From: Kurt Van Dijck <kurt.van.dijck@eia.be>
Date: Tue, 18 Dec 2012 18:50:56 +0100
Subject: can: export a safe netdev_priv wrapper for candev

In net_device notifier calls, it was impossible to determine
if a CAN device is based on candev in a safe way.
This patch adds such test in order to access candev storage
from within those notifiers.

Signed-off-by: Kurt Van Dijck <kurt.van.dijck@eia.be>
Acked-by: Oliver Hartkopp <socketcan@hartkopp.net>
Signed-off-by: Fabio Baltieri <fabio.baltieri@gmail.com>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/dev.c   | 13 +++++++++++++
 include/linux/can/dev.h |  3 +++
 2 files changed, 16 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c
index 8233e5ed2939..13e738098fbe 100644
--- a/drivers/net/can/dev.c
+++ b/drivers/net/can/dev.c
@@ -794,6 +794,19 @@ void unregister_candev(struct net_device *dev)
 }
 EXPORT_SYMBOL_GPL(unregister_candev);
 
+/*
+ * Test if a network device is a candev based device
+ * and return the can_priv* if so.
+ */
+struct can_priv *safe_candev_priv(struct net_device *dev)
+{
+	if ((dev->type != ARPHRD_CAN) || (dev->rtnl_link_ops != &can_link_ops))
+		return NULL;
+
+	return netdev_priv(dev);
+}
+EXPORT_SYMBOL_GPL(safe_candev_priv);
+
 static __init int can_dev_init(void)
 {
 	int err;
diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h
index 7747d9bcdc84..fb0ab651a041 100644
--- a/include/linux/can/dev.h
+++ b/include/linux/can/dev.h
@@ -106,6 +106,9 @@ u8 can_len2dlc(u8 len);
 struct net_device *alloc_candev(int sizeof_priv, unsigned int echo_skb_max);
 void free_candev(struct net_device *dev);
 
+/* a candev safe wrapper around netdev_priv */
+struct can_priv *safe_candev_priv(struct net_device *dev);
+
 int open_candev(struct net_device *dev);
 void close_candev(struct net_device *dev);
 
-- 
cgit v1.2.3


From a1ef7bd9fce8aba8e4701e60208148fb3bc9bdd4 Mon Sep 17 00:00:00 2001
From: Kurt Van Dijck <kurt.van.dijck@eia.be>
Date: Tue, 18 Dec 2012 18:50:57 +0100
Subject: can: rename LED trigger name on netdev renames

The LED trigger name for CAN devices is based on the initial
CAN device name, but does never change. The LED trigger name
is not guaranteed to be unique in case of hotplugging CAN devices.

This patch tries to address this problem by modifying the
LED trigger name according to the CAN device name when
the latter changes.

v1 - Kurt Van Dijck
v2 - Fabio Baltieri
- remove rename blocking if trigger is bound
- use led-subsystem function for the actual rename (still WiP)
- call init/exit functions from dev.c
v3 - Kurt Van Dijck
- safe operation for non-candev based devices (vcan, slcan)
	based on earlier patch
v4 - Kurt Van Dijck
- trivial patch mistakes fixed

Signed-off-by: Kurt Van Dijck <kurt.van.dijck@eia.be>
Signed-off-by: Fabio Baltieri <fabio.baltieri@gmail.com>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/dev.c   |  5 +++++
 drivers/net/can/led.c   | 38 ++++++++++++++++++++++++++++++++++++++
 include/linux/can/led.h |  9 +++++++++
 3 files changed, 52 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c
index 13e738098fbe..6abc6e59778e 100644
--- a/drivers/net/can/dev.c
+++ b/drivers/net/can/dev.c
@@ -25,6 +25,7 @@
 #include <linux/can.h>
 #include <linux/can/dev.h>
 #include <linux/can/netlink.h>
+#include <linux/can/led.h>
 #include <net/rtnetlink.h>
 
 #define MOD_DESC "CAN device driver interface"
@@ -811,6 +812,8 @@ static __init int can_dev_init(void)
 {
 	int err;
 
+	can_led_notifier_init();
+
 	err = rtnl_link_register(&can_link_ops);
 	if (!err)
 		printk(KERN_INFO MOD_DESC "\n");
@@ -822,6 +825,8 @@ module_init(can_dev_init);
 static __exit void can_dev_exit(void)
 {
 	rtnl_link_unregister(&can_link_ops);
+
+	can_led_notifier_exit();
 }
 module_exit(can_dev_exit);
 
diff --git a/drivers/net/can/led.c b/drivers/net/can/led.c
index c50a0d741c57..f27fca65dc4a 100644
--- a/drivers/net/can/led.c
+++ b/drivers/net/can/led.c
@@ -1,5 +1,6 @@
 /*
  * Copyright 2012, Fabio Baltieri <fabio.baltieri@gmail.com>
+ * Copyright 2012, Kurt Van Dijck <kurt.van.dijck@eia.be>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -84,3 +85,40 @@ void devm_can_led_init(struct net_device *netdev)
 	devres_add(&netdev->dev, res);
 }
 EXPORT_SYMBOL_GPL(devm_can_led_init);
+
+/* NETDEV rename notifier to rename the associated led triggers too */
+static int can_led_notifier(struct notifier_block *nb, unsigned long msg,
+			void *data)
+{
+	struct net_device *netdev = data;
+	struct can_priv *priv = safe_candev_priv(netdev);
+	char name[CAN_LED_NAME_SZ];
+
+	if (!priv)
+		return NOTIFY_DONE;
+
+	if (msg == NETDEV_CHANGENAME) {
+		snprintf(name, sizeof(name), "%s-tx", netdev->name);
+		led_trigger_rename_static(name, priv->tx_led_trig);
+
+		snprintf(name, sizeof(name), "%s-rx", netdev->name);
+		led_trigger_rename_static(name, priv->rx_led_trig);
+	}
+
+	return NOTIFY_DONE;
+}
+
+/* notifier block for netdevice event */
+static struct notifier_block can_netdev_notifier __read_mostly = {
+	.notifier_call = can_led_notifier,
+};
+
+int __init can_led_notifier_init(void)
+{
+	return register_netdevice_notifier(&can_netdev_notifier);
+}
+
+void __exit can_led_notifier_exit(void)
+{
+	unregister_netdevice_notifier(&can_netdev_notifier);
+}
diff --git a/include/linux/can/led.h b/include/linux/can/led.h
index 12d5549abb95..9c1167baf273 100644
--- a/include/linux/can/led.h
+++ b/include/linux/can/led.h
@@ -26,6 +26,8 @@ enum can_led_event {
 
 void can_led_event(struct net_device *netdev, enum can_led_event event);
 void devm_can_led_init(struct net_device *netdev);
+int __init can_led_notifier_init(void);
+void __exit can_led_notifier_exit(void);
 
 #else
 
@@ -36,6 +38,13 @@ static inline void can_led_event(struct net_device *netdev,
 static inline void devm_can_led_init(struct net_device *netdev)
 {
 }
+static inline int can_led_notifier_init(void)
+{
+	return 0;
+}
+static inline void can_led_notifier_exit(void)
+{
+}
 
 #endif
 
-- 
cgit v1.2.3


From 156c2bb9f88065c8da78814f98fde665a5cbb527 Mon Sep 17 00:00:00 2001
From: Oliver Hartkopp <socketcan@hartkopp.net>
Date: Thu, 17 Jan 2013 18:43:39 +0100
Subject: can: add private data space for CAN sk_buffs

The struct can_skb_priv is used to transport additional information along
with the stored struct can(fd)_frame that can not be contained in existing
struct sk_buff elements.

can_skb_priv is located in the skb headroom, which does not touch the existing
CAN sk_buff usage with skb->data and skb->len, so that even out-of-tree
CAN drivers can be used without changes.

Btw. out-of-tree CAN drivers without can_skb_priv in the sk_buff headroom
would not support features based on can_skb_priv.

The can_skb_priv->ifindex contains the first interface where the CAN frame
appeared on the local host. Unfortunately skb->skb_iif can not be used as this
value is overwritten in every netif_receive_skb() call.

Signed-off-by: Oliver Hartkopp <socketcan@hartkopp.net>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/dev.c   |  8 +++++++-
 drivers/net/can/slcan.c |  8 +++++++-
 include/linux/can/skb.h | 35 +++++++++++++++++++++++++++++++++++
 net/can/bcm.c           | 12 +++++++++---
 net/can/raw.c           |  8 ++++++--
 5 files changed, 64 insertions(+), 7 deletions(-)
 create mode 100644 include/linux/can/skb.h

(limited to 'include/linux')

diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c
index 6abc6e59778e..59ada082a994 100644
--- a/drivers/net/can/dev.c
+++ b/drivers/net/can/dev.c
@@ -24,6 +24,7 @@
 #include <linux/if_arp.h>
 #include <linux/can.h>
 #include <linux/can/dev.h>
+#include <linux/can/skb.h>
 #include <linux/can/netlink.h>
 #include <linux/can/led.h>
 #include <net/rtnetlink.h>
@@ -502,13 +503,18 @@ struct sk_buff *alloc_can_skb(struct net_device *dev, struct can_frame **cf)
 {
 	struct sk_buff *skb;
 
-	skb = netdev_alloc_skb(dev, sizeof(struct can_frame));
+	skb = netdev_alloc_skb(dev, sizeof(struct can_skb_priv) +
+			       sizeof(struct can_frame));
 	if (unlikely(!skb))
 		return NULL;
 
 	skb->protocol = htons(ETH_P_CAN);
 	skb->pkt_type = PACKET_BROADCAST;
 	skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+	skb_reserve(skb, sizeof(struct can_skb_priv));
+	((struct can_skb_priv *)(skb->head))->ifindex = dev->ifindex;
+
 	*cf = (struct can_frame *)skb_put(skb, sizeof(struct can_frame));
 	memset(*cf, 0, sizeof(struct can_frame));
 
diff --git a/drivers/net/can/slcan.c b/drivers/net/can/slcan.c
index adc3708d8829..e79a8d10e0fc 100644
--- a/drivers/net/can/slcan.c
+++ b/drivers/net/can/slcan.c
@@ -55,6 +55,7 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/can.h>
+#include <linux/can/skb.h>
 
 static __initconst const char banner[] =
 	KERN_INFO "slcan: serial line CAN interface driver\n";
@@ -184,7 +185,8 @@ static void slc_bump(struct slcan *sl)
 		cf.data[i] |= tmp;
 	}
 
-	skb = dev_alloc_skb(sizeof(struct can_frame));
+	skb = dev_alloc_skb(sizeof(struct can_frame) +
+			    sizeof(struct can_skb_priv));
 	if (!skb)
 		return;
 
@@ -192,6 +194,10 @@ static void slc_bump(struct slcan *sl)
 	skb->protocol = htons(ETH_P_CAN);
 	skb->pkt_type = PACKET_BROADCAST;
 	skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+	skb_reserve(skb, sizeof(struct can_skb_priv));
+	((struct can_skb_priv *)(skb->head))->ifindex = sl->dev->ifindex;
+
 	memcpy(skb_put(skb, sizeof(struct can_frame)),
 	       &cf, sizeof(struct can_frame));
 	netif_rx_ni(skb);
diff --git a/include/linux/can/skb.h b/include/linux/can/skb.h
new file mode 100644
index 000000000000..4b0f24d3a878
--- /dev/null
+++ b/include/linux/can/skb.h
@@ -0,0 +1,35 @@
+/*
+ * linux/can/skb.h
+ *
+ * Definitions for the CAN network socket buffer
+ *
+ * Copyright (C) 2012 Oliver Hartkopp <socketcan@hartkopp.net>
+ *
+ */
+
+#ifndef CAN_SKB_H
+#define CAN_SKB_H
+
+#include <linux/types.h>
+#include <linux/can.h>
+
+/*
+ * The struct can_skb_priv is used to transport additional information along
+ * with the stored struct can(fd)_frame that can not be contained in existing
+ * struct sk_buff elements.
+ * N.B. that this information must not be modified in cloned CAN sk_buffs.
+ * To modify the CAN frame content or the struct can_skb_priv content
+ * skb_copy() needs to be used instead of skb_clone().
+ */
+
+/**
+ * struct can_skb_priv - private additional data inside CAN sk_buffs
+ * @ifindex:	ifindex of the first interface the CAN frame appeared on
+ * @cf:		align to the following CAN frame at skb->data
+ */
+struct can_skb_priv {
+	int ifindex;
+	struct can_frame cf[0];
+};
+
+#endif /* CAN_SKB_H */
diff --git a/net/can/bcm.c b/net/can/bcm.c
index 969b7cdff59d..ccc27b9e8384 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -54,6 +54,7 @@
 #include <linux/skbuff.h>
 #include <linux/can.h>
 #include <linux/can/core.h>
+#include <linux/can/skb.h>
 #include <linux/can/bcm.h>
 #include <linux/slab.h>
 #include <net/sock.h>
@@ -256,10 +257,13 @@ static void bcm_can_tx(struct bcm_op *op)
 		return;
 	}
 
-	skb = alloc_skb(CFSIZ, gfp_any());
+	skb = alloc_skb(CFSIZ + sizeof(struct can_skb_priv), gfp_any());
 	if (!skb)
 		goto out;
 
+	skb_reserve(skb, sizeof(struct can_skb_priv));
+	((struct can_skb_priv *)(skb->head))->ifindex = dev->ifindex;
+
 	memcpy(skb_put(skb, CFSIZ), cf, CFSIZ);
 
 	/* send with loopback */
@@ -1199,11 +1203,12 @@ static int bcm_tx_send(struct msghdr *msg, int ifindex, struct sock *sk)
 	if (!ifindex)
 		return -ENODEV;
 
-	skb = alloc_skb(CFSIZ, GFP_KERNEL);
-
+	skb = alloc_skb(CFSIZ + sizeof(struct can_skb_priv), GFP_KERNEL);
 	if (!skb)
 		return -ENOMEM;
 
+	skb_reserve(skb, sizeof(struct can_skb_priv));
+
 	err = memcpy_fromiovec(skb_put(skb, CFSIZ), msg->msg_iov, CFSIZ);
 	if (err < 0) {
 		kfree_skb(skb);
@@ -1216,6 +1221,7 @@ static int bcm_tx_send(struct msghdr *msg, int ifindex, struct sock *sk)
 		return -ENODEV;
 	}
 
+	((struct can_skb_priv *)(skb->head))->ifindex = dev->ifindex;
 	skb->dev = dev;
 	skb->sk  = sk;
 	err = can_send(skb, 1); /* send with loopback */
diff --git a/net/can/raw.c b/net/can/raw.c
index 5b0e3e330d97..5d860e8dcc52 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -50,6 +50,7 @@
 #include <linux/skbuff.h>
 #include <linux/can.h>
 #include <linux/can/core.h>
+#include <linux/can/skb.h>
 #include <linux/can/raw.h>
 #include <net/sock.h>
 #include <net/net_namespace.h>
@@ -699,11 +700,14 @@ static int raw_sendmsg(struct kiocb *iocb, struct socket *sock,
 	if (!dev)
 		return -ENXIO;
 
-	skb = sock_alloc_send_skb(sk, size, msg->msg_flags & MSG_DONTWAIT,
-				  &err);
+	skb = sock_alloc_send_skb(sk, size + sizeof(struct can_skb_priv),
+				  msg->msg_flags & MSG_DONTWAIT, &err);
 	if (!skb)
 		goto put_dev;
 
+	skb_reserve(skb, sizeof(struct can_skb_priv));
+	((struct can_skb_priv *)(skb->head))->ifindex = dev->ifindex;
+
 	err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
 	if (err < 0)
 		goto free_skb;
-- 
cgit v1.2.3


From 95a79fd458b85132c25e351d45037ec9643312b2 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Mon, 7 Jan 2013 18:12:14 +0100
Subject: context_tracking: Export context state for generic vtime

Export the context state: whether we run in user / kernel
from the context tracking subsystem point of view.

This is going to be used by the generic virtual cputime
accounting subsystem that is needed to implement the full
dynticks.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Li Zhong <zhong@linux.vnet.ibm.com>
Cc: Namhyung Kim <namhyung.kim@lge.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/context_tracking.h | 28 ++++++++++++++++++++++++++++
 kernel/context_tracking.c        | 16 +---------------
 2 files changed, 29 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h
index e24339ccb7f0..b28d161c1091 100644
--- a/include/linux/context_tracking.h
+++ b/include/linux/context_tracking.h
@@ -3,12 +3,40 @@
 
 #ifdef CONFIG_CONTEXT_TRACKING
 #include <linux/sched.h>
+#include <linux/percpu.h>
+
+struct context_tracking {
+	/*
+	 * When active is false, probes are unset in order
+	 * to minimize overhead: TIF flags are cleared
+	 * and calls to user_enter/exit are ignored. This
+	 * may be further optimized using static keys.
+	 */
+	bool active;
+	enum {
+		IN_KERNEL = 0,
+		IN_USER,
+	} state;
+};
+
+DECLARE_PER_CPU(struct context_tracking, context_tracking);
+
+static inline bool context_tracking_in_user(void)
+{
+	return __this_cpu_read(context_tracking.state) == IN_USER;
+}
+
+static inline bool context_tracking_active(void)
+{
+	return __this_cpu_read(context_tracking.active);
+}
 
 extern void user_enter(void);
 extern void user_exit(void);
 extern void context_tracking_task_switch(struct task_struct *prev,
 					 struct task_struct *next);
 #else
+static inline bool context_tracking_in_user(void) { return false; }
 static inline void user_enter(void) { }
 static inline void user_exit(void) { }
 static inline void context_tracking_task_switch(struct task_struct *prev,
diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c
index e0e07fd55508..54f471e536dc 100644
--- a/kernel/context_tracking.c
+++ b/kernel/context_tracking.c
@@ -1,24 +1,10 @@
 #include <linux/context_tracking.h>
 #include <linux/rcupdate.h>
 #include <linux/sched.h>
-#include <linux/percpu.h>
 #include <linux/hardirq.h>
 
-struct context_tracking {
-	/*
-	 * When active is false, hooks are not set to
-	 * minimize overhead: TIF flags are cleared
-	 * and calls to user_enter/exit are ignored. This
-	 * may be further optimized using static keys.
-	 */
-	bool active;
-	enum {
-		IN_KERNEL = 0,
-		IN_USER,
-	} state;
-};
 
-static DEFINE_PER_CPU(struct context_tracking, context_tracking) = {
+DEFINE_PER_CPU(struct context_tracking, context_tracking) = {
 #ifdef CONFIG_CONTEXT_TRACKING_FORCE
 	.active = true,
 #endif
-- 
cgit v1.2.3


From d817ac4e181710cd02b582b759d3123ad2cfa8d8 Mon Sep 17 00:00:00 2001
From: Wei WANG <wei_wang@realsil.com.cn>
Date: Wed, 23 Jan 2013 09:51:04 +0800
Subject: mfd: rtsx: Add output voltage switch hook

Different card reader has different method to switch output voltage,
add this callback to let the card reader implement its individual switch
function.
This is needed as rtl8411 has a specific switch output voltage procedure.

Signed-off-by: Wei WANG <wei_wang@realsil.com.cn>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/rtl8411.c        | 16 ++++++++++++++++
 drivers/mfd/rts5209.c        | 20 ++++++++++++++++++++
 drivers/mfd/rts5229.c        | 20 ++++++++++++++++++++
 drivers/mfd/rtsx_pcr.c       |  9 +++++++++
 include/linux/mfd/rtsx_pci.h | 24 ++++++++++++++++++++----
 5 files changed, 85 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/rtl8411.c b/drivers/mfd/rtl8411.c
index 89f046ca9e41..5058ba8dec3b 100644
--- a/drivers/mfd/rtl8411.c
+++ b/drivers/mfd/rtl8411.c
@@ -112,6 +112,21 @@ static int rtl8411_card_power_off(struct rtsx_pcr *pcr, int card)
 			BPP_LDO_POWB, BPP_LDO_SUSPEND);
 }
 
+static int rtl8411_switch_output_voltage(struct rtsx_pcr *pcr, u8 voltage)
+{
+	u8 mask, val;
+
+	mask = (BPP_REG_TUNED18 << BPP_TUNED18_SHIFT_8411) | BPP_PAD_MASK;
+	if (voltage == OUTPUT_3V3)
+		val = (BPP_ASIC_3V3 << BPP_TUNED18_SHIFT_8411) | BPP_PAD_3V3;
+	else if (voltage == OUTPUT_1V8)
+		val = (BPP_ASIC_1V8 << BPP_TUNED18_SHIFT_8411) | BPP_PAD_1V8;
+	else
+		return -EINVAL;
+
+	return rtsx_pci_write_register(pcr, LDO_CTL, mask, val);
+}
+
 static unsigned int rtl8411_cd_deglitch(struct rtsx_pcr *pcr)
 {
 	unsigned int card_exist;
@@ -172,6 +187,7 @@ static const struct pcr_ops rtl8411_pcr_ops = {
 	.disable_auto_blink = rtl8411_disable_auto_blink,
 	.card_power_on = rtl8411_card_power_on,
 	.card_power_off = rtl8411_card_power_off,
+	.switch_output_voltage = rtl8411_switch_output_voltage,
 	.cd_deglitch = rtl8411_cd_deglitch,
 };
 
diff --git a/drivers/mfd/rts5209.c b/drivers/mfd/rts5209.c
index 283a4f148084..ba74de8a7c24 100644
--- a/drivers/mfd/rts5209.c
+++ b/drivers/mfd/rts5209.c
@@ -144,6 +144,25 @@ static int rts5209_card_power_off(struct rtsx_pcr *pcr, int card)
 	return rtsx_pci_send_cmd(pcr, 100);
 }
 
+static int rts5209_switch_output_voltage(struct rtsx_pcr *pcr, u8 voltage)
+{
+	int err;
+
+	if (voltage == OUTPUT_3V3) {
+		err = rtsx_pci_write_phy_register(pcr, 0x08, 0x4FC0 | 0x24);
+		if (err < 0)
+			return err;
+	} else if (voltage == OUTPUT_1V8) {
+		err = rtsx_pci_write_phy_register(pcr, 0x08, 0x4C40 | 0x24);
+		if (err < 0)
+			return err;
+	} else {
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static const struct pcr_ops rts5209_pcr_ops = {
 	.extra_init_hw = rts5209_extra_init_hw,
 	.optimize_phy = rts5209_optimize_phy,
@@ -153,6 +172,7 @@ static const struct pcr_ops rts5209_pcr_ops = {
 	.disable_auto_blink = rts5209_disable_auto_blink,
 	.card_power_on = rts5209_card_power_on,
 	.card_power_off = rts5209_card_power_off,
+	.switch_output_voltage = rts5209_switch_output_voltage,
 	.cd_deglitch = NULL,
 };
 
diff --git a/drivers/mfd/rts5229.c b/drivers/mfd/rts5229.c
index b9dbab266fda..ec1747adf5d3 100644
--- a/drivers/mfd/rts5229.c
+++ b/drivers/mfd/rts5229.c
@@ -114,6 +114,25 @@ static int rts5229_card_power_off(struct rtsx_pcr *pcr, int card)
 	return rtsx_pci_send_cmd(pcr, 100);
 }
 
+static int rts5229_switch_output_voltage(struct rtsx_pcr *pcr, u8 voltage)
+{
+	int err;
+
+	if (voltage == OUTPUT_3V3) {
+		err = rtsx_pci_write_phy_register(pcr, 0x08, 0x4FC0 | 0x24);
+		if (err < 0)
+			return err;
+	} else if (voltage == OUTPUT_1V8) {
+		err = rtsx_pci_write_phy_register(pcr, 0x08, 0x4C40 | 0x24);
+		if (err < 0)
+			return err;
+	} else {
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static const struct pcr_ops rts5229_pcr_ops = {
 	.extra_init_hw = rts5229_extra_init_hw,
 	.optimize_phy = rts5229_optimize_phy,
@@ -123,6 +142,7 @@ static const struct pcr_ops rts5229_pcr_ops = {
 	.disable_auto_blink = rts5229_disable_auto_blink,
 	.card_power_on = rts5229_card_power_on,
 	.card_power_off = rts5229_card_power_off,
+	.switch_output_voltage = rts5229_switch_output_voltage,
 	.cd_deglitch = NULL,
 };
 
diff --git a/drivers/mfd/rtsx_pcr.c b/drivers/mfd/rtsx_pcr.c
index 7a7b0bda4618..f4e02d089271 100644
--- a/drivers/mfd/rtsx_pcr.c
+++ b/drivers/mfd/rtsx_pcr.c
@@ -703,6 +703,15 @@ int rtsx_pci_card_power_off(struct rtsx_pcr *pcr, int card)
 }
 EXPORT_SYMBOL_GPL(rtsx_pci_card_power_off);
 
+int rtsx_pci_switch_output_voltage(struct rtsx_pcr *pcr, u8 voltage)
+{
+	if (pcr->ops->switch_output_voltage)
+		return pcr->ops->switch_output_voltage(pcr, voltage);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_switch_output_voltage);
+
 unsigned int rtsx_pci_card_exist(struct rtsx_pcr *pcr)
 {
 	unsigned int val;
diff --git a/include/linux/mfd/rtsx_pci.h b/include/linux/mfd/rtsx_pci.h
index 060b721fcbfb..271b6e5654af 100644
--- a/include/linux/mfd/rtsx_pci.h
+++ b/include/linux/mfd/rtsx_pci.h
@@ -158,10 +158,9 @@
 #define SG_TRANS_DATA		(0x02 << 4)
 #define SG_LINK_DESC		(0x03 << 4)
 
-/* SD bank voltage */
-#define SD_IO_3V3		0
-#define SD_IO_1V8		1
-
+/* Output voltage */
+#define OUTPUT_3V3		0
+#define OUTPUT_1V8		1
 
 /* Card Clock Enable Register */
 #define SD_CLK_EN			0x04
@@ -201,6 +200,20 @@
 #define CHANGE_CLK			0x01
 
 /* LDO_CTL */
+#define BPP_ASIC_1V7			0x00
+#define BPP_ASIC_1V8			0x01
+#define BPP_ASIC_1V9			0x02
+#define BPP_ASIC_2V0			0x03
+#define BPP_ASIC_2V7			0x04
+#define BPP_ASIC_2V8			0x05
+#define BPP_ASIC_3V2			0x06
+#define BPP_ASIC_3V3			0x07
+#define BPP_REG_TUNED18			0x07
+#define BPP_TUNED18_SHIFT_8402		5
+#define BPP_TUNED18_SHIFT_8411		4
+#define BPP_PAD_MASK			0x04
+#define BPP_PAD_3V3			0x04
+#define BPP_PAD_1V8			0x00
 #define BPP_LDO_POWB			0x03
 #define BPP_LDO_ON			0x00
 #define BPP_LDO_SUSPEND			0x02
@@ -688,6 +701,8 @@ struct pcr_ops {
 	int		(*disable_auto_blink)(struct rtsx_pcr *pcr);
 	int		(*card_power_on)(struct rtsx_pcr *pcr, int card);
 	int		(*card_power_off)(struct rtsx_pcr *pcr, int card);
+	int		(*switch_output_voltage)(struct rtsx_pcr *pcr,
+						u8 voltage);
 	unsigned int	(*cd_deglitch)(struct rtsx_pcr *pcr);
 };
 
@@ -783,6 +798,7 @@ int rtsx_pci_switch_clock(struct rtsx_pcr *pcr, unsigned int card_clock,
 		u8 ssc_depth, bool initial_mode, bool double_clk, bool vpclk);
 int rtsx_pci_card_power_on(struct rtsx_pcr *pcr, int card);
 int rtsx_pci_card_power_off(struct rtsx_pcr *pcr, int card);
+int rtsx_pci_switch_output_voltage(struct rtsx_pcr *pcr, u8 voltage);
 unsigned int rtsx_pci_card_exist(struct rtsx_pcr *pcr);
 void rtsx_pci_complete_unfinished_transfer(struct rtsx_pcr *pcr);
 
-- 
cgit v1.2.3


From ab4e8f8b7bdfeff0c961fdbbdacb262d68f094c0 Mon Sep 17 00:00:00 2001
From: Wei WANG <wei_wang@realsil.com.cn>
Date: Wed, 23 Jan 2013 09:51:06 +0800
Subject: mfd: rtsx: Add clock divider hook

Add callback function conv_clk_and_div_n to convert between SSC clock
and its divider N.
For rtl8411, the formula to calculate SSC clock divider N is different
with the other card reader models.

Signed-off-by: Wei WANG <wei_wang@realsil.com.cn>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/rtl8411.c           | 13 +++++++++++++
 drivers/mfd/rts5209.c           |  1 +
 drivers/mfd/rts5229.c           |  1 +
 drivers/mfd/rtsx_pcr.c          | 14 ++++++++++++--
 include/linux/mfd/rtsx_common.h |  3 +++
 include/linux/mfd/rtsx_pci.h    |  1 +
 6 files changed, 31 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/rtl8411.c b/drivers/mfd/rtl8411.c
index 5058ba8dec3b..3d3b4addf81a 100644
--- a/drivers/mfd/rtl8411.c
+++ b/drivers/mfd/rtl8411.c
@@ -178,6 +178,18 @@ static unsigned int rtl8411_cd_deglitch(struct rtsx_pcr *pcr)
 	return card_exist;
 }
 
+static int rtl8411_conv_clk_and_div_n(int input, int dir)
+{
+	int output;
+
+	if (dir == CLK_TO_DIV_N)
+		output = input * 4 / 5 - 2;
+	else
+		output = (input + 2) * 5 / 4;
+
+	return output;
+}
+
 static const struct pcr_ops rtl8411_pcr_ops = {
 	.extra_init_hw = rtl8411_extra_init_hw,
 	.optimize_phy = NULL,
@@ -189,6 +201,7 @@ static const struct pcr_ops rtl8411_pcr_ops = {
 	.card_power_off = rtl8411_card_power_off,
 	.switch_output_voltage = rtl8411_switch_output_voltage,
 	.cd_deglitch = rtl8411_cd_deglitch,
+	.conv_clk_and_div_n = rtl8411_conv_clk_and_div_n,
 };
 
 /* SD Pull Control Enable:
diff --git a/drivers/mfd/rts5209.c b/drivers/mfd/rts5209.c
index ba74de8a7c24..98fe0f39463e 100644
--- a/drivers/mfd/rts5209.c
+++ b/drivers/mfd/rts5209.c
@@ -174,6 +174,7 @@ static const struct pcr_ops rts5209_pcr_ops = {
 	.card_power_off = rts5209_card_power_off,
 	.switch_output_voltage = rts5209_switch_output_voltage,
 	.cd_deglitch = NULL,
+	.conv_clk_and_div_n = NULL,
 };
 
 /* SD Pull Control Enable:
diff --git a/drivers/mfd/rts5229.c b/drivers/mfd/rts5229.c
index ec1747adf5d3..29d889cbb9c5 100644
--- a/drivers/mfd/rts5229.c
+++ b/drivers/mfd/rts5229.c
@@ -144,6 +144,7 @@ static const struct pcr_ops rts5229_pcr_ops = {
 	.card_power_off = rts5229_card_power_off,
 	.switch_output_voltage = rts5229_switch_output_voltage,
 	.cd_deglitch = NULL,
+	.conv_clk_and_div_n = NULL,
 };
 
 /* SD Pull Control Enable:
diff --git a/drivers/mfd/rtsx_pcr.c b/drivers/mfd/rtsx_pcr.c
index f4e02d089271..910200e10fa0 100644
--- a/drivers/mfd/rtsx_pcr.c
+++ b/drivers/mfd/rtsx_pcr.c
@@ -630,7 +630,10 @@ int rtsx_pci_switch_clock(struct rtsx_pcr *pcr, unsigned int card_clock,
 	if (clk == pcr->cur_clock)
 		return 0;
 
-	N = (u8)(clk - 2);
+	if (pcr->ops->conv_clk_and_div_n)
+		N = (u8)pcr->ops->conv_clk_and_div_n(clk, CLK_TO_DIV_N);
+	else
+		N = (u8)(clk - 2);
 	if ((clk <= 2) || (N > max_N))
 		return -EINVAL;
 
@@ -641,7 +644,14 @@ int rtsx_pci_switch_clock(struct rtsx_pcr *pcr, unsigned int card_clock,
 	/* Make sure that the SSC clock div_n is equal or greater than min_N */
 	div = CLK_DIV_1;
 	while ((N < min_N) && (div < max_div)) {
-		N = (N + 2) * 2 - 2;
+		if (pcr->ops->conv_clk_and_div_n) {
+			int dbl_clk = pcr->ops->conv_clk_and_div_n(N,
+					DIV_N_TO_CLK) * 2;
+			N = (u8)pcr->ops->conv_clk_and_div_n(dbl_clk,
+					CLK_TO_DIV_N);
+		} else {
+			N = (N + 2) * 2 - 2;
+		}
 		div++;
 	}
 	dev_dbg(&(pcr->pci->dev), "N = %d, div = %d\n", N, div);
diff --git a/include/linux/mfd/rtsx_common.h b/include/linux/mfd/rtsx_common.h
index a8d393e3066b..2b13970596f5 100644
--- a/include/linux/mfd/rtsx_common.h
+++ b/include/linux/mfd/rtsx_common.h
@@ -38,6 +38,9 @@
 #define RTSX_SD_CARD			0
 #define RTSX_MS_CARD			1
 
+#define CLK_TO_DIV_N			0
+#define DIV_N_TO_CLK			1
+
 struct platform_device;
 
 struct rtsx_slot {
diff --git a/include/linux/mfd/rtsx_pci.h b/include/linux/mfd/rtsx_pci.h
index 271b6e5654af..4b117a3f54d4 100644
--- a/include/linux/mfd/rtsx_pci.h
+++ b/include/linux/mfd/rtsx_pci.h
@@ -704,6 +704,7 @@ struct pcr_ops {
 	int		(*switch_output_voltage)(struct rtsx_pcr *pcr,
 						u8 voltage);
 	unsigned int	(*cd_deglitch)(struct rtsx_pcr *pcr);
+	int		(*conv_clk_and_div_n)(int clk, int dir);
 };
 
 enum PDEV_STAT  {PDEV_STAT_IDLE, PDEV_STAT_RUN};
-- 
cgit v1.2.3


From 90f45e4e729a7ffaa3ed2423834aad612870b427 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Thu, 10 Jan 2013 05:24:49 -0800
Subject: rcu: Remove obsolete Kconfig option from comment

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rcupdate.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 275aa3f1062d..7e12dbaa1457 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -749,7 +749,7 @@ static inline void rcu_preempt_sleep_check(void)
  * preemptible RCU implementations (TREE_PREEMPT_RCU and TINY_PREEMPT_RCU)
  * in CONFIG_PREEMPT kernel builds, RCU read-side critical sections may
  * be preempted, but explicit blocking is illegal.  Finally, in preemptible
- * RCU implementations in real-time (CONFIG_PREEMPT_RT) kernel builds,
+ * RCU implementations in real-time (with -rt patchset) kernel builds,
  * RCU read-side critical sections may be preempted and they may also
  * block, but only when acquiring spinlocks that are subject to priority
  * inheritance.
-- 
cgit v1.2.3


From 0a8c290ac58a86d5e1f2193abcd4d74ec075e20c Mon Sep 17 00:00:00 2001
From: Ashish Jangam <ashish.jangam@kpitcummins.com>
Date: Fri, 25 Jan 2013 14:03:49 +0530
Subject: mfd: da9052/53 lockup fix

An issue has been reported where the PMIC either locks up or fails to
respond following a system Reset. This could result in a second write
in which the bus writes the current content of the write buffer to address
of the last I2C access.

The failure case is where this unwanted write transfers incorrect data to
a critical register.

This patch fixes this issue to by following any read or write with a dummy read
to a safe register address. A safe register address is one where the contents
will not affect the operation of the system.

Signed-off-by: Ashish Jangam <ashish.jangam@kpitcummins.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/da9052-i2c.c          | 61 ++++++++++++++++++++++++++++++++++++
 include/linux/mfd/da9052/da9052.h | 66 ++++++++++++++++++++++++++++++++++++---
 include/linux/mfd/da9052/reg.h    |  3 ++
 3 files changed, 126 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/da9052-i2c.c b/drivers/mfd/da9052-i2c.c
index ac74a4d1daea..885e56780358 100644
--- a/drivers/mfd/da9052-i2c.c
+++ b/drivers/mfd/da9052-i2c.c
@@ -27,6 +27,66 @@
 #include <linux/of_device.h>
 #endif
 
+/* I2C safe register check */
+static inline bool i2c_safe_reg(unsigned char reg)
+{
+	switch (reg) {
+	case DA9052_STATUS_A_REG:
+	case DA9052_STATUS_B_REG:
+	case DA9052_STATUS_C_REG:
+	case DA9052_STATUS_D_REG:
+	case DA9052_ADC_RES_L_REG:
+	case DA9052_ADC_RES_H_REG:
+	case DA9052_VDD_RES_REG:
+	case DA9052_ICHG_AV_REG:
+	case DA9052_TBAT_RES_REG:
+	case DA9052_ADCIN4_RES_REG:
+	case DA9052_ADCIN5_RES_REG:
+	case DA9052_ADCIN6_RES_REG:
+	case DA9052_TJUNC_RES_REG:
+	case DA9052_TSI_X_MSB_REG:
+	case DA9052_TSI_Y_MSB_REG:
+	case DA9052_TSI_LSB_REG:
+	case DA9052_TSI_Z_MSB_REG:
+		return true;
+	default:
+		return false;
+	}
+}
+
+/*
+ * There is an issue with DA9052 and DA9053_AA/BA/BB PMIC where the PMIC
+ * gets lockup up or fails to respond following a system reset.
+ * This fix is to follow any read or write with a dummy read to a safe
+ * register.
+ */
+int da9052_i2c_fix(struct da9052 *da9052, unsigned char reg)
+{
+	int val;
+
+	switch (da9052->chip_id) {
+	case DA9052:
+	case DA9053_AA:
+	case DA9053_BA:
+	case DA9053_BB:
+		/* A dummy read to a safe register address. */
+	if (!i2c_safe_reg(reg))
+			return regmap_read(da9052->regmap,
+					   DA9052_PARK_REGISTER,
+					   &val);
+		break;
+	default:
+		/*
+		 * For other chips parking of I2C register
+		 * to a safe place is not required.
+		 */
+		break;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(da9052_i2c_fix);
+
 static int da9052_i2c_enable_multiwrite(struct da9052 *da9052)
 {
 	int reg_val, ret;
@@ -83,6 +143,7 @@ static int da9052_i2c_probe(struct i2c_client *client,
 
 	da9052->dev = &client->dev;
 	da9052->chip_irq = client->irq;
+	da9052->fix_io = da9052_i2c_fix;
 
 	i2c_set_clientdata(client, da9052);
 
diff --git a/include/linux/mfd/da9052/da9052.h b/include/linux/mfd/da9052/da9052.h
index 86dd93de6ff2..786d02eb79d2 100644
--- a/include/linux/mfd/da9052/da9052.h
+++ b/include/linux/mfd/da9052/da9052.h
@@ -99,6 +99,9 @@ struct da9052 {
 	u8 chip_id;
 
 	int chip_irq;
+
+	/* SOC I/O transfer related fixes for DA9052/53 */
+	int (*fix_io) (struct da9052 *da9052, unsigned char reg);
 };
 
 /* ADC API */
@@ -113,32 +116,87 @@ static inline int da9052_reg_read(struct da9052 *da9052, unsigned char reg)
 	ret = regmap_read(da9052->regmap, reg, &val);
 	if (ret < 0)
 		return ret;
+
+	if (da9052->fix_io) {
+		ret = da9052->fix_io(da9052, reg);
+		if (ret < 0)
+			return ret;
+	}
+
 	return val;
 }
 
 static inline int da9052_reg_write(struct da9052 *da9052, unsigned char reg,
 				    unsigned char val)
 {
-	return regmap_write(da9052->regmap, reg, val);
+	int ret;
+
+	ret = regmap_write(da9052->regmap, reg, val);
+	if (ret < 0)
+		return ret;
+
+	if (da9052->fix_io) {
+		ret = da9052->fix_io(da9052, reg);
+		if (ret < 0)
+			return ret;
+	}
+
+	return ret;
 }
 
 static inline int da9052_group_read(struct da9052 *da9052, unsigned char reg,
 				     unsigned reg_cnt, unsigned char *val)
 {
-	return regmap_bulk_read(da9052->regmap, reg, val, reg_cnt);
+	int ret;
+
+	ret = regmap_bulk_read(da9052->regmap, reg, val, reg_cnt);
+	if (ret < 0)
+		return ret;
+
+	if (da9052->fix_io) {
+		ret = da9052->fix_io(da9052, reg);
+		if (ret < 0)
+			return ret;
+	}
+
+	return ret;
 }
 
 static inline int da9052_group_write(struct da9052 *da9052, unsigned char reg,
 				      unsigned reg_cnt, unsigned char *val)
 {
-	return regmap_raw_write(da9052->regmap, reg, val, reg_cnt);
+	int ret;
+
+	ret = regmap_raw_write(da9052->regmap, reg, val, reg_cnt);
+	if (ret < 0)
+		return ret;
+
+	if (da9052->fix_io) {
+		ret = da9052->fix_io(da9052, reg);
+		if (ret < 0)
+			return ret;
+	}
+
+	return ret;
 }
 
 static inline int da9052_reg_update(struct da9052 *da9052, unsigned char reg,
 				     unsigned char bit_mask,
 				     unsigned char reg_val)
 {
-	return regmap_update_bits(da9052->regmap, reg, bit_mask, reg_val);
+	int ret;
+
+	ret = regmap_update_bits(da9052->regmap, reg, bit_mask, reg_val);
+	if (ret < 0)
+		return ret;
+
+	if (da9052->fix_io) {
+		ret = da9052->fix_io(da9052, reg);
+		if (ret < 0)
+			return ret;
+	}
+
+	return ret;
 }
 
 int da9052_device_init(struct da9052 *da9052, u8 chip_id);
diff --git a/include/linux/mfd/da9052/reg.h b/include/linux/mfd/da9052/reg.h
index b97f7309d7f6..c4dd3a8add21 100644
--- a/include/linux/mfd/da9052/reg.h
+++ b/include/linux/mfd/da9052/reg.h
@@ -34,6 +34,9 @@
 #define DA9052_STATUS_C_REG		3
 #define DA9052_STATUS_D_REG		4
 
+/* PARK REGISTER */
+#define DA9052_PARK_REGISTER		DA9052_STATUS_D_REG
+
 /* EVENT REGISTERS */
 #define DA9052_EVENT_A_REG		5
 #define DA9052_EVENT_B_REG		6
-- 
cgit v1.2.3


From c61a2810a2161986353705b44d9503e6bb079f4f Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Fri, 28 Dec 2012 18:58:39 -0800
Subject: userns: Avoid recursion in put_user_ns

When freeing a deeply nested user namespace free_user_ns calls
put_user_ns on it's parent which may in turn call free_user_ns again.
When -fno-optimize-sibling-calls is passed to gcc one stack frame per
user namespace is left on the stack, potentially overflowing the
kernel stack.  CONFIG_FRAME_POINTER forces -fno-optimize-sibling-calls
so we can't count on gcc to optimize this code.

Remove struct kref and use a plain atomic_t.  Making the code more
flexible and easier to comprehend.  Make the loop in free_user_ns
explict to guarantee that the stack does not overflow with
CONFIG_FRAME_POINTER enabled.

I have tested this fix with a simple program that uses unshare to
create a deeply nested user namespace structure and then calls exit.
With 1000 nesteuser namespaces before this change running my test
program causes the kernel to die a horrible death.  With 10,000,000
nested user namespaces after this change my test program runs to
completion and causes no harm.

Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
Pointed-out-by: Vasily Kulikov <segoon@openwall.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/linux/user_namespace.h | 10 +++++-----
 kernel/user.c                  |  4 +---
 kernel/user_namespace.c        | 17 +++++++++--------
 3 files changed, 15 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index b9bd2e6c73cc..4ce009324933 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -21,7 +21,7 @@ struct user_namespace {
 	struct uid_gid_map	uid_map;
 	struct uid_gid_map	gid_map;
 	struct uid_gid_map	projid_map;
-	struct kref		kref;
+	atomic_t		count;
 	struct user_namespace	*parent;
 	kuid_t			owner;
 	kgid_t			group;
@@ -35,18 +35,18 @@ extern struct user_namespace init_user_ns;
 static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
 {
 	if (ns)
-		kref_get(&ns->kref);
+		atomic_inc(&ns->count);
 	return ns;
 }
 
 extern int create_user_ns(struct cred *new);
 extern int unshare_userns(unsigned long unshare_flags, struct cred **new_cred);
-extern void free_user_ns(struct kref *kref);
+extern void free_user_ns(struct user_namespace *ns);
 
 static inline void put_user_ns(struct user_namespace *ns)
 {
-	if (ns)
-		kref_put(&ns->kref, free_user_ns);
+	if (ns && atomic_dec_and_test(&ns->count))
+		free_user_ns(ns);
 }
 
 struct seq_operations;
diff --git a/kernel/user.c b/kernel/user.c
index 33acb5e53a5f..57ebfd42023c 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -47,9 +47,7 @@ struct user_namespace init_user_ns = {
 			.count = 4294967295U,
 		},
 	},
-	.kref = {
-		.refcount	= ATOMIC_INIT(3),
-	},
+	.count = ATOMIC_INIT(3),
 	.owner = GLOBAL_ROOT_UID,
 	.group = GLOBAL_ROOT_GID,
 	.proc_inum = PROC_USER_INIT_INO,
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 2b042c42fbc4..24f8ec3b64d8 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -78,7 +78,7 @@ int create_user_ns(struct cred *new)
 		return ret;
 	}
 
-	kref_init(&ns->kref);
+	atomic_set(&ns->count, 1);
 	/* Leave the new->user_ns reference with the new user namespace. */
 	ns->parent = parent_ns;
 	ns->owner = owner;
@@ -104,15 +104,16 @@ int unshare_userns(unsigned long unshare_flags, struct cred **new_cred)
 	return create_user_ns(cred);
 }
 
-void free_user_ns(struct kref *kref)
+void free_user_ns(struct user_namespace *ns)
 {
-	struct user_namespace *parent, *ns =
-		container_of(kref, struct user_namespace, kref);
+	struct user_namespace *parent;
 
-	parent = ns->parent;
-	proc_free_inum(ns->proc_inum);
-	kmem_cache_free(user_ns_cachep, ns);
-	put_user_ns(parent);
+	do {
+		parent = ns->parent;
+		proc_free_inum(ns->proc_inum);
+		kmem_cache_free(user_ns_cachep, ns);
+		ns = parent;
+	} while (atomic_dec_and_test(&parent->count));
 }
 EXPORT_SYMBOL(free_user_ns);
 
-- 
cgit v1.2.3


From 9c2251dd4b7feba14d35835c6835024840b1f76b Mon Sep 17 00:00:00 2001
From: Jonathan Cameron <jic23@kernel.org>
Date: Sat, 12 Jan 2013 10:35:00 +0000
Subject: iio:light:tsl2563 move out of staging

This driver is simple, uses the latest interfaces and contains few if
any controversial elements.  All of its interfaces have been in place
for a long time now.  Hence let's move it out of staging.

Signed-off-by: Jonathan Cameron <jic23@kernel.org>
Acked-by: Peter Meerwald <pmeerw@pmeerw.net>
---
 arch/arm/mach-omap2/board-rx51-peripherals.c |   2 +-
 drivers/iio/light/Kconfig                    |  10 +
 drivers/iio/light/Makefile                   |   1 +
 drivers/iio/light/tsl2563.c                  | 887 +++++++++++++++++++++++++++
 drivers/staging/iio/light/Kconfig            |  10 -
 drivers/staging/iio/light/Makefile           |   1 -
 drivers/staging/iio/light/tsl2563.c          | 887 ---------------------------
 drivers/staging/iio/light/tsl2563.h          |   9 -
 include/linux/platform_data/tsl2563.h        |   8 +
 9 files changed, 907 insertions(+), 908 deletions(-)
 create mode 100644 drivers/iio/light/tsl2563.c
 delete mode 100644 drivers/staging/iio/light/tsl2563.c
 delete mode 100644 drivers/staging/iio/light/tsl2563.h
 create mode 100644 include/linux/platform_data/tsl2563.h

(limited to 'include/linux')

diff --git a/arch/arm/mach-omap2/board-rx51-peripherals.c b/arch/arm/mach-omap2/board-rx51-peripherals.c
index cf07e289b4ea..f3d075baebb6 100644
--- a/arch/arm/mach-omap2/board-rx51-peripherals.c
+++ b/arch/arm/mach-omap2/board-rx51-peripherals.c
@@ -42,7 +42,7 @@
 #include <media/si4713.h>
 #include <linux/leds-lp5523.h>
 
-#include <../drivers/staging/iio/light/tsl2563.h>
+#include <linux/platform_data/tsl2563.h>
 #include <linux/lis3lv02d.h>
 
 #if defined(CONFIG_IR_RX51) || defined(CONFIG_IR_RX51_MODULE)
diff --git a/drivers/iio/light/Kconfig b/drivers/iio/light/Kconfig
index dbf80abc834f..5ef1a396e0c9 100644
--- a/drivers/iio/light/Kconfig
+++ b/drivers/iio/light/Kconfig
@@ -32,6 +32,16 @@ config SENSORS_LM3533
 	  changes. The ALS-control output values can be set per zone for the
 	  three current output channels.
 
+config SENSORS_TSL2563
+	tristate "TAOS TSL2560, TSL2561, TSL2562 and TSL2563 ambient light sensors"
+	depends on I2C
+	help
+	 If you say yes here you get support for the Taos TSL2560,
+	 TSL2561, TSL2562 and TSL2563 ambient light sensors.
+
+	 This driver can also be built as a module.  If so, the module
+	 will be called tsl2563.
+
 config VCNL4000
 	tristate "VCNL4000 combined ALS and proximity sensor"
 	depends on I2C
diff --git a/drivers/iio/light/Makefile b/drivers/iio/light/Makefile
index 21a8f0df1407..040d9c75f8e6 100644
--- a/drivers/iio/light/Makefile
+++ b/drivers/iio/light/Makefile
@@ -4,5 +4,6 @@
 
 obj-$(CONFIG_ADJD_S311)		+= adjd_s311.o
 obj-$(CONFIG_SENSORS_LM3533)	+= lm3533-als.o
+obj-$(CONFIG_SENSORS_TSL2563)	+= tsl2563.o
 obj-$(CONFIG_VCNL4000)		+= vcnl4000.o
 obj-$(CONFIG_HID_SENSOR_ALS)	+= hid-sensor-als.o
diff --git a/drivers/iio/light/tsl2563.c b/drivers/iio/light/tsl2563.c
new file mode 100644
index 000000000000..fd8be69b7d05
--- /dev/null
+++ b/drivers/iio/light/tsl2563.c
@@ -0,0 +1,887 @@
+/*
+ * drivers/iio/light/tsl2563.c
+ *
+ * Copyright (C) 2008 Nokia Corporation
+ *
+ * Written by Timo O. Karjalainen <timo.o.karjalainen@nokia.com>
+ * Contact: Amit Kucheria <amit.kucheria@verdurent.com>
+ *
+ * Converted to IIO driver
+ * Amit Kucheria <amit.kucheria@verdurent.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ */
+
+#include <linux/module.h>
+#include <linux/i2c.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/sched.h>
+#include <linux/mutex.h>
+#include <linux/delay.h>
+#include <linux/pm.h>
+#include <linux/err.h>
+#include <linux/slab.h>
+
+#include <linux/iio/iio.h>
+#include <linux/iio/sysfs.h>
+#include <linux/iio/events.h>
+#include <linux/platform_data/tsl2563.h>
+
+/* Use this many bits for fraction part. */
+#define ADC_FRAC_BITS		14
+
+/* Given number of 1/10000's in ADC_FRAC_BITS precision. */
+#define FRAC10K(f)		(((f) * (1L << (ADC_FRAC_BITS))) / (10000))
+
+/* Bits used for fraction in calibration coefficients.*/
+#define CALIB_FRAC_BITS		10
+/* 0.5 in CALIB_FRAC_BITS precision */
+#define CALIB_FRAC_HALF		(1 << (CALIB_FRAC_BITS - 1))
+/* Make a fraction from a number n that was multiplied with b. */
+#define CALIB_FRAC(n, b)	(((n) << CALIB_FRAC_BITS) / (b))
+/* Decimal 10^(digits in sysfs presentation) */
+#define CALIB_BASE_SYSFS	1000
+
+#define TSL2563_CMD		0x80
+#define TSL2563_CLEARINT	0x40
+
+#define TSL2563_REG_CTRL	0x00
+#define TSL2563_REG_TIMING	0x01
+#define TSL2563_REG_LOWLOW	0x02 /* data0 low threshold, 2 bytes */
+#define TSL2563_REG_LOWHIGH	0x03
+#define TSL2563_REG_HIGHLOW	0x04 /* data0 high threshold, 2 bytes */
+#define TSL2563_REG_HIGHHIGH	0x05
+#define TSL2563_REG_INT		0x06
+#define TSL2563_REG_ID		0x0a
+#define TSL2563_REG_DATA0LOW	0x0c /* broadband sensor value, 2 bytes */
+#define TSL2563_REG_DATA0HIGH	0x0d
+#define TSL2563_REG_DATA1LOW	0x0e /* infrared sensor value, 2 bytes */
+#define TSL2563_REG_DATA1HIGH	0x0f
+
+#define TSL2563_CMD_POWER_ON	0x03
+#define TSL2563_CMD_POWER_OFF	0x00
+#define TSL2563_CTRL_POWER_MASK	0x03
+
+#define TSL2563_TIMING_13MS	0x00
+#define TSL2563_TIMING_100MS	0x01
+#define TSL2563_TIMING_400MS	0x02
+#define TSL2563_TIMING_MASK	0x03
+#define TSL2563_TIMING_GAIN16	0x10
+#define TSL2563_TIMING_GAIN1	0x00
+
+#define TSL2563_INT_DISBLED	0x00
+#define TSL2563_INT_LEVEL	0x10
+#define TSL2563_INT_PERSIST(n)	((n) & 0x0F)
+
+struct tsl2563_gainlevel_coeff {
+	u8 gaintime;
+	u16 min;
+	u16 max;
+};
+
+static const struct tsl2563_gainlevel_coeff tsl2563_gainlevel_table[] = {
+	{
+		.gaintime	= TSL2563_TIMING_400MS | TSL2563_TIMING_GAIN16,
+		.min		= 0,
+		.max		= 65534,
+	}, {
+		.gaintime	= TSL2563_TIMING_400MS | TSL2563_TIMING_GAIN1,
+		.min		= 2048,
+		.max		= 65534,
+	}, {
+		.gaintime	= TSL2563_TIMING_100MS | TSL2563_TIMING_GAIN1,
+		.min		= 4095,
+		.max		= 37177,
+	}, {
+		.gaintime	= TSL2563_TIMING_13MS | TSL2563_TIMING_GAIN1,
+		.min		= 3000,
+		.max		= 65535,
+	},
+};
+
+struct tsl2563_chip {
+	struct mutex		lock;
+	struct i2c_client	*client;
+	struct delayed_work	poweroff_work;
+
+	/* Remember state for suspend and resume functions */
+	bool suspended;
+
+	struct tsl2563_gainlevel_coeff const *gainlevel;
+
+	u16			low_thres;
+	u16			high_thres;
+	u8			intr;
+	bool			int_enabled;
+
+	/* Calibration coefficients */
+	u32			calib0;
+	u32			calib1;
+	int			cover_comp_gain;
+
+	/* Cache current values, to be returned while suspended */
+	u32			data0;
+	u32			data1;
+};
+
+static int tsl2563_set_power(struct tsl2563_chip *chip, int on)
+{
+	struct i2c_client *client = chip->client;
+	u8 cmd;
+
+	cmd = on ? TSL2563_CMD_POWER_ON : TSL2563_CMD_POWER_OFF;
+	return i2c_smbus_write_byte_data(client,
+					 TSL2563_CMD | TSL2563_REG_CTRL, cmd);
+}
+
+/*
+ * Return value is 0 for off, 1 for on, or a negative error
+ * code if reading failed.
+ */
+static int tsl2563_get_power(struct tsl2563_chip *chip)
+{
+	struct i2c_client *client = chip->client;
+	int ret;
+
+	ret = i2c_smbus_read_byte_data(client, TSL2563_CMD | TSL2563_REG_CTRL);
+	if (ret < 0)
+		return ret;
+
+	return (ret & TSL2563_CTRL_POWER_MASK) == TSL2563_CMD_POWER_ON;
+}
+
+static int tsl2563_configure(struct tsl2563_chip *chip)
+{
+	int ret;
+
+	ret = i2c_smbus_write_byte_data(chip->client,
+			TSL2563_CMD | TSL2563_REG_TIMING,
+			chip->gainlevel->gaintime);
+	if (ret)
+		goto error_ret;
+	ret = i2c_smbus_write_byte_data(chip->client,
+			TSL2563_CMD | TSL2563_REG_HIGHLOW,
+			chip->high_thres & 0xFF);
+	if (ret)
+		goto error_ret;
+	ret = i2c_smbus_write_byte_data(chip->client,
+			TSL2563_CMD | TSL2563_REG_HIGHHIGH,
+			(chip->high_thres >> 8) & 0xFF);
+	if (ret)
+		goto error_ret;
+	ret = i2c_smbus_write_byte_data(chip->client,
+			TSL2563_CMD | TSL2563_REG_LOWLOW,
+			chip->low_thres & 0xFF);
+	if (ret)
+		goto error_ret;
+	ret = i2c_smbus_write_byte_data(chip->client,
+			TSL2563_CMD | TSL2563_REG_LOWHIGH,
+			(chip->low_thres >> 8) & 0xFF);
+/*
+ * Interrupt register is automatically written anyway if it is relevant
+ * so is not here.
+ */
+error_ret:
+	return ret;
+}
+
+static void tsl2563_poweroff_work(struct work_struct *work)
+{
+	struct tsl2563_chip *chip =
+		container_of(work, struct tsl2563_chip, poweroff_work.work);
+	tsl2563_set_power(chip, 0);
+}
+
+static int tsl2563_detect(struct tsl2563_chip *chip)
+{
+	int ret;
+
+	ret = tsl2563_set_power(chip, 1);
+	if (ret)
+		return ret;
+
+	ret = tsl2563_get_power(chip);
+	if (ret < 0)
+		return ret;
+
+	return ret ? 0 : -ENODEV;
+}
+
+static int tsl2563_read_id(struct tsl2563_chip *chip, u8 *id)
+{
+	struct i2c_client *client = chip->client;
+	int ret;
+
+	ret = i2c_smbus_read_byte_data(client, TSL2563_CMD | TSL2563_REG_ID);
+	if (ret < 0)
+		return ret;
+
+	*id = ret;
+
+	return 0;
+}
+
+/*
+ * "Normalized" ADC value is one obtained with 400ms of integration time and
+ * 16x gain. This function returns the number of bits of shift needed to
+ * convert between normalized values and HW values obtained using given
+ * timing and gain settings.
+ */
+static int adc_shiftbits(u8 timing)
+{
+	int shift = 0;
+
+	switch (timing & TSL2563_TIMING_MASK) {
+	case TSL2563_TIMING_13MS:
+		shift += 5;
+		break;
+	case TSL2563_TIMING_100MS:
+		shift += 2;
+		break;
+	case TSL2563_TIMING_400MS:
+		/* no-op */
+		break;
+	}
+
+	if (!(timing & TSL2563_TIMING_GAIN16))
+		shift += 4;
+
+	return shift;
+}
+
+/* Convert a HW ADC value to normalized scale. */
+static u32 normalize_adc(u16 adc, u8 timing)
+{
+	return adc << adc_shiftbits(timing);
+}
+
+static void tsl2563_wait_adc(struct tsl2563_chip *chip)
+{
+	unsigned int delay;
+
+	switch (chip->gainlevel->gaintime & TSL2563_TIMING_MASK) {
+	case TSL2563_TIMING_13MS:
+		delay = 14;
+		break;
+	case TSL2563_TIMING_100MS:
+		delay = 101;
+		break;
+	default:
+		delay = 402;
+	}
+	/*
+	 * TODO: Make sure that we wait at least required delay but why we
+	 * have to extend it one tick more?
+	 */
+	schedule_timeout_interruptible(msecs_to_jiffies(delay) + 2);
+}
+
+static int tsl2563_adjust_gainlevel(struct tsl2563_chip *chip, u16 adc)
+{
+	struct i2c_client *client = chip->client;
+
+	if (adc > chip->gainlevel->max || adc < chip->gainlevel->min) {
+
+		(adc > chip->gainlevel->max) ?
+			chip->gainlevel++ : chip->gainlevel--;
+
+		i2c_smbus_write_byte_data(client,
+					  TSL2563_CMD | TSL2563_REG_TIMING,
+					  chip->gainlevel->gaintime);
+
+		tsl2563_wait_adc(chip);
+		tsl2563_wait_adc(chip);
+
+		return 1;
+	} else
+		return 0;
+}
+
+static int tsl2563_get_adc(struct tsl2563_chip *chip)
+{
+	struct i2c_client *client = chip->client;
+	u16 adc0, adc1;
+	int retry = 1;
+	int ret = 0;
+
+	if (chip->suspended)
+		goto out;
+
+	if (!chip->int_enabled) {
+		cancel_delayed_work(&chip->poweroff_work);
+
+		if (!tsl2563_get_power(chip)) {
+			ret = tsl2563_set_power(chip, 1);
+			if (ret)
+				goto out;
+			ret = tsl2563_configure(chip);
+			if (ret)
+				goto out;
+			tsl2563_wait_adc(chip);
+		}
+	}
+
+	while (retry) {
+		ret = i2c_smbus_read_word_data(client,
+				TSL2563_CMD | TSL2563_REG_DATA0LOW);
+		if (ret < 0)
+			goto out;
+		adc0 = ret;
+
+		ret = i2c_smbus_read_word_data(client,
+				TSL2563_CMD | TSL2563_REG_DATA1LOW);
+		if (ret < 0)
+			goto out;
+		adc1 = ret;
+
+		retry = tsl2563_adjust_gainlevel(chip, adc0);
+	}
+
+	chip->data0 = normalize_adc(adc0, chip->gainlevel->gaintime);
+	chip->data1 = normalize_adc(adc1, chip->gainlevel->gaintime);
+
+	if (!chip->int_enabled)
+		schedule_delayed_work(&chip->poweroff_work, 5 * HZ);
+
+	ret = 0;
+out:
+	return ret;
+}
+
+static inline int calib_to_sysfs(u32 calib)
+{
+	return (int) (((calib * CALIB_BASE_SYSFS) +
+		       CALIB_FRAC_HALF) >> CALIB_FRAC_BITS);
+}
+
+static inline u32 calib_from_sysfs(int value)
+{
+	return (((u32) value) << CALIB_FRAC_BITS) / CALIB_BASE_SYSFS;
+}
+
+/*
+ * Conversions between lux and ADC values.
+ *
+ * The basic formula is lux = c0 * adc0 - c1 * adc1, where c0 and c1 are
+ * appropriate constants. Different constants are needed for different
+ * kinds of light, determined by the ratio adc1/adc0 (basically the ratio
+ * of the intensities in infrared and visible wavelengths). lux_table below
+ * lists the upper threshold of the adc1/adc0 ratio and the corresponding
+ * constants.
+ */
+
+struct tsl2563_lux_coeff {
+	unsigned long ch_ratio;
+	unsigned long ch0_coeff;
+	unsigned long ch1_coeff;
+};
+
+static const struct tsl2563_lux_coeff lux_table[] = {
+	{
+		.ch_ratio	= FRAC10K(1300),
+		.ch0_coeff	= FRAC10K(315),
+		.ch1_coeff	= FRAC10K(262),
+	}, {
+		.ch_ratio	= FRAC10K(2600),
+		.ch0_coeff	= FRAC10K(337),
+		.ch1_coeff	= FRAC10K(430),
+	}, {
+		.ch_ratio	= FRAC10K(3900),
+		.ch0_coeff	= FRAC10K(363),
+		.ch1_coeff	= FRAC10K(529),
+	}, {
+		.ch_ratio	= FRAC10K(5200),
+		.ch0_coeff	= FRAC10K(392),
+		.ch1_coeff	= FRAC10K(605),
+	}, {
+		.ch_ratio	= FRAC10K(6500),
+		.ch0_coeff	= FRAC10K(229),
+		.ch1_coeff	= FRAC10K(291),
+	}, {
+		.ch_ratio	= FRAC10K(8000),
+		.ch0_coeff	= FRAC10K(157),
+		.ch1_coeff	= FRAC10K(180),
+	}, {
+		.ch_ratio	= FRAC10K(13000),
+		.ch0_coeff	= FRAC10K(34),
+		.ch1_coeff	= FRAC10K(26),
+	}, {
+		.ch_ratio	= ULONG_MAX,
+		.ch0_coeff	= 0,
+		.ch1_coeff	= 0,
+	},
+};
+
+/* Convert normalized, scaled ADC values to lux. */
+static unsigned int adc_to_lux(u32 adc0, u32 adc1)
+{
+	const struct tsl2563_lux_coeff *lp = lux_table;
+	unsigned long ratio, lux, ch0 = adc0, ch1 = adc1;
+
+	ratio = ch0 ? ((ch1 << ADC_FRAC_BITS) / ch0) : ULONG_MAX;
+
+	while (lp->ch_ratio < ratio)
+		lp++;
+
+	lux = ch0 * lp->ch0_coeff - ch1 * lp->ch1_coeff;
+
+	return (unsigned int) (lux >> ADC_FRAC_BITS);
+}
+
+/* Apply calibration coefficient to ADC count. */
+static u32 calib_adc(u32 adc, u32 calib)
+{
+	unsigned long scaled = adc;
+
+	scaled *= calib;
+	scaled >>= CALIB_FRAC_BITS;
+
+	return (u32) scaled;
+}
+
+static int tsl2563_write_raw(struct iio_dev *indio_dev,
+			       struct iio_chan_spec const *chan,
+			       int val,
+			       int val2,
+			       long mask)
+{
+	struct tsl2563_chip *chip = iio_priv(indio_dev);
+
+	if (chan->channel == IIO_MOD_LIGHT_BOTH)
+		chip->calib0 = calib_from_sysfs(val);
+	else
+		chip->calib1 = calib_from_sysfs(val);
+
+	return 0;
+}
+
+static int tsl2563_read_raw(struct iio_dev *indio_dev,
+			    struct iio_chan_spec const *chan,
+			    int *val,
+			    int *val2,
+			    long m)
+{
+	int ret = -EINVAL;
+	u32 calib0, calib1;
+	struct tsl2563_chip *chip = iio_priv(indio_dev);
+
+	mutex_lock(&chip->lock);
+	switch (m) {
+	case IIO_CHAN_INFO_RAW:
+	case IIO_CHAN_INFO_PROCESSED:
+		switch (chan->type) {
+		case IIO_LIGHT:
+			ret = tsl2563_get_adc(chip);
+			if (ret)
+				goto error_ret;
+			calib0 = calib_adc(chip->data0, chip->calib0) *
+				chip->cover_comp_gain;
+			calib1 = calib_adc(chip->data1, chip->calib1) *
+				chip->cover_comp_gain;
+			*val = adc_to_lux(calib0, calib1);
+			ret = IIO_VAL_INT;
+			break;
+		case IIO_INTENSITY:
+			ret = tsl2563_get_adc(chip);
+			if (ret)
+				goto error_ret;
+			if (chan->channel == 0)
+				*val = chip->data0;
+			else
+				*val = chip->data1;
+			ret = IIO_VAL_INT;
+			break;
+		default:
+			break;
+		}
+		break;
+
+	case IIO_CHAN_INFO_CALIBSCALE:
+		if (chan->channel == 0)
+			*val = calib_to_sysfs(chip->calib0);
+		else
+			*val = calib_to_sysfs(chip->calib1);
+		ret = IIO_VAL_INT;
+		break;
+	default:
+		ret = -EINVAL;
+		goto error_ret;
+	}
+
+error_ret:
+	mutex_unlock(&chip->lock);
+	return ret;
+}
+
+static const struct iio_chan_spec tsl2563_channels[] = {
+	{
+		.type = IIO_LIGHT,
+		.indexed = 1,
+		.info_mask = IIO_CHAN_INFO_PROCESSED_SEPARATE_BIT,
+		.channel = 0,
+	}, {
+		.type = IIO_INTENSITY,
+		.modified = 1,
+		.channel2 = IIO_MOD_LIGHT_BOTH,
+		.info_mask = IIO_CHAN_INFO_RAW_SEPARATE_BIT |
+		IIO_CHAN_INFO_CALIBSCALE_SEPARATE_BIT,
+		.event_mask = (IIO_EV_BIT(IIO_EV_TYPE_THRESH,
+					  IIO_EV_DIR_RISING) |
+			       IIO_EV_BIT(IIO_EV_TYPE_THRESH,
+					  IIO_EV_DIR_FALLING)),
+	}, {
+		.type = IIO_INTENSITY,
+		.modified = 1,
+		.channel2 = IIO_MOD_LIGHT_IR,
+		.info_mask = IIO_CHAN_INFO_RAW_SEPARATE_BIT |
+		IIO_CHAN_INFO_CALIBSCALE_SEPARATE_BIT,
+	}
+};
+
+static int tsl2563_read_thresh(struct iio_dev *indio_dev,
+			       u64 event_code,
+			       int *val)
+{
+	struct tsl2563_chip *chip = iio_priv(indio_dev);
+
+	switch (IIO_EVENT_CODE_EXTRACT_DIR(event_code)) {
+	case IIO_EV_DIR_RISING:
+		*val = chip->high_thres;
+		break;
+	case IIO_EV_DIR_FALLING:
+		*val = chip->low_thres;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int tsl2563_write_thresh(struct iio_dev *indio_dev,
+				  u64 event_code,
+				  int val)
+{
+	struct tsl2563_chip *chip = iio_priv(indio_dev);
+	int ret;
+	u8 address;
+
+	if (IIO_EVENT_CODE_EXTRACT_DIR(event_code) == IIO_EV_DIR_RISING)
+		address = TSL2563_REG_HIGHLOW;
+	else
+		address = TSL2563_REG_LOWLOW;
+	mutex_lock(&chip->lock);
+	ret = i2c_smbus_write_byte_data(chip->client, TSL2563_CMD | address,
+					val & 0xFF);
+	if (ret)
+		goto error_ret;
+	ret = i2c_smbus_write_byte_data(chip->client,
+					TSL2563_CMD | (address + 1),
+					(val >> 8) & 0xFF);
+	if (IIO_EVENT_CODE_EXTRACT_DIR(event_code) == IIO_EV_DIR_RISING)
+		chip->high_thres = val;
+	else
+		chip->low_thres = val;
+
+error_ret:
+	mutex_unlock(&chip->lock);
+
+	return ret;
+}
+
+static irqreturn_t tsl2563_event_handler(int irq, void *private)
+{
+	struct iio_dev *dev_info = private;
+	struct tsl2563_chip *chip = iio_priv(dev_info);
+
+	iio_push_event(dev_info,
+		       IIO_UNMOD_EVENT_CODE(IIO_LIGHT,
+					    0,
+					    IIO_EV_TYPE_THRESH,
+					    IIO_EV_DIR_EITHER),
+		       iio_get_time_ns());
+
+	/* clear the interrupt and push the event */
+	i2c_smbus_write_byte(chip->client, TSL2563_CMD | TSL2563_CLEARINT);
+	return IRQ_HANDLED;
+}
+
+static int tsl2563_write_interrupt_config(struct iio_dev *indio_dev,
+					  u64 event_code,
+					  int state)
+{
+	struct tsl2563_chip *chip = iio_priv(indio_dev);
+	int ret = 0;
+
+	mutex_lock(&chip->lock);
+	if (state && !(chip->intr & 0x30)) {
+		chip->intr &= ~0x30;
+		chip->intr |= 0x10;
+		/* ensure the chip is actually on */
+		cancel_delayed_work(&chip->poweroff_work);
+		if (!tsl2563_get_power(chip)) {
+			ret = tsl2563_set_power(chip, 1);
+			if (ret)
+				goto out;
+			ret = tsl2563_configure(chip);
+			if (ret)
+				goto out;
+		}
+		ret = i2c_smbus_write_byte_data(chip->client,
+						TSL2563_CMD | TSL2563_REG_INT,
+						chip->intr);
+		chip->int_enabled = true;
+	}
+
+	if (!state && (chip->intr & 0x30)) {
+		chip->intr &= ~0x30;
+		ret = i2c_smbus_write_byte_data(chip->client,
+						TSL2563_CMD | TSL2563_REG_INT,
+						chip->intr);
+		chip->int_enabled = false;
+		/* now the interrupt is not enabled, we can go to sleep */
+		schedule_delayed_work(&chip->poweroff_work, 5 * HZ);
+	}
+out:
+	mutex_unlock(&chip->lock);
+
+	return ret;
+}
+
+static int tsl2563_read_interrupt_config(struct iio_dev *indio_dev,
+					 u64 event_code)
+{
+	struct tsl2563_chip *chip = iio_priv(indio_dev);
+	int ret;
+
+	mutex_lock(&chip->lock);
+	ret = i2c_smbus_read_byte_data(chip->client,
+				       TSL2563_CMD | TSL2563_REG_INT);
+	mutex_unlock(&chip->lock);
+	if (ret < 0)
+		return ret;
+
+	return !!(ret & 0x30);
+}
+
+static const struct iio_info tsl2563_info_no_irq = {
+	.driver_module = THIS_MODULE,
+	.read_raw = &tsl2563_read_raw,
+	.write_raw = &tsl2563_write_raw,
+};
+
+static const struct iio_info tsl2563_info = {
+	.driver_module = THIS_MODULE,
+	.read_raw = &tsl2563_read_raw,
+	.write_raw = &tsl2563_write_raw,
+	.read_event_value = &tsl2563_read_thresh,
+	.write_event_value = &tsl2563_write_thresh,
+	.read_event_config = &tsl2563_read_interrupt_config,
+	.write_event_config = &tsl2563_write_interrupt_config,
+};
+
+static int tsl2563_probe(struct i2c_client *client,
+				const struct i2c_device_id *device_id)
+{
+	struct iio_dev *indio_dev;
+	struct tsl2563_chip *chip;
+	struct tsl2563_platform_data *pdata = client->dev.platform_data;
+	int err = 0;
+	u8 id = 0;
+
+	indio_dev = iio_device_alloc(sizeof(*chip));
+	if (!indio_dev)
+		return -ENOMEM;
+
+	chip = iio_priv(indio_dev);
+
+	i2c_set_clientdata(client, chip);
+	chip->client = client;
+
+	err = tsl2563_detect(chip);
+	if (err) {
+		dev_err(&client->dev, "detect error %d\n", -err);
+		goto fail1;
+	}
+
+	err = tsl2563_read_id(chip, &id);
+	if (err) {
+		dev_err(&client->dev, "read id error %d\n", -err);
+		goto fail1;
+	}
+
+	mutex_init(&chip->lock);
+
+	/* Default values used until userspace says otherwise */
+	chip->low_thres = 0x0;
+	chip->high_thres = 0xffff;
+	chip->gainlevel = tsl2563_gainlevel_table;
+	chip->intr = TSL2563_INT_PERSIST(4);
+	chip->calib0 = calib_from_sysfs(CALIB_BASE_SYSFS);
+	chip->calib1 = calib_from_sysfs(CALIB_BASE_SYSFS);
+
+	if (pdata)
+		chip->cover_comp_gain = pdata->cover_comp_gain;
+	else
+		chip->cover_comp_gain = 1;
+
+	dev_info(&client->dev, "model %d, rev. %d\n", id >> 4, id & 0x0f);
+	indio_dev->name = client->name;
+	indio_dev->channels = tsl2563_channels;
+	indio_dev->num_channels = ARRAY_SIZE(tsl2563_channels);
+	indio_dev->dev.parent = &client->dev;
+	indio_dev->modes = INDIO_DIRECT_MODE;
+
+	if (client->irq)
+		indio_dev->info = &tsl2563_info;
+	else
+		indio_dev->info = &tsl2563_info_no_irq;
+
+	if (client->irq) {
+		err = request_threaded_irq(client->irq,
+					   NULL,
+					   &tsl2563_event_handler,
+					   IRQF_TRIGGER_RISING | IRQF_ONESHOT,
+					   "tsl2563_event",
+					   indio_dev);
+		if (err) {
+			dev_err(&client->dev, "irq request error %d\n", -err);
+			goto fail1;
+		}
+	}
+
+	err = tsl2563_configure(chip);
+	if (err) {
+		dev_err(&client->dev, "configure error %d\n", -err);
+		goto fail2;
+	}
+
+	INIT_DELAYED_WORK(&chip->poweroff_work, tsl2563_poweroff_work);
+
+	/* The interrupt cannot yet be enabled so this is fine without lock */
+	schedule_delayed_work(&chip->poweroff_work, 5 * HZ);
+
+	err = iio_device_register(indio_dev);
+	if (err) {
+		dev_err(&client->dev, "iio registration error %d\n", -err);
+		goto fail3;
+	}
+
+	return 0;
+
+fail3:
+	cancel_delayed_work(&chip->poweroff_work);
+	flush_scheduled_work();
+fail2:
+	if (client->irq)
+		free_irq(client->irq, indio_dev);
+fail1:
+	iio_device_free(indio_dev);
+	return err;
+}
+
+static int tsl2563_remove(struct i2c_client *client)
+{
+	struct tsl2563_chip *chip = i2c_get_clientdata(client);
+	struct iio_dev *indio_dev = iio_priv_to_dev(chip);
+
+	iio_device_unregister(indio_dev);
+	if (!chip->int_enabled)
+		cancel_delayed_work(&chip->poweroff_work);
+	/* Ensure that interrupts are disabled - then flush any bottom halves */
+	chip->intr &= ~0x30;
+	i2c_smbus_write_byte_data(chip->client, TSL2563_CMD | TSL2563_REG_INT,
+				  chip->intr);
+	flush_scheduled_work();
+	tsl2563_set_power(chip, 0);
+	if (client->irq)
+		free_irq(client->irq, indio_dev);
+
+	iio_device_free(indio_dev);
+
+	return 0;
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int tsl2563_suspend(struct device *dev)
+{
+	struct tsl2563_chip *chip = i2c_get_clientdata(to_i2c_client(dev));
+	int ret;
+
+	mutex_lock(&chip->lock);
+
+	ret = tsl2563_set_power(chip, 0);
+	if (ret)
+		goto out;
+
+	chip->suspended = true;
+
+out:
+	mutex_unlock(&chip->lock);
+	return ret;
+}
+
+static int tsl2563_resume(struct device *dev)
+{
+	struct tsl2563_chip *chip = i2c_get_clientdata(to_i2c_client(dev));
+	int ret;
+
+	mutex_lock(&chip->lock);
+
+	ret = tsl2563_set_power(chip, 1);
+	if (ret)
+		goto out;
+
+	ret = tsl2563_configure(chip);
+	if (ret)
+		goto out;
+
+	chip->suspended = false;
+
+out:
+	mutex_unlock(&chip->lock);
+	return ret;
+}
+
+static SIMPLE_DEV_PM_OPS(tsl2563_pm_ops, tsl2563_suspend, tsl2563_resume);
+#define TSL2563_PM_OPS (&tsl2563_pm_ops)
+#else
+#define TSL2563_PM_OPS NULL
+#endif
+
+static const struct i2c_device_id tsl2563_id[] = {
+	{ "tsl2560", 0 },
+	{ "tsl2561", 1 },
+	{ "tsl2562", 2 },
+	{ "tsl2563", 3 },
+	{}
+};
+MODULE_DEVICE_TABLE(i2c, tsl2563_id);
+
+static struct i2c_driver tsl2563_i2c_driver = {
+	.driver = {
+		.name	 = "tsl2563",
+		.pm	= TSL2563_PM_OPS,
+	},
+	.probe		= tsl2563_probe,
+	.remove		= tsl2563_remove,
+	.id_table	= tsl2563_id,
+};
+module_i2c_driver(tsl2563_i2c_driver);
+
+MODULE_AUTHOR("Nokia Corporation");
+MODULE_DESCRIPTION("tsl2563 light sensor driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/staging/iio/light/Kconfig b/drivers/staging/iio/light/Kconfig
index 4bed30eac3ed..ca8d6e66c899 100644
--- a/drivers/staging/iio/light/Kconfig
+++ b/drivers/staging/iio/light/Kconfig
@@ -25,16 +25,6 @@ config SENSORS_ISL29028
 	 Proximity value via iio. The ISL29028 provides the concurrent sensing
 	 of ambient light and proximity.
 
-config SENSORS_TSL2563
-	tristate "TAOS TSL2560, TSL2561, TSL2562 and TSL2563 ambient light sensors"
-	depends on I2C
-	help
-	 If you say yes here you get support for the Taos TSL2560,
-	 TSL2561, TSL2562 and TSL2563 ambient light sensors.
-
-	 This driver can also be built as a module.  If so, the module
-	 will be called tsl2563.
-
 config TSL2583
 	tristate "TAOS TSL2580, TSL2581 and TSL2583 light-to-digital converters"
 	depends on I2C
diff --git a/drivers/staging/iio/light/Makefile b/drivers/staging/iio/light/Makefile
index 141af1eb164c..9960fdf7c15b 100644
--- a/drivers/staging/iio/light/Makefile
+++ b/drivers/staging/iio/light/Makefile
@@ -2,7 +2,6 @@
 # Makefile for industrial I/O Light sensors
 #
 
-obj-$(CONFIG_SENSORS_TSL2563)	+= tsl2563.o
 obj-$(CONFIG_SENSORS_ISL29018)	+= isl29018.o
 obj-$(CONFIG_SENSORS_ISL29028)	+= isl29028.o
 obj-$(CONFIG_TSL2583)	+= tsl2583.o
diff --git a/drivers/staging/iio/light/tsl2563.c b/drivers/staging/iio/light/tsl2563.c
deleted file mode 100644
index b91d9bb8b083..000000000000
--- a/drivers/staging/iio/light/tsl2563.c
+++ /dev/null
@@ -1,887 +0,0 @@
-/*
- * drivers/i2c/chips/tsl2563.c
- *
- * Copyright (C) 2008 Nokia Corporation
- *
- * Written by Timo O. Karjalainen <timo.o.karjalainen@nokia.com>
- * Contact: Amit Kucheria <amit.kucheria@verdurent.com>
- *
- * Converted to IIO driver
- * Amit Kucheria <amit.kucheria@verdurent.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
- * 02110-1301 USA
- */
-
-#include <linux/module.h>
-#include <linux/i2c.h>
-#include <linux/interrupt.h>
-#include <linux/irq.h>
-#include <linux/sched.h>
-#include <linux/mutex.h>
-#include <linux/delay.h>
-#include <linux/pm.h>
-#include <linux/err.h>
-#include <linux/slab.h>
-
-#include <linux/iio/iio.h>
-#include <linux/iio/sysfs.h>
-#include <linux/iio/events.h>
-#include "tsl2563.h"
-
-/* Use this many bits for fraction part. */
-#define ADC_FRAC_BITS		14
-
-/* Given number of 1/10000's in ADC_FRAC_BITS precision. */
-#define FRAC10K(f)		(((f) * (1L << (ADC_FRAC_BITS))) / (10000))
-
-/* Bits used for fraction in calibration coefficients.*/
-#define CALIB_FRAC_BITS		10
-/* 0.5 in CALIB_FRAC_BITS precision */
-#define CALIB_FRAC_HALF		(1 << (CALIB_FRAC_BITS - 1))
-/* Make a fraction from a number n that was multiplied with b. */
-#define CALIB_FRAC(n, b)	(((n) << CALIB_FRAC_BITS) / (b))
-/* Decimal 10^(digits in sysfs presentation) */
-#define CALIB_BASE_SYSFS	1000
-
-#define TSL2563_CMD		0x80
-#define TSL2563_CLEARINT	0x40
-
-#define TSL2563_REG_CTRL	0x00
-#define TSL2563_REG_TIMING	0x01
-#define TSL2563_REG_LOWLOW	0x02 /* data0 low threshold, 2 bytes */
-#define TSL2563_REG_LOWHIGH	0x03
-#define TSL2563_REG_HIGHLOW	0x04 /* data0 high threshold, 2 bytes */
-#define TSL2563_REG_HIGHHIGH	0x05
-#define TSL2563_REG_INT		0x06
-#define TSL2563_REG_ID		0x0a
-#define TSL2563_REG_DATA0LOW	0x0c /* broadband sensor value, 2 bytes */
-#define TSL2563_REG_DATA0HIGH	0x0d
-#define TSL2563_REG_DATA1LOW	0x0e /* infrared sensor value, 2 bytes */
-#define TSL2563_REG_DATA1HIGH	0x0f
-
-#define TSL2563_CMD_POWER_ON	0x03
-#define TSL2563_CMD_POWER_OFF	0x00
-#define TSL2563_CTRL_POWER_MASK	0x03
-
-#define TSL2563_TIMING_13MS	0x00
-#define TSL2563_TIMING_100MS	0x01
-#define TSL2563_TIMING_400MS	0x02
-#define TSL2563_TIMING_MASK	0x03
-#define TSL2563_TIMING_GAIN16	0x10
-#define TSL2563_TIMING_GAIN1	0x00
-
-#define TSL2563_INT_DISBLED	0x00
-#define TSL2563_INT_LEVEL	0x10
-#define TSL2563_INT_PERSIST(n)	((n) & 0x0F)
-
-struct tsl2563_gainlevel_coeff {
-	u8 gaintime;
-	u16 min;
-	u16 max;
-};
-
-static const struct tsl2563_gainlevel_coeff tsl2563_gainlevel_table[] = {
-	{
-		.gaintime	= TSL2563_TIMING_400MS | TSL2563_TIMING_GAIN16,
-		.min		= 0,
-		.max		= 65534,
-	}, {
-		.gaintime	= TSL2563_TIMING_400MS | TSL2563_TIMING_GAIN1,
-		.min		= 2048,
-		.max		= 65534,
-	}, {
-		.gaintime	= TSL2563_TIMING_100MS | TSL2563_TIMING_GAIN1,
-		.min		= 4095,
-		.max		= 37177,
-	}, {
-		.gaintime	= TSL2563_TIMING_13MS | TSL2563_TIMING_GAIN1,
-		.min		= 3000,
-		.max		= 65535,
-	},
-};
-
-struct tsl2563_chip {
-	struct mutex		lock;
-	struct i2c_client	*client;
-	struct delayed_work	poweroff_work;
-
-	/* Remember state for suspend and resume functions */
-	bool suspended;
-
-	struct tsl2563_gainlevel_coeff const *gainlevel;
-
-	u16			low_thres;
-	u16			high_thres;
-	u8			intr;
-	bool			int_enabled;
-
-	/* Calibration coefficients */
-	u32			calib0;
-	u32			calib1;
-	int			cover_comp_gain;
-
-	/* Cache current values, to be returned while suspended */
-	u32			data0;
-	u32			data1;
-};
-
-static int tsl2563_set_power(struct tsl2563_chip *chip, int on)
-{
-	struct i2c_client *client = chip->client;
-	u8 cmd;
-
-	cmd = on ? TSL2563_CMD_POWER_ON : TSL2563_CMD_POWER_OFF;
-	return i2c_smbus_write_byte_data(client,
-					 TSL2563_CMD | TSL2563_REG_CTRL, cmd);
-}
-
-/*
- * Return value is 0 for off, 1 for on, or a negative error
- * code if reading failed.
- */
-static int tsl2563_get_power(struct tsl2563_chip *chip)
-{
-	struct i2c_client *client = chip->client;
-	int ret;
-
-	ret = i2c_smbus_read_byte_data(client, TSL2563_CMD | TSL2563_REG_CTRL);
-	if (ret < 0)
-		return ret;
-
-	return (ret & TSL2563_CTRL_POWER_MASK) == TSL2563_CMD_POWER_ON;
-}
-
-static int tsl2563_configure(struct tsl2563_chip *chip)
-{
-	int ret;
-
-	ret = i2c_smbus_write_byte_data(chip->client,
-			TSL2563_CMD | TSL2563_REG_TIMING,
-			chip->gainlevel->gaintime);
-	if (ret)
-		goto error_ret;
-	ret = i2c_smbus_write_byte_data(chip->client,
-			TSL2563_CMD | TSL2563_REG_HIGHLOW,
-			chip->high_thres & 0xFF);
-	if (ret)
-		goto error_ret;
-	ret = i2c_smbus_write_byte_data(chip->client,
-			TSL2563_CMD | TSL2563_REG_HIGHHIGH,
-			(chip->high_thres >> 8) & 0xFF);
-	if (ret)
-		goto error_ret;
-	ret = i2c_smbus_write_byte_data(chip->client,
-			TSL2563_CMD | TSL2563_REG_LOWLOW,
-			chip->low_thres & 0xFF);
-	if (ret)
-		goto error_ret;
-	ret = i2c_smbus_write_byte_data(chip->client,
-			TSL2563_CMD | TSL2563_REG_LOWHIGH,
-			(chip->low_thres >> 8) & 0xFF);
-/*
- * Interrupt register is automatically written anyway if it is relevant
- * so is not here.
- */
-error_ret:
-	return ret;
-}
-
-static void tsl2563_poweroff_work(struct work_struct *work)
-{
-	struct tsl2563_chip *chip =
-		container_of(work, struct tsl2563_chip, poweroff_work.work);
-	tsl2563_set_power(chip, 0);
-}
-
-static int tsl2563_detect(struct tsl2563_chip *chip)
-{
-	int ret;
-
-	ret = tsl2563_set_power(chip, 1);
-	if (ret)
-		return ret;
-
-	ret = tsl2563_get_power(chip);
-	if (ret < 0)
-		return ret;
-
-	return ret ? 0 : -ENODEV;
-}
-
-static int tsl2563_read_id(struct tsl2563_chip *chip, u8 *id)
-{
-	struct i2c_client *client = chip->client;
-	int ret;
-
-	ret = i2c_smbus_read_byte_data(client, TSL2563_CMD | TSL2563_REG_ID);
-	if (ret < 0)
-		return ret;
-
-	*id = ret;
-
-	return 0;
-}
-
-/*
- * "Normalized" ADC value is one obtained with 400ms of integration time and
- * 16x gain. This function returns the number of bits of shift needed to
- * convert between normalized values and HW values obtained using given
- * timing and gain settings.
- */
-static int adc_shiftbits(u8 timing)
-{
-	int shift = 0;
-
-	switch (timing & TSL2563_TIMING_MASK) {
-	case TSL2563_TIMING_13MS:
-		shift += 5;
-		break;
-	case TSL2563_TIMING_100MS:
-		shift += 2;
-		break;
-	case TSL2563_TIMING_400MS:
-		/* no-op */
-		break;
-	}
-
-	if (!(timing & TSL2563_TIMING_GAIN16))
-		shift += 4;
-
-	return shift;
-}
-
-/* Convert a HW ADC value to normalized scale. */
-static u32 normalize_adc(u16 adc, u8 timing)
-{
-	return adc << adc_shiftbits(timing);
-}
-
-static void tsl2563_wait_adc(struct tsl2563_chip *chip)
-{
-	unsigned int delay;
-
-	switch (chip->gainlevel->gaintime & TSL2563_TIMING_MASK) {
-	case TSL2563_TIMING_13MS:
-		delay = 14;
-		break;
-	case TSL2563_TIMING_100MS:
-		delay = 101;
-		break;
-	default:
-		delay = 402;
-	}
-	/*
-	 * TODO: Make sure that we wait at least required delay but why we
-	 * have to extend it one tick more?
-	 */
-	schedule_timeout_interruptible(msecs_to_jiffies(delay) + 2);
-}
-
-static int tsl2563_adjust_gainlevel(struct tsl2563_chip *chip, u16 adc)
-{
-	struct i2c_client *client = chip->client;
-
-	if (adc > chip->gainlevel->max || adc < chip->gainlevel->min) {
-
-		(adc > chip->gainlevel->max) ?
-			chip->gainlevel++ : chip->gainlevel--;
-
-		i2c_smbus_write_byte_data(client,
-					  TSL2563_CMD | TSL2563_REG_TIMING,
-					  chip->gainlevel->gaintime);
-
-		tsl2563_wait_adc(chip);
-		tsl2563_wait_adc(chip);
-
-		return 1;
-	} else
-		return 0;
-}
-
-static int tsl2563_get_adc(struct tsl2563_chip *chip)
-{
-	struct i2c_client *client = chip->client;
-	u16 adc0, adc1;
-	int retry = 1;
-	int ret = 0;
-
-	if (chip->suspended)
-		goto out;
-
-	if (!chip->int_enabled) {
-		cancel_delayed_work(&chip->poweroff_work);
-
-		if (!tsl2563_get_power(chip)) {
-			ret = tsl2563_set_power(chip, 1);
-			if (ret)
-				goto out;
-			ret = tsl2563_configure(chip);
-			if (ret)
-				goto out;
-			tsl2563_wait_adc(chip);
-		}
-	}
-
-	while (retry) {
-		ret = i2c_smbus_read_word_data(client,
-				TSL2563_CMD | TSL2563_REG_DATA0LOW);
-		if (ret < 0)
-			goto out;
-		adc0 = ret;
-
-		ret = i2c_smbus_read_word_data(client,
-				TSL2563_CMD | TSL2563_REG_DATA1LOW);
-		if (ret < 0)
-			goto out;
-		adc1 = ret;
-
-		retry = tsl2563_adjust_gainlevel(chip, adc0);
-	}
-
-	chip->data0 = normalize_adc(adc0, chip->gainlevel->gaintime);
-	chip->data1 = normalize_adc(adc1, chip->gainlevel->gaintime);
-
-	if (!chip->int_enabled)
-		schedule_delayed_work(&chip->poweroff_work, 5 * HZ);
-
-	ret = 0;
-out:
-	return ret;
-}
-
-static inline int calib_to_sysfs(u32 calib)
-{
-	return (int) (((calib * CALIB_BASE_SYSFS) +
-		       CALIB_FRAC_HALF) >> CALIB_FRAC_BITS);
-}
-
-static inline u32 calib_from_sysfs(int value)
-{
-	return (((u32) value) << CALIB_FRAC_BITS) / CALIB_BASE_SYSFS;
-}
-
-/*
- * Conversions between lux and ADC values.
- *
- * The basic formula is lux = c0 * adc0 - c1 * adc1, where c0 and c1 are
- * appropriate constants. Different constants are needed for different
- * kinds of light, determined by the ratio adc1/adc0 (basically the ratio
- * of the intensities in infrared and visible wavelengths). lux_table below
- * lists the upper threshold of the adc1/adc0 ratio and the corresponding
- * constants.
- */
-
-struct tsl2563_lux_coeff {
-	unsigned long ch_ratio;
-	unsigned long ch0_coeff;
-	unsigned long ch1_coeff;
-};
-
-static const struct tsl2563_lux_coeff lux_table[] = {
-	{
-		.ch_ratio	= FRAC10K(1300),
-		.ch0_coeff	= FRAC10K(315),
-		.ch1_coeff	= FRAC10K(262),
-	}, {
-		.ch_ratio	= FRAC10K(2600),
-		.ch0_coeff	= FRAC10K(337),
-		.ch1_coeff	= FRAC10K(430),
-	}, {
-		.ch_ratio	= FRAC10K(3900),
-		.ch0_coeff	= FRAC10K(363),
-		.ch1_coeff	= FRAC10K(529),
-	}, {
-		.ch_ratio	= FRAC10K(5200),
-		.ch0_coeff	= FRAC10K(392),
-		.ch1_coeff	= FRAC10K(605),
-	}, {
-		.ch_ratio	= FRAC10K(6500),
-		.ch0_coeff	= FRAC10K(229),
-		.ch1_coeff	= FRAC10K(291),
-	}, {
-		.ch_ratio	= FRAC10K(8000),
-		.ch0_coeff	= FRAC10K(157),
-		.ch1_coeff	= FRAC10K(180),
-	}, {
-		.ch_ratio	= FRAC10K(13000),
-		.ch0_coeff	= FRAC10K(34),
-		.ch1_coeff	= FRAC10K(26),
-	}, {
-		.ch_ratio	= ULONG_MAX,
-		.ch0_coeff	= 0,
-		.ch1_coeff	= 0,
-	},
-};
-
-/* Convert normalized, scaled ADC values to lux. */
-static unsigned int adc_to_lux(u32 adc0, u32 adc1)
-{
-	const struct tsl2563_lux_coeff *lp = lux_table;
-	unsigned long ratio, lux, ch0 = adc0, ch1 = adc1;
-
-	ratio = ch0 ? ((ch1 << ADC_FRAC_BITS) / ch0) : ULONG_MAX;
-
-	while (lp->ch_ratio < ratio)
-		lp++;
-
-	lux = ch0 * lp->ch0_coeff - ch1 * lp->ch1_coeff;
-
-	return (unsigned int) (lux >> ADC_FRAC_BITS);
-}
-
-/* Apply calibration coefficient to ADC count. */
-static u32 calib_adc(u32 adc, u32 calib)
-{
-	unsigned long scaled = adc;
-
-	scaled *= calib;
-	scaled >>= CALIB_FRAC_BITS;
-
-	return (u32) scaled;
-}
-
-static int tsl2563_write_raw(struct iio_dev *indio_dev,
-			       struct iio_chan_spec const *chan,
-			       int val,
-			       int val2,
-			       long mask)
-{
-	struct tsl2563_chip *chip = iio_priv(indio_dev);
-
-	if (chan->channel == IIO_MOD_LIGHT_BOTH)
-		chip->calib0 = calib_from_sysfs(val);
-	else
-		chip->calib1 = calib_from_sysfs(val);
-
-	return 0;
-}
-
-static int tsl2563_read_raw(struct iio_dev *indio_dev,
-			    struct iio_chan_spec const *chan,
-			    int *val,
-			    int *val2,
-			    long m)
-{
-	int ret = -EINVAL;
-	u32 calib0, calib1;
-	struct tsl2563_chip *chip = iio_priv(indio_dev);
-
-	mutex_lock(&chip->lock);
-	switch (m) {
-	case IIO_CHAN_INFO_RAW:
-	case IIO_CHAN_INFO_PROCESSED:
-		switch (chan->type) {
-		case IIO_LIGHT:
-			ret = tsl2563_get_adc(chip);
-			if (ret)
-				goto error_ret;
-			calib0 = calib_adc(chip->data0, chip->calib0) *
-				chip->cover_comp_gain;
-			calib1 = calib_adc(chip->data1, chip->calib1) *
-				chip->cover_comp_gain;
-			*val = adc_to_lux(calib0, calib1);
-			ret = IIO_VAL_INT;
-			break;
-		case IIO_INTENSITY:
-			ret = tsl2563_get_adc(chip);
-			if (ret)
-				goto error_ret;
-			if (chan->channel == 0)
-				*val = chip->data0;
-			else
-				*val = chip->data1;
-			ret = IIO_VAL_INT;
-			break;
-		default:
-			break;
-		}
-		break;
-
-	case IIO_CHAN_INFO_CALIBSCALE:
-		if (chan->channel == 0)
-			*val = calib_to_sysfs(chip->calib0);
-		else
-			*val = calib_to_sysfs(chip->calib1);
-		ret = IIO_VAL_INT;
-		break;
-	default:
-		ret = -EINVAL;
-		goto error_ret;
-	}
-
-error_ret:
-	mutex_unlock(&chip->lock);
-	return ret;
-}
-
-static const struct iio_chan_spec tsl2563_channels[] = {
-	{
-		.type = IIO_LIGHT,
-		.indexed = 1,
-		.info_mask = IIO_CHAN_INFO_PROCESSED_SEPARATE_BIT,
-		.channel = 0,
-	}, {
-		.type = IIO_INTENSITY,
-		.modified = 1,
-		.channel2 = IIO_MOD_LIGHT_BOTH,
-		.info_mask = IIO_CHAN_INFO_RAW_SEPARATE_BIT |
-		IIO_CHAN_INFO_CALIBSCALE_SEPARATE_BIT,
-		.event_mask = (IIO_EV_BIT(IIO_EV_TYPE_THRESH,
-					  IIO_EV_DIR_RISING) |
-			       IIO_EV_BIT(IIO_EV_TYPE_THRESH,
-					  IIO_EV_DIR_FALLING)),
-	}, {
-		.type = IIO_INTENSITY,
-		.modified = 1,
-		.channel2 = IIO_MOD_LIGHT_IR,
-		.info_mask = IIO_CHAN_INFO_RAW_SEPARATE_BIT |
-		IIO_CHAN_INFO_CALIBSCALE_SEPARATE_BIT,
-	}
-};
-
-static int tsl2563_read_thresh(struct iio_dev *indio_dev,
-			       u64 event_code,
-			       int *val)
-{
-	struct tsl2563_chip *chip = iio_priv(indio_dev);
-
-	switch (IIO_EVENT_CODE_EXTRACT_DIR(event_code)) {
-	case IIO_EV_DIR_RISING:
-		*val = chip->high_thres;
-		break;
-	case IIO_EV_DIR_FALLING:
-		*val = chip->low_thres;
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
-static int tsl2563_write_thresh(struct iio_dev *indio_dev,
-				  u64 event_code,
-				  int val)
-{
-	struct tsl2563_chip *chip = iio_priv(indio_dev);
-	int ret;
-	u8 address;
-
-	if (IIO_EVENT_CODE_EXTRACT_DIR(event_code) == IIO_EV_DIR_RISING)
-		address = TSL2563_REG_HIGHLOW;
-	else
-		address = TSL2563_REG_LOWLOW;
-	mutex_lock(&chip->lock);
-	ret = i2c_smbus_write_byte_data(chip->client, TSL2563_CMD | address,
-					val & 0xFF);
-	if (ret)
-		goto error_ret;
-	ret = i2c_smbus_write_byte_data(chip->client,
-					TSL2563_CMD | (address + 1),
-					(val >> 8) & 0xFF);
-	if (IIO_EVENT_CODE_EXTRACT_DIR(event_code) == IIO_EV_DIR_RISING)
-		chip->high_thres = val;
-	else
-		chip->low_thres = val;
-
-error_ret:
-	mutex_unlock(&chip->lock);
-
-	return ret;
-}
-
-static irqreturn_t tsl2563_event_handler(int irq, void *private)
-{
-	struct iio_dev *dev_info = private;
-	struct tsl2563_chip *chip = iio_priv(dev_info);
-
-	iio_push_event(dev_info,
-		       IIO_UNMOD_EVENT_CODE(IIO_LIGHT,
-					    0,
-					    IIO_EV_TYPE_THRESH,
-					    IIO_EV_DIR_EITHER),
-		       iio_get_time_ns());
-
-	/* clear the interrupt and push the event */
-	i2c_smbus_write_byte(chip->client, TSL2563_CMD | TSL2563_CLEARINT);
-	return IRQ_HANDLED;
-}
-
-static int tsl2563_write_interrupt_config(struct iio_dev *indio_dev,
-					  u64 event_code,
-					  int state)
-{
-	struct tsl2563_chip *chip = iio_priv(indio_dev);
-	int ret = 0;
-
-	mutex_lock(&chip->lock);
-	if (state && !(chip->intr & 0x30)) {
-		chip->intr &= ~0x30;
-		chip->intr |= 0x10;
-		/* ensure the chip is actually on */
-		cancel_delayed_work(&chip->poweroff_work);
-		if (!tsl2563_get_power(chip)) {
-			ret = tsl2563_set_power(chip, 1);
-			if (ret)
-				goto out;
-			ret = tsl2563_configure(chip);
-			if (ret)
-				goto out;
-		}
-		ret = i2c_smbus_write_byte_data(chip->client,
-						TSL2563_CMD | TSL2563_REG_INT,
-						chip->intr);
-		chip->int_enabled = true;
-	}
-
-	if (!state && (chip->intr & 0x30)) {
-		chip->intr &= ~0x30;
-		ret = i2c_smbus_write_byte_data(chip->client,
-						TSL2563_CMD | TSL2563_REG_INT,
-						chip->intr);
-		chip->int_enabled = false;
-		/* now the interrupt is not enabled, we can go to sleep */
-		schedule_delayed_work(&chip->poweroff_work, 5 * HZ);
-	}
-out:
-	mutex_unlock(&chip->lock);
-
-	return ret;
-}
-
-static int tsl2563_read_interrupt_config(struct iio_dev *indio_dev,
-					 u64 event_code)
-{
-	struct tsl2563_chip *chip = iio_priv(indio_dev);
-	int ret;
-
-	mutex_lock(&chip->lock);
-	ret = i2c_smbus_read_byte_data(chip->client,
-				       TSL2563_CMD | TSL2563_REG_INT);
-	mutex_unlock(&chip->lock);
-	if (ret < 0)
-		return ret;
-
-	return !!(ret & 0x30);
-}
-
-static const struct iio_info tsl2563_info_no_irq = {
-	.driver_module = THIS_MODULE,
-	.read_raw = &tsl2563_read_raw,
-	.write_raw = &tsl2563_write_raw,
-};
-
-static const struct iio_info tsl2563_info = {
-	.driver_module = THIS_MODULE,
-	.read_raw = &tsl2563_read_raw,
-	.write_raw = &tsl2563_write_raw,
-	.read_event_value = &tsl2563_read_thresh,
-	.write_event_value = &tsl2563_write_thresh,
-	.read_event_config = &tsl2563_read_interrupt_config,
-	.write_event_config = &tsl2563_write_interrupt_config,
-};
-
-static int tsl2563_probe(struct i2c_client *client,
-				const struct i2c_device_id *device_id)
-{
-	struct iio_dev *indio_dev;
-	struct tsl2563_chip *chip;
-	struct tsl2563_platform_data *pdata = client->dev.platform_data;
-	int err = 0;
-	u8 id = 0;
-
-	indio_dev = iio_device_alloc(sizeof(*chip));
-	if (!indio_dev)
-		return -ENOMEM;
-
-	chip = iio_priv(indio_dev);
-
-	i2c_set_clientdata(client, chip);
-	chip->client = client;
-
-	err = tsl2563_detect(chip);
-	if (err) {
-		dev_err(&client->dev, "detect error %d\n", -err);
-		goto fail1;
-	}
-
-	err = tsl2563_read_id(chip, &id);
-	if (err) {
-		dev_err(&client->dev, "read id error %d\n", -err);
-		goto fail1;
-	}
-
-	mutex_init(&chip->lock);
-
-	/* Default values used until userspace says otherwise */
-	chip->low_thres = 0x0;
-	chip->high_thres = 0xffff;
-	chip->gainlevel = tsl2563_gainlevel_table;
-	chip->intr = TSL2563_INT_PERSIST(4);
-	chip->calib0 = calib_from_sysfs(CALIB_BASE_SYSFS);
-	chip->calib1 = calib_from_sysfs(CALIB_BASE_SYSFS);
-
-	if (pdata)
-		chip->cover_comp_gain = pdata->cover_comp_gain;
-	else
-		chip->cover_comp_gain = 1;
-
-	dev_info(&client->dev, "model %d, rev. %d\n", id >> 4, id & 0x0f);
-	indio_dev->name = client->name;
-	indio_dev->channels = tsl2563_channels;
-	indio_dev->num_channels = ARRAY_SIZE(tsl2563_channels);
-	indio_dev->dev.parent = &client->dev;
-	indio_dev->modes = INDIO_DIRECT_MODE;
-
-	if (client->irq)
-		indio_dev->info = &tsl2563_info;
-	else
-		indio_dev->info = &tsl2563_info_no_irq;
-
-	if (client->irq) {
-		err = request_threaded_irq(client->irq,
-					   NULL,
-					   &tsl2563_event_handler,
-					   IRQF_TRIGGER_RISING | IRQF_ONESHOT,
-					   "tsl2563_event",
-					   indio_dev);
-		if (err) {
-			dev_err(&client->dev, "irq request error %d\n", -err);
-			goto fail1;
-		}
-	}
-
-	err = tsl2563_configure(chip);
-	if (err) {
-		dev_err(&client->dev, "configure error %d\n", -err);
-		goto fail2;
-	}
-
-	INIT_DELAYED_WORK(&chip->poweroff_work, tsl2563_poweroff_work);
-
-	/* The interrupt cannot yet be enabled so this is fine without lock */
-	schedule_delayed_work(&chip->poweroff_work, 5 * HZ);
-
-	err = iio_device_register(indio_dev);
-	if (err) {
-		dev_err(&client->dev, "iio registration error %d\n", -err);
-		goto fail3;
-	}
-
-	return 0;
-
-fail3:
-	cancel_delayed_work(&chip->poweroff_work);
-	flush_scheduled_work();
-fail2:
-	if (client->irq)
-		free_irq(client->irq, indio_dev);
-fail1:
-	iio_device_free(indio_dev);
-	return err;
-}
-
-static int tsl2563_remove(struct i2c_client *client)
-{
-	struct tsl2563_chip *chip = i2c_get_clientdata(client);
-	struct iio_dev *indio_dev = iio_priv_to_dev(chip);
-
-	iio_device_unregister(indio_dev);
-	if (!chip->int_enabled)
-		cancel_delayed_work(&chip->poweroff_work);
-	/* Ensure that interrupts are disabled - then flush any bottom halves */
-	chip->intr &= ~0x30;
-	i2c_smbus_write_byte_data(chip->client, TSL2563_CMD | TSL2563_REG_INT,
-				  chip->intr);
-	flush_scheduled_work();
-	tsl2563_set_power(chip, 0);
-	if (client->irq)
-		free_irq(client->irq, indio_dev);
-
-	iio_device_free(indio_dev);
-
-	return 0;
-}
-
-#ifdef CONFIG_PM_SLEEP
-static int tsl2563_suspend(struct device *dev)
-{
-	struct tsl2563_chip *chip = i2c_get_clientdata(to_i2c_client(dev));
-	int ret;
-
-	mutex_lock(&chip->lock);
-
-	ret = tsl2563_set_power(chip, 0);
-	if (ret)
-		goto out;
-
-	chip->suspended = true;
-
-out:
-	mutex_unlock(&chip->lock);
-	return ret;
-}
-
-static int tsl2563_resume(struct device *dev)
-{
-	struct tsl2563_chip *chip = i2c_get_clientdata(to_i2c_client(dev));
-	int ret;
-
-	mutex_lock(&chip->lock);
-
-	ret = tsl2563_set_power(chip, 1);
-	if (ret)
-		goto out;
-
-	ret = tsl2563_configure(chip);
-	if (ret)
-		goto out;
-
-	chip->suspended = false;
-
-out:
-	mutex_unlock(&chip->lock);
-	return ret;
-}
-
-static SIMPLE_DEV_PM_OPS(tsl2563_pm_ops, tsl2563_suspend, tsl2563_resume);
-#define TSL2563_PM_OPS (&tsl2563_pm_ops)
-#else
-#define TSL2563_PM_OPS NULL
-#endif
-
-static const struct i2c_device_id tsl2563_id[] = {
-	{ "tsl2560", 0 },
-	{ "tsl2561", 1 },
-	{ "tsl2562", 2 },
-	{ "tsl2563", 3 },
-	{}
-};
-MODULE_DEVICE_TABLE(i2c, tsl2563_id);
-
-static struct i2c_driver tsl2563_i2c_driver = {
-	.driver = {
-		.name	 = "tsl2563",
-		.pm	= TSL2563_PM_OPS,
-	},
-	.probe		= tsl2563_probe,
-	.remove		= tsl2563_remove,
-	.id_table	= tsl2563_id,
-};
-module_i2c_driver(tsl2563_i2c_driver);
-
-MODULE_AUTHOR("Nokia Corporation");
-MODULE_DESCRIPTION("tsl2563 light sensor driver");
-MODULE_LICENSE("GPL");
diff --git a/drivers/staging/iio/light/tsl2563.h b/drivers/staging/iio/light/tsl2563.h
deleted file mode 100644
index b97368bd7fff..000000000000
--- a/drivers/staging/iio/light/tsl2563.h
+++ /dev/null
@@ -1,9 +0,0 @@
-#ifndef __LINUX_TSL2563_H
-#define __LINUX_TSL2563_H
-
-struct tsl2563_platform_data {
-	int cover_comp_gain;
-};
-
-#endif /* __LINUX_TSL2563_H */
-
diff --git a/include/linux/platform_data/tsl2563.h b/include/linux/platform_data/tsl2563.h
new file mode 100644
index 000000000000..c90d7a09dda7
--- /dev/null
+++ b/include/linux/platform_data/tsl2563.h
@@ -0,0 +1,8 @@
+#ifndef __LINUX_TSL2563_H
+#define __LINUX_TSL2563_H
+
+struct tsl2563_platform_data {
+	int cover_comp_gain;
+};
+
+#endif /* __LINUX_TSL2563_H */
-- 
cgit v1.2.3


From abf917cd91cbb73952758f9741e2fa65002a48ee Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Wed, 25 Jul 2012 07:56:04 +0200
Subject: cputime: Generic on-demand virtual cputime accounting

If we want to stop the tick further idle, we need to be
able to account the cputime without using the tick.

Virtual based cputime accounting solves that problem by
hooking into kernel/user boundaries.

However implementing CONFIG_VIRT_CPU_ACCOUNTING require
low level hooks and involves more overhead. But we already
have a generic context tracking subsystem that is required
for RCU needs by archs which plan to shut down the tick
outside idle.

This patch implements a generic virtual based cputime
accounting that relies on these generic kernel/user hooks.

There are some upsides of doing this:

- This requires no arch code to implement CONFIG_VIRT_CPU_ACCOUNTING
if context tracking is already built (already necessary for RCU in full
tickless mode).

- We can rely on the generic context tracking subsystem to dynamically
(de)activate the hooks, so that we can switch anytime between virtual
and tick based accounting. This way we don't have the overhead
of the virtual accounting when the tick is running periodically.

And one downside:

- There is probably more overhead than a native virtual based cputime
accounting. But this relies on hooks that are already set anyway.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Li Zhong <zhong@linux.vnet.ibm.com>
Cc: Namhyung Kim <namhyung.kim@lge.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
---
 arch/ia64/include/asm/cputime.h              |  6 +--
 arch/ia64/include/asm/thread_info.h          |  4 +-
 arch/ia64/include/asm/xen/minstate.h         |  2 +-
 arch/ia64/kernel/asm-offsets.c               |  2 +-
 arch/ia64/kernel/entry.S                     | 16 ++++----
 arch/ia64/kernel/fsys.S                      |  4 +-
 arch/ia64/kernel/head.S                      |  4 +-
 arch/ia64/kernel/ivt.S                       |  8 ++--
 arch/ia64/kernel/minstate.h                  |  2 +-
 arch/ia64/kernel/time.c                      |  4 +-
 arch/powerpc/configs/chroma_defconfig        |  2 +-
 arch/powerpc/configs/corenet64_smp_defconfig |  2 +-
 arch/powerpc/configs/pasemi_defconfig        |  2 +-
 arch/powerpc/include/asm/cputime.h           |  6 +--
 arch/powerpc/include/asm/lppaca.h            |  2 +-
 arch/powerpc/include/asm/ppc_asm.h           |  4 +-
 arch/powerpc/kernel/entry_64.S               |  4 +-
 arch/powerpc/kernel/time.c                   |  4 +-
 arch/powerpc/platforms/pseries/dtl.c         |  6 +--
 arch/powerpc/platforms/pseries/setup.c       |  6 +--
 include/asm-generic/cputime.h                |  8 +++-
 include/asm-generic/cputime_nsecs.h          |  8 ++++
 include/linux/kernel_stat.h                  |  2 +-
 include/linux/vtime.h                        | 16 ++++++++
 init/Kconfig                                 | 23 ++++++++++-
 kernel/context_tracking.c                    |  6 ++-
 kernel/sched/cputime.c                       | 61 ++++++++++++++++++++++++++--
 27 files changed, 160 insertions(+), 54 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/include/asm/cputime.h b/arch/ia64/include/asm/cputime.h
index 040b31638001..e2d3f5baf265 100644
--- a/arch/ia64/include/asm/cputime.h
+++ b/arch/ia64/include/asm/cputime.h
@@ -11,19 +11,19 @@
  * as published by the Free Software Foundation; either version
  * 2 of the License, or (at your option) any later version.
  *
- * If we have CONFIG_VIRT_CPU_ACCOUNTING, we measure cpu time in nsec.
+ * If we have CONFIG_VIRT_CPU_ACCOUNTING_NATIVE, we measure cpu time in nsec.
  * Otherwise we measure cpu time in jiffies using the generic definitions.
  */
 
 #ifndef __IA64_CPUTIME_H
 #define __IA64_CPUTIME_H
 
-#ifndef CONFIG_VIRT_CPU_ACCOUNTING
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 # include <asm-generic/cputime.h>
 #else
 # include <asm/processor.h>
 # include <asm-generic/cputime_nsecs.h>
 extern void arch_vtime_task_switch(struct task_struct *tsk);
-#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 
 #endif /* __IA64_CPUTIME_H */
diff --git a/arch/ia64/include/asm/thread_info.h b/arch/ia64/include/asm/thread_info.h
index ff2ae4136584..020d655ed082 100644
--- a/arch/ia64/include/asm/thread_info.h
+++ b/arch/ia64/include/asm/thread_info.h
@@ -31,7 +31,7 @@ struct thread_info {
 	mm_segment_t addr_limit;	/* user-level address space limit */
 	int preempt_count;		/* 0=premptable, <0=BUG; will also serve as bh-counter */
 	struct restart_block restart_block;
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 	__u64 ac_stamp;
 	__u64 ac_leave;
 	__u64 ac_stime;
@@ -69,7 +69,7 @@ struct thread_info {
 #define task_stack_page(tsk)	((void *)(tsk))
 
 #define __HAVE_THREAD_FUNCTIONS
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 #define setup_thread_stack(p, org)			\
 	*task_thread_info(p) = *task_thread_info(org);	\
 	task_thread_info(p)->ac_stime = 0;		\
diff --git a/arch/ia64/include/asm/xen/minstate.h b/arch/ia64/include/asm/xen/minstate.h
index c57fa910f2c9..00cf03e0cb82 100644
--- a/arch/ia64/include/asm/xen/minstate.h
+++ b/arch/ia64/include/asm/xen/minstate.h
@@ -1,5 +1,5 @@
 
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 /* read ar.itc in advance, and use it before leaving bank 0 */
 #define XEN_ACCOUNT_GET_STAMP		\
 	MOV_FROM_ITC(pUStk, p6, r20, r2);
diff --git a/arch/ia64/kernel/asm-offsets.c b/arch/ia64/kernel/asm-offsets.c
index a48bd9a9927b..46c9e3007315 100644
--- a/arch/ia64/kernel/asm-offsets.c
+++ b/arch/ia64/kernel/asm-offsets.c
@@ -41,7 +41,7 @@ void foo(void)
 	DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
 	DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
 	DEFINE(TI_PRE_COUNT, offsetof(struct thread_info, preempt_count));
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 	DEFINE(TI_AC_STAMP, offsetof(struct thread_info, ac_stamp));
 	DEFINE(TI_AC_LEAVE, offsetof(struct thread_info, ac_leave));
 	DEFINE(TI_AC_STIME, offsetof(struct thread_info, ac_stime));
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index 6bfd8429ee0f..7a53530f22c2 100644
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
@@ -724,7 +724,7 @@ GLOBAL_ENTRY(__paravirt_leave_syscall)
 #endif
 .global __paravirt_work_processed_syscall;
 __paravirt_work_processed_syscall:
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 	adds r2=PT(LOADRS)+16,r12
 	MOV_FROM_ITC(pUStk, p9, r22, r19)	// fetch time at leave
 	adds r18=TI_FLAGS+IA64_TASK_SIZE,r13
@@ -762,7 +762,7 @@ __paravirt_work_processed_syscall:
 
 	ld8 r29=[r2],16		// M0|1 load cr.ipsr
 	ld8 r28=[r3],16		// M0|1 load cr.iip
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 (pUStk) add r14=TI_AC_LEAVE+IA64_TASK_SIZE,r13
 	;;
 	ld8 r30=[r2],16		// M0|1 load cr.ifs
@@ -793,7 +793,7 @@ __paravirt_work_processed_syscall:
 	ld8.fill r1=[r3],16			// M0|1 load r1
 (pUStk) mov r17=1				// A
 	;;
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 (pUStk) st1 [r15]=r17				// M2|3
 #else
 (pUStk) st1 [r14]=r17				// M2|3
@@ -813,7 +813,7 @@ __paravirt_work_processed_syscall:
 	shr.u r18=r19,16		// I0|1 get byte size of existing "dirty" partition
 	COVER				// B    add current frame into dirty partition & set cr.ifs
 	;;
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 	mov r19=ar.bsp			// M2   get new backing store pointer
 	st8 [r14]=r22			// M	save time at leave
 	mov f10=f0			// F    clear f10
@@ -948,7 +948,7 @@ GLOBAL_ENTRY(__paravirt_leave_kernel)
 	adds r16=PT(CR_IPSR)+16,r12
 	adds r17=PT(CR_IIP)+16,r12
 
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 	.pred.rel.mutex pUStk,pKStk
 	MOV_FROM_PSR(pKStk, r22, r29)	// M2 read PSR now that interrupts are disabled
 	MOV_FROM_ITC(pUStk, p9, r22, r29)	// M  fetch time at leave
@@ -981,7 +981,7 @@ GLOBAL_ENTRY(__paravirt_leave_kernel)
 	;;
 	ld8.fill r12=[r16],16
 	ld8.fill r13=[r17],16
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 (pUStk)	adds r3=TI_AC_LEAVE+IA64_TASK_SIZE,r18
 #else
 (pUStk)	adds r18=IA64_TASK_THREAD_ON_USTACK_OFFSET,r18
@@ -989,7 +989,7 @@ GLOBAL_ENTRY(__paravirt_leave_kernel)
 	;;
 	ld8 r20=[r16],16	// ar.fpsr
 	ld8.fill r15=[r17],16
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 (pUStk)	adds r18=IA64_TASK_THREAD_ON_USTACK_OFFSET,r18	// deferred
 #endif
 	;;
@@ -997,7 +997,7 @@ GLOBAL_ENTRY(__paravirt_leave_kernel)
 	ld8.fill r2=[r17]
 (pUStk)	mov r17=1
 	;;
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 	//  mmi_ :  ld8 st1 shr;;         mmi_ : st8 st1 shr;;
 	//  mib  :  mov add br        ->  mib  : ld8 add br
 	//  bbb_ :  br  nop cover;;       mbb_ : mov br  cover;;
diff --git a/arch/ia64/kernel/fsys.S b/arch/ia64/kernel/fsys.S
index e662f178b990..c4cd45d97749 100644
--- a/arch/ia64/kernel/fsys.S
+++ b/arch/ia64/kernel/fsys.S
@@ -529,7 +529,7 @@ GLOBAL_ENTRY(paravirt_fsys_bubble_down)
 	nop.i 0
 	;;
 	mov ar.rsc=0				// M2   set enforced lazy mode, pl 0, LE, loadrs=0
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 	MOV_FROM_ITC(p0, p6, r30, r23)		// M    get cycle for accounting
 #else
 	nop.m 0
@@ -555,7 +555,7 @@ GLOBAL_ENTRY(paravirt_fsys_bubble_down)
 	cmp.ne pKStk,pUStk=r0,r0		// A    set pKStk <- 0, pUStk <- 1
 	br.call.sptk.many b7=ia64_syscall_setup	// B
 	;;
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 	// mov.m r30=ar.itc is called in advance
 	add r16=TI_AC_STAMP+IA64_TASK_SIZE,r2
 	add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r2
diff --git a/arch/ia64/kernel/head.S b/arch/ia64/kernel/head.S
index 4738ff7bd66a..9be4e497f3d3 100644
--- a/arch/ia64/kernel/head.S
+++ b/arch/ia64/kernel/head.S
@@ -1073,7 +1073,7 @@ END(ia64_native_sched_clock)
 sched_clock = ia64_native_sched_clock
 #endif
 
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 GLOBAL_ENTRY(cycle_to_cputime)
 	alloc r16=ar.pfs,1,0,0,0
 	addl r8=THIS_CPU(ia64_cpu_info) + IA64_CPUINFO_NSEC_PER_CYC_OFFSET,r0
@@ -1091,7 +1091,7 @@ GLOBAL_ENTRY(cycle_to_cputime)
 	shrp r8=r9,r8,IA64_NSEC_PER_CYC_SHIFT
 	br.ret.sptk.many rp
 END(cycle_to_cputime)
-#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 
 #ifdef CONFIG_IA64_BRL_EMU
 
diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S
index fa25689fc453..689ffcaa284e 100644
--- a/arch/ia64/kernel/ivt.S
+++ b/arch/ia64/kernel/ivt.S
@@ -784,7 +784,7 @@ ENTRY(break_fault)
 
 (p8)	adds r28=16,r28				// A    switch cr.iip to next bundle
 (p9)	adds r8=1,r8				// A    increment ei to next slot
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 	;;
 	mov b6=r30				// I0   setup syscall handler branch reg early
 #else
@@ -801,7 +801,7 @@ ENTRY(break_fault)
 	//
 ///////////////////////////////////////////////////////////////////////
 	st1 [r16]=r0				// M2|3 clear current->thread.on_ustack flag
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 	MOV_FROM_ITC(p0, p14, r30, r18)		// M    get cycle for accounting
 #else
 	mov b6=r30				// I0   setup syscall handler branch reg early
@@ -817,7 +817,7 @@ ENTRY(break_fault)
 	cmp.eq p14,p0=r9,r0			// A    are syscalls being traced/audited?
 	br.call.sptk.many b7=ia64_syscall_setup	// B
 1:
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 	// mov.m r30=ar.itc is called in advance, and r13 is current
 	add r16=TI_AC_STAMP+IA64_TASK_SIZE,r13	// A
 	add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r13	// A
@@ -1043,7 +1043,7 @@ END(ia64_syscall_setup)
 	DBG_FAULT(16)
 	FAULT(16)
 
-#if defined(CONFIG_VIRT_CPU_ACCOUNTING) && defined(__IA64_ASM_PARAVIRTUALIZED_NATIVE)
+#if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) && defined(__IA64_ASM_PARAVIRTUALIZED_NATIVE)
 	/*
 	 * There is no particular reason for this code to be here, other than
 	 * that there happens to be space here that would go unused otherwise.
diff --git a/arch/ia64/kernel/minstate.h b/arch/ia64/kernel/minstate.h
index d56753a11636..cc82a7d744c9 100644
--- a/arch/ia64/kernel/minstate.h
+++ b/arch/ia64/kernel/minstate.h
@@ -4,7 +4,7 @@
 #include "entry.h"
 #include "paravirt_inst.h"
 
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 /* read ar.itc in advance, and use it before leaving bank 0 */
 #define ACCOUNT_GET_STAMP				\
 (pUStk) mov.m r20=ar.itc;
diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index 88a794536bc0..a3a3f5a1cb3a 100644
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -77,7 +77,7 @@ static struct clocksource clocksource_itc = {
 };
 static struct clocksource *itc_clocksource;
 
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 
 #include <linux/kernel_stat.h>
 
@@ -142,7 +142,7 @@ void vtime_account_idle(struct task_struct *tsk)
 	account_idle_time(vtime_delta(tsk));
 }
 
-#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 
 static irqreturn_t
 timer_interrupt (int irq, void *dev_id)
diff --git a/arch/powerpc/configs/chroma_defconfig b/arch/powerpc/configs/chroma_defconfig
index 29bb11ec6c64..4f35fc462385 100644
--- a/arch/powerpc/configs/chroma_defconfig
+++ b/arch/powerpc/configs/chroma_defconfig
@@ -1,6 +1,6 @@
 CONFIG_PPC64=y
 CONFIG_PPC_BOOK3E_64=y
-# CONFIG_VIRT_CPU_ACCOUNTING is not set
+# CONFIG_VIRT_CPU_ACCOUNTING_NATIVE is not set
 CONFIG_SMP=y
 CONFIG_NR_CPUS=256
 CONFIG_EXPERIMENTAL=y
diff --git a/arch/powerpc/configs/corenet64_smp_defconfig b/arch/powerpc/configs/corenet64_smp_defconfig
index 88fa5c46f66f..f7df8362911f 100644
--- a/arch/powerpc/configs/corenet64_smp_defconfig
+++ b/arch/powerpc/configs/corenet64_smp_defconfig
@@ -1,6 +1,6 @@
 CONFIG_PPC64=y
 CONFIG_PPC_BOOK3E_64=y
-# CONFIG_VIRT_CPU_ACCOUNTING is not set
+# CONFIG_VIRT_CPU_ACCOUNTING_NATIVE is not set
 CONFIG_SMP=y
 CONFIG_NR_CPUS=2
 CONFIG_EXPERIMENTAL=y
diff --git a/arch/powerpc/configs/pasemi_defconfig b/arch/powerpc/configs/pasemi_defconfig
index 840a2c2d0430..bcedeea0df89 100644
--- a/arch/powerpc/configs/pasemi_defconfig
+++ b/arch/powerpc/configs/pasemi_defconfig
@@ -1,6 +1,6 @@
 CONFIG_PPC64=y
 CONFIG_ALTIVEC=y
-# CONFIG_VIRT_CPU_ACCOUNTING is not set
+# CONFIG_VIRT_CPU_ACCOUNTING_NATIVE is not set
 CONFIG_SMP=y
 CONFIG_NR_CPUS=2
 CONFIG_EXPERIMENTAL=y
diff --git a/arch/powerpc/include/asm/cputime.h b/arch/powerpc/include/asm/cputime.h
index 483733bd06d4..607559ab271f 100644
--- a/arch/powerpc/include/asm/cputime.h
+++ b/arch/powerpc/include/asm/cputime.h
@@ -8,7 +8,7 @@
  * as published by the Free Software Foundation; either version
  * 2 of the License, or (at your option) any later version.
  *
- * If we have CONFIG_VIRT_CPU_ACCOUNTING, we measure cpu time in
+ * If we have CONFIG_VIRT_CPU_ACCOUNTING_NATIVE, we measure cpu time in
  * the same units as the timebase.  Otherwise we measure cpu time
  * in jiffies using the generic definitions.
  */
@@ -16,7 +16,7 @@
 #ifndef __POWERPC_CPUTIME_H
 #define __POWERPC_CPUTIME_H
 
-#ifndef CONFIG_VIRT_CPU_ACCOUNTING
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 #include <asm-generic/cputime.h>
 #ifdef __KERNEL__
 static inline void setup_cputime_one_jiffy(void) { }
@@ -231,5 +231,5 @@ static inline cputime_t clock_t_to_cputime(const unsigned long clk)
 static inline void arch_vtime_task_switch(struct task_struct *tsk) { }
 
 #endif /* __KERNEL__ */
-#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 #endif /* __POWERPC_CPUTIME_H */
diff --git a/arch/powerpc/include/asm/lppaca.h b/arch/powerpc/include/asm/lppaca.h
index 531fe0c3108f..b1e7f2af1016 100644
--- a/arch/powerpc/include/asm/lppaca.h
+++ b/arch/powerpc/include/asm/lppaca.h
@@ -145,7 +145,7 @@ struct dtl_entry {
 extern struct kmem_cache *dtl_cache;
 
 /*
- * When CONFIG_VIRT_CPU_ACCOUNTING = y, the cpu accounting code controls
+ * When CONFIG_VIRT_CPU_ACCOUNTING_NATIVE = y, the cpu accounting code controls
  * reading from the dispatch trace log.  If other code wants to consume
  * DTL entries, it can set this pointer to a function that will get
  * called once for each DTL entry that gets processed.
diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h
index ea2a86e8ff95..2d0e1f5d8339 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -24,7 +24,7 @@
  * user_time and system_time fields in the paca.
  */
 
-#ifndef CONFIG_VIRT_CPU_ACCOUNTING
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 #define ACCOUNT_CPU_USER_ENTRY(ra, rb)
 #define ACCOUNT_CPU_USER_EXIT(ra, rb)
 #define ACCOUNT_STOLEN_TIME
@@ -70,7 +70,7 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR)
 
 #endif /* CONFIG_PPC_SPLPAR */
 
-#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 
 /*
  * Macros for storing registers into and loading registers from
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index b310a0573625..a0ca42fb1541 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -94,7 +94,7 @@ system_call_common:
 	addi	r9,r1,STACK_FRAME_OVERHEAD
 	ld	r11,exception_marker@toc(r2)
 	std	r11,-16(r9)		/* "regshere" marker */
-#if defined(CONFIG_VIRT_CPU_ACCOUNTING) && defined(CONFIG_PPC_SPLPAR)
+#if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) && defined(CONFIG_PPC_SPLPAR)
 BEGIN_FW_FTR_SECTION
 	beq	33f
 	/* if from user, see if there are any DTL entries to process */
@@ -110,7 +110,7 @@ BEGIN_FW_FTR_SECTION
 	addi	r9,r1,STACK_FRAME_OVERHEAD
 33:
 END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR)
-#endif /* CONFIG_VIRT_CPU_ACCOUNTING && CONFIG_PPC_SPLPAR */
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE && CONFIG_PPC_SPLPAR */
 
 	/*
 	 * A syscall should always be called with interrupts enabled
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 6f6b1cccc916..22c9b67f9983 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -143,7 +143,7 @@ EXPORT_SYMBOL_GPL(ppc_proc_freq);
 unsigned long ppc_tb_freq;
 EXPORT_SYMBOL_GPL(ppc_tb_freq);
 
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 /*
  * Factors for converting from cputime_t (timebase ticks) to
  * jiffies, microseconds, seconds, and clock_t (1/USER_HZ seconds).
@@ -377,7 +377,7 @@ void vtime_account_user(struct task_struct *tsk)
 	account_user_time(tsk, utime, utimescaled);
 }
 
-#else /* ! CONFIG_VIRT_CPU_ACCOUNTING */
+#else /* ! CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 #define calc_cputime_factors()
 #endif
 
diff --git a/arch/powerpc/platforms/pseries/dtl.c b/arch/powerpc/platforms/pseries/dtl.c
index a7648543c59e..0cc0ac07a55d 100644
--- a/arch/powerpc/platforms/pseries/dtl.c
+++ b/arch/powerpc/platforms/pseries/dtl.c
@@ -57,7 +57,7 @@ static u8 dtl_event_mask = 0x7;
  */
 static int dtl_buf_entries = N_DISPATCH_LOG;
 
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 struct dtl_ring {
 	u64	write_index;
 	struct dtl_entry *write_ptr;
@@ -142,7 +142,7 @@ static u64 dtl_current_index(struct dtl *dtl)
 	return per_cpu(dtl_rings, dtl->cpu).write_index;
 }
 
-#else /* CONFIG_VIRT_CPU_ACCOUNTING */
+#else /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 
 static int dtl_start(struct dtl *dtl)
 {
@@ -188,7 +188,7 @@ static u64 dtl_current_index(struct dtl *dtl)
 {
 	return lppaca_of(dtl->cpu).dtl_idx;
 }
-#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 
 static int dtl_enable(struct dtl *dtl)
 {
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index ca55882465d6..527e12c9573b 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -281,7 +281,7 @@ static struct notifier_block pci_dn_reconfig_nb = {
 
 struct kmem_cache *dtl_cache;
 
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 /*
  * Allocate space for the dispatch trace log for all possible cpus
  * and register the buffers with the hypervisor.  This is used for
@@ -332,12 +332,12 @@ static int alloc_dispatch_logs(void)
 
 	return 0;
 }
-#else /* !CONFIG_VIRT_CPU_ACCOUNTING */
+#else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 static inline int alloc_dispatch_logs(void)
 {
 	return 0;
 }
-#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 
 static int alloc_dispatch_log_kmem_cache(void)
 {
diff --git a/include/asm-generic/cputime.h b/include/asm-generic/cputime.h
index c6eddf50eaf9..51969436b8b8 100644
--- a/include/asm-generic/cputime.h
+++ b/include/asm-generic/cputime.h
@@ -4,6 +4,12 @@
 #include <linux/time.h>
 #include <linux/jiffies.h>
 
-#include <asm-generic/cputime_jiffies.h>
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING
+# include <asm-generic/cputime_jiffies.h>
+#endif
+
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
+# include <asm-generic/cputime_nsecs.h>
+#endif
 
 #endif
diff --git a/include/asm-generic/cputime_nsecs.h b/include/asm-generic/cputime_nsecs.h
index c73d182f4751..b6485cafb7bd 100644
--- a/include/asm-generic/cputime_nsecs.h
+++ b/include/asm-generic/cputime_nsecs.h
@@ -26,6 +26,7 @@ typedef u64 __nocast cputime64_t;
  */
 #define cputime_to_jiffies(__ct)	\
 	((__force u64)(__ct) / (NSEC_PER_SEC / HZ))
+#define cputime_to_scaled(__ct)		(__ct)
 #define jiffies_to_cputime(__jif)	\
 	(__force cputime_t)((__jif) * (NSEC_PER_SEC / HZ))
 #define cputime64_to_jiffies64(__ct)	\
@@ -33,6 +34,13 @@ typedef u64 __nocast cputime64_t;
 #define jiffies64_to_cputime64(__jif)	\
 	(__force cputime64_t)((__jif) * (NSEC_PER_SEC / HZ))
 
+
+/*
+ * Convert cputime <-> nanoseconds
+ */
+#define nsecs_to_cputime(__nsecs)	((__force u64)(__nsecs))
+
+
 /*
  * Convert cputime <-> microseconds
  */
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index 66b70780e910..ed5f6ed6eb77 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -127,7 +127,7 @@ extern void account_system_time(struct task_struct *, int, cputime_t, cputime_t)
 extern void account_steal_time(cputime_t);
 extern void account_idle_time(cputime_t);
 
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 static inline void account_process_tick(struct task_struct *tsk, int user)
 {
 	vtime_account_user(tsk);
diff --git a/include/linux/vtime.h b/include/linux/vtime.h
index ae30ab58431a..21ef703d1b25 100644
--- a/include/linux/vtime.h
+++ b/include/linux/vtime.h
@@ -14,9 +14,25 @@ extern void vtime_account(struct task_struct *tsk);
 static inline void vtime_task_switch(struct task_struct *prev) { }
 static inline void vtime_account_system(struct task_struct *tsk) { }
 static inline void vtime_account_system_irqsafe(struct task_struct *tsk) { }
+static inline void vtime_account_user(struct task_struct *tsk) { }
 static inline void vtime_account(struct task_struct *tsk) { }
 #endif
 
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
+static inline void arch_vtime_task_switch(struct task_struct *tsk) { }
+static inline void vtime_user_enter(struct task_struct *tsk)
+{
+	vtime_account_system(tsk);
+}
+static inline void vtime_user_exit(struct task_struct *tsk)
+{
+	vtime_account_user(tsk);
+}
+#else
+static inline void vtime_user_enter(struct task_struct *tsk) { }
+static inline void vtime_user_exit(struct task_struct *tsk) { }
+#endif
+
 #ifdef CONFIG_IRQ_TIME_ACCOUNTING
 extern void irqtime_account_irq(struct task_struct *tsk);
 #else
diff --git a/init/Kconfig b/init/Kconfig
index be8b7f55312d..a05f843e7e52 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -326,6 +326,9 @@ source "kernel/time/Kconfig"
 
 menu "CPU/Task time and stats accounting"
 
+config VIRT_CPU_ACCOUNTING
+	bool
+
 choice
 	prompt "Cputime accounting"
 	default TICK_CPU_ACCOUNTING if !PPC64
@@ -342,9 +345,10 @@ config TICK_CPU_ACCOUNTING
 
 	  If unsure, say Y.
 
-config VIRT_CPU_ACCOUNTING
+config VIRT_CPU_ACCOUNTING_NATIVE
 	bool "Deterministic task and CPU time accounting"
 	depends on HAVE_VIRT_CPU_ACCOUNTING
+	select VIRT_CPU_ACCOUNTING
 	help
 	  Select this option to enable more accurate task and CPU time
 	  accounting.  This is done by reading a CPU counter on each
@@ -354,6 +358,23 @@ config VIRT_CPU_ACCOUNTING
 	  this also enables accounting of stolen time on logically-partitioned
 	  systems.
 
+config VIRT_CPU_ACCOUNTING_GEN
+	bool "Full dynticks CPU time accounting"
+	depends on HAVE_CONTEXT_TRACKING && 64BIT
+	select VIRT_CPU_ACCOUNTING
+	select CONTEXT_TRACKING
+	help
+	  Select this option to enable task and CPU time accounting on full
+	  dynticks systems. This accounting is implemented by watching every
+	  kernel-user boundaries using the context tracking subsystem.
+	  The accounting is thus performed at the expense of some significant
+	  overhead.
+
+	  For now this is only useful if you are working on the full
+	  dynticks subsystem development.
+
+	  If unsure, say N.
+
 config IRQ_TIME_ACCOUNTING
 	bool "Fine granularity task level IRQ time accounting"
 	depends on HAVE_IRQ_TIME_ACCOUNTING
diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c
index 54f471e536dc..9002e92e6372 100644
--- a/kernel/context_tracking.c
+++ b/kernel/context_tracking.c
@@ -30,8 +30,9 @@ void user_enter(void)
 	local_irq_save(flags);
 	if (__this_cpu_read(context_tracking.active) &&
 	    __this_cpu_read(context_tracking.state) != IN_USER) {
-		__this_cpu_write(context_tracking.state, IN_USER);
+		vtime_user_enter(current);
 		rcu_user_enter();
+		__this_cpu_write(context_tracking.state, IN_USER);
 	}
 	local_irq_restore(flags);
 }
@@ -53,8 +54,9 @@ void user_exit(void)
 
 	local_irq_save(flags);
 	if (__this_cpu_read(context_tracking.state) == IN_USER) {
-		__this_cpu_write(context_tracking.state, IN_KERNEL);
 		rcu_user_exit();
+		vtime_user_exit(current);
+		__this_cpu_write(context_tracking.state, IN_KERNEL);
 	}
 	local_irq_restore(flags);
 }
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 5849448b981e..1c964eced92c 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -3,6 +3,7 @@
 #include <linux/tsacct_kern.h>
 #include <linux/kernel_stat.h>
 #include <linux/static_key.h>
+#include <linux/context_tracking.h>
 #include "sched.h"
 
 
@@ -479,7 +480,9 @@ void vtime_task_switch(struct task_struct *prev)
 	else
 		vtime_account_system(prev);
 
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 	vtime_account_user(prev);
+#endif
 	arch_vtime_task_switch(prev);
 }
 #endif
@@ -495,10 +498,24 @@ void vtime_task_switch(struct task_struct *prev)
 #ifndef __ARCH_HAS_VTIME_ACCOUNT
 void vtime_account(struct task_struct *tsk)
 {
-	if (in_interrupt() || !is_idle_task(tsk))
-		vtime_account_system(tsk);
-	else
-		vtime_account_idle(tsk);
+	if (!in_interrupt()) {
+		/*
+		 * If we interrupted user, context_tracking_in_user()
+		 * is 1 because the context tracking don't hook
+		 * on irq entry/exit. This way we know if
+		 * we need to flush user time on kernel entry.
+		 */
+		if (context_tracking_in_user()) {
+			vtime_account_user(tsk);
+			return;
+		}
+
+		if (is_idle_task(tsk)) {
+			vtime_account_idle(tsk);
+			return;
+		}
+	}
+	vtime_account_system(tsk);
 }
 EXPORT_SYMBOL_GPL(vtime_account);
 #endif /* __ARCH_HAS_VTIME_ACCOUNT */
@@ -583,3 +600,39 @@ void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime
 	cputime_adjust(&cputime, &p->signal->prev_cputime, ut, st);
 }
 #endif
+
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
+static DEFINE_PER_CPU(unsigned long long, cputime_snap);
+
+static cputime_t get_vtime_delta(void)
+{
+	unsigned long long delta;
+
+	delta = sched_clock() - __this_cpu_read(cputime_snap);
+	__this_cpu_add(cputime_snap, delta);
+
+	/* CHECKME: always safe to convert nsecs to cputime? */
+	return nsecs_to_cputime(delta);
+}
+
+void vtime_account_system(struct task_struct *tsk)
+{
+	cputime_t delta_cpu = get_vtime_delta();
+
+	account_system_time(tsk, irq_count(), delta_cpu, cputime_to_scaled(delta_cpu));
+}
+
+void vtime_account_user(struct task_struct *tsk)
+{
+	cputime_t delta_cpu = get_vtime_delta();
+
+	account_user_time(tsk, delta_cpu, cputime_to_scaled(delta_cpu));
+}
+
+void vtime_account_idle(struct task_struct *tsk)
+{
+	cputime_t delta_cpu = get_vtime_delta();
+
+	account_idle_time(delta_cpu);
+}
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */
-- 
cgit v1.2.3


From 3f4724ea85b7d9055a9976fa8f30b471bdfbca93 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Mon, 16 Jul 2012 18:00:34 +0200
Subject: cputime: Allow dynamic switch between tick/virtual based cputime
 accounting

Allow to dynamically switch between tick and virtual based
cputime accounting. This way we can provide a kind of "on-demand"
virtual based cputime accounting. In this mode, the kernel relies
on the context tracking subsystem to dynamically probe on kernel
boundaries.

This is in preparation for being able to stop the timer tick in
more places than just the idle state. Doing so will depend on
CONFIG_VIRT_CPU_ACCOUNTING_GEN which makes it possible to account
the cputime without the tick by hooking on kernel/user boundaries.

Depending whether the tick is stopped or not, we can switch between
tick and vtime based accounting anytime in order to minimize the
overhead associated to user hooks.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Li Zhong <zhong@linux.vnet.ibm.com>
Cc: Namhyung Kim <namhyung.kim@lge.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/vtime.h    |  8 ++++++++
 kernel/sched/cputime.c   | 41 +++++++++++++++++++++++++++++++----------
 kernel/time/tick-sched.c |  5 ++++-
 3 files changed, 43 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/vtime.h b/include/linux/vtime.h
index 21ef703d1b25..5368af9bdf06 100644
--- a/include/linux/vtime.h
+++ b/include/linux/vtime.h
@@ -10,12 +10,20 @@ extern void vtime_account_system_irqsafe(struct task_struct *tsk);
 extern void vtime_account_idle(struct task_struct *tsk);
 extern void vtime_account_user(struct task_struct *tsk);
 extern void vtime_account(struct task_struct *tsk);
+
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
+extern bool vtime_accounting_enabled(void);
 #else
+static inline bool vtime_accounting_enabled(void) { return true; }
+#endif
+
+#else /* !CONFIG_VIRT_CPU_ACCOUNTING */
 static inline void vtime_task_switch(struct task_struct *prev) { }
 static inline void vtime_account_system(struct task_struct *tsk) { }
 static inline void vtime_account_system_irqsafe(struct task_struct *tsk) { }
 static inline void vtime_account_user(struct task_struct *tsk) { }
 static inline void vtime_account(struct task_struct *tsk) { }
+static inline bool vtime_accounting_enabled(void) { return false; }
 #endif
 
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 1c964eced92c..e1939d38bf73 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -317,8 +317,6 @@ out:
 	rcu_read_unlock();
 }
 
-#ifndef CONFIG_VIRT_CPU_ACCOUNTING
-
 #ifdef CONFIG_IRQ_TIME_ACCOUNTING
 /*
  * Account a tick to a process and cpustat
@@ -383,11 +381,12 @@ static void irqtime_account_idle_ticks(int ticks)
 		irqtime_account_process_tick(current, 0, rq);
 }
 #else /* CONFIG_IRQ_TIME_ACCOUNTING */
-static void irqtime_account_idle_ticks(int ticks) {}
-static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
+static inline void irqtime_account_idle_ticks(int ticks) {}
+static inline void irqtime_account_process_tick(struct task_struct *p, int user_tick,
 						struct rq *rq) {}
 #endif /* CONFIG_IRQ_TIME_ACCOUNTING */
 
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 /*
  * Account a single tick of cpu time.
  * @p: the process that the cpu time gets accounted to
@@ -398,6 +397,9 @@ void account_process_tick(struct task_struct *p, int user_tick)
 	cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy);
 	struct rq *rq = this_rq();
 
+	if (vtime_accounting_enabled())
+		return;
+
 	if (sched_clock_irqtime) {
 		irqtime_account_process_tick(p, user_tick, rq);
 		return;
@@ -439,8 +441,7 @@ void account_idle_ticks(unsigned long ticks)
 
 	account_idle_time(jiffies_to_cputime(ticks));
 }
-
-#endif
+#endif /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 
 /*
  * Use precise platform statistics if available:
@@ -475,6 +476,9 @@ EXPORT_SYMBOL_GPL(vtime_account_system_irqsafe);
 #ifndef __ARCH_HAS_VTIME_TASK_SWITCH
 void vtime_task_switch(struct task_struct *prev)
 {
+	if (!vtime_accounting_enabled())
+		return;
+
 	if (is_idle_task(prev))
 		vtime_account_idle(prev);
 	else
@@ -498,6 +502,9 @@ void vtime_task_switch(struct task_struct *prev)
 #ifndef __ARCH_HAS_VTIME_ACCOUNT
 void vtime_account(struct task_struct *tsk)
 {
+	if (!vtime_accounting_enabled())
+		return;
+
 	if (!in_interrupt()) {
 		/*
 		 * If we interrupted user, context_tracking_in_user()
@@ -520,7 +527,7 @@ void vtime_account(struct task_struct *tsk)
 EXPORT_SYMBOL_GPL(vtime_account);
 #endif /* __ARCH_HAS_VTIME_ACCOUNT */
 
-#else
+#else /* !CONFIG_VIRT_CPU_ACCOUNTING */
 
 static cputime_t scale_utime(cputime_t utime, cputime_t rtime, cputime_t total)
 {
@@ -599,7 +606,7 @@ void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime
 	thread_group_cputime(p, &cputime);
 	cputime_adjust(&cputime, &p->signal->prev_cputime, ut, st);
 }
-#endif
+#endif /* !CONFIG_VIRT_CPU_ACCOUNTING */
 
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
 static DEFINE_PER_CPU(unsigned long long, cputime_snap);
@@ -617,14 +624,23 @@ static cputime_t get_vtime_delta(void)
 
 void vtime_account_system(struct task_struct *tsk)
 {
-	cputime_t delta_cpu = get_vtime_delta();
+	cputime_t delta_cpu;
+
+	if (!vtime_accounting_enabled())
+		return;
 
+	delta_cpu = get_vtime_delta();
 	account_system_time(tsk, irq_count(), delta_cpu, cputime_to_scaled(delta_cpu));
 }
 
 void vtime_account_user(struct task_struct *tsk)
 {
-	cputime_t delta_cpu = get_vtime_delta();
+	cputime_t delta_cpu;
+
+	if (!vtime_accounting_enabled())
+		return;
+
+	delta_cpu = get_vtime_delta();
 
 	account_user_time(tsk, delta_cpu, cputime_to_scaled(delta_cpu));
 }
@@ -635,4 +651,9 @@ void vtime_account_idle(struct task_struct *tsk)
 
 	account_idle_time(delta_cpu);
 }
+
+bool vtime_accounting_enabled(void)
+{
+	return context_tracking_active();
+}
 #endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index d58e552d9fd1..46dfb6d94b1c 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -631,8 +631,11 @@ static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now)
 
 static void tick_nohz_account_idle_ticks(struct tick_sched *ts)
 {
-#ifndef CONFIG_VIRT_CPU_ACCOUNTING
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 	unsigned long ticks;
+
+	if (vtime_accounting_enabled())
+		return;
 	/*
 	 * We stopped the tick in idle. Update process times would miss the
 	 * time we slept as update_process_times does only a 1 tick
-- 
cgit v1.2.3


From 6fac4829ce0ef9b7f24369086ce5f0e9f38d37bc Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Tue, 13 Nov 2012 14:20:55 +0100
Subject: cputime: Use accessors to read task cputime stats

This is in preparation for the full dynticks feature. While
remotely reading the cputime of a task running in a full
dynticks CPU, we'll need to do some extra-computation. This
way we can account the time it spent tickless in userspace
since its last cputime snapshot.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Li Zhong <zhong@linux.vnet.ibm.com>
Cc: Namhyung Kim <namhyung.kim@lge.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
---
 arch/alpha/kernel/osf_sys.c |  6 ++++--
 arch/x86/kernel/apm_32.c    | 11 ++++++-----
 drivers/isdn/mISDN/stack.c  |  7 ++++++-
 fs/binfmt_elf.c             |  8 ++++++--
 fs/binfmt_elf_fdpic.c       |  7 +++++--
 fs/proc/array.c             |  4 ++--
 include/linux/sched.h       | 23 +++++++++++++++++++++++
 include/linux/tsacct_kern.h |  3 +++
 kernel/acct.c               |  6 ++++--
 kernel/cpu.c                |  4 +++-
 kernel/delayacct.c          |  7 +++++--
 kernel/exit.c               | 10 ++++++----
 kernel/posix-cpu-timers.c   | 28 ++++++++++++++++++++++------
 kernel/sched/cputime.c      | 13 +++++++------
 kernel/signal.c             | 12 ++++++++----
 kernel/tsacct.c             | 44 ++++++++++++++++++++++++++++++++++----------
 16 files changed, 144 insertions(+), 49 deletions(-)

(limited to 'include/linux')

diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c
index 14db93e4c8a8..dbc1760f418b 100644
--- a/arch/alpha/kernel/osf_sys.c
+++ b/arch/alpha/kernel/osf_sys.c
@@ -1139,6 +1139,7 @@ struct rusage32 {
 SYSCALL_DEFINE2(osf_getrusage, int, who, struct rusage32 __user *, ru)
 {
 	struct rusage32 r;
+	cputime_t utime, stime;
 
 	if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN)
 		return -EINVAL;
@@ -1146,8 +1147,9 @@ SYSCALL_DEFINE2(osf_getrusage, int, who, struct rusage32 __user *, ru)
 	memset(&r, 0, sizeof(r));
 	switch (who) {
 	case RUSAGE_SELF:
-		jiffies_to_timeval32(current->utime, &r.ru_utime);
-		jiffies_to_timeval32(current->stime, &r.ru_stime);
+		task_cputime(current, &utime, &stime);
+		jiffies_to_timeval32(utime, &r.ru_utime);
+		jiffies_to_timeval32(stime, &r.ru_stime);
 		r.ru_minflt = current->min_flt;
 		r.ru_majflt = current->maj_flt;
 		break;
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index d65464e43503..8d7012b7f402 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -899,6 +899,7 @@ static void apm_cpu_idle(void)
 	static int use_apm_idle; /* = 0 */
 	static unsigned int last_jiffies; /* = 0 */
 	static unsigned int last_stime; /* = 0 */
+	cputime_t stime;
 
 	int apm_idle_done = 0;
 	unsigned int jiffies_since_last_check = jiffies - last_jiffies;
@@ -906,23 +907,23 @@ static void apm_cpu_idle(void)
 
 	WARN_ONCE(1, "deprecated apm_cpu_idle will be deleted in 2012");
 recalc:
+	task_cputime(current, NULL, &stime);
 	if (jiffies_since_last_check > IDLE_CALC_LIMIT) {
 		use_apm_idle = 0;
-		last_jiffies = jiffies;
-		last_stime = current->stime;
 	} else if (jiffies_since_last_check > idle_period) {
 		unsigned int idle_percentage;
 
-		idle_percentage = current->stime - last_stime;
+		idle_percentage = stime - last_stime;
 		idle_percentage *= 100;
 		idle_percentage /= jiffies_since_last_check;
 		use_apm_idle = (idle_percentage > idle_threshold);
 		if (apm_info.forbid_idle)
 			use_apm_idle = 0;
-		last_jiffies = jiffies;
-		last_stime = current->stime;
 	}
 
+	last_jiffies = jiffies;
+	last_stime = stime;
+
 	bucket = IDLE_LEAKY_MAX;
 
 	while (!need_resched()) {
diff --git a/drivers/isdn/mISDN/stack.c b/drivers/isdn/mISDN/stack.c
index 5f21f629b7ae..deda591f70b9 100644
--- a/drivers/isdn/mISDN/stack.c
+++ b/drivers/isdn/mISDN/stack.c
@@ -18,6 +18,7 @@
 #include <linux/slab.h>
 #include <linux/mISDNif.h>
 #include <linux/kthread.h>
+#include <linux/sched.h>
 #include "core.h"
 
 static u_int	*debug;
@@ -202,6 +203,9 @@ static int
 mISDNStackd(void *data)
 {
 	struct mISDNstack *st = data;
+#ifdef MISDN_MSG_STATS
+	cputime_t utime, stime;
+#endif
 	int err = 0;
 
 	sigfillset(&current->blocked);
@@ -303,9 +307,10 @@ mISDNStackd(void *data)
 	       "msg %d sleep %d stopped\n",
 	       dev_name(&st->dev->dev), st->msg_cnt, st->sleep_cnt,
 	       st->stopped_cnt);
+	task_cputime(st->thread, &utime, &stime);
 	printk(KERN_DEBUG
 	       "mISDNStackd daemon for %s utime(%ld) stime(%ld)\n",
-	       dev_name(&st->dev->dev), st->thread->utime, st->thread->stime);
+	       dev_name(&st->dev->dev), utime, stime);
 	printk(KERN_DEBUG
 	       "mISDNStackd daemon for %s nvcsw(%ld) nivcsw(%ld)\n",
 	       dev_name(&st->dev->dev), st->thread->nvcsw, st->thread->nivcsw);
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 0c42cdbabecf..49d0b43458b7 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -33,6 +33,7 @@
 #include <linux/elf.h>
 #include <linux/utsname.h>
 #include <linux/coredump.h>
+#include <linux/sched.h>
 #include <asm/uaccess.h>
 #include <asm/param.h>
 #include <asm/page.h>
@@ -1320,8 +1321,11 @@ static void fill_prstatus(struct elf_prstatus *prstatus,
 		cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
 		cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
 	} else {
-		cputime_to_timeval(p->utime, &prstatus->pr_utime);
-		cputime_to_timeval(p->stime, &prstatus->pr_stime);
+		cputime_t utime, stime;
+
+		task_cputime(p, &utime, &stime);
+		cputime_to_timeval(utime, &prstatus->pr_utime);
+		cputime_to_timeval(stime, &prstatus->pr_stime);
 	}
 	cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
 	cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index dc84732e554f..cb240dd3b402 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1375,8 +1375,11 @@ static void fill_prstatus(struct elf_prstatus *prstatus,
 		cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
 		cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
 	} else {
-		cputime_to_timeval(p->utime, &prstatus->pr_utime);
-		cputime_to_timeval(p->stime, &prstatus->pr_stime);
+		cputime_t utime, stime;
+
+		task_cputime(p, &utime, &stime);
+		cputime_to_timeval(utime, &prstatus->pr_utime);
+		cputime_to_timeval(stime, &prstatus->pr_stime);
 	}
 	cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
 	cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 6a91e6ffbcbd..f7ed9ee46eb9 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -449,7 +449,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
 			do {
 				min_flt += t->min_flt;
 				maj_flt += t->maj_flt;
-				gtime += t->gtime;
+				gtime += task_gtime(t);
 				t = next_thread(t);
 			} while (t != task);
 
@@ -472,7 +472,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
 		min_flt = task->min_flt;
 		maj_flt = task->maj_flt;
 		task_cputime_adjusted(task, &utime, &stime);
-		gtime = task->gtime;
+		gtime = task_gtime(task);
 	}
 
 	/* scale priority and nice values from timeslices to -20..20 */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 6fc8f45de4e9..a9c608b6154e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1792,6 +1792,29 @@ static inline void put_task_struct(struct task_struct *t)
 		__put_task_struct(t);
 }
 
+static inline cputime_t task_gtime(struct task_struct *t)
+{
+	return t->gtime;
+}
+
+static inline void task_cputime(struct task_struct *t,
+				cputime_t *utime, cputime_t *stime)
+{
+	if (utime)
+		*utime = t->utime;
+	if (stime)
+		*stime = t->stime;
+}
+
+static inline void task_cputime_scaled(struct task_struct *t,
+				       cputime_t *utimescaled,
+				       cputime_t *stimescaled)
+{
+	if (utimescaled)
+		*utimescaled = t->utimescaled;
+	if (stimescaled)
+		*stimescaled = t->stimescaled;
+}
 extern void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st);
 extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st);
 
diff --git a/include/linux/tsacct_kern.h b/include/linux/tsacct_kern.h
index 44893e5ec8f7..3251965bf4cc 100644
--- a/include/linux/tsacct_kern.h
+++ b/include/linux/tsacct_kern.h
@@ -23,12 +23,15 @@ static inline void bacct_add_tsk(struct user_namespace *user_ns,
 #ifdef CONFIG_TASK_XACCT
 extern void xacct_add_tsk(struct taskstats *stats, struct task_struct *p);
 extern void acct_update_integrals(struct task_struct *tsk);
+extern void acct_account_cputime(struct task_struct *tsk);
 extern void acct_clear_integrals(struct task_struct *tsk);
 #else
 static inline void xacct_add_tsk(struct taskstats *stats, struct task_struct *p)
 {}
 static inline void acct_update_integrals(struct task_struct *tsk)
 {}
+static inline void acct_account_cputime(struct task_struct *tsk)
+{}
 static inline void acct_clear_integrals(struct task_struct *tsk)
 {}
 #endif /* CONFIG_TASK_XACCT */
diff --git a/kernel/acct.c b/kernel/acct.c
index 051e071a06e7..e8b1627ab9c7 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -566,6 +566,7 @@ out:
 void acct_collect(long exitcode, int group_dead)
 {
 	struct pacct_struct *pacct = &current->signal->pacct;
+	cputime_t utime, stime;
 	unsigned long vsize = 0;
 
 	if (group_dead && current->mm) {
@@ -593,8 +594,9 @@ void acct_collect(long exitcode, int group_dead)
 		pacct->ac_flag |= ACORE;
 	if (current->flags & PF_SIGNALED)
 		pacct->ac_flag |= AXSIG;
-	pacct->ac_utime += current->utime;
-	pacct->ac_stime += current->stime;
+	task_cputime(current, &utime, &stime);
+	pacct->ac_utime += utime;
+	pacct->ac_stime += stime;
 	pacct->ac_minflt += current->min_flt;
 	pacct->ac_majflt += current->maj_flt;
 	spin_unlock_irq(&current->sighand->siglock);
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 3046a503242c..e5d5e8e1e030 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -224,11 +224,13 @@ void clear_tasks_mm_cpumask(int cpu)
 static inline void check_for_tasks(int cpu)
 {
 	struct task_struct *p;
+	cputime_t utime, stime;
 
 	write_lock_irq(&tasklist_lock);
 	for_each_process(p) {
+		task_cputime(p, &utime, &stime);
 		if (task_cpu(p) == cpu && p->state == TASK_RUNNING &&
-		    (p->utime || p->stime))
+		    (utime || stime))
 			printk(KERN_WARNING "Task %s (pid = %d) is on cpu %d "
 				"(state = %ld, flags = %x)\n",
 				p->comm, task_pid_nr(p), cpu,
diff --git a/kernel/delayacct.c b/kernel/delayacct.c
index 418b3f7053aa..d473988c1d0b 100644
--- a/kernel/delayacct.c
+++ b/kernel/delayacct.c
@@ -106,6 +106,7 @@ int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
 	unsigned long long t2, t3;
 	unsigned long flags;
 	struct timespec ts;
+	cputime_t utime, stime, stimescaled, utimescaled;
 
 	/* Though tsk->delays accessed later, early exit avoids
 	 * unnecessary returning of other data
@@ -114,12 +115,14 @@ int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
 		goto done;
 
 	tmp = (s64)d->cpu_run_real_total;
-	cputime_to_timespec(tsk->utime + tsk->stime, &ts);
+	task_cputime(tsk, &utime, &stime);
+	cputime_to_timespec(utime + stime, &ts);
 	tmp += timespec_to_ns(&ts);
 	d->cpu_run_real_total = (tmp < (s64)d->cpu_run_real_total) ? 0 : tmp;
 
 	tmp = (s64)d->cpu_scaled_run_real_total;
-	cputime_to_timespec(tsk->utimescaled + tsk->stimescaled, &ts);
+	task_cputime_scaled(tsk, &utimescaled, &stimescaled);
+	cputime_to_timespec(utimescaled + stimescaled, &ts);
 	tmp += timespec_to_ns(&ts);
 	d->cpu_scaled_run_real_total =
 		(tmp < (s64)d->cpu_scaled_run_real_total) ? 0 : tmp;
diff --git a/kernel/exit.c b/kernel/exit.c
index b4df21937216..7dd20408707c 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -85,6 +85,7 @@ static void __exit_signal(struct task_struct *tsk)
 	bool group_dead = thread_group_leader(tsk);
 	struct sighand_struct *sighand;
 	struct tty_struct *uninitialized_var(tty);
+	cputime_t utime, stime;
 
 	sighand = rcu_dereference_check(tsk->sighand,
 					lockdep_tasklist_lock_is_held());
@@ -123,9 +124,10 @@ static void __exit_signal(struct task_struct *tsk)
 		 * We won't ever get here for the group leader, since it
 		 * will have been the last reference on the signal_struct.
 		 */
-		sig->utime += tsk->utime;
-		sig->stime += tsk->stime;
-		sig->gtime += tsk->gtime;
+		task_cputime(tsk, &utime, &stime);
+		sig->utime += utime;
+		sig->stime += stime;
+		sig->gtime += task_gtime(tsk);
 		sig->min_flt += tsk->min_flt;
 		sig->maj_flt += tsk->maj_flt;
 		sig->nvcsw += tsk->nvcsw;
@@ -1092,7 +1094,7 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
 		sig = p->signal;
 		psig->cutime += tgutime + sig->cutime;
 		psig->cstime += tgstime + sig->cstime;
-		psig->cgtime += p->gtime + sig->gtime + sig->cgtime;
+		psig->cgtime += task_gtime(p) + sig->gtime + sig->cgtime;
 		psig->cmin_flt +=
 			p->min_flt + sig->min_flt + sig->cmin_flt;
 		psig->cmaj_flt +=
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index a278cad1d5d6..165d47698477 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -155,11 +155,19 @@ static void bump_cpu_timer(struct k_itimer *timer,
 
 static inline cputime_t prof_ticks(struct task_struct *p)
 {
-	return p->utime + p->stime;
+	cputime_t utime, stime;
+
+	task_cputime(p, &utime, &stime);
+
+	return utime + stime;
 }
 static inline cputime_t virt_ticks(struct task_struct *p)
 {
-	return p->utime;
+	cputime_t utime;
+
+	task_cputime(p, &utime, NULL);
+
+	return utime;
 }
 
 static int
@@ -471,18 +479,23 @@ static void cleanup_timers(struct list_head *head,
  */
 void posix_cpu_timers_exit(struct task_struct *tsk)
 {
+	cputime_t utime, stime;
+
 	add_device_randomness((const void*) &tsk->se.sum_exec_runtime,
 						sizeof(unsigned long long));
+	task_cputime(tsk, &utime, &stime);
 	cleanup_timers(tsk->cpu_timers,
-		       tsk->utime, tsk->stime, tsk->se.sum_exec_runtime);
+		       utime, stime, tsk->se.sum_exec_runtime);
 
 }
 void posix_cpu_timers_exit_group(struct task_struct *tsk)
 {
 	struct signal_struct *const sig = tsk->signal;
+	cputime_t utime, stime;
 
+	task_cputime(tsk, &utime, &stime);
 	cleanup_timers(tsk->signal->cpu_timers,
-		       tsk->utime + sig->utime, tsk->stime + sig->stime,
+		       utime + sig->utime, stime + sig->stime,
 		       tsk->se.sum_exec_runtime + sig->sum_sched_runtime);
 }
 
@@ -1226,11 +1239,14 @@ static inline int task_cputime_expired(const struct task_cputime *sample,
 static inline int fastpath_timer_check(struct task_struct *tsk)
 {
 	struct signal_struct *sig;
+	cputime_t utime, stime;
+
+	task_cputime(tsk, &utime, &stime);
 
 	if (!task_cputime_zero(&tsk->cputime_expires)) {
 		struct task_cputime task_sample = {
-			.utime = tsk->utime,
-			.stime = tsk->stime,
+			.utime = utime,
+			.stime = stime,
 			.sum_exec_runtime = tsk->se.sum_exec_runtime
 		};
 
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index e1939d38bf73..c533deaf06d5 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -164,7 +164,7 @@ void account_user_time(struct task_struct *p, cputime_t cputime,
 	task_group_account_field(p, index, (__force u64) cputime);
 
 	/* Account for user time used */
-	acct_update_integrals(p);
+	acct_account_cputime(p);
 }
 
 /*
@@ -214,7 +214,7 @@ void __account_system_time(struct task_struct *p, cputime_t cputime,
 	task_group_account_field(p, index, (__force u64) cputime);
 
 	/* Account for system time used */
-	acct_update_integrals(p);
+	acct_account_cputime(p);
 }
 
 /*
@@ -296,6 +296,7 @@ static __always_inline bool steal_account_process_tick(void)
 void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
 {
 	struct signal_struct *sig = tsk->signal;
+	cputime_t utime, stime;
 	struct task_struct *t;
 
 	times->utime = sig->utime;
@@ -309,8 +310,9 @@ void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
 
 	t = tsk;
 	do {
-		times->utime += t->utime;
-		times->stime += t->stime;
+		task_cputime(tsk, &utime, &stime);
+		times->utime += utime;
+		times->stime += stime;
 		times->sum_exec_runtime += task_sched_runtime(t);
 	} while_each_thread(tsk, t);
 out:
@@ -588,11 +590,10 @@ static void cputime_adjust(struct task_cputime *curr,
 void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
 {
 	struct task_cputime cputime = {
-		.utime = p->utime,
-		.stime = p->stime,
 		.sum_exec_runtime = p->se.sum_exec_runtime,
 	};
 
+	task_cputime(p, &cputime.utime, &cputime.stime);
 	cputime_adjust(&cputime, &p->prev_cputime, ut, st);
 }
 
diff --git a/kernel/signal.c b/kernel/signal.c
index 372771e948c2..776a45a3661b 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1638,6 +1638,7 @@ bool do_notify_parent(struct task_struct *tsk, int sig)
 	unsigned long flags;
 	struct sighand_struct *psig;
 	bool autoreap = false;
+	cputime_t utime, stime;
 
 	BUG_ON(sig == -1);
 
@@ -1675,8 +1676,9 @@ bool do_notify_parent(struct task_struct *tsk, int sig)
 				       task_uid(tsk));
 	rcu_read_unlock();
 
-	info.si_utime = cputime_to_clock_t(tsk->utime + tsk->signal->utime);
-	info.si_stime = cputime_to_clock_t(tsk->stime + tsk->signal->stime);
+	task_cputime(tsk, &utime, &stime);
+	info.si_utime = cputime_to_clock_t(utime + tsk->signal->utime);
+	info.si_stime = cputime_to_clock_t(stime + tsk->signal->stime);
 
 	info.si_status = tsk->exit_code & 0x7f;
 	if (tsk->exit_code & 0x80)
@@ -1740,6 +1742,7 @@ static void do_notify_parent_cldstop(struct task_struct *tsk,
 	unsigned long flags;
 	struct task_struct *parent;
 	struct sighand_struct *sighand;
+	cputime_t utime, stime;
 
 	if (for_ptracer) {
 		parent = tsk->parent;
@@ -1758,8 +1761,9 @@ static void do_notify_parent_cldstop(struct task_struct *tsk,
 	info.si_uid = from_kuid_munged(task_cred_xxx(parent, user_ns), task_uid(tsk));
 	rcu_read_unlock();
 
-	info.si_utime = cputime_to_clock_t(tsk->utime);
-	info.si_stime = cputime_to_clock_t(tsk->stime);
+	task_cputime(tsk, &utime, &stime);
+	info.si_utime = cputime_to_clock_t(utime);
+	info.si_stime = cputime_to_clock_t(stime);
 
  	info.si_code = why;
  	switch (why) {
diff --git a/kernel/tsacct.c b/kernel/tsacct.c
index 625df0b44690..a1dd9a1b1327 100644
--- a/kernel/tsacct.c
+++ b/kernel/tsacct.c
@@ -32,6 +32,7 @@ void bacct_add_tsk(struct user_namespace *user_ns,
 {
 	const struct cred *tcred;
 	struct timespec uptime, ts;
+	cputime_t utime, stime, utimescaled, stimescaled;
 	u64 ac_etime;
 
 	BUILD_BUG_ON(TS_COMM_LEN < TASK_COMM_LEN);
@@ -65,10 +66,15 @@ void bacct_add_tsk(struct user_namespace *user_ns,
 	stats->ac_ppid	 = pid_alive(tsk) ?
 		task_tgid_nr_ns(rcu_dereference(tsk->real_parent), pid_ns) : 0;
 	rcu_read_unlock();
-	stats->ac_utime = cputime_to_usecs(tsk->utime);
-	stats->ac_stime = cputime_to_usecs(tsk->stime);
-	stats->ac_utimescaled = cputime_to_usecs(tsk->utimescaled);
-	stats->ac_stimescaled = cputime_to_usecs(tsk->stimescaled);
+
+	task_cputime(tsk, &utime, &stime);
+	stats->ac_utime = cputime_to_usecs(utime);
+	stats->ac_stime = cputime_to_usecs(stime);
+
+	task_cputime_scaled(tsk, &utimescaled, &stimescaled);
+	stats->ac_utimescaled = cputime_to_usecs(utimescaled);
+	stats->ac_stimescaled = cputime_to_usecs(stimescaled);
+
 	stats->ac_minflt = tsk->min_flt;
 	stats->ac_majflt = tsk->maj_flt;
 
@@ -115,11 +121,8 @@ void xacct_add_tsk(struct taskstats *stats, struct task_struct *p)
 #undef KB
 #undef MB
 
-/**
- * acct_update_integrals - update mm integral fields in task_struct
- * @tsk: task_struct for accounting
- */
-void acct_update_integrals(struct task_struct *tsk)
+static void __acct_update_integrals(struct task_struct *tsk,
+				    cputime_t utime, cputime_t stime)
 {
 	if (likely(tsk->mm)) {
 		cputime_t time, dtime;
@@ -128,7 +131,7 @@ void acct_update_integrals(struct task_struct *tsk)
 		u64 delta;
 
 		local_irq_save(flags);
-		time = tsk->stime + tsk->utime;
+		time = stime + utime;
 		dtime = time - tsk->acct_timexpd;
 		jiffies_to_timeval(cputime_to_jiffies(dtime), &value);
 		delta = value.tv_sec;
@@ -144,6 +147,27 @@ void acct_update_integrals(struct task_struct *tsk)
 	}
 }
 
+/**
+ * acct_update_integrals - update mm integral fields in task_struct
+ * @tsk: task_struct for accounting
+ */
+void acct_update_integrals(struct task_struct *tsk)
+{
+	cputime_t utime, stime;
+
+	task_cputime(tsk, &utime, &stime);
+	__acct_update_integrals(tsk, utime, stime);
+}
+
+/**
+ * acct_account_cputime - update mm integral after cputime update
+ * @tsk: task_struct for accounting
+ */
+void acct_account_cputime(struct task_struct *tsk)
+{
+	__acct_update_integrals(tsk, tsk->utime, tsk->stime);
+}
+
 /**
  * acct_clear_integrals - clear the mm integral fields in task_struct
  * @tsk: task_struct whose accounting fields are cleared
-- 
cgit v1.2.3


From c11f11fcbdb5be790c565aed46411486a7586afc Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Mon, 21 Jan 2013 00:50:22 +0100
Subject: kvm: Prepare to add generic guest entry/exit callbacks

Do some ground preparatory work before adding guest_enter()
and guest_exit() context tracking callbacks. Those will
be later used to read the guest cputime safely when we
run in full dynticks mode.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Gleb Natapov <gleb@redhat.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Li Zhong <zhong@linux.vnet.ibm.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Namhyung Kim <namhyung.kim@lge.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
---
 arch/ia64/kernel/time.c    |  1 +
 arch/powerpc/kernel/time.c |  1 +
 include/linux/kvm_host.h   | 39 ++++++++++++++++++++++++++++++---------
 include/linux/vtime.h      |  2 --
 kernel/sched/cputime.c     | 10 ----------
 5 files changed, 32 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index a3a3f5a1cb3a..fbaac1afb844 100644
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -136,6 +136,7 @@ void vtime_account_system(struct task_struct *tsk)
 
 	account_system_time(tsk, 0, delta, delta);
 }
+EXPORT_SYMBOL_GPL(vtime_account_system);
 
 void vtime_account_idle(struct task_struct *tsk)
 {
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 22c9b67f9983..2e04b37f67f9 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -347,6 +347,7 @@ void vtime_account_system(struct task_struct *tsk)
 	if (stolen)
 		account_steal_time(stolen);
 }
+EXPORT_SYMBOL_GPL(vtime_account_system);
 
 void vtime_account_idle(struct task_struct *tsk)
 {
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 2c497ab0d03d..4fe2396401da 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -22,6 +22,7 @@
 #include <linux/rcupdate.h>
 #include <linux/ratelimit.h>
 #include <linux/err.h>
+#include <linux/irqflags.h>
 #include <asm/signal.h>
 
 #include <linux/kvm.h>
@@ -740,15 +741,36 @@ static inline int kvm_deassign_device(struct kvm *kvm,
 }
 #endif /* CONFIG_IOMMU_API */
 
-static inline void kvm_guest_enter(void)
+static inline void guest_enter(void)
 {
-	BUG_ON(preemptible());
 	/*
 	 * This is running in ioctl context so we can avoid
 	 * the call to vtime_account() with its unnecessary idle check.
 	 */
-	vtime_account_system_irqsafe(current);
+	vtime_account_system(current);
 	current->flags |= PF_VCPU;
+}
+
+static inline void guest_exit(void)
+{
+	/*
+	 * This is running in ioctl context so we can avoid
+	 * the call to vtime_account() with its unnecessary idle check.
+	 */
+	vtime_account_system(current);
+	current->flags &= ~PF_VCPU;
+}
+
+static inline void kvm_guest_enter(void)
+{
+	unsigned long flags;
+
+	BUG_ON(preemptible());
+
+	local_irq_save(flags);
+	guest_enter();
+	local_irq_restore(flags);
+
 	/* KVM does not hold any references to rcu protected data when it
 	 * switches CPU into a guest mode. In fact switching to a guest mode
 	 * is very similar to exiting to userspase from rcu point of view. In
@@ -761,12 +783,11 @@ static inline void kvm_guest_enter(void)
 
 static inline void kvm_guest_exit(void)
 {
-	/*
-	 * This is running in ioctl context so we can avoid
-	 * the call to vtime_account() with its unnecessary idle check.
-	 */
-	vtime_account_system_irqsafe(current);
-	current->flags &= ~PF_VCPU;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	guest_exit();
+	local_irq_restore(flags);
 }
 
 /*
diff --git a/include/linux/vtime.h b/include/linux/vtime.h
index 5368af9bdf06..bb50c3ca0d79 100644
--- a/include/linux/vtime.h
+++ b/include/linux/vtime.h
@@ -6,7 +6,6 @@ struct task_struct;
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING
 extern void vtime_task_switch(struct task_struct *prev);
 extern void vtime_account_system(struct task_struct *tsk);
-extern void vtime_account_system_irqsafe(struct task_struct *tsk);
 extern void vtime_account_idle(struct task_struct *tsk);
 extern void vtime_account_user(struct task_struct *tsk);
 extern void vtime_account(struct task_struct *tsk);
@@ -20,7 +19,6 @@ static inline bool vtime_accounting_enabled(void) { return true; }
 #else /* !CONFIG_VIRT_CPU_ACCOUNTING */
 static inline void vtime_task_switch(struct task_struct *prev) { }
 static inline void vtime_account_system(struct task_struct *tsk) { }
-static inline void vtime_account_system_irqsafe(struct task_struct *tsk) { }
 static inline void vtime_account_user(struct task_struct *tsk) { }
 static inline void vtime_account(struct task_struct *tsk) { }
 static inline bool vtime_accounting_enabled(void) { return false; }
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index c533deaf06d5..a44ecdf809a1 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -465,16 +465,6 @@ void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime
 	*st = cputime.stime;
 }
 
-void vtime_account_system_irqsafe(struct task_struct *tsk)
-{
-	unsigned long flags;
-
-	local_irq_save(flags);
-	vtime_account_system(tsk);
-	local_irq_restore(flags);
-}
-EXPORT_SYMBOL_GPL(vtime_account_system_irqsafe);
-
 #ifndef __ARCH_HAS_VTIME_TASK_SWITCH
 void vtime_task_switch(struct task_struct *prev)
 {
-- 
cgit v1.2.3


From 6a61671bb2f3a1bd12cd17b8fca811a624782632 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Sun, 16 Dec 2012 20:00:34 +0100
Subject: cputime: Safely read cputime of full dynticks CPUs

While remotely reading the cputime of a task running in a
full dynticks CPU, the values stored in utime/stime fields
of struct task_struct may be stale. Its values may be those
of the last kernel <-> user transition time snapshot and
we need to add the tickless time spent since this snapshot.

To fix this, flush the cputime of the dynticks CPUs on
kernel <-> user transition and record the time / context
where we did this. Then on top of this snapshot and the current
time, perform the fixup on the reader side from task_times()
accessors.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Li Zhong <zhong@linux.vnet.ibm.com>
Cc: Namhyung Kim <namhyung.kim@lge.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
[fixed kvm module related build errors]
Signed-off-by: Sedat Dilek <sedat.dilek@gmail.com>
---
 arch/s390/kernel/vtime.c  |   6 +-
 include/linux/hardirq.h   |   4 +-
 include/linux/init_task.h |  11 +++
 include/linux/kvm_host.h  |  20 ++++-
 include/linux/sched.h     |  27 +++++--
 include/linux/vtime.h     |  47 ++++++-----
 kernel/context_tracking.c |  21 ++++-
 kernel/fork.c             |   6 ++
 kernel/sched/core.c       |   1 +
 kernel/sched/cputime.c    | 193 +++++++++++++++++++++++++++++++++++++++++++---
 kernel/softirq.c          |   6 +-
 11 files changed, 290 insertions(+), 52 deletions(-)

(limited to 'include/linux')

diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index e84b8b68444a..ce9cc5aa2033 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -127,7 +127,7 @@ void vtime_account_user(struct task_struct *tsk)
  * Update process times based on virtual cpu times stored by entry.S
  * to the lowcore fields user_timer, system_timer & steal_clock.
  */
-void vtime_account(struct task_struct *tsk)
+void vtime_account_irq_enter(struct task_struct *tsk)
 {
 	struct thread_info *ti = task_thread_info(tsk);
 	u64 timer, system;
@@ -145,10 +145,10 @@ void vtime_account(struct task_struct *tsk)
 
 	virt_timer_forward(system);
 }
-EXPORT_SYMBOL_GPL(vtime_account);
+EXPORT_SYMBOL_GPL(vtime_account_irq_enter);
 
 void vtime_account_system(struct task_struct *tsk)
-__attribute__((alias("vtime_account")));
+__attribute__((alias("vtime_account_irq_enter")));
 EXPORT_SYMBOL_GPL(vtime_account_system);
 
 void __kprobes vtime_stop_cpu(void)
diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index 624ef3f45c8e..7105d5cbb762 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -153,7 +153,7 @@ extern void rcu_nmi_exit(void);
  */
 #define __irq_enter()					\
 	do {						\
-		vtime_account_irq_enter(current);	\
+		account_irq_enter_time(current);	\
 		add_preempt_count(HARDIRQ_OFFSET);	\
 		trace_hardirq_enter();			\
 	} while (0)
@@ -169,7 +169,7 @@ extern void irq_enter(void);
 #define __irq_exit()					\
 	do {						\
 		trace_hardirq_exit();			\
-		vtime_account_irq_exit(current);	\
+		account_irq_exit_time(current);		\
 		sub_preempt_count(HARDIRQ_OFFSET);	\
 	} while (0)
 
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 6d087c5f57f7..cc898b871cef 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -10,6 +10,7 @@
 #include <linux/pid_namespace.h>
 #include <linux/user_namespace.h>
 #include <linux/securebits.h>
+#include <linux/seqlock.h>
 #include <net/net_namespace.h>
 
 #ifdef CONFIG_SMP
@@ -141,6 +142,15 @@ extern struct task_group root_task_group;
 # define INIT_PERF_EVENTS(tsk)
 #endif
 
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
+# define INIT_VTIME(tsk)						\
+	.vtime_seqlock = __SEQLOCK_UNLOCKED(tsk.vtime_seqlock),	\
+	.vtime_snap = 0,				\
+	.vtime_snap_whence = VTIME_SYS,
+#else
+# define INIT_VTIME(tsk)
+#endif
+
 #define INIT_TASK_COMM "swapper"
 
 /*
@@ -210,6 +220,7 @@ extern struct task_group root_task_group;
 	INIT_TRACE_RECURSION						\
 	INIT_TASK_RCU_PREEMPT(tsk)					\
 	INIT_CPUSET_SEQ							\
+	INIT_VTIME(tsk)							\
 }
 
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 4fe2396401da..b7996a768eb2 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -741,7 +741,7 @@ static inline int kvm_deassign_device(struct kvm *kvm,
 }
 #endif /* CONFIG_IOMMU_API */
 
-static inline void guest_enter(void)
+static inline void __guest_enter(void)
 {
 	/*
 	 * This is running in ioctl context so we can avoid
@@ -751,7 +751,7 @@ static inline void guest_enter(void)
 	current->flags |= PF_VCPU;
 }
 
-static inline void guest_exit(void)
+static inline void __guest_exit(void)
 {
 	/*
 	 * This is running in ioctl context so we can avoid
@@ -761,6 +761,22 @@ static inline void guest_exit(void)
 	current->flags &= ~PF_VCPU;
 }
 
+#ifdef CONFIG_CONTEXT_TRACKING
+extern void guest_enter(void);
+extern void guest_exit(void);
+
+#else /* !CONFIG_CONTEXT_TRACKING */
+static inline void guest_enter(void)
+{
+	__guest_enter();
+}
+
+static inline void guest_exit(void)
+{
+	__guest_exit();
+}
+#endif /* !CONFIG_CONTEXT_TRACKING */
+
 static inline void kvm_guest_enter(void)
 {
 	unsigned long flags;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index a9c608b6154e..a9fa5145e1a7 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1367,6 +1367,15 @@ struct task_struct {
 	cputime_t gtime;
 #ifndef CONFIG_VIRT_CPU_ACCOUNTING
 	struct cputime prev_cputime;
+#endif
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
+	seqlock_t vtime_seqlock;
+	unsigned long long vtime_snap;
+	enum {
+		VTIME_SLEEPING = 0,
+		VTIME_USER,
+		VTIME_SYS,
+	} vtime_snap_whence;
 #endif
 	unsigned long nvcsw, nivcsw; /* context switch counts */
 	struct timespec start_time; 		/* monotonic time */
@@ -1792,11 +1801,13 @@ static inline void put_task_struct(struct task_struct *t)
 		__put_task_struct(t);
 }
 
-static inline cputime_t task_gtime(struct task_struct *t)
-{
-	return t->gtime;
-}
-
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
+extern void task_cputime(struct task_struct *t,
+			 cputime_t *utime, cputime_t *stime);
+extern void task_cputime_scaled(struct task_struct *t,
+				cputime_t *utimescaled, cputime_t *stimescaled);
+extern cputime_t task_gtime(struct task_struct *t);
+#else
 static inline void task_cputime(struct task_struct *t,
 				cputime_t *utime, cputime_t *stime)
 {
@@ -1815,6 +1826,12 @@ static inline void task_cputime_scaled(struct task_struct *t,
 	if (stimescaled)
 		*stimescaled = t->stimescaled;
 }
+
+static inline cputime_t task_gtime(struct task_struct *t)
+{
+	return t->gtime;
+}
+#endif
 extern void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st);
 extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st);
 
diff --git a/include/linux/vtime.h b/include/linux/vtime.h
index bb50c3ca0d79..71a5782d8c59 100644
--- a/include/linux/vtime.h
+++ b/include/linux/vtime.h
@@ -8,35 +8,44 @@ extern void vtime_task_switch(struct task_struct *prev);
 extern void vtime_account_system(struct task_struct *tsk);
 extern void vtime_account_idle(struct task_struct *tsk);
 extern void vtime_account_user(struct task_struct *tsk);
-extern void vtime_account(struct task_struct *tsk);
+extern void vtime_account_irq_enter(struct task_struct *tsk);
 
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
-extern bool vtime_accounting_enabled(void);
-#else
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 static inline bool vtime_accounting_enabled(void) { return true; }
 #endif
 
 #else /* !CONFIG_VIRT_CPU_ACCOUNTING */
+
 static inline void vtime_task_switch(struct task_struct *prev) { }
 static inline void vtime_account_system(struct task_struct *tsk) { }
 static inline void vtime_account_user(struct task_struct *tsk) { }
-static inline void vtime_account(struct task_struct *tsk) { }
+static inline void vtime_account_irq_enter(struct task_struct *tsk) { }
 static inline bool vtime_accounting_enabled(void) { return false; }
 #endif
 
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
-static inline void arch_vtime_task_switch(struct task_struct *tsk) { }
-static inline void vtime_user_enter(struct task_struct *tsk)
-{
-	vtime_account_system(tsk);
-}
+extern void arch_vtime_task_switch(struct task_struct *tsk);
+extern void vtime_account_irq_exit(struct task_struct *tsk);
+extern bool vtime_accounting_enabled(void);
+extern void vtime_user_enter(struct task_struct *tsk);
 static inline void vtime_user_exit(struct task_struct *tsk)
 {
 	vtime_account_user(tsk);
 }
+extern void vtime_guest_enter(struct task_struct *tsk);
+extern void vtime_guest_exit(struct task_struct *tsk);
+extern void vtime_init_idle(struct task_struct *tsk);
 #else
+static inline void vtime_account_irq_exit(struct task_struct *tsk)
+{
+	/* On hard|softirq exit we always account to hard|softirq cputime */
+	vtime_account_system(tsk);
+}
 static inline void vtime_user_enter(struct task_struct *tsk) { }
 static inline void vtime_user_exit(struct task_struct *tsk) { }
+static inline void vtime_guest_enter(struct task_struct *tsk) { }
+static inline void vtime_guest_exit(struct task_struct *tsk) { }
+static inline void vtime_init_idle(struct task_struct *tsk) { }
 #endif
 
 #ifdef CONFIG_IRQ_TIME_ACCOUNTING
@@ -45,25 +54,15 @@ extern void irqtime_account_irq(struct task_struct *tsk);
 static inline void irqtime_account_irq(struct task_struct *tsk) { }
 #endif
 
-static inline void vtime_account_irq_enter(struct task_struct *tsk)
+static inline void account_irq_enter_time(struct task_struct *tsk)
 {
-	/*
-	 * Hardirq can interrupt idle task anytime. So we need vtime_account()
-	 * that performs the idle check in CONFIG_VIRT_CPU_ACCOUNTING.
-	 * Softirq can also interrupt idle task directly if it calls
-	 * local_bh_enable(). Such case probably don't exist but we never know.
-	 * Ksoftirqd is not concerned because idle time is flushed on context
-	 * switch. Softirqs in the end of hardirqs are also not a problem because
-	 * the idle time is flushed on hardirq time already.
-	 */
-	vtime_account(tsk);
+	vtime_account_irq_enter(tsk);
 	irqtime_account_irq(tsk);
 }
 
-static inline void vtime_account_irq_exit(struct task_struct *tsk)
+static inline void account_irq_exit_time(struct task_struct *tsk)
 {
-	/* On hard|softirq exit we always account to hard|softirq cputime */
-	vtime_account_system(tsk);
+	vtime_account_irq_exit(tsk);
 	irqtime_account_irq(tsk);
 }
 
diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c
index 9002e92e6372..74f68f4dc6c2 100644
--- a/kernel/context_tracking.c
+++ b/kernel/context_tracking.c
@@ -1,8 +1,9 @@
 #include <linux/context_tracking.h>
+#include <linux/kvm_host.h>
 #include <linux/rcupdate.h>
 #include <linux/sched.h>
 #include <linux/hardirq.h>
-
+#include <linux/export.h>
 
 DEFINE_PER_CPU(struct context_tracking, context_tracking) = {
 #ifdef CONFIG_CONTEXT_TRACKING_FORCE
@@ -61,6 +62,24 @@ void user_exit(void)
 	local_irq_restore(flags);
 }
 
+void guest_enter(void)
+{
+	if (vtime_accounting_enabled())
+		vtime_guest_enter(current);
+	else
+		__guest_enter();
+}
+EXPORT_SYMBOL_GPL(guest_enter);
+
+void guest_exit(void)
+{
+	if (vtime_accounting_enabled())
+		vtime_guest_exit(current);
+	else
+		__guest_exit();
+}
+EXPORT_SYMBOL_GPL(guest_exit);
+
 void context_tracking_task_switch(struct task_struct *prev,
 			     struct task_struct *next)
 {
diff --git a/kernel/fork.c b/kernel/fork.c
index 65ca6d27f24e..e68a95b4cf26 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1233,6 +1233,12 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 #ifndef CONFIG_VIRT_CPU_ACCOUNTING
 	p->prev_cputime.utime = p->prev_cputime.stime = 0;
 #endif
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
+	seqlock_init(&p->vtime_seqlock);
+	p->vtime_snap = 0;
+	p->vtime_snap_whence = VTIME_SLEEPING;
+#endif
+
 #if defined(SPLIT_RSS_COUNTING)
 	memset(&p->rss_stat, 0, sizeof(p->rss_stat));
 #endif
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 257002c13bb0..261022d7e79d 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4666,6 +4666,7 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
 	 */
 	idle->sched_class = &idle_sched_class;
 	ftrace_graph_init_idle_task(idle, cpu);
+	vtime_init_idle(idle);
 #if defined(CONFIG_SMP)
 	sprintf(idle->comm, "%s/%d", INIT_TASK_COMM, cpu);
 #endif
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index a44ecdf809a1..082e05d915b4 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -492,7 +492,7 @@ void vtime_task_switch(struct task_struct *prev)
  * vtime_account().
  */
 #ifndef __ARCH_HAS_VTIME_ACCOUNT
-void vtime_account(struct task_struct *tsk)
+void vtime_account_irq_enter(struct task_struct *tsk)
 {
 	if (!vtime_accounting_enabled())
 		return;
@@ -516,7 +516,7 @@ void vtime_account(struct task_struct *tsk)
 	}
 	vtime_account_system(tsk);
 }
-EXPORT_SYMBOL_GPL(vtime_account);
+EXPORT_SYMBOL_GPL(vtime_account_irq_enter);
 #endif /* __ARCH_HAS_VTIME_ACCOUNT */
 
 #else /* !CONFIG_VIRT_CPU_ACCOUNTING */
@@ -600,28 +600,55 @@ void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime
 #endif /* !CONFIG_VIRT_CPU_ACCOUNTING */
 
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
-static DEFINE_PER_CPU(unsigned long long, cputime_snap);
+static unsigned long long vtime_delta(struct task_struct *tsk)
+{
+	unsigned long long clock;
+
+	clock = sched_clock();
+	if (clock < tsk->vtime_snap)
+		return 0;
 
-static cputime_t get_vtime_delta(void)
+	return clock - tsk->vtime_snap;
+}
+
+static cputime_t get_vtime_delta(struct task_struct *tsk)
 {
-	unsigned long long delta;
+	unsigned long long delta = vtime_delta(tsk);
 
-	delta = sched_clock() - __this_cpu_read(cputime_snap);
-	__this_cpu_add(cputime_snap, delta);
+	WARN_ON_ONCE(tsk->vtime_snap_whence == VTIME_SLEEPING);
+	tsk->vtime_snap += delta;
 
 	/* CHECKME: always safe to convert nsecs to cputime? */
 	return nsecs_to_cputime(delta);
 }
 
+static void __vtime_account_system(struct task_struct *tsk)
+{
+	cputime_t delta_cpu = get_vtime_delta(tsk);
+
+	account_system_time(tsk, irq_count(), delta_cpu, cputime_to_scaled(delta_cpu));
+}
+
 void vtime_account_system(struct task_struct *tsk)
 {
-	cputime_t delta_cpu;
+	if (!vtime_accounting_enabled())
+		return;
+
+	write_seqlock(&tsk->vtime_seqlock);
+	__vtime_account_system(tsk);
+	write_sequnlock(&tsk->vtime_seqlock);
+}
 
+void vtime_account_irq_exit(struct task_struct *tsk)
+{
 	if (!vtime_accounting_enabled())
 		return;
 
-	delta_cpu = get_vtime_delta();
-	account_system_time(tsk, irq_count(), delta_cpu, cputime_to_scaled(delta_cpu));
+	write_seqlock(&tsk->vtime_seqlock);
+	if (context_tracking_in_user())
+		tsk->vtime_snap_whence = VTIME_USER;
+	__vtime_account_system(tsk);
+	write_sequnlock(&tsk->vtime_seqlock);
 }
 
 void vtime_account_user(struct task_struct *tsk)
@@ -631,14 +658,44 @@ void vtime_account_user(struct task_struct *tsk)
 	if (!vtime_accounting_enabled())
 		return;
 
-	delta_cpu = get_vtime_delta();
+	delta_cpu = get_vtime_delta(tsk);
 
+	write_seqlock(&tsk->vtime_seqlock);
+	tsk->vtime_snap_whence = VTIME_SYS;
 	account_user_time(tsk, delta_cpu, cputime_to_scaled(delta_cpu));
+	write_sequnlock(&tsk->vtime_seqlock);
+}
+
+void vtime_user_enter(struct task_struct *tsk)
+{
+	if (!vtime_accounting_enabled())
+		return;
+
+	write_seqlock(&tsk->vtime_seqlock);
+	tsk->vtime_snap_whence = VTIME_USER;
+	__vtime_account_system(tsk);
+	write_sequnlock(&tsk->vtime_seqlock);
+}
+
+void vtime_guest_enter(struct task_struct *tsk)
+{
+	write_seqlock(&tsk->vtime_seqlock);
+	__vtime_account_system(tsk);
+	current->flags |= PF_VCPU;
+	write_sequnlock(&tsk->vtime_seqlock);
+}
+
+void vtime_guest_exit(struct task_struct *tsk)
+{
+	write_seqlock(&tsk->vtime_seqlock);
+	__vtime_account_system(tsk);
+	current->flags &= ~PF_VCPU;
+	write_sequnlock(&tsk->vtime_seqlock);
 }
 
 void vtime_account_idle(struct task_struct *tsk)
 {
-	cputime_t delta_cpu = get_vtime_delta();
+	cputime_t delta_cpu = get_vtime_delta(tsk);
 
 	account_idle_time(delta_cpu);
 }
@@ -647,4 +704,116 @@ bool vtime_accounting_enabled(void)
 {
 	return context_tracking_active();
 }
+
+void arch_vtime_task_switch(struct task_struct *prev)
+{
+	write_seqlock(&prev->vtime_seqlock);
+	prev->vtime_snap_whence = VTIME_SLEEPING;
+	write_sequnlock(&prev->vtime_seqlock);
+
+	write_seqlock(&current->vtime_seqlock);
+	current->vtime_snap_whence = VTIME_SYS;
+	current->vtime_snap = sched_clock();
+	write_sequnlock(&current->vtime_seqlock);
+}
+
+void vtime_init_idle(struct task_struct *t)
+{
+	unsigned long flags;
+
+	write_seqlock_irqsave(&t->vtime_seqlock, flags);
+	t->vtime_snap_whence = VTIME_SYS;
+	t->vtime_snap = sched_clock();
+	write_sequnlock_irqrestore(&t->vtime_seqlock, flags);
+}
+
+cputime_t task_gtime(struct task_struct *t)
+{
+	unsigned long flags;
+	unsigned int seq;
+	cputime_t gtime;
+
+	do {
+		seq = read_seqbegin_irqsave(&t->vtime_seqlock, flags);
+
+		gtime = t->gtime;
+		if (t->flags & PF_VCPU)
+			gtime += vtime_delta(t);
+
+	} while (read_seqretry_irqrestore(&t->vtime_seqlock, seq, flags));
+
+	return gtime;
+}
+
+/*
+ * Fetch cputime raw values from fields of task_struct and
+ * add up the pending nohz execution time since the last
+ * cputime snapshot.
+ */
+static void
+fetch_task_cputime(struct task_struct *t,
+		   cputime_t *u_dst, cputime_t *s_dst,
+		   cputime_t *u_src, cputime_t *s_src,
+		   cputime_t *udelta, cputime_t *sdelta)
+{
+	unsigned long flags;
+	unsigned int seq;
+	unsigned long long delta;
+
+	do {
+		*udelta = 0;
+		*sdelta = 0;
+
+		seq = read_seqbegin_irqsave(&t->vtime_seqlock, flags);
+
+		if (u_dst)
+			*u_dst = *u_src;
+		if (s_dst)
+			*s_dst = *s_src;
+
+		/* Task is sleeping, nothing to add */
+		if (t->vtime_snap_whence == VTIME_SLEEPING ||
+		    is_idle_task(t))
+			continue;
+
+		delta = vtime_delta(t);
+
+		/*
+		 * Task runs either in user or kernel space, add pending nohz time to
+		 * the right place.
+		 */
+		if (t->vtime_snap_whence == VTIME_USER || t->flags & PF_VCPU) {
+			*udelta = delta;
+		} else {
+			if (t->vtime_snap_whence == VTIME_SYS)
+				*sdelta = delta;
+		}
+	} while (read_seqretry_irqrestore(&t->vtime_seqlock, seq, flags));
+}
+
+
+void task_cputime(struct task_struct *t, cputime_t *utime, cputime_t *stime)
+{
+	cputime_t udelta, sdelta;
+
+	fetch_task_cputime(t, utime, stime, &t->utime,
+			   &t->stime, &udelta, &sdelta);
+	if (utime)
+		*utime += udelta;
+	if (stime)
+		*stime += sdelta;
+}
+
+void task_cputime_scaled(struct task_struct *t,
+			 cputime_t *utimescaled, cputime_t *stimescaled)
+{
+	cputime_t udelta, sdelta;
+
+	fetch_task_cputime(t, utimescaled, stimescaled,
+			   &t->utimescaled, &t->stimescaled, &udelta, &sdelta);
+	if (utimescaled)
+		*utimescaled += cputime_to_scaled(udelta);
+	if (stimescaled)
+		*stimescaled += cputime_to_scaled(sdelta);
+}
 #endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */
diff --git a/kernel/softirq.c b/kernel/softirq.c
index ed567babe789..f5cc25f147a6 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -221,7 +221,7 @@ asmlinkage void __do_softirq(void)
 	current->flags &= ~PF_MEMALLOC;
 
 	pending = local_softirq_pending();
-	vtime_account_irq_enter(current);
+	account_irq_enter_time(current);
 
 	__local_bh_disable((unsigned long)__builtin_return_address(0),
 				SOFTIRQ_OFFSET);
@@ -272,7 +272,7 @@ restart:
 
 	lockdep_softirq_exit();
 
-	vtime_account_irq_exit(current);
+	account_irq_exit_time(current);
 	__local_bh_enable(SOFTIRQ_OFFSET);
 	tsk_restore_flags(current, old_flags, PF_MEMALLOC);
 }
@@ -341,7 +341,7 @@ static inline void invoke_softirq(void)
  */
 void irq_exit(void)
 {
-	vtime_account_irq_exit(current);
+	account_irq_exit_time(current);
 	trace_hardirq_exit();
 	sub_preempt_count(IRQ_EXIT_OFFSET);
 	if (!in_interrupt() && local_softirq_pending())
-- 
cgit v1.2.3


From cef401de7be8c4e155c6746bfccf721a4fa5fab9 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 25 Jan 2013 20:34:37 +0000
Subject: net: fix possible wrong checksum generation

Pravin Shelar mentioned that GSO could potentially generate
wrong TX checksum if skb has fragments that are overwritten
by the user between the checksum computation and transmit.

He suggested to linearize skbs but this extra copy can be
avoided for normal tcp skbs cooked by tcp_sendmsg().

This patch introduces a new SKB_GSO_SHARED_FRAG flag, set
in skb_shinfo(skb)->gso_type if at least one frag can be
modified by the user.

Typical sources of such possible overwrites are {vm}splice(),
sendfile(), and macvtap/tun/virtio_net drivers.

Tested:

$ netperf -H 7.7.8.84
MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to
7.7.8.84 () port 0 AF_INET
Recv   Send    Send
Socket Socket  Message  Elapsed
Size   Size    Size     Time     Throughput
bytes  bytes   bytes    secs.    10^6bits/sec

 87380  16384  16384    10.00    3959.52

$ netperf -H 7.7.8.84 -t TCP_SENDFILE
TCP SENDFILE TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 7.7.8.84 ()
port 0 AF_INET
Recv   Send    Send
Socket Socket  Message  Elapsed
Size   Size    Size     Time     Throughput
bytes  bytes   bytes    secs.    10^6bits/sec

 87380  16384  16384    10.00    3216.80

Performance of the SENDFILE is impacted by the extra allocation and
copy, and because we use order-0 pages, while the TCP_STREAM uses
bigger pages.

Reported-by: Pravin Shelar <pshelar@nicira.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/macvtap.c    |  3 ++-
 drivers/net/tun.c        | 12 ++++++++----
 drivers/net/virtio_net.c | 12 ++++++++----
 include/linux/skbuff.h   | 19 +++++++++++++++++++
 net/core/dev.c           |  9 +++++++++
 net/core/skbuff.c        |  4 ++++
 net/ipv4/af_inet.c       |  1 +
 net/ipv4/ip_gre.c        |  4 +++-
 net/ipv4/ipip.c          |  4 +++-
 net/ipv4/tcp.c           |  3 +++
 net/ipv4/tcp_input.c     |  4 ++--
 net/ipv4/tcp_output.c    |  4 ++--
 net/ipv6/ip6_offload.c   |  1 +
 13 files changed, 65 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
index 0f0f9ce3a776..b181dfb3d6d6 100644
--- a/drivers/net/macvtap.c
+++ b/drivers/net/macvtap.c
@@ -543,6 +543,7 @@ static int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from,
 		skb->data_len += len;
 		skb->len += len;
 		skb->truesize += truesize;
+		skb_shinfo(skb)->gso_type |= SKB_GSO_SHARED_FRAG;
 		atomic_add(truesize, &skb->sk->sk_wmem_alloc);
 		while (len) {
 			int off = base & ~PAGE_MASK;
@@ -598,7 +599,7 @@ static int macvtap_skb_from_vnet_hdr(struct sk_buff *skb,
 
 	if (vnet_hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
 		skb_shinfo(skb)->gso_size = vnet_hdr->gso_size;
-		skb_shinfo(skb)->gso_type = gso_type;
+		skb_shinfo(skb)->gso_type |= gso_type;
 
 		/* Header must be checked, and gso_segs computed. */
 		skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index c81680dc10eb..293ce8dfc9e6 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1005,6 +1005,7 @@ static int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from,
 		skb->data_len += len;
 		skb->len += len;
 		skb->truesize += truesize;
+		skb_shinfo(skb)->gso_type |= SKB_GSO_SHARED_FRAG;
 		atomic_add(truesize, &skb->sk->sk_wmem_alloc);
 		while (len) {
 			int off = base & ~PAGE_MASK;
@@ -1150,16 +1151,18 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
 	}
 
 	if (gso.gso_type != VIRTIO_NET_HDR_GSO_NONE) {
+		unsigned short gso_type = 0;
+
 		pr_debug("GSO!\n");
 		switch (gso.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
 		case VIRTIO_NET_HDR_GSO_TCPV4:
-			skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
+			gso_type = SKB_GSO_TCPV4;
 			break;
 		case VIRTIO_NET_HDR_GSO_TCPV6:
-			skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
+			gso_type = SKB_GSO_TCPV6;
 			break;
 		case VIRTIO_NET_HDR_GSO_UDP:
-			skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
+			gso_type = SKB_GSO_UDP;
 			break;
 		default:
 			tun->dev->stats.rx_frame_errors++;
@@ -1168,9 +1171,10 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
 		}
 
 		if (gso.gso_type & VIRTIO_NET_HDR_GSO_ECN)
-			skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
+			gso_type |= SKB_GSO_TCP_ECN;
 
 		skb_shinfo(skb)->gso_size = gso.gso_size;
+		skb_shinfo(skb)->gso_type |= gso_type;
 		if (skb_shinfo(skb)->gso_size == 0) {
 			tun->dev->stats.rx_frame_errors++;
 			kfree_skb(skb);
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 701408a1ded6..58914c8ea68f 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -220,6 +220,7 @@ static void set_skb_frag(struct sk_buff *skb, struct page *page,
 	skb->len += size;
 	skb->truesize += PAGE_SIZE;
 	skb_shinfo(skb)->nr_frags++;
+	skb_shinfo(skb)->gso_type |= SKB_GSO_SHARED_FRAG;
 	*len -= size;
 }
 
@@ -379,16 +380,18 @@ static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len)
 		 ntohs(skb->protocol), skb->len, skb->pkt_type);
 
 	if (hdr->hdr.gso_type != VIRTIO_NET_HDR_GSO_NONE) {
+		unsigned short gso_type = 0;
+
 		pr_debug("GSO!\n");
 		switch (hdr->hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
 		case VIRTIO_NET_HDR_GSO_TCPV4:
-			skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
+			gso_type = SKB_GSO_TCPV4;
 			break;
 		case VIRTIO_NET_HDR_GSO_UDP:
-			skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
+			gso_type = SKB_GSO_UDP;
 			break;
 		case VIRTIO_NET_HDR_GSO_TCPV6:
-			skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
+			gso_type = SKB_GSO_TCPV6;
 			break;
 		default:
 			net_warn_ratelimited("%s: bad gso type %u.\n",
@@ -397,7 +400,7 @@ static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len)
 		}
 
 		if (hdr->hdr.gso_type & VIRTIO_NET_HDR_GSO_ECN)
-			skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
+			gso_type |= SKB_GSO_TCP_ECN;
 
 		skb_shinfo(skb)->gso_size = hdr->hdr.gso_size;
 		if (skb_shinfo(skb)->gso_size == 0) {
@@ -405,6 +408,7 @@ static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len)
 			goto frame_err;
 		}
 
+		skb_shinfo(skb)->gso_type |= gso_type;
 		/* Header must be checked, and gso_segs computed. */
 		skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
 		skb_shinfo(skb)->gso_segs = 0;
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 8b2256e880e0..0259b719bebf 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -307,6 +307,13 @@ enum {
 	SKB_GSO_TCPV6 = 1 << 4,
 
 	SKB_GSO_FCOE = 1 << 5,
+
+	/* This indicates at least one fragment might be overwritten
+	 * (as in vmsplice(), sendfile() ...)
+	 * If we need to compute a TX checksum, we'll need to copy
+	 * all frags to avoid possible bad checksum
+	 */
+	SKB_GSO_SHARED_FRAG = 1 << 6,
 };
 
 #if BITS_PER_LONG > 32
@@ -2200,6 +2207,18 @@ static inline int skb_linearize(struct sk_buff *skb)
 	return skb_is_nonlinear(skb) ? __skb_linearize(skb) : 0;
 }
 
+/**
+ * skb_has_shared_frag - can any frag be overwritten
+ * @skb: buffer to test
+ *
+ * Return true if the skb has at least one frag that might be modified
+ * by an external entity (as in vmsplice()/sendfile())
+ */
+static inline bool skb_has_shared_frag(const struct sk_buff *skb)
+{
+	return skb_shinfo(skb)->gso_type & SKB_GSO_SHARED_FRAG;
+}
+
 /**
  *	skb_linearize_cow - make sure skb is linear and writable
  *	@skb: buffer to process
diff --git a/net/core/dev.c b/net/core/dev.c
index c69cd8721b28..a83375d3af72 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2271,6 +2271,15 @@ int skb_checksum_help(struct sk_buff *skb)
 		return -EINVAL;
 	}
 
+	/* Before computing a checksum, we should make sure no frag could
+	 * be modified by an external entity : checksum could be wrong.
+	 */
+	if (skb_has_shared_frag(skb)) {
+		ret = __skb_linearize(skb);
+		if (ret)
+			goto out;
+	}
+
 	offset = skb_checksum_start_offset(skb);
 	BUG_ON(offset >= skb_headlen(skb));
 	csum = skb_checksum(skb, offset, skb->len - offset, 0);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 2568c449fe36..bddc1dd2e7f2 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -2340,6 +2340,8 @@ void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len)
 {
 	int pos = skb_headlen(skb);
 
+	skb_shinfo(skb1)->gso_type = skb_shinfo(skb)->gso_type;
+
 	if (len < pos)	/* Split line is inside header. */
 		skb_split_inside_header(skb, skb1, len, pos);
 	else		/* Second chunk has no header, nothing to copy. */
@@ -2845,6 +2847,8 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
 		skb_copy_from_linear_data_offset(skb, offset,
 						 skb_put(nskb, hsize), hsize);
 
+		skb_shinfo(nskb)->gso_type = skb_shinfo(skb)->gso_type;
+
 		while (pos < offset + len && i < nfrags) {
 			*frag = skb_shinfo(skb)->frags[i];
 			__skb_frag_ref(frag);
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 4b7053919976..49ddca31c4da 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1306,6 +1306,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
 		       SKB_GSO_UDP |
 		       SKB_GSO_DODGY |
 		       SKB_GSO_TCP_ECN |
+		       SKB_GSO_SHARED_FRAG |
 		       0)))
 		goto out;
 
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 303012adf9e6..af6be70821c4 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -738,7 +738,7 @@ drop:
 static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct ip_tunnel *tunnel = netdev_priv(dev);
-	const struct iphdr  *old_iph = ip_hdr(skb);
+	const struct iphdr  *old_iph;
 	const struct iphdr  *tiph;
 	struct flowi4 fl4;
 	u8     tos;
@@ -756,6 +756,8 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
 	    skb_checksum_help(skb))
 		goto tx_error;
 
+	old_iph = ip_hdr(skb);
+
 	if (dev->type == ARPHRD_ETHER)
 		IPCB(skb)->flags = 0;
 
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 191fc24a745a..8f024d41eefa 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -472,7 +472,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 	__be16 df = tiph->frag_off;
 	struct rtable *rt;     			/* Route to the other host */
 	struct net_device *tdev;		/* Device to other host */
-	const struct iphdr  *old_iph = ip_hdr(skb);
+	const struct iphdr  *old_iph;
 	struct iphdr  *iph;			/* Our new IP header */
 	unsigned int max_headroom;		/* The extra header space needed */
 	__be32 dst = tiph->daddr;
@@ -486,6 +486,8 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 	    skb_checksum_help(skb))
 		goto tx_error;
 
+	old_iph = ip_hdr(skb);
+
 	if (tos & 1)
 		tos = old_iph->tos;
 
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 52271947a471..3ec1f69c5ceb 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -896,6 +896,8 @@ new_segment:
 			skb_fill_page_desc(skb, i, page, offset, copy);
 		}
 
+		skb_shinfo(skb)->gso_type |= SKB_GSO_SHARED_FRAG;
+
 		skb->len += copy;
 		skb->data_len += copy;
 		skb->truesize += copy;
@@ -3032,6 +3034,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
 			       SKB_GSO_DODGY |
 			       SKB_GSO_TCP_ECN |
 			       SKB_GSO_TCPV6 |
+			       SKB_GSO_SHARED_FRAG |
 			       0) ||
 			     !(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))))
 			goto out;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 0905997e5873..492c7cfe1453 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1240,13 +1240,13 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
 	 */
 	if (!skb_shinfo(prev)->gso_size) {
 		skb_shinfo(prev)->gso_size = mss;
-		skb_shinfo(prev)->gso_type = sk->sk_gso_type;
+		skb_shinfo(prev)->gso_type |= sk->sk_gso_type;
 	}
 
 	/* CHECKME: To clear or not to clear? Mimics normal skb currently */
 	if (skb_shinfo(skb)->gso_segs <= 1) {
 		skb_shinfo(skb)->gso_size = 0;
-		skb_shinfo(skb)->gso_type = 0;
+		skb_shinfo(skb)->gso_type &= SKB_GSO_SHARED_FRAG;
 	}
 
 	/* Difference in this won't matter, both ACKed by the same cumul. ACK */
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 667a6adfccf8..367e2ec01da1 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1133,6 +1133,7 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
 static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb,
 				 unsigned int mss_now)
 {
+	skb_shinfo(skb)->gso_type &= SKB_GSO_SHARED_FRAG;
 	if (skb->len <= mss_now || !sk_can_gso(sk) ||
 	    skb->ip_summed == CHECKSUM_NONE) {
 		/* Avoid the costly divide in the normal
@@ -1140,11 +1141,10 @@ static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb,
 		 */
 		skb_shinfo(skb)->gso_segs = 1;
 		skb_shinfo(skb)->gso_size = 0;
-		skb_shinfo(skb)->gso_type = 0;
 	} else {
 		skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss_now);
 		skb_shinfo(skb)->gso_size = mss_now;
-		skb_shinfo(skb)->gso_type = sk->sk_gso_type;
+		skb_shinfo(skb)->gso_type |= sk->sk_gso_type;
 	}
 }
 
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index f26f0da7f095..d141fc32a2ea 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -100,6 +100,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
 		       SKB_GSO_DODGY |
 		       SKB_GSO_TCP_ECN |
 		       SKB_GSO_TCPV6 |
+		       SKB_GSO_SHARED_FRAG |
 		       0)))
 		goto out;
 
-- 
cgit v1.2.3


From 5afba62cc8a16716508605e02c1b02ee5f969184 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joro@8bytes.org>
Date: Wed, 26 Sep 2012 12:44:38 +0200
Subject: x86, msi: Use IRQ remapping specific setup_msi_irqs routine

Use seperate routines to setup MSI IRQs for both
irq_remapping_enabled cases.

Signed-off-by: Joerg Roedel <joro@8bytes.org>
Acked-by: Sebastian Andrzej Siewior <sebastian@breakpoint.cc>
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/x86/include/asm/irq_remapping.h |  12 ----
 arch/x86/include/asm/pci.h           |   3 +
 arch/x86/kernel/apic/io_apic.c       | 104 ++++----------------------------
 drivers/iommu/irq_remapping.c        | 112 ++++++++++++++++++++++++++++++++++-
 include/linux/irq.h                  |   3 +
 5 files changed, 125 insertions(+), 109 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h
index 5fb9bbbd2f14..0ee1e88bd17a 100644
--- a/arch/x86/include/asm/irq_remapping.h
+++ b/arch/x86/include/asm/irq_remapping.h
@@ -47,9 +47,6 @@ extern void free_remapped_irq(int irq);
 extern void compose_remapped_msi_msg(struct pci_dev *pdev,
 				     unsigned int irq, unsigned int dest,
 				     struct msi_msg *msg, u8 hpet_id);
-extern int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec);
-extern int msi_setup_remapped_irq(struct pci_dev *pdev, unsigned int irq,
-				  int index, int sub_handle);
 extern int setup_hpet_msi_remapped(unsigned int irq, unsigned int id);
 
 #else  /* CONFIG_IRQ_REMAP */
@@ -83,15 +80,6 @@ static inline void compose_remapped_msi_msg(struct pci_dev *pdev,
 					    struct msi_msg *msg, u8 hpet_id)
 {
 }
-static inline int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec)
-{
-	return -ENODEV;
-}
-static inline int msi_setup_remapped_irq(struct pci_dev *pdev, unsigned int irq,
-					 int index, int sub_handle)
-{
-	return -ENODEV;
-}
 static inline int setup_hpet_msi_remapped(unsigned int irq, unsigned int id)
 {
 	return -ENODEV;
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index dba7805176bf..c28fd02f4bf7 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -121,9 +121,12 @@ static inline void x86_restore_msi_irqs(struct pci_dev *dev, int irq)
 #define arch_teardown_msi_irq x86_teardown_msi_irq
 #define arch_restore_msi_irqs x86_restore_msi_irqs
 /* implemented in arch/x86/kernel/apic/io_apic. */
+struct msi_desc;
 int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type);
 void native_teardown_msi_irq(unsigned int irq);
 void native_restore_msi_irqs(struct pci_dev *dev, int irq);
+int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc,
+		  unsigned int irq_base, unsigned int irq_offset);
 /* default to the implementation in drivers/lib/msi.c */
 #define HAVE_DEFAULT_MSI_TEARDOWN_IRQS
 #define HAVE_DEFAULT_MSI_RESTORE_IRQS
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index e7b87630c13d..d4b045e018fb 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -3066,7 +3066,7 @@ void destroy_irq(unsigned int irq)
 	free_irq_at(irq, cfg);
 }
 
-static inline void destroy_irqs(unsigned int irq, unsigned int count)
+void destroy_irqs(unsigned int irq, unsigned int count)
 {
 	unsigned int i;
 
@@ -3165,8 +3165,8 @@ static struct irq_chip msi_chip = {
 	.irq_retrigger		= ioapic_retrigger_irq,
 };
 
-static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc,
-			 unsigned int irq_base, unsigned int irq_offset)
+int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc,
+		  unsigned int irq_base, unsigned int irq_offset)
 {
 	struct irq_chip *chip = &msi_chip;
 	struct msi_msg msg;
@@ -3198,44 +3198,28 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc,
 	return 0;
 }
 
-int setup_msix_irqs(struct pci_dev *dev, int nvec)
+int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
 {
-	int node, ret, sub_handle, index = 0;
 	unsigned int irq, irq_want;
 	struct msi_desc *msidesc;
+	int node, ret;
+
+	/* Multiple MSI vectors only supported with interrupt remapping */
+	if (type == PCI_CAP_ID_MSI && nvec > 1)
+		return 1;
 
 	node = dev_to_node(&dev->dev);
 	irq_want = nr_irqs_gsi;
-	sub_handle = 0;
 	list_for_each_entry(msidesc, &dev->msi_list, list) {
 		irq = create_irq_nr(irq_want, node);
 		if (irq == 0)
 			return -ENOSPC;
+
 		irq_want = irq + 1;
-		if (!irq_remapping_enabled)
-			goto no_ir;
 
-		if (!sub_handle) {
-			/*
-			 * allocate the consecutive block of IRTE's
-			 * for 'nvec'
-			 */
-			index = msi_alloc_remapped_irq(dev, irq, nvec);
-			if (index < 0) {
-				ret = index;
-				goto error;
-			}
-		} else {
-			ret = msi_setup_remapped_irq(dev, irq, index,
-						     sub_handle);
-			if (ret < 0)
-				goto error;
-		}
-no_ir:
 		ret = setup_msi_irq(dev, msidesc, irq, 0);
 		if (ret < 0)
 			goto error;
-		sub_handle++;
 	}
 	return 0;
 
@@ -3244,74 +3228,6 @@ error:
 	return ret;
 }
 
-int setup_msi_irqs(struct pci_dev *dev, int nvec)
-{
-	int node, ret, sub_handle, index = 0;
-	unsigned int irq;
-	struct msi_desc *msidesc;
-
-	if (nvec > 1 && !irq_remapping_enabled)
-		return 1;
-
-	nvec = __roundup_pow_of_two(nvec);
-
-	WARN_ON(!list_is_singular(&dev->msi_list));
-	msidesc = list_entry(dev->msi_list.next, struct msi_desc, list);
-	WARN_ON(msidesc->irq);
-	WARN_ON(msidesc->msi_attrib.multiple);
-
-	node = dev_to_node(&dev->dev);
-	irq = __create_irqs(nr_irqs_gsi, nvec, node);
-	if (irq == 0)
-		return -ENOSPC;
-
-	if (!irq_remapping_enabled) {
-		ret = setup_msi_irq(dev, msidesc, irq, 0);
-		if (ret < 0)
-			goto error;
-		return 0;
-	}
-
-	msidesc->msi_attrib.multiple = ilog2(nvec);
-	for (sub_handle = 0; sub_handle < nvec; sub_handle++) {
-		if (!sub_handle) {
-			index = msi_alloc_remapped_irq(dev, irq, nvec);
-			if (index < 0) {
-				ret = index;
-				goto error;
-			}
-		} else {
-			ret = msi_setup_remapped_irq(dev, irq + sub_handle,
-						     index, sub_handle);
-			if (ret < 0)
-				goto error;
-		}
-		ret = setup_msi_irq(dev, msidesc, irq, sub_handle);
-		if (ret < 0)
-			goto error;
-	}
-	return 0;
-
-error:
-	destroy_irqs(irq, nvec);
-
-	/*
-	 * Restore altered MSI descriptor fields and prevent just destroyed
-	 * IRQs from tearing down again in default_teardown_msi_irqs()
-	 */
-	msidesc->irq = 0;
-	msidesc->msi_attrib.multiple = 0;
-
-	return ret;
-}
-
-int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
-{
-	if (type == PCI_CAP_ID_MSI)
-		return setup_msi_irqs(dev, nvec);
-	return setup_msix_irqs(dev, nvec);
-}
-
 void native_teardown_msi_irq(unsigned int irq)
 {
 	destroy_irq(irq);
diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c
index 0baad3b9ecba..20f04b67efd2 100644
--- a/drivers/iommu/irq_remapping.c
+++ b/drivers/iommu/irq_remapping.c
@@ -4,6 +4,8 @@
 #include <linux/cpumask.h>
 #include <linux/errno.h>
 #include <linux/msi.h>
+#include <linux/irq.h>
+#include <linux/pci.h>
 
 #include <asm/hw_irq.h>
 #include <asm/irq_remapping.h>
@@ -21,6 +23,10 @@ int no_x2apic_optout;
 
 static struct irq_remap_ops *remap_ops;
 
+static int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec);
+static int msi_setup_remapped_irq(struct pci_dev *pdev, unsigned int irq,
+				  int index, int sub_handle);
+
 static void irq_remapping_disable_io_apic(void)
 {
 	/*
@@ -34,9 +40,109 @@ static void irq_remapping_disable_io_apic(void)
 		disconnect_bsp_APIC(0);
 }
 
+static int do_setup_msi_irqs(struct pci_dev *dev, int nvec)
+{
+	int node, ret, sub_handle, index = 0;
+	unsigned int irq;
+	struct msi_desc *msidesc;
+
+	nvec = __roundup_pow_of_two(nvec);
+
+	WARN_ON(!list_is_singular(&dev->msi_list));
+	msidesc = list_entry(dev->msi_list.next, struct msi_desc, list);
+	WARN_ON(msidesc->irq);
+	WARN_ON(msidesc->msi_attrib.multiple);
+
+	node = dev_to_node(&dev->dev);
+	irq = __create_irqs(get_nr_irqs_gsi(), nvec, node);
+	if (irq == 0)
+		return -ENOSPC;
+
+	msidesc->msi_attrib.multiple = ilog2(nvec);
+	for (sub_handle = 0; sub_handle < nvec; sub_handle++) {
+		if (!sub_handle) {
+			index = msi_alloc_remapped_irq(dev, irq, nvec);
+			if (index < 0) {
+				ret = index;
+				goto error;
+			}
+		} else {
+			ret = msi_setup_remapped_irq(dev, irq + sub_handle,
+						     index, sub_handle);
+			if (ret < 0)
+				goto error;
+		}
+		ret = setup_msi_irq(dev, msidesc, irq, sub_handle);
+		if (ret < 0)
+			goto error;
+	}
+	return 0;
+
+error:
+	destroy_irqs(irq, nvec);
+
+	/*
+	 * Restore altered MSI descriptor fields and prevent just destroyed
+	 * IRQs from tearing down again in default_teardown_msi_irqs()
+	 */
+	msidesc->irq = 0;
+	msidesc->msi_attrib.multiple = 0;
+
+	return ret;
+}
+
+static int do_setup_msix_irqs(struct pci_dev *dev, int nvec)
+{
+	int node, ret, sub_handle, index = 0;
+	struct msi_desc *msidesc;
+	unsigned int irq;
+
+	node		= dev_to_node(&dev->dev);
+	irq		= get_nr_irqs_gsi();
+	sub_handle	= 0;
+
+	list_for_each_entry(msidesc, &dev->msi_list, list) {
+
+		irq = create_irq_nr(irq, node);
+		if (irq == 0)
+			return -1;
+
+		if (sub_handle == 0)
+			ret = index = msi_alloc_remapped_irq(dev, irq, nvec);
+		else
+			ret = msi_setup_remapped_irq(dev, irq, index, sub_handle);
+
+		if (ret < 0)
+			goto error;
+
+		ret = setup_msi_irq(dev, msidesc, irq, 0);
+		if (ret < 0)
+			goto error;
+
+		sub_handle += 1;
+		irq        += 1;
+	}
+
+	return 0;
+
+error:
+	destroy_irq(irq);
+	return ret;
+}
+
+static int irq_remapping_setup_msi_irqs(struct pci_dev *dev,
+					int nvec, int type)
+{
+	if (type == PCI_CAP_ID_MSI)
+		return do_setup_msi_irqs(dev, nvec);
+	else
+		return do_setup_msix_irqs(dev, nvec);
+}
+
 static void __init irq_remapping_modify_x86_ops(void)
 {
 	x86_io_apic_ops.disable		= irq_remapping_disable_io_apic;
+	x86_msi.setup_msi_irqs		= irq_remapping_setup_msi_irqs;
 	x86_msi.setup_hpet_msi		= setup_hpet_msi_remapped;
 }
 
@@ -186,7 +292,7 @@ void compose_remapped_msi_msg(struct pci_dev *pdev,
 	remap_ops->compose_msi_msg(pdev, irq, dest, msg, hpet_id);
 }
 
-int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec)
+static int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec)
 {
 	if (!remap_ops || !remap_ops->msi_alloc_irq)
 		return -ENODEV;
@@ -194,8 +300,8 @@ int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec)
 	return remap_ops->msi_alloc_irq(pdev, irq, nvec);
 }
 
-int msi_setup_remapped_irq(struct pci_dev *pdev, unsigned int irq,
-			   int index, int sub_handle)
+static int msi_setup_remapped_irq(struct pci_dev *pdev, unsigned int irq,
+				  int index, int sub_handle)
 {
 	if (!remap_ops || !remap_ops->msi_setup_irq)
 		return -ENODEV;
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 1eab99111e94..bc4e06611958 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -509,8 +509,11 @@ static inline void irq_set_percpu_devid_flags(unsigned int irq)
 
 /* Handle dynamic irq creation and destruction */
 extern unsigned int create_irq_nr(unsigned int irq_want, int node);
+extern unsigned int __create_irqs(unsigned int from, unsigned int count,
+				  int node);
 extern int create_irq(void);
 extern void destroy_irq(unsigned int irq);
+extern void destroy_irqs(unsigned int irq, unsigned int count);
 
 /*
  * Dynamic irq helper functions. Obsolete. Use irq_alloc_desc* and
-- 
cgit v1.2.3


From c7736edf1b1734455c186deec53a0aa7f8fa87ea Mon Sep 17 00:00:00 2001
From: Prashant Gaikwad <pgaikwad@nvidia.com>
Date: Fri, 11 Jan 2013 13:16:19 +0530
Subject: ARM: tegra: add function to read chipid

Add function to read chip id from APB MISC registers. This function
will also get called from clock driver to flush write operations on
apb bus.

Signed-off-by: Prashant Gaikwad <pgaikwad@nvidia.com>
Signed-off-by: Stephen Warren <swarren@nvidia.com>
---
 arch/arm/mach-tegra/fuse.c |  8 +++++++-
 include/linux/tegra-soc.h  | 22 ++++++++++++++++++++++
 2 files changed, 29 insertions(+), 1 deletion(-)
 create mode 100644 include/linux/tegra-soc.h

(limited to 'include/linux')

diff --git a/arch/arm/mach-tegra/fuse.c b/arch/arm/mach-tegra/fuse.c
index 8121742711fe..f7db0782a6b6 100644
--- a/arch/arm/mach-tegra/fuse.c
+++ b/arch/arm/mach-tegra/fuse.c
@@ -20,6 +20,7 @@
 #include <linux/kernel.h>
 #include <linux/io.h>
 #include <linux/export.h>
+#include <linux/tegra-soc.h>
 
 #include "fuse.h"
 #include "iomap.h"
@@ -105,6 +106,11 @@ static void tegra_get_process_id(void)
 	tegra_core_process_id = (reg >> 12) & 3;
 }
 
+u32 tegra_read_chipid(void)
+{
+	return readl_relaxed(IO_ADDRESS(TEGRA_APB_MISC_BASE) + 0x804);
+}
+
 void tegra_init_fuse(void)
 {
 	u32 id;
@@ -119,7 +125,7 @@ void tegra_init_fuse(void)
 	reg = tegra_apb_readl(TEGRA_APB_MISC_BASE + STRAP_OPT);
 	tegra_bct_strapping = (reg & RAM_ID_MASK) >> RAM_CODE_SHIFT;
 
-	id = readl_relaxed(IO_ADDRESS(TEGRA_APB_MISC_BASE) + 0x804);
+	id = tegra_read_chipid();
 	tegra_chip_id = (id >> 8) & 0xff;
 
 	switch (tegra_chip_id) {
diff --git a/include/linux/tegra-soc.h b/include/linux/tegra-soc.h
new file mode 100644
index 000000000000..95f611d78f3a
--- /dev/null
+++ b/include/linux/tegra-soc.h
@@ -0,0 +1,22 @@
+/*
+ * Copyright (c) 2012, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __LINUX_TEGRA_SOC_H_
+#define __LINUX_TEGRA_SOC_H_
+
+u32 tegra_read_chipid(void);
+
+#endif /* __LINUX_TEGRA_SOC_H_ */
-- 
cgit v1.2.3


From 89572c77cdffdf24f8fec50d3e38db6a18c04dbe Mon Sep 17 00:00:00 2001
From: Prashant Gaikwad <pgaikwad@nvidia.com>
Date: Fri, 11 Jan 2013 13:16:21 +0530
Subject: ARM: tegra: move tegra_cpu_car.h to linux/clk/tegra.h

tegra_cpu_car_ops struct is going to be accessed from drivers/clk/tegra.
Move the tegra_cpu_car_ops to include/linux/clk/tegra.h.

Signed-off-by: Prashant Gaikwad <pgaikwad@nvidia.com>
Signed-off-by: Stephen Warren <swarren@nvidia.com>
---
 arch/arm/mach-tegra/clock.c               |   2 +-
 arch/arm/mach-tegra/cpuidle-tegra30.c     |   2 +-
 arch/arm/mach-tegra/hotplug.c             |   2 +-
 arch/arm/mach-tegra/platsmp.c             |   2 +-
 arch/arm/mach-tegra/pm.c                  |   2 +-
 arch/arm/mach-tegra/tegra20_clocks.c      |   2 +-
 arch/arm/mach-tegra/tegra20_clocks_data.c |   2 +-
 arch/arm/mach-tegra/tegra30_clocks.c      |   2 +-
 arch/arm/mach-tegra/tegra30_clocks_data.c |   2 +-
 arch/arm/mach-tegra/tegra_cpu_car.h       | 124 ------------------------------
 include/linux/clk/tegra.h                 | 124 ++++++++++++++++++++++++++++++
 11 files changed, 133 insertions(+), 133 deletions(-)
 delete mode 100644 arch/arm/mach-tegra/tegra_cpu_car.h
 create mode 100644 include/linux/clk/tegra.h

(limited to 'include/linux')

diff --git a/arch/arm/mach-tegra/clock.c b/arch/arm/mach-tegra/clock.c
index 867bf8bf5561..8c0ff061f8cf 100644
--- a/arch/arm/mach-tegra/clock.c
+++ b/arch/arm/mach-tegra/clock.c
@@ -26,10 +26,10 @@
 #include <linux/sched.h>
 #include <linux/seq_file.h>
 #include <linux/slab.h>
+#include <linux/clk/tegra.h>
 
 #include "board.h"
 #include "clock.h"
-#include "tegra_cpu_car.h"
 
 /* Global data of Tegra CPU CAR ops */
 struct tegra_cpu_car_ops *tegra_cpu_car_ops;
diff --git a/arch/arm/mach-tegra/cpuidle-tegra30.c b/arch/arm/mach-tegra/cpuidle-tegra30.c
index 82530bd9b8c2..8b50cf4ddd6f 100644
--- a/arch/arm/mach-tegra/cpuidle-tegra30.c
+++ b/arch/arm/mach-tegra/cpuidle-tegra30.c
@@ -24,6 +24,7 @@
 #include <linux/cpuidle.h>
 #include <linux/cpu_pm.h>
 #include <linux/clockchips.h>
+#include <linux/clk/tegra.h>
 
 #include <asm/cpuidle.h>
 #include <asm/proc-fns.h>
@@ -32,7 +33,6 @@
 
 #include "pm.h"
 #include "sleep.h"
-#include "tegra_cpu_car.h"
 
 #ifdef CONFIG_PM_SLEEP
 static int tegra30_idle_lp2(struct cpuidle_device *dev,
diff --git a/arch/arm/mach-tegra/hotplug.c b/arch/arm/mach-tegra/hotplug.c
index 6a27de4001ee..a599f6e36dea 100644
--- a/arch/arm/mach-tegra/hotplug.c
+++ b/arch/arm/mach-tegra/hotplug.c
@@ -10,12 +10,12 @@
  */
 #include <linux/kernel.h>
 #include <linux/smp.h>
+#include <linux/clk/tegra.h>
 
 #include <asm/cacheflush.h>
 #include <asm/smp_plat.h>
 
 #include "sleep.h"
-#include "tegra_cpu_car.h"
 
 static void (*tegra_hotplug_shutdown)(void);
 
diff --git a/arch/arm/mach-tegra/platsmp.c b/arch/arm/mach-tegra/platsmp.c
index 68670304d9bc..3ec7fc487857 100644
--- a/arch/arm/mach-tegra/platsmp.c
+++ b/arch/arm/mach-tegra/platsmp.c
@@ -18,6 +18,7 @@
 #include <linux/jiffies.h>
 #include <linux/smp.h>
 #include <linux/io.h>
+#include <linux/clk/tegra.h>
 
 #include <asm/cacheflush.h>
 #include <asm/hardware/gic.h>
@@ -30,7 +31,6 @@
 #include "fuse.h"
 #include "flowctrl.h"
 #include "reset.h"
-#include "tegra_cpu_car.h"
 
 #include "common.h"
 #include "iomap.h"
diff --git a/arch/arm/mach-tegra/pm.c b/arch/arm/mach-tegra/pm.c
index 498d70b33775..abfe9b93cc0c 100644
--- a/arch/arm/mach-tegra/pm.c
+++ b/arch/arm/mach-tegra/pm.c
@@ -24,6 +24,7 @@
 #include <linux/cpu_pm.h>
 #include <linux/clk.h>
 #include <linux/err.h>
+#include <linux/clk/tegra.h>
 
 #include <asm/smp_plat.h>
 #include <asm/cacheflush.h>
@@ -36,7 +37,6 @@
 #include "reset.h"
 #include "flowctrl.h"
 #include "sleep.h"
-#include "tegra_cpu_car.h"
 
 #define TEGRA_POWER_CPU_PWRREQ_OE	(1 << 16)  /* CPU pwr req enable */
 
diff --git a/arch/arm/mach-tegra/tegra20_clocks.c b/arch/arm/mach-tegra/tegra20_clocks.c
index 4eb6bc81a87b..1a80ff65e5fc 100644
--- a/arch/arm/mach-tegra/tegra20_clocks.c
+++ b/arch/arm/mach-tegra/tegra20_clocks.c
@@ -26,12 +26,12 @@
 #include <linux/io.h>
 #include <linux/clkdev.h>
 #include <linux/clk.h>
+#include <linux/clk/tegra.h>
 
 #include "clock.h"
 #include "fuse.h"
 #include "iomap.h"
 #include "tegra2_emc.h"
-#include "tegra_cpu_car.h"
 
 #define RST_DEVICES			0x004
 #define RST_DEVICES_SET			0x300
diff --git a/arch/arm/mach-tegra/tegra20_clocks_data.c b/arch/arm/mach-tegra/tegra20_clocks_data.c
index a23a0734e352..022cdaef7ca5 100644
--- a/arch/arm/mach-tegra/tegra20_clocks_data.c
+++ b/arch/arm/mach-tegra/tegra20_clocks_data.c
@@ -26,12 +26,12 @@
 #include <linux/delay.h>
 #include <linux/io.h>
 #include <linux/clk.h>
+#include <linux/clk/tegra.h>
 
 #include "clock.h"
 #include "fuse.h"
 #include "tegra2_emc.h"
 #include "tegra20_clocks.h"
-#include "tegra_cpu_car.h"
 
 /* Clock definitions */
 
diff --git a/arch/arm/mach-tegra/tegra30_clocks.c b/arch/arm/mach-tegra/tegra30_clocks.c
index d7147779f8ea..4330787fe5cb 100644
--- a/arch/arm/mach-tegra/tegra30_clocks.c
+++ b/arch/arm/mach-tegra/tegra30_clocks.c
@@ -28,6 +28,7 @@
 #include <linux/clk.h>
 #include <linux/cpufreq.h>
 #include <linux/syscore_ops.h>
+#include <linux/clk/tegra.h>
 
 #include <asm/clkdev.h>
 
@@ -36,7 +37,6 @@
 #include "clock.h"
 #include "fuse.h"
 #include "iomap.h"
-#include "tegra_cpu_car.h"
 
 #define USE_PLL_LOCK_BITS 0
 
diff --git a/arch/arm/mach-tegra/tegra30_clocks_data.c b/arch/arm/mach-tegra/tegra30_clocks_data.c
index 741d264d5ecb..9bfaa490cff6 100644
--- a/arch/arm/mach-tegra/tegra30_clocks_data.c
+++ b/arch/arm/mach-tegra/tegra30_clocks_data.c
@@ -28,11 +28,11 @@
 #include <linux/io.h>
 #include <linux/clk.h>
 #include <linux/cpufreq.h>
+#include <linux/clk/tegra.h>
 
 #include "clock.h"
 #include "fuse.h"
 #include "tegra30_clocks.h"
-#include "tegra_cpu_car.h"
 
 #define DEFINE_CLK_TEGRA(_name, _rate, _ops, _flags,		\
 		   _parent_names, _parents, _parent)		\
diff --git a/arch/arm/mach-tegra/tegra_cpu_car.h b/arch/arm/mach-tegra/tegra_cpu_car.h
deleted file mode 100644
index 9764d31032b7..000000000000
--- a/arch/arm/mach-tegra/tegra_cpu_car.h
+++ /dev/null
@@ -1,124 +0,0 @@
-/*
- * Copyright (c) 2012, NVIDIA CORPORATION.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef __MACH_TEGRA_CPU_CAR_H
-#define __MACH_TEGRA_CPU_CAR_H
-
-/*
- * Tegra CPU clock and reset control ops
- *
- * wait_for_reset:
- *	keep waiting until the CPU in reset state
- * put_in_reset:
- *	put the CPU in reset state
- * out_of_reset:
- *	release the CPU from reset state
- * enable_clock:
- *	CPU clock un-gate
- * disable_clock:
- *	CPU clock gate
- * rail_off_ready:
- *	CPU is ready for rail off
- * suspend:
- *	save the clock settings when CPU go into low-power state
- * resume:
- *	restore the clock settings when CPU exit low-power state
- */
-struct tegra_cpu_car_ops {
-	void (*wait_for_reset)(u32 cpu);
-	void (*put_in_reset)(u32 cpu);
-	void (*out_of_reset)(u32 cpu);
-	void (*enable_clock)(u32 cpu);
-	void (*disable_clock)(u32 cpu);
-#ifdef CONFIG_PM_SLEEP
-	bool (*rail_off_ready)(void);
-	void (*suspend)(void);
-	void (*resume)(void);
-#endif
-};
-
-extern struct tegra_cpu_car_ops *tegra_cpu_car_ops;
-
-static inline void tegra_wait_cpu_in_reset(u32 cpu)
-{
-	if (WARN_ON(!tegra_cpu_car_ops->wait_for_reset))
-		return;
-
-	tegra_cpu_car_ops->wait_for_reset(cpu);
-}
-
-static inline void tegra_put_cpu_in_reset(u32 cpu)
-{
-	if (WARN_ON(!tegra_cpu_car_ops->put_in_reset))
-		return;
-
-	tegra_cpu_car_ops->put_in_reset(cpu);
-}
-
-static inline void tegra_cpu_out_of_reset(u32 cpu)
-{
-	if (WARN_ON(!tegra_cpu_car_ops->out_of_reset))
-		return;
-
-	tegra_cpu_car_ops->out_of_reset(cpu);
-}
-
-static inline void tegra_enable_cpu_clock(u32 cpu)
-{
-	if (WARN_ON(!tegra_cpu_car_ops->enable_clock))
-		return;
-
-	tegra_cpu_car_ops->enable_clock(cpu);
-}
-
-static inline void tegra_disable_cpu_clock(u32 cpu)
-{
-	if (WARN_ON(!tegra_cpu_car_ops->disable_clock))
-		return;
-
-	tegra_cpu_car_ops->disable_clock(cpu);
-}
-
-#ifdef CONFIG_PM_SLEEP
-static inline bool tegra_cpu_rail_off_ready(void)
-{
-	if (WARN_ON(!tegra_cpu_car_ops->rail_off_ready))
-		return false;
-
-	return tegra_cpu_car_ops->rail_off_ready();
-}
-
-static inline void tegra_cpu_clock_suspend(void)
-{
-	if (WARN_ON(!tegra_cpu_car_ops->suspend))
-		return;
-
-	tegra_cpu_car_ops->suspend();
-}
-
-static inline void tegra_cpu_clock_resume(void)
-{
-	if (WARN_ON(!tegra_cpu_car_ops->resume))
-		return;
-
-	tegra_cpu_car_ops->resume();
-}
-#endif
-
-void tegra20_cpu_car_ops_init(void);
-void tegra30_cpu_car_ops_init(void);
-
-#endif /* __MACH_TEGRA_CPU_CAR_H */
diff --git a/include/linux/clk/tegra.h b/include/linux/clk/tegra.h
new file mode 100644
index 000000000000..0977f2a24757
--- /dev/null
+++ b/include/linux/clk/tegra.h
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2012, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __LINUX_CLK_TEGRA_H_
+#define __LINUX_CLK_TEGRA_H_
+
+/*
+ * Tegra CPU clock and reset control ops
+ *
+ * wait_for_reset:
+ *	keep waiting until the CPU in reset state
+ * put_in_reset:
+ *	put the CPU in reset state
+ * out_of_reset:
+ *	release the CPU from reset state
+ * enable_clock:
+ *	CPU clock un-gate
+ * disable_clock:
+ *	CPU clock gate
+ * rail_off_ready:
+ *	CPU is ready for rail off
+ * suspend:
+ *	save the clock settings when CPU go into low-power state
+ * resume:
+ *	restore the clock settings when CPU exit low-power state
+ */
+struct tegra_cpu_car_ops {
+	void (*wait_for_reset)(u32 cpu);
+	void (*put_in_reset)(u32 cpu);
+	void (*out_of_reset)(u32 cpu);
+	void (*enable_clock)(u32 cpu);
+	void (*disable_clock)(u32 cpu);
+#ifdef CONFIG_PM_SLEEP
+	bool (*rail_off_ready)(void);
+	void (*suspend)(void);
+	void (*resume)(void);
+#endif
+};
+
+extern struct tegra_cpu_car_ops *tegra_cpu_car_ops;
+
+static inline void tegra_wait_cpu_in_reset(u32 cpu)
+{
+	if (WARN_ON(!tegra_cpu_car_ops->wait_for_reset))
+		return;
+
+	tegra_cpu_car_ops->wait_for_reset(cpu);
+}
+
+static inline void tegra_put_cpu_in_reset(u32 cpu)
+{
+	if (WARN_ON(!tegra_cpu_car_ops->put_in_reset))
+		return;
+
+	tegra_cpu_car_ops->put_in_reset(cpu);
+}
+
+static inline void tegra_cpu_out_of_reset(u32 cpu)
+{
+	if (WARN_ON(!tegra_cpu_car_ops->out_of_reset))
+		return;
+
+	tegra_cpu_car_ops->out_of_reset(cpu);
+}
+
+static inline void tegra_enable_cpu_clock(u32 cpu)
+{
+	if (WARN_ON(!tegra_cpu_car_ops->enable_clock))
+		return;
+
+	tegra_cpu_car_ops->enable_clock(cpu);
+}
+
+static inline void tegra_disable_cpu_clock(u32 cpu)
+{
+	if (WARN_ON(!tegra_cpu_car_ops->disable_clock))
+		return;
+
+	tegra_cpu_car_ops->disable_clock(cpu);
+}
+
+#ifdef CONFIG_PM_SLEEP
+static inline bool tegra_cpu_rail_off_ready(void)
+{
+	if (WARN_ON(!tegra_cpu_car_ops->rail_off_ready))
+		return false;
+
+	return tegra_cpu_car_ops->rail_off_ready();
+}
+
+static inline void tegra_cpu_clock_suspend(void)
+{
+	if (WARN_ON(!tegra_cpu_car_ops->suspend))
+		return;
+
+	tegra_cpu_car_ops->suspend();
+}
+
+static inline void tegra_cpu_clock_resume(void)
+{
+	if (WARN_ON(!tegra_cpu_car_ops->resume))
+		return;
+
+	tegra_cpu_car_ops->resume();
+}
+#endif
+
+void tegra20_cpu_car_ops_init(void);
+void tegra30_cpu_car_ops_init(void);
+
+#endif /* __LINUX_CLK_TEGRA_H_ */
-- 
cgit v1.2.3


From 61fd290d213e25d5a119b8ca25644001ed9f8f2d Mon Sep 17 00:00:00 2001
From: Prashant Gaikwad <pgaikwad@nvidia.com>
Date: Fri, 11 Jan 2013 13:16:26 +0530
Subject: ARM: tegra: migrate to new clock code

Migrate Tegra clock support to drivers/clk/tegra, this involves
moving:
1. definition of tegra_cpu_car_ops to clk.c
2. definition of reset functions to clk-peripheral.c
3. change parent of cpu clock.
4. Remove legacy clock initialization.
5. Initialize clocks using DT.
6. Remove all instance of mach/clk.h

Signed-off-by: Prashant Gaikwad <pgaikwad@nvidia.com>
[swarren: use to_clk_periph_gate().]
Signed-off-by: Stephen Warren <swarren@nvidia.com>
---
 arch/arm/mach-tegra/board-dt-tegra20.c | 30 -----------------------
 arch/arm/mach-tegra/board-dt-tegra30.c | 31 ------------------------
 arch/arm/mach-tegra/clock.c            | 19 ---------------
 arch/arm/mach-tegra/common.c           | 44 ++--------------------------------
 arch/arm/mach-tegra/cpu-tegra.c        |  2 +-
 arch/arm/mach-tegra/include/mach/clk.h |  3 ---
 arch/arm/mach-tegra/pcie.c             |  2 +-
 arch/arm/mach-tegra/powergate.c        |  2 +-
 drivers/clk/tegra/clk-periph.c         | 38 +++++++++++++++++++++++++++++
 drivers/clk/tegra/clk.c                | 16 +++++++++++++
 drivers/dma/tegra20-apb-dma.c          |  2 +-
 drivers/gpu/drm/tegra/dc.c             |  3 +--
 drivers/gpu/drm/tegra/drm.c            |  1 -
 drivers/gpu/drm/tegra/hdmi.c           |  3 +--
 drivers/i2c/busses/i2c-tegra.c         |  3 +--
 drivers/input/keyboard/tegra-kbc.c     |  2 +-
 drivers/spi/spi-tegra20-sflash.c       |  2 +-
 drivers/spi/spi-tegra20-slink.c        |  2 +-
 drivers/staging/nvec/nvec.c            |  3 +--
 include/linux/clk/tegra.h              |  5 ++++
 sound/soc/tegra/tegra30_ahub.c         |  2 +-
 21 files changed, 73 insertions(+), 142 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-tegra/board-dt-tegra20.c b/arch/arm/mach-tegra/board-dt-tegra20.c
index e1f87dd314ef..0c11b8af3af3 100644
--- a/arch/arm/mach-tegra/board-dt-tegra20.c
+++ b/arch/arm/mach-tegra/board-dt-tegra20.c
@@ -42,7 +42,6 @@
 #include <asm/setup.h>
 
 #include "board.h"
-#include "clock.h"
 #include "common.h"
 #include "iomap.h"
 
@@ -104,37 +103,8 @@ static struct of_dev_auxdata tegra20_auxdata_lookup[] __initdata = {
 	{}
 };
 
-static __initdata struct tegra_clk_init_table tegra_dt_clk_init_table[] = {
-	/* name		parent		rate		enabled */
-	{ "uarta",	"pll_p",	216000000,	true },
-	{ "uartd",	"pll_p",	216000000,	true },
-	{ "usbd",	"clk_m",	12000000,	false },
-	{ "usb2",	"clk_m",	12000000,	false },
-	{ "usb3",	"clk_m",	12000000,	false },
-	{ "pll_a",      "pll_p_out1",   56448000,       true },
-	{ "pll_a_out0", "pll_a",        11289600,       true },
-	{ "cdev1",      NULL,           0,              true },
-	{ "blink",      "clk_32k",      32768,          true },
-	{ "i2s1",       "pll_a_out0",   11289600,       false},
-	{ "i2s2",       "pll_a_out0",   11289600,       false},
-	{ "sdmmc1",	"pll_p",	48000000,	false},
-	{ "sdmmc3",	"pll_p",	48000000,	false},
-	{ "sdmmc4",	"pll_p",	48000000,	false},
-	{ "spi",	"pll_p",	20000000,	false },
-	{ "sbc1",	"pll_p",	100000000,	false },
-	{ "sbc2",	"pll_p",	100000000,	false },
-	{ "sbc3",	"pll_p",	100000000,	false },
-	{ "sbc4",	"pll_p",	100000000,	false },
-	{ "host1x",	"pll_c",	150000000,	false },
-	{ "disp1",	"pll_p",	600000000,	false },
-	{ "disp2",	"pll_p",	600000000,	false },
-	{ NULL,		NULL,		0,		0},
-};
-
 static void __init tegra_dt_init(void)
 {
-	tegra_clk_init_from_table(tegra_dt_clk_init_table);
-
 	/*
 	 * Finished with the static registrations now; fill in the missing
 	 * devices
diff --git a/arch/arm/mach-tegra/board-dt-tegra30.c b/arch/arm/mach-tegra/board-dt-tegra30.c
index cfe5fc02be77..92f6014d22a1 100644
--- a/arch/arm/mach-tegra/board-dt-tegra30.c
+++ b/arch/arm/mach-tegra/board-dt-tegra30.c
@@ -35,7 +35,6 @@
 #include <asm/hardware/gic.h>
 
 #include "board.h"
-#include "clock.h"
 #include "common.h"
 #include "iomap.h"
 
@@ -67,38 +66,8 @@ static struct of_dev_auxdata tegra30_auxdata_lookup[] __initdata = {
 	{}
 };
 
-static __initdata struct tegra_clk_init_table tegra_dt_clk_init_table[] = {
-	/* name		parent		rate		enabled */
-	{ "uarta",	"pll_p",	408000000,	true },
-	{ "pll_a",	"pll_p_out1",	564480000,	true },
-	{ "pll_a_out0",	"pll_a",	11289600,	true },
-	{ "extern1",	"pll_a_out0",	0,		true },
-	{ "clk_out_1",	"extern1",	0,		true },
-	{ "blink",	"clk_32k",	32768,		true },
-	{ "i2s0",	"pll_a_out0",	11289600,	false},
-	{ "i2s1",	"pll_a_out0",	11289600,	false},
-	{ "i2s2",	"pll_a_out0",	11289600,	false},
-	{ "i2s3",	"pll_a_out0",	11289600,	false},
-	{ "i2s4",	"pll_a_out0",	11289600,	false},
-	{ "sdmmc1",	"pll_p",	48000000,	false},
-	{ "sdmmc3",	"pll_p",	48000000,	false},
-	{ "sdmmc4",	"pll_p",	48000000,	false},
-	{ "sbc1",	"pll_p",	100000000,	false},
-	{ "sbc2",	"pll_p",	100000000,	false},
-	{ "sbc3",	"pll_p",	100000000,	false},
-	{ "sbc4",	"pll_p",	100000000,	false},
-	{ "sbc5",	"pll_p",	100000000,	false},
-	{ "sbc6",	"pll_p",	100000000,	false},
-	{ "host1x",	"pll_c",	150000000,	false},
-	{ "disp1",	"pll_p",	600000000,	false},
-	{ "disp2",	"pll_p",	600000000,	false},
-	{ NULL,		NULL,		0,		0},
-};
-
 static void __init tegra30_dt_init(void)
 {
-	tegra_clk_init_from_table(tegra_dt_clk_init_table);
-
 	of_platform_populate(NULL, of_default_bus_match_table,
 				tegra30_auxdata_lookup, NULL);
 }
diff --git a/arch/arm/mach-tegra/clock.c b/arch/arm/mach-tegra/clock.c
index 8c0ff061f8cf..baa0c5b008f1 100644
--- a/arch/arm/mach-tegra/clock.c
+++ b/arch/arm/mach-tegra/clock.c
@@ -31,9 +31,6 @@
 #include "board.h"
 #include "clock.h"
 
-/* Global data of Tegra CPU CAR ops */
-struct tegra_cpu_car_ops *tegra_cpu_car_ops;
-
 /*
  * Locking:
  *
@@ -131,22 +128,6 @@ void tegra_clk_init_from_table(struct tegra_clk_init_table *table)
 		tegra_clk_init_one_from_table(table);
 }
 
-void tegra_periph_reset_deassert(struct clk *c)
-{
-	struct clk_tegra *clk = to_clk_tegra(__clk_get_hw(c));
-	BUG_ON(!clk->reset);
-	clk->reset(__clk_get_hw(c), false);
-}
-EXPORT_SYMBOL(tegra_periph_reset_deassert);
-
-void tegra_periph_reset_assert(struct clk *c)
-{
-	struct clk_tegra *clk = to_clk_tegra(__clk_get_hw(c));
-	BUG_ON(!clk->reset);
-	clk->reset(__clk_get_hw(c), true);
-}
-EXPORT_SYMBOL(tegra_periph_reset_assert);
-
 /* Several extended clock configuration bits (e.g., clock routing, clock
  * phase control) are included in PLL and peripheral clock source
  * registers. */
diff --git a/arch/arm/mach-tegra/common.c b/arch/arm/mach-tegra/common.c
index 3efe80b2af28..87dd69ccdf8e 100644
--- a/arch/arm/mach-tegra/common.c
+++ b/arch/arm/mach-tegra/common.c
@@ -22,6 +22,7 @@
 #include <linux/clk.h>
 #include <linux/delay.h>
 #include <linux/of_irq.h>
+#include <linux/clk/tegra.h>
 
 #include <asm/hardware/cache-l2x0.h>
 #include <asm/hardware/gic.h>
@@ -29,7 +30,6 @@
 #include <mach/powergate.h>
 
 #include "board.h"
-#include "clock.h"
 #include "common.h"
 #include "fuse.h"
 #include "iomap.h"
@@ -65,6 +65,7 @@ static const struct of_device_id tegra_dt_irq_match[] __initconst = {
 
 void __init tegra_dt_init_irq(void)
 {
+	tegra_clocks_init();
 	tegra_init_irq();
 	of_irq_init(tegra_dt_irq_match);
 }
@@ -80,43 +81,6 @@ void tegra_assert_system_reset(char mode, const char *cmd)
 	writel_relaxed(reg, reset);
 }
 
-#ifdef CONFIG_ARCH_TEGRA_2x_SOC
-static __initdata struct tegra_clk_init_table tegra20_clk_init_table[] = {
-	/* name		parent		rate		enabled */
-	{ "clk_m",	NULL,		0,		true },
-	{ "pll_p",	"clk_m",	216000000,	true },
-	{ "pll_p_out1",	"pll_p",	28800000,	true },
-	{ "pll_p_out2",	"pll_p",	48000000,	true },
-	{ "pll_p_out3",	"pll_p",	72000000,	true },
-	{ "pll_p_out4",	"pll_p",	24000000,	true },
-	{ "pll_c",	"clk_m",	600000000,	true },
-	{ "pll_c_out1",	"pll_c",	120000000,	true },
-	{ "sclk",	"pll_c_out1",	120000000,	true },
-	{ "hclk",	"sclk",		120000000,	true },
-	{ "pclk",	"hclk",		60000000,	true },
-	{ "csite",	NULL,		0,		true },
-	{ "emc",	NULL,		0,		true },
-	{ "cpu",	NULL,		0,		true },
-	{ NULL,		NULL,		0,		0},
-};
-#endif
-
-#ifdef CONFIG_ARCH_TEGRA_3x_SOC
-static __initdata struct tegra_clk_init_table tegra30_clk_init_table[] = {
-	/* name		parent		rate		enabled */
-	{ "clk_m",	NULL,		0,		true },
-	{ "pll_p",	"pll_ref",	408000000,	true },
-	{ "pll_p_out1",	"pll_p",	9600000,	true },
-	{ "pll_p_out4",	"pll_p",	102000000,	true },
-	{ "sclk",	"pll_p_out4",	102000000,	true },
-	{ "hclk",	"sclk",		102000000,	true },
-	{ "pclk",	"hclk",		51000000,	true },
-	{ "csite",	NULL,		0,		true },
-	{ NULL,		NULL,		0,		0},
-};
-#endif
-
-
 static void __init tegra_init_cache(void)
 {
 #ifdef CONFIG_CACHE_L2X0
@@ -141,8 +105,6 @@ void __init tegra20_init_early(void)
 	tegra_cpu_reset_handler_init();
 	tegra_apb_io_init();
 	tegra_init_fuse();
-	tegra2_init_clocks();
-	tegra_clk_init_from_table(tegra20_clk_init_table);
 	tegra_init_cache();
 	tegra_pmc_init();
 	tegra_powergate_init();
@@ -155,8 +117,6 @@ void __init tegra30_init_early(void)
 	tegra_cpu_reset_handler_init();
 	tegra_apb_io_init();
 	tegra_init_fuse();
-	tegra30_init_clocks();
-	tegra_clk_init_from_table(tegra30_clk_init_table);
 	tegra_init_cache();
 	tegra_pmc_init();
 	tegra_powergate_init();
diff --git a/arch/arm/mach-tegra/cpu-tegra.c b/arch/arm/mach-tegra/cpu-tegra.c
index 85d4a23bba03..ebffed67e2f5 100644
--- a/arch/arm/mach-tegra/cpu-tegra.c
+++ b/arch/arm/mach-tegra/cpu-tegra.c
@@ -266,7 +266,7 @@ static int __init tegra_cpufreq_init(void)
 	if (IS_ERR(pll_x_clk))
 		return PTR_ERR(pll_x_clk);
 
-	pll_p_clk = clk_get_sys(NULL, "pll_p");
+	pll_p_clk = clk_get_sys(NULL, "pll_p_cclk");
 	if (IS_ERR(pll_p_clk))
 		return PTR_ERR(pll_p_clk);
 
diff --git a/arch/arm/mach-tegra/include/mach/clk.h b/arch/arm/mach-tegra/include/mach/clk.h
index 95f3a547c770..85bbf10a7d0d 100644
--- a/arch/arm/mach-tegra/include/mach/clk.h
+++ b/arch/arm/mach-tegra/include/mach/clk.h
@@ -31,9 +31,6 @@ enum tegra_clk_ex_param {
 	TEGRA_CLK_PLLD_MIPI_MUX_SEL,
 };
 
-void tegra_periph_reset_deassert(struct clk *c);
-void tegra_periph_reset_assert(struct clk *c);
-
 #ifndef CONFIG_COMMON_CLK
 unsigned long clk_get_rate_all_locked(struct clk *c);
 #endif
diff --git a/arch/arm/mach-tegra/pcie.c b/arch/arm/mach-tegra/pcie.c
index bffcd643d7a3..b60165f1ca02 100644
--- a/arch/arm/mach-tegra/pcie.c
+++ b/arch/arm/mach-tegra/pcie.c
@@ -33,11 +33,11 @@
 #include <linux/clk.h>
 #include <linux/delay.h>
 #include <linux/export.h>
+#include <linux/clk/tegra.h>
 
 #include <asm/sizes.h>
 #include <asm/mach/pci.h>
 
-#include <mach/clk.h>
 #include <mach/powergate.h>
 
 #include "board.h"
diff --git a/arch/arm/mach-tegra/powergate.c b/arch/arm/mach-tegra/powergate.c
index 2cc1185d902e..c6bc8f85759c 100644
--- a/arch/arm/mach-tegra/powergate.c
+++ b/arch/arm/mach-tegra/powergate.c
@@ -26,8 +26,8 @@
 #include <linux/io.h>
 #include <linux/seq_file.h>
 #include <linux/spinlock.h>
+#include <linux/clk/tegra.h>
 
-#include <mach/clk.h>
 #include <mach/powergate.h>
 
 #include "fuse.h"
diff --git a/drivers/clk/tegra/clk-periph.c b/drivers/clk/tegra/clk-periph.c
index 5978e81b175b..788486e6331a 100644
--- a/drivers/clk/tegra/clk-periph.c
+++ b/drivers/clk/tegra/clk-periph.c
@@ -110,6 +110,44 @@ static void clk_periph_disable(struct clk_hw *hw)
 	gate_ops->disable(gate_hw);
 }
 
+void tegra_periph_reset_deassert(struct clk *c)
+{
+	struct clk_hw *hw = __clk_get_hw(c);
+	struct tegra_clk_periph *periph = to_clk_periph(hw);
+	struct tegra_clk_periph_gate *gate;
+
+	if (periph->magic != TEGRA_CLK_PERIPH_MAGIC) {
+		gate = to_clk_periph_gate(hw);
+		if (gate->magic != TEGRA_CLK_PERIPH_GATE_MAGIC) {
+			WARN_ON(1);
+			return;
+		}
+	} else {
+		gate = &periph->gate;
+	}
+
+	tegra_periph_reset(gate, 0);
+}
+
+void tegra_periph_reset_assert(struct clk *c)
+{
+	struct clk_hw *hw = __clk_get_hw(c);
+	struct tegra_clk_periph *periph = to_clk_periph(hw);
+	struct tegra_clk_periph_gate *gate;
+
+	if (periph->magic != TEGRA_CLK_PERIPH_MAGIC) {
+		gate = to_clk_periph_gate(hw);
+		if (gate->magic != TEGRA_CLK_PERIPH_GATE_MAGIC) {
+			WARN_ON(1);
+			return;
+		}
+	} else {
+		gate = &periph->gate;
+	}
+
+	tegra_periph_reset(gate, 1);
+}
+
 const struct clk_ops tegra_clk_periph_ops = {
 	.get_parent = clk_periph_get_parent,
 	.set_parent = clk_periph_set_parent,
diff --git a/drivers/clk/tegra/clk.c b/drivers/clk/tegra/clk.c
index cf023a937208..a603b9af0ad3 100644
--- a/drivers/clk/tegra/clk.c
+++ b/drivers/clk/tegra/clk.c
@@ -16,9 +16,14 @@
 
 #include <linux/clk.h>
 #include <linux/clk-provider.h>
+#include <linux/of.h>
+#include <linux/clk/tegra.h>
 
 #include "clk.h"
 
+/* Global data of Tegra CPU CAR ops */
+struct tegra_cpu_car_ops *tegra_cpu_car_ops;
+
 void __init tegra_init_dup_clks(struct tegra_clk_duplicate *dup_list,
 				struct clk *clks[], int clk_max)
 {
@@ -67,3 +72,14 @@ void __init tegra_init_from_table(struct tegra_clk_init_table *tbl,
 			}
 	}
 }
+
+static const struct of_device_id tegra_dt_clk_match[] = {
+	{ .compatible = "nvidia,tegra20-car", .data = tegra20_clock_init },
+	{ .compatible = "nvidia,tegra30-car", .data = tegra30_clock_init },
+	{ }
+};
+
+void __init tegra_clocks_init(void)
+{
+	of_clk_init(tegra_dt_clk_match);
+}
diff --git a/drivers/dma/tegra20-apb-dma.c b/drivers/dma/tegra20-apb-dma.c
index c39e61bc8172..afc9b89e20f4 100644
--- a/drivers/dma/tegra20-apb-dma.c
+++ b/drivers/dma/tegra20-apb-dma.c
@@ -31,8 +31,8 @@
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/slab.h>
+#include <linux/clk/tegra.h>
 
-#include <mach/clk.h>
 #include "dmaengine.h"
 
 #define TEGRA_APBDMA_GENERAL			0x0
diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c
index 656b2e3334a6..56813f967c8f 100644
--- a/drivers/gpu/drm/tegra/dc.c
+++ b/drivers/gpu/drm/tegra/dc.c
@@ -12,8 +12,7 @@
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/platform_device.h>
-
-#include <mach/clk.h>
+#include <linux/clk/tegra.h>
 
 #include "drm.h"
 #include "dc.h"
diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index 3a503c9e4686..d980dc75788c 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -11,7 +11,6 @@
 #include <linux/of_address.h>
 #include <linux/of_platform.h>
 
-#include <mach/clk.h>
 #include <linux/dma-mapping.h>
 #include <asm/dma-iommu.h>
 
diff --git a/drivers/gpu/drm/tegra/hdmi.c b/drivers/gpu/drm/tegra/hdmi.c
index e060c7e6434d..92ad276cc5e0 100644
--- a/drivers/gpu/drm/tegra/hdmi.c
+++ b/drivers/gpu/drm/tegra/hdmi.c
@@ -14,8 +14,7 @@
 #include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/regulator/consumer.h>
-
-#include <mach/clk.h>
+#include <linux/clk/tegra.h>
 
 #include "hdmi.h"
 #include "drm.h"
diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c
index 7b38877ffec1..c7aca35e38fd 100644
--- a/drivers/i2c/busses/i2c-tegra.c
+++ b/drivers/i2c/busses/i2c-tegra.c
@@ -29,11 +29,10 @@
 #include <linux/of_i2c.h>
 #include <linux/of_device.h>
 #include <linux/module.h>
+#include <linux/clk/tegra.h>
 
 #include <asm/unaligned.h>
 
-#include <mach/clk.h>
-
 #define TEGRA_I2C_TIMEOUT (msecs_to_jiffies(1000))
 #define BYTES_PER_FIFO_WORD 4
 
diff --git a/drivers/input/keyboard/tegra-kbc.c b/drivers/input/keyboard/tegra-kbc.c
index c76f96872d31..54ac1dc7d477 100644
--- a/drivers/input/keyboard/tegra-kbc.c
+++ b/drivers/input/keyboard/tegra-kbc.c
@@ -30,7 +30,7 @@
 #include <linux/clk.h>
 #include <linux/slab.h>
 #include <linux/input/tegra_kbc.h>
-#include <mach/clk.h>
+#include <linux/clk/tegra.h>
 
 #define KBC_MAX_DEBOUNCE_CNT	0x3ffu
 
diff --git a/drivers/spi/spi-tegra20-sflash.c b/drivers/spi/spi-tegra20-sflash.c
index 448a8cc71df3..02feaa51a0fa 100644
--- a/drivers/spi/spi-tegra20-sflash.c
+++ b/drivers/spi/spi-tegra20-sflash.c
@@ -34,7 +34,7 @@
 #include <linux/of_device.h>
 #include <linux/spi/spi.h>
 #include <linux/spi/spi-tegra.h>
-#include <mach/clk.h>
+#include <linux/clk/tegra.h>
 
 #define SPI_COMMAND				0x000
 #define SPI_GO					BIT(30)
diff --git a/drivers/spi/spi-tegra20-slink.c b/drivers/spi/spi-tegra20-slink.c
index 651167f2e0af..fa208a5cc612 100644
--- a/drivers/spi/spi-tegra20-slink.c
+++ b/drivers/spi/spi-tegra20-slink.c
@@ -35,7 +35,7 @@
 #include <linux/of_device.h>
 #include <linux/spi/spi.h>
 #include <linux/spi/spi-tegra.h>
-#include <mach/clk.h>
+#include <linux/clk/tegra.h>
 
 #define SLINK_COMMAND			0x000
 #define SLINK_BIT_LENGTH(x)		(((x) & 0x1f) << 0)
diff --git a/drivers/staging/nvec/nvec.c b/drivers/staging/nvec/nvec.c
index 2830946860d1..d51615b19797 100644
--- a/drivers/staging/nvec/nvec.c
+++ b/drivers/staging/nvec/nvec.c
@@ -37,8 +37,7 @@
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/workqueue.h>
-
-#include <mach/clk.h>
+#include <linux/clk/tegra.h>
 
 #include "nvec.h"
 
diff --git a/include/linux/clk/tegra.h b/include/linux/clk/tegra.h
index 0977f2a24757..a7e5a3999099 100644
--- a/include/linux/clk/tegra.h
+++ b/include/linux/clk/tegra.h
@@ -17,6 +17,8 @@
 #ifndef __LINUX_CLK_TEGRA_H_
 #define __LINUX_CLK_TEGRA_H_
 
+#include <linux/clk.h>
+
 /*
  * Tegra CPU clock and reset control ops
  *
@@ -120,5 +122,8 @@ static inline void tegra_cpu_clock_resume(void)
 
 void tegra20_cpu_car_ops_init(void);
 void tegra30_cpu_car_ops_init(void);
+void tegra_periph_reset_deassert(struct clk *c);
+void tegra_periph_reset_assert(struct clk *c);
+void tegra_clocks_init(void);
 
 #endif /* __LINUX_CLK_TEGRA_H_ */
diff --git a/sound/soc/tegra/tegra30_ahub.c b/sound/soc/tegra/tegra30_ahub.c
index f354dc390a0b..bb31c4123a7b 100644
--- a/sound/soc/tegra/tegra30_ahub.c
+++ b/sound/soc/tegra/tegra30_ahub.c
@@ -25,7 +25,7 @@
 #include <linux/pm_runtime.h>
 #include <linux/regmap.h>
 #include <linux/slab.h>
-#include <mach/clk.h>
+#include <linux/clk/tegra.h>
 #include <sound/soc.h>
 #include "tegra30_ahub.h"
 
-- 
cgit v1.2.3


From 52dec4c9eacc339dbf1b2ab549248df1cc6eb030 Mon Sep 17 00:00:00 2001
From: Prashant Gaikwad <pgaikwad@nvidia.com>
Date: Fri, 11 Jan 2013 13:16:27 +0530
Subject: ARM: tegra: remove legacy clock code

Remove all legacy clock code from mach-tegra.

Signed-off-by: Prashant Gaikwad <pgaikwad@nvidia.com>
Signed-off-by: Stephen Warren <swarren@nvidia.com>
---
 arch/arm/mach-tegra/Makefile              |    5 -
 arch/arm/mach-tegra/clock.c               |  147 --
 arch/arm/mach-tegra/clock.h               |  153 --
 arch/arm/mach-tegra/include/mach/clk.h    |   41 -
 arch/arm/mach-tegra/tegra20_clocks.c      | 1623 -------------------
 arch/arm/mach-tegra/tegra20_clocks.h      |   42 -
 arch/arm/mach-tegra/tegra20_clocks_data.c | 1143 -------------
 arch/arm/mach-tegra/tegra30_clocks.c      | 2506 -----------------------------
 arch/arm/mach-tegra/tegra30_clocks.h      |   54 -
 arch/arm/mach-tegra/tegra30_clocks_data.c | 1425 ----------------
 include/linux/clk/tegra.h                 |    2 -
 11 files changed, 7141 deletions(-)
 delete mode 100644 arch/arm/mach-tegra/clock.c
 delete mode 100644 arch/arm/mach-tegra/clock.h
 delete mode 100644 arch/arm/mach-tegra/include/mach/clk.h
 delete mode 100644 arch/arm/mach-tegra/tegra20_clocks.c
 delete mode 100644 arch/arm/mach-tegra/tegra20_clocks.h
 delete mode 100644 arch/arm/mach-tegra/tegra20_clocks_data.c
 delete mode 100644 arch/arm/mach-tegra/tegra30_clocks.c
 delete mode 100644 arch/arm/mach-tegra/tegra30_clocks.h
 delete mode 100644 arch/arm/mach-tegra/tegra30_clocks_data.c

(limited to 'include/linux')

diff --git a/arch/arm/mach-tegra/Makefile b/arch/arm/mach-tegra/Makefile
index f0520961bafe..6018a05e808c 100644
--- a/arch/arm/mach-tegra/Makefile
+++ b/arch/arm/mach-tegra/Makefile
@@ -1,7 +1,6 @@
 obj-y                                   += common.o
 obj-y                                   += io.o
 obj-y                                   += irq.o
-obj-y                                   += clock.o
 obj-y					+= fuse.o
 obj-y					+= pmc.o
 obj-y					+= flowctrl.o
@@ -12,16 +11,12 @@ obj-y					+= reset.o
 obj-y					+= reset-handler.o
 obj-y					+= sleep.o
 obj-$(CONFIG_CPU_IDLE)			+= cpuidle.o
-obj-$(CONFIG_ARCH_TEGRA_2x_SOC)         += tegra20_clocks.o
-obj-$(CONFIG_ARCH_TEGRA_2x_SOC)         += tegra20_clocks_data.o
 obj-$(CONFIG_ARCH_TEGRA_2x_SOC)		+= tegra20_speedo.o
 obj-$(CONFIG_ARCH_TEGRA_2x_SOC)		+= tegra2_emc.o
 obj-$(CONFIG_ARCH_TEGRA_2x_SOC)		+= sleep-tegra20.o
 ifeq ($(CONFIG_CPU_IDLE),y)
 obj-$(CONFIG_ARCH_TEGRA_2x_SOC)		+= cpuidle-tegra20.o
 endif
-obj-$(CONFIG_ARCH_TEGRA_3x_SOC)		+= tegra30_clocks.o
-obj-$(CONFIG_ARCH_TEGRA_3x_SOC)		+= tegra30_clocks_data.o
 obj-$(CONFIG_ARCH_TEGRA_3x_SOC)		+= tegra30_speedo.o
 obj-$(CONFIG_ARCH_TEGRA_3x_SOC)		+= sleep-tegra30.o
 ifeq ($(CONFIG_CPU_IDLE),y)
diff --git a/arch/arm/mach-tegra/clock.c b/arch/arm/mach-tegra/clock.c
deleted file mode 100644
index baa0c5b008f1..000000000000
--- a/arch/arm/mach-tegra/clock.c
+++ /dev/null
@@ -1,147 +0,0 @@
-/*
- *
- * Copyright (C) 2010 Google, Inc.
- * Copyright (c) 2012 NVIDIA CORPORATION.  All rights reserved.
- *
- * Author:
- *	Colin Cross <ccross@google.com>
- *
- * This software is licensed under the terms of the GNU General Public
- * License version 2, as published by the Free Software Foundation, and
- * may be copied, distributed, and modified under those terms.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- */
-
-#include <linux/kernel.h>
-#include <linux/clk.h>
-#include <linux/clkdev.h>
-#include <linux/init.h>
-#include <linux/list.h>
-#include <linux/module.h>
-#include <linux/sched.h>
-#include <linux/seq_file.h>
-#include <linux/slab.h>
-#include <linux/clk/tegra.h>
-
-#include "board.h"
-#include "clock.h"
-
-/*
- * Locking:
- *
- * An additional mutex, clock_list_lock, is used to protect the list of all
- * clocks.
- *
- */
-static DEFINE_MUTEX(clock_list_lock);
-static LIST_HEAD(clocks);
-
-void tegra_clk_add(struct clk *clk)
-{
-	struct clk_tegra *c = to_clk_tegra(__clk_get_hw(clk));
-
-	mutex_lock(&clock_list_lock);
-	list_add(&c->node, &clocks);
-	mutex_unlock(&clock_list_lock);
-}
-
-struct clk *tegra_get_clock_by_name(const char *name)
-{
-	struct clk_tegra *c;
-	struct clk *ret = NULL;
-	mutex_lock(&clock_list_lock);
-	list_for_each_entry(c, &clocks, node) {
-		if (strcmp(__clk_get_name(c->hw.clk), name) == 0) {
-			ret = c->hw.clk;
-			break;
-		}
-	}
-	mutex_unlock(&clock_list_lock);
-	return ret;
-}
-
-static int tegra_clk_init_one_from_table(struct tegra_clk_init_table *table)
-{
-	struct clk *c;
-	struct clk *p;
-	struct clk *parent;
-
-	int ret = 0;
-
-	c = tegra_get_clock_by_name(table->name);
-
-	if (!c) {
-		pr_warn("Unable to initialize clock %s\n",
-			table->name);
-		return -ENODEV;
-	}
-
-	parent = clk_get_parent(c);
-
-	if (table->parent) {
-		p = tegra_get_clock_by_name(table->parent);
-		if (!p) {
-			pr_warn("Unable to find parent %s of clock %s\n",
-				table->parent, table->name);
-			return -ENODEV;
-		}
-
-		if (parent != p) {
-			ret = clk_set_parent(c, p);
-			if (ret) {
-				pr_warn("Unable to set parent %s of clock %s: %d\n",
-					table->parent, table->name, ret);
-				return -EINVAL;
-			}
-		}
-	}
-
-	if (table->rate && table->rate != clk_get_rate(c)) {
-		ret = clk_set_rate(c, table->rate);
-		if (ret) {
-			pr_warn("Unable to set clock %s to rate %lu: %d\n",
-				table->name, table->rate, ret);
-			return -EINVAL;
-		}
-	}
-
-	if (table->enabled) {
-		ret = clk_prepare_enable(c);
-		if (ret) {
-			pr_warn("Unable to enable clock %s: %d\n",
-				table->name, ret);
-			return -EINVAL;
-		}
-	}
-
-	return 0;
-}
-
-void tegra_clk_init_from_table(struct tegra_clk_init_table *table)
-{
-	for (; table->name; table++)
-		tegra_clk_init_one_from_table(table);
-}
-
-/* Several extended clock configuration bits (e.g., clock routing, clock
- * phase control) are included in PLL and peripheral clock source
- * registers. */
-int tegra_clk_cfg_ex(struct clk *c, enum tegra_clk_ex_param p, u32 setting)
-{
-	int ret = 0;
-	struct clk_tegra *clk = to_clk_tegra(__clk_get_hw(c));
-
-	if (!clk->clk_cfg_ex) {
-		ret = -ENOSYS;
-		goto out;
-	}
-	ret = clk->clk_cfg_ex(__clk_get_hw(c), p, setting);
-
-out:
-	return ret;
-}
diff --git a/arch/arm/mach-tegra/clock.h b/arch/arm/mach-tegra/clock.h
deleted file mode 100644
index 2aa37f5c44c0..000000000000
--- a/arch/arm/mach-tegra/clock.h
+++ /dev/null
@@ -1,153 +0,0 @@
-/*
- * arch/arm/mach-tegra/include/mach/clock.h
- *
- * Copyright (C) 2010 Google, Inc.
- * Copyright (c) 2012 NVIDIA CORPORATION.  All rights reserved.
- *
- * Author:
- *	Colin Cross <ccross@google.com>
- *
- * This software is licensed under the terms of the GNU General Public
- * License version 2, as published by the Free Software Foundation, and
- * may be copied, distributed, and modified under those terms.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- */
-
-#ifndef __MACH_TEGRA_CLOCK_H
-#define __MACH_TEGRA_CLOCK_H
-
-#include <linux/clk-provider.h>
-#include <linux/clkdev.h>
-#include <linux/list.h>
-
-#include <mach/clk.h>
-
-#define DIV_BUS			(1 << 0)
-#define DIV_U71			(1 << 1)
-#define DIV_U71_FIXED		(1 << 2)
-#define DIV_2			(1 << 3)
-#define DIV_U16			(1 << 4)
-#define PLL_FIXED		(1 << 5)
-#define PLL_HAS_CPCON		(1 << 6)
-#define MUX			(1 << 7)
-#define PLLD			(1 << 8)
-#define PERIPH_NO_RESET		(1 << 9)
-#define PERIPH_NO_ENB		(1 << 10)
-#define PERIPH_EMC_ENB		(1 << 11)
-#define PERIPH_MANUAL_RESET	(1 << 12)
-#define PLL_ALT_MISC_REG	(1 << 13)
-#define PLLU			(1 << 14)
-#define PLLX                    (1 << 15)
-#define MUX_PWM                 (1 << 16)
-#define MUX8                    (1 << 17)
-#define DIV_U71_UART            (1 << 18)
-#define MUX_CLK_OUT             (1 << 19)
-#define PLLM                    (1 << 20)
-#define DIV_U71_INT             (1 << 21)
-#define DIV_U71_IDLE            (1 << 22)
-#define ENABLE_ON_INIT		(1 << 28)
-#define PERIPH_ON_APB           (1 << 29)
-
-struct clk_tegra;
-#define to_clk_tegra(_hw) container_of(_hw, struct clk_tegra, hw)
-
-struct clk_mux_sel {
-	struct clk	*input;
-	u32		value;
-};
-
-struct clk_pll_freq_table {
-	unsigned long	input_rate;
-	unsigned long	output_rate;
-	u16		n;
-	u16		m;
-	u8		p;
-	u8		cpcon;
-};
-
-enum clk_state {
-	UNINITIALIZED = 0,
-	ON,
-	OFF,
-};
-
-struct clk_tegra {
-	/* node for master clocks list */
-	struct list_head	node;	/* node for list of all clocks */
-	struct clk_lookup	lookup;
-	struct clk_hw		hw;
-
-	bool			set;
-	unsigned long		fixed_rate;
-	unsigned long		max_rate;
-	unsigned long		min_rate;
-	u32			flags;
-	const char		*name;
-
-	enum clk_state		state;
-	u32			div;
-	u32			mul;
-
-	u32				reg;
-	u32				reg_shift;
-
-	struct list_head		shared_bus_list;
-
-	union {
-		struct {
-			unsigned int			clk_num;
-		} periph;
-		struct {
-			unsigned long			input_min;
-			unsigned long			input_max;
-			unsigned long			cf_min;
-			unsigned long			cf_max;
-			unsigned long			vco_min;
-			unsigned long			vco_max;
-			const struct clk_pll_freq_table	*freq_table;
-			int				lock_delay;
-			unsigned long			fixed_rate;
-		} pll;
-		struct {
-			u32				sel;
-			u32				reg_mask;
-		} mux;
-		struct {
-			struct clk			*main;
-			struct clk			*backup;
-		} cpu;
-		struct {
-			struct list_head		node;
-			bool				enabled;
-			unsigned long			rate;
-		} shared_bus_user;
-	} u;
-
-	void (*reset)(struct clk_hw *, bool);
-	int (*clk_cfg_ex)(struct clk_hw *, enum tegra_clk_ex_param, u32);
-};
-
-struct clk_duplicate {
-	const char *name;
-	struct clk_lookup lookup;
-};
-
-struct tegra_clk_init_table {
-	const char *name;
-	const char *parent;
-	unsigned long rate;
-	bool enabled;
-};
-
-void tegra_clk_add(struct clk *c);
-void tegra2_init_clocks(void);
-void tegra30_init_clocks(void);
-struct clk *tegra_get_clock_by_name(const char *name);
-void tegra_clk_init_from_table(struct tegra_clk_init_table *table);
-
-#endif
diff --git a/arch/arm/mach-tegra/include/mach/clk.h b/arch/arm/mach-tegra/include/mach/clk.h
deleted file mode 100644
index 85bbf10a7d0d..000000000000
--- a/arch/arm/mach-tegra/include/mach/clk.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * arch/arm/mach-tegra/include/mach/clk.h
- *
- * Copyright (C) 2010 Google, Inc.
- *
- * Author:
- *	Erik Gilling <konkers@google.com>
- *
- * This software is licensed under the terms of the GNU General Public
- * License version 2, as published by the Free Software Foundation, and
- * may be copied, distributed, and modified under those terms.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- */
-
-#ifndef __MACH_CLK_H
-#define __MACH_CLK_H
-
-struct clk;
-
-enum tegra_clk_ex_param {
-	TEGRA_CLK_VI_INP_SEL,
-	TEGRA_CLK_DTV_INVERT,
-	TEGRA_CLK_NAND_PAD_DIV2_ENB,
-	TEGRA_CLK_PLLD_CSI_OUT_ENB,
-	TEGRA_CLK_PLLD_DSI_OUT_ENB,
-	TEGRA_CLK_PLLD_MIPI_MUX_SEL,
-};
-
-#ifndef CONFIG_COMMON_CLK
-unsigned long clk_get_rate_all_locked(struct clk *c);
-#endif
-
-void tegra2_sdmmc_tap_delay(struct clk *c, int delay);
-int tegra_clk_cfg_ex(struct clk *c, enum tegra_clk_ex_param p, u32 setting);
-
-#endif
diff --git a/arch/arm/mach-tegra/tegra20_clocks.c b/arch/arm/mach-tegra/tegra20_clocks.c
deleted file mode 100644
index 1a80ff65e5fc..000000000000
--- a/arch/arm/mach-tegra/tegra20_clocks.c
+++ /dev/null
@@ -1,1623 +0,0 @@
-/*
- * arch/arm/mach-tegra/tegra20_clocks.c
- *
- * Copyright (C) 2010 Google, Inc.
- * Copyright (c) 2010-2012 NVIDIA CORPORATION.  All rights reserved.
- *
- * Author:
- *	Colin Cross <ccross@google.com>
- *
- * This software is licensed under the terms of the GNU General Public
- * License version 2, as published by the Free Software Foundation, and
- * may be copied, distributed, and modified under those terms.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/list.h>
-#include <linux/spinlock.h>
-#include <linux/delay.h>
-#include <linux/io.h>
-#include <linux/clkdev.h>
-#include <linux/clk.h>
-#include <linux/clk/tegra.h>
-
-#include "clock.h"
-#include "fuse.h"
-#include "iomap.h"
-#include "tegra2_emc.h"
-
-#define RST_DEVICES			0x004
-#define RST_DEVICES_SET			0x300
-#define RST_DEVICES_CLR			0x304
-#define RST_DEVICES_NUM			3
-
-#define CLK_OUT_ENB			0x010
-#define CLK_OUT_ENB_SET			0x320
-#define CLK_OUT_ENB_CLR			0x324
-#define CLK_OUT_ENB_NUM			3
-
-#define CLK_MASK_ARM			0x44
-#define MISC_CLK_ENB			0x48
-
-#define OSC_CTRL			0x50
-#define OSC_CTRL_OSC_FREQ_MASK		(3<<30)
-#define OSC_CTRL_OSC_FREQ_13MHZ		(0<<30)
-#define OSC_CTRL_OSC_FREQ_19_2MHZ	(1<<30)
-#define OSC_CTRL_OSC_FREQ_12MHZ		(2<<30)
-#define OSC_CTRL_OSC_FREQ_26MHZ		(3<<30)
-#define OSC_CTRL_MASK			(0x3f2 | OSC_CTRL_OSC_FREQ_MASK)
-
-#define OSC_FREQ_DET			0x58
-#define OSC_FREQ_DET_TRIG		(1<<31)
-
-#define OSC_FREQ_DET_STATUS		0x5C
-#define OSC_FREQ_DET_BUSY		(1<<31)
-#define OSC_FREQ_DET_CNT_MASK		0xFFFF
-
-#define PERIPH_CLK_SOURCE_I2S1		0x100
-#define PERIPH_CLK_SOURCE_EMC		0x19c
-#define PERIPH_CLK_SOURCE_OSC		0x1fc
-#define PERIPH_CLK_SOURCE_NUM \
-	((PERIPH_CLK_SOURCE_OSC - PERIPH_CLK_SOURCE_I2S1) / 4)
-
-#define PERIPH_CLK_SOURCE_MASK		(3<<30)
-#define PERIPH_CLK_SOURCE_SHIFT		30
-#define PERIPH_CLK_SOURCE_PWM_MASK	(7<<28)
-#define PERIPH_CLK_SOURCE_PWM_SHIFT	28
-#define PERIPH_CLK_SOURCE_ENABLE	(1<<28)
-#define PERIPH_CLK_SOURCE_DIVU71_MASK	0xFF
-#define PERIPH_CLK_SOURCE_DIVU16_MASK	0xFFFF
-#define PERIPH_CLK_SOURCE_DIV_SHIFT	0
-
-#define SDMMC_CLK_INT_FB_SEL		(1 << 23)
-#define SDMMC_CLK_INT_FB_DLY_SHIFT	16
-#define SDMMC_CLK_INT_FB_DLY_MASK	(0xF << SDMMC_CLK_INT_FB_DLY_SHIFT)
-
-#define PLL_BASE			0x0
-#define PLL_BASE_BYPASS			(1<<31)
-#define PLL_BASE_ENABLE			(1<<30)
-#define PLL_BASE_REF_ENABLE		(1<<29)
-#define PLL_BASE_OVERRIDE		(1<<28)
-#define PLL_BASE_DIVP_MASK		(0x7<<20)
-#define PLL_BASE_DIVP_SHIFT		20
-#define PLL_BASE_DIVN_MASK		(0x3FF<<8)
-#define PLL_BASE_DIVN_SHIFT		8
-#define PLL_BASE_DIVM_MASK		(0x1F)
-#define PLL_BASE_DIVM_SHIFT		0
-
-#define PLL_OUT_RATIO_MASK		(0xFF<<8)
-#define PLL_OUT_RATIO_SHIFT		8
-#define PLL_OUT_OVERRIDE		(1<<2)
-#define PLL_OUT_CLKEN			(1<<1)
-#define PLL_OUT_RESET_DISABLE		(1<<0)
-
-#define PLL_MISC(c) (((c)->flags & PLL_ALT_MISC_REG) ? 0x4 : 0xc)
-
-#define PLL_MISC_DCCON_SHIFT		20
-#define PLL_MISC_CPCON_SHIFT		8
-#define PLL_MISC_CPCON_MASK		(0xF<<PLL_MISC_CPCON_SHIFT)
-#define PLL_MISC_LFCON_SHIFT		4
-#define PLL_MISC_LFCON_MASK		(0xF<<PLL_MISC_LFCON_SHIFT)
-#define PLL_MISC_VCOCON_SHIFT		0
-#define PLL_MISC_VCOCON_MASK		(0xF<<PLL_MISC_VCOCON_SHIFT)
-
-#define PLLU_BASE_POST_DIV		(1<<20)
-
-#define PLLD_MISC_CLKENABLE		(1<<30)
-#define PLLD_MISC_DIV_RST		(1<<23)
-#define PLLD_MISC_DCCON_SHIFT		12
-
-#define PLLE_MISC_READY			(1 << 15)
-
-#define PERIPH_CLK_TO_ENB_REG(c)	((c->u.periph.clk_num / 32) * 4)
-#define PERIPH_CLK_TO_ENB_SET_REG(c)	((c->u.periph.clk_num / 32) * 8)
-#define PERIPH_CLK_TO_ENB_BIT(c)	(1 << (c->u.periph.clk_num % 32))
-
-#define SUPER_CLK_MUX			0x00
-#define SUPER_STATE_SHIFT		28
-#define SUPER_STATE_MASK		(0xF << SUPER_STATE_SHIFT)
-#define SUPER_STATE_STANDBY		(0x0 << SUPER_STATE_SHIFT)
-#define SUPER_STATE_IDLE		(0x1 << SUPER_STATE_SHIFT)
-#define SUPER_STATE_RUN			(0x2 << SUPER_STATE_SHIFT)
-#define SUPER_STATE_IRQ			(0x3 << SUPER_STATE_SHIFT)
-#define SUPER_STATE_FIQ			(0x4 << SUPER_STATE_SHIFT)
-#define SUPER_SOURCE_MASK		0xF
-#define	SUPER_FIQ_SOURCE_SHIFT		12
-#define	SUPER_IRQ_SOURCE_SHIFT		8
-#define	SUPER_RUN_SOURCE_SHIFT		4
-#define	SUPER_IDLE_SOURCE_SHIFT		0
-
-#define SUPER_CLK_DIVIDER		0x04
-
-#define BUS_CLK_DISABLE			(1<<3)
-#define BUS_CLK_DIV_MASK		0x3
-
-#define PMC_CTRL			0x0
- #define PMC_CTRL_BLINK_ENB		(1 << 7)
-
-#define PMC_DPD_PADS_ORIDE		0x1c
- #define PMC_DPD_PADS_ORIDE_BLINK_ENB	(1 << 20)
-
-#define PMC_BLINK_TIMER_DATA_ON_SHIFT	0
-#define PMC_BLINK_TIMER_DATA_ON_MASK	0x7fff
-#define PMC_BLINK_TIMER_ENB		(1 << 15)
-#define PMC_BLINK_TIMER_DATA_OFF_SHIFT	16
-#define PMC_BLINK_TIMER_DATA_OFF_MASK	0xffff
-
-/* Tegra CPU clock and reset control regs */
-#define TEGRA_CLK_RST_CONTROLLER_CLK_CPU_CMPLX		0x4c
-#define TEGRA_CLK_RST_CONTROLLER_RST_CPU_CMPLX_SET	0x340
-#define TEGRA_CLK_RST_CONTROLLER_RST_CPU_CMPLX_CLR	0x344
-
-#define CPU_CLOCK(cpu)	(0x1 << (8 + cpu))
-#define CPU_RESET(cpu)	(0x1111ul << (cpu))
-
-static void __iomem *reg_clk_base = IO_ADDRESS(TEGRA_CLK_RESET_BASE);
-static void __iomem *reg_pmc_base = IO_ADDRESS(TEGRA_PMC_BASE);
-
-/*
- * Some clocks share a register with other clocks.  Any clock op that
- * non-atomically modifies a register used by another clock must lock
- * clock_register_lock first.
- */
-static DEFINE_SPINLOCK(clock_register_lock);
-
-/*
- * Some peripheral clocks share an enable bit, so refcount the enable bits
- * in registers CLK_ENABLE_L, CLK_ENABLE_H, and CLK_ENABLE_U
- */
-static int tegra_periph_clk_enable_refcount[3 * 32];
-
-#define clk_writel(value, reg) \
-	__raw_writel(value, reg_clk_base + (reg))
-#define clk_readl(reg) \
-	__raw_readl(reg_clk_base + (reg))
-#define pmc_writel(value, reg) \
-	__raw_writel(value, reg_pmc_base + (reg))
-#define pmc_readl(reg) \
-	__raw_readl(reg_pmc_base + (reg))
-
-static unsigned long clk_measure_input_freq(void)
-{
-	u32 clock_autodetect;
-	clk_writel(OSC_FREQ_DET_TRIG | 1, OSC_FREQ_DET);
-	do {} while (clk_readl(OSC_FREQ_DET_STATUS) & OSC_FREQ_DET_BUSY);
-	clock_autodetect = clk_readl(OSC_FREQ_DET_STATUS);
-	if (clock_autodetect >= 732 - 3 && clock_autodetect <= 732 + 3) {
-		return 12000000;
-	} else if (clock_autodetect >= 794 - 3 && clock_autodetect <= 794 + 3) {
-		return 13000000;
-	} else if (clock_autodetect >= 1172 - 3 && clock_autodetect <= 1172 + 3) {
-		return 19200000;
-	} else if (clock_autodetect >= 1587 - 3 && clock_autodetect <= 1587 + 3) {
-		return 26000000;
-	} else {
-		pr_err("%s: Unexpected clock autodetect value %d",
-						__func__, clock_autodetect);
-		BUG();
-		return 0;
-	}
-}
-
-static int clk_div71_get_divider(unsigned long parent_rate, unsigned long rate)
-{
-	s64 divider_u71 = parent_rate * 2;
-	divider_u71 += rate - 1;
-	do_div(divider_u71, rate);
-
-	if (divider_u71 - 2 < 0)
-		return 0;
-
-	if (divider_u71 - 2 > 255)
-		return -EINVAL;
-
-	return divider_u71 - 2;
-}
-
-static int clk_div16_get_divider(unsigned long parent_rate, unsigned long rate)
-{
-	s64 divider_u16;
-
-	divider_u16 = parent_rate;
-	divider_u16 += rate - 1;
-	do_div(divider_u16, rate);
-
-	if (divider_u16 - 1 < 0)
-		return 0;
-
-	if (divider_u16 - 1 > 0xFFFF)
-		return -EINVAL;
-
-	return divider_u16 - 1;
-}
-
-static unsigned long tegra_clk_fixed_recalc_rate(struct clk_hw *hw,
-		unsigned long parent_rate)
-{
-	return to_clk_tegra(hw)->fixed_rate;
-}
-
-struct clk_ops tegra_clk_32k_ops = {
-	.recalc_rate = tegra_clk_fixed_recalc_rate,
-};
-
-/* clk_m functions */
-static unsigned long tegra20_clk_m_recalc_rate(struct clk_hw *hw,
-			unsigned long prate)
-{
-	if (!to_clk_tegra(hw)->fixed_rate)
-		to_clk_tegra(hw)->fixed_rate = clk_measure_input_freq();
-	return to_clk_tegra(hw)->fixed_rate;
-}
-
-static void tegra20_clk_m_init(struct clk_hw *hw)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	u32 osc_ctrl = clk_readl(OSC_CTRL);
-	u32 auto_clock_control = osc_ctrl & ~OSC_CTRL_OSC_FREQ_MASK;
-
-	switch (c->fixed_rate) {
-	case 12000000:
-		auto_clock_control |= OSC_CTRL_OSC_FREQ_12MHZ;
-		break;
-	case 13000000:
-		auto_clock_control |= OSC_CTRL_OSC_FREQ_13MHZ;
-		break;
-	case 19200000:
-		auto_clock_control |= OSC_CTRL_OSC_FREQ_19_2MHZ;
-		break;
-	case 26000000:
-		auto_clock_control |= OSC_CTRL_OSC_FREQ_26MHZ;
-		break;
-	default:
-		BUG();
-	}
-	clk_writel(auto_clock_control, OSC_CTRL);
-}
-
-struct clk_ops tegra_clk_m_ops = {
-	.init = tegra20_clk_m_init,
-	.recalc_rate = tegra20_clk_m_recalc_rate,
-};
-
-/* super clock functions */
-/* "super clocks" on tegra have two-stage muxes and a clock skipping
- * super divider.  We will ignore the clock skipping divider, since we
- * can't lower the voltage when using the clock skip, but we can if we
- * lower the PLL frequency.
- */
-static int tegra20_super_clk_is_enabled(struct clk_hw *hw)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	u32 val;
-
-	val = clk_readl(c->reg + SUPER_CLK_MUX);
-	BUG_ON(((val & SUPER_STATE_MASK) != SUPER_STATE_RUN) &&
-		((val & SUPER_STATE_MASK) != SUPER_STATE_IDLE));
-	c->state = ON;
-	return c->state;
-}
-
-static int tegra20_super_clk_enable(struct clk_hw *hw)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	clk_writel(0, c->reg + SUPER_CLK_DIVIDER);
-	return 0;
-}
-
-static void tegra20_super_clk_disable(struct clk_hw *hw)
-{
-	pr_debug("%s on clock %s\n", __func__, __clk_get_name(hw->clk));
-
-	/* oops - don't disable the CPU clock! */
-	BUG();
-}
-
-static u8 tegra20_super_clk_get_parent(struct clk_hw *hw)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	int val = clk_readl(c->reg + SUPER_CLK_MUX);
-	int source;
-	int shift;
-
-	BUG_ON(((val & SUPER_STATE_MASK) != SUPER_STATE_RUN) &&
-		((val & SUPER_STATE_MASK) != SUPER_STATE_IDLE));
-	shift = ((val & SUPER_STATE_MASK) == SUPER_STATE_IDLE) ?
-		SUPER_IDLE_SOURCE_SHIFT : SUPER_RUN_SOURCE_SHIFT;
-	source = (val >> shift) & SUPER_SOURCE_MASK;
-	return source;
-}
-
-static int tegra20_super_clk_set_parent(struct clk_hw *hw, u8 index)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	u32 val = clk_readl(c->reg + SUPER_CLK_MUX);
-	int shift;
-
-	BUG_ON(((val & SUPER_STATE_MASK) != SUPER_STATE_RUN) &&
-		((val & SUPER_STATE_MASK) != SUPER_STATE_IDLE));
-	shift = ((val & SUPER_STATE_MASK) == SUPER_STATE_IDLE) ?
-		SUPER_IDLE_SOURCE_SHIFT : SUPER_RUN_SOURCE_SHIFT;
-	val &= ~(SUPER_SOURCE_MASK << shift);
-	val |= index << shift;
-
-	clk_writel(val, c->reg);
-
-	return 0;
-}
-
-/* FIX ME: Need to switch parents to change the source PLL rate */
-static unsigned long tegra20_super_clk_recalc_rate(struct clk_hw *hw,
-			unsigned long prate)
-{
-	return prate;
-}
-
-static long tegra20_super_clk_round_rate(struct clk_hw *hw, unsigned long rate,
-				unsigned long *prate)
-{
-	return *prate;
-}
-
-static int tegra20_super_clk_set_rate(struct clk_hw *hw, unsigned long rate,
-		unsigned long parent_rate)
-{
-	return 0;
-}
-
-struct clk_ops tegra_super_ops = {
-	.is_enabled = tegra20_super_clk_is_enabled,
-	.enable = tegra20_super_clk_enable,
-	.disable = tegra20_super_clk_disable,
-	.set_parent = tegra20_super_clk_set_parent,
-	.get_parent = tegra20_super_clk_get_parent,
-	.set_rate = tegra20_super_clk_set_rate,
-	.round_rate = tegra20_super_clk_round_rate,
-	.recalc_rate = tegra20_super_clk_recalc_rate,
-};
-
-static unsigned long tegra20_twd_clk_recalc_rate(struct clk_hw *hw,
-		unsigned long parent_rate)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	u64 rate = parent_rate;
-
-	if (c->mul != 0 && c->div != 0) {
-		rate *= c->mul;
-		rate += c->div - 1; /* round up */
-		do_div(rate, c->div);
-	}
-
-	return rate;
-}
-
-struct clk_ops tegra_twd_ops = {
-	.recalc_rate = tegra20_twd_clk_recalc_rate,
-};
-
-static u8 tegra20_cop_clk_get_parent(struct clk_hw *hw)
-{
-	return 0;
-}
-
-struct clk_ops tegra_cop_ops = {
-	.get_parent = tegra20_cop_clk_get_parent,
-};
-
-/* virtual cop clock functions. Used to acquire the fake 'cop' clock to
- * reset the COP block (i.e. AVP) */
-void tegra2_cop_clk_reset(struct clk_hw *hw, bool assert)
-{
-	unsigned long reg = assert ? RST_DEVICES_SET : RST_DEVICES_CLR;
-
-	pr_debug("%s %s\n", __func__, assert ? "assert" : "deassert");
-	clk_writel(1 << 1, reg);
-}
-
-/* bus clock functions */
-static int tegra20_bus_clk_is_enabled(struct clk_hw *hw)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	u32 val = clk_readl(c->reg);
-
-	c->state = ((val >> c->reg_shift) & BUS_CLK_DISABLE) ? OFF : ON;
-	return c->state;
-}
-
-static int tegra20_bus_clk_enable(struct clk_hw *hw)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	unsigned long flags;
-	u32 val;
-
-	spin_lock_irqsave(&clock_register_lock, flags);
-
-	val = clk_readl(c->reg);
-	val &= ~(BUS_CLK_DISABLE << c->reg_shift);
-	clk_writel(val, c->reg);
-
-	spin_unlock_irqrestore(&clock_register_lock, flags);
-
-	return 0;
-}
-
-static void tegra20_bus_clk_disable(struct clk_hw *hw)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	unsigned long flags;
-	u32 val;
-
-	spin_lock_irqsave(&clock_register_lock, flags);
-
-	val = clk_readl(c->reg);
-	val |= BUS_CLK_DISABLE << c->reg_shift;
-	clk_writel(val, c->reg);
-
-	spin_unlock_irqrestore(&clock_register_lock, flags);
-}
-
-static unsigned long tegra20_bus_clk_recalc_rate(struct clk_hw *hw,
-			unsigned long prate)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	u32 val = clk_readl(c->reg);
-	u64 rate = prate;
-
-	c->div = ((val >> c->reg_shift) & BUS_CLK_DIV_MASK) + 1;
-	c->mul = 1;
-
-	if (c->mul != 0 && c->div != 0) {
-		rate *= c->mul;
-		rate += c->div - 1; /* round up */
-		do_div(rate, c->div);
-	}
-	return rate;
-}
-
-static int tegra20_bus_clk_set_rate(struct clk_hw *hw, unsigned long rate,
-		unsigned long parent_rate)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	int ret = -EINVAL;
-	unsigned long flags;
-	u32 val;
-	int i;
-
-	spin_lock_irqsave(&clock_register_lock, flags);
-
-	val = clk_readl(c->reg);
-	for (i = 1; i <= 4; i++) {
-		if (rate == parent_rate / i) {
-			val &= ~(BUS_CLK_DIV_MASK << c->reg_shift);
-			val |= (i - 1) << c->reg_shift;
-			clk_writel(val, c->reg);
-			c->div = i;
-			c->mul = 1;
-			ret = 0;
-			break;
-		}
-	}
-
-	spin_unlock_irqrestore(&clock_register_lock, flags);
-
-	return ret;
-}
-
-static long tegra20_bus_clk_round_rate(struct clk_hw *hw, unsigned long rate,
-				unsigned long *prate)
-{
-	unsigned long parent_rate = *prate;
-	s64 divider;
-
-	if (rate >= parent_rate)
-		return rate;
-
-	divider = parent_rate;
-	divider += rate - 1;
-	do_div(divider, rate);
-
-	if (divider < 0)
-		return divider;
-
-	if (divider > 4)
-		divider = 4;
-	do_div(parent_rate, divider);
-
-	return parent_rate;
-}
-
-struct clk_ops tegra_bus_ops = {
-	.is_enabled = tegra20_bus_clk_is_enabled,
-	.enable = tegra20_bus_clk_enable,
-	.disable = tegra20_bus_clk_disable,
-	.set_rate = tegra20_bus_clk_set_rate,
-	.round_rate = tegra20_bus_clk_round_rate,
-	.recalc_rate = tegra20_bus_clk_recalc_rate,
-};
-
-/* Blink output functions */
-static int tegra20_blink_clk_is_enabled(struct clk_hw *hw)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	u32 val;
-
-	val = pmc_readl(PMC_CTRL);
-	c->state = (val & PMC_CTRL_BLINK_ENB) ? ON : OFF;
-	return c->state;
-}
-
-static unsigned long tegra20_blink_clk_recalc_rate(struct clk_hw *hw,
-			unsigned long prate)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	u64 rate = prate;
-	u32 val;
-
-	c->mul = 1;
-	val = pmc_readl(c->reg);
-
-	if (val & PMC_BLINK_TIMER_ENB) {
-		unsigned int on_off;
-
-		on_off = (val >> PMC_BLINK_TIMER_DATA_ON_SHIFT) &
-			PMC_BLINK_TIMER_DATA_ON_MASK;
-		val >>= PMC_BLINK_TIMER_DATA_OFF_SHIFT;
-		val &= PMC_BLINK_TIMER_DATA_OFF_MASK;
-		on_off += val;
-		/* each tick in the blink timer is 4 32KHz clocks */
-		c->div = on_off * 4;
-	} else {
-		c->div = 1;
-	}
-
-	if (c->mul != 0 && c->div != 0) {
-		rate *= c->mul;
-		rate += c->div - 1; /* round up */
-		do_div(rate, c->div);
-	}
-	return rate;
-}
-
-static int tegra20_blink_clk_enable(struct clk_hw *hw)
-{
-	u32 val;
-
-	val = pmc_readl(PMC_DPD_PADS_ORIDE);
-	pmc_writel(val | PMC_DPD_PADS_ORIDE_BLINK_ENB, PMC_DPD_PADS_ORIDE);
-
-	val = pmc_readl(PMC_CTRL);
-	pmc_writel(val | PMC_CTRL_BLINK_ENB, PMC_CTRL);
-
-	return 0;
-}
-
-static void tegra20_blink_clk_disable(struct clk_hw *hw)
-{
-	u32 val;
-
-	val = pmc_readl(PMC_CTRL);
-	pmc_writel(val & ~PMC_CTRL_BLINK_ENB, PMC_CTRL);
-
-	val = pmc_readl(PMC_DPD_PADS_ORIDE);
-	pmc_writel(val & ~PMC_DPD_PADS_ORIDE_BLINK_ENB, PMC_DPD_PADS_ORIDE);
-}
-
-static int tegra20_blink_clk_set_rate(struct clk_hw *hw, unsigned long rate,
-		unsigned long parent_rate)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-
-	if (rate >= parent_rate) {
-		c->div = 1;
-		pmc_writel(0, c->reg);
-	} else {
-		unsigned int on_off;
-		u32 val;
-
-		on_off = DIV_ROUND_UP(parent_rate / 8, rate);
-		c->div = on_off * 8;
-
-		val = (on_off & PMC_BLINK_TIMER_DATA_ON_MASK) <<
-			PMC_BLINK_TIMER_DATA_ON_SHIFT;
-		on_off &= PMC_BLINK_TIMER_DATA_OFF_MASK;
-		on_off <<= PMC_BLINK_TIMER_DATA_OFF_SHIFT;
-		val |= on_off;
-		val |= PMC_BLINK_TIMER_ENB;
-		pmc_writel(val, c->reg);
-	}
-
-	return 0;
-}
-
-static long tegra20_blink_clk_round_rate(struct clk_hw *hw, unsigned long rate,
-				unsigned long *prate)
-{
-	int div;
-	int mul;
-	long round_rate = *prate;
-
-	mul = 1;
-
-	if (rate >= *prate) {
-		div = 1;
-	} else {
-		div = DIV_ROUND_UP(*prate / 8, rate);
-		div *= 8;
-	}
-
-	round_rate *= mul;
-	round_rate += div - 1;
-	do_div(round_rate, div);
-
-	return round_rate;
-}
-
-struct clk_ops tegra_blink_clk_ops = {
-	.is_enabled = tegra20_blink_clk_is_enabled,
-	.enable = tegra20_blink_clk_enable,
-	.disable = tegra20_blink_clk_disable,
-	.set_rate = tegra20_blink_clk_set_rate,
-	.round_rate = tegra20_blink_clk_round_rate,
-	.recalc_rate = tegra20_blink_clk_recalc_rate,
-};
-
-/* PLL Functions */
-static int tegra20_pll_clk_wait_for_lock(struct clk_tegra *c)
-{
-	udelay(c->u.pll.lock_delay);
-	return 0;
-}
-
-static int tegra20_pll_clk_is_enabled(struct clk_hw *hw)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	u32 val = clk_readl(c->reg + PLL_BASE);
-
-	c->state = (val & PLL_BASE_ENABLE) ? ON : OFF;
-	return c->state;
-}
-
-static unsigned long tegra20_pll_clk_recalc_rate(struct clk_hw *hw,
-				unsigned long prate)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	u32 val = clk_readl(c->reg + PLL_BASE);
-	u64 rate = prate;
-
-	if (c->flags & PLL_FIXED && !(val & PLL_BASE_OVERRIDE)) {
-		const struct clk_pll_freq_table *sel;
-		for (sel = c->u.pll.freq_table; sel->input_rate != 0; sel++) {
-			if (sel->input_rate == prate &&
-				sel->output_rate == c->u.pll.fixed_rate) {
-				c->mul = sel->n;
-				c->div = sel->m * sel->p;
-				break;
-			}
-		}
-		pr_err("Clock %s has unknown fixed frequency\n",
-			__clk_get_name(hw->clk));
-		BUG();
-	} else if (val & PLL_BASE_BYPASS) {
-		c->mul = 1;
-		c->div = 1;
-	} else {
-		c->mul = (val & PLL_BASE_DIVN_MASK) >> PLL_BASE_DIVN_SHIFT;
-		c->div = (val & PLL_BASE_DIVM_MASK) >> PLL_BASE_DIVM_SHIFT;
-		if (c->flags & PLLU)
-			c->div *= (val & PLLU_BASE_POST_DIV) ? 1 : 2;
-		else
-			c->div *= (val & PLL_BASE_DIVP_MASK) ? 2 : 1;
-	}
-
-	if (c->mul != 0 && c->div != 0) {
-		rate *= c->mul;
-		rate += c->div - 1; /* round up */
-		do_div(rate, c->div);
-	}
-	return rate;
-}
-
-static int tegra20_pll_clk_enable(struct clk_hw *hw)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	u32 val;
-	pr_debug("%s on clock %s\n", __func__, __clk_get_name(hw->clk));
-
-	val = clk_readl(c->reg + PLL_BASE);
-	val &= ~PLL_BASE_BYPASS;
-	val |= PLL_BASE_ENABLE;
-	clk_writel(val, c->reg + PLL_BASE);
-
-	tegra20_pll_clk_wait_for_lock(c);
-
-	return 0;
-}
-
-static void tegra20_pll_clk_disable(struct clk_hw *hw)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	u32 val;
-	pr_debug("%s on clock %s\n", __func__, __clk_get_name(hw->clk));
-
-	val = clk_readl(c->reg);
-	val &= ~(PLL_BASE_BYPASS | PLL_BASE_ENABLE);
-	clk_writel(val, c->reg);
-}
-
-static int tegra20_pll_clk_set_rate(struct clk_hw *hw, unsigned long rate,
-		unsigned long parent_rate)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	unsigned long input_rate = parent_rate;
-	const struct clk_pll_freq_table *sel;
-	u32 val;
-
-	pr_debug("%s: %s %lu\n", __func__, __clk_get_name(hw->clk), rate);
-
-	if (c->flags & PLL_FIXED) {
-		int ret = 0;
-		if (rate != c->u.pll.fixed_rate) {
-			pr_err("%s: Can not change %s fixed rate %lu to %lu\n",
-				__func__, __clk_get_name(hw->clk),
-				c->u.pll.fixed_rate, rate);
-			ret = -EINVAL;
-		}
-		return ret;
-	}
-
-	for (sel = c->u.pll.freq_table; sel->input_rate != 0; sel++) {
-		if (sel->input_rate == input_rate && sel->output_rate == rate) {
-			c->mul = sel->n;
-			c->div = sel->m * sel->p;
-
-			val = clk_readl(c->reg + PLL_BASE);
-			if (c->flags & PLL_FIXED)
-				val |= PLL_BASE_OVERRIDE;
-			val &= ~(PLL_BASE_DIVP_MASK | PLL_BASE_DIVN_MASK |
-				 PLL_BASE_DIVM_MASK);
-			val |= (sel->m << PLL_BASE_DIVM_SHIFT) |
-				(sel->n << PLL_BASE_DIVN_SHIFT);
-			BUG_ON(sel->p < 1 || sel->p > 2);
-			if (c->flags & PLLU) {
-				if (sel->p == 1)
-					val |= PLLU_BASE_POST_DIV;
-			} else {
-				if (sel->p == 2)
-					val |= 1 << PLL_BASE_DIVP_SHIFT;
-			}
-			clk_writel(val, c->reg + PLL_BASE);
-
-			if (c->flags & PLL_HAS_CPCON) {
-				val = clk_readl(c->reg + PLL_MISC(c));
-				val &= ~PLL_MISC_CPCON_MASK;
-				val |= sel->cpcon << PLL_MISC_CPCON_SHIFT;
-				clk_writel(val, c->reg + PLL_MISC(c));
-			}
-
-			if (c->state == ON)
-				tegra20_pll_clk_enable(hw);
-			return 0;
-		}
-	}
-	return -EINVAL;
-}
-
-static long tegra20_pll_clk_round_rate(struct clk_hw *hw, unsigned long rate,
-				unsigned long *prate)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	const struct clk_pll_freq_table *sel;
-	unsigned long input_rate = *prate;
-	u64 output_rate = *prate;
-	int mul;
-	int div;
-
-	if (c->flags & PLL_FIXED)
-		return c->u.pll.fixed_rate;
-
-	for (sel = c->u.pll.freq_table; sel->input_rate != 0; sel++)
-		if (sel->input_rate == input_rate && sel->output_rate == rate) {
-			mul = sel->n;
-			div = sel->m * sel->p;
-			break;
-		}
-
-	if (sel->input_rate == 0)
-		return -EINVAL;
-
-	output_rate *= mul;
-	output_rate += div - 1; /* round up */
-	do_div(output_rate, div);
-
-	return output_rate;
-}
-
-struct clk_ops tegra_pll_ops = {
-	.is_enabled = tegra20_pll_clk_is_enabled,
-	.enable = tegra20_pll_clk_enable,
-	.disable = tegra20_pll_clk_disable,
-	.set_rate = tegra20_pll_clk_set_rate,
-	.recalc_rate = tegra20_pll_clk_recalc_rate,
-	.round_rate = tegra20_pll_clk_round_rate,
-};
-
-static void tegra20_pllx_clk_init(struct clk_hw *hw)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-
-	if (tegra_sku_id == 7)
-		c->max_rate = 750000000;
-}
-
-struct clk_ops tegra_pllx_ops = {
-	.init = tegra20_pllx_clk_init,
-	.is_enabled = tegra20_pll_clk_is_enabled,
-	.enable = tegra20_pll_clk_enable,
-	.disable = tegra20_pll_clk_disable,
-	.set_rate = tegra20_pll_clk_set_rate,
-	.recalc_rate = tegra20_pll_clk_recalc_rate,
-	.round_rate = tegra20_pll_clk_round_rate,
-};
-
-static int tegra20_plle_clk_enable(struct clk_hw *hw)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	u32 val;
-
-	pr_debug("%s on clock %s\n", __func__, __clk_get_name(hw->clk));
-
-	mdelay(1);
-
-	val = clk_readl(c->reg + PLL_BASE);
-	if (!(val & PLLE_MISC_READY))
-		return -EBUSY;
-
-	val = clk_readl(c->reg + PLL_BASE);
-	val |= PLL_BASE_ENABLE | PLL_BASE_BYPASS;
-	clk_writel(val, c->reg + PLL_BASE);
-
-	return 0;
-}
-
-struct clk_ops tegra_plle_ops = {
-	.is_enabled = tegra20_pll_clk_is_enabled,
-	.enable = tegra20_plle_clk_enable,
-	.set_rate = tegra20_pll_clk_set_rate,
-	.recalc_rate = tegra20_pll_clk_recalc_rate,
-	.round_rate = tegra20_pll_clk_round_rate,
-};
-
-/* Clock divider ops */
-static int tegra20_pll_div_clk_is_enabled(struct clk_hw *hw)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	u32 val = clk_readl(c->reg);
-
-	val >>= c->reg_shift;
-	c->state = (val & PLL_OUT_CLKEN) ? ON : OFF;
-	if (!(val & PLL_OUT_RESET_DISABLE))
-		c->state = OFF;
-	return c->state;
-}
-
-static unsigned long tegra20_pll_div_clk_recalc_rate(struct clk_hw *hw,
-			unsigned long prate)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	u64 rate = prate;
-	u32 val = clk_readl(c->reg);
-	u32 divu71;
-
-	val >>= c->reg_shift;
-
-	if (c->flags & DIV_U71) {
-		divu71 = (val & PLL_OUT_RATIO_MASK) >> PLL_OUT_RATIO_SHIFT;
-		c->div = (divu71 + 2);
-		c->mul = 2;
-	} else if (c->flags & DIV_2) {
-		c->div = 2;
-		c->mul = 1;
-	} else {
-		c->div = 1;
-		c->mul = 1;
-	}
-
-	rate *= c->mul;
-	rate += c->div - 1; /* round up */
-	do_div(rate, c->div);
-
-	return rate;
-}
-
-static int tegra20_pll_div_clk_enable(struct clk_hw *hw)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	unsigned long flags;
-	u32 new_val;
-	u32 val;
-
-	pr_debug("%s: %s\n", __func__, __clk_get_name(hw->clk));
-
-	if (c->flags & DIV_U71) {
-		spin_lock_irqsave(&clock_register_lock, flags);
-		val = clk_readl(c->reg);
-		new_val = val >> c->reg_shift;
-		new_val &= 0xFFFF;
-
-		new_val |= PLL_OUT_CLKEN | PLL_OUT_RESET_DISABLE;
-
-		val &= ~(0xFFFF << c->reg_shift);
-		val |= new_val << c->reg_shift;
-		clk_writel(val, c->reg);
-		spin_unlock_irqrestore(&clock_register_lock, flags);
-		return 0;
-	} else if (c->flags & DIV_2) {
-		BUG_ON(!(c->flags & PLLD));
-		spin_lock_irqsave(&clock_register_lock, flags);
-		val = clk_readl(c->reg);
-		val &= ~PLLD_MISC_DIV_RST;
-		clk_writel(val, c->reg);
-		spin_unlock_irqrestore(&clock_register_lock, flags);
-		return 0;
-	}
-	return -EINVAL;
-}
-
-static void tegra20_pll_div_clk_disable(struct clk_hw *hw)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	unsigned long flags;
-	u32 new_val;
-	u32 val;
-
-	pr_debug("%s: %s\n", __func__, __clk_get_name(hw->clk));
-
-	if (c->flags & DIV_U71) {
-		spin_lock_irqsave(&clock_register_lock, flags);
-		val = clk_readl(c->reg);
-		new_val = val >> c->reg_shift;
-		new_val &= 0xFFFF;
-
-		new_val &= ~(PLL_OUT_CLKEN | PLL_OUT_RESET_DISABLE);
-
-		val &= ~(0xFFFF << c->reg_shift);
-		val |= new_val << c->reg_shift;
-		clk_writel(val, c->reg);
-		spin_unlock_irqrestore(&clock_register_lock, flags);
-	} else if (c->flags & DIV_2) {
-		BUG_ON(!(c->flags & PLLD));
-		spin_lock_irqsave(&clock_register_lock, flags);
-		val = clk_readl(c->reg);
-		val |= PLLD_MISC_DIV_RST;
-		clk_writel(val, c->reg);
-		spin_unlock_irqrestore(&clock_register_lock, flags);
-	}
-}
-
-static int tegra20_pll_div_clk_set_rate(struct clk_hw *hw, unsigned long rate,
-		unsigned long parent_rate)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	unsigned long flags;
-	int divider_u71;
-	u32 new_val;
-	u32 val;
-
-	pr_debug("%s: %s %lu\n", __func__, __clk_get_name(hw->clk), rate);
-
-	if (c->flags & DIV_U71) {
-		divider_u71 = clk_div71_get_divider(parent_rate, rate);
-		if (divider_u71 >= 0) {
-			spin_lock_irqsave(&clock_register_lock, flags);
-			val = clk_readl(c->reg);
-			new_val = val >> c->reg_shift;
-			new_val &= 0xFFFF;
-			if (c->flags & DIV_U71_FIXED)
-				new_val |= PLL_OUT_OVERRIDE;
-			new_val &= ~PLL_OUT_RATIO_MASK;
-			new_val |= divider_u71 << PLL_OUT_RATIO_SHIFT;
-
-			val &= ~(0xFFFF << c->reg_shift);
-			val |= new_val << c->reg_shift;
-			clk_writel(val, c->reg);
-			c->div = divider_u71 + 2;
-			c->mul = 2;
-			spin_unlock_irqrestore(&clock_register_lock, flags);
-			return 0;
-		}
-	} else if (c->flags & DIV_2) {
-		if (parent_rate == rate * 2)
-			return 0;
-	}
-	return -EINVAL;
-}
-
-static long tegra20_pll_div_clk_round_rate(struct clk_hw *hw, unsigned long rate,
-				unsigned long *prate)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	unsigned long parent_rate = *prate;
-	int divider;
-
-	pr_debug("%s: %s %lu\n", __func__, __clk_get_name(hw->clk), rate);
-
-	if (c->flags & DIV_U71) {
-		divider = clk_div71_get_divider(parent_rate, rate);
-		if (divider < 0)
-			return divider;
-		return DIV_ROUND_UP(parent_rate * 2, divider + 2);
-	} else if (c->flags & DIV_2) {
-		return DIV_ROUND_UP(parent_rate, 2);
-	}
-	return -EINVAL;
-}
-
-struct clk_ops tegra_pll_div_ops = {
-	.is_enabled = tegra20_pll_div_clk_is_enabled,
-	.enable = tegra20_pll_div_clk_enable,
-	.disable = tegra20_pll_div_clk_disable,
-	.set_rate = tegra20_pll_div_clk_set_rate,
-	.round_rate = tegra20_pll_div_clk_round_rate,
-	.recalc_rate = tegra20_pll_div_clk_recalc_rate,
-};
-
-/* Periph clk ops */
-
-static int tegra20_periph_clk_is_enabled(struct clk_hw *hw)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-
-	c->state = ON;
-
-	if (!c->u.periph.clk_num)
-		goto out;
-
-	if (!(clk_readl(CLK_OUT_ENB + PERIPH_CLK_TO_ENB_REG(c)) &
-			PERIPH_CLK_TO_ENB_BIT(c)))
-		c->state = OFF;
-
-	if (!(c->flags & PERIPH_NO_RESET))
-		if (clk_readl(RST_DEVICES + PERIPH_CLK_TO_ENB_REG(c)) &
-				PERIPH_CLK_TO_ENB_BIT(c))
-			c->state = OFF;
-
-out:
-	return c->state;
-}
-
-static int tegra20_periph_clk_enable(struct clk_hw *hw)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	unsigned long flags;
-	u32 val;
-
-	pr_debug("%s on clock %s\n", __func__, __clk_get_name(hw->clk));
-
-	if (!c->u.periph.clk_num)
-		return 0;
-
-	tegra_periph_clk_enable_refcount[c->u.periph.clk_num]++;
-	if (tegra_periph_clk_enable_refcount[c->u.periph.clk_num] > 1)
-		return 0;
-
-	spin_lock_irqsave(&clock_register_lock, flags);
-
-	clk_writel(PERIPH_CLK_TO_ENB_BIT(c),
-		CLK_OUT_ENB_SET + PERIPH_CLK_TO_ENB_SET_REG(c));
-	if (!(c->flags & PERIPH_NO_RESET) && !(c->flags & PERIPH_MANUAL_RESET))
-		clk_writel(PERIPH_CLK_TO_ENB_BIT(c),
-			RST_DEVICES_CLR + PERIPH_CLK_TO_ENB_SET_REG(c));
-	if (c->flags & PERIPH_EMC_ENB) {
-		/* The EMC peripheral clock has 2 extra enable bits */
-		/* FIXME: Do they need to be disabled? */
-		val = clk_readl(c->reg);
-		val |= 0x3 << 24;
-		clk_writel(val, c->reg);
-	}
-
-	spin_unlock_irqrestore(&clock_register_lock, flags);
-
-	return 0;
-}
-
-static void tegra20_periph_clk_disable(struct clk_hw *hw)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	unsigned long flags;
-
-	pr_debug("%s on clock %s\n", __func__, __clk_get_name(hw->clk));
-
-	if (!c->u.periph.clk_num)
-		return;
-
-	tegra_periph_clk_enable_refcount[c->u.periph.clk_num]--;
-
-	if (tegra_periph_clk_enable_refcount[c->u.periph.clk_num] > 0)
-		return;
-
-	spin_lock_irqsave(&clock_register_lock, flags);
-
-	clk_writel(PERIPH_CLK_TO_ENB_BIT(c),
-		CLK_OUT_ENB_CLR + PERIPH_CLK_TO_ENB_SET_REG(c));
-
-	spin_unlock_irqrestore(&clock_register_lock, flags);
-}
-
-void tegra2_periph_clk_reset(struct clk_hw *hw, bool assert)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	unsigned long base = assert ? RST_DEVICES_SET : RST_DEVICES_CLR;
-
-	pr_debug("%s %s on clock %s\n", __func__,
-		assert ? "assert" : "deassert", __clk_get_name(hw->clk));
-
-	BUG_ON(!c->u.periph.clk_num);
-
-	if (!(c->flags & PERIPH_NO_RESET))
-		clk_writel(PERIPH_CLK_TO_ENB_BIT(c),
-			   base + PERIPH_CLK_TO_ENB_SET_REG(c));
-}
-
-static int tegra20_periph_clk_set_parent(struct clk_hw *hw, u8 index)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	u32 val;
-	u32 mask;
-	u32 shift;
-
-	pr_debug("%s: %s %d\n", __func__, __clk_get_name(hw->clk), index);
-
-	if (c->flags & MUX_PWM) {
-		shift = PERIPH_CLK_SOURCE_PWM_SHIFT;
-		mask = PERIPH_CLK_SOURCE_PWM_MASK;
-	} else {
-		shift = PERIPH_CLK_SOURCE_SHIFT;
-		mask = PERIPH_CLK_SOURCE_MASK;
-	}
-
-	val = clk_readl(c->reg);
-	val &= ~mask;
-	val |= (index) << shift;
-
-	clk_writel(val, c->reg);
-
-	return 0;
-}
-
-static u8 tegra20_periph_clk_get_parent(struct clk_hw *hw)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	u32 val = clk_readl(c->reg);
-	u32 mask;
-	u32 shift;
-
-	if (c->flags & MUX_PWM) {
-		shift = PERIPH_CLK_SOURCE_PWM_SHIFT;
-		mask = PERIPH_CLK_SOURCE_PWM_MASK;
-	} else {
-		shift = PERIPH_CLK_SOURCE_SHIFT;
-		mask = PERIPH_CLK_SOURCE_MASK;
-	}
-
-	if (c->flags & MUX)
-		return (val & mask) >> shift;
-	else
-		return 0;
-}
-
-static unsigned long tegra20_periph_clk_recalc_rate(struct clk_hw *hw,
-			unsigned long prate)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	unsigned long rate = prate;
-	u32 val = clk_readl(c->reg);
-
-	if (c->flags & DIV_U71) {
-		u32 divu71 = val & PERIPH_CLK_SOURCE_DIVU71_MASK;
-		c->div = divu71 + 2;
-		c->mul = 2;
-	} else if (c->flags & DIV_U16) {
-		u32 divu16 = val & PERIPH_CLK_SOURCE_DIVU16_MASK;
-		c->div = divu16 + 1;
-		c->mul = 1;
-	} else {
-		c->div = 1;
-		c->mul = 1;
-		return rate;
-	}
-
-	if (c->mul != 0 && c->div != 0) {
-		rate *= c->mul;
-		rate += c->div - 1; /* round up */
-		do_div(rate, c->div);
-	}
-
-	return rate;
-}
-
-static int tegra20_periph_clk_set_rate(struct clk_hw *hw, unsigned long rate,
-		unsigned long parent_rate)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	u32 val;
-	int divider;
-
-	val = clk_readl(c->reg);
-
-	if (c->flags & DIV_U71) {
-		divider = clk_div71_get_divider(parent_rate, rate);
-
-		if (divider >= 0) {
-			val = clk_readl(c->reg);
-			val &= ~PERIPH_CLK_SOURCE_DIVU71_MASK;
-			val |= divider;
-			clk_writel(val, c->reg);
-			c->div = divider + 2;
-			c->mul = 2;
-			return 0;
-		}
-	} else if (c->flags & DIV_U16) {
-		divider = clk_div16_get_divider(parent_rate, rate);
-		if (divider >= 0) {
-			val = clk_readl(c->reg);
-			val &= ~PERIPH_CLK_SOURCE_DIVU16_MASK;
-			val |= divider;
-			clk_writel(val, c->reg);
-			c->div = divider + 1;
-			c->mul = 1;
-			return 0;
-		}
-	} else if (parent_rate <= rate) {
-		c->div = 1;
-		c->mul = 1;
-		return 0;
-	}
-
-	return -EINVAL;
-}
-
-static long tegra20_periph_clk_round_rate(struct clk_hw *hw,
-	unsigned long rate, unsigned long *prate)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	unsigned long parent_rate = __clk_get_rate(__clk_get_parent(hw->clk));
-	int divider;
-
-	pr_debug("%s: %s %lu\n", __func__, __clk_get_name(hw->clk), rate);
-
-	if (prate)
-		parent_rate = *prate;
-
-	if (c->flags & DIV_U71) {
-		divider = clk_div71_get_divider(parent_rate, rate);
-		if (divider < 0)
-			return divider;
-
-		return DIV_ROUND_UP(parent_rate * 2, divider + 2);
-	} else if (c->flags & DIV_U16) {
-		divider = clk_div16_get_divider(parent_rate, rate);
-		if (divider < 0)
-			return divider;
-		return DIV_ROUND_UP(parent_rate, divider + 1);
-	}
-	return -EINVAL;
-}
-
-struct clk_ops tegra_periph_clk_ops = {
-	.is_enabled = tegra20_periph_clk_is_enabled,
-	.enable = tegra20_periph_clk_enable,
-	.disable = tegra20_periph_clk_disable,
-	.set_parent = tegra20_periph_clk_set_parent,
-	.get_parent = tegra20_periph_clk_get_parent,
-	.set_rate = tegra20_periph_clk_set_rate,
-	.round_rate = tegra20_periph_clk_round_rate,
-	.recalc_rate = tegra20_periph_clk_recalc_rate,
-};
-
-/* External memory controller clock ops */
-static void tegra20_emc_clk_init(struct clk_hw *hw)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	c->max_rate = __clk_get_rate(hw->clk);
-}
-
-static long tegra20_emc_clk_round_rate(struct clk_hw *hw, unsigned long rate,
-				unsigned long *prate)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	long emc_rate;
-	long clk_rate;
-
-	/*
-	 * The slowest entry in the EMC clock table that is at least as
-	 * fast as rate.
-	 */
-	emc_rate = tegra_emc_round_rate(rate);
-	if (emc_rate < 0)
-		return c->max_rate;
-
-	/*
-	 * The fastest rate the PLL will generate that is at most the
-	 * requested rate.
-	 */
-	clk_rate = tegra20_periph_clk_round_rate(hw, emc_rate, NULL);
-
-	/*
-	 * If this fails, and emc_rate > clk_rate, it's because the maximum
-	 * rate in the EMC tables is larger than the maximum rate of the EMC
-	 * clock. The EMC clock's max rate is the rate it was running when the
-	 * kernel booted. Such a mismatch is probably due to using the wrong
-	 * BCT, i.e. using a Tegra20 BCT with an EMC table written for Tegra25.
-	 */
-	WARN_ONCE(emc_rate != clk_rate,
-		"emc_rate %ld != clk_rate %ld",
-		emc_rate, clk_rate);
-
-	return emc_rate;
-}
-
-static int tegra20_emc_clk_set_rate(struct clk_hw *hw, unsigned long rate,
-		unsigned long parent_rate)
-{
-	int ret;
-
-	/*
-	 * The Tegra2 memory controller has an interlock with the clock
-	 * block that allows memory shadowed registers to be updated,
-	 * and then transfer them to the main registers at the same
-	 * time as the clock update without glitches.
-	 */
-	ret = tegra_emc_set_rate(rate);
-	if (ret < 0)
-		return ret;
-
-	ret = tegra20_periph_clk_set_rate(hw, rate, parent_rate);
-	udelay(1);
-
-	return ret;
-}
-
-struct clk_ops tegra_emc_clk_ops = {
-	.init = tegra20_emc_clk_init,
-	.is_enabled = tegra20_periph_clk_is_enabled,
-	.enable = tegra20_periph_clk_enable,
-	.disable = tegra20_periph_clk_disable,
-	.set_parent = tegra20_periph_clk_set_parent,
-	.get_parent = tegra20_periph_clk_get_parent,
-	.set_rate = tegra20_emc_clk_set_rate,
-	.round_rate = tegra20_emc_clk_round_rate,
-	.recalc_rate = tegra20_periph_clk_recalc_rate,
-};
-
-/* Clock doubler ops */
-static int tegra20_clk_double_is_enabled(struct clk_hw *hw)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-
-	c->state = ON;
-
-	if (!c->u.periph.clk_num)
-		goto out;
-
-	if (!(clk_readl(CLK_OUT_ENB + PERIPH_CLK_TO_ENB_REG(c)) &
-			PERIPH_CLK_TO_ENB_BIT(c)))
-		c->state = OFF;
-
-out:
-	return c->state;
-};
-
-static unsigned long tegra20_clk_double_recalc_rate(struct clk_hw *hw,
-			unsigned long prate)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	u64 rate = prate;
-
-	c->mul = 2;
-	c->div = 1;
-
-	rate *= c->mul;
-	rate += c->div - 1; /* round up */
-	do_div(rate, c->div);
-
-	return rate;
-}
-
-static long tegra20_clk_double_round_rate(struct clk_hw *hw, unsigned long rate,
-				unsigned long *prate)
-{
-	unsigned long output_rate = *prate;
-
-	do_div(output_rate, 2);
-	return output_rate;
-}
-
-static int tegra20_clk_double_set_rate(struct clk_hw *hw, unsigned long rate,
-		unsigned long parent_rate)
-{
-	if (rate != 2 * parent_rate)
-		return -EINVAL;
-	return 0;
-}
-
-struct clk_ops tegra_clk_double_ops = {
-	.is_enabled = tegra20_clk_double_is_enabled,
-	.enable = tegra20_periph_clk_enable,
-	.disable = tegra20_periph_clk_disable,
-	.set_rate = tegra20_clk_double_set_rate,
-	.recalc_rate = tegra20_clk_double_recalc_rate,
-	.round_rate = tegra20_clk_double_round_rate,
-};
-
-/* Audio sync clock ops */
-static int tegra20_audio_sync_clk_is_enabled(struct clk_hw *hw)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	u32 val = clk_readl(c->reg);
-
-	c->state = (val & (1<<4)) ? OFF : ON;
-	return c->state;
-}
-
-static int tegra20_audio_sync_clk_enable(struct clk_hw *hw)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-
-	clk_writel(0, c->reg);
-	return 0;
-}
-
-static void tegra20_audio_sync_clk_disable(struct clk_hw *hw)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	clk_writel(1, c->reg);
-}
-
-static u8 tegra20_audio_sync_clk_get_parent(struct clk_hw *hw)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	u32 val = clk_readl(c->reg);
-	int source;
-
-	source = val & 0xf;
-	return source;
-}
-
-static int tegra20_audio_sync_clk_set_parent(struct clk_hw *hw, u8 index)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	u32 val;
-
-	val = clk_readl(c->reg);
-	val &= ~0xf;
-	val |= index;
-
-	clk_writel(val, c->reg);
-
-	return 0;
-}
-
-struct clk_ops tegra_audio_sync_clk_ops = {
-	.is_enabled = tegra20_audio_sync_clk_is_enabled,
-	.enable = tegra20_audio_sync_clk_enable,
-	.disable = tegra20_audio_sync_clk_disable,
-	.set_parent = tegra20_audio_sync_clk_set_parent,
-	.get_parent = tegra20_audio_sync_clk_get_parent,
-};
-
-/* cdev1 and cdev2 (dap_mclk1 and dap_mclk2) ops */
-
-static int tegra20_cdev_clk_is_enabled(struct clk_hw *hw)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	/* We could un-tristate the cdev1 or cdev2 pingroup here; this is
-	 * currently done in the pinmux code. */
-	c->state = ON;
-
-	BUG_ON(!c->u.periph.clk_num);
-
-	if (!(clk_readl(CLK_OUT_ENB + PERIPH_CLK_TO_ENB_REG(c)) &
-			PERIPH_CLK_TO_ENB_BIT(c)))
-		c->state = OFF;
-	return c->state;
-}
-
-static int tegra20_cdev_clk_enable(struct clk_hw *hw)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	BUG_ON(!c->u.periph.clk_num);
-
-	clk_writel(PERIPH_CLK_TO_ENB_BIT(c),
-		CLK_OUT_ENB_SET + PERIPH_CLK_TO_ENB_SET_REG(c));
-	return 0;
-}
-
-static void tegra20_cdev_clk_disable(struct clk_hw *hw)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	BUG_ON(!c->u.periph.clk_num);
-
-	clk_writel(PERIPH_CLK_TO_ENB_BIT(c),
-		CLK_OUT_ENB_CLR + PERIPH_CLK_TO_ENB_SET_REG(c));
-}
-
-static unsigned long tegra20_cdev_recalc_rate(struct clk_hw *hw,
-			unsigned long prate)
-{
-	return to_clk_tegra(hw)->fixed_rate;
-}
-
-struct clk_ops tegra_cdev_clk_ops = {
-	.is_enabled = tegra20_cdev_clk_is_enabled,
-	.enable = tegra20_cdev_clk_enable,
-	.disable = tegra20_cdev_clk_disable,
-	.recalc_rate = tegra20_cdev_recalc_rate,
-};
-
-/* Tegra20 CPU clock and reset control functions */
-static void tegra20_wait_cpu_in_reset(u32 cpu)
-{
-	unsigned int reg;
-
-	do {
-		reg = readl(reg_clk_base +
-			    TEGRA_CLK_RST_CONTROLLER_RST_CPU_CMPLX_SET);
-		cpu_relax();
-	} while (!(reg & (1 << cpu)));	/* check CPU been reset or not */
-
-	return;
-}
-
-static void tegra20_put_cpu_in_reset(u32 cpu)
-{
-	writel(CPU_RESET(cpu),
-	       reg_clk_base + TEGRA_CLK_RST_CONTROLLER_RST_CPU_CMPLX_SET);
-	dmb();
-}
-
-static void tegra20_cpu_out_of_reset(u32 cpu)
-{
-	writel(CPU_RESET(cpu),
-	       reg_clk_base + TEGRA_CLK_RST_CONTROLLER_RST_CPU_CMPLX_CLR);
-	wmb();
-}
-
-static void tegra20_enable_cpu_clock(u32 cpu)
-{
-	unsigned int reg;
-
-	reg = readl(reg_clk_base + TEGRA_CLK_RST_CONTROLLER_CLK_CPU_CMPLX);
-	writel(reg & ~CPU_CLOCK(cpu),
-	       reg_clk_base + TEGRA_CLK_RST_CONTROLLER_CLK_CPU_CMPLX);
-	barrier();
-	reg = readl(reg_clk_base + TEGRA_CLK_RST_CONTROLLER_CLK_CPU_CMPLX);
-}
-
-static void tegra20_disable_cpu_clock(u32 cpu)
-{
-	unsigned int reg;
-
-	reg = readl(reg_clk_base + TEGRA_CLK_RST_CONTROLLER_CLK_CPU_CMPLX);
-	writel(reg | CPU_CLOCK(cpu),
-	       reg_clk_base + TEGRA_CLK_RST_CONTROLLER_CLK_CPU_CMPLX);
-}
-
-static struct tegra_cpu_car_ops tegra20_cpu_car_ops = {
-	.wait_for_reset	= tegra20_wait_cpu_in_reset,
-	.put_in_reset	= tegra20_put_cpu_in_reset,
-	.out_of_reset	= tegra20_cpu_out_of_reset,
-	.enable_clock	= tegra20_enable_cpu_clock,
-	.disable_clock	= tegra20_disable_cpu_clock,
-};
-
-void __init tegra20_cpu_car_ops_init(void)
-{
-	tegra_cpu_car_ops = &tegra20_cpu_car_ops;
-}
diff --git a/arch/arm/mach-tegra/tegra20_clocks.h b/arch/arm/mach-tegra/tegra20_clocks.h
deleted file mode 100644
index 8bfd31bcc490..000000000000
--- a/arch/arm/mach-tegra/tegra20_clocks.h
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Copyright (c) 2012, NVIDIA CORPORATION.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef __MACH_TEGRA20_CLOCK_H
-#define __MACH_TEGRA20_CLOCK_H
-
-extern struct clk_ops tegra_clk_32k_ops;
-extern struct clk_ops tegra_pll_ops;
-extern struct clk_ops tegra_clk_m_ops;
-extern struct clk_ops tegra_pll_div_ops;
-extern struct clk_ops tegra_pllx_ops;
-extern struct clk_ops tegra_plle_ops;
-extern struct clk_ops tegra_clk_double_ops;
-extern struct clk_ops tegra_cdev_clk_ops;
-extern struct clk_ops tegra_audio_sync_clk_ops;
-extern struct clk_ops tegra_super_ops;
-extern struct clk_ops tegra_cpu_ops;
-extern struct clk_ops tegra_twd_ops;
-extern struct clk_ops tegra_cop_ops;
-extern struct clk_ops tegra_bus_ops;
-extern struct clk_ops tegra_blink_clk_ops;
-extern struct clk_ops tegra_emc_clk_ops;
-extern struct clk_ops tegra_periph_clk_ops;
-extern struct clk_ops tegra_clk_shared_bus_ops;
-
-void tegra2_periph_clk_reset(struct clk_hw *hw, bool assert);
-void tegra2_cop_clk_reset(struct clk_hw *hw, bool assert);
-
-#endif
diff --git a/arch/arm/mach-tegra/tegra20_clocks_data.c b/arch/arm/mach-tegra/tegra20_clocks_data.c
deleted file mode 100644
index 022cdaef7ca5..000000000000
--- a/arch/arm/mach-tegra/tegra20_clocks_data.c
+++ /dev/null
@@ -1,1143 +0,0 @@
-/*
- * arch/arm/mach-tegra/tegra2_clocks.c
- *
- * Copyright (C) 2010 Google, Inc.
- * Copyright (c) 2012 NVIDIA CORPORATION.  All rights reserved.
- *
- * Author:
- *	Colin Cross <ccross@google.com>
- *
- * This software is licensed under the terms of the GNU General Public
- * License version 2, as published by the Free Software Foundation, and
- * may be copied, distributed, and modified under those terms.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- */
-
-#include <linux/clk-private.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/list.h>
-#include <linux/spinlock.h>
-#include <linux/delay.h>
-#include <linux/io.h>
-#include <linux/clk.h>
-#include <linux/clk/tegra.h>
-
-#include "clock.h"
-#include "fuse.h"
-#include "tegra2_emc.h"
-#include "tegra20_clocks.h"
-
-/* Clock definitions */
-
-#define DEFINE_CLK_TEGRA(_name, _rate, _ops, _flags,		\
-		   _parent_names, _parents, _parent)		\
-	static struct clk tegra_##_name = {			\
-		.hw = &tegra_##_name##_hw.hw,			\
-		.name = #_name,					\
-		.rate = _rate,					\
-		.ops = _ops,					\
-		.flags = _flags,				\
-		.parent_names = _parent_names,			\
-		.parents = _parents,				\
-		.num_parents = ARRAY_SIZE(_parent_names),	\
-		.parent = _parent,				\
-	};
-
-static struct clk tegra_clk_32k;
-static struct clk_tegra tegra_clk_32k_hw = {
-	.hw = {
-		.clk = &tegra_clk_32k,
-	},
-	.fixed_rate = 32768,
-};
-
-static struct clk tegra_clk_32k = {
-	.name = "clk_32k",
-	.rate = 32768,
-	.ops = &tegra_clk_32k_ops,
-	.hw = &tegra_clk_32k_hw.hw,
-	.flags = CLK_IS_ROOT,
-};
-
-static struct clk tegra_clk_m;
-static struct clk_tegra tegra_clk_m_hw = {
-	.hw = {
-		.clk = &tegra_clk_m,
-	},
-	.flags = ENABLE_ON_INIT,
-	.reg = 0x1fc,
-	.reg_shift = 28,
-	.max_rate = 26000000,
-	.fixed_rate = 0,
-};
-
-static struct clk tegra_clk_m = {
-	.name = "clk_m",
-	.ops = &tegra_clk_m_ops,
-	.hw = &tegra_clk_m_hw.hw,
-	.flags = CLK_IS_ROOT,
-};
-
-#define DEFINE_PLL(_name, _flags, _reg, _max_rate, _input_min,	\
-		   _input_max, _cf_min, _cf_max, _vco_min,	\
-		   _vco_max, _freq_table, _lock_delay, _ops,	\
-		   _fixed_rate, _parent)			\
-	static const char *tegra_##_name##_parent_names[] = {	\
-		#_parent,					\
-	};							\
-	static struct clk *tegra_##_name##_parents[] = {	\
-		&tegra_##_parent,				\
-	};							\
-	static struct clk tegra_##_name;			\
-	static struct clk_tegra tegra_##_name##_hw = {		\
-		.hw = {						\
-			.clk = &tegra_##_name,			\
-		},						\
-		.flags = _flags,				\
-		.reg = _reg,					\
-		.max_rate = _max_rate,				\
-		.u.pll = {					\
-			.input_min = _input_min,		\
-			.input_max = _input_max,		\
-			.cf_min = _cf_min,			\
-			.cf_max = _cf_max,			\
-			.vco_min = _vco_min,			\
-			.vco_max = _vco_max,			\
-			.freq_table = _freq_table,		\
-			.lock_delay = _lock_delay,		\
-			.fixed_rate = _fixed_rate,		\
-		},						\
-	};							\
-	static struct clk tegra_##_name = {			\
-		.name = #_name,					\
-		.ops = &_ops,					\
-		.hw = &tegra_##_name##_hw.hw,			\
-		.parent = &tegra_##_parent,			\
-		.parent_names = tegra_##_name##_parent_names,	\
-		.parents = tegra_##_name##_parents,		\
-		.num_parents = 1,				\
-	};
-
-#define DEFINE_PLL_OUT(_name, _flags, _reg, _reg_shift,		\
-		_max_rate, _ops, _parent, _clk_flags)		\
-	static const char *tegra_##_name##_parent_names[] = {	\
-		#_parent,					\
-	};							\
-	static struct clk *tegra_##_name##_parents[] = {	\
-		&tegra_##_parent,				\
-	};							\
-	static struct clk tegra_##_name;			\
-	static struct clk_tegra tegra_##_name##_hw = {		\
-		.hw = {						\
-			.clk = &tegra_##_name,			\
-		},						\
-		.flags = _flags,				\
-		.reg = _reg,					\
-		.max_rate = _max_rate,				\
-		.reg_shift = _reg_shift,			\
-	};							\
-	static struct clk tegra_##_name = {			\
-		.name = #_name,					\
-		.ops = &tegra_pll_div_ops,			\
-		.hw = &tegra_##_name##_hw.hw,			\
-		.parent = &tegra_##_parent,			\
-		.parent_names = tegra_##_name##_parent_names,	\
-		.parents = tegra_##_name##_parents,		\
-		.num_parents = 1,				\
-		.flags = _clk_flags,				\
-	};
-
-
-static struct clk_pll_freq_table tegra_pll_s_freq_table[] = {
-	{32768, 12000000, 366, 1, 1, 0},
-	{32768, 13000000, 397, 1, 1, 0},
-	{32768, 19200000, 586, 1, 1, 0},
-	{32768, 26000000, 793, 1, 1, 0},
-	{0, 0, 0, 0, 0, 0},
-};
-
-DEFINE_PLL(pll_s, PLL_ALT_MISC_REG, 0xf0, 26000000, 32768, 32768, 0,
-		0, 12000000, 26000000, tegra_pll_s_freq_table, 300,
-		tegra_pll_ops, 0, clk_32k);
-
-static struct clk_pll_freq_table tegra_pll_c_freq_table[] = {
-	{ 12000000, 600000000, 600, 12, 1, 8 },
-	{ 13000000, 600000000, 600, 13, 1, 8 },
-	{ 19200000, 600000000, 500, 16, 1, 6 },
-	{ 26000000, 600000000, 600, 26, 1, 8 },
-	{ 0, 0, 0, 0, 0, 0 },
-};
-
-DEFINE_PLL(pll_c, PLL_HAS_CPCON, 0x80, 600000000, 2000000, 31000000, 1000000,
-		6000000, 20000000, 1400000000, tegra_pll_c_freq_table, 300,
-		tegra_pll_ops, 0, clk_m);
-
-DEFINE_PLL_OUT(pll_c_out1, DIV_U71, 0x84, 0, 600000000,
-		tegra_pll_div_ops, pll_c, 0);
-
-static struct clk_pll_freq_table tegra_pll_m_freq_table[] = {
-	{ 12000000, 666000000, 666, 12, 1, 8},
-	{ 13000000, 666000000, 666, 13, 1, 8},
-	{ 19200000, 666000000, 555, 16, 1, 8},
-	{ 26000000, 666000000, 666, 26, 1, 8},
-	{ 12000000, 600000000, 600, 12, 1, 8},
-	{ 13000000, 600000000, 600, 13, 1, 8},
-	{ 19200000, 600000000, 375, 12, 1, 6},
-	{ 26000000, 600000000, 600, 26, 1, 8},
-	{ 0, 0, 0, 0, 0, 0 },
-};
-
-DEFINE_PLL(pll_m, PLL_HAS_CPCON, 0x90, 800000000, 2000000, 31000000, 1000000,
-		6000000, 20000000, 1200000000, tegra_pll_m_freq_table, 300,
-		tegra_pll_ops, 0, clk_m);
-
-DEFINE_PLL_OUT(pll_m_out1, DIV_U71, 0x94, 0, 600000000,
-		tegra_pll_div_ops, pll_m, 0);
-
-static struct clk_pll_freq_table tegra_pll_p_freq_table[] = {
-	{ 12000000, 216000000, 432, 12, 2, 8},
-	{ 13000000, 216000000, 432, 13, 2, 8},
-	{ 19200000, 216000000, 90,   4, 2, 1},
-	{ 26000000, 216000000, 432, 26, 2, 8},
-	{ 12000000, 432000000, 432, 12, 1, 8},
-	{ 13000000, 432000000, 432, 13, 1, 8},
-	{ 19200000, 432000000, 90,   4, 1, 1},
-	{ 26000000, 432000000, 432, 26, 1, 8},
-	{ 0, 0, 0, 0, 0, 0 },
-};
-
-
-DEFINE_PLL(pll_p, ENABLE_ON_INIT | PLL_FIXED | PLL_HAS_CPCON, 0xa0, 432000000,
-		2000000, 31000000, 1000000, 6000000, 20000000, 1400000000,
-		tegra_pll_p_freq_table, 300, tegra_pll_ops, 216000000, clk_m);
-
-DEFINE_PLL_OUT(pll_p_out1, ENABLE_ON_INIT | DIV_U71 | DIV_U71_FIXED, 0xa4, 0,
-		432000000, tegra_pll_div_ops, pll_p, 0);
-DEFINE_PLL_OUT(pll_p_out2, ENABLE_ON_INIT | DIV_U71 | DIV_U71_FIXED, 0xa4, 16,
-		432000000, tegra_pll_div_ops, pll_p, 0);
-DEFINE_PLL_OUT(pll_p_out3, ENABLE_ON_INIT | DIV_U71 | DIV_U71_FIXED, 0xa8, 0,
-		432000000, tegra_pll_div_ops, pll_p, 0);
-DEFINE_PLL_OUT(pll_p_out4, ENABLE_ON_INIT | DIV_U71 | DIV_U71_FIXED, 0xa8, 16,
-		432000000, tegra_pll_div_ops, pll_p, 0);
-
-static struct clk_pll_freq_table tegra_pll_a_freq_table[] = {
-	{ 28800000, 56448000, 49, 25, 1, 1},
-	{ 28800000, 73728000, 64, 25, 1, 1},
-	{ 28800000, 24000000,  5,  6, 1, 1},
-	{ 0, 0, 0, 0, 0, 0 },
-};
-
-DEFINE_PLL(pll_a, PLL_HAS_CPCON, 0xb0, 73728000, 2000000, 31000000, 1000000,
-		6000000, 20000000, 1400000000, tegra_pll_a_freq_table, 300,
-		tegra_pll_ops, 0, pll_p_out1);
-
-DEFINE_PLL_OUT(pll_a_out0, DIV_U71, 0xb4, 0, 73728000,
-		tegra_pll_div_ops, pll_a, 0);
-
-static struct clk_pll_freq_table tegra_pll_d_freq_table[] = {
-	{ 12000000, 216000000, 216, 12, 1, 4},
-	{ 13000000, 216000000, 216, 13, 1, 4},
-	{ 19200000, 216000000, 135, 12, 1, 3},
-	{ 26000000, 216000000, 216, 26, 1, 4},
-
-	{ 12000000, 297000000,  99,  4, 1, 4 },
-	{ 12000000, 339000000, 113,  4, 1, 4 },
-
-	{ 12000000, 594000000, 594, 12, 1, 8},
-	{ 13000000, 594000000, 594, 13, 1, 8},
-	{ 19200000, 594000000, 495, 16, 1, 8},
-	{ 26000000, 594000000, 594, 26, 1, 8},
-
-	{ 12000000, 616000000, 616, 12, 1, 8},
-
-	{ 12000000, 1000000000, 1000, 12, 1, 12},
-	{ 13000000, 1000000000, 1000, 13, 1, 12},
-	{ 19200000, 1000000000, 625,  12, 1, 8},
-	{ 26000000, 1000000000, 1000, 26, 1, 12},
-
-	{ 0, 0, 0, 0, 0, 0 },
-};
-
-DEFINE_PLL(pll_d, PLL_HAS_CPCON | PLLD, 0xd0, 1000000000, 2000000, 40000000,
-		1000000, 6000000, 40000000, 1000000000, tegra_pll_d_freq_table,
-		1000, tegra_pll_ops, 0, clk_m);
-
-DEFINE_PLL_OUT(pll_d_out0, DIV_2 | PLLD, 0, 0, 500000000,
-		tegra_pll_div_ops, pll_d, CLK_SET_RATE_PARENT);
-
-static struct clk_pll_freq_table tegra_pll_u_freq_table[] = {
-	{ 12000000, 480000000, 960, 12, 2, 0},
-	{ 13000000, 480000000, 960, 13, 2, 0},
-	{ 19200000, 480000000, 200, 4,  2, 0},
-	{ 26000000, 480000000, 960, 26, 2, 0},
-	{ 0, 0, 0, 0, 0, 0 },
-};
-
-DEFINE_PLL(pll_u, PLLU, 0xc0, 480000000, 2000000, 40000000, 1000000, 6000000,
-		48000000, 960000000, tegra_pll_u_freq_table, 1000,
-		tegra_pll_ops, 0, clk_m);
-
-static struct clk_pll_freq_table tegra_pll_x_freq_table[] = {
-	/* 1 GHz */
-	{ 12000000, 1000000000, 1000, 12, 1, 12},
-	{ 13000000, 1000000000, 1000, 13, 1, 12},
-	{ 19200000, 1000000000, 625,  12, 1, 8},
-	{ 26000000, 1000000000, 1000, 26, 1, 12},
-
-	/* 912 MHz */
-	{ 12000000, 912000000,  912,  12, 1, 12},
-	{ 13000000, 912000000,  912,  13, 1, 12},
-	{ 19200000, 912000000,  760,  16, 1, 8},
-	{ 26000000, 912000000,  912,  26, 1, 12},
-
-	/* 816 MHz */
-	{ 12000000, 816000000,  816,  12, 1, 12},
-	{ 13000000, 816000000,  816,  13, 1, 12},
-	{ 19200000, 816000000,  680,  16, 1, 8},
-	{ 26000000, 816000000,  816,  26, 1, 12},
-
-	/* 760 MHz */
-	{ 12000000, 760000000,  760,  12, 1, 12},
-	{ 13000000, 760000000,  760,  13, 1, 12},
-	{ 19200000, 760000000,  950,  24, 1, 8},
-	{ 26000000, 760000000,  760,  26, 1, 12},
-
-	/* 750 MHz */
-	{ 12000000, 750000000,  750,  12, 1, 12},
-	{ 13000000, 750000000,  750,  13, 1, 12},
-	{ 19200000, 750000000,  625,  16, 1, 8},
-	{ 26000000, 750000000,  750,  26, 1, 12},
-
-	/* 608 MHz */
-	{ 12000000, 608000000,  608,  12, 1, 12},
-	{ 13000000, 608000000,  608,  13, 1, 12},
-	{ 19200000, 608000000,  380,  12, 1, 8},
-	{ 26000000, 608000000,  608,  26, 1, 12},
-
-	/* 456 MHz */
-	{ 12000000, 456000000,  456,  12, 1, 12},
-	{ 13000000, 456000000,  456,  13, 1, 12},
-	{ 19200000, 456000000,  380,  16, 1, 8},
-	{ 26000000, 456000000,  456,  26, 1, 12},
-
-	/* 312 MHz */
-	{ 12000000, 312000000,  312,  12, 1, 12},
-	{ 13000000, 312000000,  312,  13, 1, 12},
-	{ 19200000, 312000000,  260,  16, 1, 8},
-	{ 26000000, 312000000,  312,  26, 1, 12},
-
-	{ 0, 0, 0, 0, 0, 0 },
-};
-
-DEFINE_PLL(pll_x, PLL_HAS_CPCON | PLL_ALT_MISC_REG, 0xe0, 1000000000, 2000000,
-		31000000, 1000000, 6000000, 20000000, 1200000000,
-		tegra_pll_x_freq_table, 300, tegra_pllx_ops, 0, clk_m);
-
-static struct clk_pll_freq_table tegra_pll_e_freq_table[] = {
-	{ 12000000, 100000000,  200,  24, 1, 0 },
-	{ 0, 0, 0, 0, 0, 0 },
-};
-
-DEFINE_PLL(pll_e, PLL_ALT_MISC_REG, 0xe8, 100000000, 12000000, 12000000, 0, 0,
-		0, 0, tegra_pll_e_freq_table, 0, tegra_plle_ops, 0, clk_m);
-
-static const char *tegra_common_parent_names[] = {
-	"clk_m",
-};
-
-static struct clk *tegra_common_parents[] = {
-	&tegra_clk_m,
-};
-
-static struct clk tegra_clk_d;
-static struct clk_tegra tegra_clk_d_hw = {
-	.hw = {
-		.clk = &tegra_clk_d,
-	},
-	.flags = PERIPH_NO_RESET,
-	.reg = 0x34,
-	.reg_shift = 12,
-	.max_rate = 52000000,
-	.u.periph = {
-		.clk_num = 90,
-	},
-};
-
-static struct clk tegra_clk_d = {
-	.name = "clk_d",
-	.hw = &tegra_clk_d_hw.hw,
-	.ops = &tegra_clk_double_ops,
-	.parent = &tegra_clk_m,
-	.parent_names = tegra_common_parent_names,
-	.parents = tegra_common_parents,
-	.num_parents = ARRAY_SIZE(tegra_common_parent_names),
-};
-
-static struct clk tegra_cdev1;
-static struct clk_tegra tegra_cdev1_hw = {
-	.hw = {
-		.clk = &tegra_cdev1,
-	},
-	.fixed_rate = 26000000,
-	.u.periph = {
-		.clk_num = 94,
-	},
-};
-static struct clk tegra_cdev1 = {
-	.name = "cdev1",
-	.hw = &tegra_cdev1_hw.hw,
-	.ops = &tegra_cdev_clk_ops,
-	.flags = CLK_IS_ROOT,
-};
-
-/* dap_mclk2, belongs to the cdev2 pingroup. */
-static struct clk tegra_cdev2;
-static struct clk_tegra tegra_cdev2_hw = {
-	.hw = {
-		.clk = &tegra_cdev2,
-	},
-	.fixed_rate = 26000000,
-	.u.periph = {
-		.clk_num  = 93,
-	},
-};
-static struct clk tegra_cdev2 = {
-	.name = "cdev2",
-	.hw = &tegra_cdev2_hw.hw,
-	.ops = &tegra_cdev_clk_ops,
-	.flags = CLK_IS_ROOT,
-};
-
-/* initialized before peripheral clocks */
-static struct clk_mux_sel mux_audio_sync_clk[8+1];
-static const struct audio_sources {
-	const char *name;
-	int value;
-} mux_audio_sync_clk_sources[] = {
-	{ .name = "spdif_in", .value = 0 },
-	{ .name = "i2s1", .value = 1 },
-	{ .name = "i2s2", .value = 2 },
-	{ .name = "pll_a_out0", .value = 4 },
-#if 0 /* FIXME: not implemented */
-	{ .name = "ac97", .value = 3 },
-	{ .name = "ext_audio_clk2", .value = 5 },
-	{ .name = "ext_audio_clk1", .value = 6 },
-	{ .name = "ext_vimclk", .value = 7 },
-#endif
-	{ NULL, 0 }
-};
-
-static const char *audio_parent_names[] = {
-	"spdif_in",
-	"i2s1",
-	"i2s2",
-	"dummy",
-	"pll_a_out0",
-	"dummy",
-	"dummy",
-	"dummy",
-};
-
-static struct clk *audio_parents[] = {
-	NULL,
-	NULL,
-	NULL,
-	NULL,
-	NULL,
-	NULL,
-	NULL,
-	NULL,
-};
-
-static struct clk tegra_audio;
-static struct clk_tegra tegra_audio_hw = {
-	.hw = {
-		.clk = &tegra_audio,
-	},
-	.reg = 0x38,
-	.max_rate = 73728000,
-};
-DEFINE_CLK_TEGRA(audio, 0, &tegra_audio_sync_clk_ops, 0, audio_parent_names,
-		audio_parents, NULL);
-
-static const char *audio_2x_parent_names[] = {
-	"audio",
-};
-
-static struct clk *audio_2x_parents[] = {
-	&tegra_audio,
-};
-
-static struct clk tegra_audio_2x;
-static struct clk_tegra tegra_audio_2x_hw = {
-	.hw = {
-		.clk = &tegra_audio_2x,
-	},
-	.flags = PERIPH_NO_RESET,
-	.max_rate = 48000000,
-	.reg = 0x34,
-	.reg_shift = 8,
-	.u.periph = {
-		.clk_num = 89,
-	},
-};
-DEFINE_CLK_TEGRA(audio_2x, 0, &tegra_clk_double_ops, 0, audio_2x_parent_names,
-		audio_2x_parents, &tegra_audio);
-
-static struct clk_lookup tegra_audio_clk_lookups[] = {
-	{ .con_id = "audio", .clk = &tegra_audio },
-	{ .con_id = "audio_2x", .clk = &tegra_audio_2x }
-};
-
-/* This is called after peripheral clocks are initialized, as the
- * audio_sync clock depends on some of the peripheral clocks.
- */
-
-static void init_audio_sync_clock_mux(void)
-{
-	int i;
-	struct clk_mux_sel *sel = mux_audio_sync_clk;
-	const struct audio_sources *src = mux_audio_sync_clk_sources;
-	struct clk_lookup *lookup;
-
-	for (i = 0; src->name; i++, sel++, src++) {
-		sel->input = tegra_get_clock_by_name(src->name);
-		if (!sel->input)
-			pr_err("%s: could not find clk %s\n", __func__,
-				src->name);
-		audio_parents[src->value] = sel->input;
-		sel->value = src->value;
-	}
-
-	lookup = tegra_audio_clk_lookups;
-	for (i = 0; i < ARRAY_SIZE(tegra_audio_clk_lookups); i++, lookup++) {
-		struct clk *c = lookup->clk;
-		struct clk_tegra *clk = to_clk_tegra(c->hw);
-		__clk_init(NULL, c);
-		INIT_LIST_HEAD(&clk->shared_bus_list);
-		clk->lookup.con_id = lookup->con_id;
-		clk->lookup.clk = c;
-		clkdev_add(&clk->lookup);
-		tegra_clk_add(c);
-	}
-}
-
-static const char *mux_cclk[] = {
-	"clk_m",
-	"pll_c",
-	"clk_32k",
-	"pll_m",
-	"pll_p",
-	"pll_p_out4",
-	"pll_p_out3",
-	"clk_d",
-	"pll_x",
-};
-
-
-static struct clk *mux_cclk_p[] = {
-	&tegra_clk_m,
-	&tegra_pll_c,
-	&tegra_clk_32k,
-	&tegra_pll_m,
-	&tegra_pll_p,
-	&tegra_pll_p_out4,
-	&tegra_pll_p_out3,
-	&tegra_clk_d,
-	&tegra_pll_x,
-};
-
-static const char *mux_sclk[] = {
-	"clk_m",
-	"pll_c_out1",
-	"pll_p_out4",
-	"pllp_p_out3",
-	"pll_p_out2",
-	"clk_d",
-	"clk_32k",
-	"pll_m_out1",
-};
-
-static struct clk *mux_sclk_p[] = {
-	&tegra_clk_m,
-	&tegra_pll_c_out1,
-	&tegra_pll_p_out4,
-	&tegra_pll_p_out3,
-	&tegra_pll_p_out2,
-	&tegra_clk_d,
-	&tegra_clk_32k,
-	&tegra_pll_m_out1,
-};
-
-static struct clk tegra_cclk;
-static struct clk_tegra tegra_cclk_hw = {
-	.hw = {
-		.clk = &tegra_cclk,
-	},
-	.reg = 0x20,
-	.max_rate = 1000000000,
-};
-DEFINE_CLK_TEGRA(cclk, 0, &tegra_super_ops, 0, mux_cclk,
-		mux_cclk_p, NULL);
-
-static const char *mux_twd[] = {
-	"cclk",
-};
-
-static struct clk *mux_twd_p[] = {
-	&tegra_cclk,
-};
-
-static struct clk tegra_clk_twd;
-static struct clk_tegra tegra_clk_twd_hw = {
-	.hw = {
-		.clk = &tegra_clk_twd,
-	},
-	.max_rate = 1000000000,
-	.mul = 1,
-	.div = 4,
-};
-
-static struct clk tegra_clk_twd = {
-	.name = "twd",
-	.ops = &tegra_twd_ops,
-	.hw = &tegra_clk_twd_hw.hw,
-	.parent = &tegra_cclk,
-	.parent_names = mux_twd,
-	.parents = mux_twd_p,
-	.num_parents = ARRAY_SIZE(mux_twd),
-};
-
-static struct clk tegra_sclk;
-static struct clk_tegra tegra_sclk_hw = {
-	.hw = {
-		.clk = &tegra_sclk,
-	},
-	.reg = 0x28,
-	.max_rate = 240000000,
-	.min_rate = 120000000,
-};
-DEFINE_CLK_TEGRA(sclk, 0, &tegra_super_ops, 0, mux_sclk,
-		mux_sclk_p, NULL);
-
-static const char *tegra_cop_parent_names[] = {
-	"tegra_sclk",
-};
-
-static struct clk *tegra_cop_parents[] = {
-	&tegra_sclk,
-};
-
-static struct clk tegra_cop;
-static struct clk_tegra tegra_cop_hw = {
-	.hw = {
-		.clk = &tegra_cop,
-	},
-	.max_rate  = 240000000,
-	.reset = &tegra2_cop_clk_reset,
-};
-DEFINE_CLK_TEGRA(cop, 0, &tegra_cop_ops, CLK_SET_RATE_PARENT,
-		tegra_cop_parent_names, tegra_cop_parents, &tegra_sclk);
-
-static const char *tegra_hclk_parent_names[] = {
-	"tegra_sclk",
-};
-
-static struct clk *tegra_hclk_parents[] = {
-	&tegra_sclk,
-};
-
-static struct clk tegra_hclk;
-static struct clk_tegra tegra_hclk_hw = {
-	.hw = {
-		.clk = &tegra_hclk,
-	},
-	.flags = DIV_BUS,
-	.reg = 0x30,
-	.reg_shift = 4,
-	.max_rate = 240000000,
-};
-DEFINE_CLK_TEGRA(hclk, 0, &tegra_bus_ops, 0, tegra_hclk_parent_names,
-		tegra_hclk_parents, &tegra_sclk);
-
-static const char *tegra_pclk_parent_names[] = {
-	"tegra_hclk",
-};
-
-static struct clk *tegra_pclk_parents[] = {
-	&tegra_hclk,
-};
-
-static struct clk tegra_pclk;
-static struct clk_tegra tegra_pclk_hw = {
-	.hw = {
-		.clk = &tegra_pclk,
-	},
-	.flags = DIV_BUS,
-	.reg = 0x30,
-	.reg_shift = 0,
-	.max_rate = 120000000,
-};
-DEFINE_CLK_TEGRA(pclk, 0, &tegra_bus_ops, 0, tegra_pclk_parent_names,
-		tegra_pclk_parents, &tegra_hclk);
-
-static const char *tegra_blink_parent_names[] = {
-	"clk_32k",
-};
-
-static struct clk *tegra_blink_parents[] = {
-	&tegra_clk_32k,
-};
-
-static struct clk tegra_blink;
-static struct clk_tegra tegra_blink_hw = {
-	.hw = {
-		.clk = &tegra_blink,
-	},
-	.reg = 0x40,
-	.max_rate = 32768,
-};
-DEFINE_CLK_TEGRA(blink, 0, &tegra_blink_clk_ops, 0, tegra_blink_parent_names,
-		tegra_blink_parents, &tegra_clk_32k);
-
-static const char *mux_pllm_pllc_pllp_plla[] = {
-	"pll_m",
-	"pll_c",
-	"pll_p",
-	"pll_a_out0",
-};
-
-static struct clk *mux_pllm_pllc_pllp_plla_p[] = {
-	&tegra_pll_m,
-	&tegra_pll_c,
-	&tegra_pll_p,
-	&tegra_pll_a_out0,
-};
-
-static const char *mux_pllm_pllc_pllp_clkm[] = {
-	"pll_m",
-	"pll_c",
-	"pll_p",
-	"clk_m",
-};
-
-static struct clk *mux_pllm_pllc_pllp_clkm_p[] = {
-	&tegra_pll_m,
-	&tegra_pll_c,
-	&tegra_pll_p,
-	&tegra_clk_m,
-};
-
-static const char *mux_pllp_pllc_pllm_clkm[] = {
-	"pll_p",
-	"pll_c",
-	"pll_m",
-	"clk_m",
-};
-
-static struct clk *mux_pllp_pllc_pllm_clkm_p[] = {
-	&tegra_pll_p,
-	&tegra_pll_c,
-	&tegra_pll_m,
-	&tegra_clk_m,
-};
-
-static const char *mux_pllaout0_audio2x_pllp_clkm[] = {
-	"pll_a_out0",
-	"audio_2x",
-	"pll_p",
-	"clk_m",
-};
-
-static struct clk *mux_pllaout0_audio2x_pllp_clkm_p[] = {
-	&tegra_pll_a_out0,
-	&tegra_audio_2x,
-	&tegra_pll_p,
-	&tegra_clk_m,
-};
-
-static const char *mux_pllp_plld_pllc_clkm[] = {
-	"pllp",
-	"pll_d_out0",
-	"pll_c",
-	"clk_m",
-};
-
-static struct clk *mux_pllp_plld_pllc_clkm_p[] = {
-	&tegra_pll_p,
-	&tegra_pll_d_out0,
-	&tegra_pll_c,
-	&tegra_clk_m,
-};
-
-static const char *mux_pllp_pllc_audio_clkm_clk32[] = {
-	"pll_p",
-	"pll_c",
-	"audio",
-	"clk_m",
-	"clk_32k",
-};
-
-static struct clk *mux_pllp_pllc_audio_clkm_clk32_p[] = {
-	&tegra_pll_p,
-	&tegra_pll_c,
-	&tegra_audio,
-	&tegra_clk_m,
-	&tegra_clk_32k,
-};
-
-static const char *mux_pllp_pllc_pllm[] = {
-	"pll_p",
-	"pll_c",
-	"pll_m"
-};
-
-static struct clk *mux_pllp_pllc_pllm_p[] = {
-	&tegra_pll_p,
-	&tegra_pll_c,
-	&tegra_pll_m,
-};
-
-static const char *mux_clk_m[] = {
-	"clk_m",
-};
-
-static struct clk *mux_clk_m_p[] = {
-	&tegra_clk_m,
-};
-
-static const char *mux_pllp_out3[] = {
-	"pll_p_out3",
-};
-
-static struct clk *mux_pllp_out3_p[] = {
-	&tegra_pll_p_out3,
-};
-
-static const char *mux_plld[] = {
-	"pll_d",
-};
-
-static struct clk *mux_plld_p[] = {
-	&tegra_pll_d,
-};
-
-static const char *mux_clk_32k[] = {
-	"clk_32k",
-};
-
-static struct clk *mux_clk_32k_p[] = {
-	&tegra_clk_32k,
-};
-
-static const char *mux_pclk[] = {
-	"pclk",
-};
-
-static struct clk *mux_pclk_p[] = {
-	&tegra_pclk,
-};
-
-static struct clk tegra_emc;
-static struct clk_tegra tegra_emc_hw = {
-	.hw = {
-		.clk = &tegra_emc,
-	},
-	.reg = 0x19c,
-	.max_rate = 800000000,
-	.flags = MUX | DIV_U71 | PERIPH_EMC_ENB,
-	.reset = &tegra2_periph_clk_reset,
-	.u.periph = {
-		.clk_num = 57,
-	},
-};
-DEFINE_CLK_TEGRA(emc, 0, &tegra_emc_clk_ops, 0, mux_pllm_pllc_pllp_clkm,
-		mux_pllm_pllc_pllp_clkm_p, NULL);
-
-#define PERIPH_CLK(_name, _dev, _con, _clk_num, _reg,	\
-		_max, _inputs, _flags) 			\
-	static struct clk tegra_##_name;		\
-	static struct clk_tegra tegra_##_name##_hw = {	\
-		.hw = {					\
-			.clk = &tegra_##_name,		\
-		},					\
-		.lookup = {				\
-			.dev_id = _dev,			\
-			.con_id = _con,			\
-		},					\
-		.reg = _reg,				\
-		.flags = _flags,			\
-		.max_rate = _max,			\
-		.u.periph = {				\
-			.clk_num = _clk_num,		\
-		},					\
-		.reset = tegra2_periph_clk_reset,	\
-	};						\
-	static struct clk tegra_##_name = {		\
-		.name = #_name,				\
-		.ops = &tegra_periph_clk_ops,		\
-		.hw = &tegra_##_name##_hw.hw,		\
-		.parent_names = _inputs,		\
-		.parents = _inputs##_p,			\
-		.num_parents = ARRAY_SIZE(_inputs),	\
-	};
-
-PERIPH_CLK(apbdma,	"tegra-apbdma",		NULL,	34,	0,	108000000, mux_pclk,			0);
-PERIPH_CLK(rtc,		"rtc-tegra",		NULL,	4,	0,	32768,     mux_clk_32k,			PERIPH_NO_RESET);
-PERIPH_CLK(timer,	"timer",		NULL,	5,	0,	26000000,  mux_clk_m,			0);
-PERIPH_CLK(i2s1,	"tegra20-i2s.0",	NULL,	11,	0x100,	26000000,  mux_pllaout0_audio2x_pllp_clkm,	MUX | DIV_U71);
-PERIPH_CLK(i2s2,	"tegra20-i2s.1",	NULL,	18,	0x104,	26000000,  mux_pllaout0_audio2x_pllp_clkm,	MUX | DIV_U71);
-PERIPH_CLK(spdif_out,	"spdif_out",		NULL,	10,	0x108,	100000000, mux_pllaout0_audio2x_pllp_clkm,	MUX | DIV_U71);
-PERIPH_CLK(spdif_in,	"spdif_in",		NULL,	10,	0x10c,	100000000, mux_pllp_pllc_pllm,		MUX | DIV_U71);
-PERIPH_CLK(pwm,		"tegra-pwm",		NULL,	17,	0x110,	432000000, mux_pllp_pllc_audio_clkm_clk32,	MUX | DIV_U71 | MUX_PWM);
-PERIPH_CLK(spi,		"spi",			NULL,	43,	0x114,	40000000,  mux_pllp_pllc_pllm_clkm,	MUX | DIV_U71);
-PERIPH_CLK(xio,		"xio",			NULL,	45,	0x120,	150000000, mux_pllp_pllc_pllm_clkm,	MUX | DIV_U71);
-PERIPH_CLK(twc,		"twc",			NULL,	16,	0x12c,	150000000, mux_pllp_pllc_pllm_clkm,	MUX | DIV_U71);
-PERIPH_CLK(sbc1,	"spi_tegra.0",		NULL,	41,	0x134,	160000000, mux_pllp_pllc_pllm_clkm,	MUX | DIV_U71);
-PERIPH_CLK(sbc2,	"spi_tegra.1",		NULL,	44,	0x118,	160000000, mux_pllp_pllc_pllm_clkm,	MUX | DIV_U71);
-PERIPH_CLK(sbc3,	"spi_tegra.2",		NULL,	46,	0x11c,	160000000, mux_pllp_pllc_pllm_clkm,	MUX | DIV_U71);
-PERIPH_CLK(sbc4,	"spi_tegra.3",		NULL,	68,	0x1b4,	160000000, mux_pllp_pllc_pllm_clkm,	MUX | DIV_U71);
-PERIPH_CLK(ide,		"ide",			NULL,	25,	0x144,	100000000, mux_pllp_pllc_pllm_clkm,	MUX | DIV_U71); /* requires min voltage */
-PERIPH_CLK(ndflash,	"tegra_nand",		NULL,	13,	0x160,	164000000, mux_pllp_pllc_pllm_clkm,	MUX | DIV_U71); /* scales with voltage */
-PERIPH_CLK(vfir,	"vfir",			NULL,	7,	0x168,	72000000,  mux_pllp_pllc_pllm_clkm,	MUX | DIV_U71);
-PERIPH_CLK(sdmmc1,	"sdhci-tegra.0",	NULL,	14,	0x150,	52000000,  mux_pllp_pllc_pllm_clkm,	MUX | DIV_U71); /* scales with voltage */
-PERIPH_CLK(sdmmc2,	"sdhci-tegra.1",	NULL,	9,	0x154,	52000000,  mux_pllp_pllc_pllm_clkm,	MUX | DIV_U71); /* scales with voltage */
-PERIPH_CLK(sdmmc3,	"sdhci-tegra.2",	NULL,	69,	0x1bc,	52000000,  mux_pllp_pllc_pllm_clkm,	MUX | DIV_U71); /* scales with voltage */
-PERIPH_CLK(sdmmc4,	"sdhci-tegra.3",	NULL,	15,	0x164,	52000000,  mux_pllp_pllc_pllm_clkm,	MUX | DIV_U71); /* scales with voltage */
-PERIPH_CLK(vcp,		"tegra-avp",		"vcp",	29,	0,	250000000, mux_clk_m,			0);
-PERIPH_CLK(bsea,	"tegra-avp",		"bsea",	62,	0,	250000000, mux_clk_m,			0);
-PERIPH_CLK(bsev,	"tegra-aes",		"bsev",	63,	0,	250000000, mux_clk_m,			0);
-PERIPH_CLK(vde,		"tegra-avp",		"vde",	61,	0x1c8,	250000000, mux_pllp_pllc_pllm_clkm,	MUX | DIV_U71); /* scales with voltage and process_id */
-PERIPH_CLK(csite,	"csite",		NULL,	73,	0x1d4,	144000000, mux_pllp_pllc_pllm_clkm,	MUX | DIV_U71); /* max rate ??? */
-/* FIXME: what is la? */
-PERIPH_CLK(la,		"la",			NULL,	76,	0x1f8,	26000000,  mux_pllp_pllc_pllm_clkm,	MUX | DIV_U71);
-PERIPH_CLK(owr,		"tegra_w1",		NULL,	71,	0x1cc,	26000000,  mux_pllp_pllc_pllm_clkm,	MUX | DIV_U71);
-PERIPH_CLK(nor,		"nor",			NULL,	42,	0x1d0,	92000000,  mux_pllp_pllc_pllm_clkm,	MUX | DIV_U71); /* requires min voltage */
-PERIPH_CLK(mipi,	"mipi",			NULL,	50,	0x174,	60000000,  mux_pllp_pllc_pllm_clkm,	MUX | DIV_U71); /* scales with voltage */
-PERIPH_CLK(i2c1,	"tegra-i2c.0",		"div-clk", 12,	0x124,	26000000,  mux_pllp_pllc_pllm_clkm,	MUX | DIV_U16);
-PERIPH_CLK(i2c2,	"tegra-i2c.1",		"div-clk", 54,	0x198,	26000000,  mux_pllp_pllc_pllm_clkm,	MUX | DIV_U16);
-PERIPH_CLK(i2c3,	"tegra-i2c.2",		"div-clk", 67,	0x1b8,	26000000,  mux_pllp_pllc_pllm_clkm,	MUX | DIV_U16);
-PERIPH_CLK(dvc,		"tegra-i2c.3",		"div-clk", 47,	0x128,	26000000,  mux_pllp_pllc_pllm_clkm,	MUX | DIV_U16);
-PERIPH_CLK(uarta,	"tegra-uart.0",		NULL,	6,	0x178,	600000000, mux_pllp_pllc_pllm_clkm,	MUX);
-PERIPH_CLK(uartb,	"tegra-uart.1",		NULL,	7,	0x17c,	600000000, mux_pllp_pllc_pllm_clkm,	MUX);
-PERIPH_CLK(uartc,	"tegra-uart.2",		NULL,	55,	0x1a0,	600000000, mux_pllp_pllc_pllm_clkm,	MUX);
-PERIPH_CLK(uartd,	"tegra-uart.3",		NULL,	65,	0x1c0,	600000000, mux_pllp_pllc_pllm_clkm,	MUX);
-PERIPH_CLK(uarte,	"tegra-uart.4",		NULL,	66,	0x1c4,	600000000, mux_pllp_pllc_pllm_clkm,	MUX);
-PERIPH_CLK(3d,		"3d",			NULL,	24,	0x158,	300000000, mux_pllm_pllc_pllp_plla,	MUX | DIV_U71 | PERIPH_MANUAL_RESET); /* scales with voltage and process_id */
-PERIPH_CLK(2d,		"2d",			NULL,	21,	0x15c,	300000000, mux_pllm_pllc_pllp_plla,	MUX | DIV_U71); /* scales with voltage and process_id */
-PERIPH_CLK(vi,		"tegra_camera",		"vi",	20,	0x148,	150000000, mux_pllm_pllc_pllp_plla,	MUX | DIV_U71); /* scales with voltage and process_id */
-PERIPH_CLK(vi_sensor,	"tegra_camera",		"vi_sensor",	20,	0x1a8,	150000000, mux_pllm_pllc_pllp_plla,	MUX | DIV_U71 | PERIPH_NO_RESET); /* scales with voltage and process_id */
-PERIPH_CLK(epp,		"epp",			NULL,	19,	0x16c,	300000000, mux_pllm_pllc_pllp_plla,	MUX | DIV_U71); /* scales with voltage and process_id */
-PERIPH_CLK(mpe,		"mpe",			NULL,	60,	0x170,	250000000, mux_pllm_pllc_pllp_plla,	MUX | DIV_U71); /* scales with voltage and process_id */
-PERIPH_CLK(host1x,	"host1x",		NULL,	28,	0x180,	166000000, mux_pllm_pllc_pllp_plla,	MUX | DIV_U71); /* scales with voltage and process_id */
-PERIPH_CLK(cve,		"cve",			NULL,	49,	0x140,	250000000, mux_pllp_plld_pllc_clkm,	MUX | DIV_U71); /* requires min voltage */
-PERIPH_CLK(tvo,		"tvo",			NULL,	49,	0x188,	250000000, mux_pllp_plld_pllc_clkm,	MUX | DIV_U71); /* requires min voltage */
-PERIPH_CLK(hdmi,	"hdmi",			NULL,	51,	0x18c,	600000000, mux_pllp_plld_pllc_clkm,	MUX | DIV_U71); /* requires min voltage */
-PERIPH_CLK(tvdac,	"tvdac",		NULL,	53,	0x194,	250000000, mux_pllp_plld_pllc_clkm,	MUX | DIV_U71); /* requires min voltage */
-PERIPH_CLK(disp1,	"tegradc.0",		NULL,	27,	0x138,	600000000, mux_pllp_plld_pllc_clkm,	MUX); /* scales with voltage and process_id */
-PERIPH_CLK(disp2,	"tegradc.1",		NULL,	26,	0x13c,	600000000, mux_pllp_plld_pllc_clkm,	MUX); /* scales with voltage and process_id */
-PERIPH_CLK(usbd,	"fsl-tegra-udc",	NULL,	22,	0,	480000000, mux_clk_m,			0); /* requires min voltage */
-PERIPH_CLK(usb2,	"tegra-ehci.1",		NULL,	58,	0,	480000000, mux_clk_m,			0); /* requires min voltage */
-PERIPH_CLK(usb3,	"tegra-ehci.2",		NULL,	59,	0,	480000000, mux_clk_m,			0); /* requires min voltage */
-PERIPH_CLK(dsi,		"dsi",			NULL,	48,	0,	500000000, mux_plld,			0); /* scales with voltage */
-PERIPH_CLK(csi,		"tegra_camera",		"csi",	52,	0,	72000000,  mux_pllp_out3,		0);
-PERIPH_CLK(isp,		"tegra_camera",		"isp",	23,	0,	150000000, mux_clk_m,			0); /* same frequency as VI */
-PERIPH_CLK(csus,	"tegra_camera",		"csus",	92,	0,	150000000, mux_clk_m,			PERIPH_NO_RESET);
-PERIPH_CLK(pex,		NULL,			"pex",  70,     0,	26000000,  mux_clk_m,			PERIPH_MANUAL_RESET);
-PERIPH_CLK(afi,		NULL,			"afi",  72,     0,	26000000,  mux_clk_m,			PERIPH_MANUAL_RESET);
-PERIPH_CLK(pcie_xclk,	NULL,		  "pcie_xclk",  74,     0,	26000000,  mux_clk_m,			PERIPH_MANUAL_RESET);
-
-static struct clk *tegra_list_clks[] = {
-	&tegra_apbdma,
-	&tegra_rtc,
-	&tegra_timer,
-	&tegra_i2s1,
-	&tegra_i2s2,
-	&tegra_spdif_out,
-	&tegra_spdif_in,
-	&tegra_pwm,
-	&tegra_spi,
-	&tegra_xio,
-	&tegra_twc,
-	&tegra_sbc1,
-	&tegra_sbc2,
-	&tegra_sbc3,
-	&tegra_sbc4,
-	&tegra_ide,
-	&tegra_ndflash,
-	&tegra_vfir,
-	&tegra_sdmmc1,
-	&tegra_sdmmc2,
-	&tegra_sdmmc3,
-	&tegra_sdmmc4,
-	&tegra_vcp,
-	&tegra_bsea,
-	&tegra_bsev,
-	&tegra_vde,
-	&tegra_csite,
-	&tegra_la,
-	&tegra_owr,
-	&tegra_nor,
-	&tegra_mipi,
-	&tegra_i2c1,
-	&tegra_i2c2,
-	&tegra_i2c3,
-	&tegra_dvc,
-	&tegra_uarta,
-	&tegra_uartb,
-	&tegra_uartc,
-	&tegra_uartd,
-	&tegra_uarte,
-	&tegra_3d,
-	&tegra_2d,
-	&tegra_vi,
-	&tegra_vi_sensor,
-	&tegra_epp,
-	&tegra_mpe,
-	&tegra_host1x,
-	&tegra_cve,
-	&tegra_tvo,
-	&tegra_hdmi,
-	&tegra_tvdac,
-	&tegra_disp1,
-	&tegra_disp2,
-	&tegra_usbd,
-	&tegra_usb2,
-	&tegra_usb3,
-	&tegra_dsi,
-	&tegra_csi,
-	&tegra_isp,
-	&tegra_csus,
-	&tegra_pex,
-	&tegra_afi,
-	&tegra_pcie_xclk,
-};
-
-#define CLK_DUPLICATE(_name, _dev, _con)	\
-	{					\
-		.name	= _name,		\
-		.lookup	= {			\
-			.dev_id	= _dev,		\
-			.con_id	= _con,		\
-		},				\
-	}
-
-/* Some clocks may be used by different drivers depending on the board
- * configuration.  List those here to register them twice in the clock lookup
- * table under two names.
- */
-static struct clk_duplicate tegra_clk_duplicates[] = {
-	CLK_DUPLICATE("uarta",	"serial8250.0",	NULL),
-	CLK_DUPLICATE("uartb",	"serial8250.1",	NULL),
-	CLK_DUPLICATE("uartc",	"serial8250.2",	NULL),
-	CLK_DUPLICATE("uartd",	"serial8250.3",	NULL),
-	CLK_DUPLICATE("uarte",	"serial8250.4",	NULL),
-	CLK_DUPLICATE("usbd",	"utmip-pad",	NULL),
-	CLK_DUPLICATE("usbd",	"tegra-ehci.0",	NULL),
-	CLK_DUPLICATE("usbd",	"tegra-otg",	NULL),
-	CLK_DUPLICATE("2d",	"tegra_grhost",	"gr2d"),
-	CLK_DUPLICATE("3d",	"tegra_grhost",	"gr3d"),
-	CLK_DUPLICATE("epp",	"tegra_grhost",	"epp"),
-	CLK_DUPLICATE("mpe",	"tegra_grhost",	"mpe"),
-	CLK_DUPLICATE("cop",	"tegra-avp",	"cop"),
-	CLK_DUPLICATE("vde",	"tegra-aes",	"vde"),
-	CLK_DUPLICATE("cclk",	NULL,		"cpu"),
-	CLK_DUPLICATE("twd",	"smp_twd",	NULL),
-	CLK_DUPLICATE("pll_p_out3", "tegra-i2c.0", "fast-clk"),
-	CLK_DUPLICATE("pll_p_out3", "tegra-i2c.1", "fast-clk"),
-	CLK_DUPLICATE("pll_p_out3", "tegra-i2c.2", "fast-clk"),
-	CLK_DUPLICATE("pll_p_out3", "tegra-i2c.3", "fast-clk"),
-	CLK_DUPLICATE("pll_p", "tegradc.0", "parent"),
-	CLK_DUPLICATE("pll_p", "tegradc.1", "parent"),
-	CLK_DUPLICATE("pll_d_out0", "hdmi", "parent"),
-};
-
-#define CLK(dev, con, ck)	\
-	{			\
-		.dev_id	= dev,	\
-		.con_id	= con,	\
-		.clk	= ck,	\
-	}
-
-static struct clk *tegra_ptr_clks[] = {
-	&tegra_clk_32k,
-	&tegra_pll_s,
-	&tegra_clk_m,
-	&tegra_pll_m,
-	&tegra_pll_m_out1,
-	&tegra_pll_c,
-	&tegra_pll_c_out1,
-	&tegra_pll_p,
-	&tegra_pll_p_out1,
-	&tegra_pll_p_out2,
-	&tegra_pll_p_out3,
-	&tegra_pll_p_out4,
-	&tegra_pll_a,
-	&tegra_pll_a_out0,
-	&tegra_pll_d,
-	&tegra_pll_d_out0,
-	&tegra_pll_u,
-	&tegra_pll_x,
-	&tegra_pll_e,
-	&tegra_cclk,
-	&tegra_clk_twd,
-	&tegra_sclk,
-	&tegra_hclk,
-	&tegra_pclk,
-	&tegra_clk_d,
-	&tegra_cdev1,
-	&tegra_cdev2,
-	&tegra_blink,
-	&tegra_cop,
-	&tegra_emc,
-};
-
-static void tegra2_init_one_clock(struct clk *c)
-{
-	struct clk_tegra *clk = to_clk_tegra(c->hw);
-	int ret;
-
-	ret = __clk_init(NULL, c);
-	if (ret)
-		pr_err("clk init failed %s\n", __clk_get_name(c));
-
-	INIT_LIST_HEAD(&clk->shared_bus_list);
-	if (!clk->lookup.dev_id && !clk->lookup.con_id)
-		clk->lookup.con_id = c->name;
-	clk->lookup.clk = c;
-	clkdev_add(&clk->lookup);
-	tegra_clk_add(c);
-}
-
-void __init tegra2_init_clocks(void)
-{
-	int i;
-	struct clk *c;
-
-	for (i = 0; i < ARRAY_SIZE(tegra_ptr_clks); i++)
-		tegra2_init_one_clock(tegra_ptr_clks[i]);
-
-	for (i = 0; i < ARRAY_SIZE(tegra_list_clks); i++)
-		tegra2_init_one_clock(tegra_list_clks[i]);
-
-	for (i = 0; i < ARRAY_SIZE(tegra_clk_duplicates); i++) {
-		c = tegra_get_clock_by_name(tegra_clk_duplicates[i].name);
-		if (!c) {
-			pr_err("%s: Unknown duplicate clock %s\n", __func__,
-				tegra_clk_duplicates[i].name);
-			continue;
-		}
-
-		tegra_clk_duplicates[i].lookup.clk = c;
-		clkdev_add(&tegra_clk_duplicates[i].lookup);
-	}
-
-	init_audio_sync_clock_mux();
-	tegra20_cpu_car_ops_init();
-}
diff --git a/arch/arm/mach-tegra/tegra30_clocks.c b/arch/arm/mach-tegra/tegra30_clocks.c
deleted file mode 100644
index 4330787fe5cb..000000000000
--- a/arch/arm/mach-tegra/tegra30_clocks.c
+++ /dev/null
@@ -1,2506 +0,0 @@
-/*
- * arch/arm/mach-tegra/tegra30_clocks.c
- *
- * Copyright (c) 2010-2012 NVIDIA CORPORATION.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
- *
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/list.h>
-#include <linux/spinlock.h>
-#include <linux/delay.h>
-#include <linux/err.h>
-#include <linux/io.h>
-#include <linux/clk.h>
-#include <linux/cpufreq.h>
-#include <linux/syscore_ops.h>
-#include <linux/clk/tegra.h>
-
-#include <asm/clkdev.h>
-
-#include <mach/powergate.h>
-
-#include "clock.h"
-#include "fuse.h"
-#include "iomap.h"
-
-#define USE_PLL_LOCK_BITS 0
-
-#define RST_DEVICES_L			0x004
-#define RST_DEVICES_H			0x008
-#define RST_DEVICES_U			0x00C
-#define RST_DEVICES_V			0x358
-#define RST_DEVICES_W			0x35C
-#define RST_DEVICES_SET_L		0x300
-#define RST_DEVICES_CLR_L		0x304
-#define RST_DEVICES_SET_V		0x430
-#define RST_DEVICES_CLR_V		0x434
-#define RST_DEVICES_NUM			5
-
-#define CLK_OUT_ENB_L			0x010
-#define CLK_OUT_ENB_H			0x014
-#define CLK_OUT_ENB_U			0x018
-#define CLK_OUT_ENB_V			0x360
-#define CLK_OUT_ENB_W			0x364
-#define CLK_OUT_ENB_SET_L		0x320
-#define CLK_OUT_ENB_CLR_L		0x324
-#define CLK_OUT_ENB_SET_V		0x440
-#define CLK_OUT_ENB_CLR_V		0x444
-#define CLK_OUT_ENB_NUM			5
-
-#define RST_DEVICES_V_SWR_CPULP_RST_DIS	(0x1 << 1)
-#define CLK_OUT_ENB_V_CLK_ENB_CPULP_EN	(0x1 << 1)
-
-#define PERIPH_CLK_TO_BIT(c)		(1 << (c->u.periph.clk_num % 32))
-#define PERIPH_CLK_TO_RST_REG(c)	\
-	periph_clk_to_reg((c), RST_DEVICES_L, RST_DEVICES_V, 4)
-#define PERIPH_CLK_TO_RST_SET_REG(c)	\
-	periph_clk_to_reg((c), RST_DEVICES_SET_L, RST_DEVICES_SET_V, 8)
-#define PERIPH_CLK_TO_RST_CLR_REG(c)	\
-	periph_clk_to_reg((c), RST_DEVICES_CLR_L, RST_DEVICES_CLR_V, 8)
-
-#define PERIPH_CLK_TO_ENB_REG(c)	\
-	periph_clk_to_reg((c), CLK_OUT_ENB_L, CLK_OUT_ENB_V, 4)
-#define PERIPH_CLK_TO_ENB_SET_REG(c)	\
-	periph_clk_to_reg((c), CLK_OUT_ENB_SET_L, CLK_OUT_ENB_SET_V, 8)
-#define PERIPH_CLK_TO_ENB_CLR_REG(c)	\
-	periph_clk_to_reg((c), CLK_OUT_ENB_CLR_L, CLK_OUT_ENB_CLR_V, 8)
-
-#define CLK_MASK_ARM			0x44
-#define MISC_CLK_ENB			0x48
-
-#define OSC_CTRL			0x50
-#define OSC_CTRL_OSC_FREQ_MASK		(0xF<<28)
-#define OSC_CTRL_OSC_FREQ_13MHZ		(0x0<<28)
-#define OSC_CTRL_OSC_FREQ_19_2MHZ	(0x4<<28)
-#define OSC_CTRL_OSC_FREQ_12MHZ		(0x8<<28)
-#define OSC_CTRL_OSC_FREQ_26MHZ		(0xC<<28)
-#define OSC_CTRL_OSC_FREQ_16_8MHZ	(0x1<<28)
-#define OSC_CTRL_OSC_FREQ_38_4MHZ	(0x5<<28)
-#define OSC_CTRL_OSC_FREQ_48MHZ		(0x9<<28)
-#define OSC_CTRL_MASK			(0x3f2 | OSC_CTRL_OSC_FREQ_MASK)
-
-#define OSC_CTRL_PLL_REF_DIV_MASK	(3<<26)
-#define OSC_CTRL_PLL_REF_DIV_1		(0<<26)
-#define OSC_CTRL_PLL_REF_DIV_2		(1<<26)
-#define OSC_CTRL_PLL_REF_DIV_4		(2<<26)
-
-#define OSC_FREQ_DET			0x58
-#define OSC_FREQ_DET_TRIG		(1<<31)
-
-#define OSC_FREQ_DET_STATUS		0x5C
-#define OSC_FREQ_DET_BUSY		(1<<31)
-#define OSC_FREQ_DET_CNT_MASK		0xFFFF
-
-#define PERIPH_CLK_SOURCE_I2S1		0x100
-#define PERIPH_CLK_SOURCE_EMC		0x19c
-#define PERIPH_CLK_SOURCE_OSC		0x1fc
-#define PERIPH_CLK_SOURCE_NUM1 \
-	((PERIPH_CLK_SOURCE_OSC - PERIPH_CLK_SOURCE_I2S1) / 4)
-
-#define PERIPH_CLK_SOURCE_G3D2		0x3b0
-#define PERIPH_CLK_SOURCE_SE		0x42c
-#define PERIPH_CLK_SOURCE_NUM2 \
-	((PERIPH_CLK_SOURCE_SE - PERIPH_CLK_SOURCE_G3D2) / 4 + 1)
-
-#define AUDIO_DLY_CLK			0x49c
-#define AUDIO_SYNC_CLK_SPDIF		0x4b4
-#define PERIPH_CLK_SOURCE_NUM3 \
-	((AUDIO_SYNC_CLK_SPDIF - AUDIO_DLY_CLK) / 4 + 1)
-
-#define PERIPH_CLK_SOURCE_NUM		(PERIPH_CLK_SOURCE_NUM1 + \
-					 PERIPH_CLK_SOURCE_NUM2 + \
-					 PERIPH_CLK_SOURCE_NUM3)
-
-#define CPU_SOFTRST_CTRL		0x380
-
-#define PERIPH_CLK_SOURCE_DIVU71_MASK	0xFF
-#define PERIPH_CLK_SOURCE_DIVU16_MASK	0xFFFF
-#define PERIPH_CLK_SOURCE_DIV_SHIFT	0
-#define PERIPH_CLK_SOURCE_DIVIDLE_SHIFT	8
-#define PERIPH_CLK_SOURCE_DIVIDLE_VAL	50
-#define PERIPH_CLK_UART_DIV_ENB		(1<<24)
-#define PERIPH_CLK_VI_SEL_EX_SHIFT	24
-#define PERIPH_CLK_VI_SEL_EX_MASK	(0x3<<PERIPH_CLK_VI_SEL_EX_SHIFT)
-#define PERIPH_CLK_NAND_DIV_EX_ENB	(1<<8)
-#define PERIPH_CLK_DTV_POLARITY_INV	(1<<25)
-
-#define AUDIO_SYNC_SOURCE_MASK		0x0F
-#define AUDIO_SYNC_DISABLE_BIT		0x10
-#define AUDIO_SYNC_TAP_NIBBLE_SHIFT(c)	((c->reg_shift - 24) * 4)
-
-#define PLL_BASE			0x0
-#define PLL_BASE_BYPASS			(1<<31)
-#define PLL_BASE_ENABLE			(1<<30)
-#define PLL_BASE_REF_ENABLE		(1<<29)
-#define PLL_BASE_OVERRIDE		(1<<28)
-#define PLL_BASE_LOCK			(1<<27)
-#define PLL_BASE_DIVP_MASK		(0x7<<20)
-#define PLL_BASE_DIVP_SHIFT		20
-#define PLL_BASE_DIVN_MASK		(0x3FF<<8)
-#define PLL_BASE_DIVN_SHIFT		8
-#define PLL_BASE_DIVM_MASK		(0x1F)
-#define PLL_BASE_DIVM_SHIFT		0
-
-#define PLL_OUT_RATIO_MASK		(0xFF<<8)
-#define PLL_OUT_RATIO_SHIFT		8
-#define PLL_OUT_OVERRIDE		(1<<2)
-#define PLL_OUT_CLKEN			(1<<1)
-#define PLL_OUT_RESET_DISABLE		(1<<0)
-
-#define PLL_MISC(c)			\
-	(((c)->flags & PLL_ALT_MISC_REG) ? 0x4 : 0xc)
-#define PLL_MISC_LOCK_ENABLE(c)	\
-	(((c)->flags & (PLLU | PLLD)) ? (1<<22) : (1<<18))
-
-#define PLL_MISC_DCCON_SHIFT		20
-#define PLL_MISC_CPCON_SHIFT		8
-#define PLL_MISC_CPCON_MASK		(0xF<<PLL_MISC_CPCON_SHIFT)
-#define PLL_MISC_LFCON_SHIFT		4
-#define PLL_MISC_LFCON_MASK		(0xF<<PLL_MISC_LFCON_SHIFT)
-#define PLL_MISC_VCOCON_SHIFT		0
-#define PLL_MISC_VCOCON_MASK		(0xF<<PLL_MISC_VCOCON_SHIFT)
-#define PLLD_MISC_CLKENABLE		(1<<30)
-
-#define PLLU_BASE_POST_DIV		(1<<20)
-
-#define PLLD_BASE_DSIB_MUX_SHIFT	25
-#define PLLD_BASE_DSIB_MUX_MASK		(1<<PLLD_BASE_DSIB_MUX_SHIFT)
-#define PLLD_BASE_CSI_CLKENABLE		(1<<26)
-#define PLLD_MISC_DSI_CLKENABLE		(1<<30)
-#define PLLD_MISC_DIV_RST		(1<<23)
-#define PLLD_MISC_DCCON_SHIFT		12
-
-#define PLLDU_LFCON_SET_DIVN		600
-
-/* FIXME: OUT_OF_TABLE_CPCON per pll */
-#define OUT_OF_TABLE_CPCON		0x8
-
-#define SUPER_CLK_MUX			0x00
-#define SUPER_STATE_SHIFT		28
-#define SUPER_STATE_MASK		(0xF << SUPER_STATE_SHIFT)
-#define SUPER_STATE_STANDBY		(0x0 << SUPER_STATE_SHIFT)
-#define SUPER_STATE_IDLE		(0x1 << SUPER_STATE_SHIFT)
-#define SUPER_STATE_RUN			(0x2 << SUPER_STATE_SHIFT)
-#define SUPER_STATE_IRQ			(0x3 << SUPER_STATE_SHIFT)
-#define SUPER_STATE_FIQ			(0x4 << SUPER_STATE_SHIFT)
-#define SUPER_LP_DIV2_BYPASS		(0x1 << 16)
-#define SUPER_SOURCE_MASK		0xF
-#define	SUPER_FIQ_SOURCE_SHIFT		12
-#define	SUPER_IRQ_SOURCE_SHIFT		8
-#define	SUPER_RUN_SOURCE_SHIFT		4
-#define	SUPER_IDLE_SOURCE_SHIFT		0
-
-#define SUPER_CLK_DIVIDER		0x04
-#define SUPER_CLOCK_DIV_U71_SHIFT	16
-#define SUPER_CLOCK_DIV_U71_MASK	(0xff << SUPER_CLOCK_DIV_U71_SHIFT)
-/* guarantees safe cpu backup */
-#define SUPER_CLOCK_DIV_U71_MIN		0x2
-
-#define BUS_CLK_DISABLE			(1<<3)
-#define BUS_CLK_DIV_MASK		0x3
-
-#define PMC_CTRL			0x0
- #define PMC_CTRL_BLINK_ENB		(1 << 7)
-
-#define PMC_DPD_PADS_ORIDE		0x1c
- #define PMC_DPD_PADS_ORIDE_BLINK_ENB	(1 << 20)
-
-#define PMC_BLINK_TIMER_DATA_ON_SHIFT	0
-#define PMC_BLINK_TIMER_DATA_ON_MASK	0x7fff
-#define PMC_BLINK_TIMER_ENB		(1 << 15)
-#define PMC_BLINK_TIMER_DATA_OFF_SHIFT	16
-#define PMC_BLINK_TIMER_DATA_OFF_MASK	0xffff
-
-#define PMC_PLLP_WB0_OVERRIDE				0xf8
-#define PMC_PLLP_WB0_OVERRIDE_PLLM_ENABLE		(1 << 12)
-
-#define UTMIP_PLL_CFG2					0x488
-#define UTMIP_PLL_CFG2_STABLE_COUNT(x)			(((x) & 0xfff) << 6)
-#define UTMIP_PLL_CFG2_ACTIVE_DLY_COUNT(x)		(((x) & 0x3f) << 18)
-#define UTMIP_PLL_CFG2_FORCE_PD_SAMP_A_POWERDOWN	(1 << 0)
-#define UTMIP_PLL_CFG2_FORCE_PD_SAMP_B_POWERDOWN	(1 << 2)
-#define UTMIP_PLL_CFG2_FORCE_PD_SAMP_C_POWERDOWN	(1 << 4)
-
-#define UTMIP_PLL_CFG1					0x484
-#define UTMIP_PLL_CFG1_ENABLE_DLY_COUNT(x)		(((x) & 0x1f) << 27)
-#define UTMIP_PLL_CFG1_XTAL_FREQ_COUNT(x)		(((x) & 0xfff) << 0)
-#define UTMIP_PLL_CFG1_FORCE_PLL_ENABLE_POWERDOWN	(1 << 14)
-#define UTMIP_PLL_CFG1_FORCE_PLL_ACTIVE_POWERDOWN	(1 << 12)
-#define UTMIP_PLL_CFG1_FORCE_PLLU_POWERDOWN		(1 << 16)
-
-#define PLLE_BASE_CML_ENABLE		(1<<31)
-#define PLLE_BASE_ENABLE		(1<<30)
-#define PLLE_BASE_DIVCML_SHIFT		24
-#define PLLE_BASE_DIVCML_MASK		(0xf<<PLLE_BASE_DIVCML_SHIFT)
-#define PLLE_BASE_DIVP_SHIFT		16
-#define PLLE_BASE_DIVP_MASK		(0x3f<<PLLE_BASE_DIVP_SHIFT)
-#define PLLE_BASE_DIVN_SHIFT		8
-#define PLLE_BASE_DIVN_MASK		(0xFF<<PLLE_BASE_DIVN_SHIFT)
-#define PLLE_BASE_DIVM_SHIFT		0
-#define PLLE_BASE_DIVM_MASK		(0xFF<<PLLE_BASE_DIVM_SHIFT)
-#define PLLE_BASE_DIV_MASK		\
-	(PLLE_BASE_DIVCML_MASK | PLLE_BASE_DIVP_MASK | \
-	 PLLE_BASE_DIVN_MASK | PLLE_BASE_DIVM_MASK)
-#define PLLE_BASE_DIV(m, n, p, cml)		\
-	 (((cml)<<PLLE_BASE_DIVCML_SHIFT) | ((p)<<PLLE_BASE_DIVP_SHIFT) | \
-	  ((n)<<PLLE_BASE_DIVN_SHIFT) | ((m)<<PLLE_BASE_DIVM_SHIFT))
-
-#define PLLE_MISC_SETUP_BASE_SHIFT	16
-#define PLLE_MISC_SETUP_BASE_MASK	(0xFFFF<<PLLE_MISC_SETUP_BASE_SHIFT)
-#define PLLE_MISC_READY			(1<<15)
-#define PLLE_MISC_LOCK			(1<<11)
-#define PLLE_MISC_LOCK_ENABLE		(1<<9)
-#define PLLE_MISC_SETUP_EX_SHIFT	2
-#define PLLE_MISC_SETUP_EX_MASK		(0x3<<PLLE_MISC_SETUP_EX_SHIFT)
-#define PLLE_MISC_SETUP_MASK		\
-	  (PLLE_MISC_SETUP_BASE_MASK | PLLE_MISC_SETUP_EX_MASK)
-#define PLLE_MISC_SETUP_VALUE		\
-	  ((0x7<<PLLE_MISC_SETUP_BASE_SHIFT) | (0x0<<PLLE_MISC_SETUP_EX_SHIFT))
-
-#define PLLE_SS_CTRL			0x68
-#define	PLLE_SS_INCINTRV_SHIFT		24
-#define	PLLE_SS_INCINTRV_MASK		(0x3f<<PLLE_SS_INCINTRV_SHIFT)
-#define	PLLE_SS_INC_SHIFT		16
-#define	PLLE_SS_INC_MASK		(0xff<<PLLE_SS_INC_SHIFT)
-#define	PLLE_SS_MAX_SHIFT		0
-#define	PLLE_SS_MAX_MASK		(0x1ff<<PLLE_SS_MAX_SHIFT)
-#define PLLE_SS_COEFFICIENTS_MASK	\
-	(PLLE_SS_INCINTRV_MASK | PLLE_SS_INC_MASK | PLLE_SS_MAX_MASK)
-#define PLLE_SS_COEFFICIENTS_12MHZ	\
-	((0x18<<PLLE_SS_INCINTRV_SHIFT) | (0x1<<PLLE_SS_INC_SHIFT) | \
-	 (0x24<<PLLE_SS_MAX_SHIFT))
-#define PLLE_SS_DISABLE			((1<<12) | (1<<11) | (1<<10))
-
-#define PLLE_AUX			0x48c
-#define PLLE_AUX_PLLP_SEL		(1<<2)
-#define PLLE_AUX_CML_SATA_ENABLE	(1<<1)
-#define PLLE_AUX_CML_PCIE_ENABLE	(1<<0)
-
-#define	PMC_SATA_PWRGT			0x1ac
-#define PMC_SATA_PWRGT_PLLE_IDDQ_VALUE	(1<<5)
-#define PMC_SATA_PWRGT_PLLE_IDDQ_SWCTL	(1<<4)
-
-#define ROUND_DIVIDER_UP	0
-#define ROUND_DIVIDER_DOWN	1
-
-/* FIXME: recommended safety delay after lock is detected */
-#define PLL_POST_LOCK_DELAY		100
-
-/* Tegra CPU clock and reset control regs */
-#define TEGRA_CLK_RST_CONTROLLER_CLK_CPU_CMPLX		0x4c
-#define TEGRA_CLK_RST_CONTROLLER_RST_CPU_CMPLX_SET	0x340
-#define TEGRA_CLK_RST_CONTROLLER_RST_CPU_CMPLX_CLR	0x344
-#define TEGRA30_CLK_RST_CONTROLLER_CLK_CPU_CMPLX_CLR	0x34c
-#define TEGRA30_CLK_RST_CONTROLLER_CPU_CMPLX_STATUS	0x470
-
-#define CPU_CLOCK(cpu)	(0x1 << (8 + cpu))
-#define CPU_RESET(cpu)	(0x1111ul << (cpu))
-
-#define CLK_RESET_CCLK_BURST	0x20
-#define CLK_RESET_CCLK_DIVIDER  0x24
-#define CLK_RESET_PLLX_BASE	0xe0
-#define CLK_RESET_PLLX_MISC	0xe4
-
-#define CLK_RESET_SOURCE_CSITE	0x1d4
-
-#define CLK_RESET_CCLK_BURST_POLICY_SHIFT	28
-#define CLK_RESET_CCLK_RUN_POLICY_SHIFT		4
-#define CLK_RESET_CCLK_IDLE_POLICY_SHIFT	0
-#define CLK_RESET_CCLK_IDLE_POLICY		1
-#define CLK_RESET_CCLK_RUN_POLICY		2
-#define CLK_RESET_CCLK_BURST_POLICY_PLLX	8
-
-#ifdef CONFIG_PM_SLEEP
-static struct cpu_clk_suspend_context {
-	u32 pllx_misc;
-	u32 pllx_base;
-
-	u32 cpu_burst;
-	u32 clk_csite_src;
-	u32 cclk_divider;
-} tegra30_cpu_clk_sctx;
-#endif
-
-/**
-* Structure defining the fields for USB UTMI clocks Parameters.
-*/
-struct utmi_clk_param {
-	/* Oscillator Frequency in KHz */
-	u32 osc_frequency;
-	/* UTMIP PLL Enable Delay Count  */
-	u8 enable_delay_count;
-	/* UTMIP PLL Stable count */
-	u8 stable_count;
-	/*  UTMIP PLL Active delay count */
-	u8 active_delay_count;
-	/* UTMIP PLL Xtal frequency count */
-	u8 xtal_freq_count;
-};
-
-static const struct utmi_clk_param utmi_parameters[] = {
-	{
-		.osc_frequency = 13000000,
-		.enable_delay_count = 0x02,
-		.stable_count = 0x33,
-		.active_delay_count = 0x05,
-		.xtal_freq_count = 0x7F
-	},
-	{
-		.osc_frequency = 19200000,
-		.enable_delay_count = 0x03,
-		.stable_count = 0x4B,
-		.active_delay_count = 0x06,
-		.xtal_freq_count = 0xBB},
-	{
-		.osc_frequency = 12000000,
-		.enable_delay_count = 0x02,
-		.stable_count = 0x2F,
-		.active_delay_count = 0x04,
-		.xtal_freq_count = 0x76
-	},
-	{
-		.osc_frequency = 26000000,
-		.enable_delay_count = 0x04,
-		.stable_count = 0x66,
-		.active_delay_count = 0x09,
-		.xtal_freq_count = 0xFE
-	},
-	{
-		.osc_frequency = 16800000,
-		.enable_delay_count = 0x03,
-		.stable_count = 0x41,
-		.active_delay_count = 0x0A,
-		.xtal_freq_count = 0xA4
-	},
-};
-
-static void __iomem *reg_clk_base = IO_ADDRESS(TEGRA_CLK_RESET_BASE);
-static void __iomem *reg_pmc_base = IO_ADDRESS(TEGRA_PMC_BASE);
-static void __iomem *misc_gp_hidrev_base = IO_ADDRESS(TEGRA_APB_MISC_BASE);
-
-#define MISC_GP_HIDREV                  0x804
-
-/*
- * Some peripheral clocks share an enable bit, so refcount the enable bits
- * in registers CLK_ENABLE_L, ... CLK_ENABLE_W
- */
-static int tegra_periph_clk_enable_refcount[CLK_OUT_ENB_NUM * 32];
-
-#define clk_writel(value, reg) \
-	__raw_writel(value, reg_clk_base + (reg))
-#define clk_readl(reg) \
-	__raw_readl(reg_clk_base + (reg))
-#define pmc_writel(value, reg) \
-	__raw_writel(value, reg_pmc_base + (reg))
-#define pmc_readl(reg) \
-	__raw_readl(reg_pmc_base + (reg))
-#define chipid_readl() \
-	__raw_readl(misc_gp_hidrev_base + MISC_GP_HIDREV)
-
-#define clk_writel_delay(value, reg)					\
-	do {								\
-		__raw_writel((value), reg_clk_base + (reg));	\
-		udelay(2);						\
-	} while (0)
-
-static inline int clk_set_div(struct clk_tegra *c, u32 n)
-{
-	struct clk *clk = c->hw.clk;
-
-	return clk_set_rate(clk,
-			(__clk_get_rate(__clk_get_parent(clk)) + n - 1) / n);
-}
-
-static inline u32 periph_clk_to_reg(
-	struct clk_tegra *c, u32 reg_L, u32 reg_V, int offs)
-{
-	u32 reg = c->u.periph.clk_num / 32;
-	BUG_ON(reg >= RST_DEVICES_NUM);
-	if (reg < 3)
-		reg = reg_L + (reg * offs);
-	else
-		reg = reg_V + ((reg - 3) * offs);
-	return reg;
-}
-
-static unsigned long clk_measure_input_freq(void)
-{
-	u32 clock_autodetect;
-	clk_writel(OSC_FREQ_DET_TRIG | 1, OSC_FREQ_DET);
-	do {} while (clk_readl(OSC_FREQ_DET_STATUS) & OSC_FREQ_DET_BUSY);
-	clock_autodetect = clk_readl(OSC_FREQ_DET_STATUS);
-	if (clock_autodetect >= 732 - 3 && clock_autodetect <= 732 + 3) {
-		return 12000000;
-	} else if (clock_autodetect >= 794 - 3 && clock_autodetect <= 794 + 3) {
-		return 13000000;
-	} else if (clock_autodetect >= 1172 - 3 && clock_autodetect <= 1172 + 3) {
-		return 19200000;
-	} else if (clock_autodetect >= 1587 - 3 && clock_autodetect <= 1587 + 3) {
-		return 26000000;
-	} else if (clock_autodetect >= 1025 - 3 && clock_autodetect <= 1025 + 3) {
-		return 16800000;
-	} else if (clock_autodetect >= 2344 - 3 && clock_autodetect <= 2344 + 3) {
-		return 38400000;
-	} else if (clock_autodetect >= 2928 - 3 && clock_autodetect <= 2928 + 3) {
-		return 48000000;
-	} else {
-		pr_err("%s: Unexpected clock autodetect value %d", __func__,
-			clock_autodetect);
-		BUG();
-		return 0;
-	}
-}
-
-static int clk_div71_get_divider(unsigned long parent_rate, unsigned long rate,
-				 u32 flags, u32 round_mode)
-{
-	s64 divider_u71 = parent_rate;
-	if (!rate)
-		return -EINVAL;
-
-	if (!(flags & DIV_U71_INT))
-		divider_u71 *= 2;
-	if (round_mode == ROUND_DIVIDER_UP)
-		divider_u71 += rate - 1;
-	do_div(divider_u71, rate);
-	if (flags & DIV_U71_INT)
-		divider_u71 *= 2;
-
-	if (divider_u71 - 2 < 0)
-		return 0;
-
-	if (divider_u71 - 2 > 255)
-		return -EINVAL;
-
-	return divider_u71 - 2;
-}
-
-static int clk_div16_get_divider(unsigned long parent_rate, unsigned long rate)
-{
-	s64 divider_u16;
-
-	divider_u16 = parent_rate;
-	if (!rate)
-		return -EINVAL;
-	divider_u16 += rate - 1;
-	do_div(divider_u16, rate);
-
-	if (divider_u16 - 1 < 0)
-		return 0;
-
-	if (divider_u16 - 1 > 0xFFFF)
-		return -EINVAL;
-
-	return divider_u16 - 1;
-}
-
-static unsigned long tegra30_clk_fixed_recalc_rate(struct clk_hw *hw,
-		unsigned long parent_rate)
-{
-	return to_clk_tegra(hw)->fixed_rate;
-}
-
-struct clk_ops tegra30_clk_32k_ops = {
-	.recalc_rate = tegra30_clk_fixed_recalc_rate,
-};
-
-/* clk_m functions */
-static unsigned long tegra30_clk_m_recalc_rate(struct clk_hw *hw,
-		unsigned long parent_rate)
-{
-	if (!to_clk_tegra(hw)->fixed_rate)
-		to_clk_tegra(hw)->fixed_rate = clk_measure_input_freq();
-	return to_clk_tegra(hw)->fixed_rate;
-}
-
-static void tegra30_clk_m_init(struct clk_hw *hw)
-{
-	u32 osc_ctrl = clk_readl(OSC_CTRL);
-	u32 auto_clock_control = osc_ctrl & ~OSC_CTRL_OSC_FREQ_MASK;
-	u32 pll_ref_div = osc_ctrl & OSC_CTRL_PLL_REF_DIV_MASK;
-
-	switch (to_clk_tegra(hw)->fixed_rate) {
-	case 12000000:
-		auto_clock_control |= OSC_CTRL_OSC_FREQ_12MHZ;
-		BUG_ON(pll_ref_div != OSC_CTRL_PLL_REF_DIV_1);
-		break;
-	case 13000000:
-		auto_clock_control |= OSC_CTRL_OSC_FREQ_13MHZ;
-		BUG_ON(pll_ref_div != OSC_CTRL_PLL_REF_DIV_1);
-		break;
-	case 19200000:
-		auto_clock_control |= OSC_CTRL_OSC_FREQ_19_2MHZ;
-		BUG_ON(pll_ref_div != OSC_CTRL_PLL_REF_DIV_1);
-		break;
-	case 26000000:
-		auto_clock_control |= OSC_CTRL_OSC_FREQ_26MHZ;
-		BUG_ON(pll_ref_div != OSC_CTRL_PLL_REF_DIV_1);
-		break;
-	case 16800000:
-		auto_clock_control |= OSC_CTRL_OSC_FREQ_16_8MHZ;
-		BUG_ON(pll_ref_div != OSC_CTRL_PLL_REF_DIV_1);
-		break;
-	case 38400000:
-		auto_clock_control |= OSC_CTRL_OSC_FREQ_38_4MHZ;
-		BUG_ON(pll_ref_div != OSC_CTRL_PLL_REF_DIV_2);
-		break;
-	case 48000000:
-		auto_clock_control |= OSC_CTRL_OSC_FREQ_48MHZ;
-		BUG_ON(pll_ref_div != OSC_CTRL_PLL_REF_DIV_4);
-		break;
-	default:
-		pr_err("%s: Unexpected clock rate %ld", __func__,
-				to_clk_tegra(hw)->fixed_rate);
-		BUG();
-	}
-	clk_writel(auto_clock_control, OSC_CTRL);
-}
-
-struct clk_ops tegra30_clk_m_ops = {
-	.init = tegra30_clk_m_init,
-	.recalc_rate = tegra30_clk_m_recalc_rate,
-};
-
-static unsigned long tegra30_clk_m_div_recalc_rate(struct clk_hw *hw,
-		unsigned long parent_rate)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	u64 rate = parent_rate;
-
-	if (c->mul != 0 && c->div != 0) {
-		rate *= c->mul;
-		rate += c->div - 1; /* round up */
-		do_div(rate, c->div);
-	}
-
-	return rate;
-}
-
-struct clk_ops tegra_clk_m_div_ops = {
-	.recalc_rate = tegra30_clk_m_div_recalc_rate,
-};
-
-/* PLL reference divider functions */
-static unsigned long tegra30_pll_ref_recalc_rate(struct clk_hw *hw,
-			unsigned long parent_rate)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	unsigned long rate = parent_rate;
-	u32 pll_ref_div = clk_readl(OSC_CTRL) & OSC_CTRL_PLL_REF_DIV_MASK;
-
-	switch (pll_ref_div) {
-	case OSC_CTRL_PLL_REF_DIV_1:
-		c->div = 1;
-		break;
-	case OSC_CTRL_PLL_REF_DIV_2:
-		c->div = 2;
-		break;
-	case OSC_CTRL_PLL_REF_DIV_4:
-		c->div = 4;
-		break;
-	default:
-		pr_err("%s: Invalid pll ref divider %d", __func__, pll_ref_div);
-		BUG();
-	}
-	c->mul = 1;
-
-	if (c->mul != 0 && c->div != 0) {
-		rate *= c->mul;
-		rate += c->div - 1; /* round up */
-		do_div(rate, c->div);
-	}
-
-	return rate;
-}
-
-struct clk_ops tegra_pll_ref_ops = {
-	.recalc_rate = tegra30_pll_ref_recalc_rate,
-};
-
-/* super clock functions */
-/* "super clocks" on tegra30 have two-stage muxes, fractional 7.1 divider and
- * clock skipping super divider.  We will ignore the clock skipping divider,
- * since we can't lower the voltage when using the clock skip, but we can if
- * we lower the PLL frequency. We will use 7.1 divider for CPU super-clock
- * only when its parent is a fixed rate PLL, since we can't change PLL rate
- * in this case.
- */
-static void tegra30_super_clk_init(struct clk_hw *hw)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	struct clk_tegra *p =
-			to_clk_tegra(__clk_get_hw(__clk_get_parent(hw->clk)));
-
-	c->state = ON;
-	if (c->flags & DIV_U71) {
-		/* Init safe 7.1 divider value (does not affect PLLX path) */
-		clk_writel(SUPER_CLOCK_DIV_U71_MIN << SUPER_CLOCK_DIV_U71_SHIFT,
-			   c->reg + SUPER_CLK_DIVIDER);
-		c->mul = 2;
-		c->div = 2;
-		if (!(p->flags & PLLX))
-			c->div += SUPER_CLOCK_DIV_U71_MIN;
-	} else
-		clk_writel(0, c->reg + SUPER_CLK_DIVIDER);
-}
-
-static u8 tegra30_super_clk_get_parent(struct clk_hw *hw)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	u32 val;
-	int source;
-	int shift;
-
-	val = clk_readl(c->reg + SUPER_CLK_MUX);
-	BUG_ON(((val & SUPER_STATE_MASK) != SUPER_STATE_RUN) &&
-		((val & SUPER_STATE_MASK) != SUPER_STATE_IDLE));
-	shift = ((val & SUPER_STATE_MASK) == SUPER_STATE_IDLE) ?
-		SUPER_IDLE_SOURCE_SHIFT : SUPER_RUN_SOURCE_SHIFT;
-	source = (val >> shift) & SUPER_SOURCE_MASK;
-	if (c->flags & DIV_2)
-		source |= val & SUPER_LP_DIV2_BYPASS;
-
-	return source;
-}
-
-static int tegra30_super_clk_set_parent(struct clk_hw *hw, u8 index)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	struct clk_tegra *p =
-			to_clk_tegra(__clk_get_hw(clk_get_parent(hw->clk)));
-	u32 val;
-	int shift;
-
-	val = clk_readl(c->reg + SUPER_CLK_MUX);
-	BUG_ON(((val & SUPER_STATE_MASK) != SUPER_STATE_RUN) &&
-		((val & SUPER_STATE_MASK) != SUPER_STATE_IDLE));
-	shift = ((val & SUPER_STATE_MASK) == SUPER_STATE_IDLE) ?
-		SUPER_IDLE_SOURCE_SHIFT : SUPER_RUN_SOURCE_SHIFT;
-
-	/* For LP mode super-clock switch between PLLX direct
-	   and divided-by-2 outputs is allowed only when other
-	   than PLLX clock source is current parent */
-	if ((c->flags & DIV_2) && (p->flags & PLLX) &&
-	    ((index ^ val) & SUPER_LP_DIV2_BYPASS)) {
-		if (p->flags & PLLX)
-			return -EINVAL;
-		val ^= SUPER_LP_DIV2_BYPASS;
-		clk_writel_delay(val, c->reg);
-	}
-	val &= ~(SUPER_SOURCE_MASK << shift);
-	val |= (index & SUPER_SOURCE_MASK) << shift;
-
-	/* 7.1 divider for CPU super-clock does not affect
-	   PLLX path */
-	if (c->flags & DIV_U71) {
-		u32 div = 0;
-		if (!(p->flags & PLLX)) {
-			div = clk_readl(c->reg +
-					SUPER_CLK_DIVIDER);
-			div &= SUPER_CLOCK_DIV_U71_MASK;
-			div >>= SUPER_CLOCK_DIV_U71_SHIFT;
-		}
-		c->div = div + 2;
-		c->mul = 2;
-	}
-	clk_writel_delay(val, c->reg);
-
-	return 0;
-}
-
-/*
- * Do not use super clocks "skippers", since dividing using a clock skipper
- * does not allow the voltage to be scaled down. Instead adjust the rate of
- * the parent clock. This requires that the parent of a super clock have no
- * other children, otherwise the rate will change underneath the other
- * children. Special case: if fixed rate PLL is CPU super clock parent the
- * rate of this PLL can't be changed, and it has many other children. In
- * this case use 7.1 fractional divider to adjust the super clock rate.
- */
-static int tegra30_super_clk_set_rate(struct clk_hw *hw, unsigned long rate,
-		unsigned long parent_rate)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	struct clk *parent = __clk_get_parent(hw->clk);
-	struct clk_tegra *cparent = to_clk_tegra(__clk_get_hw(parent));
-
-	if ((c->flags & DIV_U71) && (cparent->flags & PLL_FIXED)) {
-		int div = clk_div71_get_divider(parent_rate,
-					rate, c->flags, ROUND_DIVIDER_DOWN);
-		div = max(div, SUPER_CLOCK_DIV_U71_MIN);
-
-		clk_writel(div << SUPER_CLOCK_DIV_U71_SHIFT,
-			   c->reg + SUPER_CLK_DIVIDER);
-		c->div = div + 2;
-		c->mul = 2;
-		return 0;
-	}
-	return 0;
-}
-
-static unsigned long tegra30_super_clk_recalc_rate(struct clk_hw *hw,
-		unsigned long parent_rate)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	u64 rate = parent_rate;
-
-	if (c->mul != 0 && c->div != 0) {
-		rate *= c->mul;
-		rate += c->div - 1; /* round up */
-		do_div(rate, c->div);
-	}
-
-	return rate;
-}
-
-static long tegra30_super_clk_round_rate(struct clk_hw *hw, unsigned long rate,
-				unsigned long *prate)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	struct clk *parent = __clk_get_parent(hw->clk);
-	struct clk_tegra *cparent = to_clk_tegra(__clk_get_hw(parent));
-	int mul = 2;
-	int div;
-
-	if ((c->flags & DIV_U71) && (cparent->flags & PLL_FIXED)) {
-		div = clk_div71_get_divider(*prate,
-				rate, c->flags, ROUND_DIVIDER_DOWN);
-		div = max(div, SUPER_CLOCK_DIV_U71_MIN) + 2;
-		rate = *prate * mul;
-		rate += div - 1; /* round up */
-		do_div(rate, c->div);
-
-		return rate;
-	}
-	return *prate;
-}
-
-struct clk_ops tegra30_super_ops = {
-	.init = tegra30_super_clk_init,
-	.set_parent = tegra30_super_clk_set_parent,
-	.get_parent = tegra30_super_clk_get_parent,
-	.recalc_rate = tegra30_super_clk_recalc_rate,
-	.round_rate = tegra30_super_clk_round_rate,
-	.set_rate = tegra30_super_clk_set_rate,
-};
-
-static unsigned long tegra30_twd_clk_recalc_rate(struct clk_hw *hw,
-		unsigned long parent_rate)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	u64 rate = parent_rate;
-
-	if (c->mul != 0 && c->div != 0) {
-		rate *= c->mul;
-		rate += c->div - 1; /* round up */
-		do_div(rate, c->div);
-	}
-
-	return rate;
-}
-
-struct clk_ops tegra30_twd_ops = {
-	.recalc_rate = tegra30_twd_clk_recalc_rate,
-};
-
-/* bus clock functions */
-static int tegra30_bus_clk_is_enabled(struct clk_hw *hw)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	u32 val = clk_readl(c->reg);
-
-	c->state = ((val >> c->reg_shift) & BUS_CLK_DISABLE) ? OFF : ON;
-	return c->state;
-}
-
-static int tegra30_bus_clk_enable(struct clk_hw *hw)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	u32 val;
-
-	val = clk_readl(c->reg);
-	val &= ~(BUS_CLK_DISABLE << c->reg_shift);
-	clk_writel(val, c->reg);
-
-	return 0;
-}
-
-static void tegra30_bus_clk_disable(struct clk_hw *hw)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	u32 val;
-
-	val = clk_readl(c->reg);
-	val |= BUS_CLK_DISABLE << c->reg_shift;
-	clk_writel(val, c->reg);
-}
-
-static unsigned long tegra30_bus_clk_recalc_rate(struct clk_hw *hw,
-			unsigned long prate)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	u32 val = clk_readl(c->reg);
-	u64 rate = prate;
-
-	c->div = ((val >> c->reg_shift) & BUS_CLK_DIV_MASK) + 1;
-	c->mul = 1;
-
-	if (c->mul != 0 && c->div != 0) {
-		rate *= c->mul;
-		rate += c->div - 1; /* round up */
-		do_div(rate, c->div);
-	}
-	return rate;
-}
-
-static int tegra30_bus_clk_set_rate(struct clk_hw *hw, unsigned long rate,
-		unsigned long parent_rate)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	int ret = -EINVAL;
-	u32 val;
-	int i;
-
-	val = clk_readl(c->reg);
-	for (i = 1; i <= 4; i++) {
-		if (rate == parent_rate / i) {
-			val &= ~(BUS_CLK_DIV_MASK << c->reg_shift);
-			val |= (i - 1) << c->reg_shift;
-			clk_writel(val, c->reg);
-			c->div = i;
-			c->mul = 1;
-			ret = 0;
-			break;
-		}
-	}
-
-	return ret;
-}
-
-static long tegra30_bus_clk_round_rate(struct clk_hw *hw, unsigned long rate,
-				unsigned long *prate)
-{
-	unsigned long parent_rate = *prate;
-	s64 divider;
-
-	if (rate >= parent_rate)
-		return parent_rate;
-
-	divider = parent_rate;
-	divider += rate - 1;
-	do_div(divider, rate);
-
-	if (divider < 0)
-		return divider;
-
-	if (divider > 4)
-		divider = 4;
-	do_div(parent_rate, divider);
-
-	return parent_rate;
-}
-
-struct clk_ops tegra30_bus_ops = {
-	.is_enabled = tegra30_bus_clk_is_enabled,
-	.enable = tegra30_bus_clk_enable,
-	.disable = tegra30_bus_clk_disable,
-	.set_rate = tegra30_bus_clk_set_rate,
-	.round_rate = tegra30_bus_clk_round_rate,
-	.recalc_rate = tegra30_bus_clk_recalc_rate,
-};
-
-/* Blink output functions */
-static int tegra30_blink_clk_is_enabled(struct clk_hw *hw)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	u32 val;
-
-	val = pmc_readl(PMC_CTRL);
-	c->state = (val & PMC_CTRL_BLINK_ENB) ? ON : OFF;
-	return c->state;
-}
-
-static int tegra30_blink_clk_enable(struct clk_hw *hw)
-{
-	u32 val;
-
-	val = pmc_readl(PMC_DPD_PADS_ORIDE);
-	pmc_writel(val | PMC_DPD_PADS_ORIDE_BLINK_ENB, PMC_DPD_PADS_ORIDE);
-
-	val = pmc_readl(PMC_CTRL);
-	pmc_writel(val | PMC_CTRL_BLINK_ENB, PMC_CTRL);
-
-	return 0;
-}
-
-static void tegra30_blink_clk_disable(struct clk_hw *hw)
-{
-	u32 val;
-
-	val = pmc_readl(PMC_CTRL);
-	pmc_writel(val & ~PMC_CTRL_BLINK_ENB, PMC_CTRL);
-
-	val = pmc_readl(PMC_DPD_PADS_ORIDE);
-	pmc_writel(val & ~PMC_DPD_PADS_ORIDE_BLINK_ENB, PMC_DPD_PADS_ORIDE);
-}
-
-static int tegra30_blink_clk_set_rate(struct clk_hw *hw, unsigned long rate,
-		unsigned long parent_rate)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-
-	if (rate >= parent_rate) {
-		c->div = 1;
-		pmc_writel(0, c->reg);
-	} else {
-		unsigned int on_off;
-		u32 val;
-
-		on_off = DIV_ROUND_UP(parent_rate / 8, rate);
-		c->div = on_off * 8;
-
-		val = (on_off & PMC_BLINK_TIMER_DATA_ON_MASK) <<
-			PMC_BLINK_TIMER_DATA_ON_SHIFT;
-		on_off &= PMC_BLINK_TIMER_DATA_OFF_MASK;
-		on_off <<= PMC_BLINK_TIMER_DATA_OFF_SHIFT;
-		val |= on_off;
-		val |= PMC_BLINK_TIMER_ENB;
-		pmc_writel(val, c->reg);
-	}
-
-	return 0;
-}
-
-static unsigned long tegra30_blink_clk_recalc_rate(struct clk_hw *hw,
-		unsigned long parent_rate)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	u64 rate = parent_rate;
-	u32 val;
-	u32 mul;
-	u32 div;
-	u32 on_off;
-
-	mul = 1;
-	val = pmc_readl(c->reg);
-
-	if (val & PMC_BLINK_TIMER_ENB) {
-		on_off = (val >> PMC_BLINK_TIMER_DATA_ON_SHIFT) &
-			PMC_BLINK_TIMER_DATA_ON_MASK;
-		val >>= PMC_BLINK_TIMER_DATA_OFF_SHIFT;
-		val &= PMC_BLINK_TIMER_DATA_OFF_MASK;
-		on_off += val;
-		/* each tick in the blink timer is 4 32KHz clocks */
-		div = on_off * 4;
-	} else {
-		div = 1;
-	}
-
-	if (mul != 0 && div != 0) {
-		rate *= mul;
-		rate += div - 1; /* round up */
-		do_div(rate, div);
-	}
-	return rate;
-}
-
-static long tegra30_blink_clk_round_rate(struct clk_hw *hw, unsigned long rate,
-				unsigned long *prate)
-{
-	int div;
-	int mul;
-	long round_rate = *prate;
-
-	mul = 1;
-
-	if (rate >= *prate) {
-		div = 1;
-	} else {
-		div = DIV_ROUND_UP(*prate / 8, rate);
-		div *= 8;
-	}
-
-	round_rate *= mul;
-	round_rate += div - 1;
-	do_div(round_rate, div);
-
-	return round_rate;
-}
-
-struct clk_ops tegra30_blink_clk_ops = {
-	.is_enabled = tegra30_blink_clk_is_enabled,
-	.enable = tegra30_blink_clk_enable,
-	.disable = tegra30_blink_clk_disable,
-	.recalc_rate = tegra30_blink_clk_recalc_rate,
-	.round_rate = tegra30_blink_clk_round_rate,
-	.set_rate = tegra30_blink_clk_set_rate,
-};
-
-static void tegra30_utmi_param_configure(struct clk_hw *hw)
-{
-	unsigned long main_rate =
-		__clk_get_rate(__clk_get_parent(__clk_get_parent(hw->clk)));
-	u32 reg;
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(utmi_parameters); i++) {
-		if (main_rate == utmi_parameters[i].osc_frequency)
-			break;
-	}
-
-	if (i >= ARRAY_SIZE(utmi_parameters)) {
-		pr_err("%s: Unexpected main rate %lu\n", __func__, main_rate);
-		return;
-	}
-
-	reg = clk_readl(UTMIP_PLL_CFG2);
-
-	/* Program UTMIP PLL stable and active counts */
-	/* [FIXME] arclk_rst.h says WRONG! This should be 1ms -> 0x50 Check! */
-	reg &= ~UTMIP_PLL_CFG2_STABLE_COUNT(~0);
-	reg |= UTMIP_PLL_CFG2_STABLE_COUNT(
-			utmi_parameters[i].stable_count);
-
-	reg &= ~UTMIP_PLL_CFG2_ACTIVE_DLY_COUNT(~0);
-
-	reg |= UTMIP_PLL_CFG2_ACTIVE_DLY_COUNT(
-			utmi_parameters[i].active_delay_count);
-
-	/* Remove power downs from UTMIP PLL control bits */
-	reg &= ~UTMIP_PLL_CFG2_FORCE_PD_SAMP_A_POWERDOWN;
-	reg &= ~UTMIP_PLL_CFG2_FORCE_PD_SAMP_B_POWERDOWN;
-	reg &= ~UTMIP_PLL_CFG2_FORCE_PD_SAMP_C_POWERDOWN;
-
-	clk_writel(reg, UTMIP_PLL_CFG2);
-
-	/* Program UTMIP PLL delay and oscillator frequency counts */
-	reg = clk_readl(UTMIP_PLL_CFG1);
-	reg &= ~UTMIP_PLL_CFG1_ENABLE_DLY_COUNT(~0);
-
-	reg |= UTMIP_PLL_CFG1_ENABLE_DLY_COUNT(
-		utmi_parameters[i].enable_delay_count);
-
-	reg &= ~UTMIP_PLL_CFG1_XTAL_FREQ_COUNT(~0);
-	reg |= UTMIP_PLL_CFG1_XTAL_FREQ_COUNT(
-		utmi_parameters[i].xtal_freq_count);
-
-	/* Remove power downs from UTMIP PLL control bits */
-	reg &= ~UTMIP_PLL_CFG1_FORCE_PLL_ENABLE_POWERDOWN;
-	reg &= ~UTMIP_PLL_CFG1_FORCE_PLL_ACTIVE_POWERDOWN;
-	reg &= ~UTMIP_PLL_CFG1_FORCE_PLLU_POWERDOWN;
-
-	clk_writel(reg, UTMIP_PLL_CFG1);
-}
-
-/* PLL Functions */
-static int tegra30_pll_clk_wait_for_lock(struct clk_tegra *c, u32 lock_reg,
-					 u32 lock_bit)
-{
-	int ret = 0;
-
-#if USE_PLL_LOCK_BITS
-	int i;
-	for (i = 0; i < c->u.pll.lock_delay; i++) {
-		if (clk_readl(lock_reg) & lock_bit) {
-			udelay(PLL_POST_LOCK_DELAY);
-			return 0;
-		}
-		udelay(2);	/* timeout = 2 * lock time */
-	}
-	pr_err("Timed out waiting for lock bit on pll %s",
-					__clk_get_name(hw->clk));
-	ret = -1;
-#else
-	udelay(c->u.pll.lock_delay);
-#endif
-	return ret;
-}
-
-static int tegra30_pll_clk_is_enabled(struct clk_hw *hw)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	u32 val = clk_readl(c->reg + PLL_BASE);
-
-	c->state = (val & PLL_BASE_ENABLE) ? ON : OFF;
-	return c->state;
-}
-
-static void tegra30_pll_clk_init(struct clk_hw *hw)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-
-	if (c->flags & PLLU)
-		tegra30_utmi_param_configure(hw);
-}
-
-static int tegra30_pll_clk_enable(struct clk_hw *hw)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	u32 val;
-	pr_debug("%s on clock %s\n", __func__, __clk_get_name(hw->clk));
-
-#if USE_PLL_LOCK_BITS
-	val = clk_readl(c->reg + PLL_MISC(c));
-	val |= PLL_MISC_LOCK_ENABLE(c);
-	clk_writel(val, c->reg + PLL_MISC(c));
-#endif
-	val = clk_readl(c->reg + PLL_BASE);
-	val &= ~PLL_BASE_BYPASS;
-	val |= PLL_BASE_ENABLE;
-	clk_writel(val, c->reg + PLL_BASE);
-
-	if (c->flags & PLLM) {
-		val = pmc_readl(PMC_PLLP_WB0_OVERRIDE);
-		val |= PMC_PLLP_WB0_OVERRIDE_PLLM_ENABLE;
-		pmc_writel(val, PMC_PLLP_WB0_OVERRIDE);
-	}
-
-	tegra30_pll_clk_wait_for_lock(c, c->reg + PLL_BASE, PLL_BASE_LOCK);
-
-	return 0;
-}
-
-static void tegra30_pll_clk_disable(struct clk_hw *hw)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	u32 val;
-	pr_debug("%s on clock %s\n", __func__, __clk_get_name(hw->clk));
-
-	val = clk_readl(c->reg);
-	val &= ~(PLL_BASE_BYPASS | PLL_BASE_ENABLE);
-	clk_writel(val, c->reg);
-
-	if (c->flags & PLLM) {
-		val = pmc_readl(PMC_PLLP_WB0_OVERRIDE);
-		val &= ~PMC_PLLP_WB0_OVERRIDE_PLLM_ENABLE;
-		pmc_writel(val, PMC_PLLP_WB0_OVERRIDE);
-	}
-}
-
-static int tegra30_pll_clk_set_rate(struct clk_hw *hw, unsigned long rate,
-		unsigned long parent_rate)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	u32 val, p_div, old_base;
-	unsigned long input_rate;
-	const struct clk_pll_freq_table *sel;
-	struct clk_pll_freq_table cfg;
-
-	if (c->flags & PLL_FIXED) {
-		int ret = 0;
-		if (rate != c->u.pll.fixed_rate) {
-			pr_err("%s: Can not change %s fixed rate %lu to %lu\n",
-			       __func__, __clk_get_name(hw->clk),
-				c->u.pll.fixed_rate, rate);
-			ret = -EINVAL;
-		}
-		return ret;
-	}
-
-	if (c->flags & PLLM) {
-		if (rate != __clk_get_rate(hw->clk)) {
-			pr_err("%s: Can not change memory %s rate in flight\n",
-				__func__, __clk_get_name(hw->clk));
-			return -EINVAL;
-		}
-	}
-
-	p_div = 0;
-	input_rate = parent_rate;
-
-	/* Check if the target rate is tabulated */
-	for (sel = c->u.pll.freq_table; sel->input_rate != 0; sel++) {
-		if (sel->input_rate == input_rate && sel->output_rate == rate) {
-			if (c->flags & PLLU) {
-				BUG_ON(sel->p < 1 || sel->p > 2);
-				if (sel->p == 1)
-					p_div = PLLU_BASE_POST_DIV;
-			} else {
-				BUG_ON(sel->p < 1);
-				for (val = sel->p; val > 1; val >>= 1)
-					p_div++;
-				p_div <<= PLL_BASE_DIVP_SHIFT;
-			}
-			break;
-		}
-	}
-
-	/* Configure out-of-table rate */
-	if (sel->input_rate == 0) {
-		unsigned long cfreq;
-		BUG_ON(c->flags & PLLU);
-		sel = &cfg;
-
-		switch (input_rate) {
-		case 12000000:
-		case 26000000:
-			cfreq = (rate <= 1000000 * 1000) ? 1000000 : 2000000;
-			break;
-		case 13000000:
-			cfreq = (rate <= 1000000 * 1000) ? 1000000 : 2600000;
-			break;
-		case 16800000:
-		case 19200000:
-			cfreq = (rate <= 1200000 * 1000) ? 1200000 : 2400000;
-			break;
-		default:
-			pr_err("%s: Unexpected reference rate %lu\n",
-			       __func__, input_rate);
-			BUG();
-		}
-
-		/* Raise VCO to guarantee 0.5% accuracy */
-		for (cfg.output_rate = rate; cfg.output_rate < 200 * cfreq;
-		      cfg.output_rate <<= 1)
-			p_div++;
-
-		cfg.p = 0x1 << p_div;
-		cfg.m = input_rate / cfreq;
-		cfg.n = cfg.output_rate / cfreq;
-		cfg.cpcon = OUT_OF_TABLE_CPCON;
-
-		if ((cfg.m > (PLL_BASE_DIVM_MASK >> PLL_BASE_DIVM_SHIFT)) ||
-		    (cfg.n > (PLL_BASE_DIVN_MASK >> PLL_BASE_DIVN_SHIFT)) ||
-		    (p_div > (PLL_BASE_DIVP_MASK >> PLL_BASE_DIVP_SHIFT)) ||
-		    (cfg.output_rate > c->u.pll.vco_max)) {
-			pr_err("%s: Failed to set %s out-of-table rate %lu\n",
-			       __func__, __clk_get_name(hw->clk), rate);
-			return -EINVAL;
-		}
-		p_div <<= PLL_BASE_DIVP_SHIFT;
-	}
-
-	c->mul = sel->n;
-	c->div = sel->m * sel->p;
-
-	old_base = val = clk_readl(c->reg + PLL_BASE);
-	val &= ~(PLL_BASE_DIVM_MASK | PLL_BASE_DIVN_MASK |
-		 ((c->flags & PLLU) ? PLLU_BASE_POST_DIV : PLL_BASE_DIVP_MASK));
-	val |= (sel->m << PLL_BASE_DIVM_SHIFT) |
-		(sel->n << PLL_BASE_DIVN_SHIFT) | p_div;
-	if (val == old_base)
-		return 0;
-
-	if (c->state == ON) {
-		tegra30_pll_clk_disable(hw);
-		val &= ~(PLL_BASE_BYPASS | PLL_BASE_ENABLE);
-	}
-	clk_writel(val, c->reg + PLL_BASE);
-
-	if (c->flags & PLL_HAS_CPCON) {
-		val = clk_readl(c->reg + PLL_MISC(c));
-		val &= ~PLL_MISC_CPCON_MASK;
-		val |= sel->cpcon << PLL_MISC_CPCON_SHIFT;
-		if (c->flags & (PLLU | PLLD)) {
-			val &= ~PLL_MISC_LFCON_MASK;
-			if (sel->n >= PLLDU_LFCON_SET_DIVN)
-				val |= 0x1 << PLL_MISC_LFCON_SHIFT;
-		} else if (c->flags & (PLLX | PLLM)) {
-			val &= ~(0x1 << PLL_MISC_DCCON_SHIFT);
-			if (rate >= (c->u.pll.vco_max >> 1))
-				val |= 0x1 << PLL_MISC_DCCON_SHIFT;
-		}
-		clk_writel(val, c->reg + PLL_MISC(c));
-	}
-
-	if (c->state == ON)
-		tegra30_pll_clk_enable(hw);
-
-	c->u.pll.fixed_rate = rate;
-
-	return 0;
-}
-
-static long tegra30_pll_round_rate(struct clk_hw *hw, unsigned long rate,
-				unsigned long *prate)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	unsigned long input_rate = *prate;
-	u64 output_rate = *prate;
-	const struct clk_pll_freq_table *sel;
-	struct clk_pll_freq_table cfg;
-	int mul;
-	int div;
-	u32 p_div;
-	u32 val;
-
-	if (c->flags & PLL_FIXED)
-		return c->u.pll.fixed_rate;
-
-	if (c->flags & PLLM)
-		return __clk_get_rate(hw->clk);
-
-	p_div = 0;
-	/* Check if the target rate is tabulated */
-	for (sel = c->u.pll.freq_table; sel->input_rate != 0; sel++) {
-		if (sel->input_rate == input_rate && sel->output_rate == rate) {
-			if (c->flags & PLLU) {
-				BUG_ON(sel->p < 1 || sel->p > 2);
-				if (sel->p == 1)
-					p_div = PLLU_BASE_POST_DIV;
-			} else {
-				BUG_ON(sel->p < 1);
-				for (val = sel->p; val > 1; val >>= 1)
-					p_div++;
-				p_div <<= PLL_BASE_DIVP_SHIFT;
-			}
-			break;
-		}
-	}
-
-	if (sel->input_rate == 0) {
-		unsigned long cfreq;
-		BUG_ON(c->flags & PLLU);
-		sel = &cfg;
-
-		switch (input_rate) {
-		case 12000000:
-		case 26000000:
-			cfreq = (rate <= 1000000 * 1000) ? 1000000 : 2000000;
-			break;
-		case 13000000:
-			cfreq = (rate <= 1000000 * 1000) ? 1000000 : 2600000;
-			break;
-		case 16800000:
-		case 19200000:
-			cfreq = (rate <= 1200000 * 1000) ? 1200000 : 2400000;
-			break;
-		default:
-			pr_err("%s: Unexpected reference rate %lu\n",
-			       __func__, input_rate);
-			BUG();
-		}
-
-		/* Raise VCO to guarantee 0.5% accuracy */
-		for (cfg.output_rate = rate; cfg.output_rate < 200 * cfreq;
-		      cfg.output_rate <<= 1)
-			p_div++;
-
-		cfg.p = 0x1 << p_div;
-		cfg.m = input_rate / cfreq;
-		cfg.n = cfg.output_rate / cfreq;
-	}
-
-	mul = sel->n;
-	div = sel->m * sel->p;
-
-	output_rate *= mul;
-	output_rate += div - 1; /* round up */
-	do_div(output_rate, div);
-
-	return output_rate;
-}
-
-static unsigned long tegra30_pll_recalc_rate(struct clk_hw *hw,
-		unsigned long parent_rate)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	u64 rate = parent_rate;
-	u32 val = clk_readl(c->reg + PLL_BASE);
-
-	if (c->flags & PLL_FIXED && !(val & PLL_BASE_OVERRIDE)) {
-		const struct clk_pll_freq_table *sel;
-		for (sel = c->u.pll.freq_table; sel->input_rate != 0; sel++) {
-			if (sel->input_rate == parent_rate &&
-				sel->output_rate == c->u.pll.fixed_rate) {
-				c->mul = sel->n;
-				c->div = sel->m * sel->p;
-				break;
-			}
-		}
-		pr_err("Clock %s has unknown fixed frequency\n",
-						__clk_get_name(hw->clk));
-		BUG();
-	} else if (val & PLL_BASE_BYPASS) {
-		c->mul = 1;
-		c->div = 1;
-	} else {
-		c->mul = (val & PLL_BASE_DIVN_MASK) >> PLL_BASE_DIVN_SHIFT;
-		c->div = (val & PLL_BASE_DIVM_MASK) >> PLL_BASE_DIVM_SHIFT;
-		if (c->flags & PLLU)
-			c->div *= (val & PLLU_BASE_POST_DIV) ? 1 : 2;
-		else
-			c->div *= (0x1 << ((val & PLL_BASE_DIVP_MASK) >>
-					PLL_BASE_DIVP_SHIFT));
-	}
-
-	if (c->mul != 0 && c->div != 0) {
-		rate *= c->mul;
-		rate += c->div - 1; /* round up */
-		do_div(rate, c->div);
-	}
-
-	return rate;
-}
-
-struct clk_ops tegra30_pll_ops = {
-	.is_enabled = tegra30_pll_clk_is_enabled,
-	.init = tegra30_pll_clk_init,
-	.enable = tegra30_pll_clk_enable,
-	.disable = tegra30_pll_clk_disable,
-	.recalc_rate = tegra30_pll_recalc_rate,
-	.round_rate = tegra30_pll_round_rate,
-	.set_rate = tegra30_pll_clk_set_rate,
-};
-
-int tegra30_plld_clk_cfg_ex(struct clk_hw *hw,
-				enum tegra_clk_ex_param p, u32 setting)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	u32 val, mask, reg;
-
-	switch (p) {
-	case TEGRA_CLK_PLLD_CSI_OUT_ENB:
-		mask = PLLD_BASE_CSI_CLKENABLE;
-		reg = c->reg + PLL_BASE;
-		break;
-	case TEGRA_CLK_PLLD_DSI_OUT_ENB:
-		mask = PLLD_MISC_DSI_CLKENABLE;
-		reg = c->reg + PLL_MISC(c);
-		break;
-	case TEGRA_CLK_PLLD_MIPI_MUX_SEL:
-		if (!(c->flags & PLL_ALT_MISC_REG)) {
-			mask = PLLD_BASE_DSIB_MUX_MASK;
-			reg = c->reg + PLL_BASE;
-			break;
-		}
-	/* fall through - error since PLLD2 does not have MUX_SEL control */
-	default:
-		return -EINVAL;
-	}
-
-	val = clk_readl(reg);
-	if (setting)
-		val |= mask;
-	else
-		val &= ~mask;
-	clk_writel(val, reg);
-	return 0;
-}
-
-static int tegra30_plle_clk_is_enabled(struct clk_hw *hw)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	u32 val;
-
-	val = clk_readl(c->reg + PLL_BASE);
-	c->state = (val & PLLE_BASE_ENABLE) ? ON : OFF;
-	return c->state;
-}
-
-static void tegra30_plle_clk_disable(struct clk_hw *hw)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	u32 val;
-
-	val = clk_readl(c->reg + PLL_BASE);
-	val &= ~(PLLE_BASE_CML_ENABLE | PLLE_BASE_ENABLE);
-	clk_writel(val, c->reg + PLL_BASE);
-}
-
-static void tegra30_plle_training(struct clk_tegra *c)
-{
-	u32 val;
-
-	/* PLLE is already disabled, and setup cleared;
-	 * create falling edge on PLLE IDDQ input */
-	val = pmc_readl(PMC_SATA_PWRGT);
-	val |= PMC_SATA_PWRGT_PLLE_IDDQ_VALUE;
-	pmc_writel(val, PMC_SATA_PWRGT);
-
-	val = pmc_readl(PMC_SATA_PWRGT);
-	val |= PMC_SATA_PWRGT_PLLE_IDDQ_SWCTL;
-	pmc_writel(val, PMC_SATA_PWRGT);
-
-	val = pmc_readl(PMC_SATA_PWRGT);
-	val &= ~PMC_SATA_PWRGT_PLLE_IDDQ_VALUE;
-	pmc_writel(val, PMC_SATA_PWRGT);
-
-	do {
-		val = clk_readl(c->reg + PLL_MISC(c));
-	} while (!(val & PLLE_MISC_READY));
-}
-
-static int tegra30_plle_configure(struct clk_hw *hw, bool force_training)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	struct clk *parent = __clk_get_parent(hw->clk);
-	const struct clk_pll_freq_table *sel;
-	u32 val;
-
-	unsigned long rate = c->u.pll.fixed_rate;
-	unsigned long input_rate = __clk_get_rate(parent);
-
-	for (sel = c->u.pll.freq_table; sel->input_rate != 0; sel++) {
-		if (sel->input_rate == input_rate && sel->output_rate == rate)
-			break;
-	}
-
-	if (sel->input_rate == 0)
-		return -ENOSYS;
-
-	/* disable PLLE, clear setup fiels */
-	tegra30_plle_clk_disable(hw);
-
-	val = clk_readl(c->reg + PLL_MISC(c));
-	val &= ~(PLLE_MISC_LOCK_ENABLE | PLLE_MISC_SETUP_MASK);
-	clk_writel(val, c->reg + PLL_MISC(c));
-
-	/* training */
-	val = clk_readl(c->reg + PLL_MISC(c));
-	if (force_training || (!(val & PLLE_MISC_READY)))
-		tegra30_plle_training(c);
-
-	/* configure dividers, setup, disable SS */
-	val = clk_readl(c->reg + PLL_BASE);
-	val &= ~PLLE_BASE_DIV_MASK;
-	val |= PLLE_BASE_DIV(sel->m, sel->n, sel->p, sel->cpcon);
-	clk_writel(val, c->reg + PLL_BASE);
-	c->mul = sel->n;
-	c->div = sel->m * sel->p;
-
-	val = clk_readl(c->reg + PLL_MISC(c));
-	val |= PLLE_MISC_SETUP_VALUE;
-	val |= PLLE_MISC_LOCK_ENABLE;
-	clk_writel(val, c->reg + PLL_MISC(c));
-
-	val = clk_readl(PLLE_SS_CTRL);
-	val |= PLLE_SS_DISABLE;
-	clk_writel(val, PLLE_SS_CTRL);
-
-	/* enable and lock PLLE*/
-	val = clk_readl(c->reg + PLL_BASE);
-	val |= (PLLE_BASE_CML_ENABLE | PLLE_BASE_ENABLE);
-	clk_writel(val, c->reg + PLL_BASE);
-
-	tegra30_pll_clk_wait_for_lock(c, c->reg + PLL_MISC(c), PLLE_MISC_LOCK);
-
-	return 0;
-}
-
-static int tegra30_plle_clk_enable(struct clk_hw *hw)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-
-	return tegra30_plle_configure(hw, !c->set);
-}
-
-static unsigned long tegra30_plle_clk_recalc_rate(struct clk_hw *hw,
-			unsigned long parent_rate)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	unsigned long rate = parent_rate;
-	u32 val;
-
-	val = clk_readl(c->reg + PLL_BASE);
-	c->mul = (val & PLLE_BASE_DIVN_MASK) >> PLLE_BASE_DIVN_SHIFT;
-	c->div = (val & PLLE_BASE_DIVM_MASK) >> PLLE_BASE_DIVM_SHIFT;
-	c->div *= (val & PLLE_BASE_DIVP_MASK) >> PLLE_BASE_DIVP_SHIFT;
-
-	if (c->mul != 0 && c->div != 0) {
-		rate *= c->mul;
-		rate += c->div - 1; /* round up */
-		do_div(rate, c->div);
-	}
-	return rate;
-}
-
-struct clk_ops tegra30_plle_ops = {
-	.is_enabled = tegra30_plle_clk_is_enabled,
-	.enable = tegra30_plle_clk_enable,
-	.disable = tegra30_plle_clk_disable,
-	.recalc_rate = tegra30_plle_clk_recalc_rate,
-};
-
-/* Clock divider ops */
-static int tegra30_pll_div_clk_is_enabled(struct clk_hw *hw)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-
-	if (c->flags & DIV_U71) {
-		u32 val = clk_readl(c->reg);
-		val >>= c->reg_shift;
-		c->state = (val & PLL_OUT_CLKEN) ? ON : OFF;
-		if (!(val & PLL_OUT_RESET_DISABLE))
-			c->state = OFF;
-	} else {
-		c->state = ON;
-	}
-	return c->state;
-}
-
-static int tegra30_pll_div_clk_enable(struct clk_hw *hw)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	u32 val;
-	u32 new_val;
-
-	pr_debug("%s: %s\n", __func__, __clk_get_name(hw->clk));
-	if (c->flags & DIV_U71) {
-		val = clk_readl(c->reg);
-		new_val = val >> c->reg_shift;
-		new_val &= 0xFFFF;
-
-		new_val |= PLL_OUT_CLKEN | PLL_OUT_RESET_DISABLE;
-
-		val &= ~(0xFFFF << c->reg_shift);
-		val |= new_val << c->reg_shift;
-		clk_writel_delay(val, c->reg);
-		return 0;
-	} else if (c->flags & DIV_2) {
-		return 0;
-	}
-	return -EINVAL;
-}
-
-static void tegra30_pll_div_clk_disable(struct clk_hw *hw)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	u32 val;
-	u32 new_val;
-
-	pr_debug("%s: %s\n", __func__, __clk_get_name(hw->clk));
-	if (c->flags & DIV_U71) {
-		val = clk_readl(c->reg);
-		new_val = val >> c->reg_shift;
-		new_val &= 0xFFFF;
-
-		new_val &= ~(PLL_OUT_CLKEN | PLL_OUT_RESET_DISABLE);
-
-		val &= ~(0xFFFF << c->reg_shift);
-		val |= new_val << c->reg_shift;
-		clk_writel_delay(val, c->reg);
-	}
-}
-
-static int tegra30_pll_div_clk_set_rate(struct clk_hw *hw, unsigned long rate,
-		unsigned long parent_rate)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	u32 val;
-	u32 new_val;
-	int divider_u71;
-
-	if (c->flags & DIV_U71) {
-		divider_u71 = clk_div71_get_divider(
-			parent_rate, rate, c->flags, ROUND_DIVIDER_UP);
-		if (divider_u71 >= 0) {
-			val = clk_readl(c->reg);
-			new_val = val >> c->reg_shift;
-			new_val &= 0xFFFF;
-			if (c->flags & DIV_U71_FIXED)
-				new_val |= PLL_OUT_OVERRIDE;
-			new_val &= ~PLL_OUT_RATIO_MASK;
-			new_val |= divider_u71 << PLL_OUT_RATIO_SHIFT;
-
-			val &= ~(0xFFFF << c->reg_shift);
-			val |= new_val << c->reg_shift;
-			clk_writel_delay(val, c->reg);
-			c->div = divider_u71 + 2;
-			c->mul = 2;
-			c->fixed_rate = rate;
-			return 0;
-		}
-	} else if (c->flags & DIV_2) {
-		c->fixed_rate = rate;
-		return 0;
-	}
-
-	return -EINVAL;
-}
-
-static unsigned long tegra30_pll_div_clk_recalc_rate(struct clk_hw *hw,
-		unsigned long parent_rate)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	u64 rate = parent_rate;
-
-	if (c->flags & DIV_U71) {
-		u32 divu71;
-		u32 val = clk_readl(c->reg);
-		val >>= c->reg_shift;
-
-		divu71 = (val & PLL_OUT_RATIO_MASK) >> PLL_OUT_RATIO_SHIFT;
-		c->div = (divu71 + 2);
-		c->mul = 2;
-	} else if (c->flags & DIV_2) {
-		if (c->flags & (PLLD | PLLX)) {
-			c->div = 2;
-			c->mul = 1;
-		} else
-			BUG();
-	} else {
-		c->div = 1;
-		c->mul = 1;
-	}
-	if (c->mul != 0 && c->div != 0) {
-		rate *= c->mul;
-		rate += c->div - 1; /* round up */
-		do_div(rate, c->div);
-	}
-
-	return rate;
-}
-
-static long tegra30_pll_div_clk_round_rate(struct clk_hw *hw,
-				unsigned long rate, unsigned long *prate)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	unsigned long parent_rate = __clk_get_rate(__clk_get_parent(hw->clk));
-	int divider;
-
-	if (prate)
-		parent_rate = *prate;
-
-	if (c->flags & DIV_U71) {
-		divider = clk_div71_get_divider(
-			parent_rate, rate, c->flags, ROUND_DIVIDER_UP);
-		if (divider < 0)
-			return divider;
-		return DIV_ROUND_UP(parent_rate * 2, divider + 2);
-	} else if (c->flags & DIV_2) {
-		*prate = rate * 2;
-		return rate;
-	}
-
-	return -EINVAL;
-}
-
-struct clk_ops tegra30_pll_div_ops = {
-	.is_enabled = tegra30_pll_div_clk_is_enabled,
-	.enable = tegra30_pll_div_clk_enable,
-	.disable = tegra30_pll_div_clk_disable,
-	.set_rate = tegra30_pll_div_clk_set_rate,
-	.recalc_rate = tegra30_pll_div_clk_recalc_rate,
-	.round_rate = tegra30_pll_div_clk_round_rate,
-};
-
-/* Periph clk ops */
-static inline u32 periph_clk_source_mask(struct clk_tegra *c)
-{
-	if (c->flags & MUX8)
-		return 7 << 29;
-	else if (c->flags & MUX_PWM)
-		return 3 << 28;
-	else if (c->flags & MUX_CLK_OUT)
-		return 3 << (c->u.periph.clk_num + 4);
-	else if (c->flags & PLLD)
-		return PLLD_BASE_DSIB_MUX_MASK;
-	else
-		return 3 << 30;
-}
-
-static inline u32 periph_clk_source_shift(struct clk_tegra *c)
-{
-	if (c->flags & MUX8)
-		return 29;
-	else if (c->flags & MUX_PWM)
-		return 28;
-	else if (c->flags & MUX_CLK_OUT)
-		return c->u.periph.clk_num + 4;
-	else if (c->flags & PLLD)
-		return PLLD_BASE_DSIB_MUX_SHIFT;
-	else
-		return 30;
-}
-
-static int tegra30_periph_clk_is_enabled(struct clk_hw *hw)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-
-	c->state = ON;
-	if (!(clk_readl(PERIPH_CLK_TO_ENB_REG(c)) & PERIPH_CLK_TO_BIT(c)))
-		c->state = OFF;
-	if (!(c->flags & PERIPH_NO_RESET))
-		if (clk_readl(PERIPH_CLK_TO_RST_REG(c)) & PERIPH_CLK_TO_BIT(c))
-			c->state = OFF;
-	return c->state;
-}
-
-static int tegra30_periph_clk_enable(struct clk_hw *hw)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-
-	tegra_periph_clk_enable_refcount[c->u.periph.clk_num]++;
-	if (tegra_periph_clk_enable_refcount[c->u.periph.clk_num] > 1)
-		return 0;
-
-	clk_writel_delay(PERIPH_CLK_TO_BIT(c), PERIPH_CLK_TO_ENB_SET_REG(c));
-	if (!(c->flags & PERIPH_NO_RESET) &&
-		 !(c->flags & PERIPH_MANUAL_RESET)) {
-		if (clk_readl(PERIPH_CLK_TO_RST_REG(c)) &
-			 PERIPH_CLK_TO_BIT(c)) {
-			udelay(5);	/* reset propagation delay */
-			clk_writel(PERIPH_CLK_TO_BIT(c),
-				 PERIPH_CLK_TO_RST_CLR_REG(c));
-		}
-	}
-	return 0;
-}
-
-static void tegra30_periph_clk_disable(struct clk_hw *hw)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	unsigned long val;
-
-	tegra_periph_clk_enable_refcount[c->u.periph.clk_num]--;
-
-	if (tegra_periph_clk_enable_refcount[c->u.periph.clk_num] > 0)
-		return;
-
-	/* If peripheral is in the APB bus then read the APB bus to
-	 * flush the write operation in apb bus. This will avoid the
-	 * peripheral access after disabling clock*/
-	if (c->flags & PERIPH_ON_APB)
-		val = chipid_readl();
-
-	clk_writel_delay(PERIPH_CLK_TO_BIT(c), PERIPH_CLK_TO_ENB_CLR_REG(c));
-}
-
-void tegra30_periph_clk_reset(struct clk_hw *hw, bool assert)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	unsigned long val;
-
-	if (!(c->flags & PERIPH_NO_RESET)) {
-		if (assert) {
-			/* If peripheral is in the APB bus then read the APB
-			 * bus to flush the write operation in apb bus. This
-			 * will avoid the peripheral access after disabling
-			 * clock */
-			if (c->flags & PERIPH_ON_APB)
-				val = chipid_readl();
-
-			clk_writel(PERIPH_CLK_TO_BIT(c),
-				   PERIPH_CLK_TO_RST_SET_REG(c));
-		} else
-			clk_writel(PERIPH_CLK_TO_BIT(c),
-				   PERIPH_CLK_TO_RST_CLR_REG(c));
-	}
-}
-
-static int tegra30_periph_clk_set_parent(struct clk_hw *hw, u8 index)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	u32 val;
-
-	if (!(c->flags & MUX))
-		return (index == 0) ? 0 : (-EINVAL);
-
-	val = clk_readl(c->reg);
-	val &= ~periph_clk_source_mask(c);
-	val |= (index << periph_clk_source_shift(c));
-	clk_writel_delay(val, c->reg);
-	return 0;
-}
-
-static u8 tegra30_periph_clk_get_parent(struct clk_hw *hw)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	u32 val = clk_readl(c->reg);
-	int source  = (val & periph_clk_source_mask(c)) >>
-					periph_clk_source_shift(c);
-
-	if (!(c->flags & MUX))
-		return 0;
-
-	return source;
-}
-
-static int tegra30_periph_clk_set_rate(struct clk_hw *hw, unsigned long rate,
-		unsigned long parent_rate)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	u32 val;
-	int divider;
-
-	if (c->flags & DIV_U71) {
-		divider = clk_div71_get_divider(
-			parent_rate, rate, c->flags, ROUND_DIVIDER_UP);
-		if (divider >= 0) {
-			val = clk_readl(c->reg);
-			val &= ~PERIPH_CLK_SOURCE_DIVU71_MASK;
-			val |= divider;
-			if (c->flags & DIV_U71_UART) {
-				if (divider)
-					val |= PERIPH_CLK_UART_DIV_ENB;
-				else
-					val &= ~PERIPH_CLK_UART_DIV_ENB;
-			}
-			clk_writel_delay(val, c->reg);
-			c->div = divider + 2;
-			c->mul = 2;
-			return 0;
-		}
-	} else if (c->flags & DIV_U16) {
-		divider = clk_div16_get_divider(parent_rate, rate);
-		if (divider >= 0) {
-			val = clk_readl(c->reg);
-			val &= ~PERIPH_CLK_SOURCE_DIVU16_MASK;
-			val |= divider;
-			clk_writel_delay(val, c->reg);
-			c->div = divider + 1;
-			c->mul = 1;
-			return 0;
-		}
-	} else if (parent_rate <= rate) {
-		c->div = 1;
-		c->mul = 1;
-		return 0;
-	}
-	return -EINVAL;
-}
-
-static long tegra30_periph_clk_round_rate(struct clk_hw *hw, unsigned long rate,
-				unsigned long *prate)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	unsigned long parent_rate = __clk_get_rate(__clk_get_parent(hw->clk));
-	int divider;
-
-	if (prate)
-		parent_rate = *prate;
-
-	if (c->flags & DIV_U71) {
-		divider = clk_div71_get_divider(
-			parent_rate, rate, c->flags, ROUND_DIVIDER_UP);
-		if (divider < 0)
-			return divider;
-
-		return DIV_ROUND_UP(parent_rate * 2, divider + 2);
-	} else if (c->flags & DIV_U16) {
-		divider = clk_div16_get_divider(parent_rate, rate);
-		if (divider < 0)
-			return divider;
-		return DIV_ROUND_UP(parent_rate, divider + 1);
-	}
-	return -EINVAL;
-}
-
-static unsigned long tegra30_periph_clk_recalc_rate(struct clk_hw *hw,
-		unsigned long parent_rate)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	u64 rate = parent_rate;
-	u32 val = clk_readl(c->reg);
-
-	if (c->flags & DIV_U71) {
-		u32 divu71 = val & PERIPH_CLK_SOURCE_DIVU71_MASK;
-		if ((c->flags & DIV_U71_UART) &&
-		    (!(val & PERIPH_CLK_UART_DIV_ENB))) {
-			divu71 = 0;
-		}
-		if (c->flags & DIV_U71_IDLE) {
-			val &= ~(PERIPH_CLK_SOURCE_DIVU71_MASK <<
-				PERIPH_CLK_SOURCE_DIVIDLE_SHIFT);
-			val |= (PERIPH_CLK_SOURCE_DIVIDLE_VAL <<
-				PERIPH_CLK_SOURCE_DIVIDLE_SHIFT);
-			clk_writel(val, c->reg);
-		}
-		c->div = divu71 + 2;
-		c->mul = 2;
-	} else if (c->flags & DIV_U16) {
-		u32 divu16 = val & PERIPH_CLK_SOURCE_DIVU16_MASK;
-		c->div = divu16 + 1;
-		c->mul = 1;
-	} else {
-		c->div = 1;
-		c->mul = 1;
-	}
-
-	if (c->mul != 0 && c->div != 0) {
-		rate *= c->mul;
-		rate += c->div - 1; /* round up */
-		do_div(rate, c->div);
-	}
-	return rate;
-}
-
-struct clk_ops tegra30_periph_clk_ops = {
-	.is_enabled = tegra30_periph_clk_is_enabled,
-	.enable = tegra30_periph_clk_enable,
-	.disable = tegra30_periph_clk_disable,
-	.set_parent = tegra30_periph_clk_set_parent,
-	.get_parent = tegra30_periph_clk_get_parent,
-	.set_rate = tegra30_periph_clk_set_rate,
-	.round_rate = tegra30_periph_clk_round_rate,
-	.recalc_rate = tegra30_periph_clk_recalc_rate,
-};
-
-static int tegra30_dsib_clk_set_parent(struct clk_hw *hw, u8 index)
-{
-	struct clk *d = clk_get_sys(NULL, "pll_d");
-	/* The DSIB parent selection bit is in PLLD base register */
-	tegra_clk_cfg_ex(
-		d, TEGRA_CLK_PLLD_MIPI_MUX_SEL, index);
-
-	return 0;
-}
-
-struct clk_ops tegra30_dsib_clk_ops = {
-	.is_enabled = tegra30_periph_clk_is_enabled,
-	.enable			= &tegra30_periph_clk_enable,
-	.disable		= &tegra30_periph_clk_disable,
-	.set_parent		= &tegra30_dsib_clk_set_parent,
-	.get_parent		= &tegra30_periph_clk_get_parent,
-	.set_rate		= &tegra30_periph_clk_set_rate,
-	.round_rate		= &tegra30_periph_clk_round_rate,
-	.recalc_rate		= &tegra30_periph_clk_recalc_rate,
-};
-
-/* Periph extended clock configuration ops */
-int tegra30_vi_clk_cfg_ex(struct clk_hw *hw,
-				enum tegra_clk_ex_param p, u32 setting)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-
-	if (p == TEGRA_CLK_VI_INP_SEL) {
-		u32 val = clk_readl(c->reg);
-		val &= ~PERIPH_CLK_VI_SEL_EX_MASK;
-		val |= (setting << PERIPH_CLK_VI_SEL_EX_SHIFT) &
-			PERIPH_CLK_VI_SEL_EX_MASK;
-		clk_writel(val, c->reg);
-		return 0;
-	}
-	return -EINVAL;
-}
-
-int tegra30_nand_clk_cfg_ex(struct clk_hw *hw,
-				enum tegra_clk_ex_param p, u32 setting)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-
-	if (p == TEGRA_CLK_NAND_PAD_DIV2_ENB) {
-		u32 val = clk_readl(c->reg);
-		if (setting)
-			val |= PERIPH_CLK_NAND_DIV_EX_ENB;
-		else
-			val &= ~PERIPH_CLK_NAND_DIV_EX_ENB;
-		clk_writel(val, c->reg);
-		return 0;
-	}
-	return -EINVAL;
-}
-
-int tegra30_dtv_clk_cfg_ex(struct clk_hw *hw,
-				enum tegra_clk_ex_param p, u32 setting)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-
-	if (p == TEGRA_CLK_DTV_INVERT) {
-		u32 val = clk_readl(c->reg);
-		if (setting)
-			val |= PERIPH_CLK_DTV_POLARITY_INV;
-		else
-			val &= ~PERIPH_CLK_DTV_POLARITY_INV;
-		clk_writel(val, c->reg);
-		return 0;
-	}
-	return -EINVAL;
-}
-
-/* Output clock ops */
-
-static DEFINE_SPINLOCK(clk_out_lock);
-
-static int tegra30_clk_out_is_enabled(struct clk_hw *hw)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	u32 val = pmc_readl(c->reg);
-
-	c->state = (val & (0x1 << c->u.periph.clk_num)) ? ON : OFF;
-	c->mul = 1;
-	c->div = 1;
-	return c->state;
-}
-
-static int tegra30_clk_out_enable(struct clk_hw *hw)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	u32 val;
-	unsigned long flags;
-
-	spin_lock_irqsave(&clk_out_lock, flags);
-	val = pmc_readl(c->reg);
-	val |= (0x1 << c->u.periph.clk_num);
-	pmc_writel(val, c->reg);
-	spin_unlock_irqrestore(&clk_out_lock, flags);
-
-	return 0;
-}
-
-static void tegra30_clk_out_disable(struct clk_hw *hw)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	u32 val;
-	unsigned long flags;
-
-	spin_lock_irqsave(&clk_out_lock, flags);
-	val = pmc_readl(c->reg);
-	val &= ~(0x1 << c->u.periph.clk_num);
-	pmc_writel(val, c->reg);
-	spin_unlock_irqrestore(&clk_out_lock, flags);
-}
-
-static int tegra30_clk_out_set_parent(struct clk_hw *hw, u8 index)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	u32 val;
-	unsigned long flags;
-
-	spin_lock_irqsave(&clk_out_lock, flags);
-	val = pmc_readl(c->reg);
-	val &= ~periph_clk_source_mask(c);
-	val |= (index << periph_clk_source_shift(c));
-	pmc_writel(val, c->reg);
-	spin_unlock_irqrestore(&clk_out_lock, flags);
-
-	return 0;
-}
-
-static u8 tegra30_clk_out_get_parent(struct clk_hw *hw)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	u32 val = pmc_readl(c->reg);
-	int source;
-
-	source = (val & periph_clk_source_mask(c)) >>
-				periph_clk_source_shift(c);
-	return source;
-}
-
-struct clk_ops tegra_clk_out_ops = {
-	.is_enabled = tegra30_clk_out_is_enabled,
-	.enable = tegra30_clk_out_enable,
-	.disable = tegra30_clk_out_disable,
-	.set_parent = tegra30_clk_out_set_parent,
-	.get_parent = tegra30_clk_out_get_parent,
-	.recalc_rate = tegra30_clk_fixed_recalc_rate,
-};
-
-/* Clock doubler ops */
-static int tegra30_clk_double_is_enabled(struct clk_hw *hw)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-
-	c->state = ON;
-	if (!(clk_readl(PERIPH_CLK_TO_ENB_REG(c)) & PERIPH_CLK_TO_BIT(c)))
-		c->state = OFF;
-	return c->state;
-};
-
-static int tegra30_clk_double_set_rate(struct clk_hw *hw, unsigned long rate,
-		unsigned long parent_rate)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	u32 val;
-
-	if (rate == parent_rate) {
-		val = clk_readl(c->reg) | (0x1 << c->reg_shift);
-		clk_writel(val, c->reg);
-		c->mul = 1;
-		c->div = 1;
-		return 0;
-	} else if (rate == 2 * parent_rate) {
-		val = clk_readl(c->reg) & (~(0x1 << c->reg_shift));
-		clk_writel(val, c->reg);
-		c->mul = 2;
-		c->div = 1;
-		return 0;
-	}
-	return -EINVAL;
-}
-
-static unsigned long tegra30_clk_double_recalc_rate(struct clk_hw *hw,
-		unsigned long parent_rate)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	u64 rate = parent_rate;
-
-	u32 val = clk_readl(c->reg);
-	c->mul = val & (0x1 << c->reg_shift) ? 1 : 2;
-	c->div = 1;
-
-	if (c->mul != 0 && c->div != 0) {
-		rate *= c->mul;
-		rate += c->div - 1; /* round up */
-		do_div(rate, c->div);
-	}
-
-	return rate;
-}
-
-static long tegra30_clk_double_round_rate(struct clk_hw *hw, unsigned long rate,
-				unsigned long *prate)
-{
-	unsigned long output_rate = *prate;
-
-	do_div(output_rate, 2);
-	return output_rate;
-}
-
-struct clk_ops tegra30_clk_double_ops = {
-	.is_enabled = tegra30_clk_double_is_enabled,
-	.enable = tegra30_periph_clk_enable,
-	.disable = tegra30_periph_clk_disable,
-	.recalc_rate = tegra30_clk_double_recalc_rate,
-	.round_rate = tegra30_clk_double_round_rate,
-	.set_rate = tegra30_clk_double_set_rate,
-};
-
-/* Audio sync clock ops */
-struct clk_ops tegra_sync_source_ops = {
-	.recalc_rate = tegra30_clk_fixed_recalc_rate,
-};
-
-static int tegra30_audio_sync_clk_is_enabled(struct clk_hw *hw)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	u32 val = clk_readl(c->reg);
-	c->state = (val & AUDIO_SYNC_DISABLE_BIT) ? OFF : ON;
-	return c->state;
-}
-
-static int tegra30_audio_sync_clk_enable(struct clk_hw *hw)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	u32 val = clk_readl(c->reg);
-	clk_writel((val & (~AUDIO_SYNC_DISABLE_BIT)), c->reg);
-	return 0;
-}
-
-static void tegra30_audio_sync_clk_disable(struct clk_hw *hw)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	u32 val = clk_readl(c->reg);
-	clk_writel((val | AUDIO_SYNC_DISABLE_BIT), c->reg);
-}
-
-static int tegra30_audio_sync_clk_set_parent(struct clk_hw *hw, u8 index)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	u32 val;
-
-	val = clk_readl(c->reg);
-	val &= ~AUDIO_SYNC_SOURCE_MASK;
-	val |= index;
-
-	clk_writel(val, c->reg);
-	return 0;
-}
-
-static u8 tegra30_audio_sync_clk_get_parent(struct clk_hw *hw)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	u32 val = clk_readl(c->reg);
-	int source;
-
-	source = val & AUDIO_SYNC_SOURCE_MASK;
-	return source;
-}
-
-struct clk_ops tegra30_audio_sync_clk_ops = {
-	.is_enabled = tegra30_audio_sync_clk_is_enabled,
-	.enable = tegra30_audio_sync_clk_enable,
-	.disable = tegra30_audio_sync_clk_disable,
-	.set_parent = tegra30_audio_sync_clk_set_parent,
-	.get_parent = tegra30_audio_sync_clk_get_parent,
-	.recalc_rate = tegra30_clk_fixed_recalc_rate,
-};
-
-/* cml0 (pcie), and cml1 (sata) clock ops */
-static int tegra30_cml_clk_is_enabled(struct clk_hw *hw)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-	u32 val = clk_readl(c->reg);
-	c->state = val & (0x1 << c->u.periph.clk_num) ? ON : OFF;
-	return c->state;
-}
-
-static int tegra30_cml_clk_enable(struct clk_hw *hw)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-
-	u32 val = clk_readl(c->reg);
-	val |= (0x1 << c->u.periph.clk_num);
-	clk_writel(val, c->reg);
-
-	return 0;
-}
-
-static void tegra30_cml_clk_disable(struct clk_hw *hw)
-{
-	struct clk_tegra *c = to_clk_tegra(hw);
-
-	u32 val = clk_readl(c->reg);
-	val &= ~(0x1 << c->u.periph.clk_num);
-	clk_writel(val, c->reg);
-}
-
-struct clk_ops tegra_cml_clk_ops = {
-	.is_enabled = tegra30_cml_clk_is_enabled,
-	.enable = tegra30_cml_clk_enable,
-	.disable = tegra30_cml_clk_disable,
-	.recalc_rate = tegra30_clk_fixed_recalc_rate,
-};
-
-struct clk_ops tegra_pciex_clk_ops = {
-	.recalc_rate = tegra30_clk_fixed_recalc_rate,
-};
-
-/* Tegra30 CPU clock and reset control functions */
-static void tegra30_wait_cpu_in_reset(u32 cpu)
-{
-	unsigned int reg;
-
-	do {
-		reg = readl(reg_clk_base +
-			    TEGRA30_CLK_RST_CONTROLLER_CPU_CMPLX_STATUS);
-		cpu_relax();
-	} while (!(reg & (1 << cpu)));	/* check CPU been reset or not */
-
-	return;
-}
-
-static void tegra30_put_cpu_in_reset(u32 cpu)
-{
-	writel(CPU_RESET(cpu),
-	       reg_clk_base + TEGRA_CLK_RST_CONTROLLER_RST_CPU_CMPLX_SET);
-	dmb();
-}
-
-static void tegra30_cpu_out_of_reset(u32 cpu)
-{
-	writel(CPU_RESET(cpu),
-	       reg_clk_base + TEGRA_CLK_RST_CONTROLLER_RST_CPU_CMPLX_CLR);
-	wmb();
-}
-
-static void tegra30_enable_cpu_clock(u32 cpu)
-{
-	unsigned int reg;
-
-	writel(CPU_CLOCK(cpu),
-	       reg_clk_base + TEGRA30_CLK_RST_CONTROLLER_CLK_CPU_CMPLX_CLR);
-	reg = readl(reg_clk_base +
-		    TEGRA30_CLK_RST_CONTROLLER_CLK_CPU_CMPLX_CLR);
-}
-
-static void tegra30_disable_cpu_clock(u32 cpu)
-{
-
-	unsigned int reg;
-
-	reg = readl(reg_clk_base + TEGRA_CLK_RST_CONTROLLER_CLK_CPU_CMPLX);
-	writel(reg | CPU_CLOCK(cpu),
-	       reg_clk_base + TEGRA_CLK_RST_CONTROLLER_CLK_CPU_CMPLX);
-}
-
-#ifdef CONFIG_PM_SLEEP
-static bool tegra30_cpu_rail_off_ready(void)
-{
-	unsigned int cpu_rst_status;
-	int cpu_pwr_status;
-
-	cpu_rst_status = readl(reg_clk_base +
-			       TEGRA30_CLK_RST_CONTROLLER_CPU_CMPLX_STATUS);
-	cpu_pwr_status = tegra_powergate_is_powered(TEGRA_POWERGATE_CPU1) ||
-			 tegra_powergate_is_powered(TEGRA_POWERGATE_CPU2) ||
-			 tegra_powergate_is_powered(TEGRA_POWERGATE_CPU3);
-
-	if (((cpu_rst_status & 0xE) != 0xE) || cpu_pwr_status)
-		return false;
-
-	return true;
-}
-
-static void tegra30_cpu_clock_suspend(void)
-{
-	/* switch coresite to clk_m, save off original source */
-	tegra30_cpu_clk_sctx.clk_csite_src =
-				readl(reg_clk_base + CLK_RESET_SOURCE_CSITE);
-	writel(3<<30, reg_clk_base + CLK_RESET_SOURCE_CSITE);
-
-	tegra30_cpu_clk_sctx.cpu_burst =
-				readl(reg_clk_base + CLK_RESET_CCLK_BURST);
-	tegra30_cpu_clk_sctx.pllx_base =
-				readl(reg_clk_base + CLK_RESET_PLLX_BASE);
-	tegra30_cpu_clk_sctx.pllx_misc =
-				readl(reg_clk_base + CLK_RESET_PLLX_MISC);
-	tegra30_cpu_clk_sctx.cclk_divider =
-				readl(reg_clk_base + CLK_RESET_CCLK_DIVIDER);
-}
-
-static void tegra30_cpu_clock_resume(void)
-{
-	unsigned int reg, policy;
-
-	/* Is CPU complex already running on PLLX? */
-	reg = readl(reg_clk_base + CLK_RESET_CCLK_BURST);
-	policy = (reg >> CLK_RESET_CCLK_BURST_POLICY_SHIFT) & 0xF;
-
-	if (policy == CLK_RESET_CCLK_IDLE_POLICY)
-		reg = (reg >> CLK_RESET_CCLK_IDLE_POLICY_SHIFT) & 0xF;
-	else if (policy == CLK_RESET_CCLK_RUN_POLICY)
-		reg = (reg >> CLK_RESET_CCLK_RUN_POLICY_SHIFT) & 0xF;
-	else
-		BUG();
-
-	if (reg != CLK_RESET_CCLK_BURST_POLICY_PLLX) {
-		/* restore PLLX settings if CPU is on different PLL */
-		writel(tegra30_cpu_clk_sctx.pllx_misc,
-					reg_clk_base + CLK_RESET_PLLX_MISC);
-		writel(tegra30_cpu_clk_sctx.pllx_base,
-					reg_clk_base + CLK_RESET_PLLX_BASE);
-
-		/* wait for PLL stabilization if PLLX was enabled */
-		if (tegra30_cpu_clk_sctx.pllx_base & (1 << 30))
-			udelay(300);
-	}
-
-	/*
-	 * Restore original burst policy setting for calls resulting from CPU
-	 * LP2 in idle or system suspend.
-	 */
-	writel(tegra30_cpu_clk_sctx.cclk_divider,
-					reg_clk_base + CLK_RESET_CCLK_DIVIDER);
-	writel(tegra30_cpu_clk_sctx.cpu_burst,
-					reg_clk_base + CLK_RESET_CCLK_BURST);
-
-	writel(tegra30_cpu_clk_sctx.clk_csite_src,
-					reg_clk_base + CLK_RESET_SOURCE_CSITE);
-}
-#endif
-
-static struct tegra_cpu_car_ops tegra30_cpu_car_ops = {
-	.wait_for_reset	= tegra30_wait_cpu_in_reset,
-	.put_in_reset	= tegra30_put_cpu_in_reset,
-	.out_of_reset	= tegra30_cpu_out_of_reset,
-	.enable_clock	= tegra30_enable_cpu_clock,
-	.disable_clock	= tegra30_disable_cpu_clock,
-#ifdef CONFIG_PM_SLEEP
-	.rail_off_ready	= tegra30_cpu_rail_off_ready,
-	.suspend	= tegra30_cpu_clock_suspend,
-	.resume		= tegra30_cpu_clock_resume,
-#endif
-};
-
-void __init tegra30_cpu_car_ops_init(void)
-{
-	tegra_cpu_car_ops = &tegra30_cpu_car_ops;
-}
diff --git a/arch/arm/mach-tegra/tegra30_clocks.h b/arch/arm/mach-tegra/tegra30_clocks.h
deleted file mode 100644
index 7a34adb2f72d..000000000000
--- a/arch/arm/mach-tegra/tegra30_clocks.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Copyright (c) 2012, NVIDIA CORPORATION.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef __MACH_TEGRA30_CLOCK_H
-#define __MACH_TEGRA30_CLOCK_H
-
-extern struct clk_ops tegra30_clk_32k_ops;
-extern struct clk_ops tegra30_clk_m_ops;
-extern struct clk_ops tegra_clk_m_div_ops;
-extern struct clk_ops tegra_pll_ref_ops;
-extern struct clk_ops tegra30_pll_ops;
-extern struct clk_ops tegra30_pll_div_ops;
-extern struct clk_ops tegra_plld_ops;
-extern struct clk_ops tegra30_plle_ops;
-extern struct clk_ops tegra_cml_clk_ops;
-extern struct clk_ops tegra_pciex_clk_ops;
-extern struct clk_ops tegra_sync_source_ops;
-extern struct clk_ops tegra30_audio_sync_clk_ops;
-extern struct clk_ops tegra30_clk_double_ops;
-extern struct clk_ops tegra_clk_out_ops;
-extern struct clk_ops tegra30_super_ops;
-extern struct clk_ops tegra30_blink_clk_ops;
-extern struct clk_ops tegra30_twd_ops;
-extern struct clk_ops tegra30_bus_ops;
-extern struct clk_ops tegra30_periph_clk_ops;
-extern struct clk_ops tegra30_dsib_clk_ops;
-extern struct clk_ops tegra_nand_clk_ops;
-extern struct clk_ops tegra_vi_clk_ops;
-extern struct clk_ops tegra_dtv_clk_ops;
-extern struct clk_ops tegra_clk_shared_bus_ops;
-
-int tegra30_plld_clk_cfg_ex(struct clk_hw *hw,
-				enum tegra_clk_ex_param p, u32 setting);
-void tegra30_periph_clk_reset(struct clk_hw *hw, bool assert);
-int tegra30_vi_clk_cfg_ex(struct clk_hw *hw,
-				enum tegra_clk_ex_param p, u32 setting);
-int tegra30_nand_clk_cfg_ex(struct clk_hw *hw,
-				enum tegra_clk_ex_param p, u32 setting);
-int tegra30_dtv_clk_cfg_ex(struct clk_hw *hw,
-				enum tegra_clk_ex_param p, u32 setting);
-#endif
diff --git a/arch/arm/mach-tegra/tegra30_clocks_data.c b/arch/arm/mach-tegra/tegra30_clocks_data.c
deleted file mode 100644
index 9bfaa490cff6..000000000000
--- a/arch/arm/mach-tegra/tegra30_clocks_data.c
+++ /dev/null
@@ -1,1425 +0,0 @@
-/*
- * arch/arm/mach-tegra/tegra30_clocks.c
- *
- * Copyright (c) 2010-2012 NVIDIA CORPORATION.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
- *
- */
-
-#include <linux/clk-private.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/list.h>
-#include <linux/spinlock.h>
-#include <linux/delay.h>
-#include <linux/err.h>
-#include <linux/io.h>
-#include <linux/clk.h>
-#include <linux/cpufreq.h>
-#include <linux/clk/tegra.h>
-
-#include "clock.h"
-#include "fuse.h"
-#include "tegra30_clocks.h"
-
-#define DEFINE_CLK_TEGRA(_name, _rate, _ops, _flags,		\
-		   _parent_names, _parents, _parent)		\
-	static struct clk tegra_##_name = {			\
-		.hw = &tegra_##_name##_hw.hw,			\
-		.name = #_name,					\
-		.rate = _rate,					\
-		.ops = _ops,					\
-		.flags = _flags,				\
-		.parent_names = _parent_names,			\
-		.parents = _parents,				\
-		.num_parents = ARRAY_SIZE(_parent_names),	\
-		.parent	= _parent,				\
-	};
-
-static struct clk tegra_clk_32k;
-static struct clk_tegra tegra_clk_32k_hw = {
-	.hw = {
-		.clk = &tegra_clk_32k,
-	},
-	.fixed_rate = 32768,
-};
-static struct clk tegra_clk_32k = {
-	.name = "clk_32k",
-	.hw = &tegra_clk_32k_hw.hw,
-	.ops = &tegra30_clk_32k_ops,
-	.flags = CLK_IS_ROOT,
-};
-
-static struct clk tegra_clk_m;
-static struct clk_tegra tegra_clk_m_hw = {
-	.hw = {
-		.clk = &tegra_clk_m,
-	},
-	.flags = ENABLE_ON_INIT,
-	.reg = 0x1fc,
-	.reg_shift = 28,
-	.max_rate = 48000000,
-};
-static struct clk tegra_clk_m = {
-	.name = "clk_m",
-	.hw = &tegra_clk_m_hw.hw,
-	.ops = &tegra30_clk_m_ops,
-	.flags = CLK_IS_ROOT | CLK_IGNORE_UNUSED,
-};
-
-static const char *clk_m_div_parent_names[] = {
-	"clk_m",
-};
-
-static struct clk *clk_m_div_parents[] = {
-	&tegra_clk_m,
-};
-
-static struct clk tegra_clk_m_div2;
-static struct clk_tegra tegra_clk_m_div2_hw = {
-	.hw = {
-		.clk = &tegra_clk_m_div2,
-	},
-	.mul = 1,
-	.div = 2,
-	.max_rate = 24000000,
-};
-DEFINE_CLK_TEGRA(clk_m_div2, 0, &tegra_clk_m_div_ops, 0,
-		clk_m_div_parent_names, clk_m_div_parents, &tegra_clk_m);
-
-static struct clk tegra_clk_m_div4;
-static struct clk_tegra tegra_clk_m_div4_hw = {
-	.hw = {
-		.clk = &tegra_clk_m_div4,
-	},
-	.mul = 1,
-	.div = 4,
-	.max_rate = 12000000,
-};
-DEFINE_CLK_TEGRA(clk_m_div4, 0, &tegra_clk_m_div_ops, 0,
-		clk_m_div_parent_names, clk_m_div_parents, &tegra_clk_m);
-
-static struct clk tegra_pll_ref;
-static struct clk_tegra tegra_pll_ref_hw = {
-	.hw = {
-		.clk = &tegra_pll_ref,
-	},
-	.flags = ENABLE_ON_INIT,
-	.max_rate = 26000000,
-};
-DEFINE_CLK_TEGRA(pll_ref, 0, &tegra_pll_ref_ops, 0, clk_m_div_parent_names,
-		clk_m_div_parents, &tegra_clk_m);
-
-#define DEFINE_PLL(_name, _flags, _reg, _max_rate, _input_min,	\
-		   _input_max, _cf_min, _cf_max, _vco_min,	\
-		   _vco_max, _freq_table, _lock_delay, _ops,	\
-		   _fixed_rate, _clk_cfg_ex, _parent)		\
-	static struct clk tegra_##_name;			\
-	static const char *_name##_parent_names[] = {		\
-		#_parent,					\
-	};							\
-	static struct clk *_name##_parents[] = {		\
-		&tegra_##_parent,				\
-	};							\
-	static struct clk_tegra tegra_##_name##_hw = {		\
-		.hw = {						\
-			.clk = &tegra_##_name,			\
-		},						\
-		.flags = _flags,				\
-		.reg = _reg,					\
-		.max_rate = _max_rate,				\
-		.u.pll = {					\
-			.input_min = _input_min,		\
-			.input_max = _input_max,		\
-			.cf_min = _cf_min,			\
-			.cf_max = _cf_max,			\
-			.vco_min = _vco_min,			\
-			.vco_max = _vco_max,			\
-			.freq_table = _freq_table,		\
-			.lock_delay = _lock_delay,		\
-			.fixed_rate = _fixed_rate,		\
-		},						\
-		.clk_cfg_ex = _clk_cfg_ex,			\
-	};							\
-	DEFINE_CLK_TEGRA(_name, 0, &_ops, CLK_IGNORE_UNUSED,	\
-			 _name##_parent_names, _name##_parents,	\
-			&tegra_##_parent);
-
-#define DEFINE_PLL_OUT(_name, _flags, _reg, _reg_shift,		\
-		_max_rate, _ops, _parent, _clk_flags)		\
-	static const char *_name##_parent_names[] = {		\
-		#_parent,					\
-	};							\
-	static struct clk *_name##_parents[] = {		\
-		&tegra_##_parent,				\
-	};							\
-	static struct clk tegra_##_name;			\
-	static struct clk_tegra tegra_##_name##_hw = {		\
-		.hw = {						\
-			.clk = &tegra_##_name,			\
-		},						\
-		.flags = _flags,				\
-		.reg = _reg,					\
-		.max_rate = _max_rate,				\
-		.reg_shift = _reg_shift,			\
-	};							\
-	DEFINE_CLK_TEGRA(_name, 0, &tegra30_pll_div_ops,	\
-		_clk_flags,  _name##_parent_names,		\
-		_name##_parents, &tegra_##_parent);
-
-static struct clk_pll_freq_table tegra_pll_c_freq_table[] = {
-	{ 12000000, 1040000000, 520,  6, 1, 8},
-	{ 13000000, 1040000000, 480,  6, 1, 8},
-	{ 16800000, 1040000000, 495,  8, 1, 8},	/* actual: 1039.5 MHz */
-	{ 19200000, 1040000000, 325,  6, 1, 6},
-	{ 26000000, 1040000000, 520, 13, 1, 8},
-
-	{ 12000000, 832000000, 416,  6, 1, 8},
-	{ 13000000, 832000000, 832, 13, 1, 8},
-	{ 16800000, 832000000, 396,  8, 1, 8},	/* actual: 831.6 MHz */
-	{ 19200000, 832000000, 260,  6, 1, 8},
-	{ 26000000, 832000000, 416, 13, 1, 8},
-
-	{ 12000000, 624000000, 624, 12, 1, 8},
-	{ 13000000, 624000000, 624, 13, 1, 8},
-	{ 16800000, 600000000, 520, 14, 1, 8},
-	{ 19200000, 624000000, 520, 16, 1, 8},
-	{ 26000000, 624000000, 624, 26, 1, 8},
-
-	{ 12000000, 600000000, 600, 12, 1, 8},
-	{ 13000000, 600000000, 600, 13, 1, 8},
-	{ 16800000, 600000000, 500, 14, 1, 8},
-	{ 19200000, 600000000, 375, 12, 1, 6},
-	{ 26000000, 600000000, 600, 26, 1, 8},
-
-	{ 12000000, 520000000, 520, 12, 1, 8},
-	{ 13000000, 520000000, 520, 13, 1, 8},
-	{ 16800000, 520000000, 495, 16, 1, 8},	/* actual: 519.75 MHz */
-	{ 19200000, 520000000, 325, 12, 1, 6},
-	{ 26000000, 520000000, 520, 26, 1, 8},
-
-	{ 12000000, 416000000, 416, 12, 1, 8},
-	{ 13000000, 416000000, 416, 13, 1, 8},
-	{ 16800000, 416000000, 396, 16, 1, 8},	/* actual: 415.8 MHz */
-	{ 19200000, 416000000, 260, 12, 1, 6},
-	{ 26000000, 416000000, 416, 26, 1, 8},
-	{ 0, 0, 0, 0, 0, 0 },
-};
-
-DEFINE_PLL(pll_c, PLL_HAS_CPCON, 0x80, 1400000000, 2000000, 31000000, 1000000,
-		6000000, 20000000, 1400000000, tegra_pll_c_freq_table, 300,
-		tegra30_pll_ops, 0, NULL, pll_ref);
-
-DEFINE_PLL_OUT(pll_c_out1, DIV_U71, 0x84, 0, 700000000,
-		tegra30_pll_div_ops, pll_c, CLK_IGNORE_UNUSED);
-
-static struct clk_pll_freq_table tegra_pll_m_freq_table[] = {
-	{ 12000000, 666000000, 666, 12, 1, 8},
-	{ 13000000, 666000000, 666, 13, 1, 8},
-	{ 16800000, 666000000, 555, 14, 1, 8},
-	{ 19200000, 666000000, 555, 16, 1, 8},
-	{ 26000000, 666000000, 666, 26, 1, 8},
-	{ 12000000, 600000000, 600, 12, 1, 8},
-	{ 13000000, 600000000, 600, 13, 1, 8},
-	{ 16800000, 600000000, 500, 14, 1, 8},
-	{ 19200000, 600000000, 375, 12, 1, 6},
-	{ 26000000, 600000000, 600, 26, 1, 8},
-	{ 0, 0, 0, 0, 0, 0 },
-};
-
-DEFINE_PLL(pll_m, PLL_HAS_CPCON | PLLM, 0x90, 800000000, 2000000, 31000000,
-		1000000, 6000000, 20000000, 1200000000, tegra_pll_m_freq_table,
-		300, tegra30_pll_ops, 0, NULL, pll_ref);
-
-DEFINE_PLL_OUT(pll_m_out1, DIV_U71, 0x94, 0, 600000000,
-		tegra30_pll_div_ops, pll_m, CLK_IGNORE_UNUSED);
-
-static struct clk_pll_freq_table tegra_pll_p_freq_table[] = {
-	{ 12000000, 216000000, 432, 12, 2, 8},
-	{ 13000000, 216000000, 432, 13, 2, 8},
-	{ 16800000, 216000000, 360, 14, 2, 8},
-	{ 19200000, 216000000, 360, 16, 2, 8},
-	{ 26000000, 216000000, 432, 26, 2, 8},
-	{ 0, 0, 0, 0, 0, 0 },
-};
-
-DEFINE_PLL(pll_p, ENABLE_ON_INIT | PLL_FIXED | PLL_HAS_CPCON, 0xa0, 432000000,
-		2000000, 31000000, 1000000, 6000000, 20000000, 1400000000,
-		tegra_pll_p_freq_table, 300, tegra30_pll_ops, 408000000, NULL,
-		pll_ref);
-
-DEFINE_PLL_OUT(pll_p_out1, ENABLE_ON_INIT | DIV_U71 | DIV_U71_FIXED, 0xa4,
-		0, 432000000, tegra30_pll_div_ops, pll_p, CLK_IGNORE_UNUSED);
-DEFINE_PLL_OUT(pll_p_out2, ENABLE_ON_INIT | DIV_U71 | DIV_U71_FIXED, 0xa4,
-		16, 432000000, tegra30_pll_div_ops, pll_p, CLK_IGNORE_UNUSED);
-DEFINE_PLL_OUT(pll_p_out3, ENABLE_ON_INIT | DIV_U71 | DIV_U71_FIXED, 0xa8,
-		0, 432000000, tegra30_pll_div_ops, pll_p, CLK_IGNORE_UNUSED);
-DEFINE_PLL_OUT(pll_p_out4, ENABLE_ON_INIT | DIV_U71 | DIV_U71_FIXED, 0xa8,
-		16, 432000000, tegra30_pll_div_ops, pll_p, CLK_IGNORE_UNUSED);
-
-static struct clk_pll_freq_table tegra_pll_a_freq_table[] = {
-	{ 9600000, 564480000, 294, 5, 1, 4},
-	{ 9600000, 552960000, 288, 5, 1, 4},
-	{ 9600000, 24000000,  5,   2, 1, 1},
-
-	{ 28800000, 56448000, 49, 25, 1, 1},
-	{ 28800000, 73728000, 64, 25, 1, 1},
-	{ 28800000, 24000000,  5,  6, 1, 1},
-	{ 0, 0, 0, 0, 0, 0 },
-};
-
-DEFINE_PLL(pll_a, PLL_HAS_CPCON, 0xb0, 700000000, 2000000, 31000000, 1000000,
-		6000000, 20000000, 1400000000, tegra_pll_a_freq_table,
-		300, tegra30_pll_ops, 0, NULL, pll_p_out1);
-
-DEFINE_PLL_OUT(pll_a_out0, DIV_U71, 0xb4, 0, 100000000, tegra30_pll_div_ops,
-		pll_a, CLK_IGNORE_UNUSED);
-
-static struct clk_pll_freq_table tegra_pll_d_freq_table[] = {
-	{ 12000000, 216000000, 216, 12, 1, 4},
-	{ 13000000, 216000000, 216, 13, 1, 4},
-	{ 16800000, 216000000, 180, 14, 1, 4},
-	{ 19200000, 216000000, 180, 16, 1, 4},
-	{ 26000000, 216000000, 216, 26, 1, 4},
-
-	{ 12000000, 594000000, 594, 12, 1, 8},
-	{ 13000000, 594000000, 594, 13, 1, 8},
-	{ 16800000, 594000000, 495, 14, 1, 8},
-	{ 19200000, 594000000, 495, 16, 1, 8},
-	{ 26000000, 594000000, 594, 26, 1, 8},
-
-	{ 12000000, 1000000000, 1000, 12, 1, 12},
-	{ 13000000, 1000000000, 1000, 13, 1, 12},
-	{ 19200000, 1000000000, 625,  12, 1, 8},
-	{ 26000000, 1000000000, 1000, 26, 1, 12},
-
-	{ 0, 0, 0, 0, 0, 0 },
-};
-
-DEFINE_PLL(pll_d, PLL_HAS_CPCON | PLLD, 0xd0, 1000000000, 2000000, 40000000,
-		1000000, 6000000, 40000000, 1000000000, tegra_pll_d_freq_table,
-		1000, tegra30_pll_ops, 0, tegra30_plld_clk_cfg_ex, pll_ref);
-
-DEFINE_PLL_OUT(pll_d_out0, DIV_2 | PLLD, 0, 0, 500000000, tegra30_pll_div_ops,
-		pll_d, CLK_SET_RATE_PARENT | CLK_IGNORE_UNUSED);
-
-DEFINE_PLL(pll_d2, PLL_HAS_CPCON | PLL_ALT_MISC_REG | PLLD, 0x4b8, 1000000000,
-		2000000, 40000000, 1000000, 6000000, 40000000, 1000000000,
-		tegra_pll_d_freq_table, 1000, tegra30_pll_ops, 0, NULL,
-		pll_ref);
-
-DEFINE_PLL_OUT(pll_d2_out0, DIV_2 | PLLD, 0, 0, 500000000, tegra30_pll_div_ops,
-		pll_d2, CLK_SET_RATE_PARENT | CLK_IGNORE_UNUSED);
-
-static struct clk_pll_freq_table tegra_pll_u_freq_table[] = {
-	{ 12000000, 480000000, 960, 12, 2, 12},
-	{ 13000000, 480000000, 960, 13, 2, 12},
-	{ 16800000, 480000000, 400, 7,  2, 5},
-	{ 19200000, 480000000, 200, 4,  2, 3},
-	{ 26000000, 480000000, 960, 26, 2, 12},
-	{ 0, 0, 0, 0, 0, 0 },
-};
-
-DEFINE_PLL(pll_u, PLL_HAS_CPCON | PLLU, 0xc0, 480000000, 2000000, 40000000,
-		1000000, 6000000, 48000000, 960000000, tegra_pll_u_freq_table,
-		1000, tegra30_pll_ops, 0, NULL, pll_ref);
-
-static struct clk_pll_freq_table tegra_pll_x_freq_table[] = {
-	/* 1.7 GHz */
-	{ 12000000, 1700000000, 850,  6,  1, 8},
-	{ 13000000, 1700000000, 915,  7,  1, 8},	/* actual: 1699.2 MHz */
-	{ 16800000, 1700000000, 708,  7,  1, 8},	/* actual: 1699.2 MHz */
-	{ 19200000, 1700000000, 885,  10, 1, 8},	/* actual: 1699.2 MHz */
-	{ 26000000, 1700000000, 850,  13, 1, 8},
-
-	/* 1.6 GHz */
-	{ 12000000, 1600000000, 800,  6,  1, 8},
-	{ 13000000, 1600000000, 738,  6,  1, 8},	/* actual: 1599.0 MHz */
-	{ 16800000, 1600000000, 857,  9,  1, 8},	/* actual: 1599.7 MHz */
-	{ 19200000, 1600000000, 500,  6,  1, 8},
-	{ 26000000, 1600000000, 800,  13, 1, 8},
-
-	/* 1.5 GHz */
-	{ 12000000, 1500000000, 750,  6,  1, 8},
-	{ 13000000, 1500000000, 923,  8,  1, 8},	/* actual: 1499.8 MHz */
-	{ 16800000, 1500000000, 625,  7,  1, 8},
-	{ 19200000, 1500000000, 625,  8,  1, 8},
-	{ 26000000, 1500000000, 750,  13, 1, 8},
-
-	/* 1.4 GHz */
-	{ 12000000, 1400000000, 700,  6,  1, 8},
-	{ 13000000, 1400000000, 969,  9,  1, 8},	/* actual: 1399.7 MHz */
-	{ 16800000, 1400000000, 1000, 12, 1, 8},
-	{ 19200000, 1400000000, 875,  12, 1, 8},
-	{ 26000000, 1400000000, 700,  13, 1, 8},
-
-	/* 1.3 GHz */
-	{ 12000000, 1300000000, 975,  9,  1, 8},
-	{ 13000000, 1300000000, 1000, 10, 1, 8},
-	{ 16800000, 1300000000, 928,  12, 1, 8},	/* actual: 1299.2 MHz */
-	{ 19200000, 1300000000, 812,  12, 1, 8},	/* actual: 1299.2 MHz */
-	{ 26000000, 1300000000, 650,  13, 1, 8},
-
-	/* 1.2 GHz */
-	{ 12000000, 1200000000, 1000, 10, 1, 8},
-	{ 13000000, 1200000000, 923,  10, 1, 8},	/* actual: 1199.9 MHz */
-	{ 16800000, 1200000000, 1000, 14, 1, 8},
-	{ 19200000, 1200000000, 1000, 16, 1, 8},
-	{ 26000000, 1200000000, 600,  13, 1, 8},
-
-	/* 1.1 GHz */
-	{ 12000000, 1100000000, 825,  9,  1, 8},
-	{ 13000000, 1100000000, 846,  10, 1, 8},	/* actual: 1099.8 MHz */
-	{ 16800000, 1100000000, 982,  15, 1, 8},	/* actual: 1099.8 MHz */
-	{ 19200000, 1100000000, 859,  15, 1, 8},	/* actual: 1099.5 MHz */
-	{ 26000000, 1100000000, 550,  13, 1, 8},
-
-	/* 1 GHz */
-	{ 12000000, 1000000000, 1000, 12, 1, 8},
-	{ 13000000, 1000000000, 1000, 13, 1, 8},
-	{ 16800000, 1000000000, 833,  14, 1, 8},	/* actual: 999.6 MHz */
-	{ 19200000, 1000000000, 625,  12, 1, 8},
-	{ 26000000, 1000000000, 1000, 26, 1, 8},
-
-	{ 0, 0, 0, 0, 0, 0 },
-};
-
-DEFINE_PLL(pll_x, PLL_HAS_CPCON | PLL_ALT_MISC_REG | PLLX, 0xe0, 1700000000,
-		2000000, 31000000, 1000000, 6000000, 20000000, 1700000000,
-		tegra_pll_x_freq_table, 300, tegra30_pll_ops, 0, NULL, pll_ref);
-
-DEFINE_PLL_OUT(pll_x_out0, DIV_2 | PLLX, 0, 0, 850000000, tegra30_pll_div_ops,
-		pll_x, CLK_SET_RATE_PARENT | CLK_IGNORE_UNUSED);
-
-static struct clk_pll_freq_table tegra_pll_e_freq_table[] = {
-	/* PLLE special case: use cpcon field to store cml divider value */
-	{ 12000000,  100000000, 150, 1,  18, 11},
-	{ 216000000, 100000000, 200, 18, 24, 13},
-	{ 0, 0, 0, 0, 0, 0 },
-};
-
-DEFINE_PLL(pll_e, PLL_ALT_MISC_REG, 0xe8, 100000000, 2000000, 216000000,
-		12000000, 12000000, 1200000000, 2400000000U,
-		tegra_pll_e_freq_table, 300, tegra30_plle_ops, 100000000, NULL,
-		pll_ref);
-
-static const char *mux_plle[] = {
-	"pll_e",
-};
-
-static struct clk *mux_plle_p[] = {
-	&tegra_pll_e,
-};
-
-static struct clk tegra_cml0;
-static struct clk_tegra tegra_cml0_hw = {
-	.hw = {
-		.clk = &tegra_cml0,
-	},
-	.reg = 0x48c,
-	.fixed_rate = 100000000,
-	.u.periph = {
-		.clk_num = 0,
-	},
-};
-DEFINE_CLK_TEGRA(cml0, 0, &tegra_cml_clk_ops, 0, mux_plle,
-		mux_plle_p, &tegra_pll_e);
-
-static struct clk tegra_cml1;
-static struct clk_tegra tegra_cml1_hw = {
-	.hw = {
-		.clk = &tegra_cml1,
-	},
-	.reg = 0x48c,
-	.fixed_rate = 100000000,
-	.u.periph = {
-		.clk_num = 1,
-	},
-};
-DEFINE_CLK_TEGRA(cml1, 0, &tegra_cml_clk_ops, 0, mux_plle,
-		mux_plle_p, &tegra_pll_e);
-
-static struct clk tegra_pciex;
-static struct clk_tegra tegra_pciex_hw = {
-	.hw = {
-		.clk = &tegra_pciex,
-	},
-	.reg = 0x48c,
-	.fixed_rate = 100000000,
-	.reset = tegra30_periph_clk_reset,
-	.u.periph = {
-		.clk_num = 74,
-	},
-};
-DEFINE_CLK_TEGRA(pciex, 0, &tegra_pciex_clk_ops, 0, mux_plle,
-		mux_plle_p, &tegra_pll_e);
-
-#define SYNC_SOURCE(_name)					\
-	static struct clk tegra_##_name##_sync;			\
-	static struct clk_tegra tegra_##_name##_sync_hw = {	\
-		.hw = {						\
-			.clk = &tegra_##_name##_sync,		\
-		},						\
-		.max_rate = 24000000,				\
-		.fixed_rate = 24000000,				\
-	};							\
-	static struct clk tegra_##_name##_sync = {		\
-		.name = #_name "_sync",				\
-		.hw = &tegra_##_name##_sync_hw.hw,		\
-		.ops = &tegra_sync_source_ops,			\
-		.flags = CLK_IS_ROOT,				\
-	};
-
-SYNC_SOURCE(spdif_in);
-SYNC_SOURCE(i2s0);
-SYNC_SOURCE(i2s1);
-SYNC_SOURCE(i2s2);
-SYNC_SOURCE(i2s3);
-SYNC_SOURCE(i2s4);
-SYNC_SOURCE(vimclk);
-
-static struct clk *tegra_sync_source_list[] = {
-	&tegra_spdif_in_sync,
-	&tegra_i2s0_sync,
-	&tegra_i2s1_sync,
-	&tegra_i2s2_sync,
-	&tegra_i2s3_sync,
-	&tegra_i2s4_sync,
-	&tegra_vimclk_sync,
-};
-
-static const char *mux_audio_sync_clk[] = {
-	"spdif_in_sync",
-	"i2s0_sync",
-	"i2s1_sync",
-	"i2s2_sync",
-	"i2s3_sync",
-	"i2s4_sync",
-	"vimclk_sync",
-};
-
-#define AUDIO_SYNC_CLK(_name, _index)				\
-	static struct clk tegra_##_name;			\
-	static struct clk_tegra tegra_##_name##_hw = {		\
-		.hw = {						\
-			.clk = &tegra_##_name,			\
-		},						\
-		.max_rate = 24000000,				\
-		.reg = 0x4A0 + (_index) * 4,			\
-	};							\
-	static struct clk tegra_##_name = {			\
-		.name = #_name,					\
-		.ops = &tegra30_audio_sync_clk_ops,		\
-		.hw = &tegra_##_name##_hw.hw,			\
-		.parent_names = mux_audio_sync_clk,		\
-		.parents = tegra_sync_source_list,		\
-		.num_parents = ARRAY_SIZE(mux_audio_sync_clk),	\
-	};
-
-AUDIO_SYNC_CLK(audio0, 0);
-AUDIO_SYNC_CLK(audio1, 1);
-AUDIO_SYNC_CLK(audio2, 2);
-AUDIO_SYNC_CLK(audio3, 3);
-AUDIO_SYNC_CLK(audio4, 4);
-AUDIO_SYNC_CLK(audio5, 5);
-
-static struct clk *tegra_clk_audio_list[] = {
-	&tegra_audio0,
-	&tegra_audio1,
-	&tegra_audio2,
-	&tegra_audio3,
-	&tegra_audio4,
-	&tegra_audio5,	/* SPDIF */
-};
-
-#define AUDIO_SYNC_2X_CLK(_name, _index)			\
-	static const char *_name##_parent_names[] = {		\
-		"tegra_" #_name,				\
-	};							\
-	static struct clk *_name##_parents[] = {		\
-		&tegra_##_name,					\
-	};							\
-	static struct clk tegra_##_name##_2x;			\
-	static struct clk_tegra tegra_##_name##_2x_hw = {	\
-		.hw = {						\
-			.clk = &tegra_##_name##_2x,		\
-		},						\
-		.flags = PERIPH_NO_RESET,			\
-		.max_rate = 48000000,				\
-		.reg = 0x49C,					\
-		.reg_shift = 24 + (_index),			\
-		.u.periph = {					\
-			.clk_num = 113 + (_index),		\
-		},						\
-	};							\
-	static struct clk tegra_##_name##_2x = {		\
-		.name = #_name "_2x",				\
-		.ops = &tegra30_clk_double_ops,			\
-		.hw = &tegra_##_name##_2x_hw.hw,		\
-		.parent_names = _name##_parent_names,		\
-		.parents = _name##_parents,			\
-		.parent = &tegra_##_name,			\
-		.num_parents = 1,				\
-	};
-
-AUDIO_SYNC_2X_CLK(audio0, 0);
-AUDIO_SYNC_2X_CLK(audio1, 1);
-AUDIO_SYNC_2X_CLK(audio2, 2);
-AUDIO_SYNC_2X_CLK(audio3, 3);
-AUDIO_SYNC_2X_CLK(audio4, 4);
-AUDIO_SYNC_2X_CLK(audio5, 5);	/* SPDIF */
-
-static struct clk *tegra_clk_audio_2x_list[] = {
-	&tegra_audio0_2x,
-	&tegra_audio1_2x,
-	&tegra_audio2_2x,
-	&tegra_audio3_2x,
-	&tegra_audio4_2x,
-	&tegra_audio5_2x,	/* SPDIF */
-};
-
-#define MUX_I2S_SPDIF(_id)					\
-static const char *mux_pllaout0_##_id##_2x_pllp_clkm[] = {	\
-	"pll_a_out0",						\
-	#_id "_2x",						\
-	"pll_p",						\
-	"clk_m",						\
-};								\
-static struct clk *mux_pllaout0_##_id##_2x_pllp_clkm_p[] = {	\
-	&tegra_pll_a_out0,					\
-	&tegra_##_id##_2x,					\
-	&tegra_pll_p,						\
-	&tegra_clk_m,						\
-};
-
-MUX_I2S_SPDIF(audio0);
-MUX_I2S_SPDIF(audio1);
-MUX_I2S_SPDIF(audio2);
-MUX_I2S_SPDIF(audio3);
-MUX_I2S_SPDIF(audio4);
-MUX_I2S_SPDIF(audio5);		/* SPDIF */
-
-static struct clk tegra_extern1;
-static struct clk tegra_extern2;
-static struct clk tegra_extern3;
-
-/* External clock outputs (through PMC) */
-#define MUX_EXTERN_OUT(_id)					\
-static const char *mux_clkm_clkm2_clkm4_extern##_id[] = {	\
-	"clk_m",						\
-	"clk_m_div2",						\
-	"clk_m_div4",						\
-	"extern" #_id,						\
-};								\
-static struct clk *mux_clkm_clkm2_clkm4_extern##_id##_p[] = {	\
-	&tegra_clk_m,						\
-	&tegra_clk_m_div2,					\
-	&tegra_clk_m_div4,					\
-	&tegra_extern##_id,					\
-};
-
-MUX_EXTERN_OUT(1);
-MUX_EXTERN_OUT(2);
-MUX_EXTERN_OUT(3);
-
-#define CLK_OUT_CLK(_name, _index)					\
-	static struct clk tegra_##_name;				\
-	static struct clk_tegra tegra_##_name##_hw = {			\
-		.hw = {							\
-			.clk = &tegra_##_name,				\
-		},							\
-		.lookup = {						\
-			.dev_id	= #_name,				\
-			.con_id	= "extern" #_index,			\
-		},							\
-		.flags = MUX_CLK_OUT,					\
-		.fixed_rate = 216000000,					\
-		.reg = 0x1a8,						\
-		.u.periph = {						\
-			.clk_num = (_index - 1) * 8 + 2,		\
-		},							\
-	};								\
-	static struct clk tegra_##_name = {				\
-		.name = #_name,						\
-		.ops = &tegra_clk_out_ops,				\
-		.hw = &tegra_##_name##_hw.hw,				\
-		.parent_names = mux_clkm_clkm2_clkm4_extern##_index,	\
-		.parents = mux_clkm_clkm2_clkm4_extern##_index##_p,	\
-		.num_parents = ARRAY_SIZE(mux_clkm_clkm2_clkm4_extern##_index),\
-	};
-
-CLK_OUT_CLK(clk_out_1, 1);
-CLK_OUT_CLK(clk_out_2, 2);
-CLK_OUT_CLK(clk_out_3, 3);
-
-static struct clk *tegra_clk_out_list[] = {
-	&tegra_clk_out_1,
-	&tegra_clk_out_2,
-	&tegra_clk_out_3,
-};
-
-static const char *mux_sclk[] = {
-	"clk_m",
-	"pll_c_out1",
-	"pll_p_out4",
-	"pll_p_out3",
-	"pll_p_out2",
-	"dummy",
-	"clk_32k",
-	"pll_m_out1",
-};
-
-static struct clk *mux_sclk_p[] = {
-	&tegra_clk_m,
-	&tegra_pll_c_out1,
-	&tegra_pll_p_out4,
-	&tegra_pll_p_out3,
-	&tegra_pll_p_out2,
-	NULL,
-	&tegra_clk_32k,
-	&tegra_pll_m_out1,
-};
-
-static struct clk tegra_clk_sclk;
-static struct clk_tegra tegra_clk_sclk_hw = {
-	.hw = {
-		.clk = &tegra_clk_sclk,
-	},
-	.reg = 0x28,
-	.max_rate = 334000000,
-	.min_rate = 40000000,
-};
-
-static struct clk tegra_clk_sclk = {
-	.name = "sclk",
-	.ops = &tegra30_super_ops,
-	.hw = &tegra_clk_sclk_hw.hw,
-	.parent_names = mux_sclk,
-	.parents = mux_sclk_p,
-	.num_parents = ARRAY_SIZE(mux_sclk),
-};
-
-static const char *tegra_hclk_parent_names[] = {
-	"tegra_sclk",
-};
-
-static struct clk *tegra_hclk_parents[] = {
-	&tegra_clk_sclk,
-};
-
-static struct clk tegra_hclk;
-static struct clk_tegra tegra_hclk_hw = {
-	.hw = {
-		.clk = &tegra_hclk,
-	},
-	.flags = DIV_BUS,
-	.reg = 0x30,
-	.reg_shift = 4,
-	.max_rate = 378000000,
-	.min_rate = 12000000,
-};
-DEFINE_CLK_TEGRA(hclk, 0, &tegra30_bus_ops, 0, tegra_hclk_parent_names,
-		tegra_hclk_parents, &tegra_clk_sclk);
-
-static const char *tegra_pclk_parent_names[] = {
-	"tegra_hclk",
-};
-
-static struct clk *tegra_pclk_parents[] = {
-	&tegra_hclk,
-};
-
-static struct clk tegra_pclk;
-static struct clk_tegra tegra_pclk_hw = {
-	.hw = {
-		.clk = &tegra_pclk,
-	},
-	.flags = DIV_BUS,
-	.reg = 0x30,
-	.reg_shift = 0,
-	.max_rate = 167000000,
-	.min_rate = 12000000,
-};
-DEFINE_CLK_TEGRA(pclk, 0, &tegra30_bus_ops, 0, tegra_pclk_parent_names,
-		tegra_pclk_parents, &tegra_hclk);
-
-static const char *mux_blink[] = {
-	"clk_32k",
-};
-
-static struct clk *mux_blink_p[] = {
-	&tegra_clk_32k,
-};
-
-static struct clk tegra_clk_blink;
-static struct clk_tegra tegra_clk_blink_hw = {
-	.hw = {
-		.clk = &tegra_clk_blink,
-	},
-	.reg = 0x40,
-	.max_rate = 32768,
-};
-static struct clk tegra_clk_blink = {
-	.name = "blink",
-	.ops = &tegra30_blink_clk_ops,
-	.hw = &tegra_clk_blink_hw.hw,
-	.parent = &tegra_clk_32k,
-	.parent_names = mux_blink,
-	.parents = mux_blink_p,
-	.num_parents = ARRAY_SIZE(mux_blink),
-};
-
-static const char *mux_pllm_pllc_pllp_plla[] = {
-	"pll_m",
-	"pll_c",
-	"pll_p",
-	"pll_a_out0",
-};
-
-static const char *mux_pllp_pllc_pllm_clkm[] = {
-	"pll_p",
-	"pll_c",
-	"pll_m",
-	"clk_m",
-};
-
-static const char *mux_pllp_clkm[] = {
-	"pll_p",
-	"dummy",
-	"dummy",
-	"clk_m",
-};
-
-static const char *mux_pllp_plld_pllc_clkm[] = {
-	"pll_p",
-	"pll_d_out0",
-	"pll_c",
-	"clk_m",
-};
-
-static const char *mux_pllp_pllm_plld_plla_pllc_plld2_clkm[] = {
-	"pll_p",
-	"pll_m",
-	"pll_d_out0",
-	"pll_a_out0",
-	"pll_c",
-	"pll_d2_out0",
-	"clk_m",
-};
-
-static const char *mux_plla_pllc_pllp_clkm[] = {
-	"pll_a_out0",
-	"dummy",
-	"pll_p",
-	"clk_m"
-};
-
-static const char *mux_pllp_pllc_clk32_clkm[] = {
-	"pll_p",
-	"pll_c",
-	"clk_32k",
-	"clk_m",
-};
-
-static const char *mux_pllp_pllc_clkm_clk32[] = {
-	"pll_p",
-	"pll_c",
-	"clk_m",
-	"clk_32k",
-};
-
-static const char *mux_pllp_pllc_pllm[] = {
-	"pll_p",
-	"pll_c",
-	"pll_m",
-};
-
-static const char *mux_clk_m[] = {
-	"clk_m",
-};
-
-static const char *mux_pllp_out3[] = {
-	"pll_p_out3",
-};
-
-static const char *mux_plld_out0[] = {
-	"pll_d_out0",
-};
-
-static const char *mux_plld_out0_plld2_out0[] = {
-	"pll_d_out0",
-	"pll_d2_out0",
-};
-
-static const char *mux_clk_32k[] = {
-	"clk_32k",
-};
-
-static const char *mux_plla_clk32_pllp_clkm_plle[] = {
-	"pll_a_out0",
-	"clk_32k",
-	"pll_p",
-	"clk_m",
-	"pll_e",
-};
-
-static const char *mux_cclk_g[] = {
-	"clk_m",
-	"pll_c",
-	"clk_32k",
-	"pll_m",
-	"pll_p",
-	"pll_p_out4",
-	"pll_p_out3",
-	"dummy",
-	"pll_x",
-};
-
-static struct clk *mux_pllm_pllc_pllp_plla_p[] = {
-	&tegra_pll_m,
-	&tegra_pll_c,
-	&tegra_pll_p,
-	&tegra_pll_a_out0,
-};
-
-static struct clk *mux_pllp_pllc_pllm_clkm_p[] = {
-	&tegra_pll_p,
-	&tegra_pll_c,
-	&tegra_pll_m,
-	&tegra_clk_m,
-};
-
-static struct clk *mux_pllp_clkm_p[] = {
-	&tegra_pll_p,
-	NULL,
-	NULL,
-	&tegra_clk_m,
-};
-
-static struct clk *mux_pllp_plld_pllc_clkm_p[] = {
-	&tegra_pll_p,
-	&tegra_pll_d_out0,
-	&tegra_pll_c,
-	&tegra_clk_m,
-};
-
-static struct clk *mux_pllp_pllm_plld_plla_pllc_plld2_clkm_p[] = {
-	&tegra_pll_p,
-	&tegra_pll_m,
-	&tegra_pll_d_out0,
-	&tegra_pll_a_out0,
-	&tegra_pll_c,
-	&tegra_pll_d2_out0,
-	&tegra_clk_m,
-};
-
-static struct clk *mux_plla_pllc_pllp_clkm_p[] = {
-	&tegra_pll_a_out0,
-	NULL,
-	&tegra_pll_p,
-	&tegra_clk_m,
-};
-
-static struct clk *mux_pllp_pllc_clk32_clkm_p[] = {
-	&tegra_pll_p,
-	&tegra_pll_c,
-	&tegra_clk_32k,
-	&tegra_clk_m,
-};
-
-static struct clk *mux_pllp_pllc_clkm_clk32_p[] = {
-	&tegra_pll_p,
-	&tegra_pll_c,
-	&tegra_clk_m,
-	&tegra_clk_32k,
-};
-
-static struct clk *mux_pllp_pllc_pllm_p[] = {
-	&tegra_pll_p,
-	&tegra_pll_c,
-	&tegra_pll_m,
-};
-
-static struct clk *mux_clk_m_p[] = {
-	&tegra_clk_m,
-};
-
-static struct clk *mux_pllp_out3_p[] = {
-	&tegra_pll_p_out3,
-};
-
-static struct clk *mux_plld_out0_p[] = {
-	&tegra_pll_d_out0,
-};
-
-static struct clk *mux_plld_out0_plld2_out0_p[] = {
-	&tegra_pll_d_out0,
-	&tegra_pll_d2_out0,
-};
-
-static struct clk *mux_clk_32k_p[] = {
-	&tegra_clk_32k,
-};
-
-static struct clk *mux_plla_clk32_pllp_clkm_plle_p[] = {
-	&tegra_pll_a_out0,
-	&tegra_clk_32k,
-	&tegra_pll_p,
-	&tegra_clk_m,
-	&tegra_pll_e,
-};
-
-static struct clk *mux_cclk_g_p[] = {
-	&tegra_clk_m,
-	&tegra_pll_c,
-	&tegra_clk_32k,
-	&tegra_pll_m,
-	&tegra_pll_p,
-	&tegra_pll_p_out4,
-	&tegra_pll_p_out3,
-	NULL,
-	&tegra_pll_x,
-};
-
-static struct clk tegra_clk_cclk_g;
-static struct clk_tegra tegra_clk_cclk_g_hw = {
-	.hw = {
-		.clk = &tegra_clk_cclk_g,
-	},
-	.flags = DIV_U71 | DIV_U71_INT,
-	.reg = 0x368,
-	.max_rate = 1700000000,
-};
-static struct clk tegra_clk_cclk_g = {
-	.name = "cclk_g",
-	.ops = &tegra30_super_ops,
-	.hw = &tegra_clk_cclk_g_hw.hw,
-	.parent_names = mux_cclk_g,
-	.parents = mux_cclk_g_p,
-	.num_parents = ARRAY_SIZE(mux_cclk_g),
-};
-
-static const char *mux_twd[] = {
-	"cclk_g",
-};
-
-static struct clk *mux_twd_p[] = {
-	&tegra_clk_cclk_g,
-};
-
-static struct clk tegra30_clk_twd;
-static struct clk_tegra tegra30_clk_twd_hw = {
-	.hw = {
-		.clk = &tegra30_clk_twd,
-	},
-	.max_rate = 1400000000,
-	.mul = 1,
-	.div = 2,
-};
-
-static struct clk tegra30_clk_twd = {
-	.name = "twd",
-	.ops = &tegra30_twd_ops,
-	.hw = &tegra30_clk_twd_hw.hw,
-	.parent = &tegra_clk_cclk_g,
-	.parent_names = mux_twd,
-	.parents = mux_twd_p,
-	.num_parents = ARRAY_SIZE(mux_twd),
-};
-
-#define PERIPH_CLK(_name, _dev, _con, _clk_num, _reg,	\
-		_max, _inputs, _flags)	 		\
-	static struct clk tegra_##_name;		\
-	static struct clk_tegra tegra_##_name##_hw = {	\
-		.hw = {					\
-			.clk = &tegra_##_name,		\
-		},					\
-		.lookup = {				\
-			.dev_id	= _dev,			\
-			.con_id	= _con,			\
-		},					\
-		.reg = _reg,				\
-		.flags = _flags,			\
-		.max_rate = _max,			\
-		.u.periph = {				\
-			.clk_num = _clk_num,		\
-		},					\
-		.reset = &tegra30_periph_clk_reset,	\
-	};						\
-	static struct clk tegra_##_name = {		\
-		.name = #_name,				\
-		.ops = &tegra30_periph_clk_ops,		\
-		.hw = &tegra_##_name##_hw.hw,		\
-		.parent_names = _inputs,		\
-		.parents = _inputs##_p,			\
-		.num_parents = ARRAY_SIZE(_inputs),	\
-	};
-
-PERIPH_CLK(apbdma,	"tegra-apbdma",		NULL,	34,	0,	26000000,  mux_clk_m,			0);
-PERIPH_CLK(rtc,		"rtc-tegra",		NULL,	4,	0,	32768,     mux_clk_32k,			PERIPH_NO_RESET | PERIPH_ON_APB);
-PERIPH_CLK(kbc,		"tegra-kbc",		NULL,	36,	0,	32768,     mux_clk_32k,			PERIPH_NO_RESET | PERIPH_ON_APB);
-PERIPH_CLK(timer,	"timer",		NULL,	5,	0,	26000000,  mux_clk_m,			0);
-PERIPH_CLK(kfuse,	"kfuse-tegra",		NULL,	40,	0,	26000000,  mux_clk_m,			0);
-PERIPH_CLK(fuse,	"fuse-tegra",		"fuse",	39,	0,	26000000,  mux_clk_m,			PERIPH_ON_APB);
-PERIPH_CLK(fuse_burn,	"fuse-tegra",		"fuse_burn",	39,	0,	26000000,  mux_clk_m,		PERIPH_ON_APB);
-PERIPH_CLK(apbif,	"tegra30-ahub",		"apbif", 107,	0,	26000000,  mux_clk_m,			0);
-PERIPH_CLK(i2s0,	"tegra30-i2s.0",	NULL,	30,	0x1d8,	26000000,  mux_pllaout0_audio0_2x_pllp_clkm,	MUX | DIV_U71 | PERIPH_ON_APB);
-PERIPH_CLK(i2s1,	"tegra30-i2s.1",	NULL,	11,	0x100,	26000000,  mux_pllaout0_audio1_2x_pllp_clkm,	MUX | DIV_U71 | PERIPH_ON_APB);
-PERIPH_CLK(i2s2,	"tegra30-i2s.2",	NULL,	18,	0x104,	26000000,  mux_pllaout0_audio2_2x_pllp_clkm,	MUX | DIV_U71 | PERIPH_ON_APB);
-PERIPH_CLK(i2s3,	"tegra30-i2s.3",	NULL,	101,	0x3bc,	26000000,  mux_pllaout0_audio3_2x_pllp_clkm,	MUX | DIV_U71 | PERIPH_ON_APB);
-PERIPH_CLK(i2s4,	"tegra30-i2s.4",	NULL,	102,	0x3c0,	26000000,  mux_pllaout0_audio4_2x_pllp_clkm,	MUX | DIV_U71 | PERIPH_ON_APB);
-PERIPH_CLK(spdif_out,	"tegra30-spdif",	"spdif_out",	10,	0x108,	100000000, mux_pllaout0_audio5_2x_pllp_clkm,	MUX | DIV_U71 | PERIPH_ON_APB);
-PERIPH_CLK(spdif_in,	"tegra30-spdif",	"spdif_in",	10,	0x10c,	100000000, mux_pllp_pllc_pllm,		MUX | DIV_U71 | PERIPH_ON_APB);
-PERIPH_CLK(pwm,		"tegra-pwm",		NULL,	17,	0x110,	432000000, mux_pllp_pllc_clk32_clkm,	MUX | MUX_PWM | DIV_U71 | PERIPH_ON_APB);
-PERIPH_CLK(d_audio,	"tegra30-ahub",		"d_audio", 106,	0x3d0,	48000000,  mux_plla_pllc_pllp_clkm,	MUX | DIV_U71);
-PERIPH_CLK(dam0,	"tegra30-dam.0",	NULL,	108,	0x3d8,	48000000,  mux_plla_pllc_pllp_clkm,	MUX | DIV_U71);
-PERIPH_CLK(dam1,	"tegra30-dam.1",	NULL,	109,	0x3dc,	48000000,  mux_plla_pllc_pllp_clkm,	MUX | DIV_U71);
-PERIPH_CLK(dam2,	"tegra30-dam.2",	NULL,	110,	0x3e0,	48000000,  mux_plla_pllc_pllp_clkm,	MUX | DIV_U71);
-PERIPH_CLK(hda,		"tegra30-hda",		"hda",	125,	0x428,	108000000, mux_pllp_pllc_pllm_clkm,	MUX | DIV_U71);
-PERIPH_CLK(hda2codec_2x,	"tegra30-hda",	"hda2codec",	111,	0x3e4,	48000000,  mux_pllp_pllc_pllm_clkm,	MUX | DIV_U71);
-PERIPH_CLK(hda2hdmi,	"tegra30-hda",		"hda2hdmi",	128,	0,	48000000,  mux_clk_m,			0);
-PERIPH_CLK(sbc1,	"spi_tegra.0",		NULL,	41,	0x134,	160000000, mux_pllp_pllc_pllm_clkm,	MUX | DIV_U71 | PERIPH_ON_APB);
-PERIPH_CLK(sbc2,	"spi_tegra.1",		NULL,	44,	0x118,	160000000, mux_pllp_pllc_pllm_clkm,	MUX | DIV_U71 | PERIPH_ON_APB);
-PERIPH_CLK(sbc3,	"spi_tegra.2",		NULL,	46,	0x11c,	160000000, mux_pllp_pllc_pllm_clkm,	MUX | DIV_U71 | PERIPH_ON_APB);
-PERIPH_CLK(sbc4,	"spi_tegra.3",		NULL,	68,	0x1b4,	160000000, mux_pllp_pllc_pllm_clkm,	MUX | DIV_U71 | PERIPH_ON_APB);
-PERIPH_CLK(sbc5,	"spi_tegra.4",		NULL,	104,	0x3c8,	160000000, mux_pllp_pllc_pllm_clkm,	MUX | DIV_U71 | PERIPH_ON_APB);
-PERIPH_CLK(sbc6,	"spi_tegra.5",		NULL,	105,	0x3cc,	160000000, mux_pllp_pllc_pllm_clkm,	MUX | DIV_U71 | PERIPH_ON_APB);
-PERIPH_CLK(sata_oob,	"tegra_sata_oob",	NULL,	123,	0x420,	216000000, mux_pllp_pllc_pllm_clkm,	MUX | DIV_U71);
-PERIPH_CLK(sata,	"tegra_sata",		NULL,	124,	0x424,	216000000, mux_pllp_pllc_pllm_clkm,	MUX | DIV_U71);
-PERIPH_CLK(sata_cold,	"tegra_sata_cold",	NULL,	129,	0,	48000000,  mux_clk_m,			0);
-PERIPH_CLK(ndflash,	"tegra_nand",		NULL,	13,	0x160,	240000000, mux_pllp_pllc_pllm_clkm,	MUX | DIV_U71);
-PERIPH_CLK(ndspeed,	"tegra_nand_speed",	NULL,	80,	0x3f8,	240000000, mux_pllp_pllc_pllm_clkm,	MUX | DIV_U71);
-PERIPH_CLK(vfir,	"vfir",			NULL,	7,	0x168,	72000000,  mux_pllp_pllc_pllm_clkm,	MUX | DIV_U71 | PERIPH_ON_APB);
-PERIPH_CLK(sdmmc1,	"sdhci-tegra.0",	NULL,	14,	0x150,	208000000, mux_pllp_pllc_pllm_clkm,	MUX | DIV_U71); /* scales with voltage */
-PERIPH_CLK(sdmmc2,	"sdhci-tegra.1",	NULL,	9,	0x154,	104000000, mux_pllp_pllc_pllm_clkm,	MUX | DIV_U71); /* scales with voltage */
-PERIPH_CLK(sdmmc3,	"sdhci-tegra.2",	NULL,	69,	0x1bc,	208000000, mux_pllp_pllc_pllm_clkm,	MUX | DIV_U71); /* scales with voltage */
-PERIPH_CLK(sdmmc4,	"sdhci-tegra.3",	NULL,	15,	0x164,	104000000, mux_pllp_pllc_pllm_clkm,	MUX | DIV_U71); /* scales with voltage */
-PERIPH_CLK(vcp,		"tegra-avp",		"vcp",	29,	0,	250000000, mux_clk_m,			0);
-PERIPH_CLK(bsea,	"tegra-avp",		"bsea",	62,	0,	250000000, mux_clk_m,			0);
-PERIPH_CLK(bsev,	"tegra-aes",		"bsev",	63,	0,	250000000, mux_clk_m,			0);
-PERIPH_CLK(vde,		"vde",			NULL,	61,	0x1c8,	520000000, mux_pllp_pllc_pllm_clkm,	MUX | DIV_U71 | DIV_U71_INT);
-PERIPH_CLK(csite,	"csite",		NULL,	73,	0x1d4,	144000000, mux_pllp_pllc_pllm_clkm,	MUX | DIV_U71); /* max rate ??? */
-PERIPH_CLK(la,		"la",			NULL,	76,	0x1f8,	26000000,  mux_pllp_pllc_pllm_clkm,	MUX | DIV_U71);
-PERIPH_CLK(owr,		"tegra_w1",		NULL,	71,	0x1cc,	26000000,  mux_pllp_pllc_pllm_clkm,	MUX | DIV_U71 | PERIPH_ON_APB);
-PERIPH_CLK(nor,		"nor",			NULL,	42,	0x1d0,	127000000, mux_pllp_pllc_pllm_clkm,	MUX | DIV_U71); /* requires min voltage */
-PERIPH_CLK(mipi,	"mipi",			NULL,	50,	0x174,	60000000,  mux_pllp_pllc_pllm_clkm,	MUX | DIV_U71 | PERIPH_ON_APB); /* scales with voltage */
-PERIPH_CLK(i2c1,	"tegra-i2c.0",		"div-clk", 12,	0x124,	26000000,  mux_pllp_clkm,		MUX | DIV_U16 | PERIPH_ON_APB);
-PERIPH_CLK(i2c2,	"tegra-i2c.1",		"div-clk", 54,	0x198,	26000000,  mux_pllp_clkm,		MUX | DIV_U16 | PERIPH_ON_APB);
-PERIPH_CLK(i2c3,	"tegra-i2c.2",		"div-clk", 67,	0x1b8,	26000000,  mux_pllp_clkm,		MUX | DIV_U16 | PERIPH_ON_APB);
-PERIPH_CLK(i2c4,	"tegra-i2c.3",		"div-clk", 103,	0x3c4,	26000000,  mux_pllp_clkm,		MUX | DIV_U16 | PERIPH_ON_APB);
-PERIPH_CLK(i2c5,	"tegra-i2c.4",		"div-clk", 47,	0x128,	26000000,  mux_pllp_clkm,		MUX | DIV_U16 | PERIPH_ON_APB);
-PERIPH_CLK(uarta,	"tegra-uart.0",		NULL,	6,	0x178,	800000000, mux_pllp_pllc_pllm_clkm,	MUX | DIV_U71 | DIV_U71_UART | PERIPH_ON_APB);
-PERIPH_CLK(uartb,	"tegra-uart.1",		NULL,	7,	0x17c,	800000000, mux_pllp_pllc_pllm_clkm,	MUX | DIV_U71 | DIV_U71_UART | PERIPH_ON_APB);
-PERIPH_CLK(uartc,	"tegra-uart.2",		NULL,	55,	0x1a0,	800000000, mux_pllp_pllc_pllm_clkm,	MUX | DIV_U71 | DIV_U71_UART | PERIPH_ON_APB);
-PERIPH_CLK(uartd,	"tegra-uart.3",		NULL,	65,	0x1c0,	800000000, mux_pllp_pllc_pllm_clkm,	MUX | DIV_U71 | DIV_U71_UART | PERIPH_ON_APB);
-PERIPH_CLK(uarte,	"tegra-uart.4",		NULL,	66,	0x1c4,	800000000, mux_pllp_pllc_pllm_clkm,	MUX | DIV_U71 | DIV_U71_UART | PERIPH_ON_APB);
-PERIPH_CLK(vi,		"tegra_camera",		"vi",	20,	0x148,	425000000, mux_pllm_pllc_pllp_plla,	MUX | DIV_U71 | DIV_U71_INT);
-PERIPH_CLK(3d,		"3d",			NULL,	24,	0x158,	520000000, mux_pllm_pllc_pllp_plla,	MUX | DIV_U71 | DIV_U71_INT | DIV_U71_IDLE | PERIPH_MANUAL_RESET);
-PERIPH_CLK(3d2,		"3d2",			NULL,	98,	0x3b0,	520000000, mux_pllm_pllc_pllp_plla,	MUX | DIV_U71 | DIV_U71_INT | DIV_U71_IDLE | PERIPH_MANUAL_RESET);
-PERIPH_CLK(2d,		"2d",			NULL,	21,	0x15c,	520000000, mux_pllm_pllc_pllp_plla,	MUX | DIV_U71 | DIV_U71_INT | DIV_U71_IDLE);
-PERIPH_CLK(vi_sensor,	"tegra_camera",		"vi_sensor",	20,	0x1a8,	150000000, mux_pllm_pllc_pllp_plla,	MUX | DIV_U71 | PERIPH_NO_RESET);
-PERIPH_CLK(epp,		"epp",			NULL,	19,	0x16c,	520000000, mux_pllm_pllc_pllp_plla,	MUX | DIV_U71 | DIV_U71_INT);
-PERIPH_CLK(mpe,		"mpe",			NULL,	60,	0x170,	520000000, mux_pllm_pllc_pllp_plla,	MUX | DIV_U71 | DIV_U71_INT);
-PERIPH_CLK(host1x,	"host1x",		NULL,	28,	0x180,	260000000, mux_pllm_pllc_pllp_plla,	MUX | DIV_U71 | DIV_U71_INT);
-PERIPH_CLK(cve,		"cve",			NULL,	49,	0x140,	250000000, mux_pllp_plld_pllc_clkm,	MUX | DIV_U71); /* requires min voltage */
-PERIPH_CLK(tvo,		"tvo",			NULL,	49,	0x188,	250000000, mux_pllp_plld_pllc_clkm,	MUX | DIV_U71); /* requires min voltage */
-PERIPH_CLK(dtv,		"dtv",			NULL,	79,	0x1dc,	250000000, mux_clk_m,			0);
-PERIPH_CLK(hdmi,	"hdmi",			NULL,	51,	0x18c,	148500000, mux_pllp_pllm_plld_plla_pllc_plld2_clkm,	MUX | MUX8 | DIV_U71);
-PERIPH_CLK(tvdac,	"tvdac",		NULL,	53,	0x194,	220000000, mux_pllp_plld_pllc_clkm,	MUX | DIV_U71); /* requires min voltage */
-PERIPH_CLK(disp1,	"tegradc.0",		NULL,	27,	0x138,	600000000, mux_pllp_pllm_plld_plla_pllc_plld2_clkm,	MUX | MUX8);
-PERIPH_CLK(disp2,	"tegradc.1",		NULL,	26,	0x13c,	600000000, mux_pllp_pllm_plld_plla_pllc_plld2_clkm,	MUX | MUX8);
-PERIPH_CLK(usbd,	"fsl-tegra-udc",	NULL,	22,	0,	480000000, mux_clk_m,			0); /* requires min voltage */
-PERIPH_CLK(usb2,	"tegra-ehci.1",		NULL,	58,	0,	480000000, mux_clk_m,			0); /* requires min voltage */
-PERIPH_CLK(usb3,	"tegra-ehci.2",		NULL,	59,	0,	480000000, mux_clk_m,			0); /* requires min voltage */
-PERIPH_CLK(dsia,	"tegradc.0",		"dsia",	48,	0,	500000000, mux_plld_out0,		0);
-PERIPH_CLK(csi,		"tegra_camera",		"csi",	52,	0,	102000000, mux_pllp_out3,		0);
-PERIPH_CLK(isp,		"tegra_camera",		"isp",	23,	0,	150000000, mux_clk_m,			0); /* same frequency as VI */
-PERIPH_CLK(csus,	"tegra_camera",		"csus",	92,	0,	150000000, mux_clk_m,			PERIPH_NO_RESET);
-PERIPH_CLK(tsensor,	"tegra-tsensor",	NULL,	100,	0x3b8,	216000000, mux_pllp_pllc_clkm_clk32,	MUX | DIV_U71);
-PERIPH_CLK(actmon,	"actmon",		NULL,	119,	0x3e8,	216000000, mux_pllp_pllc_clk32_clkm,	MUX | DIV_U71);
-PERIPH_CLK(extern1,	"extern1",		NULL,	120,	0x3ec,	216000000, mux_plla_clk32_pllp_clkm_plle,	MUX | MUX8 | DIV_U71);
-PERIPH_CLK(extern2,	"extern2",		NULL,	121,	0x3f0,	216000000, mux_plla_clk32_pllp_clkm_plle,	MUX | MUX8 | DIV_U71);
-PERIPH_CLK(extern3,	"extern3",		NULL,	122,	0x3f4,	216000000, mux_plla_clk32_pllp_clkm_plle,	MUX | MUX8 | DIV_U71);
-PERIPH_CLK(i2cslow,	"i2cslow",		NULL,	81,	0x3fc,	26000000,  mux_pllp_pllc_clk32_clkm,	MUX | DIV_U71 | PERIPH_ON_APB);
-PERIPH_CLK(pcie,	"tegra-pcie",		"pcie",	70,	0,	250000000, mux_clk_m,			0);
-PERIPH_CLK(afi,		"tegra-pcie",		"afi",	72,	0,	250000000, mux_clk_m,			0);
-PERIPH_CLK(se,		"se",			NULL,	127,	0x42c,	520000000, mux_pllp_pllc_pllm_clkm,	MUX | DIV_U71 | DIV_U71_INT);
-
-static struct clk tegra_dsib;
-static struct clk_tegra tegra_dsib_hw = {
-	.hw = {
-		.clk = &tegra_dsib,
-	},
-	.lookup = {
-		.dev_id	= "tegradc.1",
-		.con_id	= "dsib",
-	},
-	.reg = 0xd0,
-	.flags = MUX | PLLD,
-	.max_rate = 500000000,
-	.u.periph = {
-		.clk_num = 82,
-	},
-	.reset = &tegra30_periph_clk_reset,
-};
-static struct clk tegra_dsib = {
-	.name = "dsib",
-	.ops = &tegra30_dsib_clk_ops,
-	.hw = &tegra_dsib_hw.hw,
-	.parent_names = mux_plld_out0_plld2_out0,
-	.parents = mux_plld_out0_plld2_out0_p,
-	.num_parents = ARRAY_SIZE(mux_plld_out0_plld2_out0),
-};
-
-static struct clk *tegra_list_clks[] = {
-	&tegra_apbdma,
-	&tegra_rtc,
-	&tegra_kbc,
-	&tegra_timer,
-	&tegra_kfuse,
-	&tegra_fuse,
-	&tegra_fuse_burn,
-	&tegra_apbif,
-	&tegra_i2s0,
-	&tegra_i2s1,
-	&tegra_i2s2,
-	&tegra_i2s3,
-	&tegra_i2s4,
-	&tegra_spdif_out,
-	&tegra_spdif_in,
-	&tegra_pwm,
-	&tegra_d_audio,
-	&tegra_dam0,
-	&tegra_dam1,
-	&tegra_dam2,
-	&tegra_hda,
-	&tegra_hda2codec_2x,
-	&tegra_hda2hdmi,
-	&tegra_sbc1,
-	&tegra_sbc2,
-	&tegra_sbc3,
-	&tegra_sbc4,
-	&tegra_sbc5,
-	&tegra_sbc6,
-	&tegra_sata_oob,
-	&tegra_sata,
-	&tegra_sata_cold,
-	&tegra_ndflash,
-	&tegra_ndspeed,
-	&tegra_vfir,
-	&tegra_sdmmc1,
-	&tegra_sdmmc2,
-	&tegra_sdmmc3,
-	&tegra_sdmmc4,
-	&tegra_vcp,
-	&tegra_bsea,
-	&tegra_bsev,
-	&tegra_vde,
-	&tegra_csite,
-	&tegra_la,
-	&tegra_owr,
-	&tegra_nor,
-	&tegra_mipi,
-	&tegra_i2c1,
-	&tegra_i2c2,
-	&tegra_i2c3,
-	&tegra_i2c4,
-	&tegra_i2c5,
-	&tegra_uarta,
-	&tegra_uartb,
-	&tegra_uartc,
-	&tegra_uartd,
-	&tegra_uarte,
-	&tegra_vi,
-	&tegra_3d,
-	&tegra_3d2,
-	&tegra_2d,
-	&tegra_vi_sensor,
-	&tegra_epp,
-	&tegra_mpe,
-	&tegra_host1x,
-	&tegra_cve,
-	&tegra_tvo,
-	&tegra_dtv,
-	&tegra_hdmi,
-	&tegra_tvdac,
-	&tegra_disp1,
-	&tegra_disp2,
-	&tegra_usbd,
-	&tegra_usb2,
-	&tegra_usb3,
-	&tegra_dsia,
-	&tegra_dsib,
-	&tegra_csi,
-	&tegra_isp,
-	&tegra_csus,
-	&tegra_tsensor,
-	&tegra_actmon,
-	&tegra_extern1,
-	&tegra_extern2,
-	&tegra_extern3,
-	&tegra_i2cslow,
-	&tegra_pcie,
-	&tegra_afi,
-	&tegra_se,
-};
-
-#define CLK_DUPLICATE(_name, _dev, _con)	\
-	{					\
-		.name	= _name,		\
-		.lookup	= {			\
-			.dev_id	= _dev,		\
-			.con_id	= _con,		\
-		},				\
-	}
-
-/* Some clocks may be used by different drivers depending on the board
- * configuration.  List those here to register them twice in the clock lookup
- * table under two names.
- */
-static struct clk_duplicate tegra_clk_duplicates[] = {
-	CLK_DUPLICATE("uarta",  "serial8250.0", NULL),
-	CLK_DUPLICATE("uartb",  "serial8250.1", NULL),
-	CLK_DUPLICATE("uartc",  "serial8250.2", NULL),
-	CLK_DUPLICATE("uartd",  "serial8250.3", NULL),
-	CLK_DUPLICATE("uarte",  "serial8250.4", NULL),
-	CLK_DUPLICATE("usbd", "utmip-pad", NULL),
-	CLK_DUPLICATE("usbd", "tegra-ehci.0", NULL),
-	CLK_DUPLICATE("usbd", "tegra-otg", NULL),
-	CLK_DUPLICATE("dsib", "tegradc.0", "dsib"),
-	CLK_DUPLICATE("dsia", "tegradc.1", "dsia"),
-	CLK_DUPLICATE("bsev", "tegra-avp", "bsev"),
-	CLK_DUPLICATE("bsev", "nvavp", "bsev"),
-	CLK_DUPLICATE("vde", "tegra-aes", "vde"),
-	CLK_DUPLICATE("bsea", "tegra-aes", "bsea"),
-	CLK_DUPLICATE("bsea", "nvavp", "bsea"),
-	CLK_DUPLICATE("cml1", "tegra_sata_cml", NULL),
-	CLK_DUPLICATE("cml0", "tegra_pcie", "cml"),
-	CLK_DUPLICATE("pciex", "tegra_pcie", "pciex"),
-	CLK_DUPLICATE("i2c1", "tegra-i2c-slave.0", NULL),
-	CLK_DUPLICATE("i2c2", "tegra-i2c-slave.1", NULL),
-	CLK_DUPLICATE("i2c3", "tegra-i2c-slave.2", NULL),
-	CLK_DUPLICATE("i2c4", "tegra-i2c-slave.3", NULL),
-	CLK_DUPLICATE("i2c5", "tegra-i2c-slave.4", NULL),
-	CLK_DUPLICATE("sbc1", "spi_slave_tegra.0", NULL),
-	CLK_DUPLICATE("sbc2", "spi_slave_tegra.1", NULL),
-	CLK_DUPLICATE("sbc3", "spi_slave_tegra.2", NULL),
-	CLK_DUPLICATE("sbc4", "spi_slave_tegra.3", NULL),
-	CLK_DUPLICATE("sbc5", "spi_slave_tegra.4", NULL),
-	CLK_DUPLICATE("sbc6", "spi_slave_tegra.5", NULL),
-	CLK_DUPLICATE("twd", "smp_twd", NULL),
-	CLK_DUPLICATE("vcp", "nvavp", "vcp"),
-	CLK_DUPLICATE("i2s0", NULL, "i2s0"),
-	CLK_DUPLICATE("i2s1", NULL, "i2s1"),
-	CLK_DUPLICATE("i2s2", NULL, "i2s2"),
-	CLK_DUPLICATE("i2s3", NULL, "i2s3"),
-	CLK_DUPLICATE("i2s4", NULL, "i2s4"),
-	CLK_DUPLICATE("dam0", NULL, "dam0"),
-	CLK_DUPLICATE("dam1", NULL, "dam1"),
-	CLK_DUPLICATE("dam2", NULL, "dam2"),
-	CLK_DUPLICATE("spdif_in", NULL, "spdif_in"),
-	CLK_DUPLICATE("pll_p_out3", "tegra-i2c.0", "fast-clk"),
-	CLK_DUPLICATE("pll_p_out3", "tegra-i2c.1", "fast-clk"),
-	CLK_DUPLICATE("pll_p_out3", "tegra-i2c.2", "fast-clk"),
-	CLK_DUPLICATE("pll_p_out3", "tegra-i2c.3", "fast-clk"),
-	CLK_DUPLICATE("pll_p_out3", "tegra-i2c.4", "fast-clk"),
-	CLK_DUPLICATE("pll_p", "tegradc.0", "parent"),
-	CLK_DUPLICATE("pll_p", "tegradc.1", "parent"),
-	CLK_DUPLICATE("pll_d2_out0", "hdmi", "parent"),
-};
-
-static struct clk *tegra_ptr_clks[] = {
-	&tegra_clk_32k,
-	&tegra_clk_m,
-	&tegra_clk_m_div2,
-	&tegra_clk_m_div4,
-	&tegra_pll_ref,
-	&tegra_pll_m,
-	&tegra_pll_m_out1,
-	&tegra_pll_c,
-	&tegra_pll_c_out1,
-	&tegra_pll_p,
-	&tegra_pll_p_out1,
-	&tegra_pll_p_out2,
-	&tegra_pll_p_out3,
-	&tegra_pll_p_out4,
-	&tegra_pll_a,
-	&tegra_pll_a_out0,
-	&tegra_pll_d,
-	&tegra_pll_d_out0,
-	&tegra_pll_d2,
-	&tegra_pll_d2_out0,
-	&tegra_pll_u,
-	&tegra_pll_x,
-	&tegra_pll_x_out0,
-	&tegra_pll_e,
-	&tegra_clk_cclk_g,
-	&tegra_cml0,
-	&tegra_cml1,
-	&tegra_pciex,
-	&tegra_clk_sclk,
-	&tegra_hclk,
-	&tegra_pclk,
-	&tegra_clk_blink,
-	&tegra30_clk_twd,
-};
-
-static void tegra30_init_one_clock(struct clk *c)
-{
-	struct clk_tegra *clk = to_clk_tegra(c->hw);
-	__clk_init(NULL, c);
-	INIT_LIST_HEAD(&clk->shared_bus_list);
-	if (!clk->lookup.dev_id && !clk->lookup.con_id)
-		clk->lookup.con_id = c->name;
-	clk->lookup.clk = c;
-	clkdev_add(&clk->lookup);
-	tegra_clk_add(c);
-}
-
-void __init tegra30_init_clocks(void)
-{
-	int i;
-	struct clk *c;
-
-	for (i = 0; i < ARRAY_SIZE(tegra_ptr_clks); i++)
-		tegra30_init_one_clock(tegra_ptr_clks[i]);
-
-	for (i = 0; i < ARRAY_SIZE(tegra_list_clks); i++)
-		tegra30_init_one_clock(tegra_list_clks[i]);
-
-	for (i = 0; i < ARRAY_SIZE(tegra_clk_duplicates); i++) {
-		c = tegra_get_clock_by_name(tegra_clk_duplicates[i].name);
-		if (!c) {
-			pr_err("%s: Unknown duplicate clock %s\n", __func__,
-				tegra_clk_duplicates[i].name);
-			continue;
-		}
-
-		tegra_clk_duplicates[i].lookup.clk = c;
-		clkdev_add(&tegra_clk_duplicates[i].lookup);
-	}
-
-	for (i = 0; i < ARRAY_SIZE(tegra_sync_source_list); i++)
-		tegra30_init_one_clock(tegra_sync_source_list[i]);
-	for (i = 0; i < ARRAY_SIZE(tegra_clk_audio_list); i++)
-		tegra30_init_one_clock(tegra_clk_audio_list[i]);
-	for (i = 0; i < ARRAY_SIZE(tegra_clk_audio_2x_list); i++)
-		tegra30_init_one_clock(tegra_clk_audio_2x_list[i]);
-
-	for (i = 0; i < ARRAY_SIZE(tegra_clk_out_list); i++)
-		tegra30_init_one_clock(tegra_clk_out_list[i]);
-
-	tegra30_cpu_car_ops_init();
-}
diff --git a/include/linux/clk/tegra.h b/include/linux/clk/tegra.h
index a7e5a3999099..404d6f940872 100644
--- a/include/linux/clk/tegra.h
+++ b/include/linux/clk/tegra.h
@@ -120,8 +120,6 @@ static inline void tegra_cpu_clock_resume(void)
 }
 #endif
 
-void tegra20_cpu_car_ops_init(void);
-void tegra30_cpu_car_ops_init(void);
 void tegra_periph_reset_deassert(struct clk *c);
 void tegra_periph_reset_assert(struct clk *c);
 void tegra_clocks_init(void);
-- 
cgit v1.2.3


From 5e18150a7b4139e15e1ed7d7c938dd91f7f12354 Mon Sep 17 00:00:00 2001
From: Venu Byravarasu <vbyravarasu@nvidia.com>
Date: Tue, 15 Jan 2013 15:49:30 +0530
Subject: usb: phy: remove unused APIs from Tegra PHY.

As tegra_usb_phy_clk_disable/enable() are not being
used, removing them.

Signed-off-by: Venu Byravarasu <vbyravarasu@nvidia.com>
Signed-off-by: Stephen Warren <swarren@nvidia.com>
---
 drivers/usb/phy/tegra_usb_phy.c   | 13 -------------
 include/linux/usb/tegra_usb_phy.h |  4 ----
 2 files changed, 17 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/phy/tegra_usb_phy.c b/drivers/usb/phy/tegra_usb_phy.c
index 2d3cae9a785d..3059384326b6 100644
--- a/drivers/usb/phy/tegra_usb_phy.c
+++ b/drivers/usb/phy/tegra_usb_phy.c
@@ -825,16 +825,3 @@ void tegra_ehci_phy_restore_end(struct tegra_usb_phy *phy)
 }
 EXPORT_SYMBOL_GPL(tegra_ehci_phy_restore_end);
 
-void tegra_usb_phy_clk_disable(struct tegra_usb_phy *phy)
-{
-	if (!phy_is_ulpi(phy))
-		utmi_phy_clk_disable(phy);
-}
-EXPORT_SYMBOL_GPL(tegra_usb_phy_clk_disable);
-
-void tegra_usb_phy_clk_enable(struct tegra_usb_phy *phy)
-{
-	if (!phy_is_ulpi(phy))
-		utmi_phy_clk_enable(phy);
-}
-EXPORT_SYMBOL_GPL(tegra_usb_phy_clk_enable);
diff --git a/include/linux/usb/tegra_usb_phy.h b/include/linux/usb/tegra_usb_phy.h
index 176b1ca06ae4..34e6355a15e3 100644
--- a/include/linux/usb/tegra_usb_phy.h
+++ b/include/linux/usb/tegra_usb_phy.h
@@ -64,10 +64,6 @@ struct tegra_usb_phy {
 struct tegra_usb_phy *tegra_usb_phy_open(struct device *dev, int instance,
 	void __iomem *regs, void *config, enum tegra_usb_phy_mode phy_mode);
 
-void tegra_usb_phy_clk_disable(struct tegra_usb_phy *phy);
-
-void tegra_usb_phy_clk_enable(struct tegra_usb_phy *phy);
-
 void tegra_usb_phy_preresume(struct tegra_usb_phy *phy);
 
 void tegra_usb_phy_postresume(struct tegra_usb_phy *phy);
-- 
cgit v1.2.3


From 3a55c6a8b96f055783a1b9d5497f66b56a9ea0d7 Mon Sep 17 00:00:00 2001
From: Venu Byravarasu <vbyravarasu@nvidia.com>
Date: Wed, 16 Jan 2013 03:30:20 +0000
Subject: USB: PHY: tegra: get rid of instance number to differentiate legacy
 controller

Tegra20 USB has 3 PHY instances. Instance 0 is based on
legacy PHY interface and other two are standard interfaces.

As instance number was used to differentiate legacy from
standard interfaces, used DT param to get this info and
processed accordingly.

Signed-off-by: Venu Byravarasu <vbyravarasu@nvidia.com>
Acked-by: Felipe Balbi <balbi@ti.com>
Signed-off-by: Stephen Warren <swarren@nvidia.com>
---
 drivers/usb/phy/tegra_usb_phy.c   | 32 +++++++++++++++-----------------
 include/linux/usb/tegra_usb_phy.h |  1 +
 2 files changed, 16 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/phy/tegra_usb_phy.c b/drivers/usb/phy/tegra_usb_phy.c
index 3059384326b6..79280fe90893 100644
--- a/drivers/usb/phy/tegra_usb_phy.c
+++ b/drivers/usb/phy/tegra_usb_phy.c
@@ -24,6 +24,7 @@
 #include <linux/platform_device.h>
 #include <linux/io.h>
 #include <linux/gpio.h>
+#include <linux/of.h>
 #include <linux/of_gpio.h>
 #include <linux/usb/otg.h>
 #include <linux/usb/ulpi.h>
@@ -221,7 +222,7 @@ static int utmip_pad_open(struct tegra_usb_phy *phy)
 		return PTR_ERR(phy->pad_clk);
 	}
 
-	if (phy->instance == 0) {
+	if (phy->is_legacy_phy) {
 		phy->pad_regs = phy->regs;
 	} else {
 		phy->pad_regs = ioremap(TEGRA_USB_BASE, TEGRA_USB_SIZE);
@@ -236,7 +237,7 @@ static int utmip_pad_open(struct tegra_usb_phy *phy)
 
 static void utmip_pad_close(struct tegra_usb_phy *phy)
 {
-	if (phy->instance != 0)
+	if (!phy->is_legacy_phy)
 		iounmap(phy->pad_regs);
 	clk_put(phy->pad_clk);
 }
@@ -305,7 +306,7 @@ static void utmi_phy_clk_disable(struct tegra_usb_phy *phy)
 	unsigned long val;
 	void __iomem *base = phy->regs;
 
-	if (phy->instance == 0) {
+	if (phy->is_legacy_phy) {
 		val = readl(base + USB_SUSP_CTRL);
 		val |= USB_SUSP_SET;
 		writel(val, base + USB_SUSP_CTRL);
@@ -315,9 +316,7 @@ static void utmi_phy_clk_disable(struct tegra_usb_phy *phy)
 		val = readl(base + USB_SUSP_CTRL);
 		val &= ~USB_SUSP_SET;
 		writel(val, base + USB_SUSP_CTRL);
-	}
-
-	if (phy->instance == 2) {
+	} else {
 		val = readl(base + USB_PORTSC1);
 		val |= USB_PORTSC1_PHCD;
 		writel(val, base + USB_PORTSC1);
@@ -332,7 +331,7 @@ static void utmi_phy_clk_enable(struct tegra_usb_phy *phy)
 	unsigned long val;
 	void __iomem *base = phy->regs;
 
-	if (phy->instance == 0) {
+	if (phy->is_legacy_phy) {
 		val = readl(base + USB_SUSP_CTRL);
 		val |= USB_SUSP_CLR;
 		writel(val, base + USB_SUSP_CTRL);
@@ -342,9 +341,7 @@ static void utmi_phy_clk_enable(struct tegra_usb_phy *phy)
 		val = readl(base + USB_SUSP_CTRL);
 		val &= ~USB_SUSP_CLR;
 		writel(val, base + USB_SUSP_CTRL);
-	}
-
-	if (phy->instance == 2) {
+	} else {
 		val = readl(base + USB_PORTSC1);
 		val &= ~USB_PORTSC1_PHCD;
 		writel(val, base + USB_PORTSC1);
@@ -365,7 +362,7 @@ static int utmi_phy_power_on(struct tegra_usb_phy *phy)
 	val |= UTMIP_RESET;
 	writel(val, base + USB_SUSP_CTRL);
 
-	if (phy->instance == 0) {
+	if (phy->is_legacy_phy) {
 		val = readl(base + USB1_LEGACY_CTRL);
 		val |= USB1_NO_LEGACY_MODE;
 		writel(val, base + USB1_LEGACY_CTRL);
@@ -440,16 +437,14 @@ static int utmi_phy_power_on(struct tegra_usb_phy *phy)
 	val |= UTMIP_BIAS_PDTRK_COUNT(0x5);
 	writel(val, base + UTMIP_BIAS_CFG1);
 
-	if (phy->instance == 0) {
+	if (phy->is_legacy_phy) {
 		val = readl(base + UTMIP_SPARE_CFG0);
 		if (phy->mode == TEGRA_USB_PHY_MODE_DEVICE)
 			val &= ~FUSE_SETUP_SEL;
 		else
 			val |= FUSE_SETUP_SEL;
 		writel(val, base + UTMIP_SPARE_CFG0);
-	}
-
-	if (phy->instance == 2) {
+	} else {
 		val = readl(base + USB_SUSP_CTRL);
 		val |= UTMIP_PHY_ENABLE;
 		writel(val, base + USB_SUSP_CTRL);
@@ -459,7 +454,7 @@ static int utmi_phy_power_on(struct tegra_usb_phy *phy)
 	val &= ~UTMIP_RESET;
 	writel(val, base + USB_SUSP_CTRL);
 
-	if (phy->instance == 0) {
+	if (phy->is_legacy_phy) {
 		val = readl(base + USB1_LEGACY_CTRL);
 		val &= ~USB1_VBUS_SENSE_CTL_MASK;
 		val |= USB1_VBUS_SENSE_CTL_A_SESS_VLD;
@@ -472,7 +467,7 @@ static int utmi_phy_power_on(struct tegra_usb_phy *phy)
 
 	utmi_phy_clk_enable(phy);
 
-	if (phy->instance == 2) {
+	if (!phy->is_legacy_phy) {
 		val = readl(base + USB_PORTSC1);
 		val &= ~USB_PORTSC1_PTS(~0);
 		writel(val, base + USB_PORTSC1);
@@ -739,6 +734,7 @@ struct tegra_usb_phy *tegra_usb_phy_open(struct device *dev, int instance,
 	unsigned long parent_rate;
 	int i;
 	int err;
+	struct device_node *np = dev->of_node;
 
 	phy = kzalloc(sizeof(struct tegra_usb_phy), GFP_KERNEL);
 	if (!phy)
@@ -749,6 +745,8 @@ struct tegra_usb_phy *tegra_usb_phy_open(struct device *dev, int instance,
 	phy->config = config;
 	phy->mode = phy_mode;
 	phy->dev = dev;
+	phy->is_legacy_phy =
+		of_property_read_bool(np, "nvidia,has-legacy-mode");
 
 	if (!phy->config) {
 		if (phy_is_ulpi(phy)) {
diff --git a/include/linux/usb/tegra_usb_phy.h b/include/linux/usb/tegra_usb_phy.h
index 34e6355a15e3..f03e157abaa8 100644
--- a/include/linux/usb/tegra_usb_phy.h
+++ b/include/linux/usb/tegra_usb_phy.h
@@ -59,6 +59,7 @@ struct tegra_usb_phy {
 	struct usb_phy *ulpi;
 	struct usb_phy u_phy;
 	struct device *dev;
+	bool is_legacy_phy;
 };
 
 struct tegra_usb_phy *tegra_usb_phy_open(struct device *dev, int instance,
-- 
cgit v1.2.3


From 3f9db1a19a8a17f6000973194c6a1d63c5cebf21 Mon Sep 17 00:00:00 2001
From: Venu Byravarasu <vbyravarasu@nvidia.com>
Date: Wed, 16 Jan 2013 03:30:21 +0000
Subject: USB: PHY: tegra: Get rid of instance number to differentiate PHY type

Tegra20 USB has 3 PHY instances:
Instance 1 and 3 are UTMI. Instance 2 is ULPI.

As instance number was used to differentiate ULPI from UTMI,
used DT param to get this info and processed accordingly.

Signed-off-by: Venu Byravarasu <vbyravarasu@nvidia.com>
Acked-by: Felipe Balbi <balbi@ti.com>
[swarren: moved assignment of phy->is_ulpi_phy into this patch out
of next patch.]
Signed-off-by: Stephen Warren <swarren@nvidia.com>
---
 drivers/usb/phy/tegra_usb_phy.c   | 28 ++++++++++++++--------------
 include/linux/usb/tegra_usb_phy.h |  1 +
 2 files changed, 15 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/phy/tegra_usb_phy.c b/drivers/usb/phy/tegra_usb_phy.c
index 79280fe90893..48fa5258ce0b 100644
--- a/drivers/usb/phy/tegra_usb_phy.c
+++ b/drivers/usb/phy/tegra_usb_phy.c
@@ -209,11 +209,6 @@ static struct tegra_utmip_config utmip_default[] = {
 	},
 };
 
-static inline bool phy_is_ulpi(struct tegra_usb_phy *phy)
-{
-	return (phy->instance == 1);
-}
-
 static int utmip_pad_open(struct tegra_usb_phy *phy)
 {
 	phy->pad_clk = clk_get_sys("utmip-pad", NULL);
@@ -655,7 +650,7 @@ static int	tegra_phy_init(struct usb_phy *x)
 	struct tegra_ulpi_config *ulpi_config;
 	int err;
 
-	if (phy_is_ulpi(phy)) {
+	if (phy->is_ulpi_phy) {
 		ulpi_config = phy->config;
 		phy->clk = clk_get_sys(NULL, ulpi_config->clk);
 		if (IS_ERR(phy->clk)) {
@@ -693,7 +688,7 @@ static void tegra_usb_phy_close(struct usb_phy *x)
 {
 	struct tegra_usb_phy *phy = container_of(x, struct tegra_usb_phy, u_phy);
 
-	if (phy_is_ulpi(phy))
+	if (phy->is_ulpi_phy)
 		clk_put(phy->clk);
 	else
 		utmip_pad_close(phy);
@@ -704,7 +699,7 @@ static void tegra_usb_phy_close(struct usb_phy *x)
 
 static int tegra_usb_phy_power_on(struct tegra_usb_phy *phy)
 {
-	if (phy_is_ulpi(phy))
+	if (phy->is_ulpi_phy)
 		return ulpi_phy_power_on(phy);
 	else
 		return utmi_phy_power_on(phy);
@@ -712,7 +707,7 @@ static int tegra_usb_phy_power_on(struct tegra_usb_phy *phy)
 
 static int tegra_usb_phy_power_off(struct tegra_usb_phy *phy)
 {
-	if (phy_is_ulpi(phy))
+	if (phy->is_ulpi_phy)
 		return ulpi_phy_power_off(phy);
 	else
 		return utmi_phy_power_off(phy);
@@ -747,9 +742,14 @@ struct tegra_usb_phy *tegra_usb_phy_open(struct device *dev, int instance,
 	phy->dev = dev;
 	phy->is_legacy_phy =
 		of_property_read_bool(np, "nvidia,has-legacy-mode");
+	err = of_property_match_string(np, "phy_type", "ulpi");
+	if (err < 0)
+		phy->is_ulpi_phy = false;
+	else
+		phy->is_ulpi_phy = true;
 
 	if (!phy->config) {
-		if (phy_is_ulpi(phy)) {
+		if (phy->is_ulpi_phy) {
 			pr_err("%s: ulpi phy configuration missing", __func__);
 			err = -EINVAL;
 			goto err0;
@@ -796,14 +796,14 @@ EXPORT_SYMBOL_GPL(tegra_usb_phy_open);
 
 void tegra_usb_phy_preresume(struct tegra_usb_phy *phy)
 {
-	if (!phy_is_ulpi(phy))
+	if (!phy->is_ulpi_phy)
 		utmi_phy_preresume(phy);
 }
 EXPORT_SYMBOL_GPL(tegra_usb_phy_preresume);
 
 void tegra_usb_phy_postresume(struct tegra_usb_phy *phy)
 {
-	if (!phy_is_ulpi(phy))
+	if (!phy->is_ulpi_phy)
 		utmi_phy_postresume(phy);
 }
 EXPORT_SYMBOL_GPL(tegra_usb_phy_postresume);
@@ -811,14 +811,14 @@ EXPORT_SYMBOL_GPL(tegra_usb_phy_postresume);
 void tegra_ehci_phy_restore_start(struct tegra_usb_phy *phy,
 				 enum tegra_usb_phy_port_speed port_speed)
 {
-	if (!phy_is_ulpi(phy))
+	if (!phy->is_ulpi_phy)
 		utmi_phy_restore_start(phy, port_speed);
 }
 EXPORT_SYMBOL_GPL(tegra_ehci_phy_restore_start);
 
 void tegra_ehci_phy_restore_end(struct tegra_usb_phy *phy)
 {
-	if (!phy_is_ulpi(phy))
+	if (!phy->is_ulpi_phy)
 		utmi_phy_restore_end(phy);
 }
 EXPORT_SYMBOL_GPL(tegra_ehci_phy_restore_end);
diff --git a/include/linux/usb/tegra_usb_phy.h b/include/linux/usb/tegra_usb_phy.h
index f03e157abaa8..a6a89d4866f4 100644
--- a/include/linux/usb/tegra_usb_phy.h
+++ b/include/linux/usb/tegra_usb_phy.h
@@ -60,6 +60,7 @@ struct tegra_usb_phy {
 	struct usb_phy u_phy;
 	struct device *dev;
 	bool is_legacy_phy;
+	bool is_ulpi_phy;
 };
 
 struct tegra_usb_phy *tegra_usb_phy_open(struct device *dev, int instance,
-- 
cgit v1.2.3


From bbdabdb62d86090511410728644a19291bf300cf Mon Sep 17 00:00:00 2001
From: Venu Byravarasu <vbyravarasu@nvidia.com>
Date: Thu, 17 Jan 2013 20:15:37 +0000
Subject: usb: add APIs to access host registers from Tegra PHY

As Tegra PHY driver needs to access one of the host registers,
added few APIs.

Signed-off-by: Venu Byravarasu <vbyravarasu@nvidia.com>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
[swarren: moved assignment of phy->is_ulpi_phy to previous patch.]
Signed-off-by: Stephen Warren <swarren@nvidia.com>
---
 drivers/usb/host/ehci-tegra.c     | 51 +++++++++++++++++++++++++++++++++++++--
 drivers/usb/phy/tegra_usb_phy.c   | 47 +++++-------------------------------
 include/linux/usb/tegra_usb_phy.h |  4 +++
 3 files changed, 59 insertions(+), 43 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/host/ehci-tegra.c b/drivers/usb/host/ehci-tegra.c
index 55a9cdee2949..1f596fb7cf71 100644
--- a/drivers/usb/host/ehci-tegra.c
+++ b/drivers/usb/host/ehci-tegra.c
@@ -2,7 +2,7 @@
  * EHCI-compliant USB host controller driver for NVIDIA Tegra SoCs
  *
  * Copyright (C) 2010 Google, Inc.
- * Copyright (C) 2009 NVIDIA Corporation
+ * Copyright (C) 2009 - 2013 NVIDIA Corporation
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by the
@@ -26,13 +26,18 @@
 #include <linux/of.h>
 #include <linux/of_gpio.h>
 #include <linux/pm_runtime.h>
-
+#include <linux/usb/ehci_def.h>
 #include <linux/usb/tegra_usb_phy.h>
 
 #define TEGRA_USB_BASE			0xC5000000
 #define TEGRA_USB2_BASE			0xC5004000
 #define TEGRA_USB3_BASE			0xC5008000
 
+/* PORTSC registers */
+#define TEGRA_USB_PORTSC1			0x184
+#define TEGRA_USB_PORTSC1_PTS(x)	(((x) & 0x3) << 30)
+#define TEGRA_USB_PORTSC1_PHCD	(1 << 23)
+
 #define TEGRA_USB_DMA_ALIGN 32
 
 struct tegra_ehci_hcd {
@@ -605,6 +610,37 @@ static const struct dev_pm_ops tegra_ehci_pm_ops = {
 
 #endif
 
+/* Bits of PORTSC1, which will get cleared by writing 1 into them */
+#define TEGRA_PORTSC1_RWC_BITS (PORT_CSC | PORT_PEC | PORT_OCC)
+
+void tegra_ehci_set_pts(struct usb_phy *x, u8 pts_val)
+{
+	unsigned long val;
+	struct usb_hcd *hcd = bus_to_hcd(x->otg->host);
+	void __iomem *base = hcd->regs;
+
+	val = readl(base + TEGRA_USB_PORTSC1) & ~TEGRA_PORTSC1_RWC_BITS;
+	val &= ~TEGRA_USB_PORTSC1_PTS(3);
+	val |= TEGRA_USB_PORTSC1_PTS(pts_val & 3);
+	writel(val, base + TEGRA_USB_PORTSC1);
+}
+EXPORT_SYMBOL_GPL(tegra_ehci_set_pts);
+
+void tegra_ehci_set_phcd(struct usb_phy *x, bool enable)
+{
+	unsigned long val;
+	struct usb_hcd *hcd = bus_to_hcd(x->otg->host);
+	void __iomem *base = hcd->regs;
+
+	val = readl(base + TEGRA_USB_PORTSC1) & ~TEGRA_PORTSC1_RWC_BITS;
+	if (enable)
+		val |= TEGRA_USB_PORTSC1_PHCD;
+	else
+		val &= ~TEGRA_USB_PORTSC1_PHCD;
+	writel(val, base + TEGRA_USB_PORTSC1);
+}
+EXPORT_SYMBOL_GPL(tegra_ehci_set_phcd);
+
 static u64 tegra_ehci_dma_mask = DMA_BIT_MASK(32);
 
 static int tegra_ehci_probe(struct platform_device *pdev)
@@ -616,6 +652,7 @@ static int tegra_ehci_probe(struct platform_device *pdev)
 	int err = 0;
 	int irq;
 	int instance = pdev->id;
+	struct usb_phy *u_phy;
 
 	pdata = pdev->dev.platform_data;
 	if (!pdata) {
@@ -718,6 +755,16 @@ static int tegra_ehci_probe(struct platform_device *pdev)
 
 	usb_phy_init(&tegra->phy->u_phy);
 
+	hcd->phy = u_phy = &tegra->phy->u_phy;
+	u_phy->otg = devm_kzalloc(&pdev->dev, sizeof(struct usb_otg),
+			     GFP_KERNEL);
+	if (!u_phy->otg) {
+		dev_err(&pdev->dev, "Failed to alloc memory for otg\n");
+		err = -ENOMEM;
+		goto fail_io;
+	}
+	u_phy->otg->host = hcd_to_bus(hcd);
+
 	err = usb_phy_set_suspend(&tegra->phy->u_phy, 0);
 	if (err) {
 		dev_err(&pdev->dev, "Failed to power on the phy\n");
diff --git a/drivers/usb/phy/tegra_usb_phy.c b/drivers/usb/phy/tegra_usb_phy.c
index 48fa5258ce0b..d4657045b8b1 100644
--- a/drivers/usb/phy/tegra_usb_phy.c
+++ b/drivers/usb/phy/tegra_usb_phy.c
@@ -36,19 +36,6 @@
 
 #define ULPI_VIEWPORT		0x170
 
-#define USB_PORTSC1		0x184
-#define   USB_PORTSC1_PTS(x)	(((x) & 0x3) << 30)
-#define   USB_PORTSC1_PSPD(x)	(((x) & 0x3) << 26)
-#define   USB_PORTSC1_PHCD	(1 << 23)
-#define   USB_PORTSC1_WKOC	(1 << 22)
-#define   USB_PORTSC1_WKDS	(1 << 21)
-#define   USB_PORTSC1_WKCN	(1 << 20)
-#define   USB_PORTSC1_PTC(x)	(((x) & 0xf) << 16)
-#define   USB_PORTSC1_PP	(1 << 12)
-#define   USB_PORTSC1_SUSP	(1 << 7)
-#define   USB_PORTSC1_PE	(1 << 2)
-#define   USB_PORTSC1_CCS	(1 << 0)
-
 #define USB_SUSP_CTRL		0x400
 #define   USB_WAKE_ON_CNNT_EN_DEV	(1 << 3)
 #define   USB_WAKE_ON_DISCON_EN_DEV	(1 << 4)
@@ -311,11 +298,8 @@ static void utmi_phy_clk_disable(struct tegra_usb_phy *phy)
 		val = readl(base + USB_SUSP_CTRL);
 		val &= ~USB_SUSP_SET;
 		writel(val, base + USB_SUSP_CTRL);
-	} else {
-		val = readl(base + USB_PORTSC1);
-		val |= USB_PORTSC1_PHCD;
-		writel(val, base + USB_PORTSC1);
-	}
+	} else
+		tegra_ehci_set_phcd(&phy->u_phy, true);
 
 	if (utmi_wait_register(base + USB_SUSP_CTRL, USB_PHY_CLK_VALID, 0) < 0)
 		pr_err("%s: timeout waiting for phy to stabilize\n", __func__);
@@ -336,11 +320,8 @@ static void utmi_phy_clk_enable(struct tegra_usb_phy *phy)
 		val = readl(base + USB_SUSP_CTRL);
 		val &= ~USB_SUSP_CLR;
 		writel(val, base + USB_SUSP_CTRL);
-	} else {
-		val = readl(base + USB_PORTSC1);
-		val &= ~USB_PORTSC1_PHCD;
-		writel(val, base + USB_PORTSC1);
-	}
+	} else
+		tegra_ehci_set_phcd(&phy->u_phy, false);
 
 	if (utmi_wait_register(base + USB_SUSP_CTRL, USB_PHY_CLK_VALID,
 						     USB_PHY_CLK_VALID))
@@ -462,11 +443,8 @@ static int utmi_phy_power_on(struct tegra_usb_phy *phy)
 
 	utmi_phy_clk_enable(phy);
 
-	if (!phy->is_legacy_phy) {
-		val = readl(base + USB_PORTSC1);
-		val &= ~USB_PORTSC1_PTS(~0);
-		writel(val, base + USB_PORTSC1);
-	}
+	if (!phy->is_legacy_phy)
+		tegra_ehci_set_pts(&phy->u_phy, 0);
 
 	return 0;
 }
@@ -611,10 +589,6 @@ static int ulpi_phy_power_on(struct tegra_usb_phy *phy)
 		return ret;
 	}
 
-	val = readl(base + USB_PORTSC1);
-	val |= USB_PORTSC1_WKOC | USB_PORTSC1_WKDS | USB_PORTSC1_WKCN;
-	writel(val, base + USB_PORTSC1);
-
 	val = readl(base + USB_SUSP_CTRL);
 	val |= USB_SUSP_CLR;
 	writel(val, base + USB_SUSP_CTRL);
@@ -629,17 +603,8 @@ static int ulpi_phy_power_on(struct tegra_usb_phy *phy)
 
 static int ulpi_phy_power_off(struct tegra_usb_phy *phy)
 {
-	unsigned long val;
-	void __iomem *base = phy->regs;
 	struct tegra_ulpi_config *config = phy->config;
 
-	/* Clear WKCN/WKDS/WKOC wake-on events that can cause the USB
-	 * Controller to immediately bring the ULPI PHY out of low power
-	 */
-	val = readl(base + USB_PORTSC1);
-	val &= ~(USB_PORTSC1_WKOC | USB_PORTSC1_WKDS | USB_PORTSC1_WKCN);
-	writel(val, base + USB_PORTSC1);
-
 	clk_disable(phy->clk);
 	return gpio_direction_output(config->reset_gpio, 0);
 }
diff --git a/include/linux/usb/tegra_usb_phy.h b/include/linux/usb/tegra_usb_phy.h
index a6a89d4866f4..9259d4659582 100644
--- a/include/linux/usb/tegra_usb_phy.h
+++ b/include/linux/usb/tegra_usb_phy.h
@@ -75,4 +75,8 @@ void tegra_ehci_phy_restore_start(struct tegra_usb_phy *phy,
 
 void tegra_ehci_phy_restore_end(struct tegra_usb_phy *phy);
 
+void tegra_ehci_set_pts(struct usb_phy *x, u8 pts_val);
+
+void tegra_ehci_set_phcd(struct usb_phy *x, bool enable);
+
 #endif /* __TEGRA_USB_PHY_H */
-- 
cgit v1.2.3


From ab137d04db5a4b32250ce5ef1b288ce6cf06adf6 Mon Sep 17 00:00:00 2001
From: Venu Byravarasu <vbyravarasu@nvidia.com>
Date: Thu, 24 Jan 2013 15:57:03 +0530
Subject: usb: host: tegra: make use of PHY pointer of HCD

As pointer to PHY structure can be stored in struct usb_hcd
making use of it, to call Tegra PHY APIs.

Call to usb_phy_shutdown() is moved up in tegra_ehci_remove(),
so that to avoid dereferencing of hcd after its freed up.

Signed-off-by: Venu Byravarasu <vbyravarasu@nvidia.com>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Acked-by: Felipe Balbi <balbi@ti.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Stephen Warren <swarren@nvidia.com>
---
 drivers/usb/host/ehci-tegra.c     | 27 +++++++++++++--------------
 drivers/usb/phy/tegra_usb_phy.c   | 16 ++++++++++++----
 include/linux/usb/tegra_usb_phy.h |  8 ++++----
 3 files changed, 29 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/host/ehci-tegra.c b/drivers/usb/host/ehci-tegra.c
index b02622a936c2..568aecc7075b 100644
--- a/drivers/usb/host/ehci-tegra.c
+++ b/drivers/usb/host/ehci-tegra.c
@@ -56,7 +56,7 @@ static void tegra_ehci_power_up(struct usb_hcd *hcd)
 	struct tegra_ehci_hcd *tegra = dev_get_drvdata(hcd->self.controller);
 
 	clk_prepare_enable(tegra->clk);
-	usb_phy_set_suspend(&tegra->phy->u_phy, 0);
+	usb_phy_set_suspend(hcd->phy, 0);
 	tegra->host_resumed = 1;
 }
 
@@ -65,7 +65,7 @@ static void tegra_ehci_power_down(struct usb_hcd *hcd)
 	struct tegra_ehci_hcd *tegra = dev_get_drvdata(hcd->self.controller);
 
 	tegra->host_resumed = 0;
-	usb_phy_set_suspend(&tegra->phy->u_phy, 1);
+	usb_phy_set_suspend(hcd->phy, 1);
 	clk_disable_unprepare(tegra->clk);
 }
 
@@ -159,7 +159,7 @@ static int tegra_ehci_hub_control(
 		if (tegra->port_resuming && !(temp & PORT_SUSPEND)) {
 			/* Resume completed, re-enable disconnect detection */
 			tegra->port_resuming = 0;
-			tegra_usb_phy_postresume(tegra->phy);
+			tegra_usb_phy_postresume(hcd->phy);
 		}
 	}
 
@@ -212,7 +212,7 @@ static int tegra_ehci_hub_control(
 			goto done;
 
 		/* Disable disconnect detection during port resume */
-		tegra_usb_phy_preresume(tegra->phy);
+		tegra_usb_phy_preresume(hcd->phy);
 
 		ehci->reset_done[wIndex-1] = jiffies + msecs_to_jiffies(25);
 
@@ -476,7 +476,7 @@ static int controller_resume(struct device *dev)
 	}
 
 	/* Force the phy to keep data lines in suspend state */
-	tegra_ehci_phy_restore_start(tegra->phy, tegra->port_speed);
+	tegra_ehci_phy_restore_start(hcd->phy, tegra->port_speed);
 
 	/* Enable host mode */
 	tdi_reset(ehci);
@@ -543,17 +543,17 @@ static int controller_resume(struct device *dev)
 		}
 	}
 
-	tegra_ehci_phy_restore_end(tegra->phy);
+	tegra_ehci_phy_restore_end(hcd->phy);
 	goto done;
 
  restart:
 	if (tegra->port_speed <= TEGRA_USB_PHY_PORT_SPEED_HIGH)
-		tegra_ehci_phy_restore_end(tegra->phy);
+		tegra_ehci_phy_restore_end(hcd->phy);
 
 	tegra_ehci_restart(hcd);
 
  done:
-	tegra_usb_phy_preresume(tegra->phy);
+	tegra_usb_phy_preresume(hcd->phy);
 	tegra->port_resuming = 1;
 	return 0;
 }
@@ -740,9 +740,9 @@ static int tegra_ehci_probe(struct platform_device *pdev)
 		goto fail_io;
 	}
 
-	usb_phy_init(&tegra->phy->u_phy);
-
 	hcd->phy = u_phy = &tegra->phy->u_phy;
+	usb_phy_init(hcd->phy);
+
 	u_phy->otg = devm_kzalloc(&pdev->dev, sizeof(struct usb_otg),
 			     GFP_KERNEL);
 	if (!u_phy->otg) {
@@ -752,7 +752,7 @@ static int tegra_ehci_probe(struct platform_device *pdev)
 	}
 	u_phy->otg->host = hcd_to_bus(hcd);
 
-	err = usb_phy_set_suspend(&tegra->phy->u_phy, 0);
+	err = usb_phy_set_suspend(hcd->phy, 0);
 	if (err) {
 		dev_err(&pdev->dev, "Failed to power on the phy\n");
 		goto fail;
@@ -798,7 +798,7 @@ fail:
 	if (!IS_ERR_OR_NULL(tegra->transceiver))
 		otg_set_host(tegra->transceiver->otg, NULL);
 #endif
-	usb_phy_shutdown(&tegra->phy->u_phy);
+	usb_phy_shutdown(hcd->phy);
 fail_io:
 	clk_disable_unprepare(tegra->clk);
 fail_clk:
@@ -820,11 +820,10 @@ static int tegra_ehci_remove(struct platform_device *pdev)
 		otg_set_host(tegra->transceiver->otg, NULL);
 #endif
 
+	usb_phy_shutdown(hcd->phy);
 	usb_remove_hcd(hcd);
 	usb_put_hcd(hcd);
 
-	usb_phy_shutdown(&tegra->phy->u_phy);
-
 	clk_disable_unprepare(tegra->clk);
 
 	return 0;
diff --git a/drivers/usb/phy/tegra_usb_phy.c b/drivers/usb/phy/tegra_usb_phy.c
index d4657045b8b1..5487d38481af 100644
--- a/drivers/usb/phy/tegra_usb_phy.c
+++ b/drivers/usb/phy/tegra_usb_phy.c
@@ -759,30 +759,38 @@ err0:
 }
 EXPORT_SYMBOL_GPL(tegra_usb_phy_open);
 
-void tegra_usb_phy_preresume(struct tegra_usb_phy *phy)
+void tegra_usb_phy_preresume(struct usb_phy *x)
 {
+	struct tegra_usb_phy *phy = container_of(x, struct tegra_usb_phy, u_phy);
+
 	if (!phy->is_ulpi_phy)
 		utmi_phy_preresume(phy);
 }
 EXPORT_SYMBOL_GPL(tegra_usb_phy_preresume);
 
-void tegra_usb_phy_postresume(struct tegra_usb_phy *phy)
+void tegra_usb_phy_postresume(struct usb_phy *x)
 {
+	struct tegra_usb_phy *phy = container_of(x, struct tegra_usb_phy, u_phy);
+
 	if (!phy->is_ulpi_phy)
 		utmi_phy_postresume(phy);
 }
 EXPORT_SYMBOL_GPL(tegra_usb_phy_postresume);
 
-void tegra_ehci_phy_restore_start(struct tegra_usb_phy *phy,
+void tegra_ehci_phy_restore_start(struct usb_phy *x,
 				 enum tegra_usb_phy_port_speed port_speed)
 {
+	struct tegra_usb_phy *phy = container_of(x, struct tegra_usb_phy, u_phy);
+
 	if (!phy->is_ulpi_phy)
 		utmi_phy_restore_start(phy, port_speed);
 }
 EXPORT_SYMBOL_GPL(tegra_ehci_phy_restore_start);
 
-void tegra_ehci_phy_restore_end(struct tegra_usb_phy *phy)
+void tegra_ehci_phy_restore_end(struct usb_phy *x)
 {
+	struct tegra_usb_phy *phy = container_of(x, struct tegra_usb_phy, u_phy);
+
 	if (!phy->is_ulpi_phy)
 		utmi_phy_restore_end(phy);
 }
diff --git a/include/linux/usb/tegra_usb_phy.h b/include/linux/usb/tegra_usb_phy.h
index 9259d4659582..9ebebe906925 100644
--- a/include/linux/usb/tegra_usb_phy.h
+++ b/include/linux/usb/tegra_usb_phy.h
@@ -66,14 +66,14 @@ struct tegra_usb_phy {
 struct tegra_usb_phy *tegra_usb_phy_open(struct device *dev, int instance,
 	void __iomem *regs, void *config, enum tegra_usb_phy_mode phy_mode);
 
-void tegra_usb_phy_preresume(struct tegra_usb_phy *phy);
+void tegra_usb_phy_preresume(struct usb_phy *phy);
 
-void tegra_usb_phy_postresume(struct tegra_usb_phy *phy);
+void tegra_usb_phy_postresume(struct usb_phy *phy);
 
-void tegra_ehci_phy_restore_start(struct tegra_usb_phy *phy,
+void tegra_ehci_phy_restore_start(struct usb_phy *phy,
 				 enum tegra_usb_phy_port_speed port_speed);
 
-void tegra_ehci_phy_restore_end(struct tegra_usb_phy *phy);
+void tegra_ehci_phy_restore_end(struct usb_phy *phy);
 
 void tegra_ehci_set_pts(struct usb_phy *x, u8 pts_val);
 
-- 
cgit v1.2.3


From 2bf3440d7b8755f2627232e6a4c37efbbe053685 Mon Sep 17 00:00:00 2001
From: Oliver Hartkopp <socketcan@hartkopp.net>
Date: Mon, 28 Jan 2013 08:33:33 +0000
Subject: can: rework skb reserved data handling

Added accessor and skb_reserve helpers for struct can_skb_priv.
Removed pointless skb_headroom() check.

Signed-off-by: Oliver Hartkopp <socketcan@hartkopp.net>
CC: Marc Kleine-Budde <mkl@pengutronix.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/can/dev.c   |  4 ++--
 drivers/net/can/slcan.c |  4 ++--
 include/linux/can/skb.h | 10 ++++++++++
 net/can/bcm.c           |  8 ++++----
 net/can/gw.c            |  4 +---
 net/can/raw.c           |  4 ++--
 6 files changed, 21 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c
index 59ada082a994..f9cba4123c66 100644
--- a/drivers/net/can/dev.c
+++ b/drivers/net/can/dev.c
@@ -512,8 +512,8 @@ struct sk_buff *alloc_can_skb(struct net_device *dev, struct can_frame **cf)
 	skb->pkt_type = PACKET_BROADCAST;
 	skb->ip_summed = CHECKSUM_UNNECESSARY;
 
-	skb_reserve(skb, sizeof(struct can_skb_priv));
-	((struct can_skb_priv *)(skb->head))->ifindex = dev->ifindex;
+	can_skb_reserve(skb);
+	can_skb_prv(skb)->ifindex = dev->ifindex;
 
 	*cf = (struct can_frame *)skb_put(skb, sizeof(struct can_frame));
 	memset(*cf, 0, sizeof(struct can_frame));
diff --git a/drivers/net/can/slcan.c b/drivers/net/can/slcan.c
index e79a8d10e0fc..06b7e097d36e 100644
--- a/drivers/net/can/slcan.c
+++ b/drivers/net/can/slcan.c
@@ -195,8 +195,8 @@ static void slc_bump(struct slcan *sl)
 	skb->pkt_type = PACKET_BROADCAST;
 	skb->ip_summed = CHECKSUM_UNNECESSARY;
 
-	skb_reserve(skb, sizeof(struct can_skb_priv));
-	((struct can_skb_priv *)(skb->head))->ifindex = sl->dev->ifindex;
+	can_skb_reserve(skb);
+	can_skb_prv(skb)->ifindex = sl->dev->ifindex;
 
 	memcpy(skb_put(skb, sizeof(struct can_frame)),
 	       &cf, sizeof(struct can_frame));
diff --git a/include/linux/can/skb.h b/include/linux/can/skb.h
index 4b0f24d3a878..2f0543f7510c 100644
--- a/include/linux/can/skb.h
+++ b/include/linux/can/skb.h
@@ -32,4 +32,14 @@ struct can_skb_priv {
 	struct can_frame cf[0];
 };
 
+static inline struct can_skb_priv *can_skb_prv(struct sk_buff *skb)
+{
+	return (struct can_skb_priv *)(skb->head);
+}
+
+static inline void can_skb_reserve(struct sk_buff *skb)
+{
+	skb_reserve(skb, sizeof(struct can_skb_priv));
+}
+
 #endif /* CAN_SKB_H */
diff --git a/net/can/bcm.c b/net/can/bcm.c
index ccc27b9e8384..28e12d18f0f1 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -261,8 +261,8 @@ static void bcm_can_tx(struct bcm_op *op)
 	if (!skb)
 		goto out;
 
-	skb_reserve(skb, sizeof(struct can_skb_priv));
-	((struct can_skb_priv *)(skb->head))->ifindex = dev->ifindex;
+	can_skb_reserve(skb);
+	can_skb_prv(skb)->ifindex = dev->ifindex;
 
 	memcpy(skb_put(skb, CFSIZ), cf, CFSIZ);
 
@@ -1207,7 +1207,7 @@ static int bcm_tx_send(struct msghdr *msg, int ifindex, struct sock *sk)
 	if (!skb)
 		return -ENOMEM;
 
-	skb_reserve(skb, sizeof(struct can_skb_priv));
+	can_skb_reserve(skb);
 
 	err = memcpy_fromiovec(skb_put(skb, CFSIZ), msg->msg_iov, CFSIZ);
 	if (err < 0) {
@@ -1221,7 +1221,7 @@ static int bcm_tx_send(struct msghdr *msg, int ifindex, struct sock *sk)
 		return -ENODEV;
 	}
 
-	((struct can_skb_priv *)(skb->head))->ifindex = dev->ifindex;
+	can_skb_prv(skb)->ifindex = dev->ifindex;
 	skb->dev = dev;
 	skb->sk  = sk;
 	err = can_send(skb, 1); /* send with loopback */
diff --git a/net/can/gw.c b/net/can/gw.c
index acdd4656cc3b..c185fcd5e828 100644
--- a/net/can/gw.c
+++ b/net/can/gw.c
@@ -381,9 +381,7 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data)
 
 	/* is sending the skb back to the incoming interface not allowed? */
 	if (!(gwj->flags & CGW_FLAGS_CAN_IIF_TX_OK) &&
-	    skb_headroom(skb) == sizeof(struct can_skb_priv) &&
-	    (((struct can_skb_priv *)(skb->head))->ifindex ==
-	     gwj->dst.dev->ifindex))
+	    can_skb_prv(skb)->ifindex == gwj->dst.dev->ifindex)
 		return;
 
 	/*
diff --git a/net/can/raw.c b/net/can/raw.c
index 5d860e8dcc52..c1764e41ddaf 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -705,8 +705,8 @@ static int raw_sendmsg(struct kiocb *iocb, struct socket *sock,
 	if (!skb)
 		goto put_dev;
 
-	skb_reserve(skb, sizeof(struct can_skb_priv));
-	((struct can_skb_priv *)(skb->head))->ifindex = dev->ifindex;
+	can_skb_reserve(skb);
+	can_skb_prv(skb)->ifindex = dev->ifindex;
 
 	err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
 	if (err < 0)
-- 
cgit v1.2.3


From 5fbee843c32e5de2d8af68ba0bdd113bb0af9d86 Mon Sep 17 00:00:00 2001
From: Cong Wang <amwang@redhat.com>
Date: Tue, 22 Jan 2013 21:29:39 +0000
Subject: netpoll: add RCU annotation to npinfo field

dev->npinfo is protected by RCU.

This fixes the following sparse warnings:

net/core/netpoll.c:177:48: error: incompatible types in comparison expression (different address spaces)
net/core/netpoll.c:200:35: error: incompatible types in comparison expression (different address spaces)
net/core/netpoll.c:221:35: error: incompatible types in comparison expression (different address spaces)
net/core/netpoll.c:327:18: error: incompatible types in comparison expression (different address spaces)

Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: David S. Miller <davem@davemloft.net>
Signed-off-by: Cong Wang <amwang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 549f5ad2055d..85b0949d9946 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1272,7 +1272,7 @@ struct net_device {
 	void (*destructor)(struct net_device *dev);
 
 #ifdef CONFIG_NETPOLL
-	struct netpoll_info	*npinfo;
+	struct netpoll_info __rcu	*npinfo;
 #endif
 
 #ifdef CONFIG_NET_NS
-- 
cgit v1.2.3


From 7ab59dc15e2f42a4321ed016bcd6044a4d8de6d1 Mon Sep 17 00:00:00 2001
From: "David J. Choi" <david.choi@micrel.com>
Date: Wed, 23 Jan 2013 14:05:15 +0000
Subject: drivers/net/phy/micrel_phy: Add support for new PHYs

Summary of changes:
.Newly added phys
	-KSZ8081/KSZ8091, which has some phy ids.
	-KSZ8061
	-KSZ9031, which is Gigabit phy.
	-KSZ886X, which has a switch function.
	-KSZ8031, which has a same phy ids with KSZ8021.

Signed-off-by: David J. Choi <david.choi@micrel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/micrel.c   | 64 +++++++++++++++++++++++++++++++++++++++++++---
 include/linux/micrel_phy.h |  9 ++++++-
 2 files changed, 68 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index b983596abcbb..29934446436a 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -5,15 +5,20 @@
  *
  * Author: David J. Choi
  *
- * Copyright (c) 2010 Micrel, Inc.
+ * Copyright (c) 2010-2013 Micrel, Inc.
  *
  * This program is free software; you can redistribute  it and/or modify it
  * under  the terms of  the GNU General  Public License as published by the
  * Free Software Foundation;  either version 2 of the  License, or (at your
  * option) any later version.
  *
- * Support : ksz9021 1000/100/10 phy from Micrel
- *		ks8001, ks8737, ks8721, ks8041, ks8051 100/10 phy
+ * Support : Micrel Phys:
+ *		Giga phys: ksz9021, ksz9031
+ *		100/10 Phys : ksz8001, ksz8721, ksz8737, ksz8041
+ *			   ksz8021, ksz8031, ksz8051,
+ *			   ksz8081, ksz8091,
+ *			   ksz8061,
+ *		Switch : ksz8873, ksz886x
  */
 
 #include <linux/kernel.h>
@@ -176,7 +181,7 @@ static struct phy_driver ksphy_driver[] = {
 }, {
 	.phy_id		= PHY_ID_KSZ8021,
 	.phy_id_mask	= 0x00ffffff,
-	.name		= "Micrel KSZ8021",
+	.name		= "Micrel KSZ8021 or KSZ8031",
 	.features	= (PHY_BASIC_FEATURES | SUPPORTED_Pause |
 			   SUPPORTED_Asym_Pause),
 	.flags		= PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT,
@@ -224,6 +229,30 @@ static struct phy_driver ksphy_driver[] = {
 	.ack_interrupt	= kszphy_ack_interrupt,
 	.config_intr	= kszphy_config_intr,
 	.driver		= { .owner = THIS_MODULE,},
+}, {
+	.phy_id		= PHY_ID_KSZ8081,
+	.name		= "Micrel KSZ8081 or KSZ8091",
+	.phy_id_mask	= 0x00fffff0,
+	.features	= (PHY_BASIC_FEATURES | SUPPORTED_Pause),
+	.flags		= PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT,
+	.config_init	= kszphy_config_init,
+	.config_aneg	= genphy_config_aneg,
+	.read_status	= genphy_read_status,
+	.ack_interrupt	= kszphy_ack_interrupt,
+	.config_intr	= kszphy_config_intr,
+	.driver		= { .owner = THIS_MODULE,},
+}, {
+	.phy_id		= PHY_ID_KSZ8061,
+	.name		= "Micrel KSZ8061",
+	.phy_id_mask	= 0x00fffff0,
+	.features	= (PHY_BASIC_FEATURES | SUPPORTED_Pause),
+	.flags		= PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT,
+	.config_init	= kszphy_config_init,
+	.config_aneg	= genphy_config_aneg,
+	.read_status	= genphy_read_status,
+	.ack_interrupt	= kszphy_ack_interrupt,
+	.config_intr	= kszphy_config_intr,
+	.driver		= { .owner = THIS_MODULE,},
 }, {
 	.phy_id		= PHY_ID_KSZ9021,
 	.phy_id_mask	= 0x000ffffe,
@@ -237,6 +266,19 @@ static struct phy_driver ksphy_driver[] = {
 	.ack_interrupt	= kszphy_ack_interrupt,
 	.config_intr	= ksz9021_config_intr,
 	.driver		= { .owner = THIS_MODULE, },
+}, {
+	.phy_id		= PHY_ID_KSZ9031,
+	.phy_id_mask	= 0x00fffff0,
+	.name		= "Micrel KSZ9031 Gigabit PHY",
+	.features	= (PHY_GBIT_FEATURES | SUPPORTED_Pause
+				| SUPPORTED_Asym_Pause),
+	.flags		= PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT,
+	.config_init	= kszphy_config_init,
+	.config_aneg	= genphy_config_aneg,
+	.read_status	= genphy_read_status,
+	.ack_interrupt	= kszphy_ack_interrupt,
+	.config_intr	= ksz9021_config_intr,
+	.driver		= { .owner = THIS_MODULE, },
 }, {
 	.phy_id		= PHY_ID_KSZ8873MLL,
 	.phy_id_mask	= 0x00fffff0,
@@ -247,6 +289,16 @@ static struct phy_driver ksphy_driver[] = {
 	.config_aneg	= ksz8873mll_config_aneg,
 	.read_status	= ksz8873mll_read_status,
 	.driver		= { .owner = THIS_MODULE, },
+}, {
+	.phy_id		= PHY_ID_KSZ886X,
+	.phy_id_mask	= 0x00fffff0,
+	.name		= "Micrel KSZ886X Switch",
+	.features	= (PHY_BASIC_FEATURES | SUPPORTED_Pause),
+	.flags		= PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT,
+	.config_init	= kszphy_config_init,
+	.config_aneg	= genphy_config_aneg,
+	.read_status	= genphy_read_status,
+	.driver		= { .owner = THIS_MODULE, },
 } };
 
 static int __init ksphy_init(void)
@@ -270,12 +322,16 @@ MODULE_LICENSE("GPL");
 
 static struct mdio_device_id __maybe_unused micrel_tbl[] = {
 	{ PHY_ID_KSZ9021, 0x000ffffe },
+	{ PHY_ID_KSZ9031, 0x00fffff0 },
 	{ PHY_ID_KSZ8001, 0x00ffffff },
 	{ PHY_ID_KS8737, 0x00fffff0 },
 	{ PHY_ID_KSZ8021, 0x00ffffff },
 	{ PHY_ID_KSZ8041, 0x00fffff0 },
 	{ PHY_ID_KSZ8051, 0x00fffff0 },
+	{ PHY_ID_KSZ8061, 0x00fffff0 },
+	{ PHY_ID_KSZ8081, 0x00fffff0 },
 	{ PHY_ID_KSZ8873MLL, 0x00fffff0 },
+	{ PHY_ID_KSZ886X, 0x00fffff0 },
 	{ }
 };
 
diff --git a/include/linux/micrel_phy.h b/include/linux/micrel_phy.h
index adfe8c058f29..9dbb41a4e250 100644
--- a/include/linux/micrel_phy.h
+++ b/include/linux/micrel_phy.h
@@ -21,8 +21,15 @@
 #define PHY_ID_KSZ8021		0x00221555
 #define PHY_ID_KSZ8041		0x00221510
 #define PHY_ID_KSZ8051		0x00221550
-/* both for ks8001 Rev. A/B, and for ks8721 Rev 3. */
+/* same id: ks8001 Rev. A/B, and ks8721 Rev 3. */
 #define PHY_ID_KSZ8001		0x0022161A
+/* same id: KS8081, KS8091 */
+#define PHY_ID_KSZ8081		0x00221560
+#define PHY_ID_KSZ8061		0x00221570
+#define PHY_ID_KSZ9031		0x00221620
+
+#define PHY_ID_KSZ886X		0x00221430
+#define PHY_ID_KSZ8863		0x00221435
 
 /* struct phy_device dev_flags definitions */
 #define MICREL_PHY_50MHZ_CLK	0x00000001
-- 
cgit v1.2.3


From d2a5884a64161b524cc6749ee11b95d252e497f3 Mon Sep 17 00:00:00 2001
From: Andrey Smirnov <andrew.smirnov@gmail.com>
Date: Sun, 27 Jan 2013 10:49:05 -0800
Subject: regmap: Add "no-bus" option for regmap API

This commit adds provision for "no-bus" usage of the regmap API. In
this configuration user can provide API with two callbacks 'reg_read'
and 'reg_write' which are to be called when reads and writes to one of
device's registers is performed. This is useful for devices that
expose registers but whose register access sequence does not fit the 'bus'
abstraction.

Signed-off-by: Andrey Smirnov <andrew.smirnov@gmail.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 drivers/base/regmap/internal.h |  2 ++
 drivers/base/regmap/regmap.c   | 58 +++++++++++++++++++++++++++++++++---------
 include/linux/regmap.h         | 18 ++++++++++++-
 3 files changed, 65 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/regmap/internal.h b/drivers/base/regmap/internal.h
index 51f057405647..b55fde5d216a 100644
--- a/drivers/base/regmap/internal.h
+++ b/drivers/base/regmap/internal.h
@@ -77,6 +77,8 @@ struct regmap {
 	int (*reg_read)(void *context, unsigned int reg, unsigned int *val);
 	int (*reg_write)(void *context, unsigned int reg, unsigned int val);
 
+	bool defer_caching;
+
 	u8 read_flag_mask;
 	u8 write_flag_mask;
 
diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c
index 6845a077bd84..9592030a0dc3 100644
--- a/drivers/base/regmap/regmap.c
+++ b/drivers/base/regmap/regmap.c
@@ -379,7 +379,7 @@ struct regmap *regmap_init(struct device *dev,
 	enum regmap_endian reg_endian, val_endian;
 	int i, j;
 
-	if (!bus || !config)
+	if (!config)
 		goto err;
 
 	map = kzalloc(sizeof(*map), GFP_KERNEL);
@@ -393,7 +393,8 @@ struct regmap *regmap_init(struct device *dev,
 		map->unlock = config->unlock;
 		map->lock_arg = config->lock_arg;
 	} else {
-		if (bus->fast_io) {
+		if ((bus && bus->fast_io) ||
+		    config->fast_io) {
 			spin_lock_init(&map->spinlock);
 			map->lock = regmap_lock_spinlock;
 			map->unlock = regmap_unlock_spinlock;
@@ -433,11 +434,19 @@ struct regmap *regmap_init(struct device *dev,
 	if (config->read_flag_mask || config->write_flag_mask) {
 		map->read_flag_mask = config->read_flag_mask;
 		map->write_flag_mask = config->write_flag_mask;
-	} else {
+	} else if (bus) {
 		map->read_flag_mask = bus->read_flag_mask;
 	}
 
-	map->reg_read = _regmap_bus_read;
+	if (!bus) {
+		map->reg_read  = config->reg_read;
+		map->reg_write = config->reg_write;
+
+		map->defer_caching = false;
+		goto skip_format_initialization;
+	} else {
+		map->reg_read  = _regmap_bus_read;
+	}
 
 	reg_endian = config->reg_format_endian;
 	if (reg_endian == REGMAP_ENDIAN_DEFAULT)
@@ -584,10 +593,15 @@ struct regmap *regmap_init(struct device *dev,
 		goto err_map;
 	}
 
-	if (map->format.format_write)
+	if (map->format.format_write) {
+		map->defer_caching = false;
 		map->reg_write = _regmap_bus_formatted_write;
-	else if (map->format.format_val)
+	} else if (map->format.format_val) {
+		map->defer_caching = true;
 		map->reg_write = _regmap_bus_raw_write;
+	}
+
+skip_format_initialization:
 
 	map->range_tree = RB_ROOT;
 	for (i = 0; i < config->num_ranges; i++) {
@@ -790,7 +804,7 @@ void regmap_exit(struct regmap *map)
 	regcache_exit(map);
 	regmap_debugfs_exit(map);
 	regmap_range_exit(map);
-	if (map->bus->free_context)
+	if (map->bus && map->bus->free_context)
 		map->bus->free_context(map->bus_context);
 	kfree(map->work_buf);
 	kfree(map);
@@ -893,6 +907,8 @@ static int _regmap_raw_write(struct regmap *map, unsigned int reg,
 	size_t len;
 	int i;
 
+	BUG_ON(!map->bus);
+
 	/* Check for unwritable registers before we start */
 	if (map->writeable_reg)
 		for (i = 0; i < val_len / map->format.val_bytes; i++)
@@ -1002,7 +1018,7 @@ static int _regmap_bus_formatted_write(void *context, unsigned int reg,
 	struct regmap_range_node *range;
 	struct regmap *map = context;
 
-	BUG_ON(!map->format.format_write);
+	BUG_ON(!map->bus || !map->format.format_write);
 
 	range = _regmap_range_lookup(map, reg);
 	if (range) {
@@ -1028,7 +1044,7 @@ static int _regmap_bus_raw_write(void *context, unsigned int reg,
 {
 	struct regmap *map = context;
 
-	BUG_ON(!map->format.format_val);
+	BUG_ON(!map->bus || !map->format.format_val);
 
 	map->format.format_val(map->work_buf + map->format.reg_bytes
 			       + map->format.pad_bytes, val, 0);
@@ -1039,12 +1055,18 @@ static int _regmap_bus_raw_write(void *context, unsigned int reg,
 				 map->format.val_bytes);
 }
 
+static inline void *_regmap_map_get_context(struct regmap *map)
+{
+	return (map->bus) ? map : map->bus_context;
+}
+
 int _regmap_write(struct regmap *map, unsigned int reg,
 		  unsigned int val)
 {
 	int ret;
+	void *context = _regmap_map_get_context(map);
 
-	if (!map->cache_bypass && map->format.format_write) {
+	if (!map->cache_bypass && !map->defer_caching) {
 		ret = regcache_write(map, reg, val);
 		if (ret != 0)
 			return ret;
@@ -1061,7 +1083,7 @@ int _regmap_write(struct regmap *map, unsigned int reg,
 
 	trace_regmap_reg_write(map->dev, reg, val);
 
-	return map->reg_write(map, reg, val);
+	return map->reg_write(context, reg, val);
 }
 
 /**
@@ -1112,6 +1134,8 @@ int regmap_raw_write(struct regmap *map, unsigned int reg,
 {
 	int ret;
 
+	if (!map->bus)
+		return -EINVAL;
 	if (val_len % map->format.val_bytes)
 		return -EINVAL;
 	if (reg % map->reg_stride)
@@ -1148,6 +1172,8 @@ int regmap_bulk_write(struct regmap *map, unsigned int reg, const void *val,
 	size_t val_bytes = map->format.val_bytes;
 	void *wval;
 
+	if (!map->bus)
+		return -EINVAL;
 	if (!map->format.parse_val)
 		return -EINVAL;
 	if (reg % map->reg_stride)
@@ -1201,6 +1227,8 @@ static int _regmap_raw_read(struct regmap *map, unsigned int reg, void *val,
 	u8 *u8 = map->work_buf;
 	int ret;
 
+	BUG_ON(!map->bus);
+
 	range = _regmap_range_lookup(map, reg);
 	if (range) {
 		ret = _regmap_select_page(map, &reg, range,
@@ -1252,6 +1280,8 @@ static int _regmap_read(struct regmap *map, unsigned int reg,
 			unsigned int *val)
 {
 	int ret;
+	void *context = _regmap_map_get_context(map);
+
 	BUG_ON(!map->reg_read);
 
 	if (!map->cache_bypass) {
@@ -1263,7 +1293,7 @@ static int _regmap_read(struct regmap *map, unsigned int reg,
 	if (map->cache_only)
 		return -EBUSY;
 
-	ret = map->reg_read(map, reg, val);
+	ret = map->reg_read(context, reg, val);
 	if (ret == 0) {
 #ifdef LOG_DEVICE
 		if (strcmp(dev_name(map->dev), LOG_DEVICE) == 0)
@@ -1325,6 +1355,8 @@ int regmap_raw_read(struct regmap *map, unsigned int reg, void *val,
 	unsigned int v;
 	int ret, i;
 
+	if (!map->bus)
+		return -EINVAL;
 	if (val_len % map->format.val_bytes)
 		return -EINVAL;
 	if (reg % map->reg_stride)
@@ -1376,6 +1408,8 @@ int regmap_bulk_read(struct regmap *map, unsigned int reg, void *val,
 	size_t val_bytes = map->format.val_bytes;
 	bool vol = regmap_volatile_range(map, reg, val_count);
 
+	if (!map->bus)
+		return -EINVAL;
 	if (!map->format.parse_val)
 		return -EINVAL;
 	if (reg % map->reg_stride)
diff --git a/include/linux/regmap.h b/include/linux/regmap.h
index b7e95bf942c9..28dde941c783 100644
--- a/include/linux/regmap.h
+++ b/include/linux/regmap.h
@@ -127,7 +127,18 @@ typedef void (*regmap_unlock)(void *);
  * @lock_arg:	  this field is passed as the only argument of lock/unlock
  *		  functions (ignored in case regular lock/unlock functions
  *		  are not overridden).
- *
+ * @reg_read:	  Optional callback that if filled will be used to perform
+ *           	  all the reads from the registers. Should only be provided for
+ *		  devices whos read operation cannot be represented as a simple read
+ *		  operation on a bus such as SPI, I2C, etc. Most of the devices do
+ * 		  not need this.
+ * @reg_write:	  Same as above for writing.
+ * @fast_io:	  Register IO is fast. Use a spinlock instead of a mutex
+ *	     	  to perform locking. This field is ignored if custom lock/unlock
+ *	     	  functions are used (see fields lock/unlock of struct regmap_config).
+ *		  This field is a duplicate of a similar file in
+ *		  'struct regmap_bus' and serves exact same purpose.
+ *		   Use it only for "no-bus" cases.
  * @max_register: Optional, specifies the maximum valid register index.
  * @wr_table:     Optional, points to a struct regmap_access_table specifying
  *                valid ranges for write access.
@@ -177,6 +188,11 @@ struct regmap_config {
 	regmap_unlock unlock;
 	void *lock_arg;
 
+	int (*reg_read)(void *context, unsigned int reg, unsigned int *val);
+	int (*reg_write)(void *context, unsigned int reg, unsigned int val);
+
+	bool fast_io;
+
 	unsigned int max_register;
 	const struct regmap_access_table *wr_table;
 	const struct regmap_access_table *rd_table;
-- 
cgit v1.2.3


From 0d509f2b112b21411712f0bf789b372987967e49 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Sun, 27 Jan 2013 22:07:38 +0800
Subject: regmap: Add asynchronous I/O support

Some use cases like firmware download can transfer a lot of data in quick
succession. With high speed buses these use cases can benefit from having
multiple transfers scheduled at once since this allows the bus to minimise
the delay between transfers.

Support this by adding regmap_raw_write_async(), allowing raw transfers to
be scheduled, and regmap_async_complete() to wait for them to finish.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 drivers/base/regmap/internal.h |  15 ++++
 drivers/base/regmap/regmap.c   | 182 ++++++++++++++++++++++++++++++++++++++---
 include/linux/regmap.h         |  28 +++++++
 3 files changed, 215 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/regmap/internal.h b/drivers/base/regmap/internal.h
index 51f057405647..202518641779 100644
--- a/drivers/base/regmap/internal.h
+++ b/drivers/base/regmap/internal.h
@@ -16,6 +16,7 @@
 #include <linux/regmap.h>
 #include <linux/fs.h>
 #include <linux/list.h>
+#include <linux/wait.h>
 
 struct regmap;
 struct regcache_ops;
@@ -39,6 +40,13 @@ struct regmap_format {
 	unsigned int (*parse_val)(void *buf);
 };
 
+struct regmap_async {
+	struct list_head list;
+	struct work_struct cleanup;
+	struct regmap *map;
+	void *work_buf;
+};
+
 struct regmap {
 	struct mutex mutex;
 	spinlock_t spinlock;
@@ -53,6 +61,11 @@ struct regmap {
 	void *bus_context;
 	const char *name;
 
+	spinlock_t async_lock;
+	wait_queue_head_t async_waitq;
+	struct list_head async_list;
+	int async_ret;
+
 #ifdef CONFIG_DEBUG_FS
 	struct dentry *debugfs;
 	const char *debugfs_name;
@@ -178,6 +191,8 @@ bool regcache_set_val(void *base, unsigned int idx,
 		      unsigned int val, unsigned int word_size);
 int regcache_lookup_reg(struct regmap *map, unsigned int reg);
 
+void regmap_async_complete_cb(struct regmap_async *async, int ret);
+
 extern struct regcache_ops regcache_rbtree_ops;
 extern struct regcache_ops regcache_lzo_ops;
 
diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c
index 6845a077bd84..e57b7bc035fc 100644
--- a/drivers/base/regmap/regmap.c
+++ b/drivers/base/regmap/regmap.c
@@ -41,6 +41,15 @@ static int _regmap_bus_formatted_write(void *context, unsigned int reg,
 static int _regmap_bus_raw_write(void *context, unsigned int reg,
 				 unsigned int val);
 
+static void async_cleanup(struct work_struct *work)
+{
+	struct regmap_async *async = container_of(work, struct regmap_async,
+						  cleanup);
+
+	kfree(async->work_buf);
+	kfree(async);
+}
+
 bool regmap_reg_in_ranges(unsigned int reg,
 			  const struct regmap_range *ranges,
 			  unsigned int nranges)
@@ -430,6 +439,10 @@ struct regmap *regmap_init(struct device *dev,
 	map->cache_type = config->cache_type;
 	map->name = config->name;
 
+	spin_lock_init(&map->async_lock);
+	INIT_LIST_HEAD(&map->async_list);
+	init_waitqueue_head(&map->async_waitq);
+
 	if (config->read_flag_mask || config->write_flag_mask) {
 		map->read_flag_mask = config->read_flag_mask;
 		map->write_flag_mask = config->write_flag_mask;
@@ -884,10 +897,13 @@ static int _regmap_select_page(struct regmap *map, unsigned int *reg,
 }
 
 static int _regmap_raw_write(struct regmap *map, unsigned int reg,
-			     const void *val, size_t val_len)
+			     const void *val, size_t val_len, bool async)
 {
 	struct regmap_range_node *range;
+	unsigned long flags;
 	u8 *u8 = map->work_buf;
+	void *work_val = map->work_buf + map->format.reg_bytes +
+		map->format.pad_bytes;
 	void *buf;
 	int ret = -ENOTSUPP;
 	size_t len;
@@ -932,7 +948,7 @@ static int _regmap_raw_write(struct regmap *map, unsigned int reg,
 			dev_dbg(map->dev, "Writing window %d/%zu\n",
 				win_residue, val_len / map->format.val_bytes);
 			ret = _regmap_raw_write(map, reg, val, win_residue *
-						map->format.val_bytes);
+						map->format.val_bytes, async);
 			if (ret != 0)
 				return ret;
 
@@ -955,6 +971,50 @@ static int _regmap_raw_write(struct regmap *map, unsigned int reg,
 
 	u8[0] |= map->write_flag_mask;
 
+	if (async && map->bus->async_write) {
+		struct regmap_async *async = map->bus->async_alloc();
+		if (!async)
+			return -ENOMEM;
+
+		async->work_buf = kzalloc(map->format.buf_size,
+					  GFP_KERNEL | GFP_DMA);
+		if (!async->work_buf) {
+			kfree(async);
+			return -ENOMEM;
+		}
+
+		INIT_WORK(&async->cleanup, async_cleanup);
+		async->map = map;
+
+		/* If the caller supplied the value we can use it safely. */
+		memcpy(async->work_buf, map->work_buf, map->format.pad_bytes +
+		       map->format.reg_bytes + map->format.val_bytes);
+		if (val == work_val)
+			val = async->work_buf + map->format.pad_bytes +
+				map->format.reg_bytes;
+
+		spin_lock_irqsave(&map->async_lock, flags);
+		list_add_tail(&async->list, &map->async_list);
+		spin_unlock_irqrestore(&map->async_lock, flags);
+
+		ret = map->bus->async_write(map->bus_context, async->work_buf,
+					    map->format.reg_bytes +
+					    map->format.pad_bytes,
+					    val, val_len, async);
+
+		if (ret != 0) {
+			dev_err(map->dev, "Failed to schedule write: %d\n",
+				ret);
+
+			spin_lock_irqsave(&map->async_lock, flags);
+			list_del(&async->list);
+			spin_unlock_irqrestore(&map->async_lock, flags);
+
+			kfree(async->work_buf);
+			kfree(async);
+		}
+	}
+
 	trace_regmap_hw_write_start(map->dev, reg,
 				    val_len / map->format.val_bytes);
 
@@ -962,8 +1022,7 @@ static int _regmap_raw_write(struct regmap *map, unsigned int reg,
 	 * send the work_buf directly, otherwise try to do a gather
 	 * write.
 	 */
-	if (val == (map->work_buf + map->format.pad_bytes +
-		    map->format.reg_bytes))
+	if (val == work_val)
 		ret = map->bus->write(map->bus_context, map->work_buf,
 				      map->format.reg_bytes +
 				      map->format.pad_bytes +
@@ -1036,7 +1095,7 @@ static int _regmap_bus_raw_write(void *context, unsigned int reg,
 				 map->work_buf +
 				 map->format.reg_bytes +
 				 map->format.pad_bytes,
-				 map->format.val_bytes);
+				 map->format.val_bytes, false);
 }
 
 int _regmap_write(struct regmap *map, unsigned int reg,
@@ -1119,7 +1178,7 @@ int regmap_raw_write(struct regmap *map, unsigned int reg,
 
 	map->lock(map->lock_arg);
 
-	ret = _regmap_raw_write(map, reg, val, val_len);
+	ret = _regmap_raw_write(map, reg, val, val_len, false);
 
 	map->unlock(map->lock_arg);
 
@@ -1175,14 +1234,15 @@ int regmap_bulk_write(struct regmap *map, unsigned int reg, const void *val,
 	if (map->use_single_rw) {
 		for (i = 0; i < val_count; i++) {
 			ret = regmap_raw_write(map,
-						reg + (i * map->reg_stride),
-						val + (i * val_bytes),
-						val_bytes);
+					       reg + (i * map->reg_stride),
+					       val + (i * val_bytes),
+					       val_bytes);
 			if (ret != 0)
 				return ret;
 		}
 	} else {
-		ret = _regmap_raw_write(map, reg, wval, val_bytes * val_count);
+		ret = _regmap_raw_write(map, reg, wval, val_bytes * val_count,
+					false);
 	}
 
 	if (val_bytes != 1)
@@ -1194,6 +1254,48 @@ out:
 }
 EXPORT_SYMBOL_GPL(regmap_bulk_write);
 
+/**
+ * regmap_raw_write_async(): Write raw values to one or more registers
+ *                           asynchronously
+ *
+ * @map: Register map to write to
+ * @reg: Initial register to write to
+ * @val: Block of data to be written, laid out for direct transmission to the
+ *       device.  Must be valid until regmap_async_complete() is called.
+ * @val_len: Length of data pointed to by val.
+ *
+ * This function is intended to be used for things like firmware
+ * download where a large block of data needs to be transferred to the
+ * device.  No formatting will be done on the data provided.
+ *
+ * If supported by the underlying bus the write will be scheduled
+ * asynchronously, helping maximise I/O speed on higher speed buses
+ * like SPI.  regmap_async_complete() can be called to ensure that all
+ * asynchrnous writes have been completed.
+ *
+ * A value of zero will be returned on success, a negative errno will
+ * be returned in error cases.
+ */
+int regmap_raw_write_async(struct regmap *map, unsigned int reg,
+			   const void *val, size_t val_len)
+{
+	int ret;
+
+	if (val_len % map->format.val_bytes)
+		return -EINVAL;
+	if (reg % map->reg_stride)
+		return -EINVAL;
+
+	map->lock(map->lock_arg);
+
+	ret = _regmap_raw_write(map, reg, val, val_len, true);
+
+	map->unlock(map->lock_arg);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(regmap_raw_write_async);
+
 static int _regmap_raw_read(struct regmap *map, unsigned int reg, void *val,
 			    unsigned int val_len)
 {
@@ -1492,6 +1594,66 @@ int regmap_update_bits_check(struct regmap *map, unsigned int reg,
 }
 EXPORT_SYMBOL_GPL(regmap_update_bits_check);
 
+void regmap_async_complete_cb(struct regmap_async *async, int ret)
+{
+	struct regmap *map = async->map;
+	bool wake;
+
+	spin_lock(&map->async_lock);
+
+	list_del(&async->list);
+	wake = list_empty(&map->async_list);
+
+	if (ret != 0)
+		map->async_ret = ret;
+
+	spin_unlock(&map->async_lock);
+
+	schedule_work(&async->cleanup);
+
+	if (wake)
+		wake_up(&map->async_waitq);
+}
+
+static int regmap_async_is_done(struct regmap *map)
+{
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&map->async_lock, flags);
+	ret = list_empty(&map->async_list);
+	spin_unlock_irqrestore(&map->async_lock, flags);
+
+	return ret;
+}
+
+/**
+ * regmap_async_complete: Ensure all asynchronous I/O has completed.
+ *
+ * @map: Map to operate on.
+ *
+ * Blocks until any pending asynchronous I/O has completed.  Returns
+ * an error code for any failed I/O operations.
+ */
+int regmap_async_complete(struct regmap *map)
+{
+	unsigned long flags;
+	int ret;
+
+	/* Nothing to do with no async support */
+	if (!map->bus->async_write)
+		return 0;
+
+	wait_event(map->async_waitq, regmap_async_is_done(map));
+
+	spin_lock_irqsave(&map->async_lock, flags);
+	ret = map->async_ret;
+	map->async_ret = 0;
+	spin_unlock_irqrestore(&map->async_lock, flags);
+
+	return ret;
+}
+
 /**
  * regmap_register_patch: Register and apply register updates to be applied
  *                        on device initialistion
diff --git a/include/linux/regmap.h b/include/linux/regmap.h
index b7e95bf942c9..f9b7fbe35ab1 100644
--- a/include/linux/regmap.h
+++ b/include/linux/regmap.h
@@ -235,14 +235,21 @@ struct regmap_range_cfg {
 	unsigned int window_len;
 };
 
+struct regmap_async;
+
 typedef int (*regmap_hw_write)(void *context, const void *data,
 			       size_t count);
 typedef int (*regmap_hw_gather_write)(void *context,
 				      const void *reg, size_t reg_len,
 				      const void *val, size_t val_len);
+typedef int (*regmap_hw_async_write)(void *context,
+				     const void *reg, size_t reg_len,
+				     const void *val, size_t val_len,
+				     struct regmap_async *async);
 typedef int (*regmap_hw_read)(void *context,
 			      const void *reg_buf, size_t reg_size,
 			      void *val_buf, size_t val_size);
+typedef struct regmap_async *(*regmap_hw_async_alloc)(void);
 typedef void (*regmap_hw_free_context)(void *context);
 
 /**
@@ -255,8 +262,11 @@ typedef void (*regmap_hw_free_context)(void *context);
  * @write: Write operation.
  * @gather_write: Write operation with split register/value, return -ENOTSUPP
  *                if not implemented  on a given device.
+ * @async_write: Write operation which completes asynchronously, optional and
+ *               must serialise with respect to non-async I/O.
  * @read: Read operation.  Data is returned in the buffer used to transmit
  *         data.
+ * @async_alloc: Allocate a regmap_async() structure.
  * @read_flag_mask: Mask to be set in the top byte of the register when doing
  *                  a read.
  * @reg_format_endian_default: Default endianness for formatted register
@@ -265,13 +275,16 @@ typedef void (*regmap_hw_free_context)(void *context);
  * @val_format_endian_default: Default endianness for formatted register
  *     values. Used when the regmap_config specifies DEFAULT. If this is
  *     DEFAULT, BIG is assumed.
+ * @async_size: Size of struct used for async work.
  */
 struct regmap_bus {
 	bool fast_io;
 	regmap_hw_write write;
 	regmap_hw_gather_write gather_write;
+	regmap_hw_async_write async_write;
 	regmap_hw_read read;
 	regmap_hw_free_context free_context;
+	regmap_hw_async_alloc async_alloc;
 	u8 read_flag_mask;
 	enum regmap_endian reg_format_endian_default;
 	enum regmap_endian val_format_endian_default;
@@ -310,6 +323,8 @@ int regmap_raw_write(struct regmap *map, unsigned int reg,
 		     const void *val, size_t val_len);
 int regmap_bulk_write(struct regmap *map, unsigned int reg, const void *val,
 			size_t val_count);
+int regmap_raw_write_async(struct regmap *map, unsigned int reg,
+			   const void *val, size_t val_len);
 int regmap_read(struct regmap *map, unsigned int reg, unsigned int *val);
 int regmap_raw_read(struct regmap *map, unsigned int reg,
 		    void *val, size_t val_len);
@@ -321,6 +336,7 @@ int regmap_update_bits_check(struct regmap *map, unsigned int reg,
 			     unsigned int mask, unsigned int val,
 			     bool *change);
 int regmap_get_val_bytes(struct regmap *map);
+int regmap_async_complete(struct regmap *map);
 
 int regcache_sync(struct regmap *map);
 int regcache_sync_region(struct regmap *map, unsigned int min,
@@ -422,6 +438,13 @@ static inline int regmap_raw_write(struct regmap *map, unsigned int reg,
 	return -EINVAL;
 }
 
+static inline int regmap_raw_write_async(struct regmap *map, unsigned int reg,
+					 const void *val, size_t val_len)
+{
+	WARN_ONCE(1, "regmap API is disabled");
+	return -EINVAL;
+}
+
 static inline int regmap_bulk_write(struct regmap *map, unsigned int reg,
 				    const void *val, size_t val_count)
 {
@@ -500,6 +523,11 @@ static inline void regcache_mark_dirty(struct regmap *map)
 	WARN_ONCE(1, "regmap API is disabled");
 }
 
+static inline void regmap_async_complete(struct regmap *map)
+{
+	WARN_ONCE(1, "regmap API is disabled");
+}
+
 static inline int regmap_register_patch(struct regmap *map,
 					const struct reg_default *regs,
 					int num_regs)
-- 
cgit v1.2.3


From 0d1c28a449c6c23a126e3a08ee30914609aac227 Mon Sep 17 00:00:00 2001
From: Mathias Nyman <mathias.nyman@linux.intel.com>
Date: Mon, 28 Jan 2013 16:23:10 +0200
Subject: gpiolib-acpi: Add ACPI5 event model support to gpio.

Add ability to handle ACPI events signalled by GPIO interrupts.

ACPI5 platforms can use GPIO signaled ACPI events. These GPIO interrupts are
handled by ACPI event methods which need to be called from the GPIO
controller's interrupt handler. acpi_gpio_request_interrupt() finds out which
gpio pins have acpi event methods and assigns interrupt handlers that calls
the acpi event methods for those pins.

Partially based on work by Rui Zhang <rui.zhang@intel.com>

Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/gpio/gpiolib-acpi.c | 82 +++++++++++++++++++++++++++++++++++++++++++++
 include/linux/acpi_gpio.h   |  4 +++
 2 files changed, 86 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c
index cbad6e908d30..54ce2269ed25 100644
--- a/drivers/gpio/gpiolib-acpi.c
+++ b/drivers/gpio/gpiolib-acpi.c
@@ -15,6 +15,7 @@
 #include <linux/export.h>
 #include <linux/acpi_gpio.h>
 #include <linux/acpi.h>
+#include <linux/interrupt.h>
 
 static int acpi_gpiochip_find(struct gpio_chip *gc, void *data)
 {
@@ -52,3 +53,84 @@ int acpi_get_gpio(char *path, int pin)
 	return chip->base + pin;
 }
 EXPORT_SYMBOL_GPL(acpi_get_gpio);
+
+
+static irqreturn_t acpi_gpio_irq_handler(int irq, void *data)
+{
+	acpi_handle handle = data;
+
+	acpi_evaluate_object(handle, NULL, NULL, NULL);
+
+	return IRQ_HANDLED;
+}
+
+/**
+ * acpi_gpiochip_request_interrupts() - Register isr for gpio chip ACPI events
+ * @chip:      gpio chip
+ *
+ * ACPI5 platforms can use GPIO signaled ACPI events. These GPIO interrupts are
+ * handled by ACPI event methods which need to be called from the GPIO
+ * chip's interrupt handler. acpi_gpiochip_request_interrupts finds out which
+ * gpio pins have acpi event methods and assigns interrupt handlers that calls
+ * the acpi event methods for those pins.
+ *
+ * Interrupts are automatically freed on driver detach
+ */
+
+void acpi_gpiochip_request_interrupts(struct gpio_chip *chip)
+{
+	struct acpi_buffer buf = {ACPI_ALLOCATE_BUFFER, NULL};
+	struct acpi_resource *res;
+	acpi_handle handle, ev_handle;
+	acpi_status status;
+	unsigned int pin, irq;
+	char ev_name[5];
+
+	if (!chip->dev || !chip->to_irq)
+		return;
+
+	handle = ACPI_HANDLE(chip->dev);
+	if (!handle)
+		return;
+
+	status = acpi_get_event_resources(handle, &buf);
+	if (ACPI_FAILURE(status))
+		return;
+
+	/* If a gpio interrupt has an acpi event handler method, then
+	 * set up an interrupt handler that calls the acpi event handler
+	 */
+
+	for (res = buf.pointer;
+	     res && (res->type != ACPI_RESOURCE_TYPE_END_TAG);
+	     res = ACPI_NEXT_RESOURCE(res)) {
+
+		if (res->type != ACPI_RESOURCE_TYPE_GPIO ||
+		    res->data.gpio.connection_type !=
+		    ACPI_RESOURCE_GPIO_TYPE_INT)
+			continue;
+
+		pin = res->data.gpio.pin_table[0];
+		if (pin > chip->ngpio)
+			continue;
+
+		sprintf(ev_name, "_%c%02X",
+		res->data.gpio.triggering ? 'E' : 'L', pin);
+
+		status = acpi_get_handle(handle, ev_name, &ev_handle);
+		if (ACPI_FAILURE(status))
+			continue;
+
+		irq = chip->to_irq(chip, pin);
+		if (irq < 0)
+			continue;
+
+		/* Assume BIOS sets the triggering, so no flags */
+		devm_request_threaded_irq(chip->dev, irq, NULL,
+					  acpi_gpio_irq_handler,
+					  0,
+					  "GPIO-signaled-ACPI-event",
+					  ev_handle);
+	}
+}
+EXPORT_SYMBOL(acpi_gpiochip_request_interrupts);
diff --git a/include/linux/acpi_gpio.h b/include/linux/acpi_gpio.h
index 91615a389b65..b76ebd08ff8e 100644
--- a/include/linux/acpi_gpio.h
+++ b/include/linux/acpi_gpio.h
@@ -2,10 +2,12 @@
 #define _LINUX_ACPI_GPIO_H_
 
 #include <linux/errno.h>
+#include <linux/gpio.h>
 
 #ifdef CONFIG_GPIO_ACPI
 
 int acpi_get_gpio(char *path, int pin);
+void acpi_gpiochip_request_interrupts(struct gpio_chip *chip);
 
 #else /* CONFIG_GPIO_ACPI */
 
@@ -14,6 +16,8 @@ static inline int acpi_get_gpio(char *path, int pin)
 	return -ENODEV;
 }
 
+static inline void acpi_gpiochip_request_interrupts(struct gpio_chip *chip) { }
+
 #endif /* CONFIG_GPIO_ACPI */
 
 #endif /* _LINUX_ACPI_GPIO_H_ */
-- 
cgit v1.2.3


From 222e8500f5e75d578ce1cc38c0349f89646319c4 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Mon, 7 Jan 2013 09:06:16 +0100
Subject: mfd: prcmu: delete pin control helpers

These static inlines are duplicating the task now done by the
Nomadik pinctrl drivers, so delete them from the prcmu static
inlines, also delete the register definitions as these should
only be known by the pinctrl driver.

Cc: Loic Pallardy <loic.pallardy@st.com>
Cc: Patrice Chotard <patrice.chotard@st.com>
Cc: Michel Jaouen <michel.jaouen@st.com>
Acked-by: Samuel Ortiz <sameo@linux.intel.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/mfd/db8500-prcmu.h |  6 ---
 include/linux/mfd/dbx500-prcmu.h | 79 ----------------------------------------
 2 files changed, 85 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/db8500-prcmu.h b/include/linux/mfd/db8500-prcmu.h
index 6ee4247df11e..a65deddede2f 100644
--- a/include/linux/mfd/db8500-prcmu.h
+++ b/include/linux/mfd/db8500-prcmu.h
@@ -16,12 +16,6 @@
 /*
  * Registers
  */
-#define DB8500_PRCM_GPIOCR 0x138
-#define DB8500_PRCM_GPIOCR_DBG_UARTMOD_CMD0	BIT(0)
-#define DB8500_PRCM_GPIOCR_DBG_STM_APE_CMD	BIT(9)
-#define DB8500_PRCM_GPIOCR_DBG_STM_MOD_CMD1	BIT(11)
-#define DB8500_PRCM_GPIOCR_SPI2_SELECT		BIT(23)
-
 #define DB8500_PRCM_LINE_VALUE 0x170
 #define DB8500_PRCM_LINE_VALUE_HSI_CAWAKE0	BIT(3)
 
diff --git a/include/linux/mfd/dbx500-prcmu.h b/include/linux/mfd/dbx500-prcmu.h
index c202d6c4d879..ac601470962c 100644
--- a/include/linux/mfd/dbx500-prcmu.h
+++ b/include/linux/mfd/dbx500-prcmu.h
@@ -626,85 +626,6 @@ static inline void prcmu_clear(unsigned int reg, u32 bits)
 	prcmu_write_masked(reg, bits, 0);
 }
 
-#if defined(CONFIG_UX500_SOC_DB8500)
-
-/**
- * prcmu_enable_spi2 - Enables pin muxing for SPI2 on OtherAlternateC1.
- */
-static inline void prcmu_enable_spi2(void)
-{
-	if (cpu_is_u8500())
-		prcmu_set(DB8500_PRCM_GPIOCR, DB8500_PRCM_GPIOCR_SPI2_SELECT);
-}
-
-/**
- * prcmu_disable_spi2 - Disables pin muxing for SPI2 on OtherAlternateC1.
- */
-static inline void prcmu_disable_spi2(void)
-{
-	if (cpu_is_u8500())
-		prcmu_clear(DB8500_PRCM_GPIOCR, DB8500_PRCM_GPIOCR_SPI2_SELECT);
-}
-
-/**
- * prcmu_enable_stm_mod_uart - Enables pin muxing for STMMOD
- * and UARTMOD on OtherAlternateC3.
- */
-static inline void prcmu_enable_stm_mod_uart(void)
-{
-	if (cpu_is_u8500()) {
-		prcmu_set(DB8500_PRCM_GPIOCR,
-			(DB8500_PRCM_GPIOCR_DBG_STM_MOD_CMD1 |
-			 DB8500_PRCM_GPIOCR_DBG_UARTMOD_CMD0));
-	}
-}
-
-/**
- * prcmu_disable_stm_mod_uart - Disables pin muxing for STMMOD
- * and UARTMOD on OtherAlternateC3.
- */
-static inline void prcmu_disable_stm_mod_uart(void)
-{
-	if (cpu_is_u8500()) {
-		prcmu_clear(DB8500_PRCM_GPIOCR,
-			(DB8500_PRCM_GPIOCR_DBG_STM_MOD_CMD1 |
-			 DB8500_PRCM_GPIOCR_DBG_UARTMOD_CMD0));
-	}
-}
-
-/**
- * prcmu_enable_stm_ape - Enables pin muxing for STM APE on OtherAlternateC1.
- */
-static inline void prcmu_enable_stm_ape(void)
-{
-	if (cpu_is_u8500()) {
-		prcmu_set(DB8500_PRCM_GPIOCR,
-			DB8500_PRCM_GPIOCR_DBG_STM_APE_CMD);
-	}
-}
-
-/**
- * prcmu_disable_stm_ape - Disables pin muxing for STM APE on OtherAlternateC1.
- */
-static inline void prcmu_disable_stm_ape(void)
-{
-	if (cpu_is_u8500()) {
-		prcmu_clear(DB8500_PRCM_GPIOCR,
-			DB8500_PRCM_GPIOCR_DBG_STM_APE_CMD);
-	}
-}
-
-#else
-
-static inline void prcmu_enable_spi2(void) {}
-static inline void prcmu_disable_spi2(void) {}
-static inline void prcmu_enable_stm_mod_uart(void) {}
-static inline void prcmu_disable_stm_mod_uart(void) {}
-static inline void prcmu_enable_stm_ape(void) {}
-static inline void prcmu_disable_stm_ape(void) {}
-
-#endif
-
 /* PRCMU QoS APE OPP class */
 #define PRCMU_QOS_APE_OPP 1
 #define PRCMU_QOS_DDR_OPP 2
-- 
cgit v1.2.3


From 7a4f26097d389c16c9956bc03b81532698d97d64 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Wed, 19 Sep 2012 19:31:19 +0200
Subject: ARM: ux500: de-globalize <mach/id.h>

This removes the file <mach/id.h> from the global kernel include
scope, making it a pure mach-ux500 detail. All ASIC specifics
needed by drivers shall henceforth be passed from either platform
data or the device tree.

Cc: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Samuel Ortiz <sameo@linux.intel.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 arch/arm/mach-ux500/board-mop500-uib.c      |   1 +
 arch/arm/mach-ux500/cache-l2x0.c            |   3 +-
 arch/arm/mach-ux500/cpu-db8500.c            |   2 +
 arch/arm/mach-ux500/cpu.c                   |   1 +
 arch/arm/mach-ux500/id.c                    |   2 +
 arch/arm/mach-ux500/id.h                    | 144 ++++++++++++++++++++++++++++
 arch/arm/mach-ux500/include/mach/hardware.h |   1 -
 arch/arm/mach-ux500/include/mach/id.h       | 144 ----------------------------
 arch/arm/mach-ux500/platsmp.c               |   3 +
 arch/arm/mach-ux500/timer.c                 |   2 +
 drivers/cpufreq/db8500-cpufreq.c            |   3 -
 drivers/mfd/db8500-prcmu.c                  |  17 ++--
 include/linux/mfd/dbx500-prcmu.h            |   2 -
 13 files changed, 163 insertions(+), 162 deletions(-)
 create mode 100644 arch/arm/mach-ux500/id.h
 delete mode 100644 arch/arm/mach-ux500/include/mach/id.h

(limited to 'include/linux')

diff --git a/arch/arm/mach-ux500/board-mop500-uib.c b/arch/arm/mach-ux500/board-mop500-uib.c
index 1f47d962e3a1..7037d3687e9f 100644
--- a/arch/arm/mach-ux500/board-mop500-uib.c
+++ b/arch/arm/mach-ux500/board-mop500-uib.c
@@ -13,6 +13,7 @@
 
 #include <mach/hardware.h>
 #include "board-mop500.h"
+#include "id.h"
 
 enum mop500_uib {
 	STUIB,
diff --git a/arch/arm/mach-ux500/cache-l2x0.c b/arch/arm/mach-ux500/cache-l2x0.c
index 75d5b512a3d5..1c1609da76ce 100644
--- a/arch/arm/mach-ux500/cache-l2x0.c
+++ b/arch/arm/mach-ux500/cache-l2x0.c
@@ -10,7 +10,8 @@
 #include <asm/cacheflush.h>
 #include <asm/hardware/cache-l2x0.h>
 #include <mach/hardware.h>
-#include <mach/id.h>
+
+#include "id.h"
 
 static void __iomem *l2x0_base;
 
diff --git a/arch/arm/mach-ux500/cpu-db8500.c b/arch/arm/mach-ux500/cpu-db8500.c
index db0bb75e2c76..09c3fee726d4 100644
--- a/arch/arm/mach-ux500/cpu-db8500.c
+++ b/arch/arm/mach-ux500/cpu-db8500.c
@@ -37,7 +37,9 @@
 
 #include "devices-db8500.h"
 #include "ste-dma40-db8500.h"
+
 #include "board-mop500.h"
+#include "id.h"
 
 /* minimum static i/o mapping required to boot U8500 platforms */
 static struct map_desc u8500_uart_io_desc[] __initdata = {
diff --git a/arch/arm/mach-ux500/cpu.c b/arch/arm/mach-ux500/cpu.c
index 721e7b4275f3..14b7d686f527 100644
--- a/arch/arm/mach-ux500/cpu.c
+++ b/arch/arm/mach-ux500/cpu.c
@@ -27,6 +27,7 @@
 #include <mach/devices.h>
 
 #include "board-mop500.h"
+#include "id.h"
 
 void __iomem *_PRCMU_BASE;
 
diff --git a/arch/arm/mach-ux500/id.c b/arch/arm/mach-ux500/id.c
index d1579920139f..9f951842e1e5 100644
--- a/arch/arm/mach-ux500/id.c
+++ b/arch/arm/mach-ux500/id.c
@@ -17,6 +17,8 @@
 #include <mach/hardware.h>
 #include <mach/setup.h>
 
+#include "id.h"
+
 struct dbx500_asic_id dbx500_id;
 
 static unsigned int ux500_read_asicid(phys_addr_t addr)
diff --git a/arch/arm/mach-ux500/id.h b/arch/arm/mach-ux500/id.h
new file mode 100644
index 000000000000..bcc58a8cccbc
--- /dev/null
+++ b/arch/arm/mach-ux500/id.h
@@ -0,0 +1,144 @@
+/*
+ * Copyright (C) ST-Ericsson SA 2010
+ *
+ * Author: Rabin Vincent <rabin.vincent@stericsson.com> for ST-Ericsson
+ * License terms: GNU General Public License (GPL) version 2
+ */
+
+#ifndef __MACH_UX500_ID
+#define __MACH_UX500_ID
+
+/**
+ * struct dbx500_asic_id - fields of the ASIC ID
+ * @process: the manufacturing process, 0x40 is 40 nm 0x00 is "standard"
+ * @partnumber: hithereto 0x8500 for DB8500
+ * @revision: version code in the series
+ */
+struct dbx500_asic_id {
+	u16	partnumber;
+	u8	revision;
+	u8	process;
+};
+
+extern struct dbx500_asic_id dbx500_id;
+
+static inline unsigned int __attribute_const__ dbx500_partnumber(void)
+{
+	return dbx500_id.partnumber;
+}
+
+static inline unsigned int __attribute_const__ dbx500_revision(void)
+{
+	return dbx500_id.revision;
+}
+
+/*
+ * SOCs
+ */
+
+static inline bool __attribute_const__ cpu_is_u8500(void)
+{
+	return dbx500_partnumber() == 0x8500;
+}
+
+static inline bool __attribute_const__ cpu_is_u8520(void)
+{
+	return dbx500_partnumber() == 0x8520;
+}
+
+static inline bool cpu_is_u8500_family(void)
+{
+	return cpu_is_u8500() || cpu_is_u8520();
+}
+
+static inline bool __attribute_const__ cpu_is_u9540(void)
+{
+	return dbx500_partnumber() == 0x9540;
+}
+
+static inline bool __attribute_const__ cpu_is_u8540(void)
+{
+	return dbx500_partnumber() == 0x8540;
+}
+
+static inline bool __attribute_const__ cpu_is_u8580(void)
+{
+	return dbx500_partnumber() == 0x8580;
+}
+
+static inline bool cpu_is_ux540_family(void)
+{
+	return cpu_is_u9540() || cpu_is_u8540() || cpu_is_u8580();
+}
+
+/*
+ * 8500 revisions
+ */
+
+static inline bool __attribute_const__ cpu_is_u8500ed(void)
+{
+	return cpu_is_u8500() && dbx500_revision() == 0x00;
+}
+
+static inline bool __attribute_const__ cpu_is_u8500v1(void)
+{
+	return cpu_is_u8500() && (dbx500_revision() & 0xf0) == 0xA0;
+}
+
+static inline bool __attribute_const__ cpu_is_u8500v10(void)
+{
+	return cpu_is_u8500() && dbx500_revision() == 0xA0;
+}
+
+static inline bool __attribute_const__ cpu_is_u8500v11(void)
+{
+	return cpu_is_u8500() && dbx500_revision() == 0xA1;
+}
+
+static inline bool __attribute_const__ cpu_is_u8500v2(void)
+{
+	return cpu_is_u8500() && ((dbx500_revision() & 0xf0) == 0xB0);
+}
+
+static inline bool cpu_is_u8500v20(void)
+{
+	return cpu_is_u8500() && (dbx500_revision() == 0xB0);
+}
+
+static inline bool cpu_is_u8500v21(void)
+{
+	return cpu_is_u8500() && (dbx500_revision() == 0xB1);
+}
+
+static inline bool cpu_is_u8500v22(void)
+{
+	return cpu_is_u8500() && (dbx500_revision() == 0xB2);
+}
+
+static inline bool cpu_is_u8500v20_or_later(void)
+{
+	return (cpu_is_u8500() && !cpu_is_u8500v10() && !cpu_is_u8500v11());
+}
+
+/*
+ * 8540 revisions
+ */
+
+static inline bool __attribute_const__ cpu_is_u8540v10(void)
+{
+	return cpu_is_u8540() && dbx500_revision() == 0xA0;
+}
+
+static inline bool __attribute_const__ cpu_is_u8580v10(void)
+{
+	return cpu_is_u8580() && dbx500_revision() == 0xA0;
+}
+
+static inline bool ux500_is_svp(void)
+{
+	return false;
+}
+
+#define ux500_unknown_soc()	BUG()
+
+#endif
diff --git a/arch/arm/mach-ux500/include/mach/hardware.h b/arch/arm/mach-ux500/include/mach/hardware.h
index 28d16e744bfd..5201ddace503 100644
--- a/arch/arm/mach-ux500/include/mach/hardware.h
+++ b/arch/arm/mach-ux500/include/mach/hardware.h
@@ -39,7 +39,6 @@
 
 #ifndef __ASSEMBLY__
 
-#include <mach/id.h>
 extern void __iomem *_PRCMU_BASE;
 
 #define ARRAY_AND_SIZE(x)	(x), ARRAY_SIZE(x)
diff --git a/arch/arm/mach-ux500/include/mach/id.h b/arch/arm/mach-ux500/include/mach/id.h
deleted file mode 100644
index bcc58a8cccbc..000000000000
--- a/arch/arm/mach-ux500/include/mach/id.h
+++ /dev/null
@@ -1,144 +0,0 @@
-/*
- * Copyright (C) ST-Ericsson SA 2010
- *
- * Author: Rabin Vincent <rabin.vincent@stericsson.com> for ST-Ericsson
- * License terms: GNU General Public License (GPL) version 2
- */
-
-#ifndef __MACH_UX500_ID
-#define __MACH_UX500_ID
-
-/**
- * struct dbx500_asic_id - fields of the ASIC ID
- * @process: the manufacturing process, 0x40 is 40 nm 0x00 is "standard"
- * @partnumber: hithereto 0x8500 for DB8500
- * @revision: version code in the series
- */
-struct dbx500_asic_id {
-	u16	partnumber;
-	u8	revision;
-	u8	process;
-};
-
-extern struct dbx500_asic_id dbx500_id;
-
-static inline unsigned int __attribute_const__ dbx500_partnumber(void)
-{
-	return dbx500_id.partnumber;
-}
-
-static inline unsigned int __attribute_const__ dbx500_revision(void)
-{
-	return dbx500_id.revision;
-}
-
-/*
- * SOCs
- */
-
-static inline bool __attribute_const__ cpu_is_u8500(void)
-{
-	return dbx500_partnumber() == 0x8500;
-}
-
-static inline bool __attribute_const__ cpu_is_u8520(void)
-{
-	return dbx500_partnumber() == 0x8520;
-}
-
-static inline bool cpu_is_u8500_family(void)
-{
-	return cpu_is_u8500() || cpu_is_u8520();
-}
-
-static inline bool __attribute_const__ cpu_is_u9540(void)
-{
-	return dbx500_partnumber() == 0x9540;
-}
-
-static inline bool __attribute_const__ cpu_is_u8540(void)
-{
-	return dbx500_partnumber() == 0x8540;
-}
-
-static inline bool __attribute_const__ cpu_is_u8580(void)
-{
-	return dbx500_partnumber() == 0x8580;
-}
-
-static inline bool cpu_is_ux540_family(void)
-{
-	return cpu_is_u9540() || cpu_is_u8540() || cpu_is_u8580();
-}
-
-/*
- * 8500 revisions
- */
-
-static inline bool __attribute_const__ cpu_is_u8500ed(void)
-{
-	return cpu_is_u8500() && dbx500_revision() == 0x00;
-}
-
-static inline bool __attribute_const__ cpu_is_u8500v1(void)
-{
-	return cpu_is_u8500() && (dbx500_revision() & 0xf0) == 0xA0;
-}
-
-static inline bool __attribute_const__ cpu_is_u8500v10(void)
-{
-	return cpu_is_u8500() && dbx500_revision() == 0xA0;
-}
-
-static inline bool __attribute_const__ cpu_is_u8500v11(void)
-{
-	return cpu_is_u8500() && dbx500_revision() == 0xA1;
-}
-
-static inline bool __attribute_const__ cpu_is_u8500v2(void)
-{
-	return cpu_is_u8500() && ((dbx500_revision() & 0xf0) == 0xB0);
-}
-
-static inline bool cpu_is_u8500v20(void)
-{
-	return cpu_is_u8500() && (dbx500_revision() == 0xB0);
-}
-
-static inline bool cpu_is_u8500v21(void)
-{
-	return cpu_is_u8500() && (dbx500_revision() == 0xB1);
-}
-
-static inline bool cpu_is_u8500v22(void)
-{
-	return cpu_is_u8500() && (dbx500_revision() == 0xB2);
-}
-
-static inline bool cpu_is_u8500v20_or_later(void)
-{
-	return (cpu_is_u8500() && !cpu_is_u8500v10() && !cpu_is_u8500v11());
-}
-
-/*
- * 8540 revisions
- */
-
-static inline bool __attribute_const__ cpu_is_u8540v10(void)
-{
-	return cpu_is_u8540() && dbx500_revision() == 0xA0;
-}
-
-static inline bool __attribute_const__ cpu_is_u8580v10(void)
-{
-	return cpu_is_u8580() && dbx500_revision() == 0xA0;
-}
-
-static inline bool ux500_is_svp(void)
-{
-	return false;
-}
-
-#define ux500_unknown_soc()	BUG()
-
-#endif
diff --git a/arch/arm/mach-ux500/platsmp.c b/arch/arm/mach-ux500/platsmp.c
index 3db7782f3afb..1a92e071bd61 100644
--- a/arch/arm/mach-ux500/platsmp.c
+++ b/arch/arm/mach-ux500/platsmp.c
@@ -21,9 +21,12 @@
 #include <asm/hardware/gic.h>
 #include <asm/smp_plat.h>
 #include <asm/smp_scu.h>
+
 #include <mach/hardware.h>
 #include <mach/setup.h>
 
+#include "id.h"
+
 /* This is called from headsmp.S to wakeup the secondary core */
 extern void u8500_secondary_startup(void);
 
diff --git a/arch/arm/mach-ux500/timer.c b/arch/arm/mach-ux500/timer.c
index 875309acb022..c685ad1a2099 100644
--- a/arch/arm/mach-ux500/timer.c
+++ b/arch/arm/mach-ux500/timer.c
@@ -17,6 +17,8 @@
 #include <mach/hardware.h>
 #include <mach/irqs.h>
 
+#include "id.h"
+
 #ifdef CONFIG_HAVE_ARM_TWD
 static DEFINE_TWD_LOCAL_TIMER(u8500_twd_local_timer,
 			      U8500_TWD_BASE, IRQ_LOCALTIMER);
diff --git a/drivers/cpufreq/db8500-cpufreq.c b/drivers/cpufreq/db8500-cpufreq.c
index 4f154bc0ebe4..523c9403bfce 100644
--- a/drivers/cpufreq/db8500-cpufreq.c
+++ b/drivers/cpufreq/db8500-cpufreq.c
@@ -167,9 +167,6 @@ static struct platform_driver db8500_cpufreq_plat_driver = {
 
 static int __init db8500_cpufreq_register(void)
 {
-	if (!cpu_is_u8500_family())
-		return -ENODEV;
-
 	pr_info("cpufreq for DB8500 started\n");
 	return platform_driver_register(&db8500_cpufreq_plat_driver);
 }
diff --git a/drivers/mfd/db8500-prcmu.c b/drivers/mfd/db8500-prcmu.c
index dc8826d8d69d..67d8b25d183e 100644
--- a/drivers/mfd/db8500-prcmu.c
+++ b/drivers/mfd/db8500-prcmu.c
@@ -36,7 +36,6 @@
 #include <mach/hardware.h>
 #include <mach/irqs.h>
 #include <mach/db8500-regs.h>
-#include <mach/id.h>
 #include "dbx500-prcmu-regs.h"
 
 /* Offset for the firmware version within the TCPM */
@@ -216,10 +215,8 @@
 #define PRCM_REQ_MB5_I2C_HW_BITS	(PRCM_REQ_MB5 + 0x1)
 #define PRCM_REQ_MB5_I2C_REG		(PRCM_REQ_MB5 + 0x2)
 #define PRCM_REQ_MB5_I2C_VAL		(PRCM_REQ_MB5 + 0x3)
-#define PRCMU_I2C_WRITE(slave) \
-	(((slave) << 1) | (cpu_is_u8500v2() ? BIT(6) : 0))
-#define PRCMU_I2C_READ(slave) \
-	(((slave) << 1) | BIT(0) | (cpu_is_u8500v2() ? BIT(6) : 0))
+#define PRCMU_I2C_WRITE(slave) (((slave) << 1) | BIT(6))
+#define PRCMU_I2C_READ(slave) (((slave) << 1) | BIT(0) | BIT(6))
 #define PRCMU_I2C_STOP_EN		BIT(3)
 
 /* Mailbox 5 ACKs */
@@ -1049,12 +1046,13 @@ int db8500_prcmu_get_ddr_opp(void)
  *
  * This function sets the operating point of the DDR.
  */
+static bool enable_set_ddr_opp;
 int db8500_prcmu_set_ddr_opp(u8 opp)
 {
 	if (opp < DDR_100_OPP || opp > DDR_25_OPP)
 		return -EINVAL;
 	/* Changing the DDR OPP can hang the hardware pre-v21 */
-	if (cpu_is_u8500v20_or_later() && !cpu_is_u8500v20())
+	if (enable_set_ddr_opp)
 		writeb(opp, PRCM_DDR_SUBSYS_APE_MINBW);
 
 	return 0;
@@ -2790,6 +2788,7 @@ void __init db8500_prcmu_early_init(void)
 		pr_err("prcmu: Unsupported chip version\n");
 		BUG();
 	}
+	tcdm_base = __io_address(U8500_PRCMU_TCDM_BASE);
 
 	spin_lock_init(&mb0_transfer.lock);
 	spin_lock_init(&mb0_transfer.dbb_irqs_lock);
@@ -3104,9 +3103,6 @@ static int db8500_prcmu_probe(struct platform_device *pdev)
 	struct device_node *np = pdev->dev.of_node;
 	int irq = 0, err = 0, i;
 
-	if (ux500_is_svp())
-		return -ENODEV;
-
 	init_prcm_registers();
 
 	/* Clean up the mailbox interrupts after pre-kernel code. */
@@ -3135,8 +3131,7 @@ static int db8500_prcmu_probe(struct platform_device *pdev)
 		}
 	}
 
-	if (cpu_is_u8500v20_or_later())
-		prcmu_config_esram0_deep_sleep(ESRAM0_DEEP_SLEEP_STATE_RET);
+	prcmu_config_esram0_deep_sleep(ESRAM0_DEEP_SLEEP_STATE_RET);
 
 	db8500_prcmu_update_cpufreq();
 
diff --git a/include/linux/mfd/dbx500-prcmu.h b/include/linux/mfd/dbx500-prcmu.h
index ac601470962c..155280642583 100644
--- a/include/linux/mfd/dbx500-prcmu.h
+++ b/include/linux/mfd/dbx500-prcmu.h
@@ -218,8 +218,6 @@ enum ddr_pwrst {
 
 #if defined(CONFIG_UX500_SOC_DB8500)
 
-#include <mach/id.h>
-
 static inline void __init prcmu_early_init(void)
 {
 	return db8500_prcmu_early_init();
-- 
cgit v1.2.3


From 5c766d642bcaffd0c2a5b354db2068515b3846cf Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@resnulli.us>
Date: Thu, 24 Jan 2013 09:41:41 +0000
Subject: ipv4: introduce address lifetime
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There are some usecase when lifetime of ipv4 addresses might be helpful.
For example:
1) initramfs networkmanager uses a DHCP daemon to learn network
configuration parameters
2) initramfs networkmanager addresses, routes and DNS configuration
3) initramfs networkmanager is requested to stop
4) initramfs networkmanager stops all daemons including dhclient
5) there are addresses and routes configured but no daemon running. If
the system doesn't start networkmanager for some reason, addresses and
routes will be used forever, which violates RFC 2131.

This patch is essentially a backport of ivp6 address lifetime mechanism
for ipv4 addresses.

Current "ip" tool supports this without any patch (since it does not
distinguish between ipv4 and ipv6 addresses in this perspective.

Also, this should be back-compatible with all current netlink users.

Reported-by: Pavel Šimerda <psimerda@redhat.com>
Signed-off-by: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/inetdevice.h |   6 ++
 include/net/addrconf.h     |   4 +
 net/ipv4/devinet.c         | 215 +++++++++++++++++++++++++++++++++++++++++++--
 net/ipv6/addrconf.c        |   4 -
 4 files changed, 220 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h
index a9d828976a77..ea1e3b863890 100644
--- a/include/linux/inetdevice.h
+++ b/include/linux/inetdevice.h
@@ -166,6 +166,12 @@ struct in_ifaddr {
 	unsigned char		ifa_flags;
 	unsigned char		ifa_prefixlen;
 	char			ifa_label[IFNAMSIZ];
+
+	/* In seconds, relative to tstamp. Expiry is at tstamp + HZ * lft. */
+	__u32			ifa_valid_lft;
+	__u32			ifa_preferred_lft;
+	unsigned long		ifa_cstamp; /* created timestamp */
+	unsigned long		ifa_tstamp; /* updated timestamp */
 };
 
 extern int register_inetaddr_notifier(struct notifier_block *nb);
diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 6c58d507123f..40be2a0d8ae1 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -15,6 +15,10 @@
 
 #define IPV6_MAX_ADDRESSES		16
 
+#define ADDRCONF_TIMER_FUZZ_MINUS	(HZ > 50 ? HZ / 50 : 1)
+#define ADDRCONF_TIMER_FUZZ		(HZ / 4)
+#define ADDRCONF_TIMER_FUZZ_MAX		(HZ)
+
 #include <linux/in.h>
 #include <linux/in6.h>
 
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index a8e4f2665d5e..5281314886c1 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -63,6 +63,7 @@
 #include <net/ip_fib.h>
 #include <net/rtnetlink.h>
 #include <net/net_namespace.h>
+#include <net/addrconf.h>
 
 #include "fib_lookup.h"
 
@@ -93,6 +94,7 @@ static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
 	[IFA_ADDRESS]   	= { .type = NLA_U32 },
 	[IFA_BROADCAST] 	= { .type = NLA_U32 },
 	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
+	[IFA_CACHEINFO]		= { .len = sizeof(struct ifa_cacheinfo) },
 };
 
 #define IN4_ADDR_HSIZE_SHIFT	8
@@ -417,6 +419,10 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
 	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
 }
 
+static void check_lifetime(struct work_struct *work);
+
+static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
+
 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
 			     u32 portid)
 {
@@ -462,6 +468,9 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
 
 	inet_hash_insert(dev_net(in_dev->dev), ifa);
 
+	cancel_delayed_work(&check_lifetime_work);
+	schedule_delayed_work(&check_lifetime_work, 0);
+
 	/* Send message first, then call notifier.
 	   Notifier will trigger FIB update, so that
 	   listeners of netlink will know about new ifaddr */
@@ -573,7 +582,107 @@ errout:
 	return err;
 }
 
-static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
+#define INFINITY_LIFE_TIME	0xFFFFFFFF
+
+static void check_lifetime(struct work_struct *work)
+{
+	unsigned long now, next, next_sec, next_sched;
+	struct in_ifaddr *ifa;
+	struct hlist_node *node;
+	int i;
+
+	now = jiffies;
+	next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
+
+	rcu_read_lock();
+	for (i = 0; i < IN4_ADDR_HSIZE; i++) {
+		hlist_for_each_entry_rcu(ifa, node,
+					 &inet_addr_lst[i], hash) {
+			unsigned long age;
+
+			if (ifa->ifa_flags & IFA_F_PERMANENT)
+				continue;
+
+			/* We try to batch several events at once. */
+			age = (now - ifa->ifa_tstamp +
+			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
+
+			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
+			    age >= ifa->ifa_valid_lft) {
+				struct in_ifaddr **ifap ;
+
+				rtnl_lock();
+				for (ifap = &ifa->ifa_dev->ifa_list;
+				     *ifap != NULL; ifap = &ifa->ifa_next) {
+					if (*ifap == ifa)
+						inet_del_ifa(ifa->ifa_dev,
+							     ifap, 1);
+				}
+				rtnl_unlock();
+			} else if (ifa->ifa_preferred_lft ==
+				   INFINITY_LIFE_TIME) {
+				continue;
+			} else if (age >= ifa->ifa_preferred_lft) {
+				if (time_before(ifa->ifa_tstamp +
+						ifa->ifa_valid_lft * HZ, next))
+					next = ifa->ifa_tstamp +
+					       ifa->ifa_valid_lft * HZ;
+
+				if (!(ifa->ifa_flags & IFA_F_DEPRECATED)) {
+					ifa->ifa_flags |= IFA_F_DEPRECATED;
+					rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
+				}
+			} else if (time_before(ifa->ifa_tstamp +
+					       ifa->ifa_preferred_lft * HZ,
+					       next)) {
+				next = ifa->ifa_tstamp +
+				       ifa->ifa_preferred_lft * HZ;
+			}
+		}
+	}
+	rcu_read_unlock();
+
+	next_sec = round_jiffies_up(next);
+	next_sched = next;
+
+	/* If rounded timeout is accurate enough, accept it. */
+	if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
+		next_sched = next_sec;
+
+	now = jiffies;
+	/* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
+	if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
+		next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
+
+	schedule_delayed_work(&check_lifetime_work, next_sched - now);
+}
+
+static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
+			     __u32 prefered_lft)
+{
+	unsigned long timeout;
+
+	ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
+
+	timeout = addrconf_timeout_fixup(valid_lft, HZ);
+	if (addrconf_finite_timeout(timeout))
+		ifa->ifa_valid_lft = timeout;
+	else
+		ifa->ifa_flags |= IFA_F_PERMANENT;
+
+	timeout = addrconf_timeout_fixup(prefered_lft, HZ);
+	if (addrconf_finite_timeout(timeout)) {
+		if (timeout == 0)
+			ifa->ifa_flags |= IFA_F_DEPRECATED;
+		ifa->ifa_preferred_lft = timeout;
+	}
+	ifa->ifa_tstamp = jiffies;
+	if (!ifa->ifa_cstamp)
+		ifa->ifa_cstamp = ifa->ifa_tstamp;
+}
+
+static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
+				       __u32 *pvalid_lft, __u32 *pprefered_lft)
 {
 	struct nlattr *tb[IFA_MAX+1];
 	struct in_ifaddr *ifa;
@@ -633,24 +742,73 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
 	else
 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
 
+	if (tb[IFA_CACHEINFO]) {
+		struct ifa_cacheinfo *ci;
+
+		ci = nla_data(tb[IFA_CACHEINFO]);
+		if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
+			err = -EINVAL;
+			goto errout;
+		}
+		*pvalid_lft = ci->ifa_valid;
+		*pprefered_lft = ci->ifa_prefered;
+	}
+
 	return ifa;
 
 errout:
 	return ERR_PTR(err);
 }
 
+static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
+{
+	struct in_device *in_dev = ifa->ifa_dev;
+	struct in_ifaddr *ifa1, **ifap;
+
+	if (!ifa->ifa_local)
+		return NULL;
+
+	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
+	     ifap = &ifa1->ifa_next) {
+		if (ifa1->ifa_mask == ifa->ifa_mask &&
+		    inet_ifa_match(ifa1->ifa_address, ifa) &&
+		    ifa1->ifa_local == ifa->ifa_local)
+			return ifa1;
+	}
+	return NULL;
+}
+
 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 {
 	struct net *net = sock_net(skb->sk);
 	struct in_ifaddr *ifa;
+	struct in_ifaddr *ifa_existing;
+	__u32 valid_lft = INFINITY_LIFE_TIME;
+	__u32 prefered_lft = INFINITY_LIFE_TIME;
 
 	ASSERT_RTNL();
 
-	ifa = rtm_to_ifaddr(net, nlh);
+	ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
 	if (IS_ERR(ifa))
 		return PTR_ERR(ifa);
 
-	return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
+	ifa_existing = find_matching_ifa(ifa);
+	if (!ifa_existing) {
+		/* It would be best to check for !NLM_F_CREATE here but
+		 * userspace alreay relies on not having to provide this.
+		 */
+		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
+		return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
+	} else {
+		inet_free_ifa(ifa);
+
+		if (nlh->nlmsg_flags & NLM_F_EXCL ||
+		    !(nlh->nlmsg_flags & NLM_F_REPLACE))
+			return -EEXIST;
+
+		set_ifa_lifetime(ifa_existing, valid_lft, prefered_lft);
+	}
+	return 0;
 }
 
 /*
@@ -852,6 +1010,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
 			ifa->ifa_prefixlen = 32;
 			ifa->ifa_mask = inet_make_mask(32);
 		}
+		set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
 		ret = inet_set_ifa(dev, ifa);
 		break;
 
@@ -1190,6 +1349,8 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
 				ifa->ifa_dev = in_dev;
 				ifa->ifa_scope = RT_SCOPE_HOST;
 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
+				set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
+						 INFINITY_LIFE_TIME);
 				inet_insert_ifa(ifa);
 			}
 		}
@@ -1246,11 +1407,30 @@ static size_t inet_nlmsg_size(void)
 	       + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
 }
 
+static inline u32 cstamp_delta(unsigned long cstamp)
+{
+	return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
+}
+
+static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
+			 unsigned long tstamp, u32 preferred, u32 valid)
+{
+	struct ifa_cacheinfo ci;
+
+	ci.cstamp = cstamp_delta(cstamp);
+	ci.tstamp = cstamp_delta(tstamp);
+	ci.ifa_prefered = preferred;
+	ci.ifa_valid = valid;
+
+	return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
+}
+
 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
 			    u32 portid, u32 seq, int event, unsigned int flags)
 {
 	struct ifaddrmsg *ifm;
 	struct nlmsghdr  *nlh;
+	u32 preferred, valid;
 
 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
 	if (nlh == NULL)
@@ -1259,10 +1439,31 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
 	ifm = nlmsg_data(nlh);
 	ifm->ifa_family = AF_INET;
 	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
-	ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
+	ifm->ifa_flags = ifa->ifa_flags;
 	ifm->ifa_scope = ifa->ifa_scope;
 	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
 
+	if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
+		preferred = ifa->ifa_preferred_lft;
+		valid = ifa->ifa_valid_lft;
+		if (preferred != INFINITY_LIFE_TIME) {
+			long tval = (jiffies - ifa->ifa_tstamp) / HZ;
+
+			if (preferred > tval)
+				preferred -= tval;
+			else
+				preferred = 0;
+			if (valid != INFINITY_LIFE_TIME) {
+				if (valid > tval)
+					valid -= tval;
+				else
+					valid = 0;
+			}
+		}
+	} else {
+		preferred = INFINITY_LIFE_TIME;
+		valid = INFINITY_LIFE_TIME;
+	}
 	if ((ifa->ifa_address &&
 	     nla_put_be32(skb, IFA_ADDRESS, ifa->ifa_address)) ||
 	    (ifa->ifa_local &&
@@ -1270,7 +1471,9 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
 	    (ifa->ifa_broadcast &&
 	     nla_put_be32(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
 	    (ifa->ifa_label[0] &&
-	     nla_put_string(skb, IFA_LABEL, ifa->ifa_label)))
+	     nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
+	    put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
+			  preferred, valid))
 		goto nla_put_failure;
 
 	return nlmsg_end(skb, nlh);
@@ -1988,6 +2191,8 @@ void __init devinet_init(void)
 	register_gifconf(PF_INET, inet_gifconf);
 	register_netdevice_notifier(&ip_netdev_notifier);
 
+	schedule_delayed_work(&check_lifetime_work, 0);
+
 	rtnl_af_register(&inet_af_ops);
 
 	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 80d59802d964..7f7332b44699 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -110,10 +110,6 @@ static inline u32 cstamp_delta(unsigned long cstamp)
 	return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
 }
 
-#define ADDRCONF_TIMER_FUZZ_MINUS	(HZ > 50 ? HZ/50 : 1)
-#define ADDRCONF_TIMER_FUZZ		(HZ / 4)
-#define ADDRCONF_TIMER_FUZZ_MAX		(HZ)
-
 #ifdef CONFIG_SYSCTL
 static void addrconf_sysctl_register(struct inet6_dev *idev);
 static void addrconf_sysctl_unregister(struct inet6_dev *idev);
-- 
cgit v1.2.3


From 6f16eebe1ff82176339a0439c98ebec9768b0ee2 Mon Sep 17 00:00:00 2001
From: John Stultz <john.stultz@linaro.org>
Date: Fri, 25 Jan 2013 17:08:12 -0800
Subject: timekeeping: Switch HAS_PERSISTENT_CLOCK to
 ALWAYS_USE_PERSISTENT_CLOCK

Jason pointed out the HAS_PERSISTENT_CLOCK name isn't
quite accurate for the config, as some systems may have
the persistent_clock in some cases, but not always.

So change the config name to the more clear
ALWAYS_USE_PERSISTENT_CLOCK.

Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 arch/x86/Kconfig     | 2 +-
 drivers/rtc/Kconfig  | 4 ++--
 include/linux/time.h | 2 +-
 kernel/time/Kconfig  | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index a4135b5e562e..335da90560e4 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -108,7 +108,7 @@ config X86
 	select GENERIC_STRNLEN_USER
 	select HAVE_RCU_USER_QS if X86_64
 	select HAVE_IRQ_TIME_ACCOUNTING
-	select HAS_PERSISTENT_CLOCK
+	select ALWAYS_USE_PERSISTENT_CLOCK
 	select GENERIC_KERNEL_THREAD
 	select GENERIC_KERNEL_EXECVE
 	select MODULES_USE_ELF_REL if X86_32
diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index 05761de7929b..da60de01f732 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -20,7 +20,7 @@ if RTC_CLASS
 config RTC_HCTOSYS
 	bool "Set system time from RTC on startup and resume"
 	default y
-	depends on !HAS_PERSISTENT_CLOCK
+	depends on !ALWAYS_USE_PERSISTENT_CLOCK
 	help
 	  If you say yes here, the system time (wall clock) will be set using
 	  the value read from a specified RTC device. This is useful to avoid
@@ -29,7 +29,7 @@ config RTC_HCTOSYS
 config RTC_SYSTOHC
 	bool "Set the RTC time based on NTP synchronization"
 	default y
-	depends on !HAS_PERSISTENT_CLOCK
+	depends on !ALWAYS_USE_PERSISTENT_CLOCK
 	help
 	  If you say yes here, the system time (wall clock) will be stored
 	  in the RTC specified by RTC_HCTOSYS_DEVICE approximately every 11
diff --git a/include/linux/time.h b/include/linux/time.h
index 369b6e3b87d8..476e1d7b2c37 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -117,7 +117,7 @@ static inline bool timespec_valid_strict(const struct timespec *ts)
 
 extern bool persistent_clock_exist;
 
-#ifdef CONFIG_HAS_PERSISTENT_CLOCK
+#ifdef ALWAYS_USE_PERSISTENT_CLOCK
 #define has_persistent_clock()	true
 #else
 static inline bool has_persistent_clock(void)
diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig
index f7e45b9b142b..0dddb9d09d0b 100644
--- a/kernel/time/Kconfig
+++ b/kernel/time/Kconfig
@@ -13,7 +13,7 @@ config ARCH_CLOCKSOURCE_DATA
 	bool
 
 # Platforms has a persistent clock
-config HAS_PERSISTENT_CLOCK
+config ALWAYS_USE_PERSISTENT_CLOCK
 	bool
 	default n
 
-- 
cgit v1.2.3


From 595ad9af8584908ea5fb698b836169d05b99f186 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Thu, 24 Jan 2013 12:20:09 -0800
Subject: memblock: Add memblock_mem_size()

Use it to get mem size under the limit_pfn.
to replace local version in x86 reserved_initrd.

-v2: remove not needed cast that is pointed out by HPA.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Link: http://lkml.kernel.org/r/1359058816-7615-29-git-send-email-yinghai@kernel.org
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/kernel/setup.c  | 16 +---------------
 include/linux/memblock.h |  1 +
 mm/memblock.c            | 17 +++++++++++++++++
 3 files changed, 19 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index b80bee10982f..bbe8cdf7515e 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -363,20 +363,6 @@ static void __init relocate_initrd(void)
 		ramdisk_here, ramdisk_here + ramdisk_size - 1);
 }
 
-static u64 __init get_mem_size(unsigned long limit_pfn)
-{
-	int i;
-	u64 mapped_pages = 0;
-	unsigned long start_pfn, end_pfn;
-
-	for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, NULL) {
-		start_pfn = min_t(unsigned long, start_pfn, limit_pfn);
-		end_pfn = min_t(unsigned long, end_pfn, limit_pfn);
-		mapped_pages += end_pfn - start_pfn;
-	}
-
-	return mapped_pages << PAGE_SHIFT;
-}
 static void __init early_reserve_initrd(void)
 {
 	/* Assume only end is not page aligned */
@@ -404,7 +390,7 @@ static void __init reserve_initrd(void)
 
 	initrd_start = 0;
 
-	mapped_size = get_mem_size(max_pfn_mapped);
+	mapped_size = memblock_mem_size(max_pfn_mapped);
 	if (ramdisk_size >= (mapped_size>>1))
 		panic("initrd too large to handle, "
 		       "disabling initrd (%lld needed, %lld available)\n",
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index d452ee191066..f388203db7e8 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -155,6 +155,7 @@ phys_addr_t memblock_alloc_base(phys_addr_t size, phys_addr_t align,
 phys_addr_t __memblock_alloc_base(phys_addr_t size, phys_addr_t align,
 				  phys_addr_t max_addr);
 phys_addr_t memblock_phys_mem_size(void);
+phys_addr_t memblock_mem_size(unsigned long limit_pfn);
 phys_addr_t memblock_start_of_DRAM(void);
 phys_addr_t memblock_end_of_DRAM(void);
 void memblock_enforce_memory_limit(phys_addr_t memory_limit);
diff --git a/mm/memblock.c b/mm/memblock.c
index 88adc8afb610..b8d9147e5c08 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -828,6 +828,23 @@ phys_addr_t __init memblock_phys_mem_size(void)
 	return memblock.memory.total_size;
 }
 
+phys_addr_t __init memblock_mem_size(unsigned long limit_pfn)
+{
+	unsigned long pages = 0;
+	struct memblock_region *r;
+	unsigned long start_pfn, end_pfn;
+
+	for_each_memblock(memory, r) {
+		start_pfn = memblock_region_memory_base_pfn(r);
+		end_pfn = memblock_region_memory_end_pfn(r);
+		start_pfn = min_t(unsigned long, start_pfn, limit_pfn);
+		end_pfn = min_t(unsigned long, end_pfn, limit_pfn);
+		pages += end_pfn - start_pfn;
+	}
+
+	return (phys_addr_t)pages << PAGE_SHIFT;
+}
+
 /* lowest address */
 phys_addr_t __init_memblock memblock_start_of_DRAM(void)
 {
-- 
cgit v1.2.3


From 0212f9159694be61c6bc52e925fa76643e0c1abf Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Thu, 24 Jan 2013 12:20:11 -0800
Subject: x86: Add Crash kernel low reservation

During kdump kernel's booting stage, it need to find low ram for
swiotlb buffer when system does not support intel iommu/dmar remapping.

kexed-tools is appending memmap=exactmap and range from /proc/iomem
with "Crash kernel", and that range is above 4G for 64bit after boot
protocol 2.12.

We need to add another range in /proc/iomem like "Crash kernel low",
so kexec-tools could find that info and append to kdump kernel
command line.

Try to reserve some under 4G if the normal "Crash kernel" is above 4G.

User could specify the size with crashkernel_low=XX[KMG].

-v2: fix warning that is found by Fengguang's test robot.
-v3: move out get_mem_size change to another patch, to solve compiling
     warning that is found by Borislav Petkov <bp@alien8.de>
-v4: user must specify crashkernel_low if system does not support
     intel or amd iommu.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Link: http://lkml.kernel.org/r/1359058816-7615-31-git-send-email-yinghai@kernel.org
Cc: Eric Biederman <ebiederm@xmission.com>
Cc: Rob Landley <rob@landley.net>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 Documentation/kernel-parameters.txt |  3 +++
 arch/x86/kernel/setup.c             | 42 +++++++++++++++++++++++++++++++++++--
 include/linux/kexec.h               |  3 +++
 kernel/kexec.c                      | 34 +++++++++++++++++++++++++-----
 4 files changed, 75 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 363e348bff9b..da0e0773ca96 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -594,6 +594,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			is selected automatically. Check
 			Documentation/kdump/kdump.txt for further details.
 
+	crashkernel_low=size[KMG]
+			[KNL, x86] parts under 4G.
+
 	crashkernel=range1:size1[,range2:size2,...][@offset]
 			[KNL] Same as above, but depends on the memory
 			in the running system. The syntax of range is
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 4778ddeedc8a..5dc47c3e537b 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -508,8 +508,44 @@ static void __init memblock_x86_reserve_range_setup_data(void)
 # define CRASH_KERNEL_ADDR_MAX	MAXMEM
 #endif
 
+static void __init reserve_crashkernel_low(void)
+{
+#ifdef CONFIG_X86_64
+	const unsigned long long alignment = 16<<20;	/* 16M */
+	unsigned long long low_base = 0, low_size = 0;
+	unsigned long total_low_mem;
+	unsigned long long base;
+	int ret;
+
+	total_low_mem = memblock_mem_size(1UL<<(32-PAGE_SHIFT));
+	ret = parse_crashkernel_low(boot_command_line, total_low_mem,
+						&low_size, &base);
+	if (ret != 0 || low_size <= 0)
+		return;
+
+	low_base = memblock_find_in_range(low_size, (1ULL<<32),
+					low_size, alignment);
+
+	if (!low_base) {
+		pr_info("crashkernel low reservation failed - No suitable area found.\n");
+
+		return;
+	}
+
+	memblock_reserve(low_base, low_size);
+	pr_info("Reserving %ldMB of low memory at %ldMB for crashkernel (System low RAM: %ldMB)\n",
+			(unsigned long)(low_size >> 20),
+			(unsigned long)(low_base >> 20),
+			(unsigned long)(total_low_mem >> 20));
+	crashk_low_res.start = low_base;
+	crashk_low_res.end   = low_base + low_size - 1;
+	insert_resource(&iomem_resource, &crashk_low_res);
+#endif
+}
+
 static void __init reserve_crashkernel(void)
 {
+	const unsigned long long alignment = 16<<20;	/* 16M */
 	unsigned long long total_mem;
 	unsigned long long crash_size, crash_base;
 	int ret;
@@ -523,8 +559,6 @@ static void __init reserve_crashkernel(void)
 
 	/* 0 means: find the address automatically */
 	if (crash_base <= 0) {
-		const unsigned long long alignment = 16<<20;	/* 16M */
-
 		/*
 		 *  kexec want bzImage is below CRASH_KERNEL_ADDR_MAX
 		 */
@@ -535,6 +569,7 @@ static void __init reserve_crashkernel(void)
 			pr_info("crashkernel reservation failed - No suitable area found.\n");
 			return;
 		}
+
 	} else {
 		unsigned long long start;
 
@@ -556,6 +591,9 @@ static void __init reserve_crashkernel(void)
 	crashk_res.start = crash_base;
 	crashk_res.end   = crash_base + crash_size - 1;
 	insert_resource(&iomem_resource, &crashk_res);
+
+	if (crash_base >= (1ULL<<32))
+		reserve_crashkernel_low();
 }
 #else
 static void __init reserve_crashkernel(void)
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index d0b8458a703a..d2e6927bbaae 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -191,6 +191,7 @@ extern struct kimage *kexec_crash_image;
 /* Location of a reserved region to hold the crash kernel.
  */
 extern struct resource crashk_res;
+extern struct resource crashk_low_res;
 typedef u32 note_buf_t[KEXEC_NOTE_BYTES/4];
 extern note_buf_t __percpu *crash_notes;
 extern u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
@@ -199,6 +200,8 @@ extern size_t vmcoreinfo_max_size;
 
 int __init parse_crashkernel(char *cmdline, unsigned long long system_ram,
 		unsigned long long *crash_size, unsigned long long *crash_base);
+int parse_crashkernel_low(char *cmdline, unsigned long long system_ram,
+		unsigned long long *crash_size, unsigned long long *crash_base);
 int crash_shrink_memory(unsigned long new_size);
 size_t crash_get_memory_size(void);
 void crash_free_reserved_phys_range(unsigned long begin, unsigned long end);
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 5e4bd7864c5d..2436ffcec91f 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -54,6 +54,12 @@ struct resource crashk_res = {
 	.end   = 0,
 	.flags = IORESOURCE_BUSY | IORESOURCE_MEM
 };
+struct resource crashk_low_res = {
+	.name  = "Crash kernel low",
+	.start = 0,
+	.end   = 0,
+	.flags = IORESOURCE_BUSY | IORESOURCE_MEM
+};
 
 int kexec_should_crash(struct task_struct *p)
 {
@@ -1369,10 +1375,11 @@ static int __init parse_crashkernel_simple(char 		*cmdline,
  * That function is the entry point for command line parsing and should be
  * called from the arch-specific code.
  */
-int __init parse_crashkernel(char 		 *cmdline,
+static int __init __parse_crashkernel(char *cmdline,
 			     unsigned long long system_ram,
 			     unsigned long long *crash_size,
-			     unsigned long long *crash_base)
+			     unsigned long long *crash_base,
+				const char *name)
 {
 	char 	*p = cmdline, *ck_cmdline = NULL;
 	char	*first_colon, *first_space;
@@ -1382,16 +1389,16 @@ int __init parse_crashkernel(char 		 *cmdline,
 	*crash_base = 0;
 
 	/* find crashkernel and use the last one if there are more */
-	p = strstr(p, "crashkernel=");
+	p = strstr(p, name);
 	while (p) {
 		ck_cmdline = p;
-		p = strstr(p+1, "crashkernel=");
+		p = strstr(p+1, name);
 	}
 
 	if (!ck_cmdline)
 		return -EINVAL;
 
-	ck_cmdline += 12; /* strlen("crashkernel=") */
+	ck_cmdline += strlen(name);
 
 	/*
 	 * if the commandline contains a ':', then that's the extended
@@ -1409,6 +1416,23 @@ int __init parse_crashkernel(char 		 *cmdline,
 	return 0;
 }
 
+int __init parse_crashkernel(char *cmdline,
+			     unsigned long long system_ram,
+			     unsigned long long *crash_size,
+			     unsigned long long *crash_base)
+{
+	return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base,
+					"crashkernel=");
+}
+
+int __init parse_crashkernel_low(char *cmdline,
+			     unsigned long long system_ram,
+			     unsigned long long *crash_size,
+			     unsigned long long *crash_base)
+{
+	return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base,
+					"crashkernel_low=");
+}
 
 static void update_vmcoreinfo_note(void)
 {
-- 
cgit v1.2.3


From 38fa4175e60d98fb1c9815fb14f8057576dade73 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Thu, 24 Jan 2013 12:20:15 -0800
Subject: mm: Add alloc_bootmem_low_pages_nopanic()

We don't need to panic in some case, like for swiotlb preallocating.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Link: http://lkml.kernel.org/r/1359058816-7615-35-git-send-email-yinghai@kernel.org
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 include/linux/bootmem.h | 5 +++++
 mm/bootmem.c            | 8 ++++++++
 mm/nobootmem.c          | 8 ++++++++
 3 files changed, 21 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index 3f778c27f825..3cd16ba82f15 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -99,6 +99,9 @@ void *___alloc_bootmem_node_nopanic(pg_data_t *pgdat,
 extern void *__alloc_bootmem_low(unsigned long size,
 				 unsigned long align,
 				 unsigned long goal);
+void *__alloc_bootmem_low_nopanic(unsigned long size,
+				 unsigned long align,
+				 unsigned long goal);
 extern void *__alloc_bootmem_low_node(pg_data_t *pgdat,
 				      unsigned long size,
 				      unsigned long align,
@@ -132,6 +135,8 @@ extern void *__alloc_bootmem_low_node(pg_data_t *pgdat,
 
 #define alloc_bootmem_low(x) \
 	__alloc_bootmem_low(x, SMP_CACHE_BYTES, 0)
+#define alloc_bootmem_low_pages_nopanic(x) \
+	__alloc_bootmem_low_nopanic(x, PAGE_SIZE, 0)
 #define alloc_bootmem_low_pages(x) \
 	__alloc_bootmem_low(x, PAGE_SIZE, 0)
 #define alloc_bootmem_low_pages_node(pgdat, x) \
diff --git a/mm/bootmem.c b/mm/bootmem.c
index b93376c39b61..2b0bcb019ec2 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -833,6 +833,14 @@ void * __init __alloc_bootmem_low(unsigned long size, unsigned long align,
 	return ___alloc_bootmem(size, align, goal, ARCH_LOW_ADDRESS_LIMIT);
 }
 
+void * __init __alloc_bootmem_low_nopanic(unsigned long size,
+					  unsigned long align,
+					  unsigned long goal)
+{
+	return ___alloc_bootmem_nopanic(size, align, goal,
+					ARCH_LOW_ADDRESS_LIMIT);
+}
+
 /**
  * __alloc_bootmem_low_node - allocate low boot memory from a specific node
  * @pgdat: node to allocate from
diff --git a/mm/nobootmem.c b/mm/nobootmem.c
index 03d152a76acf..5e07d36e381e 100644
--- a/mm/nobootmem.c
+++ b/mm/nobootmem.c
@@ -391,6 +391,14 @@ void * __init __alloc_bootmem_low(unsigned long size, unsigned long align,
 	return ___alloc_bootmem(size, align, goal, ARCH_LOW_ADDRESS_LIMIT);
 }
 
+void * __init __alloc_bootmem_low_nopanic(unsigned long size,
+					  unsigned long align,
+					  unsigned long goal)
+{
+	return ___alloc_bootmem_nopanic(size, align, goal,
+					ARCH_LOW_ADDRESS_LIMIT);
+}
+
 /**
  * __alloc_bootmem_low_node - allocate low boot memory from a specific node
  * @pgdat: node to allocate from
-- 
cgit v1.2.3


From ac2cbab21f318e19bc176a7f38a120cec835220f Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Thu, 24 Jan 2013 12:20:16 -0800
Subject: x86: Don't panic if can not alloc buffer for swiotlb

Normal boot path on system with iommu support:
swiotlb buffer will be allocated early at first and then try to initialize
iommu, if iommu for intel or AMD could setup properly, swiotlb buffer
will be freed.

The early allocating is with bootmem, and could panic when we try to use
kdump with buffer above 4G only, or with memmap to limit mem under 4G.
for example: memmap=4095M$1M to remove memory under 4G.

According to Eric, add _nopanic version and no_iotlb_memory to fail
map single later if swiotlb is still needed.

-v2: don't pass nopanic, and use -ENOMEM return value according to Eric.
     panic early instead of using swiotlb_full to panic...according to Eric/Konrad.
-v3: make swiotlb_init to be notpanic, but will affect:
     arm64, ia64, powerpc, tile, unicore32, x86.
-v4: cleanup swiotlb_init by removing swiotlb_init_with_default_size.

Suggested-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Link: http://lkml.kernel.org/r/1359058816-7615-36-git-send-email-yinghai@kernel.org
Reviewed-and-tested-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Joerg Roedel <joro@8bytes.org>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Jeremy Fitzhardinge <jeremy@goop.org>
Cc: Kyungmin Park <kyungmin.park@samsung.com>
Cc: Marek Szyprowski <m.szyprowski@samsung.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Andrzej Pietrasiewicz <andrzej.p@samsung.com>
Cc: linux-mips@linux-mips.org
Cc: xen-devel@lists.xensource.com
Cc: virtualization@lists.linux-foundation.org
Cc: Shuah Khan <shuahkhan@gmail.com>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/mips/cavium-octeon/dma-octeon.c |  3 ++-
 drivers/xen/swiotlb-xen.c            |  4 ++-
 include/linux/swiotlb.h              |  2 +-
 lib/swiotlb.c                        | 47 ++++++++++++++++++++++--------------
 4 files changed, 35 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/arch/mips/cavium-octeon/dma-octeon.c b/arch/mips/cavium-octeon/dma-octeon.c
index 41dd00884975..02f244475207 100644
--- a/arch/mips/cavium-octeon/dma-octeon.c
+++ b/arch/mips/cavium-octeon/dma-octeon.c
@@ -317,7 +317,8 @@ void __init plat_swiotlb_setup(void)
 
 	octeon_swiotlb = alloc_bootmem_low_pages(swiotlbsize);
 
-	swiotlb_init_with_tbl(octeon_swiotlb, swiotlb_nslabs, 1);
+	if (swiotlb_init_with_tbl(octeon_swiotlb, swiotlb_nslabs, 1) == -ENOMEM)
+		panic("Cannot allocate SWIOTLB buffer");
 
 	mips_dma_map_ops = &octeon_linear_dma_map_ops.dma_map_ops;
 }
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index af47e7594460..1d94316f0ea4 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -231,7 +231,9 @@ retry:
 	}
 	start_dma_addr = xen_virt_to_bus(xen_io_tlb_start);
 	if (early) {
-		swiotlb_init_with_tbl(xen_io_tlb_start, xen_io_tlb_nslabs, verbose);
+		if (swiotlb_init_with_tbl(xen_io_tlb_start, xen_io_tlb_nslabs,
+			 verbose))
+			panic("Cannot allocate SWIOTLB buffer");
 		rc = 0;
 	} else
 		rc = swiotlb_late_init_with_tbl(xen_io_tlb_start, xen_io_tlb_nslabs);
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 071d62c214a6..2de42f9401d2 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -23,7 +23,7 @@ extern int swiotlb_force;
 #define IO_TLB_SHIFT 11
 
 extern void swiotlb_init(int verbose);
-extern void swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose);
+int swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose);
 extern unsigned long swiotlb_nr_tbl(void);
 extern int swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs);
 
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 196b06984dec..bfe02b8fc55b 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -122,11 +122,18 @@ static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev,
 	return phys_to_dma(hwdev, virt_to_phys(address));
 }
 
+static bool no_iotlb_memory;
+
 void swiotlb_print_info(void)
 {
 	unsigned long bytes = io_tlb_nslabs << IO_TLB_SHIFT;
 	unsigned char *vstart, *vend;
 
+	if (no_iotlb_memory) {
+		pr_warn("software IO TLB: No low mem\n");
+		return;
+	}
+
 	vstart = phys_to_virt(io_tlb_start);
 	vend = phys_to_virt(io_tlb_end);
 
@@ -136,7 +143,7 @@ void swiotlb_print_info(void)
 	       bytes >> 20, vstart, vend - 1);
 }
 
-void __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
+int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
 {
 	void *v_overflow_buffer;
 	unsigned long i, bytes;
@@ -150,9 +157,10 @@ void __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
 	/*
 	 * Get the overflow emergency buffer
 	 */
-	v_overflow_buffer = alloc_bootmem_low_pages(PAGE_ALIGN(io_tlb_overflow));
+	v_overflow_buffer = alloc_bootmem_low_pages_nopanic(
+						PAGE_ALIGN(io_tlb_overflow));
 	if (!v_overflow_buffer)
-		panic("Cannot allocate SWIOTLB overflow buffer!\n");
+		return -ENOMEM;
 
 	io_tlb_overflow_buffer = __pa(v_overflow_buffer);
 
@@ -169,15 +177,19 @@ void __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
 
 	if (verbose)
 		swiotlb_print_info();
+
+	return 0;
 }
 
 /*
  * Statically reserve bounce buffer space and initialize bounce buffer data
  * structures for the software IO TLB used to implement the DMA API.
  */
-static void __init
-swiotlb_init_with_default_size(size_t default_size, int verbose)
+void  __init
+swiotlb_init(int verbose)
 {
+	/* default to 64MB */
+	size_t default_size = 64UL<<20;
 	unsigned char *vstart;
 	unsigned long bytes;
 
@@ -188,20 +200,16 @@ swiotlb_init_with_default_size(size_t default_size, int verbose)
 
 	bytes = io_tlb_nslabs << IO_TLB_SHIFT;
 
-	/*
-	 * Get IO TLB memory from the low pages
-	 */
-	vstart = alloc_bootmem_low_pages(PAGE_ALIGN(bytes));
-	if (!vstart)
-		panic("Cannot allocate SWIOTLB buffer");
-
-	swiotlb_init_with_tbl(vstart, io_tlb_nslabs, verbose);
-}
+	/* Get IO TLB memory from the low pages */
+	vstart = alloc_bootmem_low_pages_nopanic(PAGE_ALIGN(bytes));
+	if (vstart && !swiotlb_init_with_tbl(vstart, io_tlb_nslabs, verbose))
+		return;
 
-void __init
-swiotlb_init(int verbose)
-{
-	swiotlb_init_with_default_size(64 * (1<<20), verbose);	/* default to 64MB */
+	if (io_tlb_start)
+		free_bootmem(io_tlb_start,
+				 PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
+	pr_warn("Cannot allocate SWIOTLB buffer");
+	no_iotlb_memory = true;
 }
 
 /*
@@ -405,6 +413,9 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev,
 	unsigned long offset_slots;
 	unsigned long max_slots;
 
+	if (no_iotlb_memory)
+		panic("Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer");
+
 	mask = dma_get_seg_boundary(hwdev);
 
 	tbl_dma_addr &= mask;
-- 
cgit v1.2.3


From c420811f117a59a4a7d4e34b362437b91c7fafa1 Mon Sep 17 00:00:00 2001
From: John Crispin <blogic@openwrt.org>
Date: Fri, 25 Jan 2013 19:39:51 +0100
Subject: serial: ralink: adds support for the serial core found on ralink
 wisoc

The MIPS based Ralink WiSoC platform has 1 or more 8250 compatible serial cores.
To make them work we require the same quirks that are used by AU1x00.

Signed-off-by: John Crispin <blogic@openwrt.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/8250/8250.c  | 6 +++---
 drivers/tty/serial/8250/Kconfig | 8 ++++++++
 include/linux/serial_core.h     | 2 +-
 3 files changed, 12 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/tty/serial/8250/8250.c b/drivers/tty/serial/8250/8250.c
index 24939ca3eedc..0efc815a4968 100644
--- a/drivers/tty/serial/8250/8250.c
+++ b/drivers/tty/serial/8250/8250.c
@@ -317,9 +317,9 @@ static void default_serial_dl_write(struct uart_8250_port *up, int value)
 	serial_out(up, UART_DLM, value >> 8 & 0xff);
 }
 
-#ifdef CONFIG_MIPS_ALCHEMY
+#if defined(CONFIG_MIPS_ALCHEMY) || defined(CONFIG_SERIAL_8250_RT288X)
 
-/* Au1x00 UART hardware has a weird register layout */
+/* Au1x00/RT288x UART hardware has a weird register layout */
 static const u8 au_io_in_map[] = {
 	[UART_RX]  = 0,
 	[UART_IER] = 2,
@@ -440,7 +440,7 @@ static void set_io_from_upio(struct uart_port *p)
 		p->serial_out = mem32_serial_out;
 		break;
 
-#ifdef CONFIG_MIPS_ALCHEMY
+#if defined(CONFIG_MIPS_ALCHEMY) || defined(CONFIG_SERIAL_8250_RT288X)
 	case UPIO_AU:
 		p->serial_in = au_serial_in;
 		p->serial_out = au_serial_out;
diff --git a/drivers/tty/serial/8250/Kconfig b/drivers/tty/serial/8250/Kconfig
index d31f4c63d4ba..2ef9537bcb2c 100644
--- a/drivers/tty/serial/8250/Kconfig
+++ b/drivers/tty/serial/8250/Kconfig
@@ -276,3 +276,11 @@ config SERIAL_8250_EM
 	  Selecting this option will add support for the integrated serial
 	  port hardware found on the Emma Mobile line of processors.
 	  If unsure, say N.
+
+config SERIAL_8250_RT288X
+	bool "Ralink RT288x/RT305x/RT3662/RT3883 serial port support"
+	depends on SERIAL_8250 && (SOC_RT288X || SOC_RT305X || SOC_RT3883)
+	help
+	  If you have a Ralink RT288x/RT305x SoC based board and want to use the
+	  serial port, say Y to this option. The driver can handle up to 2 serial
+	  ports. If unsure, say N.
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index 82aebc8ff77f..d97142159e0f 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -134,7 +134,7 @@ struct uart_port {
 #define UPIO_HUB6		(1)
 #define UPIO_MEM		(2)
 #define UPIO_MEM32		(3)
-#define UPIO_AU			(4)			/* Au1x00 type IO */
+#define UPIO_AU			(4)			/* Au1x00 and RT288x type IO */
 #define UPIO_TSI		(5)			/* Tsi108/109 type IO */
 
 	unsigned int		read_status_mask;	/* driver specific */
-- 
cgit v1.2.3


From 6e69ab1361c44e3ee1398158b56d114b1aef8179 Mon Sep 17 00:00:00 2001
From: Florian Vaussard <florian.vaussard@epfl.ch>
Date: Mon, 28 Jan 2013 15:00:57 +0100
Subject: pwm: Add pwm_can_sleep() as exported API to users

Calls to some external PWM chips can sleep. To help users,
add pwm_can_sleep() API.

Cc: Thierry Reding <thierry.reding@avionic-design.de>
Cc: Peter Ujfalusi <peter.ujfalusi@ti.com>
Signed-off-by: Florian Vaussard <florian.vaussard@epfl.ch>
Reviewed-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
Signed-off-by: Thierry Reding <thierry.reding@avionic-design.de>
---
 drivers/pwm/core.c  | 12 ++++++++++++
 include/linux/pwm.h | 10 ++++++++++
 2 files changed, 22 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c
index 903138b18842..ffbef95e88dc 100644
--- a/drivers/pwm/core.c
+++ b/drivers/pwm/core.c
@@ -733,6 +733,18 @@ void devm_pwm_put(struct device *dev, struct pwm_device *pwm)
 }
 EXPORT_SYMBOL_GPL(devm_pwm_put);
 
+/**
+  * pwm_can_sleep() - report whether PWM access will sleep
+  * @pwm: PWM device
+  *
+  * It returns true if accessing the PWM can sleep, false otherwise.
+  */
+bool pwm_can_sleep(struct pwm_device *pwm)
+{
+	return pwm->chip->can_sleep;
+}
+EXPORT_SYMBOL_GPL(pwm_can_sleep);
+
 #ifdef CONFIG_DEBUG_FS
 static void pwm_dbg_show(struct pwm_chip *chip, struct seq_file *s)
 {
diff --git a/include/linux/pwm.h b/include/linux/pwm.h
index 6d661f32e0e4..3fef47733ea3 100644
--- a/include/linux/pwm.h
+++ b/include/linux/pwm.h
@@ -146,6 +146,8 @@ struct pwm_ops {
  * @base: number of first PWM controlled by this chip
  * @npwm: number of PWMs controlled by this chip
  * @pwms: array of PWM devices allocated by the framework
+ * @can_sleep: must be true if the .config(), .enable() or .disable()
+ *             operations may sleep
  */
 struct pwm_chip {
 	struct device		*dev;
@@ -159,6 +161,7 @@ struct pwm_chip {
 	struct pwm_device *	(*of_xlate)(struct pwm_chip *pc,
 					    const struct of_phandle_args *args);
 	unsigned int		of_pwm_n_cells;
+	bool			can_sleep;
 };
 
 #if IS_ENABLED(CONFIG_PWM)
@@ -179,6 +182,8 @@ void pwm_put(struct pwm_device *pwm);
 
 struct pwm_device *devm_pwm_get(struct device *dev, const char *consumer);
 void devm_pwm_put(struct device *dev, struct pwm_device *pwm);
+
+bool pwm_can_sleep(struct pwm_device *pwm);
 #else
 static inline int pwm_set_chip_data(struct pwm_device *pwm, void *data)
 {
@@ -226,6 +231,11 @@ static inline struct pwm_device *devm_pwm_get(struct device *dev,
 static inline void devm_pwm_put(struct device *dev, struct pwm_device *pwm)
 {
 }
+
+static inline bool pwm_can_sleep(struct pwm_device *pwm)
+{
+	return false;
+}
 #endif
 
 struct pwm_lookup {
-- 
cgit v1.2.3


From da660b4a3b15caea9c198c4f26d1cf7023df92fc Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Fri, 4 Jan 2013 14:17:15 +0000
Subject: arm: Move sp810.h to include/linux/amba/

Since it is now used by code under drivers/clk/ it makes sense for this
file to be in a more generic location. This is required for building
vexpress support on arm64.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Acked-by: Pawel Moll <pawel.moll@arm.com>
Cc: Shiraz Hashim <shiraz.hashim@st.com>
Cc: Russell King <linux@arm.linux.org.uk>
Signed-off-by: Pawel Moll <pawel.moll@arm.com>
---
 arch/arm/include/asm/hardware/sp810.h | 64 -----------------------------------
 arch/arm/plat-spear/restart.c         |  2 +-
 drivers/clk/versatile/clk-vexpress.c  |  3 +-
 include/linux/amba/sp810.h            | 64 +++++++++++++++++++++++++++++++++++
 4 files changed, 66 insertions(+), 67 deletions(-)
 delete mode 100644 arch/arm/include/asm/hardware/sp810.h
 create mode 100644 include/linux/amba/sp810.h

(limited to 'include/linux')

diff --git a/arch/arm/include/asm/hardware/sp810.h b/arch/arm/include/asm/hardware/sp810.h
deleted file mode 100644
index 6636430dd0e6..000000000000
--- a/arch/arm/include/asm/hardware/sp810.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * arch/arm/include/asm/hardware/sp810.h
- *
- * ARM PrimeXsys System Controller SP810 header file
- *
- * Copyright (C) 2009 ST Microelectronics
- * Viresh Kumar <viresh.linux@gmail.com>
- *
- * This file is licensed under the terms of the GNU General Public
- * License version 2. This program is licensed "as is" without any
- * warranty of any kind, whether express or implied.
- */
-
-#ifndef __ASM_ARM_SP810_H
-#define __ASM_ARM_SP810_H
-
-#include <linux/io.h>
-
-/* sysctl registers offset */
-#define SCCTRL			0x000
-#define SCSYSSTAT		0x004
-#define SCIMCTRL		0x008
-#define SCIMSTAT		0x00C
-#define SCXTALCTRL		0x010
-#define SCPLLCTRL		0x014
-#define SCPLLFCTRL		0x018
-#define SCPERCTRL0		0x01C
-#define SCPERCTRL1		0x020
-#define SCPEREN			0x024
-#define SCPERDIS		0x028
-#define SCPERCLKEN		0x02C
-#define SCPERSTAT		0x030
-#define SCSYSID0		0xEE0
-#define SCSYSID1		0xEE4
-#define SCSYSID2		0xEE8
-#define SCSYSID3		0xEEC
-#define SCITCR			0xF00
-#define SCITIR0			0xF04
-#define SCITIR1			0xF08
-#define SCITOR			0xF0C
-#define SCCNTCTRL		0xF10
-#define SCCNTDATA		0xF14
-#define SCCNTSTEP		0xF18
-#define SCPERIPHID0		0xFE0
-#define SCPERIPHID1		0xFE4
-#define SCPERIPHID2		0xFE8
-#define SCPERIPHID3		0xFEC
-#define SCPCELLID0		0xFF0
-#define SCPCELLID1		0xFF4
-#define SCPCELLID2		0xFF8
-#define SCPCELLID3		0xFFC
-
-#define SCCTRL_TIMERENnSEL_SHIFT(n)	(15 + ((n) * 2))
-
-static inline void sysctl_soft_reset(void __iomem *base)
-{
-	/* switch to slow mode */
-	writel(0x2, base + SCCTRL);
-
-	/* writing any value to SCSYSSTAT reg will reset system */
-	writel(0, base + SCSYSSTAT);
-}
-
-#endif	/* __ASM_ARM_SP810_H */
diff --git a/arch/arm/plat-spear/restart.c b/arch/arm/plat-spear/restart.c
index 4f990115b1bd..7d4616d5df11 100644
--- a/arch/arm/plat-spear/restart.c
+++ b/arch/arm/plat-spear/restart.c
@@ -11,8 +11,8 @@
  * warranty of any kind, whether express or implied.
  */
 #include <linux/io.h>
+#include <linux/amba/sp810.h>
 #include <asm/system_misc.h>
-#include <asm/hardware/sp810.h>
 #include <mach/spear.h>
 #include <mach/generic.h>
 
diff --git a/drivers/clk/versatile/clk-vexpress.c b/drivers/clk/versatile/clk-vexpress.c
index c742ac7c60bb..4f83ff9ab2d9 100644
--- a/drivers/clk/versatile/clk-vexpress.c
+++ b/drivers/clk/versatile/clk-vexpress.c
@@ -11,6 +11,7 @@
  * Copyright (C) 2012 ARM Limited
  */
 
+#include <linux/amba/sp810.h>
 #include <linux/clkdev.h>
 #include <linux/clk-provider.h>
 #include <linux/err.h>
@@ -18,8 +19,6 @@
 #include <linux/of_address.h>
 #include <linux/vexpress.h>
 
-#include <asm/hardware/sp810.h>
-
 static struct clk *vexpress_sp810_timerclken[4];
 static DEFINE_SPINLOCK(vexpress_sp810_lock);
 
diff --git a/include/linux/amba/sp810.h b/include/linux/amba/sp810.h
new file mode 100644
index 000000000000..6636430dd0e6
--- /dev/null
+++ b/include/linux/amba/sp810.h
@@ -0,0 +1,64 @@
+/*
+ * arch/arm/include/asm/hardware/sp810.h
+ *
+ * ARM PrimeXsys System Controller SP810 header file
+ *
+ * Copyright (C) 2009 ST Microelectronics
+ * Viresh Kumar <viresh.linux@gmail.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#ifndef __ASM_ARM_SP810_H
+#define __ASM_ARM_SP810_H
+
+#include <linux/io.h>
+
+/* sysctl registers offset */
+#define SCCTRL			0x000
+#define SCSYSSTAT		0x004
+#define SCIMCTRL		0x008
+#define SCIMSTAT		0x00C
+#define SCXTALCTRL		0x010
+#define SCPLLCTRL		0x014
+#define SCPLLFCTRL		0x018
+#define SCPERCTRL0		0x01C
+#define SCPERCTRL1		0x020
+#define SCPEREN			0x024
+#define SCPERDIS		0x028
+#define SCPERCLKEN		0x02C
+#define SCPERSTAT		0x030
+#define SCSYSID0		0xEE0
+#define SCSYSID1		0xEE4
+#define SCSYSID2		0xEE8
+#define SCSYSID3		0xEEC
+#define SCITCR			0xF00
+#define SCITIR0			0xF04
+#define SCITIR1			0xF08
+#define SCITOR			0xF0C
+#define SCCNTCTRL		0xF10
+#define SCCNTDATA		0xF14
+#define SCCNTSTEP		0xF18
+#define SCPERIPHID0		0xFE0
+#define SCPERIPHID1		0xFE4
+#define SCPERIPHID2		0xFE8
+#define SCPERIPHID3		0xFEC
+#define SCPCELLID0		0xFF0
+#define SCPCELLID1		0xFF4
+#define SCPCELLID2		0xFF8
+#define SCPCELLID3		0xFFC
+
+#define SCCTRL_TIMERENnSEL_SHIFT(n)	(15 + ((n) * 2))
+
+static inline void sysctl_soft_reset(void __iomem *base)
+{
+	/* switch to slow mode */
+	writel(0x2, base + SCCTRL);
+
+	/* writing any value to SCSYSSTAT reg will reset system */
+	writel(0, base + SCSYSSTAT);
+}
+
+#endif	/* __ASM_ARM_SP810_H */
-- 
cgit v1.2.3


From ad964704ba9326d027fc10fd0099b7c880e50172 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
Date: Tue, 29 Jan 2013 17:45:49 -0500
Subject: ring-buffer: Add stats field for amount read from trace ring buffer

Add a stat about the number of events read from the ring buffer:

 #  cat /debug/tracing/per_cpu/cpu0/stats
entries: 39869
overrun: 870512
commit overrun: 0
bytes: 1449912
oldest event ts:  6561.368690
now ts:  6565.246426
dropped events: 0
read events: 112    <-- Added

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ring_buffer.h |  1 +
 kernel/trace/ring_buffer.c  | 18 ++++++++++++++++++
 kernel/trace/trace.c        |  3 +++
 3 files changed, 22 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index 519777e3fa01..1342e69542f3 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -167,6 +167,7 @@ unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu);
 unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu);
 unsigned long ring_buffer_commit_overrun_cpu(struct ring_buffer *buffer, int cpu);
 unsigned long ring_buffer_dropped_events_cpu(struct ring_buffer *buffer, int cpu);
+unsigned long ring_buffer_read_events_cpu(struct ring_buffer *buffer, int cpu);
 
 u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu);
 void ring_buffer_normalize_time_stamp(struct ring_buffer *buffer,
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 13950d9027cb..7244acde77b0 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -3102,6 +3102,24 @@ ring_buffer_dropped_events_cpu(struct ring_buffer *buffer, int cpu)
 }
 EXPORT_SYMBOL_GPL(ring_buffer_dropped_events_cpu);
 
+/**
+ * ring_buffer_read_events_cpu - get the number of events successfully read
+ * @buffer: The ring buffer
+ * @cpu: The per CPU buffer to get the number of events read
+ */
+unsigned long
+ring_buffer_read_events_cpu(struct ring_buffer *buffer, int cpu)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+
+	if (!cpumask_test_cpu(cpu, buffer->cpumask))
+		return 0;
+
+	cpu_buffer = buffer->buffers[cpu];
+	return cpu_buffer->read;
+}
+EXPORT_SYMBOL_GPL(ring_buffer_read_events_cpu);
+
 /**
  * ring_buffer_entries - get the number of entries in a buffer
  * @buffer: The ring buffer
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index d399592701aa..90a1c71fdbfc 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -4430,6 +4430,9 @@ tracing_stats_read(struct file *filp, char __user *ubuf,
 	cnt = ring_buffer_dropped_events_cpu(tr->buffer, cpu);
 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
 
+	cnt = ring_buffer_read_events_cpu(tr->buffer, cpu);
+	trace_seq_printf(s, "read events: %ld\n", cnt);
+
 	count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len);
 
 	kfree(s);
-- 
cgit v1.2.3


From debdd57f5145f3c6a4b3f8d0126abd1a2def7fc6 Mon Sep 17 00:00:00 2001
From: Hiraku Toyooka <hiraku.toyooka.gu@hitachi.com>
Date: Wed, 26 Dec 2012 11:53:00 +0900
Subject: tracing: Make a snapshot feature available from userspace

Ftrace has a snapshot feature available from kernel space and
latency tracers (e.g. irqsoff) are using it. This patch enables
user applictions to take a snapshot via debugfs.

Add "snapshot" debugfs file in "tracing" directory.

  snapshot:
    This is used to take a snapshot and to read the output of the
    snapshot.

     # echo 1 > snapshot

    This will allocate the spare buffer for snapshot (if it is
    not allocated), and take a snapshot.

     # cat snapshot

    This will show contents of the snapshot.

     # echo 0 > snapshot

    This will free the snapshot if it is allocated.

    Any other positive values will clear the snapshot contents if
    the snapshot is allocated, or return EINVAL if it is not allocated.

Link: http://lkml.kernel.org/r/20121226025300.3252.86850.stgit@liselsia

Cc: Jiri Olsa <jolsa@redhat.com>
Cc: David Sharp <dhsharp@google.com>
Signed-off-by: Hiraku Toyooka <hiraku.toyooka.gu@hitachi.com>
[
   Fixed irqsoff selftest and also a conflict with a change
   that fixes the update_max_tr.
]
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace_event.h |   3 +
 kernel/trace/Kconfig         |  10 +++
 kernel/trace/trace.c         | 166 ++++++++++++++++++++++++++++++++++++-------
 kernel/trace/trace.h         |   1 +
 4 files changed, 154 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 6f8d0b77006b..13a54d0bdfa8 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -83,6 +83,9 @@ struct trace_iterator {
 	long			idx;
 
 	cpumask_var_t		started;
+
+	/* it's true when current open file is snapshot */
+	bool			snapshot;
 };
 
 enum trace_iter_flags {
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index cdc9d284d24e..36567564e221 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -253,6 +253,16 @@ config FTRACE_SYSCALLS
 	help
 	  Basic tracer to catch the syscall entry and exit events.
 
+config TRACER_SNAPSHOT
+	bool "Create a snapshot trace buffer"
+	select TRACER_MAX_TRACE
+	help
+	  Allow tracing users to take snapshot of the current buffer using the
+	  ftrace interface, e.g.:
+
+	      echo 1 > /sys/kernel/debug/tracing/snapshot
+	      cat snapshot
+
 config TRACE_BRANCH_PROFILING
 	bool
 	select GENERIC_TRACER
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 2c724662a3e8..70dce64b9ecf 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -710,12 +710,11 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
 
 	WARN_ON_ONCE(!irqs_disabled());
 
-	/* If we disabled the tracer, stop now */
-	if (current_trace == &nop_trace)
-		return;
-
-	if (WARN_ON_ONCE(!current_trace->use_max_tr))
+	if (!current_trace->allocated_snapshot) {
+		/* Only the nop tracer should hit this when disabling */
+		WARN_ON_ONCE(current_trace != &nop_trace);
 		return;
+	}
 
 	arch_spin_lock(&ftrace_max_lock);
 
@@ -743,10 +742,8 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
 		return;
 
 	WARN_ON_ONCE(!irqs_disabled());
-	if (!current_trace->use_max_tr) {
-		WARN_ON_ONCE(1);
+	if (WARN_ON_ONCE(!current_trace->allocated_snapshot))
 		return;
-	}
 
 	arch_spin_lock(&ftrace_max_lock);
 
@@ -866,10 +863,13 @@ int register_tracer(struct tracer *type)
 
 		current_trace = type;
 
-		/* If we expanded the buffers, make sure the max is expanded too */
-		if (ring_buffer_expanded && type->use_max_tr)
-			ring_buffer_resize(max_tr.buffer, trace_buf_size,
-						RING_BUFFER_ALL_CPUS);
+		if (type->use_max_tr) {
+			/* If we expanded the buffers, make sure the max is expanded too */
+			if (ring_buffer_expanded)
+				ring_buffer_resize(max_tr.buffer, trace_buf_size,
+						   RING_BUFFER_ALL_CPUS);
+			type->allocated_snapshot = true;
+		}
 
 		/* the test is responsible for initializing and enabling */
 		pr_info("Testing tracer %s: ", type->name);
@@ -885,10 +885,14 @@ int register_tracer(struct tracer *type)
 		/* Only reset on passing, to avoid touching corrupted buffers */
 		tracing_reset_online_cpus(tr);
 
-		/* Shrink the max buffer again */
-		if (ring_buffer_expanded && type->use_max_tr)
-			ring_buffer_resize(max_tr.buffer, 1,
-						RING_BUFFER_ALL_CPUS);
+		if (type->use_max_tr) {
+			type->allocated_snapshot = false;
+
+			/* Shrink the max buffer again */
+			if (ring_buffer_expanded)
+				ring_buffer_resize(max_tr.buffer, 1,
+						   RING_BUFFER_ALL_CPUS);
+		}
 
 		printk(KERN_CONT "PASSED\n");
 	}
@@ -1964,7 +1968,11 @@ static void *s_start(struct seq_file *m, loff_t *pos)
 		*iter->trace = *current_trace;
 	mutex_unlock(&trace_types_lock);
 
-	atomic_inc(&trace_record_cmdline_disabled);
+	if (iter->snapshot && iter->trace->use_max_tr)
+		return ERR_PTR(-EBUSY);
+
+	if (!iter->snapshot)
+		atomic_inc(&trace_record_cmdline_disabled);
 
 	if (*pos != iter->pos) {
 		iter->ent = NULL;
@@ -2003,7 +2011,11 @@ static void s_stop(struct seq_file *m, void *p)
 {
 	struct trace_iterator *iter = m->private;
 
-	atomic_dec(&trace_record_cmdline_disabled);
+	if (iter->snapshot && iter->trace->use_max_tr)
+		return;
+
+	if (!iter->snapshot)
+		atomic_dec(&trace_record_cmdline_disabled);
 	trace_access_unlock(iter->cpu_file);
 	trace_event_read_unlock();
 }
@@ -2438,7 +2450,7 @@ static const struct seq_operations tracer_seq_ops = {
 };
 
 static struct trace_iterator *
-__tracing_open(struct inode *inode, struct file *file)
+__tracing_open(struct inode *inode, struct file *file, bool snapshot)
 {
 	long cpu_file = (long) inode->i_private;
 	struct trace_iterator *iter;
@@ -2471,10 +2483,11 @@ __tracing_open(struct inode *inode, struct file *file)
 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
 		goto fail;
 
-	if (current_trace && current_trace->print_max)
+	if ((current_trace && current_trace->print_max) || snapshot)
 		iter->tr = &max_tr;
 	else
 		iter->tr = &global_trace;
+	iter->snapshot = snapshot;
 	iter->pos = -1;
 	mutex_init(&iter->mutex);
 	iter->cpu_file = cpu_file;
@@ -2491,8 +2504,9 @@ __tracing_open(struct inode *inode, struct file *file)
 	if (trace_clocks[trace_clock_id].in_ns)
 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
 
-	/* stop the trace while dumping */
-	tracing_stop();
+	/* stop the trace while dumping if we are not opening "snapshot" */
+	if (!iter->snapshot)
+		tracing_stop();
 
 	if (iter->cpu_file == TRACE_PIPE_ALL_CPU) {
 		for_each_tracing_cpu(cpu) {
@@ -2555,8 +2569,9 @@ static int tracing_release(struct inode *inode, struct file *file)
 	if (iter->trace && iter->trace->close)
 		iter->trace->close(iter);
 
-	/* reenable tracing if it was previously enabled */
-	tracing_start();
+	if (!iter->snapshot)
+		/* reenable tracing if it was previously enabled */
+		tracing_start();
 	mutex_unlock(&trace_types_lock);
 
 	mutex_destroy(&iter->mutex);
@@ -2584,7 +2599,7 @@ static int tracing_open(struct inode *inode, struct file *file)
 	}
 
 	if (file->f_mode & FMODE_READ) {
-		iter = __tracing_open(inode, file);
+		iter = __tracing_open(inode, file, false);
 		if (IS_ERR(iter))
 			ret = PTR_ERR(iter);
 		else if (trace_flags & TRACE_ITER_LATENCY_FMT)
@@ -3219,7 +3234,7 @@ static int tracing_set_tracer(const char *buf)
 	if (current_trace && current_trace->reset)
 		current_trace->reset(tr);
 
-	had_max_tr = current_trace && current_trace->use_max_tr;
+	had_max_tr = current_trace && current_trace->allocated_snapshot;
 	current_trace = &nop_trace;
 
 	if (had_max_tr && !t->use_max_tr) {
@@ -3238,6 +3253,8 @@ static int tracing_set_tracer(const char *buf)
 		 */
 		ring_buffer_resize(max_tr.buffer, 1, RING_BUFFER_ALL_CPUS);
 		set_buffer_entries(&max_tr, 1);
+		tracing_reset_online_cpus(&max_tr);
+		current_trace->allocated_snapshot = false;
 	}
 	destroy_trace_option_files(topts);
 
@@ -3248,6 +3265,7 @@ static int tracing_set_tracer(const char *buf)
 						   RING_BUFFER_ALL_CPUS);
 		if (ret < 0)
 			goto out;
+		t->allocated_snapshot = true;
 	}
 
 	if (t->init) {
@@ -4066,6 +4084,87 @@ static int tracing_clock_open(struct inode *inode, struct file *file)
 	return single_open(file, tracing_clock_show, NULL);
 }
 
+#ifdef CONFIG_TRACER_SNAPSHOT
+static int tracing_snapshot_open(struct inode *inode, struct file *file)
+{
+	struct trace_iterator *iter;
+	int ret = 0;
+
+	if (file->f_mode & FMODE_READ) {
+		iter = __tracing_open(inode, file, true);
+		if (IS_ERR(iter))
+			ret = PTR_ERR(iter);
+	}
+	return ret;
+}
+
+static ssize_t
+tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
+		       loff_t *ppos)
+{
+	unsigned long val;
+	int ret;
+
+	ret = tracing_update_buffers();
+	if (ret < 0)
+		return ret;
+
+	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
+	if (ret)
+		return ret;
+
+	mutex_lock(&trace_types_lock);
+
+	if (current_trace && current_trace->use_max_tr) {
+		ret = -EBUSY;
+		goto out;
+	}
+
+	switch (val) {
+	case 0:
+		if (current_trace->allocated_snapshot) {
+			/* free spare buffer */
+			ring_buffer_resize(max_tr.buffer, 1,
+					   RING_BUFFER_ALL_CPUS);
+			set_buffer_entries(&max_tr, 1);
+			tracing_reset_online_cpus(&max_tr);
+			current_trace->allocated_snapshot = false;
+		}
+		break;
+	case 1:
+		if (!current_trace->allocated_snapshot) {
+			/* allocate spare buffer */
+			ret = resize_buffer_duplicate_size(&max_tr,
+					&global_trace, RING_BUFFER_ALL_CPUS);
+			if (ret < 0)
+				break;
+			current_trace->allocated_snapshot = true;
+		}
+
+		local_irq_disable();
+		/* Now, we're going to swap */
+		update_max_tr(&global_trace, current, smp_processor_id());
+		local_irq_enable();
+		break;
+	default:
+		if (current_trace->allocated_snapshot)
+			tracing_reset_online_cpus(&max_tr);
+		else
+			ret = -EINVAL;
+		break;
+	}
+
+	if (ret >= 0) {
+		*ppos += cnt;
+		ret = cnt;
+	}
+out:
+	mutex_unlock(&trace_types_lock);
+	return ret;
+}
+#endif /* CONFIG_TRACER_SNAPSHOT */
+
+
 static const struct file_operations tracing_max_lat_fops = {
 	.open		= tracing_open_generic,
 	.read		= tracing_max_lat_read,
@@ -4122,6 +4221,16 @@ static const struct file_operations trace_clock_fops = {
 	.write		= tracing_clock_write,
 };
 
+#ifdef CONFIG_TRACER_SNAPSHOT
+static const struct file_operations snapshot_fops = {
+	.open		= tracing_snapshot_open,
+	.read		= seq_read,
+	.write		= tracing_snapshot_write,
+	.llseek		= tracing_seek,
+	.release	= tracing_release,
+};
+#endif /* CONFIG_TRACER_SNAPSHOT */
+
 struct ftrace_buffer_info {
 	struct trace_array	*tr;
 	void			*spare;
@@ -4921,6 +5030,11 @@ static __init int tracer_init_debugfs(void)
 			&ftrace_update_tot_cnt, &tracing_dyn_info_fops);
 #endif
 
+#ifdef CONFIG_TRACER_SNAPSHOT
+	trace_create_file("snapshot", 0644, d_tracer,
+			  (void *) TRACE_PIPE_ALL_CPU, &snapshot_fops);
+#endif
+
 	create_trace_options_dir();
 
 	for_each_tracing_cpu(cpu)
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 04a2c7ab1735..57d7e5397d56 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -287,6 +287,7 @@ struct tracer {
 	struct tracer_flags	*flags;
 	bool			print_max;
 	bool			use_max_tr;
+	bool			allocated_snapshot;
 };
 
 
-- 
cgit v1.2.3


From 56813f798e8cb3f79a25e0523e782d910f376083 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Fri, 4 Jan 2013 13:14:34 +0100
Subject: mfd: ab8500: update header file and version detection

This updates the AB8500 register map with defines for a few
new chip variants and adds version detection helpers to handle
the different variants.

Acked-by: Samuel Ortiz <sameo@linux.intel.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/mfd/abx500/ab8500.h | 277 ++++++++++++++++++++++++++++++--------
 1 file changed, 222 insertions(+), 55 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/abx500/ab8500.h b/include/linux/mfd/abx500/ab8500.h
index 1cb5698b4d76..e640ea059be2 100644
--- a/include/linux/mfd/abx500/ab8500.h
+++ b/include/linux/mfd/abx500/ab8500.h
@@ -24,7 +24,7 @@ enum ab8500_version {
 	AB8500_VERSION_AB8500 = 0x0,
 	AB8500_VERSION_AB8505 = 0x1,
 	AB8500_VERSION_AB9540 = 0x2,
-	AB8500_VERSION_AB8540 = 0x3,
+	AB8500_VERSION_AB8540 = 0x4,
 	AB8500_VERSION_UNDEFINED,
 };
 
@@ -32,6 +32,7 @@ enum ab8500_version {
 #define AB8500_CUTEARLY	0x00
 #define AB8500_CUT1P0	0x10
 #define AB8500_CUT1P1	0x11
+#define AB8500_CUT1P2	0x12 /* Only valid for AB8540 */
 #define AB8500_CUT2P0	0x20
 #define AB8500_CUT3P0	0x30
 #define AB8500_CUT3P3	0x33
@@ -39,6 +40,7 @@ enum ab8500_version {
 /*
  * AB8500 bank addresses
  */
+#define AB8500_M_FSM_RANK	0x0
 #define AB8500_SYS_CTRL1_BLOCK	0x1
 #define AB8500_SYS_CTRL2_BLOCK	0x2
 #define AB8500_REGU_CTRL1	0x3
@@ -58,6 +60,7 @@ enum ab8500_version {
 #define AB8500_DEVELOPMENT	0x11
 #define AB8500_DEBUG		0x12
 #define AB8500_PROD_TEST	0x13
+#define AB8500_STE_TEST		0x14
 #define AB8500_OTP_EMUL		0x15
 
 /*
@@ -65,11 +68,11 @@ enum ab8500_version {
  * Values used to index into array ab8500_irq_regoffset[] defined in
  * drivers/mdf/ab8500-core.c
  */
-/* Definitions for AB8500 and AB9540 */
+/* Definitions for AB8500, AB9540 and AB8540 */
 /* ab8500_irq_regoffset[0] -> IT[Source|Latch|Mask]1 */
 #define AB8500_INT_MAIN_EXT_CH_NOT_OK	0 /* not 8505/9540 */
-#define AB8500_INT_UN_PLUG_TV_DET	1 /* not 8505/9540 */
-#define AB8500_INT_PLUG_TV_DET		2 /* not 8505/9540 */
+#define AB8500_INT_UN_PLUG_TV_DET	1 /* not 8505/9540/8540 */
+#define AB8500_INT_PLUG_TV_DET		2 /* not 8505/9540/8540 */
 #define AB8500_INT_TEMP_WARM		3
 #define AB8500_INT_PON_KEY2DB_F		4
 #define AB8500_INT_PON_KEY2DB_R		5
@@ -77,18 +80,19 @@ enum ab8500_version {
 #define AB8500_INT_PON_KEY1DB_R		7
 /* ab8500_irq_regoffset[1] -> IT[Source|Latch|Mask]2 */
 #define AB8500_INT_BATT_OVV		8
-#define AB8500_INT_MAIN_CH_UNPLUG_DET	10 /* not 8505 */
-#define AB8500_INT_MAIN_CH_PLUG_DET	11 /* not 8505 */
+#define AB8500_INT_MAIN_CH_UNPLUG_DET	10 /* not 8505/8540 */
+#define AB8500_INT_MAIN_CH_PLUG_DET	11 /* not 8505/8540 */
 #define AB8500_INT_VBUS_DET_F		14
 #define AB8500_INT_VBUS_DET_R		15
 /* ab8500_irq_regoffset[2] -> IT[Source|Latch|Mask]3 */
 #define AB8500_INT_VBUS_CH_DROP_END	16
 #define AB8500_INT_RTC_60S		17
 #define AB8500_INT_RTC_ALARM		18
+#define AB8540_INT_BIF_INT		19
 #define AB8500_INT_BAT_CTRL_INDB	20
 #define AB8500_INT_CH_WD_EXP		21
 #define AB8500_INT_VBUS_OVV		22
-#define AB8500_INT_MAIN_CH_DROP_END	23 /* not 8505/9540 */
+#define AB8500_INT_MAIN_CH_DROP_END	23 /* not 8505/9540/8540 */
 /* ab8500_irq_regoffset[3] -> IT[Source|Latch|Mask]4 */
 #define AB8500_INT_CCN_CONV_ACC		24
 #define AB8500_INT_INT_AUD		25
@@ -99,7 +103,7 @@ enum ab8500_version {
 #define AB8500_INT_BUP_CHG_NOT_OK	30
 #define AB8500_INT_BUP_CHG_OK		31
 /* ab8500_irq_regoffset[4] -> IT[Source|Latch|Mask]5 */
-#define AB8500_INT_GP_HW_ADC_CONV_END	32 /* not 8505 */
+#define AB8500_INT_GP_HW_ADC_CONV_END	32 /* not 8505/8540 */
 #define AB8500_INT_ACC_DETECT_1DB_F	33
 #define AB8500_INT_ACC_DETECT_1DB_R	34
 #define AB8500_INT_ACC_DETECT_22DB_F	35
@@ -108,23 +112,23 @@ enum ab8500_version {
 #define AB8500_INT_ACC_DETECT_21DB_R	38
 #define AB8500_INT_GP_SW_ADC_CONV_END	39
 /* ab8500_irq_regoffset[5] -> IT[Source|Latch|Mask]7 */
-#define AB8500_INT_GPIO6R		40 /* not 8505/9540 */
-#define AB8500_INT_GPIO7R		41 /* not 8505/9540 */
-#define AB8500_INT_GPIO8R		42 /* not 8505/9540 */
-#define AB8500_INT_GPIO9R		43 /* not 8505/9540 */
-#define AB8500_INT_GPIO10R		44
-#define AB8500_INT_GPIO11R		45
-#define AB8500_INT_GPIO12R		46 /* not 8505 */
-#define AB8500_INT_GPIO13R		47
+#define AB8500_INT_GPIO6R		40 /* not 8505/9540/8540 */
+#define AB8500_INT_GPIO7R		41 /* not 8505/9540/8540 */
+#define AB8500_INT_GPIO8R		42 /* not 8505/9540/8540 */
+#define AB8500_INT_GPIO9R		43 /* not 8505/9540/8540 */
+#define AB8500_INT_GPIO10R		44 /* not 8540 */
+#define AB8500_INT_GPIO11R		45 /* not 8540 */
+#define AB8500_INT_GPIO12R		46 /* not 8505/8540 */
+#define AB8500_INT_GPIO13R		47 /* not 8540 */
 /* ab8500_irq_regoffset[6] -> IT[Source|Latch|Mask]8 */
-#define AB8500_INT_GPIO24R		48 /* not 8505 */
-#define AB8500_INT_GPIO25R		49 /* not 8505 */
-#define AB8500_INT_GPIO36R		50 /* not 8505/9540 */
-#define AB8500_INT_GPIO37R		51 /* not 8505/9540 */
-#define AB8500_INT_GPIO38R		52 /* not 8505/9540 */
-#define AB8500_INT_GPIO39R		53 /* not 8505/9540 */
-#define AB8500_INT_GPIO40R		54
-#define AB8500_INT_GPIO41R		55
+#define AB8500_INT_GPIO24R		48 /* not 8505/8540 */
+#define AB8500_INT_GPIO25R		49 /* not 8505/8540 */
+#define AB8500_INT_GPIO36R		50 /* not 8505/9540/8540 */
+#define AB8500_INT_GPIO37R		51 /* not 8505/9540/8540 */
+#define AB8500_INT_GPIO38R		52 /* not 8505/9540/8540 */
+#define AB8500_INT_GPIO39R		53 /* not 8505/9540/8540 */
+#define AB8500_INT_GPIO40R		54 /* not 8540 */
+#define AB8500_INT_GPIO41R		55 /* not 8540 */
 /* ab8500_irq_regoffset[7] -> IT[Source|Latch|Mask]9 */
 #define AB8500_INT_GPIO6F		56 /* not 8505/9540 */
 #define AB8500_INT_GPIO7F		57 /* not 8505/9540 */
@@ -135,14 +139,14 @@ enum ab8500_version {
 #define AB8500_INT_GPIO12F		62 /* not 8505 */
 #define AB8500_INT_GPIO13F		63
 /* ab8500_irq_regoffset[8] -> IT[Source|Latch|Mask]10 */
-#define AB8500_INT_GPIO24F		64 /* not 8505 */
-#define AB8500_INT_GPIO25F		65 /* not 8505 */
-#define AB8500_INT_GPIO36F		66 /* not 8505/9540 */
-#define AB8500_INT_GPIO37F		67 /* not 8505/9540 */
-#define AB8500_INT_GPIO38F		68 /* not 8505/9540 */
-#define AB8500_INT_GPIO39F		69 /* not 8505/9540 */
-#define AB8500_INT_GPIO40F		70
-#define AB8500_INT_GPIO41F		71
+#define AB8500_INT_GPIO24F		64 /* not 8505/8540 */
+#define AB8500_INT_GPIO25F		65 /* not 8505/8540 */
+#define AB8500_INT_GPIO36F		66 /* not 8505/9540/8540 */
+#define AB8500_INT_GPIO37F		67 /* not 8505/9540/8540 */
+#define AB8500_INT_GPIO38F		68 /* not 8505/9540/8540 */
+#define AB8500_INT_GPIO39F		69 /* not 8505/9540/8540 */
+#define AB8500_INT_GPIO40F		70 /* not 8540 */
+#define AB8500_INT_GPIO41F		71 /* not 8540 */
 /* ab8500_irq_regoffset[9] -> IT[Source|Latch|Mask]12 */
 #define AB8500_INT_ADP_SOURCE_ERROR	72
 #define AB8500_INT_ADP_SINK_ERROR	73
@@ -160,42 +164,44 @@ enum ab8500_version {
 #define AB8500_INT_SRP_DETECT		88
 #define AB8500_INT_USB_CHARGER_NOT_OKR	89
 #define AB8500_INT_ID_WAKEUP_R		90
+#define AB8500_INT_ID_DET_PLUGR         91 /* 8505/9540 cut2.0 */
 #define AB8500_INT_ID_DET_R1R		92
 #define AB8500_INT_ID_DET_R2R		93
 #define AB8500_INT_ID_DET_R3R		94
 #define AB8500_INT_ID_DET_R4R		95
 /* ab8500_irq_regoffset[12] -> IT[Source|Latch|Mask]21 */
-#define AB8500_INT_ID_WAKEUP_F		96
-#define AB8500_INT_ID_DET_R1F		98
-#define AB8500_INT_ID_DET_R2F		99
-#define AB8500_INT_ID_DET_R3F		100
-#define AB8500_INT_ID_DET_R4F		101
-#define AB8500_INT_CHAUTORESTARTAFTSEC  102
+#define AB8500_INT_ID_WAKEUP_F		96 /* not 8505/9540 */
+#define AB8500_INT_ID_DET_PLUGF		97 /* 8505/9540 cut2.0 */
+#define AB8500_INT_ID_DET_R1F		98 /* not 8505/9540 */
+#define AB8500_INT_ID_DET_R2F		99 /* not 8505/9540 */
+#define AB8500_INT_ID_DET_R3F		100 /* not 8505/9540 */
+#define AB8500_INT_ID_DET_R4F		101 /* not 8505/9540 */
+#define AB8500_INT_CHAUTORESTARTAFTSEC	102 /* not 8505/9540 */
 #define AB8500_INT_CHSTOPBYSEC		103
 /* ab8500_irq_regoffset[13] -> IT[Source|Latch|Mask]22 */
 #define AB8500_INT_USB_CH_TH_PROT_F	104
-#define AB8500_INT_USB_CH_TH_PROT_R    105
+#define AB8500_INT_USB_CH_TH_PROT_R	105
 #define AB8500_INT_MAIN_CH_TH_PROT_F	106 /* not 8505/9540 */
 #define AB8500_INT_MAIN_CH_TH_PROT_R	107 /* not 8505/9540 */
 #define AB8500_INT_CHCURLIMNOHSCHIRP	109
 #define AB8500_INT_CHCURLIMHSCHIRP	110
 #define AB8500_INT_XTAL32K_KO		111
 
-/* Definitions for AB9540 */
+/* Definitions for AB9540 / AB8505 */
 /* ab8500_irq_regoffset[14] -> IT[Source|Latch|Mask]13 */
-#define AB9540_INT_GPIO50R		113
-#define AB9540_INT_GPIO51R		114 /* not 8505 */
-#define AB9540_INT_GPIO52R		115
-#define AB9540_INT_GPIO53R		116
-#define AB9540_INT_GPIO54R		117 /* not 8505 */
+#define AB9540_INT_GPIO50R		113 /* not 8540 */
+#define AB9540_INT_GPIO51R		114 /* not 8505/8540 */
+#define AB9540_INT_GPIO52R		115 /* not 8540 */
+#define AB9540_INT_GPIO53R		116 /* not 8540 */
+#define AB9540_INT_GPIO54R		117 /* not 8505/8540 */
 #define AB9540_INT_IEXT_CH_RF_BFN_R	118
-#define AB9540_INT_IEXT_CH_RF_BFN_F	119
 /* ab8500_irq_regoffset[15] -> IT[Source|Latch|Mask]14 */
-#define AB9540_INT_GPIO50F		121
-#define AB9540_INT_GPIO51F		122 /* not 8505 */
-#define AB9540_INT_GPIO52F		123
-#define AB9540_INT_GPIO53F		124
-#define AB9540_INT_GPIO54F		125 /* not 8505 */
+#define AB9540_INT_GPIO50F		121 /* not 8540 */
+#define AB9540_INT_GPIO51F		122 /* not 8505/8540 */
+#define AB9540_INT_GPIO52F		123 /* not 8540 */
+#define AB9540_INT_GPIO53F		124 /* not 8540 */
+#define AB9540_INT_GPIO54F		125 /* not 8505/8540 */
+#define AB9540_INT_IEXT_CH_RF_BFN_F	126
 /* ab8500_irq_regoffset[16] -> IT[Source|Latch|Mask]25 */
 #define AB8505_INT_KEYSTUCK		128
 #define AB8505_INT_IKR			129
@@ -204,6 +210,87 @@ enum ab8500_version {
 #define AB8505_INT_KEYDEGLITCH		132
 #define AB8505_INT_MODPWRSTATUSF	134
 #define AB8505_INT_MODPWRSTATUSR	135
+/* ab8500_irq_regoffset[17] -> IT[Source|Latch|Mask]6 */
+#define AB8500_INT_HOOK_DET_NEG_F	138
+#define AB8500_INT_HOOK_DET_NEG_R	139
+#define AB8500_INT_HOOK_DET_POS_F	140
+#define AB8500_INT_HOOK_DET_POS_R	141
+#define AB8500_INT_PLUG_DET_COMP_F	142
+#define AB8500_INT_PLUG_DET_COMP_R	143
+/* ab8500_irq_regoffset[18] -> IT[Source|Latch|Mask]23 */
+#define AB8505_INT_COLL			144
+#define AB8505_INT_RESERR		145
+#define AB8505_INT_FRAERR		146
+#define AB8505_INT_COMERR		147
+#define AB8505_INT_SPDSET		148
+#define AB8505_INT_DSENT		149
+#define AB8505_INT_DREC			150
+#define AB8505_INT_ACC_INT		151
+/* ab8500_irq_regoffset[19] -> IT[Source|Latch|Mask]24 */
+#define AB8505_INT_NOPINT		152
+/* ab8540_irq_regoffset[20] -> IT[Source|Latch|Mask]26 */
+#define AB8540_INT_IDPLUGDETCOMPF	160
+#define AB8540_INT_IDPLUGDETCOMPR	161
+#define AB8540_INT_FMDETCOMPLOF		162
+#define AB8540_INT_FMDETCOMPLOR		163
+#define AB8540_INT_FMDETCOMPHIF		164
+#define AB8540_INT_FMDETCOMPHIR		165
+#define AB8540_INT_ID5VDETCOMPF		166
+#define AB8540_INT_ID5VDETCOMPR		167
+/* ab8540_irq_regoffset[21] -> IT[Source|Latch|Mask]27 */
+#define AB8540_INT_GPIO43F		168
+#define AB8540_INT_GPIO43R		169
+#define AB8540_INT_GPIO44F		170
+#define AB8540_INT_GPIO44R		171
+#define AB8540_INT_KEYPOSDETCOMPF	172
+#define AB8540_INT_KEYPOSDETCOMPR	173
+#define AB8540_INT_KEYNEGDETCOMPF	174
+#define AB8540_INT_KEYNEGDETCOMPR	175
+/* ab8540_irq_regoffset[22] -> IT[Source|Latch|Mask]28 */
+#define AB8540_INT_GPIO1VBATF		176
+#define AB8540_INT_GPIO1VBATR		177
+#define AB8540_INT_GPIO2VBATF		178
+#define AB8540_INT_GPIO2VBATR		179
+#define AB8540_INT_GPIO3VBATF		180
+#define AB8540_INT_GPIO3VBATR		181
+#define AB8540_INT_GPIO4VBATF		182
+#define AB8540_INT_GPIO4VBATR		183
+/* ab8540_irq_regoffset[23] -> IT[Source|Latch|Mask]29 */
+#define AB8540_INT_SYSCLKREQ2F		184
+#define AB8540_INT_SYSCLKREQ2R		185
+#define AB8540_INT_SYSCLKREQ3F		186
+#define AB8540_INT_SYSCLKREQ3R		187
+#define AB8540_INT_SYSCLKREQ4F		188
+#define AB8540_INT_SYSCLKREQ4R		189
+#define AB8540_INT_SYSCLKREQ5F		190
+#define AB8540_INT_SYSCLKREQ5R		191
+/* ab8540_irq_regoffset[24] -> IT[Source|Latch|Mask]30 */
+#define AB8540_INT_PWMOUT1F		192
+#define AB8540_INT_PWMOUT1R		193
+#define AB8540_INT_PWMCTRL0F		194
+#define AB8540_INT_PWMCTRL0R		195
+#define AB8540_INT_PWMCTRL1F		196
+#define AB8540_INT_PWMCTRL1R		197
+#define AB8540_INT_SYSCLKREQ6F		198
+#define AB8540_INT_SYSCLKREQ6R		199
+/* ab8540_irq_regoffset[25] -> IT[Source|Latch|Mask]31 */
+#define AB8540_INT_PWMEXTVIBRA1F	200
+#define AB8540_INT_PWMEXTVIBRA1R	201
+#define AB8540_INT_PWMEXTVIBRA2F	202
+#define AB8540_INT_PWMEXTVIBRA2R	203
+#define AB8540_INT_PWMOUT2F		204
+#define AB8540_INT_PWMOUT2R		205
+#define AB8540_INT_PWMOUT3F		206
+#define AB8540_INT_PWMOUT3R		207
+/* ab8540_irq_regoffset[26] -> IT[Source|Latch|Mask]32 */
+#define AB8540_INT_ADDATA2F		208
+#define AB8540_INT_ADDATA2R		209
+#define AB8540_INT_DADATA2F		210
+#define AB8540_INT_DADATA2R		211
+#define AB8540_INT_FSYNC2F		212
+#define AB8540_INT_FSYNC2R		213
+#define AB8540_INT_BITCLK2F		214
+#define AB8540_INT_BITCLK2R		215
 
 /*
  * AB8500_AB9540_NR_IRQS is used when configuring the IRQ numbers for the
@@ -213,13 +300,24 @@ enum ab8500_version {
  * which is larger.
  */
 #define AB8500_NR_IRQS			112
-#define AB8505_NR_IRQS			136
-#define AB9540_NR_IRQS			136
+#define AB8505_NR_IRQS			153
+#define AB9540_NR_IRQS			153
+#define AB8540_NR_IRQS			216
 /* This is set to the roof of any AB8500 chip variant IRQ counts */
-#define AB8500_MAX_NR_IRQS		AB9540_NR_IRQS
+#define AB8500_MAX_NR_IRQS		AB8540_NR_IRQS
 
 #define AB8500_NUM_IRQ_REGS		14
-#define AB9540_NUM_IRQ_REGS		17
+#define AB9540_NUM_IRQ_REGS		20
+#define AB8540_NUM_IRQ_REGS		27
+
+/* Turn On Status Event */
+#define AB8500_POR_ON_VBAT		0x01
+#define AB8500_POW_KEY_1_ON		0x02
+#define AB8500_POW_KEY_2_ON		0x04
+#define AB8500_RTC_ALARM		0x08
+#define AB8500_MAIN_CH_DET		0x10
+#define AB8500_VBUS_DET			0x20
+#define AB8500_USB_ID_DET		0x40
 
 /**
  * struct ab8500 - ab8500 internal structure
@@ -335,10 +433,79 @@ static inline int is_ab8500_2p0_or_earlier(struct ab8500 *ab)
 	return (is_ab8500(ab) && (ab->chip_id <= AB8500_CUT2P0));
 }
 
+static inline int is_ab8500_3p3_or_earlier(struct ab8500 *ab)
+{
+	return (is_ab8500(ab) && (ab->chip_id <= AB8500_CUT3P3));
+}
+
 /* exclude also ab8505, ab9540... */
 static inline int is_ab8500_2p0(struct ab8500 *ab)
 {
 	return (is_ab8500(ab) && (ab->chip_id == AB8500_CUT2P0));
 }
 
+static inline int is_ab8505_1p0_or_earlier(struct ab8500 *ab)
+{
+	return (is_ab8505(ab) && (ab->chip_id <= AB8500_CUT1P0));
+}
+
+static inline int is_ab8505_2p0(struct ab8500 *ab)
+{
+	return (is_ab8505(ab) && (ab->chip_id == AB8500_CUT2P0));
+}
+
+static inline int is_ab9540_1p0_or_earlier(struct ab8500 *ab)
+{
+	return (is_ab9540(ab) && (ab->chip_id <= AB8500_CUT1P0));
+}
+
+static inline int is_ab9540_2p0(struct ab8500 *ab)
+{
+	return (is_ab9540(ab) && (ab->chip_id == AB8500_CUT2P0));
+}
+
+/*
+ * Be careful, the marketing name for this chip is 2.1
+ * but the value read from the chip is 3.0 (0x30)
+ */
+static inline int is_ab9540_3p0(struct ab8500 *ab)
+{
+	return (is_ab9540(ab) && (ab->chip_id == AB8500_CUT3P0));
+}
+
+static inline int is_ab8540_1p0_or_earlier(struct ab8500 *ab)
+{
+	return is_ab8540(ab) && (ab->chip_id <= AB8500_CUT1P0);
+}
+
+static inline int is_ab8540_1p1_or_earlier(struct ab8500 *ab)
+{
+	return is_ab8540(ab) && (ab->chip_id <= AB8500_CUT1P1);
+}
+
+static inline int is_ab8540_1p2_or_earlier(struct ab8500 *ab)
+{
+	return is_ab8540(ab) && (ab->chip_id <= AB8500_CUT1P2);
+}
+
+static inline int is_ab8540_2p0_or_earlier(struct ab8500 *ab)
+{
+	return is_ab8540(ab) && (ab->chip_id <= AB8500_CUT2P0);
+}
+
+static inline int is_ab8540_2p0(struct ab8500 *ab)
+{
+	return is_ab8540(ab) && (ab->chip_id == AB8500_CUT2P0);
+}
+
+static inline int is_ab8505_2p0_earlier(struct ab8500 *ab)
+{
+	return (is_ab8505(ab) && (ab->chip_id < AB8500_CUT2P0));
+}
+
+static inline int is_ab9540_2p0_or_earlier(struct ab8500 *ab)
+{
+	return (is_ab9540(ab) && (ab->chip_id < AB8500_CUT2P0));
+}
+
 #endif /* MFD_AB8500_H */
-- 
cgit v1.2.3


From 0493e6493031523f78680b4469f02fc1b2d440f3 Mon Sep 17 00:00:00 2001
From: Patrice Chotard <patrice.chotard@st.com>
Date: Tue, 8 Jan 2013 10:41:02 +0100
Subject: pinctrl: add abx500 pinctrl driver core

This adds the AB8500 core driver, which will be utilized by
the follow-on drivers for different ABx500 variants.
Sselect the driver from the DBX500_SOC, as this chip is
powering and clocking that SoC.

Cc: Samuel Ortiz <sameo@linux.intel.com>
Signed-off-by: Patrice Chotard <patrice.chotard@st.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 arch/arm/mach-ux500/Kconfig            |    1 +
 arch/arm/mach-ux500/board-mop500.c     |   19 +-
 drivers/pinctrl/Kconfig                |    7 +
 drivers/pinctrl/Makefile               |    1 +
 drivers/pinctrl/pinctrl-abx500.c       | 1233 ++++++++++++++++++++++++++++++++
 drivers/pinctrl/pinctrl-abx500.h       |  180 +++++
 include/linux/mfd/abx500/ab8500-gpio.h |   15 +-
 include/linux/mfd/abx500/ab8500.h      |    2 +-
 8 files changed, 1437 insertions(+), 21 deletions(-)
 create mode 100644 drivers/pinctrl/pinctrl-abx500.c
 create mode 100644 drivers/pinctrl/pinctrl-abx500.h

(limited to 'include/linux')

diff --git a/arch/arm/mach-ux500/Kconfig b/arch/arm/mach-ux500/Kconfig
index 1d7dbe61a958..018bf681659d 100644
--- a/arch/arm/mach-ux500/Kconfig
+++ b/arch/arm/mach-ux500/Kconfig
@@ -11,6 +11,7 @@ config UX500_SOC_COMMON
 	select COMMON_CLK
 	select PINCTRL
 	select PINCTRL_NOMADIK
+	select PINCTRL_ABX500
 	select PL310_ERRATA_753970 if CACHE_PL310
 
 config UX500_SOC_DB8500
diff --git a/arch/arm/mach-ux500/board-mop500.c b/arch/arm/mach-ux500/board-mop500.c
index d453522edb0d..b6f14ee507ca 100644
--- a/arch/arm/mach-ux500/board-mop500.c
+++ b/arch/arm/mach-ux500/board-mop500.c
@@ -90,26 +90,9 @@ static struct platform_device snowball_gpio_en_3v3_regulator_dev = {
        },
 };
 
-static struct ab8500_gpio_platform_data ab8500_gpio_pdata = {
+static struct abx500_gpio_platform_data ab8500_gpio_pdata = {
 	.gpio_base		= MOP500_AB8500_PIN_GPIO(1),
 	.irq_base		= MOP500_AB8500_VIR_GPIO_IRQ_BASE,
-	/* config_reg is the initial configuration of ab8500 pins.
-	 * The pins can be configured as GPIO or alt functions based
-	 * on value present in GpioSel1 to GpioSel6 and AlternatFunction
-	 * register. This is the array of 7 configuration settings.
-	 * One has to compile time decide these settings. Below is the
-	 * explanation of these setting
-	 * GpioSel1 = 0x00 => Pins GPIO1 to GPIO8 are not used as GPIO
-	 * GpioSel2 = 0x1E => Pins GPIO10 to GPIO13 are configured as GPIO
-	 * GpioSel3 = 0x80 => Pin GPIO24 is configured as GPIO
-	 * GpioSel4 = 0x01 => Pin GPIo25 is configured as GPIO
-	 * GpioSel5 = 0x7A => Pins GPIO34, GPIO36 to GPIO39 are conf as GPIO
-	 * GpioSel6 = 0x00 => Pins GPIO41 & GPIo42 are not configured as GPIO
-	 * AlternaFunction = 0x00 => If Pins GPIO10 to 13 are not configured
-	 * as GPIO then this register selectes the alternate fucntions
-	 */
-	.config_reg		= {0x00, 0x1E, 0x80, 0x01,
-					0x7A, 0x00, 0x00},
 };
 
 /* ab8500-codec */
diff --git a/drivers/pinctrl/Kconfig b/drivers/pinctrl/Kconfig
index 881ddcd00236..ba3038c827c6 100644
--- a/drivers/pinctrl/Kconfig
+++ b/drivers/pinctrl/Kconfig
@@ -26,6 +26,13 @@ config DEBUG_PINCTRL
 	help
 	  Say Y here to add some extra checks and diagnostics to PINCTRL calls.
 
+config PINCTRL_ABX500
+	bool "ST-Ericsson ABx500 family Mixed Signal Circuit gpio functions"
+	depends on AB8500_CORE
+	select GENERIC_PINCONF
+	help
+	  Select this to enable the ABx500 family IC GPIO driver
+
 config PINCTRL_AT91
 	bool "AT91 pinctrl driver"
 	depends on OF
diff --git a/drivers/pinctrl/Makefile b/drivers/pinctrl/Makefile
index e9ad8c3851be..ead4fa7dfd1f 100644
--- a/drivers/pinctrl/Makefile
+++ b/drivers/pinctrl/Makefile
@@ -9,6 +9,7 @@ ifeq ($(CONFIG_OF),y)
 obj-$(CONFIG_PINCTRL)		+= devicetree.o
 endif
 obj-$(CONFIG_GENERIC_PINCONF)	+= pinconf-generic.o
+obj-$(CONFIG_PINCTRL_ABX500)	+= pinctrl-abx500.o
 obj-$(CONFIG_PINCTRL_AT91)	+= pinctrl-at91.o
 obj-$(CONFIG_PINCTRL_BCM2835)	+= pinctrl-bcm2835.o
 obj-$(CONFIG_PINCTRL_IMX)	+= pinctrl-imx.o
diff --git a/drivers/pinctrl/pinctrl-abx500.c b/drivers/pinctrl/pinctrl-abx500.c
new file mode 100644
index 000000000000..9eed9887da2a
--- /dev/null
+++ b/drivers/pinctrl/pinctrl-abx500.c
@@ -0,0 +1,1233 @@
+/*
+ * Copyright (C) ST-Ericsson SA 2013
+ *
+ * Author: Patrice Chotard <patrice.chotard@st.com>
+ * License terms: GNU General Public License (GPL) version 2
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/err.h>
+#include <linux/platform_device.h>
+#include <linux/gpio.h>
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+#include <linux/bitops.h>
+#include <linux/mfd/abx500.h>
+#include <linux/mfd/abx500/ab8500.h>
+#include <linux/mfd/abx500/ab8500-gpio.h>
+#include <linux/pinctrl/pinctrl.h>
+#include <linux/pinctrl/consumer.h>
+#include <linux/pinctrl/pinmux.h>
+#include <linux/pinctrl/pinconf.h>
+#include <linux/pinctrl/pinconf-generic.h>
+
+#include "pinctrl-abx500.h"
+
+/*
+ * The AB9540 and AB8540 GPIO support are extended versions
+ * of the AB8500 GPIO support.
+ * The AB9540 supports an additional (7th) register so that
+ * more GPIO may be configured and used.
+ * The AB8540 supports 4 new gpios (GPIOx_VBAT) that have
+ * internal pull-up and pull-down capabilities.
+ */
+
+/*
+ * GPIO registers offset
+ * Bank: 0x10
+ */
+#define AB8500_GPIO_SEL1_REG	0x00
+#define AB8500_GPIO_SEL2_REG	0x01
+#define AB8500_GPIO_SEL3_REG	0x02
+#define AB8500_GPIO_SEL4_REG	0x03
+#define AB8500_GPIO_SEL5_REG	0x04
+#define AB8500_GPIO_SEL6_REG	0x05
+#define AB9540_GPIO_SEL7_REG	0x06
+
+#define AB8500_GPIO_DIR1_REG	0x10
+#define AB8500_GPIO_DIR2_REG	0x11
+#define AB8500_GPIO_DIR3_REG	0x12
+#define AB8500_GPIO_DIR4_REG	0x13
+#define AB8500_GPIO_DIR5_REG	0x14
+#define AB8500_GPIO_DIR6_REG	0x15
+#define AB9540_GPIO_DIR7_REG	0x16
+
+#define AB8500_GPIO_OUT1_REG	0x20
+#define AB8500_GPIO_OUT2_REG	0x21
+#define AB8500_GPIO_OUT3_REG	0x22
+#define AB8500_GPIO_OUT4_REG	0x23
+#define AB8500_GPIO_OUT5_REG	0x24
+#define AB8500_GPIO_OUT6_REG	0x25
+#define AB9540_GPIO_OUT7_REG	0x26
+
+#define AB8500_GPIO_PUD1_REG	0x30
+#define AB8500_GPIO_PUD2_REG	0x31
+#define AB8500_GPIO_PUD3_REG	0x32
+#define AB8500_GPIO_PUD4_REG	0x33
+#define AB8500_GPIO_PUD5_REG	0x34
+#define AB8500_GPIO_PUD6_REG	0x35
+#define AB9540_GPIO_PUD7_REG	0x36
+
+#define AB8500_GPIO_IN1_REG	0x40
+#define AB8500_GPIO_IN2_REG	0x41
+#define AB8500_GPIO_IN3_REG	0x42
+#define AB8500_GPIO_IN4_REG	0x43
+#define AB8500_GPIO_IN5_REG	0x44
+#define AB8500_GPIO_IN6_REG	0x45
+#define AB9540_GPIO_IN7_REG	0x46
+#define AB8540_GPIO_VINSEL_REG	0x47
+#define AB8540_GPIO_PULL_UPDOWN_REG	0x48
+#define AB8500_GPIO_ALTFUN_REG	0x50
+#define AB8500_NUM_VIR_GPIO_IRQ	16
+#define AB8540_GPIO_PULL_UPDOWN_MASK	0x03
+#define AB8540_GPIO_VINSEL_MASK	0x03
+#define AB8540_GPIOX_VBAT_START	51
+#define AB8540_GPIOX_VBAT_END	54
+
+enum abx500_gpio_action {
+	NONE,
+	STARTUP,
+	SHUTDOWN,
+	MASK,
+	UNMASK
+};
+
+struct abx500_pinctrl {
+	struct device *dev;
+	struct pinctrl_dev *pctldev;
+	struct abx500_pinctrl_soc_data *soc;
+	struct gpio_chip chip;
+	struct ab8500 *parent;
+	struct mutex lock;
+	u32 irq_base;
+	enum abx500_gpio_action irq_action;
+	u16 rising;
+	u16 falling;
+	struct abx500_gpio_irq_cluster *irq_cluster;
+	int irq_cluster_size;
+	int irq_gpio_rising_offset;
+	int irq_gpio_falling_offset;
+	int irq_gpio_factor;
+};
+
+/**
+ * to_abx500_pinctrl() - get the pointer to abx500_pinctrl
+ * @chip:	Member of the structure abx500_pinctrl
+ */
+static inline struct abx500_pinctrl *to_abx500_pinctrl(struct gpio_chip *chip)
+{
+	return container_of(chip, struct abx500_pinctrl, chip);
+}
+
+static int abx500_gpio_get_bit(struct gpio_chip *chip, u8 reg,
+					unsigned offset, bool *bit)
+{
+	struct abx500_pinctrl *pct = to_abx500_pinctrl(chip);
+	u8 pos = offset % 8;
+	u8 val;
+	int ret;
+
+	reg += offset / 8;
+	ret = abx500_get_register_interruptible(pct->dev,
+						AB8500_MISC, reg, &val);
+
+	*bit = !!(val & BIT(pos));
+
+	if (ret < 0)
+		dev_err(pct->dev,
+			"%s read reg =%x, offset=%x failed\n",
+			__func__, reg, offset);
+
+	return ret;
+}
+
+static int abx500_gpio_set_bits(struct gpio_chip *chip, u8 reg,
+					unsigned offset, int val)
+{
+	struct abx500_pinctrl *pct = to_abx500_pinctrl(chip);
+	u8 pos = offset % 8;
+	int ret;
+
+	reg += offset / 8;
+	ret = abx500_mask_and_set_register_interruptible(pct->dev,
+				AB8500_MISC, reg, 1 << pos, val << pos);
+	if (ret < 0)
+		dev_err(pct->dev, "%s write failed\n", __func__);
+	return ret;
+}
+/**
+ * abx500_gpio_get() - Get the particular GPIO value
+ * @chip: Gpio device
+ * @offset: GPIO number to read
+ */
+static int abx500_gpio_get(struct gpio_chip *chip, unsigned offset)
+{
+	struct abx500_pinctrl *pct = to_abx500_pinctrl(chip);
+	bool bit;
+	int ret;
+
+	ret = abx500_gpio_get_bit(chip, AB8500_GPIO_IN1_REG,
+				  offset, &bit);
+	if (ret < 0) {
+		dev_err(pct->dev, "%s failed\n", __func__);
+		return ret;
+	}
+	return bit;
+}
+
+static void abx500_gpio_set(struct gpio_chip *chip, unsigned offset, int val)
+{
+	struct abx500_pinctrl *pct = to_abx500_pinctrl(chip);
+	int ret;
+
+	ret = abx500_gpio_set_bits(chip, AB8500_GPIO_OUT1_REG, offset, val);
+	if (ret < 0)
+		dev_err(pct->dev, "%s write failed\n", __func__);
+}
+
+static int abx500_config_pull_updown(struct abx500_pinctrl *pct,
+				int offset, enum abx500_gpio_pull_updown val)
+{
+	u8 pos;
+	int ret;
+	struct pullud *pullud;
+
+	if (!pct->soc->pullud) {
+		dev_err(pct->dev, "%s AB chip doesn't support pull up/down feature",
+				__func__);
+		ret = -EPERM;
+		goto out;
+	}
+
+	pullud = pct->soc->pullud;
+
+	if ((offset < pullud->first_pin)
+		|| (offset > pullud->last_pin)) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	pos = offset << 1;
+
+	ret = abx500_mask_and_set_register_interruptible(pct->dev,
+			AB8500_MISC, AB8540_GPIO_PULL_UPDOWN_REG,
+			AB8540_GPIO_PULL_UPDOWN_MASK << pos, val << pos);
+
+out:
+	if (ret < 0)
+		dev_err(pct->dev, "%s failed (%d)\n", __func__, ret);
+	return ret;
+}
+
+static int abx500_gpio_direction_output(struct gpio_chip *chip,
+					unsigned offset,
+					int val)
+{
+	struct abx500_pinctrl *pct = to_abx500_pinctrl(chip);
+	struct pullud *pullud = pct->soc->pullud;
+	unsigned gpio;
+	int ret;
+	/* set direction as output */
+	ret = abx500_gpio_set_bits(chip, AB8500_GPIO_DIR1_REG, offset, 1);
+	if (ret < 0)
+		return ret;
+
+	/* disable pull down */
+	ret = abx500_gpio_set_bits(chip, AB8500_GPIO_PUD1_REG, offset, 1);
+	if (ret < 0)
+		return ret;
+
+	/* if supported, disable both pull down and pull up */
+	gpio = offset + 1;
+	if (pullud && gpio >= pullud->first_pin && gpio <= pullud->last_pin) {
+		ret = abx500_config_pull_updown(pct,
+				gpio,
+				ABX500_GPIO_PULL_NONE);
+		if (ret < 0)
+			return ret;
+	}
+	/* set the output as 1 or 0 */
+	return abx500_gpio_set_bits(chip, AB8500_GPIO_OUT1_REG, offset, val);
+}
+
+static int abx500_gpio_direction_input(struct gpio_chip *chip, unsigned offset)
+{
+	/* set the register as input */
+	return abx500_gpio_set_bits(chip, AB8500_GPIO_DIR1_REG, offset, 0);
+}
+
+static int abx500_gpio_to_irq(struct gpio_chip *chip, unsigned offset)
+{
+	struct abx500_pinctrl *pct = to_abx500_pinctrl(chip);
+	int base = pct->irq_base;
+	int i;
+
+	for (i = 0; i < pct->irq_cluster_size; i++) {
+		struct abx500_gpio_irq_cluster *cluster =
+			&pct->irq_cluster[i];
+
+		if (offset >= cluster->start && offset <= cluster->end)
+			return base + offset - cluster->start;
+
+		/* Advance by the number of gpios in this cluster */
+		base += cluster->end + cluster->offset - cluster->start + 1;
+	}
+
+	return -EINVAL;
+}
+
+static int abx500_set_mode(struct pinctrl_dev *pctldev, struct gpio_chip *chip,
+		unsigned gpio, int alt_setting)
+{
+	struct abx500_pinctrl *pct = pinctrl_dev_get_drvdata(pctldev);
+	struct alternate_functions af = pct->soc->alternate_functions[gpio];
+	int ret;
+	int val;
+	unsigned offset;
+	const char *modes[] = {
+		[ABX500_DEFAULT]	= "default",
+		[ABX500_ALT_A]		= "altA",
+		[ABX500_ALT_B]		= "altB",
+		[ABX500_ALT_C]		= "altC",
+	};
+
+	/* sanity check */
+	if (((alt_setting == ABX500_ALT_A) && (af.gpiosel_bit == UNUSED)) ||
+	    ((alt_setting == ABX500_ALT_B) && (af.alt_bit1 == UNUSED)) ||
+	    ((alt_setting == ABX500_ALT_C) && (af.alt_bit2 == UNUSED))) {
+		dev_dbg(pct->dev, "pin %d doesn't support %s mode\n", gpio,
+				modes[alt_setting]);
+		return -EINVAL;
+	}
+
+	/* on ABx5xx, there is no GPIO0, so adjust the offset */
+	offset = gpio - 1;
+	switch (alt_setting) {
+	case ABX500_DEFAULT:
+		/*
+		 * for ABx5xx family, default mode is always selected by
+		 * writing 0 to GPIOSELx register, except for pins which
+		 * support at least ALT_B mode, default mode is selected
+		 * by writing 1 to GPIOSELx register
+		 */
+		val = 0;
+		if (af.alt_bit1 != UNUSED)
+			val++;
+
+		ret = abx500_gpio_set_bits(chip, AB8500_GPIO_SEL1_REG,
+					   offset, val);
+		break;
+	case ABX500_ALT_A:
+		/*
+		 * for ABx5xx family, alt_a mode is always selected by
+		 * writing 1 to GPIOSELx register, except for pins which
+		 * support at least ALT_B mode, alt_a mode is selected
+		 * by writing 0 to GPIOSELx register and 0 in ALTFUNC
+		 * register
+		 */
+		if (af.alt_bit1 != UNUSED) {
+			ret = abx500_gpio_set_bits(chip, AB8500_GPIO_SEL1_REG,
+					offset, 0);
+			ret = abx500_gpio_set_bits(chip,
+					AB8500_GPIO_ALTFUN_REG,
+					af.alt_bit1,
+					!!(af.alta_val && BIT(0)));
+			if (af.alt_bit2 != UNUSED)
+				ret = abx500_gpio_set_bits(chip,
+					AB8500_GPIO_ALTFUN_REG,
+					af.alt_bit2,
+					!!(af.alta_val && BIT(1)));
+		} else
+			ret = abx500_gpio_set_bits(chip, AB8500_GPIO_SEL1_REG,
+					offset, 1);
+		break;
+	case ABX500_ALT_B:
+		ret = abx500_gpio_set_bits(chip, AB8500_GPIO_SEL1_REG,
+				offset, 0);
+		ret = abx500_gpio_set_bits(chip, AB8500_GPIO_ALTFUN_REG,
+				af.alt_bit1, !!(af.altb_val && BIT(0)));
+		if (af.alt_bit2 != UNUSED)
+			ret = abx500_gpio_set_bits(chip,
+					AB8500_GPIO_ALTFUN_REG,
+					af.alt_bit2,
+					!!(af.altb_val && BIT(1)));
+		break;
+	case ABX500_ALT_C:
+		ret = abx500_gpio_set_bits(chip, AB8500_GPIO_SEL1_REG,
+				offset, 0);
+		ret = abx500_gpio_set_bits(chip, AB8500_GPIO_ALTFUN_REG,
+				af.alt_bit2, !!(af.altc_val && BIT(0)));
+		ret = abx500_gpio_set_bits(chip, AB8500_GPIO_ALTFUN_REG,
+				af.alt_bit2, !!(af.altc_val && BIT(1)));
+		break;
+
+	default:
+		dev_dbg(pct->dev, "unknow alt_setting %d\n", alt_setting);
+		return -EINVAL;
+	}
+	return ret;
+}
+
+static u8 abx500_get_mode(struct pinctrl_dev *pctldev, struct gpio_chip *chip,
+		unsigned gpio)
+{
+	u8 mode;
+	bool bit_mode;
+	bool alt_bit1;
+	bool alt_bit2;
+	struct abx500_pinctrl *pct = pinctrl_dev_get_drvdata(pctldev);
+	struct alternate_functions af = pct->soc->alternate_functions[gpio];
+
+	/*
+	 * if gpiosel_bit is set to unused,
+	 * it means no GPIO or special case
+	 */
+	if (af.gpiosel_bit == UNUSED)
+		return ABX500_DEFAULT;
+
+	/* read GpioSelx register */
+	abx500_gpio_get_bit(chip, AB8500_GPIO_SEL1_REG + (gpio / 8),
+			af.gpiosel_bit, &bit_mode);
+	mode = bit_mode;
+
+	/* sanity check */
+	if ((af.alt_bit1 < UNUSED) || (af.alt_bit1 > 7) ||
+	    (af.alt_bit2 < UNUSED) || (af.alt_bit2 > 7)) {
+		dev_err(pct->dev,
+			"alt_bitX value not in correct range (-1 to 7)\n");
+		return -EINVAL;
+	}
+	/* if alt_bit2 is used, alt_bit1 must be used too */
+	if ((af.alt_bit2 != UNUSED) && (af.alt_bit1 == UNUSED)) {
+		dev_err(pct->dev,
+			"if alt_bit2 is used, alt_bit1 can't be unused\n");
+		return -EINVAL;
+	}
+
+	/* check if pin use AlternateFunction register */
+	if ((af.alt_bit1 == UNUSED) && (af.alt_bit1 == UNUSED))
+		return mode;
+	/*
+	 * if pin GPIOSEL bit is set and pin supports alternate function,
+	 * it means DEFAULT mode
+	 */
+	if (mode)
+		return ABX500_DEFAULT;
+	/*
+	 * pin use the AlternatFunction register
+	 * read alt_bit1 value
+	 */
+	abx500_gpio_get_bit(chip, AB8500_GPIO_ALTFUN_REG,
+			    af.alt_bit1, &alt_bit1);
+
+	if (af.alt_bit2 != UNUSED)
+		/* read alt_bit2 value */
+		abx500_gpio_get_bit(chip, AB8500_GPIO_ALTFUN_REG, af.alt_bit2,
+				&alt_bit2);
+	else
+		alt_bit2 = 0;
+
+	mode = (alt_bit2 << 1) + alt_bit1;
+	if (mode == af.alta_val)
+		return ABX500_ALT_A;
+	else if (mode == af.altb_val)
+		return ABX500_ALT_B;
+	else
+		return ABX500_ALT_C;
+}
+
+#ifdef CONFIG_DEBUG_FS
+
+#include <linux/seq_file.h>
+
+static void abx500_gpio_dbg_show_one(struct seq_file *s,
+	struct pinctrl_dev *pctldev, struct gpio_chip *chip,
+	unsigned offset, unsigned gpio)
+{
+	struct abx500_pinctrl *pct = to_abx500_pinctrl(chip);
+	const char *label = gpiochip_is_requested(chip, offset - 1);
+	u8 gpio_offset = offset - 1;
+	int mode = -1;
+	bool is_out;
+	bool pull;
+	const char *modes[] = {
+		[ABX500_DEFAULT]	= "default",
+		[ABX500_ALT_A]		= "altA",
+		[ABX500_ALT_B]		= "altB",
+		[ABX500_ALT_C]		= "altC",
+	};
+
+	abx500_gpio_get_bit(chip, AB8500_GPIO_DIR1_REG, gpio_offset, &is_out);
+	abx500_gpio_get_bit(chip, AB8500_GPIO_PUD1_REG, gpio_offset, &pull);
+
+	if (pctldev)
+		mode = abx500_get_mode(pctldev, chip, offset);
+
+	seq_printf(s, " gpio-%-3d (%-20.20s) %-3s %-9s %s",
+		   gpio, label ?: "(none)",
+		   is_out ? "out" : "in ",
+		   is_out ?
+		   (chip->get
+		   ? (chip->get(chip, offset) ? "hi" : "lo")
+		   : "?  ")
+		   : (pull ? "pull up" : "pull down"),
+		   (mode < 0) ? "unknown" : modes[mode]);
+
+	if (label && !is_out) {
+		int irq = gpio_to_irq(gpio);
+		struct irq_desc	*desc = irq_to_desc(irq);
+
+		if (irq >= 0 && desc->action) {
+			char *trigger;
+			int irq_offset = irq - pct->irq_base;
+
+			if (pct->rising & BIT(irq_offset))
+				trigger = "edge-rising";
+			else if (pct->falling & BIT(irq_offset))
+				trigger = "edge-falling";
+			else
+				trigger = "edge-undefined";
+
+			seq_printf(s, " irq-%d %s", irq, trigger);
+		}
+	}
+}
+
+static void abx500_gpio_dbg_show(struct seq_file *s, struct gpio_chip *chip)
+{
+	unsigned i;
+	unsigned gpio = chip->base;
+	struct abx500_pinctrl *pct = to_abx500_pinctrl(chip);
+	struct pinctrl_dev *pctldev = pct->pctldev;
+
+	for (i = 0; i < chip->ngpio; i++, gpio++) {
+		/* On AB8500, there is no GPIO0, the first is the GPIO 1 */
+		abx500_gpio_dbg_show_one(s, pctldev, chip, i + 1, gpio);
+		seq_printf(s, "\n");
+	}
+}
+
+#else
+static inline void abx500_gpio_dbg_show_one(struct seq_file *s,
+					 struct pinctrl_dev *pctldev,
+					 struct gpio_chip *chip,
+					 unsigned offset, unsigned gpio)
+{
+}
+#define abx500_gpio_dbg_show	NULL
+#endif
+
+int abx500_gpio_request(struct gpio_chip *chip, unsigned offset)
+{
+	int gpio = chip->base + offset;
+
+	return pinctrl_request_gpio(gpio);
+}
+
+void abx500_gpio_free(struct gpio_chip *chip, unsigned offset)
+{
+	int gpio = chip->base + offset;
+
+	pinctrl_free_gpio(gpio);
+}
+
+static struct gpio_chip abx500gpio_chip = {
+	.label			= "abx500-gpio",
+	.owner			= THIS_MODULE,
+	.request		= abx500_gpio_request,
+	.free			= abx500_gpio_free,
+	.direction_input	= abx500_gpio_direction_input,
+	.get			= abx500_gpio_get,
+	.direction_output	= abx500_gpio_direction_output,
+	.set			= abx500_gpio_set,
+	.to_irq			= abx500_gpio_to_irq,
+	.dbg_show		= abx500_gpio_dbg_show,
+};
+
+static unsigned int irq_to_rising(unsigned int irq)
+{
+	struct abx500_pinctrl *pct = irq_get_chip_data(irq);
+	int offset = irq - pct->irq_base;
+	int new_irq;
+
+	new_irq = offset * pct->irq_gpio_factor
+		+ pct->irq_gpio_rising_offset
+		+ pct->parent->irq_base;
+
+	return new_irq;
+}
+
+static unsigned int irq_to_falling(unsigned int irq)
+{
+	struct abx500_pinctrl *pct = irq_get_chip_data(irq);
+	int offset = irq - pct->irq_base;
+	int new_irq;
+
+	new_irq = offset * pct->irq_gpio_factor
+		+ pct->irq_gpio_falling_offset
+		+ pct->parent->irq_base;
+	return new_irq;
+
+}
+
+static unsigned int rising_to_irq(unsigned int irq, void *dev)
+{
+	struct abx500_pinctrl *pct = dev;
+	int offset, new_irq;
+
+	offset = irq - pct->irq_gpio_rising_offset
+		- pct->parent->irq_base;
+	new_irq = (offset / pct->irq_gpio_factor)
+		+ pct->irq_base;
+
+	return new_irq;
+}
+
+static unsigned int falling_to_irq(unsigned int irq, void *dev)
+{
+	struct abx500_pinctrl *pct = dev;
+	int offset, new_irq;
+
+	offset = irq - pct->irq_gpio_falling_offset
+		- pct->parent->irq_base;
+	new_irq = (offset / pct->irq_gpio_factor)
+		+ pct->irq_base;
+
+	return new_irq;
+}
+
+/*
+ * IRQ handler
+ */
+
+static irqreturn_t handle_rising(int irq, void *dev)
+{
+
+	handle_nested_irq(rising_to_irq(irq , dev));
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t handle_falling(int irq, void *dev)
+{
+
+	handle_nested_irq(falling_to_irq(irq, dev));
+	return IRQ_HANDLED;
+}
+
+static void abx500_gpio_irq_lock(struct irq_data *data)
+{
+	struct abx500_pinctrl *pct = irq_data_get_irq_chip_data(data);
+	mutex_lock(&pct->lock);
+}
+
+static void abx500_gpio_irq_sync_unlock(struct irq_data *data)
+{
+	struct abx500_pinctrl *pct = irq_data_get_irq_chip_data(data);
+	unsigned int irq = data->irq;
+	int offset = irq - pct->irq_base;
+	bool rising = pct->rising & BIT(offset);
+	bool falling = pct->falling & BIT(offset);
+	int ret;
+
+	switch (pct->irq_action)	{
+	case STARTUP:
+		if (rising)
+			ret = request_threaded_irq(irq_to_rising(irq),
+					NULL, handle_rising,
+					IRQF_TRIGGER_RISING | IRQF_NO_SUSPEND,
+					"abx500-gpio-r", pct);
+		if (falling)
+			ret = request_threaded_irq(irq_to_falling(irq),
+				       NULL, handle_falling,
+				       IRQF_TRIGGER_FALLING | IRQF_NO_SUSPEND,
+				       "abx500-gpio-f", pct);
+		break;
+	case SHUTDOWN:
+		if (rising)
+			free_irq(irq_to_rising(irq), pct);
+		if (falling)
+			free_irq(irq_to_falling(irq), pct);
+		break;
+	case MASK:
+		if (rising)
+			disable_irq(irq_to_rising(irq));
+		if (falling)
+			disable_irq(irq_to_falling(irq));
+		break;
+	case UNMASK:
+		if (rising)
+			enable_irq(irq_to_rising(irq));
+		if (falling)
+			enable_irq(irq_to_falling(irq));
+		break;
+	case NONE:
+		break;
+	}
+	pct->irq_action = NONE;
+	pct->rising &= ~(BIT(offset));
+	pct->falling &= ~(BIT(offset));
+	mutex_unlock(&pct->lock);
+}
+
+
+static void abx500_gpio_irq_mask(struct irq_data *data)
+{
+	struct abx500_pinctrl *pct = irq_data_get_irq_chip_data(data);
+	pct->irq_action = MASK;
+}
+
+static void abx500_gpio_irq_unmask(struct irq_data *data)
+{
+	struct abx500_pinctrl *pct =  irq_data_get_irq_chip_data(data);
+	pct->irq_action = UNMASK;
+}
+
+static int abx500_gpio_irq_set_type(struct irq_data *data, unsigned int type)
+{
+	struct abx500_pinctrl *pct = irq_data_get_irq_chip_data(data);
+	unsigned int irq = data->irq;
+	int offset = irq - pct->irq_base;
+
+	if (type == IRQ_TYPE_EDGE_BOTH) {
+		pct->rising =  BIT(offset);
+		pct->falling = BIT(offset);
+	} else if (type == IRQ_TYPE_EDGE_RISING) {
+		pct->rising =  BIT(offset);
+	} else  {
+		pct->falling = BIT(offset);
+	}
+	return 0;
+}
+
+static unsigned int abx500_gpio_irq_startup(struct irq_data *data)
+{
+	struct abx500_pinctrl *pct = irq_data_get_irq_chip_data(data);
+	pct->irq_action = STARTUP;
+	return 0;
+}
+
+static void abx500_gpio_irq_shutdown(struct irq_data *data)
+{
+	struct abx500_pinctrl *pct = irq_data_get_irq_chip_data(data);
+	pct->irq_action = SHUTDOWN;
+}
+
+static struct irq_chip abx500_gpio_irq_chip = {
+	.name			= "abx500-gpio",
+	.irq_startup		= abx500_gpio_irq_startup,
+	.irq_shutdown		= abx500_gpio_irq_shutdown,
+	.irq_bus_lock		= abx500_gpio_irq_lock,
+	.irq_bus_sync_unlock	= abx500_gpio_irq_sync_unlock,
+	.irq_mask		= abx500_gpio_irq_mask,
+	.irq_unmask		= abx500_gpio_irq_unmask,
+	.irq_set_type		= abx500_gpio_irq_set_type,
+};
+
+static int abx500_gpio_irq_init(struct abx500_pinctrl *pct)
+{
+	u32 base = pct->irq_base;
+	int irq;
+
+	for (irq = base; irq < base + AB8500_NUM_VIR_GPIO_IRQ ; irq++) {
+		irq_set_chip_data(irq, pct);
+		irq_set_chip_and_handler(irq, &abx500_gpio_irq_chip,
+				handle_simple_irq);
+		irq_set_nested_thread(irq, 1);
+#ifdef CONFIG_ARM
+		set_irq_flags(irq, IRQF_VALID);
+#else
+		irq_set_noprobe(irq);
+#endif
+	}
+
+	return 0;
+}
+
+static void abx500_gpio_irq_remove(struct abx500_pinctrl *pct)
+{
+	int base = pct->irq_base;
+	int irq;
+
+	for (irq = base; irq < base + AB8500_NUM_VIR_GPIO_IRQ; irq++) {
+#ifdef CONFIG_ARM
+		set_irq_flags(irq, 0);
+#endif
+		irq_set_chip_and_handler(irq, NULL, NULL);
+		irq_set_chip_data(irq, NULL);
+	}
+}
+
+static int abx500_pmx_get_funcs_cnt(struct pinctrl_dev *pctldev)
+{
+	struct abx500_pinctrl *pct = pinctrl_dev_get_drvdata(pctldev);
+
+	return pct->soc->nfunctions;
+}
+
+static const char *abx500_pmx_get_func_name(struct pinctrl_dev *pctldev,
+					 unsigned function)
+{
+	struct abx500_pinctrl *pct = pinctrl_dev_get_drvdata(pctldev);
+
+	return pct->soc->functions[function].name;
+}
+
+static int abx500_pmx_get_func_groups(struct pinctrl_dev *pctldev,
+				   unsigned function,
+				   const char * const **groups,
+				   unsigned * const num_groups)
+{
+	struct abx500_pinctrl *pct = pinctrl_dev_get_drvdata(pctldev);
+
+	*groups = pct->soc->functions[function].groups;
+	*num_groups = pct->soc->functions[function].ngroups;
+
+	return 0;
+}
+
+static void abx500_disable_lazy_irq(struct gpio_chip *chip, unsigned gpio)
+{
+	struct abx500_pinctrl *pct = to_abx500_pinctrl(chip);
+	int irq;
+	int offset;
+	bool rising;
+	bool falling;
+
+	/*
+	 * check if gpio has interrupt capability and convert
+	 * gpio number to irq
+	 * On ABx5xx, there is no GPIO0, GPIO1 is the
+	 * first one, so adjust gpio number
+	 */
+	gpio--;
+	irq =  gpio_to_irq(gpio + chip->base);
+	if (irq < 0)
+		return;
+
+	offset = irq - pct->irq_base;
+	rising = pct->rising & BIT(offset);
+	falling = pct->falling & BIT(offset);
+
+	/* nothing to do ?*/
+	if (!rising && !falling)
+		return;
+
+	if (rising) {
+		disable_irq(irq_to_rising(irq));
+		free_irq(irq_to_rising(irq), pct);
+	}
+	if (falling) {
+		disable_irq(irq_to_falling(irq));
+		free_irq(irq_to_falling(irq), pct);
+	}
+}
+
+static int abx500_pmx_enable(struct pinctrl_dev *pctldev, unsigned function,
+			  unsigned group)
+{
+	struct abx500_pinctrl *pct = pinctrl_dev_get_drvdata(pctldev);
+	struct gpio_chip *chip = &pct->chip;
+	const struct abx500_pingroup *g;
+	int i;
+	int ret = 0;
+
+	g = &pct->soc->groups[group];
+	if (g->altsetting < 0)
+		return -EINVAL;
+
+	dev_dbg(pct->dev, "enable group %s, %u pins\n", g->name, g->npins);
+
+	for (i = 0; i < g->npins; i++) {
+		dev_dbg(pct->dev, "setting pin %d to altsetting %d\n",
+			g->pins[i], g->altsetting);
+
+		abx500_disable_lazy_irq(chip, g->pins[i]);
+		ret = abx500_set_mode(pctldev, chip, g->pins[i], g->altsetting);
+	}
+	return ret;
+}
+
+static void abx500_pmx_disable(struct pinctrl_dev *pctldev,
+			    unsigned function, unsigned group)
+{
+	struct abx500_pinctrl *pct = pinctrl_dev_get_drvdata(pctldev);
+	const struct abx500_pingroup *g;
+
+	g = &pct->soc->groups[group];
+	if (g->altsetting < 0)
+		return;
+
+	/* FIXME: poke out the mux, set the pin to some default state? */
+	dev_dbg(pct->dev, "disable group %s, %u pins\n", g->name, g->npins);
+}
+
+int abx500_gpio_request_enable(struct pinctrl_dev *pctldev,
+			    struct pinctrl_gpio_range *range,
+			    unsigned offset)
+{
+	struct abx500_pinctrl *pct = pinctrl_dev_get_drvdata(pctldev);
+	const struct abx500_pinrange *p;
+	int ret;
+	int i;
+
+	/*
+	 * Different ranges have different ways to enable GPIO function on a
+	 * pin, so refer back to our local range type, where we handily define
+	 * what altfunc enables GPIO for a certain pin.
+	 */
+	for (i = 0; i < pct->soc->gpio_num_ranges; i++) {
+		p = &pct->soc->gpio_ranges[i];
+		if ((offset >= p->offset) &&
+		    (offset < (p->offset + p->npins)))
+		  break;
+	}
+
+	if (i == pct->soc->gpio_num_ranges) {
+		dev_err(pct->dev, "%s failed to locate range\n", __func__);
+		return -ENODEV;
+	}
+
+	dev_dbg(pct->dev, "enable GPIO by altfunc %d at gpio %d\n",
+		p->altfunc, offset);
+
+	ret = abx500_set_mode(pct->pctldev, &pct->chip,
+			      offset, p->altfunc);
+	if (ret < 0) {
+		dev_err(pct->dev, "%s setting altfunc failed\n", __func__);
+		return ret;
+	}
+
+	return ret;
+}
+
+static void abx500_gpio_disable_free(struct pinctrl_dev *pctldev,
+			   struct pinctrl_gpio_range *range,
+			   unsigned offset)
+{
+}
+
+static struct pinmux_ops abx500_pinmux_ops = {
+	.get_functions_count = abx500_pmx_get_funcs_cnt,
+	.get_function_name = abx500_pmx_get_func_name,
+	.get_function_groups = abx500_pmx_get_func_groups,
+	.enable = abx500_pmx_enable,
+	.disable = abx500_pmx_disable,
+	.gpio_request_enable = abx500_gpio_request_enable,
+	.gpio_disable_free = abx500_gpio_disable_free,
+};
+
+static int abx500_get_groups_cnt(struct pinctrl_dev *pctldev)
+{
+	struct abx500_pinctrl *pct = pinctrl_dev_get_drvdata(pctldev);
+
+	return pct->soc->ngroups;
+}
+
+static const char *abx500_get_group_name(struct pinctrl_dev *pctldev,
+				       unsigned selector)
+{
+	struct abx500_pinctrl *pct = pinctrl_dev_get_drvdata(pctldev);
+
+	return pct->soc->groups[selector].name;
+}
+
+static int abx500_get_group_pins(struct pinctrl_dev *pctldev,
+		unsigned selector,
+		const unsigned **pins,
+		unsigned *num_pins)
+{
+	struct abx500_pinctrl *pct = pinctrl_dev_get_drvdata(pctldev);
+
+	*pins = pct->soc->groups[selector].pins;
+	*num_pins = pct->soc->groups[selector].npins;
+	return 0;
+}
+
+static void abx500_pin_dbg_show(struct pinctrl_dev *pctldev,
+		struct seq_file *s, unsigned offset)
+{
+	struct abx500_pinctrl *pct = pinctrl_dev_get_drvdata(pctldev);
+	struct gpio_chip *chip = &pct->chip;
+
+	abx500_gpio_dbg_show_one(s, pctldev, chip, offset,
+				 chip->base + offset - 1);
+}
+
+static struct pinctrl_ops abx500_pinctrl_ops = {
+	.get_groups_count = abx500_get_groups_cnt,
+	.get_group_name = abx500_get_group_name,
+	.get_group_pins = abx500_get_group_pins,
+	.pin_dbg_show = abx500_pin_dbg_show,
+};
+
+int abx500_pin_config_get(struct pinctrl_dev *pctldev,
+		       unsigned pin,
+		       unsigned long *config)
+{
+	/* Not implemented */
+	return -EINVAL;
+}
+
+int abx500_pin_config_set(struct pinctrl_dev *pctldev,
+		       unsigned pin,
+		       unsigned long config)
+{
+	struct abx500_pinctrl *pct = pinctrl_dev_get_drvdata(pctldev);
+	struct pullud *pullud = pct->soc->pullud;
+	struct gpio_chip *chip = &pct->chip;
+	unsigned offset;
+	int ret;
+	enum pin_config_param param = pinconf_to_config_param(config);
+	enum pin_config_param argument = pinconf_to_config_argument(config);
+
+	dev_dbg(chip->dev, "pin %d [%#lx]: %s %s\n",
+		pin, config, (param == PIN_CONFIG_OUTPUT) ? "output " : "input",
+		(param == PIN_CONFIG_OUTPUT) ? (argument ? "high" : "low") :
+		(argument ? "pull up" : "pull down"));
+	/* on ABx500, there is no GPIO0, so adjust the offset */
+	offset = pin - 1;
+
+	switch (param) {
+	case PIN_CONFIG_BIAS_PULL_DOWN:
+		/*
+		 * if argument = 1 set the pull down
+		 * else clear the pull down
+		 */
+		ret = abx500_gpio_direction_input(chip, offset);
+		/*
+		 * Some chips only support pull down, while some actually
+		 * support both pull up and pull down. Such chips have
+		 * a "pullud" range specified for the pins that support
+		 * both features. If the pin is not within that range, we
+		 * fall back to the old bit set that only support pull down.
+		 */
+		if (pullud &&
+		    pin >= pullud->first_pin &&
+		    pin <= pullud->last_pin)
+			ret = abx500_config_pull_updown(pct,
+				pin,
+				argument ? ABX500_GPIO_PULL_DOWN : ABX500_GPIO_PULL_NONE);
+		else
+			/* Chip only supports pull down */
+			ret = abx500_gpio_set_bits(chip, AB8500_GPIO_PUD1_REG,
+				offset, argument ? 0 : 1);
+		break;
+	case PIN_CONFIG_OUTPUT:
+		ret = abx500_gpio_direction_output(chip, offset, argument);
+		break;
+	default:
+		dev_err(chip->dev, "illegal configuration requested\n");
+		return -EINVAL;
+	}
+	return ret;
+}
+
+static struct pinconf_ops abx500_pinconf_ops = {
+	.pin_config_get = abx500_pin_config_get,
+	.pin_config_set = abx500_pin_config_set,
+};
+
+static struct pinctrl_desc abx500_pinctrl_desc = {
+	.name = "pinctrl-abx500",
+	.pctlops = &abx500_pinctrl_ops,
+	.pmxops = &abx500_pinmux_ops,
+	.confops = &abx500_pinconf_ops,
+	.owner = THIS_MODULE,
+};
+
+static int abx500_get_gpio_num(struct abx500_pinctrl_soc_data *soc)
+{
+	unsigned int lowest = 0;
+	unsigned int highest = 0;
+	unsigned int npins = 0;
+	int i;
+
+	/*
+	 * Compute number of GPIOs from the last SoC gpio range descriptors
+	 * These ranges may include "holes" but the GPIO number space shall
+	 * still be homogeneous, so we need to detect and account for any
+	 * such holes so that these are included in the number of GPIO pins.
+	 */
+	for (i = 0; i < soc->gpio_num_ranges; i++) {
+		unsigned gstart;
+		unsigned gend;
+		const struct abx500_pinrange *p;
+
+		p = &soc->gpio_ranges[i];
+		gstart = p->offset;
+		gend = p->offset + p->npins - 1;
+
+		if (i == 0) {
+			/* First iteration, set start values */
+			lowest = gstart;
+			highest = gend;
+		} else {
+			if (gstart < lowest)
+				lowest = gstart;
+			if (gend > highest)
+				highest = gend;
+		}
+	}
+	/* this gives the absolute number of pins */
+	npins = highest - lowest + 1;
+	return npins;
+}
+
+static int abx500_gpio_probe(struct platform_device *pdev)
+{
+	struct ab8500_platform_data *abx500_pdata =
+				dev_get_platdata(pdev->dev.parent);
+	struct abx500_gpio_platform_data *pdata;
+	struct abx500_pinctrl *pct;
+	const struct platform_device_id *platid = platform_get_device_id(pdev);
+	int ret;
+	int i;
+
+	pdata = abx500_pdata->gpio;
+	if (!pdata)	{
+		dev_err(&pdev->dev, "gpio platform data missing\n");
+		return -ENODEV;
+	}
+
+	pct = devm_kzalloc(&pdev->dev, sizeof(struct abx500_pinctrl),
+				   GFP_KERNEL);
+	if (pct == NULL) {
+		dev_err(&pdev->dev,
+			"failed to allocate memory for pct\n");
+		return -ENOMEM;
+	}
+
+	pct->dev = &pdev->dev;
+	pct->parent = dev_get_drvdata(pdev->dev.parent);
+	pct->chip = abx500gpio_chip;
+	pct->chip.dev = &pdev->dev;
+	pct->chip.base = pdata->gpio_base;
+	pct->irq_base = pdata->irq_base;
+
+	/* initialize the lock */
+	mutex_init(&pct->lock);
+
+	/* Poke in other ASIC variants here */
+	switch (platid->driver_data) {
+	default:
+		dev_err(&pdev->dev, "Unsupported pinctrl sub driver (%d)\n",
+				(int) platid->driver_data);
+		return -EINVAL;
+	}
+
+	if (!pct->soc) {
+		dev_err(&pdev->dev, "Invalid SOC data\n");
+		return -EINVAL;
+	}
+
+	pct->chip.ngpio = abx500_get_gpio_num(pct->soc);
+	pct->irq_cluster = pct->soc->gpio_irq_cluster;
+	pct->irq_cluster_size = pct->soc->ngpio_irq_cluster;
+	pct->irq_gpio_rising_offset = pct->soc->irq_gpio_rising_offset;
+	pct->irq_gpio_falling_offset = pct->soc->irq_gpio_falling_offset;
+	pct->irq_gpio_factor = pct->soc->irq_gpio_factor;
+
+	ret = abx500_gpio_irq_init(pct);
+	if (ret)
+		goto out_free;
+	ret = gpiochip_add(&pct->chip);
+	if (ret) {
+		dev_err(&pdev->dev, "unable to add gpiochip: %d\n",
+				ret);
+		goto out_rem_irq;
+	}
+	dev_info(&pdev->dev, "added gpiochip\n");
+
+	abx500_pinctrl_desc.pins = pct->soc->pins;
+	abx500_pinctrl_desc.npins = pct->soc->npins;
+	pct->pctldev = pinctrl_register(&abx500_pinctrl_desc, &pdev->dev, pct);
+	if (!pct->pctldev) {
+		dev_err(&pdev->dev,
+			"could not register abx500 pinctrl driver\n");
+		goto out_rem_chip;
+	}
+	dev_info(&pdev->dev, "registered pin controller\n");
+
+	/* We will handle a range of GPIO pins */
+	for (i = 0; i < pct->soc->gpio_num_ranges; i++) {
+		const struct abx500_pinrange *p = &pct->soc->gpio_ranges[i];
+
+		ret = gpiochip_add_pin_range(&pct->chip,
+					dev_name(&pdev->dev),
+					p->offset - 1, p->offset, p->npins);
+		if (ret < 0)
+			return ret;
+	}
+
+	platform_set_drvdata(pdev, pct);
+	dev_info(&pdev->dev, "initialized abx500 pinctrl driver\n");
+
+	return 0;
+
+out_rem_chip:
+	ret = gpiochip_remove(&pct->chip);
+	if (ret)
+		dev_info(&pdev->dev, "failed to remove gpiochip\n");
+out_rem_irq:
+	abx500_gpio_irq_remove(pct);
+out_free:
+	mutex_destroy(&pct->lock);
+	return ret;
+}
+
+/*
+ * abx500_gpio_remove() - remove Ab8500-gpio driver
+ * @pdev :	Platform device registered
+ */
+static int abx500_gpio_remove(struct platform_device *pdev)
+{
+	struct abx500_pinctrl *pct = platform_get_drvdata(pdev);
+	int ret;
+
+	ret = gpiochip_remove(&pct->chip);
+	if (ret < 0) {
+		dev_err(pct->dev, "unable to remove gpiochip: %d\n",
+			ret);
+		return ret;
+	}
+
+	mutex_destroy(&pct->lock);
+
+	return 0;
+}
+
+static const struct platform_device_id abx500_pinctrl_id[] = {
+	{ "pinctrl-ab8500", PINCTRL_AB8500 },
+	{ "pinctrl-ab8540", PINCTRL_AB8540 },
+	{ "pinctrl-ab9540", PINCTRL_AB9540 },
+	{ "pinctrl-ab8505", PINCTRL_AB8505 },
+	{ },
+};
+
+static struct platform_driver abx500_gpio_driver = {
+	.driver = {
+		.name = "abx500-gpio",
+		.owner = THIS_MODULE,
+	},
+	.probe = abx500_gpio_probe,
+	.remove = abx500_gpio_remove,
+	.id_table = abx500_pinctrl_id,
+};
+
+static int __init abx500_gpio_init(void)
+{
+	return platform_driver_register(&abx500_gpio_driver);
+}
+core_initcall(abx500_gpio_init);
+
+MODULE_AUTHOR("Patrice Chotard <patrice.chotard@st.com>");
+MODULE_DESCRIPTION("Driver allows to use AxB5xx unused pins to be used as GPIO");
+MODULE_ALIAS("platform:abx500-gpio");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/pinctrl/pinctrl-abx500.h b/drivers/pinctrl/pinctrl-abx500.h
new file mode 100644
index 000000000000..436ace356175
--- /dev/null
+++ b/drivers/pinctrl/pinctrl-abx500.h
@@ -0,0 +1,180 @@
+#ifndef PINCTRL_PINCTRL_ABx5O0_H
+#define PINCTRL_PINCTRL_ABx500_H
+
+/* Package definitions */
+#define PINCTRL_AB8500	0
+#define PINCTRL_AB8540	1
+#define PINCTRL_AB9540	2
+#define PINCTRL_AB8505	3
+
+/* pins alternate function */
+enum abx500_pin_func {
+	ABX500_DEFAULT,
+	ABX500_ALT_A,
+	ABX500_ALT_B,
+	ABX500_ALT_C,
+};
+
+/**
+ * struct abx500_function - ABx500 pinctrl mux function
+ * @name: The name of the function, exported to pinctrl core.
+ * @groups: An array of pin groups that may select this function.
+ * @ngroups: The number of entries in @groups.
+ */
+struct abx500_function {
+	const char *name;
+	const char * const *groups;
+	unsigned ngroups;
+};
+
+/**
+ * struct abx500_pingroup - describes a ABx500 pin group
+ * @name: the name of this specific pin group
+ * @pins: an array of discrete physical pins used in this group, taken
+ *	from the driver-local pin enumeration space
+ * @num_pins: the number of pins in this group array, i.e. the number of
+ *	elements in .pins so we can iterate over that array
+ * @altsetting: the altsetting to apply to all pins in this group to
+ *	configure them to be used by a function
+ */
+struct abx500_pingroup {
+	const char *name;
+	const unsigned int *pins;
+	const unsigned npins;
+	int altsetting;
+};
+
+#define ALTERNATE_FUNCTIONS(pin, sel_bit, alt1, alt2, alta, altb, altc)	\
+{									\
+	.pin_number = pin,						\
+	.gpiosel_bit = sel_bit,						\
+	.alt_bit1 = alt1,						\
+	.alt_bit2 = alt2,						\
+	.alta_val = alta,						\
+	.altb_val = altb,						\
+	.altc_val = altc,						\
+}
+
+#define UNUSED -1
+/**
+ * struct alternate_functions
+ * @pin_number:		The pin number
+ * @gpiosel_bit:	Control bit in GPIOSEL register,
+ * @alt_bit1:		First AlternateFunction bit used to select the
+ *			alternate function
+ * @alt_bit2:		Second AlternateFunction bit used to select the
+ *			alternate function
+ *
+ *			these 3 following fields are necessary due to none
+ *			coherency on how to select the altA, altB and altC
+ *			function between the ABx500 SOC family when using
+ *			alternatfunc register.
+ * @alta_val:		value to write in alternatfunc to select altA function
+ * @altb_val:		value to write in alternatfunc to select altB function
+ * @altc_val:		value to write in alternatfunc to select altC function
+ */
+struct alternate_functions {
+	unsigned pin_number;
+	s8 gpiosel_bit;
+	s8 alt_bit1;
+	s8 alt_bit2;
+	u8 alta_val;
+	u8 altb_val;
+	u8 altc_val;
+};
+
+/**
+ * struct pullud - specific pull up/down feature
+ * @first_pin:		The pin number of the first pins which support
+ *			specific pull up/down
+ * @last_pin:		The pin number of the last pins
+ */
+struct pullud {
+	unsigned first_pin;
+	unsigned last_pin;
+};
+
+#define GPIO_IRQ_CLUSTER(a, b, c)	\
+{					\
+	.start = a,			\
+	.end = b,			\
+	.offset = c,			\
+}
+
+/**
+ * struct abx500_gpio_irq_cluster - indicates GPIOs which are interrupt
+ *			capable
+ * @start:		The pin number of the first pin interrupt capable
+ * @end:		The pin number of the last pin interrupt capable
+ * @offset:		offset used to compute specific setting strategy of
+ *			the interrupt line
+ */
+
+struct abx500_gpio_irq_cluster {
+	int start;
+	int end;
+	int offset;
+};
+
+/**
+ * struct abx500_pinrange - map pin numbers to GPIO offsets
+ * @offset:		offset into the GPIO local numberspace, incidentally
+ *			identical to the offset into the local pin numberspace
+ * @npins:		number of pins to map from both offsets
+ * @altfunc:		altfunc setting to be used to enable GPIO on a pin in
+ *			this range (may vary)
+ */
+struct abx500_pinrange {
+	unsigned int offset;
+	unsigned int npins;
+	int altfunc;
+};
+
+#define ABX500_PINRANGE(a, b, c) { .offset = a, .npins = b, .altfunc = c }
+
+/**
+ * struct abx500_pinctrl_soc_data - ABx500 pin controller per-SoC configuration
+ * @gpio_ranges:	An array of GPIO ranges for this SoC
+ * @gpio_num_ranges:	The number of GPIO ranges for this SoC
+ * @pins:		An array describing all pins the pin controller affects.
+ *			All pins which are also GPIOs must be listed first within the
+ *			array, and be numbered identically to the GPIO controller's
+ *			numbering.
+ * @npins:		The number of entries in @pins.
+ * @functions:		The functions supported on this SoC.
+ * @nfunction:		The number of entries in @functions.
+ * @groups:		An array describing all pin groups the pin SoC supports.
+ * @ngroups:		The number of entries in @groups.
+ * @alternate_functions: array describing pins which supports alternate and
+ *			how to set it.
+ * @pullud:		array describing pins which supports pull up/down
+ *			specific registers.
+ * @gpio_irq_cluster:	An array of GPIO interrupt capable for this SoC
+ * @ngpio_irq_cluster:	The number of GPIO inetrrupt capable for this SoC
+ * @irq_gpio_rising_offset: Interrupt offset used as base to compute specific
+ *			setting strategy of the rising interrupt line
+ * @irq_gpio_falling_offset: Interrupt offset used as base to compute specific
+ *			setting strategy of the falling interrupt line
+ * @irq_gpio_factor:	Factor used to compute specific setting strategy of
+ *			the interrupt line
+ */
+
+struct abx500_pinctrl_soc_data {
+	const struct abx500_pinrange *gpio_ranges;
+	unsigned gpio_num_ranges;
+	const struct pinctrl_pin_desc *pins;
+	unsigned npins;
+	const struct abx500_function *functions;
+	unsigned nfunctions;
+	const struct abx500_pingroup *groups;
+	unsigned ngroups;
+	struct alternate_functions *alternate_functions;
+	struct pullud *pullud;
+	struct abx500_gpio_irq_cluster *gpio_irq_cluster;
+	unsigned ngpio_irq_cluster;
+	int irq_gpio_rising_offset;
+	int irq_gpio_falling_offset;
+	int irq_gpio_factor;
+};
+
+#endif /* PINCTRL_PINCTRL_ABx500_H */
diff --git a/include/linux/mfd/abx500/ab8500-gpio.h b/include/linux/mfd/abx500/ab8500-gpio.h
index 2387c207ea86..e8c8281e194a 100644
--- a/include/linux/mfd/abx500/ab8500-gpio.h
+++ b/include/linux/mfd/abx500/ab8500-gpio.h
@@ -14,10 +14,21 @@
  * registers.
  */
 
-struct ab8500_gpio_platform_data {
+struct abx500_gpio_platform_data {
 	int gpio_base;
 	u32 irq_base;
-	u8  config_reg[8];
+};
+
+enum abx500_gpio_pull_updown {
+	ABX500_GPIO_PULL_DOWN = 0x0,
+	ABX500_GPIO_PULL_NONE = 0x1,
+	ABX500_GPIO_PULL_UP = 0x3,
+};
+
+enum abx500_gpio_vinsel {
+	ABX500_GPIO_VINSEL_VBAT = 0x0,
+	ABX500_GPIO_VINSEL_VIN_1V8 = 0x1,
+	ABX500_GPIO_VINSEL_VDD_BIF = 0x2,
 };
 
 #endif /* _AB8500_GPIO_H */
diff --git a/include/linux/mfd/abx500/ab8500.h b/include/linux/mfd/abx500/ab8500.h
index e640ea059be2..fc0534483c72 100644
--- a/include/linux/mfd/abx500/ab8500.h
+++ b/include/linux/mfd/abx500/ab8500.h
@@ -385,7 +385,7 @@ struct ab8500_platform_data {
 	struct ab8500_regulator_reg_init *regulator_reg_init;
 	int num_regulator;
 	struct regulator_init_data *regulator;
-	struct ab8500_gpio_platform_data *gpio;
+	struct abx500_gpio_platform_data *gpio;
 	struct ab8500_codec_platform_data *codec;
 };
 
-- 
cgit v1.2.3


From 83e68189745ad931c2afd45d8ee3303929233e7f Mon Sep 17 00:00:00 2001
From: Matt Fleming <matt.fleming@intel.com>
Date: Wed, 14 Nov 2012 09:42:35 +0000
Subject: efi: Make 'efi_enabled' a function to query EFI facilities

Originally 'efi_enabled' indicated whether a kernel was booted from
EFI firmware. Over time its semantics have changed, and it now
indicates whether or not we are booted on an EFI machine with
bit-native firmware, e.g. 64-bit kernel with 64-bit firmware.

The immediate motivation for this patch is the bug report at,

    https://bugs.launchpad.net/ubuntu-cdimage/+bug/1040557

which details how running a platform driver on an EFI machine that is
designed to run under BIOS can cause the machine to become
bricked. Also, the following report,

    https://bugzilla.kernel.org/show_bug.cgi?id=47121

details how running said driver can also cause Machine Check
Exceptions. Drivers need a new means of detecting whether they're
running on an EFI machine, as sadly the expression,

    if (!efi_enabled)

hasn't been a sufficient condition for quite some time.

Users actually want to query 'efi_enabled' for different reasons -
what they really want access to is the list of available EFI
facilities.

For instance, the x86 reboot code needs to know whether it can invoke
the ResetSystem() function provided by the EFI runtime services, while
the ACPI OSL code wants to know whether the EFI config tables were
mapped successfully. There are also checks in some of the platform
driver code to simply see if they're running on an EFI machine (which
would make it a bad idea to do BIOS-y things).

This patch is a prereq for the samsung-laptop fix patch.

Cc: David Airlie <airlied@linux.ie>
Cc: Corentin Chary <corentincj@iksaif.net>
Cc: Matthew Garrett <mjg59@srcf.ucam.org>
Cc: Dave Jiang <dave.jiang@intel.com>
Cc: Olof Johansson <olof@lixom.net>
Cc: Peter Jones <pjones@redhat.com>
Cc: Colin Ian King <colin.king@canonical.com>
Cc: Steve Langasek <steve.langasek@canonical.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Konrad Rzeszutek Wilk <konrad@kernel.org>
Cc: Rafael J. Wysocki <rjw@sisk.pl>
Cc: <stable@vger.kernel.org>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/include/asm/efi.h             |  1 +
 arch/x86/kernel/reboot.c               |  2 +-
 arch/x86/kernel/setup.c                | 28 ++++++++---------
 arch/x86/platform/efi/efi.c            | 57 ++++++++++++++++++++--------------
 drivers/acpi/osl.c                     |  2 +-
 drivers/firmware/dmi_scan.c            |  2 +-
 drivers/firmware/efivars.c             |  4 +--
 drivers/firmware/iscsi_ibft_find.c     |  2 +-
 drivers/gpu/drm/radeon/radeon_device.c |  3 +-
 drivers/platform/x86/ibm_rtl.c         |  2 +-
 drivers/scsi/isci/init.c               |  2 +-
 include/linux/efi.h                    | 24 ++++++++++----
 init/main.c                            |  4 +--
 13 files changed, 79 insertions(+), 54 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 6e8fdf5ad113..28677c55113f 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -94,6 +94,7 @@ extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size,
 #endif /* CONFIG_X86_32 */
 
 extern int add_efi_memmap;
+extern unsigned long x86_efi_facility;
 extern void efi_set_executable(efi_memory_desc_t *md, bool executable);
 extern int efi_memblock_x86_reserve_range(void);
 extern void efi_call_phys_prelog(void);
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 4e8ba39eaf0f..76fa1e9a2b39 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -584,7 +584,7 @@ static void native_machine_emergency_restart(void)
 			break;
 
 		case BOOT_EFI:
-			if (efi_enabled)
+			if (efi_enabled(EFI_RUNTIME_SERVICES))
 				efi.reset_system(reboot_mode ?
 						 EFI_RESET_WARM :
 						 EFI_RESET_COLD,
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 00f6c1472b85..8b24289cc10c 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -807,15 +807,15 @@ void __init setup_arch(char **cmdline_p)
 #ifdef CONFIG_EFI
 	if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature,
 		     "EL32", 4)) {
-		efi_enabled = 1;
-		efi_64bit = false;
+		set_bit(EFI_BOOT, &x86_efi_facility);
 	} else if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature,
 		     "EL64", 4)) {
-		efi_enabled = 1;
-		efi_64bit = true;
+		set_bit(EFI_BOOT, &x86_efi_facility);
+		set_bit(EFI_64BIT, &x86_efi_facility);
 	}
-	if (efi_enabled && efi_memblock_x86_reserve_range())
-		efi_enabled = 0;
+
+	if (efi_enabled(EFI_BOOT))
+		efi_memblock_x86_reserve_range();
 #endif
 
 	x86_init.oem.arch_setup();
@@ -888,7 +888,7 @@ void __init setup_arch(char **cmdline_p)
 
 	finish_e820_parsing();
 
-	if (efi_enabled)
+	if (efi_enabled(EFI_BOOT))
 		efi_init();
 
 	dmi_scan_machine();
@@ -971,7 +971,7 @@ void __init setup_arch(char **cmdline_p)
 	 * The EFI specification says that boot service code won't be called
 	 * after ExitBootServices(). This is, in fact, a lie.
 	 */
-	if (efi_enabled)
+	if (efi_enabled(EFI_MEMMAP))
 		efi_reserve_boot_services();
 
 	/* preallocate 4k for mptable mpc */
@@ -1114,7 +1114,7 @@ void __init setup_arch(char **cmdline_p)
 
 #ifdef CONFIG_VT
 #if defined(CONFIG_VGA_CONSOLE)
-	if (!efi_enabled || (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY))
+	if (!efi_enabled(EFI_BOOT) || (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY))
 		conswitchp = &vga_con;
 #elif defined(CONFIG_DUMMY_CONSOLE)
 	conswitchp = &dummy_con;
@@ -1131,14 +1131,14 @@ void __init setup_arch(char **cmdline_p)
 	register_refined_jiffies(CLOCK_TICK_RATE);
 
 #ifdef CONFIG_EFI
-	/* Once setup is done above, disable efi_enabled on mismatched
-	 * firmware/kernel archtectures since there is no support for
-	 * runtime services.
+	/* Once setup is done above, unmap the EFI memory map on
+	 * mismatched firmware/kernel archtectures since there is no
+	 * support for runtime services.
 	 */
-	if (efi_enabled && IS_ENABLED(CONFIG_X86_64) != efi_64bit) {
+	if (efi_enabled(EFI_BOOT) &&
+	    IS_ENABLED(CONFIG_X86_64) != efi_enabled(EFI_64BIT)) {
 		pr_info("efi: Setup done, disabling due to 32/64-bit mismatch\n");
 		efi_unmap_memmap();
-		efi_enabled = 0;
 	}
 #endif
 }
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index ad4439145f85..5426e482db6e 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -51,9 +51,6 @@
 
 #define EFI_DEBUG	1
 
-int efi_enabled;
-EXPORT_SYMBOL(efi_enabled);
-
 struct efi __read_mostly efi = {
 	.mps        = EFI_INVALID_TABLE_ADDR,
 	.acpi       = EFI_INVALID_TABLE_ADDR,
@@ -69,19 +66,28 @@ EXPORT_SYMBOL(efi);
 
 struct efi_memory_map memmap;
 
-bool efi_64bit;
-
 static struct efi efi_phys __initdata;
 static efi_system_table_t efi_systab __initdata;
 
 static inline bool efi_is_native(void)
 {
-	return IS_ENABLED(CONFIG_X86_64) == efi_64bit;
+	return IS_ENABLED(CONFIG_X86_64) == efi_enabled(EFI_64BIT);
+}
+
+unsigned long x86_efi_facility;
+
+/*
+ * Returns 1 if 'facility' is enabled, 0 otherwise.
+ */
+int efi_enabled(int facility)
+{
+	return test_bit(facility, &x86_efi_facility) != 0;
 }
+EXPORT_SYMBOL(efi_enabled);
 
 static int __init setup_noefi(char *arg)
 {
-	efi_enabled = 0;
+	clear_bit(EFI_BOOT, &x86_efi_facility);
 	return 0;
 }
 early_param("noefi", setup_noefi);
@@ -426,6 +432,7 @@ void __init efi_reserve_boot_services(void)
 
 void __init efi_unmap_memmap(void)
 {
+	clear_bit(EFI_MEMMAP, &x86_efi_facility);
 	if (memmap.map) {
 		early_iounmap(memmap.map, memmap.nr_map * memmap.desc_size);
 		memmap.map = NULL;
@@ -460,7 +467,7 @@ void __init efi_free_boot_services(void)
 
 static int __init efi_systab_init(void *phys)
 {
-	if (efi_64bit) {
+	if (efi_enabled(EFI_64BIT)) {
 		efi_system_table_64_t *systab64;
 		u64 tmp = 0;
 
@@ -552,7 +559,7 @@ static int __init efi_config_init(u64 tables, int nr_tables)
 	void *config_tables, *tablep;
 	int i, sz;
 
-	if (efi_64bit)
+	if (efi_enabled(EFI_64BIT))
 		sz = sizeof(efi_config_table_64_t);
 	else
 		sz = sizeof(efi_config_table_32_t);
@@ -572,7 +579,7 @@ static int __init efi_config_init(u64 tables, int nr_tables)
 		efi_guid_t guid;
 		unsigned long table;
 
-		if (efi_64bit) {
+		if (efi_enabled(EFI_64BIT)) {
 			u64 table64;
 			guid = ((efi_config_table_64_t *)tablep)->guid;
 			table64 = ((efi_config_table_64_t *)tablep)->table;
@@ -684,7 +691,6 @@ void __init efi_init(void)
 	if (boot_params.efi_info.efi_systab_hi ||
 	    boot_params.efi_info.efi_memmap_hi) {
 		pr_info("Table located above 4GB, disabling EFI.\n");
-		efi_enabled = 0;
 		return;
 	}
 	efi_phys.systab = (efi_system_table_t *)boot_params.efi_info.efi_systab;
@@ -694,10 +700,10 @@ void __init efi_init(void)
 			  ((__u64)boot_params.efi_info.efi_systab_hi<<32));
 #endif
 
-	if (efi_systab_init(efi_phys.systab)) {
-		efi_enabled = 0;
+	if (efi_systab_init(efi_phys.systab))
 		return;
-	}
+
+	set_bit(EFI_SYSTEM_TABLES, &x86_efi_facility);
 
 	/*
 	 * Show what we know for posterity
@@ -715,10 +721,10 @@ void __init efi_init(void)
 		efi.systab->hdr.revision >> 16,
 		efi.systab->hdr.revision & 0xffff, vendor);
 
-	if (efi_config_init(efi.systab->tables, efi.systab->nr_tables)) {
-		efi_enabled = 0;
+	if (efi_config_init(efi.systab->tables, efi.systab->nr_tables))
 		return;
-	}
+
+	set_bit(EFI_CONFIG_TABLES, &x86_efi_facility);
 
 	/*
 	 * Note: We currently don't support runtime services on an EFI
@@ -727,15 +733,17 @@ void __init efi_init(void)
 
 	if (!efi_is_native())
 		pr_info("No EFI runtime due to 32/64-bit mismatch with kernel\n");
-	else if (efi_runtime_init()) {
-		efi_enabled = 0;
-		return;
+	else {
+		if (efi_runtime_init())
+			return;
+		set_bit(EFI_RUNTIME_SERVICES, &x86_efi_facility);
 	}
 
-	if (efi_memmap_init()) {
-		efi_enabled = 0;
+	if (efi_memmap_init())
 		return;
-	}
+
+	set_bit(EFI_MEMMAP, &x86_efi_facility);
+
 #ifdef CONFIG_X86_32
 	if (efi_is_native()) {
 		x86_platform.get_wallclock = efi_get_time;
@@ -969,6 +977,9 @@ u32 efi_mem_type(unsigned long phys_addr)
 	efi_memory_desc_t *md;
 	void *p;
 
+	if (!efi_enabled(EFI_MEMMAP))
+		return 0;
+
 	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
 		md = p;
 		if ((md->phys_addr <= phys_addr) &&
diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c
index 3ff267861541..bd22f8667eed 100644
--- a/drivers/acpi/osl.c
+++ b/drivers/acpi/osl.c
@@ -250,7 +250,7 @@ acpi_physical_address __init acpi_os_get_root_pointer(void)
 		return acpi_rsdp;
 #endif
 
-	if (efi_enabled) {
+	if (efi_enabled(EFI_CONFIG_TABLES)) {
 		if (efi.acpi20 != EFI_INVALID_TABLE_ADDR)
 			return efi.acpi20;
 		else if (efi.acpi != EFI_INVALID_TABLE_ADDR)
diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c
index fd3ae6290d71..982f1f5f5742 100644
--- a/drivers/firmware/dmi_scan.c
+++ b/drivers/firmware/dmi_scan.c
@@ -471,7 +471,7 @@ void __init dmi_scan_machine(void)
 	char __iomem *p, *q;
 	int rc;
 
-	if (efi_enabled) {
+	if (efi_enabled(EFI_CONFIG_TABLES)) {
 		if (efi.smbios == EFI_INVALID_TABLE_ADDR)
 			goto error;
 
diff --git a/drivers/firmware/efivars.c b/drivers/firmware/efivars.c
index 7b1c37497c9a..1065119dff92 100644
--- a/drivers/firmware/efivars.c
+++ b/drivers/firmware/efivars.c
@@ -1782,7 +1782,7 @@ efivars_init(void)
 	printk(KERN_INFO "EFI Variables Facility v%s %s\n", EFIVARS_VERSION,
 	       EFIVARS_DATE);
 
-	if (!efi_enabled)
+	if (!efi_enabled(EFI_RUNTIME_SERVICES))
 		return 0;
 
 	/* For now we'll register the efi directory at /sys/firmware/efi */
@@ -1822,7 +1822,7 @@ err_put:
 static void __exit
 efivars_exit(void)
 {
-	if (efi_enabled) {
+	if (efi_enabled(EFI_RUNTIME_SERVICES)) {
 		unregister_efivars(&__efivars);
 		kobject_put(efi_kobj);
 	}
diff --git a/drivers/firmware/iscsi_ibft_find.c b/drivers/firmware/iscsi_ibft_find.c
index 4da4eb9ae926..2224f1dc074b 100644
--- a/drivers/firmware/iscsi_ibft_find.c
+++ b/drivers/firmware/iscsi_ibft_find.c
@@ -99,7 +99,7 @@ unsigned long __init find_ibft_region(unsigned long *sizep)
 	/* iBFT 1.03 section 1.4.3.1 mandates that UEFI machines will
 	 * only use ACPI for this */
 
-	if (!efi_enabled)
+	if (!efi_enabled(EFI_BOOT))
 		find_ibft_in_mem();
 
 	if (ibft_addr) {
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index edfc54e41842..0d6562bb0c93 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -429,7 +429,8 @@ bool radeon_card_posted(struct radeon_device *rdev)
 {
 	uint32_t reg;
 
-	if (efi_enabled && rdev->pdev->subsystem_vendor == PCI_VENDOR_ID_APPLE)
+	if (efi_enabled(EFI_BOOT) &&
+	    rdev->pdev->subsystem_vendor == PCI_VENDOR_ID_APPLE)
 		return false;
 
 	/* first check CRTCs */
diff --git a/drivers/platform/x86/ibm_rtl.c b/drivers/platform/x86/ibm_rtl.c
index 7481146a5b47..97c2be195efc 100644
--- a/drivers/platform/x86/ibm_rtl.c
+++ b/drivers/platform/x86/ibm_rtl.c
@@ -244,7 +244,7 @@ static int __init ibm_rtl_init(void) {
 	if (force)
 		pr_warn("module loaded by force\n");
 	/* first ensure that we are running on IBM HW */
-	else if (efi_enabled || !dmi_check_system(ibm_rtl_dmi_table))
+	else if (efi_enabled(EFI_BOOT) || !dmi_check_system(ibm_rtl_dmi_table))
 		return -ENODEV;
 
 	/* Get the address for the Extended BIOS Data Area */
diff --git a/drivers/scsi/isci/init.c b/drivers/scsi/isci/init.c
index d73fdcfeb45a..2839baa82a5a 100644
--- a/drivers/scsi/isci/init.c
+++ b/drivers/scsi/isci/init.c
@@ -633,7 +633,7 @@ static int isci_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 		return -ENOMEM;
 	pci_set_drvdata(pdev, pci_info);
 
-	if (efi_enabled)
+	if (efi_enabled(EFI_RUNTIME_SERVICES))
 		orom = isci_get_efi_var(pdev);
 
 	if (!orom)
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 8b84916dc671..7a9498ab3c2d 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -618,18 +618,30 @@ extern int __init efi_setup_pcdp_console(char *);
 #endif
 
 /*
- * We play games with efi_enabled so that the compiler will, if possible, remove
- * EFI-related code altogether.
+ * We play games with efi_enabled so that the compiler will, if
+ * possible, remove EFI-related code altogether.
  */
+#define EFI_BOOT		0	/* Were we booted from EFI? */
+#define EFI_SYSTEM_TABLES	1	/* Can we use EFI system tables? */
+#define EFI_CONFIG_TABLES	2	/* Can we use EFI config tables? */
+#define EFI_RUNTIME_SERVICES	3	/* Can we use runtime services? */
+#define EFI_MEMMAP		4	/* Can we use EFI memory map? */
+#define EFI_64BIT		5	/* Is the firmware 64-bit? */
+
 #ifdef CONFIG_EFI
 # ifdef CONFIG_X86
-   extern int efi_enabled;
-   extern bool efi_64bit;
+extern int efi_enabled(int facility);
 # else
-#  define efi_enabled 1
+static inline int efi_enabled(int facility)
+{
+	return 1;
+}
 # endif
 #else
-# define efi_enabled 0
+static inline int efi_enabled(int facility)
+{
+	return 0;
+}
 #endif
 
 /*
diff --git a/init/main.c b/init/main.c
index 85d69dffe864..cd30179c86bd 100644
--- a/init/main.c
+++ b/init/main.c
@@ -604,7 +604,7 @@ asmlinkage void __init start_kernel(void)
 	pidmap_init();
 	anon_vma_init();
 #ifdef CONFIG_X86
-	if (efi_enabled)
+	if (efi_enabled(EFI_RUNTIME_SERVICES))
 		efi_enter_virtual_mode();
 #endif
 	thread_info_cache_init();
@@ -632,7 +632,7 @@ asmlinkage void __init start_kernel(void)
 	acpi_early_init(); /* before LAPIC and SMP init */
 	sfi_init_late();
 
-	if (efi_enabled) {
+	if (efi_enabled(EFI_RUNTIME_SERVICES)) {
 		efi_late_init();
 		efi_free_boot_services();
 	}
-- 
cgit v1.2.3


From a6ca2e10f795111a90a4efabb07717258669e03d Mon Sep 17 00:00:00 2001
From: Hauke Mehrtens <hauke@hauke-m.de>
Date: Sat, 26 Jan 2013 21:38:35 +0100
Subject: ssb: add gpio_to_irq

The old bcm47xx gpio code had support for gpio_to_irq, but the new
code did not provide this function, but returned -ENXIO all the time.
This patch adds the missing function.

arch/mips/bcm47xx/wgt634u.c calls gpio_to_irq() and got the correct irq
number with the old gpio handling code. With this patch the code in
wgt634u.c should work again. I do not have a wgt634u to test this.

Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/ssb/driver_gpio.c           | 22 ++++++++++++++++++++++
 include/linux/ssb/ssb_driver_mips.h |  5 +++++
 2 files changed, 27 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/ssb/driver_gpio.c b/drivers/ssb/driver_gpio.c
index 97ac0a38e3d0..accabe39b320 100644
--- a/drivers/ssb/driver_gpio.c
+++ b/drivers/ssb/driver_gpio.c
@@ -74,6 +74,16 @@ static void ssb_gpio_chipco_free(struct gpio_chip *chip, unsigned gpio)
 	ssb_chipco_gpio_pullup(&bus->chipco, 1 << gpio, 0);
 }
 
+static int ssb_gpio_chipco_to_irq(struct gpio_chip *chip, unsigned gpio)
+{
+	struct ssb_bus *bus = ssb_gpio_get_bus(chip);
+
+	if (bus->bustype == SSB_BUSTYPE_SSB)
+		return ssb_mips_irq(bus->chipco.dev) + 2;
+	else
+		return -EINVAL;
+}
+
 static int ssb_gpio_chipco_init(struct ssb_bus *bus)
 {
 	struct gpio_chip *chip = &bus->gpio;
@@ -86,6 +96,7 @@ static int ssb_gpio_chipco_init(struct ssb_bus *bus)
 	chip->set		= ssb_gpio_chipco_set_value;
 	chip->direction_input	= ssb_gpio_chipco_direction_input;
 	chip->direction_output	= ssb_gpio_chipco_direction_output;
+	chip->to_irq		= ssb_gpio_chipco_to_irq;
 	chip->ngpio		= 16;
 	/* There is just one SoC in one device and its GPIO addresses should be
 	 * deterministic to address them more easily. The other buses could get
@@ -134,6 +145,16 @@ static int ssb_gpio_extif_direction_output(struct gpio_chip *chip,
 	return 0;
 }
 
+static int ssb_gpio_extif_to_irq(struct gpio_chip *chip, unsigned gpio)
+{
+	struct ssb_bus *bus = ssb_gpio_get_bus(chip);
+
+	if (bus->bustype == SSB_BUSTYPE_SSB)
+		return ssb_mips_irq(bus->extif.dev) + 2;
+	else
+		return -EINVAL;
+}
+
 static int ssb_gpio_extif_init(struct ssb_bus *bus)
 {
 	struct gpio_chip *chip = &bus->gpio;
@@ -144,6 +165,7 @@ static int ssb_gpio_extif_init(struct ssb_bus *bus)
 	chip->set		= ssb_gpio_extif_set_value;
 	chip->direction_input	= ssb_gpio_extif_direction_input;
 	chip->direction_output	= ssb_gpio_extif_direction_output;
+	chip->to_irq		= ssb_gpio_extif_to_irq;
 	chip->ngpio		= 5;
 	/* There is just one SoC in one device and its GPIO addresses should be
 	 * deterministic to address them more easily. The other buses could get
diff --git a/include/linux/ssb/ssb_driver_mips.h b/include/linux/ssb/ssb_driver_mips.h
index 07a9c7a2e088..afe79d40a99e 100644
--- a/include/linux/ssb/ssb_driver_mips.h
+++ b/include/linux/ssb/ssb_driver_mips.h
@@ -45,6 +45,11 @@ void ssb_mipscore_init(struct ssb_mipscore *mcore)
 {
 }
 
+static inline unsigned int ssb_mips_irq(struct ssb_device *dev)
+{
+	return 0;
+}
+
 #endif /* CONFIG_SSB_DRIVER_MIPS */
 
 #endif /* LINUX_SSB_MIPSCORE_H_ */
-- 
cgit v1.2.3


From 8f1ca2683225afa21b827ff620a6225c390771a9 Mon Sep 17 00:00:00 2001
From: Hauke Mehrtens <hauke@hauke-m.de>
Date: Sat, 26 Jan 2013 21:39:44 +0100
Subject: bcma: add gpio_to_irq

The old bcm47xx gpio code had support for gpio_to_irq, but the new
code did not provide this function, but returned -ENXIO all the time.
This patch adds the missing function.

Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/bcma/driver_gpio.c            | 11 +++++++++++
 include/linux/bcma/bcma_driver_mips.h |  9 +++++++--
 2 files changed, 18 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/bcma/driver_gpio.c b/drivers/bcma/driver_gpio.c
index 9a6f585da2d9..0b5df538dfd9 100644
--- a/drivers/bcma/driver_gpio.c
+++ b/drivers/bcma/driver_gpio.c
@@ -73,6 +73,16 @@ static void bcma_gpio_free(struct gpio_chip *chip, unsigned gpio)
 	bcma_chipco_gpio_pullup(cc, 1 << gpio, 0);
 }
 
+static int bcma_gpio_to_irq(struct gpio_chip *chip, unsigned gpio)
+{
+	struct bcma_drv_cc *cc = bcma_gpio_get_cc(chip);
+
+	if (cc->core->bus->hosttype == BCMA_HOSTTYPE_SOC)
+		return bcma_core_irq(cc->core);
+	else
+		return -EINVAL;
+}
+
 int bcma_gpio_init(struct bcma_drv_cc *cc)
 {
 	struct gpio_chip *chip = &cc->gpio;
@@ -85,6 +95,7 @@ int bcma_gpio_init(struct bcma_drv_cc *cc)
 	chip->set		= bcma_gpio_set_value;
 	chip->direction_input	= bcma_gpio_direction_input;
 	chip->direction_output	= bcma_gpio_direction_output;
+	chip->to_irq		= bcma_gpio_to_irq;
 	chip->ngpio		= 16;
 	/* There is just one SoC in one device and its GPIO addresses should be
 	 * deterministic to address them more easily. The other buses could get
diff --git a/include/linux/bcma/bcma_driver_mips.h b/include/linux/bcma/bcma_driver_mips.h
index 0d1ea297851a..fb61f3fb4ddb 100644
--- a/include/linux/bcma/bcma_driver_mips.h
+++ b/include/linux/bcma/bcma_driver_mips.h
@@ -42,13 +42,18 @@ struct bcma_drv_mips {
 #ifdef CONFIG_BCMA_DRIVER_MIPS
 extern void bcma_core_mips_init(struct bcma_drv_mips *mcore);
 extern void bcma_core_mips_early_init(struct bcma_drv_mips *mcore);
+
+extern unsigned int bcma_core_irq(struct bcma_device *core);
 #else
 static inline void bcma_core_mips_init(struct bcma_drv_mips *mcore) { }
 static inline void bcma_core_mips_early_init(struct bcma_drv_mips *mcore) { }
+
+static inline unsigned int bcma_core_irq(struct bcma_device *core)
+{
+	return 0;
+}
 #endif
 
 extern u32 bcma_cpu_clock(struct bcma_drv_mips *mcore);
 
-extern unsigned int bcma_core_irq(struct bcma_device *core);
-
 #endif /* LINUX_BCMA_DRIVER_MIPS_H_ */
-- 
cgit v1.2.3


From 70c37bf97f2a91accba76080db69144f3b69f736 Mon Sep 17 00:00:00 2001
From: Bjørn Mork <bjorn@mork.no>
Date: Mon, 28 Jan 2013 23:51:28 +0000
Subject: net: usbnet: prevent buggy devices from killing us
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A device sending 0 length frames as fast as it can has been
observed killing the host system due to the resulting memory
pressure.

Temporarily disable RX skb allocation and URB submission when
the current error ratio is high, preventing us from trying to
allocate an infinite number of skbs.  Reenable as soon as we
are finished processing the done queue, allowing the device
to continue working after short error bursts.

Signed-off-by: Bjørn Mork <bjorn@mork.no>
Acked-by: Oliver Neukum <oneukum@suse.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/usbnet.c   | 25 +++++++++++++++++++++++++
 include/linux/usb/usbnet.h |  2 ++
 2 files changed, 27 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c
index f34b2ebee815..977837725726 100644
--- a/drivers/net/usb/usbnet.c
+++ b/drivers/net/usb/usbnet.c
@@ -380,6 +380,12 @@ static int rx_submit (struct usbnet *dev, struct urb *urb, gfp_t flags)
 	unsigned long		lockflags;
 	size_t			size = dev->rx_urb_size;
 
+	/* prevent rx skb allocation when error ratio is high */
+	if (test_bit(EVENT_RX_KILL, &dev->flags)) {
+		usb_free_urb(urb);
+		return -ENOLINK;
+	}
+
 	skb = __netdev_alloc_skb_ip_align(dev->net, size, flags);
 	if (!skb) {
 		netif_dbg(dev, rx_err, dev->net, "no rx skb\n");
@@ -539,6 +545,17 @@ block:
 		break;
 	}
 
+	/* stop rx if packet error rate is high */
+	if (++dev->pkt_cnt > 30) {
+		dev->pkt_cnt = 0;
+		dev->pkt_err = 0;
+	} else {
+		if (state == rx_cleanup)
+			dev->pkt_err++;
+		if (dev->pkt_err > 20)
+			set_bit(EVENT_RX_KILL, &dev->flags);
+	}
+
 	state = defer_bh(dev, skb, &dev->rxq, state);
 
 	if (urb) {
@@ -791,6 +808,11 @@ int usbnet_open (struct net_device *net)
 		   (dev->driver_info->flags & FLAG_FRAMING_AX) ? "ASIX" :
 		   "simple");
 
+	/* reset rx error state */
+	dev->pkt_cnt = 0;
+	dev->pkt_err = 0;
+	clear_bit(EVENT_RX_KILL, &dev->flags);
+
 	// delay posting reads until we're fully open
 	tasklet_schedule (&dev->bh);
 	if (info->manage_power) {
@@ -1254,6 +1276,9 @@ static void usbnet_bh (unsigned long param)
 		}
 	}
 
+	/* restart RX again after disabling due to high error rate */
+	clear_bit(EVENT_RX_KILL, &dev->flags);
+
 	// waiting for all pending urbs to complete?
 	if (dev->wait) {
 		if ((dev->txq.qlen + dev->rxq.qlen + dev->done.qlen) == 0) {
diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h
index 5de7a220e986..0de078d4cdb9 100644
--- a/include/linux/usb/usbnet.h
+++ b/include/linux/usb/usbnet.h
@@ -33,6 +33,7 @@ struct usbnet {
 	wait_queue_head_t	*wait;
 	struct mutex		phy_mutex;
 	unsigned char		suspend_count;
+	unsigned char		pkt_cnt, pkt_err;
 
 	/* i/o info: pipes etc */
 	unsigned		in, out;
@@ -70,6 +71,7 @@ struct usbnet {
 #		define EVENT_DEV_OPEN	7
 #		define EVENT_DEVICE_REPORT_IDLE	8
 #		define EVENT_NO_RUNTIME_PM	9
+#		define EVENT_RX_KILL	10
 };
 
 static inline struct usb_driver *driver_of(struct usb_interface *intf)
-- 
cgit v1.2.3


From 18367681a10bd29c3f2305e6b7b984de5b33d548 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki / 吉藤英明 <yoshfuji@linux-ipv6.org>
Date: Wed, 30 Jan 2013 09:27:52 +0000
Subject: ipv6 flowlabel: Convert np->ipv6_fl_list to RCU.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ipv6.h     |  2 +-
 include/net/ipv6.h       |  1 +
 net/ipv6/ip6_flowlabel.c | 72 +++++++++++++++++++++++++++---------------------
 3 files changed, 42 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index e971e3742172..850e95bc766c 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -214,7 +214,7 @@ struct ipv6_pinfo {
 
 	struct ipv6_mc_socklist	__rcu *ipv6_mc_list;
 	struct ipv6_ac_socklist	*ipv6_ac_list;
-	struct ipv6_fl_socklist *ipv6_fl_list;
+	struct ipv6_fl_socklist __rcu *ipv6_fl_list;
 
 	struct ipv6_txoptions	*opt;
 	struct sk_buff		*pktoptions;
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 1d457161def2..851d5412a299 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -239,6 +239,7 @@ struct ip6_flowlabel {
 struct ipv6_fl_socklist {
 	struct ipv6_fl_socklist	*next;
 	struct ip6_flowlabel	*fl;
+	struct rcu_head		rcu;
 };
 
 extern struct ip6_flowlabel	*fl6_sock_lookup(struct sock *sk, __be32 label);
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index da156015d827..22494afd981c 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -62,7 +62,7 @@ static DEFINE_SPINLOCK(ip6_fl_lock);
 
 /* Big socket sock */
 
-static DEFINE_RWLOCK(ip6_sk_fl_lock);
+static DEFINE_SPINLOCK(ip6_sk_fl_lock);
 
 #define for_each_fl_rcu(hash, fl)				\
 	for (fl = rcu_dereference(fl_ht[(hash)]);		\
@@ -73,6 +73,11 @@ static DEFINE_RWLOCK(ip6_sk_fl_lock);
 	     fl != NULL;					\
 	     fl = rcu_dereference(fl->next))
 
+#define for_each_sk_fl_rcu(np, sfl)				\
+	for (sfl = rcu_dereference_bh(np->ipv6_fl_list);	\
+	     sfl != NULL;					\
+	     sfl = rcu_dereference_bh(sfl->next))
+
 static inline struct ip6_flowlabel *__fl_lookup(struct net *net, __be32 label)
 {
 	struct ip6_flowlabel *fl;
@@ -244,17 +249,17 @@ struct ip6_flowlabel * fl6_sock_lookup(struct sock *sk, __be32 label)
 
 	label &= IPV6_FLOWLABEL_MASK;
 
-	read_lock_bh(&ip6_sk_fl_lock);
-	for (sfl=np->ipv6_fl_list; sfl; sfl = sfl->next) {
+	rcu_read_lock_bh();
+	for_each_sk_fl_rcu(np, sfl) {
 		struct ip6_flowlabel *fl = sfl->fl;
 		if (fl->label == label) {
 			fl->lastuse = jiffies;
 			atomic_inc(&fl->users);
-			read_unlock_bh(&ip6_sk_fl_lock);
+			rcu_read_unlock_bh();
 			return fl;
 		}
 	}
-	read_unlock_bh(&ip6_sk_fl_lock);
+	rcu_read_unlock_bh();
 	return NULL;
 }
 
@@ -265,20 +270,21 @@ void fl6_free_socklist(struct sock *sk)
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct ipv6_fl_socklist *sfl;
 
-	if (!np->ipv6_fl_list)
+	if (!rcu_access_pointer(np->ipv6_fl_list))
 		return;
 
-	write_lock_bh(&ipv6_sk_fl_lock);
-	sfl = np->ipv6_fl_list;
-	np->ipv6_fl_list = NULL;
-	write_unlock_bh(&ipv6_sk_fl_lock);
+	spin_lock_bh(&ip6_sk_fl_lock);
+	while ((sfl = rcu_dereference_protected(np->ipv6_fl_list,
+						lockdep_is_held(&ip6_sk_fl_lock))) != NULL) {
+		np->ipv6_fl_list = sfl->next;
+		spin_unlock_bh(&ip6_sk_fl_lock);
 
-	while (sfl) {
-		struct ipv6_fl_socklist *next = sfl->next;
 		fl_release(sfl->fl);
-		kfree(sfl);
-		sfl = next;
+		kfree_rcu(sfl, rcu);
+
+		spin_lock_bh(&ip6_sk_fl_lock);
 	}
+	spin_unlock_bh(&ip6_sk_fl_lock);
 }
 
 /* Service routines */
@@ -443,7 +449,7 @@ static int mem_check(struct sock *sk)
 	if (room > FL_MAX_SIZE - FL_MAX_PER_SOCK)
 		return 0;
 
-	for (sfl = np->ipv6_fl_list; sfl; sfl = sfl->next)
+	for_each_sk_fl_rcu(np, sfl)
 		count++;
 
 	if (room <= 0 ||
@@ -486,11 +492,11 @@ static bool ipv6_opt_cmp(struct ipv6_txoptions *o1, struct ipv6_txoptions *o2)
 static inline void fl_link(struct ipv6_pinfo *np, struct ipv6_fl_socklist *sfl,
 		struct ip6_flowlabel *fl)
 {
-	write_lock_bh(&ip6_sk_fl_lock);
+	spin_lock_bh(&ip6_sk_fl_lock);
 	sfl->fl = fl;
 	sfl->next = np->ipv6_fl_list;
-	np->ipv6_fl_list = sfl;
-	write_unlock_bh(&ip6_sk_fl_lock);
+	rcu_assign_pointer(np->ipv6_fl_list, sfl);
+	spin_unlock_bh(&ip6_sk_fl_lock);
 }
 
 int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)
@@ -512,31 +518,33 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)
 
 	switch (freq.flr_action) {
 	case IPV6_FL_A_PUT:
-		write_lock_bh(&ip6_sk_fl_lock);
-		for (sflp = &np->ipv6_fl_list; (sfl=*sflp)!=NULL; sflp = &sfl->next) {
+		spin_lock_bh(&ip6_sk_fl_lock);
+		for (sflp = &np->ipv6_fl_list;
+		     (sfl = rcu_dereference(*sflp))!=NULL;
+		     sflp = &sfl->next) {
 			if (sfl->fl->label == freq.flr_label) {
 				if (freq.flr_label == (np->flow_label&IPV6_FLOWLABEL_MASK))
 					np->flow_label &= ~IPV6_FLOWLABEL_MASK;
-				*sflp = sfl->next;
-				write_unlock_bh(&ip6_sk_fl_lock);
+				*sflp = rcu_dereference(sfl->next);
+				spin_unlock_bh(&ip6_sk_fl_lock);
 				fl_release(sfl->fl);
-				kfree(sfl);
+				kfree_rcu(sfl, rcu);
 				return 0;
 			}
 		}
-		write_unlock_bh(&ip6_sk_fl_lock);
+		spin_unlock_bh(&ip6_sk_fl_lock);
 		return -ESRCH;
 
 	case IPV6_FL_A_RENEW:
-		read_lock_bh(&ip6_sk_fl_lock);
-		for (sfl = np->ipv6_fl_list; sfl; sfl = sfl->next) {
+		rcu_read_lock_bh();
+		for_each_sk_fl_rcu(np, sfl) {
 			if (sfl->fl->label == freq.flr_label) {
 				err = fl6_renew(sfl->fl, freq.flr_linger, freq.flr_expires);
-				read_unlock_bh(&ip6_sk_fl_lock);
+				rcu_read_unlock_bh();
 				return err;
 			}
 		}
-		read_unlock_bh(&ip6_sk_fl_lock);
+		rcu_read_unlock_bh();
 
 		if (freq.flr_share == IPV6_FL_S_NONE &&
 		    ns_capable(net->user_ns, CAP_NET_ADMIN)) {
@@ -560,11 +568,11 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)
 
 		if (freq.flr_label) {
 			err = -EEXIST;
-			read_lock_bh(&ip6_sk_fl_lock);
-			for (sfl = np->ipv6_fl_list; sfl; sfl = sfl->next) {
+			rcu_read_lock_bh();
+			for_each_sk_fl_rcu(np, sfl) {
 				if (sfl->fl->label == freq.flr_label) {
 					if (freq.flr_flags&IPV6_FL_F_EXCL) {
-						read_unlock_bh(&ip6_sk_fl_lock);
+						rcu_read_unlock_bh();
 						goto done;
 					}
 					fl1 = sfl->fl;
@@ -572,7 +580,7 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)
 					break;
 				}
 			}
-			read_unlock_bh(&ip6_sk_fl_lock);
+			rcu_read_unlock_bh();
 
 			if (fl1 == NULL)
 				fl1 = fl_lookup(net, freq.flr_label);
-- 
cgit v1.2.3


From 7c9c003c68de51fb8712a01e904444ef40c742f5 Mon Sep 17 00:00:00 2001
From: Myron Stowe <myron.stowe@redhat.com>
Date: Fri, 25 Jan 2013 17:55:39 -0700
Subject: PCI: Introduce accessor to retrieve PCIe Capabilities Register

Provide an accessor to retrieve the PCI Express device's Capabilities
Register.

Signed-off-by: Myron Stowe <myron.stowe@redhat.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 include/linux/pci.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 15472d691ee6..78581e1d3f64 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1692,6 +1692,15 @@ static inline bool pci_is_pcie(struct pci_dev *dev)
 	return !!pci_pcie_cap(dev);
 }
 
+/**
+ * pcie_caps_reg - get the PCIe Capabilities Register
+ * @dev: PCI device
+ */
+static inline u16 pcie_caps_reg(const struct pci_dev *dev)
+{
+	return dev->pcie_flags_reg;
+}
+
 /**
  * pci_pcie_type - get the PCIe device/port type
  * @dev: PCI device
-- 
cgit v1.2.3


From 1c531d82ee1a220ae7132ba0eb31edaf186b56d1 Mon Sep 17 00:00:00 2001
From: Myron Stowe <myron.stowe@redhat.com>
Date: Fri, 25 Jan 2013 17:55:45 -0700
Subject: PCI: Use PCI Express Capability accessor

Use PCI Express Capability access functions to simplify device
Capabilities Register usages.

Signed-off-by: Myron Stowe <myron.stowe@redhat.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/access.c            | 4 ++--
 drivers/pci/pcie/portdrv_core.c | 2 +-
 include/linux/pci.h             | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/access.c b/drivers/pci/access.c
index 3af0478c057b..5278ac692cbf 100644
--- a/drivers/pci/access.c
+++ b/drivers/pci/access.c
@@ -472,7 +472,7 @@ EXPORT_SYMBOL_GPL(pci_cfg_access_unlock);
 
 static inline int pcie_cap_version(const struct pci_dev *dev)
 {
-	return dev->pcie_flags_reg & PCI_EXP_FLAGS_VERS;
+	return pcie_caps_reg(dev) & PCI_EXP_FLAGS_VERS;
 }
 
 static inline bool pcie_cap_has_devctl(const struct pci_dev *dev)
@@ -497,7 +497,7 @@ static inline bool pcie_cap_has_sltctl(const struct pci_dev *dev)
 	return pcie_cap_version(dev) > 1 ||
 	       type == PCI_EXP_TYPE_ROOT_PORT ||
 	       (type == PCI_EXP_TYPE_DOWNSTREAM &&
-		dev->pcie_flags_reg & PCI_EXP_FLAGS_SLOT);
+		pcie_caps_reg(dev) & PCI_EXP_FLAGS_SLOT);
 }
 
 static inline bool pcie_cap_has_rtctl(const struct pci_dev *dev)
diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c
index b42133afca98..31063ac30992 100644
--- a/drivers/pci/pcie/portdrv_core.c
+++ b/drivers/pci/pcie/portdrv_core.c
@@ -272,7 +272,7 @@ static int get_port_device_capability(struct pci_dev *dev)
 
 	/* Hot-Plug Capable */
 	if ((cap_mask & PCIE_PORT_SERVICE_HP) &&
-	    dev->pcie_flags_reg & PCI_EXP_FLAGS_SLOT) {
+	    pcie_caps_reg(dev) & PCI_EXP_FLAGS_SLOT) {
 		pcie_capability_read_dword(dev, PCI_EXP_SLTCAP, &reg32);
 		if (reg32 & PCI_EXP_SLTCAP_HPC) {
 			services |= PCIE_PORT_SERVICE_HP;
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 78581e1d3f64..63b36281afce 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1707,7 +1707,7 @@ static inline u16 pcie_caps_reg(const struct pci_dev *dev)
  */
 static inline int pci_pcie_type(const struct pci_dev *dev)
 {
-	return (dev->pcie_flags_reg & PCI_EXP_FLAGS_TYPE) >> 4;
+	return (pcie_caps_reg(dev) & PCI_EXP_FLAGS_TYPE) >> 4;
 }
 
 void pci_request_acs(void);
-- 
cgit v1.2.3


From d560040f7d6fbe0a2990b8f6edca1815e19e72f5 Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime.ripard@free-electrons.com>
Date: Thu, 31 Jan 2013 11:12:56 +0100
Subject: spi: spi-gpio: fix compilation warning on 64 bits systems

SPI_GPIO_NO_MOSI and SPI_GPIO_NO_MISO flags are type casted to unsigned
long, yet, they are to be stored in an unsigned int field in the
spi_gpio_platform_data structure.

This leads to the following warning during compilation on 64 bits systems:
warning: large integer implicitly truncated to unsigned type [-Woverflow]

Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>
Reported-by: Fengguang Wu <fengguang.wu@intel.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 include/linux/spi/spi_gpio.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/spi/spi_gpio.h b/include/linux/spi/spi_gpio.h
index 369b3d7d5b95..1634ce31c06d 100644
--- a/include/linux/spi/spi_gpio.h
+++ b/include/linux/spi/spi_gpio.h
@@ -62,8 +62,8 @@
  */
 struct spi_gpio_platform_data {
 	unsigned	sck;
-	unsigned	mosi;
-	unsigned	miso;
+	unsigned long	mosi;
+	unsigned long	miso;
 
 	u16		num_chipselect;
 };
-- 
cgit v1.2.3


From 2663960c159f23cbfb8e196c96e9fc9f3b5f1a8d Mon Sep 17 00:00:00 2001
From: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
Date: Tue, 22 Jan 2013 22:24:23 -0800
Subject: perf: Make EVENT_ATTR global

Rename EVENT_ATTR() to PMU_EVENT_ATTR() and make it global so it is
available to all architectures.

Further to allow architectures flexibility, have PMU_EVENT_ATTR() pass
in the variable name as a parameter.

Changelog[v2]
	- [Jiri Olsa] No need to define PMU_EVENT_PTR()

Signed-off-by: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Anton Blanchard <anton@au1.ibm.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Robert Richter <robert.richter@amd.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: linuxppc-dev@ozlabs.org
Link: http://lkml.kernel.org/r/20130123062422.GC13720@us.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 arch/x86/kernel/cpu/perf_event.c | 13 +++----------
 include/linux/perf_event.h       | 11 +++++++++++
 2 files changed, 14 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 6774c17a5576..c0df5ed2e048 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1310,11 +1310,6 @@ static struct attribute_group x86_pmu_format_group = {
 	.attrs = NULL,
 };
 
-struct perf_pmu_events_attr {
-	struct device_attribute attr;
-	u64 id;
-};
-
 /*
  * Remove all undefined events (x86_pmu.event_map(id) == 0)
  * out of events_attr attributes.
@@ -1348,11 +1343,9 @@ static ssize_t events_sysfs_show(struct device *dev, struct device_attribute *at
 #define EVENT_VAR(_id)  event_attr_##_id
 #define EVENT_PTR(_id) &event_attr_##_id.attr.attr
 
-#define EVENT_ATTR(_name, _id)					\
-static struct perf_pmu_events_attr EVENT_VAR(_id) = {		\
-	.attr = __ATTR(_name, 0444, events_sysfs_show, NULL),	\
-	.id   =  PERF_COUNT_HW_##_id,				\
-};
+#define EVENT_ATTR(_name, _id)						\
+	PMU_EVENT_ATTR(_name, EVENT_VAR(_id), PERF_COUNT_HW_##_id,	\
+			events_sysfs_show)
 
 EVENT_ATTR(cpu-cycles,			CPU_CYCLES		);
 EVENT_ATTR(instructions,		INSTRUCTIONS		);
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 6bfb2faa0b19..42adf012145d 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -817,6 +817,17 @@ do {									\
 } while (0)
 
 
+struct perf_pmu_events_attr {
+	struct device_attribute attr;
+	u64 id;
+};
+
+#define PMU_EVENT_ATTR(_name, _var, _id, _show)				\
+static struct perf_pmu_events_attr _var = {				\
+	.attr = __ATTR(_name, 0444, _show, NULL),			\
+	.id   =  _id,							\
+};
+
 #define PMU_FORMAT_ATTR(_name, _format)					\
 static ssize_t								\
 _name##_show(struct device *dev,					\
-- 
cgit v1.2.3


From 955154fa33df2b74f0fea8e7c84df6dfd954dab2 Mon Sep 17 00:00:00 2001
From: Matan Barak <matanb@mellanox.com>
Date: Wed, 30 Jan 2013 23:07:10 +0000
Subject: net/mlx4_en: Don't reassign port mac address on firmware that
 supports it

Mac reassignments should only be done when not supported by the firmware. To
accomplish that, checking firmware capability bit to know whether we should
reassign macs in the driver.

Signed-off-by: Matan Barak <matanb@mellanox.com>
Signed-off-by: Amir Vadai <amirv@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 3 ++-
 drivers/net/ethernet/mellanox/mlx4/fw.c        | 7 ++++++-
 include/linux/mlx4/device.h                    | 3 ++-
 3 files changed, 10 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 333a7a0b833c..7b513e9aea85 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -1285,7 +1285,8 @@ void mlx4_en_stop_port(struct net_device *dev)
 
 	/* Unregister Mac address for the port */
 	mlx4_put_eth_qp(mdev->dev, priv->port, priv->mac, priv->base_qpn);
-	mdev->mac_removed[priv->port] = 1;
+	if (!(mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAGS2_REASSIGN_MAC_EN))
+		mdev->mac_removed[priv->port] = 1;
 
 	/* Remove flow steering rules for the port*/
 	if (mdev->dev->caps.steering_mode ==
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index 91acf71aca97..38b62c78d5da 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -127,7 +127,8 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags)
 		[0] = "RSS support",
 		[1] = "RSS Toeplitz Hash Function support",
 		[2] = "RSS XOR Hash Function support",
-		[3] = "Device manage flow steering support"
+		[3] = "Device manage flow steering support",
+		[4] = "Automatic mac reassignment support"
 	};
 	int i;
 
@@ -478,6 +479,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 #define QUERY_DEV_CAP_BMME_FLAGS_OFFSET		0x94
 #define QUERY_DEV_CAP_RSVD_LKEY_OFFSET		0x98
 #define QUERY_DEV_CAP_MAX_ICM_SZ_OFFSET		0xa0
+#define QUERY_DEV_CAP_FW_REASSIGN_MAC		0x9d
 
 	dev_cap->flags2 = 0;
 	mailbox = mlx4_alloc_cmd_mailbox(dev);
@@ -637,6 +639,9 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 		 QUERY_DEV_CAP_BMME_FLAGS_OFFSET);
 	MLX4_GET(dev_cap->reserved_lkey, outbox,
 		 QUERY_DEV_CAP_RSVD_LKEY_OFFSET);
+	MLX4_GET(field, outbox, QUERY_DEV_CAP_FW_REASSIGN_MAC);
+	if (field & 1<<6)
+		dev_cap->flags2 |= MLX4_DEV_CAP_FLAGS2_REASSIGN_MAC_EN;
 	MLX4_GET(dev_cap->max_icm_sz, outbox,
 		 QUERY_DEV_CAP_MAX_ICM_SZ_OFFSET);
 	if (dev_cap->flags & MLX4_DEV_CAP_FLAG_COUNTERS)
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 20ea939c22a6..1883e8e84718 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -150,7 +150,8 @@ enum {
 	MLX4_DEV_CAP_FLAG2_RSS			= 1LL <<  0,
 	MLX4_DEV_CAP_FLAG2_RSS_TOP		= 1LL <<  1,
 	MLX4_DEV_CAP_FLAG2_RSS_XOR		= 1LL <<  2,
-	MLX4_DEV_CAP_FLAG2_FS_EN		= 1LL <<  3
+	MLX4_DEV_CAP_FLAG2_FS_EN		= 1LL <<  3,
+	MLX4_DEV_CAP_FLAGS2_REASSIGN_MAC_EN	= 1LL <<  4
 };
 
 enum {
-- 
cgit v1.2.3


From 23491b513bcd3dfe4ddb94547d73d9deb94eda44 Mon Sep 17 00:00:00 2001
From: Denis Ciocca <denis.ciocca@gmail.com>
Date: Fri, 25 Jan 2013 23:44:00 +0000
Subject: iio:common: Add STMicroelectronics common library

This patch add a generic library for STMicroelectronics 3-axis sensors.

Signed-off-by: Denis Ciocca <denis.ciocca@st.com>
Reviewed-by: Lars-Peter Clausen <lars@metafoo.de>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 drivers/iio/common/Kconfig                         |   1 +
 drivers/iio/common/Makefile                        |   1 +
 drivers/iio/common/st_sensors/Kconfig              |  14 +
 drivers/iio/common/st_sensors/Makefile             |  10 +
 drivers/iio/common/st_sensors/st_sensors_buffer.c  | 116 ++++++
 drivers/iio/common/st_sensors/st_sensors_core.c    | 460 +++++++++++++++++++++
 drivers/iio/common/st_sensors/st_sensors_i2c.c     |  81 ++++
 drivers/iio/common/st_sensors/st_sensors_spi.c     | 128 ++++++
 drivers/iio/common/st_sensors/st_sensors_trigger.c |  77 ++++
 include/linux/iio/common/st_sensors.h              | 274 ++++++++++++
 include/linux/iio/common/st_sensors_i2c.h          |  20 +
 include/linux/iio/common/st_sensors_spi.h          |  20 +
 12 files changed, 1202 insertions(+)
 create mode 100644 drivers/iio/common/st_sensors/Kconfig
 create mode 100644 drivers/iio/common/st_sensors/Makefile
 create mode 100644 drivers/iio/common/st_sensors/st_sensors_buffer.c
 create mode 100644 drivers/iio/common/st_sensors/st_sensors_core.c
 create mode 100644 drivers/iio/common/st_sensors/st_sensors_i2c.c
 create mode 100644 drivers/iio/common/st_sensors/st_sensors_spi.c
 create mode 100644 drivers/iio/common/st_sensors/st_sensors_trigger.c
 create mode 100644 include/linux/iio/common/st_sensors.h
 create mode 100644 include/linux/iio/common/st_sensors_i2c.h
 create mode 100644 include/linux/iio/common/st_sensors_spi.h

(limited to 'include/linux')

diff --git a/drivers/iio/common/Kconfig b/drivers/iio/common/Kconfig
index ed45ee54500c..0b6e97d18fa0 100644
--- a/drivers/iio/common/Kconfig
+++ b/drivers/iio/common/Kconfig
@@ -3,3 +3,4 @@
 #
 
 source "drivers/iio/common/hid-sensors/Kconfig"
+source "drivers/iio/common/st_sensors/Kconfig"
diff --git a/drivers/iio/common/Makefile b/drivers/iio/common/Makefile
index 81584009b21b..c2352beb5d97 100644
--- a/drivers/iio/common/Makefile
+++ b/drivers/iio/common/Makefile
@@ -7,3 +7,4 @@
 #
 
 obj-y += hid-sensors/
+obj-y += st_sensors/
diff --git a/drivers/iio/common/st_sensors/Kconfig b/drivers/iio/common/st_sensors/Kconfig
new file mode 100644
index 000000000000..865f1ca33eb9
--- /dev/null
+++ b/drivers/iio/common/st_sensors/Kconfig
@@ -0,0 +1,14 @@
+#
+# STMicroelectronics sensors common library
+#
+
+config IIO_ST_SENSORS_I2C
+	tristate
+
+config IIO_ST_SENSORS_SPI
+	tristate
+
+config IIO_ST_SENSORS_CORE
+	tristate
+	select IIO_ST_SENSORS_I2C if I2C
+	select IIO_ST_SENSORS_SPI if SPI_MASTER
diff --git a/drivers/iio/common/st_sensors/Makefile b/drivers/iio/common/st_sensors/Makefile
new file mode 100644
index 000000000000..9f3e24f3024b
--- /dev/null
+++ b/drivers/iio/common/st_sensors/Makefile
@@ -0,0 +1,10 @@
+#
+# Makefile for the STMicroelectronics sensor common modules.
+#
+
+obj-$(CONFIG_IIO_ST_SENSORS_I2C) += st_sensors_i2c.o
+obj-$(CONFIG_IIO_ST_SENSORS_SPI) += st_sensors_spi.o
+obj-$(CONFIG_IIO_ST_SENSORS_CORE) += st_sensors.o
+st_sensors-y := st_sensors_core.o
+st_sensors-$(CONFIG_IIO_BUFFER) += st_sensors_buffer.o
+st_sensors-$(CONFIG_IIO_TRIGGER) += st_sensors_trigger.o
diff --git a/drivers/iio/common/st_sensors/st_sensors_buffer.c b/drivers/iio/common/st_sensors/st_sensors_buffer.c
new file mode 100644
index 000000000000..09b236d6ee89
--- /dev/null
+++ b/drivers/iio/common/st_sensors/st_sensors_buffer.c
@@ -0,0 +1,116 @@
+/*
+ * STMicroelectronics sensors buffer library driver
+ *
+ * Copyright 2012-2013 STMicroelectronics Inc.
+ *
+ * Denis Ciocca <denis.ciocca@st.com>
+ *
+ * Licensed under the GPL-2.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/iio/iio.h>
+#include <linux/iio/trigger.h>
+#include <linux/interrupt.h>
+#include <linux/iio/buffer.h>
+#include <linux/iio/trigger_consumer.h>
+#include <linux/iio/triggered_buffer.h>
+#include <linux/irqreturn.h>
+
+#include <linux/iio/common/st_sensors.h>
+
+
+int st_sensors_get_buffer_element(struct iio_dev *indio_dev, u8 *buf)
+{
+	int i, n = 0, len;
+	u8 addr[ST_SENSORS_NUMBER_DATA_CHANNELS];
+	struct st_sensor_data *sdata = iio_priv(indio_dev);
+
+	for (i = 0; i < ST_SENSORS_NUMBER_DATA_CHANNELS; i++) {
+		if (test_bit(i, indio_dev->active_scan_mask)) {
+			addr[n] = indio_dev->channels[i].address;
+			n++;
+		}
+	}
+	switch (n) {
+	case 1:
+		len = sdata->tf->read_multiple_byte(&sdata->tb, sdata->dev,
+			addr[0], ST_SENSORS_BYTE_FOR_CHANNEL, buf,
+			sdata->multiread_bit);
+		break;
+	case 2:
+		if ((addr[1] - addr[0]) == ST_SENSORS_BYTE_FOR_CHANNEL) {
+			len = sdata->tf->read_multiple_byte(&sdata->tb,
+					sdata->dev, addr[0],
+					ST_SENSORS_BYTE_FOR_CHANNEL*n,
+					buf, sdata->multiread_bit);
+		} else {
+			u8 rx_array[ST_SENSORS_BYTE_FOR_CHANNEL*
+				    ST_SENSORS_NUMBER_DATA_CHANNELS];
+			len = sdata->tf->read_multiple_byte(&sdata->tb,
+				sdata->dev, addr[0],
+				ST_SENSORS_BYTE_FOR_CHANNEL*
+				ST_SENSORS_NUMBER_DATA_CHANNELS,
+				rx_array, sdata->multiread_bit);
+			if (len < 0)
+				goto read_data_channels_error;
+
+			for (i = 0; i < n * ST_SENSORS_NUMBER_DATA_CHANNELS;
+									i++) {
+				if (i < n)
+					buf[i] = rx_array[i];
+				else
+					buf[i] = rx_array[n + i];
+			}
+			len = ST_SENSORS_BYTE_FOR_CHANNEL*n;
+		}
+		break;
+	case 3:
+		len = sdata->tf->read_multiple_byte(&sdata->tb, sdata->dev,
+			addr[0], ST_SENSORS_BYTE_FOR_CHANNEL*
+			ST_SENSORS_NUMBER_DATA_CHANNELS,
+			buf, sdata->multiread_bit);
+		break;
+	default:
+		len = -EINVAL;
+		goto read_data_channels_error;
+	}
+	if (len != ST_SENSORS_BYTE_FOR_CHANNEL*n) {
+		len = -EIO;
+		goto read_data_channels_error;
+	}
+
+read_data_channels_error:
+	return len;
+}
+EXPORT_SYMBOL(st_sensors_get_buffer_element);
+
+irqreturn_t st_sensors_trigger_handler(int irq, void *p)
+{
+	int len;
+	struct iio_poll_func *pf = p;
+	struct iio_dev *indio_dev = pf->indio_dev;
+	struct st_sensor_data *sdata = iio_priv(indio_dev);
+
+	len = st_sensors_get_buffer_element(indio_dev, sdata->buffer_data);
+	if (len < 0)
+		goto st_sensors_get_buffer_element_error;
+
+	if (indio_dev->scan_timestamp)
+		*(s64 *)((u8 *)sdata->buffer_data +
+				ALIGN(len, sizeof(s64))) = pf->timestamp;
+
+	iio_push_to_buffers(indio_dev, sdata->buffer_data);
+
+st_sensors_get_buffer_element_error:
+	iio_trigger_notify_done(indio_dev->trig);
+
+	return IRQ_HANDLED;
+}
+EXPORT_SYMBOL(st_sensors_trigger_handler);
+
+MODULE_AUTHOR("Denis Ciocca <denis.ciocca@st.com>");
+MODULE_DESCRIPTION("STMicroelectronics ST-sensors buffer");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/iio/common/st_sensors/st_sensors_core.c b/drivers/iio/common/st_sensors/st_sensors_core.c
new file mode 100644
index 000000000000..fba6d6847b6d
--- /dev/null
+++ b/drivers/iio/common/st_sensors/st_sensors_core.c
@@ -0,0 +1,460 @@
+/*
+ * STMicroelectronics sensors core library driver
+ *
+ * Copyright 2012-2013 STMicroelectronics Inc.
+ *
+ * Denis Ciocca <denis.ciocca@st.com>
+ *
+ * Licensed under the GPL-2.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/delay.h>
+#include <linux/iio/iio.h>
+#include <asm/unaligned.h>
+
+#include <linux/iio/common/st_sensors.h>
+
+
+#define ST_SENSORS_WAI_ADDRESS		0x0f
+
+static int st_sensors_write_data_with_mask(struct iio_dev *indio_dev,
+						u8 reg_addr, u8 mask, u8 data)
+{
+	int err;
+	u8 new_data;
+	struct st_sensor_data *sdata = iio_priv(indio_dev);
+
+	err = sdata->tf->read_byte(&sdata->tb, sdata->dev, reg_addr, &new_data);
+	if (err < 0)
+		goto st_sensors_write_data_with_mask_error;
+
+	new_data = ((new_data & (~mask)) | ((data << __ffs(mask)) & mask));
+	err = sdata->tf->write_byte(&sdata->tb, sdata->dev, reg_addr, new_data);
+
+st_sensors_write_data_with_mask_error:
+	return err;
+}
+
+int st_sensors_get_sampling_frequency_avl(struct iio_dev *indio_dev, char *buf)
+{
+	int i, len = 0;
+	struct st_sensor_data *sdata = iio_priv(indio_dev);
+
+	mutex_lock(&indio_dev->mlock);
+	for (i = 0; i < ST_SENSORS_ODR_LIST_MAX; i++) {
+		if (sdata->sensor->odr.odr_avl[i].hz == 0)
+			break;
+
+		len += scnprintf(buf + len, PAGE_SIZE - len, "%d ",
+					sdata->sensor->odr.odr_avl[i].hz);
+	}
+	mutex_unlock(&indio_dev->mlock);
+	buf[len - 1] = '\n';
+
+	return len;
+}
+EXPORT_SYMBOL(st_sensors_get_sampling_frequency_avl);
+
+int st_sensors_get_scale_avl(struct iio_dev *indio_dev, char *buf)
+{
+	int i, len = 0;
+	struct st_sensor_data *sdata = iio_priv(indio_dev);
+
+	mutex_lock(&indio_dev->mlock);
+	for (i = 0; i < ST_SENSORS_FULLSCALE_AVL_MAX; i++) {
+		if (sdata->sensor->fs.fs_avl[i].num == 0)
+			break;
+
+		len += scnprintf(buf + len, PAGE_SIZE - len, "0.%06u ",
+					sdata->sensor->fs.fs_avl[i].gain);
+	}
+	mutex_unlock(&indio_dev->mlock);
+	buf[len - 1] = '\n';
+
+	return len;
+}
+EXPORT_SYMBOL(st_sensors_get_scale_avl);
+
+static int st_sensors_match_odr(struct st_sensors *sensor,
+			unsigned int odr, struct st_sensor_odr_avl *odr_out)
+{
+	int i, ret = -EINVAL;
+
+	for (i = 0; i < ST_SENSORS_ODR_LIST_MAX; i++) {
+		if (sensor->odr.odr_avl[i].hz == 0)
+			goto st_sensors_match_odr_error;
+
+		if (sensor->odr.odr_avl[i].hz == odr) {
+			odr_out->hz = sensor->odr.odr_avl[i].hz;
+			odr_out->value = sensor->odr.odr_avl[i].value;
+			ret = 0;
+			break;
+		}
+	}
+
+st_sensors_match_odr_error:
+	return ret;
+}
+
+int st_sensors_set_odr(struct iio_dev *indio_dev, unsigned int odr)
+{
+	int err;
+	struct st_sensor_odr_avl odr_out;
+	struct st_sensor_data *sdata = iio_priv(indio_dev);
+
+	err = st_sensors_match_odr(sdata->sensor, odr, &odr_out);
+	if (err < 0)
+		goto st_sensors_match_odr_error;
+
+	if ((sdata->sensor->odr.addr == sdata->sensor->pw.addr) &&
+			(sdata->sensor->odr.mask == sdata->sensor->pw.mask)) {
+		if (sdata->enabled == true) {
+			err = st_sensors_write_data_with_mask(indio_dev,
+				sdata->sensor->odr.addr,
+				sdata->sensor->odr.mask,
+				odr_out.value);
+		} else {
+			err = 0;
+		}
+	} else {
+		err = st_sensors_write_data_with_mask(indio_dev,
+			sdata->sensor->odr.addr, sdata->sensor->odr.mask,
+			odr_out.value);
+	}
+	if (err >= 0)
+		sdata->odr = odr_out.hz;
+
+st_sensors_match_odr_error:
+	return err;
+}
+EXPORT_SYMBOL(st_sensors_set_odr);
+
+static int st_sensors_match_fs(struct st_sensors *sensor,
+					unsigned int fs, int *index_fs_avl)
+{
+	int i, ret = -EINVAL;
+
+	for (i = 0; i < ST_SENSORS_FULLSCALE_AVL_MAX; i++) {
+		if (sensor->fs.fs_avl[i].num == 0)
+			goto st_sensors_match_odr_error;
+
+		if (sensor->fs.fs_avl[i].num == fs) {
+			*index_fs_avl = i;
+			ret = 0;
+			break;
+		}
+	}
+
+st_sensors_match_odr_error:
+	return ret;
+}
+
+static int st_sensors_set_fullscale(struct iio_dev *indio_dev, unsigned int fs)
+{
+	int err, i;
+	struct st_sensor_data *sdata = iio_priv(indio_dev);
+
+	err = st_sensors_match_fs(sdata->sensor, fs, &i);
+	if (err < 0)
+		goto st_accel_set_fullscale_error;
+
+	err = st_sensors_write_data_with_mask(indio_dev,
+				sdata->sensor->fs.addr,
+				sdata->sensor->fs.mask,
+				sdata->sensor->fs.fs_avl[i].value);
+	if (err < 0)
+		goto st_accel_set_fullscale_error;
+
+	sdata->current_fullscale = (struct st_sensor_fullscale_avl *)
+						&sdata->sensor->fs.fs_avl[i];
+	return err;
+
+st_accel_set_fullscale_error:
+	dev_err(&indio_dev->dev, "failed to set new fullscale.\n");
+	return err;
+}
+
+int st_sensors_set_enable(struct iio_dev *indio_dev, bool enable)
+{
+	bool found;
+	u8 tmp_value;
+	int err = -EINVAL;
+	struct st_sensor_odr_avl odr_out;
+	struct st_sensor_data *sdata = iio_priv(indio_dev);
+
+	if (enable) {
+		found = false;
+		tmp_value = sdata->sensor->pw.value_on;
+		if ((sdata->sensor->odr.addr == sdata->sensor->pw.addr) &&
+			(sdata->sensor->odr.mask == sdata->sensor->pw.mask)) {
+			err = st_sensors_match_odr(sdata->sensor,
+							sdata->odr, &odr_out);
+			if (err < 0)
+				goto set_enable_error;
+			tmp_value = odr_out.value;
+			found = true;
+		}
+		err = st_sensors_write_data_with_mask(indio_dev,
+				sdata->sensor->pw.addr,
+				sdata->sensor->pw.mask, tmp_value);
+		if (err < 0)
+			goto set_enable_error;
+
+		sdata->enabled = true;
+
+		if (found)
+			sdata->odr = odr_out.hz;
+	} else {
+		err = st_sensors_write_data_with_mask(indio_dev,
+				sdata->sensor->pw.addr,
+				sdata->sensor->pw.mask,
+				sdata->sensor->pw.value_off);
+		if (err < 0)
+			goto set_enable_error;
+
+		sdata->enabled = false;
+	}
+
+set_enable_error:
+	return err;
+}
+EXPORT_SYMBOL(st_sensors_set_enable);
+
+int st_sensors_set_axis_enable(struct iio_dev *indio_dev, u8 axis_enable)
+{
+	struct st_sensor_data *sdata = iio_priv(indio_dev);
+
+	return st_sensors_write_data_with_mask(indio_dev,
+				sdata->sensor->enable_axis.addr,
+				sdata->sensor->enable_axis.mask, axis_enable);
+}
+EXPORT_SYMBOL(st_sensors_set_axis_enable);
+
+int st_sensors_init_sensor(struct iio_dev *indio_dev)
+{
+	int err;
+	struct st_sensor_data *sdata = iio_priv(indio_dev);
+
+	mutex_init(&sdata->tb.buf_lock);
+
+	err = st_sensors_set_enable(indio_dev, false);
+	if (err < 0)
+		goto init_error;
+
+	err = st_sensors_set_fullscale(indio_dev,
+						sdata->current_fullscale->num);
+	if (err < 0)
+		goto init_error;
+
+	err = st_sensors_set_odr(indio_dev, sdata->odr);
+	if (err < 0)
+		goto init_error;
+
+	/* set BDU */
+	err = st_sensors_write_data_with_mask(indio_dev,
+			sdata->sensor->bdu.addr, sdata->sensor->bdu.mask, true);
+	if (err < 0)
+		goto init_error;
+
+	err = st_sensors_set_axis_enable(indio_dev, ST_SENSORS_ENABLE_ALL_AXIS);
+
+init_error:
+	return err;
+}
+EXPORT_SYMBOL(st_sensors_init_sensor);
+
+int st_sensors_set_dataready_irq(struct iio_dev *indio_dev, bool enable)
+{
+	int err;
+	struct st_sensor_data *sdata = iio_priv(indio_dev);
+
+	/* Enable/Disable the interrupt generator 1. */
+	if (sdata->sensor->drdy_irq.ig1.en_addr > 0) {
+		err = st_sensors_write_data_with_mask(indio_dev,
+			sdata->sensor->drdy_irq.ig1.en_addr,
+			sdata->sensor->drdy_irq.ig1.en_mask, (int)enable);
+		if (err < 0)
+			goto st_accel_set_dataready_irq_error;
+	}
+
+	/* Enable/Disable the interrupt generator for data ready. */
+	err = st_sensors_write_data_with_mask(indio_dev,
+			sdata->sensor->drdy_irq.addr,
+			sdata->sensor->drdy_irq.mask, (int)enable);
+
+st_accel_set_dataready_irq_error:
+	return err;
+}
+EXPORT_SYMBOL(st_sensors_set_dataready_irq);
+
+int st_sensors_set_fullscale_by_gain(struct iio_dev *indio_dev, int scale)
+{
+	int err = -EINVAL, i;
+	struct st_sensor_data *sdata = iio_priv(indio_dev);
+
+	for (i = 0; i < ST_SENSORS_FULLSCALE_AVL_MAX; i++) {
+		if ((sdata->sensor->fs.fs_avl[i].gain == scale) &&
+				(sdata->sensor->fs.fs_avl[i].gain != 0)) {
+			err = 0;
+			break;
+		}
+	}
+	if (err < 0)
+		goto st_sensors_match_scale_error;
+
+	err = st_sensors_set_fullscale(indio_dev,
+					sdata->sensor->fs.fs_avl[i].num);
+
+st_sensors_match_scale_error:
+	return err;
+}
+EXPORT_SYMBOL(st_sensors_set_fullscale_by_gain);
+
+static int st_sensors_read_axis_data(struct iio_dev *indio_dev,
+							u8 ch_addr, int *data)
+{
+	int err;
+	u8 outdata[ST_SENSORS_BYTE_FOR_CHANNEL];
+	struct st_sensor_data *sdata = iio_priv(indio_dev);
+
+	err = sdata->tf->read_multiple_byte(&sdata->tb, sdata->dev,
+				ch_addr, ST_SENSORS_BYTE_FOR_CHANNEL,
+				outdata, sdata->multiread_bit);
+	if (err < 0)
+		goto read_error;
+
+	*data = (s16)get_unaligned_le16(outdata);
+
+read_error:
+	return err;
+}
+
+int st_sensors_read_info_raw(struct iio_dev *indio_dev,
+				struct iio_chan_spec const *ch, int *val)
+{
+	int err;
+	struct st_sensor_data *sdata = iio_priv(indio_dev);
+
+	mutex_lock(&indio_dev->mlock);
+	if (indio_dev->currentmode == INDIO_BUFFER_TRIGGERED) {
+		err = -EBUSY;
+		goto read_error;
+	} else {
+		err = st_sensors_set_enable(indio_dev, true);
+		if (err < 0)
+			goto read_error;
+
+		msleep((sdata->sensor->bootime * 1000) / sdata->odr);
+		err = st_sensors_read_axis_data(indio_dev, ch->address, val);
+		if (err < 0)
+			goto read_error;
+
+		*val = *val >> ch->scan_type.shift;
+	}
+	mutex_unlock(&indio_dev->mlock);
+
+	return err;
+
+read_error:
+	mutex_unlock(&indio_dev->mlock);
+	return err;
+}
+EXPORT_SYMBOL(st_sensors_read_info_raw);
+
+int st_sensors_check_device_support(struct iio_dev *indio_dev,
+			int num_sensors_list, const struct st_sensors *sensors)
+{
+	u8 wai;
+	int i, n, err;
+	struct st_sensor_data *sdata = iio_priv(indio_dev);
+
+	err = sdata->tf->read_byte(&sdata->tb, sdata->dev,
+					ST_SENSORS_DEFAULT_WAI_ADDRESS, &wai);
+	if (err < 0) {
+		dev_err(&indio_dev->dev, "failed to read Who-Am-I register.\n");
+		goto read_wai_error;
+	}
+
+	for (i = 0; i < num_sensors_list; i++) {
+		if (sensors[i].wai == wai)
+			break;
+	}
+	if (i == num_sensors_list)
+		goto device_not_supported;
+
+	for (n = 0; n < ARRAY_SIZE(sensors[i].sensors_supported); n++) {
+		if (strcmp(indio_dev->name,
+				&sensors[i].sensors_supported[n][0]) == 0)
+			break;
+	}
+	if (n == ARRAY_SIZE(sensors[i].sensors_supported)) {
+		dev_err(&indio_dev->dev, "device name and WhoAmI mismatch.\n");
+		goto sensor_name_mismatch;
+	}
+
+	sdata->sensor = (struct st_sensors *)&sensors[i];
+
+	return i;
+
+device_not_supported:
+	dev_err(&indio_dev->dev, "device not supported: WhoAmI (0x%x).\n", wai);
+sensor_name_mismatch:
+	err = -ENODEV;
+read_wai_error:
+	return err;
+}
+EXPORT_SYMBOL(st_sensors_check_device_support);
+
+ssize_t st_sensors_sysfs_get_sampling_frequency(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct st_sensor_data *adata = iio_priv(dev_get_drvdata(dev));
+
+	return sprintf(buf, "%d\n", adata->odr);
+}
+EXPORT_SYMBOL(st_sensors_sysfs_get_sampling_frequency);
+
+ssize_t st_sensors_sysfs_set_sampling_frequency(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t size)
+{
+	int err;
+	unsigned int odr;
+	struct iio_dev *indio_dev = dev_get_drvdata(dev);
+
+	err = kstrtoint(buf, 10, &odr);
+	if (err < 0)
+		goto conversion_error;
+
+	mutex_lock(&indio_dev->mlock);
+	err = st_sensors_set_odr(indio_dev, odr);
+	mutex_unlock(&indio_dev->mlock);
+
+conversion_error:
+	return err < 0 ? err : size;
+}
+EXPORT_SYMBOL(st_sensors_sysfs_set_sampling_frequency);
+
+ssize_t st_sensors_sysfs_sampling_frequency_avail(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct iio_dev *indio_dev = dev_get_drvdata(dev);
+
+	return st_sensors_get_sampling_frequency_avl(indio_dev, buf);
+}
+EXPORT_SYMBOL(st_sensors_sysfs_sampling_frequency_avail);
+
+ssize_t st_sensors_sysfs_scale_avail(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct iio_dev *indio_dev = dev_get_drvdata(dev);
+
+	return st_sensors_get_scale_avl(indio_dev, buf);
+}
+EXPORT_SYMBOL(st_sensors_sysfs_scale_avail);
+
+MODULE_AUTHOR("Denis Ciocca <denis.ciocca@st.com>");
+MODULE_DESCRIPTION("STMicroelectronics ST-sensors core");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/iio/common/st_sensors/st_sensors_i2c.c b/drivers/iio/common/st_sensors/st_sensors_i2c.c
new file mode 100644
index 000000000000..38af9440c103
--- /dev/null
+++ b/drivers/iio/common/st_sensors/st_sensors_i2c.c
@@ -0,0 +1,81 @@
+/*
+ * STMicroelectronics sensors i2c library driver
+ *
+ * Copyright 2012-2013 STMicroelectronics Inc.
+ *
+ * Denis Ciocca <denis.ciocca@st.com>
+ *
+ * Licensed under the GPL-2.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/iio/iio.h>
+
+#include <linux/iio/common/st_sensors_i2c.h>
+
+
+#define ST_SENSORS_I2C_MULTIREAD	0x80
+
+static unsigned int st_sensors_i2c_get_irq(struct iio_dev *indio_dev)
+{
+	struct st_sensor_data *sdata = iio_priv(indio_dev);
+
+	return to_i2c_client(sdata->dev)->irq;
+}
+
+static int st_sensors_i2c_read_byte(struct st_sensor_transfer_buffer *tb,
+				struct device *dev, u8 reg_addr, u8 *res_byte)
+{
+	int err;
+
+	err = i2c_smbus_read_byte_data(to_i2c_client(dev), reg_addr);
+	if (err < 0)
+		goto st_accel_i2c_read_byte_error;
+
+	*res_byte = err & 0xff;
+
+st_accel_i2c_read_byte_error:
+	return err < 0 ? err : 0;
+}
+
+static int st_sensors_i2c_read_multiple_byte(
+		struct st_sensor_transfer_buffer *tb, struct device *dev,
+			u8 reg_addr, int len, u8 *data, bool multiread_bit)
+{
+	if (multiread_bit)
+		reg_addr |= ST_SENSORS_I2C_MULTIREAD;
+
+	return i2c_smbus_read_i2c_block_data(to_i2c_client(dev),
+							reg_addr, len, data);
+}
+
+static int st_sensors_i2c_write_byte(struct st_sensor_transfer_buffer *tb,
+				struct device *dev, u8 reg_addr, u8 data)
+{
+	return i2c_smbus_write_byte_data(to_i2c_client(dev), reg_addr, data);
+}
+
+static const struct st_sensor_transfer_function st_sensors_tf_i2c = {
+	.read_byte = st_sensors_i2c_read_byte,
+	.write_byte = st_sensors_i2c_write_byte,
+	.read_multiple_byte = st_sensors_i2c_read_multiple_byte,
+};
+
+void st_sensors_i2c_configure(struct iio_dev *indio_dev,
+		struct i2c_client *client, struct st_sensor_data *sdata)
+{
+	i2c_set_clientdata(client, indio_dev);
+
+	indio_dev->dev.parent = &client->dev;
+	indio_dev->name = client->name;
+
+	sdata->tf = &st_sensors_tf_i2c;
+	sdata->get_irq_data_ready = st_sensors_i2c_get_irq;
+}
+EXPORT_SYMBOL(st_sensors_i2c_configure);
+
+MODULE_AUTHOR("Denis Ciocca <denis.ciocca@st.com>");
+MODULE_DESCRIPTION("STMicroelectronics ST-sensors i2c driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/iio/common/st_sensors/st_sensors_spi.c b/drivers/iio/common/st_sensors/st_sensors_spi.c
new file mode 100644
index 000000000000..f0aa2f105222
--- /dev/null
+++ b/drivers/iio/common/st_sensors/st_sensors_spi.c
@@ -0,0 +1,128 @@
+/*
+ * STMicroelectronics sensors spi library driver
+ *
+ * Copyright 2012-2013 STMicroelectronics Inc.
+ *
+ * Denis Ciocca <denis.ciocca@st.com>
+ *
+ * Licensed under the GPL-2.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/iio/iio.h>
+
+#include <linux/iio/common/st_sensors_spi.h>
+
+
+#define ST_SENSORS_SPI_MULTIREAD	0xc0
+#define ST_SENSORS_SPI_READ		0x80
+
+static unsigned int st_sensors_spi_get_irq(struct iio_dev *indio_dev)
+{
+	struct st_sensor_data *sdata = iio_priv(indio_dev);
+
+	return to_spi_device(sdata->dev)->irq;
+}
+
+static int st_sensors_spi_read(struct st_sensor_transfer_buffer *tb,
+	struct device *dev, u8 reg_addr, int len, u8 *data, bool multiread_bit)
+{
+	struct spi_message msg;
+	int err;
+
+	struct spi_transfer xfers[] = {
+		{
+			.tx_buf = tb->tx_buf,
+			.bits_per_word = 8,
+			.len = 1,
+		},
+		{
+			.rx_buf = tb->rx_buf,
+			.bits_per_word = 8,
+			.len = len,
+		}
+	};
+
+	mutex_lock(&tb->buf_lock);
+	if ((multiread_bit) && (len > 1))
+		tb->tx_buf[0] = reg_addr | ST_SENSORS_SPI_MULTIREAD;
+	else
+		tb->tx_buf[0] = reg_addr | ST_SENSORS_SPI_READ;
+
+	spi_message_init(&msg);
+	spi_message_add_tail(&xfers[0], &msg);
+	spi_message_add_tail(&xfers[1], &msg);
+	err = spi_sync(to_spi_device(dev), &msg);
+	if (err)
+		goto acc_spi_read_error;
+
+	memcpy(data, tb->rx_buf, len*sizeof(u8));
+	mutex_unlock(&tb->buf_lock);
+	return len;
+
+acc_spi_read_error:
+	mutex_unlock(&tb->buf_lock);
+	return err;
+}
+
+static int st_sensors_spi_read_byte(struct st_sensor_transfer_buffer *tb,
+				struct device *dev, u8 reg_addr, u8 *res_byte)
+{
+	return st_sensors_spi_read(tb, dev, reg_addr, 1, res_byte, false);
+}
+
+static int st_sensors_spi_read_multiple_byte(
+	struct st_sensor_transfer_buffer *tb, struct device *dev,
+			u8 reg_addr, int len, u8 *data, bool multiread_bit)
+{
+	return st_sensors_spi_read(tb, dev, reg_addr, len, data, multiread_bit);
+}
+
+static int st_sensors_spi_write_byte(struct st_sensor_transfer_buffer *tb,
+				struct device *dev, u8 reg_addr, u8 data)
+{
+	struct spi_message msg;
+	int err;
+
+	struct spi_transfer xfers = {
+		.tx_buf = tb->tx_buf,
+		.bits_per_word = 8,
+		.len = 2,
+	};
+
+	mutex_lock(&tb->buf_lock);
+	tb->tx_buf[0] = reg_addr;
+	tb->tx_buf[1] = data;
+
+	spi_message_init(&msg);
+	spi_message_add_tail(&xfers, &msg);
+	err = spi_sync(to_spi_device(dev), &msg);
+	mutex_unlock(&tb->buf_lock);
+
+	return err;
+}
+
+static const struct st_sensor_transfer_function st_sensors_tf_spi = {
+	.read_byte = st_sensors_spi_read_byte,
+	.write_byte = st_sensors_spi_write_byte,
+	.read_multiple_byte = st_sensors_spi_read_multiple_byte,
+};
+
+void st_sensors_spi_configure(struct iio_dev *indio_dev,
+			struct spi_device *spi, struct st_sensor_data *sdata)
+{
+	spi_set_drvdata(spi, indio_dev);
+
+	indio_dev->dev.parent = &spi->dev;
+	indio_dev->name = spi->modalias;
+
+	sdata->tf = &st_sensors_tf_spi;
+	sdata->get_irq_data_ready = st_sensors_spi_get_irq;
+}
+EXPORT_SYMBOL(st_sensors_spi_configure);
+
+MODULE_AUTHOR("Denis Ciocca <denis.ciocca@st.com>");
+MODULE_DESCRIPTION("STMicroelectronics ST-sensors spi driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/iio/common/st_sensors/st_sensors_trigger.c b/drivers/iio/common/st_sensors/st_sensors_trigger.c
new file mode 100644
index 000000000000..139ed030abb0
--- /dev/null
+++ b/drivers/iio/common/st_sensors/st_sensors_trigger.c
@@ -0,0 +1,77 @@
+/*
+ * STMicroelectronics sensors trigger library driver
+ *
+ * Copyright 2012-2013 STMicroelectronics Inc.
+ *
+ * Denis Ciocca <denis.ciocca@st.com>
+ *
+ * Licensed under the GPL-2.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/iio/iio.h>
+#include <linux/iio/trigger.h>
+#include <linux/interrupt.h>
+
+#include <linux/iio/common/st_sensors.h>
+
+
+int st_sensors_allocate_trigger(struct iio_dev *indio_dev,
+				const struct iio_trigger_ops *trigger_ops)
+{
+	int err;
+	struct st_sensor_data *sdata = iio_priv(indio_dev);
+
+	sdata->trig = iio_trigger_alloc("%s-trigger", indio_dev->name);
+	if (sdata->trig == NULL) {
+		err = -ENOMEM;
+		dev_err(&indio_dev->dev, "failed to allocate iio trigger.\n");
+		goto iio_trigger_alloc_error;
+	}
+
+	err = request_threaded_irq(sdata->get_irq_data_ready(indio_dev),
+			iio_trigger_generic_data_rdy_poll,
+			NULL,
+			IRQF_TRIGGER_RISING,
+			sdata->trig->name,
+			sdata->trig);
+	if (err)
+		goto request_irq_error;
+
+	sdata->trig->private_data = indio_dev;
+	sdata->trig->ops = trigger_ops;
+	sdata->trig->dev.parent = sdata->dev;
+
+	err = iio_trigger_register(sdata->trig);
+	if (err < 0) {
+		dev_err(&indio_dev->dev, "failed to register iio trigger.\n");
+		goto iio_trigger_register_error;
+	}
+	indio_dev->trig = sdata->trig;
+
+	return 0;
+
+iio_trigger_register_error:
+	free_irq(sdata->get_irq_data_ready(indio_dev), sdata->trig);
+request_irq_error:
+	iio_trigger_free(sdata->trig);
+iio_trigger_alloc_error:
+	return err;
+}
+EXPORT_SYMBOL(st_sensors_allocate_trigger);
+
+void st_sensors_deallocate_trigger(struct iio_dev *indio_dev)
+{
+	struct st_sensor_data *sdata = iio_priv(indio_dev);
+
+	iio_trigger_unregister(sdata->trig);
+	free_irq(sdata->get_irq_data_ready(indio_dev), sdata->trig);
+	iio_trigger_free(sdata->trig);
+}
+EXPORT_SYMBOL(st_sensors_deallocate_trigger);
+
+MODULE_AUTHOR("Denis Ciocca <denis.ciocca@st.com>");
+MODULE_DESCRIPTION("STMicroelectronics ST-sensors trigger");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/iio/common/st_sensors.h b/include/linux/iio/common/st_sensors.h
new file mode 100644
index 000000000000..3cc85715491f
--- /dev/null
+++ b/include/linux/iio/common/st_sensors.h
@@ -0,0 +1,274 @@
+/*
+ * STMicroelectronics sensors library driver
+ *
+ * Copyright 2012-2013 STMicroelectronics Inc.
+ *
+ * Denis Ciocca <denis.ciocca@st.com>
+ *
+ * Licensed under the GPL-2.
+ */
+
+#ifndef ST_SENSORS_H
+#define ST_SENSORS_H
+
+#include <linux/i2c.h>
+#include <linux/spi/spi.h>
+#include <linux/irqreturn.h>
+#include <linux/iio/trigger.h>
+
+#define ST_SENSORS_TX_MAX_LENGTH		2
+#define ST_SENSORS_RX_MAX_LENGTH		6
+
+#define ST_SENSORS_ODR_LIST_MAX			10
+#define ST_SENSORS_FULLSCALE_AVL_MAX		10
+
+#define ST_SENSORS_NUMBER_ALL_CHANNELS		4
+#define ST_SENSORS_NUMBER_DATA_CHANNELS		3
+#define ST_SENSORS_ENABLE_ALL_AXIS		0x07
+#define ST_SENSORS_BYTE_FOR_CHANNEL		2
+#define ST_SENSORS_SCAN_X			0
+#define ST_SENSORS_SCAN_Y			1
+#define ST_SENSORS_SCAN_Z			2
+#define ST_SENSORS_DEFAULT_12_REALBITS		12
+#define ST_SENSORS_DEFAULT_16_REALBITS		16
+#define ST_SENSORS_DEFAULT_POWER_ON_VALUE	0x01
+#define ST_SENSORS_DEFAULT_POWER_OFF_VALUE	0x00
+#define ST_SENSORS_DEFAULT_WAI_ADDRESS		0x0f
+#define ST_SENSORS_DEFAULT_AXIS_ADDR		0x20
+#define ST_SENSORS_DEFAULT_AXIS_MASK		0x07
+#define ST_SENSORS_DEFAULT_AXIS_N_BIT		3
+
+#define ST_SENSORS_MAX_NAME			17
+#define ST_SENSORS_MAX_4WAI			7
+
+#define ST_SENSORS_LSM_CHANNELS(device_type, index, mod, endian, bits, addr) \
+{ \
+	.type = device_type, \
+	.modified = 1, \
+	.info_mask = IIO_CHAN_INFO_RAW_SEPARATE_BIT | \
+			IIO_CHAN_INFO_SCALE_SEPARATE_BIT, \
+	.scan_index = index, \
+	.channel2 = mod, \
+	.address = addr, \
+	.scan_type = { \
+		.sign = 's', \
+		.realbits = bits, \
+		.shift = 16 - bits, \
+		.storagebits = 16, \
+		.endianness = endian, \
+	}, \
+}
+
+#define ST_SENSOR_DEV_ATTR_SAMP_FREQ() \
+		IIO_DEV_ATTR_SAMP_FREQ(S_IWUSR | S_IRUGO, \
+			st_sensors_sysfs_get_sampling_frequency, \
+			st_sensors_sysfs_set_sampling_frequency)
+
+#define ST_SENSORS_DEV_ATTR_SAMP_FREQ_AVAIL() \
+		IIO_DEV_ATTR_SAMP_FREQ_AVAIL( \
+			st_sensors_sysfs_sampling_frequency_avail)
+
+#define ST_SENSORS_DEV_ATTR_SCALE_AVAIL(name) \
+		IIO_DEVICE_ATTR(name, S_IRUGO, \
+			st_sensors_sysfs_scale_avail, NULL , 0);
+
+struct st_sensor_odr_avl {
+	unsigned int hz;
+	u8 value;
+};
+
+struct st_sensor_odr {
+	u8 addr;
+	u8 mask;
+	struct st_sensor_odr_avl odr_avl[ST_SENSORS_ODR_LIST_MAX];
+};
+
+struct st_sensor_power {
+	u8 addr;
+	u8 mask;
+	u8 value_off;
+	u8 value_on;
+};
+
+struct st_sensor_axis {
+	u8 addr;
+	u8 mask;
+};
+
+struct st_sensor_fullscale_avl {
+	unsigned int num;
+	u8 value;
+	unsigned int gain;
+	unsigned int gain2;
+};
+
+struct st_sensor_fullscale {
+	u8 addr;
+	u8 mask;
+	struct st_sensor_fullscale_avl fs_avl[ST_SENSORS_FULLSCALE_AVL_MAX];
+};
+
+/**
+ * struct st_sensor_bdu - ST sensor device block data update
+ * @addr: address of the register.
+ * @mask: mask to write the block data update flag.
+ */
+struct st_sensor_bdu {
+	u8 addr;
+	u8 mask;
+};
+
+/**
+ * struct st_sensor_data_ready_irq - ST sensor device data-ready interrupt
+ * @addr: address of the register.
+ * @mask: mask to write the on/off value.
+ * struct ig1 - represents the Interrupt Generator 1 of sensors.
+ * @en_addr: address of the enable ig1 register.
+ * @en_mask: mask to write the on/off value for enable.
+ */
+struct st_sensor_data_ready_irq {
+	u8 addr;
+	u8 mask;
+	struct {
+		u8 en_addr;
+		u8 en_mask;
+	} ig1;
+};
+
+/**
+ * struct st_sensor_transfer_buffer - ST sensor device I/O buffer
+ * @buf_lock: Mutex to protect rx and tx buffers.
+ * @tx_buf: Buffer used by SPI transfer function to send data to the sensors.
+ *	This buffer is used to avoid DMA not-aligned issue.
+ * @rx_buf: Buffer used by SPI transfer to receive data from sensors.
+ *	This buffer is used to avoid DMA not-aligned issue.
+ */
+struct st_sensor_transfer_buffer {
+	struct mutex buf_lock;
+	u8 rx_buf[ST_SENSORS_RX_MAX_LENGTH];
+	u8 tx_buf[ST_SENSORS_TX_MAX_LENGTH] ____cacheline_aligned;
+};
+
+/**
+ * struct st_sensor_transfer_function - ST sensor device I/O function
+ * @read_byte: Function used to read one byte.
+ * @write_byte: Function used to write one byte.
+ * @read_multiple_byte: Function used to read multiple byte.
+ */
+struct st_sensor_transfer_function {
+	int (*read_byte) (struct st_sensor_transfer_buffer *tb,
+				struct device *dev, u8 reg_addr, u8 *res_byte);
+	int (*write_byte) (struct st_sensor_transfer_buffer *tb,
+				struct device *dev, u8 reg_addr, u8 data);
+	int (*read_multiple_byte) (struct st_sensor_transfer_buffer *tb,
+		struct device *dev, u8 reg_addr, int len, u8 *data,
+							bool multiread_bit);
+};
+
+/**
+ * struct st_sensors - ST sensors list
+ * @wai: Contents of WhoAmI register.
+ * @sensors_supported: List of supported sensors by struct itself.
+ * @ch: IIO channels for the sensor.
+ * @odr: Output data rate register and ODR list available.
+ * @pw: Power register of the sensor.
+ * @enable_axis: Enable one or more axis of the sensor.
+ * @fs: Full scale register and full scale list available.
+ * @bdu: Block data update register.
+ * @drdy_irq: Data ready register of the sensor.
+ * @multi_read_bit: Use or not particular bit for [I2C/SPI] multi-read.
+ * @bootime: samples to discard when sensor passing from power-down to power-up.
+ */
+struct st_sensors {
+	u8 wai;
+	char sensors_supported[ST_SENSORS_MAX_4WAI][ST_SENSORS_MAX_NAME];
+	struct iio_chan_spec *ch;
+	struct st_sensor_odr odr;
+	struct st_sensor_power pw;
+	struct st_sensor_axis enable_axis;
+	struct st_sensor_fullscale fs;
+	struct st_sensor_bdu bdu;
+	struct st_sensor_data_ready_irq drdy_irq;
+	bool multi_read_bit;
+	unsigned int bootime;
+};
+
+/**
+ * struct st_sensor_data - ST sensor device status
+ * @dev: Pointer to instance of struct device (I2C or SPI).
+ * @trig: The trigger in use by the core driver.
+ * @sensor: Pointer to the current sensor struct in use.
+ * @current_fullscale: Maximum range of measure by the sensor.
+ * @enabled: Status of the sensor (false->off, true->on).
+ * @multiread_bit: Use or not particular bit for [I2C/SPI] multiread.
+ * @buffer_data: Data used by buffer part.
+ * @odr: Output data rate of the sensor [Hz].
+ * @get_irq_data_ready: Function to get the IRQ used for data ready signal.
+ * @tf: Transfer function structure used by I/O operations.
+ * @tb: Transfer buffers and mutex used by I/O operations.
+ */
+struct st_sensor_data {
+	struct device *dev;
+	struct iio_trigger *trig;
+	struct st_sensors *sensor;
+	struct st_sensor_fullscale_avl *current_fullscale;
+
+	bool enabled;
+	bool multiread_bit;
+
+	char *buffer_data;
+
+	unsigned int odr;
+
+	unsigned int (*get_irq_data_ready) (struct iio_dev *indio_dev);
+
+	const struct st_sensor_transfer_function *tf;
+	struct st_sensor_transfer_buffer tb;
+};
+
+#ifdef CONFIG_IIO_BUFFER
+int st_sensors_allocate_trigger(struct iio_dev *indio_dev,
+				const struct iio_trigger_ops *trigger_ops);
+
+void st_sensors_deallocate_trigger(struct iio_dev *indio_dev);
+
+irqreturn_t st_sensors_trigger_handler(int irq, void *p);
+
+int st_sensors_get_buffer_element(struct iio_dev *indio_dev, u8 *buf);
+#endif
+
+int st_sensors_init_sensor(struct iio_dev *indio_dev);
+
+int st_sensors_set_enable(struct iio_dev *indio_dev, bool enable);
+
+int st_sensors_set_axis_enable(struct iio_dev *indio_dev, u8 axis_enable);
+
+int st_sensors_get_sampling_frequency_avl(struct iio_dev *indio_dev, char *buf);
+
+int st_sensors_get_scale_avl(struct iio_dev *indio_dev, char *buf);
+
+int st_sensors_set_odr(struct iio_dev *indio_dev, unsigned int odr);
+
+int st_sensors_set_dataready_irq(struct iio_dev *indio_dev, bool enable);
+
+int st_sensors_set_fullscale_by_gain(struct iio_dev *indio_dev, int scale);
+
+int st_sensors_read_info_raw(struct iio_dev *indio_dev,
+				struct iio_chan_spec const *ch, int *val);
+
+int st_sensors_check_device_support(struct iio_dev *indio_dev,
+			int num_sensors_list, const struct st_sensors *sensors);
+
+ssize_t st_sensors_sysfs_get_sampling_frequency(struct device *dev,
+				struct device_attribute *attr, char *buf);
+
+ssize_t st_sensors_sysfs_set_sampling_frequency(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t size);
+
+ssize_t st_sensors_sysfs_sampling_frequency_avail(struct device *dev,
+				struct device_attribute *attr, char *buf);
+
+ssize_t st_sensors_sysfs_scale_avail(struct device *dev,
+				struct device_attribute *attr, char *buf);
+
+#endif /* ST_SENSORS_H */
diff --git a/include/linux/iio/common/st_sensors_i2c.h b/include/linux/iio/common/st_sensors_i2c.h
new file mode 100644
index 000000000000..67d845385ae2
--- /dev/null
+++ b/include/linux/iio/common/st_sensors_i2c.h
@@ -0,0 +1,20 @@
+/*
+ * STMicroelectronics sensors i2c library driver
+ *
+ * Copyright 2012-2013 STMicroelectronics Inc.
+ *
+ * Denis Ciocca <denis.ciocca@st.com>
+ *
+ * Licensed under the GPL-2.
+ */
+
+#ifndef ST_SENSORS_I2C_H
+#define ST_SENSORS_I2C_H
+
+#include <linux/i2c.h>
+#include <linux/iio/common/st_sensors.h>
+
+void st_sensors_i2c_configure(struct iio_dev *indio_dev,
+		struct i2c_client *client, struct st_sensor_data *sdata);
+
+#endif /* ST_SENSORS_I2C_H */
diff --git a/include/linux/iio/common/st_sensors_spi.h b/include/linux/iio/common/st_sensors_spi.h
new file mode 100644
index 000000000000..d964a3563dc6
--- /dev/null
+++ b/include/linux/iio/common/st_sensors_spi.h
@@ -0,0 +1,20 @@
+/*
+ * STMicroelectronics sensors spi library driver
+ *
+ * Copyright 2012-2013 STMicroelectronics Inc.
+ *
+ * Denis Ciocca <denis.ciocca@st.com>
+ *
+ * Licensed under the GPL-2.
+ */
+
+#ifndef ST_SENSORS_SPI_H
+#define ST_SENSORS_SPI_H
+
+#include <linux/spi/spi.h>
+#include <linux/iio/common/st_sensors.h>
+
+void st_sensors_spi_configure(struct iio_dev *indio_dev,
+			struct spi_device *spi, struct st_sensor_data *sdata);
+
+#endif /* ST_SENSORS_SPI_H */
-- 
cgit v1.2.3


From 999517f6742b4ca4692c041752afc4298fbbf0da Mon Sep 17 00:00:00 2001
From: Lars-Peter Clausen <lars@metafoo.de>
Date: Mon, 28 Jan 2013 10:12:00 +0000
Subject: staging:iio: Remove noop call to __iio_update_buffer

__iio_update_buffer updates the buffer's bytes_per_datum and length fields.
But the only user of this function just passes in these exact fields, so the
call basically looks like this:

	buffer->bytes_per_datum = buffer->bytes_per_datum;
	buffer->length = buffer->length;

Which means it is a noop and can be removed. Also remove the function itself,
since it is now unused.

Signed-off-by: Lars-Peter Clausen <lars@metafoo.de>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 drivers/iio/kfifo_buf.c    |  1 -
 include/linux/iio/buffer.h | 13 -------------
 2 files changed, 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/iio/kfifo_buf.c b/drivers/iio/kfifo_buf.c
index 5bc5c860e9ca..a923c78d5cb4 100644
--- a/drivers/iio/kfifo_buf.c
+++ b/drivers/iio/kfifo_buf.c
@@ -22,7 +22,6 @@ static inline int __iio_allocate_kfifo(struct iio_kfifo *buf,
 	if ((length == 0) || (bytes_per_datum == 0))
 		return -EINVAL;
 
-	__iio_update_buffer(&buf->buffer, bytes_per_datum, length);
 	return __kfifo_alloc((struct __kfifo *)&buf->kf, length,
 			     bytes_per_datum, GFP_KERNEL);
 }
diff --git a/include/linux/iio/buffer.h b/include/linux/iio/buffer.h
index f3eea18fdf46..2bac0eb8948d 100644
--- a/include/linux/iio/buffer.h
+++ b/include/linux/iio/buffer.h
@@ -103,19 +103,6 @@ int iio_update_buffers(struct iio_dev *indio_dev,
  **/
 void iio_buffer_init(struct iio_buffer *buffer);
 
-/**
- * __iio_update_buffer() - update common elements of buffers
- * @buffer:		buffer that is the event source
- * @bytes_per_datum:	size of individual datum including timestamp
- * @length:		number of datums in buffer
- **/
-static inline void __iio_update_buffer(struct iio_buffer *buffer,
-				       int bytes_per_datum, int length)
-{
-	buffer->bytes_per_datum = bytes_per_datum;
-	buffer->length = length;
-}
-
 int iio_scan_mask_query(struct iio_dev *indio_dev,
 			struct iio_buffer *buffer, int bit);
 
-- 
cgit v1.2.3


From 12572dbb53638c6e454ef831c8fee7de3df24389 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 14 Jan 2013 17:05:21 +0000
Subject: clockevents: Add generic timer broadcast receiver

Currently the broadcast mechanism used for timers is abstracted by a
function pointer on struct clock_event_device. As the fundamental
mechanism for broadcast is architecture-specific, this ties each
clock_event_device driver to a single architecture, even where the
driver is otherwise generic.

This patch adds a standard path for the receipt of timer broadcasts, so
drivers and/or architecture backends need not manage redundant lists of
timers for the purpose of routing broadcast timer ticks.

[tglx: Made the implementation depend on the config switch as well ]

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Cc: linux-arm-kernel@lists.infradead.org
Cc: nico@linaro.org
Cc: Will.Deacon@arm.com
Cc: Marc.Zyngier@arm.com
Cc: john.stultz@linaro.org
Link: http://lkml.kernel.org/r/1358183124-28461-2-git-send-email-mark.rutland@arm.com
Tested-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Reviewed-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/clockchips.h   |  4 ++++
 kernel/time/tick-broadcast.c | 17 +++++++++++++++++
 2 files changed, 21 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h
index 8a7096fcb01e..e1089aa7816d 100644
--- a/include/linux/clockchips.h
+++ b/include/linux/clockchips.h
@@ -161,6 +161,10 @@ clockevents_calc_mult_shift(struct clock_event_device *ce, u32 freq, u32 minsec)
 extern void clockevents_suspend(void);
 extern void clockevents_resume(void);
 
+#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
+extern int tick_receive_broadcast(void);
+#endif
+
 #ifdef CONFIG_GENERIC_CLOCKEVENTS
 extern void clockevents_notify(unsigned long reason, void *arg);
 #else
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index f113755695e2..7cc81c57eb31 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -125,6 +125,23 @@ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
 	return ret;
 }
 
+#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
+int tick_receive_broadcast(void)
+{
+	struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
+	struct clock_event_device *evt = td->evtdev;
+
+	if (!evt)
+		return -ENODEV;
+
+	if (!evt->event_handler)
+		return -EINVAL;
+
+	evt->event_handler(evt);
+	return 0;
+}
+#endif
+
 /*
  * Broadcast the event to the cpus, which are set in the mask (mangled).
  */
-- 
cgit v1.2.3


From 12ad10004645d38356b14d1fbba379c523a61916 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 14 Jan 2013 17:05:22 +0000
Subject: clockevents: Add generic timer broadcast function

Currently, the timer broadcast mechanism is defined by a function
pointer on struct clock_event_device. As the fundamental mechanism for
broadcast is architecture-specific, this means that clock_event_device
drivers cannot be shared across multiple architectures.

This patch adds an (optional) architecture-specific function for timer
tick broadcast, allowing drivers which may require broadcast
functionality to be shared across multiple architectures.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Cc: linux-arm-kernel@lists.infradead.org
Cc: nico@linaro.org
Cc: Will.Deacon@arm.com
Cc: Marc.Zyngier@arm.com
Cc: john.stultz@linaro.org
Link: http://lkml.kernel.org/r/1358183124-28461-3-git-send-email-mark.rutland@arm.com
Tested-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Reviewed-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/clockchips.h   |  5 +++++
 kernel/time/Kconfig          |  4 ++++
 kernel/time/tick-broadcast.c | 13 +++++++++++++
 3 files changed, 22 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h
index e1089aa7816d..66346521cb65 100644
--- a/include/linux/clockchips.h
+++ b/include/linux/clockchips.h
@@ -162,6 +162,11 @@ extern void clockevents_suspend(void);
 extern void clockevents_resume(void);
 
 #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
+#ifdef CONFIG_ARCH_HAS_TICK_BROADCAST
+extern void tick_broadcast(const struct cpumask *mask);
+#else
+#define tick_broadcast	NULL
+#endif
 extern int tick_receive_broadcast(void);
 #endif
 
diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig
index 8601f0db1261..b69692250af4 100644
--- a/kernel/time/Kconfig
+++ b/kernel/time/Kconfig
@@ -38,6 +38,10 @@ config GENERIC_CLOCKEVENTS_BUILD
 	default y
 	depends on GENERIC_CLOCKEVENTS
 
+# Architecture can handle broadcast in a driver-agnostic way
+config ARCH_HAS_TICK_BROADCAST
+	bool
+
 # Clockevents broadcasting infrastructure
 config GENERIC_CLOCKEVENTS_BROADCAST
 	bool
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index 7cc81c57eb31..f726537d24eb 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -18,6 +18,7 @@
 #include <linux/percpu.h>
 #include <linux/profile.h>
 #include <linux/sched.h>
+#include <linux/smp.h>
 
 #include "tick-internal.h"
 
@@ -86,6 +87,11 @@ int tick_is_broadcast_device(struct clock_event_device *dev)
 	return (dev && tick_broadcast_device.evtdev == dev);
 }
 
+static void err_broadcast(const struct cpumask *mask)
+{
+	pr_crit_once("Failed to broadcast timer tick. Some CPUs may be unresponsive.\n");
+}
+
 /*
  * Check, if the device is disfunctional and a place holder, which
  * needs to be handled by the broadcast device.
@@ -105,6 +111,13 @@ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
 	 */
 	if (!tick_device_is_functional(dev)) {
 		dev->event_handler = tick_handle_periodic;
+		if (!dev->broadcast)
+			dev->broadcast = tick_broadcast;
+		if (!dev->broadcast) {
+			pr_warn_once("%s depends on broadcast, but no broadcast function available\n",
+				     dev->name);
+			dev->broadcast = err_broadcast;
+		}
 		cpumask_set_cpu(cpu, tick_get_broadcast_mask());
 		tick_broadcast_start_periodic(tick_broadcast_device.evtdev);
 		ret = 1;
-- 
cgit v1.2.3


From 6fcdf4facb85e7d54ff6195378dd2ba8e0baccc4 Mon Sep 17 00:00:00 2001
From: Paul Gortmaker <paul.gortmaker@windriver.com>
Date: Wed, 30 Jan 2013 21:50:08 -0500
Subject: wanrouter: delete now orphaned header content, files/drivers

The wanrouter support was identified earlier as unused for years,
and so the previous commit totally decoupled it from the kernel,
leaving the related wanrouter files present, but totally inert.

Here we take the final step in that cleanup, by doing a wholesale
removal of these files.  The two step process is used so that the
large deletion is decoupled from the git history of files that we
still care about.

The drivers deleted here all were dependent on the Kconfig setting
CONFIG_WAN_ROUTER_DRIVERS.

A stub wanrouter.h header (kernel & uapi) are left behind so that
drivers/isdn/i4l/isdn_x25iface.c continues to compile, and so that
we don't accidentally break userspace that expected these defines.

Cc: Joe Perches <joe@perches.com>
Cc: Dan Carpenter <dan.carpenter@oracle.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
---
 drivers/net/wan/cycx_drv.c     |  569 --------------
 drivers/net/wan/cycx_main.c    |  346 ---------
 drivers/net/wan/cycx_x25.c     | 1602 ----------------------------------------
 include/linux/cyclomx.h        |   77 --
 include/linux/cycx_drv.h       |   64 --
 include/linux/wanrouter.h      |  127 +---
 include/uapi/linux/wanrouter.h |  443 +----------
 net/wanrouter/Kconfig          |   27 -
 net/wanrouter/Makefile         |    7 -
 net/wanrouter/patchlevel       |    1 -
 net/wanrouter/wanmain.c        |  782 --------------------
 net/wanrouter/wanproc.c        |  380 ----------
 12 files changed, 8 insertions(+), 4417 deletions(-)
 delete mode 100644 drivers/net/wan/cycx_drv.c
 delete mode 100644 drivers/net/wan/cycx_main.c
 delete mode 100644 drivers/net/wan/cycx_x25.c
 delete mode 100644 include/linux/cyclomx.h
 delete mode 100644 include/linux/cycx_drv.h
 delete mode 100644 net/wanrouter/Kconfig
 delete mode 100644 net/wanrouter/Makefile
 delete mode 100644 net/wanrouter/patchlevel
 delete mode 100644 net/wanrouter/wanmain.c
 delete mode 100644 net/wanrouter/wanproc.c

(limited to 'include/linux')

diff --git a/drivers/net/wan/cycx_drv.c b/drivers/net/wan/cycx_drv.c
deleted file mode 100644
index 2a3ecae67a90..000000000000
--- a/drivers/net/wan/cycx_drv.c
+++ /dev/null
@@ -1,569 +0,0 @@
-/*
-* cycx_drv.c	Cyclom 2X Support Module.
-*
-*		This module is a library of common hardware specific
-*		functions used by the Cyclades Cyclom 2X sync card.
-*
-* Author:	Arnaldo Carvalho de Melo <acme@conectiva.com.br>
-*
-* Copyright:	(c) 1998-2003 Arnaldo Carvalho de Melo
-*
-* Based on sdladrv.c by Gene Kozin <genek@compuserve.com>
-*
-*		This program is free software; you can redistribute it and/or
-*		modify it under the terms of the GNU General Public License
-*		as published by the Free Software Foundation; either version
-*		2 of the License, or (at your option) any later version.
-* ============================================================================
-* 1999/11/11	acme		set_current_state(TASK_INTERRUPTIBLE), code
-*				cleanup
-* 1999/11/08	acme		init_cyc2x deleted, doing nothing
-* 1999/11/06	acme		back to read[bw], write[bw] and memcpy_to and
-*				fromio to use dpmbase ioremaped
-* 1999/10/26	acme		use isa_read[bw], isa_write[bw] & isa_memcpy_to
-*				& fromio
-* 1999/10/23	acme		cleanup to only supports cyclom2x: all the other
-*				boards are no longer manufactured by cyclades,
-*				if someone wants to support them... be my guest!
-* 1999/05/28    acme		cycx_intack & cycx_intde gone for good
-* 1999/05/18	acme		lots of unlogged work, submitting to Linus...
-* 1999/01/03	acme		more judicious use of data types
-* 1999/01/03	acme		judicious use of data types :>
-*				cycx_inten trying to reset pending interrupts
-*				from cyclom 2x - I think this isn't the way to
-*				go, but for now...
-* 1999/01/02	acme		cycx_intack ok, I think there's nothing to do
-*				to ack an int in cycx_drv.c, only handle it in
-*				cyx_isr (or in the other protocols: cyp_isr,
-*				cyf_isr, when they get implemented.
-* Dec 31, 1998	acme		cycx_data_boot & cycx_code_boot fixed, crossing
-*				fingers to see x25_configure in cycx_x25.c
-*				work... :)
-* Dec 26, 1998	acme		load implementation fixed, seems to work! :)
-*				cycx_2x_dpmbase_options with all the possible
-*				DPM addresses (20).
-*				cycx_intr implemented (test this!)
-*				general code cleanup
-* Dec  8, 1998	Ivan Passos	Cyclom-2X firmware load implementation.
-* Aug  8, 1998	acme		Initial version.
-*/
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/init.h>		/* __init */
-#include <linux/module.h>
-#include <linux/kernel.h>	/* printk(), and other useful stuff */
-#include <linux/stddef.h>	/* offsetof(), etc. */
-#include <linux/errno.h>	/* return codes */
-#include <linux/cycx_drv.h>	/* API definitions */
-#include <linux/cycx_cfm.h>	/* CYCX firmware module definitions */
-#include <linux/delay.h>	/* udelay, msleep_interruptible */
-#include <asm/io.h>		/* read[wl], write[wl], ioremap, iounmap */
-
-#define	MOD_VERSION	0
-#define	MOD_RELEASE	6
-
-MODULE_AUTHOR("Arnaldo Carvalho de Melo");
-MODULE_DESCRIPTION("Cyclom 2x Sync Card Driver");
-MODULE_LICENSE("GPL");
-
-/* Hardware-specific functions */
-static int load_cyc2x(struct cycx_hw *hw, struct cycx_firmware *cfm, u32 len);
-static void cycx_bootcfg(struct cycx_hw *hw);
-
-static int reset_cyc2x(void __iomem *addr);
-static int detect_cyc2x(void __iomem *addr);
-
-/* Miscellaneous functions */
-static int get_option_index(const long *optlist, long optval);
-static u16 checksum(u8 *buf, u32 len);
-
-#define wait_cyc(addr) cycx_exec(addr + CMD_OFFSET)
-
-/* Global Data */
-
-/* private data */
-static const char fullname[] = "Cyclom 2X Support Module";
-static const char copyright[] =
-	"(c) 1998-2003 Arnaldo Carvalho de Melo <acme@conectiva.com.br>";
-
-/* Hardware configuration options.
- * These are arrays of configuration options used by verification routines.
- * The first element of each array is its size (i.e. number of options).
- */
-static const long cyc2x_dpmbase_options[] = {
-	20,
-	0xA0000, 0xA4000, 0xA8000, 0xAC000, 0xB0000, 0xB4000, 0xB8000,
-	0xBC000, 0xC0000, 0xC4000, 0xC8000, 0xCC000, 0xD0000, 0xD4000,
-	0xD8000, 0xDC000, 0xE0000, 0xE4000, 0xE8000, 0xEC000
-};
-
-static const long cycx_2x_irq_options[]  = { 7, 3, 5, 9, 10, 11, 12, 15 };
-
-/* Kernel Loadable Module Entry Points */
-/* Module 'insert' entry point.
- * o print announcement
- * o initialize static data
- *
- * Return:	0	Ok
- *		< 0	error.
- * Context:	process */
-
-static int __init cycx_drv_init(void)
-{
-	pr_info("%s v%u.%u %s\n",
-		fullname, MOD_VERSION, MOD_RELEASE, copyright);
-
-	return 0;
-}
-
-/* Module 'remove' entry point.
- * o release all remaining system resources */
-static void cycx_drv_cleanup(void)
-{
-}
-
-/* Kernel APIs */
-/* Set up adapter.
- * o detect adapter type
- * o verify hardware configuration options
- * o check for hardware conflicts
- * o set up adapter shared memory
- * o test adapter memory
- * o load firmware
- * Return:	0	ok.
- *		< 0	error */
-EXPORT_SYMBOL(cycx_setup);
-int cycx_setup(struct cycx_hw *hw, void *cfm, u32 len, unsigned long dpmbase)
-{
-	int err;
-
-	/* Verify IRQ configuration options */
-	if (!get_option_index(cycx_2x_irq_options, hw->irq)) {
-		pr_err("IRQ %d is invalid!\n", hw->irq);
-		return -EINVAL;
-	}
-
-	/* Setup adapter dual-port memory window and test memory */
-	if (!dpmbase) {
-		pr_err("you must specify the dpm address!\n");
- 		return -EINVAL;
-	} else if (!get_option_index(cyc2x_dpmbase_options, dpmbase)) {
-		pr_err("memory address 0x%lX is invalid!\n", dpmbase);
-		return -EINVAL;
-	}
-
-	hw->dpmbase = ioremap(dpmbase, CYCX_WINDOWSIZE);
-	hw->dpmsize = CYCX_WINDOWSIZE;
-
-	if (!detect_cyc2x(hw->dpmbase)) {
-		pr_err("adapter Cyclom 2X not found at address 0x%lX!\n",
-		       dpmbase);
-		return -EINVAL;
-	}
-
-	pr_info("found Cyclom 2X card at address 0x%lX\n", dpmbase);
-
-	/* Load firmware. If loader fails then shut down adapter */
-	err = load_cyc2x(hw, cfm, len);
-
-	if (err)
-		cycx_down(hw);         /* shutdown adapter */
-
-	return err;
-}
-
-EXPORT_SYMBOL(cycx_down);
-int cycx_down(struct cycx_hw *hw)
-{
-	iounmap(hw->dpmbase);
-	return 0;
-}
-
-/* Enable interrupt generation.  */
-static void cycx_inten(struct cycx_hw *hw)
-{
-	writeb(0, hw->dpmbase);
-}
-
-/* Generate an interrupt to adapter's CPU. */
-EXPORT_SYMBOL(cycx_intr);
-void cycx_intr(struct cycx_hw *hw)
-{
-	writew(0, hw->dpmbase + GEN_CYCX_INTR);
-}
-
-/* Execute Adapter Command.
- * o Set exec flag.
- * o Busy-wait until flag is reset. */
-EXPORT_SYMBOL(cycx_exec);
-int cycx_exec(void __iomem *addr)
-{
-	u16 i = 0;
-	/* wait till addr content is zeroed */
-
-	while (readw(addr)) {
-		udelay(1000);
-
-		if (++i > 50)
-			return -1;
-	}
-
-	return 0;
-}
-
-/* Read absolute adapter memory.
- * Transfer data from adapter's memory to data buffer. */
-EXPORT_SYMBOL(cycx_peek);
-int cycx_peek(struct cycx_hw *hw, u32 addr, void *buf, u32 len)
-{
-	if (len == 1)
-		*(u8*)buf = readb(hw->dpmbase + addr);
-	else
-		memcpy_fromio(buf, hw->dpmbase + addr, len);
-
-	return 0;
-}
-
-/* Write Absolute Adapter Memory.
- * Transfer data from data buffer to adapter's memory. */
-EXPORT_SYMBOL(cycx_poke);
-int cycx_poke(struct cycx_hw *hw, u32 addr, void *buf, u32 len)
-{
-	if (len == 1)
-		writeb(*(u8*)buf, hw->dpmbase + addr);
-	else
-		memcpy_toio(hw->dpmbase + addr, buf, len);
-
-	return 0;
-}
-
-/* Hardware-Specific Functions */
-
-/* Load Aux Routines */
-/* Reset board hardware.
-   return 1 if memory exists at addr and 0 if not. */
-static int memory_exists(void __iomem *addr)
-{
-	int tries = 0;
-
-	for (; tries < 3 ; tries++) {
-		writew(TEST_PATTERN, addr + 0x10);
-
-		if (readw(addr + 0x10) == TEST_PATTERN)
-			if (readw(addr + 0x10) == TEST_PATTERN)
-				return 1;
-
-		msleep_interruptible(1 * 1000);
-	}
-
-	return 0;
-}
-
-/* Load reset code. */
-static void reset_load(void __iomem *addr, u8 *buffer, u32 cnt)
-{
-	void __iomem *pt_code = addr + RESET_OFFSET;
-	u16 i; /*, j; */
-
-	for (i = 0 ; i < cnt ; i++) {
-/*		for (j = 0 ; j < 50 ; j++); Delay - FIXME busy waiting... */
-		writeb(*buffer++, pt_code++);
-	}
-}
-
-/* Load buffer using boot interface.
- * o copy data from buffer to Cyclom-X memory
- * o wait for reset code to copy it to right portion of memory */
-static int buffer_load(void __iomem *addr, u8 *buffer, u32 cnt)
-{
-	memcpy_toio(addr + DATA_OFFSET, buffer, cnt);
-	writew(GEN_BOOT_DAT, addr + CMD_OFFSET);
-
-	return wait_cyc(addr);
-}
-
-/* Set up entry point and kick start Cyclom-X CPU. */
-static void cycx_start(void __iomem *addr)
-{
-	/* put in 0x30 offset the jump instruction to the code entry point */
-	writeb(0xea, addr + 0x30);
-	writeb(0x00, addr + 0x31);
-	writeb(0xc4, addr + 0x32);
-	writeb(0x00, addr + 0x33);
-	writeb(0x00, addr + 0x34);
-
-	/* cmd to start executing code */
-	writew(GEN_START, addr + CMD_OFFSET);
-}
-
-/* Load and boot reset code. */
-static void cycx_reset_boot(void __iomem *addr, u8 *code, u32 len)
-{
-	void __iomem *pt_start = addr + START_OFFSET;
-
-	writeb(0xea, pt_start++); /* jmp to f000:3f00 */
-	writeb(0x00, pt_start++);
-	writeb(0xfc, pt_start++);
-	writeb(0x00, pt_start++);
-	writeb(0xf0, pt_start);
-	reset_load(addr, code, len);
-
-	/* 80186 was in hold, go */
-	writeb(0, addr + START_CPU);
-	msleep_interruptible(1 * 1000);
-}
-
-/* Load data.bin file through boot (reset) interface. */
-static int cycx_data_boot(void __iomem *addr, u8 *code, u32 len)
-{
-	void __iomem *pt_boot_cmd = addr + CMD_OFFSET;
-	u32 i;
-
-	/* boot buffer length */
-	writew(CFM_LOAD_BUFSZ, pt_boot_cmd + sizeof(u16));
-	writew(GEN_DEFPAR, pt_boot_cmd);
-
-	if (wait_cyc(addr) < 0)
-		return -1;
-
-	writew(0, pt_boot_cmd + sizeof(u16));
-	writew(0x4000, pt_boot_cmd + 2 * sizeof(u16));
-	writew(GEN_SET_SEG, pt_boot_cmd);
-
-	if (wait_cyc(addr) < 0)
-		return -1;
-
-	for (i = 0 ; i < len ; i += CFM_LOAD_BUFSZ)
-		if (buffer_load(addr, code + i,
-				min_t(u32, CFM_LOAD_BUFSZ, (len - i))) < 0) {
-			pr_err("Error !!\n");
-			return -1;
-		}
-
-	return 0;
-}
-
-
-/* Load code.bin file through boot (reset) interface. */
-static int cycx_code_boot(void __iomem *addr, u8 *code, u32 len)
-{
-	void __iomem *pt_boot_cmd = addr + CMD_OFFSET;
-	u32 i;
-
-	/* boot buffer length */
-	writew(CFM_LOAD_BUFSZ, pt_boot_cmd + sizeof(u16));
-	writew(GEN_DEFPAR, pt_boot_cmd);
-
-	if (wait_cyc(addr) < 0)
-		return -1;
-
-	writew(0x0000, pt_boot_cmd + sizeof(u16));
-	writew(0xc400, pt_boot_cmd + 2 * sizeof(u16));
-	writew(GEN_SET_SEG, pt_boot_cmd);
-
-	if (wait_cyc(addr) < 0)
-		return -1;
-
-	for (i = 0 ; i < len ; i += CFM_LOAD_BUFSZ)
-		if (buffer_load(addr, code + i,
-				min_t(u32, CFM_LOAD_BUFSZ, (len - i)))) {
-			pr_err("Error !!\n");
-			return -1;
-		}
-
-	return 0;
-}
-
-/* Load adapter from the memory image of the CYCX firmware module.
- * o verify firmware integrity and compatibility
- * o start adapter up */
-static int load_cyc2x(struct cycx_hw *hw, struct cycx_firmware *cfm, u32 len)
-{
-	int i, j;
-	struct cycx_fw_header *img_hdr;
-	u8 *reset_image,
-	   *data_image,
-	   *code_image;
-	void __iomem *pt_cycld = hw->dpmbase + 0x400;
-	u16 cksum;
-
-	/* Announce */
-	pr_info("firmware signature=\"%s\"\n", cfm->signature);
-
-	/* Verify firmware signature */
-	if (strcmp(cfm->signature, CFM_SIGNATURE)) {
-		pr_err("load_cyc2x: not Cyclom-2X firmware!\n");
-		return -EINVAL;
-	}
-
-	pr_info("firmware version=%u\n", cfm->version);
-
-	/* Verify firmware module format version */
-	if (cfm->version != CFM_VERSION) {
-		pr_err("%s: firmware format %u rejected! Expecting %u.\n",
-		       __func__, cfm->version, CFM_VERSION);
-		return -EINVAL;
-	}
-
-	/* Verify firmware module length and checksum */
-	cksum = checksum((u8*)&cfm->info, sizeof(struct cycx_fw_info) +
-					  cfm->info.codesize);
-/*
-	FIXME cfm->info.codesize is off by 2
-	if (((len - sizeof(struct cycx_firmware) - 1) != cfm->info.codesize) ||
-*/
-	if (cksum != cfm->checksum) {
-		pr_err("%s: firmware corrupted!\n", __func__);
-		pr_err(" cdsize = 0x%x (expected 0x%lx)\n",
-		       len - (int)sizeof(struct cycx_firmware) - 1,
-		       cfm->info.codesize);
-		pr_err(" chksum = 0x%x (expected 0x%x)\n",
-		       cksum, cfm->checksum);
-		return -EINVAL;
-	}
-
-	/* If everything is ok, set reset, data and code pointers */
-	img_hdr = (struct cycx_fw_header *)&cfm->image;
-#ifdef FIRMWARE_DEBUG
-	pr_info("%s: image sizes\n", __func__);
-	pr_info(" reset=%lu\n", img_hdr->reset_size);
-	pr_info("  data=%lu\n", img_hdr->data_size);
-	pr_info("  code=%lu\n", img_hdr->code_size);
-#endif
-	reset_image = ((u8 *)img_hdr) + sizeof(struct cycx_fw_header);
-	data_image = reset_image + img_hdr->reset_size;
-	code_image = data_image + img_hdr->data_size;
-
-	/*---- Start load ----*/
-	/* Announce */
-	pr_info("loading firmware %s (ID=%u)...\n",
-		cfm->descr[0] ? cfm->descr : "unknown firmware",
-		cfm->info.codeid);
-
-	for (i = 0 ; i < 5 ; i++) {
-		/* Reset Cyclom hardware */
-		if (!reset_cyc2x(hw->dpmbase)) {
-			pr_err("dpm problem or board not found\n");
-			return -EINVAL;
-		}
-
-		/* Load reset.bin */
-		cycx_reset_boot(hw->dpmbase, reset_image, img_hdr->reset_size);
-		/* reset is waiting for boot */
-		writew(GEN_POWER_ON, pt_cycld);
-		msleep_interruptible(1 * 1000);
-
-		for (j = 0 ; j < 3 ; j++)
-			if (!readw(pt_cycld))
-				goto reset_loaded;
-			else
-				msleep_interruptible(1 * 1000);
-	}
-
-	pr_err("reset not started\n");
-	return -EINVAL;
-
-reset_loaded:
-	/* Load data.bin */
-	if (cycx_data_boot(hw->dpmbase, data_image, img_hdr->data_size)) {
-		pr_err("cannot load data file\n");
-		return -EINVAL;
-	}
-
-	/* Load code.bin */
-	if (cycx_code_boot(hw->dpmbase, code_image, img_hdr->code_size)) {
-		pr_err("cannot load code file\n");
-		return -EINVAL;
-	}
-
-	/* Prepare boot-time configuration data */
-	cycx_bootcfg(hw);
-
-	/* kick-off CPU */
-	cycx_start(hw->dpmbase);
-
-	/* Arthur Ganzert's tip: wait a while after the firmware loading...
-	   seg abr 26 17:17:12 EST 1999 - acme */
-	msleep_interruptible(7 * 1000);
-	pr_info("firmware loaded!\n");
-
-	/* enable interrupts */
-	cycx_inten(hw);
-
-	return 0;
-}
-
-/* Prepare boot-time firmware configuration data.
- * o initialize configuration data area
-   From async.doc - V_3.4.0 - 07/18/1994
-   - As of now, only static buffers are available to the user.
-     So, the bit VD_RXDIRC must be set in 'valid'. That means that user
-     wants to use the static transmission and reception buffers. */
-static void cycx_bootcfg(struct cycx_hw *hw)
-{
-	/* use fixed buffers */
-	writeb(FIXED_BUFFERS, hw->dpmbase + CONF_OFFSET);
-}
-
-/* Detect Cyclom 2x adapter.
- *	Following tests are used to detect Cyclom 2x adapter:
- *       to be completed based on the tests done below
- *	Return 1 if detected o.k. or 0 if failed.
- *	Note:	This test is destructive! Adapter will be left in shutdown
- *		state after the test. */
-static int detect_cyc2x(void __iomem *addr)
-{
-	reset_cyc2x(addr);
-
-	return memory_exists(addr);
-}
-
-/* Miscellaneous */
-/* Get option's index into the options list.
- *	Return option's index (1 .. N) or zero if option is invalid. */
-static int get_option_index(const long *optlist, long optval)
-{
-	int i = 1;
-
-	for (; i <= optlist[0]; ++i)
-		if (optlist[i] == optval)
-			return i;
-
-	return 0;
-}
-
-/* Reset adapter's CPU. */
-static int reset_cyc2x(void __iomem *addr)
-{
-	writeb(0, addr + RST_ENABLE);
-	msleep_interruptible(2 * 1000);
-	writeb(0, addr + RST_DISABLE);
-	msleep_interruptible(2 * 1000);
-
-	return memory_exists(addr);
-}
-
-/* Calculate 16-bit CRC using CCITT polynomial. */
-static u16 checksum(u8 *buf, u32 len)
-{
-	u16 crc = 0;
-	u16 mask, flag;
-
-	for (; len; --len, ++buf)
-		for (mask = 0x80; mask; mask >>= 1) {
-			flag = (crc & 0x8000);
-			crc <<= 1;
-			crc |= ((*buf & mask) ? 1 : 0);
-
-			if (flag)
-				crc ^= 0x1021;
-		}
-
-	return crc;
-}
-
-module_init(cycx_drv_init);
-module_exit(cycx_drv_cleanup);
-
-/* End */
diff --git a/drivers/net/wan/cycx_main.c b/drivers/net/wan/cycx_main.c
deleted file mode 100644
index 81fbbad406be..000000000000
--- a/drivers/net/wan/cycx_main.c
+++ /dev/null
@@ -1,346 +0,0 @@
-/*
-* cycx_main.c	Cyclades Cyclom 2X WAN Link Driver. Main module.
-*
-* Author:	Arnaldo Carvalho de Melo <acme@conectiva.com.br>
-*
-* Copyright:	(c) 1998-2003 Arnaldo Carvalho de Melo
-*
-* Based on sdlamain.c by Gene Kozin <genek@compuserve.com> &
-*			 Jaspreet Singh	<jaspreet@sangoma.com>
-*
-*		This program is free software; you can redistribute it and/or
-*		modify it under the terms of the GNU General Public License
-*		as published by the Free Software Foundation; either version
-*		2 of the License, or (at your option) any later version.
-* ============================================================================
-* Please look at the bitkeeper changelog (or any other scm tool that ends up
-* importing bitkeeper changelog or that replaces bitkeeper in the future as
-* main tool for linux development).
-* 
-* 2001/05/09	acme		Fix MODULE_DESC for debug, .bss nitpicks,
-* 				some cleanups
-* 2000/07/13	acme		remove useless #ifdef MODULE and crap
-*				#if KERNEL_VERSION > blah
-* 2000/07/06	acme		__exit at cyclomx_cleanup
-* 2000/04/02	acme		dprintk and cycx_debug
-* 				module_init/module_exit
-* 2000/01/21	acme		rename cyclomx_open to cyclomx_mod_inc_use_count
-*				and cyclomx_close to cyclomx_mod_dec_use_count
-* 2000/01/08	acme		cleanup
-* 1999/11/06	acme		cycx_down back to life (it needs to be
-*				called to iounmap the dpmbase)
-* 1999/08/09	acme		removed references to enable_tx_int
-*				use spinlocks instead of cli/sti in
-*				cyclomx_set_state
-* 1999/05/19	acme		works directly linked into the kernel
-*				init_waitqueue_head for 2.3.* kernel
-* 1999/05/18	acme		major cleanup (polling not needed), etc
-* 1998/08/28	acme		minor cleanup (ioctls for firmware deleted)
-*				queue_task activated
-* 1998/08/08	acme		Initial version.
-*/
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/stddef.h>	/* offsetof(), etc. */
-#include <linux/errno.h>	/* return codes */
-#include <linux/string.h>	/* inline memset(), etc. */
-#include <linux/slab.h>		/* kmalloc(), kfree() */
-#include <linux/kernel.h>	/* printk(), and other useful stuff */
-#include <linux/module.h>	/* support for loadable modules */
-#include <linux/ioport.h>	/* request_region(), release_region() */
-#include <linux/wanrouter.h>	/* WAN router definitions */
-#include <linux/cyclomx.h>	/* cyclomx common user API definitions */
-#include <linux/init.h>         /* __init (when not using as a module) */
-#include <linux/interrupt.h>
-
-unsigned int cycx_debug;
-
-MODULE_AUTHOR("Arnaldo Carvalho de Melo");
-MODULE_DESCRIPTION("Cyclom 2X Sync Card Driver.");
-MODULE_LICENSE("GPL");
-module_param(cycx_debug, int, 0);
-MODULE_PARM_DESC(cycx_debug, "cyclomx debug level");
-
-/* Defines & Macros */
-
-#define	CYCX_DRV_VERSION	0	/* version number */
-#define	CYCX_DRV_RELEASE	11	/* release (minor version) number */
-#define	CYCX_MAX_CARDS		1	/* max number of adapters */
-
-#define	CONFIG_CYCX_CARDS 1
-
-/* Function Prototypes */
-
-/* WAN link driver entry points */
-static int cycx_wan_setup(struct wan_device *wandev, wandev_conf_t *conf);
-static int cycx_wan_shutdown(struct wan_device *wandev);
-
-/* Miscellaneous functions */
-static irqreturn_t cycx_isr(int irq, void *dev_id);
-
-/* Global Data
- * Note: All data must be explicitly initialized!!!
- */
-
-/* private data */
-static const char cycx_drvname[] = "cyclomx";
-static const char cycx_fullname[] = "CYCLOM 2X(tm) Sync Card Driver";
-static const char cycx_copyright[] = "(c) 1998-2003 Arnaldo Carvalho de Melo "
-			  "<acme@conectiva.com.br>";
-static int cycx_ncards = CONFIG_CYCX_CARDS;
-static struct cycx_device *cycx_card_array;	/* adapter data space */
-
-/* Kernel Loadable Module Entry Points */
-
-/*
- * Module 'insert' entry point.
- * o print announcement
- * o allocate adapter data space
- * o initialize static data
- * o register all cards with WAN router
- * o calibrate Cyclom 2X shared memory access delay.
- *
- * Return:	0	Ok
- *		< 0	error.
- * Context:	process
- */
-static int __init cycx_init(void)
-{
-	int cnt, err = -ENOMEM;
-
-	pr_info("%s v%u.%u %s\n",
-		cycx_fullname, CYCX_DRV_VERSION, CYCX_DRV_RELEASE,
-		cycx_copyright);
-
-	/* Verify number of cards and allocate adapter data space */
-	cycx_ncards = min_t(int, cycx_ncards, CYCX_MAX_CARDS);
-	cycx_ncards = max_t(int, cycx_ncards, 1);
-	cycx_card_array = kcalloc(cycx_ncards, sizeof(struct cycx_device), GFP_KERNEL);
-	if (!cycx_card_array)
-		goto out;
-
-
-	/* Register adapters with WAN router */
-	for (cnt = 0; cnt < cycx_ncards; ++cnt) {
-		struct cycx_device *card = &cycx_card_array[cnt];
-		struct wan_device *wandev = &card->wandev;
-
-		sprintf(card->devname, "%s%d", cycx_drvname, cnt + 1);
-		wandev->magic    = ROUTER_MAGIC;
-		wandev->name     = card->devname;
-		wandev->private  = card;
-		wandev->setup    = cycx_wan_setup;
-		wandev->shutdown = cycx_wan_shutdown;
-		err = register_wan_device(wandev);
-
-		if (err) {
-			pr_err("%s registration failed with error %d!\n",
-			       card->devname, err);
-			break;
-		}
-	}
-
-	err = -ENODEV;
-	if (!cnt) {
-		kfree(cycx_card_array);
-		goto out;
-	}
-	err = 0;
-	cycx_ncards = cnt;	/* adjust actual number of cards */
-out:	return err;
-}
-
-/*
- * Module 'remove' entry point.
- * o unregister all adapters from the WAN router
- * o release all remaining system resources
- */
-static void __exit cycx_exit(void)
-{
-	int i = 0;
-
-	for (; i < cycx_ncards; ++i) {
-		struct cycx_device *card = &cycx_card_array[i];
-		unregister_wan_device(card->devname);
-	}
-
-	kfree(cycx_card_array);
-}
-
-/* WAN Device Driver Entry Points */
-/*
- * Setup/configure WAN link driver.
- * o check adapter state
- * o make sure firmware is present in configuration
- * o allocate interrupt vector
- * o setup Cyclom 2X hardware
- * o call appropriate routine to perform protocol-specific initialization
- *
- * This function is called when router handles ROUTER_SETUP IOCTL. The
- * configuration structure is in kernel memory (including extended data, if
- * any).
- */
-static int cycx_wan_setup(struct wan_device *wandev, wandev_conf_t *conf)
-{
-	int rc = -EFAULT;
-	struct cycx_device *card;
-	int irq;
-
-	/* Sanity checks */
-
-	if (!wandev || !wandev->private || !conf)
-		goto out;
-
-	card = wandev->private;
-	rc = -EBUSY;
-	if (wandev->state != WAN_UNCONFIGURED)
-		goto out;
-
-	rc = -EINVAL;
-	if (!conf->data_size || !conf->data) {
-		pr_err("%s: firmware not found in configuration data!\n",
-		       wandev->name);
-		goto out;
-	}
-
-	if (conf->irq <= 0) {
-		pr_err("%s: can't configure without IRQ!\n", wandev->name);
-		goto out;
-	}
-
-	/* Allocate IRQ */
-	irq = conf->irq == 2 ? 9 : conf->irq;	/* IRQ2 -> IRQ9 */
-
-	if (request_irq(irq, cycx_isr, 0, wandev->name, card)) {
-		pr_err("%s: can't reserve IRQ %d!\n", wandev->name, irq);
-		goto out;
-	}
-
-	/* Configure hardware, load firmware, etc. */
-	memset(&card->hw, 0, sizeof(card->hw));
-	card->hw.irq	 = irq;
-	card->hw.dpmsize = CYCX_WINDOWSIZE;
-	card->hw.fwid	 = CFID_X25_2X;
-	spin_lock_init(&card->lock);
-	init_waitqueue_head(&card->wait_stats);
-
-	rc = cycx_setup(&card->hw, conf->data, conf->data_size, conf->maddr);
-	if (rc)
-		goto out_irq;
-
-	/* Initialize WAN device data space */
-	wandev->irq       = irq;
-	wandev->dma       = wandev->ioport = 0;
-	wandev->maddr     = (unsigned long)card->hw.dpmbase;
-	wandev->msize     = card->hw.dpmsize;
-	wandev->hw_opt[2] = 0;
-	wandev->hw_opt[3] = card->hw.fwid;
-
-	/* Protocol-specific initialization */
-	switch (card->hw.fwid) {
-#ifdef CONFIG_CYCLOMX_X25
-	case CFID_X25_2X:
-		rc = cycx_x25_wan_init(card, conf);
-		break;
-#endif
-	default:
-		pr_err("%s: this firmware is not supported!\n", wandev->name);
-		rc = -EINVAL;
-	}
-
-	if (rc) {
-		cycx_down(&card->hw);
-		goto out_irq;
-	}
-
-	rc = 0;
-out:
-	return rc;
-out_irq:
-	free_irq(irq, card);
-	goto out;
-}
-
-/*
- * Shut down WAN link driver.
- * o shut down adapter hardware
- * o release system resources.
- *
- * This function is called by the router when device is being unregistered or
- * when it handles ROUTER_DOWN IOCTL.
- */
-static int cycx_wan_shutdown(struct wan_device *wandev)
-{
-	int ret = -EFAULT;
-	struct cycx_device *card;
-
-	/* sanity checks */
-	if (!wandev || !wandev->private)
-		goto out;
-
-	ret = 0;
-	if (wandev->state == WAN_UNCONFIGURED)
-		goto out;
-
-	card = wandev->private;
-	wandev->state = WAN_UNCONFIGURED;
-	cycx_down(&card->hw);
-	pr_info("%s: irq %d being freed!\n", wandev->name, wandev->irq);
-	free_irq(wandev->irq, card);
-out:	return ret;
-}
-
-/* Miscellaneous */
-/*
- * Cyclom 2X Interrupt Service Routine.
- * o acknowledge Cyclom 2X hardware interrupt.
- * o call protocol-specific interrupt service routine, if any.
- */
-static irqreturn_t cycx_isr(int irq, void *dev_id)
-{
-	struct cycx_device *card = dev_id;
-
-	if (card->wandev.state == WAN_UNCONFIGURED)
-		goto out;
-
-	if (card->in_isr) {
-		pr_warn("%s: interrupt re-entrancy on IRQ %d!\n",
-			card->devname, card->wandev.irq);
-		goto out;
-	}
-
-	if (card->isr)
-		card->isr(card);
-	return IRQ_HANDLED;
-out:
-	return IRQ_NONE;
-}
-
-/* Set WAN device state.  */
-void cycx_set_state(struct cycx_device *card, int state)
-{
-	unsigned long flags;
-	char *string_state = NULL;
-
-	spin_lock_irqsave(&card->lock, flags);
-
-	if (card->wandev.state != state) {
-		switch (state) {
-		case WAN_CONNECTED:
-			string_state = "connected!";
-			break;
-		case WAN_DISCONNECTED:
-			string_state = "disconnected!";
-			break;
-		}
-		pr_info("%s: link %s\n", card->devname, string_state);
-		card->wandev.state = state;
-	}
-
-	card->state_tick = jiffies;
-	spin_unlock_irqrestore(&card->lock, flags);
-}
-
-module_init(cycx_init);
-module_exit(cycx_exit);
diff --git a/drivers/net/wan/cycx_x25.c b/drivers/net/wan/cycx_x25.c
deleted file mode 100644
index 06f3f6309e4b..000000000000
--- a/drivers/net/wan/cycx_x25.c
+++ /dev/null
@@ -1,1602 +0,0 @@
-/*
-* cycx_x25.c	Cyclom 2X WAN Link Driver.  X.25 module.
-*
-* Author:	Arnaldo Carvalho de Melo <acme@conectiva.com.br>
-*
-* Copyright:	(c) 1998-2003 Arnaldo Carvalho de Melo
-*
-* Based on sdla_x25.c by Gene Kozin <genek@compuserve.com>
-*
-*		This program is free software; you can redistribute it and/or
-*		modify it under the terms of the GNU General Public License
-*		as published by the Free Software Foundation; either version
-*		2 of the License, or (at your option) any later version.
-* ============================================================================
-* 2001/01/12	acme		use dev_kfree_skb_irq on interrupt context
-* 2000/04/02	acme		dprintk, cycx_debug
-* 				fixed the bug introduced in get_dev_by_lcn and
-* 				get_dev_by_dte_addr by the anonymous hacker
-* 				that converted this driver to softnet
-* 2000/01/08	acme		cleanup
-* 1999/10/27	acme		use ARPHRD_HWX25 so that the X.25 stack know
-*				that we have a X.25 stack implemented in
-*				firmware onboard
-* 1999/10/18	acme		support for X.25 sockets in if_send,
-*				beware: socket(AF_X25...) IS WORK IN PROGRESS,
-*				TCP/IP over X.25 via wanrouter not affected,
-*				working.
-* 1999/10/09	acme		chan_disc renamed to chan_disconnect,
-* 				began adding support for X.25 sockets:
-* 				conf->protocol in new_if
-* 1999/10/05	acme		fixed return E... to return -E...
-* 1999/08/10	acme		serialized access to the card thru a spinlock
-*				in x25_exec
-* 1999/08/09	acme		removed per channel spinlocks
-*				removed references to enable_tx_int
-* 1999/05/28	acme		fixed nibble_to_byte, ackvc now properly treated
-*				if_send simplified
-* 1999/05/25	acme		fixed t1, t2, t21 & t23 configuration
-*				use spinlocks instead of cli/sti in some points
-* 1999/05/24	acme		finished the x25_get_stat function
-* 1999/05/23	acme		dev->type = ARPHRD_X25 (tcpdump only works,
-*				AFAIT, with ARPHRD_ETHER). This seems to be
-*				needed to use socket(AF_X25)...
-*				Now the config file must specify a peer media
-*				address for svc channels over a crossover cable.
-*				Removed hold_timeout from x25_channel_t,
-*				not used.
-*				A little enhancement in the DEBUG processing
-* 1999/05/22	acme		go to DISCONNECTED in disconnect_confirm_intr,
-*				instead of chan_disc.
-* 1999/05/16	marcelo		fixed timer initialization in SVCs
-* 1999/01/05	acme		x25_configure now get (most of) all
-*				parameters...
-* 1999/01/05	acme		pktlen now (correctly) uses log2 (value
-*				configured)
-* 1999/01/03	acme		judicious use of data types (u8, u16, u32, etc)
-* 1999/01/03	acme		cyx_isr: reset dpmbase to acknowledge
-*				indication (interrupt from cyclom 2x)
-* 1999/01/02	acme		cyx_isr: first hackings...
-* 1999/01/0203  acme 		when initializing an array don't give less
-*				elements than declared...
-* 				example: char send_cmd[6] = "?\xFF\x10";
-*          			you'll gonna lose a couple hours, 'cause your
-*				brain won't admit that there's an error in the
-*				above declaration...  the side effect is that
-*				memset is put into the unresolved symbols
-*				instead of using the inline memset functions...
-* 1999/01/02    acme 		began chan_connect, chan_send, x25_send
-* 1998/12/31	acme		x25_configure
-*				this code can be compiled as non module
-* 1998/12/27	acme		code cleanup
-*				IPX code wiped out! let's decrease code
-*				complexity for now, remember: I'm learning! :)
-*                               bps_to_speed_code OK
-* 1998/12/26	acme		Minimal debug code cleanup
-* 1998/08/08	acme		Initial version.
-*/
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#define CYCLOMX_X25_DEBUG 1
-
-#include <linux/ctype.h>	/* isdigit() */
-#include <linux/errno.h>	/* return codes */
-#include <linux/if_arp.h>       /* ARPHRD_HWX25 */
-#include <linux/kernel.h>	/* printk(), and other useful stuff */
-#include <linux/module.h>
-#include <linux/string.h>	/* inline memset(), etc. */
-#include <linux/sched.h>
-#include <linux/slab.h>		/* kmalloc(), kfree() */
-#include <linux/stddef.h>	/* offsetof(), etc. */
-#include <linux/wanrouter.h>	/* WAN router definitions */
-
-#include <asm/byteorder.h>	/* htons(), etc. */
-
-#include <linux/cyclomx.h>	/* Cyclom 2X common user API definitions */
-#include <linux/cycx_x25.h>	/* X.25 firmware API definitions */
-
-#include <net/x25device.h>
-
-/* Defines & Macros */
-#define CYCX_X25_MAX_CMD_RETRY 5
-#define CYCX_X25_CHAN_MTU 2048	/* unfragmented logical channel MTU */
-
-/* Data Structures */
-/* This is an extension of the 'struct net_device' we create for each network
-   interface to keep the rest of X.25 channel-specific data. */
-struct cycx_x25_channel {
-	/* This member must be first. */
-	struct net_device *slave;	/* WAN slave */
-
-	char name[WAN_IFNAME_SZ+1];	/* interface name, ASCIIZ */
-	char addr[WAN_ADDRESS_SZ+1];	/* media address, ASCIIZ */
-	char *local_addr;		/* local media address, ASCIIZ -
-					   svc thru crossover cable */
-	s16 lcn;			/* logical channel number/conn.req.key*/
-	u8 link;
-	struct timer_list timer;	/* timer used for svc channel disc. */
-	u16 protocol;			/* ethertype, 0 - multiplexed */
-	u8 svc;				/* 0 - permanent, 1 - switched */
-	u8 state;			/* channel state */
-	u8 drop_sequence;		/* mark sequence for dropping */
-	u32 idle_tmout;			/* sec, before disconnecting */
-	struct sk_buff *rx_skb;		/* receive socket buffer */
-	struct cycx_device *card;	/* -> owner */
-	struct net_device_stats ifstats;/* interface statistics */
-};
-
-/* Function Prototypes */
-/* WAN link driver entry points. These are called by the WAN router module. */
-static int cycx_wan_update(struct wan_device *wandev),
-	   cycx_wan_new_if(struct wan_device *wandev, struct net_device *dev,
-			   wanif_conf_t *conf),
-	   cycx_wan_del_if(struct wan_device *wandev, struct net_device *dev);
-
-/* Network device interface */
-static int cycx_netdevice_init(struct net_device *dev);
-static int cycx_netdevice_open(struct net_device *dev);
-static int cycx_netdevice_stop(struct net_device *dev);
-static int cycx_netdevice_hard_header(struct sk_buff *skb,
-				      struct net_device *dev, u16 type,
-				      const void *daddr, const void *saddr,
-				      unsigned len);
-static int cycx_netdevice_rebuild_header(struct sk_buff *skb);
-static netdev_tx_t cycx_netdevice_hard_start_xmit(struct sk_buff *skb,
-							struct net_device *dev);
-
-static struct net_device_stats *
-			cycx_netdevice_get_stats(struct net_device *dev);
-
-/* Interrupt handlers */
-static void cycx_x25_irq_handler(struct cycx_device *card),
-	    cycx_x25_irq_tx(struct cycx_device *card, struct cycx_x25_cmd *cmd),
-	    cycx_x25_irq_rx(struct cycx_device *card, struct cycx_x25_cmd *cmd),
-	    cycx_x25_irq_log(struct cycx_device *card,
-			     struct cycx_x25_cmd *cmd),
-	    cycx_x25_irq_stat(struct cycx_device *card,
-			      struct cycx_x25_cmd *cmd),
-	    cycx_x25_irq_connect_confirm(struct cycx_device *card,
-					 struct cycx_x25_cmd *cmd),
-	    cycx_x25_irq_disconnect_confirm(struct cycx_device *card,
-					    struct cycx_x25_cmd *cmd),
-	    cycx_x25_irq_connect(struct cycx_device *card,
-				 struct cycx_x25_cmd *cmd),
-	    cycx_x25_irq_disconnect(struct cycx_device *card,
-				    struct cycx_x25_cmd *cmd),
-	    cycx_x25_irq_spurious(struct cycx_device *card,
-				  struct cycx_x25_cmd *cmd);
-
-/* X.25 firmware interface functions */
-static int cycx_x25_configure(struct cycx_device *card,
-			      struct cycx_x25_config *conf),
-	   cycx_x25_get_stats(struct cycx_device *card),
-	   cycx_x25_send(struct cycx_device *card, u8 link, u8 lcn, u8 bitm,
-			 int len, void *buf),
-	   cycx_x25_connect_response(struct cycx_device *card,
-				struct cycx_x25_channel *chan),
-	   cycx_x25_disconnect_response(struct cycx_device *card, u8 link,
-			   		u8 lcn);
-
-/* channel functions */
-static int cycx_x25_chan_connect(struct net_device *dev),
-	   cycx_x25_chan_send(struct net_device *dev, struct sk_buff *skb);
-
-static void cycx_x25_chan_disconnect(struct net_device *dev),
-	    cycx_x25_chan_send_event(struct net_device *dev, u8 event);
-
-/* Miscellaneous functions */
-static void cycx_x25_set_chan_state(struct net_device *dev, u8 state),
-	    cycx_x25_chan_timer(unsigned long d);
-
-static void nibble_to_byte(u8 *s, u8 *d, u8 len, u8 nibble),
-	    reset_timer(struct net_device *dev);
-
-static u8 bps_to_speed_code(u32 bps);
-static u8 cycx_log2(u32 n);
-
-static unsigned dec_to_uint(u8 *str, int len);
-
-static struct net_device *cycx_x25_get_dev_by_lcn(struct wan_device *wandev,
-						  s16 lcn);
-static struct net_device *
-	cycx_x25_get_dev_by_dte_addr(struct wan_device *wandev, char *dte);
-
-static void cycx_x25_chan_setup(struct net_device *dev);
-
-#ifdef CYCLOMX_X25_DEBUG
-static void hex_dump(char *msg, unsigned char *p, int len);
-static void cycx_x25_dump_config(struct cycx_x25_config *conf);
-static void cycx_x25_dump_stats(struct cycx_x25_stats *stats);
-static void cycx_x25_dump_devs(struct wan_device *wandev);
-#else
-#define hex_dump(msg, p, len)
-#define cycx_x25_dump_config(conf)
-#define cycx_x25_dump_stats(stats)
-#define cycx_x25_dump_devs(wandev)
-#endif
-/* Public Functions */
-
-/* X.25 Protocol Initialization routine.
- *
- * This routine is called by the main Cyclom 2X module during setup.  At this
- * point adapter is completely initialized and X.25 firmware is running.
- *  o configure adapter
- *  o initialize protocol-specific fields of the adapter data space.
- *
- * Return:	0	o.k.
- *		< 0	failure.  */
-int cycx_x25_wan_init(struct cycx_device *card, wandev_conf_t *conf)
-{
-	struct cycx_x25_config cfg;
-
-	/* Verify configuration ID */
-	if (conf->config_id != WANCONFIG_X25) {
-		pr_info("%s: invalid configuration ID %u!\n",
-			card->devname, conf->config_id);
-		return -EINVAL;
-	}
-
-	/* Initialize protocol-specific fields */
-	card->mbox  = card->hw.dpmbase + X25_MBOX_OFFS;
-	card->u.x.connection_keys = 0;
-	spin_lock_init(&card->u.x.lock);
-
-	/* Configure adapter. Here we set reasonable defaults, then parse
-	 * device configuration structure and set configuration options.
-	 * Most configuration options are verified and corrected (if
-	 * necessary) since we can't rely on the adapter to do so and don't
-	 * want it to fail either. */
-	memset(&cfg, 0, sizeof(cfg));
-	cfg.link = 0;
-	cfg.clock = conf->clocking == WANOPT_EXTERNAL ? 8 : 55;
-	cfg.speed = bps_to_speed_code(conf->bps);
-	cfg.n3win = 7;
-	cfg.n2win = 2;
-	cfg.n2 = 5;
-	cfg.nvc = 1;
-	cfg.npvc = 1;
-	cfg.flags = 0x02; /* default = V35 */
-	cfg.t1 = 10;   /* line carrier timeout */
-	cfg.t2 = 29;   /* tx timeout */
-	cfg.t21 = 180; /* CALL timeout */
-	cfg.t23 = 180; /* CLEAR timeout */
-
-	/* adjust MTU */
-	if (!conf->mtu || conf->mtu >= 512)
-		card->wandev.mtu = 512;
-	else if (conf->mtu >= 256)
-		card->wandev.mtu = 256;
-	else if (conf->mtu >= 128)
-		card->wandev.mtu = 128;
-	else
-		card->wandev.mtu = 64;
-
-	cfg.pktlen = cycx_log2(card->wandev.mtu);
-
-	if (conf->station == WANOPT_DTE) {
-		cfg.locaddr = 3; /* DTE */
-		cfg.remaddr = 1; /* DCE */
-	} else {
-		cfg.locaddr = 1; /* DCE */
-		cfg.remaddr = 3; /* DTE */
-	}
-
-	if (conf->interface == WANOPT_RS232)
-	        cfg.flags = 0;      /* FIXME just reset the 2nd bit */
-
-	if (conf->u.x25.hi_pvc) {
-		card->u.x.hi_pvc = min_t(unsigned int, conf->u.x25.hi_pvc, 4095);
-		card->u.x.lo_pvc = min_t(unsigned int, conf->u.x25.lo_pvc, card->u.x.hi_pvc);
-	}
-
-	if (conf->u.x25.hi_svc) {
-		card->u.x.hi_svc = min_t(unsigned int, conf->u.x25.hi_svc, 4095);
-		card->u.x.lo_svc = min_t(unsigned int, conf->u.x25.lo_svc, card->u.x.hi_svc);
-	}
-
-	if (card->u.x.lo_pvc == 255)
-		cfg.npvc = 0;
-	else
-		cfg.npvc = card->u.x.hi_pvc - card->u.x.lo_pvc + 1;
-
-	cfg.nvc = card->u.x.hi_svc - card->u.x.lo_svc + 1 + cfg.npvc;
-
-	if (conf->u.x25.hdlc_window)
-		cfg.n2win = min_t(unsigned int, conf->u.x25.hdlc_window, 7);
-
-	if (conf->u.x25.pkt_window)
-		cfg.n3win = min_t(unsigned int, conf->u.x25.pkt_window, 7);
-
-	if (conf->u.x25.t1)
-		cfg.t1 = min_t(unsigned int, conf->u.x25.t1, 30);
-
-	if (conf->u.x25.t2)
-		cfg.t2 = min_t(unsigned int, conf->u.x25.t2, 30);
-
-	if (conf->u.x25.t11_t21)
-		cfg.t21 = min_t(unsigned int, conf->u.x25.t11_t21, 30);
-
-	if (conf->u.x25.t13_t23)
-		cfg.t23 = min_t(unsigned int, conf->u.x25.t13_t23, 30);
-
-	if (conf->u.x25.n2)
-		cfg.n2 = min_t(unsigned int, conf->u.x25.n2, 30);
-
-	/* initialize adapter */
-	if (cycx_x25_configure(card, &cfg))
-		return -EIO;
-
-	/* Initialize protocol-specific fields of adapter data space */
-	card->wandev.bps	= conf->bps;
-	card->wandev.interface	= conf->interface;
-	card->wandev.clocking	= conf->clocking;
-	card->wandev.station	= conf->station;
-	card->isr		= cycx_x25_irq_handler;
-	card->exec		= NULL;
-	card->wandev.update	= cycx_wan_update;
-	card->wandev.new_if	= cycx_wan_new_if;
-	card->wandev.del_if	= cycx_wan_del_if;
-	card->wandev.state	= WAN_DISCONNECTED;
-
-	return 0;
-}
-
-/* WAN Device Driver Entry Points */
-/* Update device status & statistics. */
-static int cycx_wan_update(struct wan_device *wandev)
-{
-	/* sanity checks */
-	if (!wandev || !wandev->private)
-		return -EFAULT;
-
-	if (wandev->state == WAN_UNCONFIGURED)
-		return -ENODEV;
-
-	cycx_x25_get_stats(wandev->private);
-
-	return 0;
-}
-
-/* Create new logical channel.
- * This routine is called by the router when ROUTER_IFNEW IOCTL is being
- * handled.
- * o parse media- and hardware-specific configuration
- * o make sure that a new channel can be created
- * o allocate resources, if necessary
- * o prepare network device structure for registration.
- *
- * Return:	0	o.k.
- *		< 0	failure (channel will not be created) */
-static int cycx_wan_new_if(struct wan_device *wandev, struct net_device *dev,
-			   wanif_conf_t *conf)
-{
-	struct cycx_device *card = wandev->private;
-	struct cycx_x25_channel *chan;
-	int err = 0;
-
-	if (!conf->name[0] || strlen(conf->name) > WAN_IFNAME_SZ) {
-		pr_info("%s: invalid interface name!\n", card->devname);
-		return -EINVAL;
-	}
-
-	dev = alloc_netdev(sizeof(struct cycx_x25_channel), conf->name,
-			     cycx_x25_chan_setup);
-	if (!dev)
-		return -ENOMEM;
-
-	chan = netdev_priv(dev);
-	strcpy(chan->name, conf->name);
-	chan->card = card;
-	chan->link = conf->port;
-	chan->protocol = conf->protocol ? ETH_P_X25 : ETH_P_IP;
-	chan->rx_skb = NULL;
-	/* only used in svc connected thru crossover cable */
-	chan->local_addr = NULL;
-
-	if (conf->addr[0] == '@') {	/* SVC */
-		int len = strlen(conf->local_addr);
-
-		if (len) {
-			if (len > WAN_ADDRESS_SZ) {
-				pr_err("%s: %s local addr too long!\n",
-				       wandev->name, chan->name);
-				err = -EINVAL;
-				goto error;
-			} else {
-				chan->local_addr = kmalloc(len + 1, GFP_KERNEL);
-
-				if (!chan->local_addr) {
-					err = -ENOMEM;
-					goto error;
-				}
-			}
-
-			strncpy(chan->local_addr, conf->local_addr,
-				WAN_ADDRESS_SZ);
-		}
-
-		chan->svc = 1;
-		strncpy(chan->addr, &conf->addr[1], WAN_ADDRESS_SZ);
-		init_timer(&chan->timer);
-		chan->timer.function	= cycx_x25_chan_timer;
-		chan->timer.data	= (unsigned long)dev;
-
-		/* Set channel timeouts (default if not specified) */
-		chan->idle_tmout = conf->idle_timeout ? conf->idle_timeout : 90;
-	} else if (isdigit(conf->addr[0])) {	/* PVC */
-		s16 lcn = dec_to_uint(conf->addr, 0);
-
-		if (lcn >= card->u.x.lo_pvc && lcn <= card->u.x.hi_pvc)
-			chan->lcn = lcn;
-		else {
-			pr_err("%s: PVC %u is out of range on interface %s!\n",
-			       wandev->name, lcn, chan->name);
-			err = -EINVAL;
-			goto error;
-		}
-	} else {
-		pr_err("%s: invalid media address on interface %s!\n",
-		       wandev->name, chan->name);
-		err = -EINVAL;
-		goto error;
-	}
-
-	return 0;
-
-error:
-	free_netdev(dev);
-	return err;
-}
-
-/* Delete logical channel. */
-static int cycx_wan_del_if(struct wan_device *wandev, struct net_device *dev)
-{
-	struct cycx_x25_channel *chan = netdev_priv(dev);
-
-	if (chan->svc) {
-		kfree(chan->local_addr);
-		if (chan->state == WAN_CONNECTED)
-			del_timer(&chan->timer);
-	}
-
-	return 0;
-}
-
-
-/* Network Device Interface */
-
-static const struct header_ops cycx_header_ops = {
-	.create = cycx_netdevice_hard_header,
-	.rebuild = cycx_netdevice_rebuild_header,
-};
-
-static const struct net_device_ops cycx_netdev_ops = {
-	.ndo_init	= cycx_netdevice_init,
-	.ndo_open	= cycx_netdevice_open,
-	.ndo_stop	= cycx_netdevice_stop,
-	.ndo_start_xmit	= cycx_netdevice_hard_start_xmit,
-	.ndo_get_stats	= cycx_netdevice_get_stats,
-};
-
-static void cycx_x25_chan_setup(struct net_device *dev)
-{
-	/* Initialize device driver entry points */
-	dev->netdev_ops		= &cycx_netdev_ops;
-	dev->header_ops		= &cycx_header_ops;
-
-	/* Initialize media-specific parameters */
-	dev->mtu		= CYCX_X25_CHAN_MTU;
-	dev->type		= ARPHRD_HWX25;	/* ARP h/w type */
-	dev->hard_header_len	= 0;		/* media header length */
-	dev->addr_len		= 0;		/* hardware address length */
-}
-
-/* Initialize Linux network interface.
- *
- * This routine is called only once for each interface, during Linux network
- * interface registration.  Returning anything but zero will fail interface
- * registration. */
-static int cycx_netdevice_init(struct net_device *dev)
-{
-	struct cycx_x25_channel *chan = netdev_priv(dev);
-	struct cycx_device *card = chan->card;
-	struct wan_device *wandev = &card->wandev;
-
-	if (!chan->svc)
-		*(__be16*)dev->dev_addr = htons(chan->lcn);
-
-	/* Initialize hardware parameters (just for reference) */
-	dev->irq		= wandev->irq;
-	dev->dma		= wandev->dma;
-	dev->base_addr		= wandev->ioport;
-	dev->mem_start		= (unsigned long)wandev->maddr;
-	dev->mem_end		= (unsigned long)(wandev->maddr +
-						  wandev->msize - 1);
-	dev->flags		|= IFF_NOARP;
-
-	/* Set transmit buffer queue length */
-	dev->tx_queue_len	= 10;
-
-	/* Initialize socket buffers */
-	cycx_x25_set_chan_state(dev, WAN_DISCONNECTED);
-
-	return 0;
-}
-
-/* Open network interface.
- * o prevent module from unloading by incrementing use count
- * o if link is disconnected then initiate connection
- *
- * Return 0 if O.k. or errno.  */
-static int cycx_netdevice_open(struct net_device *dev)
-{
-	if (netif_running(dev))
-		return -EBUSY; /* only one open is allowed */
-
-	netif_start_queue(dev);
-	return 0;
-}
-
-/* Close network interface.
- * o reset flags.
- * o if there's no more open channels then disconnect physical link. */
-static int cycx_netdevice_stop(struct net_device *dev)
-{
-	struct cycx_x25_channel *chan = netdev_priv(dev);
-
-	netif_stop_queue(dev);
-
-	if (chan->state == WAN_CONNECTED || chan->state == WAN_CONNECTING)
-		cycx_x25_chan_disconnect(dev);
-
-	return 0;
-}
-
-/* Build media header.
- * o encapsulate packet according to encapsulation type.
- *
- * The trick here is to put packet type (Ethertype) into 'protocol' field of
- * the socket buffer, so that we don't forget it.  If encapsulation fails,
- * set skb->protocol to 0 and discard packet later.
- *
- * Return:	media header length. */
-static int cycx_netdevice_hard_header(struct sk_buff *skb,
-				      struct net_device *dev, u16 type,
-				      const void *daddr, const void *saddr,
-				      unsigned len)
-{
-	skb->protocol = htons(type);
-
-	return dev->hard_header_len;
-}
-
-/* * Re-build media header.
- * Return:	1	physical address resolved.
- *		0	physical address not resolved */
-static int cycx_netdevice_rebuild_header(struct sk_buff *skb)
-{
-	return 1;
-}
-
-/* Send a packet on a network interface.
- * o set busy flag (marks start of the transmission).
- * o check link state. If link is not up, then drop the packet.
- * o check channel status. If it's down then initiate a call.
- * o pass a packet to corresponding WAN device.
- * o free socket buffer
- *
- * Return:	0	complete (socket buffer must be freed)
- *		non-0	packet may be re-transmitted (tbusy must be set)
- *
- * Notes:
- * 1. This routine is called either by the protocol stack or by the "net
- *    bottom half" (with interrupts enabled).
- * 2. Setting tbusy flag will inhibit further transmit requests from the
- *    protocol stack and can be used for flow control with protocol layer. */
-static netdev_tx_t cycx_netdevice_hard_start_xmit(struct sk_buff *skb,
-							struct net_device *dev)
-{
-	struct cycx_x25_channel *chan = netdev_priv(dev);
-	struct cycx_device *card = chan->card;
-
-	if (!chan->svc)
-		chan->protocol = ntohs(skb->protocol);
-
-	if (card->wandev.state != WAN_CONNECTED)
-		++chan->ifstats.tx_dropped;
-	else if (chan->svc && chan->protocol &&
-		 chan->protocol != ntohs(skb->protocol)) {
-		pr_info("%s: unsupported Ethertype 0x%04X on interface %s!\n",
-			card->devname, ntohs(skb->protocol), dev->name);
-		++chan->ifstats.tx_errors;
-	} else if (chan->protocol == ETH_P_IP) {
-		switch (chan->state) {
-		case WAN_DISCONNECTED:
-			if (cycx_x25_chan_connect(dev)) {
-				netif_stop_queue(dev);
-				return NETDEV_TX_BUSY;
-			}
-			/* fall thru */
-		case WAN_CONNECTED:
-			reset_timer(dev);
-			dev->trans_start = jiffies;
-			netif_stop_queue(dev);
-
-			if (cycx_x25_chan_send(dev, skb))
-				return NETDEV_TX_BUSY;
-
-			break;
-		default:
-			++chan->ifstats.tx_dropped;
-			++card->wandev.stats.tx_dropped;
-	}
-	} else { /* chan->protocol == ETH_P_X25 */
-		switch (skb->data[0]) {
-		case X25_IFACE_DATA:
-			break;
-		case X25_IFACE_CONNECT:
-			cycx_x25_chan_connect(dev);
-			goto free_packet;
-		case X25_IFACE_DISCONNECT:
-			cycx_x25_chan_disconnect(dev);
-			goto free_packet;
-	        default:
-			pr_info("%s: unknown %d x25-iface request on %s!\n",
-				card->devname, skb->data[0], dev->name);
-			++chan->ifstats.tx_errors;
-			goto free_packet;
-		}
-
-		skb_pull(skb, 1); /* Remove control byte */
-		reset_timer(dev);
-		dev->trans_start = jiffies;
-		netif_stop_queue(dev);
-
-		if (cycx_x25_chan_send(dev, skb)) {
-			/* prepare for future retransmissions */
-			skb_push(skb, 1);
-			return NETDEV_TX_BUSY;
-		}
-	}
-
-free_packet:
-	dev_kfree_skb(skb);
-
-	return NETDEV_TX_OK;
-}
-
-/* Get Ethernet-style interface statistics.
- * Return a pointer to struct net_device_stats */
-static struct net_device_stats *cycx_netdevice_get_stats(struct net_device *dev)
-{
-	struct cycx_x25_channel *chan = netdev_priv(dev);
-
-	return chan ? &chan->ifstats : NULL;
-}
-
-/* Interrupt Handlers */
-/* X.25 Interrupt Service Routine. */
-static void cycx_x25_irq_handler(struct cycx_device *card)
-{
-	struct cycx_x25_cmd cmd;
-	u16 z = 0;
-
-	card->in_isr = 1;
-	card->buff_int_mode_unbusy = 0;
-	cycx_peek(&card->hw, X25_RXMBOX_OFFS, &cmd, sizeof(cmd));
-
-	switch (cmd.command) {
-	case X25_DATA_INDICATION:
-		cycx_x25_irq_rx(card, &cmd);
-		break;
-	case X25_ACK_FROM_VC:
-		cycx_x25_irq_tx(card, &cmd);
-		break;
-	case X25_LOG:
-		cycx_x25_irq_log(card, &cmd);
-		break;
-	case X25_STATISTIC:
-		cycx_x25_irq_stat(card, &cmd);
-		break;
-	case X25_CONNECT_CONFIRM:
-		cycx_x25_irq_connect_confirm(card, &cmd);
-		break;
-	case X25_CONNECT_INDICATION:
-		cycx_x25_irq_connect(card, &cmd);
-		break;
-	case X25_DISCONNECT_INDICATION:
-		cycx_x25_irq_disconnect(card, &cmd);
-		break;
-	case X25_DISCONNECT_CONFIRM:
-		cycx_x25_irq_disconnect_confirm(card, &cmd);
-		break;
-	case X25_LINE_ON:
-		cycx_set_state(card, WAN_CONNECTED);
-		break;
-	case X25_LINE_OFF:
-		cycx_set_state(card, WAN_DISCONNECTED);
-		break;
-	default:
-		cycx_x25_irq_spurious(card, &cmd);
-		break;
-	}
-
-	cycx_poke(&card->hw, 0, &z, sizeof(z));
-	cycx_poke(&card->hw, X25_RXMBOX_OFFS, &z, sizeof(z));
-	card->in_isr = 0;
-}
-
-/* Transmit interrupt handler.
- *	o Release socket buffer
- *	o Clear 'tbusy' flag */
-static void cycx_x25_irq_tx(struct cycx_device *card, struct cycx_x25_cmd *cmd)
-{
-	struct net_device *dev;
-	struct wan_device *wandev = &card->wandev;
-	u8 lcn;
-
-	cycx_peek(&card->hw, cmd->buf, &lcn, sizeof(lcn));
-
-	/* unbusy device and then dev_tint(); */
-	dev = cycx_x25_get_dev_by_lcn(wandev, lcn);
-	if (dev) {
-		card->buff_int_mode_unbusy = 1;
-		netif_wake_queue(dev);
-	} else
-		pr_err("%s:ackvc for inexistent lcn %d\n", card->devname, lcn);
-}
-
-/* Receive interrupt handler.
- * This routine handles fragmented IP packets using M-bit according to the
- * RFC1356.
- * o map logical channel number to network interface.
- * o allocate socket buffer or append received packet to the existing one.
- * o if M-bit is reset (i.e. it's the last packet in a sequence) then
- *   decapsulate packet and pass socket buffer to the protocol stack.
- *
- * Notes:
- * 1. When allocating a socket buffer, if M-bit is set then more data is
- *    coming and we have to allocate buffer for the maximum IP packet size
- *    expected on this channel.
- * 2. If something goes wrong and X.25 packet has to be dropped (e.g. no
- *    socket buffers available) the whole packet sequence must be discarded. */
-static void cycx_x25_irq_rx(struct cycx_device *card, struct cycx_x25_cmd *cmd)
-{
-	struct wan_device *wandev = &card->wandev;
-	struct net_device *dev;
-	struct cycx_x25_channel *chan;
-	struct sk_buff *skb;
-	u8 bitm, lcn;
-	int pktlen = cmd->len - 5;
-
-	cycx_peek(&card->hw, cmd->buf, &lcn, sizeof(lcn));
-	cycx_peek(&card->hw, cmd->buf + 4, &bitm, sizeof(bitm));
-	bitm &= 0x10;
-
-	dev = cycx_x25_get_dev_by_lcn(wandev, lcn);
-	if (!dev) {
-		/* Invalid channel, discard packet */
-		pr_info("%s: receiving on orphaned LCN %d!\n",
-			card->devname, lcn);
-		return;
-	}
-
-	chan = netdev_priv(dev);
-	reset_timer(dev);
-
-	if (chan->drop_sequence) {
-		if (!bitm)
-			chan->drop_sequence = 0;
-		else
-			return;
-	}
-
-	if ((skb = chan->rx_skb) == NULL) {
-		/* Allocate new socket buffer */
-		int bufsize = bitm ? dev->mtu : pktlen;
-
-		if ((skb = dev_alloc_skb((chan->protocol == ETH_P_X25 ? 1 : 0) +
-					 bufsize +
-					 dev->hard_header_len)) == NULL) {
-			pr_info("%s: no socket buffers available!\n",
-				card->devname);
-			chan->drop_sequence = 1;
-			++chan->ifstats.rx_dropped;
-			return;
-		}
-
-		if (chan->protocol == ETH_P_X25) /* X.25 socket layer control */
-			/* 0 = data packet (dev_alloc_skb zeroed skb->data) */
-			skb_put(skb, 1);
-
-		skb->dev = dev;
-		skb->protocol = htons(chan->protocol);
-		chan->rx_skb = skb;
-	}
-
-	if (skb_tailroom(skb) < pktlen) {
-		/* No room for the packet. Call off the whole thing! */
-		dev_kfree_skb_irq(skb);
-		chan->rx_skb = NULL;
-
-		if (bitm)
-			chan->drop_sequence = 1;
-
-		pr_info("%s: unexpectedly long packet sequence on interface %s!\n",
-			card->devname, dev->name);
-		++chan->ifstats.rx_length_errors;
-		return;
-	}
-
-	/* Append packet to the socket buffer  */
-	cycx_peek(&card->hw, cmd->buf + 5, skb_put(skb, pktlen), pktlen);
-
-	if (bitm)
-		return; /* more data is coming */
-
-	chan->rx_skb = NULL;		/* dequeue packet */
-
-	++chan->ifstats.rx_packets;
-	chan->ifstats.rx_bytes += pktlen;
-
-	skb_reset_mac_header(skb);
-	netif_rx(skb);
-}
-
-/* Connect interrupt handler. */
-static void cycx_x25_irq_connect(struct cycx_device *card,
-				 struct cycx_x25_cmd *cmd)
-{
-	struct wan_device *wandev = &card->wandev;
-	struct net_device *dev = NULL;
-	struct cycx_x25_channel *chan;
-	u8 d[32],
-	   loc[24],
-	   rem[24];
-	u8 lcn, sizeloc, sizerem;
-
-	cycx_peek(&card->hw, cmd->buf, &lcn, sizeof(lcn));
-	cycx_peek(&card->hw, cmd->buf + 5, &sizeloc, sizeof(sizeloc));
-	cycx_peek(&card->hw, cmd->buf + 6, d, cmd->len - 6);
-
-	sizerem = sizeloc >> 4;
-	sizeloc &= 0x0F;
-
-	loc[0] = rem[0] = '\0';
-
-	if (sizeloc)
-		nibble_to_byte(d, loc, sizeloc, 0);
-
-	if (sizerem)
-		nibble_to_byte(d + (sizeloc >> 1), rem, sizerem, sizeloc & 1);
-
-	dprintk(1, KERN_INFO "%s:lcn=%d, local=%s, remote=%s\n",
-			  __func__, lcn, loc, rem);
-
-	dev = cycx_x25_get_dev_by_dte_addr(wandev, rem);
-	if (!dev) {
-		/* Invalid channel, discard packet */
-		pr_info("%s: connect not expected: remote %s!\n",
-			card->devname, rem);
-		return;
-	}
-
-	chan = netdev_priv(dev);
-	chan->lcn = lcn;
-	cycx_x25_connect_response(card, chan);
-	cycx_x25_set_chan_state(dev, WAN_CONNECTED);
-}
-
-/* Connect confirm interrupt handler. */
-static void cycx_x25_irq_connect_confirm(struct cycx_device *card,
-					 struct cycx_x25_cmd *cmd)
-{
-	struct wan_device *wandev = &card->wandev;
-	struct net_device *dev;
-	struct cycx_x25_channel *chan;
-	u8 lcn, key;
-
-	cycx_peek(&card->hw, cmd->buf, &lcn, sizeof(lcn));
-	cycx_peek(&card->hw, cmd->buf + 1, &key, sizeof(key));
-	dprintk(1, KERN_INFO "%s: %s:lcn=%d, key=%d\n",
-			  card->devname, __func__, lcn, key);
-
-	dev = cycx_x25_get_dev_by_lcn(wandev, -key);
-	if (!dev) {
-		/* Invalid channel, discard packet */
-		clear_bit(--key, (void*)&card->u.x.connection_keys);
-		pr_info("%s: connect confirm not expected: lcn %d, key=%d!\n",
-			card->devname, lcn, key);
-		return;
-	}
-
-	clear_bit(--key, (void*)&card->u.x.connection_keys);
-	chan = netdev_priv(dev);
-	chan->lcn = lcn;
-	cycx_x25_set_chan_state(dev, WAN_CONNECTED);
-}
-
-/* Disconnect confirm interrupt handler. */
-static void cycx_x25_irq_disconnect_confirm(struct cycx_device *card,
-					    struct cycx_x25_cmd *cmd)
-{
-	struct wan_device *wandev = &card->wandev;
-	struct net_device *dev;
-	u8 lcn;
-
-	cycx_peek(&card->hw, cmd->buf, &lcn, sizeof(lcn));
-	dprintk(1, KERN_INFO "%s: %s:lcn=%d\n",
-			  card->devname, __func__, lcn);
-	dev = cycx_x25_get_dev_by_lcn(wandev, lcn);
-	if (!dev) {
-		/* Invalid channel, discard packet */
-		pr_info("%s:disconnect confirm not expected!:lcn %d\n",
-			card->devname, lcn);
-		return;
-	}
-
-	cycx_x25_set_chan_state(dev, WAN_DISCONNECTED);
-}
-
-/* disconnect interrupt handler. */
-static void cycx_x25_irq_disconnect(struct cycx_device *card,
-				    struct cycx_x25_cmd *cmd)
-{
-	struct wan_device *wandev = &card->wandev;
-	struct net_device *dev;
-	u8 lcn;
-
-	cycx_peek(&card->hw, cmd->buf, &lcn, sizeof(lcn));
-	dprintk(1, KERN_INFO "%s:lcn=%d\n", __func__, lcn);
-
-	dev = cycx_x25_get_dev_by_lcn(wandev, lcn);
-	if (dev) {
-		struct cycx_x25_channel *chan = netdev_priv(dev);
-
-		cycx_x25_disconnect_response(card, chan->link, lcn);
-		cycx_x25_set_chan_state(dev, WAN_DISCONNECTED);
-	} else
-		cycx_x25_disconnect_response(card, 0, lcn);
-}
-
-/* LOG interrupt handler. */
-static void cycx_x25_irq_log(struct cycx_device *card, struct cycx_x25_cmd *cmd)
-{
-#if CYCLOMX_X25_DEBUG
-	char bf[20];
-	u16 size, toread, link, msg_code;
-	u8 code, routine;
-
-	cycx_peek(&card->hw, cmd->buf, &msg_code, sizeof(msg_code));
-	cycx_peek(&card->hw, cmd->buf + 2, &link, sizeof(link));
-	cycx_peek(&card->hw, cmd->buf + 4, &size, sizeof(size));
-	/* at most 20 bytes are available... thanks to Daniela :) */
-	toread = size < 20 ? size : 20;
-	cycx_peek(&card->hw, cmd->buf + 10, &bf, toread);
-	cycx_peek(&card->hw, cmd->buf + 10 + toread, &code, 1);
-	cycx_peek(&card->hw, cmd->buf + 10 + toread + 1, &routine, 1);
-
-	pr_info("cycx_x25_irq_handler: X25_LOG (0x4500) indic.:\n");
-	pr_info("cmd->buf=0x%X\n", cmd->buf);
-	pr_info("Log message code=0x%X\n", msg_code);
-	pr_info("Link=%d\n", link);
-	pr_info("log code=0x%X\n", code);
-	pr_info("log routine=0x%X\n", routine);
-	pr_info("Message size=%d\n", size);
-	hex_dump("Message", bf, toread);
-#endif
-}
-
-/* STATISTIC interrupt handler. */
-static void cycx_x25_irq_stat(struct cycx_device *card,
-			      struct cycx_x25_cmd *cmd)
-{
-	cycx_peek(&card->hw, cmd->buf, &card->u.x.stats,
-		  sizeof(card->u.x.stats));
-	hex_dump("cycx_x25_irq_stat", (unsigned char*)&card->u.x.stats,
-		 sizeof(card->u.x.stats));
-	cycx_x25_dump_stats(&card->u.x.stats);
-	wake_up_interruptible(&card->wait_stats);
-}
-
-/* Spurious interrupt handler.
- * o print a warning
- * If number of spurious interrupts exceeded some limit, then ??? */
-static void cycx_x25_irq_spurious(struct cycx_device *card,
-				  struct cycx_x25_cmd *cmd)
-{
-	pr_info("%s: spurious interrupt (0x%X)!\n",
-		card->devname, cmd->command);
-}
-#ifdef CYCLOMX_X25_DEBUG
-static void hex_dump(char *msg, unsigned char *p, int len)
-{
-	print_hex_dump(KERN_INFO, msg, DUMP_PREFIX_OFFSET, 16, 1,
-		       p, len, true);
-}
-#endif
-
-/* Cyclom 2X Firmware-Specific Functions */
-/* Exec X.25 command. */
-static int x25_exec(struct cycx_device *card, int command, int link,
-		    void *d1, int len1, void *d2, int len2)
-{
-	struct cycx_x25_cmd c;
-	unsigned long flags;
-	u32 addr = 0x1200 + 0x2E0 * link + 0x1E2;
-	u8 retry = CYCX_X25_MAX_CMD_RETRY;
-	int err = 0;
-
-	c.command = command;
-	c.link = link;
-	c.len = len1 + len2;
-
-	spin_lock_irqsave(&card->u.x.lock, flags);
-
-	/* write command */
-	cycx_poke(&card->hw, X25_MBOX_OFFS, &c, sizeof(c) - sizeof(c.buf));
-
-	/* write X.25 data */
-	if (d1) {
-		cycx_poke(&card->hw, addr, d1, len1);
-
-		if (d2) {
-			if (len2 > 254) {
-				u32 addr1 = 0xA00 + 0x400 * link;
-
-				cycx_poke(&card->hw, addr + len1, d2, 249);
-				cycx_poke(&card->hw, addr1, ((u8*)d2) + 249,
-					  len2 - 249);
-			} else
-				cycx_poke(&card->hw, addr + len1, d2, len2);
-		}
-	}
-
-	/* generate interruption, executing command */
-	cycx_intr(&card->hw);
-
-	/* wait till card->mbox == 0 */
-	do {
-		err = cycx_exec(card->mbox);
-	} while (retry-- && err);
-
-	spin_unlock_irqrestore(&card->u.x.lock, flags);
-
-	return err;
-}
-
-/* Configure adapter. */
-static int cycx_x25_configure(struct cycx_device *card,
-			      struct cycx_x25_config *conf)
-{
-	struct {
-		u16 nlinks;
-		struct cycx_x25_config conf[2];
-	} x25_cmd_conf;
-
-	memset(&x25_cmd_conf, 0, sizeof(x25_cmd_conf));
-	x25_cmd_conf.nlinks = 2;
-	x25_cmd_conf.conf[0] = *conf;
-	/* FIXME: we need to find a way in the wanrouter framework
-		  to configure the second link, for now lets use it
-		  with the same config from the first link, fixing
-		  the interface type to RS232, the speed in 38400 and
-		  the clock to external */
-	x25_cmd_conf.conf[1] = *conf;
-	x25_cmd_conf.conf[1].link = 1;
-	x25_cmd_conf.conf[1].speed = 5; /* 38400 */
-	x25_cmd_conf.conf[1].clock = 8;
-	x25_cmd_conf.conf[1].flags = 0; /* default = RS232 */
-
-	cycx_x25_dump_config(&x25_cmd_conf.conf[0]);
-	cycx_x25_dump_config(&x25_cmd_conf.conf[1]);
-
-	return x25_exec(card, X25_CONFIG, 0,
-			&x25_cmd_conf, sizeof(x25_cmd_conf), NULL, 0);
-}
-
-/* Get protocol statistics. */
-static int cycx_x25_get_stats(struct cycx_device *card)
-{
-	/* the firmware expects 20 in the size field!!!
-	   thanks to Daniela */
-	int err = x25_exec(card, X25_STATISTIC, 0, NULL, 20, NULL, 0);
-
-	if (err)
-		return err;
-
-	interruptible_sleep_on(&card->wait_stats);
-
-	if (signal_pending(current))
-		return -EINTR;
-
-	card->wandev.stats.rx_packets = card->u.x.stats.n2_rx_frames;
-	card->wandev.stats.rx_over_errors = card->u.x.stats.rx_over_errors;
-	card->wandev.stats.rx_crc_errors = card->u.x.stats.rx_crc_errors;
-	card->wandev.stats.rx_length_errors = 0; /* not available from fw */
-	card->wandev.stats.rx_frame_errors = 0; /* not available from fw */
-	card->wandev.stats.rx_missed_errors = card->u.x.stats.rx_aborts;
-	card->wandev.stats.rx_dropped = 0; /* not available from fw */
-	card->wandev.stats.rx_errors = 0; /* not available from fw */
-	card->wandev.stats.tx_packets = card->u.x.stats.n2_tx_frames;
-	card->wandev.stats.tx_aborted_errors = card->u.x.stats.tx_aborts;
-	card->wandev.stats.tx_dropped = 0; /* not available from fw */
-	card->wandev.stats.collisions = 0; /* not available from fw */
-	card->wandev.stats.tx_errors = 0; /* not available from fw */
-
-	cycx_x25_dump_devs(&card->wandev);
-
-	return 0;
-}
-
-/* return the number of nibbles */
-static int byte_to_nibble(u8 *s, u8 *d, char *nibble)
-{
-	int i = 0;
-
-	if (*nibble && *s) {
-		d[i] |= *s++ - '0';
-		*nibble = 0;
-		++i;
-	}
-
-	while (*s) {
-		d[i] = (*s - '0') << 4;
-		if (*(s + 1))
-			d[i] |= *(s + 1) - '0';
-		else {
-			*nibble = 1;
-			break;
-		}
-		++i;
-		s += 2;
-	}
-
-	return i;
-}
-
-static void nibble_to_byte(u8 *s, u8 *d, u8 len, u8 nibble)
-{
-	if (nibble) {
-		*d++ = '0' + (*s++ & 0x0F);
-		--len;
-	}
-
-	while (len) {
-		*d++ = '0' + (*s >> 4);
-
-		if (--len) {
-			*d++ = '0' + (*s & 0x0F);
-			--len;
-		} else break;
-
-		++s;
-	}
-
-	*d = '\0';
-}
-
-/* Place X.25 call. */
-static int x25_place_call(struct cycx_device *card,
-			  struct cycx_x25_channel *chan)
-{
-	int err = 0,
-	    len;
-	char d[64],
-	     nibble = 0,
-	     mylen = chan->local_addr ? strlen(chan->local_addr) : 0,
-	     remotelen = strlen(chan->addr);
-	u8 key;
-
-	if (card->u.x.connection_keys == ~0U) {
-		pr_info("%s: too many simultaneous connection requests!\n",
-			card->devname);
-		return -EAGAIN;
-	}
-
-	key = ffz(card->u.x.connection_keys);
-	set_bit(key, (void*)&card->u.x.connection_keys);
-	++key;
-	dprintk(1, KERN_INFO "%s:x25_place_call:key=%d\n", card->devname, key);
-	memset(d, 0, sizeof(d));
-	d[1] = key; /* user key */
-	d[2] = 0x10;
-	d[4] = 0x0B;
-
-	len = byte_to_nibble(chan->addr, d + 6, &nibble);
-
-	if (chan->local_addr)
-		len += byte_to_nibble(chan->local_addr, d + 6 + len, &nibble);
-
-	if (nibble)
-		++len;
-
-	d[5] = mylen << 4 | remotelen;
-	d[6 + len + 1] = 0xCC; /* TCP/IP over X.25, thanks to Daniela :) */
-
-	if ((err = x25_exec(card, X25_CONNECT_REQUEST, chan->link,
-			    &d, 7 + len + 1, NULL, 0)) != 0)
-		clear_bit(--key, (void*)&card->u.x.connection_keys);
-	else
-		chan->lcn = -key;
-
-	return err;
-}
-
-/* Place X.25 CONNECT RESPONSE. */
-static int cycx_x25_connect_response(struct cycx_device *card,
-				     struct cycx_x25_channel *chan)
-{
-	u8 d[8];
-
-	memset(d, 0, sizeof(d));
-	d[0] = d[3] = chan->lcn;
-	d[2] = 0x10;
-	d[4] = 0x0F;
-	d[7] = 0xCC; /* TCP/IP over X.25, thanks Daniela */
-
-	return x25_exec(card, X25_CONNECT_RESPONSE, chan->link, &d, 8, NULL, 0);
-}
-
-/* Place X.25 DISCONNECT RESPONSE.  */
-static int cycx_x25_disconnect_response(struct cycx_device *card, u8 link,
-					u8 lcn)
-{
-	char d[5];
-
-	memset(d, 0, sizeof(d));
-	d[0] = d[3] = lcn;
-	d[2] = 0x10;
-	d[4] = 0x17;
-
-	return x25_exec(card, X25_DISCONNECT_RESPONSE, link, &d, 5, NULL, 0);
-}
-
-/* Clear X.25 call.  */
-static int x25_clear_call(struct cycx_device *card, u8 link, u8 lcn, u8 cause,
-			  u8 diagn)
-{
-	u8 d[7];
-
-	memset(d, 0, sizeof(d));
-	d[0] = d[3] = lcn;
-	d[2] = 0x10;
-	d[4] = 0x13;
-	d[5] = cause;
-	d[6] = diagn;
-
-	return x25_exec(card, X25_DISCONNECT_REQUEST, link, d, 7, NULL, 0);
-}
-
-/* Send X.25 data packet. */
-static int cycx_x25_send(struct cycx_device *card, u8 link, u8 lcn, u8 bitm,
-			 int len, void *buf)
-{
-	u8 d[] = "?\xFF\x10??";
-
-	d[0] = d[3] = lcn;
-	d[4] = bitm;
-
-	return x25_exec(card, X25_DATA_REQUEST, link, &d, 5, buf, len);
-}
-
-/* Miscellaneous */
-/* Find network device by its channel number.  */
-static struct net_device *cycx_x25_get_dev_by_lcn(struct wan_device *wandev,
-						  s16 lcn)
-{
-	struct net_device *dev = wandev->dev;
-	struct cycx_x25_channel *chan;
-
-	while (dev) {
-		chan = netdev_priv(dev);
-
-		if (chan->lcn == lcn)
-			break;
-		dev = chan->slave;
-	}
-	return dev;
-}
-
-/* Find network device by its remote dte address. */
-static struct net_device *
-	cycx_x25_get_dev_by_dte_addr(struct wan_device *wandev, char *dte)
-{
-	struct net_device *dev = wandev->dev;
-	struct cycx_x25_channel *chan;
-
-	while (dev) {
-		chan = netdev_priv(dev);
-
-		if (!strcmp(chan->addr, dte))
-			break;
-		dev = chan->slave;
-	}
-	return dev;
-}
-
-/* Initiate connection on the logical channel.
- * o for PVC we just get channel configuration
- * o for SVCs place an X.25 call
- *
- * Return:	0	connected
- *		>0	connection in progress
- *		<0	failure */
-static int cycx_x25_chan_connect(struct net_device *dev)
-{
-	struct cycx_x25_channel *chan = netdev_priv(dev);
-	struct cycx_device *card = chan->card;
-
-	if (chan->svc) {
-		if (!chan->addr[0])
-			return -EINVAL; /* no destination address */
-
-		dprintk(1, KERN_INFO "%s: placing X.25 call to %s...\n",
-				  card->devname, chan->addr);
-
-		if (x25_place_call(card, chan))
-			return -EIO;
-
-		cycx_x25_set_chan_state(dev, WAN_CONNECTING);
-		return 1;
-	} else
-		cycx_x25_set_chan_state(dev, WAN_CONNECTED);
-
-	return 0;
-}
-
-/* Disconnect logical channel.
- * o if SVC then clear X.25 call */
-static void cycx_x25_chan_disconnect(struct net_device *dev)
-{
-	struct cycx_x25_channel *chan = netdev_priv(dev);
-
-	if (chan->svc) {
-		x25_clear_call(chan->card, chan->link, chan->lcn, 0, 0);
-		cycx_x25_set_chan_state(dev, WAN_DISCONNECTING);
-	} else
-		cycx_x25_set_chan_state(dev, WAN_DISCONNECTED);
-}
-
-/* Called by kernel timer */
-static void cycx_x25_chan_timer(unsigned long d)
-{
-	struct net_device *dev = (struct net_device *)d;
-	struct cycx_x25_channel *chan = netdev_priv(dev);
-
-	if (chan->state == WAN_CONNECTED)
-		cycx_x25_chan_disconnect(dev);
-	else
-		pr_err("%s: %s for svc (%s) not connected!\n",
-		       chan->card->devname, __func__, dev->name);
-}
-
-/* Set logical channel state. */
-static void cycx_x25_set_chan_state(struct net_device *dev, u8 state)
-{
-	struct cycx_x25_channel *chan = netdev_priv(dev);
-	struct cycx_device *card = chan->card;
-	unsigned long flags;
-	char *string_state = NULL;
-
-	spin_lock_irqsave(&card->lock, flags);
-
-	if (chan->state != state) {
-		if (chan->svc && chan->state == WAN_CONNECTED)
-			del_timer(&chan->timer);
-
-		switch (state) {
-		case WAN_CONNECTED:
-			string_state = "connected!";
-			*(__be16*)dev->dev_addr = htons(chan->lcn);
-			netif_wake_queue(dev);
-			reset_timer(dev);
-
-			if (chan->protocol == ETH_P_X25)
-				cycx_x25_chan_send_event(dev,
-					X25_IFACE_CONNECT);
-
-			break;
-		case WAN_CONNECTING:
-			string_state = "connecting...";
-			break;
-		case WAN_DISCONNECTING:
-			string_state = "disconnecting...";
-			break;
-		case WAN_DISCONNECTED:
-			string_state = "disconnected!";
-
-			if (chan->svc) {
-				*(unsigned short*)dev->dev_addr = 0;
-				chan->lcn = 0;
-			}
-
-			if (chan->protocol == ETH_P_X25)
-				cycx_x25_chan_send_event(dev,
-					X25_IFACE_DISCONNECT);
-
-			netif_wake_queue(dev);
-			break;
-		}
-
-		pr_info("%s: interface %s %s\n",
-			card->devname, dev->name, string_state);
-		chan->state = state;
-	}
-
-	spin_unlock_irqrestore(&card->lock, flags);
-}
-
-/* Send packet on a logical channel.
- *	When this function is called, tx_skb field of the channel data space
- *	points to the transmit socket buffer.  When transmission is complete,
- *	release socket buffer and reset 'tbusy' flag.
- *
- * Return:	0	- transmission complete
- *		1	- busy
- *
- * Notes:
- * 1. If packet length is greater than MTU for this channel, we'll fragment
- *    the packet into 'complete sequence' using M-bit.
- * 2. When transmission is complete, an event notification should be issued
- *    to the router.  */
-static int cycx_x25_chan_send(struct net_device *dev, struct sk_buff *skb)
-{
-	struct cycx_x25_channel *chan = netdev_priv(dev);
-	struct cycx_device *card = chan->card;
-	int bitm = 0;		/* final packet */
-	unsigned len = skb->len;
-
-	if (skb->len > card->wandev.mtu) {
-		len = card->wandev.mtu;
-		bitm = 0x10;		/* set M-bit (more data) */
-	}
-
-	if (cycx_x25_send(card, chan->link, chan->lcn, bitm, len, skb->data))
-		return 1;
-
-	if (bitm) {
-		skb_pull(skb, len);
-		return 1;
-	}
-
-	++chan->ifstats.tx_packets;
-	chan->ifstats.tx_bytes += len;
-
-	return 0;
-}
-
-/* Send event (connection, disconnection, etc) to X.25 socket layer */
-
-static void cycx_x25_chan_send_event(struct net_device *dev, u8 event)
-{
-	struct sk_buff *skb;
-	unsigned char *ptr;
-
-	if ((skb = dev_alloc_skb(1)) == NULL) {
-		pr_err("%s: out of memory\n", __func__);
-		return;
-	}
-
-	ptr  = skb_put(skb, 1);
-	*ptr = event;
-
-	skb->protocol = x25_type_trans(skb, dev);
-	netif_rx(skb);
-}
-
-/* Convert line speed in bps to a number used by cyclom 2x code. */
-static u8 bps_to_speed_code(u32 bps)
-{
-	u8 number = 0; /* defaults to the lowest (1200) speed ;> */
-
-	     if (bps >= 512000) number = 8;
-	else if (bps >= 256000) number = 7;
-	else if (bps >= 64000)  number = 6;
-	else if (bps >= 38400)  number = 5;
-	else if (bps >= 19200)  number = 4;
-	else if (bps >= 9600)   number = 3;
-	else if (bps >= 4800)   number = 2;
-	else if (bps >= 2400)   number = 1;
-
-	return number;
-}
-
-/* log base 2 */
-static u8 cycx_log2(u32 n)
-{
-	u8 log = 0;
-
-	if (!n)
-		return 0;
-
-	while (n > 1) {
-		n >>= 1;
-		++log;
-	}
-
-	return log;
-}
-
-/* Convert decimal string to unsigned integer.
- * If len != 0 then only 'len' characters of the string are converted. */
-static unsigned dec_to_uint(u8 *str, int len)
-{
-	unsigned val = 0;
-
-	if (!len)
-		len = strlen(str);
-
-	for (; len && isdigit(*str); ++str, --len)
-		val = (val * 10) + (*str - (unsigned) '0');
-
-	return val;
-}
-
-static void reset_timer(struct net_device *dev)
-{
-	struct cycx_x25_channel *chan = netdev_priv(dev);
-
-	if (chan->svc)
-		mod_timer(&chan->timer, jiffies+chan->idle_tmout*HZ);
-}
-#ifdef CYCLOMX_X25_DEBUG
-static void cycx_x25_dump_config(struct cycx_x25_config *conf)
-{
-	pr_info("X.25 configuration\n");
-	pr_info("-----------------\n");
-	pr_info("link number=%d\n", conf->link);
-	pr_info("line speed=%d\n", conf->speed);
-	pr_info("clock=%sternal\n", conf->clock == 8 ? "Ex" : "In");
-	pr_info("# level 2 retransm.=%d\n", conf->n2);
-	pr_info("level 2 window=%d\n", conf->n2win);
-	pr_info("level 3 window=%d\n", conf->n3win);
-	pr_info("# logical channels=%d\n", conf->nvc);
-	pr_info("level 3 pkt len=%d\n", conf->pktlen);
-	pr_info("my address=%d\n", conf->locaddr);
-	pr_info("remote address=%d\n", conf->remaddr);
-	pr_info("t1=%d seconds\n", conf->t1);
-	pr_info("t2=%d seconds\n", conf->t2);
-	pr_info("t21=%d seconds\n", conf->t21);
-	pr_info("# PVCs=%d\n", conf->npvc);
-	pr_info("t23=%d seconds\n", conf->t23);
-	pr_info("flags=0x%x\n", conf->flags);
-}
-
-static void cycx_x25_dump_stats(struct cycx_x25_stats *stats)
-{
-	pr_info("X.25 statistics\n");
-	pr_info("--------------\n");
-	pr_info("rx_crc_errors=%d\n", stats->rx_crc_errors);
-	pr_info("rx_over_errors=%d\n", stats->rx_over_errors);
-	pr_info("n2_tx_frames=%d\n", stats->n2_tx_frames);
-	pr_info("n2_rx_frames=%d\n", stats->n2_rx_frames);
-	pr_info("tx_timeouts=%d\n", stats->tx_timeouts);
-	pr_info("rx_timeouts=%d\n", stats->rx_timeouts);
-	pr_info("n3_tx_packets=%d\n", stats->n3_tx_packets);
-	pr_info("n3_rx_packets=%d\n", stats->n3_rx_packets);
-	pr_info("tx_aborts=%d\n", stats->tx_aborts);
-	pr_info("rx_aborts=%d\n", stats->rx_aborts);
-}
-
-static void cycx_x25_dump_devs(struct wan_device *wandev)
-{
-	struct net_device *dev = wandev->dev;
-
-	pr_info("X.25 dev states\n");
-	pr_info("name: addr:           txoff:  protocol:\n");
-	pr_info("---------------------------------------\n");
-
-	while(dev) {
-		struct cycx_x25_channel *chan = netdev_priv(dev);
-
-		pr_info("%-5.5s %-15.15s   %d     ETH_P_%s\n",
-			chan->name, chan->addr, netif_queue_stopped(dev),
-			chan->protocol == ETH_P_IP ? "IP" : "X25");
-		dev = chan->slave;
-	}
-}
-
-#endif /* CYCLOMX_X25_DEBUG */
-/* End */
diff --git a/include/linux/cyclomx.h b/include/linux/cyclomx.h
deleted file mode 100644
index b88f7f428e58..000000000000
--- a/include/linux/cyclomx.h
+++ /dev/null
@@ -1,77 +0,0 @@
-#ifndef	_CYCLOMX_H
-#define	_CYCLOMX_H
-/*
-* cyclomx.h	Cyclom 2X WAN Link Driver.
-*		User-level API definitions.
-*
-* Author:	Arnaldo Carvalho de Melo <acme@conectiva.com.br>
-*
-* Copyright:	(c) 1998-2003 Arnaldo Carvalho de Melo
-*
-* Based on wanpipe.h by Gene Kozin <genek@compuserve.com>
-*
-*		This program is free software; you can redistribute it and/or
-*		modify it under the terms of the GNU General Public License
-*		as published by the Free Software Foundation; either version
-*		2 of the License, or (at your option) any later version.
-* ============================================================================
-* 2000/07/13    acme		remove crap #if KERNEL_VERSION > blah
-* 2000/01/21    acme            rename cyclomx_open to cyclomx_mod_inc_use_count
-*                               and cyclomx_close to cyclomx_mod_dec_use_count
-* 1999/05/19	acme		wait_queue_head_t wait_stats(support for 2.3.*)
-* 1999/01/03	acme		judicious use of data types
-* 1998/12/27	acme		cleanup: PACKED not needed
-* 1998/08/08	acme		Version 0.0.1
-*/
-
-#include <linux/wanrouter.h>
-#include <linux/spinlock.h>
-
-#ifdef	__KERNEL__
-/* Kernel Interface */
-
-#include <linux/cycx_drv.h>	/* Cyclom 2X support module API definitions */
-#include <linux/cycx_cfm.h>	/* Cyclom 2X firmware module definitions */
-#ifdef CONFIG_CYCLOMX_X25
-#include <linux/cycx_x25.h>
-#endif
-
-/* Adapter Data Space.
- * This structure is needed because we handle multiple cards, otherwise
- * static data would do it.
- */
-struct cycx_device {
-	char devname[WAN_DRVNAME_SZ + 1];/* card name */
-	struct cycx_hw hw;		/* hardware configuration */
-	struct wan_device wandev;	/* WAN device data space */
-	u32 state_tick;			/* link state timestamp */
-	spinlock_t lock;
-	char in_isr;			/* interrupt-in-service flag */
-	char buff_int_mode_unbusy;      /* flag for carrying out dev_tint */
-	wait_queue_head_t wait_stats;  /* to wait for the STATS indication */
-	void __iomem *mbox;			/* -> mailbox */
-	void (*isr)(struct cycx_device* card);	/* interrupt service routine */
-	int (*exec)(struct cycx_device* card, void* u_cmd, void* u_data);
-	union {
-#ifdef CONFIG_CYCLOMX_X25
-		struct { /* X.25 specific data */
-			u32 lo_pvc;
-			u32 hi_pvc;
-			u32 lo_svc;
-			u32 hi_svc;
-			struct cycx_x25_stats stats;
-			spinlock_t lock;
-			u32 connection_keys;
-		} x;
-#endif
-	} u;
-};
-
-/* Public Functions */
-void cycx_set_state(struct cycx_device *card, int state);
-
-#ifdef CONFIG_CYCLOMX_X25
-int cycx_x25_wan_init(struct cycx_device *card, wandev_conf_t *conf);
-#endif
-#endif	/* __KERNEL__ */
-#endif	/* _CYCLOMX_H */
diff --git a/include/linux/cycx_drv.h b/include/linux/cycx_drv.h
deleted file mode 100644
index 12fe6b0bfcff..000000000000
--- a/include/linux/cycx_drv.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
-* cycx_drv.h	CYCX Support Module.  Kernel API Definitions.
-*
-* Author:	Arnaldo Carvalho de Melo <acme@conectiva.com.br>
-*
-* Copyright:	(c) 1998-2003 Arnaldo Carvalho de Melo
-*
-* Based on sdladrv.h by Gene Kozin <genek@compuserve.com>
-*
-*		This program is free software; you can redistribute it and/or
-*		modify it under the terms of the GNU General Public License
-*		as published by the Free Software Foundation; either version
-*		2 of the License, or (at your option) any later version.
-* ============================================================================
-* 1999/10/23	acme		cycxhw_t cleanup
-* 1999/01/03	acme		more judicious use of data types...
-*				uclong, ucchar, etc deleted, the u8, u16, u32
-*				types are the portable way to go.
-* 1999/01/03	acme		judicious use of data types... u16, u32, etc
-* 1998/12/26	acme	 	FIXED_BUFFERS, CONF_OFFSET,
-*                               removal of cy_read{bwl}
-* 1998/08/08	acme	 	Initial version.
-*/
-#ifndef	_CYCX_DRV_H
-#define	_CYCX_DRV_H
-
-#define	CYCX_WINDOWSIZE	0x4000	/* default dual-port memory window size */
-#define	GEN_CYCX_INTR	0x02
-#define	RST_ENABLE	0x04
-#define	START_CPU	0x06
-#define	RST_DISABLE	0x08
-#define	FIXED_BUFFERS	0x08
-#define	TEST_PATTERN	0xaa55
-#define	CMD_OFFSET	0x20
-#define CONF_OFFSET     0x0380
-#define	RESET_OFFSET	0x3c00	/* For reset file load */
-#define	DATA_OFFSET	0x0100	/* For code and data files load */
-#define	START_OFFSET	0x3ff0	/* 80186 starts here */
-
-/**
- *	struct cycx_hw - Adapter hardware configuration
- *	@fwid - firmware ID
- *	@irq - interrupt request level
- *	@dpmbase - dual-port memory base
- *	@dpmsize - dual-port memory size
- *	@reserved - reserved for future use
- */
-struct cycx_hw {
-	u32 fwid;
-	int irq;
-	void __iomem *dpmbase;
-	u32 dpmsize;
-	u32 reserved[5];
-};
-
-/* Function Prototypes */
-extern int cycx_setup(struct cycx_hw *hw, void *sfm, u32 len, unsigned long base);
-extern int cycx_down(struct cycx_hw *hw);
-extern int cycx_peek(struct cycx_hw *hw, u32 addr, void *buf, u32 len);
-extern int cycx_poke(struct cycx_hw *hw, u32 addr, void *buf, u32 len);
-extern int cycx_exec(void __iomem *addr);
-
-extern void cycx_intr(struct cycx_hw *hw);
-#endif	/* _CYCX_DRV_H */
diff --git a/include/linux/wanrouter.h b/include/linux/wanrouter.h
index cec4b4159767..8198a63cf459 100644
--- a/include/linux/wanrouter.h
+++ b/include/linux/wanrouter.h
@@ -1,129 +1,10 @@
-/*****************************************************************************
-* wanrouter.h	Definitions for the WAN Multiprotocol Router Module.
-*		This module provides API and common services for WAN Link
-*		Drivers and is completely hardware-independent.
-*
-* Author: 	Nenad Corbic <ncorbic@sangoma.com>
-*		Gideon Hack 	
-* Additions:	Arnaldo Melo
-*
-* Copyright:	(c) 1995-2000 Sangoma Technologies Inc.
-*
-*		This program is free software; you can redistribute it and/or
-*		modify it under the terms of the GNU General Public License
-*		as published by the Free Software Foundation; either version
-*		2 of the License, or (at your option) any later version.
-* ============================================================================
-* Jul 21, 2000  Nenad Corbic	Added WAN_FT1_READY State
-* Feb 24, 2000  Nenad Corbic    Added support for socket based x25api
-* Jan 28, 2000  Nenad Corbic    Added support for the ASYNC protocol.
-* Oct 04, 1999  Nenad Corbic 	Updated for 2.1.0 release
-* Jun 02, 1999  Gideon Hack	Added support for the S514 adapter.
-* May 23, 1999	Arnaldo Melo	Added local_addr to wanif_conf_t
-*				WAN_DISCONNECTING state added
-* Jul 20, 1998	David Fong	Added Inverse ARP options to 'wanif_conf_t'
-* Jun 12, 1998	David Fong	Added Cisco HDLC support.
-* Dec 16, 1997	Jaspreet Singh	Moved 'enable_IPX' and 'network_number' to
-*				'wanif_conf_t'
-* Dec 05, 1997	Jaspreet Singh	Added 'pap', 'chap' to 'wanif_conf_t'
-*				Added 'authenticator' to 'wan_ppp_conf_t'
-* Nov 06, 1997	Jaspreet Singh	Changed Router Driver version to 1.1 from 1.0
-* Oct 20, 1997	Jaspreet Singh	Added 'cir','bc','be' and 'mc' to 'wanif_conf_t'
-*				Added 'enable_IPX' and 'network_number' to 
-*				'wan_device_t'.  Also added defines for
-*				UDP PACKET TYPE, Interrupt test, critical values
-*				for RACE conditions.
-* Oct 05, 1997	Jaspreet Singh	Added 'dlci_num' and 'dlci[100]' to 
-*				'wan_fr_conf_t' to configure a list of dlci(s)
-*				for a NODE 
-* Jul 07, 1997	Jaspreet Singh	Added 'ttl' to 'wandev_conf_t' & 'wan_device_t'
-* May 29, 1997 	Jaspreet Singh	Added 'tx_int_enabled' to 'wan_device_t'
-* May 21, 1997	Jaspreet Singh	Added 'udp_port' to 'wan_device_t'
-* Apr 25, 1997  Farhan Thawar   Added 'udp_port' to 'wandev_conf_t'
-* Jan 16, 1997	Gene Kozin	router_devlist made public
-* Jan 02, 1997	Gene Kozin	Initial version (based on wanpipe.h).
-*****************************************************************************/
+/*
+ * wanrouter.h	Legacy declarations kept around until X25 is removed
+ */
+
 #ifndef	_ROUTER_H
 #define	_ROUTER_H
 
 #include <uapi/linux/wanrouter.h>
 
-/****** Kernel Interface ****************************************************/
-
-#include <linux/fs.h>		/* support for device drivers */
-#include <linux/proc_fs.h>	/* proc filesystem pragmatics */
-#include <linux/netdevice.h>	/* support for network drivers */
-#include <linux/spinlock.h>     /* Support for SMP Locking */
-
-/*----------------------------------------------------------------------------
- * WAN device data space.
- */
-struct wan_device {
-	unsigned magic;			/* magic number */
-	char* name;			/* -> WAN device name (ASCIIZ) */
-	void* private;			/* -> driver private data */
-	unsigned config_id;		/* Configuration ID */
-					/****** hardware configuration ******/
-	unsigned ioport;		/* adapter I/O port base #1 */
-	char S514_cpu_no[1];		/* PCI CPU Number */
-	unsigned char S514_slot_no;	/* PCI Slot Number */
-	unsigned long maddr;		/* dual-port memory address */
-	unsigned msize;			/* dual-port memory size */
-	int irq;			/* interrupt request level */
-	int dma;			/* DMA request level */
-	unsigned bps;			/* data transfer rate */
-	unsigned mtu;			/* max physical transmit unit size */
-	unsigned udp_port;              /* UDP port for management */
-        unsigned char ttl;		/* Time To Live for UDP security */
-	unsigned enable_tx_int; 	/* Transmit Interrupt enabled or not */
-	char interface;			/* RS-232/V.35, etc. */
-	char clocking;			/* external/internal */
-	char line_coding;		/* NRZ/NRZI/FM0/FM1, etc. */
-	char station;			/* DTE/DCE, primary/secondary, etc. */
-	char connection;		/* permanent/switched/on-demand */
-	char signalling;		/* Signalling RS232 or V35 */
-	char read_mode;			/* read mode: Polling or interrupt */
-	char new_if_cnt;                /* Number of interfaces per wanpipe */ 
-	char del_if_cnt;		/* Number of times del_if() gets called */
-	unsigned char piggyback;        /* Piggibacking a port */
-	unsigned hw_opt[4];		/* other hardware options */
-					/****** status and statistics *******/
-	char state;			/* device state */
-	char api_status;		/* device api status */
-	struct net_device_stats stats; 	/* interface statistics */
-	unsigned reserved[16];		/* reserved for future use */
-	unsigned long critical;		/* critical section flag */
-	spinlock_t lock;                /* Support for SMP Locking */
-
-					/****** device management methods ***/
-	int (*setup) (struct wan_device *wandev, wandev_conf_t *conf);
-	int (*shutdown) (struct wan_device *wandev);
-	int (*update) (struct wan_device *wandev);
-	int (*ioctl) (struct wan_device *wandev, unsigned cmd,
-		unsigned long arg);
-	int (*new_if)(struct wan_device *wandev, struct net_device *dev,
-		      wanif_conf_t *conf);
-	int (*del_if)(struct wan_device *wandev, struct net_device *dev);
-					/****** maintained by the router ****/
-	struct wan_device* next;	/* -> next device */
-	struct net_device* dev;		/* list of network interfaces */
-	unsigned ndev;			/* number of interfaces */
-	struct proc_dir_entry *dent;	/* proc filesystem entry */
-};
-
-/* Public functions available for device drivers */
-extern int register_wan_device(struct wan_device *wandev);
-extern int unregister_wan_device(char *name);
-
-/* Proc interface functions. These must not be called by the drivers! */
-extern int wanrouter_proc_init(void);
-extern void wanrouter_proc_cleanup(void);
-extern int wanrouter_proc_add(struct wan_device *wandev);
-extern int wanrouter_proc_delete(struct wan_device *wandev);
-extern long wanrouter_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
-
-/* Public Data */
-/* list of registered devices */
-extern struct wan_device *wanrouter_router_devlist;
-
 #endif	/* _ROUTER_H */
diff --git a/include/uapi/linux/wanrouter.h b/include/uapi/linux/wanrouter.h
index 7617df2833d5..498d6c12c666 100644
--- a/include/uapi/linux/wanrouter.h
+++ b/include/uapi/linux/wanrouter.h
@@ -1,363 +1,9 @@
-/*****************************************************************************
-* wanrouter.h	Definitions for the WAN Multiprotocol Router Module.
-*		This module provides API and common services for WAN Link
-*		Drivers and is completely hardware-independent.
-*
-* Author: 	Nenad Corbic <ncorbic@sangoma.com>
-*		Gideon Hack 	
-* Additions:	Arnaldo Melo
-*
-* Copyright:	(c) 1995-2000 Sangoma Technologies Inc.
-*
-*		This program is free software; you can redistribute it and/or
-*		modify it under the terms of the GNU General Public License
-*		as published by the Free Software Foundation; either version
-*		2 of the License, or (at your option) any later version.
-* ============================================================================
-* Jul 21, 2000  Nenad Corbic	Added WAN_FT1_READY State
-* Feb 24, 2000  Nenad Corbic    Added support for socket based x25api
-* Jan 28, 2000  Nenad Corbic    Added support for the ASYNC protocol.
-* Oct 04, 1999  Nenad Corbic 	Updated for 2.1.0 release
-* Jun 02, 1999  Gideon Hack	Added support for the S514 adapter.
-* May 23, 1999	Arnaldo Melo	Added local_addr to wanif_conf_t
-*				WAN_DISCONNECTING state added
-* Jul 20, 1998	David Fong	Added Inverse ARP options to 'wanif_conf_t'
-* Jun 12, 1998	David Fong	Added Cisco HDLC support.
-* Dec 16, 1997	Jaspreet Singh	Moved 'enable_IPX' and 'network_number' to
-*				'wanif_conf_t'
-* Dec 05, 1997	Jaspreet Singh	Added 'pap', 'chap' to 'wanif_conf_t'
-*				Added 'authenticator' to 'wan_ppp_conf_t'
-* Nov 06, 1997	Jaspreet Singh	Changed Router Driver version to 1.1 from 1.0
-* Oct 20, 1997	Jaspreet Singh	Added 'cir','bc','be' and 'mc' to 'wanif_conf_t'
-*				Added 'enable_IPX' and 'network_number' to 
-*				'wan_device_t'.  Also added defines for
-*				UDP PACKET TYPE, Interrupt test, critical values
-*				for RACE conditions.
-* Oct 05, 1997	Jaspreet Singh	Added 'dlci_num' and 'dlci[100]' to 
-*				'wan_fr_conf_t' to configure a list of dlci(s)
-*				for a NODE 
-* Jul 07, 1997	Jaspreet Singh	Added 'ttl' to 'wandev_conf_t' & 'wan_device_t'
-* May 29, 1997 	Jaspreet Singh	Added 'tx_int_enabled' to 'wan_device_t'
-* May 21, 1997	Jaspreet Singh	Added 'udp_port' to 'wan_device_t'
-* Apr 25, 1997  Farhan Thawar   Added 'udp_port' to 'wandev_conf_t'
-* Jan 16, 1997	Gene Kozin	router_devlist made public
-* Jan 02, 1997	Gene Kozin	Initial version (based on wanpipe.h).
-*****************************************************************************/
-
-#ifndef _UAPI_ROUTER_H
-#define _UAPI_ROUTER_H
-
-#define	ROUTER_NAME	"wanrouter"	/* in case we ever change it */
-#define	ROUTER_VERSION	1		/* version number */
-#define	ROUTER_RELEASE	1		/* release (minor version) number */
-#define	ROUTER_IOCTL	'W'		/* for IOCTL calls */
-#define	ROUTER_MAGIC	0x524D4157L	/* signature: 'WANR' reversed */
-
-/* IOCTL codes for /proc/router/<device> entries (up to 255) */
-enum router_ioctls
-{
-	ROUTER_SETUP	= ROUTER_IOCTL<<8,	/* configure device */
-	ROUTER_DOWN,				/* shut down device */
-	ROUTER_STAT,				/* get device status */
-	ROUTER_IFNEW,				/* add interface */
-	ROUTER_IFDEL,				/* delete interface */
-	ROUTER_IFSTAT,				/* get interface status */
-	ROUTER_USER	= (ROUTER_IOCTL<<8)+16,	/* driver-specific calls */
-	ROUTER_USER_MAX	= (ROUTER_IOCTL<<8)+31
-};
-
-/* identifiers for displaying proc file data for dual port adapters */
-#define PROC_DATA_PORT_0 0x8000	/* the data is for port 0 */
-#define PROC_DATA_PORT_1 0x8001	/* the data is for port 1 */
-
-/* NLPID for packet encapsulation (ISO/IEC TR 9577) */
-#define	NLPID_IP	0xCC	/* Internet Protocol Datagram */
-#define	NLPID_SNAP	0x80	/* IEEE Subnetwork Access Protocol */
-#define	NLPID_CLNP	0x81	/* ISO/IEC 8473 */
-#define	NLPID_ESIS	0x82	/* ISO/IEC 9542 */
-#define	NLPID_ISIS	0x83	/* ISO/IEC ISIS */
-#define	NLPID_Q933	0x08	/* CCITT Q.933 */
-
-/* Miscellaneous */
-#define	WAN_IFNAME_SZ	15	/* max length of the interface name */
-#define	WAN_DRVNAME_SZ	15	/* max length of the link driver name */
-#define	WAN_ADDRESS_SZ	31	/* max length of the WAN media address */
-#define USED_BY_FIELD	8	/* max length of the used by field */
-
-/* Defines for UDP PACKET TYPE */
-#define UDP_PTPIPE_TYPE 	0x01
-#define UDP_FPIPE_TYPE		0x02
-#define UDP_CPIPE_TYPE		0x03
-#define UDP_DRVSTATS_TYPE 	0x04
-#define UDP_INVALID_TYPE  	0x05
-
-/* Command return code */
-#define CMD_OK		0		/* normal firmware return code */
-#define CMD_TIMEOUT	0xFF		/* firmware command timed out */
-
-/* UDP Packet Management */
-#define UDP_PKT_FRM_STACK	0x00
-#define UDP_PKT_FRM_NETWORK	0x01
-
-/* Maximum interrupt test counter */
-#define MAX_INTR_TEST_COUNTER	100
-
-/* Critical Values for RACE conditions*/
-#define CRITICAL_IN_ISR		0xA1
-#define CRITICAL_INTR_HANDLED	0xB1
-
-/****** Data Types **********************************************************/
-
-/*----------------------------------------------------------------------------
- * X.25-specific link-level configuration.
- */
-typedef struct wan_x25_conf
-{
-	unsigned lo_pvc;	/* lowest permanent circuit number */
-	unsigned hi_pvc;	/* highest permanent circuit number */
-	unsigned lo_svc;	/* lowest switched circuit number */
-	unsigned hi_svc;	/* highest switched circuit number */
-	unsigned hdlc_window;	/* HDLC window size (1..7) */
-	unsigned pkt_window;	/* X.25 packet window size (1..7) */
-	unsigned t1;		/* HDLC timer T1, sec (1..30) */
-	unsigned t2;		/* HDLC timer T2, sec (0..29) */
-	unsigned t4;		/* HDLC supervisory frame timer = T4 * T1 */
-	unsigned n2;		/* HDLC retransmission limit (1..30) */
-	unsigned t10_t20;	/* X.25 RESTART timeout, sec (1..255) */
-	unsigned t11_t21;	/* X.25 CALL timeout, sec (1..255) */
-	unsigned t12_t22;	/* X.25 RESET timeout, sec (1..255) */
-	unsigned t13_t23;	/* X.25 CLEAR timeout, sec (1..255) */
-	unsigned t16_t26;	/* X.25 INTERRUPT timeout, sec (1..255) */
-	unsigned t28;		/* X.25 REGISTRATION timeout, sec (1..255) */
-	unsigned r10_r20;	/* RESTART retransmission limit (0..250) */
-	unsigned r12_r22;	/* RESET retransmission limit (0..250) */
-	unsigned r13_r23;	/* CLEAR retransmission limit (0..250) */
-	unsigned ccitt_compat;	/* compatibility mode: 1988/1984/1980 */
-	unsigned x25_conf_opt;   /* User defined x25 config optoins */
-	unsigned char LAPB_hdlc_only; /* Run in HDLC only mode */
-	unsigned char logging;   /* Control connection logging */  
-	unsigned char oob_on_modem; /* Whether to send modem status to the user app */
-} wan_x25_conf_t;
-
-/*----------------------------------------------------------------------------
- * Frame relay specific link-level configuration.
- */
-typedef struct wan_fr_conf
-{
-	unsigned signalling;	/* local in-channel signalling type */
-	unsigned t391;		/* link integrity verification timer */
-	unsigned t392;		/* polling verification timer */
-	unsigned n391;		/* full status polling cycle counter */
-	unsigned n392;		/* error threshold counter */
-	unsigned n393;		/* monitored events counter */
-	unsigned dlci_num;	/* number of DLCs (access node) */
-	unsigned  dlci[100];    /* List of all DLCIs */
-} wan_fr_conf_t;
-
-/*----------------------------------------------------------------------------
- * PPP-specific link-level configuration.
- */
-typedef struct wan_ppp_conf
-{
-	unsigned restart_tmr;	/* restart timer */
-	unsigned auth_rsrt_tmr;	/* authentication timer */
-	unsigned auth_wait_tmr;	/* authentication timer */
-	unsigned mdm_fail_tmr;	/* modem failure timer */
-	unsigned dtr_drop_tmr;	/* DTR drop timer */
-	unsigned connect_tmout;	/* connection timeout */
-	unsigned conf_retry;	/* max. retry */
-	unsigned term_retry;	/* max. retry */
-	unsigned fail_retry;	/* max. retry */
-	unsigned auth_retry;	/* max. retry */
-	unsigned auth_options;	/* authentication opt. */
-	unsigned ip_options;	/* IP options */
-	char	authenticator;	/* AUTHENTICATOR or not */
-	char	ip_mode;	/* Static/Host/Peer */
-} wan_ppp_conf_t;
-
-/*----------------------------------------------------------------------------
- * CHDLC-specific link-level configuration.
- */
-typedef struct wan_chdlc_conf
-{
-	unsigned char ignore_dcd;	/* Protocol options:		*/
-	unsigned char ignore_cts;	/*  Ignore these to determine	*/
-	unsigned char ignore_keepalive;	/*  link status (Yes or No)	*/
-	unsigned char hdlc_streaming;	/*  hdlc_streaming mode (Y/N) */
-	unsigned char receive_only;	/*  no transmit buffering (Y/N) */
-	unsigned keepalive_tx_tmr;	/* transmit keepalive timer */
-	unsigned keepalive_rx_tmr;	/* receive  keepalive timer */
-	unsigned keepalive_err_margin;	/* keepalive_error_tolerance */
-	unsigned slarp_timer;		/* SLARP request timer */
-} wan_chdlc_conf_t;
-
-
-/*----------------------------------------------------------------------------
- * WAN device configuration. Passed to ROUTER_SETUP IOCTL.
- */
-typedef struct wandev_conf
-{
-	unsigned magic;		/* magic number (for verification) */
-	unsigned config_id;	/* configuration structure identifier */
-				/****** hardware configuration ******/
-	unsigned ioport;	/* adapter I/O port base */
-	unsigned long maddr;	/* dual-port memory address */
-	unsigned msize;		/* dual-port memory size */
-	int irq;		/* interrupt request level */
-	int dma;		/* DMA request level */
-        char S514_CPU_no[1];	/* S514 PCI adapter CPU number ('A' or 'B') */
-        unsigned PCI_slot_no;	/* S514 PCI adapter slot number */
-	char auto_pci_cfg;	/* S515 PCI automatic slot detection */
-	char comm_port;		/* Communication Port (PRI=0, SEC=1) */ 
-	unsigned bps;		/* data transfer rate */
-	unsigned mtu;		/* maximum transmit unit size */
-        unsigned udp_port;      /* UDP port for management */
-	unsigned char ttl;	/* Time To Live for UDP security */
-	unsigned char ft1;	/* FT1 Configurator Option */
-        char interface;		/* RS-232/V.35, etc. */
-	char clocking;		/* external/internal */
-	char line_coding;	/* NRZ/NRZI/FM0/FM1, etc. */
-	char station;		/* DTE/DCE, primary/secondary, etc. */
-	char connection;	/* permanent/switched/on-demand */
-	char read_mode;		/* read mode: Polling or interrupt */
-	char receive_only;	/* disable tx buffers */
-	char tty;		/* Create a fake tty device */
-	unsigned tty_major;	/* Major number for wanpipe tty device */
-	unsigned tty_minor; 	/* Minor number for wanpipe tty device */
-	unsigned tty_mode;	/* TTY operation mode SYNC or ASYNC */
-	char backup;		/* Backup Mode */
-	unsigned hw_opt[4];	/* other hardware options */
-	unsigned reserved[4];
-				/****** arbitrary data ***************/
-	unsigned data_size;	/* data buffer size */
-	void* data;		/* data buffer, e.g. firmware */
-	union			/****** protocol-specific ************/
-	{
-		wan_x25_conf_t x25;	/* X.25 configuration */
-		wan_ppp_conf_t ppp;	/* PPP configuration */
-		wan_fr_conf_t fr;	/* frame relay configuration */
-		wan_chdlc_conf_t chdlc;	/* Cisco HDLC configuration */
-	} u;
-} wandev_conf_t;
-
-/* 'config_id' definitions */
-#define	WANCONFIG_X25	101	/* X.25 link */
-#define	WANCONFIG_FR	102	/* frame relay link */
-#define	WANCONFIG_PPP	103	/* synchronous PPP link */
-#define WANCONFIG_CHDLC	104	/* Cisco HDLC Link */
-#define WANCONFIG_BSC	105	/* BiSync Streaming */
-#define WANCONFIG_HDLC	106	/* HDLC Support */
-#define WANCONFIG_MPPP  107	/* Multi Port PPP over RAW CHDLC */
-
 /*
- * Configuration options defines.
+ * wanrouter.h	Legacy declarations kept around until X25 is removed
  */
-/* general options */
-#define	WANOPT_OFF	0
-#define	WANOPT_ON	1
-#define	WANOPT_NO	0
-#define	WANOPT_YES	1
-
-/* intercace options */
-#define	WANOPT_RS232	0
-#define	WANOPT_V35	1
-
-/* data encoding options */
-#define	WANOPT_NRZ	0
-#define	WANOPT_NRZI	1
-#define	WANOPT_FM0	2
-#define	WANOPT_FM1	3
-
-/* link type options */
-#define	WANOPT_POINTTOPOINT	0	/* RTS always active */
-#define	WANOPT_MULTIDROP	1	/* RTS is active when transmitting */
-
-/* clocking options */
-#define	WANOPT_EXTERNAL	0
-#define	WANOPT_INTERNAL	1
-
-/* station options */
-#define	WANOPT_DTE		0
-#define	WANOPT_DCE		1
-#define	WANOPT_CPE		0
-#define	WANOPT_NODE		1
-#define	WANOPT_SECONDARY	0
-#define	WANOPT_PRIMARY		1
-
-/* connection options */
-#define	WANOPT_PERMANENT	0	/* DTR always active */
-#define	WANOPT_SWITCHED		1	/* use DTR to setup link (dial-up) */
-#define	WANOPT_ONDEMAND		2	/* activate DTR only before sending */
-
-/* frame relay in-channel signalling */
-#define	WANOPT_FR_ANSI		1	/* ANSI T1.617 Annex D */
-#define	WANOPT_FR_Q933		2	/* ITU Q.933A */
-#define	WANOPT_FR_LMI		3	/* LMI */
-
-/* PPP IP Mode Options */
-#define	WANOPT_PPP_STATIC	0
-#define	WANOPT_PPP_HOST		1
-#define	WANOPT_PPP_PEER		2
-
-/* ASY Mode Options */
-#define WANOPT_ONE 		1
-#define WANOPT_TWO		2
-#define WANOPT_ONE_AND_HALF	3
-
-#define WANOPT_NONE	0
-#define WANOPT_ODD      1
-#define WANOPT_EVEN	2
-
-/* CHDLC Protocol Options */
-/* DF Commented out for now.
-
-#define WANOPT_CHDLC_NO_DCD		IGNORE_DCD_FOR_LINK_STAT
-#define WANOPT_CHDLC_NO_CTS		IGNORE_CTS_FOR_LINK_STAT
-#define WANOPT_CHDLC_NO_KEEPALIVE	IGNORE_KPALV_FOR_LINK_STAT
-*/
-
-/* Port options */
-#define WANOPT_PRI 0
-#define WANOPT_SEC 1
-/* read mode */
-#define	WANOPT_INTR	0
-#define WANOPT_POLL	1
 
-
-#define WANOPT_TTY_SYNC  0
-#define WANOPT_TTY_ASYNC 1
-/*----------------------------------------------------------------------------
- * WAN Link Status Info (for ROUTER_STAT IOCTL).
- */
-typedef struct wandev_stat
-{
-	unsigned state;		/* link state */
-	unsigned ndev;		/* number of configured interfaces */
-
-	/* link/interface configuration */
-	unsigned connection;	/* permanent/switched/on-demand */
-	unsigned media_type;	/* Frame relay/PPP/X.25/SDLC, etc. */
-	unsigned mtu;		/* max. transmit unit for this device */
-
-	/* physical level statistics */
-	unsigned modem_status;	/* modem status */
-	unsigned rx_frames;	/* received frames count */
-	unsigned rx_overruns;	/* receiver overrun error count */
-	unsigned rx_crc_err;	/* receive CRC error count */
-	unsigned rx_aborts;	/* received aborted frames count */
-	unsigned rx_bad_length;	/* unexpetedly long/short frames count */
-	unsigned rx_dropped;	/* frames discarded at device level */
-	unsigned tx_frames;	/* transmitted frames count */
-	unsigned tx_underruns;	/* aborted transmissions (underruns) count */
-	unsigned tx_timeouts;	/* transmission timeouts */
-	unsigned tx_rejects;	/* other transmit errors */
-
-	/* media level statistics */
-	unsigned rx_bad_format;	/* frames with invalid format */
-	unsigned rx_bad_addr;	/* frames with invalid media address */
-	unsigned tx_retries;	/* frames re-transmitted */
-	unsigned reserved[16];	/* reserved for future use */
-} wandev_stat_t;
+#ifndef _UAPI_ROUTER_H
+#define _UAPI_ROUTER_H
 
 /* 'state' defines */
 enum wan_states
@@ -365,88 +11,7 @@ enum wan_states
 	WAN_UNCONFIGURED,	/* link/channel is not configured */
 	WAN_DISCONNECTED,	/* link/channel is disconnected */
 	WAN_CONNECTING,		/* connection is in progress */
-	WAN_CONNECTED,		/* link/channel is operational */
-	WAN_LIMIT,		/* for verification only */
-	WAN_DUALPORT,		/* for Dual Port cards */
-	WAN_DISCONNECTING,
-	WAN_FT1_READY		/* FT1 Configurator Ready */
+	WAN_CONNECTED		/* link/channel is operational */
 };
 
-enum {
-	WAN_LOCAL_IP,
-	WAN_POINTOPOINT_IP,
-	WAN_NETMASK_IP,
-	WAN_BROADCAST_IP
-};
-
-/* 'modem_status' masks */
-#define	WAN_MODEM_CTS	0x0001	/* CTS line active */
-#define	WAN_MODEM_DCD	0x0002	/* DCD line active */
-#define	WAN_MODEM_DTR	0x0010	/* DTR line active */
-#define	WAN_MODEM_RTS	0x0020	/* RTS line active */
-
-/*----------------------------------------------------------------------------
- * WAN interface (logical channel) configuration (for ROUTER_IFNEW IOCTL).
- */
-typedef struct wanif_conf
-{
-	unsigned magic;			/* magic number */
-	unsigned config_id;		/* configuration identifier */
-	char name[WAN_IFNAME_SZ+1];	/* interface name, ASCIIZ */
-	char addr[WAN_ADDRESS_SZ+1];	/* media address, ASCIIZ */
-	char usedby[USED_BY_FIELD];	/* used by API or WANPIPE */
-	unsigned idle_timeout;		/* sec, before disconnecting */
-	unsigned hold_timeout;		/* sec, before re-connecting */
-	unsigned cir;			/* Committed Information Rate fwd,bwd*/
-	unsigned bc;			/* Committed Burst Size fwd, bwd */
-	unsigned be;			/* Excess Burst Size fwd, bwd */ 
-	unsigned char enable_IPX;	/* Enable or Disable IPX */
-	unsigned char inarp;		/* Send Inverse ARP requests Y/N */
-	unsigned inarp_interval;	/* sec, between InARP requests */
-	unsigned long network_number;	/* Network Number for IPX */
-	char mc;			/* Multicast on or off */
-	char local_addr[WAN_ADDRESS_SZ+1];/* local media address, ASCIIZ */
-	unsigned char port;		/* board port */
-	unsigned char protocol;		/* prococol used in this channel (TCPOX25 or X25) */
-	char pap;			/* PAP enabled or disabled */
-	char chap;			/* CHAP enabled or disabled */
-	unsigned char userid[511];	/* List of User Id */
-	unsigned char passwd[511];	/* List of passwords */
-	unsigned char sysname[31];	/* Name of the system */
-	unsigned char ignore_dcd;	/* Protocol options: */
-	unsigned char ignore_cts;	/*  Ignore these to determine */
-	unsigned char ignore_keepalive;	/*  link status (Yes or No) */
-	unsigned char hdlc_streaming;	/*  Hdlc streaming mode (Y/N) */
-	unsigned keepalive_tx_tmr;	/* transmit keepalive timer */
-	unsigned keepalive_rx_tmr;	/* receive  keepalive timer */
-	unsigned keepalive_err_margin;	/* keepalive_error_tolerance */
-	unsigned slarp_timer;		/* SLARP request timer */
-	unsigned char ttl;		/* Time To Live for UDP security */
-	char interface;			/* RS-232/V.35, etc. */
-	char clocking;			/* external/internal */
-	unsigned bps;			/* data transfer rate */
-	unsigned mtu;			/* maximum transmit unit size */
-	unsigned char if_down;		/* brind down interface when disconnected */
-	unsigned char gateway;		/* Is this interface a gateway */
-	unsigned char true_if_encoding;	/* Set the dev->type to true board protocol */
-
-	unsigned char asy_data_trans;     /* async API options */
-        unsigned char rts_hs_for_receive; /* async Protocol options */
-        unsigned char xon_xoff_hs_for_receive;
-	unsigned char xon_xoff_hs_for_transmit;
-	unsigned char dcd_hs_for_transmit;
-	unsigned char cts_hs_for_transmit;
-	unsigned char async_mode;
-	unsigned tx_bits_per_char;
-	unsigned rx_bits_per_char;
-	unsigned stop_bits;  
-	unsigned char parity;
- 	unsigned break_timer;
-        unsigned inter_char_timer;
-	unsigned rx_complete_length;
-	unsigned xon_char;
-	unsigned xoff_char;
-	unsigned char receive_only;	/*  no transmit buffering (Y/N) */
-} wanif_conf_t;
-
 #endif /* _UAPI_ROUTER_H */
diff --git a/net/wanrouter/Kconfig b/net/wanrouter/Kconfig
deleted file mode 100644
index a157a2e64e18..000000000000
--- a/net/wanrouter/Kconfig
+++ /dev/null
@@ -1,27 +0,0 @@
-#
-# Configuration for WAN router
-#
-
-config WAN_ROUTER
-	tristate "WAN router (DEPRECATED)"
-	depends on EXPERIMENTAL
-	---help---
-	  Wide Area Networks (WANs), such as X.25, frame relay and leased
-	  lines, are used to interconnect Local Area Networks (LANs) over vast
-	  distances with data transfer rates significantly higher than those
-	  achievable with commonly used asynchronous modem connections.
-	  Usually, a quite expensive external device called a `WAN router' is
-	  needed to connect to a WAN.
-
-	  As an alternative, WAN routing can be built into the Linux kernel.
-	  With relatively inexpensive WAN interface cards available on the
-	  market, a perfectly usable router can be built for less than half
-	  the price of an external router.  If you have one of those cards and
-	  wish to use your Linux box as a WAN router, say Y here and also to
-	  the WAN driver for your card, below.  You will then need the
-	  wan-tools package which is available from <ftp://ftp.sangoma.com/>.
-
-	  To compile WAN routing support as a module, choose M here: the
-	  module will be called wanrouter.
-
-	  If unsure, say N.
diff --git a/net/wanrouter/Makefile b/net/wanrouter/Makefile
deleted file mode 100644
index 4da14bc48078..000000000000
--- a/net/wanrouter/Makefile
+++ /dev/null
@@ -1,7 +0,0 @@
-#
-# Makefile for the Linux WAN router layer.
-#
-
-obj-$(CONFIG_WAN_ROUTER) += wanrouter.o
-
-wanrouter-y :=  wanproc.o wanmain.o
diff --git a/net/wanrouter/patchlevel b/net/wanrouter/patchlevel
deleted file mode 100644
index c043eea7767e..000000000000
--- a/net/wanrouter/patchlevel
+++ /dev/null
@@ -1 +0,0 @@
-2.2.1
diff --git a/net/wanrouter/wanmain.c b/net/wanrouter/wanmain.c
deleted file mode 100644
index 2ab785064b7e..000000000000
--- a/net/wanrouter/wanmain.c
+++ /dev/null
@@ -1,782 +0,0 @@
-/*****************************************************************************
-* wanmain.c	WAN Multiprotocol Router Module. Main code.
-*
-*		This module is completely hardware-independent and provides
-*		the following common services for the WAN Link Drivers:
-*		 o WAN device management (registering, unregistering)
-*		 o Network interface management
-*		 o Physical connection management (dial-up, incoming calls)
-*		 o Logical connection management (switched virtual circuits)
-*		 o Protocol encapsulation/decapsulation
-*
-* Author:	Gideon Hack
-*
-* Copyright:	(c) 1995-1999 Sangoma Technologies Inc.
-*
-*		This program is free software; you can redistribute it and/or
-*		modify it under the terms of the GNU General Public License
-*		as published by the Free Software Foundation; either version
-*		2 of the License, or (at your option) any later version.
-* ============================================================================
-* Nov 24, 2000  Nenad Corbic	Updated for 2.4.X kernels
-* Nov 07, 2000  Nenad Corbic	Fixed the Mulit-Port PPP for kernels 2.2.16 and
-*  				greater.
-* Aug 2,  2000  Nenad Corbic	Block the Multi-Port PPP from running on
-*  			        kernels 2.2.16 or greater.  The SyncPPP
-*  			        has changed.
-* Jul 13, 2000  Nenad Corbic	Added SyncPPP support
-* 				Added extra debugging in device_setup().
-* Oct 01, 1999  Gideon Hack     Update for s514 PCI card
-* Dec 27, 1996	Gene Kozin	Initial version (based on Sangoma's WANPIPE)
-* Jan 16, 1997	Gene Kozin	router_devlist made public
-* Jan 31, 1997  Alan Cox	Hacked it about a bit for 2.1
-* Jun 27, 1997  Alan Cox	realigned with vendor code
-* Oct 15, 1997  Farhan Thawar   changed wan_encapsulate to add a pad byte of 0
-* Apr 20, 1998	Alan Cox	Fixed 2.1 symbols
-* May 17, 1998  K. Baranowski	Fixed SNAP encapsulation in wan_encapsulate
-* Dec 15, 1998  Arnaldo Melo    support for firmwares of up to 128000 bytes
-*                               check wandev->setup return value
-* Dec 22, 1998  Arnaldo Melo    vmalloc/vfree used in device_setup to allocate
-*                               kernel memory and copy configuration data to
-*                               kernel space (for big firmwares)
-* Jun 02, 1999  Gideon Hack	Updates for Linux 2.0.X and 2.2.X kernels.
-*****************************************************************************/
-
-#include <linux/stddef.h>	/* offsetof(), etc. */
-#include <linux/capability.h>
-#include <linux/errno.h>	/* return codes */
-#include <linux/kernel.h>
-#include <linux/module.h>	/* support for loadable modules */
-#include <linux/slab.h>		/* kmalloc(), kfree() */
-#include <linux/mutex.h>
-#include <linux/mm.h>
-#include <linux/string.h>	/* inline mem*, str* functions */
-
-#include <asm/byteorder.h>	/* htons(), etc. */
-#include <linux/wanrouter.h>	/* WAN router API definitions */
-
-#include <linux/vmalloc.h>	/* vmalloc, vfree */
-#include <asm/uaccess.h>        /* copy_to/from_user */
-#include <linux/init.h>         /* __initfunc et al. */
-
-#define DEV_TO_SLAVE(dev)	(*((struct net_device **)netdev_priv(dev)))
-
-/*
- * 	Function Prototypes
- */
-
-/*
- *	WAN device IOCTL handlers
- */
-
-static DEFINE_MUTEX(wanrouter_mutex);
-static int wanrouter_device_setup(struct wan_device *wandev,
-				  wandev_conf_t __user *u_conf);
-static int wanrouter_device_stat(struct wan_device *wandev,
-				 wandev_stat_t __user *u_stat);
-static int wanrouter_device_shutdown(struct wan_device *wandev);
-static int wanrouter_device_new_if(struct wan_device *wandev,
-				   wanif_conf_t __user *u_conf);
-static int wanrouter_device_del_if(struct wan_device *wandev,
-				   char __user *u_name);
-
-/*
- *	Miscellaneous
- */
-
-static struct wan_device *wanrouter_find_device(char *name);
-static int wanrouter_delete_interface(struct wan_device *wandev, char *name);
-static void lock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags)
-	__acquires(lock);
-static void unlock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags)
-	__releases(lock);
-
-
-
-/*
- *	Global Data
- */
-
-static char wanrouter_fullname[]  = "Sangoma WANPIPE Router";
-static char wanrouter_copyright[] = "(c) 1995-2000 Sangoma Technologies Inc.";
-static char wanrouter_modname[] = ROUTER_NAME; /* short module name */
-struct wan_device* wanrouter_router_devlist; /* list of registered devices */
-
-/*
- *	Organize Unique Identifiers for encapsulation/decapsulation
- */
-
-#if 0
-static unsigned char wanrouter_oui_ether[] = { 0x00, 0x00, 0x00 };
-static unsigned char wanrouter_oui_802_2[] = { 0x00, 0x80, 0xC2 };
-#endif
-
-static int __init wanrouter_init(void)
-{
-	int err;
-
-	printk(KERN_INFO "%s v%u.%u %s\n",
-	       wanrouter_fullname, ROUTER_VERSION, ROUTER_RELEASE,
-	       wanrouter_copyright);
-
-	err = wanrouter_proc_init();
-	if (err)
-		printk(KERN_INFO "%s: can't create entry in proc filesystem!\n",
-		       wanrouter_modname);
-
-	return err;
-}
-
-static void __exit wanrouter_cleanup (void)
-{
-	wanrouter_proc_cleanup();
-}
-
-/*
- * This is just plain dumb.  We should move the bugger to drivers/net/wan,
- * slap it first in directory and make it module_init().  The only reason
- * for subsys_initcall() here is that net goes after drivers (why, BTW?)
- */
-subsys_initcall(wanrouter_init);
-module_exit(wanrouter_cleanup);
-
-/*
- * 	Kernel APIs
- */
-
-/*
- * 	Register WAN device.
- * 	o verify device credentials
- * 	o create an entry for the device in the /proc/net/router directory
- * 	o initialize internally maintained fields of the wan_device structure
- * 	o link device data space to a singly-linked list
- * 	o if it's the first device, then start kernel 'thread'
- * 	o increment module use count
- *
- * 	Return:
- *	0	Ok
- *	< 0	error.
- *
- * 	Context:	process
- */
-
-
-int register_wan_device(struct wan_device *wandev)
-{
-	int err, namelen;
-
-	if ((wandev == NULL) || (wandev->magic != ROUTER_MAGIC) ||
-	    (wandev->name == NULL))
-		return -EINVAL;
-
-	namelen = strlen(wandev->name);
-	if (!namelen || (namelen > WAN_DRVNAME_SZ))
-		return -EINVAL;
-
-	if (wanrouter_find_device(wandev->name))
-		return -EEXIST;
-
-#ifdef WANDEBUG
-	printk(KERN_INFO "%s: registering WAN device %s\n",
-	       wanrouter_modname, wandev->name);
-#endif
-
-	/*
-	 *	Register /proc directory entry
-	 */
-	err = wanrouter_proc_add(wandev);
-	if (err) {
-		printk(KERN_INFO
-			"%s: can't create /proc/net/router/%s entry!\n",
-			wanrouter_modname, wandev->name);
-		return err;
-	}
-
-	/*
-	 *	Initialize fields of the wan_device structure maintained by the
-	 *	router and update local data.
-	 */
-
-	wandev->ndev = 0;
-	wandev->dev  = NULL;
-	wandev->next = wanrouter_router_devlist;
-	wanrouter_router_devlist = wandev;
-	return 0;
-}
-
-/*
- *	Unregister WAN device.
- *	o shut down device
- *	o unlink device data space from the linked list
- *	o delete device entry in the /proc/net/router directory
- *	o decrement module use count
- *
- *	Return:		0	Ok
- *			<0	error.
- *	Context:	process
- */
-
-
-int unregister_wan_device(char *name)
-{
-	struct wan_device *wandev, *prev;
-
-	if (name == NULL)
-		return -EINVAL;
-
-	for (wandev = wanrouter_router_devlist, prev = NULL;
-		wandev && strcmp(wandev->name, name);
-		prev = wandev, wandev = wandev->next)
-		;
-	if (wandev == NULL)
-		return -ENODEV;
-
-#ifdef WANDEBUG
-	printk(KERN_INFO "%s: unregistering WAN device %s\n",
-	       wanrouter_modname, name);
-#endif
-
-	if (wandev->state != WAN_UNCONFIGURED)
-		wanrouter_device_shutdown(wandev);
-
-	if (prev)
-		prev->next = wandev->next;
-	else
-		wanrouter_router_devlist = wandev->next;
-
-	wanrouter_proc_delete(wandev);
-	return 0;
-}
-
-#if 0
-
-/*
- *	Encapsulate packet.
- *
- *	Return:	encapsulation header size
- *		< 0	- unsupported Ethertype
- *
- *	Notes:
- *	1. This function may be called on interrupt context.
- */
-
-
-int wanrouter_encapsulate(struct sk_buff *skb, struct net_device *dev,
-			  unsigned short type)
-{
-	int hdr_len = 0;
-
-	switch (type) {
-	case ETH_P_IP:		/* IP datagram encapsulation */
-		hdr_len += 1;
-		skb_push(skb, 1);
-		skb->data[0] = NLPID_IP;
-		break;
-
-	case ETH_P_IPX:		/* SNAP encapsulation */
-	case ETH_P_ARP:
-		hdr_len += 7;
-		skb_push(skb, 7);
-		skb->data[0] = 0;
-		skb->data[1] = NLPID_SNAP;
-		skb_copy_to_linear_data_offset(skb, 2, wanrouter_oui_ether,
-					       sizeof(wanrouter_oui_ether));
-		*((unsigned short*)&skb->data[5]) = htons(type);
-		break;
-
-	default:		/* Unknown packet type */
-		printk(KERN_INFO
-			"%s: unsupported Ethertype 0x%04X on interface %s!\n",
-			wanrouter_modname, type, dev->name);
-		hdr_len = -EINVAL;
-	}
-	return hdr_len;
-}
-
-
-/*
- *	Decapsulate packet.
- *
- *	Return:	Ethertype (in network order)
- *			0	unknown encapsulation
- *
- *	Notes:
- *	1. This function may be called on interrupt context.
- */
-
-
-__be16 wanrouter_type_trans(struct sk_buff *skb, struct net_device *dev)
-{
-	int cnt = skb->data[0] ? 0 : 1;	/* there may be a pad present */
-	__be16 ethertype;
-
-	switch (skb->data[cnt]) {
-	case NLPID_IP:		/* IP datagramm */
-		ethertype = htons(ETH_P_IP);
-		cnt += 1;
-		break;
-
-	case NLPID_SNAP:	/* SNAP encapsulation */
-		if (memcmp(&skb->data[cnt + 1], wanrouter_oui_ether,
-			   sizeof(wanrouter_oui_ether))){
-			printk(KERN_INFO
-				"%s: unsupported SNAP OUI %02X-%02X-%02X "
-				"on interface %s!\n", wanrouter_modname,
-				skb->data[cnt+1], skb->data[cnt+2],
-				skb->data[cnt+3], dev->name);
-			return 0;
-		}
-		ethertype = *((__be16*)&skb->data[cnt+4]);
-		cnt += 6;
-		break;
-
-	/* add other protocols, e.g. CLNP, ESIS, ISIS, if needed */
-
-	default:
-		printk(KERN_INFO
-			"%s: unsupported NLPID 0x%02X on interface %s!\n",
-			wanrouter_modname, skb->data[cnt], dev->name);
-		return 0;
-	}
-	skb->protocol = ethertype;
-	skb->pkt_type = PACKET_HOST;	/*	Physically point to point */
-	skb_pull(skb, cnt);
-	skb_reset_mac_header(skb);
-	return ethertype;
-}
-
-#endif  /*  0  */
-
-/*
- *	WAN device IOCTL.
- *	o find WAN device associated with this node
- *	o execute requested action or pass command to the device driver
- */
-
-long wanrouter_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
-{
-	struct inode *inode = file->f_path.dentry->d_inode;
-	int err = 0;
-	struct proc_dir_entry *dent;
-	struct wan_device *wandev;
-	void __user *data = (void __user *)arg;
-
-	if (!capable(CAP_NET_ADMIN))
-		return -EPERM;
-
-	if ((cmd >> 8) != ROUTER_IOCTL)
-		return -EINVAL;
-
-	dent = PDE(inode);
-	if ((dent == NULL) || (dent->data == NULL))
-		return -EINVAL;
-
-	wandev = dent->data;
-	if (wandev->magic != ROUTER_MAGIC)
-		return -EINVAL;
-
-	mutex_lock(&wanrouter_mutex);
-	switch (cmd) {
-	case ROUTER_SETUP:
-		err = wanrouter_device_setup(wandev, data);
-		break;
-
-	case ROUTER_DOWN:
-		err = wanrouter_device_shutdown(wandev);
-		break;
-
-	case ROUTER_STAT:
-		err = wanrouter_device_stat(wandev, data);
-		break;
-
-	case ROUTER_IFNEW:
-		err = wanrouter_device_new_if(wandev, data);
-		break;
-
-	case ROUTER_IFDEL:
-		err = wanrouter_device_del_if(wandev, data);
-		break;
-
-	case ROUTER_IFSTAT:
-		break;
-
-	default:
-		if ((cmd >= ROUTER_USER) &&
-		    (cmd <= ROUTER_USER_MAX) &&
-		    wandev->ioctl)
-			err = wandev->ioctl(wandev, cmd, arg);
-		else err = -EINVAL;
-	}
-	mutex_unlock(&wanrouter_mutex);
-	return err;
-}
-
-/*
- *	WAN Driver IOCTL Handlers
- */
-
-/*
- *	Setup WAN link device.
- *	o verify user address space
- *	o allocate kernel memory and copy configuration data to kernel space
- *	o if configuration data includes extension, copy it to kernel space too
- *	o call driver's setup() entry point
- */
-
-static int wanrouter_device_setup(struct wan_device *wandev,
-				  wandev_conf_t __user *u_conf)
-{
-	void *data = NULL;
-	wandev_conf_t *conf;
-	int err = -EINVAL;
-
-	if (wandev->setup == NULL) {	/* Nothing to do ? */
-		printk(KERN_INFO "%s: ERROR, No setup script: wandev->setup()\n",
-				wandev->name);
-		return 0;
-	}
-
-	conf = kmalloc(sizeof(wandev_conf_t), GFP_KERNEL);
-	if (conf == NULL){
-		printk(KERN_INFO "%s: ERROR, Failed to allocate kernel memory !\n",
-				wandev->name);
-		return -ENOBUFS;
-	}
-
-	if (copy_from_user(conf, u_conf, sizeof(wandev_conf_t))) {
-		printk(KERN_INFO "%s: Failed to copy user config data to kernel space!\n",
-				wandev->name);
-		kfree(conf);
-		return -EFAULT;
-	}
-
-	if (conf->magic != ROUTER_MAGIC) {
-		kfree(conf);
-		printk(KERN_INFO "%s: ERROR, Invalid MAGIC Number\n",
-				wandev->name);
-		return -EINVAL;
-	}
-
-	if (conf->data_size && conf->data) {
-		if (conf->data_size > 128000) {
-			printk(KERN_INFO
-			    "%s: ERROR, Invalid firmware data size %i !\n",
-					wandev->name, conf->data_size);
-			kfree(conf);
-			return -EINVAL;
-		}
-
-		data = vmalloc(conf->data_size);
-		if (!data) {
-			printk(KERN_INFO
-				"%s: ERROR, Failed allocate kernel memory !\n",
-				wandev->name);
-			kfree(conf);
-			return -ENOBUFS;
-		}
-		if (!copy_from_user(data, conf->data, conf->data_size)) {
-			conf->data = data;
-			err = wandev->setup(wandev, conf);
-		} else {
-			printk(KERN_INFO
-			     "%s: ERROR, Failed to copy from user data !\n",
-			       wandev->name);
-			err = -EFAULT;
-		}
-		vfree(data);
-	} else {
-		printk(KERN_INFO
-		    "%s: ERROR, No firmware found ! Firmware size = %i !\n",
-				wandev->name, conf->data_size);
-	}
-
-	kfree(conf);
-	return err;
-}
-
-/*
- *	Shutdown WAN device.
- *	o delete all not opened logical channels for this device
- *	o call driver's shutdown() entry point
- */
-
-static int wanrouter_device_shutdown(struct wan_device *wandev)
-{
-	struct net_device *dev;
-	int err=0;
-
-	if (wandev->state == WAN_UNCONFIGURED)
-		return 0;
-
-	printk(KERN_INFO "\n%s: Shutting Down!\n",wandev->name);
-
-	for (dev = wandev->dev; dev;) {
-		err = wanrouter_delete_interface(wandev, dev->name);
-		if (err)
-			return err;
-		/* The above function deallocates the current dev
-		 * structure. Therefore, we cannot use netdev_priv(dev)
-		 * as the next element: wandev->dev points to the
-		 * next element */
-		dev = wandev->dev;
-	}
-
-	if (wandev->ndev)
-		return -EBUSY;	/* there are opened interfaces  */
-
-	if (wandev->shutdown)
-		err=wandev->shutdown(wandev);
-
-	return err;
-}
-
-/*
- *	Get WAN device status & statistics.
- */
-
-static int wanrouter_device_stat(struct wan_device *wandev,
-				 wandev_stat_t __user *u_stat)
-{
-	wandev_stat_t stat;
-
-	memset(&stat, 0, sizeof(stat));
-
-	/* Ask device driver to update device statistics */
-	if ((wandev->state != WAN_UNCONFIGURED) && wandev->update)
-		wandev->update(wandev);
-
-	/* Fill out structure */
-	stat.ndev  = wandev->ndev;
-	stat.state = wandev->state;
-
-	if (copy_to_user(u_stat, &stat, sizeof(stat)))
-		return -EFAULT;
-
-	return 0;
-}
-
-/*
- *	Create new WAN interface.
- *	o verify user address space
- *	o copy configuration data to kernel address space
- *	o allocate network interface data space
- *	o call driver's new_if() entry point
- *	o make sure there is no interface name conflict
- *	o register network interface
- */
-
-static int wanrouter_device_new_if(struct wan_device *wandev,
-				   wanif_conf_t __user *u_conf)
-{
-	wanif_conf_t *cnf;
-	struct net_device *dev = NULL;
-	int err;
-
-	if ((wandev->state == WAN_UNCONFIGURED) || (wandev->new_if == NULL))
-		return -ENODEV;
-
-	cnf = kmalloc(sizeof(wanif_conf_t), GFP_KERNEL);
-	if (!cnf)
-		return -ENOBUFS;
-
-	err = -EFAULT;
-	if (copy_from_user(cnf, u_conf, sizeof(wanif_conf_t)))
-		goto out;
-
-	err = -EINVAL;
-	if (cnf->magic != ROUTER_MAGIC)
-		goto out;
-
-	if (cnf->config_id == WANCONFIG_MPPP) {
-		printk(KERN_INFO "%s: Wanpipe Mulit-Port PPP support has not been compiled in!\n",
-				wandev->name);
-		err = -EPROTONOSUPPORT;
-		goto out;
-	} else {
-		err = wandev->new_if(wandev, dev, cnf);
-	}
-
-	if (!err) {
-		/* Register network interface. This will invoke init()
-		 * function supplied by the driver.  If device registered
-		 * successfully, add it to the interface list.
-		 */
-
-#ifdef WANDEBUG
-		printk(KERN_INFO "%s: registering interface %s...\n",
-		       wanrouter_modname, dev->name);
-#endif
-
-		err = register_netdev(dev);
-		if (!err) {
-			struct net_device *slave = NULL;
-			unsigned long smp_flags=0;
-
-			lock_adapter_irq(&wandev->lock, &smp_flags);
-
-			if (wandev->dev == NULL) {
-				wandev->dev = dev;
-			} else {
-				for (slave=wandev->dev;
-				     DEV_TO_SLAVE(slave);
-				     slave = DEV_TO_SLAVE(slave))
-					DEV_TO_SLAVE(slave) = dev;
-			}
-			++wandev->ndev;
-
-			unlock_adapter_irq(&wandev->lock, &smp_flags);
-			err = 0;	/* done !!! */
-			goto out;
-		}
-		if (wandev->del_if)
-			wandev->del_if(wandev, dev);
-		free_netdev(dev);
-	}
-
-out:
-	kfree(cnf);
-	return err;
-}
-
-
-/*
- *	Delete WAN logical channel.
- *	 o verify user address space
- *	 o copy configuration data to kernel address space
- */
-
-static int wanrouter_device_del_if(struct wan_device *wandev, char __user *u_name)
-{
-	char name[WAN_IFNAME_SZ + 1];
-	int err = 0;
-
-	if (wandev->state == WAN_UNCONFIGURED)
-		return -ENODEV;
-
-	memset(name, 0, sizeof(name));
-
-	if (copy_from_user(name, u_name, WAN_IFNAME_SZ))
-		return -EFAULT;
-
-	err = wanrouter_delete_interface(wandev, name);
-	if (err)
-		return err;
-
-	/* If last interface being deleted, shutdown card
-	 * This helps with administration at leaf nodes
-	 * (You can tell if the person at the other end of the phone
-	 * has an interface configured) and avoids DoS vulnerabilities
-	 * in binary driver files - this fixes a problem with the current
-	 * Sangoma driver going into strange states when all the network
-	 * interfaces are deleted and the link irrecoverably disconnected.
-	 */
-
-	if (!wandev->ndev && wandev->shutdown)
-		err = wandev->shutdown(wandev);
-
-	return err;
-}
-
-/*
- *	Miscellaneous Functions
- */
-
-/*
- *	Find WAN device by name.
- *	Return pointer to the WAN device data space or NULL if device not found.
- */
-
-static struct wan_device *wanrouter_find_device(char *name)
-{
-	struct wan_device *wandev;
-
-	for (wandev = wanrouter_router_devlist;
-	     wandev && strcmp(wandev->name, name);
-		wandev = wandev->next);
-	return wandev;
-}
-
-/*
- *	Delete WAN logical channel identified by its name.
- *	o find logical channel by its name
- *	o call driver's del_if() entry point
- *	o unregister network interface
- *	o unlink channel data space from linked list of channels
- *	o release channel data space
- *
- *	Return:	0		success
- *		-ENODEV		channel not found.
- *		-EBUSY		interface is open
- *
- *	Note: If (force != 0), then device will be destroyed even if interface
- *	associated with it is open. It's caller's responsibility to make
- *	sure that opened interfaces are not removed!
- */
-
-static int wanrouter_delete_interface(struct wan_device *wandev, char *name)
-{
-	struct net_device *dev = NULL, *prev = NULL;
-	unsigned long smp_flags=0;
-
-	lock_adapter_irq(&wandev->lock, &smp_flags);
-	dev = wandev->dev;
-	prev = NULL;
-	while (dev && strcmp(name, dev->name)) {
-		struct net_device **slave = netdev_priv(dev);
-		prev = dev;
-		dev = *slave;
-	}
-	unlock_adapter_irq(&wandev->lock, &smp_flags);
-
-	if (dev == NULL)
-		return -ENODEV;	/* interface not found */
-
-	if (netif_running(dev))
-		return -EBUSY;	/* interface in use */
-
-	if (wandev->del_if)
-		wandev->del_if(wandev, dev);
-
-	lock_adapter_irq(&wandev->lock, &smp_flags);
-	if (prev) {
-		struct net_device **prev_slave = netdev_priv(prev);
-		struct net_device **slave = netdev_priv(dev);
-
-		*prev_slave = *slave;
-	} else {
-		struct net_device **slave = netdev_priv(dev);
-		wandev->dev = *slave;
-	}
-	--wandev->ndev;
-	unlock_adapter_irq(&wandev->lock, &smp_flags);
-
-	printk(KERN_INFO "%s: unregistering '%s'\n", wandev->name, dev->name);
-
-	unregister_netdev(dev);
-
-	free_netdev(dev);
-
-	return 0;
-}
-
-static void lock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags)
-	__acquires(lock)
-{
-	spin_lock_irqsave(lock, *smp_flags);
-}
-
-
-static void unlock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags)
-	__releases(lock)
-{
-	spin_unlock_irqrestore(lock, *smp_flags);
-}
-
-EXPORT_SYMBOL(register_wan_device);
-EXPORT_SYMBOL(unregister_wan_device);
-
-MODULE_LICENSE("GPL");
-
-/*
- *	End
- */
diff --git a/net/wanrouter/wanproc.c b/net/wanrouter/wanproc.c
deleted file mode 100644
index c43612ee96bb..000000000000
--- a/net/wanrouter/wanproc.c
+++ /dev/null
@@ -1,380 +0,0 @@
-/*****************************************************************************
-* wanproc.c	WAN Router Module. /proc filesystem interface.
-*
-*		This module is completely hardware-independent and provides
-*		access to the router using Linux /proc filesystem.
-*
-* Author: 	Gideon Hack
-*
-* Copyright:	(c) 1995-1999 Sangoma Technologies Inc.
-*
-*		This program is free software; you can redistribute it and/or
-*		modify it under the terms of the GNU General Public License
-*		as published by the Free Software Foundation; either version
-*		2 of the License, or (at your option) any later version.
-* ============================================================================
-* Jun 02, 1999  Gideon Hack	Updates for Linux 2.2.X kernels.
-* Jun 29, 1997	Alan Cox	Merged with 1.0.3 vendor code
-* Jan 29, 1997	Gene Kozin	v1.0.1. Implemented /proc read routines
-* Jan 30, 1997	Alan Cox	Hacked around for 2.1
-* Dec 13, 1996	Gene Kozin	Initial version (based on Sangoma's WANPIPE)
-*****************************************************************************/
-
-#include <linux/init.h>		/* __initfunc et al. */
-#include <linux/stddef.h>	/* offsetof(), etc. */
-#include <linux/errno.h>	/* return codes */
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/wanrouter.h>	/* WAN router API definitions */
-#include <linux/seq_file.h>
-#include <linux/mutex.h>
-
-#include <net/net_namespace.h>
-#include <asm/io.h>
-
-#define PROC_STATS_FORMAT "%30s: %12lu\n"
-
-/****** Defines and Macros **************************************************/
-
-#define PROT_DECODE(prot) ((prot == WANCONFIG_FR) ? " FR" :\
-			      (prot == WANCONFIG_X25) ? " X25" : \
-				 (prot == WANCONFIG_PPP) ? " PPP" : \
-				    (prot == WANCONFIG_CHDLC) ? " CHDLC": \
-				       (prot == WANCONFIG_MPPP) ? " MPPP" : \
-					   " Unknown" )
-
-/****** Function Prototypes *************************************************/
-
-#ifdef CONFIG_PROC_FS
-
-/* Miscellaneous */
-
-/*
- *	Structures for interfacing with the /proc filesystem.
- *	Router creates its own directory /proc/net/router with the following
- *	entries:
- *	config		device configuration
- *	status		global device statistics
- *	<device>	entry for each WAN device
- */
-
-/*
- *	Generic /proc/net/router/<file> file and inode operations
- */
-
-/*
- *	/proc/net/router
- */
-
-static DEFINE_MUTEX(config_mutex);
-static struct proc_dir_entry *proc_router;
-
-/* Strings */
-
-/*
- *	Interface functions
- */
-
-/****** Proc filesystem entry points ****************************************/
-
-/*
- *	Iterator
- */
-static void *r_start(struct seq_file *m, loff_t *pos)
-{
-	struct wan_device *wandev;
-	loff_t l = *pos;
-
-	mutex_lock(&config_mutex);
-	if (!l--)
-		return SEQ_START_TOKEN;
-	for (wandev = wanrouter_router_devlist; l-- && wandev;
-	     wandev = wandev->next)
-		;
-	return wandev;
-}
-
-static void *r_next(struct seq_file *m, void *v, loff_t *pos)
-{
-	struct wan_device *wandev = v;
-	(*pos)++;
-	return (v == SEQ_START_TOKEN) ? wanrouter_router_devlist : wandev->next;
-}
-
-static void r_stop(struct seq_file *m, void *v)
-{
-	mutex_unlock(&config_mutex);
-}
-
-static int config_show(struct seq_file *m, void *v)
-{
-	struct wan_device *p = v;
-	if (v == SEQ_START_TOKEN) {
-		seq_puts(m, "Device name    | port |IRQ|DMA|  mem.addr  |"
-			    "mem.size|option1|option2|option3|option4\n");
-		return 0;
-	}
-	if (!p->state)
-		return 0;
-	seq_printf(m, "%-15s|0x%-4X|%3u|%3u| 0x%-8lX |0x%-6X|%7u|%7u|%7u|%7u\n",
-			p->name, p->ioport, p->irq, p->dma, p->maddr, p->msize,
-			p->hw_opt[0], p->hw_opt[1], p->hw_opt[2], p->hw_opt[3]);
-	return 0;
-}
-
-static int status_show(struct seq_file *m, void *v)
-{
-	struct wan_device *p = v;
-	if (v == SEQ_START_TOKEN) {
-		seq_puts(m, "Device name    |protocol|station|interface|"
-			    "clocking|baud rate| MTU |ndev|link state\n");
-		return 0;
-	}
-	if (!p->state)
-		return 0;
-	seq_printf(m, "%-15s|%-8s| %-7s| %-9s|%-8s|%9u|%5u|%3u |",
-		p->name,
-		PROT_DECODE(p->config_id),
-		p->config_id == WANCONFIG_FR ?
-			(p->station ? "Node" : "CPE") :
-			(p->config_id == WANCONFIG_X25 ?
-			(p->station ? "DCE" : "DTE") :
-			("N/A")),
-		p->interface ? "V.35" : "RS-232",
-		p->clocking ? "internal" : "external",
-		p->bps,
-		p->mtu,
-		p->ndev);
-
-	switch (p->state) {
-	case WAN_UNCONFIGURED:
-		seq_printf(m, "%-12s\n", "unconfigured");
-		break;
-	case WAN_DISCONNECTED:
-		seq_printf(m, "%-12s\n", "disconnected");
-		break;
-	case WAN_CONNECTING:
-		seq_printf(m, "%-12s\n", "connecting");
-		break;
-	case WAN_CONNECTED:
-		seq_printf(m, "%-12s\n", "connected");
-		break;
-	default:
-		seq_printf(m, "%-12s\n", "invalid");
-		break;
-	}
-	return 0;
-}
-
-static const struct seq_operations config_op = {
-	.start	= r_start,
-	.next	= r_next,
-	.stop	= r_stop,
-	.show	= config_show,
-};
-
-static const struct seq_operations status_op = {
-	.start	= r_start,
-	.next	= r_next,
-	.stop	= r_stop,
-	.show	= status_show,
-};
-
-static int config_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &config_op);
-}
-
-static int status_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &status_op);
-}
-
-static const struct file_operations config_fops = {
-	.owner	 = THIS_MODULE,
-	.open	 = config_open,
-	.read	 = seq_read,
-	.llseek	 = seq_lseek,
-	.release = seq_release,
-};
-
-static const struct file_operations status_fops = {
-	.owner	 = THIS_MODULE,
-	.open	 = status_open,
-	.read	 = seq_read,
-	.llseek	 = seq_lseek,
-	.release = seq_release,
-};
-
-static int wandev_show(struct seq_file *m, void *v)
-{
-	struct wan_device *wandev = m->private;
-
-	if (wandev->magic != ROUTER_MAGIC)
-		return 0;
-
-	if (!wandev->state) {
-		seq_puts(m, "device is not configured!\n");
-		return 0;
-	}
-
-	/* Update device statistics */
-	if (wandev->update) {
-		int err = wandev->update(wandev);
-		if (err == -EAGAIN) {
-			seq_puts(m, "Device is busy!\n");
-			return 0;
-		}
-		if (err) {
-			seq_puts(m, "Device is not configured!\n");
-			return 0;
-		}
-	}
-
-	seq_printf(m, PROC_STATS_FORMAT,
-		"total packets received", wandev->stats.rx_packets);
-	seq_printf(m, PROC_STATS_FORMAT,
-		"total packets transmitted", wandev->stats.tx_packets);
-	seq_printf(m, PROC_STATS_FORMAT,
-		"total bytes received", wandev->stats.rx_bytes);
-	seq_printf(m, PROC_STATS_FORMAT,
-		"total bytes transmitted", wandev->stats.tx_bytes);
-	seq_printf(m, PROC_STATS_FORMAT,
-		"bad packets received", wandev->stats.rx_errors);
-	seq_printf(m, PROC_STATS_FORMAT,
-		"packet transmit problems", wandev->stats.tx_errors);
-	seq_printf(m, PROC_STATS_FORMAT,
-		"received frames dropped", wandev->stats.rx_dropped);
-	seq_printf(m, PROC_STATS_FORMAT,
-		"transmit frames dropped", wandev->stats.tx_dropped);
-	seq_printf(m, PROC_STATS_FORMAT,
-		"multicast packets received", wandev->stats.multicast);
-	seq_printf(m, PROC_STATS_FORMAT,
-		"transmit collisions", wandev->stats.collisions);
-	seq_printf(m, PROC_STATS_FORMAT,
-		"receive length errors", wandev->stats.rx_length_errors);
-	seq_printf(m, PROC_STATS_FORMAT,
-		"receiver overrun errors", wandev->stats.rx_over_errors);
-	seq_printf(m, PROC_STATS_FORMAT,
-		"CRC errors", wandev->stats.rx_crc_errors);
-	seq_printf(m, PROC_STATS_FORMAT,
-		"frame format errors (aborts)", wandev->stats.rx_frame_errors);
-	seq_printf(m, PROC_STATS_FORMAT,
-		"receiver fifo overrun", wandev->stats.rx_fifo_errors);
-	seq_printf(m, PROC_STATS_FORMAT,
-		"receiver missed packet", wandev->stats.rx_missed_errors);
-	seq_printf(m, PROC_STATS_FORMAT,
-		"aborted frames transmitted", wandev->stats.tx_aborted_errors);
-	return 0;
-}
-
-static int wandev_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, wandev_show, PDE(inode)->data);
-}
-
-static const struct file_operations wandev_fops = {
-	.owner	 = THIS_MODULE,
-	.open	 = wandev_open,
-	.read	 = seq_read,
-	.llseek	 = seq_lseek,
-	.release = single_release,
-	.unlocked_ioctl  = wanrouter_ioctl,
-};
-
-/*
- *	Initialize router proc interface.
- */
-
-int __init wanrouter_proc_init(void)
-{
-	struct proc_dir_entry *p;
-	proc_router = proc_mkdir(ROUTER_NAME, init_net.proc_net);
-	if (!proc_router)
-		goto fail;
-
-	p = proc_create("config", S_IRUGO, proc_router, &config_fops);
-	if (!p)
-		goto fail_config;
-	p = proc_create("status", S_IRUGO, proc_router, &status_fops);
-	if (!p)
-		goto fail_stat;
-	return 0;
-fail_stat:
-	remove_proc_entry("config", proc_router);
-fail_config:
-	remove_proc_entry(ROUTER_NAME, init_net.proc_net);
-fail:
-	return -ENOMEM;
-}
-
-/*
- *	Clean up router proc interface.
- */
-
-void wanrouter_proc_cleanup(void)
-{
-	remove_proc_entry("config", proc_router);
-	remove_proc_entry("status", proc_router);
-	remove_proc_entry(ROUTER_NAME, init_net.proc_net);
-}
-
-/*
- *	Add directory entry for WAN device.
- */
-
-int wanrouter_proc_add(struct wan_device* wandev)
-{
-	if (wandev->magic != ROUTER_MAGIC)
-		return -EINVAL;
-
-	wandev->dent = proc_create(wandev->name, S_IRUGO,
-				   proc_router, &wandev_fops);
-	if (!wandev->dent)
-		return -ENOMEM;
-	wandev->dent->data	= wandev;
-	return 0;
-}
-
-/*
- *	Delete directory entry for WAN device.
- */
-int wanrouter_proc_delete(struct wan_device* wandev)
-{
-	if (wandev->magic != ROUTER_MAGIC)
-		return -EINVAL;
-	remove_proc_entry(wandev->name, proc_router);
-	return 0;
-}
-
-#else
-
-/*
- *	No /proc - output stubs
- */
-
-int __init wanrouter_proc_init(void)
-{
-	return 0;
-}
-
-void wanrouter_proc_cleanup(void)
-{
-}
-
-int wanrouter_proc_add(struct wan_device *wandev)
-{
-	return 0;
-}
-
-int wanrouter_proc_delete(struct wan_device *wandev)
-{
-	return 0;
-}
-
-#endif
-
-/*
- *	End
- */
-
-- 
cgit v1.2.3


From e1d7ef1cc472de30995a50ecb9c7aa3361f985f9 Mon Sep 17 00:00:00 2001
From: Stephen Warren <swarren@nvidia.com>
Date: Wed, 30 Jan 2013 10:49:30 -0700
Subject: clocksource: always define CLOCKSOURCE_OF_DECLARE

This allows clocksource drivers that support both DT and non-DT to
always invoke macro CLOCKSOURCE_OF_DECLARE(), rather than wrapping
it in a #ifdef CONFIG_CLKSRC_OF, which simplifies their code.

Signed-off-by: Stephen Warren <swarren@nvidia.com>
Signed-off-by: Olof Johansson <olof@lixom.net>
---
 include/linux/clocksource.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 7944f14ea947..27cfda427dd9 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -339,6 +339,8 @@ extern void clocksource_of_init(void);
 	static const struct of_device_id __clksrc_of_table_##name	\
 		__used __section(__clksrc_of_table)			\
 		 = { .compatible = compat, .data = fn };
+#else
+#define CLOCKSOURCE_OF_DECLARE(name, compat, fn)
 #endif
 
 #endif /* _LINUX_CLOCKSOURCE_H */
-- 
cgit v1.2.3


From 1b092092bf0e2e8b7af1c2a03f615b4e60b05d47 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 8 Jan 2013 09:26:49 -0500
Subject: SUNRPC: Pass a pointer to struct rpc_xprt to the connect callback

Avoid another RCU dereference by passing the pointer to struct rpc_xprt
from the caller.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/xprt.h     | 2 +-
 net/sunrpc/xprt.c               | 2 +-
 net/sunrpc/xprtrdma/transport.c | 3 +--
 net/sunrpc/xprtsock.c           | 4 ++--
 4 files changed, 5 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index 951cb9b7d02b..7dd598a5c9aa 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -117,7 +117,7 @@ struct rpc_xprt_ops {
 	void		(*alloc_slot)(struct rpc_xprt *xprt, struct rpc_task *task);
 	void		(*rpcbind)(struct rpc_task *task);
 	void		(*set_port)(struct rpc_xprt *xprt, unsigned short port);
-	void		(*connect)(struct rpc_task *task);
+	void		(*connect)(struct rpc_xprt *xprt, struct rpc_task *task);
 	void *		(*buf_alloc)(struct rpc_task *task, size_t size);
 	void		(*buf_free)(void *buffer);
 	int		(*send_request)(struct rpc_task *task);
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 738ad59628cd..774025109e2f 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -724,7 +724,7 @@ void xprt_connect(struct rpc_task *task)
 		if (xprt_test_and_set_connecting(xprt))
 			return;
 		xprt->stat.connect_start = jiffies;
-		xprt->ops->connect(task);
+		xprt->ops->connect(xprt, task);
 	}
 }
 
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index b357c528d29c..d0074289708e 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -426,9 +426,8 @@ xprt_rdma_set_port(struct rpc_xprt *xprt, u16 port)
 }
 
 static void
-xprt_rdma_connect(struct rpc_task *task)
+xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task)
 {
-	struct rpc_xprt *xprt = (struct rpc_xprt *)task->tk_xprt;
 	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
 
 	if (r_xprt->rx_ep.rep_connected != 0) {
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 25838901c97f..1897181d7438 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -2205,6 +2205,7 @@ out:
 
 /**
  * xs_connect - connect a socket to a remote endpoint
+ * @xprt: pointer to transport structure
  * @task: address of RPC task that manages state of connect request
  *
  * TCP: If the remote end dropped the connection, delay reconnecting.
@@ -2216,9 +2217,8 @@ out:
  * If a UDP socket connect fails, the delay behavior here prevents
  * retry floods (hard mounts).
  */
-static void xs_connect(struct rpc_task *task)
+static void xs_connect(struct rpc_xprt *xprt, struct rpc_task *task)
 {
-	struct rpc_xprt *xprt = task->tk_xprt;
 	struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
 
 	if (transport->sock != NULL && !RPC_IS_SOFTCONN(task)) {
-- 
cgit v1.2.3


From 6a24dfb645dbcb05b34d08b991d082bdaa3ff072 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 8 Jan 2013 09:48:15 -0500
Subject: SUNRPC: Pass pointers to struct rpc_xprt to the congestion window

Avoid access to task->tk_xprt

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/xprt.h | 4 ++--
 net/sunrpc/xprt.c           | 6 +++---
 net/sunrpc/xprtsock.c       | 6 +++---
 3 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index 7dd598a5c9aa..30834be03011 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -122,7 +122,7 @@ struct rpc_xprt_ops {
 	void		(*buf_free)(void *buffer);
 	int		(*send_request)(struct rpc_task *task);
 	void		(*set_retrans_timeout)(struct rpc_task *task);
-	void		(*timer)(struct rpc_task *task);
+	void		(*timer)(struct rpc_xprt *xprt, struct rpc_task *task);
 	void		(*release_request)(struct rpc_task *task);
 	void		(*close)(struct rpc_xprt *xprt);
 	void		(*destroy)(struct rpc_xprt *xprt);
@@ -313,7 +313,7 @@ void			xprt_set_retrans_timeout_rtt(struct rpc_task *task);
 void			xprt_wake_pending_tasks(struct rpc_xprt *xprt, int status);
 void			xprt_wait_for_buffer_space(struct rpc_task *task, rpc_action action);
 void			xprt_write_space(struct rpc_xprt *xprt);
-void			xprt_adjust_cwnd(struct rpc_task *task, int result);
+void			xprt_adjust_cwnd(struct rpc_xprt *xprt, struct rpc_task *task, int result);
 struct rpc_rqst *	xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid);
 void			xprt_complete_rqst(struct rpc_task *task, int copied);
 void			xprt_release_rqst_cong(struct rpc_task *task);
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 774025109e2f..e1e439ea177f 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -438,15 +438,15 @@ EXPORT_SYMBOL_GPL(xprt_release_rqst_cong);
 
 /**
  * xprt_adjust_cwnd - adjust transport congestion window
+ * @xprt: pointer to xprt
  * @task: recently completed RPC request used to adjust window
  * @result: result code of completed RPC request
  *
  * We use a time-smoothed congestion estimator to avoid heavy oscillation.
  */
-void xprt_adjust_cwnd(struct rpc_task *task, int result)
+void xprt_adjust_cwnd(struct rpc_xprt *xprt, struct rpc_task *task, int result)
 {
 	struct rpc_rqst *req = task->tk_rqstp;
-	struct rpc_xprt *xprt = task->tk_xprt;
 	unsigned long cwnd = xprt->cwnd;
 
 	if (result >= 0 && cwnd <= xprt->cong) {
@@ -834,7 +834,7 @@ static void xprt_timer(struct rpc_task *task)
 	spin_lock_bh(&xprt->transport_lock);
 	if (!req->rq_reply_bytes_recvd) {
 		if (xprt->ops->timer)
-			xprt->ops->timer(task);
+			xprt->ops->timer(xprt, task);
 	} else
 		task->tk_status = 0;
 	spin_unlock_bh(&xprt->transport_lock);
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index f7d6e4f8c162..37cbda63f45c 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1005,7 +1005,7 @@ static void xs_udp_data_ready(struct sock *sk, int len)
 
 	UDPX_INC_STATS_BH(sk, UDP_MIB_INDATAGRAMS);
 
-	xprt_adjust_cwnd(task, copied);
+	xprt_adjust_cwnd(xprt, task, copied);
 	xprt_complete_rqst(task, copied);
 
  out_unlock:
@@ -1646,9 +1646,9 @@ static void xs_udp_set_buffer_size(struct rpc_xprt *xprt, size_t sndsize, size_t
  *
  * Adjust the congestion window after a retransmit timeout has occurred.
  */
-static void xs_udp_timer(struct rpc_task *task)
+static void xs_udp_timer(struct rpc_xprt *xprt, struct rpc_task *task)
 {
-	xprt_adjust_cwnd(task, -ETIMEDOUT);
+	xprt_adjust_cwnd(xprt, task, -ETIMEDOUT);
 }
 
 static unsigned short xs_get_random_port(void)
-- 
cgit v1.2.3


From 77102893ae685270c1774fa8b7eead6ad93c838d Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 8 Jan 2013 10:10:04 -0500
Subject: SUNRPC: Nuke the tk_xprt macro

It is no longer in use

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/sched.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index b64f8eb0b973..84ca436b76c2 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -87,7 +87,6 @@ struct rpc_task {
 				tk_cred_retry : 2,
 				tk_rebind_retry : 2;
 };
-#define tk_xprt			tk_client->cl_xprt
 
 /* support walking a list of tasks on a wait queue */
 #define	task_for_each(task, pos, head) \
-- 
cgit v1.2.3


From 97cc019ee56d52005ea4544af17bef268c464862 Mon Sep 17 00:00:00 2001
From: Rafał Miłecki <zajec5@gmail.com>
Date: Fri, 1 Feb 2013 08:46:56 +0100
Subject: bcma: cc: fix (and rename) define of NAND flash type
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Rafał Miłecki <zajec5@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/bcma/bcma_driver_chipcommon.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/bcma/bcma_driver_chipcommon.h b/include/linux/bcma/bcma_driver_chipcommon.h
index 9a0e3fa3ca95..6a299f416288 100644
--- a/include/linux/bcma/bcma_driver_chipcommon.h
+++ b/include/linux/bcma/bcma_driver_chipcommon.h
@@ -27,7 +27,7 @@
 #define   BCMA_CC_FLASHT_NONE		0x00000000	/* No flash */
 #define   BCMA_CC_FLASHT_STSER		0x00000100	/* ST serial flash */
 #define   BCMA_CC_FLASHT_ATSER		0x00000200	/* Atmel serial flash */
-#define   BCMA_CC_FLASHT_NFLASH		0x00000200	/* NAND flash */
+#define   BCMA_CC_FLASHT_NAND		0x00000300	/* NAND flash */
 #define	  BCMA_CC_FLASHT_PARA		0x00000700	/* Parallel flash */
 #define  BCMA_CC_CAP_PLLT		0x00038000	/* PLL Type */
 #define   BCMA_PLLTYPE_NONE		0x00000000
-- 
cgit v1.2.3


From dc75eb36c3233409728e88acfd6b45a223856289 Mon Sep 17 00:00:00 2001
From: Ezequiel Garcia <ezequiel.garcia@free-electrons.com>
Date: Fri, 25 Jan 2013 09:23:09 -0300
Subject: mtd: omap-onenand: pass device_node in platform data

Pass an optional device_node pointer in the platform data,
which in turn will be put into a mtd_part_parser_data.
This way, code that sets up the platform devices can pass
along the node from DT so that the partitions can be parsed.

For non-DT boards, this change has no effect.

Acked-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: Ezequiel Garcia <ezequiel.garcia@free-electrons.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 drivers/mtd/onenand/omap2.c                     | 4 +++-
 include/linux/platform_data/mtd-onenand-omap2.h | 3 +++
 2 files changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/onenand/omap2.c b/drivers/mtd/onenand/omap2.c
index 065f3fe02a2f..eec2aedb4ab8 100644
--- a/drivers/mtd/onenand/omap2.c
+++ b/drivers/mtd/onenand/omap2.c
@@ -637,6 +637,7 @@ static int omap2_onenand_probe(struct platform_device *pdev)
 	struct onenand_chip *this;
 	int r;
 	struct resource *res;
+	struct mtd_part_parser_data ppdata = {};
 
 	pdata = pdev->dev.platform_data;
 	if (pdata == NULL) {
@@ -767,7 +768,8 @@ static int omap2_onenand_probe(struct platform_device *pdev)
 	if ((r = onenand_scan(&c->mtd, 1)) < 0)
 		goto err_release_regulator;
 
-	r = mtd_device_parse_register(&c->mtd, NULL, NULL,
+	ppdata.of_node = pdata->of_node;
+	r = mtd_device_parse_register(&c->mtd, NULL, &ppdata,
 				      pdata ? pdata->parts : NULL,
 				      pdata ? pdata->nr_parts : 0);
 	if (r)
diff --git a/include/linux/platform_data/mtd-onenand-omap2.h b/include/linux/platform_data/mtd-onenand-omap2.h
index 685af7e8b120..e9a9fb188f97 100644
--- a/include/linux/platform_data/mtd-onenand-omap2.h
+++ b/include/linux/platform_data/mtd-onenand-omap2.h
@@ -29,5 +29,8 @@ struct omap_onenand_platform_data {
 	u8			flags;
 	u8			regulator_can_sleep;
 	u8			skip_initial_unlocking;
+
+	/* for passing the partitions */
+	struct device_node	*of_node;
 };
 #endif
-- 
cgit v1.2.3


From b8eed8af94f9203e0cc39245ea335f4b8dc1ed31 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Mon, 14 Jan 2013 13:23:03 +0000
Subject: cpufreq: Simplify __cpufreq_remove_dev()

__cpufreq_remove_dev() is called on multiple occasions: cpufreq_driver
unregister and cpu removals.

Current implementation of this routine is overly complex without much need. If
the cpu to be removed is the policy->cpu, we remove the policy first and add all
other cpus again from policy->cpus and then finally call __cpufreq_remove_dev()
again to remove the cpu to be deleted. Haahhhh..

There exist a simple solution to removal of a cpu:
- Simply use the old policy structure
- update its fields like: policy->cpu, etc.
- notify any users of cpufreq, which depend on changing policy->cpu

Hence this patch, which tries to implement the above theory. It is tested well
by myself on ARM big.LITTLE TC2 SoC, which has 5 cores (2 A15 and 3 A7). Both
A15's share same struct policy and all A7's share same policy structure.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Tested-by: Shawn Guo <shawn.guo@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/cpufreq.c       | 161 +++++++++++++++++-----------------------
 drivers/cpufreq/cpufreq_stats.c |  27 ++++++-
 drivers/cpufreq/freq_table.c    |   9 +++
 include/linux/cpufreq.h         |  14 ++--
 4 files changed, 113 insertions(+), 98 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 034d1836884b..9af14a8bbcdb 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -1036,6 +1036,25 @@ module_out:
 	return ret;
 }
 
+static void update_policy_cpu(struct cpufreq_policy *policy, unsigned int cpu)
+{
+	int j;
+
+	policy->last_cpu = policy->cpu;
+	policy->cpu = cpu;
+
+	for_each_cpu(j, policy->cpus) {
+		if (!cpu_online(j))
+			continue;
+		per_cpu(cpufreq_policy_cpu, j) = cpu;
+	}
+
+#ifdef CONFIG_CPU_FREQ_TABLE
+	cpufreq_frequency_table_update_policy_cpu(policy);
+#endif
+	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
+			CPUFREQ_UPDATE_POLICY_CPU, policy);
+}
 
 /**
  * __cpufreq_remove_dev - remove a CPU device
@@ -1046,132 +1065,92 @@ module_out:
  */
 static int __cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif)
 {
-	unsigned int cpu = dev->id;
+	unsigned int cpu = dev->id, ret, cpus;
 	unsigned long flags;
 	struct cpufreq_policy *data;
 	struct kobject *kobj;
 	struct completion *cmp;
-#ifdef CONFIG_SMP
 	struct device *cpu_dev;
-	unsigned int j;
-#endif
 
-	pr_debug("unregistering CPU %u\n", cpu);
+	pr_debug("%s: unregistering CPU %u\n", __func__, cpu);
 
 	spin_lock_irqsave(&cpufreq_driver_lock, flags);
 	data = per_cpu(cpufreq_cpu_data, cpu);
 
 	if (!data) {
+		pr_debug("%s: No cpu_data found\n", __func__);
 		spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
 		unlock_policy_rwsem_write(cpu);
 		return -EINVAL;
 	}
-	per_cpu(cpufreq_cpu_data, cpu) = NULL;
 
-#ifdef CONFIG_SMP
-	/* if this isn't the CPU which is the parent of the kobj, we
-	 * only need to unlink, put and exit
-	 */
-	if (unlikely(cpu != data->cpu)) {
-		pr_debug("removing link\n");
+	if (cpufreq_driver->target)
 		__cpufreq_governor(data, CPUFREQ_GOV_STOP);
-		cpumask_clear_cpu(cpu, data->cpus);
-		spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
-
-		__cpufreq_governor(data, CPUFREQ_GOV_START);
-		__cpufreq_governor(data, CPUFREQ_GOV_LIMITS);
-
-		kobj = &dev->kobj;
-		cpufreq_cpu_put(data);
-		unlock_policy_rwsem_write(cpu);
-		sysfs_remove_link(kobj, "cpufreq");
-		return 0;
-	}
-#endif
-
-#ifdef CONFIG_SMP
 
 #ifdef CONFIG_HOTPLUG_CPU
 	strncpy(per_cpu(cpufreq_cpu_governor, cpu), data->governor->name,
 			CPUFREQ_NAME_LEN);
 #endif
 
-	/* if we have other CPUs still registered, we need to unlink them,
-	 * or else wait_for_completion below will lock up. Clean the
-	 * per_cpu(cpufreq_cpu_data) while holding the lock, and remove
-	 * the sysfs links afterwards.
-	 */
-	if (unlikely(cpumask_weight(data->cpus) > 1)) {
-		for_each_cpu(j, data->cpus) {
-			if (j == cpu)
-				continue;
-			per_cpu(cpufreq_cpu_data, j) = NULL;
-		}
-	}
-
-	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
+	per_cpu(cpufreq_cpu_data, cpu) = NULL;
+	cpus = cpumask_weight(data->cpus);
+	cpumask_clear_cpu(cpu, data->cpus);
 
-	if (unlikely(cpumask_weight(data->cpus) > 1)) {
-		for_each_cpu(j, data->cpus) {
-			if (j == cpu)
-				continue;
-			pr_debug("removing link for cpu %u\n", j);
-#ifdef CONFIG_HOTPLUG_CPU
-			strncpy(per_cpu(cpufreq_cpu_governor, j),
-				data->governor->name, CPUFREQ_NAME_LEN);
-#endif
-			cpu_dev = get_cpu_device(j);
-			kobj = &cpu_dev->kobj;
+	if (unlikely((cpu == data->cpu) && (cpus > 1))) {
+		/* first sibling now owns the new sysfs dir */
+		cpu_dev = get_cpu_device(cpumask_first(data->cpus));
+		sysfs_remove_link(&cpu_dev->kobj, "cpufreq");
+		ret = kobject_move(&data->kobj, &cpu_dev->kobj);
+		if (ret) {
+			pr_err("%s: Failed to move kobj: %d", __func__, ret);
+			cpumask_set_cpu(cpu, data->cpus);
+			ret = sysfs_create_link(&cpu_dev->kobj, &data->kobj,
+					"cpufreq");
+			spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
 			unlock_policy_rwsem_write(cpu);
-			sysfs_remove_link(kobj, "cpufreq");
-			lock_policy_rwsem_write(cpu);
-			cpufreq_cpu_put(data);
+			return -EINVAL;
 		}
+
+		update_policy_cpu(data, cpu_dev->id);
+		pr_debug("%s: policy Kobject moved to cpu: %d from: %d\n",
+				__func__, cpu_dev->id, cpu);
 	}
-#else
-	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
-#endif
 
-	if (cpufreq_driver->target)
-		__cpufreq_governor(data, CPUFREQ_GOV_STOP);
+	spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
 
-	kobj = &data->kobj;
-	cmp = &data->kobj_unregister;
+	pr_debug("%s: removing link, cpu: %d\n", __func__, cpu);
+	cpufreq_cpu_put(data);
 	unlock_policy_rwsem_write(cpu);
-	kobject_put(kobj);
+	sysfs_remove_link(&dev->kobj, "cpufreq");
 
-	/* we need to make sure that the underlying kobj is actually
-	 * not referenced anymore by anybody before we proceed with
-	 * unloading.
-	 */
-	pr_debug("waiting for dropping of refcount\n");
-	wait_for_completion(cmp);
-	pr_debug("wait complete\n");
-
-	lock_policy_rwsem_write(cpu);
-	if (cpufreq_driver->exit)
-		cpufreq_driver->exit(data);
-	unlock_policy_rwsem_write(cpu);
+	/* If cpu is last user of policy, free policy */
+	if (cpus == 1) {
+		lock_policy_rwsem_write(cpu);
+		kobj = &data->kobj;
+		cmp = &data->kobj_unregister;
+		unlock_policy_rwsem_write(cpu);
+		kobject_put(kobj);
 
-#ifdef CONFIG_HOTPLUG_CPU
-	/* when the CPU which is the parent of the kobj is hotplugged
-	 * offline, check for siblings, and create cpufreq sysfs interface
-	 * and symlinks
-	 */
-	if (unlikely(cpumask_weight(data->cpus) > 1)) {
-		/* first sibling now owns the new sysfs dir */
-		cpumask_clear_cpu(cpu, data->cpus);
-		cpufreq_add_dev(get_cpu_device(cpumask_first(data->cpus)), NULL);
+		/* we need to make sure that the underlying kobj is actually
+		 * not referenced anymore by anybody before we proceed with
+		 * unloading.
+		 */
+		pr_debug("waiting for dropping of refcount\n");
+		wait_for_completion(cmp);
+		pr_debug("wait complete\n");
 
-		/* finally remove our own symlink */
 		lock_policy_rwsem_write(cpu);
-		__cpufreq_remove_dev(dev, sif);
-	}
-#endif
+		if (cpufreq_driver->exit)
+			cpufreq_driver->exit(data);
+		unlock_policy_rwsem_write(cpu);
 
-	free_cpumask_var(data->related_cpus);
-	free_cpumask_var(data->cpus);
-	kfree(data);
+		free_cpumask_var(data->related_cpus);
+		free_cpumask_var(data->cpus);
+		kfree(data);
+	} else if (cpufreq_driver->target) {
+		__cpufreq_governor(data, CPUFREQ_GOV_START);
+		__cpufreq_governor(data, CPUFREQ_GOV_LIMITS);
+	}
 
 	return 0;
 }
diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c
index 9d7732b81044..beef6b54382b 100644
--- a/drivers/cpufreq/cpufreq_stats.c
+++ b/drivers/cpufreq/cpufreq_stats.c
@@ -170,11 +170,13 @@ static int freq_table_get_index(struct cpufreq_stats *stat, unsigned int freq)
 static void cpufreq_stats_free_table(unsigned int cpu)
 {
 	struct cpufreq_stats *stat = per_cpu(cpufreq_stats_table, cpu);
+
 	if (stat) {
+		pr_debug("%s: Free stat table\n", __func__);
 		kfree(stat->time_in_state);
 		kfree(stat);
+		per_cpu(cpufreq_stats_table, cpu) = NULL;
 	}
-	per_cpu(cpufreq_stats_table, cpu) = NULL;
 }
 
 /* must be called early in the CPU removal sequence (before
@@ -183,8 +185,10 @@ static void cpufreq_stats_free_table(unsigned int cpu)
 static void cpufreq_stats_free_sysfs(unsigned int cpu)
 {
 	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
-	if (policy && policy->cpu == cpu)
+	if (policy && (cpumask_weight(policy->cpus) == 1)) {
+		pr_debug("%s: Free sysfs stat\n", __func__);
 		sysfs_remove_group(&policy->kobj, &stats_attr_group);
+	}
 	if (policy)
 		cpufreq_cpu_put(policy);
 }
@@ -262,6 +266,19 @@ error_get_fail:
 	return ret;
 }
 
+static void cpufreq_stats_update_policy_cpu(struct cpufreq_policy *policy)
+{
+	struct cpufreq_stats *stat = per_cpu(cpufreq_stats_table,
+			policy->last_cpu);
+
+	pr_debug("Updating stats_table for new_cpu %u from last_cpu %u\n",
+			policy->cpu, policy->last_cpu);
+	per_cpu(cpufreq_stats_table, policy->cpu) = per_cpu(cpufreq_stats_table,
+			policy->last_cpu);
+	per_cpu(cpufreq_stats_table, policy->last_cpu) = NULL;
+	stat->cpu = policy->cpu;
+}
+
 static int cpufreq_stat_notifier_policy(struct notifier_block *nb,
 		unsigned long val, void *data)
 {
@@ -269,6 +286,12 @@ static int cpufreq_stat_notifier_policy(struct notifier_block *nb,
 	struct cpufreq_policy *policy = data;
 	struct cpufreq_frequency_table *table;
 	unsigned int cpu = policy->cpu;
+
+	if (val == CPUFREQ_UPDATE_POLICY_CPU) {
+		cpufreq_stats_update_policy_cpu(policy);
+		return 0;
+	}
+
 	if (val != CPUFREQ_NOTIFY)
 		return 0;
 	table = cpufreq_frequency_get_table(cpu);
diff --git a/drivers/cpufreq/freq_table.c b/drivers/cpufreq/freq_table.c
index 49cda256efb2..aa5bd39d129e 100644
--- a/drivers/cpufreq/freq_table.c
+++ b/drivers/cpufreq/freq_table.c
@@ -227,6 +227,15 @@ void cpufreq_frequency_table_put_attr(unsigned int cpu)
 }
 EXPORT_SYMBOL_GPL(cpufreq_frequency_table_put_attr);
 
+void cpufreq_frequency_table_update_policy_cpu(struct cpufreq_policy *policy)
+{
+	pr_debug("Updating show_table for new_cpu %u from last_cpu %u\n",
+			policy->cpu, policy->last_cpu);
+	per_cpu(cpufreq_show_table, policy->cpu) = per_cpu(cpufreq_show_table,
+			policy->last_cpu);
+	per_cpu(cpufreq_show_table, policy->last_cpu) = NULL;
+}
+
 struct cpufreq_frequency_table *cpufreq_frequency_get_table(unsigned int cpu)
 {
 	return per_cpu(cpufreq_show_table, cpu);
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index a55b88eaf96a..52be2d0c994a 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -93,7 +93,9 @@ struct cpufreq_policy {
 	cpumask_var_t		related_cpus; /* CPUs with any coordination */
 	unsigned int		shared_type; /* ANY or ALL affected CPUs
 						should set cpufreq */
-	unsigned int		cpu;    /* cpu nr of registered CPU */
+	unsigned int		cpu;    /* cpu nr of CPU managing this policy */
+	unsigned int		last_cpu; /* cpu nr of previous CPU that managed
+					   * this policy */
 	struct cpufreq_cpuinfo	cpuinfo;/* see above */
 
 	unsigned int		min;    /* in kHz */
@@ -112,10 +114,11 @@ struct cpufreq_policy {
 	struct completion	kobj_unregister;
 };
 
-#define CPUFREQ_ADJUST		(0)
-#define CPUFREQ_INCOMPATIBLE	(1)
-#define CPUFREQ_NOTIFY		(2)
-#define CPUFREQ_START		(3)
+#define CPUFREQ_ADJUST			(0)
+#define CPUFREQ_INCOMPATIBLE		(1)
+#define CPUFREQ_NOTIFY			(2)
+#define CPUFREQ_START			(3)
+#define CPUFREQ_UPDATE_POLICY_CPU	(4)
 
 #define CPUFREQ_SHARED_TYPE_NONE (0) /* None */
 #define CPUFREQ_SHARED_TYPE_HW	 (1) /* HW does needed coordination */
@@ -405,6 +408,7 @@ extern struct freq_attr cpufreq_freq_attr_scaling_available_freqs;
 
 void cpufreq_frequency_table_get_attr(struct cpufreq_frequency_table *table,
 				      unsigned int cpu);
+void cpufreq_frequency_table_update_policy_cpu(struct cpufreq_policy *policy);
 
 void cpufreq_frequency_table_put_attr(unsigned int cpu);
 #endif /* _LINUX_CPUFREQ_H */
-- 
cgit v1.2.3


From 9d95046e5d6afd6d7ae86fb71ab59c6faf0db8de Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Sun, 20 Jan 2013 10:24:28 +0000
Subject: cpufreq: Add a get_current_driver helper

Add a helper function to return cpufreq_driver->name.

Signed-off-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/cpufreq.c | 14 ++++++++++++++
 include/linux/cpufreq.h   |  1 +
 2 files changed, 15 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 2417576393a6..216c104d51ad 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -1395,6 +1395,20 @@ static struct syscore_ops cpufreq_syscore_ops = {
 	.resume		= cpufreq_bp_resume,
 };
 
+/**
+ *	cpufreq_get_current_driver - return current driver's name
+ *
+ *	Return the name string of the currently loaded cpufreq driver
+ *	or NULL, if none.
+ */
+const char *cpufreq_get_current_driver(void)
+{
+	if (cpufreq_driver)
+		return cpufreq_driver->name;
+
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(cpufreq_get_current_driver);
 
 /*********************************************************************
  *                     NOTIFIER LISTS INTERFACE                      *
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 52be2d0c994a..1f3a726640e8 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -411,4 +411,5 @@ void cpufreq_frequency_table_get_attr(struct cpufreq_frequency_table *table,
 void cpufreq_frequency_table_update_policy_cpu(struct cpufreq_policy *policy);
 
 void cpufreq_frequency_table_put_attr(unsigned int cpu);
+const char *cpufreq_get_current_driver(void);
 #endif /* _LINUX_CPUFREQ_H */
-- 
cgit v1.2.3


From ab45bd9bed36ad6c471b090bb8846ab7228fdf11 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Mon, 28 Jan 2013 14:50:39 +0100
Subject: cpufreq: Sort function prototypes properly

Move function prototypes to a place where they logically fit better.

Signed-off-by: Borislav Petkov <bp@suse.de>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/cpufreq.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 1f3a726640e8..5fdc6c6e3f8a 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -311,6 +311,9 @@ __ATTR(_name, 0444, show_##_name, NULL)
 static struct global_attr _name =		\
 __ATTR(_name, 0644, show_##_name, store_##_name)
 
+struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu);
+void cpufreq_cpu_put(struct cpufreq_policy *data);
+const char *cpufreq_get_current_driver(void);
 
 /*********************************************************************
  *                        CPUFREQ 2.6. INTERFACE                     *
@@ -400,8 +403,6 @@ int cpufreq_frequency_table_target(struct cpufreq_policy *policy,
 
 /* the following 3 funtions are for cpufreq core use only */
 struct cpufreq_frequency_table *cpufreq_frequency_get_table(unsigned int cpu);
-struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu);
-void   cpufreq_cpu_put(struct cpufreq_policy *data);
 
 /* the following are really really optional */
 extern struct freq_attr cpufreq_freq_attr_scaling_available_freqs;
@@ -411,5 +412,4 @@ void cpufreq_frequency_table_get_attr(struct cpufreq_frequency_table *table,
 void cpufreq_frequency_table_update_policy_cpu(struct cpufreq_policy *policy);
 
 void cpufreq_frequency_table_put_attr(unsigned int cpu);
-const char *cpufreq_get_current_driver(void);
 #endif /* _LINUX_CPUFREQ_H */
-- 
cgit v1.2.3


From 300586778d405f0a4d1f6dc51fcfb4fed567d020 Mon Sep 17 00:00:00 2001
From: Rob Herring <rob.herring@calxeda.com>
Date: Mon, 28 Jan 2013 16:13:14 +0000
Subject: ARM / highbank: add support for pl320 IPC

The pl320 IPC allows for interprocessor communication between the
highbank A9 and the EnergyCore Management Engine. The pl320 implements
a straightforward mailbox protocol.

Signed-off-by: Mark Langsdorf <mark.langsdorf@calxeda.com>
Signed-off-by: Rob Herring <rob.herring@calxeda.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 arch/arm/mach-highbank/Kconfig |   2 +
 drivers/Kconfig                |   2 +
 drivers/Makefile               |   1 +
 drivers/mailbox/Kconfig        |  19 ++++
 drivers/mailbox/Makefile       |   1 +
 drivers/mailbox/pl320-ipc.c    | 199 +++++++++++++++++++++++++++++++++++++++++
 include/linux/mailbox.h        |  17 ++++
 7 files changed, 241 insertions(+)
 create mode 100644 drivers/mailbox/Kconfig
 create mode 100644 drivers/mailbox/Makefile
 create mode 100644 drivers/mailbox/pl320-ipc.c
 create mode 100644 include/linux/mailbox.h

(limited to 'include/linux')

diff --git a/arch/arm/mach-highbank/Kconfig b/arch/arm/mach-highbank/Kconfig
index 551c97e87a78..2388085beaa2 100644
--- a/arch/arm/mach-highbank/Kconfig
+++ b/arch/arm/mach-highbank/Kconfig
@@ -11,5 +11,7 @@ config ARCH_HIGHBANK
 	select GENERIC_CLOCKEVENTS
 	select HAVE_ARM_SCU
 	select HAVE_SMP
+	select MAILBOX
+	select PL320_MBOX
 	select SPARSE_IRQ
 	select USE_OF
diff --git a/drivers/Kconfig b/drivers/Kconfig
index f5fb0722a63a..2b4e89ba15ad 100644
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@@ -134,6 +134,8 @@ source "drivers/hwspinlock/Kconfig"
 
 source "drivers/clocksource/Kconfig"
 
+source "drivers/mailbox/Kconfig"
+
 source "drivers/iommu/Kconfig"
 
 source "drivers/remoteproc/Kconfig"
diff --git a/drivers/Makefile b/drivers/Makefile
index 7863b9fee50b..a8d32f1094b4 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -130,6 +130,7 @@ obj-y				+= platform/
 #common clk code
 obj-y				+= clk/
 
+obj-$(CONFIG_MAILBOX)		+= mailbox/
 obj-$(CONFIG_HWSPINLOCK)	+= hwspinlock/
 obj-$(CONFIG_NFC)		+= nfc/
 obj-$(CONFIG_IOMMU_SUPPORT)	+= iommu/
diff --git a/drivers/mailbox/Kconfig b/drivers/mailbox/Kconfig
new file mode 100644
index 000000000000..9545c9f03809
--- /dev/null
+++ b/drivers/mailbox/Kconfig
@@ -0,0 +1,19 @@
+menuconfig MAILBOX
+	bool "Mailbox Hardware Support"
+	help
+	  Mailbox is a framework to control hardware communication between
+	  on-chip processors through queued messages and interrupt driven
+	  signals. Say Y if your platform supports hardware mailboxes.
+
+if MAILBOX
+config PL320_MBOX
+	bool "ARM PL320 Mailbox"
+	depends on ARM_AMBA
+	help
+	  An implementation of the ARM PL320 Interprocessor Communication
+	  Mailbox (IPCM), tailored for the Calxeda Highbank. It is used to
+	  send short messages between Highbank's A9 cores and the EnergyCore
+	  Management Engine, primarily for cpufreq. Say Y here if you want
+	  to use the PL320 IPCM support.
+
+endif
diff --git a/drivers/mailbox/Makefile b/drivers/mailbox/Makefile
new file mode 100644
index 000000000000..543ad6a79505
--- /dev/null
+++ b/drivers/mailbox/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_PL320_MBOX)	+= pl320-ipc.o
diff --git a/drivers/mailbox/pl320-ipc.c b/drivers/mailbox/pl320-ipc.c
new file mode 100644
index 000000000000..c45b3aedafba
--- /dev/null
+++ b/drivers/mailbox/pl320-ipc.c
@@ -0,0 +1,199 @@
+/*
+ * Copyright 2012 Calxeda, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#include <linux/types.h>
+#include <linux/err.h>
+#include <linux/delay.h>
+#include <linux/export.h>
+#include <linux/io.h>
+#include <linux/interrupt.h>
+#include <linux/completion.h>
+#include <linux/mutex.h>
+#include <linux/notifier.h>
+#include <linux/spinlock.h>
+#include <linux/device.h>
+#include <linux/amba/bus.h>
+
+#include <linux/mailbox.h>
+
+#define IPCMxSOURCE(m)		((m) * 0x40)
+#define IPCMxDSET(m)		(((m) * 0x40) + 0x004)
+#define IPCMxDCLEAR(m)		(((m) * 0x40) + 0x008)
+#define IPCMxDSTATUS(m)		(((m) * 0x40) + 0x00C)
+#define IPCMxMODE(m)		(((m) * 0x40) + 0x010)
+#define IPCMxMSET(m)		(((m) * 0x40) + 0x014)
+#define IPCMxMCLEAR(m)		(((m) * 0x40) + 0x018)
+#define IPCMxMSTATUS(m)		(((m) * 0x40) + 0x01C)
+#define IPCMxSEND(m)		(((m) * 0x40) + 0x020)
+#define IPCMxDR(m, dr)		(((m) * 0x40) + ((dr) * 4) + 0x024)
+
+#define IPCMMIS(irq)		(((irq) * 8) + 0x800)
+#define IPCMRIS(irq)		(((irq) * 8) + 0x804)
+
+#define MBOX_MASK(n)		(1 << (n))
+#define IPC_TX_MBOX		1
+#define IPC_RX_MBOX		2
+
+#define CHAN_MASK(n)		(1 << (n))
+#define A9_SOURCE		1
+#define M3_SOURCE		0
+
+static void __iomem *ipc_base;
+static int ipc_irq;
+static DEFINE_MUTEX(ipc_m1_lock);
+static DECLARE_COMPLETION(ipc_completion);
+static ATOMIC_NOTIFIER_HEAD(ipc_notifier);
+
+static inline void set_destination(int source, int mbox)
+{
+	__raw_writel(CHAN_MASK(source), ipc_base + IPCMxDSET(mbox));
+	__raw_writel(CHAN_MASK(source), ipc_base + IPCMxMSET(mbox));
+}
+
+static inline void clear_destination(int source, int mbox)
+{
+	__raw_writel(CHAN_MASK(source), ipc_base + IPCMxDCLEAR(mbox));
+	__raw_writel(CHAN_MASK(source), ipc_base + IPCMxMCLEAR(mbox));
+}
+
+static void __ipc_send(int mbox, u32 *data)
+{
+	int i;
+	for (i = 0; i < 7; i++)
+		__raw_writel(data[i], ipc_base + IPCMxDR(mbox, i));
+	__raw_writel(0x1, ipc_base + IPCMxSEND(mbox));
+}
+
+static u32 __ipc_rcv(int mbox, u32 *data)
+{
+	int i;
+	for (i = 0; i < 7; i++)
+		data[i] = __raw_readl(ipc_base + IPCMxDR(mbox, i));
+	return data[1];
+}
+
+/* blocking implmentation from the A9 side, not usuable in interrupts! */
+int pl320_ipc_transmit(u32 *data)
+{
+	int ret;
+
+	mutex_lock(&ipc_m1_lock);
+
+	init_completion(&ipc_completion);
+	__ipc_send(IPC_TX_MBOX, data);
+	ret = wait_for_completion_timeout(&ipc_completion,
+					  msecs_to_jiffies(1000));
+	if (ret == 0) {
+		ret = -ETIMEDOUT;
+		goto out;
+	}
+
+	ret = __ipc_rcv(IPC_TX_MBOX, data);
+out:
+	mutex_unlock(&ipc_m1_lock);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(pl320_ipc_transmit);
+
+static irqreturn_t ipc_handler(int irq, void *dev)
+{
+	u32 irq_stat;
+	u32 data[7];
+
+	irq_stat = __raw_readl(ipc_base + IPCMMIS(1));
+	if (irq_stat & MBOX_MASK(IPC_TX_MBOX)) {
+		__raw_writel(0, ipc_base + IPCMxSEND(IPC_TX_MBOX));
+		complete(&ipc_completion);
+	}
+	if (irq_stat & MBOX_MASK(IPC_RX_MBOX)) {
+		__ipc_rcv(IPC_RX_MBOX, data);
+		atomic_notifier_call_chain(&ipc_notifier, data[0], data + 1);
+		__raw_writel(2, ipc_base + IPCMxSEND(IPC_RX_MBOX));
+	}
+
+	return IRQ_HANDLED;
+}
+
+int pl320_ipc_register_notifier(struct notifier_block *nb)
+{
+	return atomic_notifier_chain_register(&ipc_notifier, nb);
+}
+EXPORT_SYMBOL_GPL(pl320_ipc_register_notifier);
+
+int pl320_ipc_unregister_notifier(struct notifier_block *nb)
+{
+	return atomic_notifier_chain_unregister(&ipc_notifier, nb);
+}
+EXPORT_SYMBOL_GPL(pl320_ipc_unregister_notifier);
+
+static int __init pl320_probe(struct amba_device *adev,
+				const struct amba_id *id)
+{
+	int ret;
+
+	ipc_base = ioremap(adev->res.start, resource_size(&adev->res));
+	if (ipc_base == NULL)
+		return -ENOMEM;
+
+	__raw_writel(0, ipc_base + IPCMxSEND(IPC_TX_MBOX));
+
+	ipc_irq = adev->irq[0];
+	ret = request_irq(ipc_irq, ipc_handler, 0, dev_name(&adev->dev), NULL);
+	if (ret < 0)
+		goto err;
+
+	/* Init slow mailbox */
+	__raw_writel(CHAN_MASK(A9_SOURCE),
+			ipc_base + IPCMxSOURCE(IPC_TX_MBOX));
+	__raw_writel(CHAN_MASK(M3_SOURCE),
+			ipc_base + IPCMxDSET(IPC_TX_MBOX));
+	__raw_writel(CHAN_MASK(M3_SOURCE) | CHAN_MASK(A9_SOURCE),
+		     ipc_base + IPCMxMSET(IPC_TX_MBOX));
+
+	/* Init receive mailbox */
+	__raw_writel(CHAN_MASK(M3_SOURCE),
+			ipc_base + IPCMxSOURCE(IPC_RX_MBOX));
+	__raw_writel(CHAN_MASK(A9_SOURCE),
+			ipc_base + IPCMxDSET(IPC_RX_MBOX));
+	__raw_writel(CHAN_MASK(M3_SOURCE) | CHAN_MASK(A9_SOURCE),
+		     ipc_base + IPCMxMSET(IPC_RX_MBOX));
+
+	return 0;
+err:
+	iounmap(ipc_base);
+	return ret;
+}
+
+static struct amba_id pl320_ids[] = {
+	{
+		.id	= 0x00041320,
+		.mask	= 0x000fffff,
+	},
+	{ 0, 0 },
+};
+
+static struct amba_driver pl320_driver = {
+	.drv = {
+		.name	= "pl320",
+	},
+	.id_table	= pl320_ids,
+	.probe		= pl320_probe,
+};
+
+static int __init ipc_init(void)
+{
+	return amba_driver_register(&pl320_driver);
+}
+module_init(ipc_init);
diff --git a/include/linux/mailbox.h b/include/linux/mailbox.h
new file mode 100644
index 000000000000..5161f63ec1c8
--- /dev/null
+++ b/include/linux/mailbox.h
@@ -0,0 +1,17 @@
+/*
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+int pl320_ipc_transmit(u32 *data);
+int pl320_ipc_register_notifier(struct notifier_block *nb);
+int pl320_ipc_unregister_notifier(struct notifier_block *nb);
-- 
cgit v1.2.3


From 951fc5f45836988c7df1d05c7f4658f331e7a920 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Thu, 31 Jan 2013 02:03:53 +0000
Subject: cpufreq: Update Documentation for cpus and related_cpus

Documentation related to cpus and related_cpus is confusing and not very clear.
Over that CPUFreq core has seen much changes recently. Lets update documentation
and comments for cpus and related_cpus.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 Documentation/cpu-freq/cpu-drivers.txt | 6 ++++++
 Documentation/cpu-freq/user-guide.txt  | 8 ++++----
 include/linux/cpufreq.h                | 6 ++++--
 3 files changed, 14 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/cpu-freq/cpu-drivers.txt b/Documentation/cpu-freq/cpu-drivers.txt
index c436096351f8..72f70b16d299 100644
--- a/Documentation/cpu-freq/cpu-drivers.txt
+++ b/Documentation/cpu-freq/cpu-drivers.txt
@@ -111,6 +111,12 @@ policy->governor		must contain the "default policy" for
 For setting some of these values, the frequency table helpers might be
 helpful. See the section 2 for more information on them.
 
+SMP systems normally have same clock source for a group of cpus. For these the
+.init() would be called only once for the first online cpu. Here the .init()
+routine must initialize policy->cpus with mask of all possible cpus (Online +
+Offline) that share the clock. Then the core would copy this mask onto
+policy->related_cpus and will reset policy->cpus to carry only online cpus.
+
 
 1.3 verify
 ------------
diff --git a/Documentation/cpu-freq/user-guide.txt b/Documentation/cpu-freq/user-guide.txt
index 04f6b32993e6..ff2f28332cc4 100644
--- a/Documentation/cpu-freq/user-guide.txt
+++ b/Documentation/cpu-freq/user-guide.txt
@@ -190,11 +190,11 @@ scaling_max_freq		show the current "policy limits" (in
 				first set scaling_max_freq, then
 				scaling_min_freq.
 
-affected_cpus :			List of CPUs that require software coordination
-				of frequency.
+affected_cpus :			List of Online CPUs that require software
+				coordination of frequency.
 
-related_cpus :			List of CPUs that need some sort of frequency
-				coordination, whether software or hardware.
+related_cpus :			List of Online + Offline CPUs that need software
+				coordination of frequency.
 
 scaling_driver :		Hardware driver for cpufreq.
 
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 5fdc6c6e3f8a..753b198750cf 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -89,8 +89,10 @@ struct cpufreq_real_policy {
 };
 
 struct cpufreq_policy {
-	cpumask_var_t		cpus;	/* CPUs requiring sw coordination */
-	cpumask_var_t		related_cpus; /* CPUs with any coordination */
+	/* CPUs sharing clock, require sw coordination */
+	cpumask_var_t		cpus;	/* Online CPUs only */
+	cpumask_var_t		related_cpus; /* Online + Offline CPUs */
+
 	unsigned int		shared_type; /* ANY or ALL affected CPUs
 						should set cpufreq */
 	unsigned int		cpu;    /* cpu nr of CPU managing this policy */
-- 
cgit v1.2.3


From 2624f90c16413990ecb0414400174a066319a9f5 Mon Sep 17 00:00:00 2001
From: Fabio Baltieri <fabio.baltieri@linaro.org>
Date: Thu, 31 Jan 2013 09:44:40 +0000
Subject: cpufreq: governors: implement generic policy_is_shared

Implement a generic helper function policy_is_shared() to replace the
current dbs_sw_coordinated_cpus() at cpufreq level, so that it can be
used by code other than cpufreq governors.

Suggested-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Fabio Baltieri <fabio.baltieri@linaro.org>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/acpi-cpufreq.c         | 2 +-
 drivers/cpufreq/cpufreq_conservative.c | 2 +-
 drivers/cpufreq/cpufreq_governor.c     | 8 --------
 drivers/cpufreq/cpufreq_governor.h     | 1 -
 drivers/cpufreq/cpufreq_ondemand.c     | 2 +-
 drivers/cpufreq/cpufreq_stats.c        | 2 +-
 include/linux/cpufreq.h                | 5 +++++
 7 files changed, 9 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c
index 22f9b47c5c74..937bc286591f 100644
--- a/drivers/cpufreq/acpi-cpufreq.c
+++ b/drivers/cpufreq/acpi-cpufreq.c
@@ -734,7 +734,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
 
 #ifdef CONFIG_SMP
 	dmi_check_system(sw_any_bug_dmi_table);
-	if (bios_with_sw_any_bug && cpumask_weight(policy->cpus) == 1) {
+	if (bios_with_sw_any_bug && !policy_is_shared(policy)) {
 		policy->shared_type = CPUFREQ_SHARED_TYPE_ALL;
 		cpumask_copy(policy->cpus, cpu_core_mask(cpu));
 	}
diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c
index 5d8e8942ec97..653fb0652412 100644
--- a/drivers/cpufreq/cpufreq_conservative.c
+++ b/drivers/cpufreq/cpufreq_conservative.c
@@ -154,7 +154,7 @@ static void cs_dbs_timer(struct work_struct *work)
 	struct cs_cpu_dbs_info_s *dbs_info = container_of(work,
 			struct cs_cpu_dbs_info_s, cdbs.work.work);
 
-	if (dbs_sw_coordinated_cpus(&dbs_info->cdbs)) {
+	if (policy_is_shared(dbs_info->cdbs.cur_policy)) {
 		cs_timer_coordinated(dbs_info, dw);
 	} else {
 		mutex_lock(&dbs_info->cdbs.timer_mutex);
diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
index 46f96a4cebf9..67e235acf43b 100644
--- a/drivers/cpufreq/cpufreq_governor.c
+++ b/drivers/cpufreq/cpufreq_governor.c
@@ -161,14 +161,6 @@ void dbs_check_cpu(struct dbs_data *dbs_data, int cpu)
 }
 EXPORT_SYMBOL_GPL(dbs_check_cpu);
 
-bool dbs_sw_coordinated_cpus(struct cpu_dbs_common_info *cdbs)
-{
-	struct cpufreq_policy *policy = cdbs->cur_policy;
-
-	return cpumask_weight(policy->cpus) > 1;
-}
-EXPORT_SYMBOL_GPL(dbs_sw_coordinated_cpus);
-
 static inline void dbs_timer_init(struct dbs_data *dbs_data, int cpu,
 				  unsigned int sampling_rate)
 {
diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h
index aaf073daa9fb..b72e628e7ed6 100644
--- a/drivers/cpufreq/cpufreq_governor.h
+++ b/drivers/cpufreq/cpufreq_governor.h
@@ -172,7 +172,6 @@ static inline int delay_for_sampling_rate(unsigned int sampling_rate)
 
 u64 get_cpu_idle_time(unsigned int cpu, u64 *wall);
 void dbs_check_cpu(struct dbs_data *dbs_data, int cpu);
-bool dbs_sw_coordinated_cpus(struct cpu_dbs_common_info *cdbs);
 int cpufreq_governor_dbs(struct dbs_data *dbs_data,
 		struct cpufreq_policy *policy, unsigned int event);
 #endif /* _CPUFREQ_GOVERNER_H */
diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
index 1017b90b902e..5ae84ffeafb3 100644
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -277,7 +277,7 @@ static void od_dbs_timer(struct work_struct *work)
 	struct od_cpu_dbs_info_s *dbs_info =
 		container_of(work, struct od_cpu_dbs_info_s, cdbs.work.work);
 
-	if (dbs_sw_coordinated_cpus(&dbs_info->cdbs)) {
+	if (policy_is_shared(dbs_info->cdbs.cur_policy)) {
 		od_timer_coordinated(dbs_info, dw);
 	} else {
 		mutex_lock(&dbs_info->cdbs.timer_mutex);
diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c
index beef6b54382b..572124c6e36b 100644
--- a/drivers/cpufreq/cpufreq_stats.c
+++ b/drivers/cpufreq/cpufreq_stats.c
@@ -185,7 +185,7 @@ static void cpufreq_stats_free_table(unsigned int cpu)
 static void cpufreq_stats_free_sysfs(unsigned int cpu)
 {
 	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
-	if (policy && (cpumask_weight(policy->cpus) == 1)) {
+	if (policy && !policy_is_shared(policy)) {
 		pr_debug("%s: Free sysfs stat\n", __func__);
 		sysfs_remove_group(&policy->kobj, &stats_attr_group);
 	}
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 753b198750cf..feb360c8aa88 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -127,6 +127,11 @@ struct cpufreq_policy {
 #define CPUFREQ_SHARED_TYPE_ALL	 (2) /* All dependent CPUs should set freq */
 #define CPUFREQ_SHARED_TYPE_ANY	 (3) /* Freq can be set from any dependent CPU*/
 
+static inline bool policy_is_shared(struct cpufreq_policy *policy)
+{
+	return cpumask_weight(policy->cpus) > 1;
+}
+
 /******************** cpufreq transition notifiers *******************/
 
 #define CPUFREQ_PRECHANGE	(0)
-- 
cgit v1.2.3


From b394058f064848deac7a7cd6942b6521d7b3fe1d Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Fri, 1 Feb 2013 05:42:58 +0000
Subject: cpufreq: governors: Reset tunables only for
 cpufreq_unregister_governor()

Currently, whenever governor->governor() is called for CPUFRREQ_GOV_START event
we reset few tunables of governor. Which isn't correct, as this routine is
called for every cpu hot-[un]plugging event. We should actually be resetting
these only when the governor module is removed and re-installed.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/cpufreq.c          |  4 ++++
 drivers/cpufreq/cpufreq_governor.c | 24 ++++++++++++++++--------
 include/linux/cpufreq.h            |  1 +
 3 files changed, 21 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 1cea7a1eac13..0b4be4481433 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -1562,6 +1562,9 @@ static int __cpufreq_governor(struct cpufreq_policy *policy,
 						policy->cpu, event);
 	ret = policy->governor->governor(policy, event);
 
+	if (!policy->governor->initialized && (event == CPUFREQ_GOV_START))
+		policy->governor->initialized = 1;
+
 	/* we keep one module reference alive for
 			each CPU governed by this CPU */
 	if ((event != CPUFREQ_GOV_START) || ret)
@@ -1585,6 +1588,7 @@ int cpufreq_register_governor(struct cpufreq_governor *governor)
 
 	mutex_lock(&cpufreq_governor_mutex);
 
+	governor->initialized = 0;
 	err = -EBUSY;
 	if (__find_governor(governor->name) == NULL) {
 		err = 0;
diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
index 7aaa9b151940..79795c4bf611 100644
--- a/drivers/cpufreq/cpufreq_governor.c
+++ b/drivers/cpufreq/cpufreq_governor.c
@@ -254,11 +254,6 @@ int cpufreq_governor_dbs(struct dbs_data *dbs_data,
 			return rc;
 		}
 
-		/* policy latency is in nS. Convert it to uS first */
-		latency = policy->cpuinfo.transition_latency / 1000;
-		if (latency == 0)
-			latency = 1;
-
 		/*
 		 * conservative does not implement micro like ondemand
 		 * governor, thus we are bound to jiffes/HZ
@@ -270,20 +265,33 @@ int cpufreq_governor_dbs(struct dbs_data *dbs_data,
 			cpufreq_register_notifier(cs_ops->notifier_block,
 					CPUFREQ_TRANSITION_NOTIFIER);
 
-			dbs_data->min_sampling_rate = MIN_SAMPLING_RATE_RATIO *
-				jiffies_to_usecs(10);
+			if (!policy->governor->initialized)
+				dbs_data->min_sampling_rate =
+					MIN_SAMPLING_RATE_RATIO *
+					jiffies_to_usecs(10);
 		} else {
 			od_dbs_info->rate_mult = 1;
 			od_dbs_info->sample_type = OD_NORMAL_SAMPLE;
 			od_ops->powersave_bias_init_cpu(cpu);
-			od_tuners->io_is_busy = od_ops->io_busy();
+
+			if (!policy->governor->initialized)
+				od_tuners->io_is_busy = od_ops->io_busy();
 		}
 
+		if (policy->governor->initialized)
+			goto unlock;
+
+		/* policy latency is in nS. Convert it to uS first */
+		latency = policy->cpuinfo.transition_latency / 1000;
+		if (latency == 0)
+			latency = 1;
+
 		/* Bring kernel and HW constraints together */
 		dbs_data->min_sampling_rate = max(dbs_data->min_sampling_rate,
 				MIN_LATENCY_MULTIPLIER * latency);
 		*sampling_rate = max(dbs_data->min_sampling_rate, latency *
 				LATENCY_MULTIPLIER);
+unlock:
 		mutex_unlock(&dbs_data->mutex);
 
 		/* Initiate timer time stamp */
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index feb360c8aa88..6bf3f2d12c90 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -183,6 +183,7 @@ static inline unsigned long cpufreq_scale(unsigned long old, u_int div, u_int mu
 
 struct cpufreq_governor {
 	char	name[CPUFREQ_NAME_LEN];
+	int	initialized;
 	int	(*governor)	(struct cpufreq_policy *policy,
 				 unsigned int event);
 	ssize_t	(*show_setspeed)	(struct cpufreq_policy *policy,
-- 
cgit v1.2.3


From 62b36cc1c83aca1cd252772e82cbc5d9ef8ff25b Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Fri, 1 Feb 2013 06:40:02 +0000
Subject: cpufreq: Remove unnecessary use of policy->shared_type

policy->shared_type field was added only for SoCs with ACPI support:

commit 3b2d99429e3386b6e2ac949fc72486509c8bbe36
Author: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Date:   Wed Dec 14 15:05:00 2005 -0500

    P-state software coordination for ACPI core

    http://bugzilla.kernel.org/show_bug.cgi?id=5737

Many non-ACPI systems are filling this field by mistake, which makes its usage
confusing. Lets clean it.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Acked-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 arch/arm/mach-tegra/cpu-tegra.c  | 1 -
 drivers/cpufreq/cpufreq-cpu0.c   | 1 -
 drivers/cpufreq/db8500-cpufreq.c | 2 --
 drivers/cpufreq/omap-cpufreq.c   | 4 +---
 include/linux/cpufreq.h          | 3 ++-
 5 files changed, 3 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-tegra/cpu-tegra.c b/arch/arm/mach-tegra/cpu-tegra.c
index e7ddcb2b5261..a36a03d3c9a0 100644
--- a/arch/arm/mach-tegra/cpu-tegra.c
+++ b/arch/arm/mach-tegra/cpu-tegra.c
@@ -243,7 +243,6 @@ static int tegra_cpu_init(struct cpufreq_policy *policy)
 	/* FIXME: what's the actual transition time? */
 	policy->cpuinfo.transition_latency = 300 * 1000;
 
-	policy->shared_type = CPUFREQ_SHARED_TYPE_ALL;
 	cpumask_copy(policy->cpus, cpu_possible_mask);
 
 	if (policy->cpu == 0)
diff --git a/drivers/cpufreq/cpufreq-cpu0.c b/drivers/cpufreq/cpufreq-cpu0.c
index a108f66271ee..4e5b7fb8927c 100644
--- a/drivers/cpufreq/cpufreq-cpu0.c
+++ b/drivers/cpufreq/cpufreq-cpu0.c
@@ -146,7 +146,6 @@ static int cpu0_cpufreq_init(struct cpufreq_policy *policy)
 	 * share the clock and voltage and clock.  Use cpufreq affected_cpus
 	 * interface to have all CPUs scaled together.
 	 */
-	policy->shared_type = CPUFREQ_SHARED_TYPE_ANY;
 	cpumask_setall(policy->cpus);
 
 	cpufreq_frequency_table_get_attr(freq_table, policy->cpu);
diff --git a/drivers/cpufreq/db8500-cpufreq.c b/drivers/cpufreq/db8500-cpufreq.c
index e12dff601b8a..79a84860ea56 100644
--- a/drivers/cpufreq/db8500-cpufreq.c
+++ b/drivers/cpufreq/db8500-cpufreq.c
@@ -130,8 +130,6 @@ static int __cpuinit db8500_cpufreq_init(struct cpufreq_policy *policy)
 	/* policy sharing between dual CPUs */
 	cpumask_setall(policy->cpus);
 
-	policy->shared_type = CPUFREQ_SHARED_TYPE_ALL;
-
 	return 0;
 }
 
diff --git a/drivers/cpufreq/omap-cpufreq.c b/drivers/cpufreq/omap-cpufreq.c
index 97102b05843f..9128c07bafba 100644
--- a/drivers/cpufreq/omap-cpufreq.c
+++ b/drivers/cpufreq/omap-cpufreq.c
@@ -214,10 +214,8 @@ static int __cpuinit omap_cpu_init(struct cpufreq_policy *policy)
 	 * interface to handle this scenario. Additional is_smp() check
 	 * is to keep SMP_ON_UP build working.
 	 */
-	if (is_smp()) {
-		policy->shared_type = CPUFREQ_SHARED_TYPE_ANY;
+	if (is_smp())
 		cpumask_setall(policy->cpus);
-	}
 
 	/* FIXME: what's the actual transition time? */
 	policy->cpuinfo.transition_latency = 300 * 1000;
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 6bf3f2d12c90..a22944ca0526 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -93,7 +93,7 @@ struct cpufreq_policy {
 	cpumask_var_t		cpus;	/* Online CPUs only */
 	cpumask_var_t		related_cpus; /* Online + Offline CPUs */
 
-	unsigned int		shared_type; /* ANY or ALL affected CPUs
+	unsigned int		shared_type; /* ACPI: ANY or ALL affected CPUs
 						should set cpufreq */
 	unsigned int		cpu;    /* cpu nr of CPU managing this policy */
 	unsigned int		last_cpu; /* cpu nr of previous CPU that managed
@@ -122,6 +122,7 @@ struct cpufreq_policy {
 #define CPUFREQ_START			(3)
 #define CPUFREQ_UPDATE_POLICY_CPU	(4)
 
+/* Only for ACPI */
 #define CPUFREQ_SHARED_TYPE_NONE (0) /* None */
 #define CPUFREQ_SHARED_TYPE_HW	 (1) /* HW does needed coordination */
 #define CPUFREQ_SHARED_TYPE_ALL	 (2) /* All dependent CPUs should set freq */
-- 
cgit v1.2.3


From 9ea6cdac9ba5fbc65adde4dc6a3cbee1206508df Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@ti.com>
Date: Fri, 21 Dec 2012 01:43:55 -0800
Subject: leds: leds-pwm: Convert to use devm_get_pwm

Update the driver to use the new API for requesting pwm so we can take
advantage of the pwm_lookup table to find the correct pwm to be used for the
LED functionality.

Signed-off-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
Signed-off-by: Bryan Wu <cooloney@gmail.com>
---
 drivers/leds/leds-pwm.c  | 19 ++++++-------------
 include/linux/leds_pwm.h |  2 +-
 2 files changed, 7 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/leds/leds-pwm.c b/drivers/leds/leds-pwm.c
index 2157524f277c..351257c2a7b8 100644
--- a/drivers/leds/leds-pwm.c
+++ b/drivers/leds/leds-pwm.c
@@ -67,12 +67,11 @@ static int led_pwm_probe(struct platform_device *pdev)
 		cur_led = &pdata->leds[i];
 		led_dat = &leds_data[i];
 
-		led_dat->pwm = pwm_request(cur_led->pwm_id,
-				cur_led->name);
+		led_dat->pwm = devm_pwm_get(&pdev->dev, cur_led->name);
 		if (IS_ERR(led_dat->pwm)) {
 			ret = PTR_ERR(led_dat->pwm);
-			dev_err(&pdev->dev, "unable to request PWM %d\n",
-					cur_led->pwm_id);
+			dev_err(&pdev->dev, "unable to request PWM for %s\n",
+				cur_led->name);
 			goto err;
 		}
 
@@ -86,10 +85,8 @@ static int led_pwm_probe(struct platform_device *pdev)
 		led_dat->cdev.flags |= LED_CORE_SUSPENDRESUME;
 
 		ret = led_classdev_register(&pdev->dev, &led_dat->cdev);
-		if (ret < 0) {
-			pwm_free(led_dat->pwm);
+		if (ret < 0)
 			goto err;
-		}
 	}
 
 	platform_set_drvdata(pdev, leds_data);
@@ -98,10 +95,8 @@ static int led_pwm_probe(struct platform_device *pdev)
 
 err:
 	if (i > 0) {
-		for (i = i - 1; i >= 0; i--) {
+		for (i = i - 1; i >= 0; i--)
 			led_classdev_unregister(&leds_data[i].cdev);
-			pwm_free(leds_data[i].pwm);
-		}
 	}
 
 	return ret;
@@ -115,10 +110,8 @@ static int led_pwm_remove(struct platform_device *pdev)
 
 	leds_data = platform_get_drvdata(pdev);
 
-	for (i = 0; i < pdata->num_leds; i++) {
+	for (i = 0; i < pdata->num_leds; i++)
 		led_classdev_unregister(&leds_data[i].cdev);
-		pwm_free(leds_data[i].pwm);
-	}
 
 	return 0;
 }
diff --git a/include/linux/leds_pwm.h b/include/linux/leds_pwm.h
index 33a071167489..a65e9646e4b1 100644
--- a/include/linux/leds_pwm.h
+++ b/include/linux/leds_pwm.h
@@ -7,7 +7,7 @@
 struct led_pwm {
 	const char	*name;
 	const char	*default_trigger;
-	unsigned	pwm_id;
+	unsigned	pwm_id __deprecated;
 	u8 		active_low;
 	unsigned 	max_brightness;
 	unsigned	pwm_period_ns;
-- 
cgit v1.2.3


From d4c0c4705bef1134339b038872ece3705a2783e0 Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@ti.com>
Date: Fri, 21 Dec 2012 01:43:57 -0800
Subject: pwm: Correct parameter name in header for *pwm_get() functions

To synchronize the header file definition and the actual code. In the code
the consumer parameter is named as con_id, change the header file and replace
consumer -> con_id in the parameter list.

Signed-off-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
Ackedy-by: Thierry Reding <thierry.reding@avionic-design.de>
Signed-off-by: Bryan Wu <cooloney@gmail.com>
---
 include/linux/pwm.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pwm.h b/include/linux/pwm.h
index 6d661f32e0e4..cc908a5396f8 100644
--- a/include/linux/pwm.h
+++ b/include/linux/pwm.h
@@ -174,10 +174,10 @@ struct pwm_device *pwm_request_from_chip(struct pwm_chip *chip,
 struct pwm_device *of_pwm_xlate_with_flags(struct pwm_chip *pc,
 		const struct of_phandle_args *args);
 
-struct pwm_device *pwm_get(struct device *dev, const char *consumer);
+struct pwm_device *pwm_get(struct device *dev, const char *con_id);
 void pwm_put(struct pwm_device *pwm);
 
-struct pwm_device *devm_pwm_get(struct device *dev, const char *consumer);
+struct pwm_device *devm_pwm_get(struct device *dev, const char *con_id);
 void devm_pwm_put(struct device *dev, struct pwm_device *pwm);
 #else
 static inline int pwm_set_chip_data(struct pwm_device *pwm, void *data)
-- 
cgit v1.2.3


From 8eb9612799605a7988d1c97cdc5980a5b8f04c56 Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@ti.com>
Date: Fri, 21 Dec 2012 01:43:58 -0800
Subject: pwm: core: Rename of_pwm_request() to of_pwm_get() and export it

Allow client driver to use of_pwm_get() to get the PWM they need. This
is needed for drivers which handle more than one PWM separately, like
leds-pwm driver, which have:

pwmleds {
	compatible = "pwm-leds";
	kpad {
		label = "omap4::keypad";
		pwms = <&twl_pwm 0 7812500>;
		max-brightness = <127>;
	};

	charging {
		label = "omap4:green:chrg";
		pwms = <&twl_pwmled 0 7812500>;
		max-brightness = <255>;
	};
};

in the dts files.

Signed-off-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
Acked-by: Thierry Reding <thierry.reding@avionic-design.de>
Signed-off-by: Bryan Wu <cooloney@gmail.com>
---
 drivers/pwm/core.c  | 8 ++++----
 include/linux/pwm.h | 7 +++++++
 2 files changed, 11 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c
index 903138b18842..3cb741dc2038 100644
--- a/drivers/pwm/core.c
+++ b/drivers/pwm/core.c
@@ -471,7 +471,7 @@ static struct pwm_chip *of_node_to_pwmchip(struct device_node *np)
 }
 
 /**
- * of_pwm_request() - request a PWM via the PWM framework
+ * of_pwm_get() - request a PWM via the PWM framework
  * @np: device node to get the PWM from
  * @con_id: consumer name
  *
@@ -486,8 +486,7 @@ static struct pwm_chip *of_node_to_pwmchip(struct device_node *np)
  * becomes mandatory for devices that look up the PWM device via the con_id
  * parameter.
  */
-static struct pwm_device *of_pwm_request(struct device_node *np,
-					 const char *con_id)
+struct pwm_device *of_pwm_get(struct device_node *np, const char *con_id)
 {
 	struct pwm_device *pwm = NULL;
 	struct of_phandle_args args;
@@ -545,6 +544,7 @@ put:
 
 	return pwm;
 }
+EXPORT_SYMBOL_GPL(of_pwm_get);
 
 /**
  * pwm_add_table() - register PWM device consumers
@@ -587,7 +587,7 @@ struct pwm_device *pwm_get(struct device *dev, const char *con_id)
 
 	/* look up via DT first */
 	if (IS_ENABLED(CONFIG_OF) && dev && dev->of_node)
-		return of_pwm_request(dev->of_node, con_id);
+		return of_pwm_get(dev->of_node, con_id);
 
 	/*
 	 * We look up the provider in the static table typically provided by
diff --git a/include/linux/pwm.h b/include/linux/pwm.h
index cc908a5396f8..76a1959f2b23 100644
--- a/include/linux/pwm.h
+++ b/include/linux/pwm.h
@@ -175,6 +175,7 @@ struct pwm_device *of_pwm_xlate_with_flags(struct pwm_chip *pc,
 		const struct of_phandle_args *args);
 
 struct pwm_device *pwm_get(struct device *dev, const char *con_id);
+struct pwm_device *of_pwm_get(struct device_node *np, const char *con_id);
 void pwm_put(struct pwm_device *pwm);
 
 struct pwm_device *devm_pwm_get(struct device *dev, const char *con_id);
@@ -213,6 +214,12 @@ static inline struct pwm_device *pwm_get(struct device *dev,
 	return ERR_PTR(-ENODEV);
 }
 
+static inline struct pwm_device *of_pwm_get(struct device_node *np,
+					    const char *con_id)
+{
+	return ERR_PTR(-ENODEV);
+}
+
 static inline void pwm_put(struct pwm_device *pwm)
 {
 }
-- 
cgit v1.2.3


From 261a5edd3ac77ecb4b33310a1dd1ed8d656f0569 Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@ti.com>
Date: Fri, 21 Dec 2012 01:43:59 -0800
Subject: pwm: Add devm_of_pwm_get() as exported API for users

When booted with DT users can use devm version of of_pwm_get() to benefit
from automatic resource release.

Signed-off-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
Acked-by: Thierry Reding <thierry.reding@avionic-design.de>
Signed-off-by: Bryan Wu <cooloney@gmail.com>
---
 drivers/pwm/core.c  | 30 ++++++++++++++++++++++++++++++
 include/linux/pwm.h |  9 +++++++++
 2 files changed, 39 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c
index 3cb741dc2038..4a13da48fefe 100644
--- a/drivers/pwm/core.c
+++ b/drivers/pwm/core.c
@@ -708,6 +708,36 @@ struct pwm_device *devm_pwm_get(struct device *dev, const char *con_id)
 }
 EXPORT_SYMBOL_GPL(devm_pwm_get);
 
+/**
+ * devm_of_pwm_get() - resource managed of_pwm_get()
+ * @dev: device for PWM consumer
+ * @np: device node to get the PWM from
+ * @con_id: consumer name
+ *
+ * This function performs like of_pwm_get() but the acquired PWM device will
+ * automatically be released on driver detach.
+ */
+struct pwm_device *devm_of_pwm_get(struct device *dev, struct device_node *np,
+				   const char *con_id)
+{
+	struct pwm_device **ptr, *pwm;
+
+	ptr = devres_alloc(devm_pwm_release, sizeof(**ptr), GFP_KERNEL);
+	if (!ptr)
+		return ERR_PTR(-ENOMEM);
+
+	pwm = of_pwm_get(np, con_id);
+	if (!IS_ERR(pwm)) {
+		*ptr = pwm;
+		devres_add(dev, ptr);
+	} else {
+		devres_free(ptr);
+	}
+
+	return pwm;
+}
+EXPORT_SYMBOL_GPL(devm_of_pwm_get);
+
 static int devm_pwm_match(struct device *dev, void *res, void *data)
 {
 	struct pwm_device **p = res;
diff --git a/include/linux/pwm.h b/include/linux/pwm.h
index 76a1959f2b23..70655a205b74 100644
--- a/include/linux/pwm.h
+++ b/include/linux/pwm.h
@@ -179,6 +179,8 @@ struct pwm_device *of_pwm_get(struct device_node *np, const char *con_id);
 void pwm_put(struct pwm_device *pwm);
 
 struct pwm_device *devm_pwm_get(struct device *dev, const char *con_id);
+struct pwm_device *devm_of_pwm_get(struct device *dev, struct device_node *np,
+				   const char *con_id);
 void devm_pwm_put(struct device *dev, struct pwm_device *pwm);
 #else
 static inline int pwm_set_chip_data(struct pwm_device *pwm, void *data)
@@ -230,6 +232,13 @@ static inline struct pwm_device *devm_pwm_get(struct device *dev,
 	return ERR_PTR(-ENODEV);
 }
 
+static inline struct pwm_device *devm_of_pwm_get(struct device *dev,
+						 struct device_node *np,
+						 const char *con_id)
+{
+	return ERR_PTR(-ENODEV);
+}
+
 static inline void devm_pwm_put(struct device *dev, struct pwm_device *pwm)
 {
 }
-- 
cgit v1.2.3


From 9dbf091da080508e9f632d307f357beb79a0766b Mon Sep 17 00:00:00 2001
From: Manuel Stahl <manuel.stahl@iis.fraunhofer.de>
Date: Fri, 1 Feb 2013 08:51:00 +0000
Subject: iio: gyro: Add itg3200

This patch adds support for the InvenSense itg3200.
The itg3200 is a three-axis gyro with 16-bit ADC and
I2C interface.

Signed-off-by: Manuel Stahl <manuel.stahl@iis.fraunhofer.de>
Reviewed-by: Lars-Peter Clausen <lars@metafoo.de>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 drivers/iio/gyro/Kconfig          |   8 +
 drivers/iio/gyro/Makefile         |   4 +
 drivers/iio/gyro/itg3200_buffer.c | 156 +++++++++++++++
 drivers/iio/gyro/itg3200_core.c   | 401 ++++++++++++++++++++++++++++++++++++++
 include/linux/iio/gyro/itg3200.h  | 154 +++++++++++++++
 5 files changed, 723 insertions(+)
 create mode 100644 drivers/iio/gyro/itg3200_buffer.c
 create mode 100644 drivers/iio/gyro/itg3200_core.c
 create mode 100644 include/linux/iio/gyro/itg3200.h

(limited to 'include/linux')

diff --git a/drivers/iio/gyro/Kconfig b/drivers/iio/gyro/Kconfig
index 51ac6bc791c6..6be4628faffe 100644
--- a/drivers/iio/gyro/Kconfig
+++ b/drivers/iio/gyro/Kconfig
@@ -70,4 +70,12 @@ config IIO_ST_GYRO_SPI_3AXIS
 	depends on IIO_ST_GYRO_3AXIS
 	depends on IIO_ST_SENSORS_SPI
 
+config ITG3200
+	tristate "InvenSense ITG3200 Digital 3-Axis Gyroscope I2C driver"
+	depends on I2C
+	select IIO_TRIGGERED_BUFFER if IIO_BUFFER
+	help
+	  Say yes here to add support for the InvenSense ITG3200 digital
+	  3-axis gyroscope sensor.
+
 endmenu
diff --git a/drivers/iio/gyro/Makefile b/drivers/iio/gyro/Makefile
index eff9bb0119a2..225d289082e6 100644
--- a/drivers/iio/gyro/Makefile
+++ b/drivers/iio/gyro/Makefile
@@ -8,6 +8,10 @@ obj-$(CONFIG_ADXRS450) += adxrs450.o
 
 obj-$(CONFIG_HID_SENSOR_GYRO_3D) += hid-sensor-gyro-3d.o
 
+itg3200-y               := itg3200_core.o
+itg3200-$(CONFIG_IIO_BUFFER) += itg3200_buffer.o
+obj-$(CONFIG_ITG3200)   += itg3200.o
+
 obj-$(CONFIG_IIO_ST_GYRO_3AXIS) += st_gyro.o
 st_gyro-y := st_gyro_core.o
 st_gyro-$(CONFIG_IIO_BUFFER) += st_gyro_buffer.o
diff --git a/drivers/iio/gyro/itg3200_buffer.c b/drivers/iio/gyro/itg3200_buffer.c
new file mode 100644
index 000000000000..f667d2c8c00f
--- /dev/null
+++ b/drivers/iio/gyro/itg3200_buffer.c
@@ -0,0 +1,156 @@
+/*
+ * itg3200_buffer.c -- support InvenSense ITG3200
+ *                     Digital 3-Axis Gyroscope driver
+ *
+ * Copyright (c) 2011 Christian Strobel <christian.strobel@iis.fraunhofer.de>
+ * Copyright (c) 2011 Manuel Stahl <manuel.stahl@iis.fraunhofer.de>
+ * Copyright (c) 2012 Thorsten Nowak <thorsten.nowak@iis.fraunhofer.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/slab.h>
+#include <linux/i2c.h>
+#include <linux/interrupt.h>
+
+#include <linux/iio/iio.h>
+#include <linux/iio/buffer.h>
+#include <linux/iio/trigger.h>
+#include <linux/iio/trigger_consumer.h>
+#include <linux/iio/triggered_buffer.h>
+#include <linux/iio/gyro/itg3200.h>
+
+
+static int itg3200_read_all_channels(struct i2c_client *i2c, __be16 *buf)
+{
+	u8 tx = 0x80 | ITG3200_REG_TEMP_OUT_H;
+	struct i2c_msg msg[2] = {
+		{
+			.addr = i2c->addr,
+			.flags = i2c->flags,
+			.len = 1,
+			.buf = &tx,
+		},
+		{
+			.addr = i2c->addr,
+			.flags = i2c->flags | I2C_M_RD,
+			.len = ITG3200_SCAN_ELEMENTS * sizeof(s16),
+			.buf = (char *)&buf,
+		},
+	};
+
+	return i2c_transfer(i2c->adapter, msg, 2);
+}
+
+static irqreturn_t itg3200_trigger_handler(int irq, void *p)
+{
+	struct iio_poll_func *pf = p;
+	struct iio_dev *indio_dev = pf->indio_dev;
+	struct itg3200 *st = iio_priv(indio_dev);
+	__be16 buf[ITG3200_SCAN_ELEMENTS + sizeof(s64)/sizeof(u16)];
+
+	int ret = itg3200_read_all_channels(st->i2c, buf);
+	if (ret < 0)
+		goto error_ret;
+
+	if (indio_dev->scan_timestamp)
+		memcpy(buf + indio_dev->scan_bytes - sizeof(s64),
+				&pf->timestamp, sizeof(pf->timestamp));
+
+	iio_push_to_buffers(indio_dev, (u8 *)buf);
+	iio_trigger_notify_done(indio_dev->trig);
+
+error_ret:
+	return IRQ_HANDLED;
+}
+
+int itg3200_buffer_configure(struct iio_dev *indio_dev)
+{
+	return iio_triggered_buffer_setup(indio_dev, &iio_pollfunc_store_time,
+		itg3200_trigger_handler, NULL);
+}
+
+void itg3200_buffer_unconfigure(struct iio_dev *indio_dev)
+{
+	iio_triggered_buffer_cleanup(indio_dev);
+}
+
+
+static int itg3200_data_rdy_trigger_set_state(struct iio_trigger *trig,
+		bool state)
+{
+	struct iio_dev *indio_dev = trig->private_data;
+	int ret;
+	u8 msc;
+
+	ret = itg3200_read_reg_8(indio_dev, ITG3200_REG_IRQ_CONFIG, &msc);
+	if (ret)
+		goto error_ret;
+
+	if (state)
+		msc |= ITG3200_IRQ_DATA_RDY_ENABLE;
+	else
+		msc &= ~ITG3200_IRQ_DATA_RDY_ENABLE;
+
+	ret = itg3200_write_reg_8(indio_dev, ITG3200_REG_IRQ_CONFIG, msc);
+	if (ret)
+		goto error_ret;
+
+error_ret:
+	return ret;
+
+}
+
+static const struct iio_trigger_ops itg3200_trigger_ops = {
+	.owner = THIS_MODULE,
+	.set_trigger_state = &itg3200_data_rdy_trigger_set_state,
+};
+
+int itg3200_probe_trigger(struct iio_dev *indio_dev)
+{
+	int ret;
+	struct itg3200 *st = iio_priv(indio_dev);
+
+	st->trig = iio_trigger_alloc("%s-dev%d", indio_dev->name,
+				     indio_dev->id);
+	if (!st->trig)
+		return -ENOMEM;
+
+	ret = request_irq(st->i2c->irq,
+			  &iio_trigger_generic_data_rdy_poll,
+			  IRQF_TRIGGER_RISING,
+			  "itg3200_data_rdy",
+			  st->trig);
+	if (ret)
+		goto error_free_trig;
+
+
+	st->trig->dev.parent = &st->i2c->dev;
+	st->trig->ops = &itg3200_trigger_ops;
+	st->trig->private_data = indio_dev;
+	ret = iio_trigger_register(st->trig);
+	if (ret)
+		goto error_free_irq;
+
+	/* select default trigger */
+	indio_dev->trig = st->trig;
+
+	return 0;
+
+error_free_irq:
+	free_irq(st->i2c->irq, st->trig);
+error_free_trig:
+	iio_trigger_free(st->trig);
+	return ret;
+}
+
+void itg3200_remove_trigger(struct iio_dev *indio_dev)
+{
+	struct itg3200 *st = iio_priv(indio_dev);
+
+	iio_trigger_unregister(st->trig);
+	free_irq(st->i2c->irq, st->trig);
+	iio_trigger_free(st->trig);
+}
diff --git a/drivers/iio/gyro/itg3200_core.c b/drivers/iio/gyro/itg3200_core.c
new file mode 100644
index 000000000000..df2e6aa5d73b
--- /dev/null
+++ b/drivers/iio/gyro/itg3200_core.c
@@ -0,0 +1,401 @@
+/*
+ * itg3200_core.c -- support InvenSense ITG3200
+ *                   Digital 3-Axis Gyroscope driver
+ *
+ * Copyright (c) 2011 Christian Strobel <christian.strobel@iis.fraunhofer.de>
+ * Copyright (c) 2011 Manuel Stahl <manuel.stahl@iis.fraunhofer.de>
+ * Copyright (c) 2012 Thorsten Nowak <thorsten.nowak@iis.fraunhofer.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * TODO:
+ * - Support digital low pass filter
+ * - Support power management
+ */
+
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/i2c.h>
+#include <linux/gpio.h>
+#include <linux/slab.h>
+#include <linux/stat.h>
+#include <linux/module.h>
+#include <linux/delay.h>
+
+#include <linux/iio/iio.h>
+#include <linux/iio/sysfs.h>
+#include <linux/iio/events.h>
+#include <linux/iio/buffer.h>
+
+#include <linux/iio/gyro/itg3200.h>
+
+
+int itg3200_write_reg_8(struct iio_dev *indio_dev,
+		u8 reg_address, u8 val)
+{
+	struct itg3200 *st = iio_priv(indio_dev);
+
+	return i2c_smbus_write_byte_data(st->i2c, 0x80 | reg_address, val);
+}
+
+int itg3200_read_reg_8(struct iio_dev *indio_dev,
+		u8 reg_address, u8 *val)
+{
+	struct itg3200 *st = iio_priv(indio_dev);
+	int ret;
+
+	ret = i2c_smbus_read_byte_data(st->i2c, reg_address);
+	if (ret < 0)
+		return ret;
+	*val = ret;
+	return 0;
+}
+
+static int itg3200_read_reg_s16(struct iio_dev *indio_dev, u8 lower_reg_address,
+		int *val)
+{
+	struct itg3200 *st = iio_priv(indio_dev);
+	struct i2c_client *client = st->i2c;
+	int ret;
+	s16 out;
+
+	struct i2c_msg msg[2] = {
+		{
+			.addr = client->addr,
+			.flags = client->flags,
+			.len = 1,
+			.buf = (char *)&lower_reg_address,
+		},
+		{
+			.addr = client->addr,
+			.flags = client->flags | I2C_M_RD,
+			.len = 2,
+			.buf = (char *)&out,
+		},
+	};
+
+	lower_reg_address |= 0x80;
+	ret = i2c_transfer(client->adapter, msg, 2);
+	be16_to_cpus(&out);
+	*val = out;
+
+	return (ret == 2) ? 0 : ret;
+}
+
+static int itg3200_read_raw(struct iio_dev *indio_dev,
+		const struct iio_chan_spec *chan,
+		int *val, int *val2, long info)
+{
+	int ret = 0;
+	u8 reg;
+
+	switch (info) {
+	case IIO_CHAN_INFO_RAW:
+		reg = (u8)chan->address;
+		ret = itg3200_read_reg_s16(indio_dev, reg, val);
+		return IIO_VAL_INT;
+	case IIO_CHAN_INFO_SCALE:
+		*val = 0;
+		if (chan->type == IIO_TEMP)
+			*val2 = 1000000000/280;
+		else
+			*val2 = 1214142; /* (1 / 14,375) * (PI / 180) */
+		return IIO_VAL_INT_PLUS_NANO;
+	case IIO_CHAN_INFO_OFFSET:
+		/* Only the temperature channel has an offset */
+		*val = 23000;
+		return IIO_VAL_INT;
+	default:
+		return -EINVAL;
+	}
+
+	return ret;
+}
+
+static ssize_t itg3200_read_frequency(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct iio_dev *indio_dev = dev_to_iio_dev(dev);
+	int ret, sps;
+	u8 val;
+
+	ret = itg3200_read_reg_8(indio_dev, ITG3200_REG_DLPF, &val);
+	if (ret)
+		return ret;
+
+	sps = (val & ITG3200_DLPF_CFG_MASK) ? 1000 : 8000;
+
+	ret = itg3200_read_reg_8(indio_dev, ITG3200_REG_SAMPLE_RATE_DIV, &val);
+	if (ret)
+		return ret;
+
+	sps /= val + 1;
+
+	return sprintf(buf, "%d\n", sps);
+}
+
+static ssize_t itg3200_write_frequency(struct device *dev,
+		struct device_attribute *attr,
+		const char *buf,
+		size_t len)
+{
+	struct iio_dev *indio_dev = dev_to_iio_dev(dev);
+	unsigned val;
+	int ret;
+	u8 t;
+
+	ret = kstrtouint(buf, 10, &val);
+	if (ret)
+		return ret;
+
+	mutex_lock(&indio_dev->mlock);
+
+	ret = itg3200_read_reg_8(indio_dev, ITG3200_REG_DLPF, &t);
+	if (ret)
+		goto err_ret;
+
+	if (val == 0) {
+		ret = -EINVAL;
+		goto err_ret;
+	}
+	t = ((t & ITG3200_DLPF_CFG_MASK) ? 1000u : 8000u) / val - 1;
+
+	ret = itg3200_write_reg_8(indio_dev, ITG3200_REG_SAMPLE_RATE_DIV, t);
+
+err_ret:
+	mutex_unlock(&indio_dev->mlock);
+
+	return ret ? ret : len;
+}
+
+/*
+ * Reset device and internal registers to the power-up-default settings
+ * Use the gyro clock as reference, as suggested by the datasheet
+ */
+static int itg3200_reset(struct iio_dev *indio_dev)
+{
+	struct itg3200 *st = iio_priv(indio_dev);
+	int ret;
+
+	dev_dbg(&st->i2c->dev, "reset device");
+
+	ret = itg3200_write_reg_8(indio_dev,
+			ITG3200_REG_POWER_MANAGEMENT,
+			ITG3200_RESET);
+	if (ret) {
+		dev_err(&st->i2c->dev, "error resetting device");
+		goto error_ret;
+	}
+
+	/* Wait for PLL (1ms according to datasheet) */
+	udelay(1500);
+
+	ret = itg3200_write_reg_8(indio_dev,
+			ITG3200_REG_IRQ_CONFIG,
+			ITG3200_IRQ_ACTIVE_HIGH |
+			ITG3200_IRQ_PUSH_PULL |
+			ITG3200_IRQ_LATCH_50US_PULSE |
+			ITG3200_IRQ_LATCH_CLEAR_ANY);
+
+	if (ret)
+		dev_err(&st->i2c->dev, "error init device");
+
+error_ret:
+	return ret;
+}
+
+/* itg3200_enable_full_scale() - Disables the digital low pass filter */
+static int itg3200_enable_full_scale(struct iio_dev *indio_dev)
+{
+	u8 val;
+	int ret;
+
+	ret = itg3200_read_reg_8(indio_dev, ITG3200_REG_DLPF, &val);
+	if (ret)
+		goto err_ret;
+
+	val |= ITG3200_DLPF_FS_SEL_2000;
+	return itg3200_write_reg_8(indio_dev, ITG3200_REG_DLPF, val);
+
+err_ret:
+	return ret;
+}
+
+static int itg3200_initial_setup(struct iio_dev *indio_dev)
+{
+	struct itg3200 *st = iio_priv(indio_dev);
+	int ret;
+	u8 val;
+
+	ret = itg3200_read_reg_8(indio_dev, ITG3200_REG_ADDRESS, &val);
+	if (ret)
+		goto err_ret;
+
+	if (((val >> 1) & 0x3f) != 0x34) {
+		dev_err(&st->i2c->dev, "invalid reg value 0x%02x", val);
+		ret = -ENXIO;
+		goto err_ret;
+	}
+
+	ret = itg3200_reset(indio_dev);
+	if (ret)
+		goto err_ret;
+
+	ret = itg3200_enable_full_scale(indio_dev);
+err_ret:
+	return ret;
+}
+
+#define ITG3200_TEMP_INFO_MASK	(IIO_CHAN_INFO_OFFSET_SHARED_BIT | \
+				 IIO_CHAN_INFO_SCALE_SHARED_BIT | \
+				 IIO_CHAN_INFO_RAW_SEPARATE_BIT)
+#define ITG3200_GYRO_INFO_MASK	(IIO_CHAN_INFO_SCALE_SHARED_BIT | \
+				 IIO_CHAN_INFO_RAW_SEPARATE_BIT)
+
+#define ITG3200_ST						\
+	{ .sign = 's', .realbits = 16, .storagebits = 16, .endianness = IIO_BE }
+
+#define ITG3200_GYRO_CHAN(_mod) { \
+	.type = IIO_ANGL_VEL, \
+	.modified = 1, \
+	.channel2 = IIO_MOD_ ## _mod, \
+	.info_mask = ITG3200_GYRO_INFO_MASK, \
+	.address = ITG3200_REG_GYRO_ ## _mod ## OUT_H, \
+	.scan_index = ITG3200_SCAN_GYRO_ ## _mod, \
+	.scan_type = ITG3200_ST, \
+}
+
+static const struct iio_chan_spec itg3200_channels[] = {
+	{
+		.type = IIO_TEMP,
+		.channel2 = IIO_NO_MOD,
+		.info_mask = ITG3200_TEMP_INFO_MASK,
+		.address = ITG3200_REG_TEMP_OUT_H,
+		.scan_index = ITG3200_SCAN_TEMP,
+		.scan_type = ITG3200_ST,
+	},
+	ITG3200_GYRO_CHAN(X),
+	ITG3200_GYRO_CHAN(Y),
+	ITG3200_GYRO_CHAN(Z),
+	IIO_CHAN_SOFT_TIMESTAMP(ITG3200_SCAN_ELEMENTS),
+};
+
+/* IIO device attributes */
+static IIO_DEV_ATTR_SAMP_FREQ(S_IWUSR | S_IRUGO, itg3200_read_frequency,
+		itg3200_write_frequency);
+
+static struct attribute *itg3200_attributes[] = {
+	&iio_dev_attr_sampling_frequency.dev_attr.attr,
+	NULL
+};
+
+static const struct attribute_group itg3200_attribute_group = {
+	.attrs = itg3200_attributes,
+};
+
+static const struct iio_info itg3200_info = {
+	.attrs = &itg3200_attribute_group,
+	.read_raw = &itg3200_read_raw,
+	.driver_module = THIS_MODULE,
+};
+
+static const unsigned long itg3200_available_scan_masks[] = { 0xffffffff, 0x0 };
+
+static int itg3200_probe(struct i2c_client *client,
+		const struct i2c_device_id *id)
+{
+	int ret;
+	struct itg3200 *st;
+	struct iio_dev *indio_dev;
+
+	dev_dbg(&client->dev, "probe I2C dev with IRQ %i", client->irq);
+
+	indio_dev = iio_device_alloc(sizeof(*st));
+	if (indio_dev == NULL) {
+		ret =  -ENOMEM;
+		goto error_ret;
+	}
+
+	st = iio_priv(indio_dev);
+
+	i2c_set_clientdata(client, indio_dev);
+	st->i2c = client;
+
+	indio_dev->dev.parent = &client->dev;
+	indio_dev->name = client->dev.driver->name;
+	indio_dev->channels = itg3200_channels;
+	indio_dev->num_channels = ARRAY_SIZE(itg3200_channels);
+	indio_dev->available_scan_masks = itg3200_available_scan_masks;
+	indio_dev->info = &itg3200_info;
+	indio_dev->modes = INDIO_DIRECT_MODE;
+
+	ret = itg3200_buffer_configure(indio_dev);
+	if (ret)
+		goto error_free_dev;
+
+	if (client->irq) {
+		ret = itg3200_probe_trigger(indio_dev);
+		if (ret)
+			goto error_unconfigure_buffer;
+	}
+
+	ret = itg3200_initial_setup(indio_dev);
+	if (ret)
+		goto error_remove_trigger;
+
+	ret = iio_device_register(indio_dev);
+	if (ret)
+		goto error_remove_trigger;
+
+	return 0;
+
+error_remove_trigger:
+	if (client->irq)
+		itg3200_remove_trigger(indio_dev);
+error_unconfigure_buffer:
+	itg3200_buffer_unconfigure(indio_dev);
+error_free_dev:
+	iio_device_free(indio_dev);
+error_ret:
+	return ret;
+}
+
+static int itg3200_remove(struct i2c_client *client)
+{
+	struct iio_dev *indio_dev = i2c_get_clientdata(client);
+
+	iio_device_unregister(indio_dev);
+
+	if (client->irq)
+		itg3200_remove_trigger(indio_dev);
+
+	itg3200_buffer_unconfigure(indio_dev);
+
+	iio_device_free(indio_dev);
+
+	return 0;
+}
+
+static const struct i2c_device_id itg3200_id[] = {
+	{ "itg3200", 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, itg3200_id);
+
+static struct i2c_driver itg3200_driver = {
+	.driver = {
+		.owner  = THIS_MODULE,
+		.name	= "itg3200",
+	},
+	.id_table	= itg3200_id,
+	.probe		= itg3200_probe,
+	.remove		= itg3200_remove,
+};
+
+module_i2c_driver(itg3200_driver);
+
+MODULE_AUTHOR("Christian Strobel <christian.strobel@iis.fraunhofer.de>");
+MODULE_DESCRIPTION("ITG3200 Gyroscope I2C driver");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/iio/gyro/itg3200.h b/include/linux/iio/gyro/itg3200.h
new file mode 100644
index 000000000000..c53f16914b77
--- /dev/null
+++ b/include/linux/iio/gyro/itg3200.h
@@ -0,0 +1,154 @@
+/*
+ * itg3200.h -- support InvenSense ITG3200
+ *              Digital 3-Axis Gyroscope driver
+ *
+ * Copyright (c) 2011 Christian Strobel <christian.strobel@iis.fraunhofer.de>
+ * Copyright (c) 2011 Manuel Stahl <manuel.stahl@iis.fraunhofer.de>
+ * Copyright (c) 2012 Thorsten Nowak <thorsten.nowak@iis.fraunhofer.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef I2C_ITG3200_H_
+#define I2C_ITG3200_H_
+
+#include <linux/iio/iio.h>
+
+/* Register with I2C address (34h) */
+#define ITG3200_REG_ADDRESS		0x00
+
+/* Sample rate divider
+ * Range: 0 to 255
+ * Default value: 0x00 */
+#define ITG3200_REG_SAMPLE_RATE_DIV	0x15
+
+/* Digital low pass filter settings */
+#define ITG3200_REG_DLPF		0x16
+/* DLPF full scale range */
+#define ITG3200_DLPF_FS_SEL_2000	0x18
+/* Bandwidth (Hz) and internal sample rate
+ * (kHz) of DLPF */
+#define ITG3200_DLPF_256_8		0x00
+#define ITG3200_DLPF_188_1		0x01
+#define ITG3200_DLPF_98_1		0x02
+#define ITG3200_DLPF_42_1		0x03
+#define ITG3200_DLPF_20_1		0x04
+#define ITG3200_DLPF_10_1		0x05
+#define ITG3200_DLPF_5_1		0x06
+
+#define ITG3200_DLPF_CFG_MASK		0x07
+
+/* Configuration for interrupt operations */
+#define ITG3200_REG_IRQ_CONFIG		0x17
+/* Logic level */
+#define ITG3200_IRQ_ACTIVE_LOW		0x80
+#define ITG3200_IRQ_ACTIVE_HIGH		0x00
+/* Drive type */
+#define ITG3200_IRQ_OPEN_DRAIN		0x40
+#define ITG3200_IRQ_PUSH_PULL		0x00
+/* Latch mode */
+#define ITG3200_IRQ_LATCH_UNTIL_CLEARED	0x20
+#define ITG3200_IRQ_LATCH_50US_PULSE	0x00
+/* Latch clear method */
+#define ITG3200_IRQ_LATCH_CLEAR_ANY	0x10
+#define ITG3200_IRQ_LATCH_CLEAR_STATUS	0x00
+/* Enable interrupt when device is ready */
+#define ITG3200_IRQ_DEVICE_RDY_ENABLE	0x04
+/* Enable interrupt when data is available */
+#define ITG3200_IRQ_DATA_RDY_ENABLE	0x01
+
+/* Determine the status of ITG-3200 interrupts */
+#define ITG3200_REG_IRQ_STATUS		0x1A
+/* Status of 'device is ready'-interrupt */
+#define ITG3200_IRQ_DEVICE_RDY_STATUS	0x04
+/* Status of 'data is available'-interrupt */
+#define ITG3200_IRQ_DATA_RDY_STATUS	0x01
+
+/* Sensor registers */
+#define ITG3200_REG_TEMP_OUT_H		0x1B
+#define ITG3200_REG_TEMP_OUT_L		0x1C
+#define ITG3200_REG_GYRO_XOUT_H		0x1D
+#define ITG3200_REG_GYRO_XOUT_L		0x1E
+#define ITG3200_REG_GYRO_YOUT_H		0x1F
+#define ITG3200_REG_GYRO_YOUT_L		0x20
+#define ITG3200_REG_GYRO_ZOUT_H		0x21
+#define ITG3200_REG_GYRO_ZOUT_L		0x22
+
+/* Power management */
+#define ITG3200_REG_POWER_MANAGEMENT	0x3E
+/* Reset device and internal registers to the
+ * power-up-default settings */
+#define ITG3200_RESET			0x80
+/* Enable low power sleep mode */
+#define ITG3200_SLEEP			0x40
+/* Put according gyroscope in standby mode */
+#define ITG3200_STANDBY_GYRO_X		0x20
+#define ITG3200_STANDBY_GYRO_Y		0x10
+#define ITG3200_STANDBY_GYRO_Z		0x08
+/* Determine the device clock source */
+#define ITG3200_CLK_INTERNAL		0x00
+#define ITG3200_CLK_GYRO_X		0x01
+#define ITG3200_CLK_GYRO_Y		0x02
+#define ITG3200_CLK_GYRO_Z		0x03
+#define ITG3200_CLK_EXT_32K		0x04
+#define ITG3200_CLK_EXT_19M		0x05
+
+
+/**
+ * struct itg3200 - device instance specific data
+ * @i2c:    actual i2c_client
+ * @trig:   data ready trigger from itg3200 pin
+ **/
+struct itg3200 {
+	struct i2c_client	*i2c;
+	struct iio_trigger	*trig;
+};
+
+enum ITG3200_SCAN_INDEX {
+	ITG3200_SCAN_TEMP,
+	ITG3200_SCAN_GYRO_X,
+	ITG3200_SCAN_GYRO_Y,
+	ITG3200_SCAN_GYRO_Z,
+	ITG3200_SCAN_ELEMENTS,
+};
+
+int itg3200_write_reg_8(struct iio_dev *indio_dev,
+		u8 reg_address, u8 val);
+
+int itg3200_read_reg_8(struct iio_dev *indio_dev,
+		u8 reg_address, u8 *val);
+
+
+#ifdef CONFIG_IIO_BUFFER
+
+void itg3200_remove_trigger(struct iio_dev *indio_dev);
+int itg3200_probe_trigger(struct iio_dev *indio_dev);
+
+int itg3200_buffer_configure(struct iio_dev *indio_dev);
+void itg3200_buffer_unconfigure(struct iio_dev *indio_dev);
+
+#else /* CONFIG_IIO_BUFFER */
+
+static inline void itg3200_remove_trigger(struct iio_dev *indio_dev)
+{
+}
+
+static inline int itg3200_probe_trigger(struct iio_dev *indio_dev)
+{
+	return 0;
+}
+
+static inline int itg3200_buffer_configure(struct iio_dev *indio_dev)
+{
+	return 0;
+}
+
+static inline void itg3200_buffer_unconfigure(struct iio_dev *indio_dev)
+{
+}
+
+#endif  /* CONFIG_IIO_RING_BUFFER */
+
+#endif /* ITG3200_H_ */
-- 
cgit v1.2.3


From 4d2e4fc224d0d5e56fdb0baa7cd135d1557b0a25 Mon Sep 17 00:00:00 2001
From: Denis CIOCCA <denis.ciocca@st.com>
Date: Wed, 30 Jan 2013 09:15:00 +0000
Subject: iio:common: removed unused functions outside st_sensors library

This patch remove st_sensors_get_sampling_frequency_avl and
st_sensors_get_scale_avl functions used only in
st_sensors_sysfs_sampling_frequency_avail and st_sensors_sysfs_scale_avail
sysfs functions.

Signed-off-by: Denis Ciocca <denis.ciocca@st.com>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 drivers/iio/common/st_sensors/st_sensors_core.c | 70 ++++++++++---------------
 include/linux/iio/common/st_sensors.h           |  4 --
 2 files changed, 28 insertions(+), 46 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/iio/common/st_sensors/st_sensors_core.c b/drivers/iio/common/st_sensors/st_sensors_core.c
index fba6d6847b6d..0198324a8b0c 100644
--- a/drivers/iio/common/st_sensors/st_sensors_core.c
+++ b/drivers/iio/common/st_sensors/st_sensors_core.c
@@ -38,46 +38,6 @@ st_sensors_write_data_with_mask_error:
 	return err;
 }
 
-int st_sensors_get_sampling_frequency_avl(struct iio_dev *indio_dev, char *buf)
-{
-	int i, len = 0;
-	struct st_sensor_data *sdata = iio_priv(indio_dev);
-
-	mutex_lock(&indio_dev->mlock);
-	for (i = 0; i < ST_SENSORS_ODR_LIST_MAX; i++) {
-		if (sdata->sensor->odr.odr_avl[i].hz == 0)
-			break;
-
-		len += scnprintf(buf + len, PAGE_SIZE - len, "%d ",
-					sdata->sensor->odr.odr_avl[i].hz);
-	}
-	mutex_unlock(&indio_dev->mlock);
-	buf[len - 1] = '\n';
-
-	return len;
-}
-EXPORT_SYMBOL(st_sensors_get_sampling_frequency_avl);
-
-int st_sensors_get_scale_avl(struct iio_dev *indio_dev, char *buf)
-{
-	int i, len = 0;
-	struct st_sensor_data *sdata = iio_priv(indio_dev);
-
-	mutex_lock(&indio_dev->mlock);
-	for (i = 0; i < ST_SENSORS_FULLSCALE_AVL_MAX; i++) {
-		if (sdata->sensor->fs.fs_avl[i].num == 0)
-			break;
-
-		len += scnprintf(buf + len, PAGE_SIZE - len, "0.%06u ",
-					sdata->sensor->fs.fs_avl[i].gain);
-	}
-	mutex_unlock(&indio_dev->mlock);
-	buf[len - 1] = '\n';
-
-	return len;
-}
-EXPORT_SYMBOL(st_sensors_get_scale_avl);
-
 static int st_sensors_match_odr(struct st_sensors *sensor,
 			unsigned int odr, struct st_sensor_odr_avl *odr_out)
 {
@@ -440,18 +400,44 @@ EXPORT_SYMBOL(st_sensors_sysfs_set_sampling_frequency);
 ssize_t st_sensors_sysfs_sampling_frequency_avail(struct device *dev,
 				struct device_attribute *attr, char *buf)
 {
+	int i, len = 0;
 	struct iio_dev *indio_dev = dev_get_drvdata(dev);
+	struct st_sensor_data *sdata = iio_priv(indio_dev);
+
+	mutex_lock(&indio_dev->mlock);
+	for (i = 0; i < ST_SENSORS_ODR_LIST_MAX; i++) {
+		if (sdata->sensor->odr.odr_avl[i].hz == 0)
+			break;
+
+		len += scnprintf(buf + len, PAGE_SIZE - len, "%d ",
+					sdata->sensor->odr.odr_avl[i].hz);
+	}
+	mutex_unlock(&indio_dev->mlock);
+	buf[len - 1] = '\n';
 
-	return st_sensors_get_sampling_frequency_avl(indio_dev, buf);
+	return len;
 }
 EXPORT_SYMBOL(st_sensors_sysfs_sampling_frequency_avail);
 
 ssize_t st_sensors_sysfs_scale_avail(struct device *dev,
 				struct device_attribute *attr, char *buf)
 {
+	int i, len = 0;
 	struct iio_dev *indio_dev = dev_get_drvdata(dev);
+	struct st_sensor_data *sdata = iio_priv(indio_dev);
+
+	mutex_lock(&indio_dev->mlock);
+	for (i = 0; i < ST_SENSORS_FULLSCALE_AVL_MAX; i++) {
+		if (sdata->sensor->fs.fs_avl[i].num == 0)
+			break;
+
+		len += scnprintf(buf + len, PAGE_SIZE - len, "0.%06u ",
+					sdata->sensor->fs.fs_avl[i].gain);
+	}
+	mutex_unlock(&indio_dev->mlock);
+	buf[len - 1] = '\n';
 
-	return st_sensors_get_scale_avl(indio_dev, buf);
+	return len;
 }
 EXPORT_SYMBOL(st_sensors_sysfs_scale_avail);
 
diff --git a/include/linux/iio/common/st_sensors.h b/include/linux/iio/common/st_sensors.h
index 3cc85715491f..c40fdf537f69 100644
--- a/include/linux/iio/common/st_sensors.h
+++ b/include/linux/iio/common/st_sensors.h
@@ -243,10 +243,6 @@ int st_sensors_set_enable(struct iio_dev *indio_dev, bool enable);
 
 int st_sensors_set_axis_enable(struct iio_dev *indio_dev, u8 axis_enable);
 
-int st_sensors_get_sampling_frequency_avl(struct iio_dev *indio_dev, char *buf);
-
-int st_sensors_get_scale_avl(struct iio_dev *indio_dev, char *buf);
-
 int st_sensors_set_odr(struct iio_dev *indio_dev, unsigned int odr);
 
 int st_sensors_set_dataready_irq(struct iio_dev *indio_dev, bool enable);
-- 
cgit v1.2.3


From ca7d98dbd7db6aa8bc4b08e26be1249436d21af3 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Thu, 31 Jan 2013 21:43:00 +0000
Subject: iio: Update iio_channel_get_all and iio_channel_get_all_cb API

Pass device pointer instead of device name as parameter to iio_channel_get_all
and iio_channel_get_all_cb. This will enable us to use OF information to
retrieve consumer channel information.

Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 drivers/iio/buffer_cb.c         | 4 ++--
 drivers/iio/inkern.c            | 6 ++++--
 drivers/staging/iio/iio_hwmon.c | 9 ++++++---
 include/linux/iio/consumer.h    | 9 +++++----
 4 files changed, 17 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/iio/buffer_cb.c b/drivers/iio/buffer_cb.c
index 4d40e24f3721..9201022945e9 100644
--- a/drivers/iio/buffer_cb.c
+++ b/drivers/iio/buffer_cb.c
@@ -25,7 +25,7 @@ static struct iio_buffer_access_funcs iio_cb_access = {
 	.store_to = &iio_buffer_cb_store_to,
 };
 
-struct iio_cb_buffer *iio_channel_get_all_cb(const char *name,
+struct iio_cb_buffer *iio_channel_get_all_cb(struct device *dev,
 					     int (*cb)(u8 *data,
 						       void *private),
 					     void *private)
@@ -46,7 +46,7 @@ struct iio_cb_buffer *iio_channel_get_all_cb(const char *name,
 	cb_buff->buffer.access = &iio_cb_access;
 	INIT_LIST_HEAD(&cb_buff->buffer.demux_list);
 
-	cb_buff->channels = iio_channel_get_all(name);
+	cb_buff->channels = iio_channel_get_all(dev);
 	if (IS_ERR(cb_buff->channels)) {
 		ret = PTR_ERR(cb_buff->channels);
 		goto error_free_cb_buff;
diff --git a/drivers/iio/inkern.c b/drivers/iio/inkern.c
index d55e98fb300e..58d0ffe856b6 100644
--- a/drivers/iio/inkern.c
+++ b/drivers/iio/inkern.c
@@ -167,16 +167,18 @@ void iio_channel_release(struct iio_channel *channel)
 }
 EXPORT_SYMBOL_GPL(iio_channel_release);
 
-struct iio_channel *iio_channel_get_all(const char *name)
+struct iio_channel *iio_channel_get_all(struct device *dev)
 {
+	const char *name;
 	struct iio_channel *chans;
 	struct iio_map_internal *c = NULL;
 	int nummaps = 0;
 	int mapind = 0;
 	int i, ret;
 
-	if (name == NULL)
+	if (dev == NULL)
 		return ERR_PTR(-EINVAL);
+	name = dev_name(dev);
 
 	mutex_lock(&iio_map_list_lock);
 	/* first count the matching maps */
diff --git a/drivers/staging/iio/iio_hwmon.c b/drivers/staging/iio/iio_hwmon.c
index d4ef34fe0341..93af756ba48c 100644
--- a/drivers/staging/iio/iio_hwmon.c
+++ b/drivers/staging/iio/iio_hwmon.c
@@ -71,14 +71,17 @@ static int iio_hwmon_probe(struct platform_device *pdev)
 	int ret, i;
 	int in_i = 1, temp_i = 1, curr_i = 1;
 	enum iio_chan_type type;
+	struct iio_channel *channels;
+
+	channels = iio_channel_get_all(dev);
+	if (IS_ERR(channels))
+		return PTR_ERR(channels);
 
 	st = devm_kzalloc(dev, sizeof(*st), GFP_KERNEL);
 	if (st == NULL)
 		return -ENOMEM;
 
-	st->channels = iio_channel_get_all(dev_name(dev));
-	if (IS_ERR(st->channels))
-		return PTR_ERR(st->channels);
+	st->channels = channels;
 
 	/* count how many attributes we have */
 	while (st->channels[st->num_channels].indio_dev)
diff --git a/include/linux/iio/consumer.h b/include/linux/iio/consumer.h
index 16c35ac045bd..a85787ac66ab 100644
--- a/include/linux/iio/consumer.h
+++ b/include/linux/iio/consumer.h
@@ -15,6 +15,7 @@
 
 struct iio_dev;
 struct iio_chan_spec;
+struct device;
 
 /**
  * struct iio_channel - everything needed for a consumer to use a channel
@@ -48,14 +49,14 @@ void iio_channel_release(struct iio_channel *chan);
 
 /**
  * iio_channel_get_all() - get all channels associated with a client
- * @name:		name of consumer device.
+ * @dev:		Pointer to consumer device.
  *
  * Returns an array of iio_channel structures terminated with one with
  * null iio_dev pointer.
  * This function is used by fairly generic consumers to get all the
  * channels registered as having this consumer.
  */
-struct iio_channel *iio_channel_get_all(const char *name);
+struct iio_channel *iio_channel_get_all(struct device *dev);
 
 /**
  * iio_channel_release_all() - reverse iio_channel_get_all
@@ -66,7 +67,7 @@ void iio_channel_release_all(struct iio_channel *chan);
 struct iio_cb_buffer;
 /**
  * iio_channel_get_all_cb() - register callback for triggered capture
- * @name:		Name of client device.
+ * @dev:		Pointer to client device.
  * @cb:			Callback function.
  * @private:		Private data passed to callback.
  *
@@ -74,7 +75,7 @@ struct iio_cb_buffer;
  * So if the channels requested come from different devices this will
  * fail.
  */
-struct iio_cb_buffer *iio_channel_get_all_cb(const char *name,
+struct iio_cb_buffer *iio_channel_get_all_cb(struct device *dev,
 					     int (*cb)(u8 *data,
 						       void *private),
 					     void *private);
-- 
cgit v1.2.3


From 6cb2afd7c0abb93bd9dc6d36b858b1e312e2407d Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Thu, 31 Jan 2013 21:43:00 +0000
Subject: iio: Simplify iio_map_array_unregister API

Instead of requiring the map to unregister, simply unregister all map entries
associated with the given iio device. This simplifies map removal and also works
for maps generated through devicetree.

Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 drivers/iio/adc/lp8788_adc.c | 11 ++---------
 drivers/iio/adc/max1363.c    |  4 ++--
 drivers/iio/inkern.c         | 36 +++++++++++-------------------------
 include/linux/iio/driver.h   |  9 +++------
 4 files changed, 18 insertions(+), 42 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/iio/adc/lp8788_adc.c b/drivers/iio/adc/lp8788_adc.c
index d02704ce0091..763f57565ee4 100644
--- a/drivers/iio/adc/lp8788_adc.c
+++ b/drivers/iio/adc/lp8788_adc.c
@@ -187,12 +187,6 @@ static int lp8788_iio_map_register(struct iio_dev *indio_dev,
 	return 0;
 }
 
-static inline void lp8788_iio_map_unregister(struct iio_dev *indio_dev,
-				struct lp8788_adc *adc)
-{
-	iio_map_array_unregister(indio_dev, adc->map);
-}
-
 static int lp8788_adc_probe(struct platform_device *pdev)
 {
 	struct lp8788 *lp = dev_get_drvdata(pdev->dev.parent);
@@ -231,7 +225,7 @@ static int lp8788_adc_probe(struct platform_device *pdev)
 	return 0;
 
 err_iio_device:
-	lp8788_iio_map_unregister(indio_dev, adc);
+	iio_map_array_unregister(indio_dev);
 err_iio_map:
 	iio_device_free(indio_dev);
 	return ret;
@@ -240,10 +234,9 @@ err_iio_map:
 static int lp8788_adc_remove(struct platform_device *pdev)
 {
 	struct iio_dev *indio_dev = platform_get_drvdata(pdev);
-	struct lp8788_adc *adc = iio_priv(indio_dev);
 
 	iio_device_unregister(indio_dev);
-	lp8788_iio_map_unregister(indio_dev, adc);
+	iio_map_array_unregister(indio_dev);
 	iio_device_free(indio_dev);
 
 	return 0;
diff --git a/drivers/iio/adc/max1363.c b/drivers/iio/adc/max1363.c
index 46732380566f..ef868c9c17be 100644
--- a/drivers/iio/adc/max1363.c
+++ b/drivers/iio/adc/max1363.c
@@ -1611,7 +1611,7 @@ error_disable_reg:
 error_put_reg:
 	regulator_put(st->reg);
 error_unregister_map:
-	iio_map_array_unregister(indio_dev, client->dev.platform_data);
+	iio_map_array_unregister(indio_dev);
 error_free_device:
 	iio_device_free(indio_dev);
 error_out:
@@ -1630,7 +1630,7 @@ static int max1363_remove(struct i2c_client *client)
 	kfree(indio_dev->available_scan_masks);
 	regulator_disable(st->reg);
 	regulator_put(st->reg);
-	iio_map_array_unregister(indio_dev, client->dev.platform_data);
+	iio_map_array_unregister(indio_dev);
 	iio_device_free(indio_dev);
 
 	return 0;
diff --git a/drivers/iio/inkern.c b/drivers/iio/inkern.c
index 58d0ffe856b6..c42aba6817e8 100644
--- a/drivers/iio/inkern.c
+++ b/drivers/iio/inkern.c
@@ -54,39 +54,25 @@ error_ret:
 EXPORT_SYMBOL_GPL(iio_map_array_register);
 
 
-/* Assumes the exact same array (e.g. memory locations)
- * used at unregistration as used at registration rather than
- * more complex checking of contents.
+/*
+ * Remove all map entries associated with the given iio device
  */
-int iio_map_array_unregister(struct iio_dev *indio_dev,
-			     struct iio_map *maps)
+int iio_map_array_unregister(struct iio_dev *indio_dev)
 {
-	int i = 0, ret = 0;
-	bool found_it;
+	int ret = -ENODEV;
 	struct iio_map_internal *mapi;
-
-	if (maps == NULL)
-		return 0;
+	struct list_head *pos, *tmp;
 
 	mutex_lock(&iio_map_list_lock);
-	while (maps[i].consumer_dev_name != NULL) {
-		found_it = false;
-		list_for_each_entry(mapi, &iio_map_list, l)
-			if (&maps[i] == mapi->map) {
-				list_del(&mapi->l);
-				kfree(mapi);
-				found_it = true;
-				break;
-			}
-		if (!found_it) {
-			ret = -ENODEV;
-			goto error_ret;
+	list_for_each_safe(pos, tmp, &iio_map_list) {
+		mapi = list_entry(pos, struct iio_map_internal, l);
+		if (indio_dev == mapi->indio_dev) {
+			list_del(&mapi->l);
+			kfree(mapi);
+			ret = 0;
 		}
-		i++;
 	}
-error_ret:
 	mutex_unlock(&iio_map_list_lock);
-
 	return ret;
 }
 EXPORT_SYMBOL_GPL(iio_map_array_unregister);
diff --git a/include/linux/iio/driver.h b/include/linux/iio/driver.h
index a4f8b2e05af5..7dfb10ee2669 100644
--- a/include/linux/iio/driver.h
+++ b/include/linux/iio/driver.h
@@ -22,13 +22,10 @@ int iio_map_array_register(struct iio_dev *indio_dev,
 			   struct iio_map *map);
 
 /**
- * iio_map_array_unregister() - tell the core to remove consumer mappings
+ * iio_map_array_unregister() - tell the core to remove consumer mappings for
+ *				the given provider device
  * @indio_dev:	provider device
- * @map:	array of mappings to remove. Note these must have same memory
- *		addresses as those originally added not just equal parameter
- *		values.
  */
-int iio_map_array_unregister(struct iio_dev *indio_dev,
-			     struct iio_map *map);
+int iio_map_array_unregister(struct iio_dev *indio_dev);
 
 #endif
-- 
cgit v1.2.3


From cfad1ba87150e198be9ea32367a24e500e59de2c Mon Sep 17 00:00:00 2001
From: Eric Lapuyade <eric.lapuyade@linux.intel.com>
Date: Tue, 18 Dec 2012 14:53:53 +0100
Subject: NFC: Initial support for Inside Secure microread

Inside Secure microread is an HCI based NFC chipset.
This initial support includes reader and p2p (Target and initiator) modes.

Signed-off-by: Eric Lapuyade <eric.lapuyade@intel.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/nfc/Kconfig                     |   1 +
 drivers/nfc/Makefile                    |   1 +
 drivers/nfc/microread/Kconfig           |  13 +
 drivers/nfc/microread/Makefile          |   5 +
 drivers/nfc/microread/microread.c       | 728 ++++++++++++++++++++++++++++++++
 drivers/nfc/microread/microread.h       |  33 ++
 include/linux/platform_data/microread.h |  35 ++
 7 files changed, 816 insertions(+)
 create mode 100644 drivers/nfc/microread/Kconfig
 create mode 100644 drivers/nfc/microread/Makefile
 create mode 100644 drivers/nfc/microread/microread.c
 create mode 100644 drivers/nfc/microread/microread.h
 create mode 100644 include/linux/platform_data/microread.h

(limited to 'include/linux')

diff --git a/drivers/nfc/Kconfig b/drivers/nfc/Kconfig
index 80c728b28828..e57034971ccc 100644
--- a/drivers/nfc/Kconfig
+++ b/drivers/nfc/Kconfig
@@ -27,5 +27,6 @@ config NFC_WILINK
 	  into the kernel or say M to compile it as module.
 
 source "drivers/nfc/pn544/Kconfig"
+source "drivers/nfc/microread/Kconfig"
 
 endmenu
diff --git a/drivers/nfc/Makefile b/drivers/nfc/Makefile
index 574bbc04d97a..a189ada0926a 100644
--- a/drivers/nfc/Makefile
+++ b/drivers/nfc/Makefile
@@ -3,6 +3,7 @@
 #
 
 obj-$(CONFIG_NFC_PN544)		+= pn544/
+obj-$(CONFIG_NFC_MICROREAD)	+= microread/
 obj-$(CONFIG_NFC_PN533)		+= pn533.o
 obj-$(CONFIG_NFC_WILINK)	+= nfcwilink.o
 
diff --git a/drivers/nfc/microread/Kconfig b/drivers/nfc/microread/Kconfig
new file mode 100644
index 000000000000..5b89d011d098
--- /dev/null
+++ b/drivers/nfc/microread/Kconfig
@@ -0,0 +1,13 @@
+config NFC_MICROREAD
+	tristate "Inside Secure microread NFC driver"
+	depends on NFC_HCI
+	select CRC_CCITT
+	default n
+	---help---
+	  This module contains the main code for Inside Secure microread
+	  NFC chipsets. It implements the chipset HCI logic and hooks into
+	  the NFC kernel APIs. Physical layers will register against it.
+
+	  To compile this driver as a module, choose m here. The module will
+	  be called microread.
+	  Say N if unsure.
diff --git a/drivers/nfc/microread/Makefile b/drivers/nfc/microread/Makefile
new file mode 100644
index 000000000000..9ce2c53f49a7
--- /dev/null
+++ b/drivers/nfc/microread/Makefile
@@ -0,0 +1,5 @@
+#
+# Makefile for Microread HCI based NFC driver
+#
+
+obj-$(CONFIG_NFC_MICROREAD)     += microread.o
diff --git a/drivers/nfc/microread/microread.c b/drivers/nfc/microread/microread.c
new file mode 100644
index 000000000000..3420d833db17
--- /dev/null
+++ b/drivers/nfc/microread/microread.c
@@ -0,0 +1,728 @@
+/*
+ * HCI based Driver for Inside Secure microread NFC Chip
+ *
+ * Copyright (C) 2013  Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the
+ * Free Software Foundation, Inc.,
+ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+#include <linux/crc-ccitt.h>
+
+#include <linux/nfc.h>
+#include <net/nfc/nfc.h>
+#include <net/nfc/hci.h>
+#include <net/nfc/llc.h>
+
+#include "microread.h"
+
+/* Proprietary gates, events, commands and registers */
+/* Admin */
+#define MICROREAD_GATE_ID_ADM NFC_HCI_ADMIN_GATE
+#define MICROREAD_GATE_ID_MGT 0x01
+#define MICROREAD_GATE_ID_OS 0x02
+#define MICROREAD_GATE_ID_TESTRF 0x03
+#define MICROREAD_GATE_ID_LOOPBACK NFC_HCI_LOOPBACK_GATE
+#define MICROREAD_GATE_ID_IDT NFC_HCI_ID_MGMT_GATE
+#define MICROREAD_GATE_ID_LMS NFC_HCI_LINK_MGMT_GATE
+
+/* Reader */
+#define MICROREAD_GATE_ID_MREAD_GEN 0x10
+#define MICROREAD_GATE_ID_MREAD_ISO_B NFC_HCI_RF_READER_B_GATE
+#define MICROREAD_GATE_ID_MREAD_NFC_T1 0x12
+#define MICROREAD_GATE_ID_MREAD_ISO_A NFC_HCI_RF_READER_A_GATE
+#define MICROREAD_GATE_ID_MREAD_NFC_T3 0x14
+#define MICROREAD_GATE_ID_MREAD_ISO_15_3 0x15
+#define MICROREAD_GATE_ID_MREAD_ISO_15_2 0x16
+#define MICROREAD_GATE_ID_MREAD_ISO_B_3 0x17
+#define MICROREAD_GATE_ID_MREAD_BPRIME 0x18
+#define MICROREAD_GATE_ID_MREAD_ISO_A_3 0x19
+
+/* Card */
+#define MICROREAD_GATE_ID_MCARD_GEN 0x20
+#define MICROREAD_GATE_ID_MCARD_ISO_B 0x21
+#define MICROREAD_GATE_ID_MCARD_BPRIME 0x22
+#define MICROREAD_GATE_ID_MCARD_ISO_A 0x23
+#define MICROREAD_GATE_ID_MCARD_NFC_T3 0x24
+#define MICROREAD_GATE_ID_MCARD_ISO_15_3 0x25
+#define MICROREAD_GATE_ID_MCARD_ISO_15_2 0x26
+#define MICROREAD_GATE_ID_MCARD_ISO_B_2 0x27
+#define MICROREAD_GATE_ID_MCARD_ISO_CUSTOM 0x28
+#define MICROREAD_GATE_ID_SECURE_ELEMENT 0x2F
+
+/* P2P */
+#define MICROREAD_GATE_ID_P2P_GEN 0x30
+#define MICROREAD_GATE_ID_P2P_TARGET 0x31
+#define MICROREAD_PAR_P2P_TARGET_MODE 0x01
+#define MICROREAD_PAR_P2P_TARGET_GT 0x04
+#define MICROREAD_GATE_ID_P2P_INITIATOR 0x32
+#define MICROREAD_PAR_P2P_INITIATOR_GI 0x01
+#define MICROREAD_PAR_P2P_INITIATOR_GT 0x03
+
+/* Those pipes are created/opened by default in the chip */
+#define MICROREAD_PIPE_ID_LMS 0x00
+#define MICROREAD_PIPE_ID_ADMIN 0x01
+#define MICROREAD_PIPE_ID_MGT 0x02
+#define MICROREAD_PIPE_ID_OS 0x03
+#define MICROREAD_PIPE_ID_HDS_LOOPBACK 0x04
+#define MICROREAD_PIPE_ID_HDS_IDT 0x05
+#define MICROREAD_PIPE_ID_HDS_MCARD_ISO_B 0x08
+#define MICROREAD_PIPE_ID_HDS_MCARD_ISO_BPRIME 0x09
+#define MICROREAD_PIPE_ID_HDS_MCARD_ISO_A 0x0A
+#define MICROREAD_PIPE_ID_HDS_MCARD_ISO_15_3 0x0B
+#define MICROREAD_PIPE_ID_HDS_MCARD_ISO_15_2 0x0C
+#define MICROREAD_PIPE_ID_HDS_MCARD_NFC_T3 0x0D
+#define MICROREAD_PIPE_ID_HDS_MCARD_ISO_B_2 0x0E
+#define MICROREAD_PIPE_ID_HDS_MCARD_CUSTOM 0x0F
+#define MICROREAD_PIPE_ID_HDS_MREAD_ISO_B 0x10
+#define MICROREAD_PIPE_ID_HDS_MREAD_NFC_T1 0x11
+#define MICROREAD_PIPE_ID_HDS_MREAD_ISO_A 0x12
+#define MICROREAD_PIPE_ID_HDS_MREAD_ISO_15_3 0x13
+#define MICROREAD_PIPE_ID_HDS_MREAD_ISO_15_2 0x14
+#define MICROREAD_PIPE_ID_HDS_MREAD_NFC_T3 0x15
+#define MICROREAD_PIPE_ID_HDS_MREAD_ISO_B_3 0x16
+#define MICROREAD_PIPE_ID_HDS_MREAD_BPRIME 0x17
+#define MICROREAD_PIPE_ID_HDS_MREAD_ISO_A_3 0x18
+#define MICROREAD_PIPE_ID_HDS_MREAD_GEN 0x1B
+#define MICROREAD_PIPE_ID_HDS_STACKED_ELEMENT 0x1C
+#define MICROREAD_PIPE_ID_HDS_INSTANCES 0x1D
+#define MICROREAD_PIPE_ID_HDS_TESTRF 0x1E
+#define MICROREAD_PIPE_ID_HDS_P2P_TARGET 0x1F
+#define MICROREAD_PIPE_ID_HDS_P2P_INITIATOR 0x20
+
+/* Events */
+#define MICROREAD_EVT_MREAD_DISCOVERY_OCCURED NFC_HCI_EVT_TARGET_DISCOVERED
+#define MICROREAD_EVT_MREAD_CARD_FOUND 0x3D
+#define MICROREAD_EMCF_A_ATQA 0
+#define MICROREAD_EMCF_A_SAK 2
+#define MICROREAD_EMCF_A_LEN 3
+#define MICROREAD_EMCF_A_UID 4
+#define MICROREAD_EMCF_A3_ATQA 0
+#define MICROREAD_EMCF_A3_SAK 2
+#define MICROREAD_EMCF_A3_LEN 3
+#define MICROREAD_EMCF_A3_UID 4
+#define MICROREAD_EMCF_B_UID 0
+#define MICROREAD_EMCF_T1_ATQA 0
+#define MICROREAD_EMCF_T1_UID 4
+#define MICROREAD_EMCF_T3_UID 0
+#define MICROREAD_EVT_MREAD_DISCOVERY_START NFC_HCI_EVT_READER_REQUESTED
+#define MICROREAD_EVT_MREAD_DISCOVERY_START_SOME 0x3E
+#define MICROREAD_EVT_MREAD_DISCOVERY_STOP NFC_HCI_EVT_END_OPERATION
+#define MICROREAD_EVT_MREAD_SIM_REQUESTS 0x3F
+#define MICROREAD_EVT_MCARD_EXCHANGE NFC_HCI_EVT_TARGET_DISCOVERED
+#define MICROREAD_EVT_P2P_INITIATOR_EXCHANGE_TO_RF 0x20
+#define MICROREAD_EVT_P2P_INITIATOR_EXCHANGE_FROM_RF 0x21
+#define MICROREAD_EVT_MCARD_FIELD_ON 0x11
+#define MICROREAD_EVT_P2P_TARGET_ACTIVATED 0x13
+#define MICROREAD_EVT_P2P_TARGET_DEACTIVATED 0x12
+#define MICROREAD_EVT_MCARD_FIELD_OFF 0x14
+
+/* Commands */
+#define MICROREAD_CMD_MREAD_EXCHANGE 0x10
+#define MICROREAD_CMD_MREAD_SUBSCRIBE 0x3F
+
+/* Hosts IDs */
+#define MICROREAD_ELT_ID_HDS NFC_HCI_TERMINAL_HOST_ID
+#define MICROREAD_ELT_ID_SIM NFC_HCI_UICC_HOST_ID
+#define MICROREAD_ELT_ID_SE1 0x03
+#define MICROREAD_ELT_ID_SE2 0x04
+#define MICROREAD_ELT_ID_SE3 0x05
+
+static struct nfc_hci_gate microread_gates[] = {
+	{MICROREAD_GATE_ID_ADM, MICROREAD_PIPE_ID_ADMIN},
+	{MICROREAD_GATE_ID_LOOPBACK, MICROREAD_PIPE_ID_HDS_LOOPBACK},
+	{MICROREAD_GATE_ID_IDT, MICROREAD_PIPE_ID_HDS_IDT},
+	{MICROREAD_GATE_ID_LMS, MICROREAD_PIPE_ID_LMS},
+	{MICROREAD_GATE_ID_MREAD_ISO_B, MICROREAD_PIPE_ID_HDS_MREAD_ISO_B},
+	{MICROREAD_GATE_ID_MREAD_ISO_A, MICROREAD_PIPE_ID_HDS_MREAD_ISO_A},
+	{MICROREAD_GATE_ID_MREAD_ISO_A_3, MICROREAD_PIPE_ID_HDS_MREAD_ISO_A_3},
+	{MICROREAD_GATE_ID_MGT, MICROREAD_PIPE_ID_MGT},
+	{MICROREAD_GATE_ID_OS, MICROREAD_PIPE_ID_OS},
+	{MICROREAD_GATE_ID_MREAD_NFC_T1, MICROREAD_PIPE_ID_HDS_MREAD_NFC_T1},
+	{MICROREAD_GATE_ID_MREAD_NFC_T3, MICROREAD_PIPE_ID_HDS_MREAD_NFC_T3},
+	{MICROREAD_GATE_ID_P2P_TARGET, MICROREAD_PIPE_ID_HDS_P2P_TARGET},
+	{MICROREAD_GATE_ID_P2P_INITIATOR, MICROREAD_PIPE_ID_HDS_P2P_INITIATOR}
+};
+
+/* Largest headroom needed for outgoing custom commands */
+#define MICROREAD_CMDS_HEADROOM	2
+#define MICROREAD_CMD_TAILROOM	2
+
+struct microread_info {
+	struct nfc_phy_ops *phy_ops;
+	void *phy_id;
+
+	struct nfc_hci_dev *hdev;
+
+	int async_cb_type;
+	data_exchange_cb_t async_cb;
+	void *async_cb_context;
+};
+
+static int microread_open(struct nfc_hci_dev *hdev)
+{
+	struct microread_info *info = nfc_hci_get_clientdata(hdev);
+
+	return info->phy_ops->enable(info->phy_id);
+}
+
+static void microread_close(struct nfc_hci_dev *hdev)
+{
+	struct microread_info *info = nfc_hci_get_clientdata(hdev);
+
+	info->phy_ops->disable(info->phy_id);
+}
+
+static int microread_hci_ready(struct nfc_hci_dev *hdev)
+{
+	int r;
+	u8 param[4];
+
+	param[0] = 0x03;
+	r = nfc_hci_send_cmd(hdev, MICROREAD_GATE_ID_MREAD_ISO_A,
+			     MICROREAD_CMD_MREAD_SUBSCRIBE, param, 1, NULL);
+	if (r)
+		return r;
+
+	r = nfc_hci_send_cmd(hdev, MICROREAD_GATE_ID_MREAD_ISO_A_3,
+			     MICROREAD_CMD_MREAD_SUBSCRIBE, NULL, 0, NULL);
+	if (r)
+		return r;
+
+	param[0] = 0x00;
+	param[1] = 0x03;
+	param[2] = 0x00;
+	r = nfc_hci_send_cmd(hdev, MICROREAD_GATE_ID_MREAD_ISO_B,
+			     MICROREAD_CMD_MREAD_SUBSCRIBE, param, 3, NULL);
+	if (r)
+		return r;
+
+	r = nfc_hci_send_cmd(hdev, MICROREAD_GATE_ID_MREAD_NFC_T1,
+			     MICROREAD_CMD_MREAD_SUBSCRIBE, NULL, 0, NULL);
+	if (r)
+		return r;
+
+	param[0] = 0xFF;
+	param[1] = 0xFF;
+	param[2] = 0x00;
+	param[3] = 0x00;
+	r = nfc_hci_send_cmd(hdev, MICROREAD_GATE_ID_MREAD_NFC_T3,
+			     MICROREAD_CMD_MREAD_SUBSCRIBE, param, 4, NULL);
+
+	return r;
+}
+
+static int microread_xmit(struct nfc_hci_dev *hdev, struct sk_buff *skb)
+{
+	struct microread_info *info = nfc_hci_get_clientdata(hdev);
+
+	return info->phy_ops->write(info->phy_id, skb);
+}
+
+static int microread_start_poll(struct nfc_hci_dev *hdev,
+				u32 im_protocols, u32 tm_protocols)
+{
+	int r;
+
+	u8 param[2];
+	u8 mode;
+
+	param[0] = 0x00;
+	param[1] = 0x00;
+
+	if (im_protocols & NFC_PROTO_ISO14443_MASK)
+		param[0] |= (1 << 2);
+
+	if (im_protocols & NFC_PROTO_ISO14443_B_MASK)
+		param[0] |= 1;
+
+	if (im_protocols & NFC_PROTO_MIFARE_MASK)
+		param[1] |= 1;
+
+	if (im_protocols & NFC_PROTO_JEWEL_MASK)
+		param[0] |= (1 << 1);
+
+	if (im_protocols & NFC_PROTO_FELICA_MASK)
+		param[0] |= (1 << 5);
+
+	if (im_protocols & NFC_PROTO_NFC_DEP_MASK)
+		param[1] |= (1 << 1);
+
+	if ((im_protocols | tm_protocols) & NFC_PROTO_NFC_DEP_MASK) {
+		hdev->gb = nfc_get_local_general_bytes(hdev->ndev,
+						       &hdev->gb_len);
+		if (hdev->gb == NULL || hdev->gb_len == 0) {
+			im_protocols &= ~NFC_PROTO_NFC_DEP_MASK;
+			tm_protocols &= ~NFC_PROTO_NFC_DEP_MASK;
+		}
+	}
+
+	r = nfc_hci_send_event(hdev, MICROREAD_GATE_ID_MREAD_ISO_A,
+			       MICROREAD_EVT_MREAD_DISCOVERY_STOP, NULL, 0);
+	if (r)
+		return r;
+
+	mode = 0xff;
+	r = nfc_hci_set_param(hdev, MICROREAD_GATE_ID_P2P_TARGET,
+			      MICROREAD_PAR_P2P_TARGET_MODE, &mode, 1);
+	if (r)
+		return r;
+
+	if (im_protocols & NFC_PROTO_NFC_DEP_MASK) {
+		r = nfc_hci_set_param(hdev, MICROREAD_GATE_ID_P2P_INITIATOR,
+				      MICROREAD_PAR_P2P_INITIATOR_GI,
+				      hdev->gb, hdev->gb_len);
+		if (r)
+			return r;
+	}
+
+	if (tm_protocols & NFC_PROTO_NFC_DEP_MASK) {
+		r = nfc_hci_set_param(hdev, MICROREAD_GATE_ID_P2P_TARGET,
+				      MICROREAD_PAR_P2P_TARGET_GT,
+				      hdev->gb, hdev->gb_len);
+		if (r)
+			return r;
+
+		mode = 0x02;
+		r = nfc_hci_set_param(hdev, MICROREAD_GATE_ID_P2P_TARGET,
+				      MICROREAD_PAR_P2P_TARGET_MODE, &mode, 1);
+		if (r)
+			return r;
+	}
+
+	return nfc_hci_send_event(hdev, MICROREAD_GATE_ID_MREAD_ISO_A,
+				  MICROREAD_EVT_MREAD_DISCOVERY_START_SOME,
+				  param, 2);
+}
+
+static int microread_dep_link_up(struct nfc_hci_dev *hdev,
+				struct nfc_target *target, u8 comm_mode,
+				u8 *gb, size_t gb_len)
+{
+	struct sk_buff *rgb_skb = NULL;
+	int r;
+
+	r = nfc_hci_get_param(hdev, target->hci_reader_gate,
+			      MICROREAD_PAR_P2P_INITIATOR_GT, &rgb_skb);
+	if (r < 0)
+		return r;
+
+	if (rgb_skb->len == 0 || rgb_skb->len > NFC_GB_MAXSIZE) {
+		r = -EPROTO;
+		goto exit;
+	}
+
+	r = nfc_set_remote_general_bytes(hdev->ndev, rgb_skb->data,
+					 rgb_skb->len);
+	if (r == 0)
+		r = nfc_dep_link_is_up(hdev->ndev, target->idx, comm_mode,
+				       NFC_RF_INITIATOR);
+exit:
+	kfree_skb(rgb_skb);
+
+	return r;
+}
+
+static int microread_dep_link_down(struct nfc_hci_dev *hdev)
+{
+	return nfc_hci_send_event(hdev, MICROREAD_GATE_ID_P2P_INITIATOR,
+				  MICROREAD_EVT_MREAD_DISCOVERY_STOP, NULL, 0);
+}
+
+static int microread_target_from_gate(struct nfc_hci_dev *hdev, u8 gate,
+				      struct nfc_target *target)
+{
+	switch (gate) {
+	case MICROREAD_GATE_ID_P2P_INITIATOR:
+		target->supported_protocols = NFC_PROTO_NFC_DEP_MASK;
+		break;
+	default:
+		return -EPROTO;
+	}
+
+	return 0;
+}
+
+static int microread_complete_target_discovered(struct nfc_hci_dev *hdev,
+						u8 gate,
+						struct nfc_target *target)
+{
+	return 0;
+}
+
+#define MICROREAD_CB_TYPE_READER_ALL 1
+
+static void microread_im_transceive_cb(void *context, struct sk_buff *skb,
+				       int err)
+{
+	struct microread_info *info = context;
+
+	switch (info->async_cb_type) {
+	case MICROREAD_CB_TYPE_READER_ALL:
+		if (err == 0) {
+			if (skb->len == 0) {
+				err = -EPROTO;
+				kfree_skb(skb);
+				info->async_cb(info->async_cb_context, NULL,
+					       -EPROTO);
+				return;
+			}
+
+			if (skb->data[skb->len - 1] != 0) {
+				err = nfc_hci_result_to_errno(
+						       skb->data[skb->len - 1]);
+				kfree_skb(skb);
+				info->async_cb(info->async_cb_context, NULL,
+					       err);
+				return;
+			}
+
+			skb_trim(skb, skb->len - 1);	/* RF Error ind. */
+		}
+		info->async_cb(info->async_cb_context, skb, err);
+		break;
+	default:
+		if (err == 0)
+			kfree_skb(skb);
+		break;
+	}
+}
+
+/*
+ * Returns:
+ * <= 0: driver handled the data exchange
+ *    1: driver doesn't especially handle, please do standard processing
+ */
+static int microread_im_transceive(struct nfc_hci_dev *hdev,
+				   struct nfc_target *target,
+				   struct sk_buff *skb, data_exchange_cb_t cb,
+				   void *cb_context)
+{
+	struct microread_info *info = nfc_hci_get_clientdata(hdev);
+	u8 control_bits;
+	u16 crc;
+
+	pr_info("data exchange to gate 0x%x\n", target->hci_reader_gate);
+
+	if (target->hci_reader_gate == MICROREAD_GATE_ID_P2P_INITIATOR) {
+		*skb_push(skb, 1) = 0;
+
+		return nfc_hci_send_event(hdev, target->hci_reader_gate,
+				     MICROREAD_EVT_P2P_INITIATOR_EXCHANGE_TO_RF,
+				     skb->data, skb->len);
+	}
+
+	switch (target->hci_reader_gate) {
+	case MICROREAD_GATE_ID_MREAD_ISO_A:
+		control_bits = 0xCB;
+		break;
+	case MICROREAD_GATE_ID_MREAD_ISO_A_3:
+		control_bits = 0xCB;
+		break;
+	case MICROREAD_GATE_ID_MREAD_ISO_B:
+		control_bits = 0xCB;
+		break;
+	case MICROREAD_GATE_ID_MREAD_NFC_T1:
+		control_bits = 0x1B;
+
+		crc = crc_ccitt(0xffff, skb->data, skb->len);
+		crc = ~crc;
+		*skb_put(skb, 1) = crc & 0xff;
+		*skb_put(skb, 1) = crc >> 8;
+		break;
+	case MICROREAD_GATE_ID_MREAD_NFC_T3:
+		control_bits = 0xDB;
+		break;
+	default:
+		pr_info("Abort im_transceive to invalid gate 0x%x\n",
+			target->hci_reader_gate);
+		return 1;
+	}
+
+	*skb_push(skb, 1) = control_bits;
+
+	info->async_cb_type = MICROREAD_CB_TYPE_READER_ALL;
+	info->async_cb = cb;
+	info->async_cb_context = cb_context;
+
+	return nfc_hci_send_cmd_async(hdev, target->hci_reader_gate,
+				      MICROREAD_CMD_MREAD_EXCHANGE,
+				      skb->data, skb->len,
+				      microread_im_transceive_cb, info);
+}
+
+static int microread_tm_send(struct nfc_hci_dev *hdev, struct sk_buff *skb)
+{
+	int r;
+
+	r = nfc_hci_send_event(hdev, MICROREAD_GATE_ID_P2P_TARGET,
+			       MICROREAD_EVT_MCARD_EXCHANGE,
+			       skb->data, skb->len);
+
+	kfree_skb(skb);
+
+	return r;
+}
+
+static void microread_target_discovered(struct nfc_hci_dev *hdev, u8 gate,
+					struct sk_buff *skb)
+{
+	struct nfc_target *targets;
+	int r = 0;
+
+	pr_info("target discovered to gate 0x%x\n", gate);
+
+	targets = kzalloc(sizeof(struct nfc_target), GFP_KERNEL);
+	if (targets == NULL) {
+		r = -ENOMEM;
+		goto exit;
+	}
+
+	targets->hci_reader_gate = gate;
+
+	switch (gate) {
+	case MICROREAD_GATE_ID_MREAD_ISO_A:
+		targets->supported_protocols =
+		      nfc_hci_sak_to_protocol(skb->data[MICROREAD_EMCF_A_SAK]);
+		targets->sens_res =
+			 be16_to_cpu(*(u16 *)&skb->data[MICROREAD_EMCF_A_ATQA]);
+		targets->sel_res = skb->data[MICROREAD_EMCF_A_SAK];
+		memcpy(targets->nfcid1, &skb->data[MICROREAD_EMCF_A_UID],
+		       skb->data[MICROREAD_EMCF_A_LEN]);
+		targets->nfcid1_len = skb->data[MICROREAD_EMCF_A_LEN];
+		break;
+	case MICROREAD_GATE_ID_MREAD_ISO_A_3:
+		targets->supported_protocols =
+		      nfc_hci_sak_to_protocol(skb->data[MICROREAD_EMCF_A3_SAK]);
+		targets->sens_res =
+			 be16_to_cpu(*(u16 *)&skb->data[MICROREAD_EMCF_A3_ATQA]);
+		targets->sel_res = skb->data[MICROREAD_EMCF_A3_SAK];
+		memcpy(targets->nfcid1, &skb->data[MICROREAD_EMCF_A3_UID],
+		       skb->data[MICROREAD_EMCF_A3_LEN]);
+		targets->nfcid1_len = skb->data[MICROREAD_EMCF_A3_LEN];
+		break;
+	case MICROREAD_GATE_ID_MREAD_ISO_B:
+		targets->supported_protocols = NFC_PROTO_ISO14443_B_MASK;
+		memcpy(targets->nfcid1, &skb->data[MICROREAD_EMCF_B_UID], 4);
+		targets->nfcid1_len = 4;
+		break;
+	case MICROREAD_GATE_ID_MREAD_NFC_T1:
+		targets->supported_protocols = NFC_PROTO_JEWEL_MASK;
+		targets->sens_res =
+			le16_to_cpu(*(u16 *)&skb->data[MICROREAD_EMCF_T1_ATQA]);
+		memcpy(targets->nfcid1, &skb->data[MICROREAD_EMCF_T1_UID], 4);
+		targets->nfcid1_len = 4;
+		break;
+	case MICROREAD_GATE_ID_MREAD_NFC_T3:
+		targets->supported_protocols = NFC_PROTO_FELICA_MASK;
+		memcpy(targets->nfcid1, &skb->data[MICROREAD_EMCF_T3_UID], 8);
+		targets->nfcid1_len = 8;
+		break;
+	default:
+		pr_info("discard target discovered to gate 0x%x\n", gate);
+		goto exit_free;
+	}
+
+	r = nfc_targets_found(hdev->ndev, targets, 1);
+
+exit_free:
+	kfree(targets);
+
+exit:
+	kfree_skb(skb);
+
+	if (r)
+		pr_err("Failed to handle discovered target err=%d", r);
+}
+
+static int microread_event_received(struct nfc_hci_dev *hdev, u8 gate,
+				     u8 event, struct sk_buff *skb)
+{
+	int r;
+	u8 mode;
+
+	pr_info("Microread received event 0x%x to gate 0x%x\n", event, gate);
+
+	switch (event) {
+	case MICROREAD_EVT_MREAD_CARD_FOUND:
+		microread_target_discovered(hdev, gate, skb);
+		return 0;
+
+	case MICROREAD_EVT_P2P_INITIATOR_EXCHANGE_FROM_RF:
+		if (skb->len < 1) {
+			kfree_skb(skb);
+			return -EPROTO;
+		}
+
+		if (skb->data[skb->len - 1]) {
+			kfree_skb(skb);
+			return -EIO;
+		}
+
+		skb_trim(skb, skb->len - 1);
+
+		r = nfc_tm_data_received(hdev->ndev, skb);
+		break;
+
+	case MICROREAD_EVT_MCARD_FIELD_ON:
+	case MICROREAD_EVT_MCARD_FIELD_OFF:
+		kfree_skb(skb);
+		return 0;
+
+	case MICROREAD_EVT_P2P_TARGET_ACTIVATED:
+		r = nfc_tm_activated(hdev->ndev, NFC_PROTO_NFC_DEP_MASK,
+				     NFC_COMM_PASSIVE, skb->data,
+				     skb->len);
+
+		kfree_skb(skb);
+		break;
+
+	case MICROREAD_EVT_MCARD_EXCHANGE:
+		if (skb->len < 1) {
+			kfree_skb(skb);
+			return -EPROTO;
+		}
+
+		if (skb->data[skb->len-1]) {
+			kfree_skb(skb);
+			return -EIO;
+		}
+
+		skb_trim(skb, skb->len - 1);
+
+		r = nfc_tm_data_received(hdev->ndev, skb);
+		break;
+
+	case MICROREAD_EVT_P2P_TARGET_DEACTIVATED:
+		kfree_skb(skb);
+
+		mode = 0xff;
+		r = nfc_hci_set_param(hdev, MICROREAD_GATE_ID_P2P_TARGET,
+				      MICROREAD_PAR_P2P_TARGET_MODE, &mode, 1);
+		if (r)
+			break;
+
+		r = nfc_hci_send_event(hdev, gate,
+				       MICROREAD_EVT_MREAD_DISCOVERY_STOP, NULL,
+				       0);
+		break;
+
+	default:
+		return 1;
+	}
+
+	return r;
+}
+
+static struct nfc_hci_ops microread_hci_ops = {
+	.open = microread_open,
+	.close = microread_close,
+	.hci_ready = microread_hci_ready,
+	.xmit = microread_xmit,
+	.start_poll = microread_start_poll,
+	.dep_link_up = microread_dep_link_up,
+	.dep_link_down = microread_dep_link_down,
+	.target_from_gate = microread_target_from_gate,
+	.complete_target_discovered = microread_complete_target_discovered,
+	.im_transceive = microread_im_transceive,
+	.tm_send = microread_tm_send,
+	.check_presence = NULL,
+	.event_received = microread_event_received,
+};
+
+int microread_probe(void *phy_id, struct nfc_phy_ops *phy_ops, char *llc_name,
+		    int phy_headroom, int phy_tailroom, int phy_payload,
+		    struct nfc_hci_dev **hdev)
+{
+	struct microread_info *info;
+	unsigned long quirks = 0;
+	u32 protocols, se;
+	struct nfc_hci_init_data init_data;
+	int r;
+
+	info = kzalloc(sizeof(struct microread_info), GFP_KERNEL);
+	if (!info) {
+		pr_err("Cannot allocate memory for microread_info.\n");
+		r = -ENOMEM;
+		goto err_info_alloc;
+	}
+
+	info->phy_ops = phy_ops;
+	info->phy_id = phy_id;
+
+	init_data.gate_count = ARRAY_SIZE(microread_gates);
+	memcpy(init_data.gates, microread_gates, sizeof(microread_gates));
+
+	strcpy(init_data.session_id, "MICROREA");
+
+	set_bit(NFC_HCI_QUIRK_SHORT_CLEAR, &quirks);
+
+	protocols = NFC_PROTO_JEWEL_MASK |
+		    NFC_PROTO_MIFARE_MASK |
+		    NFC_PROTO_FELICA_MASK |
+		    NFC_PROTO_ISO14443_MASK |
+		    NFC_PROTO_ISO14443_B_MASK |
+		    NFC_PROTO_NFC_DEP_MASK;
+
+	se = NFC_SE_UICC | NFC_SE_EMBEDDED;
+
+	info->hdev = nfc_hci_allocate_device(&microread_hci_ops, &init_data,
+					     quirks, protocols, se, llc_name,
+					     phy_headroom +
+					     MICROREAD_CMDS_HEADROOM,
+					     phy_tailroom +
+					     MICROREAD_CMD_TAILROOM,
+					     phy_payload);
+	if (!info->hdev) {
+		pr_err("Cannot allocate nfc hdev.\n");
+		r = -ENOMEM;
+		goto err_alloc_hdev;
+	}
+
+	nfc_hci_set_clientdata(info->hdev, info);
+
+	r = nfc_hci_register_device(info->hdev);
+	if (r)
+		goto err_regdev;
+
+	*hdev = info->hdev;
+
+	return 0;
+
+err_regdev:
+	nfc_hci_free_device(info->hdev);
+
+err_alloc_hdev:
+	kfree(info);
+
+err_info_alloc:
+	return r;
+}
+EXPORT_SYMBOL(microread_probe);
+
+void microread_remove(struct nfc_hci_dev *hdev)
+{
+	struct microread_info *info = nfc_hci_get_clientdata(hdev);
+
+	nfc_hci_unregister_device(hdev);
+	nfc_hci_free_device(hdev);
+	kfree(info);
+}
+EXPORT_SYMBOL(microread_remove);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION(DRIVER_DESC);
diff --git a/drivers/nfc/microread/microread.h b/drivers/nfc/microread/microread.h
new file mode 100644
index 000000000000..64b447a1c5bf
--- /dev/null
+++ b/drivers/nfc/microread/microread.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (C) 2011 - 2012  Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the
+ * Free Software Foundation, Inc.,
+ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __LOCAL_MICROREAD_H_
+#define __LOCAL_MICROREAD_H_
+
+#include <net/nfc/hci.h>
+
+#define DRIVER_DESC "NFC driver for microread"
+
+int microread_probe(void *phy_id, struct nfc_phy_ops *phy_ops, char *llc_name,
+		    int phy_headroom, int phy_tailroom, int phy_payload,
+		    struct nfc_hci_dev **hdev);
+
+void microread_remove(struct nfc_hci_dev *hdev);
+
+#endif /* __LOCAL_MICROREAD_H_ */
diff --git a/include/linux/platform_data/microread.h b/include/linux/platform_data/microread.h
new file mode 100644
index 000000000000..cfda59b226ee
--- /dev/null
+++ b/include/linux/platform_data/microread.h
@@ -0,0 +1,35 @@
+/*
+ * Driver include for the PN544 NFC chip.
+ *
+ * Copyright (C) 2011 Tieto Poland
+ * Copyright (C) 2012 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _MICROREAD_H
+#define _MICROREAD_H
+
+#include <linux/i2c.h>
+
+#define MICROREAD_DRIVER_NAME	"microread"
+
+/* board config platform data for microread */
+struct microread_nfc_platform_data {
+	unsigned int rst_gpio;
+	unsigned int irq_gpio;
+	unsigned int ioh_gpio;
+};
+
+#endif /* _MICROREAD_H */
-- 
cgit v1.2.3


From eaca6eae3e0c41d41fcb9d1d70e00934988dff2e Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 25 Nov 2012 23:12:10 -0500
Subject: sanitize rt_sigaction() situation a bit

	Switch from __ARCH_WANT_SYS_RT_SIGACTION to opposite
(!CONFIG_ODD_RT_SIGACTION); the only two architectures that
need it are alpha and sparc.  The reason for use of CONFIG_...
instead of __ARCH_... is that it's needed only kernel-side
and doing it that way avoids a mess with include order on many
architectures.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/Kconfig                            |  5 +++++
 arch/alpha/Kconfig                      |  1 +
 arch/ia64/include/asm/unistd.h          |  4 ----
 arch/powerpc/include/asm/syscalls.h     |  3 ---
 arch/sparc/Kconfig                      |  1 +
 arch/sparc/kernel/sys_sparc_32.c        | 11 +++++------
 arch/x86/um/shared/sysdep/syscalls_32.h |  5 -----
 arch/xtensa/include/asm/syscall.h       |  5 -----
 include/asm-generic/syscalls.h          |  5 -----
 include/linux/syscalls.h                |  6 ++++++
 kernel/signal.c                         |  4 ++--
 11 files changed, 20 insertions(+), 30 deletions(-)

(limited to 'include/linux')

diff --git a/arch/Kconfig b/arch/Kconfig
index 7f8f281f2585..6e4c32a5a358 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -373,4 +373,9 @@ config CLONE_BACKWARDS2
 	help
 	  Architecture has the first two arguments of clone(2) swapped.
 
+config ODD_RT_SIGACTION
+	bool
+	help
+	  Architecture has unusual rt_sigaction(2) arguments
+
 source "kernel/gcov/Kconfig"
diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index 9d5904cc7712..8696c03a9d73 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -23,6 +23,7 @@ config ALPHA
 	select HAVE_MOD_ARCH_SPECIFIC
 	select MODULES_USE_ELF_RELA
 	select GENERIC_SIGALTSTACK
+	select ODD_RT_SIGACTION
 	help
 	  The Alpha is a 64-bit general-purpose processor designed and
 	  marketed by the Digital Equipment Corporation of blessed memory,
diff --git a/arch/ia64/include/asm/unistd.h b/arch/ia64/include/asm/unistd.h
index c3cc42a15af1..bfbb109458be 100644
--- a/arch/ia64/include/asm/unistd.h
+++ b/arch/ia64/include/asm/unistd.h
@@ -49,10 +49,6 @@ asmlinkage unsigned long sys_mmap2(
 struct pt_regs;
 struct sigaction;
 asmlinkage long sys_ia64_pipe(void);
-asmlinkage long sys_rt_sigaction(int sig,
-				 const struct sigaction __user *act,
-				 struct sigaction __user *oact,
-				 size_t sigsetsize);
 
 /*
  * "Conditional" syscalls
diff --git a/arch/powerpc/include/asm/syscalls.h b/arch/powerpc/include/asm/syscalls.h
index b5308d3e6d39..5c51659e61d5 100644
--- a/arch/powerpc/include/asm/syscalls.h
+++ b/arch/powerpc/include/asm/syscalls.h
@@ -19,9 +19,6 @@ asmlinkage unsigned long sys_mmap2(unsigned long addr, size_t len,
 		unsigned long fd, unsigned long pgoff);
 asmlinkage long sys_pipe(int __user *fildes);
 asmlinkage long sys_pipe2(int __user *fildes, int flags);
-asmlinkage long sys_rt_sigaction(int sig,
-		const struct sigaction __user *act,
-		struct sigaction __user *oact, size_t sigsetsize);
 asmlinkage long ppc64_personality(unsigned long personality);
 asmlinkage int ppc_rtas(struct rtas_args __user *uargs);
 asmlinkage time_t sys64_time(time_t __user * tloc);
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 9f2edb5c5551..89dde2f0653a 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -41,6 +41,7 @@ config SPARC
 	select GENERIC_STRNCPY_FROM_USER
 	select GENERIC_STRNLEN_USER
 	select MODULES_USE_ELF_RELA
+	select ODD_RT_SIGACTION
 
 config SPARC32
 	def_bool !64BIT
diff --git a/arch/sparc/kernel/sys_sparc_32.c b/arch/sparc/kernel/sys_sparc_32.c
index 2da0bdcae52f..cdd2d7035930 100644
--- a/arch/sparc/kernel/sys_sparc_32.c
+++ b/arch/sparc/kernel/sys_sparc_32.c
@@ -197,12 +197,11 @@ sparc_sigaction (int sig, const struct old_sigaction __user *act,
 	return ret;
 }
 
-asmlinkage long
-sys_rt_sigaction(int sig,
-		 const struct sigaction __user *act,
-		 struct sigaction __user *oact,
-		 void __user *restorer,
-		 size_t sigsetsize)
+SYSCALL_DEFINE5(rt_sigaction, int, sig,
+		 const struct sigaction __user *, act,
+		 struct sigaction __user *, oact,
+		 void __user *, restorer,
+		 size_t, sigsetsize)
 {
 	struct k_sigaction new_ka, old_ka;
 	int ret;
diff --git a/arch/x86/um/shared/sysdep/syscalls_32.h b/arch/x86/um/shared/sysdep/syscalls_32.h
index 8436079be914..68fd2cf526fd 100644
--- a/arch/x86/um/shared/sysdep/syscalls_32.h
+++ b/arch/x86/um/shared/sysdep/syscalls_32.h
@@ -8,11 +8,6 @@
 
 typedef long syscall_handler_t(struct pt_regs);
 
-/* Not declared on x86, incompatible declarations on x86_64, so these have
- * to go here rather than in sys_call_table.c
- */
-extern syscall_handler_t sys_rt_sigaction;
-
 extern syscall_handler_t *sys_call_table[];
 
 #define EXECUTE_SYSCALL(syscall, regs) \
diff --git a/arch/xtensa/include/asm/syscall.h b/arch/xtensa/include/asm/syscall.h
index 8d5e47fad095..6cf7c6c07a84 100644
--- a/arch/xtensa/include/asm/syscall.h
+++ b/arch/xtensa/include/asm/syscall.h
@@ -9,15 +9,10 @@
  */
 
 struct pt_regs;
-struct sigaction;
 asmlinkage long xtensa_ptrace(long, long, long, long);
 asmlinkage long xtensa_sigreturn(struct pt_regs*);
 asmlinkage long xtensa_rt_sigreturn(struct pt_regs*);
 asmlinkage long xtensa_sigaltstack(struct pt_regs *regs);
-asmlinkage long sys_rt_sigaction(int,
-				 const struct sigaction __user *,
-				 struct sigaction __user *,
-				 size_t);
 asmlinkage long xtensa_shmat(int, char __user *, int);
 asmlinkage long xtensa_fadvise64_64(int, int,
 				    unsigned long long, unsigned long long);
diff --git a/include/asm-generic/syscalls.h b/include/asm-generic/syscalls.h
index 1db51b8524e9..6a8d620a84d4 100644
--- a/include/asm-generic/syscalls.h
+++ b/include/asm-generic/syscalls.h
@@ -36,9 +36,4 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs);
 asmlinkage long sys_rt_sigsuspend(sigset_t __user *unewset, size_t sigsetsize);
 #endif
 
-#ifndef sys_rt_sigaction
-asmlinkage long sys_rt_sigaction(int sig, const struct sigaction __user *act,
-			 struct sigaction __user *oact, size_t sigsetsize);
-#endif
-
 #endif /* __ASM_GENERIC_SYSCALLS_H */
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 45e2db270255..75defcd1c276 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -377,6 +377,12 @@ asmlinkage long sys_init_module(void __user *umod, unsigned long len,
 asmlinkage long sys_delete_module(const char __user *name_user,
 				unsigned int flags);
 
+#ifndef CONFIG_ODD_RT_SIGACTION
+asmlinkage long sys_rt_sigaction(int,
+				 const struct sigaction __user *,
+				 struct sigaction __user *,
+				 size_t);
+#endif
 asmlinkage long sys_rt_sigprocmask(int how, sigset_t __user *set,
 				sigset_t __user *oset, size_t sigsetsize);
 asmlinkage long sys_rt_sigpending(sigset_t __user *set, size_t sigsetsize);
diff --git a/kernel/signal.c b/kernel/signal.c
index 53cd5c4d1172..00cd1ce998be 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -3231,7 +3231,7 @@ SYSCALL_DEFINE3(sigprocmask, int, how, old_sigset_t __user *, nset,
 }
 #endif /* __ARCH_WANT_SYS_SIGPROCMASK */
 
-#ifdef __ARCH_WANT_SYS_RT_SIGACTION
+#ifndef CONFIG_ODD_RT_SIGACTION
 /**
  *  sys_rt_sigaction - alter an action taken by a process
  *  @sig: signal to be sent
@@ -3265,7 +3265,7 @@ SYSCALL_DEFINE4(rt_sigaction, int, sig,
 out:
 	return ret;
 }
-#endif /* __ARCH_WANT_SYS_RT_SIGACTION */
+#endif /* !CONFIG_ODD_RT_SIGACTION */
 
 #ifdef __ARCH_WANT_SYS_SGETMASK
 
-- 
cgit v1.2.3


From 322a56cb1fcbe228eee5cdb8a9c6df9f797d998c Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 25 Dec 2012 13:32:58 -0500
Subject: generic compat_sys_rt_sigprocmask()

conditional on GENERIC_COMPAT_RT_SIGPROCMASK; by the end of that series
it will become the same thing as COMPAT and conditional will die out.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/Kconfig           |  3 +++
 include/linux/compat.h |  8 +++++++-
 kernel/compat.c        | 13 ++++++++++++-
 kernel/signal.c        | 41 +++++++++++++++++++++++++++++++++++++++++
 4 files changed, 63 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/arch/Kconfig b/arch/Kconfig
index 6e4c32a5a358..374a68adbf1f 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -359,6 +359,9 @@ config MODULES_USE_ELF_REL
 config GENERIC_SIGALTSTACK
 	bool
 
+config GENERIC_COMPAT_RT_SIGPROCMASK
+	bool
+
 #
 # ABI hall of shame
 #
diff --git a/include/linux/compat.h b/include/linux/compat.h
index dec7e2d18875..9d3c2a98d537 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -401,7 +401,8 @@ asmlinkage long compat_sys_settimeofday(struct compat_timeval __user *tv,
 asmlinkage long compat_sys_adjtimex(struct compat_timex __user *utp);
 
 extern int compat_printk(const char *fmt, ...);
-extern void sigset_from_compat(sigset_t *set, compat_sigset_t *compat);
+extern void sigset_from_compat(sigset_t *set, const compat_sigset_t *compat);
+extern void sigset_to_compat(compat_sigset_t *compat, const sigset_t *set);
 
 asmlinkage long compat_sys_migrate_pages(compat_pid_t pid,
 		compat_ulong_t maxnode, const compat_ulong_t __user *old_nodes,
@@ -592,6 +593,11 @@ asmlinkage long compat_sys_rt_sigtimedwait(compat_sigset_t __user *uthese,
 		struct compat_timespec __user *uts, compat_size_t sigsetsize);
 asmlinkage long compat_sys_rt_sigsuspend(compat_sigset_t __user *unewset,
 					 compat_size_t sigsetsize);
+#ifdef CONFIG_GENERIC_COMPAT_RT_SIGPROCMASK
+asmlinkage long compat_sys_rt_sigprocmask(int how, compat_sigset_t __user *set,
+					  compat_sigset_t __user *oset,
+					  compat_size_t sigsetsize);
+#endif
 asmlinkage long compat_sys_sysinfo(struct compat_sysinfo __user *info);
 asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd,
 				 unsigned long arg);
diff --git a/kernel/compat.c b/kernel/compat.c
index 0c07d435254f..e2a9c4785988 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -971,7 +971,7 @@ long compat_put_bitmap(compat_ulong_t __user *umask, unsigned long *mask,
 }
 
 void
-sigset_from_compat (sigset_t *set, compat_sigset_t *compat)
+sigset_from_compat(sigset_t *set, const compat_sigset_t *compat)
 {
 	switch (_NSIG_WORDS) {
 	case 4: set->sig[3] = compat->sig[6] | (((long)compat->sig[7]) << 32 );
@@ -982,6 +982,17 @@ sigset_from_compat (sigset_t *set, compat_sigset_t *compat)
 }
 EXPORT_SYMBOL_GPL(sigset_from_compat);
 
+void
+sigset_to_compat(compat_sigset_t *compat, const sigset_t *set)
+{
+	switch (_NSIG_WORDS) {
+	case 4: compat->sig[7] = (set->sig[3] >> 32); compat->sig[6] = set->sig[3];
+	case 3: compat->sig[5] = (set->sig[2] >> 32); compat->sig[4] = set->sig[2];
+	case 2: compat->sig[3] = (set->sig[1] >> 32); compat->sig[2] = set->sig[1];
+	case 1: compat->sig[1] = (set->sig[0] >> 32); compat->sig[0] = set->sig[0];
+	}
+}
+
 asmlinkage long
 compat_sys_rt_sigtimedwait (compat_sigset_t __user *uthese,
 		struct compat_siginfo __user *uinfo,
diff --git a/kernel/signal.c b/kernel/signal.c
index 4a0934e03d5c..bce1222b7315 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2613,6 +2613,47 @@ SYSCALL_DEFINE4(rt_sigprocmask, int, how, sigset_t __user *, nset,
 	return 0;
 }
 
+#ifdef CONFIG_COMPAT
+#ifdef CONFIG_GENERIC_COMPAT_RT_SIGPROCMASK
+COMPAT_SYSCALL_DEFINE4(rt_sigprocmask, int, how, compat_sigset_t __user *, nset,
+		compat_sigset_t __user *, oset, compat_size_t, sigsetsize)
+{
+#ifdef __BIG_ENDIAN
+	sigset_t old_set = current->blocked;
+
+	/* XXX: Don't preclude handling different sized sigset_t's.  */
+	if (sigsetsize != sizeof(sigset_t))
+		return -EINVAL;
+
+	if (nset) {
+		compat_sigset_t new32;
+		sigset_t new_set;
+		int error;
+		if (copy_from_user(&new32, nset, sizeof(compat_sigset_t)))
+			return -EFAULT;
+
+		sigset_from_compat(&new_set, &new32);
+		sigdelsetmask(&new_set, sigmask(SIGKILL)|sigmask(SIGSTOP));
+
+		error = sigprocmask(how, &new_set, NULL);
+		if (error)
+			return error;
+	}
+	if (oset) {
+		compat_sigset_t old32;
+		sigset_to_compat(&old32, &old_set);
+		if (copy_to_user(oset, &old_set, sizeof(sigset_t)))
+			return -EFAULT;
+	}
+	return 0;
+#else
+	return sys_rt_sigprocmask(how, (sigset_t __user *)nset,
+				  (sigset_t __user *)oset, sigsetsize);
+#endif
+}
+#endif
+#endif
+
 long do_sigpending(void __user *set, unsigned long sigsetsize)
 {
 	long error = -EINVAL;
-- 
cgit v1.2.3


From fe9c1db2cfc363cd30ecfe6480481b280abf8c0a Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 25 Dec 2012 14:31:38 -0500
Subject: generic compat_sys_rt_sigpending()

conditional on GENERIC_COMPAT_RT_SIGPENDING; by the end of that series
it will become the same thing as COMPAT and conditional will die out.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/Kconfig           |  3 +++
 include/linux/compat.h |  4 ++++
 include/linux/signal.h |  1 -
 kernel/signal.c        | 52 +++++++++++++++++++++++++++++++++-----------------
 4 files changed, 42 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/arch/Kconfig b/arch/Kconfig
index 374a68adbf1f..18c0383dcc42 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -362,6 +362,9 @@ config GENERIC_SIGALTSTACK
 config GENERIC_COMPAT_RT_SIGPROCMASK
 	bool
 
+config GENERIC_COMPAT_RT_SIGPENDING
+	bool
+
 #
 # ABI hall of shame
 #
diff --git a/include/linux/compat.h b/include/linux/compat.h
index 9d3c2a98d537..75548a43a1c5 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -598,6 +598,10 @@ asmlinkage long compat_sys_rt_sigprocmask(int how, compat_sigset_t __user *set,
 					  compat_sigset_t __user *oset,
 					  compat_size_t sigsetsize);
 #endif
+#ifdef CONFIG_GENERIC_COMPAT_RT_SIGPENDING
+asmlinkage long compat_sys_rt_sigpending(compat_sigset_t __user *uset,
+					 compat_size_t sigsetsize);
+#endif
 asmlinkage long compat_sys_sysinfo(struct compat_sysinfo __user *info);
 asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd,
 				 unsigned long arg);
diff --git a/include/linux/signal.h b/include/linux/signal.h
index 0a89ffc48466..786bd99fde65 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -243,7 +243,6 @@ extern int group_send_sig_info(int sig, struct siginfo *info, struct task_struct
 extern int __group_send_sig_info(int, struct siginfo *, struct task_struct *);
 extern long do_rt_tgsigqueueinfo(pid_t tgid, pid_t pid, int sig,
 				 siginfo_t *info);
-extern long do_sigpending(void __user *, unsigned long);
 extern int do_sigtimedwait(const sigset_t *, siginfo_t *,
 				const struct timespec *);
 extern int sigprocmask(int, sigset_t *, sigset_t *);
diff --git a/kernel/signal.c b/kernel/signal.c
index bce1222b7315..3040c349b0e1 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2654,28 +2654,19 @@ COMPAT_SYSCALL_DEFINE4(rt_sigprocmask, int, how, compat_sigset_t __user *, nset,
 #endif
 #endif
 
-long do_sigpending(void __user *set, unsigned long sigsetsize)
+static int do_sigpending(void *set, unsigned long sigsetsize)
 {
-	long error = -EINVAL;
-	sigset_t pending;
-
 	if (sigsetsize > sizeof(sigset_t))
-		goto out;
+		return -EINVAL;
 
 	spin_lock_irq(&current->sighand->siglock);
-	sigorsets(&pending, &current->pending.signal,
+	sigorsets(set, &current->pending.signal,
 		  &current->signal->shared_pending.signal);
 	spin_unlock_irq(&current->sighand->siglock);
 
 	/* Outside the lock because only this thread touches it.  */
-	sigandsets(&pending, &current->blocked, &pending);
-
-	error = -EFAULT;
-	if (!copy_to_user(set, &pending, sigsetsize))
-		error = 0;
-
-out:
-	return error;
+	sigandsets(set, &current->blocked, set);
+	return 0;
 }
 
 /**
@@ -2684,10 +2675,37 @@ out:
  *  @set: stores pending signals
  *  @sigsetsize: size of sigset_t type or larger
  */
-SYSCALL_DEFINE2(rt_sigpending, sigset_t __user *, set, size_t, sigsetsize)
+SYSCALL_DEFINE2(rt_sigpending, sigset_t __user *, uset, size_t, sigsetsize)
+{
+	sigset_t set;
+	int err = do_sigpending(&set, sigsetsize);
+	if (!err && copy_to_user(uset, &set, sigsetsize))
+		err = -EFAULT;
+	return err;
+}
+
+#ifdef CONFIG_COMPAT
+#ifdef CONFIG_GENERIC_COMPAT_RT_SIGPENDING
+COMPAT_SYSCALL_DEFINE2(rt_sigpending, compat_sigset_t __user *, uset,
+		compat_size_t, sigsetsize)
 {
-	return do_sigpending(set, sigsetsize);
+#ifdef __BIG_ENDIAN
+	sigset_t set;
+	int err = do_sigpending(&set, sigsetsize);
+	if (!err) {
+		compat_sigset_t set32;
+		sigset_to_compat(&set32, &set);
+		/* we can get here only if sigsetsize <= sizeof(set) */
+		if (copy_to_user(uset, &set32, sigsetsize))
+			err = -EFAULT;
+	}
+	return err;
+#else
+	return sys_rt_sigpending((sigset_t __user *)uset, sigsetsize);
+#endif
 }
+#endif
+#endif
 
 #ifndef HAVE_ARCH_COPY_SIGINFO_TO_USER
 
@@ -3216,7 +3234,7 @@ int __compat_save_altstack(compat_stack_t __user *uss, unsigned long sp)
  */
 SYSCALL_DEFINE1(sigpending, old_sigset_t __user *, set)
 {
-	return do_sigpending(set, sizeof(*set));
+	return sys_rt_sigpending((sigset_t __user *)set, sizeof(old_sigset_t)); 
 }
 
 #endif
-- 
cgit v1.2.3


From 75907d4d7bc5d79b82d1453d9689efc588de1b43 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 25 Dec 2012 15:19:12 -0500
Subject: generic compat_sys_rt_sigqueueinfo()

conditional on GENERIC_COMPAT_RT_SIGQUEUEINFO; by the end of that series
it will become the same thing as COMPAT and conditional will die out.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/Kconfig           |  3 +++
 include/linux/compat.h |  4 ++++
 kernel/signal.c        | 45 ++++++++++++++++++++++++++++++++-------------
 3 files changed, 39 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/arch/Kconfig b/arch/Kconfig
index 18c0383dcc42..c612b5ccfd84 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -365,6 +365,9 @@ config GENERIC_COMPAT_RT_SIGPROCMASK
 config GENERIC_COMPAT_RT_SIGPENDING
 	bool
 
+config GENERIC_COMPAT_RT_SIGQUEUEINFO
+	bool
+
 #
 # ABI hall of shame
 #
diff --git a/include/linux/compat.h b/include/linux/compat.h
index 75548a43a1c5..bbee15ef3ae9 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -602,6 +602,10 @@ asmlinkage long compat_sys_rt_sigprocmask(int how, compat_sigset_t __user *set,
 asmlinkage long compat_sys_rt_sigpending(compat_sigset_t __user *uset,
 					 compat_size_t sigsetsize);
 #endif
+#ifdef CONFIG_GENERIC_COMPAT_RT_SIGQUEUEINFO
+asmlinkage long compat_sys_rt_sigqueueinfo(compat_pid_t pid, int sig,
+				struct compat_siginfo __user *uinfo);
+#endif
 asmlinkage long compat_sys_sysinfo(struct compat_sysinfo __user *info);
 asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd,
 				 unsigned long arg);
diff --git a/kernel/signal.c b/kernel/signal.c
index 3040c349b0e1..6cd3023cc66b 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2983,6 +2983,22 @@ SYSCALL_DEFINE2(tkill, pid_t, pid, int, sig)
 	return do_tkill(0, pid, sig);
 }
 
+static int do_rt_sigqueueinfo(pid_t pid, int sig, siginfo_t *info)
+{
+	/* Not even root can pretend to send signals from the kernel.
+	 * Nor can they impersonate a kill()/tgkill(), which adds source info.
+	 */
+	if (info->si_code >= 0 || info->si_code == SI_TKILL) {
+		/* We used to allow any < 0 si_code */
+		WARN_ON_ONCE(info->si_code < 0);
+		return -EPERM;
+	}
+	info->si_signo = sig;
+
+	/* POSIX.1b doesn't mention process groups.  */
+	return kill_proc_info(sig, info, pid);
+}
+
 /**
  *  sys_rt_sigqueueinfo - send signal information to a signal
  *  @pid: the PID of the thread
@@ -2993,23 +3009,26 @@ SYSCALL_DEFINE3(rt_sigqueueinfo, pid_t, pid, int, sig,
 		siginfo_t __user *, uinfo)
 {
 	siginfo_t info;
-
 	if (copy_from_user(&info, uinfo, sizeof(siginfo_t)))
 		return -EFAULT;
+	return do_rt_sigqueueinfo(pid, sig, &info);
+}
 
-	/* Not even root can pretend to send signals from the kernel.
-	 * Nor can they impersonate a kill()/tgkill(), which adds source info.
-	 */
-	if (info.si_code >= 0 || info.si_code == SI_TKILL) {
-		/* We used to allow any < 0 si_code */
-		WARN_ON_ONCE(info.si_code < 0);
-		return -EPERM;
-	}
-	info.si_signo = sig;
-
-	/* POSIX.1b doesn't mention process groups.  */
-	return kill_proc_info(sig, &info, pid);
+#ifdef CONFIG_COMPAT
+#ifdef CONFIG_GENERIC_COMPAT_RT_SIGQUEUEINFO
+COMPAT_SYSCALL_DEFINE3(rt_sigqueueinfo,
+			compat_pid_t, pid,
+			int, sig,
+			struct compat_siginfo __user *, uinfo)
+{
+	siginfo_t info;
+	int ret = copy_siginfo_from_user32(&info, uinfo);
+	if (unlikely(ret))
+		return ret;
+	return do_rt_sigqueueinfo(pid, sig, &info);
 }
+#endif
+#endif
 
 long do_rt_tgsigqueueinfo(pid_t tgid, pid_t pid, int sig, siginfo_t *info)
 {
-- 
cgit v1.2.3


From 0a0e8cdf734ce723bfc4ca6032ffbc03ce17c642 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 25 Dec 2012 16:04:12 -0500
Subject: old sigsuspend variants in kernel/signal.c

conditional on OLD_SIGSUSPEND/OLD_SIGSUSPEND3, depending on which
variety of that fossil is needed.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/Kconfig             | 10 ++++++++++
 include/linux/syscalls.h |  8 ++++++++
 kernel/signal.c          | 17 +++++++++++++++++
 3 files changed, 35 insertions(+)

(limited to 'include/linux')

diff --git a/arch/Kconfig b/arch/Kconfig
index c612b5ccfd84..6b1df95ffefc 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -387,4 +387,14 @@ config ODD_RT_SIGACTION
 	help
 	  Architecture has unusual rt_sigaction(2) arguments
 
+config OLD_SIGSUSPEND
+	bool
+	help
+	  Architecture has old sigsuspend(2) syscall, of one-argument variety
+
+config OLD_SIGSUSPEND3
+	bool
+	help
+	  Even weirder antique ABI - three-argument sigsuspend(2)
+
 source "kernel/gcov/Kconfig"
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 75defcd1c276..d2dd2f63d220 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -377,6 +377,14 @@ asmlinkage long sys_init_module(void __user *umod, unsigned long len,
 asmlinkage long sys_delete_module(const char __user *name_user,
 				unsigned int flags);
 
+#ifdef CONFIG_OLD_SIGSUSPEND
+asmlinkage long sys_sigsuspend(old_sigset_t mask);
+#endif
+
+#ifdef CONFIG_OLD_SIGSUSPEND3
+asmlinkage long sys_sigsuspend(int unused1, int unused2, old_sigset_t mask);
+#endif
+
 #ifndef CONFIG_ODD_RT_SIGACTION
 asmlinkage long sys_rt_sigaction(int,
 				 const struct sigaction __user *,
diff --git a/kernel/signal.c b/kernel/signal.c
index 6cd3023cc66b..93fd4b83d866 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -3454,6 +3454,23 @@ COMPAT_SYSCALL_DEFINE2(rt_sigsuspend, compat_sigset_t __user *, unewset, compat_
 #endif
 #endif
 
+#ifdef CONFIG_OLD_SIGSUSPEND
+SYSCALL_DEFINE1(sigsuspend, old_sigset_t, mask)
+{
+	sigset_t blocked;
+	siginitset(&blocked, mask);
+	return sigsuspend(&blocked);
+}
+#endif
+#ifdef CONFIG_OLD_SIGSUSPEND3
+SYSCALL_DEFINE3(sigsuspend, int, unused1, int, unused2, old_sigset_t, mask)
+{
+	sigset_t blocked;
+	siginitset(&blocked, mask);
+	return sigsuspend(&blocked);
+}
+#endif
+
 __attribute__((weak)) const char *arch_vma_name(struct vm_area_struct *vma)
 {
 	return NULL;
-- 
cgit v1.2.3


From 9aae8fc05d2d130797be436eb7cae29c32710193 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 24 Dec 2012 23:12:04 -0500
Subject: switch rt_tgsigqueueinfo to COMPAT_SYSCALL_DEFINE

C ABI violations on sparc, ppc and mips

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/signal.h |  2 --
 kernel/compat.c        | 11 -----------
 kernel/signal.c        | 17 ++++++++++++++++-
 3 files changed, 16 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/signal.h b/include/linux/signal.h
index 786bd99fde65..ed1e71f1aac7 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -241,8 +241,6 @@ extern int do_send_sig_info(int sig, struct siginfo *info,
 				struct task_struct *p, bool group);
 extern int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p);
 extern int __group_send_sig_info(int, struct siginfo *, struct task_struct *);
-extern long do_rt_tgsigqueueinfo(pid_t tgid, pid_t pid, int sig,
-				 siginfo_t *info);
 extern int do_sigtimedwait(const sigset_t *, siginfo_t *,
 				const struct timespec *);
 extern int sigprocmask(int, sigset_t *, sigset_t *);
diff --git a/kernel/compat.c b/kernel/compat.c
index a53b04a2b5eb..cf75a288f0c0 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -1025,17 +1025,6 @@ COMPAT_SYSCALL_DEFINE4(rt_sigtimedwait, compat_sigset_t __user *, uthese,
 	return ret;
 }
 
-asmlinkage long
-compat_sys_rt_tgsigqueueinfo(compat_pid_t tgid, compat_pid_t pid, int sig,
-			     struct compat_siginfo __user *uinfo)
-{
-	siginfo_t info;
-
-	if (copy_siginfo_from_user32(&info, uinfo))
-		return -EFAULT;
-	return do_rt_tgsigqueueinfo(tgid, pid, sig, &info);
-}
-
 #ifdef __ARCH_WANT_COMPAT_SYS_TIME
 
 /* compat_time_t is a 32 bit "long" and needs to get converted. */
diff --git a/kernel/signal.c b/kernel/signal.c
index 93fd4b83d866..5a4aed1244fb 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -3030,7 +3030,7 @@ COMPAT_SYSCALL_DEFINE3(rt_sigqueueinfo,
 #endif
 #endif
 
-long do_rt_tgsigqueueinfo(pid_t tgid, pid_t pid, int sig, siginfo_t *info)
+static int do_rt_tgsigqueueinfo(pid_t tgid, pid_t pid, int sig, siginfo_t *info)
 {
 	/* This is only valid for single tasks */
 	if (pid <= 0 || tgid <= 0)
@@ -3060,6 +3060,21 @@ SYSCALL_DEFINE4(rt_tgsigqueueinfo, pid_t, tgid, pid_t, pid, int, sig,
 	return do_rt_tgsigqueueinfo(tgid, pid, sig, &info);
 }
 
+#ifdef CONFIG_COMPAT
+COMPAT_SYSCALL_DEFINE4(rt_tgsigqueueinfo,
+			compat_pid_t, tgid,
+			compat_pid_t, pid,
+			int, sig,
+			struct compat_siginfo __user *, uinfo)
+{
+	siginfo_t info;
+
+	if (copy_siginfo_from_user32(&info, uinfo))
+		return -EFAULT;
+	return do_rt_tgsigqueueinfo(tgid, pid, sig, &info);
+}
+#endif
+
 int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact)
 {
 	struct task_struct *t = current;
-- 
cgit v1.2.3


From 92a3ce4a1e0047215aa0a0b30cc333bd32b866a8 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 25 Nov 2012 21:20:05 -0500
Subject: consolidate declarations of k_sigaction

Only alpha and sparc are unusual - they have ka_restorer in it.
And nobody needs that exposed to userland.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/alpha/include/asm/signal.h        | 5 +----
 arch/arm/include/asm/signal.h          | 4 ----
 arch/avr32/include/asm/signal.h        | 4 ----
 arch/cris/include/asm/signal.h         | 3 ---
 arch/h8300/include/asm/signal.h        | 4 ----
 arch/ia64/include/asm/signal.h         | 4 ----
 arch/m32r/include/asm/signal.h         | 3 ---
 arch/m68k/include/asm/signal.h         | 3 ---
 arch/mips/include/uapi/asm/signal.h    | 4 ----
 arch/mn10300/include/asm/signal.h      | 3 ---
 arch/parisc/include/asm/signal.h       | 4 ----
 arch/powerpc/include/uapi/asm/signal.h | 4 ----
 arch/s390/include/asm/signal.h         | 4 ----
 arch/sparc/include/asm/signal.h        | 5 +----
 arch/x86/include/asm/signal.h          | 5 -----
 arch/x86/include/uapi/asm/signal.h     | 4 ----
 arch/xtensa/include/asm/signal.h       | 4 ----
 include/linux/signal.h                 | 7 +++++++
 include/uapi/asm-generic/signal.h      | 4 ----
 19 files changed, 9 insertions(+), 69 deletions(-)

(limited to 'include/linux')

diff --git a/arch/alpha/include/asm/signal.h b/arch/alpha/include/asm/signal.h
index 8a1ac28cd562..8c06bd1dfffc 100644
--- a/arch/alpha/include/asm/signal.h
+++ b/arch/alpha/include/asm/signal.h
@@ -28,9 +28,6 @@ struct sigaction {
 	sigset_t	sa_mask;	/* mask last for extensibility */
 };
 
-struct k_sigaction {
-	struct sigaction sa;
-	__sigrestore_t ka_restorer;
-};
+#define __ARCH_HAS_KA_RESTORER
 #include <asm/sigcontext.h>
 #endif
diff --git a/arch/arm/include/asm/signal.h b/arch/arm/include/asm/signal.h
index 9a0ea6ab988f..58057023ff60 100644
--- a/arch/arm/include/asm/signal.h
+++ b/arch/arm/include/asm/signal.h
@@ -30,9 +30,5 @@ struct sigaction {
 	sigset_t sa_mask;		/* mask last for extensibility */
 };
 
-struct k_sigaction {
-	struct sigaction sa;
-};
-
 #include <asm/sigcontext.h>
 #endif
diff --git a/arch/avr32/include/asm/signal.h b/arch/avr32/include/asm/signal.h
index 9326d182e9e5..c8858f0cef3d 100644
--- a/arch/avr32/include/asm/signal.h
+++ b/arch/avr32/include/asm/signal.h
@@ -30,10 +30,6 @@ struct sigaction {
 	sigset_t sa_mask;		/* mask last for extensibility */
 };
 
-struct k_sigaction {
-	struct sigaction sa;
-};
-
 #include <asm/sigcontext.h>
 #undef __HAVE_ARCH_SIG_BITOPS
 
diff --git a/arch/cris/include/asm/signal.h b/arch/cris/include/asm/signal.h
index c0cb1fd4644c..b0cd904ecd8a 100644
--- a/arch/cris/include/asm/signal.h
+++ b/arch/cris/include/asm/signal.h
@@ -30,9 +30,6 @@ struct sigaction {
 	sigset_t sa_mask;		/* mask last for extensibility */
 };
 
-struct k_sigaction {
-	struct sigaction sa;
-};
 #include <asm/sigcontext.h>
 
 #endif
diff --git a/arch/h8300/include/asm/signal.h b/arch/h8300/include/asm/signal.h
index 66c81c67e55d..c05f937bb492 100644
--- a/arch/h8300/include/asm/signal.h
+++ b/arch/h8300/include/asm/signal.h
@@ -30,10 +30,6 @@ struct sigaction {
 	sigset_t sa_mask;		/* mask last for extensibility */
 };
 
-struct k_sigaction {
-	struct sigaction sa;
-};
-
 #include <asm/sigcontext.h>
 #undef __HAVE_ARCH_SIG_BITOPS
 
diff --git a/arch/ia64/include/asm/signal.h b/arch/ia64/include/asm/signal.h
index 3a1b20e74c5c..a0d5f00ec8db 100644
--- a/arch/ia64/include/asm/signal.h
+++ b/arch/ia64/include/asm/signal.h
@@ -32,10 +32,6 @@ struct sigaction {
 	sigset_t sa_mask;		/* mask last for extensibility */
 };
 
-struct k_sigaction {
-	struct sigaction sa;
-};
-
 #  include <asm/sigcontext.h>
 
 # endif /* !__ASSEMBLY__ */
diff --git a/arch/m32r/include/asm/signal.h b/arch/m32r/include/asm/signal.h
index a5ba4a217fb9..4699405f9f82 100644
--- a/arch/m32r/include/asm/signal.h
+++ b/arch/m32r/include/asm/signal.h
@@ -23,9 +23,6 @@ struct sigaction {
 	sigset_t sa_mask;		/* mask last for extensibility */
 };
 
-struct k_sigaction {
-	struct sigaction sa;
-};
 #include <asm/sigcontext.h>
 
 #undef __HAVE_ARCH_SIG_BITOPS
diff --git a/arch/m68k/include/asm/signal.h b/arch/m68k/include/asm/signal.h
index 9c8c46b06b0c..1edd5f358c0f 100644
--- a/arch/m68k/include/asm/signal.h
+++ b/arch/m68k/include/asm/signal.h
@@ -30,9 +30,6 @@ struct sigaction {
 	sigset_t sa_mask;		/* mask last for extensibility */
 };
 
-struct k_sigaction {
-	struct sigaction sa;
-};
 #include <asm/sigcontext.h>
 
 #ifndef CONFIG_CPU_HAS_NO_BITFIELDS
diff --git a/arch/mips/include/uapi/asm/signal.h b/arch/mips/include/uapi/asm/signal.h
index 770732cb8d03..3c85fa07b3d3 100644
--- a/arch/mips/include/uapi/asm/signal.h
+++ b/arch/mips/include/uapi/asm/signal.h
@@ -102,10 +102,6 @@ struct sigaction {
 	sigset_t	sa_mask;
 };
 
-struct k_sigaction {
-	struct sigaction sa;
-};
-
 /* IRIX compatible stack_t  */
 typedef struct sigaltstack {
 	void __user *ss_sp;
diff --git a/arch/mn10300/include/asm/signal.h b/arch/mn10300/include/asm/signal.h
index d280e9780793..d6f06540acb0 100644
--- a/arch/mn10300/include/asm/signal.h
+++ b/arch/mn10300/include/asm/signal.h
@@ -40,9 +40,6 @@ struct sigaction {
 	sigset_t sa_mask;		/* mask last for extensibility */
 };
 
-struct k_sigaction {
-	struct sigaction sa;
-};
 #include <asm/sigcontext.h>
 
 #endif /* _ASM_SIGNAL_H */
diff --git a/arch/parisc/include/asm/signal.h b/arch/parisc/include/asm/signal.h
index 0fdb3c835952..e42e05d69a1d 100644
--- a/arch/parisc/include/asm/signal.h
+++ b/arch/parisc/include/asm/signal.h
@@ -30,10 +30,6 @@ struct sigaction {
 	sigset_t sa_mask;		/* mask last for extensibility */
 };
 
-struct k_sigaction {
-	struct sigaction sa;
-};
-
 #include <asm/sigcontext.h>
 
 #endif /* !__ASSEMBLY */
diff --git a/arch/powerpc/include/uapi/asm/signal.h b/arch/powerpc/include/uapi/asm/signal.h
index e079fb39d5bc..a1a624699292 100644
--- a/arch/powerpc/include/uapi/asm/signal.h
+++ b/arch/powerpc/include/uapi/asm/signal.h
@@ -104,10 +104,6 @@ struct sigaction {
 	sigset_t sa_mask;		/* mask last for extensibility */
 };
 
-struct k_sigaction {
-	struct sigaction sa;
-};
-
 typedef struct sigaltstack {
 	void __user *ss_sp;
 	int ss_flags;
diff --git a/arch/s390/include/asm/signal.h b/arch/s390/include/asm/signal.h
index db7ddfaf5b79..89853592e492 100644
--- a/arch/s390/include/asm/signal.h
+++ b/arch/s390/include/asm/signal.h
@@ -35,8 +35,4 @@ struct sigaction {
         sigset_t sa_mask;               /* mask last for extensibility */
 };
 
-struct k_sigaction {
-        struct sigaction sa;
-};
-
 #endif
diff --git a/arch/sparc/include/asm/signal.h b/arch/sparc/include/asm/signal.h
index 77b85850d543..e1881856a55c 100644
--- a/arch/sparc/include/asm/signal.h
+++ b/arch/sparc/include/asm/signal.h
@@ -21,10 +21,7 @@
  */
 #define SA_STATIC_ALLOC         0x8000
 
-struct k_sigaction {
-	struct			__new_sigaction sa;
-	void			__user *ka_restorer;
-};
+#define __ARCH_HAS_KA_RESTORER
 
 #endif /* !(__ASSEMBLY__) */
 #endif /* !(__SPARC_SIGNAL_H) */
diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h
index 216bf364a7e7..e7cf5005931a 100644
--- a/arch/x86/include/asm/signal.h
+++ b/arch/x86/include/asm/signal.h
@@ -46,11 +46,6 @@ struct sigaction {
 	sigset_t sa_mask;		/* mask last for extensibility */
 };
 
-struct k_sigaction {
-	struct sigaction sa;
-};
-
-#else /* __i386__ */
 #endif /* !__i386__ */
 #include <asm/sigcontext.h>
 
diff --git a/arch/x86/include/uapi/asm/signal.h b/arch/x86/include/uapi/asm/signal.h
index aa7d6ae39e0e..e52443fc026b 100644
--- a/arch/x86/include/uapi/asm/signal.h
+++ b/arch/x86/include/uapi/asm/signal.h
@@ -122,10 +122,6 @@ struct sigaction {
 	sigset_t sa_mask;		/* mask last for extensibility */
 };
 
-struct k_sigaction {
-	struct sigaction sa;
-};
-
 #endif /* !__i386__ */
 
 typedef struct sigaltstack {
diff --git a/arch/xtensa/include/asm/signal.h b/arch/xtensa/include/asm/signal.h
index 6f586bd90e18..fd63b8f46a4b 100644
--- a/arch/xtensa/include/asm/signal.h
+++ b/arch/xtensa/include/asm/signal.h
@@ -22,10 +22,6 @@ struct sigaction {
 	sigset_t sa_mask;		/* mask last for extensibility */
 };
 
-struct k_sigaction {
-	struct sigaction sa;
-};
-
 #include <asm/sigcontext.h>
 
 #endif	/* __ASSEMBLY__ */
diff --git a/include/linux/signal.h b/include/linux/signal.h
index ed1e71f1aac7..01451a156ff7 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -249,6 +249,13 @@ extern void __set_current_blocked(const sigset_t *);
 extern int show_unhandled_signals;
 extern int sigsuspend(sigset_t *);
 
+struct k_sigaction {
+	struct sigaction sa;
+#ifdef __ARCH_HAS_KA_RESTORER
+	__sigrestore_t ka_restorer;
+#endif
+};
+
 extern int get_signal_to_deliver(siginfo_t *info, struct k_sigaction *return_ka, struct pt_regs *regs, void *cookie);
 extern void signal_delivered(int sig, siginfo_t *info, struct k_sigaction *ka, struct pt_regs *regs, int stepping);
 extern void exit_signals(struct task_struct *tsk);
diff --git a/include/uapi/asm-generic/signal.h b/include/uapi/asm-generic/signal.h
index 6fae30fd16ab..21e59f36c61b 100644
--- a/include/uapi/asm-generic/signal.h
+++ b/include/uapi/asm-generic/signal.h
@@ -102,10 +102,6 @@ struct sigaction {
 	sigset_t sa_mask;		/* mask last for extensibility */
 };
 
-struct k_sigaction {
-	struct sigaction sa;
-};
-
 typedef struct sigaltstack {
 	void __user *ss_sp;
 	int ss_flags;
-- 
cgit v1.2.3


From 574c4866e33d648520a8bd5bf6f573ea6e554e88 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 25 Nov 2012 22:24:19 -0500
Subject: consolidate kernel-side struct sigaction declarations

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/alpha/include/asm/signal.h        |  6 ------
 arch/arm/include/asm/signal.h          |  7 +------
 arch/avr32/include/asm/signal.h        |  7 +------
 arch/cris/include/asm/signal.h         |  7 +------
 arch/h8300/include/asm/signal.h        |  7 +------
 arch/ia64/include/asm/signal.h         |  6 ------
 arch/ia64/include/asm/unistd.h         |  1 -
 arch/m32r/include/asm/signal.h         |  8 +-------
 arch/m68k/include/asm/signal.h         |  7 +------
 arch/mips/include/asm/signal.h         |  2 ++
 arch/mips/include/uapi/asm/signal.h    |  2 ++
 arch/mn10300/include/asm/signal.h      |  7 +------
 arch/parisc/include/asm/signal.h       |  2 ++
 arch/powerpc/include/asm/signal.h      |  1 +
 arch/powerpc/include/asm/syscalls.h    |  1 -
 arch/powerpc/include/uapi/asm/signal.h |  2 ++
 arch/s390/include/asm/signal.h         |  8 +-------
 arch/sparc/include/asm/signal.h        |  1 +
 arch/sparc/include/uapi/asm/signal.h   |  2 ++
 arch/sparc/kernel/systbls.h            |  2 +-
 arch/x86/include/asm/signal.h          | 10 +++-------
 arch/x86/include/uapi/asm/signal.h     |  4 ++--
 arch/xtensa/include/asm/signal.h       |  7 +------
 include/linux/signal.h                 | 14 ++++++++++++++
 include/linux/syscalls.h               |  2 +-
 include/uapi/asm-generic/signal.h      |  6 ++++++
 26 files changed, 48 insertions(+), 81 deletions(-)

(limited to 'include/linux')

diff --git a/arch/alpha/include/asm/signal.h b/arch/alpha/include/asm/signal.h
index 8c06bd1dfffc..963f0494dca7 100644
--- a/arch/alpha/include/asm/signal.h
+++ b/arch/alpha/include/asm/signal.h
@@ -22,12 +22,6 @@ struct osf_sigaction {
 	int		sa_flags;
 };
 
-struct sigaction {
-	__sighandler_t	sa_handler;
-	unsigned long	sa_flags;
-	sigset_t	sa_mask;	/* mask last for extensibility */
-};
-
 #define __ARCH_HAS_KA_RESTORER
 #include <asm/sigcontext.h>
 #endif
diff --git a/arch/arm/include/asm/signal.h b/arch/arm/include/asm/signal.h
index 58057023ff60..a5076b9bd463 100644
--- a/arch/arm/include/asm/signal.h
+++ b/arch/arm/include/asm/signal.h
@@ -23,12 +23,7 @@ struct old_sigaction {
 	__sigrestore_t sa_restorer;
 };
 
-struct sigaction {
-	__sighandler_t sa_handler;
-	unsigned long sa_flags;
-	__sigrestore_t sa_restorer;
-	sigset_t sa_mask;		/* mask last for extensibility */
-};
+#define __ARCH_HAS_SA_RESTORER
 
 #include <asm/sigcontext.h>
 #endif
diff --git a/arch/avr32/include/asm/signal.h b/arch/avr32/include/asm/signal.h
index c8858f0cef3d..d875eb6a3f3c 100644
--- a/arch/avr32/include/asm/signal.h
+++ b/arch/avr32/include/asm/signal.h
@@ -23,12 +23,7 @@ typedef struct {
 	unsigned long sig[_NSIG_WORDS];
 } sigset_t;
 
-struct sigaction {
-	__sighandler_t sa_handler;
-	unsigned long sa_flags;
-	__sigrestore_t sa_restorer;
-	sigset_t sa_mask;		/* mask last for extensibility */
-};
+#define __ARCH_HAS_SA_RESTORER
 
 #include <asm/sigcontext.h>
 #undef __HAVE_ARCH_SIG_BITOPS
diff --git a/arch/cris/include/asm/signal.h b/arch/cris/include/asm/signal.h
index b0cd904ecd8a..b3650ab2c320 100644
--- a/arch/cris/include/asm/signal.h
+++ b/arch/cris/include/asm/signal.h
@@ -23,12 +23,7 @@ struct old_sigaction {
 	void (*sa_restorer)(void);
 };
 
-struct sigaction {
-	__sighandler_t sa_handler;
-	unsigned long sa_flags;
-	void (*sa_restorer)(void);
-	sigset_t sa_mask;		/* mask last for extensibility */
-};
+#define __ARCH_HAS_SA_RESTORER
 
 #include <asm/sigcontext.h>
 
diff --git a/arch/h8300/include/asm/signal.h b/arch/h8300/include/asm/signal.h
index c05f937bb492..9b18a0959461 100644
--- a/arch/h8300/include/asm/signal.h
+++ b/arch/h8300/include/asm/signal.h
@@ -23,12 +23,7 @@ struct old_sigaction {
 	void (*sa_restorer)(void);
 };
 
-struct sigaction {
-	__sighandler_t sa_handler;
-	unsigned long sa_flags;
-	void (*sa_restorer)(void);
-	sigset_t sa_mask;		/* mask last for extensibility */
-};
+#define __ARCH_HAS_SA_RESTORER
 
 #include <asm/sigcontext.h>
 #undef __HAVE_ARCH_SIG_BITOPS
diff --git a/arch/ia64/include/asm/signal.h b/arch/ia64/include/asm/signal.h
index a0d5f00ec8db..c62afa4a0dc2 100644
--- a/arch/ia64/include/asm/signal.h
+++ b/arch/ia64/include/asm/signal.h
@@ -26,12 +26,6 @@ typedef struct {
 	unsigned long sig[_NSIG_WORDS];
 } sigset_t;
 
-struct sigaction {
-	__sighandler_t sa_handler;
-	unsigned long sa_flags;
-	sigset_t sa_mask;		/* mask last for extensibility */
-};
-
 #  include <asm/sigcontext.h>
 
 # endif /* !__ASSEMBLY__ */
diff --git a/arch/ia64/include/asm/unistd.h b/arch/ia64/include/asm/unistd.h
index bfbb109458be..c827049eb62c 100644
--- a/arch/ia64/include/asm/unistd.h
+++ b/arch/ia64/include/asm/unistd.h
@@ -47,7 +47,6 @@ asmlinkage unsigned long sys_mmap2(
 				int prot, int flags,
 				int fd, long pgoff);
 struct pt_regs;
-struct sigaction;
 asmlinkage long sys_ia64_pipe(void);
 
 /*
diff --git a/arch/m32r/include/asm/signal.h b/arch/m32r/include/asm/signal.h
index 4699405f9f82..ed3ded6601e8 100644
--- a/arch/m32r/include/asm/signal.h
+++ b/arch/m32r/include/asm/signal.h
@@ -16,13 +16,7 @@ typedef struct {
 	unsigned long sig[_NSIG_WORDS];
 } sigset_t;
 
-struct sigaction {
-	__sighandler_t sa_handler;
-	unsigned long sa_flags;
-	__sigrestore_t sa_restorer;
-	sigset_t sa_mask;		/* mask last for extensibility */
-};
-
+#define __ARCH_HAS_SA_RESTORER
 #include <asm/sigcontext.h>
 
 #undef __HAVE_ARCH_SIG_BITOPS
diff --git a/arch/m68k/include/asm/signal.h b/arch/m68k/include/asm/signal.h
index 1edd5f358c0f..c7b4fb1fa14d 100644
--- a/arch/m68k/include/asm/signal.h
+++ b/arch/m68k/include/asm/signal.h
@@ -23,12 +23,7 @@ struct old_sigaction {
 	__sigrestore_t sa_restorer;
 };
 
-struct sigaction {
-	__sighandler_t sa_handler;
-	unsigned long sa_flags;
-	__sigrestore_t sa_restorer;
-	sigset_t sa_mask;		/* mask last for extensibility */
-};
+#define __ARCH_HAS_SA_RESTORER
 
 #include <asm/sigcontext.h>
 
diff --git a/arch/mips/include/asm/signal.h b/arch/mips/include/asm/signal.h
index cf4a08062d1d..197f6367c201 100644
--- a/arch/mips/include/asm/signal.h
+++ b/arch/mips/include/asm/signal.h
@@ -21,4 +21,6 @@
 #include <asm/sigcontext.h>
 #include <asm/siginfo.h>
 
+#define __ARCH_HAS_ODD_SIGACTION
+
 #endif /* _ASM_SIGNAL_H */
diff --git a/arch/mips/include/uapi/asm/signal.h b/arch/mips/include/uapi/asm/signal.h
index 3c85fa07b3d3..6783c887a678 100644
--- a/arch/mips/include/uapi/asm/signal.h
+++ b/arch/mips/include/uapi/asm/signal.h
@@ -96,11 +96,13 @@ typedef unsigned long old_sigset_t;		/* at least 32 bits */
 
 #include <asm-generic/signal-defs.h>
 
+#ifndef __KERNEL__
 struct sigaction {
 	unsigned int	sa_flags;
 	__sighandler_t	sa_handler;
 	sigset_t	sa_mask;
 };
+#endif
 
 /* IRIX compatible stack_t  */
 typedef struct sigaltstack {
diff --git a/arch/mn10300/include/asm/signal.h b/arch/mn10300/include/asm/signal.h
index d6f06540acb0..288ade5ec94e 100644
--- a/arch/mn10300/include/asm/signal.h
+++ b/arch/mn10300/include/asm/signal.h
@@ -33,12 +33,7 @@ struct old_sigaction {
 	__sigrestore_t sa_restorer;
 };
 
-struct sigaction {
-	__sighandler_t sa_handler;
-	unsigned long sa_flags;
-	__sigrestore_t sa_restorer;
-	sigset_t sa_mask;		/* mask last for extensibility */
-};
+#define __ARCH_HAS_SA_RESTORER
 
 #include <asm/sigcontext.h>
 
diff --git a/arch/parisc/include/asm/signal.h b/arch/parisc/include/asm/signal.h
index e42e05d69a1d..c8e4ec5a6582 100644
--- a/arch/parisc/include/asm/signal.h
+++ b/arch/parisc/include/asm/signal.h
@@ -24,11 +24,13 @@ typedef struct {
 	unsigned long sig[_NSIG_WORDS];
 } sigset_t;
 
+#ifndef __KERNEL__
 struct sigaction {
 	__sighandler_t sa_handler;
 	unsigned long sa_flags;
 	sigset_t sa_mask;		/* mask last for extensibility */
 };
+#endif
 
 #include <asm/sigcontext.h>
 
diff --git a/arch/powerpc/include/asm/signal.h b/arch/powerpc/include/asm/signal.h
index a101637725a2..fbe66c463891 100644
--- a/arch/powerpc/include/asm/signal.h
+++ b/arch/powerpc/include/asm/signal.h
@@ -1,6 +1,7 @@
 #ifndef _ASM_POWERPC_SIGNAL_H
 #define _ASM_POWERPC_SIGNAL_H
 
+#define __ARCH_HAS_SA_RESTORER
 #include <uapi/asm/signal.h>
 
 #endif /* _ASM_POWERPC_SIGNAL_H */
diff --git a/arch/powerpc/include/asm/syscalls.h b/arch/powerpc/include/asm/syscalls.h
index 5c51659e61d5..21936530df08 100644
--- a/arch/powerpc/include/asm/syscalls.h
+++ b/arch/powerpc/include/asm/syscalls.h
@@ -9,7 +9,6 @@
 
 struct pt_regs;
 struct rtas_args;
-struct sigaction;
 
 asmlinkage unsigned long sys_mmap(unsigned long addr, size_t len,
 		unsigned long prot, unsigned long flags,
diff --git a/arch/powerpc/include/uapi/asm/signal.h b/arch/powerpc/include/uapi/asm/signal.h
index a1a624699292..6defdd65594e 100644
--- a/arch/powerpc/include/uapi/asm/signal.h
+++ b/arch/powerpc/include/uapi/asm/signal.h
@@ -97,12 +97,14 @@ struct old_sigaction {
 	__sigrestore_t sa_restorer;
 };
 
+#ifndef __KERNEL__
 struct sigaction {
 	__sighandler_t sa_handler;
 	unsigned long sa_flags;
 	__sigrestore_t sa_restorer;
 	sigset_t sa_mask;		/* mask last for extensibility */
 };
+#endif
 
 typedef struct sigaltstack {
 	void __user *ss_sp;
diff --git a/arch/s390/include/asm/signal.h b/arch/s390/include/asm/signal.h
index 89853592e492..d26e30e31656 100644
--- a/arch/s390/include/asm/signal.h
+++ b/arch/s390/include/asm/signal.h
@@ -28,11 +28,5 @@ struct old_sigaction {
         void (*sa_restorer)(void);
 };
 
-struct sigaction {
-        __sighandler_t sa_handler;
-        unsigned long sa_flags;
-        void (*sa_restorer)(void);
-        sigset_t sa_mask;               /* mask last for extensibility */
-};
-
+#define __ARCH_HAS_SA_RESTORER
 #endif
diff --git a/arch/sparc/include/asm/signal.h b/arch/sparc/include/asm/signal.h
index e1881856a55c..c33ce3f2ba84 100644
--- a/arch/sparc/include/asm/signal.h
+++ b/arch/sparc/include/asm/signal.h
@@ -22,6 +22,7 @@
 #define SA_STATIC_ALLOC         0x8000
 
 #define __ARCH_HAS_KA_RESTORER
+#define __ARCH_HAS_SA_RESTORER
 
 #endif /* !(__ASSEMBLY__) */
 #endif /* !(__SPARC_SIGNAL_H) */
diff --git a/arch/sparc/include/uapi/asm/signal.h b/arch/sparc/include/uapi/asm/signal.h
index c4ffd6c97106..284836f0b7dc 100644
--- a/arch/sparc/include/uapi/asm/signal.h
+++ b/arch/sparc/include/uapi/asm/signal.h
@@ -153,12 +153,14 @@ struct sigstack {
 
 #include <asm-generic/signal-defs.h>
 
+#ifndef __KERNEL__
 struct __new_sigaction {
 	__sighandler_t		sa_handler;
 	unsigned long		sa_flags;
 	__sigrestore_t		sa_restorer;  /* not used by Linux/SPARC yet */
 	__new_sigset_t		sa_mask;
 };
+#endif
 
 struct __old_sigaction {
 	__sighandler_t		sa_handler;
diff --git a/arch/sparc/kernel/systbls.h b/arch/sparc/kernel/systbls.h
index 118759cd7342..1dd89dbac8d8 100644
--- a/arch/sparc/kernel/systbls.h
+++ b/arch/sparc/kernel/systbls.h
@@ -3,8 +3,8 @@
 
 #include <linux/kernel.h>
 #include <linux/types.h>
+#include <linux/signal.h>
 #include <asm/utrap.h>
-#include <asm/signal.h>
 
 extern asmlinkage unsigned long sys_getpagesize(void);
 extern asmlinkage long sparc_pipe(struct pt_regs *regs);
diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h
index e7cf5005931a..9bda8224f3d5 100644
--- a/arch/x86/include/asm/signal.h
+++ b/arch/x86/include/asm/signal.h
@@ -31,6 +31,9 @@ typedef sigset_t compat_sigset_t;
 #include <uapi/asm/signal.h>
 #ifndef __ASSEMBLY__
 extern void do_notify_resume(struct pt_regs *, void *, __u32);
+
+#define __ARCH_HAS_SA_RESTORER
+
 #ifdef __i386__
 struct old_sigaction {
 	__sighandler_t sa_handler;
@@ -39,13 +42,6 @@ struct old_sigaction {
 	__sigrestore_t sa_restorer;
 };
 
-struct sigaction {
-	__sighandler_t sa_handler;
-	unsigned long sa_flags;
-	__sigrestore_t sa_restorer;
-	sigset_t sa_mask;		/* mask last for extensibility */
-};
-
 #endif /* !__i386__ */
 #include <asm/sigcontext.h>
 
diff --git a/arch/x86/include/uapi/asm/signal.h b/arch/x86/include/uapi/asm/signal.h
index e52443fc026b..8264f47cf53e 100644
--- a/arch/x86/include/uapi/asm/signal.h
+++ b/arch/x86/include/uapi/asm/signal.h
@@ -95,9 +95,9 @@ typedef unsigned long sigset_t;
 #ifndef __ASSEMBLY__
 
 
-#ifdef __i386__
 # ifndef __KERNEL__
 /* Here we must cater to libcs that poke about in kernel headers.  */
+#ifdef __i386__
 
 struct sigaction {
 	union {
@@ -112,7 +112,6 @@ struct sigaction {
 #define sa_handler	_u._sa_handler
 #define sa_sigaction	_u._sa_sigaction
 
-# endif /* ! __KERNEL__ */
 #else /* __i386__ */
 
 struct sigaction {
@@ -123,6 +122,7 @@ struct sigaction {
 };
 
 #endif /* !__i386__ */
+# endif /* ! __KERNEL__ */
 
 typedef struct sigaltstack {
 	void __user *ss_sp;
diff --git a/arch/xtensa/include/asm/signal.h b/arch/xtensa/include/asm/signal.h
index fd63b8f46a4b..de169b4eaeef 100644
--- a/arch/xtensa/include/asm/signal.h
+++ b/arch/xtensa/include/asm/signal.h
@@ -15,12 +15,7 @@
 #include <uapi/asm/signal.h>
 
 #ifndef __ASSEMBLY__
-struct sigaction {
-	__sighandler_t sa_handler;
-	unsigned long sa_flags;
-	void (*sa_restorer)(void);
-	sigset_t sa_mask;		/* mask last for extensibility */
-};
+#define __ARCH_HAS_SA_RESTORER
 
 #include <asm/sigcontext.h>
 
diff --git a/include/linux/signal.h b/include/linux/signal.h
index 01451a156ff7..0b6878e882da 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -249,6 +249,20 @@ extern void __set_current_blocked(const sigset_t *);
 extern int show_unhandled_signals;
 extern int sigsuspend(sigset_t *);
 
+struct sigaction {
+#ifndef __ARCH_HAS_ODD_SIGACTION
+	__sighandler_t	sa_handler;
+	unsigned long	sa_flags;
+#else
+	unsigned long	sa_flags;
+	__sighandler_t	sa_handler;
+#endif
+#ifdef __ARCH_HAS_SA_RESTORER
+	__sigrestore_t sa_restorer;
+#endif
+	sigset_t	sa_mask;	/* mask last for extensibility */
+};
+
 struct k_sigaction {
 	struct sigaction sa;
 #ifdef __ARCH_HAS_KA_RESTORER
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index d2dd2f63d220..1c4938bf901e 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -68,11 +68,11 @@ struct sigaltstack;
 #include <linux/types.h>
 #include <linux/aio_abi.h>
 #include <linux/capability.h>
+#include <linux/signal.h>
 #include <linux/list.h>
 #include <linux/bug.h>
 #include <linux/sem.h>
 #include <asm/siginfo.h>
-#include <asm/signal.h>
 #include <linux/unistd.h>
 #include <linux/quota.h>
 #include <linux/key.h>
diff --git a/include/uapi/asm-generic/signal.h b/include/uapi/asm-generic/signal.h
index 21e59f36c61b..9df61f1edb0f 100644
--- a/include/uapi/asm-generic/signal.h
+++ b/include/uapi/asm-generic/signal.h
@@ -93,6 +93,11 @@ typedef unsigned long old_sigset_t;
 
 #include <asm-generic/signal-defs.h>
 
+#ifdef SA_RESTORER
+#define __ARCH_HAS_SA_RESTORER
+#endif
+
+#ifndef __KERNEL__
 struct sigaction {
 	__sighandler_t sa_handler;
 	unsigned long sa_flags;
@@ -101,6 +106,7 @@ struct sigaction {
 #endif
 	sigset_t sa_mask;		/* mask last for extensibility */
 };
+#endif
 
 typedef struct sigaltstack {
 	void __user *ss_sp;
-- 
cgit v1.2.3


From 08d32fe504a7670cab3190c624448695adcf70e4 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 25 Dec 2012 18:38:15 -0500
Subject: generic sys_compat_rt_sigaction()

Again, protected by a temporary config symbol (GENERIC_COMPAT_RT_SIGACTION);
will be gone by the end of series.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/Kconfig           |  3 +++
 include/linux/compat.h | 24 ++++++++++++++++++++++++
 kernel/signal.c        | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 76 insertions(+)

(limited to 'include/linux')

diff --git a/arch/Kconfig b/arch/Kconfig
index 6b1df95ffefc..3b4a416fc448 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -368,6 +368,9 @@ config GENERIC_COMPAT_RT_SIGPENDING
 config GENERIC_COMPAT_RT_SIGQUEUEINFO
 	bool
 
+config GENERIC_COMPAT_RT_SIGACTION
+	bool
+
 #
 # ABI hall of shame
 #
diff --git a/include/linux/compat.h b/include/linux/compat.h
index bbee15ef3ae9..0d53ab4f79c5 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -142,6 +142,22 @@ typedef struct {
 	compat_sigset_word	sig[_COMPAT_NSIG_WORDS];
 } compat_sigset_t;
 
+#ifdef CONFIG_GENERIC_COMPAT_RT_SIGACTION
+struct compat_sigaction {
+#ifndef __ARCH_HAS_ODD_SIGACTION
+	compat_uptr_t			sa_handler;
+	compat_ulong_t			sa_flags;
+#else
+	compat_ulong_t			sa_flags;
+	compat_uptr_t			sa_handler;
+#endif
+#ifdef __ARCH_HAS_SA_RESTORER
+	compat_uptr_t			sa_restorer;
+#endif
+	compat_sigset_t			sa_mask __packed;
+};
+#endif
+
 /*
  * These functions operate strictly on struct compat_time*
  */
@@ -602,6 +618,14 @@ asmlinkage long compat_sys_rt_sigprocmask(int how, compat_sigset_t __user *set,
 asmlinkage long compat_sys_rt_sigpending(compat_sigset_t __user *uset,
 					 compat_size_t sigsetsize);
 #endif
+#ifndef CONFIG_ODD_RT_SIGACTION
+#ifdef CONFIG_GENERIC_COMPAT_RT_SIGACTION
+asmlinkage long compat_sys_rt_sigaction(int,
+				 const struct compat_sigaction __user *,
+				 struct compat_sigaction __user *,
+				 compat_size_t);
+#endif
+#endif
 #ifdef CONFIG_GENERIC_COMPAT_RT_SIGQUEUEINFO
 asmlinkage long compat_sys_rt_sigqueueinfo(compat_pid_t pid, int sig,
 				struct compat_siginfo __user *uinfo);
diff --git a/kernel/signal.c b/kernel/signal.c
index 5a4aed1244fb..f3665f3de660 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -3358,6 +3358,55 @@ SYSCALL_DEFINE4(rt_sigaction, int, sig,
 out:
 	return ret;
 }
+#ifdef CONFIG_COMPAT
+#ifdef CONFIG_GENERIC_COMPAT_RT_SIGACTION
+COMPAT_SYSCALL_DEFINE4(rt_sigaction, int, sig,
+		const struct compat_sigaction __user *, act,
+		struct compat_sigaction __user *, oact,
+		compat_size_t, sigsetsize)
+{
+	struct k_sigaction new_ka, old_ka;
+	compat_sigset_t mask;
+#ifdef __ARCH_HAS_SA_RESTORER
+	compat_uptr_t restorer;
+#endif
+	int ret;
+
+	/* XXX: Don't preclude handling different sized sigset_t's.  */
+	if (sigsetsize != sizeof(compat_sigset_t))
+		return -EINVAL;
+
+	if (act) {
+		compat_uptr_t handler;
+		ret = get_user(handler, &act->sa_handler);
+		new_ka.sa.sa_handler = compat_ptr(handler);
+#ifdef __ARCH_HAS_SA_RESTORER
+		ret |= get_user(restorer, &act->sa_restorer);
+		new_ka.sa.sa_restorer = compat_ptr(restorer);
+#endif
+		ret |= copy_from_user(&mask, &act->sa_mask, sizeof(mask));
+		ret |= __get_user(new_ka.sa.sa_flags, &act->sa_flags);
+		if (ret)
+			return -EFAULT;
+		sigset_from_compat(&new_ka.sa.sa_mask, &mask);
+	}
+
+	ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
+	if (!ret && oact) {
+		sigset_to_compat(&mask, &old_ka.sa.sa_mask);
+		ret = put_user(ptr_to_compat(old_ka.sa.sa_handler), 
+			       &oact->sa_handler);
+		ret |= copy_to_user(&oact->sa_mask, &mask, sizeof(mask));
+		ret |= __put_user(old_ka.sa.sa_flags, &oact->sa_flags);
+#ifdef __ARCH_HAS_SA_RESTORER
+		ret |= put_user(ptr_to_compat(old_ka.sa.sa_restorer),
+				&oact->sa_restorer);
+#endif
+	}
+	return ret;
+}
+#endif
+#endif
 #endif /* !CONFIG_ODD_RT_SIGACTION */
 
 #ifdef __ARCH_WANT_SYS_SGETMASK
-- 
cgit v1.2.3


From 495dfbf767553980dbd40a19a96a8ca5fa1be616 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 25 Dec 2012 19:09:45 -0500
Subject: generic sys_sigaction() and compat_sys_sigaction()

conditional on OLD_SIGACTION/COMPAT_OLD_SIGACTION

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/Kconfig             | 11 +++++++
 include/linux/compat.h   | 14 +++++++++
 include/linux/signal.h   |  9 ++++++
 include/linux/syscalls.h |  5 ++++
 kernel/signal.c          | 78 ++++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 117 insertions(+)

(limited to 'include/linux')

diff --git a/arch/Kconfig b/arch/Kconfig
index 3b4a416fc448..e50d3af294d4 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -400,4 +400,15 @@ config OLD_SIGSUSPEND3
 	help
 	  Even weirder antique ABI - three-argument sigsuspend(2)
 
+config OLD_SIGACTION
+	bool
+	help
+	  Architecture has old sigaction(2) syscall.  Nope, not the same
+	  as OLD_SIGSUSPEND | OLD_SIGSUSPEND3 - alpha has sigsuspend(2),
+	  but fairly different variant of sigaction(2), thanks to OSF/1
+	  compatibility...
+
+config COMPAT_OLD_SIGACTION
+	bool
+
 source "kernel/gcov/Kconfig"
diff --git a/include/linux/compat.h b/include/linux/compat.h
index 0d53ab4f79c5..e20b8b404ae9 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -299,6 +299,15 @@ struct compat_robust_list_head {
 	compat_uptr_t			list_op_pending;
 };
 
+#ifdef CONFIG_COMPAT_OLD_SIGACTION
+struct compat_old_sigaction {
+	compat_uptr_t			sa_handler;
+	compat_old_sigset_t		sa_mask;
+	compat_ulong_t			sa_flags;
+	compat_uptr_t			sa_restorer;
+};
+#endif
+
 struct compat_statfs;
 struct compat_statfs64;
 struct compat_old_linux_dirent;
@@ -383,6 +392,11 @@ int get_compat_sigevent(struct sigevent *event,
 		const struct compat_sigevent __user *u_event);
 long compat_sys_rt_tgsigqueueinfo(compat_pid_t tgid, compat_pid_t pid, int sig,
 				  struct compat_siginfo __user *uinfo);
+#ifdef CONFIG_COMPAT_OLD_SIGACTION
+asmlinkage long compat_sys_sigaction(int sig,
+                                   const struct compat_old_sigaction __user *act,
+                                   struct compat_old_sigaction __user *oact);
+#endif
 
 static inline int compat_timeval_compare(struct compat_timeval *lhs,
 					struct compat_timeval *rhs)
diff --git a/include/linux/signal.h b/include/linux/signal.h
index 0b6878e882da..e28e8d455d6e 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -269,6 +269,15 @@ struct k_sigaction {
 	__sigrestore_t ka_restorer;
 #endif
 };
+ 
+#ifdef CONFIG_OLD_SIGACTION
+struct old_sigaction {
+	__sighandler_t sa_handler;
+	old_sigset_t sa_mask;
+	unsigned long sa_flags;
+	__sigrestore_t sa_restorer;
+};
+#endif
 
 extern int get_signal_to_deliver(siginfo_t *info, struct k_sigaction *return_ka, struct pt_regs *regs, void *cookie);
 extern void signal_delivered(int sig, siginfo_t *info, struct k_sigaction *ka, struct pt_regs *regs, int stepping);
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 1c4938bf901e..02b045012785 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -385,6 +385,11 @@ asmlinkage long sys_sigsuspend(old_sigset_t mask);
 asmlinkage long sys_sigsuspend(int unused1, int unused2, old_sigset_t mask);
 #endif
 
+#ifdef CONFIG_OLD_SIGACTION
+asmlinkage long sys_sigaction(int, const struct old_sigaction __user *,
+				struct old_sigaction __user *);
+#endif
+
 #ifndef CONFIG_ODD_RT_SIGACTION
 asmlinkage long sys_rt_sigaction(int,
 				 const struct sigaction __user *,
diff --git a/kernel/signal.c b/kernel/signal.c
index f3665f3de660..79998f5b0f11 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -3409,6 +3409,84 @@ COMPAT_SYSCALL_DEFINE4(rt_sigaction, int, sig,
 #endif
 #endif /* !CONFIG_ODD_RT_SIGACTION */
 
+#ifdef CONFIG_OLD_SIGACTION
+SYSCALL_DEFINE3(sigaction, int, sig,
+		const struct old_sigaction __user *, act,
+	        struct old_sigaction __user *, oact)
+{
+	struct k_sigaction new_ka, old_ka;
+	int ret;
+
+	if (act) {
+		old_sigset_t mask;
+		if (!access_ok(VERIFY_READ, act, sizeof(*act)) ||
+		    __get_user(new_ka.sa.sa_handler, &act->sa_handler) ||
+		    __get_user(new_ka.sa.sa_restorer, &act->sa_restorer) ||
+		    __get_user(new_ka.sa.sa_flags, &act->sa_flags) ||
+		    __get_user(mask, &act->sa_mask))
+			return -EFAULT;
+#ifdef __ARCH_HAS_KA_RESTORER
+		new_ka.ka_restorer = NULL;
+#endif
+		siginitset(&new_ka.sa.sa_mask, mask);
+	}
+
+	ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
+
+	if (!ret && oact) {
+		if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) ||
+		    __put_user(old_ka.sa.sa_handler, &oact->sa_handler) ||
+		    __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer) ||
+		    __put_user(old_ka.sa.sa_flags, &oact->sa_flags) ||
+		    __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask))
+			return -EFAULT;
+	}
+
+	return ret;
+}
+#endif
+#ifdef CONFIG_COMPAT_OLD_SIGACTION
+COMPAT_SYSCALL_DEFINE3(sigaction, int, sig,
+		const struct compat_old_sigaction __user *, act,
+	        struct compat_old_sigaction __user *, oact)
+{
+	struct k_sigaction new_ka, old_ka;
+	int ret;
+	compat_old_sigset_t mask;
+	compat_uptr_t handler, restorer;
+
+	if (act) {
+		if (!access_ok(VERIFY_READ, act, sizeof(*act)) ||
+		    __get_user(handler, &act->sa_handler) ||
+		    __get_user(restorer, &act->sa_restorer) ||
+		    __get_user(new_ka.sa.sa_flags, &act->sa_flags) ||
+		    __get_user(mask, &act->sa_mask))
+			return -EFAULT;
+
+#ifdef __ARCH_HAS_KA_RESTORER
+		new_ka.ka_restorer = NULL;
+#endif
+		new_ka.sa.sa_handler = compat_ptr(handler);
+		new_ka.sa.sa_restorer = compat_ptr(restorer);
+		siginitset(&new_ka.sa.sa_mask, mask);
+	}
+
+	ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
+
+	if (!ret && oact) {
+		if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) ||
+		    __put_user(ptr_to_compat(old_ka.sa.sa_handler),
+			       &oact->sa_handler) ||
+		    __put_user(ptr_to_compat(old_ka.sa.sa_restorer),
+			       &oact->sa_restorer) ||
+		    __put_user(old_ka.sa.sa_flags, &oact->sa_flags) ||
+		    __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask))
+			return -EFAULT;
+	}
+	return ret;
+}
+#endif
+
 #ifdef __ARCH_WANT_SYS_SGETMASK
 
 /*
-- 
cgit v1.2.3


From f482e1b4a4abf926507a4c1c6317acd3e7aa61b7 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 25 Dec 2012 20:24:38 -0500
Subject: switch compat_sys_open* to COMPAT_SYSCALL_DEFINE

---
 fs/compat.c            | 11 ++++-------
 include/linux/compat.h |  2 +-
 2 files changed, 5 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/fs/compat.c b/fs/compat.c
index 015e1e1f87c6..f9e2fe3794fa 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1278,8 +1278,7 @@ compat_sys_vmsplice(int fd, const struct compat_iovec __user *iov32,
  * Exactly like fs/open.c:sys_open(), except that it doesn't set the
  * O_LARGEFILE flag.
  */
-asmlinkage long
-compat_sys_open(const char __user *filename, int flags, umode_t mode)
+COMPAT_SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, umode_t, mode)
 {
 	return do_sys_open(AT_FDCWD, filename, flags, mode);
 }
@@ -1288,8 +1287,7 @@ compat_sys_open(const char __user *filename, int flags, umode_t mode)
  * Exactly like fs/open.c:sys_openat(), except that it doesn't set the
  * O_LARGEFILE flag.
  */
-asmlinkage long
-compat_sys_openat(unsigned int dfd, const char __user *filename, int flags, umode_t mode)
+COMPAT_SYSCALL_DEFINE4(openat, int, dfd, const char __user *, filename, int, flags, umode_t, mode)
 {
 	return do_sys_open(dfd, filename, flags, mode);
 }
@@ -1785,9 +1783,8 @@ asmlinkage long compat_sys_timerfd_gettime(int ufd,
  * Exactly like fs/open.c:sys_open_by_handle_at(), except that it
  * doesn't set the O_LARGEFILE flag.
  */
-asmlinkage long
-compat_sys_open_by_handle_at(int mountdirfd,
-			     struct file_handle __user *handle, int flags)
+COMPAT_SYSCALL_DEFINE3(open_by_handle_at, int, mountdirfd,
+			     struct file_handle __user *, handle, int, flags)
 {
 	return do_handle_open(mountdirfd, handle, flags);
 }
diff --git a/include/linux/compat.h b/include/linux/compat.h
index e20b8b404ae9..8de903587fb9 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -534,7 +534,7 @@ asmlinkage long compat_sys_vmsplice(int fd, const struct compat_iovec __user *,
 				    unsigned int nr_segs, unsigned int flags);
 asmlinkage long compat_sys_open(const char __user *filename, int flags,
 				umode_t mode);
-asmlinkage long compat_sys_openat(unsigned int dfd, const char __user *filename,
+asmlinkage long compat_sys_openat(int dfd, const char __user *filename,
 				  int flags, umode_t mode);
 asmlinkage long compat_sys_open_by_handle_at(int mountdirfd,
 					     struct file_handle __user *handle,
-- 
cgit v1.2.3


From ca86b5dce213f52c7538932740f83cafb2c34547 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 7 Nov 2012 15:09:38 -0500
Subject: new helper: get_signal()

On success get_signal_to_deliver() fills k_sigaction and siginfo.
_All_ users pass it addresses of the local variables sitting in
the same function.  Then they proceed to pass those addresses
pretty much in tandem to a bunch of helper functions; again, all
callers of those helpers are passing them such a pair, and one that
had been through get_signal_to_deliver() at that.

The obvious cleanup: introduce a new type that would contain a
<k_sigaction,siginfo> pair (struct ksignal) and begin switching to
using it.  Turns out that it's convenient to store the signal number
in the same object.

New helper, taking that sucker is a wrapper for get_signal_to_deliver();
takes struct ksignal * and returns bool.  On success fills ksignal
with the information for signal handler to be invoked.

For now it's a macro (to avoid header ordering headache), but eventually
it'll be a function in kernel/signal.c, with get_signal_to_deliver()
folded into it.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/signal.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/signal.h b/include/linux/signal.h
index e28e8d455d6e..7c2744198dba 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -279,10 +279,28 @@ struct old_sigaction {
 };
 #endif
 
+struct ksignal {
+	struct k_sigaction ka;
+	siginfo_t info;
+	int sig;
+};
+
 extern int get_signal_to_deliver(siginfo_t *info, struct k_sigaction *return_ka, struct pt_regs *regs, void *cookie);
 extern void signal_delivered(int sig, siginfo_t *info, struct k_sigaction *ka, struct pt_regs *regs, int stepping);
 extern void exit_signals(struct task_struct *tsk);
 
+/*
+ * Eventually that'll replace get_signal_to_deliver(); macro for now,
+ * to avoid nastiness with include order.
+ */
+#define get_signal(ksig)					\
+({								\
+	struct ksignal *p = (ksig);				\
+	p->sig = get_signal_to_deliver(&p->info, &p->ka,	\
+					signal_pt_regs(), NULL);\
+	p->sig > 0;						\
+})
+
 extern struct kmem_cache *sighand_cachep;
 
 int unhandled_signal(struct task_struct *tsk, int sig);
-- 
cgit v1.2.3


From 2ce5da17570771330f44ac993b77749debf7954b Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 7 Nov 2012 15:11:25 -0500
Subject: new helper: signal_setup_done()

usual "call force_sigsegv or signal_delivered" logics.  Takes
ksignal instead of separate siginfo/k_sigaction.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/signal.h | 1 +
 kernel/signal.c        | 9 +++++++++
 2 files changed, 10 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/signal.h b/include/linux/signal.h
index 7c2744198dba..a2dcb94ea49d 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -286,6 +286,7 @@ struct ksignal {
 };
 
 extern int get_signal_to_deliver(siginfo_t *info, struct k_sigaction *return_ka, struct pt_regs *regs, void *cookie);
+extern void signal_setup_done(int failed, struct ksignal *ksig, int stepping);
 extern void signal_delivered(int sig, siginfo_t *info, struct k_sigaction *ka, struct pt_regs *regs, int stepping);
 extern void exit_signals(struct task_struct *tsk);
 
diff --git a/kernel/signal.c b/kernel/signal.c
index 79998f5b0f11..775f5552fa0e 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2396,6 +2396,15 @@ void signal_delivered(int sig, siginfo_t *info, struct k_sigaction *ka,
 	tracehook_signal_handler(sig, info, ka, regs, stepping);
 }
 
+void signal_setup_done(int failed, struct ksignal *ksig, int stepping)
+{
+	if (failed)
+		force_sigsegv(ksig->sig, current);
+	else
+		signal_delivered(ksig->sig, &ksig->info, &ksig->ka,
+			signal_pt_regs(), stepping);
+}
+
 /*
  * It could be that complete_signal() picked us to notify about the
  * group-wide signal. Other threads should be notified now to take
-- 
cgit v1.2.3


From 5a1b98d3096f1d780045f9be812335ad77aed93d Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 6 Nov 2012 13:28:21 -0500
Subject: new helper: sigsp()

Normal logics for altstack handling in sigframe allocation

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/sched.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 6fc8f45de4e9..8f983293b403 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2305,6 +2305,17 @@ static inline int sas_ss_flags(unsigned long sp)
 		: on_sig_stack(sp) ? SS_ONSTACK : 0);
 }
 
+static inline unsigned long sigsp(unsigned long sp, struct ksignal *ksig)
+{
+	if (unlikely((ksig->ka.sa.sa_flags & SA_ONSTACK)) && ! sas_ss_flags(sp))
+#ifdef CONFIG_STACK_GROWSUP
+		return current->sas_ss_sp;
+#else
+		return current->sas_ss_sp + current->sas_ss_size;
+#endif
+	return sp;
+}
+
 /*
  * Routines for handling mm_structs
  */
-- 
cgit v1.2.3


From 0aa0203fb43f04714004b2c4ad33b858e240555d Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 25 Nov 2012 02:30:08 -0500
Subject: take sys_rt_sigsuspend() prototype to linux/syscalls.h

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/powerpc/include/asm/syscalls.h | 2 --
 arch/powerpc/kernel/signal_32.c     | 1 -
 arch/xtensa/include/asm/syscall.h   | 1 -
 include/asm-generic/syscalls.h      | 4 ----
 include/linux/syscalls.h            | 2 ++
 5 files changed, 2 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/include/asm/syscalls.h b/arch/powerpc/include/asm/syscalls.h
index 21936530df08..6949c42ffac2 100644
--- a/arch/powerpc/include/asm/syscalls.h
+++ b/arch/powerpc/include/asm/syscalls.h
@@ -22,8 +22,6 @@ asmlinkage long ppc64_personality(unsigned long personality);
 asmlinkage int ppc_rtas(struct rtas_args __user *uargs);
 asmlinkage time_t sys64_time(time_t __user * tloc);
 
-asmlinkage long sys_rt_sigsuspend(sigset_t __user *unewset,
-		size_t sigsetsize);
 asmlinkage long sys_sigaltstack(const stack_t __user *uss,
 		stack_t __user *uoss, unsigned long r5, unsigned long r6,
 		unsigned long r7, unsigned long r8, struct pt_regs *regs);
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index 804e323c139d..9ec3fed3caac 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -57,7 +57,6 @@
 
 #ifdef CONFIG_PPC64
 #define sys_sigsuspend	compat_sys_sigsuspend
-#define sys_rt_sigsuspend	compat_sys_rt_sigsuspend
 #define sys_rt_sigreturn	compat_sys_rt_sigreturn
 #define sys_sigaction	compat_sys_sigaction
 #define sys_swapcontext	compat_sys_swapcontext
diff --git a/arch/xtensa/include/asm/syscall.h b/arch/xtensa/include/asm/syscall.h
index 6cf7c6c07a84..08a23ddac295 100644
--- a/arch/xtensa/include/asm/syscall.h
+++ b/arch/xtensa/include/asm/syscall.h
@@ -26,4 +26,3 @@ asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds,
 			  struct timespec __user *tsp,
 			  const sigset_t __user *sigmask,
 			  size_t sigsetsize);
-asmlinkage long sys_rt_sigsuspend(sigset_t __user *unewset, size_t sigsetsize);
diff --git a/include/asm-generic/syscalls.h b/include/asm-generic/syscalls.h
index 6a8d620a84d4..9e25a3179d6c 100644
--- a/include/asm-generic/syscalls.h
+++ b/include/asm-generic/syscalls.h
@@ -32,8 +32,4 @@ asmlinkage long sys_sigaltstack(const stack_t __user *, stack_t __user *,
 asmlinkage long sys_rt_sigreturn(struct pt_regs *regs);
 #endif
 
-#ifndef sys_rt_sigsuspend
-asmlinkage long sys_rt_sigsuspend(sigset_t __user *unewset, size_t sigsetsize);
-#endif
-
 #endif /* __ASM_GENERIC_SYSCALLS_H */
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 02b045012785..66d298f69f98 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -385,6 +385,8 @@ asmlinkage long sys_sigsuspend(old_sigset_t mask);
 asmlinkage long sys_sigsuspend(int unused1, int unused2, old_sigset_t mask);
 #endif
 
+asmlinkage long sys_rt_sigsuspend(sigset_t __user *unewset, size_t sigsetsize);
+
 #ifdef CONFIG_OLD_SIGACTION
 asmlinkage long sys_sigaction(int, const struct old_sigaction __user *,
 				struct old_sigaction __user *);
-- 
cgit v1.2.3


From 7f70410fe03f9f31a99bb5074b5e3c48f3b90541 Mon Sep 17 00:00:00 2001
From: H Hartley Sweeten <hsweeten@visionengravers.com>
Date: Thu, 31 Jan 2013 14:42:16 -0700
Subject: pci_ids.h: add common pci vendor ids from comedi subsystem

There are a number of pci vendor ids that are used in multiple
drivers in the comedi subsystem. Move these ids to pci_ids.h.

This also fixes some build warnings reported by the kbuild test
robot about PCI_VENDOR_ID_AMPLICON being undeclared.

Signed-off-by: H Hartley Sweeten <hsweeten@visionengravers.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/comedi/comedidev.h |  5 -----
 include/linux/pci_ids.h            | 10 ++++++++++
 2 files changed, 10 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/staging/comedi/comedidev.h b/drivers/staging/comedi/comedidev.h
index 1502fc4a85fc..b7bb120f48be 100644
--- a/drivers/staging/comedi/comedidev.h
+++ b/drivers/staging/comedi/comedidev.h
@@ -377,12 +377,7 @@ int comedi_driver_unregister(struct comedi_driver *);
 #define PCI_VENDOR_ID_DT		0x1116
 #define PCI_VENDOR_ID_IOTECH		0x1616
 #define PCI_VENDOR_ID_CONTEC		0x1221
-#define PCI_VENDOR_ID_CB		0x1307	/* Measurement Computing */
-#define PCI_VENDOR_ID_ADVANTECH		0x13fe
-#define PCI_VENDOR_ID_MEILHAUS		0x1402
 #define PCI_VENDOR_ID_RTD		0x1435
-#define PCI_VENDOR_ID_ADLINK		0x144a
-#define PCI_VENDOR_ID_AMPLICON		0x14dc
 
 struct pci_dev;
 struct pci_driver;
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 0eb65796bcb9..747c8b7e97bd 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -1807,6 +1807,8 @@
 #define PCI_VENDOR_ID_ESDGMBH		0x12fe
 #define PCI_DEVICE_ID_ESDGMBH_CPCIASIO4 0x0111
 
+#define PCI_VENDOR_ID_CB		0x1307	/* Measurement Computing */
+
 #define PCI_VENDOR_ID_SIIG		0x131f
 #define PCI_SUBVENDOR_ID_SIIG		0x131f
 #define PCI_DEVICE_ID_SIIG_1S_10x_550	0x1000
@@ -2013,6 +2015,10 @@
 #define PCI_DEVICE_ID_CMEDIA_CM8738	0x0111
 #define PCI_DEVICE_ID_CMEDIA_CM8738B	0x0112
 
+#define PCI_VENDOR_ID_ADVANTECH		0x13fe
+
+#define PCI_VENDOR_ID_MEILHAUS		0x1402
+
 #define PCI_VENDOR_ID_LAVA		0x1407
 #define PCI_DEVICE_ID_LAVA_DSERIAL	0x0100 /* 2x 16550 */
 #define PCI_DEVICE_ID_LAVA_QUATRO_A	0x0101 /* 2x 16550, half of 4 port */
@@ -2058,6 +2064,8 @@
 
 #define PCI_VENDOR_ID_CHELSIO		0x1425
 
+#define PCI_VENDOR_ID_ADLINK		0x144a
+
 #define PCI_VENDOR_ID_SAMSUNG		0x144d
 
 #define PCI_VENDOR_ID_GIGABYTE		0x1458
@@ -2091,6 +2099,8 @@
 #define PCI_DEVICE_ID_AFAVLAB_P030	0x2182
 #define PCI_SUBDEVICE_ID_AFAVLAB_P061		0x2150
 
+#define PCI_VENDOR_ID_AMPLICON		0x14dc
+
 #define PCI_VENDOR_ID_BCM_GVC          0x14a4
 #define PCI_VENDOR_ID_BROADCOM		0x14e4
 #define PCI_DEVICE_ID_TIGON3_5752	0x1600
-- 
cgit v1.2.3


From bf22433575ef30a4807f0620498017df0f27df67 Mon Sep 17 00:00:00 2001
From: Philip Avinash <avinashphilip@ti.com>
Date: Fri, 4 Jan 2013 13:26:50 +0530
Subject: mtd: devices: elm: Add support for ELM error correction

The ELM hardware module can be used to speedup BCH 4/8/16 ECC scheme
error correction.
For now only 4 & 8 bit support is added

Signed-off-by: Philip Avinash <avinashphilip@ti.com>
Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
---
 Documentation/devicetree/bindings/mtd/elm.txt |  16 +
 drivers/mtd/devices/Makefile                  |   4 +-
 drivers/mtd/devices/elm.c                     | 404 ++++++++++++++++++++++++++
 include/linux/platform_data/elm.h             |  53 ++++
 4 files changed, 476 insertions(+), 1 deletion(-)
 create mode 100644 Documentation/devicetree/bindings/mtd/elm.txt
 create mode 100644 drivers/mtd/devices/elm.c
 create mode 100644 include/linux/platform_data/elm.h

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/mtd/elm.txt b/Documentation/devicetree/bindings/mtd/elm.txt
new file mode 100644
index 000000000000..e43c668656bc
--- /dev/null
+++ b/Documentation/devicetree/bindings/mtd/elm.txt
@@ -0,0 +1,16 @@
+Error location module
+
+Required properties:
+- compatible: Must be "ti,am33xx-elm"
+- reg: physical base address and size of the registers map.
+- interrupts: Interrupt number for the elm.
+
+Optional properties:
+- ti,hwmods: Name of the hwmod associated to the elm
+
+Example:
+elm: elm@0 {
+	compatible	= "ti,am33xx-elm";
+	reg = <0x48080000 0x2000>;
+	interrupts = <4>;
+};
diff --git a/drivers/mtd/devices/Makefile b/drivers/mtd/devices/Makefile
index 395733a30ef4..369a1943ca25 100644
--- a/drivers/mtd/devices/Makefile
+++ b/drivers/mtd/devices/Makefile
@@ -17,8 +17,10 @@ obj-$(CONFIG_MTD_LART)		+= lart.o
 obj-$(CONFIG_MTD_BLOCK2MTD)	+= block2mtd.o
 obj-$(CONFIG_MTD_DATAFLASH)	+= mtd_dataflash.o
 obj-$(CONFIG_MTD_M25P80)	+= m25p80.o
+obj-$(CONFIG_MTD_NAND_OMAP_BCH)	+= elm.o
 obj-$(CONFIG_MTD_SPEAR_SMI)	+= spear_smi.o
 obj-$(CONFIG_MTD_SST25L)	+= sst25l.o
 obj-$(CONFIG_MTD_BCM47XXSFLASH)	+= bcm47xxsflash.o
 
-CFLAGS_docg3.o			+= -I$(src)
\ No newline at end of file
+
+CFLAGS_docg3.o			+= -I$(src)
diff --git a/drivers/mtd/devices/elm.c b/drivers/mtd/devices/elm.c
new file mode 100644
index 000000000000..b93a349ae587
--- /dev/null
+++ b/drivers/mtd/devices/elm.c
@@ -0,0 +1,404 @@
+/*
+ * Error Location Module
+ *
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/platform_device.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/pm_runtime.h>
+#include <linux/platform_data/elm.h>
+
+#define ELM_IRQSTATUS			0x018
+#define ELM_IRQENABLE			0x01c
+#define ELM_LOCATION_CONFIG		0x020
+#define ELM_PAGE_CTRL			0x080
+#define ELM_SYNDROME_FRAGMENT_0		0x400
+#define ELM_SYNDROME_FRAGMENT_6		0x418
+#define ELM_LOCATION_STATUS		0x800
+#define ELM_ERROR_LOCATION_0		0x880
+
+/* ELM Interrupt Status Register */
+#define INTR_STATUS_PAGE_VALID		BIT(8)
+
+/* ELM Interrupt Enable Register */
+#define INTR_EN_PAGE_MASK		BIT(8)
+
+/* ELM Location Configuration Register */
+#define ECC_BCH_LEVEL_MASK		0x3
+
+/* ELM syndrome */
+#define ELM_SYNDROME_VALID		BIT(16)
+
+/* ELM_LOCATION_STATUS Register */
+#define ECC_CORRECTABLE_MASK		BIT(8)
+#define ECC_NB_ERRORS_MASK		0x1f
+
+/* ELM_ERROR_LOCATION_0-15 Registers */
+#define ECC_ERROR_LOCATION_MASK		0x1fff
+
+#define ELM_ECC_SIZE			0x7ff
+
+#define SYNDROME_FRAGMENT_REG_SIZE	0x40
+#define ERROR_LOCATION_SIZE		0x100
+
+struct elm_info {
+	struct device *dev;
+	void __iomem *elm_base;
+	struct completion elm_completion;
+	struct list_head list;
+	enum bch_ecc bch_type;
+};
+
+static LIST_HEAD(elm_devices);
+
+static void elm_write_reg(struct elm_info *info, int offset, u32 val)
+{
+	writel(val, info->elm_base + offset);
+}
+
+static u32 elm_read_reg(struct elm_info *info, int offset)
+{
+	return readl(info->elm_base + offset);
+}
+
+/**
+ * elm_config - Configure ELM module
+ * @dev:	ELM device
+ * @bch_type:	Type of BCH ecc
+ */
+void elm_config(struct device *dev, enum bch_ecc bch_type)
+{
+	u32 reg_val;
+	struct elm_info *info = dev_get_drvdata(dev);
+
+	reg_val = (bch_type & ECC_BCH_LEVEL_MASK) | (ELM_ECC_SIZE << 16);
+	elm_write_reg(info, ELM_LOCATION_CONFIG, reg_val);
+	info->bch_type = bch_type;
+}
+EXPORT_SYMBOL(elm_config);
+
+/**
+ * elm_configure_page_mode - Enable/Disable page mode
+ * @info:	elm info
+ * @index:	index number of syndrome fragment vector
+ * @enable:	enable/disable flag for page mode
+ *
+ * Enable page mode for syndrome fragment index
+ */
+static void elm_configure_page_mode(struct elm_info *info, int index,
+		bool enable)
+{
+	u32 reg_val;
+
+	reg_val = elm_read_reg(info, ELM_PAGE_CTRL);
+	if (enable)
+		reg_val |= BIT(index);	/* enable page mode */
+	else
+		reg_val &= ~BIT(index);	/* disable page mode */
+
+	elm_write_reg(info, ELM_PAGE_CTRL, reg_val);
+}
+
+/**
+ * elm_load_syndrome - Load ELM syndrome reg
+ * @info:	elm info
+ * @err_vec:	elm error vectors
+ * @ecc:	buffer with calculated ecc
+ *
+ * Load syndrome fragment registers with calculated ecc in reverse order.
+ */
+static void elm_load_syndrome(struct elm_info *info,
+		struct elm_errorvec *err_vec, u8 *ecc)
+{
+	int i, offset;
+	u32 val;
+
+	for (i = 0; i < ERROR_VECTOR_MAX; i++) {
+
+		/* Check error reported */
+		if (err_vec[i].error_reported) {
+			elm_configure_page_mode(info, i, true);
+			offset = ELM_SYNDROME_FRAGMENT_0 +
+				SYNDROME_FRAGMENT_REG_SIZE * i;
+
+			/* BCH8 */
+			if (info->bch_type) {
+
+				/* syndrome fragment 0 = ecc[9-12B] */
+				val = cpu_to_be32(*(u32 *) &ecc[9]);
+				elm_write_reg(info, offset, val);
+
+				/* syndrome fragment 1 = ecc[5-8B] */
+				offset += 4;
+				val = cpu_to_be32(*(u32 *) &ecc[5]);
+				elm_write_reg(info, offset, val);
+
+				/* syndrome fragment 2 = ecc[1-4B] */
+				offset += 4;
+				val = cpu_to_be32(*(u32 *) &ecc[1]);
+				elm_write_reg(info, offset, val);
+
+				/* syndrome fragment 3 = ecc[0B] */
+				offset += 4;
+				val = ecc[0];
+				elm_write_reg(info, offset, val);
+			} else {
+				/* syndrome fragment 0 = ecc[20-52b] bits */
+				val = (cpu_to_be32(*(u32 *) &ecc[3]) >> 4) |
+					((ecc[2] & 0xf) << 28);
+				elm_write_reg(info, offset, val);
+
+				/* syndrome fragment 1 = ecc[0-20b] bits */
+				offset += 4;
+				val = cpu_to_be32(*(u32 *) &ecc[0]) >> 12;
+				elm_write_reg(info, offset, val);
+			}
+		}
+
+		/* Update ecc pointer with ecc byte size */
+		ecc += info->bch_type ? BCH8_SIZE : BCH4_SIZE;
+	}
+}
+
+/**
+ * elm_start_processing - start elm syndrome processing
+ * @info:	elm info
+ * @err_vec:	elm error vectors
+ *
+ * Set syndrome valid bit for syndrome fragment registers for which
+ * elm syndrome fragment registers are loaded. This enables elm module
+ * to start processing syndrome vectors.
+ */
+static void elm_start_processing(struct elm_info *info,
+		struct elm_errorvec *err_vec)
+{
+	int i, offset;
+	u32 reg_val;
+
+	/*
+	 * Set syndrome vector valid, so that ELM module
+	 * will process it for vectors error is reported
+	 */
+	for (i = 0; i < ERROR_VECTOR_MAX; i++) {
+		if (err_vec[i].error_reported) {
+			offset = ELM_SYNDROME_FRAGMENT_6 +
+				SYNDROME_FRAGMENT_REG_SIZE * i;
+			reg_val = elm_read_reg(info, offset);
+			reg_val |= ELM_SYNDROME_VALID;
+			elm_write_reg(info, offset, reg_val);
+		}
+	}
+}
+
+/**
+ * elm_error_correction - locate correctable error position
+ * @info:	elm info
+ * @err_vec:	elm error vectors
+ *
+ * On completion of processing by elm module, error location status
+ * register updated with correctable/uncorrectable error information.
+ * In case of correctable errors, number of errors located from
+ * elm location status register & read the positions from
+ * elm error location register.
+ */
+static void elm_error_correction(struct elm_info *info,
+		struct elm_errorvec *err_vec)
+{
+	int i, j, errors = 0;
+	int offset;
+	u32 reg_val;
+
+	for (i = 0; i < ERROR_VECTOR_MAX; i++) {
+
+		/* Check error reported */
+		if (err_vec[i].error_reported) {
+			offset = ELM_LOCATION_STATUS + ERROR_LOCATION_SIZE * i;
+			reg_val = elm_read_reg(info, offset);
+
+			/* Check correctable error or not */
+			if (reg_val & ECC_CORRECTABLE_MASK) {
+				offset = ELM_ERROR_LOCATION_0 +
+					ERROR_LOCATION_SIZE * i;
+
+				/* Read count of correctable errors */
+				err_vec[i].error_count = reg_val &
+					ECC_NB_ERRORS_MASK;
+
+				/* Update the error locations in error vector */
+				for (j = 0; j < err_vec[i].error_count; j++) {
+
+					reg_val = elm_read_reg(info, offset);
+					err_vec[i].error_loc[j] = reg_val &
+						ECC_ERROR_LOCATION_MASK;
+
+					/* Update error location register */
+					offset += 4;
+				}
+
+				errors += err_vec[i].error_count;
+			} else {
+				err_vec[i].error_uncorrectable = true;
+			}
+
+			/* Clearing interrupts for processed error vectors */
+			elm_write_reg(info, ELM_IRQSTATUS, BIT(i));
+
+			/* Disable page mode */
+			elm_configure_page_mode(info, i, false);
+		}
+	}
+}
+
+/**
+ * elm_decode_bch_error_page - Locate error position
+ * @dev:	device pointer
+ * @ecc_calc:	calculated ECC bytes from GPMC
+ * @err_vec:	elm error vectors
+ *
+ * Called with one or more error reported vectors & vectors with
+ * error reported is updated in err_vec[].error_reported
+ */
+void elm_decode_bch_error_page(struct device *dev, u8 *ecc_calc,
+		struct elm_errorvec *err_vec)
+{
+	struct elm_info *info = dev_get_drvdata(dev);
+	u32 reg_val;
+
+	/* Enable page mode interrupt */
+	reg_val = elm_read_reg(info, ELM_IRQSTATUS);
+	elm_write_reg(info, ELM_IRQSTATUS, reg_val & INTR_STATUS_PAGE_VALID);
+	elm_write_reg(info, ELM_IRQENABLE, INTR_EN_PAGE_MASK);
+
+	/* Load valid ecc byte to syndrome fragment register */
+	elm_load_syndrome(info, err_vec, ecc_calc);
+
+	/* Enable syndrome processing for which syndrome fragment is updated */
+	elm_start_processing(info, err_vec);
+
+	/* Wait for ELM module to finish locating error correction */
+	wait_for_completion(&info->elm_completion);
+
+	/* Disable page mode interrupt */
+	reg_val = elm_read_reg(info, ELM_IRQENABLE);
+	elm_write_reg(info, ELM_IRQENABLE, reg_val & ~INTR_EN_PAGE_MASK);
+	elm_error_correction(info, err_vec);
+}
+EXPORT_SYMBOL(elm_decode_bch_error_page);
+
+static irqreturn_t elm_isr(int this_irq, void *dev_id)
+{
+	u32 reg_val;
+	struct elm_info *info = dev_id;
+
+	reg_val = elm_read_reg(info, ELM_IRQSTATUS);
+
+	/* All error vectors processed */
+	if (reg_val & INTR_STATUS_PAGE_VALID) {
+		elm_write_reg(info, ELM_IRQSTATUS,
+				reg_val & INTR_STATUS_PAGE_VALID);
+		complete(&info->elm_completion);
+		return IRQ_HANDLED;
+	}
+
+	return IRQ_NONE;
+}
+
+static int elm_probe(struct platform_device *pdev)
+{
+	int ret = 0;
+	struct resource *res, *irq;
+	struct elm_info *info;
+
+	info = devm_kzalloc(&pdev->dev, sizeof(*info), GFP_KERNEL);
+	if (!info) {
+		dev_err(&pdev->dev, "failed to allocate memory\n");
+		return -ENOMEM;
+	}
+
+	info->dev = &pdev->dev;
+
+	irq = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
+	if (!irq) {
+		dev_err(&pdev->dev, "no irq resource defined\n");
+		return -ENODEV;
+	}
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res) {
+		dev_err(&pdev->dev, "no memory resource defined\n");
+		return -ENODEV;
+	}
+
+	info->elm_base = devm_request_and_ioremap(&pdev->dev, res);
+	if (!info->elm_base)
+		return -EADDRNOTAVAIL;
+
+	ret = devm_request_irq(&pdev->dev, irq->start, elm_isr, 0,
+			pdev->name, info);
+	if (ret) {
+		dev_err(&pdev->dev, "failure requesting irq %i\n", irq->start);
+		return ret;
+	}
+
+	pm_runtime_enable(&pdev->dev);
+	if (pm_runtime_get_sync(&pdev->dev)) {
+		ret = -EINVAL;
+		pm_runtime_disable(&pdev->dev);
+		dev_err(&pdev->dev, "can't enable clock\n");
+		return ret;
+	}
+
+	init_completion(&info->elm_completion);
+	INIT_LIST_HEAD(&info->list);
+	list_add(&info->list, &elm_devices);
+	platform_set_drvdata(pdev, info);
+	return ret;
+}
+
+static int elm_remove(struct platform_device *pdev)
+{
+	pm_runtime_put_sync(&pdev->dev);
+	pm_runtime_disable(&pdev->dev);
+	platform_set_drvdata(pdev, NULL);
+	return 0;
+}
+
+#ifdef CONFIG_OF
+static const struct of_device_id elm_of_match[] = {
+	{ .compatible = "ti,am33xx-elm" },
+	{},
+};
+MODULE_DEVICE_TABLE(of, elm_of_match);
+#endif
+
+static struct platform_driver elm_driver = {
+	.driver	= {
+		.name	= "elm",
+		.owner	= THIS_MODULE,
+		.of_match_table = of_match_ptr(elm_of_match),
+	},
+	.probe	= elm_probe,
+	.remove	= elm_remove,
+};
+
+module_platform_driver(elm_driver);
+
+MODULE_DESCRIPTION("ELM driver for BCH error correction");
+MODULE_AUTHOR("Texas Instruments");
+MODULE_ALIAS("platform: elm");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/platform_data/elm.h b/include/linux/platform_data/elm.h
new file mode 100644
index 000000000000..11ab6aaf2431
--- /dev/null
+++ b/include/linux/platform_data/elm.h
@@ -0,0 +1,53 @@
+/*
+ * BCH Error Location Module
+ *
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef __ELM_H
+#define __ELM_H
+
+enum bch_ecc {
+	BCH4_ECC = 0,
+	BCH8_ECC,
+};
+
+/* ELM support 8 error syndrome process */
+#define ERROR_VECTOR_MAX		8
+
+#define BCH8_ECC_OOB_BYTES		13
+#define BCH4_ECC_OOB_BYTES		7
+/* RBL requires 14 byte even though BCH8 uses only 13 byte */
+#define BCH8_SIZE			(BCH8_ECC_OOB_BYTES + 1)
+#define BCH4_SIZE			(BCH4_ECC_OOB_BYTES)
+
+/**
+ * struct elm_errorvec - error vector for elm
+ * @error_reported:		set true for vectors error is reported
+ * @error_uncorrectable:	number of uncorrectable errors
+ * @error_count:		number of correctable errors in the sector
+ * @error_loc:			buffer for error location
+ *
+ */
+struct elm_errorvec {
+	bool error_reported;
+	bool error_uncorrectable;
+	int error_count;
+	int error_loc[ERROR_VECTOR_MAX];
+};
+
+void elm_decode_bch_error_page(struct device *dev, u8 *ecc_calc,
+		struct elm_errorvec *err_vec);
+void elm_config(struct device *dev, enum bch_ecc bch_type);
+#endif /* __ELM_H */
-- 
cgit v1.2.3


From 62116e5171e00f85a8d53f76e45b84423c89ff34 Mon Sep 17 00:00:00 2001
From: Philip Avinash <avinashphilip@ti.com>
Date: Fri, 4 Jan 2013 13:26:51 +0530
Subject: mtd: nand: omap2: Support for hardware BCH error correction.

ELM module can be used for hardware error correction of BCH 4 & 8 bit.
ELM module functionality is verified by checking the availability of
handle for ELM module in device tree. Hence supporting
1. ELM module available, BCH error correction done by ELM module. Also
support read & write page in one shot by adding custom read_page and
write_page methods. This helps in optimizing code for NAND flashes with
page size less than 4 KB.
2. If ELM module not available fall back to software BCH error
correction support.

New structure member is added to omap_nand_info
1. "is_elm_used" to know the status of whether the ELM module is used for
   error correction or not.
2. "elm_dev" device pointer to elm device on detection of ELM module.

Also being here update the device tree documentation of gpmc-nand for
adding optional property elm_id.

Note:
ECC layout uses 1 extra bytes for 512 byte of data to handle erased
pages. Extra byte programmed to zero for programmed pages. Also BCH8
requires 14 byte ecc to maintain compatibility with RBL ECC layout.
This results a common ecc layout across RBL, U-boot & Linux with BCH8.

Signed-off-by: Philip Avinash <avinashphilip@ti.com>
Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
---
 drivers/mtd/nand/omap2.c          | 573 +++++++++++++++++++++++++++++++++++---
 include/linux/platform_data/elm.h |   3 +-
 2 files changed, 536 insertions(+), 40 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/omap2.c b/drivers/mtd/nand/omap2.c
index ec20f67e8949..1a88bd062ac1 100644
--- a/drivers/mtd/nand/omap2.c
+++ b/drivers/mtd/nand/omap2.c
@@ -22,9 +22,12 @@
 #include <linux/omap-dma.h>
 #include <linux/io.h>
 #include <linux/slab.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
 
 #ifdef CONFIG_MTD_NAND_OMAP_BCH
 #include <linux/bch.h>
+#include <linux/platform_data/elm.h>
 #endif
 
 #include <linux/platform_data/mtd-nand-omap2.h>
@@ -120,6 +123,30 @@
 #define BCH8_MAX_ERROR		8	/* upto 8 bit correctable */
 #define BCH4_MAX_ERROR		4	/* upto 4 bit correctable */
 
+#define SECTOR_BYTES		512
+/* 4 bit padding to make byte aligned, 56 = 52 + 4 */
+#define BCH4_BIT_PAD		4
+#define BCH8_ECC_MAX		((SECTOR_BYTES + BCH8_ECC_OOB_BYTES) * 8)
+#define BCH4_ECC_MAX		((SECTOR_BYTES + BCH4_ECC_OOB_BYTES) * 8)
+
+/* GPMC ecc engine settings for read */
+#define BCH_WRAPMODE_1		1	/* BCH wrap mode 1 */
+#define BCH8R_ECC_SIZE0		0x1a	/* ecc_size0 = 26 */
+#define BCH8R_ECC_SIZE1		0x2	/* ecc_size1 = 2 */
+#define BCH4R_ECC_SIZE0		0xd	/* ecc_size0 = 13 */
+#define BCH4R_ECC_SIZE1		0x3	/* ecc_size1 = 3 */
+
+/* GPMC ecc engine settings for write */
+#define BCH_WRAPMODE_6		6	/* BCH wrap mode 6 */
+#define BCH_ECC_SIZE0		0x0	/* ecc_size0 = 0, no oob protection */
+#define BCH_ECC_SIZE1		0x20	/* ecc_size1 = 32 */
+
+#ifdef CONFIG_MTD_NAND_OMAP_BCH
+static u_char bch8_vector[] = {0xf3, 0xdb, 0x14, 0x16, 0x8b, 0xd2, 0xbe, 0xcc,
+	0xac, 0x6b, 0xff, 0x99, 0x7b};
+static u_char bch4_vector[] = {0x00, 0x6b, 0x31, 0xdd, 0x41, 0xbc, 0x10};
+#endif
+
 /* oob info generated runtime depending on ecc algorithm and layout selected */
 static struct nand_ecclayout omap_oobinfo;
 /* Define some generic bad / good block scan pattern which are used
@@ -159,6 +186,9 @@ struct omap_nand_info {
 #ifdef CONFIG_MTD_NAND_OMAP_BCH
 	struct bch_control             *bch;
 	struct nand_ecclayout           ecclayout;
+	bool				is_elm_used;
+	struct device			*elm_dev;
+	struct device_node		*of_node;
 #endif
 };
 
@@ -1034,6 +1064,13 @@ static int omap_dev_ready(struct mtd_info *mtd)
  * omap3_enable_hwecc_bch - Program OMAP3 GPMC to perform BCH ECC correction
  * @mtd: MTD device structure
  * @mode: Read/Write mode
+ *
+ * When using BCH, sector size is hardcoded to 512 bytes.
+ * Using wrapping mode 6 both for reading and writing if ELM module not uses
+ * for error correction.
+ * On writing,
+ * eccsize0 = 0  (no additional protected byte in spare area)
+ * eccsize1 = 32 (skip 32 nibbles = 16 bytes per sector in spare area)
  */
 static void omap3_enable_hwecc_bch(struct mtd_info *mtd, int mode)
 {
@@ -1042,32 +1079,57 @@ static void omap3_enable_hwecc_bch(struct mtd_info *mtd, int mode)
 	struct omap_nand_info *info = container_of(mtd, struct omap_nand_info,
 						   mtd);
 	struct nand_chip *chip = mtd->priv;
-	u32 val;
+	u32 val, wr_mode;
+	unsigned int ecc_size1, ecc_size0;
+
+	/* Using wrapping mode 6 for writing */
+	wr_mode = BCH_WRAPMODE_6;
 
-	nerrors = info->nand.ecc.strength;
-	dev_width = (chip->options & NAND_BUSWIDTH_16) ? 1 : 0;
-	nsectors = 1;
 	/*
-	 * Program GPMC to perform correction on one 512-byte sector at a time.
-	 * Using 4 sectors at a time (i.e. ecc.size = 2048) is also possible and
-	 * gives a slight (5%) performance gain (but requires additional code).
+	 * ECC engine enabled for valid ecc_size0 nibbles
+	 * and disabled for ecc_size1 nibbles.
 	 */
+	ecc_size0 = BCH_ECC_SIZE0;
+	ecc_size1 = BCH_ECC_SIZE1;
+
+	/* Perform ecc calculation on 512-byte sector */
+	nsectors = 1;
+
+	/* Update number of error correction */
+	nerrors = info->nand.ecc.strength;
+
+	/* Multi sector reading/writing for NAND flash with page size < 4096 */
+	if (info->is_elm_used && (mtd->writesize <= 4096)) {
+		if (mode == NAND_ECC_READ) {
+			/* Using wrapping mode 1 for reading */
+			wr_mode = BCH_WRAPMODE_1;
+
+			/*
+			 * ECC engine enabled for ecc_size0 nibbles
+			 * and disabled for ecc_size1 nibbles.
+			 */
+			ecc_size0 = (nerrors == 8) ?
+				BCH8R_ECC_SIZE0 : BCH4R_ECC_SIZE0;
+			ecc_size1 = (nerrors == 8) ?
+				BCH8R_ECC_SIZE1 : BCH4R_ECC_SIZE1;
+		}
+
+		/* Perform ecc calculation for one page (< 4096) */
+		nsectors = info->nand.ecc.steps;
+	}
 
 	writel(ECC1, info->reg.gpmc_ecc_control);
 
-	/*
-	 * When using BCH, sector size is hardcoded to 512 bytes.
-	 * Here we are using wrapping mode 6 both for reading and writing, with:
-	 *  size0 = 0  (no additional protected byte in spare area)
-	 *  size1 = 32 (skip 32 nibbles = 16 bytes per sector in spare area)
-	 */
-	val = (32 << ECCSIZE1_SHIFT) | (0 << ECCSIZE0_SHIFT);
+	/* Configure ecc size for BCH */
+	val = (ecc_size1 << ECCSIZE1_SHIFT) | (ecc_size0 << ECCSIZE0_SHIFT);
 	writel(val, info->reg.gpmc_ecc_size_config);
 
+	dev_width = (chip->options & NAND_BUSWIDTH_16) ? 1 : 0;
+
 	/* BCH configuration */
 	val = ((1                        << 16) | /* enable BCH */
 	       (((nerrors == 8) ? 1 : 0) << 12) | /* 8 or 4 bits */
-	       (0x06                     <<  8) | /* wrap mode = 6 */
+	       (wr_mode                  <<  8) | /* wrap mode */
 	       (dev_width                <<  7) | /* bus width */
 	       (((nsectors-1) & 0x7)     <<  4) | /* number of sectors */
 	       (info->gpmc_cs            <<  1) | /* ECC CS */
@@ -1075,7 +1137,7 @@ static void omap3_enable_hwecc_bch(struct mtd_info *mtd, int mode)
 
 	writel(val, info->reg.gpmc_ecc_config);
 
-	/* clear ecc and enable bits */
+	/* Clear ecc and enable bits */
 	writel(ECCCLEAR | ECC1, info->reg.gpmc_ecc_control);
 }
 
@@ -1164,6 +1226,298 @@ static int omap3_calculate_ecc_bch8(struct mtd_info *mtd, const u_char *dat,
 	return 0;
 }
 
+/**
+ * omap3_calculate_ecc_bch - Generate bytes of ECC bytes
+ * @mtd:	MTD device structure
+ * @dat:	The pointer to data on which ecc is computed
+ * @ecc_code:	The ecc_code buffer
+ *
+ * Support calculating of BCH4/8 ecc vectors for the page
+ */
+static int omap3_calculate_ecc_bch(struct mtd_info *mtd, const u_char *dat,
+				    u_char *ecc_code)
+{
+	struct omap_nand_info *info = container_of(mtd, struct omap_nand_info,
+						   mtd);
+	unsigned long nsectors, bch_val1, bch_val2, bch_val3, bch_val4;
+	int i, eccbchtsel;
+
+	nsectors = ((readl(info->reg.gpmc_ecc_config) >> 4) & 0x7) + 1;
+	/*
+	 * find BCH scheme used
+	 * 0 -> BCH4
+	 * 1 -> BCH8
+	 */
+	eccbchtsel = ((readl(info->reg.gpmc_ecc_config) >> 12) & 0x3);
+
+	for (i = 0; i < nsectors; i++) {
+
+		/* Read hw-computed remainder */
+		bch_val1 = readl(info->reg.gpmc_bch_result0[i]);
+		bch_val2 = readl(info->reg.gpmc_bch_result1[i]);
+		if (eccbchtsel) {
+			bch_val3 = readl(info->reg.gpmc_bch_result2[i]);
+			bch_val4 = readl(info->reg.gpmc_bch_result3[i]);
+		}
+
+		if (eccbchtsel) {
+			/* BCH8 ecc scheme */
+			*ecc_code++ = (bch_val4 & 0xFF);
+			*ecc_code++ = ((bch_val3 >> 24) & 0xFF);
+			*ecc_code++ = ((bch_val3 >> 16) & 0xFF);
+			*ecc_code++ = ((bch_val3 >> 8) & 0xFF);
+			*ecc_code++ = (bch_val3 & 0xFF);
+			*ecc_code++ = ((bch_val2 >> 24) & 0xFF);
+			*ecc_code++ = ((bch_val2 >> 16) & 0xFF);
+			*ecc_code++ = ((bch_val2 >> 8) & 0xFF);
+			*ecc_code++ = (bch_val2 & 0xFF);
+			*ecc_code++ = ((bch_val1 >> 24) & 0xFF);
+			*ecc_code++ = ((bch_val1 >> 16) & 0xFF);
+			*ecc_code++ = ((bch_val1 >> 8) & 0xFF);
+			*ecc_code++ = (bch_val1 & 0xFF);
+			/*
+			 * Setting 14th byte to zero to handle
+			 * erased page & maintain compatibility
+			 * with RBL
+			 */
+			*ecc_code++ = 0x0;
+		} else {
+			/* BCH4 ecc scheme */
+			*ecc_code++ = ((bch_val2 >> 12) & 0xFF);
+			*ecc_code++ = ((bch_val2 >> 4) & 0xFF);
+			*ecc_code++ = ((bch_val2 & 0xF) << 4) |
+				((bch_val1 >> 28) & 0xF);
+			*ecc_code++ = ((bch_val1 >> 20) & 0xFF);
+			*ecc_code++ = ((bch_val1 >> 12) & 0xFF);
+			*ecc_code++ = ((bch_val1 >> 4) & 0xFF);
+			*ecc_code++ = ((bch_val1 & 0xF) << 4);
+			/*
+			 * Setting 8th byte to zero to handle
+			 * erased page
+			 */
+			*ecc_code++ = 0x0;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * erased_sector_bitflips - count bit flips
+ * @data:	data sector buffer
+ * @oob:	oob buffer
+ * @info:	omap_nand_info
+ *
+ * Check the bit flips in erased page falls below correctable level.
+ * If falls below, report the page as erased with correctable bit
+ * flip, else report as uncorrectable page.
+ */
+static int erased_sector_bitflips(u_char *data, u_char *oob,
+		struct omap_nand_info *info)
+{
+	int flip_bits = 0, i;
+
+	for (i = 0; i < info->nand.ecc.size; i++) {
+		flip_bits += hweight8(~data[i]);
+		if (flip_bits > info->nand.ecc.strength)
+			return 0;
+	}
+
+	for (i = 0; i < info->nand.ecc.bytes - 1; i++) {
+		flip_bits += hweight8(~oob[i]);
+		if (flip_bits > info->nand.ecc.strength)
+			return 0;
+	}
+
+	/*
+	 * Bit flips falls in correctable level.
+	 * Fill data area with 0xFF
+	 */
+	if (flip_bits) {
+		memset(data, 0xFF, info->nand.ecc.size);
+		memset(oob, 0xFF, info->nand.ecc.bytes);
+	}
+
+	return flip_bits;
+}
+
+/**
+ * omap_elm_correct_data - corrects page data area in case error reported
+ * @mtd:	MTD device structure
+ * @data:	page data
+ * @read_ecc:	ecc read from nand flash
+ * @calc_ecc:	ecc read from HW ECC registers
+ *
+ * Calculated ecc vector reported as zero in case of non-error pages.
+ * In case of error/erased pages non-zero error vector is reported.
+ * In case of non-zero ecc vector, check read_ecc at fixed offset
+ * (x = 13/7 in case of BCH8/4 == 0) to find page programmed or not.
+ * To handle bit flips in this data, count the number of 0's in
+ * read_ecc[x] and check if it greater than 4. If it is less, it is
+ * programmed page, else erased page.
+ *
+ * 1. If page is erased, check with standard ecc vector (ecc vector
+ * for erased page to find any bit flip). If check fails, bit flip
+ * is present in erased page. Count the bit flips in erased page and
+ * if it falls under correctable level, report page with 0xFF and
+ * update the correctable bit information.
+ * 2. If error is reported on programmed page, update elm error
+ * vector and correct the page with ELM error correction routine.
+ *
+ */
+static int omap_elm_correct_data(struct mtd_info *mtd, u_char *data,
+				u_char *read_ecc, u_char *calc_ecc)
+{
+	struct omap_nand_info *info = container_of(mtd, struct omap_nand_info,
+			mtd);
+	int eccsteps = info->nand.ecc.steps;
+	int i , j, stat = 0;
+	int eccsize, eccflag, ecc_vector_size;
+	struct elm_errorvec err_vec[ERROR_VECTOR_MAX];
+	u_char *ecc_vec = calc_ecc;
+	u_char *spare_ecc = read_ecc;
+	u_char *erased_ecc_vec;
+	enum bch_ecc type;
+	bool is_error_reported = false;
+
+	/* Initialize elm error vector to zero */
+	memset(err_vec, 0, sizeof(err_vec));
+
+	if (info->nand.ecc.strength == BCH8_MAX_ERROR) {
+		type = BCH8_ECC;
+		erased_ecc_vec = bch8_vector;
+	} else {
+		type = BCH4_ECC;
+		erased_ecc_vec = bch4_vector;
+	}
+
+	ecc_vector_size = info->nand.ecc.bytes;
+
+	/*
+	 * Remove extra byte padding for BCH8 RBL
+	 * compatibility and erased page handling
+	 */
+	eccsize = ecc_vector_size - 1;
+
+	for (i = 0; i < eccsteps ; i++) {
+		eccflag = 0;	/* initialize eccflag */
+
+		/*
+		 * Check any error reported,
+		 * In case of error, non zero ecc reported.
+		 */
+
+		for (j = 0; (j < eccsize); j++) {
+			if (calc_ecc[j] != 0) {
+				eccflag = 1; /* non zero ecc, error present */
+				break;
+			}
+		}
+
+		if (eccflag == 1) {
+			/*
+			 * Set threshold to minimum of 4, half of ecc.strength/2
+			 * to allow max bit flip in byte to 4
+			 */
+			unsigned int threshold = min_t(unsigned int, 4,
+					info->nand.ecc.strength / 2);
+
+			/*
+			 * Check data area is programmed by counting
+			 * number of 0's at fixed offset in spare area.
+			 * Checking count of 0's against threshold.
+			 * In case programmed page expects at least threshold
+			 * zeros in byte.
+			 * If zeros are less than threshold for programmed page/
+			 * zeros are more than threshold erased page, either
+			 * case page reported as uncorrectable.
+			 */
+			if (hweight8(~read_ecc[eccsize]) >= threshold) {
+				/*
+				 * Update elm error vector as
+				 * data area is programmed
+				 */
+				err_vec[i].error_reported = true;
+				is_error_reported = true;
+			} else {
+				/* Error reported in erased page */
+				int bitflip_count;
+				u_char *buf = &data[info->nand.ecc.size * i];
+
+				if (memcmp(calc_ecc, erased_ecc_vec, eccsize)) {
+					bitflip_count = erased_sector_bitflips(
+							buf, read_ecc, info);
+
+					if (bitflip_count)
+						stat += bitflip_count;
+					else
+						return -EINVAL;
+				}
+			}
+		}
+
+		/* Update the ecc vector */
+		calc_ecc += ecc_vector_size;
+		read_ecc += ecc_vector_size;
+	}
+
+	/* Check if any error reported */
+	if (!is_error_reported)
+		return 0;
+
+	/* Decode BCH error using ELM module */
+	elm_decode_bch_error_page(info->elm_dev, ecc_vec, err_vec);
+
+	for (i = 0; i < eccsteps; i++) {
+		if (err_vec[i].error_reported) {
+			for (j = 0; j < err_vec[i].error_count; j++) {
+				u32 bit_pos, byte_pos, error_max, pos;
+
+				if (type == BCH8_ECC)
+					error_max = BCH8_ECC_MAX;
+				else
+					error_max = BCH4_ECC_MAX;
+
+				if (info->nand.ecc.strength == BCH8_MAX_ERROR)
+					pos = err_vec[i].error_loc[j];
+				else
+					/* Add 4 to take care 4 bit padding */
+					pos = err_vec[i].error_loc[j] +
+						BCH4_BIT_PAD;
+
+				/* Calculate bit position of error */
+				bit_pos = pos % 8;
+
+				/* Calculate byte position of error */
+				byte_pos = (error_max - pos - 1) / 8;
+
+				if (pos < error_max) {
+					if (byte_pos < 512)
+						data[byte_pos] ^= 1 << bit_pos;
+					else
+						spare_ecc[byte_pos - 512] ^=
+							1 << bit_pos;
+				}
+				/* else, not interested to correct ecc */
+			}
+		}
+
+		/* Update number of correctable errors */
+		stat += err_vec[i].error_count;
+
+		/* Update page data with sector size */
+		data += info->nand.ecc.size;
+		spare_ecc += ecc_vector_size;
+	}
+
+	for (i = 0; i < eccsteps; i++)
+		/* Return error if uncorrectable error present */
+		if (err_vec[i].error_uncorrectable)
+			return -EINVAL;
+
+	return stat;
+}
+
 /**
  * omap3_correct_data_bch - Decode received data and correct errors
  * @mtd: MTD device structure
@@ -1196,6 +1550,92 @@ static int omap3_correct_data_bch(struct mtd_info *mtd, u_char *data,
 	return count;
 }
 
+/**
+ * omap_write_page_bch - BCH ecc based write page function for entire page
+ * @mtd:		mtd info structure
+ * @chip:		nand chip info structure
+ * @buf:		data buffer
+ * @oob_required:	must write chip->oob_poi to OOB
+ *
+ * Custom write page method evolved to support multi sector writing in one shot
+ */
+static int omap_write_page_bch(struct mtd_info *mtd, struct nand_chip *chip,
+				  const uint8_t *buf, int oob_required)
+{
+	int i;
+	uint8_t *ecc_calc = chip->buffers->ecccalc;
+	uint32_t *eccpos = chip->ecc.layout->eccpos;
+
+	/* Enable GPMC ecc engine */
+	chip->ecc.hwctl(mtd, NAND_ECC_WRITE);
+
+	/* Write data */
+	chip->write_buf(mtd, buf, mtd->writesize);
+
+	/* Update ecc vector from GPMC result registers */
+	chip->ecc.calculate(mtd, buf, &ecc_calc[0]);
+
+	for (i = 0; i < chip->ecc.total; i++)
+		chip->oob_poi[eccpos[i]] = ecc_calc[i];
+
+	/* Write ecc vector to OOB area */
+	chip->write_buf(mtd, chip->oob_poi, mtd->oobsize);
+	return 0;
+}
+
+/**
+ * omap_read_page_bch - BCH ecc based page read function for entire page
+ * @mtd:		mtd info structure
+ * @chip:		nand chip info structure
+ * @buf:		buffer to store read data
+ * @oob_required:	caller requires OOB data read to chip->oob_poi
+ * @page:		page number to read
+ *
+ * For BCH ecc scheme, GPMC used for syndrome calculation and ELM module
+ * used for error correction.
+ * Custom method evolved to support ELM error correction & multi sector
+ * reading. On reading page data area is read along with OOB data with
+ * ecc engine enabled. ecc vector updated after read of OOB data.
+ * For non error pages ecc vector reported as zero.
+ */
+static int omap_read_page_bch(struct mtd_info *mtd, struct nand_chip *chip,
+				uint8_t *buf, int oob_required, int page)
+{
+	uint8_t *ecc_calc = chip->buffers->ecccalc;
+	uint8_t *ecc_code = chip->buffers->ecccode;
+	uint32_t *eccpos = chip->ecc.layout->eccpos;
+	uint8_t *oob = &chip->oob_poi[eccpos[0]];
+	uint32_t oob_pos = mtd->writesize + chip->ecc.layout->eccpos[0];
+	int stat;
+	unsigned int max_bitflips = 0;
+
+	/* Enable GPMC ecc engine */
+	chip->ecc.hwctl(mtd, NAND_ECC_READ);
+
+	/* Read data */
+	chip->read_buf(mtd, buf, mtd->writesize);
+
+	/* Read oob bytes */
+	chip->cmdfunc(mtd, NAND_CMD_RNDOUT, oob_pos, -1);
+	chip->read_buf(mtd, oob, chip->ecc.total);
+
+	/* Calculate ecc bytes */
+	chip->ecc.calculate(mtd, buf, ecc_calc);
+
+	memcpy(ecc_code, &chip->oob_poi[eccpos[0]], chip->ecc.total);
+
+	stat = chip->ecc.correct(mtd, buf, ecc_code, ecc_calc);
+
+	if (stat < 0) {
+		mtd->ecc_stats.failed++;
+	} else {
+		mtd->ecc_stats.corrected += stat;
+		max_bitflips = max_t(unsigned int, max_bitflips, stat);
+	}
+
+	return max_bitflips;
+}
+
 /**
  * omap3_free_bch - Release BCH ecc resources
  * @mtd: MTD device structure
@@ -1225,6 +1665,11 @@ static int omap3_init_bch(struct mtd_info *mtd, int ecc_opt)
 #else
 	const int hw_errors = BCH4_MAX_ERROR;
 #endif
+	enum bch_ecc bch_type;
+	const __be32 *parp;
+	int lenp;
+	struct device_node *elm_node;
+
 	info->bch = NULL;
 
 	max_errors = (ecc_opt == OMAP_ECC_BCH8_CODE_HW) ?
@@ -1235,30 +1680,67 @@ static int omap3_init_bch(struct mtd_info *mtd, int ecc_opt)
 		goto fail;
 	}
 
-	/* software bch library is only used to detect and locate errors */
-	info->bch = init_bch(13, max_errors, 0x201b /* hw polynomial */);
-	if (!info->bch)
-		goto fail;
+	info->nand.ecc.size = 512;
+	info->nand.ecc.hwctl = omap3_enable_hwecc_bch;
+	info->nand.ecc.mode = NAND_ECC_HW;
+	info->nand.ecc.strength = max_errors;
 
-	info->nand.ecc.size    = 512;
-	info->nand.ecc.hwctl   = omap3_enable_hwecc_bch;
-	info->nand.ecc.correct = omap3_correct_data_bch;
-	info->nand.ecc.mode    = NAND_ECC_HW;
+	if (hw_errors == BCH8_MAX_ERROR)
+		bch_type = BCH8_ECC;
+	else
+		bch_type = BCH4_ECC;
 
-	/*
-	 * The number of corrected errors in an ecc block that will trigger
-	 * block scrubbing defaults to the ecc strength (4 or 8).
-	 * Set mtd->bitflip_threshold here to define a custom threshold.
-	 */
+	/* Detect availability of ELM module */
+	parp = of_get_property(info->of_node, "elm_id", &lenp);
+	if ((parp == NULL) && (lenp != (sizeof(void *) * 2))) {
+		pr_err("Missing elm_id property, fall back to Software BCH\n");
+		info->is_elm_used = false;
+	} else {
+		struct platform_device *pdev;
 
-	if (max_errors == 8) {
-		info->nand.ecc.strength  = 8;
-		info->nand.ecc.bytes     = 13;
-		info->nand.ecc.calculate = omap3_calculate_ecc_bch8;
+		elm_node = of_find_node_by_phandle(be32_to_cpup(parp));
+		pdev = of_find_device_by_node(elm_node);
+		info->elm_dev = &pdev->dev;
+		elm_config(info->elm_dev, bch_type);
+		info->is_elm_used = true;
+	}
+
+	if (info->is_elm_used && (mtd->writesize <= 4096)) {
+
+		if (hw_errors == BCH8_MAX_ERROR)
+			info->nand.ecc.bytes = BCH8_SIZE;
+		else
+			info->nand.ecc.bytes = BCH4_SIZE;
+
+		info->nand.ecc.correct = omap_elm_correct_data;
+		info->nand.ecc.calculate = omap3_calculate_ecc_bch;
+		info->nand.ecc.read_page = omap_read_page_bch;
+		info->nand.ecc.write_page = omap_write_page_bch;
 	} else {
-		info->nand.ecc.strength  = 4;
-		info->nand.ecc.bytes     = 7;
-		info->nand.ecc.calculate = omap3_calculate_ecc_bch4;
+		/*
+		 * software bch library is only used to detect and
+		 * locate errors
+		 */
+		info->bch = init_bch(13, max_errors,
+				0x201b /* hw polynomial */);
+		if (!info->bch)
+			goto fail;
+
+		info->nand.ecc.correct = omap3_correct_data_bch;
+
+		/*
+		 * The number of corrected errors in an ecc block that will
+		 * trigger block scrubbing defaults to the ecc strength (4 or 8)
+		 * Set mtd->bitflip_threshold here to define a custom threshold.
+		 */
+
+		if (max_errors == 8) {
+			info->nand.ecc.bytes = 13;
+			info->nand.ecc.calculate = omap3_calculate_ecc_bch8;
+		} else {
+			info->nand.ecc.bytes = 7;
+			info->nand.ecc.calculate = omap3_calculate_ecc_bch4;
+		}
 	}
 
 	pr_info("enabling NAND BCH ecc with %d-bit correction\n", max_errors);
@@ -1274,7 +1756,7 @@ fail:
  */
 static int omap3_init_bch_tail(struct mtd_info *mtd)
 {
-	int i, steps;
+	int i, steps, offset;
 	struct omap_nand_info *info = container_of(mtd, struct omap_nand_info,
 						   mtd);
 	struct nand_ecclayout *layout = &info->ecclayout;
@@ -1296,11 +1778,21 @@ static int omap3_init_bch_tail(struct mtd_info *mtd)
 		goto fail;
 	}
 
+	/* ECC layout compatible with RBL for BCH8 */
+	if (info->is_elm_used && (info->nand.ecc.bytes == BCH8_SIZE))
+		offset = 2;
+	else
+		offset = mtd->oobsize - layout->eccbytes;
+
 	/* put ecc bytes at oob tail */
 	for (i = 0; i < layout->eccbytes; i++)
-		layout->eccpos[i] = mtd->oobsize-layout->eccbytes+i;
+		layout->eccpos[i] = offset + i;
+
+	if (info->is_elm_used && (info->nand.ecc.bytes == BCH8_SIZE))
+		layout->oobfree[0].offset = 2 + layout->eccbytes * steps;
+	else
+		layout->oobfree[0].offset = 2;
 
-	layout->oobfree[0].offset = 2;
 	layout->oobfree[0].length = mtd->oobsize-2-layout->eccbytes;
 	info->nand.ecc.layout = layout;
 
@@ -1363,6 +1855,9 @@ static int omap_nand_probe(struct platform_device *pdev)
 
 	info->nand.options	= pdata->devsize;
 	info->nand.options	|= NAND_SKIP_BBTSCAN;
+#ifdef CONFIG_MTD_NAND_OMAP_BCH
+	info->of_node		= pdata->of_node;
+#endif
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	if (res == NULL) {
diff --git a/include/linux/platform_data/elm.h b/include/linux/platform_data/elm.h
index 11ab6aaf2431..1bd5244d1dcd 100644
--- a/include/linux/platform_data/elm.h
+++ b/include/linux/platform_data/elm.h
@@ -30,7 +30,8 @@ enum bch_ecc {
 #define BCH4_ECC_OOB_BYTES		7
 /* RBL requires 14 byte even though BCH8 uses only 13 byte */
 #define BCH8_SIZE			(BCH8_ECC_OOB_BYTES + 1)
-#define BCH4_SIZE			(BCH4_ECC_OOB_BYTES)
+/* Uses 1 extra byte to handle erased pages */
+#define BCH4_SIZE			(BCH4_ECC_OOB_BYTES + 1)
 
 /**
  * struct elm_errorvec - error vector for elm
-- 
cgit v1.2.3


From a2f74a7dacc1c17a0b146eb3112217874c5db436 Mon Sep 17 00:00:00 2001
From: Rafał Miłecki <zajec5@gmail.com>
Date: Sun, 6 Jan 2013 21:28:50 +0100
Subject: mtd: bcm47xxsflash: add own struct for abstrating bus type
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Rafał Miłecki <zajec5@gmail.com>
---
 drivers/mtd/devices/bcm47xxsflash.c         | 37 +++++++++++++++++++----------
 drivers/mtd/devices/bcm47xxsflash.h         | 15 ++++++++++++
 include/linux/bcma/bcma_driver_chipcommon.h |  1 +
 3 files changed, 40 insertions(+), 13 deletions(-)
 create mode 100644 drivers/mtd/devices/bcm47xxsflash.h

(limited to 'include/linux')

diff --git a/drivers/mtd/devices/bcm47xxsflash.c b/drivers/mtd/devices/bcm47xxsflash.c
index 4714584aa993..8fe1a097161b 100644
--- a/drivers/mtd/devices/bcm47xxsflash.c
+++ b/drivers/mtd/devices/bcm47xxsflash.c
@@ -5,6 +5,8 @@
 #include <linux/platform_device.h>
 #include <linux/bcma/bcma.h>
 
+#include "bcm47xxsflash.h"
+
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("Serial flash driver for BCMA bus");
 
@@ -13,26 +15,27 @@ static const char *probes[] = { "bcm47xxpart", NULL };
 static int bcm47xxsflash_read(struct mtd_info *mtd, loff_t from, size_t len,
 			      size_t *retlen, u_char *buf)
 {
-	struct bcma_sflash *sflash = mtd->priv;
+	struct bcm47xxsflash *b47s = mtd->priv;
 
 	/* Check address range */
 	if ((from + len) > mtd->size)
 		return -EINVAL;
 
-	memcpy_fromio(buf, (void __iomem *)KSEG0ADDR(sflash->window + from),
+	memcpy_fromio(buf, (void __iomem *)KSEG0ADDR(b47s->window + from),
 		      len);
 
 	return len;
 }
 
-static void bcm47xxsflash_fill_mtd(struct bcma_sflash *sflash,
-				   struct mtd_info *mtd)
+static void bcm47xxsflash_fill_mtd(struct bcm47xxsflash *b47s)
 {
-	mtd->priv = sflash;
+	struct mtd_info *mtd = &b47s->mtd;
+
+	mtd->priv = b47s;
 	mtd->name = "bcm47xxsflash";
 	mtd->owner = THIS_MODULE;
 	mtd->type = MTD_ROM;
-	mtd->size = sflash->size;
+	mtd->size = b47s->size;
 	mtd->_read = bcm47xxsflash_read;
 
 	/* TODO: implement writing support and verify/change following code */
@@ -43,16 +46,23 @@ static void bcm47xxsflash_fill_mtd(struct bcma_sflash *sflash,
 static int bcm47xxsflash_probe(struct platform_device *pdev)
 {
 	struct bcma_sflash *sflash = dev_get_platdata(&pdev->dev);
+	struct bcm47xxsflash *b47s;
 	int err;
 
-	sflash->mtd = kzalloc(sizeof(struct mtd_info), GFP_KERNEL);
-	if (!sflash->mtd) {
+	b47s = kzalloc(sizeof(*b47s), GFP_KERNEL);
+	if (!b47s) {
 		err = -ENOMEM;
 		goto out;
 	}
-	bcm47xxsflash_fill_mtd(sflash, sflash->mtd);
+	sflash->priv = b47s;
+
+	b47s->window = sflash->window;
+	b47s->blocksize = sflash->blocksize;
+	b47s->numblocks = sflash->numblocks;
+	b47s->size = sflash->size;
+	bcm47xxsflash_fill_mtd(b47s);
 
-	err = mtd_device_parse_register(sflash->mtd, probes, NULL, NULL, 0);
+	err = mtd_device_parse_register(&b47s->mtd, probes, NULL, NULL, 0);
 	if (err) {
 		pr_err("Failed to register MTD device: %d\n", err);
 		goto err_dev_reg;
@@ -61,7 +71,7 @@ static int bcm47xxsflash_probe(struct platform_device *pdev)
 	return 0;
 
 err_dev_reg:
-	kfree(sflash->mtd);
+	kfree(&b47s->mtd);
 out:
 	return err;
 }
@@ -69,9 +79,10 @@ out:
 static int bcm47xxsflash_remove(struct platform_device *pdev)
 {
 	struct bcma_sflash *sflash = dev_get_platdata(&pdev->dev);
+	struct bcm47xxsflash *b47s = sflash->priv;
 
-	mtd_device_unregister(sflash->mtd);
-	kfree(sflash->mtd);
+	mtd_device_unregister(&b47s->mtd);
+	kfree(b47s);
 
 	return 0;
 }
diff --git a/drivers/mtd/devices/bcm47xxsflash.h b/drivers/mtd/devices/bcm47xxsflash.h
new file mode 100644
index 000000000000..ebf6f710e23c
--- /dev/null
+++ b/drivers/mtd/devices/bcm47xxsflash.h
@@ -0,0 +1,15 @@
+#ifndef __BCM47XXSFLASH_H
+#define __BCM47XXSFLASH_H
+
+#include <linux/mtd/mtd.h>
+
+struct bcm47xxsflash {
+	u32 window;
+	u32 blocksize;
+	u16 numblocks;
+	u32 size;
+
+	struct mtd_info mtd;
+};
+
+#endif /* BCM47XXSFLASH */
diff --git a/include/linux/bcma/bcma_driver_chipcommon.h b/include/linux/bcma/bcma_driver_chipcommon.h
index 9a0e3fa3ca95..a5bfda6b0a76 100644
--- a/include/linux/bcma/bcma_driver_chipcommon.h
+++ b/include/linux/bcma/bcma_driver_chipcommon.h
@@ -528,6 +528,7 @@ struct bcma_sflash {
 	u32 size;
 
 	struct mtd_info *mtd;
+	void *priv;
 };
 #endif
 
-- 
cgit v1.2.3


From 1648eaaa1575ea686acb82fb8cb3d8839764ef2c Mon Sep 17 00:00:00 2001
From: Stefan Roese <sr@denx.de>
Date: Fri, 18 Jan 2013 13:10:05 +0100
Subject: mtd: cfi_cmdset_0002: Support Persistent Protection Bits (PPB)
 locking

Currently cfi_cmdset_0002.c does not support PPB locking of sectors. This
patch adds support for this locking/unlocking mechanism. It is needed on
some platforms, since newer U-Boot versions do support this PPB locking
and protect for example their environment sector(s) this way.

This PPB locking/unlocking will be enabled for all devices supported by
cfi_cmdset_0002 reporting 8 in the CFI word 0x49 (Sector Protect/Unprotect
scheme).

Please note that PPB locking does support sector-by-sector locking. But
the whole chip can only be unlocked together. So unlocking one sector
will automatically unlock all sectors of this device. Because of this
chip limitation, the PPB unlocking function saves the current locking
status of all sectors before unlocking the whole device. After unlocking
the saved locking status is re-configured. This way only the addressed
sectors will be unlocked.

To selectively enable this advanced sector protection mechanism, the
device-tree property "use-advanced-sector-protection" has been created.
To enable support for this locking this property needs to be present in the
flash DT node. E.g.:

nor_flash@0,0 {
	compatible = "amd,s29gl256n", "cfi-flash";
	bank-width = <2>;
	use-advanced-sector-protection;
	...

Tested with Spansion S29GL512S10THI and Micron JS28F512M29EWx flash
devices.

Signed-off-by: Stefan Roese <sr@denx.de>
Tested-by: Holger Brunck <holger.brunck@keymile.com>
Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
---
 .../devicetree/bindings/mtd/mtd-physmap.txt        |   3 +
 drivers/mtd/chips/cfi_cmdset_0002.c                | 217 +++++++++++++++++++++
 drivers/mtd/maps/physmap_of.c                      |   1 +
 include/linux/mtd/map.h                            |   1 +
 4 files changed, 222 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/mtd/mtd-physmap.txt b/Documentation/devicetree/bindings/mtd/mtd-physmap.txt
index dab7847fc800..61c5ec850f2f 100644
--- a/Documentation/devicetree/bindings/mtd/mtd-physmap.txt
+++ b/Documentation/devicetree/bindings/mtd/mtd-physmap.txt
@@ -26,6 +26,9 @@ file systems on embedded devices.
  - linux,mtd-name: allow to specify the mtd name for retro capability with
    physmap-flash drivers as boot loader pass the mtd partition via the old
    device name physmap-flash.
+ - use-advanced-sector-protection: boolean to enable support for the
+   advanced sector protection (Spansion: PPB - Persistent Protection
+   Bits) locking.
 
 For JEDEC compatible devices, the following additional properties
 are defined:
diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c
index b86197286f24..fff665d59a0d 100644
--- a/drivers/mtd/chips/cfi_cmdset_0002.c
+++ b/drivers/mtd/chips/cfi_cmdset_0002.c
@@ -33,6 +33,8 @@
 #include <linux/delay.h>
 #include <linux/interrupt.h>
 #include <linux/reboot.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
 #include <linux/mtd/map.h>
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/cfi.h>
@@ -74,6 +76,10 @@ static void put_chip(struct map_info *map, struct flchip *chip, unsigned long ad
 static int cfi_atmel_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len);
 static int cfi_atmel_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len);
 
+static int cfi_ppb_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len);
+static int cfi_ppb_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len);
+static int cfi_ppb_is_locked(struct mtd_info *mtd, loff_t ofs, uint64_t len);
+
 static struct mtd_chip_driver cfi_amdstd_chipdrv = {
 	.probe		= NULL, /* Not usable directly */
 	.destroy	= cfi_amdstd_destroy,
@@ -496,6 +502,7 @@ static void cfi_fixup_m29ew_delay_after_resume(struct cfi_private *cfi)
 struct mtd_info *cfi_cmdset_0002(struct map_info *map, int primary)
 {
 	struct cfi_private *cfi = map->fldrv_priv;
+	struct device_node __maybe_unused *np = map->device_node;
 	struct mtd_info *mtd;
 	int i;
 
@@ -570,6 +577,17 @@ struct mtd_info *cfi_cmdset_0002(struct map_info *map, int primary)
 			cfi_tell_features(extp);
 #endif
 
+#ifdef CONFIG_OF
+			if (np && of_property_read_bool(
+				    np, "use-advanced-sector-protection")
+			    && extp->BlkProtUnprot == 8) {
+				printk(KERN_INFO "  Advanced Sector Protection (PPB Locking) supported\n");
+				mtd->_lock = cfi_ppb_lock;
+				mtd->_unlock = cfi_ppb_unlock;
+				mtd->_is_locked = cfi_ppb_is_locked;
+			}
+#endif
+
 			bootloc = extp->TopBottom;
 			if ((bootloc < 2) || (bootloc > 5)) {
 				printk(KERN_WARNING "%s: CFI contains unrecognised boot "
@@ -2172,6 +2190,205 @@ static int cfi_atmel_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
 	return cfi_varsize_frob(mtd, do_atmel_unlock, ofs, len, NULL);
 }
 
+/*
+ * Advanced Sector Protection - PPB (Persistent Protection Bit) locking
+ */
+
+struct ppb_lock {
+	struct flchip *chip;
+	loff_t offset;
+	int locked;
+};
+
+#define MAX_SECTORS			512
+
+#define DO_XXLOCK_ONEBLOCK_LOCK		((void *)1)
+#define DO_XXLOCK_ONEBLOCK_UNLOCK	((void *)2)
+#define DO_XXLOCK_ONEBLOCK_GETLOCK	((void *)3)
+
+static int __maybe_unused do_ppb_xxlock(struct map_info *map,
+					struct flchip *chip,
+					unsigned long adr, int len, void *thunk)
+{
+	struct cfi_private *cfi = map->fldrv_priv;
+	unsigned long timeo;
+	int ret;
+
+	mutex_lock(&chip->mutex);
+	ret = get_chip(map, chip, adr + chip->start, FL_LOCKING);
+	if (ret) {
+		mutex_unlock(&chip->mutex);
+		return ret;
+	}
+
+	pr_debug("MTD %s(): XXLOCK 0x%08lx len %d\n", __func__, adr, len);
+
+	cfi_send_gen_cmd(0xAA, cfi->addr_unlock1, chip->start, map, cfi,
+			 cfi->device_type, NULL);
+	cfi_send_gen_cmd(0x55, cfi->addr_unlock2, chip->start, map, cfi,
+			 cfi->device_type, NULL);
+	/* PPB entry command */
+	cfi_send_gen_cmd(0xC0, cfi->addr_unlock1, chip->start, map, cfi,
+			 cfi->device_type, NULL);
+
+	if (thunk == DO_XXLOCK_ONEBLOCK_LOCK) {
+		chip->state = FL_LOCKING;
+		map_write(map, CMD(0xA0), chip->start + adr);
+		map_write(map, CMD(0x00), chip->start + adr);
+	} else if (thunk == DO_XXLOCK_ONEBLOCK_UNLOCK) {
+		/*
+		 * Unlocking of one specific sector is not supported, so we
+		 * have to unlock all sectors of this device instead
+		 */
+		chip->state = FL_UNLOCKING;
+		map_write(map, CMD(0x80), chip->start);
+		map_write(map, CMD(0x30), chip->start);
+	} else if (thunk == DO_XXLOCK_ONEBLOCK_GETLOCK) {
+		chip->state = FL_JEDEC_QUERY;
+		/* Return locked status: 0->locked, 1->unlocked */
+		ret = !cfi_read_query(map, adr);
+	} else
+		BUG();
+
+	/*
+	 * Wait for some time as unlocking of all sectors takes quite long
+	 */
+	timeo = jiffies + msecs_to_jiffies(2000);	/* 2s max (un)locking */
+	for (;;) {
+		if (chip_ready(map, adr))
+			break;
+
+		if (time_after(jiffies, timeo)) {
+			printk(KERN_ERR "Waiting for chip to be ready timed out.\n");
+			ret = -EIO;
+			break;
+		}
+
+		UDELAY(map, chip, adr, 1);
+	}
+
+	/* Exit BC commands */
+	map_write(map, CMD(0x90), chip->start);
+	map_write(map, CMD(0x00), chip->start);
+
+	chip->state = FL_READY;
+	put_chip(map, chip, adr + chip->start);
+	mutex_unlock(&chip->mutex);
+
+	return ret;
+}
+
+static int __maybe_unused cfi_ppb_lock(struct mtd_info *mtd, loff_t ofs,
+				       uint64_t len)
+{
+	return cfi_varsize_frob(mtd, do_ppb_xxlock, ofs, len,
+				DO_XXLOCK_ONEBLOCK_LOCK);
+}
+
+static int __maybe_unused cfi_ppb_unlock(struct mtd_info *mtd, loff_t ofs,
+					 uint64_t len)
+{
+	struct mtd_erase_region_info *regions = mtd->eraseregions;
+	struct map_info *map = mtd->priv;
+	struct cfi_private *cfi = map->fldrv_priv;
+	struct ppb_lock *sect;
+	unsigned long adr;
+	loff_t offset;
+	uint64_t length;
+	int chipnum;
+	int i;
+	int sectors;
+	int ret;
+
+	/*
+	 * PPB unlocking always unlocks all sectors of the flash chip.
+	 * We need to re-lock all previously locked sectors. So lets
+	 * first check the locking status of all sectors and save
+	 * it for future use.
+	 */
+	sect = kzalloc(MAX_SECTORS * sizeof(struct ppb_lock), GFP_KERNEL);
+	if (!sect)
+		return -ENOMEM;
+
+	/*
+	 * This code to walk all sectors is a slightly modified version
+	 * of the cfi_varsize_frob() code.
+	 */
+	i = 0;
+	chipnum = 0;
+	adr = 0;
+	sectors = 0;
+	offset = 0;
+	length = mtd->size;
+
+	while (length) {
+		int size = regions[i].erasesize;
+
+		/*
+		 * Only test sectors that shall not be unlocked. The other
+		 * sectors shall be unlocked, so lets keep their locking
+		 * status at "unlocked" (locked=0) for the final re-locking.
+		 */
+		if ((adr < ofs) || (adr >= (ofs + len))) {
+			sect[sectors].chip = &cfi->chips[chipnum];
+			sect[sectors].offset = offset;
+			sect[sectors].locked = do_ppb_xxlock(
+				map, &cfi->chips[chipnum], adr, 0,
+				DO_XXLOCK_ONEBLOCK_GETLOCK);
+		}
+
+		adr += size;
+		offset += size;
+		length -= size;
+
+		if (offset == regions[i].offset + size * regions[i].numblocks)
+			i++;
+
+		if (adr >> cfi->chipshift) {
+			adr = 0;
+			chipnum++;
+
+			if (chipnum >= cfi->numchips)
+				break;
+		}
+
+		sectors++;
+		if (sectors >= MAX_SECTORS) {
+			printk(KERN_ERR "Only %d sectors for PPB locking supported!\n",
+			       MAX_SECTORS);
+			kfree(sect);
+			return -EINVAL;
+		}
+	}
+
+	/* Now unlock the whole chip */
+	ret = cfi_varsize_frob(mtd, do_ppb_xxlock, ofs, len,
+			       DO_XXLOCK_ONEBLOCK_UNLOCK);
+	if (ret) {
+		kfree(sect);
+		return ret;
+	}
+
+	/*
+	 * PPB unlocking always unlocks all sectors of the flash chip.
+	 * We need to re-lock all previously locked sectors.
+	 */
+	for (i = 0; i < sectors; i++) {
+		if (sect[i].locked)
+			do_ppb_xxlock(map, sect[i].chip, sect[i].offset, 0,
+				      DO_XXLOCK_ONEBLOCK_LOCK);
+	}
+
+	kfree(sect);
+	return ret;
+}
+
+static int __maybe_unused cfi_ppb_is_locked(struct mtd_info *mtd, loff_t ofs,
+					    uint64_t len)
+{
+	return cfi_varsize_frob(mtd, do_ppb_xxlock, ofs, len,
+				DO_XXLOCK_ONEBLOCK_GETLOCK) ? 1 : 0;
+}
 
 static void cfi_amdstd_sync (struct mtd_info *mtd)
 {
diff --git a/drivers/mtd/maps/physmap_of.c b/drivers/mtd/maps/physmap_of.c
index 67cc73c18ddd..263d9e1b8001 100644
--- a/drivers/mtd/maps/physmap_of.c
+++ b/drivers/mtd/maps/physmap_of.c
@@ -241,6 +241,7 @@ static int of_flash_probe(struct platform_device *dev)
 		info->list[i].map.phys = res.start;
 		info->list[i].map.size = res_size;
 		info->list[i].map.bankwidth = be32_to_cpup(width);
+		info->list[i].map.device_node = dp;
 
 		err = -ENOMEM;
 		info->list[i].map.virt = ioremap(info->list[i].map.phys,
diff --git a/include/linux/mtd/map.h b/include/linux/mtd/map.h
index f6eb4332ac92..8b9bfd7dcaa3 100644
--- a/include/linux/mtd/map.h
+++ b/include/linux/mtd/map.h
@@ -245,6 +245,7 @@ struct map_info {
 	unsigned long pfow_base;
 	unsigned long map_priv_1;
 	unsigned long map_priv_2;
+	struct device_node *device_node;
 	void *fldrv_priv;
 	struct mtd_chip_driver *fldrv;
 };
-- 
cgit v1.2.3


From 379749c46320c82df95c7909ec888da8ed1fb22c Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Mon, 14 Jan 2013 13:26:15 +0000
Subject: mfd: ab8500-sysctrl: Provide a platform specific pm_power_off()
 call-back

The kernel allows us to specify a function call-back which will be
invoked when a system power-off request has been received. Here we
provide one which is to be used when shutting down AB8500 based
platforms.

Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/ab8500-sysctrl.c      | 22 ++++++++++++++++++++++
 include/linux/mfd/abx500/ab8500.h |  2 ++
 2 files changed, 24 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mfd/ab8500-sysctrl.c b/drivers/mfd/ab8500-sysctrl.c
index 8a33b2c7eead..888e066c715b 100644
--- a/drivers/mfd/ab8500-sysctrl.c
+++ b/drivers/mfd/ab8500-sysctrl.c
@@ -7,12 +7,29 @@
 #include <linux/err.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
+#include <linux/pm.h>
+#include <linux/signal.h>
 #include <linux/mfd/abx500.h>
 #include <linux/mfd/abx500/ab8500.h>
 #include <linux/mfd/abx500/ab8500-sysctrl.h>
 
 static struct device *sysctrl_dev;
 
+void ab8500_power_off(void)
+{
+	sigset_t old;
+	sigset_t all;
+
+	sigfillset(&all);
+
+	if (!sigprocmask(SIG_BLOCK, &all, &old)) {
+		(void)ab8500_sysctrl_set(AB8500_STW4500CTRL1,
+					 AB8500_STW4500CTRL1_SWOFF |
+					 AB8500_STW4500CTRL1_SWRESET4500N);
+		(void)sigprocmask(SIG_SETMASK, &old, NULL);
+	}
+}
+
 static inline bool valid_bank(u8 bank)
 {
 	return ((bank == AB8500_SYS_CTRL1_BLOCK) ||
@@ -51,7 +68,12 @@ int ab8500_sysctrl_write(u16 reg, u8 mask, u8 value)
 
 static int ab8500_sysctrl_probe(struct platform_device *pdev)
 {
+	struct ab8500_platform_data *plat;
+
 	sysctrl_dev = &pdev->dev;
+	plat = dev_get_platdata(pdev->dev.parent);
+	if (plat->pm_power_off)
+		pm_power_off = ab8500_power_off;
 	return 0;
 }
 
diff --git a/include/linux/mfd/abx500/ab8500.h b/include/linux/mfd/abx500/ab8500.h
index 1cb5698b4d76..6119b2fbad97 100644
--- a/include/linux/mfd/abx500/ab8500.h
+++ b/include/linux/mfd/abx500/ab8500.h
@@ -274,6 +274,7 @@ struct ab8500_codec_platform_data;
 /**
  * struct ab8500_platform_data - AB8500 platform data
  * @irq_base: start of AB8500 IRQs, AB8500_NR_IRQS will be used
+ * @pm_power_off: Should machine pm power off hook be registered or not
  * @init: board-specific initialization after detection of ab8500
  * @num_regulator_reg_init: number of regulator init registers
  * @regulator_reg_init: regulator init registers
@@ -282,6 +283,7 @@ struct ab8500_codec_platform_data;
  */
 struct ab8500_platform_data {
 	int irq_base;
+	bool pm_power_off;
 	void (*init) (struct ab8500 *);
 	int num_regulator_reg_init;
 	struct ab8500_regulator_reg_init *regulator_reg_init;
-- 
cgit v1.2.3


From 1abf063ffd18e6934bb81a776b3d71278eeff4ab Mon Sep 17 00:00:00 2001
From: Kennet Wallden <kennet.wallden@stericsson.com>
Date: Tue, 27 Sep 2011 09:23:56 +0200
Subject: mfd: ab8500-sysctrl: Provide configuration for SysClkReqRfClkBuf
 registers

Add the possibility to pass configuration settings for
SysCl1kReqRfClkBuf to SysClk8ReqRfClkBuf via platform data.

Signed-off-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Kennet Wallden <kennet.wallden@stericsson.com>
Reviewed-by: Karl-Johan PERNTZ <karl-johan.perntz@stericsson.com>
Reviewed-by: Bengt JONSSON <bengt.g.jonsson@stericsson.com>
---
 drivers/mfd/ab8500-sysctrl.c              | 24 ++++++++++++++++++++++++
 include/linux/mfd/abx500/ab8500-sysctrl.h |  5 +++++
 include/linux/mfd/abx500/ab8500.h         |  2 ++
 3 files changed, 31 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mfd/ab8500-sysctrl.c b/drivers/mfd/ab8500-sysctrl.c
index 3aaff6043e18..236324e1136d 100644
--- a/drivers/mfd/ab8500-sysctrl.c
+++ b/drivers/mfd/ab8500-sysctrl.c
@@ -113,11 +113,35 @@ int ab8500_sysctrl_write(u16 reg, u8 mask, u8 value)
 static int ab8500_sysctrl_probe(struct platform_device *pdev)
 {
 	struct ab8500_platform_data *plat;
+	struct ab8500_sysctrl_platform_data *pdata;
 
 	sysctrl_dev = &pdev->dev;
 	plat = dev_get_platdata(pdev->dev.parent);
 	if (plat->pm_power_off)
 		pm_power_off = ab8500_power_off;
+
+	pdata = plat->sysctrl;
+
+	if (pdata) {
+		int ret, i, j;
+
+		for (i = AB8500_SYSCLKREQ1RFCLKBUF;
+		     i <= AB8500_SYSCLKREQ8RFCLKBUF; i++) {
+			j = i - AB8500_SYSCLKREQ1RFCLKBUF;
+			ret = ab8500_sysctrl_write(i, 0xff,
+						   pdata->initial_req_buf_config[j]);
+			dev_dbg(&pdev->dev,
+				"Setting SysClkReq%dRfClkBuf 0x%X\n",
+				j + 1,
+				pdata->initial_req_buf_config[j]);
+			if (ret < 0) {
+				dev_err(&pdev->dev,
+					"unable to set sysClkReq%dRfClkBuf: "
+					"%d\n", j + 1, ret);
+			}
+		}
+	}
+
 	return 0;
 }
 
diff --git a/include/linux/mfd/abx500/ab8500-sysctrl.h b/include/linux/mfd/abx500/ab8500-sysctrl.h
index 10eb50973c39..ebf12e793db9 100644
--- a/include/linux/mfd/abx500/ab8500-sysctrl.h
+++ b/include/linux/mfd/abx500/ab8500-sysctrl.h
@@ -37,6 +37,11 @@ static inline int ab8500_sysctrl_clear(u16 reg, u8 bits)
 	return ab8500_sysctrl_write(reg, bits, 0);
 }
 
+/* Configuration data for SysClkReq1RfClkBuf - SysClkReq8RfClkBuf */
+struct ab8500_sysctrl_platform_data {
+	u8 initial_req_buf_config[8];
+};
+
 /* Registers */
 #define AB8500_TURNONSTATUS		0x100
 #define AB8500_RESETSTATUS		0x101
diff --git a/include/linux/mfd/abx500/ab8500.h b/include/linux/mfd/abx500/ab8500.h
index 6119b2fbad97..5251bca56326 100644
--- a/include/linux/mfd/abx500/ab8500.h
+++ b/include/linux/mfd/abx500/ab8500.h
@@ -270,6 +270,7 @@ struct regulator_reg_init;
 struct regulator_init_data;
 struct ab8500_gpio_platform_data;
 struct ab8500_codec_platform_data;
+struct ab8500_sysctrl_platform_data;
 
 /**
  * struct ab8500_platform_data - AB8500 platform data
@@ -291,6 +292,7 @@ struct ab8500_platform_data {
 	struct regulator_init_data *regulator;
 	struct ab8500_gpio_platform_data *gpio;
 	struct ab8500_codec_platform_data *codec;
+	struct ab8500_sysctrl_platform_data *sysctrl;
 };
 
 extern int ab8500_init(struct ab8500 *ab8500,
-- 
cgit v1.2.3


From e0f4fec030ce412666cc127702adbf0a6cfa0855 Mon Sep 17 00:00:00 2001
From: Mian Yousaf Kaukab <mian.yousaf.kaukab@stericsson.com>
Date: Fri, 27 Jan 2012 11:22:16 +0100
Subject: mfd: abx500-core: Provide an API to dump all ABx500 registers

Some drivers can detect subsystem failures e.g. shared memory driver
can detect modem sub system failures. It would be helpful in analyzing
these failures if AB register dump is available at that point. This
patch adds the API for the drivers to dump AB registers in the kernel
log.

Signed-off-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Mian Yousaf Kaukab <mian.yousaf.kaukab@stericsson.com>
Reviewed-by: Linus WALLEIJ <linus.walleij@stericsson.com>
Reviewed-by: Jonas ABERG <jonas.aberg@stericsson.com>
---
 drivers/mfd/abx500-core.c  | 16 ++++++++++++++++
 include/linux/mfd/abx500.h |  2 ++
 2 files changed, 18 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mfd/abx500-core.c b/drivers/mfd/abx500-core.c
index 7ce65f49480f..9818afba2515 100644
--- a/drivers/mfd/abx500-core.c
+++ b/drivers/mfd/abx500-core.c
@@ -153,6 +153,22 @@ int abx500_startup_irq_enabled(struct device *dev, unsigned int irq)
 }
 EXPORT_SYMBOL(abx500_startup_irq_enabled);
 
+void abx500_dump_all_banks(void)
+{
+	struct abx500_ops *ops;
+	struct device dummy_child = {0};
+	struct abx500_device_entry *dev_entry;
+
+	list_for_each_entry(dev_entry, &abx500_list, list) {
+		dummy_child.parent = dev_entry->dev;
+		ops = &dev_entry->ops;
+
+		if ((ops != NULL) && (ops->dump_all_banks != NULL))
+			ops->dump_all_banks(&dummy_child);
+	}
+}
+EXPORT_SYMBOL(abx500_dump_all_banks);
+
 MODULE_AUTHOR("Mattias Wallin <mattias.wallin@stericsson.com>");
 MODULE_DESCRIPTION("ABX500 core driver");
 MODULE_LICENSE("GPL");
diff --git a/include/linux/mfd/abx500.h b/include/linux/mfd/abx500.h
index 2138bd33021a..bd480b248c62 100644
--- a/include/linux/mfd/abx500.h
+++ b/include/linux/mfd/abx500.h
@@ -308,6 +308,7 @@ int abx500_mask_and_set_register_interruptible(struct device *dev, u8 bank,
 int abx500_get_chip_id(struct device *dev);
 int abx500_event_registers_startup_state_get(struct device *dev, u8 *event);
 int abx500_startup_irq_enabled(struct device *dev, unsigned int irq);
+void abx500_dump_all_banks(void);
 
 struct abx500_ops {
 	int (*get_chip_id) (struct device *);
@@ -318,6 +319,7 @@ struct abx500_ops {
 	int (*mask_and_set_register) (struct device *, u8, u8, u8, u8);
 	int (*event_registers_startup_state_get) (struct device *, u8 *);
 	int (*startup_irq_enabled) (struct device *, unsigned int);
+	void (*dump_all_banks) (struct device *);
 };
 
 int abx500_register_ops(struct device *core_dev, struct abx500_ops *ops);
-- 
cgit v1.2.3


From 1d843a6c8c2067615fea0ff8cb62d4a5c4a6f8ae Mon Sep 17 00:00:00 2001
From: Mian Yousaf Kaukab <mian.yousaf.kaukab@stericsson.com>
Date: Fri, 27 Jan 2012 11:35:41 +0100
Subject: mfd: ab8500-core: Allow the possibility to dump all AB8500 registers

Implement an API so that a user may dump all AB8500 registers
via debugfs file access.

Signed-off-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Mian Yousaf Kaukab <mian.yousaf.kaukab@stericsson.com>
Reviewed-by: Linus WALLEIJ <linus.walleij@stericsson.com>
Reviewed-by: Jonas ABERG <jonas.aberg@stericsson.com>
---
 drivers/mfd/ab8500-core.c         |  1 +
 drivers/mfd/ab8500-debugfs.c      | 13 +++++++++++++
 include/linux/mfd/abx500/ab8500.h |  6 ++++++
 3 files changed, 20 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mfd/ab8500-core.c b/drivers/mfd/ab8500-core.c
index e1650badd106..d228697f5d9b 100644
--- a/drivers/mfd/ab8500-core.c
+++ b/drivers/mfd/ab8500-core.c
@@ -319,6 +319,7 @@ static struct abx500_ops ab8500_ops = {
 	.mask_and_set_register = ab8500_mask_and_set_register,
 	.event_registers_startup_state_get = NULL,
 	.startup_irq_enabled = NULL,
+	.dump_all_banks = ab8500_dump_all_banks,
 };
 
 static void ab8500_irq_lock(struct irq_data *data)
diff --git a/drivers/mfd/ab8500-debugfs.c b/drivers/mfd/ab8500-debugfs.c
index cd3cee814fb7..af6f774e658a 100644
--- a/drivers/mfd/ab8500-debugfs.c
+++ b/drivers/mfd/ab8500-debugfs.c
@@ -581,6 +581,19 @@ static int ab8500_print_all_banks(struct seq_file *s, void *p)
 	return 0;
 }
 
+/* Dump registers to kernel log */
+void ab8500_dump_all_banks(struct device *dev)
+{
+	unsigned int i;
+
+	printk(KERN_INFO"ab8500 register values:\n");
+
+	for (i = 1; i < AB8500_NUM_BANKS; i++) {
+		printk(KERN_INFO" bank %u:\n", i);
+		ab8500_registers_print(dev, i, NULL);
+	}
+}
+
 static int ab8500_all_banks_open(struct inode *inode, struct file *file)
 {
 	struct seq_file *s;
diff --git a/include/linux/mfd/abx500/ab8500.h b/include/linux/mfd/abx500/ab8500.h
index 5251bca56326..dbec7b00ea83 100644
--- a/include/linux/mfd/abx500/ab8500.h
+++ b/include/linux/mfd/abx500/ab8500.h
@@ -345,4 +345,10 @@ static inline int is_ab8500_2p0(struct ab8500 *ab)
 	return (is_ab8500(ab) && (ab->chip_id == AB8500_CUT2P0));
 }
 
+#ifdef CONFIG_AB8500_DEBUG
+void ab8500_dump_all_banks(struct device *dev);
+#else
+static inline void ab8500_dump_all_banks(struct device *dev) {}
+#endif
+
 #endif /* MFD_AB8500_H */
-- 
cgit v1.2.3


From 8f0eb43be5f461a28341fe724686f265b0719dd3 Mon Sep 17 00:00:00 2001
From: Bengt Jonsson <bengt.g.jonsson@stericsson.com>
Date: Tue, 14 Feb 2012 13:01:00 +0100
Subject: mfd: ab8500-debugfs: Add interrupt debug

This patch adds an entry in debugfs to check number of interrupts
from the AB.

Signed-off-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Bengt Jonsson <bengt.g.jonsson@stericsson.com>
Reviewed-by: Rabin VINCENT <rabin.vincent@stericsson.com>
---
 drivers/mfd/ab8500-core.c         |  1 +
 drivers/mfd/ab8500-debugfs.c      | 49 +++++++++++++++++++++++++++++++++++++++
 include/linux/mfd/abx500/ab8500.h |  2 ++
 3 files changed, 52 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mfd/ab8500-core.c b/drivers/mfd/ab8500-core.c
index d228697f5d9b..05a7af4b9768 100644
--- a/drivers/mfd/ab8500-core.c
+++ b/drivers/mfd/ab8500-core.c
@@ -521,6 +521,7 @@ static irqreturn_t ab8500_irq(int irq, void *dev)
 			int virq = ab8500_irq_get_virq(ab8500, line);
 
 			handle_nested_irq(virq);
+			ab8500_debug_register_interrupt(line);
 			value &= ~(1 << bit);
 
 		} while (value);
diff --git a/drivers/mfd/ab8500-debugfs.c b/drivers/mfd/ab8500-debugfs.c
index 8617b132e730..cbebbd7e7b40 100644
--- a/drivers/mfd/ab8500-debugfs.c
+++ b/drivers/mfd/ab8500-debugfs.c
@@ -736,6 +736,35 @@ static ssize_t ab8500_val_write(struct file *file,
 	return count;
 }
 
+/*
+ * Interrupt status
+ */
+static u32 num_interrupts[AB8500_MAX_NR_IRQS];
+static int num_interrupt_lines;
+
+void ab8500_debug_register_interrupt(int line)
+{
+	if (line < num_interrupt_lines)
+		num_interrupts[line]++;
+}
+
+static int ab8500_interrupts_print(struct seq_file *s, void *p)
+{
+	int line;
+
+	seq_printf(s, "irq:  number of\n");
+
+	for (line = 0; line < num_interrupt_lines; line++)
+		seq_printf(s, "%3i:  %6i\n", line, num_interrupts[line]);
+
+	return 0;
+}
+
+static int ab8500_interrupts_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, ab8500_interrupts_print, inode->i_private);
+}
+
 /*
  * - HWREG DB8500 formated routines
  */
@@ -1489,6 +1518,14 @@ static const struct file_operations ab8500_val_fops = {
 	.owner = THIS_MODULE,
 };
 
+static const struct file_operations ab8500_interrupts_fops = {
+	.open = ab8500_interrupts_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+	.owner = THIS_MODULE,
+};
+
 static const struct file_operations ab8500_subscribe_fops = {
 	.open = ab8500_subscribe_unsubscribe_open,
 	.write = ab8500_subscribe_write,
@@ -1597,6 +1634,18 @@ static int ab8500_debug_probe(struct platform_device *plf)
 	if (!file)
 		goto err;
 
+	if (is_ab8500(ab8500))
+		num_interrupt_lines = AB8500_NR_IRQS;
+	else if (is_ab8505(ab8500))
+		num_interrupt_lines = AB8505_NR_IRQS;
+	else if (is_ab9540(ab8500))
+		num_interrupt_lines = AB9540_NR_IRQS;
+
+	file = debugfs_create_file("interrupts", (S_IRUGO),
+	    ab8500_dir, &plf->dev, &ab8500_interrupts_fops);
+	if (!file)
+		goto err;
+
 	file = debugfs_create_file("irq-unsubscribe", (S_IRUGO | S_IWUSR),
 	    ab8500_dir, &plf->dev, &ab8500_unsubscribe_fops);
 	if (!file)
diff --git a/include/linux/mfd/abx500/ab8500.h b/include/linux/mfd/abx500/ab8500.h
index dbec7b00ea83..fa7173dd71f2 100644
--- a/include/linux/mfd/abx500/ab8500.h
+++ b/include/linux/mfd/abx500/ab8500.h
@@ -347,8 +347,10 @@ static inline int is_ab8500_2p0(struct ab8500 *ab)
 
 #ifdef CONFIG_AB8500_DEBUG
 void ab8500_dump_all_banks(struct device *dev);
+void ab8500_debug_register_interrupt(int line);
 #else
 static inline void ab8500_dump_all_banks(struct device *dev) {}
+static inline void ab8500_debug_register_interrupt(int line) {}
 #endif
 
 #endif /* MFD_AB8500_H */
-- 
cgit v1.2.3


From 26aec009f6b61c077c6de1a96cca7a5132851dbe Mon Sep 17 00:00:00 2001
From: Amit Daniel Kachhap <amit.daniel@samsung.com>
Date: Sun, 3 Feb 2013 15:49:47 -0800
Subject: regulator: add device tree support for s5m8767

This device tree support is added for PMIC block of S5m8767 multi
function driver. The usage detail is added in the device tree
documentation section. This change is tested on exynos5250 based
arndale platform by regulator voltage set/get API's.

Reviewed-by: Thomas Abraham <thomas.abraham@linaro.org>
Signed-off-by: Amit Daniel Kachhap <amit.daniel@samsung.com>
Tested-by: Sachin Kamat <sachin.kamat@linaro.org>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 .../bindings/regulator/s5m8767-regulator.txt       | 152 +++++++++++++++++
 drivers/mfd/sec-core.c                             |  75 ++++++++-
 drivers/regulator/s5m8767.c                        | 186 ++++++++++++++++++++-
 include/linux/mfd/samsung/core.h                   |  11 +-
 4 files changed, 417 insertions(+), 7 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/regulator/s5m8767-regulator.txt

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/regulator/s5m8767-regulator.txt b/Documentation/devicetree/bindings/regulator/s5m8767-regulator.txt
new file mode 100644
index 000000000000..a35ff99003a5
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/s5m8767-regulator.txt
@@ -0,0 +1,152 @@
+* Samsung S5M8767 Voltage and Current Regulator
+
+The Samsung S5M8767 is a multi-function device which includes volatage and
+current regulators, rtc, charger controller and other sub-blocks. It is
+interfaced to the host controller using a i2c interface. Each sub-block is
+addressed by the host system using different i2c slave address. This document
+describes the bindings for 'pmic' sub-block of s5m8767.
+
+Required properties:
+- compatible: Should be "samsung,s5m8767-pmic".
+- reg: Specifies the i2c slave address of the pmic block. It should be 0x66.
+
+- s5m8767,pmic-buck2-dvs-voltage: A set of 8 voltage values in micro-volt (uV)
+  units for buck2 when changing voltage using gpio dvs. Refer to [1] below
+  for additional information.
+
+- s5m8767,pmic-buck3-dvs-voltage: A set of 8 voltage values in micro-volt (uV)
+  units for buck3 when changing voltage using gpio dvs. Refer to [1] below
+  for additional information.
+
+- s5m8767,pmic-buck4-dvs-voltage: A set of 8 voltage values in micro-volt (uV)
+  units for buck4 when changing voltage using gpio dvs. Refer to [1] below
+  for additional information.
+
+- s5m8767,pmic-buck-ds-gpios: GPIO specifiers for three host gpio's used
+  for selecting GPIO DVS lines. It is one-to-one mapped to dvs gpio lines.
+
+[1] If none of the 's5m8767,pmic-buck[2/3/4]-uses-gpio-dvs' optional
+    property is specified, the 's5m8767,pmic-buck[2/3/4]-dvs-voltage'
+    property should specify atleast one voltage level (which would be a
+    safe operating voltage).
+
+    If either of the 's5m8767,pmic-buck[2/3/4]-uses-gpio-dvs' optional
+    property is specified, then all the eight voltage values for the
+    's5m8767,pmic-buck[2/3/4]-dvs-voltage' should be specified.
+
+Optional properties:
+- interrupt-parent: Specifies the phandle of the interrupt controller to which
+  the interrupts from s5m8767 are delivered to.
+- interrupts: Interrupt specifiers for two interrupt sources.
+  - First interrupt specifier is for 'irq1' interrupt.
+  - Second interrupt specifier is for 'alert' interrupt.
+- s5m8767,pmic-buck2-uses-gpio-dvs: 'buck2' can be controlled by gpio dvs.
+- s5m8767,pmic-buck3-uses-gpio-dvs: 'buck3' can be controlled by gpio dvs.
+- s5m8767,pmic-buck4-uses-gpio-dvs: 'buck4' can be controlled by gpio dvs.
+
+Additional properties required if either of the optional properties are used:
+
+- s5m8767,pmic-buck234-default-dvs-idx: Default voltage setting selected from
+  the possible 8 options selectable by the dvs gpios. The value of this
+  property should be between 0 and 7. If not specified or if out of range, the
+  default value of this property is set to 0.
+
+- s5m8767,pmic-buck-dvs-gpios: GPIO specifiers for three host gpio's used
+  for dvs. The format of the gpio specifier depends in the gpio controller.
+
+Regulators: The regulators of s5m8767 that have to be instantiated should be
+included in a sub-node named 'regulators'. Regulator nodes included in this
+sub-node should be of the format as listed below.
+
+	regulator_name {
+		ldo1_reg: LDO1 {
+			regulator-name = "VDD_ALIVE_1.0V";
+			regulator-min-microvolt = <1100000>;
+			regulator-max-microvolt = <1100000>;
+			regulator-always-on;
+			regulator-boot-on;
+			op_mode = <1>; /* Normal Mode */
+		};
+	};
+The above regulator entries are defined in regulator bindings documentation
+except op_mode description.
+	- op_mode: describes the different operating modes of the LDO's with
+		power mode change in SOC. The different possible values are,
+		0 - always off mode
+		1 - on in normal mode
+		2 - low power mode
+		3 - suspend mode
+
+The following are the names of the regulators that the s5m8767 pmic block
+supports. Note: The 'n' in LDOn and BUCKn represents the LDO or BUCK number
+as per the datasheet of s5m8767.
+
+	- LDOn
+		  - valid values for n are 1 to 28
+		  - Example: LDO0, LD01, LDO28
+	- BUCKn
+		  - valid values for n are 1 to 9.
+		  - Example: BUCK1, BUCK2, BUCK9
+
+The bindings inside the regulator nodes use the standard regulator bindings
+which are documented elsewhere.
+
+Example:
+
+	s5m8767_pmic@66 {
+		compatible = "samsung,s5m8767-pmic";
+		reg = <0x66>;
+
+		s5m8767,pmic-buck2-uses-gpio-dvs;
+		s5m8767,pmic-buck3-uses-gpio-dvs;
+		s5m8767,pmic-buck4-uses-gpio-dvs;
+
+		s5m8767,pmic-buck-default-dvs-idx = <0>;
+
+		s5m8767,pmic-buck-dvs-gpios = <&gpx0 0 1 0 0>, /* DVS1 */
+						 <&gpx0 1 1 0 0>, /* DVS2 */
+						 <&gpx0 2 1 0 0>; /* DVS3 */
+
+		s5m8767,pmic-buck-ds-gpios = <&gpx2 3 1 0 0>, /* SET1 */
+						<&gpx2 4 1 0 0>, /* SET2 */
+						<&gpx2 5 1 0 0>; /* SET3 */
+
+		s5m8767,pmic-buck2-dvs-voltage = <1350000>, <1300000>,
+						 <1250000>, <1200000>,
+						 <1150000>, <1100000>,
+						 <1000000>, <950000>;
+
+		s5m8767,pmic-buck3-dvs-voltage = <1100000>, <1100000>,
+						 <1100000>, <1100000>,
+						 <1000000>, <1000000>,
+						 <1000000>, <1000000>;
+
+		s5m8767,pmic-buck4-dvs-voltage = <1200000>, <1200000>,
+						 <1200000>, <1200000>,
+						 <1200000>, <1200000>,
+						 <1200000>, <1200000>;
+
+		regulators {
+			ldo1_reg: LDO1 {
+				regulator-name = "VDD_ABB_3.3V";
+				regulator-min-microvolt = <3300000>;
+				regulator-max-microvolt = <3300000>;
+				op_mode = <1>; /* Normal Mode */
+			};
+
+			ldo2_reg: LDO2 {
+				regulator-name = "VDD_ALIVE_1.1V";
+				regulator-min-microvolt = <1100000>;
+				regulator-max-microvolt = <1100000>;
+				regulator-always-on;
+			};
+
+			buck1_reg: BUCK1 {
+				regulator-name = "VDD_MIF_1.2V";
+				regulator-min-microvolt = <950000>;
+				regulator-max-microvolt = <1350000>;
+				regulator-always-on;
+				regulator-boot-on;
+			};
+		};
+	};
diff --git a/drivers/mfd/sec-core.c b/drivers/mfd/sec-core.c
index 49d361a618d0..ae029834d0a0 100644
--- a/drivers/mfd/sec-core.c
+++ b/drivers/mfd/sec-core.c
@@ -17,6 +17,7 @@
 #include <linux/err.h>
 #include <linux/slab.h>
 #include <linux/i2c.h>
+#include <linux/of_irq.h>
 #include <linux/interrupt.h>
 #include <linux/pm_runtime.h>
 #include <linux/mutex.h>
@@ -60,6 +61,15 @@ static struct mfd_cell s2mps11_devs[] = {
 	},
 };
 
+#ifdef CONFIG_OF
+static struct of_device_id sec_dt_match[] = {
+	{	.compatible = "samsung,s5m8767-pmic",
+		.data = (void *)S5M8767X,
+	},
+	{},
+};
+#endif
+
 int sec_reg_read(struct sec_pmic_dev *sec_pmic, u8 reg, void *dest)
 {
 	return regmap_read(sec_pmic->regmap, reg, dest);
@@ -95,6 +105,57 @@ static struct regmap_config sec_regmap_config = {
 	.val_bits = 8,
 };
 
+
+#ifdef CONFIG_OF
+/*
+ * Only the common platform data elements for s5m8767 are parsed here from the
+ * device tree. Other sub-modules of s5m8767 such as pmic, rtc , charger and
+ * others have to parse their own platform data elements from device tree.
+ *
+ * The s5m8767 platform data structure is instantiated here and the drivers for
+ * the sub-modules need not instantiate another instance while parsing their
+ * platform data.
+ */
+static struct sec_platform_data *sec_pmic_i2c_parse_dt_pdata(
+					struct device *dev)
+{
+	struct sec_platform_data *pd;
+
+	pd = devm_kzalloc(dev, sizeof(*pd), GFP_KERNEL);
+	if (!pd) {
+		dev_err(dev, "could not allocate memory for pdata\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	/*
+	 * ToDo: the 'wakeup' member in the platform data is more of a linux
+	 * specfic information. Hence, there is no binding for that yet and
+	 * not parsed here.
+	 */
+
+	return pd;
+}
+#else
+static struct sec_platform_data *sec_i2c_parse_dt_pdata(
+					struct device *dev)
+{
+	return 0;
+}
+#endif
+
+static inline int sec_i2c_get_driver_data(struct i2c_client *i2c,
+						const struct i2c_device_id *id)
+{
+#ifdef CONFIG_OF
+	if (i2c->dev.of_node) {
+		const struct of_device_id *match;
+		match = of_match_node(sec_dt_match, i2c->dev.of_node);
+		return (int)match->data;
+	}
+#endif
+	return (int)id->driver_data;
+}
+
 static int sec_pmic_probe(struct i2c_client *i2c,
 			    const struct i2c_device_id *id)
 {
@@ -111,13 +172,22 @@ static int sec_pmic_probe(struct i2c_client *i2c,
 	sec_pmic->dev = &i2c->dev;
 	sec_pmic->i2c = i2c;
 	sec_pmic->irq = i2c->irq;
-	sec_pmic->type = id->driver_data;
-
+	sec_pmic->type = sec_i2c_get_driver_data(i2c, id);
+
+	if (sec_pmic->dev->of_node) {
+		pdata = sec_pmic_i2c_parse_dt_pdata(sec_pmic->dev);
+		if (IS_ERR(pdata)) {
+			ret = PTR_ERR(pdata);
+			return ret;
+		}
+		pdata->device_type = sec_pmic->type;
+	}
 	if (pdata) {
 		sec_pmic->device_type = pdata->device_type;
 		sec_pmic->ono = pdata->ono;
 		sec_pmic->irq_base = pdata->irq_base;
 		sec_pmic->wakeup = pdata->wakeup;
+		sec_pmic->pdata = pdata;
 	}
 
 	sec_pmic->regmap = devm_regmap_init_i2c(i2c, &sec_regmap_config);
@@ -192,6 +262,7 @@ static struct i2c_driver sec_pmic_driver = {
 	.driver = {
 		   .name = "sec_pmic",
 		   .owner = THIS_MODULE,
+		   .of_match_table = of_match_ptr(sec_dt_match),
 	},
 	.probe = sec_pmic_probe,
 	.remove = sec_pmic_remove,
diff --git a/drivers/regulator/s5m8767.c b/drivers/regulator/s5m8767.c
index aa0ccef5c31c..1250cef43d7b 100644
--- a/drivers/regulator/s5m8767.c
+++ b/drivers/regulator/s5m8767.c
@@ -14,6 +14,7 @@
 #include <linux/bug.h>
 #include <linux/err.h>
 #include <linux/gpio.h>
+#include <linux/of_gpio.h>
 #include <linux/slab.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
@@ -21,6 +22,9 @@
 #include <linux/regulator/machine.h>
 #include <linux/mfd/samsung/core.h>
 #include <linux/mfd/samsung/s5m8767.h>
+#include <linux/regulator/of_regulator.h>
+
+#define S5M8767_OPMODE_NORMAL_MODE 0x1
 
 struct s5m8767_info {
 	struct device *dev;
@@ -474,15 +478,194 @@ static struct regulator_desc regulators[] = {
 	s5m8767_regulator_desc(BUCK9),
 };
 
+#ifdef CONFIG_OF
+static int s5m8767_pmic_dt_parse_dvs_gpio(struct sec_pmic_dev *iodev,
+			struct sec_platform_data *pdata,
+			struct device_node *pmic_np)
+{
+	int i, gpio;
+
+	for (i = 0; i < 3; i++) {
+		gpio = of_get_named_gpio(pmic_np,
+					"s5m8767,pmic-buck-dvs-gpios", i);
+		if (!gpio_is_valid(gpio)) {
+			dev_err(iodev->dev, "invalid gpio[%d]: %d\n", i, gpio);
+			return -EINVAL;
+		}
+		pdata->buck_gpios[i] = gpio;
+	}
+	return 0;
+}
+
+static int s5m8767_pmic_dt_parse_ds_gpio(struct sec_pmic_dev *iodev,
+			struct sec_platform_data *pdata,
+			struct device_node *pmic_np)
+{
+	int i, gpio;
+
+	for (i = 0; i < 3; i++) {
+		gpio = of_get_named_gpio(pmic_np,
+					"s5m8767,pmic-buck-ds-gpios", i);
+		if (!gpio_is_valid(gpio)) {
+			dev_err(iodev->dev, "invalid gpio[%d]: %d\n", i, gpio);
+			return -EINVAL;
+		}
+		pdata->buck_ds[i] = gpio;
+	}
+	return 0;
+}
+
+static int s5m8767_pmic_dt_parse_pdata(struct sec_pmic_dev *iodev,
+					struct sec_platform_data *pdata)
+{
+	struct device_node *pmic_np, *regulators_np, *reg_np;
+	struct sec_regulator_data *rdata;
+	struct sec_opmode_data *rmode;
+	unsigned int i, dvs_voltage_nr = 1, ret;
+
+	pmic_np = iodev->dev->of_node;
+	if (!pmic_np) {
+		dev_err(iodev->dev, "could not find pmic sub-node\n");
+		return -ENODEV;
+	}
+
+	regulators_np = of_find_node_by_name(pmic_np, "regulators");
+	if (!regulators_np) {
+		dev_err(iodev->dev, "could not find regulators sub-node\n");
+		return -EINVAL;
+	}
+
+	/* count the number of regulators to be supported in pmic */
+	pdata->num_regulators = 0;
+	for_each_child_of_node(regulators_np, reg_np)
+		pdata->num_regulators++;
+
+	rdata = devm_kzalloc(iodev->dev, sizeof(*rdata) *
+				pdata->num_regulators, GFP_KERNEL);
+	if (!rdata) {
+		dev_err(iodev->dev,
+			"could not allocate memory for regulator data\n");
+		return -ENOMEM;
+	}
+
+	rmode = devm_kzalloc(iodev->dev, sizeof(*rmode) *
+				pdata->num_regulators, GFP_KERNEL);
+	if (!rdata) {
+		dev_err(iodev->dev,
+			"could not allocate memory for regulator mode\n");
+		return -ENOMEM;
+	}
+
+	pdata->regulators = rdata;
+	pdata->opmode = rmode;
+	for_each_child_of_node(regulators_np, reg_np) {
+		for (i = 0; i < ARRAY_SIZE(regulators); i++)
+			if (!of_node_cmp(reg_np->name, regulators[i].name))
+				break;
+
+		if (i == ARRAY_SIZE(regulators)) {
+			dev_warn(iodev->dev,
+			"don't know how to configure regulator %s\n",
+			reg_np->name);
+			continue;
+		}
+
+		rdata->id = i;
+		rdata->initdata = of_get_regulator_init_data(
+						iodev->dev, reg_np);
+		rdata->reg_node = reg_np;
+		rdata++;
+		rmode->id = i;
+		if (of_property_read_u32(reg_np, "op_mode",
+				&rmode->mode)) {
+			dev_warn(iodev->dev,
+				"no op_mode property property at %s\n",
+				reg_np->full_name);
+
+			rmode->mode = S5M8767_OPMODE_NORMAL_MODE;
+		}
+		rmode++;
+	}
+
+	if (of_get_property(pmic_np, "s5m8767,pmic-buck2-uses-gpio-dvs", NULL))
+		pdata->buck2_gpiodvs = true;
+
+	if (of_get_property(pmic_np, "s5m8767,pmic-buck3-uses-gpio-dvs", NULL))
+		pdata->buck3_gpiodvs = true;
+
+	if (of_get_property(pmic_np, "s5m8767,pmic-buck4-uses-gpio-dvs", NULL))
+		pdata->buck4_gpiodvs = true;
+
+	if (pdata->buck2_gpiodvs || pdata->buck3_gpiodvs ||
+						pdata->buck4_gpiodvs) {
+		ret = s5m8767_pmic_dt_parse_dvs_gpio(iodev, pdata, pmic_np);
+		if (ret)
+			return -EINVAL;
+
+		if (of_property_read_u32(pmic_np,
+				"s5m8767,pmic-buck-default-dvs-idx",
+				&pdata->buck_default_idx)) {
+			pdata->buck_default_idx = 0;
+		} else {
+			if (pdata->buck_default_idx >= 8) {
+				pdata->buck_default_idx = 0;
+				dev_info(iodev->dev,
+				"invalid value for default dvs index, use 0\n");
+			}
+		}
+		dvs_voltage_nr = 8;
+	}
+
+	ret = s5m8767_pmic_dt_parse_ds_gpio(iodev, pdata, pmic_np);
+	if (ret)
+		return -EINVAL;
+
+	if (of_property_read_u32_array(pmic_np,
+				"s5m8767,pmic-buck2-dvs-voltage",
+				pdata->buck2_voltage, dvs_voltage_nr)) {
+		dev_err(iodev->dev, "buck2 voltages not specified\n");
+		return -EINVAL;
+	}
+
+	if (of_property_read_u32_array(pmic_np,
+				"s5m8767,pmic-buck3-dvs-voltage",
+				pdata->buck3_voltage, dvs_voltage_nr)) {
+		dev_err(iodev->dev, "buck3 voltages not specified\n");
+		return -EINVAL;
+	}
+
+	if (of_property_read_u32_array(pmic_np,
+				"s5m8767,pmic-buck4-dvs-voltage",
+				pdata->buck4_voltage, dvs_voltage_nr)) {
+		dev_err(iodev->dev, "buck4 voltages not specified\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+#else
+static int s5m8767_pmic_dt_parse_pdata(struct sec_pmic_dev *iodev,
+					struct sec_platform_data *pdata)
+{
+	return 0;
+}
+#endif /* CONFIG_OF */
+
 static int s5m8767_pmic_probe(struct platform_device *pdev)
 {
 	struct sec_pmic_dev *iodev = dev_get_drvdata(pdev->dev.parent);
-	struct sec_platform_data *pdata = dev_get_platdata(iodev->dev);
+	struct sec_platform_data *pdata = iodev->pdata;
 	struct regulator_config config = { };
 	struct regulator_dev **rdev;
 	struct s5m8767_info *s5m8767;
 	int i, ret, size, buck_init;
 
+	if (iodev->dev->of_node) {
+		ret = s5m8767_pmic_dt_parse_pdata(iodev, pdata);
+		if (ret)
+			return ret;
+	}
+
 	if (!pdata) {
 		dev_err(pdev->dev.parent, "Platform data not supplied\n");
 		return -ENODEV;
@@ -726,6 +909,7 @@ static int s5m8767_pmic_probe(struct platform_device *pdev)
 		config.init_data = pdata->regulators[i].initdata;
 		config.driver_data = s5m8767;
 		config.regmap = iodev->regmap;
+		config.of_node = pdata->regulators[i].reg_node;
 
 		rdev[i] = regulator_register(&regulators[id], &config);
 		if (IS_ERR(rdev[i])) {
diff --git a/include/linux/mfd/samsung/core.h b/include/linux/mfd/samsung/core.h
index b50c38f8bc48..f0f4de3b4ccc 100644
--- a/include/linux/mfd/samsung/core.h
+++ b/include/linux/mfd/samsung/core.h
@@ -26,6 +26,7 @@ enum sec_device_type {
 /**
  * struct sec_pmic_dev - s5m87xx master device for sub-drivers
  * @dev: master device of the chip (can be used to access platform data)
+ * @pdata: pointer to private data used to pass platform data to child
  * @i2c: i2c client private data for regulator
  * @rtc: i2c client private data for rtc
  * @iolock: mutex for serializing io access
@@ -39,6 +40,7 @@ enum sec_device_type {
  */
 struct sec_pmic_dev {
 	struct device *dev;
+	struct sec_platform_data *pdata;
 	struct regmap *regmap;
 	struct i2c_client *i2c;
 	struct i2c_client *rtc;
@@ -82,11 +84,11 @@ struct sec_platform_data {
 
 	int				buck_gpios[3];
 	int				buck_ds[3];
-	int				buck2_voltage[8];
+	unsigned int			buck2_voltage[8];
 	bool				buck2_gpiodvs;
-	int				buck3_voltage[8];
+	unsigned int			buck3_voltage[8];
 	bool				buck3_gpiodvs;
-	int				buck4_voltage[8];
+	unsigned int			buck4_voltage[8];
 	bool				buck4_gpiodvs;
 
 	int				buck_set1;
@@ -127,6 +129,7 @@ struct sec_platform_data {
 struct sec_regulator_data {
 	int				id;
 	struct regulator_init_data	*initdata;
+	struct device_node *reg_node;
 };
 
 /*
@@ -136,7 +139,7 @@ struct sec_regulator_data {
  */
 struct sec_opmode_data {
 	int id;
-	int mode;
+	unsigned int mode;
 };
 
 /*
-- 
cgit v1.2.3


From c02cf5f8ed6137e2b3b2f10e0fca336e06e09ba4 Mon Sep 17 00:00:00 2001
From: anish kumar <anish198519851985@gmail.com>
Date: Sun, 3 Feb 2013 22:08:23 +0100
Subject: irq_work: Remove return value from the irq_work_queue() function

As no one is using the return value of irq_work_queue(),
so it is better to just make it void.

Signed-off-by: anish kumar <anish198519851985@gmail.com>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
[ Fix stale comments, remove now unnecessary __irq_work_queue() intermediate function ]
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/r/1359925703-24304-1-git-send-email-fweisbec@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/irq_work.h |  2 +-
 kernel/irq_work.c        | 31 ++++++++++---------------------
 2 files changed, 11 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h
index 6a9e8f5399e2..ce60c084635b 100644
--- a/include/linux/irq_work.h
+++ b/include/linux/irq_work.h
@@ -16,7 +16,7 @@ void init_irq_work(struct irq_work *work, void (*func)(struct irq_work *))
 	work->func = func;
 }
 
-bool irq_work_queue(struct irq_work *work);
+void irq_work_queue(struct irq_work *work);
 void irq_work_run(void);
 void irq_work_sync(struct irq_work *work);
 
diff --git a/kernel/irq_work.c b/kernel/irq_work.c
index 64eddd59ed83..c9d7478e4889 100644
--- a/kernel/irq_work.c
+++ b/kernel/irq_work.c
@@ -63,12 +63,20 @@ void __weak arch_irq_work_raise(void)
 }
 
 /*
- * Queue the entry and raise the IPI if needed.
+ * Enqueue the irq_work @entry unless it's already pending
+ * somewhere.
+ *
+ * Can be re-enqueued while the callback is still in progress.
  */
-static void __irq_work_queue(struct irq_work *work)
+void irq_work_queue(struct irq_work *work)
 {
 	bool empty;
 
+	/* Only queue if not already pending */
+	if (!irq_work_claim(work))
+		return;
+
+	/* Queue the entry and raise the IPI if needed. */
 	preempt_disable();
 
 	empty = llist_add(&work->llnode, &__get_cpu_var(irq_work_list));
@@ -78,25 +86,6 @@ static void __irq_work_queue(struct irq_work *work)
 
 	preempt_enable();
 }
-
-/*
- * Enqueue the irq_work @entry, returns true on success, failure when the
- * @entry was already enqueued by someone else.
- *
- * Can be re-enqueued while the callback is still in progress.
- */
-bool irq_work_queue(struct irq_work *work)
-{
-	if (!irq_work_claim(work)) {
-		/*
-		 * Already enqueued, can't do!
-		 */
-		return false;
-	}
-
-	__irq_work_queue(work);
-	return true;
-}
 EXPORT_SYMBOL_GPL(irq_work_queue);
 
 /*
-- 
cgit v1.2.3


From 7b9e8522a65886d8ae168547a67c3617b6ba83f1 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Mon, 28 Jan 2013 14:41:07 -0500
Subject: nfsd: fix IPv6 address handling in the DRC

Currently, it only stores the first 16 bytes of any address. struct
sockaddr_in6 is 28 bytes however, so we're currently ignoring the last
12 bytes of the address.

Expand the c_addr field to a sockaddr_in6, and cast it to a sockaddr_in
as necessary. Also fix the comparitor to use the existing RPC
helpers for this.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/cache.h             | 6 +++++-
 fs/nfsd/nfscache.c          | 7 +++++--
 include/linux/sunrpc/clnt.h | 4 +++-
 3 files changed, 13 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfsd/cache.h b/fs/nfsd/cache.h
index 93cc9d34c459..2cac76c63b97 100644
--- a/fs/nfsd/cache.h
+++ b/fs/nfsd/cache.h
@@ -12,6 +12,10 @@
 
 /*
  * Representation of a reply cache entry.
+ *
+ * Note that we use a sockaddr_in6 to hold the address instead of the more
+ * typical sockaddr_storage. This is for space reasons, since sockaddr_storage
+ * is much larger than a sockaddr_in6.
  */
 struct svc_cacherep {
 	struct hlist_node	c_hash;
@@ -20,7 +24,7 @@ struct svc_cacherep {
 	unsigned char		c_state,	/* unused, inprog, done */
 				c_type,		/* status, buffer */
 				c_secure : 1;	/* req came from port < 1024 */
-	struct sockaddr_in	c_addr;
+	struct sockaddr_in6	c_addr;
 	__be32			c_xid;
 	u32			c_prot;
 	u32			c_proc;
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index 2cbac34a55da..5dd9ec2a177f 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -9,6 +9,7 @@
  */
 
 #include <linux/slab.h>
+#include <linux/sunrpc/clnt.h>
 
 #include "nfsd.h"
 #include "cache.h"
@@ -146,7 +147,8 @@ nfsd_cache_lookup(struct svc_rqst *rqstp)
 		    xid == rp->c_xid && proc == rp->c_proc &&
 		    proto == rp->c_prot && vers == rp->c_vers &&
 		    time_before(jiffies, rp->c_timestamp + 120*HZ) &&
-		    memcmp((char*)&rqstp->rq_addr, (char*)&rp->c_addr, sizeof(rp->c_addr))==0) {
+		    rpc_cmp_addr(svc_addr(rqstp), (struct sockaddr *)&rp->c_addr) &&
+		    rpc_get_port(svc_addr(rqstp)) == rpc_get_port((struct sockaddr *)&rp->c_addr)) {
 			nfsdstats.rchits++;
 			goto found_entry;
 		}
@@ -183,7 +185,8 @@ nfsd_cache_lookup(struct svc_rqst *rqstp)
 	rp->c_state = RC_INPROG;
 	rp->c_xid = xid;
 	rp->c_proc = proc;
-	memcpy(&rp->c_addr, svc_addr_in(rqstp), sizeof(rp->c_addr));
+	rpc_copy_addr((struct sockaddr *)&rp->c_addr, svc_addr(rqstp));
+	rpc_set_port((struct sockaddr *)&rp->c_addr, rpc_get_port(svc_addr(rqstp)));
 	rp->c_prot = proto;
 	rp->c_vers = vers;
 	rp->c_timestamp = jiffies;
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index 34206b84d8da..47354a25a927 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -263,7 +263,9 @@ static inline bool __rpc_copy_addr6(struct sockaddr *dst,
  * @sap1: first sockaddr
  * @sap2: second sockaddr
  *
- * Just compares the family and address portion. Ignores port, scope, etc.
+ * Just compares the family and address portion. Ignores port, but
+ * compares the scope if it's a link-local address.
+ *
  * Returns true if the addrs are equal, false if they aren't.
  */
 static inline bool rpc_cmp_addr(const struct sockaddr *sap1,
-- 
cgit v1.2.3


From 3f52b7e328c526fa7a592af9bf5772c591ed38a4 Mon Sep 17 00:00:00 2001
From: Marco Porsch <marco@cozybit.com>
Date: Wed, 30 Jan 2013 18:14:08 +0100
Subject: mac80211: mesh power save basics

Add routines to
- maintain a PS mode for each peer and a non-peer PS mode
- indicate own PS mode in transmitted frames
- track neighbor STAs power modes
- buffer frames when neighbors are in PS mode
- add TIM and Awake Window IE to beacons
- release frames in Mesh Peer Service Periods

Add local_pm to sta_info to represent the link-specific power
mode at this station towards the remote station. When a peer
link is established, use the default power mode stored in mesh
config. Update the PS status if the peering status of a neighbor
changes.
Maintain a mesh power mode for non-peer mesh STAs. Set the
non-peer power mode to active mode during peering. Authenticated
mesh peering is currently not working when either node is
configured to be in power save mode.

Indicate the current power mode in transmitted frames. Use QoS
Nulls to indicate mesh power mode transitions.
For performance reasons, calls to the function setting the frame
flags are placed in HWMP routing routines, as there the STA
pointer is already available.

Add peer_pm to sta_info to represent the peer's link-specific
power mode towards the local station. Add nonpeer_pm to
represent the peer's power mode towards all non-peer stations.
Track power modes based on received frames.

Add the ps_data structure to ieee80211_if_mesh (for TIM map, PS
neighbor counter and group-addressed frame buffer).

Set WLAN_STA_PS flag for STA in PS mode to use the unicast frame
buffering routines in the tx path. Update num_sta_ps to buffer
and release group-addressed frames after DTIM beacons.

Announce the awake window duration in beacons if in light or
deep sleep mode towards any peer or non-peer. Create a TIM IE
similarly to AP mode and add it to mesh beacons. Parse received
Awake Window IEs and check TIM IEs for buffered frames.

Release frames towards peers in mesh Peer Service Periods. Use
the corresponding trigger frames and monitor the MPSP status.
Append a QoS Null as trigger frame if neccessary to properly end
the MPSP. Currently, in HT channels MPSPs behave imperfectly and
show large delay spikes and frame losses.

Signed-off-by: Marco Porsch <marco@cozybit.com>
Signed-off-by: Ivan Bezyazychnyy <ivan.bezyazychnyy@gmail.com>
Signed-off-by: Mike Krinkin <krinkin.m.u@gmail.com>
Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h     |   8 +
 net/mac80211/Kconfig          |  11 +
 net/mac80211/Makefile         |   3 +-
 net/mac80211/cfg.c            |  27 +-
 net/mac80211/debug.h          |  10 +
 net/mac80211/debugfs_netdev.c |   5 +
 net/mac80211/debugfs_sta.c    |   5 +-
 net/mac80211/ieee80211_i.h    |   6 +
 net/mac80211/mesh.c           |  33 +++
 net/mac80211/mesh.h           |  17 ++
 net/mac80211/mesh_hwmp.c      |   7 +
 net/mac80211/mesh_pathtbl.c   |   1 +
 net/mac80211/mesh_plink.c     |  17 ++
 net/mac80211/mesh_ps.c        | 585 ++++++++++++++++++++++++++++++++++++++++++
 net/mac80211/rx.c             |   7 +
 net/mac80211/sta_info.c       |  20 +-
 net/mac80211/sta_info.h       |  11 +
 net/mac80211/status.c         |   7 +
 net/mac80211/tx.c             |  31 ++-
 net/mac80211/util.c           |   4 +
 net/mac80211/wme.c            |  13 +-
 21 files changed, 811 insertions(+), 17 deletions(-)
 create mode 100644 net/mac80211/mesh_ps.c

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 11c8bc87fdcb..7e8a498efe6d 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -151,6 +151,11 @@
 /* Mesh Control 802.11s */
 #define IEEE80211_QOS_CTL_MESH_CONTROL_PRESENT  0x0100
 
+/* Mesh Power Save Level */
+#define IEEE80211_QOS_CTL_MESH_PS_LEVEL		0x0200
+/* Mesh Receiver Service Period Initiated */
+#define IEEE80211_QOS_CTL_RSPI			0x0400
+
 /* U-APSD queue for WMM IEs sent by AP */
 #define IEEE80211_WMM_IE_AP_QOSINFO_UAPSD	(1<<7)
 #define IEEE80211_WMM_IE_AP_QOSINFO_PARAM_SET_CNT_MASK	0x0f
@@ -675,11 +680,14 @@ struct ieee80211_meshconf_ie {
  * @IEEE80211_MESHCONF_CAPAB_FORWARDING: the STA forwards MSDUs
  * @IEEE80211_MESHCONF_CAPAB_TBTT_ADJUSTING: TBTT adjustment procedure
  *	is ongoing
+ * @IEEE80211_MESHCONF_CAPAB_POWER_SAVE_LEVEL: STA is in deep sleep mode or has
+ *	neighbors in deep sleep mode
  */
 enum mesh_config_capab_flags {
 	IEEE80211_MESHCONF_CAPAB_ACCEPT_PLINKS		= 0x01,
 	IEEE80211_MESHCONF_CAPAB_FORWARDING		= 0x08,
 	IEEE80211_MESHCONF_CAPAB_TBTT_ADJUSTING		= 0x20,
+	IEEE80211_MESHCONF_CAPAB_POWER_SAVE_LEVEL	= 0x40,
 };
 
 /**
diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig
index b4ecf267a34b..0ecf947ad378 100644
--- a/net/mac80211/Kconfig
+++ b/net/mac80211/Kconfig
@@ -258,6 +258,17 @@ config MAC80211_MESH_SYNC_DEBUG
 
 	  Do not select this option.
 
+config MAC80211_MESH_PS_DEBUG
+	bool "Verbose mesh powersave debugging"
+	depends on MAC80211_DEBUG_MENU
+	depends on MAC80211_MESH
+	---help---
+	  Selecting this option causes mac80211 to print out very verbose mesh
+	  powersave debugging messages (when mac80211 is taking part in a
+	  mesh network).
+
+	  Do not select this option.
+
 config MAC80211_TDLS_DEBUG
 	bool "Verbose TDLS debugging"
 	depends on MAC80211_DEBUG_MENU
diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile
index 4911202334d9..9d7d840aac6d 100644
--- a/net/mac80211/Makefile
+++ b/net/mac80211/Makefile
@@ -39,7 +39,8 @@ mac80211-$(CONFIG_MAC80211_MESH) += \
 	mesh_pathtbl.o \
 	mesh_plink.o \
 	mesh_hwmp.o \
-	mesh_sync.o
+	mesh_sync.o \
+	mesh_ps.o
 
 mac80211-$(CONFIG_PM) += pm.o
 
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 661b878bd19c..f4f7e7691077 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -492,7 +492,10 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
 #ifdef CONFIG_MAC80211_MESH
 		sinfo->filled |= STATION_INFO_LLID |
 				 STATION_INFO_PLID |
-				 STATION_INFO_PLINK_STATE;
+				 STATION_INFO_PLINK_STATE |
+				 STATION_INFO_LOCAL_PM |
+				 STATION_INFO_PEER_PM |
+				 STATION_INFO_NONPEER_PM;
 
 		sinfo->llid = le16_to_cpu(sta->llid);
 		sinfo->plid = le16_to_cpu(sta->plid);
@@ -501,6 +504,9 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
 			sinfo->filled |= STATION_INFO_T_OFFSET;
 			sinfo->t_offset = sta->t_offset;
 		}
+		sinfo->local_pm = sta->local_pm;
+		sinfo->peer_pm = sta->peer_pm;
+		sinfo->nonpeer_pm = sta->nonpeer_pm;
 #endif
 	}
 
@@ -1262,6 +1268,10 @@ static int sta_apply_parameters(struct ieee80211_local *local,
 					changed = mesh_plink_inc_estab_count(
 							sdata);
 				sta->plink_state = params->plink_state;
+
+				ieee80211_mps_sta_status_update(sta);
+				ieee80211_mps_set_sta_local_pm(sta,
+					sdata->u.mesh.mshcfg.power_mode);
 				break;
 			case NL80211_PLINK_LISTEN:
 			case NL80211_PLINK_BLOCKED:
@@ -1273,6 +1283,9 @@ static int sta_apply_parameters(struct ieee80211_local *local,
 					changed = mesh_plink_dec_estab_count(
 							sdata);
 				sta->plink_state = params->plink_state;
+
+				ieee80211_mps_sta_status_update(sta);
+				ieee80211_mps_local_status_update(sdata);
 				break;
 			default:
 				/*  nothing  */
@@ -1289,6 +1302,9 @@ static int sta_apply_parameters(struct ieee80211_local *local,
 				break;
 			}
 		}
+
+		if (params->local_pm)
+			ieee80211_mps_set_sta_local_pm(sta, params->local_pm);
 #endif
 	}
 
@@ -1777,6 +1793,15 @@ static int ieee80211_update_mesh_config(struct wiphy *wiphy,
 	if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_CONFIRMATION_INTERVAL, mask))
 		conf->dot11MeshHWMPconfirmationInterval =
 			nconf->dot11MeshHWMPconfirmationInterval;
+	if (_chg_mesh_attr(NL80211_MESHCONF_POWER_MODE, mask)) {
+		conf->power_mode = nconf->power_mode;
+		ieee80211_mps_local_status_update(sdata);
+	}
+	if (_chg_mesh_attr(NL80211_MESHCONF_AWAKE_WINDOW, mask)) {
+		conf->dot11MeshAwakeWindowDuration =
+			nconf->dot11MeshAwakeWindowDuration;
+		ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON);
+	}
 	return 0;
 }
 
diff --git a/net/mac80211/debug.h b/net/mac80211/debug.h
index 8f383a576016..4ccc5ed6237d 100644
--- a/net/mac80211/debug.h
+++ b/net/mac80211/debug.h
@@ -44,6 +44,12 @@
 #define MAC80211_MESH_SYNC_DEBUG 0
 #endif
 
+#ifdef CONFIG_MAC80211_MESH_PS_DEBUG
+#define MAC80211_MESH_PS_DEBUG 1
+#else
+#define MAC80211_MESH_PS_DEBUG 0
+#endif
+
 #ifdef CONFIG_MAC80211_TDLS_DEBUG
 #define MAC80211_TDLS_DEBUG 1
 #else
@@ -151,6 +157,10 @@ do {									\
 	_sdata_dbg(MAC80211_MESH_SYNC_DEBUG,				\
 		   sdata, fmt, ##__VA_ARGS__)
 
+#define mps_dbg(sdata, fmt, ...)					\
+	_sdata_dbg(MAC80211_MESH_PS_DEBUG,				\
+		   sdata, fmt, ##__VA_ARGS__)
+
 #define tdls_dbg(sdata, fmt, ...)					\
 	_sdata_dbg(MAC80211_TDLS_DEBUG,					\
 		   sdata, fmt, ##__VA_ARGS__)
diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
index cbde5cc49a40..059bbb82e84f 100644
--- a/net/mac80211/debugfs_netdev.c
+++ b/net/mac80211/debugfs_netdev.c
@@ -515,6 +515,9 @@ IEEE80211_IF_FILE(dot11MeshHWMProotInterval,
 		  u.mesh.mshcfg.dot11MeshHWMProotInterval, DEC);
 IEEE80211_IF_FILE(dot11MeshHWMPconfirmationInterval,
 		  u.mesh.mshcfg.dot11MeshHWMPconfirmationInterval, DEC);
+IEEE80211_IF_FILE(power_mode, u.mesh.mshcfg.power_mode, DEC);
+IEEE80211_IF_FILE(dot11MeshAwakeWindowDuration,
+		  u.mesh.mshcfg.dot11MeshAwakeWindowDuration, DEC);
 #endif
 
 #define DEBUGFS_ADD_MODE(name, mode) \
@@ -620,6 +623,8 @@ static void add_mesh_config(struct ieee80211_sub_if_data *sdata)
 	MESHPARAMS_ADD(dot11MeshHWMPactivePathToRootTimeout);
 	MESHPARAMS_ADD(dot11MeshHWMProotInterval);
 	MESHPARAMS_ADD(dot11MeshHWMPconfirmationInterval);
+	MESHPARAMS_ADD(power_mode);
+	MESHPARAMS_ADD(dot11MeshAwakeWindowDuration);
 #undef MESHPARAMS_ADD
 }
 #endif
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index 6fb1168b9f16..c7591f73dbc3 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -65,7 +65,7 @@ static ssize_t sta_flags_read(struct file *file, char __user *userbuf,
 	test_sta_flag(sta, WLAN_STA_##flg) ? #flg "\n" : ""
 
 	int res = scnprintf(buf, sizeof(buf),
-			    "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
+			    "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
 			    TEST(AUTH), TEST(ASSOC), TEST(PS_STA),
 			    TEST(PS_DRIVER), TEST(AUTHORIZED),
 			    TEST(SHORT_PREAMBLE),
@@ -74,7 +74,8 @@ static ssize_t sta_flags_read(struct file *file, char __user *userbuf,
 			    TEST(UAPSD), TEST(SP), TEST(TDLS_PEER),
 			    TEST(TDLS_PEER_AUTH), TEST(4ADDR_EVENT),
 			    TEST(INSERTED), TEST(RATE_CONTROL),
-			    TEST(TOFFSET_KNOWN));
+			    TEST(TOFFSET_KNOWN), TEST(MPSP_OWNER),
+			    TEST(MPSP_RECIPIENT));
 #undef TEST
 	return simple_read_from_buffer(userbuf, count, ppos, buf, res);
 }
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 8faf360e0b4c..5fe9db707880 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -590,6 +590,11 @@ struct ieee80211_if_mesh {
 	s64 sync_offset_clockdrift_max;
 	spinlock_t sync_offset_lock;
 	bool adjusting_tbtt;
+	/* mesh power save */
+	enum nl80211_mesh_power_mode nonpeer_pm;
+	int ps_peers_light_sleep;
+	int ps_peers_deep_sleep;
+	struct ps_data ps;
 };
 
 #ifdef CONFIG_MAC80211_MESH
@@ -1185,6 +1190,7 @@ struct ieee802_11_elems {
 	struct ieee80211_meshconf_ie *mesh_config;
 	u8 *mesh_id;
 	u8 *peering;
+	__le16 *awake_window;
 	u8 *preq;
 	u8 *prep;
 	u8 *perr;
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index f920da1201ab..35ac38871420 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -261,6 +261,9 @@ mesh_add_meshconf_ie(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata)
 	*pos = IEEE80211_MESHCONF_CAPAB_FORWARDING;
 	*pos |= ifmsh->accepting_plinks ?
 	    IEEE80211_MESHCONF_CAPAB_ACCEPT_PLINKS : 0x00;
+	/* Mesh PS mode. See IEEE802.11-2012 8.4.2.100.8 */
+	*pos |= ifmsh->ps_peers_deep_sleep ?
+	    IEEE80211_MESHCONF_CAPAB_POWER_SAVE_LEVEL : 0x00;
 	*pos++ |= ifmsh->adjusting_tbtt ?
 	    IEEE80211_MESHCONF_CAPAB_TBTT_ADJUSTING : 0x00;
 	*pos++ = 0x00;
@@ -286,6 +289,29 @@ mesh_add_meshid_ie(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata)
 	return 0;
 }
 
+int mesh_add_awake_window_ie(struct sk_buff *skb,
+			     struct ieee80211_sub_if_data *sdata)
+{
+	struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
+	u8 *pos;
+
+	/* see IEEE802.11-2012 13.14.6 */
+	if (ifmsh->ps_peers_light_sleep == 0 &&
+	    ifmsh->ps_peers_deep_sleep == 0 &&
+	    ifmsh->nonpeer_pm == NL80211_MESH_POWER_ACTIVE)
+		return 0;
+
+	if (skb_tailroom(skb) < 4)
+		return -ENOMEM;
+
+	pos = skb_put(skb, 2 + 2);
+	*pos++ = WLAN_EID_MESH_AWAKE_WINDOW;
+	*pos++ = 2;
+	put_unaligned_le16(ifmsh->mshcfg.dot11MeshAwakeWindowDuration, pos);
+
+	return 0;
+}
+
 int
 mesh_add_vendor_ies(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata)
 {
@@ -629,6 +655,8 @@ void ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata)
 	sdata->vif.bss_conf.basic_rates =
 		ieee80211_mandatory_rates(local, band);
 
+	ieee80211_mps_local_status_update(sdata);
+
 	ieee80211_bss_info_change_notify(sdata, changed);
 
 	netif_carrier_on(sdata->dev);
@@ -651,6 +679,10 @@ void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata)
 	sta_info_flush(sdata);
 	mesh_path_flush_by_iface(sdata);
 
+	/* free all potentially still buffered group-addressed frames */
+	local->total_ps_buffered -= skb_queue_len(&ifmsh->ps.bc_buf);
+	skb_queue_purge(&ifmsh->ps.bc_buf);
+
 	del_timer_sync(&sdata->u.mesh.housekeeping_timer);
 	del_timer_sync(&sdata->u.mesh.mesh_path_root_timer);
 	del_timer_sync(&sdata->u.mesh.mesh_path_timer);
@@ -828,6 +860,7 @@ void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata)
 		    ieee80211_mesh_path_root_timer,
 		    (unsigned long) sdata);
 	INIT_LIST_HEAD(&ifmsh->preq_queue.list);
+	skb_queue_head_init(&ifmsh->ps.bc_buf);
 	spin_lock_init(&ifmsh->mesh_preq_queue_lock);
 	spin_lock_init(&ifmsh->sync_offset_lock);
 
diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h
index aff301544c7f..eb336253b6b3 100644
--- a/net/mac80211/mesh.h
+++ b/net/mac80211/mesh.h
@@ -222,6 +222,8 @@ int mesh_add_meshid_ie(struct sk_buff *skb,
 		       struct ieee80211_sub_if_data *sdata);
 int mesh_add_rsn_ie(struct sk_buff *skb,
 		    struct ieee80211_sub_if_data *sdata);
+int mesh_add_awake_window_ie(struct sk_buff *skb,
+			     struct ieee80211_sub_if_data *sdata);
 int mesh_add_vendor_ies(struct sk_buff *skb,
 			struct ieee80211_sub_if_data *sdata);
 int mesh_add_ds_params_ie(struct sk_buff *skb,
@@ -242,6 +244,21 @@ void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata);
 void ieee80211_mesh_root_setup(struct ieee80211_if_mesh *ifmsh);
 const struct ieee80211_mesh_sync_ops *ieee80211_mesh_sync_ops_get(u8 method);
 
+/* mesh power save */
+void ieee80211_mps_local_status_update(struct ieee80211_sub_if_data *sdata);
+void ieee80211_mps_set_sta_local_pm(struct sta_info *sta,
+				    enum nl80211_mesh_power_mode pm);
+void ieee80211_mps_set_frame_flags(struct ieee80211_sub_if_data *sdata,
+				   struct sta_info *sta,
+				   struct ieee80211_hdr *hdr);
+void ieee80211_mps_sta_status_update(struct sta_info *sta);
+void ieee80211_mps_rx_h_sta_process(struct sta_info *sta,
+				    struct ieee80211_hdr *hdr);
+void ieee80211_mpsp_trigger_process(u8 *qc, struct sta_info *sta,
+				    bool tx, bool acked);
+void ieee80211_mps_frame_release(struct sta_info *sta,
+				 struct ieee802_11_elems *elems);
+
 /* Mesh paths */
 int mesh_nexthop_lookup(struct sk_buff *skb,
 		struct ieee80211_sub_if_data *sdata);
diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index 6b4603a90031..f0dd8742ed42 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -205,6 +205,7 @@ static void prepare_frame_for_deferred_tx(struct ieee80211_sub_if_data *sdata,
 		struct sk_buff *skb)
 {
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
 
 	skb_set_mac_header(skb, 0);
 	skb_set_network_header(skb, 0);
@@ -217,6 +218,7 @@ static void prepare_frame_for_deferred_tx(struct ieee80211_sub_if_data *sdata,
 	info->control.vif = &sdata->vif;
 	info->flags |= IEEE80211_TX_INTFL_NEED_TXPROCESSING;
 	ieee80211_set_qos_hdr(sdata, skb);
+	ieee80211_mps_set_frame_flags(sdata, NULL, hdr);
 }
 
 /**
@@ -1080,6 +1082,10 @@ int mesh_nexthop_resolve(struct sk_buff *skb,
 	u8 *target_addr = hdr->addr3;
 	int err = 0;
 
+	/* Nulls are only sent to peers for PS and should be pre-addressed */
+	if (ieee80211_is_qos_nullfunc(hdr->frame_control))
+		return 0;
+
 	rcu_read_lock();
 	err = mesh_nexthop_lookup(skb, sdata);
 	if (!err)
@@ -1151,6 +1157,7 @@ int mesh_nexthop_lookup(struct sk_buff *skb,
 	if (next_hop) {
 		memcpy(hdr->addr1, next_hop->sta.addr, ETH_ALEN);
 		memcpy(hdr->addr2, sdata->vif.addr, ETH_ALEN);
+		ieee80211_mps_set_frame_flags(sdata, next_hop, hdr);
 		err = 0;
 	}
 
diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c
index aa749818860e..d5786c3eaee2 100644
--- a/net/mac80211/mesh_pathtbl.c
+++ b/net/mac80211/mesh_pathtbl.c
@@ -212,6 +212,7 @@ void mesh_path_assign_nexthop(struct mesh_path *mpath, struct sta_info *sta)
 		hdr = (struct ieee80211_hdr *) skb->data;
 		memcpy(hdr->addr1, sta->sta.addr, ETH_ALEN);
 		memcpy(hdr->addr2, mpath->sdata->vif.addr, ETH_ALEN);
+		ieee80211_mps_set_frame_flags(sta->sdata, sta, hdr);
 	}
 
 	spin_unlock_irqrestore(&mpath->frame_queue.lock, flags);
diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c
index 6787d696d94c..fe7c3334d6fe 100644
--- a/net/mac80211/mesh_plink.c
+++ b/net/mac80211/mesh_plink.c
@@ -201,6 +201,9 @@ static u32 __mesh_plink_deactivate(struct sta_info *sta)
 	sta->plink_state = NL80211_PLINK_BLOCKED;
 	mesh_path_flush_by_nexthop(sta);
 
+	ieee80211_mps_sta_status_update(sta);
+	ieee80211_mps_local_status_update(sdata);
+
 	return changed;
 }
 
@@ -503,6 +506,7 @@ void mesh_neighbour_update(struct ieee80211_sub_if_data *sdata,
 	    rssi_threshold_check(sta, sdata))
 		mesh_plink_open(sta);
 
+	ieee80211_mps_frame_release(sta, elems);
 out:
 	rcu_read_unlock();
 }
@@ -633,6 +637,9 @@ int mesh_plink_open(struct sta_info *sta)
 		"Mesh plink: starting establishment with %pM\n",
 		sta->sta.addr);
 
+	/* set the non-peer mode to active during peering */
+	ieee80211_mps_local_status_update(sdata);
+
 	return mesh_plink_frame_tx(sdata, WLAN_SP_MESH_PEERING_OPEN,
 				   sta->sta.addr, llid, 0, 0);
 }
@@ -866,6 +873,10 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m
 			sta->llid = llid;
 			mesh_plink_timer_set(sta,
 					     mshcfg->dot11MeshRetryTimeout);
+
+			/* set the non-peer mode to active during peering */
+			ieee80211_mps_local_status_update(sdata);
+
 			spin_unlock_bh(&sta->lock);
 			mesh_plink_frame_tx(sdata,
 					    WLAN_SP_MESH_PEERING_OPEN,
@@ -959,6 +970,9 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m
 			changed |= mesh_set_short_slot_time(sdata);
 			mpl_dbg(sdata, "Mesh plink with %pM ESTABLISHED\n",
 				sta->sta.addr);
+			ieee80211_mps_sta_status_update(sta);
+			ieee80211_mps_set_sta_local_pm(sta,
+						       mshcfg->power_mode);
 			break;
 		default:
 			spin_unlock_bh(&sta->lock);
@@ -998,6 +1012,9 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m
 			mesh_plink_frame_tx(sdata,
 					    WLAN_SP_MESH_PEERING_CONFIRM,
 					    sta->sta.addr, llid, plid, 0);
+			ieee80211_mps_sta_status_update(sta);
+			ieee80211_mps_set_sta_local_pm(sta,
+						       mshcfg->power_mode);
 			break;
 		default:
 			spin_unlock_bh(&sta->lock);
diff --git a/net/mac80211/mesh_ps.c b/net/mac80211/mesh_ps.c
new file mode 100644
index 000000000000..b677962525ed
--- /dev/null
+++ b/net/mac80211/mesh_ps.c
@@ -0,0 +1,585 @@
+/*
+ * Copyright 2012-2013, Marco Porsch <marco.porsch@s2005.tu-chemnitz.de>
+ * Copyright 2012-2013, cozybit Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "mesh.h"
+#include "wme.h"
+
+
+/* mesh PS management */
+
+/**
+ * mps_qos_null_get - create pre-addressed QoS Null frame for mesh powersave
+ */
+static struct sk_buff *mps_qos_null_get(struct sta_info *sta)
+{
+	struct ieee80211_sub_if_data *sdata = sta->sdata;
+	struct ieee80211_local *local = sdata->local;
+	struct ieee80211_hdr *nullfunc; /* use 4addr header */
+	struct sk_buff *skb;
+	int size = sizeof(*nullfunc);
+	__le16 fc;
+
+	skb = dev_alloc_skb(local->hw.extra_tx_headroom + size + 2);
+	if (!skb)
+		return NULL;
+	skb_reserve(skb, local->hw.extra_tx_headroom);
+
+	nullfunc = (struct ieee80211_hdr *) skb_put(skb, size);
+	fc = cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_QOS_NULLFUNC);
+	ieee80211_fill_mesh_addresses(nullfunc, &fc, sta->sta.addr,
+				      sdata->vif.addr);
+	nullfunc->frame_control = fc;
+	nullfunc->duration_id = 0;
+	/* no address resolution for this frame -> set addr 1 immediately */
+	memcpy(nullfunc->addr1, sta->sta.addr, ETH_ALEN);
+	memset(skb_put(skb, 2), 0, 2); /* append QoS control field */
+	ieee80211_mps_set_frame_flags(sdata, sta, nullfunc);
+
+	return skb;
+}
+
+/**
+ * mps_qos_null_tx - send a QoS Null to indicate link-specific power mode
+ */
+static void mps_qos_null_tx(struct sta_info *sta)
+{
+	struct sk_buff *skb;
+
+	skb = mps_qos_null_get(sta);
+	if (!skb)
+		return;
+
+	mps_dbg(sta->sdata, "announcing peer-specific power mode to %pM\n",
+		sta->sta.addr);
+
+	/* don't unintentionally start a MPSP */
+	if (!test_sta_flag(sta, WLAN_STA_PS_STA)) {
+		u8 *qc = ieee80211_get_qos_ctl((void *) skb->data);
+
+		qc[0] |= IEEE80211_QOS_CTL_EOSP;
+	}
+
+	ieee80211_tx_skb(sta->sdata, skb);
+}
+
+/**
+ * ieee80211_mps_local_status_update - track status of local link-specific PMs
+ *
+ * @sdata: local mesh subif
+ *
+ * sets the non-peer power mode and triggers the driver PS (re-)configuration
+ */
+void ieee80211_mps_local_status_update(struct ieee80211_sub_if_data *sdata)
+{
+	struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
+	struct sta_info *sta;
+	bool peering = false;
+	int light_sleep_cnt = 0;
+	int deep_sleep_cnt = 0;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(sta, &sdata->local->sta_list, list) {
+		if (sdata != sta->sdata)
+			continue;
+
+		switch (sta->plink_state) {
+		case NL80211_PLINK_OPN_SNT:
+		case NL80211_PLINK_OPN_RCVD:
+		case NL80211_PLINK_CNF_RCVD:
+			peering = true;
+			break;
+		case NL80211_PLINK_ESTAB:
+			if (sta->local_pm == NL80211_MESH_POWER_LIGHT_SLEEP)
+				light_sleep_cnt++;
+			else if (sta->local_pm == NL80211_MESH_POWER_DEEP_SLEEP)
+				deep_sleep_cnt++;
+			break;
+		default:
+			break;
+		}
+	}
+	rcu_read_unlock();
+
+	/*
+	 * Set non-peer mode to active during peering/scanning/authentication
+	 * (see IEEE802.11-2012 13.14.8.3). The non-peer mesh power mode is
+	 * deep sleep if the local STA is in light or deep sleep towards at
+	 * least one mesh peer (see 13.14.3.1). Otherwise, set it to the
+	 * user-configured default value.
+	 */
+	if (peering) {
+		mps_dbg(sdata, "setting non-peer PM to active for peering\n");
+		ifmsh->nonpeer_pm = NL80211_MESH_POWER_ACTIVE;
+	} else if (light_sleep_cnt || deep_sleep_cnt) {
+		mps_dbg(sdata, "setting non-peer PM to deep sleep\n");
+		ifmsh->nonpeer_pm = NL80211_MESH_POWER_DEEP_SLEEP;
+	} else {
+		mps_dbg(sdata, "setting non-peer PM to user value\n");
+		ifmsh->nonpeer_pm = ifmsh->mshcfg.power_mode;
+	}
+
+	ifmsh->ps_peers_light_sleep = light_sleep_cnt;
+	ifmsh->ps_peers_deep_sleep = deep_sleep_cnt;
+}
+
+/**
+ * ieee80211_mps_set_sta_local_pm - set local PM towards a mesh STA
+ *
+ * @sta: mesh STA
+ * @pm: the power mode to set
+ */
+void ieee80211_mps_set_sta_local_pm(struct sta_info *sta,
+				    enum nl80211_mesh_power_mode pm)
+{
+	struct ieee80211_sub_if_data *sdata = sta->sdata;
+
+	mps_dbg(sdata, "local STA operates in mode %d with %pM\n",
+		pm, sta->sta.addr);
+
+	sta->local_pm = pm;
+
+	/*
+	 * announce peer-specific power mode transition
+	 * (see IEEE802.11-2012 13.14.3.2 and 13.14.3.3)
+	 */
+	if (sta->plink_state == NL80211_PLINK_ESTAB)
+		mps_qos_null_tx(sta);
+
+	ieee80211_mps_local_status_update(sdata);
+}
+
+/**
+ * ieee80211_mps_set_frame_flags - set mesh PS flags in FC (and QoS Control)
+ *
+ * @sdata: local mesh subif
+ * @sta: mesh STA
+ * @hdr: 802.11 frame header
+ *
+ * see IEEE802.11-2012 8.2.4.1.7 and 8.2.4.5.11
+ *
+ * NOTE: sta must be given when an individually-addressed QoS frame header
+ * is handled, for group-addressed and management frames it is not used
+ */
+void ieee80211_mps_set_frame_flags(struct ieee80211_sub_if_data *sdata,
+				   struct sta_info *sta,
+				   struct ieee80211_hdr *hdr)
+{
+	enum nl80211_mesh_power_mode pm;
+	u8 *qc;
+
+	if (WARN_ON(is_unicast_ether_addr(hdr->addr1) &&
+		    ieee80211_is_data_qos(hdr->frame_control) &&
+		    !sta))
+		return;
+
+	if (is_unicast_ether_addr(hdr->addr1) &&
+	    ieee80211_is_data_qos(hdr->frame_control) &&
+	    sta->plink_state == NL80211_PLINK_ESTAB)
+		pm = sta->local_pm;
+	else
+		pm = sdata->u.mesh.nonpeer_pm;
+
+	if (pm == NL80211_MESH_POWER_ACTIVE)
+		hdr->frame_control &= cpu_to_le16(~IEEE80211_FCTL_PM);
+	else
+		hdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_PM);
+
+	if (!ieee80211_is_data_qos(hdr->frame_control))
+		return;
+
+	qc = ieee80211_get_qos_ctl(hdr);
+
+	if ((is_unicast_ether_addr(hdr->addr1) &&
+	     pm == NL80211_MESH_POWER_DEEP_SLEEP) ||
+	    (is_multicast_ether_addr(hdr->addr1) &&
+	     sdata->u.mesh.ps_peers_deep_sleep > 0))
+		qc[1] |= (IEEE80211_QOS_CTL_MESH_PS_LEVEL >> 8);
+	else
+		qc[1] &= ~(IEEE80211_QOS_CTL_MESH_PS_LEVEL >> 8);
+}
+
+/**
+ * ieee80211_mps_sta_status_update - update buffering status of neighbor STA
+ *
+ * @sta: mesh STA
+ *
+ * called after change of peering status or non-peer/peer-specific power mode
+ */
+void ieee80211_mps_sta_status_update(struct sta_info *sta)
+{
+	enum nl80211_mesh_power_mode pm;
+	bool do_buffer;
+
+	/*
+	 * use peer-specific power mode if peering is established and the
+	 * peer's power mode is known
+	 */
+	if (sta->plink_state == NL80211_PLINK_ESTAB &&
+	    sta->peer_pm != NL80211_MESH_POWER_UNKNOWN)
+		pm = sta->peer_pm;
+	else
+		pm = sta->nonpeer_pm;
+
+	do_buffer = (pm != NL80211_MESH_POWER_ACTIVE);
+
+	/* Don't let the same PS state be set twice */
+	if (test_sta_flag(sta, WLAN_STA_PS_STA) == do_buffer)
+		return;
+
+	if (do_buffer) {
+		set_sta_flag(sta, WLAN_STA_PS_STA);
+		atomic_inc(&sta->sdata->u.mesh.ps.num_sta_ps);
+		mps_dbg(sta->sdata, "start PS buffering frames towards %pM\n",
+			sta->sta.addr);
+	} else {
+		ieee80211_sta_ps_deliver_wakeup(sta);
+	}
+
+	/* clear the MPSP flags for non-peers or active STA */
+	if (sta->plink_state != NL80211_PLINK_ESTAB) {
+		clear_sta_flag(sta, WLAN_STA_MPSP_OWNER);
+		clear_sta_flag(sta, WLAN_STA_MPSP_RECIPIENT);
+	} else if (!do_buffer) {
+		clear_sta_flag(sta, WLAN_STA_MPSP_OWNER);
+	}
+}
+
+static void mps_set_sta_peer_pm(struct sta_info *sta,
+				struct ieee80211_hdr *hdr)
+{
+	enum nl80211_mesh_power_mode pm;
+	u8 *qc = ieee80211_get_qos_ctl(hdr);
+
+	/*
+	 * Test Power Management field of frame control (PW) and
+	 * mesh power save level subfield of QoS control field (PSL)
+	 *
+	 * | PM | PSL| Mesh PM |
+	 * +----+----+---------+
+	 * | 0  |Rsrv|  Active |
+	 * | 1  | 0  |  Light  |
+	 * | 1  | 1  |  Deep   |
+	 */
+	if (ieee80211_has_pm(hdr->frame_control)) {
+		if (qc[1] & (IEEE80211_QOS_CTL_MESH_PS_LEVEL >> 8))
+			pm = NL80211_MESH_POWER_DEEP_SLEEP;
+		else
+			pm = NL80211_MESH_POWER_LIGHT_SLEEP;
+	} else {
+		pm = NL80211_MESH_POWER_ACTIVE;
+	}
+
+	if (sta->peer_pm == pm)
+		return;
+
+	mps_dbg(sta->sdata, "STA %pM enters mode %d\n",
+		sta->sta.addr, pm);
+
+	sta->peer_pm = pm;
+
+	ieee80211_mps_sta_status_update(sta);
+}
+
+static void mps_set_sta_nonpeer_pm(struct sta_info *sta,
+				   struct ieee80211_hdr *hdr)
+{
+	enum nl80211_mesh_power_mode pm;
+
+	if (ieee80211_has_pm(hdr->frame_control))
+		pm = NL80211_MESH_POWER_DEEP_SLEEP;
+	else
+		pm = NL80211_MESH_POWER_ACTIVE;
+
+	if (sta->nonpeer_pm == pm)
+		return;
+
+	mps_dbg(sta->sdata, "STA %pM sets non-peer mode to %d\n",
+		sta->sta.addr, pm);
+
+	sta->nonpeer_pm = pm;
+
+	ieee80211_mps_sta_status_update(sta);
+}
+
+/**
+ * ieee80211_mps_rx_h_sta_process - frame receive handler for mesh powersave
+ *
+ * @sta: STA info that transmitted the frame
+ * @hdr: IEEE 802.11 (QoS) Header
+ */
+void ieee80211_mps_rx_h_sta_process(struct sta_info *sta,
+				    struct ieee80211_hdr *hdr)
+{
+	if (is_unicast_ether_addr(hdr->addr1) &&
+	    ieee80211_is_data_qos(hdr->frame_control)) {
+		/*
+		 * individually addressed QoS Data/Null frames contain
+		 * peer link-specific PS mode towards the local STA
+		 */
+		mps_set_sta_peer_pm(sta, hdr);
+
+		/* check for mesh Peer Service Period trigger frames */
+		ieee80211_mpsp_trigger_process(ieee80211_get_qos_ctl(hdr),
+					       sta, false, false);
+	} else {
+		/*
+		 * can only determine non-peer PS mode
+		 * (see IEEE802.11-2012 8.2.4.1.7)
+		 */
+		mps_set_sta_nonpeer_pm(sta, hdr);
+	}
+}
+
+
+/* mesh PS frame release */
+
+static void mpsp_trigger_send(struct sta_info *sta, bool rspi, bool eosp)
+{
+	struct ieee80211_sub_if_data *sdata = sta->sdata;
+	struct sk_buff *skb;
+	struct ieee80211_hdr *nullfunc;
+	struct ieee80211_tx_info *info;
+	u8 *qc;
+
+	skb = mps_qos_null_get(sta);
+	if (!skb)
+		return;
+
+	nullfunc = (struct ieee80211_hdr *) skb->data;
+	if (!eosp)
+		nullfunc->frame_control |=
+				cpu_to_le16(IEEE80211_FCTL_MOREDATA);
+	/*
+	 * | RSPI | EOSP |  MPSP triggering   |
+	 * +------+------+--------------------+
+	 * |  0   |  0   | local STA is owner |
+	 * |  0   |  1   | no MPSP (MPSP end) |
+	 * |  1   |  0   | both STA are owner |
+	 * |  1   |  1   | peer STA is owner  | see IEEE802.11-2012 13.14.9.2
+	 */
+	qc = ieee80211_get_qos_ctl(nullfunc);
+	if (rspi)
+		qc[1] |= (IEEE80211_QOS_CTL_RSPI >> 8);
+	if (eosp)
+		qc[0] |= IEEE80211_QOS_CTL_EOSP;
+
+	info = IEEE80211_SKB_CB(skb);
+
+	info->flags |= IEEE80211_TX_CTL_NO_PS_BUFFER |
+		       IEEE80211_TX_CTL_REQ_TX_STATUS;
+
+	mps_dbg(sdata, "sending MPSP trigger%s%s to %pM\n",
+		rspi ? " RSPI" : "", eosp ? " EOSP" : "", sta->sta.addr);
+
+	ieee80211_tx_skb(sdata, skb);
+}
+
+/**
+ * mpsp_qos_null_append - append QoS Null frame to MPSP skb queue if needed
+ *
+ * To properly end a mesh MPSP the last transmitted frame has to set the EOSP
+ * flag in the QoS Control field. In case the current tailing frame is not a
+ * QoS Data frame, append a QoS Null to carry the flag.
+ */
+static void mpsp_qos_null_append(struct sta_info *sta,
+				 struct sk_buff_head *frames)
+{
+	struct ieee80211_sub_if_data *sdata = sta->sdata;
+	struct sk_buff *new_skb, *skb = skb_peek_tail(frames);
+	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
+	struct ieee80211_tx_info *info;
+
+	if (ieee80211_is_data_qos(hdr->frame_control))
+		return;
+
+	new_skb = mps_qos_null_get(sta);
+	if (!new_skb)
+		return;
+
+	mps_dbg(sdata, "appending QoS Null in MPSP towards %pM\n",
+		sta->sta.addr);
+	/*
+	 * This frame has to be transmitted last. Assign lowest priority to
+	 * make sure it cannot pass other frames when releasing multiple ACs.
+	 */
+	new_skb->priority = 1;
+	skb_set_queue_mapping(new_skb, IEEE80211_AC_BK);
+	ieee80211_set_qos_hdr(sdata, new_skb);
+
+	info = IEEE80211_SKB_CB(new_skb);
+	info->control.vif = &sdata->vif;
+	info->flags |= IEEE80211_TX_INTFL_NEED_TXPROCESSING;
+
+	__skb_queue_tail(frames, new_skb);
+}
+
+/**
+ * mps_frame_deliver - transmit frames during mesh powersave
+ *
+ * @sta: STA info to transmit to
+ * @n_frames: number of frames to transmit. -1 for all
+ */
+static void mps_frame_deliver(struct sta_info *sta, int n_frames)
+{
+	struct ieee80211_sub_if_data *sdata = sta->sdata;
+	struct ieee80211_local *local = sdata->local;
+	int ac;
+	struct sk_buff_head frames;
+	struct sk_buff *skb;
+	bool more_data = false;
+
+	skb_queue_head_init(&frames);
+
+	/* collect frame(s) from buffers */
+	for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
+		while (n_frames != 0) {
+			skb = skb_dequeue(&sta->tx_filtered[ac]);
+			if (!skb) {
+				skb = skb_dequeue(
+					&sta->ps_tx_buf[ac]);
+				if (skb)
+					local->total_ps_buffered--;
+			}
+			if (!skb)
+				break;
+			n_frames--;
+			__skb_queue_tail(&frames, skb);
+		}
+
+		if (!skb_queue_empty(&sta->tx_filtered[ac]) ||
+		    !skb_queue_empty(&sta->ps_tx_buf[ac]))
+			more_data = true;
+	}
+
+	/* nothing to send? -> EOSP */
+	if (skb_queue_empty(&frames)) {
+		mpsp_trigger_send(sta, false, true);
+		return;
+	}
+
+	/* in a MPSP make sure the last skb is a QoS Data frame */
+	if (test_sta_flag(sta, WLAN_STA_MPSP_OWNER))
+		mpsp_qos_null_append(sta, &frames);
+
+	mps_dbg(sta->sdata, "sending %d frames to PS STA %pM\n",
+		skb_queue_len(&frames), sta->sta.addr);
+
+	/* prepare collected frames for transmission */
+	skb_queue_walk(&frames, skb) {
+		struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+		struct ieee80211_hdr *hdr = (void *) skb->data;
+
+		/*
+		 * Tell TX path to send this frame even though the
+		 * STA may still remain is PS mode after this frame
+		 * exchange.
+		 */
+		info->flags |= IEEE80211_TX_CTL_NO_PS_BUFFER;
+
+		if (more_data || !skb_queue_is_last(&frames, skb))
+			hdr->frame_control |=
+				cpu_to_le16(IEEE80211_FCTL_MOREDATA);
+		else
+			hdr->frame_control &=
+				cpu_to_le16(~IEEE80211_FCTL_MOREDATA);
+
+		if (skb_queue_is_last(&frames, skb) &&
+		    ieee80211_is_data_qos(hdr->frame_control)) {
+			u8 *qoshdr = ieee80211_get_qos_ctl(hdr);
+
+			/* MPSP trigger frame ends service period */
+			*qoshdr |= IEEE80211_QOS_CTL_EOSP;
+			info->flags |= IEEE80211_TX_CTL_REQ_TX_STATUS;
+		}
+	}
+
+	ieee80211_add_pending_skbs(local, &frames);
+	sta_info_recalc_tim(sta);
+}
+
+/**
+ * ieee80211_mpsp_trigger_process - track status of mesh Peer Service Periods
+ *
+ * @qc: QoS Control field
+ * @sta: peer to start a MPSP with
+ * @tx: frame was transmitted by the local STA
+ * @acked: frame has been transmitted successfully
+ *
+ * NOTE: active mode STA may only serve as MPSP owner
+ */
+void ieee80211_mpsp_trigger_process(u8 *qc, struct sta_info *sta,
+				    bool tx, bool acked)
+{
+	u8 rspi = qc[1] & (IEEE80211_QOS_CTL_RSPI >> 8);
+	u8 eosp = qc[0] & IEEE80211_QOS_CTL_EOSP;
+
+	if (tx) {
+		if (rspi && acked)
+			set_sta_flag(sta, WLAN_STA_MPSP_RECIPIENT);
+
+		if (eosp)
+			clear_sta_flag(sta, WLAN_STA_MPSP_OWNER);
+		else if (acked &&
+			 test_sta_flag(sta, WLAN_STA_PS_STA) &&
+			 !test_and_set_sta_flag(sta, WLAN_STA_MPSP_OWNER))
+			mps_frame_deliver(sta, -1);
+	} else {
+		if (eosp)
+			clear_sta_flag(sta, WLAN_STA_MPSP_RECIPIENT);
+		else if (sta->local_pm != NL80211_MESH_POWER_ACTIVE)
+			set_sta_flag(sta, WLAN_STA_MPSP_RECIPIENT);
+
+		if (rspi && !test_and_set_sta_flag(sta, WLAN_STA_MPSP_OWNER))
+			mps_frame_deliver(sta, -1);
+	}
+}
+
+/**
+ * ieee80211_mps_frame_release - release buffered frames in response to beacon
+ *
+ * @sta: mesh STA
+ * @elems: beacon IEs
+ *
+ * For peers if we have individually-addressed frames buffered or the peer
+ * indicates buffered frames, send a corresponding MPSP trigger frame. Since
+ * we do not evaluate the awake window duration, QoS Nulls are used as MPSP
+ * trigger frames. If the neighbour STA is not a peer, only send single frames.
+ */
+void ieee80211_mps_frame_release(struct sta_info *sta,
+				 struct ieee802_11_elems *elems)
+{
+	int ac, buffer_local = 0;
+	bool has_buffered = false;
+
+	/* TIM map only for LLID <= IEEE80211_MAX_AID */
+	if (sta->plink_state == NL80211_PLINK_ESTAB)
+		has_buffered = ieee80211_check_tim(elems->tim, elems->tim_len,
+				le16_to_cpu(sta->llid) % IEEE80211_MAX_AID);
+
+	if (has_buffered)
+		mps_dbg(sta->sdata, "%pM indicates buffered frames\n",
+			sta->sta.addr);
+
+	/* only transmit to PS STA with announced, non-zero awake window */
+	if (test_sta_flag(sta, WLAN_STA_PS_STA) &&
+	    (!elems->awake_window || !le16_to_cpu(*elems->awake_window)))
+		return;
+
+	for (ac = 0; ac < IEEE80211_NUM_ACS; ac++)
+		buffer_local += skb_queue_len(&sta->ps_tx_buf[ac]) +
+				skb_queue_len(&sta->tx_filtered[ac]);
+
+	if (!has_buffered && !buffer_local)
+		return;
+
+	if (sta->plink_state == NL80211_PLINK_ESTAB)
+		mpsp_trigger_send(sta, has_buffered, !buffer_local);
+	else
+		mps_frame_deliver(sta, 1);
+}
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index a19089565c4b..c98be0593756 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1452,6 +1452,10 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx)
 		}
 	}
 
+	/* mesh power save support */
+	if (ieee80211_vif_is_mesh(&rx->sdata->vif))
+		ieee80211_mps_rx_h_sta_process(sta, hdr);
+
 	/*
 	 * Drop (qos-)data::nullfunc frames silently, since they
 	 * are used only to control station power saving mode.
@@ -2090,7 +2094,10 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
 	if (is_multicast_ether_addr(fwd_hdr->addr1)) {
 		IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, fwded_mcast);
 		memcpy(fwd_hdr->addr2, sdata->vif.addr, ETH_ALEN);
+		/* update power mode indication when forwarding */
+		ieee80211_mps_set_frame_flags(sdata, NULL, fwd_hdr);
 	} else if (!mesh_nexthop_lookup(fwd_skb, sdata)) {
+		/* mesh power mode flags updated in mesh_nexthop_lookup */
 		IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, fwded_unicast);
 	} else {
 		/* unable to resolve next hop */
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 227233c3ff7f..47a0f0601768 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -120,6 +120,8 @@ static void cleanup_single_sta(struct sta_info *sta)
 		if (sta->sdata->vif.type == NL80211_IFTYPE_AP ||
 		    sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
 			ps = &sdata->bss->ps;
+		else if (ieee80211_vif_is_mesh(&sdata->vif))
+			ps = &sdata->u.mesh.ps;
 		else
 			return;
 
@@ -587,6 +589,12 @@ void sta_info_recalc_tim(struct sta_info *sta)
 
 		ps = &sta->sdata->bss->ps;
 		id = sta->sta.aid;
+#ifdef CONFIG_MAC80211_MESH
+	} else if (ieee80211_vif_is_mesh(&sta->sdata->vif)) {
+		ps = &sta->sdata->u.mesh.ps;
+		/* TIM map only for PLID <= IEEE80211_MAX_AID */
+		id = le16_to_cpu(sta->plid) % IEEE80211_MAX_AID;
+#endif
 	} else {
 		return;
 	}
@@ -745,8 +753,9 @@ static bool sta_info_cleanup_expire_buffered(struct ieee80211_local *local,
 	bool have_buffered = false;
 	int ac;
 
-	/* This is only necessary for stations on BSS interfaces */
-	if (!sta->sdata->bss)
+	/* This is only necessary for stations on BSS/MBSS interfaces */
+	if (!sta->sdata->bss &&
+	    !ieee80211_vif_is_mesh(&sta->sdata->vif))
 		return false;
 
 	for (ac = 0; ac < IEEE80211_NUM_ACS; ac++)
@@ -934,6 +943,11 @@ void ieee80211_sta_expire(struct ieee80211_sub_if_data *sdata,
 		if (time_after(jiffies, sta->last_rx + exp_time)) {
 			sta_dbg(sta->sdata, "expiring inactive STA %pM\n",
 				sta->sta.addr);
+
+			if (ieee80211_vif_is_mesh(&sdata->vif) &&
+			    test_sta_flag(sta, WLAN_STA_PS_STA))
+				atomic_dec(&sdata->u.mesh.ps.num_sta_ps);
+
 			WARN_ON(__sta_info_destroy(sta));
 		}
 	}
@@ -992,6 +1006,8 @@ static void clear_sta_ps_flags(void *_sta)
 	if (sdata->vif.type == NL80211_IFTYPE_AP ||
 	    sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
 		ps = &sdata->bss->ps;
+	else if (ieee80211_vif_is_mesh(&sdata->vif))
+		ps = &sdata->u.mesh.ps;
 	else
 		return;
 
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index af7d78aa5523..5a1deba2c645 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -56,6 +56,8 @@
  * @WLAN_STA_INSERTED: This station is inserted into the hash table.
  * @WLAN_STA_RATE_CONTROL: rate control was initialized for this station.
  * @WLAN_STA_TOFFSET_KNOWN: toffset calculated for this station is valid.
+ * @WLAN_STA_MPSP_OWNER: local STA is owner of a mesh Peer Service Period.
+ * @WLAN_STA_MPSP_RECIPIENT: local STA is recipient of a MPSP.
  */
 enum ieee80211_sta_info_flags {
 	WLAN_STA_AUTH,
@@ -78,6 +80,8 @@ enum ieee80211_sta_info_flags {
 	WLAN_STA_INSERTED,
 	WLAN_STA_RATE_CONTROL,
 	WLAN_STA_TOFFSET_KNOWN,
+	WLAN_STA_MPSP_OWNER,
+	WLAN_STA_MPSP_RECIPIENT,
 };
 
 #define ADDBA_RESP_INTERVAL HZ
@@ -282,6 +286,9 @@ struct sta_ampdu_mlme {
  * @t_offset_setpoint: reference timing offset of this sta to be used when
  * 	calculating clockdrift
  * @ch_width: peer's channel width
+ * @local_pm: local link-specific power save mode
+ * @peer_pm: peer-specific power save mode towards local STA
+ * @nonpeer_pm: STA power save mode towards non-peer neighbors
  * @debugfs: debug filesystem info
  * @dead: set to true when sta is unlinked
  * @uploaded: set to true when sta is uploaded to the driver
@@ -379,6 +386,10 @@ struct sta_info {
 	s64 t_offset;
 	s64 t_offset_setpoint;
 	enum nl80211_chan_width ch_width;
+	/* mesh power save */
+	enum nl80211_mesh_power_mode local_pm;
+	enum nl80211_mesh_power_mode peer_pm;
+	enum nl80211_mesh_power_mode nonpeer_pm;
 #endif
 
 #ifdef CONFIG_MAC80211_DEBUGFS
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index d041de056b7f..43439203f4e4 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -472,6 +472,13 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
 			return;
 		}
 
+		/* mesh Peer Service Period support */
+		if (ieee80211_vif_is_mesh(&sta->sdata->vif) &&
+		    ieee80211_is_data_qos(fc))
+			ieee80211_mpsp_trigger_process(
+					ieee80211_get_qos_ctl(hdr),
+					sta, true, acked);
+
 		if ((local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL) &&
 		    (rates_idx != -1))
 			sta->last_tx_rate = info->status.rates[rates_idx];
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 7892b0a8873e..2ef0e19b06bb 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -329,6 +329,8 @@ static void purge_old_ps_buffers(struct ieee80211_local *local)
 
 		if (sdata->vif.type == NL80211_IFTYPE_AP)
 			ps = &sdata->u.ap.ps;
+		else if (ieee80211_vif_is_mesh(&sdata->vif))
+			ps = &sdata->u.mesh.ps;
 		else
 			continue;
 
@@ -372,18 +374,20 @@ ieee80211_tx_h_multicast_ps_buf(struct ieee80211_tx_data *tx)
 	/*
 	 * broadcast/multicast frame
 	 *
-	 * If any of the associated stations is in power save mode,
+	 * If any of the associated/peer stations is in power save mode,
 	 * the frame is buffered to be sent after DTIM beacon frame.
 	 * This is done either by the hardware or us.
 	 */
 
-	/* powersaving STAs currently only in AP/VLAN mode */
+	/* powersaving STAs currently only in AP/VLAN/mesh mode */
 	if (tx->sdata->vif.type == NL80211_IFTYPE_AP ||
 	    tx->sdata->vif.type == NL80211_IFTYPE_AP_VLAN) {
 		if (!tx->sdata->bss)
 			return TX_CONTINUE;
 
 		ps = &tx->sdata->bss->ps;
+	} else if (ieee80211_vif_is_mesh(&tx->sdata->vif)) {
+		ps = &tx->sdata->u.mesh.ps;
 	} else {
 		return TX_CONTINUE;
 	}
@@ -1473,12 +1477,14 @@ void ieee80211_xmit(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb,
 	hdr = (struct ieee80211_hdr *) skb->data;
 	info->control.vif = &sdata->vif;
 
-	if (ieee80211_vif_is_mesh(&sdata->vif) &&
-	    ieee80211_is_data(hdr->frame_control) &&
-	    !is_multicast_ether_addr(hdr->addr1) &&
-	    mesh_nexthop_resolve(skb, sdata)) {
-		/* skb queued: don't free */
-		return;
+	if (ieee80211_vif_is_mesh(&sdata->vif)) {
+		if (ieee80211_is_data(hdr->frame_control) &&
+		    is_unicast_ether_addr(hdr->addr1)) {
+			if (mesh_nexthop_resolve(skb, sdata))
+				return; /* skb queued: don't free */
+		} else {
+			ieee80211_mps_set_frame_flags(sdata, NULL, hdr);
+		}
 	}
 
 	ieee80211_set_qos_hdr(sdata, skb);
@@ -2445,12 +2451,14 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw,
 				    2 + /* NULL SSID */
 				    2 + 8 + /* supported rates */
 				    2 + 3 + /* DS params */
+				    256 + /* TIM IE */
 				    2 + (IEEE80211_MAX_SUPP_RATES - 8) +
 				    2 + sizeof(struct ieee80211_ht_cap) +
 				    2 + sizeof(struct ieee80211_ht_operation) +
 				    2 + sdata->u.mesh.mesh_id_len +
 				    2 + sizeof(struct ieee80211_meshconf_ie) +
-				    sdata->u.mesh.ie_len);
+				    sdata->u.mesh.ie_len +
+				    2 + sizeof(__le16)); /* awake window */
 		if (!skb)
 			goto out;
 
@@ -2462,6 +2470,7 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw,
 		eth_broadcast_addr(mgmt->da);
 		memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN);
 		memcpy(mgmt->bssid, sdata->vif.addr, ETH_ALEN);
+		ieee80211_mps_set_frame_flags(sdata, NULL, (void *) mgmt);
 		mgmt->u.beacon.beacon_int =
 			cpu_to_le16(sdata->vif.bss_conf.beacon_int);
 		mgmt->u.beacon.capab_info |= cpu_to_le16(
@@ -2475,12 +2484,14 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw,
 
 		if (ieee80211_add_srates_ie(sdata, skb, true, band) ||
 		    mesh_add_ds_params_ie(skb, sdata) ||
+		    ieee80211_beacon_add_tim(sdata, &ifmsh->ps, skb) ||
 		    ieee80211_add_ext_srates_ie(sdata, skb, true, band) ||
 		    mesh_add_rsn_ie(skb, sdata) ||
 		    mesh_add_ht_cap_ie(skb, sdata) ||
 		    mesh_add_ht_oper_ie(skb, sdata) ||
 		    mesh_add_meshid_ie(skb, sdata) ||
 		    mesh_add_meshconf_ie(skb, sdata) ||
+		    mesh_add_awake_window_ie(skb, sdata) ||
 		    mesh_add_vendor_ies(skb, sdata)) {
 			pr_err("o11s: couldn't add ies!\n");
 			goto out;
@@ -2734,6 +2745,8 @@ ieee80211_get_buffered_bc(struct ieee80211_hw *hw,
 			goto out;
 
 		ps = &sdata->u.ap.ps;
+	} else if (ieee80211_vif_is_mesh(&sdata->vif)) {
+		ps = &sdata->u.mesh.ps;
 	} else {
 		goto out;
 	}
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 139ad9b66c39..6cb71a350edd 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -805,6 +805,10 @@ u32 ieee802_11_parse_elems_crc(u8 *start, size_t len,
 			elems->peering = pos;
 			elems->peering_len = elen;
 			break;
+		case WLAN_EID_MESH_AWAKE_WINDOW:
+			if (elen >= 2)
+				elems->awake_window = (void *)pos;
+			break;
 		case WLAN_EID_PREQ:
 			elems->preq = pos;
 			elems->preq_len = elen;
diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c
index 906f00cd6d2f..afba19cb6f87 100644
--- a/net/mac80211/wme.c
+++ b/net/mac80211/wme.c
@@ -191,6 +191,15 @@ void ieee80211_set_qos_hdr(struct ieee80211_sub_if_data *sdata,
 
 	/* qos header is 2 bytes */
 	*p++ = ack_policy | tid;
-	*p = ieee80211_vif_is_mesh(&sdata->vif) ?
-		(IEEE80211_QOS_CTL_MESH_CONTROL_PRESENT >> 8) : 0;
+	if (ieee80211_vif_is_mesh(&sdata->vif)) {
+		/* preserve RSPI and Mesh PS Level bit */
+		*p &= ((IEEE80211_QOS_CTL_RSPI |
+			IEEE80211_QOS_CTL_MESH_PS_LEVEL) >> 8);
+
+		/* Nulls don't have a mesh header (frame body) */
+		if (!ieee80211_is_qos_nullfunc(hdr->frame_control))
+			*p |= (IEEE80211_QOS_CTL_MESH_CONTROL_PRESENT >> 8);
+	} else {
+		*p = 0;
+	}
 }
-- 
cgit v1.2.3


From 16559ae48c76f1ceb970b9719dea62b77eb5d06b Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 4 Feb 2013 15:35:26 -0800
Subject: kgdb: remove #include <linux/serial_8250.h> from kgdb.h

There's no reason kgdb.h itself needs to include the 8250 serial port
header file.  So push it down to the _very_ limited number of individual
drivers that need the values in that file, and fix up the places where
people really wanted serial_core.h and platform_device.h.

Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/gma500/cdv_intel_dp.c | 1 +
 drivers/tty/serial/kgdb_nmi.c         | 1 +
 drivers/video/auo_k190x.c             | 1 +
 drivers/video/backlight/ot200_bl.c    | 1 +
 include/linux/kgdb.h                  | 1 -
 kernel/debug/debug_core.c             | 1 +
 kernel/debug/gdbstub.c                | 1 +
 7 files changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/gpu/drm/gma500/cdv_intel_dp.c b/drivers/gpu/drm/gma500/cdv_intel_dp.c
index 51044cc55cf2..88d9ef6b5b4a 100644
--- a/drivers/gpu/drm/gma500/cdv_intel_dp.c
+++ b/drivers/gpu/drm/gma500/cdv_intel_dp.c
@@ -27,6 +27,7 @@
 
 #include <linux/i2c.h>
 #include <linux/slab.h>
+#include <linux/module.h>
 #include <drm/drmP.h>
 #include <drm/drm_crtc.h>
 #include <drm/drm_crtc_helper.h>
diff --git a/drivers/tty/serial/kgdb_nmi.c b/drivers/tty/serial/kgdb_nmi.c
index 26a50b0c868b..5dafcf1c227b 100644
--- a/drivers/tty/serial/kgdb_nmi.c
+++ b/drivers/tty/serial/kgdb_nmi.c
@@ -23,6 +23,7 @@
 #include <linux/tty.h>
 #include <linux/tty_driver.h>
 #include <linux/tty_flip.h>
+#include <linux/serial_core.h>
 #include <linux/interrupt.h>
 #include <linux/hrtimer.h>
 #include <linux/tick.h>
diff --git a/drivers/video/auo_k190x.c b/drivers/video/auo_k190x.c
index 97f79356141e..53846cb534d4 100644
--- a/drivers/video/auo_k190x.c
+++ b/drivers/video/auo_k190x.c
@@ -11,6 +11,7 @@
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/gpio.h>
+#include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/fb.h>
 #include <linux/delay.h>
diff --git a/drivers/video/backlight/ot200_bl.c b/drivers/video/backlight/ot200_bl.c
index 469cf0f109d2..fdbb6ee5027c 100644
--- a/drivers/video/backlight/ot200_bl.c
+++ b/drivers/video/backlight/ot200_bl.c
@@ -14,6 +14,7 @@
 #include <linux/fb.h>
 #include <linux/backlight.h>
 #include <linux/gpio.h>
+#include <linux/platform_device.h>
 #include <linux/cs5535.h>
 
 static struct cs5535_mfgpt_timer *pwm_timer;
diff --git a/include/linux/kgdb.h b/include/linux/kgdb.h
index 4dff0c6ed58f..c6e091bf39a5 100644
--- a/include/linux/kgdb.h
+++ b/include/linux/kgdb.h
@@ -13,7 +13,6 @@
 #ifndef _KGDB_H_
 #define _KGDB_H_
 
-#include <linux/serial_8250.h>
 #include <linux/linkage.h>
 #include <linux/init.h>
 #include <linux/atomic.h>
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
index 9a61738cefc8..c26278fd4851 100644
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
@@ -29,6 +29,7 @@
  */
 #include <linux/pid_namespace.h>
 #include <linux/clocksource.h>
+#include <linux/serial_core.h>
 #include <linux/interrupt.h>
 #include <linux/spinlock.h>
 #include <linux/console.h>
diff --git a/kernel/debug/gdbstub.c b/kernel/debug/gdbstub.c
index ce615e064482..38573f35a5ad 100644
--- a/kernel/debug/gdbstub.c
+++ b/kernel/debug/gdbstub.c
@@ -31,6 +31,7 @@
 #include <linux/kernel.h>
 #include <linux/kgdb.h>
 #include <linux/kdb.h>
+#include <linux/serial_core.h>
 #include <linux/reboot.h>
 #include <linux/uaccess.h>
 #include <asm/cacheflush.h>
-- 
cgit v1.2.3


From 7be88b4ccb62737abe4bf27f9e66e224a70a12c4 Mon Sep 17 00:00:00 2001
From: Peter Hurley <peter@hurleysoftware.com>
Date: Wed, 30 Jan 2013 12:43:51 -0500
Subject: tty: Document required behavior of tty driver close()

If the tty driver open() fails, the tty driver close() is still
called during the resultant tty release.

Signed-off-by: Peter Hurley <peter@hurleysoftware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/tty_driver.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h
index dd976cfb6131..756a60989294 100644
--- a/include/linux/tty_driver.h
+++ b/include/linux/tty_driver.h
@@ -40,6 +40,7 @@
  * void (*close)(struct tty_struct * tty, struct file * filp);
  *
  * 	This routine is called when a particular tty device is closed.
+ *	Note: called even if the corresponding open() failed.
  *
  *	Required method.
  *
-- 
cgit v1.2.3


From c14b78e7decd0d1d5add6a4604feb8609fe920a9 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 5 Feb 2013 01:50:26 +0100
Subject: netfilter: nfnetlink: add mutex per subsystem

This patch replaces the global lock to one lock per subsystem.
The per-subsystem lock avoids that processes operating
with different subsystems are synchronized.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/nfnetlink.h  |  4 +--
 net/netfilter/ipset/ip_set_core.c    | 26 +++++++++---------
 net/netfilter/nf_conntrack_netlink.c | 12 ++++-----
 net/netfilter/nfnetlink.c            | 52 ++++++++++++++++++++++--------------
 4 files changed, 53 insertions(+), 41 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h
index 4966ddec039b..ecbb8e495912 100644
--- a/include/linux/netfilter/nfnetlink.h
+++ b/include/linux/netfilter/nfnetlink.h
@@ -34,8 +34,8 @@ extern int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, unsigne
 extern int nfnetlink_set_err(struct net *net, u32 pid, u32 group, int error);
 extern int nfnetlink_unicast(struct sk_buff *skb, struct net *net, u_int32_t pid, int flags);
 
-extern void nfnl_lock(void);
-extern void nfnl_unlock(void);
+extern void nfnl_lock(__u8 subsys_id);
+extern void nfnl_unlock(__u8 subsys_id);
 
 #define MODULE_ALIAS_NFNL_SUBSYS(subsys) \
 	MODULE_ALIAS("nfnetlink-subsys-" __stringify(subsys))
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 6d6d8f2b033e..f82b2e606cfd 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -88,14 +88,14 @@ find_set_type(const char *name, u8 family, u8 revision)
 static bool
 load_settype(const char *name)
 {
-	nfnl_unlock();
+	nfnl_unlock(NFNL_SUBSYS_IPSET);
 	pr_debug("try to load ip_set_%s\n", name);
 	if (request_module("ip_set_%s", name) < 0) {
 		pr_warning("Can't find ip_set type %s\n", name);
-		nfnl_lock();
+		nfnl_lock(NFNL_SUBSYS_IPSET);
 		return false;
 	}
-	nfnl_lock();
+	nfnl_lock(NFNL_SUBSYS_IPSET);
 	return true;
 }
 
@@ -532,7 +532,7 @@ ip_set_nfnl_get(const char *name)
 	ip_set_id_t i, index = IPSET_INVALID_ID;
 	struct ip_set *s;
 
-	nfnl_lock();
+	nfnl_lock(NFNL_SUBSYS_IPSET);
 	for (i = 0; i < ip_set_max; i++) {
 		s = nfnl_set(i);
 		if (s != NULL && STREQ(s->name, name)) {
@@ -541,7 +541,7 @@ ip_set_nfnl_get(const char *name)
 			break;
 		}
 	}
-	nfnl_unlock();
+	nfnl_unlock(NFNL_SUBSYS_IPSET);
 
 	return index;
 }
@@ -561,13 +561,13 @@ ip_set_nfnl_get_byindex(ip_set_id_t index)
 	if (index > ip_set_max)
 		return IPSET_INVALID_ID;
 
-	nfnl_lock();
+	nfnl_lock(NFNL_SUBSYS_IPSET);
 	set = nfnl_set(index);
 	if (set)
 		__ip_set_get(set);
 	else
 		index = IPSET_INVALID_ID;
-	nfnl_unlock();
+	nfnl_unlock(NFNL_SUBSYS_IPSET);
 
 	return index;
 }
@@ -584,11 +584,11 @@ void
 ip_set_nfnl_put(ip_set_id_t index)
 {
 	struct ip_set *set;
-	nfnl_lock();
+	nfnl_lock(NFNL_SUBSYS_IPSET);
 	set = nfnl_set(index);
 	if (set != NULL)
 		__ip_set_put(set);
-	nfnl_unlock();
+	nfnl_unlock(NFNL_SUBSYS_IPSET);
 }
 EXPORT_SYMBOL_GPL(ip_set_nfnl_put);
 
@@ -1763,10 +1763,10 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len)
 			goto done;
 		}
 		req_get->set.name[IPSET_MAXNAMELEN - 1] = '\0';
-		nfnl_lock();
+		nfnl_lock(NFNL_SUBSYS_IPSET);
 		find_set_and_id(req_get->set.name, &id);
 		req_get->set.index = id;
-		nfnl_unlock();
+		nfnl_unlock(NFNL_SUBSYS_IPSET);
 		goto copy;
 	}
 	case IP_SET_OP_GET_BYINDEX: {
@@ -1778,11 +1778,11 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len)
 			ret = -EINVAL;
 			goto done;
 		}
-		nfnl_lock();
+		nfnl_lock(NFNL_SUBSYS_IPSET);
 		set = nfnl_set(req_get->set.index);
 		strncpy(req_get->set.name, set ? set->name : "",
 			IPSET_MAXNAMELEN);
-		nfnl_unlock();
+		nfnl_unlock(NFNL_SUBSYS_IPSET);
 		goto copy;
 	}
 	default:
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 2334cc5d2b16..d490a300ce2b 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1256,13 +1256,13 @@ ctnetlink_parse_nat_setup(struct nf_conn *ct,
 	if (!parse_nat_setup) {
 #ifdef CONFIG_MODULES
 		rcu_read_unlock();
-		nfnl_unlock();
+		nfnl_unlock(NFNL_SUBSYS_CTNETLINK);
 		if (request_module("nf-nat") < 0) {
-			nfnl_lock();
+			nfnl_lock(NFNL_SUBSYS_CTNETLINK);
 			rcu_read_lock();
 			return -EOPNOTSUPP;
 		}
-		nfnl_lock();
+		nfnl_lock(NFNL_SUBSYS_CTNETLINK);
 		rcu_read_lock();
 		if (nfnetlink_parse_nat_setup_hook)
 			return -EAGAIN;
@@ -1274,13 +1274,13 @@ ctnetlink_parse_nat_setup(struct nf_conn *ct,
 	if (err == -EAGAIN) {
 #ifdef CONFIG_MODULES
 		rcu_read_unlock();
-		nfnl_unlock();
+		nfnl_unlock(NFNL_SUBSYS_CTNETLINK);
 		if (request_module("nf-nat-%u", nf_ct_l3num(ct)) < 0) {
-			nfnl_lock();
+			nfnl_lock(NFNL_SUBSYS_CTNETLINK);
 			rcu_read_lock();
 			return -EOPNOTSUPP;
 		}
-		nfnl_lock();
+		nfnl_lock(NFNL_SUBSYS_CTNETLINK);
 		rcu_read_lock();
 #else
 		err = -EOPNOTSUPP;
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 58a09b7c3f6d..d578ec251712 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -36,8 +36,10 @@ MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NETFILTER);
 
 static char __initdata nfversion[] = "0.30";
 
-static const struct nfnetlink_subsystem __rcu *subsys_table[NFNL_SUBSYS_COUNT];
-static DEFINE_MUTEX(nfnl_mutex);
+static struct {
+	struct mutex				mutex;
+	const struct nfnetlink_subsystem __rcu	*subsys;
+} table[NFNL_SUBSYS_COUNT];
 
 static const int nfnl_group2type[NFNLGRP_MAX+1] = {
 	[NFNLGRP_CONNTRACK_NEW]		= NFNL_SUBSYS_CTNETLINK,
@@ -48,27 +50,32 @@ static const int nfnl_group2type[NFNLGRP_MAX+1] = {
 	[NFNLGRP_CONNTRACK_EXP_DESTROY] = NFNL_SUBSYS_CTNETLINK_EXP,
 };
 
-void nfnl_lock(void)
+void nfnl_lock(__u8 subsys_id)
 {
-	mutex_lock(&nfnl_mutex);
+	mutex_lock(&table[subsys_id].mutex);
 }
 EXPORT_SYMBOL_GPL(nfnl_lock);
 
-void nfnl_unlock(void)
+void nfnl_unlock(__u8 subsys_id)
 {
-	mutex_unlock(&nfnl_mutex);
+	mutex_unlock(&table[subsys_id].mutex);
 }
 EXPORT_SYMBOL_GPL(nfnl_unlock);
 
+static struct mutex *nfnl_get_lock(__u8 subsys_id)
+{
+	return &table[subsys_id].mutex;
+}
+
 int nfnetlink_subsys_register(const struct nfnetlink_subsystem *n)
 {
-	nfnl_lock();
-	if (subsys_table[n->subsys_id]) {
-		nfnl_unlock();
+	nfnl_lock(n->subsys_id);
+	if (table[n->subsys_id].subsys) {
+		nfnl_unlock(n->subsys_id);
 		return -EBUSY;
 	}
-	rcu_assign_pointer(subsys_table[n->subsys_id], n);
-	nfnl_unlock();
+	rcu_assign_pointer(table[n->subsys_id].subsys, n);
+	nfnl_unlock(n->subsys_id);
 
 	return 0;
 }
@@ -76,9 +83,9 @@ EXPORT_SYMBOL_GPL(nfnetlink_subsys_register);
 
 int nfnetlink_subsys_unregister(const struct nfnetlink_subsystem *n)
 {
-	nfnl_lock();
-	subsys_table[n->subsys_id] = NULL;
-	nfnl_unlock();
+	nfnl_lock(n->subsys_id);
+	table[n->subsys_id].subsys = NULL;
+	nfnl_unlock(n->subsys_id);
 	synchronize_rcu();
 	return 0;
 }
@@ -91,7 +98,7 @@ static inline const struct nfnetlink_subsystem *nfnetlink_get_subsys(u_int16_t t
 	if (subsys_id >= NFNL_SUBSYS_COUNT)
 		return NULL;
 
-	return rcu_dereference(subsys_table[subsys_id]);
+	return rcu_dereference(table[subsys_id].subsys);
 }
 
 static inline const struct nfnl_callback *
@@ -175,6 +182,7 @@ replay:
 		struct nlattr *cda[ss->cb[cb_id].attr_count + 1];
 		struct nlattr *attr = (void *)nlh + min_len;
 		int attrlen = nlh->nlmsg_len - min_len;
+		__u8 subsys_id = NFNL_SUBSYS_ID(type);
 
 		err = nla_parse(cda, ss->cb[cb_id].attr_count,
 				attr, attrlen, ss->cb[cb_id].policy);
@@ -189,10 +197,9 @@ replay:
 			rcu_read_unlock();
 		} else {
 			rcu_read_unlock();
-			nfnl_lock();
-			if (rcu_dereference_protected(
-					subsys_table[NFNL_SUBSYS_ID(type)],
-					lockdep_is_held(&nfnl_mutex)) != ss ||
+			nfnl_lock(subsys_id);
+			if (rcu_dereference_protected(table[subsys_id].subsys,
+				lockdep_is_held(nfnl_get_lock(subsys_id))) != ss ||
 			    nfnetlink_find_client(type, ss) != nc)
 				err = -EAGAIN;
 			else if (nc->call)
@@ -200,7 +207,7 @@ replay:
 						   (const struct nlattr **)cda);
 			else
 				err = -EINVAL;
-			nfnl_unlock();
+			nfnl_unlock(subsys_id);
 		}
 		if (err == -EAGAIN)
 			goto replay;
@@ -267,6 +274,11 @@ static struct pernet_operations nfnetlink_net_ops = {
 
 static int __init nfnetlink_init(void)
 {
+	int i;
+
+	for (i=0; i<NFNL_SUBSYS_COUNT; i++)
+		mutex_init(&table[i].mutex);
+
 	pr_info("Netfilter messages via NETLINK v%s.\n", nfversion);
 	return register_pernet_subsys(&nfnetlink_net_ops);
 }
-- 
cgit v1.2.3


From 631b0cfdbd801ceae8762e8d287f15da26792ebe Mon Sep 17 00:00:00 2001
From: Yuanhan Liu <yuanhan.liu@linux.intel.com>
Date: Mon, 4 Feb 2013 14:28:48 -0800
Subject: mm: fix wrong comments about anon_vma lock

We use rwsem since commit 5a505085f043 ("mm/rmap: Convert the struct
anon_vma::mutex to an rwsem").  And most of comments are converted to
the new rwsem lock; while just 2 more missed from:

	 $ git grep 'anon_vma->mutex'

Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Cc: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmu_notifier.h | 2 +-
 mm/mmap.c                    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index bc823c4c028b..deca87452528 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -151,7 +151,7 @@ struct mmu_notifier_ops {
  * Therefore notifier chains can only be traversed when either
  *
  * 1. mmap_sem is held.
- * 2. One of the reverse map locks is held (i_mmap_mutex or anon_vma->mutex).
+ * 2. One of the reverse map locks is held (i_mmap_mutex or anon_vma->rwsem).
  * 3. No other concurrent thread can access the list (release)
  */
 struct mmu_notifier {
diff --git a/mm/mmap.c b/mm/mmap.c
index 35730ee9d515..d1e4124f3d0e 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2943,7 +2943,7 @@ static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping)
  * vma in this mm is backed by the same anon_vma or address_space.
  *
  * We can take all the locks in random order because the VM code
- * taking i_mmap_mutex or anon_vma->mutex outside the mmap_sem never
+ * taking i_mmap_mutex or anon_vma->rwsem outside the mmap_sem never
  * takes more than one of them in a row. Secondly we're protected
  * against a concurrent mm_take_all_locks() by the mm_all_locks_mutex.
  *
-- 
cgit v1.2.3


From 91c777d86752b00bb3a1d8efa3d8f7e1264f38a9 Mon Sep 17 00:00:00 2001
From: Glauber Costa <glommer@parallels.com>
Date: Mon, 4 Feb 2013 14:28:49 -0800
Subject: memcg: fix typo in kmemcg cache walk macro

The macro for_each_memcg_cache_index contains a silly yet potentially
deadly mistake.  Although the macro parameter is _idx, the loop tests
are done over i, not _idx.

This hasn't generated any problems so far, because all users use i as a
loop index.  However, while playing with an extension of the code I
ended using another loop index and the compiler was quick to complain.

Unfortunately, this is not the kind of thing that testing reveals =(

Signed-off-by: Glauber Costa <glommer@parallels.com>
Cc: Kamezawa Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Michal Hocko <mhocko@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 0108a56f814e..28bd5fa2ff2e 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -429,7 +429,7 @@ extern int memcg_limited_groups_array_size;
  * the slab_mutex must be held when looping through those caches
  */
 #define for_each_memcg_cache_index(_idx)	\
-	for ((_idx) = 0; i < memcg_limited_groups_array_size; (_idx)++)
+	for ((_idx) = 0; (_idx) < memcg_limited_groups_array_size; (_idx)++)
 
 static inline bool memcg_kmem_enabled(void)
 {
-- 
cgit v1.2.3


From 7fa4cf920c36a4736b8fa7c0e262dcf292d0cd36 Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <brouer@redhat.com>
Date: Mon, 4 Feb 2013 23:14:12 +0100
Subject: percpu_counter.h: comment code for better readability

Help people reading the percpu_counter code, to notice the ifdef
else statement that seperates CONFIG_SMP.

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 include/linux/percpu_counter.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h
index b9df9ed1adc0..d5dd4657c8d6 100644
--- a/include/linux/percpu_counter.h
+++ b/include/linux/percpu_counter.h
@@ -83,7 +83,7 @@ static inline int percpu_counter_initialized(struct percpu_counter *fbc)
 	return (fbc->counters != NULL);
 }
 
-#else
+#else /* !CONFIG_SMP */
 
 struct percpu_counter {
 	s64 count;
-- 
cgit v1.2.3


From 6d85d037d6247b06e1060b5e5ad0e4854a7d1e3b Mon Sep 17 00:00:00 2001
From: Benjamin Tissoires <benjamin.tissoires@gmail.com>
Date: Thu, 31 Jan 2013 17:22:23 +0100
Subject: HID: core: add "report" hook, called once the report has been parsed

This callback is called when the parsing of the report has been done
by hid-core (so after the calls to .event). The hid drivers can now
have access to the whole report by relying on the values stored in
the different fields.

Signed-off-by: Benjamin Tissoires <benjamin.tissoires@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-core.c | 4 ++++
 include/linux/hid.h    | 2 ++
 2 files changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index eb2ee11b6412..754098a7dd47 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -1195,6 +1195,7 @@ int hid_report_raw_event(struct hid_device *hid, int type, u8 *data, int size,
 {
 	struct hid_report_enum *report_enum = hid->report_enum + type;
 	struct hid_report *report;
+	struct hid_driver *hdrv;
 	unsigned int a;
 	int rsize, csize = size;
 	u8 *cdata = data;
@@ -1231,6 +1232,9 @@ int hid_report_raw_event(struct hid_device *hid, int type, u8 *data, int size,
 	if (hid->claimed != HID_CLAIMED_HIDRAW) {
 		for (a = 0; a < report->maxfield; a++)
 			hid_input_field(hid, report->field[a], cdata, interrupt);
+		hdrv = hid->driver;
+		if (hdrv && hdrv->report)
+			hdrv->report(hid, report);
 	}
 
 	if (hid->claimed & HID_CLAIMED_INPUT)
diff --git a/include/linux/hid.h b/include/linux/hid.h
index 7330a0fef0c0..9f56e9040f66 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -589,6 +589,7 @@ struct hid_usage_id {
  * @raw_event: if report in report_table, this hook is called (NULL means nop)
  * @usage_table: on which events to call event (NULL means all)
  * @event: if usage in usage_table, this hook is called (NULL means nop)
+ * @report: this hook is called after parsing a report (NULL means nop)
  * @report_fixup: called before report descriptor parsing (NULL means nop)
  * @input_mapping: invoked on input registering before mapping an usage
  * @input_mapped: invoked on input registering after mapping an usage
@@ -627,6 +628,7 @@ struct hid_driver {
 	const struct hid_usage_id *usage_table;
 	int (*event)(struct hid_device *hdev, struct hid_field *field,
 			struct hid_usage *usage, __s32 value);
+	void (*report)(struct hid_device *hdev, struct hid_report *report);
 
 	__u8 *(*report_fixup)(struct hid_device *hdev, __u8 *buf,
 			unsigned int *size);
-- 
cgit v1.2.3


From 155a345a52e6cda18946efe2529d99d5040fad6d Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Mon, 4 Feb 2013 08:10:08 -0500
Subject: sunrpc: copy scope ID in __rpc_copy_addr6

When copying an address, we should also copy the scopeid in the event
that this is a link-local address and the scope matters.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/clnt.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index 47354a25a927..6a7c2619a355 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -242,6 +242,7 @@ static inline bool __rpc_copy_addr6(struct sockaddr *dst,
 
 	dsin6->sin6_family = ssin6->sin6_family;
 	dsin6->sin6_addr = ssin6->sin6_addr;
+	dsin6->sin6_scope_id = ssin6->sin6_scope_id;
 	return true;
 }
 #else	/* !(IS_ENABLED(CONFIG_IPV6) */
-- 
cgit v1.2.3


From 5976687a2b3d1969f02aba16b80ad3ed79be6ad3 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Mon, 4 Feb 2013 12:50:00 -0500
Subject: sunrpc: move address copy/cmp/convert routines and prototypes from
 clnt.h to addr.h

These routines are used by server and client code, so having them in a
separate header would be best.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Acked-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/lockd/clntlock.c             |   2 +-
 fs/lockd/host.c                 |   1 +
 fs/lockd/mon.c                  |   1 +
 fs/lockd/svcsubs.c              |   2 +-
 fs/nfs/dns_resolve.c            |   2 +
 fs/nfs/nfs4client.c             |   1 +
 fs/nfs/nfs4filelayoutdev.c      |   1 +
 fs/nfs/nfs4namespace.c          |   1 +
 fs/nfs/super.c                  |   1 +
 fs/nfsd/fault_inject.c          |   2 +-
 fs/nfsd/nfs4state.c             |   2 +-
 fs/nfsd/nfscache.c              |   2 +-
 fs/nfsd/nfsctl.c                |   2 +-
 include/linux/sunrpc/addr.h     | 170 ++++++++++++++++++++++++++++++++++++++++
 include/linux/sunrpc/clnt.h     | 155 ------------------------------------
 net/sunrpc/addr.c               |   3 +-
 net/sunrpc/clnt.c               |   1 +
 net/sunrpc/rpcb_clnt.c          |   1 +
 net/sunrpc/svcauth_unix.c       |   2 +-
 net/sunrpc/xprtrdma/transport.c |   1 +
 net/sunrpc/xprtsock.c           |   1 +
 21 files changed, 191 insertions(+), 163 deletions(-)
 create mode 100644 include/linux/sunrpc/addr.h

(limited to 'include/linux')

diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index ca0a08001449..4885b53e56bd 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -11,7 +11,7 @@
 #include <linux/slab.h>
 #include <linux/time.h>
 #include <linux/nfs_fs.h>
-#include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/addr.h>
 #include <linux/sunrpc/svc.h>
 #include <linux/lockd/lockd.h>
 #include <linux/kthread.h>
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index 0e17090c310f..764c4d2ed804 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -13,6 +13,7 @@
 #include <linux/in.h>
 #include <linux/in6.h>
 #include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/addr.h>
 #include <linux/sunrpc/svc.h>
 #include <linux/lockd/lockd.h>
 #include <linux/mutex.h>
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 3c2cfc683631..1812f026960c 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -12,6 +12,7 @@
 #include <linux/slab.h>
 
 #include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/addr.h>
 #include <linux/sunrpc/xprtsock.h>
 #include <linux/sunrpc/svc.h>
 #include <linux/lockd/lockd.h>
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index 0deb5f6c9dd4..8064435e8bef 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -13,7 +13,7 @@
 #include <linux/slab.h>
 #include <linux/mutex.h>
 #include <linux/sunrpc/svc.h>
-#include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/addr.h>
 #include <linux/nfsd/nfsfh.h>
 #include <linux/nfsd/export.h>
 #include <linux/lockd/lockd.h>
diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c
index ca4b11ec87a2..62c8c4127219 100644
--- a/fs/nfs/dns_resolve.c
+++ b/fs/nfs/dns_resolve.c
@@ -10,6 +10,7 @@
 
 #include <linux/module.h>
 #include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/addr.h>
 #include <linux/dns_resolver.h>
 #include "dns_resolve.h"
 
@@ -42,6 +43,7 @@ EXPORT_SYMBOL_GPL(nfs_dns_resolve_name);
 #include <linux/seq_file.h>
 #include <linux/inet.h>
 #include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/addr.h>
 #include <linux/sunrpc/cache.h>
 #include <linux/sunrpc/svcauth.h>
 #include <linux/sunrpc/rpc_pipe_fs.h>
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index acc347268124..e5f2fad14ff8 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -6,6 +6,7 @@
 #include <linux/nfs_fs.h>
 #include <linux/nfs_idmap.h>
 #include <linux/nfs_mount.h>
+#include <linux/sunrpc/addr.h>
 #include <linux/sunrpc/auth.h>
 #include <linux/sunrpc/xprt.h>
 #include <linux/sunrpc/bc_xprt.h>
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c
index b720064bcd7f..1fe284f01f8b 100644
--- a/fs/nfs/nfs4filelayoutdev.c
+++ b/fs/nfs/nfs4filelayoutdev.c
@@ -31,6 +31,7 @@
 #include <linux/nfs_fs.h>
 #include <linux/vmalloc.h>
 #include <linux/module.h>
+#include <linux/sunrpc/addr.h>
 
 #include "internal.h"
 #include "nfs4session.h"
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c
index 1e09eb78543b..0dd766079e1c 100644
--- a/fs/nfs/nfs4namespace.c
+++ b/fs/nfs/nfs4namespace.c
@@ -14,6 +14,7 @@
 #include <linux/slab.h>
 #include <linux/string.h>
 #include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/addr.h>
 #include <linux/vfs.h>
 #include <linux/inet.h>
 #include "internal.h"
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index c25cadf8f8c4..3250b41eb562 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -31,6 +31,7 @@
 #include <linux/errno.h>
 #include <linux/unistd.h>
 #include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/addr.h>
 #include <linux/sunrpc/stats.h>
 #include <linux/sunrpc/metrics.h>
 #include <linux/sunrpc/xprtsock.h>
diff --git a/fs/nfsd/fault_inject.c b/fs/nfsd/fault_inject.c
index e761ee95617f..247c00ccdb0f 100644
--- a/fs/nfsd/fault_inject.c
+++ b/fs/nfsd/fault_inject.c
@@ -9,7 +9,7 @@
 #include <linux/debugfs.h>
 #include <linux/module.h>
 #include <linux/nsproxy.h>
-#include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/addr.h>
 #include <asm/uaccess.h>
 
 #include "state.h"
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 27c77a0b423f..60a2282905e0 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -40,7 +40,7 @@
 #include <linux/pagemap.h>
 #include <linux/ratelimit.h>
 #include <linux/sunrpc/svcauth_gss.h>
-#include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/addr.h>
 #include "xdr4.h"
 #include "vfs.h"
 #include "current_stateid.h"
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index d16a5d6435f2..f7544698e6e6 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -9,7 +9,7 @@
  */
 
 #include <linux/slab.h>
-#include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/addr.h>
 #include <linux/highmem.h>
 
 #include "nfsd.h"
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 65889ec1d28e..29c3f0d25469 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -10,7 +10,7 @@
 
 #include <linux/sunrpc/svcsock.h>
 #include <linux/lockd/lockd.h>
-#include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/addr.h>
 #include <linux/sunrpc/gss_api.h>
 #include <linux/sunrpc/gss_krb5_enctypes.h>
 #include <linux/sunrpc/rpc_pipe_fs.h>
diff --git a/include/linux/sunrpc/addr.h b/include/linux/sunrpc/addr.h
new file mode 100644
index 000000000000..07d8e53bedfc
--- /dev/null
+++ b/include/linux/sunrpc/addr.h
@@ -0,0 +1,170 @@
+/*
+ * linux/include/linux/sunrpc/addr.h
+ *
+ * Various routines for copying and comparing sockaddrs and for
+ * converting them to and from presentation format.
+ */
+#ifndef _LINUX_SUNRPC_ADDR_H
+#define _LINUX_SUNRPC_ADDR_H
+
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <net/ipv6.h>
+
+size_t		rpc_ntop(const struct sockaddr *, char *, const size_t);
+size_t		rpc_pton(struct net *, const char *, const size_t,
+			 struct sockaddr *, const size_t);
+char *		rpc_sockaddr2uaddr(const struct sockaddr *, gfp_t);
+size_t		rpc_uaddr2sockaddr(struct net *, const char *, const size_t,
+				   struct sockaddr *, const size_t);
+
+static inline unsigned short rpc_get_port(const struct sockaddr *sap)
+{
+	switch (sap->sa_family) {
+	case AF_INET:
+		return ntohs(((struct sockaddr_in *)sap)->sin_port);
+	case AF_INET6:
+		return ntohs(((struct sockaddr_in6 *)sap)->sin6_port);
+	}
+	return 0;
+}
+
+static inline void rpc_set_port(struct sockaddr *sap,
+				const unsigned short port)
+{
+	switch (sap->sa_family) {
+	case AF_INET:
+		((struct sockaddr_in *)sap)->sin_port = htons(port);
+		break;
+	case AF_INET6:
+		((struct sockaddr_in6 *)sap)->sin6_port = htons(port);
+		break;
+	}
+}
+
+#define IPV6_SCOPE_DELIMITER		'%'
+#define IPV6_SCOPE_ID_LEN		sizeof("%nnnnnnnnnn")
+
+static inline bool __rpc_cmp_addr4(const struct sockaddr *sap1,
+				   const struct sockaddr *sap2)
+{
+	const struct sockaddr_in *sin1 = (const struct sockaddr_in *)sap1;
+	const struct sockaddr_in *sin2 = (const struct sockaddr_in *)sap2;
+
+	return sin1->sin_addr.s_addr == sin2->sin_addr.s_addr;
+}
+
+static inline bool __rpc_copy_addr4(struct sockaddr *dst,
+				    const struct sockaddr *src)
+{
+	const struct sockaddr_in *ssin = (struct sockaddr_in *) src;
+	struct sockaddr_in *dsin = (struct sockaddr_in *) dst;
+
+	dsin->sin_family = ssin->sin_family;
+	dsin->sin_addr.s_addr = ssin->sin_addr.s_addr;
+	return true;
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+static inline bool __rpc_cmp_addr6(const struct sockaddr *sap1,
+				   const struct sockaddr *sap2)
+{
+	const struct sockaddr_in6 *sin1 = (const struct sockaddr_in6 *)sap1;
+	const struct sockaddr_in6 *sin2 = (const struct sockaddr_in6 *)sap2;
+
+	if (!ipv6_addr_equal(&sin1->sin6_addr, &sin2->sin6_addr))
+		return false;
+	else if (ipv6_addr_type(&sin1->sin6_addr) & IPV6_ADDR_LINKLOCAL)
+		return sin1->sin6_scope_id == sin2->sin6_scope_id;
+
+	return true;
+}
+
+static inline bool __rpc_copy_addr6(struct sockaddr *dst,
+				    const struct sockaddr *src)
+{
+	const struct sockaddr_in6 *ssin6 = (const struct sockaddr_in6 *) src;
+	struct sockaddr_in6 *dsin6 = (struct sockaddr_in6 *) dst;
+
+	dsin6->sin6_family = ssin6->sin6_family;
+	dsin6->sin6_addr = ssin6->sin6_addr;
+	dsin6->sin6_scope_id = ssin6->sin6_scope_id;
+	return true;
+}
+#else	/* !(IS_ENABLED(CONFIG_IPV6) */
+static inline bool __rpc_cmp_addr6(const struct sockaddr *sap1,
+				   const struct sockaddr *sap2)
+{
+	return false;
+}
+
+static inline bool __rpc_copy_addr6(struct sockaddr *dst,
+				    const struct sockaddr *src)
+{
+	return false;
+}
+#endif	/* !(IS_ENABLED(CONFIG_IPV6) */
+
+/**
+ * rpc_cmp_addr - compare the address portion of two sockaddrs.
+ * @sap1: first sockaddr
+ * @sap2: second sockaddr
+ *
+ * Just compares the family and address portion. Ignores port, but
+ * compares the scope if it's a link-local address.
+ *
+ * Returns true if the addrs are equal, false if they aren't.
+ */
+static inline bool rpc_cmp_addr(const struct sockaddr *sap1,
+				const struct sockaddr *sap2)
+{
+	if (sap1->sa_family == sap2->sa_family) {
+		switch (sap1->sa_family) {
+		case AF_INET:
+			return __rpc_cmp_addr4(sap1, sap2);
+		case AF_INET6:
+			return __rpc_cmp_addr6(sap1, sap2);
+		}
+	}
+	return false;
+}
+
+/**
+ * rpc_copy_addr - copy the address portion of one sockaddr to another
+ * @dst: destination sockaddr
+ * @src: source sockaddr
+ *
+ * Just copies the address portion and family. Ignores port, scope, etc.
+ * Caller is responsible for making certain that dst is large enough to hold
+ * the address in src. Returns true if address family is supported. Returns
+ * false otherwise.
+ */
+static inline bool rpc_copy_addr(struct sockaddr *dst,
+				 const struct sockaddr *src)
+{
+	switch (src->sa_family) {
+	case AF_INET:
+		return __rpc_copy_addr4(dst, src);
+	case AF_INET6:
+		return __rpc_copy_addr6(dst, src);
+	}
+	return false;
+}
+
+/**
+ * rpc_get_scope_id - return scopeid for a given sockaddr
+ * @sa: sockaddr to get scopeid from
+ *
+ * Returns the value of the sin6_scope_id for AF_INET6 addrs, or 0 if
+ * not an AF_INET6 address.
+ */
+static inline u32 rpc_get_scope_id(const struct sockaddr *sa)
+{
+	if (sa->sa_family != AF_INET6)
+		return 0;
+
+	return ((struct sockaddr_in6 *) sa)->sin6_scope_id;
+}
+
+#endif /* _LINUX_SUNRPC_ADDR_H */
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index 6a7c2619a355..4a4abde000cb 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -165,160 +165,5 @@ size_t		rpc_peeraddr(struct rpc_clnt *, struct sockaddr *, size_t);
 const char	*rpc_peeraddr2str(struct rpc_clnt *, enum rpc_display_format_t);
 int		rpc_localaddr(struct rpc_clnt *, struct sockaddr *, size_t);
 
-size_t		rpc_ntop(const struct sockaddr *, char *, const size_t);
-size_t		rpc_pton(struct net *, const char *, const size_t,
-			 struct sockaddr *, const size_t);
-char *		rpc_sockaddr2uaddr(const struct sockaddr *, gfp_t);
-size_t		rpc_uaddr2sockaddr(struct net *, const char *, const size_t,
-				   struct sockaddr *, const size_t);
-
-static inline unsigned short rpc_get_port(const struct sockaddr *sap)
-{
-	switch (sap->sa_family) {
-	case AF_INET:
-		return ntohs(((struct sockaddr_in *)sap)->sin_port);
-	case AF_INET6:
-		return ntohs(((struct sockaddr_in6 *)sap)->sin6_port);
-	}
-	return 0;
-}
-
-static inline void rpc_set_port(struct sockaddr *sap,
-				const unsigned short port)
-{
-	switch (sap->sa_family) {
-	case AF_INET:
-		((struct sockaddr_in *)sap)->sin_port = htons(port);
-		break;
-	case AF_INET6:
-		((struct sockaddr_in6 *)sap)->sin6_port = htons(port);
-		break;
-	}
-}
-
-#define IPV6_SCOPE_DELIMITER		'%'
-#define IPV6_SCOPE_ID_LEN		sizeof("%nnnnnnnnnn")
-
-static inline bool __rpc_cmp_addr4(const struct sockaddr *sap1,
-				   const struct sockaddr *sap2)
-{
-	const struct sockaddr_in *sin1 = (const struct sockaddr_in *)sap1;
-	const struct sockaddr_in *sin2 = (const struct sockaddr_in *)sap2;
-
-	return sin1->sin_addr.s_addr == sin2->sin_addr.s_addr;
-}
-
-static inline bool __rpc_copy_addr4(struct sockaddr *dst,
-				    const struct sockaddr *src)
-{
-	const struct sockaddr_in *ssin = (struct sockaddr_in *) src;
-	struct sockaddr_in *dsin = (struct sockaddr_in *) dst;
-
-	dsin->sin_family = ssin->sin_family;
-	dsin->sin_addr.s_addr = ssin->sin_addr.s_addr;
-	return true;
-}
-
-#if IS_ENABLED(CONFIG_IPV6)
-static inline bool __rpc_cmp_addr6(const struct sockaddr *sap1,
-				   const struct sockaddr *sap2)
-{
-	const struct sockaddr_in6 *sin1 = (const struct sockaddr_in6 *)sap1;
-	const struct sockaddr_in6 *sin2 = (const struct sockaddr_in6 *)sap2;
-
-	if (!ipv6_addr_equal(&sin1->sin6_addr, &sin2->sin6_addr))
-		return false;
-	else if (ipv6_addr_type(&sin1->sin6_addr) & IPV6_ADDR_LINKLOCAL)
-		return sin1->sin6_scope_id == sin2->sin6_scope_id;
-
-	return true;
-}
-
-static inline bool __rpc_copy_addr6(struct sockaddr *dst,
-				    const struct sockaddr *src)
-{
-	const struct sockaddr_in6 *ssin6 = (const struct sockaddr_in6 *) src;
-	struct sockaddr_in6 *dsin6 = (struct sockaddr_in6 *) dst;
-
-	dsin6->sin6_family = ssin6->sin6_family;
-	dsin6->sin6_addr = ssin6->sin6_addr;
-	dsin6->sin6_scope_id = ssin6->sin6_scope_id;
-	return true;
-}
-#else	/* !(IS_ENABLED(CONFIG_IPV6) */
-static inline bool __rpc_cmp_addr6(const struct sockaddr *sap1,
-				   const struct sockaddr *sap2)
-{
-	return false;
-}
-
-static inline bool __rpc_copy_addr6(struct sockaddr *dst,
-				    const struct sockaddr *src)
-{
-	return false;
-}
-#endif	/* !(IS_ENABLED(CONFIG_IPV6) */
-
-/**
- * rpc_cmp_addr - compare the address portion of two sockaddrs.
- * @sap1: first sockaddr
- * @sap2: second sockaddr
- *
- * Just compares the family and address portion. Ignores port, but
- * compares the scope if it's a link-local address.
- *
- * Returns true if the addrs are equal, false if they aren't.
- */
-static inline bool rpc_cmp_addr(const struct sockaddr *sap1,
-				const struct sockaddr *sap2)
-{
-	if (sap1->sa_family == sap2->sa_family) {
-		switch (sap1->sa_family) {
-		case AF_INET:
-			return __rpc_cmp_addr4(sap1, sap2);
-		case AF_INET6:
-			return __rpc_cmp_addr6(sap1, sap2);
-		}
-	}
-	return false;
-}
-
-/**
- * rpc_copy_addr - copy the address portion of one sockaddr to another
- * @dst: destination sockaddr
- * @src: source sockaddr
- *
- * Just copies the address portion and family. Ignores port, scope, etc.
- * Caller is responsible for making certain that dst is large enough to hold
- * the address in src. Returns true if address family is supported. Returns
- * false otherwise.
- */
-static inline bool rpc_copy_addr(struct sockaddr *dst,
-				 const struct sockaddr *src)
-{
-	switch (src->sa_family) {
-	case AF_INET:
-		return __rpc_copy_addr4(dst, src);
-	case AF_INET6:
-		return __rpc_copy_addr6(dst, src);
-	}
-	return false;
-}
-
-/**
- * rpc_get_scope_id - return scopeid for a given sockaddr
- * @sa: sockaddr to get scopeid from
- *
- * Returns the value of the sin6_scope_id for AF_INET6 addrs, or 0 if
- * not an AF_INET6 address.
- */
-static inline u32 rpc_get_scope_id(const struct sockaddr *sa)
-{
-	if (sa->sa_family != AF_INET6)
-		return 0;
-
-	return ((struct sockaddr_in6 *) sa)->sin6_scope_id;
-}
-
 #endif /* __KERNEL__ */
 #endif /* _LINUX_SUNRPC_CLNT_H */
diff --git a/net/sunrpc/addr.c b/net/sunrpc/addr.c
index d11418f97f1f..a622ad64acd8 100644
--- a/net/sunrpc/addr.c
+++ b/net/sunrpc/addr.c
@@ -17,7 +17,8 @@
  */
 
 #include <net/ipv6.h>
-#include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/addr.h>
+#include <linux/sunrpc/msg_prot.h>
 #include <linux/slab.h>
 #include <linux/export.h>
 
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 822f020fa7f4..c8193ce67b23 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -33,6 +33,7 @@
 #include <linux/rcupdate.h>
 
 #include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/addr.h>
 #include <linux/sunrpc/rpc_pipe_fs.h>
 #include <linux/sunrpc/metrics.h>
 #include <linux/sunrpc/bc_xprt.h>
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index 795a0f4e920b..3df764dc330c 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -26,6 +26,7 @@
 #include <net/ipv6.h>
 
 #include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/addr.h>
 #include <linux/sunrpc/sched.h>
 #include <linux/sunrpc/xprtsock.h>
 
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index 4d0129203733..ce34c8e5b8eb 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -6,6 +6,7 @@
 #include <linux/sunrpc/svcsock.h>
 #include <linux/sunrpc/svcauth.h>
 #include <linux/sunrpc/gss_api.h>
+#include <linux/sunrpc/addr.h>
 #include <linux/err.h>
 #include <linux/seq_file.h>
 #include <linux/hash.h>
@@ -17,7 +18,6 @@
 #include <linux/user_namespace.h>
 #define RPCDBG_FACILITY	RPCDBG_AUTH
 
-#include <linux/sunrpc/clnt.h>
 
 #include "netns.h"
 
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index c9aa7a35f3bf..66591dcd7e6b 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -51,6 +51,7 @@
 #include <linux/init.h>
 #include <linux/slab.h>
 #include <linux/seq_file.h>
+#include <linux/sunrpc/addr.h>
 
 #include "xprt_rdma.h"
 
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 68b0a81c31d5..bbc09154df8d 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -33,6 +33,7 @@
 #include <linux/udp.h>
 #include <linux/tcp.h>
 #include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/addr.h>
 #include <linux/sunrpc/sched.h>
 #include <linux/sunrpc/svcsock.h>
 #include <linux/sunrpc/xprtsock.h>
-- 
cgit v1.2.3


From de0b65ca55dc62b6b477f6e02088df2281da7b51 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Mon, 4 Feb 2013 12:51:17 -0500
Subject: sunrpc: fix comment in struct xdr_buf definition

...these pages aren't necessarily contiguous.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/xdr.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index 63988990bd36..224d06047e45 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -56,7 +56,7 @@ struct xdr_buf {
 	struct kvec	head[1],	/* RPC header + non-page data */
 			tail[1];	/* Appended after page data */
 
-	struct page **	pages;		/* Array of contiguous pages */
+	struct page **	pages;		/* Array of pages */
 	unsigned int	page_base,	/* Start of page data */
 			page_len,	/* Length of page data */
 			flags;		/* Flags for data disposition */
-- 
cgit v1.2.3


From f26402e81743d06b938a388925904bd47763277a Mon Sep 17 00:00:00 2001
From: Stephen Warren <swarren@nvidia.com>
Date: Thu, 31 Jan 2013 12:01:53 -0700
Subject: tty: of_serial: unexport tegra_serial_handle_break

Tegra is only booted through device-tree now; there are no board files
left that use this function. Hence, don't export it. Move the static
inline definition into of_serial.c, so we can delete of_serial.h too.

Signed-off-by: Stephen Warren <swarren@nvidia.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/of_serial.c |  7 ++++---
 include/linux/of_serial.h      | 17 -----------------
 2 files changed, 4 insertions(+), 20 deletions(-)
 delete mode 100644 include/linux/of_serial.h

(limited to 'include/linux')

diff --git a/drivers/tty/serial/of_serial.c b/drivers/tty/serial/of_serial.c
index e7cae1c2d7d2..d5874605682b 100644
--- a/drivers/tty/serial/of_serial.c
+++ b/drivers/tty/serial/of_serial.c
@@ -18,7 +18,6 @@
 #include <linux/serial_reg.h>
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
-#include <linux/of_serial.h>
 #include <linux/of_platform.h>
 #include <linux/nwpserial.h>
 #include <linux/clk.h>
@@ -45,8 +44,10 @@ void tegra_serial_handle_break(struct uart_port *p)
 		udelay(1);
 	} while (1);
 }
-/* FIXME remove this export when tegra finishes conversion to open firmware */
-EXPORT_SYMBOL_GPL(tegra_serial_handle_break);
+#else
+static inline void tegra_serial_handle_break(struct uart_port *port)
+{
+}
 #endif
 
 /*
diff --git a/include/linux/of_serial.h b/include/linux/of_serial.h
deleted file mode 100644
index 4a73ed80b4c0..000000000000
--- a/include/linux/of_serial.h
+++ /dev/null
@@ -1,17 +0,0 @@
-#ifndef __LINUX_OF_SERIAL_H
-#define __LINUX_OF_SERIAL_H
-
-/*
- * FIXME remove this file when tegra finishes conversion to open firmware,
- * expectation is that all quirks will then be self-contained in
- * drivers/tty/serial/of_serial.c.
- */
-#ifdef CONFIG_ARCH_TEGRA
-extern void tegra_serial_handle_break(struct uart_port *port);
-#else
-static inline void tegra_serial_handle_break(struct uart_port *port)
-{
-}
-#endif
-
-#endif /* __LINUX_OF_SERIAL */
-- 
cgit v1.2.3


From ca2eb5679f8ddffff60156af42595df44a315ef0 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <stephen@networkplumber.org>
Date: Tue, 5 Feb 2013 07:25:17 +0000
Subject: tcp: remove Appropriate Byte Count support

TCP Appropriate Byte Count was added by me, but later disabled.
There is no point in maintaining it since it is a potential source
of bugs and Linux already implements other better window protection
heuristics.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/ip-sysctl.txt | 11 -----------
 include/linux/tcp.h                    |  1 -
 include/net/tcp.h                      |  1 -
 kernel/sysctl_binary.c                 |  1 -
 net/ipv4/sysctl_net_ipv4.c             |  7 -------
 net/ipv4/tcp.c                         |  1 -
 net/ipv4/tcp_cong.c                    | 30 +-----------------------------
 net/ipv4/tcp_input.c                   | 15 ---------------
 net/ipv4/tcp_minisocks.c               |  1 -
 9 files changed, 1 insertion(+), 67 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 19ac1802bfd4..dc2dc87d2557 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -130,17 +130,6 @@ somaxconn - INTEGER
 	Defaults to 128.  See also tcp_max_syn_backlog for additional tuning
 	for TCP sockets.
 
-tcp_abc - INTEGER
-	Controls Appropriate Byte Count (ABC) defined in RFC3465.
-	ABC is a way of increasing congestion window (cwnd) more slowly
-	in response to partial acknowledgments.
-	Possible values are:
-		0 increase cwnd once per acknowledgment (no ABC)
-		1 increase cwnd once per acknowledgment of full sized segment
-		2 allow increase cwnd by two if acknowledgment is
-		  of two segments to compensate for delayed acknowledgments.
-	Default: 0 (off)
-
 tcp_abort_on_overflow - BOOLEAN
 	If listening service is too slow to accept new connections,
 	reset them. Default state is FALSE. It means that if overflow
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 4e1d2283e3cc..6d0d46138ae8 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -246,7 +246,6 @@ struct tcp_sock {
 	u32	sacked_out;	/* SACK'd packets			*/
 	u32	fackets_out;	/* FACK'd packets			*/
 	u32	tso_deferred;
-	u32	bytes_acked;	/* Appropriate Byte Counting - RFC3465 */
 
 	/* from STCP, retrans queue hinting */
 	struct sk_buff* lost_skb_hint;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 614af8b7758e..23f2e98d4b65 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -279,7 +279,6 @@ extern int sysctl_tcp_dma_copybreak;
 extern int sysctl_tcp_nometrics_save;
 extern int sysctl_tcp_moderate_rcvbuf;
 extern int sysctl_tcp_tso_win_divisor;
-extern int sysctl_tcp_abc;
 extern int sysctl_tcp_mtu_probing;
 extern int sysctl_tcp_base_mss;
 extern int sysctl_tcp_workaround_signed_windows;
diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c
index 5a6384450501..b669ca1fa103 100644
--- a/kernel/sysctl_binary.c
+++ b/kernel/sysctl_binary.c
@@ -387,7 +387,6 @@ static const struct bin_table bin_net_ipv4_table[] = {
 	{ CTL_INT,	NET_TCP_MODERATE_RCVBUF,		"tcp_moderate_rcvbuf" },
 	{ CTL_INT,	NET_TCP_TSO_WIN_DIVISOR,		"tcp_tso_win_divisor" },
 	{ CTL_STR,	NET_TCP_CONG_CONTROL,			"tcp_congestion_control" },
-	{ CTL_INT,	NET_TCP_ABC,				"tcp_abc" },
 	{ CTL_INT,	NET_TCP_MTU_PROBING,			"tcp_mtu_probing" },
 	{ CTL_INT,	NET_TCP_BASE_MSS,			"tcp_base_mss" },
 	{ CTL_INT,	NET_IPV4_TCP_WORKAROUND_SIGNED_WINDOWS,	"tcp_workaround_signed_windows" },
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 2622707602d1..960fd29d9b8e 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -632,13 +632,6 @@ static struct ctl_table ipv4_table[] = {
 		.maxlen		= TCP_CA_NAME_MAX,
 		.proc_handler	= proc_tcp_congestion_control,
 	},
-	{
-		.procname	= "tcp_abc",
-		.data		= &sysctl_tcp_abc,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
 	{
 		.procname	= "tcp_mtu_probing",
 		.data		= &sysctl_tcp_mtu_probing,
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 3ec1f69c5ceb..2c7e5963c2ea 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2289,7 +2289,6 @@ int tcp_disconnect(struct sock *sk, int flags)
 	tp->packets_out = 0;
 	tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
 	tp->snd_cwnd_cnt = 0;
-	tp->bytes_acked = 0;
 	tp->window_clamp = 0;
 	tcp_set_ca_state(sk, TCP_CA_Open);
 	tcp_clear_retrans(tp);
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index cdf2e707bb10..019c2389a341 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -317,28 +317,11 @@ void tcp_slow_start(struct tcp_sock *tp)
 		snd_cwnd = 1U;
 	}
 
-	/* RFC3465: ABC Slow start
-	 * Increase only after a full MSS of bytes is acked
-	 *
-	 * TCP sender SHOULD increase cwnd by the number of
-	 * previously unacknowledged bytes ACKed by each incoming
-	 * acknowledgment, provided the increase is not more than L
-	 */
-	if (sysctl_tcp_abc && tp->bytes_acked < tp->mss_cache)
-		return;
-
 	if (sysctl_tcp_max_ssthresh > 0 && tp->snd_cwnd > sysctl_tcp_max_ssthresh)
 		cnt = sysctl_tcp_max_ssthresh >> 1;	/* limited slow start */
 	else
 		cnt = snd_cwnd;				/* exponential increase */
 
-	/* RFC3465: ABC
-	 * We MAY increase by 2 if discovered delayed ack
-	 */
-	if (sysctl_tcp_abc > 1 && tp->bytes_acked >= 2*tp->mss_cache)
-		cnt <<= 1;
-	tp->bytes_acked = 0;
-
 	tp->snd_cwnd_cnt += cnt;
 	while (tp->snd_cwnd_cnt >= snd_cwnd) {
 		tp->snd_cwnd_cnt -= snd_cwnd;
@@ -378,20 +361,9 @@ void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
 	/* In "safe" area, increase. */
 	if (tp->snd_cwnd <= tp->snd_ssthresh)
 		tcp_slow_start(tp);
-
 	/* In dangerous area, increase slowly. */
-	else if (sysctl_tcp_abc) {
-		/* RFC3465: Appropriate Byte Count
-		 * increase once for each full cwnd acked
-		 */
-		if (tp->bytes_acked >= tp->snd_cwnd*tp->mss_cache) {
-			tp->bytes_acked -= tp->snd_cwnd*tp->mss_cache;
-			if (tp->snd_cwnd < tp->snd_cwnd_clamp)
-				tp->snd_cwnd++;
-		}
-	} else {
+	else
 		tcp_cong_avoid_ai(tp, tp->snd_cwnd);
-	}
 }
 EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid);
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index e376aa9591bc..f56bd1082f54 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -98,7 +98,6 @@ int sysctl_tcp_frto_response __read_mostly;
 int sysctl_tcp_thin_dupack __read_mostly;
 
 int sysctl_tcp_moderate_rcvbuf __read_mostly = 1;
-int sysctl_tcp_abc __read_mostly;
 int sysctl_tcp_early_retrans __read_mostly = 2;
 
 #define FLAG_DATA		0x01 /* Incoming frame contained data.		*/
@@ -2007,7 +2006,6 @@ static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag)
 	tp->snd_cwnd_cnt = 0;
 	tp->snd_cwnd_stamp = tcp_time_stamp;
 	tp->frto_counter = 0;
-	tp->bytes_acked = 0;
 
 	tp->reordering = min_t(unsigned int, tp->reordering,
 			       sysctl_tcp_reordering);
@@ -2056,7 +2054,6 @@ void tcp_enter_loss(struct sock *sk, int how)
 	tp->snd_cwnd_cnt   = 0;
 	tp->snd_cwnd_stamp = tcp_time_stamp;
 
-	tp->bytes_acked = 0;
 	tcp_clear_retrans_partial(tp);
 
 	if (tcp_is_reno(tp))
@@ -2684,7 +2681,6 @@ static void tcp_init_cwnd_reduction(struct sock *sk, const bool set_ssthresh)
 	struct tcp_sock *tp = tcp_sk(sk);
 
 	tp->high_seq = tp->snd_nxt;
-	tp->bytes_acked = 0;
 	tp->snd_cwnd_cnt = 0;
 	tp->prior_cwnd = tp->snd_cwnd;
 	tp->prr_delivered = 0;
@@ -2735,7 +2731,6 @@ void tcp_enter_cwr(struct sock *sk, const int set_ssthresh)
 	struct tcp_sock *tp = tcp_sk(sk);
 
 	tp->prior_ssthresh = 0;
-	tp->bytes_acked = 0;
 	if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
 		tp->undo_marker = 0;
 		tcp_init_cwnd_reduction(sk, set_ssthresh);
@@ -3417,7 +3412,6 @@ static void tcp_conservative_spur_to_response(struct tcp_sock *tp)
 {
 	tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);
 	tp->snd_cwnd_cnt = 0;
-	tp->bytes_acked = 0;
 	TCP_ECN_queue_cwr(tp);
 	tcp_moderate_cwnd(tp);
 }
@@ -3609,15 +3603,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 	if (after(ack, prior_snd_una))
 		flag |= FLAG_SND_UNA_ADVANCED;
 
-	if (sysctl_tcp_abc) {
-		if (icsk->icsk_ca_state < TCP_CA_CWR)
-			tp->bytes_acked += ack - prior_snd_una;
-		else if (icsk->icsk_ca_state == TCP_CA_Loss)
-			/* we assume just one segment left network */
-			tp->bytes_acked += min(ack - prior_snd_una,
-					       tp->mss_cache);
-	}
-
 	prior_fackets = tp->fackets_out;
 	prior_in_flight = tcp_packets_in_flight(tp);
 
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index f35f2dfb6401..f0409287b5f4 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -446,7 +446,6 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
 		 */
 		newtp->snd_cwnd = TCP_INIT_CWND;
 		newtp->snd_cwnd_cnt = 0;
-		newtp->bytes_acked = 0;
 
 		newtp->frto_counter = 0;
 		newtp->frto_highmark = 0;
-- 
cgit v1.2.3


From e6e238c38bd4d42d5e2cddb2165e1a46e0fb1200 Mon Sep 17 00:00:00 2001
From: Amit Daniel Kachhap <amit.daniel@samsung.com>
Date: Mon, 4 Feb 2013 00:30:15 +0000
Subject: thermal: sysfs: Add a new sysfs node emul_temp for thermal emulation

This patch adds support to set the emulated temperature method in
thermal zone (sensor). After setting this feature thermal zone may
report this temperature and not the actual temperature. The emulation
implementation may be based on sensor capability through platform
specific handler or pure software emulation if no platform handler defined.

This is useful in debugging different temperature threshold and its
associated cooling action. Critical threshold's cannot be emulated.
Writing 0 on this node should disable emulation.

Signed-off-by: Amit Daniel Kachhap <amit.daniel@samsung.com>
Acked-by: Kukjin Kim <kgene.kim@samsung.com>
Signed-off-by: Zhang Rui <rui.zhang@intel.com>
---
 Documentation/thermal/sysfs-api.txt | 13 ++++++
 drivers/thermal/Kconfig             |  8 ++++
 drivers/thermal/thermal_sys.c       | 79 +++++++++++++++++++++++++++++++------
 include/linux/thermal.h             |  2 +
 4 files changed, 91 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/thermal/sysfs-api.txt b/Documentation/thermal/sysfs-api.txt
index 526d4b90d6c1..6859661c9d31 100644
--- a/Documentation/thermal/sysfs-api.txt
+++ b/Documentation/thermal/sysfs-api.txt
@@ -55,6 +55,8 @@ temperature) and throttle appropriate devices.
 	.get_trip_type: get the type of certain trip point.
 	.get_trip_temp: get the temperature above which the certain trip point
 			will be fired.
+	.set_emul_temp: set the emulation temperature which helps in debugging
+			different threshold temperature points.
 
 1.1.2 void thermal_zone_device_unregister(struct thermal_zone_device *tz)
 
@@ -153,6 +155,7 @@ Thermal zone device sys I/F, created once it's registered:
     |---trip_point_[0-*]_temp:	Trip point temperature
     |---trip_point_[0-*]_type:	Trip point type
     |---trip_point_[0-*]_hyst:	Hysteresis value for this trip point
+    |---emul_temp:		Emulated temperature set node
 
 Thermal cooling device sys I/F, created once it's registered:
 /sys/class/thermal/cooling_device[0-*]:
@@ -252,6 +255,16 @@ passive
 	Valid values: 0 (disabled) or greater than 1000
 	RW, Optional
 
+emul_temp
+	Interface to set the emulated temperature method in thermal zone
+	(sensor). After setting this temperature, the thermal zone may pass
+	this temperature to platform emulation function if registered or
+	cache it locally. This is useful in debugging different temperature
+	threshold and its associated cooling action. This is write only node
+	and writing 0 on this node should disable emulation.
+	Unit: millidegree Celsius
+	WO, Optional
+
 *****************************
 * Cooling device attributes *
 *****************************
diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig
index faf38c522fa8..e4cf7fbc3a59 100644
--- a/drivers/thermal/Kconfig
+++ b/drivers/thermal/Kconfig
@@ -78,6 +78,14 @@ config CPU_THERMAL
 	  and not the ACPI interface.
 	  If you want this support, you should say Y here.
 
+config THERMAL_EMULATION
+	bool "Thermal emulation mode support"
+	help
+	  Enable this option to make a emul_temp sysfs node in thermal zone
+	  directory to support temperature emulation. With emulation sysfs node,
+	  user can manually input temperature and test the different trip
+	  threshold behaviour for simulation purpose.
+
 config SPEAR_THERMAL
 	bool "SPEAr thermal sensor driver"
 	depends on PLAT_SPEAR
diff --git a/drivers/thermal/thermal_sys.c b/drivers/thermal/thermal_sys.c
index 0a1bf6b032ea..0675687c6de8 100644
--- a/drivers/thermal/thermal_sys.c
+++ b/drivers/thermal/thermal_sys.c
@@ -378,24 +378,54 @@ static void handle_thermal_trip(struct thermal_zone_device *tz, int trip)
 	monitor_thermal_zone(tz);
 }
 
+static int thermal_zone_get_temp(struct thermal_zone_device *tz,
+				unsigned long *temp)
+{
+	int ret = 0, count;
+	unsigned long crit_temp = -1UL;
+	enum thermal_trip_type type;
+
+	mutex_lock(&tz->lock);
+
+	ret = tz->ops->get_temp(tz, temp);
+#ifdef CONFIG_THERMAL_EMULATION
+	if (!tz->emul_temperature)
+		goto skip_emul;
+
+	for (count = 0; count < tz->trips; count++) {
+		ret = tz->ops->get_trip_type(tz, count, &type);
+		if (!ret && type == THERMAL_TRIP_CRITICAL) {
+			ret = tz->ops->get_trip_temp(tz, count, &crit_temp);
+			break;
+		}
+	}
+
+	if (ret)
+		goto skip_emul;
+
+	if (*temp < crit_temp)
+		*temp = tz->emul_temperature;
+skip_emul:
+#endif
+	mutex_unlock(&tz->lock);
+	return ret;
+}
+
 static void update_temperature(struct thermal_zone_device *tz)
 {
 	long temp;
 	int ret;
 
-	mutex_lock(&tz->lock);
-
-	ret = tz->ops->get_temp(tz, &temp);
+	ret = thermal_zone_get_temp(tz, &temp);
 	if (ret) {
 		dev_warn(&tz->device, "failed to read out thermal zone %d\n",
 			 tz->id);
-		goto exit;
+		return;
 	}
 
+	mutex_lock(&tz->lock);
 	tz->last_temperature = tz->temperature;
 	tz->temperature = temp;
-
-exit:
 	mutex_unlock(&tz->lock);
 }
 
@@ -438,10 +468,7 @@ temp_show(struct device *dev, struct device_attribute *attr, char *buf)
 	long temperature;
 	int ret;
 
-	if (!tz->ops->get_temp)
-		return -EPERM;
-
-	ret = tz->ops->get_temp(tz, &temperature);
+	ret = thermal_zone_get_temp(tz, &temperature);
 
 	if (ret)
 		return ret;
@@ -701,6 +728,31 @@ policy_show(struct device *dev, struct device_attribute *devattr, char *buf)
 	return sprintf(buf, "%s\n", tz->governor->name);
 }
 
+#ifdef CONFIG_THERMAL_EMULATION
+static ssize_t
+emul_temp_store(struct device *dev, struct device_attribute *attr,
+		     const char *buf, size_t count)
+{
+	struct thermal_zone_device *tz = to_thermal_zone(dev);
+	int ret = 0;
+	unsigned long temperature;
+
+	if (kstrtoul(buf, 10, &temperature))
+		return -EINVAL;
+
+	if (!tz->ops->set_emul_temp) {
+		mutex_lock(&tz->lock);
+		tz->emul_temperature = temperature;
+		mutex_unlock(&tz->lock);
+	} else {
+		ret = tz->ops->set_emul_temp(tz, temperature);
+	}
+
+	return ret ? ret : count;
+}
+static DEVICE_ATTR(emul_temp, S_IWUSR, NULL, emul_temp_store);
+#endif/*CONFIG_THERMAL_EMULATION*/
+
 static DEVICE_ATTR(type, 0444, type_show, NULL);
 static DEVICE_ATTR(temp, 0444, temp_show, NULL);
 static DEVICE_ATTR(mode, 0644, mode_show, mode_store);
@@ -843,7 +895,7 @@ temp_input_show(struct device *dev, struct device_attribute *attr, char *buf)
 				       temp_input);
 	struct thermal_zone_device *tz = temp->tz;
 
-	ret = tz->ops->get_temp(tz, &temperature);
+	ret = thermal_zone_get_temp(tz, &temperature);
 
 	if (ret)
 		return ret;
@@ -1596,6 +1648,11 @@ struct thermal_zone_device *thermal_zone_device_register(const char *type,
 			goto unregister;
 	}
 
+#ifdef CONFIG_THERMAL_EMULATION
+	result = device_create_file(&tz->device, &dev_attr_emul_temp);
+	if (result)
+		goto unregister;
+#endif
 	/* Create policy attribute */
 	result = device_create_file(&tz->device, &dev_attr_policy);
 	if (result)
diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index 9b78f8c6f773..f0bd7f90a90d 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -123,6 +123,7 @@ struct thermal_zone_device_ops {
 	int (*set_trip_hyst) (struct thermal_zone_device *, int,
 			      unsigned long);
 	int (*get_crit_temp) (struct thermal_zone_device *, unsigned long *);
+	int (*set_emul_temp) (struct thermal_zone_device *, unsigned long);
 	int (*get_trend) (struct thermal_zone_device *, int,
 			  enum thermal_trend *);
 	int (*notify) (struct thermal_zone_device *, int,
@@ -165,6 +166,7 @@ struct thermal_zone_device {
 	int polling_delay;
 	int temperature;
 	int last_temperature;
+	int emul_temperature;
 	int passive;
 	unsigned int forced_passive;
 	const struct thermal_zone_device_ops *ops;
-- 
cgit v1.2.3


From a8b8a88a9e3042d41326c854272c881664acba1c Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joro@8bytes.org>
Date: Tue, 29 Jan 2013 14:36:31 +0100
Subject: iommu: Make sure DOMAIN_ATTR_MAX is really the maximum

Move it to the end of the list.

Signed-off-by: Joerg Roedel <joro@8bytes.org>
---
 include/linux/iommu.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index f3b99e1c1042..7e6ce7260b1c 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -58,8 +58,8 @@ struct iommu_domain {
 #define IOMMU_CAP_INTR_REMAP		0x2	/* isolates device intrs */
 
 enum iommu_attr {
-	DOMAIN_ATTR_MAX,
 	DOMAIN_ATTR_GEOMETRY,
+	DOMAIN_ATTR_MAX,
 };
 
 #ifdef CONFIG_IOMMU_API
-- 
cgit v1.2.3


From d2e121601619631517409cba34e50db3cbff5852 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joro@8bytes.org>
Date: Tue, 29 Jan 2013 13:49:04 +0100
Subject: iommu: Implement DOMAIN_ATTR_PAGING attribute

This attribute of a domain can be queried to find out if the
domain supports setting up page-tables using the iommu_map()
and iommu_unmap() functions.

Signed-off-by: Joerg Roedel <joro@8bytes.org>
---
 drivers/iommu/iommu.c | 5 +++++
 include/linux/iommu.h | 1 +
 2 files changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 4e6d6f857e6f..0e0e5f2e0ccc 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -869,6 +869,7 @@ int iommu_domain_get_attr(struct iommu_domain *domain,
 			  enum iommu_attr attr, void *data)
 {
 	struct iommu_domain_geometry *geometry;
+	bool *paging;
 	int ret = 0;
 
 	switch (attr) {
@@ -876,6 +877,10 @@ int iommu_domain_get_attr(struct iommu_domain *domain,
 		geometry  = data;
 		*geometry = domain->geometry;
 
+		break;
+	case DOMAIN_ATTR_PAGING:
+		paging  = data;
+		*paging = (domain->ops->pgsize_bitmap != 0UL);
 		break;
 	default:
 		if (!domain->ops->domain_get_attr)
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 7e6ce7260b1c..26066f54a849 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -59,6 +59,7 @@ struct iommu_domain {
 
 enum iommu_attr {
 	DOMAIN_ATTR_GEOMETRY,
+	DOMAIN_ATTR_PAGING,
 	DOMAIN_ATTR_MAX,
 };
 
-- 
cgit v1.2.3


From d7787d579cbef9f8079104759a2259fc916c688c Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joro@8bytes.org>
Date: Tue, 29 Jan 2013 14:26:20 +0100
Subject: iommu: Add domain window handling functions

Add the iommu_domain_window_enable() and iommu_domain_window_disable()
functions to the IOMMU-API. These functions will be used to setup
domains that are based on subwindows and not on paging.

Signed-off-by: Joerg Roedel <joro@8bytes.org>
---
 drivers/iommu/iommu.c | 20 ++++++++++++++++++++
 include/linux/iommu.h | 22 ++++++++++++++++++++++
 2 files changed, 42 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 0e0e5f2e0ccc..b3aced7356cc 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -852,6 +852,26 @@ size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size)
 }
 EXPORT_SYMBOL_GPL(iommu_unmap);
 
+
+int iommu_domain_window_enable(struct iommu_domain *domain, u32 wnd_nr,
+			       phys_addr_t paddr, u64 size)
+{
+	if (unlikely(domain->ops->domain_window_enable == NULL))
+		return -ENODEV;
+
+	return domain->ops->domain_window_enable(domain, wnd_nr, paddr, size);
+}
+EXPORT_SYMBOL_GPL(iommu_domain_window_enable);
+
+void iommu_domain_window_disable(struct iommu_domain *domain, u32 wnd_nr)
+{
+	if (unlikely(domain->ops->domain_window_disable == NULL))
+		return;
+
+	return domain->ops->domain_window_disable(domain, wnd_nr);
+}
+EXPORT_SYMBOL_GPL(iommu_domain_window_disable);
+
 static int __init iommu_init(void)
 {
 	iommu_group_kset = kset_create_and_add("iommu_groups",
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 26066f54a849..5ea3d7250917 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -101,6 +101,12 @@ struct iommu_ops {
 			       enum iommu_attr attr, void *data);
 	int (*domain_set_attr)(struct iommu_domain *domain,
 			       enum iommu_attr attr, void *data);
+
+	/* Window handling functions */
+	int (*domain_window_enable)(struct iommu_domain *domain, u32 wnd_nr,
+				    phys_addr_t paddr, u64 size);
+	void (*domain_window_disable)(struct iommu_domain *domain, u32 wnd_nr);
+
 	unsigned long pgsize_bitmap;
 };
 
@@ -158,6 +164,10 @@ extern int iommu_domain_get_attr(struct iommu_domain *domain, enum iommu_attr,
 extern int iommu_domain_set_attr(struct iommu_domain *domain, enum iommu_attr,
 				 void *data);
 
+/* Window handling function prototypes */
+extern int iommu_domain_window_enable(struct iommu_domain *domain, u32 wnd_nr,
+				      phys_addr_t offset, u64 size);
+extern void iommu_domain_window_disable(struct iommu_domain *domain, u32 wnd_nr);
 /**
  * report_iommu_fault() - report about an IOMMU fault to the IOMMU framework
  * @domain: the iommu domain where the fault has happened
@@ -240,6 +250,18 @@ static inline int iommu_unmap(struct iommu_domain *domain, unsigned long iova,
 	return -ENODEV;
 }
 
+static inline int iommu_domain_window_enable(struct iommu_domain *domain,
+					     u32 wnd_nr, phys_addr_t paddr,
+					     u64 size)
+{
+	return -ENODEV;
+}
+
+static inline void iommu_domain_window_disable(struct iommu_domain *domain,
+					       u32 wnd_nr)
+{
+}
+
 static inline phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain,
 					     unsigned long iova)
 {
-- 
cgit v1.2.3


From 693567125bde1966a095267a9d8ca1b8d40f59ee Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joro@8bytes.org>
Date: Mon, 4 Feb 2013 14:00:01 +0100
Subject: iommu: Add DOMAIN_ATTR_WINDOWS domain attribute

This attribute can be used to set and get the number of
subwindows on IOMMUs that are window-based.

Signed-off-by: Joerg Roedel <joro@8bytes.org>
---
 drivers/iommu/iommu.c | 33 ++++++++++++++++++++++++++++++---
 include/linux/iommu.h |  5 +++++
 2 files changed, 35 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index b3aced7356cc..b972d430d92b 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -891,6 +891,7 @@ int iommu_domain_get_attr(struct iommu_domain *domain,
 	struct iommu_domain_geometry *geometry;
 	bool *paging;
 	int ret = 0;
+	u32 *count;
 
 	switch (attr) {
 	case DOMAIN_ATTR_GEOMETRY:
@@ -901,6 +902,15 @@ int iommu_domain_get_attr(struct iommu_domain *domain,
 	case DOMAIN_ATTR_PAGING:
 		paging  = data;
 		*paging = (domain->ops->pgsize_bitmap != 0UL);
+		break;
+	case DOMAIN_ATTR_WINDOWS:
+		count = data;
+
+		if (domain->ops->domain_get_windows != NULL)
+			*count = domain->ops->domain_get_windows(domain);
+		else
+			ret = -ENODEV;
+
 		break;
 	default:
 		if (!domain->ops->domain_get_attr)
@@ -916,9 +926,26 @@ EXPORT_SYMBOL_GPL(iommu_domain_get_attr);
 int iommu_domain_set_attr(struct iommu_domain *domain,
 			  enum iommu_attr attr, void *data)
 {
-	if (!domain->ops->domain_set_attr)
-		return -EINVAL;
+	int ret = 0;
+	u32 *count;
+
+	switch (attr) {
+	case DOMAIN_ATTR_WINDOWS:
+		count = data;
+
+		if (domain->ops->domain_set_windows != NULL)
+			ret = domain->ops->domain_set_windows(domain, *count);
+		else
+			ret = -ENODEV;
 
-	return domain->ops->domain_set_attr(domain, attr, data);
+		break;
+	default:
+		if (domain->ops->domain_set_attr == NULL)
+			return -EINVAL;
+
+		ret = domain->ops->domain_set_attr(domain, attr, data);
+	}
+
+	return ret;
 }
 EXPORT_SYMBOL_GPL(iommu_domain_set_attr);
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 5ea3d7250917..ba3b8a98a049 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -60,6 +60,7 @@ struct iommu_domain {
 enum iommu_attr {
 	DOMAIN_ATTR_GEOMETRY,
 	DOMAIN_ATTR_PAGING,
+	DOMAIN_ATTR_WINDOWS,
 	DOMAIN_ATTR_MAX,
 };
 
@@ -106,6 +107,10 @@ struct iommu_ops {
 	int (*domain_window_enable)(struct iommu_domain *domain, u32 wnd_nr,
 				    phys_addr_t paddr, u64 size);
 	void (*domain_window_disable)(struct iommu_domain *domain, u32 wnd_nr);
+	/* Set the numer of window per domain */
+	int (*domain_set_windows)(struct iommu_domain *domain, u32 w_count);
+	/* Get the numer of window per domain */
+	u32 (*domain_get_windows)(struct iommu_domain *domain);
 
 	unsigned long pgsize_bitmap;
 };
-- 
cgit v1.2.3


From c2c460f7c148aa1a59630f61dac2481f1efb4f4e Mon Sep 17 00:00:00 2001
From: Hideki EIRAKU <hdk@igel.co.jp>
Date: Mon, 21 Jan 2013 19:54:26 +0900
Subject: iommu/shmobile: Add iommu driver for Renesas IPMMU modules

This is the Renesas IPMMU driver and IOMMU API implementation.

The IPMMU module supports the MMU function and the PMB function.  The
MMU function provides address translation by pagetable compatible with
ARMv6.  The PMB function provides address translation including
tile-linear translation.  This patch implements the MMU function.

The iommu driver does not register a platform driver directly because:
- the register space of the MMU function and the PMB function
  have a common register (used for settings flush), so they should ideally
  have a way to appropriately share this register.
- the MMU function uses the IOMMU API while the PMB function does not.
- the two functions may be used independently.

Signed-off-by: Hideki EIRAKU <hdk@igel.co.jp>
Signed-off-by: Joerg Roedel <joro@8bytes.org>
---
 drivers/iommu/Kconfig                  |  74 ++++++
 drivers/iommu/Makefile                 |   2 +
 drivers/iommu/shmobile-iommu.c         | 395 +++++++++++++++++++++++++++++++++
 drivers/iommu/shmobile-ipmmu.c         | 136 ++++++++++++
 drivers/iommu/shmobile-ipmmu.h         |  34 +++
 include/linux/platform_data/sh_ipmmu.h |  18 ++
 6 files changed, 659 insertions(+)
 create mode 100644 drivers/iommu/shmobile-iommu.c
 create mode 100644 drivers/iommu/shmobile-ipmmu.c
 create mode 100644 drivers/iommu/shmobile-ipmmu.h
 create mode 100644 include/linux/platform_data/sh_ipmmu.h

(limited to 'include/linux')

diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index e39f9dbf297b..d364494c9e7c 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -187,4 +187,78 @@ config EXYNOS_IOMMU_DEBUG
 
 	  Say N unless you need kernel log message for IOMMU debugging
 
+config SHMOBILE_IPMMU
+	bool
+
+config SHMOBILE_IPMMU_TLB
+	bool
+
+config SHMOBILE_IOMMU
+	bool "IOMMU for Renesas IPMMU/IPMMUI"
+	default n
+	depends on (ARM && ARCH_SHMOBILE)
+	select IOMMU_API
+	select ARM_DMA_USE_IOMMU
+	select SHMOBILE_IPMMU
+	select SHMOBILE_IPMMU_TLB
+	help
+	  Support for Renesas IPMMU/IPMMUI. This option enables
+	  remapping of DMA memory accesses from all of the IP blocks
+	  on the ICB.
+
+	  Warning: Drivers (including userspace drivers of UIO
+	  devices) of the IP blocks on the ICB *must* use addresses
+	  allocated from the IPMMU (iova) for DMA with this option
+	  enabled.
+
+	  If unsure, say N.
+
+choice
+	prompt "IPMMU/IPMMUI address space size"
+	default SHMOBILE_IOMMU_ADDRSIZE_2048MB
+	depends on SHMOBILE_IOMMU
+	help
+	  This option sets IPMMU/IPMMUI address space size by
+	  adjusting the 1st level page table size. The page table size
+	  is calculated as follows:
+
+	      page table size = number of page table entries * 4 bytes
+	      number of page table entries = address space size / 1 MiB
+
+	  For example, when the address space size is 2048 MiB, the
+	  1st level page table size is 8192 bytes.
+
+	config SHMOBILE_IOMMU_ADDRSIZE_2048MB
+		bool "2 GiB"
+
+	config SHMOBILE_IOMMU_ADDRSIZE_1024MB
+		bool "1 GiB"
+
+	config SHMOBILE_IOMMU_ADDRSIZE_512MB
+		bool "512 MiB"
+
+	config SHMOBILE_IOMMU_ADDRSIZE_256MB
+		bool "256 MiB"
+
+	config SHMOBILE_IOMMU_ADDRSIZE_128MB
+		bool "128 MiB"
+
+	config SHMOBILE_IOMMU_ADDRSIZE_64MB
+		bool "64 MiB"
+
+	config SHMOBILE_IOMMU_ADDRSIZE_32MB
+		bool "32 MiB"
+
+endchoice
+
+config SHMOBILE_IOMMU_L1SIZE
+	int
+	default 8192 if SHMOBILE_IOMMU_ADDRSIZE_2048MB
+	default 4096 if SHMOBILE_IOMMU_ADDRSIZE_1024MB
+	default 2048 if SHMOBILE_IOMMU_ADDRSIZE_512MB
+	default 1024 if SHMOBILE_IOMMU_ADDRSIZE_256MB
+	default 512 if SHMOBILE_IOMMU_ADDRSIZE_128MB
+	default 256 if SHMOBILE_IOMMU_ADDRSIZE_64MB
+	default 128 if SHMOBILE_IOMMU_ADDRSIZE_32MB
+
 endif # IOMMU_SUPPORT
diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
index f66b816d455c..ef0e5207ad69 100644
--- a/drivers/iommu/Makefile
+++ b/drivers/iommu/Makefile
@@ -13,3 +13,5 @@ obj-$(CONFIG_OMAP_IOMMU_DEBUG) += omap-iommu-debug.o
 obj-$(CONFIG_TEGRA_IOMMU_GART) += tegra-gart.o
 obj-$(CONFIG_TEGRA_IOMMU_SMMU) += tegra-smmu.o
 obj-$(CONFIG_EXYNOS_IOMMU) += exynos-iommu.o
+obj-$(CONFIG_SHMOBILE_IOMMU) += shmobile-iommu.o
+obj-$(CONFIG_SHMOBILE_IPMMU) += shmobile-ipmmu.o
diff --git a/drivers/iommu/shmobile-iommu.c b/drivers/iommu/shmobile-iommu.c
new file mode 100644
index 000000000000..b6e8b57cf0a8
--- /dev/null
+++ b/drivers/iommu/shmobile-iommu.c
@@ -0,0 +1,395 @@
+/*
+ * IOMMU for IPMMU/IPMMUI
+ * Copyright (C) 2012  Hideki EIRAKU
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ */
+
+#include <linux/dma-mapping.h>
+#include <linux/io.h>
+#include <linux/iommu.h>
+#include <linux/platform_device.h>
+#include <linux/sizes.h>
+#include <linux/slab.h>
+#include <asm/dma-iommu.h>
+#include "shmobile-ipmmu.h"
+
+#define L1_SIZE CONFIG_SHMOBILE_IOMMU_L1SIZE
+#define L1_LEN (L1_SIZE / 4)
+#define L1_ALIGN L1_SIZE
+#define L2_SIZE SZ_1K
+#define L2_LEN (L2_SIZE / 4)
+#define L2_ALIGN L2_SIZE
+
+struct shmobile_iommu_domain_pgtable {
+	uint32_t *pgtable;
+	dma_addr_t handle;
+};
+
+struct shmobile_iommu_archdata {
+	struct list_head attached_list;
+	struct dma_iommu_mapping *iommu_mapping;
+	spinlock_t attach_lock;
+	struct shmobile_iommu_domain *attached;
+	int num_attached_devices;
+	struct shmobile_ipmmu *ipmmu;
+};
+
+struct shmobile_iommu_domain {
+	struct shmobile_iommu_domain_pgtable l1, l2[L1_LEN];
+	spinlock_t map_lock;
+	spinlock_t attached_list_lock;
+	struct list_head attached_list;
+};
+
+static struct shmobile_iommu_archdata *ipmmu_archdata;
+static struct kmem_cache *l1cache, *l2cache;
+
+static int pgtable_alloc(struct shmobile_iommu_domain_pgtable *pgtable,
+			 struct kmem_cache *cache, size_t size)
+{
+	pgtable->pgtable = kmem_cache_zalloc(cache, GFP_ATOMIC);
+	if (!pgtable->pgtable)
+		return -ENOMEM;
+	pgtable->handle = dma_map_single(NULL, pgtable->pgtable, size,
+					 DMA_TO_DEVICE);
+	return 0;
+}
+
+static void pgtable_free(struct shmobile_iommu_domain_pgtable *pgtable,
+			 struct kmem_cache *cache, size_t size)
+{
+	dma_unmap_single(NULL, pgtable->handle, size, DMA_TO_DEVICE);
+	kmem_cache_free(cache, pgtable->pgtable);
+}
+
+static uint32_t pgtable_read(struct shmobile_iommu_domain_pgtable *pgtable,
+			     unsigned int index)
+{
+	return pgtable->pgtable[index];
+}
+
+static void pgtable_write(struct shmobile_iommu_domain_pgtable *pgtable,
+			  unsigned int index, unsigned int count, uint32_t val)
+{
+	unsigned int i;
+
+	for (i = 0; i < count; i++)
+		pgtable->pgtable[index + i] = val;
+	dma_sync_single_for_device(NULL, pgtable->handle + index * sizeof(val),
+				   sizeof(val) * count, DMA_TO_DEVICE);
+}
+
+static int shmobile_iommu_domain_init(struct iommu_domain *domain)
+{
+	struct shmobile_iommu_domain *sh_domain;
+	int i, ret;
+
+	sh_domain = kmalloc(sizeof(*sh_domain), GFP_KERNEL);
+	if (!sh_domain)
+		return -ENOMEM;
+	ret = pgtable_alloc(&sh_domain->l1, l1cache, L1_SIZE);
+	if (ret < 0) {
+		kfree(sh_domain);
+		return ret;
+	}
+	for (i = 0; i < L1_LEN; i++)
+		sh_domain->l2[i].pgtable = NULL;
+	spin_lock_init(&sh_domain->map_lock);
+	spin_lock_init(&sh_domain->attached_list_lock);
+	INIT_LIST_HEAD(&sh_domain->attached_list);
+	domain->priv = sh_domain;
+	return 0;
+}
+
+static void shmobile_iommu_domain_destroy(struct iommu_domain *domain)
+{
+	struct shmobile_iommu_domain *sh_domain = domain->priv;
+	int i;
+
+	for (i = 0; i < L1_LEN; i++) {
+		if (sh_domain->l2[i].pgtable)
+			pgtable_free(&sh_domain->l2[i], l2cache, L2_SIZE);
+	}
+	pgtable_free(&sh_domain->l1, l1cache, L1_SIZE);
+	kfree(sh_domain);
+	domain->priv = NULL;
+}
+
+static int shmobile_iommu_attach_device(struct iommu_domain *domain,
+					struct device *dev)
+{
+	struct shmobile_iommu_archdata *archdata = dev->archdata.iommu;
+	struct shmobile_iommu_domain *sh_domain = domain->priv;
+	int ret = -EBUSY;
+
+	if (!archdata)
+		return -ENODEV;
+	spin_lock(&sh_domain->attached_list_lock);
+	spin_lock(&archdata->attach_lock);
+	if (archdata->attached != sh_domain) {
+		if (archdata->attached)
+			goto err;
+		ipmmu_tlb_set(archdata->ipmmu, sh_domain->l1.handle, L1_SIZE,
+			      0);
+		ipmmu_tlb_flush(archdata->ipmmu);
+		archdata->attached = sh_domain;
+		archdata->num_attached_devices = 0;
+		list_add(&archdata->attached_list, &sh_domain->attached_list);
+	}
+	archdata->num_attached_devices++;
+	ret = 0;
+err:
+	spin_unlock(&archdata->attach_lock);
+	spin_unlock(&sh_domain->attached_list_lock);
+	return ret;
+}
+
+static void shmobile_iommu_detach_device(struct iommu_domain *domain,
+					 struct device *dev)
+{
+	struct shmobile_iommu_archdata *archdata = dev->archdata.iommu;
+	struct shmobile_iommu_domain *sh_domain = domain->priv;
+
+	if (!archdata)
+		return;
+	spin_lock(&sh_domain->attached_list_lock);
+	spin_lock(&archdata->attach_lock);
+	archdata->num_attached_devices--;
+	if (!archdata->num_attached_devices) {
+		ipmmu_tlb_set(archdata->ipmmu, 0, 0, 0);
+		ipmmu_tlb_flush(archdata->ipmmu);
+		archdata->attached = NULL;
+		list_del(&archdata->attached_list);
+	}
+	spin_unlock(&archdata->attach_lock);
+	spin_unlock(&sh_domain->attached_list_lock);
+}
+
+static void domain_tlb_flush(struct shmobile_iommu_domain *sh_domain)
+{
+	struct shmobile_iommu_archdata *archdata;
+
+	spin_lock(&sh_domain->attached_list_lock);
+	list_for_each_entry(archdata, &sh_domain->attached_list, attached_list)
+		ipmmu_tlb_flush(archdata->ipmmu);
+	spin_unlock(&sh_domain->attached_list_lock);
+}
+
+static int l2alloc(struct shmobile_iommu_domain *sh_domain,
+		   unsigned int l1index)
+{
+	int ret;
+
+	if (!sh_domain->l2[l1index].pgtable) {
+		ret = pgtable_alloc(&sh_domain->l2[l1index], l2cache, L2_SIZE);
+		if (ret < 0)
+			return ret;
+	}
+	pgtable_write(&sh_domain->l1, l1index, 1,
+		      sh_domain->l2[l1index].handle | 0x1);
+	return 0;
+}
+
+static void l2realfree(struct shmobile_iommu_domain_pgtable *l2)
+{
+	if (l2->pgtable)
+		pgtable_free(l2, l2cache, L2_SIZE);
+}
+
+static void l2free(struct shmobile_iommu_domain *sh_domain,
+		   unsigned int l1index,
+		   struct shmobile_iommu_domain_pgtable *l2)
+{
+	pgtable_write(&sh_domain->l1, l1index, 1, 0);
+	if (sh_domain->l2[l1index].pgtable) {
+		*l2 = sh_domain->l2[l1index];
+		sh_domain->l2[l1index].pgtable = NULL;
+	}
+}
+
+static int shmobile_iommu_map(struct iommu_domain *domain, unsigned long iova,
+			      phys_addr_t paddr, size_t size, int prot)
+{
+	struct shmobile_iommu_domain_pgtable l2 = { .pgtable = NULL };
+	struct shmobile_iommu_domain *sh_domain = domain->priv;
+	unsigned int l1index, l2index;
+	int ret;
+
+	l1index = iova >> 20;
+	switch (size) {
+	case SZ_4K:
+		l2index = (iova >> 12) & 0xff;
+		spin_lock(&sh_domain->map_lock);
+		ret = l2alloc(sh_domain, l1index);
+		if (!ret)
+			pgtable_write(&sh_domain->l2[l1index], l2index, 1,
+				      paddr | 0xff2);
+		spin_unlock(&sh_domain->map_lock);
+		break;
+	case SZ_64K:
+		l2index = (iova >> 12) & 0xf0;
+		spin_lock(&sh_domain->map_lock);
+		ret = l2alloc(sh_domain, l1index);
+		if (!ret)
+			pgtable_write(&sh_domain->l2[l1index], l2index, 0x10,
+				      paddr | 0xff1);
+		spin_unlock(&sh_domain->map_lock);
+		break;
+	case SZ_1M:
+		spin_lock(&sh_domain->map_lock);
+		l2free(sh_domain, l1index, &l2);
+		pgtable_write(&sh_domain->l1, l1index, 1, paddr | 0xc02);
+		spin_unlock(&sh_domain->map_lock);
+		ret = 0;
+		break;
+	default:
+		ret = -EINVAL;
+	}
+	if (!ret)
+		domain_tlb_flush(sh_domain);
+	l2realfree(&l2);
+	return ret;
+}
+
+static size_t shmobile_iommu_unmap(struct iommu_domain *domain,
+				   unsigned long iova, size_t size)
+{
+	struct shmobile_iommu_domain_pgtable l2 = { .pgtable = NULL };
+	struct shmobile_iommu_domain *sh_domain = domain->priv;
+	unsigned int l1index, l2index;
+	uint32_t l2entry = 0;
+	size_t ret = 0;
+
+	l1index = iova >> 20;
+	if (!(iova & 0xfffff) && size >= SZ_1M) {
+		spin_lock(&sh_domain->map_lock);
+		l2free(sh_domain, l1index, &l2);
+		spin_unlock(&sh_domain->map_lock);
+		ret = SZ_1M;
+		goto done;
+	}
+	l2index = (iova >> 12) & 0xff;
+	spin_lock(&sh_domain->map_lock);
+	if (sh_domain->l2[l1index].pgtable)
+		l2entry = pgtable_read(&sh_domain->l2[l1index], l2index);
+	switch (l2entry & 3) {
+	case 1:
+		if (l2index & 0xf)
+			break;
+		pgtable_write(&sh_domain->l2[l1index], l2index, 0x10, 0);
+		ret = SZ_64K;
+		break;
+	case 2:
+		pgtable_write(&sh_domain->l2[l1index], l2index, 1, 0);
+		ret = SZ_4K;
+		break;
+	}
+	spin_unlock(&sh_domain->map_lock);
+done:
+	if (ret)
+		domain_tlb_flush(sh_domain);
+	l2realfree(&l2);
+	return ret;
+}
+
+static phys_addr_t shmobile_iommu_iova_to_phys(struct iommu_domain *domain,
+					       unsigned long iova)
+{
+	struct shmobile_iommu_domain *sh_domain = domain->priv;
+	uint32_t l1entry = 0, l2entry = 0;
+	unsigned int l1index, l2index;
+
+	l1index = iova >> 20;
+	l2index = (iova >> 12) & 0xff;
+	spin_lock(&sh_domain->map_lock);
+	if (sh_domain->l2[l1index].pgtable)
+		l2entry = pgtable_read(&sh_domain->l2[l1index], l2index);
+	else
+		l1entry = pgtable_read(&sh_domain->l1, l1index);
+	spin_unlock(&sh_domain->map_lock);
+	switch (l2entry & 3) {
+	case 1:
+		return (l2entry & ~0xffff) | (iova & 0xffff);
+	case 2:
+		return (l2entry & ~0xfff) | (iova & 0xfff);
+	default:
+		if ((l1entry & 3) == 2)
+			return (l1entry & ~0xfffff) | (iova & 0xfffff);
+		return 0;
+	}
+}
+
+static int find_dev_name(struct shmobile_ipmmu *ipmmu, const char *dev_name)
+{
+	unsigned int i, n = ipmmu->num_dev_names;
+
+	for (i = 0; i < n; i++) {
+		if (strcmp(ipmmu->dev_names[i], dev_name) == 0)
+			return 1;
+	}
+	return 0;
+}
+
+static int shmobile_iommu_add_device(struct device *dev)
+{
+	struct shmobile_iommu_archdata *archdata = ipmmu_archdata;
+	struct dma_iommu_mapping *mapping;
+
+	if (!find_dev_name(archdata->ipmmu, dev_name(dev)))
+		return 0;
+	mapping = archdata->iommu_mapping;
+	if (!mapping) {
+		mapping = arm_iommu_create_mapping(&platform_bus_type, 0,
+						   L1_LEN << 20, 0);
+		if (IS_ERR(mapping))
+			return PTR_ERR(mapping);
+		archdata->iommu_mapping = mapping;
+	}
+	dev->archdata.iommu = archdata;
+	if (arm_iommu_attach_device(dev, mapping))
+		pr_err("arm_iommu_attach_device failed\n");
+	return 0;
+}
+
+static struct iommu_ops shmobile_iommu_ops = {
+	.domain_init = shmobile_iommu_domain_init,
+	.domain_destroy = shmobile_iommu_domain_destroy,
+	.attach_dev = shmobile_iommu_attach_device,
+	.detach_dev = shmobile_iommu_detach_device,
+	.map = shmobile_iommu_map,
+	.unmap = shmobile_iommu_unmap,
+	.iova_to_phys = shmobile_iommu_iova_to_phys,
+	.add_device = shmobile_iommu_add_device,
+	.pgsize_bitmap = SZ_1M | SZ_64K | SZ_4K,
+};
+
+int ipmmu_iommu_init(struct shmobile_ipmmu *ipmmu)
+{
+	static struct shmobile_iommu_archdata *archdata;
+
+	l1cache = kmem_cache_create("shmobile-iommu-pgtable1", L1_SIZE,
+				    L1_ALIGN, SLAB_HWCACHE_ALIGN, NULL);
+	if (!l1cache)
+		return -ENOMEM;
+	l2cache = kmem_cache_create("shmobile-iommu-pgtable2", L2_SIZE,
+				    L2_ALIGN, SLAB_HWCACHE_ALIGN, NULL);
+	if (!l2cache) {
+		kmem_cache_destroy(l1cache);
+		return -ENOMEM;
+	}
+	archdata = kmalloc(sizeof(*archdata), GFP_KERNEL);
+	if (!archdata) {
+		kmem_cache_destroy(l1cache);
+		kmem_cache_destroy(l2cache);
+		return -ENOMEM;
+	}
+	spin_lock_init(&archdata->attach_lock);
+	archdata->attached = NULL;
+	archdata->ipmmu = ipmmu;
+	ipmmu_archdata = archdata;
+	bus_set_iommu(&platform_bus_type, &shmobile_iommu_ops);
+	return 0;
+}
diff --git a/drivers/iommu/shmobile-ipmmu.c b/drivers/iommu/shmobile-ipmmu.c
new file mode 100644
index 000000000000..8321f89596c4
--- /dev/null
+++ b/drivers/iommu/shmobile-ipmmu.c
@@ -0,0 +1,136 @@
+/*
+ * IPMMU/IPMMUI
+ * Copyright (C) 2012  Hideki EIRAKU
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ */
+
+#include <linux/err.h>
+#include <linux/export.h>
+#include <linux/io.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/platform_data/sh_ipmmu.h>
+#include "shmobile-ipmmu.h"
+
+#define IMCTR1 0x000
+#define IMCTR2 0x004
+#define IMASID 0x010
+#define IMTTBR 0x014
+#define IMTTBCR 0x018
+
+#define IMCTR1_TLBEN (1 << 0)
+#define IMCTR1_FLUSH (1 << 1)
+
+static void ipmmu_reg_write(struct shmobile_ipmmu *ipmmu, unsigned long reg_off,
+			    unsigned long data)
+{
+	iowrite32(data, ipmmu->ipmmu_base + reg_off);
+}
+
+void ipmmu_tlb_flush(struct shmobile_ipmmu *ipmmu)
+{
+	if (!ipmmu)
+		return;
+
+	mutex_lock(&ipmmu->flush_lock);
+	if (ipmmu->tlb_enabled)
+		ipmmu_reg_write(ipmmu, IMCTR1, IMCTR1_FLUSH | IMCTR1_TLBEN);
+	else
+		ipmmu_reg_write(ipmmu, IMCTR1, IMCTR1_FLUSH);
+	mutex_unlock(&ipmmu->flush_lock);
+}
+
+void ipmmu_tlb_set(struct shmobile_ipmmu *ipmmu, unsigned long phys, int size,
+		   int asid)
+{
+	if (!ipmmu)
+		return;
+
+	mutex_lock(&ipmmu->flush_lock);
+	switch (size) {
+	default:
+		ipmmu->tlb_enabled = 0;
+		break;
+	case 0x2000:
+		ipmmu_reg_write(ipmmu, IMTTBCR, 1);
+		ipmmu->tlb_enabled = 1;
+		break;
+	case 0x1000:
+		ipmmu_reg_write(ipmmu, IMTTBCR, 2);
+		ipmmu->tlb_enabled = 1;
+		break;
+	case 0x800:
+		ipmmu_reg_write(ipmmu, IMTTBCR, 3);
+		ipmmu->tlb_enabled = 1;
+		break;
+	case 0x400:
+		ipmmu_reg_write(ipmmu, IMTTBCR, 4);
+		ipmmu->tlb_enabled = 1;
+		break;
+	case 0x200:
+		ipmmu_reg_write(ipmmu, IMTTBCR, 5);
+		ipmmu->tlb_enabled = 1;
+		break;
+	case 0x100:
+		ipmmu_reg_write(ipmmu, IMTTBCR, 6);
+		ipmmu->tlb_enabled = 1;
+		break;
+	case 0x80:
+		ipmmu_reg_write(ipmmu, IMTTBCR, 7);
+		ipmmu->tlb_enabled = 1;
+		break;
+	}
+	ipmmu_reg_write(ipmmu, IMTTBR, phys);
+	ipmmu_reg_write(ipmmu, IMASID, asid);
+	mutex_unlock(&ipmmu->flush_lock);
+}
+
+static int ipmmu_probe(struct platform_device *pdev)
+{
+	struct shmobile_ipmmu *ipmmu;
+	struct resource *res;
+	struct shmobile_ipmmu_platform_data *pdata = pdev->dev.platform_data;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res) {
+		dev_err(&pdev->dev, "cannot get platform resources\n");
+		return -ENOENT;
+	}
+	ipmmu = devm_kzalloc(&pdev->dev, sizeof(*ipmmu), GFP_KERNEL);
+	if (!ipmmu) {
+		dev_err(&pdev->dev, "cannot allocate device data\n");
+		return -ENOMEM;
+	}
+	mutex_init(&ipmmu->flush_lock);
+	ipmmu->dev = &pdev->dev;
+	ipmmu->ipmmu_base = devm_ioremap_nocache(&pdev->dev, res->start,
+						resource_size(res));
+	if (!ipmmu->ipmmu_base) {
+		dev_err(&pdev->dev, "ioremap_nocache failed\n");
+		return -ENOMEM;
+	}
+	ipmmu->dev_names = pdata->dev_names;
+	ipmmu->num_dev_names = pdata->num_dev_names;
+	platform_set_drvdata(pdev, ipmmu);
+	ipmmu_reg_write(ipmmu, IMCTR1, 0x0); /* disable TLB */
+	ipmmu_reg_write(ipmmu, IMCTR2, 0x0); /* disable PMB */
+	ipmmu_iommu_init(ipmmu);
+	return 0;
+}
+
+static struct platform_driver ipmmu_driver = {
+	.probe = ipmmu_probe,
+	.driver = {
+		.owner = THIS_MODULE,
+		.name = "ipmmu",
+	},
+};
+
+static int __init ipmmu_init(void)
+{
+	return platform_driver_register(&ipmmu_driver);
+}
+subsys_initcall(ipmmu_init);
diff --git a/drivers/iommu/shmobile-ipmmu.h b/drivers/iommu/shmobile-ipmmu.h
new file mode 100644
index 000000000000..4d53684673e1
--- /dev/null
+++ b/drivers/iommu/shmobile-ipmmu.h
@@ -0,0 +1,34 @@
+/* shmobile-ipmmu.h
+ *
+ * Copyright (C) 2012  Hideki EIRAKU
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ */
+
+#ifndef __SHMOBILE_IPMMU_H__
+#define __SHMOBILE_IPMMU_H__
+
+struct shmobile_ipmmu {
+	struct device *dev;
+	void __iomem *ipmmu_base;
+	int tlb_enabled;
+	struct mutex flush_lock;
+	const char * const *dev_names;
+	unsigned int num_dev_names;
+};
+
+#ifdef CONFIG_SHMOBILE_IPMMU_TLB
+void ipmmu_tlb_flush(struct shmobile_ipmmu *ipmmu);
+void ipmmu_tlb_set(struct shmobile_ipmmu *ipmmu, unsigned long phys, int size,
+		   int asid);
+int ipmmu_iommu_init(struct shmobile_ipmmu *ipmmu);
+#else
+static inline int ipmmu_iommu_init(struct shmobile_ipmmu *ipmmu)
+{
+	return -EINVAL;
+}
+#endif
+
+#endif /* __SHMOBILE_IPMMU_H__ */
diff --git a/include/linux/platform_data/sh_ipmmu.h b/include/linux/platform_data/sh_ipmmu.h
new file mode 100644
index 000000000000..39f7405cdac5
--- /dev/null
+++ b/include/linux/platform_data/sh_ipmmu.h
@@ -0,0 +1,18 @@
+/* sh_ipmmu.h
+ *
+ * Copyright (C) 2012  Hideki EIRAKU
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ */
+
+#ifndef __SH_IPMMU_H__
+#define __SH_IPMMU_H__
+
+struct shmobile_ipmmu_platform_data {
+	const char * const *dev_names;
+	unsigned int num_dev_names;
+};
+
+#endif /* __SH_IPMMU_H__ */
-- 
cgit v1.2.3


From c0a05bf0182efdf7cd1fd8aa327e7a602360b67e Mon Sep 17 00:00:00 2001
From: Steffen Trumtrar <s.trumtrar@pengutronix.de>
Date: Tue, 18 Dec 2012 11:32:03 +0100
Subject: of: add 'const' to of_node_full_name parameter

As the function just returns the np->full_name or the string "<no-node>", the
passed device_node pointer is not changed in any way.

The passed parameter can therefore be a const pointer.

Also, fix the following error from checkpatch.pl:

ERROR: "foo* bar" should be "foo *bar"
+static inline const char* of_node_full_name(const struct device_node *np)

Signed-off-by: Steffen Trumtrar <s.trumtrar@pengutronix.de>
Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
---
 include/linux/of.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/of.h b/include/linux/of.h
index 5ebcc5c8e423..1e0d0c1bfb5e 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -160,7 +160,7 @@ static inline unsigned long of_read_ulong(const __be32 *cell, int size)
 
 #define OF_BAD_ADDR	((u64)-1)
 
-static inline const char* of_node_full_name(struct device_node *np)
+static inline const char *of_node_full_name(const struct device_node *np)
 {
 	return np ? np->full_name : "<no-node>";
 }
-- 
cgit v1.2.3


From 5372d2d71c46e5649e5d2edd4514adcd6fe7a085 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Fri, 12 Oct 2012 23:01:58 -0700
Subject: hwmon: Driver for Maxim MAX6697 and compatibles

Add support for MAX6581, MAX6602, MAX6622, MAX6636, MAX6689, MAX6693,
MAX6694, MAX6697, MAX6698, and MAX6699 temperature sensors

Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Reviewed-by: Jean Delvare <khali@linux-fr.org>
---
 Documentation/devicetree/bindings/i2c/max6697.txt |  64 ++
 Documentation/hwmon/max6697                       |  58 ++
 drivers/hwmon/Kconfig                             |  11 +
 drivers/hwmon/Makefile                            |   1 +
 drivers/hwmon/max6697.c                           | 726 ++++++++++++++++++++++
 include/linux/platform_data/max6697.h             |  36 ++
 6 files changed, 896 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/i2c/max6697.txt
 create mode 100644 Documentation/hwmon/max6697
 create mode 100644 drivers/hwmon/max6697.c
 create mode 100644 include/linux/platform_data/max6697.h

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/i2c/max6697.txt b/Documentation/devicetree/bindings/i2c/max6697.txt
new file mode 100644
index 000000000000..5f793998e4a4
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/max6697.txt
@@ -0,0 +1,64 @@
+max6697 properties
+
+Required properties:
+- compatible:
+	Should be one of
+		maxim,max6581
+		maxim,max6602
+		maxim,max6622
+		maxim,max6636
+		maxim,max6689
+		maxim,max6693
+		maxim,max6694
+		maxim,max6697
+		maxim,max6698
+		maxim,max6699
+- reg: I2C address
+
+Optional properties:
+
+- smbus-timeout-disable
+	Set to disable SMBus timeout. If not specified, SMBus timeout will be
+	enabled.
+- extended-range-enable
+	Only valid for MAX6581. Set to enable extended temperature range.
+	Extended temperature will be disabled if not specified.
+- beta-compensation-enable
+	Only valid for MAX6693 and MX6694. Set to enable beta compensation on
+	remote temperature channel 1.
+	Beta compensation will be disabled if not specified.
+- alert-mask
+	Alert bit mask. Alert disabled for bits set.
+	Select bit 0 for local temperature, bit 1..7 for remote temperatures.
+	If not specified, alert will be enabled for all channels.
+- over-temperature-mask
+	Over-temperature bit mask. Over-temperature reporting disabled for
+	bits set.
+	Select bit 0 for local temperature, bit 1..7 for remote temperatures.
+	If not specified, over-temperature reporting will be enabled for all
+	channels.
+- resistance-cancellation
+	Boolean for all chips other than MAX6581. Set to enable resistance
+	cancellation on remote temperature channel 1.
+	For MAX6581, resistance cancellation enabled for all channels if
+	specified as boolean, otherwise as per bit mask specified.
+	Only supported for remote temperatures (bit 1..7).
+	If not specified, resistance cancellation will be disabled for all
+	channels.
+- transistor-ideality
+	For MAX6581 only. Two values; first is bit mask, second is ideality
+	select value as per MAX6581 data sheet. Select bit 1..7 for remote
+	channels.
+	Transistor ideality will be initialized to default (1.008) if not
+	specified.
+
+Example:
+
+temp-sensor@1a {
+	compatible = "maxim,max6697";
+	reg = <0x1a>;
+	smbus-timeout-disable;
+	resistance-cancellation;
+	alert-mask = <0x72>;
+	over-temperature-mask = <0x7f>;
+};
diff --git a/Documentation/hwmon/max6697 b/Documentation/hwmon/max6697
new file mode 100644
index 000000000000..6594177ededa
--- /dev/null
+++ b/Documentation/hwmon/max6697
@@ -0,0 +1,58 @@
+Kernel driver max6697
+=====================
+
+Supported chips:
+  * Maxim MAX6581
+    Prefix: 'max6581'
+    Datasheet: http://datasheets.maximintegrated.com/en/ds/MAX6581.pdf
+  * Maxim MAX6602
+    Prefix: 'max6602'
+    Datasheet: http://datasheets.maximintegrated.com/en/ds/MAX6602.pdf
+  * Maxim MAX6622
+    Prefix: 'max6622'
+    Datasheet: http://datasheets.maximintegrated.com/en/ds/MAX6622.pdf
+  * Maxim MAX6636
+    Prefix: 'max6636'
+    Datasheet: http://datasheets.maximintegrated.com/en/ds/MAX6636.pdf
+  * Maxim MAX6689
+    Prefix: 'max6689'
+    Datasheet: http://datasheets.maximintegrated.com/en/ds/MAX6689.pdf
+  * Maxim MAX6693
+    Prefix: 'max6693'
+    Datasheet: http://datasheets.maximintegrated.com/en/ds/MAX6693.pdf
+  * Maxim MAX6694
+    Prefix: 'max6694'
+    Datasheet: http://datasheets.maximintegrated.com/en/ds/MAX6694.pdf
+  * Maxim MAX6697
+    Prefix: 'max6697'
+    Datasheet: http://datasheets.maximintegrated.com/en/ds/MAX6697.pdf
+  * Maxim MAX6698
+    Prefix: 'max6698'
+    Datasheet: http://datasheets.maximintegrated.com/en/ds/MAX6698.pdf
+  * Maxim MAX6699
+    Prefix: 'max6699'
+    Datasheet: http://datasheets.maximintegrated.com/en/ds/MAX6699.pdf
+
+Author:
+    Guenter Roeck <linux@roeck-us.net>
+
+Description
+-----------
+
+This driver implements support for several MAX6697 compatible temperature sensor
+chips. The chips support one local temperature sensor plus four, six, or seven
+remote temperature sensors. Remote temperature sensors are diode-connected
+thermal transitors, except for MAX6698 which supports three diode-connected
+thermal transistors plus three thermistors in addition to the local temperature
+sensor.
+
+The driver provides the following sysfs attributes. temp1 is the local (chip)
+temperature, temp[2..n] are remote temperatures. The actually supported
+per-channel attributes are chip type and channel dependent.
+
+tempX_input      RO temperature
+tempX_max        RW temperature maximum threshold
+tempX_max_alarm  RO temperature maximum threshold alarm
+tempX_crit       RW temperature critical threshold
+tempX_crit_alarm RO temperature critical threshold alarm
+tempX_fault      RO temperature diode fault (remote sensors only)
diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
index 32f238f3caea..ef5757265f78 100644
--- a/drivers/hwmon/Kconfig
+++ b/drivers/hwmon/Kconfig
@@ -854,6 +854,17 @@ config SENSORS_MAX6650
 	  This driver can also be built as a module.  If so, the module
 	  will be called max6650.
 
+config SENSORS_MAX6697
+	tristate "Maxim MAX6697 and compatibles"
+	depends on I2C
+	help
+	  If you say yes here you get support for MAX6581, MAX6602, MAX6622,
+	  MAX6636, MAX6689, MAX6693, MAX6694, MAX6697, MAX6698, and MAX6699
+	  temperature sensor chips.
+
+	  This driver can also be built as a module.  If so, the module
+	  will be called max6697.
+
 config SENSORS_MCP3021
 	tristate "Microchip MCP3021 and compatibles"
 	depends on I2C
diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile
index 5da287443f6c..a37a82c64da2 100644
--- a/drivers/hwmon/Makefile
+++ b/drivers/hwmon/Makefile
@@ -99,6 +99,7 @@ obj-$(CONFIG_SENSORS_MAX197)	+= max197.o
 obj-$(CONFIG_SENSORS_MAX6639)	+= max6639.o
 obj-$(CONFIG_SENSORS_MAX6642)	+= max6642.o
 obj-$(CONFIG_SENSORS_MAX6650)	+= max6650.o
+obj-$(CONFIG_SENSORS_MAX6697)	+= max6697.o
 obj-$(CONFIG_SENSORS_MC13783_ADC)+= mc13783-adc.o
 obj-$(CONFIG_SENSORS_MCP3021)	+= mcp3021.o
 obj-$(CONFIG_SENSORS_NTC_THERMISTOR)	+= ntc_thermistor.o
diff --git a/drivers/hwmon/max6697.c b/drivers/hwmon/max6697.c
new file mode 100644
index 000000000000..bf4aa3777fc1
--- /dev/null
+++ b/drivers/hwmon/max6697.c
@@ -0,0 +1,726 @@
+/*
+ * Copyright (c) 2012 Guenter Roeck <linux@roeck-us.net>
+ *
+ * based on max1668.c
+ * Copyright (c) 2011 David George <david.george@ska.ac.za>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/jiffies.h>
+#include <linux/i2c.h>
+#include <linux/hwmon.h>
+#include <linux/hwmon-sysfs.h>
+#include <linux/err.h>
+#include <linux/mutex.h>
+#include <linux/of.h>
+
+#include <linux/platform_data/max6697.h>
+
+enum chips { max6581, max6602, max6622, max6636, max6689, max6693, max6694,
+	     max6697, max6698, max6699 };
+
+/* Report local sensor as temp1 */
+
+static const u8 MAX6697_REG_TEMP[] = {
+			0x07, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x08 };
+static const u8 MAX6697_REG_TEMP_EXT[] = {
+			0x57, 0x09, 0x52, 0x53, 0x54, 0x55, 0x56, 0 };
+static const u8 MAX6697_REG_MAX[] = {
+			0x17, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x18 };
+static const u8 MAX6697_REG_CRIT[] = {
+			0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27 };
+
+/*
+ * Map device tree / platform data register bit map to chip bit map.
+ * Applies to alert register and over-temperature register.
+ */
+#define MAX6697_MAP_BITS(reg)	((((reg) & 0x7e) >> 1) | \
+				 (((reg) & 0x01) << 6) | ((reg) & 0x80))
+
+#define MAX6697_REG_STAT(n)		(0x44 + (n))
+
+#define MAX6697_REG_CONFIG		0x41
+#define MAX6581_CONF_EXTENDED		(1 << 1)
+#define MAX6693_CONF_BETA		(1 << 2)
+#define MAX6697_CONF_RESISTANCE		(1 << 3)
+#define MAX6697_CONF_TIMEOUT		(1 << 5)
+#define MAX6697_REG_ALERT_MASK		0x42
+#define MAX6697_REG_OVERT_MASK		0x43
+
+#define MAX6581_REG_RESISTANCE		0x4a
+#define MAX6581_REG_IDEALITY		0x4b
+#define MAX6581_REG_IDEALITY_SELECT	0x4c
+#define MAX6581_REG_OFFSET		0x4d
+#define MAX6581_REG_OFFSET_SELECT	0x4e
+
+#define MAX6697_CONV_TIME		156	/* ms per channel, worst case */
+
+struct max6697_chip_data {
+	int channels;
+	u32 have_ext;
+	u32 have_crit;
+	u32 have_fault;
+	u8 valid_conf;
+	const u8 *alarm_map;
+};
+
+struct max6697_data {
+	struct device *hwmon_dev;
+
+	enum chips type;
+	const struct max6697_chip_data *chip;
+
+	int update_interval;	/* in milli-seconds */
+	int temp_offset;	/* in degrees C */
+
+	struct mutex update_lock;
+	unsigned long last_updated;	/* In jiffies */
+	bool valid;		/* true if following fields are valid */
+
+	/* 1x local and up to 7x remote */
+	u8 temp[8][4];		/* [nr][0]=temp [1]=ext [2]=max [3]=crit */
+#define MAX6697_TEMP_INPUT	0
+#define MAX6697_TEMP_EXT	1
+#define MAX6697_TEMP_MAX	2
+#define MAX6697_TEMP_CRIT	3
+	u32 alarms;
+};
+
+/* Diode fault status bits on MAX6581 are right shifted by one bit */
+static const u8 max6581_alarm_map[] = {
+	 0, 0, 1, 2, 3, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15,
+	 16, 17, 18, 19, 20, 21, 22, 23 };
+
+static const struct max6697_chip_data max6697_chip_data[] = {
+	[max6581] = {
+		.channels = 8,
+		.have_crit = 0xff,
+		.have_ext = 0x7f,
+		.have_fault = 0xfe,
+		.valid_conf = MAX6581_CONF_EXTENDED | MAX6697_CONF_TIMEOUT,
+		.alarm_map = max6581_alarm_map,
+	},
+	[max6602] = {
+		.channels = 5,
+		.have_crit = 0x12,
+		.have_ext = 0x02,
+		.have_fault = 0x1e,
+		.valid_conf = MAX6697_CONF_RESISTANCE | MAX6697_CONF_TIMEOUT,
+	},
+	[max6622] = {
+		.channels = 5,
+		.have_crit = 0x12,
+		.have_ext = 0x02,
+		.have_fault = 0x1e,
+		.valid_conf = MAX6697_CONF_RESISTANCE | MAX6697_CONF_TIMEOUT,
+	},
+	[max6636] = {
+		.channels = 7,
+		.have_crit = 0x72,
+		.have_ext = 0x02,
+		.have_fault = 0x7e,
+		.valid_conf = MAX6697_CONF_RESISTANCE | MAX6697_CONF_TIMEOUT,
+	},
+	[max6689] = {
+		.channels = 7,
+		.have_crit = 0x72,
+		.have_ext = 0x02,
+		.have_fault = 0x7e,
+		.valid_conf = MAX6697_CONF_RESISTANCE | MAX6697_CONF_TIMEOUT,
+	},
+	[max6693] = {
+		.channels = 7,
+		.have_crit = 0x72,
+		.have_ext = 0x02,
+		.have_fault = 0x7e,
+		.valid_conf = MAX6697_CONF_RESISTANCE | MAX6693_CONF_BETA |
+		  MAX6697_CONF_TIMEOUT,
+	},
+	[max6694] = {
+		.channels = 5,
+		.have_crit = 0x12,
+		.have_ext = 0x02,
+		.have_fault = 0x1e,
+		.valid_conf = MAX6697_CONF_RESISTANCE | MAX6693_CONF_BETA |
+		  MAX6697_CONF_TIMEOUT,
+	},
+	[max6697] = {
+		.channels = 7,
+		.have_crit = 0x72,
+		.have_ext = 0x02,
+		.have_fault = 0x7e,
+		.valid_conf = MAX6697_CONF_RESISTANCE | MAX6697_CONF_TIMEOUT,
+	},
+	[max6698] = {
+		.channels = 7,
+		.have_crit = 0x72,
+		.have_ext = 0x02,
+		.have_fault = 0x0e,
+		.valid_conf = MAX6697_CONF_RESISTANCE | MAX6697_CONF_TIMEOUT,
+	},
+	[max6699] = {
+		.channels = 5,
+		.have_crit = 0x12,
+		.have_ext = 0x02,
+		.have_fault = 0x1e,
+		.valid_conf = MAX6697_CONF_RESISTANCE | MAX6697_CONF_TIMEOUT,
+	},
+};
+
+static struct max6697_data *max6697_update_device(struct device *dev)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	struct max6697_data *data = i2c_get_clientdata(client);
+	struct max6697_data *ret = data;
+	int val;
+	int i;
+	u32 alarms;
+
+	mutex_lock(&data->update_lock);
+
+	if (data->valid &&
+	    !time_after(jiffies, data->last_updated
+			+ msecs_to_jiffies(data->update_interval)))
+		goto abort;
+
+	for (i = 0; i < data->chip->channels; i++) {
+		if (data->chip->have_ext & (1 << i)) {
+			val = i2c_smbus_read_byte_data(client,
+						       MAX6697_REG_TEMP_EXT[i]);
+			if (unlikely(val < 0)) {
+				ret = ERR_PTR(val);
+				goto abort;
+			}
+			data->temp[i][MAX6697_TEMP_EXT] = val;
+		}
+
+		val = i2c_smbus_read_byte_data(client, MAX6697_REG_TEMP[i]);
+		if (unlikely(val < 0)) {
+			ret = ERR_PTR(val);
+			goto abort;
+		}
+		data->temp[i][MAX6697_TEMP_INPUT] = val;
+
+		val = i2c_smbus_read_byte_data(client, MAX6697_REG_MAX[i]);
+		if (unlikely(val < 0)) {
+			ret = ERR_PTR(val);
+			goto abort;
+		}
+		data->temp[i][MAX6697_TEMP_MAX] = val;
+
+		if (data->chip->have_crit & (1 << i)) {
+			val = i2c_smbus_read_byte_data(client,
+						       MAX6697_REG_CRIT[i]);
+			if (unlikely(val < 0)) {
+				ret = ERR_PTR(val);
+				goto abort;
+			}
+			data->temp[i][MAX6697_TEMP_CRIT] = val;
+		}
+	}
+
+	alarms = 0;
+	for (i = 0; i < 3; i++) {
+		val = i2c_smbus_read_byte_data(client, MAX6697_REG_STAT(i));
+		if (unlikely(val < 0)) {
+			ret = ERR_PTR(val);
+			goto abort;
+		}
+		alarms = (alarms << 8) | val;
+	}
+	data->alarms = alarms;
+	data->last_updated = jiffies;
+	data->valid = true;
+abort:
+	mutex_unlock(&data->update_lock);
+
+	return ret;
+}
+
+static ssize_t show_temp_input(struct device *dev,
+			       struct device_attribute *devattr, char *buf)
+{
+	int index = to_sensor_dev_attr(devattr)->index;
+	struct max6697_data *data = max6697_update_device(dev);
+	int temp;
+
+	if (IS_ERR(data))
+		return PTR_ERR(data);
+
+	temp = (data->temp[index][MAX6697_TEMP_INPUT] - data->temp_offset) << 3;
+	temp |= data->temp[index][MAX6697_TEMP_EXT] >> 5;
+
+	return sprintf(buf, "%d\n", temp * 125);
+}
+
+static ssize_t show_temp(struct device *dev,
+			 struct device_attribute *devattr, char *buf)
+{
+	int nr = to_sensor_dev_attr_2(devattr)->nr;
+	int index = to_sensor_dev_attr_2(devattr)->index;
+	struct max6697_data *data = max6697_update_device(dev);
+	int temp;
+
+	if (IS_ERR(data))
+		return PTR_ERR(data);
+
+	temp = data->temp[nr][index];
+	temp -= data->temp_offset;
+
+	return sprintf(buf, "%d\n", temp * 1000);
+}
+
+static ssize_t show_alarm(struct device *dev, struct device_attribute *attr,
+			  char *buf)
+{
+	int index = to_sensor_dev_attr(attr)->index;
+	struct max6697_data *data = max6697_update_device(dev);
+
+	if (IS_ERR(data))
+		return PTR_ERR(data);
+
+	if (data->chip->alarm_map)
+		index = data->chip->alarm_map[index];
+
+	return sprintf(buf, "%u\n", (data->alarms >> index) & 0x1);
+}
+
+static ssize_t set_temp(struct device *dev,
+			struct device_attribute *devattr,
+			const char *buf, size_t count)
+{
+	int nr = to_sensor_dev_attr_2(devattr)->nr;
+	int index = to_sensor_dev_attr_2(devattr)->index;
+	struct i2c_client *client = to_i2c_client(dev);
+	struct max6697_data *data = i2c_get_clientdata(client);
+	long temp;
+	int ret;
+
+	ret = kstrtol(buf, 10, &temp);
+	if (ret < 0)
+		return ret;
+
+	mutex_lock(&data->update_lock);
+	temp = DIV_ROUND_CLOSEST(temp, 1000) + data->temp_offset;
+	temp = clamp_val(temp, 0, data->type == max6581 ? 255 : 127);
+	data->temp[nr][index] = temp;
+	ret = i2c_smbus_write_byte_data(client,
+					index == 2 ? MAX6697_REG_MAX[nr]
+						   : MAX6697_REG_CRIT[nr],
+					temp);
+	mutex_unlock(&data->update_lock);
+
+	return ret < 0 ? ret : count;
+}
+
+static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, show_temp_input, NULL, 0);
+static SENSOR_DEVICE_ATTR_2(temp1_max, S_IRUGO | S_IWUSR, show_temp, set_temp,
+			    0, MAX6697_TEMP_MAX);
+static SENSOR_DEVICE_ATTR_2(temp1_crit, S_IRUGO | S_IWUSR, show_temp, set_temp,
+			    0, MAX6697_TEMP_CRIT);
+
+static SENSOR_DEVICE_ATTR(temp2_input, S_IRUGO, show_temp_input, NULL, 1);
+static SENSOR_DEVICE_ATTR_2(temp2_max, S_IRUGO | S_IWUSR, show_temp, set_temp,
+			    1, MAX6697_TEMP_MAX);
+static SENSOR_DEVICE_ATTR_2(temp2_crit, S_IRUGO | S_IWUSR, show_temp, set_temp,
+			    1, MAX6697_TEMP_CRIT);
+
+static SENSOR_DEVICE_ATTR(temp3_input, S_IRUGO, show_temp_input, NULL, 2);
+static SENSOR_DEVICE_ATTR_2(temp3_max, S_IRUGO | S_IWUSR, show_temp, set_temp,
+			    2, MAX6697_TEMP_MAX);
+static SENSOR_DEVICE_ATTR_2(temp3_crit, S_IRUGO | S_IWUSR, show_temp, set_temp,
+			    2, MAX6697_TEMP_CRIT);
+
+static SENSOR_DEVICE_ATTR(temp4_input, S_IRUGO, show_temp_input, NULL, 3);
+static SENSOR_DEVICE_ATTR_2(temp4_max, S_IRUGO | S_IWUSR, show_temp, set_temp,
+			    3, MAX6697_TEMP_MAX);
+static SENSOR_DEVICE_ATTR_2(temp4_crit, S_IRUGO | S_IWUSR, show_temp, set_temp,
+			    3, MAX6697_TEMP_CRIT);
+
+static SENSOR_DEVICE_ATTR(temp5_input, S_IRUGO, show_temp_input, NULL, 4);
+static SENSOR_DEVICE_ATTR_2(temp5_max, S_IRUGO | S_IWUSR, show_temp, set_temp,
+			    4, MAX6697_TEMP_MAX);
+static SENSOR_DEVICE_ATTR_2(temp5_crit, S_IRUGO | S_IWUSR, show_temp, set_temp,
+			    4, MAX6697_TEMP_CRIT);
+
+static SENSOR_DEVICE_ATTR(temp6_input, S_IRUGO, show_temp_input, NULL, 5);
+static SENSOR_DEVICE_ATTR_2(temp6_max, S_IRUGO | S_IWUSR, show_temp, set_temp,
+			    5, MAX6697_TEMP_MAX);
+static SENSOR_DEVICE_ATTR_2(temp6_crit, S_IRUGO | S_IWUSR, show_temp, set_temp,
+			    5, MAX6697_TEMP_CRIT);
+
+static SENSOR_DEVICE_ATTR(temp7_input, S_IRUGO, show_temp_input, NULL, 6);
+static SENSOR_DEVICE_ATTR_2(temp7_max, S_IRUGO | S_IWUSR, show_temp, set_temp,
+			    6, MAX6697_TEMP_MAX);
+static SENSOR_DEVICE_ATTR_2(temp7_crit, S_IRUGO | S_IWUSR, show_temp, set_temp,
+			    6, MAX6697_TEMP_CRIT);
+
+static SENSOR_DEVICE_ATTR(temp8_input, S_IRUGO, show_temp_input, NULL, 7);
+static SENSOR_DEVICE_ATTR_2(temp8_max, S_IRUGO | S_IWUSR, show_temp, set_temp,
+			    7, MAX6697_TEMP_MAX);
+static SENSOR_DEVICE_ATTR_2(temp8_crit, S_IRUGO | S_IWUSR, show_temp, set_temp,
+			    7, MAX6697_TEMP_CRIT);
+
+static SENSOR_DEVICE_ATTR(temp1_max_alarm, S_IRUGO, show_alarm, NULL, 22);
+static SENSOR_DEVICE_ATTR(temp2_max_alarm, S_IRUGO, show_alarm, NULL, 16);
+static SENSOR_DEVICE_ATTR(temp3_max_alarm, S_IRUGO, show_alarm, NULL, 17);
+static SENSOR_DEVICE_ATTR(temp4_max_alarm, S_IRUGO, show_alarm, NULL, 18);
+static SENSOR_DEVICE_ATTR(temp5_max_alarm, S_IRUGO, show_alarm, NULL, 19);
+static SENSOR_DEVICE_ATTR(temp6_max_alarm, S_IRUGO, show_alarm, NULL, 20);
+static SENSOR_DEVICE_ATTR(temp7_max_alarm, S_IRUGO, show_alarm, NULL, 21);
+static SENSOR_DEVICE_ATTR(temp8_max_alarm, S_IRUGO, show_alarm, NULL, 23);
+
+static SENSOR_DEVICE_ATTR(temp1_crit_alarm, S_IRUGO, show_alarm, NULL, 14);
+static SENSOR_DEVICE_ATTR(temp2_crit_alarm, S_IRUGO, show_alarm, NULL, 8);
+static SENSOR_DEVICE_ATTR(temp3_crit_alarm, S_IRUGO, show_alarm, NULL, 9);
+static SENSOR_DEVICE_ATTR(temp4_crit_alarm, S_IRUGO, show_alarm, NULL, 10);
+static SENSOR_DEVICE_ATTR(temp5_crit_alarm, S_IRUGO, show_alarm, NULL, 11);
+static SENSOR_DEVICE_ATTR(temp6_crit_alarm, S_IRUGO, show_alarm, NULL, 12);
+static SENSOR_DEVICE_ATTR(temp7_crit_alarm, S_IRUGO, show_alarm, NULL, 13);
+static SENSOR_DEVICE_ATTR(temp8_crit_alarm, S_IRUGO, show_alarm, NULL, 15);
+
+static SENSOR_DEVICE_ATTR(temp2_fault, S_IRUGO, show_alarm, NULL, 1);
+static SENSOR_DEVICE_ATTR(temp3_fault, S_IRUGO, show_alarm, NULL, 2);
+static SENSOR_DEVICE_ATTR(temp4_fault, S_IRUGO, show_alarm, NULL, 3);
+static SENSOR_DEVICE_ATTR(temp5_fault, S_IRUGO, show_alarm, NULL, 4);
+static SENSOR_DEVICE_ATTR(temp6_fault, S_IRUGO, show_alarm, NULL, 5);
+static SENSOR_DEVICE_ATTR(temp7_fault, S_IRUGO, show_alarm, NULL, 6);
+static SENSOR_DEVICE_ATTR(temp8_fault, S_IRUGO, show_alarm, NULL, 7);
+
+static struct attribute *max6697_attributes[8][7] = {
+	{
+		&sensor_dev_attr_temp1_input.dev_attr.attr,
+		&sensor_dev_attr_temp1_max.dev_attr.attr,
+		&sensor_dev_attr_temp1_max_alarm.dev_attr.attr,
+		&sensor_dev_attr_temp1_crit.dev_attr.attr,
+		&sensor_dev_attr_temp1_crit_alarm.dev_attr.attr,
+		NULL
+	}, {
+		&sensor_dev_attr_temp2_input.dev_attr.attr,
+		&sensor_dev_attr_temp2_max.dev_attr.attr,
+		&sensor_dev_attr_temp2_max_alarm.dev_attr.attr,
+		&sensor_dev_attr_temp2_crit.dev_attr.attr,
+		&sensor_dev_attr_temp2_crit_alarm.dev_attr.attr,
+		&sensor_dev_attr_temp2_fault.dev_attr.attr,
+		NULL
+	}, {
+		&sensor_dev_attr_temp3_input.dev_attr.attr,
+		&sensor_dev_attr_temp3_max.dev_attr.attr,
+		&sensor_dev_attr_temp3_max_alarm.dev_attr.attr,
+		&sensor_dev_attr_temp3_crit.dev_attr.attr,
+		&sensor_dev_attr_temp3_crit_alarm.dev_attr.attr,
+		&sensor_dev_attr_temp3_fault.dev_attr.attr,
+		NULL
+	}, {
+		&sensor_dev_attr_temp4_input.dev_attr.attr,
+		&sensor_dev_attr_temp4_max.dev_attr.attr,
+		&sensor_dev_attr_temp4_max_alarm.dev_attr.attr,
+		&sensor_dev_attr_temp4_crit.dev_attr.attr,
+		&sensor_dev_attr_temp4_crit_alarm.dev_attr.attr,
+		&sensor_dev_attr_temp4_fault.dev_attr.attr,
+		NULL
+	}, {
+		&sensor_dev_attr_temp5_input.dev_attr.attr,
+		&sensor_dev_attr_temp5_max.dev_attr.attr,
+		&sensor_dev_attr_temp5_max_alarm.dev_attr.attr,
+		&sensor_dev_attr_temp5_crit.dev_attr.attr,
+		&sensor_dev_attr_temp5_crit_alarm.dev_attr.attr,
+		&sensor_dev_attr_temp5_fault.dev_attr.attr,
+		NULL
+	}, {
+		&sensor_dev_attr_temp6_input.dev_attr.attr,
+		&sensor_dev_attr_temp6_max.dev_attr.attr,
+		&sensor_dev_attr_temp6_max_alarm.dev_attr.attr,
+		&sensor_dev_attr_temp6_crit.dev_attr.attr,
+		&sensor_dev_attr_temp6_crit_alarm.dev_attr.attr,
+		&sensor_dev_attr_temp6_fault.dev_attr.attr,
+		NULL
+	}, {
+		&sensor_dev_attr_temp7_input.dev_attr.attr,
+		&sensor_dev_attr_temp7_max.dev_attr.attr,
+		&sensor_dev_attr_temp7_max_alarm.dev_attr.attr,
+		&sensor_dev_attr_temp7_crit.dev_attr.attr,
+		&sensor_dev_attr_temp7_crit_alarm.dev_attr.attr,
+		&sensor_dev_attr_temp7_fault.dev_attr.attr,
+		NULL
+	}, {
+		&sensor_dev_attr_temp8_input.dev_attr.attr,
+		&sensor_dev_attr_temp8_max.dev_attr.attr,
+		&sensor_dev_attr_temp8_max_alarm.dev_attr.attr,
+		&sensor_dev_attr_temp8_crit.dev_attr.attr,
+		&sensor_dev_attr_temp8_crit_alarm.dev_attr.attr,
+		&sensor_dev_attr_temp8_fault.dev_attr.attr,
+		NULL
+	}
+};
+
+static const struct attribute_group max6697_group[8] = {
+	{ .attrs = max6697_attributes[0] },
+	{ .attrs = max6697_attributes[1] },
+	{ .attrs = max6697_attributes[2] },
+	{ .attrs = max6697_attributes[3] },
+	{ .attrs = max6697_attributes[4] },
+	{ .attrs = max6697_attributes[5] },
+	{ .attrs = max6697_attributes[6] },
+	{ .attrs = max6697_attributes[7] },
+};
+
+static void max6697_get_config_of(struct device_node *node,
+				  struct max6697_platform_data *pdata)
+{
+	int len;
+	const __be32 *prop;
+
+	prop = of_get_property(node, "smbus-timeout-disable", &len);
+	if (prop)
+		pdata->smbus_timeout_disable = true;
+	prop = of_get_property(node, "extended-range-enable", &len);
+	if (prop)
+		pdata->extended_range_enable = true;
+	prop = of_get_property(node, "beta-compensation-enable", &len);
+	if (prop)
+		pdata->beta_compensation = true;
+	prop = of_get_property(node, "alert-mask", &len);
+	if (prop && len == sizeof(u32))
+		pdata->alert_mask = be32_to_cpu(prop[0]);
+	prop = of_get_property(node, "over-temperature-mask", &len);
+	if (prop && len == sizeof(u32))
+		pdata->over_temperature_mask = be32_to_cpu(prop[0]);
+	prop = of_get_property(node, "resistance-cancellation", &len);
+	if (prop) {
+		if (len == sizeof(u32))
+			pdata->resistance_cancellation = be32_to_cpu(prop[0]);
+		else
+			pdata->resistance_cancellation = 0xfe;
+	}
+	prop = of_get_property(node, "transistor-ideality", &len);
+	if (prop && len == 2 * sizeof(u32)) {
+			pdata->ideality_mask = be32_to_cpu(prop[0]);
+			pdata->ideality_value = be32_to_cpu(prop[1]);
+	}
+}
+
+static int max6697_init_chip(struct i2c_client *client)
+{
+	struct max6697_data *data = i2c_get_clientdata(client);
+	struct max6697_platform_data *pdata = dev_get_platdata(&client->dev);
+	struct max6697_platform_data p;
+	const struct max6697_chip_data *chip = data->chip;
+	int factor = chip->channels;
+	int ret, reg;
+
+	/*
+	 * Don't touch configuration if neither platform data nor OF
+	 * configuration was specified. If that is the case, use the
+	 * current chip configuration.
+	 */
+	if (!pdata && !client->dev.of_node) {
+		reg = i2c_smbus_read_byte_data(client, MAX6697_REG_CONFIG);
+		if (reg < 0)
+			return reg;
+		if (data->type == max6581) {
+			if (reg & MAX6581_CONF_EXTENDED)
+				data->temp_offset = 64;
+			reg = i2c_smbus_read_byte_data(client,
+						       MAX6581_REG_RESISTANCE);
+			if (reg < 0)
+				return reg;
+			factor += hweight8(reg);
+		} else {
+			if (reg & MAX6697_CONF_RESISTANCE)
+				factor++;
+		}
+		goto done;
+	}
+
+	if (client->dev.of_node) {
+		memset(&p, 0, sizeof(p));
+		max6697_get_config_of(client->dev.of_node, &p);
+		pdata = &p;
+	}
+
+	reg = 0;
+	if (pdata->smbus_timeout_disable &&
+	    (chip->valid_conf & MAX6697_CONF_TIMEOUT)) {
+		reg |= MAX6697_CONF_TIMEOUT;
+	}
+	if (pdata->extended_range_enable &&
+	    (chip->valid_conf & MAX6581_CONF_EXTENDED)) {
+		reg |= MAX6581_CONF_EXTENDED;
+		data->temp_offset = 64;
+	}
+	if (pdata->resistance_cancellation &&
+	    (chip->valid_conf & MAX6697_CONF_RESISTANCE)) {
+		reg |= MAX6697_CONF_RESISTANCE;
+		factor++;
+	}
+	if (pdata->beta_compensation &&
+	    (chip->valid_conf & MAX6693_CONF_BETA)) {
+		reg |= MAX6693_CONF_BETA;
+	}
+
+	ret = i2c_smbus_write_byte_data(client, MAX6697_REG_CONFIG, reg);
+	if (ret < 0)
+		return ret;
+
+	ret = i2c_smbus_write_byte_data(client, MAX6697_REG_ALERT_MASK,
+					MAX6697_MAP_BITS(pdata->alert_mask));
+	if (ret < 0)
+		return ret;
+
+	ret = i2c_smbus_write_byte_data(client, MAX6697_REG_OVERT_MASK,
+				MAX6697_MAP_BITS(pdata->over_temperature_mask));
+	if (ret < 0)
+		return ret;
+
+	if (data->type == max6581) {
+		factor += hweight8(pdata->resistance_cancellation >> 1);
+		ret = i2c_smbus_write_byte_data(client, MAX6581_REG_RESISTANCE,
+					pdata->resistance_cancellation >> 1);
+		if (ret < 0)
+			return ret;
+		ret = i2c_smbus_write_byte_data(client, MAX6581_REG_IDEALITY,
+						pdata->ideality_mask >> 1);
+		if (ret < 0)
+			return ret;
+		ret = i2c_smbus_write_byte_data(client,
+						MAX6581_REG_IDEALITY_SELECT,
+						pdata->ideality_value);
+		if (ret < 0)
+			return ret;
+	}
+done:
+	data->update_interval = factor * MAX6697_CONV_TIME;
+	return 0;
+}
+
+static void max6697_remove_files(struct i2c_client *client)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(max6697_group); i++)
+		sysfs_remove_group(&client->dev.kobj, &max6697_group[i]);
+}
+
+static int max6697_probe(struct i2c_client *client,
+			 const struct i2c_device_id *id)
+{
+	struct i2c_adapter *adapter = client->adapter;
+	struct device *dev = &client->dev;
+	struct max6697_data *data;
+	int i, err;
+
+	if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA))
+		return -ENODEV;
+
+	data = devm_kzalloc(dev, sizeof(struct max6697_data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	data->type = id->driver_data;
+	data->chip = &max6697_chip_data[data->type];
+
+	i2c_set_clientdata(client, data);
+	mutex_init(&data->update_lock);
+
+	err = max6697_init_chip(client);
+	if (err)
+		return err;
+
+	for (i = 0; i < data->chip->channels; i++) {
+		err = sysfs_create_file(&dev->kobj,
+					max6697_attributes[i][0]);
+		if (err)
+			goto error;
+		err = sysfs_create_file(&dev->kobj,
+					max6697_attributes[i][1]);
+		if (err)
+			goto error;
+		err = sysfs_create_file(&dev->kobj,
+					max6697_attributes[i][2]);
+		if (err)
+			goto error;
+
+		if (data->chip->have_crit & (1 << i)) {
+			err = sysfs_create_file(&dev->kobj,
+						max6697_attributes[i][3]);
+			if (err)
+				goto error;
+			err = sysfs_create_file(&dev->kobj,
+						max6697_attributes[i][4]);
+			if (err)
+				goto error;
+		}
+		if (data->chip->have_fault & (1 << i)) {
+			err = sysfs_create_file(&dev->kobj,
+						max6697_attributes[i][5]);
+			if (err)
+				goto error;
+		}
+	}
+
+	data->hwmon_dev = hwmon_device_register(dev);
+	if (IS_ERR(data->hwmon_dev)) {
+		err = PTR_ERR(data->hwmon_dev);
+		goto error;
+	}
+
+	return 0;
+
+error:
+	max6697_remove_files(client);
+	return err;
+}
+
+static int max6697_remove(struct i2c_client *client)
+{
+	struct max6697_data *data = i2c_get_clientdata(client);
+
+	hwmon_device_unregister(data->hwmon_dev);
+	max6697_remove_files(client);
+
+	return 0;
+}
+
+static const struct i2c_device_id max6697_id[] = {
+	{ "max6581", max6581 },
+	{ "max6602", max6602 },
+	{ "max6622", max6622 },
+	{ "max6636", max6636 },
+	{ "max6689", max6689 },
+	{ "max6693", max6693 },
+	{ "max6694", max6694 },
+	{ "max6697", max6697 },
+	{ "max6698", max6698 },
+	{ "max6699", max6699 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, max6697_id);
+
+static struct i2c_driver max6697_driver = {
+	.class = I2C_CLASS_HWMON,
+	.driver = {
+		.name	= "max6697",
+	},
+	.probe = max6697_probe,
+	.remove	= max6697_remove,
+	.id_table = max6697_id,
+};
+
+module_i2c_driver(max6697_driver);
+
+MODULE_AUTHOR("Guenter Roeck <linux@roeck-us.net>");
+MODULE_DESCRIPTION("MAX6697 temperature sensor driver");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/platform_data/max6697.h b/include/linux/platform_data/max6697.h
new file mode 100644
index 000000000000..ed9d3b3daf02
--- /dev/null
+++ b/include/linux/platform_data/max6697.h
@@ -0,0 +1,36 @@
+/*
+ * max6697.h
+ *     Copyright (c) 2012 Guenter Roeck <linux@roeck-us.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef MAX6697_H
+#define MAX6697_H
+
+#include <linux/types.h>
+
+/*
+ * For all bit masks:
+ * bit 0:    local temperature
+ * bit 1..7: remote temperatures
+ */
+struct max6697_platform_data {
+	bool smbus_timeout_disable;	/* set to disable SMBus timeouts */
+	bool extended_range_enable;	/* set to enable extended temp range */
+	bool beta_compensation;		/* set to enable beta compensation */
+	u8 alert_mask;			/* set bit to 1 to disable alert */
+	u8 over_temperature_mask;	/* set bit to 1 to disable */
+	u8 resistance_cancellation;	/* set bit to 0 to disable
+					 * bit mask for MAX6581,
+					 * boolean for other chips
+					 */
+	u8 ideality_mask;		/* set bit to 0 to disable */
+	u8 ideality_value;		/* transistor ideality as per
+					 * MAX6581 datasheet
+					 */
+};
+
+#endif /* MAX6697_H */
-- 
cgit v1.2.3


From 5aa57f0a655276f62683c0cc714cd6328d98e08a Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Mon, 4 Feb 2013 20:26:00 +0000
Subject: iio: Update iio_channel_get API to use consumer device pointer as
 argument

For iio_channel_get to work with OF based configurations, it needs the
consumer device pointer instead of the consumer device name as argument.

Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Acked-by: Anton Vorontsov <anton@enomsg.org>
Acked-by: Chanwoo Choi <cw00.choi@samsung.com>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 drivers/extcon/extcon-adc-jack.c    |  3 +--
 drivers/iio/inkern.c                | 11 ++++++++++-
 drivers/power/generic-adc-battery.c |  4 ++--
 drivers/power/lp8788-charger.c      |  8 ++++----
 include/linux/iio/consumer.h        |  5 +++--
 5 files changed, 20 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/extcon/extcon-adc-jack.c b/drivers/extcon/extcon-adc-jack.c
index eda2a1aa4adb..d0233cd18ffa 100644
--- a/drivers/extcon/extcon-adc-jack.c
+++ b/drivers/extcon/extcon-adc-jack.c
@@ -135,8 +135,7 @@ static int adc_jack_probe(struct platform_device *pdev)
 		;
 	data->num_conditions = i;
 
-	data->chan = iio_channel_get(dev_name(&pdev->dev),
-			pdata->consumer_channel);
+	data->chan = iio_channel_get(&pdev->dev, pdata->consumer_channel);
 	if (IS_ERR(data->chan)) {
 		err = PTR_ERR(data->chan);
 		goto out;
diff --git a/drivers/iio/inkern.c b/drivers/iio/inkern.c
index c42aba6817e8..b289915b8469 100644
--- a/drivers/iio/inkern.c
+++ b/drivers/iio/inkern.c
@@ -93,7 +93,8 @@ static const struct iio_chan_spec
 }
 
 
-struct iio_channel *iio_channel_get(const char *name, const char *channel_name)
+static struct iio_channel *iio_channel_get_sys(const char *name,
+					       const char *channel_name)
 {
 	struct iio_map_internal *c_i = NULL, *c = NULL;
 	struct iio_channel *channel;
@@ -144,6 +145,14 @@ error_no_mem:
 	iio_device_put(c->indio_dev);
 	return ERR_PTR(err);
 }
+
+struct iio_channel *iio_channel_get(struct device *dev,
+				    const char *channel_name)
+{
+	const char *name = dev ? dev_name(dev) : NULL;
+
+	return iio_channel_get_sys(name, channel_name);
+}
 EXPORT_SYMBOL_GPL(iio_channel_get);
 
 void iio_channel_release(struct iio_channel *channel)
diff --git a/drivers/power/generic-adc-battery.c b/drivers/power/generic-adc-battery.c
index 32ce17e235c0..42733c4dcb0e 100644
--- a/drivers/power/generic-adc-battery.c
+++ b/drivers/power/generic-adc-battery.c
@@ -287,8 +287,8 @@ static int gab_probe(struct platform_device *pdev)
 	 * based on the channel supported by consumer device.
 	 */
 	for (chan = 0; chan < ARRAY_SIZE(gab_chan_name); chan++) {
-		adc_bat->channel[chan] = iio_channel_get(dev_name(&pdev->dev),
-						gab_chan_name[chan]);
+		adc_bat->channel[chan] = iio_channel_get(&pdev->dev,
+							 gab_chan_name[chan]);
 		if (IS_ERR(adc_bat->channel[chan])) {
 			ret = PTR_ERR(adc_bat->channel[chan]);
 		} else {
diff --git a/drivers/power/lp8788-charger.c b/drivers/power/lp8788-charger.c
index 22b6407c9ca9..27889088e78e 100644
--- a/drivers/power/lp8788-charger.c
+++ b/drivers/power/lp8788-charger.c
@@ -580,7 +580,7 @@ static void lp8788_irq_unregister(struct platform_device *pdev,
 	}
 }
 
-static void lp8788_setup_adc_channel(const char *consumer_name,
+static void lp8788_setup_adc_channel(struct device *dev,
 				struct lp8788_charger *pchg)
 {
 	struct lp8788_charger_platform_data *pdata = pchg->pdata;
@@ -590,11 +590,11 @@ static void lp8788_setup_adc_channel(const char *consumer_name,
 		return;
 
 	/* ADC channel for battery voltage */
-	chan = iio_channel_get(consumer_name, pdata->adc_vbatt);
+	chan = iio_channel_get(dev, pdata->adc_vbatt);
 	pchg->chan[LP8788_VBATT] = IS_ERR(chan) ? NULL : chan;
 
 	/* ADC channel for battery temperature */
-	chan = iio_channel_get(consumer_name, pdata->adc_batt_temp);
+	chan = iio_channel_get(dev, pdata->adc_batt_temp);
 	pchg->chan[LP8788_BATT_TEMP] = IS_ERR(chan) ? NULL : chan;
 }
 
@@ -704,7 +704,7 @@ static int lp8788_charger_probe(struct platform_device *pdev)
 	if (ret)
 		return ret;
 
-	lp8788_setup_adc_channel(pdev->name, pchg);
+	lp8788_setup_adc_channel(&pdev->dev, pchg);
 
 	ret = lp8788_psy_register(pdev, pchg);
 	if (ret)
diff --git a/include/linux/iio/consumer.h b/include/linux/iio/consumer.h
index a85787ac66ab..833926c91aa8 100644
--- a/include/linux/iio/consumer.h
+++ b/include/linux/iio/consumer.h
@@ -31,14 +31,15 @@ struct iio_channel {
 
 /**
  * iio_channel_get() - get description of all that is needed to access channel.
- * @name:		Unique name of the device as provided in the iio_map
+ * @dev:		Pointer to consumer device. Device name must match
+ *			the name of the device as provided in the iio_map
  *			with which the desired provider to consumer mapping
  *			was registered.
  * @consumer_channel:	Unique name to identify the channel on the consumer
  *			side. This typically describes the channels use within
  *			the consumer. E.g. 'battery_voltage'
  */
-struct iio_channel *iio_channel_get(const char *name,
+struct iio_channel *iio_channel_get(struct device *dev,
 				    const char *consumer_channel);
 
 /**
-- 
cgit v1.2.3


From fa1dfe4abe7f3e1d28ee80753acd9a8a87dbacd7 Mon Sep 17 00:00:00 2001
From: Kishon Vijay Abraham I <kishon@ti.com>
Date: Wed, 6 Feb 2013 18:58:49 +0530
Subject: ARM: OMAP2: MUSB: Specify omap4 has mailbox

Added has_mailbox to the musb platform data to specify that omap uses
an external mailbox (in control module) to communicate with the musb
core during device connect and disconnect.

Signed-off-by: Kishon Vijay Abraham I <kishon@ti.com>
Acked-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/mach-omap2/usb-musb.c | 3 +++
 include/linux/usb/musb.h       | 2 ++
 2 files changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/arch/arm/mach-omap2/usb-musb.c b/arch/arm/mach-omap2/usb-musb.c
index 7b33b375fe77..9d27e3f8a09c 100644
--- a/arch/arm/mach-omap2/usb-musb.c
+++ b/arch/arm/mach-omap2/usb-musb.c
@@ -85,6 +85,9 @@ void __init usb_musb_init(struct omap_musb_board_data *musb_board_data)
 	musb_plat.mode = board_data->mode;
 	musb_plat.extvbus = board_data->extvbus;
 
+	if (cpu_is_omap44xx())
+		musb_plat.has_mailbox = true;
+
 	if (soc_is_am35xx()) {
 		oh_name = "am35x_otg_hs";
 		name = "musb-am35x";
diff --git a/include/linux/usb/musb.h b/include/linux/usb/musb.h
index eb505250940a..053c26841cc3 100644
--- a/include/linux/usb/musb.h
+++ b/include/linux/usb/musb.h
@@ -99,6 +99,8 @@ struct musb_hdrc_platform_data {
 	/* MUSB_HOST, MUSB_PERIPHERAL, or MUSB_OTG */
 	u8		mode;
 
+	u8		has_mailbox:1;
+
 	/* for clk_get() */
 	const char	*clock;
 
-- 
cgit v1.2.3


From c8ce60bbed39a8d884837ed6e53dc32f7f86e40f Mon Sep 17 00:00:00 2001
From: Felipe Balbi <balbi@ti.com>
Date: Wed, 6 Feb 2013 18:58:51 +0530
Subject: usb: omap_control_usb: fix compile warning

When CONFIG_OMAP_CONTROL_USB isn't enabled,
there's a compile warning stating that a
particular function isn't a prototype.

Fix it.

Signed-off-by: Felipe Balbi <balbi@ti.com>
Signed-off-by: Kishon Vijay Abraham I <kishon@ti.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/usb/omap_control_usb.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/usb/omap_control_usb.h b/include/linux/usb/omap_control_usb.h
index f306db7149ca..27b5b8c931b0 100644
--- a/include/linux/usb/omap_control_usb.h
+++ b/include/linux/usb/omap_control_usb.h
@@ -70,7 +70,7 @@ extern void omap_control_usb3_phy_power(struct device *dev, bool on);
 extern void omap_control_usb_set_mode(struct device *dev,
 	enum omap_control_usb_mode mode);
 #else
-static inline struct device *omap_get_control_dev()
+static inline struct device *omap_get_control_dev(void)
 {
 	return ERR_PTR(-ENODEV);
 }
-- 
cgit v1.2.3


From 9f3b795a626ee79574595e06d1437fe0c7d51d29 Mon Sep 17 00:00:00 2001
From: Michał Mirosław <mirq-linux@rere.qmqm.pl>
Date: Fri, 1 Feb 2013 20:40:17 +0100
Subject: driver-core: constify data for class_find_device()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

All in-kernel users of class_find_device() don't really need mutable
data for match callback.

In two places (kernel/power/suspend_test.c, drivers/scsi/osd/osd_uld.c)
this patch changes match callbacks to use const search data.

The const is propagated to rtc_class_open() and power_supply_get_by_name()
parameters.

Note that there's a dev reference leak in suspend_test.c that's not
touched in this patch.

Signed-off-by: Michał Mirosław <mirq-linux@rere.qmqm.pl>
Acked-by: Grant Likely <grant.likely@secretlab.ca>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/class.c                |  4 ++--
 drivers/base/core.c                 |  4 ++--
 drivers/gpio/gpiolib.c              |  2 +-
 drivers/isdn/mISDN/core.c           |  4 ++--
 drivers/net/phy/mdio_bus.c          |  2 +-
 drivers/power/power_supply_core.c   |  4 ++--
 drivers/rtc/interface.c             |  6 +++---
 drivers/scsi/hosts.c                |  4 ++--
 drivers/scsi/osd/osd_uld.c          | 26 +++++++++-----------------
 drivers/scsi/scsi_transport_iscsi.c |  4 ++--
 drivers/spi/spi.c                   |  4 ++--
 drivers/uwb/lc-rc.c                 | 21 ++++++++++-----------
 include/linux/device.h              |  4 ++--
 include/linux/power_supply.h        |  2 +-
 include/linux/rtc.h                 |  2 +-
 init/do_mounts.c                    |  4 ++--
 kernel/power/suspend_test.c         | 11 +++++------
 net/ieee802154/wpan-class.c         |  5 ++---
 net/nfc/core.c                      |  4 ++--
 19 files changed, 53 insertions(+), 64 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/class.c b/drivers/base/class.c
index 03243d4002fd..3ce845471327 100644
--- a/drivers/base/class.c
+++ b/drivers/base/class.c
@@ -420,8 +420,8 @@ EXPORT_SYMBOL_GPL(class_for_each_device);
  * code.  There's no locking restriction.
  */
 struct device *class_find_device(struct class *class, struct device *start,
-				 void *data,
-				 int (*match)(struct device *, void *))
+				 const void *data,
+				 int (*match)(struct device *, const void *))
 {
 	struct class_dev_iter iter;
 	struct device *dev;
diff --git a/drivers/base/core.c b/drivers/base/core.c
index 27603a6c0a93..56536f4b0f6b 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -1617,9 +1617,9 @@ struct device *device_create(struct class *class, struct device *parent,
 }
 EXPORT_SYMBOL_GPL(device_create);
 
-static int __match_devt(struct device *dev, void *data)
+static int __match_devt(struct device *dev, const void *data)
 {
-	dev_t *devt = data;
+	const dev_t *devt = data;
 
 	return dev->devt == *devt;
 }
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 199fca15f270..5359ca78130f 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -806,7 +806,7 @@ fail_unlock:
 }
 EXPORT_SYMBOL_GPL(gpio_export);
 
-static int match_export(struct device *dev, void *data)
+static int match_export(struct device *dev, const void *data)
 {
 	return dev_get_drvdata(dev) == data;
 }
diff --git a/drivers/isdn/mISDN/core.c b/drivers/isdn/mISDN/core.c
index 3e245712bba7..da30c5cb9609 100644
--- a/drivers/isdn/mISDN/core.c
+++ b/drivers/isdn/mISDN/core.c
@@ -168,13 +168,13 @@ static struct class mISDN_class = {
 };
 
 static int
-_get_mdevice(struct device *dev, void *id)
+_get_mdevice(struct device *dev, const void *id)
 {
 	struct mISDNdevice *mdev = dev_to_mISDN(dev);
 
 	if (!mdev)
 		return 0;
-	if (mdev->id != *(u_int *)id)
+	if (mdev->id != *(const u_int *)id)
 		return 0;
 	return 1;
 }
diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c
index 044b5326459f..dc920974204e 100644
--- a/drivers/net/phy/mdio_bus.c
+++ b/drivers/net/phy/mdio_bus.c
@@ -95,7 +95,7 @@ static struct class mdio_bus_class = {
 
 #if IS_ENABLED(CONFIG_OF_MDIO)
 /* Helper function for of_mdio_find_bus */
-static int of_mdio_bus_match(struct device *dev, void *mdio_bus_np)
+static int of_mdio_bus_match(struct device *dev, const void *mdio_bus_np)
 {
 	return dev->of_node == mdio_bus_np;
 }
diff --git a/drivers/power/power_supply_core.c b/drivers/power/power_supply_core.c
index 8a7cfb3cc166..5deac432e2ae 100644
--- a/drivers/power/power_supply_core.c
+++ b/drivers/power/power_supply_core.c
@@ -141,7 +141,7 @@ int power_supply_set_battery_charged(struct power_supply *psy)
 }
 EXPORT_SYMBOL_GPL(power_supply_set_battery_charged);
 
-static int power_supply_match_device_by_name(struct device *dev, void *data)
+static int power_supply_match_device_by_name(struct device *dev, const void *data)
 {
 	const char *name = data;
 	struct power_supply *psy = dev_get_drvdata(dev);
@@ -149,7 +149,7 @@ static int power_supply_match_device_by_name(struct device *dev, void *data)
 	return strcmp(psy->name, name) == 0;
 }
 
-struct power_supply *power_supply_get_by_name(char *name)
+struct power_supply *power_supply_get_by_name(const char *name)
 {
 	struct device *dev = class_find_device(power_supply_class, NULL, name,
 					power_supply_match_device_by_name);
diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c
index 9592b936b71b..42bd57da239d 100644
--- a/drivers/rtc/interface.c
+++ b/drivers/rtc/interface.c
@@ -587,16 +587,16 @@ void rtc_update_irq(struct rtc_device *rtc,
 }
 EXPORT_SYMBOL_GPL(rtc_update_irq);
 
-static int __rtc_match(struct device *dev, void *data)
+static int __rtc_match(struct device *dev, const void *data)
 {
-	char *name = (char *)data;
+	const char *name = data;
 
 	if (strcmp(dev_name(dev), name) == 0)
 		return 1;
 	return 0;
 }
 
-struct rtc_device *rtc_class_open(char *name)
+struct rtc_device *rtc_class_open(const char *name)
 {
 	struct device *dev;
 	struct rtc_device *rtc = NULL;
diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c
index 593085a52275..df0c3c71ea43 100644
--- a/drivers/scsi/hosts.c
+++ b/drivers/scsi/hosts.c
@@ -468,10 +468,10 @@ void scsi_unregister(struct Scsi_Host *shost)
 }
 EXPORT_SYMBOL(scsi_unregister);
 
-static int __scsi_host_match(struct device *dev, void *data)
+static int __scsi_host_match(struct device *dev, const void *data)
 {
 	struct Scsi_Host *p;
-	unsigned short *hostnum = (unsigned short *)data;
+	const unsigned short *hostnum = data;
 
 	p = class_to_shost(dev);
 	return p->host_no == *hostnum;
diff --git a/drivers/scsi/osd/osd_uld.c b/drivers/scsi/osd/osd_uld.c
index 43754176a7b7..0fab6b5c7b82 100644
--- a/drivers/scsi/osd/osd_uld.c
+++ b/drivers/scsi/osd/osd_uld.c
@@ -268,18 +268,11 @@ static inline bool _the_same_or_null(const u8 *a1, unsigned a1_len,
 	return 0 == memcmp(a1, a2, a1_len);
 }
 
-struct find_oud_t {
-	const struct osd_dev_info *odi;
-	struct device *dev;
-	struct osd_uld_device *oud;
-} ;
-
-int _mach_odi(struct device *dev, void *find_data)
+static int _match_odi(struct device *dev, const void *find_data)
 {
 	struct osd_uld_device *oud = container_of(dev, struct osd_uld_device,
 						  class_dev);
-	struct find_oud_t *fot = find_data;
-	const struct osd_dev_info *odi = fot->odi;
+	const struct osd_dev_info *odi = find_data;
 
 	if (_the_same_or_null(oud->odi.systemid, oud->odi.systemid_len,
 			      odi->systemid, odi->systemid_len) &&
@@ -287,7 +280,6 @@ int _mach_odi(struct device *dev, void *find_data)
 			      odi->osdname, odi->osdname_len)) {
 		OSD_DEBUG("found device sysid_len=%d osdname=%d\n",
 			  odi->systemid_len, odi->osdname_len);
-		fot->oud = oud;
 		return 1;
 	} else {
 		return 0;
@@ -301,19 +293,19 @@ int _mach_odi(struct device *dev, void *find_data)
  */
 struct osd_dev *osduld_info_lookup(const struct osd_dev_info *odi)
 {
-	struct find_oud_t find = {.odi = odi};
-
-	find.dev = class_find_device(&osd_uld_class, NULL, &find, _mach_odi);
-	if (likely(find.dev)) {
+	struct device *dev = class_find_device(&osd_uld_class, NULL, odi, _match_odi);
+	if (likely(dev)) {
 		struct osd_dev_handle *odh = kzalloc(sizeof(*odh), GFP_KERNEL);
+		struct osd_uld_device *oud = container_of(dev,
+			struct osd_uld_device, class_dev);
 
 		if (unlikely(!odh)) {
-			put_device(find.dev);
+			put_device(dev);
 			return ERR_PTR(-ENOMEM);
 		}
 
-		odh->od = find.oud->od;
-		odh->oud = find.oud;
+		odh->od = oud->od;
+		odh->oud = oud;
 
 		return &odh->od;
 	}
diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c
index 31969f2e13ce..59d427bf08e2 100644
--- a/drivers/scsi/scsi_transport_iscsi.c
+++ b/drivers/scsi/scsi_transport_iscsi.c
@@ -183,10 +183,10 @@ static struct attribute_group iscsi_endpoint_group = {
 
 #define ISCSI_MAX_EPID -1
 
-static int iscsi_match_epid(struct device *dev, void *data)
+static int iscsi_match_epid(struct device *dev, const void *data)
 {
 	struct iscsi_endpoint *ep = iscsi_dev_to_endpoint(dev);
-	uint64_t *epid = (uint64_t *) data;
+	const uint64_t *epid = data;
 
 	return *epid == ep->id;
 }
diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 19ee901577da..493ce4a71717 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -1248,10 +1248,10 @@ int spi_master_resume(struct spi_master *master)
 }
 EXPORT_SYMBOL_GPL(spi_master_resume);
 
-static int __spi_master_match(struct device *dev, void *data)
+static int __spi_master_match(struct device *dev, const void *data)
 {
 	struct spi_master *m;
-	u16 *bus_num = data;
+	const u16 *bus_num = data;
 
 	m = container_of(dev, struct spi_master, dev);
 	return m->bus_num == *bus_num;
diff --git a/drivers/uwb/lc-rc.c b/drivers/uwb/lc-rc.c
index 4d688c750801..3eca6ceb9844 100644
--- a/drivers/uwb/lc-rc.c
+++ b/drivers/uwb/lc-rc.c
@@ -40,9 +40,9 @@
 
 #include "uwb-internal.h"
 
-static int uwb_rc_index_match(struct device *dev, void *data)
+static int uwb_rc_index_match(struct device *dev, const void *data)
 {
-	int *index = data;
+	const int *index = data;
 	struct uwb_rc *rc = dev_get_drvdata(dev);
 
 	if (rc->index == *index)
@@ -334,9 +334,9 @@ void uwb_rc_rm(struct uwb_rc *rc)
 }
 EXPORT_SYMBOL_GPL(uwb_rc_rm);
 
-static int find_rc_try_get(struct device *dev, void *data)
+static int find_rc_try_get(struct device *dev, const void *data)
 {
-	struct uwb_rc *target_rc = data;
+	const struct uwb_rc *target_rc = data;
 	struct uwb_rc *rc = dev_get_drvdata(dev);
 
 	if (rc == NULL) {
@@ -386,9 +386,9 @@ static inline struct uwb_rc *uwb_rc_get(struct uwb_rc *rc)
 	return rc;
 }
 
-static int find_rc_grandpa(struct device *dev, void *data)
+static int find_rc_grandpa(struct device *dev, const void *data)
 {
-	struct device *grandpa_dev = data;
+	const struct device *grandpa_dev = data;
 	struct uwb_rc *rc = dev_get_drvdata(dev);
 
 	if (rc->uwb_dev.dev.parent->parent == grandpa_dev) {
@@ -419,7 +419,7 @@ struct uwb_rc *uwb_rc_get_by_grandpa(const struct device *grandpa_dev)
 	struct device *dev;
 	struct uwb_rc *rc = NULL;
 
-	dev = class_find_device(&uwb_rc_class, NULL, (void *)grandpa_dev,
+	dev = class_find_device(&uwb_rc_class, NULL, grandpa_dev,
 				find_rc_grandpa);
 	if (dev)
 		rc = dev_get_drvdata(dev);
@@ -432,9 +432,9 @@ EXPORT_SYMBOL_GPL(uwb_rc_get_by_grandpa);
  *
  * @returns the pointer to the radio controller, properly referenced
  */
-static int find_rc_dev(struct device *dev, void *data)
+static int find_rc_dev(struct device *dev, const void *data)
 {
-	struct uwb_dev_addr *addr = data;
+	const struct uwb_dev_addr *addr = data;
 	struct uwb_rc *rc = dev_get_drvdata(dev);
 
 	if (rc == NULL) {
@@ -453,8 +453,7 @@ struct uwb_rc *uwb_rc_get_by_dev(const struct uwb_dev_addr *addr)
 	struct device *dev;
 	struct uwb_rc *rc = NULL;
 
-	dev = class_find_device(&uwb_rc_class, NULL, (void *)addr,
-				find_rc_dev);
+	dev = class_find_device(&uwb_rc_class, NULL, addr, find_rc_dev);
 	if (dev)
 		rc = dev_get_drvdata(dev);
 
diff --git a/include/linux/device.h b/include/linux/device.h
index 251f33b21ef9..a089676084a5 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -395,8 +395,8 @@ extern int class_for_each_device(struct class *class, struct device *start,
 				 void *data,
 				 int (*fn)(struct device *dev, void *data));
 extern struct device *class_find_device(struct class *class,
-					struct device *start, void *data,
-					int (*match)(struct device *, void *));
+					struct device *start, const void *data,
+					int (*match)(struct device *, const void *));
 
 struct class_attribute {
 	struct attribute attr;
diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h
index 1f0ab90aff00..86ecaa679ded 100644
--- a/include/linux/power_supply.h
+++ b/include/linux/power_supply.h
@@ -224,7 +224,7 @@ struct power_supply_info {
 	int use_for_apm;
 };
 
-extern struct power_supply *power_supply_get_by_name(char *name);
+extern struct power_supply *power_supply_get_by_name(const char *name);
 extern void power_supply_changed(struct power_supply *psy);
 extern int power_supply_am_i_supplied(struct power_supply *psy);
 extern int power_supply_set_battery_charged(struct power_supply *psy);
diff --git a/include/linux/rtc.h b/include/linux/rtc.h
index 9531845c419f..445fe6e7c629 100644
--- a/include/linux/rtc.h
+++ b/include/linux/rtc.h
@@ -148,7 +148,7 @@ extern int rtc_initialize_alarm(struct rtc_device *rtc,
 extern void rtc_update_irq(struct rtc_device *rtc,
 			unsigned long num, unsigned long events);
 
-extern struct rtc_device *rtc_class_open(char *name);
+extern struct rtc_device *rtc_class_open(const char *name);
 extern void rtc_class_close(struct rtc_device *rtc);
 
 extern int rtc_irq_register(struct rtc_device *rtc,
diff --git a/init/do_mounts.c b/init/do_mounts.c
index 1d1b6348f903..a2b49f2c1bd8 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -81,9 +81,9 @@ struct uuidcmp {
  *
  * Returns 1 if the device matches, and 0 otherwise.
  */
-static int match_dev_by_uuid(struct device *dev, void *data)
+static int match_dev_by_uuid(struct device *dev, const void *data)
 {
-	struct uuidcmp *cmp = data;
+	const struct uuidcmp *cmp = data;
 	struct hd_struct *part = dev_to_part(dev);
 
 	if (!part->info)
diff --git a/kernel/power/suspend_test.c b/kernel/power/suspend_test.c
index 25596e450ac7..9b2a1d58558d 100644
--- a/kernel/power/suspend_test.c
+++ b/kernel/power/suspend_test.c
@@ -112,7 +112,7 @@ static void __init test_wakealarm(struct rtc_device *rtc, suspend_state_t state)
 	rtc_set_alarm(rtc, &alm);
 }
 
-static int __init has_wakealarm(struct device *dev, void *name_ptr)
+static int __init has_wakealarm(struct device *dev, const void *data)
 {
 	struct rtc_device *candidate = to_rtc_device(dev);
 
@@ -121,7 +121,6 @@ static int __init has_wakealarm(struct device *dev, void *name_ptr)
 	if (!device_may_wakeup(candidate->dev.parent))
 		return 0;
 
-	*(const char **)name_ptr = dev_name(dev);
 	return 1;
 }
 
@@ -159,8 +158,8 @@ static int __init test_suspend(void)
 	static char		warn_no_rtc[] __initdata =
 		KERN_WARNING "PM: no wakealarm-capable RTC driver is ready\n";
 
-	char			*pony = NULL;
 	struct rtc_device	*rtc = NULL;
+	struct device		*dev;
 
 	/* PM is initialized by now; is that state testable? */
 	if (test_state == PM_SUSPEND_ON)
@@ -171,9 +170,9 @@ static int __init test_suspend(void)
 	}
 
 	/* RTCs have initialized by now too ... can we use one? */
-	class_find_device(rtc_class, NULL, &pony, has_wakealarm);
-	if (pony)
-		rtc = rtc_class_open(pony);
+	dev = class_find_device(rtc_class, NULL, NULL, has_wakealarm);
+	if (dev)
+		rtc = rtc_class_open(dev_name(dev));
 	if (!rtc) {
 		printk(warn_no_rtc);
 		goto done;
diff --git a/net/ieee802154/wpan-class.c b/net/ieee802154/wpan-class.c
index 1627ef2e8522..13571eae6bae 100644
--- a/net/ieee802154/wpan-class.c
+++ b/net/ieee802154/wpan-class.c
@@ -91,7 +91,7 @@ static struct class wpan_phy_class = {
 static DEFINE_MUTEX(wpan_phy_mutex);
 static int wpan_phy_idx;
 
-static int wpan_phy_match(struct device *dev, void *data)
+static int wpan_phy_match(struct device *dev, const void *data)
 {
 	return !strcmp(dev_name(dev), (const char *)data);
 }
@@ -103,8 +103,7 @@ struct wpan_phy *wpan_phy_find(const char *str)
 	if (WARN_ON(!str))
 		return NULL;
 
-	dev = class_find_device(&wpan_phy_class, NULL,
-			(void *)str, wpan_phy_match);
+	dev = class_find_device(&wpan_phy_class, NULL, str, wpan_phy_match);
 	if (!dev)
 		return NULL;
 
diff --git a/net/nfc/core.c b/net/nfc/core.c
index aa64ea441676..0f4a6de6f161 100644
--- a/net/nfc/core.c
+++ b/net/nfc/core.c
@@ -734,10 +734,10 @@ struct class nfc_class = {
 };
 EXPORT_SYMBOL(nfc_class);
 
-static int match_idx(struct device *d, void *data)
+static int match_idx(struct device *d, const void *data)
 {
 	struct nfc_dev *dev = to_nfc_dev(d);
-	unsigned int *idx = data;
+	const unsigned int *idx = data;
 
 	return dev->idx == *idx;
 }
-- 
cgit v1.2.3


From ca99ca14c95ae49fb4c9cd3abf5f84d11a7e8a61 Mon Sep 17 00:00:00 2001
From: Neil Horman <nhorman@tuxdriver.com>
Date: Tue, 5 Feb 2013 08:05:43 +0000
Subject: netpoll: protect napi_poll and poll_controller during
 dev_[open|close]

Ivan Vercera was recently backporting commit
9c13cb8bb477a83b9a3c9e5a5478a4e21294a760 to a RHEL kernel, and I noticed that,
while this patch protects the tg3 driver from having its ndo_poll_controller
routine called during device initalization, it does nothing for the driver
during shutdown. I.e. it would be entirely possible to have the
ndo_poll_controller method (or subsequently the ndo_poll) routine called for a
driver in the netpoll path on CPU A while in parallel on CPU B, the ndo_close or
ndo_open routine could be called.  Given that the two latter routines tend to
initizlize and free many data structures that the former two rely on, the result
can easily be data corruption or various other crashes.  Furthermore, it seems
that this is potentially a problem with all net drivers that support netpoll,
and so this should ideally be fixed in a common path.

As Ben H Pointed out to me, we can't preform dev_open/dev_close in atomic
context, so I've come up with this solution.  We can use a mutex to sleep in
open/close paths and just do a mutex_trylock in the napi poll path and abandon
the poll attempt if we're locked, as we'll just retry the poll on the next send
anyway.

I've tested this here by flooding netconsole with messages on a system whos nic
driver I modfied to periodically return NETDEV_TX_BUSY, so that the netpoll tx
workqueue would be forced to send frames and poll the device.  While this was
going on I rapidly ifdown/up'ed the interface and watched for any problems.
I've not found any.

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
CC: Ivan Vecera <ivecera@redhat.com>
CC: "David S. Miller" <davem@davemloft.net>
CC: Ben Hutchings <bhutchings@solarflare.com>
CC: Francois Romieu <romieu@fr.zoreil.com>
CC: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netpoll.h | 11 ++++++++++-
 net/core/dev.c          | 27 ++++++++++++++++++++++++++-
 net/core/netpoll.c      | 40 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 76 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h
index f54c3bb6a22b..ab856d507b7e 100644
--- a/include/linux/netpoll.h
+++ b/include/linux/netpoll.h
@@ -38,8 +38,9 @@ struct netpoll {
 struct netpoll_info {
 	atomic_t refcnt;
 
-	int rx_flags;
+	unsigned long rx_flags;
 	spinlock_t rx_lock;
+	struct mutex dev_lock;
 	struct list_head rx_np; /* netpolls that registered an rx_hook */
 
 	struct sk_buff_head neigh_tx; /* list of neigh requests to reply to */
@@ -51,6 +52,14 @@ struct netpoll_info {
 	struct rcu_head rcu;
 };
 
+#ifdef CONFIG_NETPOLL
+extern int netpoll_rx_disable(struct net_device *dev);
+extern void netpoll_rx_enable(struct net_device *dev);
+#else
+static inline int netpoll_rx_disable(struct net_device *dev) { return 0; }
+static inline void netpoll_rx_enable(struct net_device *dev) { return; }
+#endif
+
 void netpoll_send_udp(struct netpoll *np, const char *msg, int len);
 void netpoll_print_options(struct netpoll *np);
 int netpoll_parse_options(struct netpoll *np, char *opt);
diff --git a/net/core/dev.c b/net/core/dev.c
index e04bfdc9e3e4..2b275a7b8677 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1266,6 +1266,14 @@ static int __dev_open(struct net_device *dev)
 	if (!netif_device_present(dev))
 		return -ENODEV;
 
+	/* Block netpoll from trying to do any rx path servicing.
+	 * If we don't do this there is a chance ndo_poll_controller
+	 * or ndo_poll may be running while we open the device
+	 */
+	ret = netpoll_rx_disable(dev);
+	if (ret)
+		return ret;
+
 	ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev);
 	ret = notifier_to_errno(ret);
 	if (ret)
@@ -1279,6 +1287,8 @@ static int __dev_open(struct net_device *dev)
 	if (!ret && ops->ndo_open)
 		ret = ops->ndo_open(dev);
 
+	netpoll_rx_enable(dev);
+
 	if (ret)
 		clear_bit(__LINK_STATE_START, &dev->state);
 	else {
@@ -1370,9 +1380,16 @@ static int __dev_close(struct net_device *dev)
 	int retval;
 	LIST_HEAD(single);
 
+	/* Temporarily disable netpoll until the interface is down */
+	retval = netpoll_rx_disable(dev);
+	if (retval)
+		return retval;
+
 	list_add(&dev->unreg_list, &single);
 	retval = __dev_close_many(&single);
 	list_del(&single);
+
+	netpoll_rx_enable(dev);
 	return retval;
 }
 
@@ -1408,14 +1425,22 @@ static int dev_close_many(struct list_head *head)
  */
 int dev_close(struct net_device *dev)
 {
+	int ret = 0;
 	if (dev->flags & IFF_UP) {
 		LIST_HEAD(single);
 
+		/* Block netpoll rx while the interface is going down */
+		ret = netpoll_rx_disable(dev);
+		if (ret)
+			return ret;
+
 		list_add(&dev->unreg_list, &single);
 		dev_close_many(&single);
 		list_del(&single);
+
+		netpoll_rx_enable(dev);
 	}
-	return 0;
+	return ret;
 }
 EXPORT_SYMBOL(dev_close);
 
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 331ccb90f915..edcd9ad95304 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -47,6 +47,8 @@ static struct sk_buff_head skb_pool;
 
 static atomic_t trapped;
 
+static struct srcu_struct netpoll_srcu;
+
 #define USEC_PER_POLL	50
 #define NETPOLL_RX_ENABLED  1
 #define NETPOLL_RX_DROP     2
@@ -199,6 +201,13 @@ static void netpoll_poll_dev(struct net_device *dev)
 	const struct net_device_ops *ops;
 	struct netpoll_info *ni = rcu_dereference_bh(dev->npinfo);
 
+	/* Don't do any rx activity if the dev_lock mutex is held
+	 * the dev_open/close paths use this to block netpoll activity
+	 * while changing device state
+	 */
+	if (!mutex_trylock(&dev->npinfo->dev_lock))
+		return;
+
 	if (!dev || !netif_running(dev))
 		return;
 
@@ -211,6 +220,8 @@ static void netpoll_poll_dev(struct net_device *dev)
 
 	poll_napi(dev);
 
+	mutex_unlock(&dev->npinfo->dev_lock);
+
 	if (dev->flags & IFF_SLAVE) {
 		if (ni) {
 			struct net_device *bond_dev;
@@ -231,6 +242,31 @@ static void netpoll_poll_dev(struct net_device *dev)
 	zap_completion_queue();
 }
 
+int netpoll_rx_disable(struct net_device *dev)
+{
+	struct netpoll_info *ni;
+	int idx;
+	might_sleep();
+	idx = srcu_read_lock(&netpoll_srcu);
+	ni = srcu_dereference(dev->npinfo, &netpoll_srcu);
+	if (ni)
+		mutex_lock(&ni->dev_lock);
+	srcu_read_unlock(&netpoll_srcu, idx);
+	return 0;
+}
+EXPORT_SYMBOL(netpoll_rx_disable);
+
+void netpoll_rx_enable(struct net_device *dev)
+{
+	struct netpoll_info *ni;
+	rcu_read_lock();
+	ni = rcu_dereference(dev->npinfo);
+	if (ni)
+		mutex_unlock(&ni->dev_lock);
+	rcu_read_unlock();
+}
+EXPORT_SYMBOL(netpoll_rx_enable);
+
 static void refill_skbs(void)
 {
 	struct sk_buff *skb;
@@ -1004,6 +1040,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp)
 		INIT_LIST_HEAD(&npinfo->rx_np);
 
 		spin_lock_init(&npinfo->rx_lock);
+		mutex_init(&npinfo->dev_lock);
 		skb_queue_head_init(&npinfo->neigh_tx);
 		skb_queue_head_init(&npinfo->txq);
 		INIT_DELAYED_WORK(&npinfo->tx_work, queue_process);
@@ -1169,6 +1206,7 @@ EXPORT_SYMBOL(netpoll_setup);
 static int __init netpoll_init(void)
 {
 	skb_queue_head_init(&skb_pool);
+	init_srcu_struct(&netpoll_srcu);
 	return 0;
 }
 core_initcall(netpoll_init);
@@ -1208,6 +1246,8 @@ void __netpoll_cleanup(struct netpoll *np)
 		spin_unlock_irqrestore(&npinfo->rx_lock, flags);
 	}
 
+	synchronize_srcu(&netpoll_srcu);
+
 	if (atomic_dec_and_test(&npinfo->refcnt)) {
 		const struct net_device_ops *ops;
 
-- 
cgit v1.2.3


From 3b72c2fe0c6bbec42ed7f899931daef227b80322 Mon Sep 17 00:00:00 2001
From: Mugunthan V N <mugunthanvnm@ti.com>
Date: Tue, 5 Feb 2013 08:26:48 +0000
Subject: drivers: net:ethernet: cpsw: add support for VLAN

adding support for VLAN interface for cpsw.

CPSW VLAN Capability
* Can filter VLAN packets in Hardware

Signed-off-by: Mugunthan V N <mugunthanvnm@ti.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ti/cpsw.c     | 106 ++++++++++++++++++++++++++++++++++++-
 drivers/net/ethernet/ti/cpsw_ale.h |   4 ++
 include/linux/platform_data/cpsw.h |   1 +
 3 files changed, 109 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index 534bf7bc34db..888708ceb13c 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -32,6 +32,7 @@
 #include <linux/of.h>
 #include <linux/of_net.h>
 #include <linux/of_device.h>
+#include <linux/if_vlan.h>
 
 #include <linux/platform_data/cpsw.h>
 
@@ -118,6 +119,9 @@ do {								\
 #define TX_PRIORITY_MAPPING	0x33221100
 #define CPDMA_TX_PRIORITY_MAP	0x76543210
 
+#define CPSW_VLAN_AWARE		BIT(1)
+#define CPSW_ALE_VLAN_AWARE	1
+
 #define cpsw_enable_irq(priv)	\
 	do {			\
 		u32 i;		\
@@ -607,14 +611,40 @@ static void cpsw_slave_open(struct cpsw_slave *slave, struct cpsw_priv *priv)
 	}
 }
 
+static inline void cpsw_add_default_vlan(struct cpsw_priv *priv)
+{
+	const int vlan = priv->data.default_vlan;
+	const int port = priv->host_port;
+	u32 reg;
+	int i;
+
+	reg = (priv->version == CPSW_VERSION_1) ? CPSW1_PORT_VLAN :
+	       CPSW2_PORT_VLAN;
+
+	writel(vlan, &priv->host_port_regs->port_vlan);
+
+	for (i = 0; i < 2; i++)
+		slave_write(priv->slaves + i, vlan, reg);
+
+	cpsw_ale_add_vlan(priv->ale, vlan, ALE_ALL_PORTS << port,
+			  ALE_ALL_PORTS << port, ALE_ALL_PORTS << port,
+			  (ALE_PORT_1 | ALE_PORT_2) << port);
+}
+
 static void cpsw_init_host_port(struct cpsw_priv *priv)
 {
+	u32 control_reg;
+
 	/* soft reset the controller and initialize ale */
 	soft_reset("cpsw", &priv->regs->soft_reset);
 	cpsw_ale_start(priv->ale);
 
 	/* switch to vlan unaware mode */
-	cpsw_ale_control_set(priv->ale, 0, ALE_VLAN_AWARE, 0);
+	cpsw_ale_control_set(priv->ale, priv->host_port, ALE_VLAN_AWARE,
+			     CPSW_ALE_VLAN_AWARE);
+	control_reg = readl(&priv->regs->control);
+	control_reg |= CPSW_VLAN_AWARE;
+	writel(control_reg, &priv->regs->control);
 
 	/* setup host port priority mapping */
 	__raw_writel(CPDMA_TX_PRIORITY_MAP,
@@ -650,6 +680,9 @@ static int cpsw_ndo_open(struct net_device *ndev)
 	cpsw_init_host_port(priv);
 	for_each_slave(priv, cpsw_slave_open, priv);
 
+	/* Add default VLAN */
+	cpsw_add_default_vlan(priv);
+
 	/* setup tx dma to fixed prio and zero offset */
 	cpdma_control_set(priv->dma, CPDMA_TX_PRIO_FIXED, 1);
 	cpdma_control_set(priv->dma, CPDMA_RX_BUFFER_OFFSET, 0);
@@ -933,6 +966,73 @@ static void cpsw_ndo_poll_controller(struct net_device *ndev)
 }
 #endif
 
+static inline int cpsw_add_vlan_ale_entry(struct cpsw_priv *priv,
+				unsigned short vid)
+{
+	int ret;
+
+	ret = cpsw_ale_add_vlan(priv->ale, vid,
+				ALE_ALL_PORTS << priv->host_port,
+				0, ALE_ALL_PORTS << priv->host_port,
+				(ALE_PORT_1 | ALE_PORT_2) << priv->host_port);
+	if (ret != 0)
+		return ret;
+
+	ret = cpsw_ale_add_ucast(priv->ale, priv->mac_addr,
+				 priv->host_port, ALE_VLAN, vid);
+	if (ret != 0)
+		goto clean_vid;
+
+	ret = cpsw_ale_add_mcast(priv->ale, priv->ndev->broadcast,
+				 ALE_ALL_PORTS << priv->host_port,
+				 ALE_VLAN, vid, 0);
+	if (ret != 0)
+		goto clean_vlan_ucast;
+	return 0;
+
+clean_vlan_ucast:
+	cpsw_ale_del_ucast(priv->ale, priv->mac_addr,
+			    priv->host_port, ALE_VLAN, vid);
+clean_vid:
+	cpsw_ale_del_vlan(priv->ale, vid, 0);
+	return ret;
+}
+
+static int cpsw_ndo_vlan_rx_add_vid(struct net_device *ndev,
+		unsigned short vid)
+{
+	struct cpsw_priv *priv = netdev_priv(ndev);
+
+	if (vid == priv->data.default_vlan)
+		return 0;
+
+	dev_info(priv->dev, "Adding vlanid %d to vlan filter\n", vid);
+	return cpsw_add_vlan_ale_entry(priv, vid);
+}
+
+static int cpsw_ndo_vlan_rx_kill_vid(struct net_device *ndev,
+		unsigned short vid)
+{
+	struct cpsw_priv *priv = netdev_priv(ndev);
+	int ret;
+
+	if (vid == priv->data.default_vlan)
+		return 0;
+
+	dev_info(priv->dev, "removing vlanid %d from vlan filter\n", vid);
+	ret = cpsw_ale_del_vlan(priv->ale, vid, 0);
+	if (ret != 0)
+		return ret;
+
+	ret = cpsw_ale_del_ucast(priv->ale, priv->mac_addr,
+				 priv->host_port, ALE_VLAN, vid);
+	if (ret != 0)
+		return ret;
+
+	return cpsw_ale_del_mcast(priv->ale, priv->ndev->broadcast,
+				  0, ALE_VLAN, vid);
+}
+
 static const struct net_device_ops cpsw_netdev_ops = {
 	.ndo_open		= cpsw_ndo_open,
 	.ndo_stop		= cpsw_ndo_stop,
@@ -947,6 +1047,8 @@ static const struct net_device_ops cpsw_netdev_ops = {
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	.ndo_poll_controller	= cpsw_ndo_poll_controller,
 #endif
+	.ndo_vlan_rx_add_vid	= cpsw_ndo_vlan_rx_add_vid,
+	.ndo_vlan_rx_kill_vid	= cpsw_ndo_vlan_rx_kill_vid,
 };
 
 static void cpsw_get_drvinfo(struct net_device *ndev,
@@ -1354,7 +1456,7 @@ static int cpsw_probe(struct platform_device *pdev)
 		k++;
 	}
 
-	ndev->flags |= IFF_ALLMULTI;	/* see cpsw_ndo_change_rx_flags() */
+	ndev->features |= NETIF_F_HW_VLAN_FILTER;
 
 	ndev->netdev_ops = &cpsw_netdev_ops;
 	SET_ETHTOOL_OPS(ndev, &cpsw_ethtool_ops);
diff --git a/drivers/net/ethernet/ti/cpsw_ale.h b/drivers/net/ethernet/ti/cpsw_ale.h
index a002417f952b..30daa1265f0c 100644
--- a/drivers/net/ethernet/ti/cpsw_ale.h
+++ b/drivers/net/ethernet/ti/cpsw_ale.h
@@ -69,6 +69,10 @@ enum cpsw_ale_port_state {
 #define ALE_SUPER			BIT(2)
 #define ALE_VLAN			BIT(3)
 
+#define ALE_PORT_HOST			BIT(0)
+#define ALE_PORT_1			BIT(1)
+#define ALE_PORT_2			BIT(2)
+
 #define ALE_MCAST_FWD			0
 #define ALE_MCAST_BLOCK_LEARN_FWD	1
 #define ALE_MCAST_FWD_LEARN		2
diff --git a/include/linux/platform_data/cpsw.h b/include/linux/platform_data/cpsw.h
index 24368a2e8b87..e962cfd552e3 100644
--- a/include/linux/platform_data/cpsw.h
+++ b/include/linux/platform_data/cpsw.h
@@ -35,6 +35,7 @@ struct cpsw_platform_data {
 	u32	bd_ram_size;  /*buffer descriptor ram size */
 	u32	rx_descs;	/* Number of Rx Descriptios */
 	u32	mac_control;	/* Mac control register */
+	u16	default_vlan;	/* Def VLAN for ALE lookup in VLAN aware mode*/
 };
 
 #endif /* __CPSW_H__ */
-- 
cgit v1.2.3


From e185483e6b84c127d0b1c890b6b703701ae52d35 Mon Sep 17 00:00:00 2001
From: Flavio Leitner <fbl@redhat.com>
Date: Tue, 5 Feb 2013 09:30:55 +0000
Subject: team: allow userspace to take control over carrier

Some modes don't require any special carrier handling so
in these cases, the kernel can control the carrier as for
any other interface.  However, some other modes, e.g. lacp,
requires more than just that, so userspace needs to control
the carrier itself.

The daemon today is ready to control it, but the kernel
still can change it based on events.

This fix so that either kernel or userspace is controlling
the carrier.

Signed-off-by: Flavio Leitner <fbl@redhat.com>
Signed-off-by: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/team/team.c | 8 ++++++++
 include/linux/if_team.h | 1 +
 2 files changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index 694ccf6d71a3..05c5efe84591 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -508,6 +508,7 @@ static bool team_is_mode_set(struct team *team)
 
 static void team_set_no_mode(struct team *team)
 {
+	team->user_carrier_enabled = false;
 	team->mode = &__team_no_mode;
 }
 
@@ -1710,6 +1711,10 @@ static netdev_features_t team_fix_features(struct net_device *dev,
 
 static int team_change_carrier(struct net_device *dev, bool new_carrier)
 {
+	struct team *team = netdev_priv(dev);
+
+	team->user_carrier_enabled = true;
+
 	if (new_carrier)
 		netif_carrier_on(dev);
 	else
@@ -2573,6 +2578,9 @@ static void __team_carrier_check(struct team *team)
 	struct team_port *port;
 	bool team_linkup;
 
+	if (team->user_carrier_enabled)
+		return;
+
 	team_linkup = false;
 	list_for_each_entry(port, &team->port_list, list) {
 		if (port->linkup) {
diff --git a/include/linux/if_team.h b/include/linux/if_team.h
index 0245def2aa93..4648d8021244 100644
--- a/include/linux/if_team.h
+++ b/include/linux/if_team.h
@@ -186,6 +186,7 @@ struct team {
 
 	const struct team_mode *mode;
 	struct team_mode_ops ops;
+	bool user_carrier_enabled;
 	bool queue_override_enabled;
 	struct list_head *qom_lists; /* array of queue override mapping lists */
 	long mode_priv[TEAM_MODE_PRIV_LONGS];
-- 
cgit v1.2.3


From 12b0004d1d1e2a9aa667412d479041e403bcafae Mon Sep 17 00:00:00 2001
From: Cong Wang <amwang@redhat.com>
Date: Tue, 5 Feb 2013 16:36:38 +0000
Subject: net: adjust skb_gso_segment() for calling in rx path

skb_gso_segment() is almost always called in tx path,
except for openvswitch. It calls this function when
it receives the packet and tries to queue it to user-space.
In this special case, the ->ip_summed check inside
skb_gso_segment() is no longer true, as ->ip_summed value
has different meanings on rx path.

This patch adjusts skb_gso_segment() so that we can at least
avoid such warnings on checksum.

Cc: Jesse Gross <jesse@nicira.com>
Cc: David S. Miller <davem@davemloft.net>
Signed-off-by: Cong Wang <amwang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h  | 11 +++++++++--
 net/core/dev.c             | 21 ++++++++++++++++-----
 net/openvswitch/datapath.c |  2 +-
 3 files changed, 26 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 85b0949d9946..ab2774eb49e8 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2662,8 +2662,15 @@ extern int netdev_master_upper_dev_link(struct net_device *dev,
 extern void netdev_upper_dev_unlink(struct net_device *dev,
 				    struct net_device *upper_dev);
 extern int skb_checksum_help(struct sk_buff *skb);
-extern struct sk_buff *skb_gso_segment(struct sk_buff *skb,
-	netdev_features_t features);
+extern struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
+	netdev_features_t features, bool tx_path);
+
+static inline
+struct sk_buff *skb_gso_segment(struct sk_buff *skb, netdev_features_t features)
+{
+	return __skb_gso_segment(skb, features, true);
+}
+
 #ifdef CONFIG_BUG
 extern void netdev_rx_csum_fault(struct net_device *dev);
 #else
diff --git a/net/core/dev.c b/net/core/dev.c
index 2b275a7b8677..65da698c500b 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2327,18 +2327,29 @@ out:
 }
 EXPORT_SYMBOL(skb_checksum_help);
 
+/* openvswitch calls this on rx path, so we need a different check.
+ */
+static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path)
+{
+	if (tx_path)
+		return skb->ip_summed != CHECKSUM_PARTIAL;
+	else
+		return skb->ip_summed == CHECKSUM_NONE;
+}
+
 /**
- *	skb_gso_segment - Perform segmentation on skb.
+ *	__skb_gso_segment - Perform segmentation on skb.
  *	@skb: buffer to segment
  *	@features: features for the output path (see dev->features)
+ *	@tx_path: whether it is called in TX path
  *
  *	This function segments the given skb and returns a list of segments.
  *
  *	It may return NULL if the skb requires no segmentation.  This is
  *	only possible when GSO is used for verifying header integrity.
  */
-struct sk_buff *skb_gso_segment(struct sk_buff *skb,
-	netdev_features_t features)
+struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
+				  netdev_features_t features, bool tx_path)
 {
 	struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
 	struct packet_offload *ptype;
@@ -2361,7 +2372,7 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb,
 	skb->mac_len = skb->network_header - skb->mac_header;
 	__skb_pull(skb, skb->mac_len);
 
-	if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
+	if (unlikely(skb_needs_check(skb, tx_path))) {
 		skb_warn_bad_offload(skb);
 
 		if (skb_header_cloned(skb) &&
@@ -2390,7 +2401,7 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb,
 
 	return segs;
 }
-EXPORT_SYMBOL(skb_gso_segment);
+EXPORT_SYMBOL(__skb_gso_segment);
 
 /* Take action when hardware reception checksum errors are detected. */
 #ifdef CONFIG_BUG
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index d8c13a965459..9dc537df46c4 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -301,7 +301,7 @@ static int queue_gso_packets(struct net *net, int dp_ifindex,
 	struct sk_buff *segs, *nskb;
 	int err;
 
-	segs = skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM);
+	segs = __skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM, false);
 	if (IS_ERR(segs))
 		return PTR_ERR(segs);
 
-- 
cgit v1.2.3


From cd431e738509e74726055390c9e5e81e8e7e03ec Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 5 Feb 2013 20:22:50 +0000
Subject: macvlan: add multicast filter

Setting up IPv6 addresses on configurations with many macvlans
is not really working, as many multicast messages are dropped.

Add a multicast filter to macvlan to reduce the amount of cloned
skbs and overhead.

Successfully tested with 1024 macvlans on one ethernet device.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Ben Greear <greearb@candelatech.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/macvlan.c      | 23 +++++++++++++++++++++++
 include/linux/if_macvlan.h |  6 ++++++
 2 files changed, 29 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 7b44ebd7770e..f494da82c33f 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -29,6 +29,7 @@
 #include <linux/if_vlan.h>
 #include <linux/if_link.h>
 #include <linux/if_macvlan.h>
+#include <linux/hash.h>
 #include <net/rtnetlink.h>
 #include <net/xfrm.h>
 
@@ -126,6 +127,13 @@ static int macvlan_broadcast_one(struct sk_buff *skb,
 	return vlan->receive(skb);
 }
 
+static unsigned int mc_hash(const unsigned char *addr)
+{
+	u32 val = __get_unaligned_cpu32(addr + 2);
+
+	return hash_32(val, MACVLAN_MC_FILTER_BITS);
+}
+
 static void macvlan_broadcast(struct sk_buff *skb,
 			      const struct macvlan_port *port,
 			      struct net_device *src,
@@ -137,6 +145,7 @@ static void macvlan_broadcast(struct sk_buff *skb,
 	struct sk_buff *nskb;
 	unsigned int i;
 	int err;
+	unsigned int hash = mc_hash(eth->h_dest);
 
 	if (skb->protocol == htons(ETH_P_PAUSE))
 		return;
@@ -146,6 +155,8 @@ static void macvlan_broadcast(struct sk_buff *skb,
 			if (vlan->dev == src || !(vlan->mode & mode))
 				continue;
 
+			if (!test_bit(hash, vlan->mc_filter))
+				continue;
 			nskb = skb_clone(skb, GFP_ATOMIC);
 			err = macvlan_broadcast_one(nskb, vlan, eth,
 					 mode == MACVLAN_MODE_BRIDGE);
@@ -405,6 +416,18 @@ static void macvlan_set_mac_lists(struct net_device *dev)
 {
 	struct macvlan_dev *vlan = netdev_priv(dev);
 
+	if (dev->flags & (IFF_PROMISC | IFF_ALLMULTI)) {
+		bitmap_fill(vlan->mc_filter, MACVLAN_MC_FILTER_SZ);
+	} else {
+		struct netdev_hw_addr *ha;
+		DECLARE_BITMAP(filter, MACVLAN_MC_FILTER_SZ);
+
+		bitmap_zero(filter, MACVLAN_MC_FILTER_SZ);
+		netdev_for_each_mc_addr(ha, dev) {
+			__set_bit(mc_hash(ha->addr), filter);
+		}
+		bitmap_copy(vlan->mc_filter, filter, MACVLAN_MC_FILTER_SZ);
+	}
 	dev_uc_sync(vlan->lowerdev, dev);
 	dev_mc_sync(vlan->lowerdev, dev);
 }
diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h
index f65e8d250f7e..84dde1dd1da4 100644
--- a/include/linux/if_macvlan.h
+++ b/include/linux/if_macvlan.h
@@ -52,6 +52,9 @@ struct macvlan_pcpu_stats {
  */
 #define MAX_MACVTAP_QUEUES	(NR_CPUS < 16 ? NR_CPUS : 16)
 
+#define MACVLAN_MC_FILTER_BITS	8
+#define MACVLAN_MC_FILTER_SZ	(1 << MACVLAN_MC_FILTER_BITS)
+
 struct macvlan_dev {
 	struct net_device	*dev;
 	struct list_head	list;
@@ -59,6 +62,9 @@ struct macvlan_dev {
 	struct macvlan_port	*port;
 	struct net_device	*lowerdev;
 	struct macvlan_pcpu_stats __percpu *pcpu_stats;
+
+	DECLARE_BITMAP(mc_filter, MACVLAN_MC_FILTER_SZ);
+
 	enum macvlan_mode	mode;
 	u16			flags;
 	int (*receive)(struct sk_buff *skb);
-- 
cgit v1.2.3


From c93d08fa75020835741c7b1d0523ff854e8acde1 Mon Sep 17 00:00:00 2001
From: "Milo(Woogyom) Kim" <milo.kim@ti.com>
Date: Tue, 5 Feb 2013 18:01:23 +0900
Subject: leds-lp55xx: add new common driver for lp5521/5523

 This patch supports basic common driver code for LP5521, LP5523/55231 devices.

 ( Driver Structure Data )

 lp55xx_led and lp55xx_chip
 In lp55xx common driver, two different data structure is used.
 o lp55xx_led
   control multi output LED channels such as led current, channel index.
 o lp55xx_chip
   general chip control such like the I2C and platform data.

 For example, LP5521 has maximum 3 LED channels.
 LP5523/55231 has 9 output channels.

 lp55xx_chip for LP5521 ... lp55xx_led #1
                            lp55xx_led #2
                            lp55xx_led #3

 lp55xx_chip for LP5523 ... lp55xx_led #1
                            lp55xx_led #2
                            .
                            .
                            lp55xx_led #9

 ( Platform Data )

 LP5521 and LP5523/55231 have own specific platform data.
 However, this data can be handled with just one platform data structure.
 The lp55xx platform data is declared in the header.
 This structure is derived from leds-lp5521.h and leds-lp5523.h

Signed-off-by: Milo(Woogyom) Kim <milo.kim@ti.com>
Signed-off-by: Bryan Wu <cooloney@gmail.com>
---
 drivers/leds/Kconfig                      |  9 ++++
 drivers/leds/Makefile                     |  1 +
 drivers/leds/leds-lp55xx-common.c         | 59 +++++++++++++++++++++
 drivers/leds/leds-lp55xx-common.h         | 61 ++++++++++++++++++++++
 include/linux/platform_data/leds-lp55xx.h | 87 +++++++++++++++++++++++++++++++
 5 files changed, 217 insertions(+)
 create mode 100644 drivers/leds/leds-lp55xx-common.c
 create mode 100644 drivers/leds/leds-lp55xx-common.h
 create mode 100644 include/linux/platform_data/leds-lp55xx.h

(limited to 'include/linux')

diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig
index b58bc8a14b9c..3d7822b3498f 100644
--- a/drivers/leds/Kconfig
+++ b/drivers/leds/Kconfig
@@ -193,9 +193,17 @@ config LEDS_LP3944
 	  To compile this driver as a module, choose M here: the
 	  module will be called leds-lp3944.
 
+config LEDS_LP55XX_COMMON
+	tristate "Common Driver for TI/National LP5521 and LP5523/55231"
+	depends on LEDS_LP5521 || LEDS_LP5523
+	help
+	  This option supports common operations for LP5521 and LP5523/55231
+	  devices.
+
 config LEDS_LP5521
 	tristate "LED Support for N.S. LP5521 LED driver chip"
 	depends on LEDS_CLASS && I2C
+	select LEDS_LP55XX_COMMON
 	help
 	  If you say yes here you get support for the National Semiconductor
 	  LP5521 LED driver. It is 3 channel chip with programmable engines.
@@ -205,6 +213,7 @@ config LEDS_LP5521
 config LEDS_LP5523
 	tristate "LED Support for TI/National LP5523/55231 LED driver chip"
 	depends on LEDS_CLASS && I2C
+	select LEDS_LP55XX_COMMON
 	help
 	  If you say yes here you get support for TI/National Semiconductor
 	  LP5523/55231 LED driver.
diff --git a/drivers/leds/Makefile b/drivers/leds/Makefile
index 3fb9641b6194..215e7e3b6173 100644
--- a/drivers/leds/Makefile
+++ b/drivers/leds/Makefile
@@ -23,6 +23,7 @@ obj-$(CONFIG_LEDS_PCA9532)		+= leds-pca9532.o
 obj-$(CONFIG_LEDS_GPIO_REGISTER)	+= leds-gpio-register.o
 obj-$(CONFIG_LEDS_GPIO)			+= leds-gpio.o
 obj-$(CONFIG_LEDS_LP3944)		+= leds-lp3944.o
+obj-$(CONFIG_LEDS_LP55XX_COMMON)	+= leds-lp55xx-common.o
 obj-$(CONFIG_LEDS_LP5521)		+= leds-lp5521.o
 obj-$(CONFIG_LEDS_LP5523)		+= leds-lp5523.o
 obj-$(CONFIG_LEDS_LP8788)		+= leds-lp8788.o
diff --git a/drivers/leds/leds-lp55xx-common.c b/drivers/leds/leds-lp55xx-common.c
new file mode 100644
index 000000000000..1c716ecfa817
--- /dev/null
+++ b/drivers/leds/leds-lp55xx-common.c
@@ -0,0 +1,59 @@
+/*
+ * LP5521/LP5523/LP55231 Common Driver
+ *
+ * Copyright 2012 Texas Instruments
+ *
+ * Author: Milo(Woogyom) Kim <milo.kim@ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Derived from leds-lp5521.c, leds-lp5523.c
+ */
+
+#include <linux/i2c.h>
+#include <linux/leds.h>
+#include <linux/module.h>
+#include <linux/platform_data/leds-lp55xx.h>
+
+#include "leds-lp55xx-common.h"
+
+int lp55xx_write(struct lp55xx_chip *chip, u8 reg, u8 val)
+{
+	return i2c_smbus_write_byte_data(chip->cl, reg, val);
+}
+EXPORT_SYMBOL_GPL(lp55xx_write);
+
+int lp55xx_read(struct lp55xx_chip *chip, u8 reg, u8 *val)
+{
+	s32 ret;
+
+	ret = i2c_smbus_read_byte_data(chip->cl, reg);
+	if (ret < 0)
+		return ret;
+
+	*val = ret;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(lp55xx_read);
+
+int lp55xx_update_bits(struct lp55xx_chip *chip, u8 reg, u8 mask, u8 val)
+{
+	int ret;
+	u8 tmp;
+
+	ret = lp55xx_read(chip, reg, &tmp);
+	if (ret)
+		return ret;
+
+	tmp &= ~mask;
+	tmp |= val & mask;
+
+	return lp55xx_write(chip, reg, tmp);
+}
+EXPORT_SYMBOL_GPL(lp55xx_update_bits);
+
+MODULE_AUTHOR("Milo Kim <milo.kim@ti.com>");
+MODULE_DESCRIPTION("LP55xx Common Driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/leds/leds-lp55xx-common.h b/drivers/leds/leds-lp55xx-common.h
new file mode 100644
index 000000000000..369cb9c91f17
--- /dev/null
+++ b/drivers/leds/leds-lp55xx-common.h
@@ -0,0 +1,61 @@
+/*
+ * LP55XX Common Driver Header
+ *
+ * Copyright (C) 2012 Texas Instruments
+ *
+ * Author: Milo(Woogyom) Kim <milo.kim@ti.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * Derived from leds-lp5521.c, leds-lp5523.c
+ */
+
+#ifndef _LEDS_LP55XX_COMMON_H
+#define _LEDS_LP55XX_COMMON_H
+
+struct lp55xx_led;
+struct lp55xx_chip;
+
+/*
+ * struct lp55xx_chip
+ * @cl         : I2C communication for access registers
+ * @pdata      : Platform specific data
+ * @lock       : Lock for user-space interface
+ * @num_leds   : Number of registered LEDs
+ */
+struct lp55xx_chip {
+	struct i2c_client *cl;
+	struct lp55xx_platform_data *pdata;
+	struct mutex lock;	/* lock for user-space interface */
+	int num_leds;
+};
+
+/*
+ * struct lp55xx_led
+ * @chan_nr         : Channel number
+ * @cdev            : LED class device
+ * @led_current     : Current setting at each led channel
+ * @max_current     : Maximun current at each led channel
+ * @brightness_work : Workqueue for brightness control
+ * @brightness      : Brightness value
+ * @chip            : The lp55xx chip data
+ */
+struct lp55xx_led {
+	int chan_nr;
+	struct led_classdev cdev;
+	u8 led_current;
+	u8 max_current;
+	struct work_struct brightness_work;
+	u8 brightness;
+	struct lp55xx_chip *chip;
+};
+
+/* register access */
+extern int lp55xx_write(struct lp55xx_chip *chip, u8 reg, u8 val);
+extern int lp55xx_read(struct lp55xx_chip *chip, u8 reg, u8 *val);
+extern int lp55xx_update_bits(struct lp55xx_chip *chip, u8 reg,
+			u8 mask, u8 val);
+
+#endif /* _LEDS_LP55XX_COMMON_H */
diff --git a/include/linux/platform_data/leds-lp55xx.h b/include/linux/platform_data/leds-lp55xx.h
new file mode 100644
index 000000000000..1509570d5a3f
--- /dev/null
+++ b/include/linux/platform_data/leds-lp55xx.h
@@ -0,0 +1,87 @@
+/*
+ * LP55XX Platform Data Header
+ *
+ * Copyright (C) 2012 Texas Instruments
+ *
+ * Author: Milo(Woogyom) Kim <milo.kim@ti.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * Derived from leds-lp5521.h, leds-lp5523.h
+ */
+
+#ifndef _LEDS_LP55XX_H
+#define _LEDS_LP55XX_H
+
+/* Clock configuration */
+#define LP55XX_CLOCK_AUTO	0
+#define LP55XX_CLOCK_INT	1
+#define LP55XX_CLOCK_EXT	2
+
+/* Bits in LP5521 CONFIG register. 'update_config' in lp55xx_platform_data */
+#define LP5521_PWM_HF			0x40	/* PWM: 0 = 256Hz, 1 = 558Hz */
+#define LP5521_PWRSAVE_EN		0x20	/* 1 = Power save mode */
+#define LP5521_CP_MODE_OFF		0	/* Charge pump (CP) off */
+#define LP5521_CP_MODE_BYPASS		8	/* CP forced to bypass mode */
+#define LP5521_CP_MODE_1X5		0x10	/* CP forced to 1.5x mode */
+#define LP5521_CP_MODE_AUTO		0x18	/* Automatic mode selection */
+#define LP5521_R_TO_BATT		4	/* R out: 0 = CP, 1 = Vbat */
+#define LP5521_CLK_SRC_EXT		0	/* Ext-clk source (CLK_32K) */
+#define LP5521_CLK_INT			1	/* Internal clock */
+#define LP5521_CLK_AUTO			2	/* Automatic clock selection */
+
+struct lp55xx_led_config {
+	const char *name;
+	u8 chan_nr;
+	u8 led_current; /* mA x10, 0 if led is not connected */
+	u8 max_current;
+};
+
+struct lp55xx_predef_pattern {
+	u8 *r;
+	u8 *g;
+	u8 *b;
+	u8 size_r;
+	u8 size_g;
+	u8 size_b;
+};
+
+/*
+ * struct lp55xx_platform_data
+ * @led_config        : Configurable led class device
+ * @num_channels      : Number of LED channels
+ * @label             : Used for naming LEDs
+ * @clock_mode        : Input clock mode. LP55XX_CLOCK_AUTO or _INT or _EXT
+ * @setup_resources   : Platform specific function before enabling the chip
+ * @release_resources : Platform specific function after  disabling the chip
+ * @enable            : EN pin control by platform side
+ * @patterns          : Predefined pattern data for RGB channels
+ * @num_patterns      : Number of patterns
+ * @update_config     : Value of CONFIG register
+ */
+struct lp55xx_platform_data {
+
+	/* LED channel configuration */
+	struct lp55xx_led_config *led_config;
+	u8 num_channels;
+	const char *label;
+
+	/* Clock configuration */
+	u8 clock_mode;
+
+	/* Platform specific functions */
+	int (*setup_resources)(void);
+	void (*release_resources)(void);
+	void (*enable)(bool state);
+
+	/* Predefined pattern data */
+	struct lp55xx_predef_pattern *patterns;
+	unsigned int num_patterns;
+
+	/* _CONFIG register */
+	u8 update_config;
+};
+
+#endif /* _LEDS_LP55XX_H */
-- 
cgit v1.2.3


From df4094d24e6328824a2dfe8e6f641bff9a484d68 Mon Sep 17 00:00:00 2001
From: "Milo(Woogyom) Kim" <milo.kim@ti.com>
Date: Tue, 5 Feb 2013 19:26:59 +0900
Subject: leds-lp5521/5523: use new lp55xx common header

 The LP55xx common driver provides a new header, leds-lp55xx.h.
 This driver enables removing duplicate code for both drivers and
 making coherent driver structure.
 LP5521 and LP5523/55231 platform data were merged into one common file.
 Therefore, the LP5521/5523 platform code need to be fixed.

 This patch has been already acked.

 For ux500: https://lkml.org/lkml/2012/10/11/417
 Acked-by: Linus Walleij <linus.walleij@linaro.org>

 For omap: https://lkml.org/lkml/2012/10/11/334
 Acked-by: Tony Lindgren <tony@atomide.com>

Signed-off-by: Milo(Woogyom) Kim <milo.kim@ti.com>
Signed-off-by: Bryan Wu <cooloney@gmail.com>
---
 arch/arm/mach-omap2/board-rx51-peripherals.c |  8 +--
 arch/arm/mach-ux500/board-mop500.c           | 14 +++---
 include/linux/leds-lp5521.h                  | 73 ----------------------------
 include/linux/leds-lp5523.h                  | 49 -------------------
 4 files changed, 11 insertions(+), 133 deletions(-)
 delete mode 100644 include/linux/leds-lp5521.h
 delete mode 100644 include/linux/leds-lp5523.h

(limited to 'include/linux')

diff --git a/arch/arm/mach-omap2/board-rx51-peripherals.c b/arch/arm/mach-omap2/board-rx51-peripherals.c
index cf07e289b4ea..1a2c4db13e32 100644
--- a/arch/arm/mach-omap2/board-rx51-peripherals.c
+++ b/arch/arm/mach-omap2/board-rx51-peripherals.c
@@ -40,7 +40,7 @@
 #include <sound/tpa6130a2-plat.h>
 #include <media/radio-si4713.h>
 #include <media/si4713.h>
-#include <linux/leds-lp5523.h>
+#include <linux/platform_data/leds-lp55xx.h>
 
 #include <../drivers/staging/iio/light/tsl2563.h>
 #include <linux/lis3lv02d.h>
@@ -160,7 +160,7 @@ static struct tsl2563_platform_data rx51_tsl2563_platform_data = {
 #endif
 
 #if defined(CONFIG_LEDS_LP5523) || defined(CONFIG_LEDS_LP5523_MODULE)
-static struct lp5523_led_config rx51_lp5523_led_config[] = {
+static struct lp55xx_led_config rx51_lp5523_led_config[] = {
 	{
 		.chan_nr	= 0,
 		.led_current	= 50,
@@ -207,10 +207,10 @@ static void rx51_lp5523_enable(bool state)
 	gpio_set_value(RX51_LP5523_CHIP_EN_GPIO, !!state);
 }
 
-static struct lp5523_platform_data rx51_lp5523_platform_data = {
+static struct lp55xx_platform_data rx51_lp5523_platform_data = {
 	.led_config		= rx51_lp5523_led_config,
 	.num_channels		= ARRAY_SIZE(rx51_lp5523_led_config),
-	.clock_mode		= LP5523_CLOCK_AUTO,
+	.clock_mode		= LP55XX_CLOCK_AUTO,
 	.setup_resources	= rx51_lp5523_setup,
 	.release_resources	= rx51_lp5523_release,
 	.enable			= rx51_lp5523_enable,
diff --git a/arch/arm/mach-ux500/board-mop500.c b/arch/arm/mach-ux500/board-mop500.c
index d453522edb0d..b04684e16e24 100644
--- a/arch/arm/mach-ux500/board-mop500.c
+++ b/arch/arm/mach-ux500/board-mop500.c
@@ -28,7 +28,7 @@
 #include <linux/mfd/tps6105x.h>
 #include <linux/mfd/abx500/ab8500-gpio.h>
 #include <linux/mfd/abx500/ab8500-codec.h>
-#include <linux/leds-lp5521.h>
+#include <linux/platform_data/leds-lp55xx.h>
 #include <linux/input.h>
 #include <linux/smsc911x.h>
 #include <linux/gpio_keys.h>
@@ -320,7 +320,7 @@ static struct tc3589x_platform_data mop500_tc35892_data = {
 	.irq_base	= MOP500_EGPIO_IRQ_BASE,
 };
 
-static struct lp5521_led_config lp5521_pri_led[] = {
+static struct lp55xx_led_config lp5521_pri_led[] = {
        [0] = {
 	       .chan_nr = 0,
 	       .led_current = 0x2f,
@@ -338,14 +338,14 @@ static struct lp5521_led_config lp5521_pri_led[] = {
        },
 };
 
-static struct lp5521_platform_data __initdata lp5521_pri_data = {
+static struct lp55xx_platform_data __initdata lp5521_pri_data = {
        .label = "lp5521_pri",
        .led_config     = &lp5521_pri_led[0],
        .num_channels   = 3,
-       .clock_mode     = LP5521_CLOCK_EXT,
+       .clock_mode     = LP55XX_CLOCK_EXT,
 };
 
-static struct lp5521_led_config lp5521_sec_led[] = {
+static struct lp55xx_led_config lp5521_sec_led[] = {
        [0] = {
 	       .chan_nr = 0,
 	       .led_current = 0x2f,
@@ -363,11 +363,11 @@ static struct lp5521_led_config lp5521_sec_led[] = {
        },
 };
 
-static struct lp5521_platform_data __initdata lp5521_sec_data = {
+static struct lp55xx_platform_data __initdata lp5521_sec_data = {
        .label = "lp5521_sec",
        .led_config     = &lp5521_sec_led[0],
        .num_channels   = 3,
-       .clock_mode     = LP5521_CLOCK_EXT,
+       .clock_mode     = LP55XX_CLOCK_EXT,
 };
 
 static struct i2c_board_info __initdata mop500_i2c0_devices[] = {
diff --git a/include/linux/leds-lp5521.h b/include/linux/leds-lp5521.h
deleted file mode 100644
index 3f071ec019b2..000000000000
--- a/include/linux/leds-lp5521.h
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * LP5521 LED chip driver.
- *
- * Copyright (C) 2010 Nokia Corporation
- *
- * Contact: Samu Onkalo <samu.p.onkalo@nokia.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
- * 02110-1301 USA
- */
-
-#ifndef __LINUX_LP5521_H
-#define __LINUX_LP5521_H
-
-/* See Documentation/leds/leds-lp5521.txt */
-
-struct lp5521_led_config {
-	char		*name;
-	u8		chan_nr;
-	u8		led_current; /* mA x10, 0 if led is not connected */
-	u8		max_current;
-};
-
-struct lp5521_led_pattern {
-	u8 *r;
-	u8 *g;
-	u8 *b;
-	u8 size_r;
-	u8 size_g;
-	u8 size_b;
-};
-
-#define LP5521_CLOCK_AUTO	0
-#define LP5521_CLOCK_INT	1
-#define LP5521_CLOCK_EXT	2
-
-/* Bits in CONFIG register */
-#define LP5521_PWM_HF			0x40	/* PWM: 0 = 256Hz, 1 = 558Hz */
-#define LP5521_PWRSAVE_EN		0x20	/* 1 = Power save mode */
-#define LP5521_CP_MODE_OFF		0	/* Charge pump (CP) off */
-#define LP5521_CP_MODE_BYPASS		8	/* CP forced to bypass mode */
-#define LP5521_CP_MODE_1X5		0x10	/* CP forced to 1.5x mode */
-#define LP5521_CP_MODE_AUTO		0x18	/* Automatic mode selection */
-#define LP5521_R_TO_BATT		4	/* R out: 0 = CP, 1 = Vbat */
-#define LP5521_CLK_SRC_EXT		0	/* Ext-clk source (CLK_32K) */
-#define LP5521_CLK_INT			1	/* Internal clock */
-#define LP5521_CLK_AUTO			2	/* Automatic clock selection */
-
-struct lp5521_platform_data {
-	struct lp5521_led_config *led_config;
-	u8	num_channels;
-	u8	clock_mode;
-	int	(*setup_resources)(void);
-	void	(*release_resources)(void);
-	void	(*enable)(bool state);
-	const char *label;
-	u8	update_config;
-	struct lp5521_led_pattern *patterns;
-	int num_patterns;
-};
-
-#endif /* __LINUX_LP5521_H */
diff --git a/include/linux/leds-lp5523.h b/include/linux/leds-lp5523.h
deleted file mode 100644
index 727877fb406d..000000000000
--- a/include/linux/leds-lp5523.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * LP5523 LED Driver
- *
- * Copyright (C) 2010 Nokia Corporation
- *
- * Contact: Samu Onkalo <samu.p.onkalo@nokia.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
- * 02110-1301 USA
- */
-
-#ifndef __LINUX_LP5523_H
-#define __LINUX_LP5523_H
-
-/* See Documentation/leds/leds-lp5523.txt */
-
-struct lp5523_led_config {
-	const char	*name;
-	u8		chan_nr;
-	u8		led_current; /* mA x10, 0 if led is not connected */
-	u8		max_current;
-};
-
-#define LP5523_CLOCK_AUTO	0
-#define LP5523_CLOCK_INT	1
-#define LP5523_CLOCK_EXT	2
-
-struct lp5523_platform_data {
-	struct lp5523_led_config *led_config;
-	u8	num_channels;
-	u8	clock_mode;
-	int	(*setup_resources)(void);
-	void	(*release_resources)(void);
-	void	(*enable)(bool state);
-	const	char *label;
-};
-
-#endif /* __LINUX_LP5523_H */
-- 
cgit v1.2.3


From 6be195886ac26abe0194ed1bc7a9224f8a97c310 Mon Sep 17 00:00:00 2001
From: Lai Jiangshan <laijs@cn.fujitsu.com>
Date: Wed, 6 Feb 2013 18:04:53 -0800
Subject: workqueue: replace WORK_CPU_NONE/LAST with WORK_CPU_END

Now that workqueue has moved away from gcwqs, workqueue no longer has
the need to have a CPU identifier indicating "no cpu associated" - we
now use WORK_OFFQ_POOL_NONE instead - and most uses of WORK_CPU_NONE
are gone.

The only left usage is as the end marker for for_each_*wq*()
iterators, where the name WORK_CPU_NONE is confusing w/o actual
WORK_CPU_NONE usages.  Similarly, WORK_CPU_LAST which equals
WORK_CPU_NONE no longer makes sense.

Replace both WORK_CPU_NONE and LAST with WORK_CPU_END.  This patch
doesn't introduce any functional difference.

tj: s/WORK_CPU_LAST/WORK_CPU_END/ and rewrote the description.

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/workqueue.h |  3 +--
 kernel/workqueue.c        | 10 +++++-----
 2 files changed, 6 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index a94e4e84e7b1..426c39c2aaa4 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -57,8 +57,7 @@ enum {
 
 	/* special cpu IDs */
 	WORK_CPU_UNBOUND	= NR_CPUS,
-	WORK_CPU_NONE		= NR_CPUS + 1,
-	WORK_CPU_LAST		= WORK_CPU_NONE,
+	WORK_CPU_END		= NR_CPUS + 1,
 
 	/*
 	 * Reserve 7 bits off of cwq pointer w/ debugobjects turned
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 577de1073f24..7e11334a119f 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -258,7 +258,7 @@ static inline int __next_wq_cpu(int cpu, const struct cpumask *mask,
 		if (sw & 2)
 			return WORK_CPU_UNBOUND;
 	}
-	return WORK_CPU_NONE;
+	return WORK_CPU_END;
 }
 
 static inline int __next_cwq_cpu(int cpu, const struct cpumask *mask,
@@ -282,17 +282,17 @@ static inline int __next_cwq_cpu(int cpu, const struct cpumask *mask,
  */
 #define for_each_wq_cpu(cpu)						\
 	for ((cpu) = __next_wq_cpu(-1, cpu_possible_mask, 3);		\
-	     (cpu) < WORK_CPU_NONE;					\
+	     (cpu) < WORK_CPU_END;					\
 	     (cpu) = __next_wq_cpu((cpu), cpu_possible_mask, 3))
 
 #define for_each_online_wq_cpu(cpu)					\
 	for ((cpu) = __next_wq_cpu(-1, cpu_online_mask, 3);		\
-	     (cpu) < WORK_CPU_NONE;					\
+	     (cpu) < WORK_CPU_END;					\
 	     (cpu) = __next_wq_cpu((cpu), cpu_online_mask, 3))
 
 #define for_each_cwq_cpu(cpu, wq)					\
 	for ((cpu) = __next_cwq_cpu(-1, cpu_possible_mask, (wq));	\
-	     (cpu) < WORK_CPU_NONE;					\
+	     (cpu) < WORK_CPU_END;					\
 	     (cpu) = __next_cwq_cpu((cpu), cpu_possible_mask, (wq)))
 
 #ifdef CONFIG_DEBUG_OBJECTS_WORK
@@ -3796,7 +3796,7 @@ static int __init init_workqueues(void)
 
 	/* make sure we have enough bits for OFFQ pool ID */
 	BUILD_BUG_ON((1LU << (BITS_PER_LONG - WORK_OFFQ_POOL_SHIFT)) <
-		     WORK_CPU_LAST * NR_STD_WORKER_POOLS);
+		     WORK_CPU_END * NR_STD_WORKER_POOLS);
 
 	cpu_notifier(workqueue_cpu_up_callback, CPU_PRI_WORKQUEUE_UP);
 	hotcpu_notifier(workqueue_cpu_down_callback, CPU_PRI_WORKQUEUE_DOWN);
-- 
cgit v1.2.3


From 60c057bca22285efefbba033624763a778f243bf Mon Sep 17 00:00:00 2001
From: Lai Jiangshan <laijs@cn.fujitsu.com>
Date: Wed, 6 Feb 2013 18:04:53 -0800
Subject: workqueue: add delayed_work->wq to simplify reentrancy handling

To avoid executing the same work item from multiple CPUs concurrently,
a work_struct records the last pool it was on in its ->data so that,
on the next queueing, the pool can be queried to determine whether the
work item is still executing or not.

A delayed_work goes through timer before actually being queued on the
target workqueue and the timer needs to know the target workqueue and
CPU.  This is currently achieved by modifying delayed_work->work.data
such that it points to the cwq which points to the target workqueue
and the last CPU the work item was on.  __queue_delayed_work()
extracts the last CPU from delayed_work->work.data and then combines
it with the target workqueue to create new work.data.

The only thing this rather ugly hack achieves is encoding the target
workqueue into delayed_work->work.data without using a separate field,
which could be a trade off one can make; unfortunately, this entangles
work->data management between regular workqueue and delayed_work code
by setting cwq pointer before the work item is actually queued and
becomes a hindrance for further improvements of work->data handling.

This can be easily made sane by adding a target workqueue field to
delayed_work.  While delayed_work is used widely in the kernel and
this does make it a bit larger (<5%), I think this is the right
trade-off especially given the prospect of much saner handling of
work->data which currently involves quite tricky memory barrier
dancing, and don't expect to see any measureable effect.

Add delayed_work->wq and drop the delayed_work->work.data overloading.

tj: Rewrote the description.

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/workqueue.h |  3 +++
 kernel/workqueue.c        | 32 +++-----------------------------
 2 files changed, 6 insertions(+), 29 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 426c39c2aaa4..a3d7556510c3 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -109,6 +109,9 @@ struct work_struct {
 struct delayed_work {
 	struct work_struct work;
 	struct timer_list timer;
+
+	/* target workqueue and CPU ->timer uses to queue ->work */
+	struct workqueue_struct *wq;
 	int cpu;
 };
 
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index a229a56f3a32..41a502ce3802 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -1339,10 +1339,9 @@ EXPORT_SYMBOL_GPL(queue_work);
 void delayed_work_timer_fn(unsigned long __data)
 {
 	struct delayed_work *dwork = (struct delayed_work *)__data;
-	struct cpu_workqueue_struct *cwq = get_work_cwq(&dwork->work);
 
 	/* should have been called from irqsafe timer with irq already off */
-	__queue_work(dwork->cpu, cwq->wq, &dwork->work);
+	__queue_work(dwork->cpu, dwork->wq, &dwork->work);
 }
 EXPORT_SYMBOL_GPL(delayed_work_timer_fn);
 
@@ -1351,7 +1350,6 @@ static void __queue_delayed_work(int cpu, struct workqueue_struct *wq,
 {
 	struct timer_list *timer = &dwork->timer;
 	struct work_struct *work = &dwork->work;
-	unsigned int lcpu;
 
 	WARN_ON_ONCE(timer->function != delayed_work_timer_fn ||
 		     timer->data != (unsigned long)dwork);
@@ -1371,30 +1369,7 @@ static void __queue_delayed_work(int cpu, struct workqueue_struct *wq,
 
 	timer_stats_timer_set_start_info(&dwork->timer);
 
-	/*
-	 * This stores cwq for the moment, for the timer_fn.  Note that the
-	 * work's pool is preserved to allow reentrance detection for
-	 * delayed works.
-	 */
-	if (!(wq->flags & WQ_UNBOUND)) {
-		struct worker_pool *pool = get_work_pool(work);
-
-		/*
-		 * If we cannot get the last pool from @work directly,
-		 * select the last CPU such that it avoids unnecessarily
-		 * triggering non-reentrancy check in __queue_work().
-		 */
-		lcpu = cpu;
-		if (pool)
-			lcpu = pool->cpu;
-		if (lcpu == WORK_CPU_UNBOUND)
-			lcpu = raw_smp_processor_id();
-	} else {
-		lcpu = WORK_CPU_UNBOUND;
-	}
-
-	set_work_cwq(work, get_cwq(lcpu, wq), 0);
-
+	dwork->wq = wq;
 	dwork->cpu = cpu;
 	timer->expires = jiffies + delay;
 
@@ -2944,8 +2919,7 @@ bool flush_delayed_work(struct delayed_work *dwork)
 {
 	local_irq_disable();
 	if (del_timer_sync(&dwork->timer))
-		__queue_work(dwork->cpu,
-			     get_work_cwq(&dwork->work)->wq, &dwork->work);
+		__queue_work(dwork->cpu, dwork->wq, &dwork->work);
 	local_irq_enable();
 	return flush_work(&dwork->work);
 }
-- 
cgit v1.2.3


From 2a2483072393b27f4336ab068a1f48ca19ff1c1e Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Thu, 24 Jan 2013 14:14:19 +1000
Subject: vgacon/vt: clear buffer attributes when we load a 512 character font
 (v2)

When we switch from 256->512 byte font rendering mode, it means the
current contents of the screen is being reinterpreted. The bit that holds
the high bit of the 9-bit font, may have been previously set, and thus
the new font misrenders.

The problem case we see is grub2 writes spaces with the bit set, so it
ends up with data like 0x820, which gets reinterpreted into 0x120 char
which the font translates into G with a circumflex. This flashes up on
screen at boot and is quite ugly.

A current side effect of this patch though is that any rendering on the
screen changes color to a slightly darker color, but at least the screen
no longer corrupts.

v2: as suggested by hpa, always clear the attribute space, whether we
are are going to or from 512 chars.

Cc: stable@vger.kernel.org
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/tty/vt/vt.c            |  2 +-
 drivers/video/console/vgacon.c | 22 +++++++++++++++-------
 include/linux/vt_kern.h        |  1 +
 3 files changed, 17 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c
index 8fd89687d068..c8067aef4b86 100644
--- a/drivers/tty/vt/vt.c
+++ b/drivers/tty/vt/vt.c
@@ -638,7 +638,7 @@ static inline void save_screen(struct vc_data *vc)
  *	Redrawing of screen
  */
 
-static void clear_buffer_attributes(struct vc_data *vc)
+void clear_buffer_attributes(struct vc_data *vc)
 {
 	unsigned short *p = (unsigned short *)vc->vc_origin;
 	int count = vc->vc_screenbuf_size / 2;
diff --git a/drivers/video/console/vgacon.c b/drivers/video/console/vgacon.c
index d449a74d4a31..5855d17d19ac 100644
--- a/drivers/video/console/vgacon.c
+++ b/drivers/video/console/vgacon.c
@@ -1064,7 +1064,7 @@ static int vgacon_do_font_op(struct vgastate *state,char *arg,int set,int ch512)
 	unsigned short video_port_status = vga_video_port_reg + 6;
 	int font_select = 0x00, beg, i;
 	char *charmap;
-	
+	bool clear_attribs = false;
 	if (vga_video_type != VIDEO_TYPE_EGAM) {
 		charmap = (char *) VGA_MAP_MEM(colourmap, 0);
 		beg = 0x0e;
@@ -1169,12 +1169,6 @@ static int vgacon_do_font_op(struct vgastate *state,char *arg,int set,int ch512)
 
 	/* if 512 char mode is already enabled don't re-enable it. */
 	if ((set) && (ch512 != vga_512_chars)) {
-		/* attribute controller */
-		for (i = 0; i < MAX_NR_CONSOLES; i++) {
-			struct vc_data *c = vc_cons[i].d;
-			if (c && c->vc_sw == &vga_con)
-				c->vc_hi_font_mask = ch512 ? 0x0800 : 0;
-		}
 		vga_512_chars = ch512;
 		/* 256-char: enable intensity bit
 		   512-char: disable intensity bit */
@@ -1185,8 +1179,22 @@ static int vgacon_do_font_op(struct vgastate *state,char *arg,int set,int ch512)
 		   it means, but it works, and it appears necessary */
 		inb_p(video_port_status);
 		vga_wattr(state->vgabase, VGA_AR_ENABLE_DISPLAY, 0);	
+		clear_attribs = true;
 	}
 	raw_spin_unlock_irq(&vga_lock);
+
+	if (clear_attribs) {
+		for (i = 0; i < MAX_NR_CONSOLES; i++) {
+			struct vc_data *c = vc_cons[i].d;
+			if (c && c->vc_sw == &vga_con) {
+				/* force hi font mask to 0, so we always clear
+				   the bit on either transition */
+				c->vc_hi_font_mask = 0x00;
+				clear_buffer_attributes(c);
+				c->vc_hi_font_mask = ch512 ? 0x0800 : 0;
+			}
+		}
+	}
 	return 0;
 }
 
diff --git a/include/linux/vt_kern.h b/include/linux/vt_kern.h
index 50ae7d0c279e..1f55665d7b49 100644
--- a/include/linux/vt_kern.h
+++ b/include/linux/vt_kern.h
@@ -47,6 +47,7 @@ int con_set_cmap(unsigned char __user *cmap);
 int con_get_cmap(unsigned char __user *cmap);
 void scrollback(struct vc_data *vc, int lines);
 void scrollfront(struct vc_data *vc, int lines);
+void clear_buffer_attributes(struct vc_data *vc);
 void update_region(struct vc_data *vc, unsigned long start, int count);
 void redraw_screen(struct vc_data *vc, int is_switch);
 #define update_screen(x) redraw_screen(x, 0)
-- 
cgit v1.2.3


From 9fff24aa2c5c504aadead1ff9599e813604c2e53 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Wed, 6 Feb 2013 22:30:23 -0500
Subject: jbd2: track request delay statistics

Track the delay between when we first request that the commit begin
and when it actually begins, so we can see how much of a gap exists.
In theory, this should just be the remaining scheduling quantuum of
the thread which requested the commit (assuming it was not a
synchronous operation which triggered the commit request) plus
scheduling overhead; however, it's possible that real time processes
might get in the way of letting the kjournald thread from executing.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/jbd2/commit.c            |  8 ++++++++
 fs/jbd2/journal.c           | 12 +++++++++---
 fs/jbd2/transaction.c       |  1 +
 include/linux/jbd2.h        |  7 +++++++
 include/trace/events/jbd2.h |  8 ++++++--
 5 files changed, 31 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 3091d42992f0..750c70148eff 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -435,7 +435,12 @@ void jbd2_journal_commit_transaction(journal_t *journal)
 
 	trace_jbd2_commit_locking(journal, commit_transaction);
 	stats.run.rs_wait = commit_transaction->t_max_wait;
+	stats.run.rs_request_delay = 0;
 	stats.run.rs_locked = jiffies;
+	if (commit_transaction->t_requested)
+		stats.run.rs_request_delay =
+			jbd2_time_diff(commit_transaction->t_requested,
+				       stats.run.rs_locked);
 	stats.run.rs_running = jbd2_time_diff(commit_transaction->t_start,
 					      stats.run.rs_locked);
 
@@ -1116,7 +1121,10 @@ restart_loop:
 	 */
 	spin_lock(&journal->j_history_lock);
 	journal->j_stats.ts_tid++;
+	if (commit_transaction->t_requested)
+		journal->j_stats.ts_requested++;
 	journal->j_stats.run.rs_wait += stats.run.rs_wait;
+	journal->j_stats.run.rs_request_delay += stats.run.rs_request_delay;
 	journal->j_stats.run.rs_running += stats.run.rs_running;
 	journal->j_stats.run.rs_locked += stats.run.rs_locked;
 	journal->j_stats.run.rs_flushing += stats.run.rs_flushing;
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 1a80e3146a59..4ba2e81e35ac 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -533,6 +533,7 @@ int __jbd2_log_start_commit(journal_t *journal, tid_t target)
 		jbd_debug(1, "JBD2: requesting commit %d/%d\n",
 			  journal->j_commit_request,
 			  journal->j_commit_sequence);
+		journal->j_running_transaction->t_requested = jiffies;
 		wake_up(&journal->j_wait_commit);
 		return 1;
 	} else if (!tid_geq(journal->j_commit_request, target))
@@ -898,13 +899,18 @@ static int jbd2_seq_info_show(struct seq_file *seq, void *v)
 
 	if (v != SEQ_START_TOKEN)
 		return 0;
-	seq_printf(seq, "%lu transaction, each up to %u blocks\n",
-			s->stats->ts_tid,
-			s->journal->j_max_transaction_buffers);
+	seq_printf(seq, "%lu transactions (%lu requested), "
+		   "each up to %u blocks\n",
+		   s->stats->ts_tid, s->stats->ts_requested,
+		   s->journal->j_max_transaction_buffers);
 	if (s->stats->ts_tid == 0)
 		return 0;
 	seq_printf(seq, "average: \n  %ums waiting for transaction\n",
 	    jiffies_to_msecs(s->stats->run.rs_wait / s->stats->ts_tid));
+	seq_printf(seq, "  %ums request delay\n",
+	    (s->stats->ts_requested == 0) ? 0 :
+	    jiffies_to_msecs(s->stats->run.rs_request_delay /
+			     s->stats->ts_requested));
 	seq_printf(seq, "  %ums running transaction\n",
 	    jiffies_to_msecs(s->stats->run.rs_running / s->stats->ts_tid));
 	seq_printf(seq, "  %ums transaction was being locked\n",
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index df9f29760efa..735609e2d636 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -100,6 +100,7 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction)
 	journal->j_running_transaction = transaction;
 	transaction->t_max_wait = 0;
 	transaction->t_start = jiffies;
+	transaction->t_requested = 0;
 
 	return transaction;
 }
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index e30b66346942..e0aafc46064f 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -580,6 +580,11 @@ struct transaction_s
 	 */
 	unsigned long		t_start;
 
+	/*
+	 * When commit was requested
+	 */
+	unsigned long		t_requested;
+
 	/*
 	 * Checkpointing stats [j_checkpoint_sem]
 	 */
@@ -637,6 +642,7 @@ struct transaction_s
 
 struct transaction_run_stats_s {
 	unsigned long		rs_wait;
+	unsigned long		rs_request_delay;
 	unsigned long		rs_running;
 	unsigned long		rs_locked;
 	unsigned long		rs_flushing;
@@ -649,6 +655,7 @@ struct transaction_run_stats_s {
 
 struct transaction_stats_s {
 	unsigned long		ts_tid;
+	unsigned long		ts_requested;
 	struct transaction_run_stats_s run;
 };
 
diff --git a/include/trace/events/jbd2.h b/include/trace/events/jbd2.h
index 127993dbf322..5419f57beb1f 100644
--- a/include/trace/events/jbd2.h
+++ b/include/trace/events/jbd2.h
@@ -142,6 +142,7 @@ TRACE_EVENT(jbd2_run_stats,
 		__field(		dev_t,	dev		)
 		__field(	unsigned long,	tid		)
 		__field(	unsigned long,	wait		)
+		__field(	unsigned long,	request_delay	)
 		__field(	unsigned long,	running		)
 		__field(	unsigned long,	locked		)
 		__field(	unsigned long,	flushing	)
@@ -155,6 +156,7 @@ TRACE_EVENT(jbd2_run_stats,
 		__entry->dev		= dev;
 		__entry->tid		= tid;
 		__entry->wait		= stats->rs_wait;
+		__entry->request_delay	= stats->rs_request_delay;
 		__entry->running	= stats->rs_running;
 		__entry->locked		= stats->rs_locked;
 		__entry->flushing	= stats->rs_flushing;
@@ -164,10 +166,12 @@ TRACE_EVENT(jbd2_run_stats,
 		__entry->blocks_logged	= stats->rs_blocks_logged;
 	),
 
-	TP_printk("dev %d,%d tid %lu wait %u running %u locked %u flushing %u "
-		  "logging %u handle_count %u blocks %u blocks_logged %u",
+	TP_printk("dev %d,%d tid %lu wait %u request_delay %u running %u "
+		  "locked %u flushing %u logging %u handle_count %u "
+		  "blocks %u blocks_logged %u",
 		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->tid,
 		  jiffies_to_msecs(__entry->wait),
+		  jiffies_to_msecs(__entry->request_delay),
 		  jiffies_to_msecs(__entry->running),
 		  jiffies_to_msecs(__entry->locked),
 		  jiffies_to_msecs(__entry->flushing),
-- 
cgit v1.2.3


From 078d5039a13dedbd2ed14153a6d764fd75baae07 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Thu, 7 Feb 2013 00:02:15 -0500
Subject: jbd2: revert "jbd2: add COW fields to struct jbd2_journal_handle"

This reverts commit 93737456d68ddcb86232f669b83da673dd12e351.

The cow-snapshots effort is no longer active, so remove these extra
fields to shrink down the handle structure.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Reviewed-by: Jan Kara <jack@suse.cz>
---
 include/linux/jbd2.h | 28 +++-------------------------
 1 file changed, 3 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index e0aafc46064f..24db7256a5ff 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -397,35 +397,13 @@ struct jbd2_journal_handle
 	int			h_err;
 
 	/* Flags [no locking] */
-	unsigned int	h_sync:1;	/* sync-on-close */
-	unsigned int	h_jdata:1;	/* force data journaling */
-	unsigned int	h_aborted:1;	/* fatal error on handle */
-	unsigned int	h_cowing:1;	/* COWing block to snapshot */
-
-	/* Number of buffers requested by user:
-	 * (before adding the COW credits factor) */
-	unsigned int	h_base_credits:14;
-
-	/* Number of buffers the user is allowed to dirty:
-	 * (counts only buffers dirtied when !h_cowing) */
-	unsigned int	h_user_credits:14;
-
+	unsigned int	h_sync:		1;	/* sync-on-close */
+	unsigned int	h_jdata:	1;	/* force data journaling */
+	unsigned int	h_aborted:	1;	/* fatal error on handle */
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 	struct lockdep_map	h_lockdep_map;
 #endif
-
-#ifdef CONFIG_JBD2_DEBUG
-	/* COW debugging counters: */
-	unsigned int h_cow_moved; /* blocks moved to snapshot */
-	unsigned int h_cow_copied; /* blocks copied to snapshot */
-	unsigned int h_cow_ok_jh; /* blocks already COWed during current
-				     transaction */
-	unsigned int h_cow_ok_bitmap; /* blocks not set in COW bitmap */
-	unsigned int h_cow_ok_mapped;/* blocks already mapped in snapshot */
-	unsigned int h_cow_bitmaps; /* COW bitmaps created */
-	unsigned int h_cow_excluded; /* blocks set in exclude bitmap */
-#endif
 };
 
 
-- 
cgit v1.2.3


From 2e033db5ddf299de2ae568919d78b0258a5a6423 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Mon, 21 Jan 2013 17:36:33 +0900
Subject: extcon: arizona: Support additional configuration of microphone
 detection

Allow systems to tune detection rate and debounce suitably for their
mechanical parameters.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 drivers/extcon/extcon-arizona.c   | 12 ++++++++++++
 include/linux/mfd/arizona/pdata.h |  6 ++++++
 2 files changed, 18 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/extcon/extcon-arizona.c b/drivers/extcon/extcon-arizona.c
index ab8b9c7359fb..d7e1047ad68e 100644
--- a/drivers/extcon/extcon-arizona.c
+++ b/drivers/extcon/extcon-arizona.c
@@ -907,6 +907,18 @@ static int arizona_extcon_probe(struct platform_device *pdev)
 				   arizona->pdata.micd_bias_start_time
 				   << ARIZONA_MICD_BIAS_STARTTIME_SHIFT);
 
+	if (arizona->pdata.micd_rate)
+		regmap_update_bits(arizona->regmap, ARIZONA_MIC_DETECT_1,
+				   ARIZONA_MICD_RATE_MASK,
+				   arizona->pdata.micd_rate
+				   << ARIZONA_MICD_RATE_SHIFT);
+
+	if (arizona->pdata.micd_dbtime)
+		regmap_update_bits(arizona->regmap, ARIZONA_MIC_DETECT_1,
+				   ARIZONA_MICD_DBTIME_MASK,
+				   arizona->pdata.micd_dbtime
+				   << ARIZONA_MICD_DBTIME_SHIFT);
+
 	/*
 	 * If we have a clamp use it, activating in conjunction with
 	 * GPIO5 if that is connected for jack detect operation.
diff --git a/include/linux/mfd/arizona/pdata.h b/include/linux/mfd/arizona/pdata.h
index bcbe4fda87cb..2f5f08e10b79 100644
--- a/include/linux/mfd/arizona/pdata.h
+++ b/include/linux/mfd/arizona/pdata.h
@@ -111,6 +111,12 @@ struct arizona_pdata {
 	/** Mic detect ramp rate */
 	int micd_bias_start_time;
 
+	/** Mic detect sample rate */
+	int micd_rate;
+
+	/** Mic detect debounce level */
+	int micd_dbtime;
+
 	/** Headset polarity configurations */
 	struct arizona_micd_config *micd_configs;
 	int num_micd_configs;
-- 
cgit v1.2.3


From bbbd46e3d7fcdf1c8362bf1c83bcc08a93676cc9 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Thu, 10 Jan 2013 19:38:43 +0000
Subject: extcon: arizona: Use regulated mode for microphone supply when
 detecting

When starting microphone detection some headsets should be exposed to
the fully regulated microphone bias in order to ensure that they behave
in an optimal fashion.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 drivers/extcon/Kconfig            |  2 +-
 drivers/extcon/extcon-arizona.c   | 93 +++++++++++++++++++++++++++++++++++++++
 include/linux/mfd/arizona/pdata.h |  3 ++
 3 files changed, 97 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/extcon/Kconfig b/drivers/extcon/Kconfig
index 07122a9ef36e..9377050e5335 100644
--- a/drivers/extcon/Kconfig
+++ b/drivers/extcon/Kconfig
@@ -47,7 +47,7 @@ config EXTCON_MAX8997
 
 config EXTCON_ARIZONA
 	tristate "Wolfson Arizona EXTCON support"
-	depends on MFD_ARIZONA && INPUT
+	depends on MFD_ARIZONA && INPUT && SND_SOC
 	help
 	  Say Y here to enable support for external accessory detection
 	  with Wolfson Arizona devices. These are audio CODECs with
diff --git a/drivers/extcon/extcon-arizona.c b/drivers/extcon/extcon-arizona.c
index d7e1047ad68e..aa724314677a 100644
--- a/drivers/extcon/extcon-arizona.c
+++ b/drivers/extcon/extcon-arizona.c
@@ -27,6 +27,8 @@
 #include <linux/regulator/consumer.h>
 #include <linux/extcon.h>
 
+#include <sound/soc.h>
+
 #include <linux/mfd/arizona/core.h>
 #include <linux/mfd/arizona/pdata.h>
 #include <linux/mfd/arizona/registers.h>
@@ -113,6 +115,52 @@ static void arizona_extcon_set_mode(struct arizona_extcon_info *info, int mode)
 	dev_dbg(arizona->dev, "Set jack polarity to %d\n", mode);
 }
 
+static const char *arizona_extcon_get_micbias(struct arizona_extcon_info *info)
+{
+	switch (info->micd_modes[0].bias >> ARIZONA_MICD_BIAS_SRC_SHIFT) {
+	case 1:
+		return "MICBIAS1";
+	case 2:
+		return "MICBIAS2";
+	case 3:
+		return "MICBIAS3";
+	default:
+		return "MICVDD";
+	}
+}
+
+static void arizona_extcon_pulse_micbias(struct arizona_extcon_info *info)
+{
+	struct arizona *arizona = info->arizona;
+	const char *widget = arizona_extcon_get_micbias(info);
+	struct snd_soc_dapm_context *dapm = arizona->dapm;
+	int ret;
+
+	mutex_lock(&dapm->card->dapm_mutex);
+
+	ret = snd_soc_dapm_force_enable_pin(dapm, widget);
+	if (ret != 0)
+		dev_warn(arizona->dev, "Failed to enable %s: %d\n",
+			 widget, ret);
+
+	mutex_unlock(&dapm->card->dapm_mutex);
+
+	snd_soc_dapm_sync(dapm);
+
+	if (!arizona->pdata.micd_force_micbias) {
+		mutex_lock(&dapm->card->dapm_mutex);
+
+		ret = snd_soc_dapm_disable_pin(arizona->dapm, widget);
+		if (ret != 0)
+			dev_warn(arizona->dev, "Failed to disable %s: %d\n",
+				 widget, ret);
+
+		mutex_unlock(&dapm->card->dapm_mutex);
+
+		snd_soc_dapm_sync(dapm);
+	}
+}
+
 static void arizona_start_mic(struct arizona_extcon_info *info)
 {
 	struct arizona *arizona = info->arizona;
@@ -122,6 +170,15 @@ static void arizona_start_mic(struct arizona_extcon_info *info)
 	/* Microphone detection can't use idle mode */
 	pm_runtime_get(info->dev);
 
+	if (info->detecting) {
+		ret = regulator_allow_bypass(info->micvdd, false);
+		if (ret != 0) {
+			dev_err(arizona->dev,
+				"Failed to regulate MICVDD: %d\n",
+				ret);
+		}
+	}
+
 	ret = regulator_enable(info->micvdd);
 	if (ret != 0) {
 		dev_err(arizona->dev, "Failed to enable MICVDD: %d\n",
@@ -138,6 +195,8 @@ static void arizona_start_mic(struct arizona_extcon_info *info)
 			   ARIZONA_ACCESSORY_DETECT_MODE_1,
 			   ARIZONA_ACCDET_MODE_MASK, ARIZONA_ACCDET_MODE_MIC);
 
+	arizona_extcon_pulse_micbias(info);
+
 	regmap_update_bits_check(arizona->regmap, ARIZONA_MIC_DETECT_1,
 				 ARIZONA_MICD_ENA, ARIZONA_MICD_ENA,
 				 &change);
@@ -150,18 +209,39 @@ static void arizona_start_mic(struct arizona_extcon_info *info)
 static void arizona_stop_mic(struct arizona_extcon_info *info)
 {
 	struct arizona *arizona = info->arizona;
+	const char *widget = arizona_extcon_get_micbias(info);
+	struct snd_soc_dapm_context *dapm = arizona->dapm;
 	bool change;
+	int ret;
 
 	regmap_update_bits_check(arizona->regmap, ARIZONA_MIC_DETECT_1,
 				 ARIZONA_MICD_ENA, 0,
 				 &change);
 
+	mutex_lock(&dapm->card->dapm_mutex);
+
+	ret = snd_soc_dapm_disable_pin(dapm, widget);
+	if (ret != 0)
+		dev_warn(arizona->dev,
+			 "Failed to disable %s: %d\n",
+			 widget, ret);
+
+	mutex_unlock(&dapm->card->dapm_mutex);
+
+	snd_soc_dapm_sync(dapm);
+
 	if (info->micd_reva) {
 		regmap_write(arizona->regmap, 0x80, 0x3);
 		regmap_write(arizona->regmap, 0x294, 2);
 		regmap_write(arizona->regmap, 0x80, 0x0);
 	}
 
+	ret = regulator_allow_bypass(info->micvdd, true);
+	if (ret != 0) {
+		dev_err(arizona->dev, "Failed to bypass MICVDD: %d\n",
+			ret);
+	}
+
 	if (change) {
 		regulator_disable(info->micvdd);
 		pm_runtime_mark_last_busy(info->dev);
@@ -564,6 +644,8 @@ static void arizona_start_hpdet_acc_id(struct arizona_extcon_info *info)
 
 	info->hpdet_active = true;
 
+	arizona_extcon_pulse_micbias(info);
+
 	ret = regmap_update_bits(arizona->regmap, 0x225, 0x4000, 0x4000);
 	if (ret != 0)
 		dev_warn(arizona->dev, "Failed to do magic: %d\n", ret);
@@ -649,6 +731,13 @@ static irqreturn_t arizona_micdet(int irq, void *data)
 			dev_err(arizona->dev, "Headset report failed: %d\n",
 				ret);
 
+		/* Don't need to regulate for button detection */
+		ret = regulator_allow_bypass(info->micvdd, false);
+		if (ret != 0) {
+			dev_err(arizona->dev, "Failed to bypass MICVDD: %d\n",
+				ret);
+		}
+
 		info->mic = true;
 		info->detecting = false;
 		goto handled;
@@ -716,6 +805,7 @@ static irqreturn_t arizona_micdet(int irq, void *data)
 			input_report_key(info->input,
 					 arizona_lvl_to_key[i].report, 0);
 		input_sync(info->input);
+		arizona_extcon_pulse_micbias(info);
 	}
 
 handled:
@@ -817,6 +907,9 @@ static int arizona_extcon_probe(struct platform_device *pdev)
 	int jack_irq_fall, jack_irq_rise;
 	int ret, mode, i;
 
+	if (!arizona->dapm || !arizona->dapm->card)
+		return -EPROBE_DEFER;
+
 	pdata = dev_get_platdata(arizona->dev);
 
 	info = devm_kzalloc(&pdev->dev, sizeof(*info), GFP_KERNEL);
diff --git a/include/linux/mfd/arizona/pdata.h b/include/linux/mfd/arizona/pdata.h
index 2f5f08e10b79..f8241753415c 100644
--- a/include/linux/mfd/arizona/pdata.h
+++ b/include/linux/mfd/arizona/pdata.h
@@ -117,6 +117,9 @@ struct arizona_pdata {
 	/** Mic detect debounce level */
 	int micd_dbtime;
 
+	/** Force MICBIAS on for mic detect */
+	bool micd_force_micbias;
+
 	/** Headset polarity configurations */
 	struct arizona_micd_config *micd_configs;
 	int num_micd_configs;
-- 
cgit v1.2.3


From cf4aebc292fac7f34f8345664320e9d4a42ca76c Mon Sep 17 00:00:00 2001
From: Clark Williams <williams@redhat.com>
Date: Thu, 7 Feb 2013 09:46:59 -0600
Subject: sched: Move sched.h sysctl bits into separate header

Move the sysctl-related bits from include/linux/sched.h into
a new file: include/linux/sched/sysctl.h. Then update source
files requiring access to those bits by including the new
header file.

Signed-off-by: Clark Williams <williams@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/20130207094659.06dced96@riff.lan
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 block/blk-exec.c             |  1 +
 include/linux/sched.h        | 91 -----------------------------------------
 include/linux/sched/sysctl.h | 97 ++++++++++++++++++++++++++++++++++++++++++++
 init/init_task.c             |  1 +
 kernel/hrtimer.c             |  1 +
 kernel/sched/sched.h         |  1 +
 kernel/sysctl.c              |  1 +
 kernel/timer.c               |  1 +
 mm/mmap.c                    |  1 +
 mm/mremap.c                  |  1 +
 mm/nommu.c                   |  1 +
 11 files changed, 106 insertions(+), 91 deletions(-)
 create mode 100644 include/linux/sched/sysctl.h

(limited to 'include/linux')

diff --git a/block/blk-exec.c b/block/blk-exec.c
index 74638ec234c8..c88202f973d9 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -5,6 +5,7 @@
 #include <linux/module.h>
 #include <linux/bio.h>
 #include <linux/blkdev.h>
+#include <linux/sched/sysctl.h>
 
 #include "blk.h"
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 719ee0815e3a..8fc9b2710a80 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -304,19 +304,6 @@ static inline void lockup_detector_init(void)
 }
 #endif
 
-#ifdef CONFIG_DETECT_HUNG_TASK
-extern unsigned int  sysctl_hung_task_panic;
-extern unsigned long sysctl_hung_task_check_count;
-extern unsigned long sysctl_hung_task_timeout_secs;
-extern unsigned long sysctl_hung_task_warnings;
-extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
-					 void __user *buffer,
-					 size_t *lenp, loff_t *ppos);
-#else
-/* Avoid need for ifdefs elsewhere in the code */
-enum { sysctl_hung_task_timeout_secs = 0 };
-#endif
-
 /* Attach to any functions which should be ignored in wchan output. */
 #define __sched		__attribute__((__section__(".sched.text")))
 
@@ -338,23 +325,6 @@ extern int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner);
 struct nsproxy;
 struct user_namespace;
 
-/*
- * Default maximum number of active map areas, this limits the number of vmas
- * per mm struct. Users can overwrite this number by sysctl but there is a
- * problem.
- *
- * When a program's coredump is generated as ELF format, a section is created
- * per a vma. In ELF, the number of sections is represented in unsigned short.
- * This means the number of sections should be smaller than 65535 at coredump.
- * Because the kernel adds some informative sections to a image of program at
- * generating coredump, we need some margin. The number of extra sections is
- * 1-3 now and depends on arch. We use "5" as safe margin, here.
- */
-#define MAPCOUNT_ELF_CORE_MARGIN	(5)
-#define DEFAULT_MAX_MAP_COUNT	(USHRT_MAX - MAPCOUNT_ELF_CORE_MARGIN)
-
-extern int sysctl_max_map_count;
-
 #include <linux/aio.h>
 
 #ifdef CONFIG_MMU
@@ -1221,12 +1191,6 @@ struct sched_rt_entity {
 #endif
 };
 
-/*
- * default timeslice is 100 msecs (used only for SCHED_RR tasks).
- * Timeslices get refilled after they expire.
- */
-#define RR_TIMESLICE		(100 * HZ / 1000)
-
 struct rcu_node;
 
 enum perf_event_task_context {
@@ -2074,58 +2038,7 @@ extern void wake_up_idle_cpu(int cpu);
 static inline void wake_up_idle_cpu(int cpu) { }
 #endif
 
-extern unsigned int sysctl_sched_latency;
-extern unsigned int sysctl_sched_min_granularity;
-extern unsigned int sysctl_sched_wakeup_granularity;
-extern unsigned int sysctl_sched_child_runs_first;
-
-enum sched_tunable_scaling {
-	SCHED_TUNABLESCALING_NONE,
-	SCHED_TUNABLESCALING_LOG,
-	SCHED_TUNABLESCALING_LINEAR,
-	SCHED_TUNABLESCALING_END,
-};
-extern enum sched_tunable_scaling sysctl_sched_tunable_scaling;
-
-extern unsigned int sysctl_numa_balancing_scan_delay;
-extern unsigned int sysctl_numa_balancing_scan_period_min;
-extern unsigned int sysctl_numa_balancing_scan_period_max;
-extern unsigned int sysctl_numa_balancing_scan_period_reset;
-extern unsigned int sysctl_numa_balancing_scan_size;
-extern unsigned int sysctl_numa_balancing_settle_count;
-
-#ifdef CONFIG_SCHED_DEBUG
-extern unsigned int sysctl_sched_migration_cost;
-extern unsigned int sysctl_sched_nr_migrate;
-extern unsigned int sysctl_sched_time_avg;
-extern unsigned int sysctl_timer_migration;
-extern unsigned int sysctl_sched_shares_window;
-
-int sched_proc_update_handler(struct ctl_table *table, int write,
-		void __user *buffer, size_t *length,
-		loff_t *ppos);
-#endif
-#ifdef CONFIG_SCHED_DEBUG
-static inline unsigned int get_sysctl_timer_migration(void)
-{
-	return sysctl_timer_migration;
-}
-#else
-static inline unsigned int get_sysctl_timer_migration(void)
-{
-	return 1;
-}
-#endif
-extern unsigned int sysctl_sched_rt_period;
-extern int sysctl_sched_rt_runtime;
-
-int sched_rt_handler(struct ctl_table *table, int write,
-		void __user *buffer, size_t *lenp,
-		loff_t *ppos);
-
 #ifdef CONFIG_SCHED_AUTOGROUP
-extern unsigned int sysctl_sched_autogroup_enabled;
-
 extern void sched_autogroup_create_attach(struct task_struct *p);
 extern void sched_autogroup_detach(struct task_struct *p);
 extern void sched_autogroup_fork(struct signal_struct *sig);
@@ -2141,10 +2054,6 @@ static inline void sched_autogroup_fork(struct signal_struct *sig) { }
 static inline void sched_autogroup_exit(struct signal_struct *sig) { }
 #endif
 
-#ifdef CONFIG_CFS_BANDWIDTH
-extern unsigned int sysctl_sched_cfs_bandwidth_slice;
-#endif
-
 #ifdef CONFIG_RT_MUTEXES
 extern int rt_mutex_getprio(struct task_struct *p);
 extern void rt_mutex_setprio(struct task_struct *p, int prio);
diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
new file mode 100644
index 000000000000..bac914e458ca
--- /dev/null
+++ b/include/linux/sched/sysctl.h
@@ -0,0 +1,97 @@
+#ifndef _SCHED_SYSCTL_H
+#define _SCHED_SYSCTL_H
+
+#ifdef CONFIG_DETECT_HUNG_TASK
+extern unsigned int  sysctl_hung_task_panic;
+extern unsigned long sysctl_hung_task_check_count;
+extern unsigned long sysctl_hung_task_timeout_secs;
+extern unsigned long sysctl_hung_task_warnings;
+extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
+					 void __user *buffer,
+					 size_t *lenp, loff_t *ppos);
+#else
+/* Avoid need for ifdefs elsewhere in the code */
+enum { sysctl_hung_task_timeout_secs = 0 };
+#endif
+
+/*
+ * Default maximum number of active map areas, this limits the number of vmas
+ * per mm struct. Users can overwrite this number by sysctl but there is a
+ * problem.
+ *
+ * When a program's coredump is generated as ELF format, a section is created
+ * per a vma. In ELF, the number of sections is represented in unsigned short.
+ * This means the number of sections should be smaller than 65535 at coredump.
+ * Because the kernel adds some informative sections to a image of program at
+ * generating coredump, we need some margin. The number of extra sections is
+ * 1-3 now and depends on arch. We use "5" as safe margin, here.
+ */
+#define MAPCOUNT_ELF_CORE_MARGIN	(5)
+#define DEFAULT_MAX_MAP_COUNT	(USHRT_MAX - MAPCOUNT_ELF_CORE_MARGIN)
+
+extern int sysctl_max_map_count;
+
+extern unsigned int sysctl_sched_latency;
+extern unsigned int sysctl_sched_min_granularity;
+extern unsigned int sysctl_sched_wakeup_granularity;
+extern unsigned int sysctl_sched_child_runs_first;
+
+enum sched_tunable_scaling {
+	SCHED_TUNABLESCALING_NONE,
+	SCHED_TUNABLESCALING_LOG,
+	SCHED_TUNABLESCALING_LINEAR,
+	SCHED_TUNABLESCALING_END,
+};
+extern enum sched_tunable_scaling sysctl_sched_tunable_scaling;
+
+extern unsigned int sysctl_numa_balancing_scan_delay;
+extern unsigned int sysctl_numa_balancing_scan_period_min;
+extern unsigned int sysctl_numa_balancing_scan_period_max;
+extern unsigned int sysctl_numa_balancing_scan_period_reset;
+extern unsigned int sysctl_numa_balancing_scan_size;
+extern unsigned int sysctl_numa_balancing_settle_count;
+
+#ifdef CONFIG_SCHED_DEBUG
+extern unsigned int sysctl_sched_migration_cost;
+extern unsigned int sysctl_sched_nr_migrate;
+extern unsigned int sysctl_sched_time_avg;
+extern unsigned int sysctl_timer_migration;
+extern unsigned int sysctl_sched_shares_window;
+
+int sched_proc_update_handler(struct ctl_table *table, int write,
+		void __user *buffer, size_t *length,
+		loff_t *ppos);
+#endif
+#ifdef CONFIG_SCHED_DEBUG
+static inline unsigned int get_sysctl_timer_migration(void)
+{
+	return sysctl_timer_migration;
+}
+#else
+static inline unsigned int get_sysctl_timer_migration(void)
+{
+	return 1;
+}
+#endif
+extern unsigned int sysctl_sched_rt_period;
+extern int sysctl_sched_rt_runtime;
+
+#ifdef CONFIG_CFS_BANDWIDTH
+extern unsigned int sysctl_sched_cfs_bandwidth_slice;
+#endif
+
+#ifdef CONFIG_SCHED_AUTOGROUP
+extern unsigned int sysctl_sched_autogroup_enabled;
+#endif
+
+/*
+ * default timeslice is 100 msecs (used only for SCHED_RR tasks).
+ * Timeslices get refilled after they expire.
+ */
+#define RR_TIMESLICE		(100 * HZ / 1000)
+
+int sched_rt_handler(struct ctl_table *table, int write,
+		void __user *buffer, size_t *lenp,
+		loff_t *ppos);
+
+#endif /* _SCHED_SYSCTL_H */
diff --git a/init/init_task.c b/init/init_task.c
index 8b2f3996b035..a031ad14c950 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -2,6 +2,7 @@
 #include <linux/export.h>
 #include <linux/mqueue.h>
 #include <linux/sched.h>
+#include <linux/sched/sysctl.h>
 #include <linux/init.h>
 #include <linux/fs.h>
 #include <linux/mm.h>
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 6db7a5ed52b5..8a9aa59d0d61 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -44,6 +44,7 @@
 #include <linux/err.h>
 #include <linux/debugobjects.h>
 #include <linux/sched.h>
+#include <linux/sched/sysctl.h>
 #include <linux/timer.h>
 
 #include <asm/uaccess.h>
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index fc886441436a..ed8de30a040e 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1,5 +1,6 @@
 
 #include <linux/sched.h>
+#include <linux/sched/sysctl.h>
 #include <linux/mutex.h>
 #include <linux/spinlock.h>
 #include <linux/stop_machine.h>
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index c88878db491e..7357e23aaf68 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -61,6 +61,7 @@
 #include <linux/kmod.h>
 #include <linux/capability.h>
 #include <linux/binfmts.h>
+#include <linux/sched/sysctl.h>
 
 #include <asm/uaccess.h>
 #include <asm/processor.h>
diff --git a/kernel/timer.c b/kernel/timer.c
index 367d00858482..3e13baf3f0ea 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -39,6 +39,7 @@
 #include <linux/kallsyms.h>
 #include <linux/irq_work.h>
 #include <linux/sched.h>
+#include <linux/sched/sysctl.h>
 #include <linux/slab.h>
 
 #include <asm/uaccess.h>
diff --git a/mm/mmap.c b/mm/mmap.c
index 35730ee9d515..5dee4a0bb49f 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -32,6 +32,7 @@
 #include <linux/khugepaged.h>
 #include <linux/uprobes.h>
 #include <linux/rbtree_augmented.h>
+#include <linux/sched/sysctl.h>
 
 #include <asm/uaccess.h>
 #include <asm/cacheflush.h>
diff --git a/mm/mremap.c b/mm/mremap.c
index e1031e1f6a61..f9766f460299 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -19,6 +19,7 @@
 #include <linux/security.h>
 #include <linux/syscalls.h>
 #include <linux/mmu_notifier.h>
+#include <linux/sched/sysctl.h>
 
 #include <asm/uaccess.h>
 #include <asm/cacheflush.h>
diff --git a/mm/nommu.c b/mm/nommu.c
index 79c3cac87afa..b20db4e22263 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -29,6 +29,7 @@
 #include <linux/security.h>
 #include <linux/syscalls.h>
 #include <linux/audit.h>
+#include <linux/sched/sysctl.h>
 
 #include <asm/uaccess.h>
 #include <asm/tlb.h>
-- 
cgit v1.2.3


From ce0dbbbb30aee6a835511d5be446462388ba9eee Mon Sep 17 00:00:00 2001
From: Clark Williams <williams@redhat.com>
Date: Thu, 7 Feb 2013 09:47:04 -0600
Subject: sched/rt: Add a tuning knob to allow changing SCHED_RR timeslice

Add a /proc/sys/kernel scheduler knob named
sched_rr_timeslice_ms that allows global changing of the
SCHED_RR timeslice value. User visable value is in milliseconds
but is stored as jiffies.  Setting to 0 (zero) resets to the
default (currently 100ms).

Signed-off-by: Clark Williams <williams@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/20130207094704.13751796@riff.lan
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched/sysctl.h | 15 ++++++++++++++-
 kernel/sched/core.c          | 19 +++++++++++++++++++
 kernel/sched/rt.c            |  6 ++++--
 kernel/sysctl.c              |  7 +++++++
 4 files changed, 44 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index bac914e458ca..d2bb0ae979d0 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -73,6 +73,13 @@ static inline unsigned int get_sysctl_timer_migration(void)
 	return 1;
 }
 #endif
+
+/*
+ *  control realtime throttling:
+ *
+ *  /proc/sys/kernel/sched_rt_period_us
+ *  /proc/sys/kernel/sched_rt_runtime_us
+ */
 extern unsigned int sysctl_sched_rt_period;
 extern int sysctl_sched_rt_runtime;
 
@@ -90,7 +97,13 @@ extern unsigned int sysctl_sched_autogroup_enabled;
  */
 #define RR_TIMESLICE		(100 * HZ / 1000)
 
-int sched_rt_handler(struct ctl_table *table, int write,
+extern int sched_rr_timeslice;
+
+extern int sched_rr_handler(struct ctl_table *table, int write,
+		void __user *buffer, size_t *lenp,
+		loff_t *ppos);
+
+extern int sched_rt_handler(struct ctl_table *table, int write,
 		void __user *buffer, size_t *lenp,
 		loff_t *ppos);
 
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 1dff78a9e2ab..4a88f1d51563 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7509,6 +7509,25 @@ static int sched_rt_global_constraints(void)
 }
 #endif /* CONFIG_RT_GROUP_SCHED */
 
+int sched_rr_handler(struct ctl_table *table, int write,
+		void __user *buffer, size_t *lenp,
+		loff_t *ppos)
+{
+	int ret;
+	static DEFINE_MUTEX(mutex);
+
+	mutex_lock(&mutex);
+	ret = proc_dointvec(table, write, buffer, lenp, ppos);
+	/* make sure that internally we keep jiffies */
+	/* also, writing zero resets timeslice to default */
+	if (!ret && write) {
+		sched_rr_timeslice = sched_rr_timeslice <= 0 ?
+			RR_TIMESLICE : msecs_to_jiffies(sched_rr_timeslice);
+	}
+	mutex_unlock(&mutex);
+	return ret;
+}
+
 int sched_rt_handler(struct ctl_table *table, int write,
 		void __user *buffer, size_t *lenp,
 		loff_t *ppos)
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index c25de141c576..fb0f77e402b6 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -7,6 +7,8 @@
 
 #include <linux/slab.h>
 
+int sched_rr_timeslice = RR_TIMESLICE;
+
 static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun);
 
 struct rt_bandwidth def_rt_bandwidth;
@@ -2016,7 +2018,7 @@ static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued)
 	if (--p->rt.time_slice)
 		return;
 
-	p->rt.time_slice = RR_TIMESLICE;
+	p->rt.time_slice = sched_rr_timeslice;
 
 	/*
 	 * Requeue to the end of queue if we (and all of our ancestors) are the
@@ -2047,7 +2049,7 @@ static unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task)
 	 * Time slice is 0 for SCHED_FIFO tasks
 	 */
 	if (task->policy == SCHED_RR)
-		return RR_TIMESLICE;
+		return sched_rr_timeslice;
 	else
 		return 0;
 }
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 7357e23aaf68..4fc9be955c71 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -404,6 +404,13 @@ static struct ctl_table kern_table[] = {
 		.mode		= 0644,
 		.proc_handler	= sched_rt_handler,
 	},
+	{
+		.procname	= "sched_rr_timeslice_ms",
+		.data		= &sched_rr_timeslice,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= sched_rr_handler,
+	},
 #ifdef CONFIG_SCHED_AUTOGROUP
 	{
 		.procname	= "sched_autogroup_enabled",
-- 
cgit v1.2.3


From 8bd75c77b7c6a3954140dd2e20346aef3efe4a35 Mon Sep 17 00:00:00 2001
From: Clark Williams <williams@redhat.com>
Date: Thu, 7 Feb 2013 09:47:07 -0600
Subject: sched/rt: Move rt specific bits into new header file

Move rt scheduler definitions out of include/linux/sched.h into
new file include/linux/sched/rt.h

Signed-off-by: Clark Williams <williams@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/20130207094707.7b9f825f@riff.lan
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 drivers/spi/spi.c                 |  2 +-
 drivers/staging/csr/bh.c          |  2 +-
 drivers/staging/csr/unifi_sme.c   |  2 +-
 drivers/tty/sysrq.c               |  1 +
 fs/select.c                       |  1 +
 include/linux/sched.h             | 55 ++-----------------------------------
 include/linux/sched/rt.h          | 58 +++++++++++++++++++++++++++++++++++++++
 init/init_task.c                  |  1 +
 kernel/futex.c                    |  1 +
 kernel/hrtimer.c                  |  1 +
 kernel/irq/manage.c               |  1 +
 kernel/mutex.c                    |  1 +
 kernel/rtmutex-debug.c            |  1 +
 kernel/rtmutex-tester.c           |  1 +
 kernel/rtmutex.c                  |  1 +
 kernel/sched/cpupri.c             |  2 ++
 kernel/sched/sched.h              |  1 +
 kernel/trace/trace.c              |  1 +
 kernel/trace/trace_sched_wakeup.c |  2 +-
 kernel/watchdog.c                 |  1 +
 mm/page-writeback.c               |  1 +
 mm/page_alloc.c                   |  1 +
 22 files changed, 81 insertions(+), 57 deletions(-)
 create mode 100644 include/linux/sched/rt.h

(limited to 'include/linux')

diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 19ee901577da..3a6083b386a1 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -33,7 +33,7 @@
 #include <linux/of_gpio.h>
 #include <linux/pm_runtime.h>
 #include <linux/export.h>
-#include <linux/sched.h>
+#include <linux/sched/rt.h>
 #include <linux/delay.h>
 #include <linux/kthread.h>
 #include <linux/ioport.h>
diff --git a/drivers/staging/csr/bh.c b/drivers/staging/csr/bh.c
index 1a1f5c79822a..7b133597e923 100644
--- a/drivers/staging/csr/bh.c
+++ b/drivers/staging/csr/bh.c
@@ -15,7 +15,7 @@
  */
 #include "csr_wifi_hip_unifi.h"
 #include "unifi_priv.h"
-
+#include <linux/sched/rt.h>
 
 /*
  * ---------------------------------------------------------------------------
diff --git a/drivers/staging/csr/unifi_sme.c b/drivers/staging/csr/unifi_sme.c
index 7c6c4138fc76..49395da34b7f 100644
--- a/drivers/staging/csr/unifi_sme.c
+++ b/drivers/staging/csr/unifi_sme.c
@@ -15,7 +15,7 @@
 #include "unifi_priv.h"
 #include "csr_wifi_hip_unifi.h"
 #include "csr_wifi_hip_conversions.h"
-
+#include <linux/sched/rt.h>
 
 
diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c
index b3c4a250ff86..40e5b3919e27 100644
--- a/drivers/tty/sysrq.c
+++ b/drivers/tty/sysrq.c
@@ -15,6 +15,7 @@
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include <linux/sched.h>
+#include <linux/sched/rt.h>
 #include <linux/interrupt.h>
 #include <linux/mm.h>
 #include <linux/fs.h>
diff --git a/fs/select.c b/fs/select.c
index 2ef72d965036..8c1c96c27062 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -26,6 +26,7 @@
 #include <linux/fs.h>
 #include <linux/rcupdate.h>
 #include <linux/hrtimer.h>
+#include <linux/sched/rt.h>
 
 #include <asm/uaccess.h>
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8fc9b2710a80..33cc42130371 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1164,6 +1164,7 @@ struct sched_entity {
 	/* rq "owned" by this entity/group: */
 	struct cfs_rq		*my_q;
 #endif
+
 /*
  * Load-tracking only depends on SMP, FAIR_GROUP_SCHED dependency below may be
  * removed when useful for applications beyond shares distribution (e.g.
@@ -1191,6 +1192,7 @@ struct sched_rt_entity {
 #endif
 };
 
+
 struct rcu_node;
 
 enum perf_event_task_context {
@@ -1596,37 +1598,6 @@ static inline void set_numabalancing_state(bool enabled)
 }
 #endif
 
-/*
- * Priority of a process goes from 0..MAX_PRIO-1, valid RT
- * priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL/SCHED_BATCH
- * tasks are in the range MAX_RT_PRIO..MAX_PRIO-1. Priority
- * values are inverted: lower p->prio value means higher priority.
- *
- * The MAX_USER_RT_PRIO value allows the actual maximum
- * RT priority to be separate from the value exported to
- * user-space.  This allows kernel threads to set their
- * priority to a value higher than any user task. Note:
- * MAX_RT_PRIO must not be smaller than MAX_USER_RT_PRIO.
- */
-
-#define MAX_USER_RT_PRIO	100
-#define MAX_RT_PRIO		MAX_USER_RT_PRIO
-
-#define MAX_PRIO		(MAX_RT_PRIO + 40)
-#define DEFAULT_PRIO		(MAX_RT_PRIO + 20)
-
-static inline int rt_prio(int prio)
-{
-	if (unlikely(prio < MAX_RT_PRIO))
-		return 1;
-	return 0;
-}
-
-static inline int rt_task(struct task_struct *p)
-{
-	return rt_prio(p->prio);
-}
-
 static inline struct pid *task_pid(struct task_struct *task)
 {
 	return task->pids[PIDTYPE_PID].pid;
@@ -2054,26 +2025,6 @@ static inline void sched_autogroup_fork(struct signal_struct *sig) { }
 static inline void sched_autogroup_exit(struct signal_struct *sig) { }
 #endif
 
-#ifdef CONFIG_RT_MUTEXES
-extern int rt_mutex_getprio(struct task_struct *p);
-extern void rt_mutex_setprio(struct task_struct *p, int prio);
-extern void rt_mutex_adjust_pi(struct task_struct *p);
-static inline bool tsk_is_pi_blocked(struct task_struct *tsk)
-{
-	return tsk->pi_blocked_on != NULL;
-}
-#else
-static inline int rt_mutex_getprio(struct task_struct *p)
-{
-	return p->normal_prio;
-}
-# define rt_mutex_adjust_pi(p)		do { } while (0)
-static inline bool tsk_is_pi_blocked(struct task_struct *tsk)
-{
-	return false;
-}
-#endif
-
 extern bool yield_to(struct task_struct *p, bool preempt);
 extern void set_user_nice(struct task_struct *p, long nice);
 extern int task_prio(const struct task_struct *p);
@@ -2703,8 +2654,6 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
 extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask);
 extern long sched_getaffinity(pid_t pid, struct cpumask *mask);
 
-extern void normalize_rt_tasks(void);
-
 #ifdef CONFIG_CGROUP_SCHED
 
 extern struct task_group root_task_group;
diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h
new file mode 100644
index 000000000000..94e19ea28fc3
--- /dev/null
+++ b/include/linux/sched/rt.h
@@ -0,0 +1,58 @@
+#ifndef _SCHED_RT_H
+#define _SCHED_RT_H
+
+/*
+ * Priority of a process goes from 0..MAX_PRIO-1, valid RT
+ * priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL/SCHED_BATCH
+ * tasks are in the range MAX_RT_PRIO..MAX_PRIO-1. Priority
+ * values are inverted: lower p->prio value means higher priority.
+ *
+ * The MAX_USER_RT_PRIO value allows the actual maximum
+ * RT priority to be separate from the value exported to
+ * user-space.  This allows kernel threads to set their
+ * priority to a value higher than any user task. Note:
+ * MAX_RT_PRIO must not be smaller than MAX_USER_RT_PRIO.
+ */
+
+#define MAX_USER_RT_PRIO	100
+#define MAX_RT_PRIO		MAX_USER_RT_PRIO
+
+#define MAX_PRIO		(MAX_RT_PRIO + 40)
+#define DEFAULT_PRIO		(MAX_RT_PRIO + 20)
+
+static inline int rt_prio(int prio)
+{
+	if (unlikely(prio < MAX_RT_PRIO))
+		return 1;
+	return 0;
+}
+
+static inline int rt_task(struct task_struct *p)
+{
+	return rt_prio(p->prio);
+}
+
+#ifdef CONFIG_RT_MUTEXES
+extern int rt_mutex_getprio(struct task_struct *p);
+extern void rt_mutex_setprio(struct task_struct *p, int prio);
+extern void rt_mutex_adjust_pi(struct task_struct *p);
+static inline bool tsk_is_pi_blocked(struct task_struct *tsk)
+{
+	return tsk->pi_blocked_on != NULL;
+}
+#else
+static inline int rt_mutex_getprio(struct task_struct *p)
+{
+	return p->normal_prio;
+}
+# define rt_mutex_adjust_pi(p)		do { } while (0)
+static inline bool tsk_is_pi_blocked(struct task_struct *tsk)
+{
+	return false;
+}
+#endif
+
+extern void normalize_rt_tasks(void);
+
+
+#endif /* _SCHED_RT_H */
diff --git a/init/init_task.c b/init/init_task.c
index a031ad14c950..ba0a7f362d9e 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -3,6 +3,7 @@
 #include <linux/mqueue.h>
 #include <linux/sched.h>
 #include <linux/sched/sysctl.h>
+#include <linux/sched/rt.h>
 #include <linux/init.h>
 #include <linux/fs.h>
 #include <linux/mm.h>
diff --git a/kernel/futex.c b/kernel/futex.c
index 19eb089ca003..9618b6e9fb36 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -60,6 +60,7 @@
 #include <linux/pid.h>
 #include <linux/nsproxy.h>
 #include <linux/ptrace.h>
+#include <linux/sched/rt.h>
 
 #include <asm/futex.h>
 
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 8a9aa59d0d61..c5dde988c0ce 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -45,6 +45,7 @@
 #include <linux/debugobjects.h>
 #include <linux/sched.h>
 #include <linux/sched/sysctl.h>
+#include <linux/sched/rt.h>
 #include <linux/timer.h>
 
 #include <asm/uaccess.h>
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index e49a288fa479..02115d9592ec 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -16,6 +16,7 @@
 #include <linux/interrupt.h>
 #include <linux/slab.h>
 #include <linux/sched.h>
+#include <linux/sched/rt.h>
 #include <linux/task_work.h>
 
 #include "internals.h"
diff --git a/kernel/mutex.c b/kernel/mutex.c
index a307cc9c9526..52f23011b6e0 100644
--- a/kernel/mutex.c
+++ b/kernel/mutex.c
@@ -19,6 +19,7 @@
  */
 #include <linux/mutex.h>
 #include <linux/sched.h>
+#include <linux/sched/rt.h>
 #include <linux/export.h>
 #include <linux/spinlock.h>
 #include <linux/interrupt.h>
diff --git a/kernel/rtmutex-debug.c b/kernel/rtmutex-debug.c
index 16502d3a71c8..13b243a323fa 100644
--- a/kernel/rtmutex-debug.c
+++ b/kernel/rtmutex-debug.c
@@ -17,6 +17,7 @@
  * See rt.c in preempt-rt for proper credits and further information
  */
 #include <linux/sched.h>
+#include <linux/sched/rt.h>
 #include <linux/delay.h>
 #include <linux/export.h>
 #include <linux/spinlock.h>
diff --git a/kernel/rtmutex-tester.c b/kernel/rtmutex-tester.c
index 98ec49475460..7890b10084a7 100644
--- a/kernel/rtmutex-tester.c
+++ b/kernel/rtmutex-tester.c
@@ -10,6 +10,7 @@
 #include <linux/kthread.h>
 #include <linux/export.h>
 #include <linux/sched.h>
+#include <linux/sched/rt.h>
 #include <linux/spinlock.h>
 #include <linux/timer.h>
 #include <linux/freezer.h>
diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c
index a242e691c993..1e09308bf2a1 100644
--- a/kernel/rtmutex.c
+++ b/kernel/rtmutex.c
@@ -13,6 +13,7 @@
 #include <linux/spinlock.h>
 #include <linux/export.h>
 #include <linux/sched.h>
+#include <linux/sched/rt.h>
 #include <linux/timer.h>
 
 #include "rtmutex_common.h"
diff --git a/kernel/sched/cpupri.c b/kernel/sched/cpupri.c
index 23aa789c53ee..1095e878a46f 100644
--- a/kernel/sched/cpupri.c
+++ b/kernel/sched/cpupri.c
@@ -28,6 +28,8 @@
  */
 
 #include <linux/gfp.h>
+#include <linux/sched.h>
+#include <linux/sched/rt.h>
 #include "cpupri.h"
 
 /* Convert between a 140 based task->prio, and our 102 based cpupri */
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index ed8de30a040e..cc03cfdf469f 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1,6 +1,7 @@
 
 #include <linux/sched.h>
 #include <linux/sched/sysctl.h>
+#include <linux/sched/rt.h>
 #include <linux/mutex.h>
 #include <linux/spinlock.h>
 #include <linux/stop_machine.h>
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 3c13e46d7d24..4d2e4afd956e 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -39,6 +39,7 @@
 #include <linux/poll.h>
 #include <linux/nmi.h>
 #include <linux/fs.h>
+#include <linux/sched/rt.h>
 
 #include "trace.h"
 #include "trace_output.h"
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 9fe45fcefca0..75aa97fbe1a1 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -15,8 +15,8 @@
 #include <linux/kallsyms.h>
 #include <linux/uaccess.h>
 #include <linux/ftrace.h>
+#include <linux/sched/rt.h>
 #include <trace/events/sched.h>
-
 #include "trace.h"
 
 static struct trace_array	*wakeup_trace;
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 75a2ab3d0b02..27689422aa92 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -23,6 +23,7 @@
 #include <linux/module.h>
 #include <linux/sysctl.h>
 #include <linux/smpboot.h>
+#include <linux/sched/rt.h>
 
 #include <asm/irq_regs.h>
 #include <linux/kvm_para.h>
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 0713bfbf0954..66a0024becd9 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -35,6 +35,7 @@
 #include <linux/buffer_head.h> /* __set_page_dirty_buffers */
 #include <linux/pagevec.h>
 #include <linux/timer.h>
+#include <linux/sched/rt.h>
 #include <trace/events/writeback.h>
 
 /*
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index df2022ff0c8a..42d18e46f286 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -58,6 +58,7 @@
 #include <linux/prefetch.h>
 #include <linux/migrate.h>
 #include <linux/page-debug-flags.h>
+#include <linux/sched/rt.h>
 
 #include <asm/tlbflush.h>
 #include <asm/div64.h>
-- 
cgit v1.2.3


From 3bc97a782cc8c112f64a25143452b06206364cc8 Mon Sep 17 00:00:00 2001
From: Lai Jiangshan <laijs@cn.fujitsu.com>
Date: Thu, 29 Nov 2012 16:46:05 +0800
Subject: srcu: Remove checks preventing offline CPUs from calling
 srcu_read_lock()

SRCU has its own statemachine and no longer relies on normal RCU.
Its read-side critical section can now be used by an offline CPU, so this
commit removes the check and the comments, reverting the SRCU portion
of c0d6d01b (rcu: Check for illegal use of RCU from offlined CPUs).

It also makes the code match the comments in whatisRCU.txt:

g.	Do you need read-side critical sections that are respected
	even though they are in the middle of the idle loop, during
	user-mode execution, or on an offlined CPU?  If so, SRCU is the
	only choice that will work for you.

[ paulmck: There is at least one remaining issue, namely use of lockdep
	   with tracing enabled. ]

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/srcu.h | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index 6eb691b08358..ae3ee39eb699 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -163,9 +163,6 @@ void srcu_barrier(struct srcu_struct *sp);
  * power mode. This way we can notice an extended quiescent state to
  * other CPUs that started a grace period. Otherwise we would delay any
  * grace period as long as we run in the idle task.
- *
- * Similarly, we avoid claiming an SRCU read lock held if the current
- * CPU is offline.
  */
 static inline int srcu_read_lock_held(struct srcu_struct *sp)
 {
@@ -173,8 +170,6 @@ static inline int srcu_read_lock_held(struct srcu_struct *sp)
 		return 1;
 	if (rcu_is_cpu_idle())
 		return 0;
-	if (!rcu_lockdep_current_cpu_online())
-		return 0;
 	return lock_is_held(&sp->dep_map);
 }
 
-- 
cgit v1.2.3


From 511a0868bed6694512348fc177cdfaf3fd97d0bb Mon Sep 17 00:00:00 2001
From: Lai Jiangshan <laijs@cn.fujitsu.com>
Date: Thu, 29 Nov 2012 16:46:06 +0800
Subject: srcu: Remove checks preventing idle CPUs from calling
 srcu_read_lock()

SRCU has its own statemachine and no longer relies on normal RCU.
Its read-side critical section can now be used by an offline CPU, so this
commit removes the check and the comments, reverting the SRCU portion
of ff195cb6 (rcu: Warn when srcu_read_lock() is used in an extended
quiescent state).

It also makes the codes match the comments in whatisRCU.txt:

g.	Do you need read-side critical sections that are respected
	even though they are in the middle of the idle loop, during
	user-mode execution, or on an offlined CPU?  If so, SRCU is the
	only choice that will work for you.

[ paulmck: There is at least one remaining issue, namely use of lockdep
	   with tracing enabled. ]

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/srcu.h | 21 +++------------------
 1 file changed, 3 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index ae3ee39eb699..04f4121a23ae 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -151,25 +151,14 @@ void srcu_barrier(struct srcu_struct *sp);
  * Checks debug_lockdep_rcu_enabled() to prevent false positives during boot
  * and while lockdep is disabled.
  *
- * Note that if the CPU is in the idle loop from an RCU point of view
- * (ie: that we are in the section between rcu_idle_enter() and
- * rcu_idle_exit()) then srcu_read_lock_held() returns false even if
- * the CPU did an srcu_read_lock().  The reason for this is that RCU
- * ignores CPUs that are in such a section, considering these as in
- * extended quiescent state, so such a CPU is effectively never in an
- * RCU read-side critical section regardless of what RCU primitives it
- * invokes.  This state of affairs is required --- we need to keep an
- * RCU-free window in idle where the CPU may possibly enter into low
- * power mode. This way we can notice an extended quiescent state to
- * other CPUs that started a grace period. Otherwise we would delay any
- * grace period as long as we run in the idle task.
+ * Note that SRCU is based on its own statemachine and it doesn't
+ * relies on normal RCU, it can be called from the CPU which
+ * is in the idle loop from an RCU point of view or offline.
  */
 static inline int srcu_read_lock_held(struct srcu_struct *sp)
 {
 	if (!debug_lockdep_rcu_enabled())
 		return 1;
-	if (rcu_is_cpu_idle())
-		return 0;
 	return lock_is_held(&sp->dep_map);
 }
 
@@ -231,8 +220,6 @@ static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp)
 	int retval = __srcu_read_lock(sp);
 
 	rcu_lock_acquire(&(sp)->dep_map);
-	rcu_lockdep_assert(!rcu_is_cpu_idle(),
-			   "srcu_read_lock() used illegally while idle");
 	return retval;
 }
 
@@ -246,8 +233,6 @@ static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp)
 static inline void srcu_read_unlock(struct srcu_struct *sp, int idx)
 	__releases(sp)
 {
-	rcu_lockdep_assert(!rcu_is_cpu_idle(),
-			   "srcu_read_unlock() used illegally while idle");
 	rcu_lock_release(&(sp)->dep_map);
 	__srcu_read_unlock(sp, idx);
 }
-- 
cgit v1.2.3


From 50e244cc793d511b86adea24972f3a7264cae114 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@linux.intel.com>
Date: Fri, 25 Jan 2013 10:28:15 +1000
Subject: fb: rework locking to fix lock ordering on takeover

Adjust the console layer to allow a take over call where the caller
already holds the locks.  Make the fb layer lock in order.

This is partly a band aid, the fb layer is terminally confused about the
locking rules it uses for its notifiers it seems.

[akpm@linux-foundation.org: remove stray non-ascii char, tidy comment]
[akpm@linux-foundation.org: export do_take_over_console()]
[airlied: cleanup another non-ascii char]
Signed-off-by: Alan Cox <alan@linux.intel.com>
Cc: Florian Tobias Schandinat <FlorianSchandinat@gmx.de>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Jiri Kosina <jkosina@suse.cz>
Cc: stable <stable@vger.kernel.org>
Tested-by: Sedat Dilek <sedat.dilek@gmail.com>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/tty/vt/vt.c           | 93 ++++++++++++++++++++++++++++++++-----------
 drivers/video/console/fbcon.c | 29 +++++++++++++-
 drivers/video/fbmem.c         |  5 +--
 drivers/video/fbsysfs.c       |  3 ++
 include/linux/console.h       |  1 +
 5 files changed, 104 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c
index 8fd89687d068..c076af0b300b 100644
--- a/drivers/tty/vt/vt.c
+++ b/drivers/tty/vt/vt.c
@@ -2987,7 +2987,7 @@ int __init vty_init(const struct file_operations *console_fops)
 
 static struct class *vtconsole_class;
 
-static int bind_con_driver(const struct consw *csw, int first, int last,
+static int do_bind_con_driver(const struct consw *csw, int first, int last,
 			   int deflt)
 {
 	struct module *owner = csw->owner;
@@ -2998,7 +2998,7 @@ static int bind_con_driver(const struct consw *csw, int first, int last,
 	if (!try_module_get(owner))
 		return -ENODEV;
 
-	console_lock();
+	WARN_CONSOLE_UNLOCKED();
 
 	/* check if driver is registered */
 	for (i = 0; i < MAX_NR_CON_DRIVER; i++) {
@@ -3083,11 +3083,22 @@ static int bind_con_driver(const struct consw *csw, int first, int last,
 
 	retval = 0;
 err:
-	console_unlock();
 	module_put(owner);
 	return retval;
 };
 
+
+static int bind_con_driver(const struct consw *csw, int first, int last,
+			   int deflt)
+{
+	int ret;
+
+	console_lock();
+	ret = do_bind_con_driver(csw, first, last, deflt);
+	console_unlock();
+	return ret;
+}
+
 #ifdef CONFIG_VT_HW_CONSOLE_BINDING
 static int con_is_graphics(const struct consw *csw, int first, int last)
 {
@@ -3199,9 +3210,9 @@ int unbind_con_driver(const struct consw *csw, int first, int last, int deflt)
 	if (!con_is_bound(csw))
 		con_driver->flag &= ~CON_DRIVER_FLAG_INIT;
 
-	console_unlock();
 	/* ignore return value, binding should not fail */
-	bind_con_driver(defcsw, first, last, deflt);
+	do_bind_con_driver(defcsw, first, last, deflt);
+	console_unlock();
 err:
 	module_put(owner);
 	return retval;
@@ -3492,28 +3503,18 @@ int con_debug_leave(void)
 }
 EXPORT_SYMBOL_GPL(con_debug_leave);
 
-/**
- * register_con_driver - register console driver to console layer
- * @csw: console driver
- * @first: the first console to take over, minimum value is 0
- * @last: the last console to take over, maximum value is MAX_NR_CONSOLES -1
- *
- * DESCRIPTION: This function registers a console driver which can later
- * bind to a range of consoles specified by @first and @last. It will
- * also initialize the console driver by calling con_startup().
- */
-int register_con_driver(const struct consw *csw, int first, int last)
+static int do_register_con_driver(const struct consw *csw, int first, int last)
 {
 	struct module *owner = csw->owner;
 	struct con_driver *con_driver;
 	const char *desc;
 	int i, retval = 0;
 
+	WARN_CONSOLE_UNLOCKED();
+
 	if (!try_module_get(owner))
 		return -ENODEV;
 
-	console_lock();
-
 	for (i = 0; i < MAX_NR_CON_DRIVER; i++) {
 		con_driver = &registered_con_driver[i];
 
@@ -3566,10 +3567,29 @@ int register_con_driver(const struct consw *csw, int first, int last)
 	}
 
 err:
-	console_unlock();
 	module_put(owner);
 	return retval;
 }
+
+/**
+ * register_con_driver - register console driver to console layer
+ * @csw: console driver
+ * @first: the first console to take over, minimum value is 0
+ * @last: the last console to take over, maximum value is MAX_NR_CONSOLES -1
+ *
+ * DESCRIPTION: This function registers a console driver which can later
+ * bind to a range of consoles specified by @first and @last. It will
+ * also initialize the console driver by calling con_startup().
+ */
+int register_con_driver(const struct consw *csw, int first, int last)
+{
+	int retval;
+
+	console_lock();
+	retval = do_register_con_driver(csw, first, last);
+	console_unlock();
+	return retval;
+}
 EXPORT_SYMBOL(register_con_driver);
 
 /**
@@ -3623,17 +3643,44 @@ EXPORT_SYMBOL(unregister_con_driver);
  *	when a driver wants to take over some existing consoles
  *	and become default driver for newly opened ones.
  *
- *      take_over_console is basically a register followed by unbind
+ *	take_over_console is basically a register followed by unbind
+ */
+int do_take_over_console(const struct consw *csw, int first, int last, int deflt)
+{
+	int err;
+
+	err = do_register_con_driver(csw, first, last);
+	/*
+	 * If we get an busy error we still want to bind the console driver
+	 * and return success, as we may have unbound the console driver
+	 * but not unregistered it.
+	 */
+	if (err == -EBUSY)
+		err = 0;
+	if (!err)
+		do_bind_con_driver(csw, first, last, deflt);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(do_take_over_console);
+
+/*
+ *	If we support more console drivers, this function is used
+ *	when a driver wants to take over some existing consoles
+ *	and become default driver for newly opened ones.
+ *
+ *	take_over_console is basically a register followed by unbind
  */
 int take_over_console(const struct consw *csw, int first, int last, int deflt)
 {
 	int err;
 
 	err = register_con_driver(csw, first, last);
-	/* if we get an busy error we still want to bind the console driver
+	/*
+	 * If we get an busy error we still want to bind the console driver
 	 * and return success, as we may have unbound the console driver
-	 * but not unregistered it.
-	*/
+	 * but not unregistered it.
+	 */
 	if (err == -EBUSY)
 		err = 0;
 	if (!err)
diff --git a/drivers/video/console/fbcon.c b/drivers/video/console/fbcon.c
index fdefa8fd72c4..4bd7820cf4d0 100644
--- a/drivers/video/console/fbcon.c
+++ b/drivers/video/console/fbcon.c
@@ -529,6 +529,33 @@ static int search_for_mapped_con(void)
 	return retval;
 }
 
+static int do_fbcon_takeover(int show_logo)
+{
+	int err, i;
+
+	if (!num_registered_fb)
+		return -ENODEV;
+
+	if (!show_logo)
+		logo_shown = FBCON_LOGO_DONTSHOW;
+
+	for (i = first_fb_vc; i <= last_fb_vc; i++)
+		con2fb_map[i] = info_idx;
+
+	err = do_take_over_console(&fb_con, first_fb_vc, last_fb_vc,
+				fbcon_is_default);
+
+	if (err) {
+		for (i = first_fb_vc; i <= last_fb_vc; i++)
+			con2fb_map[i] = -1;
+		info_idx = -1;
+	} else {
+		fbcon_has_console_bind = 1;
+	}
+
+	return err;
+}
+
 static int fbcon_takeover(int show_logo)
 {
 	int err, i;
@@ -3115,7 +3142,7 @@ static int fbcon_fb_registered(struct fb_info *info)
 		}
 
 		if (info_idx != -1)
-			ret = fbcon_takeover(1);
+			ret = do_fbcon_takeover(1);
 	} else {
 		for (i = first_fb_vc; i <= last_fb_vc; i++) {
 			if (con2fb_map_boot[i] == idx)
diff --git a/drivers/video/fbmem.c b/drivers/video/fbmem.c
index 3ff0105a496a..d8d9831b3fdb 100644
--- a/drivers/video/fbmem.c
+++ b/drivers/video/fbmem.c
@@ -1650,7 +1650,9 @@ static int do_register_framebuffer(struct fb_info *fb_info)
 	event.info = fb_info;
 	if (!lock_fb_info(fb_info))
 		return -ENODEV;
+	console_lock();
 	fb_notifier_call_chain(FB_EVENT_FB_REGISTERED, &event);
+	console_unlock();
 	unlock_fb_info(fb_info);
 	return 0;
 }
@@ -1853,11 +1855,8 @@ int fb_new_modelist(struct fb_info *info)
 	err = 1;
 
 	if (!list_empty(&info->modelist)) {
-		if (!lock_fb_info(info))
-			return -ENODEV;
 		event.info = info;
 		err = fb_notifier_call_chain(FB_EVENT_NEW_MODELIST, &event);
-		unlock_fb_info(info);
 	}
 
 	return err;
diff --git a/drivers/video/fbsysfs.c b/drivers/video/fbsysfs.c
index a55e3669d135..ef476b02fbe5 100644
--- a/drivers/video/fbsysfs.c
+++ b/drivers/video/fbsysfs.c
@@ -177,6 +177,8 @@ static ssize_t store_modes(struct device *device,
 	if (i * sizeof(struct fb_videomode) != count)
 		return -EINVAL;
 
+	if (!lock_fb_info(fb_info))
+		return -ENODEV;
 	console_lock();
 	list_splice(&fb_info->modelist, &old_list);
 	fb_videomode_to_modelist((const struct fb_videomode *)buf, i,
@@ -188,6 +190,7 @@ static ssize_t store_modes(struct device *device,
 		fb_destroy_modelist(&old_list);
 
 	console_unlock();
+	unlock_fb_info(fb_info);
 
 	return 0;
 }
diff --git a/include/linux/console.h b/include/linux/console.h
index dedb082fe50f..4ef4307dfb82 100644
--- a/include/linux/console.h
+++ b/include/linux/console.h
@@ -78,6 +78,7 @@ int con_is_bound(const struct consw *csw);
 int register_con_driver(const struct consw *csw, int first, int last);
 int unregister_con_driver(const struct consw *csw);
 int take_over_console(const struct consw *sw, int first, int last, int deflt);
+int do_take_over_console(const struct consw *sw, int first, int last, int deflt);
 void give_up_console(const struct consw *sw);
 #ifdef CONFIG_HW_CONSOLE
 int con_debug_enter(struct vc_data *vc);
-- 
cgit v1.2.3


From e93a9a868792ad71cdd09d75e5a02d8067473c4e Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Fri, 25 Jan 2013 10:28:18 +1000
Subject: fb: Yet another band-aid for fixing lockdep mess

I've still got lockdep warnings even after Alan's patch, and it seems that
yet more band aids are required to paper over similar paths for
unbind_con_driver() and unregister_con_driver().  After this hack, lockdep
warnings are finally gone.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Cc: Alan Cox <alan@linux.intel.com>
Cc: Florian Tobias Schandinat <FlorianSchandinat@gmx.de>
Cc: Jiri Kosina <jkosina@suse.cz>
Cc: stable <stable@vger.kernel.org>
Tested-by: Sedat Dilek <sedat.dilek@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/tty/vt/vt.c           | 43 ++++++++++++++++++++++++++++---------------
 drivers/video/console/fbcon.c |  4 ++--
 drivers/video/fbmem.c         |  4 ++++
 include/linux/console.h       |  1 +
 include/linux/vt_kern.h       |  2 ++
 5 files changed, 37 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c
index c076af0b300b..457c41fe7eb9 100644
--- a/drivers/tty/vt/vt.c
+++ b/drivers/tty/vt/vt.c
@@ -3134,6 +3134,18 @@ static int con_is_graphics(const struct consw *csw, int first, int last)
  * or 0 on success.
  */
 int unbind_con_driver(const struct consw *csw, int first, int last, int deflt)
+{
+	int retval;
+
+	console_lock();
+	retval = do_unbind_con_driver(csw, first, last, deflt);
+	console_unlock();
+	return retval;
+}
+EXPORT_SYMBOL(unbind_con_driver);
+
+/* unlocked version of unbind_con_driver() */
+int do_unbind_con_driver(const struct consw *csw, int first, int last, int deflt)
 {
 	struct module *owner = csw->owner;
 	const struct consw *defcsw = NULL;
@@ -3143,7 +3155,7 @@ int unbind_con_driver(const struct consw *csw, int first, int last, int deflt)
 	if (!try_module_get(owner))
 		return -ENODEV;
 
-	console_lock();
+	WARN_CONSOLE_UNLOCKED();
 
 	/* check if driver is registered and if it is unbindable */
 	for (i = 0; i < MAX_NR_CON_DRIVER; i++) {
@@ -3156,10 +3168,8 @@ int unbind_con_driver(const struct consw *csw, int first, int last, int deflt)
 		}
 	}
 
-	if (retval) {
-		console_unlock();
+	if (retval)
 		goto err;
-	}
 
 	retval = -ENODEV;
 
@@ -3175,15 +3185,11 @@ int unbind_con_driver(const struct consw *csw, int first, int last, int deflt)
 		}
 	}
 
-	if (retval) {
-		console_unlock();
+	if (retval)
 		goto err;
-	}
 
-	if (!con_is_bound(csw)) {
-		console_unlock();
+	if (!con_is_bound(csw))
 		goto err;
-	}
 
 	first = max(first, con_driver->first);
 	last = min(last, con_driver->last);
@@ -3212,13 +3218,12 @@ int unbind_con_driver(const struct consw *csw, int first, int last, int deflt)
 
 	/* ignore return value, binding should not fail */
 	do_bind_con_driver(defcsw, first, last, deflt);
-	console_unlock();
 err:
 	module_put(owner);
 	return retval;
 
 }
-EXPORT_SYMBOL(unbind_con_driver);
+EXPORT_SYMBOL_GPL(do_unbind_con_driver);
 
 static int vt_bind(struct con_driver *con)
 {
@@ -3605,9 +3610,18 @@ EXPORT_SYMBOL(register_con_driver);
  */
 int unregister_con_driver(const struct consw *csw)
 {
-	int i, retval = -ENODEV;
+	int retval;
 
 	console_lock();
+	retval = do_unregister_con_driver(csw);
+	console_unlock();
+	return retval;
+}
+EXPORT_SYMBOL(unregister_con_driver);
+
+int do_unregister_con_driver(const struct consw *csw)
+{
+	int i, retval = -ENODEV;
 
 	/* cannot unregister a bound driver */
 	if (con_is_bound(csw))
@@ -3633,10 +3647,9 @@ int unregister_con_driver(const struct consw *csw)
 		}
 	}
 err:
-	console_unlock();
 	return retval;
 }
-EXPORT_SYMBOL(unregister_con_driver);
+EXPORT_SYMBOL_GPL(do_unregister_con_driver);
 
 /*
  *	If we support more console drivers, this function is used
diff --git a/drivers/video/console/fbcon.c b/drivers/video/console/fbcon.c
index 4bd7820cf4d0..2aef9cac4d18 100644
--- a/drivers/video/console/fbcon.c
+++ b/drivers/video/console/fbcon.c
@@ -3004,7 +3004,7 @@ static int fbcon_unbind(void)
 {
 	int ret;
 
-	ret = unbind_con_driver(&fb_con, first_fb_vc, last_fb_vc,
+	ret = do_unbind_con_driver(&fb_con, first_fb_vc, last_fb_vc,
 				fbcon_is_default);
 
 	if (!ret)
@@ -3077,7 +3077,7 @@ static int fbcon_fb_unregistered(struct fb_info *info)
 		primary_device = -1;
 
 	if (!num_registered_fb)
-		unregister_con_driver(&fb_con);
+		do_unregister_con_driver(&fb_con);
 
 	return 0;
 }
diff --git a/drivers/video/fbmem.c b/drivers/video/fbmem.c
index d8d9831b3fdb..070b9a13d892 100644
--- a/drivers/video/fbmem.c
+++ b/drivers/video/fbmem.c
@@ -1668,8 +1668,10 @@ static int do_unregister_framebuffer(struct fb_info *fb_info)
 
 	if (!lock_fb_info(fb_info))
 		return -ENODEV;
+	console_lock();
 	event.info = fb_info;
 	ret = fb_notifier_call_chain(FB_EVENT_FB_UNBIND, &event);
+	console_unlock();
 	unlock_fb_info(fb_info);
 
 	if (ret)
@@ -1684,7 +1686,9 @@ static int do_unregister_framebuffer(struct fb_info *fb_info)
 	num_registered_fb--;
 	fb_cleanup_device(fb_info);
 	event.info = fb_info;
+	console_lock();
 	fb_notifier_call_chain(FB_EVENT_FB_UNREGISTERED, &event);
+	console_unlock();
 
 	/* this may free fb info */
 	put_fb_info(fb_info);
diff --git a/include/linux/console.h b/include/linux/console.h
index 4ef4307dfb82..47b858cffc47 100644
--- a/include/linux/console.h
+++ b/include/linux/console.h
@@ -77,6 +77,7 @@ extern const struct consw prom_con;	/* SPARC PROM console */
 int con_is_bound(const struct consw *csw);
 int register_con_driver(const struct consw *csw, int first, int last);
 int unregister_con_driver(const struct consw *csw);
+int do_unregister_con_driver(const struct consw *csw);
 int take_over_console(const struct consw *sw, int first, int last, int deflt);
 int do_take_over_console(const struct consw *sw, int first, int last, int deflt);
 void give_up_console(const struct consw *sw);
diff --git a/include/linux/vt_kern.h b/include/linux/vt_kern.h
index 50ae7d0c279e..dbbc6bfee71f 100644
--- a/include/linux/vt_kern.h
+++ b/include/linux/vt_kern.h
@@ -130,6 +130,8 @@ void vt_event_post(unsigned int event, unsigned int old, unsigned int new);
 int vt_waitactive(int n);
 void change_console(struct vc_data *new_vc);
 void reset_vc(struct vc_data *vc);
+extern int do_unbind_con_driver(const struct consw *csw, int first, int last,
+			     int deflt);
 extern int unbind_con_driver(const struct consw *csw, int first, int last,
 			     int deflt);
 int vty_init(const struct file_operations *console_fops);
-- 
cgit v1.2.3


From 16a10ffd20a13215243bdba64c8e57ef277a55b9 Mon Sep 17 00:00:00 2001
From: Yan Burman <yanb@mellanox.com>
Date: Thu, 7 Feb 2013 02:25:22 +0000
Subject: net/mlx4: Move Ethernet related functionality from mlx4_core to
 mlx4_en

Move low level code that deals with management of Ethernet MACs and QPs from mlx4_core to mlx4_en.
Also convert the new functions to deal with MACs in form of char array instead of u64.

Actual functions moved:
mlx4_replace_mac
mlx4_get_eth_qp
mlx4_put_eth_qp

To conduct this change, some functionality had to be exported from the core,
the following functions were added:
mlx4_get_base_qp
__mlx4_replace_mac (low level function for CX1/A0 compatibility)

Signed-off-by: Yan Burman <yanb@mellanox.com>
Signed-off-by: Amir Vadai <amirv@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 216 +++++++++++++++++++++++--
 drivers/net/ethernet/mellanox/mlx4/main.c      |   5 +-
 drivers/net/ethernet/mellanox/mlx4/mlx4.h      |   7 -
 drivers/net/ethernet/mellanox/mlx4/mlx4_en.h   |   6 +
 drivers/net/ethernet/mellanox/mlx4/port.c      | 193 +---------------------
 include/linux/mlx4/device.h                    |   5 +-
 6 files changed, 224 insertions(+), 208 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 0843dd793aa7..63a1ef348387 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -420,6 +420,206 @@ static void mlx4_en_u64_to_mac(unsigned char dst_mac[ETH_ALEN + 2], u64 src_mac)
 	memset(&dst_mac[ETH_ALEN], 0, 2);
 }
 
+static int mlx4_en_uc_steer_add(struct mlx4_en_priv *priv,
+				unsigned char *mac, int *qpn, u64 *reg_id)
+{
+	struct mlx4_en_dev *mdev = priv->mdev;
+	struct mlx4_dev *dev = mdev->dev;
+	int err;
+
+	switch (dev->caps.steering_mode) {
+	case MLX4_STEERING_MODE_B0: {
+		struct mlx4_qp qp;
+		u8 gid[16] = {0};
+
+		qp.qpn = *qpn;
+		memcpy(&gid[10], mac, ETH_ALEN);
+		gid[5] = priv->port;
+
+		err = mlx4_unicast_attach(dev, &qp, gid, 0, MLX4_PROT_ETH);
+		break;
+	}
+	case MLX4_STEERING_MODE_DEVICE_MANAGED: {
+		struct mlx4_spec_list spec_eth = { {NULL} };
+		__be64 mac_mask = cpu_to_be64(MLX4_MAC_MASK << 16);
+
+		struct mlx4_net_trans_rule rule = {
+			.queue_mode = MLX4_NET_TRANS_Q_FIFO,
+			.exclusive = 0,
+			.allow_loopback = 1,
+			.promisc_mode = MLX4_FS_PROMISC_NONE,
+			.priority = MLX4_DOMAIN_NIC,
+		};
+
+		rule.port = priv->port;
+		rule.qpn = *qpn;
+		INIT_LIST_HEAD(&rule.list);
+
+		spec_eth.id = MLX4_NET_TRANS_RULE_ID_ETH;
+		memcpy(spec_eth.eth.dst_mac, mac, ETH_ALEN);
+		memcpy(spec_eth.eth.dst_mac_msk, &mac_mask, ETH_ALEN);
+		list_add_tail(&spec_eth.list, &rule.list);
+
+		err = mlx4_flow_attach(dev, &rule, reg_id);
+		break;
+	}
+	default:
+		return -EINVAL;
+	}
+	if (err)
+		en_warn(priv, "Failed Attaching Unicast\n");
+
+	return err;
+}
+
+static void mlx4_en_uc_steer_release(struct mlx4_en_priv *priv,
+				     unsigned char *mac, int qpn, u64 reg_id)
+{
+	struct mlx4_en_dev *mdev = priv->mdev;
+	struct mlx4_dev *dev = mdev->dev;
+
+	switch (dev->caps.steering_mode) {
+	case MLX4_STEERING_MODE_B0: {
+		struct mlx4_qp qp;
+		u8 gid[16] = {0};
+
+		qp.qpn = qpn;
+		memcpy(&gid[10], mac, ETH_ALEN);
+		gid[5] = priv->port;
+
+		mlx4_unicast_detach(dev, &qp, gid, MLX4_PROT_ETH);
+		break;
+	}
+	case MLX4_STEERING_MODE_DEVICE_MANAGED: {
+		mlx4_flow_detach(dev, reg_id);
+		break;
+	}
+	default:
+		en_err(priv, "Invalid steering mode.\n");
+	}
+}
+
+static int mlx4_en_get_qp(struct mlx4_en_priv *priv)
+{
+	struct mlx4_en_dev *mdev = priv->mdev;
+	struct mlx4_dev *dev = mdev->dev;
+	struct mlx4_mac_entry *entry;
+	int index = 0;
+	int err = 0;
+	u64 reg_id;
+	int *qpn = &priv->base_qpn;
+	u64 mac = mlx4_en_mac_to_u64(priv->dev->dev_addr);
+
+	en_dbg(DRV, priv, "Registering MAC: %pM for adding\n",
+	       priv->dev->dev_addr);
+	index = mlx4_register_mac(dev, priv->port, mac);
+	if (index < 0) {
+		err = index;
+		en_err(priv, "Failed adding MAC: %pM\n",
+		       priv->dev->dev_addr);
+		return err;
+	}
+
+	if (dev->caps.steering_mode == MLX4_STEERING_MODE_A0) {
+		int base_qpn = mlx4_get_base_qpn(dev, priv->port);
+		*qpn = base_qpn + index;
+		return 0;
+	}
+
+	err = mlx4_qp_reserve_range(dev, 1, 1, qpn);
+	en_dbg(DRV, priv, "Reserved qp %d\n", *qpn);
+	if (err) {
+		en_err(priv, "Failed to reserve qp for mac registration\n");
+		goto qp_err;
+	}
+
+	err = mlx4_en_uc_steer_add(priv, priv->dev->dev_addr, qpn, &reg_id);
+	if (err)
+		goto steer_err;
+
+	entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+	if (!entry) {
+		err = -ENOMEM;
+		goto alloc_err;
+	}
+	memcpy(entry->mac, priv->dev->dev_addr, sizeof(entry->mac));
+	entry->reg_id = reg_id;
+
+	err = radix_tree_insert(&priv->mac_tree, *qpn, entry);
+	if (err)
+		goto insert_err;
+	return 0;
+
+insert_err:
+	kfree(entry);
+
+alloc_err:
+	mlx4_en_uc_steer_release(priv, priv->dev->dev_addr, *qpn, reg_id);
+
+steer_err:
+	mlx4_qp_release_range(dev, *qpn, 1);
+
+qp_err:
+	mlx4_unregister_mac(dev, priv->port, mac);
+	return err;
+}
+
+static void mlx4_en_put_qp(struct mlx4_en_priv *priv)
+{
+	struct mlx4_en_dev *mdev = priv->mdev;
+	struct mlx4_dev *dev = mdev->dev;
+	struct mlx4_mac_entry *entry;
+	int qpn = priv->base_qpn;
+	u64 mac = mlx4_en_mac_to_u64(priv->dev->dev_addr);
+
+	en_dbg(DRV, priv, "Registering MAC: %pM for deleting\n",
+	       priv->dev->dev_addr);
+	mlx4_unregister_mac(dev, priv->port, mac);
+
+	if (dev->caps.steering_mode != MLX4_STEERING_MODE_A0) {
+		entry = radix_tree_lookup(&priv->mac_tree, qpn);
+		if (entry) {
+			en_dbg(DRV, priv, "Releasing qp: port %d, MAC %pM, qpn %d\n",
+			       priv->port, entry->mac, qpn);
+			mlx4_en_uc_steer_release(priv, entry->mac,
+						 qpn, entry->reg_id);
+			mlx4_qp_release_range(dev, qpn, 1);
+			radix_tree_delete(&priv->mac_tree, qpn);
+			kfree(entry);
+		}
+	}
+}
+
+static int mlx4_en_replace_mac(struct mlx4_en_priv *priv, int qpn,
+			       unsigned char *new_mac)
+{
+	struct mlx4_en_dev *mdev = priv->mdev;
+	struct mlx4_dev *dev = mdev->dev;
+	struct mlx4_mac_entry *entry;
+	int err = 0;
+	u64 new_mac_u64 = mlx4_en_mac_to_u64(new_mac);
+
+	if (dev->caps.steering_mode != MLX4_STEERING_MODE_A0) {
+		u64 prev_mac_u64;
+
+		entry = radix_tree_lookup(&priv->mac_tree, qpn);
+		if (!entry)
+			return -EINVAL;
+		prev_mac_u64 = mlx4_en_mac_to_u64(entry->mac);
+		mlx4_en_uc_steer_release(priv, entry->mac,
+					 qpn, entry->reg_id);
+		mlx4_unregister_mac(dev, priv->port, prev_mac_u64);
+		memcpy(entry->mac, new_mac, ETH_ALEN);
+		entry->reg_id = 0;
+		mlx4_register_mac(dev, priv->port, new_mac_u64);
+		err = mlx4_en_uc_steer_add(priv, new_mac,
+					   &qpn, &entry->reg_id);
+		return err;
+	}
+
+	return __mlx4_replace_mac(dev, priv->port, qpn, new_mac_u64);
+}
+
 u64 mlx4_en_mac_to_u64(u8 *addr)
 {
 	u64 mac = 0;
@@ -456,9 +656,8 @@ static void mlx4_en_do_set_mac(struct work_struct *work)
 	mutex_lock(&mdev->state_lock);
 	if (priv->port_up) {
 		/* Remove old MAC and insert the new one */
-		u64 mac = mlx4_en_mac_to_u64(priv->dev->dev_addr);
-		err = mlx4_replace_mac(mdev->dev, priv->port,
-				       priv->base_qpn, mac);
+		err = mlx4_en_replace_mac(priv, priv->base_qpn,
+					  priv->dev->dev_addr);
 		if (err)
 			en_err(priv, "Failed changing HW MAC address\n");
 		memcpy(priv->prev_mac, priv->dev->dev_addr,
@@ -1035,7 +1234,6 @@ int mlx4_en_start_port(struct net_device *dev)
 	int i;
 	int j;
 	u8 mc_list[16] = {0};
-	u64 mac = mlx4_en_mac_to_u64(dev->dev_addr);
 
 	if (priv->port_up) {
 		en_dbg(DRV, priv, "start port called while port already up\n");
@@ -1082,8 +1280,7 @@ int mlx4_en_start_port(struct net_device *dev)
 
 	/* Set qp number */
 	en_dbg(DRV, priv, "Getting qp number for port %d\n", priv->port);
-	err = mlx4_get_eth_qp(mdev->dev, priv->port,
-			      mac, &priv->base_qpn);
+	err = mlx4_en_get_qp(priv);
 	if (err) {
 		en_err(priv, "Failed getting eth qp\n");
 		goto cq_err;
@@ -1196,7 +1393,7 @@ tx_err:
 rss_err:
 	mlx4_en_release_rss_steer(priv);
 mac_err:
-	mlx4_put_eth_qp(mdev->dev, priv->port, mac, priv->base_qpn);
+	mlx4_en_put_qp(priv);
 cq_err:
 	while (rx_index--)
 		mlx4_en_deactivate_cq(priv, &priv->rx_cq[rx_index]);
@@ -1215,7 +1412,6 @@ void mlx4_en_stop_port(struct net_device *dev, int detach)
 	struct ethtool_flow_id *flow, *tmp_flow;
 	int i;
 	u8 mc_list[16] = {0};
-	u64 mac = mlx4_en_mac_to_u64(dev->dev_addr);
 
 	if (!priv->port_up) {
 		en_dbg(DRV, priv, "stop port called while port already down\n");
@@ -1296,7 +1492,7 @@ void mlx4_en_stop_port(struct net_device *dev, int detach)
 	mlx4_en_release_rss_steer(priv);
 
 	/* Unregister Mac address for the port */
-	mlx4_put_eth_qp(mdev->dev, priv->port, mac, priv->base_qpn);
+	mlx4_en_put_qp(priv);
 	if (!(mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAGS2_REASSIGN_MAC_EN))
 		mdev->mac_removed[priv->port] = 1;
 
@@ -1661,6 +1857,8 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
 		dev->dcbnl_ops = &mlx4_en_dcbnl_ops;
 #endif
 
+	INIT_RADIX_TREE(&priv->mac_tree, GFP_KERNEL);
+
 	/* Query for default mac and max mtu */
 	priv->max_mtu = mdev->dev->caps.eth_mtu_cap[priv->port];
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 12ddae6efce3..2d7b9377883a 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -1833,12 +1833,9 @@ static int mlx4_init_port_info(struct mlx4_dev *dev, int port)
 	info->dev = dev;
 	info->port = port;
 	if (!mlx4_is_slave(dev)) {
-		INIT_RADIX_TREE(&info->mac_tree, GFP_KERNEL);
 		mlx4_init_mac_table(dev, &info->mac_table);
 		mlx4_init_vlan_table(dev, &info->vlan_table);
-		info->base_qpn =
-			dev->caps.reserved_qps_base[MLX4_QP_REGION_ETH_ADDR] +
-			(port - 1) * (1 << log_num_mac);
+		info->base_qpn = mlx4_get_base_qpn(dev, port);
 	}
 
 	sprintf(info->dev_name, "mlx4_port%d", port);
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index 172daaa29a9e..ed4a6959e828 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -653,11 +653,6 @@ struct mlx4_set_port_rqp_calc_context {
 	__be32 mcast;
 };
 
-struct mlx4_mac_entry {
-	u64 mac;
-	u64 reg_id;
-};
-
 struct mlx4_port_info {
 	struct mlx4_dev	       *dev;
 	int			port;
@@ -667,7 +662,6 @@ struct mlx4_port_info {
 	char			dev_mtu_name[16];
 	struct device_attribute port_mtu_attr;
 	struct mlx4_mac_table	mac_table;
-	struct radix_tree_root	mac_tree;
 	struct mlx4_vlan_table	vlan_table;
 	int			base_qpn;
 };
@@ -916,7 +910,6 @@ int __mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align,
 void __mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt);
 int __mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac);
 void __mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, u64 mac);
-int __mlx4_replace_mac(struct mlx4_dev *dev, u8 port, int qpn, u64 new_mac);
 int __mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
 		     int start_index, int npages, u64 *page_list);
 int __mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx);
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index 9d0105d3eaf3..84ed328582ac 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -521,6 +521,7 @@ struct mlx4_en_priv {
 	bool wol;
 	struct device *ddev;
 	int base_tx_qpn;
+	struct radix_tree_root mac_tree;
 
 #ifdef CONFIG_MLX4_EN_DCB
 	struct ieee_ets ets;
@@ -540,6 +541,11 @@ enum mlx4_en_wol {
 	MLX4_EN_WOL_ENABLED = (1ULL << 62),
 };
 
+struct mlx4_mac_entry {
+	unsigned char mac[ETH_ALEN + 2];
+	u64 reg_id;
+};
+
 #define MLX4_EN_WOL_DO_MODIFY (1ULL << 63)
 
 void mlx4_en_update_loopback_state(struct net_device *dev,
diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c
index 4c51b05efa28..719ead15e491 100644
--- a/drivers/net/ethernet/mellanox/mlx4/port.c
+++ b/drivers/net/ethernet/mellanox/mlx4/port.c
@@ -74,87 +74,6 @@ void mlx4_init_vlan_table(struct mlx4_dev *dev, struct mlx4_vlan_table *table)
 	table->total = 0;
 }
 
-static int mlx4_uc_steer_add(struct mlx4_dev *dev, u8 port,
-			     u64 mac, int *qpn, u64 *reg_id)
-{
-	__be64 be_mac;
-	int err;
-
-	mac &= MLX4_MAC_MASK;
-	be_mac = cpu_to_be64(mac << 16);
-
-	switch (dev->caps.steering_mode) {
-	case MLX4_STEERING_MODE_B0: {
-		struct mlx4_qp qp;
-		u8 gid[16] = {0};
-
-		qp.qpn = *qpn;
-		memcpy(&gid[10], &be_mac, ETH_ALEN);
-		gid[5] = port;
-
-		err = mlx4_unicast_attach(dev, &qp, gid, 0, MLX4_PROT_ETH);
-		break;
-	}
-	case MLX4_STEERING_MODE_DEVICE_MANAGED: {
-		struct mlx4_spec_list spec_eth = { {NULL} };
-		__be64 mac_mask = cpu_to_be64(MLX4_MAC_MASK << 16);
-
-		struct mlx4_net_trans_rule rule = {
-			.queue_mode = MLX4_NET_TRANS_Q_FIFO,
-			.exclusive = 0,
-			.allow_loopback = 1,
-			.promisc_mode = MLX4_FS_PROMISC_NONE,
-			.priority = MLX4_DOMAIN_NIC,
-		};
-
-		rule.port = port;
-		rule.qpn = *qpn;
-		INIT_LIST_HEAD(&rule.list);
-
-		spec_eth.id = MLX4_NET_TRANS_RULE_ID_ETH;
-		memcpy(spec_eth.eth.dst_mac, &be_mac, ETH_ALEN);
-		memcpy(spec_eth.eth.dst_mac_msk, &mac_mask, ETH_ALEN);
-		list_add_tail(&spec_eth.list, &rule.list);
-
-		err = mlx4_flow_attach(dev, &rule, reg_id);
-		break;
-	}
-	default:
-		return -EINVAL;
-	}
-	if (err)
-		mlx4_warn(dev, "Failed Attaching Unicast\n");
-
-	return err;
-}
-
-static void mlx4_uc_steer_release(struct mlx4_dev *dev, u8 port,
-				  u64 mac, int qpn, u64 reg_id)
-{
-	switch (dev->caps.steering_mode) {
-	case MLX4_STEERING_MODE_B0: {
-		struct mlx4_qp qp;
-		u8 gid[16] = {0};
-		__be64 be_mac;
-
-		qp.qpn = qpn;
-		mac &= MLX4_MAC_MASK;
-		be_mac = cpu_to_be64(mac << 16);
-		memcpy(&gid[10], &be_mac, ETH_ALEN);
-		gid[5] = port;
-
-		mlx4_unicast_detach(dev, &qp, gid, MLX4_PROT_ETH);
-		break;
-	}
-	case MLX4_STEERING_MODE_DEVICE_MANAGED: {
-		mlx4_flow_detach(dev, reg_id);
-		break;
-	}
-	default:
-		mlx4_err(dev, "Invalid steering mode.\n");
-	}
-}
-
 static int validate_index(struct mlx4_dev *dev,
 			  struct mlx4_mac_table *table, int index)
 {
@@ -181,92 +100,6 @@ static int find_index(struct mlx4_dev *dev,
 	return -EINVAL;
 }
 
-int mlx4_get_eth_qp(struct mlx4_dev *dev, u8 port, u64 mac, int *qpn)
-{
-	struct mlx4_port_info *info = &mlx4_priv(dev)->port[port];
-	struct mlx4_mac_entry *entry;
-	int index = 0;
-	int err = 0;
-	u64 reg_id;
-
-	mlx4_dbg(dev, "Registering MAC: 0x%llx for adding\n",
-			(unsigned long long) mac);
-	index = mlx4_register_mac(dev, port, mac);
-	if (index < 0) {
-		err = index;
-		mlx4_err(dev, "Failed adding MAC: 0x%llx\n",
-			 (unsigned long long) mac);
-		return err;
-	}
-
-	if (dev->caps.steering_mode == MLX4_STEERING_MODE_A0) {
-		*qpn = info->base_qpn + index;
-		return 0;
-	}
-
-	err = mlx4_qp_reserve_range(dev, 1, 1, qpn);
-	mlx4_dbg(dev, "Reserved qp %d\n", *qpn);
-	if (err) {
-		mlx4_err(dev, "Failed to reserve qp for mac registration\n");
-		goto qp_err;
-	}
-
-	err = mlx4_uc_steer_add(dev, port, mac, qpn, &reg_id);
-	if (err)
-		goto steer_err;
-
-	entry = kmalloc(sizeof *entry, GFP_KERNEL);
-	if (!entry) {
-		err = -ENOMEM;
-		goto alloc_err;
-	}
-	entry->mac = mac;
-	entry->reg_id = reg_id;
-	err = radix_tree_insert(&info->mac_tree, *qpn, entry);
-	if (err)
-		goto insert_err;
-	return 0;
-
-insert_err:
-	kfree(entry);
-
-alloc_err:
-	mlx4_uc_steer_release(dev, port, mac, *qpn, reg_id);
-
-steer_err:
-	mlx4_qp_release_range(dev, *qpn, 1);
-
-qp_err:
-	mlx4_unregister_mac(dev, port, mac);
-	return err;
-}
-EXPORT_SYMBOL_GPL(mlx4_get_eth_qp);
-
-void mlx4_put_eth_qp(struct mlx4_dev *dev, u8 port, u64 mac, int qpn)
-{
-	struct mlx4_port_info *info = &mlx4_priv(dev)->port[port];
-	struct mlx4_mac_entry *entry;
-
-	mlx4_dbg(dev, "Registering MAC: 0x%llx for deleting\n",
-		 (unsigned long long) mac);
-	mlx4_unregister_mac(dev, port, mac);
-
-	if (dev->caps.steering_mode != MLX4_STEERING_MODE_A0) {
-		entry = radix_tree_lookup(&info->mac_tree, qpn);
-		if (entry) {
-			mlx4_dbg(dev, "Releasing qp: port %d, mac 0x%llx,"
-				 " qpn %d\n", port,
-				 (unsigned long long) mac, qpn);
-			mlx4_uc_steer_release(dev, port, entry->mac,
-					      qpn, entry->reg_id);
-			mlx4_qp_release_range(dev, qpn, 1);
-			radix_tree_delete(&info->mac_tree, qpn);
-			kfree(entry);
-		}
-	}
-}
-EXPORT_SYMBOL_GPL(mlx4_put_eth_qp);
-
 static int mlx4_set_port_mac_table(struct mlx4_dev *dev, u8 port,
 				   __be64 *entries)
 {
@@ -359,6 +192,12 @@ int mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac)
 }
 EXPORT_SYMBOL_GPL(mlx4_register_mac);
 
+int mlx4_get_base_qpn(struct mlx4_dev *dev, u8 port)
+{
+	return dev->caps.reserved_qps_base[MLX4_QP_REGION_ETH_ADDR] +
+			(port - 1) * (1 << dev->caps.log_num_macs);
+}
+EXPORT_SYMBOL_GPL(mlx4_get_base_qpn);
 
 void __mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, u64 mac)
 {
@@ -397,29 +236,13 @@ void mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, u64 mac)
 }
 EXPORT_SYMBOL_GPL(mlx4_unregister_mac);
 
-int mlx4_replace_mac(struct mlx4_dev *dev, u8 port, int qpn, u64 new_mac)
+int __mlx4_replace_mac(struct mlx4_dev *dev, u8 port, int qpn, u64 new_mac)
 {
 	struct mlx4_port_info *info = &mlx4_priv(dev)->port[port];
 	struct mlx4_mac_table *table = &info->mac_table;
-	struct mlx4_mac_entry *entry;
 	int index = qpn - info->base_qpn;
 	int err = 0;
 
-	if (dev->caps.steering_mode != MLX4_STEERING_MODE_A0) {
-		entry = radix_tree_lookup(&info->mac_tree, qpn);
-		if (!entry)
-			return -EINVAL;
-		mlx4_uc_steer_release(dev, port, entry->mac,
-				      qpn, entry->reg_id);
-		mlx4_unregister_mac(dev, port, entry->mac);
-		entry->mac = new_mac;
-		entry->reg_id = 0;
-		mlx4_register_mac(dev, port, new_mac);
-		err = mlx4_uc_steer_add(dev, port, entry->mac,
-					&qpn, &entry->reg_id);
-		return err;
-	}
-
 	/* CX1 doesn't support multi-functions */
 	mutex_lock(&table->mutex);
 
@@ -439,7 +262,7 @@ out:
 	mutex_unlock(&table->mutex);
 	return err;
 }
-EXPORT_SYMBOL_GPL(mlx4_replace_mac);
+EXPORT_SYMBOL_GPL(__mlx4_replace_mac);
 
 static int mlx4_set_port_vlan_table(struct mlx4_dev *dev, u8 port,
 				    __be32 *entries)
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 1883e8e84718..6d48fce06b4a 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -956,9 +956,8 @@ int mlx4_SET_MCAST_FLTR(struct mlx4_dev *dev, u8 port, u64 mac, u64 clear, u8 mo
 
 int mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac);
 void mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, u64 mac);
-int mlx4_replace_mac(struct mlx4_dev *dev, u8 port, int qpn, u64 new_mac);
-int mlx4_get_eth_qp(struct mlx4_dev *dev, u8 port, u64 mac, int *qpn);
-void mlx4_put_eth_qp(struct mlx4_dev *dev, u8 port, u64 mac, int qpn);
+int mlx4_get_base_qpn(struct mlx4_dev *dev, u8 port);
+int __mlx4_replace_mac(struct mlx4_dev *dev, u8 port, int qpn, u64 new_mac);
 void mlx4_set_stats_bitmap(struct mlx4_dev *dev, u64 *stats_bitmap);
 int mlx4_SET_PORT_general(struct mlx4_dev *dev, u8 port, int mtu,
 			  u8 pptx, u8 pfctx, u8 pprx, u8 pfcrx);
-- 
cgit v1.2.3


From 180996c30517b5374f63df3c9765716c5b477155 Mon Sep 17 00:00:00 2001
From: Hauke Mehrtens <hauke@hauke-m.de>
Date: Thu, 7 Feb 2013 05:37:37 +0000
Subject: ssb: get mac address from sprom struct for gige driver

The mac address is already stored in the sprom structure by the
platform code of the SoC this Ethernet core is found on, it just has to
be fetched from this structure instead of accessing the nvram here.
This patch also adds a return value to indicate if a mac address could
be fetched from the sprom structure.
When CONFIG_SSB_DRIVER_GIGE is not set the header file now also declares
ssb_gige_get_macaddr().

Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
Acked-by: Michael Buesch <m@bues.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ssb/ssb_driver_gige.h | 23 +++++++++++------------
 1 file changed, 11 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ssb/ssb_driver_gige.h b/include/linux/ssb/ssb_driver_gige.h
index 6b05dcd927ff..86a12b0cb239 100644
--- a/include/linux/ssb/ssb_driver_gige.h
+++ b/include/linux/ssb/ssb_driver_gige.h
@@ -97,21 +97,16 @@ static inline bool ssb_gige_must_flush_posted_writes(struct pci_dev *pdev)
 	return 0;
 }
 
-#ifdef CONFIG_BCM47XX
-#include <asm/mach-bcm47xx/nvram.h>
 /* Get the device MAC address */
-static inline void ssb_gige_get_macaddr(struct pci_dev *pdev, u8 *macaddr)
-{
-	char buf[20];
-	if (nvram_getenv("et0macaddr", buf, sizeof(buf)) < 0)
-		return;
-	nvram_parse_macaddr(buf, macaddr);
-}
-#else
-static inline void ssb_gige_get_macaddr(struct pci_dev *pdev, u8 *macaddr)
+static inline int ssb_gige_get_macaddr(struct pci_dev *pdev, u8 *macaddr)
 {
+	struct ssb_gige *dev = pdev_to_ssb_gige(pdev);
+	if (!dev)
+		return -ENODEV;
+
+	memcpy(macaddr, dev->dev->bus->sprom.et0mac, 6);
+	return 0;
 }
-#endif
 
 extern int ssb_gige_pcibios_plat_dev_init(struct ssb_device *sdev,
 					  struct pci_dev *pdev);
@@ -175,6 +170,10 @@ static inline bool ssb_gige_must_flush_posted_writes(struct pci_dev *pdev)
 {
 	return 0;
 }
+static inline int ssb_gige_get_macaddr(struct pci_dev *pdev, u8 *macaddr)
+{
+	return -ENODEV;
+}
 
 #endif /* CONFIG_SSB_DRIVER_GIGE */
 #endif /* LINUX_SSB_DRIVER_GIGE_H_ */
-- 
cgit v1.2.3


From 7e6c63f03d94278135753fef7ffcc5b03e34282e Mon Sep 17 00:00:00 2001
From: Hauke Mehrtens <hauke@hauke-m.de>
Date: Thu, 7 Feb 2013 05:37:39 +0000
Subject: tg3: add support for Ethernet core in bcm4785

The BCM4785 or sometimes named BMC4705 is a Broadcom SoC which a
Gigabit 5750 Ethernet core. The core is connected via PCI with the rest
of the SoC, but it uses some extension.

This core does not use a firmware or an eeprom.

Some devices only have a switch which supports 100MBit/s, this
currently does not work with this driver.

This patch was original written by Michael Buesch <m@bues.ch> and is in
OpenWrt for some years now.

This was tested on a Linksys WRT610N V1 and older versions of this patch
were tested by other people on different devices.

Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
Acked-by: Michael Chan <mchan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/tg3.c | 117 ++++++++++++++++++++++++++++++++++--
 drivers/net/ethernet/broadcom/tg3.h |   5 ++
 include/linux/pci_ids.h             |   1 +
 3 files changed, 117 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index d9e81d6be7b9..b1b3bc01cbc2 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -44,6 +44,7 @@
 #include <linux/prefetch.h>
 #include <linux/dma-mapping.h>
 #include <linux/firmware.h>
+#include <linux/ssb/ssb_driver_gige.h>
 #include <linux/hwmon.h>
 #include <linux/hwmon-sysfs.h>
 
@@ -263,6 +264,7 @@ static DEFINE_PCI_DEVICE_TABLE(tg3_pci_tbl) = {
 			TG3_DRV_DATA_FLAG_5705_10_100},
 	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5721)},
 	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5722)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5750)},
 	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5751)},
 	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5751M)},
 	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5751F),
@@ -573,7 +575,9 @@ static void _tw32_flush(struct tg3 *tp, u32 off, u32 val, u32 usec_wait)
 static inline void tw32_mailbox_flush(struct tg3 *tp, u32 off, u32 val)
 {
 	tp->write32_mbox(tp, off, val);
-	if (!tg3_flag(tp, MBOX_WRITE_REORDER) && !tg3_flag(tp, ICH_WORKAROUND))
+	if (tg3_flag(tp, FLUSH_POSTED_WRITES) ||
+	    (!tg3_flag(tp, MBOX_WRITE_REORDER) &&
+	     !tg3_flag(tp, ICH_WORKAROUND)))
 		tp->read32_mbox(tp, off);
 }
 
@@ -583,7 +587,8 @@ static void tg3_write32_tx_mbox(struct tg3 *tp, u32 off, u32 val)
 	writel(val, mbox);
 	if (tg3_flag(tp, TXD_MBOX_HWBUG))
 		writel(val, mbox);
-	if (tg3_flag(tp, MBOX_WRITE_REORDER))
+	if (tg3_flag(tp, MBOX_WRITE_REORDER) ||
+	    tg3_flag(tp, FLUSH_POSTED_WRITES))
 		readl(mbox);
 }
 
@@ -1793,6 +1798,11 @@ static int tg3_poll_fw(struct tg3 *tp)
 	int i;
 	u32 val;
 
+	if (tg3_flag(tp, IS_SSB_CORE)) {
+		/* We don't use firmware. */
+		return 0;
+	}
+
 	if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5906) {
 		/* Wait up to 20ms for init done. */
 		for (i = 0; i < 200; i++) {
@@ -3465,6 +3475,13 @@ static int tg3_halt_cpu(struct tg3 *tp, u32 offset)
 		tw32_f(offset + CPU_MODE,  CPU_MODE_HALT);
 		udelay(10);
 	} else {
+		/*
+		 * There is only an Rx CPU for the 5750 derivative in the
+		 * BCM4785.
+		 */
+		if (tg3_flag(tp, IS_SSB_CORE))
+			return 0;
+
 		for (i = 0; i < 10000; i++) {
 			tw32(offset + CPU_STATE, 0xffffffff);
 			tw32(offset + CPU_MODE,  CPU_MODE_HALT);
@@ -3932,8 +3949,9 @@ static int tg3_power_down_prepare(struct tg3 *tp)
 	tg3_frob_aux_power(tp, true);
 
 	/* Workaround for unstable PLL clock */
-	if ((GET_CHIP_REV(tp->pci_chip_rev_id) == CHIPREV_5750_AX) ||
-	    (GET_CHIP_REV(tp->pci_chip_rev_id) == CHIPREV_5750_BX)) {
+	if ((!tg3_flag(tp, IS_SSB_CORE)) &&
+	    ((GET_CHIP_REV(tp->pci_chip_rev_id) == CHIPREV_5750_AX) ||
+	     (GET_CHIP_REV(tp->pci_chip_rev_id) == CHIPREV_5750_BX))) {
 		u32 val = tr32(0x7d00);
 
 		val &= ~((1 << 16) | (1 << 4) | (1 << 2) | (1 << 1) | 1);
@@ -4454,6 +4472,15 @@ relink:
 	if (current_link_up == 0 || (tp->phy_flags & TG3_PHYFLG_IS_LOW_POWER)) {
 		tg3_phy_copper_begin(tp);
 
+		if (tg3_flag(tp, ROBOSWITCH)) {
+			current_link_up = 1;
+			/* FIXME: when BCM5325 switch is used use 100 MBit/s */
+			current_speed = SPEED_1000;
+			current_duplex = DUPLEX_FULL;
+			tp->link_config.active_speed = current_speed;
+			tp->link_config.active_duplex = current_duplex;
+		}
+
 		tg3_readphy(tp, MII_BMSR, &bmsr);
 		if ((!tg3_readphy(tp, MII_BMSR, &bmsr) && (bmsr & BMSR_LSTATUS)) ||
 		    (tp->mac_mode & MAC_MODE_PORT_INT_LPBACK))
@@ -4472,6 +4499,26 @@ relink:
 	else
 		tp->mac_mode |= MAC_MODE_PORT_MODE_GMII;
 
+	/* In order for the 5750 core in BCM4785 chip to work properly
+	 * in RGMII mode, the Led Control Register must be set up.
+	 */
+	if (tg3_flag(tp, RGMII_MODE)) {
+		u32 led_ctrl = tr32(MAC_LED_CTRL);
+		led_ctrl &= ~(LED_CTRL_1000MBPS_ON | LED_CTRL_100MBPS_ON);
+
+		if (tp->link_config.active_speed == SPEED_10)
+			led_ctrl |= LED_CTRL_LNKLED_OVERRIDE;
+		else if (tp->link_config.active_speed == SPEED_100)
+			led_ctrl |= (LED_CTRL_LNKLED_OVERRIDE |
+				     LED_CTRL_100MBPS_ON);
+		else if (tp->link_config.active_speed == SPEED_1000)
+			led_ctrl |= (LED_CTRL_LNKLED_OVERRIDE |
+				     LED_CTRL_1000MBPS_ON);
+
+		tw32(MAC_LED_CTRL, led_ctrl);
+		udelay(40);
+	}
+
 	tp->mac_mode &= ~MAC_MODE_HALF_DUPLEX;
 	if (tp->link_config.active_duplex == DUPLEX_HALF)
 		tp->mac_mode |= MAC_MODE_HALF_DUPLEX;
@@ -8449,6 +8496,16 @@ static int tg3_chip_reset(struct tg3 *tp)
 		tw32(0x5000, 0x400);
 	}
 
+	if (tg3_flag(tp, IS_SSB_CORE)) {
+		/*
+		 * BCM4785: In order to avoid repercussions from using
+		 * potentially defective internal ROM, stop the Rx RISC CPU,
+		 * which is not required.
+		 */
+		tg3_stop_fw(tp);
+		tg3_halt_cpu(tp, RX_CPU_BASE);
+	}
+
 	tw32(GRC_MODE, tp->grc_mode);
 
 	if (tp->pci_chip_rev_id == CHIPREV_ID_5705_A0) {
@@ -10107,6 +10164,11 @@ static void tg3_timer(unsigned long __opaque)
 	    tg3_flag(tp, 57765_CLASS))
 		tg3_chk_missed_msi(tp);
 
+	if (tg3_flag(tp, FLUSH_POSTED_WRITES)) {
+		/* BCM4785: Flush posted writes from GbE to host memory. */
+		tr32(HOSTCC_MODE);
+	}
+
 	if (!tg3_flag(tp, TAGGED_STATUS)) {
 		/* All of this garbage is because when using non-tagged
 		 * IRQ status the mailbox/status_block protocol the chip
@@ -13879,6 +13941,14 @@ static void tg3_get_5720_nvram_info(struct tg3 *tp)
 /* Chips other than 5700/5701 use the NVRAM for fetching info. */
 static void tg3_nvram_init(struct tg3 *tp)
 {
+	if (tg3_flag(tp, IS_SSB_CORE)) {
+		/* No NVRAM and EEPROM on the SSB Broadcom GigE core. */
+		tg3_flag_clear(tp, NVRAM);
+		tg3_flag_clear(tp, NVRAM_BUFFERED);
+		tg3_flag_set(tp, NO_NVRAM);
+		return;
+	}
+
 	tw32_f(GRC_EEPROM_ADDR,
 	     (EEPROM_ADDR_FSM_RESET |
 	      (EEPROM_DEFAULT_CLOCK_PERIOD <<
@@ -14405,10 +14475,19 @@ static int tg3_phy_probe(struct tg3 *tp)
 			 * subsys device table.
 			 */
 			p = tg3_lookup_by_subsys(tp);
-			if (!p)
+			if (p) {
+				tp->phy_id = p->phy_id;
+			} else if (!tg3_flag(tp, IS_SSB_CORE)) {
+				/* For now we saw the IDs 0xbc050cd0,
+				 * 0xbc050f80 and 0xbc050c30 on devices
+				 * connected to an BCM4785 and there are
+				 * probably more. Just assume that the phy is
+				 * supported when it is connected to a SSB core
+				 * for now.
+				 */
 				return -ENODEV;
+			}
 
-			tp->phy_id = p->phy_id;
 			if (!tp->phy_id ||
 			    tp->phy_id == TG3_PHY_ID_BCM8002)
 				tp->phy_flags |= TG3_PHYFLG_PHY_SERDES;
@@ -15484,6 +15563,11 @@ static int tg3_get_invariants(struct tg3 *tp, const struct pci_device_id *ent)
 				     TG3_CPMU_STATUS_FSHFT_5719;
 	}
 
+	if (tg3_flag(tp, FLUSH_POSTED_WRITES)) {
+		tp->write32_tx_mbox = tg3_write_flush_reg32;
+		tp->write32_rx_mbox = tg3_write_flush_reg32;
+	}
+
 	/* Get eeprom hw config before calling tg3_set_power_state().
 	 * In particular, the TG3_FLAG_IS_NIC flag must be
 	 * determined before calling tg3_set_power_state() so that
@@ -15820,12 +15904,19 @@ static int tg3_get_device_address(struct tg3 *tp)
 	struct net_device *dev = tp->dev;
 	u32 hi, lo, mac_offset;
 	int addr_ok = 0;
+	int err;
 
 #ifdef CONFIG_SPARC
 	if (!tg3_get_macaddr_sparc(tp))
 		return 0;
 #endif
 
+	if (tg3_flag(tp, IS_SSB_CORE)) {
+		err = ssb_gige_get_macaddr(tp->pdev, &dev->dev_addr[0]);
+		if (!err && is_valid_ether_addr(&dev->dev_addr[0]))
+			return 0;
+	}
+
 	mac_offset = 0x7c;
 	if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5704 ||
 	    tg3_flag(tp, 5780_CLASS)) {
@@ -16185,6 +16276,8 @@ static int tg3_test_dma(struct tg3 *tp)
 			tp->dma_rwctrl |= 0x001b000f;
 		}
 	}
+	if (tg3_flag(tp, ONE_DMA_AT_ONCE))
+		tp->dma_rwctrl |= DMA_RWCTRL_ONE_DMA;
 
 	if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5703 ||
 	    GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5704)
@@ -16530,6 +16623,18 @@ static int tg3_init_one(struct pci_dev *pdev,
 	else
 		tp->msg_enable = TG3_DEF_MSG_ENABLE;
 
+	if (pdev_is_ssb_gige_core(pdev)) {
+		tg3_flag_set(tp, IS_SSB_CORE);
+		if (ssb_gige_must_flush_posted_writes(pdev))
+			tg3_flag_set(tp, FLUSH_POSTED_WRITES);
+		if (ssb_gige_one_dma_at_once(pdev))
+			tg3_flag_set(tp, ONE_DMA_AT_ONCE);
+		if (ssb_gige_have_roboswitch(pdev))
+			tg3_flag_set(tp, ROBOSWITCH);
+		if (ssb_gige_is_rgmii(pdev))
+			tg3_flag_set(tp, RGMII_MODE);
+	}
+
 	/* The word/byte swap controls here control register access byte
 	 * swapping.  DMA data byte swapping is controlled in the GRC_MODE
 	 * setting below.
diff --git a/drivers/net/ethernet/broadcom/tg3.h b/drivers/net/ethernet/broadcom/tg3.h
index 9cd88a4b9a5f..ef6ced2bf9c3 100644
--- a/drivers/net/ethernet/broadcom/tg3.h
+++ b/drivers/net/ethernet/broadcom/tg3.h
@@ -3056,6 +3056,11 @@ enum TG3_FLAGS {
 	TG3_FLAG_57765_PLUS,
 	TG3_FLAG_57765_CLASS,
 	TG3_FLAG_5717_PLUS,
+	TG3_FLAG_IS_SSB_CORE,
+	TG3_FLAG_FLUSH_POSTED_WRITES,
+	TG3_FLAG_ROBOSWITCH,
+	TG3_FLAG_ONE_DMA_AT_ONCE,
+	TG3_FLAG_RGMII_MODE,
 
 	/* Add new flags before this comment and TG3_FLAG_NUMBER_OF_FLAGS */
 	TG3_FLAG_NUMBER_OF_FLAGS,	/* Last entry in enum TG3_FLAGS */
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 0eb65796bcb9..907e7e56fa4b 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2127,6 +2127,7 @@
 #define PCI_DEVICE_ID_TIGON3_5754M	0x1672
 #define PCI_DEVICE_ID_TIGON3_5755M	0x1673
 #define PCI_DEVICE_ID_TIGON3_5756	0x1674
+#define PCI_DEVICE_ID_TIGON3_5750	0x1676
 #define PCI_DEVICE_ID_TIGON3_5751	0x1677
 #define PCI_DEVICE_ID_TIGON3_5715	0x1678
 #define PCI_DEVICE_ID_TIGON3_5715S	0x1679
-- 
cgit v1.2.3


From 9c79330d930b5774aed8eb323daebecedce2e245 Mon Sep 17 00:00:00 2001
From: Lucas Stach <dev@lynxeye.de>
Date: Thu, 7 Feb 2013 16:18:39 +0000
Subject: net: usb: fix regression from FLAG_NOARP code

In commit 6509141f9c2ba74df6cc72ec35cd1865276ae3a4 ("usbnet: add new
flag FLAG_NOARP for usb net devices"), the newly added flag NOARP was
using an already defined value, which broke drivers using flag
MULTI_PACKET.

Signed-off-by: Lucas Stach <dev@lynxeye.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/usb/usbnet.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h
index 0de078d4cdb9..0e5ac93bab10 100644
--- a/include/linux/usb/usbnet.h
+++ b/include/linux/usb/usbnet.h
@@ -102,7 +102,6 @@ struct driver_info {
 #define FLAG_LINK_INTR	0x0800		/* updates link (carrier) status */
 
 #define FLAG_POINTTOPOINT 0x1000	/* possibly use "usb%d" names */
-#define FLAG_NOARP	0x2000		/* device can't do ARP */
 
 /*
  * Indicates to usbnet, that USB driver accumulates multiple IP packets.
@@ -110,6 +109,7 @@ struct driver_info {
  */
 #define FLAG_MULTI_PACKET	0x2000
 #define FLAG_RX_ASSEMBLE	0x4000	/* rx packets may span >1 frames */
+#define FLAG_NOARP		0x8000	/* device can't do ARP */
 
 	/* init device ... can sleep, or cause probe() failure */
 	int	(*bind)(struct usbnet *, struct usb_interface *);
-- 
cgit v1.2.3


From afb43e6d88e587441c960a5d214d2c698d076c9c Mon Sep 17 00:00:00 2001
From: Luciano Coelho <coelho@ti.com>
Date: Fri, 25 Jan 2013 11:57:48 +0200
Subject: wlcore: remove if_ops from platform_data

We can't pass pointers from the platform data to the modules, because
with DT it cannot be done.  Those pointers are not set by the board
files anyway.  It's the bus modules that set them, so they can be
safely removed from the platform data without changing any board
files.

Create a new structure that the bus modules pass to wlcore.  This
structure contains the if_ops pointers and a pointer to the actual
platform data.

Signed-off-by: Luciano Coelho <coelho@ti.com>
Reviewed-by: Felipe Balbi <balbi@ti.com>
---
 drivers/net/wireless/ti/wl12xx/main.c     |  3 ++-
 drivers/net/wireless/ti/wlcore/main.c     |  5 +++--
 drivers/net/wireless/ti/wlcore/sdio.c     | 22 +++++++++++++++++-----
 drivers/net/wireless/ti/wlcore/spi.c      | 20 +++++++++++++++++---
 drivers/net/wireless/ti/wlcore/wlcore_i.h |  5 +++++
 include/linux/wl12xx.h                    |  2 --
 6 files changed, 44 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/wireless/ti/wl12xx/main.c b/drivers/net/wireless/ti/wl12xx/main.c
index 3254bfc81a2a..09694e39bb14 100644
--- a/drivers/net/wireless/ti/wl12xx/main.c
+++ b/drivers/net/wireless/ti/wl12xx/main.c
@@ -1703,7 +1703,8 @@ static struct ieee80211_sta_ht_cap wl12xx_ht_cap = {
 static int wl12xx_setup(struct wl1271 *wl)
 {
 	struct wl12xx_priv *priv = wl->priv;
-	struct wl12xx_platform_data *pdata = wl->pdev->dev.platform_data;
+	struct wlcore_platdev_data *pdev_data = wl->pdev->dev.platform_data;
+	struct wl12xx_platform_data *pdata = pdev_data->pdata;
 
 	wl->rtable = wl12xx_rtable;
 	wl->num_tx_desc = WL12XX_NUM_TX_DESCRIPTORS;
diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c
index 9a66acf1205f..cd70335cd5e8 100644
--- a/drivers/net/wireless/ti/wlcore/main.c
+++ b/drivers/net/wireless/ti/wlcore/main.c
@@ -5966,7 +5966,8 @@ static void wlcore_nvs_cb(const struct firmware *fw, void *context)
 {
 	struct wl1271 *wl = context;
 	struct platform_device *pdev = wl->pdev;
-	struct wl12xx_platform_data *pdata = pdev->dev.platform_data;
+	struct wlcore_platdev_data *pdev_data = pdev->dev.platform_data;
+	struct wl12xx_platform_data *pdata = pdev_data->pdata;
 	unsigned long irqflags;
 	int ret;
 
@@ -5995,7 +5996,7 @@ static void wlcore_nvs_cb(const struct firmware *fw, void *context)
 
 	wl->irq = platform_get_irq(pdev, 0);
 	wl->platform_quirks = pdata->platform_quirks;
-	wl->if_ops = pdata->ops;
+	wl->if_ops = pdev_data->if_ops;
 
 	if (wl->platform_quirks & WL12XX_PLATFORM_QUIRK_EDGE_IRQ)
 		irqflags = IRQF_TRIGGER_RISING;
diff --git a/drivers/net/wireless/ti/wlcore/sdio.c b/drivers/net/wireless/ti/wlcore/sdio.c
index d4f184e2efed..1f6f6e30daca 100644
--- a/drivers/net/wireless/ti/wlcore/sdio.c
+++ b/drivers/net/wireless/ti/wlcore/sdio.c
@@ -218,6 +218,7 @@ static int wl1271_probe(struct sdio_func *func,
 				  const struct sdio_device_id *id)
 {
 	struct wl12xx_platform_data *wlan_data;
+	struct wlcore_platdev_data *pdev_data;
 	struct wl12xx_sdio_glue *glue;
 	struct resource res[1];
 	mmc_pm_flag_t mmcflags;
@@ -228,10 +229,18 @@ static int wl1271_probe(struct sdio_func *func,
 	if (func->num != 0x02)
 		return -ENODEV;
 
+	pdev_data = kzalloc(sizeof(*pdev_data), GFP_KERNEL);
+	if (!pdev_data) {
+		dev_err(&func->dev, "can't allocate platdev_data\n");
+		goto out;
+	}
+
+	pdev_data->if_ops = &sdio_ops;
+
 	glue = kzalloc(sizeof(*glue), GFP_KERNEL);
 	if (!glue) {
 		dev_err(&func->dev, "can't allocate glue\n");
-		goto out;
+		goto out_free_pdev_data;
 	}
 
 	glue->dev = &func->dev;
@@ -256,8 +265,6 @@ static int wl1271_probe(struct sdio_func *func,
 	if (mmcflags & MMC_PM_KEEP_POWER)
 		wlan_data->pwr_in_suspend = true;
 
-	wlan_data->ops = &sdio_ops;
-
 	sdio_set_drvdata(func, glue);
 
 	/* Tell PM core that we don't need the card to be powered now */
@@ -295,8 +302,10 @@ static int wl1271_probe(struct sdio_func *func,
 		goto out_dev_put;
 	}
 
-	ret = platform_device_add_data(glue->core, wlan_data,
-				       sizeof(*wlan_data));
+	pdev_data->pdata = wlan_data;
+
+	ret = platform_device_add_data(glue->core, pdev_data,
+				       sizeof(*pdev_data));
 	if (ret) {
 		dev_err(glue->dev, "can't add platform data\n");
 		goto out_dev_put;
@@ -315,6 +324,9 @@ out_dev_put:
 out_free_glue:
 	kfree(glue);
 
+out_free_pdev_data:
+	kfree(pdev_data);
+
 out:
 	return ret;
 }
diff --git a/drivers/net/wireless/ti/wlcore/spi.c b/drivers/net/wireless/ti/wlcore/spi.c
index 2d700b7ae14c..d437f4d28bd0 100644
--- a/drivers/net/wireless/ti/wlcore/spi.c
+++ b/drivers/net/wireless/ti/wlcore/spi.c
@@ -328,6 +328,7 @@ static int wl1271_probe(struct spi_device *spi)
 {
 	struct wl12xx_spi_glue *glue;
 	struct wl12xx_platform_data *pdata;
+	struct wlcore_platdev_data *pdev_data;
 	struct resource res[1];
 	int ret = -ENOMEM;
 
@@ -337,12 +338,18 @@ static int wl1271_probe(struct spi_device *spi)
 		return -ENODEV;
 	}
 
-	pdata->ops = &spi_ops;
+	pdev_data = kzalloc(sizeof(*pdev_data), GFP_KERNEL);
+	if (!pdev_data) {
+		dev_err(&spi->dev, "can't allocate platdev_data\n");
+		goto out;
+	}
+
+	pdev_data->if_ops = &spi_ops;
 
 	glue = kzalloc(sizeof(*glue), GFP_KERNEL);
 	if (!glue) {
 		dev_err(&spi->dev, "can't allocate glue\n");
-		goto out;
+		goto out_free_pdev_data;
 	}
 
 	glue->dev = &spi->dev;
@@ -380,7 +387,10 @@ static int wl1271_probe(struct spi_device *spi)
 		goto out_dev_put;
 	}
 
-	ret = platform_device_add_data(glue->core, pdata, sizeof(*pdata));
+	pdev_data->pdata = pdata;
+
+	ret = platform_device_add_data(glue->core, pdev_data,
+				       sizeof(*pdev_data));
 	if (ret) {
 		dev_err(glue->dev, "can't add platform data\n");
 		goto out_dev_put;
@@ -399,6 +409,10 @@ out_dev_put:
 
 out_free_glue:
 	kfree(glue);
+
+out_free_pdev_data:
+	kfree(pdev_data);
+
 out:
 	return ret;
 }
diff --git a/drivers/net/wireless/ti/wlcore/wlcore_i.h b/drivers/net/wireless/ti/wlcore/wlcore_i.h
index 20316ac328a2..c845b0ef7f4b 100644
--- a/drivers/net/wireless/ti/wlcore/wlcore_i.h
+++ b/drivers/net/wireless/ti/wlcore/wlcore_i.h
@@ -206,6 +206,11 @@ struct wl1271_if_operations {
 	void (*set_block_size) (struct device *child, unsigned int blksz);
 };
 
+struct wlcore_platdev_data {
+	struct wl12xx_platform_data *pdata;
+	struct wl1271_if_operations *if_ops;
+};
+
 #define MAX_NUM_KEYS 14
 #define MAX_KEY_SIZE 32
 
diff --git a/include/linux/wl12xx.h b/include/linux/wl12xx.h
index 0d6373195d32..360c9bce665c 100644
--- a/include/linux/wl12xx.h
+++ b/include/linux/wl12xx.h
@@ -55,8 +55,6 @@ struct wl12xx_platform_data {
 	int board_tcxo_clock;
 	unsigned long platform_quirks;
 	bool pwr_in_suspend;
-
-	struct wl1271_if_operations *ops;
 };
 
 /* Platform does not support level trigger interrupts */
-- 
cgit v1.2.3


From 6cc9efed707c575a9e5880ea68f8b9d36b235f1f Mon Sep 17 00:00:00 2001
From: Luciano Coelho <coelho@ti.com>
Date: Fri, 25 Jan 2013 12:05:34 +0200
Subject: wlcore: move wl12xx_platform_data up and make it truly optional

The platform data is used not only by wlcore-based drivers, but also
by wl1251.  Move it up in the directory hierarchy to reflect this.

Additionally, make it truly optional.  At the moment, disabling
platform data while wl1251_sdio or wlcore_sdio are enabled doesn't
work, but it will be necessary when device tree support is
implemented.

Signed-off-by: Luciano Coelho <coelho@ti.com>
Reviewed-by: Felipe Balbi <balbi@ti.com>
---
 arch/arm/mach-omap2/board-omap3evm.c               | 10 ++---
 drivers/net/wireless/ti/Kconfig                    |  9 ++++
 drivers/net/wireless/ti/Makefile                   |  4 +-
 drivers/net/wireless/ti/wilink_platform_data.c     | 49 ++++++++++++++++++++++
 drivers/net/wireless/ti/wlcore/Kconfig             |  5 ---
 drivers/net/wireless/ti/wlcore/Makefile            |  3 --
 .../net/wireless/ti/wlcore/wl12xx_platform_data.c  | 49 ----------------------
 include/linux/wl12xx.h                             | 14 +++++--
 8 files changed, 77 insertions(+), 66 deletions(-)
 create mode 100644 drivers/net/wireless/ti/wilink_platform_data.c
 delete mode 100644 drivers/net/wireless/ti/wlcore/wl12xx_platform_data.c

(limited to 'include/linux')

diff --git a/arch/arm/mach-omap2/board-omap3evm.c b/arch/arm/mach-omap2/board-omap3evm.c
index 3985f35aee06..a4ca63ba7faa 100644
--- a/arch/arm/mach-omap2/board-omap3evm.c
+++ b/arch/arm/mach-omap2/board-omap3evm.c
@@ -309,7 +309,7 @@ static struct omap2_hsmmc_info mmc[] = {
 		.gpio_wp	= 63,
 		.deferred	= true,
 	},
-#ifdef CONFIG_WL12XX_PLATFORM_DATA
+#ifdef CONFIG_WILINK_PLATFORM_DATA
 	{
 		.name		= "wl1271",
 		.mmc		= 2,
@@ -450,7 +450,7 @@ static struct regulator_init_data omap3evm_vio = {
 	.consumer_supplies	= omap3evm_vio_supply,
 };
 
-#ifdef CONFIG_WL12XX_PLATFORM_DATA
+#ifdef CONFIG_WILINK_PLATFORM_DATA
 
 #define OMAP3EVM_WLAN_PMENA_GPIO	(150)
 #define OMAP3EVM_WLAN_IRQ_GPIO		(149)
@@ -563,7 +563,7 @@ static struct omap_board_mux omap35x_board_mux[] __initdata = {
 				OMAP_PIN_OFF_NONE),
 	OMAP3_MUX(GPMC_WAIT2, OMAP_MUX_MODE4 | OMAP_PIN_INPUT_PULLUP |
 				OMAP_PIN_OFF_NONE),
-#ifdef CONFIG_WL12XX_PLATFORM_DATA
+#ifdef CONFIG_WILINK_PLATFORM_DATA
 	/* WLAN IRQ - GPIO 149 */
 	OMAP3_MUX(UART1_RTS, OMAP_MUX_MODE4 | OMAP_PIN_INPUT),
 
@@ -601,7 +601,7 @@ static struct omap_board_mux omap36x_board_mux[] __initdata = {
 	OMAP3_MUX(SYS_BOOT4, OMAP_MUX_MODE3 | OMAP_PIN_OFF_NONE),
 	OMAP3_MUX(SYS_BOOT5, OMAP_MUX_MODE3 | OMAP_PIN_OFF_NONE),
 	OMAP3_MUX(SYS_BOOT6, OMAP_MUX_MODE3 | OMAP_PIN_OFF_NONE),
-#ifdef CONFIG_WL12XX_PLATFORM_DATA
+#ifdef CONFIG_WILINK_PLATFORM_DATA
 	/* WLAN IRQ - GPIO 149 */
 	OMAP3_MUX(UART1_RTS, OMAP_MUX_MODE4 | OMAP_PIN_INPUT),
 
@@ -637,7 +637,7 @@ static struct gpio omap3_evm_ehci_gpios[] __initdata = {
 
 static void __init omap3_evm_wl12xx_init(void)
 {
-#ifdef CONFIG_WL12XX_PLATFORM_DATA
+#ifdef CONFIG_WILINK_PLATFORM_DATA
 	int ret;
 
 	/* WL12xx WLAN Init */
diff --git a/drivers/net/wireless/ti/Kconfig b/drivers/net/wireless/ti/Kconfig
index be800119d0a3..cbe1e7fef61b 100644
--- a/drivers/net/wireless/ti/Kconfig
+++ b/drivers/net/wireless/ti/Kconfig
@@ -12,4 +12,13 @@ source "drivers/net/wireless/ti/wl18xx/Kconfig"
 
 # keep last for automatic dependencies
 source "drivers/net/wireless/ti/wlcore/Kconfig"
+
+config WILINK_PLATFORM_DATA
+	bool "TI WiLink platform data"
+	depends on WLCORE_SDIO || WL1251_SDIO
+	default y
+	---help---
+	Small platform data bit needed to pass data to the sdio modules.
+
+
 endif # WL_TI
diff --git a/drivers/net/wireless/ti/Makefile b/drivers/net/wireless/ti/Makefile
index 4d6823983c04..af14231aeede 100644
--- a/drivers/net/wireless/ti/Makefile
+++ b/drivers/net/wireless/ti/Makefile
@@ -1,5 +1,7 @@
 obj-$(CONFIG_WLCORE)			+= wlcore/
 obj-$(CONFIG_WL12XX)			+= wl12xx/
-obj-$(CONFIG_WL12XX_PLATFORM_DATA)	+= wlcore/
 obj-$(CONFIG_WL1251)			+= wl1251/
 obj-$(CONFIG_WL18XX)			+= wl18xx/
+
+# small builtin driver bit
+obj-$(CONFIG_WILINK_PLATFORM_DATA)	+= wilink_platform_data.o
diff --git a/drivers/net/wireless/ti/wilink_platform_data.c b/drivers/net/wireless/ti/wilink_platform_data.c
new file mode 100644
index 000000000000..998e95895f9d
--- /dev/null
+++ b/drivers/net/wireless/ti/wilink_platform_data.c
@@ -0,0 +1,49 @@
+/*
+ * This file is part of wl12xx
+ *
+ * Copyright (C) 2010-2011 Texas Instruments, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/err.h>
+#include <linux/wl12xx.h>
+
+static struct wl12xx_platform_data *platform_data;
+
+int __init wl12xx_set_platform_data(const struct wl12xx_platform_data *data)
+{
+	if (platform_data)
+		return -EBUSY;
+	if (!data)
+		return -EINVAL;
+
+	platform_data = kmemdup(data, sizeof(*data), GFP_KERNEL);
+	if (!platform_data)
+		return -ENOMEM;
+
+	return 0;
+}
+
+struct wl12xx_platform_data *wl12xx_get_platform_data(void)
+{
+	if (!platform_data)
+		return ERR_PTR(-ENODEV);
+
+	return platform_data;
+}
+EXPORT_SYMBOL(wl12xx_get_platform_data);
diff --git a/drivers/net/wireless/ti/wlcore/Kconfig b/drivers/net/wireless/ti/wlcore/Kconfig
index d7b907e67170..2b832825c3d4 100644
--- a/drivers/net/wireless/ti/wlcore/Kconfig
+++ b/drivers/net/wireless/ti/wlcore/Kconfig
@@ -33,8 +33,3 @@ config WLCORE_SDIO
 
 	  If you choose to build a module, it'll be called wlcore_sdio.
 	  Say N if unsure.
-
-config WL12XX_PLATFORM_DATA
-	bool
-	depends on WLCORE_SDIO != n || WL1251_SDIO != n
-	default y
diff --git a/drivers/net/wireless/ti/wlcore/Makefile b/drivers/net/wireless/ti/wlcore/Makefile
index d9fba9e32130..b21398f6c3ec 100644
--- a/drivers/net/wireless/ti/wlcore/Makefile
+++ b/drivers/net/wireless/ti/wlcore/Makefile
@@ -9,7 +9,4 @@ obj-$(CONFIG_WLCORE)			+= wlcore.o
 obj-$(CONFIG_WLCORE_SPI)		+= wlcore_spi.o
 obj-$(CONFIG_WLCORE_SDIO)		+= wlcore_sdio.o
 
-# small builtin driver bit
-obj-$(CONFIG_WL12XX_PLATFORM_DATA)	+= wl12xx_platform_data.o
-
 ccflags-y += -D__CHECK_ENDIAN__
diff --git a/drivers/net/wireless/ti/wlcore/wl12xx_platform_data.c b/drivers/net/wireless/ti/wlcore/wl12xx_platform_data.c
deleted file mode 100644
index 998e95895f9d..000000000000
--- a/drivers/net/wireless/ti/wlcore/wl12xx_platform_data.c
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * This file is part of wl12xx
- *
- * Copyright (C) 2010-2011 Texas Instruments, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
- * 02110-1301 USA
- *
- */
-
-#include <linux/module.h>
-#include <linux/err.h>
-#include <linux/wl12xx.h>
-
-static struct wl12xx_platform_data *platform_data;
-
-int __init wl12xx_set_platform_data(const struct wl12xx_platform_data *data)
-{
-	if (platform_data)
-		return -EBUSY;
-	if (!data)
-		return -EINVAL;
-
-	platform_data = kmemdup(data, sizeof(*data), GFP_KERNEL);
-	if (!platform_data)
-		return -ENOMEM;
-
-	return 0;
-}
-
-struct wl12xx_platform_data *wl12xx_get_platform_data(void)
-{
-	if (!platform_data)
-		return ERR_PTR(-ENODEV);
-
-	return platform_data;
-}
-EXPORT_SYMBOL(wl12xx_get_platform_data);
diff --git a/include/linux/wl12xx.h b/include/linux/wl12xx.h
index 360c9bce665c..a54fe82e704b 100644
--- a/include/linux/wl12xx.h
+++ b/include/linux/wl12xx.h
@@ -24,6 +24,8 @@
 #ifndef _LINUX_WL12XX_H
 #define _LINUX_WL12XX_H
 
+#include <linux/err.h>
+
 /* Reference clock values */
 enum {
 	WL12XX_REFCLOCK_19	= 0, /* 19.2 MHz */
@@ -60,10 +62,12 @@ struct wl12xx_platform_data {
 /* Platform does not support level trigger interrupts */
 #define WL12XX_PLATFORM_QUIRK_EDGE_IRQ	BIT(0)
 
-#ifdef CONFIG_WL12XX_PLATFORM_DATA
+#ifdef CONFIG_WILINK_PLATFORM_DATA
 
 int wl12xx_set_platform_data(const struct wl12xx_platform_data *data);
 
+struct wl12xx_platform_data *wl12xx_get_platform_data(void);
+
 #else
 
 static inline
@@ -72,8 +76,12 @@ int wl12xx_set_platform_data(const struct wl12xx_platform_data *data)
 	return -ENOSYS;
 }
 
-#endif
+static inline
+struct wl12xx_platform_data *wl12xx_get_platform_data(void)
+{
+	return ERR_PTR(-ENODATA);
+}
 
-struct wl12xx_platform_data *wl12xx_get_platform_data(void);
+#endif
 
 #endif
-- 
cgit v1.2.3


From cd7bed00340475ee72a013a070e200e065085ef3 Mon Sep 17 00:00:00 2001
From: Mika Westerberg <mika.westerberg@linux.intel.com>
Date: Tue, 22 Jan 2013 12:26:28 +0200
Subject: spi/pxa2xx: break out the private DMA API usage into a separate file

The PXA SPI driver uses PXA platform specific private DMA implementation
which does not work on non-PXA platforms. In order to use this driver on
other platforms we break out the private DMA implementation into a separate
file that gets compiled only when CONFIG_SPI_PXA2XX_PXADMA is set. The DMA
functions are stubbed out if there is no DMA implementation selected (i.e
we are building on non-PXA platform).

While we are there we can kill the dummy DMA bits in pxa2xx_spi.h as they
are not needed anymore for CE4100.

Once this is done we can add the generic DMA engine support to the driver
that allows usage of any DMA controller that implements DMA engine API.

Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Tested-by: Lu Cao <lucao@marvell.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 drivers/spi/Kconfig             |   6 +
 drivers/spi/Makefile            |   4 +-
 drivers/spi/spi-pxa2xx-pxadma.c | 490 +++++++++++++++++++++++++++++++++
 drivers/spi/spi-pxa2xx.c        | 595 ++--------------------------------------
 drivers/spi/spi-pxa2xx.h        | 185 +++++++++++++
 include/linux/spi/pxa2xx_spi.h  |  80 ------
 6 files changed, 712 insertions(+), 648 deletions(-)
 create mode 100644 drivers/spi/spi-pxa2xx-pxadma.c
 create mode 100644 drivers/spi/spi-pxa2xx.h

(limited to 'include/linux')

diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
index a90393d7f106..f1878666e917 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig
@@ -297,6 +297,12 @@ config SPI_PPC4xx
 	help
 	  This selects a driver for the PPC4xx SPI Controller.
 
+config SPI_PXA2XX_PXADMA
+	bool "PXA2xx SSP legacy PXA DMA API support"
+	depends on SPI_PXA2XX && ARCH_PXA
+	help
+	  Enable PXA private legacy DMA API support.
+
 config SPI_PXA2XX
 	tristate "PXA2xx SSP SPI master"
 	depends on ARCH_PXA || PCI
diff --git a/drivers/spi/Makefile b/drivers/spi/Makefile
index 64e970ba261c..2e3cdd3caba9 100644
--- a/drivers/spi/Makefile
+++ b/drivers/spi/Makefile
@@ -47,7 +47,9 @@ obj-$(CONFIG_SPI_OMAP24XX)		+= spi-omap2-mcspi.o
 obj-$(CONFIG_SPI_ORION)			+= spi-orion.o
 obj-$(CONFIG_SPI_PL022)			+= spi-pl022.o
 obj-$(CONFIG_SPI_PPC4xx)		+= spi-ppc4xx.o
-obj-$(CONFIG_SPI_PXA2XX)		+= spi-pxa2xx.o
+spi-pxa2xx-platform-objs		:= spi-pxa2xx.o
+spi-pxa2xx-platform-$(CONFIG_SPI_PXA2XX_PXADMA)	+= spi-pxa2xx-pxadma.o
+obj-$(CONFIG_SPI_PXA2XX)		+= spi-pxa2xx-platform.o
 obj-$(CONFIG_SPI_PXA2XX_PCI)		+= spi-pxa2xx-pci.o
 obj-$(CONFIG_SPI_RSPI)			+= spi-rspi.o
 obj-$(CONFIG_SPI_S3C24XX)		+= spi-s3c24xx-hw.o
diff --git a/drivers/spi/spi-pxa2xx-pxadma.c b/drivers/spi/spi-pxa2xx-pxadma.c
new file mode 100644
index 000000000000..2916efc7cfe5
--- /dev/null
+++ b/drivers/spi/spi-pxa2xx-pxadma.c
@@ -0,0 +1,490 @@
+/*
+ * PXA2xx SPI private DMA support.
+ *
+ * Copyright (C) 2005 Stephen Street / StreetFire Sound Labs
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/pxa2xx_ssp.h>
+#include <linux/spi/spi.h>
+#include <linux/spi/pxa2xx_spi.h>
+
+#include "spi-pxa2xx.h"
+
+#define DMA_INT_MASK		(DCSR_ENDINTR | DCSR_STARTINTR | DCSR_BUSERR)
+#define RESET_DMA_CHANNEL	(DCSR_NODESC | DMA_INT_MASK)
+
+bool pxa2xx_spi_dma_is_possible(size_t len)
+{
+	/* Try to map dma buffer and do a dma transfer if successful, but
+	 * only if the length is non-zero and less than MAX_DMA_LEN.
+	 *
+	 * Zero-length non-descriptor DMA is illegal on PXA2xx; force use
+	 * of PIO instead.  Care is needed above because the transfer may
+	 * have have been passed with buffers that are already dma mapped.
+	 * A zero-length transfer in PIO mode will not try to write/read
+	 * to/from the buffers
+	 *
+	 * REVISIT large transfers are exactly where we most want to be
+	 * using DMA.  If this happens much, split those transfers into
+	 * multiple DMA segments rather than forcing PIO.
+	 */
+	return len > 0 && len <= MAX_DMA_LEN;
+}
+
+int pxa2xx_spi_map_dma_buffers(struct driver_data *drv_data)
+{
+	struct spi_message *msg = drv_data->cur_msg;
+	struct device *dev = &msg->spi->dev;
+
+	if (!drv_data->cur_chip->enable_dma)
+		return 0;
+
+	if (msg->is_dma_mapped)
+		return  drv_data->rx_dma && drv_data->tx_dma;
+
+	if (!IS_DMA_ALIGNED(drv_data->rx) || !IS_DMA_ALIGNED(drv_data->tx))
+		return 0;
+
+	/* Modify setup if rx buffer is null */
+	if (drv_data->rx == NULL) {
+		*drv_data->null_dma_buf = 0;
+		drv_data->rx = drv_data->null_dma_buf;
+		drv_data->rx_map_len = 4;
+	} else
+		drv_data->rx_map_len = drv_data->len;
+
+
+	/* Modify setup if tx buffer is null */
+	if (drv_data->tx == NULL) {
+		*drv_data->null_dma_buf = 0;
+		drv_data->tx = drv_data->null_dma_buf;
+		drv_data->tx_map_len = 4;
+	} else
+		drv_data->tx_map_len = drv_data->len;
+
+	/* Stream map the tx buffer. Always do DMA_TO_DEVICE first
+	 * so we flush the cache *before* invalidating it, in case
+	 * the tx and rx buffers overlap.
+	 */
+	drv_data->tx_dma = dma_map_single(dev, drv_data->tx,
+					drv_data->tx_map_len, DMA_TO_DEVICE);
+	if (dma_mapping_error(dev, drv_data->tx_dma))
+		return 0;
+
+	/* Stream map the rx buffer */
+	drv_data->rx_dma = dma_map_single(dev, drv_data->rx,
+					drv_data->rx_map_len, DMA_FROM_DEVICE);
+	if (dma_mapping_error(dev, drv_data->rx_dma)) {
+		dma_unmap_single(dev, drv_data->tx_dma,
+					drv_data->tx_map_len, DMA_TO_DEVICE);
+		return 0;
+	}
+
+	return 1;
+}
+
+static void pxa2xx_spi_unmap_dma_buffers(struct driver_data *drv_data)
+{
+	struct device *dev;
+
+	if (!drv_data->dma_mapped)
+		return;
+
+	if (!drv_data->cur_msg->is_dma_mapped) {
+		dev = &drv_data->cur_msg->spi->dev;
+		dma_unmap_single(dev, drv_data->rx_dma,
+					drv_data->rx_map_len, DMA_FROM_DEVICE);
+		dma_unmap_single(dev, drv_data->tx_dma,
+					drv_data->tx_map_len, DMA_TO_DEVICE);
+	}
+
+	drv_data->dma_mapped = 0;
+}
+
+static int wait_ssp_rx_stall(void const __iomem *ioaddr)
+{
+	unsigned long limit = loops_per_jiffy << 1;
+
+	while ((read_SSSR(ioaddr) & SSSR_BSY) && --limit)
+		cpu_relax();
+
+	return limit;
+}
+
+static int wait_dma_channel_stop(int channel)
+{
+	unsigned long limit = loops_per_jiffy << 1;
+
+	while (!(DCSR(channel) & DCSR_STOPSTATE) && --limit)
+		cpu_relax();
+
+	return limit;
+}
+
+static void pxa2xx_spi_dma_error_stop(struct driver_data *drv_data,
+				      const char *msg)
+{
+	void __iomem *reg = drv_data->ioaddr;
+
+	/* Stop and reset */
+	DCSR(drv_data->rx_channel) = RESET_DMA_CHANNEL;
+	DCSR(drv_data->tx_channel) = RESET_DMA_CHANNEL;
+	write_SSSR_CS(drv_data, drv_data->clear_sr);
+	write_SSCR1(read_SSCR1(reg) & ~drv_data->dma_cr1, reg);
+	if (!pxa25x_ssp_comp(drv_data))
+		write_SSTO(0, reg);
+	pxa2xx_spi_flush(drv_data);
+	write_SSCR0(read_SSCR0(reg) & ~SSCR0_SSE, reg);
+
+	pxa2xx_spi_unmap_dma_buffers(drv_data);
+
+	dev_err(&drv_data->pdev->dev, "%s\n", msg);
+
+	drv_data->cur_msg->state = ERROR_STATE;
+	tasklet_schedule(&drv_data->pump_transfers);
+}
+
+static void pxa2xx_spi_dma_transfer_complete(struct driver_data *drv_data)
+{
+	void __iomem *reg = drv_data->ioaddr;
+	struct spi_message *msg = drv_data->cur_msg;
+
+	/* Clear and disable interrupts on SSP and DMA channels*/
+	write_SSCR1(read_SSCR1(reg) & ~drv_data->dma_cr1, reg);
+	write_SSSR_CS(drv_data, drv_data->clear_sr);
+	DCSR(drv_data->tx_channel) = RESET_DMA_CHANNEL;
+	DCSR(drv_data->rx_channel) = RESET_DMA_CHANNEL;
+
+	if (wait_dma_channel_stop(drv_data->rx_channel) == 0)
+		dev_err(&drv_data->pdev->dev,
+			"dma_handler: dma rx channel stop failed\n");
+
+	if (wait_ssp_rx_stall(drv_data->ioaddr) == 0)
+		dev_err(&drv_data->pdev->dev,
+			"dma_transfer: ssp rx stall failed\n");
+
+	pxa2xx_spi_unmap_dma_buffers(drv_data);
+
+	/* update the buffer pointer for the amount completed in dma */
+	drv_data->rx += drv_data->len -
+			(DCMD(drv_data->rx_channel) & DCMD_LENGTH);
+
+	/* read trailing data from fifo, it does not matter how many
+	 * bytes are in the fifo just read until buffer is full
+	 * or fifo is empty, which ever occurs first */
+	drv_data->read(drv_data);
+
+	/* return count of what was actually read */
+	msg->actual_length += drv_data->len -
+				(drv_data->rx_end - drv_data->rx);
+
+	/* Transfer delays and chip select release are
+	 * handled in pump_transfers or giveback
+	 */
+
+	/* Move to next transfer */
+	msg->state = pxa2xx_spi_next_transfer(drv_data);
+
+	/* Schedule transfer tasklet */
+	tasklet_schedule(&drv_data->pump_transfers);
+}
+
+void pxa2xx_spi_dma_handler(int channel, void *data)
+{
+	struct driver_data *drv_data = data;
+	u32 irq_status = DCSR(channel) & DMA_INT_MASK;
+
+	if (irq_status & DCSR_BUSERR) {
+
+		if (channel == drv_data->tx_channel)
+			pxa2xx_spi_dma_error_stop(drv_data,
+				"dma_handler: bad bus address on tx channel");
+		else
+			pxa2xx_spi_dma_error_stop(drv_data,
+				"dma_handler: bad bus address on rx channel");
+		return;
+	}
+
+	/* PXA255x_SSP has no timeout interrupt, wait for tailing bytes */
+	if ((channel == drv_data->tx_channel)
+		&& (irq_status & DCSR_ENDINTR)
+		&& (drv_data->ssp_type == PXA25x_SSP)) {
+
+		/* Wait for rx to stall */
+		if (wait_ssp_rx_stall(drv_data->ioaddr) == 0)
+			dev_err(&drv_data->pdev->dev,
+				"dma_handler: ssp rx stall failed\n");
+
+		/* finish this transfer, start the next */
+		pxa2xx_spi_dma_transfer_complete(drv_data);
+	}
+}
+
+irqreturn_t pxa2xx_spi_dma_transfer(struct driver_data *drv_data)
+{
+	u32 irq_status;
+	void __iomem *reg = drv_data->ioaddr;
+
+	irq_status = read_SSSR(reg) & drv_data->mask_sr;
+	if (irq_status & SSSR_ROR) {
+		pxa2xx_spi_dma_error_stop(drv_data,
+					  "dma_transfer: fifo overrun");
+		return IRQ_HANDLED;
+	}
+
+	/* Check for false positive timeout */
+	if ((irq_status & SSSR_TINT)
+		&& (DCSR(drv_data->tx_channel) & DCSR_RUN)) {
+		write_SSSR(SSSR_TINT, reg);
+		return IRQ_HANDLED;
+	}
+
+	if (irq_status & SSSR_TINT || drv_data->rx == drv_data->rx_end) {
+
+		/* Clear and disable timeout interrupt, do the rest in
+		 * dma_transfer_complete */
+		if (!pxa25x_ssp_comp(drv_data))
+			write_SSTO(0, reg);
+
+		/* finish this transfer, start the next */
+		pxa2xx_spi_dma_transfer_complete(drv_data);
+
+		return IRQ_HANDLED;
+	}
+
+	/* Opps problem detected */
+	return IRQ_NONE;
+}
+
+int pxa2xx_spi_dma_prepare(struct driver_data *drv_data, u32 dma_burst)
+{
+	u32 dma_width;
+
+	switch (drv_data->n_bytes) {
+	case 1:
+		dma_width = DCMD_WIDTH1;
+		break;
+	case 2:
+		dma_width = DCMD_WIDTH2;
+		break;
+	default:
+		dma_width = DCMD_WIDTH4;
+		break;
+	}
+
+	/* Setup rx DMA Channel */
+	DCSR(drv_data->rx_channel) = RESET_DMA_CHANNEL;
+	DSADR(drv_data->rx_channel) = drv_data->ssdr_physical;
+	DTADR(drv_data->rx_channel) = drv_data->rx_dma;
+	if (drv_data->rx == drv_data->null_dma_buf)
+		/* No target address increment */
+		DCMD(drv_data->rx_channel) = DCMD_FLOWSRC
+						| dma_width
+						| dma_burst
+						| drv_data->len;
+	else
+		DCMD(drv_data->rx_channel) = DCMD_INCTRGADDR
+						| DCMD_FLOWSRC
+						| dma_width
+						| dma_burst
+						| drv_data->len;
+
+	/* Setup tx DMA Channel */
+	DCSR(drv_data->tx_channel) = RESET_DMA_CHANNEL;
+	DSADR(drv_data->tx_channel) = drv_data->tx_dma;
+	DTADR(drv_data->tx_channel) = drv_data->ssdr_physical;
+	if (drv_data->tx == drv_data->null_dma_buf)
+		/* No source address increment */
+		DCMD(drv_data->tx_channel) = DCMD_FLOWTRG
+						| dma_width
+						| dma_burst
+						| drv_data->len;
+	else
+		DCMD(drv_data->tx_channel) = DCMD_INCSRCADDR
+						| DCMD_FLOWTRG
+						| dma_width
+						| dma_burst
+						| drv_data->len;
+
+	/* Enable dma end irqs on SSP to detect end of transfer */
+	if (drv_data->ssp_type == PXA25x_SSP)
+		DCMD(drv_data->tx_channel) |= DCMD_ENDIRQEN;
+
+	return 0;
+}
+
+void pxa2xx_spi_dma_start(struct driver_data *drv_data)
+{
+	DCSR(drv_data->rx_channel) |= DCSR_RUN;
+	DCSR(drv_data->tx_channel) |= DCSR_RUN;
+}
+
+int pxa2xx_spi_dma_setup(struct driver_data *drv_data)
+{
+	struct device *dev = &drv_data->pdev->dev;
+	struct ssp_device *ssp = drv_data->ssp;
+
+	/* Get two DMA channels	(rx and tx) */
+	drv_data->rx_channel = pxa_request_dma("pxa2xx_spi_ssp_rx",
+						DMA_PRIO_HIGH,
+						pxa2xx_spi_dma_handler,
+						drv_data);
+	if (drv_data->rx_channel < 0) {
+		dev_err(dev, "problem (%d) requesting rx channel\n",
+			drv_data->rx_channel);
+		return -ENODEV;
+	}
+	drv_data->tx_channel = pxa_request_dma("pxa2xx_spi_ssp_tx",
+						DMA_PRIO_MEDIUM,
+						pxa2xx_spi_dma_handler,
+						drv_data);
+	if (drv_data->tx_channel < 0) {
+		dev_err(dev, "problem (%d) requesting tx channel\n",
+			drv_data->tx_channel);
+		pxa_free_dma(drv_data->rx_channel);
+		return -ENODEV;
+	}
+
+	DRCMR(ssp->drcmr_rx) = DRCMR_MAPVLD | drv_data->rx_channel;
+	DRCMR(ssp->drcmr_tx) = DRCMR_MAPVLD | drv_data->tx_channel;
+
+	return 0;
+}
+
+void pxa2xx_spi_dma_release(struct driver_data *drv_data)
+{
+	struct ssp_device *ssp = drv_data->ssp;
+
+	DRCMR(ssp->drcmr_rx) = 0;
+	DRCMR(ssp->drcmr_tx) = 0;
+
+	if (drv_data->tx_channel != 0)
+		pxa_free_dma(drv_data->tx_channel);
+	if (drv_data->rx_channel != 0)
+		pxa_free_dma(drv_data->rx_channel);
+}
+
+void pxa2xx_spi_dma_resume(struct driver_data *drv_data)
+{
+	if (drv_data->rx_channel != -1)
+		DRCMR(drv_data->ssp->drcmr_rx) =
+			DRCMR_MAPVLD | drv_data->rx_channel;
+	if (drv_data->tx_channel != -1)
+		DRCMR(drv_data->ssp->drcmr_tx) =
+			DRCMR_MAPVLD | drv_data->tx_channel;
+}
+
+int pxa2xx_spi_set_dma_burst_and_threshold(struct chip_data *chip,
+					   struct spi_device *spi,
+					   u8 bits_per_word, u32 *burst_code,
+					   u32 *threshold)
+{
+	struct pxa2xx_spi_chip *chip_info =
+			(struct pxa2xx_spi_chip *)spi->controller_data;
+	int bytes_per_word;
+	int burst_bytes;
+	int thresh_words;
+	int req_burst_size;
+	int retval = 0;
+
+	/* Set the threshold (in registers) to equal the same amount of data
+	 * as represented by burst size (in bytes).  The computation below
+	 * is (burst_size rounded up to nearest 8 byte, word or long word)
+	 * divided by (bytes/register); the tx threshold is the inverse of
+	 * the rx, so that there will always be enough data in the rx fifo
+	 * to satisfy a burst, and there will always be enough space in the
+	 * tx fifo to accept a burst (a tx burst will overwrite the fifo if
+	 * there is not enough space), there must always remain enough empty
+	 * space in the rx fifo for any data loaded to the tx fifo.
+	 * Whenever burst_size (in bytes) equals bits/word, the fifo threshold
+	 * will be 8, or half the fifo;
+	 * The threshold can only be set to 2, 4 or 8, but not 16, because
+	 * to burst 16 to the tx fifo, the fifo would have to be empty;
+	 * however, the minimum fifo trigger level is 1, and the tx will
+	 * request service when the fifo is at this level, with only 15 spaces.
+	 */
+
+	/* find bytes/word */
+	if (bits_per_word <= 8)
+		bytes_per_word = 1;
+	else if (bits_per_word <= 16)
+		bytes_per_word = 2;
+	else
+		bytes_per_word = 4;
+
+	/* use struct pxa2xx_spi_chip->dma_burst_size if available */
+	if (chip_info)
+		req_burst_size = chip_info->dma_burst_size;
+	else {
+		switch (chip->dma_burst_size) {
+		default:
+			/* if the default burst size is not set,
+			 * do it now */
+			chip->dma_burst_size = DCMD_BURST8;
+		case DCMD_BURST8:
+			req_burst_size = 8;
+			break;
+		case DCMD_BURST16:
+			req_burst_size = 16;
+			break;
+		case DCMD_BURST32:
+			req_burst_size = 32;
+			break;
+		}
+	}
+	if (req_burst_size <= 8) {
+		*burst_code = DCMD_BURST8;
+		burst_bytes = 8;
+	} else if (req_burst_size <= 16) {
+		if (bytes_per_word == 1) {
+			/* don't burst more than 1/2 the fifo */
+			*burst_code = DCMD_BURST8;
+			burst_bytes = 8;
+			retval = 1;
+		} else {
+			*burst_code = DCMD_BURST16;
+			burst_bytes = 16;
+		}
+	} else {
+		if (bytes_per_word == 1) {
+			/* don't burst more than 1/2 the fifo */
+			*burst_code = DCMD_BURST8;
+			burst_bytes = 8;
+			retval = 1;
+		} else if (bytes_per_word == 2) {
+			/* don't burst more than 1/2 the fifo */
+			*burst_code = DCMD_BURST16;
+			burst_bytes = 16;
+			retval = 1;
+		} else {
+			*burst_code = DCMD_BURST32;
+			burst_bytes = 32;
+		}
+	}
+
+	thresh_words = burst_bytes / bytes_per_word;
+
+	/* thresh_words will be between 2 and 8 */
+	*threshold = (SSCR1_RxTresh(thresh_words) & SSCR1_RFT)
+			| (SSCR1_TxTresh(16-thresh_words) & SSCR1_TFT);
+
+	return retval;
+}
diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c
index 304cf6eb50e6..5b7c2a4ba828 100644
--- a/drivers/spi/spi-pxa2xx.c
+++ b/drivers/spi/spi-pxa2xx.c
@@ -24,7 +24,6 @@
 #include <linux/interrupt.h>
 #include <linux/platform_device.h>
 #include <linux/spi/pxa2xx_spi.h>
-#include <linux/dma-mapping.h>
 #include <linux/spi/spi.h>
 #include <linux/workqueue.h>
 #include <linux/delay.h>
@@ -36,6 +35,7 @@
 #include <asm/irq.h>
 #include <asm/delay.h>
 
+#include "spi-pxa2xx.h"
 
 MODULE_AUTHOR("Stephen Street");
 MODULE_DESCRIPTION("PXA2xx SSP SPI Controller");
@@ -46,12 +46,6 @@ MODULE_ALIAS("platform:pxa2xx-spi");
 
 #define TIMOUT_DFLT		1000
 
-#define DMA_INT_MASK		(DCSR_ENDINTR | DCSR_STARTINTR | DCSR_BUSERR)
-#define RESET_DMA_CHANNEL	(DCSR_NODESC | DMA_INT_MASK)
-#define IS_DMA_ALIGNED(x)	IS_ALIGNED((unsigned long)(x), DMA_ALIGNMENT)
-#define MAX_DMA_LEN		8191
-#define DMA_ALIGNMENT		8
-
 /*
  * for testing SSCR1 changes that require SSP restart, basically
  * everything except the service and interrupt enables, the pxa270 developer
@@ -66,106 +60,6 @@ MODULE_ALIAS("platform:pxa2xx-spi");
 				| SSCR1_RFT | SSCR1_TFT | SSCR1_MWDS \
 				| SSCR1_SPH | SSCR1_SPO | SSCR1_LBM)
 
-#define DEFINE_SSP_REG(reg, off) \
-static inline u32 read_##reg(void const __iomem *p) \
-{ return __raw_readl(p + (off)); } \
-\
-static inline void write_##reg(u32 v, void __iomem *p) \
-{ __raw_writel(v, p + (off)); }
-
-DEFINE_SSP_REG(SSCR0, 0x00)
-DEFINE_SSP_REG(SSCR1, 0x04)
-DEFINE_SSP_REG(SSSR, 0x08)
-DEFINE_SSP_REG(SSITR, 0x0c)
-DEFINE_SSP_REG(SSDR, 0x10)
-DEFINE_SSP_REG(SSTO, 0x28)
-DEFINE_SSP_REG(SSPSP, 0x2c)
-
-#define START_STATE ((void*)0)
-#define RUNNING_STATE ((void*)1)
-#define DONE_STATE ((void*)2)
-#define ERROR_STATE ((void*)-1)
-
-struct driver_data {
-	/* Driver model hookup */
-	struct platform_device *pdev;
-
-	/* SSP Info */
-	struct ssp_device *ssp;
-
-	/* SPI framework hookup */
-	enum pxa_ssp_type ssp_type;
-	struct spi_master *master;
-
-	/* PXA hookup */
-	struct pxa2xx_spi_master *master_info;
-
-	/* DMA setup stuff */
-	int rx_channel;
-	int tx_channel;
-	u32 *null_dma_buf;
-
-	/* SSP register addresses */
-	void __iomem *ioaddr;
-	u32 ssdr_physical;
-
-	/* SSP masks*/
-	u32 dma_cr1;
-	u32 int_cr1;
-	u32 clear_sr;
-	u32 mask_sr;
-
-	/* Maximun clock rate */
-	unsigned long max_clk_rate;
-
-	/* Message Transfer pump */
-	struct tasklet_struct pump_transfers;
-
-	/* Current message transfer state info */
-	struct spi_message* cur_msg;
-	struct spi_transfer* cur_transfer;
-	struct chip_data *cur_chip;
-	size_t len;
-	void *tx;
-	void *tx_end;
-	void *rx;
-	void *rx_end;
-	int dma_mapped;
-	dma_addr_t rx_dma;
-	dma_addr_t tx_dma;
-	size_t rx_map_len;
-	size_t tx_map_len;
-	u8 n_bytes;
-	u32 dma_width;
-	int (*write)(struct driver_data *drv_data);
-	int (*read)(struct driver_data *drv_data);
-	irqreturn_t (*transfer_handler)(struct driver_data *drv_data);
-	void (*cs_control)(u32 command);
-};
-
-struct chip_data {
-	u32 cr0;
-	u32 cr1;
-	u32 psp;
-	u32 timeout;
-	u8 n_bytes;
-	u32 dma_width;
-	u32 dma_burst_size;
-	u32 threshold;
-	u32 dma_threshold;
-	u8 enable_dma;
-	u8 bits_per_word;
-	u32 speed_hz;
-	union {
-		int gpio_cs;
-		unsigned int frm;
-	};
-	int gpio_cs_inverted;
-	int (*write)(struct driver_data *drv_data);
-	int (*read)(struct driver_data *drv_data);
-	void (*cs_control)(u32 command);
-};
-
 static void cs_assert(struct driver_data *drv_data)
 {
 	struct chip_data *chip = drv_data->cur_chip;
@@ -200,26 +94,7 @@ static void cs_deassert(struct driver_data *drv_data)
 		gpio_set_value(chip->gpio_cs, !chip->gpio_cs_inverted);
 }
 
-static void write_SSSR_CS(struct driver_data *drv_data, u32 val)
-{
-	void __iomem *reg = drv_data->ioaddr;
-
-	if (drv_data->ssp_type == CE4100_SSP)
-		val |= read_SSSR(reg) & SSSR_ALT_FRM_MASK;
-
-	write_SSSR(val, reg);
-}
-
-static int pxa25x_ssp_comp(struct driver_data *drv_data)
-{
-	if (drv_data->ssp_type == PXA25x_SSP)
-		return 1;
-	if (drv_data->ssp_type == CE4100_SSP)
-		return 1;
-	return 0;
-}
-
-static int flush(struct driver_data *drv_data)
+int pxa2xx_spi_flush(struct driver_data *drv_data)
 {
 	unsigned long limit = loops_per_jiffy << 1;
 
@@ -345,7 +220,7 @@ static int u32_reader(struct driver_data *drv_data)
 	return drv_data->rx == drv_data->rx_end;
 }
 
-static void *next_transfer(struct driver_data *drv_data)
+void *pxa2xx_spi_next_transfer(struct driver_data *drv_data)
 {
 	struct spi_message *msg = drv_data->cur_msg;
 	struct spi_transfer *trans = drv_data->cur_transfer;
@@ -361,76 +236,6 @@ static void *next_transfer(struct driver_data *drv_data)
 		return DONE_STATE;
 }
 
-static int map_dma_buffers(struct driver_data *drv_data)
-{
-	struct spi_message *msg = drv_data->cur_msg;
-	struct device *dev = &msg->spi->dev;
-
-	if (!drv_data->cur_chip->enable_dma)
-		return 0;
-
-	if (msg->is_dma_mapped)
-		return  drv_data->rx_dma && drv_data->tx_dma;
-
-	if (!IS_DMA_ALIGNED(drv_data->rx) || !IS_DMA_ALIGNED(drv_data->tx))
-		return 0;
-
-	/* Modify setup if rx buffer is null */
-	if (drv_data->rx == NULL) {
-		*drv_data->null_dma_buf = 0;
-		drv_data->rx = drv_data->null_dma_buf;
-		drv_data->rx_map_len = 4;
-	} else
-		drv_data->rx_map_len = drv_data->len;
-
-
-	/* Modify setup if tx buffer is null */
-	if (drv_data->tx == NULL) {
-		*drv_data->null_dma_buf = 0;
-		drv_data->tx = drv_data->null_dma_buf;
-		drv_data->tx_map_len = 4;
-	} else
-		drv_data->tx_map_len = drv_data->len;
-
-	/* Stream map the tx buffer. Always do DMA_TO_DEVICE first
-	 * so we flush the cache *before* invalidating it, in case
-	 * the tx and rx buffers overlap.
-	 */
-	drv_data->tx_dma = dma_map_single(dev, drv_data->tx,
-					drv_data->tx_map_len, DMA_TO_DEVICE);
-	if (dma_mapping_error(dev, drv_data->tx_dma))
-		return 0;
-
-	/* Stream map the rx buffer */
-	drv_data->rx_dma = dma_map_single(dev, drv_data->rx,
-					drv_data->rx_map_len, DMA_FROM_DEVICE);
-	if (dma_mapping_error(dev, drv_data->rx_dma)) {
-		dma_unmap_single(dev, drv_data->tx_dma,
-					drv_data->tx_map_len, DMA_TO_DEVICE);
-		return 0;
-	}
-
-	return 1;
-}
-
-static void unmap_dma_buffers(struct driver_data *drv_data)
-{
-	struct device *dev;
-
-	if (!drv_data->dma_mapped)
-		return;
-
-	if (!drv_data->cur_msg->is_dma_mapped) {
-		dev = &drv_data->cur_msg->spi->dev;
-		dma_unmap_single(dev, drv_data->rx_dma,
-					drv_data->rx_map_len, DMA_FROM_DEVICE);
-		dma_unmap_single(dev, drv_data->tx_dma,
-					drv_data->tx_map_len, DMA_TO_DEVICE);
-	}
-
-	drv_data->dma_mapped = 0;
-}
-
 /* caller already set message->status; dma and pio irqs are blocked */
 static void giveback(struct driver_data *drv_data)
 {
@@ -483,161 +288,6 @@ static void giveback(struct driver_data *drv_data)
 	drv_data->cur_chip = NULL;
 }
 
-static int wait_ssp_rx_stall(void const __iomem *ioaddr)
-{
-	unsigned long limit = loops_per_jiffy << 1;
-
-	while ((read_SSSR(ioaddr) & SSSR_BSY) && --limit)
-		cpu_relax();
-
-	return limit;
-}
-
-static int wait_dma_channel_stop(int channel)
-{
-	unsigned long limit = loops_per_jiffy << 1;
-
-	while (!(DCSR(channel) & DCSR_STOPSTATE) && --limit)
-		cpu_relax();
-
-	return limit;
-}
-
-static void dma_error_stop(struct driver_data *drv_data, const char *msg)
-{
-	void __iomem *reg = drv_data->ioaddr;
-
-	/* Stop and reset */
-	DCSR(drv_data->rx_channel) = RESET_DMA_CHANNEL;
-	DCSR(drv_data->tx_channel) = RESET_DMA_CHANNEL;
-	write_SSSR_CS(drv_data, drv_data->clear_sr);
-	write_SSCR1(read_SSCR1(reg) & ~drv_data->dma_cr1, reg);
-	if (!pxa25x_ssp_comp(drv_data))
-		write_SSTO(0, reg);
-	flush(drv_data);
-	write_SSCR0(read_SSCR0(reg) & ~SSCR0_SSE, reg);
-
-	unmap_dma_buffers(drv_data);
-
-	dev_err(&drv_data->pdev->dev, "%s\n", msg);
-
-	drv_data->cur_msg->state = ERROR_STATE;
-	tasklet_schedule(&drv_data->pump_transfers);
-}
-
-static void dma_transfer_complete(struct driver_data *drv_data)
-{
-	void __iomem *reg = drv_data->ioaddr;
-	struct spi_message *msg = drv_data->cur_msg;
-
-	/* Clear and disable interrupts on SSP and DMA channels*/
-	write_SSCR1(read_SSCR1(reg) & ~drv_data->dma_cr1, reg);
-	write_SSSR_CS(drv_data, drv_data->clear_sr);
-	DCSR(drv_data->tx_channel) = RESET_DMA_CHANNEL;
-	DCSR(drv_data->rx_channel) = RESET_DMA_CHANNEL;
-
-	if (wait_dma_channel_stop(drv_data->rx_channel) == 0)
-		dev_err(&drv_data->pdev->dev,
-			"dma_handler: dma rx channel stop failed\n");
-
-	if (wait_ssp_rx_stall(drv_data->ioaddr) == 0)
-		dev_err(&drv_data->pdev->dev,
-			"dma_transfer: ssp rx stall failed\n");
-
-	unmap_dma_buffers(drv_data);
-
-	/* update the buffer pointer for the amount completed in dma */
-	drv_data->rx += drv_data->len -
-			(DCMD(drv_data->rx_channel) & DCMD_LENGTH);
-
-	/* read trailing data from fifo, it does not matter how many
-	 * bytes are in the fifo just read until buffer is full
-	 * or fifo is empty, which ever occurs first */
-	drv_data->read(drv_data);
-
-	/* return count of what was actually read */
-	msg->actual_length += drv_data->len -
-				(drv_data->rx_end - drv_data->rx);
-
-	/* Transfer delays and chip select release are
-	 * handled in pump_transfers or giveback
-	 */
-
-	/* Move to next transfer */
-	msg->state = next_transfer(drv_data);
-
-	/* Schedule transfer tasklet */
-	tasklet_schedule(&drv_data->pump_transfers);
-}
-
-static void dma_handler(int channel, void *data)
-{
-	struct driver_data *drv_data = data;
-	u32 irq_status = DCSR(channel) & DMA_INT_MASK;
-
-	if (irq_status & DCSR_BUSERR) {
-
-		if (channel == drv_data->tx_channel)
-			dma_error_stop(drv_data,
-					"dma_handler: "
-					"bad bus address on tx channel");
-		else
-			dma_error_stop(drv_data,
-					"dma_handler: "
-					"bad bus address on rx channel");
-		return;
-	}
-
-	/* PXA255x_SSP has no timeout interrupt, wait for tailing bytes */
-	if ((channel == drv_data->tx_channel)
-		&& (irq_status & DCSR_ENDINTR)
-		&& (drv_data->ssp_type == PXA25x_SSP)) {
-
-		/* Wait for rx to stall */
-		if (wait_ssp_rx_stall(drv_data->ioaddr) == 0)
-			dev_err(&drv_data->pdev->dev,
-				"dma_handler: ssp rx stall failed\n");
-
-		/* finish this transfer, start the next */
-		dma_transfer_complete(drv_data);
-	}
-}
-
-static irqreturn_t dma_transfer(struct driver_data *drv_data)
-{
-	u32 irq_status;
-	void __iomem *reg = drv_data->ioaddr;
-
-	irq_status = read_SSSR(reg) & drv_data->mask_sr;
-	if (irq_status & SSSR_ROR) {
-		dma_error_stop(drv_data, "dma_transfer: fifo overrun");
-		return IRQ_HANDLED;
-	}
-
-	/* Check for false positive timeout */
-	if ((irq_status & SSSR_TINT)
-		&& (DCSR(drv_data->tx_channel) & DCSR_RUN)) {
-		write_SSSR(SSSR_TINT, reg);
-		return IRQ_HANDLED;
-	}
-
-	if (irq_status & SSSR_TINT || drv_data->rx == drv_data->rx_end) {
-
-		/* Clear and disable timeout interrupt, do the rest in
-		 * dma_transfer_complete */
-		if (!pxa25x_ssp_comp(drv_data))
-			write_SSTO(0, reg);
-
-		/* finish this transfer, start the next */
-		dma_transfer_complete(drv_data);
-
-		return IRQ_HANDLED;
-	}
-
-	/* Opps problem detected */
-	return IRQ_NONE;
-}
-
 static void reset_sccr1(struct driver_data *drv_data)
 {
 	void __iomem *reg = drv_data->ioaddr;
@@ -659,7 +309,7 @@ static void int_error_stop(struct driver_data *drv_data, const char* msg)
 	reset_sccr1(drv_data);
 	if (!pxa25x_ssp_comp(drv_data))
 		write_SSTO(0, reg);
-	flush(drv_data);
+	pxa2xx_spi_flush(drv_data);
 	write_SSCR0(read_SSCR0(reg) & ~SSCR0_SSE, reg);
 
 	dev_err(&drv_data->pdev->dev, "%s\n", msg);
@@ -687,7 +337,7 @@ static void int_transfer_complete(struct driver_data *drv_data)
 	 */
 
 	/* Move to next transfer */
-	drv_data->cur_msg->state = next_transfer(drv_data);
+	drv_data->cur_msg->state = pxa2xx_spi_next_transfer(drv_data);
 
 	/* Schedule transfer tasklet */
 	tasklet_schedule(&drv_data->pump_transfers);
@@ -798,103 +448,6 @@ static irqreturn_t ssp_int(int irq, void *dev_id)
 	return drv_data->transfer_handler(drv_data);
 }
 
-static int set_dma_burst_and_threshold(struct chip_data *chip,
-				struct spi_device *spi,
-				u8 bits_per_word, u32 *burst_code,
-				u32 *threshold)
-{
-	struct pxa2xx_spi_chip *chip_info =
-			(struct pxa2xx_spi_chip *)spi->controller_data;
-	int bytes_per_word;
-	int burst_bytes;
-	int thresh_words;
-	int req_burst_size;
-	int retval = 0;
-
-	/* Set the threshold (in registers) to equal the same amount of data
-	 * as represented by burst size (in bytes).  The computation below
-	 * is (burst_size rounded up to nearest 8 byte, word or long word)
-	 * divided by (bytes/register); the tx threshold is the inverse of
-	 * the rx, so that there will always be enough data in the rx fifo
-	 * to satisfy a burst, and there will always be enough space in the
-	 * tx fifo to accept a burst (a tx burst will overwrite the fifo if
-	 * there is not enough space), there must always remain enough empty
-	 * space in the rx fifo for any data loaded to the tx fifo.
-	 * Whenever burst_size (in bytes) equals bits/word, the fifo threshold
-	 * will be 8, or half the fifo;
-	 * The threshold can only be set to 2, 4 or 8, but not 16, because
-	 * to burst 16 to the tx fifo, the fifo would have to be empty;
-	 * however, the minimum fifo trigger level is 1, and the tx will
-	 * request service when the fifo is at this level, with only 15 spaces.
-	 */
-
-	/* find bytes/word */
-	if (bits_per_word <= 8)
-		bytes_per_word = 1;
-	else if (bits_per_word <= 16)
-		bytes_per_word = 2;
-	else
-		bytes_per_word = 4;
-
-	/* use struct pxa2xx_spi_chip->dma_burst_size if available */
-	if (chip_info)
-		req_burst_size = chip_info->dma_burst_size;
-	else {
-		switch (chip->dma_burst_size) {
-		default:
-			/* if the default burst size is not set,
-			 * do it now */
-			chip->dma_burst_size = DCMD_BURST8;
-		case DCMD_BURST8:
-			req_burst_size = 8;
-			break;
-		case DCMD_BURST16:
-			req_burst_size = 16;
-			break;
-		case DCMD_BURST32:
-			req_burst_size = 32;
-			break;
-		}
-	}
-	if (req_burst_size <= 8) {
-		*burst_code = DCMD_BURST8;
-		burst_bytes = 8;
-	} else if (req_burst_size <= 16) {
-		if (bytes_per_word == 1) {
-			/* don't burst more than 1/2 the fifo */
-			*burst_code = DCMD_BURST8;
-			burst_bytes = 8;
-			retval = 1;
-		} else {
-			*burst_code = DCMD_BURST16;
-			burst_bytes = 16;
-		}
-	} else {
-		if (bytes_per_word == 1) {
-			/* don't burst more than 1/2 the fifo */
-			*burst_code = DCMD_BURST8;
-			burst_bytes = 8;
-			retval = 1;
-		} else if (bytes_per_word == 2) {
-			/* don't burst more than 1/2 the fifo */
-			*burst_code = DCMD_BURST16;
-			burst_bytes = 16;
-			retval = 1;
-		} else {
-			*burst_code = DCMD_BURST32;
-			burst_bytes = 32;
-		}
-	}
-
-	thresh_words = burst_bytes / bytes_per_word;
-
-	/* thresh_words will be between 2 and 8 */
-	*threshold = (SSCR1_RxTresh(thresh_words) & SSCR1_RFT)
-			| (SSCR1_TxTresh(16-thresh_words) & SSCR1_TFT);
-
-	return retval;
-}
-
 static unsigned int ssp_get_clk_div(struct driver_data *drv_data, int rate)
 {
 	unsigned long ssp_clk = drv_data->max_clk_rate;
@@ -956,8 +509,8 @@ static void pump_transfers(unsigned long data)
 			cs_deassert(drv_data);
 	}
 
-	/* Check for transfers that need multiple DMA segments */
-	if (transfer->len > MAX_DMA_LEN && chip->enable_dma) {
+	/* Check if we can DMA this transfer */
+	if (!pxa2xx_spi_dma_is_possible(transfer->len) && chip->enable_dma) {
 
 		/* reject already-mapped transfers; PIO won't always work */
 		if (message->is_dma_mapped
@@ -980,21 +533,20 @@ static void pump_transfers(unsigned long data)
 	}
 
 	/* Setup the transfer state based on the type of transfer */
-	if (flush(drv_data) == 0) {
+	if (pxa2xx_spi_flush(drv_data) == 0) {
 		dev_err(&drv_data->pdev->dev, "pump_transfers: flush failed\n");
 		message->status = -EIO;
 		giveback(drv_data);
 		return;
 	}
 	drv_data->n_bytes = chip->n_bytes;
-	drv_data->dma_width = chip->dma_width;
 	drv_data->tx = (void *)transfer->tx_buf;
 	drv_data->tx_end = drv_data->tx + transfer->len;
 	drv_data->rx = transfer->rx_buf;
 	drv_data->rx_end = drv_data->rx + transfer->len;
 	drv_data->rx_dma = transfer->rx_dma;
 	drv_data->tx_dma = transfer->tx_dma;
-	drv_data->len = transfer->len & DCMD_LENGTH;
+	drv_data->len = transfer->len;
 	drv_data->write = drv_data->tx ? chip->write : null_writer;
 	drv_data->read = drv_data->rx ? chip->read : null_reader;
 
@@ -1015,21 +567,18 @@ static void pump_transfers(unsigned long data)
 
 		if (bits <= 8) {
 			drv_data->n_bytes = 1;
-			drv_data->dma_width = DCMD_WIDTH1;
 			drv_data->read = drv_data->read != null_reader ?
 						u8_reader : null_reader;
 			drv_data->write = drv_data->write != null_writer ?
 						u8_writer : null_writer;
 		} else if (bits <= 16) {
 			drv_data->n_bytes = 2;
-			drv_data->dma_width = DCMD_WIDTH2;
 			drv_data->read = drv_data->read != null_reader ?
 						u16_reader : null_reader;
 			drv_data->write = drv_data->write != null_writer ?
 						u16_writer : null_writer;
 		} else if (bits <= 32) {
 			drv_data->n_bytes = 4;
-			drv_data->dma_width = DCMD_WIDTH4;
 			drv_data->read = drv_data->read != null_reader ?
 						u32_reader : null_reader;
 			drv_data->write = drv_data->write != null_writer ?
@@ -1038,7 +587,8 @@ static void pump_transfers(unsigned long data)
 		/* if bits/word is changed in dma mode, then must check the
 		 * thresholds and burst also */
 		if (chip->enable_dma) {
-			if (set_dma_burst_and_threshold(chip, message->spi,
+			if (pxa2xx_spi_set_dma_burst_and_threshold(chip,
+							message->spi,
 							bits, &dma_burst,
 							&dma_thresh))
 				if (printk_ratelimit())
@@ -1057,70 +607,21 @@ static void pump_transfers(unsigned long data)
 
 	message->state = RUNNING_STATE;
 
-	/* Try to map dma buffer and do a dma transfer if successful, but
-	 * only if the length is non-zero and less than MAX_DMA_LEN.
-	 *
-	 * Zero-length non-descriptor DMA is illegal on PXA2xx; force use
-	 * of PIO instead.  Care is needed above because the transfer may
-	 * have have been passed with buffers that are already dma mapped.
-	 * A zero-length transfer in PIO mode will not try to write/read
-	 * to/from the buffers
-	 *
-	 * REVISIT large transfers are exactly where we most want to be
-	 * using DMA.  If this happens much, split those transfers into
-	 * multiple DMA segments rather than forcing PIO.
-	 */
 	drv_data->dma_mapped = 0;
-	if (drv_data->len > 0 && drv_data->len <= MAX_DMA_LEN)
-		drv_data->dma_mapped = map_dma_buffers(drv_data);
+	if (pxa2xx_spi_dma_is_possible(drv_data->len))
+		drv_data->dma_mapped = pxa2xx_spi_map_dma_buffers(drv_data);
 	if (drv_data->dma_mapped) {
 
 		/* Ensure we have the correct interrupt handler */
-		drv_data->transfer_handler = dma_transfer;
-
-		/* Setup rx DMA Channel */
-		DCSR(drv_data->rx_channel) = RESET_DMA_CHANNEL;
-		DSADR(drv_data->rx_channel) = drv_data->ssdr_physical;
-		DTADR(drv_data->rx_channel) = drv_data->rx_dma;
-		if (drv_data->rx == drv_data->null_dma_buf)
-			/* No target address increment */
-			DCMD(drv_data->rx_channel) = DCMD_FLOWSRC
-							| drv_data->dma_width
-							| dma_burst
-							| drv_data->len;
-		else
-			DCMD(drv_data->rx_channel) = DCMD_INCTRGADDR
-							| DCMD_FLOWSRC
-							| drv_data->dma_width
-							| dma_burst
-							| drv_data->len;
-
-		/* Setup tx DMA Channel */
-		DCSR(drv_data->tx_channel) = RESET_DMA_CHANNEL;
-		DSADR(drv_data->tx_channel) = drv_data->tx_dma;
-		DTADR(drv_data->tx_channel) = drv_data->ssdr_physical;
-		if (drv_data->tx == drv_data->null_dma_buf)
-			/* No source address increment */
-			DCMD(drv_data->tx_channel) = DCMD_FLOWTRG
-							| drv_data->dma_width
-							| dma_burst
-							| drv_data->len;
-		else
-			DCMD(drv_data->tx_channel) = DCMD_INCSRCADDR
-							| DCMD_FLOWTRG
-							| drv_data->dma_width
-							| dma_burst
-							| drv_data->len;
-
-		/* Enable dma end irqs on SSP to detect end of transfer */
-		if (drv_data->ssp_type == PXA25x_SSP)
-			DCMD(drv_data->tx_channel) |= DCMD_ENDIRQEN;
+		drv_data->transfer_handler = pxa2xx_spi_dma_transfer;
+
+		pxa2xx_spi_dma_prepare(drv_data, dma_burst);
 
 		/* Clear status and start DMA engine */
 		cr1 = chip->cr1 | dma_thresh | drv_data->dma_cr1;
 		write_SSSR(drv_data->clear_sr, reg);
-		DCSR(drv_data->rx_channel) |= DCSR_RUN;
-		DCSR(drv_data->tx_channel) |= DCSR_RUN;
+
+		pxa2xx_spi_dma_start(drv_data);
 	} else {
 		/* Ensure we have the correct interrupt handler	*/
 		drv_data->transfer_handler = interrupt_transfer;
@@ -1262,8 +763,6 @@ static int setup(struct spi_device *spi)
 			chip->gpio_cs = -1;
 		chip->enable_dma = 0;
 		chip->timeout = TIMOUT_DFLT;
-		chip->dma_burst_size = drv_data->master_info->enable_dma ?
-					DCMD_BURST8 : 0;
 	}
 
 	/* protocol drivers may change the chip settings, so...
@@ -1293,7 +792,8 @@ static int setup(struct spi_device *spi)
 	 * burst and threshold can still respond to changes in bits_per_word */
 	if (chip->enable_dma) {
 		/* set up legal burst and threshold for dma */
-		if (set_dma_burst_and_threshold(chip, spi, spi->bits_per_word,
+		if (pxa2xx_spi_set_dma_burst_and_threshold(chip, spi,
+						spi->bits_per_word,
 						&chip->dma_burst_size,
 						&chip->dma_threshold)) {
 			dev_warn(&spi->dev, "in setup: DMA burst size reduced "
@@ -1328,18 +828,15 @@ static int setup(struct spi_device *spi)
 
 	if (spi->bits_per_word <= 8) {
 		chip->n_bytes = 1;
-		chip->dma_width = DCMD_WIDTH1;
 		chip->read = u8_reader;
 		chip->write = u8_writer;
 	} else if (spi->bits_per_word <= 16) {
 		chip->n_bytes = 2;
-		chip->dma_width = DCMD_WIDTH2;
 		chip->read = u16_reader;
 		chip->write = u16_writer;
 	} else if (spi->bits_per_word <= 32) {
 		chip->cr0 |= SSCR0_EDSS;
 		chip->n_bytes = 4;
-		chip->dma_width = DCMD_WIDTH4;
 		chip->read = u32_reader;
 		chip->write = u32_writer;
 	} else {
@@ -1447,31 +944,11 @@ static int pxa2xx_spi_probe(struct platform_device *pdev)
 	drv_data->tx_channel = -1;
 	drv_data->rx_channel = -1;
 	if (platform_info->enable_dma) {
-
-		/* Get two DMA channels	(rx and tx) */
-		drv_data->rx_channel = pxa_request_dma("pxa2xx_spi_ssp_rx",
-							DMA_PRIO_HIGH,
-							dma_handler,
-							drv_data);
-		if (drv_data->rx_channel < 0) {
-			dev_err(dev, "problem (%d) requesting rx channel\n",
-				drv_data->rx_channel);
-			status = -ENODEV;
-			goto out_error_irq_alloc;
+		status = pxa2xx_spi_dma_setup(drv_data);
+		if (status) {
+			dev_warn(dev, "failed to setup DMA, using PIO\n");
+			platform_info->enable_dma = false;
 		}
-		drv_data->tx_channel = pxa_request_dma("pxa2xx_spi_ssp_tx",
-							DMA_PRIO_MEDIUM,
-							dma_handler,
-							drv_data);
-		if (drv_data->tx_channel < 0) {
-			dev_err(dev, "problem (%d) requesting tx channel\n",
-				drv_data->tx_channel);
-			status = -ENODEV;
-			goto out_error_dma_alloc;
-		}
-
-		DRCMR(ssp->drcmr_rx) = DRCMR_MAPVLD | drv_data->rx_channel;
-		DRCMR(ssp->drcmr_tx) = DRCMR_MAPVLD | drv_data->tx_channel;
 	}
 
 	/* Enable SOC clock */
@@ -1507,14 +984,7 @@ static int pxa2xx_spi_probe(struct platform_device *pdev)
 
 out_error_clock_enabled:
 	clk_disable_unprepare(ssp->clk);
-
-out_error_dma_alloc:
-	if (drv_data->tx_channel != -1)
-		pxa_free_dma(drv_data->tx_channel);
-	if (drv_data->rx_channel != -1)
-		pxa_free_dma(drv_data->rx_channel);
-
-out_error_irq_alloc:
+	pxa2xx_spi_dma_release(drv_data);
 	free_irq(ssp->irq, drv_data);
 
 out_error_master_alloc:
@@ -1537,12 +1007,8 @@ static int pxa2xx_spi_remove(struct platform_device *pdev)
 	clk_disable_unprepare(ssp->clk);
 
 	/* Release DMA */
-	if (drv_data->master_info->enable_dma) {
-		DRCMR(ssp->drcmr_rx) = 0;
-		DRCMR(ssp->drcmr_tx) = 0;
-		pxa_free_dma(drv_data->tx_channel);
-		pxa_free_dma(drv_data->rx_channel);
-	}
+	if (drv_data->master_info->enable_dma)
+		pxa2xx_spi_dma_release(drv_data);
 
 	/* Release IRQ */
 	free_irq(ssp->irq, drv_data);
@@ -1589,12 +1055,7 @@ static int pxa2xx_spi_resume(struct device *dev)
 	struct ssp_device *ssp = drv_data->ssp;
 	int status = 0;
 
-	if (drv_data->rx_channel != -1)
-		DRCMR(drv_data->ssp->drcmr_rx) =
-			DRCMR_MAPVLD | drv_data->rx_channel;
-	if (drv_data->tx_channel != -1)
-		DRCMR(drv_data->ssp->drcmr_tx) =
-			DRCMR_MAPVLD | drv_data->tx_channel;
+	pxa2xx_spi_dma_resume(drv_data);
 
 	/* Enable the SSP clock */
 	clk_prepare_enable(ssp->clk);
diff --git a/drivers/spi/spi-pxa2xx.h b/drivers/spi/spi-pxa2xx.h
new file mode 100644
index 000000000000..0a98905c916e
--- /dev/null
+++ b/drivers/spi/spi-pxa2xx.h
@@ -0,0 +1,185 @@
+/*
+ * Copyright (C) 2005 Stephen Street / StreetFire Sound Labs
+ * Copyright (C) 2013, Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef SPI_PXA2XX_H
+#define SPI_PXA2XX_H
+
+#include <linux/errno.h>
+#include <linux/io.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+#include <linux/pxa2xx_ssp.h>
+#include <linux/spi/spi.h>
+#include <linux/spi/pxa2xx_spi.h>
+
+struct driver_data {
+	/* Driver model hookup */
+	struct platform_device *pdev;
+
+	/* SSP Info */
+	struct ssp_device *ssp;
+
+	/* SPI framework hookup */
+	enum pxa_ssp_type ssp_type;
+	struct spi_master *master;
+
+	/* PXA hookup */
+	struct pxa2xx_spi_master *master_info;
+
+	/* PXA private DMA setup stuff */
+	int rx_channel;
+	int tx_channel;
+	u32 *null_dma_buf;
+
+	/* SSP register addresses */
+	void __iomem *ioaddr;
+	u32 ssdr_physical;
+
+	/* SSP masks*/
+	u32 dma_cr1;
+	u32 int_cr1;
+	u32 clear_sr;
+	u32 mask_sr;
+
+	/* Maximun clock rate */
+	unsigned long max_clk_rate;
+
+	/* Message Transfer pump */
+	struct tasklet_struct pump_transfers;
+
+	/* Current message transfer state info */
+	struct spi_message *cur_msg;
+	struct spi_transfer *cur_transfer;
+	struct chip_data *cur_chip;
+	size_t len;
+	void *tx;
+	void *tx_end;
+	void *rx;
+	void *rx_end;
+	int dma_mapped;
+	dma_addr_t rx_dma;
+	dma_addr_t tx_dma;
+	size_t rx_map_len;
+	size_t tx_map_len;
+	u8 n_bytes;
+	int (*write)(struct driver_data *drv_data);
+	int (*read)(struct driver_data *drv_data);
+	irqreturn_t (*transfer_handler)(struct driver_data *drv_data);
+	void (*cs_control)(u32 command);
+};
+
+struct chip_data {
+	u32 cr0;
+	u32 cr1;
+	u32 psp;
+	u32 timeout;
+	u8 n_bytes;
+	u32 dma_burst_size;
+	u32 threshold;
+	u32 dma_threshold;
+	u8 enable_dma;
+	u8 bits_per_word;
+	u32 speed_hz;
+	union {
+		int gpio_cs;
+		unsigned int frm;
+	};
+	int gpio_cs_inverted;
+	int (*write)(struct driver_data *drv_data);
+	int (*read)(struct driver_data *drv_data);
+	void (*cs_control)(u32 command);
+};
+
+#define DEFINE_SSP_REG(reg, off) \
+static inline u32 read_##reg(void const __iomem *p) \
+{ return __raw_readl(p + (off)); } \
+\
+static inline void write_##reg(u32 v, void __iomem *p) \
+{ __raw_writel(v, p + (off)); }
+
+DEFINE_SSP_REG(SSCR0, 0x00)
+DEFINE_SSP_REG(SSCR1, 0x04)
+DEFINE_SSP_REG(SSSR, 0x08)
+DEFINE_SSP_REG(SSITR, 0x0c)
+DEFINE_SSP_REG(SSDR, 0x10)
+DEFINE_SSP_REG(SSTO, 0x28)
+DEFINE_SSP_REG(SSPSP, 0x2c)
+
+#define START_STATE ((void *)0)
+#define RUNNING_STATE ((void *)1)
+#define DONE_STATE ((void *)2)
+#define ERROR_STATE ((void *)-1)
+
+#define MAX_DMA_LEN		8191
+#define IS_DMA_ALIGNED(x)	IS_ALIGNED((unsigned long)(x), DMA_ALIGNMENT)
+#define DMA_ALIGNMENT		8
+
+static inline int pxa25x_ssp_comp(struct driver_data *drv_data)
+{
+	if (drv_data->ssp_type == PXA25x_SSP)
+		return 1;
+	if (drv_data->ssp_type == CE4100_SSP)
+		return 1;
+	return 0;
+}
+
+static inline void write_SSSR_CS(struct driver_data *drv_data, u32 val)
+{
+	void __iomem *reg = drv_data->ioaddr;
+
+	if (drv_data->ssp_type == CE4100_SSP)
+		val |= read_SSSR(reg) & SSSR_ALT_FRM_MASK;
+
+	write_SSSR(val, reg);
+}
+
+extern int pxa2xx_spi_flush(struct driver_data *drv_data);
+extern void *pxa2xx_spi_next_transfer(struct driver_data *drv_data);
+
+#if defined(CONFIG_SPI_PXA2XX_PXADMA)
+extern bool pxa2xx_spi_dma_is_possible(size_t len);
+extern int pxa2xx_spi_map_dma_buffers(struct driver_data *drv_data);
+extern irqreturn_t pxa2xx_spi_dma_transfer(struct driver_data *drv_data);
+extern int pxa2xx_spi_dma_prepare(struct driver_data *drv_data, u32 dma_burst);
+extern void pxa2xx_spi_dma_start(struct driver_data *drv_data);
+extern int pxa2xx_spi_dma_setup(struct driver_data *drv_data);
+extern void pxa2xx_spi_dma_release(struct driver_data *drv_data);
+extern void pxa2xx_spi_dma_resume(struct driver_data *drv_data);
+extern int pxa2xx_spi_set_dma_burst_and_threshold(struct chip_data *chip,
+						  struct spi_device *spi,
+						  u8 bits_per_word,
+						  u32 *burst_code,
+						  u32 *threshold);
+#else
+static inline bool pxa2xx_spi_dma_is_possible(size_t len) { return false; }
+static inline int pxa2xx_spi_map_dma_buffers(struct driver_data *drv_data)
+{
+	return 0;
+}
+#define pxa2xx_spi_dma_transfer NULL
+static inline void pxa2xx_spi_dma_prepare(struct driver_data *drv_data,
+					  u32 dma_burst) {}
+static inline void pxa2xx_spi_dma_start(struct driver_data *drv_data) {}
+static inline int pxa2xx_spi_dma_setup(struct driver_data *drv_data)
+{
+	return 0;
+}
+static inline void pxa2xx_spi_dma_release(struct driver_data *drv_data) {}
+static inline void pxa2xx_spi_dma_resume(struct driver_data *drv_data) {}
+static inline int pxa2xx_spi_set_dma_burst_and_threshold(struct chip_data *chip,
+							 struct spi_device *spi,
+							 u8 bits_per_word,
+							 u32 *burst_code,
+							 u32 *threshold)
+{
+	return -ENODEV;
+}
+#endif
+
+#endif /* SPI_PXA2XX_H */
diff --git a/include/linux/spi/pxa2xx_spi.h b/include/linux/spi/pxa2xx_spi.h
index 053b5ba51b25..d6d2b4d557f8 100644
--- a/include/linux/spi/pxa2xx_spi.h
+++ b/include/linux/spi/pxa2xx_spi.h
@@ -53,85 +53,5 @@ struct pxa2xx_spi_chip {
 
 extern void pxa2xx_set_spi_info(unsigned id, struct pxa2xx_spi_master *info);
 
-#else
-/*
- * This is the implemtation for CE4100 on x86. ARM defines them in mach/ or
- * plat/ include path.
- * The CE4100 does not provide DMA support. This bits are here to let the driver
- * compile and will never be used. Maybe we get DMA support at a later point in
- * time.
- */
-
-#define DCSR(n)         (n)
-#define DSADR(n)        (n)
-#define DTADR(n)        (n)
-#define DCMD(n)         (n)
-#define DRCMR(n)        (n)
-
-#define DCSR_RUN	(1 << 31)	/* Run Bit */
-#define DCSR_NODESC	(1 << 30)	/* No-Descriptor Fetch */
-#define DCSR_STOPIRQEN	(1 << 29)	/* Stop Interrupt Enable */
-#define DCSR_REQPEND	(1 << 8)	/* Request Pending (read-only) */
-#define DCSR_STOPSTATE	(1 << 3)	/* Stop State (read-only) */
-#define DCSR_ENDINTR	(1 << 2)	/* End Interrupt */
-#define DCSR_STARTINTR	(1 << 1)	/* Start Interrupt */
-#define DCSR_BUSERR	(1 << 0)	/* Bus Error Interrupt */
-
-#define DCSR_EORIRQEN	(1 << 28)	/* End of Receive Interrupt Enable */
-#define DCSR_EORJMPEN	(1 << 27)	/* Jump to next descriptor on EOR */
-#define DCSR_EORSTOPEN	(1 << 26)	/* STOP on an EOR */
-#define DCSR_SETCMPST	(1 << 25)	/* Set Descriptor Compare Status */
-#define DCSR_CLRCMPST	(1 << 24)	/* Clear Descriptor Compare Status */
-#define DCSR_CMPST	(1 << 10)	/* The Descriptor Compare Status */
-#define DCSR_EORINTR	(1 << 9)	/* The end of Receive */
-
-#define DRCMR_MAPVLD	(1 << 7)	/* Map Valid */
-#define DRCMR_CHLNUM	0x1f		/* mask for Channel Number */
-
-#define DDADR_DESCADDR	0xfffffff0	/* Address of next descriptor */
-#define DDADR_STOP	(1 << 0)	/* Stop */
-
-#define DCMD_INCSRCADDR	(1 << 31)	/* Source Address Increment Setting. */
-#define DCMD_INCTRGADDR	(1 << 30)	/* Target Address Increment Setting. */
-#define DCMD_FLOWSRC	(1 << 29)	/* Flow Control by the source. */
-#define DCMD_FLOWTRG	(1 << 28)	/* Flow Control by the target. */
-#define DCMD_STARTIRQEN	(1 << 22)	/* Start Interrupt Enable */
-#define DCMD_ENDIRQEN	(1 << 21)	/* End Interrupt Enable */
-#define DCMD_ENDIAN	(1 << 18)	/* Device Endian-ness. */
-#define DCMD_BURST8	(1 << 16)	/* 8 byte burst */
-#define DCMD_BURST16	(2 << 16)	/* 16 byte burst */
-#define DCMD_BURST32	(3 << 16)	/* 32 byte burst */
-#define DCMD_WIDTH1	(1 << 14)	/* 1 byte width */
-#define DCMD_WIDTH2	(2 << 14)	/* 2 byte width (HalfWord) */
-#define DCMD_WIDTH4	(3 << 14)	/* 4 byte width (Word) */
-#define DCMD_LENGTH	0x01fff		/* length mask (max = 8K - 1) */
-
-/*
- * Descriptor structure for PXA's DMA engine
- * Note: this structure must always be aligned to a 16-byte boundary.
- */
-
-typedef enum {
-	DMA_PRIO_HIGH = 0,
-	DMA_PRIO_MEDIUM = 1,
-	DMA_PRIO_LOW = 2
-} pxa_dma_prio;
-
-/*
- * DMA registration
- */
-
-static inline int pxa_request_dma(char *name,
-		pxa_dma_prio prio,
-		void (*irq_handler)(int, void *),
-		void *data)
-{
-	return -ENODEV;
-}
-
-static inline void pxa_free_dma(int dma_ch)
-{
-}
-
 #endif
 #endif
-- 
cgit v1.2.3


From 5928808ef623347e0d4aa22327b992e9e125b6ad Mon Sep 17 00:00:00 2001
From: Mika Westerberg <mika.westerberg@linux.intel.com>
Date: Tue, 22 Jan 2013 12:26:29 +0200
Subject: spi/pxa2xx: add support for DMA engine

To be able to use DMA with this driver on non-PXA platforms we implement
support for the generic DMA engine API. This lets user to use different DMA
engines with little or no modification to the driver.

Request lines and channel numbers can be passed to the driver from the
platform specific data.

The DMA engine implementation will be selected by default even on PXA
platform. User can select the legacy DMA API by enabling Kconfig option
CONFIG_SPI_PXA2XX_PXADMA.

Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Tested-by: Lu Cao <lucao@marvell.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 drivers/spi/Kconfig            |   7 +-
 drivers/spi/Makefile           |   1 +
 drivers/spi/spi-pxa2xx-dma.c   | 392 +++++++++++++++++++++++++++++++++++++++++
 drivers/spi/spi-pxa2xx.c       |   2 +-
 drivers/spi/spi-pxa2xx.h       |  32 +++-
 include/linux/spi/pxa2xx_spi.h |   6 +
 6 files changed, 437 insertions(+), 3 deletions(-)
 create mode 100644 drivers/spi/spi-pxa2xx-dma.c

(limited to 'include/linux')

diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
index f1878666e917..8913d3dd5724 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig
@@ -301,7 +301,12 @@ config SPI_PXA2XX_PXADMA
 	bool "PXA2xx SSP legacy PXA DMA API support"
 	depends on SPI_PXA2XX && ARCH_PXA
 	help
-	  Enable PXA private legacy DMA API support.
+	  Enable PXA private legacy DMA API support. Note that this is
+	  deprecated in favor of generic DMA engine API.
+
+config SPI_PXA2XX_DMA
+	def_bool y
+	depends on SPI_PXA2XX && !SPI_PXA2XX_PXADMA
 
 config SPI_PXA2XX
 	tristate "PXA2xx SSP SPI master"
diff --git a/drivers/spi/Makefile b/drivers/spi/Makefile
index 2e3cdd3caba9..e53c30941340 100644
--- a/drivers/spi/Makefile
+++ b/drivers/spi/Makefile
@@ -49,6 +49,7 @@ obj-$(CONFIG_SPI_PL022)			+= spi-pl022.o
 obj-$(CONFIG_SPI_PPC4xx)		+= spi-ppc4xx.o
 spi-pxa2xx-platform-objs		:= spi-pxa2xx.o
 spi-pxa2xx-platform-$(CONFIG_SPI_PXA2XX_PXADMA)	+= spi-pxa2xx-pxadma.o
+spi-pxa2xx-platform-$(CONFIG_SPI_PXA2XX_DMA)	+= spi-pxa2xx-dma.o
 obj-$(CONFIG_SPI_PXA2XX)		+= spi-pxa2xx-platform.o
 obj-$(CONFIG_SPI_PXA2XX_PCI)		+= spi-pxa2xx-pci.o
 obj-$(CONFIG_SPI_RSPI)			+= spi-rspi.o
diff --git a/drivers/spi/spi-pxa2xx-dma.c b/drivers/spi/spi-pxa2xx-dma.c
new file mode 100644
index 000000000000..c735c5a008a2
--- /dev/null
+++ b/drivers/spi/spi-pxa2xx-dma.c
@@ -0,0 +1,392 @@
+/*
+ * PXA2xx SPI DMA engine support.
+ *
+ * Copyright (C) 2013, Intel Corporation
+ * Author: Mika Westerberg <mika.westerberg@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/init.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmaengine.h>
+#include <linux/pxa2xx_ssp.h>
+#include <linux/scatterlist.h>
+#include <linux/sizes.h>
+#include <linux/spi/spi.h>
+#include <linux/spi/pxa2xx_spi.h>
+
+#include "spi-pxa2xx.h"
+
+static int pxa2xx_spi_map_dma_buffer(struct driver_data *drv_data,
+				     enum dma_data_direction dir)
+{
+	int i, nents, len = drv_data->len;
+	struct scatterlist *sg;
+	struct device *dmadev;
+	struct sg_table *sgt;
+	void *buf, *pbuf;
+
+	/*
+	 * Some DMA controllers have problems transferring buffers that are
+	 * not multiple of 4 bytes. So we truncate the transfer so that it
+	 * is suitable for such controllers, and handle the trailing bytes
+	 * manually after the DMA completes.
+	 *
+	 * REVISIT: It would be better if this information could be
+	 * retrieved directly from the DMA device in a similar way than
+	 * ->copy_align etc. is done.
+	 */
+	len = ALIGN(drv_data->len, 4);
+
+	if (dir == DMA_TO_DEVICE) {
+		dmadev = drv_data->tx_chan->device->dev;
+		sgt = &drv_data->tx_sgt;
+		buf = drv_data->tx;
+		drv_data->tx_map_len = len;
+	} else {
+		dmadev = drv_data->rx_chan->device->dev;
+		sgt = &drv_data->rx_sgt;
+		buf = drv_data->rx;
+		drv_data->rx_map_len = len;
+	}
+
+	nents = DIV_ROUND_UP(len, SZ_2K);
+	if (nents != sgt->nents) {
+		int ret;
+
+		sg_free_table(sgt);
+		ret = sg_alloc_table(sgt, nents, GFP_KERNEL);
+		if (ret)
+			return ret;
+	}
+
+	pbuf = buf;
+	for_each_sg(sgt->sgl, sg, sgt->nents, i) {
+		size_t bytes = min_t(size_t, len, SZ_2K);
+
+		if (buf)
+			sg_set_buf(sg, pbuf, bytes);
+		else
+			sg_set_buf(sg, drv_data->dummy, bytes);
+
+		pbuf += bytes;
+		len -= bytes;
+	}
+
+	nents = dma_map_sg(dmadev, sgt->sgl, sgt->nents, dir);
+	if (!nents)
+		return -ENOMEM;
+
+	return nents;
+}
+
+static void pxa2xx_spi_unmap_dma_buffer(struct driver_data *drv_data,
+					enum dma_data_direction dir)
+{
+	struct device *dmadev;
+	struct sg_table *sgt;
+
+	if (dir == DMA_TO_DEVICE) {
+		dmadev = drv_data->tx_chan->device->dev;
+		sgt = &drv_data->tx_sgt;
+	} else {
+		dmadev = drv_data->rx_chan->device->dev;
+		sgt = &drv_data->rx_sgt;
+	}
+
+	dma_unmap_sg(dmadev, sgt->sgl, sgt->nents, dir);
+}
+
+static void pxa2xx_spi_unmap_dma_buffers(struct driver_data *drv_data)
+{
+	if (!drv_data->dma_mapped)
+		return;
+
+	pxa2xx_spi_unmap_dma_buffer(drv_data, DMA_FROM_DEVICE);
+	pxa2xx_spi_unmap_dma_buffer(drv_data, DMA_TO_DEVICE);
+
+	drv_data->dma_mapped = 0;
+}
+
+static void pxa2xx_spi_dma_transfer_complete(struct driver_data *drv_data,
+					     bool error)
+{
+	struct spi_message *msg = drv_data->cur_msg;
+
+	/*
+	 * It is possible that one CPU is handling ROR interrupt and other
+	 * just gets DMA completion. Calling pump_transfers() twice for the
+	 * same transfer leads to problems thus we prevent concurrent calls
+	 * by using ->dma_running.
+	 */
+	if (atomic_dec_and_test(&drv_data->dma_running)) {
+		void __iomem *reg = drv_data->ioaddr;
+
+		/*
+		 * If the other CPU is still handling the ROR interrupt we
+		 * might not know about the error yet. So we re-check the
+		 * ROR bit here before we clear the status register.
+		 */
+		if (!error) {
+			u32 status = read_SSSR(reg) & drv_data->mask_sr;
+			error = status & SSSR_ROR;
+		}
+
+		/* Clear status & disable interrupts */
+		write_SSCR1(read_SSCR1(reg) & ~drv_data->dma_cr1, reg);
+		write_SSSR_CS(drv_data, drv_data->clear_sr);
+		if (!pxa25x_ssp_comp(drv_data))
+			write_SSTO(0, reg);
+
+		if (!error) {
+			pxa2xx_spi_unmap_dma_buffers(drv_data);
+
+			/* Handle the last bytes of unaligned transfer */
+			drv_data->tx += drv_data->tx_map_len;
+			drv_data->write(drv_data);
+
+			drv_data->rx += drv_data->rx_map_len;
+			drv_data->read(drv_data);
+
+			msg->actual_length += drv_data->len;
+			msg->state = pxa2xx_spi_next_transfer(drv_data);
+		} else {
+			/* In case we got an error we disable the SSP now */
+			write_SSCR0(read_SSCR0(reg) & ~SSCR0_SSE, reg);
+
+			msg->state = ERROR_STATE;
+		}
+
+		tasklet_schedule(&drv_data->pump_transfers);
+	}
+}
+
+static void pxa2xx_spi_dma_callback(void *data)
+{
+	pxa2xx_spi_dma_transfer_complete(data, false);
+}
+
+static struct dma_async_tx_descriptor *
+pxa2xx_spi_dma_prepare_one(struct driver_data *drv_data,
+			   enum dma_transfer_direction dir)
+{
+	struct pxa2xx_spi_master *pdata = drv_data->master_info;
+	struct chip_data *chip = drv_data->cur_chip;
+	enum dma_slave_buswidth width;
+	struct dma_slave_config cfg;
+	struct dma_chan *chan;
+	struct sg_table *sgt;
+	int nents, ret;
+
+	switch (drv_data->n_bytes) {
+	case 1:
+		width = DMA_SLAVE_BUSWIDTH_1_BYTE;
+		break;
+	case 2:
+		width = DMA_SLAVE_BUSWIDTH_2_BYTES;
+		break;
+	default:
+		width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+		break;
+	}
+
+	memset(&cfg, 0, sizeof(cfg));
+	cfg.direction = dir;
+
+	if (dir == DMA_MEM_TO_DEV) {
+		cfg.dst_addr = drv_data->ssdr_physical;
+		cfg.dst_addr_width = width;
+		cfg.dst_maxburst = chip->dma_burst_size;
+		cfg.slave_id = pdata->tx_slave_id;
+
+		sgt = &drv_data->tx_sgt;
+		nents = drv_data->tx_nents;
+		chan = drv_data->tx_chan;
+	} else {
+		cfg.src_addr = drv_data->ssdr_physical;
+		cfg.src_addr_width = width;
+		cfg.src_maxburst = chip->dma_burst_size;
+		cfg.slave_id = pdata->rx_slave_id;
+
+		sgt = &drv_data->rx_sgt;
+		nents = drv_data->rx_nents;
+		chan = drv_data->rx_chan;
+	}
+
+	ret = dmaengine_slave_config(chan, &cfg);
+	if (ret) {
+		dev_warn(&drv_data->pdev->dev, "DMA slave config failed\n");
+		return NULL;
+	}
+
+	return dmaengine_prep_slave_sg(chan, sgt->sgl, nents, dir,
+				       DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+}
+
+static bool pxa2xx_spi_dma_filter(struct dma_chan *chan, void *param)
+{
+	const struct pxa2xx_spi_master *pdata = param;
+
+	return chan->chan_id == pdata->tx_chan_id ||
+	       chan->chan_id == pdata->rx_chan_id;
+}
+
+bool pxa2xx_spi_dma_is_possible(size_t len)
+{
+	return len <= MAX_DMA_LEN;
+}
+
+int pxa2xx_spi_map_dma_buffers(struct driver_data *drv_data)
+{
+	const struct chip_data *chip = drv_data->cur_chip;
+	int ret;
+
+	if (!chip->enable_dma)
+		return 0;
+
+	/* Don't bother with DMA if we can't do even a single burst */
+	if (drv_data->len < chip->dma_burst_size)
+		return 0;
+
+	ret = pxa2xx_spi_map_dma_buffer(drv_data, DMA_TO_DEVICE);
+	if (ret <= 0) {
+		dev_warn(&drv_data->pdev->dev, "failed to DMA map TX\n");
+		return 0;
+	}
+
+	drv_data->tx_nents = ret;
+
+	ret = pxa2xx_spi_map_dma_buffer(drv_data, DMA_FROM_DEVICE);
+	if (ret <= 0) {
+		pxa2xx_spi_unmap_dma_buffer(drv_data, DMA_TO_DEVICE);
+		dev_warn(&drv_data->pdev->dev, "failed to DMA map RX\n");
+		return 0;
+	}
+
+	drv_data->rx_nents = ret;
+	return 1;
+}
+
+irqreturn_t pxa2xx_spi_dma_transfer(struct driver_data *drv_data)
+{
+	u32 status;
+
+	status = read_SSSR(drv_data->ioaddr) & drv_data->mask_sr;
+	if (status & SSSR_ROR) {
+		dev_err(&drv_data->pdev->dev, "FIFO overrun\n");
+
+		dmaengine_terminate_all(drv_data->rx_chan);
+		dmaengine_terminate_all(drv_data->tx_chan);
+
+		pxa2xx_spi_dma_transfer_complete(drv_data, true);
+		return IRQ_HANDLED;
+	}
+
+	return IRQ_NONE;
+}
+
+int pxa2xx_spi_dma_prepare(struct driver_data *drv_data, u32 dma_burst)
+{
+	struct dma_async_tx_descriptor *tx_desc, *rx_desc;
+
+	tx_desc = pxa2xx_spi_dma_prepare_one(drv_data, DMA_MEM_TO_DEV);
+	if (!tx_desc) {
+		dev_err(&drv_data->pdev->dev,
+			"failed to get DMA TX descriptor\n");
+		return -EBUSY;
+	}
+
+	rx_desc = pxa2xx_spi_dma_prepare_one(drv_data, DMA_DEV_TO_MEM);
+	if (!rx_desc) {
+		dev_err(&drv_data->pdev->dev,
+			"failed to get DMA RX descriptor\n");
+		return -EBUSY;
+	}
+
+	/* We are ready when RX completes */
+	rx_desc->callback = pxa2xx_spi_dma_callback;
+	rx_desc->callback_param = drv_data;
+
+	dmaengine_submit(rx_desc);
+	dmaengine_submit(tx_desc);
+	return 0;
+}
+
+void pxa2xx_spi_dma_start(struct driver_data *drv_data)
+{
+	dma_async_issue_pending(drv_data->rx_chan);
+	dma_async_issue_pending(drv_data->tx_chan);
+
+	atomic_set(&drv_data->dma_running, 1);
+}
+
+int pxa2xx_spi_dma_setup(struct driver_data *drv_data)
+{
+	struct pxa2xx_spi_master *pdata = drv_data->master_info;
+	dma_cap_mask_t mask;
+
+	dma_cap_zero(mask);
+	dma_cap_set(DMA_SLAVE, mask);
+
+	drv_data->dummy = devm_kzalloc(&drv_data->pdev->dev, SZ_2K, GFP_KERNEL);
+	if (!drv_data->dummy)
+		return -ENOMEM;
+
+	drv_data->tx_chan = dma_request_channel(mask, pxa2xx_spi_dma_filter,
+						pdata);
+	if (!drv_data->tx_chan)
+		return -ENODEV;
+
+	drv_data->rx_chan = dma_request_channel(mask, pxa2xx_spi_dma_filter,
+						pdata);
+	if (!drv_data->rx_chan) {
+		dma_release_channel(drv_data->tx_chan);
+		drv_data->tx_chan = NULL;
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
+void pxa2xx_spi_dma_release(struct driver_data *drv_data)
+{
+	if (drv_data->rx_chan) {
+		dmaengine_terminate_all(drv_data->rx_chan);
+		dma_release_channel(drv_data->rx_chan);
+		sg_free_table(&drv_data->rx_sgt);
+		drv_data->rx_chan = NULL;
+	}
+	if (drv_data->tx_chan) {
+		dmaengine_terminate_all(drv_data->tx_chan);
+		dma_release_channel(drv_data->tx_chan);
+		sg_free_table(&drv_data->tx_sgt);
+		drv_data->tx_chan = NULL;
+	}
+}
+
+void pxa2xx_spi_dma_resume(struct driver_data *drv_data)
+{
+}
+
+int pxa2xx_spi_set_dma_burst_and_threshold(struct chip_data *chip,
+					   struct spi_device *spi,
+					   u8 bits_per_word, u32 *burst_code,
+					   u32 *threshold)
+{
+	struct pxa2xx_spi_chip *chip_info = spi->controller_data;
+
+	/*
+	 * If the DMA burst size is given in chip_info we use that,
+	 * otherwise we use the default. Also we use the default FIFO
+	 * thresholds for now.
+	 */
+	*burst_code = chip_info ? chip_info->dma_burst_size : 16;
+	*threshold = SSCR1_RxTresh(RX_THRESH_DFLT)
+		   | SSCR1_TxTresh(TX_THRESH_DFLT);
+
+	return 0;
+}
diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c
index 5b7c2a4ba828..c3d78073d1ad 100644
--- a/drivers/spi/spi-pxa2xx.c
+++ b/drivers/spi/spi-pxa2xx.c
@@ -928,7 +928,7 @@ static int pxa2xx_spi_probe(struct platform_device *pdev)
 		drv_data->mask_sr = SSSR_RFS | SSSR_TFS | SSSR_ROR;
 	} else {
 		drv_data->int_cr1 = SSCR1_TIE | SSCR1_RIE | SSCR1_TINTE;
-		drv_data->dma_cr1 = SSCR1_TSRE | SSCR1_RSRE | SSCR1_TINTE;
+		drv_data->dma_cr1 = DEFAULT_DMA_CR1;
 		drv_data->clear_sr = SSSR_ROR | SSSR_TINT;
 		drv_data->mask_sr = SSSR_TINT | SSSR_RFS | SSSR_TFS | SSSR_ROR;
 	}
diff --git a/drivers/spi/spi-pxa2xx.h b/drivers/spi/spi-pxa2xx.h
index 0a98905c916e..97ff4717e6ac 100644
--- a/drivers/spi/spi-pxa2xx.h
+++ b/drivers/spi/spi-pxa2xx.h
@@ -10,11 +10,15 @@
 #ifndef SPI_PXA2XX_H
 #define SPI_PXA2XX_H
 
+#include <linux/atomic.h>
+#include <linux/dmaengine.h>
 #include <linux/errno.h>
 #include <linux/io.h>
 #include <linux/interrupt.h>
 #include <linux/platform_device.h>
 #include <linux/pxa2xx_ssp.h>
+#include <linux/scatterlist.h>
+#include <linux/sizes.h>
 #include <linux/spi/spi.h>
 #include <linux/spi/pxa2xx_spi.h>
 
@@ -53,6 +57,16 @@ struct driver_data {
 	/* Message Transfer pump */
 	struct tasklet_struct pump_transfers;
 
+	/* DMA engine support */
+	struct dma_chan *rx_chan;
+	struct dma_chan *tx_chan;
+	struct sg_table rx_sgt;
+	struct sg_table tx_sgt;
+	int rx_nents;
+	int tx_nents;
+	void *dummy;
+	atomic_t dma_running;
+
 	/* Current message transfer state info */
 	struct spi_message *cur_msg;
 	struct spi_transfer *cur_transfer;
@@ -116,7 +130,6 @@ DEFINE_SSP_REG(SSPSP, 0x2c)
 #define DONE_STATE ((void *)2)
 #define ERROR_STATE ((void *)-1)
 
-#define MAX_DMA_LEN		8191
 #define IS_DMA_ALIGNED(x)	IS_ALIGNED((unsigned long)(x), DMA_ALIGNMENT)
 #define DMA_ALIGNMENT		8
 
@@ -142,7 +155,24 @@ static inline void write_SSSR_CS(struct driver_data *drv_data, u32 val)
 extern int pxa2xx_spi_flush(struct driver_data *drv_data);
 extern void *pxa2xx_spi_next_transfer(struct driver_data *drv_data);
 
+/*
+ * Select the right DMA implementation.
+ */
 #if defined(CONFIG_SPI_PXA2XX_PXADMA)
+#define SPI_PXA2XX_USE_DMA	1
+#define MAX_DMA_LEN		8191
+#define DEFAULT_DMA_CR1		(SSCR1_TSRE | SSCR1_RSRE | SSCR1_TINTE)
+#elif defined(CONFIG_SPI_PXA2XX_DMA)
+#define SPI_PXA2XX_USE_DMA	1
+#define MAX_DMA_LEN		SZ_64K
+#define DEFAULT_DMA_CR1		(SSCR1_TSRE | SSCR1_RSRE | SSCR1_TRAIL)
+#else
+#undef SPI_PXA2XX_USE_DMA
+#define MAX_DMA_LEN		0
+#define DEFAULT_DMA_CR1		0
+#endif
+
+#ifdef SPI_PXA2XX_USE_DMA
 extern bool pxa2xx_spi_dma_is_possible(size_t len);
 extern int pxa2xx_spi_map_dma_buffers(struct driver_data *drv_data);
 extern irqreturn_t pxa2xx_spi_dma_transfer(struct driver_data *drv_data);
diff --git a/include/linux/spi/pxa2xx_spi.h b/include/linux/spi/pxa2xx_spi.h
index d6d2b4d557f8..e5cbbc4c57f7 100644
--- a/include/linux/spi/pxa2xx_spi.h
+++ b/include/linux/spi/pxa2xx_spi.h
@@ -29,6 +29,12 @@ struct pxa2xx_spi_master {
 	u16 num_chipselect;
 	u8 enable_dma;
 
+	/* DMA engine specific config */
+	int rx_chan_id;
+	int tx_chan_id;
+	int rx_slave_id;
+	int tx_slave_id;
+
 	/* For non-PXA arches */
 	struct ssp_device ssp;
 };
-- 
cgit v1.2.3


From 4f0a6847815837b63b05fc23878ba391701d8f6a Mon Sep 17 00:00:00 2001
From: Jonghwa Lee <jonghwa3.lee@samsung.com>
Date: Fri, 8 Feb 2013 01:13:06 +0000
Subject: Thermal: exynos: Add support for temperature falling interrupt.

This patch introduces using temperature falling interrupt in exynos
thermal driver. Former patch, it only use polling way to check
whether if system themperature is fallen. However, exynos SOC also
provides temperature falling interrupt way to do same things by hw.
This feature is not supported in exynos4210.

Acked-by: Kukjin Kim <kgene.kim@samsung.com>
Signed-off-by: Jonghwa Lee <jonghwa3.lee@samsung.com>
Signed-off-by: Amit Daniel Kachhap <amit.daniel@samsung.com>
Signed-off-by: Zhang Rui <rui.zhang@intel.com>
---
 drivers/thermal/exynos_thermal.c             | 81 ++++++++++++++++------------
 include/linux/platform_data/exynos_thermal.h |  3 ++
 2 files changed, 49 insertions(+), 35 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/thermal/exynos_thermal.c b/drivers/thermal/exynos_thermal.c
index cd71e24194b1..f4dd68a25022 100644
--- a/drivers/thermal/exynos_thermal.c
+++ b/drivers/thermal/exynos_thermal.c
@@ -94,6 +94,7 @@
 #define SENSOR_NAME_LEN	16
 #define MAX_TRIP_COUNT	8
 #define MAX_COOLING_DEVICE 4
+#define MAX_THRESHOLD_LEVS 4
 
 #define ACTIVE_INTERVAL 500
 #define IDLE_INTERVAL 10000
@@ -133,6 +134,7 @@ struct exynos_tmu_data {
 struct	thermal_trip_point_conf {
 	int trip_val[MAX_TRIP_COUNT];
 	int trip_count;
+	u8 trigger_falling;
 };
 
 struct	thermal_cooling_conf {
@@ -182,7 +184,8 @@ static int exynos_set_mode(struct thermal_zone_device *thermal,
 
 	mutex_lock(&th_zone->therm_dev->lock);
 
-	if (mode == THERMAL_DEVICE_ENABLED)
+	if (mode == THERMAL_DEVICE_ENABLED &&
+		!th_zone->sensor_conf->trip_data.trigger_falling)
 		th_zone->therm_dev->polling_delay = IDLE_INTERVAL;
 	else
 		th_zone->therm_dev->polling_delay = 0;
@@ -428,7 +431,8 @@ static void exynos_report_trigger(void)
 			break;
 	}
 
-	if (th_zone->mode == THERMAL_DEVICE_ENABLED) {
+	if (th_zone->mode == THERMAL_DEVICE_ENABLED &&
+		!th_zone->sensor_conf->trip_data.trigger_falling) {
 		if (i > 0)
 			th_zone->therm_dev->polling_delay = ACTIVE_INTERVAL;
 		else
@@ -467,7 +471,8 @@ static int exynos_register_thermal(struct thermal_sensor_conf *sensor_conf)
 
 	th_zone->therm_dev = thermal_zone_device_register(sensor_conf->name,
 			EXYNOS_ZONE_COUNT, 0, NULL, &exynos_dev_ops, NULL, 0,
-			IDLE_INTERVAL);
+			sensor_conf->trip_data.trigger_falling ?
+			0 : IDLE_INTERVAL);
 
 	if (IS_ERR(th_zone->therm_dev)) {
 		pr_err("Failed to register thermal zone device\n");
@@ -574,8 +579,9 @@ static int exynos_tmu_initialize(struct platform_device *pdev)
 {
 	struct exynos_tmu_data *data = platform_get_drvdata(pdev);
 	struct exynos_tmu_platform_data *pdata = data->pdata;
-	unsigned int status, trim_info, rising_threshold;
-	int ret = 0, threshold_code;
+	unsigned int status, trim_info;
+	unsigned int rising_threshold = 0, falling_threshold = 0;
+	int ret = 0, threshold_code, i, trigger_levs = 0;
 
 	mutex_lock(&data->lock);
 	clk_enable(data->clk);
@@ -600,6 +606,11 @@ static int exynos_tmu_initialize(struct platform_device *pdev)
 			(data->temp_error2 != 0))
 		data->temp_error1 = pdata->efuse_value;
 
+	/* Count trigger levels to be enabled */
+	for (i = 0; i < MAX_THRESHOLD_LEVS; i++)
+		if (pdata->trigger_levels[i])
+			trigger_levs++;
+
 	if (data->soc == SOC_ARCH_EXYNOS4210) {
 		/* Write temperature code for threshold */
 		threshold_code = temp_to_code(data, pdata->threshold);
@@ -609,44 +620,38 @@ static int exynos_tmu_initialize(struct platform_device *pdev)
 		}
 		writeb(threshold_code,
 			data->base + EXYNOS4210_TMU_REG_THRESHOLD_TEMP);
-
-		writeb(pdata->trigger_levels[0],
-			data->base + EXYNOS4210_TMU_REG_TRIG_LEVEL0);
-		writeb(pdata->trigger_levels[1],
-			data->base + EXYNOS4210_TMU_REG_TRIG_LEVEL1);
-		writeb(pdata->trigger_levels[2],
-			data->base + EXYNOS4210_TMU_REG_TRIG_LEVEL2);
-		writeb(pdata->trigger_levels[3],
-			data->base + EXYNOS4210_TMU_REG_TRIG_LEVEL3);
+		for (i = 0; i < trigger_levs; i++)
+			writeb(pdata->trigger_levels[i],
+			data->base + EXYNOS4210_TMU_REG_TRIG_LEVEL0 + i * 4);
 
 		writel(EXYNOS4210_TMU_INTCLEAR_VAL,
 			data->base + EXYNOS_TMU_REG_INTCLEAR);
 	} else if (data->soc == SOC_ARCH_EXYNOS) {
-		/* Write temperature code for threshold */
-		threshold_code = temp_to_code(data, pdata->trigger_levels[0]);
-		if (threshold_code < 0) {
-			ret = threshold_code;
-			goto out;
-		}
-		rising_threshold = threshold_code;
-		threshold_code = temp_to_code(data, pdata->trigger_levels[1]);
-		if (threshold_code < 0) {
-			ret = threshold_code;
-			goto out;
-		}
-		rising_threshold |= (threshold_code << 8);
-		threshold_code = temp_to_code(data, pdata->trigger_levels[2]);
-		if (threshold_code < 0) {
-			ret = threshold_code;
-			goto out;
+		/* Write temperature code for rising and falling threshold */
+		for (i = 0; i < trigger_levs; i++) {
+			threshold_code = temp_to_code(data,
+						pdata->trigger_levels[i]);
+			if (threshold_code < 0) {
+				ret = threshold_code;
+				goto out;
+			}
+			rising_threshold |= threshold_code << 8 * i;
+			if (pdata->threshold_falling) {
+				threshold_code = temp_to_code(data,
+						pdata->trigger_levels[i] -
+						pdata->threshold_falling);
+				if (threshold_code > 0)
+					falling_threshold |=
+						threshold_code << 8 * i;
+			}
 		}
-		rising_threshold |= (threshold_code << 16);
 
 		writel(rising_threshold,
 				data->base + EXYNOS_THD_TEMP_RISE);
-		writel(0, data->base + EXYNOS_THD_TEMP_FALL);
+		writel(falling_threshold,
+				data->base + EXYNOS_THD_TEMP_FALL);
 
-		writel(EXYNOS_TMU_CLEAR_RISE_INT|EXYNOS_TMU_CLEAR_FALL_INT,
+		writel(EXYNOS_TMU_CLEAR_RISE_INT | EXYNOS_TMU_CLEAR_FALL_INT,
 				data->base + EXYNOS_TMU_REG_INTCLEAR);
 	}
 out:
@@ -679,6 +684,8 @@ static void exynos_tmu_control(struct platform_device *pdev, bool on)
 			pdata->trigger_level2_en << 8 |
 			pdata->trigger_level1_en << 4 |
 			pdata->trigger_level0_en;
+		if (pdata->threshold_falling)
+			interrupt_en |= interrupt_en << 16;
 	} else {
 		con |= EXYNOS_TMU_CORE_OFF;
 		interrupt_en = 0; /* Disable all interrupts */
@@ -716,7 +723,8 @@ static void exynos_tmu_work(struct work_struct *work)
 	mutex_lock(&data->lock);
 	clk_enable(data->clk);
 	if (data->soc == SOC_ARCH_EXYNOS)
-		writel(EXYNOS_TMU_CLEAR_RISE_INT,
+		writel(EXYNOS_TMU_CLEAR_RISE_INT |
+				EXYNOS_TMU_CLEAR_FALL_INT,
 				data->base + EXYNOS_TMU_REG_INTCLEAR);
 	else
 		writel(EXYNOS4210_TMU_INTCLEAR_VAL,
@@ -772,6 +780,7 @@ static struct exynos_tmu_platform_data const exynos4210_default_tmu_data = {
 
 #if defined(CONFIG_SOC_EXYNOS5250) || defined(CONFIG_SOC_EXYNOS4412)
 static struct exynos_tmu_platform_data const exynos_default_tmu_data = {
+	.threshold_falling = 10,
 	.trigger_levels[0] = 85,
 	.trigger_levels[1] = 103,
 	.trigger_levels[2] = 110,
@@ -1015,6 +1024,8 @@ static int __devinit exynos_tmu_probe(struct platform_device *pdev)
 		exynos_sensor_conf.trip_data.trip_val[i] =
 			pdata->threshold + pdata->trigger_levels[i];
 
+	exynos_sensor_conf.trip_data.trigger_falling = pdata->threshold_falling;
+
 	exynos_sensor_conf.cooling_data.freq_clip_count =
 						pdata->freq_tab_count;
 	for (i = 0; i < pdata->freq_tab_count; i++) {
diff --git a/include/linux/platform_data/exynos_thermal.h b/include/linux/platform_data/exynos_thermal.h
index a7bdb2f63b73..da7e6274b175 100644
--- a/include/linux/platform_data/exynos_thermal.h
+++ b/include/linux/platform_data/exynos_thermal.h
@@ -53,6 +53,8 @@ struct freq_clip_table {
  * struct exynos_tmu_platform_data
  * @threshold: basic temperature for generating interrupt
  *	       25 <= threshold <= 125 [unit: degree Celsius]
+ * @threshold_falling: differntial value for setting threshold
+ *		       of temperature falling interrupt.
  * @trigger_levels: array for each interrupt levels
  *	[unit: degree Celsius]
  *	0: temperature for trigger_level0 interrupt
@@ -97,6 +99,7 @@ struct freq_clip_table {
  */
 struct exynos_tmu_platform_data {
 	u8 threshold;
+	u8 threshold_falling;
 	u8 trigger_levels[4];
 	bool trigger_level0_en;
 	bool trigger_level1_en;
-- 
cgit v1.2.3


From a0d2642e9296882cda3ad03ff3d9a6649cd70439 Mon Sep 17 00:00:00 2001
From: Mika Westerberg <mika.westerberg@linux.intel.com>
Date: Tue, 22 Jan 2013 12:26:32 +0200
Subject: spi/pxa2xx: add support for Intel Low Power Subsystem SPI

Intel LPSS SPI is pretty much the same as the PXA27xx SPI except that it
has few additional features over the original:

	o FIFO depth is 256 entries
	o RX FIFO has one watermark
	o TX FIFO has two watermarks, low and high
	o chip select can be controlled by writing to a register

The new FIFO registers follow immediately the PXA27xx registers but then there
are some additional LPSS private registers at offset 1k or 2k from the base
address. For these private registers we add new accessors that take advantage
of drv_data->lpss_base once it is resolved.

We add a new type LPSS_SSP that can be used to distinguish the LPSS devices
from others.

Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Tested-by: Lu Cao <lucao@marvell.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 drivers/spi/spi-pxa2xx.c       | 133 +++++++++++++++++++++++++++++++++++++++--
 drivers/spi/spi-pxa2xx.h       |   6 ++
 include/linux/pxa2xx_ssp.h     |   9 +++
 include/linux/spi/pxa2xx_spi.h |   1 +
 4 files changed, 145 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c
index 8a5ba0aa095a..4bd6b729f710 100644
--- a/drivers/spi/spi-pxa2xx.c
+++ b/drivers/spi/spi-pxa2xx.c
@@ -1,5 +1,6 @@
 /*
  * Copyright (C) 2005 Stephen Street / StreetFire Sound Labs
+ * Copyright (C) 2013, Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -61,6 +62,98 @@ MODULE_ALIAS("platform:pxa2xx-spi");
 				| SSCR1_RFT | SSCR1_TFT | SSCR1_MWDS \
 				| SSCR1_SPH | SSCR1_SPO | SSCR1_LBM)
 
+#define LPSS_RX_THRESH_DFLT	64
+#define LPSS_TX_LOTHRESH_DFLT	160
+#define LPSS_TX_HITHRESH_DFLT	224
+
+/* Offset from drv_data->lpss_base */
+#define SPI_CS_CONTROL		0x18
+#define SPI_CS_CONTROL_SW_MODE	BIT(0)
+#define SPI_CS_CONTROL_CS_HIGH	BIT(1)
+
+static bool is_lpss_ssp(const struct driver_data *drv_data)
+{
+	return drv_data->ssp_type == LPSS_SSP;
+}
+
+/*
+ * Read and write LPSS SSP private registers. Caller must first check that
+ * is_lpss_ssp() returns true before these can be called.
+ */
+static u32 __lpss_ssp_read_priv(struct driver_data *drv_data, unsigned offset)
+{
+	WARN_ON(!drv_data->lpss_base);
+	return readl(drv_data->lpss_base + offset);
+}
+
+static void __lpss_ssp_write_priv(struct driver_data *drv_data,
+				  unsigned offset, u32 value)
+{
+	WARN_ON(!drv_data->lpss_base);
+	writel(value, drv_data->lpss_base + offset);
+}
+
+/*
+ * lpss_ssp_setup - perform LPSS SSP specific setup
+ * @drv_data: pointer to the driver private data
+ *
+ * Perform LPSS SSP specific setup. This function must be called first if
+ * one is going to use LPSS SSP private registers.
+ */
+static void lpss_ssp_setup(struct driver_data *drv_data)
+{
+	unsigned offset = 0x400;
+	u32 value, orig;
+
+	if (!is_lpss_ssp(drv_data))
+		return;
+
+	/*
+	 * Perform auto-detection of the LPSS SSP private registers. They
+	 * can be either at 1k or 2k offset from the base address.
+	 */
+	orig = readl(drv_data->ioaddr + offset + SPI_CS_CONTROL);
+
+	value = orig | SPI_CS_CONTROL_SW_MODE;
+	writel(value, drv_data->ioaddr + offset + SPI_CS_CONTROL);
+	value = readl(drv_data->ioaddr + offset + SPI_CS_CONTROL);
+	if (value != (orig | SPI_CS_CONTROL_SW_MODE)) {
+		offset = 0x800;
+		goto detection_done;
+	}
+
+	value &= ~SPI_CS_CONTROL_SW_MODE;
+	writel(value, drv_data->ioaddr + offset + SPI_CS_CONTROL);
+	value = readl(drv_data->ioaddr + offset + SPI_CS_CONTROL);
+	if (value != orig) {
+		offset = 0x800;
+		goto detection_done;
+	}
+
+detection_done:
+	/* Now set the LPSS base */
+	drv_data->lpss_base = drv_data->ioaddr + offset;
+
+	/* Enable software chip select control */
+	value = SPI_CS_CONTROL_SW_MODE | SPI_CS_CONTROL_CS_HIGH;
+	__lpss_ssp_write_priv(drv_data, SPI_CS_CONTROL, value);
+}
+
+static void lpss_ssp_cs_control(struct driver_data *drv_data, bool enable)
+{
+	u32 value;
+
+	if (!is_lpss_ssp(drv_data))
+		return;
+
+	value = __lpss_ssp_read_priv(drv_data, SPI_CS_CONTROL);
+	if (enable)
+		value &= ~SPI_CS_CONTROL_CS_HIGH;
+	else
+		value |= SPI_CS_CONTROL_CS_HIGH;
+	__lpss_ssp_write_priv(drv_data, SPI_CS_CONTROL, value);
+}
+
 static void cs_assert(struct driver_data *drv_data)
 {
 	struct chip_data *chip = drv_data->cur_chip;
@@ -75,8 +168,12 @@ static void cs_assert(struct driver_data *drv_data)
 		return;
 	}
 
-	if (gpio_is_valid(chip->gpio_cs))
+	if (gpio_is_valid(chip->gpio_cs)) {
 		gpio_set_value(chip->gpio_cs, chip->gpio_cs_inverted);
+		return;
+	}
+
+	lpss_ssp_cs_control(drv_data, true);
 }
 
 static void cs_deassert(struct driver_data *drv_data)
@@ -91,8 +188,12 @@ static void cs_deassert(struct driver_data *drv_data)
 		return;
 	}
 
-	if (gpio_is_valid(chip->gpio_cs))
+	if (gpio_is_valid(chip->gpio_cs)) {
 		gpio_set_value(chip->gpio_cs, !chip->gpio_cs_inverted);
+		return;
+	}
+
+	lpss_ssp_cs_control(drv_data, false);
 }
 
 int pxa2xx_spi_flush(struct driver_data *drv_data)
@@ -642,6 +743,13 @@ static void pump_transfers(unsigned long data)
 		write_SSSR_CS(drv_data, drv_data->clear_sr);
 	}
 
+	if (is_lpss_ssp(drv_data)) {
+		if ((read_SSIRF(reg) & 0xff) != chip->lpss_rx_threshold)
+			write_SSIRF(chip->lpss_rx_threshold, reg);
+		if ((read_SSITF(reg) & 0xffff) != chip->lpss_tx_threshold)
+			write_SSITF(chip->lpss_tx_threshold, reg);
+	}
+
 	/* see if we need to reload the config registers */
 	if ((read_SSCR0(reg) != cr0)
 		|| (read_SSCR1(reg) & SSCR1_CHANGE_MASK) !=
@@ -754,8 +862,17 @@ static int setup(struct spi_device *spi)
 	struct chip_data *chip;
 	struct driver_data *drv_data = spi_master_get_devdata(spi->master);
 	unsigned int clk_div;
-	uint tx_thres = TX_THRESH_DFLT;
-	uint rx_thres = RX_THRESH_DFLT;
+	uint tx_thres, tx_hi_thres, rx_thres;
+
+	if (is_lpss_ssp(drv_data)) {
+		tx_thres = LPSS_TX_LOTHRESH_DFLT;
+		tx_hi_thres = LPSS_TX_HITHRESH_DFLT;
+		rx_thres = LPSS_RX_THRESH_DFLT;
+	} else {
+		tx_thres = TX_THRESH_DFLT;
+		tx_hi_thres = 0;
+		rx_thres = RX_THRESH_DFLT;
+	}
 
 	if (!pxa25x_ssp_comp(drv_data)
 		&& (spi->bits_per_word < 4 || spi->bits_per_word > 32)) {
@@ -808,6 +925,8 @@ static int setup(struct spi_device *spi)
 			chip->timeout = chip_info->timeout;
 		if (chip_info->tx_threshold)
 			tx_thres = chip_info->tx_threshold;
+		if (chip_info->tx_hi_threshold)
+			tx_hi_thres = chip_info->tx_hi_threshold;
 		if (chip_info->rx_threshold)
 			rx_thres = chip_info->rx_threshold;
 		chip->enable_dma = drv_data->master_info->enable_dma;
@@ -819,6 +938,10 @@ static int setup(struct spi_device *spi)
 	chip->threshold = (SSCR1_RxTresh(rx_thres) & SSCR1_RFT) |
 			(SSCR1_TxTresh(tx_thres) & SSCR1_TFT);
 
+	chip->lpss_rx_threshold = SSIRF_RxThresh(rx_thres);
+	chip->lpss_tx_threshold = SSITF_TxLoThresh(tx_thres)
+				| SSITF_TxHiThresh(tx_hi_thres);
+
 	/* set dma burst and threshold outside of chip_info path so that if
 	 * chip_info goes away after setting chip->enable_dma, the
 	 * burst and threshold can still respond to changes in bits_per_word */
@@ -1006,6 +1129,8 @@ static int pxa2xx_spi_probe(struct platform_device *pdev)
 		write_SSTO(0, drv_data->ioaddr);
 	write_SSPSP(0, drv_data->ioaddr);
 
+	lpss_ssp_setup(drv_data);
+
 	tasklet_init(&drv_data->pump_transfers, pump_transfers,
 		     (unsigned long)drv_data);
 
diff --git a/drivers/spi/spi-pxa2xx.h b/drivers/spi/spi-pxa2xx.h
index 97ff4717e6ac..5adc2a11c7bc 100644
--- a/drivers/spi/spi-pxa2xx.h
+++ b/drivers/spi/spi-pxa2xx.h
@@ -86,6 +86,8 @@ struct driver_data {
 	int (*read)(struct driver_data *drv_data);
 	irqreturn_t (*transfer_handler)(struct driver_data *drv_data);
 	void (*cs_control)(u32 command);
+
+	void __iomem *lpss_base;
 };
 
 struct chip_data {
@@ -97,6 +99,8 @@ struct chip_data {
 	u32 dma_burst_size;
 	u32 threshold;
 	u32 dma_threshold;
+	u16 lpss_rx_threshold;
+	u16 lpss_tx_threshold;
 	u8 enable_dma;
 	u8 bits_per_word;
 	u32 speed_hz;
@@ -124,6 +128,8 @@ DEFINE_SSP_REG(SSITR, 0x0c)
 DEFINE_SSP_REG(SSDR, 0x10)
 DEFINE_SSP_REG(SSTO, 0x28)
 DEFINE_SSP_REG(SSPSP, 0x2c)
+DEFINE_SSP_REG(SSITF, SSITF)
+DEFINE_SSP_REG(SSIRF, SSIRF)
 
 #define START_STATE ((void *)0)
 #define RUNNING_STATE ((void *)1)
diff --git a/include/linux/pxa2xx_ssp.h b/include/linux/pxa2xx_ssp.h
index 065e7f6c3ad7..467cc6307b62 100644
--- a/include/linux/pxa2xx_ssp.h
+++ b/include/linux/pxa2xx_ssp.h
@@ -155,6 +155,14 @@
 #define SSACD_ACDS(x)		((x) << 0)	/* Audio clock divider select */
 #define SSACD_SCDX8		(1 << 7)	/* SYSCLK division ratio select */
 
+/* LPSS SSP */
+#define SSITF			0x44		/* TX FIFO trigger level */
+#define SSITF_TxLoThresh(x)	(((x) - 1) << 8)
+#define SSITF_TxHiThresh(x)	((x) - 1)
+
+#define SSIRF			0x48		/* RX FIFO trigger level */
+#define SSIRF_RxThresh(x)	((x) - 1)
+
 enum pxa_ssp_type {
 	SSP_UNDEFINED = 0,
 	PXA25x_SSP,  /* pxa 210, 250, 255, 26x */
@@ -164,6 +172,7 @@ enum pxa_ssp_type {
 	PXA168_SSP,
 	PXA910_SSP,
 	CE4100_SSP,
+	LPSS_SSP,
 };
 
 struct ssp_device {
diff --git a/include/linux/spi/pxa2xx_spi.h b/include/linux/spi/pxa2xx_spi.h
index e5cbbc4c57f7..82d5111cd0c2 100644
--- a/include/linux/spi/pxa2xx_spi.h
+++ b/include/linux/spi/pxa2xx_spi.h
@@ -44,6 +44,7 @@ struct pxa2xx_spi_master {
  */
 struct pxa2xx_spi_chip {
 	u8 tx_threshold;
+	u8 tx_hi_threshold;
 	u8 rx_threshold;
 	u8 dma_burst_size;
 	u32 timeout;
-- 
cgit v1.2.3


From fe20d71f25400cccc8bffef865f79250be7dbc81 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Wed, 21 Nov 2012 17:32:30 +0100
Subject: uprobes: Kill uprobe_consumer->filter()

uprobe_consumer->filter() is pointless in its current form, kill it.

We will add it back, but with the different signature/semantics. Perhaps
we will even re-introduce the callsite in handler_chain(), but not to
just skip uc->handler().

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
---
 include/linux/uprobes.h     | 5 -----
 kernel/events/uprobes.c     | 6 ++----
 kernel/trace/trace_uprobe.c | 1 -
 3 files changed, 2 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index 4f628a6fc5b4..83742b91ff73 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -37,11 +37,6 @@ struct inode;
 
 struct uprobe_consumer {
 	int (*handler)(struct uprobe_consumer *self, struct pt_regs *regs);
-	/*
-	 * filter is optional; If a filter exists, handler is run
-	 * if and only if filter returns true.
-	 */
-	bool (*filter)(struct uprobe_consumer *self, struct task_struct *task);
 
 	struct uprobe_consumer *next;
 };
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index a39d8163b713..5cbebac27c01 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -477,10 +477,8 @@ static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs)
 		return;
 
 	down_read(&uprobe->consumer_rwsem);
-	for (uc = uprobe->consumers; uc; uc = uc->next) {
-		if (!uc->filter || uc->filter(uc, current))
-			uc->handler(uc, regs);
-	}
+	for (uc = uprobe->consumers; uc; uc = uc->next)
+		uc->handler(uc, regs);
 	up_read(&uprobe->consumer_rwsem);
 }
 
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index 87b6db4ccbc5..e668024773d4 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -550,7 +550,6 @@ static int probe_event_enable(struct trace_uprobe *tu, int flag)
 		return -EINTR;
 
 	utc->cons.handler = uprobe_dispatcher;
-	utc->cons.filter = NULL;
 	ret = uprobe_register(tu->inode, tu->offset, &utc->cons);
 	if (ret) {
 		kfree(utc);
-- 
cgit v1.2.3


From 8a7f2fa0dea3b019500961b86d765e6fdd4bffb2 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Fri, 28 Dec 2012 17:58:38 +0100
Subject: uprobes: Reintroduce uprobe_consumer->filter()

Finally add uprobe_consumer->filter() and change consumer_filter()
to actually call this method.

Note that ->filter() accepts mm_struct, not task_struct. Because:

	1. We do not have for_each_mm_user(mm, task).

	2. Even if we implement for_each_mm_user(), ->filter() can
	   use it itself.

	3. It is not clear who will actually need this interface to
	   do the "nontrivial" filtering.

Another argument is "enum uprobe_filter_ctx", consumer->filter() can
use it to figure out why/where it was called. For example, perhaps
we can add UPROBE_FILTER_PRE_REGISTER used by build_map_info() to
quickly "nack" the unwanted mm's. In this case consumer should know
that it is called under ->i_mmap_mutex.

See the previous discussion at http://marc.info/?t=135214229700002
Perhaps we should pass more arguments, vma/vaddr?

Note: this patch obviously can't help to filter out the child created
by fork(), this will be addressed later.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
---
 include/linux/uprobes.h |  9 +++++++++
 kernel/events/uprobes.c | 18 +++++++++++-------
 2 files changed, 20 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index 83742b91ff73..c2df6934fdc6 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -35,8 +35,17 @@ struct inode;
 # include <asm/uprobes.h>
 #endif
 
+enum uprobe_filter_ctx {
+	UPROBE_FILTER_REGISTER,
+	UPROBE_FILTER_UNREGISTER,
+	UPROBE_FILTER_MMAP,
+};
+
 struct uprobe_consumer {
 	int (*handler)(struct uprobe_consumer *self, struct pt_regs *regs);
+	bool (*filter)(struct uprobe_consumer *self,
+				enum uprobe_filter_ctx ctx,
+				struct mm_struct *mm);
 
 	struct uprobe_consumer *next;
 };
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 33912086d54e..c2737be3c4b8 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -579,19 +579,21 @@ static int prepare_uprobe(struct uprobe *uprobe, struct file *file,
 	return ret;
 }
 
-static inline bool consumer_filter(struct uprobe_consumer *uc)
+static inline bool consumer_filter(struct uprobe_consumer *uc,
+				   enum uprobe_filter_ctx ctx, struct mm_struct *mm)
 {
-	return true; /* TODO: !uc->filter || uc->filter(...) */
+	return !uc->filter || uc->filter(uc, ctx, mm);
 }
 
-static bool filter_chain(struct uprobe *uprobe)
+static bool filter_chain(struct uprobe *uprobe,
+			 enum uprobe_filter_ctx ctx, struct mm_struct *mm)
 {
 	struct uprobe_consumer *uc;
 	bool ret = false;
 
 	down_read(&uprobe->consumer_rwsem);
 	for (uc = uprobe->consumers; uc; uc = uc->next) {
-		ret = consumer_filter(uc);
+		ret = consumer_filter(uc, ctx, mm);
 		if (ret)
 			break;
 	}
@@ -772,10 +774,12 @@ static int register_for_each_vma(struct uprobe *uprobe, bool is_register)
 
 		if (is_register) {
 			/* consult only the "caller", new consumer. */
-			if (consumer_filter(uprobe->consumers))
+			if (consumer_filter(uprobe->consumers,
+					UPROBE_FILTER_REGISTER, mm))
 				err = install_breakpoint(uprobe, mm, vma, info->vaddr);
 		} else if (test_bit(MMF_HAS_UPROBES, &mm->flags)) {
-			if (!filter_chain(uprobe))
+			if (!filter_chain(uprobe,
+					UPROBE_FILTER_UNREGISTER, mm))
 				err |= remove_breakpoint(uprobe, mm, info->vaddr);
 		}
 
@@ -968,7 +972,7 @@ int uprobe_mmap(struct vm_area_struct *vma)
 	 */
 	list_for_each_entry_safe(uprobe, u, &tmp_list, pending_list) {
 		if (!fatal_signal_pending(current) &&
-		    filter_chain(uprobe)) {
+		    filter_chain(uprobe, UPROBE_FILTER_MMAP, vma->vm_mm)) {
 			unsigned long vaddr = offset_to_vaddr(vma, uprobe->offset);
 			install_breakpoint(uprobe, vma->vm_mm, vma, vaddr);
 		}
-- 
cgit v1.2.3


From da1816b1caeccdff04531e763bb35d7caa3ed19f Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Sat, 29 Dec 2012 17:49:11 +0100
Subject: uprobes: Teach handler_chain() to filter out the probed task

Currrently the are 2 problems with pre-filtering:

1. It is not possible to add/remove a task (mm) after uprobe_register()

2. A forked child inherits all breakpoints and uprobe_consumer can not
   control this.

This patch does the first step to improve the filtering. handler_chain()
removes the breakpoints installed by this uprobe from current->mm if all
handlers return UPROBE_HANDLER_REMOVE.

Note that handler_chain() relies on ->register_rwsem to avoid the race
with uprobe_register/unregister which can add/del a consumer, or even
remove and then insert the new uprobe at the same address.

Perhaps we will add uprobe_apply_mm(uprobe, mm, is_register) and teach
copy_mm() to do filter(UPROBE_FILTER_FORK), but I think this change makes
sense anyway.

Note: instead of checking the retcode from uc->handler, we could add
uc->filter(UPROBE_FILTER_BPHIT). But I think this is not optimal to
call 2 hooks in a row. This buys nothing, and if handler/filter do
something nontrivial they will probably do the same work twice.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
---
 include/linux/uprobes.h |  3 +++
 kernel/events/uprobes.c | 58 ++++++++++++++++++++++++++++++++++++++++---------
 2 files changed, 51 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index c2df6934fdc6..95d0002efda5 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -35,6 +35,9 @@ struct inode;
 # include <asm/uprobes.h>
 #endif
 
+#define UPROBE_HANDLER_REMOVE		1
+#define UPROBE_HANDLER_MASK		1
+
 enum uprobe_filter_ctx {
 	UPROBE_FILTER_REGISTER,
 	UPROBE_FILTER_UNREGISTER,
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index c2737be3c4b8..04c104ad9522 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -440,16 +440,6 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset)
 	return uprobe;
 }
 
-static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs)
-{
-	struct uprobe_consumer *uc;
-
-	down_read(&uprobe->register_rwsem);
-	for (uc = uprobe->consumers; uc; uc = uc->next)
-		uc->handler(uc, regs);
-	up_read(&uprobe->register_rwsem);
-}
-
 static void consumer_add(struct uprobe *uprobe, struct uprobe_consumer *uc)
 {
 	down_write(&uprobe->consumer_rwsem);
@@ -882,6 +872,33 @@ void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consume
 	put_uprobe(uprobe);
 }
 
+static int unapply_uprobe(struct uprobe *uprobe, struct mm_struct *mm)
+{
+	struct vm_area_struct *vma;
+	int err = 0;
+
+	down_read(&mm->mmap_sem);
+	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+		unsigned long vaddr;
+		loff_t offset;
+
+		if (!valid_vma(vma, false) ||
+		    vma->vm_file->f_mapping->host != uprobe->inode)
+			continue;
+
+		offset = (loff_t)vma->vm_pgoff << PAGE_SHIFT;
+		if (uprobe->offset <  offset ||
+		    uprobe->offset >= offset + vma->vm_end - vma->vm_start)
+			continue;
+
+		vaddr = offset_to_vaddr(vma, uprobe->offset);
+		err |= remove_breakpoint(uprobe, mm, vaddr);
+	}
+	up_read(&mm->mmap_sem);
+
+	return err;
+}
+
 static struct rb_node *
 find_node_in_range(struct inode *inode, loff_t min, loff_t max)
 {
@@ -1435,6 +1452,27 @@ static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp)
 	return uprobe;
 }
 
+static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs)
+{
+	struct uprobe_consumer *uc;
+	int remove = UPROBE_HANDLER_REMOVE;
+
+	down_read(&uprobe->register_rwsem);
+	for (uc = uprobe->consumers; uc; uc = uc->next) {
+		int rc = uc->handler(uc, regs);
+
+		WARN(rc & ~UPROBE_HANDLER_MASK,
+			"bad rc=0x%x from %pf()\n", rc, uc->handler);
+		remove &= rc;
+	}
+
+	if (remove && uprobe->consumers) {
+		WARN_ON(!uprobe_is_active(uprobe));
+		unapply_uprobe(uprobe, current->mm);
+	}
+	up_read(&uprobe->register_rwsem);
+}
+
 /*
  * Run handler and ask thread to singlestep.
  * Ensure all non-fatal signals cannot interrupt thread while it singlesteps.
-- 
cgit v1.2.3


From f22c1bb6b4706be3502b378cb14564449b15f983 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Sat, 2 Feb 2013 16:27:52 +0100
Subject: perf: Introduce hw_perf_event->tp_target and ->tp_list

sys_perf_event_open()->perf_init_event(event) is called before
find_get_context(event), this means that event->ctx == NULL when
class->reg(TRACE_REG_PERF_REGISTER/OPEN) is called and thus it
can't know if this event is per-task or system-wide.

This patch adds hw_perf_event->tp_target for PERF_TYPE_TRACEPOINT,
this is analogous to PERF_TYPE_BREAKPOINT/bp_target we already have.
The patch also moves ->bp_target up so that it can overlap with the
new member, this can help the compiler to generate the better code.

trace_uprobe_register() will use it for prefiltering to avoid the
unnecessary breakpoints in mm's we do not want to trace.

->tp_target doesn't have its own reference, but we can rely on the
fact that either sys_perf_event_open() holds a reference, or it is
equal to event->ctx->task. So this pointer is always valid until
free_event().

Also add the "struct list_head tp_list" into this union. It is not
strictly necessary, but it can simplify the next changes and we can
add it for free.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
---
 include/linux/perf_event.h | 9 +++++++--
 kernel/events/core.c       | 5 ++++-
 2 files changed, 11 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 42adf012145d..e47ee462c2f2 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -135,16 +135,21 @@ struct hw_perf_event {
 		struct { /* software */
 			struct hrtimer	hrtimer;
 		};
+		struct { /* tracepoint */
+			struct task_struct	*tp_target;
+			/* for tp_event->class */
+			struct list_head	tp_list;
+		};
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
 		struct { /* breakpoint */
-			struct arch_hw_breakpoint	info;
-			struct list_head		bp_list;
 			/*
 			 * Crufty hack to avoid the chicken and egg
 			 * problem hw_breakpoint has with context
 			 * creation and event initalization.
 			 */
 			struct task_struct		*bp_target;
+			struct arch_hw_breakpoint	info;
+			struct list_head		bp_list;
 		};
 #endif
 	};
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 301079d06f24..e2d4323c6ae6 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -6162,11 +6162,14 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
 
 	if (task) {
 		event->attach_state = PERF_ATTACH_TASK;
+
+		if (attr->type == PERF_TYPE_TRACEPOINT)
+			event->hw.tp_target = task;
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
 		/*
 		 * hw_breakpoint is a bit difficult here..
 		 */
-		if (attr->type == PERF_TYPE_BREAKPOINT)
+		else if (attr->type == PERF_TYPE_BREAKPOINT)
 			event->hw.bp_target = task;
 #endif
 	}
-- 
cgit v1.2.3


From bdf8647c44766590ed02f9a84a450a796558b753 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Sun, 3 Feb 2013 19:21:12 +0100
Subject: uprobes: Introduce uprobe_apply()

Currently it is not possible to change the filtering constraints after
uprobe_register(), so a consumer can not, say, start to trace a task/mm
which was previously filtered out, or remove the no longer needed bp's.

Introduce uprobe_apply() which simply does register_for_each_vma() again
to consult uprobe_consumer->filter() and install/remove the breakpoints.
The only complication is that register_for_each_vma() can no longer
assume that uprobe->consumers should be consulter if is_register == T,
so we change it to accept "struct uprobe_consumer *new" instead.

Unlike uprobe_register(), uprobe_apply(true) doesn't do "unregister" if
register_for_each_vma() fails, it is up to caller to handle the error.

Note: we probably need to cleanup the current interface, it is strange
that uprobe_apply/unregister need inode/offset. We should either change
uprobe_register() to return "struct uprobe *", or add a private ->uprobe
member in uprobe_consumer. And in the long term uprobe_apply() should
take a single argument, uprobe or consumer, even "bool add" should go
away.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
---
 include/linux/uprobes.h |  6 ++++++
 kernel/events/uprobes.c | 39 +++++++++++++++++++++++++++++++++++----
 2 files changed, 41 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index 95d0002efda5..02b83db8e2c5 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -101,6 +101,7 @@ extern int __weak set_swbp(struct arch_uprobe *aup, struct mm_struct *mm, unsign
 extern int __weak set_orig_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr);
 extern bool __weak is_swbp_insn(uprobe_opcode_t *insn);
 extern int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc);
+extern int uprobe_apply(struct inode *inode, loff_t offset, struct uprobe_consumer *uc, bool);
 extern void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc);
 extern int uprobe_mmap(struct vm_area_struct *vma);
 extern void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end);
@@ -124,6 +125,11 @@ uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc)
 {
 	return -ENOSYS;
 }
+static inline int
+uprobe_apply(struct inode *inode, loff_t offset, struct uprobe_consumer *uc, bool add)
+{
+	return -ENOSYS;
+}
 static inline void
 uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc)
 {
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 221fc58f59e3..a567c8c7ef31 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -733,8 +733,10 @@ build_map_info(struct address_space *mapping, loff_t offset, bool is_register)
 	return curr;
 }
 
-static int register_for_each_vma(struct uprobe *uprobe, bool is_register)
+static int
+register_for_each_vma(struct uprobe *uprobe, struct uprobe_consumer *new)
 {
+	bool is_register = !!new;
 	struct map_info *info;
 	int err = 0;
 
@@ -765,7 +767,7 @@ static int register_for_each_vma(struct uprobe *uprobe, bool is_register)
 
 		if (is_register) {
 			/* consult only the "caller", new consumer. */
-			if (consumer_filter(uprobe->consumers,
+			if (consumer_filter(new,
 					UPROBE_FILTER_REGISTER, mm))
 				err = install_breakpoint(uprobe, mm, vma, info->vaddr);
 		} else if (test_bit(MMF_HAS_UPROBES, &mm->flags)) {
@@ -788,7 +790,7 @@ static int register_for_each_vma(struct uprobe *uprobe, bool is_register)
 static int __uprobe_register(struct uprobe *uprobe, struct uprobe_consumer *uc)
 {
 	consumer_add(uprobe, uc);
-	return register_for_each_vma(uprobe, true);
+	return register_for_each_vma(uprobe, uc);
 }
 
 static void __uprobe_unregister(struct uprobe *uprobe, struct uprobe_consumer *uc)
@@ -798,7 +800,7 @@ static void __uprobe_unregister(struct uprobe *uprobe, struct uprobe_consumer *u
 	if (!consumer_del(uprobe, uc))	/* WARN? */
 		return;
 
-	err = register_for_each_vma(uprobe, false);
+	err = register_for_each_vma(uprobe, NULL);
 	/* TODO : cant unregister? schedule a worker thread */
 	if (!uprobe->consumers && !err)
 		delete_uprobe(uprobe);
@@ -854,6 +856,35 @@ int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *
 }
 EXPORT_SYMBOL_GPL(uprobe_register);
 
+/*
+ * uprobe_apply - unregister a already registered probe.
+ * @inode: the file in which the probe has to be removed.
+ * @offset: offset from the start of the file.
+ * @uc: consumer which wants to add more or remove some breakpoints
+ * @add: add or remove the breakpoints
+ */
+int uprobe_apply(struct inode *inode, loff_t offset,
+			struct uprobe_consumer *uc, bool add)
+{
+	struct uprobe *uprobe;
+	struct uprobe_consumer *con;
+	int ret = -ENOENT;
+
+	uprobe = find_uprobe(inode, offset);
+	if (!uprobe)
+		return ret;
+
+	down_write(&uprobe->register_rwsem);
+	for (con = uprobe->consumers; con && con != uc ; con = con->next)
+		;
+	if (con)
+		ret = register_for_each_vma(uprobe, add ? uc : NULL);
+	up_write(&uprobe->register_rwsem);
+	put_uprobe(uprobe);
+
+	return ret;
+}
+
 /*
  * uprobe_unregister - unregister a already registered probe.
  * @inode: the file in which the probe has to be removed.
-- 
cgit v1.2.3


From 343d9c283c9847da043fda3e76e3197f27b667dd Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Fri, 8 Feb 2013 13:00:22 -0500
Subject: jbd2: add tracepoints which provide per-handle statistics

Handles which stay open a long time are problematic when it comes time
to close down a transaction so it can be committed.  These tracepoints
will help us determine which ones are the problematic ones, and to
validate whether changes makes things better or worse.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/jbd2/transaction.c       | 28 ++++++++++++-
 include/linux/jbd2.h        |  8 +++-
 include/trace/events/jbd2.h | 98 +++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 131 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 735609e2d636..b7e2385c6e92 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -30,6 +30,8 @@
 #include <linux/bug.h>
 #include <linux/module.h>
 
+#include <trace/events/jbd2.h>
+
 static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh);
 static void __jbd2_journal_unfile_buffer(struct journal_head *jh);
 
@@ -307,6 +309,8 @@ repeat:
 	 */
 	update_t_max_wait(transaction, ts);
 	handle->h_transaction = transaction;
+	handle->h_requested_credits = nblocks;
+	handle->h_start_jiffies = jiffies;
 	atomic_inc(&transaction->t_updates);
 	atomic_inc(&transaction->t_handle_count);
 	jbd_debug(4, "Handle %p given %d credits (total %d, free %d)\n",
@@ -353,7 +357,8 @@ static handle_t *new_handle(int nblocks)
  * Return a pointer to a newly allocated handle, or an ERR_PTR() value
  * on failure.
  */
-handle_t *jbd2__journal_start(journal_t *journal, int nblocks, gfp_t gfp_mask)
+handle_t *jbd2__journal_start(journal_t *journal, int nblocks, gfp_t gfp_mask,
+			      unsigned int type, unsigned int line_no)
 {
 	handle_t *handle = journal_current_handle();
 	int err;
@@ -379,6 +384,11 @@ handle_t *jbd2__journal_start(journal_t *journal, int nblocks, gfp_t gfp_mask)
 		current->journal_info = NULL;
 		handle = ERR_PTR(err);
 	}
+	handle->h_type = type;
+	handle->h_line_no = line_no;
+	trace_jbd2_handle_start(journal->j_fs_dev->bd_dev,
+				handle->h_transaction->t_tid, type,
+				line_no, nblocks);
 	return handle;
 }
 EXPORT_SYMBOL(jbd2__journal_start);
@@ -386,7 +396,7 @@ EXPORT_SYMBOL(jbd2__journal_start);
 
 handle_t *jbd2_journal_start(journal_t *journal, int nblocks)
 {
-	return jbd2__journal_start(journal, nblocks, GFP_NOFS);
+	return jbd2__journal_start(journal, nblocks, GFP_NOFS, 0, 0);
 }
 EXPORT_SYMBOL(jbd2_journal_start);
 
@@ -448,7 +458,14 @@ int jbd2_journal_extend(handle_t *handle, int nblocks)
 		goto unlock;
 	}
 
+	trace_jbd2_handle_extend(journal->j_fs_dev->bd_dev,
+				 handle->h_transaction->t_tid,
+				 handle->h_type, handle->h_line_no,
+				 handle->h_buffer_credits,
+				 nblocks);
+
 	handle->h_buffer_credits += nblocks;
+	handle->h_requested_credits += nblocks;
 	atomic_add(nblocks, &transaction->t_outstanding_credits);
 	result = 0;
 
@@ -1377,6 +1394,13 @@ int jbd2_journal_stop(handle_t *handle)
 	}
 
 	jbd_debug(4, "Handle %p going down\n", handle);
+	trace_jbd2_handle_stats(journal->j_fs_dev->bd_dev,
+				handle->h_transaction->t_tid,
+				handle->h_type, handle->h_line_no,
+				jiffies - handle->h_start_jiffies,
+				handle->h_sync, handle->h_requested_credits,
+				(handle->h_requested_credits -
+				 handle->h_buffer_credits));
 
 	/*
 	 * Implement synchronous transaction batching.  If the handle
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index 24db7256a5ff..fa5fea17b619 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -400,6 +400,11 @@ struct jbd2_journal_handle
 	unsigned int	h_sync:		1;	/* sync-on-close */
 	unsigned int	h_jdata:	1;	/* force data journaling */
 	unsigned int	h_aborted:	1;	/* fatal error on handle */
+	unsigned int	h_type:		8;	/* for handle statistics */
+	unsigned int	h_line_no:	16;	/* for handle statistics */
+
+	unsigned long		h_start_jiffies;
+	unsigned int		h_requested_credits;
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 	struct lockdep_map	h_lockdep_map;
@@ -1071,7 +1076,8 @@ static inline handle_t *journal_current_handle(void)
  */
 
 extern handle_t *jbd2_journal_start(journal_t *, int nblocks);
-extern handle_t *jbd2__journal_start(journal_t *, int nblocks, gfp_t gfp_mask);
+extern handle_t *jbd2__journal_start(journal_t *, int nblocks, gfp_t gfp_mask,
+				     unsigned int type, unsigned int line_no);
 extern int	 jbd2_journal_restart(handle_t *, int nblocks);
 extern int	 jbd2__journal_restart(handle_t *, int nblocks, gfp_t gfp_mask);
 extern int	 jbd2_journal_extend (handle_t *, int nblocks);
diff --git a/include/trace/events/jbd2.h b/include/trace/events/jbd2.h
index 5419f57beb1f..070df49e4a1d 100644
--- a/include/trace/events/jbd2.h
+++ b/include/trace/events/jbd2.h
@@ -132,6 +132,104 @@ TRACE_EVENT(jbd2_submit_inode_data,
 		  (unsigned long) __entry->ino)
 );
 
+TRACE_EVENT(jbd2_handle_start,
+	TP_PROTO(dev_t dev, unsigned long tid, unsigned int type,
+		 unsigned int line_no, int requested_blocks),
+
+	TP_ARGS(dev, tid, type, line_no, requested_blocks),
+
+	TP_STRUCT__entry(
+		__field(		dev_t,	dev		)
+		__field(	unsigned long,	tid		)
+		__field(	 unsigned int,	type		)
+		__field(	 unsigned int,	line_no		)
+		__field(		  int,	requested_blocks)
+	),
+
+	TP_fast_assign(
+		__entry->dev		  = dev;
+		__entry->tid		  = tid;
+		__entry->type		  = type;
+		__entry->line_no	  = line_no;
+		__entry->requested_blocks = requested_blocks;
+	),
+
+	TP_printk("dev %d,%d tid %lu type %u line_no %u "
+		  "requested_blocks %d",
+		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->tid,
+		  __entry->type, __entry->line_no, __entry->requested_blocks)
+);
+
+TRACE_EVENT(jbd2_handle_extend,
+	TP_PROTO(dev_t dev, unsigned long tid, unsigned int type,
+		 unsigned int line_no, int buffer_credits,
+		 int requested_blocks),
+
+	TP_ARGS(dev, tid, type, line_no, buffer_credits, requested_blocks),
+
+	TP_STRUCT__entry(
+		__field(		dev_t,	dev		)
+		__field(	unsigned long,	tid		)
+		__field(	 unsigned int,	type		)
+		__field(	 unsigned int,	line_no		)
+		__field(		  int,	buffer_credits  )
+		__field(		  int,	requested_blocks)
+	),
+
+	TP_fast_assign(
+		__entry->dev		  = dev;
+		__entry->tid		  = tid;
+		__entry->type		  = type;
+		__entry->line_no	  = line_no;
+		__entry->buffer_credits   = buffer_credits;
+		__entry->requested_blocks = requested_blocks;
+	),
+
+	TP_printk("dev %d,%d tid %lu type %u line_no %u "
+		  "buffer_credits %d requested_blocks %d",
+		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->tid,
+		  __entry->type, __entry->line_no, __entry->buffer_credits,
+		  __entry->requested_blocks)
+);
+
+TRACE_EVENT(jbd2_handle_stats,
+	TP_PROTO(dev_t dev, unsigned long tid, unsigned int type,
+		 unsigned int line_no, int interval, int sync,
+		 int requested_blocks, int dirtied_blocks),
+
+	TP_ARGS(dev, tid, type, line_no, interval, sync,
+		requested_blocks, dirtied_blocks),
+
+	TP_STRUCT__entry(
+		__field(		dev_t,	dev		)
+		__field(	unsigned long,	tid		)
+		__field(	 unsigned int,	type		)
+		__field(	 unsigned int,	line_no		)
+		__field(		  int,	interval	)
+		__field(		  int,	sync		)
+		__field(		  int,	requested_blocks)
+		__field(		  int,	dirtied_blocks	)
+	),
+
+	TP_fast_assign(
+		__entry->dev		  = dev;
+		__entry->tid		  = tid;
+		__entry->type		  = type;
+		__entry->line_no	  = line_no;
+		__entry->interval	  = interval;
+		__entry->sync		  = sync;
+		__entry->requested_blocks = requested_blocks;
+		__entry->dirtied_blocks	  = dirtied_blocks;
+	),
+
+	TP_printk("dev %d,%d tid %lu type %u line_no %u interval %d "
+		  "sync %d requested_blocks %d dirtied_blocks %d",
+		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->tid,
+		  __entry->type, __entry->line_no, __entry->interval,
+		  __entry->sync, __entry->requested_blocks,
+		  __entry->dirtied_blocks)
+);
+
 TRACE_EVENT(jbd2_run_stats,
 	TP_PROTO(dev_t dev, unsigned long tid,
 		 struct transaction_run_stats_s *stats),
-- 
cgit v1.2.3


From 4c190e2f913f038c9c91ee63b59cd037260ba353 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Wed, 6 Feb 2013 08:28:55 -0500
Subject: sunrpc: trim off trailing checksum before returning decrypted or
 integrity authenticated buffer

When GSSAPI integrity signatures are in use, or when we're using GSSAPI
privacy with the v2 token format, there is a trailing checksum on the
xdr_buf that is returned.

It's checked during the authentication stage, and afterward nothing
cares about it. Ordinarily, it's not a problem since the XDR code
generally ignores it, but it will be when we try to compute a checksum
over the buffer to help prevent XID collisions in the duplicate reply
cache.

Fix the code to trim off the checksums after verifying them. Note that
in unwrap_integ_data, we must avoid trying to reverify the checksum if
the request was deferred since it will no longer be present when it's
revisited.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
---
 include/linux/sunrpc/xdr.h          |  1 +
 net/sunrpc/auth_gss/gss_krb5_wrap.c |  2 ++
 net/sunrpc/auth_gss/svcauth_gss.c   | 10 +++++++--
 net/sunrpc/xdr.c                    | 41 +++++++++++++++++++++++++++++++++++++
 4 files changed, 52 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index 224d06047e45..15f9204ee70b 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -152,6 +152,7 @@ xdr_adjust_iovec(struct kvec *iov, __be32 *p)
 extern void xdr_shift_buf(struct xdr_buf *, size_t);
 extern void xdr_buf_from_iov(struct kvec *, struct xdr_buf *);
 extern int xdr_buf_subsegment(struct xdr_buf *, struct xdr_buf *, unsigned int, unsigned int);
+extern void xdr_buf_trim(struct xdr_buf *, unsigned int);
 extern int xdr_buf_read_netobj(struct xdr_buf *, struct xdr_netobj *, unsigned int);
 extern int read_bytes_from_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int);
 extern int write_bytes_to_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int);
diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c
index 107c4528654f..88edec929d73 100644
--- a/net/sunrpc/auth_gss/gss_krb5_wrap.c
+++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c
@@ -574,6 +574,8 @@ gss_unwrap_kerberos_v2(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf)
 	buf->head[0].iov_len -= GSS_KRB5_TOK_HDR_LEN + headskip;
 	buf->len -= GSS_KRB5_TOK_HDR_LEN + headskip;
 
+	/* Trim off the checksum blob */
+	xdr_buf_trim(buf, GSS_KRB5_TOK_HDR_LEN + tailskip);
 	return GSS_S_COMPLETE;
 }
 
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 73e957386600..a5b41e2ac25a 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -817,13 +817,17 @@ read_u32_from_xdr_buf(struct xdr_buf *buf, int base, u32 *obj)
  *	The server uses base of head iovec as read pointer, while the
  *	client uses separate pointer. */
 static int
-unwrap_integ_data(struct xdr_buf *buf, u32 seq, struct gss_ctx *ctx)
+unwrap_integ_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct gss_ctx *ctx)
 {
 	int stat = -EINVAL;
 	u32 integ_len, maj_stat;
 	struct xdr_netobj mic;
 	struct xdr_buf integ_buf;
 
+	/* Did we already verify the signature on the original pass through? */
+	if (rqstp->rq_deferred)
+		return 0;
+
 	integ_len = svc_getnl(&buf->head[0]);
 	if (integ_len & 3)
 		return stat;
@@ -846,6 +850,8 @@ unwrap_integ_data(struct xdr_buf *buf, u32 seq, struct gss_ctx *ctx)
 		goto out;
 	if (svc_getnl(&buf->head[0]) != seq)
 		goto out;
+	/* trim off the mic at the end before returning */
+	xdr_buf_trim(buf, mic.len + 4);
 	stat = 0;
 out:
 	kfree(mic.data);
@@ -1190,7 +1196,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp)
 			/* placeholders for length and seq. number: */
 			svc_putnl(resv, 0);
 			svc_putnl(resv, 0);
-			if (unwrap_integ_data(&rqstp->rq_arg,
+			if (unwrap_integ_data(rqstp, &rqstp->rq_arg,
 					gc->gc_seq, rsci->mechctx))
 				goto garbage_args;
 			break;
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 56055632f151..75edcfad6e26 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -879,6 +879,47 @@ xdr_buf_subsegment(struct xdr_buf *buf, struct xdr_buf *subbuf,
 }
 EXPORT_SYMBOL_GPL(xdr_buf_subsegment);
 
+/**
+ * xdr_buf_trim - lop at most "len" bytes off the end of "buf"
+ * @buf: buf to be trimmed
+ * @len: number of bytes to reduce "buf" by
+ *
+ * Trim an xdr_buf by the given number of bytes by fixing up the lengths. Note
+ * that it's possible that we'll trim less than that amount if the xdr_buf is
+ * too small, or if (for instance) it's all in the head and the parser has
+ * already read too far into it.
+ */
+void xdr_buf_trim(struct xdr_buf *buf, unsigned int len)
+{
+	size_t cur;
+	unsigned int trim = len;
+
+	if (buf->tail[0].iov_len) {
+		cur = min_t(size_t, buf->tail[0].iov_len, trim);
+		buf->tail[0].iov_len -= cur;
+		trim -= cur;
+		if (!trim)
+			goto fix_len;
+	}
+
+	if (buf->page_len) {
+		cur = min_t(unsigned int, buf->page_len, trim);
+		buf->page_len -= cur;
+		trim -= cur;
+		if (!trim)
+			goto fix_len;
+	}
+
+	if (buf->head[0].iov_len) {
+		cur = min_t(size_t, buf->head[0].iov_len, trim);
+		buf->head[0].iov_len -= cur;
+		trim -= cur;
+	}
+fix_len:
+	buf->len -= (len - trim);
+}
+EXPORT_SYMBOL_GPL(xdr_buf_trim);
+
 static void __read_bytes_from_xdr_buf(struct xdr_buf *subbuf, void *obj, unsigned int len)
 {
 	unsigned int this_len;
-- 
cgit v1.2.3


From e5e67305885eb12849b5475764b0542f03dc2b59 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Fri, 8 Feb 2013 10:17:15 +0000
Subject: skbuff: Move definition of NETDEV_FRAG_PAGE_MAX_SIZE

In order to address the fact that some devices cannot support the full 32K
frag size we need to have the value accessible somewhere so that we can use it
to do comparisons against what the device can support.  As such I am moving
the values out of skbuff.c and into skbuff.h.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 4 ++++
 net/core/skbuff.c      | 4 ----
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 0259b719bebf..d7573c37a51d 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1832,6 +1832,10 @@ static inline void __skb_queue_purge(struct sk_buff_head *list)
 		kfree_skb(skb);
 }
 
+#define NETDEV_FRAG_PAGE_MAX_ORDER get_order(32768)
+#define NETDEV_FRAG_PAGE_MAX_SIZE  (PAGE_SIZE << NETDEV_FRAG_PAGE_MAX_ORDER)
+#define NETDEV_PAGECNT_MAX_BIAS	   NETDEV_FRAG_PAGE_MAX_SIZE
+
 extern void *netdev_alloc_frag(unsigned int fragsz);
 
 extern struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 55f7ef6ada6d..6114c1143564 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -351,10 +351,6 @@ struct netdev_alloc_cache {
 };
 static DEFINE_PER_CPU(struct netdev_alloc_cache, netdev_alloc_cache);
 
-#define NETDEV_FRAG_PAGE_MAX_ORDER get_order(32768)
-#define NETDEV_FRAG_PAGE_MAX_SIZE  (PAGE_SIZE << NETDEV_FRAG_PAGE_MAX_ORDER)
-#define NETDEV_PAGECNT_MAX_BIAS	   NETDEV_FRAG_PAGE_MAX_SIZE
-
 static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
 {
 	struct netdev_alloc_cache *nc;
-- 
cgit v1.2.3


From d6d3c4e656513dcea61ce900f0ecb9ca820ee7cd Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 6 Feb 2013 15:30:56 -0500
Subject: OF: convert devtree lock from rw_lock to raw spinlock

With the locking cleanup in place (from "OF: Fixup resursive
locking code paths"), we can now do the conversion from the
rw_lock to a raw spinlock as required for preempt-rt.

The previous cleanup and this conversion were originally
separate since they predated when mainline got raw spinlock (in
commit c2f21ce2e31286a "locking: Implement new raw_spinlock").

So, at that point in time, the cleanup was considered plausible
for mainline, but not this conversion.  In any case, we've kept
them separate as it makes for easier review and better bisection.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
[PG: taken from preempt-rt, update subject & add a commit log]
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: Rob Herring <rob.herring@calxeda.com>
---
 arch/sparc/kernel/prom_common.c |   4 +-
 drivers/of/base.c               | 100 ++++++++++++++++++++++------------------
 include/linux/of.h              |   2 +-
 3 files changed, 59 insertions(+), 47 deletions(-)

(limited to 'include/linux')

diff --git a/arch/sparc/kernel/prom_common.c b/arch/sparc/kernel/prom_common.c
index 1303021748c8..9f20566b0773 100644
--- a/arch/sparc/kernel/prom_common.c
+++ b/arch/sparc/kernel/prom_common.c
@@ -64,7 +64,7 @@ int of_set_property(struct device_node *dp, const char *name, void *val, int len
 	err = -ENODEV;
 
 	mutex_lock(&of_set_property_mutex);
-	write_lock(&devtree_lock);
+	raw_spin_lock(&devtree_lock);
 	prevp = &dp->properties;
 	while (*prevp) {
 		struct property *prop = *prevp;
@@ -91,7 +91,7 @@ int of_set_property(struct device_node *dp, const char *name, void *val, int len
 		}
 		prevp = &(*prevp)->next;
 	}
-	write_unlock(&devtree_lock);
+	raw_spin_unlock(&devtree_lock);
 	mutex_unlock(&of_set_property_mutex);
 
 	/* XXX Upate procfs if necessary... */
diff --git a/drivers/of/base.c b/drivers/of/base.c
index 16ee7a08e044..f86be5594a15 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -55,7 +55,7 @@ static DEFINE_MUTEX(of_aliases_mutex);
 /* use when traversing tree through the allnext, child, sibling,
  * or parent members of struct device_node.
  */
-DEFINE_RWLOCK(devtree_lock);
+DEFINE_RAW_SPINLOCK(devtree_lock);
 
 int of_n_addr_cells(struct device_node *np)
 {
@@ -188,10 +188,11 @@ struct property *of_find_property(const struct device_node *np,
 				  int *lenp)
 {
 	struct property *pp;
+	unsigned long flags;
 
-	read_lock(&devtree_lock);
+	raw_spin_lock_irqsave(&devtree_lock, flags);
 	pp = __of_find_property(np, name, lenp);
-	read_unlock(&devtree_lock);
+	raw_spin_unlock_irqrestore(&devtree_lock, flags);
 
 	return pp;
 }
@@ -209,13 +210,13 @@ struct device_node *of_find_all_nodes(struct device_node *prev)
 {
 	struct device_node *np;
 
-	read_lock(&devtree_lock);
+	raw_spin_lock(&devtree_lock);
 	np = prev ? prev->allnext : of_allnodes;
 	for (; np != NULL; np = np->allnext)
 		if (of_node_get(np))
 			break;
 	of_node_put(prev);
-	read_unlock(&devtree_lock);
+	raw_spin_unlock(&devtree_lock);
 	return np;
 }
 EXPORT_SYMBOL(of_find_all_nodes);
@@ -274,11 +275,12 @@ static int __of_device_is_compatible(const struct device_node *device,
 int of_device_is_compatible(const struct device_node *device,
 		const char *compat)
 {
+	unsigned long flags;
 	int res;
 
-	read_lock(&devtree_lock);
+	raw_spin_lock_irqsave(&devtree_lock, flags);
 	res = __of_device_is_compatible(device, compat);
-	read_unlock(&devtree_lock);
+	raw_spin_unlock_irqrestore(&devtree_lock, flags);
 	return res;
 }
 EXPORT_SYMBOL(of_device_is_compatible);
@@ -340,13 +342,14 @@ EXPORT_SYMBOL(of_device_is_available);
 struct device_node *of_get_parent(const struct device_node *node)
 {
 	struct device_node *np;
+	unsigned long flags;
 
 	if (!node)
 		return NULL;
 
-	read_lock(&devtree_lock);
+	raw_spin_lock_irqsave(&devtree_lock, flags);
 	np = of_node_get(node->parent);
-	read_unlock(&devtree_lock);
+	raw_spin_unlock_irqrestore(&devtree_lock, flags);
 	return np;
 }
 EXPORT_SYMBOL(of_get_parent);
@@ -365,14 +368,15 @@ EXPORT_SYMBOL(of_get_parent);
 struct device_node *of_get_next_parent(struct device_node *node)
 {
 	struct device_node *parent;
+	unsigned long flags;
 
 	if (!node)
 		return NULL;
 
-	read_lock(&devtree_lock);
+	raw_spin_lock_irqsave(&devtree_lock, flags);
 	parent = of_node_get(node->parent);
 	of_node_put(node);
-	read_unlock(&devtree_lock);
+	raw_spin_unlock_irqrestore(&devtree_lock, flags);
 	return parent;
 }
 
@@ -388,14 +392,15 @@ struct device_node *of_get_next_child(const struct device_node *node,
 	struct device_node *prev)
 {
 	struct device_node *next;
+	unsigned long flags;
 
-	read_lock(&devtree_lock);
+	raw_spin_lock_irqsave(&devtree_lock, flags);
 	next = prev ? prev->sibling : node->child;
 	for (; next; next = next->sibling)
 		if (of_node_get(next))
 			break;
 	of_node_put(prev);
-	read_unlock(&devtree_lock);
+	raw_spin_unlock_irqrestore(&devtree_lock, flags);
 	return next;
 }
 EXPORT_SYMBOL(of_get_next_child);
@@ -413,7 +418,7 @@ struct device_node *of_get_next_available_child(const struct device_node *node,
 {
 	struct device_node *next;
 
-	read_lock(&devtree_lock);
+	raw_spin_lock(&devtree_lock);
 	next = prev ? prev->sibling : node->child;
 	for (; next; next = next->sibling) {
 		if (!of_device_is_available(next))
@@ -422,7 +427,7 @@ struct device_node *of_get_next_available_child(const struct device_node *node,
 			break;
 	}
 	of_node_put(prev);
-	read_unlock(&devtree_lock);
+	raw_spin_unlock(&devtree_lock);
 	return next;
 }
 EXPORT_SYMBOL(of_get_next_available_child);
@@ -460,14 +465,15 @@ EXPORT_SYMBOL(of_get_child_by_name);
 struct device_node *of_find_node_by_path(const char *path)
 {
 	struct device_node *np = of_allnodes;
+	unsigned long flags;
 
-	read_lock(&devtree_lock);
+	raw_spin_lock_irqsave(&devtree_lock, flags);
 	for (; np; np = np->allnext) {
 		if (np->full_name && (of_node_cmp(np->full_name, path) == 0)
 		    && of_node_get(np))
 			break;
 	}
-	read_unlock(&devtree_lock);
+	raw_spin_unlock_irqrestore(&devtree_lock, flags);
 	return np;
 }
 EXPORT_SYMBOL(of_find_node_by_path);
@@ -487,15 +493,16 @@ struct device_node *of_find_node_by_name(struct device_node *from,
 	const char *name)
 {
 	struct device_node *np;
+	unsigned long flags;
 
-	read_lock(&devtree_lock);
+	raw_spin_lock_irqsave(&devtree_lock, flags);
 	np = from ? from->allnext : of_allnodes;
 	for (; np; np = np->allnext)
 		if (np->name && (of_node_cmp(np->name, name) == 0)
 		    && of_node_get(np))
 			break;
 	of_node_put(from);
-	read_unlock(&devtree_lock);
+	raw_spin_unlock_irqrestore(&devtree_lock, flags);
 	return np;
 }
 EXPORT_SYMBOL(of_find_node_by_name);
@@ -516,15 +523,16 @@ struct device_node *of_find_node_by_type(struct device_node *from,
 	const char *type)
 {
 	struct device_node *np;
+	unsigned long flags;
 
-	read_lock(&devtree_lock);
+	raw_spin_lock_irqsave(&devtree_lock, flags);
 	np = from ? from->allnext : of_allnodes;
 	for (; np; np = np->allnext)
 		if (np->type && (of_node_cmp(np->type, type) == 0)
 		    && of_node_get(np))
 			break;
 	of_node_put(from);
-	read_unlock(&devtree_lock);
+	raw_spin_unlock_irqrestore(&devtree_lock, flags);
 	return np;
 }
 EXPORT_SYMBOL(of_find_node_by_type);
@@ -547,8 +555,9 @@ struct device_node *of_find_compatible_node(struct device_node *from,
 	const char *type, const char *compatible)
 {
 	struct device_node *np;
+	unsigned long flags;
 
-	read_lock(&devtree_lock);
+	raw_spin_lock_irqsave(&devtree_lock, flags);
 	np = from ? from->allnext : of_allnodes;
 	for (; np; np = np->allnext) {
 		if (type
@@ -559,7 +568,7 @@ struct device_node *of_find_compatible_node(struct device_node *from,
 			break;
 	}
 	of_node_put(from);
-	read_unlock(&devtree_lock);
+	raw_spin_unlock_irqrestore(&devtree_lock, flags);
 	return np;
 }
 EXPORT_SYMBOL(of_find_compatible_node);
@@ -581,8 +590,9 @@ struct device_node *of_find_node_with_property(struct device_node *from,
 {
 	struct device_node *np;
 	struct property *pp;
+	unsigned long flags;
 
-	read_lock(&devtree_lock);
+	raw_spin_lock_irqsave(&devtree_lock, flags);
 	np = from ? from->allnext : of_allnodes;
 	for (; np; np = np->allnext) {
 		for (pp = np->properties; pp; pp = pp->next) {
@@ -594,7 +604,7 @@ struct device_node *of_find_node_with_property(struct device_node *from,
 	}
 out:
 	of_node_put(from);
-	read_unlock(&devtree_lock);
+	raw_spin_unlock_irqrestore(&devtree_lock, flags);
 	return np;
 }
 EXPORT_SYMBOL(of_find_node_with_property);
@@ -635,10 +645,11 @@ const struct of_device_id *of_match_node(const struct of_device_id *matches,
 					 const struct device_node *node)
 {
 	const struct of_device_id *match;
+	unsigned long flags;
 
-	read_lock(&devtree_lock);
+	raw_spin_lock_irqsave(&devtree_lock, flags);
 	match = __of_match_node(matches, node);
-	read_unlock(&devtree_lock);
+	raw_spin_unlock_irqrestore(&devtree_lock, flags);
 	return match;
 }
 EXPORT_SYMBOL(of_match_node);
@@ -662,11 +673,12 @@ struct device_node *of_find_matching_node_and_match(struct device_node *from,
 {
 	struct device_node *np;
 	const struct of_device_id *m;
+	unsigned long flags;
 
 	if (match)
 		*match = NULL;
 
-	read_lock(&devtree_lock);
+	raw_spin_lock_irqsave(&devtree_lock, flags);
 	np = from ? from->allnext : of_allnodes;
 	for (; np; np = np->allnext) {
 		m = __of_match_node(matches, np);
@@ -677,7 +689,7 @@ struct device_node *of_find_matching_node_and_match(struct device_node *from,
 		}
 	}
 	of_node_put(from);
-	read_unlock(&devtree_lock);
+	raw_spin_unlock_irqrestore(&devtree_lock, flags);
 	return np;
 }
 EXPORT_SYMBOL(of_find_matching_node_and_match);
@@ -720,12 +732,12 @@ struct device_node *of_find_node_by_phandle(phandle handle)
 {
 	struct device_node *np;
 
-	read_lock(&devtree_lock);
+	raw_spin_lock(&devtree_lock);
 	for (np = of_allnodes; np; np = np->allnext)
 		if (np->phandle == handle)
 			break;
 	of_node_get(np);
-	read_unlock(&devtree_lock);
+	raw_spin_unlock(&devtree_lock);
 	return np;
 }
 EXPORT_SYMBOL(of_find_node_by_phandle);
@@ -1197,18 +1209,18 @@ int of_add_property(struct device_node *np, struct property *prop)
 		return rc;
 
 	prop->next = NULL;
-	write_lock_irqsave(&devtree_lock, flags);
+	raw_spin_lock_irqsave(&devtree_lock, flags);
 	next = &np->properties;
 	while (*next) {
 		if (strcmp(prop->name, (*next)->name) == 0) {
 			/* duplicate ! don't insert it */
-			write_unlock_irqrestore(&devtree_lock, flags);
+			raw_spin_unlock_irqrestore(&devtree_lock, flags);
 			return -1;
 		}
 		next = &(*next)->next;
 	}
 	*next = prop;
-	write_unlock_irqrestore(&devtree_lock, flags);
+	raw_spin_unlock_irqrestore(&devtree_lock, flags);
 
 #ifdef CONFIG_PROC_DEVICETREE
 	/* try to add to proc as well if it was initialized */
@@ -1238,7 +1250,7 @@ int of_remove_property(struct device_node *np, struct property *prop)
 	if (rc)
 		return rc;
 
-	write_lock_irqsave(&devtree_lock, flags);
+	raw_spin_lock_irqsave(&devtree_lock, flags);
 	next = &np->properties;
 	while (*next) {
 		if (*next == prop) {
@@ -1251,7 +1263,7 @@ int of_remove_property(struct device_node *np, struct property *prop)
 		}
 		next = &(*next)->next;
 	}
-	write_unlock_irqrestore(&devtree_lock, flags);
+	raw_spin_unlock_irqrestore(&devtree_lock, flags);
 
 	if (!found)
 		return -ENODEV;
@@ -1291,7 +1303,7 @@ int of_update_property(struct device_node *np, struct property *newprop)
 	if (!oldprop)
 		return of_add_property(np, newprop);
 
-	write_lock_irqsave(&devtree_lock, flags);
+	raw_spin_lock_irqsave(&devtree_lock, flags);
 	next = &np->properties;
 	while (*next) {
 		if (*next == oldprop) {
@@ -1305,7 +1317,7 @@ int of_update_property(struct device_node *np, struct property *newprop)
 		}
 		next = &(*next)->next;
 	}
-	write_unlock_irqrestore(&devtree_lock, flags);
+	raw_spin_unlock_irqrestore(&devtree_lock, flags);
 
 	if (!found)
 		return -ENODEV;
@@ -1378,12 +1390,12 @@ int of_attach_node(struct device_node *np)
 	if (rc)
 		return rc;
 
-	write_lock_irqsave(&devtree_lock, flags);
+	raw_spin_lock_irqsave(&devtree_lock, flags);
 	np->sibling = np->parent->child;
 	np->allnext = of_allnodes;
 	np->parent->child = np;
 	of_allnodes = np;
-	write_unlock_irqrestore(&devtree_lock, flags);
+	raw_spin_unlock_irqrestore(&devtree_lock, flags);
 
 	of_add_proc_dt_entry(np);
 	return 0;
@@ -1426,17 +1438,17 @@ int of_detach_node(struct device_node *np)
 	if (rc)
 		return rc;
 
-	write_lock_irqsave(&devtree_lock, flags);
+	raw_spin_lock_irqsave(&devtree_lock, flags);
 
 	if (of_node_check_flag(np, OF_DETACHED)) {
 		/* someone already detached it */
-		write_unlock_irqrestore(&devtree_lock, flags);
+		raw_spin_unlock_irqrestore(&devtree_lock, flags);
 		return rc;
 	}
 
 	parent = np->parent;
 	if (!parent) {
-		write_unlock_irqrestore(&devtree_lock, flags);
+		raw_spin_unlock_irqrestore(&devtree_lock, flags);
 		return rc;
 	}
 
@@ -1463,7 +1475,7 @@ int of_detach_node(struct device_node *np)
 	}
 
 	of_node_set_flag(np, OF_DETACHED);
-	write_unlock_irqrestore(&devtree_lock, flags);
+	raw_spin_unlock_irqrestore(&devtree_lock, flags);
 
 	of_remove_proc_dt_entry(np);
 	return rc;
diff --git a/include/linux/of.h b/include/linux/of.h
index 5ebcc5c8e423..bb35c423b1f9 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -92,7 +92,7 @@ static inline void of_node_put(struct device_node *node) { }
 extern struct device_node *of_allnodes;
 extern struct device_node *of_chosen;
 extern struct device_node *of_aliases;
-extern rwlock_t devtree_lock;
+extern raw_spinlock_t devtree_lock;
 
 static inline bool of_have_populated_dt(void)
 {
-- 
cgit v1.2.3


From 84e345e4e209cbe796c88fa2ad1732d7121ec100 Mon Sep 17 00:00:00 2001
From: Prarit Bhargava <prarit@redhat.com>
Date: Fri, 8 Feb 2013 17:59:53 -0500
Subject: time, Fix setting of hardware clock in NTP code

At init time, if the system time is "warped" forward in warp_clock()
it will differ from the hardware clock by sys_tz.tz_minuteswest.  This time
difference is not taken into account when ntp updates the hardware clock,
and this causes the system time to jump forward by this offset every reboot.

The kernel must take this offset into account when writing the system time
to the hardware clock in the ntp code.  This patch adds
persistent_clock_is_local which indicates that an offset has been applied
in warp_clock() and accounts for the "warp" before writing the hardware
clock.

x86 does not have this problem as rtc writes are software limited to a
+/-15 minute window relative to the current rtc time.  Other arches, such
as powerpc, however do a full synchronization of the system time to the
rtc and will see this problem.

[v2]: generated against tip/timers/core

Signed-off-by: Prarit Bhargava <prarit@redhat.com>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 include/linux/time.h | 1 +
 kernel/time.c        | 8 ++++++++
 kernel/time/ntp.c    | 8 ++++++--
 3 files changed, 15 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/time.h b/include/linux/time.h
index 476e1d7b2c37..a3ab6a814a9c 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -128,6 +128,7 @@ static inline bool has_persistent_clock(void)
 
 extern void read_persistent_clock(struct timespec *ts);
 extern void read_boot_clock(struct timespec *ts);
+extern int persistent_clock_is_local;
 extern int update_persistent_clock(struct timespec now);
 void timekeeping_init(void);
 extern int timekeeping_suspended;
diff --git a/kernel/time.c b/kernel/time.c
index d226c6a3fd28..c2a27dd93142 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -114,6 +114,12 @@ SYSCALL_DEFINE2(gettimeofday, struct timeval __user *, tv,
 	return 0;
 }
 
+/*
+ * Indicates if there is an offset between the system clock and the hardware
+ * clock/persistent clock/rtc.
+ */
+int persistent_clock_is_local;
+
 /*
  * Adjust the time obtained from the CMOS to be UTC time instead of
  * local time.
@@ -135,6 +141,8 @@ static inline void warp_clock(void)
 	struct timespec adjust;
 
 	adjust = current_kernel_time();
+	if (sys_tz.tz_minuteswest != 0)
+		persistent_clock_is_local = 1;
 	adjust.tv_sec += sys_tz.tz_minuteswest * 60;
 	do_settimeofday(&adjust);
 }
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 313b161504b7..b10a42bb0165 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -511,13 +511,17 @@ static void sync_cmos_clock(struct work_struct *work)
 
 	getnstimeofday(&now);
 	if (abs(now.tv_nsec - (NSEC_PER_SEC / 2)) <= tick_nsec / 2) {
+		struct timespec adjust = now;
+
 		fail = -ENODEV;
+		if (persistent_clock_is_local)
+			adjust.tv_sec -= (sys_tz.tz_minuteswest * 60);
 #ifdef CONFIG_GENERIC_CMOS_UPDATE
-		fail = update_persistent_clock(now);
+		fail = update_persistent_clock(adjust);
 #endif
 #ifdef CONFIG_RTC_SYSTOHC
 		if (fail == -ENODEV)
-			fail = rtc_set_ntp_time(now);
+			fail = rtc_set_ntp_time(adjust);
 #endif
 	}
 
-- 
cgit v1.2.3


From 91ffbabfd9ae6b9bfef02bb1e0fbba451c7289a7 Mon Sep 17 00:00:00 2001
From: Denis CIOCCA <denis.ciocca@st.com>
Date: Thu, 7 Feb 2013 09:46:00 +0000
Subject: iio:common: added allocate and deallocate trigger functions when
 trigger is disabled.

This patch resolve a bugfix when driver is compiled without trigger.

Signed-off-by: Denis Ciocca <denis.ciocca@st.com>
Reported-by: Randy Dunlap  <rdunlap@infradead.org>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 include/linux/iio/common/st_sensors.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/iio/common/st_sensors.h b/include/linux/iio/common/st_sensors.h
index c40fdf537f69..1f86a97ab2e2 100644
--- a/include/linux/iio/common/st_sensors.h
+++ b/include/linux/iio/common/st_sensors.h
@@ -235,6 +235,16 @@ void st_sensors_deallocate_trigger(struct iio_dev *indio_dev);
 irqreturn_t st_sensors_trigger_handler(int irq, void *p);
 
 int st_sensors_get_buffer_element(struct iio_dev *indio_dev, u8 *buf);
+#else
+static inline int st_sensors_allocate_trigger(struct iio_dev *indio_dev,
+				const struct iio_trigger_ops *trigger_ops)
+{
+	return 0;
+}
+static inline void st_sensors_deallocate_trigger(struct iio_dev *indio_dev)
+{
+	return;
+}
 #endif
 
 int st_sensors_init_sensor(struct iio_dev *indio_dev);
-- 
cgit v1.2.3


From 6d9eecd418afb2c12e5db5be3d72f0f1df43bdd9 Mon Sep 17 00:00:00 2001
From: Lars-Peter Clausen <lars@metafoo.de>
Date: Wed, 9 Jan 2013 17:31:00 +0000
Subject: spi: Add helper functions for setting up transfers

Quite often the pattern used for setting up and transferring a synchronous SPI
transaction looks very much like the following:

	struct spi_message msg;
	struct spi_transfer xfers[] = {
		...
	};

	spi_message_init(&msg);
	spi_message_add_tail(&xfers[0], &msg);
	...
	spi_message_add_tail(&xfers[ARRAY_SIZE(xfers) - 1], &msg);

	ret = spi_sync(&msg);

This patch adds two new helper functions for handling this case. The first
helper function spi_message_init_with_transfers() takes a spi_message and an
array of spi_transfers. It will initialize the message and then call
spi_message_add_tail() for each transfer in the array. E.g. the following

	spi_message_init(&msg);
	spi_message_add_tail(&xfers[0], &msg);
	...
	spi_message_add_tail(&xfers[ARRAY_SIZE(xfers) - 1], &msg);

can be rewritten as

	spi_message_init_with_transfers(&msg, xfers, ARRAY_SIZE(xfers));

The second function spi_sync_transfer() takes a SPI device and an array of
spi_transfers. It will allocate a new spi_message (on the stack) and add all
transfers in the array to the message. Finally it will call spi_sync() on the
message.

E.g. the follwing

	struct spi_message msg;
	struct spi_transfer xfers[] = {
		...
	};

	spi_message_init(&msg);
	spi_message_add_tail(&xfers[0], &msg);
	...
	spi_message_add_tail(&xfers[ARRAY_SIZE(xfers) - 1], &msg);

	ret = spi_sync(spi, &msg);

can be rewritten as

	struct spi_transfer xfers[] = {
		...
	};

	ret = spi_sync_transfer(spi, xfers, ARRAY_SIZE(xfers));

A coccinelle script to find such instances will follow.

Signed-off-by: Lars-Peter Clausen <lars@metafoo.de>
Reviewed-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 include/linux/spi/spi.h | 44 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index f62918946d86..7dbe58642525 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -591,6 +591,26 @@ spi_transfer_del(struct spi_transfer *t)
 	list_del(&t->transfer_list);
 }
 
+/**
+ * spi_message_init_with_transfers - Initialize spi_message and append transfers
+ * @m: spi_message to be initialized
+ * @xfers: An array of spi transfers
+ * @num_xfers: Number of items in the xfer array
+ *
+ * This function initializes the given spi_message and adds each spi_transfer in
+ * the given array to the message.
+ */
+static inline void
+spi_message_init_with_transfers(struct spi_message *m,
+struct spi_transfer *xfers, unsigned int num_xfers)
+{
+	unsigned int i;
+
+	spi_message_init(m);
+	for (i = 0; i < num_xfers; ++i)
+		spi_message_add_tail(&xfers[i], m);
+}
+
 /* It's fine to embed message and transaction structures in other data
  * structures so long as you don't free them while they're in use.
  */
@@ -683,6 +703,30 @@ spi_read(struct spi_device *spi, void *buf, size_t len)
 	return spi_sync(spi, &m);
 }
 
+/**
+ * spi_sync_transfer - synchronous SPI data transfer
+ * @spi: device with which data will be exchanged
+ * @xfers: An array of spi_transfers
+ * @num_xfers: Number of items in the xfer array
+ * Context: can sleep
+ *
+ * Does a synchronous SPI data transfer of the given spi_transfer array.
+ *
+ * For more specific semantics see spi_sync().
+ *
+ * It returns zero on success, else a negative error code.
+ */
+static inline int
+spi_sync_transfer(struct spi_device *spi, struct spi_transfer *xfers,
+	unsigned int num_xfers)
+{
+	struct spi_message msg;
+
+	spi_message_init_with_transfers(&msg, xfers, num_xfers);
+
+	return spi_sync(spi, &msg);
+}
+
 /* this copies txbuf and rxbuf data; for small transfers only! */
 extern int spi_write_then_read(struct spi_device *spi,
 		const void *txbuf, unsigned n_tx,
-- 
cgit v1.2.3


From aaa300262c5912bda34c9cf871719209eae01b06 Mon Sep 17 00:00:00 2001
From: Jonathan Cameron <jic23@kernel.org>
Date: Sat, 9 Feb 2013 10:49:00 +0000
Subject: iio:triggers Protect functions in triggers.h from use when not
 compiled

Also include a couple of forward defs of struct iio_trigger and struct
iio_trigger_ops to avoid doing this in each driver.

Signed-off-by: Jonathan Cameron <jic23@kernel.org>
Reported-by: Randy Dunlap <rdunlap@infradead.org>
Acked-by: Denis Ciocca <denis.ciocca@st.com>
---
 include/linux/iio/trigger.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/iio/trigger.h b/include/linux/iio/trigger.h
index 20239da1d0f7..c66e0a96f6e8 100644
--- a/include/linux/iio/trigger.h
+++ b/include/linux/iio/trigger.h
@@ -12,6 +12,7 @@
 #ifndef _IIO_TRIGGER_H_
 #define _IIO_TRIGGER_H_
 
+#ifdef CONFIG_IIO_TRIGGER
 struct iio_subirq {
 	bool enabled;
 };
@@ -117,4 +118,8 @@ irqreturn_t iio_trigger_generic_data_rdy_poll(int irq, void *private);
 __printf(1, 2) struct iio_trigger *iio_trigger_alloc(const char *fmt, ...);
 void iio_trigger_free(struct iio_trigger *trig);
 
+#else
+struct iio_trigger;
+struct iio_trigger_ops;
+#endif
 #endif /* _IIO_TRIGGER_H_ */
-- 
cgit v1.2.3


From b6e96d0067d81f6a300bedee661b5ece8164e210 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Sat, 9 Feb 2013 16:29:20 -0500
Subject: jbd2: use module parameters instead of debugfs for jbd_debug

There are multiple reasons to move away from debugfs.  First of all,
we are only using it for a single parameter, and it is much more
complicated to set up (some 30 lines of code compared to 3), and one
more thing that might fail while loading the jbd2 module.

Secondly, as a module paramter it can be specified as a boot option if
jbd2 is built into the kernel, or as a parameter when the module is
loaded, and it can also be manipulated dynamically under
/sys/module/jbd2/parameters/jbd2_debug.  So it is more flexible.

Ultimately we want to move away from using jbd_debug() towards
tracepoints, but for now this is still a useful simplification of the
code base.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/jbd2/journal.c    | 50 ++++++++------------------------------------------
 include/linux/jbd2.h |  3 +--
 2 files changed, 9 insertions(+), 44 deletions(-)

(limited to 'include/linux')

diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 4ba2e81e35ac..ed10991ab006 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -35,7 +35,6 @@
 #include <linux/kthread.h>
 #include <linux/poison.h>
 #include <linux/proc_fs.h>
-#include <linux/debugfs.h>
 #include <linux/seq_file.h>
 #include <linux/math64.h>
 #include <linux/hash.h>
@@ -51,6 +50,14 @@
 #include <asm/uaccess.h>
 #include <asm/page.h>
 
+#ifdef CONFIG_JBD2_DEBUG
+ushort jbd2_journal_enable_debug __read_mostly;
+EXPORT_SYMBOL(jbd2_journal_enable_debug);
+
+module_param_named(jbd2_debug, jbd2_journal_enable_debug, ushort, 0644);
+MODULE_PARM_DESC(jbd2_debug, "Debugging level for jbd2");
+#endif
+
 EXPORT_SYMBOL(jbd2_journal_extend);
 EXPORT_SYMBOL(jbd2_journal_stop);
 EXPORT_SYMBOL(jbd2_journal_lock_updates);
@@ -2495,45 +2502,6 @@ restart:
 	spin_unlock(&journal->j_list_lock);
 }
 
-/*
- * debugfs tunables
- */
-#ifdef CONFIG_JBD2_DEBUG
-u8 jbd2_journal_enable_debug __read_mostly;
-EXPORT_SYMBOL(jbd2_journal_enable_debug);
-
-#define JBD2_DEBUG_NAME "jbd2-debug"
-
-static struct dentry *jbd2_debugfs_dir;
-static struct dentry *jbd2_debug;
-
-static void __init jbd2_create_debugfs_entry(void)
-{
-	jbd2_debugfs_dir = debugfs_create_dir("jbd2", NULL);
-	if (jbd2_debugfs_dir)
-		jbd2_debug = debugfs_create_u8(JBD2_DEBUG_NAME,
-					       S_IRUGO | S_IWUSR,
-					       jbd2_debugfs_dir,
-					       &jbd2_journal_enable_debug);
-}
-
-static void __exit jbd2_remove_debugfs_entry(void)
-{
-	debugfs_remove(jbd2_debug);
-	debugfs_remove(jbd2_debugfs_dir);
-}
-
-#else
-
-static void __init jbd2_create_debugfs_entry(void)
-{
-}
-
-static void __exit jbd2_remove_debugfs_entry(void)
-{
-}
-
-#endif
 
 #ifdef CONFIG_PROC_FS
 
@@ -2619,7 +2587,6 @@ static int __init journal_init(void)
 
 	ret = journal_init_caches();
 	if (ret == 0) {
-		jbd2_create_debugfs_entry();
 		jbd2_create_jbd_stats_proc_entry();
 	} else {
 		jbd2_journal_destroy_caches();
@@ -2634,7 +2601,6 @@ static void __exit journal_exit(void)
 	if (n)
 		printk(KERN_EMERG "JBD2: leaked %d journal_heads!\n", n);
 #endif
-	jbd2_remove_debugfs_entry();
 	jbd2_remove_jbd_stats_proc_entry();
 	jbd2_journal_destroy_caches();
 }
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index fa5fea17b619..50e5a5e6a712 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -20,7 +20,6 @@
 #ifndef __KERNEL__
 #include "jfs_compat.h"
 #define JBD2_DEBUG
-#define jfs_debug jbd_debug
 #else
 
 #include <linux/types.h>
@@ -57,7 +56,7 @@
  * CONFIG_JBD2_DEBUG is on.
  */
 #define JBD2_EXPENSIVE_CHECKING
-extern u8 jbd2_journal_enable_debug;
+extern ushort jbd2_journal_enable_debug;
 
 #define jbd_debug(n, f, a...)						\
 	do {								\
-- 
cgit v1.2.3


From 7e73c5ae6e7991a6c01f6d096ff8afaef4458c36 Mon Sep 17 00:00:00 2001
From: Zhang Rui <rui.zhang@intel.com>
Date: Wed, 6 Feb 2013 13:00:36 +0100
Subject: PM: Introduce suspend state PM_SUSPEND_FREEZE

PM_SUSPEND_FREEZE state is a general state that
does not need any platform specific support, it equals
frozen processes + suspended devices + idle processors.

Compared with PM_SUSPEND_MEMORY,
PM_SUSPEND_FREEZE saves less power
because the system is still in a running state.
PM_SUSPEND_FREEZE has less resume latency because it does not
touch BIOS, and the processors are in idle state.

Compared with RTPM/idle,
PM_SUSPEND_FREEZE saves more power as
1. the processor has longer sleep time because processes are frozen.
   The deeper c-state the processor supports, more power saving we can get.
2. PM_SUSPEND_FREEZE uses system suspend code path, thus we can get
   more power saving from the devices that does not have good RTPM support.

This state is useful for
1) platforms that do not have STR, or have a broken STR.
2) platforms that have an extremely low power idle state,
   which can be used to replace STR.

The following describes how PM_SUSPEND_FREEZE state works.
1. echo freeze > /sys/power/state
2. the processes are frozen.
3. all the devices are suspended.
4. all the processors are blocked by a wait queue
5. all the processors idles and enters (Deep) c-state.
6. an interrupt fires.
7. a processor is woken up and handles the irq.
8. if it is a general event,
   a) the irq handler runs and quites.
   b) goto step 4.
9. if it is a real wake event, say, power button pressing, keyboard touch, mouse moving,
   a) the irq handler runs and activate the wakeup source
   b) wakeup_source_activate() notifies the wait queue.
   c) system starts resuming from PM_SUSPEND_FREEZE
10. all the devices are resumed.
11. all the processes are unfrozen.
12. system is back to working.

Known Issue:
The wakeup of this new PM_SUSPEND_FREEZE state may behave differently
from the previous suspend state.
Take ACPI platform for example, there are some GPEs that only enabled
when the system is in sleep state, to wake the system backk from S3/S4.
But we are not touching these GPEs during transition to PM_SUSPEND_FREEZE.
This means we may lose some wake event.
But on the other hand, as we do not disable all the Interrupts during
PM_SUSPEND_FREEZE, we may get some extra "wakeup" Interrupts, that are
not available for S3/S4.

The patches has been tested on an old Sony laptop, and here are the results:

Average Power:
1. RPTM/idle for half an hour:
   14.8W, 12.6W, 14.1W, 12.5W, 14.4W, 13.2W, 12.9W
2. Freeze for half an hour:
   11W, 10.4W, 9.4W, 11.3W 10.5W
3. RTPM/idle for three hours:
   11.6W
4. Freeze for three hours:
   10W
5. Suspend to Memory:
   0.5~0.9W

Average Resume Latency:
1. RTPM/idle with a black screen: (From pressing keyboard to screen back)
   Less than 0.2s
2. Freeze: (From pressing power button to screen back)
   2.50s
3. Suspend to Memory: (From pressing power button to screen back)
   4.33s

>From the results, we can see that all the platforms should benefit from
this patch, even if it does not have Low Power S0.

Signed-off-by: Zhang Rui <rui.zhang@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/wakeup.c |  6 ++++
 include/linux/suspend.h     |  6 +++-
 kernel/power/main.c         |  2 +-
 kernel/power/suspend.c      | 69 ++++++++++++++++++++++++++++++++++++---------
 4 files changed, 68 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c
index e6ee5e80e546..79715e7fa43e 100644
--- a/drivers/base/power/wakeup.c
+++ b/drivers/base/power/wakeup.c
@@ -382,6 +382,12 @@ static void wakeup_source_activate(struct wakeup_source *ws)
 {
 	unsigned int cec;
 
+	/*
+	 * active wakeup source should bring the system
+	 * out of PM_SUSPEND_FREEZE state
+	 */
+	freeze_wake();
+
 	ws->active = true;
 	ws->active_count++;
 	ws->last_time = ktime_get();
diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index 0c808d7fa579..d4e3f16d5e89 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -34,8 +34,10 @@ static inline void pm_restore_console(void)
 typedef int __bitwise suspend_state_t;
 
 #define PM_SUSPEND_ON		((__force suspend_state_t) 0)
-#define PM_SUSPEND_STANDBY	((__force suspend_state_t) 1)
+#define PM_SUSPEND_FREEZE	((__force suspend_state_t) 1)
+#define PM_SUSPEND_STANDBY	((__force suspend_state_t) 2)
 #define PM_SUSPEND_MEM		((__force suspend_state_t) 3)
+#define PM_SUSPEND_MIN		PM_SUSPEND_FREEZE
 #define PM_SUSPEND_MAX		((__force suspend_state_t) 4)
 
 enum suspend_stat_step {
@@ -192,6 +194,7 @@ struct platform_suspend_ops {
  */
 extern void suspend_set_ops(const struct platform_suspend_ops *ops);
 extern int suspend_valid_only_mem(suspend_state_t state);
+extern void freeze_wake(void);
 
 /**
  * arch_suspend_disable_irqs - disable IRQs for suspend
@@ -217,6 +220,7 @@ extern int pm_suspend(suspend_state_t state);
 
 static inline void suspend_set_ops(const struct platform_suspend_ops *ops) {}
 static inline int pm_suspend(suspend_state_t state) { return -ENOSYS; }
+static inline void freeze_wake(void) {}
 #endif /* !CONFIG_SUSPEND */
 
 /* struct pbe is used for creating lists of pages that should be restored
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 1c16f9167de1..b1c26a92ca9f 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -313,7 +313,7 @@ static ssize_t state_show(struct kobject *kobj, struct kobj_attribute *attr,
 static suspend_state_t decode_state(const char *buf, size_t n)
 {
 #ifdef CONFIG_SUSPEND
-	suspend_state_t state = PM_SUSPEND_STANDBY;
+	suspend_state_t state = PM_SUSPEND_MIN;
 	const char * const *s;
 #endif
 	char *p;
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index c8b7446b27df..d4feda084a3a 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -30,12 +30,38 @@
 #include "power.h"
 
 const char *const pm_states[PM_SUSPEND_MAX] = {
+	[PM_SUSPEND_FREEZE]	= "freeze",
 	[PM_SUSPEND_STANDBY]	= "standby",
 	[PM_SUSPEND_MEM]	= "mem",
 };
 
 static const struct platform_suspend_ops *suspend_ops;
 
+static bool need_suspend_ops(suspend_state_t state)
+{
+	return !!(state > PM_SUSPEND_FREEZE);
+}
+
+static DECLARE_WAIT_QUEUE_HEAD(suspend_freeze_wait_head);
+static bool suspend_freeze_wake;
+
+static void freeze_begin(void)
+{
+	suspend_freeze_wake = false;
+}
+
+static void freeze_enter(void)
+{
+	wait_event(suspend_freeze_wait_head, suspend_freeze_wake);
+}
+
+void freeze_wake(void)
+{
+	suspend_freeze_wake = true;
+	wake_up(&suspend_freeze_wait_head);
+}
+EXPORT_SYMBOL_GPL(freeze_wake);
+
 /**
  * suspend_set_ops - Set the global suspend method table.
  * @ops: Suspend operations to use.
@@ -50,8 +76,11 @@ EXPORT_SYMBOL_GPL(suspend_set_ops);
 
 bool valid_state(suspend_state_t state)
 {
+	if (state == PM_SUSPEND_FREEZE)
+		return true;
 	/*
-	 * All states need lowlevel support and need to be valid to the lowlevel
+	 * PM_SUSPEND_STANDBY and PM_SUSPEND_MEMORY states need lowlevel
+	 * support and need to be valid to the lowlevel
 	 * implementation, no valid callback implies that none are valid.
 	 */
 	return suspend_ops && suspend_ops->valid && suspend_ops->valid(state);
@@ -89,11 +118,11 @@ static int suspend_test(int level)
  * hibernation).  Run suspend notifiers, allocate the "suspend" console and
  * freeze processes.
  */
-static int suspend_prepare(void)
+static int suspend_prepare(suspend_state_t state)
 {
 	int error;
 
-	if (!suspend_ops || !suspend_ops->enter)
+	if (need_suspend_ops(state) && (!suspend_ops || !suspend_ops->enter))
 		return -EPERM;
 
 	pm_prepare_console();
@@ -137,7 +166,7 @@ static int suspend_enter(suspend_state_t state, bool *wakeup)
 {
 	int error;
 
-	if (suspend_ops->prepare) {
+	if (need_suspend_ops(state) && suspend_ops->prepare) {
 		error = suspend_ops->prepare();
 		if (error)
 			goto Platform_finish;
@@ -149,12 +178,23 @@ static int suspend_enter(suspend_state_t state, bool *wakeup)
 		goto Platform_finish;
 	}
 
-	if (suspend_ops->prepare_late) {
+	if (need_suspend_ops(state) && suspend_ops->prepare_late) {
 		error = suspend_ops->prepare_late();
 		if (error)
 			goto Platform_wake;
 	}
 
+	/*
+	 * PM_SUSPEND_FREEZE equals
+	 * frozen processes + suspended devices + idle processors.
+	 * Thus we should invoke freeze_enter() soon after
+	 * all the devices are suspended.
+	 */
+	if (state == PM_SUSPEND_FREEZE) {
+		freeze_enter();
+		goto Platform_wake;
+	}
+
 	if (suspend_test(TEST_PLATFORM))
 		goto Platform_wake;
 
@@ -182,13 +222,13 @@ static int suspend_enter(suspend_state_t state, bool *wakeup)
 	enable_nonboot_cpus();
 
  Platform_wake:
-	if (suspend_ops->wake)
+	if (need_suspend_ops(state) && suspend_ops->wake)
 		suspend_ops->wake();
 
 	dpm_resume_start(PMSG_RESUME);
 
  Platform_finish:
-	if (suspend_ops->finish)
+	if (need_suspend_ops(state) && suspend_ops->finish)
 		suspend_ops->finish();
 
 	return error;
@@ -203,11 +243,11 @@ int suspend_devices_and_enter(suspend_state_t state)
 	int error;
 	bool wakeup = false;
 
-	if (!suspend_ops)
+	if (need_suspend_ops(state) && !suspend_ops)
 		return -ENOSYS;
 
 	trace_machine_suspend(state);
-	if (suspend_ops->begin) {
+	if (need_suspend_ops(state) && suspend_ops->begin) {
 		error = suspend_ops->begin(state);
 		if (error)
 			goto Close;
@@ -226,7 +266,7 @@ int suspend_devices_and_enter(suspend_state_t state)
 
 	do {
 		error = suspend_enter(state, &wakeup);
-	} while (!error && !wakeup
+	} while (!error && !wakeup && need_suspend_ops(state)
 		&& suspend_ops->suspend_again && suspend_ops->suspend_again());
 
  Resume_devices:
@@ -236,13 +276,13 @@ int suspend_devices_and_enter(suspend_state_t state)
 	ftrace_start();
 	resume_console();
  Close:
-	if (suspend_ops->end)
+	if (need_suspend_ops(state) && suspend_ops->end)
 		suspend_ops->end();
 	trace_machine_suspend(PWR_EVENT_EXIT);
 	return error;
 
  Recover_platform:
-	if (suspend_ops->recover)
+	if (need_suspend_ops(state) && suspend_ops->recover)
 		suspend_ops->recover();
 	goto Resume_devices;
 }
@@ -278,12 +318,15 @@ static int enter_state(suspend_state_t state)
 	if (!mutex_trylock(&pm_mutex))
 		return -EBUSY;
 
+	if (state == PM_SUSPEND_FREEZE)
+		freeze_begin();
+
 	printk(KERN_INFO "PM: Syncing filesystems ... ");
 	sys_sync();
 	printk("done.\n");
 
 	pr_debug("PM: Preparing system for %s sleep\n", pm_states[state]);
-	error = suspend_prepare();
+	error = suspend_prepare(state);
 	if (error)
 		goto Unlock;
 
-- 
cgit v1.2.3


From 957d1282bb8c07e682e142b9237cd9fcb8348a0b Mon Sep 17 00:00:00 2001
From: Li Fei <fei.li@intel.com>
Date: Fri, 1 Feb 2013 08:56:03 +0000
Subject: suspend: enable freeze timeout configuration through sys

At present, the value of timeout for freezing is 20s, which is
meaningless in case that one thread is frozen with mutex locked
and another thread is trying to lock the mutex, as this time of
freezing will fail unavoidably.
And if there is no new wakeup event registered, the system will
waste at most 20s for such meaningless trying of freezing.

With this patch, the value of timeout can be configured to smaller
value, so such meaningless trying of freezing will be aborted in
earlier time, and later freezing can be also triggered in earlier
time. And more power will be saved.
In normal case on mobile phone, it costs real little time to freeze
processes. On some platform, it only costs about 20ms to freeze
user space processes and 10ms to freeze kernel freezable threads.

Signed-off-by: Liu Chuansheng <chuansheng.liu@intel.com>
Signed-off-by: Li Fei <fei.li@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 Documentation/power/freezing-of-tasks.txt |  5 +++++
 include/linux/freezer.h                   |  5 +++++
 kernel/power/main.c                       | 27 +++++++++++++++++++++++++++
 kernel/power/process.c                    |  4 ++--
 4 files changed, 39 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/power/freezing-of-tasks.txt b/Documentation/power/freezing-of-tasks.txt
index 6ec291ea1c78..85894d83b352 100644
--- a/Documentation/power/freezing-of-tasks.txt
+++ b/Documentation/power/freezing-of-tasks.txt
@@ -223,3 +223,8 @@ since they ask the freezer to skip freezing this task, since it is anyway
 only after the entire suspend/hibernation sequence is complete.
 So, to summarize, use [un]lock_system_sleep() instead of directly using
 mutex_[un]lock(&pm_mutex). That would prevent freezing failures.
+
+V. Miscellaneous
+/sys/power/pm_freeze_timeout controls how long it will cost at most to freeze
+all user space processes or all freezable kernel threads, in unit of millisecond.
+The default value is 20000, with range of unsigned integer.
diff --git a/include/linux/freezer.h b/include/linux/freezer.h
index e4238ceaa4d6..e70df40d84f6 100644
--- a/include/linux/freezer.h
+++ b/include/linux/freezer.h
@@ -12,6 +12,11 @@ extern atomic_t system_freezing_cnt;	/* nr of freezing conds in effect */
 extern bool pm_freezing;		/* PM freezing in effect */
 extern bool pm_nosig_freezing;		/* PM nosig freezing in effect */
 
+/*
+ * Timeout for stopping processes
+ */
+extern unsigned int freeze_timeout_msecs;
+
 /*
  * Check if a process has been frozen
  */
diff --git a/kernel/power/main.c b/kernel/power/main.c
index b1c26a92ca9f..d77663bfedeb 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -553,6 +553,30 @@ power_attr(pm_trace_dev_match);
 
 #endif /* CONFIG_PM_TRACE */
 
+#ifdef CONFIG_FREEZER
+static ssize_t pm_freeze_timeout_show(struct kobject *kobj,
+				      struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%u\n", freeze_timeout_msecs);
+}
+
+static ssize_t pm_freeze_timeout_store(struct kobject *kobj,
+				       struct kobj_attribute *attr,
+				       const char *buf, size_t n)
+{
+	unsigned long val;
+
+	if (kstrtoul(buf, 10, &val))
+		return -EINVAL;
+
+	freeze_timeout_msecs = val;
+	return n;
+}
+
+power_attr(pm_freeze_timeout);
+
+#endif	/* CONFIG_FREEZER*/
+
 static struct attribute * g[] = {
 	&state_attr.attr,
 #ifdef CONFIG_PM_TRACE
@@ -575,6 +599,9 @@ static struct attribute * g[] = {
 #ifdef CONFIG_PM_SLEEP_DEBUG
 	&pm_print_times_attr.attr,
 #endif
+#endif
+#ifdef CONFIG_FREEZER
+	&pm_freeze_timeout_attr.attr,
 #endif
 	NULL,
 };
diff --git a/kernel/power/process.c b/kernel/power/process.c
index d5a258b60c6f..98088e0e71e8 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -21,7 +21,7 @@
 /* 
  * Timeout for stopping processes
  */
-#define TIMEOUT	(20 * HZ)
+unsigned int __read_mostly freeze_timeout_msecs = 20 * MSEC_PER_SEC;
 
 static int try_to_freeze_tasks(bool user_only)
 {
@@ -36,7 +36,7 @@ static int try_to_freeze_tasks(bool user_only)
 
 	do_gettimeofday(&start);
 
-	end_time = jiffies + TIMEOUT;
+	end_time = jiffies + msecs_to_jiffies(freeze_timeout_msecs);
 
 	if (!user_only)
 		freeze_workqueues_begin();
-- 
cgit v1.2.3


From 710a03e9349f027cf2d77559973fc06076a4042a Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Tue, 15 Jan 2013 16:37:56 +0000
Subject: ARM: ux500: remove irq_base property from platform_data

AB8500 GPIO no longer handles its GPIO IRQs. Instead, the AB8500
core driver has taken back the responsibility. Prior to this
happening, the AB8500 GPIO driver provided a set of virtual IRQs
which were used as a pass-through. These virtual IRQs had a base
of MOP500_AB8500_VIR_GPIO_IRQ_BASE, which was passed though pdata.
We don't need to do this anymore, so we're pulling out the
property from the structure.

Cc: arm@kernel.org
Acked-by: Olof Johansson <olof@lixom.net>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 arch/arm/mach-ux500/board-mop500.c     | 1 -
 include/linux/mfd/abx500/ab8500-gpio.h | 1 -
 2 files changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-ux500/board-mop500.c b/arch/arm/mach-ux500/board-mop500.c
index b6f14ee507ca..b8781caa54b8 100644
--- a/arch/arm/mach-ux500/board-mop500.c
+++ b/arch/arm/mach-ux500/board-mop500.c
@@ -92,7 +92,6 @@ static struct platform_device snowball_gpio_en_3v3_regulator_dev = {
 
 static struct abx500_gpio_platform_data ab8500_gpio_pdata = {
 	.gpio_base		= MOP500_AB8500_PIN_GPIO(1),
-	.irq_base		= MOP500_AB8500_VIR_GPIO_IRQ_BASE,
 };
 
 /* ab8500-codec */
diff --git a/include/linux/mfd/abx500/ab8500-gpio.h b/include/linux/mfd/abx500/ab8500-gpio.h
index e8c8281e194a..172b2f201ae0 100644
--- a/include/linux/mfd/abx500/ab8500-gpio.h
+++ b/include/linux/mfd/abx500/ab8500-gpio.h
@@ -16,7 +16,6 @@
 
 struct abx500_gpio_platform_data {
 	int gpio_base;
-	u32 irq_base;
 };
 
 enum abx500_gpio_pull_updown {
-- 
cgit v1.2.3


From 09a642b78523e9f4c5970c806ad218aa3de31551 Mon Sep 17 00:00:00 2001
From: Ge Gao <ggao@invensense.com>
Date: Sat, 2 Feb 2013 00:26:00 +0000
Subject: Invensense MPU6050 Device Driver.

This the basic functional Invensense MPU6050 Device driver.

Signed-off-by: Ge Gao <ggao@invensense.com>
Reviewed-by: Lars-Peter Clausen <lars@metafoo.de>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 Documentation/ABI/testing/sysfs-bus-iio-mpu6050  |  13 +
 drivers/iio/imu/Kconfig                          |   2 +
 drivers/iio/imu/Makefile                         |   2 +
 drivers/iio/imu/inv_mpu6050/Kconfig              |  13 +
 drivers/iio/imu/inv_mpu6050/Makefile             |   6 +
 drivers/iio/imu/inv_mpu6050/inv_mpu_core.c       | 795 +++++++++++++++++++++++
 drivers/iio/imu/inv_mpu6050/inv_mpu_iio.h        | 246 +++++++
 drivers/iio/imu/inv_mpu6050/inv_mpu_ring.c       | 196 ++++++
 drivers/iio/imu/inv_mpu6050/inv_mpu_trigger.c    | 155 +++++
 include/linux/platform_data/invensense_mpu6050.h |  31 +
 10 files changed, 1459 insertions(+)
 create mode 100644 Documentation/ABI/testing/sysfs-bus-iio-mpu6050
 create mode 100644 drivers/iio/imu/inv_mpu6050/Kconfig
 create mode 100644 drivers/iio/imu/inv_mpu6050/Makefile
 create mode 100644 drivers/iio/imu/inv_mpu6050/inv_mpu_core.c
 create mode 100644 drivers/iio/imu/inv_mpu6050/inv_mpu_iio.h
 create mode 100644 drivers/iio/imu/inv_mpu6050/inv_mpu_ring.c
 create mode 100644 drivers/iio/imu/inv_mpu6050/inv_mpu_trigger.c
 create mode 100644 include/linux/platform_data/invensense_mpu6050.h

(limited to 'include/linux')

diff --git a/Documentation/ABI/testing/sysfs-bus-iio-mpu6050 b/Documentation/ABI/testing/sysfs-bus-iio-mpu6050
new file mode 100644
index 000000000000..cb53737aacbf
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-iio-mpu6050
@@ -0,0 +1,13 @@
+What:           /sys/bus/iio/devices/iio:deviceX/in_gyro_matrix
+What:           /sys/bus/iio/devices/iio:deviceX/in_accel_matrix
+What:           /sys/bus/iio/devices/iio:deviceX/in_magn_matrix
+KernelVersion:  3.4.0
+Contact:        linux-iio@vger.kernel.org
+Description:
+		This is mounting matrix for motion sensors. Mounting matrix
+		is a 3x3 unitary matrix. A typical mounting matrix would look like
+		[0, 1, 0; 1, 0, 0; 0, 0, -1]. Using this information, it would be
+		easy to tell the relative positions among sensors as well as their
+		positions relative to the board that holds these sensors. Identity matrix
+		[1, 0, 0; 0, 1, 0; 0, 0, 1] means sensor chip and device are perfectly
+		aligned with each other. All axes are exactly the same.
diff --git a/drivers/iio/imu/Kconfig b/drivers/iio/imu/Kconfig
index 47f66ed9c4ef..4f40a10cb74f 100644
--- a/drivers/iio/imu/Kconfig
+++ b/drivers/iio/imu/Kconfig
@@ -36,3 +36,5 @@ config IIO_ADIS_LIB_BUFFER
 	help
 	  A set of buffer helper functions for the Analog Devices ADIS* device
 	  family.
+
+source "drivers/iio/imu/inv_mpu6050/Kconfig"
diff --git a/drivers/iio/imu/Makefile b/drivers/iio/imu/Makefile
index 019b717c5ff1..f2f56ceaed26 100644
--- a/drivers/iio/imu/Makefile
+++ b/drivers/iio/imu/Makefile
@@ -11,3 +11,5 @@ adis_lib-y += adis.o
 adis_lib-$(CONFIG_IIO_ADIS_LIB_BUFFER) += adis_trigger.o
 adis_lib-$(CONFIG_IIO_ADIS_LIB_BUFFER) += adis_buffer.o
 obj-$(CONFIG_IIO_ADIS_LIB) += adis_lib.o
+
+obj-y += inv_mpu6050/
diff --git a/drivers/iio/imu/inv_mpu6050/Kconfig b/drivers/iio/imu/inv_mpu6050/Kconfig
new file mode 100644
index 000000000000..b5cfa3a354cf
--- /dev/null
+++ b/drivers/iio/imu/inv_mpu6050/Kconfig
@@ -0,0 +1,13 @@
+#
+# inv-mpu6050 drivers for Invensense MPU devices and combos
+#
+
+config INV_MPU6050_IIO
+	tristate "Invensense MPU6050 devices"
+	depends on I2C && SYSFS
+	select IIO_TRIGGERED_BUFFER
+	help
+	  This driver supports the Invensense MPU6050 devices.
+	  It is a gyroscope/accelerometer combo device.
+	  This driver can be built as a module. The module will be called
+	  inv-mpu6050.
diff --git a/drivers/iio/imu/inv_mpu6050/Makefile b/drivers/iio/imu/inv_mpu6050/Makefile
new file mode 100644
index 000000000000..3a677c778afb
--- /dev/null
+++ b/drivers/iio/imu/inv_mpu6050/Makefile
@@ -0,0 +1,6 @@
+#
+# Makefile for Invensense MPU6050 device.
+#
+
+obj-$(CONFIG_INV_MPU6050_IIO) += inv-mpu6050.o
+inv-mpu6050-objs := inv_mpu_core.o inv_mpu_ring.o inv_mpu_trigger.o
diff --git a/drivers/iio/imu/inv_mpu6050/inv_mpu_core.c b/drivers/iio/imu/inv_mpu6050/inv_mpu_core.c
new file mode 100644
index 000000000000..37ca05b47e4b
--- /dev/null
+++ b/drivers/iio/imu/inv_mpu6050/inv_mpu_core.c
@@ -0,0 +1,795 @@
+/*
+* Copyright (C) 2012 Invensense, Inc.
+*
+* This software is licensed under the terms of the GNU General Public
+* License version 2, as published by the Free Software Foundation, and
+* may be copied, distributed, and modified under those terms.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*/
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/i2c.h>
+#include <linux/err.h>
+#include <linux/delay.h>
+#include <linux/sysfs.h>
+#include <linux/jiffies.h>
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+#include <linux/kfifo.h>
+#include <linux/spinlock.h>
+#include "inv_mpu_iio.h"
+
+/*
+ * this is the gyro scale translated from dynamic range plus/minus
+ * {250, 500, 1000, 2000} to rad/s
+ */
+static const int gyro_scale_6050[] = {133090, 266181, 532362, 1064724};
+
+/*
+ * this is the accel scale translated from dynamic range plus/minus
+ * {2, 4, 8, 16} to m/s^2
+ */
+static const int accel_scale[] = {598, 1196, 2392, 4785};
+
+static const struct inv_mpu6050_reg_map reg_set_6050 = {
+	.sample_rate_div	= INV_MPU6050_REG_SAMPLE_RATE_DIV,
+	.lpf                    = INV_MPU6050_REG_CONFIG,
+	.user_ctrl              = INV_MPU6050_REG_USER_CTRL,
+	.fifo_en                = INV_MPU6050_REG_FIFO_EN,
+	.gyro_config            = INV_MPU6050_REG_GYRO_CONFIG,
+	.accl_config            = INV_MPU6050_REG_ACCEL_CONFIG,
+	.fifo_count_h           = INV_MPU6050_REG_FIFO_COUNT_H,
+	.fifo_r_w               = INV_MPU6050_REG_FIFO_R_W,
+	.raw_gyro               = INV_MPU6050_REG_RAW_GYRO,
+	.raw_accl               = INV_MPU6050_REG_RAW_ACCEL,
+	.temperature            = INV_MPU6050_REG_TEMPERATURE,
+	.int_enable             = INV_MPU6050_REG_INT_ENABLE,
+	.pwr_mgmt_1             = INV_MPU6050_REG_PWR_MGMT_1,
+	.pwr_mgmt_2             = INV_MPU6050_REG_PWR_MGMT_2,
+};
+
+static const struct inv_mpu6050_chip_config chip_config_6050 = {
+	.fsr = INV_MPU6050_FSR_2000DPS,
+	.lpf = INV_MPU6050_FILTER_20HZ,
+	.fifo_rate = INV_MPU6050_INIT_FIFO_RATE,
+	.gyro_fifo_enable = false,
+	.accl_fifo_enable = false,
+	.accl_fs = INV_MPU6050_FS_02G,
+};
+
+static const struct inv_mpu6050_hw hw_info[INV_NUM_PARTS] = {
+	{
+		.num_reg = 117,
+		.name = "MPU6050",
+		.reg = &reg_set_6050,
+		.config = &chip_config_6050,
+	},
+};
+
+int inv_mpu6050_write_reg(struct inv_mpu6050_state *st, int reg, u8 d)
+{
+	return i2c_smbus_write_i2c_block_data(st->client, reg, 1, &d);
+}
+
+int inv_mpu6050_switch_engine(struct inv_mpu6050_state *st, bool en, u32 mask)
+{
+	u8 d, mgmt_1;
+	int result;
+
+	/* switch clock needs to be careful. Only when gyro is on, can
+	   clock source be switched to gyro. Otherwise, it must be set to
+	   internal clock */
+	if (INV_MPU6050_BIT_PWR_GYRO_STBY == mask) {
+		result = i2c_smbus_read_i2c_block_data(st->client,
+				       st->reg->pwr_mgmt_1, 1, &mgmt_1);
+		if (result != 1)
+			return result;
+
+		mgmt_1 &= ~INV_MPU6050_BIT_CLK_MASK;
+	}
+
+	if ((INV_MPU6050_BIT_PWR_GYRO_STBY == mask) && (!en)) {
+		/* turning off gyro requires switch to internal clock first.
+		   Then turn off gyro engine */
+		mgmt_1 |= INV_CLK_INTERNAL;
+		result = inv_mpu6050_write_reg(st, st->reg->pwr_mgmt_1, mgmt_1);
+		if (result)
+			return result;
+	}
+
+	result = i2c_smbus_read_i2c_block_data(st->client,
+				       st->reg->pwr_mgmt_2, 1, &d);
+	if (result != 1)
+		return result;
+	if (en)
+		d &= ~mask;
+	else
+		d |= mask;
+	result = inv_mpu6050_write_reg(st, st->reg->pwr_mgmt_2, d);
+	if (result)
+		return result;
+
+	if (en) {
+		/* Wait for output stablize */
+		msleep(INV_MPU6050_TEMP_UP_TIME);
+		if (INV_MPU6050_BIT_PWR_GYRO_STBY == mask) {
+			/* switch internal clock to PLL */
+			mgmt_1 |= INV_CLK_PLL;
+			result = inv_mpu6050_write_reg(st,
+					st->reg->pwr_mgmt_1, mgmt_1);
+			if (result)
+				return result;
+		}
+	}
+
+	return 0;
+}
+
+int inv_mpu6050_set_power_itg(struct inv_mpu6050_state *st, bool power_on)
+{
+	int result;
+
+	if (power_on)
+		result = inv_mpu6050_write_reg(st, st->reg->pwr_mgmt_1, 0);
+	else
+		result = inv_mpu6050_write_reg(st, st->reg->pwr_mgmt_1,
+						INV_MPU6050_BIT_SLEEP);
+	if (result)
+		return result;
+
+	if (power_on)
+		msleep(INV_MPU6050_REG_UP_TIME);
+
+	return 0;
+}
+
+/**
+ *  inv_mpu6050_init_config() - Initialize hardware, disable FIFO.
+ *
+ *  Initial configuration:
+ *  FSR: ± 2000DPS
+ *  DLPF: 20Hz
+ *  FIFO rate: 50Hz
+ *  Clock source: Gyro PLL
+ */
+static int inv_mpu6050_init_config(struct iio_dev *indio_dev)
+{
+	int result;
+	u8 d;
+	struct inv_mpu6050_state *st = iio_priv(indio_dev);
+
+	result = inv_mpu6050_set_power_itg(st, true);
+	if (result)
+		return result;
+	d = (INV_MPU6050_FSR_2000DPS << INV_MPU6050_GYRO_CONFIG_FSR_SHIFT);
+	result = inv_mpu6050_write_reg(st, st->reg->gyro_config, d);
+	if (result)
+		return result;
+
+	d = INV_MPU6050_FILTER_20HZ;
+	result = inv_mpu6050_write_reg(st, st->reg->lpf, d);
+	if (result)
+		return result;
+
+	d = INV_MPU6050_ONE_K_HZ / INV_MPU6050_INIT_FIFO_RATE - 1;
+	result = inv_mpu6050_write_reg(st, st->reg->sample_rate_div, d);
+	if (result)
+		return result;
+
+	d = (INV_MPU6050_FS_02G << INV_MPU6050_ACCL_CONFIG_FSR_SHIFT);
+	result = inv_mpu6050_write_reg(st, st->reg->accl_config, d);
+	if (result)
+		return result;
+
+	memcpy(&st->chip_config, hw_info[st->chip_type].config,
+		sizeof(struct inv_mpu6050_chip_config));
+	result = inv_mpu6050_set_power_itg(st, false);
+
+	return result;
+}
+
+static int inv_mpu6050_sensor_show(struct inv_mpu6050_state  *st, int reg,
+				int axis, int *val)
+{
+	int ind, result;
+	__be16 d;
+
+	ind = (axis - IIO_MOD_X) * 2;
+	result = i2c_smbus_read_i2c_block_data(st->client, reg + ind,  2,
+						(u8 *)&d);
+	if (result != 2)
+		return -EINVAL;
+	*val = (short)be16_to_cpup(&d);
+
+	return IIO_VAL_INT;
+}
+
+static int inv_mpu6050_read_raw(struct iio_dev *indio_dev,
+			      struct iio_chan_spec const *chan,
+			      int *val,
+			      int *val2,
+			      long mask) {
+	struct inv_mpu6050_state  *st = iio_priv(indio_dev);
+
+	switch (mask) {
+	case IIO_CHAN_INFO_RAW:
+	{
+		int ret, result;
+
+		ret = IIO_VAL_INT;
+		result = 0;
+		mutex_lock(&indio_dev->mlock);
+		if (!st->chip_config.enable) {
+			result = inv_mpu6050_set_power_itg(st, true);
+			if (result)
+				goto error_read_raw;
+		}
+		/* when enable is on, power is already on */
+		switch (chan->type) {
+		case IIO_ANGL_VEL:
+			if (!st->chip_config.gyro_fifo_enable ||
+					!st->chip_config.enable) {
+				result = inv_mpu6050_switch_engine(st, true,
+						INV_MPU6050_BIT_PWR_GYRO_STBY);
+				if (result)
+					goto error_read_raw;
+			}
+			ret =  inv_mpu6050_sensor_show(st, st->reg->raw_gyro,
+						chan->channel2, val);
+			if (!st->chip_config.gyro_fifo_enable ||
+					!st->chip_config.enable) {
+				result = inv_mpu6050_switch_engine(st, false,
+						INV_MPU6050_BIT_PWR_GYRO_STBY);
+				if (result)
+					goto error_read_raw;
+			}
+			break;
+		case IIO_ACCEL:
+			if (!st->chip_config.accl_fifo_enable ||
+					!st->chip_config.enable) {
+				result = inv_mpu6050_switch_engine(st, true,
+						INV_MPU6050_BIT_PWR_ACCL_STBY);
+				if (result)
+					goto error_read_raw;
+			}
+			ret = inv_mpu6050_sensor_show(st, st->reg->raw_accl,
+						chan->channel2, val);
+			if (!st->chip_config.accl_fifo_enable ||
+					!st->chip_config.enable) {
+				result = inv_mpu6050_switch_engine(st, false,
+						INV_MPU6050_BIT_PWR_ACCL_STBY);
+				if (result)
+					goto error_read_raw;
+			}
+			break;
+		case IIO_TEMP:
+			/* wait for stablization */
+			msleep(INV_MPU6050_SENSOR_UP_TIME);
+			inv_mpu6050_sensor_show(st, st->reg->temperature,
+							IIO_MOD_X, val);
+			break;
+		default:
+			ret = -EINVAL;
+			break;
+		}
+error_read_raw:
+		if (!st->chip_config.enable)
+			result |= inv_mpu6050_set_power_itg(st, false);
+		mutex_unlock(&indio_dev->mlock);
+		if (result)
+			return result;
+
+		return ret;
+	}
+	case IIO_CHAN_INFO_SCALE:
+		switch (chan->type) {
+		case IIO_ANGL_VEL:
+			*val  = 0;
+			*val2 = gyro_scale_6050[st->chip_config.fsr];
+
+			return IIO_VAL_INT_PLUS_NANO;
+		case IIO_ACCEL:
+			*val = 0;
+			*val2 = accel_scale[st->chip_config.accl_fs];
+
+			return IIO_VAL_INT_PLUS_MICRO;
+		case IIO_TEMP:
+			*val = 0;
+			*val2 = INV_MPU6050_TEMP_SCALE;
+
+			return IIO_VAL_INT_PLUS_MICRO;
+		default:
+			return -EINVAL;
+		}
+	case IIO_CHAN_INFO_OFFSET:
+		switch (chan->type) {
+		case IIO_TEMP:
+			*val = INV_MPU6050_TEMP_OFFSET;
+
+			return IIO_VAL_INT;
+		default:
+			return -EINVAL;
+		}
+	default:
+		return -EINVAL;
+	}
+}
+
+static int inv_mpu6050_write_fsr(struct inv_mpu6050_state *st, int fsr)
+{
+	int result;
+	u8 d;
+
+	if (fsr < 0 || fsr > INV_MPU6050_MAX_GYRO_FS_PARAM)
+		return -EINVAL;
+	if (fsr == st->chip_config.fsr)
+		return 0;
+
+	d = (fsr << INV_MPU6050_GYRO_CONFIG_FSR_SHIFT);
+	result = inv_mpu6050_write_reg(st, st->reg->gyro_config, d);
+	if (result)
+		return result;
+	st->chip_config.fsr = fsr;
+
+	return 0;
+}
+
+static int inv_mpu6050_write_accel_fs(struct inv_mpu6050_state *st, int fs)
+{
+	int result;
+	u8 d;
+
+	if (fs < 0 || fs > INV_MPU6050_MAX_ACCL_FS_PARAM)
+		return -EINVAL;
+	if (fs == st->chip_config.accl_fs)
+		return 0;
+
+	d = (fs << INV_MPU6050_ACCL_CONFIG_FSR_SHIFT);
+	result = inv_mpu6050_write_reg(st, st->reg->accl_config, d);
+	if (result)
+		return result;
+	st->chip_config.accl_fs = fs;
+
+	return 0;
+}
+
+static int inv_mpu6050_write_raw(struct iio_dev *indio_dev,
+			       struct iio_chan_spec const *chan,
+			       int val,
+			       int val2,
+			       long mask) {
+	struct inv_mpu6050_state  *st = iio_priv(indio_dev);
+	int result;
+
+	mutex_lock(&indio_dev->mlock);
+	/* we should only update scale when the chip is disabled, i.e.,
+		not running */
+	if (st->chip_config.enable) {
+		result = -EBUSY;
+		goto error_write_raw;
+	}
+	result = inv_mpu6050_set_power_itg(st, true);
+	if (result)
+		goto error_write_raw;
+
+	switch (mask) {
+	case IIO_CHAN_INFO_SCALE:
+		switch (chan->type) {
+		case IIO_ANGL_VEL:
+			result = inv_mpu6050_write_fsr(st, val);
+			break;
+		case IIO_ACCEL:
+			result = inv_mpu6050_write_accel_fs(st, val);
+			break;
+		default:
+			result = -EINVAL;
+			break;
+		}
+		break;
+	default:
+		result = -EINVAL;
+		break;
+	}
+
+error_write_raw:
+	result |= inv_mpu6050_set_power_itg(st, false);
+	mutex_unlock(&indio_dev->mlock);
+
+	return result;
+}
+
+/**
+ *  inv_mpu6050_set_lpf() - set low pass filer based on fifo rate.
+ *
+ *                  Based on the Nyquist principle, the sampling rate must
+ *                  exceed twice of the bandwidth of the signal, or there
+ *                  would be alising. This function basically search for the
+ *                  correct low pass parameters based on the fifo rate, e.g,
+ *                  sampling frequency.
+ */
+static int inv_mpu6050_set_lpf(struct inv_mpu6050_state *st, int rate)
+{
+	const int hz[] = {188, 98, 42, 20, 10, 5};
+	const int d[] = {INV_MPU6050_FILTER_188HZ, INV_MPU6050_FILTER_98HZ,
+			INV_MPU6050_FILTER_42HZ, INV_MPU6050_FILTER_20HZ,
+			INV_MPU6050_FILTER_10HZ, INV_MPU6050_FILTER_5HZ};
+	int i, h, result;
+	u8 data;
+
+	h = (rate >> 1);
+	i = 0;
+	while ((h < hz[i]) && (i < ARRAY_SIZE(d) - 1))
+		i++;
+	data = d[i];
+	result = inv_mpu6050_write_reg(st, st->reg->lpf, data);
+	if (result)
+		return result;
+	st->chip_config.lpf = data;
+
+	return 0;
+}
+
+/**
+ * inv_mpu6050_fifo_rate_store() - Set fifo rate.
+ */
+static ssize_t inv_mpu6050_fifo_rate_store(struct device *dev,
+	struct device_attribute *attr, const char *buf, size_t count)
+{
+	s32 fifo_rate;
+	u8 d;
+	int result;
+	struct iio_dev *indio_dev = dev_to_iio_dev(dev);
+	struct inv_mpu6050_state *st = iio_priv(indio_dev);
+
+	if (kstrtoint(buf, 10, &fifo_rate))
+		return -EINVAL;
+	if (fifo_rate < INV_MPU6050_MIN_FIFO_RATE ||
+				fifo_rate > INV_MPU6050_MAX_FIFO_RATE)
+		return -EINVAL;
+	if (fifo_rate == st->chip_config.fifo_rate)
+		return count;
+
+	mutex_lock(&indio_dev->mlock);
+	if (st->chip_config.enable) {
+		result = -EBUSY;
+		goto fifo_rate_fail;
+	}
+	result = inv_mpu6050_set_power_itg(st, true);
+	if (result)
+		goto fifo_rate_fail;
+
+	d = INV_MPU6050_ONE_K_HZ / fifo_rate - 1;
+	result = inv_mpu6050_write_reg(st, st->reg->sample_rate_div, d);
+	if (result)
+		goto fifo_rate_fail;
+	st->chip_config.fifo_rate = fifo_rate;
+
+	result = inv_mpu6050_set_lpf(st, fifo_rate);
+	if (result)
+		goto fifo_rate_fail;
+
+fifo_rate_fail:
+	result |= inv_mpu6050_set_power_itg(st, false);
+	mutex_unlock(&indio_dev->mlock);
+	if (result)
+		return result;
+
+	return count;
+}
+
+/**
+ * inv_fifo_rate_show() - Get the current sampling rate.
+ */
+static ssize_t inv_fifo_rate_show(struct device *dev,
+	struct device_attribute *attr, char *buf)
+{
+	struct inv_mpu6050_state *st = iio_priv(dev_to_iio_dev(dev));
+
+	return sprintf(buf, "%d\n", st->chip_config.fifo_rate);
+}
+
+/**
+ * inv_attr_show() - calling this function will show current
+ *                    parameters.
+ */
+static ssize_t inv_attr_show(struct device *dev,
+	struct device_attribute *attr, char *buf)
+{
+	struct inv_mpu6050_state *st = iio_priv(dev_to_iio_dev(dev));
+	struct iio_dev_attr *this_attr = to_iio_dev_attr(attr);
+	s8 *m;
+
+	switch (this_attr->address) {
+	/* In MPU6050, the two matrix are the same because gyro and accel
+	   are integrated in one chip */
+	case ATTR_GYRO_MATRIX:
+	case ATTR_ACCL_MATRIX:
+		m = st->plat_data.orientation;
+
+		return sprintf(buf, "%d, %d, %d; %d, %d, %d; %d, %d, %d\n",
+			m[0], m[1], m[2], m[3], m[4], m[5], m[6], m[7], m[8]);
+	default:
+		return -EINVAL;
+	}
+}
+
+/**
+ * inv_mpu6050_validate_trigger() - validate_trigger callback for invensense
+ *                                  MPU6050 device.
+ * @indio_dev: The IIO device
+ * @trig: The new trigger
+ *
+ * Returns: 0 if the 'trig' matches the trigger registered by the MPU6050
+ * device, -EINVAL otherwise.
+ */
+static int inv_mpu6050_validate_trigger(struct iio_dev *indio_dev,
+					struct iio_trigger *trig)
+{
+	struct inv_mpu6050_state *st = iio_priv(indio_dev);
+
+	if (st->trig != trig)
+		return -EINVAL;
+
+	return 0;
+}
+
+#define INV_MPU6050_CHAN(_type, _channel2, _index)                    \
+	{                                                             \
+		.type = _type,                                        \
+		.modified = 1,                                        \
+		.channel2 = _channel2,                                \
+		.info_mask =  IIO_CHAN_INFO_SCALE_SHARED_BIT          \
+				| IIO_CHAN_INFO_RAW_SEPARATE_BIT,     \
+		.scan_index = _index,                                 \
+		.scan_type = {                                        \
+				.sign = 's',                          \
+				.realbits = 16,                       \
+				.storagebits = 16,                    \
+				.shift = 0 ,                          \
+				.endianness = IIO_BE,                 \
+			     },                                       \
+	}
+
+static const struct iio_chan_spec inv_mpu_channels[] = {
+	IIO_CHAN_SOFT_TIMESTAMP(INV_MPU6050_SCAN_TIMESTAMP),
+	/*
+	 * Note that temperature should only be via polled reading only,
+	 * not the final scan elements output.
+	 */
+	{
+		.type = IIO_TEMP,
+		.info_mask =  IIO_CHAN_INFO_RAW_SEPARATE_BIT
+				| IIO_CHAN_INFO_OFFSET_SEPARATE_BIT
+				| IIO_CHAN_INFO_SCALE_SEPARATE_BIT,
+		.scan_index = -1,
+	},
+	INV_MPU6050_CHAN(IIO_ANGL_VEL, IIO_MOD_X, INV_MPU6050_SCAN_GYRO_X),
+	INV_MPU6050_CHAN(IIO_ANGL_VEL, IIO_MOD_Y, INV_MPU6050_SCAN_GYRO_Y),
+	INV_MPU6050_CHAN(IIO_ANGL_VEL, IIO_MOD_Z, INV_MPU6050_SCAN_GYRO_Z),
+
+	INV_MPU6050_CHAN(IIO_ACCEL, IIO_MOD_X, INV_MPU6050_SCAN_ACCL_X),
+	INV_MPU6050_CHAN(IIO_ACCEL, IIO_MOD_Y, INV_MPU6050_SCAN_ACCL_Y),
+	INV_MPU6050_CHAN(IIO_ACCEL, IIO_MOD_Z, INV_MPU6050_SCAN_ACCL_Z),
+};
+
+/* constant IIO attribute */
+static IIO_CONST_ATTR_SAMP_FREQ_AVAIL("10 20 50 100 200 500");
+static IIO_DEV_ATTR_SAMP_FREQ(S_IRUGO | S_IWUSR, inv_fifo_rate_show,
+	inv_mpu6050_fifo_rate_store);
+static IIO_DEVICE_ATTR(in_gyro_matrix, S_IRUGO, inv_attr_show, NULL,
+	ATTR_GYRO_MATRIX);
+static IIO_DEVICE_ATTR(in_accel_matrix, S_IRUGO, inv_attr_show, NULL,
+	ATTR_ACCL_MATRIX);
+
+static struct attribute *inv_attributes[] = {
+	&iio_dev_attr_in_gyro_matrix.dev_attr.attr,
+	&iio_dev_attr_in_accel_matrix.dev_attr.attr,
+	&iio_dev_attr_sampling_frequency.dev_attr.attr,
+	&iio_const_attr_sampling_frequency_available.dev_attr.attr,
+	NULL,
+};
+
+static const struct attribute_group inv_attribute_group = {
+	.attrs = inv_attributes
+};
+
+static const struct iio_info mpu_info = {
+	.driver_module = THIS_MODULE,
+	.read_raw = &inv_mpu6050_read_raw,
+	.write_raw = &inv_mpu6050_write_raw,
+	.attrs = &inv_attribute_group,
+	.validate_trigger = inv_mpu6050_validate_trigger,
+};
+
+/**
+ *  inv_check_and_setup_chip() - check and setup chip.
+ */
+static int inv_check_and_setup_chip(struct inv_mpu6050_state *st,
+		const struct i2c_device_id *id)
+{
+	int result;
+
+	st->chip_type = INV_MPU6050;
+	st->hw  = &hw_info[st->chip_type];
+	st->reg = hw_info[st->chip_type].reg;
+
+	/* reset to make sure previous state are not there */
+	result = inv_mpu6050_write_reg(st, st->reg->pwr_mgmt_1,
+					INV_MPU6050_BIT_H_RESET);
+	if (result)
+		return result;
+	msleep(INV_MPU6050_POWER_UP_TIME);
+	/* toggle power state. After reset, the sleep bit could be on
+		or off depending on the OTP settings. Toggling power would
+		make it in a definite state as well as making the hardware
+		state align with the software state */
+	result = inv_mpu6050_set_power_itg(st, false);
+	if (result)
+		return result;
+	result = inv_mpu6050_set_power_itg(st, true);
+	if (result)
+		return result;
+
+	result = inv_mpu6050_switch_engine(st, false,
+					INV_MPU6050_BIT_PWR_ACCL_STBY);
+	if (result)
+		return result;
+	result = inv_mpu6050_switch_engine(st, false,
+					INV_MPU6050_BIT_PWR_GYRO_STBY);
+	if (result)
+		return result;
+
+	return 0;
+}
+
+/**
+ *  inv_mpu_probe() - probe function.
+ *  @client:          i2c client.
+ *  @id:              i2c device id.
+ *
+ *  Returns 0 on success, a negative error code otherwise.
+ */
+static int inv_mpu_probe(struct i2c_client *client,
+	const struct i2c_device_id *id)
+{
+	struct inv_mpu6050_state *st;
+	struct iio_dev *indio_dev;
+	int result;
+
+	if (!i2c_check_functionality(client->adapter,
+					I2C_FUNC_SMBUS_READ_I2C_BLOCK |
+					I2C_FUNC_SMBUS_WRITE_I2C_BLOCK)) {
+		result = -ENOSYS;
+		goto out_no_free;
+	}
+	indio_dev = iio_device_alloc(sizeof(*st));
+	if (indio_dev == NULL) {
+		result =  -ENOMEM;
+		goto out_no_free;
+	}
+	st = iio_priv(indio_dev);
+	st->client = client;
+	st->plat_data = *(struct inv_mpu6050_platform_data
+				*)dev_get_platdata(&client->dev);
+	/* power is turned on inside check chip type*/
+	result = inv_check_and_setup_chip(st, id);
+	if (result)
+		goto out_free;
+
+	result = inv_mpu6050_init_config(indio_dev);
+	if (result) {
+		dev_err(&client->dev,
+			"Could not initialize device.\n");
+		goto out_free;
+	}
+
+	i2c_set_clientdata(client, indio_dev);
+	indio_dev->dev.parent = &client->dev;
+	indio_dev->name = id->name;
+	indio_dev->channels = inv_mpu_channels;
+	indio_dev->num_channels = ARRAY_SIZE(inv_mpu_channels);
+
+	indio_dev->info = &mpu_info;
+	indio_dev->modes = INDIO_BUFFER_TRIGGERED;
+
+	result = iio_triggered_buffer_setup(indio_dev,
+					    inv_mpu6050_irq_handler,
+					    inv_mpu6050_read_fifo,
+					    NULL);
+	if (result) {
+		dev_err(&st->client->dev, "configure buffer fail %d\n",
+				result);
+		goto out_free;
+	}
+	result = inv_mpu6050_probe_trigger(indio_dev);
+	if (result) {
+		dev_err(&st->client->dev, "trigger probe fail %d\n", result);
+		goto out_unreg_ring;
+	}
+
+	INIT_KFIFO(st->timestamps);
+	spin_lock_init(&st->time_stamp_lock);
+	result = iio_device_register(indio_dev);
+	if (result) {
+		dev_err(&st->client->dev, "IIO register fail %d\n", result);
+		goto out_remove_trigger;
+	}
+
+	return 0;
+
+out_remove_trigger:
+	inv_mpu6050_remove_trigger(st);
+out_unreg_ring:
+	iio_triggered_buffer_cleanup(indio_dev);
+out_free:
+	iio_device_free(indio_dev);
+out_no_free:
+
+	return result;
+}
+
+static int inv_mpu_remove(struct i2c_client *client)
+{
+	struct iio_dev *indio_dev = i2c_get_clientdata(client);
+	struct inv_mpu6050_state *st = iio_priv(indio_dev);
+
+	iio_device_unregister(indio_dev);
+	inv_mpu6050_remove_trigger(st);
+	iio_triggered_buffer_cleanup(indio_dev);
+	iio_device_free(indio_dev);
+
+	return 0;
+}
+#ifdef CONFIG_PM_SLEEP
+
+static int inv_mpu_resume(struct device *dev)
+{
+	return inv_mpu6050_set_power_itg(
+		iio_priv(i2c_get_clientdata(to_i2c_client(dev))), true);
+}
+
+static int inv_mpu_suspend(struct device *dev)
+{
+	return inv_mpu6050_set_power_itg(
+		iio_priv(i2c_get_clientdata(to_i2c_client(dev))), false);
+}
+static SIMPLE_DEV_PM_OPS(inv_mpu_pmops, inv_mpu_suspend, inv_mpu_resume);
+
+#define INV_MPU6050_PMOPS (&inv_mpu_pmops)
+#else
+#define INV_MPU6050_PMOPS NULL
+#endif /* CONFIG_PM_SLEEP */
+
+/*
+ * device id table is used to identify what device can be
+ * supported by this driver
+ */
+static const struct i2c_device_id inv_mpu_id[] = {
+	{"mpu6050", INV_MPU6050},
+	{}
+};
+
+MODULE_DEVICE_TABLE(i2c, inv_mpu_id);
+
+static struct i2c_driver inv_mpu_driver = {
+	.probe		=	inv_mpu_probe,
+	.remove		=	inv_mpu_remove,
+	.id_table	=	inv_mpu_id,
+	.driver = {
+		.owner	=	THIS_MODULE,
+		.name	=	"inv-mpu6050",
+		.pm     =       INV_MPU6050_PMOPS,
+	},
+};
+
+module_i2c_driver(inv_mpu_driver);
+
+MODULE_AUTHOR("Invensense Corporation");
+MODULE_DESCRIPTION("Invensense device MPU6050 driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/iio/imu/inv_mpu6050/inv_mpu_iio.h b/drivers/iio/imu/inv_mpu6050/inv_mpu_iio.h
new file mode 100644
index 000000000000..f38395529a44
--- /dev/null
+++ b/drivers/iio/imu/inv_mpu6050/inv_mpu_iio.h
@@ -0,0 +1,246 @@
+/*
+* Copyright (C) 2012 Invensense, Inc.
+*
+* This software is licensed under the terms of the GNU General Public
+* License version 2, as published by the Free Software Foundation, and
+* may be copied, distributed, and modified under those terms.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*/
+#include <linux/i2c.h>
+#include <linux/kfifo.h>
+#include <linux/spinlock.h>
+#include <linux/iio/iio.h>
+#include <linux/iio/buffer.h>
+#include <linux/iio/sysfs.h>
+#include <linux/iio/kfifo_buf.h>
+#include <linux/iio/trigger.h>
+#include <linux/iio/triggered_buffer.h>
+#include <linux/iio/trigger_consumer.h>
+#include <linux/platform_data/invensense_mpu6050.h>
+
+/**
+ *  struct inv_mpu6050_reg_map - Notable registers.
+ *  @sample_rate_div:	Divider applied to gyro output rate.
+ *  @lpf:		Configures internal low pass filter.
+ *  @user_ctrl:		Enables/resets the FIFO.
+ *  @fifo_en:		Determines which data will appear in FIFO.
+ *  @gyro_config:	gyro config register.
+ *  @accl_config:	accel config register
+ *  @fifo_count_h:	Upper byte of FIFO count.
+ *  @fifo_r_w:		FIFO register.
+ *  @raw_gyro:		Address of first gyro register.
+ *  @raw_accl:		Address of first accel register.
+ *  @temperature:	temperature register
+ *  @int_enable:	Interrupt enable register.
+ *  @pwr_mgmt_1:	Controls chip's power state and clock source.
+ *  @pwr_mgmt_2:	Controls power state of individual sensors.
+ */
+struct inv_mpu6050_reg_map {
+	u8 sample_rate_div;
+	u8 lpf;
+	u8 user_ctrl;
+	u8 fifo_en;
+	u8 gyro_config;
+	u8 accl_config;
+	u8 fifo_count_h;
+	u8 fifo_r_w;
+	u8 raw_gyro;
+	u8 raw_accl;
+	u8 temperature;
+	u8 int_enable;
+	u8 pwr_mgmt_1;
+	u8 pwr_mgmt_2;
+};
+
+/*device enum */
+enum inv_devices {
+	INV_MPU6050,
+	INV_NUM_PARTS
+};
+
+/**
+ *  struct inv_mpu6050_chip_config - Cached chip configuration data.
+ *  @fsr:		Full scale range.
+ *  @lpf:		Digital low pass filter frequency.
+ *  @accl_fs:		accel full scale range.
+ *  @enable:		master enable state.
+ *  @accl_fifo_enable:	enable accel data output
+ *  @gyro_fifo_enable:	enable gyro data output
+ *  @fifo_rate:		FIFO update rate.
+ */
+struct inv_mpu6050_chip_config {
+	unsigned int fsr:2;
+	unsigned int lpf:3;
+	unsigned int accl_fs:2;
+	unsigned int enable:1;
+	unsigned int accl_fifo_enable:1;
+	unsigned int gyro_fifo_enable:1;
+	u16 fifo_rate;
+};
+
+/**
+ *  struct inv_mpu6050_hw - Other important hardware information.
+ *  @num_reg:	Number of registers on device.
+ *  @name:      name of the chip.
+ *  @reg:   register map of the chip.
+ *  @config:    configuration of the chip.
+ */
+struct inv_mpu6050_hw {
+	u8 num_reg;
+	u8 *name;
+	const struct inv_mpu6050_reg_map *reg;
+	const struct inv_mpu6050_chip_config *config;
+};
+
+/*
+ *  struct inv_mpu6050_state - Driver state variables.
+ *  @TIMESTAMP_FIFO_SIZE: fifo size for timestamp.
+ *  @trig:              IIO trigger.
+ *  @chip_config:	Cached attribute information.
+ *  @reg:		Map of important registers.
+ *  @hw:		Other hardware-specific information.
+ *  @chip_type:		chip type.
+ *  @time_stamp_lock:	spin lock to time stamp.
+ *  @client:		i2c client handle.
+ *  @plat_data:		platform data.
+ *  @timestamps:        kfifo queue to store time stamp.
+ */
+struct inv_mpu6050_state {
+#define TIMESTAMP_FIFO_SIZE 16
+	struct iio_trigger  *trig;
+	struct inv_mpu6050_chip_config chip_config;
+	const struct inv_mpu6050_reg_map *reg;
+	const struct inv_mpu6050_hw *hw;
+	enum   inv_devices chip_type;
+	spinlock_t time_stamp_lock;
+	struct i2c_client *client;
+	struct inv_mpu6050_platform_data plat_data;
+	DECLARE_KFIFO(timestamps, long long, TIMESTAMP_FIFO_SIZE);
+};
+
+/*register and associated bit definition*/
+#define INV_MPU6050_REG_SAMPLE_RATE_DIV     0x19
+#define INV_MPU6050_REG_CONFIG              0x1A
+#define INV_MPU6050_REG_GYRO_CONFIG         0x1B
+#define INV_MPU6050_REG_ACCEL_CONFIG	    0x1C
+
+#define INV_MPU6050_REG_FIFO_EN             0x23
+#define INV_MPU6050_BIT_ACCEL_OUT                   0x08
+#define INV_MPU6050_BITS_GYRO_OUT                   0x70
+
+#define INV_MPU6050_REG_INT_ENABLE          0x38
+#define INV_MPU6050_BIT_DATA_RDY_EN                 0x01
+#define INV_MPU6050_BIT_DMP_INT_EN                  0x02
+
+#define INV_MPU6050_REG_RAW_ACCEL           0x3B
+#define INV_MPU6050_REG_TEMPERATURE         0x41
+#define INV_MPU6050_REG_RAW_GYRO            0x43
+
+#define INV_MPU6050_REG_USER_CTRL           0x6A
+#define INV_MPU6050_BIT_FIFO_RST                    0x04
+#define INV_MPU6050_BIT_DMP_RST                     0x08
+#define INV_MPU6050_BIT_I2C_MST_EN                  0x20
+#define INV_MPU6050_BIT_FIFO_EN                     0x40
+#define INV_MPU6050_BIT_DMP_EN                      0x80
+
+#define INV_MPU6050_REG_PWR_MGMT_1          0x6B
+#define INV_MPU6050_BIT_H_RESET                     0x80
+#define INV_MPU6050_BIT_SLEEP                       0x40
+#define INV_MPU6050_BIT_CLK_MASK                    0x7
+
+#define INV_MPU6050_REG_PWR_MGMT_2          0x6C
+#define INV_MPU6050_BIT_PWR_ACCL_STBY               0x38
+#define INV_MPU6050_BIT_PWR_GYRO_STBY               0x07
+
+#define INV_MPU6050_REG_FIFO_COUNT_H        0x72
+#define INV_MPU6050_REG_FIFO_R_W            0x74
+
+#define INV_MPU6050_BYTES_PER_3AXIS_SENSOR   6
+#define INV_MPU6050_FIFO_COUNT_BYTE          2
+#define INV_MPU6050_FIFO_THRESHOLD           500
+#define INV_MPU6050_POWER_UP_TIME            100
+#define INV_MPU6050_TEMP_UP_TIME             100
+#define INV_MPU6050_SENSOR_UP_TIME           30
+#define INV_MPU6050_REG_UP_TIME              5
+
+#define INV_MPU6050_TEMP_OFFSET	             12421
+#define INV_MPU6050_TEMP_SCALE               2941
+#define INV_MPU6050_MAX_GYRO_FS_PARAM        3
+#define INV_MPU6050_MAX_ACCL_FS_PARAM        3
+#define INV_MPU6050_THREE_AXIS               3
+#define INV_MPU6050_GYRO_CONFIG_FSR_SHIFT    3
+#define INV_MPU6050_ACCL_CONFIG_FSR_SHIFT    3
+
+/* 6 + 6 round up and plus 8 */
+#define INV_MPU6050_OUTPUT_DATA_SIZE         24
+
+/* init parameters */
+#define INV_MPU6050_INIT_FIFO_RATE           50
+#define INV_MPU6050_TIME_STAMP_TOR                        5
+#define INV_MPU6050_MAX_FIFO_RATE                         1000
+#define INV_MPU6050_MIN_FIFO_RATE                         4
+#define INV_MPU6050_ONE_K_HZ                              1000
+
+/* scan element definition */
+enum inv_mpu6050_scan {
+	INV_MPU6050_SCAN_ACCL_X,
+	INV_MPU6050_SCAN_ACCL_Y,
+	INV_MPU6050_SCAN_ACCL_Z,
+	INV_MPU6050_SCAN_GYRO_X,
+	INV_MPU6050_SCAN_GYRO_Y,
+	INV_MPU6050_SCAN_GYRO_Z,
+	INV_MPU6050_SCAN_TIMESTAMP,
+};
+
+enum inv_mpu6050_filter_e {
+	INV_MPU6050_FILTER_256HZ_NOLPF2 = 0,
+	INV_MPU6050_FILTER_188HZ,
+	INV_MPU6050_FILTER_98HZ,
+	INV_MPU6050_FILTER_42HZ,
+	INV_MPU6050_FILTER_20HZ,
+	INV_MPU6050_FILTER_10HZ,
+	INV_MPU6050_FILTER_5HZ,
+	INV_MPU6050_FILTER_2100HZ_NOLPF,
+	NUM_MPU6050_FILTER
+};
+
+/* IIO attribute address */
+enum INV_MPU6050_IIO_ATTR_ADDR {
+	ATTR_GYRO_MATRIX,
+	ATTR_ACCL_MATRIX,
+};
+
+enum inv_mpu6050_accl_fs_e {
+	INV_MPU6050_FS_02G = 0,
+	INV_MPU6050_FS_04G,
+	INV_MPU6050_FS_08G,
+	INV_MPU6050_FS_16G,
+	NUM_ACCL_FSR
+};
+
+enum inv_mpu6050_fsr_e {
+	INV_MPU6050_FSR_250DPS = 0,
+	INV_MPU6050_FSR_500DPS,
+	INV_MPU6050_FSR_1000DPS,
+	INV_MPU6050_FSR_2000DPS,
+	NUM_MPU6050_FSR
+};
+
+enum inv_mpu6050_clock_sel_e {
+	INV_CLK_INTERNAL = 0,
+	INV_CLK_PLL,
+	NUM_CLK
+};
+
+irqreturn_t inv_mpu6050_irq_handler(int irq, void *p);
+irqreturn_t inv_mpu6050_read_fifo(int irq, void *p);
+int inv_mpu6050_probe_trigger(struct iio_dev *indio_dev);
+void inv_mpu6050_remove_trigger(struct inv_mpu6050_state *st);
+int inv_reset_fifo(struct iio_dev *indio_dev);
+int inv_mpu6050_switch_engine(struct inv_mpu6050_state *st, bool en, u32 mask);
+int inv_mpu6050_write_reg(struct inv_mpu6050_state *st, int reg, u8 val);
+int inv_mpu6050_set_power_itg(struct inv_mpu6050_state *st, bool power_on);
diff --git a/drivers/iio/imu/inv_mpu6050/inv_mpu_ring.c b/drivers/iio/imu/inv_mpu6050/inv_mpu_ring.c
new file mode 100644
index 000000000000..331781ffbb15
--- /dev/null
+++ b/drivers/iio/imu/inv_mpu6050/inv_mpu_ring.c
@@ -0,0 +1,196 @@
+/*
+* Copyright (C) 2012 Invensense, Inc.
+*
+* This software is licensed under the terms of the GNU General Public
+* License version 2, as published by the Free Software Foundation, and
+* may be copied, distributed, and modified under those terms.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*/
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/i2c.h>
+#include <linux/err.h>
+#include <linux/delay.h>
+#include <linux/sysfs.h>
+#include <linux/jiffies.h>
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+#include <linux/kfifo.h>
+#include <linux/poll.h>
+#include "inv_mpu_iio.h"
+
+int inv_reset_fifo(struct iio_dev *indio_dev)
+{
+	int result;
+	u8 d;
+	struct inv_mpu6050_state  *st = iio_priv(indio_dev);
+
+	/* disable interrupt */
+	result = inv_mpu6050_write_reg(st, st->reg->int_enable, 0);
+	if (result) {
+		dev_err(&st->client->dev, "int_enable failed %d\n", result);
+		return result;
+	}
+	/* disable the sensor output to FIFO */
+	result = inv_mpu6050_write_reg(st, st->reg->fifo_en, 0);
+	if (result)
+		goto reset_fifo_fail;
+	/* disable fifo reading */
+	result = inv_mpu6050_write_reg(st, st->reg->user_ctrl, 0);
+	if (result)
+		goto reset_fifo_fail;
+
+	/* reset FIFO*/
+	result = inv_mpu6050_write_reg(st, st->reg->user_ctrl,
+					INV_MPU6050_BIT_FIFO_RST);
+	if (result)
+		goto reset_fifo_fail;
+	/* enable interrupt */
+	if (st->chip_config.accl_fifo_enable ||
+	    st->chip_config.gyro_fifo_enable) {
+		result = inv_mpu6050_write_reg(st, st->reg->int_enable,
+					INV_MPU6050_BIT_DATA_RDY_EN);
+		if (result)
+			return result;
+	}
+	/* enable FIFO reading and I2C master interface*/
+	result = inv_mpu6050_write_reg(st, st->reg->user_ctrl,
+					INV_MPU6050_BIT_FIFO_EN);
+	if (result)
+		goto reset_fifo_fail;
+	/* enable sensor output to FIFO */
+	d = 0;
+	if (st->chip_config.gyro_fifo_enable)
+		d |= INV_MPU6050_BITS_GYRO_OUT;
+	if (st->chip_config.accl_fifo_enable)
+		d |= INV_MPU6050_BIT_ACCEL_OUT;
+	result = inv_mpu6050_write_reg(st, st->reg->fifo_en, d);
+	if (result)
+		goto reset_fifo_fail;
+
+	return 0;
+
+reset_fifo_fail:
+	dev_err(&st->client->dev, "reset fifo failed %d\n", result);
+	result = inv_mpu6050_write_reg(st, st->reg->int_enable,
+					INV_MPU6050_BIT_DATA_RDY_EN);
+
+	return result;
+}
+
+static void inv_clear_kfifo(struct inv_mpu6050_state *st)
+{
+	unsigned long flags;
+
+	/* take the spin lock sem to avoid interrupt kick in */
+	spin_lock_irqsave(&st->time_stamp_lock, flags);
+	kfifo_reset(&st->timestamps);
+	spin_unlock_irqrestore(&st->time_stamp_lock, flags);
+}
+
+/**
+ * inv_mpu6050_irq_handler() - Cache a timestamp at each data ready interrupt.
+ */
+irqreturn_t inv_mpu6050_irq_handler(int irq, void *p)
+{
+	struct iio_poll_func *pf = p;
+	struct iio_dev *indio_dev = pf->indio_dev;
+	struct inv_mpu6050_state *st = iio_priv(indio_dev);
+	s64 timestamp;
+
+	timestamp = iio_get_time_ns();
+	spin_lock(&st->time_stamp_lock);
+	kfifo_in(&st->timestamps, &timestamp, 1);
+	spin_unlock(&st->time_stamp_lock);
+
+	return IRQ_WAKE_THREAD;
+}
+
+/**
+ * inv_mpu6050_read_fifo() - Transfer data from hardware FIFO to KFIFO.
+ */
+irqreturn_t inv_mpu6050_read_fifo(int irq, void *p)
+{
+	struct iio_poll_func *pf = p;
+	struct iio_dev *indio_dev = pf->indio_dev;
+	struct inv_mpu6050_state *st = iio_priv(indio_dev);
+	size_t bytes_per_datum;
+	int result;
+	u8 data[INV_MPU6050_OUTPUT_DATA_SIZE];
+	u16 fifo_count;
+	s64 timestamp;
+	u64 *tmp;
+
+	mutex_lock(&indio_dev->mlock);
+	if (!(st->chip_config.accl_fifo_enable |
+		st->chip_config.gyro_fifo_enable))
+		goto end_session;
+	bytes_per_datum = 0;
+	if (st->chip_config.accl_fifo_enable)
+		bytes_per_datum += INV_MPU6050_BYTES_PER_3AXIS_SENSOR;
+
+	if (st->chip_config.gyro_fifo_enable)
+		bytes_per_datum += INV_MPU6050_BYTES_PER_3AXIS_SENSOR;
+
+	/*
+	 * read fifo_count register to know how many bytes inside FIFO
+	 * right now
+	 */
+	result = i2c_smbus_read_i2c_block_data(st->client,
+				       st->reg->fifo_count_h,
+				       INV_MPU6050_FIFO_COUNT_BYTE, data);
+	if (result != INV_MPU6050_FIFO_COUNT_BYTE)
+		goto end_session;
+	fifo_count = be16_to_cpup((__be16 *)(&data[0]));
+	if (fifo_count < bytes_per_datum)
+		goto end_session;
+	/* fifo count can't be odd number, if it is odd, reset fifo*/
+	if (fifo_count & 1)
+		goto flush_fifo;
+	if (fifo_count >  INV_MPU6050_FIFO_THRESHOLD)
+		goto flush_fifo;
+	/* Timestamp mismatch. */
+	if (kfifo_len(&st->timestamps) >
+		fifo_count / bytes_per_datum + INV_MPU6050_TIME_STAMP_TOR)
+			goto flush_fifo;
+	while (fifo_count >= bytes_per_datum) {
+		result = i2c_smbus_read_i2c_block_data(st->client,
+						       st->reg->fifo_r_w,
+						       bytes_per_datum, data);
+		if (result != bytes_per_datum)
+			goto flush_fifo;
+
+		result = kfifo_out(&st->timestamps, &timestamp, 1);
+		/* when there is no timestamp, put timestamp as 0 */
+		if (0 == result)
+			timestamp = 0;
+
+		tmp = (u64 *)data;
+		tmp[DIV_ROUND_UP(bytes_per_datum, 8)] = timestamp;
+		result = iio_push_to_buffers(indio_dev, data);
+		if (result)
+			goto flush_fifo;
+		fifo_count -= bytes_per_datum;
+	}
+
+end_session:
+	mutex_unlock(&indio_dev->mlock);
+	iio_trigger_notify_done(indio_dev->trig);
+
+	return IRQ_HANDLED;
+
+flush_fifo:
+	/* Flush HW and SW FIFOs. */
+	inv_reset_fifo(indio_dev);
+	inv_clear_kfifo(st);
+	mutex_unlock(&indio_dev->mlock);
+	iio_trigger_notify_done(indio_dev->trig);
+
+	return IRQ_HANDLED;
+}
diff --git a/drivers/iio/imu/inv_mpu6050/inv_mpu_trigger.c b/drivers/iio/imu/inv_mpu6050/inv_mpu_trigger.c
new file mode 100644
index 000000000000..e1d0869e0ad1
--- /dev/null
+++ b/drivers/iio/imu/inv_mpu6050/inv_mpu_trigger.c
@@ -0,0 +1,155 @@
+/*
+* Copyright (C) 2012 Invensense, Inc.
+*
+* This software is licensed under the terms of the GNU General Public
+* License version 2, as published by the Free Software Foundation, and
+* may be copied, distributed, and modified under those terms.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*/
+
+#include "inv_mpu_iio.h"
+
+static void inv_scan_query(struct iio_dev *indio_dev)
+{
+	struct inv_mpu6050_state  *st = iio_priv(indio_dev);
+
+	st->chip_config.gyro_fifo_enable =
+		test_bit(INV_MPU6050_SCAN_GYRO_X,
+			indio_dev->active_scan_mask) ||
+			test_bit(INV_MPU6050_SCAN_GYRO_Y,
+			indio_dev->active_scan_mask) ||
+			test_bit(INV_MPU6050_SCAN_GYRO_Z,
+			indio_dev->active_scan_mask);
+
+	st->chip_config.accl_fifo_enable =
+		test_bit(INV_MPU6050_SCAN_ACCL_X,
+			indio_dev->active_scan_mask) ||
+			test_bit(INV_MPU6050_SCAN_ACCL_Y,
+			indio_dev->active_scan_mask) ||
+			test_bit(INV_MPU6050_SCAN_ACCL_Z,
+			indio_dev->active_scan_mask);
+}
+
+/**
+ *  inv_mpu6050_set_enable() - enable chip functions.
+ *  @indio_dev:	Device driver instance.
+ *  @enable: enable/disable
+ */
+static int inv_mpu6050_set_enable(struct iio_dev *indio_dev, bool enable)
+{
+	struct inv_mpu6050_state *st = iio_priv(indio_dev);
+	int result;
+
+	if (enable) {
+		result = inv_mpu6050_set_power_itg(st, true);
+		if (result)
+			return result;
+		inv_scan_query(indio_dev);
+		if (st->chip_config.gyro_fifo_enable) {
+			result = inv_mpu6050_switch_engine(st, true,
+					INV_MPU6050_BIT_PWR_GYRO_STBY);
+			if (result)
+				return result;
+		}
+		if (st->chip_config.accl_fifo_enable) {
+			result = inv_mpu6050_switch_engine(st, true,
+					INV_MPU6050_BIT_PWR_ACCL_STBY);
+			if (result)
+				return result;
+		}
+		result = inv_reset_fifo(indio_dev);
+		if (result)
+			return result;
+	} else {
+		result = inv_mpu6050_write_reg(st, st->reg->fifo_en, 0);
+		if (result)
+			return result;
+
+		result = inv_mpu6050_write_reg(st, st->reg->int_enable, 0);
+		if (result)
+			return result;
+
+		result = inv_mpu6050_write_reg(st, st->reg->user_ctrl, 0);
+		if (result)
+			return result;
+
+		result = inv_mpu6050_switch_engine(st, false,
+					INV_MPU6050_BIT_PWR_GYRO_STBY);
+		if (result)
+			return result;
+
+		result = inv_mpu6050_switch_engine(st, false,
+					INV_MPU6050_BIT_PWR_ACCL_STBY);
+		if (result)
+			return result;
+		result = inv_mpu6050_set_power_itg(st, false);
+		if (result)
+			return result;
+	}
+	st->chip_config.enable = enable;
+
+	return 0;
+}
+
+/**
+ * inv_mpu_data_rdy_trigger_set_state() - set data ready interrupt state
+ * @trig: Trigger instance
+ * @state: Desired trigger state
+ */
+static int inv_mpu_data_rdy_trigger_set_state(struct iio_trigger *trig,
+						bool state)
+{
+	return inv_mpu6050_set_enable(trig->private_data, state);
+}
+
+static const struct iio_trigger_ops inv_mpu_trigger_ops = {
+	.owner = THIS_MODULE,
+	.set_trigger_state = &inv_mpu_data_rdy_trigger_set_state,
+};
+
+int inv_mpu6050_probe_trigger(struct iio_dev *indio_dev)
+{
+	int ret;
+	struct inv_mpu6050_state *st = iio_priv(indio_dev);
+
+	st->trig = iio_trigger_alloc("%s-dev%d",
+					indio_dev->name,
+					indio_dev->id);
+	if (st->trig == NULL) {
+		ret = -ENOMEM;
+		goto error_ret;
+	}
+	ret = request_irq(st->client->irq, &iio_trigger_generic_data_rdy_poll,
+				IRQF_TRIGGER_RISING,
+				"inv_mpu",
+				st->trig);
+	if (ret)
+		goto error_free_trig;
+	st->trig->dev.parent = &st->client->dev;
+	st->trig->private_data = indio_dev;
+	st->trig->ops = &inv_mpu_trigger_ops;
+	ret = iio_trigger_register(st->trig);
+	if (ret)
+		goto error_free_irq;
+	indio_dev->trig = st->trig;
+
+	return 0;
+
+error_free_irq:
+	free_irq(st->client->irq, st->trig);
+error_free_trig:
+	iio_trigger_free(st->trig);
+error_ret:
+	return ret;
+}
+
+void inv_mpu6050_remove_trigger(struct inv_mpu6050_state *st)
+{
+	iio_trigger_unregister(st->trig);
+	free_irq(st->client->irq, st->trig);
+	iio_trigger_free(st->trig);
+}
diff --git a/include/linux/platform_data/invensense_mpu6050.h b/include/linux/platform_data/invensense_mpu6050.h
new file mode 100644
index 000000000000..ad3aa7b95f35
--- /dev/null
+++ b/include/linux/platform_data/invensense_mpu6050.h
@@ -0,0 +1,31 @@
+/*
+* Copyright (C) 2012 Invensense, Inc.
+*
+* This software is licensed under the terms of the GNU General Public
+* License version 2, as published by the Free Software Foundation, and
+* may be copied, distributed, and modified under those terms.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*/
+
+#ifndef __INV_MPU6050_PLATFORM_H_
+#define __INV_MPU6050_PLATFORM_H_
+
+/**
+ * struct inv_mpu6050_platform_data - Platform data for the mpu driver
+ * @orientation:	Orientation matrix of the chip
+ *
+ * Contains platform specific information on how to configure the MPU6050 to
+ * work on this platform.  The orientation matricies are 3x3 rotation matricies
+ * that are applied to the data to rotate from the mounting orientation to the
+ * platform orientation.  The values must be one of 0, 1, or -1 and each row and
+ * column should have exactly 1 non-zero value.
+ */
+struct inv_mpu6050_platform_data {
+	__s8 orientation[9];
+};
+
+#endif
-- 
cgit v1.2.3


From 095c3752e673c0ba039a2f67fd867297fde75ae7 Mon Sep 17 00:00:00 2001
From: Andreas Larsson <andreas@gaisler.com>
Date: Tue, 29 Jan 2013 15:53:41 +0100
Subject: spi: Document cs_gpios and cs_gpio in kernel-doc

This adds missing kernel-doc entries for cs_gpios in struct spi_master and
cs_gpio in struct spi_device.

Signed-off-by: Andreas Larsson <andreas@gaisler.com>
[grant.likely: tweaked the language of the descriptions]
Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
---
 include/linux/spi/spi.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index f62918946d86..30e9c50a5e20 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -57,6 +57,8 @@ extern struct bus_type spi_bus_type;
  * @modalias: Name of the driver to use with this device, or an alias
  *	for that name.  This appears in the sysfs "modalias" attribute
  *	for driver coldplugging, and in uevents used for hotplugging
+ * @cs_gpio: gpio number of the chipselect line (optional, -EINVAL when
+ *	when not using a GPIO line)
  *
  * A @spi_device is used to interchange data between an SPI slave
  * (usually a discrete chip) and CPU memory.
@@ -258,6 +260,9 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv)
  * @unprepare_transfer_hardware: there are currently no more messages on the
  *	queue so the subsystem notifies the driver that it may relax the
  *	hardware by issuing this call
+ * @cs_gpios: Array of GPIOs to use as chip select lines; one per CS
+ *	number. Any individual value may be -EINVAL for CS lines that
+ *	are not GPIOs (driven by the SPI controller itself).
  *
  * Each SPI master controller can communicate with one or more @spi_device
  * children.  These make a small bus, sharing MOSI, MISO and SCK signals
-- 
cgit v1.2.3


From d021c344051af91f42c5ba9fdedc176740cbd238 Mon Sep 17 00:00:00 2001
From: Andy King <acking@vmware.com>
Date: Wed, 6 Feb 2013 14:23:56 +0000
Subject: VSOCK: Introduce VM Sockets

VM Sockets allows communication between virtual machines and the hypervisor.
User level applications both in a virtual machine and on the host can use the
VM Sockets API, which facilitates fast and efficient communication between
guest virtual machines and their host.  A socket address family, designed to be
compatible with UDP and TCP at the interface level, is provided.

Today, VM Sockets is used by various VMware Tools components inside the guest
for zero-config, network-less access to VMware host services.  In addition to
this, VMware's users are using VM Sockets for various applications, where
network access of the virtual machine is restricted or non-existent.  Examples
of this are VMs communicating with device proxies for proprietary hardware
running as host applications and automated testing of applications running
within virtual machines.

The VMware VM Sockets are similar to other socket types, like Berkeley UNIX
socket interface.  The VM Sockets module supports both connection-oriented
stream sockets like TCP, and connectionless datagram sockets like UDP. The VM
Sockets protocol family is defined as "AF_VSOCK" and the socket operations
split for SOCK_DGRAM and SOCK_STREAM.

For additional information about the use of VM Sockets, please refer to the
VM Sockets Programming Guide available at:

https://www.vmware.com/support/developer/vmci-sdk/

Signed-off-by: George Zhang <georgezhang@vmware.com>
Signed-off-by: Dmitry Torokhov <dtor@vmware.com>
Signed-off-by: Andy king <acking@vmware.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/socket.h                       |    4 +-
 include/uapi/linux/vm_sockets.h              |  171 ++
 net/Kconfig                                  |    1 +
 net/Makefile                                 |    1 +
 net/vmw_vsock/Kconfig                        |   28 +
 net/vmw_vsock/Makefile                       |    7 +
 net/vmw_vsock/af_vsock.c                     | 2015 ++++++++++++++++++++++++
 net/vmw_vsock/af_vsock.h                     |  175 +++
 net/vmw_vsock/vmci_transport.c               | 2157 ++++++++++++++++++++++++++
 net/vmw_vsock/vmci_transport.h               |  139 ++
 net/vmw_vsock/vmci_transport_notify.c        |  680 ++++++++
 net/vmw_vsock/vmci_transport_notify.h        |   83 +
 net/vmw_vsock/vmci_transport_notify_qstate.c |  438 ++++++
 net/vmw_vsock/vsock_addr.c                   |   86 +
 net/vmw_vsock/vsock_addr.h                   |   32 +
 net/vmw_vsock/vsock_version.h                |   22 +
 16 files changed, 6038 insertions(+), 1 deletion(-)
 create mode 100644 include/uapi/linux/vm_sockets.h
 create mode 100644 net/vmw_vsock/Kconfig
 create mode 100644 net/vmw_vsock/Makefile
 create mode 100644 net/vmw_vsock/af_vsock.c
 create mode 100644 net/vmw_vsock/af_vsock.h
 create mode 100644 net/vmw_vsock/vmci_transport.c
 create mode 100644 net/vmw_vsock/vmci_transport.h
 create mode 100644 net/vmw_vsock/vmci_transport_notify.c
 create mode 100644 net/vmw_vsock/vmci_transport_notify.h
 create mode 100644 net/vmw_vsock/vmci_transport_notify_qstate.c
 create mode 100644 net/vmw_vsock/vsock_addr.c
 create mode 100644 net/vmw_vsock/vsock_addr.h
 create mode 100644 net/vmw_vsock/vsock_version.h

(limited to 'include/linux')

diff --git a/include/linux/socket.h b/include/linux/socket.h
index 9a546ff853dc..2b9f74b0ffea 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -178,7 +178,8 @@ struct ucred {
 #define AF_CAIF		37	/* CAIF sockets			*/
 #define AF_ALG		38	/* Algorithm sockets		*/
 #define AF_NFC		39	/* NFC sockets			*/
-#define AF_MAX		40	/* For now.. */
+#define AF_VSOCK	40	/* vSockets			*/
+#define AF_MAX		41	/* For now.. */
 
 /* Protocol families, same as address families. */
 #define PF_UNSPEC	AF_UNSPEC
@@ -221,6 +222,7 @@ struct ucred {
 #define PF_CAIF		AF_CAIF
 #define PF_ALG		AF_ALG
 #define PF_NFC		AF_NFC
+#define PF_VSOCK	AF_VSOCK
 #define PF_MAX		AF_MAX
 
 /* Maximum queue length specifiable by listen.  */
diff --git a/include/uapi/linux/vm_sockets.h b/include/uapi/linux/vm_sockets.h
new file mode 100644
index 000000000000..f7f2e99dec84
--- /dev/null
+++ b/include/uapi/linux/vm_sockets.h
@@ -0,0 +1,171 @@
+/*
+ * VMware vSockets Driver
+ *
+ * Copyright (C) 2007-2013 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _VM_SOCKETS_H_
+#define _VM_SOCKETS_H_
+
+#if !defined(__KERNEL__)
+#include <sys/socket.h>
+#endif
+
+/* Option name for STREAM socket buffer size.  Use as the option name in
+ * setsockopt(3) or getsockopt(3) to set or get an unsigned long long that
+ * specifies the size of the buffer underlying a vSockets STREAM socket.
+ * Value is clamped to the MIN and MAX.
+ */
+
+#define SO_VM_SOCKETS_BUFFER_SIZE 0
+
+/* Option name for STREAM socket minimum buffer size.  Use as the option name
+ * in setsockopt(3) or getsockopt(3) to set or get an unsigned long long that
+ * specifies the minimum size allowed for the buffer underlying a vSockets
+ * STREAM socket.
+ */
+
+#define SO_VM_SOCKETS_BUFFER_MIN_SIZE 1
+
+/* Option name for STREAM socket maximum buffer size.  Use as the option name
+ * in setsockopt(3) or getsockopt(3) to set or get an unsigned long long
+ * that specifies the maximum size allowed for the buffer underlying a
+ * vSockets STREAM socket.
+ */
+
+#define SO_VM_SOCKETS_BUFFER_MAX_SIZE 2
+
+/* Option name for socket peer's host-specific VM ID.  Use as the option name
+ * in getsockopt(3) to get a host-specific identifier for the peer endpoint's
+ * VM.  The identifier is a signed integer.
+ * Only available for hypervisor endpoints.
+ */
+
+#define SO_VM_SOCKETS_PEER_HOST_VM_ID 3
+
+/* Option name for socket's service label.  Use as the option name in
+ * setsockopt(3) or getsockopt(3) to set or get the service label for a socket.
+ * The service label is a C-style NUL-terminated string.  Only available for
+ * hypervisor endpoints.
+ */
+
+#define SO_VM_SOCKETS_SERVICE_LABEL 4
+
+/* Option name for determining if a socket is trusted.  Use as the option name
+ * in getsockopt(3) to determine if a socket is trusted.  The value is a
+ * signed integer.
+ */
+
+#define SO_VM_SOCKETS_TRUSTED 5
+
+/* Option name for STREAM socket connection timeout.  Use as the option name
+ * in setsockopt(3) or getsockopt(3) to set or get the connection
+ * timeout for a STREAM socket.
+ */
+
+#define SO_VM_SOCKETS_CONNECT_TIMEOUT 6
+
+/* Option name for using non-blocking send/receive.  Use as the option name
+ * for setsockopt(3) or getsockopt(3) to set or get the non-blocking
+ * transmit/receive flag for a STREAM socket.  This flag determines whether
+ * send() and recv() can be called in non-blocking contexts for the given
+ * socket.  The value is a signed integer.
+ *
+ * This option is only relevant to kernel endpoints, where descheduling the
+ * thread of execution is not allowed, for example, while holding a spinlock.
+ * It is not to be confused with conventional non-blocking socket operations.
+ *
+ * Only available for hypervisor endpoints.
+ */
+
+#define SO_VM_SOCKETS_NONBLOCK_TXRX 7
+
+/* The vSocket equivalent of INADDR_ANY.  This works for the svm_cid field of
+ * sockaddr_vm and indicates the context ID of the current endpoint.
+ */
+
+#define VMADDR_CID_ANY -1U
+
+/* Bind to any available port.  Works for the svm_port field of
+ * sockaddr_vm.
+ */
+
+#define VMADDR_PORT_ANY -1U
+
+/* Use this as the destination CID in an address when referring to the
+ * hypervisor.  VMCI relies on it being 0, but this would be useful for other
+ * transports too.
+ */
+
+#define VMADDR_CID_HYPERVISOR 0
+
+/* This CID is specific to VMCI and can be considered reserved (even VMCI
+ * doesn't use it anymore, it's a legacy value from an older release).
+ */
+
+#define VMADDR_CID_RESERVED 1
+
+/* Use this as the destination CID in an address when referring to the host
+ * (any process other than the hypervisor).  VMCI relies on it being 2, but
+ * this would be useful for other transports too.
+ */
+
+#define VMADDR_CID_HOST 2
+
+/* Invalid vSockets version. */
+
+#define VM_SOCKETS_INVALID_VERSION -1U
+
+/* The epoch (first) component of the vSockets version.  A single byte
+ * representing the epoch component of the vSockets version.
+ */
+
+#define VM_SOCKETS_VERSION_EPOCH(_v) (((_v) & 0xFF000000) >> 24)
+
+/* The major (second) component of the vSockets version.   A single byte
+ * representing the major component of the vSockets version.  Typically
+ * changes for every major release of a product.
+ */
+
+#define VM_SOCKETS_VERSION_MAJOR(_v) (((_v) & 0x00FF0000) >> 16)
+
+/* The minor (third) component of the vSockets version.  Two bytes representing
+ * the minor component of the vSockets version.
+ */
+
+#define VM_SOCKETS_VERSION_MINOR(_v) (((_v) & 0x0000FFFF))
+
+/* Address structure for vSockets.   The address family should be set to
+ * whatever vmci_sock_get_af_value_fd() returns.  The structure members should
+ * all align on their natural boundaries without resorting to compiler packing
+ * directives.  The total size of this structure should be exactly the same as
+ * that of struct sockaddr.
+ */
+
+struct sockaddr_vm {
+	sa_family_t svm_family;
+	unsigned short svm_reserved1;
+	unsigned int svm_port;
+	unsigned int svm_cid;
+	unsigned char svm_zero[sizeof(struct sockaddr) -
+			       sizeof(sa_family_t) -
+			       sizeof(unsigned short) -
+			       sizeof(unsigned int) - sizeof(unsigned int)];
+};
+
+#define IOCTL_VM_SOCKETS_GET_LOCAL_CID		_IO(7, 0xb9)
+
+#if defined(__KERNEL__)
+int vm_sockets_get_local_cid(void);
+#endif
+
+#endif
diff --git a/net/Kconfig b/net/Kconfig
index c31348e70aad..5a1888bb036d 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -217,6 +217,7 @@ source "net/dcb/Kconfig"
 source "net/dns_resolver/Kconfig"
 source "net/batman-adv/Kconfig"
 source "net/openvswitch/Kconfig"
+source "net/vmw_vsock/Kconfig"
 
 config RPS
 	boolean
diff --git a/net/Makefile b/net/Makefile
index c5aa8b3b49dc..091e7b04f301 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -69,3 +69,4 @@ obj-$(CONFIG_CEPH_LIB)		+= ceph/
 obj-$(CONFIG_BATMAN_ADV)	+= batman-adv/
 obj-$(CONFIG_NFC)		+= nfc/
 obj-$(CONFIG_OPENVSWITCH)	+= openvswitch/
+obj-$(CONFIG_VSOCKETS)	+= vmw_vsock/
diff --git a/net/vmw_vsock/Kconfig b/net/vmw_vsock/Kconfig
new file mode 100644
index 000000000000..b5fa7e40cdcb
--- /dev/null
+++ b/net/vmw_vsock/Kconfig
@@ -0,0 +1,28 @@
+#
+# Vsock protocol
+#
+
+config VSOCKETS
+	tristate "Virtual Socket protocol"
+	help
+	  Virtual Socket Protocol is a socket protocol similar to TCP/IP
+	  allowing comunication between Virtual Machines and hypervisor
+	  or host.
+
+	  You should also select one or more hypervisor-specific transports
+	  below.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called vsock. If unsure, say N.
+
+config VMWARE_VMCI_VSOCKETS
+	tristate "VMware VMCI transport for Virtual Sockets"
+	depends on VSOCKETS && VMWARE_VMCI
+	help
+	  This module implements a VMCI transport for Virtual Sockets.
+
+	  Enable this transport if your Virtual Machine runs on a VMware
+	  hypervisor.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called vmw_vsock_vmci_transport. If unsure, say N.
diff --git a/net/vmw_vsock/Makefile b/net/vmw_vsock/Makefile
new file mode 100644
index 000000000000..2ce52d70f224
--- /dev/null
+++ b/net/vmw_vsock/Makefile
@@ -0,0 +1,7 @@
+obj-$(CONFIG_VSOCKETS) += vsock.o
+obj-$(CONFIG_VMWARE_VMCI_VSOCKETS) += vmw_vsock_vmci_transport.o
+
+vsock-y += af_vsock.o vsock_addr.o
+
+vmw_vsock_vmci_transport-y += vmci_transport.o vmci_transport_notify.o \
+	vmci_transport_notify_qstate.o
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
new file mode 100644
index 000000000000..54bb7bdf92d3
--- /dev/null
+++ b/net/vmw_vsock/af_vsock.c
@@ -0,0 +1,2015 @@
+/*
+ * VMware vSockets Driver
+ *
+ * Copyright (C) 2007-2013 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+/* Implementation notes:
+ *
+ * - There are two kinds of sockets: those created by user action (such as
+ * calling socket(2)) and those created by incoming connection request packets.
+ *
+ * - There are two "global" tables, one for bound sockets (sockets that have
+ * specified an address that they are responsible for) and one for connected
+ * sockets (sockets that have established a connection with another socket).
+ * These tables are "global" in that all sockets on the system are placed
+ * within them. - Note, though, that the bound table contains an extra entry
+ * for a list of unbound sockets and SOCK_DGRAM sockets will always remain in
+ * that list. The bound table is used solely for lookup of sockets when packets
+ * are received and that's not necessary for SOCK_DGRAM sockets since we create
+ * a datagram handle for each and need not perform a lookup.  Keeping SOCK_DGRAM
+ * sockets out of the bound hash buckets will reduce the chance of collisions
+ * when looking for SOCK_STREAM sockets and prevents us from having to check the
+ * socket type in the hash table lookups.
+ *
+ * - Sockets created by user action will either be "client" sockets that
+ * initiate a connection or "server" sockets that listen for connections; we do
+ * not support simultaneous connects (two "client" sockets connecting).
+ *
+ * - "Server" sockets are referred to as listener sockets throughout this
+ * implementation because they are in the SS_LISTEN state.  When a connection
+ * request is received (the second kind of socket mentioned above), we create a
+ * new socket and refer to it as a pending socket.  These pending sockets are
+ * placed on the pending connection list of the listener socket.  When future
+ * packets are received for the address the listener socket is bound to, we
+ * check if the source of the packet is from one that has an existing pending
+ * connection.  If it does, we process the packet for the pending socket.  When
+ * that socket reaches the connected state, it is removed from the listener
+ * socket's pending list and enqueued in the listener socket's accept queue.
+ * Callers of accept(2) will accept connected sockets from the listener socket's
+ * accept queue.  If the socket cannot be accepted for some reason then it is
+ * marked rejected.  Once the connection is accepted, it is owned by the user
+ * process and the responsibility for cleanup falls with that user process.
+ *
+ * - It is possible that these pending sockets will never reach the connected
+ * state; in fact, we may never receive another packet after the connection
+ * request.  Because of this, we must schedule a cleanup function to run in the
+ * future, after some amount of time passes where a connection should have been
+ * established.  This function ensures that the socket is off all lists so it
+ * cannot be retrieved, then drops all references to the socket so it is cleaned
+ * up (sock_put() -> sk_free() -> our sk_destruct implementation).  Note this
+ * function will also cleanup rejected sockets, those that reach the connected
+ * state but leave it before they have been accepted.
+ *
+ * - Sockets created by user action will be cleaned up when the user process
+ * calls close(2), causing our release implementation to be called. Our release
+ * implementation will perform some cleanup then drop the last reference so our
+ * sk_destruct implementation is invoked.  Our sk_destruct implementation will
+ * perform additional cleanup that's common for both types of sockets.
+ *
+ * - A socket's reference count is what ensures that the structure won't be
+ * freed.  Each entry in a list (such as the "global" bound and connected tables
+ * and the listener socket's pending list and connected queue) ensures a
+ * reference.  When we defer work until process context and pass a socket as our
+ * argument, we must ensure the reference count is increased to ensure the
+ * socket isn't freed before the function is run; the deferred function will
+ * then drop the reference.
+ */
+
+#include <linux/types.h>
+
+#define EXPORT_SYMTAB
+#include <linux/bitops.h>
+#include <linux/cred.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/kmod.h>
+#include <linux/list.h>
+#include <linux/miscdevice.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/net.h>
+#include <linux/poll.h>
+#include <linux/skbuff.h>
+#include <linux/smp.h>
+#include <linux/socket.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+#include <linux/wait.h>
+#include <linux/workqueue.h>
+#include <net/sock.h>
+
+#include "af_vsock.h"
+#include "vsock_version.h"
+
+static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr);
+static void vsock_sk_destruct(struct sock *sk);
+static int vsock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
+
+/* Protocol family. */
+static struct proto vsock_proto = {
+	.name = "AF_VSOCK",
+	.owner = THIS_MODULE,
+	.obj_size = sizeof(struct vsock_sock),
+};
+
+/* The default peer timeout indicates how long we will wait for a peer response
+ * to a control message.
+ */
+#define VSOCK_DEFAULT_CONNECT_TIMEOUT (2 * HZ)
+
+#define SS_LISTEN 255
+
+static const struct vsock_transport *transport;
+static DEFINE_MUTEX(vsock_register_mutex);
+
+/**** EXPORTS ****/
+
+/* Get the ID of the local context.  This is transport dependent. */
+
+int vm_sockets_get_local_cid(void)
+{
+	return transport->get_local_cid();
+}
+EXPORT_SYMBOL_GPL(vm_sockets_get_local_cid);
+
+/**** UTILS ****/
+
+/* Each bound VSocket is stored in the bind hash table and each connected
+ * VSocket is stored in the connected hash table.
+ *
+ * Unbound sockets are all put on the same list attached to the end of the hash
+ * table (vsock_unbound_sockets).  Bound sockets are added to the hash table in
+ * the bucket that their local address hashes to (vsock_bound_sockets(addr)
+ * represents the list that addr hashes to).
+ *
+ * Specifically, we initialize the vsock_bind_table array to a size of
+ * VSOCK_HASH_SIZE + 1 so that vsock_bind_table[0] through
+ * vsock_bind_table[VSOCK_HASH_SIZE - 1] are for bound sockets and
+ * vsock_bind_table[VSOCK_HASH_SIZE] is for unbound sockets.  The hash function
+ * mods with VSOCK_HASH_SIZE - 1 to ensure this.
+ */
+#define VSOCK_HASH_SIZE         251
+#define MAX_PORT_RETRIES        24
+
+#define VSOCK_HASH(addr)        ((addr)->svm_port % (VSOCK_HASH_SIZE - 1))
+#define vsock_bound_sockets(addr) (&vsock_bind_table[VSOCK_HASH(addr)])
+#define vsock_unbound_sockets     (&vsock_bind_table[VSOCK_HASH_SIZE])
+
+/* XXX This can probably be implemented in a better way. */
+#define VSOCK_CONN_HASH(src, dst)				\
+	(((src)->svm_cid ^ (dst)->svm_port) % (VSOCK_HASH_SIZE - 1))
+#define vsock_connected_sockets(src, dst)		\
+	(&vsock_connected_table[VSOCK_CONN_HASH(src, dst)])
+#define vsock_connected_sockets_vsk(vsk)				\
+	vsock_connected_sockets(&(vsk)->remote_addr, &(vsk)->local_addr)
+
+static struct list_head vsock_bind_table[VSOCK_HASH_SIZE + 1];
+static struct list_head vsock_connected_table[VSOCK_HASH_SIZE];
+static DEFINE_SPINLOCK(vsock_table_lock);
+
+static __init void vsock_init_tables(void)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(vsock_bind_table); i++)
+		INIT_LIST_HEAD(&vsock_bind_table[i]);
+
+	for (i = 0; i < ARRAY_SIZE(vsock_connected_table); i++)
+		INIT_LIST_HEAD(&vsock_connected_table[i]);
+}
+
+static void __vsock_insert_bound(struct list_head *list,
+				 struct vsock_sock *vsk)
+{
+	sock_hold(&vsk->sk);
+	list_add(&vsk->bound_table, list);
+}
+
+static void __vsock_insert_connected(struct list_head *list,
+				     struct vsock_sock *vsk)
+{
+	sock_hold(&vsk->sk);
+	list_add(&vsk->connected_table, list);
+}
+
+static void __vsock_remove_bound(struct vsock_sock *vsk)
+{
+	list_del_init(&vsk->bound_table);
+	sock_put(&vsk->sk);
+}
+
+static void __vsock_remove_connected(struct vsock_sock *vsk)
+{
+	list_del_init(&vsk->connected_table);
+	sock_put(&vsk->sk);
+}
+
+static struct sock *__vsock_find_bound_socket(struct sockaddr_vm *addr)
+{
+	struct vsock_sock *vsk;
+
+	list_for_each_entry(vsk, vsock_bound_sockets(addr), bound_table)
+		if (vsock_addr_equals_addr_any(addr, &vsk->local_addr))
+			return sk_vsock(vsk);
+
+	return NULL;
+}
+
+static struct sock *__vsock_find_connected_socket(struct sockaddr_vm *src,
+						  struct sockaddr_vm *dst)
+{
+	struct vsock_sock *vsk;
+
+	list_for_each_entry(vsk, vsock_connected_sockets(src, dst),
+			    connected_table) {
+		if (vsock_addr_equals_addr(src, &vsk->remote_addr)
+		    && vsock_addr_equals_addr(dst, &vsk->local_addr)) {
+			return sk_vsock(vsk);
+		}
+	}
+
+	return NULL;
+}
+
+static bool __vsock_in_bound_table(struct vsock_sock *vsk)
+{
+	return !list_empty(&vsk->bound_table);
+}
+
+static bool __vsock_in_connected_table(struct vsock_sock *vsk)
+{
+	return !list_empty(&vsk->connected_table);
+}
+
+static void vsock_insert_unbound(struct vsock_sock *vsk)
+{
+	spin_lock_bh(&vsock_table_lock);
+	__vsock_insert_bound(vsock_unbound_sockets, vsk);
+	spin_unlock_bh(&vsock_table_lock);
+}
+
+void vsock_insert_connected(struct vsock_sock *vsk)
+{
+	struct list_head *list = vsock_connected_sockets(
+		&vsk->remote_addr, &vsk->local_addr);
+
+	spin_lock_bh(&vsock_table_lock);
+	__vsock_insert_connected(list, vsk);
+	spin_unlock_bh(&vsock_table_lock);
+}
+EXPORT_SYMBOL_GPL(vsock_insert_connected);
+
+void vsock_remove_bound(struct vsock_sock *vsk)
+{
+	spin_lock_bh(&vsock_table_lock);
+	__vsock_remove_bound(vsk);
+	spin_unlock_bh(&vsock_table_lock);
+}
+EXPORT_SYMBOL_GPL(vsock_remove_bound);
+
+void vsock_remove_connected(struct vsock_sock *vsk)
+{
+	spin_lock_bh(&vsock_table_lock);
+	__vsock_remove_connected(vsk);
+	spin_unlock_bh(&vsock_table_lock);
+}
+EXPORT_SYMBOL_GPL(vsock_remove_connected);
+
+struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr)
+{
+	struct sock *sk;
+
+	spin_lock_bh(&vsock_table_lock);
+	sk = __vsock_find_bound_socket(addr);
+	if (sk)
+		sock_hold(sk);
+
+	spin_unlock_bh(&vsock_table_lock);
+
+	return sk;
+}
+EXPORT_SYMBOL_GPL(vsock_find_bound_socket);
+
+struct sock *vsock_find_connected_socket(struct sockaddr_vm *src,
+					 struct sockaddr_vm *dst)
+{
+	struct sock *sk;
+
+	spin_lock_bh(&vsock_table_lock);
+	sk = __vsock_find_connected_socket(src, dst);
+	if (sk)
+		sock_hold(sk);
+
+	spin_unlock_bh(&vsock_table_lock);
+
+	return sk;
+}
+EXPORT_SYMBOL_GPL(vsock_find_connected_socket);
+
+static bool vsock_in_bound_table(struct vsock_sock *vsk)
+{
+	bool ret;
+
+	spin_lock_bh(&vsock_table_lock);
+	ret = __vsock_in_bound_table(vsk);
+	spin_unlock_bh(&vsock_table_lock);
+
+	return ret;
+}
+
+static bool vsock_in_connected_table(struct vsock_sock *vsk)
+{
+	bool ret;
+
+	spin_lock_bh(&vsock_table_lock);
+	ret = __vsock_in_connected_table(vsk);
+	spin_unlock_bh(&vsock_table_lock);
+
+	return ret;
+}
+
+void vsock_for_each_connected_socket(void (*fn)(struct sock *sk))
+{
+	int i;
+
+	spin_lock_bh(&vsock_table_lock);
+
+	for (i = 0; i < ARRAY_SIZE(vsock_connected_table); i++) {
+		struct vsock_sock *vsk;
+		list_for_each_entry(vsk, &vsock_connected_table[i],
+				    connected_table);
+			fn(sk_vsock(vsk));
+	}
+
+	spin_unlock_bh(&vsock_table_lock);
+}
+EXPORT_SYMBOL_GPL(vsock_for_each_connected_socket);
+
+void vsock_add_pending(struct sock *listener, struct sock *pending)
+{
+	struct vsock_sock *vlistener;
+	struct vsock_sock *vpending;
+
+	vlistener = vsock_sk(listener);
+	vpending = vsock_sk(pending);
+
+	sock_hold(pending);
+	sock_hold(listener);
+	list_add_tail(&vpending->pending_links, &vlistener->pending_links);
+}
+EXPORT_SYMBOL_GPL(vsock_add_pending);
+
+void vsock_remove_pending(struct sock *listener, struct sock *pending)
+{
+	struct vsock_sock *vpending = vsock_sk(pending);
+
+	list_del_init(&vpending->pending_links);
+	sock_put(listener);
+	sock_put(pending);
+}
+EXPORT_SYMBOL_GPL(vsock_remove_pending);
+
+void vsock_enqueue_accept(struct sock *listener, struct sock *connected)
+{
+	struct vsock_sock *vlistener;
+	struct vsock_sock *vconnected;
+
+	vlistener = vsock_sk(listener);
+	vconnected = vsock_sk(connected);
+
+	sock_hold(connected);
+	sock_hold(listener);
+	list_add_tail(&vconnected->accept_queue, &vlistener->accept_queue);
+}
+EXPORT_SYMBOL_GPL(vsock_enqueue_accept);
+
+static struct sock *vsock_dequeue_accept(struct sock *listener)
+{
+	struct vsock_sock *vlistener;
+	struct vsock_sock *vconnected;
+
+	vlistener = vsock_sk(listener);
+
+	if (list_empty(&vlistener->accept_queue))
+		return NULL;
+
+	vconnected = list_entry(vlistener->accept_queue.next,
+				struct vsock_sock, accept_queue);
+
+	list_del_init(&vconnected->accept_queue);
+	sock_put(listener);
+	/* The caller will need a reference on the connected socket so we let
+	 * it call sock_put().
+	 */
+
+	return sk_vsock(vconnected);
+}
+
+static bool vsock_is_accept_queue_empty(struct sock *sk)
+{
+	struct vsock_sock *vsk = vsock_sk(sk);
+	return list_empty(&vsk->accept_queue);
+}
+
+static bool vsock_is_pending(struct sock *sk)
+{
+	struct vsock_sock *vsk = vsock_sk(sk);
+	return !list_empty(&vsk->pending_links);
+}
+
+static int vsock_send_shutdown(struct sock *sk, int mode)
+{
+	return transport->shutdown(vsock_sk(sk), mode);
+}
+
+void vsock_pending_work(struct work_struct *work)
+{
+	struct sock *sk;
+	struct sock *listener;
+	struct vsock_sock *vsk;
+	bool cleanup;
+
+	vsk = container_of(work, struct vsock_sock, dwork.work);
+	sk = sk_vsock(vsk);
+	listener = vsk->listener;
+	cleanup = true;
+
+	lock_sock(listener);
+	lock_sock(sk);
+
+	if (vsock_is_pending(sk)) {
+		vsock_remove_pending(listener, sk);
+	} else if (!vsk->rejected) {
+		/* We are not on the pending list and accept() did not reject
+		 * us, so we must have been accepted by our user process.  We
+		 * just need to drop our references to the sockets and be on
+		 * our way.
+		 */
+		cleanup = false;
+		goto out;
+	}
+
+	listener->sk_ack_backlog--;
+
+	/* We need to remove ourself from the global connected sockets list so
+	 * incoming packets can't find this socket, and to reduce the reference
+	 * count.
+	 */
+	if (vsock_in_connected_table(vsk))
+		vsock_remove_connected(vsk);
+
+	sk->sk_state = SS_FREE;
+
+out:
+	release_sock(sk);
+	release_sock(listener);
+	if (cleanup)
+		sock_put(sk);
+
+	sock_put(sk);
+	sock_put(listener);
+}
+EXPORT_SYMBOL_GPL(vsock_pending_work);
+
+/**** SOCKET OPERATIONS ****/
+
+static int __vsock_bind_stream(struct vsock_sock *vsk,
+			       struct sockaddr_vm *addr)
+{
+	static u32 port = LAST_RESERVED_PORT + 1;
+	struct sockaddr_vm new_addr;
+
+	vsock_addr_init(&new_addr, addr->svm_cid, addr->svm_port);
+
+	if (addr->svm_port == VMADDR_PORT_ANY) {
+		bool found = false;
+		unsigned int i;
+
+		for (i = 0; i < MAX_PORT_RETRIES; i++) {
+			if (port <= LAST_RESERVED_PORT)
+				port = LAST_RESERVED_PORT + 1;
+
+			new_addr.svm_port = port++;
+
+			if (!__vsock_find_bound_socket(&new_addr)) {
+				found = true;
+				break;
+			}
+		}
+
+		if (!found)
+			return -EADDRNOTAVAIL;
+	} else {
+		/* If port is in reserved range, ensure caller
+		 * has necessary privileges.
+		 */
+		if (addr->svm_port <= LAST_RESERVED_PORT &&
+		    !capable(CAP_NET_BIND_SERVICE)) {
+			return -EACCES;
+		}
+
+		if (__vsock_find_bound_socket(&new_addr))
+			return -EADDRINUSE;
+	}
+
+	vsock_addr_init(&vsk->local_addr, new_addr.svm_cid, new_addr.svm_port);
+
+	/* Remove stream sockets from the unbound list and add them to the hash
+	 * table for easy lookup by its address.  The unbound list is simply an
+	 * extra entry at the end of the hash table, a trick used by AF_UNIX.
+	 */
+	__vsock_remove_bound(vsk);
+	__vsock_insert_bound(vsock_bound_sockets(&vsk->local_addr), vsk);
+
+	return 0;
+}
+
+static int __vsock_bind_dgram(struct vsock_sock *vsk,
+			      struct sockaddr_vm *addr)
+{
+	return transport->dgram_bind(vsk, addr);
+}
+
+static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr)
+{
+	struct vsock_sock *vsk = vsock_sk(sk);
+	u32 cid;
+	int retval;
+
+	/* First ensure this socket isn't already bound. */
+	if (vsock_addr_bound(&vsk->local_addr))
+		return -EINVAL;
+
+	/* Now bind to the provided address or select appropriate values if
+	 * none are provided (VMADDR_CID_ANY and VMADDR_PORT_ANY).  Note that
+	 * like AF_INET prevents binding to a non-local IP address (in most
+	 * cases), we only allow binding to the local CID.
+	 */
+	cid = transport->get_local_cid();
+	if (addr->svm_cid != cid && addr->svm_cid != VMADDR_CID_ANY)
+		return -EADDRNOTAVAIL;
+
+	switch (sk->sk_socket->type) {
+	case SOCK_STREAM:
+		spin_lock_bh(&vsock_table_lock);
+		retval = __vsock_bind_stream(vsk, addr);
+		spin_unlock_bh(&vsock_table_lock);
+		break;
+
+	case SOCK_DGRAM:
+		retval = __vsock_bind_dgram(vsk, addr);
+		break;
+
+	default:
+		retval = -EINVAL;
+		break;
+	}
+
+	return retval;
+}
+
+struct sock *__vsock_create(struct net *net,
+			    struct socket *sock,
+			    struct sock *parent,
+			    gfp_t priority,
+			    unsigned short type)
+{
+	struct sock *sk;
+	struct vsock_sock *psk;
+	struct vsock_sock *vsk;
+
+	sk = sk_alloc(net, AF_VSOCK, priority, &vsock_proto);
+	if (!sk)
+		return NULL;
+
+	sock_init_data(sock, sk);
+
+	/* sk->sk_type is normally set in sock_init_data, but only if sock is
+	 * non-NULL. We make sure that our sockets always have a type by
+	 * setting it here if needed.
+	 */
+	if (!sock)
+		sk->sk_type = type;
+
+	vsk = vsock_sk(sk);
+	vsock_addr_init(&vsk->local_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY);
+	vsock_addr_init(&vsk->remote_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY);
+
+	sk->sk_destruct = vsock_sk_destruct;
+	sk->sk_backlog_rcv = vsock_queue_rcv_skb;
+	sk->sk_state = 0;
+	sock_reset_flag(sk, SOCK_DONE);
+
+	INIT_LIST_HEAD(&vsk->bound_table);
+	INIT_LIST_HEAD(&vsk->connected_table);
+	vsk->listener = NULL;
+	INIT_LIST_HEAD(&vsk->pending_links);
+	INIT_LIST_HEAD(&vsk->accept_queue);
+	vsk->rejected = false;
+	vsk->sent_request = false;
+	vsk->ignore_connecting_rst = false;
+	vsk->peer_shutdown = 0;
+
+	psk = parent ? vsock_sk(parent) : NULL;
+	if (parent) {
+		vsk->trusted = psk->trusted;
+		vsk->owner = get_cred(psk->owner);
+		vsk->connect_timeout = psk->connect_timeout;
+	} else {
+		vsk->trusted = capable(CAP_NET_ADMIN);
+		vsk->owner = get_current_cred();
+		vsk->connect_timeout = VSOCK_DEFAULT_CONNECT_TIMEOUT;
+	}
+
+	if (transport->init(vsk, psk) < 0) {
+		sk_free(sk);
+		return NULL;
+	}
+
+	if (sock)
+		vsock_insert_unbound(vsk);
+
+	return sk;
+}
+EXPORT_SYMBOL_GPL(__vsock_create);
+
+static void __vsock_release(struct sock *sk)
+{
+	if (sk) {
+		struct sk_buff *skb;
+		struct sock *pending;
+		struct vsock_sock *vsk;
+
+		vsk = vsock_sk(sk);
+		pending = NULL;	/* Compiler warning. */
+
+		if (vsock_in_bound_table(vsk))
+			vsock_remove_bound(vsk);
+
+		if (vsock_in_connected_table(vsk))
+			vsock_remove_connected(vsk);
+
+		transport->release(vsk);
+
+		lock_sock(sk);
+		sock_orphan(sk);
+		sk->sk_shutdown = SHUTDOWN_MASK;
+
+		while ((skb = skb_dequeue(&sk->sk_receive_queue)))
+			kfree_skb(skb);
+
+		/* Clean up any sockets that never were accepted. */
+		while ((pending = vsock_dequeue_accept(sk)) != NULL) {
+			__vsock_release(pending);
+			sock_put(pending);
+		}
+
+		release_sock(sk);
+		sock_put(sk);
+	}
+}
+
+static void vsock_sk_destruct(struct sock *sk)
+{
+	struct vsock_sock *vsk = vsock_sk(sk);
+
+	transport->destruct(vsk);
+
+	/* When clearing these addresses, there's no need to set the family and
+	 * possibly register the address family with the kernel.
+	 */
+	vsock_addr_init(&vsk->local_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY);
+	vsock_addr_init(&vsk->remote_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY);
+
+	put_cred(vsk->owner);
+}
+
+static int vsock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+{
+	int err;
+
+	err = sock_queue_rcv_skb(sk, skb);
+	if (err)
+		kfree_skb(skb);
+
+	return err;
+}
+
+s64 vsock_stream_has_data(struct vsock_sock *vsk)
+{
+	return transport->stream_has_data(vsk);
+}
+EXPORT_SYMBOL_GPL(vsock_stream_has_data);
+
+s64 vsock_stream_has_space(struct vsock_sock *vsk)
+{
+	return transport->stream_has_space(vsk);
+}
+EXPORT_SYMBOL_GPL(vsock_stream_has_space);
+
+static int vsock_release(struct socket *sock)
+{
+	__vsock_release(sock->sk);
+	sock->sk = NULL;
+	sock->state = SS_FREE;
+
+	return 0;
+}
+
+static int
+vsock_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
+{
+	int err;
+	struct sock *sk;
+	struct sockaddr_vm *vm_addr;
+
+	sk = sock->sk;
+
+	if (vsock_addr_cast(addr, addr_len, &vm_addr) != 0)
+		return -EINVAL;
+
+	lock_sock(sk);
+	err = __vsock_bind(sk, vm_addr);
+	release_sock(sk);
+
+	return err;
+}
+
+static int vsock_getname(struct socket *sock,
+			 struct sockaddr *addr, int *addr_len, int peer)
+{
+	int err;
+	struct sock *sk;
+	struct vsock_sock *vsk;
+	struct sockaddr_vm *vm_addr;
+
+	sk = sock->sk;
+	vsk = vsock_sk(sk);
+	err = 0;
+
+	lock_sock(sk);
+
+	if (peer) {
+		if (sock->state != SS_CONNECTED) {
+			err = -ENOTCONN;
+			goto out;
+		}
+		vm_addr = &vsk->remote_addr;
+	} else {
+		vm_addr = &vsk->local_addr;
+	}
+
+	if (!vm_addr) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	/* sys_getsockname() and sys_getpeername() pass us a
+	 * MAX_SOCK_ADDR-sized buffer and don't set addr_len.  Unfortunately
+	 * that macro is defined in socket.c instead of .h, so we hardcode its
+	 * value here.
+	 */
+	BUILD_BUG_ON(sizeof(*vm_addr) > 128);
+	memcpy(addr, vm_addr, sizeof(*vm_addr));
+	*addr_len = sizeof(*vm_addr);
+
+out:
+	release_sock(sk);
+	return err;
+}
+
+static int vsock_shutdown(struct socket *sock, int mode)
+{
+	int err;
+	struct sock *sk;
+
+	/* User level uses SHUT_RD (0) and SHUT_WR (1), but the kernel uses
+	 * RCV_SHUTDOWN (1) and SEND_SHUTDOWN (2), so we must increment mode
+	 * here like the other address families do.  Note also that the
+	 * increment makes SHUT_RDWR (2) into RCV_SHUTDOWN | SEND_SHUTDOWN (3),
+	 * which is what we want.
+	 */
+	mode++;
+
+	if ((mode & ~SHUTDOWN_MASK) || !mode)
+		return -EINVAL;
+
+	/* If this is a STREAM socket and it is not connected then bail out
+	 * immediately.  If it is a DGRAM socket then we must first kick the
+	 * socket so that it wakes up from any sleeping calls, for example
+	 * recv(), and then afterwards return the error.
+	 */
+
+	sk = sock->sk;
+	if (sock->state == SS_UNCONNECTED) {
+		err = -ENOTCONN;
+		if (sk->sk_type == SOCK_STREAM)
+			return err;
+	} else {
+		sock->state = SS_DISCONNECTING;
+		err = 0;
+	}
+
+	/* Receive and send shutdowns are treated alike. */
+	mode = mode & (RCV_SHUTDOWN | SEND_SHUTDOWN);
+	if (mode) {
+		lock_sock(sk);
+		sk->sk_shutdown |= mode;
+		sk->sk_state_change(sk);
+		release_sock(sk);
+
+		if (sk->sk_type == SOCK_STREAM) {
+			sock_reset_flag(sk, SOCK_DONE);
+			vsock_send_shutdown(sk, mode);
+		}
+	}
+
+	return err;
+}
+
+static unsigned int vsock_poll(struct file *file, struct socket *sock,
+			       poll_table *wait)
+{
+	struct sock *sk;
+	unsigned int mask;
+	struct vsock_sock *vsk;
+
+	sk = sock->sk;
+	vsk = vsock_sk(sk);
+
+	poll_wait(file, sk_sleep(sk), wait);
+	mask = 0;
+
+	if (sk->sk_err)
+		/* Signify that there has been an error on this socket. */
+		mask |= POLLERR;
+
+	/* INET sockets treat local write shutdown and peer write shutdown as a
+	 * case of POLLHUP set.
+	 */
+	if ((sk->sk_shutdown == SHUTDOWN_MASK) ||
+	    ((sk->sk_shutdown & SEND_SHUTDOWN) &&
+	     (vsk->peer_shutdown & SEND_SHUTDOWN))) {
+		mask |= POLLHUP;
+	}
+
+	if (sk->sk_shutdown & RCV_SHUTDOWN ||
+	    vsk->peer_shutdown & SEND_SHUTDOWN) {
+		mask |= POLLRDHUP;
+	}
+
+	if (sock->type == SOCK_DGRAM) {
+		/* For datagram sockets we can read if there is something in
+		 * the queue and write as long as the socket isn't shutdown for
+		 * sending.
+		 */
+		if (!skb_queue_empty(&sk->sk_receive_queue) ||
+		    (sk->sk_shutdown & RCV_SHUTDOWN)) {
+			mask |= POLLIN | POLLRDNORM;
+		}
+
+		if (!(sk->sk_shutdown & SEND_SHUTDOWN))
+			mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
+
+	} else if (sock->type == SOCK_STREAM) {
+		lock_sock(sk);
+
+		/* Listening sockets that have connections in their accept
+		 * queue can be read.
+		 */
+		if (sk->sk_state == SS_LISTEN
+		    && !vsock_is_accept_queue_empty(sk))
+			mask |= POLLIN | POLLRDNORM;
+
+		/* If there is something in the queue then we can read. */
+		if (transport->stream_is_active(vsk) &&
+		    !(sk->sk_shutdown & RCV_SHUTDOWN)) {
+			bool data_ready_now = false;
+			int ret = transport->notify_poll_in(
+					vsk, 1, &data_ready_now);
+			if (ret < 0) {
+				mask |= POLLERR;
+			} else {
+				if (data_ready_now)
+					mask |= POLLIN | POLLRDNORM;
+
+			}
+		}
+
+		/* Sockets whose connections have been closed, reset, or
+		 * terminated should also be considered read, and we check the
+		 * shutdown flag for that.
+		 */
+		if (sk->sk_shutdown & RCV_SHUTDOWN ||
+		    vsk->peer_shutdown & SEND_SHUTDOWN) {
+			mask |= POLLIN | POLLRDNORM;
+		}
+
+		/* Connected sockets that can produce data can be written. */
+		if (sk->sk_state == SS_CONNECTED) {
+			if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
+				bool space_avail_now = false;
+				int ret = transport->notify_poll_out(
+						vsk, 1, &space_avail_now);
+				if (ret < 0) {
+					mask |= POLLERR;
+				} else {
+					if (space_avail_now)
+						/* Remove POLLWRBAND since INET
+						 * sockets are not setting it.
+						 */
+						mask |= POLLOUT | POLLWRNORM;
+
+				}
+			}
+		}
+
+		/* Simulate INET socket poll behaviors, which sets
+		 * POLLOUT|POLLWRNORM when peer is closed and nothing to read,
+		 * but local send is not shutdown.
+		 */
+		if (sk->sk_state == SS_UNCONNECTED) {
+			if (!(sk->sk_shutdown & SEND_SHUTDOWN))
+				mask |= POLLOUT | POLLWRNORM;
+
+		}
+
+		release_sock(sk);
+	}
+
+	return mask;
+}
+
+static int vsock_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
+			       struct msghdr *msg, size_t len)
+{
+	int err;
+	struct sock *sk;
+	struct vsock_sock *vsk;
+	struct sockaddr_vm *remote_addr;
+
+	if (msg->msg_flags & MSG_OOB)
+		return -EOPNOTSUPP;
+
+	/* For now, MSG_DONTWAIT is always assumed... */
+	err = 0;
+	sk = sock->sk;
+	vsk = vsock_sk(sk);
+
+	lock_sock(sk);
+
+	if (!vsock_addr_bound(&vsk->local_addr)) {
+		struct sockaddr_vm local_addr;
+
+		vsock_addr_init(&local_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY);
+		err = __vsock_bind(sk, &local_addr);
+		if (err != 0)
+			goto out;
+
+	}
+
+	/* If the provided message contains an address, use that.  Otherwise
+	 * fall back on the socket's remote handle (if it has been connected).
+	 */
+	if (msg->msg_name &&
+	    vsock_addr_cast(msg->msg_name, msg->msg_namelen,
+			    &remote_addr) == 0) {
+		/* Ensure this address is of the right type and is a valid
+		 * destination.
+		 */
+
+		if (remote_addr->svm_cid == VMADDR_CID_ANY)
+			remote_addr->svm_cid = transport->get_local_cid();
+
+		if (!vsock_addr_bound(remote_addr)) {
+			err = -EINVAL;
+			goto out;
+		}
+	} else if (sock->state == SS_CONNECTED) {
+		remote_addr = &vsk->remote_addr;
+
+		if (remote_addr->svm_cid == VMADDR_CID_ANY)
+			remote_addr->svm_cid = transport->get_local_cid();
+
+		/* XXX Should connect() or this function ensure remote_addr is
+		 * bound?
+		 */
+		if (!vsock_addr_bound(&vsk->remote_addr)) {
+			err = -EINVAL;
+			goto out;
+		}
+	} else {
+		err = -EINVAL;
+		goto out;
+	}
+
+	if (!transport->dgram_allow(remote_addr->svm_cid,
+				    remote_addr->svm_port)) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	err = transport->dgram_enqueue(vsk, remote_addr, msg->msg_iov, len);
+
+out:
+	release_sock(sk);
+	return err;
+}
+
+static int vsock_dgram_connect(struct socket *sock,
+			       struct sockaddr *addr, int addr_len, int flags)
+{
+	int err;
+	struct sock *sk;
+	struct vsock_sock *vsk;
+	struct sockaddr_vm *remote_addr;
+
+	sk = sock->sk;
+	vsk = vsock_sk(sk);
+
+	err = vsock_addr_cast(addr, addr_len, &remote_addr);
+	if (err == -EAFNOSUPPORT && remote_addr->svm_family == AF_UNSPEC) {
+		lock_sock(sk);
+		vsock_addr_init(&vsk->remote_addr, VMADDR_CID_ANY,
+				VMADDR_PORT_ANY);
+		sock->state = SS_UNCONNECTED;
+		release_sock(sk);
+		return 0;
+	} else if (err != 0)
+		return -EINVAL;
+
+	lock_sock(sk);
+
+	if (!vsock_addr_bound(&vsk->local_addr)) {
+		struct sockaddr_vm local_addr;
+
+		vsock_addr_init(&local_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY);
+		err = __vsock_bind(sk, &local_addr);
+		if (err != 0)
+			goto out;
+
+	}
+
+	if (!transport->dgram_allow(remote_addr->svm_cid,
+				    remote_addr->svm_port)) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	memcpy(&vsk->remote_addr, remote_addr, sizeof(vsk->remote_addr));
+	sock->state = SS_CONNECTED;
+
+out:
+	release_sock(sk);
+	return err;
+}
+
+static int vsock_dgram_recvmsg(struct kiocb *kiocb, struct socket *sock,
+			       struct msghdr *msg, size_t len, int flags)
+{
+	return transport->dgram_dequeue(kiocb, vsock_sk(sock->sk), msg, len,
+					flags);
+}
+
+static const struct proto_ops vsock_dgram_ops = {
+	.family = PF_VSOCK,
+	.owner = THIS_MODULE,
+	.release = vsock_release,
+	.bind = vsock_bind,
+	.connect = vsock_dgram_connect,
+	.socketpair = sock_no_socketpair,
+	.accept = sock_no_accept,
+	.getname = vsock_getname,
+	.poll = vsock_poll,
+	.ioctl = sock_no_ioctl,
+	.listen = sock_no_listen,
+	.shutdown = vsock_shutdown,
+	.setsockopt = sock_no_setsockopt,
+	.getsockopt = sock_no_getsockopt,
+	.sendmsg = vsock_dgram_sendmsg,
+	.recvmsg = vsock_dgram_recvmsg,
+	.mmap = sock_no_mmap,
+	.sendpage = sock_no_sendpage,
+};
+
+static void vsock_connect_timeout(struct work_struct *work)
+{
+	struct sock *sk;
+	struct vsock_sock *vsk;
+
+	vsk = container_of(work, struct vsock_sock, dwork.work);
+	sk = sk_vsock(vsk);
+
+	lock_sock(sk);
+	if (sk->sk_state == SS_CONNECTING &&
+	    (sk->sk_shutdown != SHUTDOWN_MASK)) {
+		sk->sk_state = SS_UNCONNECTED;
+		sk->sk_err = ETIMEDOUT;
+		sk->sk_error_report(sk);
+	}
+	release_sock(sk);
+
+	sock_put(sk);
+}
+
+static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr,
+				int addr_len, int flags)
+{
+	int err;
+	struct sock *sk;
+	struct vsock_sock *vsk;
+	struct sockaddr_vm *remote_addr;
+	long timeout;
+	DEFINE_WAIT(wait);
+
+	err = 0;
+	sk = sock->sk;
+	vsk = vsock_sk(sk);
+
+	lock_sock(sk);
+
+	/* XXX AF_UNSPEC should make us disconnect like AF_INET. */
+	switch (sock->state) {
+	case SS_CONNECTED:
+		err = -EISCONN;
+		goto out;
+	case SS_DISCONNECTING:
+		err = -EINVAL;
+		goto out;
+	case SS_CONNECTING:
+		/* This continues on so we can move sock into the SS_CONNECTED
+		 * state once the connection has completed (at which point err
+		 * will be set to zero also).  Otherwise, we will either wait
+		 * for the connection or return -EALREADY should this be a
+		 * non-blocking call.
+		 */
+		err = -EALREADY;
+		break;
+	default:
+		if ((sk->sk_state == SS_LISTEN) ||
+		    vsock_addr_cast(addr, addr_len, &remote_addr) != 0) {
+			err = -EINVAL;
+			goto out;
+		}
+
+		/* The hypervisor and well-known contexts do not have socket
+		 * endpoints.
+		 */
+		if (!transport->stream_allow(remote_addr->svm_cid,
+					     remote_addr->svm_port)) {
+			err = -ENETUNREACH;
+			goto out;
+		}
+
+		/* Set the remote address that we are connecting to. */
+		memcpy(&vsk->remote_addr, remote_addr,
+		       sizeof(vsk->remote_addr));
+
+		/* Autobind this socket to the local address if necessary. */
+		if (!vsock_addr_bound(&vsk->local_addr)) {
+			struct sockaddr_vm local_addr;
+
+			vsock_addr_init(&local_addr, VMADDR_CID_ANY,
+					VMADDR_PORT_ANY);
+			err = __vsock_bind(sk, &local_addr);
+			if (err != 0)
+				goto out;
+
+		}
+
+		sk->sk_state = SS_CONNECTING;
+
+		err = transport->connect(vsk);
+		if (err < 0)
+			goto out;
+
+		/* Mark sock as connecting and set the error code to in
+		 * progress in case this is a non-blocking connect.
+		 */
+		sock->state = SS_CONNECTING;
+		err = -EINPROGRESS;
+	}
+
+	/* The receive path will handle all communication until we are able to
+	 * enter the connected state.  Here we wait for the connection to be
+	 * completed or a notification of an error.
+	 */
+	timeout = vsk->connect_timeout;
+	prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
+
+	while (sk->sk_state != SS_CONNECTED && sk->sk_err == 0) {
+		if (flags & O_NONBLOCK) {
+			/* If we're not going to block, we schedule a timeout
+			 * function to generate a timeout on the connection
+			 * attempt, in case the peer doesn't respond in a
+			 * timely manner. We hold on to the socket until the
+			 * timeout fires.
+			 */
+			sock_hold(sk);
+			INIT_DELAYED_WORK(&vsk->dwork,
+					  vsock_connect_timeout);
+			schedule_delayed_work(&vsk->dwork, timeout);
+
+			/* Skip ahead to preserve error code set above. */
+			goto out_wait;
+		}
+
+		release_sock(sk);
+		timeout = schedule_timeout(timeout);
+		lock_sock(sk);
+
+		if (signal_pending(current)) {
+			err = sock_intr_errno(timeout);
+			goto out_wait_error;
+		} else if (timeout == 0) {
+			err = -ETIMEDOUT;
+			goto out_wait_error;
+		}
+
+		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
+	}
+
+	if (sk->sk_err) {
+		err = -sk->sk_err;
+		goto out_wait_error;
+	} else
+		err = 0;
+
+out_wait:
+	finish_wait(sk_sleep(sk), &wait);
+out:
+	release_sock(sk);
+	return err;
+
+out_wait_error:
+	sk->sk_state = SS_UNCONNECTED;
+	sock->state = SS_UNCONNECTED;
+	goto out_wait;
+}
+
+static int vsock_accept(struct socket *sock, struct socket *newsock, int flags)
+{
+	struct sock *listener;
+	int err;
+	struct sock *connected;
+	struct vsock_sock *vconnected;
+	long timeout;
+	DEFINE_WAIT(wait);
+
+	err = 0;
+	listener = sock->sk;
+
+	lock_sock(listener);
+
+	if (sock->type != SOCK_STREAM) {
+		err = -EOPNOTSUPP;
+		goto out;
+	}
+
+	if (listener->sk_state != SS_LISTEN) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	/* Wait for children sockets to appear; these are the new sockets
+	 * created upon connection establishment.
+	 */
+	timeout = sock_sndtimeo(listener, flags & O_NONBLOCK);
+	prepare_to_wait(sk_sleep(listener), &wait, TASK_INTERRUPTIBLE);
+
+	while ((connected = vsock_dequeue_accept(listener)) == NULL &&
+	       listener->sk_err == 0) {
+		release_sock(listener);
+		timeout = schedule_timeout(timeout);
+		lock_sock(listener);
+
+		if (signal_pending(current)) {
+			err = sock_intr_errno(timeout);
+			goto out_wait;
+		} else if (timeout == 0) {
+			err = -EAGAIN;
+			goto out_wait;
+		}
+
+		prepare_to_wait(sk_sleep(listener), &wait, TASK_INTERRUPTIBLE);
+	}
+
+	if (listener->sk_err)
+		err = -listener->sk_err;
+
+	if (connected) {
+		listener->sk_ack_backlog--;
+
+		lock_sock(connected);
+		vconnected = vsock_sk(connected);
+
+		/* If the listener socket has received an error, then we should
+		 * reject this socket and return.  Note that we simply mark the
+		 * socket rejected, drop our reference, and let the cleanup
+		 * function handle the cleanup; the fact that we found it in
+		 * the listener's accept queue guarantees that the cleanup
+		 * function hasn't run yet.
+		 */
+		if (err) {
+			vconnected->rejected = true;
+			release_sock(connected);
+			sock_put(connected);
+			goto out_wait;
+		}
+
+		newsock->state = SS_CONNECTED;
+		sock_graft(connected, newsock);
+		release_sock(connected);
+		sock_put(connected);
+	}
+
+out_wait:
+	finish_wait(sk_sleep(listener), &wait);
+out:
+	release_sock(listener);
+	return err;
+}
+
+static int vsock_listen(struct socket *sock, int backlog)
+{
+	int err;
+	struct sock *sk;
+	struct vsock_sock *vsk;
+
+	sk = sock->sk;
+
+	lock_sock(sk);
+
+	if (sock->type != SOCK_STREAM) {
+		err = -EOPNOTSUPP;
+		goto out;
+	}
+
+	if (sock->state != SS_UNCONNECTED) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	vsk = vsock_sk(sk);
+
+	if (!vsock_addr_bound(&vsk->local_addr)) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	sk->sk_max_ack_backlog = backlog;
+	sk->sk_state = SS_LISTEN;
+
+	err = 0;
+
+out:
+	release_sock(sk);
+	return err;
+}
+
+static int vsock_stream_setsockopt(struct socket *sock,
+				   int level,
+				   int optname,
+				   char __user *optval,
+				   unsigned int optlen)
+{
+	int err;
+	struct sock *sk;
+	struct vsock_sock *vsk;
+	u64 val;
+
+	if (level != AF_VSOCK)
+		return -ENOPROTOOPT;
+
+#define COPY_IN(_v)                                       \
+	do {						  \
+		if (optlen < sizeof(_v)) {		  \
+			err = -EINVAL;			  \
+			goto exit;			  \
+		}					  \
+		if (copy_from_user(&_v, optval, sizeof(_v)) != 0) {	\
+			err = -EFAULT;					\
+			goto exit;					\
+		}							\
+	} while (0)
+
+	err = 0;
+	sk = sock->sk;
+	vsk = vsock_sk(sk);
+
+	lock_sock(sk);
+
+	switch (optname) {
+	case SO_VM_SOCKETS_BUFFER_SIZE:
+		COPY_IN(val);
+		transport->set_buffer_size(vsk, val);
+		break;
+
+	case SO_VM_SOCKETS_BUFFER_MAX_SIZE:
+		COPY_IN(val);
+		transport->set_max_buffer_size(vsk, val);
+		break;
+
+	case SO_VM_SOCKETS_BUFFER_MIN_SIZE:
+		COPY_IN(val);
+		transport->set_min_buffer_size(vsk, val);
+		break;
+
+	case SO_VM_SOCKETS_CONNECT_TIMEOUT: {
+		struct timeval tv;
+		COPY_IN(tv);
+		if (tv.tv_sec >= 0 && tv.tv_usec < USEC_PER_SEC &&
+		    tv.tv_sec < (MAX_SCHEDULE_TIMEOUT / HZ - 1)) {
+			vsk->connect_timeout = tv.tv_sec * HZ +
+			    DIV_ROUND_UP(tv.tv_usec, (1000000 / HZ));
+			if (vsk->connect_timeout == 0)
+				vsk->connect_timeout =
+				    VSOCK_DEFAULT_CONNECT_TIMEOUT;
+
+		} else {
+			err = -ERANGE;
+		}
+		break;
+	}
+
+	default:
+		err = -ENOPROTOOPT;
+		break;
+	}
+
+#undef COPY_IN
+
+exit:
+	release_sock(sk);
+	return err;
+}
+
+static int vsock_stream_getsockopt(struct socket *sock,
+				   int level, int optname,
+				   char __user *optval,
+				   int __user *optlen)
+{
+	int err;
+	int len;
+	struct sock *sk;
+	struct vsock_sock *vsk;
+	u64 val;
+
+	if (level != AF_VSOCK)
+		return -ENOPROTOOPT;
+
+	err = get_user(len, optlen);
+	if (err != 0)
+		return err;
+
+#define COPY_OUT(_v)                            \
+	do {					\
+		if (len < sizeof(_v))		\
+			return -EINVAL;		\
+						\
+		len = sizeof(_v);		\
+		if (copy_to_user(optval, &_v, len) != 0)	\
+			return -EFAULT;				\
+								\
+	} while (0)
+
+	err = 0;
+	sk = sock->sk;
+	vsk = vsock_sk(sk);
+
+	switch (optname) {
+	case SO_VM_SOCKETS_BUFFER_SIZE:
+		val = transport->get_buffer_size(vsk);
+		COPY_OUT(val);
+		break;
+
+	case SO_VM_SOCKETS_BUFFER_MAX_SIZE:
+		val = transport->get_max_buffer_size(vsk);
+		COPY_OUT(val);
+		break;
+
+	case SO_VM_SOCKETS_BUFFER_MIN_SIZE:
+		val = transport->get_min_buffer_size(vsk);
+		COPY_OUT(val);
+		break;
+
+	case SO_VM_SOCKETS_CONNECT_TIMEOUT: {
+		struct timeval tv;
+		tv.tv_sec = vsk->connect_timeout / HZ;
+		tv.tv_usec =
+		    (vsk->connect_timeout -
+		     tv.tv_sec * HZ) * (1000000 / HZ);
+		COPY_OUT(tv);
+		break;
+	}
+	default:
+		return -ENOPROTOOPT;
+	}
+
+	err = put_user(len, optlen);
+	if (err != 0)
+		return -EFAULT;
+
+#undef COPY_OUT
+
+	return 0;
+}
+
+static int vsock_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
+				struct msghdr *msg, size_t len)
+{
+	struct sock *sk;
+	struct vsock_sock *vsk;
+	ssize_t total_written;
+	long timeout;
+	int err;
+	struct vsock_transport_send_notify_data send_data;
+
+	DEFINE_WAIT(wait);
+
+	sk = sock->sk;
+	vsk = vsock_sk(sk);
+	total_written = 0;
+	err = 0;
+
+	if (msg->msg_flags & MSG_OOB)
+		return -EOPNOTSUPP;
+
+	lock_sock(sk);
+
+	/* Callers should not provide a destination with stream sockets. */
+	if (msg->msg_namelen) {
+		err = sk->sk_state == SS_CONNECTED ? -EISCONN : -EOPNOTSUPP;
+		goto out;
+	}
+
+	/* Send data only if both sides are not shutdown in the direction. */
+	if (sk->sk_shutdown & SEND_SHUTDOWN ||
+	    vsk->peer_shutdown & RCV_SHUTDOWN) {
+		err = -EPIPE;
+		goto out;
+	}
+
+	if (sk->sk_state != SS_CONNECTED ||
+	    !vsock_addr_bound(&vsk->local_addr)) {
+		err = -ENOTCONN;
+		goto out;
+	}
+
+	if (!vsock_addr_bound(&vsk->remote_addr)) {
+		err = -EDESTADDRREQ;
+		goto out;
+	}
+
+	/* Wait for room in the produce queue to enqueue our user's data. */
+	timeout = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
+
+	err = transport->notify_send_init(vsk, &send_data);
+	if (err < 0)
+		goto out;
+
+	prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
+
+	while (total_written < len) {
+		ssize_t written;
+
+		while (vsock_stream_has_space(vsk) == 0 &&
+		       sk->sk_err == 0 &&
+		       !(sk->sk_shutdown & SEND_SHUTDOWN) &&
+		       !(vsk->peer_shutdown & RCV_SHUTDOWN)) {
+
+			/* Don't wait for non-blocking sockets. */
+			if (timeout == 0) {
+				err = -EAGAIN;
+				goto out_wait;
+			}
+
+			err = transport->notify_send_pre_block(vsk, &send_data);
+			if (err < 0)
+				goto out_wait;
+
+			release_sock(sk);
+			timeout = schedule_timeout(timeout);
+			lock_sock(sk);
+			if (signal_pending(current)) {
+				err = sock_intr_errno(timeout);
+				goto out_wait;
+			} else if (timeout == 0) {
+				err = -EAGAIN;
+				goto out_wait;
+			}
+
+			prepare_to_wait(sk_sleep(sk), &wait,
+					TASK_INTERRUPTIBLE);
+		}
+
+		/* These checks occur both as part of and after the loop
+		 * conditional since we need to check before and after
+		 * sleeping.
+		 */
+		if (sk->sk_err) {
+			err = -sk->sk_err;
+			goto out_wait;
+		} else if ((sk->sk_shutdown & SEND_SHUTDOWN) ||
+			   (vsk->peer_shutdown & RCV_SHUTDOWN)) {
+			err = -EPIPE;
+			goto out_wait;
+		}
+
+		err = transport->notify_send_pre_enqueue(vsk, &send_data);
+		if (err < 0)
+			goto out_wait;
+
+		/* Note that enqueue will only write as many bytes as are free
+		 * in the produce queue, so we don't need to ensure len is
+		 * smaller than the queue size.  It is the caller's
+		 * responsibility to check how many bytes we were able to send.
+		 */
+
+		written = transport->stream_enqueue(
+				vsk, msg->msg_iov,
+				len - total_written);
+		if (written < 0) {
+			err = -ENOMEM;
+			goto out_wait;
+		}
+
+		total_written += written;
+
+		err = transport->notify_send_post_enqueue(
+				vsk, written, &send_data);
+		if (err < 0)
+			goto out_wait;
+
+	}
+
+out_wait:
+	if (total_written > 0)
+		err = total_written;
+	finish_wait(sk_sleep(sk), &wait);
+out:
+	release_sock(sk);
+	return err;
+}
+
+
+static int
+vsock_stream_recvmsg(struct kiocb *kiocb,
+		     struct socket *sock,
+		     struct msghdr *msg, size_t len, int flags)
+{
+	struct sock *sk;
+	struct vsock_sock *vsk;
+	int err;
+	size_t target;
+	ssize_t copied;
+	long timeout;
+	struct vsock_transport_recv_notify_data recv_data;
+
+	DEFINE_WAIT(wait);
+
+	sk = sock->sk;
+	vsk = vsock_sk(sk);
+	err = 0;
+
+	lock_sock(sk);
+
+	if (sk->sk_state != SS_CONNECTED) {
+		/* Recvmsg is supposed to return 0 if a peer performs an
+		 * orderly shutdown. Differentiate between that case and when a
+		 * peer has not connected or a local shutdown occured with the
+		 * SOCK_DONE flag.
+		 */
+		if (sock_flag(sk, SOCK_DONE))
+			err = 0;
+		else
+			err = -ENOTCONN;
+
+		goto out;
+	}
+
+	if (flags & MSG_OOB) {
+		err = -EOPNOTSUPP;
+		goto out;
+	}
+
+	/* We don't check peer_shutdown flag here since peer may actually shut
+	 * down, but there can be data in the queue that a local socket can
+	 * receive.
+	 */
+	if (sk->sk_shutdown & RCV_SHUTDOWN) {
+		err = 0;
+		goto out;
+	}
+
+	/* It is valid on Linux to pass in a zero-length receive buffer.  This
+	 * is not an error.  We may as well bail out now.
+	 */
+	if (!len) {
+		err = 0;
+		goto out;
+	}
+
+	/* We must not copy less than target bytes into the user's buffer
+	 * before returning successfully, so we wait for the consume queue to
+	 * have that much data to consume before dequeueing.  Note that this
+	 * makes it impossible to handle cases where target is greater than the
+	 * queue size.
+	 */
+	target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
+	if (target >= transport->stream_rcvhiwat(vsk)) {
+		err = -ENOMEM;
+		goto out;
+	}
+	timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
+	copied = 0;
+
+	err = transport->notify_recv_init(vsk, target, &recv_data);
+	if (err < 0)
+		goto out;
+
+	prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
+
+	while (1) {
+		s64 ready = vsock_stream_has_data(vsk);
+
+		if (ready < 0) {
+			/* Invalid queue pair content. XXX This should be
+			 * changed to a connection reset in a later change.
+			 */
+
+			err = -ENOMEM;
+			goto out_wait;
+		} else if (ready > 0) {
+			ssize_t read;
+
+			err = transport->notify_recv_pre_dequeue(
+					vsk, target, &recv_data);
+			if (err < 0)
+				break;
+
+			read = transport->stream_dequeue(
+					vsk, msg->msg_iov,
+					len - copied, flags);
+			if (read < 0) {
+				err = -ENOMEM;
+				break;
+			}
+
+			copied += read;
+
+			err = transport->notify_recv_post_dequeue(
+					vsk, target, read,
+					!(flags & MSG_PEEK), &recv_data);
+			if (err < 0)
+				goto out_wait;
+
+			if (read >= target || flags & MSG_PEEK)
+				break;
+
+			target -= read;
+		} else {
+			if (sk->sk_err != 0 || (sk->sk_shutdown & RCV_SHUTDOWN)
+			    || (vsk->peer_shutdown & SEND_SHUTDOWN)) {
+				break;
+			}
+			/* Don't wait for non-blocking sockets. */
+			if (timeout == 0) {
+				err = -EAGAIN;
+				break;
+			}
+
+			err = transport->notify_recv_pre_block(
+					vsk, target, &recv_data);
+			if (err < 0)
+				break;
+
+			release_sock(sk);
+			timeout = schedule_timeout(timeout);
+			lock_sock(sk);
+
+			if (signal_pending(current)) {
+				err = sock_intr_errno(timeout);
+				break;
+			} else if (timeout == 0) {
+				err = -EAGAIN;
+				break;
+			}
+
+			prepare_to_wait(sk_sleep(sk), &wait,
+					TASK_INTERRUPTIBLE);
+		}
+	}
+
+	if (sk->sk_err)
+		err = -sk->sk_err;
+	else if (sk->sk_shutdown & RCV_SHUTDOWN)
+		err = 0;
+
+	if (copied > 0) {
+		/* We only do these additional bookkeeping/notification steps
+		 * if we actually copied something out of the queue pair
+		 * instead of just peeking ahead.
+		 */
+
+		if (!(flags & MSG_PEEK)) {
+			/* If the other side has shutdown for sending and there
+			 * is nothing more to read, then modify the socket
+			 * state.
+			 */
+			if (vsk->peer_shutdown & SEND_SHUTDOWN) {
+				if (vsock_stream_has_data(vsk) <= 0) {
+					sk->sk_state = SS_UNCONNECTED;
+					sock_set_flag(sk, SOCK_DONE);
+					sk->sk_state_change(sk);
+				}
+			}
+		}
+		err = copied;
+	}
+
+out_wait:
+	finish_wait(sk_sleep(sk), &wait);
+out:
+	release_sock(sk);
+	return err;
+}
+
+static const struct proto_ops vsock_stream_ops = {
+	.family = PF_VSOCK,
+	.owner = THIS_MODULE,
+	.release = vsock_release,
+	.bind = vsock_bind,
+	.connect = vsock_stream_connect,
+	.socketpair = sock_no_socketpair,
+	.accept = vsock_accept,
+	.getname = vsock_getname,
+	.poll = vsock_poll,
+	.ioctl = sock_no_ioctl,
+	.listen = vsock_listen,
+	.shutdown = vsock_shutdown,
+	.setsockopt = vsock_stream_setsockopt,
+	.getsockopt = vsock_stream_getsockopt,
+	.sendmsg = vsock_stream_sendmsg,
+	.recvmsg = vsock_stream_recvmsg,
+	.mmap = sock_no_mmap,
+	.sendpage = sock_no_sendpage,
+};
+
+static int vsock_create(struct net *net, struct socket *sock,
+			int protocol, int kern)
+{
+	if (!sock)
+		return -EINVAL;
+
+	if (protocol)
+		return -EPROTONOSUPPORT;
+
+	switch (sock->type) {
+	case SOCK_DGRAM:
+		sock->ops = &vsock_dgram_ops;
+		break;
+	case SOCK_STREAM:
+		sock->ops = &vsock_stream_ops;
+		break;
+	default:
+		return -ESOCKTNOSUPPORT;
+	}
+
+	sock->state = SS_UNCONNECTED;
+
+	return __vsock_create(net, sock, NULL, GFP_KERNEL, 0) ? 0 : -ENOMEM;
+}
+
+static const struct net_proto_family vsock_family_ops = {
+	.family = AF_VSOCK,
+	.create = vsock_create,
+	.owner = THIS_MODULE,
+};
+
+static long vsock_dev_do_ioctl(struct file *filp,
+			       unsigned int cmd, void __user *ptr)
+{
+	u32 __user *p = ptr;
+	int retval = 0;
+
+	switch (cmd) {
+	case IOCTL_VM_SOCKETS_GET_LOCAL_CID:
+		if (put_user(transport->get_local_cid(), p) != 0)
+			retval = -EFAULT;
+		break;
+
+	default:
+		pr_err("Unknown ioctl %d\n", cmd);
+		retval = -EINVAL;
+	}
+
+	return retval;
+}
+
+static long vsock_dev_ioctl(struct file *filp,
+			    unsigned int cmd, unsigned long arg)
+{
+	return vsock_dev_do_ioctl(filp, cmd, (void __user *)arg);
+}
+
+#ifdef CONFIG_COMPAT
+static long vsock_dev_compat_ioctl(struct file *filp,
+				   unsigned int cmd, unsigned long arg)
+{
+	return vsock_dev_do_ioctl(filp, cmd, compat_ptr(arg));
+}
+#endif
+
+static const struct file_operations vsock_device_ops = {
+	.owner		= THIS_MODULE,
+	.unlocked_ioctl	= vsock_dev_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl	= vsock_dev_compat_ioctl,
+#endif
+	.open		= nonseekable_open,
+};
+
+static struct miscdevice vsock_device = {
+	.name		= "vsock",
+	.minor		= MISC_DYNAMIC_MINOR,
+	.fops		= &vsock_device_ops,
+};
+
+static int __vsock_core_init(void)
+{
+	int err;
+
+	vsock_init_tables();
+
+	err = misc_register(&vsock_device);
+	if (err) {
+		pr_err("Failed to register misc device\n");
+		return -ENOENT;
+	}
+
+	err = proto_register(&vsock_proto, 1);	/* we want our slab */
+	if (err) {
+		pr_err("Cannot register vsock protocol\n");
+		goto err_misc_deregister;
+	}
+
+	err = sock_register(&vsock_family_ops);
+	if (err) {
+		pr_err("could not register af_vsock (%d) address family: %d\n",
+		       AF_VSOCK, err);
+		goto err_unregister_proto;
+	}
+
+	return 0;
+
+err_unregister_proto:
+	proto_unregister(&vsock_proto);
+err_misc_deregister:
+	misc_deregister(&vsock_device);
+	return err;
+}
+
+int vsock_core_init(const struct vsock_transport *t)
+{
+	int retval = mutex_lock_interruptible(&vsock_register_mutex);
+	if (retval)
+		return retval;
+
+	if (transport) {
+		retval = -EBUSY;
+		goto out;
+	}
+
+	transport = t;
+	retval = __vsock_core_init();
+	if (retval)
+		transport = NULL;
+
+out:
+	mutex_unlock(&vsock_register_mutex);
+	return retval;
+}
+EXPORT_SYMBOL_GPL(vsock_core_init);
+
+void vsock_core_exit(void)
+{
+	mutex_lock(&vsock_register_mutex);
+
+	misc_deregister(&vsock_device);
+	sock_unregister(AF_VSOCK);
+	proto_unregister(&vsock_proto);
+
+	/* We do not want the assignment below re-ordered. */
+	mb();
+	transport = NULL;
+
+	mutex_unlock(&vsock_register_mutex);
+}
+EXPORT_SYMBOL_GPL(vsock_core_exit);
+
+MODULE_AUTHOR("VMware, Inc.");
+MODULE_DESCRIPTION("VMware Virtual Socket Family");
+MODULE_VERSION(VSOCK_DRIVER_VERSION_STRING);
+MODULE_LICENSE("GPL v2");
diff --git a/net/vmw_vsock/af_vsock.h b/net/vmw_vsock/af_vsock.h
new file mode 100644
index 000000000000..7d64d3609ec9
--- /dev/null
+++ b/net/vmw_vsock/af_vsock.h
@@ -0,0 +1,175 @@
+/*
+ * VMware vSockets Driver
+ *
+ * Copyright (C) 2007-2013 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef __AF_VSOCK_H__
+#define __AF_VSOCK_H__
+
+#include <linux/kernel.h>
+#include <linux/workqueue.h>
+#include <linux/vm_sockets.h>
+
+#include "vsock_addr.h"
+
+#define LAST_RESERVED_PORT 1023
+
+#define vsock_sk(__sk)    ((struct vsock_sock *)__sk)
+#define sk_vsock(__vsk)   (&(__vsk)->sk)
+
+struct vsock_sock {
+	/* sk must be the first member. */
+	struct sock sk;
+	struct sockaddr_vm local_addr;
+	struct sockaddr_vm remote_addr;
+	/* Links for the global tables of bound and connected sockets. */
+	struct list_head bound_table;
+	struct list_head connected_table;
+	/* Accessed without the socket lock held. This means it can never be
+	 * modified outsided of socket create or destruct.
+	 */
+	bool trusted;
+	bool cached_peer_allow_dgram;	/* Dgram communication allowed to
+					 * cached peer?
+					 */
+	u32 cached_peer;  /* Context ID of last dgram destination check. */
+	const struct cred *owner;
+	/* Rest are SOCK_STREAM only. */
+	long connect_timeout;
+	/* Listening socket that this came from. */
+	struct sock *listener;
+	/* Used for pending list and accept queue during connection handshake.
+	 * The listening socket is the head for both lists.  Sockets created
+	 * for connection requests are placed in the pending list until they
+	 * are connected, at which point they are put in the accept queue list
+	 * so they can be accepted in accept().  If accept() cannot accept the
+	 * connection, it is marked as rejected so the cleanup function knows
+	 * to clean up the socket.
+	 */
+	struct list_head pending_links;
+	struct list_head accept_queue;
+	bool rejected;
+	struct delayed_work dwork;
+	u32 peer_shutdown;
+	bool sent_request;
+	bool ignore_connecting_rst;
+
+	/* Private to transport. */
+	void *trans;
+};
+
+s64 vsock_stream_has_data(struct vsock_sock *vsk);
+s64 vsock_stream_has_space(struct vsock_sock *vsk);
+void vsock_pending_work(struct work_struct *work);
+struct sock *__vsock_create(struct net *net,
+			    struct socket *sock,
+			    struct sock *parent,
+			    gfp_t priority, unsigned short type);
+
+/**** TRANSPORT ****/
+
+struct vsock_transport_recv_notify_data {
+	u64 data1; /* Transport-defined. */
+	u64 data2; /* Transport-defined. */
+	bool notify_on_block;
+};
+
+struct vsock_transport_send_notify_data {
+	u64 data1; /* Transport-defined. */
+	u64 data2; /* Transport-defined. */
+};
+
+struct vsock_transport {
+	/* Initialize/tear-down socket. */
+	int (*init)(struct vsock_sock *, struct vsock_sock *);
+	void (*destruct)(struct vsock_sock *);
+	void (*release)(struct vsock_sock *);
+
+	/* Connections. */
+	int (*connect)(struct vsock_sock *);
+
+	/* DGRAM. */
+	int (*dgram_bind)(struct vsock_sock *, struct sockaddr_vm *);
+	int (*dgram_dequeue)(struct kiocb *kiocb, struct vsock_sock *vsk,
+			     struct msghdr *msg, size_t len, int flags);
+	int (*dgram_enqueue)(struct vsock_sock *, struct sockaddr_vm *,
+			     struct iovec *, size_t len);
+	bool (*dgram_allow)(u32 cid, u32 port);
+
+	/* STREAM. */
+	/* TODO: stream_bind() */
+	ssize_t (*stream_dequeue)(struct vsock_sock *, struct iovec *,
+				  size_t len, int flags);
+	ssize_t (*stream_enqueue)(struct vsock_sock *, struct iovec *,
+				  size_t len);
+	s64 (*stream_has_data)(struct vsock_sock *);
+	s64 (*stream_has_space)(struct vsock_sock *);
+	u64 (*stream_rcvhiwat)(struct vsock_sock *);
+	bool (*stream_is_active)(struct vsock_sock *);
+	bool (*stream_allow)(u32 cid, u32 port);
+
+	/* Notification. */
+	int (*notify_poll_in)(struct vsock_sock *, size_t, bool *);
+	int (*notify_poll_out)(struct vsock_sock *, size_t, bool *);
+	int (*notify_recv_init)(struct vsock_sock *, size_t,
+		struct vsock_transport_recv_notify_data *);
+	int (*notify_recv_pre_block)(struct vsock_sock *, size_t,
+		struct vsock_transport_recv_notify_data *);
+	int (*notify_recv_pre_dequeue)(struct vsock_sock *, size_t,
+		struct vsock_transport_recv_notify_data *);
+	int (*notify_recv_post_dequeue)(struct vsock_sock *, size_t,
+		ssize_t, bool, struct vsock_transport_recv_notify_data *);
+	int (*notify_send_init)(struct vsock_sock *,
+		struct vsock_transport_send_notify_data *);
+	int (*notify_send_pre_block)(struct vsock_sock *,
+		struct vsock_transport_send_notify_data *);
+	int (*notify_send_pre_enqueue)(struct vsock_sock *,
+		struct vsock_transport_send_notify_data *);
+	int (*notify_send_post_enqueue)(struct vsock_sock *, ssize_t,
+		struct vsock_transport_send_notify_data *);
+
+	/* Shutdown. */
+	int (*shutdown)(struct vsock_sock *, int);
+
+	/* Buffer sizes. */
+	void (*set_buffer_size)(struct vsock_sock *, u64);
+	void (*set_min_buffer_size)(struct vsock_sock *, u64);
+	void (*set_max_buffer_size)(struct vsock_sock *, u64);
+	u64 (*get_buffer_size)(struct vsock_sock *);
+	u64 (*get_min_buffer_size)(struct vsock_sock *);
+	u64 (*get_max_buffer_size)(struct vsock_sock *);
+
+	/* Addressing. */
+	u32 (*get_local_cid)(void);
+};
+
+/**** CORE ****/
+
+int vsock_core_init(const struct vsock_transport *t);
+void vsock_core_exit(void);
+
+/**** UTILS ****/
+
+void vsock_release_pending(struct sock *pending);
+void vsock_add_pending(struct sock *listener, struct sock *pending);
+void vsock_remove_pending(struct sock *listener, struct sock *pending);
+void vsock_enqueue_accept(struct sock *listener, struct sock *connected);
+void vsock_insert_connected(struct vsock_sock *vsk);
+void vsock_remove_bound(struct vsock_sock *vsk);
+void vsock_remove_connected(struct vsock_sock *vsk);
+struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr);
+struct sock *vsock_find_connected_socket(struct sockaddr_vm *src,
+					 struct sockaddr_vm *dst);
+void vsock_for_each_connected_socket(void (*fn)(struct sock *sk));
+
+#endif /* __AF_VSOCK_H__ */
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
new file mode 100644
index 000000000000..e8a87cf37072
--- /dev/null
+++ b/net/vmw_vsock/vmci_transport.c
@@ -0,0 +1,2157 @@
+/*
+ * VMware vSockets Driver
+ *
+ * Copyright (C) 2007-2013 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/types.h>
+
+#define EXPORT_SYMTAB
+#include <linux/bitops.h>
+#include <linux/cred.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/kmod.h>
+#include <linux/list.h>
+#include <linux/miscdevice.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/net.h>
+#include <linux/poll.h>
+#include <linux/skbuff.h>
+#include <linux/smp.h>
+#include <linux/socket.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+#include <linux/wait.h>
+#include <linux/workqueue.h>
+#include <net/sock.h>
+
+#include "af_vsock.h"
+#include "vmci_transport_notify.h"
+
+static int vmci_transport_recv_dgram_cb(void *data, struct vmci_datagram *dg);
+static int vmci_transport_recv_stream_cb(void *data, struct vmci_datagram *dg);
+static void vmci_transport_peer_attach_cb(u32 sub_id,
+					  const struct vmci_event_data *ed,
+					  void *client_data);
+static void vmci_transport_peer_detach_cb(u32 sub_id,
+					  const struct vmci_event_data *ed,
+					  void *client_data);
+static void vmci_transport_recv_pkt_work(struct work_struct *work);
+static int vmci_transport_recv_listen(struct sock *sk,
+				      struct vmci_transport_packet *pkt);
+static int vmci_transport_recv_connecting_server(
+					struct sock *sk,
+					struct sock *pending,
+					struct vmci_transport_packet *pkt);
+static int vmci_transport_recv_connecting_client(
+					struct sock *sk,
+					struct vmci_transport_packet *pkt);
+static int vmci_transport_recv_connecting_client_negotiate(
+					struct sock *sk,
+					struct vmci_transport_packet *pkt);
+static int vmci_transport_recv_connecting_client_invalid(
+					struct sock *sk,
+					struct vmci_transport_packet *pkt);
+static int vmci_transport_recv_connected(struct sock *sk,
+					 struct vmci_transport_packet *pkt);
+static bool vmci_transport_old_proto_override(bool *old_pkt_proto);
+static u16 vmci_transport_new_proto_supported_versions(void);
+static bool vmci_transport_proto_to_notify_struct(struct sock *sk, u16 *proto,
+						  bool old_pkt_proto);
+
+struct vmci_transport_recv_pkt_info {
+	struct work_struct work;
+	struct sock *sk;
+	struct vmci_transport_packet pkt;
+};
+
+static struct vmci_handle vmci_transport_stream_handle = { VMCI_INVALID_ID,
+							   VMCI_INVALID_ID };
+static u32 vmci_transport_qp_resumed_sub_id = VMCI_INVALID_ID;
+
+static int PROTOCOL_OVERRIDE = -1;
+
+#define VMCI_TRANSPORT_DEFAULT_QP_SIZE_MIN   128
+#define VMCI_TRANSPORT_DEFAULT_QP_SIZE       262144
+#define VMCI_TRANSPORT_DEFAULT_QP_SIZE_MAX   262144
+
+/* The default peer timeout indicates how long we will wait for a peer response
+ * to a control message.
+ */
+#define VSOCK_DEFAULT_CONNECT_TIMEOUT (2 * HZ)
+
+#define SS_LISTEN 255
+
+/* Helper function to convert from a VMCI error code to a VSock error code. */
+
+static s32 vmci_transport_error_to_vsock_error(s32 vmci_error)
+{
+	int err;
+
+	switch (vmci_error) {
+	case VMCI_ERROR_NO_MEM:
+		err = ENOMEM;
+		break;
+	case VMCI_ERROR_DUPLICATE_ENTRY:
+	case VMCI_ERROR_ALREADY_EXISTS:
+		err = EADDRINUSE;
+		break;
+	case VMCI_ERROR_NO_ACCESS:
+		err = EPERM;
+		break;
+	case VMCI_ERROR_NO_RESOURCES:
+		err = ENOBUFS;
+		break;
+	case VMCI_ERROR_INVALID_RESOURCE:
+		err = EHOSTUNREACH;
+		break;
+	case VMCI_ERROR_INVALID_ARGS:
+	default:
+		err = EINVAL;
+	}
+
+	return err > 0 ? -err : err;
+}
+
+static inline void
+vmci_transport_packet_init(struct vmci_transport_packet *pkt,
+			   struct sockaddr_vm *src,
+			   struct sockaddr_vm *dst,
+			   u8 type,
+			   u64 size,
+			   u64 mode,
+			   struct vmci_transport_waiting_info *wait,
+			   u16 proto,
+			   struct vmci_handle handle)
+{
+	/* We register the stream control handler as an any cid handle so we
+	 * must always send from a source address of VMADDR_CID_ANY
+	 */
+	pkt->dg.src = vmci_make_handle(VMADDR_CID_ANY,
+				       VMCI_TRANSPORT_PACKET_RID);
+	pkt->dg.dst = vmci_make_handle(dst->svm_cid,
+				       VMCI_TRANSPORT_PACKET_RID);
+	pkt->dg.payload_size = sizeof(*pkt) - sizeof(pkt->dg);
+	pkt->version = VMCI_TRANSPORT_PACKET_VERSION;
+	pkt->type = type;
+	pkt->src_port = src->svm_port;
+	pkt->dst_port = dst->svm_port;
+	memset(&pkt->proto, 0, sizeof(pkt->proto));
+	memset(&pkt->_reserved2, 0, sizeof(pkt->_reserved2));
+
+	switch (pkt->type) {
+	case VMCI_TRANSPORT_PACKET_TYPE_INVALID:
+		pkt->u.size = 0;
+		break;
+
+	case VMCI_TRANSPORT_PACKET_TYPE_REQUEST:
+	case VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE:
+		pkt->u.size = size;
+		break;
+
+	case VMCI_TRANSPORT_PACKET_TYPE_OFFER:
+	case VMCI_TRANSPORT_PACKET_TYPE_ATTACH:
+		pkt->u.handle = handle;
+		break;
+
+	case VMCI_TRANSPORT_PACKET_TYPE_WROTE:
+	case VMCI_TRANSPORT_PACKET_TYPE_READ:
+	case VMCI_TRANSPORT_PACKET_TYPE_RST:
+		pkt->u.size = 0;
+		break;
+
+	case VMCI_TRANSPORT_PACKET_TYPE_SHUTDOWN:
+		pkt->u.mode = mode;
+		break;
+
+	case VMCI_TRANSPORT_PACKET_TYPE_WAITING_READ:
+	case VMCI_TRANSPORT_PACKET_TYPE_WAITING_WRITE:
+		memcpy(&pkt->u.wait, wait, sizeof(pkt->u.wait));
+		break;
+
+	case VMCI_TRANSPORT_PACKET_TYPE_REQUEST2:
+	case VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE2:
+		pkt->u.size = size;
+		pkt->proto = proto;
+		break;
+	}
+}
+
+static inline void
+vmci_transport_packet_get_addresses(struct vmci_transport_packet *pkt,
+				    struct sockaddr_vm *local,
+				    struct sockaddr_vm *remote)
+{
+	vsock_addr_init(local, pkt->dg.dst.context, pkt->dst_port);
+	vsock_addr_init(remote, pkt->dg.src.context, pkt->src_port);
+}
+
+static int
+__vmci_transport_send_control_pkt(struct vmci_transport_packet *pkt,
+				  struct sockaddr_vm *src,
+				  struct sockaddr_vm *dst,
+				  enum vmci_transport_packet_type type,
+				  u64 size,
+				  u64 mode,
+				  struct vmci_transport_waiting_info *wait,
+				  u16 proto,
+				  struct vmci_handle handle,
+				  bool convert_error)
+{
+	int err;
+
+	vmci_transport_packet_init(pkt, src, dst, type, size, mode, wait,
+				   proto, handle);
+	err = vmci_datagram_send(&pkt->dg);
+	if (convert_error && (err < 0))
+		return vmci_transport_error_to_vsock_error(err);
+
+	return err;
+}
+
+static int
+vmci_transport_reply_control_pkt_fast(struct vmci_transport_packet *pkt,
+				      enum vmci_transport_packet_type type,
+				      u64 size,
+				      u64 mode,
+				      struct vmci_transport_waiting_info *wait,
+				      struct vmci_handle handle)
+{
+	struct vmci_transport_packet reply;
+	struct sockaddr_vm src, dst;
+
+	if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_RST) {
+		return 0;
+	} else {
+		vmci_transport_packet_get_addresses(pkt, &src, &dst);
+		return __vmci_transport_send_control_pkt(&reply, &src, &dst,
+							 type,
+							 size, mode, wait,
+							 VSOCK_PROTO_INVALID,
+							 handle, true);
+	}
+}
+
+static int
+vmci_transport_send_control_pkt_bh(struct sockaddr_vm *src,
+				   struct sockaddr_vm *dst,
+				   enum vmci_transport_packet_type type,
+				   u64 size,
+				   u64 mode,
+				   struct vmci_transport_waiting_info *wait,
+				   struct vmci_handle handle)
+{
+	/* Note that it is safe to use a single packet across all CPUs since
+	 * two tasklets of the same type are guaranteed to not ever run
+	 * simultaneously. If that ever changes, or VMCI stops using tasklets,
+	 * we can use per-cpu packets.
+	 */
+	static struct vmci_transport_packet pkt;
+
+	return __vmci_transport_send_control_pkt(&pkt, src, dst, type,
+						 size, mode, wait,
+						 VSOCK_PROTO_INVALID, handle,
+						 false);
+}
+
+static int
+vmci_transport_send_control_pkt(struct sock *sk,
+				enum vmci_transport_packet_type type,
+				u64 size,
+				u64 mode,
+				struct vmci_transport_waiting_info *wait,
+				u16 proto,
+				struct vmci_handle handle)
+{
+	struct vmci_transport_packet *pkt;
+	struct vsock_sock *vsk;
+	int err;
+
+	vsk = vsock_sk(sk);
+
+	if (!vsock_addr_bound(&vsk->local_addr))
+		return -EINVAL;
+
+	if (!vsock_addr_bound(&vsk->remote_addr))
+		return -EINVAL;
+
+	pkt = kmalloc(sizeof(*pkt), GFP_KERNEL);
+	if (!pkt)
+		return -ENOMEM;
+
+	err = __vmci_transport_send_control_pkt(pkt, &vsk->local_addr,
+						&vsk->remote_addr, type, size,
+						mode, wait, proto, handle,
+						true);
+	kfree(pkt);
+
+	return err;
+}
+
+static int vmci_transport_send_reset_bh(struct sockaddr_vm *dst,
+					struct sockaddr_vm *src,
+					struct vmci_transport_packet *pkt)
+{
+	if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_RST)
+		return 0;
+	return vmci_transport_send_control_pkt_bh(
+					dst, src,
+					VMCI_TRANSPORT_PACKET_TYPE_RST, 0,
+					0, NULL, VMCI_INVALID_HANDLE);
+}
+
+static int vmci_transport_send_reset(struct sock *sk,
+				     struct vmci_transport_packet *pkt)
+{
+	if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_RST)
+		return 0;
+	return vmci_transport_send_control_pkt(sk,
+					VMCI_TRANSPORT_PACKET_TYPE_RST,
+					0, 0, NULL, VSOCK_PROTO_INVALID,
+					VMCI_INVALID_HANDLE);
+}
+
+static int vmci_transport_send_negotiate(struct sock *sk, size_t size)
+{
+	return vmci_transport_send_control_pkt(
+					sk,
+					VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE,
+					size, 0, NULL,
+					VSOCK_PROTO_INVALID,
+					VMCI_INVALID_HANDLE);
+}
+
+static int vmci_transport_send_negotiate2(struct sock *sk, size_t size,
+					  u16 version)
+{
+	return vmci_transport_send_control_pkt(
+					sk,
+					VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE2,
+					size, 0, NULL, version,
+					VMCI_INVALID_HANDLE);
+}
+
+static int vmci_transport_send_qp_offer(struct sock *sk,
+					struct vmci_handle handle)
+{
+	return vmci_transport_send_control_pkt(
+					sk, VMCI_TRANSPORT_PACKET_TYPE_OFFER, 0,
+					0, NULL,
+					VSOCK_PROTO_INVALID, handle);
+}
+
+static int vmci_transport_send_attach(struct sock *sk,
+				      struct vmci_handle handle)
+{
+	return vmci_transport_send_control_pkt(
+					sk, VMCI_TRANSPORT_PACKET_TYPE_ATTACH,
+					0, 0, NULL, VSOCK_PROTO_INVALID,
+					handle);
+}
+
+static int vmci_transport_reply_reset(struct vmci_transport_packet *pkt)
+{
+	return vmci_transport_reply_control_pkt_fast(
+						pkt,
+						VMCI_TRANSPORT_PACKET_TYPE_RST,
+						0, 0, NULL,
+						VMCI_INVALID_HANDLE);
+}
+
+static int vmci_transport_send_invalid_bh(struct sockaddr_vm *dst,
+					  struct sockaddr_vm *src)
+{
+	return vmci_transport_send_control_pkt_bh(
+					dst, src,
+					VMCI_TRANSPORT_PACKET_TYPE_INVALID,
+					0, 0, NULL, VMCI_INVALID_HANDLE);
+}
+
+int vmci_transport_send_wrote_bh(struct sockaddr_vm *dst,
+				 struct sockaddr_vm *src)
+{
+	return vmci_transport_send_control_pkt_bh(
+					dst, src,
+					VMCI_TRANSPORT_PACKET_TYPE_WROTE, 0,
+					0, NULL, VMCI_INVALID_HANDLE);
+}
+
+int vmci_transport_send_read_bh(struct sockaddr_vm *dst,
+				struct sockaddr_vm *src)
+{
+	return vmci_transport_send_control_pkt_bh(
+					dst, src,
+					VMCI_TRANSPORT_PACKET_TYPE_READ, 0,
+					0, NULL, VMCI_INVALID_HANDLE);
+}
+
+int vmci_transport_send_wrote(struct sock *sk)
+{
+	return vmci_transport_send_control_pkt(
+					sk, VMCI_TRANSPORT_PACKET_TYPE_WROTE, 0,
+					0, NULL, VSOCK_PROTO_INVALID,
+					VMCI_INVALID_HANDLE);
+}
+
+int vmci_transport_send_read(struct sock *sk)
+{
+	return vmci_transport_send_control_pkt(
+					sk, VMCI_TRANSPORT_PACKET_TYPE_READ, 0,
+					0, NULL, VSOCK_PROTO_INVALID,
+					VMCI_INVALID_HANDLE);
+}
+
+int vmci_transport_send_waiting_write(struct sock *sk,
+				      struct vmci_transport_waiting_info *wait)
+{
+	return vmci_transport_send_control_pkt(
+				sk, VMCI_TRANSPORT_PACKET_TYPE_WAITING_WRITE,
+				0, 0, wait, VSOCK_PROTO_INVALID,
+				VMCI_INVALID_HANDLE);
+}
+
+int vmci_transport_send_waiting_read(struct sock *sk,
+				     struct vmci_transport_waiting_info *wait)
+{
+	return vmci_transport_send_control_pkt(
+				sk, VMCI_TRANSPORT_PACKET_TYPE_WAITING_READ,
+				0, 0, wait, VSOCK_PROTO_INVALID,
+				VMCI_INVALID_HANDLE);
+}
+
+static int vmci_transport_shutdown(struct vsock_sock *vsk, int mode)
+{
+	return vmci_transport_send_control_pkt(
+					&vsk->sk,
+					VMCI_TRANSPORT_PACKET_TYPE_SHUTDOWN,
+					0, mode, NULL,
+					VSOCK_PROTO_INVALID,
+					VMCI_INVALID_HANDLE);
+}
+
+static int vmci_transport_send_conn_request(struct sock *sk, size_t size)
+{
+	return vmci_transport_send_control_pkt(sk,
+					VMCI_TRANSPORT_PACKET_TYPE_REQUEST,
+					size, 0, NULL,
+					VSOCK_PROTO_INVALID,
+					VMCI_INVALID_HANDLE);
+}
+
+static int vmci_transport_send_conn_request2(struct sock *sk, size_t size,
+					     u16 version)
+{
+	return vmci_transport_send_control_pkt(
+					sk, VMCI_TRANSPORT_PACKET_TYPE_REQUEST2,
+					size, 0, NULL, version,
+					VMCI_INVALID_HANDLE);
+}
+
+static struct sock *vmci_transport_get_pending(
+					struct sock *listener,
+					struct vmci_transport_packet *pkt)
+{
+	struct vsock_sock *vlistener;
+	struct vsock_sock *vpending;
+	struct sock *pending;
+
+	vlistener = vsock_sk(listener);
+
+	list_for_each_entry(vpending, &vlistener->pending_links,
+			    pending_links) {
+		struct sockaddr_vm src;
+		struct sockaddr_vm dst;
+
+		vsock_addr_init(&src, pkt->dg.src.context, pkt->src_port);
+		vsock_addr_init(&dst, pkt->dg.dst.context, pkt->dst_port);
+
+		if (vsock_addr_equals_addr(&src, &vpending->remote_addr) &&
+		    vsock_addr_equals_addr(&dst, &vpending->local_addr)) {
+			pending = sk_vsock(vpending);
+			sock_hold(pending);
+			goto found;
+		}
+	}
+
+	pending = NULL;
+found:
+	return pending;
+
+}
+
+static void vmci_transport_release_pending(struct sock *pending)
+{
+	sock_put(pending);
+}
+
+/* We allow two kinds of sockets to communicate with a restricted VM: 1)
+ * trusted sockets 2) sockets from applications running as the same user as the
+ * VM (this is only true for the host side and only when using hosted products)
+ */
+
+static bool vmci_transport_is_trusted(struct vsock_sock *vsock, u32 peer_cid)
+{
+	return vsock->trusted ||
+	       vmci_is_context_owner(peer_cid, vsock->owner->uid);
+}
+
+/* We allow sending datagrams to and receiving datagrams from a restricted VM
+ * only if it is trusted as described in vmci_transport_is_trusted.
+ */
+
+static bool vmci_transport_allow_dgram(struct vsock_sock *vsock, u32 peer_cid)
+{
+	if (vsock->cached_peer != peer_cid) {
+		vsock->cached_peer = peer_cid;
+		if (!vmci_transport_is_trusted(vsock, peer_cid) &&
+		    (vmci_context_get_priv_flags(peer_cid) &
+		     VMCI_PRIVILEGE_FLAG_RESTRICTED)) {
+			vsock->cached_peer_allow_dgram = false;
+		} else {
+			vsock->cached_peer_allow_dgram = true;
+		}
+	}
+
+	return vsock->cached_peer_allow_dgram;
+}
+
+static int
+vmci_transport_queue_pair_alloc(struct vmci_qp **qpair,
+				struct vmci_handle *handle,
+				u64 produce_size,
+				u64 consume_size,
+				u32 peer, u32 flags, bool trusted)
+{
+	int err = 0;
+
+	if (trusted) {
+		/* Try to allocate our queue pair as trusted. This will only
+		 * work if vsock is running in the host.
+		 */
+
+		err = vmci_qpair_alloc(qpair, handle, produce_size,
+				       consume_size,
+				       peer, flags,
+				       VMCI_PRIVILEGE_FLAG_TRUSTED);
+		if (err != VMCI_ERROR_NO_ACCESS)
+			goto out;
+
+	}
+
+	err = vmci_qpair_alloc(qpair, handle, produce_size, consume_size,
+			       peer, flags, VMCI_NO_PRIVILEGE_FLAGS);
+out:
+	if (err < 0) {
+		pr_err("Could not attach to queue pair with %d\n",
+		       err);
+		err = vmci_transport_error_to_vsock_error(err);
+	}
+
+	return err;
+}
+
+static int
+vmci_transport_datagram_create_hnd(u32 resource_id,
+				   u32 flags,
+				   vmci_datagram_recv_cb recv_cb,
+				   void *client_data,
+				   struct vmci_handle *out_handle)
+{
+	int err = 0;
+
+	/* Try to allocate our datagram handler as trusted. This will only work
+	 * if vsock is running in the host.
+	 */
+
+	err = vmci_datagram_create_handle_priv(resource_id, flags,
+					       VMCI_PRIVILEGE_FLAG_TRUSTED,
+					       recv_cb,
+					       client_data, out_handle);
+
+	if (err == VMCI_ERROR_NO_ACCESS)
+		err = vmci_datagram_create_handle(resource_id, flags,
+						  recv_cb, client_data,
+						  out_handle);
+
+	return err;
+}
+
+/* This is invoked as part of a tasklet that's scheduled when the VMCI
+ * interrupt fires.  This is run in bottom-half context and if it ever needs to
+ * sleep it should defer that work to a work queue.
+ */
+
+static int vmci_transport_recv_dgram_cb(void *data, struct vmci_datagram *dg)
+{
+	struct sock *sk;
+	size_t size;
+	struct sk_buff *skb;
+	struct vsock_sock *vsk;
+
+	sk = (struct sock *)data;
+
+	/* This handler is privileged when this module is running on the host.
+	 * We will get datagrams from all endpoints (even VMs that are in a
+	 * restricted context). If we get one from a restricted context then
+	 * the destination socket must be trusted.
+	 *
+	 * NOTE: We access the socket struct without holding the lock here.
+	 * This is ok because the field we are interested is never modified
+	 * outside of the create and destruct socket functions.
+	 */
+	vsk = vsock_sk(sk);
+	if (!vmci_transport_allow_dgram(vsk, dg->src.context))
+		return VMCI_ERROR_NO_ACCESS;
+
+	size = VMCI_DG_SIZE(dg);
+
+	/* Attach the packet to the socket's receive queue as an sk_buff. */
+	skb = alloc_skb(size, GFP_ATOMIC);
+	if (skb) {
+		/* sk_receive_skb() will do a sock_put(), so hold here. */
+		sock_hold(sk);
+		skb_put(skb, size);
+		memcpy(skb->data, dg, size);
+		sk_receive_skb(sk, skb, 0);
+	}
+
+	return VMCI_SUCCESS;
+}
+
+static bool vmci_transport_stream_allow(u32 cid, u32 port)
+{
+	static const u32 non_socket_contexts[] = {
+		VMADDR_CID_HYPERVISOR,
+		VMADDR_CID_RESERVED,
+	};
+	int i;
+
+	BUILD_BUG_ON(sizeof(cid) != sizeof(*non_socket_contexts));
+
+	for (i = 0; i < ARRAY_SIZE(non_socket_contexts); i++) {
+		if (cid == non_socket_contexts[i])
+			return false;
+	}
+
+	return true;
+}
+
+/* This is invoked as part of a tasklet that's scheduled when the VMCI
+ * interrupt fires.  This is run in bottom-half context but it defers most of
+ * its work to the packet handling work queue.
+ */
+
+static int vmci_transport_recv_stream_cb(void *data, struct vmci_datagram *dg)
+{
+	struct sock *sk;
+	struct sockaddr_vm dst;
+	struct sockaddr_vm src;
+	struct vmci_transport_packet *pkt;
+	struct vsock_sock *vsk;
+	bool bh_process_pkt;
+	int err;
+
+	sk = NULL;
+	err = VMCI_SUCCESS;
+	bh_process_pkt = false;
+
+	/* Ignore incoming packets from contexts without sockets, or resources
+	 * that aren't vsock implementations.
+	 */
+
+	if (!vmci_transport_stream_allow(dg->src.context, -1)
+	    || VMCI_TRANSPORT_PACKET_RID != dg->src.resource)
+		return VMCI_ERROR_NO_ACCESS;
+
+	if (VMCI_DG_SIZE(dg) < sizeof(*pkt))
+		/* Drop datagrams that do not contain full VSock packets. */
+		return VMCI_ERROR_INVALID_ARGS;
+
+	pkt = (struct vmci_transport_packet *)dg;
+
+	/* Find the socket that should handle this packet.  First we look for a
+	 * connected socket and if there is none we look for a socket bound to
+	 * the destintation address.
+	 */
+	vsock_addr_init(&src, pkt->dg.src.context, pkt->src_port);
+	vsock_addr_init(&dst, pkt->dg.dst.context, pkt->dst_port);
+
+	sk = vsock_find_connected_socket(&src, &dst);
+	if (!sk) {
+		sk = vsock_find_bound_socket(&dst);
+		if (!sk) {
+			/* We could not find a socket for this specified
+			 * address.  If this packet is a RST, we just drop it.
+			 * If it is another packet, we send a RST.  Note that
+			 * we do not send a RST reply to RSTs so that we do not
+			 * continually send RSTs between two endpoints.
+			 *
+			 * Note that since this is a reply, dst is src and src
+			 * is dst.
+			 */
+			if (vmci_transport_send_reset_bh(&dst, &src, pkt) < 0)
+				pr_err("unable to send reset\n");
+
+			err = VMCI_ERROR_NOT_FOUND;
+			goto out;
+		}
+	}
+
+	/* If the received packet type is beyond all types known to this
+	 * implementation, reply with an invalid message.  Hopefully this will
+	 * help when implementing backwards compatibility in the future.
+	 */
+	if (pkt->type >= VMCI_TRANSPORT_PACKET_TYPE_MAX) {
+		vmci_transport_send_invalid_bh(&dst, &src);
+		err = VMCI_ERROR_INVALID_ARGS;
+		goto out;
+	}
+
+	/* This handler is privileged when this module is running on the host.
+	 * We will get datagram connect requests from all endpoints (even VMs
+	 * that are in a restricted context). If we get one from a restricted
+	 * context then the destination socket must be trusted.
+	 *
+	 * NOTE: We access the socket struct without holding the lock here.
+	 * This is ok because the field we are interested is never modified
+	 * outside of the create and destruct socket functions.
+	 */
+	vsk = vsock_sk(sk);
+	if (!vmci_transport_allow_dgram(vsk, pkt->dg.src.context)) {
+		err = VMCI_ERROR_NO_ACCESS;
+		goto out;
+	}
+
+	/* We do most everything in a work queue, but let's fast path the
+	 * notification of reads and writes to help data transfer performance.
+	 * We can only do this if there is no process context code executing
+	 * for this socket since that may change the state.
+	 */
+	bh_lock_sock(sk);
+
+	if (!sock_owned_by_user(sk) && sk->sk_state == SS_CONNECTED)
+		vmci_trans(vsk)->notify_ops->handle_notify_pkt(
+				sk, pkt, true, &dst, &src,
+				&bh_process_pkt);
+
+	bh_unlock_sock(sk);
+
+	if (!bh_process_pkt) {
+		struct vmci_transport_recv_pkt_info *recv_pkt_info;
+
+		recv_pkt_info = kmalloc(sizeof(*recv_pkt_info), GFP_ATOMIC);
+		if (!recv_pkt_info) {
+			if (vmci_transport_send_reset_bh(&dst, &src, pkt) < 0)
+				pr_err("unable to send reset\n");
+
+			err = VMCI_ERROR_NO_MEM;
+			goto out;
+		}
+
+		recv_pkt_info->sk = sk;
+		memcpy(&recv_pkt_info->pkt, pkt, sizeof(recv_pkt_info->pkt));
+		INIT_WORK(&recv_pkt_info->work, vmci_transport_recv_pkt_work);
+
+		schedule_work(&recv_pkt_info->work);
+		/* Clear sk so that the reference count incremented by one of
+		 * the Find functions above is not decremented below.  We need
+		 * that reference count for the packet handler we've scheduled
+		 * to run.
+		 */
+		sk = NULL;
+	}
+
+out:
+	if (sk)
+		sock_put(sk);
+
+	return err;
+}
+
+static void vmci_transport_peer_attach_cb(u32 sub_id,
+					  const struct vmci_event_data *e_data,
+					  void *client_data)
+{
+	struct sock *sk = client_data;
+	const struct vmci_event_payload_qp *e_payload;
+	struct vsock_sock *vsk;
+
+	e_payload = vmci_event_data_const_payload(e_data);
+
+	vsk = vsock_sk(sk);
+
+	/* We don't ask for delayed CBs when we subscribe to this event (we
+	 * pass 0 as flags to vmci_event_subscribe()).  VMCI makes no
+	 * guarantees in that case about what context we might be running in,
+	 * so it could be BH or process, blockable or non-blockable.  So we
+	 * need to account for all possible contexts here.
+	 */
+	local_bh_disable();
+	bh_lock_sock(sk);
+
+	/* XXX This is lame, we should provide a way to lookup sockets by
+	 * qp_handle.
+	 */
+	if (vmci_handle_is_equal(vmci_trans(vsk)->qp_handle,
+				 e_payload->handle)) {
+		/* XXX This doesn't do anything, but in the future we may want
+		 * to set a flag here to verify the attach really did occur and
+		 * we weren't just sent a datagram claiming it was.
+		 */
+		goto out;
+	}
+
+out:
+	bh_unlock_sock(sk);
+	local_bh_enable();
+}
+
+static void vmci_transport_handle_detach(struct sock *sk)
+{
+	struct vsock_sock *vsk;
+
+	vsk = vsock_sk(sk);
+	if (!vmci_handle_is_invalid(vmci_trans(vsk)->qp_handle)) {
+		sock_set_flag(sk, SOCK_DONE);
+
+		/* On a detach the peer will not be sending or receiving
+		 * anymore.
+		 */
+		vsk->peer_shutdown = SHUTDOWN_MASK;
+
+		/* We should not be sending anymore since the peer won't be
+		 * there to receive, but we can still receive if there is data
+		 * left in our consume queue.
+		 */
+		if (vsock_stream_has_data(vsk) <= 0) {
+			if (sk->sk_state == SS_CONNECTING) {
+				/* The peer may detach from a queue pair while
+				 * we are still in the connecting state, i.e.,
+				 * if the peer VM is killed after attaching to
+				 * a queue pair, but before we complete the
+				 * handshake. In that case, we treat the detach
+				 * event like a reset.
+				 */
+
+				sk->sk_state = SS_UNCONNECTED;
+				sk->sk_err = ECONNRESET;
+				sk->sk_error_report(sk);
+				return;
+			}
+			sk->sk_state = SS_UNCONNECTED;
+		}
+		sk->sk_state_change(sk);
+	}
+}
+
+static void vmci_transport_peer_detach_cb(u32 sub_id,
+					  const struct vmci_event_data *e_data,
+					  void *client_data)
+{
+	struct sock *sk = client_data;
+	const struct vmci_event_payload_qp *e_payload;
+	struct vsock_sock *vsk;
+
+	e_payload = vmci_event_data_const_payload(e_data);
+	vsk = vsock_sk(sk);
+	if (vmci_handle_is_invalid(e_payload->handle))
+		return;
+
+	/* Same rules for locking as for peer_attach_cb(). */
+	local_bh_disable();
+	bh_lock_sock(sk);
+
+	/* XXX This is lame, we should provide a way to lookup sockets by
+	 * qp_handle.
+	 */
+	if (vmci_handle_is_equal(vmci_trans(vsk)->qp_handle,
+				 e_payload->handle))
+		vmci_transport_handle_detach(sk);
+
+	bh_unlock_sock(sk);
+	local_bh_enable();
+}
+
+static void vmci_transport_qp_resumed_cb(u32 sub_id,
+					 const struct vmci_event_data *e_data,
+					 void *client_data)
+{
+	vsock_for_each_connected_socket(vmci_transport_handle_detach);
+}
+
+static void vmci_transport_recv_pkt_work(struct work_struct *work)
+{
+	struct vmci_transport_recv_pkt_info *recv_pkt_info;
+	struct vmci_transport_packet *pkt;
+	struct sock *sk;
+
+	recv_pkt_info =
+		container_of(work, struct vmci_transport_recv_pkt_info, work);
+	sk = recv_pkt_info->sk;
+	pkt = &recv_pkt_info->pkt;
+
+	lock_sock(sk);
+
+	switch (sk->sk_state) {
+	case SS_LISTEN:
+		vmci_transport_recv_listen(sk, pkt);
+		break;
+	case SS_CONNECTING:
+		/* Processing of pending connections for servers goes through
+		 * the listening socket, so see vmci_transport_recv_listen()
+		 * for that path.
+		 */
+		vmci_transport_recv_connecting_client(sk, pkt);
+		break;
+	case SS_CONNECTED:
+		vmci_transport_recv_connected(sk, pkt);
+		break;
+	default:
+		/* Because this function does not run in the same context as
+		 * vmci_transport_recv_stream_cb it is possible that the
+		 * socket has closed. We need to let the other side know or it
+		 * could be sitting in a connect and hang forever. Send a
+		 * reset to prevent that.
+		 */
+		vmci_transport_send_reset(sk, pkt);
+		goto out;
+	}
+
+out:
+	release_sock(sk);
+	kfree(recv_pkt_info);
+	/* Release reference obtained in the stream callback when we fetched
+	 * this socket out of the bound or connected list.
+	 */
+	sock_put(sk);
+}
+
+static int vmci_transport_recv_listen(struct sock *sk,
+				      struct vmci_transport_packet *pkt)
+{
+	struct sock *pending;
+	struct vsock_sock *vpending;
+	int err;
+	u64 qp_size;
+	bool old_request = false;
+	bool old_pkt_proto = false;
+
+	err = 0;
+
+	/* Because we are in the listen state, we could be receiving a packet
+	 * for ourself or any previous connection requests that we received.
+	 * If it's the latter, we try to find a socket in our list of pending
+	 * connections and, if we do, call the appropriate handler for the
+	 * state that that socket is in.  Otherwise we try to service the
+	 * connection request.
+	 */
+	pending = vmci_transport_get_pending(sk, pkt);
+	if (pending) {
+		lock_sock(pending);
+		switch (pending->sk_state) {
+		case SS_CONNECTING:
+			err = vmci_transport_recv_connecting_server(sk,
+								    pending,
+								    pkt);
+			break;
+		default:
+			vmci_transport_send_reset(pending, pkt);
+			err = -EINVAL;
+		}
+
+		if (err < 0)
+			vsock_remove_pending(sk, pending);
+
+		release_sock(pending);
+		vmci_transport_release_pending(pending);
+
+		return err;
+	}
+
+	/* The listen state only accepts connection requests.  Reply with a
+	 * reset unless we received a reset.
+	 */
+
+	if (!(pkt->type == VMCI_TRANSPORT_PACKET_TYPE_REQUEST ||
+	      pkt->type == VMCI_TRANSPORT_PACKET_TYPE_REQUEST2)) {
+		vmci_transport_reply_reset(pkt);
+		return -EINVAL;
+	}
+
+	if (pkt->u.size == 0) {
+		vmci_transport_reply_reset(pkt);
+		return -EINVAL;
+	}
+
+	/* If this socket can't accommodate this connection request, we send a
+	 * reset.  Otherwise we create and initialize a child socket and reply
+	 * with a connection negotiation.
+	 */
+	if (sk->sk_ack_backlog >= sk->sk_max_ack_backlog) {
+		vmci_transport_reply_reset(pkt);
+		return -ECONNREFUSED;
+	}
+
+	pending = __vsock_create(sock_net(sk), NULL, sk, GFP_KERNEL,
+				 sk->sk_type);
+	if (!pending) {
+		vmci_transport_send_reset(sk, pkt);
+		return -ENOMEM;
+	}
+
+	vpending = vsock_sk(pending);
+
+	vsock_addr_init(&vpending->local_addr, pkt->dg.dst.context,
+			pkt->dst_port);
+	vsock_addr_init(&vpending->remote_addr, pkt->dg.src.context,
+			pkt->src_port);
+
+	/* If the proposed size fits within our min/max, accept it. Otherwise
+	 * propose our own size.
+	 */
+	if (pkt->u.size >= vmci_trans(vpending)->queue_pair_min_size &&
+	    pkt->u.size <= vmci_trans(vpending)->queue_pair_max_size) {
+		qp_size = pkt->u.size;
+	} else {
+		qp_size = vmci_trans(vpending)->queue_pair_size;
+	}
+
+	/* Figure out if we are using old or new requests based on the
+	 * overrides pkt types sent by our peer.
+	 */
+	if (vmci_transport_old_proto_override(&old_pkt_proto)) {
+		old_request = old_pkt_proto;
+	} else {
+		if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_REQUEST)
+			old_request = true;
+		else if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_REQUEST2)
+			old_request = false;
+
+	}
+
+	if (old_request) {
+		/* Handle a REQUEST (or override) */
+		u16 version = VSOCK_PROTO_INVALID;
+		if (vmci_transport_proto_to_notify_struct(
+			pending, &version, true))
+			err = vmci_transport_send_negotiate(pending, qp_size);
+		else
+			err = -EINVAL;
+
+	} else {
+		/* Handle a REQUEST2 (or override) */
+		int proto_int = pkt->proto;
+		int pos;
+		u16 active_proto_version = 0;
+
+		/* The list of possible protocols is the intersection of all
+		 * protocols the client supports ... plus all the protocols we
+		 * support.
+		 */
+		proto_int &= vmci_transport_new_proto_supported_versions();
+
+		/* We choose the highest possible protocol version and use that
+		 * one.
+		 */
+		pos = fls(proto_int);
+		if (pos) {
+			active_proto_version = (1 << (pos - 1));
+			if (vmci_transport_proto_to_notify_struct(
+				pending, &active_proto_version, false))
+				err = vmci_transport_send_negotiate2(pending,
+							qp_size,
+							active_proto_version);
+			else
+				err = -EINVAL;
+
+		} else {
+			err = -EINVAL;
+		}
+	}
+
+	if (err < 0) {
+		vmci_transport_send_reset(sk, pkt);
+		sock_put(pending);
+		err = vmci_transport_error_to_vsock_error(err);
+		goto out;
+	}
+
+	vsock_add_pending(sk, pending);
+	sk->sk_ack_backlog++;
+
+	pending->sk_state = SS_CONNECTING;
+	vmci_trans(vpending)->produce_size =
+		vmci_trans(vpending)->consume_size = qp_size;
+	vmci_trans(vpending)->queue_pair_size = qp_size;
+
+	vmci_trans(vpending)->notify_ops->process_request(pending);
+
+	/* We might never receive another message for this socket and it's not
+	 * connected to any process, so we have to ensure it gets cleaned up
+	 * ourself.  Our delayed work function will take care of that.  Note
+	 * that we do not ever cancel this function since we have few
+	 * guarantees about its state when calling cancel_delayed_work().
+	 * Instead we hold a reference on the socket for that function and make
+	 * it capable of handling cases where it needs to do nothing but
+	 * release that reference.
+	 */
+	vpending->listener = sk;
+	sock_hold(sk);
+	sock_hold(pending);
+	INIT_DELAYED_WORK(&vpending->dwork, vsock_pending_work);
+	schedule_delayed_work(&vpending->dwork, HZ);
+
+out:
+	return err;
+}
+
+static int
+vmci_transport_recv_connecting_server(struct sock *listener,
+				      struct sock *pending,
+				      struct vmci_transport_packet *pkt)
+{
+	struct vsock_sock *vpending;
+	struct vmci_handle handle;
+	struct vmci_qp *qpair;
+	bool is_local;
+	u32 flags;
+	u32 detach_sub_id;
+	int err;
+	int skerr;
+
+	vpending = vsock_sk(pending);
+	detach_sub_id = VMCI_INVALID_ID;
+
+	switch (pkt->type) {
+	case VMCI_TRANSPORT_PACKET_TYPE_OFFER:
+		if (vmci_handle_is_invalid(pkt->u.handle)) {
+			vmci_transport_send_reset(pending, pkt);
+			skerr = EPROTO;
+			err = -EINVAL;
+			goto destroy;
+		}
+		break;
+	default:
+		/* Close and cleanup the connection. */
+		vmci_transport_send_reset(pending, pkt);
+		skerr = EPROTO;
+		err = pkt->type == VMCI_TRANSPORT_PACKET_TYPE_RST ? 0 : -EINVAL;
+		goto destroy;
+	}
+
+	/* In order to complete the connection we need to attach to the offered
+	 * queue pair and send an attach notification.  We also subscribe to the
+	 * detach event so we know when our peer goes away, and we do that
+	 * before attaching so we don't miss an event.  If all this succeeds,
+	 * we update our state and wakeup anything waiting in accept() for a
+	 * connection.
+	 */
+
+	/* We don't care about attach since we ensure the other side has
+	 * attached by specifying the ATTACH_ONLY flag below.
+	 */
+	err = vmci_event_subscribe(VMCI_EVENT_QP_PEER_DETACH,
+				   vmci_transport_peer_detach_cb,
+				   pending, &detach_sub_id);
+	if (err < VMCI_SUCCESS) {
+		vmci_transport_send_reset(pending, pkt);
+		err = vmci_transport_error_to_vsock_error(err);
+		skerr = -err;
+		goto destroy;
+	}
+
+	vmci_trans(vpending)->detach_sub_id = detach_sub_id;
+
+	/* Now attach to the queue pair the client created. */
+	handle = pkt->u.handle;
+
+	/* vpending->local_addr always has a context id so we do not need to
+	 * worry about VMADDR_CID_ANY in this case.
+	 */
+	is_local =
+	    vpending->remote_addr.svm_cid == vpending->local_addr.svm_cid;
+	flags = VMCI_QPFLAG_ATTACH_ONLY;
+	flags |= is_local ? VMCI_QPFLAG_LOCAL : 0;
+
+	err = vmci_transport_queue_pair_alloc(
+					&qpair,
+					&handle,
+					vmci_trans(vpending)->produce_size,
+					vmci_trans(vpending)->consume_size,
+					pkt->dg.src.context,
+					flags,
+					vmci_transport_is_trusted(
+						vpending,
+						vpending->remote_addr.svm_cid));
+	if (err < 0) {
+		vmci_transport_send_reset(pending, pkt);
+		skerr = -err;
+		goto destroy;
+	}
+
+	vmci_trans(vpending)->qp_handle = handle;
+	vmci_trans(vpending)->qpair = qpair;
+
+	/* When we send the attach message, we must be ready to handle incoming
+	 * control messages on the newly connected socket. So we move the
+	 * pending socket to the connected state before sending the attach
+	 * message. Otherwise, an incoming packet triggered by the attach being
+	 * received by the peer may be processed concurrently with what happens
+	 * below after sending the attach message, and that incoming packet
+	 * will find the listening socket instead of the (currently) pending
+	 * socket. Note that enqueueing the socket increments the reference
+	 * count, so even if a reset comes before the connection is accepted,
+	 * the socket will be valid until it is removed from the queue.
+	 *
+	 * If we fail sending the attach below, we remove the socket from the
+	 * connected list and move the socket to SS_UNCONNECTED before
+	 * releasing the lock, so a pending slow path processing of an incoming
+	 * packet will not see the socket in the connected state in that case.
+	 */
+	pending->sk_state = SS_CONNECTED;
+
+	vsock_insert_connected(vpending);
+
+	/* Notify our peer of our attach. */
+	err = vmci_transport_send_attach(pending, handle);
+	if (err < 0) {
+		vsock_remove_connected(vpending);
+		pr_err("Could not send attach\n");
+		vmci_transport_send_reset(pending, pkt);
+		err = vmci_transport_error_to_vsock_error(err);
+		skerr = -err;
+		goto destroy;
+	}
+
+	/* We have a connection. Move the now connected socket from the
+	 * listener's pending list to the accept queue so callers of accept()
+	 * can find it.
+	 */
+	vsock_remove_pending(listener, pending);
+	vsock_enqueue_accept(listener, pending);
+
+	/* Callers of accept() will be be waiting on the listening socket, not
+	 * the pending socket.
+	 */
+	listener->sk_state_change(listener);
+
+	return 0;
+
+destroy:
+	pending->sk_err = skerr;
+	pending->sk_state = SS_UNCONNECTED;
+	/* As long as we drop our reference, all necessary cleanup will handle
+	 * when the cleanup function drops its reference and our destruct
+	 * implementation is called.  Note that since the listen handler will
+	 * remove pending from the pending list upon our failure, the cleanup
+	 * function won't drop the additional reference, which is why we do it
+	 * here.
+	 */
+	sock_put(pending);
+
+	return err;
+}
+
+static int
+vmci_transport_recv_connecting_client(struct sock *sk,
+				      struct vmci_transport_packet *pkt)
+{
+	struct vsock_sock *vsk;
+	int err;
+	int skerr;
+
+	vsk = vsock_sk(sk);
+
+	switch (pkt->type) {
+	case VMCI_TRANSPORT_PACKET_TYPE_ATTACH:
+		if (vmci_handle_is_invalid(pkt->u.handle) ||
+		    !vmci_handle_is_equal(pkt->u.handle,
+					  vmci_trans(vsk)->qp_handle)) {
+			skerr = EPROTO;
+			err = -EINVAL;
+			goto destroy;
+		}
+
+		/* Signify the socket is connected and wakeup the waiter in
+		 * connect(). Also place the socket in the connected table for
+		 * accounting (it can already be found since it's in the bound
+		 * table).
+		 */
+		sk->sk_state = SS_CONNECTED;
+		sk->sk_socket->state = SS_CONNECTED;
+		vsock_insert_connected(vsk);
+		sk->sk_state_change(sk);
+
+		break;
+	case VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE:
+	case VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE2:
+		if (pkt->u.size == 0
+		    || pkt->dg.src.context != vsk->remote_addr.svm_cid
+		    || pkt->src_port != vsk->remote_addr.svm_port
+		    || !vmci_handle_is_invalid(vmci_trans(vsk)->qp_handle)
+		    || vmci_trans(vsk)->qpair
+		    || vmci_trans(vsk)->produce_size != 0
+		    || vmci_trans(vsk)->consume_size != 0
+		    || vmci_trans(vsk)->attach_sub_id != VMCI_INVALID_ID
+		    || vmci_trans(vsk)->detach_sub_id != VMCI_INVALID_ID) {
+			skerr = EPROTO;
+			err = -EINVAL;
+
+			goto destroy;
+		}
+
+		err = vmci_transport_recv_connecting_client_negotiate(sk, pkt);
+		if (err) {
+			skerr = -err;
+			goto destroy;
+		}
+
+		break;
+	case VMCI_TRANSPORT_PACKET_TYPE_INVALID:
+		err = vmci_transport_recv_connecting_client_invalid(sk, pkt);
+		if (err) {
+			skerr = -err;
+			goto destroy;
+		}
+
+		break;
+	case VMCI_TRANSPORT_PACKET_TYPE_RST:
+		/* Older versions of the linux code (WS 6.5 / ESX 4.0) used to
+		 * continue processing here after they sent an INVALID packet.
+		 * This meant that we got a RST after the INVALID. We ignore a
+		 * RST after an INVALID. The common code doesn't send the RST
+		 * ... so we can hang if an old version of the common code
+		 * fails between getting a REQUEST and sending an OFFER back.
+		 * Not much we can do about it... except hope that it doesn't
+		 * happen.
+		 */
+		if (vsk->ignore_connecting_rst) {
+			vsk->ignore_connecting_rst = false;
+		} else {
+			skerr = ECONNRESET;
+			err = 0;
+			goto destroy;
+		}
+
+		break;
+	default:
+		/* Close and cleanup the connection. */
+		skerr = EPROTO;
+		err = -EINVAL;
+		goto destroy;
+	}
+
+	return 0;
+
+destroy:
+	vmci_transport_send_reset(sk, pkt);
+
+	sk->sk_state = SS_UNCONNECTED;
+	sk->sk_err = skerr;
+	sk->sk_error_report(sk);
+	return err;
+}
+
+static int vmci_transport_recv_connecting_client_negotiate(
+					struct sock *sk,
+					struct vmci_transport_packet *pkt)
+{
+	int err;
+	struct vsock_sock *vsk;
+	struct vmci_handle handle;
+	struct vmci_qp *qpair;
+	u32 attach_sub_id;
+	u32 detach_sub_id;
+	bool is_local;
+	u32 flags;
+	bool old_proto = true;
+	bool old_pkt_proto;
+	u16 version;
+
+	vsk = vsock_sk(sk);
+	handle = VMCI_INVALID_HANDLE;
+	attach_sub_id = VMCI_INVALID_ID;
+	detach_sub_id = VMCI_INVALID_ID;
+
+	/* If we have gotten here then we should be past the point where old
+	 * linux vsock could have sent the bogus rst.
+	 */
+	vsk->sent_request = false;
+	vsk->ignore_connecting_rst = false;
+
+	/* Verify that we're OK with the proposed queue pair size */
+	if (pkt->u.size < vmci_trans(vsk)->queue_pair_min_size ||
+	    pkt->u.size > vmci_trans(vsk)->queue_pair_max_size) {
+		err = -EINVAL;
+		goto destroy;
+	}
+
+	/* At this point we know the CID the peer is using to talk to us. */
+
+	if (vsk->local_addr.svm_cid == VMADDR_CID_ANY)
+		vsk->local_addr.svm_cid = pkt->dg.dst.context;
+
+	/* Setup the notify ops to be the highest supported version that both
+	 * the server and the client support.
+	 */
+
+	if (vmci_transport_old_proto_override(&old_pkt_proto)) {
+		old_proto = old_pkt_proto;
+	} else {
+		if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE)
+			old_proto = true;
+		else if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE2)
+			old_proto = false;
+
+	}
+
+	if (old_proto)
+		version = VSOCK_PROTO_INVALID;
+	else
+		version = pkt->proto;
+
+	if (!vmci_transport_proto_to_notify_struct(sk, &version, old_proto)) {
+		err = -EINVAL;
+		goto destroy;
+	}
+
+	/* Subscribe to attach and detach events first.
+	 *
+	 * XXX We attach once for each queue pair created for now so it is easy
+	 * to find the socket (it's provided), but later we should only
+	 * subscribe once and add a way to lookup sockets by queue pair handle.
+	 */
+	err = vmci_event_subscribe(VMCI_EVENT_QP_PEER_ATTACH,
+				   vmci_transport_peer_attach_cb,
+				   sk, &attach_sub_id);
+	if (err < VMCI_SUCCESS) {
+		err = vmci_transport_error_to_vsock_error(err);
+		goto destroy;
+	}
+
+	err = vmci_event_subscribe(VMCI_EVENT_QP_PEER_DETACH,
+				   vmci_transport_peer_detach_cb,
+				   sk, &detach_sub_id);
+	if (err < VMCI_SUCCESS) {
+		err = vmci_transport_error_to_vsock_error(err);
+		goto destroy;
+	}
+
+	/* Make VMCI select the handle for us. */
+	handle = VMCI_INVALID_HANDLE;
+	is_local = vsk->remote_addr.svm_cid == vsk->local_addr.svm_cid;
+	flags = is_local ? VMCI_QPFLAG_LOCAL : 0;
+
+	err = vmci_transport_queue_pair_alloc(&qpair,
+					      &handle,
+					      pkt->u.size,
+					      pkt->u.size,
+					      vsk->remote_addr.svm_cid,
+					      flags,
+					      vmci_transport_is_trusted(
+						  vsk,
+						  vsk->
+						  remote_addr.svm_cid));
+	if (err < 0)
+		goto destroy;
+
+	err = vmci_transport_send_qp_offer(sk, handle);
+	if (err < 0) {
+		err = vmci_transport_error_to_vsock_error(err);
+		goto destroy;
+	}
+
+	vmci_trans(vsk)->qp_handle = handle;
+	vmci_trans(vsk)->qpair = qpair;
+
+	vmci_trans(vsk)->produce_size = vmci_trans(vsk)->consume_size =
+		pkt->u.size;
+
+	vmci_trans(vsk)->attach_sub_id = attach_sub_id;
+	vmci_trans(vsk)->detach_sub_id = detach_sub_id;
+
+	vmci_trans(vsk)->notify_ops->process_negotiate(sk);
+
+	return 0;
+
+destroy:
+	if (attach_sub_id != VMCI_INVALID_ID)
+		vmci_event_unsubscribe(attach_sub_id);
+
+	if (detach_sub_id != VMCI_INVALID_ID)
+		vmci_event_unsubscribe(detach_sub_id);
+
+	if (!vmci_handle_is_invalid(handle))
+		vmci_qpair_detach(&qpair);
+
+	return err;
+}
+
+static int
+vmci_transport_recv_connecting_client_invalid(struct sock *sk,
+					      struct vmci_transport_packet *pkt)
+{
+	int err = 0;
+	struct vsock_sock *vsk = vsock_sk(sk);
+
+	if (vsk->sent_request) {
+		vsk->sent_request = false;
+		vsk->ignore_connecting_rst = true;
+
+		err = vmci_transport_send_conn_request(
+			sk, vmci_trans(vsk)->queue_pair_size);
+		if (err < 0)
+			err = vmci_transport_error_to_vsock_error(err);
+		else
+			err = 0;
+
+	}
+
+	return err;
+}
+
+static int vmci_transport_recv_connected(struct sock *sk,
+					 struct vmci_transport_packet *pkt)
+{
+	struct vsock_sock *vsk;
+	bool pkt_processed = false;
+
+	/* In cases where we are closing the connection, it's sufficient to
+	 * mark the state change (and maybe error) and wake up any waiting
+	 * threads. Since this is a connected socket, it's owned by a user
+	 * process and will be cleaned up when the failure is passed back on
+	 * the current or next system call.  Our system call implementations
+	 * must therefore check for error and state changes on entry and when
+	 * being awoken.
+	 */
+	switch (pkt->type) {
+	case VMCI_TRANSPORT_PACKET_TYPE_SHUTDOWN:
+		if (pkt->u.mode) {
+			vsk = vsock_sk(sk);
+
+			vsk->peer_shutdown |= pkt->u.mode;
+			sk->sk_state_change(sk);
+		}
+		break;
+
+	case VMCI_TRANSPORT_PACKET_TYPE_RST:
+		vsk = vsock_sk(sk);
+		/* It is possible that we sent our peer a message (e.g a
+		 * WAITING_READ) right before we got notified that the peer had
+		 * detached. If that happens then we can get a RST pkt back
+		 * from our peer even though there is data available for us to
+		 * read. In that case, don't shutdown the socket completely but
+		 * instead allow the local client to finish reading data off
+		 * the queuepair. Always treat a RST pkt in connected mode like
+		 * a clean shutdown.
+		 */
+		sock_set_flag(sk, SOCK_DONE);
+		vsk->peer_shutdown = SHUTDOWN_MASK;
+		if (vsock_stream_has_data(vsk) <= 0)
+			sk->sk_state = SS_DISCONNECTING;
+
+		sk->sk_state_change(sk);
+		break;
+
+	default:
+		vsk = vsock_sk(sk);
+		vmci_trans(vsk)->notify_ops->handle_notify_pkt(
+				sk, pkt, false, NULL, NULL,
+				&pkt_processed);
+		if (!pkt_processed)
+			return -EINVAL;
+
+		break;
+	}
+
+	return 0;
+}
+
+static int vmci_transport_socket_init(struct vsock_sock *vsk,
+				      struct vsock_sock *psk)
+{
+	vsk->trans = kmalloc(sizeof(struct vmci_transport), GFP_KERNEL);
+	if (!vsk->trans)
+		return -ENOMEM;
+
+	vmci_trans(vsk)->dg_handle = VMCI_INVALID_HANDLE;
+	vmci_trans(vsk)->qp_handle = VMCI_INVALID_HANDLE;
+	vmci_trans(vsk)->qpair = NULL;
+	vmci_trans(vsk)->produce_size = vmci_trans(vsk)->consume_size = 0;
+	vmci_trans(vsk)->attach_sub_id = vmci_trans(vsk)->detach_sub_id =
+		VMCI_INVALID_ID;
+	vmci_trans(vsk)->notify_ops = NULL;
+	if (psk) {
+		vmci_trans(vsk)->queue_pair_size =
+			vmci_trans(psk)->queue_pair_size;
+		vmci_trans(vsk)->queue_pair_min_size =
+			vmci_trans(psk)->queue_pair_min_size;
+		vmci_trans(vsk)->queue_pair_max_size =
+			vmci_trans(psk)->queue_pair_max_size;
+	} else {
+		vmci_trans(vsk)->queue_pair_size =
+			VMCI_TRANSPORT_DEFAULT_QP_SIZE;
+		vmci_trans(vsk)->queue_pair_min_size =
+			 VMCI_TRANSPORT_DEFAULT_QP_SIZE_MIN;
+		vmci_trans(vsk)->queue_pair_max_size =
+			VMCI_TRANSPORT_DEFAULT_QP_SIZE_MAX;
+	}
+
+	return 0;
+}
+
+static void vmci_transport_destruct(struct vsock_sock *vsk)
+{
+	if (vmci_trans(vsk)->attach_sub_id != VMCI_INVALID_ID) {
+		vmci_event_unsubscribe(vmci_trans(vsk)->attach_sub_id);
+		vmci_trans(vsk)->attach_sub_id = VMCI_INVALID_ID;
+	}
+
+	if (vmci_trans(vsk)->detach_sub_id != VMCI_INVALID_ID) {
+		vmci_event_unsubscribe(vmci_trans(vsk)->detach_sub_id);
+		vmci_trans(vsk)->detach_sub_id = VMCI_INVALID_ID;
+	}
+
+	if (!vmci_handle_is_invalid(vmci_trans(vsk)->qp_handle)) {
+		vmci_qpair_detach(&vmci_trans(vsk)->qpair);
+		vmci_trans(vsk)->qp_handle = VMCI_INVALID_HANDLE;
+		vmci_trans(vsk)->produce_size = 0;
+		vmci_trans(vsk)->consume_size = 0;
+	}
+
+	if (vmci_trans(vsk)->notify_ops)
+		vmci_trans(vsk)->notify_ops->socket_destruct(vsk);
+
+	kfree(vsk->trans);
+	vsk->trans = NULL;
+}
+
+static void vmci_transport_release(struct vsock_sock *vsk)
+{
+	if (!vmci_handle_is_invalid(vmci_trans(vsk)->dg_handle)) {
+		vmci_datagram_destroy_handle(vmci_trans(vsk)->dg_handle);
+		vmci_trans(vsk)->dg_handle = VMCI_INVALID_HANDLE;
+	}
+}
+
+static int vmci_transport_dgram_bind(struct vsock_sock *vsk,
+				     struct sockaddr_vm *addr)
+{
+	u32 port;
+	u32 flags;
+	int err;
+
+	/* VMCI will select a resource ID for us if we provide
+	 * VMCI_INVALID_ID.
+	 */
+	port = addr->svm_port == VMADDR_PORT_ANY ?
+			VMCI_INVALID_ID : addr->svm_port;
+
+	if (port <= LAST_RESERVED_PORT && !capable(CAP_NET_BIND_SERVICE))
+		return -EACCES;
+
+	flags = addr->svm_cid == VMADDR_CID_ANY ?
+				VMCI_FLAG_ANYCID_DG_HND : 0;
+
+	err = vmci_transport_datagram_create_hnd(port, flags,
+						 vmci_transport_recv_dgram_cb,
+						 &vsk->sk,
+						 &vmci_trans(vsk)->dg_handle);
+	if (err < VMCI_SUCCESS)
+		return vmci_transport_error_to_vsock_error(err);
+	vsock_addr_init(&vsk->local_addr, addr->svm_cid,
+			vmci_trans(vsk)->dg_handle.resource);
+
+	return 0;
+}
+
+static int vmci_transport_dgram_enqueue(
+	struct vsock_sock *vsk,
+	struct sockaddr_vm *remote_addr,
+	struct iovec *iov,
+	size_t len)
+{
+	int err;
+	struct vmci_datagram *dg;
+
+	if (len > VMCI_MAX_DG_PAYLOAD_SIZE)
+		return -EMSGSIZE;
+
+	if (!vmci_transport_allow_dgram(vsk, remote_addr->svm_cid))
+		return -EPERM;
+
+	/* Allocate a buffer for the user's message and our packet header. */
+	dg = kmalloc(len + sizeof(*dg), GFP_KERNEL);
+	if (!dg)
+		return -ENOMEM;
+
+	memcpy_fromiovec(VMCI_DG_PAYLOAD(dg), iov, len);
+
+	dg->dst = vmci_make_handle(remote_addr->svm_cid,
+				   remote_addr->svm_port);
+	dg->src = vmci_make_handle(vsk->local_addr.svm_cid,
+				   vsk->local_addr.svm_port);
+	dg->payload_size = len;
+
+	err = vmci_datagram_send(dg);
+	kfree(dg);
+	if (err < 0)
+		return vmci_transport_error_to_vsock_error(err);
+
+	return err - sizeof(*dg);
+}
+
+static int vmci_transport_dgram_dequeue(struct kiocb *kiocb,
+					struct vsock_sock *vsk,
+					struct msghdr *msg, size_t len,
+					int flags)
+{
+	int err;
+	int noblock;
+	struct vmci_datagram *dg;
+	size_t payload_len;
+	struct sk_buff *skb;
+
+	noblock = flags & MSG_DONTWAIT;
+
+	if (flags & MSG_OOB || flags & MSG_ERRQUEUE)
+		return -EOPNOTSUPP;
+
+	/* Retrieve the head sk_buff from the socket's receive queue. */
+	err = 0;
+	skb = skb_recv_datagram(&vsk->sk, flags, noblock, &err);
+	if (err)
+		return err;
+
+	if (!skb)
+		return -EAGAIN;
+
+	dg = (struct vmci_datagram *)skb->data;
+	if (!dg)
+		/* err is 0, meaning we read zero bytes. */
+		goto out;
+
+	payload_len = dg->payload_size;
+	/* Ensure the sk_buff matches the payload size claimed in the packet. */
+	if (payload_len != skb->len - sizeof(*dg)) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	if (payload_len > len) {
+		payload_len = len;
+		msg->msg_flags |= MSG_TRUNC;
+	}
+
+	/* Place the datagram payload in the user's iovec. */
+	err = skb_copy_datagram_iovec(skb, sizeof(*dg), msg->msg_iov,
+		payload_len);
+	if (err)
+		goto out;
+
+	msg->msg_namelen = 0;
+	if (msg->msg_name) {
+		struct sockaddr_vm *vm_addr;
+
+		/* Provide the address of the sender. */
+		vm_addr = (struct sockaddr_vm *)msg->msg_name;
+		vsock_addr_init(vm_addr, dg->src.context, dg->src.resource);
+		msg->msg_namelen = sizeof(*vm_addr);
+	}
+	err = payload_len;
+
+out:
+	skb_free_datagram(&vsk->sk, skb);
+	return err;
+}
+
+static bool vmci_transport_dgram_allow(u32 cid, u32 port)
+{
+	if (cid == VMADDR_CID_HYPERVISOR) {
+		/* Registrations of PBRPC Servers do not modify VMX/Hypervisor
+		 * state and are allowed.
+		 */
+		return port == VMCI_UNITY_PBRPC_REGISTER;
+	}
+
+	return true;
+}
+
+static int vmci_transport_connect(struct vsock_sock *vsk)
+{
+	int err;
+	bool old_pkt_proto = false;
+	struct sock *sk = &vsk->sk;
+
+	if (vmci_transport_old_proto_override(&old_pkt_proto) &&
+		old_pkt_proto) {
+		err = vmci_transport_send_conn_request(
+			sk, vmci_trans(vsk)->queue_pair_size);
+		if (err < 0) {
+			sk->sk_state = SS_UNCONNECTED;
+			return err;
+		}
+	} else {
+		int supported_proto_versions =
+			vmci_transport_new_proto_supported_versions();
+		err = vmci_transport_send_conn_request2(
+				sk, vmci_trans(vsk)->queue_pair_size,
+				supported_proto_versions);
+		if (err < 0) {
+			sk->sk_state = SS_UNCONNECTED;
+			return err;
+		}
+
+		vsk->sent_request = true;
+	}
+
+	return err;
+}
+
+static ssize_t vmci_transport_stream_dequeue(
+	struct vsock_sock *vsk,
+	struct iovec *iov,
+	size_t len,
+	int flags)
+{
+	if (flags & MSG_PEEK)
+		return vmci_qpair_peekv(vmci_trans(vsk)->qpair, iov, len, 0);
+	else
+		return vmci_qpair_dequev(vmci_trans(vsk)->qpair, iov, len, 0);
+}
+
+static ssize_t vmci_transport_stream_enqueue(
+	struct vsock_sock *vsk,
+	struct iovec *iov,
+	size_t len)
+{
+	return vmci_qpair_enquev(vmci_trans(vsk)->qpair, iov, len, 0);
+}
+
+static s64 vmci_transport_stream_has_data(struct vsock_sock *vsk)
+{
+	return vmci_qpair_consume_buf_ready(vmci_trans(vsk)->qpair);
+}
+
+static s64 vmci_transport_stream_has_space(struct vsock_sock *vsk)
+{
+	return vmci_qpair_produce_free_space(vmci_trans(vsk)->qpair);
+}
+
+static u64 vmci_transport_stream_rcvhiwat(struct vsock_sock *vsk)
+{
+	return vmci_trans(vsk)->consume_size;
+}
+
+static bool vmci_transport_stream_is_active(struct vsock_sock *vsk)
+{
+	return !vmci_handle_is_invalid(vmci_trans(vsk)->qp_handle);
+}
+
+static u64 vmci_transport_get_buffer_size(struct vsock_sock *vsk)
+{
+	return vmci_trans(vsk)->queue_pair_size;
+}
+
+static u64 vmci_transport_get_min_buffer_size(struct vsock_sock *vsk)
+{
+	return vmci_trans(vsk)->queue_pair_min_size;
+}
+
+static u64 vmci_transport_get_max_buffer_size(struct vsock_sock *vsk)
+{
+	return vmci_trans(vsk)->queue_pair_max_size;
+}
+
+static void vmci_transport_set_buffer_size(struct vsock_sock *vsk, u64 val)
+{
+	if (val < vmci_trans(vsk)->queue_pair_min_size)
+		vmci_trans(vsk)->queue_pair_min_size = val;
+	if (val > vmci_trans(vsk)->queue_pair_max_size)
+		vmci_trans(vsk)->queue_pair_max_size = val;
+	vmci_trans(vsk)->queue_pair_size = val;
+}
+
+static void vmci_transport_set_min_buffer_size(struct vsock_sock *vsk,
+					       u64 val)
+{
+	if (val > vmci_trans(vsk)->queue_pair_size)
+		vmci_trans(vsk)->queue_pair_size = val;
+	vmci_trans(vsk)->queue_pair_min_size = val;
+}
+
+static void vmci_transport_set_max_buffer_size(struct vsock_sock *vsk,
+					       u64 val)
+{
+	if (val < vmci_trans(vsk)->queue_pair_size)
+		vmci_trans(vsk)->queue_pair_size = val;
+	vmci_trans(vsk)->queue_pair_max_size = val;
+}
+
+static int vmci_transport_notify_poll_in(
+	struct vsock_sock *vsk,
+	size_t target,
+	bool *data_ready_now)
+{
+	return vmci_trans(vsk)->notify_ops->poll_in(
+			&vsk->sk, target, data_ready_now);
+}
+
+static int vmci_transport_notify_poll_out(
+	struct vsock_sock *vsk,
+	size_t target,
+	bool *space_available_now)
+{
+	return vmci_trans(vsk)->notify_ops->poll_out(
+			&vsk->sk, target, space_available_now);
+}
+
+static int vmci_transport_notify_recv_init(
+	struct vsock_sock *vsk,
+	size_t target,
+	struct vsock_transport_recv_notify_data *data)
+{
+	return vmci_trans(vsk)->notify_ops->recv_init(
+			&vsk->sk, target,
+			(struct vmci_transport_recv_notify_data *)data);
+}
+
+static int vmci_transport_notify_recv_pre_block(
+	struct vsock_sock *vsk,
+	size_t target,
+	struct vsock_transport_recv_notify_data *data)
+{
+	return vmci_trans(vsk)->notify_ops->recv_pre_block(
+			&vsk->sk, target,
+			(struct vmci_transport_recv_notify_data *)data);
+}
+
+static int vmci_transport_notify_recv_pre_dequeue(
+	struct vsock_sock *vsk,
+	size_t target,
+	struct vsock_transport_recv_notify_data *data)
+{
+	return vmci_trans(vsk)->notify_ops->recv_pre_dequeue(
+			&vsk->sk, target,
+			(struct vmci_transport_recv_notify_data *)data);
+}
+
+static int vmci_transport_notify_recv_post_dequeue(
+	struct vsock_sock *vsk,
+	size_t target,
+	ssize_t copied,
+	bool data_read,
+	struct vsock_transport_recv_notify_data *data)
+{
+	return vmci_trans(vsk)->notify_ops->recv_post_dequeue(
+			&vsk->sk, target, copied, data_read,
+			(struct vmci_transport_recv_notify_data *)data);
+}
+
+static int vmci_transport_notify_send_init(
+	struct vsock_sock *vsk,
+	struct vsock_transport_send_notify_data *data)
+{
+	return vmci_trans(vsk)->notify_ops->send_init(
+			&vsk->sk,
+			(struct vmci_transport_send_notify_data *)data);
+}
+
+static int vmci_transport_notify_send_pre_block(
+	struct vsock_sock *vsk,
+	struct vsock_transport_send_notify_data *data)
+{
+	return vmci_trans(vsk)->notify_ops->send_pre_block(
+			&vsk->sk,
+			(struct vmci_transport_send_notify_data *)data);
+}
+
+static int vmci_transport_notify_send_pre_enqueue(
+	struct vsock_sock *vsk,
+	struct vsock_transport_send_notify_data *data)
+{
+	return vmci_trans(vsk)->notify_ops->send_pre_enqueue(
+			&vsk->sk,
+			(struct vmci_transport_send_notify_data *)data);
+}
+
+static int vmci_transport_notify_send_post_enqueue(
+	struct vsock_sock *vsk,
+	ssize_t written,
+	struct vsock_transport_send_notify_data *data)
+{
+	return vmci_trans(vsk)->notify_ops->send_post_enqueue(
+			&vsk->sk, written,
+			(struct vmci_transport_send_notify_data *)data);
+}
+
+static bool vmci_transport_old_proto_override(bool *old_pkt_proto)
+{
+	if (PROTOCOL_OVERRIDE != -1) {
+		if (PROTOCOL_OVERRIDE == 0)
+			*old_pkt_proto = true;
+		else
+			*old_pkt_proto = false;
+
+		pr_info("Proto override in use\n");
+		return true;
+	}
+
+	return false;
+}
+
+static bool vmci_transport_proto_to_notify_struct(struct sock *sk,
+						  u16 *proto,
+						  bool old_pkt_proto)
+{
+	struct vsock_sock *vsk = vsock_sk(sk);
+
+	if (old_pkt_proto) {
+		if (*proto != VSOCK_PROTO_INVALID) {
+			pr_err("Can't set both an old and new protocol\n");
+			return false;
+		}
+		vmci_trans(vsk)->notify_ops = &vmci_transport_notify_pkt_ops;
+		goto exit;
+	}
+
+	switch (*proto) {
+	case VSOCK_PROTO_PKT_ON_NOTIFY:
+		vmci_trans(vsk)->notify_ops =
+			&vmci_transport_notify_pkt_q_state_ops;
+		break;
+	default:
+		pr_err("Unknown notify protocol version\n");
+		return false;
+	}
+
+exit:
+	vmci_trans(vsk)->notify_ops->socket_init(sk);
+	return true;
+}
+
+static u16 vmci_transport_new_proto_supported_versions(void)
+{
+	if (PROTOCOL_OVERRIDE != -1)
+		return PROTOCOL_OVERRIDE;
+
+	return VSOCK_PROTO_ALL_SUPPORTED;
+}
+
+static u32 vmci_transport_get_local_cid(void)
+{
+	return vmci_get_context_id();
+}
+
+static struct vsock_transport vmci_transport = {
+	.init = vmci_transport_socket_init,
+	.destruct = vmci_transport_destruct,
+	.release = vmci_transport_release,
+	.connect = vmci_transport_connect,
+	.dgram_bind = vmci_transport_dgram_bind,
+	.dgram_dequeue = vmci_transport_dgram_dequeue,
+	.dgram_enqueue = vmci_transport_dgram_enqueue,
+	.dgram_allow = vmci_transport_dgram_allow,
+	.stream_dequeue = vmci_transport_stream_dequeue,
+	.stream_enqueue = vmci_transport_stream_enqueue,
+	.stream_has_data = vmci_transport_stream_has_data,
+	.stream_has_space = vmci_transport_stream_has_space,
+	.stream_rcvhiwat = vmci_transport_stream_rcvhiwat,
+	.stream_is_active = vmci_transport_stream_is_active,
+	.stream_allow = vmci_transport_stream_allow,
+	.notify_poll_in = vmci_transport_notify_poll_in,
+	.notify_poll_out = vmci_transport_notify_poll_out,
+	.notify_recv_init = vmci_transport_notify_recv_init,
+	.notify_recv_pre_block = vmci_transport_notify_recv_pre_block,
+	.notify_recv_pre_dequeue = vmci_transport_notify_recv_pre_dequeue,
+	.notify_recv_post_dequeue = vmci_transport_notify_recv_post_dequeue,
+	.notify_send_init = vmci_transport_notify_send_init,
+	.notify_send_pre_block = vmci_transport_notify_send_pre_block,
+	.notify_send_pre_enqueue = vmci_transport_notify_send_pre_enqueue,
+	.notify_send_post_enqueue = vmci_transport_notify_send_post_enqueue,
+	.shutdown = vmci_transport_shutdown,
+	.set_buffer_size = vmci_transport_set_buffer_size,
+	.set_min_buffer_size = vmci_transport_set_min_buffer_size,
+	.set_max_buffer_size = vmci_transport_set_max_buffer_size,
+	.get_buffer_size = vmci_transport_get_buffer_size,
+	.get_min_buffer_size = vmci_transport_get_min_buffer_size,
+	.get_max_buffer_size = vmci_transport_get_max_buffer_size,
+	.get_local_cid = vmci_transport_get_local_cid,
+};
+
+static int __init vmci_transport_init(void)
+{
+	int err;
+
+	/* Create the datagram handle that we will use to send and receive all
+	 * VSocket control messages for this context.
+	 */
+	err = vmci_transport_datagram_create_hnd(VMCI_TRANSPORT_PACKET_RID,
+						 VMCI_FLAG_ANYCID_DG_HND,
+						 vmci_transport_recv_stream_cb,
+						 NULL,
+						 &vmci_transport_stream_handle);
+	if (err < VMCI_SUCCESS) {
+		pr_err("Unable to create datagram handle. (%d)\n", err);
+		return vmci_transport_error_to_vsock_error(err);
+	}
+
+	err = vmci_event_subscribe(VMCI_EVENT_QP_RESUMED,
+				   vmci_transport_qp_resumed_cb,
+				   NULL, &vmci_transport_qp_resumed_sub_id);
+	if (err < VMCI_SUCCESS) {
+		pr_err("Unable to subscribe to resumed event. (%d)\n", err);
+		err = vmci_transport_error_to_vsock_error(err);
+		vmci_transport_qp_resumed_sub_id = VMCI_INVALID_ID;
+		goto err_destroy_stream_handle;
+	}
+
+	err = vsock_core_init(&vmci_transport);
+	if (err < 0)
+		goto err_unsubscribe;
+
+	return 0;
+
+err_unsubscribe:
+	vmci_event_unsubscribe(vmci_transport_qp_resumed_sub_id);
+err_destroy_stream_handle:
+	vmci_datagram_destroy_handle(vmci_transport_stream_handle);
+	return err;
+}
+module_init(vmci_transport_init);
+
+static void __exit vmci_transport_exit(void)
+{
+	if (!vmci_handle_is_invalid(vmci_transport_stream_handle)) {
+		if (vmci_datagram_destroy_handle(
+			vmci_transport_stream_handle) != VMCI_SUCCESS)
+			pr_err("Couldn't destroy datagram handle\n");
+		vmci_transport_stream_handle = VMCI_INVALID_HANDLE;
+	}
+
+	if (vmci_transport_qp_resumed_sub_id != VMCI_INVALID_ID) {
+		vmci_event_unsubscribe(vmci_transport_qp_resumed_sub_id);
+		vmci_transport_qp_resumed_sub_id = VMCI_INVALID_ID;
+	}
+
+	vsock_core_exit();
+}
+module_exit(vmci_transport_exit);
+
+MODULE_AUTHOR("VMware, Inc.");
+MODULE_DESCRIPTION("VMCI transport for Virtual Sockets");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("vmware_vsock");
+MODULE_ALIAS_NETPROTO(PF_VSOCK);
diff --git a/net/vmw_vsock/vmci_transport.h b/net/vmw_vsock/vmci_transport.h
new file mode 100644
index 000000000000..1bf991803ec0
--- /dev/null
+++ b/net/vmw_vsock/vmci_transport.h
@@ -0,0 +1,139 @@
+/*
+ * VMware vSockets Driver
+ *
+ * Copyright (C) 2013 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _VMCI_TRANSPORT_H_
+#define _VMCI_TRANSPORT_H_
+
+#include <linux/vmw_vmci_defs.h>
+#include <linux/vmw_vmci_api.h>
+
+#include "vsock_addr.h"
+#include "af_vsock.h"
+
+/* If the packet format changes in a release then this should change too. */
+#define VMCI_TRANSPORT_PACKET_VERSION 1
+
+/* The resource ID on which control packets are sent. */
+#define VMCI_TRANSPORT_PACKET_RID 1
+
+#define VSOCK_PROTO_INVALID        0
+#define VSOCK_PROTO_PKT_ON_NOTIFY (1 << 0)
+#define VSOCK_PROTO_ALL_SUPPORTED (VSOCK_PROTO_PKT_ON_NOTIFY)
+
+#define vmci_trans(_vsk) ((struct vmci_transport *)((_vsk)->trans))
+
+enum vmci_transport_packet_type {
+	VMCI_TRANSPORT_PACKET_TYPE_INVALID = 0,
+	VMCI_TRANSPORT_PACKET_TYPE_REQUEST,
+	VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE,
+	VMCI_TRANSPORT_PACKET_TYPE_OFFER,
+	VMCI_TRANSPORT_PACKET_TYPE_ATTACH,
+	VMCI_TRANSPORT_PACKET_TYPE_WROTE,
+	VMCI_TRANSPORT_PACKET_TYPE_READ,
+	VMCI_TRANSPORT_PACKET_TYPE_RST,
+	VMCI_TRANSPORT_PACKET_TYPE_SHUTDOWN,
+	VMCI_TRANSPORT_PACKET_TYPE_WAITING_WRITE,
+	VMCI_TRANSPORT_PACKET_TYPE_WAITING_READ,
+	VMCI_TRANSPORT_PACKET_TYPE_REQUEST2,
+	VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE2,
+	VMCI_TRANSPORT_PACKET_TYPE_MAX
+};
+
+struct vmci_transport_waiting_info {
+	u64 generation;
+	u64 offset;
+};
+
+/* Control packet type for STREAM sockets.  DGRAMs have no control packets nor
+ * special packet header for data packets, they are just raw VMCI DGRAM
+ * messages.  For STREAMs, control packets are sent over the control channel
+ * while data is written and read directly from queue pairs with no packet
+ * format.
+ */
+struct vmci_transport_packet {
+	struct vmci_datagram dg;
+	u8 version;
+	u8 type;
+	u16 proto;
+	u32 src_port;
+	u32 dst_port;
+	u32 _reserved2;
+	union {
+		u64 size;
+		u64 mode;
+		struct vmci_handle handle;
+		struct vmci_transport_waiting_info wait;
+	} u;
+};
+
+struct vmci_transport_notify_pkt {
+	u64 write_notify_window;
+	u64 write_notify_min_window;
+	bool peer_waiting_read;
+	bool peer_waiting_write;
+	bool peer_waiting_write_detected;
+	bool sent_waiting_read;
+	bool sent_waiting_write;
+	struct vmci_transport_waiting_info peer_waiting_read_info;
+	struct vmci_transport_waiting_info peer_waiting_write_info;
+	u64 produce_q_generation;
+	u64 consume_q_generation;
+};
+
+struct vmci_transport_notify_pkt_q_state {
+	u64 write_notify_window;
+	u64 write_notify_min_window;
+	bool peer_waiting_write;
+	bool peer_waiting_write_detected;
+};
+
+union vmci_transport_notify {
+	struct vmci_transport_notify_pkt pkt;
+	struct vmci_transport_notify_pkt_q_state pkt_q_state;
+};
+
+/* Our transport-specific data. */
+struct vmci_transport {
+	/* For DGRAMs. */
+	struct vmci_handle dg_handle;
+	/* For STREAMs. */
+	struct vmci_handle qp_handle;
+	struct vmci_qp *qpair;
+	u64 produce_size;
+	u64 consume_size;
+	u64 queue_pair_size;
+	u64 queue_pair_min_size;
+	u64 queue_pair_max_size;
+	u32 attach_sub_id;
+	u32 detach_sub_id;
+	union vmci_transport_notify notify;
+	struct vmci_transport_notify_ops *notify_ops;
+};
+
+int vmci_transport_register(void);
+void vmci_transport_unregister(void);
+
+int vmci_transport_send_wrote_bh(struct sockaddr_vm *dst,
+				 struct sockaddr_vm *src);
+int vmci_transport_send_read_bh(struct sockaddr_vm *dst,
+				struct sockaddr_vm *src);
+int vmci_transport_send_wrote(struct sock *sk);
+int vmci_transport_send_read(struct sock *sk);
+int vmci_transport_send_waiting_write(struct sock *sk,
+				      struct vmci_transport_waiting_info *wait);
+int vmci_transport_send_waiting_read(struct sock *sk,
+				     struct vmci_transport_waiting_info *wait);
+
+#endif
diff --git a/net/vmw_vsock/vmci_transport_notify.c b/net/vmw_vsock/vmci_transport_notify.c
new file mode 100644
index 000000000000..9a730744e7bc
--- /dev/null
+++ b/net/vmw_vsock/vmci_transport_notify.c
@@ -0,0 +1,680 @@
+/*
+ * VMware vSockets Driver
+ *
+ * Copyright (C) 2009-2013 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/stddef.h>
+#include <net/sock.h>
+
+#include "vmci_transport_notify.h"
+
+#define PKT_FIELD(vsk, field_name) (vmci_trans(vsk)->notify.pkt.field_name)
+
+static bool vmci_transport_notify_waiting_write(struct vsock_sock *vsk)
+{
+#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
+	bool retval;
+	u64 notify_limit;
+
+	if (!PKT_FIELD(vsk, peer_waiting_write))
+		return false;
+
+#ifdef VSOCK_OPTIMIZATION_FLOW_CONTROL
+	/* When the sender blocks, we take that as a sign that the sender is
+	 * faster than the receiver. To reduce the transmit rate of the sender,
+	 * we delay the sending of the read notification by decreasing the
+	 * write_notify_window. The notification is delayed until the number of
+	 * bytes used in the queue drops below the write_notify_window.
+	 */
+
+	if (!PKT_FIELD(vsk, peer_waiting_write_detected)) {
+		PKT_FIELD(vsk, peer_waiting_write_detected) = true;
+		if (PKT_FIELD(vsk, write_notify_window) < PAGE_SIZE) {
+			PKT_FIELD(vsk, write_notify_window) =
+			    PKT_FIELD(vsk, write_notify_min_window);
+		} else {
+			PKT_FIELD(vsk, write_notify_window) -= PAGE_SIZE;
+			if (PKT_FIELD(vsk, write_notify_window) <
+			    PKT_FIELD(vsk, write_notify_min_window))
+				PKT_FIELD(vsk, write_notify_window) =
+				    PKT_FIELD(vsk, write_notify_min_window);
+
+		}
+	}
+	notify_limit = vmci_trans(vsk)->consume_size -
+		PKT_FIELD(vsk, write_notify_window);
+#else
+	notify_limit = 0;
+#endif
+
+	/* For now we ignore the wait information and just see if the free
+	 * space exceeds the notify limit.  Note that improving this function
+	 * to be more intelligent will not require a protocol change and will
+	 * retain compatibility between endpoints with mixed versions of this
+	 * function.
+	 *
+	 * The notify_limit is used to delay notifications in the case where
+	 * flow control is enabled. Below the test is expressed in terms of
+	 * free space in the queue: if free_space > ConsumeSize -
+	 * write_notify_window then notify An alternate way of expressing this
+	 * is to rewrite the expression to use the data ready in the receive
+	 * queue: if write_notify_window > bufferReady then notify as
+	 * free_space == ConsumeSize - bufferReady.
+	 */
+	retval = vmci_qpair_consume_free_space(vmci_trans(vsk)->qpair) >
+		notify_limit;
+#ifdef VSOCK_OPTIMIZATION_FLOW_CONTROL
+	if (retval) {
+		/*
+		 * Once we notify the peer, we reset the detected flag so the
+		 * next wait will again cause a decrease in the window size.
+		 */
+
+		PKT_FIELD(vsk, peer_waiting_write_detected) = false;
+	}
+#endif
+	return retval;
+#else
+	return true;
+#endif
+}
+
+static bool vmci_transport_notify_waiting_read(struct vsock_sock *vsk)
+{
+#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
+	if (!PKT_FIELD(vsk, peer_waiting_read))
+		return false;
+
+	/* For now we ignore the wait information and just see if there is any
+	 * data for our peer to read.  Note that improving this function to be
+	 * more intelligent will not require a protocol change and will retain
+	 * compatibility between endpoints with mixed versions of this
+	 * function.
+	 */
+	return vmci_qpair_produce_buf_ready(vmci_trans(vsk)->qpair) > 0;
+#else
+	return true;
+#endif
+}
+
+static void
+vmci_transport_handle_waiting_read(struct sock *sk,
+				   struct vmci_transport_packet *pkt,
+				   bool bottom_half,
+				   struct sockaddr_vm *dst,
+				   struct sockaddr_vm *src)
+{
+#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
+	struct vsock_sock *vsk;
+
+	vsk = vsock_sk(sk);
+
+	PKT_FIELD(vsk, peer_waiting_read) = true;
+	memcpy(&PKT_FIELD(vsk, peer_waiting_read_info), &pkt->u.wait,
+	       sizeof(PKT_FIELD(vsk, peer_waiting_read_info)));
+
+	if (vmci_transport_notify_waiting_read(vsk)) {
+		bool sent;
+
+		if (bottom_half)
+			sent = vmci_transport_send_wrote_bh(dst, src) > 0;
+		else
+			sent = vmci_transport_send_wrote(sk) > 0;
+
+		if (sent)
+			PKT_FIELD(vsk, peer_waiting_read) = false;
+	}
+#endif
+}
+
+static void
+vmci_transport_handle_waiting_write(struct sock *sk,
+				    struct vmci_transport_packet *pkt,
+				    bool bottom_half,
+				    struct sockaddr_vm *dst,
+				    struct sockaddr_vm *src)
+{
+#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
+	struct vsock_sock *vsk;
+
+	vsk = vsock_sk(sk);
+
+	PKT_FIELD(vsk, peer_waiting_write) = true;
+	memcpy(&PKT_FIELD(vsk, peer_waiting_write_info), &pkt->u.wait,
+	       sizeof(PKT_FIELD(vsk, peer_waiting_write_info)));
+
+	if (vmci_transport_notify_waiting_write(vsk)) {
+		bool sent;
+
+		if (bottom_half)
+			sent = vmci_transport_send_read_bh(dst, src) > 0;
+		else
+			sent = vmci_transport_send_read(sk) > 0;
+
+		if (sent)
+			PKT_FIELD(vsk, peer_waiting_write) = false;
+	}
+#endif
+}
+
+static void
+vmci_transport_handle_read(struct sock *sk,
+			   struct vmci_transport_packet *pkt,
+			   bool bottom_half,
+			   struct sockaddr_vm *dst, struct sockaddr_vm *src)
+{
+#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
+	struct vsock_sock *vsk;
+
+	vsk = vsock_sk(sk);
+	PKT_FIELD(vsk, sent_waiting_write) = false;
+#endif
+
+	sk->sk_write_space(sk);
+}
+
+static bool send_waiting_read(struct sock *sk, u64 room_needed)
+{
+#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
+	struct vsock_sock *vsk;
+	struct vmci_transport_waiting_info waiting_info;
+	u64 tail;
+	u64 head;
+	u64 room_left;
+	bool ret;
+
+	vsk = vsock_sk(sk);
+
+	if (PKT_FIELD(vsk, sent_waiting_read))
+		return true;
+
+	if (PKT_FIELD(vsk, write_notify_window) <
+			vmci_trans(vsk)->consume_size)
+		PKT_FIELD(vsk, write_notify_window) =
+		    min(PKT_FIELD(vsk, write_notify_window) + PAGE_SIZE,
+			vmci_trans(vsk)->consume_size);
+
+	vmci_qpair_get_consume_indexes(vmci_trans(vsk)->qpair, &tail, &head);
+	room_left = vmci_trans(vsk)->consume_size - head;
+	if (room_needed >= room_left) {
+		waiting_info.offset = room_needed - room_left;
+		waiting_info.generation =
+		    PKT_FIELD(vsk, consume_q_generation) + 1;
+	} else {
+		waiting_info.offset = head + room_needed;
+		waiting_info.generation = PKT_FIELD(vsk, consume_q_generation);
+	}
+
+	ret = vmci_transport_send_waiting_read(sk, &waiting_info) > 0;
+	if (ret)
+		PKT_FIELD(vsk, sent_waiting_read) = true;
+
+	return ret;
+#else
+	return true;
+#endif
+}
+
+static bool send_waiting_write(struct sock *sk, u64 room_needed)
+{
+#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
+	struct vsock_sock *vsk;
+	struct vmci_transport_waiting_info waiting_info;
+	u64 tail;
+	u64 head;
+	u64 room_left;
+	bool ret;
+
+	vsk = vsock_sk(sk);
+
+	if (PKT_FIELD(vsk, sent_waiting_write))
+		return true;
+
+	vmci_qpair_get_produce_indexes(vmci_trans(vsk)->qpair, &tail, &head);
+	room_left = vmci_trans(vsk)->produce_size - tail;
+	if (room_needed + 1 >= room_left) {
+		/* Wraps around to current generation. */
+		waiting_info.offset = room_needed + 1 - room_left;
+		waiting_info.generation = PKT_FIELD(vsk, produce_q_generation);
+	} else {
+		waiting_info.offset = tail + room_needed + 1;
+		waiting_info.generation =
+		    PKT_FIELD(vsk, produce_q_generation) - 1;
+	}
+
+	ret = vmci_transport_send_waiting_write(sk, &waiting_info) > 0;
+	if (ret)
+		PKT_FIELD(vsk, sent_waiting_write) = true;
+
+	return ret;
+#else
+	return true;
+#endif
+}
+
+static int vmci_transport_send_read_notification(struct sock *sk)
+{
+	struct vsock_sock *vsk;
+	bool sent_read;
+	unsigned int retries;
+	int err;
+
+	vsk = vsock_sk(sk);
+	sent_read = false;
+	retries = 0;
+	err = 0;
+
+	if (vmci_transport_notify_waiting_write(vsk)) {
+		/* Notify the peer that we have read, retrying the send on
+		 * failure up to our maximum value.  XXX For now we just log
+		 * the failure, but later we should schedule a work item to
+		 * handle the resend until it succeeds.  That would require
+		 * keeping track of work items in the vsk and cleaning them up
+		 * upon socket close.
+		 */
+		while (!(vsk->peer_shutdown & RCV_SHUTDOWN) &&
+		       !sent_read &&
+		       retries < VMCI_TRANSPORT_MAX_DGRAM_RESENDS) {
+			err = vmci_transport_send_read(sk);
+			if (err >= 0)
+				sent_read = true;
+
+			retries++;
+		}
+
+		if (retries >= VMCI_TRANSPORT_MAX_DGRAM_RESENDS)
+			pr_err("%p unable to send read notify to peer\n", sk);
+		else
+#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
+			PKT_FIELD(vsk, peer_waiting_write) = false;
+#endif
+
+	}
+	return err;
+}
+
+static void
+vmci_transport_handle_wrote(struct sock *sk,
+			    struct vmci_transport_packet *pkt,
+			    bool bottom_half,
+			    struct sockaddr_vm *dst, struct sockaddr_vm *src)
+{
+#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
+	struct vsock_sock *vsk = vsock_sk(sk);
+	PKT_FIELD(vsk, sent_waiting_read) = false;
+#endif
+	sk->sk_data_ready(sk, 0);
+}
+
+static void vmci_transport_notify_pkt_socket_init(struct sock *sk)
+{
+	struct vsock_sock *vsk = vsock_sk(sk);
+
+	PKT_FIELD(vsk, write_notify_window) = PAGE_SIZE;
+	PKT_FIELD(vsk, write_notify_min_window) = PAGE_SIZE;
+	PKT_FIELD(vsk, peer_waiting_read) = false;
+	PKT_FIELD(vsk, peer_waiting_write) = false;
+	PKT_FIELD(vsk, peer_waiting_write_detected) = false;
+	PKT_FIELD(vsk, sent_waiting_read) = false;
+	PKT_FIELD(vsk, sent_waiting_write) = false;
+	PKT_FIELD(vsk, produce_q_generation) = 0;
+	PKT_FIELD(vsk, consume_q_generation) = 0;
+
+	memset(&PKT_FIELD(vsk, peer_waiting_read_info), 0,
+	       sizeof(PKT_FIELD(vsk, peer_waiting_read_info)));
+	memset(&PKT_FIELD(vsk, peer_waiting_write_info), 0,
+	       sizeof(PKT_FIELD(vsk, peer_waiting_write_info)));
+}
+
+static void vmci_transport_notify_pkt_socket_destruct(struct vsock_sock *vsk)
+{
+}
+
+static int
+vmci_transport_notify_pkt_poll_in(struct sock *sk,
+				  size_t target, bool *data_ready_now)
+{
+	struct vsock_sock *vsk = vsock_sk(sk);
+
+	if (vsock_stream_has_data(vsk)) {
+		*data_ready_now = true;
+	} else {
+		/* We can't read right now because there is nothing in the
+		 * queue. Ask for notifications when there is something to
+		 * read.
+		 */
+		if (sk->sk_state == SS_CONNECTED) {
+			if (!send_waiting_read(sk, 1))
+				return -1;
+
+		}
+		*data_ready_now = false;
+	}
+
+	return 0;
+}
+
+static int
+vmci_transport_notify_pkt_poll_out(struct sock *sk,
+				   size_t target, bool *space_avail_now)
+{
+	s64 produce_q_free_space;
+	struct vsock_sock *vsk = vsock_sk(sk);
+
+	produce_q_free_space = vsock_stream_has_space(vsk);
+	if (produce_q_free_space > 0) {
+		*space_avail_now = true;
+		return 0;
+	} else if (produce_q_free_space == 0) {
+		/* This is a connected socket but we can't currently send data.
+		 * Notify the peer that we are waiting if the queue is full. We
+		 * only send a waiting write if the queue is full because
+		 * otherwise we end up in an infinite WAITING_WRITE, READ,
+		 * WAITING_WRITE, READ, etc. loop. Treat failing to send the
+		 * notification as a socket error, passing that back through
+		 * the mask.
+		 */
+		if (!send_waiting_write(sk, 1))
+			return -1;
+
+		*space_avail_now = false;
+	}
+
+	return 0;
+}
+
+static int
+vmci_transport_notify_pkt_recv_init(
+			struct sock *sk,
+			size_t target,
+			struct vmci_transport_recv_notify_data *data)
+{
+	struct vsock_sock *vsk = vsock_sk(sk);
+
+#ifdef VSOCK_OPTIMIZATION_WAITING_NOTIFY
+	data->consume_head = 0;
+	data->produce_tail = 0;
+#ifdef VSOCK_OPTIMIZATION_FLOW_CONTROL
+	data->notify_on_block = false;
+
+	if (PKT_FIELD(vsk, write_notify_min_window) < target + 1) {
+		PKT_FIELD(vsk, write_notify_min_window) = target + 1;
+		if (PKT_FIELD(vsk, write_notify_window) <
+		    PKT_FIELD(vsk, write_notify_min_window)) {
+			/* If the current window is smaller than the new
+			 * minimal window size, we need to reevaluate whether
+			 * we need to notify the sender. If the number of ready
+			 * bytes are smaller than the new window, we need to
+			 * send a notification to the sender before we block.
+			 */
+
+			PKT_FIELD(vsk, write_notify_window) =
+			    PKT_FIELD(vsk, write_notify_min_window);
+			data->notify_on_block = true;
+		}
+	}
+#endif
+#endif
+
+	return 0;
+}
+
+static int
+vmci_transport_notify_pkt_recv_pre_block(
+				struct sock *sk,
+				size_t target,
+				struct vmci_transport_recv_notify_data *data)
+{
+	int err = 0;
+
+	/* Notify our peer that we are waiting for data to read. */
+	if (!send_waiting_read(sk, target)) {
+		err = -EHOSTUNREACH;
+		return err;
+	}
+#ifdef VSOCK_OPTIMIZATION_FLOW_CONTROL
+	if (data->notify_on_block) {
+		err = vmci_transport_send_read_notification(sk);
+		if (err < 0)
+			return err;
+
+		data->notify_on_block = false;
+	}
+#endif
+
+	return err;
+}
+
+static int
+vmci_transport_notify_pkt_recv_pre_dequeue(
+				struct sock *sk,
+				size_t target,
+				struct vmci_transport_recv_notify_data *data)
+{
+	struct vsock_sock *vsk = vsock_sk(sk);
+
+	/* Now consume up to len bytes from the queue.  Note that since we have
+	 * the socket locked we should copy at least ready bytes.
+	 */
+#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
+	vmci_qpair_get_consume_indexes(vmci_trans(vsk)->qpair,
+				       &data->produce_tail,
+				       &data->consume_head);
+#endif
+
+	return 0;
+}
+
+static int
+vmci_transport_notify_pkt_recv_post_dequeue(
+				struct sock *sk,
+				size_t target,
+				ssize_t copied,
+				bool data_read,
+				struct vmci_transport_recv_notify_data *data)
+{
+	struct vsock_sock *vsk;
+	int err;
+
+	vsk = vsock_sk(sk);
+	err = 0;
+
+	if (data_read) {
+#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
+		/* Detect a wrap-around to maintain queue generation.  Note
+		 * that this is safe since we hold the socket lock across the
+		 * two queue pair operations.
+		 */
+		if (copied >=
+			vmci_trans(vsk)->consume_size - data->consume_head)
+			PKT_FIELD(vsk, consume_q_generation)++;
+#endif
+
+		err = vmci_transport_send_read_notification(sk);
+		if (err < 0)
+			return err;
+
+	}
+	return err;
+}
+
+static int
+vmci_transport_notify_pkt_send_init(
+			struct sock *sk,
+			struct vmci_transport_send_notify_data *data)
+{
+#ifdef VSOCK_OPTIMIZATION_WAITING_NOTIFY
+	data->consume_head = 0;
+	data->produce_tail = 0;
+#endif
+
+	return 0;
+}
+
+static int
+vmci_transport_notify_pkt_send_pre_block(
+				struct sock *sk,
+				struct vmci_transport_send_notify_data *data)
+{
+	/* Notify our peer that we are waiting for room to write. */
+	if (!send_waiting_write(sk, 1))
+		return -EHOSTUNREACH;
+
+	return 0;
+}
+
+static int
+vmci_transport_notify_pkt_send_pre_enqueue(
+				struct sock *sk,
+				struct vmci_transport_send_notify_data *data)
+{
+	struct vsock_sock *vsk = vsock_sk(sk);
+
+#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
+	vmci_qpair_get_produce_indexes(vmci_trans(vsk)->qpair,
+				       &data->produce_tail,
+				       &data->consume_head);
+#endif
+
+	return 0;
+}
+
+static int
+vmci_transport_notify_pkt_send_post_enqueue(
+				struct sock *sk,
+				ssize_t written,
+				struct vmci_transport_send_notify_data *data)
+{
+	int err = 0;
+	struct vsock_sock *vsk;
+	bool sent_wrote = false;
+	int retries = 0;
+
+	vsk = vsock_sk(sk);
+
+#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
+	/* Detect a wrap-around to maintain queue generation.  Note that this
+	 * is safe since we hold the socket lock across the two queue pair
+	 * operations.
+	 */
+	if (written >= vmci_trans(vsk)->produce_size - data->produce_tail)
+		PKT_FIELD(vsk, produce_q_generation)++;
+
+#endif
+
+	if (vmci_transport_notify_waiting_read(vsk)) {
+		/* Notify the peer that we have written, retrying the send on
+		 * failure up to our maximum value. See the XXX comment for the
+		 * corresponding piece of code in StreamRecvmsg() for potential
+		 * improvements.
+		 */
+		while (!(vsk->peer_shutdown & RCV_SHUTDOWN) &&
+		       !sent_wrote &&
+		       retries < VMCI_TRANSPORT_MAX_DGRAM_RESENDS) {
+			err = vmci_transport_send_wrote(sk);
+			if (err >= 0)
+				sent_wrote = true;
+
+			retries++;
+		}
+
+		if (retries >= VMCI_TRANSPORT_MAX_DGRAM_RESENDS) {
+			pr_err("%p unable to send wrote notify to peer\n", sk);
+			return err;
+		} else {
+#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
+			PKT_FIELD(vsk, peer_waiting_read) = false;
+#endif
+		}
+	}
+	return err;
+}
+
+static void
+vmci_transport_notify_pkt_handle_pkt(
+			struct sock *sk,
+			struct vmci_transport_packet *pkt,
+			bool bottom_half,
+			struct sockaddr_vm *dst,
+			struct sockaddr_vm *src, bool *pkt_processed)
+{
+	bool processed = false;
+
+	switch (pkt->type) {
+	case VMCI_TRANSPORT_PACKET_TYPE_WROTE:
+		vmci_transport_handle_wrote(sk, pkt, bottom_half, dst, src);
+		processed = true;
+		break;
+	case VMCI_TRANSPORT_PACKET_TYPE_READ:
+		vmci_transport_handle_read(sk, pkt, bottom_half, dst, src);
+		processed = true;
+		break;
+	case VMCI_TRANSPORT_PACKET_TYPE_WAITING_WRITE:
+		vmci_transport_handle_waiting_write(sk, pkt, bottom_half,
+						    dst, src);
+		processed = true;
+		break;
+
+	case VMCI_TRANSPORT_PACKET_TYPE_WAITING_READ:
+		vmci_transport_handle_waiting_read(sk, pkt, bottom_half,
+						   dst, src);
+		processed = true;
+		break;
+	}
+
+	if (pkt_processed)
+		*pkt_processed = processed;
+}
+
+static void vmci_transport_notify_pkt_process_request(struct sock *sk)
+{
+	struct vsock_sock *vsk = vsock_sk(sk);
+
+	PKT_FIELD(vsk, write_notify_window) = vmci_trans(vsk)->consume_size;
+	if (vmci_trans(vsk)->consume_size <
+		PKT_FIELD(vsk, write_notify_min_window))
+		PKT_FIELD(vsk, write_notify_min_window) =
+			vmci_trans(vsk)->consume_size;
+}
+
+static void vmci_transport_notify_pkt_process_negotiate(struct sock *sk)
+{
+	struct vsock_sock *vsk = vsock_sk(sk);
+
+	PKT_FIELD(vsk, write_notify_window) = vmci_trans(vsk)->consume_size;
+	if (vmci_trans(vsk)->consume_size <
+		PKT_FIELD(vsk, write_notify_min_window))
+		PKT_FIELD(vsk, write_notify_min_window) =
+			vmci_trans(vsk)->consume_size;
+}
+
+/* Socket control packet based operations. */
+struct vmci_transport_notify_ops vmci_transport_notify_pkt_ops = {
+	vmci_transport_notify_pkt_socket_init,
+	vmci_transport_notify_pkt_socket_destruct,
+	vmci_transport_notify_pkt_poll_in,
+	vmci_transport_notify_pkt_poll_out,
+	vmci_transport_notify_pkt_handle_pkt,
+	vmci_transport_notify_pkt_recv_init,
+	vmci_transport_notify_pkt_recv_pre_block,
+	vmci_transport_notify_pkt_recv_pre_dequeue,
+	vmci_transport_notify_pkt_recv_post_dequeue,
+	vmci_transport_notify_pkt_send_init,
+	vmci_transport_notify_pkt_send_pre_block,
+	vmci_transport_notify_pkt_send_pre_enqueue,
+	vmci_transport_notify_pkt_send_post_enqueue,
+	vmci_transport_notify_pkt_process_request,
+	vmci_transport_notify_pkt_process_negotiate,
+};
diff --git a/net/vmw_vsock/vmci_transport_notify.h b/net/vmw_vsock/vmci_transport_notify.h
new file mode 100644
index 000000000000..7df793249b6c
--- /dev/null
+++ b/net/vmw_vsock/vmci_transport_notify.h
@@ -0,0 +1,83 @@
+/*
+ * VMware vSockets Driver
+ *
+ * Copyright (C) 2009-2013 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef __VMCI_TRANSPORT_NOTIFY_H__
+#define __VMCI_TRANSPORT_NOTIFY_H__
+
+#include <linux/types.h>
+#include <linux/vmw_vmci_defs.h>
+#include <linux/vmw_vmci_api.h>
+#include <linux/vm_sockets.h>
+
+#include "vmci_transport.h"
+
+/* Comment this out to compare with old protocol. */
+#define VSOCK_OPTIMIZATION_WAITING_NOTIFY 1
+#if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
+/* Comment this out to remove flow control for "new" protocol */
+#define VSOCK_OPTIMIZATION_FLOW_CONTROL 1
+#endif
+
+#define VMCI_TRANSPORT_MAX_DGRAM_RESENDS       10
+
+struct vmci_transport_recv_notify_data {
+	u64 consume_head;
+	u64 produce_tail;
+	bool notify_on_block;
+};
+
+struct vmci_transport_send_notify_data {
+	u64 consume_head;
+	u64 produce_tail;
+};
+
+/* Socket notification callbacks. */
+struct vmci_transport_notify_ops {
+	void (*socket_init) (struct sock *sk);
+	void (*socket_destruct) (struct vsock_sock *vsk);
+	int (*poll_in) (struct sock *sk, size_t target,
+			  bool *data_ready_now);
+	int (*poll_out) (struct sock *sk, size_t target,
+			   bool *space_avail_now);
+	void (*handle_notify_pkt) (struct sock *sk,
+				   struct vmci_transport_packet *pkt,
+				   bool bottom_half, struct sockaddr_vm *dst,
+				   struct sockaddr_vm *src,
+				   bool *pkt_processed);
+	int (*recv_init) (struct sock *sk, size_t target,
+			  struct vmci_transport_recv_notify_data *data);
+	int (*recv_pre_block) (struct sock *sk, size_t target,
+			       struct vmci_transport_recv_notify_data *data);
+	int (*recv_pre_dequeue) (struct sock *sk, size_t target,
+				 struct vmci_transport_recv_notify_data *data);
+	int (*recv_post_dequeue) (struct sock *sk, size_t target,
+				  ssize_t copied, bool data_read,
+				  struct vmci_transport_recv_notify_data *data);
+	int (*send_init) (struct sock *sk,
+			  struct vmci_transport_send_notify_data *data);
+	int (*send_pre_block) (struct sock *sk,
+			       struct vmci_transport_send_notify_data *data);
+	int (*send_pre_enqueue) (struct sock *sk,
+				 struct vmci_transport_send_notify_data *data);
+	int (*send_post_enqueue) (struct sock *sk, ssize_t written,
+				  struct vmci_transport_send_notify_data *data);
+	void (*process_request) (struct sock *sk);
+	void (*process_negotiate) (struct sock *sk);
+};
+
+extern struct vmci_transport_notify_ops vmci_transport_notify_pkt_ops;
+extern struct vmci_transport_notify_ops vmci_transport_notify_pkt_q_state_ops;
+
+#endif /* __VMCI_TRANSPORT_NOTIFY_H__ */
diff --git a/net/vmw_vsock/vmci_transport_notify_qstate.c b/net/vmw_vsock/vmci_transport_notify_qstate.c
new file mode 100644
index 000000000000..622bd7aa1016
--- /dev/null
+++ b/net/vmw_vsock/vmci_transport_notify_qstate.c
@@ -0,0 +1,438 @@
+/*
+ * VMware vSockets Driver
+ *
+ * Copyright (C) 2009-2013 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/stddef.h>
+#include <net/sock.h>
+
+#include "vmci_transport_notify.h"
+
+#define PKT_FIELD(vsk, field_name) \
+	(vmci_trans(vsk)->notify.pkt_q_state.field_name)
+
+static bool vmci_transport_notify_waiting_write(struct vsock_sock *vsk)
+{
+	bool retval;
+	u64 notify_limit;
+
+	if (!PKT_FIELD(vsk, peer_waiting_write))
+		return false;
+
+	/* When the sender blocks, we take that as a sign that the sender is
+	 * faster than the receiver. To reduce the transmit rate of the sender,
+	 * we delay the sending of the read notification by decreasing the
+	 * write_notify_window. The notification is delayed until the number of
+	 * bytes used in the queue drops below the write_notify_window.
+	 */
+
+	if (!PKT_FIELD(vsk, peer_waiting_write_detected)) {
+		PKT_FIELD(vsk, peer_waiting_write_detected) = true;
+		if (PKT_FIELD(vsk, write_notify_window) < PAGE_SIZE) {
+			PKT_FIELD(vsk, write_notify_window) =
+			    PKT_FIELD(vsk, write_notify_min_window);
+		} else {
+			PKT_FIELD(vsk, write_notify_window) -= PAGE_SIZE;
+			if (PKT_FIELD(vsk, write_notify_window) <
+			    PKT_FIELD(vsk, write_notify_min_window))
+				PKT_FIELD(vsk, write_notify_window) =
+				    PKT_FIELD(vsk, write_notify_min_window);
+
+		}
+	}
+	notify_limit = vmci_trans(vsk)->consume_size -
+		PKT_FIELD(vsk, write_notify_window);
+
+	/* The notify_limit is used to delay notifications in the case where
+	 * flow control is enabled. Below the test is expressed in terms of
+	 * free space in the queue: if free_space > ConsumeSize -
+	 * write_notify_window then notify An alternate way of expressing this
+	 * is to rewrite the expression to use the data ready in the receive
+	 * queue: if write_notify_window > bufferReady then notify as
+	 * free_space == ConsumeSize - bufferReady.
+	 */
+
+	retval = vmci_qpair_consume_free_space(vmci_trans(vsk)->qpair) >
+		notify_limit;
+
+	if (retval) {
+		/* Once we notify the peer, we reset the detected flag so the
+		 * next wait will again cause a decrease in the window size.
+		 */
+
+		PKT_FIELD(vsk, peer_waiting_write_detected) = false;
+	}
+	return retval;
+}
+
+static void
+vmci_transport_handle_read(struct sock *sk,
+			   struct vmci_transport_packet *pkt,
+			   bool bottom_half,
+			   struct sockaddr_vm *dst, struct sockaddr_vm *src)
+{
+	sk->sk_write_space(sk);
+}
+
+static void
+vmci_transport_handle_wrote(struct sock *sk,
+			    struct vmci_transport_packet *pkt,
+			    bool bottom_half,
+			    struct sockaddr_vm *dst, struct sockaddr_vm *src)
+{
+	sk->sk_data_ready(sk, 0);
+}
+
+static void vsock_block_update_write_window(struct sock *sk)
+{
+	struct vsock_sock *vsk = vsock_sk(sk);
+
+	if (PKT_FIELD(vsk, write_notify_window) < vmci_trans(vsk)->consume_size)
+		PKT_FIELD(vsk, write_notify_window) =
+		    min(PKT_FIELD(vsk, write_notify_window) + PAGE_SIZE,
+			vmci_trans(vsk)->consume_size);
+}
+
+static int vmci_transport_send_read_notification(struct sock *sk)
+{
+	struct vsock_sock *vsk;
+	bool sent_read;
+	unsigned int retries;
+	int err;
+
+	vsk = vsock_sk(sk);
+	sent_read = false;
+	retries = 0;
+	err = 0;
+
+	if (vmci_transport_notify_waiting_write(vsk)) {
+		/* Notify the peer that we have read, retrying the send on
+		 * failure up to our maximum value.  XXX For now we just log
+		 * the failure, but later we should schedule a work item to
+		 * handle the resend until it succeeds.  That would require
+		 * keeping track of work items in the vsk and cleaning them up
+		 * upon socket close.
+		 */
+		while (!(vsk->peer_shutdown & RCV_SHUTDOWN) &&
+		       !sent_read &&
+		       retries < VMCI_TRANSPORT_MAX_DGRAM_RESENDS) {
+			err = vmci_transport_send_read(sk);
+			if (err >= 0)
+				sent_read = true;
+
+			retries++;
+		}
+
+		if (retries >= VMCI_TRANSPORT_MAX_DGRAM_RESENDS && !sent_read)
+			pr_err("%p unable to send read notification to peer\n",
+			       sk);
+		else
+			PKT_FIELD(vsk, peer_waiting_write) = false;
+
+	}
+	return err;
+}
+
+static void vmci_transport_notify_pkt_socket_init(struct sock *sk)
+{
+	struct vsock_sock *vsk = vsock_sk(sk);
+
+	PKT_FIELD(vsk, write_notify_window) = PAGE_SIZE;
+	PKT_FIELD(vsk, write_notify_min_window) = PAGE_SIZE;
+	PKT_FIELD(vsk, peer_waiting_write) = false;
+	PKT_FIELD(vsk, peer_waiting_write_detected) = false;
+}
+
+static void vmci_transport_notify_pkt_socket_destruct(struct vsock_sock *vsk)
+{
+	PKT_FIELD(vsk, write_notify_window) = PAGE_SIZE;
+	PKT_FIELD(vsk, write_notify_min_window) = PAGE_SIZE;
+	PKT_FIELD(vsk, peer_waiting_write) = false;
+	PKT_FIELD(vsk, peer_waiting_write_detected) = false;
+}
+
+static int
+vmci_transport_notify_pkt_poll_in(struct sock *sk,
+				  size_t target, bool *data_ready_now)
+{
+	struct vsock_sock *vsk = vsock_sk(sk);
+
+	if (vsock_stream_has_data(vsk)) {
+		*data_ready_now = true;
+	} else {
+		/* We can't read right now because there is nothing in the
+		 * queue. Ask for notifications when there is something to
+		 * read.
+		 */
+		if (sk->sk_state == SS_CONNECTED)
+			vsock_block_update_write_window(sk);
+		*data_ready_now = false;
+	}
+
+	return 0;
+}
+
+static int
+vmci_transport_notify_pkt_poll_out(struct sock *sk,
+				   size_t target, bool *space_avail_now)
+{
+	s64 produce_q_free_space;
+	struct vsock_sock *vsk = vsock_sk(sk);
+
+	produce_q_free_space = vsock_stream_has_space(vsk);
+	if (produce_q_free_space > 0) {
+		*space_avail_now = true;
+		return 0;
+	} else if (produce_q_free_space == 0) {
+		/* This is a connected socket but we can't currently send data.
+		 * Nothing else to do.
+		 */
+		*space_avail_now = false;
+	}
+
+	return 0;
+}
+
+static int
+vmci_transport_notify_pkt_recv_init(
+				struct sock *sk,
+				size_t target,
+				struct vmci_transport_recv_notify_data *data)
+{
+	struct vsock_sock *vsk = vsock_sk(sk);
+
+	data->consume_head = 0;
+	data->produce_tail = 0;
+	data->notify_on_block = false;
+
+	if (PKT_FIELD(vsk, write_notify_min_window) < target + 1) {
+		PKT_FIELD(vsk, write_notify_min_window) = target + 1;
+		if (PKT_FIELD(vsk, write_notify_window) <
+		    PKT_FIELD(vsk, write_notify_min_window)) {
+			/* If the current window is smaller than the new
+			 * minimal window size, we need to reevaluate whether
+			 * we need to notify the sender. If the number of ready
+			 * bytes are smaller than the new window, we need to
+			 * send a notification to the sender before we block.
+			 */
+
+			PKT_FIELD(vsk, write_notify_window) =
+			    PKT_FIELD(vsk, write_notify_min_window);
+			data->notify_on_block = true;
+		}
+	}
+
+	return 0;
+}
+
+static int
+vmci_transport_notify_pkt_recv_pre_block(
+				struct sock *sk,
+				size_t target,
+				struct vmci_transport_recv_notify_data *data)
+{
+	int err = 0;
+
+	vsock_block_update_write_window(sk);
+
+	if (data->notify_on_block) {
+		err = vmci_transport_send_read_notification(sk);
+		if (err < 0)
+			return err;
+		data->notify_on_block = false;
+	}
+
+	return err;
+}
+
+static int
+vmci_transport_notify_pkt_recv_post_dequeue(
+				struct sock *sk,
+				size_t target,
+				ssize_t copied,
+				bool data_read,
+				struct vmci_transport_recv_notify_data *data)
+{
+	struct vsock_sock *vsk;
+	int err;
+	bool was_full = false;
+	u64 free_space;
+
+	vsk = vsock_sk(sk);
+	err = 0;
+
+	if (data_read) {
+		smp_mb();
+
+		free_space =
+			vmci_qpair_consume_free_space(vmci_trans(vsk)->qpair);
+		was_full = free_space == copied;
+
+		if (was_full)
+			PKT_FIELD(vsk, peer_waiting_write) = true;
+
+		err = vmci_transport_send_read_notification(sk);
+		if (err < 0)
+			return err;
+
+		/* See the comment in
+		 * vmci_transport_notify_pkt_send_post_enqueue().
+		 */
+		sk->sk_data_ready(sk, 0);
+	}
+
+	return err;
+}
+
+static int
+vmci_transport_notify_pkt_send_init(
+				struct sock *sk,
+				struct vmci_transport_send_notify_data *data)
+{
+	data->consume_head = 0;
+	data->produce_tail = 0;
+
+	return 0;
+}
+
+static int
+vmci_transport_notify_pkt_send_post_enqueue(
+				struct sock *sk,
+				ssize_t written,
+				struct vmci_transport_send_notify_data *data)
+{
+	int err = 0;
+	struct vsock_sock *vsk;
+	bool sent_wrote = false;
+	bool was_empty;
+	int retries = 0;
+
+	vsk = vsock_sk(sk);
+
+	smp_mb();
+
+	was_empty =
+		vmci_qpair_produce_buf_ready(vmci_trans(vsk)->qpair) == written;
+	if (was_empty) {
+		while (!(vsk->peer_shutdown & RCV_SHUTDOWN) &&
+		       !sent_wrote &&
+		       retries < VMCI_TRANSPORT_MAX_DGRAM_RESENDS) {
+			err = vmci_transport_send_wrote(sk);
+			if (err >= 0)
+				sent_wrote = true;
+
+			retries++;
+		}
+	}
+
+	if (retries >= VMCI_TRANSPORT_MAX_DGRAM_RESENDS && !sent_wrote) {
+		pr_err("%p unable to send wrote notification to peer\n",
+		       sk);
+		return err;
+	}
+
+	return err;
+}
+
+static void
+vmci_transport_notify_pkt_handle_pkt(
+				struct sock *sk,
+				struct vmci_transport_packet *pkt,
+				bool bottom_half,
+				struct sockaddr_vm *dst,
+				struct sockaddr_vm *src, bool *pkt_processed)
+{
+	bool processed = false;
+
+	switch (pkt->type) {
+	case VMCI_TRANSPORT_PACKET_TYPE_WROTE:
+		vmci_transport_handle_wrote(sk, pkt, bottom_half, dst, src);
+		processed = true;
+		break;
+	case VMCI_TRANSPORT_PACKET_TYPE_READ:
+		vmci_transport_handle_read(sk, pkt, bottom_half, dst, src);
+		processed = true;
+		break;
+	}
+
+	if (pkt_processed)
+		*pkt_processed = processed;
+}
+
+static void vmci_transport_notify_pkt_process_request(struct sock *sk)
+{
+	struct vsock_sock *vsk = vsock_sk(sk);
+
+	PKT_FIELD(vsk, write_notify_window) = vmci_trans(vsk)->consume_size;
+	if (vmci_trans(vsk)->consume_size <
+		PKT_FIELD(vsk, write_notify_min_window))
+		PKT_FIELD(vsk, write_notify_min_window) =
+			vmci_trans(vsk)->consume_size;
+}
+
+static void vmci_transport_notify_pkt_process_negotiate(struct sock *sk)
+{
+	struct vsock_sock *vsk = vsock_sk(sk);
+
+	PKT_FIELD(vsk, write_notify_window) = vmci_trans(vsk)->consume_size;
+	if (vmci_trans(vsk)->consume_size <
+		PKT_FIELD(vsk, write_notify_min_window))
+		PKT_FIELD(vsk, write_notify_min_window) =
+			vmci_trans(vsk)->consume_size;
+}
+
+static int
+vmci_transport_notify_pkt_recv_pre_dequeue(
+				struct sock *sk,
+				size_t target,
+				struct vmci_transport_recv_notify_data *data)
+{
+	return 0; /* NOP for QState. */
+}
+
+static int
+vmci_transport_notify_pkt_send_pre_block(
+				struct sock *sk,
+				struct vmci_transport_send_notify_data *data)
+{
+	return 0; /* NOP for QState. */
+}
+
+static int
+vmci_transport_notify_pkt_send_pre_enqueue(
+				struct sock *sk,
+				struct vmci_transport_send_notify_data *data)
+{
+	return 0; /* NOP for QState. */
+}
+
+/* Socket always on control packet based operations. */
+struct vmci_transport_notify_ops vmci_transport_notify_pkt_q_state_ops = {
+	vmci_transport_notify_pkt_socket_init,
+	vmci_transport_notify_pkt_socket_destruct,
+	vmci_transport_notify_pkt_poll_in,
+	vmci_transport_notify_pkt_poll_out,
+	vmci_transport_notify_pkt_handle_pkt,
+	vmci_transport_notify_pkt_recv_init,
+	vmci_transport_notify_pkt_recv_pre_block,
+	vmci_transport_notify_pkt_recv_pre_dequeue,
+	vmci_transport_notify_pkt_recv_post_dequeue,
+	vmci_transport_notify_pkt_send_init,
+	vmci_transport_notify_pkt_send_pre_block,
+	vmci_transport_notify_pkt_send_pre_enqueue,
+	vmci_transport_notify_pkt_send_post_enqueue,
+	vmci_transport_notify_pkt_process_request,
+	vmci_transport_notify_pkt_process_negotiate,
+};
diff --git a/net/vmw_vsock/vsock_addr.c b/net/vmw_vsock/vsock_addr.c
new file mode 100644
index 000000000000..b7df1aea7c59
--- /dev/null
+++ b/net/vmw_vsock/vsock_addr.c
@@ -0,0 +1,86 @@
+/*
+ * VMware vSockets Driver
+ *
+ * Copyright (C) 2007-2012 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/stddef.h>
+#include <net/sock.h>
+
+#include "vsock_addr.h"
+
+void vsock_addr_init(struct sockaddr_vm *addr, u32 cid, u32 port)
+{
+	memset(addr, 0, sizeof(*addr));
+	addr->svm_family = AF_VSOCK;
+	addr->svm_cid = cid;
+	addr->svm_port = port;
+}
+EXPORT_SYMBOL_GPL(vsock_addr_init);
+
+int vsock_addr_validate(const struct sockaddr_vm *addr)
+{
+	if (!addr)
+		return -EFAULT;
+
+	if (addr->svm_family != AF_VSOCK)
+		return -EAFNOSUPPORT;
+
+	if (addr->svm_zero[0] != 0)
+		return -EINVAL;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(vsock_addr_validate);
+
+bool vsock_addr_bound(const struct sockaddr_vm *addr)
+{
+	return addr->svm_port != VMADDR_PORT_ANY;
+}
+EXPORT_SYMBOL_GPL(vsock_addr_bound);
+
+void vsock_addr_unbind(struct sockaddr_vm *addr)
+{
+	vsock_addr_init(addr, VMADDR_CID_ANY, VMADDR_PORT_ANY);
+}
+EXPORT_SYMBOL_GPL(vsock_addr_unbind);
+
+bool vsock_addr_equals_addr(const struct sockaddr_vm *addr,
+			    const struct sockaddr_vm *other)
+{
+	return addr->svm_cid == other->svm_cid &&
+		addr->svm_port == other->svm_port;
+}
+EXPORT_SYMBOL_GPL(vsock_addr_equals_addr);
+
+bool vsock_addr_equals_addr_any(const struct sockaddr_vm *addr,
+				const struct sockaddr_vm *other)
+{
+	return (addr->svm_cid == VMADDR_CID_ANY ||
+		other->svm_cid == VMADDR_CID_ANY ||
+		addr->svm_cid == other->svm_cid) &&
+	       addr->svm_port == other->svm_port;
+}
+EXPORT_SYMBOL_GPL(vsock_addr_equals_addr_any);
+
+int vsock_addr_cast(const struct sockaddr *addr,
+		    size_t len, struct sockaddr_vm **out_addr)
+{
+	if (len < sizeof(**out_addr))
+		return -EFAULT;
+
+	*out_addr = (struct sockaddr_vm *)addr;
+	return vsock_addr_validate(*out_addr);
+}
+EXPORT_SYMBOL_GPL(vsock_addr_cast);
diff --git a/net/vmw_vsock/vsock_addr.h b/net/vmw_vsock/vsock_addr.h
new file mode 100644
index 000000000000..cdfbcefdf843
--- /dev/null
+++ b/net/vmw_vsock/vsock_addr.h
@@ -0,0 +1,32 @@
+/*
+ * VMware vSockets Driver
+ *
+ * Copyright (C) 2007-2013 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _VSOCK_ADDR_H_
+#define _VSOCK_ADDR_H_
+
+#include <linux/vm_sockets.h>
+
+void vsock_addr_init(struct sockaddr_vm *addr, u32 cid, u32 port);
+int vsock_addr_validate(const struct sockaddr_vm *addr);
+bool vsock_addr_bound(const struct sockaddr_vm *addr);
+void vsock_addr_unbind(struct sockaddr_vm *addr);
+bool vsock_addr_equals_addr(const struct sockaddr_vm *addr,
+			    const struct sockaddr_vm *other);
+bool vsock_addr_equals_addr_any(const struct sockaddr_vm *addr,
+				const struct sockaddr_vm *other);
+int vsock_addr_cast(const struct sockaddr *addr, size_t len,
+		    struct sockaddr_vm **out_addr);
+
+#endif
diff --git a/net/vmw_vsock/vsock_version.h b/net/vmw_vsock/vsock_version.h
new file mode 100644
index 000000000000..4df7f5e2151c
--- /dev/null
+++ b/net/vmw_vsock/vsock_version.h
@@ -0,0 +1,22 @@
+/*
+ * VMware vSockets Driver
+ *
+ * Copyright (C) 2011-2012 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _VSOCK_VERSION_H_
+#define _VSOCK_VERSION_H_
+
+#define VSOCK_DRIVER_VERSION_PARTS	{ 1, 0, 0, 0 }
+#define VSOCK_DRIVER_VERSION_STRING	"1.0.0.0-k"
+
+#endif /* _VSOCK_VERSION_H_ */
-- 
cgit v1.2.3


From febf018d22347b5df94066bca05d0c11a84e839d Mon Sep 17 00:00:00 2001
From: David Ward <david.ward@ll.mit.edu>
Date: Fri, 8 Feb 2013 17:17:06 +0000
Subject: net/802: Implement Multiple Registration Protocol (MRP)

Initial implementation of the Multiple Registration Protocol (MRP)
from IEEE 802.1Q-2011, based on the existing implementation of the
Generic Attribute Registration Protocol (GARP).

Signed-off-by: David Ward <david.ward@ll.mit.edu>
Acked-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |   2 +
 include/net/mrp.h         | 143 ++++++++
 net/802/Kconfig           |   3 +
 net/802/Makefile          |   1 +
 net/802/mrp.c             | 895 ++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 1044 insertions(+)
 create mode 100644 include/net/mrp.h
 create mode 100644 net/802/mrp.c

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index ab2774eb49e8..25bd46f52877 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1290,6 +1290,8 @@ struct net_device {
 	};
 	/* GARP */
 	struct garp_port __rcu	*garp_port;
+	/* MRP */
+	struct mrp_port __rcu	*mrp_port;
 
 	/* class/net/name entry */
 	struct device		dev;
diff --git a/include/net/mrp.h b/include/net/mrp.h
new file mode 100644
index 000000000000..4fbf02aa2ec1
--- /dev/null
+++ b/include/net/mrp.h
@@ -0,0 +1,143 @@
+#ifndef _NET_MRP_H
+#define _NET_MRP_H
+
+#define MRP_END_MARK		0x0
+
+struct mrp_pdu_hdr {
+	u8	version;
+};
+
+struct mrp_msg_hdr {
+	u8	attrtype;
+	u8	attrlen;
+};
+
+struct mrp_vecattr_hdr {
+	__be16	lenflags;
+	unsigned char	firstattrvalue[];
+#define MRP_VECATTR_HDR_LEN_MASK cpu_to_be16(0x1FFF)
+#define MRP_VECATTR_HDR_FLAG_LA cpu_to_be16(0x2000)
+};
+
+enum mrp_vecattr_event {
+	MRP_VECATTR_EVENT_NEW,
+	MRP_VECATTR_EVENT_JOIN_IN,
+	MRP_VECATTR_EVENT_IN,
+	MRP_VECATTR_EVENT_JOIN_MT,
+	MRP_VECATTR_EVENT_MT,
+	MRP_VECATTR_EVENT_LV,
+	__MRP_VECATTR_EVENT_MAX
+};
+
+struct mrp_skb_cb {
+	struct mrp_msg_hdr	*mh;
+	struct mrp_vecattr_hdr	*vah;
+	unsigned char		attrvalue[];
+};
+
+static inline struct mrp_skb_cb *mrp_cb(struct sk_buff *skb)
+{
+	BUILD_BUG_ON(sizeof(struct mrp_skb_cb) >
+		     FIELD_SIZEOF(struct sk_buff, cb));
+	return (struct mrp_skb_cb *)skb->cb;
+}
+
+enum mrp_applicant_state {
+	MRP_APPLICANT_INVALID,
+	MRP_APPLICANT_VO,
+	MRP_APPLICANT_VP,
+	MRP_APPLICANT_VN,
+	MRP_APPLICANT_AN,
+	MRP_APPLICANT_AA,
+	MRP_APPLICANT_QA,
+	MRP_APPLICANT_LA,
+	MRP_APPLICANT_AO,
+	MRP_APPLICANT_QO,
+	MRP_APPLICANT_AP,
+	MRP_APPLICANT_QP,
+	__MRP_APPLICANT_MAX
+};
+#define MRP_APPLICANT_MAX	(__MRP_APPLICANT_MAX - 1)
+
+enum mrp_event {
+	MRP_EVENT_NEW,
+	MRP_EVENT_JOIN,
+	MRP_EVENT_LV,
+	MRP_EVENT_TX,
+	MRP_EVENT_R_NEW,
+	MRP_EVENT_R_JOIN_IN,
+	MRP_EVENT_R_IN,
+	MRP_EVENT_R_JOIN_MT,
+	MRP_EVENT_R_MT,
+	MRP_EVENT_R_LV,
+	MRP_EVENT_R_LA,
+	MRP_EVENT_REDECLARE,
+	MRP_EVENT_PERIODIC,
+	__MRP_EVENT_MAX
+};
+#define MRP_EVENT_MAX		(__MRP_EVENT_MAX - 1)
+
+enum mrp_tx_action {
+	MRP_TX_ACTION_NONE,
+	MRP_TX_ACTION_S_NEW,
+	MRP_TX_ACTION_S_JOIN_IN,
+	MRP_TX_ACTION_S_JOIN_IN_OPTIONAL,
+	MRP_TX_ACTION_S_IN_OPTIONAL,
+	MRP_TX_ACTION_S_LV,
+};
+
+struct mrp_attr {
+	struct rb_node			node;
+	enum mrp_applicant_state	state;
+	u8				type;
+	u8				len;
+	unsigned char			value[];
+};
+
+enum mrp_applications {
+	MRP_APPLICATION_MVRP,
+	__MRP_APPLICATION_MAX
+};
+#define MRP_APPLICATION_MAX	(__MRP_APPLICATION_MAX - 1)
+
+struct mrp_application {
+	enum mrp_applications	type;
+	unsigned int		maxattr;
+	struct packet_type	pkttype;
+	unsigned char		group_address[ETH_ALEN];
+	u8			version;
+};
+
+struct mrp_applicant {
+	struct mrp_application	*app;
+	struct net_device	*dev;
+	struct timer_list	join_timer;
+
+	spinlock_t		lock;
+	struct sk_buff_head	queue;
+	struct sk_buff		*pdu;
+	struct rb_root		mad;
+	struct rcu_head		rcu;
+};
+
+struct mrp_port {
+	struct mrp_applicant __rcu	*applicants[MRP_APPLICATION_MAX + 1];
+	struct rcu_head			rcu;
+};
+
+extern int	mrp_register_application(struct mrp_application *app);
+extern void	mrp_unregister_application(struct mrp_application *app);
+
+extern int	mrp_init_applicant(struct net_device *dev,
+				    struct mrp_application *app);
+extern void	mrp_uninit_applicant(struct net_device *dev,
+				      struct mrp_application *app);
+
+extern int	mrp_request_join(const struct net_device *dev,
+				  const struct mrp_application *app,
+				  const void *value, u8 len, u8 type);
+extern void	mrp_request_leave(const struct net_device *dev,
+				   const struct mrp_application *app,
+				   const void *value, u8 len, u8 type);
+
+#endif /* _NET_MRP_H */
diff --git a/net/802/Kconfig b/net/802/Kconfig
index be33d27c8e69..80d4bf78905d 100644
--- a/net/802/Kconfig
+++ b/net/802/Kconfig
@@ -5,3 +5,6 @@ config STP
 config GARP
 	tristate
 	select STP
+
+config MRP
+	tristate
diff --git a/net/802/Makefile b/net/802/Makefile
index a30d6e385aed..37e654d6615e 100644
--- a/net/802/Makefile
+++ b/net/802/Makefile
@@ -11,3 +11,4 @@ obj-$(CONFIG_IPX)	+= p8022.o psnap.o p8023.o
 obj-$(CONFIG_ATALK)	+= p8022.o psnap.o
 obj-$(CONFIG_STP)	+= stp.o
 obj-$(CONFIG_GARP)	+= garp.o
+obj-$(CONFIG_MRP)	+= mrp.o
diff --git a/net/802/mrp.c b/net/802/mrp.c
new file mode 100644
index 000000000000..47a9e14c8ba7
--- /dev/null
+++ b/net/802/mrp.c
@@ -0,0 +1,895 @@
+/*
+ *	IEEE 802.1Q Multiple Registration Protocol (MRP)
+ *
+ *	Copyright (c) 2012 Massachusetts Institute of Technology
+ *
+ *	Adapted from code in net/802/garp.c
+ *	Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	version 2 as published by the Free Software Foundation.
+ */
+#include <linux/kernel.h>
+#include <linux/timer.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/rtnetlink.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <net/mrp.h>
+#include <asm/unaligned.h>
+
+static unsigned int mrp_join_time __read_mostly = 200;
+module_param(mrp_join_time, uint, 0644);
+MODULE_PARM_DESC(mrp_join_time, "Join time in ms (default 200ms)");
+MODULE_LICENSE("GPL");
+
+static const u8
+mrp_applicant_state_table[MRP_APPLICANT_MAX + 1][MRP_EVENT_MAX + 1] = {
+	[MRP_APPLICANT_VO] = {
+		[MRP_EVENT_NEW]		= MRP_APPLICANT_VN,
+		[MRP_EVENT_JOIN]	= MRP_APPLICANT_VP,
+		[MRP_EVENT_LV]		= MRP_APPLICANT_VO,
+		[MRP_EVENT_TX]		= MRP_APPLICANT_VO,
+		[MRP_EVENT_R_NEW]	= MRP_APPLICANT_VO,
+		[MRP_EVENT_R_JOIN_IN]	= MRP_APPLICANT_AO,
+		[MRP_EVENT_R_IN]	= MRP_APPLICANT_VO,
+		[MRP_EVENT_R_JOIN_MT]	= MRP_APPLICANT_VO,
+		[MRP_EVENT_R_MT]	= MRP_APPLICANT_VO,
+		[MRP_EVENT_R_LV]	= MRP_APPLICANT_VO,
+		[MRP_EVENT_R_LA]	= MRP_APPLICANT_VO,
+		[MRP_EVENT_REDECLARE]	= MRP_APPLICANT_VO,
+		[MRP_EVENT_PERIODIC]	= MRP_APPLICANT_VO,
+	},
+	[MRP_APPLICANT_VP] = {
+		[MRP_EVENT_NEW]		= MRP_APPLICANT_VN,
+		[MRP_EVENT_JOIN]	= MRP_APPLICANT_VP,
+		[MRP_EVENT_LV]		= MRP_APPLICANT_VO,
+		[MRP_EVENT_TX]		= MRP_APPLICANT_AA,
+		[MRP_EVENT_R_NEW]	= MRP_APPLICANT_VP,
+		[MRP_EVENT_R_JOIN_IN]	= MRP_APPLICANT_AP,
+		[MRP_EVENT_R_IN]	= MRP_APPLICANT_VP,
+		[MRP_EVENT_R_JOIN_MT]	= MRP_APPLICANT_VP,
+		[MRP_EVENT_R_MT]	= MRP_APPLICANT_VP,
+		[MRP_EVENT_R_LV]	= MRP_APPLICANT_VP,
+		[MRP_EVENT_R_LA]	= MRP_APPLICANT_VP,
+		[MRP_EVENT_REDECLARE]	= MRP_APPLICANT_VP,
+		[MRP_EVENT_PERIODIC]	= MRP_APPLICANT_VP,
+	},
+	[MRP_APPLICANT_VN] = {
+		[MRP_EVENT_NEW]		= MRP_APPLICANT_VN,
+		[MRP_EVENT_JOIN]	= MRP_APPLICANT_VN,
+		[MRP_EVENT_LV]		= MRP_APPLICANT_LA,
+		[MRP_EVENT_TX]		= MRP_APPLICANT_AN,
+		[MRP_EVENT_R_NEW]	= MRP_APPLICANT_VN,
+		[MRP_EVENT_R_JOIN_IN]	= MRP_APPLICANT_VN,
+		[MRP_EVENT_R_IN]	= MRP_APPLICANT_VN,
+		[MRP_EVENT_R_JOIN_MT]	= MRP_APPLICANT_VN,
+		[MRP_EVENT_R_MT]	= MRP_APPLICANT_VN,
+		[MRP_EVENT_R_LV]	= MRP_APPLICANT_VN,
+		[MRP_EVENT_R_LA]	= MRP_APPLICANT_VN,
+		[MRP_EVENT_REDECLARE]	= MRP_APPLICANT_VN,
+		[MRP_EVENT_PERIODIC]	= MRP_APPLICANT_VN,
+	},
+	[MRP_APPLICANT_AN] = {
+		[MRP_EVENT_NEW]		= MRP_APPLICANT_AN,
+		[MRP_EVENT_JOIN]	= MRP_APPLICANT_AN,
+		[MRP_EVENT_LV]		= MRP_APPLICANT_LA,
+		[MRP_EVENT_TX]		= MRP_APPLICANT_QA,
+		[MRP_EVENT_R_NEW]	= MRP_APPLICANT_AN,
+		[MRP_EVENT_R_JOIN_IN]	= MRP_APPLICANT_AN,
+		[MRP_EVENT_R_IN]	= MRP_APPLICANT_AN,
+		[MRP_EVENT_R_JOIN_MT]	= MRP_APPLICANT_AN,
+		[MRP_EVENT_R_MT]	= MRP_APPLICANT_AN,
+		[MRP_EVENT_R_LV]	= MRP_APPLICANT_VN,
+		[MRP_EVENT_R_LA]	= MRP_APPLICANT_VN,
+		[MRP_EVENT_REDECLARE]	= MRP_APPLICANT_VN,
+		[MRP_EVENT_PERIODIC]	= MRP_APPLICANT_AN,
+	},
+	[MRP_APPLICANT_AA] = {
+		[MRP_EVENT_NEW]		= MRP_APPLICANT_VN,
+		[MRP_EVENT_JOIN]	= MRP_APPLICANT_AA,
+		[MRP_EVENT_LV]		= MRP_APPLICANT_LA,
+		[MRP_EVENT_TX]		= MRP_APPLICANT_QA,
+		[MRP_EVENT_R_NEW]	= MRP_APPLICANT_AA,
+		[MRP_EVENT_R_JOIN_IN]	= MRP_APPLICANT_QA,
+		[MRP_EVENT_R_IN]	= MRP_APPLICANT_AA,
+		[MRP_EVENT_R_JOIN_MT]	= MRP_APPLICANT_AA,
+		[MRP_EVENT_R_MT]	= MRP_APPLICANT_AA,
+		[MRP_EVENT_R_LV]	= MRP_APPLICANT_VP,
+		[MRP_EVENT_R_LA]	= MRP_APPLICANT_VP,
+		[MRP_EVENT_REDECLARE]	= MRP_APPLICANT_VP,
+		[MRP_EVENT_PERIODIC]	= MRP_APPLICANT_AA,
+	},
+	[MRP_APPLICANT_QA] = {
+		[MRP_EVENT_NEW]		= MRP_APPLICANT_VN,
+		[MRP_EVENT_JOIN]	= MRP_APPLICANT_QA,
+		[MRP_EVENT_LV]		= MRP_APPLICANT_LA,
+		[MRP_EVENT_TX]		= MRP_APPLICANT_QA,
+		[MRP_EVENT_R_NEW]	= MRP_APPLICANT_QA,
+		[MRP_EVENT_R_JOIN_IN]	= MRP_APPLICANT_QA,
+		[MRP_EVENT_R_IN]	= MRP_APPLICANT_QA,
+		[MRP_EVENT_R_JOIN_MT]	= MRP_APPLICANT_AA,
+		[MRP_EVENT_R_MT]	= MRP_APPLICANT_AA,
+		[MRP_EVENT_R_LV]	= MRP_APPLICANT_VP,
+		[MRP_EVENT_R_LA]	= MRP_APPLICANT_VP,
+		[MRP_EVENT_REDECLARE]	= MRP_APPLICANT_VP,
+		[MRP_EVENT_PERIODIC]	= MRP_APPLICANT_AA,
+	},
+	[MRP_APPLICANT_LA] = {
+		[MRP_EVENT_NEW]		= MRP_APPLICANT_VN,
+		[MRP_EVENT_JOIN]	= MRP_APPLICANT_AA,
+		[MRP_EVENT_LV]		= MRP_APPLICANT_LA,
+		[MRP_EVENT_TX]		= MRP_APPLICANT_VO,
+		[MRP_EVENT_R_NEW]	= MRP_APPLICANT_LA,
+		[MRP_EVENT_R_JOIN_IN]	= MRP_APPLICANT_LA,
+		[MRP_EVENT_R_IN]	= MRP_APPLICANT_LA,
+		[MRP_EVENT_R_JOIN_MT]	= MRP_APPLICANT_LA,
+		[MRP_EVENT_R_MT]	= MRP_APPLICANT_LA,
+		[MRP_EVENT_R_LV]	= MRP_APPLICANT_LA,
+		[MRP_EVENT_R_LA]	= MRP_APPLICANT_LA,
+		[MRP_EVENT_REDECLARE]	= MRP_APPLICANT_LA,
+		[MRP_EVENT_PERIODIC]	= MRP_APPLICANT_LA,
+	},
+	[MRP_APPLICANT_AO] = {
+		[MRP_EVENT_NEW]		= MRP_APPLICANT_VN,
+		[MRP_EVENT_JOIN]	= MRP_APPLICANT_AP,
+		[MRP_EVENT_LV]		= MRP_APPLICANT_AO,
+		[MRP_EVENT_TX]		= MRP_APPLICANT_AO,
+		[MRP_EVENT_R_NEW]	= MRP_APPLICANT_AO,
+		[MRP_EVENT_R_JOIN_IN]	= MRP_APPLICANT_QO,
+		[MRP_EVENT_R_IN]	= MRP_APPLICANT_AO,
+		[MRP_EVENT_R_JOIN_MT]	= MRP_APPLICANT_AO,
+		[MRP_EVENT_R_MT]	= MRP_APPLICANT_AO,
+		[MRP_EVENT_R_LV]	= MRP_APPLICANT_VO,
+		[MRP_EVENT_R_LA]	= MRP_APPLICANT_VO,
+		[MRP_EVENT_REDECLARE]	= MRP_APPLICANT_VO,
+		[MRP_EVENT_PERIODIC]	= MRP_APPLICANT_AO,
+	},
+	[MRP_APPLICANT_QO] = {
+		[MRP_EVENT_NEW]		= MRP_APPLICANT_VN,
+		[MRP_EVENT_JOIN]	= MRP_APPLICANT_QP,
+		[MRP_EVENT_LV]		= MRP_APPLICANT_QO,
+		[MRP_EVENT_TX]		= MRP_APPLICANT_QO,
+		[MRP_EVENT_R_NEW]	= MRP_APPLICANT_QO,
+		[MRP_EVENT_R_JOIN_IN]	= MRP_APPLICANT_QO,
+		[MRP_EVENT_R_IN]	= MRP_APPLICANT_QO,
+		[MRP_EVENT_R_JOIN_MT]	= MRP_APPLICANT_AO,
+		[MRP_EVENT_R_MT]	= MRP_APPLICANT_AO,
+		[MRP_EVENT_R_LV]	= MRP_APPLICANT_VO,
+		[MRP_EVENT_R_LA]	= MRP_APPLICANT_VO,
+		[MRP_EVENT_REDECLARE]	= MRP_APPLICANT_VO,
+		[MRP_EVENT_PERIODIC]	= MRP_APPLICANT_QO,
+	},
+	[MRP_APPLICANT_AP] = {
+		[MRP_EVENT_NEW]		= MRP_APPLICANT_VN,
+		[MRP_EVENT_JOIN]	= MRP_APPLICANT_AP,
+		[MRP_EVENT_LV]		= MRP_APPLICANT_AO,
+		[MRP_EVENT_TX]		= MRP_APPLICANT_QA,
+		[MRP_EVENT_R_NEW]	= MRP_APPLICANT_AP,
+		[MRP_EVENT_R_JOIN_IN]	= MRP_APPLICANT_QP,
+		[MRP_EVENT_R_IN]	= MRP_APPLICANT_AP,
+		[MRP_EVENT_R_JOIN_MT]	= MRP_APPLICANT_AP,
+		[MRP_EVENT_R_MT]	= MRP_APPLICANT_AP,
+		[MRP_EVENT_R_LV]	= MRP_APPLICANT_VP,
+		[MRP_EVENT_R_LA]	= MRP_APPLICANT_VP,
+		[MRP_EVENT_REDECLARE]	= MRP_APPLICANT_VP,
+		[MRP_EVENT_PERIODIC]	= MRP_APPLICANT_AP,
+	},
+	[MRP_APPLICANT_QP] = {
+		[MRP_EVENT_NEW]		= MRP_APPLICANT_VN,
+		[MRP_EVENT_JOIN]	= MRP_APPLICANT_QP,
+		[MRP_EVENT_LV]		= MRP_APPLICANT_QO,
+		[MRP_EVENT_TX]		= MRP_APPLICANT_QP,
+		[MRP_EVENT_R_NEW]	= MRP_APPLICANT_QP,
+		[MRP_EVENT_R_JOIN_IN]	= MRP_APPLICANT_QP,
+		[MRP_EVENT_R_IN]	= MRP_APPLICANT_QP,
+		[MRP_EVENT_R_JOIN_MT]	= MRP_APPLICANT_AP,
+		[MRP_EVENT_R_MT]	= MRP_APPLICANT_AP,
+		[MRP_EVENT_R_LV]	= MRP_APPLICANT_VP,
+		[MRP_EVENT_R_LA]	= MRP_APPLICANT_VP,
+		[MRP_EVENT_REDECLARE]	= MRP_APPLICANT_VP,
+		[MRP_EVENT_PERIODIC]	= MRP_APPLICANT_AP,
+	},
+};
+
+static const u8
+mrp_tx_action_table[MRP_APPLICANT_MAX + 1] = {
+	[MRP_APPLICANT_VO] = MRP_TX_ACTION_S_IN_OPTIONAL,
+	[MRP_APPLICANT_VP] = MRP_TX_ACTION_S_JOIN_IN,
+	[MRP_APPLICANT_VN] = MRP_TX_ACTION_S_NEW,
+	[MRP_APPLICANT_AN] = MRP_TX_ACTION_S_NEW,
+	[MRP_APPLICANT_AA] = MRP_TX_ACTION_S_JOIN_IN,
+	[MRP_APPLICANT_QA] = MRP_TX_ACTION_S_JOIN_IN_OPTIONAL,
+	[MRP_APPLICANT_LA] = MRP_TX_ACTION_S_LV,
+	[MRP_APPLICANT_AO] = MRP_TX_ACTION_S_IN_OPTIONAL,
+	[MRP_APPLICANT_QO] = MRP_TX_ACTION_S_IN_OPTIONAL,
+	[MRP_APPLICANT_AP] = MRP_TX_ACTION_S_JOIN_IN,
+	[MRP_APPLICANT_QP] = MRP_TX_ACTION_S_IN_OPTIONAL,
+};
+
+static void mrp_attrvalue_inc(void *value, u8 len)
+{
+	u8 *v = (u8 *)value;
+
+	/* Add 1 to the last byte. If it becomes zero,
+	 * go to the previous byte and repeat.
+	 */
+	while (len > 0 && !++v[--len])
+		;
+}
+
+static int mrp_attr_cmp(const struct mrp_attr *attr,
+			 const void *value, u8 len, u8 type)
+{
+	if (attr->type != type)
+		return attr->type - type;
+	if (attr->len != len)
+		return attr->len - len;
+	return memcmp(attr->value, value, len);
+}
+
+static struct mrp_attr *mrp_attr_lookup(const struct mrp_applicant *app,
+					const void *value, u8 len, u8 type)
+{
+	struct rb_node *parent = app->mad.rb_node;
+	struct mrp_attr *attr;
+	int d;
+
+	while (parent) {
+		attr = rb_entry(parent, struct mrp_attr, node);
+		d = mrp_attr_cmp(attr, value, len, type);
+		if (d > 0)
+			parent = parent->rb_left;
+		else if (d < 0)
+			parent = parent->rb_right;
+		else
+			return attr;
+	}
+	return NULL;
+}
+
+static struct mrp_attr *mrp_attr_create(struct mrp_applicant *app,
+					const void *value, u8 len, u8 type)
+{
+	struct rb_node *parent = NULL, **p = &app->mad.rb_node;
+	struct mrp_attr *attr;
+	int d;
+
+	while (*p) {
+		parent = *p;
+		attr = rb_entry(parent, struct mrp_attr, node);
+		d = mrp_attr_cmp(attr, value, len, type);
+		if (d > 0)
+			p = &parent->rb_left;
+		else if (d < 0)
+			p = &parent->rb_right;
+		else {
+			/* The attribute already exists; re-use it. */
+			return attr;
+		}
+	}
+	attr = kmalloc(sizeof(*attr) + len, GFP_ATOMIC);
+	if (!attr)
+		return attr;
+	attr->state = MRP_APPLICANT_VO;
+	attr->type  = type;
+	attr->len   = len;
+	memcpy(attr->value, value, len);
+
+	rb_link_node(&attr->node, parent, p);
+	rb_insert_color(&attr->node, &app->mad);
+	return attr;
+}
+
+static void mrp_attr_destroy(struct mrp_applicant *app, struct mrp_attr *attr)
+{
+	rb_erase(&attr->node, &app->mad);
+	kfree(attr);
+}
+
+static int mrp_pdu_init(struct mrp_applicant *app)
+{
+	struct sk_buff *skb;
+	struct mrp_pdu_hdr *ph;
+
+	skb = alloc_skb(app->dev->mtu + LL_RESERVED_SPACE(app->dev),
+			GFP_ATOMIC);
+	if (!skb)
+		return -ENOMEM;
+
+	skb->dev = app->dev;
+	skb->protocol = app->app->pkttype.type;
+	skb_reserve(skb, LL_RESERVED_SPACE(app->dev));
+	skb_reset_network_header(skb);
+	skb_reset_transport_header(skb);
+
+	ph = (struct mrp_pdu_hdr *)__skb_put(skb, sizeof(*ph));
+	ph->version = app->app->version;
+
+	app->pdu = skb;
+	return 0;
+}
+
+static int mrp_pdu_append_end_mark(struct mrp_applicant *app)
+{
+	__be16 *endmark;
+
+	if (skb_tailroom(app->pdu) < sizeof(*endmark))
+		return -1;
+	endmark = (__be16 *)__skb_put(app->pdu, sizeof(*endmark));
+	put_unaligned(MRP_END_MARK, endmark);
+	return 0;
+}
+
+static void mrp_pdu_queue(struct mrp_applicant *app)
+{
+	if (!app->pdu)
+		return;
+
+	if (mrp_cb(app->pdu)->mh)
+		mrp_pdu_append_end_mark(app);
+	mrp_pdu_append_end_mark(app);
+
+	dev_hard_header(app->pdu, app->dev, ntohs(app->app->pkttype.type),
+			app->app->group_address, app->dev->dev_addr,
+			app->pdu->len);
+
+	skb_queue_tail(&app->queue, app->pdu);
+	app->pdu = NULL;
+}
+
+static void mrp_queue_xmit(struct mrp_applicant *app)
+{
+	struct sk_buff *skb;
+
+	while ((skb = skb_dequeue(&app->queue)))
+		dev_queue_xmit(skb);
+}
+
+static int mrp_pdu_append_msg_hdr(struct mrp_applicant *app,
+				  u8 attrtype, u8 attrlen)
+{
+	struct mrp_msg_hdr *mh;
+
+	if (mrp_cb(app->pdu)->mh) {
+		if (mrp_pdu_append_end_mark(app) < 0)
+			return -1;
+		mrp_cb(app->pdu)->mh = NULL;
+		mrp_cb(app->pdu)->vah = NULL;
+	}
+
+	if (skb_tailroom(app->pdu) < sizeof(*mh))
+		return -1;
+	mh = (struct mrp_msg_hdr *)__skb_put(app->pdu, sizeof(*mh));
+	mh->attrtype = attrtype;
+	mh->attrlen = attrlen;
+	mrp_cb(app->pdu)->mh = mh;
+	return 0;
+}
+
+static int mrp_pdu_append_vecattr_hdr(struct mrp_applicant *app,
+				      const void *firstattrvalue, u8 attrlen)
+{
+	struct mrp_vecattr_hdr *vah;
+
+	if (skb_tailroom(app->pdu) < sizeof(*vah) + attrlen)
+		return -1;
+	vah = (struct mrp_vecattr_hdr *)__skb_put(app->pdu,
+						  sizeof(*vah) + attrlen);
+	put_unaligned(0, &vah->lenflags);
+	memcpy(vah->firstattrvalue, firstattrvalue, attrlen);
+	mrp_cb(app->pdu)->vah = vah;
+	memcpy(mrp_cb(app->pdu)->attrvalue, firstattrvalue, attrlen);
+	return 0;
+}
+
+static int mrp_pdu_append_vecattr_event(struct mrp_applicant *app,
+					const struct mrp_attr *attr,
+					enum mrp_vecattr_event vaevent)
+{
+	u16 len, pos;
+	u8 *vaevents;
+	int err;
+again:
+	if (!app->pdu) {
+		err = mrp_pdu_init(app);
+		if (err < 0)
+			return err;
+	}
+
+	/* If there is no Message header in the PDU, or the Message header is
+	 * for a different attribute type, add an EndMark (if necessary) and a
+	 * new Message header to the PDU.
+	 */
+	if (!mrp_cb(app->pdu)->mh ||
+	    mrp_cb(app->pdu)->mh->attrtype != attr->type ||
+	    mrp_cb(app->pdu)->mh->attrlen != attr->len) {
+		if (mrp_pdu_append_msg_hdr(app, attr->type, attr->len) < 0)
+			goto queue;
+	}
+
+	/* If there is no VectorAttribute header for this Message in the PDU,
+	 * or this attribute's value does not sequentially follow the previous
+	 * attribute's value, add a new VectorAttribute header to the PDU.
+	 */
+	if (!mrp_cb(app->pdu)->vah ||
+	    memcmp(mrp_cb(app->pdu)->attrvalue, attr->value, attr->len)) {
+		if (mrp_pdu_append_vecattr_hdr(app, attr->value, attr->len) < 0)
+			goto queue;
+	}
+
+	len = be16_to_cpu(get_unaligned(&mrp_cb(app->pdu)->vah->lenflags));
+	pos = len % 3;
+
+	/* Events are packed into Vectors in the PDU, three to a byte. Add a
+	 * byte to the end of the Vector if necessary.
+	 */
+	if (!pos) {
+		if (skb_tailroom(app->pdu) < sizeof(u8))
+			goto queue;
+		vaevents = (u8 *)__skb_put(app->pdu, sizeof(u8));
+	} else {
+		vaevents = (u8 *)(skb_tail_pointer(app->pdu) - sizeof(u8));
+	}
+
+	switch (pos) {
+	case 0:
+		*vaevents = vaevent * (__MRP_VECATTR_EVENT_MAX *
+				       __MRP_VECATTR_EVENT_MAX);
+		break;
+	case 1:
+		*vaevents += vaevent * __MRP_VECATTR_EVENT_MAX;
+		break;
+	case 2:
+		*vaevents += vaevent;
+		break;
+	default:
+		WARN_ON(1);
+	}
+
+	/* Increment the length of the VectorAttribute in the PDU, as well as
+	 * the value of the next attribute that would continue its Vector.
+	 */
+	put_unaligned(cpu_to_be16(++len), &mrp_cb(app->pdu)->vah->lenflags);
+	mrp_attrvalue_inc(mrp_cb(app->pdu)->attrvalue, attr->len);
+
+	return 0;
+
+queue:
+	mrp_pdu_queue(app);
+	goto again;
+}
+
+static void mrp_attr_event(struct mrp_applicant *app,
+			   struct mrp_attr *attr, enum mrp_event event)
+{
+	enum mrp_applicant_state state;
+
+	state = mrp_applicant_state_table[attr->state][event];
+	if (state == MRP_APPLICANT_INVALID) {
+		WARN_ON(1);
+		return;
+	}
+
+	if (event == MRP_EVENT_TX) {
+		/* When appending the attribute fails, don't update its state
+		 * in order to retry at the next TX event.
+		 */
+
+		switch (mrp_tx_action_table[attr->state]) {
+		case MRP_TX_ACTION_NONE:
+		case MRP_TX_ACTION_S_JOIN_IN_OPTIONAL:
+		case MRP_TX_ACTION_S_IN_OPTIONAL:
+			break;
+		case MRP_TX_ACTION_S_NEW:
+			if (mrp_pdu_append_vecattr_event(
+				    app, attr, MRP_VECATTR_EVENT_NEW) < 0)
+				return;
+			break;
+		case MRP_TX_ACTION_S_JOIN_IN:
+			if (mrp_pdu_append_vecattr_event(
+				    app, attr, MRP_VECATTR_EVENT_JOIN_IN) < 0)
+				return;
+			break;
+		case MRP_TX_ACTION_S_LV:
+			if (mrp_pdu_append_vecattr_event(
+				    app, attr, MRP_VECATTR_EVENT_LV) < 0)
+				return;
+			/* As a pure applicant, sending a leave message
+			 * implies that the attribute was unregistered and
+			 * can be destroyed.
+			 */
+			mrp_attr_destroy(app, attr);
+			return;
+		default:
+			WARN_ON(1);
+		}
+	}
+
+	attr->state = state;
+}
+
+int mrp_request_join(const struct net_device *dev,
+		     const struct mrp_application *appl,
+		     const void *value, u8 len, u8 type)
+{
+	struct mrp_port *port = rtnl_dereference(dev->mrp_port);
+	struct mrp_applicant *app = rtnl_dereference(
+		port->applicants[appl->type]);
+	struct mrp_attr *attr;
+
+	if (sizeof(struct mrp_skb_cb) + len >
+	    FIELD_SIZEOF(struct sk_buff, cb))
+		return -ENOMEM;
+
+	spin_lock_bh(&app->lock);
+	attr = mrp_attr_create(app, value, len, type);
+	if (!attr) {
+		spin_unlock_bh(&app->lock);
+		return -ENOMEM;
+	}
+	mrp_attr_event(app, attr, MRP_EVENT_JOIN);
+	spin_unlock_bh(&app->lock);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(mrp_request_join);
+
+void mrp_request_leave(const struct net_device *dev,
+		       const struct mrp_application *appl,
+		       const void *value, u8 len, u8 type)
+{
+	struct mrp_port *port = rtnl_dereference(dev->mrp_port);
+	struct mrp_applicant *app = rtnl_dereference(
+		port->applicants[appl->type]);
+	struct mrp_attr *attr;
+
+	if (sizeof(struct mrp_skb_cb) + len >
+	    FIELD_SIZEOF(struct sk_buff, cb))
+		return;
+
+	spin_lock_bh(&app->lock);
+	attr = mrp_attr_lookup(app, value, len, type);
+	if (!attr) {
+		spin_unlock_bh(&app->lock);
+		return;
+	}
+	mrp_attr_event(app, attr, MRP_EVENT_LV);
+	spin_unlock_bh(&app->lock);
+}
+EXPORT_SYMBOL_GPL(mrp_request_leave);
+
+static void mrp_mad_event(struct mrp_applicant *app, enum mrp_event event)
+{
+	struct rb_node *node, *next;
+	struct mrp_attr *attr;
+
+	for (node = rb_first(&app->mad);
+	     next = node ? rb_next(node) : NULL, node != NULL;
+	     node = next) {
+		attr = rb_entry(node, struct mrp_attr, node);
+		mrp_attr_event(app, attr, event);
+	}
+}
+
+static void mrp_join_timer_arm(struct mrp_applicant *app)
+{
+	unsigned long delay;
+
+	delay = (u64)msecs_to_jiffies(mrp_join_time) * net_random() >> 32;
+	mod_timer(&app->join_timer, jiffies + delay);
+}
+
+static void mrp_join_timer(unsigned long data)
+{
+	struct mrp_applicant *app = (struct mrp_applicant *)data;
+
+	spin_lock(&app->lock);
+	mrp_mad_event(app, MRP_EVENT_TX);
+	mrp_pdu_queue(app);
+	spin_unlock(&app->lock);
+
+	mrp_queue_xmit(app);
+	mrp_join_timer_arm(app);
+}
+
+static int mrp_pdu_parse_end_mark(struct sk_buff *skb, int *offset)
+{
+	__be16 endmark;
+
+	if (skb_copy_bits(skb, *offset, &endmark, sizeof(endmark)) < 0)
+		return -1;
+	if (endmark == MRP_END_MARK) {
+		*offset += sizeof(endmark);
+		return -1;
+	}
+	return 0;
+}
+
+static void mrp_pdu_parse_vecattr_event(struct mrp_applicant *app,
+					struct sk_buff *skb,
+					enum mrp_vecattr_event vaevent)
+{
+	struct mrp_attr *attr;
+	enum mrp_event event;
+
+	attr = mrp_attr_lookup(app, mrp_cb(skb)->attrvalue,
+			       mrp_cb(skb)->mh->attrlen,
+			       mrp_cb(skb)->mh->attrtype);
+	if (attr == NULL)
+		return;
+
+	switch (vaevent) {
+	case MRP_VECATTR_EVENT_NEW:
+		event = MRP_EVENT_R_NEW;
+		break;
+	case MRP_VECATTR_EVENT_JOIN_IN:
+		event = MRP_EVENT_R_JOIN_IN;
+		break;
+	case MRP_VECATTR_EVENT_IN:
+		event = MRP_EVENT_R_IN;
+		break;
+	case MRP_VECATTR_EVENT_JOIN_MT:
+		event = MRP_EVENT_R_JOIN_MT;
+		break;
+	case MRP_VECATTR_EVENT_MT:
+		event = MRP_EVENT_R_MT;
+		break;
+	case MRP_VECATTR_EVENT_LV:
+		event = MRP_EVENT_R_LV;
+		break;
+	default:
+		return;
+	}
+
+	mrp_attr_event(app, attr, event);
+}
+
+static int mrp_pdu_parse_vecattr(struct mrp_applicant *app,
+				 struct sk_buff *skb, int *offset)
+{
+	struct mrp_vecattr_hdr _vah;
+	u16 valen;
+	u8 vaevents, vaevent;
+
+	mrp_cb(skb)->vah = skb_header_pointer(skb, *offset, sizeof(_vah),
+					      &_vah);
+	if (!mrp_cb(skb)->vah)
+		return -1;
+	*offset += sizeof(_vah);
+
+	if (get_unaligned(&mrp_cb(skb)->vah->lenflags) &
+	    MRP_VECATTR_HDR_FLAG_LA)
+		mrp_mad_event(app, MRP_EVENT_R_LA);
+	valen = be16_to_cpu(get_unaligned(&mrp_cb(skb)->vah->lenflags) &
+			    MRP_VECATTR_HDR_LEN_MASK);
+
+	/* The VectorAttribute structure in a PDU carries event information
+	 * about one or more attributes having consecutive values. Only the
+	 * value for the first attribute is contained in the structure. So
+	 * we make a copy of that value, and then increment it each time we
+	 * advance to the next event in its Vector.
+	 */
+	if (sizeof(struct mrp_skb_cb) + mrp_cb(skb)->mh->attrlen >
+	    FIELD_SIZEOF(struct sk_buff, cb))
+		return -1;
+	if (skb_copy_bits(skb, *offset, mrp_cb(skb)->attrvalue,
+			  mrp_cb(skb)->mh->attrlen) < 0)
+		return -1;
+	*offset += mrp_cb(skb)->mh->attrlen;
+
+	/* In a VectorAttribute, the Vector contains events which are packed
+	 * three to a byte. We process one byte of the Vector at a time.
+	 */
+	while (valen > 0) {
+		if (skb_copy_bits(skb, *offset, &vaevents,
+				  sizeof(vaevents)) < 0)
+			return -1;
+		*offset += sizeof(vaevents);
+
+		/* Extract and process the first event. */
+		vaevent = vaevents / (__MRP_VECATTR_EVENT_MAX *
+				      __MRP_VECATTR_EVENT_MAX);
+		if (vaevent >= __MRP_VECATTR_EVENT_MAX) {
+			/* The byte is malformed; stop processing. */
+			return -1;
+		}
+		mrp_pdu_parse_vecattr_event(app, skb, vaevent);
+
+		/* If present, extract and process the second event. */
+		if (!--valen)
+			break;
+		mrp_attrvalue_inc(mrp_cb(skb)->attrvalue,
+				  mrp_cb(skb)->mh->attrlen);
+		vaevents %= (__MRP_VECATTR_EVENT_MAX *
+			     __MRP_VECATTR_EVENT_MAX);
+		vaevent = vaevents / __MRP_VECATTR_EVENT_MAX;
+		mrp_pdu_parse_vecattr_event(app, skb, vaevent);
+
+		/* If present, extract and process the third event. */
+		if (!--valen)
+			break;
+		mrp_attrvalue_inc(mrp_cb(skb)->attrvalue,
+				  mrp_cb(skb)->mh->attrlen);
+		vaevents %= __MRP_VECATTR_EVENT_MAX;
+		vaevent = vaevents;
+		mrp_pdu_parse_vecattr_event(app, skb, vaevent);
+	}
+	return 0;
+}
+
+static int mrp_pdu_parse_msg(struct mrp_applicant *app, struct sk_buff *skb,
+			     int *offset)
+{
+	struct mrp_msg_hdr _mh;
+
+	mrp_cb(skb)->mh = skb_header_pointer(skb, *offset, sizeof(_mh), &_mh);
+	if (!mrp_cb(skb)->mh)
+		return -1;
+	*offset += sizeof(_mh);
+
+	if (mrp_cb(skb)->mh->attrtype == 0 ||
+	    mrp_cb(skb)->mh->attrtype > app->app->maxattr ||
+	    mrp_cb(skb)->mh->attrlen == 0)
+		return -1;
+
+	while (skb->len > *offset) {
+		if (mrp_pdu_parse_end_mark(skb, offset) < 0)
+			break;
+		if (mrp_pdu_parse_vecattr(app, skb, offset) < 0)
+			return -1;
+	}
+	return 0;
+}
+
+int mrp_rcv(struct sk_buff *skb, struct net_device *dev,
+	    struct packet_type *pt, struct net_device *orig_dev)
+{
+	struct mrp_application *appl = container_of(pt, struct mrp_application,
+						    pkttype);
+	struct mrp_port *port;
+	struct mrp_applicant *app;
+	struct mrp_pdu_hdr _ph;
+	const struct mrp_pdu_hdr *ph;
+	int offset = skb_network_offset(skb);
+
+	/* If the interface is in promiscuous mode, drop the packet if
+	 * it was unicast to another host.
+	 */
+	if (unlikely(skb->pkt_type == PACKET_OTHERHOST))
+		goto out;
+	skb = skb_share_check(skb, GFP_ATOMIC);
+	if (unlikely(!skb))
+		goto out;
+	port = rcu_dereference(dev->mrp_port);
+	if (unlikely(!port))
+		goto out;
+	app = rcu_dereference(port->applicants[appl->type]);
+	if (unlikely(!app))
+		goto out;
+
+	ph = skb_header_pointer(skb, offset, sizeof(_ph), &_ph);
+	if (!ph)
+		goto out;
+	offset += sizeof(_ph);
+
+	if (ph->version != app->app->version)
+		goto out;
+
+	spin_lock(&app->lock);
+	while (skb->len > offset) {
+		if (mrp_pdu_parse_end_mark(skb, &offset) < 0)
+			break;
+		if (mrp_pdu_parse_msg(app, skb, &offset) < 0)
+			break;
+	}
+	spin_unlock(&app->lock);
+out:
+	kfree_skb(skb);
+	return 0;
+}
+
+static int mrp_init_port(struct net_device *dev)
+{
+	struct mrp_port *port;
+
+	port = kzalloc(sizeof(*port), GFP_KERNEL);
+	if (!port)
+		return -ENOMEM;
+	rcu_assign_pointer(dev->mrp_port, port);
+	return 0;
+}
+
+static void mrp_release_port(struct net_device *dev)
+{
+	struct mrp_port *port = rtnl_dereference(dev->mrp_port);
+	unsigned int i;
+
+	for (i = 0; i <= MRP_APPLICATION_MAX; i++) {
+		if (rtnl_dereference(port->applicants[i]))
+			return;
+	}
+	RCU_INIT_POINTER(dev->mrp_port, NULL);
+	kfree_rcu(port, rcu);
+}
+
+int mrp_init_applicant(struct net_device *dev, struct mrp_application *appl)
+{
+	struct mrp_applicant *app;
+	int err;
+
+	ASSERT_RTNL();
+
+	if (!rtnl_dereference(dev->mrp_port)) {
+		err = mrp_init_port(dev);
+		if (err < 0)
+			goto err1;
+	}
+
+	err = -ENOMEM;
+	app = kzalloc(sizeof(*app), GFP_KERNEL);
+	if (!app)
+		goto err2;
+
+	err = dev_mc_add(dev, appl->group_address);
+	if (err < 0)
+		goto err3;
+
+	app->dev = dev;
+	app->app = appl;
+	app->mad = RB_ROOT;
+	spin_lock_init(&app->lock);
+	skb_queue_head_init(&app->queue);
+	rcu_assign_pointer(dev->mrp_port->applicants[appl->type], app);
+	setup_timer(&app->join_timer, mrp_join_timer, (unsigned long)app);
+	mrp_join_timer_arm(app);
+	return 0;
+
+err3:
+	kfree(app);
+err2:
+	mrp_release_port(dev);
+err1:
+	return err;
+}
+EXPORT_SYMBOL_GPL(mrp_init_applicant);
+
+void mrp_uninit_applicant(struct net_device *dev, struct mrp_application *appl)
+{
+	struct mrp_port *port = rtnl_dereference(dev->mrp_port);
+	struct mrp_applicant *app = rtnl_dereference(
+		port->applicants[appl->type]);
+
+	ASSERT_RTNL();
+
+	RCU_INIT_POINTER(port->applicants[appl->type], NULL);
+
+	/* Delete timer and generate a final TX event to flush out
+	 * all pending messages before the applicant is gone.
+	 */
+	del_timer_sync(&app->join_timer);
+	mrp_mad_event(app, MRP_EVENT_TX);
+	mrp_pdu_queue(app);
+	mrp_queue_xmit(app);
+
+	dev_mc_del(dev, appl->group_address);
+	kfree_rcu(app, rcu);
+	mrp_release_port(dev);
+}
+EXPORT_SYMBOL_GPL(mrp_uninit_applicant);
+
+int mrp_register_application(struct mrp_application *appl)
+{
+	appl->pkttype.func = mrp_rcv;
+	dev_add_pack(&appl->pkttype);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(mrp_register_application);
+
+void mrp_unregister_application(struct mrp_application *appl)
+{
+	dev_remove_pack(&appl->pkttype);
+}
+EXPORT_SYMBOL_GPL(mrp_unregister_application);
-- 
cgit v1.2.3


From 9350393239153c4a98cbed4d69b9ed81e37d5e74 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <stephen@networkplumber.org>
Date: Sun, 10 Feb 2013 15:57:38 +1030
Subject: virtio: make config_ops const

It is just a table of function pointers, make it const for cleanliness and security
reasons.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/lguest/lguest_device.c         | 2 +-
 drivers/remoteproc/remoteproc_virtio.c | 2 +-
 drivers/s390/kvm/kvm_virtio.c          | 2 +-
 drivers/virtio/virtio_mmio.c           | 2 +-
 drivers/virtio/virtio_pci.c            | 2 +-
 include/linux/virtio.h                 | 2 +-
 6 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/lguest/lguest_device.c b/drivers/lguest/lguest_device.c
index fc92ccbd71dc..b3256ff0d426 100644
--- a/drivers/lguest/lguest_device.c
+++ b/drivers/lguest/lguest_device.c
@@ -396,7 +396,7 @@ static const char *lg_bus_name(struct virtio_device *vdev)
 }
 
 /* The ops structure which hooks everything together. */
-static struct virtio_config_ops lguest_config_ops = {
+static const struct virtio_config_ops lguest_config_ops = {
 	.get_features = lg_get_features,
 	.finalize_features = lg_finalize_features,
 	.get = lg_get,
diff --git a/drivers/remoteproc/remoteproc_virtio.c b/drivers/remoteproc/remoteproc_virtio.c
index 9e198e590675..afed9b7731c4 100644
--- a/drivers/remoteproc/remoteproc_virtio.c
+++ b/drivers/remoteproc/remoteproc_virtio.c
@@ -222,7 +222,7 @@ static void rproc_virtio_finalize_features(struct virtio_device *vdev)
 	rvdev->gfeatures = vdev->features[0];
 }
 
-static struct virtio_config_ops rproc_virtio_config_ops = {
+static const struct virtio_config_ops rproc_virtio_config_ops = {
 	.get_features	= rproc_virtio_get_features,
 	.finalize_features = rproc_virtio_finalize_features,
 	.find_vqs	= rproc_virtio_find_vqs,
diff --git a/drivers/s390/kvm/kvm_virtio.c b/drivers/s390/kvm/kvm_virtio.c
index 8491111aec12..7d08fba7542d 100644
--- a/drivers/s390/kvm/kvm_virtio.c
+++ b/drivers/s390/kvm/kvm_virtio.c
@@ -275,7 +275,7 @@ static const char *kvm_bus_name(struct virtio_device *vdev)
 /*
  * The config ops structure as defined by virtio config
  */
-static struct virtio_config_ops kvm_vq_configspace_ops = {
+static const struct virtio_config_ops kvm_vq_configspace_ops = {
 	.get_features = kvm_get_features,
 	.finalize_features = kvm_finalize_features,
 	.get = kvm_get,
diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c
index 833e6dbf38db..1ba0d6831015 100644
--- a/drivers/virtio/virtio_mmio.c
+++ b/drivers/virtio/virtio_mmio.c
@@ -423,7 +423,7 @@ static const char *vm_bus_name(struct virtio_device *vdev)
 	return vm_dev->pdev->name;
 }
 
-static struct virtio_config_ops virtio_mmio_config_ops = {
+static const struct virtio_config_ops virtio_mmio_config_ops = {
 	.get		= vm_get,
 	.set		= vm_set,
 	.get_status	= vm_get_status,
diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
index 0c142892c105..52c5ce6aec8d 100644
--- a/drivers/virtio/virtio_pci.c
+++ b/drivers/virtio/virtio_pci.c
@@ -652,7 +652,7 @@ static int vp_set_vq_affinity(struct virtqueue *vq, int cpu)
 	return 0;
 }
 
-static struct virtio_config_ops virtio_pci_config_ops = {
+static const struct virtio_config_ops virtio_pci_config_ops = {
 	.get		= vp_get,
 	.set		= vp_set,
 	.get_status	= vp_get_status,
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index cf8adb1f5b2c..eb34cf774473 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -78,7 +78,7 @@ struct virtio_device {
 	int index;
 	struct device dev;
 	struct virtio_device_id id;
-	struct virtio_config_ops *config;
+	const struct virtio_config_ops *config;
 	struct list_head vqs;
 	/* Note that this is a Linux set_bit-style bitmap. */
 	unsigned long features[1];
-- 
cgit v1.2.3


From 5d9ab708200fefc3ec6e4454c65584d14ce716b0 Mon Sep 17 00:00:00 2001
From: Charles Keepax <ckeepax@opensource.wolfsonmicro.com>
Date: Tue, 5 Feb 2013 10:13:38 +0000
Subject: extcon: arizona: Clear _trig_sts bits after jack detection

It is important to clear the wake trigger status bits otherwise DCVDD
will be held high independent of the state of the LDOENA line.

Signed-off-by: Charles Keepax <ckeepax@opensource.wolfsonmicro.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 drivers/extcon/extcon-arizona.c       | 7 +++++++
 include/linux/mfd/arizona/registers.h | 8 ++++++++
 2 files changed, 15 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/extcon/extcon-arizona.c b/drivers/extcon/extcon-arizona.c
index cfd206c4797c..aeaf217a05ee 100644
--- a/drivers/extcon/extcon-arizona.c
+++ b/drivers/extcon/extcon-arizona.c
@@ -939,6 +939,13 @@ static irqreturn_t arizona_jackdet(int irq, void *data)
 				   ARIZONA_MICD_CLAMP_DB | ARIZONA_JD1_DB);
 	}
 
+	/* Clear trig_sts to make sure DCVDD is not forced up */
+	regmap_write(arizona->regmap, ARIZONA_AOD_WKUP_AND_TRIG,
+		     ARIZONA_MICD_CLAMP_FALL_TRIG_STS |
+		     ARIZONA_MICD_CLAMP_RISE_TRIG_STS |
+		     ARIZONA_JD1_FALL_TRIG_STS |
+		     ARIZONA_JD1_RISE_TRIG_STS);
+
 	mutex_unlock(&info->lock);
 
 	pm_runtime_mark_last_busy(info->dev);
diff --git a/include/linux/mfd/arizona/registers.h b/include/linux/mfd/arizona/registers.h
index 79e9dd4073d8..188d89abd963 100644
--- a/include/linux/mfd/arizona/registers.h
+++ b/include/linux/mfd/arizona/registers.h
@@ -5267,6 +5267,14 @@
 /*
  * R3408 (0xD50) - AOD wkup and trig
  */
+#define ARIZONA_MICD_CLAMP_FALL_TRIG_STS         0x0080  /* MICD_CLAMP_FALL_TRIG_STS */
+#define ARIZONA_MICD_CLAMP_FALL_TRIG_STS_MASK    0x0080  /* MICD_CLAMP_FALL_TRIG_STS */
+#define ARIZONA_MICD_CLAMP_FALL_TRIG_STS_SHIFT        7  /* MICD_CLAMP_FALL_TRIG_STS */
+#define ARIZONA_MICD_CLAMP_FALL_TRIG_STS_WIDTH        1  /* MICD_CLAMP_FALL_TRIG_STS */
+#define ARIZONA_MICD_CLAMP_RISE_TRIG_STS         0x0040  /* MICD_CLAMP_RISE_TRIG_STS */
+#define ARIZONA_MICD_CLAMP_RISE_TRIG_STS_MASK    0x0040  /* MICD_CLAMP_RISE_TRIG_STS */
+#define ARIZONA_MICD_CLAMP_RISE_TRIG_STS_SHIFT        6  /* MICD_CLAMP_RISE_TRIG_STS */
+#define ARIZONA_MICD_CLAMP_RISE_TRIG_STS_WIDTH        1  /* MICD_CLAMP_RISE_TRIG_STS */
 #define ARIZONA_GP5_FALL_TRIG_STS                0x0020  /* GP5_FALL_TRIG_STS */
 #define ARIZONA_GP5_FALL_TRIG_STS_MASK           0x0020  /* GP5_FALL_TRIG_STS */
 #define ARIZONA_GP5_FALL_TRIG_STS_SHIFT               5  /* GP5_FALL_TRIG_STS */
-- 
cgit v1.2.3


From 41875e388401ad97c33252d5fa39d52e0b70ee9b Mon Sep 17 00:00:00 2001
From: Sujit Reddy Thumma <sthumma@codeaurora.org>
Date: Tue, 4 Dec 2012 17:06:19 +0530
Subject: mmc: sdio: Fix SDIO 3.0 UHS-I initialization sequence

According to UHS-I initialization sequence for SDIO 3.0 cards,
the host must set bit[24] (S18R) of OCR register during OCR
handshake to know whether the SDIO card is capable of doing
1.8V I/O.

Signed-off-by: Sujit Reddy Thumma <sthumma@codeaurora.org>
Signed-off-by: Subhash Jadavani <subhashj@codeaurora.org>
Reviewed-by: Johan Rudholm <johan.rudholm@stericsson.com>
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/core/sdio.c  | 22 +++++++++++-----------
 include/linux/mmc/host.h |  8 ++++++++
 2 files changed, 19 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c
index 34ad4c877c1f..9565d38d91a4 100644
--- a/drivers/mmc/core/sdio.c
+++ b/drivers/mmc/core/sdio.c
@@ -157,10 +157,7 @@ static int sdio_read_cccr(struct mmc_card *card, u32 ocr)
 			if (ret)
 				goto out;
 
-			if (card->host->caps &
-				(MMC_CAP_UHS_SDR12 | MMC_CAP_UHS_SDR25 |
-				 MMC_CAP_UHS_SDR50 | MMC_CAP_UHS_SDR104 |
-				 MMC_CAP_UHS_DDR50)) {
+			if (mmc_host_uhs(card->host)) {
 				if (data & SDIO_UHS_DDR50)
 					card->sw_caps.sd3_bus_mode
 						|= SD_MODE_UHS_DDR50;
@@ -478,8 +475,7 @@ static int sdio_set_bus_speed_mode(struct mmc_card *card)
 	 * If the host doesn't support any of the UHS-I modes, fallback on
 	 * default speed.
 	 */
-	if (!(card->host->caps & (MMC_CAP_UHS_SDR12 | MMC_CAP_UHS_SDR25 |
-	    MMC_CAP_UHS_SDR50 | MMC_CAP_UHS_SDR104 | MMC_CAP_UHS_DDR50)))
+	if (!mmc_host_uhs(card->host))
 		return 0;
 
 	bus_speed = SDIO_SPEED_SDR12;
@@ -645,11 +641,7 @@ static int mmc_sdio_init_card(struct mmc_host *host, u32 ocr,
 	 * systems that claim 1.8v signalling in fact do not support
 	 * it.
 	 */
-	if ((ocr & R4_18V_PRESENT) &&
-		(host->caps &
-			(MMC_CAP_UHS_SDR12 | MMC_CAP_UHS_SDR25 |
-			 MMC_CAP_UHS_SDR50 | MMC_CAP_UHS_SDR104 |
-			 MMC_CAP_UHS_DDR50))) {
+	if ((ocr & R4_18V_PRESENT) && mmc_host_uhs(host)) {
 		err = mmc_set_signal_voltage(host, MMC_SIGNAL_VOLTAGE_180,
 				true);
 		if (err) {
@@ -1022,6 +1014,10 @@ static int mmc_sdio_power_restore(struct mmc_host *host)
 		goto out;
 	}
 
+	if (mmc_host_uhs(host))
+		/* to query card if 1.8V signalling is supported */
+		host->ocr |= R4_18V_PRESENT;
+
 	ret = mmc_sdio_init_card(host, host->ocr, host->card,
 				mmc_card_keep_power(host));
 	if (!ret && host->sdio_irqs)
@@ -1087,6 +1083,10 @@ int mmc_attach_sdio(struct mmc_host *host)
 	/*
 	 * Detect and init the card.
 	 */
+	if (mmc_host_uhs(host))
+		/* to query card if 1.8V signalling is supported */
+		host->ocr |= R4_18V_PRESENT;
+
 	err = mmc_sdio_init_card(host, host->ocr, NULL, 0);
 	if (err) {
 		if (err == -EAGAIN) {
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 61a10c17aabd..c89a1bb87fa5 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -434,6 +434,14 @@ static inline int mmc_boot_partition_access(struct mmc_host *host)
 	return !(host->caps2 & MMC_CAP2_BOOTPART_NOACC);
 }
 
+static inline int mmc_host_uhs(struct mmc_host *host)
+{
+	return host->caps &
+		(MMC_CAP_UHS_SDR12 | MMC_CAP_UHS_SDR25 |
+		 MMC_CAP_UHS_SDR50 | MMC_CAP_UHS_SDR104 |
+		 MMC_CAP_UHS_DDR50);
+}
+
 #ifdef CONFIG_MMC_CLKGATE
 void mmc_host_clk_hold(struct mmc_host *host);
 void mmc_host_clk_release(struct mmc_host *host);
-- 
cgit v1.2.3


From 2220eedfd7aea69008173a224975e10284fbe854 Mon Sep 17 00:00:00 2001
From: Konstantin Dorfman <kdorfman@codeaurora.org>
Date: Mon, 14 Jan 2013 14:28:17 -0500
Subject: mmc: fix async request mechanism for sequential read scenarios

When current request is running on the bus and if next request fetched
by mmcqd is NULL, mmc context (mmcqd thread) gets blocked until the
current request completes. This means that if new request comes in while
the mmcqd thread is blocked, this new request can not be prepared in
parallel to current ongoing request. This may result in delaying the new
request execution and increase it's latency.

This change allows to wake up the MMC thread on new request arrival.
Now once the MMC thread is woken up, a new request can be fetched and
prepared in parallel to the current running request which means this new
request can be started immediately after the current running request
completes.

With this change read throughput is improved by 16%.

Signed-off-by: Konstantin Dorfman <kdorfman@codeaurora.org>
Reviewed-by: Seungwon Jeon <tgih.jun@samsung.com>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/card/block.c |  30 +++++++-----
 drivers/mmc/card/queue.c |  22 ++++++++-
 drivers/mmc/card/queue.h |   3 ++
 drivers/mmc/core/bus.c   |   1 +
 drivers/mmc/core/core.c  | 121 +++++++++++++++++++++++++++++++++++++++++++++--
 drivers/mmc/core/core.h  |   1 +
 include/linux/mmc/card.h |  12 +++++
 include/linux/mmc/core.h |   3 +-
 include/linux/mmc/host.h |  17 +++++++
 9 files changed, 191 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c
index 21056b9ef0a0..f79b4688e471 100644
--- a/drivers/mmc/card/block.c
+++ b/drivers/mmc/card/block.c
@@ -113,17 +113,6 @@ struct mmc_blk_data {
 
 static DEFINE_MUTEX(open_lock);
 
-enum mmc_blk_status {
-	MMC_BLK_SUCCESS = 0,
-	MMC_BLK_PARTIAL,
-	MMC_BLK_CMD_ERR,
-	MMC_BLK_RETRY,
-	MMC_BLK_ABORT,
-	MMC_BLK_DATA_ERR,
-	MMC_BLK_ECC_ERR,
-	MMC_BLK_NOMEDIUM,
-};
-
 module_param(perdev_minors, int, 0444);
 MODULE_PARM_DESC(perdev_minors, "Minors numbers to allocate per device");
 
@@ -1364,8 +1353,11 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *rqc)
 		} else
 			areq = NULL;
 		areq = mmc_start_req(card->host, areq, (int *) &status);
-		if (!areq)
+		if (!areq) {
+			if (status == MMC_BLK_NEW_REQUEST)
+				mq->flags |= MMC_QUEUE_NEW_REQUEST;
 			return 0;
+		}
 
 		mq_rq = container_of(areq, struct mmc_queue_req, mmc_active);
 		brq = &mq_rq->brq;
@@ -1438,6 +1430,10 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *rqc)
 			break;
 		case MMC_BLK_NOMEDIUM:
 			goto cmd_abort;
+		default:
+			pr_err("%s: Unhandled return value (%d)",
+					req->rq_disk->disk_name, status);
+			goto cmd_abort;
 		}
 
 		if (ret) {
@@ -1472,6 +1468,8 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req)
 	int ret;
 	struct mmc_blk_data *md = mq->data;
 	struct mmc_card *card = md->queue.card;
+	struct mmc_host *host = card->host;
+	unsigned long flags;
 
 	if (req && !mq->mqrq_prev->req)
 		/* claim host only for the first request */
@@ -1486,6 +1484,7 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req)
 		goto out;
 	}
 
+	mq->flags &= ~MMC_QUEUE_NEW_REQUEST;
 	if (req && req->cmd_flags & REQ_DISCARD) {
 		/* complete ongoing async transfer before issuing discard */
 		if (card->host->areq)
@@ -1501,11 +1500,16 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req)
 			mmc_blk_issue_rw_rq(mq, NULL);
 		ret = mmc_blk_issue_flush(mq, req);
 	} else {
+		if (!req && host->areq) {
+			spin_lock_irqsave(&host->context_info.lock, flags);
+			host->context_info.is_waiting_last_req = true;
+			spin_unlock_irqrestore(&host->context_info.lock, flags);
+		}
 		ret = mmc_blk_issue_rw_rq(mq, req);
 	}
 
 out:
-	if (!req)
+	if (!req && !(mq->flags & MMC_QUEUE_NEW_REQUEST))
 		/* release host only when there are no more requests */
 		mmc_release_host(card->host);
 	return ret;
diff --git a/drivers/mmc/card/queue.c b/drivers/mmc/card/queue.c
index d630d9861e7b..5e0971016ac5 100644
--- a/drivers/mmc/card/queue.c
+++ b/drivers/mmc/card/queue.c
@@ -22,7 +22,6 @@
 
 #define MMC_QUEUE_BOUNCESZ	65536
 
-#define MMC_QUEUE_SUSPENDED	(1 << 0)
 
 #define MMC_REQ_SPECIAL_MASK	(REQ_DISCARD | REQ_FLUSH)
 
@@ -72,6 +71,10 @@ static int mmc_queue_thread(void *d)
 			set_current_state(TASK_RUNNING);
 			cmd_flags = req ? req->cmd_flags : 0;
 			mq->issue_fn(mq, req);
+			if (mq->flags & MMC_QUEUE_NEW_REQUEST) {
+				mq->flags &= ~MMC_QUEUE_NEW_REQUEST;
+				continue; /* fetch again */
+			}
 
 			/*
 			 * Current request becomes previous request
@@ -113,6 +116,8 @@ static void mmc_request_fn(struct request_queue *q)
 {
 	struct mmc_queue *mq = q->queuedata;
 	struct request *req;
+	unsigned long flags;
+	struct mmc_context_info *cntx;
 
 	if (!mq) {
 		while ((req = blk_fetch_request(q)) != NULL) {
@@ -122,7 +127,20 @@ static void mmc_request_fn(struct request_queue *q)
 		return;
 	}
 
-	if (!mq->mqrq_cur->req && !mq->mqrq_prev->req)
+	cntx = &mq->card->host->context_info;
+	if (!mq->mqrq_cur->req && mq->mqrq_prev->req) {
+		/*
+		 * New MMC request arrived when MMC thread may be
+		 * blocked on the previous request to be complete
+		 * with no current request fetched
+		 */
+		spin_lock_irqsave(&cntx->lock, flags);
+		if (cntx->is_waiting_last_req) {
+			cntx->is_new_req = true;
+			wake_up_interruptible(&cntx->wait);
+		}
+		spin_unlock_irqrestore(&cntx->lock, flags);
+	} else if (!mq->mqrq_cur->req && !mq->mqrq_prev->req)
 		wake_up_process(mq->thread);
 }
 
diff --git a/drivers/mmc/card/queue.h b/drivers/mmc/card/queue.h
index d2a1eb4b9f9f..e20c27b2b8b4 100644
--- a/drivers/mmc/card/queue.h
+++ b/drivers/mmc/card/queue.h
@@ -27,6 +27,9 @@ struct mmc_queue {
 	struct task_struct	*thread;
 	struct semaphore	thread_sem;
 	unsigned int		flags;
+#define MMC_QUEUE_SUSPENDED	(1 << 0)
+#define MMC_QUEUE_NEW_REQUEST	(1 << 1)
+
 	int			(*issue_fn)(struct mmc_queue *, struct request *);
 	void			*data;
 	struct request_queue	*queue;
diff --git a/drivers/mmc/core/bus.c b/drivers/mmc/core/bus.c
index 420cb6753c1e..e219c97a02a4 100644
--- a/drivers/mmc/core/bus.c
+++ b/drivers/mmc/core/bus.c
@@ -321,6 +321,7 @@ int mmc_add_card(struct mmc_card *card)
 #ifdef CONFIG_DEBUG_FS
 	mmc_add_card_debugfs(card);
 #endif
+	mmc_init_context_info(card->host);
 
 	ret = device_add(&card->dev);
 	if (ret)
diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index aaed7687cf09..8b3a1222e665 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -319,11 +319,44 @@ out:
 }
 EXPORT_SYMBOL(mmc_start_bkops);
 
+/*
+ * mmc_wait_data_done() - done callback for data request
+ * @mrq: done data request
+ *
+ * Wakes up mmc context, passed as a callback to host controller driver
+ */
+static void mmc_wait_data_done(struct mmc_request *mrq)
+{
+	mrq->host->context_info.is_done_rcv = true;
+	wake_up_interruptible(&mrq->host->context_info.wait);
+}
+
 static void mmc_wait_done(struct mmc_request *mrq)
 {
 	complete(&mrq->completion);
 }
 
+/*
+ *__mmc_start_data_req() - starts data request
+ * @host: MMC host to start the request
+ * @mrq: data request to start
+ *
+ * Sets the done callback to be called when request is completed by the card.
+ * Starts data mmc request execution
+ */
+static int __mmc_start_data_req(struct mmc_host *host, struct mmc_request *mrq)
+{
+	mrq->done = mmc_wait_data_done;
+	mrq->host = host;
+	if (mmc_card_removed(host->card)) {
+		mrq->cmd->error = -ENOMEDIUM;
+		return -ENOMEDIUM;
+	}
+	mmc_start_request(host, mrq);
+
+	return 0;
+}
+
 static int __mmc_start_req(struct mmc_host *host, struct mmc_request *mrq)
 {
 	init_completion(&mrq->completion);
@@ -337,6 +370,62 @@ static int __mmc_start_req(struct mmc_host *host, struct mmc_request *mrq)
 	return 0;
 }
 
+/*
+ * mmc_wait_for_data_req_done() - wait for request completed
+ * @host: MMC host to prepare the command.
+ * @mrq: MMC request to wait for
+ *
+ * Blocks MMC context till host controller will ack end of data request
+ * execution or new request notification arrives from the block layer.
+ * Handles command retries.
+ *
+ * Returns enum mmc_blk_status after checking errors.
+ */
+static int mmc_wait_for_data_req_done(struct mmc_host *host,
+				      struct mmc_request *mrq,
+				      struct mmc_async_req *next_req)
+{
+	struct mmc_command *cmd;
+	struct mmc_context_info *context_info = &host->context_info;
+	int err;
+	unsigned long flags;
+
+	while (1) {
+		wait_event_interruptible(context_info->wait,
+				(context_info->is_done_rcv ||
+				 context_info->is_new_req));
+		spin_lock_irqsave(&context_info->lock, flags);
+		context_info->is_waiting_last_req = false;
+		spin_unlock_irqrestore(&context_info->lock, flags);
+		if (context_info->is_done_rcv) {
+			context_info->is_done_rcv = false;
+			context_info->is_new_req = false;
+			cmd = mrq->cmd;
+			if (!cmd->error || !cmd->retries ||
+			    mmc_card_removed(host->card)) {
+				err = host->areq->err_check(host->card,
+							    host->areq);
+				break; /* return err */
+			} else {
+				pr_info("%s: req failed (CMD%u): %d, retrying...\n",
+					mmc_hostname(host),
+					cmd->opcode, cmd->error);
+				cmd->retries--;
+				cmd->error = 0;
+				host->ops->request(host, mrq);
+				continue; /* wait for done/new event again */
+			}
+		} else if (context_info->is_new_req) {
+			context_info->is_new_req = false;
+			if (!next_req) {
+				err = MMC_BLK_NEW_REQUEST;
+				break; /* return err */
+			}
+		}
+	}
+	return err;
+}
+
 static void mmc_wait_for_req_done(struct mmc_host *host,
 				  struct mmc_request *mrq)
 {
@@ -426,8 +515,17 @@ struct mmc_async_req *mmc_start_req(struct mmc_host *host,
 		mmc_pre_req(host, areq->mrq, !host->areq);
 
 	if (host->areq) {
-		mmc_wait_for_req_done(host, host->areq->mrq);
-		err = host->areq->err_check(host->card, host->areq);
+			err = mmc_wait_for_data_req_done(host, host->areq->mrq,
+					areq);
+			if (err == MMC_BLK_NEW_REQUEST) {
+				if (error)
+					*error = err;
+				/*
+				 * The previous request was not completed,
+				 * nothing to return
+				 */
+				return NULL;
+			}
 		/*
 		 * Check BKOPS urgency for each R1 response
 		 */
@@ -439,7 +537,7 @@ struct mmc_async_req *mmc_start_req(struct mmc_host *host,
 	}
 
 	if (!err && areq)
-		start_err = __mmc_start_req(host, areq->mrq);
+		start_err = __mmc_start_data_req(host, areq->mrq);
 
 	if (host->areq)
 		mmc_post_req(host, host->areq->mrq, 0);
@@ -2581,6 +2679,23 @@ int mmc_pm_notify(struct notifier_block *notify_block,
 }
 #endif
 
+/**
+ * mmc_init_context_info() - init synchronization context
+ * @host: mmc host
+ *
+ * Init struct context_info needed to implement asynchronous
+ * request mechanism, used by mmc core, host driver and mmc requests
+ * supplier.
+ */
+void mmc_init_context_info(struct mmc_host *host)
+{
+	spin_lock_init(&host->context_info.lock);
+	host->context_info.is_new_req = false;
+	host->context_info.is_done_rcv = false;
+	host->context_info.is_waiting_last_req = false;
+	init_waitqueue_head(&host->context_info.wait);
+}
+
 static int __init mmc_init(void)
 {
 	int ret;
diff --git a/drivers/mmc/core/core.h b/drivers/mmc/core/core.h
index 3bdafbca354f..0272b3284b5e 100644
--- a/drivers/mmc/core/core.h
+++ b/drivers/mmc/core/core.h
@@ -76,5 +76,6 @@ void mmc_remove_host_debugfs(struct mmc_host *host);
 void mmc_add_card_debugfs(struct mmc_card *card);
 void mmc_remove_card_debugfs(struct mmc_card *card);
 
+void mmc_init_context_info(struct mmc_host *host);
 #endif
 
diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index 5c69315d60cc..be2500a49925 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -187,6 +187,18 @@ struct sdio_func_tuple;
 
 #define SDIO_MAX_FUNCS		7
 
+enum mmc_blk_status {
+	MMC_BLK_SUCCESS = 0,
+	MMC_BLK_PARTIAL,
+	MMC_BLK_CMD_ERR,
+	MMC_BLK_RETRY,
+	MMC_BLK_ABORT,
+	MMC_BLK_DATA_ERR,
+	MMC_BLK_ECC_ERR,
+	MMC_BLK_NOMEDIUM,
+	MMC_BLK_NEW_REQUEST,
+};
+
 /* The number of MMC physical partitions.  These consist of:
  * boot partitions (2), general purpose partitions (4) in MMC v4.4.
  */
diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h
index 5bf7c2274fcb..495d1336149c 100644
--- a/include/linux/mmc/core.h
+++ b/include/linux/mmc/core.h
@@ -120,6 +120,7 @@ struct mmc_data {
 	s32			host_cookie;	/* host private data */
 };
 
+struct mmc_host;
 struct mmc_request {
 	struct mmc_command	*sbc;		/* SET_BLOCK_COUNT for multiblock */
 	struct mmc_command	*cmd;
@@ -128,9 +129,9 @@ struct mmc_request {
 
 	struct completion	completion;
 	void			(*done)(struct mmc_request *);/* completion function */
+	struct mmc_host		*host;
 };
 
-struct mmc_host;
 struct mmc_card;
 struct mmc_async_req;
 
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index c89a1bb87fa5..523d570f58ad 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -170,6 +170,22 @@ struct mmc_slot {
 	void *handler_priv;
 };
 
+/**
+ * mmc_context_info - synchronization details for mmc context
+ * @is_done_rcv		wake up reason was done request
+ * @is_new_req		wake up reason was new request
+ * @is_waiting_last_req	mmc context waiting for single running request
+ * @wait		wait queue
+ * @lock		lock to protect data fields
+ */
+struct mmc_context_info {
+	bool			is_done_rcv;
+	bool			is_new_req;
+	bool			is_waiting_last_req;
+	wait_queue_head_t	wait;
+	spinlock_t		lock;
+};
+
 struct regulator;
 
 struct mmc_supply {
@@ -331,6 +347,7 @@ struct mmc_host {
 	struct dentry		*debugfs_root;
 
 	struct mmc_async_req	*areq;		/* active async req */
+	struct mmc_context_info	context_info;	/* async synchronization info */
 
 #ifdef CONFIG_FAIL_MMC_REQUEST
 	struct fault_attr	fail_mmc_request;
-- 
cgit v1.2.3


From 7c7945a8f971e0b9cbdd7684ee106de768a0e474 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <c.dall@virtualopensystems.com>
Date: Wed, 23 Jan 2013 13:18:03 -0500
Subject: ARM: gic: add missing distributor defintions

Add missing register map offsets for the distributor and rename
GIC_DIST_ACTIVE_BIT to GIC_DIST_ACTIVE_SET to be consistent.

Reviewed-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Christoffer Dall <c.dall@virtualopensystems.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 include/linux/irqchip/arm-gic.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h
index a67ca55e6f4e..9509e8785cad 100644
--- a/include/linux/irqchip/arm-gic.h
+++ b/include/linux/irqchip/arm-gic.h
@@ -20,11 +20,13 @@
 
 #define GIC_DIST_CTRL			0x000
 #define GIC_DIST_CTR			0x004
+#define GIC_DIST_IGROUP			0x080
 #define GIC_DIST_ENABLE_SET		0x100
 #define GIC_DIST_ENABLE_CLEAR		0x180
 #define GIC_DIST_PENDING_SET		0x200
 #define GIC_DIST_PENDING_CLEAR		0x280
-#define GIC_DIST_ACTIVE_BIT		0x300
+#define GIC_DIST_ACTIVE_SET		0x300
+#define GIC_DIST_ACTIVE_CLEAR		0x380
 #define GIC_DIST_PRI			0x400
 #define GIC_DIST_TARGET			0x800
 #define GIC_DIST_CONFIG			0xc00
-- 
cgit v1.2.3


From fdf77a72ec110572ac033d499c8be8b389c05740 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Mon, 21 Jan 2013 19:36:11 -0500
Subject: ARM: gic: define GICH offsets for VGIC support

The GICH_* constants are defined by the GIC HW spec, and even though
they only be used by KVM to begin with, define them generically in gic.h.

Reviewed-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Christoffer Dall <c.dall@virtualopensystems.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 include/linux/irqchip/arm-gic.h | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h
index 9509e8785cad..ef61d80946be 100644
--- a/include/linux/irqchip/arm-gic.h
+++ b/include/linux/irqchip/arm-gic.h
@@ -32,6 +32,31 @@
 #define GIC_DIST_CONFIG			0xc00
 #define GIC_DIST_SOFTINT		0xf00
 
+#define GICH_HCR			0x0
+#define GICH_VTR			0x4
+#define GICH_VMCR			0x8
+#define GICH_MISR			0x10
+#define GICH_EISR0 			0x20
+#define GICH_EISR1 			0x24
+#define GICH_ELRSR0 			0x30
+#define GICH_ELRSR1 			0x34
+#define GICH_APR			0xf0
+#define GICH_LR0			0x100
+
+#define GICH_HCR_EN			(1 << 0)
+#define GICH_HCR_UIE			(1 << 1)
+
+#define GICH_LR_VIRTUALID		(0x3ff << 0)
+#define GICH_LR_PHYSID_CPUID_SHIFT	(10)
+#define GICH_LR_PHYSID_CPUID		(7 << GICH_LR_PHYSID_CPUID_SHIFT)
+#define GICH_LR_STATE			(3 << 28)
+#define GICH_LR_PENDING_BIT		(1 << 28)
+#define GICH_LR_ACTIVE_BIT		(1 << 29)
+#define GICH_LR_EOI			(1 << 19)
+
+#define GICH_MISR_EOI			(1 << 0)
+#define GICH_MISR_U			(1 << 1)
+
 struct device_node;
 
 extern struct irq_chip gic_arch_extn;
-- 
cgit v1.2.3


From a96ab03917dcf4c9477d03b31e8d74779bca1074 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Thu, 24 Jan 2013 13:39:43 +0000
Subject: ARM: gic: add __ASSEMBLY__ guard to C definitions

The GIC include file being used by some of the KVM assembly code,
wrap the C definitions with a #ifdef __ASSEMBLY__ guard.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 include/linux/irqchip/arm-gic.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h
index ef61d80946be..3fd8e4290a1c 100644
--- a/include/linux/irqchip/arm-gic.h
+++ b/include/linux/irqchip/arm-gic.h
@@ -57,6 +57,8 @@
 #define GICH_MISR_EOI			(1 << 0)
 #define GICH_MISR_U			(1 << 1)
 
+#ifndef __ASSEMBLY__
+
 struct device_node;
 
 extern struct irq_chip gic_arch_extn;
@@ -72,4 +74,6 @@ static inline void gic_init(unsigned int nr, int start,
 	gic_init_bases(nr, start, dist, cpu, 0, NULL);
 }
 
+#endif /* __ASSEMBLY */
+
 #endif
-- 
cgit v1.2.3


From 2c5e89338493882719f8d138f8f2717ee9a04153 Mon Sep 17 00:00:00 2001
From: Hannes Frederic Sowa <hannes@stressinduktion.org>
Date: Sun, 10 Feb 2013 03:50:18 +0000
Subject: ipv6: by default join ff01::1 and in case of forwarding ff01::2 and
 ff05:2

Cc: Erik Hugne <erik.hugne@ericsson.com>
Cc: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/in6.h |  9 +++++++++
 net/ipv6/addrconf.c | 15 +++++++++++++--
 2 files changed, 22 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/in6.h b/include/linux/in6.h
index a16e19349ec0..34edf1f6c9a3 100644
--- a/include/linux/in6.h
+++ b/include/linux/in6.h
@@ -36,4 +36,13 @@ extern const struct in6_addr in6addr_linklocal_allnodes;
 extern const struct in6_addr in6addr_linklocal_allrouters;
 #define IN6ADDR_LINKLOCAL_ALLROUTERS_INIT \
 		{ { { 0xff,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2 } } }
+extern const struct in6_addr in6addr_interfacelocal_allnodes;
+#define IN6ADDR_INTERFACELOCAL_ALLNODES_INIT \
+		{ { { 0xff,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1 } } }
+extern const struct in6_addr in6addr_interfacelocal_allrouters;
+#define IN6ADDR_INTERFACELOCAL_ALLROUTERS_INIT \
+		{ { { 0xff,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2 } } }
+extern const struct in6_addr in6addr_sitelocal_allrouters;
+#define IN6ADDR_SITELOCAL_ALLROUTERS_INIT \
+		{ { { 0xff,5,0,0,0,0,0,0,0,0,0,0,0,0,0,2 } } }
 #endif
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index bd9f9360f769..86c235d05aba 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -244,6 +244,9 @@ const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT;
 const struct in6_addr in6addr_loopback = IN6ADDR_LOOPBACK_INIT;
 const struct in6_addr in6addr_linklocal_allnodes = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
 const struct in6_addr in6addr_linklocal_allrouters = IN6ADDR_LINKLOCAL_ALLROUTERS_INIT;
+const struct in6_addr in6addr_interfacelocal_allnodes = IN6ADDR_INTERFACELOCAL_ALLNODES_INIT;
+const struct in6_addr in6addr_interfacelocal_allrouters = IN6ADDR_INTERFACELOCAL_ALLROUTERS_INIT;
+const struct in6_addr in6addr_sitelocal_allrouters = IN6ADDR_SITELOCAL_ALLROUTERS_INIT;
 
 /* Check if a valid qdisc is available */
 static inline bool addrconf_qdisc_ok(const struct net_device *dev)
@@ -428,6 +431,9 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
 	/* protected by rtnl_lock */
 	rcu_assign_pointer(dev->ip6_ptr, ndev);
 
+	/* Join interface-local all-node multicast group */
+	ipv6_dev_mc_inc(dev, &in6addr_interfacelocal_allnodes);
+
 	/* Join all-node multicast group */
 	ipv6_dev_mc_inc(dev, &in6addr_linklocal_allnodes);
 
@@ -611,10 +617,15 @@ static void dev_forward_change(struct inet6_dev *idev)
 	if (idev->cnf.forwarding)
 		dev_disable_lro(dev);
 	if (dev->flags & IFF_MULTICAST) {
-		if (idev->cnf.forwarding)
+		if (idev->cnf.forwarding) {
 			ipv6_dev_mc_inc(dev, &in6addr_linklocal_allrouters);
-		else
+			ipv6_dev_mc_inc(dev, &in6addr_interfacelocal_allrouters);
+			ipv6_dev_mc_inc(dev, &in6addr_sitelocal_allrouters);
+		} else {
 			ipv6_dev_mc_dec(dev, &in6addr_linklocal_allrouters);
+			ipv6_dev_mc_dec(dev, &in6addr_interfacelocal_allrouters);
+			ipv6_dev_mc_dec(dev, &in6addr_sitelocal_allrouters);
+		}
 	}
 
 	list_for_each_entry(ifa, &idev->addr_list, if_list) {
-- 
cgit v1.2.3


From 57fa8273a246d6abbbfdc995da6293b0040807d8 Mon Sep 17 00:00:00 2001
From: Len Brown <len.brown@intel.com>
Date: Fri, 8 Feb 2013 18:51:05 -0500
Subject: cpuidle: remove vestage definition of cpuidle_state_usage.driver_data

This field is no longer used.

Signed-off-by: Len Brown <len.brown@intel.com>
Acked-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 include/linux/cpuidle.h | 22 ----------------------
 1 file changed, 22 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index 24cd1037b6d6..480c14dc1ddd 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -32,8 +32,6 @@ struct cpuidle_driver;
  ****************************/
 
 struct cpuidle_state_usage {
-	void		*driver_data;
-
 	unsigned long long	disable;
 	unsigned long long	usage;
 	unsigned long long	time; /* in US */
@@ -62,26 +60,6 @@ struct cpuidle_state {
 
 #define CPUIDLE_DRIVER_FLAGS_MASK (0xFFFF0000)
 
-/**
- * cpuidle_get_statedata - retrieves private driver state data
- * @st_usage: the state usage statistics
- */
-static inline void *cpuidle_get_statedata(struct cpuidle_state_usage *st_usage)
-{
-	return st_usage->driver_data;
-}
-
-/**
- * cpuidle_set_statedata - stores private driver state data
- * @st_usage: the state usage statistics
- * @data: the private data
- */
-static inline void
-cpuidle_set_statedata(struct cpuidle_state_usage *st_usage, void *data)
-{
-	st_usage->driver_data = data;
-}
-
 struct cpuidle_device {
 	unsigned int		registered:1;
 	unsigned int		enabled:1;
-- 
cgit v1.2.3


From 2cde6acd49daca58b96f1fbc697492825511ad31 Mon Sep 17 00:00:00 2001
From: Neil Horman <nhorman@tuxdriver.com>
Date: Mon, 11 Feb 2013 10:25:30 +0000
Subject: netpoll: Fix __netpoll_rcu_free so that it can hold the rtnl lock

__netpoll_rcu_free is used to free netpoll structures when the rtnl_lock is
already held.  The mechanism is used to asynchronously call __netpoll_cleanup
outside of the holding of the rtnl_lock, so as to avoid deadlock.
Unfortunately, __netpoll_cleanup modifies pointers (dev->np), which means the
rtnl_lock must be held while calling it.  Further, it cannot be held, because
rcu callbacks may be issued in softirq contexts, which cannot sleep.

Fix this by converting the rcu callback to a work queue that is guaranteed to
get scheduled in process context, so that we can hold the rtnl properly while
calling __netpoll_cleanup

Tested successfully by myself.

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
CC: "David S. Miller" <davem@davemloft.net>
CC: Cong Wang <amwang@redhat.com>
CC: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c |  2 +-
 include/linux/netpoll.h         |  4 ++--
 net/8021q/vlan_dev.c            |  2 +-
 net/bridge/br_device.c          |  2 +-
 net/core/netpoll.c              | 16 ++++++++++------
 5 files changed, 15 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 22399374b1e1..94c1534dd578 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1249,7 +1249,7 @@ static inline void slave_disable_netpoll(struct slave *slave)
 		return;
 
 	slave->np = NULL;
-	__netpoll_free_rcu(np);
+	__netpoll_free_async(np);
 }
 static inline bool slave_dev_support_netpoll(struct net_device *slave_dev)
 {
diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h
index ab856d507b7e..9d7d8c64f7c8 100644
--- a/include/linux/netpoll.h
+++ b/include/linux/netpoll.h
@@ -32,7 +32,7 @@ struct netpoll {
 	u8 remote_mac[ETH_ALEN];
 
 	struct list_head rx; /* rx_np list element */
-	struct rcu_head rcu;
+	struct work_struct cleanup_work;
 };
 
 struct netpoll_info {
@@ -68,7 +68,7 @@ int netpoll_setup(struct netpoll *np);
 int netpoll_trap(void);
 void netpoll_set_trap(int trap);
 void __netpoll_cleanup(struct netpoll *np);
-void __netpoll_free_rcu(struct netpoll *np);
+void __netpoll_free_async(struct netpoll *np);
 void netpoll_cleanup(struct netpoll *np);
 int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo);
 void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 34df5b3c9b75..19cf81bf9f69 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -733,7 +733,7 @@ static void vlan_dev_netpoll_cleanup(struct net_device *dev)
 
 	vlan->netpoll = NULL;
 
-	__netpoll_free_rcu(netpoll);
+	__netpoll_free_async(netpoll);
 }
 #endif /* CONFIG_NET_POLL_CONTROLLER */
 
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index ba6fb2d60940..ca98fa5b2c78 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -265,7 +265,7 @@ void br_netpoll_disable(struct net_bridge_port *p)
 
 	p->np = NULL;
 
-	__netpoll_free_rcu(np);
+	__netpoll_free_async(np);
 }
 
 #endif
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index edcd9ad95304..c536474e2260 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -61,6 +61,7 @@ static struct srcu_struct netpoll_srcu;
 
 static void zap_completion_queue(void);
 static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo);
+static void netpoll_async_cleanup(struct work_struct *work);
 
 static unsigned int carrier_timeout = 4;
 module_param(carrier_timeout, uint, 0644);
@@ -1020,6 +1021,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp)
 
 	np->dev = ndev;
 	strlcpy(np->dev_name, ndev->name, IFNAMSIZ);
+	INIT_WORK(&np->cleanup_work, netpoll_async_cleanup);
 
 	if ((ndev->priv_flags & IFF_DISABLE_NETPOLL) ||
 	    !ndev->netdev_ops->ndo_poll_controller) {
@@ -1255,25 +1257,27 @@ void __netpoll_cleanup(struct netpoll *np)
 		if (ops->ndo_netpoll_cleanup)
 			ops->ndo_netpoll_cleanup(np->dev);
 
-		RCU_INIT_POINTER(np->dev->npinfo, NULL);
+		rcu_assign_pointer(np->dev->npinfo, NULL);
 		call_rcu_bh(&npinfo->rcu, rcu_cleanup_netpoll_info);
 	}
 }
 EXPORT_SYMBOL_GPL(__netpoll_cleanup);
 
-static void rcu_cleanup_netpoll(struct rcu_head *rcu_head)
+static void netpoll_async_cleanup(struct work_struct *work)
 {
-	struct netpoll *np = container_of(rcu_head, struct netpoll, rcu);
+	struct netpoll *np = container_of(work, struct netpoll, cleanup_work);
 
+	rtnl_lock();
 	__netpoll_cleanup(np);
+	rtnl_unlock();
 	kfree(np);
 }
 
-void __netpoll_free_rcu(struct netpoll *np)
+void __netpoll_free_async(struct netpoll *np)
 {
-	call_rcu_bh(&np->rcu, rcu_cleanup_netpoll);
+	schedule_work(&np->cleanup_work);
 }
-EXPORT_SYMBOL_GPL(__netpoll_free_rcu);
+EXPORT_SYMBOL_GPL(__netpoll_free_async);
 
 void netpoll_cleanup(struct netpoll *np)
 {
-- 
cgit v1.2.3


From 05ec260edecaf3dc214cff49d43b1ad9b2cbb710 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Thu, 7 Feb 2013 10:17:31 +0100
Subject: mfd: db8500-prcmu: update resource passing

When trying to get rid of the cross-includes of <mach/id.h>
from different drivers, so we can localize ASIC/CPU detection
to the mach-ux500 folder, we run into the way the PRCMU
handles base addresses and firmware detection.

This patch updates the firmware version detection to pass
the required information as platform data instead of
relying on cpu_is_* macros.

Now the PRCMU base address, the secondary TCDM area, the
TCPM area and the IRQ are passed as resources instead of
being grabbed from <mach/*> files. Incidentally this also
removes part of the reliance on <mach/irqs.h>.

Further it updates the firmware version detection, since the
location of the firmware ID bytes in the designated memory
are is now passed from the platform data instead. There is
no reason not to include the nice split-off of a struct to
hold the firmware information and a separate function to
populate it.

The patch actually rids the need to use the external
db8500_prcmu_early_init call at all, but I'm keepin back
that removal as I don't want the patch to be too big.

Cc: arm@kernel.org
Cc: Michel Jaoen <michel.jaouen@stericsson.com>
Cc: Lee Jones <lee.jones@linaro.org>
Acked-by: Samuel Ortiz <sameo@linux.intel.com>
Acked-by: Loic Pallardy <loic.pallardy@stericsson.com>
Acked-by: Fabio Baltieri <fabio.baltieri@linaro.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 arch/arm/mach-ux500/board-mop500.c   |   5 +-
 arch/arm/mach-ux500/cpu-db8500.c     |   7 +-
 arch/arm/mach-ux500/devices-db8500.c |  44 +++++++++++++
 arch/arm/mach-ux500/devices-db8500.h |   5 ++
 drivers/mfd/db8500-prcmu.c           | 122 +++++++++++++++++++++++------------
 include/linux/mfd/db8500-prcmu.h     |  14 ----
 include/linux/mfd/dbx500-prcmu.h     |  46 +++++++++++++
 7 files changed, 180 insertions(+), 63 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-ux500/board-mop500.c b/arch/arm/mach-ux500/board-mop500.c
index d453522edb0d..6cb1407961e0 100644
--- a/arch/arm/mach-ux500/board-mop500.c
+++ b/arch/arm/mach-ux500/board-mop500.c
@@ -215,7 +215,7 @@ static struct platform_device snowball_sbnet_dev = {
 	},
 };
 
-static struct ab8500_platform_data ab8500_platdata = {
+struct ab8500_platform_data ab8500_platdata = {
 	.irq_base	= MOP500_AB8500_IRQ_BASE,
 	.regulator_reg_init = ab8500_regulator_reg_init,
 	.num_regulator_reg_init	= ARRAY_SIZE(ab8500_regulator_reg_init),
@@ -651,6 +651,7 @@ static void __init mop500_init_machine(void)
 	int i2c0_devs;
 	int i;
 
+	platform_device_register(&db8500_prcmu_device);
 	mop500_gpio_keys[0].gpio = GPIO_PROX_SENSOR;
 
 	mop500_pinmaps_init();
@@ -685,6 +686,7 @@ static void __init snowball_init_machine(void)
 	struct device *parent = NULL;
 	int i;
 
+	platform_device_register(&db8500_prcmu_device);
 	snowball_pinmaps_init();
 	parent = u8500_init_devices(&ab8500_platdata);
 
@@ -710,6 +712,7 @@ static void __init hrefv60_init_machine(void)
 	int i2c0_devs;
 	int i;
 
+	platform_device_register(&db8500_prcmu_device);
 	/*
 	 * The HREFv60 board removed a GPIO expander and routed
 	 * all these GPIO pins to the internal GPIO controller
diff --git a/arch/arm/mach-ux500/cpu-db8500.c b/arch/arm/mach-ux500/cpu-db8500.c
index 09c3fee726d4..8501970641bc 100644
--- a/arch/arm/mach-ux500/cpu-db8500.c
+++ b/arch/arm/mach-ux500/cpu-db8500.c
@@ -139,14 +139,9 @@ static struct platform_device db8500_pmu_device = {
 	.dev.platform_data	= &db8500_pmu_platdata,
 };
 
-static struct platform_device db8500_prcmu_device = {
-	.name			= "db8500-prcmu",
-};
-
 static struct platform_device *platform_devs[] __initdata = {
 	&u8500_dma40_device,
 	&db8500_pmu_device,
-	&db8500_prcmu_device,
 };
 
 static resource_size_t __initdata db8500_gpio_base[] = {
@@ -286,6 +281,8 @@ static struct of_dev_auxdata u8500_auxdata_lookup[] __initdata = {
 	OF_DEV_AUXDATA("st,nomadik-i2c", 0x80128000, "nmk-i2c.2", NULL),
 	OF_DEV_AUXDATA("st,nomadik-i2c", 0x80110000, "nmk-i2c.3", NULL),
 	OF_DEV_AUXDATA("st,nomadik-i2c", 0x8012a000, "nmk-i2c.4", NULL),
+	OF_DEV_AUXDATA("stericsson,db8500-prcmu", 0x80157000, "db8500-prcmu",
+			&db8500_prcmu_pdata),
 	/* Requires device name bindings. */
 	OF_DEV_AUXDATA("stericsson,nmk_pinctrl", 0, "pinctrl-db8500", NULL),
 	/* Requires clock name and DMA bindings. */
diff --git a/arch/arm/mach-ux500/devices-db8500.c b/arch/arm/mach-ux500/devices-db8500.c
index 318d49020894..f3d9419f75d3 100644
--- a/arch/arm/mach-ux500/devices-db8500.c
+++ b/arch/arm/mach-ux500/devices-db8500.c
@@ -13,11 +13,13 @@
 #include <linux/amba/bus.h>
 #include <linux/amba/pl022.h>
 #include <linux/platform_data/dma-ste-dma40.h>
+#include <linux/mfd/dbx500-prcmu.h>
 
 #include <mach/hardware.h>
 #include <mach/setup.h>
 #include <mach/irqs.h>
 
+#include "devices-db8500.h"
 #include "ste-dma40-db8500.h"
 
 static struct resource dma40_resources[] = {
@@ -194,3 +196,45 @@ struct platform_device u8500_ske_keypad_device = {
 	.num_resources = ARRAY_SIZE(keypad_resources),
 	.resource = keypad_resources,
 };
+
+struct prcmu_pdata db8500_prcmu_pdata = {
+	.ab_platdata	= &ab8500_platdata,
+	.version_offset	= DB8500_PRCMU_FW_VERSION_OFFSET,
+	.legacy_offset	= DB8500_PRCMU_LEGACY_OFFSET,
+};
+
+static struct resource db8500_prcmu_res[] = {
+	{
+		.name  = "prcmu",
+		.start = U8500_PRCMU_BASE,
+		.end   = U8500_PRCMU_BASE + SZ_8K - 1,
+		.flags = IORESOURCE_MEM,
+	},
+	{
+		.name  = "prcmu-tcdm",
+		.start = U8500_PRCMU_TCDM_BASE,
+		.end   = U8500_PRCMU_TCDM_BASE + SZ_4K - 1,
+		.flags = IORESOURCE_MEM,
+	},
+	{
+		.name  = "irq",
+		.start = IRQ_DB8500_PRCMU1,
+		.end   = IRQ_DB8500_PRCMU1,
+		.flags = IORESOURCE_IRQ,
+	},
+	{
+		.name  = "prcmu-tcpm",
+		.start = U8500_PRCMU_TCPM_BASE,
+		.end   = U8500_PRCMU_TCPM_BASE + SZ_4K - 1,
+		.flags = IORESOURCE_MEM,
+	},
+};
+
+struct platform_device db8500_prcmu_device = {
+	.name			= "db8500-prcmu",
+	.resource		= db8500_prcmu_res,
+	.num_resources		= ARRAY_SIZE(db8500_prcmu_res),
+	.dev = {
+		.platform_data = &db8500_prcmu_pdata,
+	},
+};
diff --git a/arch/arm/mach-ux500/devices-db8500.h b/arch/arm/mach-ux500/devices-db8500.h
index a5e05f6e256f..dbcb35c48f06 100644
--- a/arch/arm/mach-ux500/devices-db8500.h
+++ b/arch/arm/mach-ux500/devices-db8500.h
@@ -14,6 +14,11 @@
 
 struct ske_keypad_platform_data;
 struct pl022_ssp_controller;
+struct platform_device;
+
+extern struct ab8500_platform_data ab8500_platdata;
+extern struct prcmu_pdata db8500_prcmu_pdata;
+extern struct platform_device db8500_prcmu_device;
 
 static inline struct platform_device *
 db8500_add_ske_keypad(struct device *parent,
diff --git a/drivers/mfd/db8500-prcmu.c b/drivers/mfd/db8500-prcmu.c
index 67d8b25d183e..eba03d2329dd 100644
--- a/drivers/mfd/db8500-prcmu.c
+++ b/drivers/mfd/db8500-prcmu.c
@@ -38,9 +38,6 @@
 #include <mach/db8500-regs.h>
 #include "dbx500-prcmu-regs.h"
 
-/* Offset for the firmware version within the TCPM */
-#define PRCMU_FW_VERSION_OFFSET 0xA4
-
 /* Index of different voltages to be used when accessing AVSData */
 #define PRCM_AVS_BASE		0x2FC
 #define PRCM_AVS_VBB_RET	(PRCM_AVS_BASE + 0x0)
@@ -2704,21 +2701,43 @@ static struct irq_chip prcmu_irq_chip = {
 	.irq_unmask	= prcmu_irq_unmask,
 };
 
-static char *fw_project_name(u8 project)
+static __init char *fw_project_name(u32 project)
 {
 	switch (project) {
 	case PRCMU_FW_PROJECT_U8500:
 		return "U8500";
-	case PRCMU_FW_PROJECT_U8500_C2:
-		return "U8500 C2";
+	case PRCMU_FW_PROJECT_U8400:
+		return "U8400";
 	case PRCMU_FW_PROJECT_U9500:
 		return "U9500";
-	case PRCMU_FW_PROJECT_U9500_C2:
-		return "U9500 C2";
+	case PRCMU_FW_PROJECT_U8500_MBB:
+		return "U8500 MBB";
+	case PRCMU_FW_PROJECT_U8500_C1:
+		return "U8500 C1";
+	case PRCMU_FW_PROJECT_U8500_C2:
+		return "U8500 C2";
+	case PRCMU_FW_PROJECT_U8500_C3:
+		return "U8500 C3";
+	case PRCMU_FW_PROJECT_U8500_C4:
+		return "U8500 C4";
+	case PRCMU_FW_PROJECT_U9500_MBL:
+		return "U9500 MBL";
+	case PRCMU_FW_PROJECT_U8500_MBL:
+		return "U8500 MBL";
+	case PRCMU_FW_PROJECT_U8500_MBL2:
+		return "U8500 MBL2";
 	case PRCMU_FW_PROJECT_U8520:
-		return "U8520";
+		return "U8520 MBL";
 	case PRCMU_FW_PROJECT_U8420:
 		return "U8420";
+	case PRCMU_FW_PROJECT_U9540:
+		return "U9540";
+	case PRCMU_FW_PROJECT_A9420:
+		return "A9420";
+	case PRCMU_FW_PROJECT_L8540:
+		return "L8540";
+	case PRCMU_FW_PROJECT_L8580:
+		return "L8580";
 	default:
 		return "Unknown";
 	}
@@ -2759,37 +2778,44 @@ static int db8500_irq_init(struct device_node *np)
 	return 0;
 }
 
-void __init db8500_prcmu_early_init(void)
+static void dbx500_fw_version_init(struct platform_device *pdev,
+			    u32 version_offset)
 {
-	if (cpu_is_u8500v2() || cpu_is_u9540()) {
-		void *tcpm_base = ioremap_nocache(U8500_PRCMU_TCPM_BASE, SZ_4K);
-
-		if (tcpm_base != NULL) {
-			u32 version;
-			version = readl(tcpm_base + PRCMU_FW_VERSION_OFFSET);
-			fw_info.version.project = version & 0xFF;
-			fw_info.version.api_version = (version >> 8) & 0xFF;
-			fw_info.version.func_version = (version >> 16) & 0xFF;
-			fw_info.version.errata = (version >> 24) & 0xFF;
-			fw_info.valid = true;
-			pr_info("PRCMU firmware: %s, version %d.%d.%d\n",
-				fw_project_name(fw_info.version.project),
-				(version >> 8) & 0xFF, (version >> 16) & 0xFF,
-				(version >> 24) & 0xFF);
-			iounmap(tcpm_base);
-		}
+	struct resource *res;
+	void __iomem *tcpm_base;
 
-		if (cpu_is_u9540())
-			tcdm_base = ioremap_nocache(U8500_PRCMU_TCDM_BASE,
-						SZ_4K + SZ_8K) + SZ_8K;
-		else
-			tcdm_base = __io_address(U8500_PRCMU_TCDM_BASE);
-	} else {
-		pr_err("prcmu: Unsupported chip version\n");
-		BUG();
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
+					   "prcmu-tcpm");
+	if (!res) {
+		dev_err(&pdev->dev,
+			"Error: no prcmu tcpm memory region provided\n");
+		return;
+	}
+	tcpm_base = ioremap(res->start, resource_size(res));
+	if (tcpm_base != NULL) {
+		u32 version;
+
+		version = readl(tcpm_base + version_offset);
+		fw_info.version.project = (version & 0xFF);
+		fw_info.version.api_version = (version >> 8) & 0xFF;
+		fw_info.version.func_version = (version >> 16) & 0xFF;
+		fw_info.version.errata = (version >> 24) & 0xFF;
+		strncpy(fw_info.version.project_name,
+			fw_project_name(fw_info.version.project),
+			PRCMU_FW_PROJECT_NAME_LEN);
+		fw_info.valid = true;
+		pr_info("PRCMU firmware: %s(%d), version %d.%d.%d\n",
+			fw_info.version.project_name,
+			fw_info.version.project,
+			fw_info.version.api_version,
+			fw_info.version.func_version,
+			fw_info.version.errata);
+		iounmap(tcpm_base);
 	}
-	tcdm_base = __io_address(U8500_PRCMU_TCDM_BASE);
+}
 
+void __init db8500_prcmu_early_init(void)
+{
 	spin_lock_init(&mb0_transfer.lock);
 	spin_lock_init(&mb0_transfer.dbb_irqs_lock);
 	mutex_init(&mb0_transfer.ac_wake_lock);
@@ -3099,20 +3125,30 @@ static void db8500_prcmu_update_cpufreq(void)
  */
 static int db8500_prcmu_probe(struct platform_device *pdev)
 {
-	struct ab8500_platform_data *ab8500_platdata = pdev->dev.platform_data;
 	struct device_node *np = pdev->dev.of_node;
+	struct prcmu_pdata *pdata = dev_get_platdata(&pdev->dev);
 	int irq = 0, err = 0, i;
+	struct resource *res;
 
 	init_prcm_registers();
 
+	dbx500_fw_version_init(pdev, pdata->version_offset);
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "prcmu-tcdm");
+	if (!res) {
+		dev_err(&pdev->dev, "no prcmu tcdm region provided\n");
+		return -ENOENT;
+	}
+	tcdm_base = devm_ioremap(&pdev->dev, res->start,
+			resource_size(res));
+
 	/* Clean up the mailbox interrupts after pre-kernel code. */
 	writel(ALL_MBOX_BITS, PRCM_ARM_IT1_CLR);
 
-	if (np)
-		irq = platform_get_irq(pdev, 0);
-
-	if (!np || irq <= 0)
-		irq = IRQ_DB8500_PRCMU1;
+	irq = platform_get_irq(pdev, 0);
+	if (irq <= 0) {
+		dev_err(&pdev->dev, "no prcmu irq provided\n");
+		return -ENOENT;
+	}
 
 	err = request_threaded_irq(irq, prcmu_irq_handler,
 	        prcmu_irq_thread_fn, IRQF_NO_SUSPEND, "prcmu", NULL);
@@ -3126,7 +3162,7 @@ static int db8500_prcmu_probe(struct platform_device *pdev)
 
 	for (i = 0; i < ARRAY_SIZE(db8500_prcmu_devs); i++) {
 		if (!strcmp(db8500_prcmu_devs[i].name, "ab8500-core")) {
-			db8500_prcmu_devs[i].platform_data = ab8500_platdata;
+			db8500_prcmu_devs[i].platform_data = pdata->ab_platdata;
 			db8500_prcmu_devs[i].pdata_size = sizeof(struct ab8500_platform_data);
 		}
 	}
diff --git a/include/linux/mfd/db8500-prcmu.h b/include/linux/mfd/db8500-prcmu.h
index a65deddede2f..77a46ae2fc17 100644
--- a/include/linux/mfd/db8500-prcmu.h
+++ b/include/linux/mfd/db8500-prcmu.h
@@ -487,20 +487,6 @@ struct prcmu_auto_pm_config {
 	u8 sva_policy;
 };
 
-#define PRCMU_FW_PROJECT_U8500		2
-#define PRCMU_FW_PROJECT_U9500		4
-#define PRCMU_FW_PROJECT_U8500_C2	7
-#define PRCMU_FW_PROJECT_U9500_C2	11
-#define PRCMU_FW_PROJECT_U8520		13
-#define PRCMU_FW_PROJECT_U8420		14
-
-struct prcmu_fw_version {
-	u8 project;
-	u8 api_version;
-	u8 func_version;
-	u8 errata;
-};
-
 #ifdef CONFIG_MFD_DB8500_PRCMU
 
 void db8500_prcmu_early_init(void);
diff --git a/include/linux/mfd/dbx500-prcmu.h b/include/linux/mfd/dbx500-prcmu.h
index 155280642583..f8bac7cfc25f 100644
--- a/include/linux/mfd/dbx500-prcmu.h
+++ b/include/linux/mfd/dbx500-prcmu.h
@@ -12,6 +12,10 @@
 #include <linux/notifier.h>
 #include <linux/err.h>
 
+/* Offset for the firmware version within the TCPM */
+#define DB8500_PRCMU_FW_VERSION_OFFSET 0xA4
+#define DBX540_PRCMU_FW_VERSION_OFFSET 0xA8
+
 /* PRCMU Wakeup defines */
 enum prcmu_wakeup_index {
 	PRCMU_WAKEUP_INDEX_RTC,
@@ -214,6 +218,48 @@ enum ddr_pwrst {
 	DDR_PWR_STATE_OFFHIGHLAT    = 0x03
 };
 
+#define DB8500_PRCMU_LEGACY_OFFSET		0xDD4
+
+struct prcmu_pdata
+{
+	bool enable_set_ddr_opp;
+	bool enable_ape_opp_100_voltage;
+	struct ab8500_platform_data *ab_platdata;
+	u32 version_offset;
+	u32 legacy_offset;
+	u32 adt_offset;
+};
+
+#define PRCMU_FW_PROJECT_U8500		2
+#define PRCMU_FW_PROJECT_U8400		3
+#define PRCMU_FW_PROJECT_U9500		4 /* Customer specific */
+#define PRCMU_FW_PROJECT_U8500_MBB	5
+#define PRCMU_FW_PROJECT_U8500_C1	6
+#define PRCMU_FW_PROJECT_U8500_C2	7
+#define PRCMU_FW_PROJECT_U8500_C3	8
+#define PRCMU_FW_PROJECT_U8500_C4	9
+#define PRCMU_FW_PROJECT_U9500_MBL	10
+#define PRCMU_FW_PROJECT_U8500_MBL	11 /* Customer specific */
+#define PRCMU_FW_PROJECT_U8500_MBL2	12 /* Customer specific */
+#define PRCMU_FW_PROJECT_U8520		13
+#define PRCMU_FW_PROJECT_U8420		14
+#define PRCMU_FW_PROJECT_A9420		20
+/* [32..63] 9540 and derivatives */
+#define PRCMU_FW_PROJECT_U9540		32
+/* [64..95] 8540 and derivatives */
+#define PRCMU_FW_PROJECT_L8540		64
+/* [96..126] 8580 and derivatives */
+#define PRCMU_FW_PROJECT_L8580		96
+
+#define PRCMU_FW_PROJECT_NAME_LEN	20
+struct prcmu_fw_version {
+	u32 project; /* Notice, project shifted with 8 on ux540 */
+	u8 api_version;
+	u8 func_version;
+	u8 errata;
+	char project_name[PRCMU_FW_PROJECT_NAME_LEN];
+};
+
 #include <linux/mfd/db8500-prcmu.h>
 
 #if defined(CONFIG_UX500_SOC_DB8500)
-- 
cgit v1.2.3


From a93bc0c6e07ed9bac44700280e65e2945d864fd4 Mon Sep 17 00:00:00 2001
From: Seiji Aguchi <seiji.aguchi@hds.com>
Date: Tue, 12 Feb 2013 13:04:41 -0800
Subject: efi_pstore: Introducing workqueue updating sysfs

[Problem]
efi_pstore creates sysfs entries, which enable users to access to NVRAM,
in a write callback. If a kernel panic happens in an interrupt context,
it may fail because it could sleep due to dynamic memory allocations during
creating sysfs entries.

[Patch Description]
This patch removes sysfs operations from a write callback by introducing
a workqueue updating sysfs entries which is scheduled after the write
callback is called.

Also, the workqueue is kicked in a just oops case.
A system will go down in other cases such as panic, clean shutdown and emergency
restart. And we don't need to create sysfs entries because there is no chance for
users to access to them.

efi_pstore will be robust against a kernel panic in an interrupt context with this patch.

Signed-off-by: Seiji Aguchi <seiji.aguchi@hds.com>
Acked-by: Matt Fleming <matt.fleming@intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 drivers/firmware/efivars.c | 85 +++++++++++++++++++++++++++++++++++++++++++---
 include/linux/efi.h        |  3 +-
 2 files changed, 82 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/firmware/efivars.c b/drivers/firmware/efivars.c
index a64fb7bda365..69225115304d 100644
--- a/drivers/firmware/efivars.c
+++ b/drivers/firmware/efivars.c
@@ -158,6 +158,13 @@ efivar_create_sysfs_entry(struct efivars *efivars,
 			  efi_char16_t *variable_name,
 			  efi_guid_t *vendor_guid);
 
+/*
+ * Prototype for workqueue functions updating sysfs entry
+ */
+
+static void efivar_update_sysfs_entries(struct work_struct *);
+static DECLARE_WORK(efivar_work, efivar_update_sysfs_entries);
+
 /* Return the number of unicode characters in data */
 static unsigned long
 utf16_strnlen(efi_char16_t *s, size_t maxlength)
@@ -1248,11 +1255,8 @@ static int efi_pstore_write(enum pstore_type_id type,
 
 	spin_unlock_irqrestore(&efivars->lock, flags);
 
-	if (size)
-		ret = efivar_create_sysfs_entry(efivars,
-					  utf16_strsize(efi_name,
-							DUMP_NAME_LEN * 2),
-					  efi_name, &vendor);
+	if (reason == KMSG_DUMP_OOPS)
+		schedule_work(&efivar_work);
 
 	*id = part;
 	return ret;
@@ -1496,6 +1500,75 @@ static ssize_t efivar_delete(struct file *filp, struct kobject *kobj,
 	return count;
 }
 
+static bool variable_is_present(efi_char16_t *variable_name, efi_guid_t *vendor)
+{
+	struct efivar_entry *entry, *n;
+	struct efivars *efivars = &__efivars;
+	unsigned long strsize1, strsize2;
+	bool found = false;
+
+	strsize1 = utf16_strsize(variable_name, 1024);
+	list_for_each_entry_safe(entry, n, &efivars->list, list) {
+		strsize2 = utf16_strsize(entry->var.VariableName, 1024);
+		if (strsize1 == strsize2 &&
+			!memcmp(variable_name, &(entry->var.VariableName),
+				strsize2) &&
+			!efi_guidcmp(entry->var.VendorGuid,
+				*vendor)) {
+			found = true;
+			break;
+		}
+	}
+	return found;
+}
+
+static void efivar_update_sysfs_entries(struct work_struct *work)
+{
+	struct efivars *efivars = &__efivars;
+	efi_guid_t vendor;
+	efi_char16_t *variable_name;
+	unsigned long variable_name_size = 1024;
+	efi_status_t status = EFI_NOT_FOUND;
+	bool found;
+
+	/* Add new sysfs entries */
+	while (1) {
+		variable_name = kzalloc(variable_name_size, GFP_KERNEL);
+		if (!variable_name) {
+			pr_err("efivars: Memory allocation failed.\n");
+			return;
+		}
+
+		spin_lock_irq(&efivars->lock);
+		found = false;
+		while (1) {
+			variable_name_size = 1024;
+			status = efivars->ops->get_next_variable(
+							&variable_name_size,
+							variable_name,
+							&vendor);
+			if (status != EFI_SUCCESS) {
+				break;
+			} else {
+				if (!variable_is_present(variable_name,
+				    &vendor)) {
+					found = true;
+					break;
+				}
+			}
+		}
+		spin_unlock_irq(&efivars->lock);
+
+		if (!found) {
+			kfree(variable_name);
+			break;
+		} else
+			efivar_create_sysfs_entry(efivars,
+						  variable_name_size,
+						  variable_name, &vendor);
+	}
+}
+
 /*
  * Let's not leave out systab information that snuck into
  * the efivars driver
@@ -1833,6 +1906,8 @@ err_put:
 static void __exit
 efivars_exit(void)
 {
+	cancel_work_sync(&efivar_work);
+
 	if (efi_enabled) {
 		unregister_efivars(&__efivars);
 		kobject_put(efi_kobj);
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 8b84916dc671..d0c68aee0925 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -728,7 +728,8 @@ struct efivars {
 	 * 1) ->list - adds, removals, reads, writes
 	 * 2) ops.[gs]et_variable() calls.
 	 * It must not be held when creating sysfs entries or calling kmalloc.
-	 * ops.get_next_variable() is only called from register_efivars(),
+	 * ops.get_next_variable() is only called from register_efivars()
+	 * or efivar_update_sysfs_entries(),
 	 * which is protected by the BKL, so that path is safe.
 	 */
 	spinlock_t lock;
-- 
cgit v1.2.3


From d9ba8f9e6298af71ec1c1fd3d88c3ef68abd0ec3 Mon Sep 17 00:00:00 2001
From: Mugunthan V N <mugunthanvnm@ti.com>
Date: Mon, 11 Feb 2013 09:52:20 +0000
Subject: driver: net: ethernet: cpsw: dual emac interface implementation

The CPSW switch can act as Dual EMAC by segregating the switch ports
using VLAN and port VLAN as per the TRM description in
14.3.2.10.2 Dual Mac Mode

Following CPSW components will be common for both the interfaces.
* Interrupt source is common for both eth interfaces
* Interrupt pacing is common for both interfaces
* Hardware statistics is common for all the ports
* CPDMA is common for both eth interface
* CPTS is common for both the interface and it should not be enabled on
  both the interface as timestamping information doesn't contain port
  information.

Constrains
* Reserved VID of One port should not be used in other interface which will
  enable switching functionality
* Same VID must not be used in both the interface which will enable switching
  functionality

Signed-off-by: Mugunthan V N <mugunthanvnm@ti.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/devicetree/bindings/net/cpsw.txt |   2 +
 drivers/net/ethernet/ti/cpsw.c                 | 335 +++++++++++++++++++++----
 include/linux/platform_data/cpsw.h             |   3 +
 3 files changed, 288 insertions(+), 52 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/net/cpsw.txt b/Documentation/devicetree/bindings/net/cpsw.txt
index 6ddd0286a9b7..ecfdf756d10f 100644
--- a/Documentation/devicetree/bindings/net/cpsw.txt
+++ b/Documentation/devicetree/bindings/net/cpsw.txt
@@ -24,6 +24,8 @@ Required properties:
 Optional properties:
 - ti,hwmods		: Must be "cpgmac0"
 - no_bd_ram		: Must be 0 or 1
+- dual_emac		: Specifies Switch to act as Dual EMAC
+- dual_emac_res_vlan	: Specifies VID to be used to segregate the ports
 
 Note: "ti,hwmods" field is used to fetch the base address and irq
 resources from TI, omap hwmod data base during device registration.
diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index 4b964bb02d4c..4ceed6e0f1be 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -122,6 +122,10 @@ do {								\
 #define CPSW_VLAN_AWARE		BIT(1)
 #define CPSW_ALE_VLAN_AWARE	1
 
+#define CPSW_FIFO_NORMAL_MODE		(0 << 15)
+#define CPSW_FIFO_DUAL_MAC_MODE		(1 << 15)
+#define CPSW_FIFO_RATE_LIMIT_MODE	(2 << 15)
+
 #define cpsw_enable_irq(priv)	\
 	do {			\
 		u32 i;		\
@@ -254,7 +258,7 @@ struct cpsw_ss_regs {
 struct cpsw_host_regs {
 	u32	max_blks;
 	u32	blk_cnt;
-	u32	flow_thresh;
+	u32	tx_in_ctl;
 	u32	port_vlan;
 	u32	tx_pri_map;
 	u32	cpdma_tx_pri_map;
@@ -281,6 +285,9 @@ struct cpsw_slave {
 	u32				mac_control;
 	struct cpsw_slave_data		*data;
 	struct phy_device		*phy;
+	struct net_device		*ndev;
+	u32				port_vlan;
+	u32				open_stat;
 };
 
 static inline u32 slave_read(struct cpsw_slave *slave, u32 offset)
@@ -320,15 +327,63 @@ struct cpsw_priv {
 	u32 irqs_table[4];
 	u32 num_irqs;
 	struct cpts *cpts;
+	u32 emac_port;
 };
 
 #define napi_to_priv(napi)	container_of(napi, struct cpsw_priv, napi)
-#define for_each_slave(priv, func, arg...)			\
-	do {							\
-		int idx;					\
-		for (idx = 0; idx < (priv)->data.slaves; idx++)	\
-			(func)((priv)->slaves + idx, ##arg);	\
+#define for_each_slave(priv, func, arg...)				\
+	do {								\
+		int idx;						\
+		if (priv->data.dual_emac)				\
+			(func)((priv)->slaves + priv->emac_port, ##arg);\
+		else							\
+			for (idx = 0; idx < (priv)->data.slaves; idx++)	\
+				(func)((priv)->slaves + idx, ##arg);	\
+	} while (0)
+#define cpsw_get_slave_ndev(priv, __slave_no__)				\
+	(priv->slaves[__slave_no__].ndev)
+#define cpsw_get_slave_priv(priv, __slave_no__)				\
+	((priv->slaves[__slave_no__].ndev) ?				\
+		netdev_priv(priv->slaves[__slave_no__].ndev) : NULL)	\
+
+#define cpsw_dual_emac_src_port_detect(status, priv, ndev, skb)		\
+	do {								\
+		if (!priv->data.dual_emac)				\
+			break;						\
+		if (CPDMA_RX_SOURCE_PORT(status) == 1) {		\
+			ndev = cpsw_get_slave_ndev(priv, 0);		\
+			priv = netdev_priv(ndev);			\
+			skb->dev = ndev;				\
+		} else if (CPDMA_RX_SOURCE_PORT(status) == 2) {		\
+			ndev = cpsw_get_slave_ndev(priv, 1);		\
+			priv = netdev_priv(ndev);			\
+			skb->dev = ndev;				\
+		}							\
 	} while (0)
+#define cpsw_add_mcast(priv, addr)					\
+	do {								\
+		if (priv->data.dual_emac) {				\
+			struct cpsw_slave *slave = priv->slaves +	\
+						priv->emac_port;	\
+			int slave_port = cpsw_get_slave_port(priv,	\
+						slave->slave_num);	\
+			cpsw_ale_add_mcast(priv->ale, addr,		\
+				1 << slave_port | 1 << priv->host_port,	\
+				ALE_VLAN, slave->port_vlan, 0);		\
+		} else {						\
+			cpsw_ale_add_mcast(priv->ale, addr,		\
+				ALE_ALL_PORTS << priv->host_port,	\
+				0, 0, 0);				\
+		}							\
+	} while (0)
+
+static inline int cpsw_get_slave_port(struct cpsw_priv *priv, u32 slave_num)
+{
+	if (priv->host_port == 0)
+		return slave_num + 1;
+	else
+		return slave_num;
+}
 
 static void cpsw_ndo_set_rx_mode(struct net_device *ndev)
 {
@@ -348,8 +403,7 @@ static void cpsw_ndo_set_rx_mode(struct net_device *ndev)
 
 		/* program multicast address list into ALE register */
 		netdev_for_each_mc_addr(ha, ndev) {
-			cpsw_ale_add_mcast(priv->ale, (u8 *)ha->addr,
-				ALE_ALL_PORTS << priv->host_port, 0, 0, 0);
+			cpsw_add_mcast(priv, (u8 *)ha->addr);
 		}
 	}
 }
@@ -396,6 +450,8 @@ void cpsw_rx_handler(void *token, int len, int status)
 	struct cpsw_priv	*priv = netdev_priv(ndev);
 	int			ret = 0;
 
+	cpsw_dual_emac_src_port_detect(status, priv, ndev, skb);
+
 	/* free and bail if we are shutting down */
 	if (unlikely(!netif_running(ndev)) ||
 			unlikely(!netif_carrier_ok(ndev))) {
@@ -437,18 +493,17 @@ static irqreturn_t cpsw_interrupt(int irq, void *dev_id)
 		cpsw_intr_disable(priv);
 		cpsw_disable_irq(priv);
 		napi_schedule(&priv->napi);
+	} else {
+		priv = cpsw_get_slave_priv(priv, 1);
+		if (likely(priv) && likely(netif_running(priv->ndev))) {
+			cpsw_intr_disable(priv);
+			cpsw_disable_irq(priv);
+			napi_schedule(&priv->napi);
+		}
 	}
 	return IRQ_HANDLED;
 }
 
-static inline int cpsw_get_slave_port(struct cpsw_priv *priv, u32 slave_num)
-{
-	if (priv->host_port == 0)
-		return slave_num + 1;
-	else
-		return slave_num;
-}
-
 static int cpsw_poll(struct napi_struct *napi, int budget)
 {
 	struct cpsw_priv	*priv = napi_to_priv(napi);
@@ -566,6 +621,54 @@ static inline int __show_stat(char *buf, int maxlen, const char *name, u32 val)
 				leader + strlen(name), val);
 }
 
+static int cpsw_common_res_usage_state(struct cpsw_priv *priv)
+{
+	u32 i;
+	u32 usage_count = 0;
+
+	if (!priv->data.dual_emac)
+		return 0;
+
+	for (i = 0; i < priv->data.slaves; i++)
+		if (priv->slaves[i].open_stat)
+			usage_count++;
+
+	return usage_count;
+}
+
+static inline int cpsw_tx_packet_submit(struct net_device *ndev,
+			struct cpsw_priv *priv, struct sk_buff *skb)
+{
+	if (!priv->data.dual_emac)
+		return cpdma_chan_submit(priv->txch, skb, skb->data,
+				  skb->len, 0, GFP_KERNEL);
+
+	if (ndev == cpsw_get_slave_ndev(priv, 0))
+		return cpdma_chan_submit(priv->txch, skb, skb->data,
+				  skb->len, 1, GFP_KERNEL);
+	else
+		return cpdma_chan_submit(priv->txch, skb, skb->data,
+				  skb->len, 2, GFP_KERNEL);
+}
+
+static inline void cpsw_add_dual_emac_def_ale_entries(
+		struct cpsw_priv *priv, struct cpsw_slave *slave,
+		u32 slave_port)
+{
+	u32 port_mask = 1 << slave_port | 1 << priv->host_port;
+
+	if (priv->version == CPSW_VERSION_1)
+		slave_write(slave, slave->port_vlan, CPSW1_PORT_VLAN);
+	else
+		slave_write(slave, slave->port_vlan, CPSW2_PORT_VLAN);
+	cpsw_ale_add_vlan(priv->ale, slave->port_vlan, port_mask,
+			  port_mask, port_mask, 0);
+	cpsw_ale_add_mcast(priv->ale, priv->ndev->broadcast,
+			   port_mask, ALE_VLAN, slave->port_vlan, 0);
+	cpsw_ale_add_ucast(priv->ale, priv->mac_addr,
+		priv->host_port, ALE_VLAN, slave->port_vlan);
+}
+
 static void cpsw_slave_open(struct cpsw_slave *slave, struct cpsw_priv *priv)
 {
 	char name[32];
@@ -595,8 +698,11 @@ static void cpsw_slave_open(struct cpsw_slave *slave, struct cpsw_priv *priv)
 
 	slave_port = cpsw_get_slave_port(priv, slave->slave_num);
 
-	cpsw_ale_add_mcast(priv->ale, priv->ndev->broadcast,
-			   1 << slave_port, 0, 0, ALE_MCAST_FWD_2);
+	if (priv->data.dual_emac)
+		cpsw_add_dual_emac_def_ale_entries(priv, slave, slave_port);
+	else
+		cpsw_ale_add_mcast(priv->ale, priv->ndev->broadcast,
+				   1 << slave_port, 0, 0, ALE_MCAST_FWD_2);
 
 	slave->phy = phy_connect(priv->ndev, slave->data->phy_id,
 				 &cpsw_adjust_link, slave->data->phy_if);
@@ -634,6 +740,7 @@ static inline void cpsw_add_default_vlan(struct cpsw_priv *priv)
 static void cpsw_init_host_port(struct cpsw_priv *priv)
 {
 	u32 control_reg;
+	u32 fifo_mode;
 
 	/* soft reset the controller and initialize ale */
 	soft_reset("cpsw", &priv->regs->soft_reset);
@@ -645,6 +752,9 @@ static void cpsw_init_host_port(struct cpsw_priv *priv)
 	control_reg = readl(&priv->regs->control);
 	control_reg |= CPSW_VLAN_AWARE;
 	writel(control_reg, &priv->regs->control);
+	fifo_mode = (priv->data.dual_emac) ? CPSW_FIFO_DUAL_MAC_MODE :
+		     CPSW_FIFO_NORMAL_MODE;
+	writel(fifo_mode, &priv->host_port_regs->tx_in_ctl);
 
 	/* setup host port priority mapping */
 	__raw_writel(CPDMA_TX_PRIORITY_MAP,
@@ -654,9 +764,12 @@ static void cpsw_init_host_port(struct cpsw_priv *priv)
 	cpsw_ale_control_set(priv->ale, priv->host_port,
 			     ALE_PORT_STATE, ALE_PORT_STATE_FORWARD);
 
-	cpsw_ale_add_ucast(priv->ale, priv->mac_addr, priv->host_port, 0, 0);
-	cpsw_ale_add_mcast(priv->ale, priv->ndev->broadcast,
-			   1 << priv->host_port, 0, 0, ALE_MCAST_FWD_2);
+	if (!priv->data.dual_emac) {
+		cpsw_ale_add_ucast(priv->ale, priv->mac_addr, priv->host_port,
+				   0, 0);
+		cpsw_ale_add_mcast(priv->ale, priv->ndev->broadcast,
+				   1 << priv->host_port, 0, 0, ALE_MCAST_FWD_2);
+	}
 }
 
 static int cpsw_ndo_open(struct net_device *ndev)
@@ -665,7 +778,8 @@ static int cpsw_ndo_open(struct net_device *ndev)
 	int i, ret;
 	u32 reg;
 
-	cpsw_intr_disable(priv);
+	if (!cpsw_common_res_usage_state(priv))
+		cpsw_intr_disable(priv);
 	netif_carrier_off(ndev);
 
 	pm_runtime_get_sync(&priv->pdev->dev);
@@ -677,46 +791,54 @@ static int cpsw_ndo_open(struct net_device *ndev)
 		 CPSW_RTL_VERSION(reg));
 
 	/* initialize host and slave ports */
-	cpsw_init_host_port(priv);
+	if (!cpsw_common_res_usage_state(priv))
+		cpsw_init_host_port(priv);
 	for_each_slave(priv, cpsw_slave_open, priv);
 
 	/* Add default VLAN */
-	cpsw_add_default_vlan(priv);
+	if (!priv->data.dual_emac)
+		cpsw_add_default_vlan(priv);
 
-	/* setup tx dma to fixed prio and zero offset */
-	cpdma_control_set(priv->dma, CPDMA_TX_PRIO_FIXED, 1);
-	cpdma_control_set(priv->dma, CPDMA_RX_BUFFER_OFFSET, 0);
+	if (!cpsw_common_res_usage_state(priv)) {
+		/* setup tx dma to fixed prio and zero offset */
+		cpdma_control_set(priv->dma, CPDMA_TX_PRIO_FIXED, 1);
+		cpdma_control_set(priv->dma, CPDMA_RX_BUFFER_OFFSET, 0);
 
-	/* disable priority elevation and enable statistics on all ports */
-	__raw_writel(0, &priv->regs->ptype);
+		/* disable priority elevation */
+		__raw_writel(0, &priv->regs->ptype);
 
-	/* enable statistics collection only on the host port */
-	__raw_writel(0x7, &priv->regs->stat_port_en);
+		/* enable statistics collection only on all ports */
+		__raw_writel(0x7, &priv->regs->stat_port_en);
 
-	if (WARN_ON(!priv->data.rx_descs))
-		priv->data.rx_descs = 128;
+		if (WARN_ON(!priv->data.rx_descs))
+			priv->data.rx_descs = 128;
 
-	for (i = 0; i < priv->data.rx_descs; i++) {
-		struct sk_buff *skb;
+		for (i = 0; i < priv->data.rx_descs; i++) {
+			struct sk_buff *skb;
 
-		ret = -ENOMEM;
-		skb = netdev_alloc_skb_ip_align(priv->ndev,
-						priv->rx_packet_max);
-		if (!skb)
-			break;
-		ret = cpdma_chan_submit(priv->rxch, skb, skb->data,
+			ret = -ENOMEM;
+			skb = netdev_alloc_skb_ip_align(priv->ndev,
+							priv->rx_packet_max);
+			if (!skb)
+				break;
+			ret = cpdma_chan_submit(priv->rxch, skb, skb->data,
 					skb_tailroom(skb), 0, GFP_KERNEL);
-		if (WARN_ON(ret < 0))
-			break;
+			if (WARN_ON(ret < 0))
+				break;
+		}
+		/* continue even if we didn't manage to submit all
+		 * receive descs
+		 */
+		cpsw_info(priv, ifup, "submitted %d rx descriptors\n", i);
 	}
-	/* continue even if we didn't manage to submit all receive descs */
-	cpsw_info(priv, ifup, "submitted %d rx descriptors\n", i);
 
 	cpdma_ctlr_start(priv->dma);
 	cpsw_intr_enable(priv);
 	napi_enable(&priv->napi);
 	cpdma_ctlr_eoi(priv->dma);
 
+	if (priv->data.dual_emac)
+		priv->slaves[priv->emac_port].open_stat = true;
 	return 0;
 }
 
@@ -737,12 +859,17 @@ static int cpsw_ndo_stop(struct net_device *ndev)
 	netif_stop_queue(priv->ndev);
 	napi_disable(&priv->napi);
 	netif_carrier_off(priv->ndev);
-	cpsw_intr_disable(priv);
-	cpdma_ctlr_int_ctrl(priv->dma, false);
-	cpdma_ctlr_stop(priv->dma);
-	cpsw_ale_stop(priv->ale);
+
+	if (cpsw_common_res_usage_state(priv) <= 1) {
+		cpsw_intr_disable(priv);
+		cpdma_ctlr_int_ctrl(priv->dma, false);
+		cpdma_ctlr_stop(priv->dma);
+		cpsw_ale_stop(priv->ale);
+	}
 	for_each_slave(priv, cpsw_slave_stop, priv);
 	pm_runtime_put_sync(&priv->pdev->dev);
+	if (priv->data.dual_emac)
+		priv->slaves[priv->emac_port].open_stat = false;
 	return 0;
 }
 
@@ -766,8 +893,7 @@ static netdev_tx_t cpsw_ndo_start_xmit(struct sk_buff *skb,
 
 	skb_tx_timestamp(skb);
 
-	ret = cpdma_chan_submit(priv->txch, skb, skb->data,
-				skb->len, 0, GFP_KERNEL);
+	ret = cpsw_tx_packet_submit(ndev, priv, skb);
 	if (unlikely(ret != 0)) {
 		cpsw_err(priv, tx_err, "desc submit failed\n");
 		goto fail;
@@ -836,9 +962,14 @@ static void cpsw_hwtstamp_v1(struct cpsw_priv *priv)
 
 static void cpsw_hwtstamp_v2(struct cpsw_priv *priv)
 {
-	struct cpsw_slave *slave = &priv->slaves[priv->data.cpts_active_slave];
+	struct cpsw_slave *slave;
 	u32 ctrl, mtype;
 
+	if (priv->data.dual_emac)
+		slave = &priv->slaves[priv->emac_port];
+	else
+		slave = &priv->slaves[priv->data.cpts_active_slave];
+
 	ctrl = slave_read(slave, CPSW2_CONTROL);
 	ctrl &= ~CTRL_ALL_TS_MASK;
 
@@ -1124,6 +1255,7 @@ static void cpsw_slave_init(struct cpsw_slave *slave, struct cpsw_priv *priv,
 	slave->data	= data;
 	slave->regs	= regs + slave_reg_ofs;
 	slave->sliver	= regs + sliver_reg_ofs;
+	slave->port_vlan = data->dual_emac_res_vlan;
 }
 
 static int cpsw_probe_dt(struct cpsw_platform_data *data,
@@ -1204,6 +1336,9 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data,
 	}
 	data->mac_control = prop;
 
+	if (!of_property_read_u32(node, "dual_emac", &prop))
+		data->dual_emac = prop;
+
 	/*
 	 * Populate all the child nodes here...
 	 */
@@ -1237,6 +1372,18 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data,
 		if (mac_addr)
 			memcpy(slave_data->mac_addr, mac_addr, ETH_ALEN);
 
+		if (data->dual_emac) {
+			if (of_property_read_u32(node, "dual_emac_res_vlan",
+						 &prop)) {
+				pr_err("Missing dual_emac_res_vlan in DT.\n");
+				slave_data->dual_emac_res_vlan = i+1;
+				pr_err("Using %d as Reserved VLAN for %d slave\n",
+				       slave_data->dual_emac_res_vlan, i);
+			} else {
+				slave_data->dual_emac_res_vlan = prop;
+			}
+		}
+
 		i++;
 	}
 
@@ -1247,6 +1394,79 @@ error_ret:
 	return ret;
 }
 
+static int cpsw_probe_dual_emac(struct platform_device *pdev,
+				struct cpsw_priv *priv)
+{
+	struct cpsw_platform_data	*data = &priv->data;
+	struct net_device		*ndev;
+	struct cpsw_priv		*priv_sl2;
+	int ret = 0, i;
+
+	ndev = alloc_etherdev(sizeof(struct cpsw_priv));
+	if (!ndev) {
+		pr_err("cpsw: error allocating net_device\n");
+		return -ENOMEM;
+	}
+
+	priv_sl2 = netdev_priv(ndev);
+	spin_lock_init(&priv_sl2->lock);
+	priv_sl2->data = *data;
+	priv_sl2->pdev = pdev;
+	priv_sl2->ndev = ndev;
+	priv_sl2->dev  = &ndev->dev;
+	priv_sl2->msg_enable = netif_msg_init(debug_level, CPSW_DEBUG);
+	priv_sl2->rx_packet_max = max(rx_packet_max, 128);
+
+	if (is_valid_ether_addr(data->slave_data[1].mac_addr)) {
+		memcpy(priv_sl2->mac_addr, data->slave_data[1].mac_addr,
+			ETH_ALEN);
+		pr_info("cpsw: Detected MACID = %pM\n", priv_sl2->mac_addr);
+	} else {
+		random_ether_addr(priv_sl2->mac_addr);
+		pr_info("cpsw: Random MACID = %pM\n", priv_sl2->mac_addr);
+	}
+	memcpy(ndev->dev_addr, priv_sl2->mac_addr, ETH_ALEN);
+
+	priv_sl2->slaves = priv->slaves;
+	priv_sl2->clk = priv->clk;
+
+	priv_sl2->cpsw_res = priv->cpsw_res;
+	priv_sl2->regs = priv->regs;
+	priv_sl2->host_port = priv->host_port;
+	priv_sl2->host_port_regs = priv->host_port_regs;
+	priv_sl2->wr_regs = priv->wr_regs;
+	priv_sl2->dma = priv->dma;
+	priv_sl2->txch = priv->txch;
+	priv_sl2->rxch = priv->rxch;
+	priv_sl2->ale = priv->ale;
+	priv_sl2->emac_port = 1;
+	priv->slaves[1].ndev = ndev;
+	priv_sl2->cpts = priv->cpts;
+	priv_sl2->version = priv->version;
+
+	for (i = 0; i < priv->num_irqs; i++) {
+		priv_sl2->irqs_table[i] = priv->irqs_table[i];
+		priv_sl2->num_irqs = priv->num_irqs;
+	}
+
+	ndev->features |= NETIF_F_HW_VLAN_FILTER;
+
+	ndev->netdev_ops = &cpsw_netdev_ops;
+	SET_ETHTOOL_OPS(ndev, &cpsw_ethtool_ops);
+	netif_napi_add(ndev, &priv_sl2->napi, cpsw_poll, CPSW_POLL_WEIGHT);
+
+	/* register the network device */
+	SET_NETDEV_DEV(ndev, &pdev->dev);
+	ret = register_netdev(ndev);
+	if (ret) {
+		pr_err("cpsw: error registering net device\n");
+		free_netdev(ndev);
+		ret = -ENODEV;
+	}
+
+	return ret;
+}
+
 static int cpsw_probe(struct platform_device *pdev)
 {
 	struct cpsw_platform_data	*data = pdev->dev.platform_data;
@@ -1310,6 +1530,9 @@ static int cpsw_probe(struct platform_device *pdev)
 	for (i = 0; i < data->slaves; i++)
 		priv->slaves[i].slave_num = i;
 
+	priv->slaves[0].ndev = ndev;
+	priv->emac_port = 0;
+
 	priv->clk = clk_get(&pdev->dev, "fck");
 	if (IS_ERR(priv->clk)) {
 		dev_err(&pdev->dev, "fck is not found\n");
@@ -1484,6 +1707,14 @@ static int cpsw_probe(struct platform_device *pdev)
 	cpsw_notice(priv, probe, "initialized device (regs %x, irq %d)\n",
 		  priv->cpsw_res->start, ndev->irq);
 
+	if (priv->data.dual_emac) {
+		ret = cpsw_probe_dual_emac(pdev, priv);
+		if (ret) {
+			cpsw_err(priv, probe, "error probe slave 2 emac interface\n");
+			goto clean_irq_ret;
+		}
+	}
+
 	return 0;
 
 clean_irq_ret:
diff --git a/include/linux/platform_data/cpsw.h b/include/linux/platform_data/cpsw.h
index e962cfd552e3..798fb80b024b 100644
--- a/include/linux/platform_data/cpsw.h
+++ b/include/linux/platform_data/cpsw.h
@@ -21,6 +21,8 @@ struct cpsw_slave_data {
 	char		phy_id[MII_BUS_ID_SIZE];
 	int		phy_if;
 	u8		mac_addr[ETH_ALEN];
+	u16		dual_emac_res_vlan;	/* Reserved VLAN for DualEMAC */
+
 };
 
 struct cpsw_platform_data {
@@ -36,6 +38,7 @@ struct cpsw_platform_data {
 	u32	rx_descs;	/* Number of Rx Descriptios */
 	u32	mac_control;	/* Mac control register */
 	u16	default_vlan;	/* Def VLAN for ALE lookup in VLAN aware mode*/
+	bool	dual_emac;	/* Enable Dual EMAC mode */
 };
 
 #endif /* __CPSW_H__ */
-- 
cgit v1.2.3


From 6e105e0593055dc4aae21df7bca3868045754148 Mon Sep 17 00:00:00 2001
From: Sjur Brændeland <sjur.brandeland@stericsson.com>
Date: Wed, 13 Feb 2013 15:52:36 +1030
Subject: virtio: Add module driver macro for virtio drivers.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add helper macro for drivers that don't do anything
special in module init/exit.

Signed-off-by: Sjur Brændeland <sjur.brandeland@stericsson.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 include/linux/virtio.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index eb34cf774473..ff6714e6d0f5 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -126,4 +126,13 @@ static inline struct virtio_driver *drv_to_virtio(struct device_driver *drv)
 
 int register_virtio_driver(struct virtio_driver *drv);
 void unregister_virtio_driver(struct virtio_driver *drv);
+
+/* module_virtio_driver() - Helper macro for drivers that don't do
+ * anything special in module init/exit.  This eliminates a lot of
+ * boilerplate.  Each module may only use this macro once, and
+ * calling it replaces module_init() and module_exit()
+ */
+#define module_virtio_driver(__virtio_driver) \
+	module_driver(__virtio_driver, register_virtio_driver, \
+			unregister_virtio_driver)
 #endif /* _LINUX_VIRTIO_H */
-- 
cgit v1.2.3


From bd69f73f2c81eed9a398708b8c4bb3409ba1b0f9 Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@secretlab.ca>
Date: Sun, 10 Feb 2013 22:57:21 +0000
Subject: of: Create function for counting number of phandles in a property

This patch creates of_count_phandle_with_args(), a new function for
counting the number of phandle+argument tuples in a given property. This
is better than the existing method of parsing each phandle individually
until parsing fails which is a horribly slow way to do the count.

Tested on ARM using the selftest code.

v3: - Rebased on top of selftest code cleanup patch
v2: - fix bug where of_parse_phandle_with_args() could behave like _count_.
    - made of_gpio_named_count() into a static inline regardless of CONFIG_OF_GPIO

Tested-by: Andreas Larsson <andreas@gaisler.com>
Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
Cc: Linus Walleij <linus.walleij@linaro.org>
Cc: Rob Herring <rob.herring@calxeda.com>
---
 drivers/of/base.c     | 41 +++++++++++++++++++++++++++++++++++++----
 drivers/of/selftest.c | 15 +++++++++++++++
 include/linux/of.h    |  9 +++++++++
 3 files changed, 61 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/of/base.c b/drivers/of/base.c
index c5572cb87a88..321d3ef05006 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -1091,9 +1091,10 @@ EXPORT_SYMBOL(of_parse_phandle);
  * To get a device_node of the `node2' node you may call this:
  * of_parse_phandle_with_args(node3, "list", "#list-cells", 1, &args);
  */
-int of_parse_phandle_with_args(const struct device_node *np, const char *list_name,
-				const char *cells_name, int index,
-				struct of_phandle_args *out_args)
+static int __of_parse_phandle_with_args(const struct device_node *np,
+					const char *list_name,
+					const char *cells_name, int index,
+					struct of_phandle_args *out_args)
 {
 	const __be32 *list, *list_end;
 	int rc = 0, size, cur_index = 0;
@@ -1183,15 +1184,47 @@ int of_parse_phandle_with_args(const struct device_node *np, const char *list_na
 	 * Unlock node before returning result; will be one of:
 	 * -ENOENT : index is for empty phandle
 	 * -EINVAL : parsing error on data
+	 * [1..n]  : Number of phandle (count mode; when index = -1)
 	 */
-	rc = -ENOENT;
+	rc = index < 0 ? cur_index : -ENOENT;
  err:
 	if (node)
 		of_node_put(node);
 	return rc;
 }
+
+int of_parse_phandle_with_args(const struct device_node *np, const char *list_name,
+				const char *cells_name, int index,
+				struct of_phandle_args *out_args)
+{
+	if (index < 0)
+		return -EINVAL;
+	return __of_parse_phandle_with_args(np, list_name, cells_name, index, out_args);
+}
 EXPORT_SYMBOL(of_parse_phandle_with_args);
 
+/**
+ * of_count_phandle_with_args() - Find the number of phandles references in a property
+ * @np:		pointer to a device tree node containing a list
+ * @list_name:	property name that contains a list
+ * @cells_name:	property name that specifies phandles' arguments count
+ *
+ * Returns the number of phandle + argument tuples within a property. It
+ * is a typical pattern to encode a list of phandle and variable
+ * arguments into a single property. The number of arguments is encoded
+ * by a property in the phandle-target node. For example, a gpios
+ * property would contain a list of GPIO specifies consisting of a
+ * phandle and 1 or more arguments. The number of arguments are
+ * determined by the #gpio-cells property in the node pointed to by the
+ * phandle.
+ */
+int of_count_phandle_with_args(const struct device_node *np, const char *list_name,
+				const char *cells_name)
+{
+	return __of_parse_phandle_with_args(np, list_name, cells_name, -1, NULL);
+}
+EXPORT_SYMBOL(of_count_phandle_with_args);
+
 #if defined(CONFIG_OF_DYNAMIC)
 static int of_property_notify(int action, struct device_node *np,
 			      struct property *prop)
diff --git a/drivers/of/selftest.c b/drivers/of/selftest.c
index 01b4174a3b46..0eb5c38b4e07 100644
--- a/drivers/of/selftest.c
+++ b/drivers/of/selftest.c
@@ -36,6 +36,9 @@ static void __init of_selftest_parse_phandle_with_args(void)
 		return;
 	}
 
+	rc = of_count_phandle_with_args(np, "phandle-list", "#phandle-cells");
+	selftest(rc == 7, "of_count_phandle_with_args() returned %i, expected 7\n", rc);
+
 	for (i = 0; i < 8; i++) {
 		bool passed = true;
 		rc = of_parse_phandle_with_args(np, "phandle-list",
@@ -94,21 +97,33 @@ static void __init of_selftest_parse_phandle_with_args(void)
 	rc = of_parse_phandle_with_args(np, "phandle-list-missing",
 					"#phandle-cells", 0, &args);
 	selftest(rc == -ENOENT, "expected:%i got:%i\n", -ENOENT, rc);
+	rc = of_count_phandle_with_args(np, "phandle-list-missing",
+					"#phandle-cells");
+	selftest(rc == -ENOENT, "expected:%i got:%i\n", -ENOENT, rc);
 
 	/* Check for missing cells property */
 	rc = of_parse_phandle_with_args(np, "phandle-list",
 					"#phandle-cells-missing", 0, &args);
 	selftest(rc == -EINVAL, "expected:%i got:%i\n", -EINVAL, rc);
+	rc = of_count_phandle_with_args(np, "phandle-list",
+					"#phandle-cells-missing");
+	selftest(rc == -EINVAL, "expected:%i got:%i\n", -EINVAL, rc);
 
 	/* Check for bad phandle in list */
 	rc = of_parse_phandle_with_args(np, "phandle-list-bad-phandle",
 					"#phandle-cells", 0, &args);
 	selftest(rc == -EINVAL, "expected:%i got:%i\n", -EINVAL, rc);
+	rc = of_count_phandle_with_args(np, "phandle-list-bad-phandle",
+					"#phandle-cells");
+	selftest(rc == -EINVAL, "expected:%i got:%i\n", -EINVAL, rc);
 
 	/* Check for incorrectly formed argument list */
 	rc = of_parse_phandle_with_args(np, "phandle-list-bad-args",
 					"#phandle-cells", 1, &args);
 	selftest(rc == -EINVAL, "expected:%i got:%i\n", -EINVAL, rc);
+	rc = of_count_phandle_with_args(np, "phandle-list-bad-args",
+					"#phandle-cells");
+	selftest(rc == -EINVAL, "expected:%i got:%i\n", -EINVAL, rc);
 }
 
 static void __init of_selftest_property_match_string(void)
diff --git a/include/linux/of.h b/include/linux/of.h
index b9e1b911f0eb..a0f129284948 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -277,6 +277,8 @@ extern struct device_node *of_parse_phandle(const struct device_node *np,
 extern int of_parse_phandle_with_args(const struct device_node *np,
 	const char *list_name, const char *cells_name, int index,
 	struct of_phandle_args *out_args);
+extern int of_count_phandle_with_args(const struct device_node *np,
+	const char *list_name, const char *cells_name);
 
 extern void of_alias_scan(void * (*dt_alloc)(u64 size, u64 align));
 extern int of_alias_get_id(struct device_node *np, const char *stem);
@@ -467,6 +469,13 @@ static inline int of_parse_phandle_with_args(struct device_node *np,
 	return -ENOSYS;
 }
 
+static inline int of_count_phandle_with_args(struct device_node *np,
+					     const char *list_name,
+					     const char *cells_name)
+{
+	return -ENOSYS;
+}
+
 static inline int of_alias_get_id(struct device_node *np, const char *stem)
 {
 	return -ENOSYS;
-- 
cgit v1.2.3


From e80beb27d2f81a1c3c8887e0e0a82d77bb392d28 Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@secretlab.ca>
Date: Tue, 12 Feb 2013 17:48:37 +0000
Subject: gpio: Make of_count_named_gpios() use new
 of_count_phandle_with_args()

This patch replaces the horribly coded of_count_named_gpios() with a
call to of_count_phandle_with_args() which is far more efficient. This
also changes the return value of of_gpio_count() & of_gpio_named_count()
from 'unsigned int' to 'int' so that it can return an error code. All
the users of that function are fixed up to correctly handle a negative
return value.

v2: Split GPIO portion into a separate patch

Tested-by: Andreas Larsson <andreas@gaisler.com>
Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
Cc: Linus Walleij <linus.walleij@linaro.org>
Cc: Rob Herring <rob.herring@calxeda.com>
---
 drivers/gpio/gpiolib-of.c              | 35 -----------------------------
 drivers/hwmon/gpio-fan.c               |  4 ++--
 drivers/input/keyboard/matrix_keypad.c |  8 +++----
 drivers/net/phy/mdio-mux-gpio.c        |  4 ++--
 drivers/spi/spi-fsl-spi.c              |  4 ++--
 drivers/spi/spi-oc-tiny.c              |  8 +++----
 drivers/spi/spi-ppc4xx.c               |  4 ++--
 drivers/spi/spi.c                      |  5 ++---
 include/linux/of_gpio.h                | 40 +++++++++++++++++++---------------
 9 files changed, 41 insertions(+), 71 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c
index d542a141811a..dd8a2129222f 100644
--- a/drivers/gpio/gpiolib-of.c
+++ b/drivers/gpio/gpiolib-of.c
@@ -88,41 +88,6 @@ int of_get_named_gpio_flags(struct device_node *np, const char *propname,
 }
 EXPORT_SYMBOL(of_get_named_gpio_flags);
 
-/**
- * of_gpio_named_count - Count GPIOs for a device
- * @np:		device node to count GPIOs for
- * @propname:	property name containing gpio specifier(s)
- *
- * The function returns the count of GPIOs specified for a node.
- *
- * Note that the empty GPIO specifiers counts too. For example,
- *
- * gpios = <0
- *          &pio1 1 2
- *          0
- *          &pio2 3 4>;
- *
- * defines four GPIOs (so this function will return 4), two of which
- * are not specified.
- */
-unsigned int of_gpio_named_count(struct device_node *np, const char* propname)
-{
-	unsigned int cnt = 0;
-
-	do {
-		int ret;
-
-		ret = of_parse_phandle_with_args(np, propname, "#gpio-cells",
-						 cnt, NULL);
-		/* A hole in the gpios = <> counts anyway. */
-		if (ret < 0 && ret != -EEXIST)
-			break;
-	} while (++cnt);
-
-	return cnt;
-}
-EXPORT_SYMBOL(of_gpio_named_count);
-
 /**
  * of_gpio_simple_xlate - translate gpio_spec to the GPIO number and flags
  * @gc:		pointer to the gpio_chip structure
diff --git a/drivers/hwmon/gpio-fan.c b/drivers/hwmon/gpio-fan.c
index 4e04c1228e51..39781945a5d2 100644
--- a/drivers/hwmon/gpio-fan.c
+++ b/drivers/hwmon/gpio-fan.c
@@ -422,7 +422,7 @@ static int gpio_fan_get_of_pdata(struct device *dev,
 
 	/* Fill GPIO pin array */
 	pdata->num_ctrl = of_gpio_count(node);
-	if (!pdata->num_ctrl) {
+	if (pdata->num_ctrl <= 0) {
 		dev_err(dev, "gpios DT property empty / missing");
 		return -ENODEV;
 	}
@@ -477,7 +477,7 @@ static int gpio_fan_get_of_pdata(struct device *dev,
 	pdata->speed = speed;
 
 	/* Alarm GPIO if one exists */
-	if (of_gpio_named_count(node, "alarm-gpios")) {
+	if (of_gpio_named_count(node, "alarm-gpios") > 0) {
 		struct gpio_fan_alarm *alarm;
 		int val;
 		enum of_gpio_flags flags;
diff --git a/drivers/input/keyboard/matrix_keypad.c b/drivers/input/keyboard/matrix_keypad.c
index f4ff0dda7597..71d77192ac1e 100644
--- a/drivers/input/keyboard/matrix_keypad.c
+++ b/drivers/input/keyboard/matrix_keypad.c
@@ -403,7 +403,7 @@ matrix_keypad_parse_dt(struct device *dev)
 	struct matrix_keypad_platform_data *pdata;
 	struct device_node *np = dev->of_node;
 	unsigned int *gpios;
-	int i;
+	int i, nrow, ncol;
 
 	if (!np) {
 		dev_err(dev, "device lacks DT data\n");
@@ -416,9 +416,9 @@ matrix_keypad_parse_dt(struct device *dev)
 		return ERR_PTR(-ENOMEM);
 	}
 
-	pdata->num_row_gpios = of_gpio_named_count(np, "row-gpios");
-	pdata->num_col_gpios = of_gpio_named_count(np, "col-gpios");
-	if (!pdata->num_row_gpios || !pdata->num_col_gpios) {
+	pdata->num_row_gpios = nrow = of_gpio_named_count(np, "row-gpios");
+	pdata->num_col_gpios = ncol = of_gpio_named_count(np, "col-gpios");
+	if (nrow <= 0 || ncol <= 0) {
 		dev_err(dev, "number of keypad rows/columns not specified\n");
 		return ERR_PTR(-EINVAL);
 	}
diff --git a/drivers/net/phy/mdio-mux-gpio.c b/drivers/net/phy/mdio-mux-gpio.c
index 0c9accb1c14f..e91d7d736ae2 100644
--- a/drivers/net/phy/mdio-mux-gpio.c
+++ b/drivers/net/phy/mdio-mux-gpio.c
@@ -53,7 +53,7 @@ static int mdio_mux_gpio_probe(struct platform_device *pdev)
 {
 	enum of_gpio_flags f;
 	struct mdio_mux_gpio_state *s;
-	unsigned int num_gpios;
+	int num_gpios;
 	unsigned int n;
 	int r;
 
@@ -61,7 +61,7 @@ static int mdio_mux_gpio_probe(struct platform_device *pdev)
 		return -ENODEV;
 
 	num_gpios = of_gpio_count(pdev->dev.of_node);
-	if (num_gpios == 0 || num_gpios > MDIO_MUX_GPIO_MAX_BITS)
+	if (num_gpios <= 0 || num_gpios > MDIO_MUX_GPIO_MAX_BITS)
 		return -ENODEV;
 
 	s = devm_kzalloc(&pdev->dev, sizeof(*s), GFP_KERNEL);
diff --git a/drivers/spi/spi-fsl-spi.c b/drivers/spi/spi-fsl-spi.c
index 1a7f6359d998..086a9eef2e05 100644
--- a/drivers/spi/spi-fsl-spi.c
+++ b/drivers/spi/spi-fsl-spi.c
@@ -947,12 +947,12 @@ static int of_fsl_spi_get_chipselects(struct device *dev)
 	struct device_node *np = dev->of_node;
 	struct fsl_spi_platform_data *pdata = dev->platform_data;
 	struct mpc8xxx_spi_probe_info *pinfo = to_of_pinfo(pdata);
-	unsigned int ngpios;
+	int ngpios;
 	int i = 0;
 	int ret;
 
 	ngpios = of_gpio_count(np);
-	if (!ngpios) {
+	if (ngpios <= 0) {
 		/*
 		 * SPI w/o chip-select line. One SPI device is still permitted
 		 * though.
diff --git a/drivers/spi/spi-oc-tiny.c b/drivers/spi/spi-oc-tiny.c
index 432e66ec3088..cb2e284bd814 100644
--- a/drivers/spi/spi-oc-tiny.c
+++ b/drivers/spi/spi-oc-tiny.c
@@ -54,7 +54,7 @@ struct tiny_spi {
 	unsigned int txc, rxc;
 	const u8 *txp;
 	u8 *rxp;
-	unsigned int gpio_cs_count;
+	int gpio_cs_count;
 	int *gpio_cs;
 };
 
@@ -74,7 +74,7 @@ static void tiny_spi_chipselect(struct spi_device *spi, int is_active)
 {
 	struct tiny_spi *hw = tiny_spi_to_hw(spi);
 
-	if (hw->gpio_cs_count) {
+	if (hw->gpio_cs_count > 0) {
 		gpio_set_value(hw->gpio_cs[spi->chip_select],
 			(spi->mode & SPI_CS_HIGH) ? is_active : !is_active);
 	}
@@ -254,7 +254,7 @@ static int tiny_spi_of_probe(struct platform_device *pdev)
 	if (!np)
 		return 0;
 	hw->gpio_cs_count = of_gpio_count(np);
-	if (hw->gpio_cs_count) {
+	if (hw->gpio_cs_count > 0) {
 		hw->gpio_cs = devm_kzalloc(&pdev->dev,
 				hw->gpio_cs_count * sizeof(unsigned int),
 				GFP_KERNEL);
@@ -352,7 +352,7 @@ static int tiny_spi_probe(struct platform_device *pdev)
 			goto exit_gpio;
 		gpio_direction_output(hw->gpio_cs[i], 1);
 	}
-	hw->bitbang.master->num_chipselect = max(1U, hw->gpio_cs_count);
+	hw->bitbang.master->num_chipselect = max(1, hw->gpio_cs_count);
 
 	/* register our spi controller */
 	err = spi_bitbang_start(&hw->bitbang);
diff --git a/drivers/spi/spi-ppc4xx.c b/drivers/spi/spi-ppc4xx.c
index 7a85f22b6474..af3e6e756dc9 100644
--- a/drivers/spi/spi-ppc4xx.c
+++ b/drivers/spi/spi-ppc4xx.c
@@ -419,7 +419,7 @@ static int __init spi_ppc4xx_of_probe(struct platform_device *op)
 	 * This includes both "null" gpio's and real ones.
 	 */
 	num_gpios = of_gpio_count(np);
-	if (num_gpios) {
+	if (num_gpios > 0) {
 		int i;
 
 		hw->gpios = kzalloc(sizeof(int) * num_gpios, GFP_KERNEL);
@@ -471,7 +471,7 @@ static int __init spi_ppc4xx_of_probe(struct platform_device *op)
 		SPI_CPHA | SPI_CPOL | SPI_CS_HIGH | SPI_LSB_FIRST;
 
 	/* this many pins in all GPIO controllers */
-	bbp->master->num_chipselect = num_gpios;
+	bbp->master->num_chipselect = num_gpios > 0 ? num_gpios : 0;
 
 	/* Get the clock for the OPB */
 	opbnp = of_find_compatible_node(NULL, NULL, "ibm,opb");
diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 19ee901577da..21c47482d9fd 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -1059,15 +1059,14 @@ EXPORT_SYMBOL_GPL(spi_alloc_master);
 #ifdef CONFIG_OF
 static int of_spi_register_master(struct spi_master *master)
 {
-	u16 nb;
-	int i, *cs;
+	int nb, i, *cs;
 	struct device_node *np = master->dev.of_node;
 
 	if (!np)
 		return 0;
 
 	nb = of_gpio_named_count(np, "cs-gpios");
-	master->num_chipselect = max(nb, master->num_chipselect);
+	master->num_chipselect = max(nb, (int)master->num_chipselect);
 
 	if (nb < 1)
 		return 0;
diff --git a/include/linux/of_gpio.h b/include/linux/of_gpio.h
index c454f5796747..a83dc6f5008e 100644
--- a/include/linux/of_gpio.h
+++ b/include/linux/of_gpio.h
@@ -50,9 +50,6 @@ static inline struct of_mm_gpio_chip *to_of_mm_gpio_chip(struct gpio_chip *gc)
 extern int of_get_named_gpio_flags(struct device_node *np,
 		const char *list_name, int index, enum of_gpio_flags *flags);
 
-extern unsigned int of_gpio_named_count(struct device_node *np,
-					const char* propname);
-
 extern int of_mm_gpiochip_add(struct device_node *np,
 			      struct of_mm_gpio_chip *mm_gc);
 
@@ -71,12 +68,6 @@ static inline int of_get_named_gpio_flags(struct device_node *np,
 	return -ENOSYS;
 }
 
-static inline unsigned int of_gpio_named_count(struct device_node *np,
-					const char* propname)
-{
-	return 0;
-}
-
 static inline int of_gpio_simple_xlate(struct gpio_chip *gc,
 				       const struct of_phandle_args *gpiospec,
 				       u32 *flags)
@@ -90,22 +81,37 @@ static inline void of_gpiochip_remove(struct gpio_chip *gc) { }
 #endif /* CONFIG_OF_GPIO */
 
 /**
- * of_gpio_count - Count GPIOs for a device
+ * of_gpio_named_count() - Count GPIOs for a device
  * @np:		device node to count GPIOs for
+ * @propname:	property name containing gpio specifier(s)
  *
  * The function returns the count of GPIOs specified for a node.
+ * Note that the empty GPIO specifiers count too. Returns either
+ *   Number of gpios defined in property,
+ *   -EINVAL for an incorrectly formed gpios property, or
+ *   -ENOENT for a missing gpios property
  *
- * Note that the empty GPIO specifiers counts too. For example,
- *
+ * Example:
  * gpios = <0
- *          &pio1 1 2
+ *          &gpio1 1 2
  *          0
- *          &pio2 3 4>;
+ *          &gpio2 3 4>;
+ *
+ * The above example defines four GPIOs, two of which are not specified.
+ * This function will return '4'
+ */
+static inline int of_gpio_named_count(struct device_node *np, const char* propname)
+{
+	return of_count_phandle_with_args(np, propname, "#gpio-cells");
+}
+
+/**
+ * of_gpio_count() - Count GPIOs for a device
+ * @np:		device node to count GPIOs for
  *
- * defines four GPIOs (so this function will return 4), two of which
- * are not specified.
+ * Same as of_gpio_named_count, but hard coded to use the 'gpios' property
  */
-static inline unsigned int of_gpio_count(struct device_node *np)
+static inline int of_gpio_count(struct device_node *np)
 {
 	return of_gpio_named_count(np, "gpios");
 }
-- 
cgit v1.2.3


From 7f07863ec60f7d3dbeec5aff881ea074db3925ed Mon Sep 17 00:00:00 2001
From: Roger Quadros <rogerq@ti.com>
Date: Wed, 13 Feb 2013 13:12:35 +0200
Subject: ARM: OMAP: Consolidate OMAP USB-HS platform data (part 1/3)

Let's have a single platform data structure for the OMAP's High-Speed
USB host subsystem instead of having 3 separate ones i.e. one for
board data, one for USB Host (UHH) module and one for USB-TLL module.

This makes the code much simpler and avoids creating multiple copies of
platform data.

Part 1 touches platform headers
Part 2 touches drivers
Part 3 touches platform data

Signed-off-by: Roger Quadros <rogerq@ti.com>
Reviewed-by: Felipe Balbi <balbi@ti.com>
---
 include/linux/platform_data/usb-omap.h | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/platform_data/usb-omap.h b/include/linux/platform_data/usb-omap.h
index ef65b67c56c3..e697c85ad3bc 100644
--- a/include/linux/platform_data/usb-omap.h
+++ b/include/linux/platform_data/usb-omap.h
@@ -55,13 +55,17 @@ struct ohci_hcd_omap_platform_data {
 };
 
 struct usbhs_omap_platform_data {
-	enum usbhs_omap_port_mode		port_mode[OMAP3_HS_USB_PORTS];
+	enum usbhs_omap_port_mode	port_mode[OMAP3_HS_USB_PORTS];
+	int				reset_gpio_port[OMAP3_HS_USB_PORTS];
+	struct regulator		*regulator[OMAP3_HS_USB_PORTS];
 
 	struct ehci_hcd_omap_platform_data	*ehci_data;
 	struct ohci_hcd_omap_platform_data	*ohci_data;
 
 	/* OMAP3 <= ES2.1 have a single ulpi bypass control bit */
-	unsigned				single_ulpi_bypass:1;
+	unsigned single_ulpi_bypass:1;
+	unsigned es2_compatibility:1;
+	unsigned phy_reset:1;
 };
 
 /*-------------------------------------------------------------------------*/
-- 
cgit v1.2.3


From ccac71a7f063ad31eb99fac37e95b70ff57f1354 Mon Sep 17 00:00:00 2001
From: Roger Quadros <rogerq@ti.com>
Date: Thu, 8 Nov 2012 19:18:08 +0200
Subject: mfd: omap-usb-host: override number of ports from platform data

Both OMAP4 and 5 exhibit the same revision ID in the REVISION register
but they have different number of ports i.e. 2 and 3 respectively.
So we can't rely on REVISION register for number of ports on OMAP5
and depend on platform data (or device tree) instead.

Signed-off-by: Roger Quadros <rogerq@ti.com>
Reviewed-by: Felipe Balbi <balbi@ti.com>
---
 drivers/mfd/omap-usb-host.c            | 34 +++++++++++++++++++++-------------
 include/linux/platform_data/usb-omap.h |  1 +
 2 files changed, 22 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/omap-usb-host.c b/drivers/mfd/omap-usb-host.c
index 26319ca72e38..779588be8ab2 100644
--- a/drivers/mfd/omap-usb-host.c
+++ b/drivers/mfd/omap-usb-host.c
@@ -493,19 +493,27 @@ static int usbhs_omap_probe(struct platform_device *pdev)
 	 */
 	pm_runtime_put_sync(dev);
 
-	switch (omap->usbhs_rev) {
-	case OMAP_USBHS_REV1:
-		omap->nports = 3;
-		break;
-	case OMAP_USBHS_REV2:
-		omap->nports = 2;
-		break;
-	default:
-		omap->nports = OMAP3_HS_USB_PORTS;
-		dev_dbg(dev,
-		  "USB HOST Rev : 0x%d not recognized, assuming %d ports\n",
-		   omap->usbhs_rev, omap->nports);
-		break;
+	/*
+	 * If platform data contains nports then use that
+	 * else make out number of ports from USBHS revision
+	 */
+	if (pdata->nports) {
+		omap->nports = pdata->nports;
+	} else {
+		switch (omap->usbhs_rev) {
+		case OMAP_USBHS_REV1:
+			omap->nports = 3;
+			break;
+		case OMAP_USBHS_REV2:
+			omap->nports = 2;
+			break;
+		default:
+			omap->nports = OMAP3_HS_USB_PORTS;
+			dev_dbg(dev,
+			 "USB HOST Rev:0x%d not recognized, assuming %d ports\n",
+			 omap->usbhs_rev, omap->nports);
+			break;
+		}
 	}
 
 	for (i = 0; i < omap->nports; i++)
diff --git a/include/linux/platform_data/usb-omap.h b/include/linux/platform_data/usb-omap.h
index e697c85ad3bc..fa579b4c666b 100644
--- a/include/linux/platform_data/usb-omap.h
+++ b/include/linux/platform_data/usb-omap.h
@@ -55,6 +55,7 @@ struct ohci_hcd_omap_platform_data {
 };
 
 struct usbhs_omap_platform_data {
+	int				nports;
 	enum usbhs_omap_port_mode	port_mode[OMAP3_HS_USB_PORTS];
 	int				reset_gpio_port[OMAP3_HS_USB_PORTS];
 	struct regulator		*regulator[OMAP3_HS_USB_PORTS];
-- 
cgit v1.2.3


From 64fd7401c5e4cf7c64452ecd9b700a55a5ebea50 Mon Sep 17 00:00:00 2001
From: Toshi Kani <toshi.kani@hp.com>
Date: Mon, 11 Feb 2013 22:33:20 +0000
Subject: ACPI: Remove the use of CONFIG_ACPI_CONTAINER_MODULE

config ACPI_CONTAINER has been changed to bool (y/n), and its
module option is no longer valid.  So, remove the use of
CONFIG_ACPI_CONTAINER_MODULE.

Signed-off-by: Toshi Kani <toshi.kani@hp.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/acpi.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index c6ccd9fa8f08..bcbdd7484e58 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -363,8 +363,7 @@ extern acpi_status acpi_pci_osc_control_set(acpi_handle handle,
 #if defined(CONFIG_ACPI_HOTPLUG_CPU) &&			\
 	(defined(CONFIG_ACPI_HOTPLUG_MEMORY) ||		\
 	 defined(CONFIG_ACPI_HOTPLUG_MEMORY_MODULE)) &&	\
-	(defined(CONFIG_ACPI_CONTAINER) ||		\
-	 defined(CONFIG_ACPI_CONTAINER_MODULE))
+	defined(CONFIG_ACPI_CONTAINER)
 #define ACPI_HOTPLUG_OST
 #endif
 
-- 
cgit v1.2.3


From d83f5901bc0cd7131a3b8534169ee889efc4c257 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Wed, 30 Jan 2013 19:21:14 -0800
Subject: coda: Restrict coda messages to the initial user namespace

Remove the slight chance that uids and gids in coda messages will be
interpreted in the wrong user namespace.

- Only allow processes in the initial user namespace to open the coda
  character device to communicate with coda filesystems.
- Explicitly convert the uids in the coda header into the initial user
  namespace.
- In coda_vattr_to_attr make kuids and kgids from the initial user
  namespace uids and gids in struct coda_vattr that just came from
  userspace.
- In coda_iattr_to_vattr convert kuids and kgids into uids and gids
  in the intial user namespace and store them in struct coda_vattr for
  sending to coda userspace programs.

Nothing needs to be changed with mounts as coda does not support
being mounted in anything other than the initial user namespace.

Cc: Jan Harkes <jaharkes@cs.cmu.edu>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 fs/coda/coda_linux.c       | 8 ++++----
 fs/coda/psdev.c            | 3 +++
 fs/coda/upcall.c           | 6 +++---
 include/linux/coda_psdev.h | 2 +-
 4 files changed, 11 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/fs/coda/coda_linux.c b/fs/coda/coda_linux.c
index 854ace712685..2849f41e72a2 100644
--- a/fs/coda/coda_linux.c
+++ b/fs/coda/coda_linux.c
@@ -100,9 +100,9 @@ void coda_vattr_to_iattr(struct inode *inode, struct coda_vattr *attr)
 	if (attr->va_mode != (u_short) -1)
 	        inode->i_mode = attr->va_mode | inode_type;
         if (attr->va_uid != -1) 
-	        inode->i_uid = (uid_t) attr->va_uid;
+	        inode->i_uid = make_kuid(&init_user_ns, (uid_t) attr->va_uid);
         if (attr->va_gid != -1)
-	        inode->i_gid = (gid_t) attr->va_gid;
+	        inode->i_gid = make_kgid(&init_user_ns, (gid_t) attr->va_gid);
 	if (attr->va_nlink != -1)
 		set_nlink(inode, attr->va_nlink);
 	if (attr->va_size != -1)
@@ -171,10 +171,10 @@ void coda_iattr_to_vattr(struct iattr *iattr, struct coda_vattr *vattr)
                 vattr->va_mode = iattr->ia_mode;
 	}
         if ( valid & ATTR_UID ) {
-                vattr->va_uid = (vuid_t) iattr->ia_uid;
+                vattr->va_uid = (vuid_t) from_kuid(&init_user_ns, iattr->ia_uid);
 	}
         if ( valid & ATTR_GID ) {
-                vattr->va_gid = (vgid_t) iattr->ia_gid;
+                vattr->va_gid = (vgid_t) from_kgid(&init_user_ns, iattr->ia_gid);
 	}
         if ( valid & ATTR_SIZE ) {
                 vattr->va_size = iattr->ia_size;
diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c
index dd60f905d4fe..ebc2bae6c289 100644
--- a/fs/coda/psdev.c
+++ b/fs/coda/psdev.c
@@ -270,6 +270,9 @@ static int coda_psdev_open(struct inode * inode, struct file * file)
 	if (task_active_pid_ns(current) != &init_pid_ns)
 		return -EINVAL;
 
+	if (current_user_ns() != &init_user_ns)
+		return -EINVAL;
+
 	idx = iminor(inode);
 	if (idx < 0 || idx >= MAX_CODADEVS)
 		return -ENODEV;
diff --git a/fs/coda/upcall.c b/fs/coda/upcall.c
index 5c6d2cd6ee86..3a731976dc5e 100644
--- a/fs/coda/upcall.c
+++ b/fs/coda/upcall.c
@@ -52,7 +52,7 @@ static void *alloc_upcall(int opcode, int size)
         inp->ih.opcode = opcode;
 	inp->ih.pid = task_pid_nr_ns(current, &init_pid_ns);
 	inp->ih.pgid = task_pgrp_nr_ns(current, &init_pid_ns);
-	inp->ih.uid = current_fsuid();
+	inp->ih.uid = from_kuid(&init_user_ns, current_fsuid());
 
 	return (void*)inp;
 }
@@ -157,7 +157,7 @@ int venus_lookup(struct super_block *sb, struct CodaFid *fid,
 }
 
 int venus_close(struct super_block *sb, struct CodaFid *fid, int flags,
-		vuid_t uid)
+		kuid_t uid)
 {
 	union inputArgs *inp;
 	union outputArgs *outp;
@@ -166,7 +166,7 @@ int venus_close(struct super_block *sb, struct CodaFid *fid, int flags,
 	insize = SIZE(release);
 	UPARG(CODA_CLOSE);
 	
-	inp->ih.uid = uid;
+	inp->ih.uid = from_kuid(&init_user_ns, uid);
         inp->coda_close.VFid = *fid;
         inp->coda_close.flags = flags;
 
diff --git a/include/linux/coda_psdev.h b/include/linux/coda_psdev.h
index 8031d6eef102..5b8721efa948 100644
--- a/include/linux/coda_psdev.h
+++ b/include/linux/coda_psdev.h
@@ -34,7 +34,7 @@ int venus_lookup(struct super_block *sb, struct CodaFid *fid,
 		 const char *name, int length, int *type, 
 		 struct CodaFid *resfid);
 int venus_close(struct super_block *sb, struct CodaFid *fid, int flags,
-		vuid_t uid);
+		kuid_t uid);
 int venus_open(struct super_block *sb, struct CodaFid *fid, int flags,
 	       struct file **f);
 int venus_mkdir(struct super_block *sb, struct CodaFid *dirfid, 
-- 
cgit v1.2.3


From 7eaf040b720bc8c0ce5cd49151ca194ca2d56842 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Fri, 1 Feb 2013 16:31:17 -0800
Subject: sunrpc: Use kuid_t and kgid_t where appropriate

Convert variables that store uids and gids to be of type
kuid_t and kgid_t instead of type uid_t and gid_t.

Cc: "J. Bruce Fields" <bfields@fieldses.org>
Cc: Trond Myklebust <Trond.Myklebust@netapp.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/linux/sunrpc/auth.h    | 7 ++++---
 include/linux/sunrpc/svcauth.h | 4 ++--
 net/sunrpc/auth_gss/auth_gss.c | 8 ++++----
 net/sunrpc/auth_unix.c         | 4 ++--
 net/sunrpc/svcauth_unix.c      | 8 ++++----
 5 files changed, 16 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h
index f25ba922baaf..58fda1c3c783 100644
--- a/include/linux/sunrpc/auth.h
+++ b/include/linux/sunrpc/auth.h
@@ -17,14 +17,15 @@
 
 #include <linux/atomic.h>
 #include <linux/rcupdate.h>
+#include <linux/uidgid.h>
 
 /* size of the nodename buffer */
 #define UNX_MAXNODENAME	32
 
 /* Work around the lack of a VFS credential */
 struct auth_cred {
-	uid_t	uid;
-	gid_t	gid;
+	kuid_t	uid;
+	kgid_t	gid;
 	struct group_info *group_info;
 	const char *principal;
 	unsigned char machine_cred : 1;
@@ -48,7 +49,7 @@ struct rpc_cred {
 	unsigned long		cr_flags;	/* various flags */
 	atomic_t		cr_count;	/* ref count */
 
-	uid_t			cr_uid;
+	kuid_t			cr_uid;
 
 	/* per-flavor data */
 };
diff --git a/include/linux/sunrpc/svcauth.h b/include/linux/sunrpc/svcauth.h
index dd74084a9799..ff374ab30839 100644
--- a/include/linux/sunrpc/svcauth.h
+++ b/include/linux/sunrpc/svcauth.h
@@ -18,8 +18,8 @@
 #include <linux/cred.h>
 
 struct svc_cred {
-	uid_t			cr_uid;
-	gid_t			cr_gid;
+	kuid_t			cr_uid;
+	kgid_t			cr_gid;
 	struct group_info	*cr_group_info;
 	u32			cr_flavor; /* pseudoflavor */
 	char			*cr_principal; /* for gss */
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 6e5c824b040b..4daab81ca337 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -256,7 +256,7 @@ err:
 
 struct gss_upcall_msg {
 	atomic_t count;
-	uid_t	uid;
+	kuid_t	uid;
 	struct rpc_pipe_msg msg;
 	struct list_head list;
 	struct gss_auth *auth;
@@ -303,7 +303,7 @@ gss_release_msg(struct gss_upcall_msg *gss_msg)
 }
 
 static struct gss_upcall_msg *
-__gss_find_upcall(struct rpc_pipe *pipe, uid_t uid)
+__gss_find_upcall(struct rpc_pipe *pipe, kuid_t uid)
 {
 	struct gss_upcall_msg *pos;
 	list_for_each_entry(pos, &pipe->in_downcall, list) {
@@ -445,7 +445,7 @@ static void gss_encode_msg(struct gss_upcall_msg *gss_msg,
 
 static struct gss_upcall_msg *
 gss_alloc_msg(struct gss_auth *gss_auth, struct rpc_clnt *clnt,
-		uid_t uid, const char *service_name)
+		kuid_t uid, const char *service_name)
 {
 	struct gss_upcall_msg *gss_msg;
 	int vers;
@@ -475,7 +475,7 @@ gss_setup_upcall(struct rpc_clnt *clnt, struct gss_auth *gss_auth, struct rpc_cr
 	struct gss_cred *gss_cred = container_of(cred,
 			struct gss_cred, gc_base);
 	struct gss_upcall_msg *gss_new, *gss_msg;
-	uid_t uid = cred->cr_uid;
+	kuid_t uid = cred->cr_uid;
 
 	gss_new = gss_alloc_msg(gss_auth, clnt, uid, gss_cred->gc_principal);
 	if (IS_ERR(gss_new))
diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c
index 2f8627082fa7..372d9156f6e3 100644
--- a/net/sunrpc/auth_unix.c
+++ b/net/sunrpc/auth_unix.c
@@ -18,8 +18,8 @@
 
 struct unx_cred {
 	struct rpc_cred		uc_base;
-	gid_t			uc_gid;
-	gid_t			uc_gids[NFS_NGROUPS];
+	kgid_t			uc_gid;
+	kgid_t			uc_gids[NFS_NGROUPS];
 };
 #define uc_uid			uc_base.cr_uid
 
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index c8cb5dc8d871..caae662f9fa3 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -415,7 +415,7 @@ svcauth_unix_info_release(struct svc_xprt *xpt)
 
 struct unix_gid {
 	struct cache_head	h;
-	uid_t			uid;
+	kuid_t			uid;
 	struct group_info	*gi;
 };
 
@@ -475,7 +475,7 @@ static int unix_gid_upcall(struct cache_detail *cd, struct cache_head *h)
 	return sunrpc_cache_pipe_upcall(cd, h, unix_gid_request);
 }
 
-static struct unix_gid *unix_gid_lookup(struct cache_detail *cd, uid_t uid);
+static struct unix_gid *unix_gid_lookup(struct cache_detail *cd, kuid_t uid);
 
 static int unix_gid_parse(struct cache_detail *cd,
 			char *mesg, int mlen)
@@ -615,7 +615,7 @@ void unix_gid_cache_destroy(struct net *net)
 	cache_destroy_net(cd, net);
 }
 
-static struct unix_gid *unix_gid_lookup(struct cache_detail *cd, uid_t uid)
+static struct unix_gid *unix_gid_lookup(struct cache_detail *cd, kuid_t uid)
 {
 	struct unix_gid ug;
 	struct cache_head *ch;
@@ -628,7 +628,7 @@ static struct unix_gid *unix_gid_lookup(struct cache_detail *cd, uid_t uid)
 		return NULL;
 }
 
-static struct group_info *unix_gid_find(uid_t uid, struct svc_rqst *rqstp)
+static struct group_info *unix_gid_find(kuid_t uid, struct svc_rqst *rqstp)
 {
 	struct unix_gid *ug;
 	struct group_info *gi;
-- 
cgit v1.2.3


From 54f834cd5501fb5fc801e4719a3ad0c894a3af2c Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Fri, 1 Feb 2013 14:16:29 -0800
Subject: nfs: Convert struct nfs_fattr to Use kuid_t and kgid_t

Cc: "J. Bruce Fields" <bfields@fieldses.org>
Cc: Trond Myklebust <Trond.Myklebust@netapp.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/linux/nfs_xdr.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 29adb12c7ecf..13441ddac33d 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -48,8 +48,8 @@ struct nfs_fattr {
 	unsigned int		valid;		/* which fields are valid */
 	umode_t			mode;
 	__u32			nlink;
-	__u32			uid;
-	__u32			gid;
+	kuid_t			uid;
+	kgid_t			gid;
 	dev_t			rdev;
 	__u64			size;
 	union {
-- 
cgit v1.2.3


From 9f309c86cf343c59c79d25d9bde5d4a895d2e81f Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Fri, 1 Feb 2013 03:21:47 -0800
Subject: nfs: Convert idmap to use kuids and kgids

Convert nfs_map_name_to_uid to return a kuid_t value.
Convert nfs_map_name_to_gid to return a kgid_t value.
Convert nfs_map_uid_to_name to take a kuid_t paramater.
Convert nfs_map_gid_to_name to take a kgid_t paramater.

Tweak nfs_fattr_map_owner_to_name to use a kuid_t intermediate value.
Tweak nfs_fattr_map_group_to_name to use a kgid_t intermediate value.

Which makes these functions properly handle kuids and kgids, including
erroring of the generated kuid or kgid is invalid.

Cc: "J. Bruce Fields" <bfields@fieldses.org>
Cc: Trond Myklebust <Trond.Myklebust@netapp.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 fs/nfs/idmap.c            | 50 ++++++++++++++++++++++++++++++++---------------
 include/linux/nfs_idmap.h |  9 +++++----
 2 files changed, 39 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index f77017c4e28b..b9623d19d599 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -97,7 +97,7 @@ static void nfs_fattr_free_group_name(struct nfs_fattr *fattr)
 static bool nfs_fattr_map_owner_name(struct nfs_server *server, struct nfs_fattr *fattr)
 {
 	struct nfs4_string *owner = fattr->owner_name;
-	__u32 uid;
+	kuid_t uid;
 
 	if (!(fattr->valid & NFS_ATTR_FATTR_OWNER_NAME))
 		return false;
@@ -111,7 +111,7 @@ static bool nfs_fattr_map_owner_name(struct nfs_server *server, struct nfs_fattr
 static bool nfs_fattr_map_group_name(struct nfs_server *server, struct nfs_fattr *fattr)
 {
 	struct nfs4_string *group = fattr->group_name;
-	__u32 gid;
+	kgid_t gid;
 
 	if (!(fattr->valid & NFS_ATTR_FATTR_GROUP_NAME))
 		return false;
@@ -837,43 +837,61 @@ idmap_release_pipe(struct inode *inode)
 	nfs_idmap_abort_pipe_upcall(idmap, -EPIPE);
 }
 
-int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *uid)
+int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_t namelen, kuid_t *uid)
 {
 	struct idmap *idmap = server->nfs_client->cl_idmap;
+	__u32 id = -1;
+	int ret = 0;
 
-	if (nfs_map_string_to_numeric(name, namelen, uid))
-		return 0;
-	return nfs_idmap_lookup_id(name, namelen, "uid", uid, idmap);
+	if (!nfs_map_string_to_numeric(name, namelen, &id))
+		ret = nfs_idmap_lookup_id(name, namelen, "uid", &id, idmap);
+	if (ret == 0) {
+		*uid = make_kuid(&init_user_ns, id);
+		if (!uid_valid(*uid))
+			ret = -ERANGE;
+	}
+	return ret;
 }
 
-int nfs_map_group_to_gid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *gid)
+int nfs_map_group_to_gid(const struct nfs_server *server, const char *name, size_t namelen, kgid_t *gid)
 {
 	struct idmap *idmap = server->nfs_client->cl_idmap;
+	__u32 id = -1;
+	int ret = 0;
 
-	if (nfs_map_string_to_numeric(name, namelen, gid))
-		return 0;
-	return nfs_idmap_lookup_id(name, namelen, "gid", gid, idmap);
+	if (!nfs_map_string_to_numeric(name, namelen, &id))
+		ret = nfs_idmap_lookup_id(name, namelen, "gid", &id, idmap);
+	if (ret == 0) {
+		*gid = make_kgid(&init_user_ns, id);
+		if (!gid_valid(*gid))
+			ret = -ERANGE;
+	}
+	return ret;
 }
 
-int nfs_map_uid_to_name(const struct nfs_server *server, __u32 uid, char *buf, size_t buflen)
+int nfs_map_uid_to_name(const struct nfs_server *server, kuid_t uid, char *buf, size_t buflen)
 {
 	struct idmap *idmap = server->nfs_client->cl_idmap;
 	int ret = -EINVAL;
+	__u32 id;
 
+	id = from_kuid(&init_user_ns, uid);
 	if (!(server->caps & NFS_CAP_UIDGID_NOMAP))
-		ret = nfs_idmap_lookup_name(uid, "user", buf, buflen, idmap);
+		ret = nfs_idmap_lookup_name(id, "user", buf, buflen, idmap);
 	if (ret < 0)
-		ret = nfs_map_numeric_to_string(uid, buf, buflen);
+		ret = nfs_map_numeric_to_string(id, buf, buflen);
 	return ret;
 }
-int nfs_map_gid_to_group(const struct nfs_server *server, __u32 gid, char *buf, size_t buflen)
+int nfs_map_gid_to_group(const struct nfs_server *server, kgid_t gid, char *buf, size_t buflen)
 {
 	struct idmap *idmap = server->nfs_client->cl_idmap;
 	int ret = -EINVAL;
+	__u32 id;
 
+	id = from_kgid(&init_user_ns, gid);
 	if (!(server->caps & NFS_CAP_UIDGID_NOMAP))
-		ret = nfs_idmap_lookup_name(gid, "group", buf, buflen, idmap);
+		ret = nfs_idmap_lookup_name(id, "group", buf, buflen, idmap);
 	if (ret < 0)
-		ret = nfs_map_numeric_to_string(gid, buf, buflen);
+		ret = nfs_map_numeric_to_string(id, buf, buflen);
 	return ret;
 }
diff --git a/include/linux/nfs_idmap.h b/include/linux/nfs_idmap.h
index 2dcef3ab58b6..0f4b79da6584 100644
--- a/include/linux/nfs_idmap.h
+++ b/include/linux/nfs_idmap.h
@@ -36,6 +36,7 @@
 #ifndef NFS_IDMAP_H
 #define NFS_IDMAP_H
 
+#include <linux/uidgid.h>
 #include <uapi/linux/nfs_idmap.h>
 
 
@@ -67,10 +68,10 @@ void nfs_fattr_init_names(struct nfs_fattr *fattr,
 void nfs_fattr_free_names(struct nfs_fattr *);
 void nfs_fattr_map_and_free_names(struct nfs_server *, struct nfs_fattr *);
 
-int nfs_map_name_to_uid(const struct nfs_server *, const char *, size_t, __u32 *);
-int nfs_map_group_to_gid(const struct nfs_server *, const char *, size_t, __u32 *);
-int nfs_map_uid_to_name(const struct nfs_server *, __u32, char *, size_t);
-int nfs_map_gid_to_group(const struct nfs_server *, __u32, char *, size_t);
+int nfs_map_name_to_uid(const struct nfs_server *, const char *, size_t, kuid_t *);
+int nfs_map_group_to_gid(const struct nfs_server *, const char *, size_t, kgid_t *);
+int nfs_map_uid_to_name(const struct nfs_server *, kuid_t, char *, size_t);
+int nfs_map_gid_to_group(const struct nfs_server *, kgid_t, char *, size_t);
 
 extern unsigned int nfs_idmap_cache_timeout;
 #endif /* NFS_IDMAP_H */
-- 
cgit v1.2.3


From ab8e4aee0a3f73d1b12e6d63b42075f0586ad4fd Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sat, 2 Feb 2013 05:18:08 -0800
Subject: nfsd: Handle kuids and kgids in the nfs4acl to posix_acl conversion

In struct nfs4_ace remove the member who and replace it with an
anonymous union holding who_uid and who_gid.  Allowing typesafe
storage uids and gids.

Add a helper pace_gt for sorting posix_acl_entries.

In struct posix_user_ace_state to replace uid with a union
of kuid_t uid and kgid_t gid.

Remove all initializations of the deprecated posic_acl_entry
e_id field.  Which is not present when user namespaces are enabled.

Split find_uid into two functions find_uid and find_gid that work
in a typesafe manner.

In nfs4xdr update nfsd4_encode_fattr to deal with the changes
in struct nfs4_ace.

Rewrite nfsd4_encode_name to take a kuid_t and a kgid_t instead
of a generic id and flag if it is a group or a uid.  Replace
the group flag with a test for a valid gid.

Modify nfsd4_encode_user to take a kuid_t and call the modifed
nfsd4_encode_name.

Modify nfsd4_encode_group to take a kgid_t and call the modified
nfsd4_encode_name.

Modify nfsd4_encode_aclname to take an ace instead of taking the
fields of an ace broken out.  This allows it to detect if the ace is
for a user or a group and to pass the appropriate value while still
being typesafe.

Cc: "J. Bruce Fields" <bfields@fieldses.org>
Cc: Trond Myklebust <Trond.Myklebust@netapp.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 fs/nfsd/nfs4acl.c    | 63 ++++++++++++++++++++++++++++++++++++++--------------
 fs/nfsd/nfs4xdr.c    | 41 +++++++++++++++++++++-------------
 include/linux/nfs4.h |  6 ++++-
 3 files changed, 76 insertions(+), 34 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c
index 9c51aff02ae2..8a50b3c18093 100644
--- a/fs/nfsd/nfs4acl.c
+++ b/fs/nfsd/nfs4acl.c
@@ -264,7 +264,7 @@ _posix_to_nfsv4_one(struct posix_acl *pacl, struct nfs4_acl *acl,
 			ace->flag = eflag;
 			ace->access_mask = deny_mask_from_posix(deny, flags);
 			ace->whotype = NFS4_ACL_WHO_NAMED;
-			ace->who = pa->e_id;
+			ace->who_uid = pa->e_uid;
 			ace++;
 			acl->naces++;
 		}
@@ -273,7 +273,7 @@ _posix_to_nfsv4_one(struct posix_acl *pacl, struct nfs4_acl *acl,
 		ace->access_mask = mask_from_posix(pa->e_perm & pas.mask,
 						   flags);
 		ace->whotype = NFS4_ACL_WHO_NAMED;
-		ace->who = pa->e_id;
+		ace->who_uid = pa->e_uid;
 		ace++;
 		acl->naces++;
 		pa++;
@@ -300,7 +300,7 @@ _posix_to_nfsv4_one(struct posix_acl *pacl, struct nfs4_acl *acl,
 		ace->access_mask = mask_from_posix(pa->e_perm & pas.mask,
 						   flags);
 		ace->whotype = NFS4_ACL_WHO_NAMED;
-		ace->who = pa->e_id;
+		ace->who_gid = pa->e_gid;
 		ace++;
 		acl->naces++;
 		pa++;
@@ -329,7 +329,7 @@ _posix_to_nfsv4_one(struct posix_acl *pacl, struct nfs4_acl *acl,
 			ace->flag = eflag | NFS4_ACE_IDENTIFIER_GROUP;
 			ace->access_mask = deny_mask_from_posix(deny, flags);
 			ace->whotype = NFS4_ACL_WHO_NAMED;
-			ace->who = pa->e_id;
+			ace->who_gid = pa->e_gid;
 			ace++;
 			acl->naces++;
 		}
@@ -345,6 +345,18 @@ _posix_to_nfsv4_one(struct posix_acl *pacl, struct nfs4_acl *acl,
 	acl->naces++;
 }
 
+static bool
+pace_gt(struct posix_acl_entry *pace1, struct posix_acl_entry *pace2)
+{
+	if (pace1->e_tag != pace2->e_tag)
+		return pace1->e_tag > pace2->e_tag;
+	if (pace1->e_tag == ACL_USER)
+		return uid_gt(pace1->e_uid, pace2->e_uid);
+	if (pace1->e_tag == ACL_GROUP)
+		return gid_gt(pace1->e_gid, pace2->e_gid);
+	return false;
+}
+
 static void
 sort_pacl_range(struct posix_acl *pacl, int start, int end) {
 	int sorted = 0, i;
@@ -355,8 +367,8 @@ sort_pacl_range(struct posix_acl *pacl, int start, int end) {
 	while (!sorted) {
 		sorted = 1;
 		for (i = start; i < end; i++) {
-			if (pacl->a_entries[i].e_id
-					> pacl->a_entries[i+1].e_id) {
+			if (pace_gt(&pacl->a_entries[i],
+				    &pacl->a_entries[i+1])) {
 				sorted = 0;
 				tmp = pacl->a_entries[i];
 				pacl->a_entries[i] = pacl->a_entries[i+1];
@@ -398,7 +410,10 @@ struct posix_ace_state {
 };
 
 struct posix_user_ace_state {
-	uid_t uid;
+	union {
+		kuid_t uid;
+		kgid_t gid;
+	};
 	struct posix_ace_state perms;
 };
 
@@ -521,7 +536,6 @@ posix_state_to_acl(struct posix_acl_state *state, unsigned int flags)
 	if (error)
 		goto out_err;
 	low_mode_from_nfs4(state->owner.allow, &pace->e_perm, flags);
-	pace->e_id = ACL_UNDEFINED_ID;
 
 	for (i=0; i < state->users->n; i++) {
 		pace++;
@@ -531,7 +545,7 @@ posix_state_to_acl(struct posix_acl_state *state, unsigned int flags)
 			goto out_err;
 		low_mode_from_nfs4(state->users->aces[i].perms.allow,
 					&pace->e_perm, flags);
-		pace->e_id = state->users->aces[i].uid;
+		pace->e_uid = state->users->aces[i].uid;
 		add_to_mask(state, &state->users->aces[i].perms);
 	}
 
@@ -541,7 +555,6 @@ posix_state_to_acl(struct posix_acl_state *state, unsigned int flags)
 	if (error)
 		goto out_err;
 	low_mode_from_nfs4(state->group.allow, &pace->e_perm, flags);
-	pace->e_id = ACL_UNDEFINED_ID;
 	add_to_mask(state, &state->group);
 
 	for (i=0; i < state->groups->n; i++) {
@@ -552,14 +565,13 @@ posix_state_to_acl(struct posix_acl_state *state, unsigned int flags)
 			goto out_err;
 		low_mode_from_nfs4(state->groups->aces[i].perms.allow,
 					&pace->e_perm, flags);
-		pace->e_id = state->groups->aces[i].uid;
+		pace->e_gid = state->groups->aces[i].gid;
 		add_to_mask(state, &state->groups->aces[i].perms);
 	}
 
 	pace++;
 	pace->e_tag = ACL_MASK;
 	low_mode_from_nfs4(state->mask.allow, &pace->e_perm, flags);
-	pace->e_id = ACL_UNDEFINED_ID;
 
 	pace++;
 	pace->e_tag = ACL_OTHER;
@@ -567,7 +579,6 @@ posix_state_to_acl(struct posix_acl_state *state, unsigned int flags)
 	if (error)
 		goto out_err;
 	low_mode_from_nfs4(state->other.allow, &pace->e_perm, flags);
-	pace->e_id = ACL_UNDEFINED_ID;
 
 	return pacl;
 out_err:
@@ -587,12 +598,13 @@ static inline void deny_bits(struct posix_ace_state *astate, u32 mask)
 	astate->deny |= mask & ~astate->allow;
 }
 
-static int find_uid(struct posix_acl_state *state, struct posix_ace_state_array *a, uid_t uid)
+static int find_uid(struct posix_acl_state *state, kuid_t uid)
 {
+	struct posix_ace_state_array *a = state->users;
 	int i;
 
 	for (i = 0; i < a->n; i++)
-		if (a->aces[i].uid == uid)
+		if (uid_eq(a->aces[i].uid, uid))
 			return i;
 	/* Not found: */
 	a->n++;
@@ -603,6 +615,23 @@ static int find_uid(struct posix_acl_state *state, struct posix_ace_state_array
 	return i;
 }
 
+static int find_gid(struct posix_acl_state *state, kgid_t gid)
+{
+	struct posix_ace_state_array *a = state->groups;
+	int i;
+
+	for (i = 0; i < a->n; i++)
+		if (gid_eq(a->aces[i].gid, gid))
+			return i;
+	/* Not found: */
+	a->n++;
+	a->aces[i].gid = gid;
+	a->aces[i].perms.allow = state->everyone.allow;
+	a->aces[i].perms.deny  = state->everyone.deny;
+
+	return i;
+}
+
 static void deny_bits_array(struct posix_ace_state_array *a, u32 mask)
 {
 	int i;
@@ -636,7 +665,7 @@ static void process_one_v4_ace(struct posix_acl_state *state,
 		}
 		break;
 	case ACL_USER:
-		i = find_uid(state, state->users, ace->who);
+		i = find_uid(state, ace->who_uid);
 		if (ace->type == NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) {
 			allow_bits(&state->users->aces[i].perms, mask);
 		} else {
@@ -658,7 +687,7 @@ static void process_one_v4_ace(struct posix_acl_state *state,
 		}
 		break;
 	case ACL_GROUP:
-		i = find_uid(state, state->groups, ace->who);
+		i = find_gid(state, ace->who_gid);
 		if (ace->type == NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) {
 			allow_bits(&state->groups->aces[i].perms, mask);
 		} else {
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 0dc11586682f..3812b06d24b1 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -293,13 +293,13 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
 			ace->whotype = nfs4_acl_get_whotype(buf, dummy32);
 			status = nfs_ok;
 			if (ace->whotype != NFS4_ACL_WHO_NAMED)
-				ace->who = 0;
+				;
 			else if (ace->flag & NFS4_ACE_IDENTIFIER_GROUP)
 				status = nfsd_map_name_to_gid(argp->rqstp,
-						buf, dummy32, &ace->who);
+						buf, dummy32, &ace->who_gid);
 			else
 				status = nfsd_map_name_to_uid(argp->rqstp,
-						buf, dummy32, &ace->who);
+						buf, dummy32, &ace->who_uid);
 			if (status)
 				return status;
 		}
@@ -1926,7 +1926,7 @@ static u32 nfs4_file_type(umode_t mode)
 }
 
 static __be32
-nfsd4_encode_name(struct svc_rqst *rqstp, int whotype, uid_t id, int group,
+nfsd4_encode_name(struct svc_rqst *rqstp, int whotype, kuid_t uid, kgid_t gid,
 			__be32 **p, int *buflen)
 {
 	int status;
@@ -1935,10 +1935,10 @@ nfsd4_encode_name(struct svc_rqst *rqstp, int whotype, uid_t id, int group,
 		return nfserr_resource;
 	if (whotype != NFS4_ACL_WHO_NAMED)
 		status = nfs4_acl_write_who(whotype, (u8 *)(*p + 1));
-	else if (group)
-		status = nfsd_map_gid_to_name(rqstp, id, (u8 *)(*p + 1));
+	else if (gid_valid(gid))
+		status = nfsd_map_gid_to_name(rqstp, gid, (u8 *)(*p + 1));
 	else
-		status = nfsd_map_uid_to_name(rqstp, id, (u8 *)(*p + 1));
+		status = nfsd_map_uid_to_name(rqstp, uid, (u8 *)(*p + 1));
 	if (status < 0)
 		return nfserrno(status);
 	*p = xdr_encode_opaque(*p, NULL, status);
@@ -1948,22 +1948,33 @@ nfsd4_encode_name(struct svc_rqst *rqstp, int whotype, uid_t id, int group,
 }
 
 static inline __be32
-nfsd4_encode_user(struct svc_rqst *rqstp, uid_t uid, __be32 **p, int *buflen)
+nfsd4_encode_user(struct svc_rqst *rqstp, kuid_t user, __be32 **p, int *buflen)
 {
-	return nfsd4_encode_name(rqstp, NFS4_ACL_WHO_NAMED, uid, 0, p, buflen);
+	return nfsd4_encode_name(rqstp, NFS4_ACL_WHO_NAMED, user, INVALID_GID,
+				 p, buflen);
 }
 
 static inline __be32
-nfsd4_encode_group(struct svc_rqst *rqstp, uid_t gid, __be32 **p, int *buflen)
+nfsd4_encode_group(struct svc_rqst *rqstp, kgid_t group, __be32 **p, int *buflen)
 {
-	return nfsd4_encode_name(rqstp, NFS4_ACL_WHO_NAMED, gid, 1, p, buflen);
+	return nfsd4_encode_name(rqstp, NFS4_ACL_WHO_NAMED, INVALID_UID, group,
+				 p, buflen);
 }
 
 static inline __be32
-nfsd4_encode_aclname(struct svc_rqst *rqstp, int whotype, uid_t id, int group,
+nfsd4_encode_aclname(struct svc_rqst *rqstp, struct nfs4_ace *ace,
 		__be32 **p, int *buflen)
 {
-	return nfsd4_encode_name(rqstp, whotype, id, group, p, buflen);
+	kuid_t uid = INVALID_UID;
+	kgid_t gid = INVALID_GID;
+
+	if (ace->whotype == NFS4_ACL_WHO_NAMED) {
+		if (ace->flag & NFS4_ACE_IDENTIFIER_GROUP)
+			gid = ace->who_gid;
+		else
+			uid = ace->who_uid;
+	}
+	return nfsd4_encode_name(rqstp, ace->whotype, uid, gid, p, buflen);
 }
 
 #define WORD0_ABSENT_FS_ATTRS (FATTR4_WORD0_FS_LOCATIONS | FATTR4_WORD0_FSID | \
@@ -2224,9 +2235,7 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
 			WRITE32(ace->type);
 			WRITE32(ace->flag);
 			WRITE32(ace->access_mask & NFS4_ACE_MASK_ALL);
-			status = nfsd4_encode_aclname(rqstp, ace->whotype,
-				ace->who, ace->flag & NFS4_ACE_IDENTIFIER_GROUP,
-				&p, &buflen);
+			status = nfsd4_encode_aclname(rqstp, ace, &p, &buflen);
 			if (status == nfserr_resource)
 				goto out_resource;
 			if (status)
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index e111fa419a4e..7b8fc73810ad 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -13,6 +13,7 @@
 #define _LINUX_NFS4_H
 
 #include <linux/list.h>
+#include <linux/uidgid.h>
 #include <uapi/linux/nfs4.h>
 
 struct nfs4_ace {
@@ -20,7 +21,10 @@ struct nfs4_ace {
 	uint32_t	flag;
 	uint32_t	access_mask;
 	int		whotype;
-	uid_t		who;
+	union {
+		kuid_t	who_uid;
+		kgid_t	who_gid;
+	};
 };
 
 struct nfs4_acl {
-- 
cgit v1.2.3


From 4c1e1b34d5c800ad3ac9a7e2805b0bea70ad2278 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sat, 2 Feb 2013 06:42:53 -0800
Subject: nfsd: Store ex_anon_uid and ex_anon_gid as kuids and kgids

Cc: "J. Bruce Fields" <bfields@fieldses.org>
Cc: Trond Myklebust <Trond.Myklebust@netapp.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 fs/nfsd/auth.c              |  2 +-
 fs/nfsd/export.c            | 22 ++++++++++++++--------
 include/linux/nfsd/export.h |  4 ++--
 3 files changed, 17 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c
index 34a10d78b839..4d6642b38eae 100644
--- a/fs/nfsd/auth.c
+++ b/fs/nfsd/auth.c
@@ -58,7 +58,7 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp)
 
 		for (i = 0; i < rqgi->ngroups; i++) {
 			if (gid_eq(GLOBAL_ROOT_GID, GROUP_AT(rqgi, i)))
-				GROUP_AT(gi, i) = make_kgid(&init_user_ns, exp->ex_anon_gid);
+				GROUP_AT(gi, i) = exp->ex_anon_gid;
 			else
 				GROUP_AT(gi, i) = GROUP_AT(rqgi, i);
 		}
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index a3946cf13fc8..5681c5906f08 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -544,13 +544,17 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
 		err = get_int(&mesg, &an_int);
 		if (err)
 			goto out3;
-		exp.ex_anon_uid= an_int;
+		exp.ex_anon_uid= make_kuid(&init_user_ns, an_int);
+		if (!uid_valid(exp.ex_anon_uid))
+			goto out3;
 
 		/* anon gid */
 		err = get_int(&mesg, &an_int);
 		if (err)
 			goto out3;
-		exp.ex_anon_gid= an_int;
+		exp.ex_anon_gid= make_kgid(&init_user_ns, an_int);
+		if (!gid_valid(exp.ex_anon_gid))
+			goto out3;
 
 		/* fsid */
 		err = get_int(&mesg, &an_int);
@@ -613,7 +617,7 @@ out:
 }
 
 static void exp_flags(struct seq_file *m, int flag, int fsid,
-		uid_t anonu, uid_t anong, struct nfsd4_fs_locations *fslocs);
+		kuid_t anonu, kgid_t anong, struct nfsd4_fs_locations *fslocs);
 static void show_secinfo(struct seq_file *m, struct svc_export *exp);
 
 static int svc_export_show(struct seq_file *m,
@@ -1179,15 +1183,17 @@ static void show_secinfo(struct seq_file *m, struct svc_export *exp)
 }
 
 static void exp_flags(struct seq_file *m, int flag, int fsid,
-		uid_t anonu, uid_t anong, struct nfsd4_fs_locations *fsloc)
+		kuid_t anonu, kgid_t anong, struct nfsd4_fs_locations *fsloc)
 {
 	show_expflags(m, flag, NFSEXP_ALLFLAGS);
 	if (flag & NFSEXP_FSID)
 		seq_printf(m, ",fsid=%d", fsid);
-	if (anonu != (uid_t)-2 && anonu != (0x10000-2))
-		seq_printf(m, ",anonuid=%u", anonu);
-	if (anong != (gid_t)-2 && anong != (0x10000-2))
-		seq_printf(m, ",anongid=%u", anong);
+	if (!uid_eq(anonu, make_kuid(&init_user_ns, (uid_t)-2)) &&
+	    !uid_eq(anonu, make_kuid(&init_user_ns, 0x10000-2)))
+		seq_printf(m, ",anonuid=%u", from_kuid(&init_user_ns, anonu));
+	if (!gid_eq(anong, make_kgid(&init_user_ns, (gid_t)-2)) &&
+	    !gid_eq(anong, make_kgid(&init_user_ns, 0x10000-2)))
+		seq_printf(m, ",anongid=%u", from_kgid(&init_user_ns, anong));
 	if (fsloc && fsloc->locations_count > 0) {
 		char *loctype = (fsloc->migrated) ? "refer" : "replicas";
 		int i;
diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h
index 24c139288db4..7898c997dfea 100644
--- a/include/linux/nfsd/export.h
+++ b/include/linux/nfsd/export.h
@@ -49,8 +49,8 @@ struct svc_export {
 	struct auth_domain *	ex_client;
 	int			ex_flags;
 	struct path		ex_path;
-	uid_t			ex_anon_uid;
-	gid_t			ex_anon_gid;
+	kuid_t			ex_anon_uid;
+	kgid_t			ex_anon_gid;
 	int			ex_fsid;
 	unsigned char *		ex_uuid; /* 16 byte fsid */
 	struct nfsd4_fs_locations ex_fslocs;
-- 
cgit v1.2.3


From fd116066d966549211b41d929feea5474d57461b Mon Sep 17 00:00:00 2001
From: Chanwoo Choi <cw00.choi@samsung.com>
Date: Wed, 13 Feb 2013 18:35:00 +0900
Subject: extcon: gpio: Rename filename of extcon-gpio.c according to kernel
 naming style

Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
Signed-off-by: Myungjoo Ham <myungjoo.ham@samsung.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/extcon/extcon-gpio.c       |  2 +-
 include/linux/extcon/extcon-gpio.h | 52 ++++++++++++++++++++++++++++++++++++++
 include/linux/extcon/extcon_gpio.h | 52 --------------------------------------
 3 files changed, 53 insertions(+), 53 deletions(-)
 create mode 100644 include/linux/extcon/extcon-gpio.h
 delete mode 100644 include/linux/extcon/extcon_gpio.h

(limited to 'include/linux')

diff --git a/drivers/extcon/extcon-gpio.c b/drivers/extcon/extcon-gpio.c
index 1b14bfcdc176..02bec32adde4 100644
--- a/drivers/extcon/extcon-gpio.c
+++ b/drivers/extcon/extcon-gpio.c
@@ -29,7 +29,7 @@
 #include <linux/workqueue.h>
 #include <linux/gpio.h>
 #include <linux/extcon.h>
-#include <linux/extcon/extcon_gpio.h>
+#include <linux/extcon/extcon-gpio.h>
 
 struct gpio_extcon_data {
 	struct extcon_dev edev;
diff --git a/include/linux/extcon/extcon-gpio.h b/include/linux/extcon/extcon-gpio.h
new file mode 100644
index 000000000000..2d8307f7d67d
--- /dev/null
+++ b/include/linux/extcon/extcon-gpio.h
@@ -0,0 +1,52 @@
+/*
+ *  External connector (extcon) class generic GPIO driver
+ *
+ * Copyright (C) 2012 Samsung Electronics
+ * Author: MyungJoo Ham <myungjoo.ham@samsung.com>
+ *
+ * based on switch class driver
+ * Copyright (C) 2008 Google, Inc.
+ * Author: Mike Lockwood <lockwood@android.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+*/
+#ifndef __EXTCON_GPIO_H__
+#define __EXTCON_GPIO_H__ __FILE__
+
+#include <linux/extcon.h>
+
+/**
+ * struct gpio_extcon_platform_data - A simple GPIO-controlled extcon device.
+ * @name	The name of this GPIO extcon device.
+ * @gpio	Corresponding GPIO.
+ * @debounce	Debounce time for GPIO IRQ in ms.
+ * @irq_flags	IRQ Flags (e.g., IRQF_TRIGGER_LOW).
+ * @state_on	print_state is overriden with state_on if attached. If Null,
+ *		default method of extcon class is used.
+ * @state_off	print_state is overriden with state_on if detached. If Null,
+ *		default method of extcon class is used.
+ *
+ * Note that in order for state_on or state_off to be valid, both state_on
+ * and state_off should be not NULL. If at least one of them is NULL,
+ * the print_state is not overriden.
+ */
+struct gpio_extcon_platform_data {
+	const char *name;
+	unsigned gpio;
+	unsigned long debounce;
+	unsigned long irq_flags;
+
+	/* if NULL, "0" or "1" will be printed */
+	const char *state_on;
+	const char *state_off;
+};
+
+#endif /* __EXTCON_GPIO_H__ */
diff --git a/include/linux/extcon/extcon_gpio.h b/include/linux/extcon/extcon_gpio.h
deleted file mode 100644
index 2d8307f7d67d..000000000000
--- a/include/linux/extcon/extcon_gpio.h
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- *  External connector (extcon) class generic GPIO driver
- *
- * Copyright (C) 2012 Samsung Electronics
- * Author: MyungJoo Ham <myungjoo.ham@samsung.com>
- *
- * based on switch class driver
- * Copyright (C) 2008 Google, Inc.
- * Author: Mike Lockwood <lockwood@android.com>
- *
- * This software is licensed under the terms of the GNU General Public
- * License version 2, as published by the Free Software Foundation, and
- * may be copied, distributed, and modified under those terms.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
-*/
-#ifndef __EXTCON_GPIO_H__
-#define __EXTCON_GPIO_H__ __FILE__
-
-#include <linux/extcon.h>
-
-/**
- * struct gpio_extcon_platform_data - A simple GPIO-controlled extcon device.
- * @name	The name of this GPIO extcon device.
- * @gpio	Corresponding GPIO.
- * @debounce	Debounce time for GPIO IRQ in ms.
- * @irq_flags	IRQ Flags (e.g., IRQF_TRIGGER_LOW).
- * @state_on	print_state is overriden with state_on if attached. If Null,
- *		default method of extcon class is used.
- * @state_off	print_state is overriden with state_on if detached. If Null,
- *		default method of extcon class is used.
- *
- * Note that in order for state_on or state_off to be valid, both state_on
- * and state_off should be not NULL. If at least one of them is NULL,
- * the print_state is not overriden.
- */
-struct gpio_extcon_platform_data {
-	const char *name;
-	unsigned gpio;
-	unsigned long debounce;
-	unsigned long irq_flags;
-
-	/* if NULL, "0" or "1" will be printed */
-	const char *state_on;
-	const char *state_off;
-};
-
-#endif /* __EXTCON_GPIO_H__ */
-- 
cgit v1.2.3


From 513b032c98b4b9414aa4e9b4a315cb1bf0380101 Mon Sep 17 00:00:00 2001
From: George Spelvin <linux@horizon.com>
Date: Sun, 10 Feb 2013 04:08:32 -0500
Subject: pps: Add pps_lookup_dev() function

The PPS serial line discipline wants to attach a PPS device to a tty
without changing the tty code to add a struct pps_device * pointer.

Since the number of PPS devices in a typical system is generally very low
(n=1 is by far the most common), it's practical to search the entire list
of allocated pps devices.  (We capture the timestamp before the lookup,
so the timing isn't affected.)

It is a bit ugly that this function, which is part of the in-kernel
PPS API, has to be in pps.c as opposed to kapi,c, but that's not
something that affects users.

Signed-off-by: George Spelvin <linux@horizon.com>
Acked-by: Rodolfo Giometti <giometti@enneenne.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/pps/pps.c          | 33 +++++++++++++++++++++++++++++++++
 include/linux/pps_kernel.h | 17 ++++++++++++++---
 2 files changed, 47 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pps/pps.c b/drivers/pps/pps.c
index 2420d5af0583..a70e384262e5 100644
--- a/drivers/pps/pps.c
+++ b/drivers/pps/pps.c
@@ -352,10 +352,43 @@ free_idr:
 
 void pps_unregister_cdev(struct pps_device *pps)
 {
+	pps->lookup_cookie = NULL;
 	device_destroy(pps_class, pps->dev->devt);
 	cdev_del(&pps->cdev);
 }
 
+/*
+ * Look up a pps device by magic cookie.
+ * The cookie is usually a pointer to some enclosing device, but this
+ * code doesn't care; you should never be dereferencing it.
+ *
+ * This is a bit of a kludge that is currently used only by the PPS
+ * serial line discipline.  It may need to be tweaked when a second user
+ * is found.
+ *
+ * There is no function interface for setting the lookup_cookie field.
+ * It's initialized to NULL when the pps device is created, and if a
+ * client wants to use it, just fill it in afterward.
+ *
+ * The cookie is automatically set to NULL in pps_unregister_source()
+ * so that it will not be used again, even if the pps device cannot
+ * be removed from the idr due to pending references holding the minor
+ * number in use.
+ */
+struct pps_device *pps_lookup_dev(void const *cookie)
+{
+	struct pps_device *pps;
+	unsigned id;
+
+	rcu_read_lock();
+	idr_for_each_entry(&pps_idr, pps, id)
+		if (cookie == pps->lookup_cookie)
+			break;
+	rcu_read_unlock();
+	return pps;
+}
+EXPORT_SYMBOL(pps_lookup_dev);
+
 /*
  * Module stuff
  */
diff --git a/include/linux/pps_kernel.h b/include/linux/pps_kernel.h
index 0cc45ae1afd5..7db3eb93a079 100644
--- a/include/linux/pps_kernel.h
+++ b/include/linux/pps_kernel.h
@@ -43,7 +43,7 @@ struct pps_source_info {
 			int event, void *data);	/* PPS echo function */
 
 	struct module *owner;
-	struct device *dev;
+	struct device *dev;		/* Parent device for device_create */
 };
 
 struct pps_event_time {
@@ -69,6 +69,7 @@ struct pps_device {
 	wait_queue_head_t queue;		/* PPS event queue */
 
 	unsigned int id;			/* PPS source unique ID */
+	void const *lookup_cookie;		/* pps_lookup_dev only */
 	struct cdev cdev;
 	struct device *dev;
 	struct fasync_struct *async_queue;	/* fasync method */
@@ -81,6 +82,16 @@ struct pps_device {
 
 extern struct device_attribute pps_attrs[];
 
+/*
+ * Internal functions.
+ *
+ * These are not actually part of the exported API, but this is a
+ * convenient header file to put them in.
+ */
+
+extern int pps_register_cdev(struct pps_device *pps);
+extern void pps_unregister_cdev(struct pps_device *pps);
+
 /*
  * Exported functions
  */
@@ -88,10 +99,10 @@ extern struct device_attribute pps_attrs[];
 extern struct pps_device *pps_register_source(
 		struct pps_source_info *info, int default_params);
 extern void pps_unregister_source(struct pps_device *pps);
-extern int pps_register_cdev(struct pps_device *pps);
-extern void pps_unregister_cdev(struct pps_device *pps);
 extern void pps_event(struct pps_device *pps,
 		struct pps_event_time *ts, int event, void *data);
+/* Look up a pps device by magic cookie */
+struct pps_device *pps_lookup_dev(void const *cookie);
 
 static inline void timespec_to_pps_ktime(struct pps_ktime *kt,
 		struct timespec ts)
-- 
cgit v1.2.3


From 593fb1ae457aab28b392ac114f6e3358788da985 Mon Sep 17 00:00:00 2001
From: George Spelvin <linux@horizon.com>
Date: Tue, 12 Feb 2013 02:00:43 -0500
Subject: pps: Move timestamp read into PPS code proper

The PPS (Pulse-Per-Second) line discipline has developed a number of
unhealthy attachments to core tty data and functions, ultimately leading
to its breakage.

The previous patches fixed the crashing.  This one reduces coupling further
by eliminating the timestamp parameter from the dcd_change ldisc method.
This reduces header file linkage and makes the extension more generic,
and the timestamp read is delayed only slightly, from just before the
ldisc->ops->dcd_change method call to just after.

Fix attendant build breakage in
    drivers/tty/n_tty.c
    drivers/tty/tty_buffer.c
    drivers/staging/speakup/selection.c
    drivers/staging/dgrp/dgrp_*.c

Cc: William Hubbs <w.d.hubbs@gmail.com>
Cc: Chris Brannon <chris@the-brannons.com>
Cc: Kirk Reiser <kirk@braille.uwo.ca>
Cc: Samuel Thibault <samuel.thibault@ens-lyon.org>
Signed-off-by: Peter Hurley <peter@hurleysoftware.com>
Signed-off-by: George Spelvin <linux@horizon.com>
Acked-by: Rodolfo Giometti <giometti@enneenne.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/pps/clients/pps-ldisc.c     | 11 +++++++----
 drivers/staging/dgrp/dgrp_net_ops.c |  1 +
 drivers/staging/dgrp/dgrp_tty.c     |  1 +
 drivers/staging/speakup/selection.c |  1 +
 drivers/tty/n_tty.c                 |  3 ++-
 drivers/tty/serial/serial_core.c    |  5 +----
 drivers/tty/tty_buffer.c            |  1 +
 include/linux/serial_core.h         |  1 -
 include/linux/tty_ldisc.h           | 11 ++++-------
 9 files changed, 18 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pps/clients/pps-ldisc.c b/drivers/pps/clients/pps-ldisc.c
index a94f73e1480d..73bd3bb4d93b 100644
--- a/drivers/pps/clients/pps-ldisc.c
+++ b/drivers/pps/clients/pps-ldisc.c
@@ -29,11 +29,14 @@
 
 #define PPS_TTY_MAGIC		0x0001
 
-static void pps_tty_dcd_change(struct tty_struct *tty, unsigned int status,
-				struct pps_event_time *ts)
+static void pps_tty_dcd_change(struct tty_struct *tty, unsigned int status)
 {
-	struct pps_device *pps = pps_lookup_dev(tty);
+	struct pps_device *pps;
+	struct pps_event_time ts;
+
+	pps_get_ts(&ts);
 
+	pps = pps_lookup_dev(tty);
 	/*
 	 * This should never fail, but the ldisc locking is very
 	 * convoluted, so don't crash just in case.
@@ -42,7 +45,7 @@ static void pps_tty_dcd_change(struct tty_struct *tty, unsigned int status,
 		return;
 
 	/* Now do the PPS event report */
-	pps_event(pps, ts, status ? PPS_CAPTUREASSERT :
+	pps_event(pps, &ts, status ? PPS_CAPTUREASSERT :
 			PPS_CAPTURECLEAR, NULL);
 
 	dev_dbg(pps->dev, "PPS %s at %lu\n",
diff --git a/drivers/staging/dgrp/dgrp_net_ops.c b/drivers/staging/dgrp/dgrp_net_ops.c
index 4c7abfabf197..e6018823b9de 100644
--- a/drivers/staging/dgrp/dgrp_net_ops.c
+++ b/drivers/staging/dgrp/dgrp_net_ops.c
@@ -37,6 +37,7 @@
 #include <linux/proc_fs.h>
 #include <linux/types.h>
 #include <linux/string.h>
+#include <linux/device.h>
 #include <linux/tty.h>
 #include <linux/tty_flip.h>
 #include <linux/spinlock.h>
diff --git a/drivers/staging/dgrp/dgrp_tty.c b/drivers/staging/dgrp/dgrp_tty.c
index 51d3ed3dca27..654f6010b473 100644
--- a/drivers/staging/dgrp/dgrp_tty.c
+++ b/drivers/staging/dgrp/dgrp_tty.c
@@ -39,6 +39,7 @@
 #include <linux/slab.h>
 #include <linux/tty.h>
 #include <linux/tty_flip.h>
+#include <linux/device.h>
 #include <linux/sched.h>
 #include <linux/uaccess.h>
 
diff --git a/drivers/staging/speakup/selection.c b/drivers/staging/speakup/selection.c
index 0612df06a4bf..2aa22379fda0 100644
--- a/drivers/staging/speakup/selection.c
+++ b/drivers/staging/speakup/selection.c
@@ -2,6 +2,7 @@
 #include <linux/consolemap.h>
 #include <linux/interrupt.h>
 #include <linux/sched.h>
+#include <linux/device.h> /* for dev_warn */
 #include <linux/selection.h>
 
 #include "speakup.h"
diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c
index 095072899b5e..05e72bea9b07 100644
--- a/drivers/tty/n_tty.c
+++ b/drivers/tty/n_tty.c
@@ -49,6 +49,7 @@
 #include <linux/file.h>
 #include <linux/uaccess.h>
 #include <linux/module.h>
+#include <linux/ratelimit.h>
 
 
 /* number of characters left in xmit buffer before select has we have room */
@@ -2188,7 +2189,7 @@ struct tty_ldisc_ops tty_ldisc_N_TTY = {
  *	n_tty_inherit_ops	-	inherit N_TTY methods
  *	@ops: struct tty_ldisc_ops where to save N_TTY methods
  *
- *	Used by a generic struct tty_ldisc_ops to easily inherit N_TTY
+ *	Enables a 'subclass' line discipline to 'inherit' N_TTY
  *	methods.
  */
 
diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c
index ca98a3f65fe1..765be520cd2e 100644
--- a/drivers/tty/serial/serial_core.c
+++ b/drivers/tty/serial/serial_core.c
@@ -2726,13 +2726,12 @@ void uart_handle_dcd_change(struct uart_port *uport, unsigned int status)
 	struct uart_state *state = uport->state;
 	struct tty_port *port = &state->port;
 	struct tty_ldisc *ld = NULL;
-	struct pps_event_time ts;
 	struct tty_struct *tty = port->tty;
 
 	if (tty)
 	        ld = tty_ldisc_ref(tty);
 	if (ld && ld->ops->dcd_change)
-		pps_get_ts(&ts);
+		ld->ops->dcd_change(tty, status);
 
 	uport->icount.dcd++;
 #ifdef CONFIG_HARD_PPS
@@ -2747,8 +2746,6 @@ void uart_handle_dcd_change(struct uart_port *uport, unsigned int status)
 			tty_hangup(tty);
 	}
 
-	if (ld && ld->ops->dcd_change)
-		ld->ops->dcd_change(tty, status, &ts);
 	if (ld)
 		tty_ldisc_deref(ld);
 }
diff --git a/drivers/tty/tty_buffer.c b/drivers/tty/tty_buffer.c
index 61ec4ddf47e0..bb119934e76c 100644
--- a/drivers/tty/tty_buffer.c
+++ b/drivers/tty/tty_buffer.c
@@ -16,6 +16,7 @@
 #include <linux/bitops.h>
 #include <linux/delay.h>
 #include <linux/module.h>
+#include <linux/ratelimit.h>
 
 /**
  *	tty_buffer_free_all		-	free buffers used by a tty
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index d97142159e0f..87d4bbc773fc 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -29,7 +29,6 @@
 #include <linux/tty.h>
 #include <linux/mutex.h>
 #include <linux/sysrq.h>
-#include <linux/pps_kernel.h>
 #include <uapi/linux/serial_core.h>
 
 struct uart_port;
diff --git a/include/linux/tty_ldisc.h b/include/linux/tty_ldisc.h
index fb79dd8d1537..455a0d7bf220 100644
--- a/include/linux/tty_ldisc.h
+++ b/include/linux/tty_ldisc.h
@@ -100,16 +100,14 @@
  *	seek to perform this action quickly but should wait until
  *	any pending driver I/O is completed.
  *
- * void (*dcd_change)(struct tty_struct *tty, unsigned int status,
- *			struct pps_event_time *ts)
+ * void (*dcd_change)(struct tty_struct *tty, unsigned int status)
  *
- *	Tells the discipline that the DCD pin has changed its status and
- *	the relative timestamp. Pointer ts cannot be NULL.
+ *	Tells the discipline that the DCD pin has changed its status.
+ *	Used exclusively by the N_PPS (Pulse-Per-Second) line discipline.
  */
 
 #include <linux/fs.h>
 #include <linux/wait.h>
-#include <linux/pps_kernel.h>
 #include <linux/wait.h>
 
 struct tty_ldisc_ops {
@@ -144,8 +142,7 @@ struct tty_ldisc_ops {
 	void	(*receive_buf)(struct tty_struct *, const unsigned char *cp,
 			       char *fp, int count);
 	void	(*write_wakeup)(struct tty_struct *);
-	void	(*dcd_change)(struct tty_struct *, unsigned int,
-				struct pps_event_time *);
+	void	(*dcd_change)(struct tty_struct *, unsigned int);
 
 	struct  module *owner;
 	
-- 
cgit v1.2.3


From ceaa1fef65a7c2e017b260b879b310dd24888083 Mon Sep 17 00:00:00 2001
From: Andrey Vagin <avagin@openvz.org>
Date: Mon, 11 Feb 2013 05:50:17 +0000
Subject: tcp: adding a per-socket timestamp offset

This functionality is used for restoring tcp sockets. A tcp timestamp
depends on how long a system has been running, so it's differ for each
host. The solution is to set a per-socket offset.

A per-socket offset for a TIME_WAIT socket is inherited from a proper
tcp socket.

tcp_request_sock doesn't have a timestamp offset, because the repair
mode for them are not implemented.

Cc: "David S. Miller" <davem@davemloft.net>
Cc: Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
Cc: James Morris <jmorris@namei.org>
Cc: Hideaki YOSHIFUJI <yoshfuji@linux-ipv6.org>
Cc: Patrick McHardy <kaber@trash.net>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: Andrey Vagin <avagin@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h      | 3 +++
 net/ipv4/tcp.c           | 2 ++
 net/ipv4/tcp_minisocks.c | 2 ++
 3 files changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 6d0d46138ae8..f28408c07dc2 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -162,6 +162,8 @@ struct tcp_sock {
 	u32	rcv_tstamp;	/* timestamp of last received ACK (for keepalives) */
 	u32	lsndtime;	/* timestamp of last sent data packet (for restart window) */
 
+	u32	tsoffset;	/* timestamp offset */
+
 	struct list_head tsq_node; /* anchor in tsq_tasklet.head list */
 	unsigned long	tsq_flags;
 
@@ -353,6 +355,7 @@ struct tcp_timewait_sock {
 	u32			  tw_rcv_nxt;
 	u32			  tw_snd_nxt;
 	u32			  tw_rcv_wnd;
+	u32			  tw_ts_offset;
 	u32			  tw_ts_recent;
 	long			  tw_ts_recent_stamp;
 #ifdef CONFIG_TCP_MD5SIG
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 2c7e5963c2ea..8a90bda96038 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -400,6 +400,8 @@ void tcp_init_sock(struct sock *sk)
 	tcp_enable_early_retrans(tp);
 	icsk->icsk_ca_ops = &tcp_init_congestion_ops;
 
+	tp->tsoffset = 0;
+
 	sk->sk_state = TCP_CLOSE;
 
 	sk->sk_write_space = sk_stream_write_space;
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index f0409287b5f4..4dfc99f54f67 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -288,6 +288,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
 		tcptw->tw_rcv_wnd	= tcp_receive_window(tp);
 		tcptw->tw_ts_recent	= tp->rx_opt.ts_recent;
 		tcptw->tw_ts_recent_stamp = tp->rx_opt.ts_recent_stamp;
+		tcptw->tw_ts_offset	= tp->tsoffset;
 
 #if IS_ENABLED(CONFIG_IPV6)
 		if (tw->tw_family == PF_INET6) {
@@ -499,6 +500,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
 			newtp->rx_opt.ts_recent_stamp = 0;
 			newtp->tcp_header_len = sizeof(struct tcphdr);
 		}
+		newtp->tsoffset = 0;
 #ifdef CONFIG_TCP_MD5SIG
 		newtp->md5sig_info = NULL;	/*XXX*/
 		if (newtp->af_specific->md5_lookup(sk, newsk))
-- 
cgit v1.2.3


From c9af6db4c11ccc6c3e7f19bbc15d54023956f97c Mon Sep 17 00:00:00 2001
From: Pravin B Shelar <pshelar@nicira.com>
Date: Mon, 11 Feb 2013 09:27:41 +0000
Subject: net: Fix possible wrong checksum generation.

Patch cef401de7be8c4e (net: fix possible wrong checksum
generation) fixed wrong checksum calculation but it broke TSO by
defining new GSO type but not a netdev feature for that type.
net_gso_ok() would not allow hardware checksum/segmentation
offload of such packets without the feature.

Following patch fixes TSO and wrong checksum. This patch uses
same logic that Eric Dumazet used. Patch introduces new flag
SKBTX_SHARED_FRAG if at least one frag can be modified by
the user. but SKBTX_SHARED_FRAG flag is kept in skb shared
info tx_flags rather than gso_type.

tx_flags is better compared to gso_type since we can have skb with
shared frag without gso packet. It does not link SHARED_FRAG to
GSO, So there is no need to define netdev feature for this.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/macvtap.c    |  4 ++--
 drivers/net/tun.c        | 13 +++++--------
 drivers/net/virtio_net.c | 13 +++++--------
 include/linux/skbuff.h   | 17 +++++++++--------
 net/core/skbuff.c        |  5 ++---
 net/ipv4/af_inet.c       |  1 -
 net/ipv4/ip_output.c     |  1 +
 net/ipv4/tcp.c           |  4 +---
 net/ipv4/tcp_input.c     |  4 ++--
 net/ipv4/tcp_output.c    |  4 ++--
 net/ipv6/ip6_offload.c   |  1 -
 11 files changed, 29 insertions(+), 38 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
index b181dfb3d6d6..97243011d319 100644
--- a/drivers/net/macvtap.c
+++ b/drivers/net/macvtap.c
@@ -543,7 +543,6 @@ static int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from,
 		skb->data_len += len;
 		skb->len += len;
 		skb->truesize += truesize;
-		skb_shinfo(skb)->gso_type |= SKB_GSO_SHARED_FRAG;
 		atomic_add(truesize, &skb->sk->sk_wmem_alloc);
 		while (len) {
 			int off = base & ~PAGE_MASK;
@@ -599,7 +598,7 @@ static int macvtap_skb_from_vnet_hdr(struct sk_buff *skb,
 
 	if (vnet_hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
 		skb_shinfo(skb)->gso_size = vnet_hdr->gso_size;
-		skb_shinfo(skb)->gso_type |= gso_type;
+		skb_shinfo(skb)->gso_type = gso_type;
 
 		/* Header must be checked, and gso_segs computed. */
 		skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
@@ -743,6 +742,7 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m,
 	if (zerocopy) {
 		skb_shinfo(skb)->destructor_arg = m->msg_control;
 		skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
+		skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;
 	}
 	if (vlan)
 		macvlan_start_xmit(skb, vlan->dev);
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index b1038c0e2240..b6f45c5d84d5 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1019,7 +1019,6 @@ static int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from,
 		skb->data_len += len;
 		skb->len += len;
 		skb->truesize += truesize;
-		skb_shinfo(skb)->gso_type |= SKB_GSO_SHARED_FRAG;
 		atomic_add(truesize, &skb->sk->sk_wmem_alloc);
 		while (len) {
 			int off = base & ~PAGE_MASK;
@@ -1165,18 +1164,16 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
 	}
 
 	if (gso.gso_type != VIRTIO_NET_HDR_GSO_NONE) {
-		unsigned short gso_type = 0;
-
 		pr_debug("GSO!\n");
 		switch (gso.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
 		case VIRTIO_NET_HDR_GSO_TCPV4:
-			gso_type = SKB_GSO_TCPV4;
+			skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
 			break;
 		case VIRTIO_NET_HDR_GSO_TCPV6:
-			gso_type = SKB_GSO_TCPV6;
+			skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
 			break;
 		case VIRTIO_NET_HDR_GSO_UDP:
-			gso_type = SKB_GSO_UDP;
+			skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
 			break;
 		default:
 			tun->dev->stats.rx_frame_errors++;
@@ -1185,10 +1182,9 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
 		}
 
 		if (gso.gso_type & VIRTIO_NET_HDR_GSO_ECN)
-			gso_type |= SKB_GSO_TCP_ECN;
+			skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
 
 		skb_shinfo(skb)->gso_size = gso.gso_size;
-		skb_shinfo(skb)->gso_type |= gso_type;
 		if (skb_shinfo(skb)->gso_size == 0) {
 			tun->dev->stats.rx_frame_errors++;
 			kfree_skb(skb);
@@ -1204,6 +1200,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
 	if (zerocopy) {
 		skb_shinfo(skb)->destructor_arg = msg_control;
 		skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
+		skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;
 	}
 
 	skb_reset_network_header(skb);
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 381a2d8d8a81..192c91c8e799 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -227,7 +227,7 @@ static void set_skb_frag(struct sk_buff *skb, struct page *page,
 	skb->len += size;
 	skb->truesize += PAGE_SIZE;
 	skb_shinfo(skb)->nr_frags++;
-	skb_shinfo(skb)->gso_type |= SKB_GSO_SHARED_FRAG;
+	skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;
 	*len -= size;
 }
 
@@ -387,18 +387,16 @@ static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len)
 		 ntohs(skb->protocol), skb->len, skb->pkt_type);
 
 	if (hdr->hdr.gso_type != VIRTIO_NET_HDR_GSO_NONE) {
-		unsigned short gso_type = 0;
-
 		pr_debug("GSO!\n");
 		switch (hdr->hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
 		case VIRTIO_NET_HDR_GSO_TCPV4:
-			gso_type = SKB_GSO_TCPV4;
+			skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
 			break;
 		case VIRTIO_NET_HDR_GSO_UDP:
-			gso_type = SKB_GSO_UDP;
+			skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
 			break;
 		case VIRTIO_NET_HDR_GSO_TCPV6:
-			gso_type = SKB_GSO_TCPV6;
+			skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
 			break;
 		default:
 			net_warn_ratelimited("%s: bad gso type %u.\n",
@@ -407,7 +405,7 @@ static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len)
 		}
 
 		if (hdr->hdr.gso_type & VIRTIO_NET_HDR_GSO_ECN)
-			gso_type |= SKB_GSO_TCP_ECN;
+			skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
 
 		skb_shinfo(skb)->gso_size = hdr->hdr.gso_size;
 		if (skb_shinfo(skb)->gso_size == 0) {
@@ -415,7 +413,6 @@ static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len)
 			goto frame_err;
 		}
 
-		skb_shinfo(skb)->gso_type |= gso_type;
 		/* Header must be checked, and gso_segs computed. */
 		skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
 		skb_shinfo(skb)->gso_segs = 0;
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index d7573c37a51d..9da99520ccd5 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -230,6 +230,13 @@ enum {
 
 	/* generate wifi status information (where possible) */
 	SKBTX_WIFI_STATUS = 1 << 4,
+
+	/* This indicates at least one fragment might be overwritten
+	 * (as in vmsplice(), sendfile() ...)
+	 * If we need to compute a TX checksum, we'll need to copy
+	 * all frags to avoid possible bad checksum
+	 */
+	SKBTX_SHARED_FRAG = 1 << 5,
 };
 
 /*
@@ -307,13 +314,6 @@ enum {
 	SKB_GSO_TCPV6 = 1 << 4,
 
 	SKB_GSO_FCOE = 1 << 5,
-
-	/* This indicates at least one fragment might be overwritten
-	 * (as in vmsplice(), sendfile() ...)
-	 * If we need to compute a TX checksum, we'll need to copy
-	 * all frags to avoid possible bad checksum
-	 */
-	SKB_GSO_SHARED_FRAG = 1 << 6,
 };
 
 #if BITS_PER_LONG > 32
@@ -2220,7 +2220,8 @@ static inline int skb_linearize(struct sk_buff *skb)
  */
 static inline bool skb_has_shared_frag(const struct sk_buff *skb)
 {
-	return skb_shinfo(skb)->gso_type & SKB_GSO_SHARED_FRAG;
+	return skb_is_nonlinear(skb) &&
+	       skb_shinfo(skb)->tx_flags & SKBTX_SHARED_FRAG;
 }
 
 /**
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 21a22cce6e53..6c1ad09f8796 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -2326,8 +2326,7 @@ void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len)
 {
 	int pos = skb_headlen(skb);
 
-	skb_shinfo(skb1)->gso_type = skb_shinfo(skb)->gso_type;
-
+	skb_shinfo(skb)->tx_flags = skb_shinfo(skb1)->tx_flags & SKBTX_SHARED_FRAG;
 	if (len < pos)	/* Split line is inside header. */
 		skb_split_inside_header(skb, skb1, len, pos);
 	else		/* Second chunk has no header, nothing to copy. */
@@ -2833,7 +2832,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
 		skb_copy_from_linear_data_offset(skb, offset,
 						 skb_put(nskb, hsize), hsize);
 
-		skb_shinfo(nskb)->gso_type = skb_shinfo(skb)->gso_type;
+		skb_shinfo(nskb)->tx_flags = skb_shinfo(skb)->tx_flags & SKBTX_SHARED_FRAG;
 
 		while (pos < offset + len && i < nfrags) {
 			*frag = skb_shinfo(skb)->frags[i];
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 1aec92bf8018..e6e5d8506336 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1287,7 +1287,6 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
 		       SKB_GSO_UDP |
 		       SKB_GSO_DODGY |
 		       SKB_GSO_TCP_ECN |
-		       SKB_GSO_SHARED_FRAG |
 		       0)))
 		goto out;
 
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 3e98ed2bff55..5e12dca7b3dd 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -598,6 +598,7 @@ slow_path:
 	/* for offloaded checksums cleanup checksum before fragmentation */
 	if ((skb->ip_summed == CHECKSUM_PARTIAL) && skb_checksum_help(skb))
 		goto fail;
+	iph = ip_hdr(skb);
 
 	left = skb->len - hlen;		/* Space per frame */
 	ptr = hlen;		/* Where to start from */
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 801b07b796f0..1f0bedb8622f 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -897,8 +897,7 @@ new_segment:
 			get_page(page);
 			skb_fill_page_desc(skb, i, page, offset, copy);
 		}
-
-		skb_shinfo(skb)->gso_type |= SKB_GSO_SHARED_FRAG;
+		skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;
 
 		skb->len += copy;
 		skb->data_len += copy;
@@ -3044,7 +3043,6 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
 			       SKB_GSO_DODGY |
 			       SKB_GSO_TCP_ECN |
 			       SKB_GSO_TCPV6 |
-			       SKB_GSO_SHARED_FRAG |
 			       0) ||
 			     !(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))))
 			goto out;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index d9bfaea34322..a759e19496d2 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1239,13 +1239,13 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
 	 */
 	if (!skb_shinfo(prev)->gso_size) {
 		skb_shinfo(prev)->gso_size = mss;
-		skb_shinfo(prev)->gso_type |= sk->sk_gso_type;
+		skb_shinfo(prev)->gso_type = sk->sk_gso_type;
 	}
 
 	/* CHECKME: To clear or not to clear? Mimics normal skb currently */
 	if (skb_shinfo(skb)->gso_segs <= 1) {
 		skb_shinfo(skb)->gso_size = 0;
-		skb_shinfo(skb)->gso_type &= SKB_GSO_SHARED_FRAG;
+		skb_shinfo(skb)->gso_type = 0;
 	}
 
 	/* Difference in this won't matter, both ACKed by the same cumul. ACK */
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 564bf89d9fd3..6182d90e97b0 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1133,7 +1133,6 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
 static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb,
 				 unsigned int mss_now)
 {
-	skb_shinfo(skb)->gso_type &= SKB_GSO_SHARED_FRAG;
 	if (skb->len <= mss_now || !sk_can_gso(sk) ||
 	    skb->ip_summed == CHECKSUM_NONE) {
 		/* Avoid the costly divide in the normal
@@ -1141,10 +1140,11 @@ static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb,
 		 */
 		skb_shinfo(skb)->gso_segs = 1;
 		skb_shinfo(skb)->gso_size = 0;
+		skb_shinfo(skb)->gso_type = 0;
 	} else {
 		skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss_now);
 		skb_shinfo(skb)->gso_size = mss_now;
-		skb_shinfo(skb)->gso_type |= sk->sk_gso_type;
+		skb_shinfo(skb)->gso_type = sk->sk_gso_type;
 	}
 }
 
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index d141fc32a2ea..f26f0da7f095 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -100,7 +100,6 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
 		       SKB_GSO_DODGY |
 		       SKB_GSO_TCP_ECN |
 		       SKB_GSO_TCPV6 |
-		       SKB_GSO_SHARED_FRAG |
 		       0)))
 		goto out;
 
-- 
cgit v1.2.3


From 5488c753530b7b08437df6115a2c2c6156c2f0f6 Mon Sep 17 00:00:00 2001
From: Peter Hurley <peter@hurleysoftware.com>
Date: Wed, 13 Feb 2013 14:54:30 -0500
Subject: pps: Fix build breakage from decoupling pps from tty

Fixes:
tree:   git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/tty.git tty-next
head:   bc80fbe46be7430487a45ad92841932bb2eaa3e6
commit: 593fb1ae457aab28b392ac114f6e3358788da985 pps: Move timestamp read into PPS code proper
date:   78 minutes ago
config: make ARCH=sparc defconfig

All error/warnings:

   In file included from drivers/tty/serial/suncore.c:20:0:
>> include/linux/sunserialcore.h:29:15: warning: 'struct device_node' declared inside parameter list [enabled by default]
>> include/linux/sunserialcore.h:29:15: warning: its scope is only this definition or declaration, which is probably not what you want [enabled by default]
>> include/linux/sunserialcore.h:31:18: warning: 'struct device_node' declared inside parameter list [enabled by default]
>> drivers/tty/serial/suncore.c:55:5: error: conflicting types for 'sunserial_console_match'
   include/linux/sunserialcore.h:28:12: note: previous declaration of 'sunserial_console_match' was here
>> drivers/tty/serial/suncore.c:83:1: error: conflicting types for 'sunserial_console_match'
   include/linux/sunserialcore.h:28:12: note: previous declaration of 'sunserial_console_match' was here
>> drivers/tty/serial/suncore.c:85:6: error: conflicting types for 'sunserial_console_termios'
   include/linux/sunserialcore.h:30:13: note: previous declaration of 'sunserial_console_termios' was here

Reported-by: kbuild test robot <fengguang.wu@intel.com>
Cc: George Spelvin <linux@horizon.com>
Signed-off-by: Peter Hurley <peter@hurleysoftware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/sunserialcore.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sunserialcore.h b/include/linux/sunserialcore.h
index 68e7430bb0fe..dbe4d7fca1b8 100644
--- a/include/linux/sunserialcore.h
+++ b/include/linux/sunserialcore.h
@@ -13,6 +13,10 @@
 #ifndef _SERIAL_SUN_H
 #define _SERIAL_SUN_H
 
+#include <linux/device.h>
+#include <linux/serial_core.h>
+#include <linux/console.h>
+
 /* Serial keyboard defines for L1-A processing... */
 #define SUNKBD_RESET		0xff
 #define SUNKBD_L1		0x01
-- 
cgit v1.2.3


From e3e5bc02d2365d3e09164fd9a559011399ca2a4f Mon Sep 17 00:00:00 2001
From: Chanwoo Choi <cw00.choi@samsung.com>
Date: Tue, 12 Feb 2013 20:44:19 +0900
Subject: extcon: max8997: Move defined constant to header file

This patch move defined constants to header file(max77693-private.h)
because of mask/unmask selectively interrupt of MUIC device according
to attribute of H/W board.

Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
Signed-off-by: Myungjoo Ham <myungjoo.ham@samsung.com>
---
 drivers/extcon/extcon-max8997.c     | 92 +++++++++++++------------------------
 include/linux/mfd/max8997-private.h | 49 ++++++++++++++++++++
 2 files changed, 80 insertions(+), 61 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/extcon/extcon-max8997.c b/drivers/extcon/extcon-max8997.c
index d16090ddd65a..0fb1d48b0741 100644
--- a/drivers/extcon/extcon-max8997.c
+++ b/drivers/extcon/extcon-max8997.c
@@ -30,51 +30,6 @@
 
 #define	DEV_NAME			"max8997-muic"
 
-/* MAX8997-MUIC STATUS1 register */
-#define STATUS1_ADC_SHIFT		0
-#define STATUS1_ADCLOW_SHIFT		5
-#define STATUS1_ADCERR_SHIFT		6
-#define STATUS1_ADC_MASK		(0x1f << STATUS1_ADC_SHIFT)
-#define STATUS1_ADCLOW_MASK		(0x1 << STATUS1_ADCLOW_SHIFT)
-#define STATUS1_ADCERR_MASK		(0x1 << STATUS1_ADCERR_SHIFT)
-
-/* MAX8997-MUIC STATUS2 register */
-#define STATUS2_CHGTYP_SHIFT		0
-#define STATUS2_CHGDETRUN_SHIFT		3
-#define STATUS2_DCDTMR_SHIFT		4
-#define STATUS2_DBCHG_SHIFT		5
-#define STATUS2_VBVOLT_SHIFT		6
-#define STATUS2_CHGTYP_MASK		(0x7 << STATUS2_CHGTYP_SHIFT)
-#define STATUS2_CHGDETRUN_MASK		(0x1 << STATUS2_CHGDETRUN_SHIFT)
-#define STATUS2_DCDTMR_MASK		(0x1 << STATUS2_DCDTMR_SHIFT)
-#define STATUS2_DBCHG_MASK		(0x1 << STATUS2_DBCHG_SHIFT)
-#define STATUS2_VBVOLT_MASK		(0x1 << STATUS2_VBVOLT_SHIFT)
-
-/* MAX8997-MUIC STATUS3 register */
-#define STATUS3_OVP_SHIFT		2
-#define STATUS3_OVP_MASK		(0x1 << STATUS3_OVP_SHIFT)
-
-/* MAX8997-MUIC CONTROL1 register */
-#define COMN1SW_SHIFT			0
-#define COMP2SW_SHIFT			3
-#define COMN1SW_MASK			(0x7 << COMN1SW_SHIFT)
-#define COMP2SW_MASK			(0x7 << COMP2SW_SHIFT)
-#define SW_MASK				(COMP2SW_MASK | COMN1SW_MASK)
-
-#define MAX8997_SW_USB		((1 << COMP2SW_SHIFT) | (1 << COMN1SW_SHIFT))
-#define MAX8997_SW_AUDIO	((2 << COMP2SW_SHIFT) | (2 << COMN1SW_SHIFT))
-#define MAX8997_SW_UART		((3 << COMP2SW_SHIFT) | (3 << COMN1SW_SHIFT))
-#define MAX8997_SW_OPEN		((0 << COMP2SW_SHIFT) | (0 << COMN1SW_SHIFT))
-
-#define	MAX8997_ADC_GROUND		0x00
-#define	MAX8997_ADC_MHL			0x01
-#define	MAX8997_ADC_JIG_USB_1		0x18
-#define	MAX8997_ADC_JIG_USB_2		0x19
-#define	MAX8997_ADC_DESKDOCK		0x1a
-#define	MAX8997_ADC_JIG_UART		0x1c
-#define	MAX8997_ADC_CARDOCK		0x1d
-#define	MAX8997_ADC_OPEN		0x1f
-
 struct max8997_muic_irq {
 	unsigned int irq;
 	const char *name;
@@ -109,17 +64,32 @@ struct max8997_muic_info {
 	struct extcon_dev	*edev;
 };
 
+enum {
+	EXTCON_CABLE_USB = 0,
+	EXTCON_CABLE_USB_HOST,
+	EXTCON_CABLE_TA,
+	EXTCON_CABLE_FAST_CHARGER,
+	EXTCON_CABLE_SLOW_CHARGER,
+	EXTCON_CABLE_CHARGE_DOWNSTREAM,
+	EXTCON_CABLE_MHL,
+	EXTCON_CABLE_DOCK_DESK,
+	EXTCON_CABLE_DOCK_CARD,
+	EXTCON_CABLE_JIG,
+
+	_EXTCON_CABLE_NUM,
+};
+
 static const char *max8997_extcon_cable[] = {
-	[0] = "USB",
-	[1] = "USB-Host",
-	[2] = "TA",
-	[3] = "Fast-charger",
-	[4] = "Slow-charger",
-	[5] = "Charge-downstream",
-	[6] = "MHL",
-	[7] = "Dock-desk",
-	[8] = "Dock-card",
-	[9] = "JIG",
+	[EXTCON_CABLE_USB]			= "USB",
+	[EXTCON_CABLE_USB_HOST]			= "USB-Host",
+	[EXTCON_CABLE_TA]			= "TA",
+	[EXTCON_CABLE_FAST_CHARGER]		= "Fast-charger",
+	[EXTCON_CABLE_SLOW_CHARGER]		= "Slow-charger",
+	[EXTCON_CABLE_CHARGE_DOWNSTREAM]	= "Charge-downstream",
+	[EXTCON_CABLE_MHL]			= "MHL",
+	[EXTCON_CABLE_DOCK_DESK]		= "Dock-Desk",
+	[EXTCON_CABLE_DOCK_CARD]		= "Dock-Card",
+	[EXTCON_CABLE_JIG]			= "JIG",
 
 	NULL,
 };
@@ -132,8 +102,8 @@ static int max8997_muic_handle_usb(struct max8997_muic_info *info,
 	if (usb_type == MAX8997_USB_HOST) {
 		/* switch to USB */
 		ret = max8997_update_reg(info->muic, MAX8997_MUIC_REG_CONTROL1,
-				attached ? MAX8997_SW_USB : MAX8997_SW_OPEN,
-				SW_MASK);
+				attached ? CONTROL1_SW_USB : CONTROL1_SW_OPEN,
+				CONTROL1_SW_MASK);
 		if (ret) {
 			dev_err(info->dev, "failed to update muic register\n");
 			goto out;
@@ -163,8 +133,8 @@ static int max8997_muic_handle_dock(struct max8997_muic_info *info,
 
 	/* switch to AUDIO */
 	ret = max8997_update_reg(info->muic, MAX8997_MUIC_REG_CONTROL1,
-				attached ? MAX8997_SW_AUDIO : MAX8997_SW_OPEN,
-				SW_MASK);
+				attached ? CONTROL1_SW_AUDIO : CONTROL1_SW_OPEN,
+				CONTROL1_SW_MASK);
 	if (ret) {
 		dev_err(info->dev, "failed to update muic register\n");
 		goto out;
@@ -192,8 +162,8 @@ static int max8997_muic_handle_jig_uart(struct max8997_muic_info *info,
 
 	/* switch to UART */
 	ret = max8997_update_reg(info->muic, MAX8997_MUIC_REG_CONTROL1,
-				attached ? MAX8997_SW_UART : MAX8997_SW_OPEN,
-				SW_MASK);
+				attached ? CONTROL1_SW_UART : CONTROL1_SW_OPEN,
+				CONTROL1_SW_MASK);
 	if (ret) {
 		dev_err(info->dev, "failed to update muic register\n");
 		goto out;
diff --git a/include/linux/mfd/max8997-private.h b/include/linux/mfd/max8997-private.h
index 6ae21bf47d64..acf42e960320 100644
--- a/include/linux/mfd/max8997-private.h
+++ b/include/linux/mfd/max8997-private.h
@@ -194,6 +194,55 @@ enum max8997_muic_reg {
 	MAX8997_MUIC_REG_END		= 0xf,
 };
 
+/* MAX8997-MUIC STATUS1 register */
+#define STATUS1_ADC_SHIFT		0
+#define STATUS1_ADCLOW_SHIFT		5
+#define STATUS1_ADCERR_SHIFT		6
+#define STATUS1_ADC_MASK		(0x1f << STATUS1_ADC_SHIFT)
+#define STATUS1_ADCLOW_MASK		(0x1 << STATUS1_ADCLOW_SHIFT)
+#define STATUS1_ADCERR_MASK		(0x1 << STATUS1_ADCERR_SHIFT)
+
+/* MAX8997-MUIC STATUS2 register */
+#define STATUS2_CHGTYP_SHIFT		0
+#define STATUS2_CHGDETRUN_SHIFT		3
+#define STATUS2_DCDTMR_SHIFT		4
+#define STATUS2_DBCHG_SHIFT		5
+#define STATUS2_VBVOLT_SHIFT		6
+#define STATUS2_CHGTYP_MASK		(0x7 << STATUS2_CHGTYP_SHIFT)
+#define STATUS2_CHGDETRUN_MASK		(0x1 << STATUS2_CHGDETRUN_SHIFT)
+#define STATUS2_DCDTMR_MASK		(0x1 << STATUS2_DCDTMR_SHIFT)
+#define STATUS2_DBCHG_MASK		(0x1 << STATUS2_DBCHG_SHIFT)
+#define STATUS2_VBVOLT_MASK		(0x1 << STATUS2_VBVOLT_SHIFT)
+
+/* MAX8997-MUIC STATUS3 register */
+#define STATUS3_OVP_SHIFT		2
+#define STATUS3_OVP_MASK		(0x1 << STATUS3_OVP_SHIFT)
+
+/* MAX8997-MUIC CONTROL1 register */
+#define COMN1SW_SHIFT			0
+#define COMP2SW_SHIFT			3
+#define COMN1SW_MASK			(0x7 << COMN1SW_SHIFT)
+#define COMP2SW_MASK			(0x7 << COMP2SW_SHIFT)
+#define CONTROL1_SW_MASK		(COMP2SW_MASK | COMN1SW_MASK)
+
+#define CONTROL1_SW_USB			((1 << COMP2SW_SHIFT) \
+						| (1 << COMN1SW_SHIFT))
+#define CONTROL1_SW_AUDIO		((2 << COMP2SW_SHIFT) \
+						| (2 << COMN1SW_SHIFT))
+#define CONTROL1_SW_UART		((3 << COMP2SW_SHIFT) \
+						| (3 << COMN1SW_SHIFT))
+#define CONTROL1_SW_OPEN		((0 << COMP2SW_SHIFT) \
+						| (0 << COMN1SW_SHIFT))
+
+#define	MAX8997_ADC_GROUND		0x00
+#define	MAX8997_ADC_MHL			0x01
+#define	MAX8997_ADC_JIG_USB_1		0x18
+#define	MAX8997_ADC_JIG_USB_2		0x19
+#define	MAX8997_ADC_DESKDOCK		0x1a
+#define	MAX8997_ADC_JIG_UART		0x1c
+#define	MAX8997_ADC_CARDOCK		0x1d
+#define	MAX8997_ADC_OPEN		0x1f
+
 enum max8997_haptic_reg {
 	MAX8997_HAPTIC_REG_GENERAL	= 0x00,
 	MAX8997_HAPTIC_REG_CONF1	= 0x01,
-- 
cgit v1.2.3


From 07c70503a420d48402b3859e2c1c4c847a130a8b Mon Sep 17 00:00:00 2001
From: Chanwoo Choi <cw00.choi@samsung.com>
Date: Wed, 13 Feb 2013 08:42:37 +0900
Subject: extcon: max8997: Remove duplicate code related to set H/W line path

Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
Signed-off-by: Myungjoo Ham <myungjoo.ham@samsung.com>
---
 drivers/extcon/extcon-max8997.c     | 62 ++++++++++++++++++++++++++++++-------
 include/linux/mfd/max8997-private.h | 19 +++++++++++-
 2 files changed, 69 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/extcon/extcon-max8997.c b/drivers/extcon/extcon-max8997.c
index 0fb1d48b0741..8739b50c2b36 100644
--- a/drivers/extcon/extcon-max8997.c
+++ b/drivers/extcon/extcon-max8997.c
@@ -94,16 +94,61 @@ static const char *max8997_extcon_cable[] = {
 	NULL,
 };
 
+/*
+ * max8997_muic_set_path - Set hardware line according to attached cable
+ * @info: the instance including private data of max8997 MUIC
+ * @value: the path according to attached cable
+ * @attached: the state of cable (true:attached, false:detached)
+ *
+ * The max8997 MUIC device share outside H/W line among a varity of cables,
+ * so this function set internal path of H/W line according to the type of
+ * attached cable.
+ */
+static int max8997_muic_set_path(struct max8997_muic_info *info,
+		u8 val, bool attached)
+{
+	int ret = 0;
+	u8 ctrl1, ctrl2 = 0;
+
+	if (attached)
+		ctrl1 = val;
+	else
+		ctrl1 = CONTROL1_SW_OPEN;
+
+	ret = max8997_update_reg(info->muic,
+			MAX8997_MUIC_REG_CONTROL1, ctrl1, COMP_SW_MASK);
+	if (ret < 0) {
+		dev_err(info->dev, "failed to update MUIC register\n");
+		return -EAGAIN;
+	}
+
+	if (attached)
+		ctrl2 |= CONTROL2_CPEN_MASK;	/* LowPwr=0, CPEn=1 */
+	else
+		ctrl2 |= CONTROL2_LOWPWR_MASK;	/* LowPwr=1, CPEn=0 */
+
+	ret = max8997_update_reg(info->muic,
+			MAX8997_MUIC_REG_CONTROL2, ctrl2,
+			CONTROL2_LOWPWR_MASK | CONTROL2_CPEN_MASK);
+	if (ret < 0) {
+		dev_err(info->dev, "failed to update MUIC register\n");
+		return -EAGAIN;
+	}
+
+	dev_info(info->dev,
+		"CONTROL1 : 0x%02x, CONTROL2 : 0x%02x, state : %s\n",
+		ctrl1, ctrl2, attached ? "attached" : "detached");
+
+	return 0;
+}
+
 static int max8997_muic_handle_usb(struct max8997_muic_info *info,
 			enum max8997_muic_usb_type usb_type, bool attached)
 {
 	int ret = 0;
 
 	if (usb_type == MAX8997_USB_HOST) {
-		/* switch to USB */
-		ret = max8997_update_reg(info->muic, MAX8997_MUIC_REG_CONTROL1,
-				attached ? CONTROL1_SW_USB : CONTROL1_SW_OPEN,
-				CONTROL1_SW_MASK);
+		ret = max8997_muic_set_path(info, CONTROL1_SW_USB, attached);
 		if (ret) {
 			dev_err(info->dev, "failed to update muic register\n");
 			goto out;
@@ -131,10 +176,7 @@ static int max8997_muic_handle_dock(struct max8997_muic_info *info,
 {
 	int ret = 0;
 
-	/* switch to AUDIO */
-	ret = max8997_update_reg(info->muic, MAX8997_MUIC_REG_CONTROL1,
-				attached ? CONTROL1_SW_AUDIO : CONTROL1_SW_OPEN,
-				CONTROL1_SW_MASK);
+	ret = max8997_muic_set_path(info, CONTROL1_SW_AUDIO, attached);
 	if (ret) {
 		dev_err(info->dev, "failed to update muic register\n");
 		goto out;
@@ -161,9 +203,7 @@ static int max8997_muic_handle_jig_uart(struct max8997_muic_info *info,
 	int ret = 0;
 
 	/* switch to UART */
-	ret = max8997_update_reg(info->muic, MAX8997_MUIC_REG_CONTROL1,
-				attached ? CONTROL1_SW_UART : CONTROL1_SW_OPEN,
-				CONTROL1_SW_MASK);
+	ret = max8997_muic_set_path(info, CONTROL1_SW_UART, attached);
 	if (ret) {
 		dev_err(info->dev, "failed to update muic register\n");
 		goto out;
diff --git a/include/linux/mfd/max8997-private.h b/include/linux/mfd/max8997-private.h
index acf42e960320..010173a92274 100644
--- a/include/linux/mfd/max8997-private.h
+++ b/include/linux/mfd/max8997-private.h
@@ -223,7 +223,7 @@ enum max8997_muic_reg {
 #define COMP2SW_SHIFT			3
 #define COMN1SW_MASK			(0x7 << COMN1SW_SHIFT)
 #define COMP2SW_MASK			(0x7 << COMP2SW_SHIFT)
-#define CONTROL1_SW_MASK		(COMP2SW_MASK | COMN1SW_MASK)
+#define COMP_SW_MASK		(COMP2SW_MASK | COMN1SW_MASK)
 
 #define CONTROL1_SW_USB			((1 << COMP2SW_SHIFT) \
 						| (1 << COMN1SW_SHIFT))
@@ -234,6 +234,23 @@ enum max8997_muic_reg {
 #define CONTROL1_SW_OPEN		((0 << COMP2SW_SHIFT) \
 						| (0 << COMN1SW_SHIFT))
 
+#define CONTROL2_LOWPWR_SHIFT		(0)
+#define CONTROL2_ADCEN_SHIFT		(1)
+#define CONTROL2_CPEN_SHIFT		(2)
+#define CONTROL2_SFOUTASRT_SHIFT	(3)
+#define CONTROL2_SFOUTORD_SHIFT		(4)
+#define CONTROL2_ACCDET_SHIFT		(5)
+#define CONTROL2_USBCPINT_SHIFT		(6)
+#define CONTROL2_RCPS_SHIFT		(7)
+#define CONTROL2_LOWPWR_MASK		(0x1 << CONTROL2_LOWPWR_SHIFT)
+#define CONTROL2_ADCEN_MASK		(0x1 << CONTROL2_ADCEN_SHIFT)
+#define CONTROL2_CPEN_MASK		(0x1 << CONTROL2_CPEN_SHIFT)
+#define CONTROL2_SFOUTASRT_MASK		(0x1 << CONTROL2_SFOUTASRT_SHIFT)
+#define CONTROL2_SFOUTORD_MASK		(0x1 << CONTROL2_SFOUTORD_SHIFT)
+#define CONTROL2_ACCDET_MASK		(0x1 << CONTROL2_ACCDET_SHIFT)
+#define CONTROL2_USBCPINT_MASK		(0x1 << CONTROL2_USBCPINT_SHIFT)
+#define CONTROL2_RCPS_MASK		(0x1 << CONTROL2_RCPS_SHIFT)
+
 #define	MAX8997_ADC_GROUND		0x00
 #define	MAX8997_ADC_MHL			0x01
 #define	MAX8997_ADC_JIG_USB_1		0x18
-- 
cgit v1.2.3


From 027fcd50500fd87847891d5c2f341c1f002de3e8 Mon Sep 17 00:00:00 2001
From: Chanwoo Choi <cw00.choi@samsung.com>
Date: Wed, 13 Feb 2013 08:50:00 +0900
Subject: extcon: max8997: Set default of ADC debounce time during
 initialization

This patch set default of ADC Debounce Time(25ms) during probe step.
Also, can possible change ADC Debounce Time according to H/W situation
by using max8997_set_adc_debounce_time()

Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
Signed-off-by: Myungjoo Ham <myungjoo.ham@samsung.com>
---
 drivers/extcon/extcon-max8997.c     | 42 +++++++++++++++++++++++++++++++++++++
 include/linux/mfd/max8997-private.h |  7 +++++++
 2 files changed, 49 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/extcon/extcon-max8997.c b/drivers/extcon/extcon-max8997.c
index 8739b50c2b36..3206daaf8e08 100644
--- a/drivers/extcon/extcon-max8997.c
+++ b/drivers/extcon/extcon-max8997.c
@@ -30,6 +30,13 @@
 
 #define	DEV_NAME			"max8997-muic"
 
+enum max8997_muic_adc_debounce_time {
+	ADC_DEBOUNCE_TIME_0_5MS = 0,	/* 0.5ms */
+	ADC_DEBOUNCE_TIME_10MS,		/* 10ms */
+	ADC_DEBOUNCE_TIME_25MS,		/* 25ms */
+	ADC_DEBOUNCE_TIME_38_62MS,	/* 38.62ms */
+};
+
 struct max8997_muic_irq {
 	unsigned int irq;
 	const char *name;
@@ -94,6 +101,38 @@ static const char *max8997_extcon_cable[] = {
 	NULL,
 };
 
+/*
+ * max8997_muic_set_debounce_time - Set the debounce time of ADC
+ * @info: the instance including private data of max8997 MUIC
+ * @time: the debounce time of ADC
+ */
+static int max8997_muic_set_debounce_time(struct max8997_muic_info *info,
+		enum max8997_muic_adc_debounce_time time)
+{
+	int ret;
+
+	switch (time) {
+	case ADC_DEBOUNCE_TIME_0_5MS:
+	case ADC_DEBOUNCE_TIME_10MS:
+	case ADC_DEBOUNCE_TIME_25MS:
+	case ADC_DEBOUNCE_TIME_38_62MS:
+		ret = max8997_update_reg(info->muic,
+					  MAX8997_MUIC_REG_CONTROL3,
+					  time << CONTROL3_ADCDBSET_SHIFT,
+					  CONTROL3_ADCDBSET_MASK);
+		if (ret) {
+			dev_err(info->dev, "failed to set ADC debounce time\n");
+			return -EAGAIN;
+		}
+		break;
+	default:
+		dev_err(info->dev, "invalid ADC debounce time\n");
+		return -EINVAL;
+	}
+
+	return 0;
+};
+
 /*
  * max8997_muic_set_path - Set hardware line according to attached cable
  * @info: the instance including private data of max8997 MUIC
@@ -507,6 +546,9 @@ static int max8997_muic_probe(struct platform_device *pdev)
 		}
 	}
 
+	/* Set ADC debounce time */
+	max8997_muic_set_debounce_time(info, ADC_DEBOUNCE_TIME_25MS);
+
 	/* Initial device detection */
 	max8997_muic_detect_dev(info);
 
diff --git a/include/linux/mfd/max8997-private.h b/include/linux/mfd/max8997-private.h
index 010173a92274..cd37a92ccba9 100644
--- a/include/linux/mfd/max8997-private.h
+++ b/include/linux/mfd/max8997-private.h
@@ -251,6 +251,13 @@ enum max8997_muic_reg {
 #define CONTROL2_USBCPINT_MASK		(0x1 << CONTROL2_USBCPINT_SHIFT)
 #define CONTROL2_RCPS_MASK		(0x1 << CONTROL2_RCPS_SHIFT)
 
+#define CONTROL3_JIGSET_SHIFT		(0)
+#define CONTROL3_BTLDSET_SHIFT		(2)
+#define CONTROL3_ADCDBSET_SHIFT		(4)
+#define CONTROL3_JIGSET_MASK		(0x3 << CONTROL3_JIGSET_SHIFT)
+#define CONTROL3_BTLDSET_MASK		(0x3 << CONTROL3_BTLDSET_SHIFT)
+#define CONTROL3_ADCDBSET_MASK		(0x3 << CONTROL3_ADCDBSET_SHIFT)
+
 #define	MAX8997_ADC_GROUND		0x00
 #define	MAX8997_ADC_MHL			0x01
 #define	MAX8997_ADC_JIG_USB_1		0x18
-- 
cgit v1.2.3


From f73f6232af9131f7b6fc6e377267e4a441727eb3 Mon Sep 17 00:00:00 2001
From: Chanwoo Choi <cw00.choi@samsung.com>
Date: Wed, 13 Feb 2013 12:05:42 +0900
Subject: extcon: max8997: Consolidate duplicate code for checking ADC/CHG
 cable type

This patch make max8997_muic_get_cable_type() function to remove
duplicate code for checking ADC/Charger cable type because almost
internal function need to read adc/chg_type value of MUIC register.

Also, remove *_detach() function, extcon-max8997 driver treat
attach/detach operation of cable in max8997_*_handler() function.
Lastly, this patch move defined constant in header file(include/
linux/mfd/max8997.h, max8997-private.h) because defined constant
is only used in the 'extcon-max8997.c'.

Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
Signed-off-by: Myungjoo Ham <myungjoo.ham@samsung.com>
---
 drivers/extcon/extcon-max8997.c     | 492 +++++++++++++++++++++++-------------
 include/linux/mfd/max8997-private.h |   9 -
 include/linux/mfd/max8997.h         |  15 --
 3 files changed, 323 insertions(+), 193 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/extcon/extcon-max8997.c b/drivers/extcon/extcon-max8997.c
index 3206daaf8e08..35338a090c06 100644
--- a/drivers/extcon/extcon-max8997.c
+++ b/drivers/extcon/extcon-max8997.c
@@ -44,31 +44,89 @@ struct max8997_muic_irq {
 };
 
 static struct max8997_muic_irq muic_irqs[] = {
-	{ MAX8997_MUICIRQ_ADCError, "muic-ADC_error" },
-	{ MAX8997_MUICIRQ_ADCLow, "muic-ADC_low" },
-	{ MAX8997_MUICIRQ_ADC, "muic-ADC" },
-	{ MAX8997_MUICIRQ_VBVolt, "muic-VB_voltage" },
-	{ MAX8997_MUICIRQ_DBChg, "muic-DB_charger" },
-	{ MAX8997_MUICIRQ_DCDTmr, "muic-DCD_timer" },
-	{ MAX8997_MUICIRQ_ChgDetRun, "muic-CDR_status" },
-	{ MAX8997_MUICIRQ_ChgTyp, "muic-charger_type" },
-	{ MAX8997_MUICIRQ_OVP, "muic-over_voltage" },
+	{ MAX8997_MUICIRQ_ADCError,	"muic-ADCERROR" },
+	{ MAX8997_MUICIRQ_ADCLow,	"muic-ADCLOW" },
+	{ MAX8997_MUICIRQ_ADC,		"muic-ADC" },
+	{ MAX8997_MUICIRQ_VBVolt,	"muic-VBVOLT" },
+	{ MAX8997_MUICIRQ_DBChg,	"muic-DBCHG" },
+	{ MAX8997_MUICIRQ_DCDTmr,	"muic-DCDTMR" },
+	{ MAX8997_MUICIRQ_ChgDetRun,	"muic-CHGDETRUN" },
+	{ MAX8997_MUICIRQ_ChgTyp,	"muic-CHGTYP" },
+	{ MAX8997_MUICIRQ_OVP,		"muic-OVP" },
+};
+
+/* Define supported cable type */
+enum max8997_muic_acc_type {
+	MAX8997_MUIC_ADC_GROUND = 0x0,
+	MAX8997_MUIC_ADC_MHL,			/* MHL*/
+	MAX8997_MUIC_ADC_REMOTE_S1_BUTTON,
+	MAX8997_MUIC_ADC_REMOTE_S2_BUTTON,
+	MAX8997_MUIC_ADC_REMOTE_S3_BUTTON,
+	MAX8997_MUIC_ADC_REMOTE_S4_BUTTON,
+	MAX8997_MUIC_ADC_REMOTE_S5_BUTTON,
+	MAX8997_MUIC_ADC_REMOTE_S6_BUTTON,
+	MAX8997_MUIC_ADC_REMOTE_S7_BUTTON,
+	MAX8997_MUIC_ADC_REMOTE_S8_BUTTON,
+	MAX8997_MUIC_ADC_REMOTE_S9_BUTTON,
+	MAX8997_MUIC_ADC_REMOTE_S10_BUTTON,
+	MAX8997_MUIC_ADC_REMOTE_S11_BUTTON,
+	MAX8997_MUIC_ADC_REMOTE_S12_BUTTON,
+	MAX8997_MUIC_ADC_RESERVED_ACC_1,
+	MAX8997_MUIC_ADC_RESERVED_ACC_2,
+	MAX8997_MUIC_ADC_RESERVED_ACC_3,
+	MAX8997_MUIC_ADC_RESERVED_ACC_4,
+	MAX8997_MUIC_ADC_RESERVED_ACC_5,
+	MAX8997_MUIC_ADC_CEA936_AUDIO,
+	MAX8997_MUIC_ADC_PHONE_POWERED_DEV,
+	MAX8997_MUIC_ADC_TTY_CONVERTER,
+	MAX8997_MUIC_ADC_UART_CABLE,
+	MAX8997_MUIC_ADC_CEA936A_TYPE1_CHG,
+	MAX8997_MUIC_ADC_FACTORY_MODE_USB_OFF,	/* JIG-USB-OFF */
+	MAX8997_MUIC_ADC_FACTORY_MODE_USB_ON,	/* JIG-USB-ON */
+	MAX8997_MUIC_ADC_AV_CABLE_NOLOAD,	/* DESKDOCK */
+	MAX8997_MUIC_ADC_CEA936A_TYPE2_CHG,
+	MAX8997_MUIC_ADC_FACTORY_MODE_UART_OFF,	/* JIG-UART */
+	MAX8997_MUIC_ADC_FACTORY_MODE_UART_ON,	/* CARDOCK */
+	MAX8997_MUIC_ADC_AUDIO_MODE_REMOTE,
+	MAX8997_MUIC_ADC_OPEN,			/* OPEN */
+};
+
+enum max8997_muic_cable_group {
+	MAX8997_CABLE_GROUP_ADC = 0,
+	MAX8997_CABLE_GROUP_ADC_GND,
+	MAX8997_CABLE_GROUP_CHG,
+	MAX8997_CABLE_GROUP_VBVOLT,
+};
+
+enum max8997_muic_usb_type {
+	MAX8997_USB_HOST,
+	MAX8997_USB_DEVICE,
+};
+
+enum max8997_muic_charger_type {
+	MAX8997_CHARGER_TYPE_NONE = 0,
+	MAX8997_CHARGER_TYPE_USB,
+	MAX8997_CHARGER_TYPE_DOWNSTREAM_PORT,
+	MAX8997_CHARGER_TYPE_DEDICATED_CHG,
+	MAX8997_CHARGER_TYPE_500MA,
+	MAX8997_CHARGER_TYPE_1A,
+	MAX8997_CHARGER_TYPE_DEAD_BATTERY = 7,
 };
 
 struct max8997_muic_info {
 	struct device *dev;
 	struct i2c_client *muic;
-	struct max8997_muic_platform_data *muic_pdata;
+	struct extcon_dev *edev;
+	int prev_cable_type;
+	int prev_chg_type;
+	u8 status[2];
 
 	int irq;
 	struct work_struct irq_work;
-
-	enum max8997_muic_charger_type pre_charger_type;
-	int pre_adc;
-
 	struct mutex mutex;
 
-	struct extcon_dev	*edev;
+	struct max8997_muic_platform_data *muic_pdata;
+	enum max8997_muic_charger_type pre_charger_type;
 };
 
 enum {
@@ -181,6 +239,83 @@ static int max8997_muic_set_path(struct max8997_muic_info *info,
 	return 0;
 }
 
+/*
+ * max8997_muic_get_cable_type - Return cable type and check cable state
+ * @info: the instance including private data of max8997 MUIC
+ * @group: the path according to attached cable
+ * @attached: store cable state and return
+ *
+ * This function check the cable state either attached or detached,
+ * and then divide precise type of cable according to cable group.
+ *	- MAX8997_CABLE_GROUP_ADC
+ *	- MAX8997_CABLE_GROUP_CHG
+ */
+static int max8997_muic_get_cable_type(struct max8997_muic_info *info,
+		enum max8997_muic_cable_group group, bool *attached)
+{
+	int cable_type = 0;
+	int adc;
+	int chg_type;
+
+	switch (group) {
+	case MAX8997_CABLE_GROUP_ADC:
+		/*
+		 * Read ADC value to check cable type and decide cable state
+		 * according to cable type
+		 */
+		adc = info->status[0] & STATUS1_ADC_MASK;
+		adc >>= STATUS1_ADC_SHIFT;
+
+		/*
+		 * Check current cable state/cable type and store cable type
+		 * (info->prev_cable_type) for handling cable when cable is
+		 * detached.
+		 */
+		if (adc == MAX8997_MUIC_ADC_OPEN) {
+			*attached = false;
+
+			cable_type = info->prev_cable_type;
+			info->prev_cable_type = MAX8997_MUIC_ADC_OPEN;
+		} else {
+			*attached = true;
+
+			cable_type = info->prev_cable_type = adc;
+		}
+		break;
+	case MAX8997_CABLE_GROUP_CHG:
+		/*
+		 * Read charger type to check cable type and decide cable state
+		 * according to type of charger cable.
+		 */
+		chg_type = info->status[1] & STATUS2_CHGTYP_MASK;
+		chg_type >>= STATUS2_CHGTYP_SHIFT;
+
+		if (chg_type == MAX8997_CHARGER_TYPE_NONE) {
+			*attached = false;
+
+			cable_type = info->prev_chg_type;
+			info->prev_chg_type = MAX8997_CHARGER_TYPE_NONE;
+		} else {
+			*attached = true;
+
+			/*
+			 * Check current cable state/cable type and store cable
+			 * type(info->prev_chg_type) for handling cable when
+			 * charger cable is detached.
+			 */
+			cable_type = info->prev_chg_type = chg_type;
+		}
+
+		break;
+	default:
+		dev_err(info->dev, "Unknown cable group (%d)\n", group);
+		cable_type = -EINVAL;
+		break;
+	}
+
+	return cable_type;
+}
+
 static int max8997_muic_handle_usb(struct max8997_muic_info *info,
 			enum max8997_muic_usb_type usb_type, bool attached)
 {
@@ -188,9 +323,9 @@ static int max8997_muic_handle_usb(struct max8997_muic_info *info,
 
 	if (usb_type == MAX8997_USB_HOST) {
 		ret = max8997_muic_set_path(info, CONTROL1_SW_USB, attached);
-		if (ret) {
+		if (ret < 0) {
 			dev_err(info->dev, "failed to update muic register\n");
-			goto out;
+			return ret;
 		}
 	}
 
@@ -202,38 +337,39 @@ static int max8997_muic_handle_usb(struct max8997_muic_info *info,
 		extcon_set_cable_state(info->edev, "USB", attached);
 		break;
 	default:
-		ret = -EINVAL;
-		break;
+		dev_err(info->dev, "failed to detect %s usb cable\n",
+			attached ? "attached" : "detached");
+		return -EINVAL;
 	}
 
-out:
-	return ret;
+	return 0;
 }
 
 static int max8997_muic_handle_dock(struct max8997_muic_info *info,
-			int adc, bool attached)
+			int cable_type, bool attached)
 {
 	int ret = 0;
 
 	ret = max8997_muic_set_path(info, CONTROL1_SW_AUDIO, attached);
 	if (ret) {
 		dev_err(info->dev, "failed to update muic register\n");
-		goto out;
+		return ret;
 	}
 
-	switch (adc) {
-	case MAX8997_ADC_DESKDOCK:
+	switch (cable_type) {
+	case MAX8997_MUIC_ADC_AV_CABLE_NOLOAD:
 		extcon_set_cable_state(info->edev, "Dock-desk", attached);
 		break;
-	case MAX8997_ADC_CARDOCK:
+	case MAX8997_MUIC_ADC_FACTORY_MODE_UART_ON:
 		extcon_set_cable_state(info->edev, "Dock-card", attached);
 		break;
 	default:
-		ret = -EINVAL;
-		break;
+		dev_err(info->dev, "failed to detect %s dock device\n",
+			attached ? "attached" : "detached");
+		return -EINVAL;
 	}
-out:
-	return ret;
+
+	return 0;
 }
 
 static int max8997_muic_handle_jig_uart(struct max8997_muic_info *info,
@@ -245,193 +381,185 @@ static int max8997_muic_handle_jig_uart(struct max8997_muic_info *info,
 	ret = max8997_muic_set_path(info, CONTROL1_SW_UART, attached);
 	if (ret) {
 		dev_err(info->dev, "failed to update muic register\n");
-		goto out;
+		return -EINVAL;
 	}
 
 	extcon_set_cable_state(info->edev, "JIG", attached);
-out:
-	return ret;
-}
 
-static int max8997_muic_handle_adc_detach(struct max8997_muic_info *info)
-{
-	int ret = 0;
-
-	switch (info->pre_adc) {
-	case MAX8997_ADC_GROUND:
-		ret = max8997_muic_handle_usb(info, MAX8997_USB_HOST, false);
-		break;
-	case MAX8997_ADC_MHL:
-		extcon_set_cable_state(info->edev, "MHL", false);
-		break;
-	case MAX8997_ADC_JIG_USB_1:
-	case MAX8997_ADC_JIG_USB_2:
-		ret = max8997_muic_handle_usb(info, MAX8997_USB_DEVICE, false);
-		break;
-	case MAX8997_ADC_DESKDOCK:
-	case MAX8997_ADC_CARDOCK:
-		ret = max8997_muic_handle_dock(info, info->pre_adc, false);
-		break;
-	case MAX8997_ADC_JIG_UART:
-		ret = max8997_muic_handle_jig_uart(info, false);
-		break;
-	default:
-		break;
-	}
-
-	return ret;
+	return 0;
 }
 
-static int max8997_muic_handle_adc(struct max8997_muic_info *info, int adc)
+static int max8997_muic_adc_handler(struct max8997_muic_info *info)
 {
+	int cable_type;
+	bool attached;
 	int ret = 0;
 
-	switch (adc) {
-	case MAX8997_ADC_GROUND:
-		ret = max8997_muic_handle_usb(info, MAX8997_USB_HOST, true);
-		break;
-	case MAX8997_ADC_MHL:
-		extcon_set_cable_state(info->edev, "MHL", true);
-		break;
-	case MAX8997_ADC_JIG_USB_1:
-	case MAX8997_ADC_JIG_USB_2:
-		ret = max8997_muic_handle_usb(info, MAX8997_USB_DEVICE, true);
-		break;
-	case MAX8997_ADC_DESKDOCK:
-	case MAX8997_ADC_CARDOCK:
-		ret = max8997_muic_handle_dock(info, adc, true);
-		break;
-	case MAX8997_ADC_JIG_UART:
-		ret = max8997_muic_handle_jig_uart(info, true);
-		break;
-	case MAX8997_ADC_OPEN:
-		ret = max8997_muic_handle_adc_detach(info);
-		break;
-	default:
-		ret = -EINVAL;
-		goto out;
-	}
-
-	info->pre_adc = adc;
-out:
-	return ret;
-}
-
-static int max8997_muic_handle_charger_type_detach(
-				struct max8997_muic_info *info)
-{
-	switch (info->pre_charger_type) {
-	case MAX8997_CHARGER_TYPE_USB:
-		extcon_set_cable_state(info->edev, "USB", false);
-		break;
-	case MAX8997_CHARGER_TYPE_DOWNSTREAM_PORT:
-		extcon_set_cable_state(info->edev, "Charge-downstream", false);
-		break;
-	case MAX8997_CHARGER_TYPE_DEDICATED_CHG:
-		extcon_set_cable_state(info->edev, "TA", false);
-		break;
-	case MAX8997_CHARGER_TYPE_500MA:
-		extcon_set_cable_state(info->edev, "Slow-charger", false);
-		break;
-	case MAX8997_CHARGER_TYPE_1A:
-		extcon_set_cable_state(info->edev, "Fast-charger", false);
-		break;
+	/* Check cable state which is either detached or attached */
+	cable_type = max8997_muic_get_cable_type(info,
+				MAX8997_CABLE_GROUP_ADC, &attached);
+
+	switch (cable_type) {
+	case MAX8997_MUIC_ADC_GROUND:
+		ret = max8997_muic_handle_usb(info, MAX8997_USB_HOST, attached);
+		if (ret < 0)
+			return ret;
+		break;
+	case MAX8997_MUIC_ADC_MHL:
+		extcon_set_cable_state(info->edev, "MHL", attached);
+		break;
+	case MAX8997_MUIC_ADC_FACTORY_MODE_USB_OFF:
+	case MAX8997_MUIC_ADC_FACTORY_MODE_USB_ON:
+		ret = max8997_muic_handle_usb(info, MAX8997_USB_DEVICE, attached);
+		if (ret < 0)
+			return ret;
+		break;
+	case MAX8997_MUIC_ADC_AV_CABLE_NOLOAD:
+	case MAX8997_MUIC_ADC_FACTORY_MODE_UART_ON:
+		ret = max8997_muic_handle_dock(info, cable_type, attached);
+		if (ret < 0)
+			return ret;
+		break;
+	case MAX8997_MUIC_ADC_FACTORY_MODE_UART_OFF:
+		ret = max8997_muic_handle_jig_uart(info, attached);
+		break;
+	case MAX8997_MUIC_ADC_REMOTE_S1_BUTTON:
+	case MAX8997_MUIC_ADC_REMOTE_S2_BUTTON:
+	case MAX8997_MUIC_ADC_REMOTE_S3_BUTTON:
+	case MAX8997_MUIC_ADC_REMOTE_S4_BUTTON:
+	case MAX8997_MUIC_ADC_REMOTE_S5_BUTTON:
+	case MAX8997_MUIC_ADC_REMOTE_S6_BUTTON:
+	case MAX8997_MUIC_ADC_REMOTE_S7_BUTTON:
+	case MAX8997_MUIC_ADC_REMOTE_S8_BUTTON:
+	case MAX8997_MUIC_ADC_REMOTE_S9_BUTTON:
+	case MAX8997_MUIC_ADC_REMOTE_S10_BUTTON:
+	case MAX8997_MUIC_ADC_REMOTE_S11_BUTTON:
+	case MAX8997_MUIC_ADC_REMOTE_S12_BUTTON:
+	case MAX8997_MUIC_ADC_RESERVED_ACC_1:
+	case MAX8997_MUIC_ADC_RESERVED_ACC_2:
+	case MAX8997_MUIC_ADC_RESERVED_ACC_3:
+	case MAX8997_MUIC_ADC_RESERVED_ACC_4:
+	case MAX8997_MUIC_ADC_RESERVED_ACC_5:
+	case MAX8997_MUIC_ADC_CEA936_AUDIO:
+	case MAX8997_MUIC_ADC_PHONE_POWERED_DEV:
+	case MAX8997_MUIC_ADC_TTY_CONVERTER:
+	case MAX8997_MUIC_ADC_UART_CABLE:
+	case MAX8997_MUIC_ADC_CEA936A_TYPE1_CHG:
+	case MAX8997_MUIC_ADC_CEA936A_TYPE2_CHG:
+	case MAX8997_MUIC_ADC_AUDIO_MODE_REMOTE:
+		/*
+		 * This cable isn't used in general case if it is specially
+		 * needed to detect additional cable, should implement
+		 * proper operation when this cable is attached/detached.
+		 */
+		dev_info(info->dev,
+			"cable is %s but it isn't used (type:0x%x)\n",
+			attached ? "attached" : "detached", cable_type);
+		return -EAGAIN;
 	default:
+		dev_err(info->dev,
+			"failed to detect %s unknown cable (type:0x%x)\n",
+			attached ? "attached" : "detached", cable_type);
 		return -EINVAL;
 	}
 
 	return 0;
 }
 
-static int max8997_muic_handle_charger_type(struct max8997_muic_info *info,
-				enum max8997_muic_charger_type charger_type)
+static int max8997_muic_chg_handler(struct max8997_muic_info *info)
 {
-	u8 adc;
-	int ret;
+	int chg_type;
+	bool attached;
+	int adc;
 
-	ret = max8997_read_reg(info->muic, MAX8997_MUIC_REG_STATUS1, &adc);
-	if (ret) {
-		dev_err(info->dev, "failed to read muic register\n");
-		goto out;
-	}
+	chg_type = max8997_muic_get_cable_type(info,
+				MAX8997_CABLE_GROUP_CHG, &attached);
 
-	switch (charger_type) {
+	switch (chg_type) {
 	case MAX8997_CHARGER_TYPE_NONE:
-		ret = max8997_muic_handle_charger_type_detach(info);
 		break;
 	case MAX8997_CHARGER_TYPE_USB:
-		if ((adc & STATUS1_ADC_MASK) == MAX8997_ADC_OPEN) {
+		adc = info->status[0] & STATUS1_ADC_MASK;
+		adc >>= STATUS1_ADC_SHIFT;
+
+		if ((adc & STATUS1_ADC_MASK) == MAX8997_MUIC_ADC_OPEN) {
 			max8997_muic_handle_usb(info,
-					MAX8997_USB_DEVICE, true);
+					MAX8997_USB_DEVICE, attached);
 		}
 		break;
 	case MAX8997_CHARGER_TYPE_DOWNSTREAM_PORT:
-		extcon_set_cable_state(info->edev, "Charge-downstream", true);
+		extcon_set_cable_state(info->edev, "Charge-downstream", attached);
 		break;
 	case MAX8997_CHARGER_TYPE_DEDICATED_CHG:
-		extcon_set_cable_state(info->edev, "TA", true);
+		extcon_set_cable_state(info->edev, "TA", attached);
 		break;
 	case MAX8997_CHARGER_TYPE_500MA:
-		extcon_set_cable_state(info->edev, "Slow-charger", true);
+		extcon_set_cable_state(info->edev, "Slow-charger", attached);
 		break;
 	case MAX8997_CHARGER_TYPE_1A:
-		extcon_set_cable_state(info->edev, "Fast-charger", true);
+		extcon_set_cable_state(info->edev, "Fast-charger", attached);
 		break;
 	default:
-		ret = -EINVAL;
-		goto out;
+		dev_err(info->dev,
+			"failed to detect %s unknown chg cable (type:0x%x)\n",
+			attached ? "attached" : "detached", chg_type);
+		return -EINVAL;
 	}
 
-	info->pre_charger_type = charger_type;
-out:
-	return ret;
+	return 0;
 }
 
 static void max8997_muic_irq_work(struct work_struct *work)
 {
 	struct max8997_muic_info *info = container_of(work,
 			struct max8997_muic_info, irq_work);
-	u8 status[2];
-	u8 adc, chg_type;
 	int irq_type = 0;
 	int i, ret;
 
+	if (!info->edev)
+		return;
+
 	mutex_lock(&info->mutex);
 
+	for (i = 0 ; i < ARRAY_SIZE(muic_irqs) ; i++)
+		if (info->irq == muic_irqs[i].virq)
+			irq_type = muic_irqs[i].irq;
+
 	ret = max8997_bulk_read(info->muic, MAX8997_MUIC_REG_STATUS1,
-				2, status);
+				2, info->status);
 	if (ret) {
 		dev_err(info->dev, "failed to read muic register\n");
 		mutex_unlock(&info->mutex);
 		return;
 	}
 
-	dev_dbg(info->dev, "%s: STATUS1:0x%x, 2:0x%x\n", __func__,
-			status[0], status[1]);
-
-	for (i = 0 ; i < ARRAY_SIZE(muic_irqs) ; i++)
-		if (info->irq == muic_irqs[i].virq)
-			irq_type = muic_irqs[i].irq;
-
 	switch (irq_type) {
+	case MAX8997_MUICIRQ_ADCError:
+	case MAX8997_MUICIRQ_ADCLow:
 	case MAX8997_MUICIRQ_ADC:
-		adc = status[0] & STATUS1_ADC_MASK;
-		adc >>= STATUS1_ADC_SHIFT;
-
-		max8997_muic_handle_adc(info, adc);
+		/* Handle all of cable except for charger cable */
+		ret = max8997_muic_adc_handler(info);
 		break;
+	case MAX8997_MUICIRQ_VBVolt:
+	case MAX8997_MUICIRQ_DBChg:
+	case MAX8997_MUICIRQ_DCDTmr:
+	case MAX8997_MUICIRQ_ChgDetRun:
 	case MAX8997_MUICIRQ_ChgTyp:
-		chg_type = status[1] & STATUS2_CHGTYP_MASK;
-		chg_type >>= STATUS2_CHGTYP_SHIFT;
-
-		max8997_muic_handle_charger_type(info, chg_type);
+		/* Handle charger cable */
+		ret = max8997_muic_chg_handler(info);
+		break;
+	case MAX8997_MUICIRQ_OVP:
 		break;
 	default:
 		dev_info(info->dev, "misc interrupt: irq %d occurred\n",
 				irq_type);
-		break;
+		mutex_unlock(&info->mutex);
+		return;
 	}
 
+	if (ret < 0)
+		dev_err(info->dev, "failed to handle MUIC interrupt\n");
+
 	mutex_unlock(&info->mutex);
 
 	return;
@@ -449,29 +577,49 @@ static irqreturn_t max8997_muic_irq_handler(int irq, void *data)
 	return IRQ_HANDLED;
 }
 
-static void max8997_muic_detect_dev(struct max8997_muic_info *info)
+static int max8997_muic_detect_dev(struct max8997_muic_info *info)
 {
-	int ret;
-	u8 status[2], adc, chg_type;
+	int ret = 0;
+	int adc;
+	int chg_type;
+	bool attached;
 
-	ret = max8997_bulk_read(info->muic, MAX8997_MUIC_REG_STATUS1,
-				2, status);
+	mutex_lock(&info->mutex);
+
+	/* Read STATUSx register to detect accessory */
+	ret = max8997_bulk_read(info->muic,
+			MAX8997_MUIC_REG_STATUS1, 2, info->status);
 	if (ret) {
-		dev_err(info->dev, "failed to read muic register\n");
-		return;
+		dev_err(info->dev, "failed to read MUIC register\n");
+		mutex_unlock(&info->mutex);
+		return -EINVAL;
 	}
 
-	dev_info(info->dev, "STATUS1:0x%x, STATUS2:0x%x\n",
-			status[0], status[1]);
+	adc = max8997_muic_get_cable_type(info, MAX8997_CABLE_GROUP_ADC,
+					&attached);
+	if (attached && adc != MAX8997_MUIC_ADC_OPEN) {
+		ret = max8997_muic_adc_handler(info);
+		if (ret < 0) {
+			dev_err(info->dev, "Cannot detect ADC cable\n");
+			mutex_unlock(&info->mutex);
+			return ret;
+		}
+	}
 
-	adc = status[0] & STATUS1_ADC_MASK;
-	adc >>= STATUS1_ADC_SHIFT;
+	chg_type = max8997_muic_get_cable_type(info, MAX8997_CABLE_GROUP_CHG,
+					&attached);
+	if (attached && chg_type != MAX8997_CHARGER_TYPE_NONE) {
+		ret = max8997_muic_chg_handler(info);
+		if (ret < 0) {
+			dev_err(info->dev, "Cannot detect charger cable\n");
+			mutex_unlock(&info->mutex);
+			return ret;
+		}
+	}
 
-	chg_type = status[1] & STATUS2_CHGTYP_MASK;
-	chg_type >>= STATUS2_CHGTYP_SHIFT;
+	mutex_unlock(&info->mutex);
 
-	max8997_muic_handle_adc(info, adc);
-	max8997_muic_handle_charger_type(info, chg_type);
+	return 0;
 }
 
 static int max8997_muic_probe(struct platform_device *pdev)
@@ -550,10 +698,16 @@ static int max8997_muic_probe(struct platform_device *pdev)
 	max8997_muic_set_debounce_time(info, ADC_DEBOUNCE_TIME_25MS);
 
 	/* Initial device detection */
-	max8997_muic_detect_dev(info);
+	ret = max8997_muic_detect_dev(info);
+	if (ret < 0) {
+		dev_err(&pdev->dev, "failed to detect cable type\n");
+		goto err_extcon;
+	}
 
 	return ret;
 
+err_extcon:
+	extcon_dev_unregister(info->edev);
 err_irq:
 	while (--i >= 0)
 		free_irq(muic_irqs[i].virq, info);
diff --git a/include/linux/mfd/max8997-private.h b/include/linux/mfd/max8997-private.h
index cd37a92ccba9..fb465dfbb59e 100644
--- a/include/linux/mfd/max8997-private.h
+++ b/include/linux/mfd/max8997-private.h
@@ -258,15 +258,6 @@ enum max8997_muic_reg {
 #define CONTROL3_BTLDSET_MASK		(0x3 << CONTROL3_BTLDSET_SHIFT)
 #define CONTROL3_ADCDBSET_MASK		(0x3 << CONTROL3_ADCDBSET_SHIFT)
 
-#define	MAX8997_ADC_GROUND		0x00
-#define	MAX8997_ADC_MHL			0x01
-#define	MAX8997_ADC_JIG_USB_1		0x18
-#define	MAX8997_ADC_JIG_USB_2		0x19
-#define	MAX8997_ADC_DESKDOCK		0x1a
-#define	MAX8997_ADC_JIG_UART		0x1c
-#define	MAX8997_ADC_CARDOCK		0x1d
-#define	MAX8997_ADC_OPEN		0x1f
-
 enum max8997_haptic_reg {
 	MAX8997_HAPTIC_REG_GENERAL	= 0x00,
 	MAX8997_HAPTIC_REG_CONF1	= 0x01,
diff --git a/include/linux/mfd/max8997.h b/include/linux/mfd/max8997.h
index 1d4a4fe6ac33..65f8d6a3a608 100644
--- a/include/linux/mfd/max8997.h
+++ b/include/linux/mfd/max8997.h
@@ -78,21 +78,6 @@ struct max8997_regulator_data {
 	struct device_node *reg_node;
 };
 
-enum max8997_muic_usb_type {
-	MAX8997_USB_HOST,
-	MAX8997_USB_DEVICE,
-};
-
-enum max8997_muic_charger_type {
-	MAX8997_CHARGER_TYPE_NONE = 0,
-	MAX8997_CHARGER_TYPE_USB,
-	MAX8997_CHARGER_TYPE_DOWNSTREAM_PORT,
-	MAX8997_CHARGER_TYPE_DEDICATED_CHG,
-	MAX8997_CHARGER_TYPE_500MA,
-	MAX8997_CHARGER_TYPE_1A,
-	MAX8997_CHARGER_TYPE_DEAD_BATTERY = 7,
-};
-
 struct max8997_muic_reg_data {
 	u8 addr;
 	u8 data;
-- 
cgit v1.2.3


From 685dc9a7dbfede28cc4a0fe4e65804194ec04fa5 Mon Sep 17 00:00:00 2001
From: Chanwoo Choi <cw00.choi@samsung.com>
Date: Wed, 13 Feb 2013 15:04:15 +0900
Subject: extcon: max8997: Set default UART/USB path on probe

This patch set default H/W line path according to platfomr data.
The MAX8997 MUIC device can possibly set UART/USB or UART_AUX
/USB_AUX to internal H/W line path of MUIC device. Namely, only
one H/W line is used for two operation.

For example,
if H/W line path of MAX8997 device set UART/USB, micro usb cable
is connected to AP(Application Processor) and if H/W line path
set UART_AUX/USB_AUX, micro usb cable is connected to CP(Coprocessor).

Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
Signed-off-by: Myungjoo Ham <myungjoo.ham@samsung.com>
---
 drivers/extcon/extcon-max8997.c | 28 ++++++++++++++++++++++++++--
 include/linux/mfd/max8997.h     |  7 +++++++
 2 files changed, 33 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/extcon/extcon-max8997.c b/drivers/extcon/extcon-max8997.c
index 35338a090c06..349f65f6a4a4 100644
--- a/drivers/extcon/extcon-max8997.c
+++ b/drivers/extcon/extcon-max8997.c
@@ -127,6 +127,13 @@ struct max8997_muic_info {
 
 	struct max8997_muic_platform_data *muic_pdata;
 	enum max8997_muic_charger_type pre_charger_type;
+
+	/*
+	 * Default usb/uart path whether UART/USB or AUX_UART/AUX_USB
+	 * h/w path of COMP2/COMN1 on CONTROL1 register.
+	 */
+	int path_usb;
+	int path_uart;
 };
 
 enum {
@@ -322,7 +329,7 @@ static int max8997_muic_handle_usb(struct max8997_muic_info *info,
 	int ret = 0;
 
 	if (usb_type == MAX8997_USB_HOST) {
-		ret = max8997_muic_set_path(info, CONTROL1_SW_USB, attached);
+		ret = max8997_muic_set_path(info, info->path_usb, attached);
 		if (ret < 0) {
 			dev_err(info->dev, "failed to update muic register\n");
 			return ret;
@@ -378,7 +385,7 @@ static int max8997_muic_handle_jig_uart(struct max8997_muic_info *info,
 	int ret = 0;
 
 	/* switch to UART */
-	ret = max8997_muic_set_path(info, CONTROL1_SW_UART, attached);
+	ret = max8997_muic_set_path(info, info->path_uart, attached);
 	if (ret) {
 		dev_err(info->dev, "failed to update muic register\n");
 		return -EINVAL;
@@ -694,6 +701,23 @@ static int max8997_muic_probe(struct platform_device *pdev)
 		}
 	}
 
+	/*
+	 * Default usb/uart path whether UART/USB or AUX_UART/AUX_USB
+	 * h/w path of COMP2/COMN1 on CONTROL1 register.
+	 */
+	if (pdata->muic_pdata->path_uart)
+		info->path_uart = pdata->muic_pdata->path_uart;
+	else
+		info->path_uart = CONTROL1_SW_UART;
+
+	if (pdata->muic_pdata->path_usb)
+		info->path_usb = pdata->muic_pdata->path_usb;
+	else
+		info->path_usb = CONTROL1_SW_USB;
+
+	/* Set initial path for UART */
+	 max8997_muic_set_path(info, info->path_uart, true);
+
 	/* Set ADC debounce time */
 	max8997_muic_set_debounce_time(info, ADC_DEBOUNCE_TIME_25MS);
 
diff --git a/include/linux/mfd/max8997.h b/include/linux/mfd/max8997.h
index 65f8d6a3a608..2d2d67b6a393 100644
--- a/include/linux/mfd/max8997.h
+++ b/include/linux/mfd/max8997.h
@@ -92,6 +92,13 @@ struct max8997_muic_reg_data {
 struct max8997_muic_platform_data {
 	struct max8997_muic_reg_data *init_data;
 	int num_init_data;
+
+	/*
+	 * Default usb/uart path whether UART/USB or AUX_UART/AUX_USB
+	 * h/w path of COMP2/COMN1 on CONTROL1 register.
+	 */
+	int path_usb;
+	int path_uart;
 };
 
 enum max8997_haptic_motor_type {
-- 
cgit v1.2.3


From af5eb1a13273447c5708cd5425696f3b6f79dd9b Mon Sep 17 00:00:00 2001
From: Chanwoo Choi <cw00.choi@samsung.com>
Date: Wed, 13 Feb 2013 15:10:00 +0900
Subject: extcon: max8997: Use workqueue to check cable state after completing
 boot of platform

This patch use delayed workqueue to check cable state after a certain
time. If extcon-max8997 driver check cable state during booting of
platform, this couldn't send the correct notification of cable state
to extcon consumer. Alwasys, this driver should check cable state
after the completion of platform initialization

Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
Signed-off-by: Myungjoo Ham <myungjoo.ham@samsung.com>
---
 drivers/extcon/extcon-max8997.c | 45 ++++++++++++++++++++++++++++++++---------
 include/linux/mfd/max8997.h     |  3 +++
 2 files changed, 39 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/extcon/extcon-max8997.c b/drivers/extcon/extcon-max8997.c
index 349f65f6a4a4..e636d950ad6c 100644
--- a/drivers/extcon/extcon-max8997.c
+++ b/drivers/extcon/extcon-max8997.c
@@ -29,6 +29,7 @@
 #include <linux/irqdomain.h>
 
 #define	DEV_NAME			"max8997-muic"
+#define	DELAY_MS_DEFAULT		20000		/* unit: millisecond */
 
 enum max8997_muic_adc_debounce_time {
 	ADC_DEBOUNCE_TIME_0_5MS = 0,	/* 0.5ms */
@@ -128,6 +129,14 @@ struct max8997_muic_info {
 	struct max8997_muic_platform_data *muic_pdata;
 	enum max8997_muic_charger_type pre_charger_type;
 
+	/*
+	 * Use delayed workqueue to detect cable state and then
+	 * notify cable state to notifiee/platform through uevent.
+	 * After completing the booting of platform, the extcon provider
+	 * driver should notify cable state to upper layer.
+	 */
+	struct delayed_work wq_detcable;
+
 	/*
 	 * Default usb/uart path whether UART/USB or AUX_UART/AUX_USB
 	 * h/w path of COMP2/COMN1 on CONTROL1 register.
@@ -629,11 +638,23 @@ static int max8997_muic_detect_dev(struct max8997_muic_info *info)
 	return 0;
 }
 
+static void max8997_muic_detect_cable_wq(struct work_struct *work)
+{
+	struct max8997_muic_info *info = container_of(to_delayed_work(work),
+				struct max8997_muic_info, wq_detcable);
+	int ret;
+
+	ret = max8997_muic_detect_dev(info);
+	if (ret < 0)
+		pr_err("failed to detect cable type\n");
+}
+
 static int max8997_muic_probe(struct platform_device *pdev)
 {
 	struct max8997_dev *max8997 = dev_get_drvdata(pdev->dev.parent);
 	struct max8997_platform_data *pdata = dev_get_platdata(max8997->dev);
 	struct max8997_muic_info *info;
+	int delay_jiffies;
 	int ret, i;
 
 	info = devm_kzalloc(&pdev->dev, sizeof(struct max8997_muic_info),
@@ -721,17 +742,23 @@ static int max8997_muic_probe(struct platform_device *pdev)
 	/* Set ADC debounce time */
 	max8997_muic_set_debounce_time(info, ADC_DEBOUNCE_TIME_25MS);
 
-	/* Initial device detection */
-	ret = max8997_muic_detect_dev(info);
-	if (ret < 0) {
-		dev_err(&pdev->dev, "failed to detect cable type\n");
-		goto err_extcon;
-	}
+	/*
+	 * Detect accessory after completing the initialization of platform
+	 *
+	 * - Use delayed workqueue to detect cable state and then
+	 * notify cable state to notifiee/platform through uevent.
+	 * After completing the booting of platform, the extcon provider
+	 * driver should notify cable state to upper layer.
+	 */
+	INIT_DELAYED_WORK(&info->wq_detcable, max8997_muic_detect_cable_wq);
+	if (pdata->muic_pdata->detcable_delay_ms)
+		delay_jiffies = msecs_to_jiffies(pdata->muic_pdata->detcable_delay_ms);
+	else
+		delay_jiffies = msecs_to_jiffies(DELAY_MS_DEFAULT);
+	schedule_delayed_work(&info->wq_detcable, delay_jiffies);
 
-	return ret;
+	return 0;
 
-err_extcon:
-	extcon_dev_unregister(info->edev);
 err_irq:
 	while (--i >= 0)
 		free_irq(muic_irqs[i].virq, info);
diff --git a/include/linux/mfd/max8997.h b/include/linux/mfd/max8997.h
index 2d2d67b6a393..cf815577bd68 100644
--- a/include/linux/mfd/max8997.h
+++ b/include/linux/mfd/max8997.h
@@ -93,6 +93,9 @@ struct max8997_muic_platform_data {
 	struct max8997_muic_reg_data *init_data;
 	int num_init_data;
 
+	/* Check cable state after certain delay */
+	int detcable_delay_ms;
+
 	/*
 	 * Default usb/uart path whether UART/USB or AUX_UART/AUX_USB
 	 * h/w path of COMP2/COMN1 on CONTROL1 register.
-- 
cgit v1.2.3


From 60c185f059c88ad4b9b170b1f9322e3adcccca07 Mon Sep 17 00:00:00 2001
From: Laxman Dewangan <ldewangan@nvidia.com>
Date: Thu, 3 Jan 2013 16:16:58 +0530
Subject: mfd: palmas: Add APIs to access the Palmas' registers

Palmas register set is divided into different blocks (base and offset)
and hence different i2c addresses. The i2c address offsets are derived
from base address of block of registers.

Add inline APIs to access the Palma's registers which takes the base of
register block and register offset. The i2c address offset is  derived
from the base address of register blocks.

Signed-off-by: Laxman Dewangan <ldewangan@nvidia.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 include/linux/mfd/palmas.h | 52 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 52 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mfd/palmas.h b/include/linux/mfd/palmas.h
index 29f6616e12f0..a4d13d7cd001 100644
--- a/include/linux/mfd/palmas.h
+++ b/include/linux/mfd/palmas.h
@@ -2789,4 +2789,56 @@ enum usb_irq_events {
 #define PALMAS_GPADC_TRIM15					0xE
 #define PALMAS_GPADC_TRIM16					0xF
 
+static inline int palmas_read(struct palmas *palmas, unsigned int base,
+		unsigned int reg, unsigned int *val)
+{
+	unsigned int addr =  PALMAS_BASE_TO_REG(base, reg);
+	int slave_id = PALMAS_BASE_TO_SLAVE(base);
+
+	return regmap_read(palmas->regmap[slave_id], addr, val);
+}
+
+static inline int palmas_write(struct palmas *palmas, unsigned int base,
+		unsigned int reg, unsigned int value)
+{
+	unsigned int addr = PALMAS_BASE_TO_REG(base, reg);
+	int slave_id = PALMAS_BASE_TO_SLAVE(base);
+
+	return regmap_write(palmas->regmap[slave_id], addr, value);
+}
+
+static inline int palmas_bulk_write(struct palmas *palmas, unsigned int base,
+	unsigned int reg, const void *val, size_t val_count)
+{
+	unsigned int addr = PALMAS_BASE_TO_REG(base, reg);
+	int slave_id = PALMAS_BASE_TO_SLAVE(base);
+
+	return regmap_bulk_write(palmas->regmap[slave_id], addr,
+			val, val_count);
+}
+
+static inline int palmas_bulk_read(struct palmas *palmas, unsigned int base,
+		unsigned int reg, void *val, size_t val_count)
+{
+	unsigned int addr = PALMAS_BASE_TO_REG(base, reg);
+	int slave_id = PALMAS_BASE_TO_SLAVE(base);
+
+	return regmap_bulk_read(palmas->regmap[slave_id], addr,
+		val, val_count);
+}
+
+static inline int palmas_update_bits(struct palmas *palmas, unsigned int base,
+	unsigned int reg, unsigned int mask, unsigned int val)
+{
+	unsigned int addr = PALMAS_BASE_TO_REG(base, reg);
+	int slave_id = PALMAS_BASE_TO_SLAVE(base);
+
+	return regmap_update_bits(palmas->regmap[slave_id], addr, mask, val);
+}
+
+static inline int palmas_irq_get_virq(struct palmas *palmas, int irq)
+{
+	return regmap_irq_get_virq(palmas->irq_data, irq);
+}
+
 #endif /*  __LINUX_MFD_PALMAS_H */
-- 
cgit v1.2.3


From 3d91f8282c66d9edafa3980385324ce6a48edcda Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Tue, 29 Jan 2013 00:47:37 +0800
Subject: mfd: arizona: Provide platform data for MICBIAS configuration

Allow the MICBIAS voltages and other attributes to be configured by the
platform.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/arizona-core.c        | 23 +++++++++++++++++++++++
 include/linux/mfd/arizona/pdata.h | 12 ++++++++++++
 2 files changed, 35 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mfd/arizona-core.c b/drivers/mfd/arizona-core.c
index 317ba0a4ea25..b562c7bf8a46 100644
--- a/drivers/mfd/arizona-core.c
+++ b/drivers/mfd/arizona-core.c
@@ -510,6 +510,29 @@ int arizona_dev_init(struct arizona *arizona)
 		goto err_reset;
 	}
 
+	for (i = 0; i < ARIZONA_MAX_MICBIAS; i++) {
+		if (!arizona->pdata.micbias[i].mV)
+			continue;
+
+		val = (arizona->pdata.micbias[i].mV - 1500) / 100;
+		val <<= ARIZONA_MICB1_LVL_SHIFT;
+
+		if (arizona->pdata.micbias[i].ext_cap)
+			val |= ARIZONA_MICB1_EXT_CAP;
+
+		if (arizona->pdata.micbias[i].discharge)
+			val |= ARIZONA_MICB1_DISCH;
+
+		if (arizona->pdata.micbias[i].fast_start)
+			val |= ARIZONA_MICB1_RATE;
+
+		regmap_update_bits(arizona->regmap,
+				   ARIZONA_MIC_BIAS_CTRL_1 + i,
+				   ARIZONA_MICB1_LVL_MASK |
+				   ARIZONA_MICB1_DISCH |
+				   ARIZONA_MICB1_RATE, val);
+	}
+
 	for (i = 0; i < ARIZONA_MAX_INPUT; i++) {
 		/* Default for both is 0 so noop with defaults */
 		val = arizona->pdata.dmic_ref[i]
diff --git a/include/linux/mfd/arizona/pdata.h b/include/linux/mfd/arizona/pdata.h
index 8b1d1daaae16..37894d6a4f6f 100644
--- a/include/linux/mfd/arizona/pdata.h
+++ b/include/linux/mfd/arizona/pdata.h
@@ -56,6 +56,8 @@
 #define ARIZONA_DMIC_MICBIAS2 2
 #define ARIZONA_DMIC_MICBIAS3 3
 
+#define ARIZONA_MAX_MICBIAS 3
+
 #define ARIZONA_INMODE_DIFF 0
 #define ARIZONA_INMODE_SE   1
 #define ARIZONA_INMODE_DMIC 2
@@ -69,6 +71,13 @@
 
 struct regulator_init_data;
 
+struct arizona_micbias {
+	int mV;                    /** Regulated voltage */
+	unsigned int ext_cap:1;    /** External capacitor fitted */
+	unsigned int discharge:1;  /** Actively discharge */
+	unsigned int fast_start:1; /** Enable aggressive startup ramp rate */
+};
+
 struct arizona_micd_config {
 	unsigned int src;
 	unsigned int bias;
@@ -106,6 +115,9 @@ struct arizona_pdata {
 	/** Reference voltage for DMIC inputs */
 	int dmic_ref[ARIZONA_MAX_INPUT];
 
+	/** MICBIAS configurations */
+	struct arizona_micbias micbias[ARIZONA_MAX_MICBIAS];
+
 	/** Mode of input structures */
 	int inmode[ARIZONA_MAX_INPUT];
 
-- 
cgit v1.2.3


From 5d4e9bd79a5ab5bd4695d3becaa71da447a76a94 Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@ti.com>
Date: Wed, 16 Jan 2013 14:53:50 +0100
Subject: mfd: twl-core: Clean up module id lookup and definitions

Use enums for all module definitions:
twl_module_ids for common functionality among twl4030/twl6030
twl4030_module_ids for twl4030 specific ids
twl6030_module_ids for twl6030 specific ids

In this way the list can be managed easier when new functionality going to
be implemented.

Signed-off-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/twl-core.c  | 105 +++++++++++++++++++++++-------------------------
 include/linux/i2c/twl.h |  66 ++++++++++++++++--------------
 2 files changed, 86 insertions(+), 85 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/twl-core.c b/drivers/mfd/twl-core.c
index 4f3baadd0038..b781cdd0629d 100644
--- a/drivers/mfd/twl-core.c
+++ b/drivers/mfd/twl-core.c
@@ -74,8 +74,6 @@
 #define SUB_CHIP_ID3 3
 #define SUB_CHIP_ID_INVAL 0xff
 
-#define TWL_MODULE_LAST TWL4030_MODULE_LAST
-
 /* Base Address defns for twl4030_map[] */
 
 /* subchip/slave 0 - USB ID */
@@ -94,10 +92,7 @@
 #define TWL4030_BASEADD_MADC		0x0000
 #define TWL4030_BASEADD_MAIN_CHARGE	0x0074
 #define TWL4030_BASEADD_PRECHARGE	0x00AA
-#define TWL4030_BASEADD_PWM0		0x00F8
-#define TWL4030_BASEADD_PWM1		0x00FB
-#define TWL4030_BASEADD_PWMA		0x00EF
-#define TWL4030_BASEADD_PWMB		0x00F1
+#define TWL4030_BASEADD_PWM		0x00F8
 #define TWL4030_BASEADD_KEYPAD		0x00D2
 
 #define TWL5031_BASEADD_ACCESSORY	0x0074 /* Replaces Main Charge */
@@ -117,7 +112,7 @@
 
 /* subchip/slave 0 0x48 - POWER */
 #define TWL6030_BASEADD_RTC		0x0000
-#define TWL6030_BASEADD_MEM		0x0017
+#define TWL6030_BASEADD_SECURED_REG	0x0017
 #define TWL6030_BASEADD_PM_MASTER	0x001F
 #define TWL6030_BASEADD_PM_SLAVE_MISC	0x0030 /* PM_RECEIVER */
 #define TWL6030_BASEADD_PM_MISC		0x00E2
@@ -132,6 +127,7 @@
 #define TWL6030_BASEADD_PIH		0x00D0
 #define TWL6030_BASEADD_CHARGER		0x00E0
 #define TWL6025_BASEADD_CHARGER		0x00DA
+#define TWL6030_BASEADD_LED		0x00F4
 
 /* subchip/slave 2 0x4A - DFT */
 #define TWL6030_BASEADD_DIEID		0x00C0
@@ -188,34 +184,33 @@ static struct twl_mapping twl4030_map[] = {
 	 * so they continue to match the order in this table.
 	 */
 
+	/* Common IPs */
 	{ 0, TWL4030_BASEADD_USB },
+	{ 1, TWL4030_BASEADD_PIH },
+	{ 2, TWL4030_BASEADD_MAIN_CHARGE },
+	{ 3, TWL4030_BASEADD_PM_MASTER },
+	{ 3, TWL4030_BASEADD_PM_RECEIVER },
+
+	{ 3, TWL4030_BASEADD_RTC },
+	{ 2, TWL4030_BASEADD_PWM },
+	{ 2, TWL4030_BASEADD_LED },
+	{ 3, TWL4030_BASEADD_SECURED_REG },
+
+	/* TWL4030 specific IPs */
 	{ 1, TWL4030_BASEADD_AUDIO_VOICE },
 	{ 1, TWL4030_BASEADD_GPIO },
 	{ 1, TWL4030_BASEADD_INTBR },
-	{ 1, TWL4030_BASEADD_PIH },
-
 	{ 1, TWL4030_BASEADD_TEST },
 	{ 2, TWL4030_BASEADD_KEYPAD },
+
 	{ 2, TWL4030_BASEADD_MADC },
 	{ 2, TWL4030_BASEADD_INTERRUPTS },
-	{ 2, TWL4030_BASEADD_LED },
-
-	{ 2, TWL4030_BASEADD_MAIN_CHARGE },
 	{ 2, TWL4030_BASEADD_PRECHARGE },
-	{ 2, TWL4030_BASEADD_PWM0 },
-	{ 2, TWL4030_BASEADD_PWM1 },
-	{ 2, TWL4030_BASEADD_PWMA },
-
-	{ 2, TWL4030_BASEADD_PWMB },
-	{ 2, TWL5031_BASEADD_ACCESSORY },
-	{ 2, TWL5031_BASEADD_INTERRUPTS },
 	{ 3, TWL4030_BASEADD_BACKUP },
 	{ 3, TWL4030_BASEADD_INT },
 
-	{ 3, TWL4030_BASEADD_PM_MASTER },
-	{ 3, TWL4030_BASEADD_PM_RECEIVER },
-	{ 3, TWL4030_BASEADD_RTC },
-	{ 3, TWL4030_BASEADD_SECURED_REG },
+	{ 2, TWL5031_BASEADD_ACCESSORY },
+	{ 2, TWL5031_BASEADD_INTERRUPTS },
 };
 
 static struct regmap_config twl4030_regmap_config[4] = {
@@ -251,35 +246,25 @@ static struct twl_mapping twl6030_map[] = {
 	 * <linux/i2c/twl.h> defines for TWL4030_MODULE_*
 	 * so they continue to match the order in this table.
 	 */
-	{ SUB_CHIP_ID1, TWL6030_BASEADD_USB },
-	{ SUB_CHIP_ID_INVAL, TWL6030_BASEADD_AUDIO },
-	{ SUB_CHIP_ID2, TWL6030_BASEADD_DIEID },
-	{ SUB_CHIP_ID2, TWL6030_BASEADD_RSV },
-	{ SUB_CHIP_ID1, TWL6030_BASEADD_PIH },
-
-	{ SUB_CHIP_ID2, TWL6030_BASEADD_RSV },
-	{ SUB_CHIP_ID2, TWL6030_BASEADD_RSV },
-	{ SUB_CHIP_ID1, TWL6030_BASEADD_GPADC_CTRL },
-	{ SUB_CHIP_ID2, TWL6030_BASEADD_RSV },
-	{ SUB_CHIP_ID2, TWL6030_BASEADD_RSV },
-
-	{ SUB_CHIP_ID1, TWL6030_BASEADD_CHARGER },
-	{ SUB_CHIP_ID1, TWL6030_BASEADD_GASGAUGE },
-	{ SUB_CHIP_ID1, TWL6030_BASEADD_PWM },
-	{ SUB_CHIP_ID0, TWL6030_BASEADD_ZERO },
-	{ SUB_CHIP_ID1, TWL6030_BASEADD_ZERO },
-
-	{ SUB_CHIP_ID2, TWL6030_BASEADD_ZERO },
-	{ SUB_CHIP_ID2, TWL6030_BASEADD_ZERO },
-	{ SUB_CHIP_ID2, TWL6030_BASEADD_RSV },
-	{ SUB_CHIP_ID2, TWL6030_BASEADD_RSV },
-	{ SUB_CHIP_ID2, TWL6030_BASEADD_RSV },
-
-	{ SUB_CHIP_ID0, TWL6030_BASEADD_PM_MASTER },
-	{ SUB_CHIP_ID0, TWL6030_BASEADD_PM_SLAVE_MISC },
-	{ SUB_CHIP_ID0, TWL6030_BASEADD_RTC },
-	{ SUB_CHIP_ID0, TWL6030_BASEADD_MEM },
-	{ SUB_CHIP_ID1, TWL6025_BASEADD_CHARGER },
+
+	/* Common IPs */
+	{ 1, TWL6030_BASEADD_USB },
+	{ 1, TWL6030_BASEADD_PIH },
+	{ 1, TWL6030_BASEADD_CHARGER },
+	{ 0, TWL6030_BASEADD_PM_MASTER },
+	{ 0, TWL6030_BASEADD_PM_SLAVE_MISC },
+
+	{ 0, TWL6030_BASEADD_RTC },
+	{ 1, TWL6030_BASEADD_PWM },
+	{ 1, TWL6030_BASEADD_LED },
+	{ 0, TWL6030_BASEADD_SECURED_REG },
+
+	/* TWL6030 specific IPs */
+	{ 0, TWL6030_BASEADD_ZERO },
+	{ 1, TWL6030_BASEADD_ZERO },
+	{ 2, TWL6030_BASEADD_ZERO },
+	{ 1, TWL6030_BASEADD_GPADC_CTRL },
+	{ 1, TWL6030_BASEADD_GASGAUGE },
 };
 
 static struct regmap_config twl6030_regmap_config[3] = {
@@ -305,6 +290,14 @@ static struct regmap_config twl6030_regmap_config[3] = {
 
 /*----------------------------------------------------------------------*/
 
+static inline int twl_get_last_module(void)
+{
+	if (twl_class_is_4030())
+		return TWL4030_MODULE_LAST;
+	else
+		return TWL6030_MODULE_LAST;
+}
+
 /* Exported Functions */
 
 /**
@@ -325,7 +318,7 @@ int twl_i2c_write(u8 mod_no, u8 *value, u8 reg, unsigned num_bytes)
 	int sid;
 	struct twl_client *twl;
 
-	if (unlikely(mod_no >= TWL_MODULE_LAST)) {
+	if (unlikely(mod_no >= twl_get_last_module())) {
 		pr_err("%s: invalid module number %d\n", DRIVER_NAME, mod_no);
 		return -EPERM;
 	}
@@ -367,7 +360,7 @@ int twl_i2c_read(u8 mod_no, u8 *value, u8 reg, unsigned num_bytes)
 	int sid;
 	struct twl_client *twl;
 
-	if (unlikely(mod_no >= TWL_MODULE_LAST)) {
+	if (unlikely(mod_no >= twl_get_last_module())) {
 		pr_err("%s: invalid module number %d\n", DRIVER_NAME, mod_no);
 		return -EPERM;
 	}
@@ -1228,6 +1221,10 @@ twl_probe(struct i2c_client *client, const struct i2c_device_id *id)
 	if ((id->driver_data) & TWL6030_CLASS) {
 		twl_id = TWL6030_CLASS_ID;
 		twl_map = &twl6030_map[0];
+		/* The charger base address is different in twl6025 */
+		if ((id->driver_data) & TWL6025_SUBCLASS)
+			twl_map[TWL_MODULE_MAIN_CHARGE].base =
+							TWL6025_BASEADD_CHARGER;
 		twl_regmap_config = twl6030_regmap_config;
 		num_slaves = TWL_NUM_SLAVES - 1;
 	} else {
diff --git a/include/linux/i2c/twl.h b/include/linux/i2c/twl.h
index 1ff54b114efc..72adc8807912 100644
--- a/include/linux/i2c/twl.h
+++ b/include/linux/i2c/twl.h
@@ -39,51 +39,55 @@
  * address each module uses within a given i2c slave.
  */
 
+/* Module IDs for similar functionalities found in twl4030/twl6030 */
+enum twl_module_ids {
+	TWL_MODULE_USB,
+	TWL_MODULE_PIH,
+	TWL_MODULE_MAIN_CHARGE,
+	TWL_MODULE_PM_MASTER,
+	TWL_MODULE_PM_RECEIVER,
+
+	TWL_MODULE_RTC,
+	TWL_MODULE_PWM,
+	TWL_MODULE_LED,
+	TWL_MODULE_SECURED_REG,
+
+	TWL_MODULE_LAST,
+};
+
+/* Modules only available in twl4030 series */
 enum twl4030_module_ids {
-	TWL4030_MODULE_USB = 0,		/* Slave 0 (i2c address 0x48) */
-	TWL4030_MODULE_AUDIO_VOICE,	/* Slave 1 (i2c address 0x49) */
+	TWL4030_MODULE_AUDIO_VOICE = TWL_MODULE_LAST,
 	TWL4030_MODULE_GPIO,
 	TWL4030_MODULE_INTBR,
-	TWL4030_MODULE_PIH,
-
 	TWL4030_MODULE_TEST,
-	TWL4030_MODULE_KEYPAD,		/* Slave 2 (i2c address 0x4a) */
+	TWL4030_MODULE_KEYPAD,
+
 	TWL4030_MODULE_MADC,
 	TWL4030_MODULE_INTERRUPTS,
-	TWL4030_MODULE_LED,
-
-	TWL4030_MODULE_MAIN_CHARGE,
 	TWL4030_MODULE_PRECHARGE,
-	TWL4030_MODULE_PWM0,
-	TWL4030_MODULE_PWM1,
-	TWL4030_MODULE_PWMA,
+	TWL4030_MODULE_BACKUP,
+	TWL4030_MODULE_INT,
 
-	TWL4030_MODULE_PWMB,
 	TWL5031_MODULE_ACCESSORY,
 	TWL5031_MODULE_INTERRUPTS,
-	TWL4030_MODULE_BACKUP,		/* Slave 3 (i2c address 0x4b) */
-	TWL4030_MODULE_INT,
 
-	TWL4030_MODULE_PM_MASTER,
-	TWL4030_MODULE_PM_RECEIVER,
-	TWL4030_MODULE_RTC,
-	TWL4030_MODULE_SECURED_REG,
 	TWL4030_MODULE_LAST,
 };
 
-/* Similar functionalities implemented in TWL4030/6030 */
-#define TWL_MODULE_USB		TWL4030_MODULE_USB
-#define TWL_MODULE_PIH		TWL4030_MODULE_PIH
-#define TWL_MODULE_MAIN_CHARGE	TWL4030_MODULE_MAIN_CHARGE
-#define TWL_MODULE_PM_MASTER	TWL4030_MODULE_PM_MASTER
-#define TWL_MODULE_PM_RECEIVER	TWL4030_MODULE_PM_RECEIVER
-#define TWL_MODULE_RTC		TWL4030_MODULE_RTC
-#define TWL_MODULE_PWM		TWL4030_MODULE_PWM0
-#define TWL_MODULE_LED		TWL4030_MODULE_LED
-
-#define TWL6030_MODULE_ID0	13
-#define TWL6030_MODULE_ID1	14
-#define TWL6030_MODULE_ID2	15
+/* Modules only available in twl6030 series */
+enum twl6030_module_ids {
+	TWL6030_MODULE_ID0 = TWL_MODULE_LAST,
+	TWL6030_MODULE_ID1,
+	TWL6030_MODULE_ID2,
+	TWL6030_MODULE_GPADC,
+	TWL6030_MODULE_GASGAUGE,
+
+	TWL6030_MODULE_LAST,
+};
+
+/* Until the clients has been converted to use TWL_MODULE_LED */
+#define TWL4030_MODULE_LED	TWL_MODULE_LED
 
 #define GPIO_INTR_OFFSET	0
 #define KEYPAD_INTR_OFFSET	1
-- 
cgit v1.2.3


From ac7bc5a953c38c32513d1825decf336c4909ce3b Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@ti.com>
Date: Wed, 16 Jan 2013 14:53:58 +0100
Subject: mfd: twl-core: Remove no longer valid comment regarding to write
 buffer size

With the regmap conversion there is no longeer a need to allocate bigger
buffer for writes

Signed-off-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/twl-core.c  | 3 ---
 include/linux/i2c/twl.h | 3 ---
 2 files changed, 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/twl-core.c b/drivers/mfd/twl-core.c
index e2895a4026a1..9661204fb159 100644
--- a/drivers/mfd/twl-core.c
+++ b/drivers/mfd/twl-core.c
@@ -308,9 +308,6 @@ EXPORT_SYMBOL(twl_rev);
  * @reg: register address (just offset will do)
  * @num_bytes: number of bytes to transfer
  *
- * IMPORTANT: for 'value' parameter: Allocate value num_bytes+1 and
- * valid data starts at Offset 1.
- *
  * Returns the result of operation - 0 is success
  */
 int twl_i2c_write(u8 mod_no, u8 *value, u8 reg, unsigned num_bytes)
diff --git a/include/linux/i2c/twl.h b/include/linux/i2c/twl.h
index 72adc8807912..e441fd8e7c86 100644
--- a/include/linux/i2c/twl.h
+++ b/include/linux/i2c/twl.h
@@ -182,9 +182,6 @@ int twl_i2c_read_u8(u8 mod_no, u8 *val, u8 reg);
 
 /*
  * Read and write several 8-bit registers at once.
- *
- * IMPORTANT:  For twl_i2c_write(), allocate num_bytes + 1
- * for the value, and populate your data starting at offset 1.
  */
 int twl_i2c_write(u8 mod_no, u8 *value, u8 reg, unsigned num_bytes);
 int twl_i2c_read(u8 mod_no, u8 *value, u8 reg, unsigned num_bytes);
-- 
cgit v1.2.3


From fbc6ae363e5e589a28135c051a2ff835e6236d5f Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@ti.com>
Date: Wed, 16 Jan 2013 14:53:59 +0100
Subject: mfd: twl-core: Move twl_i2c_read/write_u8 to header as inline
 function

twl_i2c_read/write_u8 become as a simple wrapper over the twl_i2c_read/write.

Signed-off-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/twl-core.c  | 28 ----------------------------
 include/linux/i2c/twl.h | 17 +++++++++++------
 2 files changed, 11 insertions(+), 34 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/twl-core.c b/drivers/mfd/twl-core.c
index 9661204fb159..557f9ee5ec18 100644
--- a/drivers/mfd/twl-core.c
+++ b/drivers/mfd/twl-core.c
@@ -379,34 +379,6 @@ int twl_i2c_read(u8 mod_no, u8 *value, u8 reg, unsigned num_bytes)
 }
 EXPORT_SYMBOL(twl_i2c_read);
 
-/**
- * twl_i2c_write_u8 - Writes a 8 bit register in TWL4030/TWL5030/TWL60X0
- * @mod_no: module number
- * @value: the value to be written 8 bit
- * @reg: register address (just offset will do)
- *
- * Returns result of operation - 0 is success
- */
-int twl_i2c_write_u8(u8 mod_no, u8 value, u8 reg)
-{
-	return twl_i2c_write(mod_no, &value, reg, 1);
-}
-EXPORT_SYMBOL(twl_i2c_write_u8);
-
-/**
- * twl_i2c_read_u8 - Reads a 8 bit register from TWL4030/TWL5030/TWL60X0
- * @mod_no: module number
- * @value: the value read 8 bit
- * @reg: register address (just offset will do)
- *
- * Returns result of operation - 0 is success
- */
-int twl_i2c_read_u8(u8 mod_no, u8 *value, u8 reg)
-{
-	return twl_i2c_read(mod_no, value, reg, 1);
-}
-EXPORT_SYMBOL(twl_i2c_read_u8);
-
 /*----------------------------------------------------------------------*/
 
 /**
diff --git a/include/linux/i2c/twl.h b/include/linux/i2c/twl.h
index e441fd8e7c86..488debbef895 100644
--- a/include/linux/i2c/twl.h
+++ b/include/linux/i2c/twl.h
@@ -174,18 +174,23 @@ static inline int twl_class_is_ ##class(void)	\
 TWL_CLASS_IS(4030, TWL4030_CLASS_ID)
 TWL_CLASS_IS(6030, TWL6030_CLASS_ID)
 
-/*
- * Read and write single 8-bit registers
- */
-int twl_i2c_write_u8(u8 mod_no, u8 val, u8 reg);
-int twl_i2c_read_u8(u8 mod_no, u8 *val, u8 reg);
-
 /*
  * Read and write several 8-bit registers at once.
  */
 int twl_i2c_write(u8 mod_no, u8 *value, u8 reg, unsigned num_bytes);
 int twl_i2c_read(u8 mod_no, u8 *value, u8 reg, unsigned num_bytes);
 
+/*
+ * Read and write single 8-bit registers
+ */
+static inline int twl_i2c_write_u8(u8 mod_no, u8 val, u8 reg) {
+	return twl_i2c_write(mod_no, &val, reg, 1);
+}
+
+static inline int twl_i2c_read_u8(u8 mod_no, u8 *val, u8 reg) {
+	return twl_i2c_read(mod_no, val, reg, 1);
+}
+
 int twl_get_type(void);
 int twl_get_version(void);
 int twl_get_hfclk_rate(void);
-- 
cgit v1.2.3


From 98c60a0d3afc8f68e6e4b85b93df14e238fec3cb Mon Sep 17 00:00:00 2001
From: Fabio Baltieri <fabio.baltieri@linaro.org>
Date: Fri, 18 Jan 2013 12:40:11 +0100
Subject: mfd: dbx500-prcmu: Add watchdog ID definitions

Add definition of watchdog IDs to be used by ux500_wdt driver.

Acked-by: Lee Jones <lee.jones@linaro.org>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Fabio Baltieri <fabio.baltieri@linaro.org>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 include/linux/mfd/dbx500-prcmu.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mfd/dbx500-prcmu.h b/include/linux/mfd/dbx500-prcmu.h
index c202d6c4d879..c6e0608a21b5 100644
--- a/include/linux/mfd/dbx500-prcmu.h
+++ b/include/linux/mfd/dbx500-prcmu.h
@@ -146,6 +146,18 @@ enum prcmu_clock {
 	PRCMU_DSI2ESCCLK,
 };
 
+/**
+ * enum prcmu_wdog_id - PRCMU watchdog IDs
+ * @PRCMU_WDOG_ALL: use all timers
+ * @PRCMU_WDOG_CPU1: use first CPU timer only
+ * @PRCMU_WDOG_CPU2: use second CPU timer conly
+ */
+enum prcmu_wdog_id {
+	PRCMU_WDOG_ALL = 0x00,
+	PRCMU_WDOG_CPU1 = 0x01,
+	PRCMU_WDOG_CPU2 = 0x02,
+};
+
 /**
  * enum ape_opp - APE OPP states definition
  * @APE_OPP_INIT:
-- 
cgit v1.2.3


From f0e5bd412fde30de3839c8dfa93a3e19e71ee462 Mon Sep 17 00:00:00 2001
From: Fabio Baltieri <fabio.baltieri@linaro.org>
Date: Tue, 29 Jan 2013 09:57:19 +0100
Subject: watchdog: Add support for ux500_wdt watchdog

This patch adds support for the ux500_wdt watchdog that is found in
ST-Ericsson Ux500 platform.  The driver is based on PRCMU APIs.

Acked-by: Linus Walleij <linus.walleij@linaro.org>
Acked-by: Lee Jones <lee.jones@linaro.org>
Acked-by: Wim Van Sebroeck <wim@iguana.be>
Signed-off-by: Fabio Baltieri <fabio.baltieri@linaro.org>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/watchdog/Kconfig                |  12 +++
 drivers/watchdog/Makefile               |   1 +
 drivers/watchdog/ux500_wdt.c            | 171 ++++++++++++++++++++++++++++++++
 include/linux/platform_data/ux500_wdt.h |  19 ++++
 4 files changed, 203 insertions(+)
 create mode 100644 drivers/watchdog/ux500_wdt.c
 create mode 100644 include/linux/platform_data/ux500_wdt.h

(limited to 'include/linux')

diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig
index 7f809fd4a57f..26e1fdbddf69 100644
--- a/drivers/watchdog/Kconfig
+++ b/drivers/watchdog/Kconfig
@@ -364,6 +364,18 @@ config IMX2_WDT
 	  To compile this driver as a module, choose M here: the
 	  module will be called imx2_wdt.
 
+config UX500_WATCHDOG
+	tristate "ST-Ericsson Ux500 watchdog"
+	depends on MFD_DB8500_PRCMU
+	select WATCHDOG_CORE
+	default y
+	help
+	  Say Y here to include Watchdog timer support for the watchdog
+	  existing in the prcmu of ST-Ericsson Ux500 series platforms.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called ux500_wdt.
+
 # AVR32 Architecture
 
 config AT32AP700X_WDT
diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile
index 97bbdb3a4648..bec86ee6e9e3 100644
--- a/drivers/watchdog/Makefile
+++ b/drivers/watchdog/Makefile
@@ -52,6 +52,7 @@ obj-$(CONFIG_STMP3XXX_WATCHDOG) += stmp3xxx_wdt.o
 obj-$(CONFIG_NUC900_WATCHDOG) += nuc900_wdt.o
 obj-$(CONFIG_TS72XX_WATCHDOG) += ts72xx_wdt.o
 obj-$(CONFIG_IMX2_WDT) += imx2_wdt.o
+obj-$(CONFIG_UX500_WATCHDOG) += ux500_wdt.o
 
 # AVR32 Architecture
 obj-$(CONFIG_AT32AP700X_WDT) += at32ap700x_wdt.o
diff --git a/drivers/watchdog/ux500_wdt.c b/drivers/watchdog/ux500_wdt.c
new file mode 100644
index 000000000000..a614d84121c3
--- /dev/null
+++ b/drivers/watchdog/ux500_wdt.c
@@ -0,0 +1,171 @@
+/*
+ * Copyright (C) ST-Ericsson SA 2011-2013
+ *
+ * License Terms: GNU General Public License v2
+ *
+ * Author: Mathieu Poirier <mathieu.poirier@linaro.org> for ST-Ericsson
+ * Author: Jonas Aaberg <jonas.aberg@stericsson.com> for ST-Ericsson
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/moduleparam.h>
+#include <linux/miscdevice.h>
+#include <linux/err.h>
+#include <linux/uaccess.h>
+#include <linux/watchdog.h>
+#include <linux/platform_device.h>
+#include <linux/platform_data/ux500_wdt.h>
+
+#include <linux/mfd/dbx500-prcmu.h>
+
+#define WATCHDOG_TIMEOUT 600 /* 10 minutes */
+
+#define WATCHDOG_MIN	0
+#define WATCHDOG_MAX28	268435  /* 28 bit resolution in ms == 268435.455 s */
+#define WATCHDOG_MAX32	4294967 /* 32 bit resolution in ms == 4294967.295 s */
+
+static unsigned int timeout = WATCHDOG_TIMEOUT;
+module_param(timeout, uint, 0);
+MODULE_PARM_DESC(timeout,
+	"Watchdog timeout in seconds. default="
+				__MODULE_STRING(WATCHDOG_TIMEOUT) ".");
+
+static bool nowayout = WATCHDOG_NOWAYOUT;
+module_param(nowayout, bool, 0);
+MODULE_PARM_DESC(nowayout,
+	"Watchdog cannot be stopped once started (default="
+				__MODULE_STRING(WATCHDOG_NOWAYOUT) ")");
+
+static int ux500_wdt_start(struct watchdog_device *wdd)
+{
+	return prcmu_enable_a9wdog(PRCMU_WDOG_ALL);
+}
+
+static int ux500_wdt_stop(struct watchdog_device *wdd)
+{
+	return prcmu_disable_a9wdog(PRCMU_WDOG_ALL);
+}
+
+static int ux500_wdt_keepalive(struct watchdog_device *wdd)
+{
+	return prcmu_kick_a9wdog(PRCMU_WDOG_ALL);
+}
+
+static int ux500_wdt_set_timeout(struct watchdog_device *wdd,
+				 unsigned int timeout)
+{
+	ux500_wdt_stop(wdd);
+	prcmu_load_a9wdog(PRCMU_WDOG_ALL, timeout * 1000);
+	ux500_wdt_start(wdd);
+
+	return 0;
+}
+
+static const struct watchdog_info ux500_wdt_info = {
+	.options = WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING | WDIOF_MAGICCLOSE,
+	.identity = "Ux500 WDT",
+	.firmware_version = 1,
+};
+
+static const struct watchdog_ops ux500_wdt_ops = {
+	.owner = THIS_MODULE,
+	.start = ux500_wdt_start,
+	.stop  = ux500_wdt_stop,
+	.ping  = ux500_wdt_keepalive,
+	.set_timeout = ux500_wdt_set_timeout,
+};
+
+static struct watchdog_device ux500_wdt = {
+	.info = &ux500_wdt_info,
+	.ops = &ux500_wdt_ops,
+	.min_timeout = WATCHDOG_MIN,
+	.max_timeout = WATCHDOG_MAX32,
+};
+
+static int ux500_wdt_probe(struct platform_device *pdev)
+{
+	int ret;
+	struct ux500_wdt_data *pdata = pdev->dev.platform_data;
+
+	if (pdata) {
+		if (pdata->timeout > 0)
+			timeout = pdata->timeout;
+		if (pdata->has_28_bits_resolution)
+			ux500_wdt.max_timeout = WATCHDOG_MAX28;
+	}
+
+	watchdog_set_nowayout(&ux500_wdt, nowayout);
+
+	/* disable auto off on sleep */
+	prcmu_config_a9wdog(PRCMU_WDOG_CPU1, false);
+
+	/* set HW initial value */
+	prcmu_load_a9wdog(PRCMU_WDOG_ALL, timeout * 1000);
+
+	ret = watchdog_register_device(&ux500_wdt);
+	if (ret)
+		return ret;
+
+	dev_info(&pdev->dev, "initialized\n");
+
+	return 0;
+}
+
+static int ux500_wdt_remove(struct platform_device *dev)
+{
+	watchdog_unregister_device(&ux500_wdt);
+
+	return 0;
+}
+
+#ifdef CONFIG_PM
+static int ux500_wdt_suspend(struct platform_device *pdev,
+			     pm_message_t state)
+{
+	if (watchdog_active(&ux500_wdt)) {
+		ux500_wdt_stop(&ux500_wdt);
+		prcmu_config_a9wdog(PRCMU_WDOG_CPU1, true);
+
+		prcmu_load_a9wdog(PRCMU_WDOG_ALL, timeout * 1000);
+		ux500_wdt_start(&ux500_wdt);
+	}
+	return 0;
+}
+
+static int ux500_wdt_resume(struct platform_device *pdev)
+{
+	if (watchdog_active(&ux500_wdt)) {
+		ux500_wdt_stop(&ux500_wdt);
+		prcmu_config_a9wdog(PRCMU_WDOG_CPU1, false);
+
+		prcmu_load_a9wdog(PRCMU_WDOG_ALL, timeout * 1000);
+		ux500_wdt_start(&ux500_wdt);
+	}
+	return 0;
+}
+#else
+#define ux500_wdt_suspend NULL
+#define ux500_wdt_resume NULL
+#endif
+
+static struct platform_driver ux500_wdt_driver = {
+	.probe		= ux500_wdt_probe,
+	.remove		= ux500_wdt_remove,
+	.suspend	= ux500_wdt_suspend,
+	.resume		= ux500_wdt_resume,
+	.driver		= {
+		.owner	= THIS_MODULE,
+		.name	= "ux500_wdt",
+	},
+};
+
+module_platform_driver(ux500_wdt_driver);
+
+MODULE_AUTHOR("Jonas Aaberg <jonas.aberg@stericsson.com>");
+MODULE_DESCRIPTION("Ux500 Watchdog Driver");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_MISCDEV(WATCHDOG_MINOR);
+MODULE_ALIAS("platform:ux500_wdt");
diff --git a/include/linux/platform_data/ux500_wdt.h b/include/linux/platform_data/ux500_wdt.h
new file mode 100644
index 000000000000..1689ff4c3bfd
--- /dev/null
+++ b/include/linux/platform_data/ux500_wdt.h
@@ -0,0 +1,19 @@
+/*
+ * Copyright (C) ST Ericsson SA 2011
+ *
+ * License Terms: GNU General Public License v2
+ *
+ * STE Ux500 Watchdog platform data
+ */
+#ifndef __UX500_WDT_H
+#define __UX500_WDT_H
+
+/**
+ * struct ux500_wdt_data
+ */
+struct ux500_wdt_data {
+	unsigned int timeout;
+	bool has_28_bits_resolution;
+};
+
+#endif /* __UX500_WDT_H */
-- 
cgit v1.2.3


From 306df798507d8e009a7d4a5e8ce238a3b107de20 Mon Sep 17 00:00:00 2001
From: Yi Zhang <yizhang@marvell.com>
Date: Tue, 22 Jan 2013 10:43:45 +0800
Subject: mfd: 88pm80x: Remove redundant devm_* calls

devm_* functions are device managed and make error handling
and code simpler; it also fix error exit paths

Signed-off-by: Yi Zhang <yizhang@marvell.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/88pm800.c       | 10 +++-------
 drivers/mfd/88pm805.c       |  4 ++--
 drivers/mfd/88pm80x.c       | 22 ++++------------------
 include/linux/mfd/88pm80x.h |  2 +-
 4 files changed, 10 insertions(+), 28 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/88pm800.c b/drivers/mfd/88pm800.c
index 391e23e6a647..582bda543520 100644
--- a/drivers/mfd/88pm800.c
+++ b/drivers/mfd/88pm800.c
@@ -531,7 +531,7 @@ static int pm800_probe(struct i2c_client *client,
 	ret = device_800_init(chip, pdata);
 	if (ret) {
 		dev_err(chip->dev, "%s id 0x%x failed!\n", __func__, chip->id);
-		goto err_800_init;
+		goto err_subchip_alloc;
 	}
 
 	ret = pm800_pages_init(chip);
@@ -546,10 +546,8 @@ static int pm800_probe(struct i2c_client *client,
 err_page_init:
 	mfd_remove_devices(chip->dev);
 	device_irq_exit_800(chip);
-err_800_init:
-	devm_kfree(&client->dev, subchip);
 err_subchip_alloc:
-	pm80x_deinit(client);
+	pm80x_deinit();
 out_init:
 	return ret;
 }
@@ -562,9 +560,7 @@ static int pm800_remove(struct i2c_client *client)
 	device_irq_exit_800(chip);
 
 	pm800_pages_exit(chip);
-	devm_kfree(&client->dev, chip->subchip);
-
-	pm80x_deinit(client);
+	pm80x_deinit();
 
 	return 0;
 }
diff --git a/drivers/mfd/88pm805.c b/drivers/mfd/88pm805.c
index e671230be2b1..65d7ac099b20 100644
--- a/drivers/mfd/88pm805.c
+++ b/drivers/mfd/88pm805.c
@@ -257,7 +257,7 @@ static int pm805_probe(struct i2c_client *client,
 		pdata->plat_config(chip, pdata);
 
 err_805_init:
-	pm80x_deinit(client);
+	pm80x_deinit();
 out_init:
 	return ret;
 }
@@ -269,7 +269,7 @@ static int pm805_remove(struct i2c_client *client)
 	mfd_remove_devices(chip->dev);
 	device_irq_exit_805(chip);
 
-	pm80x_deinit(client);
+	pm80x_deinit();
 
 	return 0;
 }
diff --git a/drivers/mfd/88pm80x.c b/drivers/mfd/88pm80x.c
index 1adb355d86d1..f736a46eb8c0 100644
--- a/drivers/mfd/88pm80x.c
+++ b/drivers/mfd/88pm80x.c
@@ -48,14 +48,12 @@ int pm80x_init(struct i2c_client *client,
 		ret = PTR_ERR(map);
 		dev_err(&client->dev, "Failed to allocate register map: %d\n",
 			ret);
-		goto err_regmap_init;
+		return ret;
 	}
 
 	chip->id = id->driver_data;
-	if (chip->id < CHIP_PM800 || chip->id > CHIP_PM805) {
-		ret = -EINVAL;
-		goto err_chip_id;
-	}
+	if (chip->id < CHIP_PM800 || chip->id > CHIP_PM805)
+		return -EINVAL;
 
 	chip->client = client;
 	chip->regmap = map;
@@ -82,19 +80,11 @@ int pm80x_init(struct i2c_client *client,
 	}
 
 	return 0;
-
-err_chip_id:
-	regmap_exit(map);
-err_regmap_init:
-	devm_kfree(&client->dev, chip);
-	return ret;
 }
 EXPORT_SYMBOL_GPL(pm80x_init);
 
-int pm80x_deinit(struct i2c_client *client)
+int pm80x_deinit(void)
 {
-	struct pm80x_chip *chip = i2c_get_clientdata(client);
-
 	/*
 	 * workaround: clear the dependency between pm800 and pm805.
 	 * would remove it after HW chip fixes the issue.
@@ -103,10 +93,6 @@ int pm80x_deinit(struct i2c_client *client)
 		g_pm80x_chip->companion = NULL;
 	else
 		g_pm80x_chip = NULL;
-
-	regmap_exit(chip->regmap);
-	devm_kfree(&client->dev, chip);
-
 	return 0;
 }
 EXPORT_SYMBOL_GPL(pm80x_deinit);
diff --git a/include/linux/mfd/88pm80x.h b/include/linux/mfd/88pm80x.h
index 478672ed0c3d..e94537befabd 100644
--- a/include/linux/mfd/88pm80x.h
+++ b/include/linux/mfd/88pm80x.h
@@ -365,5 +365,5 @@ static inline int pm80x_dev_resume(struct device *dev)
 
 extern int pm80x_init(struct i2c_client *client,
 		      const struct i2c_device_id *id);
-extern int pm80x_deinit(struct i2c_client *client);
+extern int pm80x_deinit(void);
 #endif /* __LINUX_MFD_88PM80X_H */
-- 
cgit v1.2.3


From 5f384c1f8be19487f904731d7232120dcfeca8e1 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Fri, 25 Jan 2013 17:09:23 +0800
Subject: mfd: wm5102: Make DSP scratch registers readable

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/wm5102-tables.c           |  8 ++++++++
 include/linux/mfd/arizona/registers.h | 16 ++++++++++++++++
 2 files changed, 24 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mfd/wm5102-tables.c b/drivers/mfd/wm5102-tables.c
index ff8f0804dde3..d754596eb942 100644
--- a/drivers/mfd/wm5102-tables.c
+++ b/drivers/mfd/wm5102-tables.c
@@ -1795,6 +1795,10 @@ static bool wm5102_readable_register(struct device *dev, unsigned int reg)
 	case ARIZONA_DSP1_STATUS_1:
 	case ARIZONA_DSP1_STATUS_2:
 	case ARIZONA_DSP1_STATUS_3:
+	case ARIZONA_DSP1_SCRATCH_0:
+	case ARIZONA_DSP1_SCRATCH_1:
+	case ARIZONA_DSP1_SCRATCH_2:
+	case ARIZONA_DSP1_SCRATCH_3:
 		return true;
 	default:
 		if ((reg >= 0x100000 && reg < 0x106000) ||
@@ -1849,6 +1853,10 @@ static bool wm5102_volatile_register(struct device *dev, unsigned int reg)
 	case ARIZONA_DSP1_STATUS_1:
 	case ARIZONA_DSP1_STATUS_2:
 	case ARIZONA_DSP1_STATUS_3:
+	case ARIZONA_DSP1_SCRATCH_0:
+	case ARIZONA_DSP1_SCRATCH_1:
+	case ARIZONA_DSP1_SCRATCH_2:
+	case ARIZONA_DSP1_SCRATCH_3:
 	case ARIZONA_HEADPHONE_DETECT_2:
 	case ARIZONA_MIC_DETECT_3:
 		return true;
diff --git a/include/linux/mfd/arizona/registers.h b/include/linux/mfd/arizona/registers.h
index 1f6fe31a4d5c..718126084ad1 100644
--- a/include/linux/mfd/arizona/registers.h
+++ b/include/linux/mfd/arizona/registers.h
@@ -982,18 +982,34 @@
 #define ARIZONA_DSP1_STATUS_1                    0x1104
 #define ARIZONA_DSP1_STATUS_2                    0x1105
 #define ARIZONA_DSP1_STATUS_3                    0x1106
+#define ARIZONA_DSP1_SCRATCH_0                   0x1140
+#define ARIZONA_DSP1_SCRATCH_1                   0x1141
+#define ARIZONA_DSP1_SCRATCH_2                   0x1142
+#define ARIZONA_DSP1_SCRATCH_3                   0x1143
 #define ARIZONA_DSP2_CONTROL_1                   0x1200
 #define ARIZONA_DSP2_CLOCKING_1                  0x1201
 #define ARIZONA_DSP2_STATUS_1                    0x1204
 #define ARIZONA_DSP2_STATUS_2                    0x1205
+#define ARIZONA_DSP2_SCRATCH_0                   0x1240
+#define ARIZONA_DSP2_SCRATCH_1                   0x1241
+#define ARIZONA_DSP2_SCRATCH_2                   0x1242
+#define ARIZONA_DSP2_SCRATCH_3                   0x1243
 #define ARIZONA_DSP3_CONTROL_1                   0x1300
 #define ARIZONA_DSP3_CLOCKING_1                  0x1301
 #define ARIZONA_DSP3_STATUS_1                    0x1304
 #define ARIZONA_DSP3_STATUS_2                    0x1305
+#define ARIZONA_DSP3_SCRATCH_0                   0x1340
+#define ARIZONA_DSP3_SCRATCH_1                   0x1341
+#define ARIZONA_DSP3_SCRATCH_2                   0x1342
+#define ARIZONA_DSP3_SCRATCH_3                   0x1343
 #define ARIZONA_DSP4_CONTROL_1                   0x1400
 #define ARIZONA_DSP4_CLOCKING_1                  0x1401
 #define ARIZONA_DSP4_STATUS_1                    0x1404
 #define ARIZONA_DSP4_STATUS_2                    0x1405
+#define ARIZONA_DSP4_SCRATCH_0                   0x1440
+#define ARIZONA_DSP4_SCRATCH_1                   0x1441
+#define ARIZONA_DSP4_SCRATCH_2                   0x1442
+#define ARIZONA_DSP4_SCRATCH_3                   0x1443
 
 /*
  * Field Definitions.
-- 
cgit v1.2.3


From 3730bb8b65f9ee8b7097021f8073b80511af770d Mon Sep 17 00:00:00 2001
From: Wei WANG <wei_wang@realsil.com.cn>
Date: Tue, 29 Jan 2013 15:21:32 +0800
Subject: mfd: rtsx: Fix typo in comment

Fix a misspelling word in comment

Signed-off-by: Wei WANG <wei_wang@realsil.com.cn>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 include/linux/mfd/rtsx_pci.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/rtsx_pci.h b/include/linux/mfd/rtsx_pci.h
index 4b117a3f54d4..3f2bf26ca0d1 100644
--- a/include/linux/mfd/rtsx_pci.h
+++ b/include/linux/mfd/rtsx_pci.h
@@ -465,7 +465,7 @@
 #define	SD_RSP_TYPE_R6			0x01
 #define	SD_RSP_TYPE_R7			0x01
 
-/* SD_CONFIURE3 */
+/* SD_CONFIGURE3 */
 #define	SD_RSP_80CLK_TIMEOUT_EN		0x01
 
 /* Card Transfer Reset Register */
-- 
cgit v1.2.3


From 8ea402f5646e6e36c8cd0a62053ba8939204dceb Mon Sep 17 00:00:00 2001
From: Pawel Moll <pawel.moll@arm.com>
Date: Wed, 30 Jan 2013 10:33:16 +0000
Subject: mfd: vexpress: Add pseudo-GPIO based LEDs

The LEDs on the Versatile Express motherboard are controlled
through simple memory-mapped register. This patch extends
the pseudo-GPIO controller definition for these lines and
creates generic "leds-gpio" device using them

Signed-off-by: Pawel Moll <pawel.moll@arm.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/vexpress-sysreg.c | 73 ++++++++++++++++++++++++++++++++-----------
 include/linux/vexpress.h      |  8 +++++
 2 files changed, 63 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/vexpress-sysreg.c b/drivers/mfd/vexpress-sysreg.c
index 77048b18439e..51c3ca263bf5 100644
--- a/drivers/mfd/vexpress-sysreg.c
+++ b/drivers/mfd/vexpress-sysreg.c
@@ -49,6 +49,8 @@
 #define SYS_ID_HBI_SHIFT	16
 #define SYS_PROCIDx_HBI_SHIFT	0
 
+#define SYS_LED_LED(n)		(1 << (n))
+
 #define SYS_MCI_CARDIN		(1 << 0)
 #define SYS_MCI_WPROT		(1 << 1)
 
@@ -348,22 +350,27 @@ void __init vexpress_sysreg_of_early_init(void)
 }
 
 
+#define VEXPRESS_SYSREG_GPIO(_name, _reg, _value) \
+	[VEXPRESS_GPIO_##_name] = { \
+		.reg = _reg, \
+		.value = _reg##_##_value, \
+	}
+
 static struct vexpress_sysreg_gpio {
 	unsigned long reg;
 	u32 value;
 } vexpress_sysreg_gpios[] = {
-	[VEXPRESS_GPIO_MMC_CARDIN] = {
-		.reg = SYS_MCI,
-		.value = SYS_MCI_CARDIN,
-	},
-	[VEXPRESS_GPIO_MMC_WPROT] = {
-		.reg = SYS_MCI,
-		.value = SYS_MCI_WPROT,
-	},
-	[VEXPRESS_GPIO_FLASH_WPn] = {
-		.reg = SYS_FLASH,
-		.value = SYS_FLASH_WPn,
-	},
+	VEXPRESS_SYSREG_GPIO(MMC_CARDIN,	SYS_MCI,	CARDIN),
+	VEXPRESS_SYSREG_GPIO(MMC_WPROT,		SYS_MCI,	WPROT),
+	VEXPRESS_SYSREG_GPIO(FLASH_WPn,		SYS_FLASH,	WPn),
+	VEXPRESS_SYSREG_GPIO(LED0,		SYS_LED,	LED(0)),
+	VEXPRESS_SYSREG_GPIO(LED1,		SYS_LED,	LED(1)),
+	VEXPRESS_SYSREG_GPIO(LED2,		SYS_LED,	LED(2)),
+	VEXPRESS_SYSREG_GPIO(LED3,		SYS_LED,	LED(3)),
+	VEXPRESS_SYSREG_GPIO(LED4,		SYS_LED,	LED(4)),
+	VEXPRESS_SYSREG_GPIO(LED5,		SYS_LED,	LED(5)),
+	VEXPRESS_SYSREG_GPIO(LED6,		SYS_LED,	LED(6)),
+	VEXPRESS_SYSREG_GPIO(LED7,		SYS_LED,	LED(7)),
 };
 
 static int vexpress_sysreg_gpio_direction_input(struct gpio_chip *chip,
@@ -372,12 +379,6 @@ static int vexpress_sysreg_gpio_direction_input(struct gpio_chip *chip,
 	return 0;
 }
 
-static int vexpress_sysreg_gpio_direction_output(struct gpio_chip *chip,
-						unsigned offset, int value)
-{
-	return 0;
-}
-
 static int vexpress_sysreg_gpio_get(struct gpio_chip *chip,
 				       unsigned offset)
 {
@@ -401,6 +402,14 @@ static void vexpress_sysreg_gpio_set(struct gpio_chip *chip,
 	writel(reg_value, vexpress_sysreg_base + gpio->reg);
 }
 
+static int vexpress_sysreg_gpio_direction_output(struct gpio_chip *chip,
+						unsigned offset, int value)
+{
+	vexpress_sysreg_gpio_set(chip, offset, value);
+
+	return 0;
+}
+
 static struct gpio_chip vexpress_sysreg_gpio_chip = {
 	.label = "vexpress-sysreg",
 	.direction_input = vexpress_sysreg_gpio_direction_input,
@@ -412,6 +421,30 @@ static struct gpio_chip vexpress_sysreg_gpio_chip = {
 };
 
 
+#define VEXPRESS_SYSREG_GREEN_LED(_name, _default_trigger, _gpio) \
+	{ \
+		.name = "v2m:green:"_name, \
+		.default_trigger = _default_trigger, \
+		.gpio = VEXPRESS_GPIO_##_gpio, \
+	}
+
+struct gpio_led vexpress_sysreg_leds[] = {
+	VEXPRESS_SYSREG_GREEN_LED("user1",	"heartbeat",	LED0),
+	VEXPRESS_SYSREG_GREEN_LED("user2",	"mmc0",		LED1),
+	VEXPRESS_SYSREG_GREEN_LED("user3",	"cpu0",		LED2),
+	VEXPRESS_SYSREG_GREEN_LED("user4",	"cpu1",		LED3),
+	VEXPRESS_SYSREG_GREEN_LED("user5",	"cpu2",		LED4),
+	VEXPRESS_SYSREG_GREEN_LED("user6",	"cpu3",		LED5),
+	VEXPRESS_SYSREG_GREEN_LED("user7",	"cpu4",		LED6),
+	VEXPRESS_SYSREG_GREEN_LED("user8",	"cpu5",		LED7),
+};
+
+struct gpio_led_platform_data vexpress_sysreg_leds_pdata = {
+	.num_leds = ARRAY_SIZE(vexpress_sysreg_leds),
+	.leds = vexpress_sysreg_leds,
+};
+
+
 static ssize_t vexpress_sysreg_sys_id_show(struct device *dev,
 		struct device_attribute *attr, char *buf)
 {
@@ -456,6 +489,10 @@ static int vexpress_sysreg_probe(struct platform_device *pdev)
 		return err;
 	}
 
+	platform_device_register_data(vexpress_sysreg_dev, "leds-gpio",
+			PLATFORM_DEVID_AUTO, &vexpress_sysreg_leds_pdata,
+			sizeof(vexpress_sysreg_leds_pdata));
+
 	vexpress_sysreg_dev = &pdev->dev;
 
 	device_create_file(vexpress_sysreg_dev, &dev_attr_sys_id);
diff --git a/include/linux/vexpress.h b/include/linux/vexpress.h
index c52215ff4245..75818744ab59 100644
--- a/include/linux/vexpress.h
+++ b/include/linux/vexpress.h
@@ -27,6 +27,14 @@
 #define VEXPRESS_GPIO_MMC_CARDIN	0
 #define VEXPRESS_GPIO_MMC_WPROT		1
 #define VEXPRESS_GPIO_FLASH_WPn		2
+#define VEXPRESS_GPIO_LED0		3
+#define VEXPRESS_GPIO_LED1		4
+#define VEXPRESS_GPIO_LED2		5
+#define VEXPRESS_GPIO_LED3		6
+#define VEXPRESS_GPIO_LED4		7
+#define VEXPRESS_GPIO_LED5		8
+#define VEXPRESS_GPIO_LED6		9
+#define VEXPRESS_GPIO_LED7		10
 
 #define VEXPRESS_RES_FUNC(_site, _func)	\
 {					\
-- 
cgit v1.2.3


From e12379320b2e1ceffc4211ad174989bc042149d9 Mon Sep 17 00:00:00 2001
From: Roger Tseng <rogerable@realtek.com>
Date: Mon, 4 Feb 2013 15:45:59 +0800
Subject: mfd: rtsx: Support RTS5227

Support new model RTS5227.

Signed-off-by: Roger Tseng <rogerable@realtek.com>
Reviewed-by: Wei WANG <wei_wang@realsil.com.cn>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/Makefile         |   2 +-
 drivers/mfd/rts5227.c        | 234 +++++++++++++++++++++++++++++++++++++++++++
 drivers/mfd/rtsx_pcr.c       |   5 +
 drivers/mfd/rtsx_pcr.h       |   1 +
 include/linux/mfd/rtsx_pci.h |   5 +
 5 files changed, 246 insertions(+), 1 deletion(-)
 create mode 100644 drivers/mfd/rts5227.c

(limited to 'include/linux')

diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index 8b977f8045ae..b90409c23664 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -9,7 +9,7 @@ obj-$(CONFIG_MFD_88PM805)	+= 88pm805.o 88pm80x.o
 obj-$(CONFIG_MFD_SM501)		+= sm501.o
 obj-$(CONFIG_MFD_ASIC3)		+= asic3.o tmio_core.o
 
-rtsx_pci-objs			:= rtsx_pcr.o rts5209.o rts5229.o rtl8411.o
+rtsx_pci-objs			:= rtsx_pcr.o rts5209.o rts5229.o rtl8411.o rts5227.o
 obj-$(CONFIG_MFD_RTSX_PCI)	+= rtsx_pci.o
 
 obj-$(CONFIG_HTC_EGPIO)		+= htc-egpio.o
diff --git a/drivers/mfd/rts5227.c b/drivers/mfd/rts5227.c
new file mode 100644
index 000000000000..fc831dcb1480
--- /dev/null
+++ b/drivers/mfd/rts5227.c
@@ -0,0 +1,234 @@
+/* Driver for Realtek PCI-Express card reader
+ *
+ * Copyright(c) 2009 Realtek Semiconductor Corp. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author:
+ *   Wei WANG <wei_wang@realsil.com.cn>
+ *   No. 450, Shenhu Road, Suzhou Industry Park, Suzhou, China
+ *
+ *   Roger Tseng <rogerable@realtek.com>
+ *   No. 2, Innovation Road II, Hsinchu Science Park, Hsinchu 300, Taiwan
+ */
+
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/mfd/rtsx_pci.h>
+
+#include "rtsx_pcr.h"
+
+static int rts5227_extra_init_hw(struct rtsx_pcr *pcr)
+{
+	u16 cap;
+
+	rtsx_pci_init_cmd(pcr);
+
+	/* Configure GPIO as output */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, GPIO_CTL, 0x02, 0x02);
+	/* Switch LDO3318 source from DV33 to card_3v3 */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, LDO_PWR_SEL, 0x03, 0x00);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, LDO_PWR_SEL, 0x03, 0x01);
+	/* LED shine disabled, set initial shine cycle period */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, OLT_LED_CTL, 0x0F, 0x02);
+	/* Configure LTR */
+	pcie_capability_read_word(pcr->pci, PCI_EXP_DEVCTL2, &cap);
+	if (cap & PCI_EXP_LTR_EN)
+		rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, LTR_CTL, 0xFF, 0xA3);
+	/* Configure OBFF */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, OBFF_CFG, 0x03, 0x03);
+	/* Configure force_clock_req
+	 * Maybe We should define 0xFF03 as some name
+	 */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, 0xFF03, 0x08, 0x08);
+	/* Correct driving */
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD,
+			SD30_CLK_DRIVE_SEL, 0xFF, 0x96);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD,
+			SD30_CMD_DRIVE_SEL, 0xFF, 0x96);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD,
+			SD30_DAT_DRIVE_SEL, 0xFF, 0x96);
+
+	return rtsx_pci_send_cmd(pcr, 100);
+}
+
+static int rts5227_optimize_phy(struct rtsx_pcr *pcr)
+{
+	/* Optimize RX sensitivity */
+	return rtsx_pci_write_phy_register(pcr, 0x00, 0xBA42);
+}
+
+static int rts5227_turn_on_led(struct rtsx_pcr *pcr)
+{
+	return rtsx_pci_write_register(pcr, GPIO_CTL, 0x02, 0x02);
+}
+
+static int rts5227_turn_off_led(struct rtsx_pcr *pcr)
+{
+	return rtsx_pci_write_register(pcr, GPIO_CTL, 0x02, 0x00);
+}
+
+static int rts5227_enable_auto_blink(struct rtsx_pcr *pcr)
+{
+	return rtsx_pci_write_register(pcr, OLT_LED_CTL, 0x08, 0x08);
+}
+
+static int rts5227_disable_auto_blink(struct rtsx_pcr *pcr)
+{
+	return rtsx_pci_write_register(pcr, OLT_LED_CTL, 0x08, 0x00);
+}
+
+static int rts5227_card_power_on(struct rtsx_pcr *pcr, int card)
+{
+	int err;
+
+	rtsx_pci_init_cmd(pcr);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL,
+			SD_POWER_MASK, SD_PARTIAL_POWER_ON);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL,
+			LDO3318_PWR_MASK, 0x02);
+	err = rtsx_pci_send_cmd(pcr, 100);
+	if (err < 0)
+		return err;
+
+	/* To avoid too large in-rush current */
+	udelay(150);
+
+	rtsx_pci_init_cmd(pcr);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL,
+			SD_POWER_MASK, SD_POWER_ON);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL,
+			LDO3318_PWR_MASK, 0x06);
+	err = rtsx_pci_send_cmd(pcr, 100);
+	if (err < 0)
+		return err;
+
+	return 0;
+}
+
+static int rts5227_card_power_off(struct rtsx_pcr *pcr, int card)
+{
+	rtsx_pci_init_cmd(pcr);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL,
+			SD_POWER_MASK | PMOS_STRG_MASK,
+			SD_POWER_OFF | PMOS_STRG_400mA);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL,
+			LDO3318_PWR_MASK, 0X00);
+	return rtsx_pci_send_cmd(pcr, 100);
+}
+
+static int rts5227_switch_output_voltage(struct rtsx_pcr *pcr, u8 voltage)
+{
+	int err;
+	u8 drive_sel;
+
+	if (voltage == OUTPUT_3V3) {
+		err = rtsx_pci_write_phy_register(pcr, 0x08, 0x4FC0 | 0x24);
+		if (err < 0)
+			return err;
+		drive_sel = 0x96;
+	} else if (voltage == OUTPUT_1V8) {
+		err = rtsx_pci_write_phy_register(pcr, 0x11, 0x3C02);
+		if (err < 0)
+			return err;
+		err = rtsx_pci_write_phy_register(pcr, 0x08, 0x4C80 | 0x24);
+		if (err < 0)
+			return err;
+		drive_sel = 0xB3;
+	} else {
+		return -EINVAL;
+	}
+
+	/* set pad drive */
+	rtsx_pci_init_cmd(pcr);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD30_CLK_DRIVE_SEL,
+			0xFF, drive_sel);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD30_CMD_DRIVE_SEL,
+			0xFF, drive_sel);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD30_DAT_DRIVE_SEL,
+			0xFF, drive_sel);
+	return rtsx_pci_send_cmd(pcr, 100);
+}
+
+static const struct pcr_ops rts5227_pcr_ops = {
+	.extra_init_hw = rts5227_extra_init_hw,
+	.optimize_phy = rts5227_optimize_phy,
+	.turn_on_led = rts5227_turn_on_led,
+	.turn_off_led = rts5227_turn_off_led,
+	.enable_auto_blink = rts5227_enable_auto_blink,
+	.disable_auto_blink = rts5227_disable_auto_blink,
+	.card_power_on = rts5227_card_power_on,
+	.card_power_off = rts5227_card_power_off,
+	.switch_output_voltage = rts5227_switch_output_voltage,
+	.cd_deglitch = NULL,
+	.conv_clk_and_div_n = NULL,
+};
+
+/* SD Pull Control Enable:
+ *     SD_DAT[3:0] ==> pull up
+ *     SD_CD       ==> pull up
+ *     SD_WP       ==> pull up
+ *     SD_CMD      ==> pull up
+ *     SD_CLK      ==> pull down
+ */
+static const u32 rts5227_sd_pull_ctl_enable_tbl[] = {
+	RTSX_REG_PAIR(CARD_PULL_CTL2, 0xAA),
+	RTSX_REG_PAIR(CARD_PULL_CTL3, 0xE9),
+	0,
+};
+
+/* SD Pull Control Disable:
+ *     SD_DAT[3:0] ==> pull down
+ *     SD_CD       ==> pull up
+ *     SD_WP       ==> pull down
+ *     SD_CMD      ==> pull down
+ *     SD_CLK      ==> pull down
+ */
+static const u32 rts5227_sd_pull_ctl_disable_tbl[] = {
+	RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55),
+	RTSX_REG_PAIR(CARD_PULL_CTL3, 0xD5),
+	0,
+};
+
+/* MS Pull Control Enable:
+ *     MS CD       ==> pull up
+ *     others      ==> pull down
+ */
+static const u32 rts5227_ms_pull_ctl_enable_tbl[] = {
+	RTSX_REG_PAIR(CARD_PULL_CTL5, 0x55),
+	RTSX_REG_PAIR(CARD_PULL_CTL6, 0x15),
+	0,
+};
+
+/* MS Pull Control Disable:
+ *     MS CD       ==> pull up
+ *     others      ==> pull down
+ */
+static const u32 rts5227_ms_pull_ctl_disable_tbl[] = {
+	RTSX_REG_PAIR(CARD_PULL_CTL5, 0x55),
+	RTSX_REG_PAIR(CARD_PULL_CTL6, 0x15),
+	0,
+};
+
+void rts5227_init_params(struct rtsx_pcr *pcr)
+{
+	pcr->extra_caps = EXTRA_CAPS_SD_SDR50 | EXTRA_CAPS_SD_SDR104;
+	pcr->num_slots = 2;
+	pcr->ops = &rts5227_pcr_ops;
+
+	pcr->sd_pull_ctl_enable_tbl = rts5227_sd_pull_ctl_enable_tbl;
+	pcr->sd_pull_ctl_disable_tbl = rts5227_sd_pull_ctl_disable_tbl;
+	pcr->ms_pull_ctl_enable_tbl = rts5227_ms_pull_ctl_enable_tbl;
+	pcr->ms_pull_ctl_disable_tbl = rts5227_ms_pull_ctl_disable_tbl;
+}
diff --git a/drivers/mfd/rtsx_pcr.c b/drivers/mfd/rtsx_pcr.c
index 9016932f0267..822237e322ba 100644
--- a/drivers/mfd/rtsx_pcr.c
+++ b/drivers/mfd/rtsx_pcr.c
@@ -55,6 +55,7 @@ static DEFINE_PCI_DEVICE_TABLE(rtsx_pci_ids) = {
 	{ PCI_DEVICE(0x10EC, 0x5209), PCI_CLASS_OTHERS << 16, 0xFF0000 },
 	{ PCI_DEVICE(0x10EC, 0x5229), PCI_CLASS_OTHERS << 16, 0xFF0000 },
 	{ PCI_DEVICE(0x10EC, 0x5289), PCI_CLASS_OTHERS << 16, 0xFF0000 },
+	{ PCI_DEVICE(0x10EC, 0x5227), PCI_CLASS_OTHERS << 16, 0xFF0000 },
 	{ 0, }
 };
 
@@ -998,6 +999,10 @@ static int rtsx_pci_init_chip(struct rtsx_pcr *pcr)
 	case 0x5289:
 		rtl8411_init_params(pcr);
 		break;
+
+	case 0x5227:
+		rts5227_init_params(pcr);
+		break;
 	}
 
 	dev_dbg(&(pcr->pci->dev), "PID: 0x%04x, IC version: 0x%02x\n",
diff --git a/drivers/mfd/rtsx_pcr.h b/drivers/mfd/rtsx_pcr.h
index 33c210be1daa..2b3ab8a04823 100644
--- a/drivers/mfd/rtsx_pcr.h
+++ b/drivers/mfd/rtsx_pcr.h
@@ -31,5 +31,6 @@
 void rts5209_init_params(struct rtsx_pcr *pcr);
 void rts5229_init_params(struct rtsx_pcr *pcr);
 void rtl8411_init_params(struct rtsx_pcr *pcr);
+void rts5227_init_params(struct rtsx_pcr *pcr);
 
 #endif
diff --git a/include/linux/mfd/rtsx_pci.h b/include/linux/mfd/rtsx_pci.h
index 3f2bf26ca0d1..5d9b81e8aff4 100644
--- a/include/linux/mfd/rtsx_pci.h
+++ b/include/linux/mfd/rtsx_pci.h
@@ -581,8 +581,11 @@
 #define CARD_GPIO_DIR			0xFD57
 #define CARD_GPIO			0xFD58
 #define CARD_DATA_SOURCE		0xFD5B
+#define SD30_CLK_DRIVE_SEL		0xFD5A
 #define CARD_SELECT			0xFD5C
 #define SD30_DRIVE_SEL			0xFD5E
+#define SD30_CMD_DRIVE_SEL		0xFD5E
+#define SD30_DAT_DRIVE_SEL		0xFD5F
 #define CARD_CLK_EN			0xFD69
 #define SDIO_CTRL			0xFD6B
 #define CD_PAD_CTL			0xFD73
@@ -655,6 +658,8 @@
 #define MSGTXDATA3			0xFE47
 #define MSGTXCTL			0xFE48
 #define PETXCFG				0xFE49
+#define LTR_CTL				0xFE4A
+#define OBFF_CFG			0xFE4C
 
 #define CDRESUMECTL			0xFE52
 #define WAKE_SEL_CTL			0xFE54
-- 
cgit v1.2.3


From 4e405ae256b7e04f7c1213136f3bfd9fb76e2023 Mon Sep 17 00:00:00 2001
From: Qing Xu <qingx@marvell.com>
Date: Mon, 4 Feb 2013 23:40:42 +0800
Subject: mfd: max8925: Add irqdomain for dt

Add irqdomains for max8925's main irq, wrap irq register operations
into irqdomain's map func. it is necessary for dt support.

Also, add dt support for max8925 driver.

Signed-off-by: Qing Xu <qingx@marvell.com>
Signed-off-by: Haojian Zhuang <haojian.zhuang@gmail.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/max8925-core.c  | 73 ++++++++++++++++++++++++++-------------------
 drivers/mfd/max8925-i2c.c   | 36 ++++++++++++++++++++--
 include/linux/mfd/max8925.h |  3 +-
 3 files changed, 78 insertions(+), 34 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/max8925-core.c b/drivers/mfd/max8925-core.c
index e32466e865b9..0ad8d9a7c15a 100644
--- a/drivers/mfd/max8925-core.c
+++ b/drivers/mfd/max8925-core.c
@@ -14,10 +14,13 @@
 #include <linux/i2c.h>
 #include <linux/irq.h>
 #include <linux/interrupt.h>
+#include <linux/irqdomain.h>
 #include <linux/platform_device.h>
 #include <linux/regulator/machine.h>
 #include <linux/mfd/core.h>
 #include <linux/mfd/max8925.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
 
 static struct resource bk_resources[] = {
 	{ 0x84, 0x84, "mode control", IORESOURCE_REG, },
@@ -639,17 +642,33 @@ static struct irq_chip max8925_irq_chip = {
 	.irq_disable	= max8925_irq_disable,
 };
 
+static int max8925_irq_domain_map(struct irq_domain *d, unsigned int virq,
+				 irq_hw_number_t hw)
+{
+	irq_set_chip_data(virq, d->host_data);
+	irq_set_chip_and_handler(virq, &max8925_irq_chip, handle_edge_irq);
+	irq_set_nested_thread(virq, 1);
+#ifdef CONFIG_ARM
+	set_irq_flags(virq, IRQF_VALID);
+#else
+	irq_set_noprobe(virq);
+#endif
+	return 0;
+}
+
+static struct irq_domain_ops max8925_irq_domain_ops = {
+	.map	= max8925_irq_domain_map,
+	.xlate	= irq_domain_xlate_onetwocell,
+};
+
+
 static int max8925_irq_init(struct max8925_chip *chip, int irq,
 			    struct max8925_platform_data *pdata)
 {
 	unsigned long flags = IRQF_TRIGGER_FALLING | IRQF_ONESHOT;
-	int i, ret;
-	int __irq;
+	int ret;
+	struct device_node *node = chip->dev->of_node;
 
-	if (!pdata || !pdata->irq_base) {
-		dev_warn(chip->dev, "No interrupt support on IRQ base\n");
-		return -EINVAL;
-	}
 	/* clear all interrupts */
 	max8925_reg_read(chip->i2c, MAX8925_CHG_IRQ1);
 	max8925_reg_read(chip->i2c, MAX8925_CHG_IRQ2);
@@ -667,35 +686,30 @@ static int max8925_irq_init(struct max8925_chip *chip, int irq,
 	max8925_reg_write(chip->rtc, MAX8925_RTC_IRQ_MASK, 0xff);
 
 	mutex_init(&chip->irq_lock);
-	chip->core_irq = irq;
-	chip->irq_base = pdata->irq_base;
-
-	/* register with genirq */
-	for (i = 0; i < ARRAY_SIZE(max8925_irqs); i++) {
-		__irq = i + chip->irq_base;
-		irq_set_chip_data(__irq, chip);
-		irq_set_chip_and_handler(__irq, &max8925_irq_chip,
-					 handle_edge_irq);
-		irq_set_nested_thread(__irq, 1);
-#ifdef CONFIG_ARM
-		set_irq_flags(__irq, IRQF_VALID);
-#else
-		irq_set_noprobe(__irq);
-#endif
-	}
-	if (!irq) {
-		dev_warn(chip->dev, "No interrupt support on core IRQ\n");
-		goto tsc_irq;
+	chip->irq_base = irq_alloc_descs(-1, 0, MAX8925_NR_IRQS, 0);
+	if (chip->irq_base < 0) {
+		dev_err(chip->dev, "Failed to allocate interrupts, ret:%d\n",
+			chip->irq_base);
+		return -EBUSY;
 	}
 
+	irq_domain_add_legacy(node, MAX8925_NR_IRQS, chip->irq_base, 0,
+			      &max8925_irq_domain_ops, chip);
+
+	/* request irq handler for pmic main irq*/
+	chip->core_irq = irq;
+	if (!chip->core_irq)
+		return -EBUSY;
 	ret = request_threaded_irq(irq, NULL, max8925_irq, flags | IRQF_ONESHOT,
 				   "max8925", chip);
 	if (ret) {
 		dev_err(chip->dev, "Failed to request core IRQ: %d\n", ret);
 		chip->core_irq = 0;
+		return -EBUSY;
 	}
 
-tsc_irq:
+	/* request irq handler for pmic tsc irq*/
+
 	/* mask TSC interrupt */
 	max8925_reg_write(chip->adc, MAX8925_TSC_IRQ_MASK, 0x0f);
 
@@ -704,7 +718,6 @@ tsc_irq:
 		return 0;
 	}
 	chip->tsc_irq = pdata->tsc_irq;
-
 	ret = request_threaded_irq(chip->tsc_irq, NULL, max8925_tsc_irq,
 				   flags | IRQF_ONESHOT, "max8925-tsc", chip);
 	if (ret) {
@@ -875,11 +888,11 @@ int max8925_device_init(struct max8925_chip *chip,
 
 	if (pdata && pdata->power) {
 		ret = mfd_add_devices(chip->dev, 0, &power_devs[0],
-					ARRAY_SIZE(power_devs),
+				      ARRAY_SIZE(power_devs),
 				      &power_supply_resources[0], 0, NULL);
 		if (ret < 0) {
-			dev_err(chip->dev, "Failed to add power supply "
-				"subdev\n");
+			dev_err(chip->dev,
+				"Failed to add power supply subdev\n");
 			goto out_dev;
 		}
 	}
diff --git a/drivers/mfd/max8925-i2c.c b/drivers/mfd/max8925-i2c.c
index 00b5b456063d..92bbebd31598 100644
--- a/drivers/mfd/max8925-i2c.c
+++ b/drivers/mfd/max8925-i2c.c
@@ -135,13 +135,37 @@ static const struct i2c_device_id max8925_id_table[] = {
 };
 MODULE_DEVICE_TABLE(i2c, max8925_id_table);
 
+static int max8925_dt_init(struct device_node *np, struct device *dev,
+			   struct max8925_platform_data *pdata)
+{
+	int ret;
+
+	ret = of_property_read_u32(np, "maxim,tsc-irq", &pdata->tsc_irq);
+	if (ret) {
+		dev_err(dev, "Not found maxim,tsc-irq property\n");
+		return -EINVAL;
+	}
+	return 0;
+}
+
 static int max8925_probe(struct i2c_client *client,
 				   const struct i2c_device_id *id)
 {
 	struct max8925_platform_data *pdata = client->dev.platform_data;
 	static struct max8925_chip *chip;
-
-	if (!pdata) {
+	struct device_node *node = client->dev.of_node;
+
+	if (node && !pdata) {
+		/* parse DT to get platform data */
+		pdata = devm_kzalloc(&client->dev,
+				     sizeof(struct max8925_platform_data),
+				     GFP_KERNEL);
+		if (!pdata)
+			return -ENOMEM;
+
+		if (max8925_dt_init(node, &client->dev, pdata))
+			return -EINVAL;
+	} else if (!pdata) {
 		pr_info("%s: platform data is missing\n", __func__);
 		return -EINVAL;
 	}
@@ -203,11 +227,18 @@ static int max8925_resume(struct device *dev)
 
 static SIMPLE_DEV_PM_OPS(max8925_pm_ops, max8925_suspend, max8925_resume);
 
+static const struct of_device_id max8925_dt_ids[] = {
+	{ .compatible = "maxim,max8925", },
+	{},
+};
+MODULE_DEVICE_TABLE(of, max8925_dt_ids);
+
 static struct i2c_driver max8925_driver = {
 	.driver	= {
 		.name	= "max8925",
 		.owner	= THIS_MODULE,
 		.pm     = &max8925_pm_ops,
+		.of_match_table = of_match_ptr(max8925_dt_ids),
 	},
 	.probe		= max8925_probe,
 	.remove		= max8925_remove,
@@ -217,7 +248,6 @@ static struct i2c_driver max8925_driver = {
 static int __init max8925_i2c_init(void)
 {
 	int ret;
-
 	ret = i2c_add_driver(&max8925_driver);
 	if (ret != 0)
 		pr_err("Failed to register MAX8925 I2C driver: %d\n", ret);
diff --git a/include/linux/mfd/max8925.h b/include/linux/mfd/max8925.h
index 74d8e2969630..ce8502e9e7dc 100644
--- a/include/linux/mfd/max8925.h
+++ b/include/linux/mfd/max8925.h
@@ -190,6 +190,8 @@ enum {
 	MAX8925_NR_IRQS,
 };
 
+
+
 struct max8925_chip {
 	struct device		*dev;
 	struct i2c_client	*i2c;
@@ -201,7 +203,6 @@ struct max8925_chip {
 	int			irq_base;
 	int			core_irq;
 	int			tsc_irq;
-
 	unsigned int            wakeup_flag;
 };
 
-- 
cgit v1.2.3


From c3481955f6c78c8dd99921759306d7469c999ec2 Mon Sep 17 00:00:00 2001
From: Wei WANG <wei_wang@realsil.com.cn>
Date: Fri, 8 Feb 2013 15:24:27 +0800
Subject: mfd: rtsx: Fix issue that booting OS with SD card inserted

Realtek card reader supports both SD and MS card. According to the
settings of rtsx MFD driver, SD host will be probed before MS host.
If we boot/reboot Linux with SD card inserted, the resetting flow of SD
card will succeed, and the following resetting flow of MS is sure to fail.
Then MS upper-level driver will ask rtsx driver to turn power off. This
request leads to the result that the following SD commands fail and SD card
can't be accessed again.

In this commit, Realtek's SD and MS host driver will check whether the card
that upper driver requesting is the one existing in the slot. If not, Realtek's
host driver will refuse the operation to make sure the exlusive accessing
at the same time.

Signed-off-by: Wei WANG <wei_wang@realsil.com.cn>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/memstick/host/rtsx_pci_ms.c |  7 +++++++
 drivers/mfd/rtsx_pcr.c              | 30 ++++++++++++++++++++++++++++++
 drivers/mmc/host/rtsx_pci_sdmmc.c   | 18 ++++++++++++++++++
 include/linux/mfd/rtsx_pci.h        |  2 ++
 4 files changed, 57 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/memstick/host/rtsx_pci_ms.c b/drivers/memstick/host/rtsx_pci_ms.c
index f5ddb82dadb7..64a779c58a74 100644
--- a/drivers/memstick/host/rtsx_pci_ms.c
+++ b/drivers/memstick/host/rtsx_pci_ms.c
@@ -426,6 +426,9 @@ static void rtsx_pci_ms_request(struct memstick_host *msh)
 
 	dev_dbg(ms_dev(host), "--> %s\n", __func__);
 
+	if (rtsx_pci_card_exclusive_check(host->pcr, RTSX_MS_CARD))
+		return;
+
 	schedule_work(&host->handle_req);
 }
 
@@ -441,6 +444,10 @@ static int rtsx_pci_ms_set_param(struct memstick_host *msh,
 	dev_dbg(ms_dev(host), "%s: param = %d, value = %d\n",
 			__func__, param, value);
 
+	err = rtsx_pci_card_exclusive_check(host->pcr, RTSX_MS_CARD);
+	if (err)
+		return err;
+
 	switch (param) {
 	case MEMSTICK_POWER:
 		if (value == MEMSTICK_POWER_ON)
diff --git a/drivers/mfd/rtsx_pcr.c b/drivers/mfd/rtsx_pcr.c
index 822237e322ba..481a98a10ecd 100644
--- a/drivers/mfd/rtsx_pcr.c
+++ b/drivers/mfd/rtsx_pcr.c
@@ -708,6 +708,25 @@ int rtsx_pci_card_power_off(struct rtsx_pcr *pcr, int card)
 }
 EXPORT_SYMBOL_GPL(rtsx_pci_card_power_off);
 
+int rtsx_pci_card_exclusive_check(struct rtsx_pcr *pcr, int card)
+{
+	unsigned int cd_mask[] = {
+		[RTSX_SD_CARD] = SD_EXIST,
+		[RTSX_MS_CARD] = MS_EXIST
+	};
+
+	if (!pcr->ms_pmos) {
+		/* When using single PMOS, accessing card is not permitted
+		 * if the existing card is not the designated one.
+		 */
+		if (pcr->card_exist & (~cd_mask[card]))
+			return -EIO;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_card_exclusive_check);
+
 int rtsx_pci_switch_output_voltage(struct rtsx_pcr *pcr, u8 voltage)
 {
 	if (pcr->ops->switch_output_voltage)
@@ -784,6 +803,9 @@ static void rtsx_pci_card_detect(struct work_struct *work)
 			card_inserted = pcr->ops->cd_deglitch(pcr);
 
 		card_detect = card_inserted | card_removed;
+
+		pcr->card_exist |= card_inserted;
+		pcr->card_exist &= ~card_removed;
 	}
 
 	mutex_unlock(&pcr->pcr_mutex);
@@ -976,6 +998,14 @@ static int rtsx_pci_init_hw(struct rtsx_pcr *pcr)
 			return err;
 	}
 
+	/* No CD interrupt if probing driver with card inserted.
+	 * So we need to initialize pcr->card_exist here.
+	 */
+	if (pcr->ops->cd_deglitch)
+		pcr->card_exist = pcr->ops->cd_deglitch(pcr);
+	else
+		pcr->card_exist = rtsx_pci_readl(pcr, RTSX_BIPR) & CARD_EXIST;
+
 	return 0;
 }
 
diff --git a/drivers/mmc/host/rtsx_pci_sdmmc.c b/drivers/mmc/host/rtsx_pci_sdmmc.c
index f74b5adca642..468c92303167 100644
--- a/drivers/mmc/host/rtsx_pci_sdmmc.c
+++ b/drivers/mmc/host/rtsx_pci_sdmmc.c
@@ -678,12 +678,19 @@ static void sdmmc_request(struct mmc_host *mmc, struct mmc_request *mrq)
 	struct mmc_command *cmd = mrq->cmd;
 	struct mmc_data *data = mrq->data;
 	unsigned int data_size = 0;
+	int err;
 
 	if (host->eject) {
 		cmd->error = -ENOMEDIUM;
 		goto finish;
 	}
 
+	err = rtsx_pci_card_exclusive_check(host->pcr, RTSX_SD_CARD);
+	if (err) {
+		cmd->error = err;
+		goto finish;
+	}
+
 	mutex_lock(&pcr->pcr_mutex);
 
 	rtsx_pci_start_run(pcr);
@@ -901,6 +908,9 @@ static void sdmmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 	if (host->eject)
 		return;
 
+	if (rtsx_pci_card_exclusive_check(host->pcr, RTSX_SD_CARD))
+		return;
+
 	mutex_lock(&pcr->pcr_mutex);
 
 	rtsx_pci_start_run(pcr);
@@ -1073,6 +1083,10 @@ static int sdmmc_switch_voltage(struct mmc_host *mmc, struct mmc_ios *ios)
 	if (host->eject)
 		return -ENOMEDIUM;
 
+	err = rtsx_pci_card_exclusive_check(host->pcr, RTSX_SD_CARD);
+	if (err)
+		return err;
+
 	mutex_lock(&pcr->pcr_mutex);
 
 	rtsx_pci_start_run(pcr);
@@ -1122,6 +1136,10 @@ static int sdmmc_execute_tuning(struct mmc_host *mmc, u32 opcode)
 	if (host->eject)
 		return -ENOMEDIUM;
 
+	err = rtsx_pci_card_exclusive_check(host->pcr, RTSX_SD_CARD);
+	if (err)
+		return err;
+
 	mutex_lock(&pcr->pcr_mutex);
 
 	rtsx_pci_start_run(pcr);
diff --git a/include/linux/mfd/rtsx_pci.h b/include/linux/mfd/rtsx_pci.h
index 5d9b81e8aff4..26ea7f1b7caf 100644
--- a/include/linux/mfd/rtsx_pci.h
+++ b/include/linux/mfd/rtsx_pci.h
@@ -740,6 +740,7 @@ struct rtsx_pcr {
 
 	unsigned int			card_inserted;
 	unsigned int			card_removed;
+	unsigned int			card_exist;
 
 	struct delayed_work		carddet_work;
 	struct delayed_work		idle_work;
@@ -804,6 +805,7 @@ int rtsx_pci_switch_clock(struct rtsx_pcr *pcr, unsigned int card_clock,
 		u8 ssc_depth, bool initial_mode, bool double_clk, bool vpclk);
 int rtsx_pci_card_power_on(struct rtsx_pcr *pcr, int card);
 int rtsx_pci_card_power_off(struct rtsx_pcr *pcr, int card);
+int rtsx_pci_card_exclusive_check(struct rtsx_pcr *pcr, int card);
 int rtsx_pci_switch_output_voltage(struct rtsx_pcr *pcr, u8 voltage);
 unsigned int rtsx_pci_card_exist(struct rtsx_pcr *pcr);
 void rtsx_pci_complete_unfinished_transfer(struct rtsx_pcr *pcr);
-- 
cgit v1.2.3


From 407af3299ef1ac7e87ce3fb530e32a009d1a9efd Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vyasevic@redhat.com>
Date: Wed, 13 Feb 2013 12:00:12 +0000
Subject: bridge: Add netlink interface to configure vlans on bridge ports

Add a netlink interface to add and remove vlan configuration on bridge port.
The interface uses the RTM_SETLINK message and encodes the vlan
configuration inside the IFLA_AF_SPEC.  It is possble to include multiple
vlans to either add or remove in a single message.

Signed-off-by: Vlad Yasevich <vyasevic@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h      |   2 +
 include/uapi/linux/if_bridge.h |   9 +++
 net/bridge/br_device.c         |   1 +
 net/bridge/br_if.c             |   1 +
 net/bridge/br_netlink.c        | 139 +++++++++++++++++++++++++++++++++++------
 net/bridge/br_private.h        |   1 +
 net/core/rtnetlink.c           |  72 +++++++++++++++++++++
 7 files changed, 207 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 25bd46f52877..1b90f9401000 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1020,6 +1020,8 @@ struct net_device_ops {
 	int			(*ndo_bridge_getlink)(struct sk_buff *skb,
 						      u32 pid, u32 seq,
 						      struct net_device *dev);
+	int			(*ndo_bridge_dellink)(struct net_device *dev,
+						      struct nlmsghdr *nlh);
 	int			(*ndo_change_carrier)(struct net_device *dev,
 						      bool new_carrier);
 };
diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h
index 5db297514aec..3ca9817ca7e8 100644
--- a/include/uapi/linux/if_bridge.h
+++ b/include/uapi/linux/if_bridge.h
@@ -108,15 +108,24 @@ struct __fdb_entry {
  * [IFLA_AF_SPEC] = {
  *     [IFLA_BRIDGE_FLAGS]
  *     [IFLA_BRIDGE_MODE]
+ *     [IFLA_BRIDGE_VLAN_INFO]
  * }
  */
 enum {
 	IFLA_BRIDGE_FLAGS,
 	IFLA_BRIDGE_MODE,
+	IFLA_BRIDGE_VLAN_INFO,
 	__IFLA_BRIDGE_MAX,
 };
 #define IFLA_BRIDGE_MAX (__IFLA_BRIDGE_MAX - 1)
 
+#define BRIDGE_VLAN_INFO_MASTER	(1<<0)	/* Operate on Bridge device as well */
+
+struct bridge_vlan_info {
+	u16 flags;
+	u16 vid;
+};
+
 /* Bridge multicast database attributes
  * [MDBA_MDB] = {
  *     [MDBA_MDB_ENTRY] = {
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 35a2c2c84f33..091bedf266a0 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -316,6 +316,7 @@ static const struct net_device_ops br_netdev_ops = {
 	.ndo_fdb_dump		 = br_fdb_dump,
 	.ndo_bridge_getlink	 = br_getlink,
 	.ndo_bridge_setlink	 = br_setlink,
+	.ndo_bridge_dellink	 = br_dellink,
 };
 
 static void br_dev_free(struct net_device *dev)
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index af9d65ab4001..335c60cebfd1 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -23,6 +23,7 @@
 #include <linux/if_ether.h>
 #include <linux/slab.h>
 #include <net/sock.h>
+#include <linux/if_vlan.h>
 
 #include "br_private.h"
 
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 39ca9796f3f7..534a9f4587a9 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -16,6 +16,7 @@
 #include <net/rtnetlink.h>
 #include <net/net_namespace.h>
 #include <net/sock.h>
+#include <uapi/linux/if_bridge.h>
 
 #include "br_private.h"
 #include "br_private_stp.h"
@@ -119,10 +120,14 @@ nla_put_failure:
  */
 void br_ifinfo_notify(int event, struct net_bridge_port *port)
 {
-	struct net *net = dev_net(port->dev);
+	struct net *net;
 	struct sk_buff *skb;
 	int err = -ENOBUFS;
 
+	if (!port)
+		return;
+
+	net = dev_net(port->dev);
 	br_debug(port->br, "port %u(%s) event %d\n",
 		 (unsigned int)port->port_no, port->dev->name, event);
 
@@ -144,6 +149,7 @@ errout:
 		rtnl_set_sk_err(net, RTNLGRP_LINK, err);
 }
 
+
 /*
  * Dump information about all ports, in response to GETLINK
  */
@@ -162,6 +168,64 @@ out:
 	return err;
 }
 
+const struct nla_policy ifla_br_policy[IFLA_MAX+1] = {
+	[IFLA_BRIDGE_FLAGS]	= { .type = NLA_U16 },
+	[IFLA_BRIDGE_MODE]	= { .type = NLA_U16 },
+	[IFLA_BRIDGE_VLAN_INFO]	= { .type = NLA_BINARY,
+				    .len = sizeof(struct bridge_vlan_info), },
+};
+
+static int br_afspec(struct net_bridge *br,
+		     struct net_bridge_port *p,
+		     struct nlattr *af_spec,
+		     int cmd)
+{
+	struct nlattr *tb[IFLA_BRIDGE_MAX+1];
+	int err = 0;
+
+	err = nla_parse_nested(tb, IFLA_BRIDGE_MAX, af_spec, ifla_br_policy);
+	if (err)
+		return err;
+
+	if (tb[IFLA_BRIDGE_VLAN_INFO]) {
+		struct bridge_vlan_info *vinfo;
+
+		vinfo = nla_data(tb[IFLA_BRIDGE_VLAN_INFO]);
+
+		if (vinfo->vid >= VLAN_N_VID)
+			return -EINVAL;
+
+		switch (cmd) {
+		case RTM_SETLINK:
+			if (p) {
+				err = nbp_vlan_add(p, vinfo->vid);
+				if (err)
+					break;
+
+				if (vinfo->flags & BRIDGE_VLAN_INFO_MASTER)
+					err = br_vlan_add(p->br, vinfo->vid);
+			} else
+				err = br_vlan_add(br, vinfo->vid);
+
+			if (err)
+				break;
+
+			break;
+
+		case RTM_DELLINK:
+			if (p) {
+				nbp_vlan_delete(p, vinfo->vid);
+				if (vinfo->flags & BRIDGE_VLAN_INFO_MASTER)
+					br_vlan_delete(p->br, vinfo->vid);
+			} else
+				br_vlan_delete(br, vinfo->vid);
+			break;
+		}
+	}
+
+	return err;
+}
+
 static const struct nla_policy ifla_brport_policy[IFLA_BRPORT_MAX + 1] = {
 	[IFLA_BRPORT_STATE]	= { .type = NLA_U8 },
 	[IFLA_BRPORT_COST]	= { .type = NLA_U32 },
@@ -241,6 +305,7 @@ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh)
 {
 	struct ifinfomsg *ifm;
 	struct nlattr *protinfo;
+	struct nlattr *afspec;
 	struct net_bridge_port *p;
 	struct nlattr *tb[IFLA_BRPORT_MAX + 1];
 	int err;
@@ -248,38 +313,76 @@ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh)
 	ifm = nlmsg_data(nlh);
 
 	protinfo = nlmsg_find_attr(nlh, sizeof(*ifm), IFLA_PROTINFO);
-	if (!protinfo)
+	afspec = nlmsg_find_attr(nlh, sizeof(*ifm), IFLA_AF_SPEC);
+	if (!protinfo && !afspec)
 		return 0;
 
 	p = br_port_get_rtnl(dev);
-	if (!p)
+	/* We want to accept dev as bridge itself if the AF_SPEC
+	 * is set to see if someone is setting vlan info on the brigde
+	 */
+	if (!p && ((dev->priv_flags & IFF_EBRIDGE) && !afspec))
 		return -EINVAL;
 
-	if (protinfo->nla_type & NLA_F_NESTED) {
-		err = nla_parse_nested(tb, IFLA_BRPORT_MAX,
-				       protinfo, ifla_brport_policy);
+	if (p && protinfo) {
+		if (protinfo->nla_type & NLA_F_NESTED) {
+			err = nla_parse_nested(tb, IFLA_BRPORT_MAX,
+					       protinfo, ifla_brport_policy);
+			if (err)
+				return err;
+
+			spin_lock_bh(&p->br->lock);
+			err = br_setport(p, tb);
+			spin_unlock_bh(&p->br->lock);
+		} else {
+			/* Binary compatability with old RSTP */
+			if (nla_len(protinfo) < sizeof(u8))
+				return -EINVAL;
+
+			spin_lock_bh(&p->br->lock);
+			err = br_set_port_state(p, nla_get_u8(protinfo));
+			spin_unlock_bh(&p->br->lock);
+		}
 		if (err)
-			return err;
-
-		spin_lock_bh(&p->br->lock);
-		err = br_setport(p, tb);
-		spin_unlock_bh(&p->br->lock);
-	} else {
-		/* Binary compatability with old RSTP */
-		if (nla_len(protinfo) < sizeof(u8))
-			return -EINVAL;
+			goto out;
+	}
 
-		spin_lock_bh(&p->br->lock);
-		err = br_set_port_state(p, nla_get_u8(protinfo));
-		spin_unlock_bh(&p->br->lock);
+	if (afspec) {
+		err = br_afspec((struct net_bridge *)netdev_priv(dev), p,
+				afspec, RTM_SETLINK);
 	}
 
 	if (err == 0)
 		br_ifinfo_notify(RTM_NEWLINK, p);
 
+out:
 	return err;
 }
 
+/* Delete port information */
+int br_dellink(struct net_device *dev, struct nlmsghdr *nlh)
+{
+	struct ifinfomsg *ifm;
+	struct nlattr *afspec;
+	struct net_bridge_port *p;
+	int err;
+
+	ifm = nlmsg_data(nlh);
+
+	afspec = nlmsg_find_attr(nlh, sizeof(*ifm), IFLA_AF_SPEC);
+	if (!afspec)
+		return 0;
+
+	p = br_port_get_rtnl(dev);
+	/* We want to accept dev as bridge itself as well */
+	if (!p && !(dev->priv_flags & IFF_EBRIDGE))
+		return -EINVAL;
+
+	err = br_afspec((struct net_bridge *)netdev_priv(dev), p,
+			afspec, RTM_DELLINK);
+
+	return err;
+}
 static int br_validate(struct nlattr *tb[], struct nlattr *data[])
 {
 	if (tb[IFLA_ADDRESS]) {
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index f0f24610d111..a42f9d49a64e 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -713,6 +713,7 @@ extern int br_netlink_init(void);
 extern void br_netlink_fini(void);
 extern void br_ifinfo_notify(int event, struct net_bridge_port *port);
 extern int br_setlink(struct net_device *dev, struct nlmsghdr *nlmsg);
+extern int br_dellink(struct net_device *dev, struct nlmsghdr *nlmsg);
 extern int br_getlink(struct sk_buff *skb, u32 pid, u32 seq,
 		      struct net_device *dev);
 
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index c1e4db60eeca..2c9ccbfbd93c 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -2464,6 +2464,77 @@ out:
 	return err;
 }
 
+static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
+			       void *arg)
+{
+	struct net *net = sock_net(skb->sk);
+	struct ifinfomsg *ifm;
+	struct net_device *dev;
+	struct nlattr *br_spec, *attr = NULL;
+	int rem, err = -EOPNOTSUPP;
+	u16 oflags, flags = 0;
+	bool have_flags = false;
+
+	if (nlmsg_len(nlh) < sizeof(*ifm))
+		return -EINVAL;
+
+	ifm = nlmsg_data(nlh);
+	if (ifm->ifi_family != AF_BRIDGE)
+		return -EPFNOSUPPORT;
+
+	dev = __dev_get_by_index(net, ifm->ifi_index);
+	if (!dev) {
+		pr_info("PF_BRIDGE: RTM_SETLINK with unknown ifindex\n");
+		return -ENODEV;
+	}
+
+	br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
+	if (br_spec) {
+		nla_for_each_nested(attr, br_spec, rem) {
+			if (nla_type(attr) == IFLA_BRIDGE_FLAGS) {
+				have_flags = true;
+				flags = nla_get_u16(attr);
+				break;
+			}
+		}
+	}
+
+	oflags = flags;
+
+	if (!flags || (flags & BRIDGE_FLAGS_MASTER)) {
+		struct net_device *br_dev = netdev_master_upper_dev_get(dev);
+
+		if (!br_dev || !br_dev->netdev_ops->ndo_bridge_dellink) {
+			err = -EOPNOTSUPP;
+			goto out;
+		}
+
+		err = br_dev->netdev_ops->ndo_bridge_dellink(dev, nlh);
+		if (err)
+			goto out;
+
+		flags &= ~BRIDGE_FLAGS_MASTER;
+	}
+
+	if ((flags & BRIDGE_FLAGS_SELF)) {
+		if (!dev->netdev_ops->ndo_bridge_dellink)
+			err = -EOPNOTSUPP;
+		else
+			err = dev->netdev_ops->ndo_bridge_dellink(dev, nlh);
+
+		if (!err)
+			flags &= ~BRIDGE_FLAGS_SELF;
+	}
+
+	if (have_flags)
+		memcpy(nla_data(attr), &flags, sizeof(flags));
+	/* Generate event to notify upper layer of bridge change */
+	if (!err)
+		err = rtnl_bridge_notify(dev, oflags);
+out:
+	return err;
+}
+
 /* Protected by RTNL sempahore.  */
 static struct rtattr **rta_buf;
 static int rtattr_max;
@@ -2647,6 +2718,7 @@ void __init rtnetlink_init(void)
 	rtnl_register(PF_BRIDGE, RTM_GETNEIGH, NULL, rtnl_fdb_dump, NULL);
 
 	rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, rtnl_bridge_getlink, NULL);
+	rtnl_register(PF_BRIDGE, RTM_DELLINK, rtnl_bridge_dellink, NULL, NULL);
 	rtnl_register(PF_BRIDGE, RTM_SETLINK, rtnl_bridge_setlink, NULL, NULL);
 }
 
-- 
cgit v1.2.3


From 6cbdceeb1cb12c7d620161925a8c3e81daadb2e4 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vyasevic@redhat.com>
Date: Wed, 13 Feb 2013 12:00:13 +0000
Subject: bridge: Dump vlan information from a bridge port

Using the RTM_GETLINK dump the vlan filter list of a given
bridge port.  The information depends on setting the filter
flag similar to how nic VF info is dumped.

Signed-off-by: Vlad Yasevich <vyasevic@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c |  3 +-
 include/linux/netdevice.h                     |  3 +-
 include/uapi/linux/rtnetlink.h                |  1 +
 net/bridge/br_netlink.c                       | 94 +++++++++++++++++++++++----
 net/bridge/br_private.h                       |  3 +-
 net/bridge/br_vlan.c                          |  2 +
 net/core/rtnetlink.c                          | 16 +++--
 7 files changed, 104 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 6999269b3a4a..4e2aa47193cb 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -7079,7 +7079,8 @@ static int ixgbe_ndo_bridge_setlink(struct net_device *dev,
 }
 
 static int ixgbe_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
-				    struct net_device *dev)
+				    struct net_device *dev,
+				    u32 filter_mask)
 {
 	struct ixgbe_adapter *adapter = netdev_priv(dev);
 	u16 mode;
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 1b90f9401000..1964ca66df56 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1019,7 +1019,8 @@ struct net_device_ops {
 						      struct nlmsghdr *nlh);
 	int			(*ndo_bridge_getlink)(struct sk_buff *skb,
 						      u32 pid, u32 seq,
-						      struct net_device *dev);
+						      struct net_device *dev,
+						      u32 filter_mask);
 	int			(*ndo_bridge_dellink)(struct net_device *dev,
 						      struct nlmsghdr *nlh);
 	int			(*ndo_change_carrier)(struct net_device *dev,
diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index 7a5eb196ade9..7a2144e1afae 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -630,6 +630,7 @@ struct tcamsg {
 
 /* New extended info filters for IFLA_EXT_MASK */
 #define RTEXT_FILTER_VF		(1 << 0)
+#define RTEXT_FILTER_BRVLAN	(1 << 1)
 
 /* End of information exported to user level */
 
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 534a9f4587a9..fe1980d5a7e4 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -65,15 +65,21 @@ static int br_port_fill_attrs(struct sk_buff *skb,
  * Create one netlink message for one interface
  * Contains port and master info as well as carrier and bridge state.
  */
-static int br_fill_ifinfo(struct sk_buff *skb, const struct net_bridge_port *port,
-			  u32 pid, u32 seq, int event, unsigned int flags)
+static int br_fill_ifinfo(struct sk_buff *skb,
+			  const struct net_bridge_port *port,
+			  u32 pid, u32 seq, int event, unsigned int flags,
+			  u32 filter_mask, const struct net_device *dev)
 {
-	const struct net_bridge *br = port->br;
-	const struct net_device *dev = port->dev;
+	const struct net_bridge *br;
 	struct ifinfomsg *hdr;
 	struct nlmsghdr *nlh;
 	u8 operstate = netif_running(dev) ? dev->operstate : IF_OPER_DOWN;
 
+	if (port)
+		br = port->br;
+	else
+		br = netdev_priv(dev);
+
 	br_debug(br, "br_fill_info event %d port %s master %s\n",
 		     event, dev->name, br->dev->name);
 
@@ -99,7 +105,7 @@ static int br_fill_ifinfo(struct sk_buff *skb, const struct net_bridge_port *por
 	     nla_put_u32(skb, IFLA_LINK, dev->iflink)))
 		goto nla_put_failure;
 
-	if (event == RTM_NEWLINK) {
+	if (event == RTM_NEWLINK && port) {
 		struct nlattr *nest
 			= nla_nest_start(skb, IFLA_PROTINFO | NLA_F_NESTED);
 
@@ -108,6 +114,40 @@ static int br_fill_ifinfo(struct sk_buff *skb, const struct net_bridge_port *por
 		nla_nest_end(skb, nest);
 	}
 
+	/* Check if  the VID information is requested */
+	if (filter_mask & RTEXT_FILTER_BRVLAN) {
+		struct nlattr *af;
+		const struct net_port_vlans *pv;
+		struct bridge_vlan_info vinfo;
+		u16 vid;
+
+		if (port)
+			pv = nbp_get_vlan_info(port);
+		else
+			pv = br_get_vlan_info(br);
+
+		if (!pv || bitmap_empty(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN))
+			goto done;
+
+		af = nla_nest_start(skb, IFLA_AF_SPEC);
+		if (!af)
+			goto nla_put_failure;
+
+		for (vid = find_first_bit(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN);
+		     vid < BR_VLAN_BITMAP_LEN;
+		     vid = find_next_bit(pv->vlan_bitmap,
+					 BR_VLAN_BITMAP_LEN, vid+1)) {
+			vinfo.vid = vid;
+			vinfo.flags = 0;
+			if (nla_put(skb, IFLA_BRIDGE_VLAN_INFO,
+				    sizeof(vinfo), &vinfo))
+				goto nla_put_failure;
+		}
+
+		nla_nest_end(skb, af);
+	}
+
+done:
 	return nlmsg_end(skb, nlh);
 
 nla_put_failure:
@@ -135,7 +175,7 @@ void br_ifinfo_notify(int event, struct net_bridge_port *port)
 	if (skb == NULL)
 		goto errout;
 
-	err = br_fill_ifinfo(skb, port, 0, 0, event, 0);
+	err = br_fill_ifinfo(skb, port, 0, 0, event, 0, 0, port->dev);
 	if (err < 0) {
 		/* -EMSGSIZE implies BUG in br_nlmsg_size() */
 		WARN_ON(err == -EMSGSIZE);
@@ -154,16 +194,17 @@ errout:
  * Dump information about all ports, in response to GETLINK
  */
 int br_getlink(struct sk_buff *skb, u32 pid, u32 seq,
-	       struct net_device *dev)
+	       struct net_device *dev, u32 filter_mask)
 {
 	int err = 0;
 	struct net_bridge_port *port = br_port_get_rcu(dev);
 
-	/* not a bridge port */
-	if (!port)
+	/* not a bridge port and  */
+	if (!port && !(filter_mask & RTEXT_FILTER_BRVLAN))
 		goto out;
 
-	err = br_fill_ifinfo(skb, port, pid, seq, RTM_NEWLINK, NLM_F_MULTI);
+	err = br_fill_ifinfo(skb, port, pid, seq, RTM_NEWLINK, NLM_F_MULTI,
+			     filter_mask, dev);
 out:
 	return err;
 }
@@ -395,6 +436,29 @@ static int br_validate(struct nlattr *tb[], struct nlattr *data[])
 	return 0;
 }
 
+static size_t br_get_link_af_size(const struct net_device *dev)
+{
+	struct net_port_vlans *pv;
+
+	if (br_port_exists(dev))
+		pv = nbp_get_vlan_info(br_port_get_rcu(dev));
+	else if (dev->priv_flags & IFF_EBRIDGE)
+		pv = br_get_vlan_info((struct net_bridge *)netdev_priv(dev));
+	else
+		return 0;
+
+	if (!pv)
+		return 0;
+
+	/* Each VLAN is returned in bridge_vlan_info along with flags */
+	return pv->num_vlans * nla_total_size(sizeof(struct bridge_vlan_info));
+}
+
+struct rtnl_af_ops br_af_ops = {
+	.family			= AF_BRIDGE,
+	.get_link_af_size	= br_get_link_af_size,
+};
+
 struct rtnl_link_ops br_link_ops __read_mostly = {
 	.kind		= "bridge",
 	.priv_size	= sizeof(struct net_bridge),
@@ -408,11 +472,18 @@ int __init br_netlink_init(void)
 	int err;
 
 	br_mdb_init();
-	err = rtnl_link_register(&br_link_ops);
+	err = rtnl_af_register(&br_af_ops);
 	if (err)
 		goto out;
 
+	err = rtnl_link_register(&br_link_ops);
+	if (err)
+		goto out_af;
+
 	return 0;
+
+out_af:
+	rtnl_af_unregister(&br_af_ops);
 out:
 	br_mdb_uninit();
 	return err;
@@ -421,5 +492,6 @@ out:
 void __exit br_netlink_fini(void)
 {
 	br_mdb_uninit();
+	rtnl_af_unregister(&br_af_ops);
 	rtnl_link_unregister(&br_link_ops);
 }
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index a42f9d49a64e..ce2235255c2f 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -73,6 +73,7 @@ struct net_port_vlans {
 	}				parent;
 	struct rcu_head			rcu;
 	unsigned long			vlan_bitmap[BR_VLAN_BITMAP_LEN];
+	u16				num_vlans;
 };
 
 struct net_bridge_fdb_entry
@@ -715,7 +716,7 @@ extern void br_ifinfo_notify(int event, struct net_bridge_port *port);
 extern int br_setlink(struct net_device *dev, struct nlmsghdr *nlmsg);
 extern int br_dellink(struct net_device *dev, struct nlmsghdr *nlmsg);
 extern int br_getlink(struct sk_buff *skb, u32 pid, u32 seq,
-		      struct net_device *dev);
+		      struct net_device *dev, u32 filter_mask);
 
 #ifdef CONFIG_SYSFS
 /* br_sysfs_if.c */
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index d8690bfe63d4..f2bf5a197ea3 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -28,6 +28,7 @@ static int __vlan_add(struct net_port_vlans *v, u16 vid)
 	}
 
 	set_bit(vid, v->vlan_bitmap);
+	v->num_vlans++;
 	return 0;
 }
 
@@ -44,6 +45,7 @@ static int __vlan_del(struct net_port_vlans *v, u16 vid)
 	}
 
 	clear_bit(vid, v->vlan_bitmap);
+	v->num_vlans--;
 	if (bitmap_empty(v->vlan_bitmap, BR_VLAN_BITMAP_LEN)) {
 		if (v->port_idx)
 			rcu_assign_pointer(v->parent.port->vlan_info, NULL);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 2c9ccbfbd93c..f3a112ec86d5 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -2315,6 +2315,13 @@ static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb)
 	int idx = 0;
 	u32 portid = NETLINK_CB(cb->skb).portid;
 	u32 seq = cb->nlh->nlmsg_seq;
+	struct nlattr *extfilt;
+	u32 filter_mask = 0;
+
+	extfilt = nlmsg_find_attr(cb->nlh, sizeof(struct rtgenmsg),
+				  IFLA_EXT_MASK);
+	if (extfilt)
+		filter_mask = nla_get_u32(extfilt);
 
 	rcu_read_lock();
 	for_each_netdev_rcu(net, dev) {
@@ -2324,14 +2331,15 @@ static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb)
 		if (br_dev && br_dev->netdev_ops->ndo_bridge_getlink) {
 			if (idx >= cb->args[0] &&
 			    br_dev->netdev_ops->ndo_bridge_getlink(
-				    skb, portid, seq, dev) < 0)
+				    skb, portid, seq, dev, filter_mask) < 0)
 				break;
 			idx++;
 		}
 
 		if (ops->ndo_bridge_getlink) {
 			if (idx >= cb->args[0] &&
-			    ops->ndo_bridge_getlink(skb, portid, seq, dev) < 0)
+			    ops->ndo_bridge_getlink(skb, portid, seq, dev,
+						    filter_mask) < 0)
 				break;
 			idx++;
 		}
@@ -2372,14 +2380,14 @@ static int rtnl_bridge_notify(struct net_device *dev, u16 flags)
 
 	if ((!flags || (flags & BRIDGE_FLAGS_MASTER)) &&
 	    br_dev && br_dev->netdev_ops->ndo_bridge_getlink) {
-		err = br_dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev);
+		err = br_dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev, 0);
 		if (err < 0)
 			goto errout;
 	}
 
 	if ((flags & BRIDGE_FLAGS_SELF) &&
 	    dev->netdev_ops->ndo_bridge_getlink) {
-		err = dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev);
+		err = dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev, 0);
 		if (err < 0)
 			goto errout;
 	}
-- 
cgit v1.2.3


From 1690be63a27b20ae65c792729a44f5970561ffa4 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vyasevic@redhat.com>
Date: Wed, 13 Feb 2013 12:00:18 +0000
Subject: bridge: Add vlan support to static neighbors

When a user adds bridge neighbors, allow him to specify VLAN id.
If the VLAN id is not specified, the neighbor will be added
for VLANs currently in the ports filter list.  If no VLANs are
configured on the port, we use vlan 0 and only add 1 entry.

Signed-off-by: Vlad Yasevich <vyasevic@redhat.com>
Acked-by: Jitendra Kalsaria <jitendra.kalsaria@qlogic.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c    |   2 +-
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c   |   1 +
 drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c |   4 +-
 drivers/net/macvlan.c                            |   2 +-
 drivers/net/vxlan.c                              |   3 +-
 include/linux/netdevice.h                        |   4 +-
 include/uapi/linux/neighbour.h                   |   1 +
 net/bridge/br_fdb.c                              | 148 ++++++++++++++++++++---
 net/bridge/br_private.h                          |   6 +-
 net/core/rtnetlink.c                             |  26 ++--
 10 files changed, 162 insertions(+), 35 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 4e2aa47193cb..1c0efcb7920f 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -7002,7 +7002,7 @@ static int ixgbe_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
 	return err;
 }
 
-static int ixgbe_ndo_fdb_del(struct ndmsg *ndm,
+static int ixgbe_ndo_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
 			     struct net_device *dev,
 			     const unsigned char *addr)
 {
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 937bcc3d3212..5088dc5c3d1a 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -1959,6 +1959,7 @@ static int mlx4_en_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
 }
 
 static int mlx4_en_fdb_del(struct ndmsg *ndm,
+			   struct nlattr *tb[],
 			   struct net_device *dev,
 			   const unsigned char *addr)
 {
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
index b745194391a1..b95316831587 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
@@ -247,8 +247,8 @@ static int qlcnic_set_mac(struct net_device *netdev, void *p)
 	return 0;
 }
 
-static int qlcnic_fdb_del(struct ndmsg *ndm, struct net_device *netdev,
-			const unsigned char *addr)
+static int qlcnic_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
+			struct net_device *netdev, const unsigned char *addr)
 {
 	struct qlcnic_adapter *adapter = netdev_priv(netdev);
 	int err = -EOPNOTSUPP;
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index e4b8078e88a9..defcd8a85744 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -599,7 +599,7 @@ static int macvlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
 	return err;
 }
 
-static int macvlan_fdb_del(struct ndmsg *ndm,
+static int macvlan_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
 			   struct net_device *dev,
 			   const unsigned char *addr)
 {
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 72485b9b9005..9d70421cf3a0 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -393,7 +393,8 @@ static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
 }
 
 /* Delete entry (via netlink) */
-static int vxlan_fdb_delete(struct ndmsg *ndm, struct net_device *dev,
+static int vxlan_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
+			    struct net_device *dev,
 			    const unsigned char *addr)
 {
 	struct vxlan_dev *vxlan = netdev_priv(dev);
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 1964ca66df56..9deb672d999f 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -884,7 +884,8 @@ struct netdev_fcoe_hbainfo {
  *		      struct net_device *dev,
  *		      const unsigned char *addr, u16 flags)
  *	Adds an FDB entry to dev for addr.
- * int (*ndo_fdb_del)(struct ndmsg *ndm, struct net_device *dev,
+ * int (*ndo_fdb_del)(struct ndmsg *ndm, struct nlattr *tb[],
+ *		      struct net_device *dev,
  *		      const unsigned char *addr)
  *	Deletes the FDB entry from dev coresponding to addr.
  * int (*ndo_fdb_dump)(struct sk_buff *skb, struct netlink_callback *cb,
@@ -1008,6 +1009,7 @@ struct net_device_ops {
 					       const unsigned char *addr,
 					       u16 flags);
 	int			(*ndo_fdb_del)(struct ndmsg *ndm,
+					       struct nlattr *tb[],
 					       struct net_device *dev,
 					       const unsigned char *addr);
 	int			(*ndo_fdb_dump)(struct sk_buff *skb,
diff --git a/include/uapi/linux/neighbour.h b/include/uapi/linux/neighbour.h
index 275e5d65dcb2..adb068c53c4e 100644
--- a/include/uapi/linux/neighbour.h
+++ b/include/uapi/linux/neighbour.h
@@ -20,6 +20,7 @@ enum {
 	NDA_LLADDR,
 	NDA_CACHEINFO,
 	NDA_PROBES,
+	NDA_VLAN,
 	__NDA_MAX
 };
 
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 276a52254606..4b75ad43aa85 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -505,6 +505,10 @@ static int fdb_fill_info(struct sk_buff *skb, const struct net_bridge *br,
 	ci.ndm_refcnt	 = 0;
 	if (nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
 		goto nla_put_failure;
+
+	if (nla_put(skb, NDA_VLAN, sizeof(u16), &fdb->vlan_id))
+		goto nla_put_failure;
+
 	return nlmsg_end(skb, nlh);
 
 nla_put_failure:
@@ -516,6 +520,7 @@ static inline size_t fdb_nlmsg_size(void)
 {
 	return NLMSG_ALIGN(sizeof(struct ndmsg))
 		+ nla_total_size(ETH_ALEN) /* NDA_LLADDR */
+		+ nla_total_size(sizeof(u16)) /* NDA_VLAN */
 		+ nla_total_size(sizeof(struct nda_cacheinfo));
 }
 
@@ -617,6 +622,25 @@ static int fdb_add_entry(struct net_bridge_port *source, const __u8 *addr,
 	return 0;
 }
 
+static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge_port *p,
+	       const unsigned char *addr, u16 nlh_flags, u16 vid)
+{
+	int err = 0;
+
+	if (ndm->ndm_flags & NTF_USE) {
+		rcu_read_lock();
+		br_fdb_update(p->br, p, addr, vid);
+		rcu_read_unlock();
+	} else {
+		spin_lock_bh(&p->br->hash_lock);
+		err = fdb_add_entry(p, addr, ndm->ndm_state,
+				    nlh_flags, vid);
+		spin_unlock_bh(&p->br->hash_lock);
+	}
+
+	return err;
+}
+
 /* Add new permanent fdb entry with RTM_NEWNEIGH */
 int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
 	       struct net_device *dev,
@@ -624,12 +648,29 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
 {
 	struct net_bridge_port *p;
 	int err = 0;
+	struct net_port_vlans *pv;
+	unsigned short vid = VLAN_N_VID;
 
 	if (!(ndm->ndm_state & (NUD_PERMANENT|NUD_NOARP|NUD_REACHABLE))) {
 		pr_info("bridge: RTM_NEWNEIGH with invalid state %#x\n", ndm->ndm_state);
 		return -EINVAL;
 	}
 
+	if (tb[NDA_VLAN]) {
+		if (nla_len(tb[NDA_VLAN]) != sizeof(unsigned short)) {
+			pr_info("bridge: RTM_NEWNEIGH with invalid vlan\n");
+			return -EINVAL;
+		}
+
+		vid = nla_get_u16(tb[NDA_VLAN]);
+
+		if (vid >= VLAN_N_VID) {
+			pr_info("bridge: RTM_NEWNEIGH with invalid vlan id %d\n",
+				vid);
+			return -EINVAL;
+		}
+	}
+
 	p = br_port_get_rtnl(dev);
 	if (p == NULL) {
 		pr_info("bridge: RTM_NEWNEIGH %s not a bridge port\n",
@@ -637,41 +678,90 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
 		return -EINVAL;
 	}
 
-	if (ndm->ndm_flags & NTF_USE) {
-		rcu_read_lock();
-		br_fdb_update(p->br, p, addr, 0);
-		rcu_read_unlock();
+	pv = nbp_get_vlan_info(p);
+	if (vid != VLAN_N_VID) {
+		if (!pv || !test_bit(vid, pv->vlan_bitmap)) {
+			pr_info("bridge: RTM_NEWNEIGH with unconfigured "
+				"vlan %d on port %s\n", vid, dev->name);
+			return -EINVAL;
+		}
+
+		/* VID was specified, so use it. */
+		err = __br_fdb_add(ndm, p, addr, nlh_flags, vid);
 	} else {
-		spin_lock_bh(&p->br->hash_lock);
-		err = fdb_add_entry(p, addr, ndm->ndm_state, nlh_flags,
-				0);
-		spin_unlock_bh(&p->br->hash_lock);
+		if (!pv || bitmap_empty(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN)) {
+			err = __br_fdb_add(ndm, p, addr, nlh_flags, 0);
+			goto out;
+		}
+
+		/* We have vlans configured on this port and user didn't
+		 * specify a VLAN.  To be nice, add/update entry for every
+		 * vlan on this port.
+		 */
+		vid = find_first_bit(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN);
+		while (vid < BR_VLAN_BITMAP_LEN) {
+			err = __br_fdb_add(ndm, p, addr, nlh_flags, vid);
+			if (err)
+				goto out;
+			vid = find_next_bit(pv->vlan_bitmap,
+					    BR_VLAN_BITMAP_LEN, vid+1);
+		}
 	}
 
+out:
 	return err;
 }
 
-static int fdb_delete_by_addr(struct net_bridge_port *p, const u8 *addr)
+static int fdb_delete_by_addr(struct net_bridge *br, const u8 *addr,
+			      u16 vlan)
 {
-	struct net_bridge *br = p->br;
-	struct hlist_head *head = &br->hash[br_mac_hash(addr, 0)];
+	struct hlist_head *head = &br->hash[br_mac_hash(addr, vlan)];
 	struct net_bridge_fdb_entry *fdb;
 
-	fdb = fdb_find(head, addr, 0);
+	fdb = fdb_find(head, addr, vlan);
 	if (!fdb)
 		return -ENOENT;
 
-	fdb_delete(p->br, fdb);
+	fdb_delete(br, fdb);
 	return 0;
 }
 
+static int __br_fdb_delete(struct net_bridge_port *p,
+			   const unsigned char *addr, u16 vid)
+{
+	int err;
+
+	spin_lock_bh(&p->br->hash_lock);
+	err = fdb_delete_by_addr(p->br, addr, vid);
+	spin_unlock_bh(&p->br->hash_lock);
+
+	return err;
+}
+
 /* Remove neighbor entry with RTM_DELNEIGH */
-int br_fdb_delete(struct ndmsg *ndm, struct net_device *dev,
+int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
+		  struct net_device *dev,
 		  const unsigned char *addr)
 {
 	struct net_bridge_port *p;
 	int err;
+	struct net_port_vlans *pv;
+	unsigned short vid = VLAN_N_VID;
 
+	if (tb[NDA_VLAN]) {
+		if (nla_len(tb[NDA_VLAN]) != sizeof(unsigned short)) {
+			pr_info("bridge: RTM_NEWNEIGH with invalid vlan\n");
+			return -EINVAL;
+		}
+
+		vid = nla_get_u16(tb[NDA_VLAN]);
+
+		if (vid >= VLAN_N_VID) {
+			pr_info("bridge: RTM_NEWNEIGH with invalid vlan id %d\n",
+				vid);
+			return -EINVAL;
+		}
+	}
 	p = br_port_get_rtnl(dev);
 	if (p == NULL) {
 		pr_info("bridge: RTM_DELNEIGH %s not a bridge port\n",
@@ -679,9 +769,33 @@ int br_fdb_delete(struct ndmsg *ndm, struct net_device *dev,
 		return -EINVAL;
 	}
 
-	spin_lock_bh(&p->br->hash_lock);
-	err = fdb_delete_by_addr(p, addr);
-	spin_unlock_bh(&p->br->hash_lock);
+	pv = nbp_get_vlan_info(p);
+	if (vid != VLAN_N_VID) {
+		if (!pv || !test_bit(vid, pv->vlan_bitmap)) {
+			pr_info("bridge: RTM_DELNEIGH with unconfigured "
+				"vlan %d on port %s\n", vid, dev->name);
+			return -EINVAL;
+		}
 
+		err = __br_fdb_delete(p, addr, vid);
+	} else {
+		if (!pv || bitmap_empty(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN)) {
+			err = __br_fdb_delete(p, addr, 0);
+			goto out;
+		}
+
+		/* We have vlans configured on this port and user didn't
+		 * specify a VLAN.  To be nice, add/update entry for every
+		 * vlan on this port.
+		 */
+		err = -ENOENT;
+		vid = find_first_bit(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN);
+		while (vid < BR_VLAN_BITMAP_LEN) {
+			err &= __br_fdb_delete(p, addr, vid);
+			vid = find_next_bit(pv->vlan_bitmap,
+					    BR_VLAN_BITMAP_LEN, vid+1);
+		}
+	}
+out:
 	return err;
 }
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 22915c8e9961..799dbb37e5a2 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -388,7 +388,7 @@ extern void br_fdb_update(struct net_bridge *br,
 			  const unsigned char *addr,
 			  u16 vid);
 
-extern int br_fdb_delete(struct ndmsg *ndm,
+extern int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
 			 struct net_device *dev,
 			 const unsigned char *addr);
 extern int br_fdb_add(struct ndmsg *nlh, struct nlattr *tb[],
@@ -577,13 +577,13 @@ extern void nbp_vlan_flush(struct net_bridge_port *port);
 static inline struct net_port_vlans *br_get_vlan_info(
 						const struct net_bridge *br)
 {
-	return rcu_dereference(br->vlan_info);
+	return rcu_dereference_rtnl(br->vlan_info);
 }
 
 static inline struct net_port_vlans *nbp_get_vlan_info(
 						const struct net_bridge_port *p)
 {
-	return rcu_dereference(p->vlan_info);
+	return rcu_dereference_rtnl(p->vlan_info);
 }
 
 /* Since bridge now depends on 8021Q module, but the time bridge sees the
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index f3a112ec86d5..d8aa20f6a46e 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -2119,13 +2119,17 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 {
 	struct net *net = sock_net(skb->sk);
 	struct ndmsg *ndm;
-	struct nlattr *llattr;
+	struct nlattr *tb[NDA_MAX+1];
 	struct net_device *dev;
 	int err = -EINVAL;
 	__u8 *addr;
 
-	if (nlmsg_len(nlh) < sizeof(*ndm))
-		return -EINVAL;
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
+	if (err < 0)
+		return err;
 
 	ndm = nlmsg_data(nlh);
 	if (ndm->ndm_ifindex == 0) {
@@ -2139,13 +2143,17 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 		return -ENODEV;
 	}
 
-	llattr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_LLADDR);
-	if (llattr == NULL || nla_len(llattr) != ETH_ALEN) {
-		pr_info("PF_BRIGDE: RTM_DELNEIGH with invalid address\n");
+	if (!tb[NDA_LLADDR] || nla_len(tb[NDA_LLADDR]) != ETH_ALEN) {
+		pr_info("PF_BRIDGE: RTM_DELNEIGH with invalid address\n");
+		return -EINVAL;
+	}
+
+	addr = nla_data(tb[NDA_LLADDR]);
+	if (!is_valid_ether_addr(addr)) {
+		pr_info("PF_BRIDGE: RTM_DELNEIGH with invalid ether address\n");
 		return -EINVAL;
 	}
 
-	addr = nla_data(llattr);
 	err = -EOPNOTSUPP;
 
 	/* Support fdb on master device the net/bridge default case */
@@ -2155,7 +2163,7 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 		const struct net_device_ops *ops = br_dev->netdev_ops;
 
 		if (ops->ndo_fdb_del)
-			err = ops->ndo_fdb_del(ndm, dev, addr);
+			err = ops->ndo_fdb_del(ndm, tb, dev, addr);
 
 		if (err)
 			goto out;
@@ -2165,7 +2173,7 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 
 	/* Embedded bridge, macvlan, and any other device support */
 	if ((ndm->ndm_flags & NTF_SELF) && dev->netdev_ops->ndo_fdb_del) {
-		err = dev->netdev_ops->ndo_fdb_del(ndm, dev, addr);
+		err = dev->netdev_ops->ndo_fdb_del(ndm, tb, dev, addr);
 
 		if (!err) {
 			rtnl_fdb_notify(dev, addr, RTM_DELNEIGH);
-- 
cgit v1.2.3


From 3ebc21f7bc2f9c0145bbbf0f12430b766a200f9f Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@inktank.com>
Date: Thu, 31 Jan 2013 16:02:01 -0600
Subject: libceph: fix messenger CONFIG_BLOCK dependencies

The ceph messenger has a few spots that are only used when
bio messages are supported, and that's only when CONFIG_BLOCK
is defined.  This surrounds a couple of spots with #ifdef's
that would cause a problem if CONFIG_BLOCK were not present
in the kernel configuration.

This resolves:
    http://tracker.ceph.com/issues/3976

Signed-off-by: Alex Elder <elder@inktank.com>
Reviewed-by: Josh Durgin <josh.durgin@inktank.com>
---
 include/linux/ceph/messenger.h | 2 ++
 net/ceph/messenger.c           | 5 ++++-
 2 files changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h
index 14ba5ee738a9..60903e0f665c 100644
--- a/include/linux/ceph/messenger.h
+++ b/include/linux/ceph/messenger.h
@@ -83,9 +83,11 @@ struct ceph_msg {
 	struct list_head list_head;
 
 	struct kref kref;
+#ifdef CONFIG_BLOCK
 	struct bio  *bio;		/* instead of pages/pagelist */
 	struct bio  *bio_iter;		/* bio iterator */
 	int bio_seg;			/* current bio segment */
+#endif /* CONFIG_BLOCK */
 	struct ceph_pagelist *trail;	/* the trailing part of the data */
 	bool front_is_vmalloc;
 	bool more_to_follow;
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 5ccf87ed8d68..8a62a559a2aa 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -9,8 +9,9 @@
 #include <linux/slab.h>
 #include <linux/socket.h>
 #include <linux/string.h>
+#ifdef	CONFIG_BLOCK
 #include <linux/bio.h>
-#include <linux/blkdev.h>
+#endif	/* CONFIG_BLOCK */
 #include <linux/dns_resolver.h>
 #include <net/tcp.h>
 
@@ -2651,9 +2652,11 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags,
 	m->page_alignment = 0;
 	m->pages = NULL;
 	m->pagelist = NULL;
+#ifdef	CONFIG_BLOCK
 	m->bio = NULL;
 	m->bio_iter = NULL;
 	m->bio_seg = 0;
+#endif	/* CONFIG_BLOCK */
 	m->trail = NULL;
 
 	/* front */
-- 
cgit v1.2.3


From 72fe25e3460c8673984370208e0e6261101372d6 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@inktank.com>
Date: Wed, 30 Jan 2013 11:13:33 -0600
Subject: libceph: add a compatibility check interface

An upcoming change implements semantic change that could lead to
a crash if an old version of the libceph kernel module is used with
a new version of the rbd kernel module.

In order to preclude that possibility, this adds a compatibilty
check interface.  If this interface doesn't exist, the modules are
obviously not compatible.  But if it does exist, this provides a way
of letting the caller know whether it will operate properly with
this libceph module.

Perhaps confusingly, it returns false right now.  The semantic
change mentioned above will make it return true.

This resolves:
    http://tracker.ceph.com/issues/3800

Signed-off-by: Alex Elder <elder@inktank.com>
Reviewed-by: Josh Durgin <josh.durgin@inktank.com>
---
 include/linux/ceph/libceph.h |  2 ++
 net/ceph/ceph_common.c       | 16 ++++++++++++++++
 2 files changed, 18 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
index 084d3c622b12..c44275ab375c 100644
--- a/include/linux/ceph/libceph.h
+++ b/include/linux/ceph/libceph.h
@@ -193,6 +193,8 @@ static inline int calc_pages_for(u64 off, u64 len)
 }
 
 /* ceph_common.c */
+extern bool libceph_compatible(void *data);
+
 extern const char *ceph_msg_type_name(int type);
 extern int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid);
 extern struct kmem_cache *ceph_inode_cachep;
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index ee71ea26777a..a98c03ff853f 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -26,6 +26,22 @@
 #include "crypto.h"
 
 
+/*
+ * Module compatibility interface.  For now it doesn't do anything,
+ * but its existence signals a certain level of functionality.
+ *
+ * The data buffer is used to pass information both to and from
+ * libceph.  The return value indicates whether libceph determines
+ * it is compatible with the caller (from another kernel module),
+ * given the provided data.
+ *
+ * The data pointer can be null.
+ */
+bool libceph_compatible(void *data)
+{
+	return false;
+}
+EXPORT_SYMBOL(libceph_compatible);
 
 /*
  * find filename portion of a path (/foo/bar/baz -> baz)
-- 
cgit v1.2.3


From 112202d9098aae2c36436e5178c0cf3ced423c7b Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 13 Feb 2013 19:29:12 -0800
Subject: workqueue: rename cpu_workqueue to pool_workqueue

workqueue has moved away from global_cwqs to worker_pools and with the
scheduled custom worker pools, wforkqueues will be associated with
pools which don't have anything to do with CPUs.  The workqueue code
went through significant amount of changes recently and mass renaming
isn't likely to hurt much additionally.  Let's replace 'cpu' with
'pool' so that it reflects the current design.

* s/struct cpu_workqueue_struct/struct pool_workqueue/
* s/cpu_wq/pool_wq/
* s/cwq/pwq/

This patch is purely cosmetic.

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/workqueue.h        |  12 +-
 include/trace/events/workqueue.h |  10 +-
 kernel/workqueue.c               | 433 +++++++++++++++++++--------------------
 kernel/workqueue_internal.h      |   2 +-
 4 files changed, 228 insertions(+), 229 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index a3d7556510c3..8afab27cdbc2 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -27,7 +27,7 @@ void delayed_work_timer_fn(unsigned long __data);
 enum {
 	WORK_STRUCT_PENDING_BIT	= 0,	/* work item is pending execution */
 	WORK_STRUCT_DELAYED_BIT	= 1,	/* work item is delayed */
-	WORK_STRUCT_CWQ_BIT	= 2,	/* data points to cwq */
+	WORK_STRUCT_PWQ_BIT	= 2,	/* data points to pwq */
 	WORK_STRUCT_LINKED_BIT	= 3,	/* next work is linked to this one */
 #ifdef CONFIG_DEBUG_OBJECTS_WORK
 	WORK_STRUCT_STATIC_BIT	= 4,	/* static initializer (debugobjects) */
@@ -40,7 +40,7 @@ enum {
 
 	WORK_STRUCT_PENDING	= 1 << WORK_STRUCT_PENDING_BIT,
 	WORK_STRUCT_DELAYED	= 1 << WORK_STRUCT_DELAYED_BIT,
-	WORK_STRUCT_CWQ		= 1 << WORK_STRUCT_CWQ_BIT,
+	WORK_STRUCT_PWQ		= 1 << WORK_STRUCT_PWQ_BIT,
 	WORK_STRUCT_LINKED	= 1 << WORK_STRUCT_LINKED_BIT,
 #ifdef CONFIG_DEBUG_OBJECTS_WORK
 	WORK_STRUCT_STATIC	= 1 << WORK_STRUCT_STATIC_BIT,
@@ -60,14 +60,14 @@ enum {
 	WORK_CPU_END		= NR_CPUS + 1,
 
 	/*
-	 * Reserve 7 bits off of cwq pointer w/ debugobjects turned
-	 * off.  This makes cwqs aligned to 256 bytes and allows 15
-	 * workqueue flush colors.
+	 * Reserve 7 bits off of pwq pointer w/ debugobjects turned off.
+	 * This makes pwqs aligned to 256 bytes and allows 15 workqueue
+	 * flush colors.
 	 */
 	WORK_STRUCT_FLAG_BITS	= WORK_STRUCT_COLOR_SHIFT +
 				  WORK_STRUCT_COLOR_BITS,
 
-	/* data contains off-queue information when !WORK_STRUCT_CWQ */
+	/* data contains off-queue information when !WORK_STRUCT_PWQ */
 	WORK_OFFQ_FLAG_BASE	= WORK_STRUCT_FLAG_BITS,
 
 	WORK_OFFQ_CANCELING	= (1 << WORK_OFFQ_FLAG_BASE),
diff --git a/include/trace/events/workqueue.h b/include/trace/events/workqueue.h
index 4e798e384a6a..bf0e18ba6cfb 100644
--- a/include/trace/events/workqueue.h
+++ b/include/trace/events/workqueue.h
@@ -27,7 +27,7 @@ DECLARE_EVENT_CLASS(workqueue_work,
 /**
  * workqueue_queue_work - called when a work gets queued
  * @req_cpu:	the requested cpu
- * @cwq:	pointer to struct cpu_workqueue_struct
+ * @pwq:	pointer to struct pool_workqueue
  * @work:	pointer to struct work_struct
  *
  * This event occurs when a work is queued immediately or once a
@@ -36,10 +36,10 @@ DECLARE_EVENT_CLASS(workqueue_work,
  */
 TRACE_EVENT(workqueue_queue_work,
 
-	TP_PROTO(unsigned int req_cpu, struct cpu_workqueue_struct *cwq,
+	TP_PROTO(unsigned int req_cpu, struct pool_workqueue *pwq,
 		 struct work_struct *work),
 
-	TP_ARGS(req_cpu, cwq, work),
+	TP_ARGS(req_cpu, pwq, work),
 
 	TP_STRUCT__entry(
 		__field( void *,	work	)
@@ -52,9 +52,9 @@ TRACE_EVENT(workqueue_queue_work,
 	TP_fast_assign(
 		__entry->work		= work;
 		__entry->function	= work->func;
-		__entry->workqueue	= cwq->wq;
+		__entry->workqueue	= pwq->wq;
 		__entry->req_cpu	= req_cpu;
-		__entry->cpu		= cwq->pool->cpu;
+		__entry->cpu		= pwq->pool->cpu;
 	),
 
 	TP_printk("work struct=%p function=%pf workqueue=%p req_cpu=%u cpu=%u",
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index ea7f696f1060..0f1a264d0f22 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -154,11 +154,12 @@ struct worker_pool {
 } ____cacheline_aligned_in_smp;
 
 /*
- * The per-CPU workqueue.  The lower WORK_STRUCT_FLAG_BITS of
- * work_struct->data are used for flags and thus cwqs need to be
- * aligned at two's power of the number of flag bits.
+ * The per-pool workqueue.  While queued, the lower WORK_STRUCT_FLAG_BITS
+ * of work_struct->data are used for flags and the remaining high bits
+ * point to the pwq; thus, pwqs need to be aligned at two's power of the
+ * number of flag bits.
  */
-struct cpu_workqueue_struct {
+struct pool_workqueue {
 	struct worker_pool	*pool;		/* I: the associated pool */
 	struct workqueue_struct *wq;		/* I: the owning workqueue */
 	int			work_color;	/* L: current color */
@@ -207,16 +208,16 @@ typedef unsigned long mayday_mask_t;
 struct workqueue_struct {
 	unsigned int		flags;		/* W: WQ_* flags */
 	union {
-		struct cpu_workqueue_struct __percpu	*pcpu;
-		struct cpu_workqueue_struct		*single;
+		struct pool_workqueue __percpu		*pcpu;
+		struct pool_workqueue			*single;
 		unsigned long				v;
-	} cpu_wq;				/* I: cwq's */
+	} pool_wq;				/* I: pwq's */
 	struct list_head	list;		/* W: list of all workqueues */
 
 	struct mutex		flush_mutex;	/* protects wq flushing */
 	int			work_color;	/* F: current work color */
 	int			flush_color;	/* F: current flush color */
-	atomic_t		nr_cwqs_to_flush; /* flush in progress */
+	atomic_t		nr_pwqs_to_flush; /* flush in progress */
 	struct wq_flusher	*first_flusher;	/* F: first flusher */
 	struct list_head	flusher_queue;	/* F: flush waiters */
 	struct list_head	flusher_overflow; /* F: flush overflow list */
@@ -225,7 +226,7 @@ struct workqueue_struct {
 	struct worker		*rescuer;	/* I: rescue worker */
 
 	int			nr_drainers;	/* W: drain in progress */
-	int			saved_max_active; /* W: saved cwq max_active */
+	int			saved_max_active; /* W: saved pwq max_active */
 #ifdef CONFIG_LOCKDEP
 	struct lockdep_map	lockdep_map;
 #endif
@@ -268,7 +269,7 @@ static inline int __next_wq_cpu(int cpu, const struct cpumask *mask,
 	return WORK_CPU_END;
 }
 
-static inline int __next_cwq_cpu(int cpu, const struct cpumask *mask,
+static inline int __next_pwq_cpu(int cpu, const struct cpumask *mask,
 				 struct workqueue_struct *wq)
 {
 	return __next_wq_cpu(cpu, mask, !(wq->flags & WQ_UNBOUND) ? 1 : 2);
@@ -284,7 +285,7 @@ static inline int __next_cwq_cpu(int cpu, const struct cpumask *mask,
  *
  * for_each_wq_cpu()		: possible CPUs + WORK_CPU_UNBOUND
  * for_each_online_wq_cpu()	: online CPUs + WORK_CPU_UNBOUND
- * for_each_cwq_cpu()		: possible CPUs for bound workqueues,
+ * for_each_pwq_cpu()		: possible CPUs for bound workqueues,
  *				  WORK_CPU_UNBOUND for unbound workqueues
  */
 #define for_each_wq_cpu(cpu)						\
@@ -297,10 +298,10 @@ static inline int __next_cwq_cpu(int cpu, const struct cpumask *mask,
 	     (cpu) < WORK_CPU_END;					\
 	     (cpu) = __next_wq_cpu((cpu), cpu_online_mask, 3))
 
-#define for_each_cwq_cpu(cpu, wq)					\
-	for ((cpu) = __next_cwq_cpu(-1, cpu_possible_mask, (wq));	\
+#define for_each_pwq_cpu(cpu, wq)					\
+	for ((cpu) = __next_pwq_cpu(-1, cpu_possible_mask, (wq));	\
 	     (cpu) < WORK_CPU_END;					\
-	     (cpu) = __next_cwq_cpu((cpu), cpu_possible_mask, (wq)))
+	     (cpu) = __next_pwq_cpu((cpu), cpu_possible_mask, (wq)))
 
 #ifdef CONFIG_DEBUG_OBJECTS_WORK
 
@@ -479,14 +480,14 @@ static struct worker_pool *get_std_worker_pool(int cpu, bool highpri)
 	return &pools[highpri];
 }
 
-static struct cpu_workqueue_struct *get_cwq(unsigned int cpu,
-					    struct workqueue_struct *wq)
+static struct pool_workqueue *get_pwq(unsigned int cpu,
+				      struct workqueue_struct *wq)
 {
 	if (!(wq->flags & WQ_UNBOUND)) {
 		if (likely(cpu < nr_cpu_ids))
-			return per_cpu_ptr(wq->cpu_wq.pcpu, cpu);
+			return per_cpu_ptr(wq->pool_wq.pcpu, cpu);
 	} else if (likely(cpu == WORK_CPU_UNBOUND))
-		return wq->cpu_wq.single;
+		return wq->pool_wq.single;
 	return NULL;
 }
 
@@ -507,18 +508,18 @@ static int work_next_color(int color)
 }
 
 /*
- * While queued, %WORK_STRUCT_CWQ is set and non flag bits of a work's data
- * contain the pointer to the queued cwq.  Once execution starts, the flag
+ * While queued, %WORK_STRUCT_PWQ is set and non flag bits of a work's data
+ * contain the pointer to the queued pwq.  Once execution starts, the flag
  * is cleared and the high bits contain OFFQ flags and pool ID.
  *
- * set_work_cwq(), set_work_pool_and_clear_pending(), mark_work_canceling()
- * and clear_work_data() can be used to set the cwq, pool or clear
+ * set_work_pwq(), set_work_pool_and_clear_pending(), mark_work_canceling()
+ * and clear_work_data() can be used to set the pwq, pool or clear
  * work->data.  These functions should only be called while the work is
  * owned - ie. while the PENDING bit is set.
  *
- * get_work_pool() and get_work_cwq() can be used to obtain the pool or cwq
+ * get_work_pool() and get_work_pwq() can be used to obtain the pool or pwq
  * corresponding to a work.  Pool is available once the work has been
- * queued anywhere after initialization until it is sync canceled.  cwq is
+ * queued anywhere after initialization until it is sync canceled.  pwq is
  * available only while the work item is queued.
  *
  * %WORK_OFFQ_CANCELING is used to mark a work item which is being
@@ -533,12 +534,11 @@ static inline void set_work_data(struct work_struct *work, unsigned long data,
 	atomic_long_set(&work->data, data | flags | work_static(work));
 }
 
-static void set_work_cwq(struct work_struct *work,
-			 struct cpu_workqueue_struct *cwq,
+static void set_work_pwq(struct work_struct *work, struct pool_workqueue *pwq,
 			 unsigned long extra_flags)
 {
-	set_work_data(work, (unsigned long)cwq,
-		      WORK_STRUCT_PENDING | WORK_STRUCT_CWQ | extra_flags);
+	set_work_data(work, (unsigned long)pwq,
+		      WORK_STRUCT_PENDING | WORK_STRUCT_PWQ | extra_flags);
 }
 
 static void set_work_pool_and_keep_pending(struct work_struct *work,
@@ -567,11 +567,11 @@ static void clear_work_data(struct work_struct *work)
 	set_work_data(work, WORK_STRUCT_NO_POOL, 0);
 }
 
-static struct cpu_workqueue_struct *get_work_cwq(struct work_struct *work)
+static struct pool_workqueue *get_work_pwq(struct work_struct *work)
 {
 	unsigned long data = atomic_long_read(&work->data);
 
-	if (data & WORK_STRUCT_CWQ)
+	if (data & WORK_STRUCT_PWQ)
 		return (void *)(data & WORK_STRUCT_WQ_DATA_MASK);
 	else
 		return NULL;
@@ -589,8 +589,8 @@ static struct worker_pool *get_work_pool(struct work_struct *work)
 	struct worker_pool *pool;
 	int pool_id;
 
-	if (data & WORK_STRUCT_CWQ)
-		return ((struct cpu_workqueue_struct *)
+	if (data & WORK_STRUCT_PWQ)
+		return ((struct pool_workqueue *)
 			(data & WORK_STRUCT_WQ_DATA_MASK))->pool;
 
 	pool_id = data >> WORK_OFFQ_POOL_SHIFT;
@@ -613,8 +613,8 @@ static int get_work_pool_id(struct work_struct *work)
 {
 	unsigned long data = atomic_long_read(&work->data);
 
-	if (data & WORK_STRUCT_CWQ)
-		return ((struct cpu_workqueue_struct *)
+	if (data & WORK_STRUCT_PWQ)
+		return ((struct pool_workqueue *)
 			(data & WORK_STRUCT_WQ_DATA_MASK))->pool->id;
 
 	return data >> WORK_OFFQ_POOL_SHIFT;
@@ -632,7 +632,7 @@ static bool work_is_canceling(struct work_struct *work)
 {
 	unsigned long data = atomic_long_read(&work->data);
 
-	return !(data & WORK_STRUCT_CWQ) && (data & WORK_OFFQ_CANCELING);
+	return !(data & WORK_STRUCT_PWQ) && (data & WORK_OFFQ_CANCELING);
 }
 
 /*
@@ -961,67 +961,67 @@ static void move_linked_works(struct work_struct *work, struct list_head *head,
 		*nextp = n;
 }
 
-static void cwq_activate_delayed_work(struct work_struct *work)
+static void pwq_activate_delayed_work(struct work_struct *work)
 {
-	struct cpu_workqueue_struct *cwq = get_work_cwq(work);
+	struct pool_workqueue *pwq = get_work_pwq(work);
 
 	trace_workqueue_activate_work(work);
-	move_linked_works(work, &cwq->pool->worklist, NULL);
+	move_linked_works(work, &pwq->pool->worklist, NULL);
 	__clear_bit(WORK_STRUCT_DELAYED_BIT, work_data_bits(work));
-	cwq->nr_active++;
+	pwq->nr_active++;
 }
 
-static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq)
+static void pwq_activate_first_delayed(struct pool_workqueue *pwq)
 {
-	struct work_struct *work = list_first_entry(&cwq->delayed_works,
+	struct work_struct *work = list_first_entry(&pwq->delayed_works,
 						    struct work_struct, entry);
 
-	cwq_activate_delayed_work(work);
+	pwq_activate_delayed_work(work);
 }
 
 /**
- * cwq_dec_nr_in_flight - decrement cwq's nr_in_flight
- * @cwq: cwq of interest
+ * pwq_dec_nr_in_flight - decrement pwq's nr_in_flight
+ * @pwq: pwq of interest
  * @color: color of work which left the queue
  *
  * A work either has completed or is removed from pending queue,
- * decrement nr_in_flight of its cwq and handle workqueue flushing.
+ * decrement nr_in_flight of its pwq and handle workqueue flushing.
  *
  * CONTEXT:
  * spin_lock_irq(pool->lock).
  */
-static void cwq_dec_nr_in_flight(struct cpu_workqueue_struct *cwq, int color)
+static void pwq_dec_nr_in_flight(struct pool_workqueue *pwq, int color)
 {
 	/* ignore uncolored works */
 	if (color == WORK_NO_COLOR)
 		return;
 
-	cwq->nr_in_flight[color]--;
+	pwq->nr_in_flight[color]--;
 
-	cwq->nr_active--;
-	if (!list_empty(&cwq->delayed_works)) {
+	pwq->nr_active--;
+	if (!list_empty(&pwq->delayed_works)) {
 		/* one down, submit a delayed one */
-		if (cwq->nr_active < cwq->max_active)
-			cwq_activate_first_delayed(cwq);
+		if (pwq->nr_active < pwq->max_active)
+			pwq_activate_first_delayed(pwq);
 	}
 
 	/* is flush in progress and are we at the flushing tip? */
-	if (likely(cwq->flush_color != color))
+	if (likely(pwq->flush_color != color))
 		return;
 
 	/* are there still in-flight works? */
-	if (cwq->nr_in_flight[color])
+	if (pwq->nr_in_flight[color])
 		return;
 
-	/* this cwq is done, clear flush_color */
-	cwq->flush_color = -1;
+	/* this pwq is done, clear flush_color */
+	pwq->flush_color = -1;
 
 	/*
-	 * If this was the last cwq, wake up the first flusher.  It
+	 * If this was the last pwq, wake up the first flusher.  It
 	 * will handle the rest.
 	 */
-	if (atomic_dec_and_test(&cwq->wq->nr_cwqs_to_flush))
-		complete(&cwq->wq->first_flusher->done);
+	if (atomic_dec_and_test(&pwq->wq->nr_pwqs_to_flush))
+		complete(&pwq->wq->first_flusher->done);
 }
 
 /**
@@ -1053,7 +1053,7 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork,
 			       unsigned long *flags)
 {
 	struct worker_pool *pool;
-	struct cpu_workqueue_struct *cwq;
+	struct pool_workqueue *pwq;
 
 	local_irq_save(*flags);
 
@@ -1084,31 +1084,31 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork,
 
 	spin_lock(&pool->lock);
 	/*
-	 * work->data is guaranteed to point to cwq only while the work
-	 * item is queued on cwq->wq, and both updating work->data to point
-	 * to cwq on queueing and to pool on dequeueing are done under
-	 * cwq->pool->lock.  This in turn guarantees that, if work->data
-	 * points to cwq which is associated with a locked pool, the work
+	 * work->data is guaranteed to point to pwq only while the work
+	 * item is queued on pwq->wq, and both updating work->data to point
+	 * to pwq on queueing and to pool on dequeueing are done under
+	 * pwq->pool->lock.  This in turn guarantees that, if work->data
+	 * points to pwq which is associated with a locked pool, the work
 	 * item is currently queued on that pool.
 	 */
-	cwq = get_work_cwq(work);
-	if (cwq && cwq->pool == pool) {
+	pwq = get_work_pwq(work);
+	if (pwq && pwq->pool == pool) {
 		debug_work_deactivate(work);
 
 		/*
 		 * A delayed work item cannot be grabbed directly because
 		 * it might have linked NO_COLOR work items which, if left
-		 * on the delayed_list, will confuse cwq->nr_active
+		 * on the delayed_list, will confuse pwq->nr_active
 		 * management later on and cause stall.  Make sure the work
 		 * item is activated before grabbing.
 		 */
 		if (*work_data_bits(work) & WORK_STRUCT_DELAYED)
-			cwq_activate_delayed_work(work);
+			pwq_activate_delayed_work(work);
 
 		list_del_init(&work->entry);
-		cwq_dec_nr_in_flight(get_work_cwq(work), get_work_color(work));
+		pwq_dec_nr_in_flight(get_work_pwq(work), get_work_color(work));
 
-		/* work->data points to cwq iff queued, point to pool */
+		/* work->data points to pwq iff queued, point to pool */
 		set_work_pool_and_keep_pending(work, pool->id);
 
 		spin_unlock(&pool->lock);
@@ -1125,25 +1125,24 @@ fail:
 
 /**
  * insert_work - insert a work into a pool
- * @cwq: cwq @work belongs to
+ * @pwq: pwq @work belongs to
  * @work: work to insert
  * @head: insertion point
  * @extra_flags: extra WORK_STRUCT_* flags to set
  *
- * Insert @work which belongs to @cwq after @head.  @extra_flags is or'd to
+ * Insert @work which belongs to @pwq after @head.  @extra_flags is or'd to
  * work_struct flags.
  *
  * CONTEXT:
  * spin_lock_irq(pool->lock).
  */
-static void insert_work(struct cpu_workqueue_struct *cwq,
-			struct work_struct *work, struct list_head *head,
-			unsigned int extra_flags)
+static void insert_work(struct pool_workqueue *pwq, struct work_struct *work,
+			struct list_head *head, unsigned int extra_flags)
 {
-	struct worker_pool *pool = cwq->pool;
+	struct worker_pool *pool = pwq->pool;
 
 	/* we own @work, set data and link */
-	set_work_cwq(work, cwq, extra_flags);
+	set_work_pwq(work, pwq, extra_flags);
 	list_add_tail(&work->entry, head);
 
 	/*
@@ -1170,13 +1169,13 @@ static bool is_chained_work(struct workqueue_struct *wq)
 	 * Return %true iff I'm a worker execuing a work item on @wq.  If
 	 * I'm @worker, it's safe to dereference it without locking.
 	 */
-	return worker && worker->current_cwq->wq == wq;
+	return worker && worker->current_pwq->wq == wq;
 }
 
 static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
 			 struct work_struct *work)
 {
-	struct cpu_workqueue_struct *cwq;
+	struct pool_workqueue *pwq;
 	struct list_head *worklist;
 	unsigned int work_flags;
 	unsigned int req_cpu = cpu;
@@ -1196,7 +1195,7 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
 	    WARN_ON_ONCE(!is_chained_work(wq)))
 		return;
 
-	/* determine the cwq to use */
+	/* determine the pwq to use */
 	if (!(wq->flags & WQ_UNBOUND)) {
 		struct worker_pool *last_pool;
 
@@ -1209,54 +1208,54 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
 		 * work needs to be queued on that cpu to guarantee
 		 * non-reentrancy.
 		 */
-		cwq = get_cwq(cpu, wq);
+		pwq = get_pwq(cpu, wq);
 		last_pool = get_work_pool(work);
 
-		if (last_pool && last_pool != cwq->pool) {
+		if (last_pool && last_pool != pwq->pool) {
 			struct worker *worker;
 
 			spin_lock(&last_pool->lock);
 
 			worker = find_worker_executing_work(last_pool, work);
 
-			if (worker && worker->current_cwq->wq == wq) {
-				cwq = get_cwq(last_pool->cpu, wq);
+			if (worker && worker->current_pwq->wq == wq) {
+				pwq = get_pwq(last_pool->cpu, wq);
 			} else {
 				/* meh... not running there, queue here */
 				spin_unlock(&last_pool->lock);
-				spin_lock(&cwq->pool->lock);
+				spin_lock(&pwq->pool->lock);
 			}
 		} else {
-			spin_lock(&cwq->pool->lock);
+			spin_lock(&pwq->pool->lock);
 		}
 	} else {
-		cwq = get_cwq(WORK_CPU_UNBOUND, wq);
-		spin_lock(&cwq->pool->lock);
+		pwq = get_pwq(WORK_CPU_UNBOUND, wq);
+		spin_lock(&pwq->pool->lock);
 	}
 
-	/* cwq determined, queue */
-	trace_workqueue_queue_work(req_cpu, cwq, work);
+	/* pwq determined, queue */
+	trace_workqueue_queue_work(req_cpu, pwq, work);
 
 	if (WARN_ON(!list_empty(&work->entry))) {
-		spin_unlock(&cwq->pool->lock);
+		spin_unlock(&pwq->pool->lock);
 		return;
 	}
 
-	cwq->nr_in_flight[cwq->work_color]++;
-	work_flags = work_color_to_flags(cwq->work_color);
+	pwq->nr_in_flight[pwq->work_color]++;
+	work_flags = work_color_to_flags(pwq->work_color);
 
-	if (likely(cwq->nr_active < cwq->max_active)) {
+	if (likely(pwq->nr_active < pwq->max_active)) {
 		trace_workqueue_activate_work(work);
-		cwq->nr_active++;
-		worklist = &cwq->pool->worklist;
+		pwq->nr_active++;
+		worklist = &pwq->pool->worklist;
 	} else {
 		work_flags |= WORK_STRUCT_DELAYED;
-		worklist = &cwq->delayed_works;
+		worklist = &pwq->delayed_works;
 	}
 
-	insert_work(cwq, work, worklist, work_flags);
+	insert_work(pwq, work, worklist, work_flags);
 
-	spin_unlock(&cwq->pool->lock);
+	spin_unlock(&pwq->pool->lock);
 }
 
 /**
@@ -1661,14 +1660,14 @@ static void rebind_workers(struct worker_pool *pool)
 
 		/*
 		 * wq doesn't really matter but let's keep @worker->pool
-		 * and @cwq->pool consistent for sanity.
+		 * and @pwq->pool consistent for sanity.
 		 */
 		if (std_worker_pool_pri(worker->pool))
 			wq = system_highpri_wq;
 		else
 			wq = system_wq;
 
-		insert_work(get_cwq(pool->cpu, wq), rebind_work,
+		insert_work(get_pwq(pool->cpu, wq), rebind_work,
 			    worker->scheduled.next,
 			    work_color_to_flags(WORK_NO_COLOR));
 	}
@@ -1845,15 +1844,15 @@ static void idle_worker_timeout(unsigned long __pool)
 
 static bool send_mayday(struct work_struct *work)
 {
-	struct cpu_workqueue_struct *cwq = get_work_cwq(work);
-	struct workqueue_struct *wq = cwq->wq;
+	struct pool_workqueue *pwq = get_work_pwq(work);
+	struct workqueue_struct *wq = pwq->wq;
 	unsigned int cpu;
 
 	if (!(wq->flags & WQ_RESCUER))
 		return false;
 
 	/* mayday mayday mayday */
-	cpu = cwq->pool->cpu;
+	cpu = pwq->pool->cpu;
 	/* WORK_CPU_UNBOUND can't be set in cpumask, use cpu 0 instead */
 	if (cpu == WORK_CPU_UNBOUND)
 		cpu = 0;
@@ -2082,9 +2081,9 @@ static void process_one_work(struct worker *worker, struct work_struct *work)
 __releases(&pool->lock)
 __acquires(&pool->lock)
 {
-	struct cpu_workqueue_struct *cwq = get_work_cwq(work);
+	struct pool_workqueue *pwq = get_work_pwq(work);
 	struct worker_pool *pool = worker->pool;
-	bool cpu_intensive = cwq->wq->flags & WQ_CPU_INTENSIVE;
+	bool cpu_intensive = pwq->wq->flags & WQ_CPU_INTENSIVE;
 	int work_color;
 	struct worker *collision;
 #ifdef CONFIG_LOCKDEP
@@ -2125,7 +2124,7 @@ __acquires(&pool->lock)
 	hash_add(pool->busy_hash, &worker->hentry, (unsigned long)work);
 	worker->current_work = work;
 	worker->current_func = work->func;
-	worker->current_cwq = cwq;
+	worker->current_pwq = pwq;
 	work_color = get_work_color(work);
 
 	list_del_init(&work->entry);
@@ -2154,7 +2153,7 @@ __acquires(&pool->lock)
 
 	spin_unlock_irq(&pool->lock);
 
-	lock_map_acquire_read(&cwq->wq->lockdep_map);
+	lock_map_acquire_read(&pwq->wq->lockdep_map);
 	lock_map_acquire(&lockdep_map);
 	trace_workqueue_execute_start(work);
 	worker->current_func(work);
@@ -2164,7 +2163,7 @@ __acquires(&pool->lock)
 	 */
 	trace_workqueue_execute_end(work);
 	lock_map_release(&lockdep_map);
-	lock_map_release(&cwq->wq->lockdep_map);
+	lock_map_release(&pwq->wq->lockdep_map);
 
 	if (unlikely(in_atomic() || lockdep_depth(current) > 0)) {
 		pr_err("BUG: workqueue leaked lock or atomic: %s/0x%08x/%d\n"
@@ -2185,8 +2184,8 @@ __acquires(&pool->lock)
 	hash_del(&worker->hentry);
 	worker->current_work = NULL;
 	worker->current_func = NULL;
-	worker->current_cwq = NULL;
-	cwq_dec_nr_in_flight(cwq, work_color);
+	worker->current_pwq = NULL;
+	pwq_dec_nr_in_flight(pwq, work_color);
 }
 
 /**
@@ -2353,8 +2352,8 @@ repeat:
 	 */
 	for_each_mayday_cpu(cpu, wq->mayday_mask) {
 		unsigned int tcpu = is_unbound ? WORK_CPU_UNBOUND : cpu;
-		struct cpu_workqueue_struct *cwq = get_cwq(tcpu, wq);
-		struct worker_pool *pool = cwq->pool;
+		struct pool_workqueue *pwq = get_pwq(tcpu, wq);
+		struct worker_pool *pool = pwq->pool;
 		struct work_struct *work, *n;
 
 		__set_current_state(TASK_RUNNING);
@@ -2370,7 +2369,7 @@ repeat:
 		 */
 		BUG_ON(!list_empty(&rescuer->scheduled));
 		list_for_each_entry_safe(work, n, &pool->worklist, entry)
-			if (get_work_cwq(work) == cwq)
+			if (get_work_pwq(work) == pwq)
 				move_linked_works(work, scheduled, &n);
 
 		process_scheduled_works(rescuer);
@@ -2405,7 +2404,7 @@ static void wq_barrier_func(struct work_struct *work)
 
 /**
  * insert_wq_barrier - insert a barrier work
- * @cwq: cwq to insert barrier into
+ * @pwq: pwq to insert barrier into
  * @barr: wq_barrier to insert
  * @target: target work to attach @barr to
  * @worker: worker currently executing @target, NULL if @target is not executing
@@ -2422,12 +2421,12 @@ static void wq_barrier_func(struct work_struct *work)
  * after a work with LINKED flag set.
  *
  * Note that when @worker is non-NULL, @target may be modified
- * underneath us, so we can't reliably determine cwq from @target.
+ * underneath us, so we can't reliably determine pwq from @target.
  *
  * CONTEXT:
  * spin_lock_irq(pool->lock).
  */
-static void insert_wq_barrier(struct cpu_workqueue_struct *cwq,
+static void insert_wq_barrier(struct pool_workqueue *pwq,
 			      struct wq_barrier *barr,
 			      struct work_struct *target, struct worker *worker)
 {
@@ -2460,23 +2459,23 @@ static void insert_wq_barrier(struct cpu_workqueue_struct *cwq,
 	}
 
 	debug_work_activate(&barr->work);
-	insert_work(cwq, &barr->work, head,
+	insert_work(pwq, &barr->work, head,
 		    work_color_to_flags(WORK_NO_COLOR) | linked);
 }
 
 /**
- * flush_workqueue_prep_cwqs - prepare cwqs for workqueue flushing
+ * flush_workqueue_prep_pwqs - prepare pwqs for workqueue flushing
  * @wq: workqueue being flushed
  * @flush_color: new flush color, < 0 for no-op
  * @work_color: new work color, < 0 for no-op
  *
- * Prepare cwqs for workqueue flushing.
+ * Prepare pwqs for workqueue flushing.
  *
- * If @flush_color is non-negative, flush_color on all cwqs should be
- * -1.  If no cwq has in-flight commands at the specified color, all
- * cwq->flush_color's stay at -1 and %false is returned.  If any cwq
- * has in flight commands, its cwq->flush_color is set to
- * @flush_color, @wq->nr_cwqs_to_flush is updated accordingly, cwq
+ * If @flush_color is non-negative, flush_color on all pwqs should be
+ * -1.  If no pwq has in-flight commands at the specified color, all
+ * pwq->flush_color's stay at -1 and %false is returned.  If any pwq
+ * has in flight commands, its pwq->flush_color is set to
+ * @flush_color, @wq->nr_pwqs_to_flush is updated accordingly, pwq
  * wakeup logic is armed and %true is returned.
  *
  * The caller should have initialized @wq->first_flusher prior to
@@ -2484,7 +2483,7 @@ static void insert_wq_barrier(struct cpu_workqueue_struct *cwq,
  * @flush_color is negative, no flush color update is done and %false
  * is returned.
  *
- * If @work_color is non-negative, all cwqs should have the same
+ * If @work_color is non-negative, all pwqs should have the same
  * work_color which is previous to @work_color and all will be
  * advanced to @work_color.
  *
@@ -2495,42 +2494,42 @@ static void insert_wq_barrier(struct cpu_workqueue_struct *cwq,
  * %true if @flush_color >= 0 and there's something to flush.  %false
  * otherwise.
  */
-static bool flush_workqueue_prep_cwqs(struct workqueue_struct *wq,
+static bool flush_workqueue_prep_pwqs(struct workqueue_struct *wq,
 				      int flush_color, int work_color)
 {
 	bool wait = false;
 	unsigned int cpu;
 
 	if (flush_color >= 0) {
-		BUG_ON(atomic_read(&wq->nr_cwqs_to_flush));
-		atomic_set(&wq->nr_cwqs_to_flush, 1);
+		BUG_ON(atomic_read(&wq->nr_pwqs_to_flush));
+		atomic_set(&wq->nr_pwqs_to_flush, 1);
 	}
 
-	for_each_cwq_cpu(cpu, wq) {
-		struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
-		struct worker_pool *pool = cwq->pool;
+	for_each_pwq_cpu(cpu, wq) {
+		struct pool_workqueue *pwq = get_pwq(cpu, wq);
+		struct worker_pool *pool = pwq->pool;
 
 		spin_lock_irq(&pool->lock);
 
 		if (flush_color >= 0) {
-			BUG_ON(cwq->flush_color != -1);
+			BUG_ON(pwq->flush_color != -1);
 
-			if (cwq->nr_in_flight[flush_color]) {
-				cwq->flush_color = flush_color;
-				atomic_inc(&wq->nr_cwqs_to_flush);
+			if (pwq->nr_in_flight[flush_color]) {
+				pwq->flush_color = flush_color;
+				atomic_inc(&wq->nr_pwqs_to_flush);
 				wait = true;
 			}
 		}
 
 		if (work_color >= 0) {
-			BUG_ON(work_color != work_next_color(cwq->work_color));
-			cwq->work_color = work_color;
+			BUG_ON(work_color != work_next_color(pwq->work_color));
+			pwq->work_color = work_color;
 		}
 
 		spin_unlock_irq(&pool->lock);
 	}
 
-	if (flush_color >= 0 && atomic_dec_and_test(&wq->nr_cwqs_to_flush))
+	if (flush_color >= 0 && atomic_dec_and_test(&wq->nr_pwqs_to_flush))
 		complete(&wq->first_flusher->done);
 
 	return wait;
@@ -2581,7 +2580,7 @@ void flush_workqueue(struct workqueue_struct *wq)
 
 			wq->first_flusher = &this_flusher;
 
-			if (!flush_workqueue_prep_cwqs(wq, wq->flush_color,
+			if (!flush_workqueue_prep_pwqs(wq, wq->flush_color,
 						       wq->work_color)) {
 				/* nothing to flush, done */
 				wq->flush_color = next_color;
@@ -2592,7 +2591,7 @@ void flush_workqueue(struct workqueue_struct *wq)
 			/* wait in queue */
 			BUG_ON(wq->flush_color == this_flusher.flush_color);
 			list_add_tail(&this_flusher.list, &wq->flusher_queue);
-			flush_workqueue_prep_cwqs(wq, -1, wq->work_color);
+			flush_workqueue_prep_pwqs(wq, -1, wq->work_color);
 		}
 	} else {
 		/*
@@ -2659,7 +2658,7 @@ void flush_workqueue(struct workqueue_struct *wq)
 
 			list_splice_tail_init(&wq->flusher_overflow,
 					      &wq->flusher_queue);
-			flush_workqueue_prep_cwqs(wq, -1, wq->work_color);
+			flush_workqueue_prep_pwqs(wq, -1, wq->work_color);
 		}
 
 		if (list_empty(&wq->flusher_queue)) {
@@ -2669,7 +2668,7 @@ void flush_workqueue(struct workqueue_struct *wq)
 
 		/*
 		 * Need to flush more colors.  Make the next flusher
-		 * the new first flusher and arm cwqs.
+		 * the new first flusher and arm pwqs.
 		 */
 		BUG_ON(wq->flush_color == wq->work_color);
 		BUG_ON(wq->flush_color != next->flush_color);
@@ -2677,7 +2676,7 @@ void flush_workqueue(struct workqueue_struct *wq)
 		list_del_init(&next->list);
 		wq->first_flusher = next;
 
-		if (flush_workqueue_prep_cwqs(wq, wq->flush_color, -1))
+		if (flush_workqueue_prep_pwqs(wq, wq->flush_color, -1))
 			break;
 
 		/*
@@ -2720,13 +2719,13 @@ void drain_workqueue(struct workqueue_struct *wq)
 reflush:
 	flush_workqueue(wq);
 
-	for_each_cwq_cpu(cpu, wq) {
-		struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
+	for_each_pwq_cpu(cpu, wq) {
+		struct pool_workqueue *pwq = get_pwq(cpu, wq);
 		bool drained;
 
-		spin_lock_irq(&cwq->pool->lock);
-		drained = !cwq->nr_active && list_empty(&cwq->delayed_works);
-		spin_unlock_irq(&cwq->pool->lock);
+		spin_lock_irq(&pwq->pool->lock);
+		drained = !pwq->nr_active && list_empty(&pwq->delayed_works);
+		spin_unlock_irq(&pwq->pool->lock);
 
 		if (drained)
 			continue;
@@ -2749,7 +2748,7 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr)
 {
 	struct worker *worker = NULL;
 	struct worker_pool *pool;
-	struct cpu_workqueue_struct *cwq;
+	struct pool_workqueue *pwq;
 
 	might_sleep();
 	pool = get_work_pool(work);
@@ -2758,18 +2757,18 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr)
 
 	spin_lock_irq(&pool->lock);
 	/* see the comment in try_to_grab_pending() with the same code */
-	cwq = get_work_cwq(work);
-	if (cwq) {
-		if (unlikely(cwq->pool != pool))
+	pwq = get_work_pwq(work);
+	if (pwq) {
+		if (unlikely(pwq->pool != pool))
 			goto already_gone;
 	} else {
 		worker = find_worker_executing_work(pool, work);
 		if (!worker)
 			goto already_gone;
-		cwq = worker->current_cwq;
+		pwq = worker->current_pwq;
 	}
 
-	insert_wq_barrier(cwq, barr, work, worker);
+	insert_wq_barrier(pwq, barr, work, worker);
 	spin_unlock_irq(&pool->lock);
 
 	/*
@@ -2778,11 +2777,11 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr)
 	 * flusher is not running on the same workqueue by verifying write
 	 * access.
 	 */
-	if (cwq->wq->saved_max_active == 1 || cwq->wq->flags & WQ_RESCUER)
-		lock_map_acquire(&cwq->wq->lockdep_map);
+	if (pwq->wq->saved_max_active == 1 || pwq->wq->flags & WQ_RESCUER)
+		lock_map_acquire(&pwq->wq->lockdep_map);
 	else
-		lock_map_acquire_read(&cwq->wq->lockdep_map);
-	lock_map_release(&cwq->wq->lockdep_map);
+		lock_map_acquire_read(&pwq->wq->lockdep_map);
+	lock_map_release(&pwq->wq->lockdep_map);
 
 	return true;
 already_gone:
@@ -3092,46 +3091,46 @@ int keventd_up(void)
 	return system_wq != NULL;
 }
 
-static int alloc_cwqs(struct workqueue_struct *wq)
+static int alloc_pwqs(struct workqueue_struct *wq)
 {
 	/*
-	 * cwqs are forced aligned according to WORK_STRUCT_FLAG_BITS.
+	 * pwqs are forced aligned according to WORK_STRUCT_FLAG_BITS.
 	 * Make sure that the alignment isn't lower than that of
 	 * unsigned long long.
 	 */
-	const size_t size = sizeof(struct cpu_workqueue_struct);
+	const size_t size = sizeof(struct pool_workqueue);
 	const size_t align = max_t(size_t, 1 << WORK_STRUCT_FLAG_BITS,
 				   __alignof__(unsigned long long));
 
 	if (!(wq->flags & WQ_UNBOUND))
-		wq->cpu_wq.pcpu = __alloc_percpu(size, align);
+		wq->pool_wq.pcpu = __alloc_percpu(size, align);
 	else {
 		void *ptr;
 
 		/*
-		 * Allocate enough room to align cwq and put an extra
+		 * Allocate enough room to align pwq and put an extra
 		 * pointer at the end pointing back to the originally
 		 * allocated pointer which will be used for free.
 		 */
 		ptr = kzalloc(size + align + sizeof(void *), GFP_KERNEL);
 		if (ptr) {
-			wq->cpu_wq.single = PTR_ALIGN(ptr, align);
-			*(void **)(wq->cpu_wq.single + 1) = ptr;
+			wq->pool_wq.single = PTR_ALIGN(ptr, align);
+			*(void **)(wq->pool_wq.single + 1) = ptr;
 		}
 	}
 
 	/* just in case, make sure it's actually aligned */
-	BUG_ON(!IS_ALIGNED(wq->cpu_wq.v, align));
-	return wq->cpu_wq.v ? 0 : -ENOMEM;
+	BUG_ON(!IS_ALIGNED(wq->pool_wq.v, align));
+	return wq->pool_wq.v ? 0 : -ENOMEM;
 }
 
-static void free_cwqs(struct workqueue_struct *wq)
+static void free_pwqs(struct workqueue_struct *wq)
 {
 	if (!(wq->flags & WQ_UNBOUND))
-		free_percpu(wq->cpu_wq.pcpu);
-	else if (wq->cpu_wq.single) {
-		/* the pointer to free is stored right after the cwq */
-		kfree(*(void **)(wq->cpu_wq.single + 1));
+		free_percpu(wq->pool_wq.pcpu);
+	else if (wq->pool_wq.single) {
+		/* the pointer to free is stored right after the pwq */
+		kfree(*(void **)(wq->pool_wq.single + 1));
 	}
 }
 
@@ -3185,25 +3184,25 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt,
 	wq->flags = flags;
 	wq->saved_max_active = max_active;
 	mutex_init(&wq->flush_mutex);
-	atomic_set(&wq->nr_cwqs_to_flush, 0);
+	atomic_set(&wq->nr_pwqs_to_flush, 0);
 	INIT_LIST_HEAD(&wq->flusher_queue);
 	INIT_LIST_HEAD(&wq->flusher_overflow);
 
 	lockdep_init_map(&wq->lockdep_map, lock_name, key, 0);
 	INIT_LIST_HEAD(&wq->list);
 
-	if (alloc_cwqs(wq) < 0)
+	if (alloc_pwqs(wq) < 0)
 		goto err;
 
-	for_each_cwq_cpu(cpu, wq) {
-		struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
+	for_each_pwq_cpu(cpu, wq) {
+		struct pool_workqueue *pwq = get_pwq(cpu, wq);
 
-		BUG_ON((unsigned long)cwq & WORK_STRUCT_FLAG_MASK);
-		cwq->pool = get_std_worker_pool(cpu, flags & WQ_HIGHPRI);
-		cwq->wq = wq;
-		cwq->flush_color = -1;
-		cwq->max_active = max_active;
-		INIT_LIST_HEAD(&cwq->delayed_works);
+		BUG_ON((unsigned long)pwq & WORK_STRUCT_FLAG_MASK);
+		pwq->pool = get_std_worker_pool(cpu, flags & WQ_HIGHPRI);
+		pwq->wq = wq;
+		pwq->flush_color = -1;
+		pwq->max_active = max_active;
+		INIT_LIST_HEAD(&pwq->delayed_works);
 	}
 
 	if (flags & WQ_RESCUER) {
@@ -3234,8 +3233,8 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt,
 	spin_lock(&workqueue_lock);
 
 	if (workqueue_freezing && wq->flags & WQ_FREEZABLE)
-		for_each_cwq_cpu(cpu, wq)
-			get_cwq(cpu, wq)->max_active = 0;
+		for_each_pwq_cpu(cpu, wq)
+			get_pwq(cpu, wq)->max_active = 0;
 
 	list_add(&wq->list, &workqueues);
 
@@ -3244,7 +3243,7 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt,
 	return wq;
 err:
 	if (wq) {
-		free_cwqs(wq);
+		free_pwqs(wq);
 		free_mayday_mask(wq->mayday_mask);
 		kfree(wq->rescuer);
 		kfree(wq);
@@ -3275,14 +3274,14 @@ void destroy_workqueue(struct workqueue_struct *wq)
 	spin_unlock(&workqueue_lock);
 
 	/* sanity check */
-	for_each_cwq_cpu(cpu, wq) {
-		struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
+	for_each_pwq_cpu(cpu, wq) {
+		struct pool_workqueue *pwq = get_pwq(cpu, wq);
 		int i;
 
 		for (i = 0; i < WORK_NR_COLORS; i++)
-			BUG_ON(cwq->nr_in_flight[i]);
-		BUG_ON(cwq->nr_active);
-		BUG_ON(!list_empty(&cwq->delayed_works));
+			BUG_ON(pwq->nr_in_flight[i]);
+		BUG_ON(pwq->nr_active);
+		BUG_ON(!list_empty(&pwq->delayed_works));
 	}
 
 	if (wq->flags & WQ_RESCUER) {
@@ -3291,29 +3290,29 @@ void destroy_workqueue(struct workqueue_struct *wq)
 		kfree(wq->rescuer);
 	}
 
-	free_cwqs(wq);
+	free_pwqs(wq);
 	kfree(wq);
 }
 EXPORT_SYMBOL_GPL(destroy_workqueue);
 
 /**
- * cwq_set_max_active - adjust max_active of a cwq
- * @cwq: target cpu_workqueue_struct
+ * pwq_set_max_active - adjust max_active of a pwq
+ * @pwq: target pool_workqueue
  * @max_active: new max_active value.
  *
- * Set @cwq->max_active to @max_active and activate delayed works if
+ * Set @pwq->max_active to @max_active and activate delayed works if
  * increased.
  *
  * CONTEXT:
  * spin_lock_irq(pool->lock).
  */
-static void cwq_set_max_active(struct cpu_workqueue_struct *cwq, int max_active)
+static void pwq_set_max_active(struct pool_workqueue *pwq, int max_active)
 {
-	cwq->max_active = max_active;
+	pwq->max_active = max_active;
 
-	while (!list_empty(&cwq->delayed_works) &&
-	       cwq->nr_active < cwq->max_active)
-		cwq_activate_first_delayed(cwq);
+	while (!list_empty(&pwq->delayed_works) &&
+	       pwq->nr_active < pwq->max_active)
+		pwq_activate_first_delayed(pwq);
 }
 
 /**
@@ -3336,15 +3335,15 @@ void workqueue_set_max_active(struct workqueue_struct *wq, int max_active)
 
 	wq->saved_max_active = max_active;
 
-	for_each_cwq_cpu(cpu, wq) {
-		struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
-		struct worker_pool *pool = cwq->pool;
+	for_each_pwq_cpu(cpu, wq) {
+		struct pool_workqueue *pwq = get_pwq(cpu, wq);
+		struct worker_pool *pool = pwq->pool;
 
 		spin_lock_irq(&pool->lock);
 
 		if (!(wq->flags & WQ_FREEZABLE) ||
 		    !(pool->flags & POOL_FREEZING))
-			cwq_set_max_active(cwq, max_active);
+			pwq_set_max_active(pwq, max_active);
 
 		spin_unlock_irq(&pool->lock);
 	}
@@ -3367,9 +3366,9 @@ EXPORT_SYMBOL_GPL(workqueue_set_max_active);
  */
 bool workqueue_congested(unsigned int cpu, struct workqueue_struct *wq)
 {
-	struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
+	struct pool_workqueue *pwq = get_pwq(cpu, wq);
 
-	return !list_empty(&cwq->delayed_works);
+	return !list_empty(&pwq->delayed_works);
 }
 EXPORT_SYMBOL_GPL(workqueue_congested);
 
@@ -3408,7 +3407,7 @@ EXPORT_SYMBOL_GPL(work_busy);
  * CPU hotplug.
  *
  * There are two challenges in supporting CPU hotplug.  Firstly, there
- * are a lot of assumptions on strong associations among work, cwq and
+ * are a lot of assumptions on strong associations among work, pwq and
  * pool which make migrating pending and scheduled works very
  * difficult to implement without impacting hot paths.  Secondly,
  * worker pools serve mix of short, long and very long running works making
@@ -3612,11 +3611,11 @@ void freeze_workqueues_begin(void)
 			pool->flags |= POOL_FREEZING;
 
 			list_for_each_entry(wq, &workqueues, list) {
-				struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
+				struct pool_workqueue *pwq = get_pwq(cpu, wq);
 
-				if (cwq && cwq->pool == pool &&
+				if (pwq && pwq->pool == pool &&
 				    (wq->flags & WQ_FREEZABLE))
-					cwq->max_active = 0;
+					pwq->max_active = 0;
 			}
 
 			spin_unlock_irq(&pool->lock);
@@ -3655,13 +3654,13 @@ bool freeze_workqueues_busy(void)
 		 * to peek without lock.
 		 */
 		list_for_each_entry(wq, &workqueues, list) {
-			struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
+			struct pool_workqueue *pwq = get_pwq(cpu, wq);
 
-			if (!cwq || !(wq->flags & WQ_FREEZABLE))
+			if (!pwq || !(wq->flags & WQ_FREEZABLE))
 				continue;
 
-			BUG_ON(cwq->nr_active < 0);
-			if (cwq->nr_active) {
+			BUG_ON(pwq->nr_active < 0);
+			if (pwq->nr_active) {
 				busy = true;
 				goto out_unlock;
 			}
@@ -3701,14 +3700,14 @@ void thaw_workqueues(void)
 			pool->flags &= ~POOL_FREEZING;
 
 			list_for_each_entry(wq, &workqueues, list) {
-				struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
+				struct pool_workqueue *pwq = get_pwq(cpu, wq);
 
-				if (!cwq || cwq->pool != pool ||
+				if (!pwq || pwq->pool != pool ||
 				    !(wq->flags & WQ_FREEZABLE))
 					continue;
 
 				/* restore max_active and repopulate worklist */
-				cwq_set_max_active(cwq, wq->saved_max_active);
+				pwq_set_max_active(pwq, wq->saved_max_active);
 			}
 
 			wake_up_worker(pool);
diff --git a/kernel/workqueue_internal.h b/kernel/workqueue_internal.h
index 328be4a269aa..07650264ec15 100644
--- a/kernel/workqueue_internal.h
+++ b/kernel/workqueue_internal.h
@@ -28,7 +28,7 @@ struct worker {
 
 	struct work_struct	*current_work;	/* L: work being processed */
 	work_func_t		current_func;	/* L: current_work's fn */
-	struct cpu_workqueue_struct *current_cwq; /* L: current_work's cwq */
+	struct pool_workqueue	*current_pwq; /* L: current_work's pwq */
 	struct list_head	scheduled;	/* L: scheduled works */
 	struct task_struct	*task;		/* I: worker task */
 	struct worker_pool	*pool;		/* I: the associated pool */
-- 
cgit v1.2.3


From b2dc0c2b7a84864fdbad2e264ce9b5c7068a0a0f Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 13 Feb 2013 23:08:18 +0100
Subject: ARM: s3c: i2c: add platform_device forward declaration

A recent cleanup to the mach-osiris.c file is causing build errors
because the i2c-s3c2410.h header file is included before we see
the definition for platform_device. The fix is to make the header file
more robust against inclusion from other places. While this should
normally go through the i2c tree, the bug only exists in arm-soc
at the moment, so it's easier to fix it there before it goes upstream.

Without this patch, building s3c2410_defconfig results in:

arch/arm/mach-s3c24xx/mach-osiris.c:34:0:
include/linux/platform_data/i2c-s3c2410.h:37:26: warning: 'struct platform_device' declared inside parameter list [enabled by default]

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Cc: linux-i2c@vger.kernel.org
Cc: Wolfram Sang <w.sang@pengutronix.de>
Cc: Ben Dooks <ben-linux@fluff.org>
Cc: Kukjin Kim <kgene.kim@samsung.com>
---
 include/linux/platform_data/i2c-s3c2410.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/platform_data/i2c-s3c2410.h b/include/linux/platform_data/i2c-s3c2410.h
index 51d52e767a19..2a50048c1c44 100644
--- a/include/linux/platform_data/i2c-s3c2410.h
+++ b/include/linux/platform_data/i2c-s3c2410.h
@@ -15,6 +15,8 @@
 
 #define S3C_IICFLG_FILTER	(1<<0)	/* enable s3c2440 filter */
 
+struct platform_device;
+
 /**
  *	struct s3c2410_platform_i2c - Platform data for s3c I2C.
  *	@bus_num: The bus number to use (if possible).
-- 
cgit v1.2.3


From e9b04b5b67ec628a5e9a312e14b6864f8f73ba12 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 20 Nov 2012 11:14:10 -0500
Subject: make do_sigaltstack() static

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/sched.h | 1 -
 kernel/signal.c       | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8f983293b403..6dd06494997a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2259,7 +2259,6 @@ extern struct sigqueue *sigqueue_alloc(void);
 extern void sigqueue_free(struct sigqueue *);
 extern int send_sigqueue(struct sigqueue *,  struct task_struct *, int group);
 extern int do_sigaction(int, struct k_sigaction *, struct k_sigaction *);
-extern int do_sigaltstack(const stack_t __user *, stack_t __user *, unsigned long);
 
 static inline void restore_saved_sigmask(void)
 {
diff --git a/kernel/signal.c b/kernel/signal.c
index 87c09e3061d2..6919050c8156 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -3129,7 +3129,7 @@ int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact)
 	return 0;
 }
 
-int 
+static int 
 do_sigaltstack (const stack_t __user *uss, stack_t __user *uoss, unsigned long sp)
 {
 	stack_t oss;
-- 
cgit v1.2.3


From d64008a8f30e0b381b292788ec6f3ee509b3bb40 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 25 Nov 2012 23:12:10 -0500
Subject: burying unused conditionals

__ARCH_WANT_SYS_RT_SIGACTION,
__ARCH_WANT_SYS_RT_SIGSUSPEND,
__ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND,
__ARCH_WANT_COMPAT_SYS_SCHED_RR_GET_INTERVAL - not used anymore
CONFIG_GENERIC_{SIGALTSTACK,COMPAT_RT_SIG{ACTION,QUEUEINFO,PENDING,PROCMASK}} -
can be assumed always set.
---
 arch/Kconfig                         | 15 ---------------
 arch/alpha/Kconfig                   |  1 -
 arch/alpha/include/asm/unistd.h      |  1 -
 arch/arm/Kconfig                     |  1 -
 arch/arm/include/asm/unistd.h        |  2 --
 arch/arm64/Kconfig                   |  5 -----
 arch/arm64/include/asm/unistd.h      |  2 --
 arch/avr32/Kconfig                   |  1 -
 arch/avr32/include/asm/unistd.h      |  2 --
 arch/blackfin/Kconfig                |  1 -
 arch/blackfin/include/asm/unistd.h   |  2 --
 arch/c6x/Kconfig                     |  1 -
 arch/cris/Kconfig                    |  1 -
 arch/cris/include/asm/unistd.h       |  2 --
 arch/frv/Kconfig                     |  1 -
 arch/frv/include/asm/unistd.h        |  2 --
 arch/h8300/Kconfig                   |  1 -
 arch/h8300/include/asm/unistd.h      |  2 --
 arch/hexagon/Kconfig                 |  1 -
 arch/ia64/Kconfig                    |  1 -
 arch/ia64/include/asm/unistd.h       |  3 ---
 arch/m32r/Kconfig                    |  1 -
 arch/m32r/include/asm/unistd.h       |  2 --
 arch/m68k/Kconfig                    |  1 -
 arch/m68k/include/asm/unistd.h       |  2 --
 arch/microblaze/Kconfig              |  1 -
 arch/microblaze/include/asm/unistd.h |  2 --
 arch/mips/Kconfig                    |  5 -----
 arch/mips/include/asm/unistd.h       |  1 -
 arch/mn10300/Kconfig                 |  1 -
 arch/mn10300/include/asm/unistd.h    |  2 --
 arch/openrisc/Kconfig                |  1 -
 arch/parisc/Kconfig                  |  5 -----
 arch/parisc/include/asm/unistd.h     |  3 ---
 arch/powerpc/Kconfig                 |  5 -----
 arch/powerpc/include/asm/unistd.h    |  4 ----
 arch/s390/Kconfig                    |  5 -----
 arch/s390/include/asm/unistd.h       |  3 ---
 arch/score/Kconfig                   |  1 -
 arch/sh/Kconfig                      |  1 -
 arch/sh/include/asm/unistd.h         |  2 --
 arch/sparc/Kconfig                   |  5 -----
 arch/sparc/include/asm/unistd.h      |  3 ---
 arch/tile/Kconfig                    |  5 -----
 arch/tile/include/asm/unistd.h       |  1 -
 arch/unicore32/Kconfig               |  1 -
 arch/x86/Kconfig                     |  4 ----
 arch/x86/include/asm/unistd.h        |  2 --
 arch/x86/um/Kconfig                  |  1 -
 arch/xtensa/Kconfig                  |  1 -
 arch/xtensa/include/asm/unistd.h     |  2 --
 include/asm-generic/syscalls.h       |  7 -------
 include/asm-generic/unistd.h         |  3 ---
 include/linux/compat.h               | 14 --------------
 include/linux/syscalls.h             |  2 --
 kernel/signal.c                      | 12 ------------
 56 files changed, 159 deletions(-)

(limited to 'include/linux')

diff --git a/arch/Kconfig b/arch/Kconfig
index e50d3af294d4..956756c27ac6 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -356,21 +356,6 @@ config MODULES_USE_ELF_REL
 	  Modules only use ELF REL relocations.  Modules with ELF RELA
 	  relocations will give an error.
 
-config GENERIC_SIGALTSTACK
-	bool
-
-config GENERIC_COMPAT_RT_SIGPROCMASK
-	bool
-
-config GENERIC_COMPAT_RT_SIGPENDING
-	bool
-
-config GENERIC_COMPAT_RT_SIGQUEUEINFO
-	bool
-
-config GENERIC_COMPAT_RT_SIGACTION
-	bool
-
 #
 # ABI hall of shame
 #
diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index 15740cf29bd4..dd083c403ab3 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -22,7 +22,6 @@ config ALPHA
 	select GENERIC_STRNLEN_USER
 	select HAVE_MOD_ARCH_SPECIFIC
 	select MODULES_USE_ELF_RELA
-	select GENERIC_SIGALTSTACK
 	select ODD_RT_SIGACTION
 	select OLD_SIGSUSPEND
 	help
diff --git a/arch/alpha/include/asm/unistd.h b/arch/alpha/include/asm/unistd.h
index b3396ee039b7..6d6fe7ab5473 100644
--- a/arch/alpha/include/asm/unistd.h
+++ b/arch/alpha/include/asm/unistd.h
@@ -14,7 +14,6 @@
 #define __ARCH_WANT_SYS_OLD_GETRLIMIT
 #define __ARCH_WANT_SYS_OLDUMOUNT
 #define __ARCH_WANT_SYS_SIGPENDING
-#define __ARCH_WANT_SYS_RT_SIGSUSPEND
 #define __ARCH_WANT_SYS_FORK
 #define __ARCH_WANT_SYS_VFORK
 #define __ARCH_WANT_SYS_CLONE
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index db3152d6dd88..fcb406633328 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -56,7 +56,6 @@ config ARM
 	select HAVE_MOD_ARCH_SPECIFIC if ARM_UNWIND
 	select MODULES_USE_ELF_REL
 	select CLONE_BACKWARDS
-	select GENERIC_SIGALTSTACK
 	select OLD_SIGSUSPEND3
 	select OLD_SIGACTION
 	help
diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h
index 21a2700d2957..e4ddfb39ca34 100644
--- a/arch/arm/include/asm/unistd.h
+++ b/arch/arm/include/asm/unistd.h
@@ -26,8 +26,6 @@
 #define __ARCH_WANT_SYS_NICE
 #define __ARCH_WANT_SYS_SIGPENDING
 #define __ARCH_WANT_SYS_SIGPROCMASK
-#define __ARCH_WANT_SYS_RT_SIGACTION
-#define __ARCH_WANT_SYS_RT_SIGSUSPEND
 #define __ARCH_WANT_SYS_OLD_MMAP
 #define __ARCH_WANT_SYS_OLD_SELECT
 
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 1f27c58f44ad..626ab20f12ea 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -6,15 +6,10 @@ config ARM64
 	select CLONE_BACKWARDS
 	select COMMON_CLK
 	select GENERIC_CLOCKEVENTS
-	select GENERIC_COMPAT_RT_SIGACTION
-	select GENERIC_COMPAT_RT_SIGPENDING
-	select GENERIC_COMPAT_RT_SIGPROCMASK
-	select GENERIC_COMPAT_RT_SIGQUEUEINFO
 	select GENERIC_HARDIRQS_NO_DEPRECATED
 	select GENERIC_IOMAP
 	select GENERIC_IRQ_PROBE
 	select GENERIC_IRQ_SHOW
-	select GENERIC_SIGALTSTACK
 	select GENERIC_SMP_IDLE_THREAD
 	select GENERIC_TIME_VSYSCALL
 	select HARDIRQS_SW_RESEND
diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h
index 744087fb521c..82ce217e94cf 100644
--- a/arch/arm64/include/asm/unistd.h
+++ b/arch/arm64/include/asm/unistd.h
@@ -20,10 +20,8 @@
 #define __ARCH_WANT_SYS_GETPGRP
 #define __ARCH_WANT_SYS_LLSEEK
 #define __ARCH_WANT_SYS_NICE
-#define __ARCH_WANT_COMPAT_SYS_SCHED_RR_GET_INTERVAL
 #define __ARCH_WANT_SYS_SIGPENDING
 #define __ARCH_WANT_SYS_SIGPROCMASK
-#define __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND
 #define __ARCH_WANT_COMPAT_SYS_SENDFILE
 #define __ARCH_WANT_SYS_FORK
 #define __ARCH_WANT_SYS_VFORK
diff --git a/arch/avr32/Kconfig b/arch/avr32/Kconfig
index e888b72b6e10..2ae6591b3a55 100644
--- a/arch/avr32/Kconfig
+++ b/arch/avr32/Kconfig
@@ -17,7 +17,6 @@ config AVR32
 	select GENERIC_CLOCKEVENTS
 	select HAVE_MOD_ARCH_SPECIFIC
 	select MODULES_USE_ELF_RELA
-	select GENERIC_SIGALTSTACK
 	help
 	  AVR32 is a high-performance 32-bit RISC microprocessor core,
 	  designed for cost-sensitive embedded applications, with particular
diff --git a/arch/avr32/include/asm/unistd.h b/arch/avr32/include/asm/unistd.h
index 0bdf6371574e..dc4d5a931112 100644
--- a/arch/avr32/include/asm/unistd.h
+++ b/arch/avr32/include/asm/unistd.h
@@ -37,8 +37,6 @@
 #define __ARCH_WANT_SYS_GETPGRP
 #define __ARCH_WANT_SYS_LLSEEK
 #define __ARCH_WANT_SYS_GETPGRP
-#define __ARCH_WANT_SYS_RT_SIGACTION
-#define __ARCH_WANT_SYS_RT_SIGSUSPEND
 #define __ARCH_WANT_SYS_FORK
 #define __ARCH_WANT_SYS_VFORK
 #define __ARCH_WANT_SYS_CLONE
diff --git a/arch/blackfin/Kconfig b/arch/blackfin/Kconfig
index a8a9ca7d40f3..b6f3ad5441c5 100644
--- a/arch/blackfin/Kconfig
+++ b/arch/blackfin/Kconfig
@@ -45,7 +45,6 @@ config BLACKFIN
 	select ARCH_USES_GETTIMEOFFSET if !GENERIC_CLOCKEVENTS
 	select HAVE_MOD_ARCH_SPECIFIC
 	select MODULES_USE_ELF_RELA
-	select GENERIC_SIGALTSTACK
 
 config GENERIC_CSUM
 	def_bool y
diff --git a/arch/blackfin/include/asm/unistd.h b/arch/blackfin/include/asm/unistd.h
index e943cb130048..04e83ea8d5cc 100644
--- a/arch/blackfin/include/asm/unistd.h
+++ b/arch/blackfin/include/asm/unistd.h
@@ -18,8 +18,6 @@
 #define __ARCH_WANT_SYS_GETPGRP
 #define __ARCH_WANT_SYS_LLSEEK
 #define __ARCH_WANT_SYS_NICE
-#define __ARCH_WANT_SYS_RT_SIGACTION
-#define __ARCH_WANT_SYS_RT_SIGSUSPEND
 #define __ARCH_WANT_SYS_VFORK
 
 /*
diff --git a/arch/c6x/Kconfig b/arch/c6x/Kconfig
index 12d97b7ef0dc..f6a3648f5ec3 100644
--- a/arch/c6x/Kconfig
+++ b/arch/c6x/Kconfig
@@ -18,7 +18,6 @@ config C6X
 	select OF_EARLY_FLATTREE
 	select GENERIC_CLOCKEVENTS
 	select MODULES_USE_ELF_RELA
-	select GENERIC_SIGALTSTACK
 
 config MMU
 	def_bool n
diff --git a/arch/cris/Kconfig b/arch/cris/Kconfig
index c2a1d0a8924c..0e5c187ac7d2 100644
--- a/arch/cris/Kconfig
+++ b/arch/cris/Kconfig
@@ -50,7 +50,6 @@ config CRIS
 	select GENERIC_CMOS_UPDATE
 	select MODULES_USE_ELF_RELA
 	select CLONE_BACKWARDS2
-	select GENERIC_SIGALTSTACK
 	select OLD_SIGSUSPEND
 	select OLD_SIGACTION
 
diff --git a/arch/cris/include/asm/unistd.h b/arch/cris/include/asm/unistd.h
index 6d062bdf92d4..be57a988bfb9 100644
--- a/arch/cris/include/asm/unistd.h
+++ b/arch/cris/include/asm/unistd.h
@@ -30,8 +30,6 @@
 #define __ARCH_WANT_SYS_OLDUMOUNT
 #define __ARCH_WANT_SYS_SIGPENDING
 #define __ARCH_WANT_SYS_SIGPROCMASK
-#define __ARCH_WANT_SYS_RT_SIGACTION
-#define __ARCH_WANT_SYS_RT_SIGSUSPEND
 #define __ARCH_WANT_SYS_FORK
 #define __ARCH_WANT_SYS_VFORK
 #define __ARCH_WANT_SYS_CLONE
diff --git a/arch/frv/Kconfig b/arch/frv/Kconfig
index e3f8ffdd4e7b..b7465cd3dbbb 100644
--- a/arch/frv/Kconfig
+++ b/arch/frv/Kconfig
@@ -12,7 +12,6 @@ config FRV
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 	select GENERIC_CPU_DEVICES
 	select ARCH_WANT_IPC_PARSE_VERSION
-	select GENERIC_SIGALTSTACK
 	select OLD_SIGSUSPEND3
 	select OLD_SIGACTION
 
diff --git a/arch/frv/include/asm/unistd.h b/arch/frv/include/asm/unistd.h
index d685da17f5fb..4cfcc7bba25a 100644
--- a/arch/frv/include/asm/unistd.h
+++ b/arch/frv/include/asm/unistd.h
@@ -27,8 +27,6 @@
 #define __ARCH_WANT_SYS_OLDUMOUNT
 /* #define __ARCH_WANT_SYS_SIGPENDING */
 #define __ARCH_WANT_SYS_SIGPROCMASK
-#define __ARCH_WANT_SYS_RT_SIGACTION
-#define __ARCH_WANT_SYS_RT_SIGSUSPEND
 #define __ARCH_WANT_SYS_FORK
 #define __ARCH_WANT_SYS_VFORK
 #define __ARCH_WANT_SYS_CLONE
diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig
index 0b0176ce2c35..05b613af223a 100644
--- a/arch/h8300/Kconfig
+++ b/arch/h8300/Kconfig
@@ -9,7 +9,6 @@ config H8300
 	select GENERIC_IRQ_SHOW
 	select GENERIC_CPU_DEVICES
 	select MODULES_USE_ELF_RELA
-	select GENERIC_SIGALTSTACK
 	select OLD_SIGSUSPEND3
 	select OLD_SIGACTION
 
diff --git a/arch/h8300/include/asm/unistd.h b/arch/h8300/include/asm/unistd.h
index aa38105959fb..6721856d841b 100644
--- a/arch/h8300/include/asm/unistd.h
+++ b/arch/h8300/include/asm/unistd.h
@@ -29,8 +29,6 @@
 #define __ARCH_WANT_SYS_OLDUMOUNT
 #define __ARCH_WANT_SYS_SIGPENDING
 #define __ARCH_WANT_SYS_SIGPROCMASK
-#define __ARCH_WANT_SYS_RT_SIGACTION
-#define __ARCH_WANT_SYS_RT_SIGSUSPEND
 #define __ARCH_WANT_SYS_FORK
 #define __ARCH_WANT_SYS_VFORK
 #define __ARCH_WANT_SYS_CLONE
diff --git a/arch/hexagon/Kconfig b/arch/hexagon/Kconfig
index 3e6e27c11f93..0744f7d7b1fd 100644
--- a/arch/hexagon/Kconfig
+++ b/arch/hexagon/Kconfig
@@ -31,7 +31,6 @@ config HEXAGON
 	select GENERIC_CLOCKEVENTS
 	select GENERIC_CLOCKEVENTS_BROADCAST
 	select MODULES_USE_ELF_RELA
-	select GENERIC_SIGALTSTACK
 	---help---
 	  Qualcomm Hexagon is a processor architecture designed for high
 	  performance and low power across a wide variety of applications.
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 98482d1cbc5b..3279646120e3 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -42,7 +42,6 @@ config IA64
 	select GENERIC_TIME_VSYSCALL_OLD
 	select HAVE_MOD_ARCH_SPECIFIC
 	select MODULES_USE_ELF_RELA
-	select GENERIC_SIGALTSTACK
 	default y
 	help
 	  The Itanium Processor Family is Intel's 64-bit successor to
diff --git a/arch/ia64/include/asm/unistd.h b/arch/ia64/include/asm/unistd.h
index c827049eb62c..096373800f73 100644
--- a/arch/ia64/include/asm/unistd.h
+++ b/arch/ia64/include/asm/unistd.h
@@ -27,9 +27,6 @@
 #define __IGNORE_vfork		/* clone() */
 #define __IGNORE_umount2	/* umount() */
 
-#define __ARCH_WANT_SYS_RT_SIGACTION
-#define __ARCH_WANT_SYS_RT_SIGSUSPEND
-
 #if !defined(__ASSEMBLY__) && !defined(ASSEMBLER)
 
 #include <linux/types.h>
diff --git a/arch/m32r/Kconfig b/arch/m32r/Kconfig
index 1f550d4dd5d0..f807721e19a5 100644
--- a/arch/m32r/Kconfig
+++ b/arch/m32r/Kconfig
@@ -15,7 +15,6 @@ config M32R
 	select GENERIC_ATOMIC64
 	select ARCH_USES_GETTIMEOFFSET
 	select MODULES_USE_ELF_RELA
-	select GENERIC_SIGALTSTACK
 
 config SBUS
 	bool
diff --git a/arch/m32r/include/asm/unistd.h b/arch/m32r/include/asm/unistd.h
index 79b063caec85..555629b05267 100644
--- a/arch/m32r/include/asm/unistd.h
+++ b/arch/m32r/include/asm/unistd.h
@@ -20,8 +20,6 @@
 #define __ARCH_WANT_SYS_LLSEEK
 #define __ARCH_WANT_SYS_OLD_GETRLIMIT /*will be unused*/
 #define __ARCH_WANT_SYS_OLDUMOUNT
-#define __ARCH_WANT_SYS_RT_SIGACTION
-#define __ARCH_WANT_SYS_RT_SIGSUSPEND
 #define __ARCH_WANT_SYS_CLONE
 #define __ARCH_WANT_SYS_FORK
 #define __ARCH_WANT_SYS_VFORK
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index a358bf63defe..efb1ce1f14a3 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -18,7 +18,6 @@ config M68K
 	select HAVE_MOD_ARCH_SPECIFIC
 	select MODULES_USE_ELF_REL
 	select MODULES_USE_ELF_RELA
-	select GENERIC_SIGALTSTACK
 	select OLD_SIGSUSPEND3
 	select OLD_SIGACTION
 
diff --git a/arch/m68k/include/asm/unistd.h b/arch/m68k/include/asm/unistd.h
index 847994ce6804..df19631cc4de 100644
--- a/arch/m68k/include/asm/unistd.h
+++ b/arch/m68k/include/asm/unistd.h
@@ -29,8 +29,6 @@
 #define __ARCH_WANT_SYS_OLDUMOUNT
 #define __ARCH_WANT_SYS_SIGPENDING
 #define __ARCH_WANT_SYS_SIGPROCMASK
-#define __ARCH_WANT_SYS_RT_SIGACTION
-#define __ARCH_WANT_SYS_RT_SIGSUSPEND
 #define __ARCH_WANT_SYS_FORK
 #define __ARCH_WANT_SYS_VFORK
 
diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig
index 5e30d75c74ed..ba3b7c8c04b8 100644
--- a/arch/microblaze/Kconfig
+++ b/arch/microblaze/Kconfig
@@ -27,7 +27,6 @@ config MICROBLAZE
 	select GENERIC_CLOCKEVENTS
 	select MODULES_USE_ELF_RELA
 	select CLONE_BACKWARDS
-	select GENERIC_SIGALTSTACK
 
 config SWAP
 	def_bool n
diff --git a/arch/microblaze/include/asm/unistd.h b/arch/microblaze/include/asm/unistd.h
index 10f8ac186855..b3778391d9cc 100644
--- a/arch/microblaze/include/asm/unistd.h
+++ b/arch/microblaze/include/asm/unistd.h
@@ -33,8 +33,6 @@
 #define __ARCH_WANT_SYS_OLDUMOUNT
 #define __ARCH_WANT_SYS_SIGPENDING
 #define __ARCH_WANT_SYS_SIGPROCMASK
-#define __ARCH_WANT_SYS_RT_SIGACTION
-#define __ARCH_WANT_SYS_RT_SIGSUSPEND
 #define __ARCH_WANT_SYS_CLONE
 #define __ARCH_WANT_SYS_VFORK
 #define __ARCH_WANT_SYS_FORK
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 0772b5c5bc72..a3d4646098fe 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -42,11 +42,6 @@ config MIPS
 	select MODULES_USE_ELF_REL if MODULES
 	select MODULES_USE_ELF_RELA if MODULES && 64BIT
 	select CLONE_BACKWARDS
-	select GENERIC_SIGALTSTACK
-	select GENERIC_COMPAT_RT_SIGACTION
-	select GENERIC_COMPAT_RT_SIGQUEUEINFO
-	select GENERIC_COMPAT_RT_SIGPROCMASK
-	select GENERIC_COMPAT_RT_SIGPENDING
 
 menu "Machine selection"
 
diff --git a/arch/mips/include/asm/unistd.h b/arch/mips/include/asm/unistd.h
index 06f6463c24ad..64f661e32879 100644
--- a/arch/mips/include/asm/unistd.h
+++ b/arch/mips/include/asm/unistd.h
@@ -35,7 +35,6 @@
 #define __ARCH_WANT_SYS_OLDUMOUNT
 #define __ARCH_WANT_SYS_SIGPENDING
 #define __ARCH_WANT_SYS_SIGPROCMASK
-#define __ARCH_WANT_SYS_RT_SIGACTION
 # ifdef CONFIG_32BIT
 #  define __ARCH_WANT_STAT64
 #  define __ARCH_WANT_SYS_TIME
diff --git a/arch/mn10300/Kconfig b/arch/mn10300/Kconfig
index 12bf06f9abe5..ad0caea0bfea 100644
--- a/arch/mn10300/Kconfig
+++ b/arch/mn10300/Kconfig
@@ -10,7 +10,6 @@ config MN10300
 	select HAVE_NMI_WATCHDOG if MN10300_WD_TIMER
 	select GENERIC_CLOCKEVENTS
 	select MODULES_USE_ELF_RELA
-	select GENERIC_SIGALTSTACK
 	select OLD_SIGSUSPEND3
 	select OLD_SIGACTION
 
diff --git a/arch/mn10300/include/asm/unistd.h b/arch/mn10300/include/asm/unistd.h
index e6d2ed4ba68f..7f9d9adfa51e 100644
--- a/arch/mn10300/include/asm/unistd.h
+++ b/arch/mn10300/include/asm/unistd.h
@@ -41,8 +41,6 @@
 #define __ARCH_WANT_SYS_OLDUMOUNT
 #define __ARCH_WANT_SYS_SIGPENDING
 #define __ARCH_WANT_SYS_SIGPROCMASK
-#define __ARCH_WANT_SYS_RT_SIGACTION
-#define __ARCH_WANT_SYS_RT_SIGSUSPEND
 #define __ARCH_WANT_SYS_FORK
 #define __ARCH_WANT_SYS_VFORK
 #define __ARCH_WANT_SYS_CLONE
diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig
index d3632eb98a1c..0ac66f67521f 100644
--- a/arch/openrisc/Kconfig
+++ b/arch/openrisc/Kconfig
@@ -22,7 +22,6 @@ config OPENRISC
 	select GENERIC_STRNCPY_FROM_USER
 	select GENERIC_STRNLEN_USER
 	select MODULES_USE_ELF_RELA
-	select GENERIC_SIGALTSTACK
 
 config MMU
 	def_bool y
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index 2bd407ffaebf..b77feffbadea 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -23,11 +23,6 @@ config PARISC
 	select HAVE_MOD_ARCH_SPECIFIC
 	select MODULES_USE_ELF_RELA
 	select CLONE_BACKWARDS
-	select GENERIC_SIGALTSTACK
-	select GENERIC_COMPAT_RT_SIGACTION
-	select GENERIC_COMPAT_RT_SIGQUEUEINFO
-	select GENERIC_COMPAT_RT_SIGPROCMASK
-	select GENERIC_COMPAT_RT_SIGPENDING
 
 	help
 	  The PA-RISC microprocessor is designed by Hewlett-Packard and used
diff --git a/arch/parisc/include/asm/unistd.h b/arch/parisc/include/asm/unistd.h
index 3043194547cd..93b1d089864b 100644
--- a/arch/parisc/include/asm/unistd.h
+++ b/arch/parisc/include/asm/unistd.h
@@ -160,9 +160,6 @@ type name(type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5)	\
 #define __ARCH_WANT_SYS_OLDUMOUNT
 #define __ARCH_WANT_SYS_SIGPENDING
 #define __ARCH_WANT_SYS_SIGPROCMASK
-#define __ARCH_WANT_SYS_RT_SIGACTION
-#define __ARCH_WANT_SYS_RT_SIGSUSPEND
-#define __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND
 #define __ARCH_WANT_SYS_FORK
 #define __ARCH_WANT_SYS_VFORK
 #define __ARCH_WANT_SYS_CLONE
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index ec89a7b11f7b..cf90f0526411 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -144,11 +144,6 @@ config PPC
 	select HAVE_MOD_ARCH_SPECIFIC
 	select MODULES_USE_ELF_RELA
 	select CLONE_BACKWARDS
-	select GENERIC_SIGALTSTACK
-	select GENERIC_COMPAT_RT_SIGACTION
-	select GENERIC_COMPAT_RT_SIGQUEUEINFO
-	select GENERIC_COMPAT_RT_SIGPROCMASK
-	select GENERIC_COMPAT_RT_SIGPENDING
 	select OLD_SIGSUSPEND
 	select OLD_SIGACTION if PPC32
 
diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h
index 1d4864a40e35..f25b5c45c435 100644
--- a/arch/powerpc/include/asm/unistd.h
+++ b/arch/powerpc/include/asm/unistd.h
@@ -44,17 +44,13 @@
 #define __ARCH_WANT_SYS_OLDUMOUNT
 #define __ARCH_WANT_SYS_SIGPENDING
 #define __ARCH_WANT_SYS_SIGPROCMASK
-#define __ARCH_WANT_SYS_RT_SIGACTION
-#define __ARCH_WANT_SYS_RT_SIGSUSPEND
 #ifdef CONFIG_PPC32
 #define __ARCH_WANT_OLD_STAT
 #endif
 #ifdef CONFIG_PPC64
 #define __ARCH_WANT_COMPAT_SYS_TIME
-#define __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND
 #define __ARCH_WANT_SYS_NEWFSTATAT
 #define __ARCH_WANT_COMPAT_SYS_SENDFILE
-#define __ARCH_WANT_COMPAT_SYS_SCHED_RR_GET_INTERVAL
 #endif
 #define __ARCH_WANT_SYS_FORK
 #define __ARCH_WANT_SYS_VFORK
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index bcdcf31fa672..ec12a3582ae9 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -140,11 +140,6 @@ config S390
 	select HAVE_MOD_ARCH_SPECIFIC
 	select MODULES_USE_ELF_RELA
 	select CLONE_BACKWARDS2
-	select GENERIC_SIGALTSTACK
-	select GENERIC_COMPAT_RT_SIGACTION
-	select GENERIC_COMPAT_RT_SIGQUEUEINFO
-	select GENERIC_COMPAT_RT_SIGPROCMASK
-	select GENERIC_COMPAT_RT_SIGPENDING
 	select OLD_SIGSUSPEND3
 	select OLD_SIGACTION
 
diff --git a/arch/s390/include/asm/unistd.h b/arch/s390/include/asm/unistd.h
index 636530872516..a6667a952969 100644
--- a/arch/s390/include/asm/unistd.h
+++ b/arch/s390/include/asm/unistd.h
@@ -43,15 +43,12 @@
 #define __ARCH_WANT_SYS_OLDUMOUNT
 #define __ARCH_WANT_SYS_SIGPENDING
 #define __ARCH_WANT_SYS_SIGPROCMASK
-#define __ARCH_WANT_SYS_RT_SIGACTION
-#define __ARCH_WANT_SYS_RT_SIGSUSPEND
 # ifndef CONFIG_64BIT
 #   define __ARCH_WANT_STAT64
 #   define __ARCH_WANT_SYS_TIME
 # endif
 # ifdef CONFIG_COMPAT
 #   define __ARCH_WANT_COMPAT_SYS_TIME
-#   define __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND
 # endif
 #define __ARCH_WANT_SYS_FORK
 #define __ARCH_WANT_SYS_VFORK
diff --git a/arch/score/Kconfig b/arch/score/Kconfig
index a125d7207bcc..3b1482e7afac 100644
--- a/arch/score/Kconfig
+++ b/arch/score/Kconfig
@@ -14,7 +14,6 @@ config SCORE
        select HAVE_MOD_ARCH_SPECIFIC
 	select MODULES_USE_ELF_REL
 	select CLONE_BACKWARDS
-	select GENERIC_SIGALTSTACK
 
 choice
 	prompt "System type"
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index b5fd9f3c9925..5f9b0a3f5f00 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -40,7 +40,6 @@ config SUPERH
 	select GENERIC_STRNLEN_USER
 	select HAVE_MOD_ARCH_SPECIFIC if DWARF_UNWINDER
 	select MODULES_USE_ELF_RELA
-	select GENERIC_SIGALTSTACK
 	select OLD_SIGSUSPEND
 	select OLD_SIGACTION
 	help
diff --git a/arch/sh/include/asm/unistd.h b/arch/sh/include/asm/unistd.h
index 012004ed3330..5e90fa2b7eed 100644
--- a/arch/sh/include/asm/unistd.h
+++ b/arch/sh/include/asm/unistd.h
@@ -4,7 +4,6 @@
 #  include <asm/unistd_64.h>
 # endif
 
-# define __ARCH_WANT_SYS_RT_SIGSUSPEND
 # define __ARCH_WANT_OLD_READDIR
 # define __ARCH_WANT_OLD_STAT
 # define __ARCH_WANT_STAT64
@@ -27,7 +26,6 @@
 # define __ARCH_WANT_SYS_OLDUMOUNT
 # define __ARCH_WANT_SYS_SIGPENDING
 # define __ARCH_WANT_SYS_SIGPROCMASK
-# define __ARCH_WANT_SYS_RT_SIGACTION
 # define __ARCH_WANT_SYS_FORK
 # define __ARCH_WANT_SYS_VFORK
 # define __ARCH_WANT_SYS_CLONE
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index e557b0821540..9821a0ff9864 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -42,11 +42,6 @@ config SPARC
 	select GENERIC_STRNLEN_USER
 	select MODULES_USE_ELF_RELA
 	select ODD_RT_SIGACTION
-	select GENERIC_SIGALTSTACK
-	select GENERIC_COMPAT_RT_SIGQUEUEINFO
-	select GENERIC_COMPAT_RT_SIGPROCMASK
-	select GENERIC_COMPAT_RT_SIGPENDING
-	select GENERIC_COMPAT_RT_SIGACTION
 	select OLD_SIGSUSPEND
 
 config SPARC32
diff --git a/arch/sparc/include/asm/unistd.h b/arch/sparc/include/asm/unistd.h
index 87ce24c5eb95..5356810bd7e7 100644
--- a/arch/sparc/include/asm/unistd.h
+++ b/arch/sparc/include/asm/unistd.h
@@ -38,14 +38,11 @@
 #define __ARCH_WANT_SYS_OLDUMOUNT
 #define __ARCH_WANT_SYS_SIGPENDING
 #define __ARCH_WANT_SYS_SIGPROCMASK
-#define __ARCH_WANT_SYS_RT_SIGSUSPEND
 #ifdef __32bit_syscall_numbers__
 #define __ARCH_WANT_SYS_IPC
 #else
 #define __ARCH_WANT_COMPAT_SYS_TIME
-#define __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND
 #define __ARCH_WANT_COMPAT_SYS_SENDFILE
-#define __ARCH_WANT_COMPAT_SYS_SCHED_RR_GET_INTERVAL
 #endif
 
 /*
diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
index 96a717ebb1fa..875d008828b8 100644
--- a/arch/tile/Kconfig
+++ b/arch/tile/Kconfig
@@ -21,11 +21,6 @@ config TILE
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 	select GENERIC_CLOCKEVENTS
 	select MODULES_USE_ELF_RELA
-	select GENERIC_SIGALTSTACK
-	select GENERIC_COMPAT_RT_SIGACTION
-	select GENERIC_COMPAT_RT_SIGQUEUEINFO
-	select GENERIC_COMPAT_RT_SIGPROCMASK
-	select GENERIC_COMPAT_RT_SIGPENDING
 
 # FIXME: investigate whether we need/want these options.
 #	select HAVE_IOREMAP_PROT
diff --git a/arch/tile/include/asm/unistd.h b/arch/tile/include/asm/unistd.h
index 6ac21034f69a..940831fe9e94 100644
--- a/arch/tile/include/asm/unistd.h
+++ b/arch/tile/include/asm/unistd.h
@@ -14,7 +14,6 @@
 /* In compat mode, we use sys_llseek() for compat_sys_llseek(). */
 #ifdef CONFIG_COMPAT
 #define __ARCH_WANT_SYS_LLSEEK
-#define __ARCH_WANT_COMPAT_SYS_SCHED_RR_GET_INTERVAL
 #endif
 #define __ARCH_WANT_SYS_NEWFSTATAT
 #define __ARCH_WANT_SYS_CLONE
diff --git a/arch/unicore32/Kconfig b/arch/unicore32/Kconfig
index a62786fdcaab..60651df5f952 100644
--- a/arch/unicore32/Kconfig
+++ b/arch/unicore32/Kconfig
@@ -16,7 +16,6 @@ config UNICORE32
 	select ARCH_WANT_FRAME_POINTERS
 	select GENERIC_IOMAP
 	select MODULES_USE_ELF_REL
-	select GENERIC_SIGALTSTACK
 	help
 	  UniCore-32 is 32-bit Instruction Set Architecture,
 	  including a series of low-power-consumption RISC chip
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 87d09175a0a9..49fb44e95f3c 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -113,10 +113,6 @@ config X86
 	select MODULES_USE_ELF_REL if X86_32
 	select MODULES_USE_ELF_RELA if X86_64
 	select CLONE_BACKWARDS if X86_32
-	select GENERIC_SIGALTSTACK
-	select GENERIC_COMPAT_RT_SIGACTION
-	select GENERIC_COMPAT_RT_SIGQUEUEINFO
-	select GENERIC_COMPAT_RT_SIGPENDING
 	select OLD_SIGSUSPEND3 if X86_32 || IA32_EMULATION
 	select OLD_SIGACTION if X86_32
 	select COMPAT_OLD_SIGACTION if IA32_EMULATION
diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h
index a0790e07ba65..3d5df1c4447f 100644
--- a/arch/x86/include/asm/unistd.h
+++ b/arch/x86/include/asm/unistd.h
@@ -38,8 +38,6 @@
 # define __ARCH_WANT_SYS_OLD_GETRLIMIT
 # define __ARCH_WANT_SYS_OLD_UNAME
 # define __ARCH_WANT_SYS_PAUSE
-# define __ARCH_WANT_SYS_RT_SIGACTION
-# define __ARCH_WANT_SYS_RT_SIGSUSPEND
 # define __ARCH_WANT_SYS_SGETMASK
 # define __ARCH_WANT_SYS_SIGNAL
 # define __ARCH_WANT_SYS_SIGPENDING
diff --git a/arch/x86/um/Kconfig b/arch/x86/um/Kconfig
index cf0f2731484e..fafc94193bc8 100644
--- a/arch/x86/um/Kconfig
+++ b/arch/x86/um/Kconfig
@@ -13,7 +13,6 @@ endmenu
 config UML_X86
 	def_bool y
 	select GENERIC_FIND_FIRST_BIT
-	select GENERIC_SIGALTSTACK
 
 config 64BIT
 	bool "64-bit kernel" if SUBARCH = "x86"
diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
index 23cc6ae35da0..5aab1acabf1c 100644
--- a/arch/xtensa/Kconfig
+++ b/arch/xtensa/Kconfig
@@ -16,7 +16,6 @@ config XTENSA
 	select ARCH_WANT_OPTIONAL_GPIOLIB
 	select CLONE_BACKWARDS
 	select IRQ_DOMAIN
-	select GENERIC_SIGALTSTACK
 	help
 	  Xtensa processors are 32-bit RISC machines designed by Tensilica
 	  primarily for embedded systems.  These processors are both
diff --git a/arch/xtensa/include/asm/unistd.h b/arch/xtensa/include/asm/unistd.h
index eb63ea87815c..c38834de9ac7 100644
--- a/arch/xtensa/include/asm/unistd.h
+++ b/arch/xtensa/include/asm/unistd.h
@@ -15,8 +15,6 @@
 #define __ARCH_WANT_STAT64
 #define __ARCH_WANT_SYS_UTIME
 #define __ARCH_WANT_SYS_LLSEEK
-#define __ARCH_WANT_SYS_RT_SIGACTION
-#define __ARCH_WANT_SYS_RT_SIGSUSPEND
 #define __ARCH_WANT_SYS_GETPGRP
 
 /* 
diff --git a/include/asm-generic/syscalls.h b/include/asm-generic/syscalls.h
index 9e25a3179d6c..1f74be5113b2 100644
--- a/include/asm-generic/syscalls.h
+++ b/include/asm-generic/syscalls.h
@@ -21,13 +21,6 @@ asmlinkage long sys_mmap(unsigned long addr, unsigned long len,
 			unsigned long fd, off_t pgoff);
 #endif
 
-#ifndef CONFIG_GENERIC_SIGALTSTACK
-#ifndef sys_sigaltstack
-asmlinkage long sys_sigaltstack(const stack_t __user *, stack_t __user *,
-			struct pt_regs *);
-#endif
-#endif
-
 #ifndef sys_rt_sigreturn
 asmlinkage long sys_rt_sigreturn(struct pt_regs *regs);
 #endif
diff --git a/include/asm-generic/unistd.h b/include/asm-generic/unistd.h
index a36991ab334e..257c55ec4f77 100644
--- a/include/asm-generic/unistd.h
+++ b/include/asm-generic/unistd.h
@@ -9,9 +9,6 @@
 #define __ARCH_WANT_STAT64
 #define __ARCH_WANT_SYS_LLSEEK
 #endif
-#define __ARCH_WANT_SYS_RT_SIGACTION
-#define __ARCH_WANT_SYS_RT_SIGSUSPEND
-#define __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND
 
 /*
  * "Conditional" syscalls
diff --git a/include/linux/compat.h b/include/linux/compat.h
index 8de903587fb9..de095b0462a7 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -68,7 +68,6 @@
 #ifndef compat_user_stack_pointer
 #define compat_user_stack_pointer() current_user_stack_pointer()
 #endif
-#ifdef CONFIG_GENERIC_SIGALTSTACK
 #ifndef compat_sigaltstack	/* we'll need that for MIPS */
 typedef struct compat_sigaltstack {
 	compat_uptr_t			ss_sp;
@@ -76,7 +75,6 @@ typedef struct compat_sigaltstack {
 	compat_size_t			ss_size;
 } compat_stack_t;
 #endif
-#endif
 
 #define compat_jiffies_to_clock_t(x)	\
 		(((unsigned long)(x) * COMPAT_USER_HZ) / HZ)
@@ -142,7 +140,6 @@ typedef struct {
 	compat_sigset_word	sig[_COMPAT_NSIG_WORDS];
 } compat_sigset_t;
 
-#ifdef CONFIG_GENERIC_COMPAT_RT_SIGACTION
 struct compat_sigaction {
 #ifndef __ARCH_HAS_ODD_SIGACTION
 	compat_uptr_t			sa_handler;
@@ -156,7 +153,6 @@ struct compat_sigaction {
 #endif
 	compat_sigset_t			sa_mask __packed;
 };
-#endif
 
 /*
  * These functions operate strictly on struct compat_time*
@@ -623,27 +619,19 @@ asmlinkage long compat_sys_rt_sigtimedwait(compat_sigset_t __user *uthese,
 		struct compat_timespec __user *uts, compat_size_t sigsetsize);
 asmlinkage long compat_sys_rt_sigsuspend(compat_sigset_t __user *unewset,
 					 compat_size_t sigsetsize);
-#ifdef CONFIG_GENERIC_COMPAT_RT_SIGPROCMASK
 asmlinkage long compat_sys_rt_sigprocmask(int how, compat_sigset_t __user *set,
 					  compat_sigset_t __user *oset,
 					  compat_size_t sigsetsize);
-#endif
-#ifdef CONFIG_GENERIC_COMPAT_RT_SIGPENDING
 asmlinkage long compat_sys_rt_sigpending(compat_sigset_t __user *uset,
 					 compat_size_t sigsetsize);
-#endif
 #ifndef CONFIG_ODD_RT_SIGACTION
-#ifdef CONFIG_GENERIC_COMPAT_RT_SIGACTION
 asmlinkage long compat_sys_rt_sigaction(int,
 				 const struct compat_sigaction __user *,
 				 struct compat_sigaction __user *,
 				 compat_size_t);
 #endif
-#endif
-#ifdef CONFIG_GENERIC_COMPAT_RT_SIGQUEUEINFO
 asmlinkage long compat_sys_rt_sigqueueinfo(compat_pid_t pid, int sig,
 				struct compat_siginfo __user *uinfo);
-#endif
 asmlinkage long compat_sys_sysinfo(struct compat_sysinfo __user *info);
 asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd,
 				 unsigned long arg);
@@ -694,13 +682,11 @@ asmlinkage ssize_t compat_sys_process_vm_writev(compat_pid_t pid,
 
 asmlinkage long compat_sys_sendfile(int out_fd, int in_fd,
 				    compat_off_t __user *offset, compat_size_t count);
-#ifdef CONFIG_GENERIC_SIGALTSTACK
 asmlinkage long compat_sys_sigaltstack(const compat_stack_t __user *uss_ptr,
 				       compat_stack_t __user *uoss_ptr);
 
 int compat_restore_altstack(const compat_stack_t __user *uss);
 int __compat_save_altstack(compat_stack_t __user *, unsigned long);
-#endif
 
 asmlinkage long compat_sys_sched_rr_get_interval(compat_pid_t pid,
 						 struct compat_timespec __user *interval);
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 66d298f69f98..313a8e0a6553 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -300,10 +300,8 @@ asmlinkage long sys_personality(unsigned int personality);
 asmlinkage long sys_sigpending(old_sigset_t __user *set);
 asmlinkage long sys_sigprocmask(int how, old_sigset_t __user *set,
 				old_sigset_t __user *oset);
-#ifdef CONFIG_GENERIC_SIGALTSTACK
 asmlinkage long sys_sigaltstack(const struct sigaltstack __user *uss,
 				struct sigaltstack __user *uoss);
-#endif
 
 asmlinkage long sys_getitimer(int which, struct itimerval __user *value);
 asmlinkage long sys_setitimer(int which,
diff --git a/kernel/signal.c b/kernel/signal.c
index 6919050c8156..2b8282bb487c 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2623,7 +2623,6 @@ SYSCALL_DEFINE4(rt_sigprocmask, int, how, sigset_t __user *, nset,
 }
 
 #ifdef CONFIG_COMPAT
-#ifdef CONFIG_GENERIC_COMPAT_RT_SIGPROCMASK
 COMPAT_SYSCALL_DEFINE4(rt_sigprocmask, int, how, compat_sigset_t __user *, nset,
 		compat_sigset_t __user *, oset, compat_size_t, sigsetsize)
 {
@@ -2661,7 +2660,6 @@ COMPAT_SYSCALL_DEFINE4(rt_sigprocmask, int, how, compat_sigset_t __user *, nset,
 #endif
 }
 #endif
-#endif
 
 static int do_sigpending(void *set, unsigned long sigsetsize)
 {
@@ -2694,7 +2692,6 @@ SYSCALL_DEFINE2(rt_sigpending, sigset_t __user *, uset, size_t, sigsetsize)
 }
 
 #ifdef CONFIG_COMPAT
-#ifdef CONFIG_GENERIC_COMPAT_RT_SIGPENDING
 COMPAT_SYSCALL_DEFINE2(rt_sigpending, compat_sigset_t __user *, uset,
 		compat_size_t, sigsetsize)
 {
@@ -2714,7 +2711,6 @@ COMPAT_SYSCALL_DEFINE2(rt_sigpending, compat_sigset_t __user *, uset,
 #endif
 }
 #endif
-#endif
 
 #ifndef HAVE_ARCH_COPY_SIGINFO_TO_USER
 
@@ -3024,7 +3020,6 @@ SYSCALL_DEFINE3(rt_sigqueueinfo, pid_t, pid, int, sig,
 }
 
 #ifdef CONFIG_COMPAT
-#ifdef CONFIG_GENERIC_COMPAT_RT_SIGQUEUEINFO
 COMPAT_SYSCALL_DEFINE3(rt_sigqueueinfo,
 			compat_pid_t, pid,
 			int, sig,
@@ -3037,7 +3032,6 @@ COMPAT_SYSCALL_DEFINE3(rt_sigqueueinfo,
 	return do_rt_sigqueueinfo(pid, sig, &info);
 }
 #endif
-#endif
 
 static int do_rt_tgsigqueueinfo(pid_t tgid, pid_t pid, int sig, siginfo_t *info)
 {
@@ -3194,12 +3188,10 @@ do_sigaltstack (const stack_t __user *uss, stack_t __user *uoss, unsigned long s
 out:
 	return error;
 }
-#ifdef CONFIG_GENERIC_SIGALTSTACK
 SYSCALL_DEFINE2(sigaltstack,const stack_t __user *,uss, stack_t __user *,uoss)
 {
 	return do_sigaltstack(uss, uoss, current_user_stack_pointer());
 }
-#endif
 
 int restore_altstack(const stack_t __user *uss)
 {
@@ -3217,7 +3209,6 @@ int __save_altstack(stack_t __user *uss, unsigned long sp)
 }
 
 #ifdef CONFIG_COMPAT
-#ifdef CONFIG_GENERIC_SIGALTSTACK
 COMPAT_SYSCALL_DEFINE2(sigaltstack,
 			const compat_stack_t __user *, uss_ptr,
 			compat_stack_t __user *, uoss_ptr)
@@ -3267,7 +3258,6 @@ int __compat_save_altstack(compat_stack_t __user *uss, unsigned long sp)
 		__put_user(t->sas_ss_size, &uss->ss_size);
 }
 #endif
-#endif
 
 #ifdef __ARCH_WANT_SYS_SIGPENDING
 
@@ -3368,7 +3358,6 @@ out:
 	return ret;
 }
 #ifdef CONFIG_COMPAT
-#ifdef CONFIG_GENERIC_COMPAT_RT_SIGACTION
 COMPAT_SYSCALL_DEFINE4(rt_sigaction, int, sig,
 		const struct compat_sigaction __user *, act,
 		struct compat_sigaction __user *, oact,
@@ -3415,7 +3404,6 @@ COMPAT_SYSCALL_DEFINE4(rt_sigaction, int, sig,
 	return ret;
 }
 #endif
-#endif
 #endif /* !CONFIG_ODD_RT_SIGACTION */
 
 #ifdef CONFIG_OLD_SIGACTION
-- 
cgit v1.2.3


From 7d7e499f7333f68b7e7f67d14b9c1480913b4afb Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 31 Jan 2013 12:11:12 +0000
Subject: smpboot: Allow selfparking per cpu threads

The stop machine threads are still killed when a cpu goes offline. The
reason is that the thread is used to bring the cpu down, so it can't
be parked along with the other per cpu threads.

Allow a per cpu thread to be excluded from automatic parking, so it
can park itself once it's done

Add a create callback function as well.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Paul McKenney <paulmck@linux.vnet.ibm.com>
Cc: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Cc: Arjan van de Veen <arjan@infradead.org>
Cc: Paul Turner <pjt@google.com>
Cc: Richard Weinberger <rw@linutronix.de>
Cc: Magnus Damm <magnus.damm@gmail.com>
Link: http://lkml.kernel.org/r/20130131120741.553993267@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/smpboot.h | 5 +++++
 kernel/smpboot.c        | 5 +++--
 2 files changed, 8 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/smpboot.h b/include/linux/smpboot.h
index e0106d8581d3..c65dee059913 100644
--- a/include/linux/smpboot.h
+++ b/include/linux/smpboot.h
@@ -14,6 +14,8 @@ struct smpboot_thread_data;
  * @thread_should_run:	Check whether the thread should run or not. Called with
  *			preemption disabled.
  * @thread_fn:		The associated thread function
+ * @create:		Optional setup function, called when the thread gets
+ *			created (Not called from the thread context)
  * @setup:		Optional setup function, called when the thread gets
  *			operational the first time
  * @cleanup:		Optional cleanup function, called when the thread
@@ -22,6 +24,7 @@ struct smpboot_thread_data;
  *			parked (cpu offline)
  * @unpark:		Optional unpark function, called when the thread is
  *			unparked (cpu online)
+ * @selfparking:	Thread is not parked by the park function.
  * @thread_comm:	The base name of the thread
  */
 struct smp_hotplug_thread {
@@ -29,10 +32,12 @@ struct smp_hotplug_thread {
 	struct list_head		list;
 	int				(*thread_should_run)(unsigned int cpu);
 	void				(*thread_fn)(unsigned int cpu);
+	void				(*create)(unsigned int cpu);
 	void				(*setup)(unsigned int cpu);
 	void				(*cleanup)(unsigned int cpu, bool online);
 	void				(*park)(unsigned int cpu);
 	void				(*unpark)(unsigned int cpu);
+	bool				selfparking;
 	const char			*thread_comm;
 };
 
diff --git a/kernel/smpboot.c b/kernel/smpboot.c
index d6c5fc054242..d4abac261779 100644
--- a/kernel/smpboot.c
+++ b/kernel/smpboot.c
@@ -183,9 +183,10 @@ __smpboot_create_thread(struct smp_hotplug_thread *ht, unsigned int cpu)
 		kfree(td);
 		return PTR_ERR(tsk);
 	}
-
 	get_task_struct(tsk);
 	*per_cpu_ptr(ht->store, cpu) = tsk;
+	if (ht->create)
+		ht->create(cpu);
 	return 0;
 }
 
@@ -225,7 +226,7 @@ static void smpboot_park_thread(struct smp_hotplug_thread *ht, unsigned int cpu)
 {
 	struct task_struct *tsk = *per_cpu_ptr(ht->store, cpu);
 
-	if (tsk)
+	if (tsk && !ht->selfparking)
 		kthread_park(tsk);
 }
 
-- 
cgit v1.2.3


From 978c4172af48f0adc082f8b1d94acb817d947730 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Thu, 14 Feb 2013 11:00:16 +0200
Subject: dmaengine.h: remove redundant else keyword

dmaengine_device_control returns -ENOSYS in case the dma driver doesn't have
such functionality.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 include/linux/dmaengine.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index bfcdecb5d87a..f5939999cb65 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -610,8 +610,8 @@ static inline int dmaengine_device_control(struct dma_chan *chan,
 {
 	if (chan->device->device_control)
 		return chan->device->device_control(chan, cmd, arg);
-	else
-		return -ENOSYS;
+
+	return -ENOSYS;
 }
 
 static inline int dmaengine_slave_config(struct dma_chan *chan,
-- 
cgit v1.2.3


From abf09bed3cceadd809f0356065c2ada6cee90d4a Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Wed, 7 Nov 2012 13:17:37 +0100
Subject: s390/mm: implement software dirty bits

The s390 architecture is unique in respect to dirty page detection,
it uses the change bit in the per-page storage key to track page
modifications. All other architectures track dirty bits by means
of page table entries. This property of s390 has caused numerous
problems in the past, e.g. see git commit ef5d437f71afdf4a
"mm: fix XFS oops due to dirty pages without buffers on s390".

To avoid future issues in regard to per-page dirty bits convert
s390 to a fault based software dirty bit detection mechanism. All
user page table entries which are marked as clean will be hardware
read-only, even if the pte is supposed to be writable. A write by
the user process will trigger a protection fault which will cause
the user pte to be marked as dirty and the hardware read-only bit
is removed.

With this change the dirty bit in the storage key is irrelevant
for Linux as a host, but the storage key is still required for
KVM guests. The effect is that page_test_and_clear_dirty and the
related code can be removed. The referenced bit in the storage
key is still used by the page_test_and_clear_young primitive to
provide page age information.

For page cache pages of mappings with mapping_cap_account_dirty
there will not be any change in behavior as the dirty bit tracking
already uses read-only ptes to control the amount of dirty pages.
Only for swap cache pages and pages of mappings without
mapping_cap_account_dirty there can be additional protection faults.
To avoid an excessive number of additional faults the mk_pte
primitive checks for PageDirty if the pgprot value allows for writes
and pre-dirties the pte. That avoids all additional faults for
tmpfs and shmem pages until these pages are added to the swap cache.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/page.h    |  22 -------
 arch/s390/include/asm/pgtable.h | 131 +++++++++++++++++++++++++++-------------
 arch/s390/include/asm/sclp.h    |   1 -
 arch/s390/include/asm/setup.h   |  16 ++---
 arch/s390/kvm/kvm-s390.c        |   2 +-
 arch/s390/lib/uaccess_pt.c      |   2 +-
 arch/s390/mm/pageattr.c         |   2 +-
 arch/s390/mm/vmem.c             |  24 +++-----
 drivers/s390/char/sclp_cmd.c    |  10 +--
 include/asm-generic/pgtable.h   |  10 ---
 include/linux/page-flags.h      |   8 ---
 mm/rmap.c                       |  24 --------
 12 files changed, 112 insertions(+), 140 deletions(-)

(limited to 'include/linux')

diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
index a86ad4084073..75ce9b065f9f 100644
--- a/arch/s390/include/asm/page.h
+++ b/arch/s390/include/asm/page.h
@@ -154,28 +154,6 @@ static inline int page_reset_referenced(unsigned long addr)
 #define _PAGE_FP_BIT		0x08	/* HW fetch protection bit	*/
 #define _PAGE_ACC_BITS		0xf0	/* HW access control bits	*/
 
-/*
- * Test and clear dirty bit in storage key.
- * We can't clear the changed bit atomically. This is a potential
- * race against modification of the referenced bit. This function
- * should therefore only be called if it is not mapped in any
- * address space.
- *
- * Note that the bit gets set whenever page content is changed. That means
- * also when the page is modified by DMA or from inside the kernel.
- */
-#define __HAVE_ARCH_PAGE_TEST_AND_CLEAR_DIRTY
-static inline int page_test_and_clear_dirty(unsigned long pfn, int mapped)
-{
-	unsigned char skey;
-
-	skey = page_get_storage_key(pfn << PAGE_SHIFT);
-	if (!(skey & _PAGE_CHANGED))
-		return 0;
-	page_set_storage_key(pfn << PAGE_SHIFT, skey & ~_PAGE_CHANGED, mapped);
-	return 1;
-}
-
 /*
  * Test and clear referenced bit in storage key.
  */
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index a009d4dd70cb..97de1200c849 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -29,6 +29,7 @@
 #ifndef __ASSEMBLY__
 #include <linux/sched.h>
 #include <linux/mm_types.h>
+#include <linux/page-flags.h>
 #include <asm/bug.h>
 #include <asm/page.h>
 
@@ -221,13 +222,15 @@ extern unsigned long MODULES_END;
 /* Software bits in the page table entry */
 #define _PAGE_SWT	0x001		/* SW pte type bit t */
 #define _PAGE_SWX	0x002		/* SW pte type bit x */
-#define _PAGE_SWC	0x004		/* SW pte changed bit (for KVM) */
-#define _PAGE_SWR	0x008		/* SW pte referenced bit (for KVM) */
-#define _PAGE_SPECIAL	0x010		/* SW associated with special page */
+#define _PAGE_SWC	0x004		/* SW pte changed bit */
+#define _PAGE_SWR	0x008		/* SW pte referenced bit */
+#define _PAGE_SWW	0x010		/* SW pte write bit */
+#define _PAGE_SPECIAL	0x020		/* SW associated with special page */
 #define __HAVE_ARCH_PTE_SPECIAL
 
 /* Set of bits not changed in pte_modify */
-#define _PAGE_CHG_MASK	(PAGE_MASK | _PAGE_SPECIAL | _PAGE_SWC | _PAGE_SWR)
+#define _PAGE_CHG_MASK		(PAGE_MASK | _PAGE_SPECIAL | _PAGE_CO | \
+				 _PAGE_SWC | _PAGE_SWR)
 
 /* Six different types of pages. */
 #define _PAGE_TYPE_EMPTY	0x400
@@ -321,6 +324,7 @@ extern unsigned long MODULES_END;
 
 /* Bits in the region table entry */
 #define _REGION_ENTRY_ORIGIN	~0xfffUL/* region/segment table origin	    */
+#define _REGION_ENTRY_RO	0x200	/* region protection bit	    */
 #define _REGION_ENTRY_INV	0x20	/* invalid region table entry	    */
 #define _REGION_ENTRY_TYPE_MASK	0x0c	/* region/segment table type mask   */
 #define _REGION_ENTRY_TYPE_R1	0x0c	/* region first table type	    */
@@ -382,9 +386,10 @@ extern unsigned long MODULES_END;
  */
 #define PAGE_NONE	__pgprot(_PAGE_TYPE_NONE)
 #define PAGE_RO		__pgprot(_PAGE_TYPE_RO)
-#define PAGE_RW		__pgprot(_PAGE_TYPE_RW)
+#define PAGE_RW		__pgprot(_PAGE_TYPE_RO | _PAGE_SWW)
+#define PAGE_RWC	__pgprot(_PAGE_TYPE_RW | _PAGE_SWW | _PAGE_SWC)
 
-#define PAGE_KERNEL	PAGE_RW
+#define PAGE_KERNEL	PAGE_RWC
 #define PAGE_SHARED	PAGE_KERNEL
 #define PAGE_COPY	PAGE_RO
 
@@ -632,23 +637,23 @@ static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste)
 	bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
 	/* Clear page changed & referenced bit in the storage key */
 	if (bits & _PAGE_CHANGED)
-		page_set_storage_key(address, skey ^ bits, 1);
+		page_set_storage_key(address, skey ^ bits, 0);
 	else if (bits)
 		page_reset_referenced(address);
 	/* Transfer page changed & referenced bit to guest bits in pgste */
 	pgste_val(pgste) |= bits << 48;		/* RCP_GR_BIT & RCP_GC_BIT */
 	/* Get host changed & referenced bits from pgste */
 	bits |= (pgste_val(pgste) & (RCP_HR_BIT | RCP_HC_BIT)) >> 52;
-	/* Clear host bits in pgste. */
+	/* Transfer page changed & referenced bit to kvm user bits */
+	pgste_val(pgste) |= bits << 45;		/* KVM_UR_BIT & KVM_UC_BIT */
+	/* Clear relevant host bits in pgste. */
 	pgste_val(pgste) &= ~(RCP_HR_BIT | RCP_HC_BIT);
 	pgste_val(pgste) &= ~(RCP_ACC_BITS | RCP_FP_BIT);
 	/* Copy page access key and fetch protection bit to pgste */
 	pgste_val(pgste) |=
 		(unsigned long) (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
-	/* Transfer changed and referenced to kvm user bits */
-	pgste_val(pgste) |= bits << 45;		/* KVM_UR_BIT & KVM_UC_BIT */
-	/* Transfer changed & referenced to pte sofware bits */
-	pte_val(*ptep) |= bits << 1;		/* _PAGE_SWR & _PAGE_SWC */
+	/* Transfer referenced bit to pte */
+	pte_val(*ptep) |= (bits & _PAGE_REFERENCED) << 1;
 #endif
 	return pgste;
 
@@ -661,20 +666,25 @@ static inline pgste_t pgste_update_young(pte_t *ptep, pgste_t pgste)
 
 	if (!pte_present(*ptep))
 		return pgste;
+	/* Get referenced bit from storage key */
 	young = page_reset_referenced(pte_val(*ptep) & PAGE_MASK);
-	/* Transfer page referenced bit to pte software bit (host view) */
-	if (young || (pgste_val(pgste) & RCP_HR_BIT))
+	if (young)
+		pgste_val(pgste) |= RCP_GR_BIT;
+	/* Get host referenced bit from pgste */
+	if (pgste_val(pgste) & RCP_HR_BIT) {
+		pgste_val(pgste) &= ~RCP_HR_BIT;
+		young = 1;
+	}
+	/* Transfer referenced bit to kvm user bits and pte */
+	if (young) {
+		pgste_val(pgste) |= KVM_UR_BIT;
 		pte_val(*ptep) |= _PAGE_SWR;
-	/* Clear host referenced bit in pgste. */
-	pgste_val(pgste) &= ~RCP_HR_BIT;
-	/* Transfer page referenced bit to guest bit in pgste */
-	pgste_val(pgste) |= (unsigned long) young << 50; /* set RCP_GR_BIT */
+	}
 #endif
 	return pgste;
-
 }
 
-static inline void pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry)
+static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry)
 {
 #ifdef CONFIG_PGSTE
 	unsigned long address;
@@ -688,10 +698,23 @@ static inline void pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry)
 	/* Set page access key and fetch protection bit from pgste */
 	nkey |= (pgste_val(pgste) & (RCP_ACC_BITS | RCP_FP_BIT)) >> 56;
 	if (okey != nkey)
-		page_set_storage_key(address, nkey, 1);
+		page_set_storage_key(address, nkey, 0);
 #endif
 }
 
+static inline void pgste_set_pte(pte_t *ptep, pte_t entry)
+{
+	if (!MACHINE_HAS_ESOP && (pte_val(entry) & _PAGE_SWW)) {
+		/*
+		 * Without enhanced suppression-on-protection force
+		 * the dirty bit on for all writable ptes.
+		 */
+		pte_val(entry) |= _PAGE_SWC;
+		pte_val(entry) &= ~_PAGE_RO;
+	}
+	*ptep = entry;
+}
+
 /**
  * struct gmap_struct - guest address space
  * @mm: pointer to the parent mm_struct
@@ -750,11 +773,14 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
 
 	if (mm_has_pgste(mm)) {
 		pgste = pgste_get_lock(ptep);
-		pgste_set_pte(ptep, pgste, entry);
-		*ptep = entry;
+		pgste_set_key(ptep, pgste, entry);
+		pgste_set_pte(ptep, entry);
 		pgste_set_unlock(ptep, pgste);
-	} else
+	} else {
+		if (!(pte_val(entry) & _PAGE_INVALID) && MACHINE_HAS_EDAT1)
+			pte_val(entry) |= _PAGE_CO;
 		*ptep = entry;
+	}
 }
 
 /*
@@ -763,16 +789,12 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
  */
 static inline int pte_write(pte_t pte)
 {
-	return (pte_val(pte) & _PAGE_RO) == 0;
+	return (pte_val(pte) & _PAGE_SWW) != 0;
 }
 
 static inline int pte_dirty(pte_t pte)
 {
-#ifdef CONFIG_PGSTE
-	if (pte_val(pte) & _PAGE_SWC)
-		return 1;
-#endif
-	return 0;
+	return (pte_val(pte) & _PAGE_SWC) != 0;
 }
 
 static inline int pte_young(pte_t pte)
@@ -822,11 +844,14 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 {
 	pte_val(pte) &= _PAGE_CHG_MASK;
 	pte_val(pte) |= pgprot_val(newprot);
+	if ((pte_val(pte) & _PAGE_SWC) && (pte_val(pte) & _PAGE_SWW))
+		pte_val(pte) &= ~_PAGE_RO;
 	return pte;
 }
 
 static inline pte_t pte_wrprotect(pte_t pte)
 {
+	pte_val(pte) &= ~_PAGE_SWW;
 	/* Do not clobber _PAGE_TYPE_NONE pages!  */
 	if (!(pte_val(pte) & _PAGE_INVALID))
 		pte_val(pte) |= _PAGE_RO;
@@ -835,20 +860,26 @@ static inline pte_t pte_wrprotect(pte_t pte)
 
 static inline pte_t pte_mkwrite(pte_t pte)
 {
-	pte_val(pte) &= ~_PAGE_RO;
+	pte_val(pte) |= _PAGE_SWW;
+	if (pte_val(pte) & _PAGE_SWC)
+		pte_val(pte) &= ~_PAGE_RO;
 	return pte;
 }
 
 static inline pte_t pte_mkclean(pte_t pte)
 {
-#ifdef CONFIG_PGSTE
 	pte_val(pte) &= ~_PAGE_SWC;
-#endif
+	/* Do not clobber _PAGE_TYPE_NONE pages!  */
+	if (!(pte_val(pte) & _PAGE_INVALID))
+		pte_val(pte) |= _PAGE_RO;
 	return pte;
 }
 
 static inline pte_t pte_mkdirty(pte_t pte)
 {
+	pte_val(pte) |= _PAGE_SWC;
+	if (pte_val(pte) & _PAGE_SWW)
+		pte_val(pte) &= ~_PAGE_RO;
 	return pte;
 }
 
@@ -886,10 +917,10 @@ static inline pte_t pte_mkhuge(pte_t pte)
 		pte_val(pte) |= _SEGMENT_ENTRY_INV;
 	}
 	/*
-	 * Clear SW pte bits SWT and SWX, there are no SW bits in a segment
-	 * table entry.
+	 * Clear SW pte bits, there are no SW bits in a segment table entry.
 	 */
-	pte_val(pte) &= ~(_PAGE_SWT | _PAGE_SWX);
+	pte_val(pte) &= ~(_PAGE_SWT | _PAGE_SWX | _PAGE_SWC |
+			  _PAGE_SWR | _PAGE_SWW);
 	/*
 	 * Also set the change-override bit because we don't need dirty bit
 	 * tracking for hugetlbfs pages.
@@ -1041,9 +1072,11 @@ static inline void ptep_modify_prot_commit(struct mm_struct *mm,
 					   unsigned long address,
 					   pte_t *ptep, pte_t pte)
 {
-	*ptep = pte;
-	if (mm_has_pgste(mm))
+	if (mm_has_pgste(mm)) {
+		pgste_set_pte(ptep, pte);
 		pgste_set_unlock(ptep, *(pgste_t *)(ptep + PTRS_PER_PTE));
+	} else
+		*ptep = pte;
 }
 
 #define __HAVE_ARCH_PTEP_CLEAR_FLUSH
@@ -1111,10 +1144,13 @@ static inline pte_t ptep_set_wrprotect(struct mm_struct *mm,
 
 		if (!mm_exclusive(mm))
 			__ptep_ipte(address, ptep);
-		*ptep = pte_wrprotect(pte);
+		pte = pte_wrprotect(pte);
 
-		if (mm_has_pgste(mm))
+		if (mm_has_pgste(mm)) {
+			pgste_set_pte(ptep, pte);
 			pgste_set_unlock(ptep, pgste);
+		} else
+			*ptep = pte;
 	}
 	return pte;
 }
@@ -1132,10 +1168,12 @@ static inline int ptep_set_access_flags(struct vm_area_struct *vma,
 		pgste = pgste_get_lock(ptep);
 
 	__ptep_ipte(address, ptep);
-	*ptep = entry;
 
-	if (mm_has_pgste(vma->vm_mm))
+	if (mm_has_pgste(vma->vm_mm)) {
+		pgste_set_pte(ptep, entry);
 		pgste_set_unlock(ptep, pgste);
+	} else
+		*ptep = entry;
 	return 1;
 }
 
@@ -1153,8 +1191,13 @@ static inline pte_t mk_pte_phys(unsigned long physpage, pgprot_t pgprot)
 static inline pte_t mk_pte(struct page *page, pgprot_t pgprot)
 {
 	unsigned long physpage = page_to_phys(page);
+	pte_t __pte = mk_pte_phys(physpage, pgprot);
 
-	return mk_pte_phys(physpage, pgprot);
+	if ((pte_val(__pte) & _PAGE_SWW) && PageDirty(page)) {
+		pte_val(__pte) |= _PAGE_SWC;
+		pte_val(__pte) &= ~_PAGE_RO;
+	}
+	return __pte;
 }
 
 #define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
@@ -1246,6 +1289,8 @@ static inline int pmd_trans_splitting(pmd_t pmd)
 static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
 			      pmd_t *pmdp, pmd_t entry)
 {
+	if (!(pmd_val(entry) & _SEGMENT_ENTRY_INV) && MACHINE_HAS_EDAT1)
+		pmd_val(entry) |= _SEGMENT_ENTRY_CO;
 	*pmdp = entry;
 }
 
diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h
index 833788693f09..06a136136047 100644
--- a/arch/s390/include/asm/sclp.h
+++ b/arch/s390/include/asm/sclp.h
@@ -46,7 +46,6 @@ int sclp_cpu_deconfigure(u8 cpu);
 void sclp_facilities_detect(void);
 unsigned long long sclp_get_rnmax(void);
 unsigned long long sclp_get_rzm(void);
-u8 sclp_get_fac85(void);
 int sclp_sdias_blk_count(void);
 int sclp_sdias_copy(void *dest, int blk_num, int nr_blks);
 int sclp_chp_configure(struct chp_id chpid);
diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h
index f69f76b3447a..f6857516e523 100644
--- a/arch/s390/include/asm/setup.h
+++ b/arch/s390/include/asm/setup.h
@@ -64,13 +64,14 @@ extern unsigned int s390_user_mode;
 
 #define MACHINE_FLAG_VM		(1UL << 0)
 #define MACHINE_FLAG_IEEE	(1UL << 1)
-#define MACHINE_FLAG_CSP	(1UL << 3)
-#define MACHINE_FLAG_MVPG	(1UL << 4)
-#define MACHINE_FLAG_DIAG44	(1UL << 5)
-#define MACHINE_FLAG_IDTE	(1UL << 6)
-#define MACHINE_FLAG_DIAG9C	(1UL << 7)
-#define MACHINE_FLAG_MVCOS	(1UL << 8)
-#define MACHINE_FLAG_KVM	(1UL << 9)
+#define MACHINE_FLAG_CSP	(1UL << 2)
+#define MACHINE_FLAG_MVPG	(1UL << 3)
+#define MACHINE_FLAG_DIAG44	(1UL << 4)
+#define MACHINE_FLAG_IDTE	(1UL << 5)
+#define MACHINE_FLAG_DIAG9C	(1UL << 6)
+#define MACHINE_FLAG_MVCOS	(1UL << 7)
+#define MACHINE_FLAG_KVM	(1UL << 8)
+#define MACHINE_FLAG_ESOP	(1UL << 9)
 #define MACHINE_FLAG_EDAT1	(1UL << 10)
 #define MACHINE_FLAG_EDAT2	(1UL << 11)
 #define MACHINE_FLAG_LPAR	(1UL << 12)
@@ -84,6 +85,7 @@ extern unsigned int s390_user_mode;
 #define MACHINE_IS_LPAR		(S390_lowcore.machine_flags & MACHINE_FLAG_LPAR)
 
 #define MACHINE_HAS_DIAG9C	(S390_lowcore.machine_flags & MACHINE_FLAG_DIAG9C)
+#define MACHINE_HAS_ESOP	(S390_lowcore.machine_flags & MACHINE_FLAG_ESOP)
 #define MACHINE_HAS_PFMF	MACHINE_HAS_EDAT1
 #define MACHINE_HAS_HPAGE	MACHINE_HAS_EDAT1
 
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index f090e819bf71..2923781590a6 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -147,7 +147,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 		r = KVM_MAX_VCPUS;
 		break;
 	case KVM_CAP_S390_COW:
-		r = sclp_get_fac85() & 0x2;
+		r = MACHINE_HAS_ESOP;
 		break;
 	default:
 		r = 0;
diff --git a/arch/s390/lib/uaccess_pt.c b/arch/s390/lib/uaccess_pt.c
index 9017a63dda3d..a70ee84c0241 100644
--- a/arch/s390/lib/uaccess_pt.c
+++ b/arch/s390/lib/uaccess_pt.c
@@ -50,7 +50,7 @@ static __always_inline unsigned long follow_table(struct mm_struct *mm,
 	ptep = pte_offset_map(pmd, addr);
 	if (!pte_present(*ptep))
 		return -0x11UL;
-	if (write && !pte_write(*ptep))
+	if (write && (!pte_write(*ptep) || !pte_dirty(*ptep)))
 		return -0x04UL;
 
 	return (pte_val(*ptep) & PAGE_MASK) + (addr & ~PAGE_MASK);
diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c
index 29ccee3651f4..d21040ed5e59 100644
--- a/arch/s390/mm/pageattr.c
+++ b/arch/s390/mm/pageattr.c
@@ -127,7 +127,7 @@ void kernel_map_pages(struct page *page, int numpages, int enable)
 			pte_val(*pte) = _PAGE_TYPE_EMPTY;
 			continue;
 		}
-		*pte = mk_pte_phys(address, __pgprot(_PAGE_TYPE_RW));
+		pte_val(*pte) = __pa(address);
 	}
 }
 
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index 6ed1426d27c5..79699f46a443 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -85,11 +85,9 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro)
 	pud_t *pu_dir;
 	pmd_t *pm_dir;
 	pte_t *pt_dir;
-	pte_t  pte;
 	int ret = -ENOMEM;
 
 	while (address < end) {
-		pte = mk_pte_phys(address, __pgprot(ro ? _PAGE_RO : 0));
 		pg_dir = pgd_offset_k(address);
 		if (pgd_none(*pg_dir)) {
 			pu_dir = vmem_pud_alloc();
@@ -101,9 +99,9 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro)
 #if defined(CONFIG_64BIT) && !defined(CONFIG_DEBUG_PAGEALLOC)
 		if (MACHINE_HAS_EDAT2 && pud_none(*pu_dir) && address &&
 		    !(address & ~PUD_MASK) && (address + PUD_SIZE <= end)) {
-			pte_val(pte) |= _REGION3_ENTRY_LARGE;
-			pte_val(pte) |= _REGION_ENTRY_TYPE_R3;
-			pud_val(*pu_dir) = pte_val(pte);
+			pud_val(*pu_dir) = __pa(address) |
+				_REGION_ENTRY_TYPE_R3 | _REGION3_ENTRY_LARGE |
+				(ro ? _REGION_ENTRY_RO : 0);
 			address += PUD_SIZE;
 			continue;
 		}
@@ -118,8 +116,9 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro)
 #if defined(CONFIG_64BIT) && !defined(CONFIG_DEBUG_PAGEALLOC)
 		if (MACHINE_HAS_EDAT1 && pmd_none(*pm_dir) && address &&
 		    !(address & ~PMD_MASK) && (address + PMD_SIZE <= end)) {
-			pte_val(pte) |= _SEGMENT_ENTRY_LARGE;
-			pmd_val(*pm_dir) = pte_val(pte);
+			pmd_val(*pm_dir) = __pa(address) |
+				_SEGMENT_ENTRY | _SEGMENT_ENTRY_LARGE |
+				(ro ? _SEGMENT_ENTRY_RO : 0);
 			address += PMD_SIZE;
 			continue;
 		}
@@ -132,7 +131,7 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro)
 		}
 
 		pt_dir = pte_offset_kernel(pm_dir, address);
-		*pt_dir = pte;
+		pte_val(*pt_dir) = __pa(address) | (ro ? _PAGE_RO : 0);
 		address += PAGE_SIZE;
 	}
 	ret = 0;
@@ -199,7 +198,6 @@ int __meminit vmemmap_populate(struct page *start, unsigned long nr, int node)
 	pud_t *pu_dir;
 	pmd_t *pm_dir;
 	pte_t *pt_dir;
-	pte_t  pte;
 	int ret = -ENOMEM;
 
 	start_addr = (unsigned long) start;
@@ -237,9 +235,8 @@ int __meminit vmemmap_populate(struct page *start, unsigned long nr, int node)
 				new_page = vmemmap_alloc_block(PMD_SIZE, node);
 				if (!new_page)
 					goto out;
-				pte = mk_pte_phys(__pa(new_page), PAGE_RW);
-				pte_val(pte) |= _SEGMENT_ENTRY_LARGE;
-				pmd_val(*pm_dir) = pte_val(pte);
+				pmd_val(*pm_dir) = __pa(new_page) |
+					_SEGMENT_ENTRY | _SEGMENT_ENTRY_LARGE;
 				address = (address + PMD_SIZE) & PMD_MASK;
 				continue;
 			}
@@ -260,8 +257,7 @@ int __meminit vmemmap_populate(struct page *start, unsigned long nr, int node)
 			new_page =__pa(vmem_alloc_pages(0));
 			if (!new_page)
 				goto out;
-			pte = pfn_pte(new_page >> PAGE_SHIFT, PAGE_KERNEL);
-			*pt_dir = pte;
+			pte_val(*pt_dir) = __pa(new_page);
 		}
 		address += PAGE_SIZE;
 	}
diff --git a/drivers/s390/char/sclp_cmd.c b/drivers/s390/char/sclp_cmd.c
index c44d13f607bc..30a2255389e5 100644
--- a/drivers/s390/char/sclp_cmd.c
+++ b/drivers/s390/char/sclp_cmd.c
@@ -56,7 +56,6 @@ static int __initdata early_read_info_sccb_valid;
 
 u64 sclp_facilities;
 static u8 sclp_fac84;
-static u8 sclp_fac85;
 static unsigned long long rzm;
 static unsigned long long rnmax;
 
@@ -131,7 +130,8 @@ void __init sclp_facilities_detect(void)
 	sccb = &early_read_info_sccb;
 	sclp_facilities = sccb->facilities;
 	sclp_fac84 = sccb->fac84;
-	sclp_fac85 = sccb->fac85;
+	if (sccb->fac85 & 0x02)
+		S390_lowcore.machine_flags |= MACHINE_FLAG_ESOP;
 	rnmax = sccb->rnmax ? sccb->rnmax : sccb->rnmax2;
 	rzm = sccb->rnsize ? sccb->rnsize : sccb->rnsize2;
 	rzm <<= 20;
@@ -171,12 +171,6 @@ unsigned long long sclp_get_rzm(void)
 	return rzm;
 }
 
-u8 sclp_get_fac85(void)
-{
-	return sclp_fac85;
-}
-EXPORT_SYMBOL_GPL(sclp_get_fac85);
-
 /*
  * This function will be called after sclp_facilities_detect(), which gets
  * called from early.c code. Therefore the sccb should have valid contents.
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 5cf680a98f9b..bfd87685fc1f 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -197,16 +197,6 @@ static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 #endif
 
-#ifndef __HAVE_ARCH_PAGE_TEST_AND_CLEAR_DIRTY
-#define page_test_and_clear_dirty(pfn, mapped)	(0)
-#endif
-
-#ifndef __HAVE_ARCH_PAGE_TEST_AND_CLEAR_DIRTY
-#define pte_maybe_dirty(pte)		pte_dirty(pte)
-#else
-#define pte_maybe_dirty(pte)		(1)
-#endif
-
 #ifndef __HAVE_ARCH_PAGE_TEST_AND_CLEAR_YOUNG
 #define page_test_and_clear_young(pfn) (0)
 #endif
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 70473da47b3f..6d53675c2b54 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -303,21 +303,13 @@ static inline void __SetPageUptodate(struct page *page)
 
 static inline void SetPageUptodate(struct page *page)
 {
-#ifdef CONFIG_S390
-	if (!test_and_set_bit(PG_uptodate, &page->flags))
-		page_set_storage_key(page_to_phys(page), PAGE_DEFAULT_KEY, 0);
-#else
 	/*
 	 * Memory barrier must be issued before setting the PG_uptodate bit,
 	 * so that all previous stores issued in order to bring the page
 	 * uptodate are actually visible before PageUptodate becomes true.
-	 *
-	 * s390 doesn't need an explicit smp_wmb here because the test and
-	 * set bit already provides full barriers.
 	 */
 	smp_wmb();
 	set_bit(PG_uptodate, &(page)->flags);
-#endif
 }
 
 CLEARPAGEFLAG(Uptodate, uptodate)
diff --git a/mm/rmap.c b/mm/rmap.c
index 2c78f8cadc95..3d38edffda41 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1126,7 +1126,6 @@ void page_add_file_rmap(struct page *page)
  */
 void page_remove_rmap(struct page *page)
 {
-	struct address_space *mapping = page_mapping(page);
 	bool anon = PageAnon(page);
 	bool locked;
 	unsigned long flags;
@@ -1143,29 +1142,6 @@ void page_remove_rmap(struct page *page)
 	if (!atomic_add_negative(-1, &page->_mapcount))
 		goto out;
 
-	/*
-	 * Now that the last pte has gone, s390 must transfer dirty
-	 * flag from storage key to struct page.  We can usually skip
-	 * this if the page is anon, so about to be freed; but perhaps
-	 * not if it's in swapcache - there might be another pte slot
-	 * containing the swap entry, but page not yet written to swap.
-	 *
-	 * And we can skip it on file pages, so long as the filesystem
-	 * participates in dirty tracking (note that this is not only an
-	 * optimization but also solves problems caused by dirty flag in
-	 * storage key getting set by a write from inside kernel); but need to
-	 * catch shm and tmpfs and ramfs pages which have been modified since
-	 * creation by read fault.
-	 *
-	 * Note that mapping must be decided above, before decrementing
-	 * mapcount (which luckily provides a barrier): once page is unmapped,
-	 * it could be truncated and page->mapping reset to NULL at any moment.
-	 * Note also that we are relying on page_mapping(page) to set mapping
-	 * to &swapper_space when PageSwapCache(page).
-	 */
-	if (mapping && !mapping_cap_account_dirty(mapping) &&
-	    page_test_and_clear_dirty(page_to_pfn(page), 1))
-		set_page_dirty(page);
 	/*
 	 * Hugepages are not counted in NR_ANON_PAGES nor NR_FILE_MAPPED
 	 * and not charged by memcg for now.
-- 
cgit v1.2.3


From 878ec67b3ac528a2b6d44055f049cdbb9cfda30c Mon Sep 17 00:00:00 2001
From: Philipp Zabel <p.zabel@pengutronix.de>
Date: Thu, 14 Feb 2013 17:39:08 +0100
Subject: regmap: mmio: add register clock support

Some mmio devices have a dedicated interface clock that needs
to be enabled to access their registers. This patch optionally
enables a clock before accessing registers in the regmap_bus
callbacks.

I added (devm_)regmap_init_mmio_clk variants of the init
functions that have an added clk_id string parameter. This
is passed to clk_get to request the clock from the clk
framework.

Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 drivers/base/regmap/regmap-mmio.c | 79 ++++++++++++++++++++++++++++++++-------
 include/linux/regmap.h            | 47 ++++++++++++++++++++---
 2 files changed, 107 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/regmap/regmap-mmio.c b/drivers/base/regmap/regmap-mmio.c
index f05fc74dd84a..98745dd77e8c 100644
--- a/drivers/base/regmap/regmap-mmio.c
+++ b/drivers/base/regmap/regmap-mmio.c
@@ -16,6 +16,7 @@
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
+#include <linux/clk.h>
 #include <linux/err.h>
 #include <linux/init.h>
 #include <linux/io.h>
@@ -26,6 +27,7 @@
 struct regmap_mmio_context {
 	void __iomem *regs;
 	unsigned val_bytes;
+	struct clk *clk;
 };
 
 static int regmap_mmio_gather_write(void *context,
@@ -34,9 +36,16 @@ static int regmap_mmio_gather_write(void *context,
 {
 	struct regmap_mmio_context *ctx = context;
 	u32 offset;
+	int ret;
 
 	BUG_ON(reg_size != 4);
 
+	if (ctx->clk) {
+		ret = clk_enable(ctx->clk);
+		if (ret < 0)
+			return ret;
+	}
+
 	offset = *(u32 *)reg;
 
 	while (val_size) {
@@ -64,6 +73,9 @@ static int regmap_mmio_gather_write(void *context,
 		offset += ctx->val_bytes;
 	}
 
+	if (ctx->clk)
+		clk_disable(ctx->clk);
+
 	return 0;
 }
 
@@ -80,9 +92,16 @@ static int regmap_mmio_read(void *context,
 {
 	struct regmap_mmio_context *ctx = context;
 	u32 offset;
+	int ret;
 
 	BUG_ON(reg_size != 4);
 
+	if (ctx->clk) {
+		ret = clk_enable(ctx->clk);
+		if (ret < 0)
+			return ret;
+	}
+
 	offset = *(u32 *)reg;
 
 	while (val_size) {
@@ -110,11 +129,20 @@ static int regmap_mmio_read(void *context,
 		offset += ctx->val_bytes;
 	}
 
+	if (ctx->clk)
+		clk_disable(ctx->clk);
+
 	return 0;
 }
 
 static void regmap_mmio_free_context(void *context)
 {
+	struct regmap_mmio_context *ctx = context;
+
+	if (ctx->clk) {
+		clk_unprepare(ctx->clk);
+		clk_put(ctx->clk);
+	}
 	kfree(context);
 }
 
@@ -128,11 +156,14 @@ static struct regmap_bus regmap_mmio = {
 	.val_format_endian_default = REGMAP_ENDIAN_NATIVE,
 };
 
-static struct regmap_mmio_context *regmap_mmio_gen_context(void __iomem *regs,
+static struct regmap_mmio_context *regmap_mmio_gen_context(struct device *dev,
+					const char *clk_id,
+					void __iomem *regs,
 					const struct regmap_config *config)
 {
 	struct regmap_mmio_context *ctx;
 	int min_stride;
+	int ret;
 
 	if (config->reg_bits != 32)
 		return ERR_PTR(-EINVAL);
@@ -179,37 +210,59 @@ static struct regmap_mmio_context *regmap_mmio_gen_context(void __iomem *regs,
 	ctx->regs = regs;
 	ctx->val_bytes = config->val_bits / 8;
 
+	if (clk_id == NULL)
+		return ctx;
+
+	ctx->clk = clk_get(dev, clk_id);
+	if (IS_ERR(ctx->clk)) {
+		ret = PTR_ERR(ctx->clk);
+		goto err_free;
+	}
+
+	ret = clk_prepare(ctx->clk);
+	if (ret < 0) {
+		clk_put(ctx->clk);
+		goto err_free;
+	}
+
 	return ctx;
+
+err_free:
+	kfree(ctx);
+
+	return ERR_PTR(ret);
 }
 
 /**
- * regmap_init_mmio(): Initialise register map
+ * regmap_init_mmio_clk(): Initialise register map with register clock
  *
  * @dev: Device that will be interacted with
+ * @clk_id: register clock consumer ID
  * @regs: Pointer to memory-mapped IO region
  * @config: Configuration for register map
  *
  * The return value will be an ERR_PTR() on error or a valid pointer to
  * a struct regmap.
  */
-struct regmap *regmap_init_mmio(struct device *dev,
-				void __iomem *regs,
-				const struct regmap_config *config)
+struct regmap *regmap_init_mmio_clk(struct device *dev, const char *clk_id,
+				    void __iomem *regs,
+				    const struct regmap_config *config)
 {
 	struct regmap_mmio_context *ctx;
 
-	ctx = regmap_mmio_gen_context(regs, config);
+	ctx = regmap_mmio_gen_context(dev, clk_id, regs, config);
 	if (IS_ERR(ctx))
 		return ERR_CAST(ctx);
 
 	return regmap_init(dev, &regmap_mmio, ctx, config);
 }
-EXPORT_SYMBOL_GPL(regmap_init_mmio);
+EXPORT_SYMBOL_GPL(regmap_init_mmio_clk);
 
 /**
- * devm_regmap_init_mmio(): Initialise managed register map
+ * devm_regmap_init_mmio_clk(): Initialise managed register map with clock
  *
  * @dev: Device that will be interacted with
+ * @clk_id: register clock consumer ID
  * @regs: Pointer to memory-mapped IO region
  * @config: Configuration for register map
  *
@@ -217,18 +270,18 @@ EXPORT_SYMBOL_GPL(regmap_init_mmio);
  * to a struct regmap.  The regmap will be automatically freed by the
  * device management code.
  */
-struct regmap *devm_regmap_init_mmio(struct device *dev,
-				     void __iomem *regs,
-				     const struct regmap_config *config)
+struct regmap *devm_regmap_init_mmio_clk(struct device *dev, const char *clk_id,
+					 void __iomem *regs,
+					 const struct regmap_config *config)
 {
 	struct regmap_mmio_context *ctx;
 
-	ctx = regmap_mmio_gen_context(regs, config);
+	ctx = regmap_mmio_gen_context(dev, clk_id, regs, config);
 	if (IS_ERR(ctx))
 		return ERR_CAST(ctx);
 
 	return devm_regmap_init(dev, &regmap_mmio, ctx, config);
 }
-EXPORT_SYMBOL_GPL(devm_regmap_init_mmio);
+EXPORT_SYMBOL_GPL(devm_regmap_init_mmio_clk);
 
 MODULE_LICENSE("GPL v2");
diff --git a/include/linux/regmap.h b/include/linux/regmap.h
index b7e95bf942c9..f0480a3297e4 100644
--- a/include/linux/regmap.h
+++ b/include/linux/regmap.h
@@ -285,9 +285,9 @@ struct regmap *regmap_init_i2c(struct i2c_client *i2c,
 			       const struct regmap_config *config);
 struct regmap *regmap_init_spi(struct spi_device *dev,
 			       const struct regmap_config *config);
-struct regmap *regmap_init_mmio(struct device *dev,
-				void __iomem *regs,
-				const struct regmap_config *config);
+struct regmap *regmap_init_mmio_clk(struct device *dev, const char *clk_id,
+				    void __iomem *regs,
+				    const struct regmap_config *config);
 
 struct regmap *devm_regmap_init(struct device *dev,
 				const struct regmap_bus *bus,
@@ -297,9 +297,44 @@ struct regmap *devm_regmap_init_i2c(struct i2c_client *i2c,
 				    const struct regmap_config *config);
 struct regmap *devm_regmap_init_spi(struct spi_device *dev,
 				    const struct regmap_config *config);
-struct regmap *devm_regmap_init_mmio(struct device *dev,
-				     void __iomem *regs,
-				     const struct regmap_config *config);
+struct regmap *devm_regmap_init_mmio_clk(struct device *dev, const char *clk_id,
+					 void __iomem *regs,
+					 const struct regmap_config *config);
+
+/**
+ * regmap_init_mmio(): Initialise register map
+ *
+ * @dev: Device that will be interacted with
+ * @regs: Pointer to memory-mapped IO region
+ * @config: Configuration for register map
+ *
+ * The return value will be an ERR_PTR() on error or a valid pointer to
+ * a struct regmap.
+ */
+static inline struct regmap *regmap_init_mmio(struct device *dev,
+					void __iomem *regs,
+					const struct regmap_config *config)
+{
+	return regmap_init_mmio_clk(dev, NULL, regs, config);
+}
+
+/**
+ * devm_regmap_init_mmio(): Initialise managed register map
+ *
+ * @dev: Device that will be interacted with
+ * @regs: Pointer to memory-mapped IO region
+ * @config: Configuration for register map
+ *
+ * The return value will be an ERR_PTR() on error or a valid pointer
+ * to a struct regmap.  The regmap will be automatically freed by the
+ * device management code.
+ */
+static inline struct regmap *devm_regmap_init_mmio(struct device *dev,
+					void __iomem *regs,
+					const struct regmap_config *config)
+{
+	return devm_regmap_init_mmio_clk(dev, NULL, regs, config);
+}
 
 void regmap_exit(struct regmap *map);
 int regmap_reinit_cache(struct regmap *map,
-- 
cgit v1.2.3


From 56cc6cb707fedd897d391f6e50e3b56d62d6683f Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 14 Feb 2013 23:26:56 +0100
Subject: ARM defconfigs: add missing inclusions of linux/platform_device.h

Patch 16559ae "kgdb: remove #include <linux/serial_8250.h> from kgdb.h"
removed an implicit inclusion of linux/platform_device.h
In a number of places. This adds back explicit inclusions in a few
more places I found.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Florian Tobias Schandinat <FlorianSchandinat@gmx.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/plat-samsung/include/plat/adc.h | 1 +
 drivers/video/clps711xfb.c               | 1 +
 include/linux/platform_data/s3c-hsotg.h  | 2 ++
 3 files changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/arch/arm/plat-samsung/include/plat/adc.h b/arch/arm/plat-samsung/include/plat/adc.h
index b258a08de591..2fc89315553f 100644
--- a/arch/arm/plat-samsung/include/plat/adc.h
+++ b/arch/arm/plat-samsung/include/plat/adc.h
@@ -15,6 +15,7 @@
 #define __ASM_PLAT_ADC_H __FILE__
 
 struct s3c_adc_client;
+struct platform_device;
 
 extern int s3c_adc_start(struct s3c_adc_client *client,
 			 unsigned int channel, unsigned int nr_samples);
diff --git a/drivers/video/clps711xfb.c b/drivers/video/clps711xfb.c
index 5a7af0deced2..f00980607b8f 100644
--- a/drivers/video/clps711xfb.c
+++ b/drivers/video/clps711xfb.c
@@ -26,6 +26,7 @@
 #include <linux/fb.h>
 #include <linux/init.h>
 #include <linux/delay.h>
+#include <linux/platform_device.h>
 
 #include <mach/hardware.h>
 #include <asm/mach-types.h>
diff --git a/include/linux/platform_data/s3c-hsotg.h b/include/linux/platform_data/s3c-hsotg.h
index 8b79e0967f9c..3f1cbf95ec3b 100644
--- a/include/linux/platform_data/s3c-hsotg.h
+++ b/include/linux/platform_data/s3c-hsotg.h
@@ -15,6 +15,8 @@
 #ifndef __LINUX_USB_S3C_HSOTG_H
 #define __LINUX_USB_S3C_HSOTG_H
 
+struct platform_device;
+
 enum s3c_hsotg_dmamode {
 	S3C_HSOTG_DMA_NONE,	/* do not use DMA at-all */
 	S3C_HSOTG_DMA_ONLY,	/* always use DMA */
-- 
cgit v1.2.3


From 7bf9b9a0f0372d45b581f00173505fb76a9c5d23 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 27 Dec 2012 18:45:41 +0100
Subject: wireless: define operating mode action frame

Define the action frame format, the VHT category
and its action types and the field format and EID
for operating mode notifications. The frame may
be used outside of VHT context as well, so don't
include "VHT" in the names.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h | 37 +++++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 7e8a498efe6d..67c1a6c45837 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -714,6 +714,30 @@ enum ieee80211_ht_chanwidth_values {
 	IEEE80211_HT_CHANWIDTH_ANY = 1,
 };
 
+/**
+ * enum ieee80211_opmode_bits - VHT operating mode field bits
+ * @IEEE80211_OPMODE_NOTIF_CHANWIDTH_MASK: channel width mask
+ * @IEEE80211_OPMODE_NOTIF_CHANWIDTH_20MHZ: 20 MHz channel width
+ * @IEEE80211_OPMODE_NOTIF_CHANWIDTH_40MHZ: 40 MHz channel width
+ * @IEEE80211_OPMODE_NOTIF_CHANWIDTH_80MHZ: 80 MHz channel width
+ * @IEEE80211_OPMODE_NOTIF_CHANWIDTH_160MHZ: 160 MHz or 80+80 MHz channel width
+ * @IEEE80211_OPMODE_NOTIF_RX_NSS_MASK: number of spatial streams mask
+ *	(the NSS value is the value of this field + 1)
+ * @IEEE80211_OPMODE_NOTIF_RX_NSS_SHIFT: number of spatial streams shift
+ * @IEEE80211_OPMODE_NOTIF_RX_NSS_TYPE_BF: indicates streams in SU-MIMO PPDU
+ *	using a beamforming steering matrix
+ */
+enum ieee80211_vht_opmode_bits {
+	IEEE80211_OPMODE_NOTIF_CHANWIDTH_MASK	= 3,
+	IEEE80211_OPMODE_NOTIF_CHANWIDTH_20MHZ	= 0,
+	IEEE80211_OPMODE_NOTIF_CHANWIDTH_40MHZ	= 1,
+	IEEE80211_OPMODE_NOTIF_CHANWIDTH_80MHZ	= 2,
+	IEEE80211_OPMODE_NOTIF_CHANWIDTH_160MHZ	= 3,
+	IEEE80211_OPMODE_NOTIF_RX_NSS_MASK	= 0x70,
+	IEEE80211_OPMODE_NOTIF_RX_NSS_SHIFT	= 4,
+	IEEE80211_OPMODE_NOTIF_RX_NSS_TYPE_BF	= 0x80,
+};
+
 #define WLAN_SA_QUERY_TR_ID_LEN 2
 
 struct ieee80211_mgmt {
@@ -844,6 +868,10 @@ struct ieee80211_mgmt {
 					__le16 capability;
 					u8 variable[0];
 				} __packed tdls_discover_resp;
+				struct {
+					u8 action_code;
+					u8 operating_mode;
+				} __packed vht_opmode_notif;
 			} u;
 		} __packed action;
 	} u;
@@ -1598,6 +1626,7 @@ enum ieee80211_eid {
 
 	WLAN_EID_VHT_CAPABILITY = 191,
 	WLAN_EID_VHT_OPERATION = 192,
+	WLAN_EID_OPMODE_NOTIF = 199,
 
 	/* 802.11ad */
 	WLAN_EID_NON_TX_BSSID_CAP =  83,
@@ -1652,6 +1681,7 @@ enum ieee80211_category {
 	WLAN_CATEGORY_WMM = 17,
 	WLAN_CATEGORY_FST = 18,
 	WLAN_CATEGORY_UNPROT_DMG = 20,
+	WLAN_CATEGORY_VHT = 21,
 	WLAN_CATEGORY_VENDOR_SPECIFIC_PROTECTED = 126,
 	WLAN_CATEGORY_VENDOR_SPECIFIC = 127,
 };
@@ -1677,6 +1707,13 @@ enum ieee80211_ht_actioncode {
 	WLAN_HT_ACTION_ASEL_IDX_FEEDBACK = 7,
 };
 
+/* VHT action codes */
+enum ieee80211_vht_actioncode {
+	WLAN_VHT_ACTION_COMPRESSED_BF = 0,
+	WLAN_VHT_ACTION_GROUPID_MGMT = 1,
+	WLAN_VHT_ACTION_OPMODE_NOTIF = 2,
+};
+
 /* Self Protected Action codes */
 enum ieee80211_self_protected_actioncode {
 	WLAN_SP_RESERVED = 0,
-- 
cgit v1.2.3


From 0af83d3df5863224336a18c24a14fda542b712f5 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 27 Dec 2012 18:55:36 +0100
Subject: mac80211: handle VHT operating mode notification

Handle the operating mode notification action frame.
When the supported streams or the bandwidth change
let the driver and rate control algorithm know.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h  |  1 +
 include/net/mac80211.h     |  3 ++
 net/mac80211/ht.c          |  4 ++
 net/mac80211/ieee80211_i.h |  3 ++
 net/mac80211/rx.c          | 30 +++++++++++++++
 net/mac80211/sta_info.h    |  4 ++
 net/mac80211/vht.c         | 93 +++++++++++++++++++++++++++++++++++++++++-----
 7 files changed, 128 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 67c1a6c45837..12b5996533ec 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -1301,6 +1301,7 @@ struct ieee80211_vht_operation {
 #define IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_11454			0x00000002
 #define IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ		0x00000004
 #define IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ	0x00000008
+#define IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK			0x0000000C
 #define IEEE80211_VHT_CAP_RXLDPC				0x00000010
 #define IEEE80211_VHT_CAP_SHORT_GI_80				0x00000020
 #define IEEE80211_VHT_CAP_SHORT_GI_160				0x00000040
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index a608ab9879b4..b7fb311e83f4 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -2118,11 +2118,14 @@ enum ieee80211_frame_release_type {
  * @IEEE80211_RC_SUPP_RATES_CHANGED: The supported rate set of this peer
  *	changed (in IBSS mode) due to discovering more information about
  *	the peer.
+ * @IEEE80211_RC_NSS_CHANGED: N_SS (number of spatial streams) was changed
+ *	by the peer
  */
 enum ieee80211_rate_control_changed {
 	IEEE80211_RC_BW_CHANGED		= BIT(0),
 	IEEE80211_RC_SMPS_CHANGED	= BIT(1),
 	IEEE80211_RC_SUPP_RATES_CHANGED	= BIT(2),
+	IEEE80211_RC_NSS_CHANGED	= BIT(3),
 };
 
 /**
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index a64b4f0d373f..797969bc26e1 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -212,6 +212,10 @@ bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata,
 		changed = true;
 	sta->sta.bandwidth = bw;
 
+	sta->cur_max_bandwidth =
+		ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 ?
+				IEEE80211_STA_RX_BW_40 : IEEE80211_STA_RX_BW_20;
+
 	return changed;
 }
 
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 3b13af4e6c49..4947c91c6c86 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -1433,6 +1433,9 @@ void ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata,
 					 struct sta_info *sta);
 enum ieee80211_sta_rx_bandwidth ieee80211_sta_cur_vht_bw(struct sta_info *sta);
 void ieee80211_sta_set_rx_nss(struct sta_info *sta);
+void ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata,
+				 struct sta_info *sta, u8 opmode,
+				 enum ieee80211_band band);
 
 /* Spectrum management */
 void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata,
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 8a861a50b12f..1617e0bd4ca6 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -2435,6 +2435,36 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
 			goto invalid;
 		}
 
+		break;
+	case WLAN_CATEGORY_VHT:
+		if (sdata->vif.type != NL80211_IFTYPE_STATION &&
+		    sdata->vif.type != NL80211_IFTYPE_MESH_POINT &&
+		    sdata->vif.type != NL80211_IFTYPE_AP_VLAN &&
+		    sdata->vif.type != NL80211_IFTYPE_AP &&
+		    sdata->vif.type != NL80211_IFTYPE_ADHOC)
+			break;
+
+		/* verify action code is present */
+		if (len < IEEE80211_MIN_ACTION_SIZE + 1)
+			goto invalid;
+
+		switch (mgmt->u.action.u.vht_opmode_notif.action_code) {
+		case WLAN_VHT_ACTION_OPMODE_NOTIF: {
+			u8 opmode;
+
+			/* verify opmode is present */
+			if (len < IEEE80211_MIN_ACTION_SIZE + 2)
+				goto invalid;
+
+			opmode = mgmt->u.action.u.vht_opmode_notif.operating_mode;
+
+			ieee80211_vht_handle_opmode(rx->sdata, rx->sta,
+						    opmode, status->band);
+			goto handled;
+		}
+		default:
+			break;
+		}
 		break;
 	case WLAN_CATEGORY_BACK:
 		if (sdata->vif.type != NL80211_IFTYPE_STATION &&
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 03c42f8d39f0..63dfdb5e91da 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -297,6 +297,8 @@ struct sta_ampdu_mlme {
  * @sta_state: duplicates information about station state (for debug)
  * @beacon_loss_count: number of times beacon loss has triggered
  * @rcu_head: RCU head used for freeing this station struct
+ * @cur_max_bandwidth: maximum bandwidth to use for TX to the station,
+ *	taken from HT/VHT capabilities or VHT operating mode notification
  */
 struct sta_info {
 	/* General information, mostly static */
@@ -398,6 +400,8 @@ struct sta_info {
 	} debugfs;
 #endif
 
+	enum ieee80211_sta_rx_bandwidth cur_max_bandwidth;
+
 	unsigned int lost_packets;
 	unsigned int beacon_loss_count;
 
diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c
index 67436e3efbbd..0951f74e7ff5 100644
--- a/net/mac80211/vht.c
+++ b/net/mac80211/vht.c
@@ -10,6 +10,7 @@
 #include <linux/export.h>
 #include <net/mac80211.h>
 #include "ieee80211_i.h"
+#include "rate.h"
 
 
 void ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata,
@@ -39,6 +40,15 @@ void ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata,
 	memcpy(&vht_cap->vht_mcs, &vht_cap_ie->supp_mcs,
 	       sizeof(struct ieee80211_vht_mcs_info));
 
+	switch (vht_cap->cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK) {
+	case IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ:
+	case IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ:
+		sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_160;
+		break;
+	default:
+		sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_80;
+	}
+
 	sta->sta.bandwidth = ieee80211_sta_cur_vht_bw(sta);
 }
 
@@ -46,12 +56,13 @@ enum ieee80211_sta_rx_bandwidth ieee80211_sta_cur_vht_bw(struct sta_info *sta)
 {
 	struct ieee80211_sub_if_data *sdata = sta->sdata;
 	u32 cap = sta->sta.vht_cap.cap;
+	enum ieee80211_sta_rx_bandwidth bw;
 
-	if (!sta->sta.vht_cap.vht_supported)
-		return sta->sta.ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 ?
+	if (!sta->sta.vht_cap.vht_supported) {
+		bw = sta->sta.ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 ?
 				IEEE80211_STA_RX_BW_40 : IEEE80211_STA_RX_BW_20;
-
-	/* TODO: handle VHT opmode notification data */
+		goto check_max;
+	}
 
 	switch (sdata->vif.bss_conf.chandef.width) {
 	default:
@@ -60,19 +71,31 @@ enum ieee80211_sta_rx_bandwidth ieee80211_sta_cur_vht_bw(struct sta_info *sta)
 	case NL80211_CHAN_WIDTH_20_NOHT:
 	case NL80211_CHAN_WIDTH_20:
 	case NL80211_CHAN_WIDTH_40:
-		return sta->sta.ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 ?
+		bw = sta->sta.ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 ?
 				IEEE80211_STA_RX_BW_40 : IEEE80211_STA_RX_BW_20;
+		break;
 	case NL80211_CHAN_WIDTH_160:
-		if (cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ)
-			return IEEE80211_STA_RX_BW_160;
+		if ((cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK) ==
+				IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ) {
+			bw = IEEE80211_STA_RX_BW_160;
+			break;
+		}
 		/* fall through */
 	case NL80211_CHAN_WIDTH_80P80:
-		if (cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ)
-			return IEEE80211_STA_RX_BW_160;
+		if ((cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK) ==
+				IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ) {
+			bw = IEEE80211_STA_RX_BW_160;
+			break;
+		}
 		/* fall through */
 	case NL80211_CHAN_WIDTH_80:
-		return IEEE80211_STA_RX_BW_80;
+		bw = IEEE80211_STA_RX_BW_80;
 	}
+
+ check_max:
+	if (bw > sta->cur_max_bandwidth)
+		bw = sta->cur_max_bandwidth;
+	return bw;
 }
 
 void ieee80211_sta_set_rx_nss(struct sta_info *sta)
@@ -115,3 +138,53 @@ void ieee80211_sta_set_rx_nss(struct sta_info *sta)
 	ht_rx_nss = max(ht_rx_nss, vht_rx_nss);
 	sta->sta.rx_nss = max_t(u8, 1, ht_rx_nss);
 }
+
+void ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata,
+				 struct sta_info *sta, u8 opmode,
+				 enum ieee80211_band band)
+{
+	struct ieee80211_local *local = sdata->local;
+	struct ieee80211_supported_band *sband;
+	enum ieee80211_sta_rx_bandwidth new_bw;
+	u32 changed = 0;
+	u8 nss;
+
+	sband = local->hw.wiphy->bands[band];
+
+	/* ignore - no support for BF yet */
+	if (opmode & IEEE80211_OPMODE_NOTIF_RX_NSS_TYPE_BF)
+		return;
+
+	nss = opmode & IEEE80211_OPMODE_NOTIF_RX_NSS_MASK;
+	nss >>= IEEE80211_OPMODE_NOTIF_RX_NSS_SHIFT;
+	nss += 1;
+
+	if (sta->sta.rx_nss != nss) {
+		sta->sta.rx_nss = nss;
+		changed |= IEEE80211_RC_NSS_CHANGED;
+	}
+
+	switch (opmode & IEEE80211_OPMODE_NOTIF_CHANWIDTH_MASK) {
+	case IEEE80211_OPMODE_NOTIF_CHANWIDTH_20MHZ:
+		sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_20;
+		break;
+	case IEEE80211_OPMODE_NOTIF_CHANWIDTH_40MHZ:
+		sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_40;
+		break;
+	case IEEE80211_OPMODE_NOTIF_CHANWIDTH_80MHZ:
+		sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_80;
+		break;
+	case IEEE80211_OPMODE_NOTIF_CHANWIDTH_160MHZ:
+		sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_160;
+		break;
+	}
+
+	new_bw = ieee80211_sta_cur_vht_bw(sta);
+	if (new_bw != sta->sta.bandwidth) {
+		sta->sta.bandwidth = new_bw;
+		changed |= IEEE80211_RC_NSS_CHANGED;
+	}
+
+	if (changed)
+		rate_control_rate_update(local, sband, sta, changed);
+}
-- 
cgit v1.2.3


From 4a3cb702b05868f67c4ee3da3380461c5b90b4ca Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Tue, 12 Feb 2013 16:43:19 +0100
Subject: mac80211: constify IE parsing

Make all the parsed IE pointers const, and propagate
the change to all the users etc.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h   |  2 +-
 net/mac80211/ht.c           |  2 +-
 net/mac80211/ieee80211_i.h  | 87 +++++++++++++++++++++++----------------------
 net/mac80211/mesh.h         | 11 +++---
 net/mac80211/mesh_hwmp.c    | 42 ++++++++++++----------
 net/mac80211/mesh_pathtbl.c | 11 +++---
 net/mac80211/mlme.c         | 15 ++++----
 net/mac80211/util.c         |  4 +--
 net/mac80211/vht.c          |  9 ++---
 9 files changed, 96 insertions(+), 87 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 12b5996533ec..e085fcf52b26 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -2152,7 +2152,7 @@ static inline unsigned long ieee80211_tu_to_usec(unsigned long tu)
  * @tim_len: length of the TIM IE
  * @aid: the AID to look for
  */
-static inline bool ieee80211_check_tim(struct ieee80211_tim_ie *tim,
+static inline bool ieee80211_check_tim(const struct ieee80211_tim_ie *tim,
 				       u8 tim_len, u16 aid)
 {
 	u8 mask;
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index 797969bc26e1..b84147ac5b4c 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -94,7 +94,7 @@ void ieee80211_apply_htcap_overrides(struct ieee80211_sub_if_data *sdata,
 
 bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata,
 				       struct ieee80211_supported_band *sband,
-				       struct ieee80211_ht_cap *ht_cap_ie,
+				       const struct ieee80211_ht_cap *ht_cap_ie,
 				       struct sta_info *sta)
 {
 	struct ieee80211_sta_ht_cap ht_cap;
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index d1074442f1b5..d702f7dc321b 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -1163,41 +1163,41 @@ struct ieee80211_ra_tid {
 
 /* Parsed Information Elements */
 struct ieee802_11_elems {
-	u8 *ie_start;
+	const u8 *ie_start;
 	size_t total_len;
 
 	/* pointers to IEs */
-	u8 *ssid;
-	u8 *supp_rates;
-	u8 *fh_params;
-	u8 *ds_params;
-	u8 *cf_params;
-	struct ieee80211_tim_ie *tim;
-	u8 *ibss_params;
-	u8 *challenge;
-	u8 *rsn;
-	u8 *erp_info;
-	u8 *ext_supp_rates;
-	u8 *wmm_info;
-	u8 *wmm_param;
-	struct ieee80211_ht_cap *ht_cap_elem;
-	struct ieee80211_ht_operation *ht_operation;
-	struct ieee80211_vht_cap *vht_cap_elem;
-	struct ieee80211_vht_operation *vht_operation;
-	struct ieee80211_meshconf_ie *mesh_config;
-	u8 *mesh_id;
-	u8 *peering;
-	__le16 *awake_window;
-	u8 *preq;
-	u8 *prep;
-	u8 *perr;
-	struct ieee80211_rann_ie *rann;
-	struct ieee80211_channel_sw_ie *ch_switch_ie;
-	u8 *country_elem;
-	u8 *pwr_constr_elem;
-	u8 *quiet_elem;	/* first quite element */
-	u8 *timeout_int;
-	u8 *opmode_notif;
+	const u8 *ssid;
+	const u8 *supp_rates;
+	const u8 *fh_params;
+	const u8 *ds_params;
+	const u8 *cf_params;
+	const struct ieee80211_tim_ie *tim;
+	const u8 *ibss_params;
+	const u8 *challenge;
+	const u8 *rsn;
+	const u8 *erp_info;
+	const u8 *ext_supp_rates;
+	const u8 *wmm_info;
+	const u8 *wmm_param;
+	const struct ieee80211_ht_cap *ht_cap_elem;
+	const struct ieee80211_ht_operation *ht_operation;
+	const struct ieee80211_vht_cap *vht_cap_elem;
+	const struct ieee80211_vht_operation *vht_operation;
+	const struct ieee80211_meshconf_ie *mesh_config;
+	const u8 *mesh_id;
+	const u8 *peering;
+	const __le16 *awake_window;
+	const u8 *preq;
+	const u8 *prep;
+	const u8 *perr;
+	const struct ieee80211_rann_ie *rann;
+	const struct ieee80211_channel_sw_ie *ch_switch_ie;
+	const u8 *country_elem;
+	const u8 *pwr_constr_elem;
+	const u8 *quiet_elem;	/* first quite element */
+	const u8 *timeout_int;
+	const u8 *opmode_notif;
 
 	/* length of them, respectively */
 	u8 ssid_len;
@@ -1276,10 +1276,10 @@ void ieee80211_recalc_ps_vif(struct ieee80211_sub_if_data *sdata);
 int ieee80211_max_network_latency(struct notifier_block *nb,
 				  unsigned long data, void *dummy);
 int ieee80211_set_arp_filter(struct ieee80211_sub_if_data *sdata);
-void ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
-				      struct ieee80211_channel_sw_ie *sw_elem,
-				      struct ieee80211_bss *bss,
-				      u64 timestamp);
+void
+ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
+				 const struct ieee80211_channel_sw_ie *sw_elem,
+				 struct ieee80211_bss *bss, u64 timestamp);
 void ieee80211_sta_quiesce(struct ieee80211_sub_if_data *sdata);
 void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata);
 void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata);
@@ -1387,7 +1387,7 @@ void ieee80211_apply_htcap_overrides(struct ieee80211_sub_if_data *sdata,
 				     struct ieee80211_sta_ht_cap *ht_cap);
 bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata,
 				       struct ieee80211_supported_band *sband,
-				       struct ieee80211_ht_cap *ht_cap_ie,
+				       const struct ieee80211_ht_cap *ht_cap_ie,
 				       struct sta_info *sta);
 void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata,
 			  const u8 *da, u16 tid,
@@ -1428,10 +1428,11 @@ void ieee80211_release_reorder_timeout(struct sta_info *sta, int tid);
 u8 ieee80211_mcs_to_chains(const struct ieee80211_mcs_info *mcs);
 
 /* VHT */
-void ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata,
-					 struct ieee80211_supported_band *sband,
-					 struct ieee80211_vht_cap *vht_cap_ie,
-					 struct sta_info *sta);
+void
+ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata,
+				    struct ieee80211_supported_band *sband,
+				    const struct ieee80211_vht_cap *vht_cap_ie,
+				    struct sta_info *sta);
 enum ieee80211_sta_rx_bandwidth ieee80211_sta_cur_vht_bw(struct sta_info *sta);
 void ieee80211_sta_set_rx_nss(struct sta_info *sta);
 void ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata,
@@ -1555,7 +1556,7 @@ static inline void ieee80211_add_pending_skbs(struct ieee80211_local *local,
 
 void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata,
 			 u16 transaction, u16 auth_alg, u16 status,
-			 u8 *extra, size_t extra_len, const u8 *bssid,
+			 const u8 *extra, size_t extra_len, const u8 *bssid,
 			 const u8 *da, const u8 *key, u8 key_len, u8 key_idx,
 			 u32 tx_flags);
 void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata,
@@ -1606,7 +1607,7 @@ int ieee80211_add_ext_srates_ie(struct ieee80211_sub_if_data *sdata,
 
 /* channel management */
 void ieee80211_ht_oper_to_chandef(struct ieee80211_channel *control_chan,
-				  struct ieee80211_ht_operation *ht_oper,
+				  const struct ieee80211_ht_operation *ht_oper,
 				  struct cfg80211_chan_def *chandef);
 
 int __must_check
diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h
index 7ad035f0cacc..a1bad310f2e9 100644
--- a/net/mac80211/mesh.h
+++ b/net/mac80211/mesh.h
@@ -265,8 +265,8 @@ int mesh_nexthop_lookup(struct sk_buff *skb,
 int mesh_nexthop_resolve(struct sk_buff *skb,
 			 struct ieee80211_sub_if_data *sdata);
 void mesh_path_start_discovery(struct ieee80211_sub_if_data *sdata);
-struct mesh_path *mesh_path_lookup(u8 *dst,
-		struct ieee80211_sub_if_data *sdata);
+struct mesh_path *mesh_path_lookup(const u8 *dst,
+				   struct ieee80211_sub_if_data *sdata);
 struct mesh_path *mpp_path_lookup(u8 *dst,
 				  struct ieee80211_sub_if_data *sdata);
 int mpp_path_add(u8 *dst, u8 *mpp, struct ieee80211_sub_if_data *sdata);
@@ -276,7 +276,7 @@ void mesh_path_fix_nexthop(struct mesh_path *mpath, struct sta_info *next_hop);
 void mesh_path_expire(struct ieee80211_sub_if_data *sdata);
 void mesh_rx_path_sel_frame(struct ieee80211_sub_if_data *sdata,
 		struct ieee80211_mgmt *mgmt, size_t len);
-int mesh_path_add(u8 *dst, struct ieee80211_sub_if_data *sdata);
+int mesh_path_add(const u8 *dst, struct ieee80211_sub_if_data *sdata);
 
 int mesh_path_add_gate(struct mesh_path *mpath);
 int mesh_path_send_to_gates(struct mesh_path *mpath);
@@ -301,8 +301,9 @@ void mesh_sta_cleanup(struct sta_info *sta);
 void mesh_mpath_table_grow(void);
 void mesh_mpp_table_grow(void);
 /* Mesh paths */
-int mesh_path_error_tx(u8 ttl, u8 *target, __le32 target_sn, __le16 target_rcode,
-		       const u8 *ra, struct ieee80211_sub_if_data *sdata);
+int mesh_path_error_tx(u8 ttl, const u8 *target, __le32 target_sn,
+		       __le16 target_rcode, const u8 *ra,
+		       struct ieee80211_sub_if_data *sdata);
 void mesh_path_assign_nexthop(struct mesh_path *mpath, struct sta_info *sta);
 void mesh_path_flush_pending(struct mesh_path *mpath);
 void mesh_path_tx_pending(struct mesh_path *mpath);
diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index f0dd8742ed42..585c1e26cca8 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -30,14 +30,14 @@
 
 static void mesh_queue_preq(struct mesh_path *, u8);
 
-static inline u32 u32_field_get(u8 *preq_elem, int offset, bool ae)
+static inline u32 u32_field_get(const u8 *preq_elem, int offset, bool ae)
 {
 	if (ae)
 		offset += 6;
 	return get_unaligned_le32(preq_elem + offset);
 }
 
-static inline u32 u16_field_get(u8 *preq_elem, int offset, bool ae)
+static inline u32 u16_field_get(const u8 *preq_elem, int offset, bool ae)
 {
 	if (ae)
 		offset += 6;
@@ -102,10 +102,13 @@ enum mpath_frame_type {
 static const u8 broadcast_addr[ETH_ALEN] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
 
 static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags,
-		u8 *orig_addr, __le32 orig_sn, u8 target_flags, u8 *target,
-		__le32 target_sn, const u8 *da, u8 hop_count, u8 ttl,
-		__le32 lifetime, __le32 metric, __le32 preq_id,
-		struct ieee80211_sub_if_data *sdata)
+				  const u8 *orig_addr, __le32 orig_sn,
+				  u8 target_flags, const u8 *target,
+				  __le32 target_sn, const u8 *da,
+				  u8 hop_count, u8 ttl,
+				  __le32 lifetime, __le32 metric,
+				  __le32 preq_id,
+				  struct ieee80211_sub_if_data *sdata)
 {
 	struct ieee80211_local *local = sdata->local;
 	struct sk_buff *skb;
@@ -235,7 +238,7 @@ static void prepare_frame_for_deferred_tx(struct ieee80211_sub_if_data *sdata,
  * also acquires in the TX path.  To avoid a deadlock we don't transmit the
  * frame directly but add it to the pending queue instead.
  */
-int mesh_path_error_tx(u8 ttl, u8 *target, __le32 target_sn,
+int mesh_path_error_tx(u8 ttl, const u8 *target, __le32 target_sn,
 		       __le16 target_rcode, const u8 *ra,
 		       struct ieee80211_sub_if_data *sdata)
 {
@@ -369,14 +372,14 @@ static u32 airtime_link_metric_get(struct ieee80211_local *local,
  * path routing information is updated.
  */
 static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata,
-			    struct ieee80211_mgmt *mgmt,
-			    u8 *hwmp_ie, enum mpath_frame_type action)
+			       struct ieee80211_mgmt *mgmt,
+			       const u8 *hwmp_ie, enum mpath_frame_type action)
 {
 	struct ieee80211_local *local = sdata->local;
 	struct mesh_path *mpath;
 	struct sta_info *sta;
 	bool fresh_info;
-	u8 *orig_addr, *ta;
+	const u8 *orig_addr, *ta;
 	u32 orig_sn, orig_metric;
 	unsigned long orig_lifetime, exp_time;
 	u32 last_hop_metric, new_metric;
@@ -511,11 +514,11 @@ static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata,
 
 static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata,
 				    struct ieee80211_mgmt *mgmt,
-				    u8 *preq_elem, u32 metric)
+				    const u8 *preq_elem, u32 metric)
 {
 	struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
 	struct mesh_path *mpath = NULL;
-	u8 *target_addr, *orig_addr;
+	const u8 *target_addr, *orig_addr;
 	const u8 *da;
 	u8 target_flags, ttl, flags;
 	u32 orig_sn, target_sn, lifetime, orig_metric;
@@ -648,11 +651,11 @@ next_hop_deref_protected(struct mesh_path *mpath)
 
 static void hwmp_prep_frame_process(struct ieee80211_sub_if_data *sdata,
 				    struct ieee80211_mgmt *mgmt,
-				    u8 *prep_elem, u32 metric)
+				    const u8 *prep_elem, u32 metric)
 {
 	struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
 	struct mesh_path *mpath;
-	u8 *target_addr, *orig_addr;
+	const u8 *target_addr, *orig_addr;
 	u8 ttl, hopcount, flags;
 	u8 next_hop[ETH_ALEN];
 	u32 target_sn, orig_sn, lifetime;
@@ -711,12 +714,13 @@ fail:
 }
 
 static void hwmp_perr_frame_process(struct ieee80211_sub_if_data *sdata,
-			     struct ieee80211_mgmt *mgmt, u8 *perr_elem)
+				    struct ieee80211_mgmt *mgmt,
+				    const u8 *perr_elem)
 {
 	struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
 	struct mesh_path *mpath;
 	u8 ttl;
-	u8 *ta, *target_addr;
+	const u8 *ta, *target_addr;
 	u32 target_sn;
 	u16 target_rcode;
 
@@ -758,15 +762,15 @@ endperr:
 }
 
 static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata,
-				struct ieee80211_mgmt *mgmt,
-				struct ieee80211_rann_ie *rann)
+				    struct ieee80211_mgmt *mgmt,
+				    const struct ieee80211_rann_ie *rann)
 {
 	struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
 	struct ieee80211_local *local = sdata->local;
 	struct sta_info *sta;
 	struct mesh_path *mpath;
 	u8 ttl, flags, hopcount;
-	u8 *orig_addr;
+	const u8 *orig_addr;
 	u32 orig_sn, metric, metric_txsta, interval;
 	bool root_is_gate;
 
diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c
index d5786c3eaee2..2ce4c4023a97 100644
--- a/net/mac80211/mesh_pathtbl.c
+++ b/net/mac80211/mesh_pathtbl.c
@@ -181,7 +181,7 @@ errcopy:
 	return -ENOMEM;
 }
 
-static u32 mesh_table_hash(u8 *addr, struct ieee80211_sub_if_data *sdata,
+static u32 mesh_table_hash(const u8 *addr, struct ieee80211_sub_if_data *sdata,
 			   struct mesh_table *tbl)
 {
 	/* Use last four bytes of hw addr and interface index as hash index */
@@ -326,8 +326,8 @@ static void mesh_path_move_to_queue(struct mesh_path *gate_mpath,
 }
 
 
-static struct mesh_path *mpath_lookup(struct mesh_table *tbl, u8 *dst,
-					  struct ieee80211_sub_if_data *sdata)
+static struct mesh_path *mpath_lookup(struct mesh_table *tbl, const u8 *dst,
+				      struct ieee80211_sub_if_data *sdata)
 {
 	struct mesh_path *mpath;
 	struct hlist_node *n;
@@ -359,7 +359,8 @@ static struct mesh_path *mpath_lookup(struct mesh_table *tbl, u8 *dst,
  *
  * Locking: must be called within a read rcu section.
  */
-struct mesh_path *mesh_path_lookup(u8 *dst, struct ieee80211_sub_if_data *sdata)
+struct mesh_path *mesh_path_lookup(const u8 *dst,
+				   struct ieee80211_sub_if_data *sdata)
 {
 	return mpath_lookup(rcu_dereference(mesh_paths), dst, sdata);
 }
@@ -494,7 +495,7 @@ int mesh_gate_num(struct ieee80211_sub_if_data *sdata)
  *
  * State: the initial state of the new path is set to 0
  */
-int mesh_path_add(u8 *dst, struct ieee80211_sub_if_data *sdata)
+int mesh_path_add(const u8 *dst, struct ieee80211_sub_if_data *sdata)
 {
 	struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
 	struct ieee80211_local *local = sdata->local;
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 05b229e3b226..7a8cd789e487 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -1041,10 +1041,10 @@ static void ieee80211_chswitch_timer(unsigned long data)
 	ieee80211_queue_work(&sdata->local->hw, &ifmgd->chswitch_work);
 }
 
-void ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
-				      struct ieee80211_channel_sw_ie *sw_elem,
-				      struct ieee80211_bss *bss,
-				      u64 timestamp)
+void
+ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
+				 const struct ieee80211_channel_sw_ie *sw_elem,
+				 struct ieee80211_bss *bss, u64 timestamp)
 {
 	struct cfg80211_bss *cbss =
 		container_of((void *)bss, struct cfg80211_bss, priv);
@@ -1479,13 +1479,14 @@ void ieee80211_dfs_cac_timer_work(struct work_struct *work)
 /* MLME */
 static bool ieee80211_sta_wmm_params(struct ieee80211_local *local,
 				     struct ieee80211_sub_if_data *sdata,
-				     u8 *wmm_param, size_t wmm_param_len)
+				     const u8 *wmm_param, size_t wmm_param_len)
 {
 	struct ieee80211_tx_queue_params params;
 	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
 	size_t left;
 	int count;
-	u8 *pos, uapsd_queues = 0;
+	const u8 *pos;
+	u8 uapsd_queues = 0;
 
 	if (!local->ops->conf_tx)
 		return false;
@@ -2670,7 +2671,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
 		need_ps = sdata->u.mgd.associated && !sdata->u.mgd.dtim_period;
 
 		if (elems->tim && !elems->parse_error) {
-			struct ieee80211_tim_ie *tim_ie = elems->tim;
+			const struct ieee80211_tim_ie *tim_ie = elems->tim;
 			sdata->u.mgd.dtim_period = tim_ie->dtim_period;
 		}
 	}
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index e24ff38606a9..0f38f43ac62e 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -1035,7 +1035,7 @@ u32 ieee80211_mandatory_rates(struct ieee80211_local *local,
 
 void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata,
 			 u16 transaction, u16 auth_alg, u16 status,
-			 u8 *extra, size_t extra_len, const u8 *da,
+			 const u8 *extra, size_t extra_len, const u8 *da,
 			 const u8 *bssid, const u8 *key, u8 key_len, u8 key_idx,
 			 u32 tx_flags)
 {
@@ -1947,7 +1947,7 @@ u8 *ieee80211_ie_build_ht_oper(u8 *pos, struct ieee80211_sta_ht_cap *ht_cap,
 }
 
 void ieee80211_ht_oper_to_chandef(struct ieee80211_channel *control_chan,
-				  struct ieee80211_ht_operation *ht_oper,
+				  const struct ieee80211_ht_operation *ht_oper,
 				  struct cfg80211_chan_def *chandef)
 {
 	enum nl80211_channel_type channel_type;
diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c
index a9549fcc5a04..a2c2258bc84e 100644
--- a/net/mac80211/vht.c
+++ b/net/mac80211/vht.c
@@ -13,10 +13,11 @@
 #include "rate.h"
 
 
-void ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata,
-					 struct ieee80211_supported_band *sband,
-					 struct ieee80211_vht_cap *vht_cap_ie,
-					 struct sta_info *sta)
+void
+ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata,
+				    struct ieee80211_supported_band *sband,
+				    const struct ieee80211_vht_cap *vht_cap_ie,
+				    struct sta_info *sta)
 {
 	struct ieee80211_sta_vht_cap *vht_cap = &sta->sta.vht_cap;
 
-- 
cgit v1.2.3


From c6f9d6c3bdeb337809d667ef2a41597229a1ce57 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Mon, 11 Feb 2013 14:27:08 +0100
Subject: mac80211: advertise operating mode notification capability

Use the new extended capabilities advertising to advertise
the fact that operating mode notification is supported.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h | 2 ++
 net/mac80211/main.c       | 9 +++++++++
 2 files changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index e085fcf52b26..7e24fe0cfbcd 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -1776,6 +1776,8 @@ enum ieee80211_tdls_actioncode {
 #define WLAN_EXT_CAPA5_TDLS_ENABLED	BIT(5)
 #define WLAN_EXT_CAPA5_TDLS_PROHIBITED	BIT(6)
 
+#define WLAN_EXT_CAPA8_OPMODE_NOTIF	BIT(6)
+
 /* TDLS specific payload type in the LLC/SNAP header */
 #define WLAN_TDLS_SNAP_RFTYPE	0x2
 
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 9cdbc774cfd7..035344bc6b9c 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -501,6 +501,11 @@ static const struct ieee80211_ht_cap mac80211_ht_capa_mod_mask = {
 	},
 };
 
+static const u8 extended_capabilities[] = {
+	0, 0, 0, 0, 0, 0, 0,
+	WLAN_EXT_CAPA8_OPMODE_NOTIF,
+};
+
 struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
 					const struct ieee80211_ops *ops)
 {
@@ -557,6 +562,10 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
 			WIPHY_FLAG_REPORTS_OBSS |
 			WIPHY_FLAG_OFFCHAN_TX;
 
+	wiphy->extended_capabilities = extended_capabilities;
+	wiphy->extended_capabilities_mask = extended_capabilities;
+	wiphy->extended_capabilities_len = ARRAY_SIZE(extended_capabilities);
+
 	if (ops->remain_on_channel)
 		wiphy->flags |= WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL;
 
-- 
cgit v1.2.3


From ea27c396174d5a4576853cbe7aeabeb9f7cba6e1 Mon Sep 17 00:00:00 2001
From: Haojian Zhuang <haojian.zhuang@linaro.org>
Date: Tue, 12 Feb 2013 01:10:57 +0800
Subject: pinctrl: generic: rename input schmitt disable

Rename PIN_CONFIG_INPUT_SCHMITT_DISABLE to
PIN_CONFIG_INPUT_SCHMITT_ENABLE. It's used to make it more generialize.

Signed-off-by: Haojian Zhuang <haojian.zhuang@linaro.org>
Acked-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/pinctrl/pinconf-generic.c       | 2 +-
 include/linux/pinctrl/pinconf-generic.h | 6 ++++--
 2 files changed, 5 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pinctrl/pinconf-generic.c b/drivers/pinctrl/pinconf-generic.c
index e5948f8172af..06c304ac6f7d 100644
--- a/drivers/pinctrl/pinconf-generic.c
+++ b/drivers/pinctrl/pinconf-generic.c
@@ -41,7 +41,7 @@ struct pin_config_item conf_items[] = {
 	PCONFDUMP(PIN_CONFIG_DRIVE_PUSH_PULL, "output drive push pull", NULL),
 	PCONFDUMP(PIN_CONFIG_DRIVE_OPEN_DRAIN, "output drive open drain", NULL),
 	PCONFDUMP(PIN_CONFIG_DRIVE_OPEN_SOURCE, "output drive open source", NULL),
-	PCONFDUMP(PIN_CONFIG_INPUT_SCHMITT_DISABLE, "input schmitt disabled", NULL),
+	PCONFDUMP(PIN_CONFIG_INPUT_SCHMITT_ENABLE, "input schmitt enabled", NULL),
 	PCONFDUMP(PIN_CONFIG_INPUT_SCHMITT, "input schmitt trigger", NULL),
 	PCONFDUMP(PIN_CONFIG_INPUT_DEBOUNCE, "input debounce", "time units"),
 	PCONFDUMP(PIN_CONFIG_POWER_SOURCE, "pin power source", "selector"),
diff --git a/include/linux/pinctrl/pinconf-generic.h b/include/linux/pinctrl/pinconf-generic.h
index 3e7909aa5c03..72474e18f1e0 100644
--- a/include/linux/pinctrl/pinconf-generic.h
+++ b/include/linux/pinctrl/pinconf-generic.h
@@ -48,7 +48,9 @@
  *	argument is ignored.
  * @PIN_CONFIG_DRIVE_STRENGTH: the pin will output the current passed as
  * 	argument. The argument is in mA.
- * @PIN_CONFIG_INPUT_SCHMITT_DISABLE: disable schmitt-trigger mode on the pin.
+ * @PIN_CONFIG_INPUT_SCHMITT_ENABLE: control schmitt-trigger mode on the pin.
+ *      If the argument != 0, schmitt-trigger mode is enabled. If it's 0,
+ *      schmitt-trigger mode is disabled.
  * @PIN_CONFIG_INPUT_SCHMITT: this will configure an input pin to run in
  *	schmitt-trigger mode. If the schmitt-trigger has adjustable hysteresis,
  *	the threshold value is given on a custom format as argument when
@@ -82,7 +84,7 @@ enum pin_config_param {
 	PIN_CONFIG_DRIVE_OPEN_DRAIN,
 	PIN_CONFIG_DRIVE_OPEN_SOURCE,
 	PIN_CONFIG_DRIVE_STRENGTH,
-	PIN_CONFIG_INPUT_SCHMITT_DISABLE,
+	PIN_CONFIG_INPUT_SCHMITT_ENABLE,
 	PIN_CONFIG_INPUT_SCHMITT,
 	PIN_CONFIG_INPUT_DEBOUNCE,
 	PIN_CONFIG_POWER_SOURCE,
-- 
cgit v1.2.3


From 73fb847a44224d5708550e4be7baba9da75e00af Mon Sep 17 00:00:00 2001
From: Stanislav Kinsbursky <skinsbursky@parallels.com>
Date: Mon, 4 Feb 2013 14:02:45 +0300
Subject: SUNRPC: introduce cache_detail->cache_request callback

This callback will allow to simplify upcalls in further patches in this
series.

Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfs/dns_resolve.c              | 3 ++-
 fs/nfsd/export.c                  | 6 ++++--
 fs/nfsd/nfs4idmap.c               | 6 ++++--
 include/linux/sunrpc/cache.h      | 4 ++++
 net/sunrpc/auth_gss/svcauth_gss.c | 3 ++-
 net/sunrpc/svcauth_unix.c         | 6 ++++--
 6 files changed, 20 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c
index 9cbc98a4159d..55bc5d11457c 100644
--- a/fs/nfs/dns_resolve.c
+++ b/fs/nfs/dns_resolve.c
@@ -144,7 +144,7 @@ static int nfs_dns_upcall(struct cache_detail *cd,
 
 	ret = nfs_cache_upcall(cd, key->hostname);
 	if (ret)
-		ret = sunrpc_cache_pipe_upcall(cd, ch, nfs_dns_request);
+		ret = sunrpc_cache_pipe_upcall(cd, ch, cd->cache_request);
 	return ret;
 }
 
@@ -359,6 +359,7 @@ static struct cache_detail nfs_dns_resolve_template = {
 	.name		= "dns_resolve",
 	.cache_put	= nfs_dns_ent_put,
 	.cache_upcall	= nfs_dns_upcall,
+	.cache_request	= nfs_dns_request,
 	.cache_parse	= nfs_dns_parse,
 	.cache_show	= nfs_dns_show,
 	.match		= nfs_dns_match,
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 8e9df45b3d3c..0e16c7fa6800 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -69,7 +69,7 @@ static void expkey_request(struct cache_detail *cd,
 
 static int expkey_upcall(struct cache_detail *cd, struct cache_head *h)
 {
-	return sunrpc_cache_pipe_upcall(cd, h, expkey_request);
+	return sunrpc_cache_pipe_upcall(cd, h, cd->cache_request);
 }
 
 static struct svc_expkey *svc_expkey_update(struct cache_detail *cd, struct svc_expkey *new,
@@ -246,6 +246,7 @@ static struct cache_detail svc_expkey_cache_template = {
 	.name		= "nfsd.fh",
 	.cache_put	= expkey_put,
 	.cache_upcall	= expkey_upcall,
+	.cache_request	= expkey_request,
 	.cache_parse	= expkey_parse,
 	.cache_show	= expkey_show,
 	.match		= expkey_match,
@@ -340,7 +341,7 @@ static void svc_export_request(struct cache_detail *cd,
 
 static int svc_export_upcall(struct cache_detail *cd, struct cache_head *h)
 {
-	return sunrpc_cache_pipe_upcall(cd, h, svc_export_request);
+	return sunrpc_cache_pipe_upcall(cd, h, cd->cache_request);
 }
 
 static struct svc_export *svc_export_update(struct svc_export *new,
@@ -714,6 +715,7 @@ static struct cache_detail svc_export_cache_template = {
 	.name		= "nfsd.export",
 	.cache_put	= svc_export_put,
 	.cache_upcall	= svc_export_upcall,
+	.cache_request	= svc_export_request,
 	.cache_parse	= svc_export_parse,
 	.cache_show	= svc_export_show,
 	.match		= svc_export_match,
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c
index a1f10c0a6255..9033dfde1812 100644
--- a/fs/nfsd/nfs4idmap.c
+++ b/fs/nfsd/nfs4idmap.c
@@ -142,7 +142,7 @@ idtoname_request(struct cache_detail *cd, struct cache_head *ch, char **bpp,
 static int
 idtoname_upcall(struct cache_detail *cd, struct cache_head *ch)
 {
-	return sunrpc_cache_pipe_upcall(cd, ch, idtoname_request);
+	return sunrpc_cache_pipe_upcall(cd, ch, cd->cache_request);
 }
 
 static int
@@ -193,6 +193,7 @@ static struct cache_detail idtoname_cache_template = {
 	.name		= "nfs4.idtoname",
 	.cache_put	= ent_put,
 	.cache_upcall	= idtoname_upcall,
+	.cache_request	= idtoname_request,
 	.cache_parse	= idtoname_parse,
 	.cache_show	= idtoname_show,
 	.warn_no_listener = warn_no_idmapd,
@@ -323,7 +324,7 @@ nametoid_request(struct cache_detail *cd, struct cache_head *ch, char **bpp,
 static int
 nametoid_upcall(struct cache_detail *cd, struct cache_head *ch)
 {
-	return sunrpc_cache_pipe_upcall(cd, ch, nametoid_request);
+	return sunrpc_cache_pipe_upcall(cd, ch, cd->cache_request);
 }
 
 static int
@@ -366,6 +367,7 @@ static struct cache_detail nametoid_cache_template = {
 	.name		= "nfs4.nametoid",
 	.cache_put	= ent_put,
 	.cache_upcall	= nametoid_upcall,
+	.cache_request	= nametoid_request,
 	.cache_parse	= nametoid_parse,
 	.cache_show	= nametoid_show,
 	.warn_no_listener = warn_no_idmapd,
diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h
index 5dc9ee4d616e..4f1c8582053c 100644
--- a/include/linux/sunrpc/cache.h
+++ b/include/linux/sunrpc/cache.h
@@ -83,6 +83,10 @@ struct cache_detail {
 	int			(*cache_upcall)(struct cache_detail *,
 						struct cache_head *);
 
+	void			(*cache_request)(struct cache_detail *cd,
+						 struct cache_head *ch,
+						 char **bpp, int *blen);
+
 	int			(*cache_parse)(struct cache_detail *,
 					       char *buf, int len);
 
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index a5b41e2ac25a..1b0df530b59d 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -184,7 +184,7 @@ static void rsi_request(struct cache_detail *cd,
 
 static int rsi_upcall(struct cache_detail *cd, struct cache_head *h)
 {
-	return sunrpc_cache_pipe_upcall(cd, h, rsi_request);
+	return sunrpc_cache_pipe_upcall(cd, h, cd->cache_request);
 }
 
 
@@ -276,6 +276,7 @@ static struct cache_detail rsi_cache_template = {
 	.name           = "auth.rpcsec.init",
 	.cache_put      = rsi_put,
 	.cache_upcall   = rsi_upcall,
+	.cache_request  = rsi_request,
 	.cache_parse    = rsi_parse,
 	.match		= rsi_match,
 	.init		= rsi_init,
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index ce34c8e5b8eb..18b8742eaa50 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -159,7 +159,7 @@ static void ip_map_request(struct cache_detail *cd,
 
 static int ip_map_upcall(struct cache_detail *cd, struct cache_head *h)
 {
-	return sunrpc_cache_pipe_upcall(cd, h, ip_map_request);
+	return sunrpc_cache_pipe_upcall(cd, h, cd->cache_request);
 }
 
 static struct ip_map *__ip_map_lookup(struct cache_detail *cd, char *class, struct in6_addr *addr);
@@ -472,7 +472,7 @@ static void unix_gid_request(struct cache_detail *cd,
 
 static int unix_gid_upcall(struct cache_detail *cd, struct cache_head *h)
 {
-	return sunrpc_cache_pipe_upcall(cd, h, unix_gid_request);
+	return sunrpc_cache_pipe_upcall(cd, h, cd->cache_request);
 }
 
 static struct unix_gid *unix_gid_lookup(struct cache_detail *cd, uid_t uid);
@@ -578,6 +578,7 @@ static struct cache_detail unix_gid_cache_template = {
 	.name		= "auth.unix.gid",
 	.cache_put	= unix_gid_put,
 	.cache_upcall	= unix_gid_upcall,
+	.cache_request	= unix_gid_request,
 	.cache_parse	= unix_gid_parse,
 	.cache_show	= unix_gid_show,
 	.match		= unix_gid_match,
@@ -875,6 +876,7 @@ static struct cache_detail ip_map_cache_template = {
 	.name		= "auth.unix.ip",
 	.cache_put	= ip_map_put,
 	.cache_upcall	= ip_map_upcall,
+	.cache_request	= ip_map_request,
 	.cache_parse	= ip_map_parse,
 	.cache_show	= ip_map_show,
 	.match		= ip_map_match,
-- 
cgit v1.2.3


From 21cd1254d3402a72927ed744e8ac1a7cf532f1ea Mon Sep 17 00:00:00 2001
From: Stanislav Kinsbursky <skinsbursky@parallels.com>
Date: Mon, 4 Feb 2013 14:02:55 +0300
Subject: SUNRPC: remove "cache_request" argument in sunrpc_cache_pipe_upcall()
 function

Passing this pointer is redundant since it's stored on cache_detail structure,
which is also passed to sunrpc_cache_pipe_upcall () function.

Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfs/dns_resolve.c         |  2 +-
 include/linux/sunrpc/cache.h |  6 +-----
 net/sunrpc/cache.c           | 10 +++-------
 3 files changed, 5 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c
index 55bc5d11457c..945527092295 100644
--- a/fs/nfs/dns_resolve.c
+++ b/fs/nfs/dns_resolve.c
@@ -144,7 +144,7 @@ static int nfs_dns_upcall(struct cache_detail *cd,
 
 	ret = nfs_cache_upcall(cd, key->hostname);
 	if (ret)
-		ret = sunrpc_cache_pipe_upcall(cd, ch, cd->cache_request);
+		ret = sunrpc_cache_pipe_upcall(cd, ch);
 	return ret;
 }
 
diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h
index 4f1c8582053c..303399b1ba59 100644
--- a/include/linux/sunrpc/cache.h
+++ b/include/linux/sunrpc/cache.h
@@ -161,11 +161,7 @@ sunrpc_cache_update(struct cache_detail *detail,
 		    struct cache_head *new, struct cache_head *old, int hash);
 
 extern int
-sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h,
-		void (*cache_request)(struct cache_detail *,
-				      struct cache_head *,
-				      char **,
-				      int *));
+sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h);
 
 
 extern void cache_clean_deferred(void *owner);
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 51853d8ed0bc..8ffec5aebeff 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -198,7 +198,7 @@ static int cache_make_upcall(struct cache_detail *cd, struct cache_head *h)
 {
 	if (cd->cache_upcall)
 		return cd->cache_upcall(cd, h);
-	return sunrpc_cache_pipe_upcall(cd, h, cd->cache_request);
+	return sunrpc_cache_pipe_upcall(cd, h);
 }
 
 static inline int cache_is_valid(struct cache_detail *detail, struct cache_head *h)
@@ -1140,11 +1140,7 @@ static bool cache_listeners_exist(struct cache_detail *detail)
  *
  * Each request is at most one page long.
  */
-int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h,
-		void (*cache_request)(struct cache_detail *,
-				      struct cache_head *,
-				      char **,
-				      int *))
+int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h)
 {
 
 	char *buf;
@@ -1172,7 +1168,7 @@ int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h,
 
 	bp = buf; len = PAGE_SIZE;
 
-	cache_request(detail, h, &bp, &len);
+	detail->cache_request(detail, h, &bp, &len);
 
 	if (len < 0) {
 		kfree(buf);
-- 
cgit v1.2.3


From 686855f5d833178e518d79e7912cdb3268a9fa69 Mon Sep 17 00:00:00 2001
From: Vladimir Davydov <vdavydov@parallels.com>
Date: Thu, 14 Feb 2013 18:19:58 +0400
Subject: sched: add wait_for_completion_io[_timeout]

The only difference between wait_for_completion[_timeout]() and
wait_for_completion_io[_timeout]() is that the latter calls
io_schedule_timeout() instead of schedule_timeout() so that the caller
is accounted as waiting for IO, not just sleeping.

These functions can be used for correct iowait time accounting when the
completion struct is actually used for waiting for IO (e.g. completion
of a bio request in the block layer).

Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/completion.h |  3 +++
 kernel/sched/core.c        | 57 ++++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 55 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/completion.h b/include/linux/completion.h
index 51494e6b5548..33f0280fd533 100644
--- a/include/linux/completion.h
+++ b/include/linux/completion.h
@@ -77,10 +77,13 @@ static inline void init_completion(struct completion *x)
 }
 
 extern void wait_for_completion(struct completion *);
+extern void wait_for_completion_io(struct completion *);
 extern int wait_for_completion_interruptible(struct completion *x);
 extern int wait_for_completion_killable(struct completion *x);
 extern unsigned long wait_for_completion_timeout(struct completion *x,
 						   unsigned long timeout);
+extern unsigned long wait_for_completion_io_timeout(struct completion *x,
+						    unsigned long timeout);
 extern long wait_for_completion_interruptible_timeout(
 	struct completion *x, unsigned long timeout);
 extern long wait_for_completion_killable_timeout(
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 257002c13bb0..d6fdcdcbb9b1 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3267,7 +3267,8 @@ void complete_all(struct completion *x)
 EXPORT_SYMBOL(complete_all);
 
 static inline long __sched
-do_wait_for_common(struct completion *x, long timeout, int state)
+do_wait_for_common(struct completion *x,
+		   long (*action)(long), long timeout, int state)
 {
 	if (!x->done) {
 		DECLARE_WAITQUEUE(wait, current);
@@ -3280,7 +3281,7 @@ do_wait_for_common(struct completion *x, long timeout, int state)
 			}
 			__set_current_state(state);
 			spin_unlock_irq(&x->wait.lock);
-			timeout = schedule_timeout(timeout);
+			timeout = action(timeout);
 			spin_lock_irq(&x->wait.lock);
 		} while (!x->done && timeout);
 		__remove_wait_queue(&x->wait, &wait);
@@ -3291,17 +3292,30 @@ do_wait_for_common(struct completion *x, long timeout, int state)
 	return timeout ?: 1;
 }
 
-static long __sched
-wait_for_common(struct completion *x, long timeout, int state)
+static inline long __sched
+__wait_for_common(struct completion *x,
+		  long (*action)(long), long timeout, int state)
 {
 	might_sleep();
 
 	spin_lock_irq(&x->wait.lock);
-	timeout = do_wait_for_common(x, timeout, state);
+	timeout = do_wait_for_common(x, action, timeout, state);
 	spin_unlock_irq(&x->wait.lock);
 	return timeout;
 }
 
+static long __sched
+wait_for_common(struct completion *x, long timeout, int state)
+{
+	return __wait_for_common(x, schedule_timeout, timeout, state);
+}
+
+static long __sched
+wait_for_common_io(struct completion *x, long timeout, int state)
+{
+	return __wait_for_common(x, io_schedule_timeout, timeout, state);
+}
+
 /**
  * wait_for_completion: - waits for completion of a task
  * @x:  holds the state of this particular completion
@@ -3337,6 +3351,39 @@ wait_for_completion_timeout(struct completion *x, unsigned long timeout)
 }
 EXPORT_SYMBOL(wait_for_completion_timeout);
 
+/**
+ * wait_for_completion_io: - waits for completion of a task
+ * @x:  holds the state of this particular completion
+ *
+ * This waits to be signaled for completion of a specific task. It is NOT
+ * interruptible and there is no timeout. The caller is accounted as waiting
+ * for IO.
+ */
+void __sched wait_for_completion_io(struct completion *x)
+{
+	wait_for_common_io(x, MAX_SCHEDULE_TIMEOUT, TASK_UNINTERRUPTIBLE);
+}
+EXPORT_SYMBOL(wait_for_completion_io);
+
+/**
+ * wait_for_completion_io_timeout: - waits for completion of a task (w/timeout)
+ * @x:  holds the state of this particular completion
+ * @timeout:  timeout value in jiffies
+ *
+ * This waits for either a completion of a specific task to be signaled or for a
+ * specified timeout to expire. The timeout is in jiffies. It is not
+ * interruptible. The caller is accounted as waiting for IO.
+ *
+ * The return value is 0 if timed out, and positive (at least 1, or number of
+ * jiffies left till timeout) if completed.
+ */
+unsigned long __sched
+wait_for_completion_io_timeout(struct completion *x, unsigned long timeout)
+{
+	return wait_for_common_io(x, timeout, TASK_UNINTERRUPTIBLE);
+}
+EXPORT_SYMBOL(wait_for_completion_io_timeout);
+
 /**
  * wait_for_completion_interruptible: - waits for completion of a task (w/intr)
  * @x:  holds the state of this particular completion
-- 
cgit v1.2.3


From f25cc71e634edcf8a15bc60a48f2b5f3ec9fbb1d Mon Sep 17 00:00:00 2001
From: Tim Gardner <tim.gardner@canonical.com>
Date: Wed, 13 Feb 2013 08:40:16 -0700
Subject: lockd: nlmclnt_reclaim(): avoid stack overflow

Even though nlmclnt_reclaim() is only one call into the stack frame,
928 bytes on the stack seems like a lot. Recode to dynamically
allocate the request structure once from within the reclaimer task,
then pass this pointer into nlmclnt_reclaim() for reuse on
subsequent calls.

smatch analysis:

fs/lockd/clntproc.c:620 nlmclnt_reclaim() warn: 'reqst' puts
 928 bytes on stack

Also remove redundant assignment of 0 after memset.

Cc: Trond Myklebust <Trond.Myklebust@netapp.com>
Signed-off-by: Tim Gardner <tim.gardner@canonical.com>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/lockd/clntlock.c         | 12 +++++++++++-
 fs/lockd/clntproc.c         |  6 ++----
 include/linux/lockd/lockd.h |  3 ++-
 3 files changed, 15 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index 4885b53e56bd..6cd673d34fb9 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -220,10 +220,19 @@ reclaimer(void *ptr)
 {
 	struct nlm_host	  *host = (struct nlm_host *) ptr;
 	struct nlm_wait	  *block;
+	struct nlm_rqst   *req;
 	struct file_lock *fl, *next;
 	u32 nsmstate;
 	struct net *net = host->net;
 
+	req = kmalloc(sizeof(*req), GFP_KERNEL);
+	if (!req) {
+		printk(KERN_ERR "lockd: reclaimer unable to alloc memory."
+				" Locks for %s won't be reclaimed!\n",
+				host->h_name);
+		return 0;
+	}
+
 	allow_signal(SIGKILL);
 
 	down_write(&host->h_rwsem);
@@ -253,7 +262,7 @@ restart:
 		 */
 		if (signalled())
 			continue;
-		if (nlmclnt_reclaim(host, fl) != 0)
+		if (nlmclnt_reclaim(host, fl, req) != 0)
 			continue;
 		list_add_tail(&fl->fl_u.nfs_fl.list, &host->h_granted);
 		if (host->h_nsmstate != nsmstate) {
@@ -279,5 +288,6 @@ restart:
 	/* Release host handle after use */
 	nlmclnt_release_host(host);
 	lockd_down(net);
+	kfree(req);
 	return 0;
 }
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 54f9e6ce0430..b43114c4332a 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -615,17 +615,15 @@ out_unlock:
  * RECLAIM: Try to reclaim a lock
  */
 int
-nlmclnt_reclaim(struct nlm_host *host, struct file_lock *fl)
+nlmclnt_reclaim(struct nlm_host *host, struct file_lock *fl,
+		struct nlm_rqst *req)
 {
-	struct nlm_rqst reqst, *req;
 	int		status;
 
-	req = &reqst;
 	memset(req, 0, sizeof(*req));
 	locks_init_lock(&req->a_args.lock.fl);
 	locks_init_lock(&req->a_res.lock.fl);
 	req->a_host  = host;
-	req->a_flags = 0;
 
 	/* Set up the argument struct */
 	nlmclnt_setlockargs(req, fl);
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index f5a051a79273..a395f1e7998f 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -212,7 +212,8 @@ int		  nlmclnt_block(struct nlm_wait *block, struct nlm_rqst *req, long timeout)
 __be32		  nlmclnt_grant(const struct sockaddr *addr,
 				const struct nlm_lock *lock);
 void		  nlmclnt_recovery(struct nlm_host *);
-int		  nlmclnt_reclaim(struct nlm_host *, struct file_lock *);
+int		  nlmclnt_reclaim(struct nlm_host *, struct file_lock *,
+				  struct nlm_rqst *);
 void		  nlmclnt_next_cookie(struct nlm_cookie *);
 
 /*
-- 
cgit v1.2.3


From 14bbd6a565e1bcdc240d44687edb93f721cfdf99 Mon Sep 17 00:00:00 2001
From: Pravin B Shelar <pshelar@nicira.com>
Date: Thu, 14 Feb 2013 09:44:49 +0000
Subject: net: Add skb_unclone() helper function.

This function will be used in next GRE_GSO patch. This patch does
not change any functionality.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Eric Dumazet <edumazet@google.com>
---
 drivers/net/ppp/ppp_generic.c           |  3 +--
 include/linux/skbuff.h                  | 10 ++++++++++
 net/ipv4/ah4.c                          |  3 +--
 net/ipv4/ip_fragment.c                  |  2 +-
 net/ipv4/tcp_output.c                   |  2 +-
 net/ipv4/xfrm4_input.c                  |  2 +-
 net/ipv4/xfrm4_mode_tunnel.c            |  3 +--
 net/ipv6/ah6.c                          |  3 +--
 net/ipv6/netfilter/nf_conntrack_reasm.c |  2 +-
 net/ipv6/reassembly.c                   |  2 +-
 net/ipv6/xfrm6_mode_tunnel.c            |  3 +--
 net/sched/act_ipt.c                     |  6 ++----
 net/sched/act_pedit.c                   |  3 +--
 13 files changed, 23 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
index 0b2706abe3e3..4fd754e74eb2 100644
--- a/drivers/net/ppp/ppp_generic.c
+++ b/drivers/net/ppp/ppp_generic.c
@@ -1805,8 +1805,7 @@ ppp_receive_nonmp_frame(struct ppp *ppp, struct sk_buff *skb)
 		/* the filter instructions are constructed assuming
 		   a four-byte PPP header on each packet */
 		if (ppp->pass_filter || ppp->active_filter) {
-			if (skb_cloned(skb) &&
-			    pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+			if (skb_unclone(skb, GFP_ATOMIC))
 				goto err;
 
 			*skb_push(skb, 2) = 0;
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 9da99520ccd5..ca6ee7d93edb 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -804,6 +804,16 @@ static inline int skb_cloned(const struct sk_buff *skb)
 	       (atomic_read(&skb_shinfo(skb)->dataref) & SKB_DATAREF_MASK) != 1;
 }
 
+static inline int skb_unclone(struct sk_buff *skb, gfp_t pri)
+{
+	might_sleep_if(pri & __GFP_WAIT);
+
+	if (skb_cloned(skb))
+		return pskb_expand_head(skb, 0, 0, pri);
+
+	return 0;
+}
+
 /**
  *	skb_header_cloned - is the header a clone
  *	@skb: buffer to check
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index a69b4e4a02b5..2e7f1948216f 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -321,8 +321,7 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb)
 
 	/* We are going to _remove_ AH header to keep sockets happy,
 	 * so... Later this can change. */
-	if (skb_cloned(skb) &&
-	    pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+	if (skb_unclone(skb, GFP_ATOMIC))
 		goto out;
 
 	skb->ip_summed = CHECKSUM_NONE;
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 1211613c6c34..b6d30acb600c 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -590,7 +590,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
 		goto out_oversize;
 
 	/* Head of list must not be cloned. */
-	if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC))
+	if (skb_unclone(head, GFP_ATOMIC))
 		goto out_nomem;
 
 	/* If the first fragment is fragmented itself, we split
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 6182d90e97b0..fd0cea114b5d 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1331,7 +1331,7 @@ static void __pskb_trim_head(struct sk_buff *skb, int len)
 /* Remove acked data from a packet in the transmit queue. */
 int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
 {
-	if (skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+	if (skb_unclone(skb, GFP_ATOMIC))
 		return -ENOMEM;
 
 	__pskb_trim_head(skb, len);
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index 06814b6216dc..1f12c8b45864 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -132,7 +132,7 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
 	 * header and optional ESP marker bytes) and then modify the
 	 * protocol to ESP, and then call into the transform receiver.
 	 */
-	if (skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+	if (skb_unclone(skb, GFP_ATOMIC))
 		goto drop;
 
 	/* Now we can update and verify the packet length... */
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
index ddee0a099a2c..1162ace30838 100644
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ b/net/ipv4/xfrm4_mode_tunnel.c
@@ -142,8 +142,7 @@ static int xfrm4_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
 	for_each_input_rcu(rcv_notify_handlers, handler)
 		handler->handler(skb);
 
-	if (skb_cloned(skb) &&
-	    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
+	if (err = skb_unclone(skb, GFP_ATOMIC))
 		goto out;
 
 	if (x->props.flags & XFRM_STATE_DECAP_DSCP)
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index 384233188ac1..bb02e176cb70 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -521,8 +521,7 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
 
 	/* We are going to _remove_ AH header to keep sockets happy,
 	 * so... Later this can change. */
-	if (skb_cloned(skb) &&
-	    pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+	if (skb_unclone(skb, GFP_ATOMIC))
 		goto out;
 
 	skb->ip_summed = CHECKSUM_NONE;
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index c674f158efa8..b89a8c3186cd 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -368,7 +368,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev)
 	}
 
 	/* Head of list must not be cloned. */
-	if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC)) {
+	if (skb_unclone(head, GFP_ATOMIC)) {
 		pr_debug("skb is cloned but can't expand head");
 		goto out_oom;
 	}
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index bab2c270f292..e354743ed426 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -404,7 +404,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
 		goto out_oversize;
 
 	/* Head of list must not be cloned. */
-	if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC))
+	if (skb_unclone(head, GFP_ATOMIC))
 		goto out_oom;
 
 	/* If the first fragment is fragmented itself, we split
diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c
index 9f2095b19ad0..93c41a81c4c3 100644
--- a/net/ipv6/xfrm6_mode_tunnel.c
+++ b/net/ipv6/xfrm6_mode_tunnel.c
@@ -69,8 +69,7 @@ static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
 	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
 		goto out;
 
-	if (skb_cloned(skb) &&
-	    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
+	if (err = skb_unclone(skb, GFP_ATOMIC))
 		goto out;
 
 	if (x->props.flags & XFRM_STATE_DECAP_DSCP)
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 0fb9e3f567e6..e0f6de64afec 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -207,10 +207,8 @@ static int tcf_ipt(struct sk_buff *skb, const struct tc_action *a,
 	struct tcf_ipt *ipt = a->priv;
 	struct xt_action_param par;
 
-	if (skb_cloned(skb)) {
-		if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
-			return TC_ACT_UNSPEC;
-	}
+	if (skb_unclone(skb, GFP_ATOMIC))
+		return TC_ACT_UNSPEC;
 
 	spin_lock(&ipt->tcf_lock);
 
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 0c3faddf3f2c..7ed78c9e505c 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -131,8 +131,7 @@ static int tcf_pedit(struct sk_buff *skb, const struct tc_action *a,
 	int i, munged = 0;
 	unsigned int off;
 
-	if (skb_cloned(skb) &&
-	    pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+	if (skb_unclone(skb, GFP_ATOMIC))
 		return p->tcf_action;
 
 	off = skb_network_offset(skb);
-- 
cgit v1.2.3


From 05e8ef4ab2d8087d360e814d14da20b9f7fb2283 Mon Sep 17 00:00:00 2001
From: Pravin B Shelar <pshelar@nicira.com>
Date: Thu, 14 Feb 2013 09:44:55 +0000
Subject: net: factor out skb_mac_gso_segment() from skb_gso_segment()

This function will be used in next GRE_GSO patch. This patch does
not change any functionality. It only exports skb_mac_gso_segment()
function.

[ Use skb_reset_mac_len() -DaveM ]

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  2 ++
 net/core/dev.c            | 79 ++++++++++++++++++++++++++++-------------------
 2 files changed, 50 insertions(+), 31 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 9deb672d999f..920361bc27e7 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2671,6 +2671,8 @@ extern void netdev_upper_dev_unlink(struct net_device *dev,
 extern int skb_checksum_help(struct sk_buff *skb);
 extern struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
 	netdev_features_t features, bool tx_path);
+extern struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb,
+					  netdev_features_t features);
 
 static inline
 struct sk_buff *skb_gso_segment(struct sk_buff *skb, netdev_features_t features)
diff --git a/net/core/dev.c b/net/core/dev.c
index f44473696b8b..67deae60214c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2327,37 +2327,20 @@ out:
 }
 EXPORT_SYMBOL(skb_checksum_help);
 
-/* openvswitch calls this on rx path, so we need a different check.
- */
-static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path)
-{
-	if (tx_path)
-		return skb->ip_summed != CHECKSUM_PARTIAL;
-	else
-		return skb->ip_summed == CHECKSUM_NONE;
-}
-
 /**
- *	__skb_gso_segment - Perform segmentation on skb.
+ *	skb_mac_gso_segment - mac layer segmentation handler.
  *	@skb: buffer to segment
  *	@features: features for the output path (see dev->features)
- *	@tx_path: whether it is called in TX path
- *
- *	This function segments the given skb and returns a list of segments.
- *
- *	It may return NULL if the skb requires no segmentation.  This is
- *	only possible when GSO is used for verifying header integrity.
  */
-struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
-				  netdev_features_t features, bool tx_path)
+struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb,
+				    netdev_features_t features)
 {
 	struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
 	struct packet_offload *ptype;
 	__be16 type = skb->protocol;
-	int vlan_depth = ETH_HLEN;
-	int err;
 
 	while (type == htons(ETH_P_8021Q)) {
+		int vlan_depth = ETH_HLEN;
 		struct vlan_hdr *vh;
 
 		if (unlikely(!pskb_may_pull(skb, vlan_depth + VLAN_HLEN)))
@@ -2368,22 +2351,14 @@ struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
 		vlan_depth += VLAN_HLEN;
 	}
 
-	skb_reset_mac_header(skb);
-	skb->mac_len = skb->network_header - skb->mac_header;
 	__skb_pull(skb, skb->mac_len);
 
-	if (unlikely(skb_needs_check(skb, tx_path))) {
-		skb_warn_bad_offload(skb);
-
-		if (skb_header_cloned(skb) &&
-		    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
-			return ERR_PTR(err);
-	}
-
 	rcu_read_lock();
 	list_for_each_entry_rcu(ptype, &offload_base, list) {
 		if (ptype->type == type && ptype->callbacks.gso_segment) {
 			if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
+				int err;
+
 				err = ptype->callbacks.gso_send_check(skb);
 				segs = ERR_PTR(err);
 				if (err || skb_gso_ok(skb, features))
@@ -2401,6 +2376,48 @@ struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
 
 	return segs;
 }
+EXPORT_SYMBOL(skb_mac_gso_segment);
+
+
+/* openvswitch calls this on rx path, so we need a different check.
+ */
+static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path)
+{
+	if (tx_path)
+		return skb->ip_summed != CHECKSUM_PARTIAL;
+	else
+		return skb->ip_summed == CHECKSUM_NONE;
+}
+
+/**
+ *	__skb_gso_segment - Perform segmentation on skb.
+ *	@skb: buffer to segment
+ *	@features: features for the output path (see dev->features)
+ *	@tx_path: whether it is called in TX path
+ *
+ *	This function segments the given skb and returns a list of segments.
+ *
+ *	It may return NULL if the skb requires no segmentation.  This is
+ *	only possible when GSO is used for verifying header integrity.
+ */
+struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
+				  netdev_features_t features, bool tx_path)
+{
+	if (unlikely(skb_needs_check(skb, tx_path))) {
+		int err;
+
+		skb_warn_bad_offload(skb);
+
+		if (skb_header_cloned(skb) &&
+		    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
+			return ERR_PTR(err);
+	}
+
+	skb_reset_mac_header(skb);
+	skb_reset_mac_len(skb);
+
+	return skb_mac_gso_segment(skb, features);
+}
 EXPORT_SYMBOL(__skb_gso_segment);
 
 /* Take action when hardware reception checksum errors are detected. */
-- 
cgit v1.2.3


From 68c331631143f5f039baac99a650e0b9e1ea02b6 Mon Sep 17 00:00:00 2001
From: Pravin B Shelar <pshelar@nicira.com>
Date: Thu, 14 Feb 2013 14:02:41 +0000
Subject: v4 GRE: Add TCP segmentation offload for GRE

Following patch adds GRE protocol offload handler so that
skb_gso_segment() can segment GRE packets.
SKB GSO CB is added to keep track of total header length so that
skb_segment can push entire header. e.g. in case of GRE, skb_segment
need to push inner and outer headers to every segment.
New NETIF_F_GRE_GSO feature is added for devices which support HW
GRE TSO offload. Currently none of devices support it therefore GRE GSO
always fall backs to software GSO.

[ Compute pkt_len before ip_local_out() invocation. -DaveM ]

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdev_features.h |   3 +-
 include/linux/skbuff.h          |  17 ++++++
 net/core/dev.c                  |   1 +
 net/core/ethtool.c              |   1 +
 net/core/skbuff.c               |   6 +-
 net/ipv4/af_inet.c              |   1 +
 net/ipv4/gre.c                  | 118 ++++++++++++++++++++++++++++++++++++++++
 net/ipv4/ip_gre.c               |  82 +++++++++++++++++++++++++---
 net/ipv4/tcp.c                  |   1 +
 net/ipv4/udp.c                  |   3 +-
 net/ipv6/ip6_offload.c          |   1 +
 net/ipv6/udp_offload.c          |   3 +-
 12 files changed, 226 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h
index 5ac32123035a..3dd39340430e 100644
--- a/include/linux/netdev_features.h
+++ b/include/linux/netdev_features.h
@@ -41,7 +41,7 @@ enum {
 	NETIF_F_TSO_ECN_BIT,		/* ... TCP ECN support */
 	NETIF_F_TSO6_BIT,		/* ... TCPv6 segmentation */
 	NETIF_F_FSO_BIT,		/* ... FCoE segmentation */
-	NETIF_F_GSO_RESERVED1,		/* ... free (fill GSO_MASK to 8 bits) */
+	NETIF_F_GSO_GRE_BIT,		/* ... GRE with TSO */
 	/**/NETIF_F_GSO_LAST,		/* [can't be last bit, see GSO_MASK] */
 	NETIF_F_GSO_RESERVED2		/* ... free (fill GSO_MASK to 8 bits) */
 		= NETIF_F_GSO_LAST,
@@ -102,6 +102,7 @@ enum {
 #define NETIF_F_VLAN_CHALLENGED	__NETIF_F(VLAN_CHALLENGED)
 #define NETIF_F_RXFCS		__NETIF_F(RXFCS)
 #define NETIF_F_RXALL		__NETIF_F(RXALL)
+#define NETIF_F_GRE_GSO		__NETIF_F(GSO_GRE)
 
 /* Features valid for ethtool to change */
 /* = all defined minus driver/device-class-related */
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index ca6ee7d93edb..821c7f45d2a7 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -314,6 +314,8 @@ enum {
 	SKB_GSO_TCPV6 = 1 << 4,
 
 	SKB_GSO_FCOE = 1 << 5,
+
+	SKB_GSO_GRE = 1 << 6,
 };
 
 #if BITS_PER_LONG > 32
@@ -2732,6 +2734,21 @@ static inline struct sec_path *skb_sec_path(struct sk_buff *skb)
 }
 #endif
 
+/* Keeps track of mac header offset relative to skb->head.
+ * It is useful for TSO of Tunneling protocol. e.g. GRE.
+ * For non-tunnel skb it points to skb_mac_header() and for
+ * tunnel skb it points to outer mac header. */
+struct skb_gso_cb {
+	int mac_offset;
+};
+#define SKB_GSO_CB(skb) ((struct skb_gso_cb *)(skb)->cb)
+
+static inline int skb_tnl_header_len(const struct sk_buff *inner_skb)
+{
+	return (skb_mac_header(inner_skb) - inner_skb->head) -
+		SKB_GSO_CB(inner_skb)->mac_offset;
+}
+
 static inline bool skb_is_gso(const struct sk_buff *skb)
 {
 	return skb_shinfo(skb)->gso_size;
diff --git a/net/core/dev.c b/net/core/dev.c
index 67deae60214c..1cd6297fd34b 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2413,6 +2413,7 @@ struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
 			return ERR_PTR(err);
 	}
 
+	SKB_GSO_CB(skb)->mac_offset = skb_headroom(skb);
 	skb_reset_mac_header(skb);
 	skb_reset_mac_len(skb);
 
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index d9d55209db67..3e9b2c3e30f0 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -77,6 +77,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
 	[NETIF_F_TSO_ECN_BIT] =          "tx-tcp-ecn-segmentation",
 	[NETIF_F_TSO6_BIT] =             "tx-tcp6-segmentation",
 	[NETIF_F_FSO_BIT] =              "tx-fcoe-segmentation",
+	[NETIF_F_GSO_GRE_BIT] =		 "tx-gre-segmentation",
 
 	[NETIF_F_FCOE_CRC_BIT] =         "tx-checksum-fcoe-crc",
 	[NETIF_F_SCTP_CSUM_BIT] =        "tx-checksum-sctp",
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 6c1ad09f8796..2a3ca33c30aa 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -2738,6 +2738,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
 	unsigned int mss = skb_shinfo(skb)->gso_size;
 	unsigned int doffset = skb->data - skb_mac_header(skb);
 	unsigned int offset = doffset;
+	unsigned int tnl_hlen = skb_tnl_header_len(skb);
 	unsigned int headroom;
 	unsigned int len;
 	int sg = !!(features & NETIF_F_SG);
@@ -2814,7 +2815,10 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
 		skb_set_network_header(nskb, skb->mac_len);
 		nskb->transport_header = (nskb->network_header +
 					  skb_network_header_len(skb));
-		skb_copy_from_linear_data(skb, nskb->data, doffset);
+
+		skb_copy_from_linear_data_offset(skb, -tnl_hlen,
+						 nskb->data - tnl_hlen,
+						 doffset + tnl_hlen);
 
 		if (fskb != skb_shinfo(skb)->frag_list)
 			continue;
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index e6e5d8506336..e225a4e5b572 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1287,6 +1287,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
 		       SKB_GSO_UDP |
 		       SKB_GSO_DODGY |
 		       SKB_GSO_TCP_ECN |
+		       SKB_GSO_GRE |
 		       0)))
 		goto out;
 
diff --git a/net/ipv4/gre.c b/net/ipv4/gre.c
index 42a491055c76..7a4c710c4cdd 100644
--- a/net/ipv4/gre.c
+++ b/net/ipv4/gre.c
@@ -19,6 +19,7 @@
 #include <linux/in.h>
 #include <linux/ip.h>
 #include <linux/netdevice.h>
+#include <linux/if_tunnel.h>
 #include <linux/spinlock.h>
 #include <net/protocol.h>
 #include <net/gre.h>
@@ -26,6 +27,11 @@
 
 static const struct gre_protocol __rcu *gre_proto[GREPROTO_MAX] __read_mostly;
 static DEFINE_SPINLOCK(gre_proto_lock);
+struct gre_base_hdr {
+	__be16 flags;
+	__be16 protocol;
+};
+#define GRE_HEADER_SECTION 4
 
 int gre_add_protocol(const struct gre_protocol *proto, u8 version)
 {
@@ -112,12 +118,117 @@ static void gre_err(struct sk_buff *skb, u32 info)
 	rcu_read_unlock();
 }
 
+static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
+				       netdev_features_t features)
+{
+	struct sk_buff *segs = ERR_PTR(-EINVAL);
+	netdev_features_t enc_features;
+	int ghl = GRE_HEADER_SECTION;
+	struct gre_base_hdr *greh;
+	int mac_len = skb->mac_len;
+	int tnl_hlen;
+	bool csum;
+
+	if (unlikely(skb_shinfo(skb)->gso_type &
+				~(SKB_GSO_TCPV4 |
+				  SKB_GSO_TCPV6 |
+				  SKB_GSO_UDP |
+				  SKB_GSO_DODGY |
+				  SKB_GSO_TCP_ECN |
+				  SKB_GSO_GRE)))
+		goto out;
+
+	if (unlikely(!pskb_may_pull(skb, sizeof(*greh))))
+		goto out;
+
+	greh = (struct gre_base_hdr *)skb_transport_header(skb);
+
+	if (greh->flags & GRE_KEY)
+		ghl += GRE_HEADER_SECTION;
+	if (greh->flags & GRE_SEQ)
+		ghl += GRE_HEADER_SECTION;
+	if (greh->flags & GRE_CSUM) {
+		ghl += GRE_HEADER_SECTION;
+		csum = true;
+	} else
+		csum = false;
+
+	/* setup inner skb. */
+	if (greh->protocol == htons(ETH_P_TEB)) {
+		struct ethhdr *eth = eth_hdr(skb);
+		skb->protocol = eth->h_proto;
+	} else {
+		skb->protocol = greh->protocol;
+	}
+
+	skb->encapsulation = 0;
+
+	if (unlikely(!pskb_may_pull(skb, ghl)))
+		goto out;
+	__skb_pull(skb, ghl);
+	skb_reset_mac_header(skb);
+	skb_set_network_header(skb, skb_inner_network_offset(skb));
+	skb->mac_len = skb_inner_network_offset(skb);
+
+	/* segment inner packet. */
+	enc_features = skb->dev->hw_enc_features & netif_skb_features(skb);
+	segs = skb_mac_gso_segment(skb, enc_features);
+	if (!segs || IS_ERR(segs))
+		goto out;
+
+	skb = segs;
+	tnl_hlen = skb_tnl_header_len(skb);
+	do {
+		__skb_push(skb, ghl);
+		if (csum) {
+			__be32 *pcsum;
+
+			if (skb_has_shared_frag(skb)) {
+				int err;
+
+				err = __skb_linearize(skb);
+				if (err) {
+					kfree_skb(segs);
+					segs = ERR_PTR(err);
+					goto out;
+				}
+			}
+
+			greh = (struct gre_base_hdr *)(skb->data);
+			pcsum = (__be32 *)(greh + 1);
+			*pcsum = 0;
+			*(__sum16 *)pcsum = csum_fold(skb_checksum(skb, 0, skb->len, 0));
+		}
+		__skb_push(skb, tnl_hlen - ghl);
+
+		skb_reset_mac_header(skb);
+		skb_set_network_header(skb, mac_len);
+		skb->mac_len = mac_len;
+	} while ((skb = skb->next));
+out:
+	return segs;
+}
+
+static int gre_gso_send_check(struct sk_buff *skb)
+{
+	if (!skb->encapsulation)
+		return -EINVAL;
+	return 0;
+}
+
 static const struct net_protocol net_gre_protocol = {
 	.handler     = gre_rcv,
 	.err_handler = gre_err,
 	.netns_ok    = 1,
 };
 
+static const struct net_offload gre_offload = {
+	.callbacks = {
+		.gso_send_check =	gre_gso_send_check,
+		.gso_segment    =	gre_gso_segment,
+	},
+};
+
 static int __init gre_init(void)
 {
 	pr_info("GRE over IPv4 demultiplexor driver\n");
@@ -127,11 +238,18 @@ static int __init gre_init(void)
 		return -EAGAIN;
 	}
 
+	if (inet_add_offload(&gre_offload, IPPROTO_GRE)) {
+		pr_err("can't add protocol offload\n");
+		inet_del_protocol(&net_gre_protocol, IPPROTO_GRE);
+		return -EAGAIN;
+	}
+
 	return 0;
 }
 
 static void __exit gre_exit(void)
 {
+	inet_del_offload(&gre_offload, IPPROTO_GRE);
 	inet_del_protocol(&net_gre_protocol, IPPROTO_GRE);
 }
 
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 00a14b9864ea..a56f1182c176 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -735,8 +735,33 @@ drop:
 	return 0;
 }
 
+static struct sk_buff *handle_offloads(struct sk_buff *skb)
+{
+	int err;
+
+	if (skb_is_gso(skb)) {
+		err = skb_unclone(skb, GFP_ATOMIC);
+		if (unlikely(err))
+			goto error;
+		skb_shinfo(skb)->gso_type |= SKB_GSO_GRE;
+		return skb;
+	} else if (skb->ip_summed == CHECKSUM_PARTIAL) {
+		err = skb_checksum_help(skb);
+		if (unlikely(err))
+			goto error;
+	}
+	skb->ip_summed = CHECKSUM_NONE;
+
+	return skb;
+
+error:
+	kfree_skb(skb);
+	return ERR_PTR(err);
+}
+
 static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 {
+	struct pcpu_tstats *tstats = this_cpu_ptr(dev->tstats);
 	struct ip_tunnel *tunnel = netdev_priv(dev);
 	const struct iphdr  *old_iph;
 	const struct iphdr  *tiph;
@@ -751,10 +776,19 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
 	__be32 dst;
 	int    mtu;
 	u8     ttl;
+	int    err;
+	int    pkt_len;
 
-	if (skb->ip_summed == CHECKSUM_PARTIAL &&
-	    skb_checksum_help(skb))
-		goto tx_error;
+	skb = handle_offloads(skb);
+	if (IS_ERR(skb)) {
+		dev->stats.tx_dropped++;
+		return NETDEV_TX_OK;
+	}
+
+	if (!skb->encapsulation) {
+		skb_reset_inner_headers(skb);
+		skb->encapsulation = 1;
+	}
 
 	old_iph = ip_hdr(skb);
 
@@ -855,7 +889,8 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
 	if (skb->protocol == htons(ETH_P_IP)) {
 		df |= (old_iph->frag_off&htons(IP_DF));
 
-		if ((old_iph->frag_off&htons(IP_DF)) &&
+		if (!skb_is_gso(skb) &&
+		    (old_iph->frag_off&htons(IP_DF)) &&
 		    mtu < ntohs(old_iph->tot_len)) {
 			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
 			ip_rt_put(rt);
@@ -875,7 +910,9 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
 			}
 		}
 
-		if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
+		if (!skb_is_gso(skb) &&
+		    mtu >= IPV6_MIN_MTU &&
+		    mtu < skb->len - tunnel->hlen + gre_hlen) {
 			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
 			ip_rt_put(rt);
 			goto tx_error;
@@ -936,6 +973,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
 	iph->daddr		=	fl4.daddr;
 	iph->saddr		=	fl4.saddr;
 	iph->ttl		=	ttl;
+	iph->id			=	0;
 
 	if (ttl == 0) {
 		if (skb->protocol == htons(ETH_P_IP))
@@ -964,9 +1002,19 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
 			*ptr = tunnel->parms.o_key;
 			ptr--;
 		}
-		if (tunnel->parms.o_flags&GRE_CSUM) {
+		/* Skip GRE checksum if skb is getting offloaded. */
+		if (!(skb_shinfo(skb)->gso_type & SKB_GSO_GRE) &&
+		    (tunnel->parms.o_flags&GRE_CSUM)) {
 			int offset = skb_transport_offset(skb);
 
+			if (skb_has_shared_frag(skb)) {
+				err = __skb_linearize(skb);
+				if (err) {
+					ip_rt_put(rt);
+					goto tx_error;
+				}
+			}
+
 			*ptr = 0;
 			*(__sum16 *)ptr = csum_fold(skb_checksum(skb, offset,
 								 skb->len - offset,
@@ -974,7 +1022,19 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
 		}
 	}
 
-	iptunnel_xmit(skb, dev);
+	nf_reset(skb);
+
+	pkt_len = skb->len - skb_transport_offset(skb);
+	err = ip_local_out(skb);
+	if (likely(net_xmit_eval(err) == 0)) {
+		u64_stats_update_begin(&tstats->syncp);
+		tstats->tx_bytes += pkt_len;
+		tstats->tx_packets++;
+		u64_stats_update_end(&tstats->syncp);
+	} else {
+		dev->stats.tx_errors++;
+		dev->stats.tx_aborted_errors++;
+	}
 	return NETDEV_TX_OK;
 
 #if IS_ENABLED(CONFIG_IPV6)
@@ -1044,6 +1104,11 @@ static int ipgre_tunnel_bind_dev(struct net_device *dev)
 		mtu = 68;
 
 	tunnel->hlen = addend;
+	/* TCP offload with GRE SEQ is not supported. */
+	if (!(tunnel->parms.o_flags & GRE_SEQ)) {
+		dev->features		|= NETIF_F_GSO_SOFTWARE;
+		dev->hw_features	|= NETIF_F_GSO_SOFTWARE;
+	}
 
 	return mtu;
 }
@@ -1593,6 +1658,9 @@ static void ipgre_tap_setup(struct net_device *dev)
 
 	dev->iflink		= 0;
 	dev->features		|= NETIF_F_NETNS_LOCAL;
+
+	dev->features		|= GRE_FEATURES;
+	dev->hw_features	|= GRE_FEATURES;
 }
 
 static int ipgre_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[],
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 1f0bedb8622f..7a5ba48c2cc9 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3043,6 +3043,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
 			       SKB_GSO_DODGY |
 			       SKB_GSO_TCP_ECN |
 			       SKB_GSO_TCPV6 |
+			       SKB_GSO_GRE |
 			       0) ||
 			     !(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))))
 			goto out;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 6791aac06ea9..39a5e7a9a77f 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2305,7 +2305,8 @@ struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
 		/* Packet is from an untrusted source, reset gso_segs. */
 		int type = skb_shinfo(skb)->gso_type;
 
-		if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY) ||
+		if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY |
+				      SKB_GSO_GRE) ||
 			     !(type & (SKB_GSO_UDP))))
 			goto out;
 
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index f26f0da7f095..8234c1dcdf72 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -99,6 +99,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
 		     ~(SKB_GSO_UDP |
 		       SKB_GSO_DODGY |
 		       SKB_GSO_TCP_ECN |
+		       SKB_GSO_GRE |
 		       SKB_GSO_TCPV6 |
 		       0)))
 		goto out;
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index 0c8934a317c2..cf05cf073c51 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -56,7 +56,8 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
 		/* Packet is from an untrusted source, reset gso_segs. */
 		int type = skb_shinfo(skb)->gso_type;
 
-		if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY) ||
+		if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY |
+				      SKB_GSO_GRE) ||
 			     !(type & (SKB_GSO_UDP))))
 			goto out;
 
-- 
cgit v1.2.3


From 0db3863add5cb249520b31a4ad36f275a30e7ba6 Mon Sep 17 00:00:00 2001
From: Michael Trimarchi <michael@amarulasolutions.com>
Date: Fri, 15 Feb 2013 14:43:38 -0800
Subject: Input: bma150 - make some defines public and fix some comments

Make the constants referring to range and bandwidth public so they can
be used when initializing the platform data fields in the platform code.

Fix also some comments regarding the unit of measurement to use for the
range and bandwidth fields, the values are not actually expected to be
in G or HZ, the code in bma150.c just uses the BMA150_RANGE_xxx and
BMA150_BW_xxx constants like they are with no translation from actual
values in G or HZ.

Signed-off-by: Michael Trimarchi <michael@amarulasolutions.com>
Signed-off-by: Antonio Ospite <ao2@amarulasolutions.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/misc/bma150.c | 12 ------------
 include/linux/bma150.h      | 16 ++++++++++++++--
 2 files changed, 14 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/input/misc/bma150.c b/drivers/input/misc/bma150.c
index e5d18946bb2e..865c2f9d25b9 100644
--- a/drivers/input/misc/bma150.c
+++ b/drivers/input/misc/bma150.c
@@ -46,18 +46,6 @@
 #define BMA150_POLL_MAX		200
 #define BMA150_POLL_MIN		0
 
-#define BMA150_BW_25HZ		0
-#define BMA150_BW_50HZ		1
-#define BMA150_BW_100HZ		2
-#define BMA150_BW_190HZ		3
-#define BMA150_BW_375HZ		4
-#define BMA150_BW_750HZ		5
-#define BMA150_BW_1500HZ	6
-
-#define BMA150_RANGE_2G		0
-#define BMA150_RANGE_4G		1
-#define BMA150_RANGE_8G		2
-
 #define BMA150_MODE_NORMAL	0
 #define BMA150_MODE_SLEEP	2
 #define BMA150_MODE_WAKE_UP	3
diff --git a/include/linux/bma150.h b/include/linux/bma150.h
index 7911fda23bb4..97ade7cdc870 100644
--- a/include/linux/bma150.h
+++ b/include/linux/bma150.h
@@ -22,6 +22,18 @@
 
 #define BMA150_DRIVER		"bma150"
 
+#define BMA150_RANGE_2G		0
+#define BMA150_RANGE_4G		1
+#define BMA150_RANGE_8G		2
+
+#define BMA150_BW_25HZ		0
+#define BMA150_BW_50HZ		1
+#define BMA150_BW_100HZ		2
+#define BMA150_BW_190HZ		3
+#define BMA150_BW_375HZ		4
+#define BMA150_BW_750HZ		5
+#define BMA150_BW_1500HZ	6
+
 struct bma150_cfg {
 	bool any_motion_int;		/* Set to enable any-motion interrupt */
 	bool hg_int;			/* Set to enable high-G interrupt */
@@ -34,8 +46,8 @@ struct bma150_cfg {
 	unsigned char lg_hyst;		/* Low-G hysterisis */
 	unsigned char lg_dur;		/* Low-G duration */
 	unsigned char lg_thres;		/* Low-G threshold */
-	unsigned char range;		/* BMA0150_RANGE_xxx (in G) */
-	unsigned char bandwidth;	/* BMA0150_BW_xxx (in Hz) */
+	unsigned char range;		/* one of BMA0150_RANGE_xxx */
+	unsigned char bandwidth;	/* one of BMA0150_BW_xxx */
 };
 
 struct bma150_platform_data {
-- 
cgit v1.2.3


From 9eee07d39fa606a191ae65d3c0e12771a80e70ca Mon Sep 17 00:00:00 2001
From: Stephen Warren <swarren@nvidia.com>
Date: Fri, 15 Feb 2013 17:04:12 -0800
Subject: Input: tegra-kbc - require CONFIG_OF, remove platform data

Tegra only supports, and always enables, device tree. Remove all ifdefs
and runtime checks for DT support from the driver. Platform data is
therefore no longer required. Delete the header that defines it, and
rework the driver to parse the device tree directly into struct
tegra_kbc.

Signed-off-by: Stephen Warren <swarren@nvidia.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/keyboard/Kconfig     |   2 +-
 drivers/input/keyboard/tegra-kbc.c | 195 +++++++++++++++++--------------------
 include/linux/input/tegra_kbc.h    |  62 ------------
 3 files changed, 93 insertions(+), 166 deletions(-)
 delete mode 100644 include/linux/input/tegra_kbc.h

(limited to 'include/linux')

diff --git a/drivers/input/keyboard/Kconfig b/drivers/input/keyboard/Kconfig
index 72377737eec8..992137cf3a64 100644
--- a/drivers/input/keyboard/Kconfig
+++ b/drivers/input/keyboard/Kconfig
@@ -420,7 +420,7 @@ config KEYBOARD_NOMADIK
 
 config KEYBOARD_TEGRA
 	tristate "NVIDIA Tegra internal matrix keyboard controller support"
-	depends on ARCH_TEGRA
+	depends on ARCH_TEGRA && OF
 	select INPUT_MATRIXKMAP
 	help
 	  Say Y here if you want to use a matrix keyboard connected directly
diff --git a/drivers/input/keyboard/tegra-kbc.c b/drivers/input/keyboard/tegra-kbc.c
index 4526bdd36022..d89e7d392d1e 100644
--- a/drivers/input/keyboard/tegra-kbc.c
+++ b/drivers/input/keyboard/tegra-kbc.c
@@ -29,9 +29,16 @@
 #include <linux/of.h>
 #include <linux/clk.h>
 #include <linux/slab.h>
-#include <linux/input/tegra_kbc.h>
+#include <linux/input/matrix_keypad.h>
 #include <mach/clk.h>
 
+#define KBC_MAX_GPIO	24
+#define KBC_MAX_KPENT	8
+
+#define KBC_MAX_ROW	16
+#define KBC_MAX_COL	8
+#define KBC_MAX_KEY	(KBC_MAX_ROW * KBC_MAX_COL)
+
 #define KBC_MAX_DEBOUNCE_CNT	0x3ffu
 
 /* KBC row scan time and delay for beginning the row scan. */
@@ -67,10 +74,27 @@
 
 #define KBC_ROW_SHIFT	3
 
+enum tegra_pin_type {
+	PIN_CFG_IGNORE,
+	PIN_CFG_COL,
+	PIN_CFG_ROW,
+};
+
+struct tegra_kbc_pin_cfg {
+	enum tegra_pin_type type;
+	unsigned char num;
+};
+
 struct tegra_kbc {
+	struct device *dev;
+	unsigned int debounce_cnt;
+	unsigned int repeat_cnt;
+	struct tegra_kbc_pin_cfg pin_cfg[KBC_MAX_GPIO];
+	const struct matrix_keymap_data *keymap_data;
+	bool wakeup;
 	void __iomem *mmio;
 	struct input_dev *idev;
-	unsigned int irq;
+	int irq;
 	spinlock_t lock;
 	unsigned int repoll_dly;
 	unsigned long cp_dly_jiffies;
@@ -78,7 +102,6 @@ struct tegra_kbc {
 	bool use_fn_map;
 	bool use_ghost_filter;
 	bool keypress_caused_wake;
-	const struct tegra_kbc_platform_data *pdata;
 	unsigned short keycode[KBC_MAX_KEY * 2];
 	unsigned short current_keys[KBC_MAX_KPENT];
 	unsigned int num_pressed_keys;
@@ -286,12 +309,11 @@ static irqreturn_t tegra_kbc_isr(int irq, void *args)
 
 static void tegra_kbc_setup_wakekeys(struct tegra_kbc *kbc, bool filter)
 {
-	const struct tegra_kbc_platform_data *pdata = kbc->pdata;
 	int i;
 	unsigned int rst_val;
 
 	/* Either mask all keys or none. */
-	rst_val = (filter && !pdata->wakeup) ? ~0 : 0;
+	rst_val = (filter && !kbc->wakeup) ? ~0 : 0;
 
 	for (i = 0; i < KBC_MAX_ROW; i++)
 		writel(rst_val, kbc->mmio + KBC_ROW0_MASK_0 + i * 4);
@@ -299,7 +321,6 @@ static void tegra_kbc_setup_wakekeys(struct tegra_kbc *kbc, bool filter)
 
 static void tegra_kbc_config_pins(struct tegra_kbc *kbc)
 {
-	const struct tegra_kbc_platform_data *pdata = kbc->pdata;
 	int i;
 
 	for (i = 0; i < KBC_MAX_GPIO; i++) {
@@ -315,13 +336,13 @@ static void tegra_kbc_config_pins(struct tegra_kbc *kbc)
 		row_cfg &= ~r_mask;
 		col_cfg &= ~c_mask;
 
-		switch (pdata->pin_cfg[i].type) {
+		switch (kbc->pin_cfg[i].type) {
 		case PIN_CFG_ROW:
-			row_cfg |= ((pdata->pin_cfg[i].num << 1) | 1) << r_shft;
+			row_cfg |= ((kbc->pin_cfg[i].num << 1) | 1) << r_shft;
 			break;
 
 		case PIN_CFG_COL:
-			col_cfg |= ((pdata->pin_cfg[i].num << 1) | 1) << c_shft;
+			col_cfg |= ((kbc->pin_cfg[i].num << 1) | 1) << c_shft;
 			break;
 
 		case PIN_CFG_IGNORE:
@@ -335,7 +356,6 @@ static void tegra_kbc_config_pins(struct tegra_kbc *kbc)
 
 static int tegra_kbc_start(struct tegra_kbc *kbc)
 {
-	const struct tegra_kbc_platform_data *pdata = kbc->pdata;
 	unsigned int debounce_cnt;
 	u32 val = 0;
 
@@ -350,10 +370,10 @@ static int tegra_kbc_start(struct tegra_kbc *kbc)
 	tegra_kbc_config_pins(kbc);
 	tegra_kbc_setup_wakekeys(kbc, false);
 
-	writel(pdata->repeat_cnt, kbc->mmio + KBC_RPT_DLY_0);
+	writel(kbc->repeat_cnt, kbc->mmio + KBC_RPT_DLY_0);
 
 	/* Keyboard debounce count is maximum of 12 bits. */
-	debounce_cnt = min(pdata->debounce_cnt, KBC_MAX_DEBOUNCE_CNT);
+	debounce_cnt = min(kbc->debounce_cnt, KBC_MAX_DEBOUNCE_CNT);
 	val = KBC_DEBOUNCE_CNT_SHIFT(debounce_cnt);
 	val |= KBC_FIFO_TH_CNT_SHIFT(1); /* set fifo interrupt threshold to 1 */
 	val |= KBC_CONTROL_FIFO_CNT_INT_EN;  /* interrupt on FIFO threshold */
@@ -420,21 +440,20 @@ static void tegra_kbc_close(struct input_dev *dev)
 	return tegra_kbc_stop(kbc);
 }
 
-static bool
-tegra_kbc_check_pin_cfg(const struct tegra_kbc_platform_data *pdata,
-			struct device *dev, unsigned int *num_rows)
+static bool tegra_kbc_check_pin_cfg(const struct tegra_kbc *kbc,
+					unsigned int *num_rows)
 {
 	int i;
 
 	*num_rows = 0;
 
 	for (i = 0; i < KBC_MAX_GPIO; i++) {
-		const struct tegra_kbc_pin_cfg *pin_cfg = &pdata->pin_cfg[i];
+		const struct tegra_kbc_pin_cfg *pin_cfg = &kbc->pin_cfg[i];
 
 		switch (pin_cfg->type) {
 		case PIN_CFG_ROW:
 			if (pin_cfg->num >= KBC_MAX_ROW) {
-				dev_err(dev,
+				dev_err(kbc->dev,
 					"pin_cfg[%d]: invalid row number %d\n",
 					i, pin_cfg->num);
 				return false;
@@ -444,7 +463,7 @@ tegra_kbc_check_pin_cfg(const struct tegra_kbc_platform_data *pdata,
 
 		case PIN_CFG_COL:
 			if (pin_cfg->num >= KBC_MAX_COL) {
-				dev_err(dev,
+				dev_err(kbc->dev,
 					"pin_cfg[%d]: invalid column number %d\n",
 					i, pin_cfg->num);
 				return false;
@@ -455,7 +474,7 @@ tegra_kbc_check_pin_cfg(const struct tegra_kbc_platform_data *pdata,
 			break;
 
 		default:
-			dev_err(dev,
+			dev_err(kbc->dev,
 				"pin_cfg[%d]: invalid entry type %d\n",
 				pin_cfg->type, pin_cfg->num);
 			return false;
@@ -465,12 +484,9 @@ tegra_kbc_check_pin_cfg(const struct tegra_kbc_platform_data *pdata,
 	return true;
 }
 
-#ifdef CONFIG_OF
-static struct tegra_kbc_platform_data *tegra_kbc_dt_parse_pdata(
-	struct platform_device *pdev)
+static int tegra_kbc_parse_dt(struct tegra_kbc *kbc)
 {
-	struct tegra_kbc_platform_data *pdata;
-	struct device_node *np = pdev->dev.of_node;
+	struct device_node *np = kbc->dev->of_node;
 	u32 prop;
 	int i;
 	u32 num_rows = 0;
@@ -480,109 +496,96 @@ static struct tegra_kbc_platform_data *tegra_kbc_dt_parse_pdata(
 	int proplen;
 	int ret;
 
-	if (!np) {
-		dev_err(&pdev->dev, "device tree data is missing\n");
-		return ERR_PTR(-ENOENT);
-	}
-
-	pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
-	if (!pdata)
-		return ERR_PTR(-ENOMEM);
-
 	if (!of_property_read_u32(np, "nvidia,debounce-delay-ms", &prop))
-		pdata->debounce_cnt = prop;
+		kbc->debounce_cnt = prop;
 
 	if (!of_property_read_u32(np, "nvidia,repeat-delay-ms", &prop))
-		pdata->repeat_cnt = prop;
+		kbc->repeat_cnt = prop;
 
 	if (of_find_property(np, "nvidia,needs-ghost-filter", NULL))
-		pdata->use_ghost_filter = true;
+		kbc->use_ghost_filter = true;
 
 	if (of_find_property(np, "nvidia,wakeup-source", NULL))
-		pdata->wakeup = true;
+		kbc->wakeup = true;
 
 	if (!of_get_property(np, "nvidia,kbc-row-pins", &proplen)) {
-		dev_err(&pdev->dev, "property nvidia,kbc-row-pins not found\n");
-		return ERR_PTR(-ENOENT);
+		dev_err(kbc->dev, "property nvidia,kbc-row-pins not found\n");
+		return -ENOENT;
 	}
 	num_rows = proplen / sizeof(u32);
 
 	if (!of_get_property(np, "nvidia,kbc-col-pins", &proplen)) {
-		dev_err(&pdev->dev, "property nvidia,kbc-col-pins not found\n");
-		return ERR_PTR(-ENOENT);
+		dev_err(kbc->dev, "property nvidia,kbc-col-pins not found\n");
+		return -ENOENT;
 	}
 	num_cols = proplen / sizeof(u32);
 
 	if (!of_get_property(np, "linux,keymap", &proplen)) {
-		dev_err(&pdev->dev, "property linux,keymap not found\n");
-		return ERR_PTR(-ENOENT);
+		dev_err(kbc->dev, "property linux,keymap not found\n");
+		return -ENOENT;
 	}
 
 	if (!num_rows || !num_cols || ((num_rows + num_cols) > KBC_MAX_GPIO)) {
-		dev_err(&pdev->dev,
+		dev_err(kbc->dev,
 			"keypad rows/columns not porperly specified\n");
-		return ERR_PTR(-EINVAL);
+		return -EINVAL;
 	}
 
 	/* Set all pins as non-configured */
 	for (i = 0; i < KBC_MAX_GPIO; i++)
-		pdata->pin_cfg[i].type = PIN_CFG_IGNORE;
+		kbc->pin_cfg[i].type = PIN_CFG_IGNORE;
 
 	ret = of_property_read_u32_array(np, "nvidia,kbc-row-pins",
 				rows_cfg, num_rows);
 	if (ret < 0) {
-		dev_err(&pdev->dev, "Rows configurations are not proper\n");
-		return ERR_PTR(-EINVAL);
+		dev_err(kbc->dev, "Rows configurations are not proper\n");
+		return -EINVAL;
 	}
 
 	ret = of_property_read_u32_array(np, "nvidia,kbc-col-pins",
 				cols_cfg, num_cols);
 	if (ret < 0) {
-		dev_err(&pdev->dev, "Cols configurations are not proper\n");
-		return ERR_PTR(-EINVAL);
+		dev_err(kbc->dev, "Cols configurations are not proper\n");
+		return -EINVAL;
 	}
 
 	for (i = 0; i < num_rows; i++) {
-		pdata->pin_cfg[rows_cfg[i]].type = PIN_CFG_ROW;
-		pdata->pin_cfg[rows_cfg[i]].num = i;
+		kbc->pin_cfg[rows_cfg[i]].type = PIN_CFG_ROW;
+		kbc->pin_cfg[rows_cfg[i]].num = i;
 	}
 
 	for (i = 0; i < num_cols; i++) {
-		pdata->pin_cfg[cols_cfg[i]].type = PIN_CFG_COL;
-		pdata->pin_cfg[cols_cfg[i]].num = i;
+		kbc->pin_cfg[cols_cfg[i]].type = PIN_CFG_COL;
+		kbc->pin_cfg[cols_cfg[i]].num = i;
 	}
 
-	return pdata;
-}
-#else
-static inline struct tegra_kbc_platform_data *tegra_kbc_dt_parse_pdata(
-	struct platform_device *pdev)
-{
-	dev_err(&pdev->dev, "platform data is missing\n");
-	return ERR_PTR(-EINVAL);
+	return 0;
 }
-#endif
 
 static int tegra_kbc_probe(struct platform_device *pdev)
 {
-	const struct tegra_kbc_platform_data *pdata = pdev->dev.platform_data;
 	struct tegra_kbc *kbc;
-	struct input_dev *input_dev;
 	struct resource *res;
-	int irq;
 	int err;
 	int num_rows = 0;
 	unsigned int debounce_cnt;
 	unsigned int scan_time_rows;
 	unsigned int keymap_rows = KBC_MAX_KEY;
 
-	if (!pdata)
-		pdata = tegra_kbc_dt_parse_pdata(pdev);
+	kbc = devm_kzalloc(&pdev->dev, sizeof(*kbc), GFP_KERNEL);
+	if (!kbc) {
+		dev_err(&pdev->dev, "failed to alloc memory for kbc\n");
+		return -ENOMEM;
+	}
+
+	kbc->dev = &pdev->dev;
+	spin_lock_init(&kbc->lock);
 
-	if (IS_ERR(pdata))
-		return PTR_ERR(pdata);
+	err = tegra_kbc_parse_dt(kbc);
+	if (err)
+		return err;
 
-	if (!tegra_kbc_check_pin_cfg(pdata, &pdev->dev, &num_rows))
+	if (!tegra_kbc_check_pin_cfg(kbc, &num_rows))
 		return -EINVAL;
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
@@ -591,28 +594,18 @@ static int tegra_kbc_probe(struct platform_device *pdev)
 		return -ENXIO;
 	}
 
-	irq = platform_get_irq(pdev, 0);
-	if (irq < 0) {
+	kbc->irq = platform_get_irq(pdev, 0);
+	if (kbc->irq < 0) {
 		dev_err(&pdev->dev, "failed to get keyboard IRQ\n");
 		return -ENXIO;
 	}
 
-	kbc = devm_kzalloc(&pdev->dev, sizeof(*kbc), GFP_KERNEL);
-	if (!kbc) {
-		dev_err(&pdev->dev, "failed to alloc memory for kbc\n");
-		return -ENOMEM;
-	}
-
-	input_dev = devm_input_allocate_device(&pdev->dev);
-	if (!input_dev) {
+	kbc->idev = devm_input_allocate_device(&pdev->dev);
+	if (!kbc->idev) {
 		dev_err(&pdev->dev, "failed to allocate input device\n");
 		return -ENOMEM;
 	}
 
-	kbc->pdata = pdata;
-	kbc->idev = input_dev;
-	kbc->irq = irq;
-	spin_lock_init(&kbc->lock);
 	setup_timer(&kbc->timer, tegra_kbc_keypress_timer, (unsigned long)kbc);
 
 	kbc->mmio = devm_request_and_ioremap(&pdev->dev, res);
@@ -633,36 +626,32 @@ static int tegra_kbc_probe(struct platform_device *pdev)
 	 * the rows. There is an additional delay before the row scanning
 	 * starts. The repoll delay is computed in milliseconds.
 	 */
-	debounce_cnt = min(pdata->debounce_cnt, KBC_MAX_DEBOUNCE_CNT);
+	debounce_cnt = min(kbc->debounce_cnt, KBC_MAX_DEBOUNCE_CNT);
 	scan_time_rows = (KBC_ROW_SCAN_TIME + debounce_cnt) * num_rows;
-	kbc->repoll_dly = KBC_ROW_SCAN_DLY + scan_time_rows + pdata->repeat_cnt;
+	kbc->repoll_dly = KBC_ROW_SCAN_DLY + scan_time_rows + kbc->repeat_cnt;
 	kbc->repoll_dly = DIV_ROUND_UP(kbc->repoll_dly, KBC_CYCLE_MS);
 
-	kbc->wakeup_key = pdata->wakeup_key;
-	kbc->use_fn_map = pdata->use_fn_map;
-	kbc->use_ghost_filter = pdata->use_ghost_filter;
-
-	input_dev->name = pdev->name;
-	input_dev->id.bustype = BUS_HOST;
-	input_dev->dev.parent = &pdev->dev;
-	input_dev->open = tegra_kbc_open;
-	input_dev->close = tegra_kbc_close;
+	kbc->idev->name = pdev->name;
+	kbc->idev->id.bustype = BUS_HOST;
+	kbc->idev->dev.parent = &pdev->dev;
+	kbc->idev->open = tegra_kbc_open;
+	kbc->idev->close = tegra_kbc_close;
 
-	if (pdata->keymap_data && pdata->use_fn_map)
+	if (kbc->keymap_data && kbc->use_fn_map)
 		keymap_rows *= 2;
 
-	err = matrix_keypad_build_keymap(pdata->keymap_data, NULL,
+	err = matrix_keypad_build_keymap(kbc->keymap_data, NULL,
 					 keymap_rows, KBC_MAX_COL,
-					 kbc->keycode, input_dev);
+					 kbc->keycode, kbc->idev);
 	if (err) {
 		dev_err(&pdev->dev, "failed to setup keymap\n");
 		return err;
 	}
 
-	__set_bit(EV_REP, input_dev->evbit);
-	input_set_capability(input_dev, EV_MSC, MSC_SCAN);
+	__set_bit(EV_REP, kbc->idev->evbit);
+	input_set_capability(kbc->idev, EV_MSC, MSC_SCAN);
 
-	input_set_drvdata(input_dev, kbc);
+	input_set_drvdata(kbc->idev, kbc);
 
 	err = devm_request_irq(&pdev->dev, kbc->irq, tegra_kbc_isr,
 			  IRQF_NO_SUSPEND | IRQF_TRIGGER_HIGH, pdev->name, kbc);
@@ -680,7 +669,7 @@ static int tegra_kbc_probe(struct platform_device *pdev)
 	}
 
 	platform_set_drvdata(pdev, kbc);
-	device_init_wakeup(&pdev->dev, pdata->wakeup);
+	device_init_wakeup(&pdev->dev, kbc->wakeup);
 
 	return 0;
 }
diff --git a/include/linux/input/tegra_kbc.h b/include/linux/input/tegra_kbc.h
deleted file mode 100644
index a13025612939..000000000000
--- a/include/linux/input/tegra_kbc.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Platform definitions for tegra-kbc keyboard input driver
- *
- * Copyright (c) 2010-2011, NVIDIA Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
- */
-
-#ifndef ASMARM_ARCH_TEGRA_KBC_H
-#define ASMARM_ARCH_TEGRA_KBC_H
-
-#include <linux/types.h>
-#include <linux/input/matrix_keypad.h>
-
-#define KBC_MAX_GPIO	24
-#define KBC_MAX_KPENT	8
-
-#define KBC_MAX_ROW	16
-#define KBC_MAX_COL	8
-#define KBC_MAX_KEY	(KBC_MAX_ROW * KBC_MAX_COL)
-
-enum tegra_pin_type {
-	PIN_CFG_IGNORE,
-	PIN_CFG_COL,
-	PIN_CFG_ROW,
-};
-
-struct tegra_kbc_pin_cfg {
-	enum tegra_pin_type type;
-	unsigned char num;
-};
-
-struct tegra_kbc_wake_key {
-	u8 row:4;
-	u8 col:4;
-};
-
-struct tegra_kbc_platform_data {
-	unsigned int debounce_cnt;
-	unsigned int repeat_cnt;
-
-	struct tegra_kbc_pin_cfg pin_cfg[KBC_MAX_GPIO];
-	const struct matrix_keymap_data *keymap_data;
-
-	u32 wakeup_key;
-	bool wakeup;
-	bool use_fn_map;
-	bool use_ghost_filter;
-};
-#endif
-- 
cgit v1.2.3


From ac6324e7021dfa917ce4f9a836318c3e46fbb84e Mon Sep 17 00:00:00 2001
From: Pali Rohár <pali.rohar@gmail.com>
Date: Sun, 10 Feb 2013 18:32:18 +0100
Subject: bq2415x_charger: Add support for offline and 100mA mode
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Renamed mode BQ2415X_MODE_NONE to BQ2415X_MODE_OFF because this mode
  turning chaging completly off

* Added new mode BQ2415X_MODE_NONE which enable charging with maximal
  current limit 100mA (this is minimal safe value for bq2415x chips)

Signed-off-by: Pali Rohár <pali.rohar@gmail.com>
Signed-off-by: Anton Vorontsov <anton@enomsg.org>
---
 drivers/power/bq2415x_charger.c       | 25 ++++++++++++++++++-------
 include/linux/power/bq2415x_charger.h |  3 ++-
 2 files changed, 20 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/power/bq2415x_charger.c b/drivers/power/bq2415x_charger.c
index ca70365e9410..ca91396fc48e 100644
--- a/drivers/power/bq2415x_charger.c
+++ b/drivers/power/bq2415x_charger.c
@@ -733,12 +733,10 @@ static int bq2415x_set_mode(struct bq2415x_device *bq, enum bq2415x_mode mode)
 	int charger = 0;
 	int boost = 0;
 
-	if (mode == BQ2415X_MODE_HOST_CHARGER ||
-		mode == BQ2415X_MODE_DEDICATED_CHARGER)
-			charger = 1;
-
 	if (mode == BQ2415X_MODE_BOOST)
 		boost = 1;
+	else if (mode != BQ2415X_MODE_OFF)
+		charger = 1;
 
 	if (!charger)
 		ret = bq2415x_exec_command(bq, BQ2415X_CHARGER_DISABLE);
@@ -750,6 +748,10 @@ static int bq2415x_set_mode(struct bq2415x_device *bq, enum bq2415x_mode mode)
 		return ret;
 
 	switch (mode) {
+	case BQ2415X_MODE_OFF:
+		dev_dbg(bq->dev, "changing mode to: Offline\n");
+		ret = bq2415x_set_current_limit(bq, 100);
+		break;
 	case BQ2415X_MODE_NONE:
 		dev_dbg(bq->dev, "changing mode to: N/A\n");
 		ret = bq2415x_set_current_limit(bq, 100);
@@ -842,7 +844,7 @@ static void bq2415x_timer_error(struct bq2415x_device *bq, const char *msg)
 	dev_err(bq->dev, "%s\n", msg);
 	if (bq->automode > 0)
 		bq->automode = 0;
-	bq2415x_set_mode(bq, BQ2415X_MODE_NONE);
+	bq2415x_set_mode(bq, BQ2415X_MODE_OFF);
 	bq2415x_set_autotimer(bq, 0);
 }
 
@@ -1135,6 +1137,10 @@ static ssize_t bq2415x_sysfs_set_mode(struct device *dev,
 			return -ENOSYS;
 		bq->automode = 1;
 		mode = bq->reported_mode;
+	} else if (strncmp(buf, "off", 3) == 0) {
+		if (bq->automode > 0)
+			bq->automode = 0;
+		mode = BQ2415X_MODE_OFF;
 	} else if (strncmp(buf, "none", 4) == 0) {
 		if (bq->automode > 0)
 			bq->automode = 0;
@@ -1182,6 +1188,9 @@ static ssize_t bq2415x_sysfs_show_mode(struct device *dev,
 		ret += sprintf(buf+ret, "auto (");
 
 	switch (bq->mode) {
+	case BQ2415X_MODE_OFF:
+		ret += sprintf(buf+ret, "off");
+		break;
 	case BQ2415X_MODE_NONE:
 		ret += sprintf(buf+ret, "none");
 		break;
@@ -1216,6 +1225,8 @@ static ssize_t bq2415x_sysfs_show_reported_mode(struct device *dev,
 		return -EINVAL;
 
 	switch (bq->reported_mode) {
+	case BQ2415X_MODE_OFF:
+		return sprintf(buf, "off\n");
 	case BQ2415X_MODE_NONE:
 		return sprintf(buf, "none\n");
 	case BQ2415X_MODE_HOST_CHARGER:
@@ -1535,8 +1546,8 @@ static int bq2415x_probe(struct i2c_client *client,
 	bq->dev = &client->dev;
 	bq->chip = id->driver_data;
 	bq->name = name;
-	bq->mode = BQ2415X_MODE_NONE;
-	bq->reported_mode = BQ2415X_MODE_NONE;
+	bq->mode = BQ2415X_MODE_OFF;
+	bq->reported_mode = BQ2415X_MODE_OFF;
 	bq->autotimer = 0;
 	bq->automode = 0;
 
diff --git a/include/linux/power/bq2415x_charger.h b/include/linux/power/bq2415x_charger.h
index 97a1665eaeaf..8dcc0f46fc0a 100644
--- a/include/linux/power/bq2415x_charger.h
+++ b/include/linux/power/bq2415x_charger.h
@@ -75,7 +75,8 @@
 
 /* Supported modes with maximal current limit */
 enum bq2415x_mode {
-	BQ2415X_MODE_NONE,		/* unknown or no charger (100mA) */
+	BQ2415X_MODE_OFF,		/* offline mode (charger disabled) */
+	BQ2415X_MODE_NONE,		/* unknown charger (100mA) */
 	BQ2415X_MODE_HOST_CHARGER,	/* usb host/hub charger (500mA) */
 	BQ2415X_MODE_DEDICATED_CHARGER, /* dedicated charger (unlimited) */
 	BQ2415X_MODE_BOOST,		/* boost mode (charging disabled) */
-- 
cgit v1.2.3


From 558bd3e8dc7a798c5c845f90cf038b9bbd2df2b8 Mon Sep 17 00:00:00 2001
From: Len Brown <len.brown@intel.com>
Date: Sat, 9 Feb 2013 21:51:27 -0500
Subject: PM idle: remove global declaration of pm_idle

pm_idle appears in no generic Linux code,
it appears only in architecture-specific code.

Thus, pm_idle should not be declared in pm.h.

Architectures that  use an idle function pointer
should delcare one local to their architecture,
and/or use cpuidle.

Signed-off-by: Len Brown <len.brown@intel.com>
Reviewed-by: Kevin Hilman <khilman@linaro.org>
Tested-by: Kevin Hilman <khilman@linaro.org>
Cc: linux-pm@vger.kernel.org
---
 include/linux/pm.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/pm.h b/include/linux/pm.h
index 03d7bb145311..97bcf23e045a 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -31,7 +31,6 @@
 /*
  * Callbacks for platform drivers to implement.
  */
-extern void (*pm_idle)(void);
 extern void (*pm_power_off)(void);
 extern void (*pm_power_off_prepare)(void);
 
-- 
cgit v1.2.3


From e7e319a9c51409c7effe34333ea26facf2fab9e1 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@inktank.com>
Date: Thu, 14 Feb 2013 12:16:43 -0600
Subject: libceph: improve packing in struct ceph_osd_req_op

The layout of struct ceph_osd_req_op leaves lots of holes.
Rearranging things a little for better field alignment
reduces the size by a third.

This resolves:
    http://tracker.ceph.com/issues/4163

Signed-off-by: Alex Elder <elder@inktank.com>
Reviewed-by: Josh Durgin <josh.durgin@inktank.com>
---
 include/linux/ceph/osd_client.h | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index 69287ccfe68a..82bf6338d6c1 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -157,6 +157,7 @@ struct ceph_osd_client {
 
 struct ceph_osd_req_op {
 	u16 op;           /* CEPH_OSD_OP_* */
+	u32 payload_len;
 	union {
 		struct {
 			u64 offset, length;
@@ -165,23 +166,24 @@ struct ceph_osd_req_op {
 		} extent;
 		struct {
 			const char *name;
-			u32 name_len;
 			const char  *val;
+			u32 name_len;
 			u32 value_len;
 			__u8 cmp_op;       /* CEPH_OSD_CMPXATTR_OP_* */
 			__u8 cmp_mode;     /* CEPH_OSD_CMPXATTR_MODE_* */
 		} xattr;
 		struct {
 			const char *class_name;
-			__u8 class_len;
 			const char *method_name;
-			__u8 method_len;
-			__u8 argc;
 			const char *indata;
 			u32 indata_len;
+			__u8 class_len;
+			__u8 method_len;
+			__u8 argc;
 		} cls;
 		struct {
-			u64 cookie, count;
+			u64 cookie;
+			u64 count;
 		} pgls;
 	        struct {
 		        u64 snapid;
@@ -189,12 +191,11 @@ struct ceph_osd_req_op {
 		struct {
 			u64 cookie;
 			u64 ver;
-			__u8 flag;
 			u32 prot_ver;
 			u32 timeout;
+			__u8 flag;
 		} watch;
 	};
-	u32 payload_len;
 };
 
 extern int ceph_osdc_init(struct ceph_osd_client *osdc,
-- 
cgit v1.2.3


From 87f979d390f9ecfa3d0038a9f9a002a62f8a1895 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@inktank.com>
Date: Fri, 15 Feb 2013 11:42:29 -0600
Subject: ceph: kill ceph_osdc_writepages() "nofail" parameter

There is only one caller of ceph_osdc_writepages(), and it always
passes the value true as its "nofail" argument.  Get rid of that
argument and replace its use in ceph_osdc_writepages() with the
constant value true.

This and a number of cleanup patches that follow resolve:
    http://tracker.ceph.com/issues/4126

Signed-off-by: Alex Elder <elder@inktank.com>
Reviewed-by: Josh Durgin <josh.durgin@inktank.com>
---
 fs/ceph/addr.c                  | 2 +-
 include/linux/ceph/osd_client.h | 2 +-
 net/ceph/osd_client.c           | 6 +++---
 3 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 064d1a68d2c1..c7e401c96fc9 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -493,7 +493,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
 				   page_off, len,
 				   ci->i_truncate_seq, ci->i_truncate_size,
 				   &inode->i_mtime,
-				   &page, 1, 0, 0, true);
+				   &page, 1, 0, 0);
 	if (err < 0) {
 		dout("writepage setting page/mapping error %d %p\n", err, page);
 		SetPageError(page);
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index 82bf6338d6c1..afcb255b016a 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -275,7 +275,7 @@ extern int ceph_osdc_writepages(struct ceph_osd_client *osdc,
 				u32 truncate_seq, u64 truncate_size,
 				struct timespec *mtime,
 				struct page **pages, int nr_pages,
-				int flags, int do_sync, bool nofail);
+				int flags, int do_sync);
 
 /* watch/notify events */
 extern int ceph_osdc_create_event(struct ceph_osd_client *osdc,
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index d9d58bbe9f9a..dd01b1340e95 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -1867,7 +1867,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino,
 			 u32 truncate_seq, u64 truncate_size,
 			 struct timespec *mtime,
 			 struct page **pages, int num_pages,
-			 int flags, int do_sync, bool nofail)
+			 int flags, int do_sync)
 {
 	struct ceph_osd_request *req;
 	int rc = 0;
@@ -1880,7 +1880,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino,
 					    CEPH_OSD_FLAG_WRITE,
 				    snapc, do_sync,
 				    truncate_seq, truncate_size, mtime,
-				    nofail, 1, page_align);
+				    true, 1, page_align);
 	if (IS_ERR(req))
 		return PTR_ERR(req);
 
@@ -1889,7 +1889,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino,
 	dout("writepages %llu~%llu (%d pages)\n", off, len,
 	     req->r_num_pages);
 
-	rc = ceph_osdc_start_request(osdc, req, nofail);
+	rc = ceph_osdc_start_request(osdc, req, true);
 	if (!rc)
 		rc = ceph_osdc_wait_request(osdc, req);
 
-- 
cgit v1.2.3


From fbf8685fb155e12a9f4d4b966c7b3442ed557687 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@inktank.com>
Date: Fri, 15 Feb 2013 11:42:29 -0600
Subject: ceph: kill ceph_osdc_writepages() "dosync" parameter

There is only one caller of ceph_osdc_writepages(), and it always
passes 0 as its "dosync" argument.  Get rid of that argument and
replace its use in ceph_osdc_writepages() with 0.

Signed-off-by: Alex Elder <elder@inktank.com>
Reviewed-by: Josh Durgin <josh.durgin@inktank.com>
---
 fs/ceph/addr.c                  | 2 +-
 include/linux/ceph/osd_client.h | 2 +-
 net/ceph/osd_client.c           | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index c7e401c96fc9..bef552819312 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -493,7 +493,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
 				   page_off, len,
 				   ci->i_truncate_seq, ci->i_truncate_size,
 				   &inode->i_mtime,
-				   &page, 1, 0, 0);
+				   &page, 1, 0);
 	if (err < 0) {
 		dout("writepage setting page/mapping error %d %p\n", err, page);
 		SetPageError(page);
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index afcb255b016a..7a63100a3e69 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -275,7 +275,7 @@ extern int ceph_osdc_writepages(struct ceph_osd_client *osdc,
 				u32 truncate_seq, u64 truncate_size,
 				struct timespec *mtime,
 				struct page **pages, int nr_pages,
-				int flags, int do_sync);
+				int flags);
 
 /* watch/notify events */
 extern int ceph_osdc_create_event(struct ceph_osd_client *osdc,
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index dd01b1340e95..ac186b7c9986 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -1867,7 +1867,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino,
 			 u32 truncate_seq, u64 truncate_size,
 			 struct timespec *mtime,
 			 struct page **pages, int num_pages,
-			 int flags, int do_sync)
+			 int flags)
 {
 	struct ceph_osd_request *req;
 	int rc = 0;
@@ -1878,7 +1878,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino,
 				    CEPH_OSD_OP_WRITE,
 				    flags | CEPH_OSD_FLAG_ONDISK |
 					    CEPH_OSD_FLAG_WRITE,
-				    snapc, do_sync,
+				    snapc, 0,
 				    truncate_seq, truncate_size, mtime,
 				    true, 1, page_align);
 	if (IS_ERR(req))
-- 
cgit v1.2.3


From 2480882611e3ab844563dd3d0a822227604ab8fe Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@inktank.com>
Date: Fri, 15 Feb 2013 11:42:29 -0600
Subject: ceph: kill ceph_osdc_writepages() "flags" parameter

There is only one caller of ceph_osdc_writepages(), and it always
passes 0 as its "flags" argument.  Get rid of that argument and
replace its use in ceph_osdc_writepages() with 0.

Signed-off-by: Alex Elder <elder@inktank.com>
Reviewed-by: Josh Durgin <josh.durgin@inktank.com>
---
 fs/ceph/addr.c                  | 3 +--
 include/linux/ceph/osd_client.h | 3 +--
 net/ceph/osd_client.c           | 6 ++----
 3 files changed, 4 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index bef552819312..8d3240d6f289 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -492,8 +492,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
 				   &ci->i_layout, snapc,
 				   page_off, len,
 				   ci->i_truncate_seq, ci->i_truncate_size,
-				   &inode->i_mtime,
-				   &page, 1, 0);
+				   &inode->i_mtime, &page, 1);
 	if (err < 0) {
 		dout("writepage setting page/mapping error %d %p\n", err, page);
 		SetPageError(page);
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index 7a63100a3e69..6540e8861998 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -274,8 +274,7 @@ extern int ceph_osdc_writepages(struct ceph_osd_client *osdc,
 				u64 off, u64 len,
 				u32 truncate_seq, u64 truncate_size,
 				struct timespec *mtime,
-				struct page **pages, int nr_pages,
-				int flags);
+				struct page **pages, int nr_pages);
 
 /* watch/notify events */
 extern int ceph_osdc_create_event(struct ceph_osd_client *osdc,
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index ac186b7c9986..d4e3812bcebc 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -1866,8 +1866,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino,
 			 u64 off, u64 len,
 			 u32 truncate_seq, u64 truncate_size,
 			 struct timespec *mtime,
-			 struct page **pages, int num_pages,
-			 int flags)
+			 struct page **pages, int num_pages)
 {
 	struct ceph_osd_request *req;
 	int rc = 0;
@@ -1876,8 +1875,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino,
 	BUG_ON(vino.snap != CEPH_NOSNAP);
 	req = ceph_osdc_new_request(osdc, layout, vino, off, &len,
 				    CEPH_OSD_OP_WRITE,
-				    flags | CEPH_OSD_FLAG_ONDISK |
-					    CEPH_OSD_FLAG_WRITE,
+				    CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE,
 				    snapc, 0,
 				    truncate_seq, truncate_size, mtime,
 				    true, 1, page_align);
-- 
cgit v1.2.3


From a3bea47e8bdd51d921e5b2045720d60140612c7c Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@inktank.com>
Date: Fri, 15 Feb 2013 11:42:29 -0600
Subject: ceph: kill ceph_osdc_new_request() "num_reply" parameter

The "num_reply" parameter to ceph_osdc_new_request() is never
used inside that function, so get rid of it.

Note that ceph_sync_write() passes 2 for that argument, while all
other callers pass 1.  It doesn't matter, but perhaps someone should
verify this doesn't indicate a problem.

Signed-off-by: Alex Elder <elder@inktank.com>
Reviewed-by: Josh Durgin <josh.durgin@inktank.com>
---
 fs/ceph/addr.c                  | 4 ++--
 fs/ceph/file.c                  | 2 +-
 include/linux/ceph/osd_client.h | 3 +--
 net/ceph/osd_client.c           | 6 +++---
 4 files changed, 7 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 8d3240d6f289..fc613715af46 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -315,7 +315,7 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max)
 				    CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ,
 				    NULL, 0,
 				    ci->i_truncate_seq, ci->i_truncate_size,
-				    NULL, false, 1, 0);
+				    NULL, false, 0);
 	if (IS_ERR(req))
 		return PTR_ERR(req);
 
@@ -837,7 +837,7 @@ get_more_pages:
 					    snapc, do_sync,
 					    ci->i_truncate_seq,
 					    ci->i_truncate_size,
-					    &inode->i_mtime, true, 1, 0);
+					    &inode->i_mtime, true, 0);
 
 				if (IS_ERR(req)) {
 					rc = PTR_ERR(req);
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index a1e5b81e8118..9c4325e654ca 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -541,7 +541,7 @@ more:
 				    ci->i_snap_realm->cached_context,
 				    do_sync,
 				    ci->i_truncate_seq, ci->i_truncate_size,
-				    &mtime, false, 2, page_align);
+				    &mtime, false, page_align);
 	if (IS_ERR(req))
 		return PTR_ERR(req);
 
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index 6540e8861998..5812802bd8ae 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -234,8 +234,7 @@ extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *,
 				      int do_sync, u32 truncate_seq,
 				      u64 truncate_size,
 				      struct timespec *mtime,
-				      bool use_mempool, int num_reply,
-				      int page_align);
+				      bool use_mempool, int page_align);
 
 extern void ceph_osdc_set_request_linger(struct ceph_osd_client *osdc,
 					 struct ceph_osd_request *req);
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index d4e3812bcebc..d3e75138506b 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -393,7 +393,7 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
 					       u32 truncate_seq,
 					       u64 truncate_size,
 					       struct timespec *mtime,
-					       bool use_mempool, int num_reply,
+					       bool use_mempool,
 					       int page_align)
 {
 	struct ceph_osd_req_op ops[2];
@@ -1837,7 +1837,7 @@ int ceph_osdc_readpages(struct ceph_osd_client *osdc,
 	req = ceph_osdc_new_request(osdc, layout, vino, off, plen,
 				    CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ,
 				    NULL, 0, truncate_seq, truncate_size, NULL,
-				    false, 1, page_align);
+				    false, page_align);
 	if (IS_ERR(req))
 		return PTR_ERR(req);
 
@@ -1878,7 +1878,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino,
 				    CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE,
 				    snapc, 0,
 				    truncate_seq, truncate_size, mtime,
-				    true, 1, page_align);
+				    true, page_align);
 	if (IS_ERR(req))
 		return PTR_ERR(req);
 
-- 
cgit v1.2.3


From 60e56f138180e72fa8487d4b9c1c916013494f46 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@inktank.com>
Date: Fri, 15 Feb 2013 11:42:29 -0600
Subject: libceph: kill ceph_calc_raw_layout()

There is no caller of ceph_calc_raw_layout() outside of libceph, so
there's no need to export from the module.

Furthermore, there is only one caller, in calc_layout(), and it
is not much more than a simple wrapper for that function.

So get rid of ceph_calc_raw_layout() and embed it instead within
calc_layout().

While touching "osd_client.c", get rid of the unnecessary forward
declaration of __send_request().

Signed-off-by: Alex Elder <elder@inktank.com>
Reviewed-by: Josh Durgin <josh.durgin@inktank.com>
---
 include/linux/ceph/osd_client.h |  5 ---
 net/ceph/osd_client.c           | 77 +++++++++++++++++------------------------
 2 files changed, 32 insertions(+), 50 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index 5812802bd8ae..c39e7ed4b203 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -207,11 +207,6 @@ extern void ceph_osdc_handle_reply(struct ceph_osd_client *osdc,
 extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc,
 				 struct ceph_msg *msg);
 
-extern int ceph_calc_raw_layout(struct ceph_file_layout *layout,
-			u64 off, u64 *plen, u64 *bno,
-			struct ceph_osd_request *req,
-			struct ceph_osd_req_op *op);
-
 extern struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
 					       struct ceph_snap_context *snapc,
 					       unsigned int num_op,
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 0d67cd37173b..cd3a489b7438 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -38,49 +38,6 @@ static int op_has_extent(int op)
 		op == CEPH_OSD_OP_WRITE);
 }
 
-int ceph_calc_raw_layout(struct ceph_file_layout *layout,
-			u64 off, u64 *plen, u64 *bno,
-			struct ceph_osd_request *req,
-			struct ceph_osd_req_op *op)
-{
-	u64 orig_len = *plen;
-	u64 objoff, objlen;    /* extent in object */
-	int r;
-
-	/* object extent? */
-	r = ceph_calc_file_object_mapping(layout, off, orig_len, bno,
-					  &objoff, &objlen);
-	if (r < 0)
-		return r;
-	if (objlen < orig_len) {
-		*plen = objlen;
-		dout(" skipping last %llu, final file extent %llu~%llu\n",
-		     orig_len - *plen, off, *plen);
-	}
-
-	if (op_has_extent(op->op)) {
-		u32 osize = le32_to_cpu(layout->fl_object_size);
-		op->extent.offset = objoff;
-		op->extent.length = objlen;
-		if (op->extent.truncate_size <= off - objoff) {
-			op->extent.truncate_size = 0;
-		} else {
-			op->extent.truncate_size -= off - objoff;
-			if (op->extent.truncate_size > osize)
-				op->extent.truncate_size = osize;
-		}
-	}
-	req->r_num_pages = calc_pages_for(off, *plen);
-	req->r_page_alignment = off & ~PAGE_MASK;
-	if (op->op == CEPH_OSD_OP_WRITE)
-		op->payload_len = *plen;
-
-	dout("calc_layout bno=%llx %llu~%llu (%d pages)\n",
-	     *bno, objoff, objlen, req->r_num_pages);
-	return 0;
-}
-EXPORT_SYMBOL(ceph_calc_raw_layout);
-
 /*
  * Implement client access to distributed object storage cluster.
  *
@@ -112,12 +69,42 @@ static int calc_layout(struct ceph_vino vino,
 		       struct ceph_osd_request *req,
 		       struct ceph_osd_req_op *op)
 {
-	u64 bno;
+	u64 orig_len = *plen;
+	u64 bno = 0;
+	u64 objoff = 0;
+	u64 objlen = 0;
 	int r;
 
-	r = ceph_calc_raw_layout(layout, off, plen, &bno, req, op);
+	/* object extent? */
+	r = ceph_calc_file_object_mapping(layout, off, orig_len, &bno,
+					  &objoff, &objlen);
 	if (r < 0)
 		return r;
+	if (objlen < orig_len) {
+		*plen = objlen;
+		dout(" skipping last %llu, final file extent %llu~%llu\n",
+		     orig_len - *plen, off, *plen);
+	}
+
+	if (op_has_extent(op->op)) {
+		u32 osize = le32_to_cpu(layout->fl_object_size);
+		op->extent.offset = objoff;
+		op->extent.length = objlen;
+		if (op->extent.truncate_size <= off - objoff) {
+			op->extent.truncate_size = 0;
+		} else {
+			op->extent.truncate_size -= off - objoff;
+			if (op->extent.truncate_size > osize)
+				op->extent.truncate_size = osize;
+		}
+	}
+	req->r_num_pages = calc_pages_for(off, *plen);
+	req->r_page_alignment = off & ~PAGE_MASK;
+	if (op->op == CEPH_OSD_OP_WRITE)
+		op->payload_len = *plen;
+
+	dout("calc_layout bno=%llx %llu~%llu (%d pages)\n",
+	     bno, objoff, objlen, req->r_num_pages);
 
 	snprintf(req->r_oid, sizeof(req->r_oid), "%llx.%08llx", vino.ino, bno);
 	req->r_oid_len = strlen(req->r_oid);
-- 
cgit v1.2.3


From 3c663bbdcdf9296e0fe3362acb9e81f49d7b72c6 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@inktank.com>
Date: Fri, 15 Feb 2013 11:42:30 -0600
Subject: libceph: kill ceph_osdc_create_event() "one_shot" parameter

There is only one caller of ceph_osdc_create_event(), and it
provides 0 as its "one_shot" argument.  Get rid of that argument and
just use 0 in its place.

Replace the code in handle_watch_notify() that executes if one_shot
is nonzero in the event with a BUG_ON() call.

While modifying "osd_client.c", give handle_watch_notify() static
scope.

Signed-off-by: Alex Elder <elder@inktank.com>
Reviewed-by: Josh Durgin <josh.durgin@inktank.com>
---
 drivers/block/rbd.c             |  2 +-
 include/linux/ceph/osd_client.h |  3 +--
 net/ceph/osd_client.c           | 11 +++++------
 3 files changed, 7 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 982963ec2607..13ee789f2328 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -1744,7 +1744,7 @@ static int rbd_dev_header_watch_sync(struct rbd_device *rbd_dev, int start)
 	rbd_assert(start ^ !!rbd_dev->watch_request);
 
 	if (start) {
-		ret = ceph_osdc_create_event(osdc, rbd_watch_cb, 0, rbd_dev,
+		ret = ceph_osdc_create_event(osdc, rbd_watch_cb, rbd_dev,
 						&rbd_dev->watch_event);
 		if (ret < 0)
 			return ret;
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index c39e7ed4b203..39c55d61e159 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -273,8 +273,7 @@ extern int ceph_osdc_writepages(struct ceph_osd_client *osdc,
 /* watch/notify events */
 extern int ceph_osdc_create_event(struct ceph_osd_client *osdc,
 				  void (*event_cb)(u64, u64, u8, void *),
-				  int one_shot, void *data,
-				  struct ceph_osd_event **pevent);
+				  void *data, struct ceph_osd_event **pevent);
 extern void ceph_osdc_cancel_event(struct ceph_osd_event *event);
 extern int ceph_osdc_wait_event(struct ceph_osd_event *event,
 				unsigned long timeout);
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index cd3a489b7438..4322faa7c811 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -1477,8 +1477,7 @@ static void __remove_event(struct ceph_osd_event *event)
 
 int ceph_osdc_create_event(struct ceph_osd_client *osdc,
 			   void (*event_cb)(u64, u64, u8, void *),
-			   int one_shot, void *data,
-			   struct ceph_osd_event **pevent)
+			   void *data, struct ceph_osd_event **pevent)
 {
 	struct ceph_osd_event *event;
 
@@ -1488,7 +1487,7 @@ int ceph_osdc_create_event(struct ceph_osd_client *osdc,
 
 	dout("create_event %p\n", event);
 	event->cb = event_cb;
-	event->one_shot = one_shot;
+	event->one_shot = 0;
 	event->data = data;
 	event->osdc = osdc;
 	INIT_LIST_HEAD(&event->osd_node);
@@ -1541,7 +1540,8 @@ static void do_event_work(struct work_struct *work)
 /*
  * Process osd watch notifications
  */
-void handle_watch_notify(struct ceph_osd_client *osdc, struct ceph_msg *msg)
+static void handle_watch_notify(struct ceph_osd_client *osdc,
+				struct ceph_msg *msg)
 {
 	void *p, *end;
 	u8 proto_ver;
@@ -1562,9 +1562,8 @@ void handle_watch_notify(struct ceph_osd_client *osdc, struct ceph_msg *msg)
 	spin_lock(&osdc->event_lock);
 	event = __find_event(osdc, cookie);
 	if (event) {
+		BUG_ON(event->one_shot);
 		get_event(event);
-		if (event->one_shot)
-			__remove_event(event);
 	}
 	spin_unlock(&osdc->event_lock);
 	dout("handle_watch_notify cookie %lld ver %lld event %p\n",
-- 
cgit v1.2.3


From 2d2f522699fe8b827087941eb31b9a12cf465f17 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@inktank.com>
Date: Fri, 15 Feb 2013 11:42:30 -0600
Subject: libceph: kill ceph_osdc_wait_event()

There are no actual users of ceph_osdc_wait_event().  This would
have been one-shot events, but we no longer support those so just
get rid of this function.

Since this leaves nothing else that waits for the completion of an
event, we can get rid of the completion in a struct ceph_osd_event.

Signed-off-by: Alex Elder <elder@inktank.com>
Reviewed-by: Josh Durgin <josh.durgin@inktank.com>
---
 include/linux/ceph/osd_client.h |  3 ---
 net/ceph/osd_client.c           | 18 ------------------
 2 files changed, 21 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index 39c55d61e159..388158ff0cbc 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -107,7 +107,6 @@ struct ceph_osd_event {
 	struct rb_node node;
 	struct list_head osd_node;
 	struct kref kref;
-	struct completion completion;
 };
 
 struct ceph_osd_event_work {
@@ -275,8 +274,6 @@ extern int ceph_osdc_create_event(struct ceph_osd_client *osdc,
 				  void (*event_cb)(u64, u64, u8, void *),
 				  void *data, struct ceph_osd_event **pevent);
 extern void ceph_osdc_cancel_event(struct ceph_osd_event *event);
-extern int ceph_osdc_wait_event(struct ceph_osd_event *event,
-				unsigned long timeout);
 extern void ceph_osdc_put_event(struct ceph_osd_event *event);
 #endif
 
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 4322faa7c811..ad6b8b35f5ca 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -1494,7 +1494,6 @@ int ceph_osdc_create_event(struct ceph_osd_client *osdc,
 	RB_CLEAR_NODE(&event->node);
 	kref_init(&event->kref);   /* one ref for us */
 	kref_get(&event->kref);    /* one ref for the caller */
-	init_completion(&event->completion);
 
 	spin_lock(&osdc->event_lock);
 	event->cookie = ++osdc->event_count;
@@ -1530,7 +1529,6 @@ static void do_event_work(struct work_struct *work)
 
 	dout("do_event_work completing %p\n", event);
 	event->cb(ver, notify_id, opcode, event->data);
-	complete(&event->completion);
 	dout("do_event_work completed %p\n", event);
 	ceph_osdc_put_event(event);
 	kfree(event_work);
@@ -1588,7 +1586,6 @@ static void handle_watch_notify(struct ceph_osd_client *osdc,
 	return;
 
 done_err:
-	complete(&event->completion);
 	ceph_osdc_put_event(event);
 	return;
 
@@ -1597,21 +1594,6 @@ bad:
 	return;
 }
 
-int ceph_osdc_wait_event(struct ceph_osd_event *event, unsigned long timeout)
-{
-	int err;
-
-	dout("wait_event %p\n", event);
-	err = wait_for_completion_interruptible_timeout(&event->completion,
-							timeout * HZ);
-	ceph_osdc_put_event(event);
-	if (err > 0)
-		err = 0;
-	dout("wait_event %p returns %d\n", event, err);
-	return err;
-}
-EXPORT_SYMBOL(ceph_osdc_wait_event);
-
 /*
  * Register request, send initial attempt.
  */
-- 
cgit v1.2.3


From 0315a7770983bbe69211efed1aaee08324acd54c Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@inktank.com>
Date: Fri, 15 Feb 2013 11:42:30 -0600
Subject: libceph: update rados.h

Update most of "include/linux/ceph/rados.h" to match its user space
counterpart in "src/include/rados.h" in the ceph tree.

Almost everything that has changed is either:
    - added or revised comments
    - added definitions (therefore no real effect on existing code)
    - defining the same value a different way (e.g., "1 << 0" vs "1")

The only exceptions are:
    - The declaration of ceph_osd_state_name() was excluded; that
      will be inserted in the next patch.
    - ceph_osd_op_mode_read() and ceph_osd_op_mode_modify() are
      defined differently, but they were never used in the kernel
    - CEPH_OSD_FLAG_PEERSTAT is now CEPH_OSD_FLAG_PEERSTAT_OLD, but
      that was never used in the kernel

Anything that was present in this file but not in its user space
counterpart was left intact here.  I left the definitions of
EOLDSNAPC and EBLACKLISTED using numerical values here; I'm
not sure the right way to go with those.

This and the next two commits resolve:
    http://tracker.ceph.com/issues/4164

Signed-off-by: Alex Elder <elder@inktank.com>
Reviewed-by: Josh Durgin <josh.durgin@inktank.com>
---
 include/linux/ceph/rados.h | 91 ++++++++++++++++++++++++++++++++++++----------
 1 file changed, 71 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h
index 2c04afeead1c..9c3b4aaf516b 100644
--- a/include/linux/ceph/rados.h
+++ b/include/linux/ceph/rados.h
@@ -145,8 +145,10 @@ struct ceph_eversion {
  */
 
 /* status bits */
-#define CEPH_OSD_EXISTS 1
-#define CEPH_OSD_UP     2
+#define CEPH_OSD_EXISTS  (1<<0)
+#define CEPH_OSD_UP      (1<<1)
+#define CEPH_OSD_AUTOOUT (1<<2)  /* osd was automatically marked out */
+#define CEPH_OSD_NEW     (1<<3)  /* osd is new, never marked in */
 
 /* osd weights.  fixed point value: 0x10000 == 1.0 ("in"), 0 == "out" */
 #define CEPH_OSD_IN  0x10000
@@ -161,9 +163,25 @@ struct ceph_eversion {
 #define CEPH_OSDMAP_PAUSERD  (1<<2)  /* pause all reads */
 #define CEPH_OSDMAP_PAUSEWR  (1<<3)  /* pause all writes */
 #define CEPH_OSDMAP_PAUSEREC (1<<4)  /* pause recovery */
+#define CEPH_OSDMAP_NOUP     (1<<5)  /* block osd boot */
+#define CEPH_OSDMAP_NODOWN   (1<<6)  /* block osd mark-down/failure */
+#define CEPH_OSDMAP_NOOUT    (1<<7)  /* block osd auto mark-out */
+#define CEPH_OSDMAP_NOIN     (1<<8)  /* block osd auto mark-in */
+#define CEPH_OSDMAP_NOBACKFILL (1<<9) /* block osd backfill */
+#define CEPH_OSDMAP_NORECOVER (1<<10) /* block osd recovery and backfill */
+
+/*
+ * The error code to return when an OSD can't handle a write
+ * because it is too large.
+ */
+#define OSD_WRITETOOBIG EMSGSIZE
 
 /*
  * osd ops
+ *
+ * WARNING: do not use these op codes directly.  Use the helpers
+ * defined below instead.  In certain cases, op code behavior was
+ * redefined, resulting in special-cases in the helpers.
  */
 #define CEPH_OSD_OP_MODE       0xf000
 #define CEPH_OSD_OP_MODE_RD    0x1000
@@ -177,6 +195,7 @@ struct ceph_eversion {
 #define CEPH_OSD_OP_TYPE_ATTR  0x0300
 #define CEPH_OSD_OP_TYPE_EXEC  0x0400
 #define CEPH_OSD_OP_TYPE_PG    0x0500
+#define CEPH_OSD_OP_TYPE_MULTI 0x0600 /* multiobject */
 
 enum {
 	/** data **/
@@ -217,6 +236,23 @@ enum {
 
 	CEPH_OSD_OP_WATCH   = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 15,
 
+	/* omap */
+	CEPH_OSD_OP_OMAPGETKEYS   = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 17,
+	CEPH_OSD_OP_OMAPGETVALS   = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 18,
+	CEPH_OSD_OP_OMAPGETHEADER = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 19,
+	CEPH_OSD_OP_OMAPGETVALSBYKEYS  =
+	  CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 20,
+	CEPH_OSD_OP_OMAPSETVALS   = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 21,
+	CEPH_OSD_OP_OMAPSETHEADER = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 22,
+	CEPH_OSD_OP_OMAPCLEAR     = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 23,
+	CEPH_OSD_OP_OMAPRMKEYS    = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 24,
+	CEPH_OSD_OP_OMAP_CMP      = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 25,
+
+	/** multi **/
+	CEPH_OSD_OP_CLONERANGE = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_MULTI | 1,
+	CEPH_OSD_OP_ASSERT_SRC_VERSION = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_MULTI | 2,
+	CEPH_OSD_OP_SRC_CMPXATTR = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_MULTI | 3,
+
 	/** attrs **/
 	/* read */
 	CEPH_OSD_OP_GETXATTR  = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_ATTR | 1,
@@ -238,6 +274,7 @@ enum {
 	CEPH_OSD_OP_SCRUB_RESERVE   = CEPH_OSD_OP_MODE_SUB | 6,
 	CEPH_OSD_OP_SCRUB_UNRESERVE = CEPH_OSD_OP_MODE_SUB | 7,
 	CEPH_OSD_OP_SCRUB_STOP      = CEPH_OSD_OP_MODE_SUB | 8,
+	CEPH_OSD_OP_SCRUB_MAP     = CEPH_OSD_OP_MODE_SUB | 9,
 
 	/** lock **/
 	CEPH_OSD_OP_WRLOCK    = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 1,
@@ -248,10 +285,12 @@ enum {
 	CEPH_OSD_OP_DNLOCK    = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 6,
 
 	/** exec **/
+	/* note: the RD bit here is wrong; see special-case below in helper */
 	CEPH_OSD_OP_CALL    = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_EXEC | 1,
 
 	/** pg **/
 	CEPH_OSD_OP_PGLS      = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_PG | 1,
+	CEPH_OSD_OP_PGLS_FILTER = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_PG | 2,
 };
 
 static inline int ceph_osd_op_type_lock(int op)
@@ -274,6 +313,10 @@ static inline int ceph_osd_op_type_pg(int op)
 {
 	return (op & CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_PG;
 }
+static inline int ceph_osd_op_type_multi(int op)
+{
+	return (op & CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_MULTI;
+}
 
 static inline int ceph_osd_op_mode_subop(int op)
 {
@@ -281,11 +324,12 @@ static inline int ceph_osd_op_mode_subop(int op)
 }
 static inline int ceph_osd_op_mode_read(int op)
 {
-	return (op & CEPH_OSD_OP_MODE) == CEPH_OSD_OP_MODE_RD;
+	return (op & CEPH_OSD_OP_MODE_RD) &&
+		op != CEPH_OSD_OP_CALL;
 }
 static inline int ceph_osd_op_mode_modify(int op)
 {
-	return (op & CEPH_OSD_OP_MODE) == CEPH_OSD_OP_MODE_WR;
+	return op & CEPH_OSD_OP_MODE_WR;
 }
 
 /*
@@ -294,34 +338,38 @@ static inline int ceph_osd_op_mode_modify(int op)
  */
 #define CEPH_OSD_TMAP_HDR 'h'
 #define CEPH_OSD_TMAP_SET 's'
+#define CEPH_OSD_TMAP_CREATE 'c' /* create key */
 #define CEPH_OSD_TMAP_RM  'r'
+#define CEPH_OSD_TMAP_RMSLOPPY 'R'
 
 extern const char *ceph_osd_op_name(int op);
 
-
 /*
  * osd op flags
  *
  * An op may be READ, WRITE, or READ|WRITE.
  */
 enum {
-	CEPH_OSD_FLAG_ACK = 1,          /* want (or is) "ack" ack */
-	CEPH_OSD_FLAG_ONNVRAM = 2,      /* want (or is) "onnvram" ack */
-	CEPH_OSD_FLAG_ONDISK = 4,       /* want (or is) "ondisk" ack */
-	CEPH_OSD_FLAG_RETRY = 8,        /* resend attempt */
-	CEPH_OSD_FLAG_READ = 16,        /* op may read */
-	CEPH_OSD_FLAG_WRITE = 32,       /* op may write */
-	CEPH_OSD_FLAG_ORDERSNAP = 64,   /* EOLDSNAP if snapc is out of order */
-	CEPH_OSD_FLAG_PEERSTAT = 128,   /* msg includes osd_peer_stat */
-	CEPH_OSD_FLAG_BALANCE_READS = 256,
-	CEPH_OSD_FLAG_PARALLELEXEC = 512, /* execute op in parallel */
-	CEPH_OSD_FLAG_PGOP = 1024,      /* pg op, no object */
-	CEPH_OSD_FLAG_EXEC = 2048,      /* op may exec */
-	CEPH_OSD_FLAG_EXEC_PUBLIC = 4096, /* op may exec (public) */
+	CEPH_OSD_FLAG_ACK =            0x0001,  /* want (or is) "ack" ack */
+	CEPH_OSD_FLAG_ONNVRAM =        0x0002,  /* want (or is) "onnvram" ack */
+	CEPH_OSD_FLAG_ONDISK =         0x0004,  /* want (or is) "ondisk" ack */
+	CEPH_OSD_FLAG_RETRY =          0x0008,  /* resend attempt */
+	CEPH_OSD_FLAG_READ =           0x0010,  /* op may read */
+	CEPH_OSD_FLAG_WRITE =          0x0020,  /* op may write */
+	CEPH_OSD_FLAG_ORDERSNAP =      0x0040,  /* EOLDSNAP if snapc is out of order */
+	CEPH_OSD_FLAG_PEERSTAT_OLD =   0x0080,  /* DEPRECATED msg includes osd_peer_stat */
+	CEPH_OSD_FLAG_BALANCE_READS =  0x0100,
+	CEPH_OSD_FLAG_PARALLELEXEC =   0x0200,  /* execute op in parallel */
+	CEPH_OSD_FLAG_PGOP =           0x0400,  /* pg op, no object */
+	CEPH_OSD_FLAG_EXEC =           0x0800,  /* op may exec */
+	CEPH_OSD_FLAG_EXEC_PUBLIC =    0x1000,  /* DEPRECATED op may exec (public) */
+	CEPH_OSD_FLAG_LOCALIZE_READS = 0x2000,  /* read from nearby replica, if any */
+	CEPH_OSD_FLAG_RWORDERED =      0x4000,  /* order wrt concurrent reads */
 };
 
 enum {
 	CEPH_OSD_OP_FLAG_EXCL = 1,      /* EXCL object create */
+	CEPH_OSD_OP_FLAG_FAILOK = 2,    /* continue despite failure */
 };
 
 #define EOLDSNAPC    ERESTART  /* ORDERSNAP flag set; writer has old snapc*/
@@ -381,7 +429,11 @@ struct ceph_osd_op {
 			__le64 ver;
 			__u8 flag;	/* 0 = unwatch, 1 = watch */
 		} __attribute__ ((packed)) watch;
-};
+		struct {
+			__le64 offset, length;
+			__le64 src_offset;
+		} __attribute__ ((packed)) clonerange;
+	};
 	__le32 payload_len;
 } __attribute__ ((packed));
 
@@ -424,5 +476,4 @@ struct ceph_osd_reply_head {
 } __attribute__ ((packed));
 
 
-
 #endif
-- 
cgit v1.2.3


From 4b568b1aaf23d0ce64b98d01d5ad1bcc7694440a Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@inktank.com>
Date: Fri, 15 Feb 2013 11:42:30 -0600
Subject: libceph: add ceph_osd_state_name()

Add the definition of ceph_osd_state_name(), to match its
counterpart in user space.

Signed-off-by: Alex Elder <elder@inktank.com>
Reviewed-by: Josh Durgin <josh.durgin@inktank.com>
---
 include/linux/ceph/rados.h |  2 ++
 net/ceph/ceph_strings.c    | 15 +++++++++++++++
 2 files changed, 17 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h
index 9c3b4aaf516b..b65182aba6f7 100644
--- a/include/linux/ceph/rados.h
+++ b/include/linux/ceph/rados.h
@@ -150,6 +150,8 @@ struct ceph_eversion {
 #define CEPH_OSD_AUTOOUT (1<<2)  /* osd was automatically marked out */
 #define CEPH_OSD_NEW     (1<<3)  /* osd is new, never marked in */
 
+extern const char *ceph_osd_state_name(int s);
+
 /* osd weights.  fixed point value: 0x10000 == 1.0 ("in"), 0 == "out" */
 #define CEPH_OSD_IN  0x10000
 #define CEPH_OSD_OUT 0
diff --git a/net/ceph/ceph_strings.c b/net/ceph/ceph_strings.c
index 3fbda04de29c..833075cff4e8 100644
--- a/net/ceph/ceph_strings.c
+++ b/net/ceph/ceph_strings.c
@@ -68,6 +68,21 @@ const char *ceph_osd_op_name(int op)
 	return "???";
 }
 
+const char *ceph_osd_state_name(int s)
+{
+	switch (s) {
+	case CEPH_OSD_EXISTS:
+		return "exists";
+	case CEPH_OSD_UP:
+		return "up";
+	case CEPH_OSD_AUTOOUT:
+		return "autoout";
+	case CEPH_OSD_NEW:
+		return "new";
+	default:
+		return "???";
+	}
+}
 
 const char *ceph_pool_op_name(int op)
 {
-- 
cgit v1.2.3


From dd6f5e105d85e02bc41db0891eb07152b1746ad9 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@inktank.com>
Date: Fri, 15 Feb 2013 11:42:30 -0600
Subject: libceph: update ceph_fs.h

Update most of "include/linux/ceph/ceph_fs.h" to match its user
space counterpart in "src/include/ceph_fs.h" in the ceph tree.

Everything that has changed is either:
    - added definitions (therefore no real effect on existing code)
    - deleting unused symbols
    - added or revised comments

There were some differences between the struct definitions for
ceph_mon_subscribe_item and the open field of ceph_mds_request_args;
those differences remain.

This and the next commit resolve:
    http://tracker.ceph.com/issues/4165

Signed-off-by: Alex Elder <elder@inktank.com>
Reviewed-by: Josh Durgin <josh.durgin@inktank.com>
---
 include/linux/ceph/ceph_fs.h | 32 ++++++++++++++++++++++++--------
 1 file changed, 24 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h
index cf6f4d998a76..2ad7b860f062 100644
--- a/include/linux/ceph/ceph_fs.h
+++ b/include/linux/ceph/ceph_fs.h
@@ -21,16 +21,14 @@
  * internal cluster protocols separately from the public,
  * client-facing protocol.
  */
-#define CEPH_OSD_PROTOCOL     8 /* cluster internal */
-#define CEPH_MDS_PROTOCOL    12 /* cluster internal */
-#define CEPH_MON_PROTOCOL     5 /* cluster internal */
 #define CEPH_OSDC_PROTOCOL   24 /* server/client */
 #define CEPH_MDSC_PROTOCOL   32 /* server/client */
 #define CEPH_MONC_PROTOCOL   15 /* server/client */
 
 
-#define CEPH_INO_ROOT  1
-#define CEPH_INO_CEPH  2        /* hidden .ceph dir */
+#define CEPH_INO_ROOT   1
+#define CEPH_INO_CEPH   2       /* hidden .ceph dir */
+#define CEPH_INO_DOTDOT 3	/* used by ceph fuse for parent (..) */
 
 /* arbitrary limit on max # of monitors (cluster of 3 is typical) */
 #define CEPH_MAX_MON   31
@@ -51,7 +49,7 @@ struct ceph_file_layout {
 	__le32 fl_object_stripe_unit;  /* UNUSED.  for per-object parity, if any */
 
 	/* object -> pg layout */
-	__le32 fl_unused;       /* unused; used to be preferred primary (-1) */
+	__le32 fl_unused;       /* unused; used to be preferred primary for pg (-1 for none) */
 	__le32 fl_pg_pool;      /* namespace, crush ruleset, rep level */
 } __attribute__ ((packed));
 
@@ -101,6 +99,8 @@ struct ceph_dir_layout {
 #define CEPH_MSG_MON_SUBSCRIBE_ACK      16
 #define CEPH_MSG_AUTH			17
 #define CEPH_MSG_AUTH_REPLY		18
+#define CEPH_MSG_MON_GET_VERSION        19
+#define CEPH_MSG_MON_GET_VERSION_REPLY  20
 
 /* client <-> mds */
 #define CEPH_MSG_MDS_MAP                21
@@ -220,6 +220,11 @@ struct ceph_mon_subscribe_ack {
 	struct ceph_fsid fsid;
 } __attribute__ ((packed));
 
+/*
+ * mdsmap flags
+ */
+#define CEPH_MDSMAP_DOWN    (1<<0)  /* cluster deliberately down */
+
 /*
  * mds states
  *   > 0 -> in
@@ -233,6 +238,7 @@ struct ceph_mon_subscribe_ack {
 #define CEPH_MDS_STATE_CREATING    -6  /* up, creating MDS instance. */
 #define CEPH_MDS_STATE_STARTING    -7  /* up, starting previously stopped mds */
 #define CEPH_MDS_STATE_STANDBY_REPLAY -8 /* up, tailing active node's journal */
+#define CEPH_MDS_STATE_REPLAYONCE   -9 /* up, replaying an active node's journal */
 
 #define CEPH_MDS_STATE_REPLAY       8  /* up, replaying journal. */
 #define CEPH_MDS_STATE_RESOLVE      9  /* up, disambiguating distributed
@@ -264,6 +270,7 @@ extern const char *ceph_mds_state_name(int s);
 #define CEPH_LOCK_IXATTR      2048
 #define CEPH_LOCK_IFLOCK      4096  /* advisory file locks */
 #define CEPH_LOCK_INO         8192  /* immutable inode bits; not a lock */
+#define CEPH_LOCK_IPOLICY     16384 /* policy lock on dirs. MDS internal */
 
 /* client_session ops */
 enum {
@@ -338,6 +345,12 @@ extern const char *ceph_mds_op_name(int op);
 #define CEPH_SETATTR_SIZE  32
 #define CEPH_SETATTR_CTIME 64
 
+/*
+ * Ceph setxattr request flags.
+ */
+#define CEPH_XATTR_CREATE  1
+#define CEPH_XATTR_REPLACE 2
+
 union ceph_mds_request_args {
 	struct {
 		__le32 mask;                 /* CEPH_CAP_* */
@@ -522,14 +535,17 @@ int ceph_flags_to_mode(int flags);
 #define CEPH_CAP_GWREXTEND  64  /* (file) client can extend EOF */
 #define CEPH_CAP_GLAZYIO   128  /* (file) client can perform lazy io */
 
+#define CEPH_CAP_SIMPLE_BITS  2
+#define CEPH_CAP_FILE_BITS    8
+
 /* per-lock shift */
 #define CEPH_CAP_SAUTH      2
 #define CEPH_CAP_SLINK      4
 #define CEPH_CAP_SXATTR     6
 #define CEPH_CAP_SFILE      8
-#define CEPH_CAP_SFLOCK    20 
+#define CEPH_CAP_SFLOCK    20
 
-#define CEPH_CAP_BITS       22
+#define CEPH_CAP_BITS      22
 
 /* composed values */
 #define CEPH_CAP_AUTH_SHARED  (CEPH_CAP_GSHARED  << CEPH_CAP_SAUTH)
-- 
cgit v1.2.3


From b4278c961aca320839964e23cfc7906ff61af0c2 Mon Sep 17 00:00:00 2001
From: Gao feng <gaofeng@cn.fujitsu.com>
Date: Mon, 18 Feb 2013 01:34:55 +0000
Subject: net: proc: remove proc_net_fops_create

proc_net_fops_create has been replaced by proc_create,
we can remove it now.

Signed-off-by: Gao feng <gaofeng@cn.fujitsu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 fs/proc/proc_net.c      | 8 --------
 include/linux/proc_fs.h | 3 ---
 2 files changed, 11 deletions(-)

(limited to 'include/linux')

diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index fe72cd073dea..30f7c678424b 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -177,14 +177,6 @@ const struct file_operations proc_net_operations = {
 	.readdir	= proc_tgid_net_readdir,
 };
 
-
-struct proc_dir_entry *proc_net_fops_create(struct net *net,
-	const char *name, umode_t mode, const struct file_operations *fops)
-{
-	return proc_create(name, mode, net->proc_net, fops);
-}
-EXPORT_SYMBOL_GPL(proc_net_fops_create);
-
 void proc_net_remove(struct net *net, const char *name)
 {
 	remove_proc_entry(name, net->proc_net);
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 32676b35d2f5..35ee1891ae25 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -171,8 +171,6 @@ static inline struct proc_dir_entry *create_proc_read_entry(const char *name,
 	return res;
 }
  
-extern struct proc_dir_entry *proc_net_fops_create(struct net *net,
-	const char *name, umode_t mode, const struct file_operations *fops);
 extern void proc_net_remove(struct net *net, const char *name);
 extern struct proc_dir_entry *proc_net_mkdir(struct net *net, const char *name,
 	struct proc_dir_entry *parent);
@@ -184,7 +182,6 @@ extern int proc_alloc_inum(unsigned int *pino);
 extern void proc_free_inum(unsigned int inum);
 #else
 
-#define proc_net_fops_create(net, name, mode, fops)  ({ (void)(mode), NULL; })
 static inline void proc_net_remove(struct net *net, const char *name) {}
 
 static inline void proc_flush_task(struct task_struct *task)
-- 
cgit v1.2.3


From c2399059a389ba686fa7f45d8913a708914752c4 Mon Sep 17 00:00:00 2001
From: Gao feng <gaofeng@cn.fujitsu.com>
Date: Mon, 18 Feb 2013 01:34:57 +0000
Subject: net: proc: remove proc_net_remove

proc_net_remove has been replaced by remove_proc_entry.
we can remove it now.

Signed-off-by: Gao feng <gaofeng@cn.fujitsu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 fs/proc/proc_net.c      | 6 ------
 include/linux/proc_fs.h | 3 ---
 2 files changed, 9 deletions(-)

(limited to 'include/linux')

diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index 30f7c678424b..3131a03d7d37 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -177,12 +177,6 @@ const struct file_operations proc_net_operations = {
 	.readdir	= proc_tgid_net_readdir,
 };
 
-void proc_net_remove(struct net *net, const char *name)
-{
-	remove_proc_entry(name, net->proc_net);
-}
-EXPORT_SYMBOL_GPL(proc_net_remove);
-
 static __net_init int proc_net_ns_init(struct net *net)
 {
 	struct proc_dir_entry *netd, *net_statd;
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 35ee1891ae25..319f69422667 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -171,7 +171,6 @@ static inline struct proc_dir_entry *create_proc_read_entry(const char *name,
 	return res;
 }
  
-extern void proc_net_remove(struct net *net, const char *name);
 extern struct proc_dir_entry *proc_net_mkdir(struct net *net, const char *name,
 	struct proc_dir_entry *parent);
 
@@ -182,8 +181,6 @@ extern int proc_alloc_inum(unsigned int *pino);
 extern void proc_free_inum(unsigned int inum);
 #else
 
-static inline void proc_net_remove(struct net *net, const char *name) {}
-
 static inline void proc_flush_task(struct task_struct *task)
 {
 }
-- 
cgit v1.2.3


From 900ff8c6321418dafa03c22e215cb9646a2541b9 Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Mon, 18 Feb 2013 19:20:33 +0000
Subject: net: move procfs code to net/core/net-procfs.c

Similar to net/core/net-sysfs.c, group procfs code to
a single unit.

Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  36 +++-
 net/core/Makefile         |   1 +
 net/core/dev.c            | 384 +-----------------------------------------
 net/core/dev_addr_lists.c |  74 ---------
 net/core/net-procfs.c     | 414 ++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 450 insertions(+), 459 deletions(-)
 create mode 100644 net/core/net-procfs.c

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 920361bc27e7..f111b4f038f3 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2692,9 +2692,9 @@ extern void		net_enable_timestamp(void);
 extern void		net_disable_timestamp(void);
 
 #ifdef CONFIG_PROC_FS
-extern void *dev_seq_start(struct seq_file *seq, loff_t *pos);
-extern void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos);
-extern void dev_seq_stop(struct seq_file *seq, void *v);
+extern int __init dev_proc_init(void);
+#else
+#define dev_proc_init() 0
 #endif
 
 extern int netdev_class_create_file(struct class_attribute *class_attr);
@@ -2896,4 +2896,34 @@ do {								\
 })
 #endif
 
+/*
+ *	The list of packet types we will receive (as opposed to discard)
+ *	and the routines to invoke.
+ *
+ *	Why 16. Because with 16 the only overlap we get on a hash of the
+ *	low nibble of the protocol value is RARP/SNAP/X.25.
+ *
+ *      NOTE:  That is no longer true with the addition of VLAN tags.  Not
+ *             sure which should go first, but I bet it won't make much
+ *             difference if we are running VLANs.  The good news is that
+ *             this protocol won't be in the list unless compiled in, so
+ *             the average user (w/out VLANs) will not be adversely affected.
+ *             --BLG
+ *
+ *		0800	IP
+ *		8100    802.1Q VLAN
+ *		0001	802.3
+ *		0002	AX.25
+ *		0004	802.2
+ *		8035	RARP
+ *		0005	SNAP
+ *		0805	X.25
+ *		0806	ARP
+ *		8137	IPX
+ *		0009	Localtalk
+ *		86DD	IPv6
+ */
+#define PTYPE_HASH_SIZE	(16)
+#define PTYPE_HASH_MASK	(PTYPE_HASH_SIZE - 1)
+
 #endif	/* _LINUX_NETDEVICE_H */
diff --git a/net/core/Makefile b/net/core/Makefile
index 0c5e3618c80b..b33b996f5dd6 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -13,6 +13,7 @@ obj-y		     += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \
 
 obj-$(CONFIG_XFRM) += flow.o
 obj-y += net-sysfs.o
+obj-$(CONFIG_PROC_FS) += net-procfs.o
 obj-$(CONFIG_NET_PKTGEN) += pktgen.o
 obj-$(CONFIG_NETPOLL) += netpoll.o
 obj-$(CONFIG_NET_DMA) += user_dma.o
diff --git a/net/core/dev.c b/net/core/dev.c
index decf55f9ad80..8d9ddb09f208 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -97,8 +97,6 @@
 #include <net/net_namespace.h>
 #include <net/sock.h>
 #include <linux/rtnetlink.h>
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
 #include <linux/stat.h>
 #include <net/dst.h>
 #include <net/pkt_sched.h>
@@ -110,7 +108,6 @@
 #include <linux/netpoll.h>
 #include <linux/rcupdate.h>
 #include <linux/delay.h>
-#include <net/wext.h>
 #include <net/iw_handler.h>
 #include <asm/current.h>
 #include <linux/audit.h>
@@ -141,41 +138,10 @@
 /* This should be increased if a protocol with a bigger head is added. */
 #define GRO_MAX_HEAD (MAX_HEADER + 128)
 
-/*
- *	The list of packet types we will receive (as opposed to discard)
- *	and the routines to invoke.
- *
- *	Why 16. Because with 16 the only overlap we get on a hash of the
- *	low nibble of the protocol value is RARP/SNAP/X.25.
- *
- *      NOTE:  That is no longer true with the addition of VLAN tags.  Not
- *             sure which should go first, but I bet it won't make much
- *             difference if we are running VLANs.  The good news is that
- *             this protocol won't be in the list unless compiled in, so
- *             the average user (w/out VLANs) will not be adversely affected.
- *             --BLG
- *
- *		0800	IP
- *		8100    802.1Q VLAN
- *		0001	802.3
- *		0002	AX.25
- *		0004	802.2
- *		8035	RARP
- *		0005	SNAP
- *		0805	X.25
- *		0806	ARP
- *		8137	IPX
- *		0009	Localtalk
- *		86DD	IPv6
- */
-
-#define PTYPE_HASH_SIZE	(16)
-#define PTYPE_HASH_MASK	(PTYPE_HASH_SIZE - 1)
-
 static DEFINE_SPINLOCK(ptype_lock);
 static DEFINE_SPINLOCK(offload_lock);
-static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
-static struct list_head ptype_all __read_mostly;	/* Taps */
+struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
+struct list_head ptype_all __read_mostly;	/* Taps */
 static struct list_head offload_base __read_mostly;
 
 /*
@@ -4217,352 +4183,6 @@ softnet_break:
 	goto out;
 }
 
-#ifdef CONFIG_PROC_FS
-
-#define BUCKET_SPACE (32 - NETDEV_HASHBITS - 1)
-
-#define get_bucket(x) ((x) >> BUCKET_SPACE)
-#define get_offset(x) ((x) & ((1 << BUCKET_SPACE) - 1))
-#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
-
-static inline struct net_device *dev_from_same_bucket(struct seq_file *seq, loff_t *pos)
-{
-	struct net *net = seq_file_net(seq);
-	struct net_device *dev;
-	struct hlist_node *p;
-	struct hlist_head *h;
-	unsigned int count = 0, offset = get_offset(*pos);
-
-	h = &net->dev_name_head[get_bucket(*pos)];
-	hlist_for_each_entry_rcu(dev, p, h, name_hlist) {
-		if (++count == offset)
-			return dev;
-	}
-
-	return NULL;
-}
-
-static inline struct net_device *dev_from_bucket(struct seq_file *seq, loff_t *pos)
-{
-	struct net_device *dev;
-	unsigned int bucket;
-
-	do {
-		dev = dev_from_same_bucket(seq, pos);
-		if (dev)
-			return dev;
-
-		bucket = get_bucket(*pos) + 1;
-		*pos = set_bucket_offset(bucket, 1);
-	} while (bucket < NETDEV_HASHENTRIES);
-
-	return NULL;
-}
-
-/*
- *	This is invoked by the /proc filesystem handler to display a device
- *	in detail.
- */
-void *dev_seq_start(struct seq_file *seq, loff_t *pos)
-	__acquires(RCU)
-{
-	rcu_read_lock();
-	if (!*pos)
-		return SEQ_START_TOKEN;
-
-	if (get_bucket(*pos) >= NETDEV_HASHENTRIES)
-		return NULL;
-
-	return dev_from_bucket(seq, pos);
-}
-
-void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
-	++*pos;
-	return dev_from_bucket(seq, pos);
-}
-
-void dev_seq_stop(struct seq_file *seq, void *v)
-	__releases(RCU)
-{
-	rcu_read_unlock();
-}
-
-static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
-{
-	struct rtnl_link_stats64 temp;
-	const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp);
-
-	seq_printf(seq, "%6s: %7llu %7llu %4llu %4llu %4llu %5llu %10llu %9llu "
-		   "%8llu %7llu %4llu %4llu %4llu %5llu %7llu %10llu\n",
-		   dev->name, stats->rx_bytes, stats->rx_packets,
-		   stats->rx_errors,
-		   stats->rx_dropped + stats->rx_missed_errors,
-		   stats->rx_fifo_errors,
-		   stats->rx_length_errors + stats->rx_over_errors +
-		    stats->rx_crc_errors + stats->rx_frame_errors,
-		   stats->rx_compressed, stats->multicast,
-		   stats->tx_bytes, stats->tx_packets,
-		   stats->tx_errors, stats->tx_dropped,
-		   stats->tx_fifo_errors, stats->collisions,
-		   stats->tx_carrier_errors +
-		    stats->tx_aborted_errors +
-		    stats->tx_window_errors +
-		    stats->tx_heartbeat_errors,
-		   stats->tx_compressed);
-}
-
-/*
- *	Called from the PROCfs module. This now uses the new arbitrary sized
- *	/proc/net interface to create /proc/net/dev
- */
-static int dev_seq_show(struct seq_file *seq, void *v)
-{
-	if (v == SEQ_START_TOKEN)
-		seq_puts(seq, "Inter-|   Receive                            "
-			      "                    |  Transmit\n"
-			      " face |bytes    packets errs drop fifo frame "
-			      "compressed multicast|bytes    packets errs "
-			      "drop fifo colls carrier compressed\n");
-	else
-		dev_seq_printf_stats(seq, v);
-	return 0;
-}
-
-static struct softnet_data *softnet_get_online(loff_t *pos)
-{
-	struct softnet_data *sd = NULL;
-
-	while (*pos < nr_cpu_ids)
-		if (cpu_online(*pos)) {
-			sd = &per_cpu(softnet_data, *pos);
-			break;
-		} else
-			++*pos;
-	return sd;
-}
-
-static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
-{
-	return softnet_get_online(pos);
-}
-
-static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
-	++*pos;
-	return softnet_get_online(pos);
-}
-
-static void softnet_seq_stop(struct seq_file *seq, void *v)
-{
-}
-
-static int softnet_seq_show(struct seq_file *seq, void *v)
-{
-	struct softnet_data *sd = v;
-
-	seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
-		   sd->processed, sd->dropped, sd->time_squeeze, 0,
-		   0, 0, 0, 0, /* was fastroute */
-		   sd->cpu_collision, sd->received_rps);
-	return 0;
-}
-
-static const struct seq_operations dev_seq_ops = {
-	.start = dev_seq_start,
-	.next  = dev_seq_next,
-	.stop  = dev_seq_stop,
-	.show  = dev_seq_show,
-};
-
-static int dev_seq_open(struct inode *inode, struct file *file)
-{
-	return seq_open_net(inode, file, &dev_seq_ops,
-			    sizeof(struct seq_net_private));
-}
-
-static const struct file_operations dev_seq_fops = {
-	.owner	 = THIS_MODULE,
-	.open    = dev_seq_open,
-	.read    = seq_read,
-	.llseek  = seq_lseek,
-	.release = seq_release_net,
-};
-
-static const struct seq_operations softnet_seq_ops = {
-	.start = softnet_seq_start,
-	.next  = softnet_seq_next,
-	.stop  = softnet_seq_stop,
-	.show  = softnet_seq_show,
-};
-
-static int softnet_seq_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &softnet_seq_ops);
-}
-
-static const struct file_operations softnet_seq_fops = {
-	.owner	 = THIS_MODULE,
-	.open    = softnet_seq_open,
-	.read    = seq_read,
-	.llseek  = seq_lseek,
-	.release = seq_release,
-};
-
-static void *ptype_get_idx(loff_t pos)
-{
-	struct packet_type *pt = NULL;
-	loff_t i = 0;
-	int t;
-
-	list_for_each_entry_rcu(pt, &ptype_all, list) {
-		if (i == pos)
-			return pt;
-		++i;
-	}
-
-	for (t = 0; t < PTYPE_HASH_SIZE; t++) {
-		list_for_each_entry_rcu(pt, &ptype_base[t], list) {
-			if (i == pos)
-				return pt;
-			++i;
-		}
-	}
-	return NULL;
-}
-
-static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
-	__acquires(RCU)
-{
-	rcu_read_lock();
-	return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN;
-}
-
-static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
-	struct packet_type *pt;
-	struct list_head *nxt;
-	int hash;
-
-	++*pos;
-	if (v == SEQ_START_TOKEN)
-		return ptype_get_idx(0);
-
-	pt = v;
-	nxt = pt->list.next;
-	if (pt->type == htons(ETH_P_ALL)) {
-		if (nxt != &ptype_all)
-			goto found;
-		hash = 0;
-		nxt = ptype_base[0].next;
-	} else
-		hash = ntohs(pt->type) & PTYPE_HASH_MASK;
-
-	while (nxt == &ptype_base[hash]) {
-		if (++hash >= PTYPE_HASH_SIZE)
-			return NULL;
-		nxt = ptype_base[hash].next;
-	}
-found:
-	return list_entry(nxt, struct packet_type, list);
-}
-
-static void ptype_seq_stop(struct seq_file *seq, void *v)
-	__releases(RCU)
-{
-	rcu_read_unlock();
-}
-
-static int ptype_seq_show(struct seq_file *seq, void *v)
-{
-	struct packet_type *pt = v;
-
-	if (v == SEQ_START_TOKEN)
-		seq_puts(seq, "Type Device      Function\n");
-	else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) {
-		if (pt->type == htons(ETH_P_ALL))
-			seq_puts(seq, "ALL ");
-		else
-			seq_printf(seq, "%04x", ntohs(pt->type));
-
-		seq_printf(seq, " %-8s %pF\n",
-			   pt->dev ? pt->dev->name : "", pt->func);
-	}
-
-	return 0;
-}
-
-static const struct seq_operations ptype_seq_ops = {
-	.start = ptype_seq_start,
-	.next  = ptype_seq_next,
-	.stop  = ptype_seq_stop,
-	.show  = ptype_seq_show,
-};
-
-static int ptype_seq_open(struct inode *inode, struct file *file)
-{
-	return seq_open_net(inode, file, &ptype_seq_ops,
-			sizeof(struct seq_net_private));
-}
-
-static const struct file_operations ptype_seq_fops = {
-	.owner	 = THIS_MODULE,
-	.open    = ptype_seq_open,
-	.read    = seq_read,
-	.llseek  = seq_lseek,
-	.release = seq_release_net,
-};
-
-
-static int __net_init dev_proc_net_init(struct net *net)
-{
-	int rc = -ENOMEM;
-
-	if (!proc_create("dev", S_IRUGO, net->proc_net, &dev_seq_fops))
-		goto out;
-	if (!proc_create("softnet_stat", S_IRUGO, net->proc_net,
-			 &softnet_seq_fops))
-		goto out_dev;
-	if (!proc_create("ptype", S_IRUGO, net->proc_net, &ptype_seq_fops))
-		goto out_softnet;
-
-	if (wext_proc_init(net))
-		goto out_ptype;
-	rc = 0;
-out:
-	return rc;
-out_ptype:
-	remove_proc_entry("ptype", net->proc_net);
-out_softnet:
-	remove_proc_entry("softnet_stat", net->proc_net);
-out_dev:
-	remove_proc_entry("dev", net->proc_net);
-	goto out;
-}
-
-static void __net_exit dev_proc_net_exit(struct net *net)
-{
-	wext_proc_exit(net);
-
-	remove_proc_entry("ptype", net->proc_net);
-	remove_proc_entry("softnet_stat", net->proc_net);
-	remove_proc_entry("dev", net->proc_net);
-}
-
-static struct pernet_operations __net_initdata dev_proc_ops = {
-	.init = dev_proc_net_init,
-	.exit = dev_proc_net_exit,
-};
-
-static int __init dev_proc_init(void)
-{
-	return register_pernet_subsys(&dev_proc_ops);
-}
-#else
-#define dev_proc_init() 0
-#endif	/* CONFIG_PROC_FS */
-
-
 struct netdev_upper {
 	struct net_device *dev;
 	bool master;
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index 89562529df45..bd2eb9d3e369 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -15,7 +15,6 @@
 #include <linux/rtnetlink.h>
 #include <linux/export.h>
 #include <linux/list.h>
-#include <linux/proc_fs.h>
 
 /*
  * General list handling functions
@@ -727,76 +726,3 @@ void dev_mc_init(struct net_device *dev)
 	__hw_addr_init(&dev->mc);
 }
 EXPORT_SYMBOL(dev_mc_init);
-
-#ifdef CONFIG_PROC_FS
-#include <linux/seq_file.h>
-
-static int dev_mc_seq_show(struct seq_file *seq, void *v)
-{
-	struct netdev_hw_addr *ha;
-	struct net_device *dev = v;
-
-	if (v == SEQ_START_TOKEN)
-		return 0;
-
-	netif_addr_lock_bh(dev);
-	netdev_for_each_mc_addr(ha, dev) {
-		int i;
-
-		seq_printf(seq, "%-4d %-15s %-5d %-5d ", dev->ifindex,
-			   dev->name, ha->refcount, ha->global_use);
-
-		for (i = 0; i < dev->addr_len; i++)
-			seq_printf(seq, "%02x", ha->addr[i]);
-
-		seq_putc(seq, '\n');
-	}
-	netif_addr_unlock_bh(dev);
-	return 0;
-}
-
-static const struct seq_operations dev_mc_seq_ops = {
-	.start = dev_seq_start,
-	.next  = dev_seq_next,
-	.stop  = dev_seq_stop,
-	.show  = dev_mc_seq_show,
-};
-
-static int dev_mc_seq_open(struct inode *inode, struct file *file)
-{
-	return seq_open_net(inode, file, &dev_mc_seq_ops,
-			    sizeof(struct seq_net_private));
-}
-
-static const struct file_operations dev_mc_seq_fops = {
-	.owner	 = THIS_MODULE,
-	.open    = dev_mc_seq_open,
-	.read    = seq_read,
-	.llseek  = seq_lseek,
-	.release = seq_release_net,
-};
-
-#endif
-
-static int __net_init dev_mc_net_init(struct net *net)
-{
-	if (!proc_create("dev_mcast", 0, net->proc_net, &dev_mc_seq_fops))
-		return -ENOMEM;
-	return 0;
-}
-
-static void __net_exit dev_mc_net_exit(struct net *net)
-{
-	remove_proc_entry("dev_mcast", net->proc_net);
-}
-
-static struct pernet_operations __net_initdata dev_mc_net_ops = {
-	.init = dev_mc_net_init,
-	.exit = dev_mc_net_exit,
-};
-
-void __init dev_mcast_init(void)
-{
-	register_pernet_subsys(&dev_mc_net_ops);
-}
-
diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c
new file mode 100644
index 000000000000..ac87066491e9
--- /dev/null
+++ b/net/core/net-procfs.c
@@ -0,0 +1,414 @@
+#include <linux/netdevice.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <net/wext.h>
+
+#define BUCKET_SPACE (32 - NETDEV_HASHBITS - 1)
+
+#define get_bucket(x) ((x) >> BUCKET_SPACE)
+#define get_offset(x) ((x) & ((1 << BUCKET_SPACE) - 1))
+#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
+
+extern struct list_head ptype_all __read_mostly;
+extern struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
+
+static inline struct net_device *dev_from_same_bucket(struct seq_file *seq, loff_t *pos)
+{
+	struct net *net = seq_file_net(seq);
+	struct net_device *dev;
+	struct hlist_node *p;
+	struct hlist_head *h;
+	unsigned int count = 0, offset = get_offset(*pos);
+
+	h = &net->dev_name_head[get_bucket(*pos)];
+	hlist_for_each_entry_rcu(dev, p, h, name_hlist) {
+		if (++count == offset)
+			return dev;
+	}
+
+	return NULL;
+}
+
+static inline struct net_device *dev_from_bucket(struct seq_file *seq, loff_t *pos)
+{
+	struct net_device *dev;
+	unsigned int bucket;
+
+	do {
+		dev = dev_from_same_bucket(seq, pos);
+		if (dev)
+			return dev;
+
+		bucket = get_bucket(*pos) + 1;
+		*pos = set_bucket_offset(bucket, 1);
+	} while (bucket < NETDEV_HASHENTRIES);
+
+	return NULL;
+}
+
+/*
+ *	This is invoked by the /proc filesystem handler to display a device
+ *	in detail.
+ */
+static void *dev_seq_start(struct seq_file *seq, loff_t *pos)
+	__acquires(RCU)
+{
+	rcu_read_lock();
+	if (!*pos)
+		return SEQ_START_TOKEN;
+
+	if (get_bucket(*pos) >= NETDEV_HASHENTRIES)
+		return NULL;
+
+	return dev_from_bucket(seq, pos);
+}
+
+static void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	++*pos;
+	return dev_from_bucket(seq, pos);
+}
+
+static void dev_seq_stop(struct seq_file *seq, void *v)
+	__releases(RCU)
+{
+	rcu_read_unlock();
+}
+
+static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
+{
+	struct rtnl_link_stats64 temp;
+	const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp);
+
+	seq_printf(seq, "%6s: %7llu %7llu %4llu %4llu %4llu %5llu %10llu %9llu "
+		   "%8llu %7llu %4llu %4llu %4llu %5llu %7llu %10llu\n",
+		   dev->name, stats->rx_bytes, stats->rx_packets,
+		   stats->rx_errors,
+		   stats->rx_dropped + stats->rx_missed_errors,
+		   stats->rx_fifo_errors,
+		   stats->rx_length_errors + stats->rx_over_errors +
+		    stats->rx_crc_errors + stats->rx_frame_errors,
+		   stats->rx_compressed, stats->multicast,
+		   stats->tx_bytes, stats->tx_packets,
+		   stats->tx_errors, stats->tx_dropped,
+		   stats->tx_fifo_errors, stats->collisions,
+		   stats->tx_carrier_errors +
+		    stats->tx_aborted_errors +
+		    stats->tx_window_errors +
+		    stats->tx_heartbeat_errors,
+		   stats->tx_compressed);
+}
+
+/*
+ *	Called from the PROCfs module. This now uses the new arbitrary sized
+ *	/proc/net interface to create /proc/net/dev
+ */
+static int dev_seq_show(struct seq_file *seq, void *v)
+{
+	if (v == SEQ_START_TOKEN)
+		seq_puts(seq, "Inter-|   Receive                            "
+			      "                    |  Transmit\n"
+			      " face |bytes    packets errs drop fifo frame "
+			      "compressed multicast|bytes    packets errs "
+			      "drop fifo colls carrier compressed\n");
+	else
+		dev_seq_printf_stats(seq, v);
+	return 0;
+}
+
+static struct softnet_data *softnet_get_online(loff_t *pos)
+{
+	struct softnet_data *sd = NULL;
+
+	while (*pos < nr_cpu_ids)
+		if (cpu_online(*pos)) {
+			sd = &per_cpu(softnet_data, *pos);
+			break;
+		} else
+			++*pos;
+	return sd;
+}
+
+static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	return softnet_get_online(pos);
+}
+
+static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	++*pos;
+	return softnet_get_online(pos);
+}
+
+static void softnet_seq_stop(struct seq_file *seq, void *v)
+{
+}
+
+static int softnet_seq_show(struct seq_file *seq, void *v)
+{
+	struct softnet_data *sd = v;
+
+	seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
+		   sd->processed, sd->dropped, sd->time_squeeze, 0,
+		   0, 0, 0, 0, /* was fastroute */
+		   sd->cpu_collision, sd->received_rps);
+	return 0;
+}
+
+static const struct seq_operations dev_seq_ops = {
+	.start = dev_seq_start,
+	.next  = dev_seq_next,
+	.stop  = dev_seq_stop,
+	.show  = dev_seq_show,
+};
+
+static int dev_seq_open(struct inode *inode, struct file *file)
+{
+	return seq_open_net(inode, file, &dev_seq_ops,
+			    sizeof(struct seq_net_private));
+}
+
+static const struct file_operations dev_seq_fops = {
+	.owner	 = THIS_MODULE,
+	.open    = dev_seq_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = seq_release_net,
+};
+
+static const struct seq_operations softnet_seq_ops = {
+	.start = softnet_seq_start,
+	.next  = softnet_seq_next,
+	.stop  = softnet_seq_stop,
+	.show  = softnet_seq_show,
+};
+
+static int softnet_seq_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &softnet_seq_ops);
+}
+
+static const struct file_operations softnet_seq_fops = {
+	.owner	 = THIS_MODULE,
+	.open    = softnet_seq_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = seq_release,
+};
+
+static void *ptype_get_idx(loff_t pos)
+{
+	struct packet_type *pt = NULL;
+	loff_t i = 0;
+	int t;
+
+	list_for_each_entry_rcu(pt, &ptype_all, list) {
+		if (i == pos)
+			return pt;
+		++i;
+	}
+
+	for (t = 0; t < PTYPE_HASH_SIZE; t++) {
+		list_for_each_entry_rcu(pt, &ptype_base[t], list) {
+			if (i == pos)
+				return pt;
+			++i;
+		}
+	}
+	return NULL;
+}
+
+static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
+	__acquires(RCU)
+{
+	rcu_read_lock();
+	return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN;
+}
+
+static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct packet_type *pt;
+	struct list_head *nxt;
+	int hash;
+
+	++*pos;
+	if (v == SEQ_START_TOKEN)
+		return ptype_get_idx(0);
+
+	pt = v;
+	nxt = pt->list.next;
+	if (pt->type == htons(ETH_P_ALL)) {
+		if (nxt != &ptype_all)
+			goto found;
+		hash = 0;
+		nxt = ptype_base[0].next;
+	} else
+		hash = ntohs(pt->type) & PTYPE_HASH_MASK;
+
+	while (nxt == &ptype_base[hash]) {
+		if (++hash >= PTYPE_HASH_SIZE)
+			return NULL;
+		nxt = ptype_base[hash].next;
+	}
+found:
+	return list_entry(nxt, struct packet_type, list);
+}
+
+static void ptype_seq_stop(struct seq_file *seq, void *v)
+	__releases(RCU)
+{
+	rcu_read_unlock();
+}
+
+static int ptype_seq_show(struct seq_file *seq, void *v)
+{
+	struct packet_type *pt = v;
+
+	if (v == SEQ_START_TOKEN)
+		seq_puts(seq, "Type Device      Function\n");
+	else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) {
+		if (pt->type == htons(ETH_P_ALL))
+			seq_puts(seq, "ALL ");
+		else
+			seq_printf(seq, "%04x", ntohs(pt->type));
+
+		seq_printf(seq, " %-8s %pF\n",
+			   pt->dev ? pt->dev->name : "", pt->func);
+	}
+
+	return 0;
+}
+
+static const struct seq_operations ptype_seq_ops = {
+	.start = ptype_seq_start,
+	.next  = ptype_seq_next,
+	.stop  = ptype_seq_stop,
+	.show  = ptype_seq_show,
+};
+
+static int ptype_seq_open(struct inode *inode, struct file *file)
+{
+	return seq_open_net(inode, file, &ptype_seq_ops,
+			sizeof(struct seq_net_private));
+}
+
+static const struct file_operations ptype_seq_fops = {
+	.owner	 = THIS_MODULE,
+	.open    = ptype_seq_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = seq_release_net,
+};
+
+
+static int __net_init dev_proc_net_init(struct net *net)
+{
+	int rc = -ENOMEM;
+
+	if (!proc_create("dev", S_IRUGO, net->proc_net, &dev_seq_fops))
+		goto out;
+	if (!proc_create("softnet_stat", S_IRUGO, net->proc_net,
+			 &softnet_seq_fops))
+		goto out_dev;
+	if (!proc_create("ptype", S_IRUGO, net->proc_net, &ptype_seq_fops))
+		goto out_softnet;
+
+	if (wext_proc_init(net))
+		goto out_ptype;
+	rc = 0;
+out:
+	return rc;
+out_ptype:
+	remove_proc_entry("ptype", net->proc_net);
+out_softnet:
+	remove_proc_entry("softnet_stat", net->proc_net);
+out_dev:
+	remove_proc_entry("dev", net->proc_net);
+	goto out;
+}
+
+static void __net_exit dev_proc_net_exit(struct net *net)
+{
+	wext_proc_exit(net);
+
+	remove_proc_entry("ptype", net->proc_net);
+	remove_proc_entry("softnet_stat", net->proc_net);
+	remove_proc_entry("dev", net->proc_net);
+}
+
+static struct pernet_operations __net_initdata dev_proc_ops = {
+	.init = dev_proc_net_init,
+	.exit = dev_proc_net_exit,
+};
+
+int __init dev_proc_init(void)
+{
+	return register_pernet_subsys(&dev_proc_ops);
+}
+
+static int dev_mc_seq_show(struct seq_file *seq, void *v)
+{
+	struct netdev_hw_addr *ha;
+	struct net_device *dev = v;
+
+	if (v == SEQ_START_TOKEN)
+		return 0;
+
+	netif_addr_lock_bh(dev);
+	netdev_for_each_mc_addr(ha, dev) {
+		int i;
+
+		seq_printf(seq, "%-4d %-15s %-5d %-5d ", dev->ifindex,
+			   dev->name, ha->refcount, ha->global_use);
+
+		for (i = 0; i < dev->addr_len; i++)
+			seq_printf(seq, "%02x", ha->addr[i]);
+
+		seq_putc(seq, '\n');
+	}
+	netif_addr_unlock_bh(dev);
+	return 0;
+}
+
+static const struct seq_operations dev_mc_seq_ops = {
+	.start = dev_seq_start,
+	.next  = dev_seq_next,
+	.stop  = dev_seq_stop,
+	.show  = dev_mc_seq_show,
+};
+
+static int dev_mc_seq_open(struct inode *inode, struct file *file)
+{
+	return seq_open_net(inode, file, &dev_mc_seq_ops,
+			    sizeof(struct seq_net_private));
+}
+
+static const struct file_operations dev_mc_seq_fops = {
+	.owner	 = THIS_MODULE,
+	.open    = dev_mc_seq_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = seq_release_net,
+};
+
+static int __net_init dev_mc_net_init(struct net *net)
+{
+	if (!proc_create("dev_mcast", 0, net->proc_net, &dev_mc_seq_fops))
+		return -ENOMEM;
+	return 0;
+}
+
+static void __net_exit dev_mc_net_exit(struct net *net)
+{
+	remove_proc_entry("dev_mcast", net->proc_net);
+}
+
+static struct pernet_operations __net_initdata dev_mc_net_ops = {
+	.init = dev_mc_net_init,
+	.exit = dev_mc_net_exit,
+};
+
+void __init dev_mcast_init(void)
+{
+	register_pernet_subsys(&dev_mc_net_ops);
+}
-- 
cgit v1.2.3


From 77852fea6e2442a0e654a9292060489895de18c7 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sat, 16 Feb 2013 09:46:48 +0100
Subject: sched/rt: Add <linux/sched/rt.h> header to <linux/init_task.h>

IA64 relied on it through sched.h inclusion:

  arch/ia64/kernel/init_task.c:38:11: error: 'MAX_PRIO' undeclared here (not in a function)
  arch/ia64/kernel/init_task.c:38:11: error: 'RR_TIMESLICE' undeclared here (not in a function)

Reported-by: kbuild test robot <fengguang.wu@intel.com>
Cc: Clark Williams <williams@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/n/tip-xaan1twswggedMR0airtpjui@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/init_task.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index cc898b871cef..5cd0f0949927 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -12,6 +12,7 @@
 #include <linux/securebits.h>
 #include <linux/seqlock.h>
 #include <net/net_namespace.h>
+#include <linux/sched/rt.h>
 
 #ifdef CONFIG_SMP
 # define INIT_PUSHABLE_TASKS(tsk)					\
-- 
cgit v1.2.3


From 5cd3f5affad2109fd1458aab3f6216f2181e26ea Mon Sep 17 00:00:00 2001
From: Paul Bolle <pebolle@tiscali.nl>
Date: Thu, 24 Jan 2013 21:53:17 +0100
Subject: lockdep: Silence warning if CONFIG_LOCKDEP isn't set
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Since commit c9a4962881929df7f1ef6e63e1b9da304faca4dd ("nfsd:
make client_lock per net") compiling nfs4state.o without
CONFIG_LOCKDEP set, triggers this GCC warning:

    fs/nfsd/nfs4state.c: In function ‘free_client’:
    fs/nfsd/nfs4state.c:1051:19: warning: unused variable ‘nn’ [-Wunused-variable]

The cause of that warning is that lockdep_assert_held() compiles
away if CONFIG_LOCKDEP is not set. Silence this warning by using
the argument to lockdep_assert_held() as a nop if CONFIG_LOCKDEP
is not set.

Signed-off-by: Paul Bolle <pebolle@tiscali.nl>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stanislav Kinsbursky <skinsbursky@parallels.com>
Cc: J. Bruce Fields <bfields@redhat.com>
Link: http://lkml.kernel.org/r/1359060797.1325.33.camel@x61.thuisdomein
Signed-off-by: Ingo Molnar <mingo@kernel.org>
--
 include/linux/lockdep.h |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
---
 include/linux/lockdep.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 2bca44b0893c..f05631effc73 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -410,7 +410,7 @@ struct lock_class_key { };
 
 #define lockdep_depth(tsk)	(0)
 
-#define lockdep_assert_held(l)			do { } while (0)
+#define lockdep_assert_held(l)			do { (void)(l); } while (0)
 
 #define lockdep_recursing(tsk)			(0)
 
-- 
cgit v1.2.3


From eece09ec213e93333010bf4c6bb9175b32229c54 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sun, 17 Jul 2011 21:25:03 +0200
Subject: locking: Various static lock initializer fixes

The static lock initializers want to be fed the proper name of the
lock and not some random string. In mainline random strings are
obfuscating the readability of debug output, but for RT they prevent
the spinlock substitution. Fix it up.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 drivers/char/random.c        | 6 +++---
 drivers/usb/chipidea/debug.c | 2 +-
 fs/file.c                    | 2 +-
 include/linux/idr.h          | 2 +-
 4 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/random.c b/drivers/char/random.c
index 85e81ec1451e..594bda9dcfc8 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -445,7 +445,7 @@ static struct entropy_store input_pool = {
 	.poolinfo = &poolinfo_table[0],
 	.name = "input",
 	.limit = 1,
-	.lock = __SPIN_LOCK_UNLOCKED(&input_pool.lock),
+	.lock = __SPIN_LOCK_UNLOCKED(input_pool.lock),
 	.pool = input_pool_data
 };
 
@@ -454,7 +454,7 @@ static struct entropy_store blocking_pool = {
 	.name = "blocking",
 	.limit = 1,
 	.pull = &input_pool,
-	.lock = __SPIN_LOCK_UNLOCKED(&blocking_pool.lock),
+	.lock = __SPIN_LOCK_UNLOCKED(blocking_pool.lock),
 	.pool = blocking_pool_data
 };
 
@@ -462,7 +462,7 @@ static struct entropy_store nonblocking_pool = {
 	.poolinfo = &poolinfo_table[1],
 	.name = "nonblocking",
 	.pull = &input_pool,
-	.lock = __SPIN_LOCK_UNLOCKED(&nonblocking_pool.lock),
+	.lock = __SPIN_LOCK_UNLOCKED(nonblocking_pool.lock),
 	.pool = nonblocking_pool_data
 };
 
diff --git a/drivers/usb/chipidea/debug.c b/drivers/usb/chipidea/debug.c
index 3bc244d2636a..a62c4a47d52c 100644
--- a/drivers/usb/chipidea/debug.c
+++ b/drivers/usb/chipidea/debug.c
@@ -222,7 +222,7 @@ static struct {
 } dbg_data = {
 	.idx = 0,
 	.tty = 0,
-	.lck = __RW_LOCK_UNLOCKED(lck)
+	.lck = __RW_LOCK_UNLOCKED(dbg_data.lck)
 };
 
 /**
diff --git a/fs/file.c b/fs/file.c
index 2b3570b7caeb..3906d9577a18 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -516,7 +516,7 @@ struct files_struct init_files = {
 		.close_on_exec	= init_files.close_on_exec_init,
 		.open_fds	= init_files.open_fds_init,
 	},
-	.file_lock	= __SPIN_LOCK_UNLOCKED(init_task.file_lock),
+	.file_lock	= __SPIN_LOCK_UNLOCKED(init_files.file_lock),
 };
 
 /*
diff --git a/include/linux/idr.h b/include/linux/idr.h
index de7e190f1af4..e5eb125effe6 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -136,7 +136,7 @@ struct ida {
 	struct ida_bitmap	*free_bitmap;
 };
 
-#define IDA_INIT(name)		{ .idr = IDR_INIT(name), .free_bitmap = NULL, }
+#define IDA_INIT(name)		{ .idr = IDR_INIT((name).idr), .free_bitmap = NULL, }
 #define DEFINE_IDA(name)	struct ida name = IDA_INIT(name)
 
 int ida_pre_get(struct ida *ida, gfp_t gfp_mask);
-- 
cgit v1.2.3


From 65c9d1bbc9f32c568a4f7ad154c9a10b0028df52 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sat, 16 Jul 2011 18:38:22 +0200
Subject: seqlock: Remove unused functions

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/seqlock.h | 21 ---------------------
 1 file changed, 21 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index 600060e25ec6..cb0599ca049c 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -69,17 +69,6 @@ static inline void write_sequnlock(seqlock_t *sl)
 	spin_unlock(&sl->lock);
 }
 
-static inline int write_tryseqlock(seqlock_t *sl)
-{
-	int ret = spin_trylock(&sl->lock);
-
-	if (ret) {
-		++sl->sequence;
-		smp_wmb();
-	}
-	return ret;
-}
-
 /* Start of read calculation -- fetch last complete writer token */
 static __always_inline unsigned read_seqbegin(const seqlock_t *sl)
 {
@@ -269,14 +258,4 @@ static inline void write_seqcount_barrier(seqcount_t *s)
 #define write_sequnlock_bh(lock)					\
 	do { write_sequnlock(lock); local_bh_enable(); } while(0)
 
-#define read_seqbegin_irqsave(lock, flags)				\
-	({ local_irq_save(flags);   read_seqbegin(lock); })
-
-#define read_seqretry_irqrestore(lock, iv, flags)			\
-	({								\
-		int ret = read_seqretry(lock, iv);			\
-		local_irq_restore(flags);				\
-		ret;							\
-	})
-
 #endif /* __LINUX_SEQLOCK_H */
-- 
cgit v1.2.3


From 6617feca15c68aad12f4a1edd8d8e2ef8d5b4ae5 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sat, 16 Jul 2011 18:40:26 +0200
Subject: seqlock: Use seqcount infrastructure

No point in having different implementations for the same
thing. Change the macro mess to inline functions where possible.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/seqlock.h | 174 +++++++++++++++++++++++++-----------------------
 1 file changed, 92 insertions(+), 82 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index cb0599ca049c..18299057402f 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -30,81 +30,12 @@
 #include <linux/preempt.h>
 #include <asm/processor.h>
 
-typedef struct {
-	unsigned sequence;
-	spinlock_t lock;
-} seqlock_t;
-
-/*
- * These macros triggered gcc-3.x compile-time problems.  We think these are
- * OK now.  Be cautious.
- */
-#define __SEQLOCK_UNLOCKED(lockname) \
-		 { 0, __SPIN_LOCK_UNLOCKED(lockname) }
-
-#define seqlock_init(x)					\
-	do {						\
-		(x)->sequence = 0;			\
-		spin_lock_init(&(x)->lock);		\
-	} while (0)
-
-#define DEFINE_SEQLOCK(x) \
-		seqlock_t x = __SEQLOCK_UNLOCKED(x)
-
-/* Lock out other writers and update the count.
- * Acts like a normal spin_lock/unlock.
- * Don't need preempt_disable() because that is in the spin_lock already.
- */
-static inline void write_seqlock(seqlock_t *sl)
-{
-	spin_lock(&sl->lock);
-	++sl->sequence;
-	smp_wmb();
-}
-
-static inline void write_sequnlock(seqlock_t *sl)
-{
-	smp_wmb();
-	sl->sequence++;
-	spin_unlock(&sl->lock);
-}
-
-/* Start of read calculation -- fetch last complete writer token */
-static __always_inline unsigned read_seqbegin(const seqlock_t *sl)
-{
-	unsigned ret;
-
-repeat:
-	ret = ACCESS_ONCE(sl->sequence);
-	if (unlikely(ret & 1)) {
-		cpu_relax();
-		goto repeat;
-	}
-	smp_rmb();
-
-	return ret;
-}
-
-/*
- * Test if reader processed invalid data.
- *
- * If sequence value changed then writer changed data while in section.
- */
-static __always_inline int read_seqretry(const seqlock_t *sl, unsigned start)
-{
-	smp_rmb();
-
-	return unlikely(sl->sequence != start);
-}
-
-
 /*
  * Version using sequence counter only.
  * This can be used when code has its own mutex protecting the
  * updating starting before the write_seqcountbeqin() and ending
  * after the write_seqcount_end().
  */
-
 typedef struct seqcount {
 	unsigned sequence;
 } seqcount_t;
@@ -207,7 +138,6 @@ static inline int __read_seqcount_retry(const seqcount_t *s, unsigned start)
 static inline int read_seqcount_retry(const seqcount_t *s, unsigned start)
 {
 	smp_rmb();
-
 	return __read_seqcount_retry(s, start);
 }
 
@@ -241,21 +171,101 @@ static inline void write_seqcount_barrier(seqcount_t *s)
 	s->sequence+=2;
 }
 
+typedef struct {
+	struct seqcount seqcount;
+	spinlock_t lock;
+} seqlock_t;
+
+/*
+ * These macros triggered gcc-3.x compile-time problems.  We think these are
+ * OK now.  Be cautious.
+ */
+#define __SEQLOCK_UNLOCKED(lockname)			\
+	{						\
+		.seqcount = SEQCNT_ZERO,		\
+		.lock =	__SPIN_LOCK_UNLOCKED(lockname)	\
+	}
+
+#define seqlock_init(x)					\
+	do {						\
+		seqcount_init(&(x)->seqcount);		\
+		spin_lock_init(&(x)->lock);		\
+	} while (0)
+
+#define DEFINE_SEQLOCK(x) \
+		seqlock_t x = __SEQLOCK_UNLOCKED(x)
+
+/*
+ * Read side functions for starting and finalizing a read side section.
+ */
+static inline unsigned read_seqbegin(const seqlock_t *sl)
+{
+	return read_seqcount_begin(&sl->seqcount);
+}
+
+static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start)
+{
+	return read_seqcount_retry(&sl->seqcount, start);
+}
+
 /*
- * Possible sw/hw IRQ protected versions of the interfaces.
+ * Lock out other writers and update the count.
+ * Acts like a normal spin_lock/unlock.
+ * Don't need preempt_disable() because that is in the spin_lock already.
  */
+static inline void write_seqlock(seqlock_t *sl)
+{
+	spin_lock(&sl->lock);
+	write_seqcount_begin(&sl->seqcount);
+}
+
+static inline void write_sequnlock(seqlock_t *sl)
+{
+	write_seqcount_end(&sl->seqcount);
+	spin_unlock(&sl->lock);
+}
+
+static inline void write_seqlock_bh(seqlock_t *sl)
+{
+	spin_lock_bh(&sl->lock);
+	write_seqcount_begin(&sl->seqcount);
+}
+
+static inline void write_sequnlock_bh(seqlock_t *sl)
+{
+	write_seqcount_end(&sl->seqcount);
+	spin_unlock_bh(&sl->lock);
+}
+
+static inline void write_seqlock_irq(seqlock_t *sl)
+{
+	spin_lock_irq(&sl->lock);
+	write_seqcount_begin(&sl->seqcount);
+}
+
+static inline void write_sequnlock_irq(seqlock_t *sl)
+{
+	write_seqcount_end(&sl->seqcount);
+	spin_unlock_irq(&sl->lock);
+}
+
+static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&sl->lock, flags);
+	write_seqcount_begin(&sl->seqcount);
+	return flags;
+}
+
 #define write_seqlock_irqsave(lock, flags)				\
-	do { local_irq_save(flags); write_seqlock(lock); } while (0)
-#define write_seqlock_irq(lock)						\
-	do { local_irq_disable();   write_seqlock(lock); } while (0)
-#define write_seqlock_bh(lock)						\
-        do { local_bh_disable();    write_seqlock(lock); } while (0)
+	do { flags = __write_seqlock_irqsave(lock); } while (0)
 
-#define write_sequnlock_irqrestore(lock, flags)				\
-	do { write_sequnlock(lock); local_irq_restore(flags); } while(0)
-#define write_sequnlock_irq(lock)					\
-	do { write_sequnlock(lock); local_irq_enable(); } while(0)
-#define write_sequnlock_bh(lock)					\
-	do { write_sequnlock(lock); local_bh_enable(); } while(0)
+static inline void
+write_sequnlock_irqrestore(seqlock_t *sl, unsigned long flags)
+{
+	write_seqcount_end(&sl->seqcount);
+	spin_unlock_irqrestore(&sl->lock, flags);
+}
 
 #endif /* __LINUX_SEQLOCK_H */
-- 
cgit v1.2.3


From 4fc1a601f147abe3bfb4d70fe718110ed21953e1 Mon Sep 17 00:00:00 2001
From: Gao feng <gaofeng@cn.fujitsu.com>
Date: Tue, 19 Feb 2013 00:43:10 +0000
Subject: net: proc: fix build failed when procfs is not configured

commit d4beaa66add8aebf83ab16d2fde4e4de8dac36df
"net: proc: change proc_net_fops_create to proc_create"
uses proc_create to replace proc_net_fops_create, when
CONFIG_PROC isn't configured, some build error will
occurs.

net/packet/af_packet.c: In function 'packet_net_init':
net/packet/af_packet.c:3831:48: error: 'packet_seq_fops' undeclared (first use in this function)
net/packet/af_packet.c:3831:48: note: each undeclared identifier is reported only once for each function it appears in

There may be other build fails like above,this patch
change proc_create from function to macros when CONFIG_PROC
is not configured,just like what proc_net_fops_create did
before this commit.

Reported-by: Fengguang Wu <fengguang.wu@intel.com>
Signed-off-by: Gao feng <gaofeng@cn.fujitsu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/proc_fs.h | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 319f69422667..d0a1f2ca1c3f 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -187,12 +187,9 @@ static inline void proc_flush_task(struct task_struct *task)
 
 static inline struct proc_dir_entry *create_proc_entry(const char *name,
 	umode_t mode, struct proc_dir_entry *parent) { return NULL; }
-static inline struct proc_dir_entry *proc_create(const char *name,
-	umode_t mode, struct proc_dir_entry *parent,
-	const struct file_operations *proc_fops)
-{
-	return NULL;
-}
+
+#define proc_create(name, mode, parent, fops)  ({ (void)(mode), NULL; })
+
 static inline struct proc_dir_entry *proc_create_data(const char *name,
 	umode_t mode, struct proc_dir_entry *parent,
 	const struct file_operations *proc_fops, void *data)
-- 
cgit v1.2.3


From cd0615746ba0f6643fb984345ae6ee0b73404ca6 Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Tue, 19 Feb 2013 02:47:05 +0000
Subject: net: fix a build failure when !CONFIG_PROC_FS

When !CONFIG_PROC_FS dev_mcast_init() is not defined,
actually we can just merge dev_mcast_init() into
dev_proc_init().

Reported-by: Gao feng <gaofeng@cn.fujitsu.com>
Cc: Gao feng <gaofeng@cn.fujitsu.com>
Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  1 -
 net/core/dev.c            |  1 -
 net/core/net-procfs.c     | 12 +++++-------
 3 files changed, 5 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index f111b4f038f3..b3d00fa4b314 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2646,7 +2646,6 @@ extern void		netdev_notify_peers(struct net_device *dev);
 extern void		netdev_features_change(struct net_device *dev);
 /* Load a device via the kmod */
 extern void		dev_load(struct net *net, const char *name);
-extern void		dev_mcast_init(void);
 extern struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
 					       struct rtnl_link_stats64 *storage);
 extern void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64,
diff --git a/net/core/dev.c b/net/core/dev.c
index 8d9ddb09f208..17bc535115d3 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6260,7 +6260,6 @@ static int __init net_dev_init(void)
 
 	hotcpu_notifier(dev_cpu_callback, 0);
 	dst_init();
-	dev_mcast_init();
 	rc = 0;
 out:
 	return rc;
diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c
index ac87066491e9..0f6bb6f8d391 100644
--- a/net/core/net-procfs.c
+++ b/net/core/net-procfs.c
@@ -341,11 +341,6 @@ static struct pernet_operations __net_initdata dev_proc_ops = {
 	.exit = dev_proc_net_exit,
 };
 
-int __init dev_proc_init(void)
-{
-	return register_pernet_subsys(&dev_proc_ops);
-}
-
 static int dev_mc_seq_show(struct seq_file *seq, void *v)
 {
 	struct netdev_hw_addr *ha;
@@ -408,7 +403,10 @@ static struct pernet_operations __net_initdata dev_mc_net_ops = {
 	.exit = dev_mc_net_exit,
 };
 
-void __init dev_mcast_init(void)
+int __init dev_proc_init(void)
 {
-	register_pernet_subsys(&dev_mc_net_ops);
+	int ret = register_pernet_subsys(&dev_proc_ops);
+	if (!ret)
+		return register_pernet_subsys(&dev_mc_net_ops);
+	return ret;
 }
-- 
cgit v1.2.3


From f84adf4921ae3115502f44ff467b04bf2f88cf04 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Wed, 13 Feb 2013 13:01:55 -0500
Subject: xen-blkfront: drop the use of llist_for_each_entry_safe

Replace llist_for_each_entry_safe with a while loop.

llist_for_each_entry_safe can trigger a bug in GCC 4.1, so it's best
to remove it and use a while loop and do the deletion manually.

Specifically this bug can be triggered by hot-unplugging a disk, either
by doing xm block-detach or by save/restore cycle.

BUG: unable to handle kernel paging request at fffffffffffffff0
IP: [<ffffffffa0047223>] blkif_free+0x63/0x130 [xen_blkfront]
The crash call trace is:
	...
bad_area_nosemaphore+0x13/0x20
do_page_fault+0x25e/0x4b0
page_fault+0x25/0x30
? blkif_free+0x63/0x130 [xen_blkfront]
blkfront_resume+0x46/0xa0 [xen_blkfront]
xenbus_dev_resume+0x6c/0x140
pm_op+0x192/0x1b0
device_resume+0x82/0x1e0
dpm_resume+0xc9/0x1a0
dpm_resume_end+0x15/0x30
do_suspend+0x117/0x1e0

When drilling down to the assembler code, on newer GCC it does
.L29:
        cmpq    $-16, %r12      #, persistent_gnt check
        je      .L30    	#, out of the loop
.L25:
	... code in the loop
        testq   %r13, %r13      # n
        je      .L29    	#, back to the top of the loop
        cmpq    $-16, %r12      #, persistent_gnt check
        movq    16(%r12), %r13  # <variable>.node.next, n
        jne     .L25    	#,	back to the top of the loop
.L30:

While on GCC 4.1, it is:
L78:
	... code in the loop
	testq   %r13, %r13      # n
        je      .L78    #,	back to the top of the loop
        movq    16(%rbx), %r13  # <variable>.node.next, n
        jmp     .L78    #,	back to the top of the loop

Which basically means that the exit loop condition instead of
being:

	&(pos)->member != NULL;

is:
	;

which makes the loop unbound.

Since xen-blkfront is the only user of the llist_for_each_entry_safe
macro remove it from llist.h.

Orabug: 16263164
CC: stable@vger.kernel.org
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 drivers/block/xen-blkfront.c | 13 ++++++++++---
 include/linux/llist.h        | 25 -------------------------
 2 files changed, 10 insertions(+), 28 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 11043c18ac5a..c3dae2e0f290 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -791,7 +791,7 @@ static void blkif_restart_queue(struct work_struct *work)
 static void blkif_free(struct blkfront_info *info, int suspend)
 {
 	struct llist_node *all_gnts;
-	struct grant *persistent_gnt;
+	struct grant *persistent_gnt, *tmp;
 	struct llist_node *n;
 
 	/* Prevent new requests being issued until we fix things up. */
@@ -805,10 +805,17 @@ static void blkif_free(struct blkfront_info *info, int suspend)
 	/* Remove all persistent grants */
 	if (info->persistent_gnts_c) {
 		all_gnts = llist_del_all(&info->persistent_gnts);
-		llist_for_each_entry_safe(persistent_gnt, n, all_gnts, node) {
+		persistent_gnt = llist_entry(all_gnts, typeof(*(persistent_gnt)), node);
+		while (persistent_gnt) {
 			gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL);
 			__free_page(pfn_to_page(persistent_gnt->pfn));
-			kfree(persistent_gnt);
+			tmp = persistent_gnt;
+			n = persistent_gnt->node.next;
+			if (n)
+				persistent_gnt = llist_entry(n, typeof(*(persistent_gnt)), node);
+			else
+				persistent_gnt = NULL;
+			kfree(tmp);
 		}
 		info->persistent_gnts_c = 0;
 	}
diff --git a/include/linux/llist.h b/include/linux/llist.h
index d0ab98f73d38..a5199f6d0e82 100644
--- a/include/linux/llist.h
+++ b/include/linux/llist.h
@@ -124,31 +124,6 @@ static inline void init_llist_head(struct llist_head *list)
 	     &(pos)->member != NULL;					\
 	     (pos) = llist_entry((pos)->member.next, typeof(*(pos)), member))
 
-/**
- * llist_for_each_entry_safe - iterate safely against remove over some entries
- * of lock-less list of given type.
- * @pos:	the type * to use as a loop cursor.
- * @n:		another type * to use as a temporary storage.
- * @node:	the fist entry of deleted list entries.
- * @member:	the name of the llist_node with the struct.
- *
- * In general, some entries of the lock-less list can be traversed
- * safely only after being removed from list, so start with an entry
- * instead of list head. This variant allows removal of entries
- * as we iterate.
- *
- * If being used on entries deleted from lock-less list directly, the
- * traverse order is from the newest to the oldest added entry.  If
- * you want to traverse from the oldest to the newest, you must
- * reverse the order by yourself before traversing.
- */
-#define llist_for_each_entry_safe(pos, n, node, member)		\
-	for ((pos) = llist_entry((node), typeof(*(pos)), member),	\
-	     (n) = (pos)->member.next;					\
-	     &(pos)->member != NULL;					\
-	     (pos) = llist_entry(n, typeof(*(pos)), member),		\
-	     (n) = (&(pos)->member != NULL) ? (pos)->member.next : NULL)
-
 /**
  * llist_empty - tests whether a lock-less list is empty
  * @head:	the list to test
-- 
cgit v1.2.3


From b324814e8436772cb3367b14149ba003a9954525 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@inktank.com>
Date: Wed, 6 Feb 2013 13:11:38 -0600
Subject: libceph: use void pointers in page vector functions

The functions used for working with ceph page vectors are defined
with char pointers, but they're really intended to operate on
untyped data.  Change the types of these function parameters
to (void *) to reflect this.

(Note that the functions now assume void pointer arithmetic works
like arithmetic on char pointers.)

Signed-off-by: Alex Elder <elder@inktank.com>
Reviewed-by: Josh Durgin <josh.durgin@inktank.com>
---
 include/linux/ceph/libceph.h | 10 +++++-----
 net/ceph/pagevec.c           | 10 +++++-----
 2 files changed, 10 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
index c44275ab375c..2250f8bb2490 100644
--- a/include/linux/ceph/libceph.h
+++ b/include/linux/ceph/libceph.h
@@ -222,7 +222,7 @@ extern int ceph_open_session(struct ceph_client *client);
 /* pagevec.c */
 extern void ceph_release_page_vector(struct page **pages, int num_pages);
 
-extern struct page **ceph_get_direct_page_vector(const char __user *data,
+extern struct page **ceph_get_direct_page_vector(const void __user *data,
 						 int num_pages,
 						 bool write_page);
 extern void ceph_put_page_vector(struct page **pages, int num_pages,
@@ -230,15 +230,15 @@ extern void ceph_put_page_vector(struct page **pages, int num_pages,
 extern void ceph_release_page_vector(struct page **pages, int num_pages);
 extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags);
 extern int ceph_copy_user_to_page_vector(struct page **pages,
-					 const char __user *data,
+					 const void __user *data,
 					 loff_t off, size_t len);
 extern int ceph_copy_to_page_vector(struct page **pages,
-				    const char *data,
+				    const void *data,
 				    loff_t off, size_t len);
 extern int ceph_copy_from_page_vector(struct page **pages,
-				    char *data,
+				    void *data,
 				    loff_t off, size_t len);
-extern int ceph_copy_page_vector_to_user(struct page **pages, char __user *data,
+extern int ceph_copy_page_vector_to_user(struct page **pages, void __user *data,
 				    loff_t off, size_t len);
 extern void ceph_zero_page_vector_range(int off, int len, struct page **pages);
 
diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c
index cd9c21df87d1..5b20be979c19 100644
--- a/net/ceph/pagevec.c
+++ b/net/ceph/pagevec.c
@@ -12,7 +12,7 @@
 /*
  * build a vector of user pages
  */
-struct page **ceph_get_direct_page_vector(const char __user *data,
+struct page **ceph_get_direct_page_vector(const void __user *data,
 					  int num_pages, bool write_page)
 {
 	struct page **pages;
@@ -93,7 +93,7 @@ EXPORT_SYMBOL(ceph_alloc_page_vector);
  * copy user data into a page vector
  */
 int ceph_copy_user_to_page_vector(struct page **pages,
-					 const char __user *data,
+					 const void __user *data,
 					 loff_t off, size_t len)
 {
 	int i = 0;
@@ -119,7 +119,7 @@ int ceph_copy_user_to_page_vector(struct page **pages,
 EXPORT_SYMBOL(ceph_copy_user_to_page_vector);
 
 int ceph_copy_to_page_vector(struct page **pages,
-				    const char *data,
+				    const void *data,
 				    loff_t off, size_t len)
 {
 	int i = 0;
@@ -143,7 +143,7 @@ int ceph_copy_to_page_vector(struct page **pages,
 EXPORT_SYMBOL(ceph_copy_to_page_vector);
 
 int ceph_copy_from_page_vector(struct page **pages,
-				    char *data,
+				    void *data,
 				    loff_t off, size_t len)
 {
 	int i = 0;
@@ -170,7 +170,7 @@ EXPORT_SYMBOL(ceph_copy_from_page_vector);
  * copy user data from a page vector into a user pointer
  */
 int ceph_copy_page_vector_to_user(struct page **pages,
-					 char __user *data,
+					 void __user *data,
 					 loff_t off, size_t len)
 {
 	int i = 0;
-- 
cgit v1.2.3


From 903bb32e890237ca43ab847e561e5377cfe0fdb3 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@inktank.com>
Date: Wed, 6 Feb 2013 13:11:38 -0600
Subject: libceph: drop return value from page vector copy routines

The return values provided for ceph_copy_to_page_vector() and
ceph_copy_from_page_vector() serve no purpose, so get rid of them.

Signed-off-by: Alex Elder <elder@inktank.com>
Reviewed-by: Josh Durgin <josh.durgin@inktank.com>
---
 drivers/block/rbd.c          |  5 ++---
 include/linux/ceph/libceph.h |  4 ++--
 net/ceph/pagevec.c           | 14 ++++++--------
 3 files changed, 10 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index c259b4089e95..b0eea3eaee93 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -1890,8 +1890,7 @@ static int rbd_obj_method_sync(struct rbd_device *rbd_dev,
 	if (ret < 0)
 		goto out;
 	ret = 0;
-	(void) ceph_copy_from_page_vector(pages, inbound, 0,
-					obj_request->xferred);
+	ceph_copy_from_page_vector(pages, inbound, 0, obj_request->xferred);
 	if (version)
 		*version = obj_request->version;
 out:
@@ -2089,7 +2088,7 @@ static int rbd_obj_read_sync(struct rbd_device *rbd_dev,
 
 	rbd_assert(obj_request->xferred <= (u64) SIZE_MAX);
 	size = (size_t) obj_request->xferred;
-	(void) ceph_copy_from_page_vector(pages, buf, 0, size);
+	ceph_copy_from_page_vector(pages, buf, 0, size);
 	rbd_assert(size <= (size_t) INT_MAX);
 	ret = (int) size;
 	if (version)
diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
index 2250f8bb2490..29818fc3fa49 100644
--- a/include/linux/ceph/libceph.h
+++ b/include/linux/ceph/libceph.h
@@ -232,10 +232,10 @@ extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags);
 extern int ceph_copy_user_to_page_vector(struct page **pages,
 					 const void __user *data,
 					 loff_t off, size_t len);
-extern int ceph_copy_to_page_vector(struct page **pages,
+extern void ceph_copy_to_page_vector(struct page **pages,
 				    const void *data,
 				    loff_t off, size_t len);
-extern int ceph_copy_from_page_vector(struct page **pages,
+extern void ceph_copy_from_page_vector(struct page **pages,
 				    void *data,
 				    loff_t off, size_t len);
 extern int ceph_copy_page_vector_to_user(struct page **pages, void __user *data,
diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c
index 5b20be979c19..815a2249cfa9 100644
--- a/net/ceph/pagevec.c
+++ b/net/ceph/pagevec.c
@@ -118,17 +118,17 @@ int ceph_copy_user_to_page_vector(struct page **pages,
 }
 EXPORT_SYMBOL(ceph_copy_user_to_page_vector);
 
-int ceph_copy_to_page_vector(struct page **pages,
+void ceph_copy_to_page_vector(struct page **pages,
 				    const void *data,
 				    loff_t off, size_t len)
 {
 	int i = 0;
 	size_t po = off & ~PAGE_CACHE_MASK;
 	size_t left = len;
-	size_t l;
 
 	while (left > 0) {
-		l = min_t(size_t, PAGE_CACHE_SIZE-po, left);
+		size_t l = min_t(size_t, PAGE_CACHE_SIZE-po, left);
+
 		memcpy(page_address(pages[i]) + po, data, l);
 		data += l;
 		left -= l;
@@ -138,21 +138,20 @@ int ceph_copy_to_page_vector(struct page **pages,
 			i++;
 		}
 	}
-	return len;
 }
 EXPORT_SYMBOL(ceph_copy_to_page_vector);
 
-int ceph_copy_from_page_vector(struct page **pages,
+void ceph_copy_from_page_vector(struct page **pages,
 				    void *data,
 				    loff_t off, size_t len)
 {
 	int i = 0;
 	size_t po = off & ~PAGE_CACHE_MASK;
 	size_t left = len;
-	size_t l;
 
 	while (left > 0) {
-		l = min_t(size_t, PAGE_CACHE_SIZE-po, left);
+		size_t l = min_t(size_t, PAGE_CACHE_SIZE-po, left);
+
 		memcpy(data, page_address(pages[i]) + po, l);
 		data += l;
 		left -= l;
@@ -162,7 +161,6 @@ int ceph_copy_from_page_vector(struct page **pages,
 			i++;
 		}
 	}
-	return len;
 }
 EXPORT_SYMBOL(ceph_copy_from_page_vector);
 
-- 
cgit v1.2.3


From 1c3e826482ab698e418c7a894440e62c76aac893 Mon Sep 17 00:00:00 2001
From: Sha Zhengju <handai.szj@taobao.com>
Date: Wed, 20 Feb 2013 17:14:38 +0800
Subject: sched/core: Remove the obsolete and unused nr_uninterruptible()
 function

Signed-off-by: Sha Zhengju <handai.szj@taobao.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1361351678-8065-1-git-send-email-handai.szj@taobao.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h |  1 -
 kernel/sched/core.c   | 22 ++--------------------
 2 files changed, 2 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 33cc42130371..f9ca237df7e8 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -98,7 +98,6 @@ extern int nr_threads;
 DECLARE_PER_CPU(unsigned long, process_counts);
 extern int nr_processes(void);
 extern unsigned long nr_running(void);
-extern unsigned long nr_uninterruptible(void);
 extern unsigned long nr_iowait(void);
 extern unsigned long nr_iowait_cpu(int cpu);
 extern unsigned long this_cpu_load(void);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 03d7784b7bd2..b7b03cd2d4cd 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1969,11 +1969,10 @@ context_switch(struct rq *rq, struct task_struct *prev,
 }
 
 /*
- * nr_running, nr_uninterruptible and nr_context_switches:
+ * nr_running and nr_context_switches:
  *
  * externally visible scheduler statistics: current number of runnable
- * threads, current number of uninterruptible-sleeping threads, total
- * number of context switches performed since bootup.
+ * threads, total number of context switches performed since bootup.
  */
 unsigned long nr_running(void)
 {
@@ -1985,23 +1984,6 @@ unsigned long nr_running(void)
 	return sum;
 }
 
-unsigned long nr_uninterruptible(void)
-{
-	unsigned long i, sum = 0;
-
-	for_each_possible_cpu(i)
-		sum += cpu_rq(i)->nr_uninterruptible;
-
-	/*
-	 * Since we read the counters lockless, it might be slightly
-	 * inaccurate. Do not allow it to go below zero though:
-	 */
-	if (unlikely((long)sum < 0))
-		sum = 0;
-
-	return sum;
-}
-
 unsigned long long nr_context_switches(void)
 {
 	int i;
-- 
cgit v1.2.3


From 55e301fd57a6239ec14b91a1cf2e70b3dd135194 Mon Sep 17 00:00:00 2001
From: Filipe Brandenburger <filbranden@google.com>
Date: Tue, 29 Jan 2013 06:04:50 +0000
Subject: Btrfs: move fs/btrfs/ioctl.h to include/uapi/linux/btrfs.h

The header file will then be installed under /usr/include/linux so that
userspace applications can refer to Btrfs ioctls by name and use the same
structs used internally in the kernel.

Signed-off-by: Filipe Brandenburger <filbranden@google.com>
Signed-off-by: Josef Bacik <jbacik@fusionio.com>
---
 fs/btrfs/backref.h         |   2 +-
 fs/btrfs/ctree.h           |   2 +-
 fs/btrfs/file.c            |   2 +-
 fs/btrfs/inode.c           |   2 +-
 fs/btrfs/ioctl.c           |   2 +-
 fs/btrfs/ioctl.h           | 502 --------------------------------------------
 fs/btrfs/qgroup.c          |   2 +-
 fs/btrfs/super.c           |   2 +-
 fs/btrfs/volumes.h         |   2 +-
 include/linux/btrfs.h      |   6 +
 include/uapi/linux/Kbuild  |   1 +
 include/uapi/linux/btrfs.h | 503 +++++++++++++++++++++++++++++++++++++++++++++
 12 files changed, 518 insertions(+), 510 deletions(-)
 delete mode 100644 fs/btrfs/ioctl.h
 create mode 100644 include/linux/btrfs.h
 create mode 100644 include/uapi/linux/btrfs.h

(limited to 'include/linux')

diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h
index d61feca79455..310a7f6d09b1 100644
--- a/fs/btrfs/backref.h
+++ b/fs/btrfs/backref.h
@@ -19,7 +19,7 @@
 #ifndef __BTRFS_BACKREF__
 #define __BTRFS_BACKREF__
 
-#include "ioctl.h"
+#include <linux/btrfs.h>
 #include "ulist.h"
 #include "extent_io.h"
 
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 541ce9a9949e..69321013683c 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -31,10 +31,10 @@
 #include <trace/events/btrfs.h>
 #include <asm/kmap_types.h>
 #include <linux/pagemap.h>
+#include <linux/btrfs.h>
 #include "extent_io.h"
 #include "extent_map.h"
 #include "async-thread.h"
-#include "ioctl.h"
 
 struct btrfs_trans_handle;
 struct btrfs_transaction;
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 083abca56055..13c78ea3ebce 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -30,11 +30,11 @@
 #include <linux/statfs.h>
 #include <linux/compat.h>
 #include <linux/slab.h>
+#include <linux/btrfs.h>
 #include "ctree.h"
 #include "disk-io.h"
 #include "transaction.h"
 #include "btrfs_inode.h"
-#include "ioctl.h"
 #include "print-tree.h"
 #include "tree-log.h"
 #include "locking.h"
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 60ec7589900c..fc8aa8bf80a1 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -39,12 +39,12 @@
 #include <linux/slab.h>
 #include <linux/ratelimit.h>
 #include <linux/mount.h>
+#include <linux/btrfs.h>
 #include "compat.h"
 #include "ctree.h"
 #include "disk-io.h"
 #include "transaction.h"
 #include "btrfs_inode.h"
-#include "ioctl.h"
 #include "print-tree.h"
 #include "ordered-data.h"
 #include "xattr.h"
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 1b554b47e814..96ecefc1724f 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -42,12 +42,12 @@
 #include <linux/slab.h>
 #include <linux/blkdev.h>
 #include <linux/uuid.h>
+#include <linux/btrfs.h>
 #include "compat.h"
 #include "ctree.h"
 #include "disk-io.h"
 #include "transaction.h"
 #include "btrfs_inode.h"
-#include "ioctl.h"
 #include "print-tree.h"
 #include "volumes.h"
 #include "locking.h"
diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h
deleted file mode 100644
index dabca9cc8c2e..000000000000
--- a/fs/btrfs/ioctl.h
+++ /dev/null
@@ -1,502 +0,0 @@
-/*
- * Copyright (C) 2007 Oracle.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License v2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#ifndef __IOCTL_
-#define __IOCTL_
-#include <linux/ioctl.h>
-
-#define BTRFS_IOCTL_MAGIC 0x94
-#define BTRFS_VOL_NAME_MAX 255
-
-/* this should be 4k */
-#define BTRFS_PATH_NAME_MAX 4087
-struct btrfs_ioctl_vol_args {
-	__s64 fd;
-	char name[BTRFS_PATH_NAME_MAX + 1];
-};
-
-#define BTRFS_DEVICE_PATH_NAME_MAX 1024
-
-#define BTRFS_SUBVOL_CREATE_ASYNC	(1ULL << 0)
-#define BTRFS_SUBVOL_RDONLY		(1ULL << 1)
-#define BTRFS_SUBVOL_QGROUP_INHERIT	(1ULL << 2)
-#define BTRFS_FSID_SIZE 16
-#define BTRFS_UUID_SIZE 16
-
-#define BTRFS_QGROUP_INHERIT_SET_LIMITS	(1ULL << 0)
-
-struct btrfs_qgroup_limit {
-	__u64	flags;
-	__u64	max_rfer;
-	__u64	max_excl;
-	__u64	rsv_rfer;
-	__u64	rsv_excl;
-};
-
-struct btrfs_qgroup_inherit {
-	__u64	flags;
-	__u64	num_qgroups;
-	__u64	num_ref_copies;
-	__u64	num_excl_copies;
-	struct btrfs_qgroup_limit lim;
-	__u64	qgroups[0];
-};
-
-struct btrfs_ioctl_qgroup_limit_args {
-	__u64	qgroupid;
-	struct btrfs_qgroup_limit lim;
-};
-
-#define BTRFS_SUBVOL_NAME_MAX 4039
-struct btrfs_ioctl_vol_args_v2 {
-	__s64 fd;
-	__u64 transid;
-	__u64 flags;
-	union {
-		struct {
-			__u64 size;
-			struct btrfs_qgroup_inherit __user *qgroup_inherit;
-		};
-		__u64 unused[4];
-	};
-	char name[BTRFS_SUBVOL_NAME_MAX + 1];
-};
-
-/*
- * structure to report errors and progress to userspace, either as a
- * result of a finished scrub, a canceled scrub or a progress inquiry
- */
-struct btrfs_scrub_progress {
-	__u64 data_extents_scrubbed;	/* # of data extents scrubbed */
-	__u64 tree_extents_scrubbed;	/* # of tree extents scrubbed */
-	__u64 data_bytes_scrubbed;	/* # of data bytes scrubbed */
-	__u64 tree_bytes_scrubbed;	/* # of tree bytes scrubbed */
-	__u64 read_errors;		/* # of read errors encountered (EIO) */
-	__u64 csum_errors;		/* # of failed csum checks */
-	__u64 verify_errors;		/* # of occurences, where the metadata
-					 * of a tree block did not match the
-					 * expected values, like generation or
-					 * logical */
-	__u64 no_csum;			/* # of 4k data block for which no csum
-					 * is present, probably the result of
-					 * data written with nodatasum */
-	__u64 csum_discards;		/* # of csum for which no data was found
-					 * in the extent tree. */
-	__u64 super_errors;		/* # of bad super blocks encountered */
-	__u64 malloc_errors;		/* # of internal kmalloc errors. These
-					 * will likely cause an incomplete
-					 * scrub */
-	__u64 uncorrectable_errors;	/* # of errors where either no intact
-					 * copy was found or the writeback
-					 * failed */
-	__u64 corrected_errors;		/* # of errors corrected */
-	__u64 last_physical;		/* last physical address scrubbed. In
-					 * case a scrub was aborted, this can
-					 * be used to restart the scrub */
-	__u64 unverified_errors;	/* # of occurences where a read for a
-					 * full (64k) bio failed, but the re-
-					 * check succeeded for each 4k piece.
-					 * Intermittent error. */
-};
-
-#define BTRFS_SCRUB_READONLY	1
-struct btrfs_ioctl_scrub_args {
-	__u64 devid;				/* in */
-	__u64 start;				/* in */
-	__u64 end;				/* in */
-	__u64 flags;				/* in */
-	struct btrfs_scrub_progress progress;	/* out */
-	/* pad to 1k */
-	__u64 unused[(1024-32-sizeof(struct btrfs_scrub_progress))/8];
-};
-
-#define BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_ALWAYS	0
-#define BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_AVOID	1
-struct btrfs_ioctl_dev_replace_start_params {
-	__u64 srcdevid;	/* in, if 0, use srcdev_name instead */
-	__u64 cont_reading_from_srcdev_mode;	/* in, see #define
-						 * above */
-	__u8 srcdev_name[BTRFS_DEVICE_PATH_NAME_MAX + 1];	/* in */
-	__u8 tgtdev_name[BTRFS_DEVICE_PATH_NAME_MAX + 1];	/* in */
-};
-
-#define BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED	0
-#define BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED		1
-#define BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED		2
-#define BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED		3
-#define BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED		4
-struct btrfs_ioctl_dev_replace_status_params {
-	__u64 replace_state;	/* out, see #define above */
-	__u64 progress_1000;	/* out, 0 <= x <= 1000 */
-	__u64 time_started;	/* out, seconds since 1-Jan-1970 */
-	__u64 time_stopped;	/* out, seconds since 1-Jan-1970 */
-	__u64 num_write_errors;	/* out */
-	__u64 num_uncorrectable_read_errors;	/* out */
-};
-
-#define BTRFS_IOCTL_DEV_REPLACE_CMD_START			0
-#define BTRFS_IOCTL_DEV_REPLACE_CMD_STATUS			1
-#define BTRFS_IOCTL_DEV_REPLACE_CMD_CANCEL			2
-#define BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR			0
-#define BTRFS_IOCTL_DEV_REPLACE_RESULT_NOT_STARTED		1
-#define BTRFS_IOCTL_DEV_REPLACE_RESULT_ALREADY_STARTED		2
-struct btrfs_ioctl_dev_replace_args {
-	__u64 cmd;	/* in */
-	__u64 result;	/* out */
-
-	union {
-		struct btrfs_ioctl_dev_replace_start_params start;
-		struct btrfs_ioctl_dev_replace_status_params status;
-	};	/* in/out */
-
-	__u64 spare[64];
-};
-
-struct btrfs_ioctl_dev_info_args {
-	__u64 devid;				/* in/out */
-	__u8 uuid[BTRFS_UUID_SIZE];		/* in/out */
-	__u64 bytes_used;			/* out */
-	__u64 total_bytes;			/* out */
-	__u64 unused[379];			/* pad to 4k */
-	__u8 path[BTRFS_DEVICE_PATH_NAME_MAX];	/* out */
-};
-
-struct btrfs_ioctl_fs_info_args {
-	__u64 max_id;				/* out */
-	__u64 num_devices;			/* out */
-	__u8 fsid[BTRFS_FSID_SIZE];		/* out */
-	__u64 reserved[124];			/* pad to 1k */
-};
-
-/* balance control ioctl modes */
-#define BTRFS_BALANCE_CTL_PAUSE		1
-#define BTRFS_BALANCE_CTL_CANCEL	2
-
-/*
- * this is packed, because it should be exactly the same as its disk
- * byte order counterpart (struct btrfs_disk_balance_args)
- */
-struct btrfs_balance_args {
-	__u64 profiles;
-	__u64 usage;
-	__u64 devid;
-	__u64 pstart;
-	__u64 pend;
-	__u64 vstart;
-	__u64 vend;
-
-	__u64 target;
-
-	__u64 flags;
-
-	__u64 unused[8];
-} __attribute__ ((__packed__));
-
-/* report balance progress to userspace */
-struct btrfs_balance_progress {
-	__u64 expected;		/* estimated # of chunks that will be
-				 * relocated to fulfill the request */
-	__u64 considered;	/* # of chunks we have considered so far */
-	__u64 completed;	/* # of chunks relocated so far */
-};
-
-#define BTRFS_BALANCE_STATE_RUNNING	(1ULL << 0)
-#define BTRFS_BALANCE_STATE_PAUSE_REQ	(1ULL << 1)
-#define BTRFS_BALANCE_STATE_CANCEL_REQ	(1ULL << 2)
-
-struct btrfs_ioctl_balance_args {
-	__u64 flags;				/* in/out */
-	__u64 state;				/* out */
-
-	struct btrfs_balance_args data;		/* in/out */
-	struct btrfs_balance_args meta;		/* in/out */
-	struct btrfs_balance_args sys;		/* in/out */
-
-	struct btrfs_balance_progress stat;	/* out */
-
-	__u64 unused[72];			/* pad to 1k */
-};
-
-#define BTRFS_INO_LOOKUP_PATH_MAX 4080
-struct btrfs_ioctl_ino_lookup_args {
-	__u64 treeid;
-	__u64 objectid;
-	char name[BTRFS_INO_LOOKUP_PATH_MAX];
-};
-
-struct btrfs_ioctl_search_key {
-	/* which root are we searching.  0 is the tree of tree roots */
-	__u64 tree_id;
-
-	/* keys returned will be >= min and <= max */
-	__u64 min_objectid;
-	__u64 max_objectid;
-
-	/* keys returned will be >= min and <= max */
-	__u64 min_offset;
-	__u64 max_offset;
-
-	/* max and min transids to search for */
-	__u64 min_transid;
-	__u64 max_transid;
-
-	/* keys returned will be >= min and <= max */
-	__u32 min_type;
-	__u32 max_type;
-
-	/*
-	 * how many items did userland ask for, and how many are we
-	 * returning
-	 */
-	__u32 nr_items;
-
-	/* align to 64 bits */
-	__u32 unused;
-
-	/* some extra for later */
-	__u64 unused1;
-	__u64 unused2;
-	__u64 unused3;
-	__u64 unused4;
-};
-
-struct btrfs_ioctl_search_header {
-	__u64 transid;
-	__u64 objectid;
-	__u64 offset;
-	__u32 type;
-	__u32 len;
-};
-
-#define BTRFS_SEARCH_ARGS_BUFSIZE (4096 - sizeof(struct btrfs_ioctl_search_key))
-/*
- * the buf is an array of search headers where
- * each header is followed by the actual item
- * the type field is expanded to 32 bits for alignment
- */
-struct btrfs_ioctl_search_args {
-	struct btrfs_ioctl_search_key key;
-	char buf[BTRFS_SEARCH_ARGS_BUFSIZE];
-};
-
-struct btrfs_ioctl_clone_range_args {
-  __s64 src_fd;
-  __u64 src_offset, src_length;
-  __u64 dest_offset;
-};
-
-/* flags for the defrag range ioctl */
-#define BTRFS_DEFRAG_RANGE_COMPRESS 1
-#define BTRFS_DEFRAG_RANGE_START_IO 2
-
-struct btrfs_ioctl_space_info {
-	__u64 flags;
-	__u64 total_bytes;
-	__u64 used_bytes;
-};
-
-struct btrfs_ioctl_space_args {
-	__u64 space_slots;
-	__u64 total_spaces;
-	struct btrfs_ioctl_space_info spaces[0];
-};
-
-struct btrfs_data_container {
-	__u32	bytes_left;	/* out -- bytes not needed to deliver output */
-	__u32	bytes_missing;	/* out -- additional bytes needed for result */
-	__u32	elem_cnt;	/* out */
-	__u32	elem_missed;	/* out */
-	__u64	val[0];		/* out */
-};
-
-struct btrfs_ioctl_ino_path_args {
-	__u64				inum;		/* in */
-	__u64				size;		/* in */
-	__u64				reserved[4];
-	/* struct btrfs_data_container	*fspath;	   out */
-	__u64				fspath;		/* out */
-};
-
-struct btrfs_ioctl_logical_ino_args {
-	__u64				logical;	/* in */
-	__u64				size;		/* in */
-	__u64				reserved[4];
-	/* struct btrfs_data_container	*inodes;	out   */
-	__u64				inodes;
-};
-
-enum btrfs_dev_stat_values {
-	/* disk I/O failure stats */
-	BTRFS_DEV_STAT_WRITE_ERRS, /* EIO or EREMOTEIO from lower layers */
-	BTRFS_DEV_STAT_READ_ERRS, /* EIO or EREMOTEIO from lower layers */
-	BTRFS_DEV_STAT_FLUSH_ERRS, /* EIO or EREMOTEIO from lower layers */
-
-	/* stats for indirect indications for I/O failures */
-	BTRFS_DEV_STAT_CORRUPTION_ERRS, /* checksum error, bytenr error or
-					 * contents is illegal: this is an
-					 * indication that the block was damaged
-					 * during read or write, or written to
-					 * wrong location or read from wrong
-					 * location */
-	BTRFS_DEV_STAT_GENERATION_ERRS, /* an indication that blocks have not
-					 * been written */
-
-	BTRFS_DEV_STAT_VALUES_MAX
-};
-
-/* Reset statistics after reading; needs SYS_ADMIN capability */
-#define	BTRFS_DEV_STATS_RESET		(1ULL << 0)
-
-struct btrfs_ioctl_get_dev_stats {
-	__u64 devid;				/* in */
-	__u64 nr_items;				/* in/out */
-	__u64 flags;				/* in/out */
-
-	/* out values: */
-	__u64 values[BTRFS_DEV_STAT_VALUES_MAX];
-
-	__u64 unused[128 - 2 - BTRFS_DEV_STAT_VALUES_MAX]; /* pad to 1k */
-};
-
-#define BTRFS_QUOTA_CTL_ENABLE	1
-#define BTRFS_QUOTA_CTL_DISABLE	2
-#define BTRFS_QUOTA_CTL_RESCAN	3
-struct btrfs_ioctl_quota_ctl_args {
-	__u64 cmd;
-	__u64 status;
-};
-
-struct btrfs_ioctl_qgroup_assign_args {
-	__u64 assign;
-	__u64 src;
-	__u64 dst;
-};
-
-struct btrfs_ioctl_qgroup_create_args {
-	__u64 create;
-	__u64 qgroupid;
-};
-struct btrfs_ioctl_timespec {
-	__u64 sec;
-	__u32 nsec;
-};
-
-struct btrfs_ioctl_received_subvol_args {
-	char	uuid[BTRFS_UUID_SIZE];	/* in */
-	__u64	stransid;		/* in */
-	__u64	rtransid;		/* out */
-	struct btrfs_ioctl_timespec stime; /* in */
-	struct btrfs_ioctl_timespec rtime; /* out */
-	__u64	flags;			/* in */
-	__u64	reserved[16];		/* in */
-};
-
-struct btrfs_ioctl_send_args {
-	__s64 send_fd;			/* in */
-	__u64 clone_sources_count;	/* in */
-	__u64 __user *clone_sources;	/* in */
-	__u64 parent_root;		/* in */
-	__u64 flags;			/* in */
-	__u64 reserved[4];		/* in */
-};
-
-#define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \
-				   struct btrfs_ioctl_vol_args)
-#define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \
-				   struct btrfs_ioctl_vol_args)
-#define BTRFS_IOC_RESIZE _IOW(BTRFS_IOCTL_MAGIC, 3, \
-				   struct btrfs_ioctl_vol_args)
-#define BTRFS_IOC_SCAN_DEV _IOW(BTRFS_IOCTL_MAGIC, 4, \
-				   struct btrfs_ioctl_vol_args)
-/* trans start and trans end are dangerous, and only for
- * use by applications that know how to avoid the
- * resulting deadlocks
- */
-#define BTRFS_IOC_TRANS_START  _IO(BTRFS_IOCTL_MAGIC, 6)
-#define BTRFS_IOC_TRANS_END    _IO(BTRFS_IOCTL_MAGIC, 7)
-#define BTRFS_IOC_SYNC         _IO(BTRFS_IOCTL_MAGIC, 8)
-
-#define BTRFS_IOC_CLONE        _IOW(BTRFS_IOCTL_MAGIC, 9, int)
-#define BTRFS_IOC_ADD_DEV _IOW(BTRFS_IOCTL_MAGIC, 10, \
-				   struct btrfs_ioctl_vol_args)
-#define BTRFS_IOC_RM_DEV _IOW(BTRFS_IOCTL_MAGIC, 11, \
-				   struct btrfs_ioctl_vol_args)
-#define BTRFS_IOC_BALANCE _IOW(BTRFS_IOCTL_MAGIC, 12, \
-				   struct btrfs_ioctl_vol_args)
-
-#define BTRFS_IOC_CLONE_RANGE _IOW(BTRFS_IOCTL_MAGIC, 13, \
-				  struct btrfs_ioctl_clone_range_args)
-
-#define BTRFS_IOC_SUBVOL_CREATE _IOW(BTRFS_IOCTL_MAGIC, 14, \
-				   struct btrfs_ioctl_vol_args)
-#define BTRFS_IOC_SNAP_DESTROY _IOW(BTRFS_IOCTL_MAGIC, 15, \
-				struct btrfs_ioctl_vol_args)
-#define BTRFS_IOC_DEFRAG_RANGE _IOW(BTRFS_IOCTL_MAGIC, 16, \
-				struct btrfs_ioctl_defrag_range_args)
-#define BTRFS_IOC_TREE_SEARCH _IOWR(BTRFS_IOCTL_MAGIC, 17, \
-				   struct btrfs_ioctl_search_args)
-#define BTRFS_IOC_INO_LOOKUP _IOWR(BTRFS_IOCTL_MAGIC, 18, \
-				   struct btrfs_ioctl_ino_lookup_args)
-#define BTRFS_IOC_DEFAULT_SUBVOL _IOW(BTRFS_IOCTL_MAGIC, 19, u64)
-#define BTRFS_IOC_SPACE_INFO _IOWR(BTRFS_IOCTL_MAGIC, 20, \
-				    struct btrfs_ioctl_space_args)
-#define BTRFS_IOC_START_SYNC _IOR(BTRFS_IOCTL_MAGIC, 24, __u64)
-#define BTRFS_IOC_WAIT_SYNC  _IOW(BTRFS_IOCTL_MAGIC, 22, __u64)
-#define BTRFS_IOC_SNAP_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 23, \
-				   struct btrfs_ioctl_vol_args_v2)
-#define BTRFS_IOC_SUBVOL_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 24, \
-				   struct btrfs_ioctl_vol_args_v2)
-#define BTRFS_IOC_SUBVOL_GETFLAGS _IOR(BTRFS_IOCTL_MAGIC, 25, __u64)
-#define BTRFS_IOC_SUBVOL_SETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 26, __u64)
-#define BTRFS_IOC_SCRUB _IOWR(BTRFS_IOCTL_MAGIC, 27, \
-			      struct btrfs_ioctl_scrub_args)
-#define BTRFS_IOC_SCRUB_CANCEL _IO(BTRFS_IOCTL_MAGIC, 28)
-#define BTRFS_IOC_SCRUB_PROGRESS _IOWR(BTRFS_IOCTL_MAGIC, 29, \
-				       struct btrfs_ioctl_scrub_args)
-#define BTRFS_IOC_DEV_INFO _IOWR(BTRFS_IOCTL_MAGIC, 30, \
-				 struct btrfs_ioctl_dev_info_args)
-#define BTRFS_IOC_FS_INFO _IOR(BTRFS_IOCTL_MAGIC, 31, \
-			       struct btrfs_ioctl_fs_info_args)
-#define BTRFS_IOC_BALANCE_V2 _IOWR(BTRFS_IOCTL_MAGIC, 32, \
-				   struct btrfs_ioctl_balance_args)
-#define BTRFS_IOC_BALANCE_CTL _IOW(BTRFS_IOCTL_MAGIC, 33, int)
-#define BTRFS_IOC_BALANCE_PROGRESS _IOR(BTRFS_IOCTL_MAGIC, 34, \
-					struct btrfs_ioctl_balance_args)
-#define BTRFS_IOC_INO_PATHS _IOWR(BTRFS_IOCTL_MAGIC, 35, \
-					struct btrfs_ioctl_ino_path_args)
-#define BTRFS_IOC_LOGICAL_INO _IOWR(BTRFS_IOCTL_MAGIC, 36, \
-					struct btrfs_ioctl_ino_path_args)
-#define BTRFS_IOC_SET_RECEIVED_SUBVOL _IOWR(BTRFS_IOCTL_MAGIC, 37, \
-				struct btrfs_ioctl_received_subvol_args)
-#define BTRFS_IOC_SEND _IOW(BTRFS_IOCTL_MAGIC, 38, struct btrfs_ioctl_send_args)
-#define BTRFS_IOC_DEVICES_READY _IOR(BTRFS_IOCTL_MAGIC, 39, \
-				     struct btrfs_ioctl_vol_args)
-#define BTRFS_IOC_QUOTA_CTL _IOWR(BTRFS_IOCTL_MAGIC, 40, \
-			       struct btrfs_ioctl_quota_ctl_args)
-#define BTRFS_IOC_QGROUP_ASSIGN _IOW(BTRFS_IOCTL_MAGIC, 41, \
-			       struct btrfs_ioctl_qgroup_assign_args)
-#define BTRFS_IOC_QGROUP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 42, \
-			       struct btrfs_ioctl_qgroup_create_args)
-#define BTRFS_IOC_QGROUP_LIMIT _IOR(BTRFS_IOCTL_MAGIC, 43, \
-			       struct btrfs_ioctl_qgroup_limit_args)
-#define BTRFS_IOC_GET_DEV_STATS _IOWR(BTRFS_IOCTL_MAGIC, 52, \
-				      struct btrfs_ioctl_get_dev_stats)
-#define BTRFS_IOC_DEV_REPLACE _IOWR(BTRFS_IOCTL_MAGIC, 53, \
-				    struct btrfs_ioctl_dev_replace_args)
-
-#endif
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index a5c856234323..a0d6368249fa 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -23,13 +23,13 @@
 #include <linux/rbtree.h>
 #include <linux/slab.h>
 #include <linux/workqueue.h>
+#include <linux/btrfs.h>
 
 #include "ctree.h"
 #include "transaction.h"
 #include "disk-io.h"
 #include "locking.h"
 #include "ulist.h"
-#include "ioctl.h"
 #include "backref.h"
 
 /* TODO XXX FIXME
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 67b373bf3ff9..6846ededfe95 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -41,13 +41,13 @@
 #include <linux/slab.h>
 #include <linux/cleancache.h>
 #include <linux/ratelimit.h>
+#include <linux/btrfs.h>
 #include "compat.h"
 #include "delayed-inode.h"
 #include "ctree.h"
 #include "disk-io.h"
 #include "transaction.h"
 #include "btrfs_inode.h"
-#include "ioctl.h"
 #include "print-tree.h"
 #include "xattr.h"
 #include "volumes.h"
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index d3c3939ac751..12bb84166a5f 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -21,8 +21,8 @@
 
 #include <linux/bio.h>
 #include <linux/sort.h>
+#include <linux/btrfs.h>
 #include "async-thread.h"
-#include "ioctl.h"
 
 #define BTRFS_STRIPE_LEN	(64 * 1024)
 
diff --git a/include/linux/btrfs.h b/include/linux/btrfs.h
new file mode 100644
index 000000000000..22d799147db2
--- /dev/null
+++ b/include/linux/btrfs.h
@@ -0,0 +1,6 @@
+#ifndef _LINUX_BTRFS_H
+#define _LINUX_BTRFS_H
+
+#include <uapi/linux/btrfs.h>
+
+#endif /* _LINUX_BTRFS_H */
diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
index 19e765fbfef7..896ee1247294 100644
--- a/include/uapi/linux/Kbuild
+++ b/include/uapi/linux/Kbuild
@@ -68,6 +68,7 @@ header-y += blkpg.h
 header-y += blktrace_api.h
 header-y += bpqether.h
 header-y += bsg.h
+header-y += btrfs.h
 header-y += can.h
 header-y += capability.h
 header-y += capi.h
diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
new file mode 100644
index 000000000000..cffbb582dd90
--- /dev/null
+++ b/include/uapi/linux/btrfs.h
@@ -0,0 +1,503 @@
+/*
+ * Copyright (C) 2007 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#ifndef _UAPI_LINUX_BTRFS_H
+#define _UAPI_LINUX_BTRFS_H
+#include <linux/types.h>
+#include <linux/ioctl.h>
+
+#define BTRFS_IOCTL_MAGIC 0x94
+#define BTRFS_VOL_NAME_MAX 255
+
+/* this should be 4k */
+#define BTRFS_PATH_NAME_MAX 4087
+struct btrfs_ioctl_vol_args {
+	__s64 fd;
+	char name[BTRFS_PATH_NAME_MAX + 1];
+};
+
+#define BTRFS_DEVICE_PATH_NAME_MAX 1024
+
+#define BTRFS_SUBVOL_CREATE_ASYNC	(1ULL << 0)
+#define BTRFS_SUBVOL_RDONLY		(1ULL << 1)
+#define BTRFS_SUBVOL_QGROUP_INHERIT	(1ULL << 2)
+#define BTRFS_FSID_SIZE 16
+#define BTRFS_UUID_SIZE 16
+
+#define BTRFS_QGROUP_INHERIT_SET_LIMITS	(1ULL << 0)
+
+struct btrfs_qgroup_limit {
+	__u64	flags;
+	__u64	max_rfer;
+	__u64	max_excl;
+	__u64	rsv_rfer;
+	__u64	rsv_excl;
+};
+
+struct btrfs_qgroup_inherit {
+	__u64	flags;
+	__u64	num_qgroups;
+	__u64	num_ref_copies;
+	__u64	num_excl_copies;
+	struct btrfs_qgroup_limit lim;
+	__u64	qgroups[0];
+};
+
+struct btrfs_ioctl_qgroup_limit_args {
+	__u64	qgroupid;
+	struct btrfs_qgroup_limit lim;
+};
+
+#define BTRFS_SUBVOL_NAME_MAX 4039
+struct btrfs_ioctl_vol_args_v2 {
+	__s64 fd;
+	__u64 transid;
+	__u64 flags;
+	union {
+		struct {
+			__u64 size;
+			struct btrfs_qgroup_inherit __user *qgroup_inherit;
+		};
+		__u64 unused[4];
+	};
+	char name[BTRFS_SUBVOL_NAME_MAX + 1];
+};
+
+/*
+ * structure to report errors and progress to userspace, either as a
+ * result of a finished scrub, a canceled scrub or a progress inquiry
+ */
+struct btrfs_scrub_progress {
+	__u64 data_extents_scrubbed;	/* # of data extents scrubbed */
+	__u64 tree_extents_scrubbed;	/* # of tree extents scrubbed */
+	__u64 data_bytes_scrubbed;	/* # of data bytes scrubbed */
+	__u64 tree_bytes_scrubbed;	/* # of tree bytes scrubbed */
+	__u64 read_errors;		/* # of read errors encountered (EIO) */
+	__u64 csum_errors;		/* # of failed csum checks */
+	__u64 verify_errors;		/* # of occurences, where the metadata
+					 * of a tree block did not match the
+					 * expected values, like generation or
+					 * logical */
+	__u64 no_csum;			/* # of 4k data block for which no csum
+					 * is present, probably the result of
+					 * data written with nodatasum */
+	__u64 csum_discards;		/* # of csum for which no data was found
+					 * in the extent tree. */
+	__u64 super_errors;		/* # of bad super blocks encountered */
+	__u64 malloc_errors;		/* # of internal kmalloc errors. These
+					 * will likely cause an incomplete
+					 * scrub */
+	__u64 uncorrectable_errors;	/* # of errors where either no intact
+					 * copy was found or the writeback
+					 * failed */
+	__u64 corrected_errors;		/* # of errors corrected */
+	__u64 last_physical;		/* last physical address scrubbed. In
+					 * case a scrub was aborted, this can
+					 * be used to restart the scrub */
+	__u64 unverified_errors;	/* # of occurences where a read for a
+					 * full (64k) bio failed, but the re-
+					 * check succeeded for each 4k piece.
+					 * Intermittent error. */
+};
+
+#define BTRFS_SCRUB_READONLY	1
+struct btrfs_ioctl_scrub_args {
+	__u64 devid;				/* in */
+	__u64 start;				/* in */
+	__u64 end;				/* in */
+	__u64 flags;				/* in */
+	struct btrfs_scrub_progress progress;	/* out */
+	/* pad to 1k */
+	__u64 unused[(1024-32-sizeof(struct btrfs_scrub_progress))/8];
+};
+
+#define BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_ALWAYS	0
+#define BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_AVOID	1
+struct btrfs_ioctl_dev_replace_start_params {
+	__u64 srcdevid;	/* in, if 0, use srcdev_name instead */
+	__u64 cont_reading_from_srcdev_mode;	/* in, see #define
+						 * above */
+	__u8 srcdev_name[BTRFS_DEVICE_PATH_NAME_MAX + 1];	/* in */
+	__u8 tgtdev_name[BTRFS_DEVICE_PATH_NAME_MAX + 1];	/* in */
+};
+
+#define BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED	0
+#define BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED		1
+#define BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED		2
+#define BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED		3
+#define BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED		4
+struct btrfs_ioctl_dev_replace_status_params {
+	__u64 replace_state;	/* out, see #define above */
+	__u64 progress_1000;	/* out, 0 <= x <= 1000 */
+	__u64 time_started;	/* out, seconds since 1-Jan-1970 */
+	__u64 time_stopped;	/* out, seconds since 1-Jan-1970 */
+	__u64 num_write_errors;	/* out */
+	__u64 num_uncorrectable_read_errors;	/* out */
+};
+
+#define BTRFS_IOCTL_DEV_REPLACE_CMD_START			0
+#define BTRFS_IOCTL_DEV_REPLACE_CMD_STATUS			1
+#define BTRFS_IOCTL_DEV_REPLACE_CMD_CANCEL			2
+#define BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR			0
+#define BTRFS_IOCTL_DEV_REPLACE_RESULT_NOT_STARTED		1
+#define BTRFS_IOCTL_DEV_REPLACE_RESULT_ALREADY_STARTED		2
+struct btrfs_ioctl_dev_replace_args {
+	__u64 cmd;	/* in */
+	__u64 result;	/* out */
+
+	union {
+		struct btrfs_ioctl_dev_replace_start_params start;
+		struct btrfs_ioctl_dev_replace_status_params status;
+	};	/* in/out */
+
+	__u64 spare[64];
+};
+
+struct btrfs_ioctl_dev_info_args {
+	__u64 devid;				/* in/out */
+	__u8 uuid[BTRFS_UUID_SIZE];		/* in/out */
+	__u64 bytes_used;			/* out */
+	__u64 total_bytes;			/* out */
+	__u64 unused[379];			/* pad to 4k */
+	__u8 path[BTRFS_DEVICE_PATH_NAME_MAX];	/* out */
+};
+
+struct btrfs_ioctl_fs_info_args {
+	__u64 max_id;				/* out */
+	__u64 num_devices;			/* out */
+	__u8 fsid[BTRFS_FSID_SIZE];		/* out */
+	__u64 reserved[124];			/* pad to 1k */
+};
+
+/* balance control ioctl modes */
+#define BTRFS_BALANCE_CTL_PAUSE		1
+#define BTRFS_BALANCE_CTL_CANCEL	2
+
+/*
+ * this is packed, because it should be exactly the same as its disk
+ * byte order counterpart (struct btrfs_disk_balance_args)
+ */
+struct btrfs_balance_args {
+	__u64 profiles;
+	__u64 usage;
+	__u64 devid;
+	__u64 pstart;
+	__u64 pend;
+	__u64 vstart;
+	__u64 vend;
+
+	__u64 target;
+
+	__u64 flags;
+
+	__u64 unused[8];
+} __attribute__ ((__packed__));
+
+/* report balance progress to userspace */
+struct btrfs_balance_progress {
+	__u64 expected;		/* estimated # of chunks that will be
+				 * relocated to fulfill the request */
+	__u64 considered;	/* # of chunks we have considered so far */
+	__u64 completed;	/* # of chunks relocated so far */
+};
+
+#define BTRFS_BALANCE_STATE_RUNNING	(1ULL << 0)
+#define BTRFS_BALANCE_STATE_PAUSE_REQ	(1ULL << 1)
+#define BTRFS_BALANCE_STATE_CANCEL_REQ	(1ULL << 2)
+
+struct btrfs_ioctl_balance_args {
+	__u64 flags;				/* in/out */
+	__u64 state;				/* out */
+
+	struct btrfs_balance_args data;		/* in/out */
+	struct btrfs_balance_args meta;		/* in/out */
+	struct btrfs_balance_args sys;		/* in/out */
+
+	struct btrfs_balance_progress stat;	/* out */
+
+	__u64 unused[72];			/* pad to 1k */
+};
+
+#define BTRFS_INO_LOOKUP_PATH_MAX 4080
+struct btrfs_ioctl_ino_lookup_args {
+	__u64 treeid;
+	__u64 objectid;
+	char name[BTRFS_INO_LOOKUP_PATH_MAX];
+};
+
+struct btrfs_ioctl_search_key {
+	/* which root are we searching.  0 is the tree of tree roots */
+	__u64 tree_id;
+
+	/* keys returned will be >= min and <= max */
+	__u64 min_objectid;
+	__u64 max_objectid;
+
+	/* keys returned will be >= min and <= max */
+	__u64 min_offset;
+	__u64 max_offset;
+
+	/* max and min transids to search for */
+	__u64 min_transid;
+	__u64 max_transid;
+
+	/* keys returned will be >= min and <= max */
+	__u32 min_type;
+	__u32 max_type;
+
+	/*
+	 * how many items did userland ask for, and how many are we
+	 * returning
+	 */
+	__u32 nr_items;
+
+	/* align to 64 bits */
+	__u32 unused;
+
+	/* some extra for later */
+	__u64 unused1;
+	__u64 unused2;
+	__u64 unused3;
+	__u64 unused4;
+};
+
+struct btrfs_ioctl_search_header {
+	__u64 transid;
+	__u64 objectid;
+	__u64 offset;
+	__u32 type;
+	__u32 len;
+};
+
+#define BTRFS_SEARCH_ARGS_BUFSIZE (4096 - sizeof(struct btrfs_ioctl_search_key))
+/*
+ * the buf is an array of search headers where
+ * each header is followed by the actual item
+ * the type field is expanded to 32 bits for alignment
+ */
+struct btrfs_ioctl_search_args {
+	struct btrfs_ioctl_search_key key;
+	char buf[BTRFS_SEARCH_ARGS_BUFSIZE];
+};
+
+struct btrfs_ioctl_clone_range_args {
+  __s64 src_fd;
+  __u64 src_offset, src_length;
+  __u64 dest_offset;
+};
+
+/* flags for the defrag range ioctl */
+#define BTRFS_DEFRAG_RANGE_COMPRESS 1
+#define BTRFS_DEFRAG_RANGE_START_IO 2
+
+struct btrfs_ioctl_space_info {
+	__u64 flags;
+	__u64 total_bytes;
+	__u64 used_bytes;
+};
+
+struct btrfs_ioctl_space_args {
+	__u64 space_slots;
+	__u64 total_spaces;
+	struct btrfs_ioctl_space_info spaces[0];
+};
+
+struct btrfs_data_container {
+	__u32	bytes_left;	/* out -- bytes not needed to deliver output */
+	__u32	bytes_missing;	/* out -- additional bytes needed for result */
+	__u32	elem_cnt;	/* out */
+	__u32	elem_missed;	/* out */
+	__u64	val[0];		/* out */
+};
+
+struct btrfs_ioctl_ino_path_args {
+	__u64				inum;		/* in */
+	__u64				size;		/* in */
+	__u64				reserved[4];
+	/* struct btrfs_data_container	*fspath;	   out */
+	__u64				fspath;		/* out */
+};
+
+struct btrfs_ioctl_logical_ino_args {
+	__u64				logical;	/* in */
+	__u64				size;		/* in */
+	__u64				reserved[4];
+	/* struct btrfs_data_container	*inodes;	out   */
+	__u64				inodes;
+};
+
+enum btrfs_dev_stat_values {
+	/* disk I/O failure stats */
+	BTRFS_DEV_STAT_WRITE_ERRS, /* EIO or EREMOTEIO from lower layers */
+	BTRFS_DEV_STAT_READ_ERRS, /* EIO or EREMOTEIO from lower layers */
+	BTRFS_DEV_STAT_FLUSH_ERRS, /* EIO or EREMOTEIO from lower layers */
+
+	/* stats for indirect indications for I/O failures */
+	BTRFS_DEV_STAT_CORRUPTION_ERRS, /* checksum error, bytenr error or
+					 * contents is illegal: this is an
+					 * indication that the block was damaged
+					 * during read or write, or written to
+					 * wrong location or read from wrong
+					 * location */
+	BTRFS_DEV_STAT_GENERATION_ERRS, /* an indication that blocks have not
+					 * been written */
+
+	BTRFS_DEV_STAT_VALUES_MAX
+};
+
+/* Reset statistics after reading; needs SYS_ADMIN capability */
+#define	BTRFS_DEV_STATS_RESET		(1ULL << 0)
+
+struct btrfs_ioctl_get_dev_stats {
+	__u64 devid;				/* in */
+	__u64 nr_items;				/* in/out */
+	__u64 flags;				/* in/out */
+
+	/* out values: */
+	__u64 values[BTRFS_DEV_STAT_VALUES_MAX];
+
+	__u64 unused[128 - 2 - BTRFS_DEV_STAT_VALUES_MAX]; /* pad to 1k */
+};
+
+#define BTRFS_QUOTA_CTL_ENABLE	1
+#define BTRFS_QUOTA_CTL_DISABLE	2
+#define BTRFS_QUOTA_CTL_RESCAN	3
+struct btrfs_ioctl_quota_ctl_args {
+	__u64 cmd;
+	__u64 status;
+};
+
+struct btrfs_ioctl_qgroup_assign_args {
+	__u64 assign;
+	__u64 src;
+	__u64 dst;
+};
+
+struct btrfs_ioctl_qgroup_create_args {
+	__u64 create;
+	__u64 qgroupid;
+};
+struct btrfs_ioctl_timespec {
+	__u64 sec;
+	__u32 nsec;
+};
+
+struct btrfs_ioctl_received_subvol_args {
+	char	uuid[BTRFS_UUID_SIZE];	/* in */
+	__u64	stransid;		/* in */
+	__u64	rtransid;		/* out */
+	struct btrfs_ioctl_timespec stime; /* in */
+	struct btrfs_ioctl_timespec rtime; /* out */
+	__u64	flags;			/* in */
+	__u64	reserved[16];		/* in */
+};
+
+struct btrfs_ioctl_send_args {
+	__s64 send_fd;			/* in */
+	__u64 clone_sources_count;	/* in */
+	__u64 __user *clone_sources;	/* in */
+	__u64 parent_root;		/* in */
+	__u64 flags;			/* in */
+	__u64 reserved[4];		/* in */
+};
+
+#define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \
+				   struct btrfs_ioctl_vol_args)
+#define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \
+				   struct btrfs_ioctl_vol_args)
+#define BTRFS_IOC_RESIZE _IOW(BTRFS_IOCTL_MAGIC, 3, \
+				   struct btrfs_ioctl_vol_args)
+#define BTRFS_IOC_SCAN_DEV _IOW(BTRFS_IOCTL_MAGIC, 4, \
+				   struct btrfs_ioctl_vol_args)
+/* trans start and trans end are dangerous, and only for
+ * use by applications that know how to avoid the
+ * resulting deadlocks
+ */
+#define BTRFS_IOC_TRANS_START  _IO(BTRFS_IOCTL_MAGIC, 6)
+#define BTRFS_IOC_TRANS_END    _IO(BTRFS_IOCTL_MAGIC, 7)
+#define BTRFS_IOC_SYNC         _IO(BTRFS_IOCTL_MAGIC, 8)
+
+#define BTRFS_IOC_CLONE        _IOW(BTRFS_IOCTL_MAGIC, 9, int)
+#define BTRFS_IOC_ADD_DEV _IOW(BTRFS_IOCTL_MAGIC, 10, \
+				   struct btrfs_ioctl_vol_args)
+#define BTRFS_IOC_RM_DEV _IOW(BTRFS_IOCTL_MAGIC, 11, \
+				   struct btrfs_ioctl_vol_args)
+#define BTRFS_IOC_BALANCE _IOW(BTRFS_IOCTL_MAGIC, 12, \
+				   struct btrfs_ioctl_vol_args)
+
+#define BTRFS_IOC_CLONE_RANGE _IOW(BTRFS_IOCTL_MAGIC, 13, \
+				  struct btrfs_ioctl_clone_range_args)
+
+#define BTRFS_IOC_SUBVOL_CREATE _IOW(BTRFS_IOCTL_MAGIC, 14, \
+				   struct btrfs_ioctl_vol_args)
+#define BTRFS_IOC_SNAP_DESTROY _IOW(BTRFS_IOCTL_MAGIC, 15, \
+				struct btrfs_ioctl_vol_args)
+#define BTRFS_IOC_DEFRAG_RANGE _IOW(BTRFS_IOCTL_MAGIC, 16, \
+				struct btrfs_ioctl_defrag_range_args)
+#define BTRFS_IOC_TREE_SEARCH _IOWR(BTRFS_IOCTL_MAGIC, 17, \
+				   struct btrfs_ioctl_search_args)
+#define BTRFS_IOC_INO_LOOKUP _IOWR(BTRFS_IOCTL_MAGIC, 18, \
+				   struct btrfs_ioctl_ino_lookup_args)
+#define BTRFS_IOC_DEFAULT_SUBVOL _IOW(BTRFS_IOCTL_MAGIC, 19, u64)
+#define BTRFS_IOC_SPACE_INFO _IOWR(BTRFS_IOCTL_MAGIC, 20, \
+				    struct btrfs_ioctl_space_args)
+#define BTRFS_IOC_START_SYNC _IOR(BTRFS_IOCTL_MAGIC, 24, __u64)
+#define BTRFS_IOC_WAIT_SYNC  _IOW(BTRFS_IOCTL_MAGIC, 22, __u64)
+#define BTRFS_IOC_SNAP_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 23, \
+				   struct btrfs_ioctl_vol_args_v2)
+#define BTRFS_IOC_SUBVOL_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 24, \
+				   struct btrfs_ioctl_vol_args_v2)
+#define BTRFS_IOC_SUBVOL_GETFLAGS _IOR(BTRFS_IOCTL_MAGIC, 25, __u64)
+#define BTRFS_IOC_SUBVOL_SETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 26, __u64)
+#define BTRFS_IOC_SCRUB _IOWR(BTRFS_IOCTL_MAGIC, 27, \
+			      struct btrfs_ioctl_scrub_args)
+#define BTRFS_IOC_SCRUB_CANCEL _IO(BTRFS_IOCTL_MAGIC, 28)
+#define BTRFS_IOC_SCRUB_PROGRESS _IOWR(BTRFS_IOCTL_MAGIC, 29, \
+				       struct btrfs_ioctl_scrub_args)
+#define BTRFS_IOC_DEV_INFO _IOWR(BTRFS_IOCTL_MAGIC, 30, \
+				 struct btrfs_ioctl_dev_info_args)
+#define BTRFS_IOC_FS_INFO _IOR(BTRFS_IOCTL_MAGIC, 31, \
+			       struct btrfs_ioctl_fs_info_args)
+#define BTRFS_IOC_BALANCE_V2 _IOWR(BTRFS_IOCTL_MAGIC, 32, \
+				   struct btrfs_ioctl_balance_args)
+#define BTRFS_IOC_BALANCE_CTL _IOW(BTRFS_IOCTL_MAGIC, 33, int)
+#define BTRFS_IOC_BALANCE_PROGRESS _IOR(BTRFS_IOCTL_MAGIC, 34, \
+					struct btrfs_ioctl_balance_args)
+#define BTRFS_IOC_INO_PATHS _IOWR(BTRFS_IOCTL_MAGIC, 35, \
+					struct btrfs_ioctl_ino_path_args)
+#define BTRFS_IOC_LOGICAL_INO _IOWR(BTRFS_IOCTL_MAGIC, 36, \
+					struct btrfs_ioctl_ino_path_args)
+#define BTRFS_IOC_SET_RECEIVED_SUBVOL _IOWR(BTRFS_IOCTL_MAGIC, 37, \
+				struct btrfs_ioctl_received_subvol_args)
+#define BTRFS_IOC_SEND _IOW(BTRFS_IOCTL_MAGIC, 38, struct btrfs_ioctl_send_args)
+#define BTRFS_IOC_DEVICES_READY _IOR(BTRFS_IOCTL_MAGIC, 39, \
+				     struct btrfs_ioctl_vol_args)
+#define BTRFS_IOC_QUOTA_CTL _IOWR(BTRFS_IOCTL_MAGIC, 40, \
+			       struct btrfs_ioctl_quota_ctl_args)
+#define BTRFS_IOC_QGROUP_ASSIGN _IOW(BTRFS_IOCTL_MAGIC, 41, \
+			       struct btrfs_ioctl_qgroup_assign_args)
+#define BTRFS_IOC_QGROUP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 42, \
+			       struct btrfs_ioctl_qgroup_create_args)
+#define BTRFS_IOC_QGROUP_LIMIT _IOR(BTRFS_IOCTL_MAGIC, 43, \
+			       struct btrfs_ioctl_qgroup_limit_args)
+#define BTRFS_IOC_GET_DEV_STATS _IOWR(BTRFS_IOCTL_MAGIC, 52, \
+				      struct btrfs_ioctl_get_dev_stats)
+#define BTRFS_IOC_DEV_REPLACE _IOWR(BTRFS_IOCTL_MAGIC, 53, \
+				    struct btrfs_ioctl_dev_replace_args)
+
+#endif /* _UAPI_LINUX_BTRFS_H */
-- 
cgit v1.2.3


From 8b975bd3f9089f8ee5d7bbfd798537b992bbc7e7 Mon Sep 17 00:00:00 2001
From: "Markus F.X.J. Oberhumer" <markus@oberhumer.com>
Date: Mon, 13 Aug 2012 17:25:44 +0200
Subject: lib/lzo: Update LZO compression to current upstream version

This commit updates the kernel LZO code to the current upsteam version
which features a significant speed improvement - benchmarking the Calgary
and Silesia test corpora typically shows a doubled performance in
both compression and decompression on modern i386/x86_64/powerpc machines.

Signed-off-by: Markus F.X.J. Oberhumer <markus@oberhumer.com>
---
 include/linux/lzo.h             |  15 +-
 lib/lzo/lzo1x_compress.c        | 335 ++++++++++++++++++++++----------------
 lib/lzo/lzo1x_decompress_safe.c | 350 +++++++++++++++++++---------------------
 lib/lzo/lzodefs.h               |  38 +++--
 4 files changed, 395 insertions(+), 343 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/lzo.h b/include/linux/lzo.h
index d793497ec1ca..a0848d9377e5 100644
--- a/include/linux/lzo.h
+++ b/include/linux/lzo.h
@@ -4,28 +4,28 @@
  *  LZO Public Kernel Interface
  *  A mini subset of the LZO real-time data compression library
  *
- *  Copyright (C) 1996-2005 Markus F.X.J. Oberhumer <markus@oberhumer.com>
+ *  Copyright (C) 1996-2012 Markus F.X.J. Oberhumer <markus@oberhumer.com>
  *
  *  The full LZO package can be found at:
  *  http://www.oberhumer.com/opensource/lzo/
  *
- *  Changed for kernel use by:
+ *  Changed for Linux kernel use by:
  *  Nitin Gupta <nitingupta910@gmail.com>
  *  Richard Purdie <rpurdie@openedhand.com>
  */
 
-#define LZO1X_MEM_COMPRESS	(16384 * sizeof(unsigned char *))
-#define LZO1X_1_MEM_COMPRESS	LZO1X_MEM_COMPRESS
+#define LZO1X_1_MEM_COMPRESS	(8192 * sizeof(unsigned short))
+#define LZO1X_MEM_COMPRESS	LZO1X_1_MEM_COMPRESS
 
 #define lzo1x_worst_compress(x) ((x) + ((x) / 16) + 64 + 3)
 
-/* This requires 'workmem' of size LZO1X_1_MEM_COMPRESS */
+/* This requires 'wrkmem' of size LZO1X_1_MEM_COMPRESS */
 int lzo1x_1_compress(const unsigned char *src, size_t src_len,
-			unsigned char *dst, size_t *dst_len, void *wrkmem);
+		     unsigned char *dst, size_t *dst_len, void *wrkmem);
 
 /* safe decompression with overrun testing */
 int lzo1x_decompress_safe(const unsigned char *src, size_t src_len,
-			unsigned char *dst, size_t *dst_len);
+			  unsigned char *dst, size_t *dst_len);
 
 /*
  * Return values (< 0 = Error)
@@ -40,5 +40,6 @@ int lzo1x_decompress_safe(const unsigned char *src, size_t src_len,
 #define LZO_E_EOF_NOT_FOUND		(-7)
 #define LZO_E_INPUT_NOT_CONSUMED	(-8)
 #define LZO_E_NOT_YET_IMPLEMENTED	(-9)
+#define LZO_E_INVALID_ARGUMENT		(-10)
 
 #endif
diff --git a/lib/lzo/lzo1x_compress.c b/lib/lzo/lzo1x_compress.c
index a6040990a62e..236eb21167b5 100644
--- a/lib/lzo/lzo1x_compress.c
+++ b/lib/lzo/lzo1x_compress.c
@@ -1,194 +1,243 @@
 /*
- *  LZO1X Compressor from MiniLZO
+ *  LZO1X Compressor from LZO
  *
- *  Copyright (C) 1996-2005 Markus F.X.J. Oberhumer <markus@oberhumer.com>
+ *  Copyright (C) 1996-2012 Markus F.X.J. Oberhumer <markus@oberhumer.com>
  *
  *  The full LZO package can be found at:
  *  http://www.oberhumer.com/opensource/lzo/
  *
- *  Changed for kernel use by:
+ *  Changed for Linux kernel use by:
  *  Nitin Gupta <nitingupta910@gmail.com>
  *  Richard Purdie <rpurdie@openedhand.com>
  */
 
 #include <linux/module.h>
 #include <linux/kernel.h>
-#include <linux/lzo.h>
 #include <asm/unaligned.h>
+#include <linux/lzo.h>
 #include "lzodefs.h"
 
 static noinline size_t
-_lzo1x_1_do_compress(const unsigned char *in, size_t in_len,
-		unsigned char *out, size_t *out_len, void *wrkmem)
+lzo1x_1_do_compress(const unsigned char *in, size_t in_len,
+		    unsigned char *out, size_t *out_len,
+		    size_t ti, void *wrkmem)
 {
+	const unsigned char *ip;
+	unsigned char *op;
 	const unsigned char * const in_end = in + in_len;
-	const unsigned char * const ip_end = in + in_len - M2_MAX_LEN - 5;
-	const unsigned char ** const dict = wrkmem;
-	const unsigned char *ip = in, *ii = ip;
-	const unsigned char *end, *m, *m_pos;
-	size_t m_off, m_len, dindex;
-	unsigned char *op = out;
+	const unsigned char * const ip_end = in + in_len - 20;
+	const unsigned char *ii;
+	lzo_dict_t * const dict = (lzo_dict_t *) wrkmem;
 
-	ip += 4;
+	op = out;
+	ip = in;
+	ii = ip;
+	ip += ti < 4 ? 4 - ti : 0;
 
 	for (;;) {
-		dindex = ((size_t)(0x21 * DX3(ip, 5, 5, 6)) >> 5) & D_MASK;
-		m_pos = dict[dindex];
-
-		if (m_pos < in)
-			goto literal;
-
-		if (ip == m_pos || ((size_t)(ip - m_pos) > M4_MAX_OFFSET))
-			goto literal;
-
-		m_off = ip - m_pos;
-		if (m_off <= M2_MAX_OFFSET || m_pos[3] == ip[3])
-			goto try_match;
-
-		dindex = (dindex & (D_MASK & 0x7ff)) ^ (D_HIGH | 0x1f);
-		m_pos = dict[dindex];
-
-		if (m_pos < in)
-			goto literal;
-
-		if (ip == m_pos || ((size_t)(ip - m_pos) > M4_MAX_OFFSET))
-			goto literal;
-
-		m_off = ip - m_pos;
-		if (m_off <= M2_MAX_OFFSET || m_pos[3] == ip[3])
-			goto try_match;
-
-		goto literal;
-
-try_match:
-		if (get_unaligned((const unsigned short *)m_pos)
-				== get_unaligned((const unsigned short *)ip)) {
-			if (likely(m_pos[2] == ip[2]))
-					goto match;
-		}
-
+		const unsigned char *m_pos;
+		size_t t, m_len, m_off;
+		u32 dv;
 literal:
-		dict[dindex] = ip;
-		++ip;
+		ip += 1 + ((ip - ii) >> 5);
+next:
 		if (unlikely(ip >= ip_end))
 			break;
-		continue;
-
-match:
-		dict[dindex] = ip;
-		if (ip != ii) {
-			size_t t = ip - ii;
+		dv = get_unaligned_le32(ip);
+		t = ((dv * 0x1824429d) >> (32 - D_BITS)) & D_MASK;
+		m_pos = in + dict[t];
+		dict[t] = (lzo_dict_t) (ip - in);
+		if (unlikely(dv != get_unaligned_le32(m_pos)))
+			goto literal;
 
+		ii -= ti;
+		ti = 0;
+		t = ip - ii;
+		if (t != 0) {
 			if (t <= 3) {
 				op[-2] |= t;
-			} else if (t <= 18) {
+				COPY4(op, ii);
+				op += t;
+			} else if (t <= 16) {
 				*op++ = (t - 3);
+				COPY8(op, ii);
+				COPY8(op + 8, ii + 8);
+				op += t;
 			} else {
-				size_t tt = t - 18;
-
-				*op++ = 0;
-				while (tt > 255) {
-					tt -= 255;
+				if (t <= 18) {
+					*op++ = (t - 3);
+				} else {
+					size_t tt = t - 18;
 					*op++ = 0;
+					while (unlikely(tt > 255)) {
+						tt -= 255;
+						*op++ = 0;
+					}
+					*op++ = tt;
 				}
-				*op++ = tt;
+				do {
+					COPY8(op, ii);
+					COPY8(op + 8, ii + 8);
+					op += 16;
+					ii += 16;
+					t -= 16;
+				} while (t >= 16);
+				if (t > 0) do {
+					*op++ = *ii++;
+				} while (--t > 0);
 			}
-			do {
-				*op++ = *ii++;
-			} while (--t > 0);
 		}
 
-		ip += 3;
-		if (m_pos[3] != *ip++ || m_pos[4] != *ip++
-				|| m_pos[5] != *ip++ || m_pos[6] != *ip++
-				|| m_pos[7] != *ip++ || m_pos[8] != *ip++) {
-			--ip;
-			m_len = ip - ii;
+		m_len = 4;
+		{
+#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && defined(LZO_USE_CTZ64)
+		u64 v;
+		v = get_unaligned((const u64 *) (ip + m_len)) ^
+		    get_unaligned((const u64 *) (m_pos + m_len));
+		if (unlikely(v == 0)) {
+			do {
+				m_len += 8;
+				v = get_unaligned((const u64 *) (ip + m_len)) ^
+				    get_unaligned((const u64 *) (m_pos + m_len));
+				if (unlikely(ip + m_len >= ip_end))
+					goto m_len_done;
+			} while (v == 0);
+		}
+#  if defined(__LITTLE_ENDIAN)
+		m_len += (unsigned) __builtin_ctzll(v) / 8;
+#  elif defined(__BIG_ENDIAN)
+		m_len += (unsigned) __builtin_clzll(v) / 8;
+#  else
+#    error "missing endian definition"
+#  endif
+#elif defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && defined(LZO_USE_CTZ32)
+		u32 v;
+		v = get_unaligned((const u32 *) (ip + m_len)) ^
+		    get_unaligned((const u32 *) (m_pos + m_len));
+		if (unlikely(v == 0)) {
+			do {
+				m_len += 4;
+				v = get_unaligned((const u32 *) (ip + m_len)) ^
+				    get_unaligned((const u32 *) (m_pos + m_len));
+				if (v != 0)
+					break;
+				m_len += 4;
+				v = get_unaligned((const u32 *) (ip + m_len)) ^
+				    get_unaligned((const u32 *) (m_pos + m_len));
+				if (unlikely(ip + m_len >= ip_end))
+					goto m_len_done;
+			} while (v == 0);
+		}
+#  if defined(__LITTLE_ENDIAN)
+		m_len += (unsigned) __builtin_ctz(v) / 8;
+#  elif defined(__BIG_ENDIAN)
+		m_len += (unsigned) __builtin_clz(v) / 8;
+#  else
+#    error "missing endian definition"
+#  endif
+#else
+		if (unlikely(ip[m_len] == m_pos[m_len])) {
+			do {
+				m_len += 1;
+				if (ip[m_len] != m_pos[m_len])
+					break;
+				m_len += 1;
+				if (ip[m_len] != m_pos[m_len])
+					break;
+				m_len += 1;
+				if (ip[m_len] != m_pos[m_len])
+					break;
+				m_len += 1;
+				if (ip[m_len] != m_pos[m_len])
+					break;
+				m_len += 1;
+				if (ip[m_len] != m_pos[m_len])
+					break;
+				m_len += 1;
+				if (ip[m_len] != m_pos[m_len])
+					break;
+				m_len += 1;
+				if (ip[m_len] != m_pos[m_len])
+					break;
+				m_len += 1;
+				if (unlikely(ip + m_len >= ip_end))
+					goto m_len_done;
+			} while (ip[m_len] == m_pos[m_len]);
+		}
+#endif
+		}
+m_len_done:
 
-			if (m_off <= M2_MAX_OFFSET) {
-				m_off -= 1;
-				*op++ = (((m_len - 1) << 5)
-						| ((m_off & 7) << 2));
-				*op++ = (m_off >> 3);
-			} else if (m_off <= M3_MAX_OFFSET) {
-				m_off -= 1;
+		m_off = ip - m_pos;
+		ip += m_len;
+		ii = ip;
+		if (m_len <= M2_MAX_LEN && m_off <= M2_MAX_OFFSET) {
+			m_off -= 1;
+			*op++ = (((m_len - 1) << 5) | ((m_off & 7) << 2));
+			*op++ = (m_off >> 3);
+		} else if (m_off <= M3_MAX_OFFSET) {
+			m_off -= 1;
+			if (m_len <= M3_MAX_LEN)
 				*op++ = (M3_MARKER | (m_len - 2));
-				goto m3_m4_offset;
-			} else {
-				m_off -= 0x4000;
-
-				*op++ = (M4_MARKER | ((m_off & 0x4000) >> 11)
-						| (m_len - 2));
-				goto m3_m4_offset;
+			else {
+				m_len -= M3_MAX_LEN;
+				*op++ = M3_MARKER | 0;
+				while (unlikely(m_len > 255)) {
+					m_len -= 255;
+					*op++ = 0;
+				}
+				*op++ = (m_len);
 			}
+			*op++ = (m_off << 2);
+			*op++ = (m_off >> 6);
 		} else {
-			end = in_end;
-			m = m_pos + M2_MAX_LEN + 1;
-
-			while (ip < end && *m == *ip) {
-				m++;
-				ip++;
-			}
-			m_len = ip - ii;
-
-			if (m_off <= M3_MAX_OFFSET) {
-				m_off -= 1;
-				if (m_len <= 33) {
-					*op++ = (M3_MARKER | (m_len - 2));
-				} else {
-					m_len -= 33;
-					*op++ = M3_MARKER | 0;
-					goto m3_m4_len;
-				}
-			} else {
-				m_off -= 0x4000;
-				if (m_len <= M4_MAX_LEN) {
-					*op++ = (M4_MARKER
-						| ((m_off & 0x4000) >> 11)
+			m_off -= 0x4000;
+			if (m_len <= M4_MAX_LEN)
+				*op++ = (M4_MARKER | ((m_off >> 11) & 8)
 						| (m_len - 2));
-				} else {
-					m_len -= M4_MAX_LEN;
-					*op++ = (M4_MARKER
-						| ((m_off & 0x4000) >> 11));
-m3_m4_len:
-					while (m_len > 255) {
-						m_len -= 255;
-						*op++ = 0;
-					}
-
-					*op++ = (m_len);
+			else {
+				m_len -= M4_MAX_LEN;
+				*op++ = (M4_MARKER | ((m_off >> 11) & 8));
+				while (unlikely(m_len > 255)) {
+					m_len -= 255;
+					*op++ = 0;
 				}
+				*op++ = (m_len);
 			}
-m3_m4_offset:
-			*op++ = ((m_off & 63) << 2);
+			*op++ = (m_off << 2);
 			*op++ = (m_off >> 6);
 		}
-
-		ii = ip;
-		if (unlikely(ip >= ip_end))
-			break;
+		goto next;
 	}
-
 	*out_len = op - out;
-	return in_end - ii;
+	return in_end - (ii - ti);
 }
 
-int lzo1x_1_compress(const unsigned char *in, size_t in_len, unsigned char *out,
-			size_t *out_len, void *wrkmem)
+int lzo1x_1_compress(const unsigned char *in, size_t in_len,
+		     unsigned char *out, size_t *out_len,
+		     void *wrkmem)
 {
-	const unsigned char *ii;
+	const unsigned char *ip = in;
 	unsigned char *op = out;
-	size_t t;
+	size_t l = in_len;
+	size_t t = 0;
 
-	if (unlikely(in_len <= M2_MAX_LEN + 5)) {
-		t = in_len;
-	} else {
-		t = _lzo1x_1_do_compress(in, in_len, op, out_len, wrkmem);
+	while (l > 20) {
+		size_t ll = l <= (M4_MAX_OFFSET + 1) ? l : (M4_MAX_OFFSET + 1);
+		uintptr_t ll_end = (uintptr_t) ip + ll;
+		if ((ll_end + ((t + ll) >> 5)) <= ll_end)
+			break;
+		BUILD_BUG_ON(D_SIZE * sizeof(lzo_dict_t) > LZO1X_1_MEM_COMPRESS);
+		memset(wrkmem, 0, D_SIZE * sizeof(lzo_dict_t));
+		t = lzo1x_1_do_compress(ip, ll, op, out_len, t, wrkmem);
+		ip += ll;
 		op += *out_len;
+		l  -= ll;
 	}
+	t += l;
 
 	if (t > 0) {
-		ii = in + in_len - t;
+		const unsigned char *ii = in + in_len - t;
 
 		if (op == out && t <= 238) {
 			*op++ = (17 + t);
@@ -198,16 +247,21 @@ int lzo1x_1_compress(const unsigned char *in, size_t in_len, unsigned char *out,
 			*op++ = (t - 3);
 		} else {
 			size_t tt = t - 18;
-
 			*op++ = 0;
 			while (tt > 255) {
 				tt -= 255;
 				*op++ = 0;
 			}
-
 			*op++ = tt;
 		}
-		do {
+		if (t >= 16) do {
+			COPY8(op, ii);
+			COPY8(op + 8, ii + 8);
+			op += 16;
+			ii += 16;
+			t -= 16;
+		} while (t >= 16);
+		if (t > 0) do {
 			*op++ = *ii++;
 		} while (--t > 0);
 	}
@@ -223,4 +277,3 @@ EXPORT_SYMBOL_GPL(lzo1x_1_compress);
 
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("LZO1X-1 Compressor");
-
diff --git a/lib/lzo/lzo1x_decompress_safe.c b/lib/lzo/lzo1x_decompress_safe.c
index f2fd09850223..569985d522d5 100644
--- a/lib/lzo/lzo1x_decompress_safe.c
+++ b/lib/lzo/lzo1x_decompress_safe.c
@@ -1,12 +1,12 @@
 /*
- *  LZO1X Decompressor from MiniLZO
+ *  LZO1X Decompressor from LZO
  *
- *  Copyright (C) 1996-2005 Markus F.X.J. Oberhumer <markus@oberhumer.com>
+ *  Copyright (C) 1996-2012 Markus F.X.J. Oberhumer <markus@oberhumer.com>
  *
  *  The full LZO package can be found at:
  *  http://www.oberhumer.com/opensource/lzo/
  *
- *  Changed for kernel use by:
+ *  Changed for Linux kernel use by:
  *  Nitin Gupta <nitingupta910@gmail.com>
  *  Richard Purdie <rpurdie@openedhand.com>
  */
@@ -15,225 +15,207 @@
 #include <linux/module.h>
 #include <linux/kernel.h>
 #endif
-
 #include <asm/unaligned.h>
 #include <linux/lzo.h>
 #include "lzodefs.h"
 
-#define HAVE_IP(x, ip_end, ip) ((size_t)(ip_end - ip) < (x))
-#define HAVE_OP(x, op_end, op) ((size_t)(op_end - op) < (x))
-#define HAVE_LB(m_pos, out, op) (m_pos < out || m_pos >= op)
-
-#define COPY4(dst, src)	\
-		put_unaligned(get_unaligned((const u32 *)(src)), (u32 *)(dst))
+#define HAVE_IP(x)      ((size_t)(ip_end - ip) >= (size_t)(x))
+#define HAVE_OP(x)      ((size_t)(op_end - op) >= (size_t)(x))
+#define NEED_IP(x)      if (!HAVE_IP(x)) goto input_overrun
+#define NEED_OP(x)      if (!HAVE_OP(x)) goto output_overrun
+#define TEST_LB(m_pos)  if ((m_pos) < out) goto lookbehind_overrun
 
 int lzo1x_decompress_safe(const unsigned char *in, size_t in_len,
-			unsigned char *out, size_t *out_len)
+			  unsigned char *out, size_t *out_len)
 {
+	unsigned char *op;
+	const unsigned char *ip;
+	size_t t, next;
+	size_t state = 0;
+	const unsigned char *m_pos;
 	const unsigned char * const ip_end = in + in_len;
 	unsigned char * const op_end = out + *out_len;
-	const unsigned char *ip = in, *m_pos;
-	unsigned char *op = out;
-	size_t t;
 
-	*out_len = 0;
+	op = out;
+	ip = in;
 
+	if (unlikely(in_len < 3))
+		goto input_overrun;
 	if (*ip > 17) {
 		t = *ip++ - 17;
-		if (t < 4)
+		if (t < 4) {
+			next = t;
 			goto match_next;
-		if (HAVE_OP(t, op_end, op))
-			goto output_overrun;
-		if (HAVE_IP(t + 1, ip_end, ip))
-			goto input_overrun;
-		do {
-			*op++ = *ip++;
-		} while (--t > 0);
-		goto first_literal_run;
-	}
-
-	while ((ip < ip_end)) {
-		t = *ip++;
-		if (t >= 16)
-			goto match;
-		if (t == 0) {
-			if (HAVE_IP(1, ip_end, ip))
-				goto input_overrun;
-			while (*ip == 0) {
-				t += 255;
-				ip++;
-				if (HAVE_IP(1, ip_end, ip))
-					goto input_overrun;
-			}
-			t += 15 + *ip++;
-		}
-		if (HAVE_OP(t + 3, op_end, op))
-			goto output_overrun;
-		if (HAVE_IP(t + 4, ip_end, ip))
-			goto input_overrun;
-
-		COPY4(op, ip);
-		op += 4;
-		ip += 4;
-		if (--t > 0) {
-			if (t >= 4) {
-				do {
-					COPY4(op, ip);
-					op += 4;
-					ip += 4;
-					t -= 4;
-				} while (t >= 4);
-				if (t > 0) {
-					do {
-						*op++ = *ip++;
-					} while (--t > 0);
-				}
-			} else {
-				do {
-					*op++ = *ip++;
-				} while (--t > 0);
-			}
 		}
+		goto copy_literal_run;
+	}
 
-first_literal_run:
+	for (;;) {
 		t = *ip++;
-		if (t >= 16)
-			goto match;
-		m_pos = op - (1 + M2_MAX_OFFSET);
-		m_pos -= t >> 2;
-		m_pos -= *ip++ << 2;
-
-		if (HAVE_LB(m_pos, out, op))
-			goto lookbehind_overrun;
-
-		if (HAVE_OP(3, op_end, op))
-			goto output_overrun;
-		*op++ = *m_pos++;
-		*op++ = *m_pos++;
-		*op++ = *m_pos;
-
-		goto match_done;
-
-		do {
-match:
-			if (t >= 64) {
-				m_pos = op - 1;
-				m_pos -= (t >> 2) & 7;
-				m_pos -= *ip++ << 3;
-				t = (t >> 5) - 1;
-				if (HAVE_LB(m_pos, out, op))
-					goto lookbehind_overrun;
-				if (HAVE_OP(t + 3 - 1, op_end, op))
-					goto output_overrun;
-				goto copy_match;
-			} else if (t >= 32) {
-				t &= 31;
-				if (t == 0) {
-					if (HAVE_IP(1, ip_end, ip))
-						goto input_overrun;
-					while (*ip == 0) {
+		if (t < 16) {
+			if (likely(state == 0)) {
+				if (unlikely(t == 0)) {
+					while (unlikely(*ip == 0)) {
 						t += 255;
 						ip++;
-						if (HAVE_IP(1, ip_end, ip))
-							goto input_overrun;
+						NEED_IP(1);
 					}
-					t += 31 + *ip++;
+					t += 15 + *ip++;
 				}
-				m_pos = op - 1;
-				m_pos -= get_unaligned_le16(ip) >> 2;
-				ip += 2;
-			} else if (t >= 16) {
-				m_pos = op;
-				m_pos -= (t & 8) << 11;
-
-				t &= 7;
-				if (t == 0) {
-					if (HAVE_IP(1, ip_end, ip))
-						goto input_overrun;
-					while (*ip == 0) {
-						t += 255;
-						ip++;
-						if (HAVE_IP(1, ip_end, ip))
-							goto input_overrun;
-					}
-					t += 7 + *ip++;
+				t += 3;
+copy_literal_run:
+#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
+				if (likely(HAVE_IP(t + 15) && HAVE_OP(t + 15))) {
+					const unsigned char *ie = ip + t;
+					unsigned char *oe = op + t;
+					do {
+						COPY8(op, ip);
+						op += 8;
+						ip += 8;
+						COPY8(op, ip);
+						op += 8;
+						ip += 8;
+					} while (ip < ie);
+					ip = ie;
+					op = oe;
+				} else
+#endif
+				{
+					NEED_OP(t);
+					NEED_IP(t + 3);
+					do {
+						*op++ = *ip++;
+					} while (--t > 0);
 				}
-				m_pos -= get_unaligned_le16(ip) >> 2;
-				ip += 2;
-				if (m_pos == op)
-					goto eof_found;
-				m_pos -= 0x4000;
-			} else {
+				state = 4;
+				continue;
+			} else if (state != 4) {
+				next = t & 3;
 				m_pos = op - 1;
 				m_pos -= t >> 2;
 				m_pos -= *ip++ << 2;
-
-				if (HAVE_LB(m_pos, out, op))
-					goto lookbehind_overrun;
-				if (HAVE_OP(2, op_end, op))
-					goto output_overrun;
-
-				*op++ = *m_pos++;
-				*op++ = *m_pos;
-				goto match_done;
+				TEST_LB(m_pos);
+				NEED_OP(2);
+				op[0] = m_pos[0];
+				op[1] = m_pos[1];
+				op += 2;
+				goto match_next;
+			} else {
+				next = t & 3;
+				m_pos = op - (1 + M2_MAX_OFFSET);
+				m_pos -= t >> 2;
+				m_pos -= *ip++ << 2;
+				t = 3;
 			}
-
-			if (HAVE_LB(m_pos, out, op))
-				goto lookbehind_overrun;
-			if (HAVE_OP(t + 3 - 1, op_end, op))
-				goto output_overrun;
-
-			if (t >= 2 * 4 - (3 - 1) && (op - m_pos) >= 4) {
-				COPY4(op, m_pos);
-				op += 4;
-				m_pos += 4;
-				t -= 4 - (3 - 1);
+		} else if (t >= 64) {
+			next = t & 3;
+			m_pos = op - 1;
+			m_pos -= (t >> 2) & 7;
+			m_pos -= *ip++ << 3;
+			t = (t >> 5) - 1 + (3 - 1);
+		} else if (t >= 32) {
+			t = (t & 31) + (3 - 1);
+			if (unlikely(t == 2)) {
+				while (unlikely(*ip == 0)) {
+					t += 255;
+					ip++;
+					NEED_IP(1);
+				}
+				t += 31 + *ip++;
+				NEED_IP(2);
+			}
+			m_pos = op - 1;
+			next = get_unaligned_le16(ip);
+			ip += 2;
+			m_pos -= next >> 2;
+			next &= 3;
+		} else {
+			m_pos = op;
+			m_pos -= (t & 8) << 11;
+			t = (t & 7) + (3 - 1);
+			if (unlikely(t == 2)) {
+				while (unlikely(*ip == 0)) {
+					t += 255;
+					ip++;
+					NEED_IP(1);
+				}
+				t += 7 + *ip++;
+				NEED_IP(2);
+			}
+			next = get_unaligned_le16(ip);
+			ip += 2;
+			m_pos -= next >> 2;
+			next &= 3;
+			if (m_pos == op)
+				goto eof_found;
+			m_pos -= 0x4000;
+		}
+		TEST_LB(m_pos);
+#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
+		if (op - m_pos >= 8) {
+			unsigned char *oe = op + t;
+			if (likely(HAVE_OP(t + 15))) {
 				do {
-					COPY4(op, m_pos);
-					op += 4;
-					m_pos += 4;
-					t -= 4;
-				} while (t >= 4);
-				if (t > 0)
-					do {
-						*op++ = *m_pos++;
-					} while (--t > 0);
+					COPY8(op, m_pos);
+					op += 8;
+					m_pos += 8;
+					COPY8(op, m_pos);
+					op += 8;
+					m_pos += 8;
+				} while (op < oe);
+				op = oe;
+				if (HAVE_IP(6)) {
+					state = next;
+					COPY4(op, ip);
+					op += next;
+					ip += next;
+					continue;
+				}
 			} else {
-copy_match:
-				*op++ = *m_pos++;
-				*op++ = *m_pos++;
+				NEED_OP(t);
 				do {
 					*op++ = *m_pos++;
-				} while (--t > 0);
+				} while (op < oe);
 			}
-match_done:
-			t = ip[-2] & 3;
-			if (t == 0)
-				break;
+		} else
+#endif
+		{
+			unsigned char *oe = op + t;
+			NEED_OP(t);
+			op[0] = m_pos[0];
+			op[1] = m_pos[1];
+			op += 2;
+			m_pos += 2;
+			do {
+				*op++ = *m_pos++;
+			} while (op < oe);
+		}
 match_next:
-			if (HAVE_OP(t, op_end, op))
-				goto output_overrun;
-			if (HAVE_IP(t + 1, ip_end, ip))
-				goto input_overrun;
-
-			*op++ = *ip++;
-			if (t > 1) {
+		state = next;
+		t = next;
+#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
+		if (likely(HAVE_IP(6) && HAVE_OP(4))) {
+			COPY4(op, ip);
+			op += t;
+			ip += t;
+		} else
+#endif
+		{
+			NEED_IP(t + 3);
+			NEED_OP(t);
+			while (t > 0) {
 				*op++ = *ip++;
-				if (t > 2)
-					*op++ = *ip++;
+				t--;
 			}
-
-			t = *ip++;
-		} while (ip < ip_end);
+		}
 	}
 
-	*out_len = op - out;
-	return LZO_E_EOF_NOT_FOUND;
-
 eof_found:
 	*out_len = op - out;
-	return (ip == ip_end ? LZO_E_OK :
-		(ip < ip_end ? LZO_E_INPUT_NOT_CONSUMED : LZO_E_INPUT_OVERRUN));
+	return (t != 3       ? LZO_E_ERROR :
+		ip == ip_end ? LZO_E_OK :
+		ip <  ip_end ? LZO_E_INPUT_NOT_CONSUMED : LZO_E_INPUT_OVERRUN);
+
 input_overrun:
 	*out_len = op - out;
 	return LZO_E_INPUT_OVERRUN;
diff --git a/lib/lzo/lzodefs.h b/lib/lzo/lzodefs.h
index b6d482c492ef..6710b83ce72e 100644
--- a/lib/lzo/lzodefs.h
+++ b/lib/lzo/lzodefs.h
@@ -1,19 +1,37 @@
 /*
  *  lzodefs.h -- architecture, OS and compiler specific defines
  *
- *  Copyright (C) 1996-2005 Markus F.X.J. Oberhumer <markus@oberhumer.com>
+ *  Copyright (C) 1996-2012 Markus F.X.J. Oberhumer <markus@oberhumer.com>
  *
  *  The full LZO package can be found at:
  *  http://www.oberhumer.com/opensource/lzo/
  *
- *  Changed for kernel use by:
+ *  Changed for Linux kernel use by:
  *  Nitin Gupta <nitingupta910@gmail.com>
  *  Richard Purdie <rpurdie@openedhand.com>
  */
 
-#define LZO_VERSION		0x2020
-#define LZO_VERSION_STRING	"2.02"
-#define LZO_VERSION_DATE	"Oct 17 2005"
+
+#define COPY4(dst, src)	\
+		put_unaligned(get_unaligned((const u32 *)(src)), (u32 *)(dst))
+#if defined(__x86_64__)
+#define COPY8(dst, src)	\
+		put_unaligned(get_unaligned((const u64 *)(src)), (u64 *)(dst))
+#else
+#define COPY8(dst, src)	\
+		COPY4(dst, src); COPY4((dst) + 4, (src) + 4)
+#endif
+
+#if defined(__BIG_ENDIAN) && defined(__LITTLE_ENDIAN)
+#error "conflicting endian definitions"
+#elif defined(__x86_64__)
+#define LZO_USE_CTZ64	1
+#define LZO_USE_CTZ32	1
+#elif defined(__i386__) || defined(__powerpc__)
+#define LZO_USE_CTZ32	1
+#elif defined(__arm__) && (__LINUX_ARM_ARCH__ >= 5)
+#define LZO_USE_CTZ32	1
+#endif
 
 #define M1_MAX_OFFSET	0x0400
 #define M2_MAX_OFFSET	0x0800
@@ -34,10 +52,8 @@
 #define M3_MARKER	32
 #define M4_MARKER	16
 
-#define D_BITS		14
-#define D_MASK		((1u << D_BITS) - 1)
+#define lzo_dict_t      unsigned short
+#define D_BITS		13
+#define D_SIZE		(1u << D_BITS)
+#define D_MASK		(D_SIZE - 1)
 #define D_HIGH		((D_MASK >> 1) + 1)
-
-#define DX2(p, s1, s2)	(((((size_t)((p)[2]) << (s2)) ^ (p)[1]) \
-							<< (s1)) ^ (p)[0])
-#define DX3(p, s1, s2, s3)	((DX2((p)+1, s2, s3) << (s1)) ^ (p)[0])
-- 
cgit v1.2.3


From 52608ba20546139dc76cca8a46c1d901455d5450 Mon Sep 17 00:00:00 2001
From: Niklas Söderlund <niklas.soderlund@ericsson.com>
Date: Wed, 8 Aug 2012 12:30:56 -0300
Subject: i5100_edac: probe for device 19 function 0
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Probe and store the device handle for the device 19 function 0 during
driver initialization. The device is used during fault injection.

Signed-off-by: Niklas Söderlund <niklas.soderlund@ericsson.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/edac/i5100_edac.c | 28 +++++++++++++++++++++++++++-
 include/linux/pci_ids.h   |  1 +
 2 files changed, 28 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/edac/i5100_edac.c b/drivers/edac/i5100_edac.c
index d6955b2cc99f..33c5c8e663f2 100644
--- a/drivers/edac/i5100_edac.c
+++ b/drivers/edac/i5100_edac.c
@@ -338,6 +338,7 @@ struct i5100_priv {
 	unsigned ranksperchan;	/* number of ranks per channel */
 
 	struct pci_dev *mc;	/* device 16 func 1 */
+	struct pci_dev *einj;	/* device 19 func 0 */
 	struct pci_dev *ch0mm;	/* device 21 func 0 */
 	struct pci_dev *ch1mm;	/* device 22 func 0 */
 
@@ -869,7 +870,7 @@ static int i5100_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
 	struct mem_ctl_info *mci;
 	struct edac_mc_layer layers[2];
 	struct i5100_priv *priv;
-	struct pci_dev *ch0mm, *ch1mm;
+	struct pci_dev *ch0mm, *ch1mm, *einj;
 	int ret = 0;
 	u32 dw;
 	int ranksperch;
@@ -941,6 +942,22 @@ static int i5100_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
 		goto bail_disable_ch1;
 	}
 
+
+	/* device 19, func 0, Error injection */
+	einj = pci_get_device_func(PCI_VENDOR_ID_INTEL,
+				    PCI_DEVICE_ID_INTEL_5100_19, 0);
+	if (!einj) {
+		ret = -ENODEV;
+		goto bail_einj;
+	}
+
+	rc = pci_enable_device(einj);
+	if (rc < 0) {
+		ret = rc;
+		goto bail_disable_einj;
+	}
+
+
 	mci->pdev = &pdev->dev;
 
 	priv = mci->pvt_info;
@@ -948,6 +965,7 @@ static int i5100_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
 	priv->mc = pdev;
 	priv->ch0mm = ch0mm;
 	priv->ch1mm = ch1mm;
+	priv->einj = einj;
 
 	INIT_DELAYED_WORK(&(priv->i5100_scrubbing), i5100_refresh_scrubbing);
 
@@ -999,6 +1017,12 @@ bail_scrub:
 	cancel_delayed_work_sync(&(priv->i5100_scrubbing));
 	edac_mc_free(mci);
 
+bail_disable_einj:
+	pci_disable_device(einj);
+
+bail_einj:
+	pci_dev_put(einj);
+
 bail_disable_ch1:
 	pci_disable_device(ch1mm);
 
@@ -1036,8 +1060,10 @@ static void i5100_remove_one(struct pci_dev *pdev)
 	pci_disable_device(pdev);
 	pci_disable_device(priv->ch0mm);
 	pci_disable_device(priv->ch1mm);
+	pci_disable_device(priv->einj);
 	pci_dev_put(priv->ch0mm);
 	pci_dev_put(priv->ch1mm);
+	pci_dev_put(priv->einj);
 
 	edac_mc_free(mci);
 }
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 0eb65796bcb9..d0d1e801e350 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2776,6 +2776,7 @@
 #define PCI_DEVICE_ID_INTEL_JAKETOWN_UBOX	0x3ce0
 #define PCI_DEVICE_ID_INTEL_IOAT_SNB	0x402f
 #define PCI_DEVICE_ID_INTEL_5100_16	0x65f0
+#define PCI_DEVICE_ID_INTEL_5100_19	0x65f3
 #define PCI_DEVICE_ID_INTEL_5100_21	0x65f5
 #define PCI_DEVICE_ID_INTEL_5100_22	0x65f6
 #define PCI_DEVICE_ID_INTEL_5400_ERR	0x4030
-- 
cgit v1.2.3


From c66b5a79a9348ccd6d1cd81416027d0e12da965d Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@redhat.com>
Date: Fri, 15 Feb 2013 07:21:08 -0300
Subject: edac: add a new memory layer type

There are some cases where the memory controller layout is
completely hidden. This is the case of firmware-driven error
code, like the one provided by GHES. Add a new layer to be
used on such memory error report mechanisms.

Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/edac/edac_mc.c | 1 +
 include/linux/edac.h   | 4 ++++
 2 files changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index fb219bc5cb2c..78d8c7d6e76a 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -900,6 +900,7 @@ const char *edac_layer_name[] = {
 	[EDAC_MC_LAYER_CHANNEL] = "channel",
 	[EDAC_MC_LAYER_SLOT] = "slot",
 	[EDAC_MC_LAYER_CHIP_SELECT] = "csrow",
+	[EDAC_MC_LAYER_ALL_MEM] = "memory",
 };
 EXPORT_SYMBOL_GPL(edac_layer_name);
 
diff --git a/include/linux/edac.h b/include/linux/edac.h
index 4784213c819d..1b7744c219b8 100644
--- a/include/linux/edac.h
+++ b/include/linux/edac.h
@@ -375,6 +375,9 @@ enum scrub_type {
  * @EDAC_MC_LAYER_CHANNEL:	memory layer is named "channel"
  * @EDAC_MC_LAYER_SLOT:		memory layer is named "slot"
  * @EDAC_MC_LAYER_CHIP_SELECT:	memory layer is named "chip select"
+ * @EDAC_MC_LAYER_ALL_MEM:	memory layout is unknown. All memory is mapped
+ *				as a single memory area. This is used when
+ *				retrieving errors from a firmware driven driver.
  *
  * This enum is used by the drivers to tell edac_mc_sysfs what name should
  * be used when describing a memory stick location.
@@ -384,6 +387,7 @@ enum edac_mc_layer_type {
 	EDAC_MC_LAYER_CHANNEL,
 	EDAC_MC_LAYER_SLOT,
 	EDAC_MC_LAYER_CHIP_SELECT,
+	EDAC_MC_LAYER_ALL_MEM,
 };
 
 /**
-- 
cgit v1.2.3


From c2c93dbc97622e26dc19edc71e50ebaa996d7804 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@redhat.com>
Date: Tue, 19 Feb 2013 06:50:05 -0300
Subject: edac: remove proc_name from mci structure

proc_name isn't used anywhere. Remove it.

Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 include/linux/edac.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/edac.h b/include/linux/edac.h
index 1b7744c219b8..ff18efc754f3 100644
--- a/include/linux/edac.h
+++ b/include/linux/edac.h
@@ -48,7 +48,6 @@ static inline void opstate_init(void)
 }
 
 #define EDAC_MC_LABEL_LEN	31
-#define MC_PROC_NAME_MAX_LEN	7
 
 /**
  * enum dev_type - describe the type of memory DRAM chips used at the stick
@@ -633,7 +632,6 @@ struct mem_ctl_info {
 	const char *mod_ver;
 	const char *ctl_name;
 	const char *dev_name;
-	char proc_name[MC_PROC_NAME_MAX_LEN + 1];
 	void *pvt_info;
 	unsigned long start_time;	/* mci load start time (in jiffies) */
 
-- 
cgit v1.2.3


From c7ef7645544131b0750478d1cf94cdfa945c809d Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@redhat.com>
Date: Thu, 21 Feb 2013 13:36:45 -0300
Subject: edac: reduce stack pressure by using a pre-allocated buffer

The number of variables at the stack is too big.
Reduces the stack usage by using a pre-allocated error
buffer.

Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/edac/edac_mc.c | 81 ++++++++++++++++++++++++++++++--------------------
 include/linux/edac.h   | 56 ++++++++++++++++++++++++++++++++++
 2 files changed, 104 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index 34eb9703ed33..4f18dd755939 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -1065,7 +1065,6 @@ static void edac_ue_error(struct mem_ctl_info *mci,
 	edac_inc_ue_error(mci, enable_per_layer_report, pos, error_count);
 }
 
-#define OTHER_LABEL " or "
 
 /**
  * edac_mc_handle_error - reports a memory event to userspace
@@ -1097,19 +1096,28 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
 			  const char *msg,
 			  const char *other_detail)
 {
-	/* FIXME: too much for stack: move it to some pre-alocated area */
-	char detail[80], location[80];
-	char label[(EDAC_MC_LABEL_LEN + 1 + sizeof(OTHER_LABEL)) * mci->tot_dimms];
+	char detail[80];
 	char *p;
 	int row = -1, chan = -1;
 	int pos[EDAC_MAX_LAYERS] = { top_layer, mid_layer, low_layer };
-	int i;
-	long grain;
-	bool enable_per_layer_report = false;
+	int i, n_labels = 0;
 	u8 grain_bits;
+	struct edac_raw_error_desc *e = &mci->error_desc;
 
 	edac_dbg(3, "MC%d\n", mci->mc_idx);
 
+	/* Fills the error report buffer */
+	memset(e, 0, sizeof (*e));
+	e->error_count = error_count;
+	e->top_layer = top_layer;
+	e->mid_layer = mid_layer;
+	e->low_layer = low_layer;
+	e->page_frame_number = page_frame_number;
+	e->offset_in_page = offset_in_page;
+	e->syndrome = syndrome;
+	e->msg = msg;
+	e->other_detail = other_detail;
+
 	/*
 	 * Check if the event report is consistent and if the memory
 	 * location is known. If it is known, enable_per_layer_report will be
@@ -1132,7 +1140,7 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
 			pos[i] = -1;
 		}
 		if (pos[i] >= 0)
-			enable_per_layer_report = true;
+			e->enable_per_layer_report = true;
 	}
 
 	/*
@@ -1146,8 +1154,7 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
 	 * where each memory belongs to a separate channel within the same
 	 * branch.
 	 */
-	grain = 0;
-	p = label;
+	p = e->label;
 	*p = '\0';
 
 	for (i = 0; i < mci->tot_dimms; i++) {
@@ -1161,8 +1168,8 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
 			continue;
 
 		/* get the max grain, over the error match range */
-		if (dimm->grain > grain)
-			grain = dimm->grain;
+		if (dimm->grain > e->grain)
+			e->grain = dimm->grain;
 
 		/*
 		 * If the error is memory-controller wide, there's no need to
@@ -1170,8 +1177,13 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
 		 * channel/memory controller/...  may be affected.
 		 * Also, don't show errors for empty DIMM slots.
 		 */
-		if (enable_per_layer_report && dimm->nr_pages) {
-			if (p != label) {
+		if (e->enable_per_layer_report && dimm->nr_pages) {
+			if (n_labels >= EDAC_MAX_LABELS) {
+				e->enable_per_layer_report = false;
+				break;
+			}
+			n_labels++;
+			if (p != e->label) {
 				strcpy(p, OTHER_LABEL);
 				p += strlen(OTHER_LABEL);
 			}
@@ -1198,12 +1210,12 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
 		}
 	}
 
-	if (!enable_per_layer_report) {
-		strcpy(label, "any memory");
+	if (!e->enable_per_layer_report) {
+		strcpy(e->label, "any memory");
 	} else {
 		edac_dbg(4, "csrow/channel to increment: (%d,%d)\n", row, chan);
-		if (p == label)
-			strcpy(label, "unknown memory");
+		if (p == e->label)
+			strcpy(e->label, "unknown memory");
 		if (type == HW_EVENT_ERR_CORRECTED) {
 			if (row >= 0) {
 				mci->csrows[row]->ce_count += error_count;
@@ -1216,7 +1228,7 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
 	}
 
 	/* Fill the RAM location data */
-	p = location;
+	p = e->location;
 
 	for (i = 0; i < mci->n_layers; i++) {
 		if (pos[i] < 0)
@@ -1226,32 +1238,35 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
 			     edac_layer_name[mci->layers[i].type],
 			     pos[i]);
 	}
-	if (p > location)
+	if (p > e->location)
 		*(p - 1) = '\0';
 
 	/* Report the error via the trace interface */
-	grain_bits = fls_long(grain) + 1;
-	trace_mc_event(type, msg, label, error_count,
-		       mci->mc_idx, top_layer, mid_layer, low_layer,
-		       PAGES_TO_MiB(page_frame_number) | offset_in_page,
-		       grain_bits, syndrome, other_detail);
+	grain_bits = fls_long(e->grain) + 1;
+	trace_mc_event(type, e->msg, e->label, e->error_count,
+		       mci->mc_idx, e->top_layer, e->mid_layer, e->low_layer,
+		       PAGES_TO_MiB(e->page_frame_number) | e->offset_in_page,
+		       grain_bits, e->syndrome, other_detail);
 
 	/* Memory type dependent details about the error */
 	if (type == HW_EVENT_ERR_CORRECTED) {
 		snprintf(detail, sizeof(detail),
 			"page:0x%lx offset:0x%lx grain:%ld syndrome:0x%lx",
-			page_frame_number, offset_in_page,
-			grain, syndrome);
-		edac_ce_error(mci, error_count, pos, msg, location, label,
-			      detail, other_detail, enable_per_layer_report,
-			      page_frame_number, offset_in_page, grain);
+			e->page_frame_number, e->offset_in_page,
+			e->grain, e->syndrome);
+		edac_ce_error(mci, e->error_count, pos, e->msg, e->location,
+			      e->label, detail, other_detail,
+			      e->enable_per_layer_report,
+			      e->page_frame_number, e->offset_in_page,
+			      e->grain);
 	} else {
 		snprintf(detail, sizeof(detail),
 			"page:0x%lx offset:0x%lx grain:%ld",
-			page_frame_number, offset_in_page, grain);
+			page_frame_number, offset_in_page, e->grain);
 
-		edac_ue_error(mci, error_count, pos, msg, location, label,
-			      detail, other_detail, enable_per_layer_report);
+		edac_ue_error(mci, e->error_count, pos, e->msg, e->location,
+			      e->label, detail, other_detail,
+			      e->enable_per_layer_report);
 	}
 }
 EXPORT_SYMBOL_GPL(edac_mc_handle_error);
diff --git a/include/linux/edac.h b/include/linux/edac.h
index ff18efc754f3..096b7fcdf484 100644
--- a/include/linux/edac.h
+++ b/include/linux/edac.h
@@ -47,8 +47,18 @@ static inline void opstate_init(void)
 	return;
 }
 
+/* Max length of a DIMM label*/
 #define EDAC_MC_LABEL_LEN	31
 
+/* Maximum size of the location string */
+#define LOCATION_SIZE 80
+
+/* Defines the maximum number of labels that can be reported */
+#define EDAC_MAX_LABELS		8
+
+/* String used to join two or more labels */
+#define OTHER_LABEL " or "
+
 /**
  * enum dev_type - describe the type of memory DRAM chips used at the stick
  * @DEV_UNKNOWN:	Can't be determined, or MC doesn't support detect it
@@ -553,6 +563,46 @@ struct errcount_attribute_data {
 	int layer0, layer1, layer2;
 };
 
+/**
+ * edac_raw_error_desc - Raw error report structure
+ * @grain:			minimum granularity for an error report, in bytes
+ * @error_count:		number of errors of the same type
+ * @top_layer:			top layer of the error (layer[0])
+ * @mid_layer:			middle layer of the error (layer[1])
+ * @low_layer:			low layer of the error (layer[2])
+ * @page_frame_number:		page where the error happened
+ * @offset_in_page:		page offset
+ * @syndrome:			syndrome of the error (or 0 if unknown or if
+ * 				the syndrome is not applicable)
+ * @msg:			error message
+ * @location:			location of the error
+ * @label:			label of the affected DIMM(s)
+ * @other_detail:		other driver-specific detail about the error
+ * @enable_per_layer_report:	if false, the error affects all layers
+ *				(typically, a memory controller error)
+ */
+struct edac_raw_error_desc {
+	/*
+	 * NOTE: everything before grain won't be cleaned by
+	 * edac_raw_error_desc_clean()
+	 */
+	char location[LOCATION_SIZE];
+	char label[(EDAC_MC_LABEL_LEN + 1 + sizeof(OTHER_LABEL)) * EDAC_MAX_LABELS];
+	long grain;
+
+	/* the vars below and grain will be cleaned on every new error report */
+	u16 error_count;
+	int top_layer;
+	int mid_layer;
+	int low_layer;
+	unsigned long page_frame_number;
+	unsigned long offset_in_page;
+	unsigned long syndrome;
+	const char *msg;
+	const char *other_detail;
+	bool enable_per_layer_report;
+};
+
 /* MEMORY controller information structure
  */
 struct mem_ctl_info {
@@ -660,6 +710,12 @@ struct mem_ctl_info {
 	/* work struct for this MC */
 	struct delayed_work work;
 
+	/*
+	 * Used to report an error - by being at the global struct
+	 * makes the memory allocated by the EDAC core
+	 */
+	struct edac_raw_error_desc error_desc;
+
 	/* the internal state of this controller instance */
 	int op_state;
 
-- 
cgit v1.2.3


From 8dd93d450bff251575c56b8f058393124e1f00fb Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@redhat.com>
Date: Tue, 19 Feb 2013 21:26:22 -0300
Subject: edac: add support for error type "Info"

The CPER spec defines a forth type of error: informational
logs. Add support for it at the edac API and at the
trace event interface.

Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 include/linux/edac.h    | 16 ++++++++++++++++
 include/ras/ras_event.h |  4 +---
 2 files changed, 17 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/edac.h b/include/linux/edac.h
index 096b7fcdf484..4fd4999ccb5b 100644
--- a/include/linux/edac.h
+++ b/include/linux/edac.h
@@ -109,8 +109,24 @@ enum hw_event_mc_err_type {
 	HW_EVENT_ERR_CORRECTED,
 	HW_EVENT_ERR_UNCORRECTED,
 	HW_EVENT_ERR_FATAL,
+	HW_EVENT_ERR_INFO,
 };
 
+static inline char *mc_event_error_type(const unsigned int err_type)
+{
+	switch (err_type) {
+	case HW_EVENT_ERR_CORRECTED:
+		return "Corrected";
+	case HW_EVENT_ERR_UNCORRECTED:
+		return "Uncorrected";
+	case HW_EVENT_ERR_FATAL:
+		return "Fatal";
+	default:
+	case HW_EVENT_ERR_INFO:
+		return "Info";
+	}
+}
+
 /**
  * enum mem_type - memory types. For a more detailed reference, please see
  *			http://en.wikipedia.org/wiki/DRAM
diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h
index 260470e72483..21cdb0b7b0fb 100644
--- a/include/ras/ras_event.h
+++ b/include/ras/ras_event.h
@@ -78,9 +78,7 @@ TRACE_EVENT(mc_event,
 
 	TP_printk("%d %s error%s:%s%s on %s (mc:%d location:%d:%d:%d address:0x%08lx grain:%d syndrome:0x%08lx%s%s)",
 		  __entry->error_count,
-		  (__entry->error_type == HW_EVENT_ERR_CORRECTED) ? "Corrected" :
-			((__entry->error_type == HW_EVENT_ERR_FATAL) ?
-			"Fatal" : "Uncorrected"),
+		  mc_event_error_type(__entry->error_type),
 		  __entry->error_count > 1 ? "s" : "",
 		  ((char *)__get_str(msg))[0] ? " " : "",
 		  __get_str(msg),
-- 
cgit v1.2.3


From aee38fadd25989c3e6d99fc08752e2d87601ffc1 Mon Sep 17 00:00:00 2001
From: Shani Michaeli <shanim@mellanox.com>
Date: Wed, 6 Feb 2013 16:19:07 +0000
Subject: IB/mlx4_ib: Remove local invalidate segment unused fields

Remove unused fields from the local invalidate WQE segment structure.

Signed-off-by: Haggai Eran <haggaie@mellanox.com>
Signed-off-by: Shani Michaeli <shanim@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
---
 drivers/infiniband/hw/mlx4/qp.c | 6 ++----
 include/linux/mlx4/qp.h         | 8 +++-----
 2 files changed, 5 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 19e0637220b9..c6dde71b4642 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -1983,10 +1983,8 @@ static void set_fmr_seg(struct mlx4_wqe_fmr_seg *fseg, struct ib_send_wr *wr)
 
 static void set_local_inv_seg(struct mlx4_wqe_local_inval_seg *iseg, u32 rkey)
 {
-	iseg->flags	= 0;
-	iseg->mem_key	= cpu_to_be32(rkey);
-	iseg->guest_id	= 0;
-	iseg->pa	= 0;
+	memset(iseg, 0, sizeof(*iseg));
+	iseg->mem_key = cpu_to_be32(rkey);
 }
 
 static __always_inline void set_raddr_seg(struct mlx4_wqe_raddr_seg *rseg,
diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h
index 4b4ad6ffef92..6c8a68c602be 100644
--- a/include/linux/mlx4/qp.h
+++ b/include/linux/mlx4/qp.h
@@ -304,12 +304,10 @@ struct mlx4_wqe_fmr_ext_seg {
 };
 
 struct mlx4_wqe_local_inval_seg {
-	__be32			flags;
-	u32			reserved1;
+	u64			reserved1;
 	__be32			mem_key;
-	u32			reserved2[2];
-	__be32			guest_id;
-	__be64			pa;
+	u32			reserved2;
+	u64			reserved3[2];
 };
 
 struct mlx4_wqe_raddr_seg {
-- 
cgit v1.2.3


From 61083720702a329ed5952e32bda384e3bbc9093c Mon Sep 17 00:00:00 2001
From: Shani Michaeli <shanim@mellanox.com>
Date: Wed, 6 Feb 2013 16:19:09 +0000
Subject: mlx4_core: Propagate MR deregistration failures to caller

MR deregistration fails when memory windows are bound to the MR.
Handle such failures by propagating them to the caller ULP.

Signed-off-by: Haggai Eran <haggaie@mellanox.com>
Signed-off-by: Shani Michaeli <shanim@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
---
 drivers/infiniband/hw/mlx4/mr.c              | 13 ++++++++-----
 drivers/net/ethernet/mellanox/mlx4/en_main.c |  4 ++--
 drivers/net/ethernet/mellanox/mlx4/mr.c      | 29 +++++++++++++++++++++-------
 include/linux/mlx4/device.h                  |  2 +-
 4 files changed, 33 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c
index bbaf6176f207..254e1cf26439 100644
--- a/drivers/infiniband/hw/mlx4/mr.c
+++ b/drivers/infiniband/hw/mlx4/mr.c
@@ -68,7 +68,7 @@ struct ib_mr *mlx4_ib_get_dma_mr(struct ib_pd *pd, int acc)
 	return &mr->ibmr;
 
 err_mr:
-	mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr);
+	(void) mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr);
 
 err_free:
 	kfree(mr);
@@ -163,7 +163,7 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 	return &mr->ibmr;
 
 err_mr:
-	mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr);
+	(void) mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr);
 
 err_umem:
 	ib_umem_release(mr->umem);
@@ -177,8 +177,11 @@ err_free:
 int mlx4_ib_dereg_mr(struct ib_mr *ibmr)
 {
 	struct mlx4_ib_mr *mr = to_mmr(ibmr);
+	int ret;
 
-	mlx4_mr_free(to_mdev(ibmr->device)->dev, &mr->mmr);
+	ret = mlx4_mr_free(to_mdev(ibmr->device)->dev, &mr->mmr);
+	if (ret)
+		return ret;
 	if (mr->umem)
 		ib_umem_release(mr->umem);
 	kfree(mr);
@@ -212,7 +215,7 @@ struct ib_mr *mlx4_ib_alloc_fast_reg_mr(struct ib_pd *pd,
 	return &mr->ibmr;
 
 err_mr:
-	mlx4_mr_free(dev->dev, &mr->mmr);
+	(void) mlx4_mr_free(dev->dev, &mr->mmr);
 
 err_free:
 	kfree(mr);
@@ -291,7 +294,7 @@ struct ib_fmr *mlx4_ib_fmr_alloc(struct ib_pd *pd, int acc,
 	return &fmr->ibfmr;
 
 err_mr:
-	mlx4_mr_free(to_mdev(pd->device)->dev, &fmr->mfmr.mr);
+	(void) mlx4_mr_free(to_mdev(pd->device)->dev, &fmr->mfmr.mr);
 
 err_free:
 	kfree(fmr);
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_main.c b/drivers/net/ethernet/mellanox/mlx4/en_main.c
index 3a2b8c65642d..a2987142734d 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_main.c
@@ -176,7 +176,7 @@ static void mlx4_en_remove(struct mlx4_dev *dev, void *endev_ptr)
 
 	flush_workqueue(mdev->workqueue);
 	destroy_workqueue(mdev->workqueue);
-	mlx4_mr_free(dev, &mdev->mr);
+	(void) mlx4_mr_free(dev, &mdev->mr);
 	iounmap(mdev->uar_map);
 	mlx4_uar_free(dev, &mdev->priv_uar);
 	mlx4_pd_free(dev, mdev->priv_pdn);
@@ -283,7 +283,7 @@ static void *mlx4_en_add(struct mlx4_dev *dev)
 	return mdev;
 
 err_mr:
-	mlx4_mr_free(dev, &mdev->mr);
+	(void) mlx4_mr_free(dev, &mdev->mr);
 err_map:
 	if (!mdev->uar_map)
 		iounmap(mdev->uar_map);
diff --git a/drivers/net/ethernet/mellanox/mlx4/mr.c b/drivers/net/ethernet/mellanox/mlx4/mr.c
index 49705cf860c6..06b16e4ace80 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mr.c
+++ b/drivers/net/ethernet/mellanox/mlx4/mr.c
@@ -442,7 +442,7 @@ int mlx4_mr_alloc(struct mlx4_dev *dev, u32 pd, u64 iova, u64 size, u32 access,
 }
 EXPORT_SYMBOL_GPL(mlx4_mr_alloc);
 
-static void mlx4_mr_free_reserved(struct mlx4_dev *dev, struct mlx4_mr *mr)
+static int mlx4_mr_free_reserved(struct mlx4_dev *dev, struct mlx4_mr *mr)
 {
 	int err;
 
@@ -450,20 +450,31 @@ static void mlx4_mr_free_reserved(struct mlx4_dev *dev, struct mlx4_mr *mr)
 		err = mlx4_HW2SW_MPT(dev, NULL,
 				     key_to_hw_index(mr->key) &
 				     (dev->caps.num_mpts - 1));
-		if (err)
-			mlx4_warn(dev, "xxx HW2SW_MPT failed (%d)\n", err);
+		if (err) {
+			mlx4_warn(dev, "HW2SW_MPT failed (%d),", err);
+			mlx4_warn(dev, "MR has MWs bound to it.\n");
+			return err;
+		}
 
 		mr->enabled = MLX4_MPT_EN_SW;
 	}
 	mlx4_mtt_cleanup(dev, &mr->mtt);
+
+	return 0;
 }
 
-void mlx4_mr_free(struct mlx4_dev *dev, struct mlx4_mr *mr)
+int mlx4_mr_free(struct mlx4_dev *dev, struct mlx4_mr *mr)
 {
-	mlx4_mr_free_reserved(dev, mr);
+	int ret;
+
+	ret = mlx4_mr_free_reserved(dev, mr);
+	if (ret)
+		return ret;
 	if (mr->enabled)
 		mlx4_mpt_free_icm(dev, key_to_hw_index(mr->key));
 	mlx4_mpt_release(dev, key_to_hw_index(mr->key));
+
+	return 0;
 }
 EXPORT_SYMBOL_GPL(mlx4_mr_free);
 
@@ -831,7 +842,7 @@ int mlx4_fmr_alloc(struct mlx4_dev *dev, u32 pd, u32 access, int max_pages,
 	return 0;
 
 err_free:
-	mlx4_mr_free(dev, &fmr->mr);
+	(void) mlx4_mr_free(dev, &fmr->mr);
 	return err;
 }
 EXPORT_SYMBOL_GPL(mlx4_fmr_alloc);
@@ -888,10 +899,14 @@ EXPORT_SYMBOL_GPL(mlx4_fmr_unmap);
 
 int mlx4_fmr_free(struct mlx4_dev *dev, struct mlx4_fmr *fmr)
 {
+	int ret;
+
 	if (fmr->maps)
 		return -EBUSY;
 
-	mlx4_mr_free(dev, &fmr->mr);
+	ret = mlx4_mr_free(dev, &fmr->mr);
+	if (ret)
+		return ret;
 	fmr->mr.enabled = MLX4_MPT_DISABLED;
 
 	return 0;
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 20ea939c22a6..e9fe8caaf8bb 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -801,7 +801,7 @@ u64 mlx4_mtt_addr(struct mlx4_dev *dev, struct mlx4_mtt *mtt);
 
 int mlx4_mr_alloc(struct mlx4_dev *dev, u32 pd, u64 iova, u64 size, u32 access,
 		  int npages, int page_shift, struct mlx4_mr *mr);
-void mlx4_mr_free(struct mlx4_dev *dev, struct mlx4_mr *mr);
+int mlx4_mr_free(struct mlx4_dev *dev, struct mlx4_mr *mr);
 int mlx4_mr_enable(struct mlx4_dev *dev, struct mlx4_mr *mr);
 int mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
 		   int start_index, int npages, u64 *page_list);
-- 
cgit v1.2.3


From 6640dfdf6fff387c0a8f7fb8ac1d47c6b093484e Mon Sep 17 00:00:00 2001
From: Daniel Santos <daniel.santos@pobox.com>
Date: Thu, 21 Feb 2013 16:41:32 -0800
Subject: compiler-gcc4.h: Reorder macros based upon gcc ver

This helps to keep the file from getting confusing, removes one
duplicate version check and should encourage future editors to put new
macros where they belong.

Signed-off-by: Daniel Santos <daniel.santos@pobox.com>
Acked-by: David Rientjes <rientjes@google.com>
Acked-by: Borislav Petkov <bp@alien8.de>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Joe Perches <joe@perches.com>
Cc: Josh Triplett <josh@joshtriplett.org>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/compiler-gcc4.h | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h
index 662fd1b4c42a..c9785c22744e 100644
--- a/include/linux/compiler-gcc4.h
+++ b/include/linux/compiler-gcc4.h
@@ -13,6 +13,10 @@
 #define __must_check 		__attribute__((warn_unused_result))
 #define __compiler_offsetof(a,b) __builtin_offsetof(a,b)
 
+#if __GNUC_MINOR__ > 0
+# define __compiletime_object_size(obj) __builtin_object_size(obj, 0)
+#endif
+
 #if __GNUC_MINOR__ >= 3
 /* Mark functions as cold. gcc will assume any path leading to a call
    to them will be unlikely.  This means a lot of manual unlikely()s
@@ -33,6 +37,12 @@
 
 #define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__)
 
+#ifndef __CHECKER__
+# define __compiletime_warning(message) __attribute__((warning(message)))
+# define __compiletime_error(message) __attribute__((error(message)))
+#endif /* __CHECKER__ */
+#endif /* __GNUC_MINOR__ >= 3 */
+
 #if __GNUC_MINOR__ >= 5
 /*
  * Mark a position in code as unreachable.  This can be used to
@@ -48,8 +58,7 @@
 /* Mark a function definition as prohibited from being cloned. */
 #define __noclone	__attribute__((__noclone__))
 
-#endif
-#endif
+#endif /* __GNUC_MINOR__ >= 5 */
 
 #if __GNUC_MINOR__ >= 6
 /*
@@ -58,13 +67,6 @@
 #define __visible __attribute__((externally_visible))
 #endif
 
-#if __GNUC_MINOR__ > 0
-#define __compiletime_object_size(obj) __builtin_object_size(obj, 0)
-#endif
-#if __GNUC_MINOR__ >= 3 && !defined(__CHECKER__)
-#define __compiletime_warning(message) __attribute__((warning(message)))
-#define __compiletime_error(message) __attribute__((error(message)))
-#endif
 
 #ifdef CONFIG_ARCH_USE_BUILTIN_BSWAP
 #if __GNUC_MINOR__ >= 4
-- 
cgit v1.2.3


From 3f3f8d2f48acfd8ed3b8e6b7377935da57b27b16 Mon Sep 17 00:00:00 2001
From: Daniel Santos <daniel.santos@pobox.com>
Date: Thu, 21 Feb 2013 16:41:39 -0800
Subject: compiler-gcc.h: Add gcc-recommended GCC_VERSION macro

Throughout compiler*.h, many version checks are made.  These can be
simplified by using the macro that gcc's documentation recommends.
However, my primary reason for adding this is that I need bug-check
macros that are enabled at certain gcc versions and it's cleaner to use
this macro than the tradition method:

  #if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ => 2)

If you add patch level, it gets this ugly:

  #if __GNUC__ > 4 || (__GNUC__ == 4 && (__GNUC_MINOR__ > 2 || \
      __GNUC_MINOR__ == 2 __GNUC_PATCHLEVEL__ >= 1))

As opposed to:

  #if GCC_VERSION >= 40201

While having separate headers for gcc 3 & 4 eliminates some of this
verbosity, they can still be cleaned up by this.

See also:

  http://gcc.gnu.org/onlinedocs/cpp/Common-Predefined-Macros.html

Signed-off-by: Daniel Santos <daniel.santos@pobox.com>
Acked-by: Borislav Petkov <bp@alien8.de>
Acked-by: David Rientjes <rientjes@google.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Joe Perches <joe@perches.com>
Cc: Josh Triplett <josh@joshtriplett.org>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/compiler-gcc.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 6a6d7aefe12d..24545cd90a25 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -5,6 +5,9 @@
 /*
  * Common definitions for all gcc versions go here.
  */
+#define GCC_VERSION (__GNUC__ * 10000 \
+		   + __GNUC_MINOR__ * 100 \
+		   + __GNUC_PATCHLEVEL__)
 
 
 /* Optimization barrier */
-- 
cgit v1.2.3


From 733ed6e43756b0aec25c9429b810ba74e24c980c Mon Sep 17 00:00:00 2001
From: Daniel Santos <daniel.santos@pobox.com>
Date: Thu, 21 Feb 2013 16:41:41 -0800
Subject: compiler-gcc{3,4}.h: Use GCC_VERSION macro

Using GCC_VERSION reduces complexity, is easier to read and is GCC's
recommended mechanism for doing version checks.  (Just don't ask me why
they didn't define it in the first place.) This also makes it easy to
merge compiler-gcc{,3,4}.h should somebody want to.

Signed-off-by: Daniel Santos <daniel.santos@pobox.com>
Acked-by: David Rientjes <rientjes@google.com>
Acked-by: Borislav Petkov <bp@alien8.de>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Joe Perches <joe@perches.com>
Cc: Josh Triplett <josh@joshtriplett.org>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/compiler-gcc3.h |  8 ++++----
 include/linux/compiler-gcc4.h | 20 ++++++++++----------
 2 files changed, 14 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/compiler-gcc3.h b/include/linux/compiler-gcc3.h
index 37d412436d0f..7d89febe4d79 100644
--- a/include/linux/compiler-gcc3.h
+++ b/include/linux/compiler-gcc3.h
@@ -2,22 +2,22 @@
 #error "Please don't include <linux/compiler-gcc3.h> directly, include <linux/compiler.h> instead."
 #endif
 
-#if __GNUC_MINOR__ < 2
+#if GCC_VERSION < 30200
 # error Sorry, your compiler is too old - please upgrade it.
 #endif
 
-#if __GNUC_MINOR__ >= 3
+#if GCC_VERSION >= 30300
 # define __used			__attribute__((__used__))
 #else
 # define __used			__attribute__((__unused__))
 #endif
 
-#if __GNUC_MINOR__ >= 4
+#if GCC_VERSION >= 30400
 #define __must_check		__attribute__((warn_unused_result))
 #endif
 
 #ifdef CONFIG_GCOV_KERNEL
-# if __GNUC_MINOR__ < 4
+# if GCC_VERSION < 30400
 #   error "GCOV profiling support for gcc versions below 3.4 not included"
 # endif /* __GNUC_MINOR__ */
 #endif /* CONFIG_GCOV_KERNEL */
diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h
index c9785c22744e..a9ffdfe7713c 100644
--- a/include/linux/compiler-gcc4.h
+++ b/include/linux/compiler-gcc4.h
@@ -4,7 +4,7 @@
 
 /* GCC 4.1.[01] miscompiles __weak */
 #ifdef __KERNEL__
-# if __GNUC_MINOR__ == 1 && __GNUC_PATCHLEVEL__ <= 1
+# if GCC_VERSION >= 40100 &&  GCC_VERSION <= 40101
 #  error Your version of gcc miscompiles the __weak directive
 # endif
 #endif
@@ -13,11 +13,11 @@
 #define __must_check 		__attribute__((warn_unused_result))
 #define __compiler_offsetof(a,b) __builtin_offsetof(a,b)
 
-#if __GNUC_MINOR__ > 0
+#if GCC_VERSION >= 40100
 # define __compiletime_object_size(obj) __builtin_object_size(obj, 0)
 #endif
 
-#if __GNUC_MINOR__ >= 3
+#if GCC_VERSION >= 40300
 /* Mark functions as cold. gcc will assume any path leading to a call
    to them will be unlikely.  This means a lot of manual unlikely()s
    are unnecessary now for any paths leading to the usual suspects
@@ -41,9 +41,9 @@
 # define __compiletime_warning(message) __attribute__((warning(message)))
 # define __compiletime_error(message) __attribute__((error(message)))
 #endif /* __CHECKER__ */
-#endif /* __GNUC_MINOR__ >= 3 */
+#endif /* GCC_VERSION >= 40300 */
 
-#if __GNUC_MINOR__ >= 5
+#if GCC_VERSION >= 40500
 /*
  * Mark a position in code as unreachable.  This can be used to
  * suppress control flow warnings after asm blocks that transfer
@@ -58,9 +58,9 @@
 /* Mark a function definition as prohibited from being cloned. */
 #define __noclone	__attribute__((__noclone__))
 
-#endif /* __GNUC_MINOR__ >= 5 */
+#endif /* GCC_VERSION >= 40500 */
 
-#if __GNUC_MINOR__ >= 6
+#if GCC_VERSION >= 40600
 /*
  * Tell the optimizer that something else uses this function or variable.
  */
@@ -69,11 +69,11 @@
 
 
 #ifdef CONFIG_ARCH_USE_BUILTIN_BSWAP
-#if __GNUC_MINOR__ >= 4
+#if GCC_VERSION >= 40400
 #define __HAVE_BUILTIN_BSWAP32__
 #define __HAVE_BUILTIN_BSWAP64__
 #endif
-#if __GNUC_MINOR__ >= 8 || (defined(__powerpc__) && __GNUC_MINOR__ >= 6)
+#if GCC_VERSION >= 40800 || (defined(__powerpc__) && GCC_VERSION >= 40600)
 #define __HAVE_BUILTIN_BSWAP16__
 #endif
-#endif
+#endif /* CONFIG_ARCH_USE_BUILTIN_BSWAP */
-- 
cgit v1.2.3


From 6ae8d04871f84d853673e9e9f3f713e77a2fe145 Mon Sep 17 00:00:00 2001
From: Daniel Santos <daniel.santos@pobox.com>
Date: Thu, 21 Feb 2013 16:41:42 -0800
Subject: compiler{,-gcc4}.h, bug.h: Remove duplicate macros

__linktime_error() does the same thing as __compiletime_error() and is
only used in bug.h.  Since the macro defines a function attribute that
will cause a failure at compile-time (not link-time), it makes more sense
to keep __compiletime_error(), which is also neatly mated with
__compiletime_warning().

Signed-off-by: Daniel Santos <daniel.santos@pobox.com>
Acked-by: David Rientjes <rientjes@google.com>
Acked-by: Borislav Petkov <bp@alien8.de>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Joe Perches <joe@perches.com>
Cc: Josh Triplett <josh@joshtriplett.org>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bug.h           | 2 +-
 include/linux/compiler-gcc4.h | 2 --
 include/linux/compiler.h      | 3 ---
 3 files changed, 1 insertion(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bug.h b/include/linux/bug.h
index b1cf40de847e..2a11774c5e64 100644
--- a/include/linux/bug.h
+++ b/include/linux/bug.h
@@ -74,7 +74,7 @@ extern int __build_bug_on_failed;
 #define BUILD_BUG()						\
 	do {							\
 		extern void __build_bug_failed(void)		\
-			__linktime_error("BUILD_BUG failed");	\
+			__compiletime_error("BUILD_BUG failed");\
 		__build_bug_failed();				\
 	} while (0)
 
diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h
index a9ffdfe7713c..68b162d92254 100644
--- a/include/linux/compiler-gcc4.h
+++ b/include/linux/compiler-gcc4.h
@@ -33,8 +33,6 @@
    the kernel context */
 #define __cold			__attribute__((__cold__))
 
-#define __linktime_error(message) __attribute__((__error__(message)))
-
 #define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__)
 
 #ifndef __CHECKER__
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index dd852b73b286..4c638be76093 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -308,9 +308,6 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect);
 #ifndef __compiletime_error
 # define __compiletime_error(message)
 #endif
-#ifndef __linktime_error
-# define __linktime_error(message)
-#endif
 /*
  * Prevent the compiler from merging or refetching accesses.  The compiler
  * is also forbidden from reordering successive instances of ACCESS_ONCE(),
-- 
cgit v1.2.3


From ca623c914e82c3351cd657073fdb24a1df8c27b9 Mon Sep 17 00:00:00 2001
From: Daniel Santos <daniel.santos@pobox.com>
Date: Thu, 21 Feb 2013 16:41:44 -0800
Subject: bug.h: fix BUILD_BUG_ON macro in __CHECKER__

When __CHECKER__ is defined, we disable all of the BUILD_BUG.* macros.
However, both BUILD_BUG_ON_NOT_POWER_OF_2 and BUILD_BUG_ON was evaluating
to nothing in this case, and we want (0) since this is a function-like
macro that will be followed by a semicolon.

Signed-off-by: Daniel Santos <daniel.santos@pobox.com>
Acked-by: Borislav Petkov <bp@alien8.de>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Joe Perches <joe@perches.com>
Cc: Josh Triplett <josh@joshtriplett.org>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bug.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bug.h b/include/linux/bug.h
index 2a11774c5e64..27d404f91b3e 100644
--- a/include/linux/bug.h
+++ b/include/linux/bug.h
@@ -12,11 +12,11 @@ enum bug_trap_type {
 struct pt_regs;
 
 #ifdef __CHECKER__
-#define BUILD_BUG_ON_NOT_POWER_OF_2(n)
+#define BUILD_BUG_ON_NOT_POWER_OF_2(n) (0)
 #define BUILD_BUG_ON_ZERO(e) (0)
 #define BUILD_BUG_ON_NULL(e) ((void*)0)
 #define BUILD_BUG_ON_INVALID(e) (0)
-#define BUILD_BUG_ON(condition)
+#define BUILD_BUG_ON(condition) (0)
 #define BUILD_BUG() (0)
 #else /* __CHECKER__ */
 
-- 
cgit v1.2.3


From 1d6a0d19c85587581a364850b77f30446810a560 Mon Sep 17 00:00:00 2001
From: Daniel Santos <daniel.santos@pobox.com>
Date: Thu, 21 Feb 2013 16:41:45 -0800
Subject: bug.h: prevent double evaulation of `condition' in BUILD_BUG_ON

When calling BUILD_BUG_ON in an optimized build using gcc 4.3 and later,
the condition will be evaulated twice, possibily with side-effects.  This
patch eliminates that error.

[akpm@linux-foundation.org: tweak code layout]
Signed-off-by: Daniel Santos <daniel.santos@pobox.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: David Rientjes <rientjes@google.com>
Cc: Joe Perches <joe@perches.com>
Cc: Josh Triplett <josh@joshtriplett.org>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bug.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bug.h b/include/linux/bug.h
index 27d404f91b3e..89fb91d0c929 100644
--- a/include/linux/bug.h
+++ b/include/linux/bug.h
@@ -59,9 +59,10 @@ struct pt_regs;
 extern int __build_bug_on_failed;
 #define BUILD_BUG_ON(condition)					\
 	do {							\
-		((void)sizeof(char[1 - 2*!!(condition)]));	\
-		if (condition) __build_bug_on_failed = 1;	\
-	} while(0)
+		bool __cond = !!(condition);			\
+		((void)sizeof(char[1 - 2 * __cond]));		\
+		if (__cond) __build_bug_on_failed = 1;		\
+	} while (0)
 #endif
 
 /**
-- 
cgit v1.2.3


From a3ccc497cd17147713363a4bf975f1a269fadb6d Mon Sep 17 00:00:00 2001
From: Daniel Santos <daniel.santos@pobox.com>
Date: Thu, 21 Feb 2013 16:41:52 -0800
Subject: bug.h: make BUILD_BUG_ON generate compile-time error

Negative sized arrays wont create a compile-time error in some cases
starting with gcc 4.4 (e.g., inlined functions), but gcc 4.3 introduced
the error function attribute that will.

This patch modifies BUILD_BUG_ON to behave like BUILD_BUG already does,
using the error function attribute so that you don't have to build the
entire kernel to discover that you have a problem, and then enjoy trying
to track it down from a link-time error.

Also, we are only including asm/bug.h and then expecting that
linux/compiler.h will eventually be included to define __linktime_error
(used in BUILD_BUG_ON).  This patch includes it directly for clarity and
to avoid the possibility of changes in <arch>/*/include/asm/bug.h being
changed or not including linux/compiler.h for some reason.

Signed-off-by: Daniel Santos <daniel.santos@pobox.com>
Acked-by: Borislav Petkov <bp@alien8.de>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Joe Perches <joe@perches.com>
Cc: Josh Triplett <josh@joshtriplett.org>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bug.h | 32 +++++++++++++++++++-------------
 1 file changed, 19 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bug.h b/include/linux/bug.h
index 89fb91d0c929..73af37ca472c 100644
--- a/include/linux/bug.h
+++ b/include/linux/bug.h
@@ -2,6 +2,7 @@
 #define _LINUX_BUG_H
 
 #include <asm/bug.h>
+#include <linux/compiler.h>
 
 enum bug_trap_type {
 	BUG_TRAP_TYPE_NONE = 0,
@@ -43,25 +44,30 @@ struct pt_regs;
  * @condition: the condition which the compiler should know is false.
  *
  * If you have some code which relies on certain constants being equal, or
- * other compile-time-evaluated condition, you should use BUILD_BUG_ON to
+ * some other compile-time-evaluated condition, you should use BUILD_BUG_ON to
  * detect if someone changes it.
  *
- * The implementation uses gcc's reluctance to create a negative array, but
- * gcc (as of 4.4) only emits that error for obvious cases (eg. not arguments
- * to inline functions).  So as a fallback we use the optimizer; if it can't
- * prove the condition is false, it will cause a link error on the undefined
- * "__build_bug_on_failed".  This error message can be harder to track down
- * though, hence the two different methods.
+ * The implementation uses gcc's reluctance to create a negative array, but gcc
+ * (as of 4.4) only emits that error for obvious cases (e.g. not arguments to
+ * inline functions).  Luckily, in 4.3 they added the "error" function
+ * attribute just for this type of case.  Thus, we use a negative sized array
+ * (should always create an error on gcc versions older than 4.4) and then call
+ * an undefined function with the error attribute (should always create an
+ * error on gcc 4.3 and later).  If for some reason, neither creates a
+ * compile-time error, we'll still have a link-time error, which is harder to
+ * track down.
  */
 #ifndef __OPTIMIZE__
 #define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
 #else
-extern int __build_bug_on_failed;
-#define BUILD_BUG_ON(condition)					\
-	do {							\
-		bool __cond = !!(condition);			\
-		((void)sizeof(char[1 - 2 * __cond]));		\
-		if (__cond) __build_bug_on_failed = 1;		\
+#define BUILD_BUG_ON(condition)						\
+	do {								\
+		bool __cond = !!(condition);				\
+		extern void __build_bug_on_failed(void)			\
+			__compiletime_error("BUILD_BUG_ON failed");	\
+		if (__cond)						\
+			__build_bug_on_failed();			\
+		((void)sizeof(char[1 - 2 * __cond]));			\
 	} while (0)
 #endif
 
-- 
cgit v1.2.3


From c361d3e54364d19bb5e803d6e766e94674da7b0e Mon Sep 17 00:00:00 2001
From: Daniel Santos <daniel.santos@pobox.com>
Date: Thu, 21 Feb 2013 16:41:54 -0800
Subject: compiler.h, bug.h: prevent double error messages with BUILD_BUG{,_ON}

Prior to the introduction of __attribute__((error("msg"))) in gcc 4.3,
creating compile-time errors required a little trickery.
BUILD_BUG{,_ON} uses this attribute when available to generate
compile-time errors, but also uses the negative-sized array trick for
older compilers, resulting in two error messages in some cases.  The
reason it's "some" cases is that as of gcc 4.4, the negative-sized array
will not create an error in some situations, like inline functions.

This patch replaces the negative-sized array code with the new
__compiletime_error_fallback() macro which expands to the same thing
unless the the error attribute is available, in which case it expands to
do{}while(0), resulting in exactly one compile-time error on all
versions of gcc.

Note that we are not changing the negative-sized array code for the
unoptimized version of BUILD_BUG_ON, since it has the potential to catch
problems that would be disabled in later versions of gcc were
__compiletime_error_fallback used.  The reason is that that an
unoptimized build can't always remove calls to an error-attributed
function call (like we are using) that should effectively become dead
code if it were optimized.  However, using a negative-sized array with a
similar value will not result in an false-positive (error).  The only
caveat being that it will also fail to catch valid conditions, which we
should be expecting in an unoptimized build anyway.

Signed-off-by: Daniel Santos <daniel.santos@pobox.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: David Rientjes <rientjes@google.com>
Cc: Joe Perches <joe@perches.com>
Cc: Josh Triplett <josh@joshtriplett.org>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bug.h      | 2 +-
 include/linux/compiler.h | 5 +++++
 2 files changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/bug.h b/include/linux/bug.h
index 73af37ca472c..dc11dc762fc3 100644
--- a/include/linux/bug.h
+++ b/include/linux/bug.h
@@ -67,7 +67,7 @@ struct pt_regs;
 			__compiletime_error("BUILD_BUG_ON failed");	\
 		if (__cond)						\
 			__build_bug_on_failed();			\
-		((void)sizeof(char[1 - 2 * __cond]));			\
+		__compiletime_error_fallback(__cond);			\
 	} while (0)
 #endif
 
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 4c638be76093..423bb6bd660f 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -307,7 +307,12 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect);
 #endif
 #ifndef __compiletime_error
 # define __compiletime_error(message)
+# define __compiletime_error_fallback(condition) \
+	do { ((void)sizeof(char[1 - 2*!!(condition)])); } while (0)
+#else
+# define __compiletime_error_fallback(condition) do { } while (0)
 #endif
+
 /*
  * Prevent the compiler from merging or refetching accesses.  The compiler
  * is also forbidden from reordering successive instances of ACCESS_ONCE(),
-- 
cgit v1.2.3


From 9a8ab1c39970a4938a72d94e6fd13be88a797590 Mon Sep 17 00:00:00 2001
From: Daniel Santos <daniel.santos@pobox.com>
Date: Thu, 21 Feb 2013 16:41:55 -0800
Subject: bug.h, compiler.h: introduce compiletime_assert & BUILD_BUG_ON_MSG

Introduce compiletime_assert to compiler.h, which moves the details of
how to break a build and emit an error message for a specific compiler
to the headers where these details should be.  Following in the
tradition of the POSIX assert macro, compiletime_assert creates a
build-time error when the supplied condition is *false*.

Next, we add BUILD_BUG_ON_MSG to bug.h which simply wraps
compiletime_assert, inverting the logic, so that it fails when the
condition is *true*, consistent with the language "build bug on." This
macro allows you to specify the error message you want emitted when the
supplied condition is true.

Finally, we remove all other code from bug.h that mucks with these
details (BUILD_BUG & BUILD_BUG_ON), and have them all call
BUILD_BUG_ON_MSG.  This not only reduces source code bloat, but also
prevents the possibility of code being changed for one macro and not for
the other (which was previously the case for BUILD_BUG and
BUILD_BUG_ON).

Since __compiletime_error_fallback is now only used in compiler.h, I'm
considering it a private macro and removing the double negation that's
now extraneous.

[akpm@linux-foundation.org: checkpatch fixes]
Signed-off-by: Daniel Santos <daniel.santos@pobox.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: David Rientjes <rientjes@google.com>
Cc: Joe Perches <joe@perches.com>
Cc: Josh Triplett <josh@joshtriplett.org>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bug.h      | 28 +++++++++++++---------------
 include/linux/compiler.h | 26 +++++++++++++++++++++++++-
 2 files changed, 38 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bug.h b/include/linux/bug.h
index dc11dc762fc3..7f4818673c41 100644
--- a/include/linux/bug.h
+++ b/include/linux/bug.h
@@ -17,6 +17,7 @@ struct pt_regs;
 #define BUILD_BUG_ON_ZERO(e) (0)
 #define BUILD_BUG_ON_NULL(e) ((void*)0)
 #define BUILD_BUG_ON_INVALID(e) (0)
+#define BUILD_BUG_ON_MSG(cond, msg) (0)
 #define BUILD_BUG_ON(condition) (0)
 #define BUILD_BUG() (0)
 #else /* __CHECKER__ */
@@ -39,6 +40,15 @@ struct pt_regs;
  */
 #define BUILD_BUG_ON_INVALID(e) ((void)(sizeof((__force long)(e))))
 
+/**
+ * BUILD_BUG_ON_MSG - break compile if a condition is true & emit supplied
+ *		      error message.
+ * @condition: the condition which the compiler should know is false.
+ *
+ * See BUILD_BUG_ON for description.
+ */
+#define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg)
+
 /**
  * BUILD_BUG_ON - break compile if a condition is true.
  * @condition: the condition which the compiler should know is false.
@@ -60,15 +70,8 @@ struct pt_regs;
 #ifndef __OPTIMIZE__
 #define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
 #else
-#define BUILD_BUG_ON(condition)						\
-	do {								\
-		bool __cond = !!(condition);				\
-		extern void __build_bug_on_failed(void)			\
-			__compiletime_error("BUILD_BUG_ON failed");	\
-		if (__cond)						\
-			__build_bug_on_failed();			\
-		__compiletime_error_fallback(__cond);			\
-	} while (0)
+#define BUILD_BUG_ON(condition) \
+	BUILD_BUG_ON_MSG(condition, "BUILD_BUG_ON failed: " #condition)
 #endif
 
 /**
@@ -78,12 +81,7 @@ struct pt_regs;
  * build time, you should use BUILD_BUG to detect if it is
  * unexpectedly used.
  */
-#define BUILD_BUG()						\
-	do {							\
-		extern void __build_bug_failed(void)		\
-			__compiletime_error("BUILD_BUG failed");\
-		__build_bug_failed();				\
-	} while (0)
+#define BUILD_BUG() BUILD_BUG_ON_MSG(1, "BUILD_BUG failed")
 
 #endif	/* __CHECKER__ */
 
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 423bb6bd660f..10b8f23fab0f 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -308,11 +308,35 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect);
 #ifndef __compiletime_error
 # define __compiletime_error(message)
 # define __compiletime_error_fallback(condition) \
-	do { ((void)sizeof(char[1 - 2*!!(condition)])); } while (0)
+	do { ((void)sizeof(char[1 - 2 * condition])); } while (0)
 #else
 # define __compiletime_error_fallback(condition) do { } while (0)
 #endif
 
+#define __compiletime_assert(condition, msg, prefix, suffix)		\
+	do {								\
+		bool __cond = !(condition);				\
+		extern void prefix ## suffix(void) __compiletime_error(msg); \
+		if (__cond)						\
+			prefix ## suffix();				\
+		__compiletime_error_fallback(__cond);			\
+	} while (0)
+
+#define _compiletime_assert(condition, msg, prefix, suffix) \
+	__compiletime_assert(condition, msg, prefix, suffix)
+
+/**
+ * compiletime_assert - break build and emit msg if condition is false
+ * @condition: a compile-time constant condition to check
+ * @msg:       a message to emit if condition is false
+ *
+ * In tradition of POSIX assert, this macro will break the build if the
+ * supplied condition is *false*, emitting the supplied error message if the
+ * compiler has support to do so.
+ */
+#define compiletime_assert(condition, msg) \
+	_compiletime_assert(condition, msg, __compiletime_assert_, __LINE__)
+
 /*
  * Prevent the compiler from merging or refetching accesses.  The compiler
  * is also forbidden from reordering successive instances of ACCESS_ONCE(),
-- 
cgit v1.2.3


From b1ae345d971664f70cfdc293029c40ccfb093591 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 21 Feb 2013 16:42:47 -0800
Subject: lockdep: make lockdep_assert_held() not have a return value

I recently made the mistake of writing:

  foo = lockdep_dereference_protected(..., lockdep_assert_held(...));

which is clearly bogus.  If lockdep is disabled in the config this would
cause a compile failure, if it is enabled then it compiles and causes a
puzzling warning about dereferencing without the correct protection.

Wrap the macro in "do { ...  } while (0)" to also fail compile for this
when lockdep is enabled.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/lockdep.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 2bca44b0893c..bfe88c4aa251 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -359,7 +359,9 @@ extern void lockdep_trace_alloc(gfp_t mask);
 
 #define lockdep_depth(tsk)	(debug_locks ? (tsk)->lockdep_depth : 0)
 
-#define lockdep_assert_held(l)	WARN_ON(debug_locks && !lockdep_is_held(l))
+#define lockdep_assert_held(l)	do {				\
+		WARN_ON(debug_locks && !lockdep_is_held(l));	\
+	} while (0)
 
 #define lockdep_recursing(tsk)	((tsk)->lockdep_recursion)
 
-- 
cgit v1.2.3


From 7d311cdab663f4f7ab3a4c0d5d484234406f8268 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Thu, 21 Feb 2013 16:42:48 -0800
Subject: bdi: allow block devices to say that they require stable page writes

This patchset ("stable page writes, part 2") makes some key
modifications to the original 'stable page writes' patchset.  First, it
provides creators (devices and filesystems) of a backing_dev_info a flag
that declares whether or not it is necessary to ensure that page
contents cannot change during writeout.  It is no longer assumed that
this is true of all devices (which was never true anyway).  Second, the
flag is used to relaxed the wait_on_page_writeback calls so that wait
only occurs if the device needs it.  Third, it fixes up the remaining
disk-backed filesystems to use this improved conditional-wait logic to
provide stable page writes on those filesystems.

It is hoped that (for people not using checksumming devices, anyway)
this patchset will give back unnecessary performance decreases since the
original stable page write patchset went into 3.0.  Sorry about not
fixing it sooner.

Complaints were registered by several people about the long write
latencies introduced by the original stable page write patchset.
Generally speaking, the kernel ought to allocate as little extra memory
as possible to facilitate writeout, but for people who simply cannot
wait, a second page stability strategy is (re)introduced: snapshotting
page contents.  The waiting behavior is still the default strategy; to
enable page snapshotting, a superblock flag (MS_SNAP_STABLE) must be
set.  This flag is used to bandaid^Henable stable page writeback on
ext3[1], and is not used anywhere else.

Given that there are already a few storage devices and network FSes that
have rolled their own page stability wait/page snapshot code, it would
be nice to move towards consolidating all of these.  It seems possible
that iscsi and raid5 may wish to use the new stable page write support
to enable zero-copy writeout.

Thank you to Jan Kara for helping fix a couple more filesystems.

Per Andrew Morton's request, here are the result of using dbench to measure
latencies on ext2:

3.8.0-rc3:
   Operation      Count    AvgLat    MaxLat
   ----------------------------------------
   WriteX        109347     0.028    59.817
   ReadX         347180     0.004     3.391
   Flush          15514    29.828   287.283

  Throughput 57.429 MB/sec  4 clients  4 procs  max_latency=287.290 ms

3.8.0-rc3 + patches:
   WriteX        105556     0.029     4.273
   ReadX         335004     0.005     4.112
   Flush          14982    30.540   298.634

  Throughput 55.4496 MB/sec  4 clients  4 procs  max_latency=298.650 ms

As you can see, for ext2 the maximum write latency decreases from ~60ms
on a laptop hard disk to ~4ms.  I'm not sure why the flush latencies
increase, though I suspect that being able to dirty pages faster gives
the flusher more work to do.

On ext4, the average write latency decreases as well as all the maximum
latencies:

3.8.0-rc3:
   WriteX         85624     0.152    33.078
   ReadX         272090     0.010    61.210
   Flush          12129    36.219   168.260

  Throughput 44.8618 MB/sec  4 clients  4 procs  max_latency=168.276 ms

3.8.0-rc3 + patches:
   WriteX         86082     0.141    30.928
   ReadX         273358     0.010    36.124
   Flush          12214    34.800   165.689

  Throughput 44.9941 MB/sec  4 clients  4 procs  max_latency=165.722 ms

XFS seems to exhibit similar latency improvements as ext2:

3.8.0-rc3:
   WriteX        125739     0.028   104.343
   ReadX         399070     0.005     4.115
   Flush          17851    25.004   131.390

  Throughput 66.0024 MB/sec  4 clients  4 procs  max_latency=131.406 ms

3.8.0-rc3 + patches:
   WriteX        123529     0.028     6.299
   ReadX         392434     0.005     4.287
   Flush          17549    25.120   188.687

  Throughput 64.9113 MB/sec  4 clients  4 procs  max_latency=188.704 ms

...and btrfs, just to round things out, also shows some latency
decreases:

3.8.0-rc3:
   WriteX         67122     0.083    82.355
   ReadX         212719     0.005     2.828
   Flush           9547    47.561   147.418

  Throughput 35.3391 MB/sec  4 clients  4 procs  max_latency=147.433 ms

3.8.0-rc3 + patches:
   WriteX         64898     0.101    71.631
   ReadX         206673     0.005     7.123
   Flush           9190    47.963   219.034

  Throughput 34.0795 MB/sec  4 clients  4 procs  max_latency=219.044 ms

Before this patchset, all filesystems would block, regardless of whether
or not it was necessary.  ext3 would wait, but still generate occasional
checksum errors.  The network filesystems were left to do their own
thing, so they'd wait too.

After this patchset, all the disk filesystems except ext3 and btrfs will
wait only if the hardware requires it.  ext3 (if necessary) snapshots
pages instead of blocking, and btrfs provides its own bdi so the mm will
never wait.  Network filesystems haven't been touched, so either they
provide their own wait code, or they don't block at all.  The blocking
behavior is back to what it was before 3.0 if you don't have a disk
requiring stable page writes.

This patchset has been tested on 3.8.0-rc3 on x64 with ext3, ext4, and
xfs.  I've spot-checked 3.8.0-rc4 and seem to be getting the same
results as -rc3.

[1] The alternative fixes to ext3 include fixing the locking order and
page bit handling like we did for ext4 (but then why not just use
ext4?), or setting PG_writeback so early that ext3 becomes extremely
slow.  I tried that, but the number of write()s I could initiate dropped
by nearly an order of magnitude.  That was a bit much even for the
author of the stable page series! :)

This patch:

Creates a per-backing-device flag that tracks whether or not pages must
be held immutable during writeout.  Eventually it will be used to waive
wait_for_page_writeback() if nothing requires stable pages.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Artem Bityutskiy <dedekind1@gmail.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Mark Fasheh <mfasheh@suse.com>
Cc: Steven Whitehouse <swhiteho@redhat.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Eric Van Hensbergen <ericvh@gmail.com>
Cc: Ron Minnich <rminnich@sandia.gov>
Cc: Latchesar Ionkov <lucho@ionkov.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/ABI/testing/sysfs-class-bdi |  5 +++++
 block/blk-integrity.c                     |  4 ++++
 include/linux/backing-dev.h               |  6 ++++++
 mm/backing-dev.c                          | 11 +++++++++++
 4 files changed, 26 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/ABI/testing/sysfs-class-bdi b/Documentation/ABI/testing/sysfs-class-bdi
index 5f500977b42f..d773d5697cf5 100644
--- a/Documentation/ABI/testing/sysfs-class-bdi
+++ b/Documentation/ABI/testing/sysfs-class-bdi
@@ -48,3 +48,8 @@ max_ratio (read-write)
 	most of the write-back cache.  For example in case of an NFS
 	mount that is prone to get stuck, or a FUSE mount which cannot
 	be trusted to play fair.
+
+stable_pages_required (read-only)
+
+	If set, the backing device requires that all pages comprising a write
+	request must not be changed until writeout is complete.
diff --git a/block/blk-integrity.c b/block/blk-integrity.c
index da2a818c3a92..dabd221857e1 100644
--- a/block/blk-integrity.c
+++ b/block/blk-integrity.c
@@ -420,6 +420,8 @@ int blk_integrity_register(struct gendisk *disk, struct blk_integrity *template)
 	} else
 		bi->name = bi_unsupported_name;
 
+	disk->queue->backing_dev_info.capabilities |= BDI_CAP_STABLE_WRITES;
+
 	return 0;
 }
 EXPORT_SYMBOL(blk_integrity_register);
@@ -438,6 +440,8 @@ void blk_integrity_unregister(struct gendisk *disk)
 	if (!disk || !disk->integrity)
 		return;
 
+	disk->queue->backing_dev_info.capabilities &= ~BDI_CAP_STABLE_WRITES;
+
 	bi = disk->integrity;
 
 	kobject_uevent(&bi->kobj, KOBJ_REMOVE);
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 12731a19ef06..350459910fe1 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -254,6 +254,7 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio);
 #define BDI_CAP_EXEC_MAP	0x00000040
 #define BDI_CAP_NO_ACCT_WB	0x00000080
 #define BDI_CAP_SWAP_BACKED	0x00000100
+#define BDI_CAP_STABLE_WRITES	0x00000200
 
 #define BDI_CAP_VMFLAGS \
 	(BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP)
@@ -308,6 +309,11 @@ long wait_iff_congested(struct zone *zone, int sync, long timeout);
 int pdflush_proc_obsolete(struct ctl_table *table, int write,
 		void __user *buffer, size_t *lenp, loff_t *ppos);
 
+static inline bool bdi_cap_stable_pages_required(struct backing_dev_info *bdi)
+{
+	return bdi->capabilities & BDI_CAP_STABLE_WRITES;
+}
+
 static inline bool bdi_cap_writeback_dirty(struct backing_dev_info *bdi)
 {
 	return !(bdi->capabilities & BDI_CAP_NO_WRITEBACK);
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index d3ca2b3ee176..41733c5dc820 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -221,12 +221,23 @@ static ssize_t max_ratio_store(struct device *dev,
 }
 BDI_SHOW(max_ratio, bdi->max_ratio)
 
+static ssize_t stable_pages_required_show(struct device *dev,
+					  struct device_attribute *attr,
+					  char *page)
+{
+	struct backing_dev_info *bdi = dev_get_drvdata(dev);
+
+	return snprintf(page, PAGE_SIZE-1, "%d\n",
+			bdi_cap_stable_pages_required(bdi) ? 1 : 0);
+}
+
 #define __ATTR_RW(attr) __ATTR(attr, 0644, attr##_show, attr##_store)
 
 static struct device_attribute bdi_dev_attrs[] = {
 	__ATTR_RW(read_ahead_kb),
 	__ATTR_RW(min_ratio),
 	__ATTR_RW(max_ratio),
+	__ATTR_RO(stable_pages_required),
 	__ATTR_NULL,
 };
 
-- 
cgit v1.2.3


From 1d1d1a767206fbe5d4c69493b7e6d2a8d08cc0a0 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Thu, 21 Feb 2013 16:42:51 -0800
Subject: mm: only enforce stable page writes if the backing device requires it

Create a helper function to check if a backing device requires stable
page writes and, if so, performs the necessary wait.  Then, make it so
that all points in the memory manager that handle making pages writable
use the helper function.  This should provide stable page write support
to most filesystems, while eliminating unnecessary waiting for devices
that don't require the feature.

Before this patchset, all filesystems would block, regardless of whether
or not it was necessary.  ext3 would wait, but still generate occasional
checksum errors.  The network filesystems were left to do their own
thing, so they'd wait too.

After this patchset, all the disk filesystems except ext3 and btrfs will
wait only if the hardware requires it.  ext3 (if necessary) snapshots
pages instead of blocking, and btrfs provides its own bdi so the mm will
never wait.  Network filesystems haven't been touched, so either they
provide their own stable page guarantees or they don't block at all.
The blocking behavior is back to what it was before 3.0 if you don't
have a disk requiring stable page writes.

Here's the result of using dbench to test latency on ext2:

3.8.0-rc3:
 Operation      Count    AvgLat    MaxLat
 ----------------------------------------
 WriteX        109347     0.028    59.817
 ReadX         347180     0.004     3.391
 Flush          15514    29.828   287.283

Throughput 57.429 MB/sec  4 clients  4 procs  max_latency=287.290 ms

3.8.0-rc3 + patches:
 WriteX        105556     0.029     4.273
 ReadX         335004     0.005     4.112
 Flush          14982    30.540   298.634

Throughput 55.4496 MB/sec  4 clients  4 procs  max_latency=298.650 ms

As you can see, the maximum write latency drops considerably with this
patch enabled.  The other filesystems (ext3/ext4/xfs/btrfs) behave
similarly, but see the cover letter for those results.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Acked-by: Steven Whitehouse <swhiteho@redhat.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Artem Bityutskiy <dedekind1@gmail.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Mark Fasheh <mfasheh@suse.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Eric Van Hensbergen <ericvh@gmail.com>
Cc: Ron Minnich <rminnich@sandia.gov>
Cc: Latchesar Ionkov <lucho@ionkov.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/buffer.c             |  2 +-
 fs/ext4/inode.c         |  2 +-
 fs/gfs2/file.c          |  2 +-
 fs/nilfs2/file.c        |  2 +-
 include/linux/pagemap.h |  1 +
 mm/filemap.c            |  3 ++-
 mm/page-writeback.c     | 20 ++++++++++++++++++++
 7 files changed, 27 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/fs/buffer.c b/fs/buffer.c
index 7a75c3e0fd58..2ea9cd44aeae 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2359,7 +2359,7 @@ int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
 	if (unlikely(ret < 0))
 		goto out_unlock;
 	set_page_dirty(page);
-	wait_on_page_writeback(page);
+	wait_for_stable_page(page);
 	return 0;
 out_unlock:
 	unlock_page(page);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index cbfe13bf5b2a..cd818d8bb221 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4968,7 +4968,7 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 					    0, len, NULL,
 					    ext4_bh_unmapped)) {
 			/* Wait so that we don't change page under IO */
-			wait_on_page_writeback(page);
+			wait_for_stable_page(page);
 			ret = VM_FAULT_LOCKED;
 			goto out;
 		}
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 06b7092a3f25..2687f50d98cb 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -483,7 +483,7 @@ out:
 	gfs2_holder_uninit(&gh);
 	if (ret == 0) {
 		set_page_dirty(page);
-		wait_on_page_writeback(page);
+		wait_for_stable_page(page);
 	}
 	sb_end_pagefault(inode->i_sb);
 	return block_page_mkwrite_return(ret);
diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c
index 61946883025c..bec4af6eab13 100644
--- a/fs/nilfs2/file.c
+++ b/fs/nilfs2/file.c
@@ -126,7 +126,7 @@ static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 	nilfs_transaction_commit(inode->i_sb);
 
  mapped:
-	wait_on_page_writeback(page);
+	wait_for_stable_page(page);
  out:
 	sb_end_pagefault(inode->i_sb);
 	return block_page_mkwrite_return(ret);
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 6da609d14c15..0e38e13eb249 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -414,6 +414,7 @@ static inline void wait_on_page_writeback(struct page *page)
 }
 
 extern void end_page_writeback(struct page *page);
+void wait_for_stable_page(struct page *page);
 
 /*
  * Add an arbitrary waiter to a page's wait queue
diff --git a/mm/filemap.c b/mm/filemap.c
index 24a7ea583f0c..c610076c30e1 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1728,6 +1728,7 @@ int filemap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 	 * see the dirty page and writeprotect it again.
 	 */
 	set_page_dirty(page);
+	wait_for_stable_page(page);
 out:
 	sb_end_pagefault(inode->i_sb);
 	return ret;
@@ -2274,7 +2275,7 @@ repeat:
 		return NULL;
 	}
 found:
-	wait_on_page_writeback(page);
+	wait_for_stable_page(page);
 	return page;
 }
 EXPORT_SYMBOL(grab_cache_page_write_begin);
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 66a0024becd9..355d5ee69058 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2290,3 +2290,23 @@ int mapping_tagged(struct address_space *mapping, int tag)
 	return radix_tree_tagged(&mapping->page_tree, tag);
 }
 EXPORT_SYMBOL(mapping_tagged);
+
+/**
+ * wait_for_stable_page() - wait for writeback to finish, if necessary.
+ * @page:	The page to wait on.
+ *
+ * This function determines if the given page is related to a backing device
+ * that requires page contents to be held stable during writeback.  If so, then
+ * it will wait for any pending writeback to complete.
+ */
+void wait_for_stable_page(struct page *page)
+{
+	struct address_space *mapping = page_mapping(page);
+	struct backing_dev_info *bdi = mapping->backing_dev_info;
+
+	if (!bdi_cap_stable_pages_required(bdi))
+		return;
+
+	wait_on_page_writeback(page);
+}
+EXPORT_SYMBOL_GPL(wait_for_stable_page);
-- 
cgit v1.2.3


From 9a46ad6d6df3b547d057c39db13f69d7170a99e9 Mon Sep 17 00:00:00 2001
From: Shaohua Li <shli@kernel.org>
Date: Thu, 21 Feb 2013 16:43:03 -0800
Subject: smp: make smp_call_function_many() use logic similar to
 smp_call_function_single()

I'm testing swapout workload in a two-socket Xeon machine.  The workload
has 10 threads, each thread sequentially accesses separate memory
region.  TLB flush overhead is very big in the workload.  For each page,
page reclaim need move it from active lru list and then unmap it.  Both
need a TLB flush.  And this is a multthread workload, TLB flush happens
in 10 CPUs.  In X86, TLB flush uses generic smp_call)function.  So this
workload stress smp_call_function_many heavily.

Without patch, perf shows:
+  24.49%  [k] generic_smp_call_function_interrupt
-  21.72%  [k] _raw_spin_lock
   - _raw_spin_lock
      + 79.80% __page_check_address
      + 6.42% generic_smp_call_function_interrupt
      + 3.31% get_swap_page
      + 2.37% free_pcppages_bulk
      + 1.75% handle_pte_fault
      + 1.54% put_super
      + 1.41% grab_super_passive
      + 1.36% __swap_duplicate
      + 0.68% blk_flush_plug_list
      + 0.62% swap_info_get
+   6.55%  [k] flush_tlb_func
+   6.46%  [k] smp_call_function_many
+   5.09%  [k] call_function_interrupt
+   4.75%  [k] default_send_IPI_mask_sequence_phys
+   2.18%  [k] find_next_bit

swapout throughput is around 1300M/s.

With the patch, perf shows:
-  27.23%  [k] _raw_spin_lock
   - _raw_spin_lock
      + 80.53% __page_check_address
      + 8.39% generic_smp_call_function_single_interrupt
      + 2.44% get_swap_page
      + 1.76% free_pcppages_bulk
      + 1.40% handle_pte_fault
      + 1.15% __swap_duplicate
      + 1.05% put_super
      + 0.98% grab_super_passive
      + 0.86% blk_flush_plug_list
      + 0.57% swap_info_get
+   8.25%  [k] default_send_IPI_mask_sequence_phys
+   7.55%  [k] call_function_interrupt
+   7.47%  [k] smp_call_function_many
+   7.25%  [k] flush_tlb_func
+   3.81%  [k] _raw_spin_lock_irqsave
+   3.78%  [k] generic_smp_call_function_single_interrupt

swapout throughput is around 1400M/s.  So there is around a 7%
improvement, and total cpu utilization doesn't change.

Without the patch, cfd_data is shared by all CPUs.
generic_smp_call_function_interrupt does read/write cfd_data several times
which will create a lot of cache ping-pong.  With the patch, the data
becomes per-cpu.  The ping-pong is avoided.  And from the perf data, this
doesn't make call_single_queue lock contend.

Next step is to remove generic_smp_call_function_interrupt() from arch
code.

Signed-off-by: Shaohua Li <shli@fusionio.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/smp.h |   3 +-
 kernel/smp.c        | 183 +++++++++-------------------------------------------
 2 files changed, 32 insertions(+), 154 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/smp.h b/include/linux/smp.h
index dd6f06be3c9f..3e07a7df6478 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -89,7 +89,8 @@ void kick_all_cpus_sync(void);
 #ifdef CONFIG_USE_GENERIC_SMP_HELPERS
 void __init call_function_init(void);
 void generic_smp_call_function_single_interrupt(void);
-void generic_smp_call_function_interrupt(void);
+#define generic_smp_call_function_interrupt \
+	generic_smp_call_function_single_interrupt
 #else
 static inline void call_function_init(void) { }
 #endif
diff --git a/kernel/smp.c b/kernel/smp.c
index 69f38bd98b42..8e451f3ff51b 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -16,22 +16,12 @@
 #include "smpboot.h"
 
 #ifdef CONFIG_USE_GENERIC_SMP_HELPERS
-static struct {
-	struct list_head	queue;
-	raw_spinlock_t		lock;
-} call_function __cacheline_aligned_in_smp =
-	{
-		.queue		= LIST_HEAD_INIT(call_function.queue),
-		.lock		= __RAW_SPIN_LOCK_UNLOCKED(call_function.lock),
-	};
-
 enum {
 	CSD_FLAG_LOCK		= 0x01,
 };
 
 struct call_function_data {
-	struct call_single_data	csd;
-	atomic_t		refs;
+	struct call_single_data	__percpu *csd;
 	cpumask_var_t		cpumask;
 	cpumask_var_t		cpumask_ipi;
 };
@@ -60,6 +50,11 @@ hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu)
 		if (!zalloc_cpumask_var_node(&cfd->cpumask_ipi, GFP_KERNEL,
 				cpu_to_node(cpu)))
 			return notifier_from_errno(-ENOMEM);
+		cfd->csd = alloc_percpu(struct call_single_data);
+		if (!cfd->csd) {
+			free_cpumask_var(cfd->cpumask);
+			return notifier_from_errno(-ENOMEM);
+		}
 		break;
 
 #ifdef CONFIG_HOTPLUG_CPU
@@ -70,6 +65,7 @@ hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu)
 	case CPU_DEAD_FROZEN:
 		free_cpumask_var(cfd->cpumask);
 		free_cpumask_var(cfd->cpumask_ipi);
+		free_percpu(cfd->csd);
 		break;
 #endif
 	};
@@ -170,85 +166,6 @@ void generic_exec_single(int cpu, struct call_single_data *data, int wait)
 		csd_lock_wait(data);
 }
 
-/*
- * Invoked by arch to handle an IPI for call function. Must be called with
- * interrupts disabled.
- */
-void generic_smp_call_function_interrupt(void)
-{
-	struct call_function_data *data;
-	int cpu = smp_processor_id();
-
-	/*
-	 * Shouldn't receive this interrupt on a cpu that is not yet online.
-	 */
-	WARN_ON_ONCE(!cpu_online(cpu));
-
-	/*
-	 * Ensure entry is visible on call_function_queue after we have
-	 * entered the IPI. See comment in smp_call_function_many.
-	 * If we don't have this, then we may miss an entry on the list
-	 * and never get another IPI to process it.
-	 */
-	smp_mb();
-
-	/*
-	 * It's ok to use list_for_each_rcu() here even though we may
-	 * delete 'pos', since list_del_rcu() doesn't clear ->next
-	 */
-	list_for_each_entry_rcu(data, &call_function.queue, csd.list) {
-		int refs;
-		smp_call_func_t func;
-
-		/*
-		 * Since we walk the list without any locks, we might
-		 * see an entry that was completed, removed from the
-		 * list and is in the process of being reused.
-		 *
-		 * We must check that the cpu is in the cpumask before
-		 * checking the refs, and both must be set before
-		 * executing the callback on this cpu.
-		 */
-
-		if (!cpumask_test_cpu(cpu, data->cpumask))
-			continue;
-
-		smp_rmb();
-
-		if (atomic_read(&data->refs) == 0)
-			continue;
-
-		func = data->csd.func;		/* save for later warn */
-		func(data->csd.info);
-
-		/*
-		 * If the cpu mask is not still set then func enabled
-		 * interrupts (BUG), and this cpu took another smp call
-		 * function interrupt and executed func(info) twice
-		 * on this cpu.  That nested execution decremented refs.
-		 */
-		if (!cpumask_test_and_clear_cpu(cpu, data->cpumask)) {
-			WARN(1, "%pf enabled interrupts and double executed\n", func);
-			continue;
-		}
-
-		refs = atomic_dec_return(&data->refs);
-		WARN_ON(refs < 0);
-
-		if (refs)
-			continue;
-
-		WARN_ON(!cpumask_empty(data->cpumask));
-
-		raw_spin_lock(&call_function.lock);
-		list_del_rcu(&data->csd.list);
-		raw_spin_unlock(&call_function.lock);
-
-		csd_unlock(&data->csd);
-	}
-
-}
-
 /*
  * Invoked by arch to handle an IPI for call function single. Must be
  * called from the arch with interrupts disabled.
@@ -453,8 +370,7 @@ void smp_call_function_many(const struct cpumask *mask,
 			    smp_call_func_t func, void *info, bool wait)
 {
 	struct call_function_data *data;
-	unsigned long flags;
-	int refs, cpu, next_cpu, this_cpu = smp_processor_id();
+	int cpu, next_cpu, this_cpu = smp_processor_id();
 
 	/*
 	 * Can deadlock when called with interrupts disabled.
@@ -486,50 +402,13 @@ void smp_call_function_many(const struct cpumask *mask,
 	}
 
 	data = &__get_cpu_var(cfd_data);
-	csd_lock(&data->csd);
-
-	/* This BUG_ON verifies our reuse assertions and can be removed */
-	BUG_ON(atomic_read(&data->refs) || !cpumask_empty(data->cpumask));
-
-	/*
-	 * The global call function queue list add and delete are protected
-	 * by a lock, but the list is traversed without any lock, relying
-	 * on the rcu list add and delete to allow safe concurrent traversal.
-	 * We reuse the call function data without waiting for any grace
-	 * period after some other cpu removes it from the global queue.
-	 * This means a cpu might find our data block as it is being
-	 * filled out.
-	 *
-	 * We hold off the interrupt handler on the other cpu by
-	 * ordering our writes to the cpu mask vs our setting of the
-	 * refs counter.  We assert only the cpu owning the data block
-	 * will set a bit in cpumask, and each bit will only be cleared
-	 * by the subject cpu.  Each cpu must first find its bit is
-	 * set and then check that refs is set indicating the element is
-	 * ready to be processed, otherwise it must skip the entry.
-	 *
-	 * On the previous iteration refs was set to 0 by another cpu.
-	 * To avoid the use of transitivity, set the counter to 0 here
-	 * so the wmb will pair with the rmb in the interrupt handler.
-	 */
-	atomic_set(&data->refs, 0);	/* convert 3rd to 1st party write */
-
-	data->csd.func = func;
-	data->csd.info = info;
 
-	/* Ensure 0 refs is visible before mask.  Also orders func and info */
-	smp_wmb();
-
-	/* We rely on the "and" being processed before the store */
 	cpumask_and(data->cpumask, mask, cpu_online_mask);
 	cpumask_clear_cpu(this_cpu, data->cpumask);
-	refs = cpumask_weight(data->cpumask);
 
 	/* Some callers race with other cpus changing the passed mask */
-	if (unlikely(!refs)) {
-		csd_unlock(&data->csd);
+	if (unlikely(!cpumask_weight(data->cpumask)))
 		return;
-	}
 
 	/*
 	 * After we put an entry into the list, data->cpumask
@@ -537,34 +416,32 @@ void smp_call_function_many(const struct cpumask *mask,
 	 * a SMP function call, so data->cpumask will be zero.
 	 */
 	cpumask_copy(data->cpumask_ipi, data->cpumask);
-	raw_spin_lock_irqsave(&call_function.lock, flags);
-	/*
-	 * Place entry at the _HEAD_ of the list, so that any cpu still
-	 * observing the entry in generic_smp_call_function_interrupt()
-	 * will not miss any other list entries:
-	 */
-	list_add_rcu(&data->csd.list, &call_function.queue);
-	/*
-	 * We rely on the wmb() in list_add_rcu to complete our writes
-	 * to the cpumask before this write to refs, which indicates
-	 * data is on the list and is ready to be processed.
-	 */
-	atomic_set(&data->refs, refs);
-	raw_spin_unlock_irqrestore(&call_function.lock, flags);
 
-	/*
-	 * Make the list addition visible before sending the ipi.
-	 * (IPIs must obey or appear to obey normal Linux cache
-	 * coherency rules -- see comment in generic_exec_single).
-	 */
-	smp_mb();
+	for_each_cpu(cpu, data->cpumask) {
+		struct call_single_data *csd = per_cpu_ptr(data->csd, cpu);
+		struct call_single_queue *dst =
+					&per_cpu(call_single_queue, cpu);
+		unsigned long flags;
+
+		csd_lock(csd);
+		csd->func = func;
+		csd->info = info;
+
+		raw_spin_lock_irqsave(&dst->lock, flags);
+		list_add_tail(&csd->list, &dst->list);
+		raw_spin_unlock_irqrestore(&dst->lock, flags);
+	}
 
 	/* Send a message to all CPUs in the map */
 	arch_send_call_function_ipi_mask(data->cpumask_ipi);
 
-	/* Optionally wait for the CPUs to complete */
-	if (wait)
-		csd_lock_wait(&data->csd);
+	if (wait) {
+		for_each_cpu(cpu, data->cpumask) {
+			struct call_single_data *csd =
+					per_cpu_ptr(data->csd, cpu);
+			csd_lock_wait(csd);
+		}
+	}
 }
 EXPORT_SYMBOL(smp_call_function_many);
 
-- 
cgit v1.2.3


From 36d308d8b547ee19d3fa7399858e5a1632413d0e Mon Sep 17 00:00:00 2001
From: Mikhail Gruzdev <michail.gruzdev@gmail.com>
Date: Thu, 21 Feb 2013 16:43:10 -0800
Subject: printk: add pr_devel_once and pr_devel_ratelimited

Standardize pr_devel logging macros family by adding pr_devel_once and
pr_devel_ratelimited.

Signed-off-by: Mikhail Gruzdev <michail.gruzdev@gmail.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/printk.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/printk.h b/include/linux/printk.h
index 86c4b6294713..54419f4f82c2 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -252,6 +252,15 @@ extern void dump_stack(void) __cold;
 	printk_once(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__)
 #define pr_cont_once(fmt, ...)					\
 	printk_once(KERN_CONT pr_fmt(fmt), ##__VA_ARGS__)
+
+#if defined(DEBUG)
+#define pr_devel_once(fmt, ...)					\
+	printk_once(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
+#else
+#define pr_devel_once(fmt, ...)					\
+	no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
+#endif
+
 /* If you are writing a driver, please use dev_dbg instead */
 #if defined(DEBUG)
 #define pr_debug_once(fmt, ...)					\
@@ -295,6 +304,15 @@ extern void dump_stack(void) __cold;
 #define pr_info_ratelimited(fmt, ...)					\
 	printk_ratelimited(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__)
 /* no pr_cont_ratelimited, don't do that... */
+
+#if defined(DEBUG)
+#define pr_devel_ratelimited(fmt, ...)					\
+	printk_ratelimited(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
+#else
+#define pr_devel_ratelimited(fmt, ...)					\
+	no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
+#endif
+
 /* If you are writing a driver, please use dev_dbg instead */
 #if defined(DEBUG)
 #define pr_debug_ratelimited(fmt, ...)					\
-- 
cgit v1.2.3


From 26e8ccc223ebfd2047a96074f142544dc7062cfe Mon Sep 17 00:00:00 2001
From: "Kim, Milo" <Milo.Kim@ti.com>
Date: Thu, 21 Feb 2013 16:44:06 -0800
Subject: backlight: lp855x_bl: support new LP8557 device

LP8557 is one of LP855x family device, but it has different register map
and initialization process.  To support this device, device specific
configuration is done through the lp855x_device_config structure.

Few register definitions are fixed for better readability.
  BRIGHTNESS_CTRL -> LP855X_BRIGHTNESS_CTRL
  DEVICE_CTRL     -> LP855X_DEVICE_CTRL
  EEPROM_START    -> LP855X_EEPROM_START
  EEPROM_END      -> LP855X_EEPROM_END
  EPROM_START     -> LP8556_EPROM_START
  EPROM_END       -> LP8556_EPROM_END

And LP8557 register definitions are added.  New register function,
lp855x_update_bit() is added.

Signed-off-by: Milo(Woogyom) Kim <milo.kim@ti.com>
Acked-by: Jingoo Han <jg1.han@samsung.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/backlight/lp855x-driver.txt |  4 +-
 drivers/video/backlight/Kconfig           |  2 +-
 drivers/video/backlight/lp855x_bl.c       | 87 +++++++++++++++++++++++++------
 include/linux/platform_data/lp855x.h      | 19 +++++++
 4 files changed, 94 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/backlight/lp855x-driver.txt b/Documentation/backlight/lp855x-driver.txt
index 1529394cfe8b..18b06ca038ea 100644
--- a/Documentation/backlight/lp855x-driver.txt
+++ b/Documentation/backlight/lp855x-driver.txt
@@ -4,7 +4,7 @@ Kernel driver lp855x
 Backlight driver for LP855x ICs
 
 Supported chips:
-	Texas Instruments LP8550, LP8551, LP8552, LP8553 and LP8556
+	Texas Instruments LP8550, LP8551, LP8552, LP8553, LP8556 and LP8557
 
 Author: Milo(Woogyom) Kim <milo.kim@ti.com>
 
@@ -24,7 +24,7 @@ Value : pwm based or register based
 
 2) chip_id
 The lp855x chip id.
-Value : lp8550/lp8551/lp8552/lp8553/lp8556
+Value : lp8550/lp8551/lp8552/lp8553/lp8556/lp8557
 
 Platform data for lp855x
 ------------------------
diff --git a/drivers/video/backlight/Kconfig b/drivers/video/backlight/Kconfig
index a942a2488480..be27b551473f 100644
--- a/drivers/video/backlight/Kconfig
+++ b/drivers/video/backlight/Kconfig
@@ -381,7 +381,7 @@ config BACKLIGHT_LP855X
 	tristate "Backlight driver for TI LP855X"
 	depends on BACKLIGHT_CLASS_DEVICE && I2C
 	help
-	  This supports TI LP8550, LP8551, LP8552, LP8553 and LP8556
+	  This supports TI LP8550, LP8551, LP8552, LP8553, LP8556 and LP8557
 	  backlight driver.
 
 config BACKLIGHT_OT200
diff --git a/drivers/video/backlight/lp855x_bl.c b/drivers/video/backlight/lp855x_bl.c
index 050cfbb53667..edd2041b1527 100644
--- a/drivers/video/backlight/lp855x_bl.c
+++ b/drivers/video/backlight/lp855x_bl.c
@@ -17,13 +17,23 @@
 #include <linux/platform_data/lp855x.h>
 #include <linux/pwm.h>
 
-/* Registers */
-#define BRIGHTNESS_CTRL		0x00
-#define DEVICE_CTRL		0x01
-#define EEPROM_START		0xA0
-#define EEPROM_END		0xA7
-#define EPROM_START		0xA0
-#define EPROM_END		0xAF
+/* LP8550/1/2/3/6 Registers */
+#define LP855X_BRIGHTNESS_CTRL		0x00
+#define LP855X_DEVICE_CTRL		0x01
+#define LP855X_EEPROM_START		0xA0
+#define LP855X_EEPROM_END		0xA7
+#define LP8556_EPROM_START		0xA0
+#define LP8556_EPROM_END		0xAF
+
+/* LP8557 Registers */
+#define LP8557_BL_CMD			0x00
+#define LP8557_BL_MASK			0x01
+#define LP8557_BL_ON			0x01
+#define LP8557_BL_OFF			0x00
+#define LP8557_BRIGHTNESS_CTRL		0x04
+#define LP8557_CONFIG			0x10
+#define LP8557_EPROM_START		0x10
+#define LP8557_EPROM_END		0x1E
 
 #define BUF_SIZE		20
 #define DEFAULT_BL_NAME		"lcd-backlight"
@@ -75,6 +85,24 @@ static int lp855x_write_byte(struct lp855x *lp, u8 reg, u8 data)
 	return i2c_smbus_write_byte_data(lp->client, reg, data);
 }
 
+static int lp855x_update_bit(struct lp855x *lp, u8 reg, u8 mask, u8 data)
+{
+	int ret;
+	u8 tmp;
+
+	ret = i2c_smbus_read_byte_data(lp->client, reg);
+	if (ret < 0) {
+		dev_err(lp->dev, "failed to read 0x%.2x\n", reg);
+		return ret;
+	}
+
+	tmp = (u8)ret;
+	tmp &= ~mask;
+	tmp |= data & mask;
+
+	return lp855x_write_byte(lp, reg, tmp);
+}
+
 static bool lp855x_is_valid_rom_area(struct lp855x *lp, u8 addr)
 {
 	u8 start, end;
@@ -84,12 +112,16 @@ static bool lp855x_is_valid_rom_area(struct lp855x *lp, u8 addr)
 	case LP8551:
 	case LP8552:
 	case LP8553:
-		start = EEPROM_START;
-		end = EEPROM_END;
+		start = LP855X_EEPROM_START;
+		end = LP855X_EEPROM_END;
 		break;
 	case LP8556:
-		start = EPROM_START;
-		end = EPROM_END;
+		start = LP8556_EPROM_START;
+		end = LP8556_EPROM_END;
+		break;
+	case LP8557:
+		start = LP8557_EPROM_START;
+		end = LP8557_EPROM_END;
 		break;
 	default:
 		return false;
@@ -98,9 +130,30 @@ static bool lp855x_is_valid_rom_area(struct lp855x *lp, u8 addr)
 	return (addr >= start && addr <= end);
 }
 
+static int lp8557_bl_off(struct lp855x *lp)
+{
+	/* BL_ON = 0 before updating EPROM settings */
+	return lp855x_update_bit(lp, LP8557_BL_CMD, LP8557_BL_MASK,
+				LP8557_BL_OFF);
+}
+
+static int lp8557_bl_on(struct lp855x *lp)
+{
+	/* BL_ON = 1 after updating EPROM settings */
+	return lp855x_update_bit(lp, LP8557_BL_CMD, LP8557_BL_MASK,
+				LP8557_BL_ON);
+}
+
 static struct lp855x_device_config lp855x_dev_cfg = {
-	.reg_brightness = BRIGHTNESS_CTRL,
-	.reg_devicectrl = DEVICE_CTRL,
+	.reg_brightness = LP855X_BRIGHTNESS_CTRL,
+	.reg_devicectrl = LP855X_DEVICE_CTRL,
+};
+
+static struct lp855x_device_config lp8557_dev_cfg = {
+	.reg_brightness = LP8557_BRIGHTNESS_CTRL,
+	.reg_devicectrl = LP8557_CONFIG,
+	.pre_init_device = lp8557_bl_off,
+	.post_init_device = lp8557_bl_on,
 };
 
 /*
@@ -123,6 +176,9 @@ static int lp855x_configure(struct lp855x *lp)
 	case LP8550 ... LP8556:
 		lp->cfg = &lp855x_dev_cfg;
 		break;
+	case LP8557:
+		lp->cfg = &lp8557_dev_cfg;
+		break;
 	default:
 		return -EINVAL;
 	}
@@ -210,7 +266,7 @@ static int lp855x_bl_update_status(struct backlight_device *bl)
 
 	} else if (mode == REGISTER_BASED) {
 		u8 val = bl->props.brightness;
-		lp855x_write_byte(lp, BRIGHTNESS_CTRL, val);
+		lp855x_write_byte(lp, lp->cfg->reg_brightness, val);
 	}
 
 	return 0;
@@ -224,7 +280,7 @@ static int lp855x_bl_get_brightness(struct backlight_device *bl)
 	if (mode == REGISTER_BASED) {
 		u8 val = 0;
 
-		lp855x_read_byte(lp, BRIGHTNESS_CTRL, &val);
+		lp855x_read_byte(lp, lp->cfg->reg_brightness, &val);
 		bl->props.brightness = val;
 	}
 
@@ -376,6 +432,7 @@ static const struct i2c_device_id lp855x_ids[] = {
 	{"lp8552", LP8552},
 	{"lp8553", LP8553},
 	{"lp8556", LP8556},
+	{"lp8557", LP8557},
 	{ }
 };
 MODULE_DEVICE_TABLE(i2c, lp855x_ids);
diff --git a/include/linux/platform_data/lp855x.h b/include/linux/platform_data/lp855x.h
index e81f62d24ee2..20ee8b221dbd 100644
--- a/include/linux/platform_data/lp855x.h
+++ b/include/linux/platform_data/lp855x.h
@@ -49,12 +49,24 @@
 #define LP8556_FAST_CONFIG	BIT(7) /* use it if EPROMs should be maintained
 					  when exiting the low power mode */
 
+/* CONFIG register - LP8557 */
+#define LP8557_PWM_STANDBY	BIT(7)
+#define LP8557_PWM_FILTER	BIT(6)
+#define LP8557_RELOAD_EPROM	BIT(3)	/* use it if EPROMs should be reset
+					   when the backlight turns on */
+#define LP8557_OFF_OPENLEDS	BIT(2)
+#define LP8557_PWM_CONFIG	LP8557_PWM_ONLY
+#define LP8557_I2C_CONFIG	LP8557_I2C_ONLY
+#define LP8557_COMB1_CONFIG	LP8557_COMBINED1
+#define LP8557_COMB2_CONFIG	LP8557_COMBINED2
+
 enum lp855x_chip_id {
 	LP8550,
 	LP8551,
 	LP8552,
 	LP8553,
 	LP8556,
+	LP8557,
 };
 
 enum lp855x_brightness_ctrl_mode {
@@ -89,6 +101,13 @@ enum lp8556_brightness_source {
 	LP8556_COMBINED2,	/* pwm + i2c after the shaper block */
 };
 
+enum lp8557_brightness_source {
+	LP8557_PWM_ONLY,
+	LP8557_I2C_ONLY,
+	LP8557_COMBINED1,	/* pwm + i2c after the shaper block */
+	LP8557_COMBINED2,	/* pwm + i2c before the shaper block */
+};
+
 struct lp855x_rom_data {
 	u8 addr;
 	u8 val;
-- 
cgit v1.2.3


From f142d3bd556c5e82e9bb3d33d07d6708702ea4ce Mon Sep 17 00:00:00 2001
From: Thierry Reding <thierry.reding@avionic-design.de>
Date: Wed, 21 Nov 2012 15:29:29 +0100
Subject: video: Add generic HDMI infoframe helpers

Add generic helpers to pack HDMI infoframes into binary buffers.

Signed-off-by: Thierry Reding <thierry.reding@avionic-design.de>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/video/Kconfig  |   3 +
 drivers/video/Makefile |   1 +
 drivers/video/hdmi.c   | 308 +++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/hdmi.h   | 231 +++++++++++++++++++++++++++++++++++++
 4 files changed, 543 insertions(+)
 create mode 100644 drivers/video/hdmi.c
 create mode 100644 include/linux/hdmi.h

(limited to 'include/linux')

diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig
index 09f1a18c1adf..b11eeab94151 100644
--- a/drivers/video/Kconfig
+++ b/drivers/video/Kconfig
@@ -52,6 +52,9 @@ config OF_VIDEOMODE
 	help
 	  helper to get videomodes from the devicetree
 
+config HDMI
+	bool
+
 menuconfig FB
 	tristate "Support for frame buffer devices"
 	---help---
diff --git a/drivers/video/Makefile b/drivers/video/Makefile
index f592f3b32ec7..0b50082635e3 100644
--- a/drivers/video/Makefile
+++ b/drivers/video/Makefile
@@ -5,6 +5,7 @@
 # Each configuration option enables a list of files.
 
 obj-$(CONFIG_VGASTATE)            += vgastate.o
+obj-$(CONFIG_HDMI)                += hdmi.o
 obj-y                             += fb_notify.o
 obj-$(CONFIG_FB)                  += fb.o
 fb-y                              := fbmem.o fbmon.o fbcmap.o fbsysfs.o \
diff --git a/drivers/video/hdmi.c b/drivers/video/hdmi.c
new file mode 100644
index 000000000000..ab23c9b79143
--- /dev/null
+++ b/drivers/video/hdmi.c
@@ -0,0 +1,308 @@
+/*
+ * Copyright (C) 2012 Avionic Design GmbH
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/bitops.h>
+#include <linux/errno.h>
+#include <linux/export.h>
+#include <linux/hdmi.h>
+#include <linux/string.h>
+
+static void hdmi_infoframe_checksum(void *buffer, size_t size)
+{
+	u8 *ptr = buffer;
+	u8 csum = 0;
+	size_t i;
+
+	/* compute checksum */
+	for (i = 0; i < size; i++)
+		csum += ptr[i];
+
+	ptr[3] = 256 - csum;
+}
+
+/**
+ * hdmi_avi_infoframe_init() - initialize an HDMI AVI infoframe
+ * @frame: HDMI AVI infoframe
+ *
+ * Returns 0 on success or a negative error code on failure.
+ */
+int hdmi_avi_infoframe_init(struct hdmi_avi_infoframe *frame)
+{
+	memset(frame, 0, sizeof(*frame));
+
+	frame->type = HDMI_INFOFRAME_TYPE_AVI;
+	frame->version = 2;
+	frame->length = 13;
+
+	return 0;
+}
+EXPORT_SYMBOL(hdmi_avi_infoframe_init);
+
+/**
+ * hdmi_avi_infoframe_pack() - write HDMI AVI infoframe to binary buffer
+ * @frame: HDMI AVI infoframe
+ * @buffer: destination buffer
+ * @size: size of buffer
+ *
+ * Packs the information contained in the @frame structure into a binary
+ * representation that can be written into the corresponding controller
+ * registers. Also computes the checksum as required by section 5.3.5 of
+ * the HDMI 1.4 specification.
+ *
+ * Returns the number of bytes packed into the binary buffer or a negative
+ * error code on failure.
+ */
+ssize_t hdmi_avi_infoframe_pack(struct hdmi_avi_infoframe *frame, void *buffer,
+				size_t size)
+{
+	u8 *ptr = buffer;
+	size_t length;
+
+	length = HDMI_INFOFRAME_HEADER_SIZE + frame->length;
+
+	if (size < length)
+		return -ENOSPC;
+
+	memset(buffer, 0, length);
+
+	ptr[0] = frame->type;
+	ptr[1] = frame->version;
+	ptr[2] = frame->length;
+	ptr[3] = 0; /* checksum */
+
+	/* start infoframe payload */
+	ptr += HDMI_INFOFRAME_HEADER_SIZE;
+
+	ptr[0] = ((frame->colorspace & 0x3) << 5) | (frame->scan_mode & 0x3);
+
+	if (frame->active_info_valid)
+		ptr[0] |= BIT(4);
+
+	if (frame->horizontal_bar_valid)
+		ptr[0] |= BIT(3);
+
+	if (frame->vertical_bar_valid)
+		ptr[0] |= BIT(2);
+
+	ptr[1] = ((frame->colorimetry & 0x3) << 6) |
+		 ((frame->picture_aspect & 0x3) << 4) |
+		 (frame->active_aspect & 0xf);
+
+	ptr[2] = ((frame->extended_colorimetry & 0x7) << 4) |
+		 ((frame->quantization_range & 0x3) << 2) |
+		 (frame->nups & 0x3);
+
+	if (frame->itc)
+		ptr[2] |= BIT(7);
+
+	ptr[3] = frame->video_code & 0x7f;
+
+	ptr[4] = ((frame->ycc_quantization_range & 0x3) << 6) |
+		 ((frame->content_type & 0x3) << 4) |
+		 (frame->pixel_repeat & 0xf);
+
+	ptr[5] = frame->top_bar & 0xff;
+	ptr[6] = (frame->top_bar >> 8) & 0xff;
+	ptr[7] = frame->bottom_bar & 0xff;
+	ptr[8] = (frame->bottom_bar >> 8) & 0xff;
+	ptr[9] = frame->left_bar & 0xff;
+	ptr[10] = (frame->left_bar >> 8) & 0xff;
+	ptr[11] = frame->right_bar & 0xff;
+	ptr[12] = (frame->right_bar >> 8) & 0xff;
+
+	hdmi_infoframe_checksum(buffer, length);
+
+	return length;
+}
+EXPORT_SYMBOL(hdmi_avi_infoframe_pack);
+
+/**
+ * hdmi_spd_infoframe_init() - initialize an HDMI SPD infoframe
+ * @frame: HDMI SPD infoframe
+ * @vendor: vendor string
+ * @product: product string
+ *
+ * Returns 0 on success or a negative error code on failure.
+ */
+int hdmi_spd_infoframe_init(struct hdmi_spd_infoframe *frame,
+			    const char *vendor, const char *product)
+{
+	memset(frame, 0, sizeof(*frame));
+
+	frame->type = HDMI_INFOFRAME_TYPE_SPD;
+	frame->version = 1;
+	frame->length = 25;
+
+	strncpy(frame->vendor, vendor, sizeof(frame->vendor));
+	strncpy(frame->product, product, sizeof(frame->product));
+
+	return 0;
+}
+EXPORT_SYMBOL(hdmi_spd_infoframe_init);
+
+/**
+ * hdmi_spd_infoframe_pack() - write HDMI SPD infoframe to binary buffer
+ * @frame: HDMI SPD infoframe
+ * @buffer: destination buffer
+ * @size: size of buffer
+ *
+ * Packs the information contained in the @frame structure into a binary
+ * representation that can be written into the corresponding controller
+ * registers. Also computes the checksum as required by section 5.3.5 of
+ * the HDMI 1.4 specification.
+ *
+ * Returns the number of bytes packed into the binary buffer or a negative
+ * error code on failure.
+ */
+ssize_t hdmi_spd_infoframe_pack(struct hdmi_spd_infoframe *frame, void *buffer,
+				size_t size)
+{
+	u8 *ptr = buffer;
+	size_t length;
+
+	length = HDMI_INFOFRAME_HEADER_SIZE + frame->length;
+
+	if (size < length)
+		return -ENOSPC;
+
+	memset(buffer, 0, length);
+
+	ptr[0] = frame->type;
+	ptr[1] = frame->version;
+	ptr[2] = frame->length;
+	ptr[3] = 0; /* checksum */
+
+	/* start infoframe payload */
+	ptr += HDMI_INFOFRAME_HEADER_SIZE;
+
+	memcpy(ptr, frame->vendor, sizeof(frame->vendor));
+	memcpy(ptr + 8, frame->product, sizeof(frame->product));
+
+	ptr[24] = frame->sdi;
+
+	hdmi_infoframe_checksum(buffer, length);
+
+	return length;
+}
+EXPORT_SYMBOL(hdmi_spd_infoframe_pack);
+
+/**
+ * hdmi_audio_infoframe_init() - initialize an HDMI audio infoframe
+ * @frame: HDMI audio infoframe
+ *
+ * Returns 0 on success or a negative error code on failure.
+ */
+int hdmi_audio_infoframe_init(struct hdmi_audio_infoframe *frame)
+{
+	memset(frame, 0, sizeof(*frame));
+
+	frame->type = HDMI_INFOFRAME_TYPE_AUDIO;
+	frame->version = 1;
+	frame->length = 10;
+
+	return 0;
+}
+EXPORT_SYMBOL(hdmi_audio_infoframe_init);
+
+/**
+ * hdmi_audio_infoframe_pack() - write HDMI audio infoframe to binary buffer
+ * @frame: HDMI audio infoframe
+ * @buffer: destination buffer
+ * @size: size of buffer
+ *
+ * Packs the information contained in the @frame structure into a binary
+ * representation that can be written into the corresponding controller
+ * registers. Also computes the checksum as required by section 5.3.5 of
+ * the HDMI 1.4 specification.
+ *
+ * Returns the number of bytes packed into the binary buffer or a negative
+ * error code on failure.
+ */
+ssize_t hdmi_audio_infoframe_pack(struct hdmi_audio_infoframe *frame,
+				  void *buffer, size_t size)
+{
+	unsigned char channels;
+	u8 *ptr = buffer;
+	size_t length;
+
+	length = HDMI_INFOFRAME_HEADER_SIZE + frame->length;
+
+	if (size < length)
+		return -ENOSPC;
+
+	memset(buffer, 0, length);
+
+	if (frame->channels >= 2)
+		channels = frame->channels - 1;
+	else
+		channels = 0;
+
+	ptr[0] = frame->type;
+	ptr[1] = frame->version;
+	ptr[2] = frame->length;
+	ptr[3] = 0; /* checksum */
+
+	/* start infoframe payload */
+	ptr += HDMI_INFOFRAME_HEADER_SIZE;
+
+	ptr[0] = ((frame->coding_type & 0xf) << 4) | (channels & 0x7);
+	ptr[1] = ((frame->sample_frequency & 0x7) << 2) |
+		 (frame->sample_size & 0x3);
+	ptr[2] = frame->coding_type_ext & 0x1f;
+	ptr[3] = frame->channel_allocation;
+	ptr[4] = (frame->level_shift_value & 0xf) << 3;
+
+	if (frame->downmix_inhibit)
+		ptr[4] |= BIT(7);
+
+	hdmi_infoframe_checksum(buffer, length);
+
+	return length;
+}
+EXPORT_SYMBOL(hdmi_audio_infoframe_pack);
+
+/**
+ * hdmi_vendor_infoframe_pack() - write a HDMI vendor infoframe to binary
+ *                                buffer
+ * @frame: HDMI vendor infoframe
+ * @buffer: destination buffer
+ * @size: size of buffer
+ *
+ * Packs the information contained in the @frame structure into a binary
+ * representation that can be written into the corresponding controller
+ * registers. Also computes the checksum as required by section 5.3.5 of
+ * the HDMI 1.4 specification.
+ *
+ * Returns the number of bytes packed into the binary buffer or a negative
+ * error code on failure.
+ */
+ssize_t hdmi_vendor_infoframe_pack(struct hdmi_vendor_infoframe *frame,
+				   void *buffer, size_t size)
+{
+	u8 *ptr = buffer;
+	size_t length;
+
+	length = HDMI_INFOFRAME_HEADER_SIZE + frame->length;
+
+	if (size < length)
+		return -ENOSPC;
+
+	memset(buffer, 0, length);
+
+	ptr[0] = frame->type;
+	ptr[1] = frame->version;
+	ptr[2] = frame->length;
+	ptr[3] = 0; /* checksum */
+
+	memcpy(&ptr[HDMI_INFOFRAME_HEADER_SIZE], frame->data, frame->length);
+
+	hdmi_infoframe_checksum(buffer, length);
+
+	return length;
+}
+EXPORT_SYMBOL(hdmi_vendor_infoframe_pack);
diff --git a/include/linux/hdmi.h b/include/linux/hdmi.h
new file mode 100644
index 000000000000..3b589440ecfe
--- /dev/null
+++ b/include/linux/hdmi.h
@@ -0,0 +1,231 @@
+/*
+ * Copyright (C) 2012 Avionic Design GmbH
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __LINUX_HDMI_H_
+#define __LINUX_HDMI_H_
+
+#include <linux/types.h>
+
+enum hdmi_infoframe_type {
+	HDMI_INFOFRAME_TYPE_VENDOR = 0x81,
+	HDMI_INFOFRAME_TYPE_AVI = 0x82,
+	HDMI_INFOFRAME_TYPE_SPD = 0x83,
+	HDMI_INFOFRAME_TYPE_AUDIO = 0x84,
+};
+
+#define HDMI_INFOFRAME_HEADER_SIZE  4
+#define HDMI_AVI_INFOFRAME_SIZE    13
+#define HDMI_SPD_INFOFRAME_SIZE    25
+#define HDMI_AUDIO_INFOFRAME_SIZE  10
+
+enum hdmi_colorspace {
+	HDMI_COLORSPACE_RGB,
+	HDMI_COLORSPACE_YUV422,
+	HDMI_COLORSPACE_YUV444,
+};
+
+enum hdmi_scan_mode {
+	HDMI_SCAN_MODE_NONE,
+	HDMI_SCAN_MODE_OVERSCAN,
+	HDMI_SCAN_MODE_UNDERSCAN,
+};
+
+enum hdmi_colorimetry {
+	HDMI_COLORIMETRY_NONE,
+	HDMI_COLORIMETRY_ITU_601,
+	HDMI_COLORIMETRY_ITU_709,
+	HDMI_COLORIMETRY_EXTENDED,
+};
+
+enum hdmi_picture_aspect {
+	HDMI_PICTURE_ASPECT_NONE,
+	HDMI_PICTURE_ASPECT_4_3,
+	HDMI_PICTURE_ASPECT_16_9,
+};
+
+enum hdmi_active_aspect {
+	HDMI_ACTIVE_ASPECT_16_9_TOP = 2,
+	HDMI_ACTIVE_ASPECT_14_9_TOP = 3,
+	HDMI_ACTIVE_ASPECT_16_9_CENTER = 4,
+	HDMI_ACTIVE_ASPECT_PICTURE = 8,
+	HDMI_ACTIVE_ASPECT_4_3 = 9,
+	HDMI_ACTIVE_ASPECT_16_9 = 10,
+	HDMI_ACTIVE_ASPECT_14_9 = 11,
+	HDMI_ACTIVE_ASPECT_4_3_SP_14_9 = 13,
+	HDMI_ACTIVE_ASPECT_16_9_SP_14_9 = 14,
+	HDMI_ACTIVE_ASPECT_16_9_SP_4_3 = 15,
+};
+
+enum hdmi_extended_colorimetry {
+	HDMI_EXTENDED_COLORIMETRY_XV_YCC_601,
+	HDMI_EXTENDED_COLORIMETRY_XV_YCC_709,
+	HDMI_EXTENDED_COLORIMETRY_S_YCC_601,
+	HDMI_EXTENDED_COLORIMETRY_ADOBE_YCC_601,
+	HDMI_EXTENDED_COLORIMETRY_ADOBE_RGB,
+};
+
+enum hdmi_quantization_range {
+	HDMI_QUANTIZATION_RANGE_DEFAULT,
+	HDMI_QUANTIZATION_RANGE_LIMITED,
+	HDMI_QUANTIZATION_RANGE_FULL,
+};
+
+/* non-uniform picture scaling */
+enum hdmi_nups {
+	HDMI_NUPS_UNKNOWN,
+	HDMI_NUPS_HORIZONTAL,
+	HDMI_NUPS_VERTICAL,
+	HDMI_NUPS_BOTH,
+};
+
+enum hdmi_ycc_quantization_range {
+	HDMI_YCC_QUANTIZATION_RANGE_LIMITED,
+	HDMI_YCC_QUANTIZATION_RANGE_FULL,
+};
+
+enum hdmi_content_type {
+	HDMI_CONTENT_TYPE_NONE,
+	HDMI_CONTENT_TYPE_PHOTO,
+	HDMI_CONTENT_TYPE_CINEMA,
+	HDMI_CONTENT_TYPE_GAME,
+};
+
+struct hdmi_avi_infoframe {
+	enum hdmi_infoframe_type type;
+	unsigned char version;
+	unsigned char length;
+	enum hdmi_colorspace colorspace;
+	bool active_info_valid;
+	bool horizontal_bar_valid;
+	bool vertical_bar_valid;
+	enum hdmi_scan_mode scan_mode;
+	enum hdmi_colorimetry colorimetry;
+	enum hdmi_picture_aspect picture_aspect;
+	enum hdmi_active_aspect active_aspect;
+	bool itc;
+	enum hdmi_extended_colorimetry extended_colorimetry;
+	enum hdmi_quantization_range quantization_range;
+	enum hdmi_nups nups;
+	unsigned char video_code;
+	enum hdmi_ycc_quantization_range ycc_quantization_range;
+	enum hdmi_content_type content_type;
+	unsigned char pixel_repeat;
+	unsigned short top_bar;
+	unsigned short bottom_bar;
+	unsigned short left_bar;
+	unsigned short right_bar;
+};
+
+int hdmi_avi_infoframe_init(struct hdmi_avi_infoframe *frame);
+ssize_t hdmi_avi_infoframe_pack(struct hdmi_avi_infoframe *frame, void *buffer,
+				size_t size);
+
+enum hdmi_spd_sdi {
+	HDMI_SPD_SDI_UNKNOWN,
+	HDMI_SPD_SDI_DSTB,
+	HDMI_SPD_SDI_DVDP,
+	HDMI_SPD_SDI_DVHS,
+	HDMI_SPD_SDI_HDDVR,
+	HDMI_SPD_SDI_DVC,
+	HDMI_SPD_SDI_DSC,
+	HDMI_SPD_SDI_VCD,
+	HDMI_SPD_SDI_GAME,
+	HDMI_SPD_SDI_PC,
+	HDMI_SPD_SDI_BD,
+	HDMI_SPD_SDI_SACD,
+	HDMI_SPD_SDI_HDDVD,
+	HDMI_SPD_SDI_PMP,
+};
+
+struct hdmi_spd_infoframe {
+	enum hdmi_infoframe_type type;
+	unsigned char version;
+	unsigned char length;
+	char vendor[8];
+	char product[16];
+	enum hdmi_spd_sdi sdi;
+};
+
+int hdmi_spd_infoframe_init(struct hdmi_spd_infoframe *frame,
+			    const char *vendor, const char *product);
+ssize_t hdmi_spd_infoframe_pack(struct hdmi_spd_infoframe *frame, void *buffer,
+				size_t size);
+
+enum hdmi_audio_coding_type {
+	HDMI_AUDIO_CODING_TYPE_STREAM,
+	HDMI_AUDIO_CODING_TYPE_PCM,
+	HDMI_AUDIO_CODING_TYPE_AC3,
+	HDMI_AUDIO_CODING_TYPE_MPEG1,
+	HDMI_AUDIO_CODING_TYPE_MP3,
+	HDMI_AUDIO_CODING_TYPE_MPEG2,
+	HDMI_AUDIO_CODING_TYPE_AAC_LC,
+	HDMI_AUDIO_CODING_TYPE_DTS,
+	HDMI_AUDIO_CODING_TYPE_ATRAC,
+	HDMI_AUDIO_CODING_TYPE_DSD,
+	HDMI_AUDIO_CODING_TYPE_EAC3,
+	HDMI_AUDIO_CODING_TYPE_DTS_HD,
+	HDMI_AUDIO_CODING_TYPE_MLP,
+	HDMI_AUDIO_CODING_TYPE_DST,
+	HDMI_AUDIO_CODING_TYPE_WMA_PRO,
+};
+
+enum hdmi_audio_sample_size {
+	HDMI_AUDIO_SAMPLE_SIZE_STREAM,
+	HDMI_AUDIO_SAMPLE_SIZE_16,
+	HDMI_AUDIO_SAMPLE_SIZE_20,
+	HDMI_AUDIO_SAMPLE_SIZE_24,
+};
+
+enum hdmi_audio_sample_frequency {
+	HDMI_AUDIO_SAMPLE_FREQUENCY_STREAM,
+	HDMI_AUDIO_SAMPLE_FREQUENCY_32000,
+	HDMI_AUDIO_SAMPLE_FREQUENCY_44100,
+	HDMI_AUDIO_SAMPLE_FREQUENCY_48000,
+	HDMI_AUDIO_SAMPLE_FREQUENCY_88200,
+	HDMI_AUDIO_SAMPLE_FREQUENCY_96000,
+	HDMI_AUDIO_SAMPLE_FREQUENCY_176400,
+	HDMI_AUDIO_SAMPLE_FREQUENCY_192000,
+};
+
+enum hdmi_audio_coding_type_ext {
+	HDMI_AUDIO_CODING_TYPE_EXT_STREAM,
+	HDMI_AUDIO_CODING_TYPE_EXT_HE_AAC,
+	HDMI_AUDIO_CODING_TYPE_EXT_HE_AAC_V2,
+	HDMI_AUDIO_CODING_TYPE_EXT_MPEG_SURROUND,
+};
+
+struct hdmi_audio_infoframe {
+	enum hdmi_infoframe_type type;
+	unsigned char version;
+	unsigned char length;
+	unsigned char channels;
+	enum hdmi_audio_coding_type coding_type;
+	enum hdmi_audio_sample_size sample_size;
+	enum hdmi_audio_sample_frequency sample_frequency;
+	enum hdmi_audio_coding_type_ext coding_type_ext;
+	unsigned char channel_allocation;
+	unsigned char level_shift_value;
+	bool downmix_inhibit;
+
+};
+
+int hdmi_audio_infoframe_init(struct hdmi_audio_infoframe *frame);
+ssize_t hdmi_audio_infoframe_pack(struct hdmi_audio_infoframe *frame,
+				  void *buffer, size_t size);
+
+struct hdmi_vendor_infoframe {
+	enum hdmi_infoframe_type type;
+	unsigned char version;
+	unsigned char length;
+	u8 data[27];
+};
+
+ssize_t hdmi_vendor_infoframe_pack(struct hdmi_vendor_infoframe *frame,
+				   void *buffer, size_t size);
+
+#endif /* _DRM_HDMI_H */
-- 
cgit v1.2.3


From 24dea0c9feccf699749f860fa2f4ccd84d30390d Mon Sep 17 00:00:00 2001
From: Dmitry Eremin-Solenikov <dmitry_eremin@mentor.com>
Date: Thu, 31 Jan 2013 21:06:34 +0400
Subject: mtd: map: BUG() in non handled cases

Several map-related functions look like a serie of ifs, checking
widths of map. Those functions do not have any handling for default
case. Instead of fiddling with uninitialized_var in those functions,
let's just add a (correct) BUG() to the default case on those maps. This
will also allow us to catch potential errors in maps setup in future.

Signed-off-by: Dmitry Eremin-Solenikov <dmitry_eremin@mentor.com>
Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
---
 include/linux/mtd/map.h | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mtd/map.h b/include/linux/mtd/map.h
index 8b9bfd7dcaa3..4b02512e421c 100644
--- a/include/linux/mtd/map.h
+++ b/include/linux/mtd/map.h
@@ -329,7 +329,7 @@ static inline int map_word_bitsset(struct map_info *map, map_word val1, map_word
 
 static inline map_word map_word_load(struct map_info *map, const void *ptr)
 {
-	map_word r = {{0} };
+	map_word r;
 
 	if (map_bankwidth_is_1(map))
 		r.x[0] = *(unsigned char *)ptr;
@@ -343,6 +343,8 @@ static inline map_word map_word_load(struct map_info *map, const void *ptr)
 #endif
 	else if (map_bankwidth_is_large(map))
 		memcpy(r.x, ptr, map->bankwidth);
+	else
+		BUG();
 
 	return r;
 }
@@ -392,7 +394,7 @@ static inline map_word map_word_ff(struct map_info *map)
 
 static inline map_word inline_map_read(struct map_info *map, unsigned long ofs)
 {
-	map_word uninitialized_var(r);
+	map_word r;
 
 	if (map_bankwidth_is_1(map))
 		r.x[0] = __raw_readb(map->virt + ofs);
@@ -426,6 +428,8 @@ static inline void inline_map_write(struct map_info *map, const map_word datum,
 #endif
 	else if (map_bankwidth_is_large(map))
 		memcpy_toio(map->virt+ofs, datum.x, map->bankwidth);
+	else
+		BUG();
 	mb();
 }
 
-- 
cgit v1.2.3


From 45ebd3945b2a3cf4eb89d5fb0090a3cb71af7973 Mon Sep 17 00:00:00 2001
From: Clark Williams <clark.williams@gmail.com>
Date: Wed, 20 Feb 2013 09:19:09 -0600
Subject: sched: Move RR_TIMESLICE from sysctl.h to rt.h

This fixes an ia64 build bug reported by Tony Luck.

Reported-by: Tony Luck <tony.luck@gmail.com>
Signed-off-by: Clark Williams <clark.williams@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/1361373550-4011-2-git-send-email-clark.williams@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched/rt.h     | 6 ++++++
 include/linux/sched/sysctl.h | 6 ------
 2 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h
index 94e19ea28fc3..440434df3627 100644
--- a/include/linux/sched/rt.h
+++ b/include/linux/sched/rt.h
@@ -55,4 +55,10 @@ static inline bool tsk_is_pi_blocked(struct task_struct *tsk)
 extern void normalize_rt_tasks(void);
 
 
+/*
+ * default timeslice is 100 msecs (used only for SCHED_RR tasks).
+ * Timeslices get refilled after they expire.
+ */
+#define RR_TIMESLICE		(100 * HZ / 1000)
+
 #endif /* _SCHED_RT_H */
diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index d2bb0ae979d0..bf8086b2506e 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -91,12 +91,6 @@ extern unsigned int sysctl_sched_cfs_bandwidth_slice;
 extern unsigned int sysctl_sched_autogroup_enabled;
 #endif
 
-/*
- * default timeslice is 100 msecs (used only for SCHED_RR tasks).
- * Timeslices get refilled after they expire.
- */
-#define RR_TIMESLICE		(100 * HZ / 1000)
-
 extern int sched_rr_timeslice;
 
 extern int sched_rr_handler(struct ctl_table *table, int write,
-- 
cgit v1.2.3


From bc681593b588786e6326b3e5f78ccc1683e2269c Mon Sep 17 00:00:00 2001
From: Clark Williams <clark.williams@gmail.com>
Date: Fri, 22 Feb 2013 09:20:11 -0800
Subject: sched: move RR_TIMESLICE from sysctl.h to rt.h

Originally submitted by Clark Williams as part of a cleanup,
but happens also to fix an ia64 build problem:

arch/ia64/kernel/init_task.c:38: error: 'RR_TIMESLICE' undeclared here (not in a function)

Signed-off-by: Clark Williams <clark.williams@gmail.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 include/linux/sched/rt.h     | 6 ++++++
 include/linux/sched/sysctl.h | 6 ------
 2 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h
index 94e19ea28fc3..440434df3627 100644
--- a/include/linux/sched/rt.h
+++ b/include/linux/sched/rt.h
@@ -55,4 +55,10 @@ static inline bool tsk_is_pi_blocked(struct task_struct *tsk)
 extern void normalize_rt_tasks(void);
 
 
+/*
+ * default timeslice is 100 msecs (used only for SCHED_RR tasks).
+ * Timeslices get refilled after they expire.
+ */
+#define RR_TIMESLICE		(100 * HZ / 1000)
+
 #endif /* _SCHED_RT_H */
diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index d2bb0ae979d0..bf8086b2506e 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -91,12 +91,6 @@ extern unsigned int sysctl_sched_cfs_bandwidth_slice;
 extern unsigned int sysctl_sched_autogroup_enabled;
 #endif
 
-/*
- * default timeslice is 100 msecs (used only for SCHED_RR tasks).
- * Timeslices get refilled after they expire.
- */
-#define RR_TIMESLICE		(100 * HZ / 1000)
-
 extern int sched_rr_timeslice;
 
 extern int sched_rr_handler(struct ctl_table *table, int write,
-- 
cgit v1.2.3


From d6561bb206aae9de8eee4516549339ee96386b87 Mon Sep 17 00:00:00 2001
From: Shawn Guo <shawn.guo@linaro.org>
Date: Thu, 21 Feb 2013 11:04:45 +0000
Subject: PM / OPP: fix condition for empty of_init_opp_table()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

randconfig build reports the following error which is caused by
CONFIG_PM_OPP being unset.

  CC      arch/arm/mach-imx/mach-imx6q.o
arch/arm/mach-imx/mach-imx6q.c: In function ‘imx6q_opp_init’:
arch/arm/mach-imx/mach-imx6q.c:248:2: error: implicit declaration of function ‘of_init_opp_table’ [-Werror=implicit-function-declaration]

Fix the error by giving a more correct condition for empty
of_init_opp_table() implementation.

Reported-by: Rob Herring <robherring2@gmail.com>
Signed-off-by: Shawn Guo <shawn.guo@linaro.org>
Acked-by: Nishanth Menon <nm@ti.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/opp.h | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/opp.h b/include/linux/opp.h
index 214e0ebcb84d..3aca2b8def33 100644
--- a/include/linux/opp.h
+++ b/include/linux/opp.h
@@ -47,15 +47,6 @@ int opp_enable(struct device *dev, unsigned long freq);
 int opp_disable(struct device *dev, unsigned long freq);
 
 struct srcu_notifier_head *opp_get_notifier(struct device *dev);
-
-#ifdef CONFIG_OF
-int of_init_opp_table(struct device *dev);
-#else
-static inline int of_init_opp_table(struct device *dev)
-{
-	return -EINVAL;
-}
-#endif /* CONFIG_OF */
 #else
 static inline unsigned long opp_get_voltage(struct opp *opp)
 {
@@ -112,6 +103,15 @@ static inline struct srcu_notifier_head *opp_get_notifier(struct device *dev)
 }
 #endif		/* CONFIG_PM_OPP */
 
+#if defined(CONFIG_PM_OPP) && defined(CONFIG_OF)
+int of_init_opp_table(struct device *dev);
+#else
+static inline int of_init_opp_table(struct device *dev)
+{
+	return -EINVAL;
+}
+#endif
+
 #if defined(CONFIG_CPU_FREQ) && defined(CONFIG_PM_OPP)
 int opp_init_cpufreq_table(struct device *dev,
 			    struct cpufreq_frequency_table **table);
-- 
cgit v1.2.3


From 496ad9aa8ef448058e36ca7a787c61f2e63f0f54 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 23 Jan 2013 17:07:38 -0500
Subject: new helper: file_inode(file)

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/alpha/kernel/srm_env.c                     |  2 +-
 arch/blackfin/kernel/cplbinfo.c                 |  2 +-
 arch/cris/arch-v10/drivers/sync_serial.c        |  8 ++--
 arch/cris/arch-v32/drivers/cryptocop.c          |  3 +-
 arch/cris/arch-v32/drivers/sync_serial.c        |  8 ++--
 arch/ia64/kernel/salinfo.c                      |  6 +--
 arch/mips/kernel/rtlx.c                         | 13 ++-----
 arch/mips/kernel/vpe.c                          |  2 +-
 arch/mips/lasat/picvue_proc.c                   |  2 +-
 arch/powerpc/kernel/proc_powerpc.c              |  6 +--
 arch/powerpc/kernel/rtas_flash.c                | 16 ++++----
 arch/powerpc/platforms/cell/spufs/coredump.c    |  4 +-
 arch/powerpc/platforms/cell/spufs/file.c        |  6 +--
 arch/powerpc/platforms/cell/spufs/inode.c       |  2 +-
 arch/powerpc/platforms/cell/spufs/syscalls.c    |  2 +-
 arch/powerpc/platforms/pseries/hvCall_inst.c    |  2 +-
 arch/powerpc/platforms/pseries/scanlog.c        |  8 +---
 arch/s390/hypfs/hypfs_dbfs.c                    |  2 +-
 arch/s390/hypfs/inode.c                         |  2 +-
 arch/s390/kernel/debug.c                        |  2 +-
 arch/s390/pci/pci_debug.c                       |  4 +-
 arch/sh/mm/alignment.c                          |  2 +-
 arch/x86/ia32/ia32_aout.c                       |  6 +--
 arch/x86/kernel/cpuid.c                         |  4 +-
 drivers/block/DAC960.c                          |  2 +-
 drivers/block/nbd.c                             |  2 +-
 drivers/char/dsp56k.c                           |  8 ++--
 drivers/char/dtlk.c                             |  4 +-
 drivers/char/lp.c                               |  8 ++--
 drivers/char/mem.c                              |  4 +-
 drivers/char/nsc_gpio.c                         |  4 +-
 drivers/char/pcmcia/cm4000_cs.c                 |  2 +-
 drivers/char/ppdev.c                            |  6 +--
 drivers/char/ps3flash.c                         |  2 +-
 drivers/char/raw.c                              |  2 +-
 drivers/char/sonypi.c                           |  2 +-
 drivers/char/tb0219.c                           |  4 +-
 drivers/gpu/drm/gma500/gtt.c                    |  2 +-
 drivers/gpu/drm/i915/i915_gem.c                 | 10 ++---
 drivers/gpu/drm/ttm/ttm_tt.c                    |  4 +-
 drivers/gpu/drm/udl/udl_gem.c                   |  2 +-
 drivers/hid/hid-roccat.c                        |  2 +-
 drivers/hid/hidraw.c                            |  6 +--
 drivers/i2c/i2c-dev.c                           |  4 +-
 drivers/ide/ide-proc.c                          |  4 +-
 drivers/infiniband/core/uverbs_cmd.c            |  2 +-
 drivers/infiniband/hw/ipath/ipath_file_ops.c    |  4 +-
 drivers/infiniband/hw/ipath/ipath_fs.c          |  6 +--
 drivers/infiniband/hw/qib/qib_file_ops.c        |  2 +-
 drivers/infiniband/hw/qib/qib_fs.c              |  4 +-
 drivers/iommu/tegra-smmu.c                      |  4 +-
 drivers/isdn/hardware/eicon/divasproc.c         |  6 +--
 drivers/isdn/hysdn/hysdn_proclog.c              |  4 +-
 drivers/isdn/i4l/isdn_common.c                  |  8 ++--
 drivers/isdn/i4l/isdn_ppp.c                     |  2 +-
 drivers/md/bitmap.c                             |  4 +-
 drivers/media/pci/zoran/zoran_procfs.c          |  2 +-
 drivers/media/rc/lirc_dev.c                     | 14 +++----
 drivers/media/v4l2-core/v4l2-dev.c              |  2 +-
 drivers/mtd/ubi/cdev.c                          |  2 +-
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c |  2 +-
 drivers/net/wan/cosa.c                          |  4 +-
 drivers/net/wireless/ray_cs.c                   |  2 +-
 drivers/parisc/led.c                            |  2 +-
 drivers/pci/proc.c                              | 10 ++---
 drivers/platform/x86/sony-laptop.c              |  2 +-
 drivers/platform/x86/thinkpad_acpi.c            |  2 +-
 drivers/platform/x86/toshiba_acpi.c             |  8 ++--
 drivers/pnp/isapnp/proc.c                       |  4 +-
 drivers/pnp/pnpbios/proc.c                      |  2 +-
 drivers/s390/char/fs3270.c                      |  4 +-
 drivers/s390/char/tape_char.c                   |  8 ++--
 drivers/s390/char/vmur.c                        |  2 +-
 drivers/s390/cio/qdio_debug.c                   |  4 +-
 drivers/sbus/char/display7seg.c                 |  2 +-
 drivers/scsi/3w-9xxx.c                          |  2 +-
 drivers/scsi/3w-sas.c                           |  2 +-
 drivers/scsi/3w-xxxx.c                          |  2 +-
 drivers/scsi/csiostor/csio_init.c               |  2 +-
 drivers/scsi/dpt_i2o.c                          |  4 +-
 drivers/scsi/st.c                               |  2 +-
 drivers/staging/bcm/Misc.c                      |  2 +-
 drivers/staging/ccg/f_mass_storage.c            |  2 +-
 drivers/staging/ccg/rndis.c                     |  2 +-
 drivers/staging/ccg/storage_common.c            |  2 +-
 drivers/staging/dgrp/dgrp_specproc.c            |  4 +-
 drivers/staging/omapdrm/omap_gem_helpers.c      |  2 +-
 drivers/staging/usbip/usbip_common.c            |  2 +-
 drivers/staging/vme/devices/vme_user.c          |  8 ++--
 drivers/target/target_core_file.c               |  2 +-
 drivers/tty/vt/vc_screen.c                      |  8 ++--
 drivers/usb/core/devices.c                      |  4 +-
 drivers/usb/core/devio.c                        |  6 +--
 drivers/usb/gadget/atmel_usba_udc.c             |  8 ++--
 drivers/usb/gadget/f_mass_storage.c             |  2 +-
 drivers/usb/gadget/printer.c                    |  2 +-
 drivers/usb/gadget/rndis.c                      |  2 +-
 drivers/usb/gadget/storage_common.c             |  2 +-
 drivers/video/fb_defio.c                        |  2 +-
 drivers/video/fbmem.c                           |  2 +-
 drivers/video/msm/mdp.c                         |  2 +-
 drivers/watchdog/cpwd.c                         |  4 +-
 drivers/zorro/proc.c                            |  4 +-
 fs/9p/vfs_file.c                                | 10 ++---
 fs/adfs/dir.c                                   |  2 +-
 fs/affs/dir.c                                   |  2 +-
 fs/afs/dir.c                                    |  4 +-
 fs/afs/flock.c                                  |  4 +-
 fs/afs/write.c                                  |  7 ++--
 fs/autofs4/autofs_i.h                           |  2 +-
 fs/autofs4/dev-ioctl.c                          |  2 +-
 fs/autofs4/root.c                               |  4 +-
 fs/befs/linuxvfs.c                              |  2 +-
 fs/bfs/dir.c                                    |  2 +-
 fs/binfmt_aout.c                                |  4 +-
 fs/binfmt_elf.c                                 |  2 +-
 fs/binfmt_elf_fdpic.c                           |  4 +-
 fs/binfmt_flat.c                                |  2 +-
 fs/binfmt_misc.c                                |  4 +-
 fs/block_dev.c                                  |  2 +-
 fs/btrfs/file.c                                 |  8 ++--
 fs/btrfs/inode.c                                |  4 +-
 fs/btrfs/ioctl.c                                | 52 ++++++++++++-------------
 fs/btrfs/send.c                                 |  2 +-
 fs/buffer.c                                     |  4 +-
 fs/ceph/addr.c                                  | 12 +++---
 fs/ceph/dir.c                                   |  6 +--
 fs/ceph/file.c                                  | 10 ++---
 fs/ceph/ioctl.c                                 | 16 ++++----
 fs/ceph/locks.c                                 |  2 +-
 fs/cifs/cifsfs.c                                |  6 +--
 fs/cifs/file.c                                  | 26 ++++++-------
 fs/cifs/inode.c                                 |  8 ++--
 fs/cifs/ioctl.c                                 |  2 +-
 fs/cifs/readdir.c                               |  4 +-
 fs/coda/dir.c                                   |  2 +-
 fs/coda/file.c                                  | 12 +++---
 fs/coda/inode.c                                 |  2 +-
 fs/coda/pioctl.c                                |  2 +-
 fs/compat_ioctl.c                               |  2 +-
 fs/configfs/dir.c                               |  2 +-
 fs/coredump.c                                   |  4 +-
 fs/cramfs/inode.c                               |  2 +-
 fs/ecryptfs/file.c                              |  4 +-
 fs/efs/dir.c                                    |  2 +-
 fs/exec.c                                       |  8 ++--
 fs/exofs/dir.c                                  |  2 +-
 fs/ext2/dir.c                                   |  2 +-
 fs/ext2/ioctl.c                                 |  2 +-
 fs/ext3/dir.c                                   |  8 ++--
 fs/ext3/ioctl.c                                 |  2 +-
 fs/ext3/namei.c                                 |  4 +-
 fs/ext4/dir.c                                   |  8 ++--
 fs/ext4/extents.c                               |  4 +-
 fs/ext4/file.c                                  |  2 +-
 fs/ext4/inline.c                                |  2 +-
 fs/ext4/inode.c                                 |  6 +--
 fs/ext4/ioctl.c                                 |  2 +-
 fs/ext4/move_extent.c                           |  6 +--
 fs/ext4/namei.c                                 |  2 +-
 fs/ext4/super.c                                 |  2 +-
 fs/f2fs/dir.c                                   |  2 +-
 fs/fat/dir.c                                    |  6 +--
 fs/fat/file.c                                   |  4 +-
 fs/fcntl.c                                      |  2 +-
 fs/file_table.c                                 |  2 +-
 fs/freevxfs/vxfs_lookup.c                       |  2 +-
 fs/fuse/control.c                               |  2 +-
 fs/fuse/dir.c                                   |  2 +-
 fs/gfs2/file.c                                  | 17 ++++----
 fs/gfs2/rgrp.c                                  |  2 +-
 fs/hfs/dir.c                                    |  2 +-
 fs/hfs/inode.c                                  |  2 +-
 fs/hfsplus/dir.c                                |  2 +-
 fs/hfsplus/inode.c                              |  2 +-
 fs/hfsplus/ioctl.c                              |  4 +-
 fs/hostfs/hostfs_kern.c                         |  2 +-
 fs/hpfs/dir.c                                   |  4 +-
 fs/hpfs/file.c                                  |  2 +-
 fs/hppfs/hppfs.c                                |  8 ++--
 fs/hugetlbfs/inode.c                            |  2 +-
 fs/inode.c                                      |  2 +-
 fs/ioctl.c                                      | 12 +++---
 fs/isofs/compress.c                             |  2 +-
 fs/isofs/dir.c                                  |  2 +-
 fs/jffs2/dir.c                                  |  4 +-
 fs/jfs/ioctl.c                                  |  2 +-
 fs/jfs/jfs_dtree.c                              |  2 +-
 fs/lockd/clntlock.c                             |  2 +-
 fs/lockd/clntproc.c                             |  2 +-
 fs/lockd/svclock.c                              | 16 ++++----
 fs/lockd/svcsubs.c                              |  2 +-
 fs/locks.c                                      | 24 ++++++------
 fs/logfs/dir.c                                  |  4 +-
 fs/logfs/file.c                                 |  2 +-
 fs/minix/dir.c                                  |  2 +-
 fs/namei.c                                      |  2 +-
 fs/namespace.c                                  |  2 +-
 fs/ncpfs/inode.c                                |  4 +-
 fs/ncpfs/ioctl.c                                |  2 +-
 fs/ncpfs/mmap.c                                 |  2 +-
 fs/nfs/dir.c                                    |  8 ++--
 fs/nfs/file.c                                   |  2 +-
 fs/nfs/idmap.c                                  |  2 +-
 fs/nfs/inode.c                                  |  4 +-
 fs/nfs/nfs3proc.c                               |  2 +-
 fs/nfs/nfs4file.c                               |  2 +-
 fs/nfs/proc.c                                   |  2 +-
 fs/nfsd/fault_inject.c                          |  6 +--
 fs/nfsd/nfsctl.c                                |  2 +-
 fs/nfsd/vfs.c                                   |  6 +--
 fs/nilfs2/dir.c                                 |  2 +-
 fs/nilfs2/file.c                                |  2 +-
 fs/nilfs2/ioctl.c                               |  2 +-
 fs/notify/dnotify/dnotify.c                     |  4 +-
 fs/notify/fanotify/fanotify_user.c              |  2 +-
 fs/ntfs/dir.c                                   |  2 +-
 fs/ocfs2/aops.c                                 |  4 +-
 fs/ocfs2/dir.c                                  |  2 +-
 fs/ocfs2/dlmfs/dlmfs.c                          |  6 +--
 fs/ocfs2/file.c                                 | 10 ++---
 fs/ocfs2/ioctl.c                                |  4 +-
 fs/ocfs2/mmap.c                                 |  8 ++--
 fs/ocfs2/move_extents.c                         |  2 +-
 fs/ocfs2/refcounttree.c                         |  4 +-
 fs/omfs/dir.c                                   |  4 +-
 fs/open.c                                       |  6 +--
 fs/openpromfs/inode.c                           |  2 +-
 fs/pipe.c                                       | 16 ++++----
 fs/proc/base.c                                  | 38 +++++++++---------
 fs/proc/generic.c                               | 10 ++---
 fs/proc/inode.c                                 | 14 +++----
 fs/proc/nommu.c                                 |  2 +-
 fs/proc/proc_net.c                              |  2 +-
 fs/proc/proc_sysctl.c                           |  4 +-
 fs/proc/task_mmu.c                              |  6 +--
 fs/proc/task_nommu.c                            |  2 +-
 fs/qnx4/dir.c                                   |  2 +-
 fs/qnx6/dir.c                                   |  2 +-
 fs/ramfs/file-nommu.c                           |  2 +-
 fs/read_write.c                                 |  8 ++--
 fs/readdir.c                                    |  2 +-
 fs/reiserfs/file.c                              |  2 +-
 fs/reiserfs/ioctl.c                             |  2 +-
 fs/reiserfs/procfs.c                            |  2 +-
 fs/romfs/super.c                                |  2 +-
 fs/splice.c                                     |  2 +-
 fs/squashfs/dir.c                               |  2 +-
 fs/sync.c                                       |  2 +-
 fs/sysfs/bin.c                                  |  6 +--
 fs/sysv/dir.c                                   |  2 +-
 fs/ubifs/dir.c                                  |  2 +-
 fs/ubifs/file.c                                 |  2 +-
 fs/ubifs/ioctl.c                                |  2 +-
 fs/udf/dir.c                                    |  2 +-
 fs/udf/file.c                                   |  4 +-
 fs/ufs/dir.c                                    |  2 +-
 fs/xfs/xfs_dfrag.c                              |  8 ++--
 fs/xfs/xfs_file.c                               |  4 +-
 fs/xfs/xfs_ioctl.c                              |  6 +--
 fs/xfs/xfs_ioctl32.c                            |  2 +-
 include/linux/fs.h                              |  9 ++++-
 include/linux/fsnotify.h                        |  2 +-
 include/linux/hugetlb.h                         |  2 +-
 include/linux/lockd/lockd.h                     |  2 +-
 ipc/mqueue.c                                    | 16 ++++----
 ipc/shm.c                                       |  8 ++--
 kernel/acct.c                                   |  2 +-
 kernel/cgroup.c                                 |  6 +--
 kernel/events/core.c                            |  2 +-
 kernel/fork.c                                   |  2 +-
 kernel/irq/proc.c                               |  2 +-
 kernel/nsproxy.c                                |  2 +-
 kernel/relay.c                                  |  4 +-
 kernel/sys.c                                    |  8 ++--
 mm/fadvise.c                                    |  2 +-
 mm/filemap.c                                    |  2 +-
 mm/hugetlb.c                                    |  4 +-
 mm/mmap.c                                       |  8 ++--
 mm/nommu.c                                      | 12 +++---
 mm/shmem.c                                      | 12 +++---
 mm/swapfile.c                                   |  2 +-
 net/atm/proc.c                                  |  2 +-
 net/core/net_namespace.c                        |  2 +-
 net/ipv4/netfilter/ipt_CLUSTERIP.c              |  2 +-
 net/netfilter/xt_recent.c                       |  2 +-
 net/netlink/af_netlink.c                        |  2 +-
 net/sunrpc/auth_gss/auth_gss.c                  |  2 +-
 net/sunrpc/cache.c                              | 28 ++++++-------
 net/sunrpc/rpc_pipe.c                           | 10 ++---
 net/unix/garbage.c                              |  2 +-
 security/apparmor/domain.c                      |  4 +-
 security/apparmor/file.c                        |  4 +-
 security/apparmor/lsm.c                         |  6 +--
 security/integrity/ima/ima_api.c                |  6 +--
 security/integrity/ima/ima_crypto.c             |  2 +-
 security/integrity/ima/ima_main.c               |  4 +-
 security/selinux/hooks.c                        | 10 ++---
 security/selinux/selinuxfs.c                    | 20 ++++------
 security/smack/smack_lsm.c                      | 14 ++-----
 security/tomoyo/securityfs_if.c                 |  2 +-
 sound/core/info.c                               |  2 +-
 sound/core/pcm_native.c                         |  2 +-
 sound/oss/msnd_pinnacle.c                       |  6 +--
 sound/oss/soundcard.c                           | 10 ++---
 sound/sound_firmware.c                          |  2 +-
 306 files changed, 696 insertions(+), 717 deletions(-)

(limited to 'include/linux')

diff --git a/arch/alpha/kernel/srm_env.c b/arch/alpha/kernel/srm_env.c
index b9fc6c309d2e..e64559f0a82d 100644
--- a/arch/alpha/kernel/srm_env.c
+++ b/arch/alpha/kernel/srm_env.c
@@ -111,7 +111,7 @@ static ssize_t srm_env_proc_write(struct file *file, const char __user *buffer,
 				  size_t count, loff_t *pos)
 {
 	int res;
-	srm_env_t	*entry = PDE(file->f_path.dentry->d_inode)->data;
+	srm_env_t	*entry = PDE(file_inode(file))->data;
 	char		*buf = (char *) __get_free_page(GFP_USER);
 	unsigned long	ret1, ret2;
 
diff --git a/arch/blackfin/kernel/cplbinfo.c b/arch/blackfin/kernel/cplbinfo.c
index 0bdaa517a501..e1d0b24c6070 100644
--- a/arch/blackfin/kernel/cplbinfo.c
+++ b/arch/blackfin/kernel/cplbinfo.c
@@ -116,7 +116,7 @@ static const struct seq_operations cplbinfo_sops = {
 
 static int cplbinfo_open(struct inode *inode, struct file *file)
 {
-	struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode);
+	struct proc_dir_entry *pde = PDE(file_inode(file));
 	char cplb_type;
 	unsigned int cpu;
 	int ret;
diff --git a/arch/cris/arch-v10/drivers/sync_serial.c b/arch/cris/arch-v10/drivers/sync_serial.c
index c4b71710fb0e..a1c498d18d31 100644
--- a/arch/cris/arch-v10/drivers/sync_serial.c
+++ b/arch/cris/arch-v10/drivers/sync_serial.c
@@ -654,7 +654,7 @@ static int sync_serial_release(struct inode *inode, struct file *file)
 
 static unsigned int sync_serial_poll(struct file *file, poll_table *wait)
 {
-	int dev = MINOR(file->f_dentry->d_inode->i_rdev);
+	int dev = MINOR(file_inode(file)->i_rdev);
 	unsigned int mask = 0;
 	struct sync_port *port;
 	DEBUGPOLL(static unsigned int prev_mask = 0);
@@ -685,7 +685,7 @@ static int sync_serial_ioctl_unlocked(struct file *file,
 	int return_val = 0;
 	unsigned long flags;
 
-	int dev = MINOR(file->f_dentry->d_inode->i_rdev);
+	int dev = MINOR(file_inode(file)->i_rdev);
 	struct sync_port *port;
 
 	if (dev < 0 || dev >= NUMBER_OF_PORTS || !ports[dev].enabled) {
@@ -973,7 +973,7 @@ static long sync_serial_ioctl(struct file *file,
 static ssize_t sync_serial_write(struct file *file, const char *buf,
 	size_t count, loff_t *ppos)
 {
-	int dev = MINOR(file->f_dentry->d_inode->i_rdev);
+	int dev = MINOR(file_inode(file)->i_rdev);
 	DECLARE_WAITQUEUE(wait, current);
 	struct sync_port *port;
 	unsigned long flags;
@@ -1097,7 +1097,7 @@ static ssize_t sync_serial_write(struct file *file, const char *buf,
 static ssize_t sync_serial_read(struct file *file, char *buf,
 				size_t count, loff_t *ppos)
 {
-	int dev = MINOR(file->f_dentry->d_inode->i_rdev);
+	int dev = MINOR(file_inode(file)->i_rdev);
 	int avail;
 	struct sync_port *port;
 	unsigned char *start;
diff --git a/arch/cris/arch-v32/drivers/cryptocop.c b/arch/cris/arch-v32/drivers/cryptocop.c
index f8476d9e856b..877da1908234 100644
--- a/arch/cris/arch-v32/drivers/cryptocop.c
+++ b/arch/cris/arch-v32/drivers/cryptocop.c
@@ -3135,11 +3135,10 @@ static long cryptocop_ioctl_unlocked(struct inode *inode,
 static long
 cryptocop_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
-       struct inode *inode = file->f_path.dentry->d_inode;
        long ret;
 
        mutex_lock(&cryptocop_mutex);
-       ret = cryptocop_ioctl_unlocked(inode, filp, cmd, arg);
+       ret = cryptocop_ioctl_unlocked(file_inode(filp), filp, cmd, arg);
        mutex_unlock(&cryptocop_mutex);
 
        return ret;
diff --git a/arch/cris/arch-v32/drivers/sync_serial.c b/arch/cris/arch-v32/drivers/sync_serial.c
index a6a180bc566f..219f704e3221 100644
--- a/arch/cris/arch-v32/drivers/sync_serial.c
+++ b/arch/cris/arch-v32/drivers/sync_serial.c
@@ -609,7 +609,7 @@ static int sync_serial_release(struct inode *inode, struct file *file)
 
 static unsigned int sync_serial_poll(struct file *file, poll_table *wait)
 {
-	int dev = iminor(file->f_path.dentry->d_inode);
+	int dev = iminor(file_inode(file));
 	unsigned int mask = 0;
 	sync_port *port;
 	DEBUGPOLL( static unsigned int prev_mask = 0; );
@@ -657,7 +657,7 @@ static int sync_serial_ioctl(struct file *file,
 {
 	int return_val = 0;
 	int dma_w_size = regk_dma_set_w_size1;
-	int dev = iminor(file->f_path.dentry->d_inode);
+	int dev = iminor(file_inode(file));
 	sync_port *port;
 	reg_sser_rw_tr_cfg tr_cfg;
 	reg_sser_rw_rec_cfg rec_cfg;
@@ -979,7 +979,7 @@ static long sync_serial_ioctl(struct file *file,
 static ssize_t sync_serial_write(struct file *file, const char *buf,
 				 size_t count, loff_t *ppos)
 {
-	int dev = iminor(file->f_path.dentry->d_inode);
+	int dev = iminor(file_inode(file));
 	DECLARE_WAITQUEUE(wait, current);
 	struct sync_port *port;
 	int trunc_count;
@@ -1102,7 +1102,7 @@ static ssize_t sync_serial_write(struct file *file, const char *buf,
 static ssize_t sync_serial_read(struct file * file, char * buf,
 				size_t count, loff_t *ppos)
 {
-	int dev = iminor(file->f_path.dentry->d_inode);
+	int dev = iminor(file_inode(file));
 	int avail;
 	sync_port *port;
 	unsigned char* start;
diff --git a/arch/ia64/kernel/salinfo.c b/arch/ia64/kernel/salinfo.c
index 79802e540e53..aa527d7e91f2 100644
--- a/arch/ia64/kernel/salinfo.c
+++ b/arch/ia64/kernel/salinfo.c
@@ -301,7 +301,7 @@ salinfo_event_open(struct inode *inode, struct file *file)
 static ssize_t
 salinfo_event_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos)
 {
-	struct inode *inode = file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	struct proc_dir_entry *entry = PDE(inode);
 	struct salinfo_data *data = entry->data;
 	char cmd[32];
@@ -463,7 +463,7 @@ retry:
 static ssize_t
 salinfo_log_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos)
 {
-	struct inode *inode = file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	struct proc_dir_entry *entry = PDE(inode);
 	struct salinfo_data *data = entry->data;
 	u8 *buf;
@@ -524,7 +524,7 @@ salinfo_log_clear(struct salinfo_data *data, int cpu)
 static ssize_t
 salinfo_log_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos)
 {
-	struct inode *inode = file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	struct proc_dir_entry *entry = PDE(inode);
 	struct salinfo_data *data = entry->data;
 	char cmd[32];
diff --git a/arch/mips/kernel/rtlx.c b/arch/mips/kernel/rtlx.c
index b8c18dcdd2c4..88f7b50d541c 100644
--- a/arch/mips/kernel/rtlx.c
+++ b/arch/mips/kernel/rtlx.c
@@ -399,11 +399,9 @@ static int file_release(struct inode *inode, struct file *filp)
 
 static unsigned int file_poll(struct file *file, poll_table * wait)
 {
-	int minor;
+	int minor = iminor(file_inode(file));
 	unsigned int mask = 0;
 
-	minor = iminor(file->f_path.dentry->d_inode);
-
 	poll_wait(file, &channel_wqs[minor].rt_queue, wait);
 	poll_wait(file, &channel_wqs[minor].lx_queue, wait);
 
@@ -424,7 +422,7 @@ static unsigned int file_poll(struct file *file, poll_table * wait)
 static ssize_t file_read(struct file *file, char __user * buffer, size_t count,
 			 loff_t * ppos)
 {
-	int minor = iminor(file->f_path.dentry->d_inode);
+	int minor = iminor(file_inode(file));
 
 	/* data available? */
 	if (!rtlx_read_poll(minor, (file->f_flags & O_NONBLOCK) ? 0 : 1)) {
@@ -437,11 +435,8 @@ static ssize_t file_read(struct file *file, char __user * buffer, size_t count,
 static ssize_t file_write(struct file *file, const char __user * buffer,
 			  size_t count, loff_t * ppos)
 {
-	int minor;
-	struct rtlx_channel *rt;
-
-	minor = iminor(file->f_path.dentry->d_inode);
-	rt = &rtlx->channel[minor];
+	int minor = iminor(file_inode(file));
+	struct rtlx_channel *rt = &rtlx->channel[minor];
 
 	/* any space left... */
 	if (!rtlx_write_poll(minor)) {
diff --git a/arch/mips/kernel/vpe.c b/arch/mips/kernel/vpe.c
index eec690af6581..d75a5289d9b3 100644
--- a/arch/mips/kernel/vpe.c
+++ b/arch/mips/kernel/vpe.c
@@ -1149,7 +1149,7 @@ static ssize_t vpe_write(struct file *file, const char __user * buffer,
 	size_t ret = count;
 	struct vpe *v;
 
-	if (iminor(file->f_path.dentry->d_inode) != minor)
+	if (iminor(file_inode(file)) != minor)
 		return -ENODEV;
 
 	v = get_vpe(tclimit);
diff --git a/arch/mips/lasat/picvue_proc.c b/arch/mips/lasat/picvue_proc.c
index 8e388da1926f..c592bc8b8c99 100644
--- a/arch/mips/lasat/picvue_proc.c
+++ b/arch/mips/lasat/picvue_proc.c
@@ -64,7 +64,7 @@ static int pvc_line_proc_open(struct inode *inode, struct file *file)
 static ssize_t pvc_line_proc_write(struct file *file, const char __user *buf,
 				   size_t count, loff_t *pos)
 {
-	int lineno = *(int *)PDE(file->f_path.dentry->d_inode)->data;
+	int lineno = *(int *)PDE(file_inode(file))->data;
 	char kbuf[PVC_LINELEN];
 	size_t len;
 
diff --git a/arch/powerpc/kernel/proc_powerpc.c b/arch/powerpc/kernel/proc_powerpc.c
index c8ae3714e79b..f19d0bdc3241 100644
--- a/arch/powerpc/kernel/proc_powerpc.c
+++ b/arch/powerpc/kernel/proc_powerpc.c
@@ -32,7 +32,7 @@
 static loff_t page_map_seek( struct file *file, loff_t off, int whence)
 {
 	loff_t new;
-	struct proc_dir_entry *dp = PDE(file->f_path.dentry->d_inode);
+	struct proc_dir_entry *dp = PDE(file_inode(file));
 
 	switch(whence) {
 	case 0:
@@ -55,13 +55,13 @@ static loff_t page_map_seek( struct file *file, loff_t off, int whence)
 static ssize_t page_map_read( struct file *file, char __user *buf, size_t nbytes,
 			      loff_t *ppos)
 {
-	struct proc_dir_entry *dp = PDE(file->f_path.dentry->d_inode);
+	struct proc_dir_entry *dp = PDE(file_inode(file));
 	return simple_read_from_buffer(buf, nbytes, ppos, dp->data, dp->size);
 }
 
 static int page_map_mmap( struct file *file, struct vm_area_struct *vma )
 {
-	struct proc_dir_entry *dp = PDE(file->f_path.dentry->d_inode);
+	struct proc_dir_entry *dp = PDE(file_inode(file));
 
 	if ((vma->vm_end - vma->vm_start) > dp->size)
 		return -EINVAL;
diff --git a/arch/powerpc/kernel/rtas_flash.c b/arch/powerpc/kernel/rtas_flash.c
index 8329190312c1..c642f0132988 100644
--- a/arch/powerpc/kernel/rtas_flash.c
+++ b/arch/powerpc/kernel/rtas_flash.c
@@ -191,7 +191,7 @@ static void free_flash_list(struct flash_block_list *f)
 
 static int rtas_flash_release(struct inode *inode, struct file *file)
 {
-	struct proc_dir_entry *dp = PDE(file->f_path.dentry->d_inode);
+	struct proc_dir_entry *dp = PDE(file_inode(file));
 	struct rtas_update_flash_t *uf;
 	
 	uf = (struct rtas_update_flash_t *) dp->data;
@@ -253,7 +253,7 @@ static void get_flash_status_msg(int status, char *buf)
 static ssize_t rtas_flash_read(struct file *file, char __user *buf,
 			       size_t count, loff_t *ppos)
 {
-	struct proc_dir_entry *dp = PDE(file->f_path.dentry->d_inode);
+	struct proc_dir_entry *dp = PDE(file_inode(file));
 	struct rtas_update_flash_t *uf;
 	char msg[RTAS_MSG_MAXLEN];
 
@@ -282,7 +282,7 @@ void rtas_block_ctor(void *ptr)
 static ssize_t rtas_flash_write(struct file *file, const char __user *buffer,
 				size_t count, loff_t *off)
 {
-	struct proc_dir_entry *dp = PDE(file->f_path.dentry->d_inode);
+	struct proc_dir_entry *dp = PDE(file_inode(file));
 	struct rtas_update_flash_t *uf;
 	char *p;
 	int next_free;
@@ -374,7 +374,7 @@ static void manage_flash(struct rtas_manage_flash_t *args_buf)
 static ssize_t manage_flash_read(struct file *file, char __user *buf,
 			       size_t count, loff_t *ppos)
 {
-	struct proc_dir_entry *dp = PDE(file->f_path.dentry->d_inode);
+	struct proc_dir_entry *dp = PDE(file_inode(file));
 	struct rtas_manage_flash_t *args_buf;
 	char msg[RTAS_MSG_MAXLEN];
 	int msglen;
@@ -391,7 +391,7 @@ static ssize_t manage_flash_read(struct file *file, char __user *buf,
 static ssize_t manage_flash_write(struct file *file, const char __user *buf,
 				size_t count, loff_t *off)
 {
-	struct proc_dir_entry *dp = PDE(file->f_path.dentry->d_inode);
+	struct proc_dir_entry *dp = PDE(file_inode(file));
 	struct rtas_manage_flash_t *args_buf;
 	const char reject_str[] = "0";
 	const char commit_str[] = "1";
@@ -462,7 +462,7 @@ static int get_validate_flash_msg(struct rtas_validate_flash_t *args_buf,
 static ssize_t validate_flash_read(struct file *file, char __user *buf,
 			       size_t count, loff_t *ppos)
 {
-	struct proc_dir_entry *dp = PDE(file->f_path.dentry->d_inode);
+	struct proc_dir_entry *dp = PDE(file_inode(file));
 	struct rtas_validate_flash_t *args_buf;
 	char msg[RTAS_MSG_MAXLEN];
 	int msglen;
@@ -477,7 +477,7 @@ static ssize_t validate_flash_read(struct file *file, char __user *buf,
 static ssize_t validate_flash_write(struct file *file, const char __user *buf,
 				    size_t count, loff_t *off)
 {
-	struct proc_dir_entry *dp = PDE(file->f_path.dentry->d_inode);
+	struct proc_dir_entry *dp = PDE(file_inode(file));
 	struct rtas_validate_flash_t *args_buf;
 	int rc;
 
@@ -526,7 +526,7 @@ done:
 
 static int validate_flash_release(struct inode *inode, struct file *file)
 {
-	struct proc_dir_entry *dp = PDE(file->f_path.dentry->d_inode);
+	struct proc_dir_entry *dp = PDE(file_inode(file));
 	struct rtas_validate_flash_t *args_buf;
 
 	args_buf = (struct rtas_validate_flash_t *) dp->data;
diff --git a/arch/powerpc/platforms/cell/spufs/coredump.c b/arch/powerpc/platforms/cell/spufs/coredump.c
index 657e3f233a64..c9500ea7be2f 100644
--- a/arch/powerpc/platforms/cell/spufs/coredump.c
+++ b/arch/powerpc/platforms/cell/spufs/coredump.c
@@ -111,7 +111,7 @@ static int match_context(const void *v, struct file *file, unsigned fd)
 	struct spu_context *ctx;
 	if (file->f_op != &spufs_context_fops)
 		return 0;
-	ctx = SPUFS_I(file->f_dentry->d_inode)->i_ctx;
+	ctx = SPUFS_I(file_inode(file))->i_ctx;
 	if (ctx->flags & SPU_CREATE_NOSCHED)
 		return 0;
 	return fd + 1;
@@ -137,7 +137,7 @@ static struct spu_context *coredump_next_context(int *fd)
 		return NULL;
 	*fd = n - 1;
 	file = fcheck(*fd);
-	return SPUFS_I(file->f_dentry->d_inode)->i_ctx;
+	return SPUFS_I(file_inode(file))->i_ctx;
 }
 
 int spufs_coredump_extra_notes_size(void)
diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c
index 0cfece4cf6ef..68c57d38745a 100644
--- a/arch/powerpc/platforms/cell/spufs/file.c
+++ b/arch/powerpc/platforms/cell/spufs/file.c
@@ -1852,7 +1852,7 @@ out:
 
 static int spufs_mfc_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 {
-	struct inode *inode = file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	int err = filemap_write_and_wait_range(inode->i_mapping, start, end);
 	if (!err) {
 		mutex_lock(&inode->i_mutex);
@@ -2501,7 +2501,7 @@ static int switch_log_sprint(struct spu_context *ctx, char *tbuf, int n)
 static ssize_t spufs_switch_log_read(struct file *file, char __user *buf,
 			     size_t len, loff_t *ppos)
 {
-	struct inode *inode = file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	struct spu_context *ctx = SPUFS_I(inode)->i_ctx;
 	int error = 0, cnt = 0;
 
@@ -2571,7 +2571,7 @@ static ssize_t spufs_switch_log_read(struct file *file, char __user *buf,
 
 static unsigned int spufs_switch_log_poll(struct file *file, poll_table *wait)
 {
-	struct inode *inode = file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	struct spu_context *ctx = SPUFS_I(inode)->i_ctx;
 	unsigned int mask = 0;
 	int rc;
diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c
index dba1ce235da5..99db6161e5c9 100644
--- a/arch/powerpc/platforms/cell/spufs/inode.c
+++ b/arch/powerpc/platforms/cell/spufs/inode.c
@@ -368,7 +368,7 @@ spufs_assert_affinity(unsigned int flags, struct spu_gang *gang,
 			return ERR_PTR(-EINVAL);
 
 		neighbor = get_spu_context(
-				SPUFS_I(filp->f_dentry->d_inode)->i_ctx);
+				SPUFS_I(file_inode(filp))->i_ctx);
 
 		if (!list_empty(&neighbor->aff_list) && !(neighbor->aff_head) &&
 		    !list_is_last(&neighbor->aff_list, &gang->aff_list_head) &&
diff --git a/arch/powerpc/platforms/cell/spufs/syscalls.c b/arch/powerpc/platforms/cell/spufs/syscalls.c
index baee994fe810..b045fdda4845 100644
--- a/arch/powerpc/platforms/cell/spufs/syscalls.c
+++ b/arch/powerpc/platforms/cell/spufs/syscalls.c
@@ -47,7 +47,7 @@ static long do_spu_run(struct file *filp,
 	if (filp->f_op != &spufs_context_fops)
 		goto out;
 
-	i = SPUFS_I(filp->f_path.dentry->d_inode);
+	i = SPUFS_I(file_inode(filp));
 	ret = spufs_run_spu(i->i_ctx, &npc, &status);
 
 	if (put_user(npc, unpc))
diff --git a/arch/powerpc/platforms/pseries/hvCall_inst.c b/arch/powerpc/platforms/pseries/hvCall_inst.c
index c9311cfdfcac..cf4e7736e4f1 100644
--- a/arch/powerpc/platforms/pseries/hvCall_inst.c
+++ b/arch/powerpc/platforms/pseries/hvCall_inst.c
@@ -86,7 +86,7 @@ static int hcall_inst_seq_open(struct inode *inode, struct file *file)
 
 	rc = seq_open(file, &hcall_inst_seq_ops);
 	seq = file->private_data;
-	seq->private = file->f_path.dentry->d_inode->i_private;
+	seq->private = file_inode(file)->i_private;
 
 	return rc;
 }
diff --git a/arch/powerpc/platforms/pseries/scanlog.c b/arch/powerpc/platforms/pseries/scanlog.c
index 554457294a2b..47f3cda2a68b 100644
--- a/arch/powerpc/platforms/pseries/scanlog.c
+++ b/arch/powerpc/platforms/pseries/scanlog.c
@@ -46,16 +46,12 @@ static struct proc_dir_entry *proc_ppc64_scan_log_dump;	/* The proc file */
 static ssize_t scanlog_read(struct file *file, char __user *buf,
 			    size_t count, loff_t *ppos)
 {
-        struct inode * inode = file->f_path.dentry->d_inode;
-	struct proc_dir_entry *dp;
-	unsigned int *data;
+	struct proc_dir_entry *dp = PDE(file_inode(file));
+	unsigned int *data = (unsigned int *)dp->data;
 	int status;
 	unsigned long len, off;
 	unsigned int wait_time;
 
-        dp = PDE(inode);
- 	data = (unsigned int *)dp->data;
-
 	if (count > RTAS_DATA_BUF_SIZE)
 		count = RTAS_DATA_BUF_SIZE;
 
diff --git a/arch/s390/hypfs/hypfs_dbfs.c b/arch/s390/hypfs/hypfs_dbfs.c
index 13e76dabbe8b..9fd4a40c6752 100644
--- a/arch/s390/hypfs/hypfs_dbfs.c
+++ b/arch/s390/hypfs/hypfs_dbfs.c
@@ -54,7 +54,7 @@ static ssize_t dbfs_read(struct file *file, char __user *buf,
 	if (*ppos != 0)
 		return 0;
 
-	df = file->f_path.dentry->d_inode->i_private;
+	df = file_inode(file)->i_private;
 	mutex_lock(&df->lock);
 	if (!df->data) {
 		data = hypfs_dbfs_data_alloc(df);
diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c
index 06ea69bd387a..280ded8b79ba 100644
--- a/arch/s390/hypfs/inode.c
+++ b/arch/s390/hypfs/inode.c
@@ -119,7 +119,7 @@ static void hypfs_evict_inode(struct inode *inode)
 
 static int hypfs_open(struct inode *inode, struct file *filp)
 {
-	char *data = filp->f_path.dentry->d_inode->i_private;
+	char *data = file_inode(filp)->i_private;
 	struct hypfs_sb_info *fs_info;
 
 	if (filp->f_mode & FMODE_WRITE) {
diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c
index 4e8215e0d4b6..19dcf136b851 100644
--- a/arch/s390/kernel/debug.c
+++ b/arch/s390/kernel/debug.c
@@ -611,7 +611,7 @@ debug_open(struct inode *inode, struct file *file)
 	debug_info_t *debug_info, *debug_info_snapshot;
 
 	mutex_lock(&debug_mutex);
-	debug_info = file->f_path.dentry->d_inode->i_private;
+	debug_info = file_inode(file)->i_private;
 	/* find debug view */
 	for (i = 0; i < DEBUG_MAX_VIEWS; i++) {
 		if (!debug_info->views[i])
diff --git a/arch/s390/pci/pci_debug.c b/arch/s390/pci/pci_debug.c
index a303c95346cb..a5d07bc2a547 100644
--- a/arch/s390/pci/pci_debug.c
+++ b/arch/s390/pci/pci_debug.c
@@ -99,7 +99,7 @@ static ssize_t pci_perf_seq_write(struct file *file, const char __user *ubuf,
 static int pci_perf_seq_open(struct inode *inode, struct file *filp)
 {
 	return single_open(filp, pci_perf_show,
-			   filp->f_path.dentry->d_inode->i_private);
+			   file_inode(filp)->i_private);
 }
 
 static const struct file_operations debugfs_pci_perf_fops = {
@@ -121,7 +121,7 @@ static int pci_debug_show(struct seq_file *m, void *v)
 static int pci_debug_seq_open(struct inode *inode, struct file *filp)
 {
 	return single_open(filp, pci_debug_show,
-			   filp->f_path.dentry->d_inode->i_private);
+			   file_inode(filp)->i_private);
 }
 
 static const struct file_operations debugfs_pci_debug_fops = {
diff --git a/arch/sh/mm/alignment.c b/arch/sh/mm/alignment.c
index 620fa7ff9eec..aea14855e656 100644
--- a/arch/sh/mm/alignment.c
+++ b/arch/sh/mm/alignment.c
@@ -140,7 +140,7 @@ static int alignment_proc_open(struct inode *inode, struct file *file)
 static ssize_t alignment_proc_write(struct file *file,
 		const char __user *buffer, size_t count, loff_t *pos)
 {
-	int *data = PDE(file->f_path.dentry->d_inode)->data;
+	int *data = PDE(file_inode(file))->data;
 	char mode;
 
 	if (count > 0) {
diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c
index a703af19c281..03abf9b70011 100644
--- a/arch/x86/ia32/ia32_aout.c
+++ b/arch/x86/ia32/ia32_aout.c
@@ -271,7 +271,7 @@ static int load_aout_binary(struct linux_binprm *bprm)
 	if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != OMAGIC &&
 	     N_MAGIC(ex) != QMAGIC && N_MAGIC(ex) != NMAGIC) ||
 	    N_TRSIZE(ex) || N_DRSIZE(ex) ||
-	    i_size_read(bprm->file->f_path.dentry->d_inode) <
+	    i_size_read(file_inode(bprm->file)) <
 	    ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) {
 		return -ENOEXEC;
 	}
@@ -425,12 +425,10 @@ beyond_if:
 
 static int load_aout_library(struct file *file)
 {
-	struct inode *inode;
 	unsigned long bss, start_addr, len, error;
 	int retval;
 	struct exec ex;
 
-	inode = file->f_path.dentry->d_inode;
 
 	retval = -ENOEXEC;
 	error = kernel_read(file, 0, (char *) &ex, sizeof(ex));
@@ -440,7 +438,7 @@ static int load_aout_library(struct file *file)
 	/* We come in here for the regular a.out style of shared libraries */
 	if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != QMAGIC) || N_TRSIZE(ex) ||
 	    N_DRSIZE(ex) || ((ex.a_entry & 0xfff) && N_MAGIC(ex) == ZMAGIC) ||
-	    i_size_read(inode) <
+	    i_size_read(file_inode(file)) <
 	    ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) {
 		goto out;
 	}
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c
index 60c78917190c..1e4dbcfe6d31 100644
--- a/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c
@@ -85,7 +85,7 @@ static ssize_t cpuid_read(struct file *file, char __user *buf,
 {
 	char __user *tmp = buf;
 	struct cpuid_regs cmd;
-	int cpu = iminor(file->f_path.dentry->d_inode);
+	int cpu = iminor(file_inode(file));
 	u64 pos = *ppos;
 	ssize_t bytes = 0;
 	int err = 0;
@@ -116,7 +116,7 @@ static int cpuid_open(struct inode *inode, struct file *file)
 	unsigned int cpu;
 	struct cpuinfo_x86 *c;
 
-	cpu = iminor(file->f_path.dentry->d_inode);
+	cpu = iminor(file_inode(file));
 	if (cpu >= nr_cpu_ids || !cpu_online(cpu))
 		return -ENXIO;	/* No such CPU */
 
diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c
index 9a13e889837e..8f12dc78a848 100644
--- a/drivers/block/DAC960.c
+++ b/drivers/block/DAC960.c
@@ -6547,7 +6547,7 @@ static ssize_t dac960_user_command_proc_write(struct file *file,
 				       const char __user *Buffer,
 				       size_t Count, loff_t *pos)
 {
-  DAC960_Controller_T *Controller = (DAC960_Controller_T *) PDE(file->f_path.dentry->d_inode)->data;
+  DAC960_Controller_T *Controller = (DAC960_Controller_T *) PDE(file_inode(file))->data;
   unsigned char CommandBuffer[80];
   int Length;
   if (Count > sizeof(CommandBuffer)-1) return -EINVAL;
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 043ddcca4abf..ade146bf65e5 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -625,7 +625,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
 			return -EBUSY;
 		file = fget(arg);
 		if (file) {
-			struct inode *inode = file->f_path.dentry->d_inode;
+			struct inode *inode = file_inode(file);
 			if (S_ISSOCK(inode->i_mode)) {
 				nbd->file = file;
 				nbd->sock = SOCKET_I(inode);
diff --git a/drivers/char/dsp56k.c b/drivers/char/dsp56k.c
index 052797b32bd3..01a5ca7425d7 100644
--- a/drivers/char/dsp56k.c
+++ b/drivers/char/dsp56k.c
@@ -181,7 +181,7 @@ static int dsp56k_upload(u_char __user *bin, int len)
 static ssize_t dsp56k_read(struct file *file, char __user *buf, size_t count,
 			   loff_t *ppos)
 {
-	struct inode *inode = file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	int dev = iminor(inode) & 0x0f;
 
 	switch(dev)
@@ -244,7 +244,7 @@ static ssize_t dsp56k_read(struct file *file, char __user *buf, size_t count,
 static ssize_t dsp56k_write(struct file *file, const char __user *buf, size_t count,
 			    loff_t *ppos)
 {
-	struct inode *inode = file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	int dev = iminor(inode) & 0x0f;
 
 	switch(dev)
@@ -306,7 +306,7 @@ static ssize_t dsp56k_write(struct file *file, const char __user *buf, size_t co
 static long dsp56k_ioctl(struct file *file, unsigned int cmd,
 			 unsigned long arg)
 {
-	int dev = iminor(file->f_path.dentry->d_inode) & 0x0f;
+	int dev = iminor(file_inode(file)) & 0x0f;
 	void __user *argp = (void __user *)arg;
 
 	switch(dev)
@@ -408,7 +408,7 @@ static long dsp56k_ioctl(struct file *file, unsigned int cmd,
 #if 0
 static unsigned int dsp56k_poll(struct file *file, poll_table *wait)
 {
-	int dev = iminor(file->f_path.dentry->d_inode) & 0x0f;
+	int dev = iminor(file_inode(file)) & 0x0f;
 
 	switch(dev)
 	{
diff --git a/drivers/char/dtlk.c b/drivers/char/dtlk.c
index 85156dd0caee..65a8d96c0e93 100644
--- a/drivers/char/dtlk.c
+++ b/drivers/char/dtlk.c
@@ -125,7 +125,7 @@ static char dtlk_write_tts(char);
 static ssize_t dtlk_read(struct file *file, char __user *buf,
 			 size_t count, loff_t * ppos)
 {
-	unsigned int minor = iminor(file->f_path.dentry->d_inode);
+	unsigned int minor = iminor(file_inode(file));
 	char ch;
 	int i = 0, retries;
 
@@ -177,7 +177,7 @@ static ssize_t dtlk_write(struct file *file, const char __user *buf,
 	}
 #endif
 
-	if (iminor(file->f_path.dentry->d_inode) != DTLK_MINOR)
+	if (iminor(file_inode(file)) != DTLK_MINOR)
 		return -EINVAL;
 
 	while (1) {
diff --git a/drivers/char/lp.c b/drivers/char/lp.c
index a741e418b456..dafd9ac6428f 100644
--- a/drivers/char/lp.c
+++ b/drivers/char/lp.c
@@ -294,7 +294,7 @@ static int lp_wait_ready(int minor, int nonblock)
 static ssize_t lp_write(struct file * file, const char __user * buf,
 		        size_t count, loff_t *ppos)
 {
-	unsigned int minor = iminor(file->f_path.dentry->d_inode);
+	unsigned int minor = iminor(file_inode(file));
 	struct parport *port = lp_table[minor].dev->port;
 	char *kbuf = lp_table[minor].lp_buffer;
 	ssize_t retv = 0;
@@ -413,7 +413,7 @@ static ssize_t lp_read(struct file * file, char __user * buf,
 		       size_t count, loff_t *ppos)
 {
 	DEFINE_WAIT(wait);
-	unsigned int minor=iminor(file->f_path.dentry->d_inode);
+	unsigned int minor=iminor(file_inode(file));
 	struct parport *port = lp_table[minor].dev->port;
 	ssize_t retval = 0;
 	char *kbuf = lp_table[minor].lp_buffer;
@@ -679,7 +679,7 @@ static long lp_ioctl(struct file *file, unsigned int cmd,
 	struct timeval par_timeout;
 	int ret;
 
-	minor = iminor(file->f_path.dentry->d_inode);
+	minor = iminor(file_inode(file));
 	mutex_lock(&lp_mutex);
 	switch (cmd) {
 	case LPSETTIMEOUT:
@@ -707,7 +707,7 @@ static long lp_compat_ioctl(struct file *file, unsigned int cmd,
 	struct timeval par_timeout;
 	int ret;
 
-	minor = iminor(file->f_path.dentry->d_inode);
+	minor = iminor(file_inode(file));
 	mutex_lock(&lp_mutex);
 	switch (cmd) {
 	case LPSETTIMEOUT:
diff --git a/drivers/char/mem.c b/drivers/char/mem.c
index c6fa3bc2baa8..e23b4a247b72 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -708,7 +708,7 @@ static loff_t memory_lseek(struct file *file, loff_t offset, int orig)
 {
 	loff_t ret;
 
-	mutex_lock(&file->f_path.dentry->d_inode->i_mutex);
+	mutex_lock(&file_inode(file)->i_mutex);
 	switch (orig) {
 	case SEEK_CUR:
 		offset += file->f_pos;
@@ -725,7 +725,7 @@ static loff_t memory_lseek(struct file *file, loff_t offset, int orig)
 	default:
 		ret = -EINVAL;
 	}
-	mutex_unlock(&file->f_path.dentry->d_inode->i_mutex);
+	mutex_unlock(&file_inode(file)->i_mutex);
 	return ret;
 }
 
diff --git a/drivers/char/nsc_gpio.c b/drivers/char/nsc_gpio.c
index 808d44e9a32a..b07b119ae57f 100644
--- a/drivers/char/nsc_gpio.c
+++ b/drivers/char/nsc_gpio.c
@@ -41,7 +41,7 @@ void nsc_gpio_dump(struct nsc_gpio_ops *amp, unsigned index)
 ssize_t nsc_gpio_write(struct file *file, const char __user *data,
 		       size_t len, loff_t *ppos)
 {
-	unsigned m = iminor(file->f_path.dentry->d_inode);
+	unsigned m = iminor(file_inode(file));
 	struct nsc_gpio_ops *amp = file->private_data;
 	struct device *dev = amp->dev;
 	size_t i;
@@ -104,7 +104,7 @@ ssize_t nsc_gpio_write(struct file *file, const char __user *data,
 ssize_t nsc_gpio_read(struct file *file, char __user * buf,
 		      size_t len, loff_t * ppos)
 {
-	unsigned m = iminor(file->f_path.dentry->d_inode);
+	unsigned m = iminor(file_inode(file));
 	int value;
 	struct nsc_gpio_ops *amp = file->private_data;
 
diff --git a/drivers/char/pcmcia/cm4000_cs.c b/drivers/char/pcmcia/cm4000_cs.c
index a7584860e9a7..c115217c79ae 100644
--- a/drivers/char/pcmcia/cm4000_cs.c
+++ b/drivers/char/pcmcia/cm4000_cs.c
@@ -1400,7 +1400,7 @@ static long cmm_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
 	struct cm4000_dev *dev = filp->private_data;
 	unsigned int iobase = dev->p_dev->resource[0]->start;
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	struct pcmcia_device *link;
 	int size;
 	int rc;
diff --git a/drivers/char/ppdev.c b/drivers/char/ppdev.c
index 1cd49241e60e..ae0b42b66e55 100644
--- a/drivers/char/ppdev.c
+++ b/drivers/char/ppdev.c
@@ -107,7 +107,7 @@ static inline void pp_enable_irq (struct pp_struct *pp)
 static ssize_t pp_read (struct file * file, char __user * buf, size_t count,
 			loff_t * ppos)
 {
-	unsigned int minor = iminor(file->f_path.dentry->d_inode);
+	unsigned int minor = iminor(file_inode(file));
 	struct pp_struct *pp = file->private_data;
 	char * kbuffer;
 	ssize_t bytes_read = 0;
@@ -189,7 +189,7 @@ static ssize_t pp_read (struct file * file, char __user * buf, size_t count,
 static ssize_t pp_write (struct file * file, const char __user * buf,
 			 size_t count, loff_t * ppos)
 {
-	unsigned int minor = iminor(file->f_path.dentry->d_inode);
+	unsigned int minor = iminor(file_inode(file));
 	struct pp_struct *pp = file->private_data;
 	char * kbuffer;
 	ssize_t bytes_written = 0;
@@ -324,7 +324,7 @@ static enum ieee1284_phase init_phase (int mode)
 
 static int pp_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
-	unsigned int minor = iminor(file->f_path.dentry->d_inode);
+	unsigned int minor = iminor(file_inode(file));
 	struct pp_struct *pp = file->private_data;
 	struct parport * port;
 	void __user *argp = (void __user *)arg;
diff --git a/drivers/char/ps3flash.c b/drivers/char/ps3flash.c
index 588063ac9517..8cafa9ccd43f 100644
--- a/drivers/char/ps3flash.c
+++ b/drivers/char/ps3flash.c
@@ -312,7 +312,7 @@ static int ps3flash_flush(struct file *file, fl_owner_t id)
 
 static int ps3flash_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 {
-	struct inode *inode = file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	int err;
 	mutex_lock(&inode->i_mutex);
 	err = ps3flash_writeback(ps3flash_dev);
diff --git a/drivers/char/raw.c b/drivers/char/raw.c
index 54a3a6d09819..f3223aac4df1 100644
--- a/drivers/char/raw.c
+++ b/drivers/char/raw.c
@@ -80,7 +80,7 @@ static int raw_open(struct inode *inode, struct file *filp)
 	filp->f_flags |= O_DIRECT;
 	filp->f_mapping = bdev->bd_inode->i_mapping;
 	if (++raw_devices[minor].inuse == 1)
-		filp->f_path.dentry->d_inode->i_mapping =
+		file_inode(filp)->i_mapping =
 			bdev->bd_inode->i_mapping;
 	filp->private_data = bdev;
 	mutex_unlock(&raw_mutex);
diff --git a/drivers/char/sonypi.c b/drivers/char/sonypi.c
index d780295a1473..8450e178b819 100644
--- a/drivers/char/sonypi.c
+++ b/drivers/char/sonypi.c
@@ -938,7 +938,7 @@ static ssize_t sonypi_misc_read(struct file *file, char __user *buf,
 	}
 
 	if (ret > 0) {
-		struct inode *inode = file->f_path.dentry->d_inode;
+		struct inode *inode = file_inode(file);
 		inode->i_atime = current_fs_time(inode->i_sb);
 	}
 
diff --git a/drivers/char/tb0219.c b/drivers/char/tb0219.c
index 34c63f85104d..47b9fdfcf083 100644
--- a/drivers/char/tb0219.c
+++ b/drivers/char/tb0219.c
@@ -164,7 +164,7 @@ static ssize_t tanbac_tb0219_read(struct file *file, char __user *buf, size_t le
 	unsigned int minor;
 	char value;
 
-	minor = iminor(file->f_path.dentry->d_inode);
+	minor = iminor(file_inode(file));
 	switch (minor) {
 	case 0:
 		value = get_led();
@@ -200,7 +200,7 @@ static ssize_t tanbac_tb0219_write(struct file *file, const char __user *data,
 	int retval = 0;
 	char c;
 
-	minor = iminor(file->f_path.dentry->d_inode);
+	minor = iminor(file_inode(file));
 	switch (minor) {
 	case 0:
 		type = TYPE_LED;
diff --git a/drivers/gpu/drm/gma500/gtt.c b/drivers/gpu/drm/gma500/gtt.c
index 04a371aceb34..054e26e769ec 100644
--- a/drivers/gpu/drm/gma500/gtt.c
+++ b/drivers/gpu/drm/gma500/gtt.c
@@ -202,7 +202,7 @@ static int psb_gtt_attach_pages(struct gtt_range *gt)
 	WARN_ON(gt->pages);
 
 	/* This is the shared memory object that backs the GEM resource */
-	inode = gt->gem.filp->f_path.dentry->d_inode;
+	inode = file_inode(gt->gem.filp);
 	mapping = inode->i_mapping;
 
 	gt->pages = kmalloc(pages * sizeof(struct page *), GFP_KERNEL);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 8febea6daa08..d7d772b30f1a 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1635,7 +1635,7 @@ i915_gem_object_truncate(struct drm_i915_gem_object *obj)
 	 * To do this we must instruct the shmfs to drop all of its
 	 * backing pages, *now*.
 	 */
-	inode = obj->base.filp->f_path.dentry->d_inode;
+	inode = file_inode(obj->base.filp);
 	shmem_truncate_range(inode, 0, (loff_t)-1);
 
 	obj->madv = __I915_MADV_PURGED;
@@ -1800,7 +1800,7 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
 	 *
 	 * Fail silently without starting the shrinker
 	 */
-	mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
+	mapping = file_inode(obj->base.filp)->i_mapping;
 	gfp = mapping_gfp_mask(mapping);
 	gfp |= __GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD;
 	gfp &= ~(__GFP_IO | __GFP_WAIT);
@@ -3724,7 +3724,7 @@ struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
 		mask |= __GFP_DMA32;
 	}
 
-	mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
+	mapping = file_inode(obj->base.filp)->i_mapping;
 	mapping_set_gfp_mask(mapping, mask);
 
 	i915_gem_object_init(obj, &i915_gem_object_ops);
@@ -4228,7 +4228,7 @@ void i915_gem_free_all_phys_object(struct drm_device *dev)
 void i915_gem_detach_phys_object(struct drm_device *dev,
 				 struct drm_i915_gem_object *obj)
 {
-	struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
+	struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
 	char *vaddr;
 	int i;
 	int page_count;
@@ -4264,7 +4264,7 @@ i915_gem_attach_phys_object(struct drm_device *dev,
 			    int id,
 			    int align)
 {
-	struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
+	struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
 	drm_i915_private_t *dev_priv = dev->dev_private;
 	int ret = 0;
 	int page_count;
diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
index 7d759a430294..5e93a52d4f2c 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -296,7 +296,7 @@ int ttm_tt_swapin(struct ttm_tt *ttm)
 	swap_storage = ttm->swap_storage;
 	BUG_ON(swap_storage == NULL);
 
-	swap_space = swap_storage->f_path.dentry->d_inode->i_mapping;
+	swap_space = file_inode(swap_storage)->i_mapping;
 
 	for (i = 0; i < ttm->num_pages; ++i) {
 		from_page = shmem_read_mapping_page(swap_space, i);
@@ -345,7 +345,7 @@ int ttm_tt_swapout(struct ttm_tt *ttm, struct file *persistent_swap_storage)
 	} else
 		swap_storage = persistent_swap_storage;
 
-	swap_space = swap_storage->f_path.dentry->d_inode->i_mapping;
+	swap_space = file_inode(swap_storage)->i_mapping;
 
 	for (i = 0; i < ttm->num_pages; ++i) {
 		from_page = ttm->pages[i];
diff --git a/drivers/gpu/drm/udl/udl_gem.c b/drivers/gpu/drm/udl/udl_gem.c
index afd212c99216..3816270ba49b 100644
--- a/drivers/gpu/drm/udl/udl_gem.c
+++ b/drivers/gpu/drm/udl/udl_gem.c
@@ -137,7 +137,7 @@ static int udl_gem_get_pages(struct udl_gem_object *obj, gfp_t gfpmask)
 	if (obj->pages == NULL)
 		return -ENOMEM;
 
-	inode = obj->base.filp->f_path.dentry->d_inode;
+	inode = file_inode(obj->base.filp);
 	mapping = inode->i_mapping;
 	gfpmask |= mapping_gfp_mask(mapping);
 
diff --git a/drivers/hid/hid-roccat.c b/drivers/hid/hid-roccat.c
index b685b04dbf9d..d7437ef5c695 100644
--- a/drivers/hid/hid-roccat.c
+++ b/drivers/hid/hid-roccat.c
@@ -378,7 +378,7 @@ EXPORT_SYMBOL_GPL(roccat_disconnect);
 
 static long roccat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
-	struct inode *inode = file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	struct roccat_device *device;
 	unsigned int minor = iminor(inode);
 	long retval = 0;
diff --git a/drivers/hid/hidraw.c b/drivers/hid/hidraw.c
index 413a73187d33..3ad07a53a229 100644
--- a/drivers/hid/hidraw.c
+++ b/drivers/hid/hidraw.c
@@ -108,7 +108,7 @@ out:
  * This function is to be called with the minors_lock mutex held */
 static ssize_t hidraw_send_report(struct file *file, const char __user *buffer, size_t count, unsigned char report_type)
 {
-	unsigned int minor = iminor(file->f_path.dentry->d_inode);
+	unsigned int minor = iminor(file_inode(file));
 	struct hid_device *dev;
 	__u8 *buf;
 	int ret = 0;
@@ -176,7 +176,7 @@ static ssize_t hidraw_write(struct file *file, const char __user *buffer, size_t
  *  mutex held. */
 static ssize_t hidraw_get_report(struct file *file, char __user *buffer, size_t count, unsigned char report_type)
 {
-	unsigned int minor = iminor(file->f_path.dentry->d_inode);
+	unsigned int minor = iminor(file_inode(file));
 	struct hid_device *dev;
 	__u8 *buf;
 	int ret = 0, len;
@@ -340,7 +340,7 @@ unlock:
 static long hidraw_ioctl(struct file *file, unsigned int cmd,
 							unsigned long arg)
 {
-	struct inode *inode = file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	unsigned int minor = iminor(inode);
 	long ret = 0;
 	struct hidraw *dev;
diff --git a/drivers/i2c/i2c-dev.c b/drivers/i2c/i2c-dev.c
index 5ec2261574ec..c3ccdea3d180 100644
--- a/drivers/i2c/i2c-dev.c
+++ b/drivers/i2c/i2c-dev.c
@@ -148,7 +148,7 @@ static ssize_t i2cdev_read(struct file *file, char __user *buf, size_t count,
 		return -ENOMEM;
 
 	pr_debug("i2c-dev: i2c-%d reading %zu bytes.\n",
-		iminor(file->f_path.dentry->d_inode), count);
+		iminor(file_inode(file)), count);
 
 	ret = i2c_master_recv(client, tmp, count);
 	if (ret >= 0)
@@ -172,7 +172,7 @@ static ssize_t i2cdev_write(struct file *file, const char __user *buf,
 		return PTR_ERR(tmp);
 
 	pr_debug("i2c-dev: i2c-%d writing %zu bytes.\n",
-		iminor(file->f_path.dentry->d_inode), count);
+		iminor(file_inode(file)), count);
 
 	ret = i2c_master_send(client, tmp, count);
 	kfree(tmp);
diff --git a/drivers/ide/ide-proc.c b/drivers/ide/ide-proc.c
index a3133d7b2a0c..2abcc4790f12 100644
--- a/drivers/ide/ide-proc.c
+++ b/drivers/ide/ide-proc.c
@@ -333,7 +333,7 @@ static int ide_settings_proc_open(struct inode *inode, struct file *file)
 static ssize_t ide_settings_proc_write(struct file *file, const char __user *buffer,
 				       size_t count, loff_t *pos)
 {
-	ide_drive_t	*drive = (ide_drive_t *) PDE(file->f_path.dentry->d_inode)->data;
+	ide_drive_t	*drive = (ide_drive_t *) PDE(file_inode(file))->data;
 	char		name[MAX_LEN + 1];
 	int		for_real = 0, mul_factor, div_factor;
 	unsigned long	n;
@@ -558,7 +558,7 @@ static int ide_replace_subdriver(ide_drive_t *drive, const char *driver)
 static ssize_t ide_driver_proc_write(struct file *file, const char __user *buffer,
 				     size_t count, loff_t *pos)
 {
-	ide_drive_t	*drive = (ide_drive_t *) PDE(file->f_path.dentry->d_inode)->data;
+	ide_drive_t	*drive = (ide_drive_t *) PDE(file_inode(file))->data;
 	char name[32];
 
 	if (!capable(CAP_SYS_ADMIN))
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 0cb0007724a2..792e7e9b376f 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -730,7 +730,7 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
 			goto err_tree_mutex_unlock;
 		}
 
-		inode = f.file->f_path.dentry->d_inode;
+		inode = file_inode(f.file);
 		xrcd = find_xrcd(file->device, inode);
 		if (!xrcd && !(cmd.oflags & O_CREAT)) {
 			/* no file descriptor. Need CREATE flag */
diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c
index 3eb7e454849b..aed8afee56da 100644
--- a/drivers/infiniband/hw/ipath/ipath_file_ops.c
+++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c
@@ -1864,9 +1864,9 @@ static int ipath_assign_port(struct file *fp,
 		goto done_chk_sdma;
 	}
 
-	i_minor = iminor(fp->f_path.dentry->d_inode) - IPATH_USER_MINOR_BASE;
+	i_minor = iminor(file_inode(fp)) - IPATH_USER_MINOR_BASE;
 	ipath_cdbg(VERBOSE, "open on dev %lx (minor %d)\n",
-		   (long)fp->f_path.dentry->d_inode->i_rdev, i_minor);
+		   (long)file_inode(fp)->i_rdev, i_minor);
 
 	if (i_minor)
 		ret = find_free_port(i_minor - 1, fp, uinfo);
diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c
index a4de9d58e9b4..a479375a8fd8 100644
--- a/drivers/infiniband/hw/ipath/ipath_fs.c
+++ b/drivers/infiniband/hw/ipath/ipath_fs.c
@@ -113,7 +113,7 @@ static ssize_t atomic_counters_read(struct file *file, char __user *buf,
 	struct infinipath_counters counters;
 	struct ipath_devdata *dd;
 
-	dd = file->f_path.dentry->d_inode->i_private;
+	dd = file_inode(file)->i_private;
 	dd->ipath_f_read_counters(dd, &counters);
 
 	return simple_read_from_buffer(buf, count, ppos, &counters,
@@ -154,7 +154,7 @@ static ssize_t flash_read(struct file *file, char __user *buf,
 		goto bail;
 	}
 
-	dd = file->f_path.dentry->d_inode->i_private;
+	dd = file_inode(file)->i_private;
 	if (ipath_eeprom_read(dd, pos, tmp, count)) {
 		ipath_dev_err(dd, "failed to read from flash\n");
 		ret = -ENXIO;
@@ -207,7 +207,7 @@ static ssize_t flash_write(struct file *file, const char __user *buf,
 		goto bail_tmp;
 	}
 
-	dd = file->f_path.dentry->d_inode->i_private;
+	dd = file_inode(file)->i_private;
 	if (ipath_eeprom_write(dd, pos, tmp, count)) {
 		ret = -ENXIO;
 		ipath_dev_err(dd, "failed to write to flash\n");
diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c
index 959a5c4ff812..4f7aa301b3b1 100644
--- a/drivers/infiniband/hw/qib/qib_file_ops.c
+++ b/drivers/infiniband/hw/qib/qib_file_ops.c
@@ -1524,7 +1524,7 @@ static int qib_assign_ctxt(struct file *fp, const struct qib_user_info *uinfo)
 		}
 	}
 
-	i_minor = iminor(fp->f_dentry->d_inode) - QIB_USER_MINOR_BASE;
+	i_minor = iminor(file_inode(fp)) - QIB_USER_MINOR_BASE;
 	if (i_minor)
 		ret = find_free_ctxt(i_minor - 1, fp, uinfo);
 	else
diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c
index 65a2a23f6f8a..644bd6f6467c 100644
--- a/drivers/infiniband/hw/qib/qib_fs.c
+++ b/drivers/infiniband/hw/qib/qib_fs.c
@@ -45,7 +45,7 @@
 
 static struct super_block *qib_super;
 
-#define private2dd(file) ((file)->f_dentry->d_inode->i_private)
+#define private2dd(file) (file_inode(file)->i_private)
 
 static int qibfs_mknod(struct inode *dir, struct dentry *dentry,
 		       umode_t mode, const struct file_operations *fops,
@@ -171,7 +171,7 @@ static const struct file_operations cntr_ops[] = {
 };
 
 /*
- * Could use file->f_dentry->d_inode->i_ino to figure out which file,
+ * Could use file_inode(file)->i_ino to figure out which file,
  * instead of separate routine for each, but for now, this works...
  */
 
diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c
index fc178893789a..7db150ca163e 100644
--- a/drivers/iommu/tegra-smmu.c
+++ b/drivers/iommu/tegra-smmu.c
@@ -964,7 +964,6 @@ static ssize_t smmu_debugfs_stats_write(struct file *file,
 {
 	struct smmu_debugfs_info *info;
 	struct smmu_device *smmu;
-	struct dentry *dent;
 	int i;
 	enum {
 		_OFF = 0,
@@ -992,8 +991,7 @@ static ssize_t smmu_debugfs_stats_write(struct file *file,
 	if (i == ARRAY_SIZE(command))
 		return -EINVAL;
 
-	dent = file->f_dentry;
-	info = dent->d_inode->i_private;
+	info = file_inode(file)->i_private;
 	smmu = info->smmu;
 
 	offs = SMMU_CACHE_CONFIG(info->cache);
diff --git a/drivers/isdn/hardware/eicon/divasproc.c b/drivers/isdn/hardware/eicon/divasproc.c
index af4fd3d036c1..3a4165c61196 100644
--- a/drivers/isdn/hardware/eicon/divasproc.c
+++ b/drivers/isdn/hardware/eicon/divasproc.c
@@ -145,7 +145,7 @@ void remove_divas_proc(void)
 static ssize_t grp_opt_proc_write(struct file *file, const char __user *buffer,
 				  size_t count, loff_t *pos)
 {
-	diva_os_xdi_adapter_t *a = PDE(file->f_path.dentry->d_inode)->data;
+	diva_os_xdi_adapter_t *a = PDE(file_inode(file))->data;
 	PISDN_ADAPTER IoAdapter = IoAdapters[a->controller - 1];
 
 	if ((count == 1) || (count == 2)) {
@@ -172,7 +172,7 @@ static ssize_t grp_opt_proc_write(struct file *file, const char __user *buffer,
 static ssize_t d_l1_down_proc_write(struct file *file, const char __user *buffer,
 				    size_t count, loff_t *pos)
 {
-	diva_os_xdi_adapter_t *a = PDE(file->f_path.dentry->d_inode)->data;
+	diva_os_xdi_adapter_t *a = PDE(file_inode(file))->data;
 	PISDN_ADAPTER IoAdapter = IoAdapters[a->controller - 1];
 
 	if ((count == 1) || (count == 2)) {
@@ -251,7 +251,7 @@ static const struct file_operations grp_opt_proc_fops = {
 static ssize_t info_proc_write(struct file *file, const char __user *buffer,
 			       size_t count, loff_t *pos)
 {
-	diva_os_xdi_adapter_t *a = PDE(file->f_path.dentry->d_inode)->data;
+	diva_os_xdi_adapter_t *a = PDE(file_inode(file))->data;
 	PISDN_ADAPTER IoAdapter = IoAdapters[a->controller - 1];
 	char c[4];
 
diff --git a/drivers/isdn/hysdn/hysdn_proclog.c b/drivers/isdn/hysdn/hysdn_proclog.c
index 88e4f0ee073c..9a3ce93665c5 100644
--- a/drivers/isdn/hysdn/hysdn_proclog.c
+++ b/drivers/isdn/hysdn/hysdn_proclog.c
@@ -173,7 +173,7 @@ hysdn_log_read(struct file *file, char __user *buf, size_t count, loff_t *off)
 {
 	struct log_data *inf;
 	int len;
-	struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode);
+	struct proc_dir_entry *pde = PDE(file_inode(file));
 	struct procdata *pd = NULL;
 	hysdn_card *card;
 
@@ -319,7 +319,7 @@ static unsigned int
 hysdn_log_poll(struct file *file, poll_table *wait)
 {
 	unsigned int mask = 0;
-	struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode);
+	struct proc_dir_entry *pde = PDE(file_inode(file));
 	hysdn_card *card;
 	struct procdata *pd = NULL;
 
diff --git a/drivers/isdn/i4l/isdn_common.c b/drivers/isdn/i4l/isdn_common.c
index e2a945ee9f05..32049ed2f24c 100644
--- a/drivers/isdn/i4l/isdn_common.c
+++ b/drivers/isdn/i4l/isdn_common.c
@@ -1058,7 +1058,7 @@ isdn_info_update(void)
 static ssize_t
 isdn_read(struct file *file, char __user *buf, size_t count, loff_t *off)
 {
-	uint minor = iminor(file->f_path.dentry->d_inode);
+	uint minor = iminor(file_inode(file));
 	int len = 0;
 	int drvidx;
 	int chidx;
@@ -1165,7 +1165,7 @@ out:
 static ssize_t
 isdn_write(struct file *file, const char __user *buf, size_t count, loff_t *off)
 {
-	uint minor = iminor(file->f_path.dentry->d_inode);
+	uint minor = iminor(file_inode(file));
 	int drvidx;
 	int chidx;
 	int retval;
@@ -1228,7 +1228,7 @@ static unsigned int
 isdn_poll(struct file *file, poll_table *wait)
 {
 	unsigned int mask = 0;
-	unsigned int minor = iminor(file->f_path.dentry->d_inode);
+	unsigned int minor = iminor(file_inode(file));
 	int drvidx = isdn_minor2drv(minor - ISDN_MINOR_CTRL);
 
 	mutex_lock(&isdn_mutex);
@@ -1269,7 +1269,7 @@ out:
 static int
 isdn_ioctl(struct file *file, uint cmd, ulong arg)
 {
-	uint minor = iminor(file->f_path.dentry->d_inode);
+	uint minor = iminor(file_inode(file));
 	isdn_ctrl c;
 	int drvidx;
 	int ret;
diff --git a/drivers/isdn/i4l/isdn_ppp.c b/drivers/isdn/i4l/isdn_ppp.c
index 61d78fa03b1a..38ceac5053a0 100644
--- a/drivers/isdn/i4l/isdn_ppp.c
+++ b/drivers/isdn/i4l/isdn_ppp.c
@@ -668,7 +668,7 @@ isdn_ppp_poll(struct file *file, poll_table *wait)
 
 	if (is->debug & 0x2)
 		printk(KERN_DEBUG "isdn_ppp_poll: minor: %d\n",
-		       iminor(file->f_path.dentry->d_inode));
+		       iminor(file_inode(file)));
 
 	/* just registers wait_queue hook. This doesn't really wait. */
 	poll_wait(file, &is->wq, wait);
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index 7155945f8eb8..4fd9d6aeff6a 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -337,7 +337,7 @@ static int read_page(struct file *file, unsigned long index,
 		     struct page *page)
 {
 	int ret = 0;
-	struct inode *inode = file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	struct buffer_head *bh;
 	sector_t block;
 
@@ -755,7 +755,7 @@ static void bitmap_file_unmap(struct bitmap_storage *store)
 		free_buffers(sb_page);
 
 	if (file) {
-		struct inode *inode = file->f_path.dentry->d_inode;
+		struct inode *inode = file_inode(file);
 		invalidate_mapping_pages(inode->i_mapping, 0, -1);
 		fput(file);
 	}
diff --git a/drivers/media/pci/zoran/zoran_procfs.c b/drivers/media/pci/zoran/zoran_procfs.c
index f1423b777db1..e084b0a21b1b 100644
--- a/drivers/media/pci/zoran/zoran_procfs.c
+++ b/drivers/media/pci/zoran/zoran_procfs.c
@@ -137,7 +137,7 @@ static int zoran_open(struct inode *inode, struct file *file)
 static ssize_t zoran_write(struct file *file, const char __user *buffer,
 			size_t count, loff_t *ppos)
 {
-	struct zoran *zr = PDE(file->f_path.dentry->d_inode)->data;
+	struct zoran *zr = PDE(file_inode(file))->data;
 	char *string, *sp;
 	char *line, *ldelim, *varname, *svar, *tdelim;
 
diff --git a/drivers/media/rc/lirc_dev.c b/drivers/media/rc/lirc_dev.c
index ca12d3289bfe..35002367485c 100644
--- a/drivers/media/rc/lirc_dev.c
+++ b/drivers/media/rc/lirc_dev.c
@@ -531,7 +531,7 @@ EXPORT_SYMBOL(lirc_dev_fop_close);
 
 unsigned int lirc_dev_fop_poll(struct file *file, poll_table *wait)
 {
-	struct irctl *ir = irctls[iminor(file->f_dentry->d_inode)];
+	struct irctl *ir = irctls[iminor(file_inode(file))];
 	unsigned int ret;
 
 	if (!ir) {
@@ -565,7 +565,7 @@ long lirc_dev_fop_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
 	__u32 mode;
 	int result = 0;
-	struct irctl *ir = irctls[iminor(file->f_dentry->d_inode)];
+	struct irctl *ir = irctls[iminor(file_inode(file))];
 
 	if (!ir) {
 		printk(KERN_ERR "lirc_dev: %s: no irctl found!\n", __func__);
@@ -650,7 +650,7 @@ ssize_t lirc_dev_fop_read(struct file *file,
 			  size_t length,
 			  loff_t *ppos)
 {
-	struct irctl *ir = irctls[iminor(file->f_dentry->d_inode)];
+	struct irctl *ir = irctls[iminor(file_inode(file))];
 	unsigned char *buf;
 	int ret = 0, written = 0;
 	DECLARE_WAITQUEUE(wait, current);
@@ -754,10 +754,10 @@ void *lirc_get_pdata(struct file *file)
 {
 	void *data = NULL;
 
-	if (file && file->f_dentry && file->f_dentry->d_inode &&
-	    file->f_dentry->d_inode->i_rdev) {
+	if (file && file->f_dentry && file_inode(file) &&
+	    file_inode(file)->i_rdev) {
 		struct irctl *ir;
-		ir = irctls[iminor(file->f_dentry->d_inode)];
+		ir = irctls[iminor(file_inode(file))];
 		data = ir->d.data;
 	}
 
@@ -769,7 +769,7 @@ EXPORT_SYMBOL(lirc_get_pdata);
 ssize_t lirc_dev_fop_write(struct file *file, const char __user *buffer,
 			   size_t length, loff_t *ppos)
 {
-	struct irctl *ir = irctls[iminor(file->f_dentry->d_inode)];
+	struct irctl *ir = irctls[iminor(file_inode(file))];
 
 	if (!ir) {
 		printk(KERN_ERR "%s: called with invalid irctl\n", __func__);
diff --git a/drivers/media/v4l2-core/v4l2-dev.c b/drivers/media/v4l2-core/v4l2-dev.c
index 98dcad9c8a3b..870de1d5667a 100644
--- a/drivers/media/v4l2-core/v4l2-dev.c
+++ b/drivers/media/v4l2-core/v4l2-dev.c
@@ -222,7 +222,7 @@ static struct class video_class = {
 
 struct video_device *video_devdata(struct file *file)
 {
-	return video_device[iminor(file->f_path.dentry->d_inode)];
+	return video_device[iminor(file_inode(file))];
 }
 EXPORT_SYMBOL(video_devdata);
 
diff --git a/drivers/mtd/ubi/cdev.c b/drivers/mtd/ubi/cdev.c
index dfcc65b33e99..4f02848bb2bc 100644
--- a/drivers/mtd/ubi/cdev.c
+++ b/drivers/mtd/ubi/cdev.c
@@ -194,7 +194,7 @@ static int vol_cdev_fsync(struct file *file, loff_t start, loff_t end,
 {
 	struct ubi_volume_desc *desc = file->private_data;
 	struct ubi_device *ubi = desc->vol->ubi;
-	struct inode *inode = file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	int err;
 	mutex_lock(&inode->i_mutex);
 	err = ubi_sync(ubi->ubi_num);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index f0718e1a8369..8fb1ccfe74dc 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -2336,7 +2336,7 @@ static ssize_t mem_read(struct file *file, char __user *buf, size_t count,
 			loff_t *ppos)
 {
 	loff_t pos = *ppos;
-	loff_t avail = file->f_path.dentry->d_inode->i_size;
+	loff_t avail = file_inode(file)->i_size;
 	unsigned int mem = (uintptr_t)file->private_data & 3;
 	struct adapter *adap = file->private_data - mem;
 
diff --git a/drivers/net/wan/cosa.c b/drivers/net/wan/cosa.c
index 6aed238e573e..1505509728aa 100644
--- a/drivers/net/wan/cosa.c
+++ b/drivers/net/wan/cosa.c
@@ -939,14 +939,14 @@ static int cosa_open(struct inode *inode, struct file *file)
 	int ret = 0;
 
 	mutex_lock(&cosa_chardev_mutex);
-	if ((n=iminor(file->f_path.dentry->d_inode)>>CARD_MINOR_BITS)
+	if ((n=iminor(file_inode(file))>>CARD_MINOR_BITS)
 		>= nr_cards) {
 		ret = -ENODEV;
 		goto out;
 	}
 	cosa = cosa_cards+n;
 
-	if ((n=iminor(file->f_path.dentry->d_inode)
+	if ((n=iminor(file_inode(file))
 		& ((1<<CARD_MINOR_BITS)-1)) >= cosa->nchannels) {
 		ret = -ENODEV;
 		goto out;
diff --git a/drivers/net/wireless/ray_cs.c b/drivers/net/wireless/ray_cs.c
index 598ca1cafb95..5311ba1dcd2c 100644
--- a/drivers/net/wireless/ray_cs.c
+++ b/drivers/net/wireless/ray_cs.c
@@ -2769,7 +2769,7 @@ static ssize_t int_proc_write(struct file *file, const char __user *buffer,
 		nr = nr * 10 + c;
 		p++;
 	} while (--len);
-	*(int *)PDE(file->f_path.dentry->d_inode)->data = nr;
+	*(int *)PDE(file_inode(file))->data = nr;
 	return count;
 }
 
diff --git a/drivers/parisc/led.c b/drivers/parisc/led.c
index f2f501e5b6a0..d4d800c54d86 100644
--- a/drivers/parisc/led.c
+++ b/drivers/parisc/led.c
@@ -179,7 +179,7 @@ static int led_proc_open(struct inode *inode, struct file *file)
 static ssize_t led_proc_write(struct file *file, const char *buf,
 	size_t count, loff_t *pos)
 {
-	void *data = PDE(file->f_path.dentry->d_inode)->data;
+	void *data = PDE(file_inode(file))->data;
 	char *cur, lbuf[32];
 	int d;
 
diff --git a/drivers/pci/proc.c b/drivers/pci/proc.c
index 9b8505ccc56d..0b009470e6db 100644
--- a/drivers/pci/proc.c
+++ b/drivers/pci/proc.c
@@ -21,7 +21,7 @@ static loff_t
 proc_bus_pci_lseek(struct file *file, loff_t off, int whence)
 {
 	loff_t new = -1;
-	struct inode *inode = file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(file);
 
 	mutex_lock(&inode->i_mutex);
 	switch (whence) {
@@ -46,7 +46,7 @@ proc_bus_pci_lseek(struct file *file, loff_t off, int whence)
 static ssize_t
 proc_bus_pci_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos)
 {
-	const struct inode *ino = file->f_path.dentry->d_inode;
+	const struct inode *ino = file_inode(file);
 	const struct proc_dir_entry *dp = PDE(ino);
 	struct pci_dev *dev = dp->data;
 	unsigned int pos = *ppos;
@@ -132,7 +132,7 @@ proc_bus_pci_read(struct file *file, char __user *buf, size_t nbytes, loff_t *pp
 static ssize_t
 proc_bus_pci_write(struct file *file, const char __user *buf, size_t nbytes, loff_t *ppos)
 {
-	struct inode *ino = file->f_path.dentry->d_inode;
+	struct inode *ino = file_inode(file);
 	const struct proc_dir_entry *dp = PDE(ino);
 	struct pci_dev *dev = dp->data;
 	int pos = *ppos;
@@ -212,7 +212,7 @@ struct pci_filp_private {
 static long proc_bus_pci_ioctl(struct file *file, unsigned int cmd,
 			       unsigned long arg)
 {
-	const struct proc_dir_entry *dp = PDE(file->f_dentry->d_inode);
+	const struct proc_dir_entry *dp = PDE(file_inode(file));
 	struct pci_dev *dev = dp->data;
 #ifdef HAVE_PCI_MMAP
 	struct pci_filp_private *fpriv = file->private_data;
@@ -253,7 +253,7 @@ static long proc_bus_pci_ioctl(struct file *file, unsigned int cmd,
 #ifdef HAVE_PCI_MMAP
 static int proc_bus_pci_mmap(struct file *file, struct vm_area_struct *vma)
 {
-	struct inode *inode = file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	const struct proc_dir_entry *dp = PDE(inode);
 	struct pci_dev *dev = dp->data;
 	struct pci_filp_private *fpriv = file->private_data;
diff --git a/drivers/platform/x86/sony-laptop.c b/drivers/platform/x86/sony-laptop.c
index b8ad71f7863f..8853d013a716 100644
--- a/drivers/platform/x86/sony-laptop.c
+++ b/drivers/platform/x86/sony-laptop.c
@@ -3566,7 +3566,7 @@ static ssize_t sonypi_misc_read(struct file *file, char __user *buf,
 	}
 
 	if (ret > 0) {
-		struct inode *inode = file->f_path.dentry->d_inode;
+		struct inode *inode = file_inode(file);
 		inode->i_atime = current_fs_time(inode->i_sb);
 	}
 
diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c
index f946ca7cb762..05dfd569d912 100644
--- a/drivers/platform/x86/thinkpad_acpi.c
+++ b/drivers/platform/x86/thinkpad_acpi.c
@@ -852,7 +852,7 @@ static ssize_t dispatch_proc_write(struct file *file,
 			const char __user *userbuf,
 			size_t count, loff_t *pos)
 {
-	struct ibm_struct *ibm = PDE(file->f_path.dentry->d_inode)->data;
+	struct ibm_struct *ibm = PDE(file_inode(file))->data;
 	char *kernbuf;
 	int ret;
 
diff --git a/drivers/platform/x86/toshiba_acpi.c b/drivers/platform/x86/toshiba_acpi.c
index c2727895794c..6fba80ad3bca 100644
--- a/drivers/platform/x86/toshiba_acpi.c
+++ b/drivers/platform/x86/toshiba_acpi.c
@@ -583,7 +583,7 @@ static int set_lcd_status(struct backlight_device *bd)
 static ssize_t lcd_proc_write(struct file *file, const char __user *buf,
 			      size_t count, loff_t *pos)
 {
-	struct toshiba_acpi_dev *dev = PDE(file->f_path.dentry->d_inode)->data;
+	struct toshiba_acpi_dev *dev = PDE(file_inode(file))->data;
 	char cmd[42];
 	size_t len;
 	int value;
@@ -650,7 +650,7 @@ static int video_proc_open(struct inode *inode, struct file *file)
 static ssize_t video_proc_write(struct file *file, const char __user *buf,
 				size_t count, loff_t *pos)
 {
-	struct toshiba_acpi_dev *dev = PDE(file->f_path.dentry->d_inode)->data;
+	struct toshiba_acpi_dev *dev = PDE(file_inode(file))->data;
 	char *cmd, *buffer;
 	int ret;
 	int value;
@@ -750,7 +750,7 @@ static int fan_proc_open(struct inode *inode, struct file *file)
 static ssize_t fan_proc_write(struct file *file, const char __user *buf,
 			      size_t count, loff_t *pos)
 {
-	struct toshiba_acpi_dev *dev = PDE(file->f_path.dentry->d_inode)->data;
+	struct toshiba_acpi_dev *dev = PDE(file_inode(file))->data;
 	char cmd[42];
 	size_t len;
 	int value;
@@ -822,7 +822,7 @@ static int keys_proc_open(struct inode *inode, struct file *file)
 static ssize_t keys_proc_write(struct file *file, const char __user *buf,
 			       size_t count, loff_t *pos)
 {
-	struct toshiba_acpi_dev *dev = PDE(file->f_path.dentry->d_inode)->data;
+	struct toshiba_acpi_dev *dev = PDE(file_inode(file))->data;
 	char cmd[42];
 	size_t len;
 	int value;
diff --git a/drivers/pnp/isapnp/proc.c b/drivers/pnp/isapnp/proc.c
index 315b3112aca8..65f735ac6b3b 100644
--- a/drivers/pnp/isapnp/proc.c
+++ b/drivers/pnp/isapnp/proc.c
@@ -30,7 +30,7 @@ static struct proc_dir_entry *isapnp_proc_bus_dir = NULL;
 static loff_t isapnp_proc_bus_lseek(struct file *file, loff_t off, int whence)
 {
 	loff_t new = -1;
-	struct inode *inode = file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(file);
 
 	mutex_lock(&inode->i_mutex);
 	switch (whence) {
@@ -55,7 +55,7 @@ static loff_t isapnp_proc_bus_lseek(struct file *file, loff_t off, int whence)
 static ssize_t isapnp_proc_bus_read(struct file *file, char __user * buf,
 				    size_t nbytes, loff_t * ppos)
 {
-	struct inode *ino = file->f_path.dentry->d_inode;
+	struct inode *ino = file_inode(file);
 	struct proc_dir_entry *dp = PDE(ino);
 	struct pnp_dev *dev = dp->data;
 	int pos = *ppos;
diff --git a/drivers/pnp/pnpbios/proc.c b/drivers/pnp/pnpbios/proc.c
index bc89f392a629..63ddb0173456 100644
--- a/drivers/pnp/pnpbios/proc.c
+++ b/drivers/pnp/pnpbios/proc.c
@@ -244,7 +244,7 @@ static int pnpbios_proc_open(struct inode *inode, struct file *file)
 static ssize_t pnpbios_proc_write(struct file *file, const char __user *buf,
 				  size_t count, loff_t *pos)
 {
-	void *data = PDE(file->f_path.dentry->d_inode)->data;
+	void *data = PDE(file_inode(file))->data;
 	struct pnp_bios_node *node;
 	int boot = (long)data >> 8;
 	u8 nodenum = (long)data;
diff --git a/drivers/s390/char/fs3270.c b/drivers/s390/char/fs3270.c
index 911704571b9c..5acdc5f7dae8 100644
--- a/drivers/s390/char/fs3270.c
+++ b/drivers/s390/char/fs3270.c
@@ -433,9 +433,9 @@ fs3270_open(struct inode *inode, struct file *filp)
 	struct idal_buffer *ib;
 	int minor, rc = 0;
 
-	if (imajor(filp->f_path.dentry->d_inode) != IBM_FS3270_MAJOR)
+	if (imajor(file_inode(filp)) != IBM_FS3270_MAJOR)
 		return -ENODEV;
-	minor = iminor(filp->f_path.dentry->d_inode);
+	minor = iminor(file_inode(filp));
 	/* Check for minor 0 multiplexer. */
 	if (minor == 0) {
 		struct tty_struct *tty = get_current_tty();
diff --git a/drivers/s390/char/tape_char.c b/drivers/s390/char/tape_char.c
index 2d61db3fc62a..6dc60725de92 100644
--- a/drivers/s390/char/tape_char.c
+++ b/drivers/s390/char/tape_char.c
@@ -273,13 +273,13 @@ tapechar_open (struct inode *inode, struct file *filp)
 	int minor, rc;
 
 	DBF_EVENT(6, "TCHAR:open: %i:%i\n",
-		imajor(filp->f_path.dentry->d_inode),
-		iminor(filp->f_path.dentry->d_inode));
+		imajor(file_inode(filp)),
+		iminor(file_inode(filp)));
 
-	if (imajor(filp->f_path.dentry->d_inode) != tapechar_major)
+	if (imajor(file_inode(filp)) != tapechar_major)
 		return -ENODEV;
 
-	minor = iminor(filp->f_path.dentry->d_inode);
+	minor = iminor(file_inode(filp));
 	device = tape_find_device(minor / TAPE_MINORS_PER_DEV);
 	if (IS_ERR(device)) {
 		DBF_EVENT(3, "TCHAR:open: tape_find_device() failed\n");
diff --git a/drivers/s390/char/vmur.c b/drivers/s390/char/vmur.c
index 483f72ba030d..c180e3135b3b 100644
--- a/drivers/s390/char/vmur.c
+++ b/drivers/s390/char/vmur.c
@@ -703,7 +703,7 @@ static int ur_open(struct inode *inode, struct file *file)
 	 * We treat the minor number as the devno of the ur device
 	 * to find in the driver tree.
 	 */
-	devno = MINOR(file->f_dentry->d_inode->i_rdev);
+	devno = MINOR(file_inode(file)->i_rdev);
 
 	urd = urdev_get_from_devno(devno);
 	if (!urd) {
diff --git a/drivers/s390/cio/qdio_debug.c b/drivers/s390/cio/qdio_debug.c
index e6e0d31c02ac..749b72739c4a 100644
--- a/drivers/s390/cio/qdio_debug.c
+++ b/drivers/s390/cio/qdio_debug.c
@@ -128,7 +128,7 @@ static int qstat_show(struct seq_file *m, void *v)
 static int qstat_seq_open(struct inode *inode, struct file *filp)
 {
 	return single_open(filp, qstat_show,
-			   filp->f_path.dentry->d_inode->i_private);
+			   file_inode(filp)->i_private);
 }
 
 static const struct file_operations debugfs_fops = {
@@ -221,7 +221,7 @@ static ssize_t qperf_seq_write(struct file *file, const char __user *ubuf,
 static int qperf_seq_open(struct inode *inode, struct file *filp)
 {
 	return single_open(filp, qperf_show,
-			   filp->f_path.dentry->d_inode->i_private);
+			   file_inode(filp)->i_private);
 }
 
 static struct file_operations debugfs_perf_fops = {
diff --git a/drivers/sbus/char/display7seg.c b/drivers/sbus/char/display7seg.c
index e85c803b30cd..fc1339cf91ac 100644
--- a/drivers/sbus/char/display7seg.c
+++ b/drivers/sbus/char/display7seg.c
@@ -107,7 +107,7 @@ static long d7s_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	int error = 0;
 	u8 ireg = 0;
 
-	if (D7S_MINOR != iminor(file->f_path.dentry->d_inode))
+	if (D7S_MINOR != iminor(file_inode(file)))
 		return -ENODEV;
 
 	mutex_lock(&d7s_mutex);
diff --git a/drivers/scsi/3w-9xxx.c b/drivers/scsi/3w-9xxx.c
index d1f0120cdb98..5e1e12c0cf42 100644
--- a/drivers/scsi/3w-9xxx.c
+++ b/drivers/scsi/3w-9xxx.c
@@ -640,7 +640,7 @@ out:
 /* This function handles ioctl for the character device */
 static long twa_chrdev_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
-	struct inode *inode = file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	long timeout;
 	unsigned long *cpu_addr, data_buffer_length_adjusted = 0, flags = 0;
 	dma_addr_t dma_handle;
diff --git a/drivers/scsi/3w-sas.c b/drivers/scsi/3w-sas.c
index 52a2f0580d97..c845bdbeb6c0 100644
--- a/drivers/scsi/3w-sas.c
+++ b/drivers/scsi/3w-sas.c
@@ -757,7 +757,7 @@ static long twl_chrdev_ioctl(struct file *file, unsigned int cmd, unsigned long
 	dma_addr_t dma_handle;
 	int request_id = 0;
 	TW_Ioctl_Driver_Command driver_command;
-	struct inode *inode = file->f_dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	TW_Ioctl_Buf_Apache *tw_ioctl;
 	TW_Command_Full *full_command_packet;
 	TW_Device_Extension *tw_dev = twl_device_extension_list[iminor(inode)];
diff --git a/drivers/scsi/3w-xxxx.c b/drivers/scsi/3w-xxxx.c
index 62071d2fc1ce..56662ae03dea 100644
--- a/drivers/scsi/3w-xxxx.c
+++ b/drivers/scsi/3w-xxxx.c
@@ -889,7 +889,7 @@ static long tw_chrdev_ioctl(struct file *file, unsigned int cmd, unsigned long a
 	unsigned long flags;
 	unsigned int data_buffer_length = 0;
 	unsigned long data_buffer_length_adjusted = 0;
-	struct inode *inode = file->f_dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	unsigned long *cpu_addr;
 	long timeout;
 	TW_New_Ioctl *tw_ioctl;
diff --git a/drivers/scsi/csiostor/csio_init.c b/drivers/scsi/csiostor/csio_init.c
index b42cbbd3d92d..c323b2030afa 100644
--- a/drivers/scsi/csiostor/csio_init.c
+++ b/drivers/scsi/csiostor/csio_init.c
@@ -71,7 +71,7 @@ static ssize_t
 csio_mem_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
 {
 	loff_t pos = *ppos;
-	loff_t avail = file->f_path.dentry->d_inode->i_size;
+	loff_t avail = file_inode(file)->i_size;
 	unsigned int mem = (uintptr_t)file->private_data & 3;
 	struct csio_hw *hw = file->private_data - mem;
 
diff --git a/drivers/scsi/dpt_i2o.c b/drivers/scsi/dpt_i2o.c
index b4f6c9a84e71..b6e2700ec1c6 100644
--- a/drivers/scsi/dpt_i2o.c
+++ b/drivers/scsi/dpt_i2o.c
@@ -2161,7 +2161,7 @@ static long adpt_unlocked_ioctl(struct file *file, uint cmd, ulong arg)
 	struct inode *inode;
 	long ret;
  
-	inode = file->f_dentry->d_inode;
+	inode = file_inode(file);
  
 	mutex_lock(&adpt_mutex);
 	ret = adpt_ioctl(inode, file, cmd, arg);
@@ -2177,7 +2177,7 @@ static long compat_adpt_ioctl(struct file *file,
 	struct inode *inode;
 	long ret;
  
-	inode = file->f_dentry->d_inode;
+	inode = file_inode(file);
  
 	mutex_lock(&adpt_mutex);
  
diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c
index 98156a97c472..3e2b3717cb5c 100644
--- a/drivers/scsi/st.c
+++ b/drivers/scsi/st.c
@@ -977,7 +977,7 @@ static int check_tape(struct scsi_tape *STp, struct file *filp)
 	struct st_modedef *STm;
 	struct st_partstat *STps;
 	char *name = tape_name(STp);
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	int mode = TAPE_MODE(inode);
 
 	STp->ready = ST_READY;
diff --git a/drivers/staging/bcm/Misc.c b/drivers/staging/bcm/Misc.c
index c92078e7fe86..5d5d95de081e 100644
--- a/drivers/staging/bcm/Misc.c
+++ b/drivers/staging/bcm/Misc.c
@@ -185,7 +185,7 @@ static int BcmFileDownload(struct bcm_mini_adapter *Adapter, const char *path, u
 		BCM_DEBUG_PRINT(Adapter, DBG_TYPE_INITEXIT, MP_INIT, DBG_LVL_ALL, "Unable to Open %s\n", path);
 		return -ENOENT;
 	}
-	BCM_DEBUG_PRINT(Adapter, DBG_TYPE_INITEXIT, MP_INIT, DBG_LVL_ALL, "Opened file is = %s and length =0x%lx to be downloaded at =0x%x", path, (unsigned long)flp->f_dentry->d_inode->i_size, loc);
+	BCM_DEBUG_PRINT(Adapter, DBG_TYPE_INITEXIT, MP_INIT, DBG_LVL_ALL, "Opened file is = %s and length =0x%lx to be downloaded at =0x%x", path, (unsigned long)file_inode(flp)->i_size, loc);
 	do_gettimeofday(&tv);
 
 	BCM_DEBUG_PRINT(Adapter, DBG_TYPE_INITEXIT, MP_INIT, DBG_LVL_ALL, "download start %lx", ((tv.tv_sec * 1000) + (tv.tv_usec / 1000)));
diff --git a/drivers/staging/ccg/f_mass_storage.c b/drivers/staging/ccg/f_mass_storage.c
index 4f1142efa6d1..20bc2b454ac2 100644
--- a/drivers/staging/ccg/f_mass_storage.c
+++ b/drivers/staging/ccg/f_mass_storage.c
@@ -998,7 +998,7 @@ static int do_synchronize_cache(struct fsg_common *common)
 static void invalidate_sub(struct fsg_lun *curlun)
 {
 	struct file	*filp = curlun->filp;
-	struct inode	*inode = filp->f_path.dentry->d_inode;
+	struct inode	*inode = file_inode(filp);
 	unsigned long	rc;
 
 	rc = invalidate_mapping_pages(inode->i_mapping, 0, -1);
diff --git a/drivers/staging/ccg/rndis.c b/drivers/staging/ccg/rndis.c
index e4192b887de9..d9297eebbf73 100644
--- a/drivers/staging/ccg/rndis.c
+++ b/drivers/staging/ccg/rndis.c
@@ -1065,7 +1065,7 @@ static int rndis_proc_show(struct seq_file *m, void *v)
 static ssize_t rndis_proc_write(struct file *file, const char __user *buffer,
 				size_t count, loff_t *ppos)
 {
-	rndis_params *p = PDE(file->f_path.dentry->d_inode)->data;
+	rndis_params *p = PDE(file_inode(file))->data;
 	u32 speed = 0;
 	int i, fl_speed = 0;
 
diff --git a/drivers/staging/ccg/storage_common.c b/drivers/staging/ccg/storage_common.c
index 8d9bcd8207c8..abb01ac74cec 100644
--- a/drivers/staging/ccg/storage_common.c
+++ b/drivers/staging/ccg/storage_common.c
@@ -656,7 +656,7 @@ static int fsg_lun_open(struct fsg_lun *curlun, const char *filename)
 	if (!(filp->f_mode & FMODE_WRITE))
 		ro = 1;
 
-	inode = filp->f_path.dentry->d_inode;
+	inode = file_inode(filp);
 	if ((!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode))) {
 		LINFO(curlun, "invalid file type: %s\n", filename);
 		goto out;
diff --git a/drivers/staging/dgrp/dgrp_specproc.c b/drivers/staging/dgrp/dgrp_specproc.c
index c214078a89e9..b2bda32254ac 100644
--- a/drivers/staging/dgrp/dgrp_specproc.c
+++ b/drivers/staging/dgrp/dgrp_specproc.c
@@ -354,7 +354,7 @@ static int dgrp_gen_proc_open(struct inode *inode, struct file *file)
 	struct dgrp_proc_entry *entry;
 	int ret = 0;
 
-	de = (struct proc_dir_entry *) PDE(file->f_dentry->d_inode);
+	de = (struct proc_dir_entry *) PDE(file_inode(file));
 	if (!de || !de->data) {
 		ret = -ENXIO;
 		goto done;
@@ -384,7 +384,7 @@ static int dgrp_gen_proc_close(struct inode *inode, struct file *file)
 	struct proc_dir_entry *de;
 	struct dgrp_proc_entry *entry;
 
-	de = (struct proc_dir_entry *) PDE(file->f_dentry->d_inode);
+	de = (struct proc_dir_entry *) PDE(file_inode(file));
 	if (!de || !de->data)
 		goto done;
 
diff --git a/drivers/staging/omapdrm/omap_gem_helpers.c b/drivers/staging/omapdrm/omap_gem_helpers.c
index ffb8cceaeb46..7d1b64a7404b 100644
--- a/drivers/staging/omapdrm/omap_gem_helpers.c
+++ b/drivers/staging/omapdrm/omap_gem_helpers.c
@@ -40,7 +40,7 @@ struct page **_drm_gem_get_pages(struct drm_gem_object *obj, gfp_t gfpmask)
 	int i, npages;
 
 	/* This is the shared memory object that backs the GEM resource */
-	inode = obj->filp->f_path.dentry->d_inode;
+	inode = file_inode(obj->filp);
 	mapping = inode->i_mapping;
 
 	npages = obj->size >> PAGE_SHIFT;
diff --git a/drivers/staging/usbip/usbip_common.c b/drivers/staging/usbip/usbip_common.c
index 75189feac380..773014c7c752 100644
--- a/drivers/staging/usbip/usbip_common.c
+++ b/drivers/staging/usbip/usbip_common.c
@@ -411,7 +411,7 @@ struct socket *sockfd_to_socket(unsigned int sockfd)
 		return NULL;
 	}
 
-	inode = file->f_dentry->d_inode;
+	inode = file_inode(file);
 
 	if (!inode || !S_ISSOCK(inode->i_mode)) {
 		fput(file);
diff --git a/drivers/staging/vme/devices/vme_user.c b/drivers/staging/vme/devices/vme_user.c
index 4ef852c4c4e1..869ce93ee204 100644
--- a/drivers/staging/vme/devices/vme_user.c
+++ b/drivers/staging/vme/devices/vme_user.c
@@ -318,7 +318,7 @@ static ssize_t buffer_from_user(unsigned int minor, const char __user *buf,
 static ssize_t vme_user_read(struct file *file, char __user *buf, size_t count,
 			loff_t *ppos)
 {
-	unsigned int minor = MINOR(file->f_dentry->d_inode->i_rdev);
+	unsigned int minor = MINOR(file_inode(file)->i_rdev);
 	ssize_t retval;
 	size_t image_size;
 	size_t okcount;
@@ -364,7 +364,7 @@ static ssize_t vme_user_read(struct file *file, char __user *buf, size_t count,
 static ssize_t vme_user_write(struct file *file, const char __user *buf,
 			size_t count, loff_t *ppos)
 {
-	unsigned int minor = MINOR(file->f_dentry->d_inode->i_rdev);
+	unsigned int minor = MINOR(file_inode(file)->i_rdev);
 	ssize_t retval;
 	size_t image_size;
 	size_t okcount;
@@ -410,7 +410,7 @@ static ssize_t vme_user_write(struct file *file, const char __user *buf,
 static loff_t vme_user_llseek(struct file *file, loff_t off, int whence)
 {
 	loff_t absolute = -1;
-	unsigned int minor = MINOR(file->f_dentry->d_inode->i_rdev);
+	unsigned int minor = MINOR(file_inode(file)->i_rdev);
 	size_t image_size;
 
 	if (minor == CONTROL_MINOR)
@@ -583,7 +583,7 @@ vme_user_unlocked_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	int ret;
 
 	mutex_lock(&vme_user_mutex);
-	ret = vme_user_ioctl(file->f_path.dentry->d_inode, file, cmd, arg);
+	ret = vme_user_ioctl(file_inode(file), file, cmd, arg);
 	mutex_unlock(&vme_user_mutex);
 
 	return ret;
diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c
index b9c88497e8f0..a65d507b0b0c 100644
--- a/drivers/target/target_core_file.c
+++ b/drivers/target/target_core_file.c
@@ -265,7 +265,7 @@ static int fd_do_rw(struct se_cmd *cmd, struct scatterlist *sgl,
 		 * the expected virt_size for struct file w/o a backing struct
 		 * block_device.
 		 */
-		if (S_ISBLK(fd->f_dentry->d_inode->i_mode)) {
+		if (S_ISBLK(file_inode(fd)->i_mode)) {
 			if (ret < 0 || ret != cmd->data_length) {
 				pr_err("%s() returned %d, expecting %u for "
 						"S_ISBLK\n", __func__, ret,
diff --git a/drivers/tty/vt/vc_screen.c b/drivers/tty/vt/vc_screen.c
index fa7268a93c06..e4ca345873c3 100644
--- a/drivers/tty/vt/vc_screen.c
+++ b/drivers/tty/vt/vc_screen.c
@@ -101,7 +101,7 @@ vcs_poll_data_get(struct file *file)
 	poll = kzalloc(sizeof(*poll), GFP_KERNEL);
 	if (!poll)
 		return NULL;
-	poll->cons_num = iminor(file->f_path.dentry->d_inode) & 127;
+	poll->cons_num = iminor(file_inode(file)) & 127;
 	init_waitqueue_head(&poll->waitq);
 	poll->notifier.notifier_call = vcs_notifier;
 	if (register_vt_notifier(&poll->notifier) != 0) {
@@ -182,7 +182,7 @@ static loff_t vcs_lseek(struct file *file, loff_t offset, int orig)
 	int size;
 
 	console_lock();
-	size = vcs_size(file->f_path.dentry->d_inode);
+	size = vcs_size(file_inode(file));
 	console_unlock();
 	if (size < 0)
 		return size;
@@ -208,7 +208,7 @@ static loff_t vcs_lseek(struct file *file, loff_t offset, int orig)
 static ssize_t
 vcs_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
 {
-	struct inode *inode = file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	unsigned int currcons = iminor(inode);
 	struct vc_data *vc;
 	struct vcs_poll_data *poll;
@@ -386,7 +386,7 @@ unlock_out:
 static ssize_t
 vcs_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
 {
-	struct inode *inode = file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	unsigned int currcons = iminor(inode);
 	struct vc_data *vc;
 	long pos;
diff --git a/drivers/usb/core/devices.c b/drivers/usb/core/devices.c
index cbacea933b18..2c42e06f9717 100644
--- a/drivers/usb/core/devices.c
+++ b/drivers/usb/core/devices.c
@@ -658,7 +658,7 @@ static loff_t usb_device_lseek(struct file *file, loff_t offset, int orig)
 {
 	loff_t ret;
 
-	mutex_lock(&file->f_dentry->d_inode->i_mutex);
+	mutex_lock(&file_inode(file)->i_mutex);
 
 	switch (orig) {
 	case 0:
@@ -674,7 +674,7 @@ static loff_t usb_device_lseek(struct file *file, loff_t offset, int orig)
 		ret = -EINVAL;
 	}
 
-	mutex_unlock(&file->f_dentry->d_inode->i_mutex);
+	mutex_unlock(&file_inode(file)->i_mutex);
 	return ret;
 }
 
diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c
index b78fbe222b72..6e8af6ddb5f7 100644
--- a/drivers/usb/core/devio.c
+++ b/drivers/usb/core/devio.c
@@ -160,7 +160,7 @@ static loff_t usbdev_lseek(struct file *file, loff_t offset, int orig)
 {
 	loff_t ret;
 
-	mutex_lock(&file->f_dentry->d_inode->i_mutex);
+	mutex_lock(&file_inode(file)->i_mutex);
 
 	switch (orig) {
 	case 0:
@@ -176,7 +176,7 @@ static loff_t usbdev_lseek(struct file *file, loff_t offset, int orig)
 		ret = -EINVAL;
 	}
 
-	mutex_unlock(&file->f_dentry->d_inode->i_mutex);
+	mutex_unlock(&file_inode(file)->i_mutex);
 	return ret;
 }
 
@@ -1970,7 +1970,7 @@ static long usbdev_do_ioctl(struct file *file, unsigned int cmd,
 				void __user *p)
 {
 	struct dev_state *ps = file->private_data;
-	struct inode *inode = file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	struct usb_device *dev = ps->dev;
 	int ret = -ENOTTY;
 
diff --git a/drivers/usb/gadget/atmel_usba_udc.c b/drivers/usb/gadget/atmel_usba_udc.c
index a7aed84d98c9..fd49dba53613 100644
--- a/drivers/usb/gadget/atmel_usba_udc.c
+++ b/drivers/usb/gadget/atmel_usba_udc.c
@@ -93,7 +93,7 @@ static ssize_t queue_dbg_read(struct file *file, char __user *buf,
 	if (!access_ok(VERIFY_WRITE, buf, nbytes))
 		return -EFAULT;
 
-	mutex_lock(&file->f_dentry->d_inode->i_mutex);
+	mutex_lock(&file_inode(file)->i_mutex);
 	list_for_each_entry_safe(req, tmp_req, queue, queue) {
 		len = snprintf(tmpbuf, sizeof(tmpbuf),
 				"%8p %08x %c%c%c %5d %c%c%c\n",
@@ -120,7 +120,7 @@ static ssize_t queue_dbg_read(struct file *file, char __user *buf,
 		nbytes -= len;
 		buf += len;
 	}
-	mutex_unlock(&file->f_dentry->d_inode->i_mutex);
+	mutex_unlock(&file_inode(file)->i_mutex);
 
 	return actual;
 }
@@ -168,13 +168,13 @@ out:
 static ssize_t regs_dbg_read(struct file *file, char __user *buf,
 		size_t nbytes, loff_t *ppos)
 {
-	struct inode *inode = file->f_dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	int ret;
 
 	mutex_lock(&inode->i_mutex);
 	ret = simple_read_from_buffer(buf, nbytes, ppos,
 			file->private_data,
-			file->f_dentry->d_inode->i_size);
+			file_inode(file)->i_size);
 	mutex_unlock(&inode->i_mutex);
 
 	return ret;
diff --git a/drivers/usb/gadget/f_mass_storage.c b/drivers/usb/gadget/f_mass_storage.c
index 5d027b3e1ef0..50a46a429efb 100644
--- a/drivers/usb/gadget/f_mass_storage.c
+++ b/drivers/usb/gadget/f_mass_storage.c
@@ -992,7 +992,7 @@ static int do_synchronize_cache(struct fsg_common *common)
 static void invalidate_sub(struct fsg_lun *curlun)
 {
 	struct file	*filp = curlun->filp;
-	struct inode	*inode = filp->f_path.dentry->d_inode;
+	struct inode	*inode = file_inode(filp);
 	unsigned long	rc;
 
 	rc = invalidate_mapping_pages(inode->i_mapping, 0, -1);
diff --git a/drivers/usb/gadget/printer.c b/drivers/usb/gadget/printer.c
index 35bcc83d1e04..bf7a56b6d48a 100644
--- a/drivers/usb/gadget/printer.c
+++ b/drivers/usb/gadget/printer.c
@@ -688,7 +688,7 @@ static int
 printer_fsync(struct file *fd, loff_t start, loff_t end, int datasync)
 {
 	struct printer_dev	*dev = fd->private_data;
-	struct inode *inode = fd->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(fd);
 	unsigned long		flags;
 	int			tx_list_empty;
 
diff --git a/drivers/usb/gadget/rndis.c b/drivers/usb/gadget/rndis.c
index e4192b887de9..d9297eebbf73 100644
--- a/drivers/usb/gadget/rndis.c
+++ b/drivers/usb/gadget/rndis.c
@@ -1065,7 +1065,7 @@ static int rndis_proc_show(struct seq_file *m, void *v)
 static ssize_t rndis_proc_write(struct file *file, const char __user *buffer,
 				size_t count, loff_t *ppos)
 {
-	rndis_params *p = PDE(file->f_path.dentry->d_inode)->data;
+	rndis_params *p = PDE(file_inode(file))->data;
 	u32 speed = 0;
 	int i, fl_speed = 0;
 
diff --git a/drivers/usb/gadget/storage_common.c b/drivers/usb/gadget/storage_common.c
index 0e3ae43454a2..b5d3f0eeeb7d 100644
--- a/drivers/usb/gadget/storage_common.c
+++ b/drivers/usb/gadget/storage_common.c
@@ -501,7 +501,7 @@ static int fsg_lun_open(struct fsg_lun *curlun, const char *filename)
 	if (!(filp->f_mode & FMODE_WRITE))
 		ro = 1;
 
-	inode = filp->f_path.dentry->d_inode;
+	inode = file_inode(filp);
 	if ((!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode))) {
 		LINFO(curlun, "invalid file type: %s\n", filename);
 		goto out;
diff --git a/drivers/video/fb_defio.c b/drivers/video/fb_defio.c
index 88cad6b8b479..900aa4ecd617 100644
--- a/drivers/video/fb_defio.c
+++ b/drivers/video/fb_defio.c
@@ -69,7 +69,7 @@ static int fb_deferred_io_fault(struct vm_area_struct *vma,
 int fb_deferred_io_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 {
 	struct fb_info *info = file->private_data;
-	struct inode *inode = file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	int err = filemap_write_and_wait_range(inode->i_mapping, start, end);
 	if (err)
 		return err;
diff --git a/drivers/video/fbmem.c b/drivers/video/fbmem.c
index 3ff0105a496a..bf01980c9554 100644
--- a/drivers/video/fbmem.c
+++ b/drivers/video/fbmem.c
@@ -727,7 +727,7 @@ static const struct file_operations fb_proc_fops = {
  */
 static struct fb_info *file_fb_info(struct file *file)
 {
-	struct inode *inode = file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	int fbidx = iminor(inode);
 	struct fb_info *info = registered_fb[fbidx];
 
diff --git a/drivers/video/msm/mdp.c b/drivers/video/msm/mdp.c
index f2566c19e71c..113c7876c855 100644
--- a/drivers/video/msm/mdp.c
+++ b/drivers/video/msm/mdp.c
@@ -261,7 +261,7 @@ int get_img(struct mdp_img *img, struct fb_info *info,
 	if (f.file == NULL)
 		return -1;
 
-	if (MAJOR(f.file->f_dentry->d_inode->i_rdev) == FB_MAJOR) {
+	if (MAJOR(file_inode(f.file)->i_rdev) == FB_MAJOR) {
 		*start = info->fix.smem_start;
 		*len = info->fix.smem_len;
 	} else
diff --git a/drivers/watchdog/cpwd.c b/drivers/watchdog/cpwd.c
index 11d55ce5ca81..70387582843f 100644
--- a/drivers/watchdog/cpwd.c
+++ b/drivers/watchdog/cpwd.c
@@ -411,7 +411,7 @@ static long cpwd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 		.identity		= DRIVER_NAME,
 	};
 	void __user *argp = (void __user *)arg;
-	struct inode *inode = file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	int index = iminor(inode) - WD0_MINOR;
 	struct cpwd *p = cpwd_device;
 	int setopt = 0;
@@ -499,7 +499,7 @@ static long cpwd_compat_ioctl(struct file *file, unsigned int cmd,
 static ssize_t cpwd_write(struct file *file, const char __user *buf,
 			  size_t count, loff_t *ppos)
 {
-	struct inode *inode = file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	struct cpwd *p = cpwd_device;
 	int index = iminor(inode);
 
diff --git a/drivers/zorro/proc.c b/drivers/zorro/proc.c
index 988880dcee75..73b33837e12c 100644
--- a/drivers/zorro/proc.c
+++ b/drivers/zorro/proc.c
@@ -22,7 +22,7 @@ static loff_t
 proc_bus_zorro_lseek(struct file *file, loff_t off, int whence)
 {
 	loff_t new = -1;
-	struct inode *inode = file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(file);
 
 	mutex_lock(&inode->i_mutex);
 	switch (whence) {
@@ -47,7 +47,7 @@ proc_bus_zorro_lseek(struct file *file, loff_t off, int whence)
 static ssize_t
 proc_bus_zorro_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos)
 {
-	struct inode *ino = file->f_path.dentry->d_inode;
+	struct inode *ino = file_inode(file);
 	struct proc_dir_entry *dp = PDE(ino);
 	struct zorro_dev *z = dp->data;
 	struct ConfigDev cd;
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index c2483e97beee..3356e3ed5115 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -133,7 +133,7 @@ out_error:
 static int v9fs_file_lock(struct file *filp, int cmd, struct file_lock *fl)
 {
 	int res = 0;
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 
 	p9_debug(P9_DEBUG_VFS, "filp: %p lock: %p\n", filp, fl);
 
@@ -302,7 +302,7 @@ static int v9fs_file_getlock(struct file *filp, struct file_lock *fl)
 
 static int v9fs_file_lock_dotl(struct file *filp, int cmd, struct file_lock *fl)
 {
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	int ret = -ENOLCK;
 
 	p9_debug(P9_DEBUG_VFS, "filp: %p cmd:%d lock: %p name: %s\n",
@@ -338,7 +338,7 @@ out_err:
 static int v9fs_file_flock_dotl(struct file *filp, int cmd,
 	struct file_lock *fl)
 {
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	int ret = -ENOLCK;
 
 	p9_debug(P9_DEBUG_VFS, "filp: %p cmd:%d lock: %p name: %s\n",
@@ -529,7 +529,7 @@ v9fs_file_write(struct file *filp, const char __user * data,
 	if (!count)
 		goto out;
 
-	retval = v9fs_file_write_internal(filp->f_path.dentry->d_inode,
+	retval = v9fs_file_write_internal(file_inode(filp),
 					filp->private_data,
 					data, count, &origin, 1);
 	/* update offset on successful write */
@@ -604,7 +604,7 @@ v9fs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 	struct v9fs_inode *v9inode;
 	struct page *page = vmf->page;
 	struct file *filp = vma->vm_file;
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 
 
 	p9_debug(P9_DEBUG_VFS, "page %p fid %lx\n",
diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c
index b3be2e7c5643..9cf874ce8336 100644
--- a/fs/adfs/dir.c
+++ b/fs/adfs/dir.c
@@ -19,7 +19,7 @@ static DEFINE_RWLOCK(adfs_dir_lock);
 static int
 adfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 {
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	struct super_block *sb = inode->i_sb;
 	struct adfs_dir_ops *ops = ADFS_SB(sb)->s_dir;
 	struct object_info obj;
diff --git a/fs/affs/dir.c b/fs/affs/dir.c
index 8ca8f3a55599..fd11a6d608ee 100644
--- a/fs/affs/dir.c
+++ b/fs/affs/dir.c
@@ -42,7 +42,7 @@ const struct inode_operations affs_dir_inode_operations = {
 static int
 affs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 {
-	struct inode		*inode = filp->f_path.dentry->d_inode;
+	struct inode		*inode = file_inode(filp);
 	struct super_block	*sb = inode->i_sb;
 	struct buffer_head	*dir_bh;
 	struct buffer_head	*fh_bh;
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index db477906ba4f..7a465ed04444 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -393,12 +393,12 @@ static int afs_readdir(struct file *file, void *cookie, filldir_t filldir)
 	int ret;
 
 	_enter("{%Ld,{%lu}}",
-	       file->f_pos, file->f_path.dentry->d_inode->i_ino);
+	       file->f_pos, file_inode(file)->i_ino);
 
 	ASSERT(file->private_data != NULL);
 
 	fpos = file->f_pos;
-	ret = afs_dir_iterate(file->f_path.dentry->d_inode, &fpos,
+	ret = afs_dir_iterate(file_inode(file), &fpos,
 			      cookie, filldir, file->private_data);
 	file->f_pos = fpos;
 
diff --git a/fs/afs/flock.c b/fs/afs/flock.c
index 757d664575dd..2497bf306c70 100644
--- a/fs/afs/flock.c
+++ b/fs/afs/flock.c
@@ -514,7 +514,7 @@ error:
  */
 int afs_lock(struct file *file, int cmd, struct file_lock *fl)
 {
-	struct afs_vnode *vnode = AFS_FS_I(file->f_dentry->d_inode);
+	struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
 
 	_enter("{%x:%u},%d,{t=%x,fl=%x,r=%Ld:%Ld}",
 	       vnode->fid.vid, vnode->fid.vnode, cmd,
@@ -537,7 +537,7 @@ int afs_lock(struct file *file, int cmd, struct file_lock *fl)
  */
 int afs_flock(struct file *file, int cmd, struct file_lock *fl)
 {
-	struct afs_vnode *vnode = AFS_FS_I(file->f_dentry->d_inode);
+	struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
 
 	_enter("{%x:%u},%d,{t=%x,fl=%x}",
 	       vnode->fid.vid, vnode->fid.vnode, cmd,
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 9aa52d93c73c..7e03eadb40c0 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -120,7 +120,7 @@ int afs_write_begin(struct file *file, struct address_space *mapping,
 		    struct page **pagep, void **fsdata)
 {
 	struct afs_writeback *candidate, *wb;
-	struct afs_vnode *vnode = AFS_FS_I(file->f_dentry->d_inode);
+	struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
 	struct page *page;
 	struct key *key = file->private_data;
 	unsigned from = pos & (PAGE_CACHE_SIZE - 1);
@@ -245,7 +245,7 @@ int afs_write_end(struct file *file, struct address_space *mapping,
 		  loff_t pos, unsigned len, unsigned copied,
 		  struct page *page, void *fsdata)
 {
-	struct afs_vnode *vnode = AFS_FS_I(file->f_dentry->d_inode);
+	struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
 	loff_t i_size, maybe_i_size;
 
 	_enter("{%x:%u},{%lx}",
@@ -627,8 +627,7 @@ void afs_pages_written_back(struct afs_vnode *vnode, struct afs_call *call)
 ssize_t afs_file_write(struct kiocb *iocb, const struct iovec *iov,
 		       unsigned long nr_segs, loff_t pos)
 {
-	struct dentry *dentry = iocb->ki_filp->f_path.dentry;
-	struct afs_vnode *vnode = AFS_FS_I(dentry->d_inode);
+	struct afs_vnode *vnode = AFS_FS_I(file_inode(iocb->ki_filp));
 	ssize_t result;
 	size_t count = iov_length(iov, nr_segs);
 
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index b785e7707959..3f1128b37e46 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -273,7 +273,7 @@ static inline int autofs_prepare_pipe(struct file *pipe)
 {
 	if (!pipe->f_op || !pipe->f_op->write)
 		return -EINVAL;
-	if (!S_ISFIFO(pipe->f_dentry->d_inode->i_mode))
+	if (!S_ISFIFO(file_inode(pipe)->i_mode))
 		return -EINVAL;
 	/* We want a packet pipe */
 	pipe->f_flags |= O_DIRECT;
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index 9f68a37bb2b2..743c7c2c949d 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -159,7 +159,7 @@ static struct autofs_sb_info *autofs_dev_ioctl_sbi(struct file *f)
 	struct inode *inode;
 
 	if (f) {
-		inode = f->f_path.dentry->d_inode;
+		inode = file_inode(f);
 		sbi = autofs4_sbi(inode->i_sb);
 	}
 	return sbi;
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index c93447604da8..3cdf835e8b49 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -874,7 +874,7 @@ static int autofs4_root_ioctl_unlocked(struct inode *inode, struct file *filp,
 static long autofs4_root_ioctl(struct file *filp,
 			       unsigned int cmd, unsigned long arg)
 {
-	struct inode *inode = filp->f_dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	return autofs4_root_ioctl_unlocked(inode, filp, cmd, arg);
 }
 
@@ -882,7 +882,7 @@ static long autofs4_root_ioctl(struct file *filp,
 static long autofs4_root_compat_ioctl(struct file *filp,
 			     unsigned int cmd, unsigned long arg)
 {
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	int ret;
 
 	if (cmd == AUTOFS_IOC_READY || cmd == AUTOFS_IOC_FAIL)
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 2b3bda8d5e68..c8f4e25eb9e2 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -213,7 +213,7 @@ befs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
 static int
 befs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 {
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	struct super_block *sb = inode->i_sb;
 	befs_data_stream *ds = &BEFS_I(inode)->i_data.ds;
 	befs_off_t value;
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c
index 2785ef91191a..3f422f6bb5ca 100644
--- a/fs/bfs/dir.c
+++ b/fs/bfs/dir.c
@@ -28,7 +28,7 @@ static struct buffer_head *bfs_find_entry(struct inode *dir,
 
 static int bfs_readdir(struct file *f, void *dirent, filldir_t filldir)
 {
-	struct inode *dir = f->f_path.dentry->d_inode;
+	struct inode *dir = file_inode(f);
 	struct buffer_head *bh;
 	struct bfs_dirent *de;
 	struct bfs_sb_info *info = BFS_SB(dir->i_sb);
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index 6043567b95c2..bbc8f8827eac 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -214,7 +214,7 @@ static int load_aout_binary(struct linux_binprm * bprm)
 	if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != OMAGIC &&
 	     N_MAGIC(ex) != QMAGIC && N_MAGIC(ex) != NMAGIC) ||
 	    N_TRSIZE(ex) || N_DRSIZE(ex) ||
-	    i_size_read(bprm->file->f_path.dentry->d_inode) < ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) {
+	    i_size_read(file_inode(bprm->file)) < ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) {
 		return -ENOEXEC;
 	}
 
@@ -367,7 +367,7 @@ static int load_aout_library(struct file *file)
 	int retval;
 	struct exec ex;
 
-	inode = file->f_path.dentry->d_inode;
+	inode = file_inode(file);
 
 	retval = -ENOEXEC;
 	error = kernel_read(file, 0, (char *) &ex, sizeof(ex));
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 0c42cdbabecf..11e078a747a5 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1140,7 +1140,7 @@ static unsigned long vma_dump_size(struct vm_area_struct *vma,
 
 	/* By default, dump shared memory if mapped from an anonymous file. */
 	if (vma->vm_flags & VM_SHARED) {
-		if (vma->vm_file->f_path.dentry->d_inode->i_nlink == 0 ?
+		if (file_inode(vma->vm_file)->i_nlink == 0 ?
 		    FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
 			goto whole;
 		return 0;
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index dc84732e554f..30de01ca3eeb 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -909,7 +909,7 @@ static int elf_fdpic_map_file(struct elf_fdpic_params *params,
 
 dynamic_error:
 	printk("ELF FDPIC %s with invalid DYNAMIC section (inode=%lu)\n",
-	       what, file->f_path.dentry->d_inode->i_ino);
+	       what, file_inode(file)->i_ino);
 	return -ELIBBAD;
 }
 
@@ -1219,7 +1219,7 @@ static int maydump(struct vm_area_struct *vma, unsigned long mm_flags)
 
 	/* By default, dump shared memory if mapped from an anonymous file. */
 	if (vma->vm_flags & VM_SHARED) {
-		if (vma->vm_file->f_path.dentry->d_inode->i_nlink == 0) {
+		if (file_inode(vma->vm_file)->i_nlink == 0) {
 			dump_ok = test_bit(MMF_DUMP_ANON_SHARED, &mm_flags);
 			kdcore("%08lx: %08lx: %s (share)", vma->vm_start,
 			       vma->vm_flags, dump_ok ? "yes" : "no");
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index b56371981d16..2036d21baaef 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -438,7 +438,7 @@ static int load_flat_file(struct linux_binprm * bprm,
 	int ret;
 
 	hdr = ((struct flat_hdr *) bprm->buf);		/* exec-header */
-	inode = bprm->file->f_path.dentry->d_inode;
+	inode = file_inode(bprm->file);
 
 	text_len  = ntohl(hdr->data_start);
 	data_len  = ntohl(hdr->data_end) - ntohl(hdr->data_start);
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 0c8869fdd14e..fecbbf3f8ff2 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -531,7 +531,7 @@ static void kill_node(Node *e)
 static ssize_t
 bm_entry_read(struct file * file, char __user * buf, size_t nbytes, loff_t *ppos)
 {
-	Node *e = file->f_path.dentry->d_inode->i_private;
+	Node *e = file_inode(file)->i_private;
 	ssize_t res;
 	char *page;
 
@@ -550,7 +550,7 @@ static ssize_t bm_entry_write(struct file *file, const char __user *buffer,
 				size_t count, loff_t *ppos)
 {
 	struct dentry *root;
-	Node *e = file->f_path.dentry->d_inode->i_private;
+	Node *e = file_inode(file)->i_private;
 	int res = parse_command(buffer, count);
 
 	switch (res) {
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 172f8491a2bd..7d6bdfc6b7bc 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -318,7 +318,7 @@ static int blkdev_write_end(struct file *file, struct address_space *mapping,
 
 /*
  * private llseek:
- * for a block special file file->f_path.dentry->d_inode->i_size is zero
+ * for a block special file file_inode(file)->i_size is zero
  * so we compute the size by hand (just as in block_read/write above)
  */
 static loff_t block_llseek(struct file *file, loff_t offset, int whence)
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 77061bf43edb..4118e0b6e339 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1211,7 +1211,7 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file,
 	struct extent_state *cached_state = NULL;
 	int i;
 	unsigned long index = pos >> PAGE_CACHE_SHIFT;
-	struct inode *inode = fdentry(file)->d_inode;
+	struct inode *inode = file_inode(file);
 	gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
 	int err = 0;
 	int faili = 0;
@@ -1298,7 +1298,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
 					       struct iov_iter *i,
 					       loff_t pos)
 {
-	struct inode *inode = fdentry(file)->d_inode;
+	struct inode *inode = file_inode(file);
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct page **pages = NULL;
 	unsigned long first_index;
@@ -1486,7 +1486,7 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
 				    unsigned long nr_segs, loff_t pos)
 {
 	struct file *file = iocb->ki_filp;
-	struct inode *inode = fdentry(file)->d_inode;
+	struct inode *inode = file_inode(file);
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	loff_t *ppos = &iocb->ki_pos;
 	u64 start_pos;
@@ -2087,7 +2087,7 @@ out:
 static long btrfs_fallocate(struct file *file, int mode,
 			    loff_t offset, loff_t len)
 {
-	struct inode *inode = file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	struct extent_state *cached_state = NULL;
 	u64 cur_offset;
 	u64 last_byte;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 16d9e8e191e6..02d946a61ddd 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4342,7 +4342,7 @@ unsigned char btrfs_filetype_table[] = {
 static int btrfs_real_readdir(struct file *filp, void *dirent,
 			      filldir_t filldir)
 {
-	struct inode *inode = filp->f_dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct btrfs_item *item;
 	struct btrfs_dir_item *di;
@@ -6737,7 +6737,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset)
 int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
 	struct page *page = vmf->page;
-	struct inode *inode = fdentry(vma->vm_file)->d_inode;
+	struct inode *inode = file_inode(vma->vm_file);
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
 	struct btrfs_ordered_extent *ordered;
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 4b4516770f05..61045adc3075 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -152,7 +152,7 @@ void btrfs_inherit_iflags(struct inode *inode, struct inode *dir)
 
 static int btrfs_ioctl_getflags(struct file *file, void __user *arg)
 {
-	struct btrfs_inode *ip = BTRFS_I(file->f_path.dentry->d_inode);
+	struct btrfs_inode *ip = BTRFS_I(file_inode(file));
 	unsigned int flags = btrfs_flags_to_ioctl(ip->flags);
 
 	if (copy_to_user(arg, &flags, sizeof(flags)))
@@ -177,7 +177,7 @@ static int check_flags(unsigned int flags)
 
 static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
 {
-	struct inode *inode = file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	struct btrfs_inode *ip = BTRFS_I(inode);
 	struct btrfs_root *root = ip->root;
 	struct btrfs_trans_handle *trans;
@@ -310,7 +310,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
 
 static int btrfs_ioctl_getversion(struct file *file, int __user *arg)
 {
-	struct inode *inode = file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(file);
 
 	return put_user(inode->i_generation, arg);
 }
@@ -1317,7 +1317,7 @@ static noinline int btrfs_ioctl_resize(struct file *file,
 	u64 new_size;
 	u64 old_size;
 	u64 devid = 1;
-	struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
+	struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
 	struct btrfs_ioctl_vol_args *vol_args;
 	struct btrfs_trans_handle *trans;
 	struct btrfs_device *device = NULL;
@@ -1483,8 +1483,8 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
 			goto out_drop_write;
 		}
 
-		src_inode = src.file->f_path.dentry->d_inode;
-		if (src_inode->i_sb != file->f_path.dentry->d_inode->i_sb) {
+		src_inode = file_inode(src.file);
+		if (src_inode->i_sb != file_inode(file)->i_sb) {
 			printk(KERN_INFO "btrfs: Snapshot src from "
 			       "another FS\n");
 			ret = -EINVAL;
@@ -1576,7 +1576,7 @@ out:
 static noinline int btrfs_ioctl_subvol_getflags(struct file *file,
 						void __user *arg)
 {
-	struct inode *inode = fdentry(file)->d_inode;
+	struct inode *inode = file_inode(file);
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	int ret = 0;
 	u64 flags = 0;
@@ -1598,7 +1598,7 @@ static noinline int btrfs_ioctl_subvol_getflags(struct file *file,
 static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
 					      void __user *arg)
 {
-	struct inode *inode = fdentry(file)->d_inode;
+	struct inode *inode = file_inode(file);
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct btrfs_trans_handle *trans;
 	u64 root_flags;
@@ -1892,7 +1892,7 @@ static noinline int btrfs_ioctl_tree_search(struct file *file,
 	if (IS_ERR(args))
 		return PTR_ERR(args);
 
-	inode = fdentry(file)->d_inode;
+	inode = file_inode(file);
 	ret = search_ioctl(inode, args);
 	if (ret == 0 && copy_to_user(argp, args, sizeof(*args)))
 		ret = -EFAULT;
@@ -2002,7 +2002,7 @@ static noinline int btrfs_ioctl_ino_lookup(struct file *file,
 	if (IS_ERR(args))
 		return PTR_ERR(args);
 
-	inode = fdentry(file)->d_inode;
+	inode = file_inode(file);
 
 	if (args->treeid == 0)
 		args->treeid = BTRFS_I(inode)->root->root_key.objectid;
@@ -2178,7 +2178,7 @@ out:
 
 static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
 {
-	struct inode *inode = fdentry(file)->d_inode;
+	struct inode *inode = file_inode(file);
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct btrfs_ioctl_defrag_range_args *range;
 	int ret;
@@ -2237,7 +2237,7 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
 			/* the rest are all set to zero by kzalloc */
 			range->len = (u64)-1;
 		}
-		ret = btrfs_defrag_file(fdentry(file)->d_inode, file,
+		ret = btrfs_defrag_file(file_inode(file), file,
 					range, 0, 0);
 		if (ret > 0)
 			ret = 0;
@@ -2285,7 +2285,7 @@ out:
 
 static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg)
 {
-	struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
+	struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
 	struct btrfs_ioctl_vol_args *vol_args;
 	int ret;
 
@@ -2408,7 +2408,7 @@ out:
 static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
 				       u64 off, u64 olen, u64 destoff)
 {
-	struct inode *inode = fdentry(file)->d_inode;
+	struct inode *inode = file_inode(file);
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct fd src_file;
 	struct inode *src;
@@ -2454,7 +2454,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
 	if (src_file.file->f_path.mnt != file->f_path.mnt)
 		goto out_fput;
 
-	src = src_file.file->f_dentry->d_inode;
+	src = file_inode(src_file.file);
 
 	ret = -EINVAL;
 	if (src == inode)
@@ -2816,7 +2816,7 @@ static long btrfs_ioctl_clone_range(struct file *file, void __user *argp)
  */
 static long btrfs_ioctl_trans_start(struct file *file)
 {
-	struct inode *inode = fdentry(file)->d_inode;
+	struct inode *inode = file_inode(file);
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct btrfs_trans_handle *trans;
 	int ret;
@@ -2856,7 +2856,7 @@ out:
 
 static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
 {
-	struct inode *inode = fdentry(file)->d_inode;
+	struct inode *inode = file_inode(file);
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct btrfs_root *new_root;
 	struct btrfs_dir_item *di;
@@ -3080,7 +3080,7 @@ out:
  */
 long btrfs_ioctl_trans_end(struct file *file)
 {
-	struct inode *inode = fdentry(file)->d_inode;
+	struct inode *inode = file_inode(file);
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct btrfs_trans_handle *trans;
 
@@ -3142,7 +3142,7 @@ static noinline long btrfs_ioctl_wait_sync(struct btrfs_root *root,
 
 static long btrfs_ioctl_scrub(struct file *file, void __user *arg)
 {
-	struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
+	struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
 	struct btrfs_ioctl_scrub_args *sa;
 	int ret;
 
@@ -3433,7 +3433,7 @@ void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock,
 
 static long btrfs_ioctl_balance(struct file *file, void __user *arg)
 {
-	struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
+	struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
 	struct btrfs_fs_info *fs_info = root->fs_info;
 	struct btrfs_ioctl_balance_args *bargs;
 	struct btrfs_balance_control *bctl;
@@ -3573,7 +3573,7 @@ out:
 
 static long btrfs_ioctl_quota_ctl(struct file *file, void __user *arg)
 {
-	struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
+	struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
 	struct btrfs_ioctl_quota_ctl_args *sa;
 	struct btrfs_trans_handle *trans = NULL;
 	int ret;
@@ -3632,7 +3632,7 @@ drop_write:
 
 static long btrfs_ioctl_qgroup_assign(struct file *file, void __user *arg)
 {
-	struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
+	struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
 	struct btrfs_ioctl_qgroup_assign_args *sa;
 	struct btrfs_trans_handle *trans;
 	int ret;
@@ -3679,7 +3679,7 @@ drop_write:
 
 static long btrfs_ioctl_qgroup_create(struct file *file, void __user *arg)
 {
-	struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
+	struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
 	struct btrfs_ioctl_qgroup_create_args *sa;
 	struct btrfs_trans_handle *trans;
 	int ret;
@@ -3725,7 +3725,7 @@ drop_write:
 
 static long btrfs_ioctl_qgroup_limit(struct file *file, void __user *arg)
 {
-	struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
+	struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
 	struct btrfs_ioctl_qgroup_limit_args *sa;
 	struct btrfs_trans_handle *trans;
 	int ret;
@@ -3775,7 +3775,7 @@ static long btrfs_ioctl_set_received_subvol(struct file *file,
 					    void __user *arg)
 {
 	struct btrfs_ioctl_received_subvol_args *sa = NULL;
-	struct inode *inode = fdentry(file)->d_inode;
+	struct inode *inode = file_inode(file);
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct btrfs_root_item *root_item = &root->root_item;
 	struct btrfs_trans_handle *trans;
@@ -3855,7 +3855,7 @@ out:
 long btrfs_ioctl(struct file *file, unsigned int
 		cmd, unsigned long arg)
 {
-	struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
+	struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
 	void __user *argp = (void __user *)arg;
 
 	switch (cmd) {
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 54454542ad40..f80df6b04648 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -4542,7 +4542,7 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
-	send_root = BTRFS_I(fdentry(mnt_file)->d_inode)->root;
+	send_root = BTRFS_I(file_inode(mnt_file))->root;
 	fs_info = send_root->fs_info;
 
 	arg = memdup_user(arg_, sizeof(*arg));
diff --git a/fs/buffer.c b/fs/buffer.c
index 7a75c3e0fd58..b8a8b4d64d8c 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2332,7 +2332,7 @@ int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
 			 get_block_t get_block)
 {
 	struct page *page = vmf->page;
-	struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(vma->vm_file);
 	unsigned long end;
 	loff_t size;
 	int ret;
@@ -2371,7 +2371,7 @@ int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
 		   get_block_t get_block)
 {
 	int ret;
-	struct super_block *sb = vma->vm_file->f_path.dentry->d_inode->i_sb;
+	struct super_block *sb = file_inode(vma->vm_file)->i_sb;
 
 	sb_start_pagefault(sb);
 
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 064d1a68d2c1..d4f81edd9a5d 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -195,7 +195,7 @@ static int ceph_releasepage(struct page *page, gfp_t g)
  */
 static int readpage_nounlock(struct file *filp, struct page *page)
 {
-	struct inode *inode = filp->f_dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	struct ceph_osd_client *osdc = 
 		&ceph_inode_to_client(inode)->client->osdc;
@@ -370,7 +370,7 @@ out:
 static int ceph_readpages(struct file *file, struct address_space *mapping,
 			  struct list_head *page_list, unsigned nr_pages)
 {
-	struct inode *inode = file->f_dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
 	int rc = 0;
 	int max = 0;
@@ -977,7 +977,7 @@ static int ceph_update_writeable_page(struct file *file,
 			    loff_t pos, unsigned len,
 			    struct page *page)
 {
-	struct inode *inode = file->f_dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
 	loff_t page_off = pos & PAGE_CACHE_MASK;
@@ -1086,7 +1086,7 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping,
 			    loff_t pos, unsigned len, unsigned flags,
 			    struct page **pagep, void **fsdata)
 {
-	struct inode *inode = file->f_dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	struct ceph_file_info *fi = file->private_data;
 	struct page *page;
@@ -1144,7 +1144,7 @@ static int ceph_write_end(struct file *file, struct address_space *mapping,
 			  loff_t pos, unsigned len, unsigned copied,
 			  struct page *page, void *fsdata)
 {
-	struct inode *inode = file->f_dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
 	struct ceph_mds_client *mdsc = fsc->mdsc;
@@ -1228,7 +1228,7 @@ const struct address_space_operations ceph_aops = {
  */
 static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
-	struct inode *inode = vma->vm_file->f_dentry->d_inode;
+	struct inode *inode = file_inode(vma->vm_file);
 	struct page *page = vmf->page;
 	struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
 	loff_t off = page_offset(page);
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 8c1aabe93b67..6d797f46d772 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -238,7 +238,7 @@ static int note_last_dentry(struct ceph_file_info *fi, const char *name,
 static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir)
 {
 	struct ceph_file_info *fi = filp->private_data;
-	struct inode *inode = filp->f_dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
 	struct ceph_mds_client *mdsc = fsc->mdsc;
@@ -1138,7 +1138,7 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size,
 			     loff_t *ppos)
 {
 	struct ceph_file_info *cf = file->private_data;
-	struct inode *inode = file->f_dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	int left;
 	const int bufsize = 1024;
@@ -1188,7 +1188,7 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size,
 static int ceph_dir_fsync(struct file *file, loff_t start, loff_t end,
 			  int datasync)
 {
-	struct inode *inode = file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	struct list_head *head = &ci->i_unsafe_dirops;
 	struct ceph_mds_request *req;
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index e51558fca3a3..11b57c2c8f15 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -393,7 +393,7 @@ more:
 static ssize_t ceph_sync_read(struct file *file, char __user *data,
 			      unsigned len, loff_t *poff, int *checkeof)
 {
-	struct inode *inode = file->f_dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	struct page **pages;
 	u64 off = *poff;
 	int num_pages, ret;
@@ -466,7 +466,7 @@ static void sync_write_commit(struct ceph_osd_request *req,
 static ssize_t ceph_sync_write(struct file *file, const char __user *data,
 			       size_t left, loff_t *offset)
 {
-	struct inode *inode = file->f_dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
 	struct ceph_osd_request *req;
@@ -483,7 +483,7 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data,
 	int ret;
 	struct timespec mtime = CURRENT_TIME;
 
-	if (ceph_snap(file->f_dentry->d_inode) != CEPH_NOSNAP)
+	if (ceph_snap(file_inode(file)) != CEPH_NOSNAP)
 		return -EROFS;
 
 	dout("sync_write on file %p %lld~%u %s\n", file, *offset,
@@ -637,7 +637,7 @@ static ssize_t ceph_aio_read(struct kiocb *iocb, const struct iovec *iov,
 	struct ceph_file_info *fi = filp->private_data;
 	loff_t *ppos = &iocb->ki_pos;
 	size_t len = iov->iov_len;
-	struct inode *inode = filp->f_dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	void __user *base = iov->iov_base;
 	ssize_t ret;
@@ -707,7 +707,7 @@ static ssize_t ceph_aio_write(struct kiocb *iocb, const struct iovec *iov,
 {
 	struct file *file = iocb->ki_filp;
 	struct ceph_file_info *fi = file->private_data;
-	struct inode *inode = file->f_dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	struct ceph_osd_client *osdc =
 		&ceph_sb_to_client(inode->i_sb)->client->osdc;
diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c
index 36549a46e311..f5ed767806df 100644
--- a/fs/ceph/ioctl.c
+++ b/fs/ceph/ioctl.c
@@ -16,11 +16,11 @@
  */
 static long ceph_ioctl_get_layout(struct file *file, void __user *arg)
 {
-	struct ceph_inode_info *ci = ceph_inode(file->f_dentry->d_inode);
+	struct ceph_inode_info *ci = ceph_inode(file_inode(file));
 	struct ceph_ioctl_layout l;
 	int err;
 
-	err = ceph_do_getattr(file->f_dentry->d_inode, CEPH_STAT_CAP_LAYOUT);
+	err = ceph_do_getattr(file_inode(file), CEPH_STAT_CAP_LAYOUT);
 	if (!err) {
 		l.stripe_unit = ceph_file_layout_su(ci->i_layout);
 		l.stripe_count = ceph_file_layout_stripe_count(ci->i_layout);
@@ -63,12 +63,12 @@ static long __validate_layout(struct ceph_mds_client *mdsc,
 
 static long ceph_ioctl_set_layout(struct file *file, void __user *arg)
 {
-	struct inode *inode = file->f_dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	struct inode *parent_inode;
 	struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
 	struct ceph_mds_request *req;
 	struct ceph_ioctl_layout l;
-	struct ceph_inode_info *ci = ceph_inode(file->f_dentry->d_inode);
+	struct ceph_inode_info *ci = ceph_inode(file_inode(file));
 	struct ceph_ioctl_layout nl;
 	int err;
 
@@ -76,7 +76,7 @@ static long ceph_ioctl_set_layout(struct file *file, void __user *arg)
 		return -EFAULT;
 
 	/* validate changed params against current layout */
-	err = ceph_do_getattr(file->f_dentry->d_inode, CEPH_STAT_CAP_LAYOUT);
+	err = ceph_do_getattr(file_inode(file), CEPH_STAT_CAP_LAYOUT);
 	if (err)
 		return err;
 
@@ -136,7 +136,7 @@ static long ceph_ioctl_set_layout(struct file *file, void __user *arg)
  */
 static long ceph_ioctl_set_layout_policy (struct file *file, void __user *arg)
 {
-	struct inode *inode = file->f_dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	struct ceph_mds_request *req;
 	struct ceph_ioctl_layout l;
 	int err;
@@ -179,7 +179,7 @@ static long ceph_ioctl_set_layout_policy (struct file *file, void __user *arg)
 static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg)
 {
 	struct ceph_ioctl_dataloc dl;
-	struct inode *inode = file->f_dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	struct ceph_osd_client *osdc =
 		&ceph_sb_to_client(inode->i_sb)->client->osdc;
@@ -234,7 +234,7 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg)
 static long ceph_ioctl_lazyio(struct file *file)
 {
 	struct ceph_file_info *fi = file->private_data;
-	struct inode *inode = file->f_dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	struct ceph_inode_info *ci = ceph_inode(inode);
 
 	if ((fi->fmode & CEPH_FILE_MODE_LAZY) == 0) {
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
index 80576d05d687..202dd3d68be0 100644
--- a/fs/ceph/locks.c
+++ b/fs/ceph/locks.c
@@ -13,7 +13,7 @@
 static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
 			     int cmd, u8 wait, struct file_lock *fl)
 {
-	struct inode *inode = file->f_dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	struct ceph_mds_client *mdsc =
 		ceph_sb_to_client(inode->i_sb)->mdsc;
 	struct ceph_mds_request *req;
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index de7f9168a118..8b35365c70be 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -677,7 +677,7 @@ out_nls:
 static ssize_t cifs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 				   unsigned long nr_segs, loff_t pos)
 {
-	struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(iocb->ki_filp);
 	ssize_t written;
 	int rc;
 
@@ -701,7 +701,7 @@ static loff_t cifs_llseek(struct file *file, loff_t offset, int whence)
 	 */
 	if (whence != SEEK_SET && whence != SEEK_CUR) {
 		int rc;
-		struct inode *inode = file->f_path.dentry->d_inode;
+		struct inode *inode = file_inode(file);
 
 		/*
 		 * We need to be sure that all dirty pages are written and the
@@ -733,7 +733,7 @@ static int cifs_setlease(struct file *file, long arg, struct file_lock **lease)
 {
 	/* note that this is called by vfs setlease with lock_flocks held
 	   to protect *lease from going away */
-	struct inode *inode = file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	struct cifsFileInfo *cfile = file->private_data;
 
 	if (!(S_ISREG(inode->i_mode)))
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 8ea6ca50a665..1a5c2911b043 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -947,7 +947,7 @@ static int
 cifs_posix_lock_test(struct file *file, struct file_lock *flock)
 {
 	int rc = 0;
-	struct cifsInodeInfo *cinode = CIFS_I(file->f_path.dentry->d_inode);
+	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
 	unsigned char saved_type = flock->fl_type;
 
 	if ((flock->fl_flags & FL_POSIX) == 0)
@@ -974,7 +974,7 @@ cifs_posix_lock_test(struct file *file, struct file_lock *flock)
 static int
 cifs_posix_lock_set(struct file *file, struct file_lock *flock)
 {
-	struct cifsInodeInfo *cinode = CIFS_I(file->f_path.dentry->d_inode);
+	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
 	int rc = 1;
 
 	if ((flock->fl_flags & FL_POSIX) == 0)
@@ -1548,7 +1548,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
 
 	cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
 	netfid = cfile->fid.netfid;
-	cinode = CIFS_I(file->f_path.dentry->d_inode);
+	cinode = CIFS_I(file_inode(file));
 
 	if (cap_unix(tcon->ses) &&
 	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
@@ -2171,7 +2171,7 @@ int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
 	struct cifs_tcon *tcon;
 	struct TCP_Server_Info *server;
 	struct cifsFileInfo *smbfile = file->private_data;
-	struct inode *inode = file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
 
 	rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
@@ -2246,7 +2246,7 @@ int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
  */
 int cifs_flush(struct file *file, fl_owner_t id)
 {
-	struct inode *inode = file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	int rc = 0;
 
 	if (file->f_mode & FMODE_WRITE)
@@ -2480,7 +2480,7 @@ ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov,
 	ssize_t written;
 	struct inode *inode;
 
-	inode = iocb->ki_filp->f_path.dentry->d_inode;
+	inode = file_inode(iocb->ki_filp);
 
 	/*
 	 * BB - optimize the way when signing is disabled. We can drop this
@@ -2543,7 +2543,7 @@ ssize_t
 cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
 		   unsigned long nr_segs, loff_t pos)
 {
-	struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(iocb->ki_filp);
 	struct cifsInodeInfo *cinode = CIFS_I(inode);
 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
 	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
@@ -2915,7 +2915,7 @@ ssize_t
 cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov,
 		  unsigned long nr_segs, loff_t pos)
 {
-	struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(iocb->ki_filp);
 	struct cifsInodeInfo *cinode = CIFS_I(inode);
 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
 	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
@@ -3063,7 +3063,7 @@ static struct vm_operations_struct cifs_file_vm_ops = {
 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	int rc, xid;
-	struct inode *inode = file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(file);
 
 	xid = get_xid();
 
@@ -3356,7 +3356,7 @@ static int cifs_readpage_worker(struct file *file, struct page *page,
 	int rc;
 
 	/* Is the page cached? */
-	rc = cifs_readpage_from_fscache(file->f_path.dentry->d_inode, page);
+	rc = cifs_readpage_from_fscache(file_inode(file), page);
 	if (rc == 0)
 		goto read_complete;
 
@@ -3371,8 +3371,8 @@ static int cifs_readpage_worker(struct file *file, struct page *page,
 	else
 		cFYI(1, "Bytes read %d", rc);
 
-	file->f_path.dentry->d_inode->i_atime =
-		current_fs_time(file->f_path.dentry->d_inode->i_sb);
+	file_inode(file)->i_atime =
+		current_fs_time(file_inode(file)->i_sb);
 
 	if (PAGE_CACHE_SIZE > rc)
 		memset(read_data + rc, 0, PAGE_CACHE_SIZE - rc);
@@ -3381,7 +3381,7 @@ static int cifs_readpage_worker(struct file *file, struct page *page,
 	SetPageUptodate(page);
 
 	/* send this page to the cache */
-	cifs_readpage_to_fscache(file->f_path.dentry->d_inode, page);
+	cifs_readpage_to_fscache(file_inode(file), page);
 
 	rc = 0;
 
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index ed6208ff85a7..1fc864b92cf2 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -289,7 +289,7 @@ cifs_get_file_info_unix(struct file *filp)
 	unsigned int xid;
 	FILE_UNIX_BASIC_INFO find_data;
 	struct cifs_fattr fattr;
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
 	struct cifsFileInfo *cfile = filp->private_data;
 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
@@ -558,7 +558,7 @@ cifs_get_file_info(struct file *filp)
 	unsigned int xid;
 	FILE_ALL_INFO find_data;
 	struct cifs_fattr fattr;
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
 	struct cifsFileInfo *cfile = filp->private_data;
 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
@@ -1678,7 +1678,7 @@ cifs_invalidate_mapping(struct inode *inode)
 int cifs_revalidate_file_attr(struct file *filp)
 {
 	int rc = 0;
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	struct cifsFileInfo *cfile = (struct cifsFileInfo *) filp->private_data;
 
 	if (!cifs_inode_needs_reval(inode))
@@ -1735,7 +1735,7 @@ out:
 int cifs_revalidate_file(struct file *filp)
 {
 	int rc;
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 
 	rc = cifs_revalidate_file_attr(filp);
 	if (rc)
diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c
index fd5009d56f9f..6c9f1214cf0b 100644
--- a/fs/cifs/ioctl.c
+++ b/fs/cifs/ioctl.c
@@ -30,7 +30,7 @@
 
 long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
 {
-	struct inode *inode = filep->f_dentry->d_inode;
+	struct inode *inode = file_inode(filep);
 	int rc = -ENOTTY; /* strange error - but the precedent */
 	unsigned int xid;
 	struct cifs_sb_info *cifs_sb;
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index cdd6ff48246b..7255b0c7aa7e 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -505,7 +505,7 @@ static int cifs_entry_is_dot(struct cifs_dirent *de, bool is_unicode)
    whether we can use the cached search results from the previous search */
 static int is_dir_changed(struct file *file)
 {
-	struct inode *inode = file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	struct cifsInodeInfo *cifsInfo = CIFS_I(inode);
 
 	if (cifsInfo->time == 0)
@@ -778,7 +778,7 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
 	switch ((int) file->f_pos) {
 	case 0:
 		if (filldir(direntry, ".", 1, file->f_pos,
-		     file->f_path.dentry->d_inode->i_ino, DT_DIR) < 0) {
+		     file_inode(file)->i_ino, DT_DIR) < 0) {
 			cERROR(1, "Filldir for current dir failed");
 			rc = -ENOMEM;
 			break;
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index 49fe52d25600..b7d3a05c062c 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -397,7 +397,7 @@ static int coda_readdir(struct file *coda_file, void *buf, filldir_t filldir)
 		 * We can't use vfs_readdir because we have to keep the file
 		 * position in sync between the coda_file and the host_file.
 		 * and as such we need grab the inode mutex. */
-		struct inode *host_inode = host_file->f_path.dentry->d_inode;
+		struct inode *host_inode = file_inode(host_file);
 
 		mutex_lock(&host_inode->i_mutex);
 		host_file->f_pos = coda_file->f_pos;
diff --git a/fs/coda/file.c b/fs/coda/file.c
index 8edd404e6419..fa4c100bdc7d 100644
--- a/fs/coda/file.c
+++ b/fs/coda/file.c
@@ -66,7 +66,7 @@ coda_file_splice_read(struct file *coda_file, loff_t *ppos,
 static ssize_t
 coda_file_write(struct file *coda_file, const char __user *buf, size_t count, loff_t *ppos)
 {
-	struct inode *host_inode, *coda_inode = coda_file->f_path.dentry->d_inode;
+	struct inode *host_inode, *coda_inode = file_inode(coda_file);
 	struct coda_file_info *cfi;
 	struct file *host_file;
 	ssize_t ret;
@@ -78,7 +78,7 @@ coda_file_write(struct file *coda_file, const char __user *buf, size_t count, lo
 	if (!host_file->f_op || !host_file->f_op->write)
 		return -EINVAL;
 
-	host_inode = host_file->f_path.dentry->d_inode;
+	host_inode = file_inode(host_file);
 	mutex_lock(&coda_inode->i_mutex);
 
 	ret = host_file->f_op->write(host_file, buf, count, ppos);
@@ -106,8 +106,8 @@ coda_file_mmap(struct file *coda_file, struct vm_area_struct *vma)
 	if (!host_file->f_op || !host_file->f_op->mmap)
 		return -ENODEV;
 
-	coda_inode = coda_file->f_path.dentry->d_inode;
-	host_inode = host_file->f_path.dentry->d_inode;
+	coda_inode = file_inode(coda_file);
+	host_inode = file_inode(host_file);
 
 	cii = ITOC(coda_inode);
 	spin_lock(&cii->c_lock);
@@ -178,7 +178,7 @@ int coda_release(struct inode *coda_inode, struct file *coda_file)
 	err = venus_close(coda_inode->i_sb, coda_i2f(coda_inode),
 			  coda_flags, coda_file->f_cred->fsuid);
 
-	host_inode = cfi->cfi_container->f_path.dentry->d_inode;
+	host_inode = file_inode(cfi->cfi_container);
 	cii = ITOC(coda_inode);
 
 	/* did we mmap this file? */
@@ -202,7 +202,7 @@ int coda_release(struct inode *coda_inode, struct file *coda_file)
 int coda_fsync(struct file *coda_file, loff_t start, loff_t end, int datasync)
 {
 	struct file *host_file;
-	struct inode *coda_inode = coda_file->f_path.dentry->d_inode;
+	struct inode *coda_inode = file_inode(coda_file);
 	struct coda_file_info *cfi;
 	int err;
 
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index be2aa4909487..6df708c7b3e8 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -129,7 +129,7 @@ static int get_device_index(struct coda_mount_data *data)
 	f = fdget(data->fd);
 	if (!f.file)
 		goto Ebadf;
-	inode = f.file->f_path.dentry->d_inode;
+	inode = file_inode(f.file);
 	if (!S_ISCHR(inode->i_mode) || imajor(inode) != CODA_PSDEV_MAJOR) {
 		fdput(f);
 		goto Ebadf;
diff --git a/fs/coda/pioctl.c b/fs/coda/pioctl.c
index ee0981f1375b..3f5de96bbb58 100644
--- a/fs/coda/pioctl.c
+++ b/fs/coda/pioctl.c
@@ -52,7 +52,7 @@ static long coda_pioctl(struct file *filp, unsigned int cmd,
 	struct path path;
 	int error;
 	struct PioctlData data;
-	struct inode *inode = filp->f_dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	struct inode *target_inode = NULL;
 	struct coda_inode_info *cnp;
 
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index e2f57a007029..3ced75f765ca 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -1582,7 +1582,7 @@ asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd,
 	case FIBMAP:
 	case FIGETBSZ:
 	case FIONREAD:
-		if (S_ISREG(f.file->f_path.dentry->d_inode->i_mode))
+		if (S_ISREG(file_inode(f.file)->i_mode))
 			break;
 		/*FALL THROUGH*/
 
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 712b10f64c70..90d222f11e36 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -1625,7 +1625,7 @@ static loff_t configfs_dir_lseek(struct file *file, loff_t offset, int whence)
 			if (offset >= 0)
 				break;
 		default:
-			mutex_unlock(&file->f_path.dentry->d_inode->i_mutex);
+			mutex_unlock(&file_inode(file)->i_mutex);
 			return -EINVAL;
 	}
 	if (offset != file->f_pos) {
diff --git a/fs/coredump.c b/fs/coredump.c
index 177493272a61..69baf903d3bd 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -411,7 +411,7 @@ static void wait_for_dump_helpers(struct file *file)
 {
 	struct pipe_inode_info *pipe;
 
-	pipe = file->f_path.dentry->d_inode->i_pipe;
+	pipe = file_inode(file)->i_pipe;
 
 	pipe_lock(pipe);
 	pipe->readers++;
@@ -600,7 +600,7 @@ void do_coredump(siginfo_t *siginfo)
 		if (IS_ERR(cprm.file))
 			goto fail_unlock;
 
-		inode = cprm.file->f_path.dentry->d_inode;
+		inode = file_inode(cprm.file);
 		if (inode->i_nlink > 1)
 			goto close_fail;
 		if (d_unhashed(cprm.file->f_path.dentry))
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index c6c3f91ecf06..3ceb9ec976e1 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -351,7 +351,7 @@ static int cramfs_statfs(struct dentry *dentry, struct kstatfs *buf)
  */
 static int cramfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 {
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	struct super_block *sb = inode->i_sb;
 	char *buf;
 	unsigned int offset;
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index d45ba4568128..53acc9d0c138 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -118,7 +118,7 @@ static int ecryptfs_readdir(struct file *file, void *dirent, filldir_t filldir)
 
 	lower_file = ecryptfs_file_to_lower(file);
 	lower_file->f_pos = file->f_pos;
-	inode = file->f_path.dentry->d_inode;
+	inode = file_inode(file);
 	memset(&buf, 0, sizeof(buf));
 	buf.dirent = dirent;
 	buf.dentry = file->f_path.dentry;
@@ -133,7 +133,7 @@ static int ecryptfs_readdir(struct file *file, void *dirent, filldir_t filldir)
 		goto out;
 	if (rc >= 0)
 		fsstack_copy_attr_atime(inode,
-					lower_file->f_path.dentry->d_inode);
+					file_inode(lower_file));
 out:
 	return rc;
 }
diff --git a/fs/efs/dir.c b/fs/efs/dir.c
index 7ee6f7e3a608..055a9e9ca747 100644
--- a/fs/efs/dir.c
+++ b/fs/efs/dir.c
@@ -20,7 +20,7 @@ const struct inode_operations efs_dir_inode_operations = {
 };
 
 static int efs_readdir(struct file *filp, void *dirent, filldir_t filldir) {
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	struct buffer_head *bh;
 
 	struct efs_dir		*dirblock;
diff --git a/fs/exec.c b/fs/exec.c
index 20df02c1cc70..7b6f4d59b26c 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -123,7 +123,7 @@ SYSCALL_DEFINE1(uselib, const char __user *, library)
 		goto out;
 
 	error = -EINVAL;
-	if (!S_ISREG(file->f_path.dentry->d_inode->i_mode))
+	if (!S_ISREG(file_inode(file)->i_mode))
 		goto exit;
 
 	error = -EACCES;
@@ -764,7 +764,7 @@ struct file *open_exec(const char *name)
 		goto out;
 
 	err = -EACCES;
-	if (!S_ISREG(file->f_path.dentry->d_inode->i_mode))
+	if (!S_ISREG(file_inode(file)->i_mode))
 		goto exit;
 
 	if (file->f_path.mnt->mnt_flags & MNT_NOEXEC)
@@ -1098,7 +1098,7 @@ EXPORT_SYMBOL(flush_old_exec);
 
 void would_dump(struct linux_binprm *bprm, struct file *file)
 {
-	if (inode_permission(file->f_path.dentry->d_inode, MAY_READ) < 0)
+	if (inode_permission(file_inode(file), MAY_READ) < 0)
 		bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
 }
 EXPORT_SYMBOL(would_dump);
@@ -1270,7 +1270,7 @@ static int check_unsafe_exec(struct linux_binprm *bprm)
 int prepare_binprm(struct linux_binprm *bprm)
 {
 	umode_t mode;
-	struct inode * inode = bprm->file->f_path.dentry->d_inode;
+	struct inode * inode = file_inode(bprm->file);
 	int retval;
 
 	mode = inode->i_mode;
diff --git a/fs/exofs/dir.c b/fs/exofs/dir.c
index c61e62ac231c..46375896cfc0 100644
--- a/fs/exofs/dir.c
+++ b/fs/exofs/dir.c
@@ -242,7 +242,7 @@ static int
 exofs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 {
 	loff_t pos = filp->f_pos;
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	unsigned int offset = pos & ~PAGE_CACHE_MASK;
 	unsigned long n = pos >> PAGE_CACHE_SHIFT;
 	unsigned long npages = dir_pages(inode);
diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index 0f4f5c929257..4237722bfd27 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -290,7 +290,7 @@ static int
 ext2_readdir (struct file * filp, void * dirent, filldir_t filldir)
 {
 	loff_t pos = filp->f_pos;
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	struct super_block *sb = inode->i_sb;
 	unsigned int offset = pos & ~PAGE_CACHE_MASK;
 	unsigned long n = pos >> PAGE_CACHE_SHIFT;
diff --git a/fs/ext2/ioctl.c b/fs/ext2/ioctl.c
index 2de655f5d625..5d46c09863f0 100644
--- a/fs/ext2/ioctl.c
+++ b/fs/ext2/ioctl.c
@@ -19,7 +19,7 @@
 
 long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
-	struct inode *inode = filp->f_dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	struct ext2_inode_info *ei = EXT2_I(inode);
 	unsigned int flags;
 	unsigned short rsv_window_size;
diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c
index dd91264ba94f..87eccbbca255 100644
--- a/fs/ext3/dir.c
+++ b/fs/ext3/dir.c
@@ -99,7 +99,7 @@ static int ext3_readdir(struct file * filp,
 	int i, stored;
 	struct ext3_dir_entry_2 *de;
 	int err;
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	struct super_block *sb = inode->i_sb;
 	int ret = 0;
 	int dir_has_error = 0;
@@ -114,7 +114,7 @@ static int ext3_readdir(struct file * filp,
 		 * We don't set the inode dirty flag since it's not
 		 * critical that it get flushed back to the disk.
 		 */
-		EXT3_I(filp->f_path.dentry->d_inode)->i_flags &= ~EXT3_INDEX_FL;
+		EXT3_I(file_inode(filp))->i_flags &= ~EXT3_INDEX_FL;
 	}
 	stored = 0;
 	offset = filp->f_pos & (sb->s_blocksize - 1);
@@ -457,7 +457,7 @@ static int call_filldir(struct file * filp, void * dirent,
 {
 	struct dir_private_info *info = filp->private_data;
 	loff_t	curr_pos;
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	struct super_block * sb;
 	int error;
 
@@ -487,7 +487,7 @@ static int ext3_dx_readdir(struct file * filp,
 			 void * dirent, filldir_t filldir)
 {
 	struct dir_private_info *info = filp->private_data;
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	struct fname *fname;
 	int	ret;
 
diff --git a/fs/ext3/ioctl.c b/fs/ext3/ioctl.c
index 677a5c27dc69..4d96e9a64532 100644
--- a/fs/ext3/ioctl.c
+++ b/fs/ext3/ioctl.c
@@ -14,7 +14,7 @@
 
 long ext3_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
-	struct inode *inode = filp->f_dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	struct ext3_inode_info *ei = EXT3_I(inode);
 	unsigned int flags;
 	unsigned short rsv_window_size;
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 890b8947c546..61fa09eb2501 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -624,7 +624,7 @@ int ext3_htree_fill_tree(struct file *dir_file, __u32 start_hash,
 
 	dxtrace(printk("In htree_fill_tree, start hash: %x:%x\n", start_hash,
 		       start_minor_hash));
-	dir = dir_file->f_path.dentry->d_inode;
+	dir = file_inode(dir_file);
 	if (!(EXT3_I(dir)->i_flags & EXT3_INDEX_FL)) {
 		hinfo.hash_version = EXT3_SB(dir->i_sb)->s_def_hash_version;
 		if (hinfo.hash_version <= DX_HASH_TEA)
@@ -638,7 +638,7 @@ int ext3_htree_fill_tree(struct file *dir_file, __u32 start_hash,
 	}
 	hinfo.hash = start_hash;
 	hinfo.minor_hash = 0;
-	frame = dx_probe(NULL, dir_file->f_path.dentry->d_inode, &hinfo, frames, &err);
+	frame = dx_probe(NULL, file_inode(dir_file), &hinfo, frames, &err);
 	if (!frame)
 		return err;
 
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 80a28b297279..dc149d123de5 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -110,7 +110,7 @@ static int ext4_readdir(struct file *filp,
 	int i, stored;
 	struct ext4_dir_entry_2 *de;
 	int err;
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	struct super_block *sb = inode->i_sb;
 	int ret = 0;
 	int dir_has_error = 0;
@@ -133,7 +133,7 @@ static int ext4_readdir(struct file *filp,
 		 * We don't set the inode dirty flag since it's not
 		 * critical that it get flushed back to the disk.
 		 */
-		ext4_clear_inode_flag(filp->f_path.dentry->d_inode,
+		ext4_clear_inode_flag(file_inode(filp),
 				      EXT4_INODE_INDEX);
 	}
 	stored = 0;
@@ -494,7 +494,7 @@ static int call_filldir(struct file *filp, void *dirent,
 {
 	struct dir_private_info *info = filp->private_data;
 	loff_t	curr_pos;
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	struct super_block *sb;
 	int error;
 
@@ -526,7 +526,7 @@ static int ext4_dx_readdir(struct file *filp,
 			 void *dirent, filldir_t filldir)
 {
 	struct dir_private_info *info = filp->private_data;
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	struct fname *fname;
 	int	ret;
 
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 5ae1674ec12f..7817ca7c2bbf 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4386,7 +4386,7 @@ static void ext4_falloc_update_inode(struct inode *inode,
  */
 long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
 {
-	struct inode *inode = file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	handle_t *handle;
 	loff_t new_size;
 	unsigned int max_blocks;
@@ -4643,7 +4643,7 @@ static int ext4_xattr_fiemap(struct inode *inode,
  */
 int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
 {
-	struct inode *inode = file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	struct super_block *sb = inode->i_sb;
 	ext4_lblk_t first_block, stop_block;
 	struct address_space *mapping = inode->i_mapping;
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 405565a62277..c00ea7945eb5 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -167,7 +167,7 @@ static ssize_t
 ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
 		unsigned long nr_segs, loff_t pos)
 {
-	struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(iocb->ki_filp);
 	ssize_t ret;
 
 	/*
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index 387c47c6cda9..8106dca95456 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -1298,7 +1298,7 @@ int ext4_read_inline_dir(struct file *filp,
 	int i, stored;
 	struct ext4_dir_entry_2 *de;
 	struct super_block *sb;
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	int ret, inline_size = 0;
 	struct ext4_iloc iloc;
 	void *dir_buf = NULL;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index cbfe13bf5b2a..521bd4ab8abe 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2959,7 +2959,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
 			    ssize_t size, void *private, int ret,
 			    bool is_async)
 {
-	struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(iocb->ki_filp);
         ext4_io_end_t *io_end = iocb->private;
 
 	/* if not async direct IO or dio with 0 bytes write, just return */
@@ -3553,7 +3553,7 @@ int ext4_can_truncate(struct inode *inode)
 
 int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
 {
-	struct inode *inode = file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	if (!S_ISREG(inode->i_mode))
 		return -EOPNOTSUPP;
 
@@ -4926,7 +4926,7 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 	unsigned long len;
 	int ret;
 	struct file *file = vma->vm_file;
-	struct inode *inode = file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	struct address_space *mapping = inode->i_mapping;
 	handle_t *handle;
 	get_block_t *get_block;
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 5747f52f7c72..c2f8e060f636 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -22,7 +22,7 @@
 
 long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
-	struct inode *inode = filp->f_dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	struct super_block *sb = inode->i_sb;
 	struct ext4_inode_info *ei = EXT4_I(inode);
 	unsigned int flags;
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index d9cc5ee42f53..796f7ac03706 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -900,7 +900,7 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
 		  pgoff_t orig_page_offset, int data_offset_in_page,
 		  int block_len_in_page, int uninit, int *err)
 {
-	struct inode *orig_inode = o_filp->f_dentry->d_inode;
+	struct inode *orig_inode = file_inode(o_filp);
 	struct page *pagep[2] = {NULL, NULL};
 	handle_t *handle;
 	ext4_lblk_t orig_blk_offset;
@@ -1279,8 +1279,8 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
 		 __u64 orig_start, __u64 donor_start, __u64 len,
 		 __u64 *moved_len)
 {
-	struct inode *orig_inode = o_filp->f_dentry->d_inode;
-	struct inode *donor_inode = d_filp->f_dentry->d_inode;
+	struct inode *orig_inode = file_inode(o_filp);
+	struct inode *donor_inode = file_inode(d_filp);
 	struct ext4_ext_path *orig_path = NULL, *holecheck_path = NULL;
 	struct ext4_extent *ext_prev, *ext_cur, *ext_dummy;
 	ext4_lblk_t block_start = orig_start;
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index f9ed946a448e..bb97ad6905b2 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -937,7 +937,7 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
 
 	dxtrace(printk(KERN_DEBUG "In htree_fill_tree, start hash: %x:%x\n",
 		       start_hash, start_minor_hash));
-	dir = dir_file->f_path.dentry->d_inode;
+	dir = file_inode(dir_file);
 	if (!(ext4_test_inode_flag(dir, EXT4_INODE_INDEX))) {
 		hinfo.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version;
 		if (hinfo.hash_version <= DX_HASH_TEA)
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 3d4fb81bacd5..4df78dd3f523 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -553,7 +553,7 @@ void ext4_error_file(struct file *file, const char *function,
 	va_list args;
 	struct va_format vaf;
 	struct ext4_super_block *es;
-	struct inode *inode = file->f_dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	char pathname[80], *path;
 
 	es = EXT4_SB(inode->i_sb)->s_es;
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 951ed52748f6..fda0bcc0907f 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -603,7 +603,7 @@ bool f2fs_empty_dir(struct inode *dir)
 static int f2fs_readdir(struct file *file, void *dirent, filldir_t filldir)
 {
 	unsigned long pos = file->f_pos;
-	struct inode *inode = file->f_dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	unsigned long npages = dir_blocks(inode);
 	unsigned char *types = NULL;
 	unsigned int bit_pos = 0, start_bit_pos = 0;
diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index 58bf744dbf39..165012ef363a 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -698,7 +698,7 @@ out:
 
 static int fat_readdir(struct file *filp, void *dirent, filldir_t filldir)
 {
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	return __fat_readdir(inode, filp, dirent, filldir, 0, 0);
 }
 
@@ -779,7 +779,7 @@ static int fat_ioctl_readdir(struct inode *inode, struct file *filp,
 static long fat_dir_ioctl(struct file *filp, unsigned int cmd,
 			  unsigned long arg)
 {
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	struct __fat_dirent __user *d1 = (struct __fat_dirent __user *)arg;
 	int short_only, both;
 
@@ -819,7 +819,7 @@ FAT_IOCTL_FILLDIR_FUNC(fat_compat_ioctl_filldir, compat_dirent)
 static long fat_compat_dir_ioctl(struct file *filp, unsigned cmd,
 				 unsigned long arg)
 {
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	struct compat_dirent __user *d1 = compat_ptr(arg);
 	int short_only, both;
 
diff --git a/fs/fat/file.c b/fs/fat/file.c
index a62e0ecbe2db..3978f8ca1823 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -32,7 +32,7 @@ static int fat_ioctl_get_attributes(struct inode *inode, u32 __user *user_attr)
 
 static int fat_ioctl_set_attributes(struct file *file, u32 __user *user_attr)
 {
-	struct inode *inode = file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb);
 	int is_dir = S_ISDIR(inode->i_mode);
 	u32 attr, oldattr;
@@ -116,7 +116,7 @@ out:
 
 long fat_generic_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	u32 __user *user_attr = (u32 __user *)arg;
 
 	switch (cmd) {
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 71a600a19f06..6599222536eb 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -30,7 +30,7 @@
 
 static int setfl(int fd, struct file * filp, unsigned long arg)
 {
-	struct inode * inode = filp->f_path.dentry->d_inode;
+	struct inode * inode = file_inode(filp);
 	int error = 0;
 
 	/*
diff --git a/fs/file_table.c b/fs/file_table.c
index de9e9653d611..0f607ce89acc 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -447,7 +447,7 @@ void mark_files_ro(struct super_block *sb)
 
 	lg_global_lock(&files_lglock);
 	do_file_list_for_each_entry(sb, f) {
-		if (!S_ISREG(f->f_path.dentry->d_inode->i_mode))
+		if (!S_ISREG(file_inode(f)->i_mode))
 		       continue;
 		if (!file_count(f))
 			continue;
diff --git a/fs/freevxfs/vxfs_lookup.c b/fs/freevxfs/vxfs_lookup.c
index bd447e88f208..664b07a53870 100644
--- a/fs/freevxfs/vxfs_lookup.c
+++ b/fs/freevxfs/vxfs_lookup.c
@@ -237,7 +237,7 @@ vxfs_lookup(struct inode *dip, struct dentry *dp, unsigned int flags)
 static int
 vxfs_readdir(struct file *fp, void *retp, filldir_t filler)
 {
-	struct inode		*ip = fp->f_path.dentry->d_inode;
+	struct inode		*ip = file_inode(fp);
 	struct super_block	*sbp = ip->i_sb;
 	u_long			bsize = sbp->s_blocksize;
 	u_long			page, npages, block, pblocks, nblocks, offset;
diff --git a/fs/fuse/control.c b/fs/fuse/control.c
index 75a20c092dd4..b7978b9f75ef 100644
--- a/fs/fuse/control.c
+++ b/fs/fuse/control.c
@@ -23,7 +23,7 @@ static struct fuse_conn *fuse_ctl_file_conn_get(struct file *file)
 {
 	struct fuse_conn *fc;
 	mutex_lock(&fuse_mutex);
-	fc = file->f_path.dentry->d_inode->i_private;
+	fc = file_inode(file)->i_private;
 	if (fc)
 		fc = fuse_conn_get(fc);
 	mutex_unlock(&fuse_mutex);
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index b7c09f9eb40c..80ba3950c40d 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1160,7 +1160,7 @@ static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir)
 	int err;
 	size_t nbytes;
 	struct page *page;
-	struct inode *inode = file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	struct fuse_conn *fc = get_fuse_conn(inode);
 	struct fuse_req *req;
 
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 991ab2d484dd..44543df9f400 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -157,7 +157,7 @@ static const u32 gfs2_to_fsflags[32] = {
 
 static int gfs2_get_flags(struct file *filp, u32 __user *ptr)
 {
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	struct gfs2_inode *ip = GFS2_I(inode);
 	struct gfs2_holder gh;
 	int error;
@@ -217,7 +217,7 @@ void gfs2_set_inode_flags(struct inode *inode)
  */
 static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask)
 {
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	struct gfs2_inode *ip = GFS2_I(inode);
 	struct gfs2_sbd *sdp = GFS2_SB(inode);
 	struct buffer_head *bh;
@@ -293,7 +293,7 @@ out_drop_write:
 
 static int gfs2_set_flags(struct file *filp, u32 __user *ptr)
 {
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	u32 fsflags, gfsflags;
 
 	if (get_user(fsflags, ptr))
@@ -336,7 +336,7 @@ static long gfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 
 static void gfs2_size_hint(struct file *filep, loff_t offset, size_t size)
 {
-	struct inode *inode = filep->f_dentry->d_inode;
+	struct inode *inode = file_inode(filep);
 	struct gfs2_sbd *sdp = GFS2_SB(inode);
 	struct gfs2_inode *ip = GFS2_I(inode);
 	size_t blks = (size + sdp->sd_sb.sb_bsize - 1) >> sdp->sd_sb.sb_bsize_shift;
@@ -386,7 +386,7 @@ static int gfs2_allocate_page_backing(struct page *page)
 static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
 	struct page *page = vmf->page;
-	struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(vma->vm_file);
 	struct gfs2_inode *ip = GFS2_I(inode);
 	struct gfs2_sbd *sdp = GFS2_SB(inode);
 	unsigned long last_index;
@@ -673,8 +673,7 @@ static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 {
 	struct file *file = iocb->ki_filp;
 	size_t writesize = iov_length(iov, nr_segs);
-	struct dentry *dentry = file->f_dentry;
-	struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
+	struct gfs2_inode *ip = GFS2_I(file_inode(file));
 	int ret;
 
 	ret = gfs2_rs_alloc(ip);
@@ -772,7 +771,7 @@ static void calc_max_reserv(struct gfs2_inode *ip, loff_t max, loff_t *len,
 static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
 			   loff_t len)
 {
-	struct inode *inode = file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	struct gfs2_sbd *sdp = GFS2_SB(inode);
 	struct gfs2_inode *ip = GFS2_I(inode);
 	unsigned int data_blocks = 0, ind_blocks = 0, rblocks;
@@ -938,7 +937,7 @@ static int do_flock(struct file *file, int cmd, struct file_lock *fl)
 {
 	struct gfs2_file *fp = file->private_data;
 	struct gfs2_holder *fl_gh = &fp->f_fl_gh;
-	struct gfs2_inode *ip = GFS2_I(file->f_path.dentry->d_inode);
+	struct gfs2_inode *ip = GFS2_I(file_inode(file));
 	struct gfs2_glock *gl;
 	unsigned int state;
 	int flags;
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index b7eff078fe90..04af1cf7ae34 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1257,7 +1257,7 @@ fail:
 
 int gfs2_fitrim(struct file *filp, void __user *argp)
 {
-	struct inode *inode = filp->f_dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	struct gfs2_sbd *sdp = GFS2_SB(inode);
 	struct request_queue *q = bdev_get_queue(sdp->sd_vfs->s_bdev);
 	struct buffer_head *bh;
diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c
index 422dde2ec0a1..5f7f1abd5f6d 100644
--- a/fs/hfs/dir.c
+++ b/fs/hfs/dir.c
@@ -51,7 +51,7 @@ done:
  */
 static int hfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 {
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	struct super_block *sb = inode->i_sb;
 	int len, err;
 	char strbuf[HFS_MAX_NAMELEN];
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index d47f11658c17..3031dfdd2358 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -128,7 +128,7 @@ static ssize_t hfs_direct_IO(int rw, struct kiocb *iocb,
 {
 	struct file *file = iocb->ki_filp;
 	struct address_space *mapping = file->f_mapping;
-	struct inode *inode = file->f_path.dentry->d_inode->i_mapping->host;
+	struct inode *inode = file_inode(file)->i_mapping->host;
 	ssize_t ret;
 
 	ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c
index 6b9f921ef2fa..074e04589248 100644
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -122,7 +122,7 @@ fail:
 
 static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir)
 {
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	struct super_block *sb = inode->i_sb;
 	int len, err;
 	char strbuf[HFSPLUS_MAX_STRLEN + 1];
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 799b336b59f9..dcd05be5344b 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -124,7 +124,7 @@ static ssize_t hfsplus_direct_IO(int rw, struct kiocb *iocb,
 {
 	struct file *file = iocb->ki_filp;
 	struct address_space *mapping = file->f_mapping;
-	struct inode *inode = file->f_path.dentry->d_inode->i_mapping->host;
+	struct inode *inode = file_inode(file)->i_mapping->host;
 	ssize_t ret;
 
 	ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
diff --git a/fs/hfsplus/ioctl.c b/fs/hfsplus/ioctl.c
index 09addc8615fa..e3c4c4209428 100644
--- a/fs/hfsplus/ioctl.c
+++ b/fs/hfsplus/ioctl.c
@@ -59,7 +59,7 @@ static int hfsplus_ioctl_bless(struct file *file, int __user *user_flags)
 
 static int hfsplus_ioctl_getflags(struct file *file, int __user *user_flags)
 {
-	struct inode *inode = file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	struct hfsplus_inode_info *hip = HFSPLUS_I(inode);
 	unsigned int flags = 0;
 
@@ -75,7 +75,7 @@ static int hfsplus_ioctl_getflags(struct file *file, int __user *user_flags)
 
 static int hfsplus_ioctl_setflags(struct file *file, int __user *user_flags)
 {
-	struct inode *inode = file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	struct hfsplus_inode_info *hip = HFSPLUS_I(inode);
 	unsigned int flags;
 	int err = 0;
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 457addc5c91f..ba6de25771ac 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -30,7 +30,7 @@ static inline struct hostfs_inode_info *HOSTFS_I(struct inode *inode)
 	return list_entry(inode, struct hostfs_inode_info, vfs_inode);
 }
 
-#define FILE_HOSTFS_I(file) HOSTFS_I((file)->f_path.dentry->d_inode)
+#define FILE_HOSTFS_I(file) HOSTFS_I(file_inode(file))
 
 static int hostfs_d_delete(const struct dentry *dentry)
 {
diff --git a/fs/hpfs/dir.c b/fs/hpfs/dir.c
index 78e12b2e0ea2..546f6d39713a 100644
--- a/fs/hpfs/dir.c
+++ b/fs/hpfs/dir.c
@@ -25,7 +25,7 @@ static loff_t hpfs_dir_lseek(struct file *filp, loff_t off, int whence)
 	loff_t new_off = off + (whence == 1 ? filp->f_pos : 0);
 	loff_t pos;
 	struct quad_buffer_head qbh;
-	struct inode *i = filp->f_path.dentry->d_inode;
+	struct inode *i = file_inode(filp);
 	struct hpfs_inode_info *hpfs_inode = hpfs_i(i);
 	struct super_block *s = i->i_sb;
 
@@ -57,7 +57,7 @@ fail:
 
 static int hpfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 {
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	struct hpfs_inode_info *hpfs_inode = hpfs_i(inode);
 	struct quad_buffer_head qbh;
 	struct hpfs_dirent *de;
diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c
index fbfe2df5624b..9f9dbeceeee7 100644
--- a/fs/hpfs/file.c
+++ b/fs/hpfs/file.c
@@ -152,7 +152,7 @@ static ssize_t hpfs_file_write(struct file *file, const char __user *buf,
 	retval = do_sync_write(file, buf, count, ppos);
 	if (retval > 0) {
 		hpfs_lock(file->f_path.dentry->d_sb);
-		hpfs_i(file->f_path.dentry->d_inode)->i_dirty = 1;
+		hpfs_i(file_inode(file))->i_dirty = 1;
 		hpfs_unlock(file->f_path.dentry->d_sb);
 	}
 	return retval;
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c
index 43b315f2002b..74f55703be49 100644
--- a/fs/hppfs/hppfs.c
+++ b/fs/hppfs/hppfs.c
@@ -180,7 +180,7 @@ static ssize_t read_proc(struct file *file, char __user *buf, ssize_t count,
 	ssize_t (*read)(struct file *, char __user *, size_t, loff_t *);
 	ssize_t n;
 
-	read = file->f_path.dentry->d_inode->i_fop->read;
+	read = file_inode(file)->i_fop->read;
 
 	if (!is_user)
 		set_fs(KERNEL_DS);
@@ -288,7 +288,7 @@ static ssize_t hppfs_write(struct file *file, const char __user *buf,
 	struct file *proc_file = data->proc_file;
 	ssize_t (*write)(struct file *, const char __user *, size_t, loff_t *);
 
-	write = proc_file->f_path.dentry->d_inode->i_fop->write;
+	write = file_inode(proc_file)->i_fop->write;
 	return (*write)(proc_file, buf, len, ppos);
 }
 
@@ -513,7 +513,7 @@ static loff_t hppfs_llseek(struct file *file, loff_t off, int where)
 	loff_t (*llseek)(struct file *, loff_t, int);
 	loff_t ret;
 
-	llseek = proc_file->f_path.dentry->d_inode->i_fop->llseek;
+	llseek = file_inode(proc_file)->i_fop->llseek;
 	if (llseek != NULL) {
 		ret = (*llseek)(proc_file, off, where);
 		if (ret < 0)
@@ -561,7 +561,7 @@ static int hppfs_readdir(struct file *file, void *ent, filldir_t filldir)
 				      });
 	int err;
 
-	readdir = proc_file->f_path.dentry->d_inode->i_fop->readdir;
+	readdir = file_inode(proc_file)->i_fop->readdir;
 
 	proc_file->f_pos = file->f_pos;
 	err = (*readdir)(proc_file, &dirent, hppfs_filldir);
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 78bde32ea951..edb42ea60c76 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -97,7 +97,7 @@ static void huge_pagevec_release(struct pagevec *pvec)
 
 static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
 {
-	struct inode *inode = file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	loff_t len, vma_len;
 	int ret;
 	struct hstate *h = hstate_file(file);
diff --git a/fs/inode.c b/fs/inode.c
index 14084b72b259..67880e604399 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1655,7 +1655,7 @@ EXPORT_SYMBOL(file_remove_suid);
 
 int file_update_time(struct file *file)
 {
-	struct inode *inode = file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	struct timespec now;
 	int sync_it = 0;
 	int ret;
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 3bdad6d1f268..fd507fb460f8 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -175,7 +175,7 @@ static int ioctl_fiemap(struct file *filp, unsigned long arg)
 	struct fiemap fiemap;
 	struct fiemap __user *ufiemap = (struct fiemap __user *) arg;
 	struct fiemap_extent_info fieinfo = { 0, };
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	struct super_block *sb = inode->i_sb;
 	u64 len;
 	int error;
@@ -424,7 +424,7 @@ EXPORT_SYMBOL(generic_block_fiemap);
  */
 int ioctl_preallocate(struct file *filp, void __user *argp)
 {
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	struct space_resv sr;
 
 	if (copy_from_user(&sr, argp, sizeof(sr)))
@@ -449,7 +449,7 @@ int ioctl_preallocate(struct file *filp, void __user *argp)
 static int file_ioctl(struct file *filp, unsigned int cmd,
 		unsigned long arg)
 {
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	int __user *p = (int __user *)arg;
 
 	switch (cmd) {
@@ -512,7 +512,7 @@ static int ioctl_fioasync(unsigned int fd, struct file *filp,
 
 static int ioctl_fsfreeze(struct file *filp)
 {
-	struct super_block *sb = filp->f_path.dentry->d_inode->i_sb;
+	struct super_block *sb = file_inode(filp)->i_sb;
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
@@ -527,7 +527,7 @@ static int ioctl_fsfreeze(struct file *filp)
 
 static int ioctl_fsthaw(struct file *filp)
 {
-	struct super_block *sb = filp->f_path.dentry->d_inode->i_sb;
+	struct super_block *sb = file_inode(filp)->i_sb;
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
@@ -548,7 +548,7 @@ int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
 {
 	int error = 0;
 	int __user *argp = (int __user *)arg;
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 
 	switch (cmd) {
 	case FIOCLEX:
diff --git a/fs/isofs/compress.c b/fs/isofs/compress.c
index 0b3fa7974fa8..592e5115a561 100644
--- a/fs/isofs/compress.c
+++ b/fs/isofs/compress.c
@@ -296,7 +296,7 @@ static int zisofs_fill_pages(struct inode *inode, int full_page, int pcount,
  */
 static int zisofs_readpage(struct file *file, struct page *page)
 {
-	struct inode *inode = file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	struct address_space *mapping = inode->i_mapping;
 	int err;
 	int i, pcount, full_page;
diff --git a/fs/isofs/dir.c b/fs/isofs/dir.c
index f20437c068a0..a7d5c3c3d4e6 100644
--- a/fs/isofs/dir.c
+++ b/fs/isofs/dir.c
@@ -253,7 +253,7 @@ static int isofs_readdir(struct file *filp,
 	int result;
 	char *tmpname;
 	struct iso_directory_record *tmpde;
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 
 	tmpname = (char *)__get_free_page(GFP_KERNEL);
 	if (tmpname == NULL)
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index ad7774d32095..acd46a4160cb 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -117,12 +117,12 @@ static struct dentry *jffs2_lookup(struct inode *dir_i, struct dentry *target,
 static int jffs2_readdir(struct file *filp, void *dirent, filldir_t filldir)
 {
 	struct jffs2_inode_info *f;
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	struct jffs2_full_dirent *fd;
 	unsigned long offset, curofs;
 
 	jffs2_dbg(1, "jffs2_readdir() for dir_i #%lu\n",
-		  filp->f_path.dentry->d_inode->i_ino);
+		  file_inode(filp)->i_ino);
 
 	f = JFFS2_INODE_INFO(inode);
 
diff --git a/fs/jfs/ioctl.c b/fs/jfs/ioctl.c
index bc555ff417e9..93a1232894f6 100644
--- a/fs/jfs/ioctl.c
+++ b/fs/jfs/ioctl.c
@@ -58,7 +58,7 @@ static long jfs_map_ext2(unsigned long flags, int from)
 
 long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
-	struct inode *inode = filp->f_dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	struct jfs_inode_info *jfs_inode = JFS_IP(inode);
 	unsigned int flags;
 
diff --git a/fs/jfs/jfs_dtree.c b/fs/jfs/jfs_dtree.c
index 9197a1b0d02d..0ddbeceafc62 100644
--- a/fs/jfs/jfs_dtree.c
+++ b/fs/jfs/jfs_dtree.c
@@ -3004,7 +3004,7 @@ static inline struct jfs_dirent *next_jfs_dirent(struct jfs_dirent *dirent)
  */
 int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 {
-	struct inode *ip = filp->f_path.dentry->d_inode;
+	struct inode *ip = file_inode(filp);
 	struct nls_table *codepage = JFS_SBI(ip->i_sb)->nls_tab;
 	int rc = 0;
 	loff_t dtpos;	/* legacy OS/2 style position */
diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index ca0a08001449..a2717408c478 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -178,7 +178,7 @@ __be32 nlmclnt_grant(const struct sockaddr *addr, const struct nlm_lock *lock)
 			continue;
 		if (!rpc_cmp_addr(nlm_addr(block->b_host), addr))
 			continue;
-		if (nfs_compare_fh(NFS_FH(fl_blocked->fl_file->f_path.dentry->d_inode) ,fh) != 0)
+		if (nfs_compare_fh(NFS_FH(file_inode(fl_blocked->fl_file)) ,fh) != 0)
 			continue;
 		/* Alright, we found a lock. Set the return status
 		 * and wake up the caller
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 54f9e6ce0430..d7e1ec1c6827 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -127,7 +127,7 @@ static void nlmclnt_setlockargs(struct nlm_rqst *req, struct file_lock *fl)
 	struct nlm_lock	*lock = &argp->lock;
 
 	nlmclnt_next_cookie(&argp->cookie);
-	memcpy(&lock->fh, NFS_FH(fl->fl_file->f_path.dentry->d_inode), sizeof(struct nfs_fh));
+	memcpy(&lock->fh, NFS_FH(file_inode(fl->fl_file)), sizeof(struct nfs_fh));
 	lock->caller  = utsname()->nodename;
 	lock->oh.data = req->a_owner;
 	lock->oh.len  = snprintf(req->a_owner, sizeof(req->a_owner), "%u@%s",
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 8d80c990dffd..e703318c41df 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -406,8 +406,8 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
 	__be32			ret;
 
 	dprintk("lockd: nlmsvc_lock(%s/%ld, ty=%d, pi=%d, %Ld-%Ld, bl=%d)\n",
-				file->f_file->f_path.dentry->d_inode->i_sb->s_id,
-				file->f_file->f_path.dentry->d_inode->i_ino,
+				file_inode(file->f_file)->i_sb->s_id,
+				file_inode(file->f_file)->i_ino,
 				lock->fl.fl_type, lock->fl.fl_pid,
 				(long long)lock->fl.fl_start,
 				(long long)lock->fl.fl_end,
@@ -513,8 +513,8 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file,
 	__be32			ret;
 
 	dprintk("lockd: nlmsvc_testlock(%s/%ld, ty=%d, %Ld-%Ld)\n",
-				file->f_file->f_path.dentry->d_inode->i_sb->s_id,
-				file->f_file->f_path.dentry->d_inode->i_ino,
+				file_inode(file->f_file)->i_sb->s_id,
+				file_inode(file->f_file)->i_ino,
 				lock->fl.fl_type,
 				(long long)lock->fl.fl_start,
 				(long long)lock->fl.fl_end);
@@ -606,8 +606,8 @@ nlmsvc_unlock(struct net *net, struct nlm_file *file, struct nlm_lock *lock)
 	int	error;
 
 	dprintk("lockd: nlmsvc_unlock(%s/%ld, pi=%d, %Ld-%Ld)\n",
-				file->f_file->f_path.dentry->d_inode->i_sb->s_id,
-				file->f_file->f_path.dentry->d_inode->i_ino,
+				file_inode(file->f_file)->i_sb->s_id,
+				file_inode(file->f_file)->i_ino,
 				lock->fl.fl_pid,
 				(long long)lock->fl.fl_start,
 				(long long)lock->fl.fl_end);
@@ -635,8 +635,8 @@ nlmsvc_cancel_blocked(struct net *net, struct nlm_file *file, struct nlm_lock *l
 	int status = 0;
 
 	dprintk("lockd: nlmsvc_cancel(%s/%ld, pi=%d, %Ld-%Ld)\n",
-				file->f_file->f_path.dentry->d_inode->i_sb->s_id,
-				file->f_file->f_path.dentry->d_inode->i_ino,
+				file_inode(file->f_file)->i_sb->s_id,
+				file_inode(file->f_file)->i_ino,
 				lock->fl.fl_pid,
 				(long long)lock->fl.fl_start,
 				(long long)lock->fl.fl_end);
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index 0deb5f6c9dd4..b3a24b07d981 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -45,7 +45,7 @@ static inline void nlm_debug_print_fh(char *msg, struct nfs_fh *f)
 
 static inline void nlm_debug_print_file(char *msg, struct nlm_file *file)
 {
-	struct inode *inode = file->f_file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(file->f_file);
 
 	dprintk("lockd: %s %s/%ld\n",
 		msg, inode->i_sb->s_id, inode->i_ino);
diff --git a/fs/locks.c b/fs/locks.c
index a94e331a52a2..cb424a4fed71 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -334,7 +334,7 @@ static int flock_to_posix_lock(struct file *filp, struct file_lock *fl,
 		start = filp->f_pos;
 		break;
 	case SEEK_END:
-		start = i_size_read(filp->f_path.dentry->d_inode);
+		start = i_size_read(file_inode(filp));
 		break;
 	default:
 		return -EINVAL;
@@ -384,7 +384,7 @@ static int flock64_to_posix_lock(struct file *filp, struct file_lock *fl,
 		start = filp->f_pos;
 		break;
 	case SEEK_END:
-		start = i_size_read(filp->f_path.dentry->d_inode);
+		start = i_size_read(file_inode(filp));
 		break;
 	default:
 		return -EINVAL;
@@ -627,7 +627,7 @@ posix_test_lock(struct file *filp, struct file_lock *fl)
 	struct file_lock *cfl;
 
 	lock_flocks();
-	for (cfl = filp->f_path.dentry->d_inode->i_flock; cfl; cfl = cfl->fl_next) {
+	for (cfl = file_inode(filp)->i_flock; cfl; cfl = cfl->fl_next) {
 		if (!IS_POSIX(cfl))
 			continue;
 		if (posix_locks_conflict(fl, cfl))
@@ -708,7 +708,7 @@ static int flock_lock_file(struct file *filp, struct file_lock *request)
 {
 	struct file_lock *new_fl = NULL;
 	struct file_lock **before;
-	struct inode * inode = filp->f_path.dentry->d_inode;
+	struct inode * inode = file_inode(filp);
 	int error = 0;
 	int found = 0;
 
@@ -1002,7 +1002,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
 int posix_lock_file(struct file *filp, struct file_lock *fl,
 			struct file_lock *conflock)
 {
-	return __posix_lock_file(filp->f_path.dentry->d_inode, fl, conflock);
+	return __posix_lock_file(file_inode(filp), fl, conflock);
 }
 EXPORT_SYMBOL(posix_lock_file);
 
@@ -1326,8 +1326,8 @@ int fcntl_getlease(struct file *filp)
 	int type = F_UNLCK;
 
 	lock_flocks();
-	time_out_leases(filp->f_path.dentry->d_inode);
-	for (fl = filp->f_path.dentry->d_inode->i_flock; fl && IS_LEASE(fl);
+	time_out_leases(file_inode(filp));
+	for (fl = file_inode(filp)->i_flock; fl && IS_LEASE(fl);
 			fl = fl->fl_next) {
 		if (fl->fl_file == filp) {
 			type = target_leasetype(fl);
@@ -1843,7 +1843,7 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
 	if (copy_from_user(&flock, l, sizeof(flock)))
 		goto out;
 
-	inode = filp->f_path.dentry->d_inode;
+	inode = file_inode(filp);
 
 	/* Don't allow mandatory locks on files that may be memory mapped
 	 * and shared.
@@ -1961,7 +1961,7 @@ int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd,
 	if (copy_from_user(&flock, l, sizeof(flock)))
 		goto out;
 
-	inode = filp->f_path.dentry->d_inode;
+	inode = file_inode(filp);
 
 	/* Don't allow mandatory locks on files that may be memory mapped
 	 * and shared.
@@ -2030,7 +2030,7 @@ void locks_remove_posix(struct file *filp, fl_owner_t owner)
 	 * posix_lock_file().  Another process could be setting a lock on this
 	 * file at the same time, but we wouldn't remove that lock anyway.
 	 */
-	if (!filp->f_path.dentry->d_inode->i_flock)
+	if (!file_inode(filp)->i_flock)
 		return;
 
 	lock.fl_type = F_UNLCK;
@@ -2056,7 +2056,7 @@ EXPORT_SYMBOL(locks_remove_posix);
  */
 void locks_remove_flock(struct file *filp)
 {
-	struct inode * inode = filp->f_path.dentry->d_inode;
+	struct inode * inode = file_inode(filp);
 	struct file_lock *fl;
 	struct file_lock **before;
 
@@ -2152,7 +2152,7 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl,
 		fl_pid = fl->fl_pid;
 
 	if (fl->fl_file != NULL)
-		inode = fl->fl_file->f_path.dentry->d_inode;
+		inode = file_inode(fl->fl_file);
 
 	seq_printf(f, "%lld:%s ", id, pfx);
 	if (IS_POSIX(fl)) {
diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c
index 26e4a941532f..b82751082112 100644
--- a/fs/logfs/dir.c
+++ b/fs/logfs/dir.c
@@ -284,7 +284,7 @@ static int logfs_rmdir(struct inode *dir, struct dentry *dentry)
 #define IMPLICIT_NODES 2
 static int __logfs_readdir(struct file *file, void *buf, filldir_t filldir)
 {
-	struct inode *dir = file->f_dentry->d_inode;
+	struct inode *dir = file_inode(file);
 	loff_t pos = file->f_pos - IMPLICIT_NODES;
 	struct page *page;
 	struct logfs_disk_dentry *dd;
@@ -320,7 +320,7 @@ static int __logfs_readdir(struct file *file, void *buf, filldir_t filldir)
 
 static int logfs_readdir(struct file *file, void *buf, filldir_t filldir)
 {
-	struct inode *inode = file->f_dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	ino_t pino = parent_ino(file->f_dentry);
 	int err;
 
diff --git a/fs/logfs/file.c b/fs/logfs/file.c
index 3886cded283c..c2219a6dd3c8 100644
--- a/fs/logfs/file.c
+++ b/fs/logfs/file.c
@@ -183,7 +183,7 @@ static int logfs_releasepage(struct page *page, gfp_t only_xfs_uses_this)
 
 long logfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
-	struct inode *inode = file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	struct logfs_inode *li = logfs_inode(inode);
 	unsigned int oldflags, flags;
 	int err;
diff --git a/fs/minix/dir.c b/fs/minix/dir.c
index 685b2d981b87..a9ed6f36e6ea 100644
--- a/fs/minix/dir.c
+++ b/fs/minix/dir.c
@@ -85,7 +85,7 @@ static inline void *minix_next_entry(void *de, struct minix_sb_info *sbi)
 static int minix_readdir(struct file * filp, void * dirent, filldir_t filldir)
 {
 	unsigned long pos = filp->f_pos;
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	struct super_block *sb = inode->i_sb;
 	unsigned offset = pos & ~PAGE_CACHE_MASK;
 	unsigned long n = pos >> PAGE_CACHE_SHIFT;
diff --git a/fs/namei.c b/fs/namei.c
index 43a97ee1d4c8..df00b754631d 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2778,7 +2778,7 @@ retry_lookup:
 			goto out;
 
 		if ((*opened & FILE_CREATED) ||
-		    !S_ISREG(file->f_path.dentry->d_inode->i_mode))
+		    !S_ISREG(file_inode(file)->i_mode))
 			will_truncate = false;
 
 		audit_inode(name, file->f_path.dentry, 0);
diff --git a/fs/namespace.c b/fs/namespace.c
index 269919fa116d..50ca17d3cb45 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -384,7 +384,7 @@ EXPORT_SYMBOL_GPL(mnt_clone_write);
  */
 int __mnt_want_write_file(struct file *file)
 {
-	struct inode *inode = file->f_dentry->d_inode;
+	struct inode *inode = file_inode(file);
 
 	if (!(file->f_mode & FMODE_WRITE) || special_file(inode->i_mode))
 		return __mnt_want_write(file->f_path.mnt);
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 1acdad7fcec7..c41e02932542 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -525,7 +525,7 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
 	if (!ncp_filp)
 		goto out;
 	error = -ENOTSOCK;
-	sock_inode = ncp_filp->f_path.dentry->d_inode;
+	sock_inode = file_inode(ncp_filp);
 	if (!S_ISSOCK(sock_inode->i_mode))
 		goto out_fput;
 	sock = SOCKET_I(sock_inode);
@@ -564,7 +564,7 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
 		if (!server->info_filp)
 			goto out_bdi;
 		error = -ENOTSOCK;
-		sock_inode = server->info_filp->f_path.dentry->d_inode;
+		sock_inode = file_inode(server->info_filp);
 		if (!S_ISSOCK(sock_inode->i_mode))
 			goto out_fput2;
 		info_sock = SOCKET_I(sock_inode);
diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c
index 6958adfaff08..5c1e9262219c 100644
--- a/fs/ncpfs/ioctl.c
+++ b/fs/ncpfs/ioctl.c
@@ -808,7 +808,7 @@ outrel:
 
 long ncp_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
-	struct inode *inode = filp->f_dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	struct ncp_server *server = NCP_SERVER(inode);
 	uid_t uid = current_uid();
 	int need_drop_write = 0;
diff --git a/fs/ncpfs/mmap.c b/fs/ncpfs/mmap.c
index 63d14a99483d..ee24df5af1f9 100644
--- a/fs/ncpfs/mmap.c
+++ b/fs/ncpfs/mmap.c
@@ -105,7 +105,7 @@ static const struct vm_operations_struct ncp_file_mmap =
 /* This is used for a general mmap of a ncp file */
 int ncp_mmap(struct file *file, struct vm_area_struct *vma)
 {
-	struct inode *inode = file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	
 	DPRINTK("ncp_mmap: called\n");
 
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 1b2d7eb93796..a8bd28cde7e2 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -281,7 +281,7 @@ int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_des
 
 	for (i = 0; i < array->size; i++) {
 		if (array->array[i].cookie == *desc->dir_cookie) {
-			struct nfs_inode *nfsi = NFS_I(desc->file->f_path.dentry->d_inode);
+			struct nfs_inode *nfsi = NFS_I(file_inode(desc->file));
 			struct nfs_open_dir_context *ctx = desc->file->private_data;
 
 			new_pos = desc->current_index + i;
@@ -629,7 +629,7 @@ out:
 static
 int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page* page)
 {
-	struct inode	*inode = desc->file->f_path.dentry->d_inode;
+	struct inode	*inode = file_inode(desc->file);
 	int ret;
 
 	ret = nfs_readdir_xdr_to_array(desc, page, inode);
@@ -660,7 +660,7 @@ void cache_page_release(nfs_readdir_descriptor_t *desc)
 static
 struct page *get_cache_page(nfs_readdir_descriptor_t *desc)
 {
-	return read_cache_page(desc->file->f_path.dentry->d_inode->i_mapping,
+	return read_cache_page(file_inode(desc->file)->i_mapping,
 			desc->page_index, (filler_t *)nfs_readdir_filler, desc);
 }
 
@@ -764,7 +764,7 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
 {
 	struct page	*page = NULL;
 	int		status;
-	struct inode *inode = desc->file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(desc->file);
 	struct nfs_open_dir_context *ctx = desc->file->private_data;
 
 	dfprintk(DIRCACHE, "NFS: uncached_readdir() searching for cookie %Lu\n",
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 3c2b893665ba..29f4a48a0ee6 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -292,7 +292,7 @@ static int
 nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 {
 	int ret;
-	struct inode *inode = file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(file);
 
 	do {
 		ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index bc3968fa81e5..2ad8deaf7dbf 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -764,7 +764,7 @@ out:
 static ssize_t
 idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
 {
-	struct rpc_inode *rpci = RPC_I(filp->f_path.dentry->d_inode);
+	struct rpc_inode *rpci = RPC_I(file_inode(filp));
 	struct idmap *idmap = (struct idmap *)rpci->private;
 	struct key_construction *cons;
 	struct idmap_msg im;
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index ebeb94ce1b0b..548ae3113005 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -714,7 +714,7 @@ EXPORT_SYMBOL_GPL(put_nfs_open_context);
  */
 void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx)
 {
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	struct nfs_inode *nfsi = NFS_I(inode);
 
 	filp->private_data = get_nfs_open_context(ctx);
@@ -747,7 +747,7 @@ struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_c
 
 static void nfs_file_clear_open_context(struct file *filp)
 {
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	struct nfs_open_context *ctx = nfs_file_open_context(filp);
 
 	if (ctx) {
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 70efb63b1e42..43ea96ced28c 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -872,7 +872,7 @@ static void nfs3_proc_commit_setup(struct nfs_commit_data *data, struct rpc_mess
 static int
 nfs3_proc_lock(struct file *filp, int cmd, struct file_lock *fl)
 {
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 
 	return nlmclnt_proc(NFS_SERVER(inode)->nlm_host, cmd, fl);
 }
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index 08ddcccb8887..13e6bb3e3fe5 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -94,7 +94,7 @@ static int
 nfs4_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 {
 	int ret;
-	struct inode *inode = file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(file);
 
 	do {
 		ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index f084dac948e1..fc8de9016acf 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -662,7 +662,7 @@ nfs_proc_commit_setup(struct nfs_commit_data *data, struct rpc_message *msg)
 static int
 nfs_proc_lock(struct file *filp, int cmd, struct file_lock *fl)
 {
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 
 	return nlmclnt_proc(NFS_SERVER(inode)->nlm_host, cmd, fl);
 }
diff --git a/fs/nfsd/fault_inject.c b/fs/nfsd/fault_inject.c
index e761ee95617f..497584c70366 100644
--- a/fs/nfsd/fault_inject.c
+++ b/fs/nfsd/fault_inject.c
@@ -101,7 +101,7 @@ static ssize_t fault_inject_read(struct file *file, char __user *buf,
 	loff_t pos = *ppos;
 
 	if (!pos)
-		nfsd_inject_get(file->f_dentry->d_inode->i_private, &val);
+		nfsd_inject_get(file_inode(file)->i_private, &val);
 	size = scnprintf(read_buf, sizeof(read_buf), "%llu\n", val);
 
 	if (pos < 0)
@@ -133,10 +133,10 @@ static ssize_t fault_inject_write(struct file *file, const char __user *buf,
 
 	size = rpc_pton(net, write_buf, size, (struct sockaddr *)&sa, sizeof(sa));
 	if (size > 0)
-		nfsd_inject_set_client(file->f_dentry->d_inode->i_private, &sa, size);
+		nfsd_inject_set_client(file_inode(file)->i_private, &sa, size);
 	else {
 		val = simple_strtoll(write_buf, NULL, 0);
-		nfsd_inject_set(file->f_dentry->d_inode->i_private, val);
+		nfsd_inject_set(file_inode(file)->i_private, val);
 	}
 	return len; /* on success, claim we got the whole input */
 }
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 74934284d9a7..2db7021b01ae 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -85,7 +85,7 @@ static ssize_t (*write_op[])(struct file *, char *, size_t) = {
 
 static ssize_t nfsctl_transaction_write(struct file *file, const char __user *buf, size_t size, loff_t *pos)
 {
-	ino_t ino =  file->f_path.dentry->d_inode->i_ino;
+	ino_t ino =  file_inode(file)->i_ino;
 	char *data;
 	ssize_t rv;
 
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index d586117fa94a..a94245b4045f 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -979,7 +979,7 @@ static void kill_suid(struct dentry *dentry)
  */
 static int wait_for_concurrent_writes(struct file *file)
 {
-	struct inode *inode = file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	static ino_t last_ino;
 	static dev_t last_dev;
 	int err = 0;
@@ -1070,7 +1070,7 @@ __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
 	if (err)
 		return err;
 
-	inode = file->f_path.dentry->d_inode;
+	inode = file_inode(file);
 
 	/* Get readahead parameters */
 	ra = nfsd_get_raparms(inode->i_sb->s_dev, inode->i_ino);
@@ -1957,7 +1957,7 @@ static __be32 nfsd_buffered_readdir(struct file *file, filldir_t func,
 	offset = *offsetp;
 
 	while (1) {
-		struct inode *dir_inode = file->f_path.dentry->d_inode;
+		struct inode *dir_inode = file_inode(file);
 		unsigned int reclen;
 
 		cdp->err = nfserr_eof; /* will be cleared on successful read */
diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c
index df1a7fb238d1..f30b017740a7 100644
--- a/fs/nilfs2/dir.c
+++ b/fs/nilfs2/dir.c
@@ -259,7 +259,7 @@ static void nilfs_set_de_type(struct nilfs_dir_entry *de, struct inode *inode)
 static int nilfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 {
 	loff_t pos = filp->f_pos;
-	struct inode *inode = filp->f_dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	struct super_block *sb = inode->i_sb;
 	unsigned int offset = pos & ~PAGE_CACHE_MASK;
 	unsigned long n = pos >> PAGE_CACHE_SHIFT;
diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c
index 61946883025c..89dc0886387d 100644
--- a/fs/nilfs2/file.c
+++ b/fs/nilfs2/file.c
@@ -67,7 +67,7 @@ int nilfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
 static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
 	struct page *page = vmf->page;
-	struct inode *inode = vma->vm_file->f_dentry->d_inode;
+	struct inode *inode = file_inode(vma->vm_file);
 	struct nilfs_transaction_info ti;
 	int ret = 0;
 
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
index fdb180769485..ef61c749641d 100644
--- a/fs/nilfs2/ioctl.c
+++ b/fs/nilfs2/ioctl.c
@@ -793,7 +793,7 @@ static int nilfs_ioctl_get_info(struct inode *inode, struct file *filp,
 
 long nilfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
-	struct inode *inode = filp->f_dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	void __user *argp = (void __user *)arg;
 
 	switch (cmd) {
diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c
index 08b886f119ce..2bfe6dc413a0 100644
--- a/fs/notify/dnotify/dnotify.c
+++ b/fs/notify/dnotify/dnotify.c
@@ -174,7 +174,7 @@ void dnotify_flush(struct file *filp, fl_owner_t id)
 	struct dnotify_struct **prev;
 	struct inode *inode;
 
-	inode = filp->f_path.dentry->d_inode;
+	inode = file_inode(filp);
 	if (!S_ISDIR(inode->i_mode))
 		return;
 
@@ -296,7 +296,7 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
 	}
 
 	/* dnotify only works on directories */
-	inode = filp->f_path.dentry->d_inode;
+	inode = file_inode(filp);
 	if (!S_ISDIR(inode->i_mode)) {
 		error = -ENOTDIR;
 		goto out_err;
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 9ff4a5ee6e20..5d8444268a16 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -466,7 +466,7 @@ static int fanotify_find_path(int dfd, const char __user *filename,
 
 		ret = -ENOTDIR;
 		if ((flags & FAN_MARK_ONLYDIR) &&
-		    !(S_ISDIR(f.file->f_path.dentry->d_inode->i_mode))) {
+		    !(S_ISDIR(file_inode(f.file)->i_mode))) {
 			fdput(f);
 			goto out;
 		}
diff --git a/fs/ntfs/dir.c b/fs/ntfs/dir.c
index 99e36107ff60..aa411c3f20e9 100644
--- a/fs/ntfs/dir.c
+++ b/fs/ntfs/dir.c
@@ -1101,7 +1101,7 @@ static int ntfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 {
 	s64 ia_pos, ia_start, prev_ia_pos, bmp_pos;
 	loff_t fpos, i_size;
-	struct inode *bmp_vi, *vdir = filp->f_path.dentry->d_inode;
+	struct inode *bmp_vi, *vdir = file_inode(filp);
 	struct super_block *sb = vdir->i_sb;
 	ntfs_inode *ndir = NTFS_I(vdir);
 	ntfs_volume *vol = NTFS_SB(sb);
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 657743254eb9..db1ad26e02a7 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -569,7 +569,7 @@ static void ocfs2_dio_end_io(struct kiocb *iocb,
 			     int ret,
 			     bool is_async)
 {
-	struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(iocb->ki_filp);
 	int level;
 	wait_queue_head_t *wq = ocfs2_ioend_wq(inode);
 
@@ -626,7 +626,7 @@ static ssize_t ocfs2_direct_IO(int rw,
 			       unsigned long nr_segs)
 {
 	struct file *file = iocb->ki_filp;
-	struct inode *inode = file->f_path.dentry->d_inode->i_mapping->host;
+	struct inode *inode = file_inode(file)->i_mapping->host;
 
 	/*
 	 * Fallback to buffered I/O if we see an inode without
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 8fe4e2892ab9..ac0d4a0e8a41 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -2015,7 +2015,7 @@ int ocfs2_dir_foreach(struct inode *inode, loff_t *f_pos, void *priv,
 int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir)
 {
 	int error = 0;
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	int lock_level = 0;
 
 	trace_ocfs2_readdir((unsigned long long)OCFS2_I(inode)->ip_blkno);
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
index 16b712d260d4..4c5fc8d77dc2 100644
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -224,7 +224,7 @@ static int dlmfs_file_setattr(struct dentry *dentry, struct iattr *attr)
 static unsigned int dlmfs_file_poll(struct file *file, poll_table *wait)
 {
 	int event = 0;
-	struct inode *inode = file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	struct dlmfs_inode_private *ip = DLMFS_I(inode);
 
 	poll_wait(file, &ip->ip_lockres.l_event, wait);
@@ -245,7 +245,7 @@ static ssize_t dlmfs_file_read(struct file *filp,
 	int bytes_left;
 	ssize_t readlen, got;
 	char *lvb_buf;
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 
 	mlog(0, "inode %lu, count = %zu, *ppos = %llu\n",
 		inode->i_ino, count, *ppos);
@@ -293,7 +293,7 @@ static ssize_t dlmfs_file_write(struct file *filp,
 	int bytes_left;
 	ssize_t writelen;
 	char *lvb_buf;
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 
 	mlog(0, "inode %lu, count = %zu, *ppos = %llu\n",
 		inode->i_ino, count, *ppos);
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 37d313ede159..04098af9dbc8 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1949,7 +1949,7 @@ out:
 int ocfs2_change_file_space(struct file *file, unsigned int cmd,
 			    struct ocfs2_space_resv *sr)
 {
-	struct inode *inode = file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 	int ret;
 
@@ -1977,7 +1977,7 @@ int ocfs2_change_file_space(struct file *file, unsigned int cmd,
 static long ocfs2_fallocate(struct file *file, int mode, loff_t offset,
 			    loff_t len)
 {
-	struct inode *inode = file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 	struct ocfs2_space_resv sr;
 	int change_size = 1;
@@ -2232,7 +2232,7 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
 	loff_t old_size, *ppos = &iocb->ki_pos;
 	u32 old_clusters;
 	struct file *file = iocb->ki_filp;
-	struct inode *inode = file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 	int full_coherency = !(osb->s_mount_opt &
 			       OCFS2_MOUNT_COHERENCY_BUFFERED);
@@ -2516,7 +2516,7 @@ static ssize_t ocfs2_file_splice_read(struct file *in,
 				      unsigned int flags)
 {
 	int ret = 0, lock_level = 0;
-	struct inode *inode = in->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(in);
 
 	trace_ocfs2_file_splice_read(inode, in, in->f_path.dentry,
 			(unsigned long long)OCFS2_I(inode)->ip_blkno,
@@ -2546,7 +2546,7 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
 {
 	int ret = 0, rw_level = -1, have_alloc_sem = 0, lock_level = 0;
 	struct file *filp = iocb->ki_filp;
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 
 	trace_ocfs2_file_aio_read(inode, filp, filp->f_path.dentry,
 			(unsigned long long)OCFS2_I(inode)->ip_blkno,
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
index f20edcbfe700..752f0b26221d 100644
--- a/fs/ocfs2/ioctl.c
+++ b/fs/ocfs2/ioctl.c
@@ -881,7 +881,7 @@ bail:
 
 long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	unsigned int flags;
 	int new_clusters;
 	int status;
@@ -994,7 +994,7 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg)
 {
 	bool preserve;
 	struct reflink_arguments args;
-	struct inode *inode = file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	struct ocfs2_info info;
 	void __user *argp = (void __user *)arg;
 
diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c
index 47a87dda54ce..07c585b85000 100644
--- a/fs/ocfs2/mmap.c
+++ b/fs/ocfs2/mmap.c
@@ -62,7 +62,7 @@ static int __ocfs2_page_mkwrite(struct file *file, struct buffer_head *di_bh,
 				struct page *page)
 {
 	int ret = VM_FAULT_NOPAGE;
-	struct inode *inode = file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	struct address_space *mapping = inode->i_mapping;
 	loff_t pos = page_offset(page);
 	unsigned int len = PAGE_CACHE_SIZE;
@@ -131,7 +131,7 @@ out:
 static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
 	struct page *page = vmf->page;
-	struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(vma->vm_file);
 	struct buffer_head *di_bh = NULL;
 	sigset_t oldset;
 	int ret;
@@ -180,13 +180,13 @@ int ocfs2_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	int ret = 0, lock_level = 0;
 
-	ret = ocfs2_inode_lock_atime(file->f_dentry->d_inode,
+	ret = ocfs2_inode_lock_atime(file_inode(file),
 				    file->f_vfsmnt, &lock_level);
 	if (ret < 0) {
 		mlog_errno(ret);
 		goto out;
 	}
-	ocfs2_inode_unlock(file->f_dentry->d_inode, lock_level);
+	ocfs2_inode_unlock(file_inode(file), lock_level);
 out:
 	vma->vm_ops = &ocfs2_file_vm_ops;
 	return 0;
diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c
index 6083432f667e..9f8dcadd9a50 100644
--- a/fs/ocfs2/move_extents.c
+++ b/fs/ocfs2/move_extents.c
@@ -1055,7 +1055,7 @@ int ocfs2_ioctl_move_extents(struct file *filp, void __user *argp)
 {
 	int status;
 
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	struct ocfs2_move_extents range;
 	struct ocfs2_move_extents_context *context = NULL;
 
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 30a055049e16..1baffaadda41 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -2927,7 +2927,7 @@ int ocfs2_duplicate_clusters_by_page(handle_t *handle,
 				     u32 new_cluster, u32 new_len)
 {
 	int ret = 0, partial;
-	struct inode *inode = file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	struct ocfs2_caching_info *ci = INODE_CACHE(inode);
 	struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
 	u64 new_block = ocfs2_clusters_to_blocks(sb, new_cluster);
@@ -3020,7 +3020,7 @@ int ocfs2_duplicate_clusters_by_jbd(handle_t *handle,
 				    u32 new_cluster, u32 new_len)
 {
 	int ret = 0;
-	struct inode *inode = file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	struct super_block *sb = inode->i_sb;
 	struct ocfs2_caching_info *ci = INODE_CACHE(inode);
 	int i, blocks = ocfs2_clusters_to_blocks(sb, new_len);
diff --git a/fs/omfs/dir.c b/fs/omfs/dir.c
index fb5b3ff79dc6..acbaebcad3a8 100644
--- a/fs/omfs/dir.c
+++ b/fs/omfs/dir.c
@@ -330,7 +330,7 @@ int omfs_is_bad(struct omfs_sb_info *sbi, struct omfs_header *header,
 static int omfs_fill_chain(struct file *filp, void *dirent, filldir_t filldir,
 		u64 fsblock, int hindex)
 {
-	struct inode *dir = filp->f_dentry->d_inode;
+	struct inode *dir = file_inode(filp);
 	struct buffer_head *bh;
 	struct omfs_inode *oi;
 	u64 self;
@@ -405,7 +405,7 @@ out:
 
 static int omfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 {
-	struct inode *dir = filp->f_dentry->d_inode;
+	struct inode *dir = file_inode(filp);
 	struct buffer_head *bh;
 	loff_t offset, res;
 	unsigned int hchain, hindex;
diff --git a/fs/open.c b/fs/open.c
index 9b33c0cbfacf..e08643feb574 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -228,7 +228,7 @@ SYSCALL_ALIAS(sys_ftruncate64, SyS_ftruncate64);
 
 int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
 {
-	struct inode *inode = file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	long ret;
 
 	if (offset < 0 || len <= 0)
@@ -426,7 +426,7 @@ SYSCALL_DEFINE1(fchdir, unsigned int, fd)
 	if (!f.file)
 		goto out;
 
-	inode = f.file->f_path.dentry->d_inode;
+	inode = file_inode(f.file);
 
 	error = -ENOTDIR;
 	if (!S_ISDIR(inode->i_mode))
@@ -689,7 +689,7 @@ static int do_dentry_open(struct file *f,
 		f->f_mode = FMODE_PATH;
 
 	path_get(&f->f_path);
-	inode = f->f_path.dentry->d_inode;
+	inode = file_inode(f);
 	if (f->f_mode & FMODE_WRITE) {
 		error = __get_file_write_access(inode, f->f_path.mnt);
 		if (error)
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c
index 2ad080faca34..ae47fa7efb9d 100644
--- a/fs/openpromfs/inode.c
+++ b/fs/openpromfs/inode.c
@@ -262,7 +262,7 @@ found:
 
 static int openpromfs_readdir(struct file * filp, void * dirent, filldir_t filldir)
 {
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	struct op_inode_info *oi = OP_I(inode);
 	struct device_node *dp = oi->u.node;
 	struct device_node *child;
diff --git a/fs/pipe.c b/fs/pipe.c
index bd3479db4b62..39baf6c3ebb0 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -361,7 +361,7 @@ pipe_read(struct kiocb *iocb, const struct iovec *_iov,
 	   unsigned long nr_segs, loff_t pos)
 {
 	struct file *filp = iocb->ki_filp;
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	struct pipe_inode_info *pipe;
 	int do_wakeup;
 	ssize_t ret;
@@ -486,7 +486,7 @@ pipe_write(struct kiocb *iocb, const struct iovec *_iov,
 	    unsigned long nr_segs, loff_t ppos)
 {
 	struct file *filp = iocb->ki_filp;
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	struct pipe_inode_info *pipe;
 	ssize_t ret;
 	int do_wakeup;
@@ -677,7 +677,7 @@ bad_pipe_w(struct file *filp, const char __user *buf, size_t count,
 
 static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	struct pipe_inode_info *pipe;
 	int count, buf, nrbufs;
 
@@ -705,7 +705,7 @@ static unsigned int
 pipe_poll(struct file *filp, poll_table *wait)
 {
 	unsigned int mask;
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	struct pipe_inode_info *pipe = inode->i_pipe;
 	int nrbufs;
 
@@ -758,7 +758,7 @@ pipe_release(struct inode *inode, int decr, int decw)
 static int
 pipe_read_fasync(int fd, struct file *filp, int on)
 {
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	int retval;
 
 	mutex_lock(&inode->i_mutex);
@@ -772,7 +772,7 @@ pipe_read_fasync(int fd, struct file *filp, int on)
 static int
 pipe_write_fasync(int fd, struct file *filp, int on)
 {
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	int retval;
 
 	mutex_lock(&inode->i_mutex);
@@ -786,7 +786,7 @@ pipe_write_fasync(int fd, struct file *filp, int on)
 static int
 pipe_rdwr_fasync(int fd, struct file *filp, int on)
 {
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	struct pipe_inode_info *pipe = inode->i_pipe;
 	int retval;
 
@@ -1226,7 +1226,7 @@ int pipe_proc_fn(struct ctl_table *table, int write, void __user *buf,
  */
 struct pipe_inode_info *get_pipe_info(struct file *file)
 {
-	struct inode *i = file->f_path.dentry->d_inode;
+	struct inode *i = file_inode(file);
 
 	return S_ISFIFO(i->i_mode) ? i->i_pipe : NULL;
 }
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 9b43ff77a51e..760268d6cba6 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -383,7 +383,7 @@ static int lstats_open(struct inode *inode, struct file *file)
 static ssize_t lstats_write(struct file *file, const char __user *buf,
 			    size_t count, loff_t *offs)
 {
-	struct task_struct *task = get_proc_task(file->f_dentry->d_inode);
+	struct task_struct *task = get_proc_task(file_inode(file));
 
 	if (!task)
 		return -ESRCH;
@@ -602,7 +602,7 @@ static const struct inode_operations proc_def_inode_operations = {
 static ssize_t proc_info_read(struct file * file, char __user * buf,
 			  size_t count, loff_t *ppos)
 {
-	struct inode * inode = file->f_path.dentry->d_inode;
+	struct inode * inode = file_inode(file);
 	unsigned long page;
 	ssize_t length;
 	struct task_struct *task = get_proc_task(inode);
@@ -668,7 +668,7 @@ static const struct file_operations proc_single_file_operations = {
 
 static int __mem_open(struct inode *inode, struct file *file, unsigned int mode)
 {
-	struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
+	struct task_struct *task = get_proc_task(file_inode(file));
 	struct mm_struct *mm;
 
 	if (!task)
@@ -869,7 +869,7 @@ static const struct file_operations proc_environ_operations = {
 static ssize_t oom_adj_read(struct file *file, char __user *buf, size_t count,
 			    loff_t *ppos)
 {
-	struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
+	struct task_struct *task = get_proc_task(file_inode(file));
 	char buffer[PROC_NUMBUF];
 	int oom_adj = OOM_ADJUST_MIN;
 	size_t len;
@@ -916,7 +916,7 @@ static ssize_t oom_adj_write(struct file *file, const char __user *buf,
 		goto out;
 	}
 
-	task = get_proc_task(file->f_path.dentry->d_inode);
+	task = get_proc_task(file_inode(file));
 	if (!task) {
 		err = -ESRCH;
 		goto out;
@@ -976,7 +976,7 @@ static const struct file_operations proc_oom_adj_operations = {
 static ssize_t oom_score_adj_read(struct file *file, char __user *buf,
 					size_t count, loff_t *ppos)
 {
-	struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
+	struct task_struct *task = get_proc_task(file_inode(file));
 	char buffer[PROC_NUMBUF];
 	short oom_score_adj = OOM_SCORE_ADJ_MIN;
 	unsigned long flags;
@@ -1019,7 +1019,7 @@ static ssize_t oom_score_adj_write(struct file *file, const char __user *buf,
 		goto out;
 	}
 
-	task = get_proc_task(file->f_path.dentry->d_inode);
+	task = get_proc_task(file_inode(file));
 	if (!task) {
 		err = -ESRCH;
 		goto out;
@@ -1067,7 +1067,7 @@ static const struct file_operations proc_oom_score_adj_operations = {
 static ssize_t proc_loginuid_read(struct file * file, char __user * buf,
 				  size_t count, loff_t *ppos)
 {
-	struct inode * inode = file->f_path.dentry->d_inode;
+	struct inode * inode = file_inode(file);
 	struct task_struct *task = get_proc_task(inode);
 	ssize_t length;
 	char tmpbuf[TMPBUFLEN];
@@ -1084,7 +1084,7 @@ static ssize_t proc_loginuid_read(struct file * file, char __user * buf,
 static ssize_t proc_loginuid_write(struct file * file, const char __user * buf,
 				   size_t count, loff_t *ppos)
 {
-	struct inode * inode = file->f_path.dentry->d_inode;
+	struct inode * inode = file_inode(file);
 	char *page, *tmp;
 	ssize_t length;
 	uid_t loginuid;
@@ -1142,7 +1142,7 @@ static const struct file_operations proc_loginuid_operations = {
 static ssize_t proc_sessionid_read(struct file * file, char __user * buf,
 				  size_t count, loff_t *ppos)
 {
-	struct inode * inode = file->f_path.dentry->d_inode;
+	struct inode * inode = file_inode(file);
 	struct task_struct *task = get_proc_task(inode);
 	ssize_t length;
 	char tmpbuf[TMPBUFLEN];
@@ -1165,7 +1165,7 @@ static const struct file_operations proc_sessionid_operations = {
 static ssize_t proc_fault_inject_read(struct file * file, char __user * buf,
 				      size_t count, loff_t *ppos)
 {
-	struct task_struct *task = get_proc_task(file->f_dentry->d_inode);
+	struct task_struct *task = get_proc_task(file_inode(file));
 	char buffer[PROC_NUMBUF];
 	size_t len;
 	int make_it_fail;
@@ -1197,7 +1197,7 @@ static ssize_t proc_fault_inject_write(struct file * file,
 	make_it_fail = simple_strtol(strstrip(buffer), &end, 0);
 	if (*end)
 		return -EINVAL;
-	task = get_proc_task(file->f_dentry->d_inode);
+	task = get_proc_task(file_inode(file));
 	if (!task)
 		return -ESRCH;
 	task->make_it_fail = make_it_fail;
@@ -1237,7 +1237,7 @@ static ssize_t
 sched_write(struct file *file, const char __user *buf,
 	    size_t count, loff_t *offset)
 {
-	struct inode *inode = file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	struct task_struct *p;
 
 	p = get_proc_task(inode);
@@ -1288,7 +1288,7 @@ static ssize_t
 sched_autogroup_write(struct file *file, const char __user *buf,
 	    size_t count, loff_t *offset)
 {
-	struct inode *inode = file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	struct task_struct *p;
 	char buffer[PROC_NUMBUF];
 	int nice;
@@ -1343,7 +1343,7 @@ static const struct file_operations proc_pid_sched_autogroup_operations = {
 static ssize_t comm_write(struct file *file, const char __user *buf,
 				size_t count, loff_t *offset)
 {
-	struct inode *inode = file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	struct task_struct *p;
 	char buffer[TASK_COMM_LEN];
 
@@ -2146,7 +2146,7 @@ out_no_task:
 static ssize_t proc_pid_attr_read(struct file * file, char __user * buf,
 				  size_t count, loff_t *ppos)
 {
-	struct inode * inode = file->f_path.dentry->d_inode;
+	struct inode * inode = file_inode(file);
 	char *p = NULL;
 	ssize_t length;
 	struct task_struct *task = get_proc_task(inode);
@@ -2167,7 +2167,7 @@ static ssize_t proc_pid_attr_read(struct file * file, char __user * buf,
 static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf,
 				   size_t count, loff_t *ppos)
 {
-	struct inode * inode = file->f_path.dentry->d_inode;
+	struct inode * inode = file_inode(file);
 	char *page;
 	ssize_t length;
 	struct task_struct *task = get_proc_task(inode);
@@ -2256,7 +2256,7 @@ static const struct inode_operations proc_attr_dir_inode_operations = {
 static ssize_t proc_coredump_filter_read(struct file *file, char __user *buf,
 					 size_t count, loff_t *ppos)
 {
-	struct task_struct *task = get_proc_task(file->f_dentry->d_inode);
+	struct task_struct *task = get_proc_task(file_inode(file));
 	struct mm_struct *mm;
 	char buffer[PROC_NUMBUF];
 	size_t len;
@@ -2308,7 +2308,7 @@ static ssize_t proc_coredump_filter_write(struct file *file,
 		goto out_no_task;
 
 	ret = -ESRCH;
-	task = get_proc_task(file->f_dentry->d_inode);
+	task = get_proc_task(file_inode(file));
 	if (!task)
 		goto out_no_task;
 
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 76ddae83daa5..7dfe548a28e8 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -42,7 +42,7 @@ static ssize_t
 __proc_file_read(struct file *file, char __user *buf, size_t nbytes,
 	       loff_t *ppos)
 {
-	struct inode * inode = file->f_path.dentry->d_inode;
+	struct inode * inode = file_inode(file);
 	char 	*page;
 	ssize_t	retval=0;
 	int	eof=0;
@@ -188,7 +188,7 @@ static ssize_t
 proc_file_read(struct file *file, char __user *buf, size_t nbytes,
 	       loff_t *ppos)
 {
-	struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode);
+	struct proc_dir_entry *pde = PDE(file_inode(file));
 	ssize_t rv = -EIO;
 
 	spin_lock(&pde->pde_unload_lock);
@@ -209,7 +209,7 @@ static ssize_t
 proc_file_write(struct file *file, const char __user *buffer,
 		size_t count, loff_t *ppos)
 {
-	struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode);
+	struct proc_dir_entry *pde = PDE(file_inode(file));
 	ssize_t rv = -EIO;
 
 	if (pde->write_proc) {
@@ -460,7 +460,7 @@ int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent,
 {
 	unsigned int ino;
 	int i;
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	int ret = 0;
 
 	ino = inode->i_ino;
@@ -522,7 +522,7 @@ out:
 
 int proc_readdir(struct file *filp, void *dirent, filldir_t filldir)
 {
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 
 	return proc_readdir_de(PDE(inode), filp, dirent, filldir);
 }
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 439ae6886507..38f5c119b806 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -144,7 +144,7 @@ void pde_users_dec(struct proc_dir_entry *pde)
 
 static loff_t proc_reg_llseek(struct file *file, loff_t offset, int whence)
 {
-	struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode);
+	struct proc_dir_entry *pde = PDE(file_inode(file));
 	loff_t rv = -EINVAL;
 	loff_t (*llseek)(struct file *, loff_t, int);
 
@@ -179,7 +179,7 @@ static loff_t proc_reg_llseek(struct file *file, loff_t offset, int whence)
 
 static ssize_t proc_reg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
 {
-	struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode);
+	struct proc_dir_entry *pde = PDE(file_inode(file));
 	ssize_t rv = -EIO;
 	ssize_t (*read)(struct file *, char __user *, size_t, loff_t *);
 
@@ -201,7 +201,7 @@ static ssize_t proc_reg_read(struct file *file, char __user *buf, size_t count,
 
 static ssize_t proc_reg_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
 {
-	struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode);
+	struct proc_dir_entry *pde = PDE(file_inode(file));
 	ssize_t rv = -EIO;
 	ssize_t (*write)(struct file *, const char __user *, size_t, loff_t *);
 
@@ -223,7 +223,7 @@ static ssize_t proc_reg_write(struct file *file, const char __user *buf, size_t
 
 static unsigned int proc_reg_poll(struct file *file, struct poll_table_struct *pts)
 {
-	struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode);
+	struct proc_dir_entry *pde = PDE(file_inode(file));
 	unsigned int rv = DEFAULT_POLLMASK;
 	unsigned int (*poll)(struct file *, struct poll_table_struct *);
 
@@ -245,7 +245,7 @@ static unsigned int proc_reg_poll(struct file *file, struct poll_table_struct *p
 
 static long proc_reg_unlocked_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
-	struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode);
+	struct proc_dir_entry *pde = PDE(file_inode(file));
 	long rv = -ENOTTY;
 	long (*ioctl)(struct file *, unsigned int, unsigned long);
 
@@ -268,7 +268,7 @@ static long proc_reg_unlocked_ioctl(struct file *file, unsigned int cmd, unsigne
 #ifdef CONFIG_COMPAT
 static long proc_reg_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
-	struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode);
+	struct proc_dir_entry *pde = PDE(file_inode(file));
 	long rv = -ENOTTY;
 	long (*compat_ioctl)(struct file *, unsigned int, unsigned long);
 
@@ -291,7 +291,7 @@ static long proc_reg_compat_ioctl(struct file *file, unsigned int cmd, unsigned
 
 static int proc_reg_mmap(struct file *file, struct vm_area_struct *vma)
 {
-	struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode);
+	struct proc_dir_entry *pde = PDE(file_inode(file));
 	int rv = -EIO;
 	int (*mmap)(struct file *, struct vm_area_struct *);
 
diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c
index b1822dde55c2..ccfd99bd1c5a 100644
--- a/fs/proc/nommu.c
+++ b/fs/proc/nommu.c
@@ -45,7 +45,7 @@ static int nommu_region_show(struct seq_file *m, struct vm_region *region)
 	file = region->vm_file;
 
 	if (file) {
-		struct inode *inode = region->vm_file->f_path.dentry->d_inode;
+		struct inode *inode = file_inode(region->vm_file);
 		dev = inode->i_sb->s_dev;
 		ino = inode->i_ino;
 	}
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index fe72cd073dea..75df0d731110 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -163,7 +163,7 @@ static int proc_tgid_net_readdir(struct file *filp, void *dirent,
 	struct net *net;
 
 	ret = -EINVAL;
-	net = get_proc_task_net(filp->f_path.dentry->d_inode);
+	net = get_proc_task_net(file_inode(filp));
 	if (net != NULL) {
 		ret = proc_readdir_de(net->proc_net, filp, dirent, filldir);
 		put_net(net);
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 1827d88ad58b..612df79cc6a1 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -478,7 +478,7 @@ out:
 static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf,
 		size_t count, loff_t *ppos, int write)
 {
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	struct ctl_table_header *head = grab_header(inode);
 	struct ctl_table *table = PROC_I(inode)->sysctl_entry;
 	ssize_t error;
@@ -542,7 +542,7 @@ static int proc_sys_open(struct inode *inode, struct file *filp)
 
 static unsigned int proc_sys_poll(struct file *filp, poll_table *wait)
 {
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	struct ctl_table_header *head = grab_header(inode);
 	struct ctl_table *table = PROC_I(inode)->sysctl_entry;
 	unsigned int ret = DEFAULT_POLLMASK;
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index ca5ce7f9f800..3e636d864d56 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -271,7 +271,7 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
 	const char *name = NULL;
 
 	if (file) {
-		struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
+		struct inode *inode = file_inode(vma->vm_file);
 		dev = inode->i_sb->s_dev;
 		ino = inode->i_ino;
 		pgoff = ((loff_t)vma->vm_pgoff) << PAGE_SHIFT;
@@ -743,7 +743,7 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
 		return rv;
 	if (type < CLEAR_REFS_ALL || type > CLEAR_REFS_MAPPED)
 		return -EINVAL;
-	task = get_proc_task(file->f_path.dentry->d_inode);
+	task = get_proc_task(file_inode(file));
 	if (!task)
 		return -ESRCH;
 	mm = get_task_mm(task);
@@ -1015,7 +1015,7 @@ static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask,
 static ssize_t pagemap_read(struct file *file, char __user *buf,
 			    size_t count, loff_t *ppos)
 {
-	struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
+	struct task_struct *task = get_proc_task(file_inode(file));
 	struct mm_struct *mm;
 	struct pagemapread pm;
 	int ret = -ESRCH;
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index 1ccfa537f5f5..56123a6f462e 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -149,7 +149,7 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma,
 	file = vma->vm_file;
 
 	if (file) {
-		struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
+		struct inode *inode = file_inode(vma->vm_file);
 		dev = inode->i_sb->s_dev;
 		ino = inode->i_ino;
 		pgoff = (loff_t)vma->vm_pgoff << PAGE_SHIFT;
diff --git a/fs/qnx4/dir.c b/fs/qnx4/dir.c
index 7b0329468a5d..28ce014b3cef 100644
--- a/fs/qnx4/dir.c
+++ b/fs/qnx4/dir.c
@@ -16,7 +16,7 @@
 
 static int qnx4_readdir(struct file *filp, void *dirent, filldir_t filldir)
 {
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	unsigned int offset;
 	struct buffer_head *bh;
 	struct qnx4_inode_entry *de;
diff --git a/fs/qnx6/dir.c b/fs/qnx6/dir.c
index dc597353db3b..8798d065e400 100644
--- a/fs/qnx6/dir.c
+++ b/fs/qnx6/dir.c
@@ -117,7 +117,7 @@ static int qnx6_dir_longfilename(struct inode *inode,
 
 static int qnx6_readdir(struct file *filp, void *dirent, filldir_t filldir)
 {
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	struct super_block *s = inode->i_sb;
 	struct qnx6_sb_info *sbi = QNX6_SB(s);
 	loff_t pos = filp->f_pos & (QNX6_DIR_ENTRY_SIZE - 1);
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index d5378d028589..8d5b438cc188 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -202,7 +202,7 @@ unsigned long ramfs_nommu_get_unmapped_area(struct file *file,
 					    unsigned long pgoff, unsigned long flags)
 {
 	unsigned long maxpages, lpages, nr, loop, ret;
-	struct inode *inode = file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	struct page **pages = NULL, **ptr, *page;
 	loff_t isize;
 
diff --git a/fs/read_write.c b/fs/read_write.c
index bb34af315280..3ae6dbe828bf 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -163,7 +163,7 @@ EXPORT_SYMBOL(no_llseek);
 
 loff_t default_llseek(struct file *file, loff_t offset, int whence)
 {
-	struct inode *inode = file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	loff_t retval;
 
 	mutex_lock(&inode->i_mutex);
@@ -290,7 +290,7 @@ int rw_verify_area(int read_write, struct file *file, loff_t *ppos, size_t count
 	loff_t pos;
 	int retval = -EINVAL;
 
-	inode = file->f_path.dentry->d_inode;
+	inode = file_inode(file);
 	if (unlikely((ssize_t) count < 0))
 		return retval;
 	pos = *ppos;
@@ -901,8 +901,8 @@ ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, size_t count,
 	if (!(out.file->f_mode & FMODE_WRITE))
 		goto fput_out;
 	retval = -EINVAL;
-	in_inode = in.file->f_path.dentry->d_inode;
-	out_inode = out.file->f_path.dentry->d_inode;
+	in_inode = file_inode(in.file);
+	out_inode = file_inode(out.file);
 	retval = rw_verify_area(WRITE, out.file, &out.file->f_pos, count);
 	if (retval < 0)
 		goto fput_out;
diff --git a/fs/readdir.c b/fs/readdir.c
index 5e69ef533b77..fee38e04fae4 100644
--- a/fs/readdir.c
+++ b/fs/readdir.c
@@ -22,7 +22,7 @@
 
 int vfs_readdir(struct file *file, filldir_t filler, void *buf)
 {
-	struct inode *inode = file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	int res = -ENOTDIR;
 	if (!file->f_op || !file->f_op->readdir)
 		goto out;
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index 50302d6f8895..6165bd4784f6 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -268,7 +268,7 @@ static ssize_t reiserfs_file_write(struct file *file,	/* the file we are going t
 							 * new current position before returning. */
 				   )
 {
-	struct inode *inode = file->f_path.dentry->d_inode;	// Inode of the file that we are writing to.
+	struct inode *inode = file_inode(file);	// Inode of the file that we are writing to.
 	/* To simplify coding at this time, we store
 	   locked pages in array for now */
 	struct reiserfs_transaction_handle th;
diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c
index 0c2185042d5f..15cb5fe6b425 100644
--- a/fs/reiserfs/ioctl.c
+++ b/fs/reiserfs/ioctl.c
@@ -21,7 +21,7 @@
  */
 long reiserfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	unsigned int flags;
 	int err = 0;
 
diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c
index e60e87035bb3..9cc0740adffa 100644
--- a/fs/reiserfs/procfs.c
+++ b/fs/reiserfs/procfs.c
@@ -281,7 +281,7 @@ static int show_oidmap(struct seq_file *m, struct super_block *sb)
 	}
 #if defined( REISERFS_USE_OIDMAPF )
 	if (sb_info->oidmap.use_file && (sb_info->oidmap.mapf != NULL)) {
-		loff_t size = sb_info->oidmap.mapf->f_path.dentry->d_inode->i_size;
+		loff_t size = file_inode(sb_info->oidmap.mapf)->i_size;
 		total_used += size / sizeof(reiserfs_oidinterval_d_t);
 	}
 #endif
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index fd7c5f60b46b..7e8d3a80bdab 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -147,7 +147,7 @@ static const struct address_space_operations romfs_aops = {
  */
 static int romfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 {
-	struct inode *i = filp->f_dentry->d_inode;
+	struct inode *i = file_inode(filp);
 	struct romfs_inode ri;
 	unsigned long offset, maxoff;
 	int j, ino, nextfh;
diff --git a/fs/splice.c b/fs/splice.c
index 6909d89d0da5..963213d56403 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1170,7 +1170,7 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
 	 * randomly drop data for eg socket -> socket splicing. Use the
 	 * piped splicing for that!
 	 */
-	i_mode = in->f_path.dentry->d_inode->i_mode;
+	i_mode = file_inode(in)->i_mode;
 	if (unlikely(!S_ISREG(i_mode) && !S_ISBLK(i_mode)))
 		return -EINVAL;
 
diff --git a/fs/squashfs/dir.c b/fs/squashfs/dir.c
index b381305c9a47..57dc70ebbb19 100644
--- a/fs/squashfs/dir.c
+++ b/fs/squashfs/dir.c
@@ -102,7 +102,7 @@ static int get_dir_index_using_offset(struct super_block *sb,
 
 static int squashfs_readdir(struct file *file, void *dirent, filldir_t filldir)
 {
-	struct inode *inode = file->f_dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
 	u64 block = squashfs_i(inode)->start + msblk->directory_table;
 	int offset = squashfs_i(inode)->offset, length, dir_count, size,
diff --git a/fs/sync.c b/fs/sync.c
index 14eefeb44636..2c5d6639a66a 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -332,7 +332,7 @@ SYSCALL_DEFINE(sync_file_range)(int fd, loff_t offset, loff_t nbytes,
 	if (!f.file)
 		goto out;
 
-	i_mode = f.file->f_path.dentry->d_inode->i_mode;
+	i_mode = file_inode(f.file)->i_mode;
 	ret = -ESPIPE;
 	if (!S_ISREG(i_mode) && !S_ISBLK(i_mode) && !S_ISDIR(i_mode) &&
 			!S_ISLNK(i_mode))
diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c
index 614b2b544880..2ce9a5db6ab5 100644
--- a/fs/sysfs/bin.c
+++ b/fs/sysfs/bin.c
@@ -70,7 +70,7 @@ static ssize_t
 read(struct file *file, char __user *userbuf, size_t bytes, loff_t *off)
 {
 	struct bin_buffer *bb = file->private_data;
-	int size = file->f_path.dentry->d_inode->i_size;
+	int size = file_inode(file)->i_size;
 	loff_t offs = *off;
 	int count = min_t(size_t, bytes, PAGE_SIZE);
 	char *temp;
@@ -140,7 +140,7 @@ static ssize_t write(struct file *file, const char __user *userbuf,
 		     size_t bytes, loff_t *off)
 {
 	struct bin_buffer *bb = file->private_data;
-	int size = file->f_path.dentry->d_inode->i_size;
+	int size = file_inode(file)->i_size;
 	loff_t offs = *off;
 	int count = min_t(size_t, bytes, PAGE_SIZE);
 	char *temp;
@@ -469,7 +469,7 @@ void unmap_bin_file(struct sysfs_dirent *attr_sd)
 	mutex_lock(&sysfs_bin_lock);
 
 	hlist_for_each_entry(bb, tmp, &attr_sd->s_bin_attr.buffers, list) {
-		struct inode *inode = bb->file->f_path.dentry->d_inode;
+		struct inode *inode = file_inode(bb->file);
 
 		unmap_mapping_range(inode->i_mapping, 0, 0, 1);
 	}
diff --git a/fs/sysv/dir.c b/fs/sysv/dir.c
index a77c42157620..3799e8dac3eb 100644
--- a/fs/sysv/dir.c
+++ b/fs/sysv/dir.c
@@ -68,7 +68,7 @@ static struct page * dir_get_page(struct inode *dir, unsigned long n)
 static int sysv_readdir(struct file * filp, void * dirent, filldir_t filldir)
 {
 	unsigned long pos = filp->f_pos;
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	struct super_block *sb = inode->i_sb;
 	unsigned offset = pos & ~PAGE_CACHE_MASK;
 	unsigned long n = pos >> PAGE_CACHE_SHIFT;
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index 8a574776a493..de08c92f2e23 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -352,7 +352,7 @@ static int ubifs_readdir(struct file *file, void *dirent, filldir_t filldir)
 	struct qstr nm;
 	union ubifs_key key;
 	struct ubifs_dent_node *dent;
-	struct inode *dir = file->f_path.dentry->d_inode;
+	struct inode *dir = file_inode(file);
 	struct ubifs_info *c = dir->i_sb->s_fs_info;
 
 	dbg_gen("dir ino %lu, f_pos %#llx", dir->i_ino, file->f_pos);
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 5bc77817f382..fa5b347ec729 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -1444,7 +1444,7 @@ static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma,
 				 struct vm_fault *vmf)
 {
 	struct page *page = vmf->page;
-	struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(vma->vm_file);
 	struct ubifs_info *c = inode->i_sb->s_fs_info;
 	struct timespec now = ubifs_current_time(inode);
 	struct ubifs_budget_req req = { .new_page = 1 };
diff --git a/fs/ubifs/ioctl.c b/fs/ubifs/ioctl.c
index 1a7e2d8bdbe9..648b143606cc 100644
--- a/fs/ubifs/ioctl.c
+++ b/fs/ubifs/ioctl.c
@@ -147,7 +147,7 @@ out_unlock:
 long ubifs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
 	int flags, err;
-	struct inode *inode = file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(file);
 
 	switch (cmd) {
 	case FS_IOC_GETFLAGS:
diff --git a/fs/udf/dir.c b/fs/udf/dir.c
index eb8bfe2b89a5..b3e93f5e17c3 100644
--- a/fs/udf/dir.c
+++ b/fs/udf/dir.c
@@ -186,7 +186,7 @@ out:
 
 static int udf_readdir(struct file *filp, void *dirent, filldir_t filldir)
 {
-	struct inode *dir = filp->f_path.dentry->d_inode;
+	struct inode *dir = file_inode(filp);
 	int result;
 
 	if (filp->f_pos == 0) {
diff --git a/fs/udf/file.c b/fs/udf/file.c
index 77b5953eaac8..4257a1f5302a 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -139,7 +139,7 @@ static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 {
 	ssize_t retval;
 	struct file *file = iocb->ki_filp;
-	struct inode *inode = file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	int err, pos;
 	size_t count = iocb->ki_left;
 	struct udf_inode_info *iinfo = UDF_I(inode);
@@ -178,7 +178,7 @@ static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 
 long udf_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
-	struct inode *inode = filp->f_dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	long old_block, new_block;
 	int result = -EINVAL;
 
diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c
index dbc90994715a..3a75ca09c506 100644
--- a/fs/ufs/dir.c
+++ b/fs/ufs/dir.c
@@ -433,7 +433,7 @@ static int
 ufs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 {
 	loff_t pos = filp->f_pos;
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	struct super_block *sb = inode->i_sb;
 	unsigned int offset = pos & ~PAGE_CACHE_MASK;
 	unsigned long n = pos >> PAGE_CACHE_SHIFT;
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index d0e9c74d3d96..75d854b0c439 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -78,14 +78,14 @@ xfs_swapext(
 		goto out_put_tmp_file;
 	}
 
-	if (IS_SWAPFILE(f.file->f_path.dentry->d_inode) ||
-	    IS_SWAPFILE(tmp.file->f_path.dentry->d_inode)) {
+	if (IS_SWAPFILE(file_inode(f.file)) ||
+	    IS_SWAPFILE(file_inode(tmp.file))) {
 		error = XFS_ERROR(EINVAL);
 		goto out_put_tmp_file;
 	}
 
-	ip = XFS_I(f.file->f_path.dentry->d_inode);
-	tip = XFS_I(tmp.file->f_path.dentry->d_inode);
+	ip = XFS_I(file_inode(f.file));
+	tip = XFS_I(file_inode(tmp.file));
 
 	if (ip->i_mount != tip->i_mount) {
 		error = XFS_ERROR(EINVAL);
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 67284edb84d7..f03bf1a456fb 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -811,7 +811,7 @@ xfs_file_fallocate(
 	loff_t		offset,
 	loff_t		len)
 {
-	struct inode	*inode = file->f_path.dentry->d_inode;
+	struct inode	*inode = file_inode(file);
 	long		error;
 	loff_t		new_size = 0;
 	xfs_flock64_t	bf;
@@ -912,7 +912,7 @@ xfs_file_readdir(
 	void		*dirent,
 	filldir_t	filldir)
 {
-	struct inode	*inode = filp->f_path.dentry->d_inode;
+	struct inode	*inode = file_inode(filp);
 	xfs_inode_t	*ip = XFS_I(inode);
 	int		error;
 	size_t		bufsize;
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index c1c3ef88a260..d681e34c2950 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -80,7 +80,7 @@ xfs_find_handle(
 		f = fdget(hreq->fd);
 		if (!f.file)
 			return -EBADF;
-		inode = f.file->f_path.dentry->d_inode;
+		inode = file_inode(f.file);
 	} else {
 		error = user_lpath((const char __user *)hreq->path, &path);
 		if (error)
@@ -168,7 +168,7 @@ xfs_handle_to_dentry(
 	/*
 	 * Only allow handle opens under a directory.
 	 */
-	if (!S_ISDIR(parfilp->f_path.dentry->d_inode->i_mode))
+	if (!S_ISDIR(file_inode(parfilp)->i_mode))
 		return ERR_PTR(-ENOTDIR);
 
 	if (hlen != sizeof(xfs_handle_t))
@@ -1334,7 +1334,7 @@ xfs_file_ioctl(
 	unsigned int		cmd,
 	unsigned long		p)
 {
-	struct inode		*inode = filp->f_path.dentry->d_inode;
+	struct inode		*inode = file_inode(filp);
 	struct xfs_inode	*ip = XFS_I(inode);
 	struct xfs_mount	*mp = ip->i_mount;
 	void			__user *arg = (void __user *)p;
diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c
index 1244274a5674..63b8fc432151 100644
--- a/fs/xfs/xfs_ioctl32.c
+++ b/fs/xfs/xfs_ioctl32.c
@@ -530,7 +530,7 @@ xfs_file_compat_ioctl(
 	unsigned		cmd,
 	unsigned long		p)
 {
-	struct inode		*inode = filp->f_path.dentry->d_inode;
+	struct inode		*inode = file_inode(filp);
 	struct xfs_inode	*ip = XFS_I(inode);
 	struct xfs_mount	*mp = ip->i_mount;
 	void			__user *arg = (void __user *)p;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 7617ee04f066..3ab69777b4d8 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2217,6 +2217,11 @@ static inline bool execute_ok(struct inode *inode)
 	return (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode);
 }
 
+static inline struct inode *file_inode(struct file *f)
+{
+	return f->f_path.dentry->d_inode;
+}
+
 /*
  * get_write_access() gets write permission for a file.
  * put_write_access() releases this write permission.
@@ -2239,7 +2244,7 @@ static inline int get_write_access(struct inode *inode)
 }
 static inline int deny_write_access(struct file *file)
 {
-	struct inode *inode = file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	return atomic_dec_unless_positive(&inode->i_writecount) ? 0 : -ETXTBSY;
 }
 static inline void put_write_access(struct inode * inode)
@@ -2249,7 +2254,7 @@ static inline void put_write_access(struct inode * inode)
 static inline void allow_write_access(struct file *file)
 {
 	if (file)
-		atomic_inc(&file->f_path.dentry->d_inode->i_writecount);
+		atomic_inc(&file_inode(file)->i_writecount);
 }
 #ifdef CONFIG_IMA
 static inline void i_readcount_dec(struct inode *inode)
diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h
index 0fbfb4646d1b..a78680a92dba 100644
--- a/include/linux/fsnotify.h
+++ b/include/linux/fsnotify.h
@@ -244,7 +244,7 @@ static inline void fsnotify_open(struct file *file)
 static inline void fsnotify_close(struct file *file)
 {
 	struct path *path = &file->f_path;
-	struct inode *inode = file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	fmode_t mode = file->f_mode;
 	__u32 mask = (mode & FMODE_WRITE) ? FS_CLOSE_WRITE : FS_CLOSE_NOWRITE;
 
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 0c80d3f57a5b..70832951f97c 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -281,7 +281,7 @@ static inline struct hstate *hstate_inode(struct inode *i)
 
 static inline struct hstate *hstate_file(struct file *f)
 {
-	return hstate_inode(f->f_dentry->d_inode);
+	return hstate_inode(file_inode(f));
 }
 
 static inline struct hstate *hstate_vma(struct vm_area_struct *vma)
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index f5a051a79273..0e62d84f9f7f 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -291,7 +291,7 @@ int           nlmsvc_unlock_all_by_ip(struct sockaddr *server_addr);
 
 static inline struct inode *nlmsvc_file_inode(struct nlm_file *file)
 {
-	return file->f_file->f_path.dentry->d_inode;
+	return file_inode(file->f_file);
 }
 
 static inline int __nlm_privileged_request4(const struct sockaddr *sap)
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 71a3ca18c873..963a8709f5fb 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -477,7 +477,7 @@ static int mqueue_unlink(struct inode *dir, struct dentry *dentry)
 static ssize_t mqueue_read_file(struct file *filp, char __user *u_data,
 				size_t count, loff_t *off)
 {
-	struct mqueue_inode_info *info = MQUEUE_I(filp->f_path.dentry->d_inode);
+	struct mqueue_inode_info *info = MQUEUE_I(file_inode(filp));
 	char buffer[FILENT_SIZE];
 	ssize_t ret;
 
@@ -498,13 +498,13 @@ static ssize_t mqueue_read_file(struct file *filp, char __user *u_data,
 	if (ret <= 0)
 		return ret;
 
-	filp->f_path.dentry->d_inode->i_atime = filp->f_path.dentry->d_inode->i_ctime = CURRENT_TIME;
+	file_inode(filp)->i_atime = file_inode(filp)->i_ctime = CURRENT_TIME;
 	return ret;
 }
 
 static int mqueue_flush_file(struct file *filp, fl_owner_t id)
 {
-	struct mqueue_inode_info *info = MQUEUE_I(filp->f_path.dentry->d_inode);
+	struct mqueue_inode_info *info = MQUEUE_I(file_inode(filp));
 
 	spin_lock(&info->lock);
 	if (task_tgid(current) == info->notify_owner)
@@ -516,7 +516,7 @@ static int mqueue_flush_file(struct file *filp, fl_owner_t id)
 
 static unsigned int mqueue_poll_file(struct file *filp, struct poll_table_struct *poll_tab)
 {
-	struct mqueue_inode_info *info = MQUEUE_I(filp->f_path.dentry->d_inode);
+	struct mqueue_inode_info *info = MQUEUE_I(file_inode(filp));
 	int retval = 0;
 
 	poll_wait(filp, &info->wait_q, poll_tab);
@@ -973,7 +973,7 @@ SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, const char __user *, u_msg_ptr,
 		goto out;
 	}
 
-	inode = f.file->f_path.dentry->d_inode;
+	inode = file_inode(f.file);
 	if (unlikely(f.file->f_op != &mqueue_file_operations)) {
 		ret = -EBADF;
 		goto out_fput;
@@ -1089,7 +1089,7 @@ SYSCALL_DEFINE5(mq_timedreceive, mqd_t, mqdes, char __user *, u_msg_ptr,
 		goto out;
 	}
 
-	inode = f.file->f_path.dentry->d_inode;
+	inode = file_inode(f.file);
 	if (unlikely(f.file->f_op != &mqueue_file_operations)) {
 		ret = -EBADF;
 		goto out_fput;
@@ -1249,7 +1249,7 @@ retry:
 		goto out;
 	}
 
-	inode = f.file->f_path.dentry->d_inode;
+	inode = file_inode(f.file);
 	if (unlikely(f.file->f_op != &mqueue_file_operations)) {
 		ret = -EBADF;
 		goto out_fput;
@@ -1323,7 +1323,7 @@ SYSCALL_DEFINE3(mq_getsetattr, mqd_t, mqdes,
 		goto out;
 	}
 
-	inode = f.file->f_path.dentry->d_inode;
+	inode = file_inode(f.file);
 	if (unlikely(f.file->f_op != &mqueue_file_operations)) {
 		ret = -EBADF;
 		goto out_fput;
diff --git a/ipc/shm.c b/ipc/shm.c
index 4fa6d8fee730..8c9402d298a0 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -193,7 +193,7 @@ static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
 	if (!is_file_hugepages(shp->shm_file))
 		shmem_lock(shp->shm_file, 0, shp->mlock_user);
 	else if (shp->mlock_user)
-		user_shm_unlock(shp->shm_file->f_path.dentry->d_inode->i_size,
+		user_shm_unlock(file_inode(shp->shm_file)->i_size,
 						shp->mlock_user);
 	fput (shp->shm_file);
 	security_shm_free(shp);
@@ -529,7 +529,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
 	 * shmid gets reported as "inode#" in /proc/pid/maps.
 	 * proc-ps tools use this. Changing this will break them.
 	 */
-	file->f_dentry->d_inode->i_ino = shp->shm_perm.id;
+	file_inode(file)->i_ino = shp->shm_perm.id;
 
 	ns->shm_tot += numpages;
 	error = shp->shm_perm.id;
@@ -678,7 +678,7 @@ static void shm_add_rss_swap(struct shmid_kernel *shp,
 {
 	struct inode *inode;
 
-	inode = shp->shm_file->f_path.dentry->d_inode;
+	inode = file_inode(shp->shm_file);
 
 	if (is_file_hugepages(shp->shm_file)) {
 		struct address_space *mapping = inode->i_mapping;
@@ -1173,7 +1173,7 @@ SYSCALL_DEFINE1(shmdt, char __user *, shmaddr)
 			(vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff) {
 
 
-			size = vma->vm_file->f_path.dentry->d_inode->i_size;
+			size = file_inode(vma->vm_file)->i_size;
 			do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start);
 			/*
 			 * We discovered the size of the shm segment, so
diff --git a/kernel/acct.c b/kernel/acct.c
index 051e071a06e7..0d2981358e08 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -205,7 +205,7 @@ static int acct_on(struct filename *pathname)
 	if (IS_ERR(file))
 		return PTR_ERR(file);
 
-	if (!S_ISREG(file->f_path.dentry->d_inode->i_mode)) {
+	if (!S_ISREG(file_inode(file)->i_mode)) {
 		filp_close(file, NULL);
 		return -EACCES;
 	}
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 4855892798fd..4fe52b3b6ef6 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2637,7 +2637,7 @@ static struct dentry *cgroup_lookup(struct inode *dir, struct dentry *dentry, un
  */
 static inline struct cftype *__file_cft(struct file *file)
 {
-	if (file->f_dentry->d_inode->i_fop != &cgroup_file_operations)
+	if (file_inode(file)->i_fop != &cgroup_file_operations)
 		return ERR_PTR(-EINVAL);
 	return __d_cft(file->f_dentry);
 }
@@ -3852,7 +3852,7 @@ static int cgroup_write_event_control(struct cgroup *cgrp, struct cftype *cft,
 
 	/* the process need read permission on control file */
 	/* AV: shouldn't we check that it's been opened for read instead? */
-	ret = inode_permission(cfile->f_path.dentry->d_inode, MAY_READ);
+	ret = inode_permission(file_inode(cfile), MAY_READ);
 	if (ret < 0)
 		goto fail;
 
@@ -5441,7 +5441,7 @@ struct cgroup_subsys_state *cgroup_css_from_dir(struct file *f, int id)
 	struct inode *inode;
 	struct cgroup_subsys_state *css;
 
-	inode = f->f_dentry->d_inode;
+	inode = file_inode(f);
 	/* check in cgroup filesystem dir */
 	if (inode->i_op != &cgroup_dir_inode_operations)
 		return ERR_PTR(-EBADF);
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 301079d06f24..3b106554b42e 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3682,7 +3682,7 @@ unlock:
 
 static int perf_fasync(int fd, struct file *filp, int on)
 {
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	struct perf_event *event = filp->private_data;
 	int retval;
 
diff --git a/kernel/fork.c b/kernel/fork.c
index c535f33bbb9c..4ff724f81f25 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -413,7 +413,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
 		tmp->vm_next = tmp->vm_prev = NULL;
 		file = tmp->vm_file;
 		if (file) {
-			struct inode *inode = file->f_path.dentry->d_inode;
+			struct inode *inode = file_inode(file);
 			struct address_space *mapping = file->f_mapping;
 
 			get_file(file);
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index 4bd4faa6323a..397db02209ed 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -76,7 +76,7 @@ static int irq_affinity_list_proc_show(struct seq_file *m, void *v)
 static ssize_t write_irq_affinity(int type, struct file *file,
 		const char __user *buffer, size_t count, loff_t *pos)
 {
-	unsigned int irq = (int)(long)PDE(file->f_path.dentry->d_inode)->data;
+	unsigned int irq = (int)(long)PDE(file_inode(file))->data;
 	cpumask_var_t new_value;
 	int err;
 
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index 78e2ecb20165..c057104bf05c 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -251,7 +251,7 @@ SYSCALL_DEFINE2(setns, int, fd, int, nstype)
 		return PTR_ERR(file);
 
 	err = -EINVAL;
-	ei = PROC_I(file->f_dentry->d_inode);
+	ei = PROC_I(file_inode(file));
 	ops = ei->ns_ops;
 	if (nstype && (ops->type != nstype))
 		goto out;
diff --git a/kernel/relay.c b/kernel/relay.c
index e8cd2027abbd..01ab081ac53a 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -1139,7 +1139,7 @@ static ssize_t relay_file_read_subbufs(struct file *filp, loff_t *ppos,
 	if (!desc->count)
 		return 0;
 
-	mutex_lock(&filp->f_path.dentry->d_inode->i_mutex);
+	mutex_lock(&file_inode(filp)->i_mutex);
 	do {
 		if (!relay_file_read_avail(buf, *ppos))
 			break;
@@ -1159,7 +1159,7 @@ static ssize_t relay_file_read_subbufs(struct file *filp, loff_t *ppos,
 			*ppos = relay_file_read_end_pos(buf, read_start, ret);
 		}
 	} while (desc->count && ret);
-	mutex_unlock(&filp->f_path.dentry->d_inode->i_mutex);
+	mutex_unlock(&file_inode(filp)->i_mutex);
 
 	return desc->written;
 }
diff --git a/kernel/sys.c b/kernel/sys.c
index 265b37690421..e3932ea50ec8 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1792,14 +1792,14 @@ SYSCALL_DEFINE1(umask, int, mask)
 static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd)
 {
 	struct fd exe;
-	struct dentry *dentry;
+	struct inode *inode;
 	int err;
 
 	exe = fdget(fd);
 	if (!exe.file)
 		return -EBADF;
 
-	dentry = exe.file->f_path.dentry;
+	inode = file_inode(exe.file);
 
 	/*
 	 * Because the original mm->exe_file points to executable file, make
@@ -1807,11 +1807,11 @@ static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd)
 	 * overall picture.
 	 */
 	err = -EACCES;
-	if (!S_ISREG(dentry->d_inode->i_mode)	||
+	if (!S_ISREG(inode->i_mode)	||
 	    exe.file->f_path.mnt->mnt_flags & MNT_NOEXEC)
 		goto exit;
 
-	err = inode_permission(dentry->d_inode, MAY_EXEC);
+	err = inode_permission(inode, MAY_EXEC);
 	if (err)
 		goto exit;
 
diff --git a/mm/fadvise.c b/mm/fadvise.c
index a47f0f50c89f..6deaa6c04636 100644
--- a/mm/fadvise.c
+++ b/mm/fadvise.c
@@ -38,7 +38,7 @@ SYSCALL_DEFINE(fadvise64_64)(int fd, loff_t offset, loff_t len, int advice)
 	if (!f.file)
 		return -EBADF;
 
-	if (S_ISFIFO(f.file->f_path.dentry->d_inode->i_mode)) {
+	if (S_ISFIFO(file_inode(f.file)->i_mode)) {
 		ret = -ESPIPE;
 		goto out;
 	}
diff --git a/mm/filemap.c b/mm/filemap.c
index 83efee76a5c0..6a48a7ea8f4f 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1711,7 +1711,7 @@ EXPORT_SYMBOL(filemap_fault);
 int filemap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
 	struct page *page = vmf->page;
-	struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(vma->vm_file);
 	int ret = VM_FAULT_LOCKED;
 
 	sb_start_pagefault(inode->i_sb);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 4f3ea0b1e57c..b97e806e5d9a 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -127,7 +127,7 @@ static inline struct hugepage_subpool *subpool_inode(struct inode *inode)
 
 static inline struct hugepage_subpool *subpool_vma(struct vm_area_struct *vma)
 {
-	return subpool_inode(vma->vm_file->f_dentry->d_inode);
+	return subpool_inode(file_inode(vma->vm_file));
 }
 
 /*
@@ -2482,7 +2482,7 @@ static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
 	address = address & huge_page_mask(h);
 	pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) +
 			vma->vm_pgoff;
-	mapping = vma->vm_file->f_dentry->d_inode->i_mapping;
+	mapping = file_inode(vma->vm_file)->i_mapping;
 
 	/*
 	 * Take the mapping lock for the duration of the table walk. As
diff --git a/mm/mmap.c b/mm/mmap.c
index 35730ee9d515..22dfc01e9681 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -202,7 +202,7 @@ static void __remove_shared_vm_struct(struct vm_area_struct *vma,
 		struct file *file, struct address_space *mapping)
 {
 	if (vma->vm_flags & VM_DENYWRITE)
-		atomic_inc(&file->f_path.dentry->d_inode->i_writecount);
+		atomic_inc(&file_inode(file)->i_writecount);
 	if (vma->vm_flags & VM_SHARED)
 		mapping->i_mmap_writable--;
 
@@ -567,7 +567,7 @@ static void __vma_link_file(struct vm_area_struct *vma)
 		struct address_space *mapping = file->f_mapping;
 
 		if (vma->vm_flags & VM_DENYWRITE)
-			atomic_dec(&file->f_path.dentry->d_inode->i_writecount);
+			atomic_dec(&file_inode(file)->i_writecount);
 		if (vma->vm_flags & VM_SHARED)
 			mapping->i_mmap_writable++;
 
@@ -1217,7 +1217,7 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
 			return -EAGAIN;
 	}
 
-	inode = file ? file->f_path.dentry->d_inode : NULL;
+	inode = file ? file_inode(file) : NULL;
 
 	if (file) {
 		switch (flags & MAP_TYPE) {
@@ -1403,7 +1403,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 	int error;
 	struct rb_node **rb_link, *rb_parent;
 	unsigned long charged = 0;
-	struct inode *inode =  file ? file->f_path.dentry->d_inode : NULL;
+	struct inode *inode =  file ? file_inode(file) : NULL;
 
 	/* Clear old maps */
 	error = -ENOMEM;
diff --git a/mm/nommu.c b/mm/nommu.c
index 79c3cac87afa..f87d2173d0d0 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -941,7 +941,7 @@ static int validate_mmap_request(struct file *file,
 		 */
 		mapping = file->f_mapping;
 		if (!mapping)
-			mapping = file->f_path.dentry->d_inode->i_mapping;
+			mapping = file_inode(file)->i_mapping;
 
 		capabilities = 0;
 		if (mapping && mapping->backing_dev_info)
@@ -950,7 +950,7 @@ static int validate_mmap_request(struct file *file,
 		if (!capabilities) {
 			/* no explicit capabilities set, so assume some
 			 * defaults */
-			switch (file->f_path.dentry->d_inode->i_mode & S_IFMT) {
+			switch (file_inode(file)->i_mode & S_IFMT) {
 			case S_IFREG:
 			case S_IFBLK:
 				capabilities = BDI_CAP_MAP_COPY;
@@ -985,11 +985,11 @@ static int validate_mmap_request(struct file *file,
 			    !(file->f_mode & FMODE_WRITE))
 				return -EACCES;
 
-			if (IS_APPEND(file->f_path.dentry->d_inode) &&
+			if (IS_APPEND(file_inode(file)) &&
 			    (file->f_mode & FMODE_WRITE))
 				return -EACCES;
 
-			if (locks_verify_locked(file->f_path.dentry->d_inode))
+			if (locks_verify_locked(file_inode(file)))
 				return -EAGAIN;
 
 			if (!(capabilities & BDI_CAP_MAP_DIRECT))
@@ -1322,8 +1322,8 @@ unsigned long do_mmap_pgoff(struct file *file,
 				continue;
 
 			/* search for overlapping mappings on the same file */
-			if (pregion->vm_file->f_path.dentry->d_inode !=
-			    file->f_path.dentry->d_inode)
+			if (file_inode(pregion->vm_file) !=
+			    file_inode(file))
 				continue;
 
 			if (pregion->vm_pgoff >= pgend)
diff --git a/mm/shmem.c b/mm/shmem.c
index 5dd56f6efdbd..814d5546cb35 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1295,7 +1295,7 @@ unlock:
 
 static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
-	struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(vma->vm_file);
 	int error;
 	int ret = VM_FAULT_LOCKED;
 
@@ -1313,14 +1313,14 @@ static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 #ifdef CONFIG_NUMA
 static int shmem_set_policy(struct vm_area_struct *vma, struct mempolicy *mpol)
 {
-	struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(vma->vm_file);
 	return mpol_set_shared_policy(&SHMEM_I(inode)->policy, vma, mpol);
 }
 
 static struct mempolicy *shmem_get_policy(struct vm_area_struct *vma,
 					  unsigned long addr)
 {
-	struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(vma->vm_file);
 	pgoff_t index;
 
 	index = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
@@ -1330,7 +1330,7 @@ static struct mempolicy *shmem_get_policy(struct vm_area_struct *vma,
 
 int shmem_lock(struct file *file, int lock, struct user_struct *user)
 {
-	struct inode *inode = file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	struct shmem_inode_info *info = SHMEM_I(inode);
 	int retval = -ENOMEM;
 
@@ -1465,7 +1465,7 @@ shmem_write_end(struct file *file, struct address_space *mapping,
 
 static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_t *desc, read_actor_t actor)
 {
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	struct address_space *mapping = inode->i_mapping;
 	pgoff_t index;
 	unsigned long offset;
@@ -1808,7 +1808,7 @@ static loff_t shmem_file_llseek(struct file *file, loff_t offset, int whence)
 static long shmem_fallocate(struct file *file, int mode, loff_t offset,
 							 loff_t len)
 {
-	struct inode *inode = file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
 	struct shmem_falloc shmem_falloc;
 	pgoff_t start, index, end;
diff --git a/mm/swapfile.c b/mm/swapfile.c
index e97a0e5aea91..ed393002fc09 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1699,7 +1699,7 @@ static int swap_show(struct seq_file *swap, void *v)
 	len = seq_path(swap, &file->f_path, " \t\n\\");
 	seq_printf(swap, "%*s%s\t%u\t%u\t%d\n",
 			len < 40 ? 40 - len : 1, " ",
-			S_ISBLK(file->f_path.dentry->d_inode->i_mode) ?
+			S_ISBLK(file_inode(file)->i_mode) ?
 				"partition" : "file\t",
 			si->pages << (PAGE_SHIFT - 10),
 			si->inuse_pages << (PAGE_SHIFT - 10),
diff --git a/net/atm/proc.c b/net/atm/proc.c
index 0d020de8d233..2518b5f8bb8a 100644
--- a/net/atm/proc.c
+++ b/net/atm/proc.c
@@ -385,7 +385,7 @@ static ssize_t proc_dev_atm_read(struct file *file, char __user *buf,
 	page = get_zeroed_page(GFP_KERNEL);
 	if (!page)
 		return -ENOMEM;
-	dev = PDE(file->f_path.dentry->d_inode)->data;
+	dev = PDE(file_inode(file))->data;
 	if (!dev->ops->proc_read)
 		length = -EINVAL;
 	else {
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 8acce01b6dab..80e271d9e64b 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -344,7 +344,7 @@ struct net *get_net_ns_by_fd(int fd)
 	if (IS_ERR(file))
 		return ERR_CAST(file);
 
-	ei = PROC_I(file->f_dentry->d_inode);
+	ei = PROC_I(file_inode(file));
 	if (ei->ns_ops == &netns_operations)
 		net = get_net(ei->ns);
 	else
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 75e33a7048f8..5852b249054f 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -657,7 +657,7 @@ static int clusterip_proc_release(struct inode *inode, struct file *file)
 static ssize_t clusterip_proc_write(struct file *file, const char __user *input,
 				size_t size, loff_t *ofs)
 {
-	struct clusterip_config *c = PDE(file->f_path.dentry->d_inode)->data;
+	struct clusterip_config *c = PDE(file_inode(file))->data;
 #define PROC_WRITELEN	10
 	char buffer[PROC_WRITELEN+1];
 	unsigned long nodenum;
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index 978efc9b555a..c3cdcb47c149 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -540,7 +540,7 @@ static ssize_t
 recent_mt_proc_write(struct file *file, const char __user *input,
 		     size_t size, loff_t *loff)
 {
-	const struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode);
+	const struct proc_dir_entry *pde = PDE(file_inode(file));
 	struct recent_table *t = pde->data;
 	struct recent_entry *e;
 	char buf[sizeof("+b335:1d35:1e55:dead:c0de:1715:5afe:c0de")];
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index c0353d55d56f..c1ee3a8cf111 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -809,7 +809,7 @@ static struct sock *netlink_getsockbyportid(struct sock *ssk, u32 portid)
 
 struct sock *netlink_getsockbyfilp(struct file *filp)
 {
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	struct sock *sock;
 
 	if (!S_ISSOCK(inode->i_mode))
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 6e5c824b040b..294b4bf7ec6e 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -616,7 +616,7 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
 	const void *p, *end;
 	void *buf;
 	struct gss_upcall_msg *gss_msg;
-	struct rpc_pipe *pipe = RPC_I(filp->f_dentry->d_inode)->pipe;
+	struct rpc_pipe *pipe = RPC_I(file_inode(filp))->pipe;
 	struct gss_cl_ctx *ctx;
 	uid_t uid;
 	ssize_t err = -EFBIG;
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 9afa4393c217..f3897d10f649 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -755,7 +755,7 @@ static ssize_t cache_read(struct file *filp, char __user *buf, size_t count,
 {
 	struct cache_reader *rp = filp->private_data;
 	struct cache_request *rq;
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	int err;
 
 	if (count == 0)
@@ -886,7 +886,7 @@ static ssize_t cache_write(struct file *filp, const char __user *buf,
 			   struct cache_detail *cd)
 {
 	struct address_space *mapping = filp->f_mapping;
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	ssize_t ret = -EINVAL;
 
 	if (!cd->cache_parse)
@@ -1454,7 +1454,7 @@ static ssize_t write_flush(struct file *file, const char __user *buf,
 static ssize_t cache_read_procfs(struct file *filp, char __user *buf,
 				 size_t count, loff_t *ppos)
 {
-	struct cache_detail *cd = PDE(filp->f_path.dentry->d_inode)->data;
+	struct cache_detail *cd = PDE(file_inode(filp))->data;
 
 	return cache_read(filp, buf, count, ppos, cd);
 }
@@ -1462,14 +1462,14 @@ static ssize_t cache_read_procfs(struct file *filp, char __user *buf,
 static ssize_t cache_write_procfs(struct file *filp, const char __user *buf,
 				  size_t count, loff_t *ppos)
 {
-	struct cache_detail *cd = PDE(filp->f_path.dentry->d_inode)->data;
+	struct cache_detail *cd = PDE(file_inode(filp))->data;
 
 	return cache_write(filp, buf, count, ppos, cd);
 }
 
 static unsigned int cache_poll_procfs(struct file *filp, poll_table *wait)
 {
-	struct cache_detail *cd = PDE(filp->f_path.dentry->d_inode)->data;
+	struct cache_detail *cd = PDE(file_inode(filp))->data;
 
 	return cache_poll(filp, wait, cd);
 }
@@ -1477,7 +1477,7 @@ static unsigned int cache_poll_procfs(struct file *filp, poll_table *wait)
 static long cache_ioctl_procfs(struct file *filp,
 			       unsigned int cmd, unsigned long arg)
 {
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	struct cache_detail *cd = PDE(inode)->data;
 
 	return cache_ioctl(inode, filp, cmd, arg, cd);
@@ -1546,7 +1546,7 @@ static int release_flush_procfs(struct inode *inode, struct file *filp)
 static ssize_t read_flush_procfs(struct file *filp, char __user *buf,
 			    size_t count, loff_t *ppos)
 {
-	struct cache_detail *cd = PDE(filp->f_path.dentry->d_inode)->data;
+	struct cache_detail *cd = PDE(file_inode(filp))->data;
 
 	return read_flush(filp, buf, count, ppos, cd);
 }
@@ -1555,7 +1555,7 @@ static ssize_t write_flush_procfs(struct file *filp,
 				  const char __user *buf,
 				  size_t count, loff_t *ppos)
 {
-	struct cache_detail *cd = PDE(filp->f_path.dentry->d_inode)->data;
+	struct cache_detail *cd = PDE(file_inode(filp))->data;
 
 	return write_flush(filp, buf, count, ppos, cd);
 }
@@ -1686,7 +1686,7 @@ EXPORT_SYMBOL_GPL(cache_destroy_net);
 static ssize_t cache_read_pipefs(struct file *filp, char __user *buf,
 				 size_t count, loff_t *ppos)
 {
-	struct cache_detail *cd = RPC_I(filp->f_path.dentry->d_inode)->private;
+	struct cache_detail *cd = RPC_I(file_inode(filp))->private;
 
 	return cache_read(filp, buf, count, ppos, cd);
 }
@@ -1694,14 +1694,14 @@ static ssize_t cache_read_pipefs(struct file *filp, char __user *buf,
 static ssize_t cache_write_pipefs(struct file *filp, const char __user *buf,
 				  size_t count, loff_t *ppos)
 {
-	struct cache_detail *cd = RPC_I(filp->f_path.dentry->d_inode)->private;
+	struct cache_detail *cd = RPC_I(file_inode(filp))->private;
 
 	return cache_write(filp, buf, count, ppos, cd);
 }
 
 static unsigned int cache_poll_pipefs(struct file *filp, poll_table *wait)
 {
-	struct cache_detail *cd = RPC_I(filp->f_path.dentry->d_inode)->private;
+	struct cache_detail *cd = RPC_I(file_inode(filp))->private;
 
 	return cache_poll(filp, wait, cd);
 }
@@ -1709,7 +1709,7 @@ static unsigned int cache_poll_pipefs(struct file *filp, poll_table *wait)
 static long cache_ioctl_pipefs(struct file *filp,
 			      unsigned int cmd, unsigned long arg)
 {
-	struct inode *inode = filp->f_dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	struct cache_detail *cd = RPC_I(inode)->private;
 
 	return cache_ioctl(inode, filp, cmd, arg, cd);
@@ -1778,7 +1778,7 @@ static int release_flush_pipefs(struct inode *inode, struct file *filp)
 static ssize_t read_flush_pipefs(struct file *filp, char __user *buf,
 			    size_t count, loff_t *ppos)
 {
-	struct cache_detail *cd = RPC_I(filp->f_path.dentry->d_inode)->private;
+	struct cache_detail *cd = RPC_I(file_inode(filp))->private;
 
 	return read_flush(filp, buf, count, ppos, cd);
 }
@@ -1787,7 +1787,7 @@ static ssize_t write_flush_pipefs(struct file *filp,
 				  const char __user *buf,
 				  size_t count, loff_t *ppos)
 {
-	struct cache_detail *cd = RPC_I(filp->f_path.dentry->d_inode)->private;
+	struct cache_detail *cd = RPC_I(file_inode(filp))->private;
 
 	return write_flush(filp, buf, count, ppos, cd);
 }
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index fd10981ea792..7b9b40224a27 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -284,7 +284,7 @@ out:
 static ssize_t
 rpc_pipe_read(struct file *filp, char __user *buf, size_t len, loff_t *offset)
 {
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	struct rpc_pipe *pipe;
 	struct rpc_pipe_msg *msg;
 	int res = 0;
@@ -328,7 +328,7 @@ out_unlock:
 static ssize_t
 rpc_pipe_write(struct file *filp, const char __user *buf, size_t len, loff_t *offset)
 {
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	int res;
 
 	mutex_lock(&inode->i_mutex);
@@ -342,7 +342,7 @@ rpc_pipe_write(struct file *filp, const char __user *buf, size_t len, loff_t *of
 static unsigned int
 rpc_pipe_poll(struct file *filp, struct poll_table_struct *wait)
 {
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	struct rpc_inode *rpci = RPC_I(inode);
 	unsigned int mask = POLLOUT | POLLWRNORM;
 
@@ -360,7 +360,7 @@ rpc_pipe_poll(struct file *filp, struct poll_table_struct *wait)
 static long
 rpc_pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	struct rpc_pipe *pipe;
 	int len;
 
@@ -830,7 +830,7 @@ static int rpc_rmdir_depopulate(struct dentry *dentry,
  * responses to upcalls.  They will result in calls to @msg->downcall.
  *
  * The @private argument passed here will be available to all these methods
- * from the file pointer, via RPC_I(file->f_dentry->d_inode)->private.
+ * from the file pointer, via RPC_I(file_inode(file))->private.
  */
 struct dentry *rpc_mkpipe_dentry(struct dentry *parent, const char *name,
 				 void *private, struct rpc_pipe *pipe)
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index b6f4b994eb35..d0f6545b0010 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -99,7 +99,7 @@ unsigned int unix_tot_inflight;
 struct sock *unix_get_socket(struct file *filp)
 {
 	struct sock *u_sock = NULL;
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 
 	/*
 	 *	Socket ?
diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c
index 60f0c76a27d3..859abdaac1ea 100644
--- a/security/apparmor/domain.c
+++ b/security/apparmor/domain.c
@@ -349,8 +349,8 @@ int apparmor_bprm_set_creds(struct linux_binprm *bprm)
 	unsigned int state;
 	struct file_perms perms = {};
 	struct path_cond cond = {
-		bprm->file->f_path.dentry->d_inode->i_uid,
-		bprm->file->f_path.dentry->d_inode->i_mode
+		file_inode(bprm->file)->i_uid,
+		file_inode(bprm->file)->i_mode
 	};
 	const char *name = NULL, *target = NULL, *info = NULL;
 	int error = cap_bprm_set_creds(bprm);
diff --git a/security/apparmor/file.c b/security/apparmor/file.c
index cd21ec5b90af..fdaa50cb1876 100644
--- a/security/apparmor/file.c
+++ b/security/apparmor/file.c
@@ -449,8 +449,8 @@ int aa_file_perm(int op, struct aa_profile *profile, struct file *file,
 		 u32 request)
 {
 	struct path_cond cond = {
-		.uid = file->f_path.dentry->d_inode->i_uid,
-		.mode = file->f_path.dentry->d_inode->i_mode
+		.uid = file_inode(file)->i_uid,
+		.mode = file_inode(file)->i_mode
 	};
 
 	return aa_path_perm(op, profile, &file->f_path, PATH_DELEGATE_DELETED,
diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c
index 8c2a7f6b35e2..b21830eced41 100644
--- a/security/apparmor/lsm.c
+++ b/security/apparmor/lsm.c
@@ -379,7 +379,7 @@ static int apparmor_file_open(struct file *file, const struct cred *cred)
 	struct aa_profile *profile;
 	int error = 0;
 
-	if (!mediated_filesystem(file->f_path.dentry->d_inode))
+	if (!mediated_filesystem(file_inode(file)))
 		return 0;
 
 	/* If in exec, permission is handled by bprm hooks.
@@ -394,7 +394,7 @@ static int apparmor_file_open(struct file *file, const struct cred *cred)
 
 	profile = aa_cred_profile(cred);
 	if (!unconfined(profile)) {
-		struct inode *inode = file->f_path.dentry->d_inode;
+		struct inode *inode = file_inode(file);
 		struct path_cond cond = { inode->i_uid, inode->i_mode };
 
 		error = aa_path_perm(OP_OPEN, profile, &file->f_path, 0,
@@ -432,7 +432,7 @@ static int common_file_perm(int op, struct file *file, u32 mask)
 	BUG_ON(!fprofile);
 
 	if (!file->f_path.mnt ||
-	    !mediated_filesystem(file->f_path.dentry->d_inode))
+	    !mediated_filesystem(file_inode(file)))
 		return 0;
 
 	profile = __aa_current_profile();
diff --git a/security/integrity/ima/ima_api.c b/security/integrity/ima/ima_api.c
index 0cea3db21657..27cb9eb42cc8 100644
--- a/security/integrity/ima/ima_api.c
+++ b/security/integrity/ima/ima_api.c
@@ -140,12 +140,12 @@ int ima_must_measure(struct inode *inode, int mask, int function)
 int ima_collect_measurement(struct integrity_iint_cache *iint,
 			    struct file *file)
 {
-	struct inode *inode = file->f_dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	const char *filename = file->f_dentry->d_name.name;
 	int result = 0;
 
 	if (!(iint->flags & IMA_COLLECTED)) {
-		u64 i_version = file->f_dentry->d_inode->i_version;
+		u64 i_version = file_inode(file)->i_version;
 
 		iint->ima_xattr.type = IMA_XATTR_DIGEST;
 		result = ima_calc_hash(file, iint->ima_xattr.digest);
@@ -182,7 +182,7 @@ void ima_store_measurement(struct integrity_iint_cache *iint,
 	const char *op = "add_template_measure";
 	const char *audit_cause = "ENOMEM";
 	int result = -ENOMEM;
-	struct inode *inode = file->f_dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	struct ima_template_entry *entry;
 	int violation = 0;
 
diff --git a/security/integrity/ima/ima_crypto.c b/security/integrity/ima/ima_crypto.c
index b21ee5b5495a..81dcaa26401e 100644
--- a/security/integrity/ima/ima_crypto.c
+++ b/security/integrity/ima/ima_crypto.c
@@ -63,7 +63,7 @@ int ima_calc_hash(struct file *file, char *digest)
 		file->f_mode |= FMODE_READ;
 		read = 1;
 	}
-	i_size = i_size_read(file->f_dentry->d_inode);
+	i_size = i_size_read(file_inode(file));
 	while (offset < i_size) {
 		int rbuf_len;
 
diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
index dba965de90d3..e7a147f7d371 100644
--- a/security/integrity/ima/ima_main.c
+++ b/security/integrity/ima/ima_main.c
@@ -132,7 +132,7 @@ static void ima_check_last_writer(struct integrity_iint_cache *iint,
  */
 void ima_file_free(struct file *file)
 {
-	struct inode *inode = file->f_dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	struct integrity_iint_cache *iint;
 
 	if (!iint_initialized || !S_ISREG(inode->i_mode))
@@ -148,7 +148,7 @@ void ima_file_free(struct file *file)
 static int process_measurement(struct file *file, const unsigned char *filename,
 			       int mask, int function)
 {
-	struct inode *inode = file->f_dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	struct integrity_iint_cache *iint;
 	unsigned char *pathname = NULL, *pathbuf = NULL;
 	int rc = -ENOMEM, action, must_appraise;
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 61a53367d029..2963c689f9c0 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -1528,7 +1528,7 @@ static int file_has_perm(const struct cred *cred,
 			 u32 av)
 {
 	struct file_security_struct *fsec = file->f_security;
-	struct inode *inode = file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	struct common_audit_data ad;
 	u32 sid = cred_sid(cred);
 	int rc;
@@ -1957,7 +1957,7 @@ static int selinux_bprm_set_creds(struct linux_binprm *bprm)
 	struct task_security_struct *new_tsec;
 	struct inode_security_struct *isec;
 	struct common_audit_data ad;
-	struct inode *inode = bprm->file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(bprm->file);
 	int rc;
 
 	rc = cap_bprm_set_creds(bprm);
@@ -2929,7 +2929,7 @@ static void selinux_inode_getsecid(const struct inode *inode, u32 *secid)
 static int selinux_revalidate_file_permission(struct file *file, int mask)
 {
 	const struct cred *cred = current_cred();
-	struct inode *inode = file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(file);
 
 	/* file_mask_to_av won't add FILE__WRITE if MAY_APPEND is set */
 	if ((file->f_flags & O_APPEND) && (mask & MAY_WRITE))
@@ -2941,7 +2941,7 @@ static int selinux_revalidate_file_permission(struct file *file, int mask)
 
 static int selinux_file_permission(struct file *file, int mask)
 {
-	struct inode *inode = file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	struct file_security_struct *fsec = file->f_security;
 	struct inode_security_struct *isec = inode->i_security;
 	u32 sid = current_sid();
@@ -3218,7 +3218,7 @@ static int selinux_file_open(struct file *file, const struct cred *cred)
 	struct inode_security_struct *isec;
 
 	fsec = file->f_security;
-	isec = file->f_path.dentry->d_inode->i_security;
+	isec = file_inode(file)->i_security;
 	/*
 	 * Save inode label and policy sequence number
 	 * at open-time so that selinux_file_permission
diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c
index 3a6e8731646c..ff427733c290 100644
--- a/security/selinux/selinuxfs.c
+++ b/security/selinux/selinuxfs.c
@@ -202,7 +202,7 @@ static ssize_t sel_read_handle_unknown(struct file *filp, char __user *buf,
 {
 	char tmpbuf[TMPBUFLEN];
 	ssize_t length;
-	ino_t ino = filp->f_path.dentry->d_inode->i_ino;
+	ino_t ino = file_inode(filp)->i_ino;
 	int handle_unknown = (ino == SEL_REJECT_UNKNOWN) ?
 		security_get_reject_unknown() : !security_get_allow_unknown();
 
@@ -671,7 +671,7 @@ static ssize_t (*write_op[])(struct file *, char *, size_t) = {
 
 static ssize_t selinux_transaction_write(struct file *file, const char __user *buf, size_t size, loff_t *pos)
 {
-	ino_t ino = file->f_path.dentry->d_inode->i_ino;
+	ino_t ino = file_inode(file)->i_ino;
 	char *data;
 	ssize_t rv;
 
@@ -1042,8 +1042,7 @@ static ssize_t sel_read_bool(struct file *filep, char __user *buf,
 	ssize_t length;
 	ssize_t ret;
 	int cur_enforcing;
-	struct inode *inode = filep->f_path.dentry->d_inode;
-	unsigned index = inode->i_ino & SEL_INO_MASK;
+	unsigned index = file_inode(filep)->i_ino & SEL_INO_MASK;
 	const char *name = filep->f_path.dentry->d_name.name;
 
 	mutex_lock(&sel_mutex);
@@ -1077,8 +1076,7 @@ static ssize_t sel_write_bool(struct file *filep, const char __user *buf,
 	char *page = NULL;
 	ssize_t length;
 	int new_value;
-	struct inode *inode = filep->f_path.dentry->d_inode;
-	unsigned index = inode->i_ino & SEL_INO_MASK;
+	unsigned index = file_inode(filep)->i_ino & SEL_INO_MASK;
 	const char *name = filep->f_path.dentry->d_name.name;
 
 	mutex_lock(&sel_mutex);
@@ -1486,13 +1484,11 @@ static int sel_make_avc_files(struct dentry *dir)
 static ssize_t sel_read_initcon(struct file *file, char __user *buf,
 				size_t count, loff_t *ppos)
 {
-	struct inode *inode;
 	char *con;
 	u32 sid, len;
 	ssize_t ret;
 
-	inode = file->f_path.dentry->d_inode;
-	sid = inode->i_ino&SEL_INO_MASK;
+	sid = file_inode(file)->i_ino&SEL_INO_MASK;
 	ret = security_sid_to_context(sid, &con, &len);
 	if (ret)
 		return ret;
@@ -1553,7 +1549,7 @@ static inline u32 sel_ino_to_perm(unsigned long ino)
 static ssize_t sel_read_class(struct file *file, char __user *buf,
 				size_t count, loff_t *ppos)
 {
-	unsigned long ino = file->f_path.dentry->d_inode->i_ino;
+	unsigned long ino = file_inode(file)->i_ino;
 	char res[TMPBUFLEN];
 	ssize_t len = snprintf(res, sizeof(res), "%d", sel_ino_to_class(ino));
 	return simple_read_from_buffer(buf, count, ppos, res, len);
@@ -1567,7 +1563,7 @@ static const struct file_operations sel_class_ops = {
 static ssize_t sel_read_perm(struct file *file, char __user *buf,
 				size_t count, loff_t *ppos)
 {
-	unsigned long ino = file->f_path.dentry->d_inode->i_ino;
+	unsigned long ino = file_inode(file)->i_ino;
 	char res[TMPBUFLEN];
 	ssize_t len = snprintf(res, sizeof(res), "%d", sel_ino_to_perm(ino));
 	return simple_read_from_buffer(buf, count, ppos, res, len);
@@ -1584,7 +1580,7 @@ static ssize_t sel_read_policycap(struct file *file, char __user *buf,
 	int value;
 	char tmpbuf[TMPBUFLEN];
 	ssize_t length;
-	unsigned long i_ino = file->f_path.dentry->d_inode->i_ino;
+	unsigned long i_ino = file_inode(file)->i_ino;
 
 	value = security_policycap_supported(i_ino & SEL_INO_MASK);
 	length = scnprintf(tmpbuf, TMPBUFLEN, "%d", value);
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index 38be92ce901e..fa64740abb59 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -456,7 +456,7 @@ static int smack_sb_umount(struct vfsmount *mnt, int flags)
  */
 static int smack_bprm_set_creds(struct linux_binprm *bprm)
 {
-	struct inode *inode = bprm->file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(bprm->file);
 	struct task_smack *bsp = bprm->cred->security;
 	struct inode_smack *isp;
 	int rc;
@@ -1187,21 +1187,15 @@ static int smack_mmap_file(struct file *file,
 	char *msmack;
 	char *osmack;
 	struct inode_smack *isp;
-	struct dentry *dp;
 	int may;
 	int mmay;
 	int tmay;
 	int rc;
 
-	if (file == NULL || file->f_dentry == NULL)
-		return 0;
-
-	dp = file->f_dentry;
-
-	if (dp->d_inode == NULL)
+	if (file == NULL)
 		return 0;
 
-	isp = dp->d_inode->i_security;
+	isp = file_inode(file)->i_security;
 	if (isp->smk_mmap == NULL)
 		return 0;
 	msmack = isp->smk_mmap;
@@ -1359,7 +1353,7 @@ static int smack_file_receive(struct file *file)
  */
 static int smack_file_open(struct file *file, const struct cred *cred)
 {
-	struct inode_smack *isp = file->f_path.dentry->d_inode->i_security;
+	struct inode_smack *isp = file_inode(file)->i_security;
 
 	file->f_security = isp->smk_inode;
 
diff --git a/security/tomoyo/securityfs_if.c b/security/tomoyo/securityfs_if.c
index 8592f2fc6ebb..fcf32783b66b 100644
--- a/security/tomoyo/securityfs_if.c
+++ b/security/tomoyo/securityfs_if.c
@@ -135,7 +135,7 @@ static const struct file_operations tomoyo_self_operations = {
  */
 static int tomoyo_open(struct inode *inode, struct file *file)
 {
-	const int key = ((u8 *) file->f_path.dentry->d_inode->i_private)
+	const int key = ((u8 *) file_inode(file)->i_private)
 		- ((u8 *) NULL);
 	return tomoyo_open_control(key, file);
 }
diff --git a/sound/core/info.c b/sound/core/info.c
index 6b368d25073b..5bb97e7d325a 100644
--- a/sound/core/info.c
+++ b/sound/core/info.c
@@ -496,7 +496,7 @@ static long snd_info_entry_ioctl(struct file *file, unsigned int cmd,
 
 static int snd_info_entry_mmap(struct file *file, struct vm_area_struct *vma)
 {
-	struct inode *inode = file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	struct snd_info_private_data *data;
 	struct snd_info_entry *entry;
 
diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c
index 09b4286c65f9..71ae86ca64ac 100644
--- a/sound/core/pcm_native.c
+++ b/sound/core/pcm_native.c
@@ -1586,7 +1586,7 @@ static struct file *snd_pcm_file_fd(int fd, int *fput_needed)
 	file = fget_light(fd, fput_needed);
 	if (!file)
 		return NULL;
-	inode = file->f_path.dentry->d_inode;
+	inode = file_inode(file);
 	if (!S_ISCHR(inode->i_mode) ||
 	    imajor(inode) != snd_major) {
 		fput_light(file, *fput_needed);
diff --git a/sound/oss/msnd_pinnacle.c b/sound/oss/msnd_pinnacle.c
index 536c4c0514d3..11ff7c55240c 100644
--- a/sound/oss/msnd_pinnacle.c
+++ b/sound/oss/msnd_pinnacle.c
@@ -642,7 +642,7 @@ static int mixer_ioctl(unsigned int cmd, unsigned long arg)
 
 static long dev_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
-	int minor = iminor(file->f_path.dentry->d_inode);
+	int minor = iminor(file_inode(file));
 	int ret;
 
 	if (cmd == OSS_GETVERSION) {
@@ -1012,7 +1012,7 @@ static int dsp_write(const char __user *buf, size_t len)
 
 static ssize_t dev_read(struct file *file, char __user *buf, size_t count, loff_t *off)
 {
-	int minor = iminor(file->f_path.dentry->d_inode);
+	int minor = iminor(file_inode(file));
 	if (minor == dev.dsp_minor)
 		return dsp_read(buf, count);
 	else
@@ -1021,7 +1021,7 @@ static ssize_t dev_read(struct file *file, char __user *buf, size_t count, loff_
 
 static ssize_t dev_write(struct file *file, const char __user *buf, size_t count, loff_t *off)
 {
-	int minor = iminor(file->f_path.dentry->d_inode);
+	int minor = iminor(file_inode(file));
 	if (minor == dev.dsp_minor)
 		return dsp_write(buf, count);
 	else
diff --git a/sound/oss/soundcard.c b/sound/oss/soundcard.c
index 7c7793a0eb25..e7780349cc55 100644
--- a/sound/oss/soundcard.c
+++ b/sound/oss/soundcard.c
@@ -143,7 +143,7 @@ static int get_mixer_levels(void __user * arg)
 
 static ssize_t sound_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
 {
-	int dev = iminor(file->f_path.dentry->d_inode);
+	int dev = iminor(file_inode(file));
 	int ret = -EINVAL;
 
 	/*
@@ -176,7 +176,7 @@ static ssize_t sound_read(struct file *file, char __user *buf, size_t count, lof
 
 static ssize_t sound_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
 {
-	int dev = iminor(file->f_path.dentry->d_inode);
+	int dev = iminor(file_inode(file));
 	int ret = -EINVAL;
 	
 	mutex_lock(&soundcard_mutex);
@@ -333,7 +333,7 @@ static int sound_mixer_ioctl(int mixdev, unsigned int cmd, void __user *arg)
 static long sound_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
 	int len = 0, dtype;
-	int dev = iminor(file->f_dentry->d_inode);
+	int dev = iminor(file_inode(file));
 	long ret = -EINVAL;
 	void __user *p = (void __user *)arg;
 
@@ -406,7 +406,7 @@ static long sound_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 
 static unsigned int sound_poll(struct file *file, poll_table * wait)
 {
-	struct inode *inode = file->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	int dev = iminor(inode);
 
 	DEB(printk("sound_poll(dev=%d)\n", dev));
@@ -431,7 +431,7 @@ static int sound_mmap(struct file *file, struct vm_area_struct *vma)
 	int dev_class;
 	unsigned long size;
 	struct dma_buffparms *dmap = NULL;
-	int dev = iminor(file->f_path.dentry->d_inode);
+	int dev = iminor(file_inode(file));
 
 	dev_class = dev & 0x0f;
 	dev >>= 4;
diff --git a/sound/sound_firmware.c b/sound/sound_firmware.c
index 37711a5d0d6b..e14903468051 100644
--- a/sound/sound_firmware.c
+++ b/sound/sound_firmware.c
@@ -19,7 +19,7 @@ static int do_mod_firmware_load(const char *fn, char **fp)
 		printk(KERN_INFO "Unable to load '%s'.\n", fn);
 		return 0;
 	}
-	l = i_size_read(filp->f_path.dentry->d_inode);
+	l = i_size_read(file_inode(filp));
 	if (l <= 0 || l > 131072)
 	{
 		printk(KERN_INFO "Invalid firmware '%s'\n", fn);
-- 
cgit v1.2.3


From ad8ca3743cb7eef0eb8a2e87943c513201685eec Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Tue, 15 Jan 2013 12:54:29 -0500
Subject: vfs: remove d_path_with_unreachable

The last caller was removed >2 years ago in commit 7b2a69ba7.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/dcache.c            | 31 -------------------------------
 include/linux/dcache.h |  1 -
 2 files changed, 32 deletions(-)

(limited to 'include/linux')

diff --git a/fs/dcache.c b/fs/dcache.c
index 19153a0a810c..8f15e8d71bba 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2722,37 +2722,6 @@ char *d_path(const struct path *path, char *buf, int buflen)
 }
 EXPORT_SYMBOL(d_path);
 
-/**
- * d_path_with_unreachable - return the path of a dentry
- * @path: path to report
- * @buf: buffer to return value in
- * @buflen: buffer length
- *
- * The difference from d_path() is that this prepends "(unreachable)"
- * to paths which are unreachable from the current process' root.
- */
-char *d_path_with_unreachable(const struct path *path, char *buf, int buflen)
-{
-	char *res = buf + buflen;
-	struct path root;
-	int error;
-
-	if (path->dentry->d_op && path->dentry->d_op->d_dname)
-		return path->dentry->d_op->d_dname(path->dentry, buf, buflen);
-
-	get_fs_root(current->fs, &root);
-	write_seqlock(&rename_lock);
-	error = path_with_deleted(path, &root, &res, &buflen);
-	if (error > 0)
-		error = prepend_unreachable(&res, &buflen);
-	write_sequnlock(&rename_lock);
-	path_put(&root);
-	if (error)
-		res =  ERR_PTR(error);
-
-	return res;
-}
-
 /*
  * Helper function for dentry_operations.d_dname() members
  */
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index c1754b59ddd3..63e96844a8e8 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -333,7 +333,6 @@ extern char *dynamic_dname(struct dentry *, char *, int, const char *, ...);
 extern char *__d_path(const struct path *, const struct path *, char *, int);
 extern char *d_absolute_path(const struct path *, char *, int);
 extern char *d_path(const struct path *, char *, int);
-extern char *d_path_with_unreachable(const struct path *, char *, int);
 extern char *dentry_path_raw(struct dentry *, char *, int);
 extern char *dentry_path(struct dentry *, char *, int);
 
-- 
cgit v1.2.3


From a713ca2ab9d14dc5c86634bc445ce1f66552c169 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 24 Jan 2013 18:27:00 -0500
Subject: constify __d_lookup() arguments

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/dcache.c            | 2 +-
 include/linux/dcache.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/fs/dcache.c b/fs/dcache.c
index 8f15e8d71bba..93d9f4e1a8ed 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1919,7 +1919,7 @@ EXPORT_SYMBOL(d_lookup);
  *
  * __d_lookup callers must be commented.
  */
-struct dentry *__d_lookup(struct dentry *parent, struct qstr *name)
+struct dentry *__d_lookup(const struct dentry *parent, const struct qstr *name)
 {
 	unsigned int len = name->len;
 	unsigned int hash = name->hash;
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 63e96844a8e8..969086c03301 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -295,7 +295,7 @@ extern struct dentry *d_ancestor(struct dentry *, struct dentry *);
 /* appendix may either be NULL or be used for transname suffixes */
 extern struct dentry *d_lookup(struct dentry *, struct qstr *);
 extern struct dentry *d_hash_and_lookup(struct dentry *, struct qstr *);
-extern struct dentry *__d_lookup(struct dentry *, struct qstr *);
+extern struct dentry *__d_lookup(const struct dentry *, const struct qstr *);
 extern struct dentry *__d_lookup_rcu(const struct dentry *parent,
 				const struct qstr *name,
 				unsigned *seq, struct inode *inode);
-- 
cgit v1.2.3


From da2d8455ed7eb22d7642ecee43dc463ac42a1256 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 24 Jan 2013 18:29:34 -0500
Subject: constify d_lookup() arguments

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/dcache.c            | 2 +-
 include/linux/dcache.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/fs/dcache.c b/fs/dcache.c
index 93d9f4e1a8ed..0e4b5fa6c9f9 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1889,7 +1889,7 @@ seqretry:
  * dentry is returned. The caller must use dput to free the entry when it has
  * finished using it. %NULL is returned if the dentry does not exist.
  */
-struct dentry *d_lookup(struct dentry *parent, struct qstr *name)
+struct dentry *d_lookup(const struct dentry *parent, const struct qstr *name)
 {
 	struct dentry *dentry;
 	unsigned seq;
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 969086c03301..03d169288423 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -293,7 +293,7 @@ extern void d_move(struct dentry *, struct dentry *);
 extern struct dentry *d_ancestor(struct dentry *, struct dentry *);
 
 /* appendix may either be NULL or be used for transname suffixes */
-extern struct dentry *d_lookup(struct dentry *, struct qstr *);
+extern struct dentry *d_lookup(const struct dentry *, const struct qstr *);
 extern struct dentry *d_hash_and_lookup(struct dentry *, struct qstr *);
 extern struct dentry *__d_lookup(const struct dentry *, const struct qstr *);
 extern struct dentry *__d_lookup_rcu(const struct dentry *parent,
-- 
cgit v1.2.3


From d778df51c09264076fe0208c099ef7d428f21790 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Fri, 22 Feb 2013 16:32:12 -0800
Subject: mm: vmscan: save work scanning (almost) empty LRU lists

In certain cases (kswapd reclaim, memcg target reclaim), a fixed minimum
amount of pages is scanned from the LRU lists on each iteration, to make
progress.

Do not make this minimum bigger than the respective LRU list size,
however, and save some busy work trying to isolate and reclaim pages
that are not there.

Empty LRU lists are quite common with memory cgroups in NUMA
environments because there exists a set of LRU lists for each zone for
each memory cgroup, while the memory of a single cgroup is expected to
stay on just one node.  The number of expected empty LRU lists is thus

  memcgs * (nodes - 1) * lru types

Each attempt to reclaim from an empty LRU list does expensive size
comparisons between lists, acquires the zone's lru lock etc.  Avoid
that.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Rik van Riel <riel@redhat.com>
Acked-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Cc: Hugh Dickins <hughd@google.com>
Cc: Satoru Moriya <satoru.moriya@hds.com>
Cc: Simon Jeons <simon.jeons@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/swap.h |  2 +-
 mm/vmscan.c          | 10 ++++++----
 2 files changed, 7 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 68df9c17fbbb..8c66486a8ca8 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -156,7 +156,7 @@ enum {
 	SWP_SCANNING	= (1 << 8),	/* refcount in scan_swap_map */
 };
 
-#define SWAP_CLUSTER_MAX 32
+#define SWAP_CLUSTER_MAX 32UL
 #define COMPACT_CLUSTER_MAX SWAP_CLUSTER_MAX
 
 /*
diff --git a/mm/vmscan.c b/mm/vmscan.c
index ff842d9a7714..e4521ba1ddd0 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1761,15 +1761,17 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
 out:
 	for_each_evictable_lru(lru) {
 		int file = is_file_lru(lru);
+		unsigned long size;
 		unsigned long scan;
 
-		scan = get_lru_size(lruvec, lru);
+		size = get_lru_size(lruvec, lru);
 		if (sc->priority || noswap || !vmscan_swappiness(sc)) {
-			scan >>= sc->priority;
+			scan = size >> sc->priority;
 			if (!scan && force_scan)
-				scan = SWAP_CLUSTER_MAX;
+				scan = min(size, SWAP_CLUSTER_MAX);
 			scan = div64_u64(scan * fraction[file], denominator);
-		}
+		} else
+			scan = size;
 		nr[lru] = scan;
 	}
 }
-- 
cgit v1.2.3


From 7103f16dbff20fa969c9500902d980d17f953fa6 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Fri, 22 Feb 2013 16:32:33 -0800
Subject: mm: compaction: make __compact_pgdat() and compact_pgdat() return
 void

These functions always return 0.  Formalise this.

Cc: Jason Liu <r64343@freescale.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Rik van Riel <riel@redhat.com>
Acked-by: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/compaction.h |  5 ++---
 mm/compaction.c            | 12 +++++-------
 2 files changed, 7 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index cc7bddeaf553..091d72e70d8a 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -23,7 +23,7 @@ extern int fragmentation_index(struct zone *zone, unsigned int order);
 extern unsigned long try_to_compact_pages(struct zonelist *zonelist,
 			int order, gfp_t gfp_mask, nodemask_t *mask,
 			bool sync, bool *contended);
-extern int compact_pgdat(pg_data_t *pgdat, int order);
+extern void compact_pgdat(pg_data_t *pgdat, int order);
 extern void reset_isolation_suitable(pg_data_t *pgdat);
 extern unsigned long compaction_suitable(struct zone *zone, int order);
 
@@ -80,9 +80,8 @@ static inline unsigned long try_to_compact_pages(struct zonelist *zonelist,
 	return COMPACT_CONTINUE;
 }
 
-static inline int compact_pgdat(pg_data_t *pgdat, int order)
+static inline void compact_pgdat(pg_data_t *pgdat, int order)
 {
-	return COMPACT_CONTINUE;
 }
 
 static inline void reset_isolation_suitable(pg_data_t *pgdat)
diff --git a/mm/compaction.c b/mm/compaction.c
index 0d0248db36d8..5d5b0b259bc9 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -1085,7 +1085,7 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
 
 
 /* Compact all zones within a node */
-static int __compact_pgdat(pg_data_t *pgdat, struct compact_control *cc)
+static void __compact_pgdat(pg_data_t *pgdat, struct compact_control *cc)
 {
 	int zoneid;
 	struct zone *zone;
@@ -1118,28 +1118,26 @@ static int __compact_pgdat(pg_data_t *pgdat, struct compact_control *cc)
 		VM_BUG_ON(!list_empty(&cc->freepages));
 		VM_BUG_ON(!list_empty(&cc->migratepages));
 	}
-
-	return 0;
 }
 
-int compact_pgdat(pg_data_t *pgdat, int order)
+void compact_pgdat(pg_data_t *pgdat, int order)
 {
 	struct compact_control cc = {
 		.order = order,
 		.sync = false,
 	};
 
-	return __compact_pgdat(pgdat, &cc);
+	__compact_pgdat(pgdat, &cc);
 }
 
-static int compact_node(int nid)
+static void compact_node(int nid)
 {
 	struct compact_control cc = {
 		.order = -1,
 		.sync = true,
 	};
 
-	return __compact_pgdat(NODE_DATA(nid), &cc);
+	__compact_pgdat(NODE_DATA(nid), &cc);
 }
 
 /* Compact all nodes in the system */
-- 
cgit v1.2.3


From bebeb3d68b24bb4132d452c5707fe321208bcbcd Mon Sep 17 00:00:00 2001
From: Michel Lespinasse <walken@google.com>
Date: Fri, 22 Feb 2013 16:32:37 -0800
Subject: mm: introduce mm_populate() for populating new vmas

When creating new mappings using the MAP_POPULATE / MAP_LOCKED flags (or
with MCL_FUTURE in effect), we want to populate the pages within the
newly created vmas.  This may take a while as we may have to read pages
from disk, so ideally we want to do this outside of the write-locked
mmap_sem region.

This change introduces mm_populate(), which is used to defer populating
such mappings until after the mmap_sem write lock has been released.
This is implemented as a generalization of the former do_mlock_pages(),
which accomplished the same task but was using during mlock() /
mlockall().

Signed-off-by: Michel Lespinasse <walken@google.com>
Reported-by: Andy Lutomirski <luto@amacapital.net>
Acked-by: Rik van Riel <riel@redhat.com>
Tested-by: Andy Lutomirski <luto@amacapital.net>
Cc: Greg Ungerer <gregungerer@westnet.com.au>
Cc: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/aio.c           |  6 +++++-
 include/linux/mm.h | 18 +++++++++++++++---
 ipc/shm.c          | 12 +++++++-----
 mm/mlock.c         | 17 +++++++++++------
 mm/mmap.c          | 20 +++++++++++++++-----
 mm/nommu.c         |  5 ++++-
 mm/util.c          |  6 +++++-
 7 files changed, 62 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/fs/aio.c b/fs/aio.c
index 71f613cf4a85..82eec7c7b4bb 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -103,6 +103,7 @@ static int aio_setup_ring(struct kioctx *ctx)
 	unsigned nr_events = ctx->max_reqs;
 	unsigned long size;
 	int nr_pages;
+	bool populate;
 
 	/* Compensate for the ring buffer's head/tail overlap entry */
 	nr_events += 2;	/* 1 is required, 2 for good luck */
@@ -129,7 +130,8 @@ static int aio_setup_ring(struct kioctx *ctx)
 	down_write(&ctx->mm->mmap_sem);
 	info->mmap_base = do_mmap_pgoff(NULL, 0, info->mmap_size, 
 					PROT_READ|PROT_WRITE,
-					MAP_ANONYMOUS|MAP_PRIVATE, 0);
+					MAP_ANONYMOUS|MAP_PRIVATE, 0,
+					&populate);
 	if (IS_ERR((void *)info->mmap_base)) {
 		up_write(&ctx->mm->mmap_sem);
 		info->mmap_size = 0;
@@ -147,6 +149,8 @@ static int aio_setup_ring(struct kioctx *ctx)
 		aio_free_ring(ctx);
 		return -EAGAIN;
 	}
+	if (populate)
+		mm_populate(info->mmap_base, info->mmap_size);
 
 	ctx->user_id = info->mmap_base;
 
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 9d9dcc35d6a1..da0a0fe970c2 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1474,11 +1474,23 @@ extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned lo
 extern unsigned long mmap_region(struct file *file, unsigned long addr,
 	unsigned long len, unsigned long flags,
 	vm_flags_t vm_flags, unsigned long pgoff);
-extern unsigned long do_mmap_pgoff(struct file *, unsigned long,
-        unsigned long, unsigned long,
-        unsigned long, unsigned long);
+extern unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
+	unsigned long len, unsigned long prot, unsigned long flags,
+	unsigned long pgoff, bool *populate);
 extern int do_munmap(struct mm_struct *, unsigned long, size_t);
 
+#ifdef CONFIG_MMU
+extern int __mm_populate(unsigned long addr, unsigned long len,
+			 int ignore_errors);
+static inline void mm_populate(unsigned long addr, unsigned long len)
+{
+	/* Ignore errors */
+	(void) __mm_populate(addr, len, 1);
+}
+#else
+static inline void mm_populate(unsigned long addr, unsigned long len) {}
+#endif
+
 /* These take the mm semaphore themselves */
 extern unsigned long vm_brk(unsigned long, unsigned long);
 extern int vm_munmap(unsigned long, size_t);
diff --git a/ipc/shm.c b/ipc/shm.c
index 4fa6d8fee730..9f047ba69e62 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -967,11 +967,11 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr,
 	unsigned long flags;
 	unsigned long prot;
 	int acc_mode;
-	unsigned long user_addr;
 	struct ipc_namespace *ns;
 	struct shm_file_data *sfd;
 	struct path path;
 	fmode_t f_mode;
+	bool populate = false;
 
 	err = -EINVAL;
 	if (shmid < 0)
@@ -1070,13 +1070,15 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr,
 			goto invalid;
 	}
 		
-	user_addr = do_mmap_pgoff(file, addr, size, prot, flags, 0);
-	*raddr = user_addr;
+	addr = do_mmap_pgoff(file, addr, size, prot, flags, 0, &populate);
+	*raddr = addr;
 	err = 0;
-	if (IS_ERR_VALUE(user_addr))
-		err = (long)user_addr;
+	if (IS_ERR_VALUE(addr))
+		err = (long)addr;
 invalid:
 	up_write(&current->mm->mmap_sem);
+	if (populate)
+		mm_populate(addr, size);
 
 out_fput:
 	fput(file);
diff --git a/mm/mlock.c b/mm/mlock.c
index c9bd528b01d2..a296a49865df 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -416,7 +416,14 @@ static int do_mlock(unsigned long start, size_t len, int on)
 	return error;
 }
 
-static int do_mlock_pages(unsigned long start, size_t len, int ignore_errors)
+/*
+ * __mm_populate - populate and/or mlock pages within a range of address space.
+ *
+ * This is used to implement mlock() and the MAP_POPULATE / MAP_LOCKED mmap
+ * flags. VMAs must be already marked with the desired vm_flags, and
+ * mmap_sem must not be held.
+ */
+int __mm_populate(unsigned long start, unsigned long len, int ignore_errors)
 {
 	struct mm_struct *mm = current->mm;
 	unsigned long end, nstart, nend;
@@ -498,7 +505,7 @@ SYSCALL_DEFINE2(mlock, unsigned long, start, size_t, len)
 		error = do_mlock(start, len, 1);
 	up_write(&current->mm->mmap_sem);
 	if (!error)
-		error = do_mlock_pages(start, len, 0);
+		error = __mm_populate(start, len, 0);
 	return error;
 }
 
@@ -564,10 +571,8 @@ SYSCALL_DEFINE1(mlockall, int, flags)
 	    capable(CAP_IPC_LOCK))
 		ret = do_mlockall(flags);
 	up_write(&current->mm->mmap_sem);
-	if (!ret && (flags & MCL_CURRENT)) {
-		/* Ignore errors */
-		do_mlock_pages(0, TASK_SIZE, 1);
-	}
+	if (!ret && (flags & MCL_CURRENT))
+		mm_populate(0, TASK_SIZE);
 out:
 	return ret;
 }
diff --git a/mm/mmap.c b/mm/mmap.c
index 09da0b264982..9b12e3047a86 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1154,12 +1154,15 @@ static inline unsigned long round_hint_to_min(unsigned long hint)
 
 unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
 			unsigned long len, unsigned long prot,
-			unsigned long flags, unsigned long pgoff)
+			unsigned long flags, unsigned long pgoff,
+			bool *populate)
 {
 	struct mm_struct * mm = current->mm;
 	struct inode *inode;
 	vm_flags_t vm_flags;
 
+	*populate = false;
+
 	/*
 	 * Does the application expect PROT_READ to imply PROT_EXEC?
 	 *
@@ -1280,7 +1283,12 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
 		}
 	}
 
-	return mmap_region(file, addr, len, flags, vm_flags, pgoff);
+	addr = mmap_region(file, addr, len, flags, vm_flags, pgoff);
+	if (!IS_ERR_VALUE(addr) &&
+	    ((vm_flags & VM_LOCKED) ||
+	     (flags & (MAP_POPULATE | MAP_NONBLOCK)) == MAP_POPULATE))
+		*populate = true;
+	return addr;
 }
 
 SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len,
@@ -1531,10 +1539,12 @@ out:
 
 	vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT);
 	if (vm_flags & VM_LOCKED) {
-		if (!mlock_vma_pages_range(vma, addr, addr + len))
+		if (!((vm_flags & VM_SPECIAL) || is_vm_hugetlb_page(vma) ||
+					vma == get_gate_vma(current->mm)))
 			mm->locked_vm += (len >> PAGE_SHIFT);
-	} else if ((flags & MAP_POPULATE) && !(flags & MAP_NONBLOCK))
-		make_pages_present(addr, addr + len);
+		else
+			vma->vm_flags &= ~VM_LOCKED;
+	}
 
 	if (file)
 		uprobe_mmap(vma);
diff --git a/mm/nommu.c b/mm/nommu.c
index b20db4e22263..7296a5a280e7 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1250,7 +1250,8 @@ unsigned long do_mmap_pgoff(struct file *file,
 			    unsigned long len,
 			    unsigned long prot,
 			    unsigned long flags,
-			    unsigned long pgoff)
+			    unsigned long pgoff,
+			    bool *populate)
 {
 	struct vm_area_struct *vma;
 	struct vm_region *region;
@@ -1260,6 +1261,8 @@ unsigned long do_mmap_pgoff(struct file *file,
 
 	kenter(",%lx,%lx,%lx,%lx,%lx", addr, len, prot, flags, pgoff);
 
+	*populate = false;
+
 	/* decide whether we should attempt the mapping, and if so what sort of
 	 * mapping */
 	ret = validate_mmap_request(file, addr, len, prot, flags, pgoff,
diff --git a/mm/util.c b/mm/util.c
index c55e26b17d93..13467e043e9e 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -355,12 +355,16 @@ unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr,
 {
 	unsigned long ret;
 	struct mm_struct *mm = current->mm;
+	bool populate;
 
 	ret = security_mmap_file(file, prot, flag);
 	if (!ret) {
 		down_write(&mm->mmap_sem);
-		ret = do_mmap_pgoff(file, addr, len, prot, flag, pgoff);
+		ret = do_mmap_pgoff(file, addr, len, prot, flag, pgoff,
+				    &populate);
 		up_write(&mm->mmap_sem);
+		if (!IS_ERR_VALUE(ret) && populate)
+			mm_populate(ret, len);
 	}
 	return ret;
 }
-- 
cgit v1.2.3


From c22c0d6344c362b1dde5d8e160d3d07536aca120 Mon Sep 17 00:00:00 2001
From: Michel Lespinasse <walken@google.com>
Date: Fri, 22 Feb 2013 16:32:43 -0800
Subject: mm: remove flags argument to mmap_region

After the MAP_POPULATE handling has been moved to mmap_region() call
sites, the only remaining use of the flags argument is to pass the
MAP_NORESERVE flag.  This can be just as easily handled by
do_mmap_pgoff(), so do that and remove the mmap_region() flags
parameter.

[akpm@linux-foundation.org: remove double parens]
Signed-off-by: Michel Lespinasse <walken@google.com>
Acked-by: Rik van Riel <riel@redhat.com>
Tested-by: Andy Lutomirski <luto@amacapital.net>
Cc: Greg Ungerer <gregungerer@westnet.com.au>
Cc: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/tile/mm/elf.c |  1 -
 include/linux/mm.h |  3 +--
 mm/fremap.c        |  3 +--
 mm/mmap.c          | 33 ++++++++++++++++-----------------
 4 files changed, 18 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/arch/tile/mm/elf.c b/arch/tile/mm/elf.c
index 3cfa98bf9125..743c951c61b0 100644
--- a/arch/tile/mm/elf.c
+++ b/arch/tile/mm/elf.c
@@ -130,7 +130,6 @@ int arch_setup_additional_pages(struct linux_binprm *bprm,
 	if (!retval) {
 		unsigned long addr = MEM_USER_INTRPT;
 		addr = mmap_region(NULL, addr, INTRPT_SIZE,
-				   MAP_FIXED|MAP_ANONYMOUS|MAP_PRIVATE,
 				   VM_READ|VM_EXEC|
 				   VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, 0);
 		if (addr > (unsigned long) -PAGE_SIZE)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index da0a0fe970c2..8332f3069fe3 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1472,8 +1472,7 @@ extern int install_special_mapping(struct mm_struct *mm,
 extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
 
 extern unsigned long mmap_region(struct file *file, unsigned long addr,
-	unsigned long len, unsigned long flags,
-	vm_flags_t vm_flags, unsigned long pgoff);
+	unsigned long len, vm_flags_t vm_flags, unsigned long pgoff);
 extern unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
 	unsigned long len, unsigned long prot, unsigned long flags,
 	unsigned long pgoff, bool *populate);
diff --git a/mm/fremap.c b/mm/fremap.c
index b42e32171530..503a72387087 100644
--- a/mm/fremap.c
+++ b/mm/fremap.c
@@ -204,9 +204,8 @@ get_write_lock:
 			unsigned long addr;
 			struct file *file = get_file(vma->vm_file);
 
-			flags = (flags & MAP_NONBLOCK) | MAP_POPULATE;
 			addr = mmap_region(file, start, size,
-					flags, vma->vm_flags, pgoff);
+					vma->vm_flags, pgoff);
 			fput(file);
 			if (IS_ERR_VALUE(addr)) {
 				err = addr;
diff --git a/mm/mmap.c b/mm/mmap.c
index a23e30f9719c..d6bc39e939ab 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1291,7 +1291,21 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
 		}
 	}
 
-	addr = mmap_region(file, addr, len, flags, vm_flags, pgoff);
+	/*
+	 * Set 'VM_NORESERVE' if we should not account for the
+	 * memory use of this mapping.
+	 */
+	if (flags & MAP_NORESERVE) {
+		/* We honor MAP_NORESERVE if allowed to overcommit */
+		if (sysctl_overcommit_memory != OVERCOMMIT_NEVER)
+			vm_flags |= VM_NORESERVE;
+
+		/* hugetlb applies strict overcommit unless MAP_NORESERVE */
+		if (file && is_file_hugepages(file))
+			vm_flags |= VM_NORESERVE;
+	}
+
+	addr = mmap_region(file, addr, len, vm_flags, pgoff);
 	if (!IS_ERR_VALUE(addr) &&
 	    ((vm_flags & VM_LOCKED) ||
 	     (flags & (MAP_POPULATE | MAP_NONBLOCK)) == MAP_POPULATE))
@@ -1411,8 +1425,7 @@ static inline int accountable_mapping(struct file *file, vm_flags_t vm_flags)
 }
 
 unsigned long mmap_region(struct file *file, unsigned long addr,
-			  unsigned long len, unsigned long flags,
-			  vm_flags_t vm_flags, unsigned long pgoff)
+		unsigned long len, vm_flags_t vm_flags, unsigned long pgoff)
 {
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma, *prev;
@@ -1435,20 +1448,6 @@ munmap_back:
 	if (!may_expand_vm(mm, len >> PAGE_SHIFT))
 		return -ENOMEM;
 
-	/*
-	 * Set 'VM_NORESERVE' if we should not account for the
-	 * memory use of this mapping.
-	 */
-	if ((flags & MAP_NORESERVE)) {
-		/* We honor MAP_NORESERVE if allowed to overcommit */
-		if (sysctl_overcommit_memory != OVERCOMMIT_NEVER)
-			vm_flags |= VM_NORESERVE;
-
-		/* hugetlb applies strict overcommit unless MAP_NORESERVE */
-		if (file && is_file_hugepages(file))
-			vm_flags |= VM_NORESERVE;
-	}
-
 	/*
 	 * Private writable mapping: check memory availability
 	 */
-- 
cgit v1.2.3


From cea10a19b7972a1954c4a2d05a7de8db48b444fb Mon Sep 17 00:00:00 2001
From: Michel Lespinasse <walken@google.com>
Date: Fri, 22 Feb 2013 16:32:44 -0800
Subject: mm: directly use __mlock_vma_pages_range() in find_extend_vma()

In find_extend_vma(), we don't need mlock_vma_pages_range() to verify
the vma type - we know we're working with a stack.  So, we can call
directly into __mlock_vma_pages_range(), and remove the last
make_pages_present() call site.

Note that we don't use mm_populate() here, so we can't release the
mmap_sem while allocating new stack pages.  This is deemed acceptable,
because the stack vmas grow by a bounded number of pages at a time, and
these are anon pages so we don't have to read from disk to populate
them.

Signed-off-by: Michel Lespinasse <walken@google.com>
Acked-by: Rik van Riel <riel@redhat.com>
Tested-by: Andy Lutomirski <luto@amacapital.net>
Cc: Greg Ungerer <gregungerer@westnet.com.au>
Cc: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h |  1 -
 mm/internal.h      |  4 ++--
 mm/memory.c        | 24 -----------------------
 mm/mlock.c         | 57 +++---------------------------------------------------
 mm/mmap.c          | 10 ++++------
 5 files changed, 9 insertions(+), 87 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 8332f3069fe3..0c34d3486816 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1035,7 +1035,6 @@ static inline int fixup_user_fault(struct task_struct *tsk,
 }
 #endif
 
-extern int make_pages_present(unsigned long addr, unsigned long end);
 extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write);
 extern int access_remote_vm(struct mm_struct *mm, unsigned long addr,
 		void *buf, int len, int write);
diff --git a/mm/internal.h b/mm/internal.h
index 9ba21100ebf3..1c0c4cc0fcf7 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -162,8 +162,8 @@ void __vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma,
 		struct vm_area_struct *prev, struct rb_node *rb_parent);
 
 #ifdef CONFIG_MMU
-extern long mlock_vma_pages_range(struct vm_area_struct *vma,
-			unsigned long start, unsigned long end);
+extern long __mlock_vma_pages_range(struct vm_area_struct *vma,
+		unsigned long start, unsigned long end, int *nonblocking);
 extern void munlock_vma_pages_range(struct vm_area_struct *vma,
 			unsigned long start, unsigned long end);
 static inline void munlock_vma_pages_all(struct vm_area_struct *vma)
diff --git a/mm/memory.c b/mm/memory.c
index 0abd07097ec6..7837ceacf090 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3824,30 +3824,6 @@ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
 }
 #endif /* __PAGETABLE_PMD_FOLDED */
 
-int make_pages_present(unsigned long addr, unsigned long end)
-{
-	int ret, len, write;
-	struct vm_area_struct * vma;
-
-	vma = find_vma(current->mm, addr);
-	if (!vma)
-		return -ENOMEM;
-	/*
-	 * We want to touch writable mappings with a write fault in order
-	 * to break COW, except for shared mappings because these don't COW
-	 * and we would not want to dirty them for nothing.
-	 */
-	write = (vma->vm_flags & (VM_WRITE | VM_SHARED)) == VM_WRITE;
-	BUG_ON(addr >= end);
-	BUG_ON(end > vma->vm_end);
-	len = DIV_ROUND_UP(end, PAGE_SIZE) - addr/PAGE_SIZE;
-	ret = get_user_pages(current, current->mm, addr,
-			len, write, 0, NULL, NULL);
-	if (ret < 0)
-		return ret;
-	return ret == len ? 0 : -EFAULT;
-}
-
 #if !defined(__HAVE_ARCH_GATE_AREA)
 
 #if defined(AT_SYSINFO_EHDR)
diff --git a/mm/mlock.c b/mm/mlock.c
index a296a49865df..569400a5d079 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -155,9 +155,8 @@ void munlock_vma_page(struct page *page)
  *
  * vma->vm_mm->mmap_sem must be held for at least read.
  */
-static long __mlock_vma_pages_range(struct vm_area_struct *vma,
-				    unsigned long start, unsigned long end,
-				    int *nonblocking)
+long __mlock_vma_pages_range(struct vm_area_struct *vma,
+		unsigned long start, unsigned long end, int *nonblocking)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	unsigned long addr = start;
@@ -202,56 +201,6 @@ static int __mlock_posix_error_return(long retval)
 	return retval;
 }
 
-/**
- * mlock_vma_pages_range() - mlock pages in specified vma range.
- * @vma - the vma containing the specfied address range
- * @start - starting address in @vma to mlock
- * @end   - end address [+1] in @vma to mlock
- *
- * For mmap()/mremap()/expansion of mlocked vma.
- *
- * return 0 on success for "normal" vmas.
- *
- * return number of pages [> 0] to be removed from locked_vm on success
- * of "special" vmas.
- */
-long mlock_vma_pages_range(struct vm_area_struct *vma,
-			unsigned long start, unsigned long end)
-{
-	int nr_pages = (end - start) / PAGE_SIZE;
-	BUG_ON(!(vma->vm_flags & VM_LOCKED));
-
-	/*
-	 * filter unlockable vmas
-	 */
-	if (vma->vm_flags & (VM_IO | VM_PFNMAP))
-		goto no_mlock;
-
-	if (!((vma->vm_flags & VM_DONTEXPAND) ||
-			is_vm_hugetlb_page(vma) ||
-			vma == get_gate_vma(current->mm))) {
-
-		__mlock_vma_pages_range(vma, start, end, NULL);
-
-		/* Hide errors from mmap() and other callers */
-		return 0;
-	}
-
-	/*
-	 * User mapped kernel pages or huge pages:
-	 * make these pages present to populate the ptes, but
-	 * fall thru' to reset VM_LOCKED--no need to unlock, and
-	 * return nr_pages so these don't get counted against task's
-	 * locked limit.  huge pages are already counted against
-	 * locked vm limit.
-	 */
-	make_pages_present(start, end);
-
-no_mlock:
-	vma->vm_flags &= ~VM_LOCKED;	/* and don't come back! */
-	return nr_pages;		/* error or pages NOT mlocked */
-}
-
 /*
  * munlock_vma_pages_range() - munlock all pages in the vma range.'
  * @vma - vma containing range to be munlock()ed.
@@ -303,7 +252,7 @@ void munlock_vma_pages_range(struct vm_area_struct *vma,
  *
  * Filters out "special" vmas -- VM_LOCKED never gets set for these, and
  * munlock is a no-op.  However, for some special vmas, we go ahead and
- * populate the ptes via make_pages_present().
+ * populate the ptes.
  *
  * For vmas that pass the filters, merge/split as appropriate.
  */
diff --git a/mm/mmap.c b/mm/mmap.c
index d6bc39e939ab..8826c77513a9 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2204,9 +2204,8 @@ find_extend_vma(struct mm_struct *mm, unsigned long addr)
 		return vma;
 	if (!prev || expand_stack(prev, addr))
 		return NULL;
-	if (prev->vm_flags & VM_LOCKED) {
-		mlock_vma_pages_range(prev, addr, prev->vm_end);
-	}
+	if (prev->vm_flags & VM_LOCKED)
+		__mlock_vma_pages_range(prev, addr, prev->vm_end, NULL);
 	return prev;
 }
 #else
@@ -2232,9 +2231,8 @@ find_extend_vma(struct mm_struct * mm, unsigned long addr)
 	start = vma->vm_start;
 	if (expand_stack(vma, addr))
 		return NULL;
-	if (vma->vm_flags & VM_LOCKED) {
-		mlock_vma_pages_range(vma, addr, start);
-	}
+	if (vma->vm_flags & VM_LOCKED)
+		__mlock_vma_pages_range(vma, addr, start, NULL);
 	return vma;
 }
 #endif
-- 
cgit v1.2.3


From 1869305009857cdeaabe6283bcdc2359c5784543 Mon Sep 17 00:00:00 2001
From: Michel Lespinasse <walken@google.com>
Date: Fri, 22 Feb 2013 16:32:46 -0800
Subject: mm: introduce VM_POPULATE flag to better deal with racy userspace
 programs

The vm_populate() code populates user mappings without constantly
holding the mmap_sem.  This makes it susceptible to racy userspace
programs: the user mappings may change while vm_populate() is running,
and in this case vm_populate() may end up populating the new mapping
instead of the old one.

In order to reduce the possibility of userspace getting surprised by
this behavior, this change introduces the VM_POPULATE vma flag which
gets set on vmas we want vm_populate() to work on.  This way
vm_populate() may still end up populating the new mapping after such a
race, but only if the new mapping is also one that the user has
requested (using MAP_SHARED, MAP_LOCKED or mlock) to be populated.

Signed-off-by: Michel Lespinasse <walken@google.com>
Acked-by: Rik van Riel <riel@redhat.com>
Tested-by: Andy Lutomirski <luto@amacapital.net>
Cc: Greg Ungerer <gregungerer@westnet.com.au>
Cc: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h   |  1 +
 include/linux/mman.h |  4 +++-
 mm/fremap.c          | 12 ++++++++++--
 mm/mlock.c           | 19 ++++++++++---------
 mm/mmap.c            |  4 +---
 5 files changed, 25 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 0c34d3486816..9a5fcdeaa3a0 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -87,6 +87,7 @@ extern unsigned int kobjsize(const void *objp);
 #define VM_PFNMAP	0x00000400	/* Page-ranges managed without "struct page", just pure PFN */
 #define VM_DENYWRITE	0x00000800	/* ETXTBSY on write attempts.. */
 
+#define VM_POPULATE     0x00001000
 #define VM_LOCKED	0x00002000
 #define VM_IO           0x00004000	/* Memory mapped I/O or similar */
 
diff --git a/include/linux/mman.h b/include/linux/mman.h
index 9aa863da287f..61c7a87e5d2b 100644
--- a/include/linux/mman.h
+++ b/include/linux/mman.h
@@ -79,6 +79,8 @@ calc_vm_flag_bits(unsigned long flags)
 {
 	return _calc_vm_trans(flags, MAP_GROWSDOWN,  VM_GROWSDOWN ) |
 	       _calc_vm_trans(flags, MAP_DENYWRITE,  VM_DENYWRITE ) |
-	       _calc_vm_trans(flags, MAP_LOCKED,     VM_LOCKED    );
+	       ((flags & MAP_LOCKED) ? (VM_LOCKED | VM_POPULATE) : 0) |
+	       (((flags & (MAP_POPULATE | MAP_NONBLOCK)) == MAP_POPULATE) ?
+							VM_POPULATE : 0);
 }
 #endif /* _LINUX_MMAN_H */
diff --git a/mm/fremap.c b/mm/fremap.c
index 503a72387087..0cd4c11488ed 100644
--- a/mm/fremap.c
+++ b/mm/fremap.c
@@ -204,8 +204,10 @@ get_write_lock:
 			unsigned long addr;
 			struct file *file = get_file(vma->vm_file);
 
-			addr = mmap_region(file, start, size,
-					vma->vm_flags, pgoff);
+			vm_flags = vma->vm_flags;
+			if (!(flags & MAP_NONBLOCK))
+				vm_flags |= VM_POPULATE;
+			addr = mmap_region(file, start, size, vm_flags, pgoff);
 			fput(file);
 			if (IS_ERR_VALUE(addr)) {
 				err = addr;
@@ -224,6 +226,12 @@ get_write_lock:
 		mutex_unlock(&mapping->i_mmap_mutex);
 	}
 
+	if (!(flags & MAP_NONBLOCK) && !(vma->vm_flags & VM_POPULATE)) {
+		if (!has_write_lock)
+			goto get_write_lock;
+		vma->vm_flags |= VM_POPULATE;
+	}
+
 	if (vma->vm_flags & VM_LOCKED) {
 		/*
 		 * drop PG_Mlocked flag for over-mapped range
diff --git a/mm/mlock.c b/mm/mlock.c
index 569400a5d079..d6378feb2950 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -340,9 +340,9 @@ static int do_mlock(unsigned long start, size_t len, int on)
 
 		/* Here we know that  vma->vm_start <= nstart < vma->vm_end. */
 
-		newflags = vma->vm_flags | VM_LOCKED;
-		if (!on)
-			newflags &= ~VM_LOCKED;
+		newflags = vma->vm_flags & ~VM_LOCKED;
+		if (on)
+			newflags |= VM_LOCKED | VM_POPULATE;
 
 		tmp = vma->vm_end;
 		if (tmp > end)
@@ -402,7 +402,8 @@ int __mm_populate(unsigned long start, unsigned long len, int ignore_errors)
 		 * range with the first VMA. Also, skip undesirable VMA types.
 		 */
 		nend = min(end, vma->vm_end);
-		if (vma->vm_flags & (VM_IO | VM_PFNMAP))
+		if ((vma->vm_flags & (VM_IO | VM_PFNMAP | VM_POPULATE)) !=
+		    VM_POPULATE)
 			continue;
 		if (nstart < vma->vm_start)
 			nstart = vma->vm_start;
@@ -475,18 +476,18 @@ static int do_mlockall(int flags)
 	struct vm_area_struct * vma, * prev = NULL;
 
 	if (flags & MCL_FUTURE)
-		current->mm->def_flags |= VM_LOCKED;
+		current->mm->def_flags |= VM_LOCKED | VM_POPULATE;
 	else
-		current->mm->def_flags &= ~VM_LOCKED;
+		current->mm->def_flags &= ~(VM_LOCKED | VM_POPULATE);
 	if (flags == MCL_FUTURE)
 		goto out;
 
 	for (vma = current->mm->mmap; vma ; vma = prev->vm_next) {
 		vm_flags_t newflags;
 
-		newflags = vma->vm_flags | VM_LOCKED;
-		if (!(flags & MCL_CURRENT))
-			newflags &= ~VM_LOCKED;
+		newflags = vma->vm_flags & ~VM_LOCKED;
+		if (flags & MCL_CURRENT)
+			newflags |= VM_LOCKED | VM_POPULATE;
 
 		/* Ignore errors */
 		mlock_fixup(vma, &prev, vma->vm_start, vma->vm_end, newflags);
diff --git a/mm/mmap.c b/mm/mmap.c
index 8826c77513a9..39a3944e1658 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1306,9 +1306,7 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
 	}
 
 	addr = mmap_region(file, addr, len, vm_flags, pgoff);
-	if (!IS_ERR_VALUE(addr) &&
-	    ((vm_flags & VM_LOCKED) ||
-	     (flags & (MAP_POPULATE | MAP_NONBLOCK)) == MAP_POPULATE))
+	if (!IS_ERR_VALUE(addr) && (vm_flags & VM_POPULATE))
 		*populate = true;
 	return addr;
 }
-- 
cgit v1.2.3


From 41badc15cbad0350de34408c1b0c690f9df76d4b Mon Sep 17 00:00:00 2001
From: Michel Lespinasse <walken@google.com>
Date: Fri, 22 Feb 2013 16:32:47 -0800
Subject: mm: make do_mmap_pgoff return populate as a size in bytes, not as a
 bool

do_mmap_pgoff() rounds up the desired size to the next PAGE_SIZE
multiple, however there was no equivalent code in mm_populate(), which
caused issues.

This could be fixed by introduced the same rounding in mm_populate(),
however I think it's preferable to make do_mmap_pgoff() return populate
as a size rather than as a boolean, so we don't have to duplicate the
size rounding logic in mm_populate().

Signed-off-by: Michel Lespinasse <walken@google.com>
Acked-by: Rik van Riel <riel@redhat.com>
Tested-by: Andy Lutomirski <luto@amacapital.net>
Cc: Greg Ungerer <gregungerer@westnet.com.au>
Cc: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/aio.c           | 5 ++---
 include/linux/mm.h | 2 +-
 ipc/shm.c          | 4 ++--
 mm/mmap.c          | 6 +++---
 mm/nommu.c         | 4 ++--
 mm/util.c          | 6 +++---
 6 files changed, 13 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/fs/aio.c b/fs/aio.c
index 82eec7c7b4bb..064bfbe37566 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -101,9 +101,8 @@ static int aio_setup_ring(struct kioctx *ctx)
 	struct aio_ring *ring;
 	struct aio_ring_info *info = &ctx->ring_info;
 	unsigned nr_events = ctx->max_reqs;
-	unsigned long size;
+	unsigned long size, populate;
 	int nr_pages;
-	bool populate;
 
 	/* Compensate for the ring buffer's head/tail overlap entry */
 	nr_events += 2;	/* 1 is required, 2 for good luck */
@@ -150,7 +149,7 @@ static int aio_setup_ring(struct kioctx *ctx)
 		return -EAGAIN;
 	}
 	if (populate)
-		mm_populate(info->mmap_base, info->mmap_size);
+		mm_populate(info->mmap_base, populate);
 
 	ctx->user_id = info->mmap_base;
 
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 9a5fcdeaa3a0..95db68e34b18 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1475,7 +1475,7 @@ extern unsigned long mmap_region(struct file *file, unsigned long addr,
 	unsigned long len, vm_flags_t vm_flags, unsigned long pgoff);
 extern unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
 	unsigned long len, unsigned long prot, unsigned long flags,
-	unsigned long pgoff, bool *populate);
+	unsigned long pgoff, unsigned long *populate);
 extern int do_munmap(struct mm_struct *, unsigned long, size_t);
 
 #ifdef CONFIG_MMU
diff --git a/ipc/shm.c b/ipc/shm.c
index 9f047ba69e62..be3ec9ae454e 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -971,7 +971,7 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr,
 	struct shm_file_data *sfd;
 	struct path path;
 	fmode_t f_mode;
-	bool populate = false;
+	unsigned long populate = 0;
 
 	err = -EINVAL;
 	if (shmid < 0)
@@ -1078,7 +1078,7 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr,
 invalid:
 	up_write(&current->mm->mmap_sem);
 	if (populate)
-		mm_populate(addr, size);
+		mm_populate(addr, populate);
 
 out_fput:
 	fput(file);
diff --git a/mm/mmap.c b/mm/mmap.c
index 39a3944e1658..44bb4d869884 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1163,13 +1163,13 @@ static inline unsigned long round_hint_to_min(unsigned long hint)
 unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
 			unsigned long len, unsigned long prot,
 			unsigned long flags, unsigned long pgoff,
-			bool *populate)
+			unsigned long *populate)
 {
 	struct mm_struct * mm = current->mm;
 	struct inode *inode;
 	vm_flags_t vm_flags;
 
-	*populate = false;
+	*populate = 0;
 
 	/*
 	 * Does the application expect PROT_READ to imply PROT_EXEC?
@@ -1307,7 +1307,7 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
 
 	addr = mmap_region(file, addr, len, vm_flags, pgoff);
 	if (!IS_ERR_VALUE(addr) && (vm_flags & VM_POPULATE))
-		*populate = true;
+		*populate = len;
 	return addr;
 }
 
diff --git a/mm/nommu.c b/mm/nommu.c
index 7296a5a280e7..18c1b932e2c4 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1251,7 +1251,7 @@ unsigned long do_mmap_pgoff(struct file *file,
 			    unsigned long prot,
 			    unsigned long flags,
 			    unsigned long pgoff,
-			    bool *populate)
+			    unsigned long *populate)
 {
 	struct vm_area_struct *vma;
 	struct vm_region *region;
@@ -1261,7 +1261,7 @@ unsigned long do_mmap_pgoff(struct file *file,
 
 	kenter(",%lx,%lx,%lx,%lx,%lx", addr, len, prot, flags, pgoff);
 
-	*populate = false;
+	*populate = 0;
 
 	/* decide whether we should attempt the mapping, and if so what sort of
 	 * mapping */
diff --git a/mm/util.c b/mm/util.c
index 13467e043e9e..3704bf1bef94 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -355,7 +355,7 @@ unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr,
 {
 	unsigned long ret;
 	struct mm_struct *mm = current->mm;
-	bool populate;
+	unsigned long populate;
 
 	ret = security_mmap_file(file, prot, flag);
 	if (!ret) {
@@ -363,8 +363,8 @@ unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr,
 		ret = do_mmap_pgoff(file, addr, len, prot, flag, pgoff,
 				    &populate);
 		up_write(&mm->mmap_sem);
-		if (!IS_ERR_VALUE(ret) && populate)
-			mm_populate(ret, len);
+		if (populate)
+			mm_populate(ret, populate);
 	}
 	return ret;
 }
-- 
cgit v1.2.3


From 6677e3eaf4d78abd7b09133414c05dc3ec353e7f Mon Sep 17 00:00:00 2001
From: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Date: Fri, 22 Feb 2013 16:32:52 -0800
Subject: memory-hotplug: check whether all memory blocks are offlined or not
 when removing memory

We remove the memory like this:

 1. lock memory hotplug
 2. offline a memory block
 3. unlock memory hotplug
 4. repeat 1-3 to offline all memory blocks
 5. lock memory hotplug
 6. remove memory(TODO)
 7. unlock memory hotplug

All memory blocks must be offlined before removing memory.  But we don't
hold the lock in the whole operation.  So we should check whether all
memory blocks are offlined before step6.  Otherwise, kernel maybe
panicked.

Offlining a memory block and removing a memory device can be two
different operations.  Users can just offline some memory blocks without
removing the memory device.  For this purpose, the kernel has held
lock_memory_hotplug() in __offline_pages().  To reuse the code for
memory hot-remove, we repeat step 1-3 to offline all the memory blocks,
repeatedly lock and unlock memory hotplug, but not hold the memory
hotplug lock in the whole operation.

Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
Signed-off-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Signed-off-by: Tang Chen <tangchen@cn.fujitsu.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Jiang Liu <jiang.liu@huawei.com>
Cc: Jianguo Wu <wujianguo@huawei.com>
Cc: Kamezawa Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Lai Jiangshan <laijs@cn.fujitsu.com>
Cc: Wu Jianguo <wujianguo@huawei.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/base/memory.c          |  6 ++++++
 include/linux/memory_hotplug.h |  1 +
 mm/memory_hotplug.c            | 48 ++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 55 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index 83d0b17ba1c2..a51007b79032 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -693,6 +693,12 @@ int offline_memory_block(struct memory_block *mem)
 	return ret;
 }
 
+/* return true if the memory block is offlined, otherwise, return false */
+bool is_memblock_offlined(struct memory_block *mem)
+{
+	return mem->state == MEM_OFFLINE;
+}
+
 /*
  * Initialize the sysfs support for memory devices...
  */
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 4a45c4e50025..8dd0950a6a7a 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -247,6 +247,7 @@ extern int add_memory(int nid, u64 start, u64 size);
 extern int arch_add_memory(int nid, u64 start, u64 size);
 extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages);
 extern int offline_memory_block(struct memory_block *mem);
+extern bool is_memblock_offlined(struct memory_block *mem);
 extern int remove_memory(u64 start, u64 size);
 extern int sparse_add_one_section(struct zone *zone, unsigned long start_pfn,
 								int nr_pages);
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 6a82972aeae5..5d350f5c68e5 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1429,6 +1429,54 @@ repeat:
 		goto repeat;
 	}
 
+	lock_memory_hotplug();
+
+	/*
+	 * we have offlined all memory blocks like this:
+	 *   1. lock memory hotplug
+	 *   2. offline a memory block
+	 *   3. unlock memory hotplug
+	 *
+	 * repeat step1-3 to offline the memory block. All memory blocks
+	 * must be offlined before removing memory. But we don't hold the
+	 * lock in the whole operation. So we should check whether all
+	 * memory blocks are offlined.
+	 */
+
+	mem = NULL;
+	for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
+		section_nr = pfn_to_section_nr(pfn);
+		if (!present_section_nr(section_nr))
+			continue;
+
+		section = __nr_to_section(section_nr);
+		/* same memblock? */
+		if (mem)
+			if ((section_nr >= mem->start_section_nr) &&
+			    (section_nr <= mem->end_section_nr))
+				continue;
+
+		mem = find_memory_block_hinted(section, mem);
+		if (!mem)
+			continue;
+
+		ret = is_memblock_offlined(mem);
+		if (!ret) {
+			pr_warn("removing memory fails, because memory "
+				"[%#010llx-%#010llx] is onlined\n",
+				PFN_PHYS(section_nr_to_pfn(mem->start_section_nr)),
+				PFN_PHYS(section_nr_to_pfn(mem->end_section_nr + 1)) - 1);
+
+			kobject_put(&mem->dev.kobj);
+			unlock_memory_hotplug();
+			return ret;
+		}
+	}
+
+	if (mem)
+		kobject_put(&mem->dev.kobj);
+	unlock_memory_hotplug();
+
 	return 0;
 }
 #else
-- 
cgit v1.2.3


From 46c66c4b7ba0f9bb3e2ae3a3cfd40cd3472c8f80 Mon Sep 17 00:00:00 2001
From: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Date: Fri, 22 Feb 2013 16:32:56 -0800
Subject: memory-hotplug: remove /sys/firmware/memmap/X sysfs

When (hot)adding memory into system, /sys/firmware/memmap/X/{end, start,
type} sysfs files are created.  But there is no code to remove these
files.  This patch implements the function to remove them.

We cannot free firmware_map_entry which is allocated by bootmem because
there is no way to do so when the system is up.  But we can at least
remember the address of that memory and reuse the storage when the
memory is added next time.

This patch also introduces a new list map_entries_bootmem to link the
map entries allocated by bootmem when they are removed, and a lock to
protect it.  And these entries will be reused when the memory is
hot-added again.

The idea is suggestted by Andrew Morton.

NOTE: It is unsafe to return an entry pointer and release the
      map_entries_lock.  So we should not hold the map_entries_lock
      separately in firmware_map_find_entry() and
      firmware_map_remove_entry().  Hold the map_entries_lock across find
      and remove /sys/firmware/memmap/X operation.

       And also, users of these two functions need to be careful to
      hold the lock when using these two functions.

[tangchen@cn.fujitsu.com: Hold spinlock across find|remove /sys operation]
[tangchen@cn.fujitsu.com: fix the wrong comments of map_entries]
[tangchen@cn.fujitsu.com: reuse the storage of /sys/firmware/memmap/X/ allocated by bootmem]
[tangchen@cn.fujitsu.com: fix section mismatch problem]
[tangchen@cn.fujitsu.com: fix the doc format in drivers/firmware/memmap.c]
Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
Signed-off-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Signed-off-by: Tang Chen <tangchen@cn.fujitsu.com>
Reviewed-by: Kamezawa Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Jiang Liu <jiang.liu@huawei.com>
Cc: Jianguo Wu <wujianguo@huawei.com>
Cc: Lai Jiangshan <laijs@cn.fujitsu.com>
Cc: Tang Chen <tangchen@cn.fujitsu.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Julian Calaby <julian.calaby@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/firmware/memmap.c    | 196 ++++++++++++++++++++++++++++++++++++++++---
 include/linux/firmware-map.h |   6 ++
 mm/memory_hotplug.c          |   5 +-
 3 files changed, 193 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/firmware/memmap.c b/drivers/firmware/memmap.c
index 90723e65b081..0b5b5f619c75 100644
--- a/drivers/firmware/memmap.c
+++ b/drivers/firmware/memmap.c
@@ -21,6 +21,7 @@
 #include <linux/types.h>
 #include <linux/bootmem.h>
 #include <linux/slab.h>
+#include <linux/mm.h>
 
 /*
  * Data types ------------------------------------------------------------------
@@ -52,6 +53,9 @@ static ssize_t start_show(struct firmware_map_entry *entry, char *buf);
 static ssize_t end_show(struct firmware_map_entry *entry, char *buf);
 static ssize_t type_show(struct firmware_map_entry *entry, char *buf);
 
+static struct firmware_map_entry * __meminit
+firmware_map_find_entry(u64 start, u64 end, const char *type);
+
 /*
  * Static data -----------------------------------------------------------------
  */
@@ -79,7 +83,52 @@ static const struct sysfs_ops memmap_attr_ops = {
 	.show = memmap_attr_show,
 };
 
-static struct kobj_type memmap_ktype = {
+/* Firmware memory map entries. */
+static LIST_HEAD(map_entries);
+static DEFINE_SPINLOCK(map_entries_lock);
+
+/*
+ * For memory hotplug, there is no way to free memory map entries allocated
+ * by boot mem after the system is up. So when we hot-remove memory whose
+ * map entry is allocated by bootmem, we need to remember the storage and
+ * reuse it when the memory is hot-added again.
+ */
+static LIST_HEAD(map_entries_bootmem);
+static DEFINE_SPINLOCK(map_entries_bootmem_lock);
+
+
+static inline struct firmware_map_entry *
+to_memmap_entry(struct kobject *kobj)
+{
+	return container_of(kobj, struct firmware_map_entry, kobj);
+}
+
+static void __meminit release_firmware_map_entry(struct kobject *kobj)
+{
+	struct firmware_map_entry *entry = to_memmap_entry(kobj);
+
+	if (PageReserved(virt_to_page(entry))) {
+		/*
+		 * Remember the storage allocated by bootmem, and reuse it when
+		 * the memory is hot-added again. The entry will be added to
+		 * map_entries_bootmem here, and deleted from &map_entries in
+		 * firmware_map_remove_entry().
+		 */
+		if (firmware_map_find_entry(entry->start, entry->end,
+		    entry->type)) {
+			spin_lock(&map_entries_bootmem_lock);
+			list_add(&entry->list, &map_entries_bootmem);
+			spin_unlock(&map_entries_bootmem_lock);
+		}
+
+		return;
+	}
+
+	kfree(entry);
+}
+
+static struct kobj_type __refdata memmap_ktype = {
+	.release	= release_firmware_map_entry,
 	.sysfs_ops	= &memmap_attr_ops,
 	.default_attrs	= def_attrs,
 };
@@ -88,13 +137,6 @@ static struct kobj_type memmap_ktype = {
  * Registration functions ------------------------------------------------------
  */
 
-/*
- * Firmware memory map entries. No locking is needed because the
- * firmware_map_add() and firmware_map_add_early() functions are called
- * in firmware initialisation code in one single thread of execution.
- */
-static LIST_HEAD(map_entries);
-
 /**
  * firmware_map_add_entry() - Does the real work to add a firmware memmap entry.
  * @start: Start of the memory range.
@@ -118,11 +160,25 @@ static int firmware_map_add_entry(u64 start, u64 end,
 	INIT_LIST_HEAD(&entry->list);
 	kobject_init(&entry->kobj, &memmap_ktype);
 
+	spin_lock(&map_entries_lock);
 	list_add_tail(&entry->list, &map_entries);
+	spin_unlock(&map_entries_lock);
 
 	return 0;
 }
 
+/**
+ * firmware_map_remove_entry() - Does the real work to remove a firmware
+ * memmap entry.
+ * @entry: removed entry.
+ *
+ * The caller must hold map_entries_lock, and release it properly.
+ **/
+static inline void firmware_map_remove_entry(struct firmware_map_entry *entry)
+{
+	list_del(&entry->list);
+}
+
 /*
  * Add memmap entry on sysfs
  */
@@ -144,6 +200,78 @@ static int add_sysfs_fw_map_entry(struct firmware_map_entry *entry)
 	return 0;
 }
 
+/*
+ * Remove memmap entry on sysfs
+ */
+static inline void remove_sysfs_fw_map_entry(struct firmware_map_entry *entry)
+{
+	kobject_put(&entry->kobj);
+}
+
+/*
+ * firmware_map_find_entry_in_list() - Search memmap entry in a given list.
+ * @start: Start of the memory range.
+ * @end:   End of the memory range (exclusive).
+ * @type:  Type of the memory range.
+ * @list:  In which to find the entry.
+ *
+ * This function is to find the memmap entey of a given memory range in a
+ * given list. The caller must hold map_entries_lock, and must not release
+ * the lock until the processing of the returned entry has completed.
+ *
+ * Return: Pointer to the entry to be found on success, or NULL on failure.
+ */
+static struct firmware_map_entry * __meminit
+firmware_map_find_entry_in_list(u64 start, u64 end, const char *type,
+				struct list_head *list)
+{
+	struct firmware_map_entry *entry;
+
+	list_for_each_entry(entry, list, list)
+		if ((entry->start == start) && (entry->end == end) &&
+		    (!strcmp(entry->type, type))) {
+			return entry;
+		}
+
+	return NULL;
+}
+
+/*
+ * firmware_map_find_entry() - Search memmap entry in map_entries.
+ * @start: Start of the memory range.
+ * @end:   End of the memory range (exclusive).
+ * @type:  Type of the memory range.
+ *
+ * This function is to find the memmap entey of a given memory range.
+ * The caller must hold map_entries_lock, and must not release the lock
+ * until the processing of the returned entry has completed.
+ *
+ * Return: Pointer to the entry to be found on success, or NULL on failure.
+ */
+static struct firmware_map_entry * __meminit
+firmware_map_find_entry(u64 start, u64 end, const char *type)
+{
+	return firmware_map_find_entry_in_list(start, end, type, &map_entries);
+}
+
+/*
+ * firmware_map_find_entry_bootmem() - Search memmap entry in map_entries_bootmem.
+ * @start: Start of the memory range.
+ * @end:   End of the memory range (exclusive).
+ * @type:  Type of the memory range.
+ *
+ * This function is similar to firmware_map_find_entry except that it find the
+ * given entry in map_entries_bootmem.
+ *
+ * Return: Pointer to the entry to be found on success, or NULL on failure.
+ */
+static struct firmware_map_entry * __meminit
+firmware_map_find_entry_bootmem(u64 start, u64 end, const char *type)
+{
+	return firmware_map_find_entry_in_list(start, end, type,
+					       &map_entries_bootmem);
+}
+
 /**
  * firmware_map_add_hotplug() - Adds a firmware mapping entry when we do
  * memory hotplug.
@@ -161,9 +289,19 @@ int __meminit firmware_map_add_hotplug(u64 start, u64 end, const char *type)
 {
 	struct firmware_map_entry *entry;
 
-	entry = kzalloc(sizeof(struct firmware_map_entry), GFP_ATOMIC);
-	if (!entry)
-		return -ENOMEM;
+	entry = firmware_map_find_entry_bootmem(start, end, type);
+	if (!entry) {
+		entry = kzalloc(sizeof(struct firmware_map_entry), GFP_ATOMIC);
+		if (!entry)
+			return -ENOMEM;
+	} else {
+		/* Reuse storage allocated by bootmem. */
+		spin_lock(&map_entries_bootmem_lock);
+		list_del(&entry->list);
+		spin_unlock(&map_entries_bootmem_lock);
+
+		memset(entry, 0, sizeof(*entry));
+	}
 
 	firmware_map_add_entry(start, end, type, entry);
 	/* create the memmap entry */
@@ -196,6 +334,36 @@ int __init firmware_map_add_early(u64 start, u64 end, const char *type)
 	return firmware_map_add_entry(start, end, type, entry);
 }
 
+/**
+ * firmware_map_remove() - remove a firmware mapping entry
+ * @start: Start of the memory range.
+ * @end:   End of the memory range.
+ * @type:  Type of the memory range.
+ *
+ * removes a firmware mapping entry.
+ *
+ * Returns 0 on success, or -EINVAL if no entry.
+ **/
+int __meminit firmware_map_remove(u64 start, u64 end, const char *type)
+{
+	struct firmware_map_entry *entry;
+
+	spin_lock(&map_entries_lock);
+	entry = firmware_map_find_entry(start, end - 1, type);
+	if (!entry) {
+		spin_unlock(&map_entries_lock);
+		return -EINVAL;
+	}
+
+	firmware_map_remove_entry(entry);
+	spin_unlock(&map_entries_lock);
+
+	/* remove the memmap entry */
+	remove_sysfs_fw_map_entry(entry);
+
+	return 0;
+}
+
 /*
  * Sysfs functions -------------------------------------------------------------
  */
@@ -217,8 +385,10 @@ static ssize_t type_show(struct firmware_map_entry *entry, char *buf)
 	return snprintf(buf, PAGE_SIZE, "%s\n", entry->type);
 }
 
-#define to_memmap_attr(_attr) container_of(_attr, struct memmap_attribute, attr)
-#define to_memmap_entry(obj) container_of(obj, struct firmware_map_entry, kobj)
+static inline struct memmap_attribute *to_memmap_attr(struct attribute *attr)
+{
+	return container_of(attr, struct memmap_attribute, attr);
+}
 
 static ssize_t memmap_attr_show(struct kobject *kobj,
 				struct attribute *attr, char *buf)
diff --git a/include/linux/firmware-map.h b/include/linux/firmware-map.h
index 43fe52fcef0f..71d4fa721db9 100644
--- a/include/linux/firmware-map.h
+++ b/include/linux/firmware-map.h
@@ -25,6 +25,7 @@
 
 int firmware_map_add_early(u64 start, u64 end, const char *type);
 int firmware_map_add_hotplug(u64 start, u64 end, const char *type);
+int firmware_map_remove(u64 start, u64 end, const char *type);
 
 #else /* CONFIG_FIRMWARE_MEMMAP */
 
@@ -38,6 +39,11 @@ static inline int firmware_map_add_hotplug(u64 start, u64 end, const char *type)
 	return 0;
 }
 
+static inline int firmware_map_remove(u64 start, u64 end, const char *type)
+{
+	return 0;
+}
+
 #endif /* CONFIG_FIRMWARE_MEMMAP */
 
 #endif /* _LINUX_FIRMWARE_MAP_H */
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index e5b91b12cec3..a776dbf3fa00 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1460,7 +1460,7 @@ static int is_memblock_offlined_cb(struct memory_block *mem, void *arg)
 	return ret;
 }
 
-int remove_memory(u64 start, u64 size)
+int __ref remove_memory(u64 start, u64 size)
 {
 	unsigned long start_pfn, end_pfn;
 	int ret = 0;
@@ -1510,6 +1510,9 @@ repeat:
 		return ret;
 	}
 
+	/* remove memmap entry */
+	firmware_map_remove(start, start + size, "System RAM");
+
 	unlock_memory_hotplug();
 
 	return 0;
-- 
cgit v1.2.3


From 24d335ca3606b610ec69c66a1e42760c96d89470 Mon Sep 17 00:00:00 2001
From: Wen Congyang <wency@cn.fujitsu.com>
Date: Fri, 22 Feb 2013 16:32:58 -0800
Subject: memory-hotplug: introduce new arch_remove_memory() for removing page
 table

For removing memory, we need to remove page tables.  But it depends on
architecture.  So the patch introduce arch_remove_memory() for removing
page table.  Now it only calls __remove_pages().

Note: __remove_pages() for some archtecuture is not implemented
      (I don't know how to implement it for s390).

Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
Signed-off-by: Tang Chen <tangchen@cn.fujitsu.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Jiang Liu <jiang.liu@huawei.com>
Cc: Jianguo Wu <wujianguo@huawei.com>
Cc: Kamezawa Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Lai Jiangshan <laijs@cn.fujitsu.com>
Cc: Wu Jianguo <wujianguo@huawei.com>
Cc: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/ia64/mm/init.c            | 18 ++++++++++++++++++
 arch/powerpc/mm/mem.c          | 12 ++++++++++++
 arch/s390/mm/init.c            | 12 ++++++++++++
 arch/sh/mm/init.c              | 17 +++++++++++++++++
 arch/tile/mm/init.c            |  8 ++++++++
 arch/x86/mm/init_32.c          | 12 ++++++++++++
 arch/x86/mm/init_64.c          | 15 +++++++++++++++
 include/linux/memory_hotplug.h |  1 +
 mm/memory_hotplug.c            |  2 ++
 9 files changed, 97 insertions(+)

(limited to 'include/linux')

diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index b755ea92aea7..20bc967c7209 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -688,6 +688,24 @@ int arch_add_memory(int nid, u64 start, u64 size)
 
 	return ret;
 }
+
+#ifdef CONFIG_MEMORY_HOTREMOVE
+int arch_remove_memory(u64 start, u64 size)
+{
+	unsigned long start_pfn = start >> PAGE_SHIFT;
+	unsigned long nr_pages = size >> PAGE_SHIFT;
+	struct zone *zone;
+	int ret;
+
+	zone = page_zone(pfn_to_page(start_pfn));
+	ret = __remove_pages(zone, start_pfn, nr_pages);
+	if (ret)
+		pr_warn("%s: Problem encountered in __remove_pages() as"
+			" ret=%d\n", __func__,  ret);
+
+	return ret;
+}
+#endif
 #endif
 
 /*
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 0dba5066c22a..09c64518b4d0 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -133,6 +133,18 @@ int arch_add_memory(int nid, u64 start, u64 size)
 
 	return __add_pages(nid, zone, start_pfn, nr_pages);
 }
+
+#ifdef CONFIG_MEMORY_HOTREMOVE
+int arch_remove_memory(u64 start, u64 size)
+{
+	unsigned long start_pfn = start >> PAGE_SHIFT;
+	unsigned long nr_pages = size >> PAGE_SHIFT;
+	struct zone *zone;
+
+	zone = page_zone(pfn_to_page(start_pfn));
+	return __remove_pages(zone, start_pfn, nr_pages);
+}
+#endif
 #endif /* CONFIG_MEMORY_HOTPLUG */
 
 /*
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index ae672f41c464..49ce6bb2c641 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -228,4 +228,16 @@ int arch_add_memory(int nid, u64 start, u64 size)
 		vmem_remove_mapping(start, size);
 	return rc;
 }
+
+#ifdef CONFIG_MEMORY_HOTREMOVE
+int arch_remove_memory(u64 start, u64 size)
+{
+	/*
+	 * There is no hardware or firmware interface which could trigger a
+	 * hot memory remove on s390. So there is nothing that needs to be
+	 * implemented.
+	 */
+	return -EBUSY;
+}
+#endif
 #endif /* CONFIG_MEMORY_HOTPLUG */
diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c
index 82cc576fab15..105794037143 100644
--- a/arch/sh/mm/init.c
+++ b/arch/sh/mm/init.c
@@ -558,4 +558,21 @@ int memory_add_physaddr_to_nid(u64 addr)
 EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
 #endif
 
+#ifdef CONFIG_MEMORY_HOTREMOVE
+int arch_remove_memory(u64 start, u64 size)
+{
+	unsigned long start_pfn = start >> PAGE_SHIFT;
+	unsigned long nr_pages = size >> PAGE_SHIFT;
+	struct zone *zone;
+	int ret;
+
+	zone = page_zone(pfn_to_page(start_pfn));
+	ret = __remove_pages(zone, start_pfn, nr_pages);
+	if (unlikely(ret))
+		pr_warn("%s: Failed, __remove_pages() == %d\n", __func__,
+			ret);
+
+	return ret;
+}
+#endif
 #endif /* CONFIG_MEMORY_HOTPLUG */
diff --git a/arch/tile/mm/init.c b/arch/tile/mm/init.c
index ef29d6c5e10e..2749515a0547 100644
--- a/arch/tile/mm/init.c
+++ b/arch/tile/mm/init.c
@@ -935,6 +935,14 @@ int remove_memory(u64 start, u64 size)
 {
 	return -EINVAL;
 }
+
+#ifdef CONFIG_MEMORY_HOTREMOVE
+int arch_remove_memory(u64 start, u64 size)
+{
+	/* TODO */
+	return -EBUSY;
+}
+#endif
 #endif
 
 struct kmem_cache *pgd_cache;
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index b299724f6e34..2d19001151d5 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -862,6 +862,18 @@ int arch_add_memory(int nid, u64 start, u64 size)
 
 	return __add_pages(nid, zone, start_pfn, nr_pages);
 }
+
+#ifdef CONFIG_MEMORY_HOTREMOVE
+int arch_remove_memory(u64 start, u64 size)
+{
+	unsigned long start_pfn = start >> PAGE_SHIFT;
+	unsigned long nr_pages = size >> PAGE_SHIFT;
+	struct zone *zone;
+
+	zone = page_zone(pfn_to_page(start_pfn));
+	return __remove_pages(zone, start_pfn, nr_pages);
+}
+#endif
 #endif
 
 /*
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 3eba7f429880..b6dd1c480b30 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -707,6 +707,21 @@ int arch_add_memory(int nid, u64 start, u64 size)
 }
 EXPORT_SYMBOL_GPL(arch_add_memory);
 
+#ifdef CONFIG_MEMORY_HOTREMOVE
+int __ref arch_remove_memory(u64 start, u64 size)
+{
+	unsigned long start_pfn = start >> PAGE_SHIFT;
+	unsigned long nr_pages = size >> PAGE_SHIFT;
+	struct zone *zone;
+	int ret;
+
+	zone = page_zone(pfn_to_page(start_pfn));
+	ret = __remove_pages(zone, start_pfn, nr_pages);
+	WARN_ON_ONCE(ret);
+
+	return ret;
+}
+#endif
 #endif /* CONFIG_MEMORY_HOTPLUG */
 
 static struct kcore_list kcore_vsyscall;
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 8dd0950a6a7a..31a563bbd936 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -96,6 +96,7 @@ extern void __online_page_free(struct page *page);
 
 #ifdef CONFIG_MEMORY_HOTREMOVE
 extern bool is_pageblock_removable_nolock(struct page *page);
+extern int arch_remove_memory(u64 start, u64 size);
 #endif /* CONFIG_MEMORY_HOTREMOVE */
 
 /* reasonably generic interface to expand the physical pages in a zone  */
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index a776dbf3fa00..942b43f6d736 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1513,6 +1513,8 @@ repeat:
 	/* remove memmap entry */
 	firmware_map_remove(start, start + size, "System RAM");
 
+	arch_remove_memory(start, size);
+
 	unlock_memory_hotplug();
 
 	return 0;
-- 
cgit v1.2.3


From 46723bfa540f0a1e494476a1734d03626a0bd1e0 Mon Sep 17 00:00:00 2001
From: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Date: Fri, 22 Feb 2013 16:33:00 -0800
Subject: memory-hotplug: implement register_page_bootmem_info_section of
 sparse-vmemmap

For removing memmap region of sparse-vmemmap which is allocated bootmem,
memmap region of sparse-vmemmap needs to be registered by
get_page_bootmem().  So the patch searches pages of virtual mapping and
registers the pages by get_page_bootmem().

NOTE: register_page_bootmem_memmap() is not implemented for ia64,
      ppc, s390, and sparc.  So introduce CONFIG_HAVE_BOOTMEM_INFO_NODE
      and revert register_page_bootmem_info_node() when platform doesn't
      support it.

      It's implemented by adding a new Kconfig option named
      CONFIG_HAVE_BOOTMEM_INFO_NODE, which will be automatically selected
      by memory-hotplug feature fully supported archs(currently only on
      x86_64).

      Since we have 2 config options called MEMORY_HOTPLUG and
      MEMORY_HOTREMOVE used for memory hot-add and hot-remove separately,
      and codes in function register_page_bootmem_info_node() are only
      used for collecting infomation for hot-remove, so reside it under
      MEMORY_HOTREMOVE.

      Besides page_isolation.c selected by MEMORY_ISOLATION under
      MEMORY_HOTPLUG is also such case, move it too.

[mhocko@suse.cz: put register_page_bootmem_memmap inside CONFIG_MEMORY_HOTPLUG_SPARSE]
[linfeng@cn.fujitsu.com: introduce CONFIG_HAVE_BOOTMEM_INFO_NODE and revert register_page_bootmem_info_node()]
[mhocko@suse.cz: remove the arch specific functions without any implementation]
[linfeng@cn.fujitsu.com: mm/Kconfig: move auto selects from MEMORY_HOTPLUG to MEMORY_HOTREMOVE as needed]
[rientjes@google.com: fix defined but not used warning]
Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
Signed-off-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Signed-off-by: Tang Chen <tangchen@cn.fujitsu.com>
Reviewed-by: Wu Jianguo <wujianguo@huawei.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Jiang Liu <jiang.liu@huawei.com>
Cc: Jianguo Wu <wujianguo@huawei.com>
Cc: Kamezawa Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Lai Jiangshan <laijs@cn.fujitsu.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Michal Hocko <mhocko@suse.cz>
Signed-off-by: Lin Feng <linfeng@cn.fujitsu.com>
Signed-off-by: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/ia64/mm/discontig.c       |  1 +
 arch/powerpc/mm/init_64.c      |  1 +
 arch/sparc/mm/init_64.c        |  1 +
 arch/x86/mm/init_64.c          | 60 ++++++++++++++++++++++++++++++++++++++++++
 include/linux/memory_hotplug.h | 13 +++++----
 include/linux/mm.h             |  3 ++-
 mm/Kconfig                     | 10 ++++++-
 mm/memory_hotplug.c            | 35 +++++++++++++++++++++---
 8 files changed, 111 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
index c641333cd997..731bf84094b6 100644
--- a/arch/ia64/mm/discontig.c
+++ b/arch/ia64/mm/discontig.c
@@ -822,4 +822,5 @@ int __meminit vmemmap_populate(struct page *start_page,
 {
 	return vmemmap_populate_basepages(start_page, size, node);
 }
+
 #endif
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index 95a45293e5ac..42bf082f0124 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -297,5 +297,6 @@ int __meminit vmemmap_populate(struct page *start_page,
 
 	return 0;
 }
+
 #endif /* CONFIG_SPARSEMEM_VMEMMAP */
 
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index 5c2c6e61facb..59c6fcfdc782 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -2235,6 +2235,7 @@ void __meminit vmemmap_populate_print_last(void)
 		node_start = 0;
 	}
 }
+
 #endif /* CONFIG_SPARSEMEM_VMEMMAP */
 
 static void prot_init_common(unsigned long page_none,
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index b6dd1c480b30..f17aa76dc1ae 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1034,6 +1034,66 @@ vmemmap_populate(struct page *start_page, unsigned long size, int node)
 	return 0;
 }
 
+#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_HAVE_BOOTMEM_INFO_NODE)
+void register_page_bootmem_memmap(unsigned long section_nr,
+				  struct page *start_page, unsigned long size)
+{
+	unsigned long addr = (unsigned long)start_page;
+	unsigned long end = (unsigned long)(start_page + size);
+	unsigned long next;
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	unsigned int nr_pages;
+	struct page *page;
+
+	for (; addr < end; addr = next) {
+		pte_t *pte = NULL;
+
+		pgd = pgd_offset_k(addr);
+		if (pgd_none(*pgd)) {
+			next = (addr + PAGE_SIZE) & PAGE_MASK;
+			continue;
+		}
+		get_page_bootmem(section_nr, pgd_page(*pgd), MIX_SECTION_INFO);
+
+		pud = pud_offset(pgd, addr);
+		if (pud_none(*pud)) {
+			next = (addr + PAGE_SIZE) & PAGE_MASK;
+			continue;
+		}
+		get_page_bootmem(section_nr, pud_page(*pud), MIX_SECTION_INFO);
+
+		if (!cpu_has_pse) {
+			next = (addr + PAGE_SIZE) & PAGE_MASK;
+			pmd = pmd_offset(pud, addr);
+			if (pmd_none(*pmd))
+				continue;
+			get_page_bootmem(section_nr, pmd_page(*pmd),
+					 MIX_SECTION_INFO);
+
+			pte = pte_offset_kernel(pmd, addr);
+			if (pte_none(*pte))
+				continue;
+			get_page_bootmem(section_nr, pte_page(*pte),
+					 SECTION_INFO);
+		} else {
+			next = pmd_addr_end(addr, end);
+
+			pmd = pmd_offset(pud, addr);
+			if (pmd_none(*pmd))
+				continue;
+
+			nr_pages = 1 << (get_order(PMD_SIZE));
+			page = pmd_page(*pmd);
+			while (nr_pages--)
+				get_page_bootmem(section_nr, page++,
+						 SECTION_INFO);
+		}
+	}
+}
+#endif
+
 void __meminit vmemmap_populate_print_last(void)
 {
 	if (p_start) {
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 31a563bbd936..4d523fe75ba1 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -174,17 +174,16 @@ static inline void arch_refresh_nodedata(int nid, pg_data_t *pgdat)
 #endif /* CONFIG_NUMA */
 #endif /* CONFIG_HAVE_ARCH_NODEDATA_EXTENSION */
 
-#ifdef CONFIG_SPARSEMEM_VMEMMAP
+#ifdef CONFIG_HAVE_BOOTMEM_INFO_NODE
+extern void register_page_bootmem_info_node(struct pglist_data *pgdat);
+#else
 static inline void register_page_bootmem_info_node(struct pglist_data *pgdat)
 {
 }
-static inline void put_page_bootmem(struct page *page)
-{
-}
-#else
-extern void register_page_bootmem_info_node(struct pglist_data *pgdat);
-extern void put_page_bootmem(struct page *page);
 #endif
+extern void put_page_bootmem(struct page *page);
+extern void get_page_bootmem(unsigned long ingo, struct page *page,
+			     unsigned long type);
 
 /*
  * Lock for memory hotplug guarantees 1) all callbacks for memory hotplug
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 95db68e34b18..060557b9764f 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1718,7 +1718,8 @@ int vmemmap_populate_basepages(struct page *start_page,
 						unsigned long pages, int node);
 int vmemmap_populate(struct page *start_page, unsigned long pages, int node);
 void vmemmap_populate_print_last(void);
-
+void register_page_bootmem_memmap(unsigned long section_nr, struct page *map,
+				  unsigned long size);
 
 enum mf_flags {
 	MF_COUNT_INCREASED = 1 << 0,
diff --git a/mm/Kconfig b/mm/Kconfig
index 0b23db9a8791..2c7aea7106f9 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -162,10 +162,16 @@ config MOVABLE_NODE
 	  Say Y here if you want to hotplug a whole node.
 	  Say N here if you want kernel to use memory on all nodes evenly.
 
+#
+# Only be set on architectures that have completely implemented memory hotplug
+# feature. If you are not sure, don't touch it.
+#
+config HAVE_BOOTMEM_INFO_NODE
+	def_bool n
+
 # eventually, we can have this option just 'select SPARSEMEM'
 config MEMORY_HOTPLUG
 	bool "Allow for memory hot-add"
-	select MEMORY_ISOLATION
 	depends on SPARSEMEM || X86_64_ACPI_NUMA
 	depends on HOTPLUG && ARCH_ENABLE_MEMORY_HOTPLUG
 	depends on (IA64 || X86 || PPC_BOOK3S_64 || SUPERH || S390)
@@ -176,6 +182,8 @@ config MEMORY_HOTPLUG_SPARSE
 
 config MEMORY_HOTREMOVE
 	bool "Allow for memory hot remove"
+	select MEMORY_ISOLATION
+	select HAVE_BOOTMEM_INFO_NODE if X86_64
 	depends on MEMORY_HOTPLUG && ARCH_ENABLE_MEMORY_HOTREMOVE
 	depends on MIGRATION
 
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 942b43f6d736..6c90d222ec0a 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -91,9 +91,8 @@ static void release_memory_resource(struct resource *res)
 }
 
 #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
-#ifndef CONFIG_SPARSEMEM_VMEMMAP
-static void get_page_bootmem(unsigned long info,  struct page *page,
-			     unsigned long type)
+void get_page_bootmem(unsigned long info,  struct page *page,
+		      unsigned long type)
 {
 	page->lru.next = (struct list_head *) type;
 	SetPagePrivate(page);
@@ -128,6 +127,8 @@ void __ref put_page_bootmem(struct page *page)
 
 }
 
+#ifdef CONFIG_HAVE_BOOTMEM_INFO_NODE
+#ifndef CONFIG_SPARSEMEM_VMEMMAP
 static void register_page_bootmem_info_section(unsigned long start_pfn)
 {
 	unsigned long *usemap, mapsize, section_nr, i;
@@ -161,6 +162,32 @@ static void register_page_bootmem_info_section(unsigned long start_pfn)
 		get_page_bootmem(section_nr, page, MIX_SECTION_INFO);
 
 }
+#else /* CONFIG_SPARSEMEM_VMEMMAP */
+static void register_page_bootmem_info_section(unsigned long start_pfn)
+{
+	unsigned long *usemap, mapsize, section_nr, i;
+	struct mem_section *ms;
+	struct page *page, *memmap;
+
+	if (!pfn_valid(start_pfn))
+		return;
+
+	section_nr = pfn_to_section_nr(start_pfn);
+	ms = __nr_to_section(section_nr);
+
+	memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr);
+
+	register_page_bootmem_memmap(section_nr, memmap, PAGES_PER_SECTION);
+
+	usemap = __nr_to_section(section_nr)->pageblock_flags;
+	page = virt_to_page(usemap);
+
+	mapsize = PAGE_ALIGN(usemap_size()) >> PAGE_SHIFT;
+
+	for (i = 0; i < mapsize; i++, page++)
+		get_page_bootmem(section_nr, page, MIX_SECTION_INFO);
+}
+#endif /* !CONFIG_SPARSEMEM_VMEMMAP */
 
 void register_page_bootmem_info_node(struct pglist_data *pgdat)
 {
@@ -203,7 +230,7 @@ void register_page_bootmem_info_node(struct pglist_data *pgdat)
 			register_page_bootmem_info_section(pfn);
 	}
 }
-#endif /* !CONFIG_SPARSEMEM_VMEMMAP */
+#endif /* CONFIG_HAVE_BOOTMEM_INFO_NODE */
 
 static void grow_zone_span(struct zone *zone, unsigned long start_pfn,
 			   unsigned long end_pfn)
-- 
cgit v1.2.3


From ae9aae9eda2db71bf4b592f15618b0160eb07731 Mon Sep 17 00:00:00 2001
From: Wen Congyang <wency@cn.fujitsu.com>
Date: Fri, 22 Feb 2013 16:33:04 -0800
Subject: memory-hotplug: common APIs to support page tables hot-remove

When memory is removed, the corresponding pagetables should alse be
removed.  This patch introduces some common APIs to support vmemmap
pagetable and x86_64 architecture direct mapping pagetable removing.

All pages of virtual mapping in removed memory cannot be freed if some
pages used as PGD/PUD include not only removed memory but also other
memory.  So this patch uses the following way to check whether a page
can be freed or not.

1) When removing memory, the page structs of the removed memory are
   filled with 0FD.

2) All page structs are filled with 0xFD on PT/PMD, PT/PMD can be
   cleared.  In this case, the page used as PT/PMD can be freed.

For direct mapping pages, update direct_pages_count[level] when we freed
their pagetables.  And do not free the pages again because they were
freed when offlining.

For vmemmap pages, free the pages and their pagetables.

For larger pages, do not split them into smaller ones because there is
no way to know if the larger page has been split.  As a result, there is
no way to decide when to split.  We deal the larger pages in the
following way:

1) For direct mapped pages, all the pages were freed when they were
   offlined.  And since menmory offline is done section by section, all
   the memory ranges being removed are aligned to PAGE_SIZE.  So only need
   to deal with unaligned pages when freeing vmemmap pages.

2) For vmemmap pages being used to store page_struct, if part of the
   larger page is still in use, just fill the unused part with 0xFD.  And
   when the whole page is fulfilled with 0xFD, then free the larger page.

[akpm@linux-foundation.org: fix typo in comment]
[tangchen@cn.fujitsu.com: do not calculate direct mapping pages when freeing vmemmap pagetables]
[tangchen@cn.fujitsu.com: do not free direct mapping pages twice]
[tangchen@cn.fujitsu.com: do not free page split from hugepage one by one]
[tangchen@cn.fujitsu.com: do not split pages when freeing pagetable pages]
[akpm@linux-foundation.org: use pmd_page_vaddr()]
[akpm@linux-foundation.org: fix used-uninitialised bug]
Signed-off-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Signed-off-by: Jianguo Wu <wujianguo@huawei.com>
Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
Signed-off-by: Tang Chen <tangchen@cn.fujitsu.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Jiang Liu <jiang.liu@huawei.com>
Cc: Kamezawa Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Lai Jiangshan <laijs@cn.fujitsu.com>
Cc: Wu Jianguo <wujianguo@huawei.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/include/asm/pgtable_types.h |   1 +
 arch/x86/mm/init_64.c                | 304 +++++++++++++++++++++++++++++++++++
 arch/x86/mm/pageattr.c               |  47 +++---
 include/linux/bootmem.h              |   1 +
 4 files changed, 331 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index e6423002c10b..567b5d0632b2 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -351,6 +351,7 @@ static inline void update_page_count(int level, unsigned long pages) { }
  * as a pte too.
  */
 extern pte_t *lookup_address(unsigned long address, unsigned int *level);
+extern int __split_large_page(pte_t *kpte, unsigned long address, pte_t *pbase);
 extern phys_addr_t slow_virt_to_phys(void *__address);
 
 #endif	/* !__ASSEMBLY__ */
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index f17aa76dc1ae..ca6cd403a275 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -707,6 +707,310 @@ int arch_add_memory(int nid, u64 start, u64 size)
 }
 EXPORT_SYMBOL_GPL(arch_add_memory);
 
+#define PAGE_INUSE 0xFD
+
+static void __meminit free_pagetable(struct page *page, int order)
+{
+	struct zone *zone;
+	bool bootmem = false;
+	unsigned long magic;
+	unsigned int nr_pages = 1 << order;
+
+	/* bootmem page has reserved flag */
+	if (PageReserved(page)) {
+		__ClearPageReserved(page);
+		bootmem = true;
+
+		magic = (unsigned long)page->lru.next;
+		if (magic == SECTION_INFO || magic == MIX_SECTION_INFO) {
+			while (nr_pages--)
+				put_page_bootmem(page++);
+		} else
+			__free_pages_bootmem(page, order);
+	} else
+		free_pages((unsigned long)page_address(page), order);
+
+	/*
+	 * SECTION_INFO pages and MIX_SECTION_INFO pages
+	 * are all allocated by bootmem.
+	 */
+	if (bootmem) {
+		zone = page_zone(page);
+		zone_span_writelock(zone);
+		zone->present_pages += nr_pages;
+		zone_span_writeunlock(zone);
+		totalram_pages += nr_pages;
+	}
+}
+
+static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd)
+{
+	pte_t *pte;
+	int i;
+
+	for (i = 0; i < PTRS_PER_PTE; i++) {
+		pte = pte_start + i;
+		if (pte_val(*pte))
+			return;
+	}
+
+	/* free a pte talbe */
+	free_pagetable(pmd_page(*pmd), 0);
+	spin_lock(&init_mm.page_table_lock);
+	pmd_clear(pmd);
+	spin_unlock(&init_mm.page_table_lock);
+}
+
+static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud)
+{
+	pmd_t *pmd;
+	int i;
+
+	for (i = 0; i < PTRS_PER_PMD; i++) {
+		pmd = pmd_start + i;
+		if (pmd_val(*pmd))
+			return;
+	}
+
+	/* free a pmd talbe */
+	free_pagetable(pud_page(*pud), 0);
+	spin_lock(&init_mm.page_table_lock);
+	pud_clear(pud);
+	spin_unlock(&init_mm.page_table_lock);
+}
+
+/* Return true if pgd is changed, otherwise return false. */
+static bool __meminit free_pud_table(pud_t *pud_start, pgd_t *pgd)
+{
+	pud_t *pud;
+	int i;
+
+	for (i = 0; i < PTRS_PER_PUD; i++) {
+		pud = pud_start + i;
+		if (pud_val(*pud))
+			return false;
+	}
+
+	/* free a pud table */
+	free_pagetable(pgd_page(*pgd), 0);
+	spin_lock(&init_mm.page_table_lock);
+	pgd_clear(pgd);
+	spin_unlock(&init_mm.page_table_lock);
+
+	return true;
+}
+
+static void __meminit
+remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end,
+		 bool direct)
+{
+	unsigned long next, pages = 0;
+	pte_t *pte;
+	void *page_addr;
+	phys_addr_t phys_addr;
+
+	pte = pte_start + pte_index(addr);
+	for (; addr < end; addr = next, pte++) {
+		next = (addr + PAGE_SIZE) & PAGE_MASK;
+		if (next > end)
+			next = end;
+
+		if (!pte_present(*pte))
+			continue;
+
+		/*
+		 * We mapped [0,1G) memory as identity mapping when
+		 * initializing, in arch/x86/kernel/head_64.S. These
+		 * pagetables cannot be removed.
+		 */
+		phys_addr = pte_val(*pte) + (addr & PAGE_MASK);
+		if (phys_addr < (phys_addr_t)0x40000000)
+			return;
+
+		if (IS_ALIGNED(addr, PAGE_SIZE) &&
+		    IS_ALIGNED(next, PAGE_SIZE)) {
+			/*
+			 * Do not free direct mapping pages since they were
+			 * freed when offlining, or simplely not in use.
+			 */
+			if (!direct)
+				free_pagetable(pte_page(*pte), 0);
+
+			spin_lock(&init_mm.page_table_lock);
+			pte_clear(&init_mm, addr, pte);
+			spin_unlock(&init_mm.page_table_lock);
+
+			/* For non-direct mapping, pages means nothing. */
+			pages++;
+		} else {
+			/*
+			 * If we are here, we are freeing vmemmap pages since
+			 * direct mapped memory ranges to be freed are aligned.
+			 *
+			 * If we are not removing the whole page, it means
+			 * other page structs in this page are being used and
+			 * we canot remove them. So fill the unused page_structs
+			 * with 0xFD, and remove the page when it is wholly
+			 * filled with 0xFD.
+			 */
+			memset((void *)addr, PAGE_INUSE, next - addr);
+
+			page_addr = page_address(pte_page(*pte));
+			if (!memchr_inv(page_addr, PAGE_INUSE, PAGE_SIZE)) {
+				free_pagetable(pte_page(*pte), 0);
+
+				spin_lock(&init_mm.page_table_lock);
+				pte_clear(&init_mm, addr, pte);
+				spin_unlock(&init_mm.page_table_lock);
+			}
+		}
+	}
+
+	/* Call free_pte_table() in remove_pmd_table(). */
+	flush_tlb_all();
+	if (direct)
+		update_page_count(PG_LEVEL_4K, -pages);
+}
+
+static void __meminit
+remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end,
+		 bool direct)
+{
+	unsigned long next, pages = 0;
+	pte_t *pte_base;
+	pmd_t *pmd;
+	void *page_addr;
+
+	pmd = pmd_start + pmd_index(addr);
+	for (; addr < end; addr = next, pmd++) {
+		next = pmd_addr_end(addr, end);
+
+		if (!pmd_present(*pmd))
+			continue;
+
+		if (pmd_large(*pmd)) {
+			if (IS_ALIGNED(addr, PMD_SIZE) &&
+			    IS_ALIGNED(next, PMD_SIZE)) {
+				if (!direct)
+					free_pagetable(pmd_page(*pmd),
+						       get_order(PMD_SIZE));
+
+				spin_lock(&init_mm.page_table_lock);
+				pmd_clear(pmd);
+				spin_unlock(&init_mm.page_table_lock);
+				pages++;
+			} else {
+				/* If here, we are freeing vmemmap pages. */
+				memset((void *)addr, PAGE_INUSE, next - addr);
+
+				page_addr = page_address(pmd_page(*pmd));
+				if (!memchr_inv(page_addr, PAGE_INUSE,
+						PMD_SIZE)) {
+					free_pagetable(pmd_page(*pmd),
+						       get_order(PMD_SIZE));
+
+					spin_lock(&init_mm.page_table_lock);
+					pmd_clear(pmd);
+					spin_unlock(&init_mm.page_table_lock);
+				}
+			}
+
+			continue;
+		}
+
+		pte_base = (pte_t *)pmd_page_vaddr(*pmd);
+		remove_pte_table(pte_base, addr, next, direct);
+		free_pte_table(pte_base, pmd);
+	}
+
+	/* Call free_pmd_table() in remove_pud_table(). */
+	if (direct)
+		update_page_count(PG_LEVEL_2M, -pages);
+}
+
+static void __meminit
+remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end,
+		 bool direct)
+{
+	unsigned long next, pages = 0;
+	pmd_t *pmd_base;
+	pud_t *pud;
+	void *page_addr;
+
+	pud = pud_start + pud_index(addr);
+	for (; addr < end; addr = next, pud++) {
+		next = pud_addr_end(addr, end);
+
+		if (!pud_present(*pud))
+			continue;
+
+		if (pud_large(*pud)) {
+			if (IS_ALIGNED(addr, PUD_SIZE) &&
+			    IS_ALIGNED(next, PUD_SIZE)) {
+				if (!direct)
+					free_pagetable(pud_page(*pud),
+						       get_order(PUD_SIZE));
+
+				spin_lock(&init_mm.page_table_lock);
+				pud_clear(pud);
+				spin_unlock(&init_mm.page_table_lock);
+				pages++;
+			} else {
+				/* If here, we are freeing vmemmap pages. */
+				memset((void *)addr, PAGE_INUSE, next - addr);
+
+				page_addr = page_address(pud_page(*pud));
+				if (!memchr_inv(page_addr, PAGE_INUSE,
+						PUD_SIZE)) {
+					free_pagetable(pud_page(*pud),
+						       get_order(PUD_SIZE));
+
+					spin_lock(&init_mm.page_table_lock);
+					pud_clear(pud);
+					spin_unlock(&init_mm.page_table_lock);
+				}
+			}
+
+			continue;
+		}
+
+		pmd_base = (pmd_t *)pud_page_vaddr(*pud);
+		remove_pmd_table(pmd_base, addr, next, direct);
+		free_pmd_table(pmd_base, pud);
+	}
+
+	if (direct)
+		update_page_count(PG_LEVEL_1G, -pages);
+}
+
+/* start and end are both virtual address. */
+static void __meminit
+remove_pagetable(unsigned long start, unsigned long end, bool direct)
+{
+	unsigned long next;
+	pgd_t *pgd;
+	pud_t *pud;
+	bool pgd_changed = false;
+
+	for (; start < end; start = next) {
+		next = pgd_addr_end(start, end);
+
+		pgd = pgd_offset_k(start);
+		if (!pgd_present(*pgd))
+			continue;
+
+		pud = (pud_t *)pgd_page_vaddr(*pgd);
+		remove_pud_table(pud, start, next, direct);
+		if (free_pud_table(pud, pgd))
+			pgd_changed = true;
+	}
+
+	if (pgd_changed)
+		sync_global_pgds(start, end - 1);
+
+	flush_tlb_all();
+}
+
 #ifdef CONFIG_MEMORY_HOTREMOVE
 int __ref arch_remove_memory(u64 start, u64 size)
 {
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index a1b1c88f9caf..ca1f1c2bb7be 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -529,21 +529,13 @@ out_unlock:
 	return do_split;
 }
 
-static int split_large_page(pte_t *kpte, unsigned long address)
+int __split_large_page(pte_t *kpte, unsigned long address, pte_t *pbase)
 {
 	unsigned long pfn, pfninc = 1;
 	unsigned int i, level;
-	pte_t *pbase, *tmp;
+	pte_t *tmp;
 	pgprot_t ref_prot;
-	struct page *base;
-
-	if (!debug_pagealloc)
-		spin_unlock(&cpa_lock);
-	base = alloc_pages(GFP_KERNEL | __GFP_NOTRACK, 0);
-	if (!debug_pagealloc)
-		spin_lock(&cpa_lock);
-	if (!base)
-		return -ENOMEM;
+	struct page *base = virt_to_page(pbase);
 
 	spin_lock(&pgd_lock);
 	/*
@@ -551,10 +543,11 @@ static int split_large_page(pte_t *kpte, unsigned long address)
 	 * up for us already:
 	 */
 	tmp = lookup_address(address, &level);
-	if (tmp != kpte)
-		goto out_unlock;
+	if (tmp != kpte) {
+		spin_unlock(&pgd_lock);
+		return 1;
+	}
 
-	pbase = (pte_t *)page_address(base);
 	paravirt_alloc_pte(&init_mm, page_to_pfn(base));
 	ref_prot = pte_pgprot(pte_clrhuge(*kpte));
 	/*
@@ -601,17 +594,27 @@ static int split_large_page(pte_t *kpte, unsigned long address)
 	 * going on.
 	 */
 	__flush_tlb_all();
+	spin_unlock(&pgd_lock);
 
-	base = NULL;
+	return 0;
+}
 
-out_unlock:
-	/*
-	 * If we dropped out via the lookup_address check under
-	 * pgd_lock then stick the page back into the pool:
-	 */
-	if (base)
+static int split_large_page(pte_t *kpte, unsigned long address)
+{
+	pte_t *pbase;
+	struct page *base;
+
+	if (!debug_pagealloc)
+		spin_unlock(&cpa_lock);
+	base = alloc_pages(GFP_KERNEL | __GFP_NOTRACK, 0);
+	if (!debug_pagealloc)
+		spin_lock(&cpa_lock);
+	if (!base)
+		return -ENOMEM;
+
+	pbase = (pte_t *)page_address(base);
+	if (__split_large_page(kpte, address, pbase))
 		__free_page(base);
-	spin_unlock(&pgd_lock);
 
 	return 0;
 }
diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index 3cd16ba82f15..cdc3bab01832 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -53,6 +53,7 @@ extern void free_bootmem_node(pg_data_t *pgdat,
 			      unsigned long size);
 extern void free_bootmem(unsigned long physaddr, unsigned long size);
 extern void free_bootmem_late(unsigned long physaddr, unsigned long size);
+extern void __free_pages_bootmem(struct page *page, unsigned int order);
 
 /*
  * Flags for reserve_bootmem (also if CONFIG_HAVE_ARCH_BOOTMEM_NODE,
-- 
cgit v1.2.3


From 0197518cd3672029618a16a57597946a094ac7a8 Mon Sep 17 00:00:00 2001
From: Tang Chen <tangchen@cn.fujitsu.com>
Date: Fri, 22 Feb 2013 16:33:08 -0800
Subject: memory-hotplug: remove memmap of sparse-vmemmap

Introduce a new API vmemmap_free() to free and remove vmemmap
pagetables.  Since pagetable implements are different, each architecture
has to provide its own version of vmemmap_free(), just like
vmemmap_populate().

Note: vmemmap_free() is not implemented for ia64, ppc, s390, and sparc.

[mhocko@suse.cz: fix implicit declaration of remove_pagetable]
Signed-off-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Signed-off-by: Jianguo Wu <wujianguo@huawei.com>
Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
Signed-off-by: Tang Chen <tangchen@cn.fujitsu.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Jiang Liu <jiang.liu@huawei.com>
Cc: Kamezawa Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Lai Jiangshan <laijs@cn.fujitsu.com>
Cc: Wu Jianguo <wujianguo@huawei.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Michal Hocko <mhocko@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm64/mm/mmu.c       | 3 +++
 arch/ia64/mm/discontig.c  | 3 +++
 arch/powerpc/mm/init_64.c | 4 ++++
 arch/s390/mm/vmem.c       | 4 ++++
 arch/sparc/mm/init_64.c   | 4 ++++
 arch/x86/mm/init_64.c     | 8 ++++++++
 include/linux/mm.h        | 3 +++
 mm/sparse.c               | 3 ++-
 8 files changed, 31 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index f4dd585898c5..224b44ab534e 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -434,4 +434,7 @@ int __meminit vmemmap_populate(struct page *start_page,
 	return 0;
 }
 #endif	/* CONFIG_ARM64_64K_PAGES */
+void vmemmap_free(struct page *memmap, unsigned long nr_pages)
+{
+}
 #endif	/* CONFIG_SPARSEMEM_VMEMMAP */
diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
index 731bf84094b6..652fee097921 100644
--- a/arch/ia64/mm/discontig.c
+++ b/arch/ia64/mm/discontig.c
@@ -823,4 +823,7 @@ int __meminit vmemmap_populate(struct page *start_page,
 	return vmemmap_populate_basepages(start_page, size, node);
 }
 
+void vmemmap_free(struct page *memmap, unsigned long nr_pages)
+{
+}
 #endif
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index 42bf082f0124..7e2246fb2f31 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -298,5 +298,9 @@ int __meminit vmemmap_populate(struct page *start_page,
 	return 0;
 }
 
+void vmemmap_free(struct page *memmap, unsigned long nr_pages)
+{
+}
+
 #endif /* CONFIG_SPARSEMEM_VMEMMAP */
 
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index 79699f46a443..e21aaf4f5cb6 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -268,6 +268,10 @@ out:
 	return ret;
 }
 
+void vmemmap_free(struct page *memmap, unsigned long nr_pages)
+{
+}
+
 /*
  * Add memory segment to the segment list if it doesn't overlap with
  * an already present segment.
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index 59c6fcfdc782..1588d33d5492 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -2236,6 +2236,10 @@ void __meminit vmemmap_populate_print_last(void)
 	}
 }
 
+void vmemmap_free(struct page *memmap, unsigned long nr_pages)
+{
+}
+
 #endif /* CONFIG_SPARSEMEM_VMEMMAP */
 
 static void prot_init_common(unsigned long page_none,
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 4e58b83e6b2e..474e28f10815 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1011,6 +1011,14 @@ remove_pagetable(unsigned long start, unsigned long end, bool direct)
 	flush_tlb_all();
 }
 
+void __ref vmemmap_free(struct page *memmap, unsigned long nr_pages)
+{
+	unsigned long start = (unsigned long)memmap;
+	unsigned long end = (unsigned long)(memmap + nr_pages);
+
+	remove_pagetable(start, end, false);
+}
+
 static void __meminit
 kernel_physical_mapping_remove(unsigned long start, unsigned long end)
 {
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 060557b9764f..9d659491c0ae 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1718,6 +1718,9 @@ int vmemmap_populate_basepages(struct page *start_page,
 						unsigned long pages, int node);
 int vmemmap_populate(struct page *start_page, unsigned long pages, int node);
 void vmemmap_populate_print_last(void);
+#ifdef CONFIG_MEMORY_HOTPLUG
+void vmemmap_free(struct page *memmap, unsigned long nr_pages);
+#endif
 void register_page_bootmem_memmap(unsigned long section_nr, struct page *map,
 				  unsigned long size);
 
diff --git a/mm/sparse.c b/mm/sparse.c
index 66f0fd9d7964..46f6ea47d9ab 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -615,10 +615,11 @@ static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid,
 }
 static void __kfree_section_memmap(struct page *memmap, unsigned long nr_pages)
 {
-	return; /* XXX: Not implemented yet */
+	vmemmap_free(memmap, nr_pages);
 }
 static void free_map_bootmem(struct page *memmap, unsigned long nr_pages)
 {
+	vmemmap_free(memmap, nr_pages);
 }
 #else
 static struct page *__kmalloc_section_memmap(unsigned long nr_pages)
-- 
cgit v1.2.3


From 60a5a19e7419ba0bc22ed01b3285e8940b42944c Mon Sep 17 00:00:00 2001
From: Tang Chen <tangchen@cn.fujitsu.com>
Date: Fri, 22 Feb 2013 16:33:14 -0800
Subject: memory-hotplug: remove sysfs file of node

Introduce a new function try_offline_node() to remove sysfs file of node
when all memory sections of this node are removed.  If some memory
sections of this node are not removed, this function does nothing.

Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
Signed-off-by: Tang Chen <tangchen@cn.fujitsu.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Jiang Liu <jiang.liu@huawei.com>
Cc: Jianguo Wu <wujianguo@huawei.com>
Cc: Kamezawa Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Lai Jiangshan <laijs@cn.fujitsu.com>
Cc: Wu Jianguo <wujianguo@huawei.com>
Cc: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/acpi/acpi_memhotplug.c |  8 ++++--
 include/linux/memory_hotplug.h |  2 +-
 mm/memory_hotplug.c            | 58 ++++++++++++++++++++++++++++++++++++++++--
 3 files changed, 63 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/acpi_memhotplug.c b/drivers/acpi/acpi_memhotplug.c
index 034d3e72aa92..da1f82b445e0 100644
--- a/drivers/acpi/acpi_memhotplug.c
+++ b/drivers/acpi/acpi_memhotplug.c
@@ -280,9 +280,11 @@ static int acpi_memory_enable_device(struct acpi_memory_device *mem_device)
 
 static int acpi_memory_remove_memory(struct acpi_memory_device *mem_device)
 {
-	int result = 0;
+	int result = 0, nid;
 	struct acpi_memory_info *info, *n;
 
+	nid = acpi_get_node(mem_device->device->handle);
+
 	list_for_each_entry_safe(info, n, &mem_device->res_list, list) {
 		if (info->failed)
 			/* The kernel does not use this memory block */
@@ -295,7 +297,9 @@ static int acpi_memory_remove_memory(struct acpi_memory_device *mem_device)
 			 */
 			return -EBUSY;
 
-		result = remove_memory(info->start_addr, info->length);
+		if (nid < 0)
+			nid = memory_add_physaddr_to_nid(info->start_addr);
+		result = remove_memory(nid, info->start_addr, info->length);
 		if (result)
 			return result;
 
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 4d523fe75ba1..69903ccf549e 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -248,7 +248,7 @@ extern int arch_add_memory(int nid, u64 start, u64 size);
 extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages);
 extern int offline_memory_block(struct memory_block *mem);
 extern bool is_memblock_offlined(struct memory_block *mem);
-extern int remove_memory(u64 start, u64 size);
+extern int remove_memory(int nid, u64 start, u64 size);
 extern int sparse_add_one_section(struct zone *zone, unsigned long start_pfn,
 								int nr_pages);
 extern void sparse_remove_one_section(struct zone *zone, struct mem_section *ms);
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 3f792375f326..aea6374f435a 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -29,6 +29,7 @@
 #include <linux/suspend.h>
 #include <linux/mm_inline.h>
 #include <linux/firmware-map.h>
+#include <linux/stop_machine.h>
 
 #include <asm/tlbflush.h>
 
@@ -1679,7 +1680,58 @@ static int is_memblock_offlined_cb(struct memory_block *mem, void *arg)
 	return ret;
 }
 
-int __ref remove_memory(u64 start, u64 size)
+static int check_cpu_on_node(void *data)
+{
+	struct pglist_data *pgdat = data;
+	int cpu;
+
+	for_each_present_cpu(cpu) {
+		if (cpu_to_node(cpu) == pgdat->node_id)
+			/*
+			 * the cpu on this node isn't removed, and we can't
+			 * offline this node.
+			 */
+			return -EBUSY;
+	}
+
+	return 0;
+}
+
+/* offline the node if all memory sections of this node are removed */
+static void try_offline_node(int nid)
+{
+	unsigned long start_pfn = NODE_DATA(nid)->node_start_pfn;
+	unsigned long end_pfn = start_pfn + NODE_DATA(nid)->node_spanned_pages;
+	unsigned long pfn;
+
+	for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
+		unsigned long section_nr = pfn_to_section_nr(pfn);
+
+		if (!present_section_nr(section_nr))
+			continue;
+
+		if (pfn_to_nid(pfn) != nid)
+			continue;
+
+		/*
+		 * some memory sections of this node are not removed, and we
+		 * can't offline node now.
+		 */
+		return;
+	}
+
+	if (stop_machine(check_cpu_on_node, NODE_DATA(nid), NULL))
+		return;
+
+	/*
+	 * all memory/cpu of this node are removed, we can offline this
+	 * node now.
+	 */
+	node_set_offline(nid);
+	unregister_one_node(nid);
+}
+
+int __ref remove_memory(int nid, u64 start, u64 size)
 {
 	unsigned long start_pfn, end_pfn;
 	int ret = 0;
@@ -1734,6 +1786,8 @@ repeat:
 
 	arch_remove_memory(start, size);
 
+	try_offline_node(nid);
+
 	unlock_memory_hotplug();
 
 	return 0;
@@ -1743,7 +1797,7 @@ int offline_pages(unsigned long start_pfn, unsigned long nr_pages)
 {
 	return -EINVAL;
 }
-int remove_memory(u64 start, u64 size)
+int remove_memory(int nid, u64 start, u64 size)
 {
 	return -EINVAL;
 }
-- 
cgit v1.2.3


From 90b30cdc1d87450e2ae89c8f8a29102dc2c1992e Mon Sep 17 00:00:00 2001
From: Wen Congyang <wency@cn.fujitsu.com>
Date: Fri, 22 Feb 2013 16:33:27 -0800
Subject: memory-hotplug: export the function try_offline_node()

try_offline_node() will be needed in the tristate
drivers/acpi/processor_driver.c.

The node will be offlined when all memory/cpu on the node have been
hotremoved.  So we need the function try_offline_node() in cpu-hotplug
path.

If the memory-hotplug is disabled, and cpu-hotplug is enabled

1. no memory no the node
   we don't online the node, and cpu's node is the nearest node.

2. the node contains some memory
   the node has been onlined, and cpu's node is still needed
   to migrate the sleep task on the cpu to the same node.

So we do nothing in try_offline_node() in this case.

[rientjes@google.com: export the function try_offline_node() fix]
Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
Signed-off-by: Tang Chen <tangchen@cn.fujitsu.com>
Cc: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Jiang Liu <liuj97@gmail.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Len Brown <lenb@kernel.org>
Signed-off-by: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memory_hotplug.h | 3 +++
 mm/memory_hotplug.c            | 3 ++-
 2 files changed, 5 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 69903ccf549e..b6a3be7d47bf 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -233,6 +233,7 @@ static inline void unlock_memory_hotplug(void) {}
 #ifdef CONFIG_MEMORY_HOTREMOVE
 
 extern int is_mem_section_removable(unsigned long pfn, unsigned long nr_pages);
+extern void try_offline_node(int nid);
 
 #else
 static inline int is_mem_section_removable(unsigned long pfn,
@@ -240,6 +241,8 @@ static inline int is_mem_section_removable(unsigned long pfn,
 {
 	return 0;
 }
+
+static inline void try_offline_node(int nid) {}
 #endif /* CONFIG_MEMORY_HOTREMOVE */
 
 extern int mem_online_node(int nid);
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index e189b1f4a9db..17e1447077ab 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1706,7 +1706,7 @@ static int check_cpu_on_node(void *data)
 }
 
 /* offline the node if all memory sections of this node are removed */
-static void try_offline_node(int nid)
+void try_offline_node(int nid)
 {
 	pg_data_t *pgdat = NODE_DATA(nid);
 	unsigned long start_pfn = pgdat->node_start_pfn;
@@ -1762,6 +1762,7 @@ static void try_offline_node(int nid)
 	 */
 	memset(pgdat, 0, sizeof(*pgdat));
 }
+EXPORT_SYMBOL(try_offline_node);
 
 int __ref remove_memory(int nid, u64 start, u64 size)
 {
-- 
cgit v1.2.3


From 34b71f1e04fcba578e719e675b4882eeeb2a1f6f Mon Sep 17 00:00:00 2001
From: Tang Chen <tangchen@cn.fujitsu.com>
Date: Fri, 22 Feb 2013 16:33:37 -0800
Subject: page_alloc: add movable_memmap kernel parameter

Add functions to parse movablemem_map boot option.  Since the option
could be specified more then once, all the maps will be stored in the
global variable movablemem_map.map array.

And also, we keep the array in monotonic increasing order by start_pfn.
And merge all overlapped ranges.

[akpm@linux-foundation.org: improve comment]
[akpm@linux-foundation.org: checkpatch fixes]
[akpm@linux-foundation.org: remove unneeded parens]
Signed-off-by: Tang Chen <tangchen@cn.fujitsu.com>
Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Reviewed-by: Wen Congyang <wency@cn.fujitsu.com>
Tested-by: Lin Feng <linfeng@cn.fujitsu.com>
Cc: Wu Jianguo <wujianguo@huawei.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/kernel-parameters.txt |  17 +++++
 include/linux/mm.h                  |  11 +++
 mm/page_alloc.c                     | 131 ++++++++++++++++++++++++++++++++++++
 3 files changed, 159 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 9aa8ff3e54dc..722a74161246 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1640,6 +1640,23 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			that the amount of memory usable for all allocations
 			is not too small.
 
+	movablemem_map=nn[KMG]@ss[KMG]
+			[KNL,X86,IA-64,PPC] This parameter is similar to
+			memmap except it specifies the memory map of
+			ZONE_MOVABLE.
+			If more areas are all within one node, then from
+			lowest ss to the end of the node will be ZONE_MOVABLE.
+			If an area covers two or more nodes, the area from
+			ss to the end of the 1st node will be ZONE_MOVABLE,
+			and all the rest nodes will only have ZONE_MOVABLE.
+			If memmap is specified at the same time, the
+			movablemem_map will be limited within the memmap
+			areas. If kernelcore or movablecore is also specified,
+			movablemem_map will have higher priority to be
+			satisfied. So the administrator should be careful that
+			the amount of movablemem_map areas are not too large.
+			Otherwise kernel won't have enough memory to start.
+
 	MTD_Partition=	[MTD]
 			Format: <name>,<region-number>,<size>,<offset>
 
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 9d659491c0ae..ce9bd3049836 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1359,6 +1359,17 @@ extern void free_bootmem_with_active_regions(int nid,
 						unsigned long max_low_pfn);
 extern void sparse_memory_present_with_active_regions(int nid);
 
+#define MOVABLEMEM_MAP_MAX MAX_NUMNODES
+struct movablemem_entry {
+	unsigned long start_pfn;    /* start pfn of memory segment */
+	unsigned long end_pfn;      /* end pfn of memory segment (exclusive) */
+};
+
+struct movablemem_map {
+	int nr_map;
+	struct movablemem_entry map[MOVABLEMEM_MAP_MAX];
+};
+
 #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
 
 #if !defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP) && \
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 703944809666..aa1cc5fe9904 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -202,6 +202,9 @@ static unsigned long __meminitdata nr_all_pages;
 static unsigned long __meminitdata dma_reserve;
 
 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
+/* Movable memory ranges, will also be used by memblock subsystem. */
+struct movablemem_map movablemem_map;
+
 static unsigned long __meminitdata arch_zone_lowest_possible_pfn[MAX_NR_ZONES];
 static unsigned long __meminitdata arch_zone_highest_possible_pfn[MAX_NR_ZONES];
 static unsigned long __initdata required_kernelcore;
@@ -5078,6 +5081,134 @@ static int __init cmdline_parse_movablecore(char *p)
 early_param("kernelcore", cmdline_parse_kernelcore);
 early_param("movablecore", cmdline_parse_movablecore);
 
+/**
+ * insert_movablemem_map - Insert a memory range in to movablemem_map.map.
+ * @start_pfn:	start pfn of the range
+ * @end_pfn:	end pfn of the range
+ *
+ * This function will also merge the overlapped ranges, and sort the array
+ * by start_pfn in monotonic increasing order.
+ */
+static void __init insert_movablemem_map(unsigned long start_pfn,
+					  unsigned long end_pfn)
+{
+	int pos, overlap;
+
+	/*
+	 * pos will be at the 1st overlapped range, or the position
+	 * where the element should be inserted.
+	 */
+	for (pos = 0; pos < movablemem_map.nr_map; pos++)
+		if (start_pfn <= movablemem_map.map[pos].end_pfn)
+			break;
+
+	/* If there is no overlapped range, just insert the element. */
+	if (pos == movablemem_map.nr_map ||
+	    end_pfn < movablemem_map.map[pos].start_pfn) {
+		/*
+		 * If pos is not the end of array, we need to move all
+		 * the rest elements backward.
+		 */
+		if (pos < movablemem_map.nr_map)
+			memmove(&movablemem_map.map[pos+1],
+				&movablemem_map.map[pos],
+				sizeof(struct movablemem_entry) *
+				(movablemem_map.nr_map - pos));
+		movablemem_map.map[pos].start_pfn = start_pfn;
+		movablemem_map.map[pos].end_pfn = end_pfn;
+		movablemem_map.nr_map++;
+		return;
+	}
+
+	/* overlap will be at the last overlapped range */
+	for (overlap = pos + 1; overlap < movablemem_map.nr_map; overlap++)
+		if (end_pfn < movablemem_map.map[overlap].start_pfn)
+			break;
+
+	/*
+	 * If there are more ranges overlapped, we need to merge them,
+	 * and move the rest elements forward.
+	 */
+	overlap--;
+	movablemem_map.map[pos].start_pfn = min(start_pfn,
+					movablemem_map.map[pos].start_pfn);
+	movablemem_map.map[pos].end_pfn = max(end_pfn,
+					movablemem_map.map[overlap].end_pfn);
+
+	if (pos != overlap && overlap + 1 != movablemem_map.nr_map)
+		memmove(&movablemem_map.map[pos+1],
+			&movablemem_map.map[overlap+1],
+			sizeof(struct movablemem_entry) *
+			(movablemem_map.nr_map - overlap - 1));
+
+	movablemem_map.nr_map -= overlap - pos;
+}
+
+/**
+ * movablemem_map_add_region - Add a memory range into movablemem_map.
+ * @start:	physical start address of range
+ * @end:	physical end address of range
+ *
+ * This function transform the physical address into pfn, and then add the
+ * range into movablemem_map by calling insert_movablemem_map().
+ */
+static void __init movablemem_map_add_region(u64 start, u64 size)
+{
+	unsigned long start_pfn, end_pfn;
+
+	/* In case size == 0 or start + size overflows */
+	if (start + size <= start)
+		return;
+
+	if (movablemem_map.nr_map >= ARRAY_SIZE(movablemem_map.map)) {
+		pr_err("movablemem_map: too many entries;"
+			" ignoring [mem %#010llx-%#010llx]\n",
+			(unsigned long long) start,
+			(unsigned long long) (start + size - 1));
+		return;
+	}
+
+	start_pfn = PFN_DOWN(start);
+	end_pfn = PFN_UP(start + size);
+	insert_movablemem_map(start_pfn, end_pfn);
+}
+
+/*
+ * cmdline_parse_movablemem_map - Parse boot option movablemem_map.
+ * @p:	The boot option of the following format:
+ *	movablemem_map=nn[KMG]@ss[KMG]
+ *
+ * This option sets the memory range [ss, ss+nn) to be used as movable memory.
+ *
+ * Return: 0 on success or -EINVAL on failure.
+ */
+static int __init cmdline_parse_movablemem_map(char *p)
+{
+	char *oldp;
+	u64 start_at, mem_size;
+
+	if (!p)
+		goto err;
+
+	oldp = p;
+	mem_size = memparse(p, &p);
+	if (p == oldp)
+		goto err;
+
+	if (*p == '@') {
+		oldp = ++p;
+		start_at = memparse(p, &p);
+		if (p == oldp || *p != '\0')
+			goto err;
+
+		movablemem_map_add_region(start_at, mem_size);
+		return 0;
+	}
+err:
+	return -EINVAL;
+}
+early_param("movablemem_map", cmdline_parse_movablemem_map);
+
 #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
 
 /**
-- 
cgit v1.2.3


From fb06bc8e5f42f38c011de0e59481f464a82380f6 Mon Sep 17 00:00:00 2001
From: Tang Chen <tangchen@cn.fujitsu.com>
Date: Fri, 22 Feb 2013 16:33:42 -0800
Subject: page_alloc: bootmem limit with movablecore_map

Ensure the bootmem will not allocate memory from areas that may be
ZONE_MOVABLE.  The map info is from movablecore_map boot option.

Signed-off-by: Tang Chen <tangchen@cn.fujitsu.com>
Reviewed-by: Wen Congyang <wency@cn.fujitsu.com>
Reviewed-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Tested-by: Lin Feng <linfeng@cn.fujitsu.com>
Cc: Wu Jianguo <wujianguo@huawei.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memblock.h |  1 +
 mm/memblock.c            | 18 +++++++++++++++++-
 2 files changed, 18 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index f388203db7e8..dfefaf111c0e 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -42,6 +42,7 @@ struct memblock {
 
 extern struct memblock memblock;
 extern int memblock_debug;
+extern struct movablemem_map movablemem_map;
 
 #define memblock_dbg(fmt, ...) \
 	if (memblock_debug) printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__)
diff --git a/mm/memblock.c b/mm/memblock.c
index b8d9147e5c08..c83ff97f57f4 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -101,6 +101,7 @@ phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t start,
 {
 	phys_addr_t this_start, this_end, cand;
 	u64 i;
+	int curr = movablemem_map.nr_map - 1;
 
 	/* pump up @end */
 	if (end == MEMBLOCK_ALLOC_ACCESSIBLE)
@@ -114,13 +115,28 @@ phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t start,
 		this_start = clamp(this_start, start, end);
 		this_end = clamp(this_end, start, end);
 
-		if (this_end < size)
+restart:
+		if (this_end <= this_start || this_end < size)
 			continue;
 
+		for (; curr >= 0; curr--) {
+			if ((movablemem_map.map[curr].start_pfn << PAGE_SHIFT)
+			    < this_end)
+				break;
+		}
+
 		cand = round_down(this_end - size, align);
+		if (curr >= 0 &&
+		    cand < movablemem_map.map[curr].end_pfn << PAGE_SHIFT) {
+			this_end = movablemem_map.map[curr].start_pfn
+				   << PAGE_SHIFT;
+			goto restart;
+		}
+
 		if (cand >= this_start)
 			return cand;
 	}
+
 	return 0;
 }
 
-- 
cgit v1.2.3


From e8d1955258091e4c92d5a975ebd7fd8a98f5d30f Mon Sep 17 00:00:00 2001
From: Tang Chen <tangchen@cn.fujitsu.com>
Date: Fri, 22 Feb 2013 16:33:44 -0800
Subject: acpi, memory-hotplug: parse SRAT before memblock is ready

On linux, the pages used by kernel could not be migrated.  As a result,
if a memory range is used by kernel, it cannot be hot-removed.  So if we
want to hot-remove memory, we should prevent kernel from using it.

The way now used to prevent this is specify a memory range by
movablemem_map boot option and set it as ZONE_MOVABLE.

But when the system is booting, memblock will allocate memory, and
reserve the memory for kernel.  And before we parse SRAT, and know the
node memory ranges, memblock is working.  And it may allocate memory in
ranges to be set as ZONE_MOVABLE.  This memory can be used by kernel,
and never be freed.

So, let's parse SRAT before memblock is called first.  And it is early
enough.

The first call of memblock_find_in_range_node() is in:

  setup_arch()
    |-->setup_real_mode()

so, this patch add a function early_parse_srat() to parse SRAT, and call
it before setup_real_mode() is called.

NOTE:

1) early_parse_srat() is called before numa_init(), and has initialized
   numa_meminfo.  So DO NOT clear numa_nodes_parsed in numa_init() and DO
   NOT zero numa_meminfo in numa_init(), otherwise we will lose memory
   numa info.

2) I don't know why using count of memory affinities parsed from SRAT
   as a return value in original acpi_numa_init().  So I add a static
   variable srat_mem_cnt to remember this count and use it as the return
   value of the new acpi_numa_init()

[mhocko@suse.cz: parse SRAT before memblock is ready fix]
Signed-off-by: Tang Chen <tangchen@cn.fujitsu.com>
Reviewed-by: Wen Congyang <wency@cn.fujitsu.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Jiang Liu <jiang.liu@huawei.com>
Cc: Jianguo Wu <wujianguo@huawei.com>
Cc: Kamezawa Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Lai Jiangshan <laijs@cn.fujitsu.com>
Cc: Wu Jianguo <wujianguo@huawei.com>
Cc: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Len Brown <lenb@kernel.org>
Cc: "Brown, Len" <len.brown@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/kernel/setup.c | 13 +++++++++----
 arch/x86/mm/numa.c      |  6 ++++--
 drivers/acpi/numa.c     | 23 +++++++++++++----------
 include/linux/acpi.h    |  8 ++++++++
 4 files changed, 34 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 915f5efefcf5..9c857f05cef0 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1056,6 +1056,15 @@ void __init setup_arch(char **cmdline_p)
 	setup_bios_corruption_check();
 #endif
 
+	/*
+	 * In the memory hotplug case, the kernel needs info from SRAT to
+	 * determine which memory is hotpluggable before allocating memory
+	 * using memblock.
+	 */
+	acpi_boot_table_init();
+	early_acpi_boot_init();
+	early_parse_srat();
+
 #ifdef CONFIG_X86_32
 	printk(KERN_DEBUG "initial memory mapped: [mem 0x00000000-%#010lx]\n",
 			(max_pfn_mapped<<PAGE_SHIFT) - 1);
@@ -1101,10 +1110,6 @@ void __init setup_arch(char **cmdline_p)
 	/*
 	 * Parse the ACPI tables for possible boot-time SMP configuration.
 	 */
-	acpi_boot_table_init();
-
-	early_acpi_boot_init();
-
 	initmem_init();
 	memblock_find_dma_reserve();
 
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index e3963f52aaea..dfd30259eb89 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -560,10 +560,12 @@ static int __init numa_init(int (*init_func)(void))
 	for (i = 0; i < MAX_LOCAL_APIC; i++)
 		set_apicid_to_node(i, NUMA_NO_NODE);
 
-	nodes_clear(numa_nodes_parsed);
+	/*
+	 * Do not clear numa_nodes_parsed or zero numa_meminfo here, because
+	 * SRAT was parsed earlier in early_parse_srat().
+	 */
 	nodes_clear(node_possible_map);
 	nodes_clear(node_online_map);
-	memset(&numa_meminfo, 0, sizeof(numa_meminfo));
 	WARN_ON(memblock_set_node(0, ULLONG_MAX, MAX_NUMNODES));
 	numa_reset_distance();
 
diff --git a/drivers/acpi/numa.c b/drivers/acpi/numa.c
index 33e609f63585..59844ee149be 100644
--- a/drivers/acpi/numa.c
+++ b/drivers/acpi/numa.c
@@ -282,10 +282,10 @@ acpi_table_parse_srat(enum acpi_srat_type id,
 					    handler, max_entries);
 }
 
-int __init acpi_numa_init(void)
-{
-	int cnt = 0;
+static int srat_mem_cnt;
 
+void __init early_parse_srat(void)
+{
 	/*
 	 * Should not limit number with cpu num that is from NR_CPUS or nr_cpus=
 	 * SRAT cpu entries could have different order with that in MADT.
@@ -295,21 +295,24 @@ int __init acpi_numa_init(void)
 	/* SRAT: Static Resource Affinity Table */
 	if (!acpi_table_parse(ACPI_SIG_SRAT, acpi_parse_srat)) {
 		acpi_table_parse_srat(ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY,
-				     acpi_parse_x2apic_affinity, 0);
+				      acpi_parse_x2apic_affinity, 0);
 		acpi_table_parse_srat(ACPI_SRAT_TYPE_CPU_AFFINITY,
-				     acpi_parse_processor_affinity, 0);
-		cnt = acpi_table_parse_srat(ACPI_SRAT_TYPE_MEMORY_AFFINITY,
-					    acpi_parse_memory_affinity,
-					    NR_NODE_MEMBLKS);
+				      acpi_parse_processor_affinity, 0);
+		srat_mem_cnt = acpi_table_parse_srat(ACPI_SRAT_TYPE_MEMORY_AFFINITY,
+						     acpi_parse_memory_affinity,
+						     NR_NODE_MEMBLKS);
 	}
+}
 
+int __init acpi_numa_init(void)
+{
 	/* SLIT: System Locality Information Table */
 	acpi_table_parse(ACPI_SIG_SLIT, acpi_parse_slit);
 
 	acpi_numa_arch_fixup();
 
-	if (cnt < 0)
-		return cnt;
+	if (srat_mem_cnt < 0)
+		return srat_mem_cnt;
 	else if (!parsed_numa_memblks)
 		return -ENOENT;
 	return 0;
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index bcbdd7484e58..f46cfd73a553 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -485,6 +485,14 @@ static inline bool acpi_driver_match_device(struct device *dev,
 
 #endif	/* !CONFIG_ACPI */
 
+#ifdef CONFIG_ACPI_NUMA
+void __init early_parse_srat(void);
+#else
+static inline void early_parse_srat(void)
+{
+}
+#endif
+
 #ifdef CONFIG_ACPI
 void acpi_os_set_prepare_sleep(int (*func)(u8 sleep_state,
 			       u32 pm1a_ctrl,  u32 pm1b_ctrl));
-- 
cgit v1.2.3


From 27168d38fa209073219abedbe6a9de7ba9acbfad Mon Sep 17 00:00:00 2001
From: Tang Chen <tangchen@cn.fujitsu.com>
Date: Fri, 22 Feb 2013 16:33:46 -0800
Subject: acpi, memory-hotplug: extend movablemem_map ranges to the end of node

When implementing movablemem_map boot option, we introduced an array
movablemem_map.map[] to store the memory ranges to be set as
ZONE_MOVABLE.

Since ZONE_MOVABLE is the latst zone of a node, if user didn't specify
the whole node memory range, we need to extend it to the node end so
that we can use it to prevent memblock from allocating memory in the
ranges user didn't specify.

We now implement movablemem_map boot option like this:

        /*
         * For movablemem_map=nn[KMG]@ss[KMG]:
         *
         * SRAT:                |_____| |_____| |_________| |_________| ......
         * node id:                0       1         1           2
         * user specified:                |__|                 |___|
         * movablemem_map:                |___| |_________|    |______| ......
         *
         * Using movablemem_map, we can prevent memblock from allocating memory
         * on ZONE_MOVABLE at boot time.
         *
         * NOTE: In this case, SRAT info will be ingored.
         */

[akpm@linux-foundation.org: clean up code, fix build warning]
Signed-off-by: Tang Chen <tangchen@cn.fujitsu.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Jiang Liu <jiang.liu@huawei.com>
Cc: Jianguo Wu <wujianguo@huawei.com>
Cc: Kamezawa Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Lai Jiangshan <laijs@cn.fujitsu.com>
Cc: Wu Jianguo <wujianguo@huawei.com>
Cc: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Len Brown <lenb@kernel.org>
Cc: "Brown, Len" <len.brown@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/mm/srat.c | 64 +++++++++++++++++++++++++++++++++++++++++++++++++++---
 include/linux/mm.h |  5 +++++
 mm/page_alloc.c    | 34 +++++++++++++++++++++++++++--
 3 files changed, 98 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c
index cdd0da9dd530..3e90039e52e0 100644
--- a/arch/x86/mm/srat.c
+++ b/arch/x86/mm/srat.c
@@ -141,11 +141,65 @@ static inline int save_add_info(void) {return 1;}
 static inline int save_add_info(void) {return 0;}
 #endif
 
+#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
+static void __init handle_movablemem(int node, u64 start, u64 end)
+{
+	int overlap;
+	unsigned long start_pfn, end_pfn;
+
+	start_pfn = PFN_DOWN(start);
+	end_pfn = PFN_UP(end);
+
+	/*
+	 * For movablecore_map=nn[KMG]@ss[KMG]:
+	 *
+	 * SRAT:		|_____| |_____| |_________| |_________| ......
+	 * node id:		   0       1         1           2
+	 * user specified:	          |__|                 |___|
+	 * movablemem_map:		  |___| |_________|    |______| ......
+	 *
+	 * Using movablemem_map, we can prevent memblock from allocating memory
+	 * on ZONE_MOVABLE at boot time.
+	 */
+	overlap = movablemem_map_overlap(start_pfn, end_pfn);
+	if (overlap >= 0) {
+		/*
+		 * If part of this range is in movablemem_map, we need to
+		 * add the range after it to extend the range to the end
+		 * of the node, because from the min address specified to
+		 * the end of the node will be ZONE_MOVABLE.
+		 */
+		start_pfn = max(start_pfn,
+			    movablemem_map.map[overlap].start_pfn);
+		insert_movablemem_map(start_pfn, end_pfn);
+
+		/*
+		 * Set the nodemask, so that if the address range on one node
+		 * is not continuse, we can add the subsequent ranges on the
+		 * same node into movablemem_map.
+		 */
+		node_set(node, movablemem_map.numa_nodes_hotplug);
+	} else {
+		if (node_isset(node, movablemem_map.numa_nodes_hotplug))
+			/*
+			 * Insert the range if we already have movable ranges
+			 * on the same node.
+			 */
+			insert_movablemem_map(start_pfn, end_pfn);
+	}
+}
+#else		/* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
+static inline void handle_movablemem(int node, u64 start, u64 end)
+{
+}
+#endif		/* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
+
 /* Callback for parsing of the Proximity Domain <-> Memory Area mappings */
 int __init
 acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
 {
 	u64 start, end;
+	u32 hotpluggable;
 	int node, pxm;
 
 	if (srat_disabled())
@@ -154,7 +208,8 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
 		goto out_err_bad_srat;
 	if ((ma->flags & ACPI_SRAT_MEM_ENABLED) == 0)
 		goto out_err;
-	if ((ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) && !save_add_info())
+	hotpluggable = ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE;
+	if (hotpluggable && !save_add_info())
 		goto out_err;
 
 	start = ma->base_address;
@@ -174,9 +229,12 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
 
 	node_set(node, numa_nodes_parsed);
 
-	printk(KERN_INFO "SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx]\n",
+	printk(KERN_INFO "SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx] %s\n",
 	       node, pxm,
-	       (unsigned long long) start, (unsigned long long) end - 1);
+	       (unsigned long long) start, (unsigned long long) end - 1,
+	       hotpluggable ? "Hot Pluggable": "");
+
+	handle_movablemem(node, start, end);
 
 	return 0;
 out_err_bad_srat:
diff --git a/include/linux/mm.h b/include/linux/mm.h
index ce9bd3049836..4d7377a1d084 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1368,8 +1368,13 @@ struct movablemem_entry {
 struct movablemem_map {
 	int nr_map;
 	struct movablemem_entry map[MOVABLEMEM_MAP_MAX];
+	nodemask_t numa_nodes_hotplug;	/* on which nodes we specify memory */
 };
 
+extern void __init insert_movablemem_map(unsigned long start_pfn,
+					 unsigned long end_pfn);
+extern int __init movablemem_map_overlap(unsigned long start_pfn,
+					 unsigned long end_pfn);
 #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
 
 #if !defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP) && \
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 88b9962c99b3..7ea9a003ad57 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5175,6 +5175,36 @@ static int __init cmdline_parse_movablecore(char *p)
 early_param("kernelcore", cmdline_parse_kernelcore);
 early_param("movablecore", cmdline_parse_movablecore);
 
+/**
+ * movablemem_map_overlap() - Check if a range overlaps movablemem_map.map[].
+ * @start_pfn:	start pfn of the range to be checked
+ * @end_pfn: 	end pfn of the range to be checked (exclusive)
+ *
+ * This function checks if a given memory range [start_pfn, end_pfn) overlaps
+ * the movablemem_map.map[] array.
+ *
+ * Return: index of the first overlapped element in movablemem_map.map[]
+ *         or -1 if they don't overlap each other.
+ */
+int __init movablemem_map_overlap(unsigned long start_pfn,
+				   unsigned long end_pfn)
+{
+	int overlap;
+
+	if (!movablemem_map.nr_map)
+		return -1;
+
+	for (overlap = 0; overlap < movablemem_map.nr_map; overlap++)
+		if (start_pfn < movablemem_map.map[overlap].end_pfn)
+			break;
+
+	if (overlap == movablemem_map.nr_map ||
+	    end_pfn <= movablemem_map.map[overlap].start_pfn)
+		return -1;
+
+	return overlap;
+}
+
 /**
  * insert_movablemem_map - Insert a memory range in to movablemem_map.map.
  * @start_pfn:	start pfn of the range
@@ -5183,8 +5213,8 @@ early_param("movablecore", cmdline_parse_movablecore);
  * This function will also merge the overlapped ranges, and sort the array
  * by start_pfn in monotonic increasing order.
  */
-static void __init insert_movablemem_map(unsigned long start_pfn,
-					  unsigned long end_pfn)
+void __init insert_movablemem_map(unsigned long start_pfn,
+				  unsigned long end_pfn)
 {
 	int pos, overlap;
 
-- 
cgit v1.2.3


From 01a178a94e8eaec351b29ee49fbb3d1c124cb7fb Mon Sep 17 00:00:00 2001
From: Tang Chen <tangchen@cn.fujitsu.com>
Date: Fri, 22 Feb 2013 16:33:49 -0800
Subject: acpi, memory-hotplug: support getting hotplug info from SRAT

We now provide an option for users who don't want to specify physical
memory address in kernel commandline.

         /*
          * For movablemem_map=acpi:
          *
          * SRAT:                |_____| |_____| |_________| |_________| ......
          * node id:                0       1         1           2
          * hotpluggable:           n       y         y           n
          * movablemem_map:              |_____| |_________|
          *
          * Using movablemem_map, we can prevent memblock from allocating memory
          * on ZONE_MOVABLE at boot time.
          */

So user just specify movablemem_map=acpi, and the kernel will use
hotpluggable info in SRAT to determine which memory ranges should be set
as ZONE_MOVABLE.

If all the memory ranges in SRAT is hotpluggable, then no memory can be
used by kernel.  But before parsing SRAT, memblock has already reserve
some memory ranges for other purposes, such as for kernel image, and so
on.  We cannot prevent kernel from using these memory.  So we need to
exclude these ranges even if these memory is hotpluggable.

Furthermore, there could be several memory ranges in the single node
which the kernel resides in.  We may skip one range that have memory
reserved by memblock, but if the rest of memory is too small, then the
kernel will fail to boot.  So, make the whole node which the kernel
resides in un-hotpluggable.  Then the kernel has enough memory to use.

NOTE: Using this way will cause NUMA performance down because the
      whole node will be set as ZONE_MOVABLE, and kernel cannot use memory
      on it.  If users don't want to lose NUMA performance, just don't use
      it.

[akpm@linux-foundation.org: fix warning]
[akpm@linux-foundation.org: use strcmp()]
Signed-off-by: Tang Chen <tangchen@cn.fujitsu.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Jiang Liu <jiang.liu@huawei.com>
Cc: Jianguo Wu <wujianguo@huawei.com>
Cc: Kamezawa Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Lai Jiangshan <laijs@cn.fujitsu.com>
Cc: Wu Jianguo <wujianguo@huawei.com>
Cc: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Len Brown <lenb@kernel.org>
Cc: "Brown, Len" <len.brown@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/kernel-parameters.txt | 29 ++++++++++++---
 arch/x86/mm/srat.c                  | 71 ++++++++++++++++++++++++++++++++++---
 include/linux/mm.h                  |  2 ++
 mm/page_alloc.c                     | 22 +++++++++++-
 4 files changed, 113 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 722a74161246..766087781ecd 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1640,15 +1640,30 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			that the amount of memory usable for all allocations
 			is not too small.
 
+	movablemem_map=acpi
+			[KNL,X86,IA-64,PPC] This parameter is similar to
+			memmap except it specifies the memory map of
+			ZONE_MOVABLE.
+			This option inform the kernel to use Hot Pluggable bit
+			in flags from SRAT from ACPI BIOS to determine which
+			memory devices could be hotplugged. The corresponding
+			memory ranges will be set as ZONE_MOVABLE.
+			NOTE: Whatever node the kernel resides in will always
+			      be un-hotpluggable.
+
 	movablemem_map=nn[KMG]@ss[KMG]
 			[KNL,X86,IA-64,PPC] This parameter is similar to
 			memmap except it specifies the memory map of
 			ZONE_MOVABLE.
-			If more areas are all within one node, then from
-			lowest ss to the end of the node will be ZONE_MOVABLE.
-			If an area covers two or more nodes, the area from
-			ss to the end of the 1st node will be ZONE_MOVABLE,
-			and all the rest nodes will only have ZONE_MOVABLE.
+			If user specifies memory ranges, the info in SRAT will
+			be ingored. And it works like the following:
+			- If more ranges are all within one node, then from
+			  lowest ss to the end of the node will be ZONE_MOVABLE.
+			- If a range is within a node, then from ss to the end
+			  of the node will be ZONE_MOVABLE.
+			- If a range covers two or more nodes, then from ss to
+			  the end of the 1st node will be ZONE_MOVABLE, and all
+			  the rest nodes will only have ZONE_MOVABLE.
 			If memmap is specified at the same time, the
 			movablemem_map will be limited within the memmap
 			areas. If kernelcore or movablecore is also specified,
@@ -1656,6 +1671,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			satisfied. So the administrator should be careful that
 			the amount of movablemem_map areas are not too large.
 			Otherwise kernel won't have enough memory to start.
+			NOTE: We don't stop users specifying the node the
+			      kernel resides in as hotpluggable so that this
+			      option can be used as a workaround of firmware
+                              bugs.
 
 	MTD_Partition=	[MTD]
 			Format: <name>,<region-number>,<size>,<offset>
diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c
index 3e90039e52e0..79836d01f789 100644
--- a/arch/x86/mm/srat.c
+++ b/arch/x86/mm/srat.c
@@ -142,16 +142,72 @@ static inline int save_add_info(void) {return 0;}
 #endif
 
 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
-static void __init handle_movablemem(int node, u64 start, u64 end)
+static void __init
+handle_movablemem(int node, u64 start, u64 end, u32 hotpluggable)
 {
-	int overlap;
+	int overlap, i;
 	unsigned long start_pfn, end_pfn;
 
 	start_pfn = PFN_DOWN(start);
 	end_pfn = PFN_UP(end);
 
 	/*
-	 * For movablecore_map=nn[KMG]@ss[KMG]:
+	 * For movablemem_map=acpi:
+	 *
+	 * SRAT:		|_____| |_____| |_________| |_________| ......
+	 * node id:                0       1         1           2
+	 * hotpluggable:	   n       y         y           n
+	 * movablemem_map:	        |_____| |_________|
+	 *
+	 * Using movablemem_map, we can prevent memblock from allocating memory
+	 * on ZONE_MOVABLE at boot time.
+	 *
+	 * Before parsing SRAT, memblock has already reserve some memory ranges
+	 * for other purposes, such as for kernel image. We cannot prevent
+	 * kernel from using these memory, so we need to exclude these memory
+	 * even if it is hotpluggable.
+	 * Furthermore, to ensure the kernel has enough memory to boot, we make
+	 * all the memory on the node which the kernel resides in
+	 * un-hotpluggable.
+	 */
+	if (hotpluggable && movablemem_map.acpi) {
+		/* Exclude ranges reserved by memblock. */
+		struct memblock_type *rgn = &memblock.reserved;
+
+		for (i = 0; i < rgn->cnt; i++) {
+			if (end <= rgn->regions[i].base ||
+			    start >= rgn->regions[i].base +
+			    rgn->regions[i].size)
+				continue;
+
+			/*
+			 * If the memory range overlaps the memory reserved by
+			 * memblock, then the kernel resides in this node.
+			 */
+			node_set(node, movablemem_map.numa_nodes_kernel);
+
+			goto out;
+		}
+
+		/*
+		 * If the kernel resides in this node, then the whole node
+		 * should not be hotpluggable.
+		 */
+		if (node_isset(node, movablemem_map.numa_nodes_kernel))
+			goto out;
+
+		insert_movablemem_map(start_pfn, end_pfn);
+
+		/*
+		 * numa_nodes_hotplug nodemask represents which nodes are put
+		 * into movablemem_map.map[].
+		 */
+		node_set(node, movablemem_map.numa_nodes_hotplug);
+		goto out;
+	}
+
+	/*
+	 * For movablemem_map=nn[KMG]@ss[KMG]:
 	 *
 	 * SRAT:		|_____| |_____| |_________| |_________| ......
 	 * node id:		   0       1         1           2
@@ -160,6 +216,8 @@ static void __init handle_movablemem(int node, u64 start, u64 end)
 	 *
 	 * Using movablemem_map, we can prevent memblock from allocating memory
 	 * on ZONE_MOVABLE at boot time.
+	 *
+	 * NOTE: In this case, SRAT info will be ingored.
 	 */
 	overlap = movablemem_map_overlap(start_pfn, end_pfn);
 	if (overlap >= 0) {
@@ -187,9 +245,12 @@ static void __init handle_movablemem(int node, u64 start, u64 end)
 			 */
 			insert_movablemem_map(start_pfn, end_pfn);
 	}
+out:
+	return;
 }
 #else		/* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
-static inline void handle_movablemem(int node, u64 start, u64 end)
+static inline void
+handle_movablemem(int node, u64 start, u64 end, u32 hotpluggable)
 {
 }
 #endif		/* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
@@ -234,7 +295,7 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
 	       (unsigned long long) start, (unsigned long long) end - 1,
 	       hotpluggable ? "Hot Pluggable": "");
 
-	handle_movablemem(node, start, end);
+	handle_movablemem(node, start, end, hotpluggable);
 
 	return 0;
 out_err_bad_srat:
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 4d7377a1d084..72a42c0fa633 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1366,9 +1366,11 @@ struct movablemem_entry {
 };
 
 struct movablemem_map {
+	bool acpi;	/* true if using SRAT info */
 	int nr_map;
 	struct movablemem_entry map[MOVABLEMEM_MAP_MAX];
 	nodemask_t numa_nodes_hotplug;	/* on which nodes we specify memory */
+	nodemask_t numa_nodes_kernel;	/* on which nodes kernel resides in */
 };
 
 extern void __init insert_movablemem_map(unsigned long start_pfn,
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 7ea9a003ad57..a7381be21320 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -203,7 +203,10 @@ static unsigned long __meminitdata dma_reserve;
 
 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
 /* Movable memory ranges, will also be used by memblock subsystem. */
-struct movablemem_map movablemem_map;
+struct movablemem_map movablemem_map = {
+	.acpi = false,
+	.nr_map = 0,
+};
 
 static unsigned long __meminitdata arch_zone_lowest_possible_pfn[MAX_NR_ZONES];
 static unsigned long __meminitdata arch_zone_highest_possible_pfn[MAX_NR_ZONES];
@@ -5314,6 +5317,23 @@ static int __init cmdline_parse_movablemem_map(char *p)
 	if (!p)
 		goto err;
 
+	if (!strcmp(p, "acpi"))
+		movablemem_map.acpi = true;
+
+	/*
+	 * If user decide to use info from BIOS, all the other user specified
+	 * ranges will be ingored.
+	 */
+	if (movablemem_map.acpi) {
+		if (movablemem_map.nr_map) {
+			memset(movablemem_map.map, 0,
+				sizeof(struct movablemem_entry)
+				* movablemem_map.nr_map);
+			movablemem_map.nr_map = 0;
+		}
+		return 0;
+	}
+
 	oldp = p;
 	mem_size = memparse(p, &p);
 	if (p == oldp)
-- 
cgit v1.2.3


From f7210e6c4ac795694106c1c5307134d3fc233e88 Mon Sep 17 00:00:00 2001
From: Tang Chen <tangchen@cn.fujitsu.com>
Date: Fri, 22 Feb 2013 16:33:51 -0800
Subject: mm/memblock.c: use CONFIG_HAVE_MEMBLOCK_NODE_MAP to protect
 movablecore_map in memblock_overlaps_region().

The definition of struct movablecore_map is protected by
CONFIG_HAVE_MEMBLOCK_NODE_MAP but its use in memblock_overlaps_region()
is not.  So add CONFIG_HAVE_MEMBLOCK_NODE_MAP to protect the use of
movablecore_map in memblock_overlaps_region().

Signed-off-by: Tang Chen <tangchen@cn.fujitsu.com>
Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memblock.h |  1 +
 mm/memblock.c            | 34 ++++++++++++++++++++++++++++++++++
 2 files changed, 35 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index dfefaf111c0e..3e5ecb2d790e 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -61,6 +61,7 @@ int memblock_reserve(phys_addr_t base, phys_addr_t size);
 void memblock_trim_memory(phys_addr_t align);
 
 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
+
 void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn,
 			  unsigned long *out_end_pfn, int *out_nid);
 
diff --git a/mm/memblock.c b/mm/memblock.c
index c83ff97f57f4..1bcd9b970564 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -92,9 +92,13 @@ static long __init_memblock memblock_overlaps_region(struct memblock_type *type,
  *
  * Find @size free area aligned to @align in the specified range and node.
  *
+ * If we have CONFIG_HAVE_MEMBLOCK_NODE_MAP defined, we need to check if the
+ * memory we found if not in hotpluggable ranges.
+ *
  * RETURNS:
  * Found address on success, %0 on failure.
  */
+#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
 phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t start,
 					phys_addr_t end, phys_addr_t size,
 					phys_addr_t align, int nid)
@@ -139,6 +143,36 @@ restart:
 
 	return 0;
 }
+#else /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
+phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t start,
+					phys_addr_t end, phys_addr_t size,
+					phys_addr_t align, int nid)
+{
+	phys_addr_t this_start, this_end, cand;
+	u64 i;
+
+	/* pump up @end */
+	if (end == MEMBLOCK_ALLOC_ACCESSIBLE)
+		end = memblock.current_limit;
+
+	/* avoid allocating the first page */
+	start = max_t(phys_addr_t, start, PAGE_SIZE);
+	end = max(start, end);
+
+	for_each_free_mem_range_reverse(i, nid, &this_start, &this_end, NULL) {
+		this_start = clamp(this_start, start, end);
+		this_end = clamp(this_end, start, end);
+
+		if (this_end < size)
+			continue;
+
+		cand = round_down(this_end - size, align);
+		if (cand >= this_start)
+			return cand;
+	}
+	return 0;
+}
+#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
 
 /**
  * memblock_find_in_range - find free area in given range
-- 
cgit v1.2.3


From 194159fbcc0d6ac1351837d3cd7a27a4af0219a6 Mon Sep 17 00:00:00 2001
From: Minchan Kim <minchan@kernel.org>
Date: Fri, 22 Feb 2013 16:33:58 -0800
Subject: mm: remove MIGRATE_ISOLATE check in hotpath

Several functions test MIGRATE_ISOLATE and some of those are hotpath but
MIGRATE_ISOLATE is used only if we enable CONFIG_MEMORY_ISOLATION(ie,
CMA, memory-hotplug and memory-failure) which are not common config
option.  So let's not add unnecessary overhead and code when we don't
enable CONFIG_MEMORY_ISOLATION.

Signed-off-by: Minchan Kim <minchan@kernel.org>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Acked-by: Michal Nazarewicz <mina86@mina86.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmzone.h         |  2 ++
 include/linux/page-isolation.h | 19 +++++++++++++++++++
 mm/compaction.c                |  6 +++++-
 mm/page_alloc.c                | 16 ++++++++++------
 mm/vmstat.c                    |  2 ++
 5 files changed, 38 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 73b64a38b984..4f4c8c26fa9d 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -57,7 +57,9 @@ enum {
 	 */
 	MIGRATE_CMA,
 #endif
+#ifdef CONFIG_MEMORY_ISOLATION
 	MIGRATE_ISOLATE,	/* can't allocate from here */
+#endif
 	MIGRATE_TYPES
 };
 
diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h
index a92061e08d48..3fff8e774067 100644
--- a/include/linux/page-isolation.h
+++ b/include/linux/page-isolation.h
@@ -1,6 +1,25 @@
 #ifndef __LINUX_PAGEISOLATION_H
 #define __LINUX_PAGEISOLATION_H
 
+#ifdef CONFIG_MEMORY_ISOLATION
+static inline bool is_migrate_isolate_page(struct page *page)
+{
+	return get_pageblock_migratetype(page) == MIGRATE_ISOLATE;
+}
+static inline bool is_migrate_isolate(int migratetype)
+{
+	return migratetype == MIGRATE_ISOLATE;
+}
+#else
+static inline bool is_migrate_isolate_page(struct page *page)
+{
+	return false;
+}
+static inline bool is_migrate_isolate(int migratetype)
+{
+	return false;
+}
+#endif
 
 bool has_unmovable_pages(struct zone *zone, struct page *page, int count,
 			 bool skip_hwpoisoned_pages);
diff --git a/mm/compaction.c b/mm/compaction.c
index 5d5b0b259bc9..ef53f352b606 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -15,6 +15,7 @@
 #include <linux/sysctl.h>
 #include <linux/sysfs.h>
 #include <linux/balloon_compaction.h>
+#include <linux/page-isolation.h>
 #include "internal.h"
 
 #ifdef CONFIG_COMPACTION
@@ -215,7 +216,10 @@ static bool suitable_migration_target(struct page *page)
 	int migratetype = get_pageblock_migratetype(page);
 
 	/* Don't interfere with memory hot-remove or the min_free_kbytes blocks */
-	if (migratetype == MIGRATE_ISOLATE || migratetype == MIGRATE_RESERVE)
+	if (migratetype == MIGRATE_RESERVE)
+		return false;
+
+	if (is_migrate_isolate(migratetype))
 		return false;
 
 	/* If the page is a large free page, then allow migration */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 07fe78d01ffd..e3fb290194c0 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -673,7 +673,7 @@ static void free_pcppages_bulk(struct zone *zone, int count,
 			/* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */
 			__free_one_page(page, zone, 0, mt);
 			trace_mm_page_pcpu_drain(page, 0, mt);
-			if (likely(get_pageblock_migratetype(page) != MIGRATE_ISOLATE)) {
+			if (likely(!is_migrate_isolate_page(page))) {
 				__mod_zone_page_state(zone, NR_FREE_PAGES, 1);
 				if (is_migrate_cma(mt))
 					__mod_zone_page_state(zone, NR_FREE_CMA_PAGES, 1);
@@ -691,7 +691,7 @@ static void free_one_page(struct zone *zone, struct page *page, int order,
 	zone->pages_scanned = 0;
 
 	__free_one_page(page, zone, order, migratetype);
-	if (unlikely(migratetype != MIGRATE_ISOLATE))
+	if (unlikely(!is_migrate_isolate(migratetype)))
 		__mod_zone_freepage_state(zone, 1 << order, migratetype);
 	spin_unlock(&zone->lock);
 }
@@ -923,7 +923,9 @@ static int fallbacks[MIGRATE_TYPES][4] = {
 	[MIGRATE_MOVABLE]     = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE,   MIGRATE_RESERVE },
 #endif
 	[MIGRATE_RESERVE]     = { MIGRATE_RESERVE }, /* Never used */
+#ifdef CONFIG_MEMORY_ISOLATION
 	[MIGRATE_ISOLATE]     = { MIGRATE_RESERVE }, /* Never used */
+#endif
 };
 
 /*
@@ -1149,7 +1151,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
 			list_add_tail(&page->lru, list);
 		if (IS_ENABLED(CONFIG_CMA)) {
 			mt = get_pageblock_migratetype(page);
-			if (!is_migrate_cma(mt) && mt != MIGRATE_ISOLATE)
+			if (!is_migrate_cma(mt) && !is_migrate_isolate(mt))
 				mt = migratetype;
 		}
 		set_freepage_migratetype(page, mt);
@@ -1333,7 +1335,7 @@ void free_hot_cold_page(struct page *page, int cold)
 	 * excessively into the page allocator
 	 */
 	if (migratetype >= MIGRATE_PCPTYPES) {
-		if (unlikely(migratetype == MIGRATE_ISOLATE)) {
+		if (unlikely(is_migrate_isolate(migratetype))) {
 			free_one_page(zone, page, 0, migratetype);
 			goto out;
 		}
@@ -1407,7 +1409,7 @@ static int __isolate_free_page(struct page *page, unsigned int order)
 	zone = page_zone(page);
 	mt = get_pageblock_migratetype(page);
 
-	if (mt != MIGRATE_ISOLATE) {
+	if (!is_migrate_isolate(mt)) {
 		/* Obey watermarks as if the page was being allocated */
 		watermark = low_wmark_pages(zone) + (1 << order);
 		if (!zone_watermark_ok(zone, 0, watermark, 0, 0))
@@ -1426,7 +1428,7 @@ static int __isolate_free_page(struct page *page, unsigned int order)
 		struct page *endpage = page + (1 << order) - 1;
 		for (; page < endpage; page += pageblock_nr_pages) {
 			int mt = get_pageblock_migratetype(page);
-			if (mt != MIGRATE_ISOLATE && !is_migrate_cma(mt))
+			if (!is_migrate_isolate(mt) && !is_migrate_cma(mt))
 				set_pageblock_migratetype(page,
 							  MIGRATE_MOVABLE);
 		}
@@ -2904,7 +2906,9 @@ static void show_migration_types(unsigned char type)
 #ifdef CONFIG_CMA
 		[MIGRATE_CMA]		= 'C',
 #endif
+#ifdef CONFIG_MEMORY_ISOLATION
 		[MIGRATE_ISOLATE]	= 'I',
+#endif
 	};
 	char tmp[MIGRATE_TYPES + 1];
 	char *p = tmp;
diff --git a/mm/vmstat.c b/mm/vmstat.c
index e3475f5fd983..c9d1f68120cd 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -628,7 +628,9 @@ static char * const migratetype_names[MIGRATE_TYPES] = {
 #ifdef CONFIG_CMA
 	"CMA",
 #endif
+#ifdef CONFIG_MEMORY_ISOLATION
 	"Isolate",
+#endif
 };
 
 static void *frag_start(struct seq_file *m, loff_t *pos)
-- 
cgit v1.2.3


From 293c07e31ab5a0b8df8c19b2a9e5c6fa30308849 Mon Sep 17 00:00:00 2001
From: Xishi Qiu <qiuxishi@huawei.com>
Date: Fri, 22 Feb 2013 16:34:02 -0800
Subject: memory-failure: use num_poisoned_pages instead of mce_bad_pages

Since MCE is an x86 concept, and this code is in mm/, it would be better
to use the name num_poisoned_pages instead of mce_bad_pages.

[akpm@linux-foundation.org: fix mm/sparse.c]
Signed-off-by: Xishi Qiu <qiuxishi@huawei.com>
Signed-off-by: Jiang Liu <jiang.liu@huawei.com>
Suggested-by: Borislav Petkov <bp@alien8.de>
Reviewed-by: Wanpeng Li <liwanp@linux.vnet.ibm.com>
Cc: Andi Kleen <andi@firstfloor.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/meminfo.c   |  2 +-
 include/linux/mm.h  |  2 +-
 mm/memory-failure.c | 16 ++++++++--------
 mm/sparse.c         |  2 +-
 4 files changed, 11 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 80e4645f7990..c3dac611c3c0 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -158,7 +158,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
 		vmi.used >> 10,
 		vmi.largest_chunk >> 10
 #ifdef CONFIG_MEMORY_FAILURE
-		,atomic_long_read(&mce_bad_pages) << (PAGE_SHIFT - 10)
+		,atomic_long_read(&num_poisoned_pages) << (PAGE_SHIFT - 10)
 #endif
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 		,K(global_page_state(NR_ANON_TRANSPARENT_HUGEPAGES) *
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 72a42c0fa633..a114b8eb7676 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1753,7 +1753,7 @@ extern int unpoison_memory(unsigned long pfn);
 extern int sysctl_memory_failure_early_kill;
 extern int sysctl_memory_failure_recovery;
 extern void shake_page(struct page *p, int access);
-extern atomic_long_t mce_bad_pages;
+extern atomic_long_t num_poisoned_pages;
 extern int soft_offline_page(struct page *page, int flags);
 
 extern void dump_page(struct page *page);
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index f4c9fa1149e2..c95e19af510b 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -61,7 +61,7 @@ int sysctl_memory_failure_early_kill __read_mostly = 0;
 
 int sysctl_memory_failure_recovery __read_mostly = 1;
 
-atomic_long_t mce_bad_pages __read_mostly = ATOMIC_LONG_INIT(0);
+atomic_long_t num_poisoned_pages __read_mostly = ATOMIC_LONG_INIT(0);
 
 #if defined(CONFIG_HWPOISON_INJECT) || defined(CONFIG_HWPOISON_INJECT_MODULE)
 
@@ -1040,7 +1040,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
 	}
 
 	nr_pages = 1 << compound_trans_order(hpage);
-	atomic_long_add(nr_pages, &mce_bad_pages);
+	atomic_long_add(nr_pages, &num_poisoned_pages);
 
 	/*
 	 * We need/can do nothing about count=0 pages.
@@ -1070,7 +1070,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
 			if (!PageHWPoison(hpage)
 			    || (hwpoison_filter(p) && TestClearPageHWPoison(p))
 			    || (p != hpage && TestSetPageHWPoison(hpage))) {
-				atomic_long_sub(nr_pages, &mce_bad_pages);
+				atomic_long_sub(nr_pages, &num_poisoned_pages);
 				return 0;
 			}
 			set_page_hwpoison_huge_page(hpage);
@@ -1128,7 +1128,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
 	}
 	if (hwpoison_filter(p)) {
 		if (TestClearPageHWPoison(p))
-			atomic_long_sub(nr_pages, &mce_bad_pages);
+			atomic_long_sub(nr_pages, &num_poisoned_pages);
 		unlock_page(hpage);
 		put_page(hpage);
 		return 0;
@@ -1323,7 +1323,7 @@ int unpoison_memory(unsigned long pfn)
 			return 0;
 		}
 		if (TestClearPageHWPoison(p))
-			atomic_long_sub(nr_pages, &mce_bad_pages);
+			atomic_long_sub(nr_pages, &num_poisoned_pages);
 		pr_info("MCE: Software-unpoisoned free page %#lx\n", pfn);
 		return 0;
 	}
@@ -1337,7 +1337,7 @@ int unpoison_memory(unsigned long pfn)
 	 */
 	if (TestClearPageHWPoison(page)) {
 		pr_info("MCE: Software-unpoisoned page %#lx\n", pfn);
-		atomic_long_sub(nr_pages, &mce_bad_pages);
+		atomic_long_sub(nr_pages, &num_poisoned_pages);
 		freeit = 1;
 		if (PageHuge(page))
 			clear_page_hwpoison_huge_page(page);
@@ -1442,7 +1442,7 @@ static int soft_offline_huge_page(struct page *page, int flags)
 	}
 done:
 	/* keep elevated page count for bad page */
-	atomic_long_add(1 << compound_trans_order(hpage), &mce_bad_pages);
+	atomic_long_add(1 << compound_trans_order(hpage), &num_poisoned_pages);
 	set_page_hwpoison_huge_page(hpage);
 	dequeue_hwpoisoned_huge_page(hpage);
 out:
@@ -1585,7 +1585,7 @@ int soft_offline_page(struct page *page, int flags)
 
 done:
 	/* keep elevated page count for bad page */
-	atomic_long_inc(&mce_bad_pages);
+	atomic_long_inc(&num_poisoned_pages);
 	SetPageHWPoison(page);
 out:
 	return ret;
diff --git a/mm/sparse.c b/mm/sparse.c
index cff97960f1d7..7ca6dc847947 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -783,7 +783,7 @@ static void clear_hwpoisoned_pages(struct page *memmap, int nr_pages)
 
 	for (i = 0; i < PAGES_PER_SECTION; i++) {
 		if (PageHWPoison(&memmap[i])) {
-			atomic_long_sub(1, &mce_bad_pages);
+			atomic_long_sub(1, &num_poisoned_pages);
 			ClearPageHWPoison(&memmap[i]);
 		}
 	}
-- 
cgit v1.2.3


From 258401a60c4df39332f30ef57afbc6dbf29a7e84 Mon Sep 17 00:00:00 2001
From: Zlatko Calusic <zlatko.calusic@iskon.hr>
Date: Fri, 22 Feb 2013 16:34:06 -0800
Subject: mm: don't wait on congested zones in balance_pgdat()

From: Zlatko Calusic <zlatko.calusic@iskon.hr>

Commit 92df3a723f84 ("mm: vmscan: throttle reclaim if encountering too
many dirty pages under writeback") introduced waiting on congested zones
based on a sane algorithm in shrink_inactive_list().

What this means is that there's no more need for throttling and
additional heuristics in balance_pgdat().  So, let's remove it and tidy
up the code.

Signed-off-by: Zlatko Calusic <zlatko.calusic@iskon.hr>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Johannes Weiner <jweiner@redhat.com>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/vm_event_item.h |  1 -
 mm/vmscan.c                   | 29 +----------------------------
 mm/vmstat.c                   |  1 -
 3 files changed, 1 insertion(+), 30 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index fce0a2799d43..bd6cf61142be 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -36,7 +36,6 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
 #endif
 		PGINODESTEAL, SLABS_SCANNED, KSWAPD_INODESTEAL,
 		KSWAPD_LOW_WMARK_HIT_QUICKLY, KSWAPD_HIGH_WMARK_HIT_QUICKLY,
-		KSWAPD_SKIP_CONGESTION_WAIT,
 		PAGEOUTRUN, ALLOCSTALL, PGROTATED,
 #ifdef CONFIG_NUMA_BALANCING
 		NUMA_PTE_UPDATES,
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 8fde2fc223d9..b93968b71dc6 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2617,7 +2617,6 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
 							int *classzone_idx)
 {
 	bool pgdat_is_balanced = false;
-	struct zone *unbalanced_zone;
 	int i;
 	int end_zone = 0;	/* Inclusive.  0 = ZONE_DMA */
 	unsigned long total_scanned;
@@ -2648,9 +2647,6 @@ loop_again:
 
 	do {
 		unsigned long lru_pages = 0;
-		int has_under_min_watermark_zone = 0;
-
-		unbalanced_zone = NULL;
 
 		/*
 		 * Scan in the highmem->dma direction for the highest
@@ -2790,17 +2786,7 @@ loop_again:
 				continue;
 			}
 
-			if (!zone_balanced(zone, testorder, 0, end_zone)) {
-				unbalanced_zone = zone;
-				/*
-				 * We are still under min water mark.  This
-				 * means that we have a GFP_ATOMIC allocation
-				 * failure risk. Hurry up!
-				 */
-				if (!zone_watermark_ok_safe(zone, order,
-					    min_wmark_pages(zone), end_zone, 0))
-					has_under_min_watermark_zone = 1;
-			} else {
+			if (zone_balanced(zone, testorder, 0, end_zone))
 				/*
 				 * If a zone reaches its high watermark,
 				 * consider it to be no longer congested. It's
@@ -2809,8 +2795,6 @@ loop_again:
 				 * speculatively avoid congestion waits
 				 */
 				zone_clear_flag(zone, ZONE_CONGESTED);
-			}
-
 		}
 
 		/*
@@ -2827,17 +2811,6 @@ loop_again:
 			break;		/* kswapd: all done */
 		}
 
-		/*
-		 * OK, kswapd is getting into trouble.  Take a nap, then take
-		 * another pass across the zones.
-		 */
-		if (total_scanned && (sc.priority < DEF_PRIORITY - 2)) {
-			if (has_under_min_watermark_zone)
-				count_vm_event(KSWAPD_SKIP_CONGESTION_WAIT);
-			else if (unbalanced_zone)
-				wait_iff_congested(unbalanced_zone, BLK_RW_ASYNC, HZ/10);
-		}
-
 		/*
 		 * We do this so kswapd doesn't build up large priorities for
 		 * example when it is freeing in parallel with allocators. It
diff --git a/mm/vmstat.c b/mm/vmstat.c
index c9d1f68120cd..57f02fd1768b 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -770,7 +770,6 @@ const char * const vmstat_text[] = {
 	"kswapd_inodesteal",
 	"kswapd_low_wmark_hit_quickly",
 	"kswapd_high_wmark_hit_quickly",
-	"kswapd_skip_congestion_wait",
 	"pageoutrun",
 	"allocstall",
 
-- 
cgit v1.2.3


From 21caf2fc1931b485483ddd254b634fa8f0099963 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@canonical.com>
Date: Fri, 22 Feb 2013 16:34:08 -0800
Subject: mm: teach mm by current context info to not do I/O during memory
 allocation

This patch introduces PF_MEMALLOC_NOIO on process flag('flags' field of
'struct task_struct'), so that the flag can be set by one task to avoid
doing I/O inside memory allocation in the task's context.

The patch trys to solve one deadlock problem caused by block device, and
the problem may happen at least in the below situations:

- during block device runtime resume, if memory allocation with
  GFP_KERNEL is called inside runtime resume callback of any one of its
  ancestors(or the block device itself), the deadlock may be triggered
  inside the memory allocation since it might not complete until the block
  device becomes active and the involed page I/O finishes.  The situation
  is pointed out first by Alan Stern.  It is not a good approach to
  convert all GFP_KERNEL[1] in the path into GFP_NOIO because several
  subsystems may be involved(for example, PCI, USB and SCSI may be
  involved for usb mass stoarage device, network devices involved too in
  the iSCSI case)

- during block device runtime suspend, because runtime resume need to
  wait for completion of concurrent runtime suspend.

- during error handling of usb mass storage deivce, USB bus reset will
  be put on the device, so there shouldn't have any memory allocation with
  GFP_KERNEL during USB bus reset, otherwise the deadlock similar with
  above may be triggered.  Unfortunately, any usb device may include one
  mass storage interface in theory, so it requires all usb interface
  drivers to handle the situation.  In fact, most usb drivers don't know
  how to handle bus reset on the device and don't provide .pre_set() and
  .post_reset() callback at all, so USB core has to unbind and bind driver
  for these devices.  So it is still not practical to resort to GFP_NOIO
  for solving the problem.

Also the introduced solution can be used by block subsystem or block
drivers too, for example, set the PF_MEMALLOC_NOIO flag before doing
actual I/O transfer.

It is not a good idea to convert all these GFP_KERNEL in the affected
path into GFP_NOIO because these functions doing that may be implemented
as library and will be called in many other contexts.

In fact, memalloc_noio_flags() can convert some of current static
GFP_NOIO allocation into GFP_KERNEL back in other non-affected contexts,
at least almost all GFP_NOIO in USB subsystem can be converted into
GFP_KERNEL after applying the approach and make allocation with GFP_NOIO
only happen in runtime resume/bus reset/block I/O transfer contexts
generally.

[1], several GFP_KERNEL allocation examples in runtime resume path

- pci subsystem
acpi_os_allocate
	<-acpi_ut_allocate
		<-ACPI_ALLOCATE_ZEROED
			<-acpi_evaluate_object
				<-__acpi_bus_set_power
					<-acpi_bus_set_power
						<-acpi_pci_set_power_state
							<-platform_pci_set_power_state
								<-pci_platform_power_transition
									<-__pci_complete_power_transition
										<-pci_set_power_state
											<-pci_restore_standard_config
												<-pci_pm_runtime_resume
- usb subsystem
usb_get_status
	<-finish_port_resume
		<-usb_port_resume
			<-generic_resume
				<-usb_resume_device
					<-usb_resume_both
						<-usb_runtime_resume

- some individual usb drivers
usblp, uvc, gspca, most of dvb-usb-v2 media drivers, cpia2, az6007, ....

That is just what I have found.  Unfortunately, this allocation can only
be found by human being now, and there should be many not found since
any function in the resume path(call tree) may allocate memory with
GFP_KERNEL.

Signed-off-by: Ming Lei <ming.lei@canonical.com>
Signed-off-by: Minchan Kim <minchan@kernel.org>
Cc: Alan Stern <stern@rowland.harvard.edu>
Cc: Oliver Neukum <oneukum@suse.de>
Cc: Jiri Kosina <jiri.kosina@suse.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: "Rafael J. Wysocki" <rjw@sisk.pl>
Cc: Greg KH <greg@kroah.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: David Decotigny <david.decotigny@google.com>
Cc: Tom Herbert <therbert@google.com>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched.h | 22 ++++++++++++++++++++++
 mm/page_alloc.c       |  9 ++++++++-
 mm/vmscan.c           |  4 ++--
 3 files changed, 32 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index e4112aad2964..c2182b53dace 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -51,6 +51,7 @@ struct sched_param {
 #include <linux/cred.h>
 #include <linux/llist.h>
 #include <linux/uidgid.h>
+#include <linux/gfp.h>
 
 #include <asm/processor.h>
 
@@ -1791,6 +1792,7 @@ extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut,
 #define PF_FROZEN	0x00010000	/* frozen for system suspend */
 #define PF_FSTRANS	0x00020000	/* inside a filesystem transaction */
 #define PF_KSWAPD	0x00040000	/* I am kswapd */
+#define PF_MEMALLOC_NOIO 0x00080000	/* Allocating memory without IO involved */
 #define PF_LESS_THROTTLE 0x00100000	/* Throttle me less: I clean memory */
 #define PF_KTHREAD	0x00200000	/* I am a kernel thread */
 #define PF_RANDOMIZE	0x00400000	/* randomize virtual address space */
@@ -1828,6 +1830,26 @@ extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut,
 #define tsk_used_math(p) ((p)->flags & PF_USED_MATH)
 #define used_math() tsk_used_math(current)
 
+/* __GFP_IO isn't allowed if PF_MEMALLOC_NOIO is set in current->flags */
+static inline gfp_t memalloc_noio_flags(gfp_t flags)
+{
+	if (unlikely(current->flags & PF_MEMALLOC_NOIO))
+		flags &= ~__GFP_IO;
+	return flags;
+}
+
+static inline unsigned int memalloc_noio_save(void)
+{
+	unsigned int flags = current->flags & PF_MEMALLOC_NOIO;
+	current->flags |= PF_MEMALLOC_NOIO;
+	return flags;
+}
+
+static inline void memalloc_noio_restore(unsigned int flags)
+{
+	current->flags = (current->flags & ~PF_MEMALLOC_NOIO) | flags;
+}
+
 /*
  * task->jobctl flags
  */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index e3fb290194c0..3ede25e6686e 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2624,10 +2624,17 @@ retry_cpuset:
 	page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order,
 			zonelist, high_zoneidx, alloc_flags,
 			preferred_zone, migratetype);
-	if (unlikely(!page))
+	if (unlikely(!page)) {
+		/*
+		 * Runtime PM, block IO and its error handling path
+		 * can deadlock because I/O on the device might not
+		 * complete.
+		 */
+		gfp_mask = memalloc_noio_flags(gfp_mask);
 		page = __alloc_pages_slowpath(gfp_mask, order,
 				zonelist, high_zoneidx, nodemask,
 				preferred_zone, migratetype);
+	}
 
 	trace_mm_page_alloc(page, order, gfp_mask, migratetype);
 
diff --git a/mm/vmscan.c b/mm/vmscan.c
index b93968b71dc6..a68fa20269d9 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2352,7 +2352,7 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
 {
 	unsigned long nr_reclaimed;
 	struct scan_control sc = {
-		.gfp_mask = gfp_mask,
+		.gfp_mask = (gfp_mask = memalloc_noio_flags(gfp_mask)),
 		.may_writepage = !laptop_mode,
 		.nr_to_reclaim = SWAP_CLUSTER_MAX,
 		.may_unmap = 1,
@@ -3313,7 +3313,7 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 		.may_unmap = !!(zone_reclaim_mode & RECLAIM_SWAP),
 		.may_swap = 1,
 		.nr_to_reclaim = max(nr_pages, SWAP_CLUSTER_MAX),
-		.gfp_mask = gfp_mask,
+		.gfp_mask = (gfp_mask = memalloc_noio_flags(gfp_mask)),
 		.order = order,
 		.priority = ZONE_RECLAIM_PRIORITY,
 	};
-- 
cgit v1.2.3


From e823407f7b11fa06ba8e7a2801eb9ed11268a7ec Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@canonical.com>
Date: Fri, 22 Feb 2013 16:34:11 -0800
Subject: pm / runtime: introduce pm_runtime_set_memalloc_noio()

Introduce the flag memalloc_noio in 'struct dev_pm_info' to help PM core
to teach mm not allocating memory with GFP_KERNEL flag for avoiding
probable deadlock.

As explained in the comment, any GFP_KERNEL allocation inside
runtime_resume() or runtime_suspend() on any one of device in the path
from one block or network device to the root device in the device tree
may cause deadlock, the introduced pm_runtime_set_memalloc_noio() sets
or clears the flag on device in the path recursively.

Signed-off-by: Ming Lei <ming.lei@canonical.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Alan Stern <stern@rowland.harvard.edu>
Cc: Oliver Neukum <oneukum@suse.de>
Cc: Jiri Kosina <jiri.kosina@suse.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: "Rafael J. Wysocki" <rjw@sisk.pl>
Cc: Greg KH <greg@kroah.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: David Decotigny <david.decotigny@google.com>
Cc: Tom Herbert <therbert@google.com>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/base/power/runtime.c | 70 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/pm.h           |  1 +
 include/linux/pm_runtime.h   |  3 ++
 3 files changed, 74 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index 3148b10dc2e5..cd92e1c77cb7 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -124,6 +124,76 @@ unsigned long pm_runtime_autosuspend_expiration(struct device *dev)
 }
 EXPORT_SYMBOL_GPL(pm_runtime_autosuspend_expiration);
 
+static int dev_memalloc_noio(struct device *dev, void *data)
+{
+	return dev->power.memalloc_noio;
+}
+
+/*
+ * pm_runtime_set_memalloc_noio - Set a device's memalloc_noio flag.
+ * @dev: Device to handle.
+ * @enable: True for setting the flag and False for clearing the flag.
+ *
+ * Set the flag for all devices in the path from the device to the
+ * root device in the device tree if @enable is true, otherwise clear
+ * the flag for devices in the path whose siblings don't set the flag.
+ *
+ * The function should only be called by block device, or network
+ * device driver for solving the deadlock problem during runtime
+ * resume/suspend:
+ *
+ *     If memory allocation with GFP_KERNEL is called inside runtime
+ *     resume/suspend callback of any one of its ancestors(or the
+ *     block device itself), the deadlock may be triggered inside the
+ *     memory allocation since it might not complete until the block
+ *     device becomes active and the involed page I/O finishes. The
+ *     situation is pointed out first by Alan Stern. Network device
+ *     are involved in iSCSI kind of situation.
+ *
+ * The lock of dev_hotplug_mutex is held in the function for handling
+ * hotplug race because pm_runtime_set_memalloc_noio() may be called
+ * in async probe().
+ *
+ * The function should be called between device_add() and device_del()
+ * on the affected device(block/network device).
+ */
+void pm_runtime_set_memalloc_noio(struct device *dev, bool enable)
+{
+	static DEFINE_MUTEX(dev_hotplug_mutex);
+
+	mutex_lock(&dev_hotplug_mutex);
+	for (;;) {
+		bool enabled;
+
+		/* hold power lock since bitfield is not SMP-safe. */
+		spin_lock_irq(&dev->power.lock);
+		enabled = dev->power.memalloc_noio;
+		dev->power.memalloc_noio = enable;
+		spin_unlock_irq(&dev->power.lock);
+
+		/*
+		 * not need to enable ancestors any more if the device
+		 * has been enabled.
+		 */
+		if (enabled && enable)
+			break;
+
+		dev = dev->parent;
+
+		/*
+		 * clear flag of the parent device only if all the
+		 * children don't set the flag because ancestor's
+		 * flag was set by any one of the descendants.
+		 */
+		if (!dev || (!enable &&
+			     device_for_each_child(dev, NULL,
+						   dev_memalloc_noio)))
+			break;
+	}
+	mutex_unlock(&dev_hotplug_mutex);
+}
+EXPORT_SYMBOL_GPL(pm_runtime_set_memalloc_noio);
+
 /**
  * rpm_check_suspend_allowed - Test whether a device may be suspended.
  * @dev: Device to test.
diff --git a/include/linux/pm.h b/include/linux/pm.h
index 97bcf23e045a..e5d7230332a4 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -537,6 +537,7 @@ struct dev_pm_info {
 	unsigned int		irq_safe:1;
 	unsigned int		use_autosuspend:1;
 	unsigned int		timer_autosuspends:1;
+	unsigned int		memalloc_noio:1;
 	enum rpm_request	request;
 	enum rpm_status		runtime_status;
 	int			runtime_error;
diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
index c785c215abfc..7d7e09efff9b 100644
--- a/include/linux/pm_runtime.h
+++ b/include/linux/pm_runtime.h
@@ -47,6 +47,7 @@ extern void pm_runtime_set_autosuspend_delay(struct device *dev, int delay);
 extern unsigned long pm_runtime_autosuspend_expiration(struct device *dev);
 extern void pm_runtime_update_max_time_suspended(struct device *dev,
 						 s64 delta_ns);
+extern void pm_runtime_set_memalloc_noio(struct device *dev, bool enable);
 
 static inline bool pm_children_suspended(struct device *dev)
 {
@@ -156,6 +157,8 @@ static inline void pm_runtime_set_autosuspend_delay(struct device *dev,
 						int delay) {}
 static inline unsigned long pm_runtime_autosuspend_expiration(
 				struct device *dev) { return 0; }
+static inline void pm_runtime_set_memalloc_noio(struct device *dev,
+						bool enable){}
 
 #endif /* !CONFIG_PM_RUNTIME */
 
-- 
cgit v1.2.3


From cf82f3489cf414fe27c6c5af2b372fdcd2a58b24 Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Fri, 22 Feb 2013 16:34:24 -0800
Subject: mm: remove unused memclear_highpage_flush()

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/highmem.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index ef788b5b4a35..7fb31da45d03 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -219,12 +219,6 @@ static inline void zero_user(struct page *page,
 	zero_user_segments(page, start, start + size, 0, 0);
 }
 
-static inline void __deprecated memclear_highpage_flush(struct page *page,
-			unsigned int offset, unsigned int size)
-{
-	zero_user(page, offset, size);
-}
-
 #ifndef __HAVE_ARCH_COPY_USER_HIGHPAGE
 
 static inline void copy_user_highpage(struct page *to, struct page *from,
-- 
cgit v1.2.3


From 34f0315adb58af3b01f59d05b2bce267474e71cb Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Fri, 22 Feb 2013 16:34:25 -0800
Subject: mm: numa: fix minor typo in numa_next_scan

s/me/be/ and clarify the comment a bit when we're changing it anyway.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Suggested-by: Simon Jeons <simon.jeons@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Wanpeng Li <liwanp@linux.vnet.ibm.com>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm_types.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index f8f5162a3571..47047cb4d8e5 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -414,9 +414,9 @@ struct mm_struct {
 #endif
 #ifdef CONFIG_NUMA_BALANCING
 	/*
-	 * numa_next_scan is the next time when the PTEs will me marked
-	 * pte_numa to gather statistics and migrate pages to new nodes
-	 * if necessary
+	 * numa_next_scan is the next time that the PTEs will be marked
+	 * pte_numa. NUMA hinting faults will gather statistics and migrate
+	 * pages to new nodes if necessary.
 	 */
 	unsigned long numa_next_scan;
 
-- 
cgit v1.2.3


From 3c0ff4689630b280704666833e9539d84cddc373 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Fri, 22 Feb 2013 16:34:29 -0800
Subject: mm: numa: handle side-effects in count_vm_numa_events() for
 !CONFIG_NUMA_BALANCING

The current definitions for count_vm_numa_events() is wrong for
!CONFIG_NUMA_BALANCING as the following would miss the side-effect.

	count_vm_numa_events(NUMA_FOO, bar++);

There are no such users of count_vm_numa_events() but this patch fixes
it as it is a potential pitfall.  Ideally both would be converted to
static inline but NUMA_PTE_UPDATES is not defined if
!CONFIG_NUMA_BALANCING and creating dummy constants just to have a
static inline would be similarly clumsy.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Simon Jeons <simon.jeons@gmail.com>
Cc: Wanpeng Li <liwanp@linux.vnet.ibm.com>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/vmstat.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index a13291f7da88..5fd71a7d0dfd 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -85,7 +85,7 @@ static inline void vm_events_fold_cpu(int cpu)
 #define count_vm_numa_events(x, y) count_vm_events(x, y)
 #else
 #define count_vm_numa_event(x) do {} while (0)
-#define count_vm_numa_events(x, y) do {} while (0)
+#define count_vm_numa_events(x, y) do { (void)(y); } while (0)
 #endif /* CONFIG_NUMA_BALANCING */
 
 #define __count_zone_vm_events(item, zone, delta) \
-- 
cgit v1.2.3


From bbeae5b05ef6e40bf54db05ceb8635824153b9e2 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 22 Feb 2013 16:34:30 -0800
Subject: mm: move page flags layout to separate header

This is a preparation patch for moving page->_last_nid into page->flags
that moves page flag layout information to a separate header.  This
patch is necessary because otherwise there would be a circular
dependency between mm_types.h and mm.h.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Simon Jeons <simon.jeons@gmail.com>
Cc: Wanpeng Li <liwanp@linux.vnet.ibm.com>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h                | 40 ----------------------
 include/linux/mm_types.h          |  1 +
 include/linux/mmzone.h            | 22 +-----------
 include/linux/page-flags-layout.h | 71 +++++++++++++++++++++++++++++++++++++++
 4 files changed, 73 insertions(+), 61 deletions(-)
 create mode 100644 include/linux/page-flags-layout.h

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index a114b8eb7676..c2d7d5993b14 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -581,51 +581,11 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
  * sets it, so none of the operations on it need to be atomic.
  */
 
-
-/*
- * page->flags layout:
- *
- * There are three possibilities for how page->flags get
- * laid out.  The first is for the normal case, without
- * sparsemem.  The second is for sparsemem when there is
- * plenty of space for node and section.  The last is when
- * we have run out of space and have to fall back to an
- * alternate (slower) way of determining the node.
- *
- * No sparsemem or sparsemem vmemmap: |       NODE     | ZONE | ... | FLAGS |
- * classic sparse with space for node:| SECTION | NODE | ZONE | ... | FLAGS |
- * classic sparse no space for node:  | SECTION |     ZONE    | ... | FLAGS |
- */
-#if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
-#define SECTIONS_WIDTH		SECTIONS_SHIFT
-#else
-#define SECTIONS_WIDTH		0
-#endif
-
-#define ZONES_WIDTH		ZONES_SHIFT
-
-#if SECTIONS_WIDTH+ZONES_WIDTH+NODES_SHIFT <= BITS_PER_LONG - NR_PAGEFLAGS
-#define NODES_WIDTH		NODES_SHIFT
-#else
-#ifdef CONFIG_SPARSEMEM_VMEMMAP
-#error "Vmemmap: No space for nodes field in page flags"
-#endif
-#define NODES_WIDTH		0
-#endif
-
 /* Page flags: | [SECTION] | [NODE] | ZONE | ... | FLAGS | */
 #define SECTIONS_PGOFF		((sizeof(unsigned long)*8) - SECTIONS_WIDTH)
 #define NODES_PGOFF		(SECTIONS_PGOFF - NODES_WIDTH)
 #define ZONES_PGOFF		(NODES_PGOFF - ZONES_WIDTH)
 
-/*
- * We are going to use the flags for the page to node mapping if its in
- * there.  This includes the case where there is no node, so it is implicit.
- */
-#if !(NODES_WIDTH > 0 || NODES_SHIFT == 0)
-#define NODE_NOT_IN_PAGE_FLAGS
-#endif
-
 /*
  * Define the bit shifts to access each section.  For non-existent
  * sections we define the shift as 0; that plus a 0 mask ensures
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 47047cb4d8e5..d05d632c716e 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -12,6 +12,7 @@
 #include <linux/cpumask.h>
 #include <linux/page-debug-flags.h>
 #include <linux/uprobes.h>
+#include <linux/page-flags-layout.h>
 #include <asm/page.h>
 #include <asm/mmu.h>
 
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 4f4c8c26fa9d..6c80d0ac14dd 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -15,7 +15,7 @@
 #include <linux/seqlock.h>
 #include <linux/nodemask.h>
 #include <linux/pageblock-flags.h>
-#include <generated/bounds.h>
+#include <linux/page-flags-layout.h>
 #include <linux/atomic.h>
 #include <asm/page.h>
 
@@ -310,24 +310,6 @@ enum zone_type {
 
 #ifndef __GENERATING_BOUNDS_H
 
-/*
- * When a memory allocation must conform to specific limitations (such
- * as being suitable for DMA) the caller will pass in hints to the
- * allocator in the gfp_mask, in the zone modifier bits.  These bits
- * are used to select a priority ordered list of memory zones which
- * match the requested limits. See gfp_zone() in include/linux/gfp.h
- */
-
-#if MAX_NR_ZONES < 2
-#define ZONES_SHIFT 0
-#elif MAX_NR_ZONES <= 2
-#define ZONES_SHIFT 1
-#elif MAX_NR_ZONES <= 4
-#define ZONES_SHIFT 2
-#else
-#error ZONES_SHIFT -- too many zones configured adjust calculation
-#endif
-
 struct zone {
 	/* Fields commonly accessed by the page allocator */
 
@@ -1055,8 +1037,6 @@ static inline unsigned long early_pfn_to_nid(unsigned long pfn)
  * PA_SECTION_SHIFT		physical address to/from section number
  * PFN_SECTION_SHIFT		pfn to/from section number
  */
-#define SECTIONS_SHIFT		(MAX_PHYSMEM_BITS - SECTION_SIZE_BITS)
-
 #define PA_SECTION_SHIFT	(SECTION_SIZE_BITS)
 #define PFN_SECTION_SHIFT	(SECTION_SIZE_BITS - PAGE_SHIFT)
 
diff --git a/include/linux/page-flags-layout.h b/include/linux/page-flags-layout.h
new file mode 100644
index 000000000000..316805d6ba1b
--- /dev/null
+++ b/include/linux/page-flags-layout.h
@@ -0,0 +1,71 @@
+#ifndef PAGE_FLAGS_LAYOUT_H
+#define PAGE_FLAGS_LAYOUT_H
+
+#include <linux/numa.h>
+#include <generated/bounds.h>
+
+/*
+ * When a memory allocation must conform to specific limitations (such
+ * as being suitable for DMA) the caller will pass in hints to the
+ * allocator in the gfp_mask, in the zone modifier bits.  These bits
+ * are used to select a priority ordered list of memory zones which
+ * match the requested limits. See gfp_zone() in include/linux/gfp.h
+ */
+#if MAX_NR_ZONES < 2
+#define ZONES_SHIFT 0
+#elif MAX_NR_ZONES <= 2
+#define ZONES_SHIFT 1
+#elif MAX_NR_ZONES <= 4
+#define ZONES_SHIFT 2
+#else
+#error ZONES_SHIFT -- too many zones configured adjust calculation
+#endif
+
+#ifdef CONFIG_SPARSEMEM
+#include <asm/sparsemem.h>
+
+/* SECTION_SHIFT	#bits space required to store a section # */
+#define SECTIONS_SHIFT	(MAX_PHYSMEM_BITS - SECTION_SIZE_BITS)
+
+#endif /* CONFIG_SPARSEMEM */
+
+/*
+ * page->flags layout:
+ *
+ * There are three possibilities for how page->flags get
+ * laid out.  The first is for the normal case, without
+ * sparsemem.  The second is for sparsemem when there is
+ * plenty of space for node and section.  The last is when
+ * we have run out of space and have to fall back to an
+ * alternate (slower) way of determining the node.
+ *
+ * No sparsemem or sparsemem vmemmap: |       NODE     | ZONE | ... | FLAGS |
+ * classic sparse with space for node:| SECTION | NODE | ZONE | ... | FLAGS |
+ * classic sparse no space for node:  | SECTION |     ZONE    | ... | FLAGS |
+ */
+#if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
+#define SECTIONS_WIDTH		SECTIONS_SHIFT
+#else
+#define SECTIONS_WIDTH		0
+#endif
+
+#define ZONES_WIDTH		ZONES_SHIFT
+
+#if SECTIONS_WIDTH+ZONES_WIDTH+NODES_SHIFT <= BITS_PER_LONG - NR_PAGEFLAGS
+#define NODES_WIDTH		NODES_SHIFT
+#else
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+#error "Vmemmap: No space for nodes field in page flags"
+#endif
+#define NODES_WIDTH		0
+#endif
+
+/*
+ * We are going to use the flags for the page to node mapping if its in
+ * there.  This includes the case where there is no node, so it is implicit.
+ */
+#if !(NODES_WIDTH > 0 || NODES_SHIFT == 0)
+#define NODE_NOT_IN_PAGE_FLAGS
+#endif
+
+#endif /* _LINUX_PAGE_FLAGS_LAYOUT */
-- 
cgit v1.2.3


From 75980e97daccfc6babbac7e180ff118537955f5d Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 22 Feb 2013 16:34:32 -0800
Subject: mm: fold page->_last_nid into page->flags where possible

page->_last_nid fits into page->flags on 64-bit.  The unlikely 32-bit
NUMA configuration with NUMA Balancing will still need an extra page
field.  As Peter notes "Completely dropping 32bit support for
CONFIG_NUMA_BALANCING would simplify things, but it would also remove
the warning if we grow enough 64bit only page-flags to push the last-cpu
out."

[mgorman@suse.de: minor modifications]
Signed-off-by: Mel Gorman <mgorman@suse.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Simon Jeons <simon.jeons@gmail.com>
Cc: Wanpeng Li <liwanp@linux.vnet.ibm.com>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h                | 33 ++++++++++++++++++++++++++++++++-
 include/linux/mm_types.h          |  2 +-
 include/linux/page-flags-layout.h | 33 +++++++++++++++++++++++++--------
 mm/memory.c                       |  4 ++++
 4 files changed, 62 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index c2d7d5993b14..473abbda942e 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -581,10 +581,11 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
  * sets it, so none of the operations on it need to be atomic.
  */
 
-/* Page flags: | [SECTION] | [NODE] | ZONE | ... | FLAGS | */
+/* Page flags: | [SECTION] | [NODE] | ZONE | [LAST_NID] | ... | FLAGS | */
 #define SECTIONS_PGOFF		((sizeof(unsigned long)*8) - SECTIONS_WIDTH)
 #define NODES_PGOFF		(SECTIONS_PGOFF - NODES_WIDTH)
 #define ZONES_PGOFF		(NODES_PGOFF - ZONES_WIDTH)
+#define LAST_NID_PGOFF		(ZONES_PGOFF - LAST_NID_WIDTH)
 
 /*
  * Define the bit shifts to access each section.  For non-existent
@@ -594,6 +595,7 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
 #define SECTIONS_PGSHIFT	(SECTIONS_PGOFF * (SECTIONS_WIDTH != 0))
 #define NODES_PGSHIFT		(NODES_PGOFF * (NODES_WIDTH != 0))
 #define ZONES_PGSHIFT		(ZONES_PGOFF * (ZONES_WIDTH != 0))
+#define LAST_NID_PGSHIFT	(LAST_NID_PGOFF * (LAST_NID_WIDTH != 0))
 
 /* NODE:ZONE or SECTION:ZONE is used to ID a zone for the buddy allocator */
 #ifdef NODE_NOT_IN_PAGE_FLAGS
@@ -615,6 +617,7 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
 #define ZONES_MASK		((1UL << ZONES_WIDTH) - 1)
 #define NODES_MASK		((1UL << NODES_WIDTH) - 1)
 #define SECTIONS_MASK		((1UL << SECTIONS_WIDTH) - 1)
+#define LAST_NID_MASK		((1UL << LAST_NID_WIDTH) - 1)
 #define ZONEID_MASK		((1UL << ZONEID_SHIFT) - 1)
 
 static inline enum zone_type page_zonenum(const struct page *page)
@@ -654,6 +657,7 @@ static inline int page_to_nid(const struct page *page)
 #endif
 
 #ifdef CONFIG_NUMA_BALANCING
+#ifdef LAST_NID_NOT_IN_PAGE_FLAGS
 static inline int page_xchg_last_nid(struct page *page, int nid)
 {
 	return xchg(&page->_last_nid, nid);
@@ -668,6 +672,33 @@ static inline void reset_page_last_nid(struct page *page)
 	page->_last_nid = -1;
 }
 #else
+static inline int page_last_nid(struct page *page)
+{
+	return (page->flags >> LAST_NID_PGSHIFT) & LAST_NID_MASK;
+}
+
+static inline int page_xchg_last_nid(struct page *page, int nid)
+{
+	unsigned long old_flags, flags;
+	int last_nid;
+
+	do {
+		old_flags = flags = page->flags;
+		last_nid = page_last_nid(page);
+
+		flags &= ~(LAST_NID_MASK << LAST_NID_PGSHIFT);
+		flags |= (nid & LAST_NID_MASK) << LAST_NID_PGSHIFT;
+	} while (unlikely(cmpxchg(&page->flags, old_flags, flags) != old_flags));
+
+	return last_nid;
+}
+
+static inline void reset_page_last_nid(struct page *page)
+{
+	page_xchg_last_nid(page, (1 << LAST_NID_SHIFT) - 1);
+}
+#endif /* LAST_NID_NOT_IN_PAGE_FLAGS */
+#else
 static inline int page_xchg_last_nid(struct page *page, int nid)
 {
 	return page_to_nid(page);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index d05d632c716e..ace9a5f01c64 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -174,7 +174,7 @@ struct page {
 	void *shadow;
 #endif
 
-#ifdef CONFIG_NUMA_BALANCING
+#ifdef LAST_NID_NOT_IN_PAGE_FLAGS
 	int _last_nid;
 #endif
 }
diff --git a/include/linux/page-flags-layout.h b/include/linux/page-flags-layout.h
index 316805d6ba1b..93506a114034 100644
--- a/include/linux/page-flags-layout.h
+++ b/include/linux/page-flags-layout.h
@@ -32,15 +32,16 @@
 /*
  * page->flags layout:
  *
- * There are three possibilities for how page->flags get
- * laid out.  The first is for the normal case, without
- * sparsemem.  The second is for sparsemem when there is
- * plenty of space for node and section.  The last is when
- * we have run out of space and have to fall back to an
- * alternate (slower) way of determining the node.
+ * There are five possibilities for how page->flags get laid out.  The first
+ * pair is for the normal case without sparsemem. The second pair is for
+ * sparsemem when there is plenty of space for node and section information.
+ * The last is when there is insufficient space in page->flags and a separate
+ * lookup is necessary.
  *
- * No sparsemem or sparsemem vmemmap: |       NODE     | ZONE | ... | FLAGS |
- * classic sparse with space for node:| SECTION | NODE | ZONE | ... | FLAGS |
+ * No sparsemem or sparsemem vmemmap: |       NODE     | ZONE |          ... | FLAGS |
+ *         " plus space for last_nid: |       NODE     | ZONE | LAST_NID ... | FLAGS |
+ * classic sparse with space for node:| SECTION | NODE | ZONE |          ... | FLAGS |
+ *         " plus space for last_nid: | SECTION | NODE | ZONE | LAST_NID ... | FLAGS |
  * classic sparse no space for node:  | SECTION |     ZONE    | ... | FLAGS |
  */
 #if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
@@ -60,6 +61,18 @@
 #define NODES_WIDTH		0
 #endif
 
+#ifdef CONFIG_NUMA_BALANCING
+#define LAST_NID_SHIFT NODES_SHIFT
+#else
+#define LAST_NID_SHIFT 0
+#endif
+
+#if SECTIONS_WIDTH+ZONES_WIDTH+NODES_SHIFT+LAST_NID_SHIFT <= BITS_PER_LONG - NR_PAGEFLAGS
+#define LAST_NID_WIDTH LAST_NID_SHIFT
+#else
+#define LAST_NID_WIDTH 0
+#endif
+
 /*
  * We are going to use the flags for the page to node mapping if its in
  * there.  This includes the case where there is no node, so it is implicit.
@@ -68,4 +81,8 @@
 #define NODE_NOT_IN_PAGE_FLAGS
 #endif
 
+#if defined(CONFIG_NUMA_BALANCING) && LAST_NID_WIDTH == 0
+#define LAST_NID_NOT_IN_PAGE_FLAGS
+#endif
+
 #endif /* _LINUX_PAGE_FLAGS_LAYOUT */
diff --git a/mm/memory.c b/mm/memory.c
index 7837ceacf090..054250ee4a68 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -69,6 +69,10 @@
 
 #include "internal.h"
 
+#ifdef LAST_NID_NOT_IN_PAGE_FLAGS
+#warning Unfortunate NUMA and NUMA Balancing config, growing page-frame for last_nid.
+#endif
+
 #ifndef CONFIG_NEED_MULTIPLE_NODES
 /* use the per-pgdat data instead for discontigmem - mbligh */
 unsigned long max_mapnr;
-- 
cgit v1.2.3


From 9800339b5e0f0e24ab3dac349e0de80d2018832e Mon Sep 17 00:00:00 2001
From: Shaohua Li <shli@kernel.org>
Date: Fri, 22 Feb 2013 16:34:35 -0800
Subject: mm: don't inline page_mapping()

According to akpm, this saves 1/2k text and makes things simple for the
next patch.

Numbers from Minchan:

add/remove: 1/0 grow/shrink: 6/22 up/down: 92/-516 (-424)
function                                     old     new   delta
page_mapping                                   -      48     +48
do_task_stat                                2292    2308     +16
page_remove_rmap                             240     248      +8
load_elf_binary                             4500    4508      +8
update_queue                                 532     536      +4
scsi_probe_and_add_lun                      2892    2896      +4
lookup_fast                                  644     648      +4
vcs_read                                    1040    1036      -4
__ip_route_output_key                       1904    1900      -4
ip_route_input_noref                        2508    2500      -8
shmem_file_aio_read                          784     772     -12
__isolate_lru_page                           272     256     -16
shmem_replace_page                           708     688     -20
mark_buffer_dirty                            228     208     -20
__set_page_dirty_buffers                     240     220     -20
__remove_mapping                             276     256     -20
update_mmu_cache                             500     476     -24
set_page_dirty_balance                        92      68     -24
set_page_dirty                               172     148     -24
page_evictable                                88      64     -24
page_cache_pipe_buf_steal                    248     224     -24
clear_page_dirty_for_io                      340     316     -24
test_set_page_writeback                      400     372     -28
test_clear_page_writeback                    516     488     -28
invalidate_inode_page                        156     128     -28
page_mkclean                                 432     400     -32
flush_dcache_page                            360     328     -32
__set_page_dirty_nobuffers                   324     280     -44
shrink_page_list                            2412    2356     -56

Signed-off-by: Shaohua Li <shli@fusionio.com>
Suggested-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Hugh Dickins <hughd@google.com>
Acked-by: Rik van Riel <riel@redhat.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 13 +------------
 mm/util.c          | 16 ++++++++++++++++
 2 files changed, 17 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 473abbda942e..1d4122bf6f27 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -809,18 +809,7 @@ void page_address_init(void);
 #define PAGE_MAPPING_KSM	2
 #define PAGE_MAPPING_FLAGS	(PAGE_MAPPING_ANON | PAGE_MAPPING_KSM)
 
-extern struct address_space swapper_space;
-static inline struct address_space *page_mapping(struct page *page)
-{
-	struct address_space *mapping = page->mapping;
-
-	VM_BUG_ON(PageSlab(page));
-	if (unlikely(PageSwapCache(page)))
-		mapping = &swapper_space;
-	else if ((unsigned long)mapping & PAGE_MAPPING_ANON)
-		mapping = NULL;
-	return mapping;
-}
+extern struct address_space *page_mapping(struct page *page);
 
 /* Neutral page->mapping pointer to address_space or anon_vma or other */
 static inline void *page_rmapping(struct page *page)
diff --git a/mm/util.c b/mm/util.c
index 3704bf1bef94..16a73195a37b 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -5,6 +5,7 @@
 #include <linux/err.h>
 #include <linux/sched.h>
 #include <linux/security.h>
+#include <linux/swap.h>
 #include <asm/uaccess.h>
 
 #include "internal.h"
@@ -382,6 +383,21 @@ unsigned long vm_mmap(struct file *file, unsigned long addr,
 }
 EXPORT_SYMBOL(vm_mmap);
 
+struct address_space *page_mapping(struct page *page)
+{
+	struct address_space *mapping = page->mapping;
+
+	VM_BUG_ON(PageSlab(page));
+#ifdef CONFIG_SWAP
+	if (unlikely(PageSwapCache(page)))
+		mapping = &swapper_space;
+	else
+#endif
+	if ((unsigned long)mapping & PAGE_MAPPING_ANON)
+		mapping = NULL;
+	return mapping;
+}
+
 /* Tracepoints definitions. */
 EXPORT_TRACEPOINT_SYMBOL(kmalloc);
 EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc);
-- 
cgit v1.2.3


From 33806f06da654092182410d974b6d3c5396ea3eb Mon Sep 17 00:00:00 2001
From: Shaohua Li <shli@kernel.org>
Date: Fri, 22 Feb 2013 16:34:37 -0800
Subject: swap: make each swap partition have one address_space

When I use several fast SSD to do swap, swapper_space.tree_lock is
heavily contended.  This makes each swap partition have one
address_space to reduce the lock contention.  There is an array of
address_space for swap.  The swap entry type is the index to the array.

In my test with 3 SSD, this increases the swapout throughput 20%.

[akpm@linux-foundation.org: revert unneeded change to  __add_to_swap_cache]
Signed-off-by: Shaohua Li <shli@fusionio.com>
Cc: Hugh Dickins <hughd@google.com>
Acked-by: Rik van Riel <riel@redhat.com>
Acked-by: Minchan Kim <minchan@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/meminfo.c    |  4 ++--
 include/linux/swap.h |  9 +++++----
 mm/memcontrol.c      |  4 ++--
 mm/mincore.c         |  5 +++--
 mm/swap.c            |  9 +++++++--
 mm/swap_state.c      | 55 ++++++++++++++++++++++++++++++++++++----------------
 mm/swapfile.c        |  5 +++--
 mm/util.c            | 10 +++++++---
 8 files changed, 67 insertions(+), 34 deletions(-)

(limited to 'include/linux')

diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index c3dac611c3c0..1efaaa19c4f3 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -40,7 +40,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
 		* sysctl_overcommit_ratio / 100) + total_swap_pages;
 
 	cached = global_page_state(NR_FILE_PAGES) -
-			total_swapcache_pages - i.bufferram;
+			total_swapcache_pages() - i.bufferram;
 	if (cached < 0)
 		cached = 0;
 
@@ -109,7 +109,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
 		K(i.freeram),
 		K(i.bufferram),
 		K(cached),
-		K(total_swapcache_pages),
+		K(total_swapcache_pages()),
 		K(pages[LRU_ACTIVE_ANON]   + pages[LRU_ACTIVE_FILE]),
 		K(pages[LRU_INACTIVE_ANON] + pages[LRU_INACTIVE_FILE]),
 		K(pages[LRU_ACTIVE_ANON]),
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 8c66486a8ca8..235c039892ee 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -8,7 +8,7 @@
 #include <linux/memcontrol.h>
 #include <linux/sched.h>
 #include <linux/node.h>
-
+#include <linux/fs.h>
 #include <linux/atomic.h>
 #include <asm/page.h>
 
@@ -330,8 +330,9 @@ int generic_swapfile_activate(struct swap_info_struct *, struct file *,
 		sector_t *);
 
 /* linux/mm/swap_state.c */
-extern struct address_space swapper_space;
-#define total_swapcache_pages  swapper_space.nrpages
+extern struct address_space swapper_spaces[];
+#define swap_address_space(entry) (&swapper_spaces[swp_type(entry)])
+extern unsigned long total_swapcache_pages(void);
 extern void show_swap_cache_info(void);
 extern int add_to_swap(struct page *);
 extern int add_to_swap_cache(struct page *, swp_entry_t, gfp_t);
@@ -382,7 +383,7 @@ mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout)
 
 #define nr_swap_pages				0L
 #define total_swap_pages			0L
-#define total_swapcache_pages			0UL
+#define total_swapcache_pages()			0UL
 
 #define si_swapinfo(val) \
 	do { (val)->freeswap = (val)->totalswap = 0; } while (0)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index c878b1c69510..f85861531f22 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -6307,7 +6307,7 @@ static struct page *mc_handle_swap_pte(struct vm_area_struct *vma,
 	 * Because lookup_swap_cache() updates some statistics counter,
 	 * we call find_get_page() with swapper_space directly.
 	 */
-	page = find_get_page(&swapper_space, ent.val);
+	page = find_get_page(swap_address_space(ent), ent.val);
 	if (do_swap_account)
 		entry->val = ent.val;
 
@@ -6348,7 +6348,7 @@ static struct page *mc_handle_file_pte(struct vm_area_struct *vma,
 		swp_entry_t swap = radix_to_swp_entry(page);
 		if (do_swap_account)
 			*entry = swap;
-		page = find_get_page(&swapper_space, swap.val);
+		page = find_get_page(swap_address_space(swap), swap.val);
 	}
 #endif
 	return page;
diff --git a/mm/mincore.c b/mm/mincore.c
index 936b4cee8cb1..da2be56a7b8f 100644
--- a/mm/mincore.c
+++ b/mm/mincore.c
@@ -75,7 +75,7 @@ static unsigned char mincore_page(struct address_space *mapping, pgoff_t pgoff)
 	/* shmem/tmpfs may return swap: account for swapcache page too. */
 	if (radix_tree_exceptional_entry(page)) {
 		swp_entry_t swap = radix_to_swp_entry(page);
-		page = find_get_page(&swapper_space, swap.val);
+		page = find_get_page(swap_address_space(swap), swap.val);
 	}
 #endif
 	if (page) {
@@ -135,7 +135,8 @@ static void mincore_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 			} else {
 #ifdef CONFIG_SWAP
 				pgoff = entry.val;
-				*vec = mincore_page(&swapper_space, pgoff);
+				*vec = mincore_page(swap_address_space(entry),
+					pgoff);
 #else
 				WARN_ON(1);
 				*vec = 1;
diff --git a/mm/swap.c b/mm/swap.c
index 6310dc2008ff..8a529a01e8fc 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -855,9 +855,14 @@ EXPORT_SYMBOL(pagevec_lookup_tag);
 void __init swap_setup(void)
 {
 	unsigned long megs = totalram_pages >> (20 - PAGE_SHIFT);
-
 #ifdef CONFIG_SWAP
-	bdi_init(swapper_space.backing_dev_info);
+	int i;
+
+	bdi_init(swapper_spaces[0].backing_dev_info);
+	for (i = 0; i < MAX_SWAPFILES; i++) {
+		spin_lock_init(&swapper_spaces[i].tree_lock);
+		INIT_LIST_HEAD(&swapper_spaces[i].i_mmap_nonlinear);
+	}
 #endif
 
 	/* Use a smaller cluster for small-memory machines */
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 0cb36fb1f61c..8d6644c5d0cc 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -36,12 +36,12 @@ static struct backing_dev_info swap_backing_dev_info = {
 	.capabilities	= BDI_CAP_NO_ACCT_AND_WRITEBACK | BDI_CAP_SWAP_BACKED,
 };
 
-struct address_space swapper_space = {
-	.page_tree	= RADIX_TREE_INIT(GFP_ATOMIC|__GFP_NOWARN),
-	.tree_lock	= __SPIN_LOCK_UNLOCKED(swapper_space.tree_lock),
-	.a_ops		= &swap_aops,
-	.i_mmap_nonlinear = LIST_HEAD_INIT(swapper_space.i_mmap_nonlinear),
-	.backing_dev_info = &swap_backing_dev_info,
+struct address_space swapper_spaces[MAX_SWAPFILES] = {
+	[0 ... MAX_SWAPFILES - 1] = {
+		.page_tree	= RADIX_TREE_INIT(GFP_ATOMIC|__GFP_NOWARN),
+		.a_ops		= &swap_aops,
+		.backing_dev_info = &swap_backing_dev_info,
+	}
 };
 
 #define INC_CACHE_INFO(x)	do { swap_cache_info.x++; } while (0)
@@ -53,9 +53,19 @@ static struct {
 	unsigned long find_total;
 } swap_cache_info;
 
+unsigned long total_swapcache_pages(void)
+{
+	int i;
+	unsigned long ret = 0;
+
+	for (i = 0; i < MAX_SWAPFILES; i++)
+		ret += swapper_spaces[i].nrpages;
+	return ret;
+}
+
 void show_swap_cache_info(void)
 {
-	printk("%lu pages in swap cache\n", total_swapcache_pages);
+	printk("%lu pages in swap cache\n", total_swapcache_pages());
 	printk("Swap cache stats: add %lu, delete %lu, find %lu/%lu\n",
 		swap_cache_info.add_total, swap_cache_info.del_total,
 		swap_cache_info.find_success, swap_cache_info.find_total);
@@ -70,6 +80,7 @@ void show_swap_cache_info(void)
 static int __add_to_swap_cache(struct page *page, swp_entry_t entry)
 {
 	int error;
+	struct address_space *address_space;
 
 	VM_BUG_ON(!PageLocked(page));
 	VM_BUG_ON(PageSwapCache(page));
@@ -79,14 +90,16 @@ static int __add_to_swap_cache(struct page *page, swp_entry_t entry)
 	SetPageSwapCache(page);
 	set_page_private(page, entry.val);
 
-	spin_lock_irq(&swapper_space.tree_lock);
-	error = radix_tree_insert(&swapper_space.page_tree, entry.val, page);
+	address_space = swap_address_space(entry);
+	spin_lock_irq(&address_space->tree_lock);
+	error = radix_tree_insert(&address_space->page_tree,
+					entry.val, page);
 	if (likely(!error)) {
-		total_swapcache_pages++;
+		address_space->nrpages++;
 		__inc_zone_page_state(page, NR_FILE_PAGES);
 		INC_CACHE_INFO(add_total);
 	}
-	spin_unlock_irq(&swapper_space.tree_lock);
+	spin_unlock_irq(&address_space->tree_lock);
 
 	if (unlikely(error)) {
 		/*
@@ -122,14 +135,19 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask)
  */
 void __delete_from_swap_cache(struct page *page)
 {
+	swp_entry_t entry;
+	struct address_space *address_space;
+
 	VM_BUG_ON(!PageLocked(page));
 	VM_BUG_ON(!PageSwapCache(page));
 	VM_BUG_ON(PageWriteback(page));
 
-	radix_tree_delete(&swapper_space.page_tree, page_private(page));
+	entry.val = page_private(page);
+	address_space = swap_address_space(entry);
+	radix_tree_delete(&address_space->page_tree, page_private(page));
 	set_page_private(page, 0);
 	ClearPageSwapCache(page);
-	total_swapcache_pages--;
+	address_space->nrpages--;
 	__dec_zone_page_state(page, NR_FILE_PAGES);
 	INC_CACHE_INFO(del_total);
 }
@@ -195,12 +213,14 @@ int add_to_swap(struct page *page)
 void delete_from_swap_cache(struct page *page)
 {
 	swp_entry_t entry;
+	struct address_space *address_space;
 
 	entry.val = page_private(page);
 
-	spin_lock_irq(&swapper_space.tree_lock);
+	address_space = swap_address_space(entry);
+	spin_lock_irq(&address_space->tree_lock);
 	__delete_from_swap_cache(page);
-	spin_unlock_irq(&swapper_space.tree_lock);
+	spin_unlock_irq(&address_space->tree_lock);
 
 	swapcache_free(entry, page);
 	page_cache_release(page);
@@ -263,7 +283,7 @@ struct page * lookup_swap_cache(swp_entry_t entry)
 {
 	struct page *page;
 
-	page = find_get_page(&swapper_space, entry.val);
+	page = find_get_page(swap_address_space(entry), entry.val);
 
 	if (page)
 		INC_CACHE_INFO(find_success);
@@ -290,7 +310,8 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
 		 * called after lookup_swap_cache() failed, re-calling
 		 * that would confuse statistics.
 		 */
-		found_page = find_get_page(&swapper_space, entry.val);
+		found_page = find_get_page(swap_address_space(entry),
+					entry.val);
 		if (found_page)
 			break;
 
diff --git a/mm/swapfile.c b/mm/swapfile.c
index e97a0e5aea91..e51864e6fe8b 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -79,7 +79,7 @@ __try_to_reclaim_swap(struct swap_info_struct *si, unsigned long offset)
 	struct page *page;
 	int ret = 0;
 
-	page = find_get_page(&swapper_space, entry.val);
+	page = find_get_page(swap_address_space(entry), entry.val);
 	if (!page)
 		return 0;
 	/*
@@ -699,7 +699,8 @@ int free_swap_and_cache(swp_entry_t entry)
 	p = swap_info_get(entry);
 	if (p) {
 		if (swap_entry_free(p, entry, 1) == SWAP_HAS_CACHE) {
-			page = find_get_page(&swapper_space, entry.val);
+			page = find_get_page(swap_address_space(entry),
+						entry.val);
 			if (page && !trylock_page(page)) {
 				page_cache_release(page);
 				page = NULL;
diff --git a/mm/util.c b/mm/util.c
index 16a73195a37b..ab1424dbe2e6 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -6,6 +6,7 @@
 #include <linux/sched.h>
 #include <linux/security.h>
 #include <linux/swap.h>
+#include <linux/swapops.h>
 #include <asm/uaccess.h>
 
 #include "internal.h"
@@ -389,9 +390,12 @@ struct address_space *page_mapping(struct page *page)
 
 	VM_BUG_ON(PageSlab(page));
 #ifdef CONFIG_SWAP
-	if (unlikely(PageSwapCache(page)))
-		mapping = &swapper_space;
-	else
+	if (unlikely(PageSwapCache(page))) {
+		swp_entry_t entry;
+
+		entry.val = page_private(page);
+		mapping = swap_address_space(entry);
+	} else
 #endif
 	if ((unsigned long)mapping & PAGE_MAPPING_ANON)
 		mapping = NULL;
-- 
cgit v1.2.3


From ec8acf20afb8534ed511f6613dd2226b9e301010 Mon Sep 17 00:00:00 2001
From: Shaohua Li <shli@kernel.org>
Date: Fri, 22 Feb 2013 16:34:38 -0800
Subject: swap: add per-partition lock for swapfile

swap_lock is heavily contended when I test swap to 3 fast SSD (even
slightly slower than swap to 2 such SSD).  The main contention comes
from swap_info_get().  This patch tries to fix the gap with adding a new
per-partition lock.

Global data like nr_swapfiles, total_swap_pages, least_priority and
swap_list are still protected by swap_lock.

nr_swap_pages is an atomic now, it can be changed without swap_lock.  In
theory, it's possible get_swap_page() finds no swap pages but actually
there are free swap pages.  But sounds not a big problem.

Accessing partition specific data (like scan_swap_map and so on) is only
protected by swap_info_struct.lock.

Changing swap_info_struct.flags need hold swap_lock and
swap_info_struct.lock, because scan_scan_map() will check it.  read the
flags is ok with either the locks hold.

If both swap_lock and swap_info_struct.lock must be hold, we always hold
the former first to avoid deadlock.

swap_entry_free() can change swap_list.  To delete that code, we add a
new highest_priority_index.  Whenever get_swap_page() is called, we
check it.  If it's valid, we use it.

It's a pity get_swap_page() still holds swap_lock().  But in practice,
swap_lock() isn't heavily contended in my test with this patch (or I can
say there are other much more heavier bottlenecks like TLB flush).  And
BTW, looks get_swap_page() doesn't really need the lock.  We never free
swap_info[] and we check SWAP_WRITEOK flag.  The only risk without the
lock is we could swapout to some low priority swap, but we can quickly
recover after several rounds of swap, so sounds not a big deal to me.
But I'd prefer to fix this if it's a real problem.

"swap: make each swap partition have one address_space" improved the
swapout speed from 1.7G/s to 2G/s.  This patch further improves the
speed to 2.3G/s, so around 15% improvement.  It's a multi-process test,
so TLB flush isn't the biggest bottleneck before the patches.

[arnd@arndb.de: fix it for nommu]
[hughd@google.com: add missing unlock]
[minchan@kernel.org: get rid of lockdep whinge on sys_swapon]
Signed-off-by: Shaohua Li <shli@fusionio.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Seth Jennings <sjenning@linux.vnet.ibm.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Cc: Dan Magenheimer <dan.magenheimer@oracle.com>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Minchan Kim <minchan@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/sparc/mm/init_32.c |   2 +-
 arch/tile/mm/pgtable.c  |   2 +-
 include/linux/swap.h    |  32 ++++++++--
 mm/mmap.c               |   2 +-
 mm/nommu.c              |   2 +-
 mm/swap_state.c         |   3 +-
 mm/swapfile.c           | 154 +++++++++++++++++++++++++++++++++---------------
 mm/vmscan.c             |   8 +--
 8 files changed, 145 insertions(+), 60 deletions(-)

(limited to 'include/linux')

diff --git a/arch/sparc/mm/init_32.c b/arch/sparc/mm/init_32.c
index dde85ef1c56d..48e0c030e8f5 100644
--- a/arch/sparc/mm/init_32.c
+++ b/arch/sparc/mm/init_32.c
@@ -57,7 +57,7 @@ void show_mem(unsigned int filter)
 	printk("Mem-info:\n");
 	show_free_areas(filter);
 	printk("Free swap:       %6ldkB\n",
-	       nr_swap_pages << (PAGE_SHIFT-10));
+	       get_nr_swap_pages() << (PAGE_SHIFT-10));
 	printk("%ld pages of RAM\n", totalram_pages);
 	printk("%ld free pages\n", nr_free_pages());
 }
diff --git a/arch/tile/mm/pgtable.c b/arch/tile/mm/pgtable.c
index de0de0c0e8a1..b3b4972c2451 100644
--- a/arch/tile/mm/pgtable.c
+++ b/arch/tile/mm/pgtable.c
@@ -61,7 +61,7 @@ void show_mem(unsigned int filter)
 	       global_page_state(NR_PAGETABLE),
 	       global_page_state(NR_BOUNCE),
 	       global_page_state(NR_FILE_PAGES),
-	       nr_swap_pages);
+	       get_nr_swap_pages());
 
 	for_each_zone(zone) {
 		unsigned long flags, order, total = 0, largest_order = -1;
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 235c039892ee..a3e22d357e91 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -202,6 +202,18 @@ struct swap_info_struct {
 	unsigned long *frontswap_map;	/* frontswap in-use, one bit per page */
 	atomic_t frontswap_pages;	/* frontswap pages in-use counter */
 #endif
+	spinlock_t lock;		/*
+					 * protect map scan related fields like
+					 * swap_map, lowest_bit, highest_bit,
+					 * inuse_pages, cluster_next,
+					 * cluster_nr, lowest_alloc and
+					 * highest_alloc. other fields are only
+					 * changed at swapon/swapoff, so are
+					 * protected by swap_lock. changing
+					 * flags need hold this lock and
+					 * swap_lock. If both locks need hold,
+					 * hold swap_lock first.
+					 */
 };
 
 struct swap_list_t {
@@ -209,9 +221,6 @@ struct swap_list_t {
 	int next;	/* swapfile to be used next */
 };
 
-/* Swap 50% full? Release swapcache more aggressively.. */
-#define vm_swap_full() (nr_swap_pages*2 < total_swap_pages)
-
 /* linux/mm/page_alloc.c */
 extern unsigned long totalram_pages;
 extern unsigned long totalreserve_pages;
@@ -347,8 +356,20 @@ extern struct page *swapin_readahead(swp_entry_t, gfp_t,
 			struct vm_area_struct *vma, unsigned long addr);
 
 /* linux/mm/swapfile.c */
-extern long nr_swap_pages;
+extern atomic_long_t nr_swap_pages;
 extern long total_swap_pages;
+
+/* Swap 50% full? Release swapcache more aggressively.. */
+static inline bool vm_swap_full(void)
+{
+	return atomic_long_read(&nr_swap_pages) * 2 < total_swap_pages;
+}
+
+static inline long get_nr_swap_pages(void)
+{
+	return atomic_long_read(&nr_swap_pages);
+}
+
 extern void si_swapinfo(struct sysinfo *);
 extern swp_entry_t get_swap_page(void);
 extern swp_entry_t get_swap_page_of_type(int);
@@ -381,9 +402,10 @@ mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout)
 
 #else /* CONFIG_SWAP */
 
-#define nr_swap_pages				0L
+#define get_nr_swap_pages()			0L
 #define total_swap_pages			0L
 #define total_swapcache_pages()			0UL
+#define vm_swap_full()				0
 
 #define si_swapinfo(val) \
 	do { (val)->freeswap = (val)->totalswap = 0; } while (0)
diff --git a/mm/mmap.c b/mm/mmap.c
index 44bb4d869884..28416f6b8dd5 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -144,7 +144,7 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
 		 */
 		free -= global_page_state(NR_SHMEM);
 
-		free += nr_swap_pages;
+		free += get_nr_swap_pages();
 
 		/*
 		 * Any slabs which are created with the
diff --git a/mm/nommu.c b/mm/nommu.c
index 18c1b932e2c4..87854a55829d 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1907,7 +1907,7 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
 		 */
 		free -= global_page_state(NR_SHMEM);
 
-		free += nr_swap_pages;
+		free += get_nr_swap_pages();
 
 		/*
 		 * Any slabs which are created with the
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 8d6644c5d0cc..7efcf1525921 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -69,7 +69,8 @@ void show_swap_cache_info(void)
 	printk("Swap cache stats: add %lu, delete %lu, find %lu/%lu\n",
 		swap_cache_info.add_total, swap_cache_info.del_total,
 		swap_cache_info.find_success, swap_cache_info.find_total);
-	printk("Free swap  = %ldkB\n", nr_swap_pages << (PAGE_SHIFT - 10));
+	printk("Free swap  = %ldkB\n",
+		get_nr_swap_pages() << (PAGE_SHIFT - 10));
 	printk("Total swap = %lukB\n", total_swap_pages << (PAGE_SHIFT - 10));
 }
 
diff --git a/mm/swapfile.c b/mm/swapfile.c
index e51864e6fe8b..9b51266413cd 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -47,9 +47,11 @@ static sector_t map_swap_entry(swp_entry_t, struct block_device**);
 
 DEFINE_SPINLOCK(swap_lock);
 static unsigned int nr_swapfiles;
-long nr_swap_pages;
+atomic_long_t nr_swap_pages;
+/* protected with swap_lock. reading in vm_swap_full() doesn't need lock */
 long total_swap_pages;
 static int least_priority;
+static atomic_t highest_priority_index = ATOMIC_INIT(-1);
 
 static const char Bad_file[] = "Bad swap file entry ";
 static const char Unused_file[] = "Unused swap file entry ";
@@ -223,7 +225,7 @@ static unsigned long scan_swap_map(struct swap_info_struct *si,
 			si->lowest_alloc = si->max;
 			si->highest_alloc = 0;
 		}
-		spin_unlock(&swap_lock);
+		spin_unlock(&si->lock);
 
 		/*
 		 * If seek is expensive, start searching for new cluster from
@@ -242,7 +244,7 @@ static unsigned long scan_swap_map(struct swap_info_struct *si,
 			if (si->swap_map[offset])
 				last_in_cluster = offset + SWAPFILE_CLUSTER;
 			else if (offset == last_in_cluster) {
-				spin_lock(&swap_lock);
+				spin_lock(&si->lock);
 				offset -= SWAPFILE_CLUSTER - 1;
 				si->cluster_next = offset;
 				si->cluster_nr = SWAPFILE_CLUSTER - 1;
@@ -263,7 +265,7 @@ static unsigned long scan_swap_map(struct swap_info_struct *si,
 			if (si->swap_map[offset])
 				last_in_cluster = offset + SWAPFILE_CLUSTER;
 			else if (offset == last_in_cluster) {
-				spin_lock(&swap_lock);
+				spin_lock(&si->lock);
 				offset -= SWAPFILE_CLUSTER - 1;
 				si->cluster_next = offset;
 				si->cluster_nr = SWAPFILE_CLUSTER - 1;
@@ -277,7 +279,7 @@ static unsigned long scan_swap_map(struct swap_info_struct *si,
 		}
 
 		offset = scan_base;
-		spin_lock(&swap_lock);
+		spin_lock(&si->lock);
 		si->cluster_nr = SWAPFILE_CLUSTER - 1;
 		si->lowest_alloc = 0;
 	}
@@ -293,9 +295,9 @@ checks:
 	/* reuse swap entry of cache-only swap if not busy. */
 	if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) {
 		int swap_was_freed;
-		spin_unlock(&swap_lock);
+		spin_unlock(&si->lock);
 		swap_was_freed = __try_to_reclaim_swap(si, offset);
-		spin_lock(&swap_lock);
+		spin_lock(&si->lock);
 		/* entry was freed successfully, try to use this again */
 		if (swap_was_freed)
 			goto checks;
@@ -335,13 +337,13 @@ checks:
 			    si->lowest_alloc <= last_in_cluster)
 				last_in_cluster = si->lowest_alloc - 1;
 			si->flags |= SWP_DISCARDING;
-			spin_unlock(&swap_lock);
+			spin_unlock(&si->lock);
 
 			if (offset < last_in_cluster)
 				discard_swap_cluster(si, offset,
 					last_in_cluster - offset + 1);
 
-			spin_lock(&swap_lock);
+			spin_lock(&si->lock);
 			si->lowest_alloc = 0;
 			si->flags &= ~SWP_DISCARDING;
 
@@ -355,10 +357,10 @@ checks:
 			 * could defer that delay until swap_writepage,
 			 * but it's easier to keep this self-contained.
 			 */
-			spin_unlock(&swap_lock);
+			spin_unlock(&si->lock);
 			wait_on_bit(&si->flags, ilog2(SWP_DISCARDING),
 				wait_for_discard, TASK_UNINTERRUPTIBLE);
-			spin_lock(&swap_lock);
+			spin_lock(&si->lock);
 		} else {
 			/*
 			 * Note pages allocated by racing tasks while
@@ -374,14 +376,14 @@ checks:
 	return offset;
 
 scan:
-	spin_unlock(&swap_lock);
+	spin_unlock(&si->lock);
 	while (++offset <= si->highest_bit) {
 		if (!si->swap_map[offset]) {
-			spin_lock(&swap_lock);
+			spin_lock(&si->lock);
 			goto checks;
 		}
 		if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) {
-			spin_lock(&swap_lock);
+			spin_lock(&si->lock);
 			goto checks;
 		}
 		if (unlikely(--latency_ration < 0)) {
@@ -392,11 +394,11 @@ scan:
 	offset = si->lowest_bit;
 	while (++offset < scan_base) {
 		if (!si->swap_map[offset]) {
-			spin_lock(&swap_lock);
+			spin_lock(&si->lock);
 			goto checks;
 		}
 		if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) {
-			spin_lock(&swap_lock);
+			spin_lock(&si->lock);
 			goto checks;
 		}
 		if (unlikely(--latency_ration < 0)) {
@@ -404,7 +406,7 @@ scan:
 			latency_ration = LATENCY_LIMIT;
 		}
 	}
-	spin_lock(&swap_lock);
+	spin_lock(&si->lock);
 
 no_page:
 	si->flags -= SWP_SCANNING;
@@ -417,13 +419,34 @@ swp_entry_t get_swap_page(void)
 	pgoff_t offset;
 	int type, next;
 	int wrapped = 0;
+	int hp_index;
 
 	spin_lock(&swap_lock);
-	if (nr_swap_pages <= 0)
+	if (atomic_long_read(&nr_swap_pages) <= 0)
 		goto noswap;
-	nr_swap_pages--;
+	atomic_long_dec(&nr_swap_pages);
 
 	for (type = swap_list.next; type >= 0 && wrapped < 2; type = next) {
+		hp_index = atomic_xchg(&highest_priority_index, -1);
+		/*
+		 * highest_priority_index records current highest priority swap
+		 * type which just frees swap entries. If its priority is
+		 * higher than that of swap_list.next swap type, we use it.  It
+		 * isn't protected by swap_lock, so it can be an invalid value
+		 * if the corresponding swap type is swapoff. We double check
+		 * the flags here. It's even possible the swap type is swapoff
+		 * and swapon again and its priority is changed. In such rare
+		 * case, low prority swap type might be used, but eventually
+		 * high priority swap will be used after several rounds of
+		 * swap.
+		 */
+		if (hp_index != -1 && hp_index != type &&
+		    swap_info[type]->prio < swap_info[hp_index]->prio &&
+		    (swap_info[hp_index]->flags & SWP_WRITEOK)) {
+			type = hp_index;
+			swap_list.next = type;
+		}
+
 		si = swap_info[type];
 		next = si->next;
 		if (next < 0 ||
@@ -432,22 +455,29 @@ swp_entry_t get_swap_page(void)
 			wrapped++;
 		}
 
-		if (!si->highest_bit)
+		spin_lock(&si->lock);
+		if (!si->highest_bit) {
+			spin_unlock(&si->lock);
 			continue;
-		if (!(si->flags & SWP_WRITEOK))
+		}
+		if (!(si->flags & SWP_WRITEOK)) {
+			spin_unlock(&si->lock);
 			continue;
+		}
 
 		swap_list.next = next;
+
+		spin_unlock(&swap_lock);
 		/* This is called for allocating swap entry for cache */
 		offset = scan_swap_map(si, SWAP_HAS_CACHE);
-		if (offset) {
-			spin_unlock(&swap_lock);
+		spin_unlock(&si->lock);
+		if (offset)
 			return swp_entry(type, offset);
-		}
+		spin_lock(&swap_lock);
 		next = swap_list.next;
 	}
 
-	nr_swap_pages++;
+	atomic_long_inc(&nr_swap_pages);
 noswap:
 	spin_unlock(&swap_lock);
 	return (swp_entry_t) {0};
@@ -459,19 +489,19 @@ swp_entry_t get_swap_page_of_type(int type)
 	struct swap_info_struct *si;
 	pgoff_t offset;
 
-	spin_lock(&swap_lock);
 	si = swap_info[type];
+	spin_lock(&si->lock);
 	if (si && (si->flags & SWP_WRITEOK)) {
-		nr_swap_pages--;
+		atomic_long_dec(&nr_swap_pages);
 		/* This is called for allocating swap entry, not cache */
 		offset = scan_swap_map(si, 1);
 		if (offset) {
-			spin_unlock(&swap_lock);
+			spin_unlock(&si->lock);
 			return swp_entry(type, offset);
 		}
-		nr_swap_pages++;
+		atomic_long_inc(&nr_swap_pages);
 	}
-	spin_unlock(&swap_lock);
+	spin_unlock(&si->lock);
 	return (swp_entry_t) {0};
 }
 
@@ -493,7 +523,7 @@ static struct swap_info_struct *swap_info_get(swp_entry_t entry)
 		goto bad_offset;
 	if (!p->swap_map[offset])
 		goto bad_free;
-	spin_lock(&swap_lock);
+	spin_lock(&p->lock);
 	return p;
 
 bad_free:
@@ -511,6 +541,27 @@ out:
 	return NULL;
 }
 
+/*
+ * This swap type frees swap entry, check if it is the highest priority swap
+ * type which just frees swap entry. get_swap_page() uses
+ * highest_priority_index to search highest priority swap type. The
+ * swap_info_struct.lock can't protect us if there are multiple swap types
+ * active, so we use atomic_cmpxchg.
+ */
+static void set_highest_priority_index(int type)
+{
+	int old_hp_index, new_hp_index;
+
+	do {
+		old_hp_index = atomic_read(&highest_priority_index);
+		if (old_hp_index != -1 &&
+			swap_info[old_hp_index]->prio >= swap_info[type]->prio)
+			break;
+		new_hp_index = type;
+	} while (atomic_cmpxchg(&highest_priority_index,
+		old_hp_index, new_hp_index) != old_hp_index);
+}
+
 static unsigned char swap_entry_free(struct swap_info_struct *p,
 				     swp_entry_t entry, unsigned char usage)
 {
@@ -553,10 +604,8 @@ static unsigned char swap_entry_free(struct swap_info_struct *p,
 			p->lowest_bit = offset;
 		if (offset > p->highest_bit)
 			p->highest_bit = offset;
-		if (swap_list.next >= 0 &&
-		    p->prio > swap_info[swap_list.next]->prio)
-			swap_list.next = p->type;
-		nr_swap_pages++;
+		set_highest_priority_index(p->type);
+		atomic_long_inc(&nr_swap_pages);
 		p->inuse_pages--;
 		frontswap_invalidate_page(p->type, offset);
 		if (p->flags & SWP_BLKDEV) {
@@ -581,7 +630,7 @@ void swap_free(swp_entry_t entry)
 	p = swap_info_get(entry);
 	if (p) {
 		swap_entry_free(p, entry, 1);
-		spin_unlock(&swap_lock);
+		spin_unlock(&p->lock);
 	}
 }
 
@@ -598,7 +647,7 @@ void swapcache_free(swp_entry_t entry, struct page *page)
 		count = swap_entry_free(p, entry, SWAP_HAS_CACHE);
 		if (page)
 			mem_cgroup_uncharge_swapcache(page, entry, count != 0);
-		spin_unlock(&swap_lock);
+		spin_unlock(&p->lock);
 	}
 }
 
@@ -617,7 +666,7 @@ int page_swapcount(struct page *page)
 	p = swap_info_get(entry);
 	if (p) {
 		count = swap_count(p->swap_map[swp_offset(entry)]);
-		spin_unlock(&swap_lock);
+		spin_unlock(&p->lock);
 	}
 	return count;
 }
@@ -706,7 +755,7 @@ int free_swap_and_cache(swp_entry_t entry)
 				page = NULL;
 			}
 		}
-		spin_unlock(&swap_lock);
+		spin_unlock(&p->lock);
 	}
 	if (page) {
 		/*
@@ -804,11 +853,13 @@ unsigned int count_swap_pages(int type, int free)
 	if ((unsigned int)type < nr_swapfiles) {
 		struct swap_info_struct *sis = swap_info[type];
 
+		spin_lock(&sis->lock);
 		if (sis->flags & SWP_WRITEOK) {
 			n = sis->pages;
 			if (free)
 				n -= sis->inuse_pages;
 		}
+		spin_unlock(&sis->lock);
 	}
 	spin_unlock(&swap_lock);
 	return n;
@@ -1457,7 +1508,7 @@ static void _enable_swap_info(struct swap_info_struct *p, int prio,
 	p->swap_map = swap_map;
 	frontswap_map_set(p, frontswap_map);
 	p->flags |= SWP_WRITEOK;
-	nr_swap_pages += p->pages;
+	atomic_long_add(p->pages, &nr_swap_pages);
 	total_swap_pages += p->pages;
 
 	/* insert swap space into swap_list: */
@@ -1479,15 +1530,19 @@ static void enable_swap_info(struct swap_info_struct *p, int prio,
 				unsigned long *frontswap_map)
 {
 	spin_lock(&swap_lock);
+	spin_lock(&p->lock);
 	_enable_swap_info(p, prio, swap_map, frontswap_map);
 	frontswap_init(p->type);
+	spin_unlock(&p->lock);
 	spin_unlock(&swap_lock);
 }
 
 static void reinsert_swap_info(struct swap_info_struct *p)
 {
 	spin_lock(&swap_lock);
+	spin_lock(&p->lock);
 	_enable_swap_info(p, p->prio, p->swap_map, frontswap_map_get(p));
+	spin_unlock(&p->lock);
 	spin_unlock(&swap_lock);
 }
 
@@ -1547,14 +1602,16 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
 		/* just pick something that's safe... */
 		swap_list.next = swap_list.head;
 	}
+	spin_lock(&p->lock);
 	if (p->prio < 0) {
 		for (i = p->next; i >= 0; i = swap_info[i]->next)
 			swap_info[i]->prio = p->prio--;
 		least_priority++;
 	}
-	nr_swap_pages -= p->pages;
+	atomic_long_sub(p->pages, &nr_swap_pages);
 	total_swap_pages -= p->pages;
 	p->flags &= ~SWP_WRITEOK;
+	spin_unlock(&p->lock);
 	spin_unlock(&swap_lock);
 
 	set_current_oom_origin();
@@ -1573,14 +1630,17 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
 
 	mutex_lock(&swapon_mutex);
 	spin_lock(&swap_lock);
+	spin_lock(&p->lock);
 	drain_mmlist();
 
 	/* wait for anyone still in scan_swap_map */
 	p->highest_bit = 0;		/* cuts scans short */
 	while (p->flags >= SWP_SCANNING) {
+		spin_unlock(&p->lock);
 		spin_unlock(&swap_lock);
 		schedule_timeout_uninterruptible(1);
 		spin_lock(&swap_lock);
+		spin_lock(&p->lock);
 	}
 
 	swap_file = p->swap_file;
@@ -1590,6 +1650,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
 	p->swap_map = NULL;
 	p->flags = 0;
 	frontswap_invalidate_area(type);
+	spin_unlock(&p->lock);
 	spin_unlock(&swap_lock);
 	mutex_unlock(&swapon_mutex);
 	vfree(swap_map);
@@ -1795,6 +1856,7 @@ static struct swap_info_struct *alloc_swap_info(void)
 	p->flags = SWP_USED;
 	p->next = -1;
 	spin_unlock(&swap_lock);
+	spin_lock_init(&p->lock);
 
 	return p;
 }
@@ -2117,7 +2179,7 @@ void si_swapinfo(struct sysinfo *val)
 		if ((si->flags & SWP_USED) && !(si->flags & SWP_WRITEOK))
 			nr_to_be_unused += si->inuse_pages;
 	}
-	val->freeswap = nr_swap_pages + nr_to_be_unused;
+	val->freeswap = atomic_long_read(&nr_swap_pages) + nr_to_be_unused;
 	val->totalswap = total_swap_pages + nr_to_be_unused;
 	spin_unlock(&swap_lock);
 }
@@ -2150,7 +2212,7 @@ static int __swap_duplicate(swp_entry_t entry, unsigned char usage)
 	p = swap_info[type];
 	offset = swp_offset(entry);
 
-	spin_lock(&swap_lock);
+	spin_lock(&p->lock);
 	if (unlikely(offset >= p->max))
 		goto unlock_out;
 
@@ -2185,7 +2247,7 @@ static int __swap_duplicate(swp_entry_t entry, unsigned char usage)
 	p->swap_map[offset] = count | has_cache;
 
 unlock_out:
-	spin_unlock(&swap_lock);
+	spin_unlock(&p->lock);
 out:
 	return err;
 
@@ -2310,7 +2372,7 @@ int add_swap_count_continuation(swp_entry_t entry, gfp_t gfp_mask)
 	}
 
 	if (!page) {
-		spin_unlock(&swap_lock);
+		spin_unlock(&si->lock);
 		return -ENOMEM;
 	}
 
@@ -2358,7 +2420,7 @@ int add_swap_count_continuation(swp_entry_t entry, gfp_t gfp_mask)
 	list_add_tail(&page->lru, &head->lru);
 	page = NULL;			/* now it's attached, don't free it */
 out:
-	spin_unlock(&swap_lock);
+	spin_unlock(&si->lock);
 outer:
 	if (page)
 		__free_page(page);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index a68fa20269d9..b7d8015a6d54 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1684,7 +1684,7 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
 		force_scan = true;
 
 	/* If we have no swap space, do not bother scanning anon pages. */
-	if (!sc->may_swap || (nr_swap_pages <= 0)) {
+	if (!sc->may_swap || (get_nr_swap_pages() <= 0)) {
 		scan_balance = SCAN_FILE;
 		goto out;
 	}
@@ -1933,7 +1933,7 @@ static inline bool should_continue_reclaim(struct zone *zone,
 	 */
 	pages_for_compaction = (2UL << sc->order);
 	inactive_lru_pages = zone_page_state(zone, NR_INACTIVE_FILE);
-	if (nr_swap_pages > 0)
+	if (get_nr_swap_pages() > 0)
 		inactive_lru_pages += zone_page_state(zone, NR_INACTIVE_ANON);
 	if (sc->nr_reclaimed < pages_for_compaction &&
 			inactive_lru_pages > pages_for_compaction)
@@ -3085,7 +3085,7 @@ unsigned long global_reclaimable_pages(void)
 	nr = global_page_state(NR_ACTIVE_FILE) +
 	     global_page_state(NR_INACTIVE_FILE);
 
-	if (nr_swap_pages > 0)
+	if (get_nr_swap_pages() > 0)
 		nr += global_page_state(NR_ACTIVE_ANON) +
 		      global_page_state(NR_INACTIVE_ANON);
 
@@ -3099,7 +3099,7 @@ unsigned long zone_reclaimable_pages(struct zone *zone)
 	nr = zone_page_state(zone, NR_ACTIVE_FILE) +
 	     zone_page_state(zone, NR_INACTIVE_FILE);
 
-	if (nr_swap_pages > 0)
+	if (get_nr_swap_pages() > 0)
 		nr += zone_page_state(zone, NR_ACTIVE_ANON) +
 		      zone_page_state(zone, NR_INACTIVE_ANON);
 
-- 
cgit v1.2.3


From 08b52706d505658eac0962d215ff697f898bbc13 Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov <khlebnikov@openvz.org>
Date: Fri, 22 Feb 2013 16:34:40 -0800
Subject: mm/rmap: rename anon_vma_unlock() => anon_vma_unlock_write()

The comment in commit 4fc3f1d66b1e ("mm/rmap, migration: Make
rmap_walk_anon() and try_to_unmap_anon() more scalable") says:

| Rename anon_vma_[un]lock() => anon_vma_[un]lock_write(),
| to make it clearer that it's an exclusive write-lock in
| that case - suggested by Rik van Riel.

But that commit renames only anon_vma_lock()

Signed-off-by: Konstantin Khlebnikov <khlebnikov@openvz.org>
Cc: Ingo Molnar <mingo@kernel.org>
Reviewed-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/huge_mm.h | 2 +-
 include/linux/rmap.h    | 2 +-
 mm/huge_memory.c        | 6 +++---
 mm/mmap.c               | 4 ++--
 mm/mremap.c             | 2 +-
 mm/rmap.c               | 6 +++---
 6 files changed, 11 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 1d76f8ca90f0..ee1c244a62a1 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -113,7 +113,7 @@ extern void __split_huge_page_pmd(struct vm_area_struct *vma,
 	do {								\
 		pmd_t *____pmd = (__pmd);				\
 		anon_vma_lock_write(__anon_vma);			\
-		anon_vma_unlock(__anon_vma);				\
+		anon_vma_unlock_write(__anon_vma);			\
 		BUG_ON(pmd_trans_splitting(*____pmd) ||			\
 		       pmd_trans_huge(*____pmd));			\
 	} while (0)
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index c20635c527a9..6dacb93a6d94 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -123,7 +123,7 @@ static inline void anon_vma_lock_write(struct anon_vma *anon_vma)
 	down_write(&anon_vma->root->rwsem);
 }
 
-static inline void anon_vma_unlock(struct anon_vma *anon_vma)
+static inline void anon_vma_unlock_write(struct anon_vma *anon_vma)
 {
 	up_write(&anon_vma->root->rwsem);
 }
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index f40b2ce23d60..b1cc6591ed83 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1830,7 +1830,7 @@ int split_huge_page(struct page *page)
 
 	BUG_ON(PageCompound(page));
 out_unlock:
-	anon_vma_unlock(anon_vma);
+	anon_vma_unlock_write(anon_vma);
 	put_anon_vma(anon_vma);
 out:
 	return ret;
@@ -2322,7 +2322,7 @@ static void collapse_huge_page(struct mm_struct *mm,
 		BUG_ON(!pmd_none(*pmd));
 		set_pmd_at(mm, address, pmd, _pmd);
 		spin_unlock(&mm->page_table_lock);
-		anon_vma_unlock(vma->anon_vma);
+		anon_vma_unlock_write(vma->anon_vma);
 		goto out;
 	}
 
@@ -2330,7 +2330,7 @@ static void collapse_huge_page(struct mm_struct *mm,
 	 * All pages are isolated and locked so anon_vma rmap
 	 * can't run anymore.
 	 */
-	anon_vma_unlock(vma->anon_vma);
+	anon_vma_unlock_write(vma->anon_vma);
 
 	__collapse_huge_page_copy(pte, new_page, vma, address, ptl);
 	pte_unmap(pte);
diff --git a/mm/mmap.c b/mm/mmap.c
index 28416f6b8dd5..318e121affda 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -809,7 +809,7 @@ again:			remove_next = 1 + (end > next->vm_end);
 		anon_vma_interval_tree_post_update_vma(vma);
 		if (adjust_next)
 			anon_vma_interval_tree_post_update_vma(next);
-		anon_vma_unlock(anon_vma);
+		anon_vma_unlock_write(anon_vma);
 	}
 	if (mapping)
 		mutex_unlock(&mapping->i_mmap_mutex);
@@ -3017,7 +3017,7 @@ static void vm_unlock_anon_vma(struct anon_vma *anon_vma)
 		if (!__test_and_clear_bit(0, (unsigned long *)
 					  &anon_vma->root->rb_root.rb_node))
 			BUG();
-		anon_vma_unlock(anon_vma);
+		anon_vma_unlock_write(anon_vma);
 	}
 }
 
diff --git a/mm/mremap.c b/mm/mremap.c
index ebe27a8efa62..463a25705ac6 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -135,7 +135,7 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
 	pte_unmap(new_pte - 1);
 	pte_unmap_unlock(old_pte - 1, old_ptl);
 	if (anon_vma)
-		anon_vma_unlock(anon_vma);
+		anon_vma_unlock_write(anon_vma);
 	if (mapping)
 		mutex_unlock(&mapping->i_mmap_mutex);
 }
diff --git a/mm/rmap.c b/mm/rmap.c
index 3d38edffda41..807c96bf0dc6 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -105,7 +105,7 @@ static inline void anon_vma_free(struct anon_vma *anon_vma)
 	 */
 	if (rwsem_is_locked(&anon_vma->root->rwsem)) {
 		anon_vma_lock_write(anon_vma);
-		anon_vma_unlock(anon_vma);
+		anon_vma_unlock_write(anon_vma);
 	}
 
 	kmem_cache_free(anon_vma_cachep, anon_vma);
@@ -191,7 +191,7 @@ int anon_vma_prepare(struct vm_area_struct *vma)
 			avc = NULL;
 		}
 		spin_unlock(&mm->page_table_lock);
-		anon_vma_unlock(anon_vma);
+		anon_vma_unlock_write(anon_vma);
 
 		if (unlikely(allocated))
 			put_anon_vma(allocated);
@@ -308,7 +308,7 @@ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
 	vma->anon_vma = anon_vma;
 	anon_vma_lock_write(anon_vma);
 	anon_vma_chain_link(vma, avc, anon_vma);
-	anon_vma_unlock(anon_vma);
+	anon_vma_unlock_write(anon_vma);
 
 	return 0;
 
-- 
cgit v1.2.3


From 75f7ad8e043d9383337d917584297f7737154bbf Mon Sep 17 00:00:00 2001
From: Paul Szabo <psz@maths.usyd.edu.au>
Date: Fri, 22 Feb 2013 16:34:42 -0800
Subject: page-writeback.c: subtract min_free_kbytes from dirtyable memory

When calculating amount of dirtyable memory, min_free_kbytes should be
subtracted because it is not intended for dirty pages.

Addresses http://bugs.debian.org/695182

[akpm@linux-foundation.org: fix up min_free_kbytes extern declarations]
[akpm@linux-foundation.org: fix min() warning]
Signed-off-by: Paul Szabo <psz@maths.usyd.edu.au>
Acked-by: Rik van Riel <riel@redhat.com>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h  | 3 +++
 kernel/sysctl.c     | 1 -
 mm/huge_memory.c    | 1 -
 mm/page-writeback.c | 3 +++
 4 files changed, 6 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 1d4122bf6f27..437da0ce78c7 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1393,6 +1393,9 @@ extern void setup_per_cpu_pageset(void);
 extern void zone_pcp_update(struct zone *zone);
 extern void zone_pcp_reset(struct zone *zone);
 
+/* page_alloc.c */
+extern int min_free_kbytes;
+
 /* nommu.c */
 extern atomic_long_t mmap_pages_allocated;
 extern int nommu_shrink_inode_mappings(struct inode *, size_t, size_t);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 467d8b923fcd..95e9e55602a8 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -105,7 +105,6 @@ extern char core_pattern[];
 extern unsigned int core_pipe_limit;
 #endif
 extern int pid_max;
-extern int min_free_kbytes;
 extern int pid_max_min, pid_max_max;
 extern int sysctl_drop_caches;
 extern int percpu_pagelist_fraction;
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index b1cc6591ed83..c63a21d0e991 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -105,7 +105,6 @@ static int set_recommended_min_free_kbytes(void)
 	struct zone *zone;
 	int nr_zones = 0;
 	unsigned long recommended_min;
-	extern int min_free_kbytes;
 
 	if (!khugepaged_enabled())
 		return 0;
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 7300c9d5e1d9..cdc377c456c0 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -241,6 +241,9 @@ static unsigned long global_dirtyable_memory(void)
 	if (!vm_highmem_is_dirtyable)
 		x -= highmem_dirtyable_memory(x);
 
+	/* Subtract min_free_kbytes */
+	x -= min_t(unsigned long, x, min_free_kbytes >> (PAGE_SHIFT - 10));
+
 	return x + 1;	/* Ensure that we never return 0 */
 }
 
-- 
cgit v1.2.3


From 4468b8f1e2d32ce79ef4bcb8e00d7e88627f1c3a Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Fri, 22 Feb 2013 16:34:46 -0800
Subject: mm: uninline page_xchg_last_nid()

Andrew Morton pointed out that page_xchg_last_nid() and
reset_page_last_nid() were "getting nuttily large" and asked that it be
investigated.

reset_page_last_nid() is on the page free path and it would be
unfortunate to make that path more expensive than it needs to be.  Due
to the internal use of page_xchg_last_nid() it is already too expensive
but fortunately, it should also be impossible for the page->flags to be
updated in parallel when we call reset_page_last_nid().  Instead of
unlining the function, it uses a simplier implementation that assumes no
parallel updates and should now be sufficiently short for inlining.

page_xchg_last_nid() is called in paths that are already quite expensive
(splitting huge page, fault handling, migration) and it is reasonable to
uninline.  There was not really a good place to place the function but
mm/mmzone.c was the closest fit IMO.

This patch saved 128 bytes of text in the vmlinux file for the kernel
configuration I used for testing automatic NUMA balancing.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 21 +++++----------------
 mm/mmzone.c        | 20 +++++++++++++++++++-
 2 files changed, 24 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 437da0ce78c7..8a5bbe3b9e56 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -677,25 +677,14 @@ static inline int page_last_nid(struct page *page)
 	return (page->flags >> LAST_NID_PGSHIFT) & LAST_NID_MASK;
 }
 
-static inline int page_xchg_last_nid(struct page *page, int nid)
-{
-	unsigned long old_flags, flags;
-	int last_nid;
-
-	do {
-		old_flags = flags = page->flags;
-		last_nid = page_last_nid(page);
-
-		flags &= ~(LAST_NID_MASK << LAST_NID_PGSHIFT);
-		flags |= (nid & LAST_NID_MASK) << LAST_NID_PGSHIFT;
-	} while (unlikely(cmpxchg(&page->flags, old_flags, flags) != old_flags));
-
-	return last_nid;
-}
+extern int page_xchg_last_nid(struct page *page, int nid);
 
 static inline void reset_page_last_nid(struct page *page)
 {
-	page_xchg_last_nid(page, (1 << LAST_NID_SHIFT) - 1);
+	int nid = (1 << LAST_NID_SHIFT) - 1;
+
+	page->flags &= ~(LAST_NID_MASK << LAST_NID_PGSHIFT);
+	page->flags |= (nid & LAST_NID_MASK) << LAST_NID_PGSHIFT;
 }
 #endif /* LAST_NID_NOT_IN_PAGE_FLAGS */
 #else
diff --git a/mm/mmzone.c b/mm/mmzone.c
index 4596d81b89b1..bce796e8487f 100644
--- a/mm/mmzone.c
+++ b/mm/mmzone.c
@@ -1,7 +1,7 @@
 /*
  * linux/mm/mmzone.c
  *
- * management codes for pgdats and zones.
+ * management codes for pgdats, zones and page flags
  */
 
 
@@ -96,3 +96,21 @@ void lruvec_init(struct lruvec *lruvec)
 	for_each_lru(lru)
 		INIT_LIST_HEAD(&lruvec->lists[lru]);
 }
+
+#if defined(CONFIG_NUMA_BALANCING) && !defined(LAST_NID_NOT_IN_PAGE_FLAGS)
+int page_xchg_last_nid(struct page *page, int nid)
+{
+	unsigned long old_flags, flags;
+	int last_nid;
+
+	do {
+		old_flags = flags = page->flags;
+		last_nid = page_last_nid(page);
+
+		flags &= ~(LAST_NID_MASK << LAST_NID_PGSHIFT);
+		flags |= (nid & LAST_NID_MASK) << LAST_NID_PGSHIFT;
+	} while (unlikely(cmpxchg(&page->flags, old_flags, flags) != old_flags));
+
+	return last_nid;
+}
+#endif
-- 
cgit v1.2.3


From 22b751c3d0376e86a377e3a0aa2ddbbe9d2eefc1 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Fri, 22 Feb 2013 16:34:59 -0800
Subject: mm: rename page struct field helpers

The function names page_xchg_last_nid(), page_last_nid() and
reset_page_last_nid() were judged to be inconsistent so rename them to a
struct_field_op style pattern.  As it looked jarring to have
reset_page_mapcount() and page_nid_reset_last() beside each other in
memmap_init_zone(), this patch also renames reset_page_mapcount() to
page_mapcount_reset().  There are others like init_page_count() but as
it is used throughout the arch code a rename would likely cause more
conflicts than it is worth.

[akpm@linux-foundation.org: fix zcache]
Signed-off-by: Mel Gorman <mgorman@suse.de>
Suggested-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/staging/zcache/zbud.c            |  2 +-
 drivers/staging/zsmalloc/zsmalloc-main.c |  2 +-
 include/linux/mm.h                       | 20 ++++++++++----------
 mm/huge_memory.c                         |  2 +-
 mm/mempolicy.c                           |  2 +-
 mm/migrate.c                             |  4 ++--
 mm/mmzone.c                              |  4 ++--
 mm/page_alloc.c                          | 10 +++++-----
 mm/slob.c                                |  2 +-
 mm/slub.c                                |  2 +-
 10 files changed, 25 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/staging/zcache/zbud.c b/drivers/staging/zcache/zbud.c
index 328c397ea5dc..fdff5c6a0239 100644
--- a/drivers/staging/zcache/zbud.c
+++ b/drivers/staging/zcache/zbud.c
@@ -404,7 +404,7 @@ static inline struct page *zbud_unuse_zbudpage(struct zbudpage *zbudpage,
 	else
 		zbud_pers_pageframes--;
 	zbudpage_spin_unlock(zbudpage);
-	reset_page_mapcount(page);
+	page_mapcount_reset(page);
 	init_page_count(page);
 	page->index = 0;
 	return page;
diff --git a/drivers/staging/zsmalloc/zsmalloc-main.c b/drivers/staging/zsmalloc/zsmalloc-main.c
index 06f73a93a44d..e78d262c5249 100644
--- a/drivers/staging/zsmalloc/zsmalloc-main.c
+++ b/drivers/staging/zsmalloc/zsmalloc-main.c
@@ -472,7 +472,7 @@ static void reset_page(struct page *page)
 	set_page_private(page, 0);
 	page->mapping = NULL;
 	page->freelist = NULL;
-	reset_page_mapcount(page);
+	page_mapcount_reset(page);
 }
 
 static void free_zspage(struct page *first_page)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 8a5bbe3b9e56..fe039bdba4ed 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -367,7 +367,7 @@ static inline struct page *compound_head(struct page *page)
  * both from it and to it can be tracked, using atomic_inc_and_test
  * and atomic_add_negative(-1).
  */
-static inline void reset_page_mapcount(struct page *page)
+static inline void page_mapcount_reset(struct page *page)
 {
 	atomic_set(&(page)->_mapcount, -1);
 }
@@ -658,28 +658,28 @@ static inline int page_to_nid(const struct page *page)
 
 #ifdef CONFIG_NUMA_BALANCING
 #ifdef LAST_NID_NOT_IN_PAGE_FLAGS
-static inline int page_xchg_last_nid(struct page *page, int nid)
+static inline int page_nid_xchg_last(struct page *page, int nid)
 {
 	return xchg(&page->_last_nid, nid);
 }
 
-static inline int page_last_nid(struct page *page)
+static inline int page_nid_last(struct page *page)
 {
 	return page->_last_nid;
 }
-static inline void reset_page_last_nid(struct page *page)
+static inline void page_nid_reset_last(struct page *page)
 {
 	page->_last_nid = -1;
 }
 #else
-static inline int page_last_nid(struct page *page)
+static inline int page_nid_last(struct page *page)
 {
 	return (page->flags >> LAST_NID_PGSHIFT) & LAST_NID_MASK;
 }
 
-extern int page_xchg_last_nid(struct page *page, int nid);
+extern int page_nid_xchg_last(struct page *page, int nid);
 
-static inline void reset_page_last_nid(struct page *page)
+static inline void page_nid_reset_last(struct page *page)
 {
 	int nid = (1 << LAST_NID_SHIFT) - 1;
 
@@ -688,17 +688,17 @@ static inline void reset_page_last_nid(struct page *page)
 }
 #endif /* LAST_NID_NOT_IN_PAGE_FLAGS */
 #else
-static inline int page_xchg_last_nid(struct page *page, int nid)
+static inline int page_nid_xchg_last(struct page *page, int nid)
 {
 	return page_to_nid(page);
 }
 
-static inline int page_last_nid(struct page *page)
+static inline int page_nid_last(struct page *page)
 {
 	return page_to_nid(page);
 }
 
-static inline void reset_page_last_nid(struct page *page)
+static inline void page_nid_reset_last(struct page *page)
 {
 }
 #endif
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index c63a21d0e991..6049376c7226 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1639,7 +1639,7 @@ static void __split_huge_page_refcount(struct page *page)
 		page_tail->mapping = page->mapping;
 
 		page_tail->index = page->index + i;
-		page_xchg_last_nid(page_tail, page_last_nid(page));
+		page_nid_xchg_last(page_tail, page_nid_last(page));
 
 		BUG_ON(!PageAnon(page_tail));
 		BUG_ON(!PageUptodate(page_tail));
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 6f7979c566d9..2ae78e255e08 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2316,7 +2316,7 @@ int mpol_misplaced(struct page *page, struct vm_area_struct *vma, unsigned long
 		 * it less likely we act on an unlikely task<->page
 		 * relation.
 		 */
-		last_nid = page_xchg_last_nid(page, polnid);
+		last_nid = page_nid_xchg_last(page, polnid);
 		if (last_nid != polnid)
 			goto out;
 	}
diff --git a/mm/migrate.c b/mm/migrate.c
index f560071e89c5..de5c371a7969 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1497,7 +1497,7 @@ static struct page *alloc_misplaced_dst_page(struct page *page,
 					  __GFP_NOWARN) &
 					 ~GFP_IOFS, 0);
 	if (newpage)
-		page_xchg_last_nid(newpage, page_last_nid(page));
+		page_nid_xchg_last(newpage, page_nid_last(page));
 
 	return newpage;
 }
@@ -1681,7 +1681,7 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 	if (!new_page)
 		goto out_fail;
 
-	page_xchg_last_nid(new_page, page_last_nid(page));
+	page_nid_xchg_last(new_page, page_nid_last(page));
 
 	isolated = numamigrate_isolate_page(pgdat, page);
 	if (!isolated) {
diff --git a/mm/mmzone.c b/mm/mmzone.c
index bce796e8487f..2ac0afbd68f3 100644
--- a/mm/mmzone.c
+++ b/mm/mmzone.c
@@ -98,14 +98,14 @@ void lruvec_init(struct lruvec *lruvec)
 }
 
 #if defined(CONFIG_NUMA_BALANCING) && !defined(LAST_NID_NOT_IN_PAGE_FLAGS)
-int page_xchg_last_nid(struct page *page, int nid)
+int page_nid_xchg_last(struct page *page, int nid)
 {
 	unsigned long old_flags, flags;
 	int last_nid;
 
 	do {
 		old_flags = flags = page->flags;
-		last_nid = page_last_nid(page);
+		last_nid = page_nid_last(page);
 
 		flags &= ~(LAST_NID_MASK << LAST_NID_PGSHIFT);
 		flags |= (nid & LAST_NID_MASK) << LAST_NID_PGSHIFT;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 3ede25e6686e..445718b328b6 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -295,7 +295,7 @@ static void bad_page(struct page *page)
 
 	/* Don't complain about poisoned pages */
 	if (PageHWPoison(page)) {
-		reset_page_mapcount(page); /* remove PageBuddy */
+		page_mapcount_reset(page); /* remove PageBuddy */
 		return;
 	}
 
@@ -327,7 +327,7 @@ static void bad_page(struct page *page)
 	dump_stack();
 out:
 	/* Leave bad fields for debug, except PageBuddy could make trouble */
-	reset_page_mapcount(page); /* remove PageBuddy */
+	page_mapcount_reset(page); /* remove PageBuddy */
 	add_taint(TAINT_BAD_PAGE);
 }
 
@@ -613,7 +613,7 @@ static inline int free_pages_check(struct page *page)
 		bad_page(page);
 		return 1;
 	}
-	reset_page_last_nid(page);
+	page_nid_reset_last(page);
 	if (page->flags & PAGE_FLAGS_CHECK_AT_PREP)
 		page->flags &= ~PAGE_FLAGS_CHECK_AT_PREP;
 	return 0;
@@ -3894,8 +3894,8 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
 		set_page_links(page, zone, nid, pfn);
 		mminit_verify_page_links(page, zone, nid, pfn);
 		init_page_count(page);
-		reset_page_mapcount(page);
-		reset_page_last_nid(page);
+		page_mapcount_reset(page);
+		page_nid_reset_last(page);
 		SetPageReserved(page);
 		/*
 		 * Mark the block movable so that blocks are reserved for
diff --git a/mm/slob.c b/mm/slob.c
index a99fdf7a0907..eeed4a05a2ef 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -360,7 +360,7 @@ static void slob_free(void *block, int size)
 			clear_slob_page_free(sp);
 		spin_unlock_irqrestore(&slob_lock, flags);
 		__ClearPageSlab(sp);
-		reset_page_mapcount(sp);
+		page_mapcount_reset(sp);
 		slob_free_pages(b, 0);
 		return;
 	}
diff --git a/mm/slub.c b/mm/slub.c
index ba2ca53f6c3a..ebcc44eb43b9 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1408,7 +1408,7 @@ static void __free_slab(struct kmem_cache *s, struct page *page)
 	__ClearPageSlab(page);
 
 	memcg_release_pages(s, order);
-	reset_page_mapcount(page);
+	page_mapcount_reset(page);
 	if (current->reclaim_state)
 		current->reclaim_state->reclaimed_slab += pages;
 	__free_memcg_kmem_pages(page, order);
-- 
cgit v1.2.3


From cbf86cfe04a66471f23b9e62e5eba4e525f38855 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Fri, 22 Feb 2013 16:35:08 -0800
Subject: ksm: remove old stable nodes more thoroughly

Switching merge_across_nodes after running KSM is liable to oops on stale
nodes still left over from the previous stable tree.  It's not something
that people will often want to do, but it would be lame to demand a reboot
when they're trying to determine which merge_across_nodes setting is best.

How can this happen?  We only permit switching merge_across_nodes when
pages_shared is 0, and usually set run 2 to force that beforehand, which
ought to unmerge everything: yet oopses still occur when you then run 1.

Three causes:

1. The old stable tree (built according to the inverse
   merge_across_nodes) has not been fully torn down.  A stable node
   lingers until get_ksm_page() notices that the page it references no
   longer references it: but the page is not necessarily freed as soon as
   expected, particularly when swapcache.

   Fix this with a pass through the old stable tree, applying
   get_ksm_page() to each of the remaining nodes (most found stale and
   removed immediately), with forced removal of any left over.  Unless the
   page is still mapped: I've not seen that case, it shouldn't occur, but
   better to WARN_ON_ONCE and EBUSY than BUG.

2. __ksm_enter() has a nice little optimization, to insert the new mm
   just behind ksmd's cursor, so there's a full pass for it to stabilize
   (or be removed) before ksmd addresses it.  Nice when ksmd is running,
   but not so nice when we're trying to unmerge all mms: we were missing
   those mms forked and inserted behind the unmerge cursor.  Easily fixed
   by inserting at the end when KSM_RUN_UNMERGE.

3.  It is possible for a KSM page to be faulted back from swapcache
   into an mm, just after unmerge_and_remove_all_rmap_items() scanned past
   it.  Fix this by copying on fault when KSM_RUN_UNMERGE: but that is
   private to ksm.c, so dissolve the distinction between
   ksm_might_need_to_copy() and ksm_does_need_to_copy(), doing it all in
   the one call into ksm.c.

A long outstanding, unrelated bugfix sneaks in with that third fix:
ksm_does_need_to_copy() would copy from a !PageUptodate page (implying I/O
error when read in from swap) to a page which it then marks Uptodate.  Fix
this case by not copying, letting do_swap_page() discover the error.

Signed-off-by: Hugh Dickins <hughd@google.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Petr Holasek <pholasek@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Izik Eidus <izik.eidus@ravellosystems.com>
Cc: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@gmail.com>
Acked-by: Mel Gorman <mgorman@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/ksm.h | 18 +++---------
 mm/ksm.c            | 83 ++++++++++++++++++++++++++++++++++++++++++++++++++---
 mm/memory.c         | 19 ++++++------
 3 files changed, 92 insertions(+), 28 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ksm.h b/include/linux/ksm.h
index 3319a6967626..45c9b6a17bcb 100644
--- a/include/linux/ksm.h
+++ b/include/linux/ksm.h
@@ -16,9 +16,6 @@
 struct stable_node;
 struct mem_cgroup;
 
-struct page *ksm_does_need_to_copy(struct page *page,
-			struct vm_area_struct *vma, unsigned long address);
-
 #ifdef CONFIG_KSM
 int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
 		unsigned long end, int advice, unsigned long *vm_flags);
@@ -73,15 +70,8 @@ static inline void set_page_stable_node(struct page *page,
  * We'd like to make this conditional on vma->vm_flags & VM_MERGEABLE,
  * but what if the vma was unmerged while the page was swapped out?
  */
-static inline int ksm_might_need_to_copy(struct page *page,
-			struct vm_area_struct *vma, unsigned long address)
-{
-	struct anon_vma *anon_vma = page_anon_vma(page);
-
-	return anon_vma &&
-		(anon_vma->root != vma->anon_vma->root ||
-		 page->index != linear_page_index(vma, address));
-}
+struct page *ksm_might_need_to_copy(struct page *page,
+			struct vm_area_struct *vma, unsigned long address);
 
 int page_referenced_ksm(struct page *page,
 			struct mem_cgroup *memcg, unsigned long *vm_flags);
@@ -113,10 +103,10 @@ static inline int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
 	return 0;
 }
 
-static inline int ksm_might_need_to_copy(struct page *page,
+static inline struct page *ksm_might_need_to_copy(struct page *page,
 			struct vm_area_struct *vma, unsigned long address)
 {
-	return 0;
+	return page;
 }
 
 static inline int page_referenced_ksm(struct page *page,
diff --git a/mm/ksm.c b/mm/ksm.c
index e02430fb26f6..4c22cdff02ad 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -644,6 +644,57 @@ static int unmerge_ksm_pages(struct vm_area_struct *vma,
 /*
  * Only called through the sysfs control interface:
  */
+static int remove_stable_node(struct stable_node *stable_node)
+{
+	struct page *page;
+	int err;
+
+	page = get_ksm_page(stable_node, true);
+	if (!page) {
+		/*
+		 * get_ksm_page did remove_node_from_stable_tree itself.
+		 */
+		return 0;
+	}
+
+	if (WARN_ON_ONCE(page_mapped(page)))
+		err = -EBUSY;
+	else {
+		/*
+		 * This page might be in a pagevec waiting to be freed,
+		 * or it might be PageSwapCache (perhaps under writeback),
+		 * or it might have been removed from swapcache a moment ago.
+		 */
+		set_page_stable_node(page, NULL);
+		remove_node_from_stable_tree(stable_node);
+		err = 0;
+	}
+
+	unlock_page(page);
+	put_page(page);
+	return err;
+}
+
+static int remove_all_stable_nodes(void)
+{
+	struct stable_node *stable_node;
+	int nid;
+	int err = 0;
+
+	for (nid = 0; nid < nr_node_ids; nid++) {
+		while (root_stable_tree[nid].rb_node) {
+			stable_node = rb_entry(root_stable_tree[nid].rb_node,
+						struct stable_node, node);
+			if (remove_stable_node(stable_node)) {
+				err = -EBUSY;
+				break;	/* proceed to next nid */
+			}
+			cond_resched();
+		}
+	}
+	return err;
+}
+
 static int unmerge_and_remove_all_rmap_items(void)
 {
 	struct mm_slot *mm_slot;
@@ -691,6 +742,8 @@ static int unmerge_and_remove_all_rmap_items(void)
 		}
 	}
 
+	/* Clean up stable nodes, but don't worry if some are still busy */
+	remove_all_stable_nodes();
 	ksm_scan.seqnr = 0;
 	return 0;
 
@@ -1586,11 +1639,19 @@ int __ksm_enter(struct mm_struct *mm)
 	spin_lock(&ksm_mmlist_lock);
 	insert_to_mm_slots_hash(mm, mm_slot);
 	/*
-	 * Insert just behind the scanning cursor, to let the area settle
+	 * When KSM_RUN_MERGE (or KSM_RUN_STOP),
+	 * insert just behind the scanning cursor, to let the area settle
 	 * down a little; when fork is followed by immediate exec, we don't
 	 * want ksmd to waste time setting up and tearing down an rmap_list.
+	 *
+	 * But when KSM_RUN_UNMERGE, it's important to insert ahead of its
+	 * scanning cursor, otherwise KSM pages in newly forked mms will be
+	 * missed: then we might as well insert at the end of the list.
 	 */
-	list_add_tail(&mm_slot->mm_list, &ksm_scan.mm_slot->mm_list);
+	if (ksm_run & KSM_RUN_UNMERGE)
+		list_add_tail(&mm_slot->mm_list, &ksm_mm_head.mm_list);
+	else
+		list_add_tail(&mm_slot->mm_list, &ksm_scan.mm_slot->mm_list);
 	spin_unlock(&ksm_mmlist_lock);
 
 	set_bit(MMF_VM_MERGEABLE, &mm->flags);
@@ -1640,11 +1701,25 @@ void __ksm_exit(struct mm_struct *mm)
 	}
 }
 
-struct page *ksm_does_need_to_copy(struct page *page,
+struct page *ksm_might_need_to_copy(struct page *page,
 			struct vm_area_struct *vma, unsigned long address)
 {
+	struct anon_vma *anon_vma = page_anon_vma(page);
 	struct page *new_page;
 
+	if (PageKsm(page)) {
+		if (page_stable_node(page) &&
+		    !(ksm_run & KSM_RUN_UNMERGE))
+			return page;	/* no need to copy it */
+	} else if (!anon_vma) {
+		return page;		/* no need to copy it */
+	} else if (anon_vma->root == vma->anon_vma->root &&
+		 page->index == linear_page_index(vma, address)) {
+		return page;		/* still no need to copy it */
+	}
+	if (!PageUptodate(page))
+		return page;		/* let do_swap_page report the error */
+
 	new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
 	if (new_page) {
 		copy_user_highpage(new_page, page, address, vma);
@@ -2024,7 +2099,7 @@ static ssize_t merge_across_nodes_store(struct kobject *kobj,
 
 	mutex_lock(&ksm_thread_mutex);
 	if (ksm_merge_across_nodes != knob) {
-		if (ksm_pages_shared)
+		if (ksm_pages_shared || remove_all_stable_nodes())
 			err = -EBUSY;
 		else
 			ksm_merge_across_nodes = knob;
diff --git a/mm/memory.c b/mm/memory.c
index 054250ee4a68..7bd22a621817 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2994,17 +2994,16 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (unlikely(!PageSwapCache(page) || page_private(page) != entry.val))
 		goto out_page;
 
-	if (ksm_might_need_to_copy(page, vma, address)) {
-		swapcache = page;
-		page = ksm_does_need_to_copy(page, vma, address);
-
-		if (unlikely(!page)) {
-			ret = VM_FAULT_OOM;
-			page = swapcache;
-			swapcache = NULL;
-			goto out_page;
-		}
+	swapcache = page;
+	page = ksm_might_need_to_copy(page, vma, address);
+	if (unlikely(!page)) {
+		ret = VM_FAULT_OOM;
+		page = swapcache;
+		swapcache = NULL;
+		goto out_page;
 	}
+	if (page == swapcache)
+		swapcache = NULL;
 
 	if (mem_cgroup_try_charge_swapin(mm, page, GFP_KERNEL, &ptr)) {
 		ret = VM_FAULT_OOM;
-- 
cgit v1.2.3


From 9c620e2bc5aa4256c102ada34e6c76204ed5898b Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Fri, 22 Feb 2013 16:35:14 -0800
Subject: mm: remove offlining arg to migrate_pages

No functional change, but the only purpose of the offlining argument to
migrate_pages() etc, was to ensure that __unmap_and_move() could migrate a
KSM page for memory hotremove (which took ksm_thread_mutex) but not for
other callers.  Now all cases are safe, remove the arg.

Signed-off-by: Hugh Dickins <hughd@google.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Petr Holasek <pholasek@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Izik Eidus <izik.eidus@ravellosystems.com>
Cc: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/migrate.h | 14 ++++++--------
 mm/compaction.c         |  2 +-
 mm/memory-failure.c     |  7 +++----
 mm/memory_hotplug.c     |  3 +--
 mm/mempolicy.c          |  8 +++-----
 mm/migrate.c            | 35 +++++++++++++----------------------
 mm/page_alloc.c         |  6 ++----
 7 files changed, 29 insertions(+), 46 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 1e9f627967a3..a405d3dc0f61 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -40,11 +40,9 @@ extern void putback_movable_pages(struct list_head *l);
 extern int migrate_page(struct address_space *,
 			struct page *, struct page *, enum migrate_mode);
 extern int migrate_pages(struct list_head *l, new_page_t x,
-			unsigned long private, bool offlining,
-			enum migrate_mode mode, int reason);
+		unsigned long private, enum migrate_mode mode, int reason);
 extern int migrate_huge_page(struct page *, new_page_t x,
-			unsigned long private, bool offlining,
-			enum migrate_mode mode);
+		unsigned long private, enum migrate_mode mode);
 
 extern int fail_migrate_page(struct address_space *,
 			struct page *, struct page *);
@@ -62,11 +60,11 @@ extern int migrate_huge_page_move_mapping(struct address_space *mapping,
 static inline void putback_lru_pages(struct list_head *l) {}
 static inline void putback_movable_pages(struct list_head *l) {}
 static inline int migrate_pages(struct list_head *l, new_page_t x,
-		unsigned long private, bool offlining,
-		enum migrate_mode mode, int reason) { return -ENOSYS; }
+		unsigned long private, enum migrate_mode mode, int reason)
+	{ return -ENOSYS; }
 static inline int migrate_huge_page(struct page *page, new_page_t x,
-		unsigned long private, bool offlining,
-		enum migrate_mode mode) { return -ENOSYS; }
+		unsigned long private, enum migrate_mode mode)
+	{ return -ENOSYS; }
 
 static inline int migrate_prep(void) { return -ENOSYS; }
 static inline int migrate_prep_local(void) { return -ENOSYS; }
diff --git a/mm/compaction.c b/mm/compaction.c
index ef53f352b606..25e75e3e2ac6 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -980,7 +980,7 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
 
 		nr_migrate = cc->nr_migratepages;
 		err = migrate_pages(&cc->migratepages, compaction_alloc,
-				(unsigned long)cc, false,
+				(unsigned long)cc,
 				cc->sync ? MIGRATE_SYNC_LIGHT : MIGRATE_ASYNC,
 				MR_COMPACTION);
 		update_nr_listpages(cc);
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 1a56d63adf9c..e4683459ab26 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1465,7 +1465,7 @@ static int soft_offline_huge_page(struct page *page, int flags)
 	unlock_page(hpage);
 
 	/* Keep page count to indicate a given hugepage is isolated. */
-	ret = migrate_huge_page(hpage, new_page, MPOL_MF_MOVE_ALL, false,
+	ret = migrate_huge_page(hpage, new_page, MPOL_MF_MOVE_ALL,
 				MIGRATE_SYNC);
 	put_page(hpage);
 	if (ret) {
@@ -1597,11 +1597,10 @@ static int __soft_offline_page(struct page *page, int flags)
 	if (!ret) {
 		LIST_HEAD(pagelist);
 		inc_zone_page_state(page, NR_ISOLATED_ANON +
-					    page_is_file_cache(page));
+					page_is_file_cache(page));
 		list_add(&page->lru, &pagelist);
 		ret = migrate_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL,
-							false, MIGRATE_SYNC,
-							MR_MEMORY_FAILURE);
+					MIGRATE_SYNC, MR_MEMORY_FAILURE);
 		if (ret) {
 			putback_lru_pages(&pagelist);
 			pr_info("soft offline: %#lx: migration failed %d, type %lx\n",
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 77a6abfe3291..dda1ca695a08 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1286,8 +1286,7 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
 		 * migrate_pages returns # of failed pages.
 		 */
 		ret = migrate_pages(&source, alloc_migrate_target, 0,
-							true, MIGRATE_SYNC,
-							MR_MEMORY_HOTPLUG);
+					MIGRATE_SYNC, MR_MEMORY_HOTPLUG);
 		if (ret)
 			putback_lru_pages(&source);
 	}
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index d344c36db63f..e2661d1c5c33 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1014,8 +1014,7 @@ static int migrate_to_node(struct mm_struct *mm, int source, int dest,
 
 	if (!list_empty(&pagelist)) {
 		err = migrate_pages(&pagelist, new_node_page, dest,
-							false, MIGRATE_SYNC,
-							MR_SYSCALL);
+					MIGRATE_SYNC, MR_SYSCALL);
 		if (err)
 			putback_lru_pages(&pagelist);
 	}
@@ -1259,9 +1258,8 @@ static long do_mbind(unsigned long start, unsigned long len,
 		if (!list_empty(&pagelist)) {
 			WARN_ON_ONCE(flags & MPOL_MF_LAZY);
 			nr_failed = migrate_pages(&pagelist, new_vma_page,
-						(unsigned long)vma,
-						false, MIGRATE_SYNC,
-						MR_MEMPOLICY_MBIND);
+					(unsigned long)vma,
+					MIGRATE_SYNC, MR_MEMPOLICY_MBIND);
 			if (nr_failed)
 				putback_lru_pages(&pagelist);
 		}
diff --git a/mm/migrate.c b/mm/migrate.c
index 20a03eb0667f..3bbaf5d230b0 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -701,7 +701,7 @@ static int move_to_new_page(struct page *newpage, struct page *page,
 }
 
 static int __unmap_and_move(struct page *page, struct page *newpage,
-			int force, bool offlining, enum migrate_mode mode)
+				int force, enum migrate_mode mode)
 {
 	int rc = -EAGAIN;
 	int remap_swapcache = 1;
@@ -847,8 +847,7 @@ out:
  * to the newly allocated page in newpage.
  */
 static int unmap_and_move(new_page_t get_new_page, unsigned long private,
-			struct page *page, int force, bool offlining,
-			enum migrate_mode mode)
+			struct page *page, int force, enum migrate_mode mode)
 {
 	int rc = 0;
 	int *result = NULL;
@@ -866,7 +865,7 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
 		if (unlikely(split_huge_page(page)))
 			goto out;
 
-	rc = __unmap_and_move(page, newpage, force, offlining, mode);
+	rc = __unmap_and_move(page, newpage, force, mode);
 
 	if (unlikely(rc == MIGRATEPAGE_BALLOON_SUCCESS)) {
 		/*
@@ -926,8 +925,7 @@ out:
  */
 static int unmap_and_move_huge_page(new_page_t get_new_page,
 				unsigned long private, struct page *hpage,
-				int force, bool offlining,
-				enum migrate_mode mode)
+				int force, enum migrate_mode mode)
 {
 	int rc = 0;
 	int *result = NULL;
@@ -989,9 +987,8 @@ out:
  *
  * Return: Number of pages not migrated or error code.
  */
-int migrate_pages(struct list_head *from,
-		new_page_t get_new_page, unsigned long private, bool offlining,
-		enum migrate_mode mode, int reason)
+int migrate_pages(struct list_head *from, new_page_t get_new_page,
+		unsigned long private, enum migrate_mode mode, int reason)
 {
 	int retry = 1;
 	int nr_failed = 0;
@@ -1012,8 +1009,7 @@ int migrate_pages(struct list_head *from,
 			cond_resched();
 
 			rc = unmap_and_move(get_new_page, private,
-						page, pass > 2, offlining,
-						mode);
+						page, pass > 2, mode);
 
 			switch(rc) {
 			case -ENOMEM:
@@ -1046,15 +1042,13 @@ out:
 }
 
 int migrate_huge_page(struct page *hpage, new_page_t get_new_page,
-		      unsigned long private, bool offlining,
-		      enum migrate_mode mode)
+		      unsigned long private, enum migrate_mode mode)
 {
 	int pass, rc;
 
 	for (pass = 0; pass < 10; pass++) {
-		rc = unmap_and_move_huge_page(get_new_page,
-					      private, hpage, pass > 2, offlining,
-					      mode);
+		rc = unmap_and_move_huge_page(get_new_page, private,
+						hpage, pass > 2, mode);
 		switch (rc) {
 		case -ENOMEM:
 			goto out;
@@ -1177,8 +1171,7 @@ set_status:
 	err = 0;
 	if (!list_empty(&pagelist)) {
 		err = migrate_pages(&pagelist, new_page_node,
-				(unsigned long)pm, 0, MIGRATE_SYNC,
-				MR_SYSCALL);
+				(unsigned long)pm, MIGRATE_SYNC, MR_SYSCALL);
 		if (err)
 			putback_lru_pages(&pagelist);
 	}
@@ -1613,10 +1606,8 @@ int migrate_misplaced_page(struct page *page, int node)
 		goto out;
 
 	list_add(&page->lru, &migratepages);
-	nr_remaining = migrate_pages(&migratepages,
-			alloc_misplaced_dst_page,
-			node, false, MIGRATE_ASYNC,
-			MR_NUMA_MISPLACED);
+	nr_remaining = migrate_pages(&migratepages, alloc_misplaced_dst_page,
+				     node, MIGRATE_ASYNC, MR_NUMA_MISPLACED);
 	if (nr_remaining) {
 		putback_lru_pages(&migratepages);
 		isolated = 0;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 445718b328b6..64c83a8c3220 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -6084,10 +6084,8 @@ static int __alloc_contig_migrate_range(struct compact_control *cc,
 							&cc->migratepages);
 		cc->nr_migratepages -= nr_reclaimed;
 
-		ret = migrate_pages(&cc->migratepages,
-				    alloc_migrate_target,
-				    0, false, MIGRATE_SYNC,
-				    MR_CMA);
+		ret = migrate_pages(&cc->migratepages, alloc_migrate_target,
+				    0, MIGRATE_SYNC, MR_CMA);
 	}
 	if (ret < 0) {
 		putback_movable_pages(&cc->migratepages);
-- 
cgit v1.2.3


From e3790144c9091631a18564aa64db8a971da02c41 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Fri, 22 Feb 2013 16:35:19 -0800
Subject: mm: refactor inactive_file_is_low() to use get_lru_size()

An inactive file list is considered low when its active counterpart is
bigger, regardless of whether it is a global zone LRU list or a memcg
zone LRU list.  The only difference is in how the LRU size is assessed.

get_lru_size() does the right thing for both global and memcg reclaim
situations.

Get rid of inactive_file_is_low_global() and
mem_cgroup_inactive_file_is_low() by using get_lru_size() and compare
the numbers in common code.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Michal Hocko <mhocko@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h |  7 -------
 mm/memcontrol.c            | 11 -----------
 mm/vmscan.c                | 19 ++++++-------------
 3 files changed, 6 insertions(+), 31 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 28bd5fa2ff2e..d6183f06d8c1 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -116,7 +116,6 @@ void mem_cgroup_iter_break(struct mem_cgroup *, struct mem_cgroup *);
  * For memory reclaim.
  */
 int mem_cgroup_inactive_anon_is_low(struct lruvec *lruvec);
-int mem_cgroup_inactive_file_is_low(struct lruvec *lruvec);
 int mem_cgroup_select_victim_node(struct mem_cgroup *memcg);
 unsigned long mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list);
 void mem_cgroup_update_lru_size(struct lruvec *, enum lru_list, int);
@@ -321,12 +320,6 @@ mem_cgroup_inactive_anon_is_low(struct lruvec *lruvec)
 	return 1;
 }
 
-static inline int
-mem_cgroup_inactive_file_is_low(struct lruvec *lruvec)
-{
-	return 1;
-}
-
 static inline unsigned long
 mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list lru)
 {
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 60d28e36f0e0..af4b04f4d744 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1396,17 +1396,6 @@ int mem_cgroup_inactive_anon_is_low(struct lruvec *lruvec)
 	return inactive * inactive_ratio < active;
 }
 
-int mem_cgroup_inactive_file_is_low(struct lruvec *lruvec)
-{
-	unsigned long active;
-	unsigned long inactive;
-
-	inactive = mem_cgroup_get_lru_size(lruvec, LRU_INACTIVE_FILE);
-	active = mem_cgroup_get_lru_size(lruvec, LRU_ACTIVE_FILE);
-
-	return (active > inactive);
-}
-
 #define mem_cgroup_from_res_counter(counter, member)	\
 	container_of(counter, struct mem_cgroup, member)
 
diff --git a/mm/vmscan.c b/mm/vmscan.c
index b7d8015a6d54..396ecee281d0 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1579,16 +1579,6 @@ static inline int inactive_anon_is_low(struct lruvec *lruvec)
 }
 #endif
 
-static int inactive_file_is_low_global(struct zone *zone)
-{
-	unsigned long active, inactive;
-
-	active = zone_page_state(zone, NR_ACTIVE_FILE);
-	inactive = zone_page_state(zone, NR_INACTIVE_FILE);
-
-	return (active > inactive);
-}
-
 /**
  * inactive_file_is_low - check if file pages need to be deactivated
  * @lruvec: LRU vector to check
@@ -1605,10 +1595,13 @@ static int inactive_file_is_low_global(struct zone *zone)
  */
 static int inactive_file_is_low(struct lruvec *lruvec)
 {
-	if (!mem_cgroup_disabled())
-		return mem_cgroup_inactive_file_is_low(lruvec);
+	unsigned long inactive;
+	unsigned long active;
+
+	inactive = get_lru_size(lruvec, LRU_INACTIVE_FILE);
+	active = get_lru_size(lruvec, LRU_ACTIVE_FILE);
 
-	return inactive_file_is_low_global(lruvec_zone(lruvec));
+	return active > inactive;
 }
 
 static int inactive_list_is_low(struct lruvec *lruvec, enum lru_list lru)
-- 
cgit v1.2.3


From 9127ab4ff92f0ecd7b4671efa9d0edb21c691e9f Mon Sep 17 00:00:00 2001
From: Cody P Schafer <cody@linux.vnet.ibm.com>
Date: Fri, 22 Feb 2013 16:35:21 -0800
Subject: mm: add SECTION_IN_PAGE_FLAGS

Instead of directly utilizing a combination of config options to determine
this, add a macro to specifically address it.

Signed-off-by: Cody P Schafer <cody@linux.vnet.ibm.com>
Cc: David Hansen <dave@linux.vnet.ibm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index fe039bdba4ed..97da0302cf51 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -625,6 +625,10 @@ static inline enum zone_type page_zonenum(const struct page *page)
 	return (page->flags >> ZONES_PGSHIFT) & ZONES_MASK;
 }
 
+#if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
+#define SECTION_IN_PAGE_FLAGS
+#endif
+
 /*
  * The identification function is only used by the buddy allocator for
  * determining if two pages could be buddies. We are not really
@@ -708,7 +712,7 @@ static inline struct zone *page_zone(const struct page *page)
 	return &NODE_DATA(page_to_nid(page))->node_zones[page_zonenum(page)];
 }
 
-#if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
+#ifdef SECTION_IN_PAGE_FLAGS
 static inline void set_page_section(struct page *page, unsigned long section)
 {
 	page->flags &= ~(SECTIONS_MASK << SECTIONS_PGSHIFT);
@@ -738,7 +742,7 @@ static inline void set_page_links(struct page *page, enum zone_type zone,
 {
 	set_page_zone(page, zone);
 	set_page_node(page, node);
-#if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
+#ifdef SECTION_IN_PAGE_FLAGS
 	set_page_section(page, pfn_to_section_nr(pfn));
 #endif
 }
-- 
cgit v1.2.3


From 108bcc96ef7047c02cad4d229f04da38186a3f3f Mon Sep 17 00:00:00 2001
From: Cody P Schafer <cody@linux.vnet.ibm.com>
Date: Fri, 22 Feb 2013 16:35:23 -0800
Subject: mm: add & use zone_end_pfn() and zone_spans_pfn()

Add 2 helpers (zone_end_pfn() and zone_spans_pfn()) to reduce code
duplication.

This also switches to using them in compaction (where an additional
variable needed to be renamed), page_alloc, vmstat, memory_hotplug, and
kmemleak.

Note that in compaction.c I avoid calling zone_end_pfn() repeatedly
because I expect at some point the sycronization issues with start_pfn &
spanned_pages will need fixing, either by actually using the seqlock or
clever memory barrier usage.

Signed-off-by: Cody P Schafer <cody@linux.vnet.ibm.com>
Cc: David Hansen <dave@linux.vnet.ibm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmzone.h | 10 ++++++++++
 mm/compaction.c        | 10 +++++-----
 mm/kmemleak.c          |  5 ++---
 mm/memory_hotplug.c    | 10 +++++-----
 mm/page_alloc.c        | 22 +++++++++-------------
 mm/vmstat.c            |  2 +-
 6 files changed, 32 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 6c80d0ac14dd..34343f51e211 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -527,6 +527,16 @@ static inline int zone_is_oom_locked(const struct zone *zone)
 	return test_bit(ZONE_OOM_LOCKED, &zone->flags);
 }
 
+static inline unsigned zone_end_pfn(const struct zone *zone)
+{
+	return zone->zone_start_pfn + zone->spanned_pages;
+}
+
+static inline bool zone_spans_pfn(const struct zone *zone, unsigned long pfn)
+{
+	return zone->zone_start_pfn <= pfn && pfn < zone_end_pfn(zone);
+}
+
 /*
  * The "priority" of VM scanning is how much of the queues we will scan in one
  * go. A value of 12 for DEF_PRIORITY implies that we will scan 1/4096th of the
diff --git a/mm/compaction.c b/mm/compaction.c
index 25e75e3e2ac6..05ccb4cc0bdb 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -86,7 +86,7 @@ static inline bool isolation_suitable(struct compact_control *cc,
 static void __reset_isolation_suitable(struct zone *zone)
 {
 	unsigned long start_pfn = zone->zone_start_pfn;
-	unsigned long end_pfn = zone->zone_start_pfn + zone->spanned_pages;
+	unsigned long end_pfn = zone_end_pfn(zone);
 	unsigned long pfn;
 
 	zone->compact_cached_migrate_pfn = start_pfn;
@@ -647,7 +647,7 @@ static void isolate_freepages(struct zone *zone,
 				struct compact_control *cc)
 {
 	struct page *page;
-	unsigned long high_pfn, low_pfn, pfn, zone_end_pfn, end_pfn;
+	unsigned long high_pfn, low_pfn, pfn, z_end_pfn, end_pfn;
 	int nr_freepages = cc->nr_freepages;
 	struct list_head *freelist = &cc->freepages;
 
@@ -666,7 +666,7 @@ static void isolate_freepages(struct zone *zone,
 	 */
 	high_pfn = min(low_pfn, pfn);
 
-	zone_end_pfn = zone->zone_start_pfn + zone->spanned_pages;
+	z_end_pfn = zone_end_pfn(zone);
 
 	/*
 	 * Isolate free pages until enough are available to migrate the
@@ -709,7 +709,7 @@ static void isolate_freepages(struct zone *zone,
 		 * only scans within a pageblock
 		 */
 		end_pfn = ALIGN(pfn + 1, pageblock_nr_pages);
-		end_pfn = min(end_pfn, zone_end_pfn);
+		end_pfn = min(end_pfn, z_end_pfn);
 		isolated = isolate_freepages_block(cc, pfn, end_pfn,
 						   freelist, false);
 		nr_freepages += isolated;
@@ -923,7 +923,7 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
 {
 	int ret;
 	unsigned long start_pfn = zone->zone_start_pfn;
-	unsigned long end_pfn = zone->zone_start_pfn + zone->spanned_pages;
+	unsigned long end_pfn = zone_end_pfn(zone);
 
 	ret = compaction_suitable(zone, cc->order);
 	switch (ret) {
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index 752a705c77c2..83dd5fbf5e60 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -1300,9 +1300,8 @@ static void kmemleak_scan(void)
 	 */
 	lock_memory_hotplug();
 	for_each_online_node(i) {
-		pg_data_t *pgdat = NODE_DATA(i);
-		unsigned long start_pfn = pgdat->node_start_pfn;
-		unsigned long end_pfn = start_pfn + pgdat->node_spanned_pages;
+		unsigned long start_pfn = node_start_pfn(i);
+		unsigned long end_pfn = node_end_pfn(i);
 		unsigned long pfn;
 
 		for (pfn = start_pfn; pfn < end_pfn; pfn++) {
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index dda1ca695a08..8b3235eedf3d 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -299,7 +299,7 @@ static int __meminit move_pfn_range_left(struct zone *z1, struct zone *z2,
 	pgdat_resize_lock(z1->zone_pgdat, &flags);
 
 	/* can't move pfns which are higher than @z2 */
-	if (end_pfn > z2->zone_start_pfn + z2->spanned_pages)
+	if (end_pfn > zone_end_pfn(z2))
 		goto out_fail;
 	/* the move out part mast at the left most of @z2 */
 	if (start_pfn > z2->zone_start_pfn)
@@ -315,7 +315,7 @@ static int __meminit move_pfn_range_left(struct zone *z1, struct zone *z2,
 		z1_start_pfn = start_pfn;
 
 	resize_zone(z1, z1_start_pfn, end_pfn);
-	resize_zone(z2, end_pfn, z2->zone_start_pfn + z2->spanned_pages);
+	resize_zone(z2, end_pfn, zone_end_pfn(z2));
 
 	pgdat_resize_unlock(z1->zone_pgdat, &flags);
 
@@ -347,15 +347,15 @@ static int __meminit move_pfn_range_right(struct zone *z1, struct zone *z2,
 	if (z1->zone_start_pfn > start_pfn)
 		goto out_fail;
 	/* the move out part mast at the right most of @z1 */
-	if (z1->zone_start_pfn + z1->spanned_pages >  end_pfn)
+	if (zone_end_pfn(z1) >  end_pfn)
 		goto out_fail;
 	/* must included/overlap */
-	if (start_pfn >= z1->zone_start_pfn + z1->spanned_pages)
+	if (start_pfn >= zone_end_pfn(z1))
 		goto out_fail;
 
 	/* use end_pfn for z2's end_pfn if z2 is empty */
 	if (z2->spanned_pages)
-		z2_end_pfn = z2->zone_start_pfn + z2->spanned_pages;
+		z2_end_pfn = zone_end_pfn(z2);
 	else
 		z2_end_pfn = end_pfn;
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 64c83a8c3220..a3687afc5917 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -250,9 +250,7 @@ static int page_outside_zone_boundaries(struct zone *zone, struct page *page)
 
 	do {
 		seq = zone_span_seqbegin(zone);
-		if (pfn >= zone->zone_start_pfn + zone->spanned_pages)
-			ret = 1;
-		else if (pfn < zone->zone_start_pfn)
+		if (!zone_spans_pfn(zone, pfn))
 			ret = 1;
 	} while (zone_span_seqretry(zone, seq));
 
@@ -990,9 +988,9 @@ int move_freepages_block(struct zone *zone, struct page *page,
 	end_pfn = start_pfn + pageblock_nr_pages - 1;
 
 	/* Do not cross zone boundaries */
-	if (start_pfn < zone->zone_start_pfn)
+	if (!zone_spans_pfn(zone, start_pfn))
 		start_page = page;
-	if (end_pfn >= zone->zone_start_pfn + zone->spanned_pages)
+	if (!zone_spans_pfn(zone, end_pfn))
 		return 0;
 
 	return move_freepages(zone, start_page, end_page, migratetype);
@@ -1286,7 +1284,7 @@ void mark_free_pages(struct zone *zone)
 
 	spin_lock_irqsave(&zone->lock, flags);
 
-	max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
+	max_zone_pfn = zone_end_pfn(zone);
 	for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
 		if (pfn_valid(pfn)) {
 			struct page *page = pfn_to_page(pfn);
@@ -3798,7 +3796,7 @@ static void setup_zone_migrate_reserve(struct zone *zone)
 	 * the block.
 	 */
 	start_pfn = zone->zone_start_pfn;
-	end_pfn = start_pfn + zone->spanned_pages;
+	end_pfn = zone_end_pfn(zone);
 	start_pfn = roundup(start_pfn, pageblock_nr_pages);
 	reserve = roundup(min_wmark_pages(zone), pageblock_nr_pages) >>
 							pageblock_order;
@@ -3912,7 +3910,7 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
 		 * pfn out of zone.
 		 */
 		if ((z->zone_start_pfn <= pfn)
-		    && (pfn < z->zone_start_pfn + z->spanned_pages)
+		    && (pfn < zone_end_pfn(z))
 		    && !(pfn & (pageblock_nr_pages - 1)))
 			set_pageblock_migratetype(page, MIGRATE_MOVABLE);
 
@@ -4713,7 +4711,7 @@ static void __init_refok alloc_node_mem_map(struct pglist_data *pgdat)
 		 * for the buddy allocator to function correctly.
 		 */
 		start = pgdat->node_start_pfn & ~(MAX_ORDER_NR_PAGES - 1);
-		end = pgdat->node_start_pfn + pgdat->node_spanned_pages;
+		end = pgdat_end_pfn(pgdat);
 		end = ALIGN(end, MAX_ORDER_NR_PAGES);
 		size =  (end - start) * sizeof(struct page);
 		map = alloc_remap(pgdat->node_id, size);
@@ -5928,8 +5926,7 @@ void set_pageblock_flags_group(struct page *page, unsigned long flags,
 	pfn = page_to_pfn(page);
 	bitmap = get_pageblock_bitmap(zone, pfn);
 	bitidx = pfn_to_bitidx(zone, pfn);
-	VM_BUG_ON(pfn < zone->zone_start_pfn);
-	VM_BUG_ON(pfn >= zone->zone_start_pfn + zone->spanned_pages);
+	VM_BUG_ON(!zone_spans_pfn(zone, pfn));
 
 	for (; start_bitidx <= end_bitidx; start_bitidx++, value <<= 1)
 		if (flags & value)
@@ -6027,8 +6024,7 @@ bool is_pageblock_removable_nolock(struct page *page)
 
 	zone = page_zone(page);
 	pfn = page_to_pfn(page);
-	if (zone->zone_start_pfn > pfn ||
-			zone->zone_start_pfn + zone->spanned_pages <= pfn)
+	if (!zone_spans_pfn(zone, pfn))
 		return false;
 
 	return !has_unmovable_pages(zone, page, 0, true);
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 57f02fd1768b..e1d8ed172c42 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -891,7 +891,7 @@ static void pagetypeinfo_showblockcount_print(struct seq_file *m,
 	int mtype;
 	unsigned long pfn;
 	unsigned long start_pfn = zone->zone_start_pfn;
-	unsigned long end_pfn = start_pfn + zone->spanned_pages;
+	unsigned long end_pfn = zone_end_pfn(zone);
 	unsigned long count[MIGRATE_TYPES] = { 0, };
 
 	for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
-- 
cgit v1.2.3


From 2a6e3ebee2edcade56f836390a5f0c7b76ff5f9e Mon Sep 17 00:00:00 2001
From: Cody P Schafer <cody@linux.vnet.ibm.com>
Date: Fri, 22 Feb 2013 16:35:24 -0800
Subject: mm: add zone_is_empty() and zone_is_initialized()

Factoring out these 2 checks makes it more clear what we are actually
checking for.

Signed-off-by: Cody P Schafer <cody@linux.vnet.ibm.com>
Cc: David Hansen <dave@linux.vnet.ibm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmzone.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 34343f51e211..cf8925962b68 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -537,6 +537,16 @@ static inline bool zone_spans_pfn(const struct zone *zone, unsigned long pfn)
 	return zone->zone_start_pfn <= pfn && pfn < zone_end_pfn(zone);
 }
 
+static inline bool zone_is_initialized(struct zone *zone)
+{
+	return !!zone->wait_table;
+}
+
+static inline bool zone_is_empty(struct zone *zone)
+{
+	return zone->spanned_pages == 0;
+}
+
 /*
  * The "priority" of VM scanning is how much of the queues we will scan in one
  * go. A value of 12 for DEF_PRIORITY implies that we will scan 1/4096th of the
-- 
cgit v1.2.3


From da3649e133948d8b7d8c57b05a33faf62ac2cc7e Mon Sep 17 00:00:00 2001
From: Cody P Schafer <cody@linux.vnet.ibm.com>
Date: Fri, 22 Feb 2013 16:35:27 -0800
Subject: mmzone: add pgdat_{end_pfn,is_empty}() helpers & consolidate.

Add pgdat_end_pfn() and pgdat_is_empty() helpers which match the similar
zone_*() functions.

Change node_end_pfn() to be a wrapper of pgdat_end_pfn().

Signed-off-by: Cody P Schafer <cody@linux.vnet.ibm.com>
Cc: David Hansen <dave@linux.vnet.ibm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmzone.h | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index cf8925962b68..ede274957e05 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -756,11 +756,17 @@ typedef struct pglist_data {
 #define nid_page_nr(nid, pagenr) 	pgdat_page_nr(NODE_DATA(nid),(pagenr))
 
 #define node_start_pfn(nid)	(NODE_DATA(nid)->node_start_pfn)
+#define node_end_pfn(nid) pgdat_end_pfn(NODE_DATA(nid))
 
-#define node_end_pfn(nid) ({\
-	pg_data_t *__pgdat = NODE_DATA(nid);\
-	__pgdat->node_start_pfn + __pgdat->node_spanned_pages;\
-})
+static inline unsigned long pgdat_end_pfn(pg_data_t *pgdat)
+{
+	return pgdat->node_start_pfn + pgdat->node_spanned_pages;
+}
+
+static inline bool pgdat_is_empty(pg_data_t *pgdat)
+{
+	return !pgdat->node_start_pfn && !pgdat->node_spanned_pages;
+}
 
 #include <linux/memory_hotplug.h>
 
-- 
cgit v1.2.3


From ebec3862fd6eefe8301aa55ed2e30c685d831842 Mon Sep 17 00:00:00 2001
From: Zhang Yanfei <zhangyanfei@cn.fujitsu.com>
Date: Fri, 22 Feb 2013 16:35:43 -0800
Subject: mm: fix return type for functions nr_free_*_pages

Currently, the amount of RAM that functions nr_free_*_pages return is
held in unsigned int.  But in machines with big memory (exceeding 16TB),
the amount may be incorrect because of overflow, so fix it.

Signed-off-by: Zhang Yanfei <zhangyanfei@cn.fujitsu.com>
Cc: Simon Horman <horms@verge.net.au>
Cc: Julian Anastasov <ja@ssi.bg>
Cc: David Miller <davem@davemloft.net>
Cc: Eric Van Hensbergen <ericvh@gmail.com>
Cc: Ron Minnich <rminnich@sandia.gov>
Cc: Latchesar Ionkov <lucho@ionkov.net>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/swap.h | 4 ++--
 mm/page_alloc.c      | 8 ++++----
 2 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index a3e22d357e91..8a15f38ebc5c 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -225,8 +225,8 @@ struct swap_list_t {
 extern unsigned long totalram_pages;
 extern unsigned long totalreserve_pages;
 extern unsigned long dirty_balance_reserve;
-extern unsigned int nr_free_buffer_pages(void);
-extern unsigned int nr_free_pagecache_pages(void);
+extern unsigned long nr_free_buffer_pages(void);
+extern unsigned long nr_free_pagecache_pages(void);
 
 /* Definition of global_page_state not available yet */
 #define nr_free_pages() global_page_state(NR_FREE_PAGES)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 159f81577774..276140654305 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2813,13 +2813,13 @@ void free_pages_exact(void *virt, size_t size)
 }
 EXPORT_SYMBOL(free_pages_exact);
 
-static unsigned int nr_free_zone_pages(int offset)
+static unsigned long nr_free_zone_pages(int offset)
 {
 	struct zoneref *z;
 	struct zone *zone;
 
 	/* Just pick one node, since fallback list is circular */
-	unsigned int sum = 0;
+	unsigned long sum = 0;
 
 	struct zonelist *zonelist = node_zonelist(numa_node_id(), GFP_KERNEL);
 
@@ -2836,7 +2836,7 @@ static unsigned int nr_free_zone_pages(int offset)
 /*
  * Amount of free RAM allocatable within ZONE_DMA and ZONE_NORMAL
  */
-unsigned int nr_free_buffer_pages(void)
+unsigned long nr_free_buffer_pages(void)
 {
 	return nr_free_zone_pages(gfp_zone(GFP_USER));
 }
@@ -2845,7 +2845,7 @@ EXPORT_SYMBOL_GPL(nr_free_buffer_pages);
 /*
  * Amount of free RAM allocatable within all zones
  */
-unsigned int nr_free_pagecache_pages(void)
+unsigned long nr_free_pagecache_pages(void)
 {
 	return nr_free_zone_pages(gfp_zone(GFP_HIGHUSER_MOVABLE));
 }
-- 
cgit v1.2.3


From b21e0b90ccb99a377bce0167fed1e881bb5065d7 Mon Sep 17 00:00:00 2001
From: Zhang Yanfei <zhangyanfei@cn.fujitsu.com>
Date: Fri, 22 Feb 2013 16:35:48 -0800
Subject: vmscan: change type of vm_total_pages to unsigned long

This variable is calculated from nr_free_pagecache_pages so
change its type to unsigned long.

Signed-off-by: Zhang Yanfei <zhangyanfei@cn.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/swap.h | 2 +-
 mm/vmscan.c          | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 8a15f38ebc5c..2818a123f3ea 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -275,7 +275,7 @@ extern unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
 extern unsigned long shrink_all_memory(unsigned long nr_pages);
 extern int vm_swappiness;
 extern int remove_mapping(struct address_space *mapping, struct page *page);
-extern long vm_total_pages;
+extern unsigned long vm_total_pages;
 
 #ifdef CONFIG_NUMA
 extern int zone_reclaim_mode;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 606d0bb46091..88c5fed8b9a4 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -128,7 +128,7 @@ struct scan_control {
  * From 0 .. 100.  Higher means more swappy.
  */
 int vm_swappiness = 60;
-long vm_total_pages;	/* The total number of pages which the VM controls */
+unsigned long vm_total_pages;	/* The total number of pages which the VM controls */
 
 static LIST_HEAD(shrinker_list);
 static DECLARE_RWSEM(shrinker_rwsem);
-- 
cgit v1.2.3


From 28a35716d317980ae9bc2ff2f84c33a3cda9e884 Mon Sep 17 00:00:00 2001
From: Michel Lespinasse <walken@google.com>
Date: Fri, 22 Feb 2013 16:35:55 -0800
Subject: mm: use long type for page counts in mm_populate() and
 get_user_pages()

Use long type for page counts in mm_populate() so as to avoid integer
overflow when running the following test code:

int main(void) {
  void *p = mmap(NULL, 0x100000000000, PROT_READ,
                 MAP_PRIVATE | MAP_ANON, -1, 0);
  printf("p: %p\n", p);
  mlockall(MCL_CURRENT);
  printf("done\n");
  return 0;
}

Signed-off-by: Michel Lespinasse <walken@google.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/hugetlb.h |  6 +++---
 include/linux/mm.h      | 15 ++++++++-------
 mm/hugetlb.c            | 12 ++++++------
 mm/memory.c             | 18 +++++++++---------
 mm/mlock.c              |  4 ++--
 mm/nommu.c              | 15 ++++++++-------
 6 files changed, 36 insertions(+), 34 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 0c80d3f57a5b..eedc334fb6f5 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -43,9 +43,9 @@ int hugetlb_mempolicy_sysctl_handler(struct ctl_table *, int,
 #endif
 
 int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *);
-int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *,
-			struct page **, struct vm_area_struct **,
-			unsigned long *, int *, int, unsigned int flags);
+long follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *,
+			 struct page **, struct vm_area_struct **,
+			 unsigned long *, unsigned long *, long, unsigned int);
 void unmap_hugepage_range(struct vm_area_struct *,
 			  unsigned long, unsigned long, struct page *);
 void __unmap_hugepage_range_final(struct mmu_gather *tlb,
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 97da0302cf51..87b0ef253607 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1013,13 +1013,14 @@ extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *
 extern int access_remote_vm(struct mm_struct *mm, unsigned long addr,
 		void *buf, int len, int write);
 
-int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
-		     unsigned long start, int len, unsigned int foll_flags,
-		     struct page **pages, struct vm_area_struct **vmas,
-		     int *nonblocking);
-int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
-			unsigned long start, int nr_pages, int write, int force,
-			struct page **pages, struct vm_area_struct **vmas);
+long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
+		      unsigned long start, unsigned long nr_pages,
+		      unsigned int foll_flags, struct page **pages,
+		      struct vm_area_struct **vmas, int *nonblocking);
+long get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
+		    unsigned long start, unsigned long nr_pages,
+		    int write, int force, struct page **pages,
+		    struct vm_area_struct **vmas);
 int get_user_pages_fast(unsigned long start, int nr_pages, int write,
 			struct page **pages);
 struct kvec;
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index e14a8c79a1eb..cdb64e4d238a 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2920,14 +2920,14 @@ follow_huge_pud(struct mm_struct *mm, unsigned long address,
 	return NULL;
 }
 
-int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
-			struct page **pages, struct vm_area_struct **vmas,
-			unsigned long *position, int *length, int i,
-			unsigned int flags)
+long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
+			 struct page **pages, struct vm_area_struct **vmas,
+			 unsigned long *position, unsigned long *nr_pages,
+			 long i, unsigned int flags)
 {
 	unsigned long pfn_offset;
 	unsigned long vaddr = *position;
-	int remainder = *length;
+	unsigned long remainder = *nr_pages;
 	struct hstate *h = hstate_vma(vma);
 
 	spin_lock(&mm->page_table_lock);
@@ -2997,7 +2997,7 @@ same_page:
 		}
 	}
 	spin_unlock(&mm->page_table_lock);
-	*length = remainder;
+	*nr_pages = remainder;
 	*position = vaddr;
 
 	return i ? i : -EFAULT;
diff --git a/mm/memory.c b/mm/memory.c
index 7bd22a621817..bc929dbad215 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1677,15 +1677,15 @@ static inline int stack_guard_page(struct vm_area_struct *vma, unsigned long add
  * instead of __get_user_pages. __get_user_pages should be used only if
  * you need some special @gup_flags.
  */
-int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
-		     unsigned long start, int nr_pages, unsigned int gup_flags,
-		     struct page **pages, struct vm_area_struct **vmas,
-		     int *nonblocking)
+long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
+		unsigned long start, unsigned long nr_pages,
+		unsigned int gup_flags, struct page **pages,
+		struct vm_area_struct **vmas, int *nonblocking)
 {
-	int i;
+	long i;
 	unsigned long vm_flags;
 
-	if (nr_pages <= 0)
+	if (!nr_pages)
 		return 0;
 
 	VM_BUG_ON(!!pages != !!(gup_flags & FOLL_GET));
@@ -1981,9 +1981,9 @@ int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm,
  *
  * See also get_user_pages_fast, for performance critical applications.
  */
-int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
-		unsigned long start, int nr_pages, int write, int force,
-		struct page **pages, struct vm_area_struct **vmas)
+long get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
+		unsigned long start, unsigned long nr_pages, int write,
+		int force, struct page **pages, struct vm_area_struct **vmas)
 {
 	int flags = FOLL_TOUCH;
 
diff --git a/mm/mlock.c b/mm/mlock.c
index 38db3b094105..e6638f565d42 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -160,7 +160,7 @@ long __mlock_vma_pages_range(struct vm_area_struct *vma,
 {
 	struct mm_struct *mm = vma->vm_mm;
 	unsigned long addr = start;
-	int nr_pages = (end - start) / PAGE_SIZE;
+	unsigned long nr_pages = (end - start) / PAGE_SIZE;
 	int gup_flags;
 
 	VM_BUG_ON(start & ~PAGE_MASK);
@@ -382,7 +382,7 @@ int __mm_populate(unsigned long start, unsigned long len, int ignore_errors)
 	unsigned long end, nstart, nend;
 	struct vm_area_struct *vma = NULL;
 	int locked = 0;
-	int ret = 0;
+	long ret = 0;
 
 	VM_BUG_ON(start & ~PAGE_MASK);
 	VM_BUG_ON(len != PAGE_ALIGN(len));
diff --git a/mm/nommu.c b/mm/nommu.c
index 87854a55829d..6ab706608492 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -140,10 +140,10 @@ unsigned int kobjsize(const void *objp)
 	return PAGE_SIZE << compound_order(page);
 }
 
-int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
-		     unsigned long start, int nr_pages, unsigned int foll_flags,
-		     struct page **pages, struct vm_area_struct **vmas,
-		     int *retry)
+long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
+		      unsigned long start, unsigned long nr_pages,
+		      unsigned int foll_flags, struct page **pages,
+		      struct vm_area_struct **vmas, int *nonblocking)
 {
 	struct vm_area_struct *vma;
 	unsigned long vm_flags;
@@ -190,9 +190,10 @@ finish_or_fault:
  *   slab page or a secondary page from a compound page
  * - don't permit access to VMAs that don't support it, such as I/O mappings
  */
-int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
-	unsigned long start, int nr_pages, int write, int force,
-	struct page **pages, struct vm_area_struct **vmas)
+long get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
+		    unsigned long start, unsigned long nr_pages,
+		    int write, int force, struct page **pages,
+		    struct vm_area_struct **vmas)
 {
 	int flags = 0;
 
-- 
cgit v1.2.3


From 240aadeedc4a89fc44623f8ce4ca46bda73db07e Mon Sep 17 00:00:00 2001
From: Michel Lespinasse <walken@google.com>
Date: Fri, 22 Feb 2013 16:35:56 -0800
Subject: mm: accelerate mm_populate() treatment of THP pages

This change adds a follow_page_mask function which is equivalent to
follow_page, but with an extra page_mask argument.

follow_page_mask sets *page_mask to HPAGE_PMD_NR - 1 when it encounters
a THP page, and to 0 in other cases.

__get_user_pages() makes use of this in order to accelerate populating
THP ranges - that is, when both the pages and vmas arrays are NULL, we
don't need to iterate HPAGE_PMD_NR times to cover a single THP page (and
we also avoid taking mm->page_table_lock that many times).

Signed-off-by: Michel Lespinasse <walken@google.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 13 +++++++++++--
 mm/memory.c        | 31 +++++++++++++++++++++++--------
 mm/nommu.c         |  6 ++++--
 3 files changed, 38 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 87b0ef253607..6124f1db50fe 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1629,8 +1629,17 @@ int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
 int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
 			unsigned long pfn);
 
-struct page *follow_page(struct vm_area_struct *, unsigned long address,
-			unsigned int foll_flags);
+struct page *follow_page_mask(struct vm_area_struct *vma,
+			      unsigned long address, unsigned int foll_flags,
+			      unsigned int *page_mask);
+
+static inline struct page *follow_page(struct vm_area_struct *vma,
+		unsigned long address, unsigned int foll_flags)
+{
+	unsigned int unused_page_mask;
+	return follow_page_mask(vma, address, foll_flags, &unused_page_mask);
+}
+
 #define FOLL_WRITE	0x01	/* check pte is writable */
 #define FOLL_TOUCH	0x02	/* mark page accessed */
 #define FOLL_GET	0x04	/* do get_page on page */
diff --git a/mm/memory.c b/mm/memory.c
index bc929dbad215..5d2ef1217d0c 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1462,10 +1462,11 @@ int zap_vma_ptes(struct vm_area_struct *vma, unsigned long address,
 EXPORT_SYMBOL_GPL(zap_vma_ptes);
 
 /**
- * follow_page - look up a page descriptor from a user-virtual address
+ * follow_page_mask - look up a page descriptor from a user-virtual address
  * @vma: vm_area_struct mapping @address
  * @address: virtual address to look up
  * @flags: flags modifying lookup behaviour
+ * @page_mask: on output, *page_mask is set according to the size of the page
  *
  * @flags can have FOLL_ flags set, defined in <linux/mm.h>
  *
@@ -1473,8 +1474,9 @@ EXPORT_SYMBOL_GPL(zap_vma_ptes);
  * an error pointer if there is a mapping to something not represented
  * by a page descriptor (see also vm_normal_page()).
  */
-struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
-			unsigned int flags)
+struct page *follow_page_mask(struct vm_area_struct *vma,
+			      unsigned long address, unsigned int flags,
+			      unsigned int *page_mask)
 {
 	pgd_t *pgd;
 	pud_t *pud;
@@ -1484,6 +1486,8 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
 	struct page *page;
 	struct mm_struct *mm = vma->vm_mm;
 
+	*page_mask = 0;
+
 	page = follow_huge_addr(mm, address, flags & FOLL_WRITE);
 	if (!IS_ERR(page)) {
 		BUG_ON(flags & FOLL_GET);
@@ -1530,6 +1534,7 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
 				page = follow_trans_huge_pmd(vma, address,
 							     pmd, flags);
 				spin_unlock(&mm->page_table_lock);
+				*page_mask = HPAGE_PMD_NR - 1;
 				goto out;
 			}
 		} else
@@ -1684,6 +1689,7 @@ long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 {
 	long i;
 	unsigned long vm_flags;
+	unsigned int page_mask;
 
 	if (!nr_pages)
 		return 0;
@@ -1761,6 +1767,7 @@ long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 				get_page(page);
 			}
 			pte_unmap(pte);
+			page_mask = 0;
 			goto next_page;
 		}
 
@@ -1778,6 +1785,7 @@ long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 		do {
 			struct page *page;
 			unsigned int foll_flags = gup_flags;
+			unsigned int page_increm;
 
 			/*
 			 * If we have a pending SIGKILL, don't keep faulting
@@ -1787,7 +1795,8 @@ long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 				return i ? i : -ERESTARTSYS;
 
 			cond_resched();
-			while (!(page = follow_page(vma, start, foll_flags))) {
+			while (!(page = follow_page_mask(vma, start,
+						foll_flags, &page_mask))) {
 				int ret;
 				unsigned int fault_flags = 0;
 
@@ -1861,13 +1870,19 @@ long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 
 				flush_anon_page(vma, page, start);
 				flush_dcache_page(page);
+				page_mask = 0;
 			}
 next_page:
-			if (vmas)
+			if (vmas) {
 				vmas[i] = vma;
-			i++;
-			start += PAGE_SIZE;
-			nr_pages--;
+				page_mask = 0;
+			}
+			page_increm = 1 + (~(start >> PAGE_SHIFT) & page_mask);
+			if (page_increm > nr_pages)
+				page_increm = nr_pages;
+			i += page_increm;
+			start += page_increm * PAGE_SIZE;
+			nr_pages -= page_increm;
 		} while (nr_pages && start < vma->vm_end);
 	} while (nr_pages);
 	return i;
diff --git a/mm/nommu.c b/mm/nommu.c
index 6ab706608492..da0d210fd403 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1819,9 +1819,11 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
 	return ret;
 }
 
-struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
-			unsigned int foll_flags)
+struct page *follow_page_mask(struct vm_area_struct *vma,
+			      unsigned long address, unsigned int flags,
+			      unsigned int *page_mask)
 {
+	*page_mask = 0;
 	return NULL;
 }
 
-- 
cgit v1.2.3


From 5117b3b835f288314a2d4e5512bc1747e3a7c8ed Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Fri, 22 Feb 2013 16:36:07 -0800
Subject: mm,ksm: FOLL_MIGRATION do migration_entry_wait

In "ksm: remove old stable nodes more thoroughly" I said that I'd never
seen its WARN_ON_ONCE(page_mapped(page)).  True at the time of writing,
but it soon appeared once I tried fuller tests on the whole series.

It turned out to be due to the KSM page migration itself: unmerge_and_
remove_all_rmap_items() failed to locate and replace all the KSM pages,
because of that hiatus in page migration when old pte has been replaced
by migration entry, but not yet by new pte.  follow_page() finds no page
at that instant, but a KSM page reappears shortly after, without a
fault.

Add FOLL_MIGRATION flag, so follow_page() can do migration_entry_wait()
for KSM's break_cow().  I'd have preferred to avoid another flag, and do
it every time, in case someone else makes the same easy mistake; but did
not find another transgressor (the common get_user_pages() is of course
safe), and cannot be sure that every follow_page() caller is prepared to
sleep - ia64's xencomm_vtop()? Now, THP's wait_split_huge_page() can
already sleep there, since anon_vma locking was changed to mutex, but
maybe that's somehow excluded.

Signed-off-by: Hugh Dickins <hughd@google.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Petr Holasek <pholasek@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Izik Eidus <izik.eidus@ravellosystems.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h |  1 +
 mm/ksm.c           |  2 +-
 mm/memory.c        | 20 ++++++++++++++++++--
 3 files changed, 20 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 6124f1db50fe..e7c3f9a0111a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1651,6 +1651,7 @@ static inline struct page *follow_page(struct vm_area_struct *vma,
 #define FOLL_SPLIT	0x80	/* don't return transhuge pages, split them */
 #define FOLL_HWPOISON	0x100	/* check page is hwpoisoned */
 #define FOLL_NUMA	0x200	/* force NUMA hinting page fault */
+#define FOLL_MIGRATION	0x400	/* wait for page to replace migration entry */
 
 typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr,
 			void *data);
diff --git a/mm/ksm.c b/mm/ksm.c
index 0320327b8a6c..d61cba6fa1dc 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -364,7 +364,7 @@ static int break_ksm(struct vm_area_struct *vma, unsigned long addr)
 
 	do {
 		cond_resched();
-		page = follow_page(vma, addr, FOLL_GET);
+		page = follow_page(vma, addr, FOLL_GET | FOLL_MIGRATION);
 		if (IS_ERR_OR_NULL(page))
 			break;
 		if (PageKsm(page))
diff --git a/mm/memory.c b/mm/memory.c
index 5d2ef1217d0c..ec8ba011fa7d 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1548,8 +1548,24 @@ split_fallthrough:
 	ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
 
 	pte = *ptep;
-	if (!pte_present(pte))
-		goto no_page;
+	if (!pte_present(pte)) {
+		swp_entry_t entry;
+		/*
+		 * KSM's break_ksm() relies upon recognizing a ksm page
+		 * even while it is being migrated, so for that case we
+		 * need migration_entry_wait().
+		 */
+		if (likely(!(flags & FOLL_MIGRATION)))
+			goto no_page;
+		if (pte_none(pte) || pte_file(pte))
+			goto no_page;
+		entry = pte_to_swp_entry(pte);
+		if (!is_migration_entry(entry))
+			goto no_page;
+		pte_unmap_unlock(ptep, ptl);
+		migration_entry_wait(mm, pmd, address);
+		goto split_fallthrough;
+	}
 	if ((flags & FOLL_NUMA) && pte_numa(pte))
 		goto no_page;
 	if ((flags & FOLL_WRITE) && !pte_write(pte))
-- 
cgit v1.2.3


From da8c87241c26aac81a64c7e4d21d438a33018f4e Mon Sep 17 00:00:00 2001
From: Cong Wang <amwang@redhat.com>
Date: Thu, 21 Feb 2013 23:32:27 +0000
Subject: vlan: adjust vlan_set_encap_proto() for its callers

There are two places to call vlan_set_encap_proto():
vlan_untag() and __pop_vlan_tci().

vlan_untag() assumes skb->data points after mac addr, otherwise
the following code

        vhdr = (struct vlan_hdr *) skb->data;
        vlan_tci = ntohs(vhdr->h_vlan_TCI);
        __vlan_hwaccel_put_tag(skb, vlan_tci);

        skb_pull_rcsum(skb, VLAN_HLEN);

won't be correct. But __pop_vlan_tci() assumes points _before_
mac addr.

In vlan_set_encap_proto(), it looks for some magic L2 value
after mac addr:

        rawp = skb->data;
        if (*(unsigned short *) rawp == 0xFFFF)
	...

Therefore __pop_vlan_tci() is obviously wrong.

A quick fix is avoiding using skb->data in vlan_set_encap_proto(),
use 'vhdr+1' is always correct in both cases.

Cc: David S. Miller <davem@davemloft.net>
Cc: Jesse Gross <jesse@nicira.com>
Signed-off-by: Cong Wang <amwang@redhat.com>
Acked-by: Jesse Gross <jesse@nicira.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_vlan.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index d06cc5c8f58c..218a3b686d90 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -331,7 +331,7 @@ static inline void vlan_set_encap_proto(struct sk_buff *skb,
 					struct vlan_hdr *vhdr)
 {
 	__be16 proto;
-	unsigned char *rawp;
+	unsigned short *rawp;
 
 	/*
 	 * Was a VLAN packet, grab the encapsulated protocol, which the layer
@@ -344,8 +344,8 @@ static inline void vlan_set_encap_proto(struct sk_buff *skb,
 		return;
 	}
 
-	rawp = skb->data;
-	if (*(unsigned short *) rawp == 0xFFFF)
+	rawp = (unsigned short *)(vhdr + 1);
+	if (*rawp == 0xFFFF)
 		/*
 		 * This is a magic hack to spot IPX packets. Older Novell
 		 * breaks the protocol design and runs IPX over 802.3 without
-- 
cgit v1.2.3


From 561c6731978fa128f29342495f47fc3365898b3d Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 24 Feb 2013 10:52:26 -0500
Subject: switch lseek to COMPAT_SYSCALL_DEFINE

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/arm64/include/asm/unistd32.h  |  2 +-
 arch/arm64/kernel/sys32.S          |  5 -----
 arch/parisc/kernel/sys_parisc32.c  | 10 ----------
 arch/parisc/kernel/syscall_table.S |  2 +-
 arch/powerpc/include/asm/systbl.h  |  2 +-
 arch/powerpc/kernel/sys_ppc32.c    |  6 ------
 arch/s390/kernel/compat_wrapper.S  |  6 ------
 arch/s390/kernel/syscalls.S        |  2 +-
 arch/sparc/kernel/sys32.S          |  1 -
 arch/sparc/kernel/systbls_64.S     |  2 +-
 arch/x86/ia32/sys_ia32.c           |  5 -----
 arch/x86/include/asm/sys_ia32.h    |  1 -
 arch/x86/syscalls/syscall_32.tbl   |  2 +-
 fs/read_write.c                    |  9 ++++++++-
 include/linux/compat.h             |  1 +
 15 files changed, 15 insertions(+), 41 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h
index e60e386178d1..8153f1a43f0a 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -40,7 +40,7 @@ __SYSCALL(15,  sys_chmod)
 __SYSCALL(16,  sys_lchown16)
 __SYSCALL(17,  sys_ni_syscall)			/* 17 was sys_break */
 __SYSCALL(18,  sys_ni_syscall)			/* 18 was sys_stat */
-__SYSCALL(19,  compat_sys_lseek_wrapper)
+__SYSCALL(19,  compat_sys_lseek)
 __SYSCALL(20,  sys_getpid)
 __SYSCALL(21,  compat_sys_mount)
 __SYSCALL(22,  sys_ni_syscall)			/* 22 was sys_umount */
diff --git a/arch/arm64/kernel/sys32.S b/arch/arm64/kernel/sys32.S
index 6abb05721614..9416d045a687 100644
--- a/arch/arm64/kernel/sys32.S
+++ b/arch/arm64/kernel/sys32.S
@@ -58,11 +58,6 @@ ENDPROC(compat_sys_fstatfs64_wrapper)
  * in registers or that take 32-bit parameters which require sign
  * extension.
  */
-compat_sys_lseek_wrapper:
-	sxtw	x1, w1
-	b	sys_lseek
-ENDPROC(compat_sys_lseek_wrapper)
-
 compat_sys_pread64_wrapper:
 	orr	x3, x4, x5, lsl #32
 	b	sys_pread64
diff --git a/arch/parisc/kernel/sys_parisc32.c b/arch/parisc/kernel/sys_parisc32.c
index eca69bb8ef5f..051c8b90231f 100644
--- a/arch/parisc/kernel/sys_parisc32.c
+++ b/arch/parisc/kernel/sys_parisc32.c
@@ -79,16 +79,6 @@ asmlinkage long sys32_sendfile64(u32 out_fd, u32 in_fd,
 				(loff_t __user *)offset, count);
 }
 
-
-/* lseek() needs a wrapper because 'offset' can be negative, but the top
- * half of the argument has been zeroed by syscall.S.
- */
-
-asmlinkage int sys32_lseek(unsigned int fd, int offset, unsigned int origin)
-{
-	return sys_lseek(fd, offset, origin);
-}
-
 asmlinkage long sys32_semctl(int semid, int semnum, int cmd, union semun arg)
 {
         union semun u;
diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S
index fc9cab1cc2df..d0efc0aeb612 100644
--- a/arch/parisc/kernel/syscall_table.S
+++ b/arch/parisc/kernel/syscall_table.S
@@ -76,7 +76,7 @@
 	ENTRY_SAME(socket)
 	/* struct stat is MAYBE identical wide and narrow ?? */
 	ENTRY_COMP(newstat)
-	ENTRY_DIFF(lseek)
+	ENTRY_COMP(lseek)
 	ENTRY_SAME(getpid)		/* 20 */
 	/* the 'void * data' parameter may need re-packing in wide */
 	ENTRY_COMP(mount)
diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h
index d906f33441c6..535b6d8a41cc 100644
--- a/arch/powerpc/include/asm/systbl.h
+++ b/arch/powerpc/include/asm/systbl.h
@@ -22,7 +22,7 @@ SYSCALL_SPU(chmod)
 SYSCALL_SPU(lchown)
 SYSCALL(ni_syscall)
 OLDSYS(stat)
-SYSX_SPU(sys_lseek,ppc32_lseek,sys_lseek)
+COMPAT_SYS_SPU(lseek)
 SYSCALL_SPU(getpid)
 COMPAT_SYS(mount)
 SYSX(sys_ni_syscall,sys_oldumount,sys_oldumount)
diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c
index dbc44ba5b078..5677a36f450b 100644
--- a/arch/powerpc/kernel/sys_ppc32.c
+++ b/arch/powerpc/kernel/sys_ppc32.c
@@ -146,12 +146,6 @@ asmlinkage long compat_sys_sendfile64_wrapper(u32 out_fd, u32 in_fd,
 			    (off_t __user *)offset, count);
 }
 
-off_t ppc32_lseek(unsigned int fd, u32 offset, unsigned int origin)
-{
-	/* sign extend n */
-	return sys_lseek(fd, (int)offset, origin);
-}
-
 long compat_sys_truncate(const char __user * path, u32 length)
 {
 	/* sign extend length */
diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S
index c14faf39ae36..2b1a3a03244f 100644
--- a/arch/s390/kernel/compat_wrapper.S
+++ b/arch/s390/kernel/compat_wrapper.S
@@ -67,12 +67,6 @@ ENTRY(sys32_lchown16_wrapper)
 	llgfr	%r4,%r4			# __kernel_old_uid_emu31_t
 	jg	sys32_lchown16		# branch to system call
 
-ENTRY(sys32_lseek_wrapper)
-	llgfr	%r2,%r2			# unsigned int
-	lgfr	%r3,%r3			# off_t
-	llgfr	%r4,%r4			# unsigned int
-	jg	sys_lseek		# branch to system call
-
 #sys32_getpid_wrapper				# void
 
 ENTRY(sys32_mount_wrapper)
diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S
index aaac708aa110..0e5262f01007 100644
--- a/arch/s390/kernel/syscalls.S
+++ b/arch/s390/kernel/syscalls.S
@@ -27,7 +27,7 @@ SYSCALL(sys_chmod,sys_chmod,sys32_chmod_wrapper)		/* 15 */
 SYSCALL(sys_lchown16,sys_ni_syscall,sys32_lchown16_wrapper)	/* old lchown16 syscall*/
 NI_SYSCALL							/* old break syscall holder */
 NI_SYSCALL							/* old stat syscall holder */
-SYSCALL(sys_lseek,sys_lseek,sys32_lseek_wrapper)
+SYSCALL(sys_lseek,sys_lseek,compat_sys_lseek)
 SYSCALL(sys_getpid,sys_getpid,sys_getpid)			/* 20 */
 SYSCALL(sys_mount,sys_mount,sys32_mount_wrapper)
 SYSCALL(sys_oldumount,sys_oldumount,sys32_oldumount_wrapper)
diff --git a/arch/sparc/kernel/sys32.S b/arch/sparc/kernel/sys32.S
index 0cfe219646e3..be3d65a3c270 100644
--- a/arch/sparc/kernel/sys32.S
+++ b/arch/sparc/kernel/sys32.S
@@ -47,7 +47,6 @@ SIGN1(sys32_mq_open, compat_sys_mq_open, %o1)
 SIGN1(sys32_select, compat_sys_select, %o0)
 SIGN3(sys32_futex, compat_sys_futex, %o1, %o2, %o5)
 SIGN2(sys32_sendfile, compat_sys_sendfile, %o0, %o1)
-SIGN1(sys32_lseek, sys_lseek, %o1)
 SIGN1(sys32_recvfrom, compat_sys_recvfrom, %o0)
 SIGN1(sys32_recvmsg, compat_sys_recvmsg, %o0)
 SIGN1(sys32_sendmsg, compat_sys_sendmsg, %o0)
diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S
index 6eed1945a2cd..9ed517c5037b 100644
--- a/arch/sparc/kernel/systbls_64.S
+++ b/arch/sparc/kernel/systbls_64.S
@@ -21,7 +21,7 @@ sys_call_table32:
 /*0*/	.word sys_restart_syscall, sparc_exit, sys_fork, sys_read, sys_write
 /*5*/	.word compat_sys_open, sys_close, compat_sys_wait4, sys_creat, sys_link
 /*10*/  .word sys_unlink, sunos_execv, sys_chdir, sys_chown16, sys_mknod
-/*15*/	.word sys_chmod, sys_lchown16, sys_brk, sys_nis_syscall, sys32_lseek
+/*15*/	.word sys_chmod, sys_lchown16, sys_brk, sys_nis_syscall, compat_sys_lseek
 /*20*/	.word sys_getpid, sys_capget, sys_capset, sys_setuid16, sys_getuid16
 /*25*/	.word sys32_vmsplice, compat_sys_ptrace, sys_alarm, compat_sys_sigaltstack, sys_pause
 /*30*/	.word compat_sys_utime, sys_lchown, sys_fchown, sys_access, sys_nice
diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c
index 592f5a9a9c0e..ad7a20cbc699 100644
--- a/arch/x86/ia32/sys_ia32.c
+++ b/arch/x86/ia32/sys_ia32.c
@@ -218,11 +218,6 @@ asmlinkage long sys32_sendfile(int out_fd, int in_fd,
  * Some system calls that need sign extended arguments. This could be
  * done by a generic wrapper.
  */
-long sys32_lseek(unsigned int fd, int offset, unsigned int whence)
-{
-	return sys_lseek(fd, offset, whence);
-}
-
 long sys32_kill(int pid, int sig)
 {
 	return sys_kill(pid, sig);
diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h
index 0218d917f509..8459efc39686 100644
--- a/arch/x86/include/asm/sys_ia32.h
+++ b/arch/x86/include/asm/sys_ia32.h
@@ -43,7 +43,6 @@ asmlinkage long sys32_pwrite(unsigned int, const char __user *, u32, u32, u32);
 asmlinkage long sys32_personality(unsigned long);
 asmlinkage long sys32_sendfile(int, int, compat_off_t __user *, s32);
 
-long sys32_lseek(unsigned int, int, unsigned int);
 long sys32_kill(int, int);
 long sys32_fadvise64_64(int, __u32, __u32, __u32, __u32, int);
 long sys32_vm86_warning(void);
diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl
index f2fe78ff22cc..f51810be1a32 100644
--- a/arch/x86/syscalls/syscall_32.tbl
+++ b/arch/x86/syscalls/syscall_32.tbl
@@ -25,7 +25,7 @@
 16	i386	lchown			sys_lchown16
 17	i386	break
 18	i386	oldstat			sys_stat
-19	i386	lseek			sys_lseek			sys32_lseek
+19	i386	lseek			sys_lseek			compat_sys_lseek
 20	i386	getpid			sys_getpid
 21	i386	mount			sys_mount			compat_sys_mount
 22	i386	umount			sys_oldumount
diff --git a/fs/read_write.c b/fs/read_write.c
index bb34af315280..e57796cb7b59 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -15,6 +15,7 @@
 #include <linux/syscalls.h>
 #include <linux/pagemap.h>
 #include <linux/splice.h>
+#include <linux/compat.h>
 #include "read_write.h"
 
 #include <asm/uaccess.h>
@@ -247,6 +248,13 @@ SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, whence)
 	return retval;
 }
 
+#ifdef CONFIG_COMPAT
+COMPAT_SYSCALL_DEFINE3(lseek, unsigned int, fd, compat_off_t, offset, unsigned int, whence)
+{
+	return sys_lseek(fd, offset, whence);
+}
+#endif
+
 #ifdef __ARCH_WANT_SYS_LLSEEK
 SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high,
 		unsigned long, offset_low, loff_t __user *, result,
@@ -278,7 +286,6 @@ out_putf:
 }
 #endif
 
-
 /*
  * rw_verify_area doesn't like huge counts. We limit
  * them to something that fits in "int" so that others
diff --git a/include/linux/compat.h b/include/linux/compat.h
index de095b0462a7..59c72048bf20 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -359,6 +359,7 @@ asmlinkage ssize_t compat_sys_preadv(unsigned long fd,
 asmlinkage ssize_t compat_sys_pwritev(unsigned long fd,
 		const struct compat_iovec __user *vec,
 		unsigned long vlen, u32 pos_low, u32 pos_high);
+asmlinkage long comat_sys_lseek(unsigned int, compat_off_t, unsigned int);
 
 asmlinkage long compat_sys_execve(const char __user *filename, const compat_uptr_t __user *argv,
 		     const compat_uptr_t __user *envp);
-- 
cgit v1.2.3


From c148e9ff4bd45f26d3f0253c20efc497672c3c84 Mon Sep 17 00:00:00 2001
From: "Zhang, YiX X" <yix.x.zhang@intel.com>
Date: Tue, 8 Jan 2013 06:07:39 +0000
Subject: mmc: correct the EXCEPTION_EVENTS_STATUS value in comment

The right value is 54 according to eMMC 4.5 specification.

Signed-off-by: ZhangYi <yix.x.zhang@intel.com>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 include/linux/mmc/card.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index be2500a49925..7069dcea27c5 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -83,7 +83,7 @@ struct mmc_ext_csd {
 	unsigned int            data_tag_unit_size;     /* DATA TAG UNIT size */
 	unsigned int		boot_ro_lock;		/* ro lock support */
 	bool			boot_ro_lockable;
-	u8			raw_exception_status;	/* 53 */
+	u8			raw_exception_status;	/* 54 */
 	u8			raw_partition_support;	/* 160 */
 	u8			raw_rpmb_size_mult;	/* 168 */
 	u8			raw_erased_mem_count;	/* 181 */
-- 
cgit v1.2.3


From a70aaa64da2205d32d1c9362d8f5d4be619cd58f Mon Sep 17 00:00:00 2001
From: Doug Anderson <dianders@chromium.org>
Date: Fri, 11 Jan 2013 17:03:50 +0000
Subject: mmc: dw_mmc: Add "disable-wp" device tree property

The "disable-wp" property is used to specify that a given SD card slot
doesn't have a concept of write protect.  This eliminates the need for
special case code for SD slots that should never be write protected
(like a micro SD slot or a dev board).

The dw_mmc driver is special in needing to specify "disable-wp"
because the lack of a "wp-gpios" property means to use the special
purpose write protect line.  On some other mmc devices the lack of
"wp-gpios" means that write protect should be disabled.

Signed-off-by: Doug Anderson <dianders@chromium.org>
Acked-by: Seungwon Jeon <tgih.jun@samsung.com>
Acked-by: Will Newton <will.newton@imgtec.com>
Acked-by: Olof Johansson <olof@lixom.net>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 .../devicetree/bindings/mmc/synopsis-dw-mshc.txt   | 12 +++++--
 drivers/mmc/host/dw_mmc.c                          | 41 +++++++++++++++++++++-
 include/linux/mmc/dw_mmc.h                         |  9 +++++
 3 files changed, 59 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/mmc/synopsis-dw-mshc.txt b/Documentation/devicetree/bindings/mmc/synopsis-dw-mshc.txt
index 06cd32d08052..726fd2122a13 100644
--- a/Documentation/devicetree/bindings/mmc/synopsis-dw-mshc.txt
+++ b/Documentation/devicetree/bindings/mmc/synopsis-dw-mshc.txt
@@ -26,8 +26,16 @@ Required Properties:
 	* bus-width: as documented in mmc core bindings.
 
 	* wp-gpios: specifies the write protect gpio line. The format of the
-	  gpio specifier depends on the gpio controller. If the write-protect
-	  line is not available, this property is optional.
+	  gpio specifier depends on the gpio controller. If a GPIO is not used
+	  for write-protect, this property is optional.
+
+	* disable-wp: If the wp-gpios property isn't present then (by default)
+	  we'd assume that the write protect is hooked up directly to the
+	  controller's special purpose write protect line (accessible via
+	  the WRTPRT register).  However, it's possible that we simply don't
+	  want write protect.  In that case specify 'disable-wp'.
+	  NOTE: This property is not required for slots known to always
+	  connect to eMMC or SDIO cards.
 
 Optional properties:
 
diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c
index 323c5022c2ca..90f7d990551b 100644
--- a/drivers/mmc/host/dw_mmc.c
+++ b/drivers/mmc/host/dw_mmc.c
@@ -74,6 +74,7 @@ struct idmac_desc {
  * struct dw_mci_slot - MMC slot state
  * @mmc: The mmc_host representing this slot.
  * @host: The MMC controller this slot is using.
+ * @quirks: Slot-level quirks (DW_MCI_SLOT_QUIRK_XXX)
  * @ctype: Card type for this slot.
  * @mrq: mmc_request currently being processed or waiting to be
  *	processed, or NULL when the slot is idle.
@@ -88,6 +89,8 @@ struct dw_mci_slot {
 	struct mmc_host		*mmc;
 	struct dw_mci		*host;
 
+	int			quirks;
+
 	u32			ctype;
 
 	struct mmc_request	*mrq;
@@ -825,7 +828,13 @@ static int dw_mci_get_ro(struct mmc_host *mmc)
 	struct dw_mci_board *brd = slot->host->pdata;
 
 	/* Use platform get_ro function, else try on board write protect */
-	if (brd->quirks & DW_MCI_QUIRK_NO_WRITE_PROTECT)
+
+	/*
+	 * NOTE: DW_MCI_QUIRK_NO_WRITE_PROTECT will be removed in a future
+	 * patch in the series once reference to it is removed.
+	 */
+	if ((brd->quirks & DW_MCI_QUIRK_NO_WRITE_PROTECT) ||
+	    (slot->quirks & DW_MCI_SLOT_QUIRK_NO_WRITE_PROTECT))
 		read_only = 0;
 	else if (brd->get_ro)
 		read_only = brd->get_ro(slot->id);
@@ -1785,6 +1794,30 @@ static struct device_node *dw_mci_of_find_slot_node(struct device *dev, u8 slot)
 	return NULL;
 }
 
+static struct dw_mci_of_slot_quirks {
+	char *quirk;
+	int id;
+} of_slot_quirks[] = {
+	{
+		.quirk	= "disable-wp",
+		.id	= DW_MCI_SLOT_QUIRK_NO_WRITE_PROTECT,
+	},
+};
+
+static int dw_mci_of_get_slot_quirks(struct device *dev, u8 slot)
+{
+	struct device_node *np = dw_mci_of_find_slot_node(dev, slot);
+	int quirks = 0;
+	int idx;
+
+	/* get quirks */
+	for (idx = 0; idx < ARRAY_SIZE(of_slot_quirks); idx++)
+		if (of_get_property(np, of_slot_quirks[idx].quirk, NULL))
+			quirks |= of_slot_quirks[idx].id;
+
+	return quirks;
+}
+
 /* find out bus-width for a given slot */
 static u32 dw_mci_of_get_bus_wd(struct device *dev, u8 slot)
 {
@@ -1800,6 +1833,10 @@ static u32 dw_mci_of_get_bus_wd(struct device *dev, u8 slot)
 	return bus_wd;
 }
 #else /* CONFIG_OF */
+static int dw_mci_of_get_slot_quirks(struct device *dev, u8 slot)
+{
+	return 0;
+}
 static u32 dw_mci_of_get_bus_wd(struct device *dev, u8 slot)
 {
 	return 1;
@@ -1828,6 +1865,8 @@ static int dw_mci_init_slot(struct dw_mci *host, unsigned int id)
 	slot->host = host;
 	host->slot[id] = slot;
 
+	slot->quirks = dw_mci_of_get_slot_quirks(host->dev, slot->id);
+
 	mmc->ops = &dw_mci_ops;
 	mmc->f_min = DIV_ROUND_UP(host->bus_hz, 510);
 	mmc->f_max = host->bus_hz;
diff --git a/include/linux/mmc/dw_mmc.h b/include/linux/mmc/dw_mmc.h
index 34be4f47293c..de61de5608c9 100644
--- a/include/linux/mmc/dw_mmc.h
+++ b/include/linux/mmc/dw_mmc.h
@@ -209,9 +209,18 @@ struct dw_mci_dma_ops {
 #define DW_MCI_QUIRK_HIGHSPEED			BIT(2)
 /* Unreliable card detection */
 #define DW_MCI_QUIRK_BROKEN_CARD_DETECTION	BIT(3)
+
 /* Write Protect detection not available */
+/*
+ * NOTE: DW_MCI_QUIRK_NO_WRITE_PROTECT will be removed in a future
+ * patch in the series once reference to it is removed.
+ */
 #define DW_MCI_QUIRK_NO_WRITE_PROTECT		BIT(4)
 
+/* Slot level quirks */
+/* This slot has no write protect */
+#define DW_MCI_SLOT_QUIRK_NO_WRITE_PROTECT	BIT(0)
+
 struct dma_pdata;
 
 struct block_settings {
-- 
cgit v1.2.3


From 9640639b09313af4cd37a465408643aba927808e Mon Sep 17 00:00:00 2001
From: Doug Anderson <dianders@chromium.org>
Date: Fri, 11 Jan 2013 17:03:54 +0000
Subject: mmc: dw_mmc: Remove DW_MCI_QUIRK_NO_WRITE_PROTECT

The original quirk was added in the change 'mmc: dw_mmc: add quirk to
indicate missing write protect line'.  The original quirk was added at
a controller level even though each slot has its own write protect (so
the quirk should be at the slot level).  A recent change (mmc: dw_mmc:
Add "disable-wp" device tree property) added a slot-level quirk and
support for the quirk directly to dw_mmc.

Signed-off-by: Doug Anderson <dianders@chromium.org>
Acked-by: Will Newton <will.newton@imgtec.com>
Acked-by: Olof Johansson <olof@lixom.net>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/host/dw_mmc.c  | 8 +-------
 include/linux/mmc/dw_mmc.h | 7 -------
 2 files changed, 1 insertion(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c
index df6207909fe7..60063ccb4c4b 100644
--- a/drivers/mmc/host/dw_mmc.c
+++ b/drivers/mmc/host/dw_mmc.c
@@ -831,13 +831,7 @@ static int dw_mci_get_ro(struct mmc_host *mmc)
 	struct dw_mci_board *brd = slot->host->pdata;
 
 	/* Use platform get_ro function, else try on board write protect */
-
-	/*
-	 * NOTE: DW_MCI_QUIRK_NO_WRITE_PROTECT will be removed in a future
-	 * patch in the series once reference to it is removed.
-	 */
-	if ((brd->quirks & DW_MCI_QUIRK_NO_WRITE_PROTECT) ||
-	    (slot->quirks & DW_MCI_SLOT_QUIRK_NO_WRITE_PROTECT))
+	if (slot->quirks & DW_MCI_SLOT_QUIRK_NO_WRITE_PROTECT)
 		read_only = 0;
 	else if (brd->get_ro)
 		read_only = brd->get_ro(slot->id);
diff --git a/include/linux/mmc/dw_mmc.h b/include/linux/mmc/dw_mmc.h
index de61de5608c9..198f0fa44e9f 100644
--- a/include/linux/mmc/dw_mmc.h
+++ b/include/linux/mmc/dw_mmc.h
@@ -210,13 +210,6 @@ struct dw_mci_dma_ops {
 /* Unreliable card detection */
 #define DW_MCI_QUIRK_BROKEN_CARD_DETECTION	BIT(3)
 
-/* Write Protect detection not available */
-/*
- * NOTE: DW_MCI_QUIRK_NO_WRITE_PROTECT will be removed in a future
- * patch in the series once reference to it is removed.
- */
-#define DW_MCI_QUIRK_NO_WRITE_PROTECT		BIT(4)
-
 /* Slot level quirks */
 /* This slot has no write protect */
 #define DW_MCI_SLOT_QUIRK_NO_WRITE_PROTECT	BIT(0)
-- 
cgit v1.2.3


From af51079e68d4759e458b0592df5d1fab373c43ae Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Mon, 21 Jan 2013 19:02:28 +0800
Subject: mmc: sdhci-esdhc-imx: support 8bit mode

The i.MX esdhc has a nonstandard bit layout for the SDHCI_HOST_CONTROL
register. To support 8bit bus width on i.MX populate the platform_bus_width
callback. This is tested on an i.MX25, but should according to the datasheets
work on the other i.MX using this hardware aswell. The i.MX6, while having
a SDHCI_SPEC_300 controller, still uses the same nonstandard register layout.

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
Signed-off-by: Shawn Guo <shawn.guo@linaro.org>
Tested-by: Dirk Behme <dirk.behme@de.bosch.com>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/host/sdhci-esdhc-imx.c          | 56 +++++++++++++++++++++++++++--
 include/linux/platform_data/mmc-esdhc-imx.h |  1 +
 2 files changed, 55 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c
index 24daaf4e20ba..f7ee5e67516c 100644
--- a/drivers/mmc/host/sdhci-esdhc-imx.c
+++ b/drivers/mmc/host/sdhci-esdhc-imx.c
@@ -40,6 +40,13 @@
 /* Bits 3 and 6 are not SDHCI standard definitions */
 #define  ESDHC_MIX_CTRL_SDHCI_MASK	0xb7
 
+/*
+ * Our interpretation of the SDHCI_HOST_CONTROL register
+ */
+#define ESDHC_CTRL_4BITBUS		(0x1 << 1)
+#define ESDHC_CTRL_8BITBUS		(0x2 << 1)
+#define ESDHC_CTRL_BUSWIDTH_MASK	(0x3 << 1)
+
 /*
  * There is an INT DMA ERR mis-match between eSDHC and STD SDHC SPEC:
  * Bit25 is used in STD SPEC, and is reserved in fsl eSDHC design,
@@ -294,6 +301,7 @@ static void esdhc_writeb_le(struct sdhci_host *host, u8 val, int reg)
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
 	struct pltfm_imx_data *imx_data = pltfm_host->priv;
 	u32 new_val;
+	u32 mask;
 
 	switch (reg) {
 	case SDHCI_POWER_CONTROL:
@@ -304,7 +312,7 @@ static void esdhc_writeb_le(struct sdhci_host *host, u8 val, int reg)
 		return;
 	case SDHCI_HOST_CONTROL:
 		/* FSL messed up here, so we need to manually compose it. */
-		new_val = val & (SDHCI_CTRL_LED | SDHCI_CTRL_4BITBUS);
+		new_val = val & SDHCI_CTRL_LED;
 		/* ensure the endianness */
 		new_val |= ESDHC_HOST_CONTROL_LE;
 		/* bits 8&9 are reserved on mx25 */
@@ -313,7 +321,13 @@ static void esdhc_writeb_le(struct sdhci_host *host, u8 val, int reg)
 			new_val |= (val & SDHCI_CTRL_DMA_MASK) << 5;
 		}
 
-		esdhc_clrset_le(host, 0xffff, new_val, reg);
+		/*
+		 * Do not touch buswidth bits here. This is done in
+		 * esdhc_pltfm_bus_width.
+		 */
+		mask = 0xffff & ~ESDHC_CTRL_BUSWIDTH_MASK;
+
+		esdhc_clrset_le(host, mask, new_val, reg);
 		return;
 	}
 	esdhc_clrset_le(host, 0xff, val, reg);
@@ -370,6 +384,28 @@ static unsigned int esdhc_pltfm_get_ro(struct sdhci_host *host)
 	return -ENOSYS;
 }
 
+static int esdhc_pltfm_bus_width(struct sdhci_host *host, int width)
+{
+	u32 ctrl;
+
+	switch (width) {
+	case MMC_BUS_WIDTH_8:
+		ctrl = ESDHC_CTRL_8BITBUS;
+		break;
+	case MMC_BUS_WIDTH_4:
+		ctrl = ESDHC_CTRL_4BITBUS;
+		break;
+	default:
+		ctrl = 0;
+		break;
+	}
+
+	esdhc_clrset_le(host, ESDHC_CTRL_BUSWIDTH_MASK, ctrl,
+			SDHCI_HOST_CONTROL);
+
+	return 0;
+}
+
 static struct sdhci_ops sdhci_esdhc_ops = {
 	.read_l = esdhc_readl_le,
 	.read_w = esdhc_readw_le,
@@ -380,6 +416,7 @@ static struct sdhci_ops sdhci_esdhc_ops = {
 	.get_max_clock = esdhc_pltfm_get_max_clock,
 	.get_min_clock = esdhc_pltfm_get_min_clock,
 	.get_ro = esdhc_pltfm_get_ro,
+	.platform_bus_width = esdhc_pltfm_bus_width,
 };
 
 static struct sdhci_pltfm_data sdhci_esdhc_imx_pdata = {
@@ -417,6 +454,8 @@ sdhci_esdhc_imx_probe_dt(struct platform_device *pdev,
 	if (gpio_is_valid(boarddata->wp_gpio))
 		boarddata->wp_type = ESDHC_WP_GPIO;
 
+	of_property_read_u32(np, "bus-width", &boarddata->max_bus_width);
+
 	return 0;
 }
 #else
@@ -548,6 +587,19 @@ static int sdhci_esdhc_imx_probe(struct platform_device *pdev)
 		break;
 	}
 
+	switch (boarddata->max_bus_width) {
+	case 8:
+		host->mmc->caps |= MMC_CAP_8_BIT_DATA | MMC_CAP_4_BIT_DATA;
+		break;
+	case 4:
+		host->mmc->caps |= MMC_CAP_4_BIT_DATA;
+		break;
+	case 1:
+	default:
+		host->quirks |= SDHCI_QUIRK_FORCE_1_BIT_DATA;
+		break;
+	}
+
 	err = sdhci_add_host(host);
 	if (err)
 		goto disable_clk;
diff --git a/include/linux/platform_data/mmc-esdhc-imx.h b/include/linux/platform_data/mmc-esdhc-imx.h
index aaf97481f413..b4a0521ce411 100644
--- a/include/linux/platform_data/mmc-esdhc-imx.h
+++ b/include/linux/platform_data/mmc-esdhc-imx.h
@@ -39,5 +39,6 @@ struct esdhc_platform_data {
 	unsigned int cd_gpio;
 	enum wp_types wp_type;
 	enum cd_types cd_type;
+	int max_bus_width;
 };
 #endif /* __ASM_ARCH_IMX_ESDHC_H */
-- 
cgit v1.2.3


From d887874e0ead6a0b86b6046b872730c81c121352 Mon Sep 17 00:00:00 2001
From: Johan Rudholm <johan.rudholm@stericsson.com>
Date: Mon, 28 Jan 2013 15:08:26 +0100
Subject: mmc: core: Add card_busy to host_ops

This host_ops member is used to test if the card is signaling busy by
pulling dat[0:3] low.

Signed-off-by: Johan Rudholm <johan.rudholm@stericsson.com>
Acked-by: Ulf Hansson <ulf.hansson@linaro.org>
Tested-by: Wei WANG <wei_wang@realsil.com.cn>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 include/linux/mmc/host.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 523d570f58ad..0373b0a6daac 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -131,6 +131,9 @@ struct mmc_host_ops {
 
 	int	(*start_signal_voltage_switch)(struct mmc_host *host, struct mmc_ios *ios);
 
+	/* Check if the card is pulling dat[0:3] low */
+	int	(*card_busy)(struct mmc_host *host);
+
 	/* The tuning command opcode value is different for SD and eMMC cards */
 	int	(*execute_tuning)(struct mmc_host *host, u32 opcode);
 	void	(*enable_preset_value)(struct mmc_host *host, bool enable);
-- 
cgit v1.2.3


From 52983382c74f59a3953e622d7661a24e1bc4388a Mon Sep 17 00:00:00 2001
From: Kevin Liu <kliu5@marvell.com>
Date: Thu, 31 Jan 2013 11:31:37 +0800
Subject: mmc: sdhci: enhance preset value function

4d55c5a1 ("mmc: sdhci: enable preset value after uhs initialization")
added preset value support and enabled it by default during sd card init.

Below are the enhancements introduced by this patch:

1. In current code, preset value is enabled after setting clock finished,
which means the clock is manually set by driver firstly and then suddenly
switched to preset value at this point. So the first setting is useless
and unnecessary. What's more, the first clock setting may differ from the
preset one.  The better way is enable preset value just after switch to
UHS mode so the preset value can take effect immediately. So move preset
value enable from mmc_sd_init_card to sdhci_set_ios which will be called
during set timing.

2. In current code, preset value is disabled at the beginning of
mmc_attach_sd.  It's too late since low freq (400khz) should be set in
mmc_power_up.  So move preset value disable to sdhci_set_ios which will
be called during power up.

3. host->clock and ios->drv_type should also be updated according to the
preset value if it's enabled. Current code missed this.

4. This patch also introduce a quirk to disable preset value in case
preset value doesn't work.

This patch has been verified on sdhci-pxav3 platform with both preset
enabled and disabled.

Signed-off-by: Kevin Liu <kliu5@marvell.com>
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/core/sd.c     |  17 ------
 drivers/mmc/host/sdhci.c  | 129 ++++++++++++++++++++++++++++++++--------------
 drivers/mmc/host/sdhci.h  |  12 +++++
 include/linux/mmc/host.h  |   1 -
 include/linux/mmc/sdhci.h |   1 +
 5 files changed, 102 insertions(+), 58 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c
index 03134b1e563c..9e645e19cec6 100644
--- a/drivers/mmc/core/sd.c
+++ b/drivers/mmc/core/sd.c
@@ -969,16 +969,6 @@ static int mmc_sd_init_card(struct mmc_host *host, u32 ocr,
 
 		/* Card is an ultra-high-speed card */
 		mmc_card_set_uhs(card);
-
-		/*
-		 * Since initialization is now complete, enable preset
-		 * value registers for UHS-I cards.
-		 */
-		if (host->ops->enable_preset_value) {
-			mmc_host_clk_hold(card->host);
-			host->ops->enable_preset_value(host, true);
-			mmc_host_clk_release(card->host);
-		}
 	} else {
 		/*
 		 * Attempt to change to high-speed (if supported)
@@ -1157,13 +1147,6 @@ int mmc_attach_sd(struct mmc_host *host)
 	BUG_ON(!host);
 	WARN_ON(!host->claimed);
 
-	/* Disable preset value enable if already set since last time */
-	if (host->ops->enable_preset_value) {
-		mmc_host_clk_hold(host);
-		host->ops->enable_preset_value(host, false);
-		mmc_host_clk_release(host);
-	}
-
 	err = mmc_send_app_op_cond(host, 0, &ocr);
 	if (err)
 		return err;
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index efb112684787..ba586ae99252 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -53,6 +53,7 @@ static void sdhci_send_command(struct sdhci_host *, struct mmc_command *);
 static void sdhci_finish_command(struct sdhci_host *);
 static int sdhci_execute_tuning(struct mmc_host *mmc, u32 opcode);
 static void sdhci_tuning_timer(unsigned long data);
+static void sdhci_enable_preset_value(struct sdhci_host *host, bool enable);
 
 #ifdef CONFIG_PM_RUNTIME
 static int sdhci_runtime_pm_get(struct sdhci_host *host);
@@ -1082,6 +1083,37 @@ static void sdhci_finish_command(struct sdhci_host *host)
 	}
 }
 
+static u16 sdhci_get_preset_value(struct sdhci_host *host)
+{
+	u16 ctrl, preset = 0;
+
+	ctrl = sdhci_readw(host, SDHCI_HOST_CONTROL2);
+
+	switch (ctrl & SDHCI_CTRL_UHS_MASK) {
+	case SDHCI_CTRL_UHS_SDR12:
+		preset = sdhci_readw(host, SDHCI_PRESET_FOR_SDR12);
+		break;
+	case SDHCI_CTRL_UHS_SDR25:
+		preset = sdhci_readw(host, SDHCI_PRESET_FOR_SDR25);
+		break;
+	case SDHCI_CTRL_UHS_SDR50:
+		preset = sdhci_readw(host, SDHCI_PRESET_FOR_SDR50);
+		break;
+	case SDHCI_CTRL_UHS_SDR104:
+		preset = sdhci_readw(host, SDHCI_PRESET_FOR_SDR104);
+		break;
+	case SDHCI_CTRL_UHS_DDR50:
+		preset = sdhci_readw(host, SDHCI_PRESET_FOR_DDR50);
+		break;
+	default:
+		pr_warn("%s: Invalid UHS-I mode selected\n",
+			mmc_hostname(host->mmc));
+		preset = sdhci_readw(host, SDHCI_PRESET_FOR_SDR12);
+		break;
+	}
+	return preset;
+}
+
 static void sdhci_set_clock(struct sdhci_host *host, unsigned int clock)
 {
 	int div = 0; /* Initialized for compiler warning */
@@ -1106,35 +1138,43 @@ static void sdhci_set_clock(struct sdhci_host *host, unsigned int clock)
 		goto out;
 
 	if (host->version >= SDHCI_SPEC_300) {
+		if (sdhci_readw(host, SDHCI_HOST_CONTROL2) &
+			SDHCI_CTRL_PRESET_VAL_ENABLE) {
+			u16 pre_val;
+
+			clk = sdhci_readw(host, SDHCI_CLOCK_CONTROL);
+			pre_val = sdhci_get_preset_value(host);
+			div = (pre_val & SDHCI_PRESET_SDCLK_FREQ_MASK)
+				>> SDHCI_PRESET_SDCLK_FREQ_SHIFT;
+			if (host->clk_mul &&
+				(pre_val & SDHCI_PRESET_CLKGEN_SEL_MASK)) {
+				clk = SDHCI_PROG_CLOCK_MODE;
+				real_div = div + 1;
+				clk_mul = host->clk_mul;
+			} else {
+				real_div = max_t(int, 1, div << 1);
+			}
+			goto clock_set;
+		}
+
 		/*
 		 * Check if the Host Controller supports Programmable Clock
 		 * Mode.
 		 */
 		if (host->clk_mul) {
-			u16 ctrl;
-
+			for (div = 1; div <= 1024; div++) {
+				if ((host->max_clk * host->clk_mul / div)
+					<= clock)
+					break;
+			}
 			/*
-			 * We need to figure out whether the Host Driver needs
-			 * to select Programmable Clock Mode, or the value can
-			 * be set automatically by the Host Controller based on
-			 * the Preset Value registers.
+			 * Set Programmable Clock Mode in the Clock
+			 * Control register.
 			 */
-			ctrl = sdhci_readw(host, SDHCI_HOST_CONTROL2);
-			if (!(ctrl & SDHCI_CTRL_PRESET_VAL_ENABLE)) {
-				for (div = 1; div <= 1024; div++) {
-					if (((host->max_clk * host->clk_mul) /
-					      div) <= clock)
-						break;
-				}
-				/*
-				 * Set Programmable Clock Mode in the Clock
-				 * Control register.
-				 */
-				clk = SDHCI_PROG_CLOCK_MODE;
-				real_div = div;
-				clk_mul = host->clk_mul;
-				div--;
-			}
+			clk = SDHCI_PROG_CLOCK_MODE;
+			real_div = div;
+			clk_mul = host->clk_mul;
+			div--;
 		} else {
 			/* Version 3.00 divisors must be a multiple of 2. */
 			if (host->max_clk <= clock)
@@ -1159,6 +1199,7 @@ static void sdhci_set_clock(struct sdhci_host *host, unsigned int clock)
 		div >>= 1;
 	}
 
+clock_set:
 	if (real_div)
 		host->mmc->actual_clock = (host->max_clk * clk_mul) / real_div;
 
@@ -1376,6 +1417,10 @@ static void sdhci_do_set_ios(struct sdhci_host *host, struct mmc_ios *ios)
 		sdhci_reinit(host);
 	}
 
+	if (host->version >= SDHCI_SPEC_300 &&
+		(ios->power_mode == MMC_POWER_UP))
+		sdhci_enable_preset_value(host, false);
+
 	sdhci_set_clock(host, ios->clock);
 
 	if (ios->power_mode == MMC_POWER_OFF)
@@ -1496,6 +1541,20 @@ static void sdhci_do_set_ios(struct sdhci_host *host, struct mmc_ios *ios)
 			sdhci_writew(host, ctrl_2, SDHCI_HOST_CONTROL2);
 		}
 
+		if (!(host->quirks2 & SDHCI_QUIRK2_PRESET_VALUE_BROKEN) &&
+				((ios->timing == MMC_TIMING_UHS_SDR12) ||
+				 (ios->timing == MMC_TIMING_UHS_SDR25) ||
+				 (ios->timing == MMC_TIMING_UHS_SDR50) ||
+				 (ios->timing == MMC_TIMING_UHS_SDR104) ||
+				 (ios->timing == MMC_TIMING_UHS_DDR50))) {
+			u16 preset;
+
+			sdhci_enable_preset_value(host, true);
+			preset = sdhci_get_preset_value(host);
+			ios->drv_type = (preset & SDHCI_PRESET_DRV_MASK)
+				>> SDHCI_PRESET_DRV_SHIFT;
+		}
+
 		/* Re-enable SD Clock */
 		sdhci_update_clock(host);
 	} else
@@ -1925,17 +1984,15 @@ out:
 	return err;
 }
 
-static void sdhci_do_enable_preset_value(struct sdhci_host *host, bool enable)
+
+static void sdhci_enable_preset_value(struct sdhci_host *host, bool enable)
 {
 	u16 ctrl;
-	unsigned long flags;
 
 	/* Host Controller v3.00 defines preset value registers */
 	if (host->version < SDHCI_SPEC_300)
 		return;
 
-	spin_lock_irqsave(&host->lock, flags);
-
 	ctrl = sdhci_readw(host, SDHCI_HOST_CONTROL2);
 
 	/*
@@ -1951,17 +2008,6 @@ static void sdhci_do_enable_preset_value(struct sdhci_host *host, bool enable)
 		sdhci_writew(host, ctrl, SDHCI_HOST_CONTROL2);
 		host->flags &= ~SDHCI_PV_ENABLED;
 	}
-
-	spin_unlock_irqrestore(&host->lock, flags);
-}
-
-static void sdhci_enable_preset_value(struct mmc_host *mmc, bool enable)
-{
-	struct sdhci_host *host = mmc_priv(mmc);
-
-	sdhci_runtime_pm_get(host);
-	sdhci_do_enable_preset_value(host, enable);
-	sdhci_runtime_pm_put(host);
 }
 
 static void sdhci_card_event(struct mmc_host *mmc)
@@ -1997,7 +2043,6 @@ static const struct mmc_host_ops sdhci_ops = {
 	.enable_sdio_irq = sdhci_enable_sdio_irq,
 	.start_signal_voltage_switch	= sdhci_start_signal_voltage_switch,
 	.execute_tuning			= sdhci_execute_tuning,
-	.enable_preset_value		= sdhci_enable_preset_value,
 	.card_event			= sdhci_card_event,
 	.card_busy	= sdhci_card_busy,
 };
@@ -2591,8 +2636,12 @@ int sdhci_runtime_resume_host(struct sdhci_host *host)
 	sdhci_do_set_ios(host, &host->mmc->ios);
 
 	sdhci_do_start_signal_voltage_switch(host, &host->mmc->ios);
-	if (host_flags & SDHCI_PV_ENABLED)
-		sdhci_do_enable_preset_value(host, true);
+	if ((host_flags & SDHCI_PV_ENABLED) &&
+		!(host->quirks2 & SDHCI_QUIRK2_PRESET_VALUE_BROKEN)) {
+		spin_lock_irqsave(&host->lock, flags);
+		sdhci_enable_preset_value(host, true);
+		spin_unlock_irqrestore(&host->lock, flags);
+	}
 
 	/* Set the re-tuning expiration flag */
 	if (host->flags & SDHCI_USING_RETUNING_TIMER)
diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h
index c8d11b904a40..379e09d9f3c1 100644
--- a/drivers/mmc/host/sdhci.h
+++ b/drivers/mmc/host/sdhci.h
@@ -229,6 +229,18 @@
 
 /* 60-FB reserved */
 
+#define SDHCI_PRESET_FOR_SDR12 0x66
+#define SDHCI_PRESET_FOR_SDR25 0x68
+#define SDHCI_PRESET_FOR_SDR50 0x6A
+#define SDHCI_PRESET_FOR_SDR104        0x6C
+#define SDHCI_PRESET_FOR_DDR50 0x6E
+#define SDHCI_PRESET_DRV_MASK  0xC000
+#define SDHCI_PRESET_DRV_SHIFT  14
+#define SDHCI_PRESET_CLKGEN_SEL_MASK   0x400
+#define SDHCI_PRESET_CLKGEN_SEL_SHIFT	10
+#define SDHCI_PRESET_SDCLK_FREQ_MASK   0x3FF
+#define SDHCI_PRESET_SDCLK_FREQ_SHIFT	0
+
 #define SDHCI_SLOT_INT_STATUS	0xFC
 
 #define SDHCI_HOST_VERSION	0xFE
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 0373b0a6daac..6c235e03de29 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -136,7 +136,6 @@ struct mmc_host_ops {
 
 	/* The tuning command opcode value is different for SD and eMMC cards */
 	int	(*execute_tuning)(struct mmc_host *host, u32 opcode);
-	void	(*enable_preset_value)(struct mmc_host *host, bool enable);
 	int	(*select_drive_strength)(unsigned int max_dtr, int host_drv, int card_drv);
 	void	(*hw_reset)(struct mmc_host *host);
 	void	(*card_event)(struct mmc_host *host);
diff --git a/include/linux/mmc/sdhci.h b/include/linux/mmc/sdhci.h
index 4bbc3301fbbf..b838ffc49e4a 100644
--- a/include/linux/mmc/sdhci.h
+++ b/include/linux/mmc/sdhci.h
@@ -94,6 +94,7 @@ struct sdhci_host {
 #define SDHCI_QUIRK2_HOST_NO_CMD23			(1<<1)
 /* The system physically doesn't support 1.8v, even if the host does */
 #define SDHCI_QUIRK2_NO_1_8_V				(1<<2)
+#define SDHCI_QUIRK2_PRESET_VALUE_BROKEN		(1<<3)
 
 	int irq;		/* Device IRQ */
 	void __iomem *ioaddr;	/* Mapped address */
-- 
cgit v1.2.3


From abd9ac144947d9a604beb763339e2f77ce8bec79 Mon Sep 17 00:00:00 2001
From: Seungwon Jeon <tgih.jun@samsung.com>
Date: Wed, 6 Feb 2013 17:01:43 +0900
Subject: mmc: add packed command feature of eMMC4.5

This patch adds packed command feature of eMMC4.5.  The maximum number
for packing read (or write) is offered and exception event relevant to
packed command which is used for error handling is enabled. If host
wants to use this feature, MMC_CAP2_PACKED_CMD should be set.

Signed-off-by: Seungwon Jeon <tgih.jun@samsung.com>
Reviewed-by: Maya Erez <merez@codeaurora.org>
Reviewed-by: Subhash Jadavani <subhashj@codeaurora.org>
Reviewed-by: Namjae Jeon <linkinjeon@gmail.com>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/core/mmc.c   | 28 ++++++++++++++++++++++++++++
 include/linux/mmc/card.h |  3 +++
 include/linux/mmc/host.h |  4 ++++
 include/linux/mmc/mmc.h  | 15 +++++++++++++--
 4 files changed, 48 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index 8a3ad602a877..c8f3d6e0684e 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -538,6 +538,11 @@ static int mmc_read_ext_csd(struct mmc_card *card, u8 *ext_csd)
 		} else {
 			card->ext_csd.data_tag_unit_size = 0;
 		}
+
+		card->ext_csd.max_packed_writes =
+			ext_csd[EXT_CSD_MAX_PACKED_WRITES];
+		card->ext_csd.max_packed_reads =
+			ext_csd[EXT_CSD_MAX_PACKED_READS];
 	} else {
 		card->ext_csd.data_sector_size = 512;
 	}
@@ -1275,6 +1280,29 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr,
 		}
 	}
 
+	/*
+	 * The mandatory minimum values are defined for packed command.
+	 * read: 5, write: 3
+	 */
+	if (card->ext_csd.max_packed_writes >= 3 &&
+	    card->ext_csd.max_packed_reads >= 5 &&
+	    host->caps2 & MMC_CAP2_PACKED_CMD) {
+		err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
+				EXT_CSD_EXP_EVENTS_CTRL,
+				EXT_CSD_PACKED_EVENT_EN,
+				card->ext_csd.generic_cmd6_time);
+		if (err && err != -EBADMSG)
+			goto free_card;
+		if (err) {
+			pr_warn("%s: Enabling packed event failed\n",
+				mmc_hostname(card->host));
+			card->ext_csd.packed_event_en = 0;
+			err = 0;
+		} else {
+			card->ext_csd.packed_event_en = 1;
+		}
+	}
+
 	if (!oldcard)
 		host->card = card;
 
diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index 7069dcea27c5..237f253f2fe0 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -53,6 +53,9 @@ struct mmc_ext_csd {
 	u8			part_config;
 	u8			cache_ctrl;
 	u8			rst_n_function;
+	u8			max_packed_writes;
+	u8			max_packed_reads;
+	u8			packed_event_en;
 	unsigned int		part_time;		/* Units: ms */
 	unsigned int		sa_timeout;		/* Units: 100ns */
 	unsigned int		generic_cmd6_time;	/* Units: 10ms */
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 6c235e03de29..a0466c03f5e1 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -276,6 +276,10 @@ struct mmc_host {
 #define MMC_CAP2_HC_ERASE_SZ	(1 << 9)	/* High-capacity erase size */
 #define MMC_CAP2_CD_ACTIVE_HIGH	(1 << 10)	/* Card-detect signal active high */
 #define MMC_CAP2_RO_ACTIVE_HIGH	(1 << 11)	/* Write-protect signal active high */
+#define MMC_CAP2_PACKED_RD	(1 << 12)	/* Allow packed read */
+#define MMC_CAP2_PACKED_WR	(1 << 13)	/* Allow packed write */
+#define MMC_CAP2_PACKED_CMD	(MMC_CAP2_PACKED_RD | \
+				 MMC_CAP2_PACKED_WR)
 
 	mmc_pm_flag_t		pm_caps;	/* supported pm features */
 
diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h
index 94d532e41c61..50bcde3677ca 100644
--- a/include/linux/mmc/mmc.h
+++ b/include/linux/mmc/mmc.h
@@ -139,7 +139,7 @@ static inline bool mmc_op_multi(u32 opcode)
 #define R1_CURRENT_STATE(x)	((x & 0x00001E00) >> 9)	/* sx, b (4 bits) */
 #define R1_READY_FOR_DATA	(1 << 8)	/* sx, a */
 #define R1_SWITCH_ERROR		(1 << 7)	/* sx, c */
-#define R1_EXCEPTION_EVENT	(1 << 6)	/* sx, a */
+#define R1_EXCEPTION_EVENT	(1 << 6)	/* sr, a */
 #define R1_APP_CMD		(1 << 5)	/* sr, c */
 
 #define R1_STATE_IDLE	0
@@ -275,7 +275,10 @@ struct _mmc_csd {
 #define EXT_CSD_FLUSH_CACHE		32      /* W */
 #define EXT_CSD_CACHE_CTRL		33      /* R/W */
 #define EXT_CSD_POWER_OFF_NOTIFICATION	34	/* R/W */
-#define EXT_CSD_EXP_EVENTS_STATUS	54	/* RO */
+#define EXT_CSD_PACKED_FAILURE_INDEX	35	/* RO */
+#define EXT_CSD_PACKED_CMD_STATUS	36	/* RO */
+#define EXT_CSD_EXP_EVENTS_STATUS	54	/* RO, 2 bytes */
+#define EXT_CSD_EXP_EVENTS_CTRL		56	/* R/W, 2 bytes */
 #define EXT_CSD_DATA_SECTOR_SIZE	61	/* R */
 #define EXT_CSD_GP_SIZE_MULT		143	/* R/W */
 #define EXT_CSD_PARTITION_ATTRIBUTE	156	/* R/W */
@@ -324,6 +327,8 @@ struct _mmc_csd {
 #define EXT_CSD_CACHE_SIZE		249	/* RO, 4 bytes */
 #define EXT_CSD_TAG_UNIT_SIZE		498	/* RO */
 #define EXT_CSD_DATA_TAG_SUPPORT	499	/* RO */
+#define EXT_CSD_MAX_PACKED_WRITES	500	/* RO */
+#define EXT_CSD_MAX_PACKED_READS	501	/* RO */
 #define EXT_CSD_BKOPS_SUPPORT		502	/* RO */
 #define EXT_CSD_HPI_FEATURES		503	/* RO */
 
@@ -385,6 +390,9 @@ struct _mmc_csd {
 #define EXT_CSD_PWR_CL_4BIT_MASK	0x0F	/* 8 bit PWR CLS */
 #define EXT_CSD_PWR_CL_8BIT_SHIFT	4
 #define EXT_CSD_PWR_CL_4BIT_SHIFT	0
+
+#define EXT_CSD_PACKED_EVENT_EN	BIT(3)
+
 /*
  * EXCEPTION_EVENT_STATUS field
  */
@@ -393,6 +401,9 @@ struct _mmc_csd {
 #define EXT_CSD_SYSPOOL_EXHAUSTED	BIT(2)
 #define EXT_CSD_PACKED_FAILURE		BIT(3)
 
+#define EXT_CSD_PACKED_GENERIC_ERROR	BIT(0)
+#define EXT_CSD_PACKED_INDEXED_ERROR	BIT(1)
+
 /*
  * BKOPS status level
  */
-- 
cgit v1.2.3


From ce39f9d17c14e56ea6772aa84393e6e0cc8499c4 Mon Sep 17 00:00:00 2001
From: Seungwon Jeon <tgih.jun@samsung.com>
Date: Wed, 6 Feb 2013 17:02:46 +0900
Subject: mmc: support packed write command for eMMC4.5 devices

This patch supports packed write command of eMMC4.5 devices.  Several
writes can be grouped in packed command and all data of the individual
commands can be sent in a single transfer on the bus. Large amounts of
data in one transfer rather than several data of small size are
effective for eMMC write internally.  As a result, packed command help
write throughput be improved.  The following tables show the results
of packed write.

Type A:
test     none |  packed
iozone   25.8 |  31
tiotest  27.6 |  31.2
lmdd     31.2 |  35.4

Type B:
test     none |  packed
iozone   44.1 |  51.1
tiotest  47.9 |  52.5
lmdd     51.6 |  59.2

Type C:
test     none |  packed
iozone   19.5 |  32
tiotest  19.9 |  34.5
lmdd     22.8 |  40.7

Signed-off-by: Seungwon Jeon <tgih.jun@samsung.com>
Reviewed-by: Maya Erez <merez@codeaurora.org>
Reviewed-by: Namjae Jeon <linkinjeon@gmail.com>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/card/block.c   | 455 +++++++++++++++++++++++++++++++++++++++++++--
 drivers/mmc/card/queue.c   |  96 +++++++++-
 drivers/mmc/card/queue.h   |  22 +++
 drivers/mmc/core/mmc_ops.c |   1 +
 include/linux/mmc/card.h   |   5 +
 include/linux/mmc/core.h   |   4 +
 include/linux/mmc/host.h   |   5 +
 7 files changed, 571 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c
index 1170afe1a596..5bab73b91c20 100644
--- a/drivers/mmc/card/block.c
+++ b/drivers/mmc/card/block.c
@@ -59,6 +59,12 @@ MODULE_ALIAS("mmc:block");
 #define INAND_CMD38_ARG_SECTRIM2 0x88
 #define MMC_BLK_TIMEOUT_MS  (10 * 60 * 1000)        /* 10 minute timeout */
 
+#define mmc_req_rel_wr(req)	(((req->cmd_flags & REQ_FUA) || \
+				  (req->cmd_flags & REQ_META)) && \
+				  (rq_data_dir(req) == WRITE))
+#define PACKED_CMD_VER	0x01
+#define PACKED_CMD_WR	0x02
+
 static DEFINE_MUTEX(block_mutex);
 
 /*
@@ -89,6 +95,7 @@ struct mmc_blk_data {
 	unsigned int	flags;
 #define MMC_BLK_CMD23	(1 << 0)	/* Can do SET_BLOCK_COUNT for multiblock */
 #define MMC_BLK_REL_WR	(1 << 1)	/* MMC Reliable write support */
+#define MMC_BLK_PACKED_CMD	(1 << 2)	/* MMC packed command support */
 
 	unsigned int	usage;
 	unsigned int	read_only;
@@ -113,6 +120,12 @@ struct mmc_blk_data {
 
 static DEFINE_MUTEX(open_lock);
 
+enum {
+	MMC_PACKED_NR_IDX = -1,
+	MMC_PACKED_NR_ZERO,
+	MMC_PACKED_NR_SINGLE,
+};
+
 module_param(perdev_minors, int, 0444);
 MODULE_PARM_DESC(perdev_minors, "Minors numbers to allocate per device");
 
@@ -120,6 +133,19 @@ static inline int mmc_blk_part_switch(struct mmc_card *card,
 				      struct mmc_blk_data *md);
 static int get_card_status(struct mmc_card *card, u32 *status, int retries);
 
+static inline void mmc_blk_clear_packed(struct mmc_queue_req *mqrq)
+{
+	struct mmc_packed *packed = mqrq->packed;
+
+	BUG_ON(!packed);
+
+	mqrq->cmd_type = MMC_PACKED_NONE;
+	packed->nr_entries = MMC_PACKED_NR_ZERO;
+	packed->idx_failure = MMC_PACKED_NR_IDX;
+	packed->retries = 0;
+	packed->blocks = 0;
+}
+
 static struct mmc_blk_data *mmc_blk_get(struct gendisk *disk)
 {
 	struct mmc_blk_data *md;
@@ -1137,12 +1163,78 @@ static int mmc_blk_err_check(struct mmc_card *card,
 	if (!brq->data.bytes_xfered)
 		return MMC_BLK_RETRY;
 
+	if (mmc_packed_cmd(mq_mrq->cmd_type)) {
+		if (unlikely(brq->data.blocks << 9 != brq->data.bytes_xfered))
+			return MMC_BLK_PARTIAL;
+		else
+			return MMC_BLK_SUCCESS;
+	}
+
 	if (blk_rq_bytes(req) != brq->data.bytes_xfered)
 		return MMC_BLK_PARTIAL;
 
 	return MMC_BLK_SUCCESS;
 }
 
+static int mmc_blk_packed_err_check(struct mmc_card *card,
+				    struct mmc_async_req *areq)
+{
+	struct mmc_queue_req *mq_rq = container_of(areq, struct mmc_queue_req,
+			mmc_active);
+	struct request *req = mq_rq->req;
+	struct mmc_packed *packed = mq_rq->packed;
+	int err, check, status;
+	u8 *ext_csd;
+
+	BUG_ON(!packed);
+
+	packed->retries--;
+	check = mmc_blk_err_check(card, areq);
+	err = get_card_status(card, &status, 0);
+	if (err) {
+		pr_err("%s: error %d sending status command\n",
+		       req->rq_disk->disk_name, err);
+		return MMC_BLK_ABORT;
+	}
+
+	if (status & R1_EXCEPTION_EVENT) {
+		ext_csd = kzalloc(512, GFP_KERNEL);
+		if (!ext_csd) {
+			pr_err("%s: unable to allocate buffer for ext_csd\n",
+			       req->rq_disk->disk_name);
+			return -ENOMEM;
+		}
+
+		err = mmc_send_ext_csd(card, ext_csd);
+		if (err) {
+			pr_err("%s: error %d sending ext_csd\n",
+			       req->rq_disk->disk_name, err);
+			check = MMC_BLK_ABORT;
+			goto free;
+		}
+
+		if ((ext_csd[EXT_CSD_EXP_EVENTS_STATUS] &
+		     EXT_CSD_PACKED_FAILURE) &&
+		    (ext_csd[EXT_CSD_PACKED_CMD_STATUS] &
+		     EXT_CSD_PACKED_GENERIC_ERROR)) {
+			if (ext_csd[EXT_CSD_PACKED_CMD_STATUS] &
+			    EXT_CSD_PACKED_INDEXED_ERROR) {
+				packed->idx_failure =
+				  ext_csd[EXT_CSD_PACKED_FAILURE_INDEX] - 1;
+				check = MMC_BLK_PARTIAL;
+			}
+			pr_err("%s: packed cmd failed, nr %u, sectors %u, "
+			       "failure index: %d\n",
+			       req->rq_disk->disk_name, packed->nr_entries,
+			       packed->blocks, packed->idx_failure);
+		}
+free:
+		kfree(ext_csd);
+	}
+
+	return check;
+}
+
 static void mmc_blk_rw_rq_prep(struct mmc_queue_req *mqrq,
 			       struct mmc_card *card,
 			       int disable_multi,
@@ -1297,10 +1389,221 @@ static void mmc_blk_rw_rq_prep(struct mmc_queue_req *mqrq,
 	mmc_queue_bounce_pre(mqrq);
 }
 
+static inline u8 mmc_calc_packed_hdr_segs(struct request_queue *q,
+					  struct mmc_card *card)
+{
+	unsigned int hdr_sz = mmc_large_sector(card) ? 4096 : 512;
+	unsigned int max_seg_sz = queue_max_segment_size(q);
+	unsigned int len, nr_segs = 0;
+
+	do {
+		len = min(hdr_sz, max_seg_sz);
+		hdr_sz -= len;
+		nr_segs++;
+	} while (hdr_sz);
+
+	return nr_segs;
+}
+
+static u8 mmc_blk_prep_packed_list(struct mmc_queue *mq, struct request *req)
+{
+	struct request_queue *q = mq->queue;
+	struct mmc_card *card = mq->card;
+	struct request *cur = req, *next = NULL;
+	struct mmc_blk_data *md = mq->data;
+	struct mmc_queue_req *mqrq = mq->mqrq_cur;
+	bool en_rel_wr = card->ext_csd.rel_param & EXT_CSD_WR_REL_PARAM_EN;
+	unsigned int req_sectors = 0, phys_segments = 0;
+	unsigned int max_blk_count, max_phys_segs;
+	bool put_back = true;
+	u8 max_packed_rw = 0;
+	u8 reqs = 0;
+
+	if (!(md->flags & MMC_BLK_PACKED_CMD))
+		goto no_packed;
+
+	if ((rq_data_dir(cur) == WRITE) &&
+	    mmc_host_packed_wr(card->host))
+		max_packed_rw = card->ext_csd.max_packed_writes;
+
+	if (max_packed_rw == 0)
+		goto no_packed;
+
+	if (mmc_req_rel_wr(cur) &&
+	    (md->flags & MMC_BLK_REL_WR) && !en_rel_wr)
+		goto no_packed;
+
+	if (mmc_large_sector(card) &&
+	    !IS_ALIGNED(blk_rq_sectors(cur), 8))
+		goto no_packed;
+
+	mmc_blk_clear_packed(mqrq);
+
+	max_blk_count = min(card->host->max_blk_count,
+			    card->host->max_req_size >> 9);
+	if (unlikely(max_blk_count > 0xffff))
+		max_blk_count = 0xffff;
+
+	max_phys_segs = queue_max_segments(q);
+	req_sectors += blk_rq_sectors(cur);
+	phys_segments += cur->nr_phys_segments;
+
+	if (rq_data_dir(cur) == WRITE) {
+		req_sectors += mmc_large_sector(card) ? 8 : 1;
+		phys_segments += mmc_calc_packed_hdr_segs(q, card);
+	}
+
+	do {
+		if (reqs >= max_packed_rw - 1) {
+			put_back = false;
+			break;
+		}
+
+		spin_lock_irq(q->queue_lock);
+		next = blk_fetch_request(q);
+		spin_unlock_irq(q->queue_lock);
+		if (!next) {
+			put_back = false;
+			break;
+		}
+
+		if (mmc_large_sector(card) &&
+		    !IS_ALIGNED(blk_rq_sectors(next), 8))
+			break;
+
+		if (next->cmd_flags & REQ_DISCARD ||
+		    next->cmd_flags & REQ_FLUSH)
+			break;
+
+		if (rq_data_dir(cur) != rq_data_dir(next))
+			break;
+
+		if (mmc_req_rel_wr(next) &&
+		    (md->flags & MMC_BLK_REL_WR) && !en_rel_wr)
+			break;
+
+		req_sectors += blk_rq_sectors(next);
+		if (req_sectors > max_blk_count)
+			break;
+
+		phys_segments +=  next->nr_phys_segments;
+		if (phys_segments > max_phys_segs)
+			break;
+
+		list_add_tail(&next->queuelist, &mqrq->packed->list);
+		cur = next;
+		reqs++;
+	} while (1);
+
+	if (put_back) {
+		spin_lock_irq(q->queue_lock);
+		blk_requeue_request(q, next);
+		spin_unlock_irq(q->queue_lock);
+	}
+
+	if (reqs > 0) {
+		list_add(&req->queuelist, &mqrq->packed->list);
+		mqrq->packed->nr_entries = ++reqs;
+		mqrq->packed->retries = reqs;
+		return reqs;
+	}
+
+no_packed:
+	mqrq->cmd_type = MMC_PACKED_NONE;
+	return 0;
+}
+
+static void mmc_blk_packed_hdr_wrq_prep(struct mmc_queue_req *mqrq,
+					struct mmc_card *card,
+					struct mmc_queue *mq)
+{
+	struct mmc_blk_request *brq = &mqrq->brq;
+	struct request *req = mqrq->req;
+	struct request *prq;
+	struct mmc_blk_data *md = mq->data;
+	struct mmc_packed *packed = mqrq->packed;
+	bool do_rel_wr, do_data_tag;
+	u32 *packed_cmd_hdr;
+	u8 hdr_blocks;
+	u8 i = 1;
+
+	BUG_ON(!packed);
+
+	mqrq->cmd_type = MMC_PACKED_WRITE;
+	packed->blocks = 0;
+	packed->idx_failure = MMC_PACKED_NR_IDX;
+
+	packed_cmd_hdr = packed->cmd_hdr;
+	memset(packed_cmd_hdr, 0, sizeof(packed->cmd_hdr));
+	packed_cmd_hdr[0] = (packed->nr_entries << 16) |
+		(PACKED_CMD_WR << 8) | PACKED_CMD_VER;
+	hdr_blocks = mmc_large_sector(card) ? 8 : 1;
+
+	/*
+	 * Argument for each entry of packed group
+	 */
+	list_for_each_entry(prq, &packed->list, queuelist) {
+		do_rel_wr = mmc_req_rel_wr(prq) && (md->flags & MMC_BLK_REL_WR);
+		do_data_tag = (card->ext_csd.data_tag_unit_size) &&
+			(prq->cmd_flags & REQ_META) &&
+			(rq_data_dir(prq) == WRITE) &&
+			((brq->data.blocks * brq->data.blksz) >=
+			 card->ext_csd.data_tag_unit_size);
+		/* Argument of CMD23 */
+		packed_cmd_hdr[(i * 2)] =
+			(do_rel_wr ? MMC_CMD23_ARG_REL_WR : 0) |
+			(do_data_tag ? MMC_CMD23_ARG_TAG_REQ : 0) |
+			blk_rq_sectors(prq);
+		/* Argument of CMD18 or CMD25 */
+		packed_cmd_hdr[((i * 2)) + 1] =
+			mmc_card_blockaddr(card) ?
+			blk_rq_pos(prq) : blk_rq_pos(prq) << 9;
+		packed->blocks += blk_rq_sectors(prq);
+		i++;
+	}
+
+	memset(brq, 0, sizeof(struct mmc_blk_request));
+	brq->mrq.cmd = &brq->cmd;
+	brq->mrq.data = &brq->data;
+	brq->mrq.sbc = &brq->sbc;
+	brq->mrq.stop = &brq->stop;
+
+	brq->sbc.opcode = MMC_SET_BLOCK_COUNT;
+	brq->sbc.arg = MMC_CMD23_ARG_PACKED | (packed->blocks + hdr_blocks);
+	brq->sbc.flags = MMC_RSP_R1 | MMC_CMD_AC;
+
+	brq->cmd.opcode = MMC_WRITE_MULTIPLE_BLOCK;
+	brq->cmd.arg = blk_rq_pos(req);
+	if (!mmc_card_blockaddr(card))
+		brq->cmd.arg <<= 9;
+	brq->cmd.flags = MMC_RSP_SPI_R1 | MMC_RSP_R1 | MMC_CMD_ADTC;
+
+	brq->data.blksz = 512;
+	brq->data.blocks = packed->blocks + hdr_blocks;
+	brq->data.flags |= MMC_DATA_WRITE;
+
+	brq->stop.opcode = MMC_STOP_TRANSMISSION;
+	brq->stop.arg = 0;
+	brq->stop.flags = MMC_RSP_SPI_R1B | MMC_RSP_R1B | MMC_CMD_AC;
+
+	mmc_set_data_timeout(&brq->data, card);
+
+	brq->data.sg = mqrq->sg;
+	brq->data.sg_len = mmc_queue_map_sg(mq, mqrq);
+
+	mqrq->mmc_active.mrq = &brq->mrq;
+	mqrq->mmc_active.err_check = mmc_blk_packed_err_check;
+
+	mmc_queue_bounce_pre(mqrq);
+}
+
 static int mmc_blk_cmd_err(struct mmc_blk_data *md, struct mmc_card *card,
 			   struct mmc_blk_request *brq, struct request *req,
 			   int ret)
 {
+	struct mmc_queue_req *mq_rq;
+	mq_rq = container_of(brq, struct mmc_queue_req, brq);
+
 	/*
 	 * If this is an SD card and we're writing, we can first
 	 * mark the known good sectors as ok.
@@ -1317,11 +1620,84 @@ static int mmc_blk_cmd_err(struct mmc_blk_data *md, struct mmc_card *card,
 			ret = blk_end_request(req, 0, blocks << 9);
 		}
 	} else {
-		ret = blk_end_request(req, 0, brq->data.bytes_xfered);
+		if (!mmc_packed_cmd(mq_rq->cmd_type))
+			ret = blk_end_request(req, 0, brq->data.bytes_xfered);
 	}
 	return ret;
 }
 
+static int mmc_blk_end_packed_req(struct mmc_queue_req *mq_rq)
+{
+	struct request *prq;
+	struct mmc_packed *packed = mq_rq->packed;
+	int idx = packed->idx_failure, i = 0;
+	int ret = 0;
+
+	BUG_ON(!packed);
+
+	while (!list_empty(&packed->list)) {
+		prq = list_entry_rq(packed->list.next);
+		if (idx == i) {
+			/* retry from error index */
+			packed->nr_entries -= idx;
+			mq_rq->req = prq;
+			ret = 1;
+
+			if (packed->nr_entries == MMC_PACKED_NR_SINGLE) {
+				list_del_init(&prq->queuelist);
+				mmc_blk_clear_packed(mq_rq);
+			}
+			return ret;
+		}
+		list_del_init(&prq->queuelist);
+		blk_end_request(prq, 0, blk_rq_bytes(prq));
+		i++;
+	}
+
+	mmc_blk_clear_packed(mq_rq);
+	return ret;
+}
+
+static void mmc_blk_abort_packed_req(struct mmc_queue_req *mq_rq)
+{
+	struct request *prq;
+	struct mmc_packed *packed = mq_rq->packed;
+
+	BUG_ON(!packed);
+
+	while (!list_empty(&packed->list)) {
+		prq = list_entry_rq(packed->list.next);
+		list_del_init(&prq->queuelist);
+		blk_end_request(prq, -EIO, blk_rq_bytes(prq));
+	}
+
+	mmc_blk_clear_packed(mq_rq);
+}
+
+static void mmc_blk_revert_packed_req(struct mmc_queue *mq,
+				      struct mmc_queue_req *mq_rq)
+{
+	struct request *prq;
+	struct request_queue *q = mq->queue;
+	struct mmc_packed *packed = mq_rq->packed;
+
+	BUG_ON(!packed);
+
+	while (!list_empty(&packed->list)) {
+		prq = list_entry_rq(packed->list.prev);
+		if (prq->queuelist.prev != &packed->list) {
+			list_del_init(&prq->queuelist);
+			spin_lock_irq(q->queue_lock);
+			blk_requeue_request(mq->queue, prq);
+			spin_unlock_irq(q->queue_lock);
+		} else {
+			list_del_init(&prq->queuelist);
+		}
+	}
+
+	mmc_blk_clear_packed(mq_rq);
+}
+
 static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *rqc)
 {
 	struct mmc_blk_data *md = mq->data;
@@ -1332,10 +1708,15 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *rqc)
 	struct mmc_queue_req *mq_rq;
 	struct request *req = rqc;
 	struct mmc_async_req *areq;
+	const u8 packed_nr = 2;
+	u8 reqs = 0;
 
 	if (!rqc && !mq->mqrq_prev->req)
 		return 0;
 
+	if (rqc)
+		reqs = mmc_blk_prep_packed_list(mq, rqc);
+
 	do {
 		if (rqc) {
 			/*
@@ -1346,9 +1727,15 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *rqc)
 			    (card->ext_csd.data_sector_size == 4096)) {
 				pr_err("%s: Transfer size is not 4KB sector size aligned\n",
 					req->rq_disk->disk_name);
+				mq_rq = mq->mqrq_cur;
 				goto cmd_abort;
 			}
-			mmc_blk_rw_rq_prep(mq->mqrq_cur, card, 0, mq);
+
+			if (reqs >= packed_nr)
+				mmc_blk_packed_hdr_wrq_prep(mq->mqrq_cur,
+							    card, mq);
+			else
+				mmc_blk_rw_rq_prep(mq->mqrq_cur, card, 0, mq);
 			areq = &mq->mqrq_cur->mmc_active;
 		} else
 			areq = NULL;
@@ -1372,8 +1759,15 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *rqc)
 			 * A block was successfully transferred.
 			 */
 			mmc_blk_reset_success(md, type);
-			ret = blk_end_request(req, 0,
+
+			if (mmc_packed_cmd(mq_rq->cmd_type)) {
+				ret = mmc_blk_end_packed_req(mq_rq);
+				break;
+			} else {
+				ret = blk_end_request(req, 0,
 						brq->data.bytes_xfered);
+			}
+
 			/*
 			 * If the blk_end_request function returns non-zero even
 			 * though all data has been transferred and no errors
@@ -1406,7 +1800,8 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *rqc)
 			err = mmc_blk_reset(md, card->host, type);
 			if (!err)
 				break;
-			if (err == -ENODEV)
+			if (err == -ENODEV ||
+				mmc_packed_cmd(mq_rq->cmd_type))
 				goto cmd_abort;
 			/* Fall through */
 		}
@@ -1437,22 +1832,38 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *rqc)
 		}
 
 		if (ret) {
-			/*
-			 * In case of a incomplete request
-			 * prepare it again and resend.
-			 */
-			mmc_blk_rw_rq_prep(mq_rq, card, disable_multi, mq);
-			mmc_start_req(card->host, &mq_rq->mmc_active, NULL);
+			if (mmc_packed_cmd(mq_rq->cmd_type)) {
+				if (!mq_rq->packed->retries)
+					goto cmd_abort;
+				mmc_blk_packed_hdr_wrq_prep(mq_rq, card, mq);
+				mmc_start_req(card->host,
+					      &mq_rq->mmc_active, NULL);
+			} else {
+
+				/*
+				 * In case of a incomplete request
+				 * prepare it again and resend.
+				 */
+				mmc_blk_rw_rq_prep(mq_rq, card,
+						disable_multi, mq);
+				mmc_start_req(card->host,
+						&mq_rq->mmc_active, NULL);
+			}
 		}
 	} while (ret);
 
 	return 1;
 
  cmd_abort:
-	if (mmc_card_removed(card))
-		req->cmd_flags |= REQ_QUIET;
-	while (ret)
-		ret = blk_end_request(req, -EIO, blk_rq_cur_bytes(req));
+	if (mmc_packed_cmd(mq_rq->cmd_type)) {
+		mmc_blk_abort_packed_req(mq_rq);
+	} else {
+		if (mmc_card_removed(card))
+			req->cmd_flags |= REQ_QUIET;
+		while (ret)
+			ret = blk_end_request(req, -EIO,
+					blk_rq_cur_bytes(req));
+	}
 
  start_new_req:
 	if (rqc) {
@@ -1460,6 +1871,12 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *rqc)
 			rqc->cmd_flags |= REQ_QUIET;
 			blk_end_request_all(rqc, -EIO);
 		} else {
+			/*
+			 * If current request is packed, it needs to put back.
+			 */
+			if (mmc_packed_cmd(mq->mqrq_cur->cmd_type))
+				mmc_blk_revert_packed_req(mq, mq->mqrq_cur);
+
 			mmc_blk_rw_rq_prep(mq->mqrq_cur, card, 0, mq);
 			mmc_start_req(card->host,
 				      &mq->mqrq_cur->mmc_active, NULL);
@@ -1634,6 +2051,14 @@ static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card,
 		blk_queue_flush(md->queue.queue, REQ_FLUSH | REQ_FUA);
 	}
 
+	if (mmc_card_mmc(card) &&
+	    (area_type == MMC_BLK_DATA_AREA_MAIN) &&
+	    (md->flags & MMC_BLK_CMD23) &&
+	    card->ext_csd.packed_event_en) {
+		if (!mmc_packed_init(&md->queue, card))
+			md->flags |= MMC_BLK_PACKED_CMD;
+	}
+
 	return md;
 
  err_putdisk:
@@ -1742,6 +2167,8 @@ static void mmc_blk_remove_req(struct mmc_blk_data *md)
 
 		/* Then flush out any already in there */
 		mmc_cleanup_queue(&md->queue);
+		if (md->flags & MMC_BLK_PACKED_CMD)
+			mmc_packed_clean(&md->queue);
 		mmc_blk_put(md);
 	}
 }
diff --git a/drivers/mmc/card/queue.c b/drivers/mmc/card/queue.c
index 5e0971016ac5..fa4e44ee7961 100644
--- a/drivers/mmc/card/queue.c
+++ b/drivers/mmc/card/queue.c
@@ -362,6 +362,49 @@ void mmc_cleanup_queue(struct mmc_queue *mq)
 }
 EXPORT_SYMBOL(mmc_cleanup_queue);
 
+int mmc_packed_init(struct mmc_queue *mq, struct mmc_card *card)
+{
+	struct mmc_queue_req *mqrq_cur = &mq->mqrq[0];
+	struct mmc_queue_req *mqrq_prev = &mq->mqrq[1];
+	int ret = 0;
+
+
+	mqrq_cur->packed = kzalloc(sizeof(struct mmc_packed), GFP_KERNEL);
+	if (!mqrq_cur->packed) {
+		pr_warn("%s: unable to allocate packed cmd for mqrq_cur\n",
+			mmc_card_name(card));
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	mqrq_prev->packed = kzalloc(sizeof(struct mmc_packed), GFP_KERNEL);
+	if (!mqrq_prev->packed) {
+		pr_warn("%s: unable to allocate packed cmd for mqrq_prev\n",
+			mmc_card_name(card));
+		kfree(mqrq_cur->packed);
+		mqrq_cur->packed = NULL;
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	INIT_LIST_HEAD(&mqrq_cur->packed->list);
+	INIT_LIST_HEAD(&mqrq_prev->packed->list);
+
+out:
+	return ret;
+}
+
+void mmc_packed_clean(struct mmc_queue *mq)
+{
+	struct mmc_queue_req *mqrq_cur = &mq->mqrq[0];
+	struct mmc_queue_req *mqrq_prev = &mq->mqrq[1];
+
+	kfree(mqrq_cur->packed);
+	mqrq_cur->packed = NULL;
+	kfree(mqrq_prev->packed);
+	mqrq_prev->packed = NULL;
+}
+
 /**
  * mmc_queue_suspend - suspend a MMC request queue
  * @mq: MMC queue to suspend
@@ -406,6 +449,41 @@ void mmc_queue_resume(struct mmc_queue *mq)
 	}
 }
 
+static unsigned int mmc_queue_packed_map_sg(struct mmc_queue *mq,
+					    struct mmc_packed *packed,
+					    struct scatterlist *sg,
+					    enum mmc_packed_type cmd_type)
+{
+	struct scatterlist *__sg = sg;
+	unsigned int sg_len = 0;
+	struct request *req;
+
+	if (mmc_packed_wr(cmd_type)) {
+		unsigned int hdr_sz = mmc_large_sector(mq->card) ? 4096 : 512;
+		unsigned int max_seg_sz = queue_max_segment_size(mq->queue);
+		unsigned int len, remain, offset = 0;
+		u8 *buf = (u8 *)packed->cmd_hdr;
+
+		remain = hdr_sz;
+		do {
+			len = min(remain, max_seg_sz);
+			sg_set_buf(__sg, buf + offset, len);
+			offset += len;
+			remain -= len;
+			(__sg++)->page_link &= ~0x02;
+			sg_len++;
+		} while (remain);
+	}
+
+	list_for_each_entry(req, &packed->list, queuelist) {
+		sg_len += blk_rq_map_sg(mq->queue, req, __sg);
+		__sg = sg + (sg_len - 1);
+		(__sg++)->page_link &= ~0x02;
+	}
+	sg_mark_end(sg + (sg_len - 1));
+	return sg_len;
+}
+
 /*
  * Prepare the sg list(s) to be handed of to the host driver
  */
@@ -414,14 +492,26 @@ unsigned int mmc_queue_map_sg(struct mmc_queue *mq, struct mmc_queue_req *mqrq)
 	unsigned int sg_len;
 	size_t buflen;
 	struct scatterlist *sg;
+	enum mmc_packed_type cmd_type;
 	int i;
 
-	if (!mqrq->bounce_buf)
-		return blk_rq_map_sg(mq->queue, mqrq->req, mqrq->sg);
+	cmd_type = mqrq->cmd_type;
+
+	if (!mqrq->bounce_buf) {
+		if (mmc_packed_cmd(cmd_type))
+			return mmc_queue_packed_map_sg(mq, mqrq->packed,
+						       mqrq->sg, cmd_type);
+		else
+			return blk_rq_map_sg(mq->queue, mqrq->req, mqrq->sg);
+	}
 
 	BUG_ON(!mqrq->bounce_sg);
 
-	sg_len = blk_rq_map_sg(mq->queue, mqrq->req, mqrq->bounce_sg);
+	if (mmc_packed_cmd(cmd_type))
+		sg_len = mmc_queue_packed_map_sg(mq, mqrq->packed,
+						 mqrq->bounce_sg, cmd_type);
+	else
+		sg_len = blk_rq_map_sg(mq->queue, mqrq->req, mqrq->bounce_sg);
 
 	mqrq->bounce_sg_len = sg_len;
 
diff --git a/drivers/mmc/card/queue.h b/drivers/mmc/card/queue.h
index e20c27b2b8b4..031bf6376c99 100644
--- a/drivers/mmc/card/queue.h
+++ b/drivers/mmc/card/queue.h
@@ -12,6 +12,23 @@ struct mmc_blk_request {
 	struct mmc_data		data;
 };
 
+enum mmc_packed_type {
+	MMC_PACKED_NONE = 0,
+	MMC_PACKED_WRITE,
+};
+
+#define mmc_packed_cmd(type)	((type) != MMC_PACKED_NONE)
+#define mmc_packed_wr(type)	((type) == MMC_PACKED_WRITE)
+
+struct mmc_packed {
+	struct list_head	list;
+	u32			cmd_hdr[1024];
+	unsigned int		blocks;
+	u8			nr_entries;
+	u8			retries;
+	s16			idx_failure;
+};
+
 struct mmc_queue_req {
 	struct request		*req;
 	struct mmc_blk_request	brq;
@@ -20,6 +37,8 @@ struct mmc_queue_req {
 	struct scatterlist	*bounce_sg;
 	unsigned int		bounce_sg_len;
 	struct mmc_async_req	mmc_active;
+	enum mmc_packed_type	cmd_type;
+	struct mmc_packed	*packed;
 };
 
 struct mmc_queue {
@@ -49,4 +68,7 @@ extern unsigned int mmc_queue_map_sg(struct mmc_queue *,
 extern void mmc_queue_bounce_pre(struct mmc_queue_req *);
 extern void mmc_queue_bounce_post(struct mmc_queue_req *);
 
+extern int mmc_packed_init(struct mmc_queue *, struct mmc_card *);
+extern void mmc_packed_clean(struct mmc_queue *);
+
 #endif
diff --git a/drivers/mmc/core/mmc_ops.c b/drivers/mmc/core/mmc_ops.c
index 6d8f7012d73a..49f04bc9d0eb 100644
--- a/drivers/mmc/core/mmc_ops.c
+++ b/drivers/mmc/core/mmc_ops.c
@@ -363,6 +363,7 @@ int mmc_send_ext_csd(struct mmc_card *card, u8 *ext_csd)
 	return mmc_send_cxd_data(card, card->host, MMC_SEND_EXT_CSD,
 			ext_csd, 512);
 }
+EXPORT_SYMBOL_GPL(mmc_send_ext_csd);
 
 int mmc_spi_read_ocr(struct mmc_host *host, int highcap, u32 *ocrp)
 {
diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index 237f253f2fe0..61b2c30c903b 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -310,6 +310,11 @@ static inline void mmc_part_add(struct mmc_card *card, unsigned int size,
 	card->nr_parts++;
 }
 
+static inline bool mmc_large_sector(struct mmc_card *card)
+{
+	return card->ext_csd.data_sector_size == 4096;
+}
+
 /*
  *  The world is not perfect and supplies us with broken mmc/sdio devices.
  *  For at least some of these bugs we need a work-around.
diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h
index 495d1336149c..39613b9a6fc5 100644
--- a/include/linux/mmc/core.h
+++ b/include/linux/mmc/core.h
@@ -18,6 +18,9 @@ struct mmc_request;
 struct mmc_command {
 	u32			opcode;
 	u32			arg;
+#define MMC_CMD23_ARG_REL_WR	(1 << 31)
+#define MMC_CMD23_ARG_PACKED	((0 << 31) | (1 << 30))
+#define MMC_CMD23_ARG_TAG_REQ	(1 << 29)
 	u32			resp[4];
 	unsigned int		flags;		/* expected response type */
 #define MMC_RSP_PRESENT	(1 << 0)
@@ -148,6 +151,7 @@ extern int mmc_wait_for_app_cmd(struct mmc_host *, struct mmc_card *,
 extern void mmc_start_bkops(struct mmc_card *card, bool from_exception);
 extern int __mmc_switch(struct mmc_card *, u8, u8, u8, unsigned int, bool);
 extern int mmc_switch(struct mmc_card *, u8, u8, u8, unsigned int);
+extern int mmc_send_ext_csd(struct mmc_card *card, u8 *ext_csd);
 
 #define MMC_ERASE_ARG		0x00000000
 #define MMC_SECURE_ERASE_ARG	0x80000000
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index a0466c03f5e1..fd5fd5a6026f 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -465,6 +465,11 @@ static inline int mmc_host_uhs(struct mmc_host *host)
 		 MMC_CAP_UHS_DDR50);
 }
 
+static inline int mmc_host_packed_wr(struct mmc_host *host)
+{
+	return host->caps2 & MMC_CAP2_PACKED_WR;
+}
+
 #ifdef CONFIG_MMC_CLKGATE
 void mmc_host_clk_hold(struct mmc_host *host);
 void mmc_host_clk_release(struct mmc_host *host);
-- 
cgit v1.2.3


From 6c56e7a0fff166904ce2715f7ab1746460c1f11b Mon Sep 17 00:00:00 2001
From: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Date: Sat, 16 Feb 2013 16:21:16 +0100
Subject: mmc: provide a standard MMC device-tree binding parser centrally

MMC defines a number of standard DT bindings. Having each driver parse
them individually adds code redundancy and is error prone. Provide a
standard function to unify the parsing. After all drivers are converted
to using it instead of their own parsers, this function can be integrated
into mmc_alloc_host().

Signed-off-by: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/core/host.c  | 110 +++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/mmc/host.h |   1 +
 2 files changed, 111 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c
index ee2e16b17017..9c53452e73e1 100644
--- a/drivers/mmc/core/host.c
+++ b/drivers/mmc/core/host.c
@@ -15,6 +15,8 @@
 #include <linux/device.h>
 #include <linux/err.h>
 #include <linux/idr.h>
+#include <linux/of.h>
+#include <linux/of_gpio.h>
 #include <linux/pagemap.h>
 #include <linux/export.h>
 #include <linux/leds.h>
@@ -23,6 +25,7 @@
 
 #include <linux/mmc/host.h>
 #include <linux/mmc/card.h>
+#include <linux/mmc/slot-gpio.h>
 
 #include "core.h"
 #include "host.h"
@@ -294,6 +297,113 @@ static inline void mmc_host_clk_sysfs_init(struct mmc_host *host)
 
 #endif
 
+/**
+ *	mmc_of_parse() - parse host's device-tree node
+ *	@host: host whose node should be parsed.
+ *
+ * To keep the rest of the MMC subsystem unaware of whether DT has been
+ * used to to instantiate and configure this host instance or not, we
+ * parse the properties and set respective generic mmc-host flags and
+ * parameters.
+ */
+void mmc_of_parse(struct mmc_host *host)
+{
+	struct device_node *np;
+	u32 bus_width;
+	bool explicit_inv_wp, gpio_inv_wp = false;
+	enum of_gpio_flags flags;
+	int len, ret, gpio;
+
+	if (!host->parent || !host->parent->of_node)
+		return;
+
+	np = host->parent->of_node;
+
+	/* "bus-width" is translated to MMC_CAP_*_BIT_DATA flags */
+	if (of_property_read_u32(np, "bus-width", &bus_width) < 0) {
+		dev_dbg(host->parent,
+			"\"bus-width\" property is missing, assuming 1 bit.\n");
+		bus_width = 1;
+	}
+
+	switch (bus_width) {
+	case 8:
+		host->caps |= MMC_CAP_8_BIT_DATA;
+		/* Hosts capable of 8-bit transfers can also do 4 bits */
+	case 4:
+		host->caps |= MMC_CAP_4_BIT_DATA;
+		break;
+	case 1:
+		break;
+	default:
+		dev_err(host->parent,
+			"Invalid \"bus-width\" value %ud!\n", bus_width);
+	}
+
+	/* f_max is obtained from the optional "max-frequency" property */
+	of_property_read_u32(np, "max-frequency", &host->f_max);
+
+	/*
+	 * Configure CD and WP pins. They are both by default active low to
+	 * match the SDHCI spec. If GPIOs are provided for CD and / or WP, the
+	 * mmc-gpio helpers are used to attach, configure and use them. If
+	 * polarity inversion is specified in DT, one of MMC_CAP2_CD_ACTIVE_HIGH
+	 * and MMC_CAP2_RO_ACTIVE_HIGH capability-2 flags is set. If the
+	 * "broken-cd" property is provided, the MMC_CAP_NEEDS_POLL capability
+	 * is set. If the "non-removable" property is found, the
+	 * MMC_CAP_NONREMOVABLE capability is set and no card-detection
+	 * configuration is performed.
+	 */
+
+	/* Parse Card Detection */
+	if (of_find_property(np, "non-removable", &len)) {
+		host->caps |= MMC_CAP_NONREMOVABLE;
+	} else {
+		bool explicit_inv_cd, gpio_inv_cd = false;
+
+		explicit_inv_cd = of_property_read_bool(np, "cd-inverted");
+
+		if (of_find_property(np, "broken-cd", &len))
+			host->caps |= MMC_CAP_NEEDS_POLL;
+
+		gpio = of_get_named_gpio_flags(np, "cd-gpios", 0, &flags);
+		if (gpio_is_valid(gpio)) {
+			if (!(flags & OF_GPIO_ACTIVE_LOW))
+				gpio_inv_cd = true;
+
+			ret = mmc_gpio_request_cd(host, gpio);
+			if (ret < 0)
+				dev_err(host->parent,
+					"Failed to request CD GPIO #%d: %d!\n",
+					gpio, ret);
+			else
+				dev_info(host->parent, "Got CD GPIO #%d.\n",
+					 gpio);
+		}
+
+		if (explicit_inv_cd ^ gpio_inv_cd)
+			host->caps2 |= MMC_CAP2_CD_ACTIVE_HIGH;
+	}
+
+	/* Parse Write Protection */
+	explicit_inv_wp = of_property_read_bool(np, "wp-inverted");
+
+	gpio = of_get_named_gpio_flags(np, "wp-gpios", 0, &flags);
+	if (gpio_is_valid(gpio)) {
+		if (!(flags & OF_GPIO_ACTIVE_LOW))
+			gpio_inv_wp = true;
+
+		ret = mmc_gpio_request_ro(host, gpio);
+		if (ret < 0)
+			dev_err(host->parent,
+				"Failed to request WP GPIO: %d!\n", ret);
+	}
+	if (explicit_inv_wp ^ gpio_inv_wp)
+		host->caps2 |= MMC_CAP2_RO_ACTIVE_HIGH;
+}
+
+EXPORT_SYMBOL(mmc_of_parse);
+
 /**
  *	mmc_alloc_host - initialise the per-host structure.
  *	@extra: sizeof private data structure
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index fd5fd5a6026f..bf93c9a6d729 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -368,6 +368,7 @@ extern struct mmc_host *mmc_alloc_host(int extra, struct device *);
 extern int mmc_add_host(struct mmc_host *);
 extern void mmc_remove_host(struct mmc_host *);
 extern void mmc_free_host(struct mmc_host *);
+void mmc_of_parse(struct mmc_host *host);
 
 static inline void *mmc_priv(struct mmc_host *host)
 {
-- 
cgit v1.2.3


From 8c9beb117bf31cdb757bc80992281004be8a177b Mon Sep 17 00:00:00 2001
From: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Date: Fri, 15 Feb 2013 16:13:53 +0100
Subject: mmc: (cosmetic) remove "extern" from function declarations

The "extern" keyword isn't required in function declarations, remove it.

Signed-off-by: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 include/linux/mmc/host.h | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index bf93c9a6d729..d6f20cc6415e 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -364,10 +364,10 @@ struct mmc_host {
 	unsigned long		private[0] ____cacheline_aligned;
 };
 
-extern struct mmc_host *mmc_alloc_host(int extra, struct device *);
-extern int mmc_add_host(struct mmc_host *);
-extern void mmc_remove_host(struct mmc_host *);
-extern void mmc_free_host(struct mmc_host *);
+struct mmc_host *mmc_alloc_host(int extra, struct device *);
+int mmc_add_host(struct mmc_host *);
+void mmc_remove_host(struct mmc_host *);
+void mmc_free_host(struct mmc_host *);
 void mmc_of_parse(struct mmc_host *host);
 
 static inline void *mmc_priv(struct mmc_host *host)
@@ -381,16 +381,16 @@ static inline void *mmc_priv(struct mmc_host *host)
 #define mmc_classdev(x)	(&(x)->class_dev)
 #define mmc_hostname(x)	(dev_name(&(x)->class_dev))
 
-extern int mmc_suspend_host(struct mmc_host *);
-extern int mmc_resume_host(struct mmc_host *);
+int mmc_suspend_host(struct mmc_host *);
+int mmc_resume_host(struct mmc_host *);
 
-extern int mmc_power_save_host(struct mmc_host *host);
-extern int mmc_power_restore_host(struct mmc_host *host);
+int mmc_power_save_host(struct mmc_host *host);
+int mmc_power_restore_host(struct mmc_host *host);
 
-extern void mmc_detect_change(struct mmc_host *, unsigned long delay);
-extern void mmc_request_done(struct mmc_host *, struct mmc_request *);
+void mmc_detect_change(struct mmc_host *, unsigned long delay);
+void mmc_request_done(struct mmc_host *, struct mmc_request *);
 
-extern int mmc_cache_ctrl(struct mmc_host *, u8);
+int mmc_cache_ctrl(struct mmc_host *, u8);
 
 static inline void mmc_signal_sdio_irq(struct mmc_host *host)
 {
-- 
cgit v1.2.3


From 76a411f9f9423cbc9f62e70173459c5af54323f4 Mon Sep 17 00:00:00 2001
From: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Date: Fri, 15 Feb 2013 16:13:57 +0100
Subject: mmc: sh_mobile_sdhi: remove unused .pdata field

The struct sh_mobile_sdhi_info::pdata field was only used for platform-
based card detection and isn't used anymore since the migration to GPIO-
based MMC slot functions. Remove it.

Signed-off-by: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/host/sh_mobile_sdhi.c  | 4 ----
 include/linux/mmc/sh_mobile_sdhi.h | 2 --
 2 files changed, 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/host/sh_mobile_sdhi.c b/drivers/mmc/host/sh_mobile_sdhi.c
index 06e1bc98c536..175ab970952a 100644
--- a/drivers/mmc/host/sh_mobile_sdhi.c
+++ b/drivers/mmc/host/sh_mobile_sdhi.c
@@ -156,7 +156,6 @@ static int sh_mobile_sdhi_probe(struct platform_device *pdev)
 	mmc_data = &priv->mmc_data;
 
 	if (p) {
-		p->pdata = mmc_data;
 		if (p->init) {
 			ret = p->init(pdev, &sdhi_ops);
 			if (ret)
@@ -310,9 +309,6 @@ static int sh_mobile_sdhi_remove(struct platform_device *pdev)
 	struct sh_mobile_sdhi_info *p = pdev->dev.platform_data;
 	int i = 0, irq;
 
-	if (p)
-		p->pdata = NULL;
-
 	tmio_mmc_host_remove(host);
 
 	while (1) {
diff --git a/include/linux/mmc/sh_mobile_sdhi.h b/include/linux/mmc/sh_mobile_sdhi.h
index b65679ffa880..b76bcf0621f6 100644
--- a/include/linux/mmc/sh_mobile_sdhi.h
+++ b/include/linux/mmc/sh_mobile_sdhi.h
@@ -4,7 +4,6 @@
 #include <linux/types.h>
 
 struct platform_device;
-struct tmio_mmc_data;
 
 #define SH_MOBILE_SDHI_IRQ_CARD_DETECT	"card_detect"
 #define SH_MOBILE_SDHI_IRQ_SDCARD	"sdcard"
@@ -26,7 +25,6 @@ struct sh_mobile_sdhi_info {
 	unsigned long tmio_caps2;
 	u32 tmio_ocr_mask;	/* available MMC voltages */
 	unsigned int cd_gpio;
-	struct tmio_mmc_data *pdata;
 	void (*set_pwr)(struct platform_device *pdev, int state);
 	int (*get_cd)(struct platform_device *pdev);
 
-- 
cgit v1.2.3


From 27902c14aa2376d53755b6c02e3be671fd890e30 Mon Sep 17 00:00:00 2001
From: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Date: Fri, 15 Feb 2013 16:13:59 +0100
Subject: mmc: tmio: remove unused and deprecated symbols

The tmio_mmc_cd_wakeup() inline function has been deprecated since 3.4 and
is unused since 3.4 too. Remove them.

Signed-off-by: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 include/linux/mfd/tmio.h | 18 ------------------
 1 file changed, 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h
index d83af39815ab..99bf3e665997 100644
--- a/include/linux/mfd/tmio.h
+++ b/include/linux/mfd/tmio.h
@@ -64,12 +64,6 @@
  * Some controllers can support SDIO IRQ signalling.
  */
 #define TMIO_MMC_SDIO_IRQ		(1 << 2)
-/*
- * Some platforms can detect card insertion events with controller powered
- * down, using a GPIO IRQ, in which case they have to fill in cd_irq, cd_gpio,
- * and cd_flags fields of struct tmio_mmc_data.
- */
-#define TMIO_MMC_HAS_COLD_CD		(1 << 3)
 /*
  * Some controllers require waiting for the SD bus to become
  * idle before writing to some registers.
@@ -116,18 +110,6 @@ struct tmio_mmc_data {
 	void (*clk_disable)(struct platform_device *pdev);
 };
 
-/*
- * This function is deprecated and will be removed soon. Please, convert your
- * platform to use drivers/mmc/core/cd-gpio.c
- */
-#include <linux/mmc/host.h>
-static inline void tmio_mmc_cd_wakeup(struct tmio_mmc_data *pdata)
-{
-	if (pdata)
-		mmc_detect_change(dev_get_drvdata(pdata->dev),
-				  msecs_to_jiffies(100));
-}
-
 /*
  * data for the NAND controller
  */
-- 
cgit v1.2.3


From 0e786102949d7461859c6ce9f39c2c8d28e42db3 Mon Sep 17 00:00:00 2001
From: Stephen Warren <swarren@nvidia.com>
Date: Fri, 15 Feb 2013 15:07:19 -0700
Subject: mmc: tegra: assume CONFIG_OF, remove platform data

Tegra only supports, and always enables, device tree. Remove all ifdefs
and runtime checks for DT support from the driver. Platform data is
therefore no longer required. Rework the driver to parse the device tree
directly into struct sdhci_tegra.

Signed-off-by: Stephen Warren <swarren@nvidia.com>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/host/sdhci-tegra.c                | 119 ++++++++++----------------
 include/linux/platform_data/mmc-sdhci-tegra.h |  28 ------
 2 files changed, 45 insertions(+), 102 deletions(-)
 delete mode 100644 include/linux/platform_data/mmc-sdhci-tegra.h

(limited to 'include/linux')

diff --git a/drivers/mmc/host/sdhci-tegra.c b/drivers/mmc/host/sdhci-tegra.c
index 5a600a53b876..08b06e9a3a21 100644
--- a/drivers/mmc/host/sdhci-tegra.c
+++ b/drivers/mmc/host/sdhci-tegra.c
@@ -27,8 +27,6 @@
 
 #include <asm/gpio.h>
 
-#include <linux/platform_data/mmc-sdhci-tegra.h>
-
 #include "sdhci-pltfm.h"
 
 /* Tegra SDHOST controller vendor register definitions */
@@ -45,8 +43,11 @@ struct sdhci_tegra_soc_data {
 };
 
 struct sdhci_tegra {
-	const struct tegra_sdhci_platform_data *plat;
 	const struct sdhci_tegra_soc_data *soc_data;
+	int cd_gpio;
+	int wp_gpio;
+	int power_gpio;
+	int is_8bit;
 };
 
 static u32 tegra_sdhci_readl(struct sdhci_host *host, int reg)
@@ -108,12 +109,11 @@ static unsigned int tegra_sdhci_get_ro(struct sdhci_host *host)
 {
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
 	struct sdhci_tegra *tegra_host = pltfm_host->priv;
-	const struct tegra_sdhci_platform_data *plat = tegra_host->plat;
 
-	if (!gpio_is_valid(plat->wp_gpio))
+	if (!gpio_is_valid(tegra_host->wp_gpio))
 		return -1;
 
-	return gpio_get_value(plat->wp_gpio);
+	return gpio_get_value(tegra_host->wp_gpio);
 }
 
 static irqreturn_t carddetect_irq(int irq, void *data)
@@ -147,11 +147,10 @@ static int tegra_sdhci_buswidth(struct sdhci_host *host, int bus_width)
 {
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
 	struct sdhci_tegra *tegra_host = pltfm_host->priv;
-	const struct tegra_sdhci_platform_data *plat = tegra_host->plat;
 	u32 ctrl;
 
 	ctrl = sdhci_readb(host, SDHCI_HOST_CONTROL);
-	if (plat->is_8bit && bus_width == MMC_BUS_WIDTH_8) {
+	if (tegra_host->is_8bit && bus_width == MMC_BUS_WIDTH_8) {
 		ctrl &= ~SDHCI_CTRL_4BITBUS;
 		ctrl |= SDHCI_CTRL_8BITBUS;
 	} else {
@@ -217,31 +216,19 @@ static const struct of_device_id sdhci_tegra_dt_match[] = {
 };
 MODULE_DEVICE_TABLE(of, sdhci_dt_ids);
 
-static struct tegra_sdhci_platform_data *sdhci_tegra_dt_parse_pdata(
-						struct platform_device *pdev)
+static void sdhci_tegra_parse_dt(struct device *dev,
+					struct sdhci_tegra *tegra_host)
 {
-	struct tegra_sdhci_platform_data *plat;
-	struct device_node *np = pdev->dev.of_node;
+	struct device_node *np = dev->of_node;
 	u32 bus_width;
 
-	if (!np)
-		return NULL;
-
-	plat = devm_kzalloc(&pdev->dev, sizeof(*plat), GFP_KERNEL);
-	if (!plat) {
-		dev_err(&pdev->dev, "Can't allocate platform data\n");
-		return NULL;
-	}
-
-	plat->cd_gpio = of_get_named_gpio(np, "cd-gpios", 0);
-	plat->wp_gpio = of_get_named_gpio(np, "wp-gpios", 0);
-	plat->power_gpio = of_get_named_gpio(np, "power-gpios", 0);
+	tegra_host->cd_gpio = of_get_named_gpio(np, "cd-gpios", 0);
+	tegra_host->wp_gpio = of_get_named_gpio(np, "wp-gpios", 0);
+	tegra_host->power_gpio = of_get_named_gpio(np, "power-gpios", 0);
 
 	if (of_property_read_u32(np, "bus-width", &bus_width) == 0 &&
 	    bus_width == 8)
-		plat->is_8bit = 1;
-
-	return plat;
+		tegra_host->is_8bit = 1;
 }
 
 static int sdhci_tegra_probe(struct platform_device *pdev)
@@ -250,7 +237,6 @@ static int sdhci_tegra_probe(struct platform_device *pdev)
 	const struct sdhci_tegra_soc_data *soc_data;
 	struct sdhci_host *host;
 	struct sdhci_pltfm_host *pltfm_host;
-	struct tegra_sdhci_platform_data *plat;
 	struct sdhci_tegra *tegra_host;
 	struct clk *clk;
 	int rc;
@@ -263,52 +249,40 @@ static int sdhci_tegra_probe(struct platform_device *pdev)
 	host = sdhci_pltfm_init(pdev, soc_data->pdata);
 	if (IS_ERR(host))
 		return PTR_ERR(host);
-
 	pltfm_host = sdhci_priv(host);
 
-	plat = pdev->dev.platform_data;
-
-	if (plat == NULL)
-		plat = sdhci_tegra_dt_parse_pdata(pdev);
-
-	if (plat == NULL) {
-		dev_err(mmc_dev(host->mmc), "missing platform data\n");
-		rc = -ENXIO;
-		goto err_no_plat;
-	}
-
 	tegra_host = devm_kzalloc(&pdev->dev, sizeof(*tegra_host), GFP_KERNEL);
 	if (!tegra_host) {
 		dev_err(mmc_dev(host->mmc), "failed to allocate tegra_host\n");
 		rc = -ENOMEM;
-		goto err_no_plat;
+		goto err_alloc_tegra_host;
 	}
-
-	tegra_host->plat = plat;
 	tegra_host->soc_data = soc_data;
-
 	pltfm_host->priv = tegra_host;
 
-	if (gpio_is_valid(plat->power_gpio)) {
-		rc = gpio_request(plat->power_gpio, "sdhci_power");
+	sdhci_tegra_parse_dt(&pdev->dev, tegra_host);
+
+	if (gpio_is_valid(tegra_host->power_gpio)) {
+		rc = gpio_request(tegra_host->power_gpio, "sdhci_power");
 		if (rc) {
 			dev_err(mmc_dev(host->mmc),
 				"failed to allocate power gpio\n");
 			goto err_power_req;
 		}
-		gpio_direction_output(plat->power_gpio, 1);
+		gpio_direction_output(tegra_host->power_gpio, 1);
 	}
 
-	if (gpio_is_valid(plat->cd_gpio)) {
-		rc = gpio_request(plat->cd_gpio, "sdhci_cd");
+	if (gpio_is_valid(tegra_host->cd_gpio)) {
+		rc = gpio_request(tegra_host->cd_gpio, "sdhci_cd");
 		if (rc) {
 			dev_err(mmc_dev(host->mmc),
 				"failed to allocate cd gpio\n");
 			goto err_cd_req;
 		}
-		gpio_direction_input(plat->cd_gpio);
+		gpio_direction_input(tegra_host->cd_gpio);
 
-		rc = request_irq(gpio_to_irq(plat->cd_gpio), carddetect_irq,
+		rc = request_irq(gpio_to_irq(tegra_host->cd_gpio),
+				 carddetect_irq,
 				 IRQF_TRIGGER_FALLING | IRQF_TRIGGER_RISING,
 				 mmc_hostname(host->mmc), host);
 
@@ -319,14 +293,14 @@ static int sdhci_tegra_probe(struct platform_device *pdev)
 
 	}
 
-	if (gpio_is_valid(plat->wp_gpio)) {
-		rc = gpio_request(plat->wp_gpio, "sdhci_wp");
+	if (gpio_is_valid(tegra_host->wp_gpio)) {
+		rc = gpio_request(tegra_host->wp_gpio, "sdhci_wp");
 		if (rc) {
 			dev_err(mmc_dev(host->mmc),
 				"failed to allocate wp gpio\n");
 			goto err_wp_req;
 		}
-		gpio_direction_input(plat->wp_gpio);
+		gpio_direction_input(tegra_host->wp_gpio);
 	}
 
 	clk = clk_get(mmc_dev(host->mmc), NULL);
@@ -338,9 +312,7 @@ static int sdhci_tegra_probe(struct platform_device *pdev)
 	clk_prepare_enable(clk);
 	pltfm_host->clk = clk;
 
-	host->mmc->pm_caps = plat->pm_flags;
-
-	if (plat->is_8bit)
+	if (tegra_host->is_8bit)
 		host->mmc->caps |= MMC_CAP_8_BIT_DATA;
 
 	rc = sdhci_add_host(host);
@@ -353,19 +325,19 @@ err_add_host:
 	clk_disable_unprepare(pltfm_host->clk);
 	clk_put(pltfm_host->clk);
 err_clk_get:
-	if (gpio_is_valid(plat->wp_gpio))
-		gpio_free(plat->wp_gpio);
+	if (gpio_is_valid(tegra_host->wp_gpio))
+		gpio_free(tegra_host->wp_gpio);
 err_wp_req:
-	if (gpio_is_valid(plat->cd_gpio))
-		free_irq(gpio_to_irq(plat->cd_gpio), host);
+	if (gpio_is_valid(tegra_host->cd_gpio))
+		free_irq(gpio_to_irq(tegra_host->cd_gpio), host);
 err_cd_irq_req:
-	if (gpio_is_valid(plat->cd_gpio))
-		gpio_free(plat->cd_gpio);
+	if (gpio_is_valid(tegra_host->cd_gpio))
+		gpio_free(tegra_host->cd_gpio);
 err_cd_req:
-	if (gpio_is_valid(plat->power_gpio))
-		gpio_free(plat->power_gpio);
+	if (gpio_is_valid(tegra_host->power_gpio))
+		gpio_free(tegra_host->power_gpio);
 err_power_req:
-err_no_plat:
+err_alloc_tegra_host:
 	sdhci_pltfm_free(pdev);
 	return rc;
 }
@@ -375,21 +347,20 @@ static int sdhci_tegra_remove(struct platform_device *pdev)
 	struct sdhci_host *host = platform_get_drvdata(pdev);
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
 	struct sdhci_tegra *tegra_host = pltfm_host->priv;
-	const struct tegra_sdhci_platform_data *plat = tegra_host->plat;
 	int dead = (readl(host->ioaddr + SDHCI_INT_STATUS) == 0xffffffff);
 
 	sdhci_remove_host(host, dead);
 
-	if (gpio_is_valid(plat->wp_gpio))
-		gpio_free(plat->wp_gpio);
+	if (gpio_is_valid(tegra_host->wp_gpio))
+		gpio_free(tegra_host->wp_gpio);
 
-	if (gpio_is_valid(plat->cd_gpio)) {
-		free_irq(gpio_to_irq(plat->cd_gpio), host);
-		gpio_free(plat->cd_gpio);
+	if (gpio_is_valid(tegra_host->cd_gpio)) {
+		free_irq(gpio_to_irq(tegra_host->cd_gpio), host);
+		gpio_free(tegra_host->cd_gpio);
 	}
 
-	if (gpio_is_valid(plat->power_gpio))
-		gpio_free(plat->power_gpio);
+	if (gpio_is_valid(tegra_host->power_gpio))
+		gpio_free(tegra_host->power_gpio);
 
 	clk_disable_unprepare(pltfm_host->clk);
 	clk_put(pltfm_host->clk);
diff --git a/include/linux/platform_data/mmc-sdhci-tegra.h b/include/linux/platform_data/mmc-sdhci-tegra.h
deleted file mode 100644
index 8f8430697686..000000000000
--- a/include/linux/platform_data/mmc-sdhci-tegra.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * Copyright (C) 2009 Palm, Inc.
- * Author: Yvonne Yip <y@palm.com>
- *
- * This software is licensed under the terms of the GNU General Public
- * License version 2, as published by the Free Software Foundation, and
- * may be copied, distributed, and modified under those terms.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- */
-#ifndef __PLATFORM_DATA_TEGRA_SDHCI_H
-#define __PLATFORM_DATA_TEGRA_SDHCI_H
-
-#include <linux/mmc/host.h>
-
-struct tegra_sdhci_platform_data {
-	int cd_gpio;
-	int wp_gpio;
-	int power_gpio;
-	int is_8bit;
-	int pm_flags;
-};
-
-#endif
-- 
cgit v1.2.3


From 3f6d078d4accfff8b114f968259a060bfdc7c682 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 24 Feb 2013 13:49:08 -0500
Subject: fix compat truncate/ftruncate

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/arm64/include/asm/unistd32.h  |  4 ++--
 arch/mips/kernel/scall64-o32.S     |  4 ++--
 arch/parisc/kernel/syscall_table.S |  4 ++--
 arch/powerpc/kernel/sys_ppc32.c    | 12 ------------
 arch/s390/kernel/compat_wrapper.S  | 10 ----------
 arch/s390/kernel/syscalls.S        |  4 ++--
 arch/sparc/kernel/sys32.S          |  1 -
 arch/sparc/kernel/systbls_64.S     |  4 ++--
 arch/x86/syscalls/syscall_32.tbl   |  4 ++--
 fs/open.c                          | 15 +++++++++++++++
 include/linux/compat.h             |  2 ++
 11 files changed, 29 insertions(+), 35 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h
index 8153f1a43f0a..12f22492df4c 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -113,8 +113,8 @@ __SYSCALL(88,  sys_reboot)
 __SYSCALL(89,  sys_ni_syscall)			/* 89 was sys_readdir */
 __SYSCALL(90,  sys_ni_syscall)			/* 90 was sys_mmap */
 __SYSCALL(91,  sys_munmap)
-__SYSCALL(92,  sys_truncate)
-__SYSCALL(93,  sys_ftruncate)
+__SYSCALL(92,  compat_sys_truncate)
+__SYSCALL(93,  compat_sys_ftruncate)
 __SYSCALL(94,  sys_fchmod)
 __SYSCALL(95,  sys_fchown16)
 __SYSCALL(96,  sys_getpriority)
diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S
index 063cd0d6ddd2..20b100f9d360 100644
--- a/arch/mips/kernel/scall64-o32.S
+++ b/arch/mips/kernel/scall64-o32.S
@@ -284,8 +284,8 @@ sys_call_table:
 	PTR	compat_sys_old_readdir
 	PTR	sys_mips_mmap			/* 4090 */
 	PTR	sys_munmap
-	PTR	sys_truncate
-	PTR	sys_ftruncate
+	PTR	compat_sys_truncate
+	PTR	compat_sys_ftruncate
 	PTR	sys_fchmod
 	PTR	sys_fchown			/* 4095 */
 	PTR	sys_getpriority
diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S
index d0efc0aeb612..884b91b028f0 100644
--- a/arch/parisc/kernel/syscall_table.S
+++ b/arch/parisc/kernel/syscall_table.S
@@ -165,8 +165,8 @@
 	ENTRY_SAME(mmap2)
 	ENTRY_SAME(mmap)		/* 90 */
 	ENTRY_SAME(munmap)
-	ENTRY_SAME(truncate)
-	ENTRY_SAME(ftruncate)
+	ENTRY_COMP(truncate)
+	ENTRY_COMP(ftruncate)
 	ENTRY_SAME(fchmod)
 	ENTRY_SAME(fchown)		/* 95 */
 	ENTRY_SAME(getpriority)
diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c
index 5677a36f450b..d0bafc0cdf06 100644
--- a/arch/powerpc/kernel/sys_ppc32.c
+++ b/arch/powerpc/kernel/sys_ppc32.c
@@ -146,18 +146,6 @@ asmlinkage long compat_sys_sendfile64_wrapper(u32 out_fd, u32 in_fd,
 			    (off_t __user *)offset, count);
 }
 
-long compat_sys_truncate(const char __user * path, u32 length)
-{
-	/* sign extend length */
-	return sys_truncate(path, (int)length);
-}
-
-long compat_sys_ftruncate(int fd, u32 length)
-{
-	/* sign extend length */
-	return sys_ftruncate(fd, (int)length);
-}
-
 unsigned long compat_sys_mmap2(unsigned long addr, size_t len,
 			  unsigned long prot, unsigned long flags,
 			  unsigned long fd, unsigned long pgoff)
diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S
index 2b1a3a03244f..3c98c4dc5aca 100644
--- a/arch/s390/kernel/compat_wrapper.S
+++ b/arch/s390/kernel/compat_wrapper.S
@@ -325,16 +325,6 @@ ENTRY(sys32_munmap_wrapper)
 	llgfr	%r3,%r3			# size_t
 	jg	sys_munmap		# branch to system call
 
-ENTRY(sys32_truncate_wrapper)
-	llgtr	%r2,%r2			# const char *
-	lgfr	%r3,%r3			# long
-	jg	sys_truncate		# branch to system call
-
-ENTRY(sys32_ftruncate_wrapper)
-	llgfr	%r2,%r2			# unsigned int
-	llgfr	%r3,%r3			# unsigned long
-	jg	sys_ftruncate		# branch to system call
-
 ENTRY(sys32_fchmod_wrapper)
 	llgfr	%r2,%r2			# unsigned int
 	llgfr	%r3,%r3			# mode_t
diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S
index 0e5262f01007..630b935d1284 100644
--- a/arch/s390/kernel/syscalls.S
+++ b/arch/s390/kernel/syscalls.S
@@ -100,8 +100,8 @@ SYSCALL(sys_reboot,sys_reboot,sys32_reboot_wrapper)
 SYSCALL(sys_ni_syscall,sys_ni_syscall,old32_readdir_wrapper)	/* old readdir syscall */
 SYSCALL(sys_old_mmap,sys_old_mmap,old32_mmap_wrapper)		/* 90 */
 SYSCALL(sys_munmap,sys_munmap,sys32_munmap_wrapper)
-SYSCALL(sys_truncate,sys_truncate,sys32_truncate_wrapper)
-SYSCALL(sys_ftruncate,sys_ftruncate,sys32_ftruncate_wrapper)
+SYSCALL(sys_truncate,sys_truncate,compat_sys_truncate)
+SYSCALL(sys_ftruncate,sys_ftruncate,compat_sys_ftruncate)
 SYSCALL(sys_fchmod,sys_fchmod,sys32_fchmod_wrapper)
 SYSCALL(sys_fchown16,sys_ni_syscall,sys32_fchown16_wrapper)	/* 95 old fchown16 syscall*/
 SYSCALL(sys_getpriority,sys_getpriority,sys32_getpriority_wrapper)
diff --git a/arch/sparc/kernel/sys32.S b/arch/sparc/kernel/sys32.S
index be3d65a3c270..240a3cecc11e 100644
--- a/arch/sparc/kernel/sys32.S
+++ b/arch/sparc/kernel/sys32.S
@@ -52,7 +52,6 @@ SIGN1(sys32_recvmsg, compat_sys_recvmsg, %o0)
 SIGN1(sys32_sendmsg, compat_sys_sendmsg, %o0)
 SIGN2(sys32_sync_file_range, compat_sync_file_range, %o0, %o5)
 SIGN1(sys32_vmsplice, compat_sys_vmsplice, %o0)
-SIGN1(sys32_truncate, sys_truncate, %o1)
 
 	.globl		sys32_mmap2
 sys32_mmap2:
diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S
index 9ed517c5037b..088134834dab 100644
--- a/arch/sparc/kernel/systbls_64.S
+++ b/arch/sparc/kernel/systbls_64.S
@@ -43,8 +43,8 @@ sys_call_table32:
 /*110*/	.word sys_setresgid, sys_getresgid, sys_setregid, sys_nis_syscall, sys_nis_syscall
 	.word sys_getgroups, compat_sys_gettimeofday, sys32_getrusage, sys_nis_syscall, sys_getcwd
 /*120*/	.word compat_sys_readv, compat_sys_writev, compat_sys_settimeofday, sys_fchown16, sys_fchmod
-	.word sys_nis_syscall, sys_setreuid16, sys_setregid16, sys_rename, sys32_truncate
-/*130*/	.word sys_ftruncate, sys_flock, compat_sys_lstat64, sys_nis_syscall, sys_nis_syscall
+	.word sys_nis_syscall, sys_setreuid16, sys_setregid16, sys_rename, compat_sys_truncate
+/*130*/	.word compat_sys_ftruncate, sys_flock, compat_sys_lstat64, sys_nis_syscall, sys_nis_syscall
 	.word sys_nis_syscall, sys_mkdir, sys_rmdir, compat_sys_utimes, compat_sys_stat64
 /*140*/	.word sys_sendfile64, sys_nis_syscall, sys32_futex, sys_gettid, compat_sys_getrlimit
 	.word compat_sys_setrlimit, sys_pivot_root, sys_prctl, sys_pciconfig_read, sys_pciconfig_write
diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl
index f51810be1a32..e6d55f0064df 100644
--- a/arch/x86/syscalls/syscall_32.tbl
+++ b/arch/x86/syscalls/syscall_32.tbl
@@ -98,8 +98,8 @@
 89	i386	readdir			sys_old_readdir			compat_sys_old_readdir
 90	i386	mmap			sys_old_mmap			sys32_mmap
 91	i386	munmap			sys_munmap
-92	i386	truncate		sys_truncate
-93	i386	ftruncate		sys_ftruncate
+92	i386	truncate		sys_truncate			compat_sys_truncate
+93	i386	ftruncate		sys_ftruncate			compat_sys_ftruncate
 94	i386	fchmod			sys_fchmod
 95	i386	fchown			sys_fchown16
 96	i386	getpriority		sys_getpriority
diff --git a/fs/open.c b/fs/open.c
index 9b33c0cbfacf..669ba0dd6667 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -30,6 +30,7 @@
 #include <linux/fs_struct.h>
 #include <linux/ima.h>
 #include <linux/dnotify.h>
+#include <linux/compat.h>
 
 #include "internal.h"
 
@@ -140,6 +141,13 @@ SYSCALL_DEFINE2(truncate, const char __user *, path, long, length)
 	return do_sys_truncate(path, length);
 }
 
+#ifdef CONFIG_COMPAT
+COMPAT_SYSCALL_DEFINE2(truncate, const char __user *, path, compat_off_t, length)
+{
+	return do_sys_truncate(path, length);
+}
+#endif
+
 static long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
 {
 	struct inode *inode;
@@ -195,6 +203,13 @@ SYSCALL_DEFINE2(ftruncate, unsigned int, fd, unsigned long, length)
 	return ret;
 }
 
+#ifdef CONFIG_COMPAT
+COMPAT_SYSCALL_DEFINE2(ftruncate, unsigned int, fd, compat_ulong_t, length)
+{
+	return do_sys_ftruncate(fd, length, 1);
+}
+#endif
+
 /* LFS versions of truncate are only needed on 32 bit machines */
 #if BITS_PER_LONG == 32
 SYSCALL_DEFINE(truncate64)(const char __user * path, loff_t length)
diff --git a/include/linux/compat.h b/include/linux/compat.h
index 59c72048bf20..76a87fb57ac2 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -536,6 +536,8 @@ asmlinkage long compat_sys_openat(int dfd, const char __user *filename,
 asmlinkage long compat_sys_open_by_handle_at(int mountdirfd,
 					     struct file_handle __user *handle,
 					     int flags);
+asmlinkage long compat_sys_truncate(const char __user *, compat_off_t);
+asmlinkage long compat_sys_ftruncate(unsigned int, compat_ulong_t);
 asmlinkage long compat_sys_pselect6(int n, compat_ulong_t __user *inp,
 				    compat_ulong_t __user *outp,
 				    compat_ulong_t __user *exp,
-- 
cgit v1.2.3


From 446d64e3e1154806092ac27de198dff1225797d9 Mon Sep 17 00:00:00 2001
From: Mimi Zohar <zohar@linux.vnet.ibm.com>
Date: Sun, 24 Feb 2013 23:42:37 -0500
Subject: block: fix part_pack_uuid() build error

Commit "85865c1 ima: add policy support for file system uuid"
introduced a CONFIG_BLOCK dependency.  This patch defines a
wrapper called blk_part_pack_uuid(), which returns -EINVAL,
when CONFIG_BLOCK is not defined.

security/integrity/ima/ima_policy.c:538:4: error: implicit declaration
of function 'part_pack_uuid' [-Werror=implicit-function-declaration]

Changelog v2:
- Reference commit number in patch description
Changelog v1:
- rename ima_part_pack_uuid() to blk_part_pack_uuid()
- resolve scripts/checkpatch.pl warnings
Changelog v0:
- fix UUID scripts/Lindent msgs

Reported-by: Randy Dunlap <rdunlap@infradead.org>
Reported-by: David Rientjes <rientjes@google.com>
Signed-off-by: Mimi Zohar <zohar@linux.vnet.ibm.com>
Acked-by: David Rientjes <rientjes@google.com>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Cc: Jens Axboe <axboe@kernel.dk>
Signed-off-by: James Morris <james.l.morris@oracle.com>
---
 include/linux/genhd.h               | 10 ++++++++++
 security/integrity/ima/ima_policy.c | 11 ++++++-----
 2 files changed, 16 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 79b8bba19363..9f3c275e053e 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -231,6 +231,12 @@ static inline void part_pack_uuid(const u8 *uuid_str, u8 *to)
 	}
 }
 
+static inline int blk_part_pack_uuid(const u8 *uuid_str, u8 *to)
+{
+	part_pack_uuid(uuid_str, to);
+	return 0;
+}
+
 static inline int disk_max_parts(struct gendisk *disk)
 {
 	if (disk->flags & GENHD_FL_EXT_DEVT)
@@ -718,6 +724,10 @@ static inline dev_t blk_lookup_devt(const char *name, int partno)
 	return devt;
 }
 
+static inline int blk_part_pack_uuid(const u8 *uuid_str, u8 *to)
+{
+	return -EINVAL;
+}
 #endif /* CONFIG_BLOCK */
 
 #endif /* _LINUX_GENHD_H */
diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c
index b27535a13a79..399433ad614e 100644
--- a/security/integrity/ima/ima_policy.c
+++ b/security/integrity/ima/ima_policy.c
@@ -176,7 +176,7 @@ static bool ima_match_rules(struct ima_rule_entry *rule,
 	    && rule->fsmagic != inode->i_sb->s_magic)
 		return false;
 	if ((rule->flags & IMA_FSUUID) &&
-		memcmp(rule->fsuuid, inode->i_sb->s_uuid, sizeof(rule->fsuuid)))
+	    memcmp(rule->fsuuid, inode->i_sb->s_uuid, sizeof(rule->fsuuid)))
 		return false;
 	if ((rule->flags & IMA_UID) && !uid_eq(rule->uid, cred->uid))
 		return false;
@@ -530,14 +530,15 @@ static int ima_parse_rule(char *rule, struct ima_rule_entry *entry)
 			ima_log_string(ab, "fsuuid", args[0].from);
 
 			if (memchr_inv(entry->fsuuid, 0x00,
-			    sizeof(entry->fsuuid))) {
+				       sizeof(entry->fsuuid))) {
 				result = -EINVAL;
 				break;
 			}
 
-			part_pack_uuid(args[0].from, entry->fsuuid);
-			entry->flags |= IMA_FSUUID;
-			result = 0;
+			result = blk_part_pack_uuid(args[0].from,
+						    entry->fsuuid);
+			if (!result)
+				entry->flags |= IMA_FSUUID;
 			break;
 		case Opt_uid:
 			ima_log_string(ab, "uid", args[0].from);
-- 
cgit v1.2.3


From 804d6a89a5c0b076317966bcbcd7a63d42241831 Mon Sep 17 00:00:00 2001
From: Shani Michaeli <shanim@mellanox.com>
Date: Wed, 6 Feb 2013 16:19:14 +0000
Subject: mlx4: Implement memory windows allocation and deallocation

Implement MW allocation and deallocation in mlx4_core and mlx4_ib.
Pass down the enable bind flag when registering memory regions.

Signed-off-by: Haggai Eran <haggaie@mellanox.com>
Signed-off-by: Shani Michaeli <shanim@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
---
 drivers/infiniband/hw/mlx4/mlx4_ib.h    | 12 +++++
 drivers/infiniband/hw/mlx4/mr.c         | 52 ++++++++++++++++++
 drivers/net/ethernet/mellanox/mlx4/mr.c | 95 +++++++++++++++++++++++++++++++++
 include/linux/mlx4/device.h             | 20 ++++++-
 4 files changed, 178 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index cce5dde94bc1..9ba0aaf3a58e 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -116,6 +116,11 @@ struct mlx4_ib_mr {
 	struct ib_umem	       *umem;
 };
 
+struct mlx4_ib_mw {
+	struct ib_mw		ibmw;
+	struct mlx4_mw		mmw;
+};
+
 struct mlx4_ib_fast_reg_page_list {
 	struct ib_fast_reg_page_list	ibfrpl;
 	__be64			       *mapped_page_list;
@@ -533,6 +538,11 @@ static inline struct mlx4_ib_mr *to_mmr(struct ib_mr *ibmr)
 	return container_of(ibmr, struct mlx4_ib_mr, ibmr);
 }
 
+static inline struct mlx4_ib_mw *to_mmw(struct ib_mw *ibmw)
+{
+	return container_of(ibmw, struct mlx4_ib_mw, ibmw);
+}
+
 static inline struct mlx4_ib_fast_reg_page_list *to_mfrpl(struct ib_fast_reg_page_list *ibfrpl)
 {
 	return container_of(ibfrpl, struct mlx4_ib_fast_reg_page_list, ibfrpl);
@@ -581,6 +591,8 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 				  u64 virt_addr, int access_flags,
 				  struct ib_udata *udata);
 int mlx4_ib_dereg_mr(struct ib_mr *mr);
+struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type);
+int mlx4_ib_dealloc_mw(struct ib_mw *mw);
 struct ib_mr *mlx4_ib_alloc_fast_reg_mr(struct ib_pd *pd,
 					int max_page_list_len);
 struct ib_fast_reg_page_list *mlx4_ib_alloc_fast_reg_page_list(struct ib_device *ibdev,
diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c
index 254e1cf26439..5adf4c47ee18 100644
--- a/drivers/infiniband/hw/mlx4/mr.c
+++ b/drivers/infiniband/hw/mlx4/mr.c
@@ -41,9 +41,19 @@ static u32 convert_access(int acc)
 	       (acc & IB_ACCESS_REMOTE_WRITE  ? MLX4_PERM_REMOTE_WRITE : 0) |
 	       (acc & IB_ACCESS_REMOTE_READ   ? MLX4_PERM_REMOTE_READ  : 0) |
 	       (acc & IB_ACCESS_LOCAL_WRITE   ? MLX4_PERM_LOCAL_WRITE  : 0) |
+	       (acc & IB_ACCESS_MW_BIND	      ? MLX4_PERM_BIND_MW      : 0) |
 	       MLX4_PERM_LOCAL_READ;
 }
 
+static enum mlx4_mw_type to_mlx4_type(enum ib_mw_type type)
+{
+	switch (type) {
+	case IB_MW_TYPE_1:	return MLX4_MW_TYPE_1;
+	case IB_MW_TYPE_2:	return MLX4_MW_TYPE_2;
+	default:		return -1;
+	}
+}
+
 struct ib_mr *mlx4_ib_get_dma_mr(struct ib_pd *pd, int acc)
 {
 	struct mlx4_ib_mr *mr;
@@ -189,6 +199,48 @@ int mlx4_ib_dereg_mr(struct ib_mr *ibmr)
 	return 0;
 }
 
+struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type)
+{
+	struct mlx4_ib_dev *dev = to_mdev(pd->device);
+	struct mlx4_ib_mw *mw;
+	int err;
+
+	mw = kmalloc(sizeof(*mw), GFP_KERNEL);
+	if (!mw)
+		return ERR_PTR(-ENOMEM);
+
+	err = mlx4_mw_alloc(dev->dev, to_mpd(pd)->pdn,
+			    to_mlx4_type(type), &mw->mmw);
+	if (err)
+		goto err_free;
+
+	err = mlx4_mw_enable(dev->dev, &mw->mmw);
+	if (err)
+		goto err_mw;
+
+	mw->ibmw.rkey = mw->mmw.key;
+
+	return &mw->ibmw;
+
+err_mw:
+	mlx4_mw_free(dev->dev, &mw->mmw);
+
+err_free:
+	kfree(mw);
+
+	return ERR_PTR(err);
+}
+
+int mlx4_ib_dealloc_mw(struct ib_mw *ibmw)
+{
+	struct mlx4_ib_mw *mw = to_mmw(ibmw);
+
+	mlx4_mw_free(to_mdev(ibmw->device)->dev, &mw->mmw);
+	kfree(mw);
+
+	return 0;
+}
+
 struct ib_mr *mlx4_ib_alloc_fast_reg_mr(struct ib_pd *pd,
 					int max_page_list_len)
 {
diff --git a/drivers/net/ethernet/mellanox/mlx4/mr.c b/drivers/net/ethernet/mellanox/mlx4/mr.c
index 5e785bdcc694..602ca9bf78e4 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mr.c
+++ b/drivers/net/ethernet/mellanox/mlx4/mr.c
@@ -654,6 +654,101 @@ int mlx4_buf_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
 }
 EXPORT_SYMBOL_GPL(mlx4_buf_write_mtt);
 
+int mlx4_mw_alloc(struct mlx4_dev *dev, u32 pd, enum mlx4_mw_type type,
+		  struct mlx4_mw *mw)
+{
+	u32 index;
+
+	if ((type == MLX4_MW_TYPE_1 &&
+	     !(dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW)) ||
+	     (type == MLX4_MW_TYPE_2 &&
+	     !(dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN)))
+		return -ENOTSUPP;
+
+	index = mlx4_mpt_reserve(dev);
+	if (index == -1)
+		return -ENOMEM;
+
+	mw->key	    = hw_index_to_key(index);
+	mw->pd      = pd;
+	mw->type    = type;
+	mw->enabled = MLX4_MPT_DISABLED;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(mlx4_mw_alloc);
+
+int mlx4_mw_enable(struct mlx4_dev *dev, struct mlx4_mw *mw)
+{
+	struct mlx4_cmd_mailbox *mailbox;
+	struct mlx4_mpt_entry *mpt_entry;
+	int err;
+
+	err = mlx4_mpt_alloc_icm(dev, key_to_hw_index(mw->key));
+	if (err)
+		return err;
+
+	mailbox = mlx4_alloc_cmd_mailbox(dev);
+	if (IS_ERR(mailbox)) {
+		err = PTR_ERR(mailbox);
+		goto err_table;
+	}
+	mpt_entry = mailbox->buf;
+
+	memset(mpt_entry, 0, sizeof(*mpt_entry));
+
+	/* Note that the MLX4_MPT_FLAG_REGION bit in mpt_entry->flags is turned
+	 * off, thus creating a memory window and not a memory region.
+	 */
+	mpt_entry->key	       = cpu_to_be32(key_to_hw_index(mw->key));
+	mpt_entry->pd_flags    = cpu_to_be32(mw->pd);
+	if (mw->type == MLX4_MW_TYPE_2) {
+		mpt_entry->flags    |= cpu_to_be32(MLX4_MPT_FLAG_FREE);
+		mpt_entry->qpn       = cpu_to_be32(MLX4_MPT_QP_FLAG_BOUND_QP);
+		mpt_entry->pd_flags |= cpu_to_be32(MLX4_MPT_PD_FLAG_EN_INV);
+	}
+
+	err = mlx4_SW2HW_MPT(dev, mailbox,
+			     key_to_hw_index(mw->key) &
+			     (dev->caps.num_mpts - 1));
+	if (err) {
+		mlx4_warn(dev, "SW2HW_MPT failed (%d)\n", err);
+		goto err_cmd;
+	}
+	mw->enabled = MLX4_MPT_EN_HW;
+
+	mlx4_free_cmd_mailbox(dev, mailbox);
+
+	return 0;
+
+err_cmd:
+	mlx4_free_cmd_mailbox(dev, mailbox);
+
+err_table:
+	mlx4_mpt_free_icm(dev, key_to_hw_index(mw->key));
+	return err;
+}
+EXPORT_SYMBOL_GPL(mlx4_mw_enable);
+
+void mlx4_mw_free(struct mlx4_dev *dev, struct mlx4_mw *mw)
+{
+	int err;
+
+	if (mw->enabled == MLX4_MPT_EN_HW) {
+		err = mlx4_HW2SW_MPT(dev, NULL,
+				     key_to_hw_index(mw->key) &
+				     (dev->caps.num_mpts - 1));
+		if (err)
+			mlx4_warn(dev, "xxx HW2SW_MPT failed (%d)\n", err);
+
+		mw->enabled = MLX4_MPT_EN_SW;
+	}
+	if (mw->enabled)
+		mlx4_mpt_free_icm(dev, key_to_hw_index(mw->key));
+	mlx4_mpt_release(dev, key_to_hw_index(mw->key));
+}
+EXPORT_SYMBOL_GPL(mlx4_mw_free);
+
 int mlx4_init_mr_table(struct mlx4_dev *dev)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index e9fe8caaf8bb..67b4695e5940 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -170,6 +170,7 @@ enum {
 #define MLX4_ATTR_EXTENDED_PORT_INFO	cpu_to_be16(0xff90)
 
 enum {
+	MLX4_BMME_FLAG_WIN_TYPE_2B	= 1 <<  1,
 	MLX4_BMME_FLAG_LOCAL_INV	= 1 <<  6,
 	MLX4_BMME_FLAG_REMOTE_INV	= 1 <<  7,
 	MLX4_BMME_FLAG_TYPE_2_WIN	= 1 <<  9,
@@ -237,7 +238,8 @@ enum {
 	MLX4_PERM_LOCAL_WRITE	= 1 << 11,
 	MLX4_PERM_REMOTE_READ	= 1 << 12,
 	MLX4_PERM_REMOTE_WRITE	= 1 << 13,
-	MLX4_PERM_ATOMIC	= 1 << 14
+	MLX4_PERM_ATOMIC	= 1 << 14,
+	MLX4_PERM_BIND_MW	= 1 << 15,
 };
 
 enum {
@@ -503,6 +505,18 @@ struct mlx4_mr {
 	int			enabled;
 };
 
+enum mlx4_mw_type {
+	MLX4_MW_TYPE_1 = 1,
+	MLX4_MW_TYPE_2 = 2,
+};
+
+struct mlx4_mw {
+	u32			key;
+	u32			pd;
+	enum mlx4_mw_type	type;
+	int			enabled;
+};
+
 struct mlx4_fmr {
 	struct mlx4_mr		mr;
 	struct mlx4_mpt_entry  *mpt;
@@ -803,6 +817,10 @@ int mlx4_mr_alloc(struct mlx4_dev *dev, u32 pd, u64 iova, u64 size, u32 access,
 		  int npages, int page_shift, struct mlx4_mr *mr);
 int mlx4_mr_free(struct mlx4_dev *dev, struct mlx4_mr *mr);
 int mlx4_mr_enable(struct mlx4_dev *dev, struct mlx4_mr *mr);
+int mlx4_mw_alloc(struct mlx4_dev *dev, u32 pd, enum mlx4_mw_type type,
+		  struct mlx4_mw *mw);
+void mlx4_mw_free(struct mlx4_dev *dev, struct mlx4_mw *mw);
+int mlx4_mw_enable(struct mlx4_dev *dev, struct mlx4_mw *mw);
 int mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
 		   int start_index, int npages, u64 *page_list);
 int mlx4_buf_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
-- 
cgit v1.2.3


From 6ff63e194066a1f14aee805366a1d79c541fddae Mon Sep 17 00:00:00 2001
From: Shani Michaeli <shanim@mellanox.com>
Date: Wed, 6 Feb 2013 16:19:15 +0000
Subject: IB/mlx4: Support memory window binding

* Implement memory windows binding in mlx4_ib_post_send.

* Implement mlx4_ib_bind_mw by deferring to mlx4_ib_post_send.

* Rename MLX4_WQE_FMR_PERM_* flags to MLX4_WQE_FMR_AND_BIND_PERM_*,
  indicating that they are used both for fast registration work
  requests, and for memory window bind work requests.

Signed-off-by: Haggai Eran <haggaie@mellanox.com>
Signed-off-by: Shani Michaeli <shanim@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
---
 drivers/infiniband/hw/mlx4/mlx4_ib.h |  2 ++
 drivers/infiniband/hw/mlx4/mr.c      | 22 ++++++++++++++++++++++
 drivers/infiniband/hw/mlx4/qp.c      | 35 ++++++++++++++++++++++++++++++++---
 include/linux/mlx4/qp.h              | 11 ++++++++---
 4 files changed, 64 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index 9ba0aaf3a58e..f61ec26500c4 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -592,6 +592,8 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 				  struct ib_udata *udata);
 int mlx4_ib_dereg_mr(struct ib_mr *mr);
 struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type);
+int mlx4_ib_bind_mw(struct ib_qp *qp, struct ib_mw *mw,
+		    struct ib_mw_bind *mw_bind);
 int mlx4_ib_dealloc_mw(struct ib_mw *mw);
 struct ib_mr *mlx4_ib_alloc_fast_reg_mr(struct ib_pd *pd,
 					int max_page_list_len);
diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c
index 5adf4c47ee18..e471f089ff00 100644
--- a/drivers/infiniband/hw/mlx4/mr.c
+++ b/drivers/infiniband/hw/mlx4/mr.c
@@ -231,6 +231,28 @@ err_free:
 	return ERR_PTR(err);
 }
 
+int mlx4_ib_bind_mw(struct ib_qp *qp, struct ib_mw *mw,
+		    struct ib_mw_bind *mw_bind)
+{
+	struct ib_send_wr  wr;
+	struct ib_send_wr *bad_wr;
+	int ret;
+
+	memset(&wr, 0, sizeof(wr));
+	wr.opcode               = IB_WR_BIND_MW;
+	wr.wr_id                = mw_bind->wr_id;
+	wr.send_flags           = mw_bind->send_flags;
+	wr.wr.bind_mw.mw        = mw;
+	wr.wr.bind_mw.bind_info = mw_bind->bind_info;
+	wr.wr.bind_mw.rkey      = ib_inc_rkey(mw->rkey);
+
+	ret = mlx4_ib_post_send(qp, &wr, &bad_wr);
+	if (!ret)
+		mw->rkey = wr.wr.bind_mw.rkey;
+
+	return ret;
+}
+
 int mlx4_ib_dealloc_mw(struct ib_mw *ibmw)
 {
 	struct mlx4_ib_mw *mw = to_mmw(ibmw);
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index ebadebe83b11..35cced2a4da8 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -104,6 +104,7 @@ static const __be32 mlx4_ib_opcode[] = {
 	[IB_WR_FAST_REG_MR]			= cpu_to_be32(MLX4_OPCODE_FMR),
 	[IB_WR_MASKED_ATOMIC_CMP_AND_SWP]	= cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_CS),
 	[IB_WR_MASKED_ATOMIC_FETCH_AND_ADD]	= cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_FA),
+	[IB_WR_BIND_MW]				= cpu_to_be32(MLX4_OPCODE_BIND_MW),
 };
 
 static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp)
@@ -1953,9 +1954,12 @@ static int mlx4_wq_overflow(struct mlx4_ib_wq *wq, int nreq, struct ib_cq *ib_cq
 
 static __be32 convert_access(int acc)
 {
-	return (acc & IB_ACCESS_REMOTE_ATOMIC ? cpu_to_be32(MLX4_WQE_FMR_PERM_ATOMIC)       : 0) |
-	       (acc & IB_ACCESS_REMOTE_WRITE  ? cpu_to_be32(MLX4_WQE_FMR_PERM_REMOTE_WRITE) : 0) |
-	       (acc & IB_ACCESS_REMOTE_READ   ? cpu_to_be32(MLX4_WQE_FMR_PERM_REMOTE_READ)  : 0) |
+	return (acc & IB_ACCESS_REMOTE_ATOMIC ?
+		cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_ATOMIC)       : 0) |
+	       (acc & IB_ACCESS_REMOTE_WRITE  ?
+		cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_WRITE) : 0) |
+	       (acc & IB_ACCESS_REMOTE_READ   ?
+		cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_READ)  : 0) |
 	       (acc & IB_ACCESS_LOCAL_WRITE   ? cpu_to_be32(MLX4_WQE_FMR_PERM_LOCAL_WRITE)  : 0) |
 		cpu_to_be32(MLX4_WQE_FMR_PERM_LOCAL_READ);
 }
@@ -1981,6 +1985,24 @@ static void set_fmr_seg(struct mlx4_wqe_fmr_seg *fseg, struct ib_send_wr *wr)
 	fseg->reserved[1]	= 0;
 }
 
+static void set_bind_seg(struct mlx4_wqe_bind_seg *bseg, struct ib_send_wr *wr)
+{
+	bseg->flags1 =
+		convert_access(wr->wr.bind_mw.bind_info.mw_access_flags) &
+		cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_READ  |
+			    MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_WRITE |
+			    MLX4_WQE_FMR_AND_BIND_PERM_ATOMIC);
+	bseg->flags2 = 0;
+	if (wr->wr.bind_mw.mw->type == IB_MW_TYPE_2)
+		bseg->flags2 |= cpu_to_be32(MLX4_WQE_BIND_TYPE_2);
+	if (wr->wr.bind_mw.bind_info.mw_access_flags & IB_ZERO_BASED)
+		bseg->flags2 |= cpu_to_be32(MLX4_WQE_BIND_ZERO_BASED);
+	bseg->new_rkey = cpu_to_be32(wr->wr.bind_mw.rkey);
+	bseg->lkey = cpu_to_be32(wr->wr.bind_mw.bind_info.mr->lkey);
+	bseg->addr = cpu_to_be64(wr->wr.bind_mw.bind_info.addr);
+	bseg->length = cpu_to_be64(wr->wr.bind_mw.bind_info.length);
+}
+
 static void set_local_inv_seg(struct mlx4_wqe_local_inval_seg *iseg, u32 rkey)
 {
 	memset(iseg, 0, sizeof(*iseg));
@@ -2289,6 +2311,13 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 				size += sizeof (struct mlx4_wqe_fmr_seg) / 16;
 				break;
 
+			case IB_WR_BIND_MW:
+				ctrl->srcrb_flags |=
+					cpu_to_be32(MLX4_WQE_CTRL_STRONG_ORDER);
+				set_bind_seg(wqe, wr);
+				wqe  += sizeof(struct mlx4_wqe_bind_seg);
+				size += sizeof(struct mlx4_wqe_bind_seg) / 16;
+				break;
 			default:
 				/* No extra segments required for sends */
 				break;
diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h
index 6c8a68c602be..67f46ad6920a 100644
--- a/include/linux/mlx4/qp.h
+++ b/include/linux/mlx4/qp.h
@@ -265,6 +265,11 @@ struct mlx4_wqe_lso_seg {
 	__be32			header[0];
 };
 
+enum mlx4_wqe_bind_seg_flags2 {
+	MLX4_WQE_BIND_ZERO_BASED = (1 << 30),
+	MLX4_WQE_BIND_TYPE_2     = (1 << 31),
+};
+
 struct mlx4_wqe_bind_seg {
 	__be32			flags1;
 	__be32			flags2;
@@ -277,9 +282,9 @@ struct mlx4_wqe_bind_seg {
 enum {
 	MLX4_WQE_FMR_PERM_LOCAL_READ	= 1 << 27,
 	MLX4_WQE_FMR_PERM_LOCAL_WRITE	= 1 << 28,
-	MLX4_WQE_FMR_PERM_REMOTE_READ	= 1 << 29,
-	MLX4_WQE_FMR_PERM_REMOTE_WRITE	= 1 << 30,
-	MLX4_WQE_FMR_PERM_ATOMIC	= 1 << 31
+	MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_READ	= 1 << 29,
+	MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_WRITE	= 1 << 30,
+	MLX4_WQE_FMR_AND_BIND_PERM_ATOMIC	= 1 << 31
 };
 
 struct mlx4_wqe_fmr_seg {
-- 
cgit v1.2.3


From e72837e3e7bae3f182c4ac63c9424e86f1158dd0 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 17 Feb 2013 17:47:04 -0500
Subject: default SET_PERSONALITY() in linux/elf.h

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/avr32/include/asm/elf.h    | 3 ---
 arch/blackfin/include/asm/elf.h | 3 ---
 arch/c6x/include/asm/elf.h      | 3 ---
 arch/cris/include/asm/elf.h     | 3 ---
 arch/frv/include/asm/elf.h      | 3 ---
 arch/h8300/include/asm/elf.h    | 3 ---
 arch/hexagon/include/asm/elf.h  | 5 -----
 arch/ia64/include/asm/elf.h     | 3 ---
 arch/m32r/include/asm/elf.h     | 3 ---
 arch/m68k/include/asm/elf.h     | 3 ---
 arch/mn10300/include/asm/elf.h  | 5 -----
 arch/openrisc/include/asm/elf.h | 3 ---
 arch/powerpc/include/asm/elf.h  | 2 --
 arch/s390/include/asm/elf.h     | 5 +----
 arch/score/include/asm/elf.h    | 5 -----
 arch/sparc/include/asm/elf_32.h | 3 ---
 include/linux/elf.h             | 4 ++++
 17 files changed, 5 insertions(+), 54 deletions(-)

(limited to 'include/linux')

diff --git a/arch/avr32/include/asm/elf.h b/arch/avr32/include/asm/elf.h
index e2c328739808..d232888b99d5 100644
--- a/arch/avr32/include/asm/elf.h
+++ b/arch/avr32/include/asm/elf.h
@@ -102,7 +102,4 @@ typedef struct user_fpu_struct elf_fpregset_t;
 
 #define ELF_PLATFORM  (NULL)
 
-#define SET_PERSONALITY(ex) \
-	set_personality(PER_LINUX_32BIT | (current->personality & (~PER_MASK)))
-
 #endif /* __ASM_AVR32_ELF_H */
diff --git a/arch/blackfin/include/asm/elf.h b/arch/blackfin/include/asm/elf.h
index 14bc98ff668f..d15cb9b5d52c 100644
--- a/arch/blackfin/include/asm/elf.h
+++ b/arch/blackfin/include/asm/elf.h
@@ -132,7 +132,4 @@ do {											\
 
 #define ELF_PLATFORM  (NULL)
 
-#define SET_PERSONALITY(ex) \
-	set_personality(PER_LINUX | (current->personality & (~PER_MASK)))
-
 #endif
diff --git a/arch/c6x/include/asm/elf.h b/arch/c6x/include/asm/elf.h
index 32b997126adf..9a4dfc5eb249 100644
--- a/arch/c6x/include/asm/elf.h
+++ b/arch/c6x/include/asm/elf.h
@@ -77,9 +77,6 @@ do {								\
 
 #define ELF_PLATFORM  (NULL)
 
-#define SET_PERSONALITY(ex) \
-	set_personality(PER_LINUX | (current->personality & (~PER_MASK)))
-
 /* C6X specific section types */
 #define SHT_C6000_UNWIND	0x70000001
 #define SHT_C6000_PREEMPTMAP	0x70000002
diff --git a/arch/cris/include/asm/elf.h b/arch/cris/include/asm/elf.h
index 8182f2dc89d0..30ded8fbf592 100644
--- a/arch/cris/include/asm/elf.h
+++ b/arch/cris/include/asm/elf.h
@@ -86,7 +86,4 @@ typedef unsigned long elf_fpregset_t;
 
 #define ELF_PLATFORM  (NULL)
 
-#define SET_PERSONALITY(ex) \
-	set_personality(PER_LINUX | (current->personality & (~PER_MASK)))
-
 #endif
diff --git a/arch/frv/include/asm/elf.h b/arch/frv/include/asm/elf.h
index 9ccbc80f0b11..2bac6446db41 100644
--- a/arch/frv/include/asm/elf.h
+++ b/arch/frv/include/asm/elf.h
@@ -137,7 +137,4 @@ do {											\
 
 #define ELF_PLATFORM  (NULL)
 
-#define SET_PERSONALITY(ex) \
-	set_personality(PER_LINUX | (current->personality & (~PER_MASK)))
-
 #endif
diff --git a/arch/h8300/include/asm/elf.h b/arch/h8300/include/asm/elf.h
index 41193c396bff..6db71248a82f 100644
--- a/arch/h8300/include/asm/elf.h
+++ b/arch/h8300/include/asm/elf.h
@@ -54,9 +54,6 @@ typedef unsigned long elf_fpregset_t;
 
 #define ELF_PLATFORM  (NULL)
 
-#define SET_PERSONALITY(ex) \
-	set_personality(PER_LINUX | (current->personality & (~PER_MASK)))
-
 #define R_H8_NONE       0
 #define R_H8_DIR32      1
 #define R_H8_DIR32_28   2
diff --git a/arch/hexagon/include/asm/elf.h b/arch/hexagon/include/asm/elf.h
index 1ba4b3bff5ed..1f14e082588e 100644
--- a/arch/hexagon/include/asm/elf.h
+++ b/arch/hexagon/include/asm/elf.h
@@ -216,11 +216,6 @@ do {					\
  */
 #define ELF_PLATFORM  (NULL)
 
-#ifdef __KERNEL__
-#define SET_PERSONALITY(ex) \
-	set_personality(PER_LINUX | (current->personality & (~PER_MASK)))
-#endif
-
 #define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1
 struct linux_binprm;
 extern int arch_setup_additional_pages(struct linux_binprm *bprm,
diff --git a/arch/ia64/include/asm/elf.h b/arch/ia64/include/asm/elf.h
index b5298eb09adb..5a83c5cc3dc8 100644
--- a/arch/ia64/include/asm/elf.h
+++ b/arch/ia64/include/asm/elf.h
@@ -201,9 +201,6 @@ extern void ia64_elf_core_copy_regs (struct pt_regs *src, elf_gregset_t dst);
    relevant until we have real hardware to play with... */
 #define ELF_PLATFORM	NULL
 
-#define SET_PERSONALITY(ex)	\
-	set_personality((current->personality & ~PER_MASK) | PER_LINUX)
-
 #define elf_read_implies_exec(ex, executable_stack)					\
 	((executable_stack!=EXSTACK_DISABLE_X) && ((ex).e_flags & EF_IA_64_LINUX_EXECUTABLE_STACK) != 0)
 
diff --git a/arch/m32r/include/asm/elf.h b/arch/m32r/include/asm/elf.h
index 70896161c636..8acc9da9a15e 100644
--- a/arch/m32r/include/asm/elf.h
+++ b/arch/m32r/include/asm/elf.h
@@ -128,7 +128,4 @@ typedef elf_fpreg_t elf_fpregset_t;
    intent than poking at uname or /proc/cpuinfo.  */
 #define ELF_PLATFORM	(NULL)
 
-#define SET_PERSONALITY(ex) \
-	set_personality(PER_LINUX | (current->personality & (~PER_MASK)))
-
 #endif  /* _ASM_M32R__ELF_H */
diff --git a/arch/m68k/include/asm/elf.h b/arch/m68k/include/asm/elf.h
index f83c1d0a87cf..b1c26de438be 100644
--- a/arch/m68k/include/asm/elf.h
+++ b/arch/m68k/include/asm/elf.h
@@ -113,7 +113,4 @@ typedef struct user_m68kfp_struct elf_fpregset_t;
 
 #define ELF_PLATFORM  (NULL)
 
-#define SET_PERSONALITY(ex) \
-	set_personality(PER_LINUX | (current->personality & (~PER_MASK)))
-
 #endif
diff --git a/arch/mn10300/include/asm/elf.h b/arch/mn10300/include/asm/elf.h
index 4ebd6b3a0a1e..f592d7a9f032 100644
--- a/arch/mn10300/include/asm/elf.h
+++ b/arch/mn10300/include/asm/elf.h
@@ -150,9 +150,4 @@ do {						\
  */
 #define ELF_PLATFORM  (NULL)
 
-#ifdef __KERNEL__
-#define SET_PERSONALITY(ex) \
-	set_personality(PER_LINUX | (current->personality & (~PER_MASK)))
-#endif
-
 #endif /* _ASM_ELF_H */
diff --git a/arch/openrisc/include/asm/elf.h b/arch/openrisc/include/asm/elf.h
index f4aa8a542a22..d334e204bbdd 100644
--- a/arch/openrisc/include/asm/elf.h
+++ b/arch/openrisc/include/asm/elf.h
@@ -62,7 +62,4 @@ extern void dump_elf_thread(elf_greg_t *dest, struct pt_regs *pt);
 
 #define ELF_PLATFORM	(NULL)
 
-#define SET_PERSONALITY(ex) \
-	set_personality(PER_LINUX | (current->personality & (~PER_MASK)))
-
 #endif
diff --git a/arch/powerpc/include/asm/elf.h b/arch/powerpc/include/asm/elf.h
index 6abf0a163233..ac9790fc3836 100644
--- a/arch/powerpc/include/asm/elf.h
+++ b/arch/powerpc/include/asm/elf.h
@@ -103,8 +103,6 @@ do {								\
 # define elf_read_implies_exec(ex, exec_stk) (is_32bit_task() ? \
 		(exec_stk == EXSTACK_DEFAULT) : 0)
 #else 
-# define SET_PERSONALITY(ex) \
-  set_personality(PER_LINUX | (current->personality & (~PER_MASK)))
 # define elf_read_implies_exec(ex, exec_stk) (exec_stk == EXSTACK_DEFAULT)
 #endif /* __powerpc64__ */
 
diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h
index 178ff966a8ba..1bfdf24b85a2 100644
--- a/arch/s390/include/asm/elf.h
+++ b/arch/s390/include/asm/elf.h
@@ -180,10 +180,7 @@ extern unsigned long elf_hwcap;
 extern char elf_platform[];
 #define ELF_PLATFORM (elf_platform)
 
-#ifndef CONFIG_64BIT
-#define SET_PERSONALITY(ex) \
-	set_personality(PER_LINUX | (current->personality & (~PER_MASK)))
-#else /* CONFIG_64BIT */
+#ifdef CONFIG_64BIT
 #define SET_PERSONALITY(ex)					\
 do {								\
 	if (personality(current->personality) != PER_LINUX32)	\
diff --git a/arch/score/include/asm/elf.h b/arch/score/include/asm/elf.h
index 5d566c7a0af2..6a9421c693ca 100644
--- a/arch/score/include/asm/elf.h
+++ b/arch/score/include/asm/elf.h
@@ -52,11 +52,6 @@ typedef elf_fpreg_t	elf_fpregset_t;
 #define ELF_DATA	ELFDATA2LSB
 #define ELF_ARCH	EM_SCORE7
 
-#define SET_PERSONALITY(ex)					\
-do {								\
-	set_personality(PER_LINUX | (current->personality & (~PER_MASK))); \
-} while (0)
-
 struct task_struct;
 struct pt_regs;
 
diff --git a/arch/sparc/include/asm/elf_32.h b/arch/sparc/include/asm/elf_32.h
index ac74a2c98e6d..a24e41fcdde1 100644
--- a/arch/sparc/include/asm/elf_32.h
+++ b/arch/sparc/include/asm/elf_32.h
@@ -128,7 +128,4 @@ typedef struct {
 
 #define ELF_PLATFORM	(NULL)
 
-#define SET_PERSONALITY(ex) \
-	set_personality(PER_LINUX | (current->personality & (~PER_MASK)))
-
 #endif /* !(__ASMSPARC_ELF_H) */
diff --git a/include/linux/elf.h b/include/linux/elf.h
index 8c9048e33463..40a3c0e01b2b 100644
--- a/include/linux/elf.h
+++ b/include/linux/elf.h
@@ -10,6 +10,10 @@
      Override in asm/elf.h as needed.  */
 # define elf_read_implies_exec(ex, have_pt_gnu_stack)	0
 #endif
+#ifndef SET_PERSONALITY
+#define SET_PERSONALITY(ex) \
+	set_personality(PER_LINUX | (current->personality & (~PER_MASK)))
+#endif
 
 #if ELF_CLASS == ELFCLASS32
 
-- 
cgit v1.2.3


From 3dadecce20603aa380023c65e6f55f108fd5e952 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 24 Jan 2013 02:18:08 -0500
Subject: switch vfs_getattr() to struct path

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 drivers/base/devtmpfs.c       |  3 ++-
 drivers/base/firmware_class.c |  2 +-
 drivers/block/loop.c          |  2 +-
 fs/ecryptfs/ecryptfs_kernel.h |  6 ++++++
 fs/ecryptfs/inode.c           |  3 +--
 fs/nfsd/nfs3proc.c            |  5 +----
 fs/nfsd/nfs3xdr.c             | 10 +++++-----
 fs/nfsd/nfs4xdr.c             |  4 ++--
 fs/nfsd/nfsproc.c             | 12 +++---------
 fs/nfsd/nfsxdr.c              |  3 ++-
 fs/nfsd/vfs.h                 |  8 ++++++++
 fs/stat.c                     | 13 ++++++-------
 include/linux/fs.h            |  2 +-
 kernel/module.c               |  2 +-
 14 files changed, 40 insertions(+), 35 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c
index 17cf7cad601e..01fc5b07f951 100644
--- a/drivers/base/devtmpfs.c
+++ b/drivers/base/devtmpfs.c
@@ -302,7 +302,8 @@ static int handle_remove(const char *nodename, struct device *dev)
 
 	if (dentry->d_inode) {
 		struct kstat stat;
-		err = vfs_getattr(parent.mnt, dentry, &stat);
+		struct path p = {.mnt = parent.mnt, .dentry = dentry};
+		err = vfs_getattr(&p, &stat);
 		if (!err && dev_mynode(dev, dentry->d_inode, &stat)) {
 			struct iattr newattrs;
 			/*
diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c
index b392b353be39..a2be09dd7771 100644
--- a/drivers/base/firmware_class.c
+++ b/drivers/base/firmware_class.c
@@ -290,7 +290,7 @@ MODULE_PARM_DESC(path, "customized firmware image search path with a higher prio
 static noinline_for_stack long fw_file_size(struct file *file)
 {
 	struct kstat st;
-	if (vfs_getattr(file->f_path.mnt, file->f_path.dentry, &st))
+	if (vfs_getattr(&file->f_path, &st))
 		return -1;
 	if (!S_ISREG(st.mode))
 		return -1;
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index ae1251270624..8031a8cdd698 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1139,7 +1139,7 @@ loop_get_status(struct loop_device *lo, struct loop_info64 *info)
 
 	if (lo->lo_state != Lo_bound)
 		return -ENXIO;
-	error = vfs_getattr(file->f_path.mnt, file->f_path.dentry, &stat);
+	error = vfs_getattr(&file->f_path, &stat);
 	if (error)
 		return error;
 	memset(info, 0, sizeof(*info));
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index cfb4b9fed520..7e2c6f5d7985 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -509,6 +509,12 @@ ecryptfs_dentry_to_lower_mnt(struct dentry *dentry)
 	return ((struct ecryptfs_dentry_info *)dentry->d_fsdata)->lower_path.mnt;
 }
 
+static inline struct path *
+ecryptfs_dentry_to_lower_path(struct dentry *dentry)
+{
+	return &((struct ecryptfs_dentry_info *)dentry->d_fsdata)->lower_path;
+}
+
 static inline void
 ecryptfs_set_dentry_lower_mnt(struct dentry *dentry, struct vfsmount *lower_mnt)
 {
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index cc7709e7c508..e0f07fb6d56b 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -1027,8 +1027,7 @@ int ecryptfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
 	struct kstat lower_stat;
 	int rc;
 
-	rc = vfs_getattr(ecryptfs_dentry_to_lower_mnt(dentry),
-			 ecryptfs_dentry_to_lower(dentry), &lower_stat);
+	rc = vfs_getattr(ecryptfs_dentry_to_lower_path(dentry), &lower_stat);
 	if (!rc) {
 		fsstack_copy_attr_all(dentry->d_inode,
 				      ecryptfs_inode_to_lower(dentry->d_inode));
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 1fc02dfdc5c4..401289913130 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -43,7 +43,6 @@ static __be32
 nfsd3_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle  *argp,
 					   struct nfsd3_attrstat *resp)
 {
-	int	err;
 	__be32	nfserr;
 
 	dprintk("nfsd: GETATTR(3)  %s\n",
@@ -55,9 +54,7 @@ nfsd3_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle  *argp,
 	if (nfserr)
 		RETURN_STATUS(nfserr);
 
-	err = vfs_getattr(resp->fh.fh_export->ex_path.mnt,
-			  resp->fh.fh_dentry, &resp->stat);
-	nfserr = nfserrno(err);
+	nfserr = fh_getattr(&resp->fh, &resp->stat);
 
 	RETURN_STATUS(nfserr);
 }
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 324c0baf7cda..7af9417be88d 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -11,6 +11,7 @@
 #include "xdr3.h"
 #include "auth.h"
 #include "netns.h"
+#include "vfs.h"
 
 #define NFSDDBG_FACILITY		NFSDDBG_XDR
 
@@ -204,10 +205,10 @@ encode_post_op_attr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp)
 {
 	struct dentry *dentry = fhp->fh_dentry;
 	if (dentry && dentry->d_inode) {
-	        int err;
+	        __be32 err;
 		struct kstat stat;
 
-		err = vfs_getattr(fhp->fh_export->ex_path.mnt, dentry, &stat);
+		err = fh_getattr(fhp, &stat);
 		if (!err) {
 			*p++ = xdr_one;		/* attributes follow */
 			lease_get_mtime(dentry->d_inode, &stat.mtime);
@@ -254,13 +255,12 @@ encode_wcc_data(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp)
  */
 void fill_post_wcc(struct svc_fh *fhp)
 {
-	int err;
+	__be32 err;
 
 	if (fhp->fh_post_saved)
 		printk("nfsd: inode locked twice during operation.\n");
 
-	err = vfs_getattr(fhp->fh_export->ex_path.mnt, fhp->fh_dentry,
-			&fhp->fh_post_attr);
+	err = fh_getattr(fhp, &fhp->fh_post_attr);
 	fhp->fh_post_change = fhp->fh_dentry->d_inode->i_version;
 	if (err) {
 		fhp->fh_post_saved = 0;
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 0dc11586682f..17e70dabe21c 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1997,7 +1997,7 @@ static int get_parent_attributes(struct svc_export *exp, struct kstat *stat)
 		if (path.dentry != path.mnt->mnt_root)
 			break;
 	}
-	err = vfs_getattr(path.mnt, path.dentry, stat);
+	err = vfs_getattr(&path, stat);
 	path_put(&path);
 	return err;
 }
@@ -2050,7 +2050,7 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
 			goto out;
 	}
 
-	err = vfs_getattr(exp->ex_path.mnt, dentry, &stat);
+	err = vfs_getattr(&path, &stat);
 	if (err)
 		goto out_nfserr;
 	if ((bmval0 & (FATTR4_WORD0_FILES_FREE | FATTR4_WORD0_FILES_TOTAL |
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index aad6d457b9e8..54c6b3d3cc79 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -26,17 +26,13 @@ static __be32
 nfsd_return_attrs(__be32 err, struct nfsd_attrstat *resp)
 {
 	if (err) return err;
-	return nfserrno(vfs_getattr(resp->fh.fh_export->ex_path.mnt,
-				    resp->fh.fh_dentry,
-				    &resp->stat));
+	return fh_getattr(&resp->fh, &resp->stat);
 }
 static __be32
 nfsd_return_dirop(__be32 err, struct nfsd_diropres *resp)
 {
 	if (err) return err;
-	return nfserrno(vfs_getattr(resp->fh.fh_export->ex_path.mnt,
-				    resp->fh.fh_dentry,
-				    &resp->stat));
+	return fh_getattr(&resp->fh, &resp->stat);
 }
 /*
  * Get a file's attributes
@@ -150,9 +146,7 @@ nfsd_proc_read(struct svc_rqst *rqstp, struct nfsd_readargs *argp,
 				  &resp->count);
 
 	if (nfserr) return nfserr;
-	return nfserrno(vfs_getattr(resp->fh.fh_export->ex_path.mnt,
-				    resp->fh.fh_dentry,
-				    &resp->stat));
+	return fh_getattr(&resp->fh, &resp->stat);
 }
 
 /*
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index 979b42106979..bf6d3bccdd98 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -4,6 +4,7 @@
  * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
  */
 
+#include "vfs.h"
 #include "xdr.h"
 #include "auth.h"
 
@@ -197,7 +198,7 @@ encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp,
 __be32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp)
 {
 	struct kstat stat;
-	vfs_getattr(fhp->fh_export->ex_path.mnt, fhp->fh_dentry, &stat);
+	fh_getattr(fhp, &stat);	/* BUG */
 	return encode_fattr(rqstp, p, fhp, &stat);
 }
 
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index 359594c393d2..5b5894159f22 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -6,6 +6,7 @@
 #define LINUX_NFSD_VFS_H
 
 #include "nfsfh.h"
+#include "nfsd.h"
 
 /*
  * Flags for nfsd_permission
@@ -125,4 +126,11 @@ static inline void fh_drop_write(struct svc_fh *fh)
 	}
 }
 
+static inline __be32 fh_getattr(struct svc_fh *fh, struct kstat *stat)
+{
+	struct path p = {.mnt = fh->fh_export->ex_path.mnt,
+			 .dentry = fh->fh_dentry};
+	return nfserrno(vfs_getattr(&p, stat));
+}
+
 #endif /* LINUX_NFSD_VFS_H */
diff --git a/fs/stat.c b/fs/stat.c
index 14f45459c83d..04ce1ac20d20 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -37,17 +37,17 @@ void generic_fillattr(struct inode *inode, struct kstat *stat)
 
 EXPORT_SYMBOL(generic_fillattr);
 
-int vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
+int vfs_getattr(struct path *path, struct kstat *stat)
 {
-	struct inode *inode = dentry->d_inode;
+	struct inode *inode = path->dentry->d_inode;
 	int retval;
 
-	retval = security_inode_getattr(mnt, dentry);
+	retval = security_inode_getattr(path->mnt, path->dentry);
 	if (retval)
 		return retval;
 
 	if (inode->i_op->getattr)
-		return inode->i_op->getattr(mnt, dentry, stat);
+		return inode->i_op->getattr(path->mnt, path->dentry, stat);
 
 	generic_fillattr(inode, stat);
 	return 0;
@@ -61,8 +61,7 @@ int vfs_fstat(unsigned int fd, struct kstat *stat)
 	int error = -EBADF;
 
 	if (f.file) {
-		error = vfs_getattr(f.file->f_path.mnt, f.file->f_path.dentry,
-				    stat);
+		error = vfs_getattr(&f.file->f_path, stat);
 		fdput(f);
 	}
 	return error;
@@ -89,7 +88,7 @@ retry:
 	if (error)
 		goto out;
 
-	error = vfs_getattr(path.mnt, path.dentry, stat);
+	error = vfs_getattr(&path, stat);
 	path_put(&path);
 	if (retry_estale(error, lookup_flags)) {
 		lookup_flags |= LOOKUP_REVAL;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 3ab69777b4d8..7f471520b88b 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2468,7 +2468,7 @@ extern int page_symlink(struct inode *inode, const char *symname, int len);
 extern const struct inode_operations page_symlink_inode_operations;
 extern int generic_readlink(struct dentry *, char __user *, int);
 extern void generic_fillattr(struct inode *, struct kstat *);
-extern int vfs_getattr(struct vfsmount *, struct dentry *, struct kstat *);
+extern int vfs_getattr(struct path *, struct kstat *);
 void __inode_add_bytes(struct inode *inode, loff_t bytes);
 void inode_add_bytes(struct inode *inode, loff_t bytes);
 void inode_sub_bytes(struct inode *inode, loff_t bytes);
diff --git a/kernel/module.c b/kernel/module.c
index b10b048367e1..950076eb3273 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2519,7 +2519,7 @@ static int copy_module_from_fd(int fd, struct load_info *info)
 	if (err)
 		goto out;
 
-	err = vfs_getattr(file->f_vfsmnt, file->f_dentry, &stat);
+	err = vfs_getattr(&file->f_path, &stat);
 	if (err)
 		goto out;
 
-- 
cgit v1.2.3


From ecf3d1f1aa74da0d632b651a2e05a911f60e92c0 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Wed, 20 Feb 2013 11:19:05 -0500
Subject: vfs: kill FS_REVAL_DOT by adding a d_weak_revalidate dentry op

The following set of operations on a NFS client and server will cause

    server# mkdir a
    client# cd a
    server# mv a a.bak
    client# sleep 30  # (or whatever the dir attrcache timeout is)
    client# stat .
    stat: cannot stat `.': Stale NFS file handle

Obviously, we should not be getting an ESTALE error back there since the
inode still exists on the server. The problem is that the lookup code
will call d_revalidate on the dentry that "." refers to, because NFS has
FS_REVAL_DOT set.

nfs_lookup_revalidate will see that the parent directory has changed and
will try to reverify the dentry by redoing a LOOKUP. That of course
fails, so the lookup code returns ESTALE.

The problem here is that d_revalidate is really a bad fit for this case.
What we really want to know at this point is whether the inode is still
good or not, but we don't really care what name it goes by or whether
the dcache is still valid.

Add a new d_op->d_weak_revalidate operation and have complete_walk call
that instead of d_revalidate. The intent there is to allow for a
"weaker" d_revalidate that just checks to see whether the inode is still
good. This is also gives us an opportunity to kill off the FS_REVAL_DOT
special casing.

[AV: changed method name, added note in porting, fixed confusion re
having it possibly called from RCU mode (it won't be)]

Cc: NeilBrown <neilb@suse.de>
Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 Documentation/filesystems/Locking |  2 ++
 Documentation/filesystems/porting |  4 ++++
 Documentation/filesystems/vfs.txt | 24 +++++++++++++++++++++--
 fs/9p/vfs_dentry.c                |  1 +
 fs/9p/vfs_super.c                 |  2 +-
 fs/dcache.c                       |  3 +++
 fs/namei.c                        |  8 ++------
 fs/nfs/dir.c                      | 40 +++++++++++++++++++++++++++++++++++++++
 fs/nfs/nfs4super.c                |  6 +++---
 fs/nfs/super.c                    |  6 +++---
 include/linux/dcache.h            |  3 +++
 include/linux/fs.h                |  1 -
 12 files changed, 84 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index f48e0c6b4c42..0706d32a61e6 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -10,6 +10,7 @@ be able to use diff(1).
 --------------------------- dentry_operations --------------------------
 prototypes:
 	int (*d_revalidate)(struct dentry *, unsigned int);
+	int (*d_weak_revalidate)(struct dentry *, unsigned int);
 	int (*d_hash)(const struct dentry *, const struct inode *,
 			struct qstr *);
 	int (*d_compare)(const struct dentry *, const struct inode *,
@@ -25,6 +26,7 @@ prototypes:
 locking rules:
 		rename_lock	->d_lock	may block	rcu-walk
 d_revalidate:	no		no		yes (ref-walk)	maybe
+d_weak_revalidate:no		no		yes	 	no
 d_hash		no		no		no		maybe
 d_compare:	yes		no		no		maybe
 d_delete:	no		yes		no		no
diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting
index 0472c31c163b..4db22f6491e0 100644
--- a/Documentation/filesystems/porting
+++ b/Documentation/filesystems/porting
@@ -441,3 +441,7 @@ d_make_root() drops the reference to inode if dentry allocation fails.
 two, it gets "is it an O_EXCL or equivalent?" boolean argument.  Note that
 local filesystems can ignore tha argument - they are guaranteed that the
 object doesn't exist.  It's remote/distributed ones that might care...
+--
+[mandatory]
+	FS_REVAL_DOT is gone; if you used to have it, add ->d_weak_revalidate()
+in your dentry operations instead.
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index e3869098163e..bc4b06b3160a 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -900,6 +900,7 @@ defined:
 
 struct dentry_operations {
 	int (*d_revalidate)(struct dentry *, unsigned int);
+	int (*d_weak_revalidate)(struct dentry *, unsigned int);
 	int (*d_hash)(const struct dentry *, const struct inode *,
 			struct qstr *);
 	int (*d_compare)(const struct dentry *, const struct inode *,
@@ -915,8 +916,13 @@ struct dentry_operations {
 
   d_revalidate: called when the VFS needs to revalidate a dentry. This
 	is called whenever a name look-up finds a dentry in the
-	dcache. Most filesystems leave this as NULL, because all their
-	dentries in the dcache are valid
+	dcache. Most local filesystems leave this as NULL, because all their
+	dentries in the dcache are valid. Network filesystems are different
+	since things can change on the server without the client necessarily
+	being aware of it.
+
+	This function should return a positive value if the dentry is still
+	valid, and zero or a negative error code if it isn't.
 
 	d_revalidate may be called in rcu-walk mode (flags & LOOKUP_RCU).
 	If in rcu-walk mode, the filesystem must revalidate the dentry without
@@ -927,6 +933,20 @@ struct dentry_operations {
 	If a situation is encountered that rcu-walk cannot handle, return
 	-ECHILD and it will be called again in ref-walk mode.
 
+ d_weak_revalidate: called when the VFS needs to revalidate a "jumped" dentry.
+	This is called when a path-walk ends at dentry that was not acquired by
+	doing a lookup in the parent directory. This includes "/", "." and "..",
+	as well as procfs-style symlinks and mountpoint traversal.
+
+	In this case, we are less concerned with whether the dentry is still
+	fully correct, but rather that the inode is still valid. As with
+	d_revalidate, most local filesystems will set this to NULL since their
+	dcache entries are always valid.
+
+	This function has the same return code semantics as d_revalidate.
+
+	d_weak_revalidate is only called after leaving rcu-walk mode.
+
   d_hash: called when the VFS adds a dentry to the hash table. The first
 	dentry passed to d_hash is the parent directory that the name is
 	to be hashed into. The inode is the dentry's inode.
diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c
index 64600b5d0522..9ad68628522c 100644
--- a/fs/9p/vfs_dentry.c
+++ b/fs/9p/vfs_dentry.c
@@ -137,6 +137,7 @@ out_valid:
 
 const struct dentry_operations v9fs_cached_dentry_operations = {
 	.d_revalidate = v9fs_lookup_revalidate,
+	.d_weak_revalidate = v9fs_lookup_revalidate,
 	.d_delete = v9fs_cached_dentry_delete,
 	.d_release = v9fs_dentry_release,
 };
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index 137d50396898..91dad63e5a2d 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -363,5 +363,5 @@ struct file_system_type v9fs_fs_type = {
 	.mount = v9fs_mount,
 	.kill_sb = v9fs_kill_super,
 	.owner = THIS_MODULE,
-	.fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT,
+	.fs_flags = FS_RENAME_DOES_D_MOVE,
 };
diff --git a/fs/dcache.c b/fs/dcache.c
index ebab049826c0..68220dd0c135 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1358,6 +1358,7 @@ void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op)
 	WARN_ON_ONCE(dentry->d_flags & (DCACHE_OP_HASH	|
 				DCACHE_OP_COMPARE	|
 				DCACHE_OP_REVALIDATE	|
+				DCACHE_OP_WEAK_REVALIDATE	|
 				DCACHE_OP_DELETE ));
 	dentry->d_op = op;
 	if (!op)
@@ -1368,6 +1369,8 @@ void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op)
 		dentry->d_flags |= DCACHE_OP_COMPARE;
 	if (op->d_revalidate)
 		dentry->d_flags |= DCACHE_OP_REVALIDATE;
+	if (op->d_weak_revalidate)
+		dentry->d_flags |= DCACHE_OP_WEAK_REVALIDATE;
 	if (op->d_delete)
 		dentry->d_flags |= DCACHE_OP_DELETE;
 	if (op->d_prune)
diff --git a/fs/namei.c b/fs/namei.c
index 052c095c2808..dc984fee5532 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -600,14 +600,10 @@ static int complete_walk(struct nameidata *nd)
 	if (likely(!(nd->flags & LOOKUP_JUMPED)))
 		return 0;
 
-	if (likely(!(dentry->d_flags & DCACHE_OP_REVALIDATE)))
+	if (likely(!(dentry->d_flags & DCACHE_OP_WEAK_REVALIDATE)))
 		return 0;
 
-	if (likely(!(dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)))
-		return 0;
-
-	/* Note: we do not d_invalidate() */
-	status = d_revalidate(dentry, nd->flags);
+	status = dentry->d_op->d_weak_revalidate(dentry, nd->flags);
 	if (status > 0)
 		return 0;
 
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index a8bd28cde7e2..f23f455be42b 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1135,6 +1135,45 @@ out_error:
 	return error;
 }
 
+/*
+ * A weaker form of d_revalidate for revalidating just the dentry->d_inode
+ * when we don't really care about the dentry name. This is called when a
+ * pathwalk ends on a dentry that was not found via a normal lookup in the
+ * parent dir (e.g.: ".", "..", procfs symlinks or mountpoint traversals).
+ *
+ * In this situation, we just want to verify that the inode itself is OK
+ * since the dentry might have changed on the server.
+ */
+static int nfs_weak_revalidate(struct dentry *dentry, unsigned int flags)
+{
+	int error;
+	struct inode *inode = dentry->d_inode;
+
+	/*
+	 * I believe we can only get a negative dentry here in the case of a
+	 * procfs-style symlink. Just assume it's correct for now, but we may
+	 * eventually need to do something more here.
+	 */
+	if (!inode) {
+		dfprintk(LOOKUPCACHE, "%s: %s/%s has negative inode\n",
+				__func__, dentry->d_parent->d_name.name,
+				dentry->d_name.name);
+		return 1;
+	}
+
+	if (is_bad_inode(inode)) {
+		dfprintk(LOOKUPCACHE, "%s: %s/%s has dud inode\n",
+				__func__, dentry->d_parent->d_name.name,
+				dentry->d_name.name);
+		return 0;
+	}
+
+	error = nfs_revalidate_inode(NFS_SERVER(inode), inode);
+	dfprintk(LOOKUPCACHE, "NFS: %s: inode %lu is %s\n",
+			__func__, inode->i_ino, error ? "invalid" : "valid");
+	return !error;
+}
+
 /*
  * This is called from dput() when d_count is going to 0.
  */
@@ -1202,6 +1241,7 @@ static void nfs_d_release(struct dentry *dentry)
 
 const struct dentry_operations nfs_dentry_operations = {
 	.d_revalidate	= nfs_lookup_revalidate,
+	.d_weak_revalidate	= nfs_weak_revalidate,
 	.d_delete	= nfs_dentry_delete,
 	.d_iput		= nfs_dentry_iput,
 	.d_automount	= nfs_d_automount,
diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c
index 84d2e9e2f313..569b166cc050 100644
--- a/fs/nfs/nfs4super.c
+++ b/fs/nfs/nfs4super.c
@@ -28,7 +28,7 @@ static struct file_system_type nfs4_remote_fs_type = {
 	.name		= "nfs4",
 	.mount		= nfs4_remote_mount,
 	.kill_sb	= nfs_kill_super,
-	.fs_flags	= FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
+	.fs_flags	= FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA,
 };
 
 static struct file_system_type nfs4_remote_referral_fs_type = {
@@ -36,7 +36,7 @@ static struct file_system_type nfs4_remote_referral_fs_type = {
 	.name		= "nfs4",
 	.mount		= nfs4_remote_referral_mount,
 	.kill_sb	= nfs_kill_super,
-	.fs_flags	= FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
+	.fs_flags	= FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA,
 };
 
 struct file_system_type nfs4_referral_fs_type = {
@@ -44,7 +44,7 @@ struct file_system_type nfs4_referral_fs_type = {
 	.name		= "nfs4",
 	.mount		= nfs4_referral_mount,
 	.kill_sb	= nfs_kill_super,
-	.fs_flags	= FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
+	.fs_flags	= FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA,
 };
 
 static const struct super_operations nfs4_sops = {
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 2e7e8c878e5d..92acc26f9c5f 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -292,7 +292,7 @@ struct file_system_type nfs_fs_type = {
 	.name		= "nfs",
 	.mount		= nfs_fs_mount,
 	.kill_sb	= nfs_kill_super,
-	.fs_flags	= FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
+	.fs_flags	= FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA,
 };
 EXPORT_SYMBOL_GPL(nfs_fs_type);
 
@@ -301,7 +301,7 @@ struct file_system_type nfs_xdev_fs_type = {
 	.name		= "nfs",
 	.mount		= nfs_xdev_mount,
 	.kill_sb	= nfs_kill_super,
-	.fs_flags	= FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
+	.fs_flags	= FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA,
 };
 
 const struct super_operations nfs_sops = {
@@ -331,7 +331,7 @@ struct file_system_type nfs4_fs_type = {
 	.name		= "nfs4",
 	.mount		= nfs_fs_mount,
 	.kill_sb	= nfs_kill_super,
-	.fs_flags	= FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
+	.fs_flags	= FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA,
 };
 EXPORT_SYMBOL_GPL(nfs4_fs_type);
 
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 03d169288423..1a6bb81f0fe5 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -145,6 +145,7 @@ enum dentry_d_lock_class
 
 struct dentry_operations {
 	int (*d_revalidate)(struct dentry *, unsigned int);
+	int (*d_weak_revalidate)(struct dentry *, unsigned int);
 	int (*d_hash)(const struct dentry *, const struct inode *,
 			struct qstr *);
 	int (*d_compare)(const struct dentry *, const struct inode *,
@@ -192,6 +193,8 @@ struct dentry_operations {
 #define DCACHE_GENOCIDE		0x0200
 #define DCACHE_SHRINK_LIST	0x0400
 
+#define DCACHE_OP_WEAK_REVALIDATE	0x0800
+
 #define DCACHE_NFSFS_RENAMED	0x1000
      /* this dentry has been "silly renamed" and has to be deleted on the last
       * dput() */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 7f471520b88b..da94011ae83c 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1807,7 +1807,6 @@ struct file_system_type {
 #define FS_HAS_SUBTYPE		4
 #define FS_USERNS_MOUNT		8	/* Can be mounted by userns root */
 #define FS_USERNS_DEV_MOUNT	16 /* A userns mount does not imply MNT_NODEV */
-#define FS_REVAL_DOT		16384	/* Check the paths ".", ".." for staleness */
 #define FS_RENAME_DOES_D_MOVE	32768	/* FS will handle d_move() during rename() internally. */
 	struct dentry *(*mount) (struct file_system_type *, int,
 		       const char *, void *);
-- 
cgit v1.2.3


From 182be684784334598eee1d90274e7f7aa0063616 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 24 Jan 2013 02:21:54 -0500
Subject: kill f_vfsmnt

very few users left...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/ocfs2/dir.c       | 2 +-
 fs/ocfs2/file.c      | 4 ++--
 fs/ocfs2/mmap.c      | 2 +-
 include/linux/fs.h   | 1 -
 security/commoncap.c | 2 +-
 5 files changed, 5 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index ac0d4a0e8a41..c87d0793bdec 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -2020,7 +2020,7 @@ int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir)
 
 	trace_ocfs2_readdir((unsigned long long)OCFS2_I(inode)->ip_blkno);
 
-	error = ocfs2_inode_lock_atime(inode, filp->f_vfsmnt, &lock_level);
+	error = ocfs2_inode_lock_atime(inode, filp->f_path.mnt, &lock_level);
 	if (lock_level && error >= 0) {
 		/* We release EX lock which used to update atime
 		 * and get PR lock again to reduce contention
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 04098af9dbc8..5bcd865905ef 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2526,7 +2526,7 @@ static ssize_t ocfs2_file_splice_read(struct file *in,
 	/*
 	 * See the comment in ocfs2_file_aio_read()
 	 */
-	ret = ocfs2_inode_lock_atime(inode, in->f_vfsmnt, &lock_level);
+	ret = ocfs2_inode_lock_atime(inode, in->f_path.mnt, &lock_level);
 	if (ret < 0) {
 		mlog_errno(ret);
 		goto bail;
@@ -2589,7 +2589,7 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
 	 * like i_size. This allows the checks down below
 	 * generic_file_aio_read() a chance of actually working.
 	 */
-	ret = ocfs2_inode_lock_atime(inode, filp->f_vfsmnt, &lock_level);
+	ret = ocfs2_inode_lock_atime(inode, filp->f_path.mnt, &lock_level);
 	if (ret < 0) {
 		mlog_errno(ret);
 		goto bail;
diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c
index 07c585b85000..10d66c75cecb 100644
--- a/fs/ocfs2/mmap.c
+++ b/fs/ocfs2/mmap.c
@@ -181,7 +181,7 @@ int ocfs2_mmap(struct file *file, struct vm_area_struct *vma)
 	int ret = 0, lock_level = 0;
 
 	ret = ocfs2_inode_lock_atime(file_inode(file),
-				    file->f_vfsmnt, &lock_level);
+				    file->f_path.mnt, &lock_level);
 	if (ret < 0) {
 		mlog_errno(ret);
 		goto out;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index da94011ae83c..c766afd1e684 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -769,7 +769,6 @@ struct file {
 	} f_u;
 	struct path		f_path;
 #define f_dentry	f_path.dentry
-#define f_vfsmnt	f_path.mnt
 	const struct file_operations	*f_op;
 
 	/*
diff --git a/security/commoncap.c b/security/commoncap.c
index 7ee08c756d6b..c44b6fe6648e 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -440,7 +440,7 @@ static int get_file_caps(struct linux_binprm *bprm, bool *effective, bool *has_c
 	if (!file_caps_enabled)
 		return 0;
 
-	if (bprm->file->f_vfsmnt->mnt_flags & MNT_NOSUID)
+	if (bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID)
 		return 0;
 
 	dentry = dget(bprm->file->f_dentry);
-- 
cgit v1.2.3


From 7bb307e894d51308aa0582a8c4cc5875bbc645b9 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 23 Feb 2013 14:51:48 -0500
Subject: export kernel_write(), convert open-coded instances

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 drivers/mtd/nand/nandsim.c | 34 ++++++++++++----------------------
 fs/ecryptfs/read_write.c   |  6 +-----
 fs/splice.c                |  5 +++--
 include/linux/fs.h         |  1 +
 kernel/sysctl_binary.c     | 39 +++++++--------------------------------
 5 files changed, 24 insertions(+), 61 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/nandsim.c b/drivers/mtd/nand/nandsim.c
index 818b65c85d12..8f30d385bfa3 100644
--- a/drivers/mtd/nand/nandsim.c
+++ b/drivers/mtd/nand/nandsim.c
@@ -1408,40 +1408,32 @@ static void clear_memalloc(int memalloc)
 		current->flags &= ~PF_MEMALLOC;
 }
 
-static ssize_t read_file(struct nandsim *ns, struct file *file, void *buf, size_t count, loff_t *pos)
+static ssize_t read_file(struct nandsim *ns, struct file *file, void *buf, size_t count, loff_t pos)
 {
-	mm_segment_t old_fs;
 	ssize_t tx;
 	int err, memalloc;
 
-	err = get_pages(ns, file, count, *pos);
+	err = get_pages(ns, file, count, pos);
 	if (err)
 		return err;
-	old_fs = get_fs();
-	set_fs(get_ds());
 	memalloc = set_memalloc();
-	tx = vfs_read(file, (char __user *)buf, count, pos);
+	tx = kernel_read(file, pos, buf, count);
 	clear_memalloc(memalloc);
-	set_fs(old_fs);
 	put_pages(ns);
 	return tx;
 }
 
-static ssize_t write_file(struct nandsim *ns, struct file *file, void *buf, size_t count, loff_t *pos)
+static ssize_t write_file(struct nandsim *ns, struct file *file, void *buf, size_t count, loff_t pos)
 {
-	mm_segment_t old_fs;
 	ssize_t tx;
 	int err, memalloc;
 
-	err = get_pages(ns, file, count, *pos);
+	err = get_pages(ns, file, count, pos);
 	if (err)
 		return err;
-	old_fs = get_fs();
-	set_fs(get_ds());
 	memalloc = set_memalloc();
-	tx = vfs_write(file, (char __user *)buf, count, pos);
+	tx = kernel_write(file, buf, count, pos);
 	clear_memalloc(memalloc);
-	set_fs(old_fs);
 	put_pages(ns);
 	return tx;
 }
@@ -1511,7 +1503,7 @@ static void read_page(struct nandsim *ns, int num)
 			if (do_read_error(ns, num))
 				return;
 			pos = (loff_t)ns->regs.row * ns->geom.pgszoob + ns->regs.column + ns->regs.off;
-			tx = read_file(ns, ns->cfile, ns->buf.byte, num, &pos);
+			tx = read_file(ns, ns->cfile, ns->buf.byte, num, pos);
 			if (tx != num) {
 				NS_ERR("read_page: read error for page %d ret %ld\n", ns->regs.row, (long)tx);
 				return;
@@ -1573,7 +1565,7 @@ static int prog_page(struct nandsim *ns, int num)
 	u_char *pg_off;
 
 	if (ns->cfile) {
-		loff_t off, pos;
+		loff_t off;
 		ssize_t tx;
 		int all;
 
@@ -1585,8 +1577,7 @@ static int prog_page(struct nandsim *ns, int num)
 			memset(ns->file_buf, 0xff, ns->geom.pgszoob);
 		} else {
 			all = 0;
-			pos = off;
-			tx = read_file(ns, ns->cfile, pg_off, num, &pos);
+			tx = read_file(ns, ns->cfile, pg_off, num, off);
 			if (tx != num) {
 				NS_ERR("prog_page: read error for page %d ret %ld\n", ns->regs.row, (long)tx);
 				return -1;
@@ -1595,16 +1586,15 @@ static int prog_page(struct nandsim *ns, int num)
 		for (i = 0; i < num; i++)
 			pg_off[i] &= ns->buf.byte[i];
 		if (all) {
-			pos = (loff_t)ns->regs.row * ns->geom.pgszoob;
-			tx = write_file(ns, ns->cfile, ns->file_buf, ns->geom.pgszoob, &pos);
+			loff_t pos = (loff_t)ns->regs.row * ns->geom.pgszoob;
+			tx = write_file(ns, ns->cfile, ns->file_buf, ns->geom.pgszoob, pos);
 			if (tx != ns->geom.pgszoob) {
 				NS_ERR("prog_page: write error for page %d ret %ld\n", ns->regs.row, (long)tx);
 				return -1;
 			}
 			ns->pages_written[ns->regs.row] = 1;
 		} else {
-			pos = off;
-			tx = write_file(ns, ns->cfile, pg_off, num, &pos);
+			tx = write_file(ns, ns->cfile, pg_off, num, off);
 			if (tx != num) {
 				NS_ERR("prog_page: write error for page %d ret %ld\n", ns->regs.row, (long)tx);
 				return -1;
diff --git a/fs/ecryptfs/read_write.c b/fs/ecryptfs/read_write.c
index b2a34a192f4f..6a160539cd23 100644
--- a/fs/ecryptfs/read_write.c
+++ b/fs/ecryptfs/read_write.c
@@ -40,16 +40,12 @@ int ecryptfs_write_lower(struct inode *ecryptfs_inode, char *data,
 			 loff_t offset, size_t size)
 {
 	struct file *lower_file;
-	mm_segment_t fs_save;
 	ssize_t rc;
 
 	lower_file = ecryptfs_inode_to_private(ecryptfs_inode)->lower_file;
 	if (!lower_file)
 		return -EIO;
-	fs_save = get_fs();
-	set_fs(get_ds());
-	rc = vfs_write(lower_file, data, size, &offset);
-	set_fs(fs_save);
+	rc = kernel_write(lower_file, data, size, offset);
 	mark_inode_dirty_sync(ecryptfs_inode);
 	return rc;
 }
diff --git a/fs/splice.c b/fs/splice.c
index 963213d56403..718bd0056384 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -569,7 +569,7 @@ static ssize_t kernel_readv(struct file *file, const struct iovec *vec,
 	return res;
 }
 
-static ssize_t kernel_write(struct file *file, const char *buf, size_t count,
+ssize_t kernel_write(struct file *file, const char *buf, size_t count,
 			    loff_t pos)
 {
 	mm_segment_t old_fs;
@@ -578,11 +578,12 @@ static ssize_t kernel_write(struct file *file, const char *buf, size_t count,
 	old_fs = get_fs();
 	set_fs(get_ds());
 	/* The cast to a user pointer is valid due to the set_fs() */
-	res = vfs_write(file, (const char __user *)buf, count, &pos);
+	res = vfs_write(file, (__force const char __user *)buf, count, &pos);
 	set_fs(old_fs);
 
 	return res;
 }
+EXPORT_SYMBOL(kernel_write);
 
 ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
 				 struct pipe_inode_info *pipe, size_t len,
diff --git a/include/linux/fs.h b/include/linux/fs.h
index c766afd1e684..d858363a7c17 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2277,6 +2277,7 @@ static inline void i_readcount_inc(struct inode *inode)
 extern int do_pipe_flags(int *, int);
 
 extern int kernel_read(struct file *, loff_t, char *, unsigned long);
+extern ssize_t kernel_write(struct file *, const char *, size_t, loff_t);
 extern struct file * open_exec(const char *);
  
 /* fs/dcache.c -- generic fs support functions */
diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c
index 5a6384450501..37f240fec37a 100644
--- a/kernel/sysctl_binary.c
+++ b/kernel/sysctl_binary.c
@@ -971,7 +971,6 @@ out:
 static ssize_t bin_intvec(struct file *file,
 	void __user *oldval, size_t oldlen, void __user *newval, size_t newlen)
 {
-	mm_segment_t old_fs = get_fs();
 	ssize_t copied = 0;
 	char *buffer;
 	ssize_t result;
@@ -984,13 +983,10 @@ static ssize_t bin_intvec(struct file *file,
 	if (oldval && oldlen) {
 		unsigned __user *vec = oldval;
 		size_t length = oldlen / sizeof(*vec);
-		loff_t pos = 0;
 		char *str, *end;
 		int i;
 
-		set_fs(KERNEL_DS);
-		result = vfs_read(file, buffer, BUFSZ - 1, &pos);
-		set_fs(old_fs);
+		result = kernel_read(file, 0, buffer, BUFSZ - 1);
 		if (result < 0)
 			goto out_kfree;
 
@@ -1017,7 +1013,6 @@ static ssize_t bin_intvec(struct file *file,
 	if (newval && newlen) {
 		unsigned __user *vec = newval;
 		size_t length = newlen / sizeof(*vec);
-		loff_t pos = 0;
 		char *str, *end;
 		int i;
 
@@ -1033,9 +1028,7 @@ static ssize_t bin_intvec(struct file *file,
 			str += snprintf(str, end - str, "%lu\t", value);
 		}
 
-		set_fs(KERNEL_DS);
-		result = vfs_write(file, buffer, str - buffer, &pos);
-		set_fs(old_fs);
+		result = kernel_write(file, buffer, str - buffer, 0);
 		if (result < 0)
 			goto out_kfree;
 	}
@@ -1049,7 +1042,6 @@ out:
 static ssize_t bin_ulongvec(struct file *file,
 	void __user *oldval, size_t oldlen, void __user *newval, size_t newlen)
 {
-	mm_segment_t old_fs = get_fs();
 	ssize_t copied = 0;
 	char *buffer;
 	ssize_t result;
@@ -1062,13 +1054,10 @@ static ssize_t bin_ulongvec(struct file *file,
 	if (oldval && oldlen) {
 		unsigned long __user *vec = oldval;
 		size_t length = oldlen / sizeof(*vec);
-		loff_t pos = 0;
 		char *str, *end;
 		int i;
 
-		set_fs(KERNEL_DS);
-		result = vfs_read(file, buffer, BUFSZ - 1, &pos);
-		set_fs(old_fs);
+		result = kernel_read(file, 0, buffer, BUFSZ - 1);
 		if (result < 0)
 			goto out_kfree;
 
@@ -1095,7 +1084,6 @@ static ssize_t bin_ulongvec(struct file *file,
 	if (newval && newlen) {
 		unsigned long __user *vec = newval;
 		size_t length = newlen / sizeof(*vec);
-		loff_t pos = 0;
 		char *str, *end;
 		int i;
 
@@ -1111,9 +1099,7 @@ static ssize_t bin_ulongvec(struct file *file,
 			str += snprintf(str, end - str, "%lu\t", value);
 		}
 
-		set_fs(KERNEL_DS);
-		result = vfs_write(file, buffer, str - buffer, &pos);
-		set_fs(old_fs);
+		result = kernel_write(file, buffer, str - buffer, 0);
 		if (result < 0)
 			goto out_kfree;
 	}
@@ -1127,19 +1113,15 @@ out:
 static ssize_t bin_uuid(struct file *file,
 	void __user *oldval, size_t oldlen, void __user *newval, size_t newlen)
 {
-	mm_segment_t old_fs = get_fs();
 	ssize_t result, copied = 0;
 
 	/* Only supports reads */
 	if (oldval && oldlen) {
-		loff_t pos = 0;
 		char buf[40], *str = buf;
 		unsigned char uuid[16];
 		int i;
 
-		set_fs(KERNEL_DS);
-		result = vfs_read(file, buf, sizeof(buf) - 1, &pos);
-		set_fs(old_fs);
+		result = kernel_read(file, 0, buf, sizeof(buf) - 1);
 		if (result < 0)
 			goto out;
 
@@ -1175,18 +1157,14 @@ out:
 static ssize_t bin_dn_node_address(struct file *file,
 	void __user *oldval, size_t oldlen, void __user *newval, size_t newlen)
 {
-	mm_segment_t old_fs = get_fs();
 	ssize_t result, copied = 0;
 
 	if (oldval && oldlen) {
-		loff_t pos = 0;
 		char buf[15], *nodep;
 		unsigned long area, node;
 		__le16 dnaddr;
 
-		set_fs(KERNEL_DS);
-		result = vfs_read(file, buf, sizeof(buf) - 1, &pos);
-		set_fs(old_fs);
+		result = kernel_read(file, 0, buf, sizeof(buf) - 1);
 		if (result < 0)
 			goto out;
 
@@ -1215,7 +1193,6 @@ static ssize_t bin_dn_node_address(struct file *file,
 	}
 
 	if (newval && newlen) {
-		loff_t pos = 0;
 		__le16 dnaddr;
 		char buf[15];
 		int len;
@@ -1232,9 +1209,7 @@ static ssize_t bin_dn_node_address(struct file *file,
 				le16_to_cpu(dnaddr) >> 10,
 				le16_to_cpu(dnaddr) & 0x3ff);
 
-		set_fs(KERNEL_DS);
-		result = vfs_write(file, buf, len, &pos);
-		set_fs(old_fs);
+		result = kernel_write(file, buf, len, 0);
 		if (result < 0)
 			goto out;
 	}
-- 
cgit v1.2.3


From 9cc64ceaa8b8e8c874519caee79e18cb35d3ce3e Mon Sep 17 00:00:00 2001
From: Yuanhan Liu <yuanhan.liu@linux.intel.com>
Date: Wed, 20 Feb 2013 13:16:01 +1100
Subject: fs/exec.c: make bprm_mm_init() static

There is only one user of bprm_mm_init, and it's inside the same file.

Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/exec.c               | 2 +-
 include/linux/binfmts.h | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/fs/exec.c b/fs/exec.c
index 7b6f4d59b26c..864c50df660a 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -355,7 +355,7 @@ static bool valid_arg_len(struct linux_binprm *bprm, long len)
  * flags, permissions, and offset, so we use temporary values.  We'll update
  * them later in setup_arg_pages().
  */
-int bprm_mm_init(struct linux_binprm *bprm)
+static int bprm_mm_init(struct linux_binprm *bprm)
 {
 	int err;
 	struct mm_struct *mm = NULL;
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index 0530b9860359..c3a09149f793 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -111,7 +111,6 @@ extern int suid_dumpable;
 extern int setup_arg_pages(struct linux_binprm * bprm,
 			   unsigned long stack_top,
 			   int executable_stack);
-extern int bprm_mm_init(struct linux_binprm *bprm);
 extern int bprm_change_interp(char *interp, struct linux_binprm *bprm);
 extern int copy_strings_kernel(int argc, const char *const *argv,
 			       struct linux_binprm *bprm);
-- 
cgit v1.2.3


From 12979354a1d6ef25d86f381e4d5f9e103f29913a Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@inktank.com>
Date: Tue, 8 Jan 2013 09:15:10 -0800
Subject: libceph: rename ceph_pg -> ceph_pg_v1

Rename the old version this type to distinguish it from the new version.

Signed-off-by: Sage Weil <sage@inktank.com>
Reviewed-by: Alex Elder <elder@inktank.com>
---
 fs/ceph/ioctl.c                 |  2 +-
 include/linux/ceph/osd_client.h |  2 +-
 include/linux/ceph/osdmap.h     |  7 ++++---
 include/linux/ceph/rados.h      |  4 ++--
 net/ceph/osd_client.c           |  2 +-
 net/ceph/osdmap.c               | 18 +++++++++---------
 6 files changed, 18 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c
index 3b22150d3e19..e831436d6e68 100644
--- a/fs/ceph/ioctl.c
+++ b/fs/ceph/ioctl.c
@@ -186,7 +186,7 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg)
 	u64 len = 1, olen;
 	u64 tmp;
 	struct ceph_object_layout ol;
-	struct ceph_pg pgid;
+	struct ceph_pg_v1 pgid;
 	int r;
 
 	/* copy and validate */
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index 388158ff0cbc..be2867330e23 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -56,7 +56,7 @@ struct ceph_osd_request {
 	struct list_head r_linger_item;
 	struct list_head r_linger_osd;
 	struct ceph_osd *r_osd;
-	struct ceph_pg   r_pgid;
+	struct ceph_pg_v1 r_pgid;
 	int              r_pg_osds[CEPH_PG_MAX_SIZE];
 	int              r_num_pg_osds;
 
diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h
index c83a838f89f5..eb4989aa48e8 100644
--- a/include/linux/ceph/osdmap.h
+++ b/include/linux/ceph/osdmap.h
@@ -28,7 +28,7 @@ struct ceph_pg_pool_info {
 
 struct ceph_pg_mapping {
 	struct rb_node node;
-	struct ceph_pg pgid;
+	struct ceph_pg_v1 pgid;
 	int len;
 	int osds[];
 };
@@ -118,10 +118,11 @@ extern int ceph_calc_object_layout(struct ceph_object_layout *ol,
 				   const char *oid,
 				   struct ceph_file_layout *fl,
 				   struct ceph_osdmap *osdmap);
-extern int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
+extern int ceph_calc_pg_acting(struct ceph_osdmap *osdmap,
+			       struct ceph_pg_v1 pgid,
 			       int *acting);
 extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap,
-				struct ceph_pg pgid);
+				struct ceph_pg_v1 pgid);
 
 extern const char *ceph_pg_pool_name_by_id(struct ceph_osdmap *map, u64 id);
 extern int ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name);
diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h
index b65182aba6f7..e7cece69b13f 100644
--- a/include/linux/ceph/rados.h
+++ b/include/linux/ceph/rados.h
@@ -64,7 +64,7 @@ struct ceph_timespec {
  * placement group.
  * we encode this into one __le64.
  */
-struct ceph_pg {
+struct ceph_pg_v1 {
 	__le16 preferred; /* preferred primary osd */
 	__le16 ps;        /* placement seed */
 	__le32 pool;      /* object pool */
@@ -128,7 +128,7 @@ static inline int ceph_stable_mod(int x, int b, int bmask)
  * object layout - how a given object should be stored.
  */
 struct ceph_object_layout {
-	struct ceph_pg ol_pgid;   /* raw pg, with _full_ ps precision. */
+	struct ceph_pg_v1 ol_pgid;   /* raw pg, with _full_ ps precision. */
 	__le32 ol_stripe_unit;    /* for per-object parity, if any */
 } __attribute__ ((packed));
 
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 39629b66f3b1..e3ab8d60d080 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -914,7 +914,7 @@ static int __map_request(struct ceph_osd_client *osdc,
 			 struct ceph_osd_request *req, int force_resend)
 {
 	struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base;
-	struct ceph_pg pgid;
+	struct ceph_pg_v1 pgid;
 	int acting[CEPH_PG_MAX_SIZE];
 	int o = -1, num = 0;
 	int err;
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 3c61e21611d3..8c89ac25081a 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -350,7 +350,7 @@ bad:
  * rbtree of pg_mapping for handling pg_temp (explicit mapping of pgid
  * to a set of osds)
  */
-static int pgid_cmp(struct ceph_pg l, struct ceph_pg r)
+static int pgid_cmp(struct ceph_pg_v1 l, struct ceph_pg_v1 r)
 {
 	u64 a = *(u64 *)&l;
 	u64 b = *(u64 *)&r;
@@ -389,7 +389,7 @@ static int __insert_pg_mapping(struct ceph_pg_mapping *new,
 }
 
 static struct ceph_pg_mapping *__lookup_pg_mapping(struct rb_root *root,
-						   struct ceph_pg pgid)
+						   struct ceph_pg_v1 pgid)
 {
 	struct rb_node *n = root->rb_node;
 	struct ceph_pg_mapping *pg;
@@ -411,7 +411,7 @@ static struct ceph_pg_mapping *__lookup_pg_mapping(struct rb_root *root,
 	return NULL;
 }
 
-static int __remove_pg_mapping(struct rb_root *root, struct ceph_pg pgid)
+static int __remove_pg_mapping(struct rb_root *root, struct ceph_pg_v1 pgid)
 {
 	struct ceph_pg_mapping *pg = __lookup_pg_mapping(root, pgid);
 
@@ -721,7 +721,7 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end)
 	ceph_decode_32_safe(p, end, len, bad);
 	for (i = 0; i < len; i++) {
 		int n, j;
-		struct ceph_pg pgid;
+		struct ceph_pg_v1 pgid;
 		struct ceph_pg_mapping *pg;
 
 		ceph_decode_need(p, end, sizeof(u32) + sizeof(u64), bad);
@@ -944,7 +944,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
 	while (len--) {
 		struct ceph_pg_mapping *pg;
 		int j;
-		struct ceph_pg pgid;
+		struct ceph_pg_v1 pgid;
 		u32 pglen;
 		ceph_decode_need(p, end, sizeof(u64) + sizeof(u32), bad);
 		ceph_decode_copy(p, &pgid, sizeof(pgid));
@@ -1079,7 +1079,7 @@ int ceph_calc_object_layout(struct ceph_object_layout *ol,
 			    struct ceph_osdmap *osdmap)
 {
 	unsigned int num, num_mask;
-	struct ceph_pg pgid;
+	struct ceph_pg_v1 pgid;
 	int poolid = le32_to_cpu(fl->fl_pg_pool);
 	struct ceph_pg_pool_info *pool;
 	unsigned int ps;
@@ -1108,7 +1108,7 @@ EXPORT_SYMBOL(ceph_calc_object_layout);
  * Calculate raw osd vector for the given pgid.  Return pointer to osd
  * array, or NULL on failure.
  */
-static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
+static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg_v1 pgid,
 			int *osds, int *num)
 {
 	struct ceph_pg_mapping *pg;
@@ -1163,7 +1163,7 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
 /*
  * Return acting set for given pgid.
  */
-int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
+int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg_v1 pgid,
 			int *acting)
 {
 	int rawosds[CEPH_PG_MAX_SIZE], *osds;
@@ -1184,7 +1184,7 @@ int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
 /*
  * Return primary osd for given pgid, or -1 if none.
  */
-int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg pgid)
+int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg_v1 pgid)
 {
 	int rawosds[CEPH_PG_MAX_SIZE], *osds;
 	int i, num = CEPH_PG_MAX_SIZE;
-- 
cgit v1.2.3


From 5b191d9914eb68257f47de9d5bfe099b77f0687c Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@inktank.com>
Date: Sat, 23 Feb 2013 10:38:16 -0800
Subject: libceph: decode into cpu-native ceph_pg type

Always decode data into our cpu-native ceph_pg type that has the correct
field widths.  Limit any remaining uses of ceph_pg_v1 to dealing with the
legacy protocol.

Signed-off-by: Sage Weil <sage@inktank.com>
Reviewed-by: Alex Elder <elder@inktank.com>
---
 fs/ceph/ioctl.c                 |  5 +--
 include/linux/ceph/osd_client.h |  2 +-
 include/linux/ceph/osdmap.h     | 11 ++++--
 net/ceph/debugfs.c              |  5 ++-
 net/ceph/osd_client.c           |  9 ++---
 net/ceph/osdmap.c               | 78 +++++++++++++++++++++++------------------
 6 files changed, 62 insertions(+), 48 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c
index e831436d6e68..fb036ed3e129 100644
--- a/fs/ceph/ioctl.c
+++ b/fs/ceph/ioctl.c
@@ -186,7 +186,7 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg)
 	u64 len = 1, olen;
 	u64 tmp;
 	struct ceph_object_layout ol;
-	struct ceph_pg_v1 pgid;
+	struct ceph_pg pgid;
 	int r;
 
 	/* copy and validate */
@@ -212,7 +212,8 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg)
 	ceph_calc_object_layout(&ol, dl.object_name, &ci->i_layout,
 				osdc->osdmap);
 
-	pgid = ol.ol_pgid;
+	pgid.pool = le32_to_cpu(ol.ol_pgid.pool);
+	pgid.seed = le16_to_cpu(ol.ol_pgid.ps);
 	dl.osd = ceph_calc_pg_primary(osdc->osdmap, pgid);
 	if (dl.osd >= 0) {
 		struct ceph_entity_addr *a =
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index be2867330e23..388158ff0cbc 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -56,7 +56,7 @@ struct ceph_osd_request {
 	struct list_head r_linger_item;
 	struct list_head r_linger_osd;
 	struct ceph_osd *r_osd;
-	struct ceph_pg_v1 r_pgid;
+	struct ceph_pg   r_pgid;
 	int              r_pg_osds[CEPH_PG_MAX_SIZE];
 	int              r_num_pg_osds;
 
diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h
index eb4989aa48e8..8a612df4c248 100644
--- a/include/linux/ceph/osdmap.h
+++ b/include/linux/ceph/osdmap.h
@@ -18,6 +18,11 @@
  * The map can be updated either via an incremental map (diff) describing
  * the change between two successive epochs, or as a fully encoded map.
  */
+struct ceph_pg {
+	uint64_t pool;
+	uint32_t seed;
+};
+
 struct ceph_pg_pool_info {
 	struct rb_node node;
 	int id;
@@ -28,7 +33,7 @@ struct ceph_pg_pool_info {
 
 struct ceph_pg_mapping {
 	struct rb_node node;
-	struct ceph_pg_v1 pgid;
+	struct ceph_pg pgid;
 	int len;
 	int osds[];
 };
@@ -119,10 +124,10 @@ extern int ceph_calc_object_layout(struct ceph_object_layout *ol,
 				   struct ceph_file_layout *fl,
 				   struct ceph_osdmap *osdmap);
 extern int ceph_calc_pg_acting(struct ceph_osdmap *osdmap,
-			       struct ceph_pg_v1 pgid,
+			       struct ceph_pg pgid,
 			       int *acting);
 extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap,
-				struct ceph_pg_v1 pgid);
+				struct ceph_pg pgid);
 
 extern const char *ceph_pg_pool_name_by_id(struct ceph_osdmap *map, u64 id);
 extern int ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name);
diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c
index 38b5dc1823d4..61a9af634f8b 100644
--- a/net/ceph/debugfs.c
+++ b/net/ceph/debugfs.c
@@ -131,10 +131,9 @@ static int osdc_show(struct seq_file *s, void *pp)
 
 		req = rb_entry(p, struct ceph_osd_request, r_node);
 
-		seq_printf(s, "%lld\tosd%d\t%d.%x\t", req->r_tid,
+		seq_printf(s, "%lld\tosd%d\t%lld.%x\t", req->r_tid,
 			   req->r_osd ? req->r_osd->o_osd : -1,
-			   le32_to_cpu(req->r_pgid.pool),
-			   le16_to_cpu(req->r_pgid.ps));
+			   req->r_pgid.pool, req->r_pgid.seed);
 
 		head = req->r_request->front.iov_base;
 		op = (void *)(head + 1);
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index e3ab8d60d080..1990834e518b 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -914,7 +914,7 @@ static int __map_request(struct ceph_osd_client *osdc,
 			 struct ceph_osd_request *req, int force_resend)
 {
 	struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base;
-	struct ceph_pg_v1 pgid;
+	struct ceph_pg pgid;
 	int acting[CEPH_PG_MAX_SIZE];
 	int o = -1, num = 0;
 	int err;
@@ -926,7 +926,8 @@ static int __map_request(struct ceph_osd_client *osdc,
 		list_move(&req->r_req_lru_item, &osdc->req_notarget);
 		return err;
 	}
-	pgid = reqhead->layout.ol_pgid;
+	pgid.pool = le32_to_cpu(reqhead->layout.ol_pgid.pool);
+	pgid.seed = le16_to_cpu(reqhead->layout.ol_pgid.ps);
 	req->r_pgid = pgid;
 
 	err = ceph_calc_pg_acting(osdc->osdmap, pgid, acting);
@@ -943,8 +944,8 @@ static int __map_request(struct ceph_osd_client *osdc,
 	    (req->r_osd == NULL && o == -1))
 		return 0;  /* no change */
 
-	dout("map_request tid %llu pgid %d.%x osd%d (was osd%d)\n",
-	     req->r_tid, le32_to_cpu(pgid.pool), le16_to_cpu(pgid.ps), o,
+	dout("map_request tid %llu pgid %lld.%x osd%d (was osd%d)\n",
+	     req->r_tid, pgid.pool, pgid.seed, o,
 	     req->r_osd ? req->r_osd->o_osd : -1);
 
 	/* record full pg acting set */
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 8c89ac25081a..81118db5bd11 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -350,14 +350,15 @@ bad:
  * rbtree of pg_mapping for handling pg_temp (explicit mapping of pgid
  * to a set of osds)
  */
-static int pgid_cmp(struct ceph_pg_v1 l, struct ceph_pg_v1 r)
+static int pgid_cmp(struct ceph_pg l, struct ceph_pg r)
 {
-	u64 a = *(u64 *)&l;
-	u64 b = *(u64 *)&r;
-
-	if (a < b)
+	if (l.pool < r.pool)
+		return -1;
+	if (l.pool > r.pool)
+		return 1;
+	if (l.seed < r.seed)
 		return -1;
-	if (a > b)
+	if (l.seed > r.seed)
 		return 1;
 	return 0;
 }
@@ -389,7 +390,7 @@ static int __insert_pg_mapping(struct ceph_pg_mapping *new,
 }
 
 static struct ceph_pg_mapping *__lookup_pg_mapping(struct rb_root *root,
-						   struct ceph_pg_v1 pgid)
+						   struct ceph_pg pgid)
 {
 	struct rb_node *n = root->rb_node;
 	struct ceph_pg_mapping *pg;
@@ -403,25 +404,26 @@ static struct ceph_pg_mapping *__lookup_pg_mapping(struct rb_root *root,
 		} else if (c > 0) {
 			n = n->rb_right;
 		} else {
-			dout("__lookup_pg_mapping %llx got %p\n",
-			     *(u64 *)&pgid, pg);
+			dout("__lookup_pg_mapping %lld.%x got %p\n",
+			     pgid.pool, pgid.seed, pg);
 			return pg;
 		}
 	}
 	return NULL;
 }
 
-static int __remove_pg_mapping(struct rb_root *root, struct ceph_pg_v1 pgid)
+static int __remove_pg_mapping(struct rb_root *root, struct ceph_pg pgid)
 {
 	struct ceph_pg_mapping *pg = __lookup_pg_mapping(root, pgid);
 
 	if (pg) {
-		dout("__remove_pg_mapping %llx %p\n", *(u64 *)&pgid, pg);
+		dout("__remove_pg_mapping %lld.%x %p\n", pgid.pool, pgid.seed,
+		     pg);
 		rb_erase(&pg->node, root);
 		kfree(pg);
 		return 0;
 	}
-	dout("__remove_pg_mapping %llx dne\n", *(u64 *)&pgid);
+	dout("__remove_pg_mapping %lld.%x dne\n", pgid.pool, pgid.seed);
 	return -ENOENT;
 }
 
@@ -721,11 +723,14 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end)
 	ceph_decode_32_safe(p, end, len, bad);
 	for (i = 0; i < len; i++) {
 		int n, j;
-		struct ceph_pg_v1 pgid;
+		struct ceph_pg pgid;
+		struct ceph_pg_v1 pgid_v1;
 		struct ceph_pg_mapping *pg;
 
 		ceph_decode_need(p, end, sizeof(u32) + sizeof(u64), bad);
-		ceph_decode_copy(p, &pgid, sizeof(pgid));
+		ceph_decode_copy(p, &pgid_v1, sizeof(pgid_v1));
+		pgid.pool = le32_to_cpu(pgid_v1.pool);
+		pgid.seed = le16_to_cpu(pgid_v1.ps);
 		n = ceph_decode_32(p);
 		err = -EINVAL;
 		if (n > (UINT_MAX - sizeof(*pg)) / sizeof(u32))
@@ -743,7 +748,8 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end)
 		err = __insert_pg_mapping(pg, &map->pg_temp);
 		if (err)
 			goto bad;
-		dout(" added pg_temp %llx len %d\n", *(u64 *)&pgid, len);
+		dout(" added pg_temp %lld.%x len %d\n", pgid.pool, pgid.seed,
+		     len);
 	}
 
 	/* crush */
@@ -944,10 +950,13 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
 	while (len--) {
 		struct ceph_pg_mapping *pg;
 		int j;
-		struct ceph_pg_v1 pgid;
+		struct ceph_pg_v1 pgid_v1;
+		struct ceph_pg pgid;
 		u32 pglen;
 		ceph_decode_need(p, end, sizeof(u64) + sizeof(u32), bad);
-		ceph_decode_copy(p, &pgid, sizeof(pgid));
+		ceph_decode_copy(p, &pgid_v1, sizeof(pgid_v1));
+		pgid.pool = le32_to_cpu(pgid_v1.pool);
+		pgid.seed = le16_to_cpu(pgid_v1.ps);
 		pglen = ceph_decode_32(p);
 
 		if (pglen) {
@@ -973,8 +982,8 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
 				kfree(pg);
 				goto bad;
 			}
-			dout(" added pg_temp %llx len %d\n", *(u64 *)&pgid,
-			     pglen);
+			dout(" added pg_temp %lld.%x len %d\n", pgid.pool,
+			     pgid.seed, pglen);
 		} else {
 			/* remove */
 			__remove_pg_mapping(&map->pg_temp, pgid);
@@ -1079,26 +1088,25 @@ int ceph_calc_object_layout(struct ceph_object_layout *ol,
 			    struct ceph_osdmap *osdmap)
 {
 	unsigned int num, num_mask;
-	struct ceph_pg_v1 pgid;
-	int poolid = le32_to_cpu(fl->fl_pg_pool);
+	struct ceph_pg pgid;
 	struct ceph_pg_pool_info *pool;
-	unsigned int ps;
 
 	BUG_ON(!osdmap);
 
-	pool = __lookup_pg_pool(&osdmap->pg_pools, poolid);
+	pgid.pool = le32_to_cpu(fl->fl_pg_pool);
+	pool = __lookup_pg_pool(&osdmap->pg_pools, pgid.pool);
 	if (!pool)
 		return -EIO;
-	ps = ceph_str_hash(pool->v.object_hash, oid, strlen(oid));
+	pgid.seed = ceph_str_hash(pool->v.object_hash, oid, strlen(oid));
 	num = le32_to_cpu(pool->v.pg_num);
 	num_mask = pool->pg_num_mask;
 
-	pgid.ps = cpu_to_le16(ps);
-	pgid.preferred = cpu_to_le16(-1);
-	pgid.pool = fl->fl_pg_pool;
-	dout("calc_object_layout '%s' pgid %d.%x\n", oid, poolid, ps);
+	dout("calc_object_layout '%s' pgid %lld.%x\n", oid, pgid.pool,
+	     pgid.seed);
 
-	ol->ol_pgid = pgid;
+	ol->ol_pgid.ps = cpu_to_le16(pgid.seed);
+	ol->ol_pgid.pool = fl->fl_pg_pool;
+	ol->ol_pgid.preferred = cpu_to_le16(-1);
 	ol->ol_stripe_unit = fl->fl_object_stripe_unit;
 	return 0;
 }
@@ -1108,7 +1116,7 @@ EXPORT_SYMBOL(ceph_calc_object_layout);
  * Calculate raw osd vector for the given pgid.  Return pointer to osd
  * array, or NULL on failure.
  */
-static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg_v1 pgid,
+static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
 			int *osds, int *num)
 {
 	struct ceph_pg_mapping *pg;
@@ -1116,8 +1124,8 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg_v1 pgid,
 	int ruleno;
 	unsigned int poolid, ps, pps, t, r;
 
-	poolid = le32_to_cpu(pgid.pool);
-	ps = le16_to_cpu(pgid.ps);
+	poolid = pgid.pool;
+	ps = pgid.seed;
 
 	pool = __lookup_pg_pool(&osdmap->pg_pools, poolid);
 	if (!pool)
@@ -1126,7 +1134,7 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg_v1 pgid,
 	/* pg_temp? */
 	t = ceph_stable_mod(ps, le32_to_cpu(pool->v.pg_num),
 			    pool->pgp_num_mask);
-	pgid.ps = cpu_to_le16(t);
+	pgid.seed = t;
 	pg = __lookup_pg_mapping(&osdmap->pg_temp, pgid);
 	if (pg) {
 		*num = pg->len;
@@ -1163,7 +1171,7 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg_v1 pgid,
 /*
  * Return acting set for given pgid.
  */
-int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg_v1 pgid,
+int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
 			int *acting)
 {
 	int rawosds[CEPH_PG_MAX_SIZE], *osds;
@@ -1184,7 +1192,7 @@ int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg_v1 pgid,
 /*
  * Return primary osd for given pgid, or -1 if none.
  */
-int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg_v1 pgid)
+int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg pgid)
 {
 	int rawosds[CEPH_PG_MAX_SIZE], *osds;
 	int i, num = CEPH_PG_MAX_SIZE;
-- 
cgit v1.2.3


From ec73a754989c27628c9037887df919561280519c Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@inktank.com>
Date: Tue, 26 Feb 2013 14:23:07 -0600
Subject: ceph: update "ceph_features.h"

This updates "include/linux/ceph/ceph_features.h" so all the feature
bits defined in the user space code are defined here.

The features supported by this implementation will still differ so
that's not updated here.

Signed-off-by: Alex Elder <elder@inktank.com>
Reviewed-by: Sage Weil <sage@inktank.com>
---
 include/linux/ceph/ceph_features.h | 24 ++++++++++++++++++++----
 1 file changed, 20 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h
index 2160aab482f6..9e0f5a8ba247 100644
--- a/include/linux/ceph/ceph_features.h
+++ b/include/linux/ceph/ceph_features.h
@@ -12,12 +12,28 @@
 #define CEPH_FEATURE_MONNAMES       (1<<5)
 #define CEPH_FEATURE_RECONNECT_SEQ  (1<<6)
 #define CEPH_FEATURE_DIRLAYOUTHASH  (1<<7)
-/* bits 8-17 defined by user-space; not supported yet here */
+#define CEPH_FEATURE_OBJECTLOCATOR  (1<<8)
+#define CEPH_FEATURE_PGID64         (1<<9)
+#define CEPH_FEATURE_INCSUBOSDMAP   (1<<10)
+#define CEPH_FEATURE_PGPOOL3        (1<<11)
+#define CEPH_FEATURE_OSDREPLYMUX    (1<<12)
+#define CEPH_FEATURE_OSDENC         (1<<13)
+#define CEPH_FEATURE_OMAP           (1<<14)
+#define CEPH_FEATURE_MONENC         (1<<15)
+#define CEPH_FEATURE_QUERY_T        (1<<16)
+#define CEPH_FEATURE_INDEP_PG_MAP   (1<<17)
 #define CEPH_FEATURE_CRUSH_TUNABLES (1<<18)
-/* bits 19-24 defined by user-space; not supported yet here */
+#define CEPH_FEATURE_CHUNKY_SCRUB   (1<<19)
+#define CEPH_FEATURE_MON_NULLROUTE  (1<<20)
+#define CEPH_FEATURE_MON_GV         (1<<21)
+#define CEPH_FEATURE_BACKFILL_RESERVATION (1<<22)
+#define CEPH_FEATURE_MSG_AUTH	    (1<<23)
+#define CEPH_FEATURE_RECOVERY_RESERVATION (1<<24)
 #define CEPH_FEATURE_CRUSH_TUNABLES2 (1<<25)
-/* bit 26 defined by user-space; not supported yet here */
-#define CEPH_FEATURE_REPLY_CREATE_INODE (1<<27)
+#define CEPH_FEATURE_CREATEPOOLID   (1<<26)
+#define CEPH_FEATURE_REPLY_CREATE_INODE   (1<<27)
+#define CEPH_FEATURE_OSD_HBMSGS     (1<<28)
+#define CEPH_FEATURE_MDSENC         (1<<29)
 
 /*
  * Features supported.
-- 
cgit v1.2.3


From 4f6a7e5ee1393ec4b243b39dac9f36992d161540 Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@inktank.com>
Date: Sat, 23 Feb 2013 10:41:09 -0800
Subject: ceph: update support for PGID64, PGPOOL3, OSDENC protocol features

Support (and require) the PGID64, PGPOOL3, and OSDENC protocol features.
These have been present in ceph.git since v0.42, Feb 2012.  Require these
features to simplify support; nobody is running older userspace.

Note that the new request and reply encoding is still not in place, so the new
code is not yet functional.

Signed-off-by: Sage Weil <sage@inktank.com>
Reviewed-by: Alex Elder <elder@inktank.com>
---
 fs/ceph/mdsmap.c                   |  12 ++-
 include/linux/ceph/ceph_features.h |  14 +++-
 include/linux/ceph/mdsmap.h        |   4 +-
 include/linux/ceph/osdmap.h        |  16 +++-
 include/linux/ceph/rados.h         |  23 ------
 net/ceph/ceph_common.c             |   6 +-
 net/ceph/debugfs.c                 |   6 +-
 net/ceph/osdmap.c                  | 162 ++++++++++++++++++++-----------------
 8 files changed, 124 insertions(+), 119 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c
index 73b7d44e8a35..0d3c9240c61b 100644
--- a/fs/ceph/mdsmap.c
+++ b/fs/ceph/mdsmap.c
@@ -59,6 +59,10 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
 		return ERR_PTR(-ENOMEM);
 
 	ceph_decode_16_safe(p, end, version, bad);
+	if (version > 3) {
+		pr_warning("got mdsmap version %d > 3, failing", version);
+		goto bad;
+	}
 
 	ceph_decode_need(p, end, 8*sizeof(u32) + sizeof(u64), bad);
 	m->m_epoch = ceph_decode_32(p);
@@ -144,13 +148,13 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
 	/* pg_pools */
 	ceph_decode_32_safe(p, end, n, bad);
 	m->m_num_data_pg_pools = n;
-	m->m_data_pg_pools = kcalloc(n, sizeof(u32), GFP_NOFS);
+	m->m_data_pg_pools = kcalloc(n, sizeof(u64), GFP_NOFS);
 	if (!m->m_data_pg_pools)
 		goto badmem;
-	ceph_decode_need(p, end, sizeof(u32)*(n+1), bad);
+	ceph_decode_need(p, end, sizeof(u64)*(n+1), bad);
 	for (i = 0; i < n; i++)
-		m->m_data_pg_pools[i] = ceph_decode_32(p);
-	m->m_cas_pg_pool = ceph_decode_32(p);
+		m->m_data_pg_pools[i] = ceph_decode_64(p);
+	m->m_cas_pg_pool = ceph_decode_64(p);
 
 	/* ok, we don't care about the rest. */
 	dout("mdsmap_decode success epoch %u\n", m->m_epoch);
diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h
index 9e0f5a8ba247..ab0a54286e0d 100644
--- a/include/linux/ceph/ceph_features.h
+++ b/include/linux/ceph/ceph_features.h
@@ -39,11 +39,17 @@
  * Features supported.
  */
 #define CEPH_FEATURES_SUPPORTED_DEFAULT  \
-	(CEPH_FEATURE_NOSRCADDR |	 \
-	 CEPH_FEATURE_CRUSH_TUNABLES |	  \
-	 CEPH_FEATURE_CRUSH_TUNABLES2 |   \
+	(CEPH_FEATURE_NOSRCADDR |		\
+	 CEPH_FEATURE_PGID64 |			\
+	 CEPH_FEATURE_PGPOOL3 |			\
+	 CEPH_FEATURE_OSDENC |			\
+	 CEPH_FEATURE_CRUSH_TUNABLES |		\
+	 CEPH_FEATURE_CRUSH_TUNABLES2 |		\
 	 CEPH_FEATURE_REPLY_CREATE_INODE)
 
 #define CEPH_FEATURES_REQUIRED_DEFAULT   \
-	(CEPH_FEATURE_NOSRCADDR)
+	(CEPH_FEATURE_NOSRCADDR |	 \
+	 CEPH_FEATURE_PGID64 |		 \
+	 CEPH_FEATURE_PGPOOL3 |		 \
+	 CEPH_FEATURE_OSDENC)
 #endif
diff --git a/include/linux/ceph/mdsmap.h b/include/linux/ceph/mdsmap.h
index cb15b5d867c7..87ed09f54800 100644
--- a/include/linux/ceph/mdsmap.h
+++ b/include/linux/ceph/mdsmap.h
@@ -29,8 +29,8 @@ struct ceph_mdsmap {
 
 	/* which object pools file data can be stored in */
 	int m_num_data_pg_pools;
-	u32 *m_data_pg_pools;
-	u32 m_cas_pg_pool;
+	u64 *m_data_pg_pools;
+	u64 m_cas_pg_pool;
 };
 
 static inline struct ceph_entity_addr *
diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h
index 8a612df4c248..8587746b7f0e 100644
--- a/include/linux/ceph/osdmap.h
+++ b/include/linux/ceph/osdmap.h
@@ -25,12 +25,22 @@ struct ceph_pg {
 
 struct ceph_pg_pool_info {
 	struct rb_node node;
-	int id;
-	struct ceph_pg_pool v;
-	int pg_num_mask, pgp_num_mask, lpg_num_mask, lpgp_num_mask;
+	s64 id;
+	u8 type;
+	u8 size;
+	u8 crush_ruleset;
+	u8 object_hash;
+	u32 pg_num, pgp_num;
+	int pg_num_mask, pgp_num_mask;
+	u64 flags;
 	char *name;
 };
 
+struct ceph_object_locator {
+	uint64_t pool;
+	char *key;
+};
+
 struct ceph_pg_mapping {
 	struct rb_node node;
 	struct ceph_pg pgid;
diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h
index e7cece69b13f..d784c8dfb09a 100644
--- a/include/linux/ceph/rados.h
+++ b/include/linux/ceph/rados.h
@@ -8,14 +8,6 @@
 
 #include <linux/ceph/msgr.h>
 
-/*
- * osdmap encoding versions
- */
-#define CEPH_OSDMAP_INC_VERSION     5
-#define CEPH_OSDMAP_INC_VERSION_EXT 6
-#define CEPH_OSDMAP_VERSION         5
-#define CEPH_OSDMAP_VERSION_EXT     6
-
 /*
  * fs id
  */
@@ -91,21 +83,6 @@ struct ceph_pg_v1 {
 
 #define CEPH_PG_TYPE_REP     1
 #define CEPH_PG_TYPE_RAID4   2
-#define CEPH_PG_POOL_VERSION 2
-struct ceph_pg_pool {
-	__u8 type;                /* CEPH_PG_TYPE_* */
-	__u8 size;                /* number of osds in each pg */
-	__u8 crush_ruleset;       /* crush placement rule */
-	__u8 object_hash;         /* hash mapping object name to ps */
-	__le32 pg_num, pgp_num;   /* number of pg's */
-	__le32 lpg_num, lpgp_num; /* number of localized pg's */
-	__le32 last_change;       /* most recent epoch changed */
-	__le64 snap_seq;          /* seq for per-pool snapshot */
-	__le32 snap_epoch;        /* epoch of last snap */
-	__le32 num_snaps;
-	__le32 num_removed_snap_intervals; /* if non-empty, NO per-pool snaps */
-	__le64 auid;               /* who owns the pg */
-} __attribute__ ((packed));
 
 /*
  * stable_mod func is used to control number of placement groups.
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index c236c235c4a2..c5605ae96714 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -601,10 +601,8 @@ static int __init init_ceph_lib(void)
 	if (ret < 0)
 		goto out_crypto;
 
-	pr_info("loaded (mon/osd proto %d/%d, osdmap %d/%d %d/%d)\n",
-		CEPH_MONC_PROTOCOL, CEPH_OSDC_PROTOCOL,
-		CEPH_OSDMAP_VERSION, CEPH_OSDMAP_VERSION_EXT,
-		CEPH_OSDMAP_INC_VERSION, CEPH_OSDMAP_INC_VERSION_EXT);
+	pr_info("loaded (mon/osd proto %d/%d)\n",
+		CEPH_MONC_PROTOCOL, CEPH_OSDC_PROTOCOL);
 
 	return 0;
 
diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c
index 61a9af634f8b..f4d4b27d6026 100644
--- a/net/ceph/debugfs.c
+++ b/net/ceph/debugfs.c
@@ -66,9 +66,9 @@ static int osdmap_show(struct seq_file *s, void *p)
 	for (n = rb_first(&client->osdc.osdmap->pg_pools); n; n = rb_next(n)) {
 		struct ceph_pg_pool_info *pool =
 			rb_entry(n, struct ceph_pg_pool_info, node);
-		seq_printf(s, "pg_pool %d pg_num %d / %d, lpg_num %d / %d\n",
-			   pool->id, pool->v.pg_num, pool->pg_num_mask,
-			   pool->v.lpg_num, pool->lpg_num_mask);
+		seq_printf(s, "pg_pool %llu pg_num %d / %d\n",
+			   (unsigned long long)pool->id, pool->pg_num,
+			   pool->pg_num_mask);
 	}
 	for (i = 0; i < client->osdc.osdmap->max_osd; i++) {
 		struct ceph_entity_addr *addr =
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 81118db5bd11..911919320d2e 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -45,13 +45,8 @@ static int calc_bits_of(unsigned int t)
  */
 static void calc_pg_masks(struct ceph_pg_pool_info *pi)
 {
-	pi->pg_num_mask = (1 << calc_bits_of(le32_to_cpu(pi->v.pg_num)-1)) - 1;
-	pi->pgp_num_mask =
-		(1 << calc_bits_of(le32_to_cpu(pi->v.pgp_num)-1)) - 1;
-	pi->lpg_num_mask =
-		(1 << calc_bits_of(le32_to_cpu(pi->v.lpg_num)-1)) - 1;
-	pi->lpgp_num_mask =
-		(1 << calc_bits_of(le32_to_cpu(pi->v.lpgp_num)-1)) - 1;
+	pi->pg_num_mask = (1 << calc_bits_of(pi->pg_num-1)) - 1;
+	pi->pgp_num_mask = (1 << calc_bits_of(pi->pgp_num-1)) - 1;
 }
 
 /*
@@ -452,7 +447,7 @@ static int __insert_pg_pool(struct rb_root *root, struct ceph_pg_pool_info *new)
 	return 0;
 }
 
-static struct ceph_pg_pool_info *__lookup_pg_pool(struct rb_root *root, int id)
+static struct ceph_pg_pool_info *__lookup_pg_pool(struct rb_root *root, u64 id)
 {
 	struct ceph_pg_pool_info *pi;
 	struct rb_node *n = root->rb_node;
@@ -508,24 +503,57 @@ static void __remove_pg_pool(struct rb_root *root, struct ceph_pg_pool_info *pi)
 
 static int __decode_pool(void **p, void *end, struct ceph_pg_pool_info *pi)
 {
-	unsigned int n, m;
+	u8 ev, cv;
+	unsigned len, num;
+	void *pool_end;
+
+	ceph_decode_need(p, end, 2 + 4, bad);
+	ev = ceph_decode_8(p);  /* encoding version */
+	cv = ceph_decode_8(p); /* compat version */
+	if (ev < 5) {
+		pr_warning("got v %d < 5 cv %d of ceph_pg_pool\n", ev, cv);
+		return -EINVAL;
+	}
+	if (cv > 7) {
+		pr_warning("got v %d cv %d > 7 of ceph_pg_pool\n", ev, cv);
+		return -EINVAL;
+	}
+	len = ceph_decode_32(p);
+	ceph_decode_need(p, end, len, bad);
+	pool_end = *p + len;
 
-	ceph_decode_copy(p, &pi->v, sizeof(pi->v));
-	calc_pg_masks(pi);
+	pi->type = ceph_decode_8(p);
+	pi->size = ceph_decode_8(p);
+	pi->crush_ruleset = ceph_decode_8(p);
+	pi->object_hash = ceph_decode_8(p);
 
-	/* num_snaps * snap_info_t */
-	n = le32_to_cpu(pi->v.num_snaps);
-	while (n--) {
-		ceph_decode_need(p, end, sizeof(u64) + 1 + sizeof(u64) +
-				 sizeof(struct ceph_timespec), bad);
-		*p += sizeof(u64) +       /* key */
-			1 + sizeof(u64) + /* u8, snapid */
-			sizeof(struct ceph_timespec);
-		m = ceph_decode_32(p);    /* snap name */
-		*p += m;
+	pi->pg_num = ceph_decode_32(p);
+	pi->pgp_num = ceph_decode_32(p);
+
+	*p += 4 + 4;  /* skip lpg* */
+	*p += 4;      /* skip last_change */
+	*p += 8 + 4;  /* skip snap_seq, snap_epoch */
+
+	/* skip snaps */
+	num = ceph_decode_32(p);
+	while (num--) {
+		*p += 8;  /* snapid key */
+		*p += 1 + 1; /* versions */
+		len = ceph_decode_32(p);
+		*p += len;
 	}
 
-	*p += le32_to_cpu(pi->v.num_removed_snap_intervals) * sizeof(u64) * 2;
+	/* skip removed snaps */
+	num = ceph_decode_32(p);
+	*p += num * (8 + 8);
+
+	*p += 8;  /* skip auid */
+	pi->flags = ceph_decode_64(p);
+
+	/* ignore the rest */
+
+	*p = pool_end;
+	calc_pg_masks(pi);
 	return 0;
 
 bad:
@@ -535,14 +563,15 @@ bad:
 static int __decode_pool_names(void **p, void *end, struct ceph_osdmap *map)
 {
 	struct ceph_pg_pool_info *pi;
-	u32 num, len, pool;
+	u32 num, len;
+	u64 pool;
 
 	ceph_decode_32_safe(p, end, num, bad);
 	dout(" %d pool names\n", num);
 	while (num--) {
-		ceph_decode_32_safe(p, end, pool, bad);
+		ceph_decode_64_safe(p, end, pool, bad);
 		ceph_decode_32_safe(p, end, len, bad);
-		dout("  pool %d len %d\n", pool, len);
+		dout("  pool %llu len %d\n", pool, len);
 		ceph_decode_need(p, end, len, bad);
 		pi = __lookup_pg_pool(&map->pg_pools, pool);
 		if (pi) {
@@ -633,7 +662,6 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end)
 	struct ceph_osdmap *map;
 	u16 version;
 	u32 len, max, i;
-	u8 ev;
 	int err = -EINVAL;
 	void *start = *p;
 	struct ceph_pg_pool_info *pi;
@@ -646,9 +674,12 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end)
 	map->pg_temp = RB_ROOT;
 
 	ceph_decode_16_safe(p, end, version, bad);
-	if (version > CEPH_OSDMAP_VERSION) {
-		pr_warning("got unknown v %d > %d of osdmap\n", version,
-			   CEPH_OSDMAP_VERSION);
+	if (version > 6) {
+		pr_warning("got unknown v %d > 6 of osdmap\n", version);
+		goto bad;
+	}
+	if (version < 6) {
+		pr_warning("got old v %d < 6 of osdmap\n", version);
 		goto bad;
 	}
 
@@ -660,20 +691,12 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end)
 
 	ceph_decode_32_safe(p, end, max, bad);
 	while (max--) {
-		ceph_decode_need(p, end, 4 + 1 + sizeof(pi->v), bad);
+		ceph_decode_need(p, end, 8 + 2, bad);
 		err = -ENOMEM;
 		pi = kzalloc(sizeof(*pi), GFP_NOFS);
 		if (!pi)
 			goto bad;
-		pi->id = ceph_decode_32(p);
-		err = -EINVAL;
-		ev = ceph_decode_8(p); /* encoding version */
-		if (ev > CEPH_PG_POOL_VERSION) {
-			pr_warning("got unknown v %d > %d of ceph_pg_pool\n",
-				   ev, CEPH_PG_POOL_VERSION);
-			kfree(pi);
-			goto bad;
-		}
+		pi->id = ceph_decode_64(p);
 		err = __decode_pool(p, end, pi);
 		if (err < 0) {
 			kfree(pi);
@@ -682,12 +705,10 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end)
 		__insert_pg_pool(&map->pg_pools, pi);
 	}
 
-	if (version >= 5) {
-		err = __decode_pool_names(p, end, map);
-		if (err < 0) {
-			dout("fail to decode pool names");
-			goto bad;
-		}
+	err = __decode_pool_names(p, end, map);
+	if (err < 0) {
+		dout("fail to decode pool names");
+		goto bad;
 	}
 
 	ceph_decode_32_safe(p, end, map->pool_max, bad);
@@ -788,16 +809,17 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
 	struct ceph_fsid fsid;
 	u32 epoch = 0;
 	struct ceph_timespec modified;
-	u32 len, pool;
-	__s32 new_pool_max, new_flags, max;
+	s32 len;
+	u64 pool;
+	__s64 new_pool_max;
+	__s32 new_flags, max;
 	void *start = *p;
 	int err = -EINVAL;
 	u16 version;
 
 	ceph_decode_16_safe(p, end, version, bad);
-	if (version > CEPH_OSDMAP_INC_VERSION) {
-		pr_warning("got unknown v %d > %d of inc osdmap\n", version,
-			   CEPH_OSDMAP_INC_VERSION);
+	if (version > 6) {
+		pr_warning("got unknown v %d > %d of inc osdmap\n", version, 6);
 		goto bad;
 	}
 
@@ -807,7 +829,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
 	epoch = ceph_decode_32(p);
 	BUG_ON(epoch != map->epoch+1);
 	ceph_decode_copy(p, &modified, sizeof(modified));
-	new_pool_max = ceph_decode_32(p);
+	new_pool_max = ceph_decode_64(p);
 	new_flags = ceph_decode_32(p);
 
 	/* full map? */
@@ -857,18 +879,9 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
 	/* new_pool */
 	ceph_decode_32_safe(p, end, len, bad);
 	while (len--) {
-		__u8 ev;
 		struct ceph_pg_pool_info *pi;
 
-		ceph_decode_32_safe(p, end, pool, bad);
-		ceph_decode_need(p, end, 1 + sizeof(pi->v), bad);
-		ev = ceph_decode_8(p);  /* encoding version */
-		if (ev > CEPH_PG_POOL_VERSION) {
-			pr_warning("got unknown v %d > %d of ceph_pg_pool\n",
-				   ev, CEPH_PG_POOL_VERSION);
-			err = -EINVAL;
-			goto bad;
-		}
+		ceph_decode_64_safe(p, end, pool, bad);
 		pi = __lookup_pg_pool(&map->pg_pools, pool);
 		if (!pi) {
 			pi = kzalloc(sizeof(*pi), GFP_NOFS);
@@ -894,7 +907,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
 	while (len--) {
 		struct ceph_pg_pool_info *pi;
 
-		ceph_decode_32_safe(p, end, pool, bad);
+		ceph_decode_64_safe(p, end, pool, bad);
 		pi = __lookup_pg_pool(&map->pg_pools, pool);
 		if (pi)
 			__remove_pg_pool(&map->pg_pools, pi);
@@ -1097,8 +1110,8 @@ int ceph_calc_object_layout(struct ceph_object_layout *ol,
 	pool = __lookup_pg_pool(&osdmap->pg_pools, pgid.pool);
 	if (!pool)
 		return -EIO;
-	pgid.seed = ceph_str_hash(pool->v.object_hash, oid, strlen(oid));
-	num = le32_to_cpu(pool->v.pg_num);
+	pgid.seed = ceph_str_hash(pool->object_hash, oid, strlen(oid));
+	num = pool->pg_num;
 	num_mask = pool->pg_num_mask;
 
 	dout("calc_object_layout '%s' pgid %lld.%x\n", oid, pgid.pool,
@@ -1132,8 +1145,7 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
 		return NULL;
 
 	/* pg_temp? */
-	t = ceph_stable_mod(ps, le32_to_cpu(pool->v.pg_num),
-			    pool->pgp_num_mask);
+	t = ceph_stable_mod(ps, pool->pg_num, pool->pgp_num_mask);
 	pgid.seed = t;
 	pg = __lookup_pg_mapping(&osdmap->pg_temp, pgid);
 	if (pg) {
@@ -1142,26 +1154,24 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
 	}
 
 	/* crush */
-	ruleno = crush_find_rule(osdmap->crush, pool->v.crush_ruleset,
-				 pool->v.type, pool->v.size);
+	ruleno = crush_find_rule(osdmap->crush, pool->crush_ruleset,
+				 pool->type, pool->size);
 	if (ruleno < 0) {
 		pr_err("no crush rule pool %d ruleset %d type %d size %d\n",
-		       poolid, pool->v.crush_ruleset, pool->v.type,
-		       pool->v.size);
+		       poolid, pool->crush_ruleset, pool->type,
+		       pool->size);
 		return NULL;
 	}
 
-	pps = ceph_stable_mod(ps,
-			      le32_to_cpu(pool->v.pgp_num),
-			      pool->pgp_num_mask);
+	pps = ceph_stable_mod(ps, pool->pgp_num, pool->pgp_num_mask);
 	pps += poolid;
 	r = crush_do_rule(osdmap->crush, ruleno, pps, osds,
-			  min_t(int, pool->v.size, *num),
+			  min_t(int, pool->size, *num),
 			  osdmap->osd_weight);
 	if (r < 0) {
 		pr_err("error %d from crush rule: pool %d ruleset %d type %d"
-		       " size %d\n", r, poolid, pool->v.crush_ruleset,
-		       pool->v.type, pool->v.size);
+		       " size %d\n", r, poolid, pool->crush_ruleset,
+		       pool->type, pool->size);
 		return NULL;
 	}
 	*num = r;
-- 
cgit v1.2.3


From 2169aea649c08374bec7d220a3b8f64712275356 Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@inktank.com>
Date: Mon, 25 Feb 2013 16:13:08 -0800
Subject: libceph: calculate placement based on the internal data types

Instead of using the old ceph_object_layout struct, update our internal
ceph_calc_object_layout method to use the ceph_pg type.  This allows us to
pass the full 32-bit precision of the pgid.seed to the callers.  It also
allows some callers to avoid reaching into the request structures for the
struct ceph_object_layout fields.

Signed-off-by: Sage Weil <sage@inktank.com>
Reviewed-by: Alex Elder <elder@inktank.com>
---
 fs/ceph/ioctl.c                 |  5 +----
 include/linux/ceph/osd_client.h |  1 +
 include/linux/ceph/osdmap.h     |  2 +-
 net/ceph/osd_client.c           | 11 +++++++----
 net/ceph/osdmap.c               | 18 +++++-------------
 5 files changed, 15 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c
index fb036ed3e129..7d85991fd647 100644
--- a/fs/ceph/ioctl.c
+++ b/fs/ceph/ioctl.c
@@ -185,7 +185,6 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg)
 		&ceph_sb_to_client(inode->i_sb)->client->osdc;
 	u64 len = 1, olen;
 	u64 tmp;
-	struct ceph_object_layout ol;
 	struct ceph_pg pgid;
 	int r;
 
@@ -209,11 +208,9 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg)
 
 	snprintf(dl.object_name, sizeof(dl.object_name), "%llx.%08llx",
 		 ceph_ino(inode), dl.object_no);
-	ceph_calc_object_layout(&ol, dl.object_name, &ci->i_layout,
+	ceph_calc_object_layout(&pgid, dl.object_name, &ci->i_layout,
 				osdc->osdmap);
 
-	pgid.pool = le32_to_cpu(ol.ol_pgid.pool);
-	pgid.seed = le16_to_cpu(ol.ol_pgid.ps);
 	dl.osd = ceph_calc_pg_primary(osdc->osdmap, pgid);
 	if (dl.osd >= 0) {
 		struct ceph_entity_addr *a =
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index 388158ff0cbc..ad8899fc3157 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -82,6 +82,7 @@ struct ceph_osd_request {
 
 	char              r_oid[MAX_OBJ_NAME_SIZE];          /* object name */
 	int               r_oid_len;
+	u64               r_snapid;
 	unsigned long     r_stamp;            /* send OR check time */
 
 	struct ceph_file_layout r_file_layout;
diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h
index 8587746b7f0e..35985125f118 100644
--- a/include/linux/ceph/osdmap.h
+++ b/include/linux/ceph/osdmap.h
@@ -129,7 +129,7 @@ extern int ceph_calc_file_object_mapping(struct ceph_file_layout *layout,
 					 u64 *bno, u64 *oxoff, u64 *oxlen);
 
 /* calculate mapping of object to a placement group */
-extern int ceph_calc_object_layout(struct ceph_object_layout *ol,
+extern int ceph_calc_object_layout(struct ceph_pg *pg,
 				   const char *oid,
 				   struct ceph_file_layout *fl,
 				   struct ceph_osdmap *osdmap);
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 1990834e518b..5584f0a08e28 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -913,21 +913,18 @@ EXPORT_SYMBOL(ceph_osdc_set_request_linger);
 static int __map_request(struct ceph_osd_client *osdc,
 			 struct ceph_osd_request *req, int force_resend)
 {
-	struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base;
 	struct ceph_pg pgid;
 	int acting[CEPH_PG_MAX_SIZE];
 	int o = -1, num = 0;
 	int err;
 
 	dout("map_request %p tid %lld\n", req, req->r_tid);
-	err = ceph_calc_object_layout(&reqhead->layout, req->r_oid,
+	err = ceph_calc_object_layout(&pgid, req->r_oid,
 				      &req->r_file_layout, osdc->osdmap);
 	if (err) {
 		list_move(&req->r_req_lru_item, &osdc->req_notarget);
 		return err;
 	}
-	pgid.pool = le32_to_cpu(reqhead->layout.ol_pgid.pool);
-	pgid.seed = le16_to_cpu(reqhead->layout.ol_pgid.ps);
 	req->r_pgid = pgid;
 
 	err = ceph_calc_pg_acting(osdc->osdmap, pgid, acting);
@@ -1000,10 +997,16 @@ static void __send_request(struct ceph_osd_client *osdc,
 	     req, req->r_tid, req->r_osd->o_osd, req->r_flags);
 
 	reqhead = req->r_request->front.iov_base;
+	reqhead->snapid = cpu_to_le64(req->r_snapid);
 	reqhead->osdmap_epoch = cpu_to_le32(osdc->osdmap->epoch);
 	reqhead->flags |= cpu_to_le32(req->r_flags);  /* e.g., RETRY */
 	reqhead->reassert_version = req->r_reassert_version;
 
+	reqhead->layout.ol_pgid.ps = cpu_to_le16(req->r_pgid.seed);
+	reqhead->layout.ol_pgid.pool = cpu_to_le32(req->r_pgid.pool);
+	reqhead->layout.ol_pgid.preferred = cpu_to_le16(-1);
+	reqhead->layout.ol_stripe_unit = 0;
+
 	req->r_stamp = jiffies;
 	list_move_tail(&req->r_req_lru_item, &osdc->req_lru);
 
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 911919320d2e..378471644501 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -1095,32 +1095,24 @@ EXPORT_SYMBOL(ceph_calc_file_object_mapping);
  * calculate an object layout (i.e. pgid) from an oid,
  * file_layout, and osdmap
  */
-int ceph_calc_object_layout(struct ceph_object_layout *ol,
+int ceph_calc_object_layout(struct ceph_pg *pg,
 			    const char *oid,
 			    struct ceph_file_layout *fl,
 			    struct ceph_osdmap *osdmap)
 {
 	unsigned int num, num_mask;
-	struct ceph_pg pgid;
 	struct ceph_pg_pool_info *pool;
 
 	BUG_ON(!osdmap);
-
-	pgid.pool = le32_to_cpu(fl->fl_pg_pool);
-	pool = __lookup_pg_pool(&osdmap->pg_pools, pgid.pool);
+	pg->pool = le32_to_cpu(fl->fl_pg_pool);
+	pool = __lookup_pg_pool(&osdmap->pg_pools, pg->pool);
 	if (!pool)
 		return -EIO;
-	pgid.seed = ceph_str_hash(pool->object_hash, oid, strlen(oid));
+	pg->seed = ceph_str_hash(pool->object_hash, oid, strlen(oid));
 	num = pool->pg_num;
 	num_mask = pool->pg_num_mask;
 
-	dout("calc_object_layout '%s' pgid %lld.%x\n", oid, pgid.pool,
-	     pgid.seed);
-
-	ol->ol_pgid.ps = cpu_to_le16(pgid.seed);
-	ol->ol_pgid.pool = fl->fl_pg_pool;
-	ol->ol_pgid.preferred = cpu_to_le16(-1);
-	ol->ol_stripe_unit = fl->fl_object_stripe_unit;
+	dout("calc_object_layout '%s' pgid %lld.%x\n", oid, pg->pool, pg->seed);
 	return 0;
 }
 EXPORT_SYMBOL(ceph_calc_object_layout);
-- 
cgit v1.2.3


From 1b83bef24c6746a146d39915a18fb5425f2facb0 Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@inktank.com>
Date: Mon, 25 Feb 2013 16:11:12 -0800
Subject: libceph: update osd request/reply encoding

Use the new version of the encoding for osd requests and replies.  In the
process, update the way we are tracking request ops and reply lengths and
results in the struct ceph_osd_request.  Update the rbd and fs/ceph users
appropriately.

The main changes are:
 - we keep pointers into the request memory for fields we need to update
   each time the request is sent out over the wire
 - we keep information about the result in an array in the request struct
   where the users can easily get at it.

Signed-off-by: Sage Weil <sage@inktank.com>
Reviewed-by: Alex Elder <elder@inktank.com>
---
 drivers/block/rbd.c             |  52 +++++----
 fs/ceph/addr.c                  |  31 ++----
 include/linux/ceph/osd_client.h |  19 +++-
 include/linux/ceph/rados.h      |  38 -------
 net/ceph/debugfs.c              |  18 +---
 net/ceph/osd_client.c           | 233 +++++++++++++++++++++++++++++-----------
 6 files changed, 222 insertions(+), 169 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 22085e86a409..6c81a4c040b9 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -196,7 +196,7 @@ struct rbd_obj_request {
 
 	u64			xferred;	/* bytes transferred */
 	u64			version;
-	s32			result;
+	int			result;
 	atomic_t		done;
 
 	rbd_obj_callback_t	callback;
@@ -1282,12 +1282,19 @@ static void rbd_osd_trivial_callback(struct rbd_obj_request *obj_request)
 
 static void rbd_osd_read_callback(struct rbd_obj_request *obj_request)
 {
-
 	dout("%s: obj %p result %d %llu/%llu\n", __func__, obj_request,
 		obj_request->result, obj_request->xferred, obj_request->length);
-	if (obj_request->result == (s32) -ENOENT) {
+	/*
+	 * ENOENT means a hole in the object.  We zero-fill the
+	 * entire length of the request.  A short read also implies
+	 * zero-fill to the end of the request.  Either way we
+	 * update the xferred count to indicate the whole request
+	 * was satisfied.
+	 */
+	if (obj_request->result == -ENOENT) {
 		zero_bio_chain(obj_request->bio_list, 0);
 		obj_request->result = 0;
+		obj_request->xferred = obj_request->length;
 	} else if (obj_request->xferred < obj_request->length &&
 			!obj_request->result) {
 		zero_bio_chain(obj_request->bio_list, obj_request->xferred);
@@ -1298,20 +1305,14 @@ static void rbd_osd_read_callback(struct rbd_obj_request *obj_request)
 
 static void rbd_osd_write_callback(struct rbd_obj_request *obj_request)
 {
-	dout("%s: obj %p result %d %llu/%llu\n", __func__, obj_request,
-		obj_request->result, obj_request->xferred, obj_request->length);
-
-	/* A short write really shouldn't occur.  Warn if we see one */
-
-	if (obj_request->xferred != obj_request->length) {
-		struct rbd_img_request *img_request = obj_request->img_request;
-		struct rbd_device *rbd_dev;
-
-		rbd_dev = img_request ? img_request->rbd_dev : NULL;
-		rbd_warn(rbd_dev, "wrote %llu want %llu\n",
-			obj_request->xferred, obj_request->length);
-	}
-
+	dout("%s: obj %p result %d %llu\n", __func__, obj_request,
+		obj_request->result, obj_request->length);
+	/*
+	 * There is no such thing as a successful short write.
+	 * Our xferred value is the number of bytes transferred
+	 * back.  Set it to our originally-requested length.
+	 */
+	obj_request->xferred = obj_request->length;
 	obj_request_done_set(obj_request);
 }
 
@@ -1329,9 +1330,6 @@ static void rbd_osd_req_callback(struct ceph_osd_request *osd_req,
 				struct ceph_msg *msg)
 {
 	struct rbd_obj_request *obj_request = osd_req->r_priv;
-	struct ceph_osd_reply_head *reply_head;
-	struct ceph_osd_op *op;
-	u32 num_ops;
 	u16 opcode;
 
 	dout("%s: osd_req %p msg %p\n", __func__, osd_req, msg);
@@ -1339,22 +1337,19 @@ static void rbd_osd_req_callback(struct ceph_osd_request *osd_req,
 	rbd_assert(!!obj_request->img_request ^
 				(obj_request->which == BAD_WHICH));
 
-	reply_head = msg->front.iov_base;
-	obj_request->result = (s32) le32_to_cpu(reply_head->result);
+	if (osd_req->r_result < 0)
+		obj_request->result = osd_req->r_result;
 	obj_request->version = le64_to_cpu(osd_req->r_reassert_version.version);
 
-	num_ops = le32_to_cpu(reply_head->num_ops);
-	WARN_ON(num_ops != 1);	/* For now */
+	WARN_ON(osd_req->r_num_ops != 1);	/* For now */
 
 	/*
 	 * We support a 64-bit length, but ultimately it has to be
 	 * passed to blk_end_request(), which takes an unsigned int.
 	 */
-	op = &reply_head->ops[0];
-	obj_request->xferred = le64_to_cpu(op->extent.length);
+	obj_request->xferred = osd_req->r_reply_op_len[0];
 	rbd_assert(obj_request->xferred < (u64) UINT_MAX);
-
-	opcode = le16_to_cpu(op->op);
+	opcode = osd_req->r_request_ops[0].op;
 	switch (opcode) {
 	case CEPH_OSD_OP_READ:
 		rbd_osd_read_callback(obj_request);
@@ -1719,6 +1714,7 @@ static void rbd_img_obj_callback(struct rbd_obj_request *obj_request)
 		more = blk_end_request(img_request->rq, result, xferred);
 		which++;
 	}
+
 	rbd_assert(more ^ (which == img_request->obj_request_count));
 	img_request->next_completion = which;
 out:
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index fc613715af46..cfef3e01a9b3 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -236,16 +236,10 @@ static int ceph_readpage(struct file *filp, struct page *page)
 static void finish_read(struct ceph_osd_request *req, struct ceph_msg *msg)
 {
 	struct inode *inode = req->r_inode;
-	struct ceph_osd_reply_head *replyhead;
-	int rc, bytes;
+	int rc = req->r_result;
+	int bytes = le32_to_cpu(msg->hdr.data_len);
 	int i;
 
-	/* parse reply */
-	replyhead = msg->front.iov_base;
-	WARN_ON(le32_to_cpu(replyhead->num_ops) == 0);
-	rc = le32_to_cpu(replyhead->result);
-	bytes = le32_to_cpu(msg->hdr.data_len);
-
 	dout("finish_read %p req %p rc %d bytes %d\n", inode, req, rc, bytes);
 
 	/* unlock all pages, zeroing any data we didn't read */
@@ -553,27 +547,18 @@ static void writepages_finish(struct ceph_osd_request *req,
 			      struct ceph_msg *msg)
 {
 	struct inode *inode = req->r_inode;
-	struct ceph_osd_reply_head *replyhead;
-	struct ceph_osd_op *op;
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	unsigned wrote;
 	struct page *page;
 	int i;
 	struct ceph_snap_context *snapc = req->r_snapc;
 	struct address_space *mapping = inode->i_mapping;
-	__s32 rc = -EIO;
-	u64 bytes = 0;
+	int rc = req->r_result;
+	u64 bytes = le64_to_cpu(req->r_request_ops[0].extent.length);
 	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
 	long writeback_stat;
 	unsigned issued = ceph_caps_issued(ci);
 
-	/* parse reply */
-	replyhead = msg->front.iov_base;
-	WARN_ON(le32_to_cpu(replyhead->num_ops) == 0);
-	op = (void *)(replyhead + 1);
-	rc = le32_to_cpu(replyhead->result);
-	bytes = le64_to_cpu(op->extent.length);
-
 	if (rc >= 0) {
 		/*
 		 * Assume we wrote the pages we originally sent.  The
@@ -740,8 +725,6 @@ retry:
 		struct page *page;
 		int want;
 		u64 offset, len;
-		struct ceph_osd_request_head *reqhead;
-		struct ceph_osd_op *op;
 		long writeback_stat;
 
 		next = 0;
@@ -905,10 +888,8 @@ get_more_pages:
 
 		/* revise final length, page count */
 		req->r_num_pages = locked_pages;
-		reqhead = req->r_request->front.iov_base;
-		op = (void *)(reqhead + 1);
-		op->extent.length = cpu_to_le64(len);
-		op->payload_len = cpu_to_le32(len);
+		req->r_request_ops[0].extent.length = cpu_to_le64(len);
+		req->r_request_ops[0].payload_len = cpu_to_le32(len);
 		req->r_request->hdr.data_len = cpu_to_le32(len);
 
 		rc = ceph_osdc_start_request(&fsc->client->osdc, req, true);
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index ad8899fc3157..1dd5d466b6f9 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -47,6 +47,9 @@ struct ceph_osd {
 	struct list_head o_keepalive_item;
 };
 
+
+#define CEPH_OSD_MAX_OP 10
+
 /* an in-flight request */
 struct ceph_osd_request {
 	u64             r_tid;              /* unique for this client */
@@ -63,9 +66,23 @@ struct ceph_osd_request {
 	struct ceph_connection *r_con_filling_msg;
 
 	struct ceph_msg  *r_request, *r_reply;
-	int               r_result;
 	int               r_flags;     /* any additional flags for the osd */
 	u32               r_sent;      /* >0 if r_request is sending/sent */
+	int               r_num_ops;
+
+	/* encoded message content */
+	struct ceph_osd_op *r_request_ops;
+	/* these are updated on each send */
+	__le32           *r_request_osdmap_epoch;
+	__le32           *r_request_flags;
+	__le64           *r_request_pool;
+	void             *r_request_pgid;
+	__le32           *r_request_attempts;
+	struct ceph_eversion *r_request_reassert_version;
+
+	int               r_result;
+	int               r_reply_op_len[CEPH_OSD_MAX_OP];
+	s32               r_reply_op_result[CEPH_OSD_MAX_OP];
 	int               r_got_reply;
 	int		  r_linger;
 
diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h
index d784c8dfb09a..68c96a508ac2 100644
--- a/include/linux/ceph/rados.h
+++ b/include/linux/ceph/rados.h
@@ -416,43 +416,5 @@ struct ceph_osd_op {
 	__le32 payload_len;
 } __attribute__ ((packed));
 
-/*
- * osd request message header.  each request may include multiple
- * ceph_osd_op object operations.
- */
-struct ceph_osd_request_head {
-	__le32 client_inc;                 /* client incarnation */
-	struct ceph_object_layout layout;  /* pgid */
-	__le32 osdmap_epoch;               /* client's osdmap epoch */
-
-	__le32 flags;
-
-	struct ceph_timespec mtime;        /* for mutations only */
-	struct ceph_eversion reassert_version; /* if we are replaying op */
-
-	__le32 object_len;     /* length of object name */
-
-	__le64 snapid;         /* snapid to read */
-	__le64 snap_seq;       /* writer's snap context */
-	__le32 num_snaps;
-
-	__le16 num_ops;
-	struct ceph_osd_op ops[];  /* followed by ops[], obj, ticket, snaps */
-} __attribute__ ((packed));
-
-struct ceph_osd_reply_head {
-	__le32 client_inc;                /* client incarnation */
-	__le32 flags;
-	struct ceph_object_layout layout;
-	__le32 osdmap_epoch;
-	struct ceph_eversion reassert_version; /* for replaying uncommitted */
-
-	__le32 result;                    /* result code */
-
-	__le32 object_len;                /* length of object name */
-	__le32 num_ops;
-	struct ceph_osd_op ops[0];  /* ops[], object */
-} __attribute__ ((packed));
-
 
 #endif
diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c
index f4d4b27d6026..00d051f4894e 100644
--- a/net/ceph/debugfs.c
+++ b/net/ceph/debugfs.c
@@ -123,10 +123,7 @@ static int osdc_show(struct seq_file *s, void *pp)
 	mutex_lock(&osdc->request_mutex);
 	for (p = rb_first(&osdc->requests); p; p = rb_next(p)) {
 		struct ceph_osd_request *req;
-		struct ceph_osd_request_head *head;
-		struct ceph_osd_op *op;
-		int num_ops;
-		int opcode, olen;
+		int opcode;
 		int i;
 
 		req = rb_entry(p, struct ceph_osd_request, r_node);
@@ -135,13 +132,7 @@ static int osdc_show(struct seq_file *s, void *pp)
 			   req->r_osd ? req->r_osd->o_osd : -1,
 			   req->r_pgid.pool, req->r_pgid.seed);
 
-		head = req->r_request->front.iov_base;
-		op = (void *)(head + 1);
-
-		num_ops = le16_to_cpu(head->num_ops);
-		olen = le32_to_cpu(head->object_len);
-		seq_printf(s, "%.*s", olen,
-			   (const char *)(head->ops + num_ops));
+		seq_printf(s, "%.*s", req->r_oid_len, req->r_oid);
 
 		if (req->r_reassert_version.epoch)
 			seq_printf(s, "\t%u'%llu",
@@ -150,10 +141,9 @@ static int osdc_show(struct seq_file *s, void *pp)
 		else
 			seq_printf(s, "\t");
 
-		for (i = 0; i < num_ops; i++) {
-			opcode = le16_to_cpu(op->op);
+		for (i = 0; i < req->r_num_ops; i++) {
+			opcode = le16_to_cpu(req->r_request_ops[i].op);
 			seq_printf(s, "\t%s", ceph_osd_op_name(opcode));
-			op++;
 		}
 
 		seq_printf(s, "\n");
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 5584f0a08e28..d730dd4d8eb2 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -146,15 +146,23 @@ EXPORT_SYMBOL(ceph_osdc_release_request);
 
 struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
 					       struct ceph_snap_context *snapc,
-					       unsigned int num_op,
+					       unsigned int num_ops,
 					       bool use_mempool,
 					       gfp_t gfp_flags)
 {
 	struct ceph_osd_request *req;
 	struct ceph_msg *msg;
-	size_t msg_size = sizeof(struct ceph_osd_request_head);
-
-	msg_size += num_op*sizeof(struct ceph_osd_op);
+	size_t msg_size;
+
+	msg_size = 4 + 4 + 8 + 8 + 4+8;
+	msg_size += 2 + 4 + 8 + 4 + 4; /* oloc */
+	msg_size += 1 + 8 + 4 + 4;     /* pg_t */
+	msg_size += 4 + MAX_OBJ_NAME_SIZE;
+	msg_size += 2 + num_ops*sizeof(struct ceph_osd_op);
+	msg_size += 8;  /* snapid */
+	msg_size += 8;  /* snap_seq */
+	msg_size += 8 * (snapc ? snapc->num_snaps : 0);  /* snaps */
+	msg_size += 4;
 
 	if (use_mempool) {
 		req = mempool_alloc(osdc->req_mempool, gfp_flags);
@@ -193,9 +201,6 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
 	ceph_pagelist_init(&req->r_trail);
 
 	/* create request message; allow space for oid */
-	msg_size += MAX_OBJ_NAME_SIZE;
-	if (snapc)
-		msg_size += sizeof(u64) * snapc->num_snaps;
 	if (use_mempool)
 		msg = ceph_msgpool_get(&osdc->msgpool_op, 0);
 	else
@@ -324,55 +329,80 @@ static void osd_req_encode_op(struct ceph_osd_request *req,
  *
  */
 void ceph_osdc_build_request(struct ceph_osd_request *req,
-			     u64 off, u64 len, unsigned int num_op,
+			     u64 off, u64 len, unsigned int num_ops,
 			     struct ceph_osd_req_op *src_ops,
 			     struct ceph_snap_context *snapc, u64 snap_id,
 			     struct timespec *mtime)
 {
 	struct ceph_msg *msg = req->r_request;
-	struct ceph_osd_request_head *head;
 	struct ceph_osd_req_op *src_op;
-	struct ceph_osd_op *op;
 	void *p;
-	size_t msg_size = sizeof(*head) + num_op*sizeof(*op);
+	size_t msg_size;
 	int flags = req->r_flags;
 	u64 data_len;
 	int i;
 
-	WARN_ON((flags & (CEPH_OSD_FLAG_READ|CEPH_OSD_FLAG_WRITE)) == 0);
-
-	head = msg->front.iov_base;
-	head->snapid = cpu_to_le64(snap_id);
-	op = (void *)(head + 1);
-	p = (void *)(op + num_op);
-
+	req->r_num_ops = num_ops;
+	req->r_snapid = snap_id;
 	req->r_snapc = ceph_get_snap_context(snapc);
 
-	head->client_inc = cpu_to_le32(1); /* always, for now. */
-	head->flags = cpu_to_le32(flags);
-	if (flags & CEPH_OSD_FLAG_WRITE)
-		ceph_encode_timespec(&head->mtime, mtime);
-	BUG_ON(num_op > (unsigned int) ((u16) -1));
-	head->num_ops = cpu_to_le16(num_op);
+	/* encode request */
+	msg->hdr.version = cpu_to_le16(4);
 
-	/* fill in oid */
-	head->object_len = cpu_to_le32(req->r_oid_len);
+	p = msg->front.iov_base;
+	ceph_encode_32(&p, 1);   /* client_inc  is always 1 */
+	req->r_request_osdmap_epoch = p;
+	p += 4;
+	req->r_request_flags = p;
+	p += 4;
+	if (req->r_flags & CEPH_OSD_FLAG_WRITE)
+		ceph_encode_timespec(p, mtime);
+	p += sizeof(struct ceph_timespec);
+	req->r_request_reassert_version = p;
+	p += sizeof(struct ceph_eversion); /* will get filled in */
+
+	/* oloc */
+	ceph_encode_8(&p, 4);
+	ceph_encode_8(&p, 4);
+	ceph_encode_32(&p, 8 + 4 + 4);
+	req->r_request_pool = p;
+	p += 8;
+	ceph_encode_32(&p, -1);  /* preferred */
+	ceph_encode_32(&p, 0);   /* key len */
+
+	ceph_encode_8(&p, 1);
+	req->r_request_pgid = p;
+	p += 8 + 4;
+	ceph_encode_32(&p, -1);  /* preferred */
+
+	/* oid */
+	ceph_encode_32(&p, req->r_oid_len);
 	memcpy(p, req->r_oid, req->r_oid_len);
+	dout("oid '%.*s' len %d\n", req->r_oid_len, req->r_oid, req->r_oid_len);
 	p += req->r_oid_len;
 
+	/* ops */
+	ceph_encode_16(&p, num_ops);
 	src_op = src_ops;
-	while (num_op--)
-		osd_req_encode_op(req, op++, src_op++);
+	req->r_request_ops = p;
+	for (i = 0; i < num_ops; i++, src_op++) {
+		osd_req_encode_op(req, p, src_op);
+		p += sizeof(struct ceph_osd_op);
+	}
 
-	if (snapc) {
-		head->snap_seq = cpu_to_le64(snapc->seq);
-		head->num_snaps = cpu_to_le32(snapc->num_snaps);
+	/* snaps */
+	ceph_encode_64(&p, req->r_snapid);
+	ceph_encode_64(&p, req->r_snapc ? req->r_snapc->seq : 0);
+	ceph_encode_32(&p, req->r_snapc ? req->r_snapc->num_snaps : 0);
+	if (req->r_snapc) {
 		for (i = 0; i < snapc->num_snaps; i++) {
-			put_unaligned_le64(snapc->snaps[i], p);
-			p += sizeof(u64);
+			ceph_encode_64(&p, req->r_snapc->snaps[i]);
 		}
 	}
 
+	req->r_request_attempts = p;
+	p += 4;
+
 	data_len = req->r_trail.length;
 	if (flags & CEPH_OSD_FLAG_WRITE) {
 		req->r_request->hdr.data_off = cpu_to_le16(off);
@@ -385,6 +415,9 @@ void ceph_osdc_build_request(struct ceph_osd_request *req,
 	msg_size = p - msg->front.iov_base;
 	msg->front.iov_len = msg_size;
 	msg->hdr.front_len = cpu_to_le32(msg_size);
+
+	dout("build_request msg_size was %d num_ops %d\n", (int)msg_size,
+	     num_ops);
 	return;
 }
 EXPORT_SYMBOL(ceph_osdc_build_request);
@@ -991,21 +1024,22 @@ out:
 static void __send_request(struct ceph_osd_client *osdc,
 			   struct ceph_osd_request *req)
 {
-	struct ceph_osd_request_head *reqhead;
-
-	dout("send_request %p tid %llu to osd%d flags %d\n",
-	     req, req->r_tid, req->r_osd->o_osd, req->r_flags);
-
-	reqhead = req->r_request->front.iov_base;
-	reqhead->snapid = cpu_to_le64(req->r_snapid);
-	reqhead->osdmap_epoch = cpu_to_le32(osdc->osdmap->epoch);
-	reqhead->flags |= cpu_to_le32(req->r_flags);  /* e.g., RETRY */
-	reqhead->reassert_version = req->r_reassert_version;
+	void *p;
 
-	reqhead->layout.ol_pgid.ps = cpu_to_le16(req->r_pgid.seed);
-	reqhead->layout.ol_pgid.pool = cpu_to_le32(req->r_pgid.pool);
-	reqhead->layout.ol_pgid.preferred = cpu_to_le16(-1);
-	reqhead->layout.ol_stripe_unit = 0;
+	dout("send_request %p tid %llu to osd%d flags %d pg %lld.%x\n",
+	     req, req->r_tid, req->r_osd->o_osd, req->r_flags,
+	     (unsigned long long)req->r_pgid.pool, req->r_pgid.seed);
+
+	/* fill in message content that changes each time we send it */
+	put_unaligned_le32(osdc->osdmap->epoch, req->r_request_osdmap_epoch);
+	put_unaligned_le32(req->r_flags, req->r_request_flags);
+	put_unaligned_le64(req->r_pgid.pool, req->r_request_pool);
+	p = req->r_request_pgid;
+	ceph_encode_64(&p, req->r_pgid.pool);
+	ceph_encode_32(&p, req->r_pgid.seed);
+	put_unaligned_le64(1, req->r_request_attempts);  /* FIXME */
+	memcpy(req->r_request_reassert_version, &req->r_reassert_version,
+	       sizeof(req->r_reassert_version));
 
 	req->r_stamp = jiffies;
 	list_move_tail(&req->r_req_lru_item, &osdc->req_lru);
@@ -1105,6 +1139,26 @@ static void complete_request(struct ceph_osd_request *req)
 	complete_all(&req->r_safe_completion);  /* fsync waiter */
 }
 
+static int __decode_pgid(void **p, void *end, struct ceph_pg *pgid)
+{
+	__u8 v;
+
+	ceph_decode_need(p, end, 1 + 8 + 4 + 4, bad);
+	v = ceph_decode_8(p);
+	if (v > 1) {
+		pr_warning("do not understand pg encoding %d > 1", v);
+		return -EINVAL;
+	}
+	pgid->pool = ceph_decode_64(p);
+	pgid->seed = ceph_decode_32(p);
+	*p += 4;
+	return 0;
+
+bad:
+	pr_warning("incomplete pg encoding");
+	return -EINVAL;
+}
+
 /*
  * handle osd op reply.  either call the callback if it is specified,
  * or do the completion to wake up the waiting thread.
@@ -1112,22 +1166,42 @@ static void complete_request(struct ceph_osd_request *req)
 static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
 			 struct ceph_connection *con)
 {
-	struct ceph_osd_reply_head *rhead = msg->front.iov_base;
+	void *p, *end;
 	struct ceph_osd_request *req;
 	u64 tid;
-	int numops, object_len, flags;
+	int object_len;
+	int numops, payload_len, flags;
 	s32 result;
+	s32 retry_attempt;
+	struct ceph_pg pg;
+	int err;
+	u32 reassert_epoch;
+	u64 reassert_version;
+	u32 osdmap_epoch;
+	int i;
 
 	tid = le64_to_cpu(msg->hdr.tid);
-	if (msg->front.iov_len < sizeof(*rhead))
-		goto bad;
-	numops = le32_to_cpu(rhead->num_ops);
-	object_len = le32_to_cpu(rhead->object_len);
-	result = le32_to_cpu(rhead->result);
-	if (msg->front.iov_len != sizeof(*rhead) + object_len +
-	    numops * sizeof(struct ceph_osd_op))
+	dout("handle_reply %p tid %llu\n", msg, tid);
+
+	p = msg->front.iov_base;
+	end = p + msg->front.iov_len;
+
+	ceph_decode_need(&p, end, 4, bad);
+	object_len = ceph_decode_32(&p);
+	ceph_decode_need(&p, end, object_len, bad);
+	p += object_len;
+
+	err = __decode_pgid(&p, end, &pg);
+	if (err)
 		goto bad;
-	dout("handle_reply %p tid %llu result %d\n", msg, tid, (int)result);
+
+	ceph_decode_need(&p, end, 8 + 4 + 4 + 8 + 4, bad);
+	flags = ceph_decode_64(&p);
+	result = ceph_decode_32(&p);
+	reassert_epoch = ceph_decode_32(&p);
+	reassert_version = ceph_decode_64(&p);
+	osdmap_epoch = ceph_decode_32(&p);
+
 	/* lookup */
 	mutex_lock(&osdc->request_mutex);
 	req = __lookup_request(osdc, tid);
@@ -1137,7 +1211,38 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
 		return;
 	}
 	ceph_osdc_get_request(req);
-	flags = le32_to_cpu(rhead->flags);
+
+	dout("handle_reply %p tid %llu req %p result %d\n", msg, tid,
+	     req, result);
+
+	ceph_decode_need(&p, end, 4, bad);
+	numops = ceph_decode_32(&p);
+	if (numops > CEPH_OSD_MAX_OP)
+		goto bad_put;
+	if (numops != req->r_num_ops)
+		goto bad_put;
+	payload_len = 0;
+	ceph_decode_need(&p, end, numops * sizeof(struct ceph_osd_op), bad);
+	for (i = 0; i < numops; i++) {
+		struct ceph_osd_op *op = p;
+		int len;
+
+		len = le32_to_cpu(op->payload_len);
+		req->r_reply_op_len[i] = len;
+		dout(" op %d has %d bytes\n", i, len);
+		payload_len += len;
+		p += sizeof(*op);
+	}
+	if (payload_len != le32_to_cpu(msg->hdr.data_len)) {
+		pr_warning("sum of op payload lens %d != data_len %d",
+			   payload_len, le32_to_cpu(msg->hdr.data_len));
+		goto bad_put;
+	}
+
+	ceph_decode_need(&p, end, 4 + numops * 4, bad);
+	retry_attempt = ceph_decode_32(&p);
+	for (i = 0; i < numops; i++)
+		req->r_reply_op_result[i] = ceph_decode_32(&p);
 
 	/*
 	 * if this connection filled our message, drop our reference now, to
@@ -1152,7 +1257,7 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
 	if (!req->r_got_reply) {
 		unsigned int bytes;
 
-		req->r_result = le32_to_cpu(rhead->result);
+		req->r_result = result;
 		bytes = le32_to_cpu(msg->hdr.data_len);
 		dout("handle_reply result %d bytes %d\n", req->r_result,
 		     bytes);
@@ -1160,7 +1265,8 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
 			req->r_result = bytes;
 
 		/* in case this is a write and we need to replay, */
-		req->r_reassert_version = rhead->reassert_version;
+		req->r_reassert_version.epoch = cpu_to_le32(reassert_epoch);
+		req->r_reassert_version.version = cpu_to_le64(reassert_version);
 
 		req->r_got_reply = 1;
 	} else if ((flags & CEPH_OSD_FLAG_ONDISK) == 0) {
@@ -1195,10 +1301,11 @@ done:
 	ceph_osdc_put_request(req);
 	return;
 
+bad_put:
+	ceph_osdc_put_request(req);
 bad:
-	pr_err("corrupt osd_op_reply got %d %d expected %d\n",
-	       (int)msg->front.iov_len, le32_to_cpu(msg->hdr.front_len),
-	       (int)sizeof(*rhead));
+	pr_err("corrupt osd_op_reply got %d %d\n",
+	       (int)msg->front.iov_len, le32_to_cpu(msg->hdr.front_len));
 	ceph_msg_dump(msg);
 }
 
-- 
cgit v1.2.3


From 83ca14fdd35821554058e5fd4fa7b118ee504a33 Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@inktank.com>
Date: Tue, 26 Feb 2013 10:39:09 -0800
Subject: libceph: add support for HASHPSPOOL pool flag

The legacy behavior adds the pgid seed and pool together as the input for
CRUSH.  That is problematic because each pool's PGs end up mapping to the
same OSDs: 1.5 == 2.4 == 3.3 == ...

Instead, if the HASHPSPOOL flag is set, we has the ps and pool together and
feed that into CRUSH.  This ensures that two adjacent pools will map to
an independent pseudorandom set of OSDs.

Advertise our support for this via a protocol feature flag.

Signed-off-by: Sage Weil <sage@inktank.com>
Reviewed-by: Alex Elder <elder@inktank.com>
---
 include/linux/ceph/ceph_features.h |  4 +++-
 include/linux/ceph/osdmap.h        |  2 ++
 net/ceph/osdmap.c                  | 39 +++++++++++++++++++++++++-------------
 3 files changed, 31 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h
index ab0a54286e0d..76554cecaab2 100644
--- a/include/linux/ceph/ceph_features.h
+++ b/include/linux/ceph/ceph_features.h
@@ -34,6 +34,7 @@
 #define CEPH_FEATURE_REPLY_CREATE_INODE   (1<<27)
 #define CEPH_FEATURE_OSD_HBMSGS     (1<<28)
 #define CEPH_FEATURE_MDSENC         (1<<29)
+#define CEPH_FEATURE_OSDHASHPSPOOL  (1<<30)
 
 /*
  * Features supported.
@@ -45,7 +46,8 @@
 	 CEPH_FEATURE_OSDENC |			\
 	 CEPH_FEATURE_CRUSH_TUNABLES |		\
 	 CEPH_FEATURE_CRUSH_TUNABLES2 |		\
-	 CEPH_FEATURE_REPLY_CREATE_INODE)
+	 CEPH_FEATURE_REPLY_CREATE_INODE |	\
+	 CEPH_FEATURE_OSDHASHPSPOOL)
 
 #define CEPH_FEATURES_REQUIRED_DEFAULT   \
 	(CEPH_FEATURE_NOSRCADDR |	 \
diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h
index 35985125f118..c819190d1642 100644
--- a/include/linux/ceph/osdmap.h
+++ b/include/linux/ceph/osdmap.h
@@ -23,6 +23,8 @@ struct ceph_pg {
 	uint32_t seed;
 };
 
+#define CEPH_POOL_FLAG_HASHPSPOOL  1
+
 struct ceph_pg_pool_info {
 	struct rb_node node;
 	s64 id;
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 378471644501..69bc4bf89e3e 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -1127,18 +1127,16 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
 	struct ceph_pg_mapping *pg;
 	struct ceph_pg_pool_info *pool;
 	int ruleno;
-	unsigned int poolid, ps, pps, t, r;
+	int r;
+	u32 pps;
 
-	poolid = pgid.pool;
-	ps = pgid.seed;
-
-	pool = __lookup_pg_pool(&osdmap->pg_pools, poolid);
+	pool = __lookup_pg_pool(&osdmap->pg_pools, pgid.pool);
 	if (!pool)
 		return NULL;
 
 	/* pg_temp? */
-	t = ceph_stable_mod(ps, pool->pg_num, pool->pgp_num_mask);
-	pgid.seed = t;
+	pgid.seed = ceph_stable_mod(pgid.seed, pool->pg_num,
+				    pool->pgp_num_mask);
 	pg = __lookup_pg_mapping(&osdmap->pg_temp, pgid);
 	if (pg) {
 		*num = pg->len;
@@ -1149,20 +1147,35 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
 	ruleno = crush_find_rule(osdmap->crush, pool->crush_ruleset,
 				 pool->type, pool->size);
 	if (ruleno < 0) {
-		pr_err("no crush rule pool %d ruleset %d type %d size %d\n",
-		       poolid, pool->crush_ruleset, pool->type,
+		pr_err("no crush rule pool %lld ruleset %d type %d size %d\n",
+		       pgid.pool, pool->crush_ruleset, pool->type,
 		       pool->size);
 		return NULL;
 	}
 
-	pps = ceph_stable_mod(ps, pool->pgp_num, pool->pgp_num_mask);
-	pps += poolid;
+	if (pool->flags & CEPH_POOL_FLAG_HASHPSPOOL) {
+		/* hash pool id and seed sothat pool PGs do not overlap */
+		pps = crush_hash32_2(CRUSH_HASH_RJENKINS1,
+				     ceph_stable_mod(pgid.seed, pool->pgp_num,
+						     pool->pgp_num_mask),
+				     pgid.pool);
+	} else {
+		/*
+		 * legacy ehavior: add ps and pool together.  this is
+		 * not a great approach because the PGs from each pool
+		 * will overlap on top of each other: 0.5 == 1.4 ==
+		 * 2.3 == ...
+		 */
+		pps = ceph_stable_mod(pgid.seed, pool->pgp_num,
+				      pool->pgp_num_mask) +
+			(unsigned)pgid.pool;
+	}
 	r = crush_do_rule(osdmap->crush, ruleno, pps, osds,
 			  min_t(int, pool->size, *num),
 			  osdmap->osd_weight);
 	if (r < 0) {
-		pr_err("error %d from crush rule: pool %d ruleset %d type %d"
-		       " size %d\n", r, poolid, pool->crush_ruleset,
+		pr_err("error %d from crush rule: pool %lld ruleset %d type %d"
+		       " size %d\n", r, pgid.pool, pool->crush_ruleset,
 		       pool->type, pool->size);
 		return NULL;
 	}
-- 
cgit v1.2.3


From f00b4dad9d9eb001a04cf72e8351a2a1b9e99322 Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Thu, 20 Dec 2012 14:14:23 +0100
Subject: dma-buf: implement vmap refcounting in the interface logic

All drivers which implement this need to have some sort of refcount to
allow concurrent vmap usage. Hence implement this in the dma-buf core.

To protect against concurrent calls we need a lock, which potentially
causes new funny locking inversions. But this shouldn't be a problem
for exporters with statically allocated backing storage, and more
dynamic drivers have decent issues already anyway.

Inspired by some refactoring patches from Aaron Plattner, who
implemented the same idea, but only for drm/prime drivers.

v2: Check in dma_buf_release that no dangling vmaps are left.
Suggested by Aaron Plattner. We might want to do similar checks for
attachments, but that's for another patch. Also fix up ERR_PTR return
for vmap.

v3: Check whether the passed-in vmap address matches with the cached
one for vunmap. Eventually we might want to remove that parameter -
compared to the kmap functions there's no need for the vaddr for
unmapping.  Suggested by Chris Wilson.

v4: Fix a brown-paper-bag bug spotted by Aaron Plattner.

Cc: Aaron Plattner <aplattner@nvidia.com>
Reviewed-by: Aaron Plattner <aplattner@nvidia.com>
Tested-by: Aaron Plattner <aplattner@nvidia.com>
Reviewed-by: Rob Clark <rob@ti.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Sumit Semwal <sumit.semwal@linaro.org>
---
 Documentation/dma-buf-sharing.txt |  6 +++++-
 drivers/base/dma-buf.c            | 43 ++++++++++++++++++++++++++++++++++-----
 include/linux/dma-buf.h           |  4 +++-
 3 files changed, 46 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/dma-buf-sharing.txt b/Documentation/dma-buf-sharing.txt
index 0188903bc9e1..4966b1be42ac 100644
--- a/Documentation/dma-buf-sharing.txt
+++ b/Documentation/dma-buf-sharing.txt
@@ -302,7 +302,11 @@ Access to a dma_buf from the kernel context involves three steps:
       void dma_buf_vunmap(struct dma_buf *dmabuf, void *vaddr)
 
    The vmap call can fail if there is no vmap support in the exporter, or if it
-   runs out of vmalloc space. Fallback to kmap should be implemented.
+   runs out of vmalloc space. Fallback to kmap should be implemented. Note that
+   the dma-buf layer keeps a reference count for all vmap access and calls down
+   into the exporter's vmap function only when no vmapping exists, and only
+   unmaps it once. Protection against concurrent vmap/vunmap calls is provided
+   by taking the dma_buf->lock mutex.
 
 3. Finish access
 
diff --git a/drivers/base/dma-buf.c b/drivers/base/dma-buf.c
index ff5b745c4705..394523807a6d 100644
--- a/drivers/base/dma-buf.c
+++ b/drivers/base/dma-buf.c
@@ -39,6 +39,8 @@ static int dma_buf_release(struct inode *inode, struct file *file)
 
 	dmabuf = file->private_data;
 
+	BUG_ON(dmabuf->vmapping_counter);
+
 	dmabuf->ops->release(dmabuf);
 	kfree(dmabuf);
 	return 0;
@@ -481,12 +483,34 @@ EXPORT_SYMBOL_GPL(dma_buf_mmap);
  */
 void *dma_buf_vmap(struct dma_buf *dmabuf)
 {
+	void *ptr;
+
 	if (WARN_ON(!dmabuf))
 		return NULL;
 
-	if (dmabuf->ops->vmap)
-		return dmabuf->ops->vmap(dmabuf);
-	return NULL;
+	if (!dmabuf->ops->vmap)
+		return NULL;
+
+	mutex_lock(&dmabuf->lock);
+	if (dmabuf->vmapping_counter) {
+		dmabuf->vmapping_counter++;
+		BUG_ON(!dmabuf->vmap_ptr);
+		ptr = dmabuf->vmap_ptr;
+		goto out_unlock;
+	}
+
+	BUG_ON(dmabuf->vmap_ptr);
+
+	ptr = dmabuf->ops->vmap(dmabuf);
+	if (IS_ERR_OR_NULL(ptr))
+		goto out_unlock;
+
+	dmabuf->vmap_ptr = ptr;
+	dmabuf->vmapping_counter = 1;
+
+out_unlock:
+	mutex_unlock(&dmabuf->lock);
+	return ptr;
 }
 EXPORT_SYMBOL_GPL(dma_buf_vmap);
 
@@ -500,7 +524,16 @@ void dma_buf_vunmap(struct dma_buf *dmabuf, void *vaddr)
 	if (WARN_ON(!dmabuf))
 		return;
 
-	if (dmabuf->ops->vunmap)
-		dmabuf->ops->vunmap(dmabuf, vaddr);
+	BUG_ON(!dmabuf->vmap_ptr);
+	BUG_ON(dmabuf->vmapping_counter == 0);
+	BUG_ON(dmabuf->vmap_ptr != vaddr);
+
+	mutex_lock(&dmabuf->lock);
+	if (--dmabuf->vmapping_counter == 0) {
+		if (dmabuf->ops->vunmap)
+			dmabuf->ops->vunmap(dmabuf, vaddr);
+		dmabuf->vmap_ptr = NULL;
+	}
+	mutex_unlock(&dmabuf->lock);
 }
 EXPORT_SYMBOL_GPL(dma_buf_vunmap);
diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
index 3d754a394e92..9978b614a1aa 100644
--- a/include/linux/dma-buf.h
+++ b/include/linux/dma-buf.h
@@ -119,8 +119,10 @@ struct dma_buf {
 	struct file *file;
 	struct list_head attachments;
 	const struct dma_buf_ops *ops;
-	/* mutex to serialize list manipulation and attach/detach */
+	/* mutex to serialize list manipulation, attach/detach and vmap/unmap */
 	struct mutex lock;
+	unsigned vmapping_counter;
+	void *vmap_ptr;
 	void *priv;
 };
 
-- 
cgit v1.2.3


From 864ef69b2d9b34e7c85baa9c5c601d5e735b208a Mon Sep 17 00:00:00 2001
From: Matt Porter <mporter@ti.com>
Date: Fri, 1 Feb 2013 18:22:52 +0000
Subject: dmaengine: add dma_request_slave_channel_compat()

Adds a dma_request_slave_channel_compat() wrapper which accepts
both the arguments from dma_request_channel() and
dma_request_slave_channel(). Based on whether the driver is
instantiated via DT, the appropriate channel request call will be
made.

This allows for a much cleaner migration of drivers to the
dmaengine DT API as platforms continue to be mixed between those
that boot using DT and those that do not.

Suggested-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Matt Porter <mporter@ti.com>
Acked-by: Tony Lindgren <tony@atomide.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 include/linux/dmaengine.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index f5939999cb65..91ac8da25020 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -1001,6 +1001,22 @@ void dma_run_dependencies(struct dma_async_tx_descriptor *tx);
 struct dma_chan *dma_find_channel(enum dma_transaction_type tx_type);
 struct dma_chan *net_dma_find_channel(void);
 #define dma_request_channel(mask, x, y) __dma_request_channel(&(mask), x, y)
+#define dma_request_slave_channel_compat(mask, x, y, dev, name) \
+	__dma_request_slave_channel_compat(&(mask), x, y, dev, name)
+
+static inline struct dma_chan
+*__dma_request_slave_channel_compat(dma_cap_mask_t *mask, dma_filter_fn fn,
+				  void *fn_param, struct device *dev,
+				  char *name)
+{
+	struct dma_chan *chan;
+
+	chan = dma_request_slave_channel(dev, name);
+	if (chan)
+		return chan;
+
+	return __dma_request_channel(mask, fn, fn_param);
+}
 
 /* --- Helper iov-locking functions --- */
 
-- 
cgit v1.2.3


From c5a51053cf3b499ddba60a89ab067ea05ad15840 Mon Sep 17 00:00:00 2001
From: "Kim, Milo" <Milo.Kim@ti.com>
Date: Wed, 27 Feb 2013 17:02:43 -0800
Subject: backlight: add new lp8788 backlight driver

TI LP8788 PMU supports regulators, battery charger, RTC, ADC, backlight
dri= ver and current sinks.  This patch enables LP8788 backlight module.

(Brightness mode)
The brightness is controlled by PWM input or I2C register.
All modes are supported in the driver.

(Platform data)
Configurable data can be defined in the platform side.
 name                  : backlight driver name. (default: "lcd-backlight")
 initial_brightness    : initial value of backlight brightness
 bl_mode               : brightness control by PWM or lp8788 register
 dim_mode              : dimming mode selection
 full_scale            : full scale current setting
 rise_time             : brightness ramp up step time
 fall_time             : brightness ramp down step time
 pwm_pol               : PWM polarity setting when bl_mode is PWM based
 period_ns             : platform specific PWM period value. unit is nano.

The default values are set in case no platform data is defined.

[akpm@linux-foundation.org: checkpatch fixes]
Signed-off-by: Milo(Woogyom) Kim <milo.kim@ti.com>
Cc: Richard Purdie <rpurdie@rpsys.net>
Cc: Samuel Ortiz <sameo@linux.intel.com>
Cc: Thierry Reding <thierry.reding@avionic-design.de>
Cc: "devendra.aaru" <devendra.aaru@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/backlight/Kconfig     |   6 +
 drivers/video/backlight/Makefile    |   1 +
 drivers/video/backlight/lp8788_bl.c | 333 ++++++++++++++++++++++++++++++++++++
 include/linux/mfd/lp8788.h          |  24 +--
 4 files changed, 345 insertions(+), 19 deletions(-)
 create mode 100644 drivers/video/backlight/lp8788_bl.c

(limited to 'include/linux')

diff --git a/drivers/video/backlight/Kconfig b/drivers/video/backlight/Kconfig
index be27b551473f..db10d0120d2b 100644
--- a/drivers/video/backlight/Kconfig
+++ b/drivers/video/backlight/Kconfig
@@ -384,6 +384,12 @@ config BACKLIGHT_LP855X
 	  This supports TI LP8550, LP8551, LP8552, LP8553, LP8556 and LP8557
 	  backlight driver.
 
+config BACKLIGHT_LP8788
+	tristate "Backlight driver for TI LP8788 MFD"
+	depends on BACKLIGHT_CLASS_DEVICE && MFD_LP8788
+	help
+	  This supports TI LP8788 backlight driver.
+
 config BACKLIGHT_OT200
 	tristate "Backlight driver for ot200 visualisation device"
 	depends on BACKLIGHT_CLASS_DEVICE && CS5535_MFGPT && GPIO_CS5535
diff --git a/drivers/video/backlight/Makefile b/drivers/video/backlight/Makefile
index 4606c218e8e4..96c4d620c5ce 100644
--- a/drivers/video/backlight/Makefile
+++ b/drivers/video/backlight/Makefile
@@ -38,6 +38,7 @@ obj-$(CONFIG_BACKLIGHT_LM3630)		+= lm3630_bl.o
 obj-$(CONFIG_BACKLIGHT_LM3639)		+= lm3639_bl.o
 obj-$(CONFIG_BACKLIGHT_LOCOMO)		+= locomolcd.o
 obj-$(CONFIG_BACKLIGHT_LP855X)		+= lp855x_bl.o
+obj-$(CONFIG_BACKLIGHT_LP8788)		+= lp8788_bl.o
 obj-$(CONFIG_BACKLIGHT_MAX8925)		+= max8925_bl.o
 obj-$(CONFIG_BACKLIGHT_OMAP1)		+= omap1_bl.o
 obj-$(CONFIG_BACKLIGHT_OT200)		+= ot200_bl.o
diff --git a/drivers/video/backlight/lp8788_bl.c b/drivers/video/backlight/lp8788_bl.c
new file mode 100644
index 000000000000..4bb8b4f140cf
--- /dev/null
+++ b/drivers/video/backlight/lp8788_bl.c
@@ -0,0 +1,333 @@
+/*
+ * TI LP8788 MFD - backlight driver
+ *
+ * Copyright 2012 Texas Instruments
+ *
+ * Author: Milo(Woogyom) Kim <milo.kim@ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/backlight.h>
+#include <linux/err.h>
+#include <linux/mfd/lp8788.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/pwm.h>
+#include <linux/slab.h>
+
+/* Register address */
+#define LP8788_BL_CONFIG		0x96
+#define LP8788_BL_EN			BIT(0)
+#define LP8788_BL_PWM_INPUT_EN		BIT(5)
+#define LP8788_BL_FULLSCALE_SHIFT	2
+#define LP8788_BL_DIM_MODE_SHIFT	1
+#define LP8788_BL_PWM_POLARITY_SHIFT	6
+
+#define LP8788_BL_BRIGHTNESS		0x97
+
+#define LP8788_BL_RAMP			0x98
+#define LP8788_BL_RAMP_RISE_SHIFT	4
+
+#define MAX_BRIGHTNESS			127
+#define DEFAULT_BL_NAME			"lcd-backlight"
+
+struct lp8788_bl_config {
+	enum lp8788_bl_ctrl_mode bl_mode;
+	enum lp8788_bl_dim_mode dim_mode;
+	enum lp8788_bl_full_scale_current full_scale;
+	enum lp8788_bl_ramp_step rise_time;
+	enum lp8788_bl_ramp_step fall_time;
+	enum pwm_polarity pwm_pol;
+};
+
+struct lp8788_bl {
+	struct lp8788 *lp;
+	struct backlight_device *bl_dev;
+	struct lp8788_backlight_platform_data *pdata;
+	enum lp8788_bl_ctrl_mode mode;
+	struct pwm_device *pwm;
+};
+
+struct lp8788_bl_config default_bl_config = {
+	.bl_mode    = LP8788_BL_REGISTER_ONLY,
+	.dim_mode   = LP8788_DIM_EXPONENTIAL,
+	.full_scale = LP8788_FULLSCALE_1900uA,
+	.rise_time  = LP8788_RAMP_8192us,
+	.fall_time  = LP8788_RAMP_8192us,
+	.pwm_pol    = PWM_POLARITY_NORMAL,
+};
+
+static inline bool is_brightness_ctrl_by_pwm(enum lp8788_bl_ctrl_mode mode)
+{
+	return (mode == LP8788_BL_COMB_PWM_BASED);
+}
+
+static inline bool is_brightness_ctrl_by_register(enum lp8788_bl_ctrl_mode mode)
+{
+	return (mode == LP8788_BL_REGISTER_ONLY ||
+		mode == LP8788_BL_COMB_REGISTER_BASED);
+}
+
+static int lp8788_backlight_configure(struct lp8788_bl *bl)
+{
+	struct lp8788_backlight_platform_data *pdata = bl->pdata;
+	struct lp8788_bl_config *cfg = &default_bl_config;
+	int ret;
+	u8 val;
+
+	/*
+	 * Update chip configuration if platform data exists,
+	 * otherwise use the default settings.
+	 */
+	if (pdata) {
+		cfg->bl_mode    = pdata->bl_mode;
+		cfg->dim_mode   = pdata->dim_mode;
+		cfg->full_scale = pdata->full_scale;
+		cfg->rise_time  = pdata->rise_time;
+		cfg->fall_time  = pdata->fall_time;
+		cfg->pwm_pol    = pdata->pwm_pol;
+	}
+
+	/* Brightness ramp up/down */
+	val = (cfg->rise_time << LP8788_BL_RAMP_RISE_SHIFT) | cfg->fall_time;
+	ret = lp8788_write_byte(bl->lp, LP8788_BL_RAMP, val);
+	if (ret)
+		return ret;
+
+	/* Fullscale current setting */
+	val = (cfg->full_scale << LP8788_BL_FULLSCALE_SHIFT) |
+		(cfg->dim_mode << LP8788_BL_DIM_MODE_SHIFT);
+
+	/* Brightness control mode */
+	switch (cfg->bl_mode) {
+	case LP8788_BL_REGISTER_ONLY:
+		val |= LP8788_BL_EN;
+		break;
+	case LP8788_BL_COMB_PWM_BASED:
+	case LP8788_BL_COMB_REGISTER_BASED:
+		val |= LP8788_BL_EN | LP8788_BL_PWM_INPUT_EN |
+			(cfg->pwm_pol << LP8788_BL_PWM_POLARITY_SHIFT);
+		break;
+	default:
+		dev_err(bl->lp->dev, "invalid mode: %d\n", cfg->bl_mode);
+		return -EINVAL;
+	}
+
+	bl->mode = cfg->bl_mode;
+
+	return lp8788_write_byte(bl->lp, LP8788_BL_CONFIG, val);
+}
+
+static void lp8788_pwm_ctrl(struct lp8788_bl *bl, int br, int max_br)
+{
+	unsigned int period;
+	unsigned int duty;
+	struct device *dev;
+	struct pwm_device *pwm;
+
+	if (!bl->pdata)
+		return;
+
+	period = bl->pdata->period_ns;
+	duty = br * period / max_br;
+	dev = bl->lp->dev;
+
+	/* request PWM device with the consumer name */
+	if (!bl->pwm) {
+		pwm = devm_pwm_get(dev, LP8788_DEV_BACKLIGHT);
+		if (IS_ERR(pwm)) {
+			dev_err(dev, "can not get PWM device\n");
+			return;
+		}
+
+		bl->pwm = pwm;
+	}
+
+	pwm_config(bl->pwm, duty, period);
+	if (duty)
+		pwm_enable(bl->pwm);
+	else
+		pwm_disable(bl->pwm);
+}
+
+static int lp8788_bl_update_status(struct backlight_device *bl_dev)
+{
+	struct lp8788_bl *bl = bl_get_data(bl_dev);
+	enum lp8788_bl_ctrl_mode mode = bl->mode;
+
+	if (bl_dev->props.state & BL_CORE_SUSPENDED)
+		bl_dev->props.brightness = 0;
+
+	if (is_brightness_ctrl_by_pwm(mode)) {
+		int brt = bl_dev->props.brightness;
+		int max = bl_dev->props.max_brightness;
+
+		lp8788_pwm_ctrl(bl, brt, max);
+	} else if (is_brightness_ctrl_by_register(mode)) {
+		u8 brt = bl_dev->props.brightness;
+
+		lp8788_write_byte(bl->lp, LP8788_BL_BRIGHTNESS, brt);
+	}
+
+	return 0;
+}
+
+static int lp8788_bl_get_brightness(struct backlight_device *bl_dev)
+{
+	return bl_dev->props.brightness;
+}
+
+static const struct backlight_ops lp8788_bl_ops = {
+	.options = BL_CORE_SUSPENDRESUME,
+	.update_status = lp8788_bl_update_status,
+	.get_brightness = lp8788_bl_get_brightness,
+};
+
+static int lp8788_backlight_register(struct lp8788_bl *bl)
+{
+	struct backlight_device *bl_dev;
+	struct backlight_properties props;
+	struct lp8788_backlight_platform_data *pdata = bl->pdata;
+	int init_brt;
+	char *name;
+
+	props.type = BACKLIGHT_PLATFORM;
+	props.max_brightness = MAX_BRIGHTNESS;
+
+	/* Initial brightness */
+	if (pdata)
+		init_brt = min_t(int, pdata->initial_brightness,
+				props.max_brightness);
+	else
+		init_brt = 0;
+
+	props.brightness = init_brt;
+
+	/* Backlight device name */
+	if (!pdata || !pdata->name)
+		name = DEFAULT_BL_NAME;
+	else
+		name = pdata->name;
+
+	bl_dev = backlight_device_register(name, bl->lp->dev, bl,
+				       &lp8788_bl_ops, &props);
+	if (IS_ERR(bl_dev))
+		return PTR_ERR(bl_dev);
+
+	bl->bl_dev = bl_dev;
+
+	return 0;
+}
+
+static void lp8788_backlight_unregister(struct lp8788_bl *bl)
+{
+	struct backlight_device *bl_dev = bl->bl_dev;
+
+	if (bl_dev)
+		backlight_device_unregister(bl_dev);
+}
+
+static ssize_t lp8788_get_bl_ctl_mode(struct device *dev,
+				     struct device_attribute *attr, char *buf)
+{
+	struct lp8788_bl *bl = dev_get_drvdata(dev);
+	enum lp8788_bl_ctrl_mode mode = bl->mode;
+	char *strmode;
+
+	if (is_brightness_ctrl_by_pwm(mode))
+		strmode = "PWM based";
+	else if (is_brightness_ctrl_by_register(mode))
+		strmode = "Register based";
+	else
+		strmode = "Invalid mode";
+
+	return scnprintf(buf, PAGE_SIZE, "%s\n", strmode);
+}
+
+static DEVICE_ATTR(bl_ctl_mode, S_IRUGO, lp8788_get_bl_ctl_mode, NULL);
+
+static struct attribute *lp8788_attributes[] = {
+	&dev_attr_bl_ctl_mode.attr,
+	NULL,
+};
+
+static const struct attribute_group lp8788_attr_group = {
+	.attrs = lp8788_attributes,
+};
+
+static int lp8788_backlight_probe(struct platform_device *pdev)
+{
+	struct lp8788 *lp = dev_get_drvdata(pdev->dev.parent);
+	struct lp8788_bl *bl;
+	int ret;
+
+	bl = devm_kzalloc(lp->dev, sizeof(struct lp8788_bl), GFP_KERNEL);
+	if (!bl)
+		return -ENOMEM;
+
+	bl->lp = lp;
+	if (lp->pdata)
+		bl->pdata = lp->pdata->bl_pdata;
+
+	platform_set_drvdata(pdev, bl);
+
+	ret = lp8788_backlight_configure(bl);
+	if (ret) {
+		dev_err(lp->dev, "backlight config err: %d\n", ret);
+		goto err_dev;
+	}
+
+	ret = lp8788_backlight_register(bl);
+	if (ret) {
+		dev_err(lp->dev, "register backlight err: %d\n", ret);
+		goto err_dev;
+	}
+
+	ret = sysfs_create_group(&pdev->dev.kobj, &lp8788_attr_group);
+	if (ret) {
+		dev_err(lp->dev, "register sysfs err: %d\n", ret);
+		goto err_sysfs;
+	}
+
+	backlight_update_status(bl->bl_dev);
+
+	return 0;
+
+err_sysfs:
+	lp8788_backlight_unregister(bl);
+err_dev:
+	return ret;
+}
+
+static int lp8788_backlight_remove(struct platform_device *pdev)
+{
+	struct lp8788_bl *bl = platform_get_drvdata(pdev);
+	struct backlight_device *bl_dev = bl->bl_dev;
+
+	bl_dev->props.brightness = 0;
+	backlight_update_status(bl_dev);
+	sysfs_remove_group(&pdev->dev.kobj, &lp8788_attr_group);
+	lp8788_backlight_unregister(bl);
+	platform_set_drvdata(pdev, NULL);
+
+	return 0;
+}
+
+static struct platform_driver lp8788_bl_driver = {
+	.probe = lp8788_backlight_probe,
+	.remove = lp8788_backlight_remove,
+	.driver = {
+		.name = LP8788_DEV_BACKLIGHT,
+		.owner = THIS_MODULE,
+	},
+};
+module_platform_driver(lp8788_bl_driver);
+
+MODULE_DESCRIPTION("Texas Instruments LP8788 Backlight Driver");
+MODULE_AUTHOR("Milo Kim");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:lp8788-backlight");
diff --git a/include/linux/mfd/lp8788.h b/include/linux/mfd/lp8788.h
index 2a32b16f79cb..786bf6679a28 100644
--- a/include/linux/mfd/lp8788.h
+++ b/include/linux/mfd/lp8788.h
@@ -16,6 +16,7 @@
 
 #include <linux/gpio.h>
 #include <linux/irqdomain.h>
+#include <linux/pwm.h>
 #include <linux/regmap.h>
 
 #define LP8788_DEV_BUCK		"lp8788-buck"
@@ -124,11 +125,6 @@ enum lp8788_bl_ramp_step {
 	LP8788_RAMP_65538us,
 };
 
-enum lp8788_bl_pwm_polarity {
-	LP8788_PWM_ACTIVE_HIGH,
-	LP8788_PWM_ACTIVE_LOW,
-};
-
 enum lp8788_isink_scale {
 	LP8788_ISINK_SCALE_100mA,
 	LP8788_ISINK_SCALE_120mA,
@@ -228,16 +224,6 @@ struct lp8788_charger_platform_data {
 				enum lp8788_charger_event event);
 };
 
-/*
- * struct lp8788_bl_pwm_data
- * @pwm_set_intensity     : set duty of pwm
- * @pwm_get_intensity     : get current duty of pwm
- */
-struct lp8788_bl_pwm_data {
-	void (*pwm_set_intensity) (int brightness, int max_brightness);
-	int (*pwm_get_intensity) (int max_brightness);
-};
-
 /*
  * struct lp8788_backlight_platform_data
  * @name                  : backlight driver name. (default: "lcd-backlight")
@@ -248,8 +234,8 @@ struct lp8788_bl_pwm_data {
  * @rise_time             : brightness ramp up step time
  * @fall_time             : brightness ramp down step time
  * @pwm_pol               : pwm polarity setting when bl_mode is pwm based
- * @pwm_data              : platform specific pwm generation functions
- *                          only valid when bl_mode is pwm based
+ * @period_ns             : platform specific pwm period value. unit is nano.
+			    Only valid when bl_mode is LP8788_BL_COMB_PWM_BASED
  */
 struct lp8788_backlight_platform_data {
 	char *name;
@@ -259,8 +245,8 @@ struct lp8788_backlight_platform_data {
 	enum lp8788_bl_full_scale_current full_scale;
 	enum lp8788_bl_ramp_step rise_time;
 	enum lp8788_bl_ramp_step fall_time;
-	enum lp8788_bl_pwm_polarity pwm_pol;
-	struct lp8788_bl_pwm_data pwm_data;
+	enum pwm_polarity pwm_pol;
+	unsigned int period_ns;
 };
 
 /*
-- 
cgit v1.2.3


From a321e91b6d73ed011ffceed384c40d2785cf723b Mon Sep 17 00:00:00 2001
From: Imre Deak <imre.deak@intel.com>
Date: Wed, 27 Feb 2013 17:02:56 -0800
Subject: lib/scatterlist: add simple page iterator

Add an iterator to walk through a scatter list a page at a time starting
at a specific page offset.  As opposed to the mapping iterator this is
meant to be small, performing well even in simple loops like collecting
all pages on the scatterlist into an array or setting up an iommu table
based on the pages' DMA address.

Signed-off-by: Imre Deak <imre.deak@intel.com>
Cc: Maxim Levitsky <maximlevitsky@gmail.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Tested-by: Stephen Warren <swarren@wwwdotorg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/scatterlist.h | 35 +++++++++++++++++++++++++++++++++++
 lib/scatterlist.c           | 38 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 73 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h
index 4bd6c06eb28e..788a853aa7a7 100644
--- a/include/linux/scatterlist.h
+++ b/include/linux/scatterlist.h
@@ -231,6 +231,41 @@ size_t sg_copy_to_buffer(struct scatterlist *sgl, unsigned int nents,
  */
 #define SG_MAX_SINGLE_ALLOC		(PAGE_SIZE / sizeof(struct scatterlist))
 
+/*
+ * sg page iterator
+ *
+ * Iterates over sg entries page-by-page.  On each successful iteration,
+ * @piter->page points to the current page, @piter->sg to the sg holding this
+ * page and @piter->sg_pgoffset to the page's page offset within the sg. The
+ * iteration will stop either when a maximum number of sg entries was reached
+ * or a terminating sg (sg_last(sg) == true) was reached.
+ */
+struct sg_page_iter {
+	struct page		*page;		/* current page */
+	struct scatterlist	*sg;		/* sg holding the page */
+	unsigned int		sg_pgoffset;	/* page offset within the sg */
+
+	/* these are internal states, keep away */
+	unsigned int		__nents;	/* remaining sg entries */
+	int			__pg_advance;	/* nr pages to advance at the
+						 * next step */
+};
+
+bool __sg_page_iter_next(struct sg_page_iter *piter);
+void __sg_page_iter_start(struct sg_page_iter *piter,
+			  struct scatterlist *sglist, unsigned int nents,
+			  unsigned long pgoffset);
+
+/**
+ * for_each_sg_page - iterate over the pages of the given sg list
+ * @sglist:	sglist to iterate over
+ * @piter:	page iterator to hold current page, sg, sg_pgoffset
+ * @nents:	maximum number of sg entries to iterate over
+ * @pgoffset:	starting page offset
+ */
+#define for_each_sg_page(sglist, piter, nents, pgoffset)		   \
+	for (__sg_page_iter_start((piter), (sglist), (nents), (pgoffset)); \
+	     __sg_page_iter_next(piter);)
 
 /*
  * Mapping sg iterator
diff --git a/lib/scatterlist.c b/lib/scatterlist.c
index 7874b01e816e..a1d15647d7db 100644
--- a/lib/scatterlist.c
+++ b/lib/scatterlist.c
@@ -394,6 +394,44 @@ int sg_alloc_table_from_pages(struct sg_table *sgt,
 }
 EXPORT_SYMBOL(sg_alloc_table_from_pages);
 
+void __sg_page_iter_start(struct sg_page_iter *piter,
+			  struct scatterlist *sglist, unsigned int nents,
+			  unsigned long pgoffset)
+{
+	piter->__pg_advance = 0;
+	piter->__nents = nents;
+
+	piter->page = NULL;
+	piter->sg = sglist;
+	piter->sg_pgoffset = pgoffset;
+}
+EXPORT_SYMBOL(__sg_page_iter_start);
+
+static int sg_page_count(struct scatterlist *sg)
+{
+	return PAGE_ALIGN(sg->offset + sg->length) >> PAGE_SHIFT;
+}
+
+bool __sg_page_iter_next(struct sg_page_iter *piter)
+{
+	if (!piter->__nents || !piter->sg)
+		return false;
+
+	piter->sg_pgoffset += piter->__pg_advance;
+	piter->__pg_advance = 1;
+
+	while (piter->sg_pgoffset >= sg_page_count(piter->sg)) {
+		piter->sg_pgoffset -= sg_page_count(piter->sg);
+		piter->sg = sg_next(piter->sg);
+		if (!--piter->__nents || !piter->sg)
+			return false;
+	}
+	piter->page = nth_page(sg_page(piter->sg), piter->sg_pgoffset);
+
+	return true;
+}
+EXPORT_SYMBOL(__sg_page_iter_next);
+
 /**
  * sg_miter_start - start mapping iteration over a sg list
  * @miter: sg mapping iter to be started
-- 
cgit v1.2.3


From 4225fc8555a992c7f91d174ef424384d6781e144 Mon Sep 17 00:00:00 2001
From: Imre Deak <imre.deak@intel.com>
Date: Wed, 27 Feb 2013 17:02:57 -0800
Subject: lib/scatterlist: use page iterator in the mapping iterator

For better code reuse use the newly added page iterator to iterate
through the pages.  The offset, length within the page is still
calculated by the mapping iterator as well as the actual mapping.  Idea
from Tejun Heo.

Signed-off-by: Imre Deak <imre.deak@intel.com>
Cc: Maxim Levitsky <maximlevitsky@gmail.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: James Hogan <james.hogan@imgtec.com>
Cc: Stephen Warren <swarren@wwwdotorg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/mmc/host/dw_mmc.c   |  4 ++--
 include/linux/scatterlist.h |  6 +++---
 lib/scatterlist.c           | 48 ++++++++++++++++++++++-----------------------
 3 files changed, 28 insertions(+), 30 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c
index 60063ccb4c4b..98342213ed21 100644
--- a/drivers/mmc/host/dw_mmc.c
+++ b/drivers/mmc/host/dw_mmc.c
@@ -1453,7 +1453,7 @@ static void dw_mci_read_data_pio(struct dw_mci *host)
 		if (!sg_miter_next(sg_miter))
 			goto done;
 
-		host->sg = sg_miter->__sg;
+		host->sg = sg_miter->piter.sg;
 		buf = sg_miter->addr;
 		remain = sg_miter->length;
 		offset = 0;
@@ -1508,7 +1508,7 @@ static void dw_mci_write_data_pio(struct dw_mci *host)
 		if (!sg_miter_next(sg_miter))
 			goto done;
 
-		host->sg = sg_miter->__sg;
+		host->sg = sg_miter->piter.sg;
 		buf = sg_miter->addr;
 		remain = sg_miter->length;
 		offset = 0;
diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h
index 788a853aa7a7..2d8bdaef9611 100644
--- a/include/linux/scatterlist.h
+++ b/include/linux/scatterlist.h
@@ -293,11 +293,11 @@ struct sg_mapping_iter {
 	void			*addr;		/* pointer to the mapped area */
 	size_t			length;		/* length of the mapped area */
 	size_t			consumed;	/* number of consumed bytes */
+	struct sg_page_iter	piter;		/* page iterator */
 
 	/* these are internal states, keep away */
-	struct scatterlist	*__sg;		/* current entry */
-	unsigned int		__nents;	/* nr of remaining entries */
-	unsigned int		__offset;	/* offset within sg */
+	unsigned int		__offset;	/* offset within page */
+	unsigned int		__remaining;	/* remaining bytes on page */
 	unsigned int		__flags;
 };
 
diff --git a/lib/scatterlist.c b/lib/scatterlist.c
index a1d15647d7db..b83c144d731f 100644
--- a/lib/scatterlist.c
+++ b/lib/scatterlist.c
@@ -449,9 +449,7 @@ void sg_miter_start(struct sg_mapping_iter *miter, struct scatterlist *sgl,
 {
 	memset(miter, 0, sizeof(struct sg_mapping_iter));
 
-	miter->__sg = sgl;
-	miter->__nents = nents;
-	miter->__offset = 0;
+	__sg_page_iter_start(&miter->piter, sgl, nents, 0);
 	WARN_ON(!(flags & (SG_MITER_TO_SG | SG_MITER_FROM_SG)));
 	miter->__flags = flags;
 }
@@ -476,36 +474,35 @@ EXPORT_SYMBOL(sg_miter_start);
  */
 bool sg_miter_next(struct sg_mapping_iter *miter)
 {
-	unsigned int off, len;
-
-	/* check for end and drop resources from the last iteration */
-	if (!miter->__nents)
-		return false;
-
 	sg_miter_stop(miter);
 
-	/* get to the next sg if necessary.  __offset is adjusted by stop */
-	while (miter->__offset == miter->__sg->length) {
-		if (--miter->__nents) {
-			miter->__sg = sg_next(miter->__sg);
-			miter->__offset = 0;
-		} else
+	/*
+	 * Get to the next page if necessary.
+	 * __remaining, __offset is adjusted by sg_miter_stop
+	 */
+	if (!miter->__remaining) {
+		struct scatterlist *sg;
+		unsigned long pgoffset;
+
+		if (!__sg_page_iter_next(&miter->piter))
 			return false;
-	}
 
-	/* map the next page */
-	off = miter->__sg->offset + miter->__offset;
-	len = miter->__sg->length - miter->__offset;
+		sg = miter->piter.sg;
+		pgoffset = miter->piter.sg_pgoffset;
 
-	miter->page = nth_page(sg_page(miter->__sg), off >> PAGE_SHIFT);
-	off &= ~PAGE_MASK;
-	miter->length = min_t(unsigned int, len, PAGE_SIZE - off);
-	miter->consumed = miter->length;
+		miter->__offset = pgoffset ? 0 : sg->offset;
+		miter->__remaining = sg->offset + sg->length -
+				(pgoffset << PAGE_SHIFT) - miter->__offset;
+		miter->__remaining = min_t(unsigned long, miter->__remaining,
+					   PAGE_SIZE - miter->__offset);
+	}
+	miter->page = miter->piter.page;
+	miter->consumed = miter->length = miter->__remaining;
 
 	if (miter->__flags & SG_MITER_ATOMIC)
-		miter->addr = kmap_atomic(miter->page) + off;
+		miter->addr = kmap_atomic(miter->page) + miter->__offset;
 	else
-		miter->addr = kmap(miter->page) + off;
+		miter->addr = kmap(miter->page) + miter->__offset;
 
 	return true;
 }
@@ -532,6 +529,7 @@ void sg_miter_stop(struct sg_mapping_iter *miter)
 	/* drop resources from the last iteration */
 	if (miter->addr) {
 		miter->__offset += miter->consumed;
+		miter->__remaining -= miter->consumed;
 
 		if (miter->__flags & SG_MITER_TO_SG)
 			flush_kernel_dcache_page(miter->page);
-- 
cgit v1.2.3


From e579d2c259be42b6f29458327e5153b22414b031 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 27 Feb 2013 17:03:15 -0800
Subject: coredump: remove redundant defines for dumpable states

The existing SUID_DUMP_* defines duplicate the newer SUID_DUMPABLE_*
defines introduced in 54b501992dd2 ("coredump: warn about unsafe
suid_dumpable / core_pattern combo").  Remove the new ones, and use the
prior values instead.

Signed-off-by: Kees Cook <keescook@chromium.org>
Reported-by: Chen Gang <gang.chen@asianux.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Alan Cox <alan@linux.intel.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Doug Ledford <dledford@redhat.com>
Cc: Serge Hallyn <serge.hallyn@canonical.com>
Cc: James Morris <james.l.morris@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/coredump.c         |  2 +-
 fs/exec.c             | 10 +++++-----
 fs/proc/internal.h    |  3 ++-
 include/linux/sched.h |  5 -----
 kernel/sysctl.c       |  2 +-
 5 files changed, 9 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/fs/coredump.c b/fs/coredump.c
index 69baf903d3bd..c6479658d487 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -501,7 +501,7 @@ void do_coredump(siginfo_t *siginfo)
 	 * so we dump it as root in mode 2, and only into a controlled
 	 * environment (pipe handler or fully qualified path).
 	 */
-	if (__get_dumpable(cprm.mm_flags) == SUID_DUMPABLE_SAFE) {
+	if (__get_dumpable(cprm.mm_flags) == SUID_DUMP_ROOT) {
 		/* Setuid core dump mode */
 		flag = O_EXCL;		/* Stop rewrite attacks */
 		cred->fsuid = GLOBAL_ROOT_UID;	/* Dump root private */
diff --git a/fs/exec.c b/fs/exec.c
index 864c50df660a..a96a4885bbbf 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1111,7 +1111,7 @@ void setup_new_exec(struct linux_binprm * bprm)
 	current->sas_ss_sp = current->sas_ss_size = 0;
 
 	if (uid_eq(current_euid(), current_uid()) && gid_eq(current_egid(), current_gid()))
-		set_dumpable(current->mm, SUID_DUMPABLE_ENABLED);
+		set_dumpable(current->mm, SUID_DUMP_USER);
 	else
 		set_dumpable(current->mm, suid_dumpable);
 
@@ -1639,17 +1639,17 @@ EXPORT_SYMBOL(set_binfmt);
 void set_dumpable(struct mm_struct *mm, int value)
 {
 	switch (value) {
-	case SUID_DUMPABLE_DISABLED:
+	case SUID_DUMP_DISABLE:
 		clear_bit(MMF_DUMPABLE, &mm->flags);
 		smp_wmb();
 		clear_bit(MMF_DUMP_SECURELY, &mm->flags);
 		break;
-	case SUID_DUMPABLE_ENABLED:
+	case SUID_DUMP_USER:
 		set_bit(MMF_DUMPABLE, &mm->flags);
 		smp_wmb();
 		clear_bit(MMF_DUMP_SECURELY, &mm->flags);
 		break;
-	case SUID_DUMPABLE_SAFE:
+	case SUID_DUMP_ROOT:
 		set_bit(MMF_DUMP_SECURELY, &mm->flags);
 		smp_wmb();
 		set_bit(MMF_DUMPABLE, &mm->flags);
@@ -1662,7 +1662,7 @@ int __get_dumpable(unsigned long mm_flags)
 	int ret;
 
 	ret = mm_flags & MMF_DUMPABLE_MASK;
-	return (ret > SUID_DUMPABLE_ENABLED) ? SUID_DUMPABLE_SAFE : ret;
+	return (ret > SUID_DUMP_USER) ? SUID_DUMP_ROOT : ret;
 }
 
 int get_dumpable(struct mm_struct *mm)
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 252544c05207..85ff3a4598b3 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -11,6 +11,7 @@
 
 #include <linux/sched.h>
 #include <linux/proc_fs.h>
+#include <linux/binfmts.h>
 struct  ctl_table_header;
 struct  mempolicy;
 
@@ -108,7 +109,7 @@ static inline int task_dumpable(struct task_struct *task)
 	if (mm)
 		dumpable = get_dumpable(mm);
 	task_unlock(task);
-	if (dumpable == SUID_DUMPABLE_ENABLED)
+	if (dumpable == SUID_DUMP_USER)
 		return 1;
 	return 0;
 }
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 6853bf947fde..d35d2b6ddbfb 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -346,11 +346,6 @@ static inline void arch_pick_mmap_layout(struct mm_struct *mm) {}
 extern void set_dumpable(struct mm_struct *mm, int value);
 extern int get_dumpable(struct mm_struct *mm);
 
-/* get/set_dumpable() values */
-#define SUID_DUMPABLE_DISABLED	0
-#define SUID_DUMPABLE_ENABLED	1
-#define SUID_DUMPABLE_SAFE	2
-
 /* mm flags */
 /* dumpable bits */
 #define MMF_DUMPABLE      0  /* core dump is permitted */
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index d8df00e69c14..d1b4ee67d2df 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -2095,7 +2095,7 @@ int proc_dointvec_minmax(struct ctl_table *table, int write,
 static void validate_coredump_safety(void)
 {
 #ifdef CONFIG_COREDUMP
-	if (suid_dumpable == SUID_DUMPABLE_SAFE &&
+	if (suid_dumpable == SUID_DUMP_ROOT &&
 	    core_pattern[0] != '/' && core_pattern[0] != '|') {
 		printk(KERN_WARNING "Unsafe core_pattern used with "\
 			"suid_dumpable=2. Pipe handler or fully qualified "\
-- 
cgit v1.2.3


From 6aa9707099c4b25700940eb3d016f16c4434360d Mon Sep 17 00:00:00 2001
From: Mandeep Singh Baines <msb@chromium.org>
Date: Wed, 27 Feb 2013 17:03:18 -0800
Subject: lockdep: check that no locks held at freeze time

We shouldn't try_to_freeze if locks are held.  Holding a lock can cause a
deadlock if the lock is later acquired in the suspend or hibernate path
(e.g.  by dpm).  Holding a lock can also cause a deadlock in the case of
cgroup_freezer if a lock is held inside a frozen cgroup that is later
acquired by a process outside that group.

[akpm@linux-foundation.org: export debug_check_no_locks_held]
Signed-off-by: Mandeep Singh Baines <msb@chromium.org>
Cc: Ben Chan <benchan@chromium.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Rafael J. Wysocki <rjw@sisk.pl>
Cc: Ingo Molnar <mingo@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/debug_locks.h |  4 ++--
 include/linux/freezer.h     |  3 +++
 kernel/exit.c               |  2 +-
 kernel/lockdep.c            | 17 ++++++++---------
 4 files changed, 14 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/debug_locks.h b/include/linux/debug_locks.h
index 3bd46f766751..a975de1ff59f 100644
--- a/include/linux/debug_locks.h
+++ b/include/linux/debug_locks.h
@@ -51,7 +51,7 @@ struct task_struct;
 extern void debug_show_all_locks(void);
 extern void debug_show_held_locks(struct task_struct *task);
 extern void debug_check_no_locks_freed(const void *from, unsigned long len);
-extern void debug_check_no_locks_held(struct task_struct *task);
+extern void debug_check_no_locks_held(void);
 #else
 static inline void debug_show_all_locks(void)
 {
@@ -67,7 +67,7 @@ debug_check_no_locks_freed(const void *from, unsigned long len)
 }
 
 static inline void
-debug_check_no_locks_held(struct task_struct *task)
+debug_check_no_locks_held(void)
 {
 }
 #endif
diff --git a/include/linux/freezer.h b/include/linux/freezer.h
index e70df40d84f6..043a5cf8b5ba 100644
--- a/include/linux/freezer.h
+++ b/include/linux/freezer.h
@@ -3,6 +3,7 @@
 #ifndef FREEZER_H_INCLUDED
 #define FREEZER_H_INCLUDED
 
+#include <linux/debug_locks.h>
 #include <linux/sched.h>
 #include <linux/wait.h>
 #include <linux/atomic.h>
@@ -48,6 +49,8 @@ extern void thaw_kernel_threads(void);
 
 static inline bool try_to_freeze(void)
 {
+	if (!(current->flags & PF_NOFREEZE))
+		debug_check_no_locks_held();
 	might_sleep();
 	if (likely(!freezing(current)))
 		return false;
diff --git a/kernel/exit.c b/kernel/exit.c
index 7dd20408707c..aaf97c122e8a 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -835,7 +835,7 @@ void do_exit(long code)
 	/*
 	 * Make sure we are holding no locks:
 	 */
-	debug_check_no_locks_held(tsk);
+	debug_check_no_locks_held();
 	/*
 	 * We can do this unlocked here. The futex code uses this flag
 	 * just to verify whether the pi state cleanup has been done
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 8a0efac4f99d..259db207b5d9 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -4088,7 +4088,7 @@ void debug_check_no_locks_freed(const void *mem_from, unsigned long mem_len)
 }
 EXPORT_SYMBOL_GPL(debug_check_no_locks_freed);
 
-static void print_held_locks_bug(struct task_struct *curr)
+static void print_held_locks_bug(void)
 {
 	if (!debug_locks_off())
 		return;
@@ -4097,22 +4097,21 @@ static void print_held_locks_bug(struct task_struct *curr)
 
 	printk("\n");
 	printk("=====================================\n");
-	printk("[ BUG: lock held at task exit time! ]\n");
+	printk("[ BUG: %s/%d still has locks held! ]\n",
+	       current->comm, task_pid_nr(current));
 	print_kernel_ident();
 	printk("-------------------------------------\n");
-	printk("%s/%d is exiting with locks still held!\n",
-		curr->comm, task_pid_nr(curr));
-	lockdep_print_held_locks(curr);
-
+	lockdep_print_held_locks(current);
 	printk("\nstack backtrace:\n");
 	dump_stack();
 }
 
-void debug_check_no_locks_held(struct task_struct *task)
+void debug_check_no_locks_held(void)
 {
-	if (unlikely(task->lockdep_depth > 0))
-		print_held_locks_bug(task);
+	if (unlikely(current->lockdep_depth > 0))
+		print_held_locks_bug();
 }
+EXPORT_SYMBOL_GPL(debug_check_no_locks_held);
 
 void debug_show_all_locks(void)
 {
-- 
cgit v1.2.3


From fe6e24ec90b753392c3f9ec1fbca196c4e88e511 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 27 Feb 2013 17:03:50 -0800
Subject: idr: deprecate idr_remove_all()

There was only one legitimate use of idr_remove_all() and a lot more of
incorrect uses (or lack of it).  Now that idr_destroy() implies
idr_remove_all() and all the in-kernel users updated not to use it,
there's no reason to keep it around.  Mark it deprecated so that we can
later unexport it.

idr_remove_all() is made an inline function calling __idr_remove_all()
to avoid triggering deprecated warning on EXPORT_SYMBOL().

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/idr.h | 14 +++++++++++++-
 lib/idr.c           | 10 +++-------
 2 files changed, 16 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/idr.h b/include/linux/idr.h
index e5eb125effe6..4cf042da3892 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -110,10 +110,22 @@ int idr_for_each(struct idr *idp,
 void *idr_get_next(struct idr *idp, int *nextid);
 void *idr_replace(struct idr *idp, void *ptr, int id);
 void idr_remove(struct idr *idp, int id);
-void idr_remove_all(struct idr *idp);
 void idr_destroy(struct idr *idp);
 void idr_init(struct idr *idp);
 
+void __idr_remove_all(struct idr *idp);	/* don't use */
+
+/**
+ * idr_remove_all - remove all ids from the given idr tree
+ * @idp: idr handle
+ *
+ * If you're trying to destroy @idp, calling idr_destroy() is enough.
+ * This is going away.  Don't use.
+ */
+static inline void __deprecated idr_remove_all(struct idr *idp)
+{
+	__idr_remove_all(idp);
+}
 
 /*
  * IDA - IDR based id allocator, use when translation from id to
diff --git a/lib/idr.c b/lib/idr.c
index b8602e0b30da..814c53ce0d41 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -433,11 +433,7 @@ void idr_remove(struct idr *idp, int id)
 }
 EXPORT_SYMBOL(idr_remove);
 
-/**
- * idr_remove_all - remove all ids from the given idr tree
- * @idp: idr handle
- */
-void idr_remove_all(struct idr *idp)
+void __idr_remove_all(struct idr *idp)
 {
 	int n, id, max;
 	int bt_mask;
@@ -470,7 +466,7 @@ void idr_remove_all(struct idr *idp)
 	}
 	idp->layers = 0;
 }
-EXPORT_SYMBOL(idr_remove_all);
+EXPORT_SYMBOL(__idr_remove_all);
 
 /**
  * idr_destroy - release all cached layers within an idr tree
@@ -487,7 +483,7 @@ EXPORT_SYMBOL(idr_remove_all);
  */
 void idr_destroy(struct idr *idp)
 {
-	idr_remove_all(idp);
+	__idr_remove_all(idp);
 
 	while (idp->id_free_cnt) {
 		struct idr_layer *p = get_from_free_list(idp);
-- 
cgit v1.2.3


From 4106ecaf59b79efff3f9b466baf9e8c67e19ac5a Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 27 Feb 2013 17:03:51 -0800
Subject: idr: cosmetic updates to struct / initializer definitions

* Tab align fields like a normal person.

* Drop the unnecessary 0 inits from IDR_INIT().

This patch is purely cosmetic.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/idr.h | 28 ++++++++++++----------------
 1 file changed, 12 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/idr.h b/include/linux/idr.h
index 4cf042da3892..8f4980db3524 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -49,28 +49,24 @@
 #define MAX_IDR_FREE (MAX_IDR_LEVEL * 2)
 
 struct idr_layer {
-	unsigned long		 bitmap; /* A zero bit means "space here" */
+	unsigned long		bitmap;	/* A zero bit means "space here" */
 	struct idr_layer __rcu	*ary[1<<IDR_BITS];
-	int			 count;	 /* When zero, we can release it */
-	int			 layer;	 /* distance from leaf */
-	struct rcu_head		 rcu_head;
+	int			count;	/* When zero, we can release it */
+	int			layer;	/* distance from leaf */
+	struct rcu_head		rcu_head;
 };
 
 struct idr {
-	struct idr_layer __rcu *top;
-	struct idr_layer *id_free;
-	int		  layers; /* only valid without concurrent changes */
-	int		  id_free_cnt;
-	spinlock_t	  lock;
+	struct idr_layer __rcu	*top;
+	struct idr_layer	*id_free;
+	int			layers;	/* only valid w/o concurrent changes */
+	int			id_free_cnt;
+	spinlock_t		lock;
 };
 
-#define IDR_INIT(name)						\
-{								\
-	.top		= NULL,					\
-	.id_free	= NULL,					\
-	.layers 	= 0,					\
-	.id_free_cnt	= 0,					\
-	.lock		= __SPIN_LOCK_UNLOCKED(name.lock),	\
+#define IDR_INIT(name)							\
+{									\
+	.lock			= __SPIN_LOCK_UNLOCKED(name.lock),	\
 }
 #define DEFINE_IDR(name)	struct idr name = IDR_INIT(name)
 
-- 
cgit v1.2.3


From 49038ef4fbe2842bd4d8338f89ec5c9ba71b0ae1 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 27 Feb 2013 17:03:52 -0800
Subject: idr: relocate idr_for_each_entry() and reorganize id[r|a]_get_new()

* Move idr_for_each_entry() definition next to other idr related
  definitions.

* Make id[r|a]_get_new() inline wrappers of id[r|a]_get_new_above().

This changes the implementation of idr_get_new() but the new
implementation is trivial.  This patch doesn't introduce any
functional change.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/idr.h | 47 +++++++++++++++++++++++++++++++++++------------
 lib/idr.c           | 49 -------------------------------------------------
 2 files changed, 35 insertions(+), 61 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/idr.h b/include/linux/idr.h
index 8f4980db3524..ff44bc83f3cb 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -99,7 +99,6 @@ struct idr {
 
 void *idr_find(struct idr *idp, int id);
 int idr_pre_get(struct idr *idp, gfp_t gfp_mask);
-int idr_get_new(struct idr *idp, void *ptr, int *id);
 int idr_get_new_above(struct idr *idp, void *ptr, int starting_id, int *id);
 int idr_for_each(struct idr *idp,
 		 int (*fn)(int id, void *p, void *data), void *data);
@@ -109,6 +108,30 @@ void idr_remove(struct idr *idp, int id);
 void idr_destroy(struct idr *idp);
 void idr_init(struct idr *idp);
 
+/**
+ * idr_get_new - allocate new idr entry
+ * @idp: idr handle
+ * @ptr: pointer you want associated with the id
+ * @id: pointer to the allocated handle
+ *
+ * Simple wrapper around idr_get_new_above() w/ @starting_id of zero.
+ */
+static inline int idr_get_new(struct idr *idp, void *ptr, int *id)
+{
+	return idr_get_new_above(idp, ptr, 0, id);
+}
+
+/**
+ * idr_for_each_entry - iterate over an idr's elements of a given type
+ * @idp:     idr handle
+ * @entry:   the type * to use as cursor
+ * @id:      id entry's key
+ */
+#define idr_for_each_entry(idp, entry, id)				\
+	for (id = 0, entry = (typeof(entry))idr_get_next((idp), &(id)); \
+	     entry != NULL;                                             \
+	     ++id, entry = (typeof(entry))idr_get_next((idp), &(id)))
+
 void __idr_remove_all(struct idr *idp);	/* don't use */
 
 /**
@@ -149,7 +172,6 @@ struct ida {
 
 int ida_pre_get(struct ida *ida, gfp_t gfp_mask);
 int ida_get_new_above(struct ida *ida, int starting_id, int *p_id);
-int ida_get_new(struct ida *ida, int *p_id);
 void ida_remove(struct ida *ida, int id);
 void ida_destroy(struct ida *ida);
 void ida_init(struct ida *ida);
@@ -158,17 +180,18 @@ int ida_simple_get(struct ida *ida, unsigned int start, unsigned int end,
 		   gfp_t gfp_mask);
 void ida_simple_remove(struct ida *ida, unsigned int id);
 
-void __init idr_init_cache(void);
-
 /**
- * idr_for_each_entry - iterate over an idr's elements of a given type
- * @idp:     idr handle
- * @entry:   the type * to use as cursor
- * @id:      id entry's key
+ * ida_get_new - allocate new ID
+ * @ida:	idr handle
+ * @p_id:	pointer to the allocated handle
+ *
+ * Simple wrapper around ida_get_new_above() w/ @starting_id of zero.
  */
-#define idr_for_each_entry(idp, entry, id)				\
-	for (id = 0, entry = (typeof(entry))idr_get_next((idp), &(id)); \
-	     entry != NULL;                                             \
-	     ++id, entry = (typeof(entry))idr_get_next((idp), &(id)))
+static inline int ida_get_new(struct ida *ida, int *p_id)
+{
+	return ida_get_new_above(ida, 0, p_id);
+}
+
+void __init idr_init_cache(void);
 
 #endif /* __IDR_H__ */
diff --git a/lib/idr.c b/lib/idr.c
index 814c53ce0d41..282841b5a561 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -317,36 +317,6 @@ int idr_get_new_above(struct idr *idp, void *ptr, int starting_id, int *id)
 }
 EXPORT_SYMBOL(idr_get_new_above);
 
-/**
- * idr_get_new - allocate new idr entry
- * @idp: idr handle
- * @ptr: pointer you want associated with the id
- * @id: pointer to the allocated handle
- *
- * If allocation from IDR's private freelist fails, idr_get_new_above() will
- * return %-EAGAIN.  The caller should retry the idr_pre_get() call to refill
- * IDR's preallocation and then retry the idr_get_new_above() call.
- *
- * If the idr is full idr_get_new_above() will return %-ENOSPC.
- *
- * @id returns a value in the range %0 ... %0x7fffffff
- */
-int idr_get_new(struct idr *idp, void *ptr, int *id)
-{
-	int rv;
-
-	rv = idr_get_new_above_int(idp, ptr, 0);
-	/*
-	 * This is a cheap hack until the IDR code can be fixed to
-	 * return proper error values.
-	 */
-	if (rv < 0)
-		return _idr_rc_to_errno(rv);
-	*id = rv;
-	return 0;
-}
-EXPORT_SYMBOL(idr_get_new);
-
 static void idr_remove_warning(int id)
 {
 	printk(KERN_WARNING
@@ -856,25 +826,6 @@ int ida_get_new_above(struct ida *ida, int starting_id, int *p_id)
 }
 EXPORT_SYMBOL(ida_get_new_above);
 
-/**
- * ida_get_new - allocate new ID
- * @ida:	idr handle
- * @p_id:	pointer to the allocated handle
- *
- * Allocate new ID.  It should be called with any required locks.
- *
- * If memory is required, it will return %-EAGAIN, you should unlock
- * and go back to the idr_pre_get() call.  If the idr is full, it will
- * return %-ENOSPC.
- *
- * @p_id returns a value in the range %0 ... %0x7fffffff.
- */
-int ida_get_new(struct ida *ida, int *p_id)
-{
-	return ida_get_new_above(ida, 0, p_id);
-}
-EXPORT_SYMBOL(ida_get_new);
-
 /**
  * ida_remove - remove the given ID
  * @ida:	ida handle
-- 
cgit v1.2.3


From 12d1b4393e0d8df36b2646a5e512f0513fb532d2 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 27 Feb 2013 17:03:53 -0800
Subject: idr: remove _idr_rc_to_errno() hack

idr uses -1, IDR_NEED_TO_GROW and IDR_NOMORE_SPACE to communicate
exception conditions internally.  The return value is later translated
to errno values using _idr_rc_to_errno().

This is confusing.  Drop the custom ones and consistently use -EAGAIN
for "tree needs to grow", -ENOMEM for "need more memory" and -ENOSPC for
"ran out of ID space".

Due to the weird memory preloading mechanism, [ra]_get_new*() return
-EAGAIN on memory shortage, so we need to substitute -ENOMEM w/
-EAGAIN on those interface functions.  They'll eventually be cleaned
up and the translations will go away.

This patch doesn't introduce any functional changes.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/idr.h |  6 ------
 lib/idr.c           | 35 +++++++++++++++++++++++------------
 2 files changed, 23 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/idr.h b/include/linux/idr.h
index ff44bc83f3cb..837f152b1383 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -70,12 +70,6 @@ struct idr {
 }
 #define DEFINE_IDR(name)	struct idr name = IDR_INIT(name)
 
-/* Actions to be taken after a call to _idr_sub_alloc */
-#define IDR_NEED_TO_GROW -2
-#define IDR_NOMORE_SPACE -3
-
-#define _idr_rc_to_errno(rc) ((rc) == -1 ? -EAGAIN : -ENOSPC)
-
 /**
  * DOC: idr sync
  * idr synchronization (stolen from radix-tree.h)
diff --git a/lib/idr.c b/lib/idr.c
index 282841b5a561..bde6eecb0e87 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -133,6 +133,21 @@ int idr_pre_get(struct idr *idp, gfp_t gfp_mask)
 }
 EXPORT_SYMBOL(idr_pre_get);
 
+/**
+ * sub_alloc - try to allocate an id without growing the tree depth
+ * @idp: idr handle
+ * @starting_id: id to start search at
+ * @id: pointer to the allocated handle
+ * @pa: idr_layer[MAX_IDR_LEVEL] used as backtrack buffer
+ *
+ * Allocate an id in range [@starting_id, INT_MAX] from @idp without
+ * growing its depth.  Returns
+ *
+ *  the allocated id >= 0 if successful,
+ *  -EAGAIN if the tree needs to grow for allocation to succeed,
+ *  -ENOSPC if the id space is exhausted,
+ *  -ENOMEM if more idr_layers need to be allocated.
+ */
 static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa)
 {
 	int n, m, sh;
@@ -161,7 +176,7 @@ static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa)
 			/* if already at the top layer, we need to grow */
 			if (id >= 1 << (idp->layers * IDR_BITS)) {
 				*starting_id = id;
-				return IDR_NEED_TO_GROW;
+				return -EAGAIN;
 			}
 			p = pa[l];
 			BUG_ON(!p);
@@ -180,7 +195,7 @@ static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa)
 			id = ((id >> sh) ^ n ^ m) << sh;
 		}
 		if ((id >= MAX_IDR_BIT) || (id < 0))
-			return IDR_NOMORE_SPACE;
+			return -ENOSPC;
 		if (l == 0)
 			break;
 		/*
@@ -189,7 +204,7 @@ static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa)
 		if (!p->ary[m]) {
 			new = get_from_free_list(idp);
 			if (!new)
-				return -1;
+				return -ENOMEM;
 			new->layer = l-1;
 			rcu_assign_pointer(p->ary[m], new);
 			p->count++;
@@ -215,7 +230,7 @@ build_up:
 	layers = idp->layers;
 	if (unlikely(!p)) {
 		if (!(p = get_from_free_list(idp)))
-			return -1;
+			return -ENOMEM;
 		p->layer = 0;
 		layers = 1;
 	}
@@ -246,7 +261,7 @@ build_up:
 				__move_to_free_list(idp, new);
 			}
 			spin_unlock_irqrestore(&idp->lock, flags);
-			return -1;
+			return -ENOMEM;
 		}
 		new->ary[0] = p;
 		new->count = 1;
@@ -258,7 +273,7 @@ build_up:
 	rcu_assign_pointer(idp->top, p);
 	idp->layers = layers;
 	v = sub_alloc(idp, &id, pa);
-	if (v == IDR_NEED_TO_GROW)
+	if (v == -EAGAIN)
 		goto build_up;
 	return(v);
 }
@@ -306,12 +321,8 @@ int idr_get_new_above(struct idr *idp, void *ptr, int starting_id, int *id)
 	int rv;
 
 	rv = idr_get_new_above_int(idp, ptr, starting_id);
-	/*
-	 * This is a cheap hack until the IDR code can be fixed to
-	 * return proper error values.
-	 */
 	if (rv < 0)
-		return _idr_rc_to_errno(rv);
+		return rv == -ENOMEM ? -EAGAIN : rv;
 	*id = rv;
 	return 0;
 }
@@ -766,7 +777,7 @@ int ida_get_new_above(struct ida *ida, int starting_id, int *p_id)
 	/* get vacant slot */
 	t = idr_get_empty_slot(&ida->idr, idr_id, pa);
 	if (t < 0)
-		return _idr_rc_to_errno(t);
+		return t == -ENOMEM ? -EAGAIN : t;
 
 	if (t * IDA_BITMAP_BITS >= MAX_IDR_BIT)
 		return -ENOSPC;
-- 
cgit v1.2.3


From d5c7409f79e14db49d00785692334657592c07ff Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 27 Feb 2013 17:03:55 -0800
Subject: idr: implement idr_preload[_end]() and idr_alloc()

The current idr interface is very cumbersome.

* For all allocations, two function calls - idr_pre_get() and
  idr_get_new*() - should be made.

* idr_pre_get() doesn't guarantee that the following idr_get_new*()
  will not fail from memory shortage.  If idr_get_new*() returns
  -EAGAIN, the caller is expected to retry pre_get and allocation.

* idr_get_new*() can't enforce upper limit.  Upper limit can only be
  enforced by allocating and then freeing if above limit.

* idr_layer buffer is unnecessarily per-idr.  Each idr ends up keeping
  around MAX_IDR_FREE idr_layers.  The memory consumed per idr is
  under two pages but it makes it difficult to make idr_layer larger.

This patch implements the following new set of allocation functions.

* idr_preload[_end]() - Similar to radix preload but doesn't fail.
  The first idr_alloc() inside preload section can be treated as if it
  were called with @gfp_mask used for idr_preload().

* idr_alloc() - Allocate an ID w/ lower and upper limits.  Takes
  @gfp_flags and can be used w/o preloading.  When used inside
  preloaded section, the allocation mask of preloading can be assumed.

If idr_alloc() can be called from a context which allows sufficiently
relaxed @gfp_mask, it can be used by itself.  If, for example,
idr_alloc() is called inside spinlock protected region, preloading can
be used like the following.

	idr_preload(GFP_KERNEL);
	spin_lock(lock);

	id = idr_alloc(idr, ptr, start, end, GFP_NOWAIT);

	spin_unlock(lock);
	idr_preload_end();
	if (id < 0)
		error;

which is much simpler and less error-prone than idr_pre_get and
idr_get_new*() loop.

The new interface uses per-pcu idr_layer buffer and thus the number of
idr's in the system doesn't affect the amount of memory used for
preloading.

idr_layer_alloc() is introduced to handle idr_layer allocations for
both old and new ID allocation paths.  This is a bit hairy now but the
new interface is expected to replace the old and the internal
implementation eventually will become simpler.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/idr.h |  14 +++++
 lib/idr.c           | 174 +++++++++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 180 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/idr.h b/include/linux/idr.h
index 837f152b1383..6dcf133f208a 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -94,14 +94,28 @@ struct idr {
 void *idr_find(struct idr *idp, int id);
 int idr_pre_get(struct idr *idp, gfp_t gfp_mask);
 int idr_get_new_above(struct idr *idp, void *ptr, int starting_id, int *id);
+void idr_preload(gfp_t gfp_mask);
+int idr_alloc(struct idr *idp, void *ptr, int start, int end, gfp_t gfp_mask);
 int idr_for_each(struct idr *idp,
 		 int (*fn)(int id, void *p, void *data), void *data);
 void *idr_get_next(struct idr *idp, int *nextid);
 void *idr_replace(struct idr *idp, void *ptr, int id);
 void idr_remove(struct idr *idp, int id);
+void idr_free(struct idr *idp, int id);
 void idr_destroy(struct idr *idp);
 void idr_init(struct idr *idp);
 
+/**
+ * idr_preload_end - end preload section started with idr_preload()
+ *
+ * Each idr_preload() should be matched with an invocation of this
+ * function.  See idr_preload() for details.
+ */
+static inline void idr_preload_end(void)
+{
+	preempt_enable();
+}
+
 /**
  * idr_get_new - allocate new idr entry
  * @idp: idr handle
diff --git a/lib/idr.c b/lib/idr.c
index b13aae5bdc81..2d016f5c410e 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -35,8 +35,12 @@
 #include <linux/string.h>
 #include <linux/idr.h>
 #include <linux/spinlock.h>
+#include <linux/percpu.h>
+#include <linux/hardirq.h>
 
 static struct kmem_cache *idr_layer_cache;
+static DEFINE_PER_CPU(struct idr_layer *, idr_preload_head);
+static DEFINE_PER_CPU(int, idr_preload_cnt);
 static DEFINE_SPINLOCK(simple_ida_lock);
 
 static struct idr_layer *get_from_free_list(struct idr *idp)
@@ -54,6 +58,50 @@ static struct idr_layer *get_from_free_list(struct idr *idp)
 	return(p);
 }
 
+/**
+ * idr_layer_alloc - allocate a new idr_layer
+ * @gfp_mask: allocation mask
+ * @layer_idr: optional idr to allocate from
+ *
+ * If @layer_idr is %NULL, directly allocate one using @gfp_mask or fetch
+ * one from the per-cpu preload buffer.  If @layer_idr is not %NULL, fetch
+ * an idr_layer from @idr->id_free.
+ *
+ * @layer_idr is to maintain backward compatibility with the old alloc
+ * interface - idr_pre_get() and idr_get_new*() - and will be removed
+ * together with per-pool preload buffer.
+ */
+static struct idr_layer *idr_layer_alloc(gfp_t gfp_mask, struct idr *layer_idr)
+{
+	struct idr_layer *new;
+
+	/* this is the old path, bypass to get_from_free_list() */
+	if (layer_idr)
+		return get_from_free_list(layer_idr);
+
+	/* try to allocate directly from kmem_cache */
+	new = kmem_cache_zalloc(idr_layer_cache, gfp_mask);
+	if (new)
+		return new;
+
+	/*
+	 * Try to fetch one from the per-cpu preload buffer if in process
+	 * context.  See idr_preload() for details.
+	 */
+	if (in_interrupt())
+		return NULL;
+
+	preempt_disable();
+	new = __this_cpu_read(idr_preload_head);
+	if (new) {
+		__this_cpu_write(idr_preload_head, new->ary[0]);
+		__this_cpu_dec(idr_preload_cnt);
+		new->ary[0] = NULL;
+	}
+	preempt_enable();
+	return new;
+}
+
 static void idr_layer_rcu_free(struct rcu_head *head)
 {
 	struct idr_layer *layer;
@@ -139,6 +187,8 @@ EXPORT_SYMBOL(idr_pre_get);
  * @starting_id: id to start search at
  * @id: pointer to the allocated handle
  * @pa: idr_layer[MAX_IDR_LEVEL] used as backtrack buffer
+ * @gfp_mask: allocation mask for idr_layer_alloc()
+ * @layer_idr: optional idr passed to idr_layer_alloc()
  *
  * Allocate an id in range [@starting_id, INT_MAX] from @idp without
  * growing its depth.  Returns
@@ -148,7 +198,8 @@ EXPORT_SYMBOL(idr_pre_get);
  *  -ENOSPC if the id space is exhausted,
  *  -ENOMEM if more idr_layers need to be allocated.
  */
-static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa)
+static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa,
+		     gfp_t gfp_mask, struct idr *layer_idr)
 {
 	int n, m, sh;
 	struct idr_layer *p, *new;
@@ -202,7 +253,7 @@ static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa)
 		 * Create the layer below if it is missing.
 		 */
 		if (!p->ary[m]) {
-			new = get_from_free_list(idp);
+			new = idr_layer_alloc(gfp_mask, layer_idr);
 			if (!new)
 				return -ENOMEM;
 			new->layer = l-1;
@@ -218,7 +269,8 @@ static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa)
 }
 
 static int idr_get_empty_slot(struct idr *idp, int starting_id,
-			      struct idr_layer **pa)
+			      struct idr_layer **pa, gfp_t gfp_mask,
+			      struct idr *layer_idr)
 {
 	struct idr_layer *p, *new;
 	int layers, v, id;
@@ -229,7 +281,7 @@ build_up:
 	p = idp->top;
 	layers = idp->layers;
 	if (unlikely(!p)) {
-		if (!(p = get_from_free_list(idp)))
+		if (!(p = idr_layer_alloc(gfp_mask, layer_idr)))
 			return -ENOMEM;
 		p->layer = 0;
 		layers = 1;
@@ -248,7 +300,7 @@ build_up:
 			p->layer++;
 			continue;
 		}
-		if (!(new = get_from_free_list(idp))) {
+		if (!(new = idr_layer_alloc(gfp_mask, layer_idr))) {
 			/*
 			 * The allocation failed.  If we built part of
 			 * the structure tear it down.
@@ -272,7 +324,7 @@ build_up:
 	}
 	rcu_assign_pointer(idp->top, p);
 	idp->layers = layers;
-	v = sub_alloc(idp, &id, pa);
+	v = sub_alloc(idp, &id, pa, gfp_mask, layer_idr);
 	if (v == -EAGAIN)
 		goto build_up;
 	return(v);
@@ -312,7 +364,7 @@ int idr_get_new_above(struct idr *idp, void *ptr, int starting_id, int *id)
 	struct idr_layer *pa[MAX_IDR_LEVEL];
 	int rv;
 
-	rv = idr_get_empty_slot(idp, starting_id, pa);
+	rv = idr_get_empty_slot(idp, starting_id, pa, 0, idp);
 	if (rv < 0)
 		return rv == -ENOMEM ? -EAGAIN : rv;
 
@@ -322,6 +374,112 @@ int idr_get_new_above(struct idr *idp, void *ptr, int starting_id, int *id)
 }
 EXPORT_SYMBOL(idr_get_new_above);
 
+/**
+ * idr_preload - preload for idr_alloc()
+ * @gfp_mask: allocation mask to use for preloading
+ *
+ * Preload per-cpu layer buffer for idr_alloc().  Can only be used from
+ * process context and each idr_preload() invocation should be matched with
+ * idr_preload_end().  Note that preemption is disabled while preloaded.
+ *
+ * The first idr_alloc() in the preloaded section can be treated as if it
+ * were invoked with @gfp_mask used for preloading.  This allows using more
+ * permissive allocation masks for idrs protected by spinlocks.
+ *
+ * For example, if idr_alloc() below fails, the failure can be treated as
+ * if idr_alloc() were called with GFP_KERNEL rather than GFP_NOWAIT.
+ *
+ *	idr_preload(GFP_KERNEL);
+ *	spin_lock(lock);
+ *
+ *	id = idr_alloc(idr, ptr, start, end, GFP_NOWAIT);
+ *
+ *	spin_unlock(lock);
+ *	idr_preload_end();
+ *	if (id < 0)
+ *		error;
+ */
+void idr_preload(gfp_t gfp_mask)
+{
+	/*
+	 * Consuming preload buffer from non-process context breaks preload
+	 * allocation guarantee.  Disallow usage from those contexts.
+	 */
+	WARN_ON_ONCE(in_interrupt());
+	might_sleep_if(gfp_mask & __GFP_WAIT);
+
+	preempt_disable();
+
+	/*
+	 * idr_alloc() is likely to succeed w/o full idr_layer buffer and
+	 * return value from idr_alloc() needs to be checked for failure
+	 * anyway.  Silently give up if allocation fails.  The caller can
+	 * treat failures from idr_alloc() as if idr_alloc() were called
+	 * with @gfp_mask which should be enough.
+	 */
+	while (__this_cpu_read(idr_preload_cnt) < MAX_IDR_FREE) {
+		struct idr_layer *new;
+
+		preempt_enable();
+		new = kmem_cache_zalloc(idr_layer_cache, gfp_mask);
+		preempt_disable();
+		if (!new)
+			break;
+
+		/* link the new one to per-cpu preload list */
+		new->ary[0] = __this_cpu_read(idr_preload_head);
+		__this_cpu_write(idr_preload_head, new);
+		__this_cpu_inc(idr_preload_cnt);
+	}
+}
+EXPORT_SYMBOL(idr_preload);
+
+/**
+ * idr_alloc - allocate new idr entry
+ * @idr: the (initialized) idr
+ * @ptr: pointer to be associated with the new id
+ * @start: the minimum id (inclusive)
+ * @end: the maximum id (exclusive, <= 0 for max)
+ * @gfp_mask: memory allocation flags
+ *
+ * Allocate an id in [start, end) and associate it with @ptr.  If no ID is
+ * available in the specified range, returns -ENOSPC.  On memory allocation
+ * failure, returns -ENOMEM.
+ *
+ * Note that @end is treated as max when <= 0.  This is to always allow
+ * using @start + N as @end as long as N is inside integer range.
+ *
+ * The user is responsible for exclusively synchronizing all operations
+ * which may modify @idr.  However, read-only accesses such as idr_find()
+ * or iteration can be performed under RCU read lock provided the user
+ * destroys @ptr in RCU-safe way after removal from idr.
+ */
+int idr_alloc(struct idr *idr, void *ptr, int start, int end, gfp_t gfp_mask)
+{
+	int max = end > 0 ? end - 1 : INT_MAX;	/* inclusive upper limit */
+	struct idr_layer *pa[MAX_IDR_LEVEL];
+	int id;
+
+	might_sleep_if(gfp_mask & __GFP_WAIT);
+
+	/* sanity checks */
+	if (WARN_ON_ONCE(start < 0))
+		return -EINVAL;
+	if (unlikely(max < start))
+		return -ENOSPC;
+
+	/* allocate id */
+	id = idr_get_empty_slot(idr, start, pa, gfp_mask, NULL);
+	if (unlikely(id < 0))
+		return id;
+	if (unlikely(id > max))
+		return -ENOSPC;
+
+	idr_fill_slot(ptr, id, pa);
+	return id;
+}
+EXPORT_SYMBOL_GPL(idr_alloc);
+
 static void idr_remove_warning(int id)
 {
 	printk(KERN_WARNING
@@ -769,7 +927,7 @@ int ida_get_new_above(struct ida *ida, int starting_id, int *p_id)
 
  restart:
 	/* get vacant slot */
-	t = idr_get_empty_slot(&ida->idr, idr_id, pa);
+	t = idr_get_empty_slot(&ida->idr, idr_id, pa, 0, &ida->idr);
 	if (t < 0)
 		return t == -ENOMEM ? -EAGAIN : t;
 
-- 
cgit v1.2.3


From e8c8d1bc063bc88cfa1356266027b5075d3a82d7 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 27 Feb 2013 17:05:04 -0800
Subject: idr: remove MAX_IDR_MASK and move left MAX_IDR_* into idr.c

MAX_IDR_MASK is another weirdness in the idr interface.  As idr covers
whole positive integer range, it's defined as 0x7fffffff or INT_MAX.

Its usage in idr_find(), idr_replace() and idr_remove() is bizarre.
They basically mask off the sign bit and operate on the rest, so if
the caller, by accident, passes in a negative number, the sign bit
will be masked off and the remaining part will be used as if that was
the input, which is worse than crashing.

The constant is visible in idr.h and there are several users in the
kernel.

* drivers/i2c/i2c-core.c:i2c_add_numbered_adapter()

  Basically used to test if adap->nr is a negative number which isn't
  -1 and returns -EINVAL if so.  idr_alloc() already has negative
  @start checking (w/ WARN_ON_ONCE), so this can go away.

* drivers/infiniband/core/cm.c:cm_alloc_id()
  drivers/infiniband/hw/mlx4/cm.c:id_map_alloc()

  Used to wrap cyclic @start.  Can be replaced with max(next, 0).
  Note that this type of cyclic allocation using idr is buggy.  These
  are prone to spurious -ENOSPC failure after the first wraparound.

* fs/super.c:get_anon_bdev()

  The ID allocated from ida is masked off before being tested whether
  it's inside valid range.  ida allocated ID can never be a negative
  number and the masking is unnecessary.

Update idr_*() functions to fail with -EINVAL when negative @id is
specified and update other MAX_IDR_MASK users as described above.

This leaves MAX_IDR_MASK without any user, remove it and relocate
other MAX_IDR_* constants to lib/idr.c.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Jean Delvare <khali@linux-fr.org>
Cc: Roland Dreier <roland@kernel.org>
Cc: Sean Hefty <sean.hefty@intel.com>
Cc: Hal Rosenstock <hal.rosenstock@gmail.com>
Cc: "Marciniszyn, Mike" <mike.marciniszyn@intel.com>
Cc: Jack Morgenstein <jackm@dev.mellanox.co.il>
Cc: Or Gerlitz <ogerlitz@mellanox.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Acked-by: Wolfram Sang <wolfram@the-dreams.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/i2c/i2c-core.c          |  2 --
 drivers/infiniband/core/cm.c    |  2 +-
 drivers/infiniband/hw/mlx4/cm.c |  2 +-
 fs/super.c                      |  2 +-
 include/linux/idr.h             | 10 ----------
 lib/idr.c                       | 24 +++++++++++++++++-------
 6 files changed, 20 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c
index 8d1f644a7fdc..991d38daa87d 100644
--- a/drivers/i2c/i2c-core.c
+++ b/drivers/i2c/i2c-core.c
@@ -979,8 +979,6 @@ int i2c_add_numbered_adapter(struct i2c_adapter *adap)
 
 	if (adap->nr == -1) /* -1 means dynamically assign bus id */
 		return i2c_add_adapter(adap);
-	if (adap->nr & ~MAX_IDR_MASK)
-		return -EINVAL;
 
 	mutex_lock(&core_lock);
 	id = idr_alloc(&i2c_adapter_idr, adap, adap->nr, adap->nr + 1,
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 98281fe5ea4b..784b97cb05b0 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -390,7 +390,7 @@ static int cm_alloc_id(struct cm_id_private *cm_id_priv)
 
 	id = idr_alloc(&cm.local_id_table, cm_id_priv, next_id, 0, GFP_NOWAIT);
 	if (id >= 0)
-		next_id = ((unsigned) id + 1) & MAX_IDR_MASK;
+		next_id = max(id + 1, 0);
 
 	spin_unlock_irqrestore(&cm.lock, flags);
 	idr_preload_end();
diff --git a/drivers/infiniband/hw/mlx4/cm.c b/drivers/infiniband/hw/mlx4/cm.c
index 80e59ed864b3..e0d79b2395e4 100644
--- a/drivers/infiniband/hw/mlx4/cm.c
+++ b/drivers/infiniband/hw/mlx4/cm.c
@@ -225,7 +225,7 @@ id_map_alloc(struct ib_device *ibdev, int slave_id, u32 sl_cm_id)
 
 	ret = idr_alloc(&sriov->pv_id_table, ent, next_id, 0, GFP_NOWAIT);
 	if (ret >= 0) {
-		next_id = ((unsigned)ret + 1) & MAX_IDR_MASK;
+		next_id = max(ret + 1, 0);
 		ent->pv_cm_id = (u32)ret;
 		sl_id_map_add(ibdev, ent);
 		list_add_tail(&ent->list, &sriov->cm_list);
diff --git a/fs/super.c b/fs/super.c
index 12f123712161..df6c2f4c6b59 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -842,7 +842,7 @@ int get_anon_bdev(dev_t *p)
 	else if (error)
 		return -EAGAIN;
 
-	if ((dev & MAX_IDR_MASK) == (1 << MINORBITS)) {
+	if (dev == (1 << MINORBITS)) {
 		spin_lock(&unnamed_dev_lock);
 		ida_remove(&unnamed_dev_ida, dev);
 		if (unnamed_dev_start > dev)
diff --git a/include/linux/idr.h b/include/linux/idr.h
index 6dcf133f208a..99b0ce533f0e 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -38,16 +38,6 @@
 #define IDR_SIZE (1 << IDR_BITS)
 #define IDR_MASK ((1 << IDR_BITS)-1)
 
-#define MAX_IDR_SHIFT (sizeof(int)*8 - 1)
-#define MAX_IDR_BIT (1U << MAX_IDR_SHIFT)
-#define MAX_IDR_MASK (MAX_IDR_BIT - 1)
-
-/* Leave the possibility of an incomplete final layer */
-#define MAX_IDR_LEVEL ((MAX_IDR_SHIFT + IDR_BITS - 1) / IDR_BITS)
-
-/* Number of id_layer structs to leave in free list */
-#define MAX_IDR_FREE (MAX_IDR_LEVEL * 2)
-
 struct idr_layer {
 	unsigned long		bitmap;	/* A zero bit means "space here" */
 	struct idr_layer __rcu	*ary[1<<IDR_BITS];
diff --git a/lib/idr.c b/lib/idr.c
index 63dda62131b3..e2b799989ab0 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -38,6 +38,15 @@
 #include <linux/percpu.h>
 #include <linux/hardirq.h>
 
+#define MAX_IDR_SHIFT		(sizeof(int) * 8 - 1)
+#define MAX_IDR_BIT		(1U << MAX_IDR_SHIFT)
+
+/* Leave the possibility of an incomplete final layer */
+#define MAX_IDR_LEVEL ((MAX_IDR_SHIFT + IDR_BITS - 1) / IDR_BITS)
+
+/* Number of id_layer structs to leave in free list */
+#define MAX_IDR_FREE (MAX_IDR_LEVEL * 2)
+
 static struct kmem_cache *idr_layer_cache;
 static DEFINE_PER_CPU(struct idr_layer *, idr_preload_head);
 static DEFINE_PER_CPU(int, idr_preload_cnt);
@@ -542,8 +551,8 @@ void idr_remove(struct idr *idp, int id)
 	struct idr_layer *p;
 	struct idr_layer *to_free;
 
-	/* Mask off upper bits we don't use for the search. */
-	id &= MAX_IDR_MASK;
+	if (WARN_ON_ONCE(id < 0))
+		return;
 
 	sub_remove(idp, (idp->layers - 1) * IDR_BITS, id);
 	if (idp->top && idp->top->count == 1 && (idp->layers > 1) &&
@@ -650,14 +659,14 @@ void *idr_find(struct idr *idp, int id)
 	int n;
 	struct idr_layer *p;
 
+	if (WARN_ON_ONCE(id < 0))
+		return NULL;
+
 	p = rcu_dereference_raw(idp->top);
 	if (!p)
 		return NULL;
 	n = (p->layer+1) * IDR_BITS;
 
-	/* Mask off upper bits we don't use for the search. */
-	id &= MAX_IDR_MASK;
-
 	if (id > idr_max(p->layer + 1))
 		return NULL;
 	BUG_ON(n == 0);
@@ -799,14 +808,15 @@ void *idr_replace(struct idr *idp, void *ptr, int id)
 	int n;
 	struct idr_layer *p, *old_p;
 
+	if (WARN_ON_ONCE(id < 0))
+		return ERR_PTR(-EINVAL);
+
 	p = idp->top;
 	if (!p)
 		return ERR_PTR(-EINVAL);
 
 	n = (p->layer+1) * IDR_BITS;
 
-	id &= MAX_IDR_MASK;
-
 	if (id >= (1 << n))
 		return ERR_PTR(-EINVAL);
 
-- 
cgit v1.2.3


From 1d9b2e1e663719d406e3a770979a19ba4233bba0 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 27 Feb 2013 17:05:05 -0800
Subject: idr: remove length restriction from idr_layer->bitmap

Currently, idr->bitmap is declared as an unsigned long which restricts
the number of bits an idr_layer can contain.  All bitops can handle
arbitrary positive integer bit number and there's no reason for this
restriction.

Declare idr_layer->bitmap using DECLARE_BITMAP() instead of a single
unsigned long.

* idr_layer->bitmap is now an array.  '&' dropped from params to
  bitops.

* Replaced "== IDR_FULL" tests with bitmap_full() and removed
  IDR_FULL.

* Replaced find_next_bit() on ~bitmap with find_next_zero_bit().

* Replaced "bitmap = 0" with bitmap_clear().

This patch doesn't (or at least shouldn't) introduce any behavior
changes.

[akpm@linux-foundation.org: checkpatch fixes]
Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/idr.h | 12 +-----------
 lib/idr.c           | 34 +++++++++++++++++-----------------
 2 files changed, 18 insertions(+), 28 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/idr.h b/include/linux/idr.h
index 99b0ce533f0e..63aa542da49b 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -19,18 +19,8 @@
 
 #if BITS_PER_LONG == 32
 # define IDR_BITS 5
-# define IDR_FULL 0xfffffffful
-/* We can only use two of the bits in the top level because there is
-   only one possible bit in the top level (5 bits * 7 levels = 35
-   bits, but you only use 31 bits in the id). */
-# define TOP_LEVEL_FULL (IDR_FULL >> 30)
 #elif BITS_PER_LONG == 64
 # define IDR_BITS 6
-# define IDR_FULL 0xfffffffffffffffful
-/* We can only use two of the bits in the top level because there is
-   only one possible bit in the top level (6 bits * 6 levels = 36
-   bits, but you only use 31 bits in the id). */
-# define TOP_LEVEL_FULL (IDR_FULL >> 62)
 #else
 # error "BITS_PER_LONG is not 32 or 64"
 #endif
@@ -39,7 +29,7 @@
 #define IDR_MASK ((1 << IDR_BITS)-1)
 
 struct idr_layer {
-	unsigned long		bitmap;	/* A zero bit means "space here" */
+	DECLARE_BITMAP(bitmap, IDR_SIZE); /* A zero bit means "space here" */
 	struct idr_layer __rcu	*ary[1<<IDR_BITS];
 	int			count;	/* When zero, we can release it */
 	int			layer;	/* distance from leaf */
diff --git a/lib/idr.c b/lib/idr.c
index e2b799989ab0..d66e75bfc1a0 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -157,18 +157,18 @@ static void idr_mark_full(struct idr_layer **pa, int id)
 	struct idr_layer *p = pa[0];
 	int l = 0;
 
-	__set_bit(id & IDR_MASK, &p->bitmap);
+	__set_bit(id & IDR_MASK, p->bitmap);
 	/*
 	 * If this layer is full mark the bit in the layer above to
 	 * show that this part of the radix tree is full.  This may
 	 * complete the layer above and require walking up the radix
 	 * tree.
 	 */
-	while (p->bitmap == IDR_FULL) {
+	while (bitmap_full(p->bitmap, IDR_SIZE)) {
 		if (!(p = pa[++l]))
 			break;
 		id = id >> IDR_BITS;
-		__set_bit((id & IDR_MASK), &p->bitmap);
+		__set_bit((id & IDR_MASK), p->bitmap);
 	}
 }
 
@@ -221,7 +221,6 @@ static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa,
 	int n, m, sh;
 	struct idr_layer *p, *new;
 	int l, id, oid;
-	unsigned long bm;
 
 	id = *starting_id;
  restart:
@@ -233,8 +232,7 @@ static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa,
 		 * We run around this while until we reach the leaf node...
 		 */
 		n = (id >> (IDR_BITS*l)) & IDR_MASK;
-		bm = ~p->bitmap;
-		m = find_next_bit(&bm, IDR_SIZE, n);
+		m = find_next_zero_bit(p->bitmap, IDR_SIZE, n);
 		if (m == IDR_SIZE) {
 			/* no space available go back to previous layer. */
 			l++;
@@ -326,7 +324,8 @@ build_up:
 			for (new = p; p && p != idp->top; new = p) {
 				p = p->ary[0];
 				new->ary[0] = NULL;
-				new->bitmap = new->count = 0;
+				new->count = 0;
+				bitmap_clear(new->bitmap, 0, IDR_SIZE);
 				__move_to_free_list(idp, new);
 			}
 			spin_unlock_irqrestore(&idp->lock, flags);
@@ -335,8 +334,8 @@ build_up:
 		new->ary[0] = p;
 		new->count = 1;
 		new->layer = layers-1;
-		if (p->bitmap == IDR_FULL)
-			__set_bit(0, &new->bitmap);
+		if (bitmap_full(p->bitmap, IDR_SIZE))
+			__set_bit(0, new->bitmap);
 		p = new;
 	}
 	rcu_assign_pointer(idp->top, p);
@@ -517,14 +516,14 @@ static void sub_remove(struct idr *idp, int shift, int id)
 
 	while ((shift > 0) && p) {
 		n = (id >> shift) & IDR_MASK;
-		__clear_bit(n, &p->bitmap);
+		__clear_bit(n, p->bitmap);
 		*++paa = &p->ary[n];
 		p = p->ary[n];
 		shift -= IDR_BITS;
 	}
 	n = id & IDR_MASK;
-	if (likely(p != NULL && test_bit(n, &p->bitmap))){
-		__clear_bit(n, &p->bitmap);
+	if (likely(p != NULL && test_bit(n, p->bitmap))) {
+		__clear_bit(n, p->bitmap);
 		rcu_assign_pointer(p->ary[n], NULL);
 		to_free = NULL;
 		while(*paa && ! --((**paa)->count)){
@@ -567,7 +566,8 @@ void idr_remove(struct idr *idp, int id)
 		p = idp->top->ary[0];
 		rcu_assign_pointer(idp->top, p);
 		--idp->layers;
-		to_free->bitmap = to_free->count = 0;
+		to_free->count = 0;
+		bitmap_clear(to_free->bitmap, 0, IDR_SIZE);
 		free_layer(to_free);
 	}
 	while (idp->id_free_cnt >= MAX_IDR_FREE) {
@@ -827,7 +827,7 @@ void *idr_replace(struct idr *idp, void *ptr, int id)
 	}
 
 	n = id & IDR_MASK;
-	if (unlikely(p == NULL || !test_bit(n, &p->bitmap)))
+	if (unlikely(p == NULL || !test_bit(n, p->bitmap)))
 		return ERR_PTR(-ENOENT);
 
 	old_p = p->ary[n];
@@ -1024,7 +1024,7 @@ void ida_remove(struct ida *ida, int id)
 	/* clear full bits while looking up the leaf idr_layer */
 	while ((shift > 0) && p) {
 		n = (idr_id >> shift) & IDR_MASK;
-		__clear_bit(n, &p->bitmap);
+		__clear_bit(n, p->bitmap);
 		p = p->ary[n];
 		shift -= IDR_BITS;
 	}
@@ -1033,7 +1033,7 @@ void ida_remove(struct ida *ida, int id)
 		goto err;
 
 	n = idr_id & IDR_MASK;
-	__clear_bit(n, &p->bitmap);
+	__clear_bit(n, p->bitmap);
 
 	bitmap = (void *)p->ary[n];
 	if (!test_bit(offset, bitmap->bitmap))
@@ -1042,7 +1042,7 @@ void ida_remove(struct ida *ida, int id)
 	/* update bitmap and remove it if empty */
 	__clear_bit(offset, bitmap->bitmap);
 	if (--bitmap->nr_busy == 0) {
-		__set_bit(n, &p->bitmap);	/* to please idr_remove() */
+		__set_bit(n, p->bitmap);	/* to please idr_remove() */
 		idr_remove(&ida->idr, idr_id);
 		free_bitmap(ida, bitmap);
 	}
-- 
cgit v1.2.3


From 050a6b47d98e2bcea909c1129111e721668aaa2c Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 27 Feb 2013 17:05:06 -0800
Subject: idr: make idr_layer larger

With recent preloading changes, idr no longer keeps full layer cache per
each idr instance (used to be ~6.5k per idr on 64bit) and the previous
patch removed restriction on the bitmap size.  Both now allow us to have
larger layers.

Increase IDR_BITS to 8 regardless of BITS_PER_LONG.  Each layer is
slightly larger than 2k on 64bit and 1k on 32bit and carries 256 entries.
The size isn't too large, especially compared to what we used to waste on
per-idr caches, and 256 entries should be able to serve most use cases
with single layer.  The max tree depth is 4 which is much better than the
previous 6 on 64bit and 7 on 32bit.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/idr.h | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/idr.h b/include/linux/idr.h
index 63aa542da49b..43b87b1c77a3 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -17,14 +17,13 @@
 #include <linux/init.h>
 #include <linux/rcupdate.h>
 
-#if BITS_PER_LONG == 32
-# define IDR_BITS 5
-#elif BITS_PER_LONG == 64
-# define IDR_BITS 6
-#else
-# error "BITS_PER_LONG is not 32 or 64"
-#endif
-
+/*
+ * We want shallower trees and thus more bits covered at each layer.  8
+ * bits gives us large enough first layer for most use cases and maximum
+ * tree depth of 4.  Each idr_layer is slightly larger than 2k on 64bit and
+ * 1k on 32bit.
+ */
+#define IDR_BITS 8
 #define IDR_SIZE (1 << IDR_BITS)
 #define IDR_MASK ((1 << IDR_BITS)-1)
 
-- 
cgit v1.2.3


From 54616283c2948812a44240858ced610e7cacbde1 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 27 Feb 2013 17:05:07 -0800
Subject: idr: add idr_layer->prefix

Add a field which carries the prefix of ID the idr_layer covers.  This
will be used to implement lookup hint.

This patch doesn't make use of the new field and doesn't introduce any
behavior difference.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/idr.h |  1 +
 lib/idr.c           | 13 +++++++++++++
 2 files changed, 14 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/idr.h b/include/linux/idr.h
index 43b87b1c77a3..7b1c5c6f9a06 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -28,6 +28,7 @@
 #define IDR_MASK ((1 << IDR_BITS)-1)
 
 struct idr_layer {
+	int			prefix;	/* the ID prefix of this idr_layer */
 	DECLARE_BITMAP(bitmap, IDR_SIZE); /* A zero bit means "space here" */
 	struct idr_layer __rcu	*ary[1<<IDR_BITS];
 	int			count;	/* When zero, we can release it */
diff --git a/lib/idr.c b/lib/idr.c
index d66e75bfc1a0..5cd602936645 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -60,6 +60,16 @@ static int idr_max(int layers)
 	return (1 << bits) - 1;
 }
 
+/*
+ * Prefix mask for an idr_layer at @layer.  For layer 0, the prefix mask is
+ * all bits except for the lower IDR_BITS.  For layer 1, 2 * IDR_BITS, and
+ * so on.
+ */
+static int idr_layer_prefix_mask(int layer)
+{
+	return ~idr_max(layer + 1);
+}
+
 static struct idr_layer *get_from_free_list(struct idr *idp)
 {
 	struct idr_layer *p;
@@ -272,6 +282,7 @@ static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa,
 			if (!new)
 				return -ENOMEM;
 			new->layer = l-1;
+			new->prefix = id & idr_layer_prefix_mask(new->layer);
 			rcu_assign_pointer(p->ary[m], new);
 			p->count++;
 		}
@@ -313,6 +324,7 @@ build_up:
 			 * upwards.
 			 */
 			p->layer++;
+			WARN_ON_ONCE(p->prefix);
 			continue;
 		}
 		if (!(new = idr_layer_alloc(gfp_mask, layer_idr))) {
@@ -334,6 +346,7 @@ build_up:
 		new->ary[0] = p;
 		new->count = 1;
 		new->layer = layers-1;
+		new->prefix = id & idr_layer_prefix_mask(new->layer);
 		if (bitmap_full(p->bitmap, IDR_SIZE))
 			__set_bit(0, new->bitmap);
 		p = new;
-- 
cgit v1.2.3


From 0ffc2a9c8072969253a20821c2c733a2cbb4c7c7 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 27 Feb 2013 17:05:08 -0800
Subject: idr: implement lookup hint

While idr lookup isn't a particularly heavy operation, it still is too
substantial to use in hot paths without worrying about the performance
implications.  With recent changes, each idr_layer covers 256 slots
which should be enough to cover most use cases with single idr_layer
making lookup hint very attractive.

This patch adds idr->hint which points to the idr_layer which
allocated an ID most recently and the fast path lookup becomes

	if (look up target's prefix matches that of the hinted layer)
		return hint->ary[ID's offset in the leaf layer];

which can be inlined.

idr->hint is set to the leaf node on idr_fill_slot() and cleared from
free_layer().

[andriy.shevchenko@linux.intel.com: always do slow path when hint is uninitialized]
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Sasha Levin <sasha.levin@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/idr.h | 25 ++++++++++++++++++++++++-
 lib/idr.c           | 38 ++++++++++++++++----------------------
 2 files changed, 40 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/idr.h b/include/linux/idr.h
index 7b1c5c6f9a06..a6f38b5c34e4 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -37,6 +37,7 @@ struct idr_layer {
 };
 
 struct idr {
+	struct idr_layer __rcu	*hint;	/* the last layer allocated from */
 	struct idr_layer __rcu	*top;
 	struct idr_layer	*id_free;
 	int			layers;	/* only valid w/o concurrent changes */
@@ -71,7 +72,7 @@ struct idr {
  * This is what we export.
  */
 
-void *idr_find(struct idr *idp, int id);
+void *idr_find_slowpath(struct idr *idp, int id);
 int idr_pre_get(struct idr *idp, gfp_t gfp_mask);
 int idr_get_new_above(struct idr *idp, void *ptr, int starting_id, int *id);
 void idr_preload(gfp_t gfp_mask);
@@ -96,6 +97,28 @@ static inline void idr_preload_end(void)
 	preempt_enable();
 }
 
+/**
+ * idr_find - return pointer for given id
+ * @idp: idr handle
+ * @id: lookup key
+ *
+ * Return the pointer given the id it has been registered with.  A %NULL
+ * return indicates that @id is not valid or you passed %NULL in
+ * idr_get_new().
+ *
+ * This function can be called under rcu_read_lock(), given that the leaf
+ * pointers lifetimes are correctly managed.
+ */
+static inline void *idr_find(struct idr *idr, int id)
+{
+	struct idr_layer *hint = rcu_dereference_raw(idr->hint);
+
+	if (hint && (id & ~IDR_MASK) == hint->prefix)
+		return rcu_dereference_raw(hint->ary[id & IDR_MASK]);
+
+	return idr_find_slowpath(idr, id);
+}
+
 /**
  * idr_get_new - allocate new idr entry
  * @idp: idr handle
diff --git a/lib/idr.c b/lib/idr.c
index 5cd602936645..1a30272066c6 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -137,8 +137,10 @@ static void idr_layer_rcu_free(struct rcu_head *head)
 	kmem_cache_free(idr_layer_cache, layer);
 }
 
-static inline void free_layer(struct idr_layer *p)
+static inline void free_layer(struct idr *idr, struct idr_layer *p)
 {
+	if (idr->hint && idr->hint == p)
+		RCU_INIT_POINTER(idr->hint, NULL);
 	call_rcu(&p->rcu_head, idr_layer_rcu_free);
 }
 
@@ -363,8 +365,12 @@ build_up:
  * @id and @pa are from a successful allocation from idr_get_empty_slot().
  * Install the user pointer @ptr and mark the slot full.
  */
-static void idr_fill_slot(void *ptr, int id, struct idr_layer **pa)
+static void idr_fill_slot(struct idr *idr, void *ptr, int id,
+			  struct idr_layer **pa)
 {
+	/* update hint used for lookup, cleared from free_layer() */
+	rcu_assign_pointer(idr->hint, pa[0]);
+
 	rcu_assign_pointer(pa[0]->ary[id & IDR_MASK], (struct idr_layer *)ptr);
 	pa[0]->count++;
 	idr_mark_full(pa, id);
@@ -397,7 +403,7 @@ int idr_get_new_above(struct idr *idp, void *ptr, int starting_id, int *id)
 	if (rv < 0)
 		return rv == -ENOMEM ? -EAGAIN : rv;
 
-	idr_fill_slot(ptr, rv, pa);
+	idr_fill_slot(idp, ptr, rv, pa);
 	*id = rv;
 	return 0;
 }
@@ -504,7 +510,7 @@ int idr_alloc(struct idr *idr, void *ptr, int start, int end, gfp_t gfp_mask)
 	if (unlikely(id > max))
 		return -ENOSPC;
 
-	idr_fill_slot(ptr, id, pa);
+	idr_fill_slot(idr, ptr, id, pa);
 	return id;
 }
 EXPORT_SYMBOL_GPL(idr_alloc);
@@ -541,14 +547,14 @@ static void sub_remove(struct idr *idp, int shift, int id)
 		to_free = NULL;
 		while(*paa && ! --((**paa)->count)){
 			if (to_free)
-				free_layer(to_free);
+				free_layer(idp, to_free);
 			to_free = **paa;
 			**paa-- = NULL;
 		}
 		if (!*paa)
 			idp->layers = 0;
 		if (to_free)
-			free_layer(to_free);
+			free_layer(idp, to_free);
 	} else
 		idr_remove_warning(id);
 }
@@ -581,7 +587,7 @@ void idr_remove(struct idr *idp, int id)
 		--idp->layers;
 		to_free->count = 0;
 		bitmap_clear(to_free->bitmap, 0, IDR_SIZE);
-		free_layer(to_free);
+		free_layer(idp, to_free);
 	}
 	while (idp->id_free_cnt >= MAX_IDR_FREE) {
 		p = get_from_free_list(idp);
@@ -622,7 +628,7 @@ void __idr_remove_all(struct idr *idp)
 		/* Get the highest bit that the above add changed from 0->1. */
 		while (n < fls(id ^ bt_mask)) {
 			if (p)
-				free_layer(p);
+				free_layer(idp, p);
 			n += IDR_BITS;
 			p = *--paa;
 		}
@@ -655,19 +661,7 @@ void idr_destroy(struct idr *idp)
 }
 EXPORT_SYMBOL(idr_destroy);
 
-/**
- * idr_find - return pointer for given id
- * @idp: idr handle
- * @id: lookup key
- *
- * Return the pointer given the id it has been registered with.  A %NULL
- * return indicates that @id is not valid or you passed %NULL in
- * idr_get_new().
- *
- * This function can be called under rcu_read_lock(), given that the leaf
- * pointers lifetimes are correctly managed.
- */
-void *idr_find(struct idr *idp, int id)
+void *idr_find_slowpath(struct idr *idp, int id)
 {
 	int n;
 	struct idr_layer *p;
@@ -691,7 +685,7 @@ void *idr_find(struct idr *idp, int id)
 	}
 	return((void *)p);
 }
-EXPORT_SYMBOL(idr_find);
+EXPORT_SYMBOL(idr_find_slowpath);
 
 /**
  * idr_for_each - iterate through all stored pointers
-- 
cgit v1.2.3


From 59fb1b9f5d9910c2eb97107dd0eb7e3bce8f0dde Mon Sep 17 00:00:00 2001
From: "Robert P. J. Day" <rpjday@crashcourse.ca>
Date: Wed, 27 Feb 2013 17:05:11 -0800
Subject: ipmi: remove superfluous kernel/userspace explanation

Given the obvious distinction between kernel and userspace supported
by uapi/, it seems unnecessary to comment on that.

Signed-off-by: Robert P. J. Day <rpjday@crashcourse.ca>
Signed-off-by: Corey Minyard <cminyard@mvista.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/ipmi.h      |  4 ----
 include/uapi/linux/ipmi.h | 10 +---------
 2 files changed, 1 insertion(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ipmi.h b/include/linux/ipmi.h
index 1487e7906bbd..1f9f56e28851 100644
--- a/include/linux/ipmi.h
+++ b/include/linux/ipmi.h
@@ -35,10 +35,6 @@
 
 #include <uapi/linux/ipmi.h>
 
-
-/*
- * The in-kernel interface.
- */
 #include <linux/list.h>
 #include <linux/proc_fs.h>
 
diff --git a/include/uapi/linux/ipmi.h b/include/uapi/linux/ipmi.h
index 33fbc99b3812..7b26a62e5707 100644
--- a/include/uapi/linux/ipmi.h
+++ b/include/uapi/linux/ipmi.h
@@ -59,15 +59,7 @@
  * if it becomes full and it is queried once a second to see if
  * anything is in it.  Incoming commands to the driver will get
  * delivered as commands.
- *
- * This driver provides two main interfaces: one for in-kernel
- * applications and another for userland applications.  The
- * capabilities are basically the same for both interface, although
- * the interfaces are somewhat different.  The stuff in the
- * #ifdef __KERNEL__ below is the in-kernel interface.  The userland
- * interface is defined later in the file.  */
-
-
+ */
 
 /*
  * This is an overlay for all the address types, so it's easy to
-- 
cgit v1.2.3


From 1d730c49a91dc5b7660269b98ad76e5a9b85740f Mon Sep 17 00:00:00 2001
From: Martin Sustrik <sustrik@250bpm.com>
Date: Wed, 27 Feb 2013 17:05:42 -0800
Subject: include/linux/eventfd.h: fix incorrect filename is a comment

Comment in eventfd.h referred to 'include/asm-generic/fcntl.h'
while the correct path is 'include/uapi/asm-generic/fcntl.h'.

Signed-off-by: Martin Sustrik <sustrik@250bpm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/eventfd.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h
index 3c3ef19a625a..cf5d2af61b81 100644
--- a/include/linux/eventfd.h
+++ b/include/linux/eventfd.h
@@ -13,7 +13,7 @@
 #include <linux/wait.h>
 
 /*
- * CAREFUL: Check include/asm-generic/fcntl.h when defining
+ * CAREFUL: Check include/uapi/asm-generic/fcntl.h when defining
  * new flags, since they might collide with O_* ones. We want
  * to re-use O_* flags that couldn't possibly have a meaning
  * from eventfd, in order to leave a free define-space for
-- 
cgit v1.2.3


From b67bfe0d42cac56c512dd5da4b1b347a23f4b70a Mon Sep 17 00:00:00 2001
From: Sasha Levin <sasha.levin@oracle.com>
Date: Wed, 27 Feb 2013 17:06:00 -0800
Subject: hlist: drop the node parameter from iterators

I'm not sure why, but the hlist for each entry iterators were conceived

        list_for_each_entry(pos, head, member)

The hlist ones were greedy and wanted an extra parameter:

        hlist_for_each_entry(tpos, pos, head, member)

Why did they need an extra pos parameter? I'm not quite sure. Not only
they don't really need it, it also prevents the iterator from looking
exactly like the list iterator, which is unfortunate.

Besides the semantic patch, there was some manual work required:

 - Fix up the actual hlist iterators in linux/list.h
 - Fix up the declaration of other iterators based on the hlist ones.
 - A very small amount of places were using the 'node' parameter, this
 was modified to use 'obj->member' instead.
 - Coccinelle didn't handle the hlist_for_each_entry_safe iterator
 properly, so those had to be fixed up manually.

The semantic patch which is mostly the work of Peter Senna Tschudin is here:

@@
iterator name hlist_for_each_entry, hlist_for_each_entry_continue, hlist_for_each_entry_from, hlist_for_each_entry_rcu, hlist_for_each_entry_rcu_bh, hlist_for_each_entry_continue_rcu_bh, for_each_busy_worker, ax25_uid_for_each, ax25_for_each, inet_bind_bucket_for_each, sctp_for_each_hentry, sk_for_each, sk_for_each_rcu, sk_for_each_from, sk_for_each_safe, sk_for_each_bound, hlist_for_each_entry_safe, hlist_for_each_entry_continue_rcu, nr_neigh_for_each, nr_neigh_for_each_safe, nr_node_for_each, nr_node_for_each_safe, for_each_gfn_indirect_valid_sp, for_each_gfn_sp, for_each_host;

type T;
expression a,c,d,e;
identifier b;
statement S;
@@

-T b;
    <+... when != b
(
hlist_for_each_entry(a,
- b,
c, d) S
|
hlist_for_each_entry_continue(a,
- b,
c) S
|
hlist_for_each_entry_from(a,
- b,
c) S
|
hlist_for_each_entry_rcu(a,
- b,
c, d) S
|
hlist_for_each_entry_rcu_bh(a,
- b,
c, d) S
|
hlist_for_each_entry_continue_rcu_bh(a,
- b,
c) S
|
for_each_busy_worker(a, c,
- b,
d) S
|
ax25_uid_for_each(a,
- b,
c) S
|
ax25_for_each(a,
- b,
c) S
|
inet_bind_bucket_for_each(a,
- b,
c) S
|
sctp_for_each_hentry(a,
- b,
c) S
|
sk_for_each(a,
- b,
c) S
|
sk_for_each_rcu(a,
- b,
c) S
|
sk_for_each_from
-(a, b)
+(a)
S
+ sk_for_each_from(a) S
|
sk_for_each_safe(a,
- b,
c, d) S
|
sk_for_each_bound(a,
- b,
c) S
|
hlist_for_each_entry_safe(a,
- b,
c, d, e) S
|
hlist_for_each_entry_continue_rcu(a,
- b,
c) S
|
nr_neigh_for_each(a,
- b,
c) S
|
nr_neigh_for_each_safe(a,
- b,
c, d) S
|
nr_node_for_each(a,
- b,
c) S
|
nr_node_for_each_safe(a,
- b,
c, d) S
|
- for_each_gfn_sp(a, c, d, b) S
+ for_each_gfn_sp(a, c, d) S
|
- for_each_gfn_indirect_valid_sp(a, c, d, b) S
+ for_each_gfn_indirect_valid_sp(a, c, d) S
|
for_each_host(a,
- b,
c) S
|
for_each_host_safe(a,
- b,
c, d) S
|
for_each_mesh_entry(a,
- b,
c, d) S
)
    ...+>

[akpm@linux-foundation.org: drop bogus change from net/ipv4/raw.c]
[akpm@linux-foundation.org: drop bogus hunk from net/ipv6/raw.c]
[akpm@linux-foundation.org: checkpatch fixes]
[akpm@linux-foundation.org: fix warnings]
[akpm@linux-foudnation.org: redo intrusive kvm changes]
Tested-by: Peter Senna Tschudin <peter.senna@gmail.com>
Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: Sasha Levin <sasha.levin@oracle.com>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm/kernel/kprobes.c                          |  6 +-
 arch/ia64/kernel/kprobes.c                         |  8 +--
 arch/mips/kernel/kprobes.c                         |  6 +-
 arch/powerpc/kernel/kprobes.c                      |  6 +-
 arch/powerpc/kvm/book3s_mmu_hpte.c                 | 18 ++---
 arch/s390/kernel/kprobes.c                         |  8 +--
 arch/s390/pci/pci_msi.c                            |  3 +-
 arch/sh/kernel/kprobes.c                           |  6 +-
 arch/sparc/kernel/kprobes.c                        |  6 +-
 arch/sparc/kernel/ldc.c                            |  3 +-
 arch/x86/kernel/kprobes/core.c                     |  8 +--
 arch/x86/kvm/mmu.c                                 | 26 +++----
 block/blk-cgroup.c                                 |  6 +-
 block/blk-ioc.c                                    |  3 +-
 block/bsg.c                                        |  3 +-
 block/cfq-iosched.c                                |  3 +-
 block/elevator.c                                   |  4 +-
 crypto/algapi.c                                    |  6 +-
 drivers/atm/atmtcp.c                               |  6 +-
 drivers/atm/eni.c                                  |  3 +-
 drivers/atm/he.c                                   |  3 +-
 drivers/atm/solos-pci.c                            |  3 +-
 drivers/clk/clk.c                                  | 59 ++++++----------
 drivers/gpu/drm/drm_hashtab.c                      | 19 +++--
 drivers/infiniband/core/cma.c                      |  3 +-
 drivers/infiniband/core/fmr_pool.c                 |  3 +-
 drivers/isdn/mISDN/socket.c                        |  3 +-
 drivers/isdn/mISDN/stack.c                         |  3 +-
 drivers/md/dm-bio-prison.c                         |  3 +-
 drivers/md/dm-bufio.c                              |  3 +-
 drivers/md/dm-snap.c                               |  3 +-
 .../md/persistent-data/dm-transaction-manager.c    |  7 +-
 drivers/md/raid5.c                                 |  3 +-
 drivers/misc/sgi-gru/grutlbpurge.c                 |  3 +-
 drivers/misc/vmw_vmci/vmci_doorbell.c              |  7 +-
 drivers/misc/vmw_vmci/vmci_resource.c              |  6 +-
 drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c   | 18 ++---
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c      |  8 +--
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c     | 17 +++--
 drivers/net/ethernet/mellanox/mlx4/en_rx.c         |  4 +-
 drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c     | 10 +--
 drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c     | 10 +--
 drivers/net/ethernet/sun/sunvnet.c                 |  3 +-
 drivers/net/macvlan.c                              |  6 +-
 drivers/net/tun.c                                  | 15 ++--
 drivers/net/vxlan.c                                | 12 ++--
 drivers/net/wireless/zd1201.c                      |  7 +-
 drivers/pci/pci.c                                  | 12 ++--
 drivers/staging/android/binder.c                   | 19 ++---
 drivers/target/tcm_fc/tfc_sess.c                   | 12 ++--
 fs/affs/amigaffs.c                                 |  3 +-
 fs/aio.c                                           |  3 +-
 fs/cifs/inode.c                                    |  3 +-
 fs/dcache.c                                        |  9 +--
 fs/dlm/lowcomms.c                                  | 11 ++-
 fs/ecryptfs/messaging.c                            |  6 +-
 fs/exportfs/expfs.c                                |  3 +-
 fs/fat/inode.c                                     |  3 +-
 fs/fat/nfs.c                                       |  3 +-
 fs/fscache/cookie.c                                | 11 ++-
 fs/inode.c                                         | 19 ++---
 fs/lockd/host.c                                    | 29 ++++----
 fs/lockd/svcsubs.c                                 |  7 +-
 fs/nfs/pnfs_dev.c                                  |  9 +--
 fs/nfsd/nfscache.c                                 |  3 +-
 fs/notify/fsnotify.c                               |  3 +-
 fs/notify/inode_mark.c                             | 19 +++--
 fs/notify/vfsmount_mark.c                          | 19 +++--
 fs/ocfs2/dcache.c                                  |  3 +-
 fs/ocfs2/dlm/dlmrecovery.c                         |  6 +-
 fs/super.c                                         |  6 +-
 fs/sysfs/bin.c                                     |  3 +-
 fs/xfs/xfs_log_recover.c                           |  3 +-
 include/linux/hashtable.h                          | 40 +++++------
 include/linux/if_team.h                            |  6 +-
 include/linux/list.h                               | 49 ++++++-------
 include/linux/pid.h                                |  3 +-
 include/linux/rculist.h                            | 56 +++++++--------
 include/net/ax25.h                                 |  8 +--
 include/net/inet_hashtables.h                      |  4 +-
 include/net/inet_timewait_sock.h                   |  8 +--
 include/net/netrom.h                               | 16 ++---
 include/net/sch_generic.h                          |  3 +-
 include/net/sctp/sctp.h                            |  4 +-
 include/net/sock.h                                 | 21 +++---
 kernel/cgroup.c                                    | 12 ++--
 kernel/events/core.c                               |  6 +-
 kernel/kprobes.c                                   | 35 ++++-----
 kernel/pid.c                                       |  3 +-
 kernel/sched/core.c                                |  6 +-
 kernel/smpboot.c                                   |  2 +-
 kernel/trace/ftrace.c                              | 24 +++----
 kernel/trace/trace_output.c                        |  3 +-
 kernel/tracepoint.c                                |  6 +-
 kernel/user-return-notifier.c                      |  4 +-
 kernel/user.c                                      |  3 +-
 kernel/workqueue.c                                 | 13 ++--
 lib/debugobjects.c                                 | 21 +++---
 lib/lru_cache.c                                    |  3 +-
 mm/huge_memory.c                                   |  3 +-
 mm/kmemleak.c                                      |  9 ++-
 mm/ksm.c                                           | 15 ++--
 mm/mmu_notifier.c                                  | 18 ++---
 net/9p/error.c                                     |  4 +-
 net/9p/trans_virtio.c                              |  2 +-
 net/appletalk/ddp.c                                |  9 +--
 net/atm/common.c                                   |  7 +-
 net/atm/lec.c                                      | 66 ++++++++---------
 net/atm/signaling.c                                |  3 +-
 net/ax25/af_ax25.c                                 | 15 ++--
 net/ax25/ax25_ds_subr.c                            |  6 +-
 net/ax25/ax25_ds_timer.c                           |  3 +-
 net/ax25/ax25_iface.c                              |  3 +-
 net/ax25/ax25_uid.c                                | 11 ++-
 net/batman-adv/bat_iv_ogm.c                        | 12 ++--
 net/batman-adv/bridge_loop_avoidance.c             | 39 ++++------
 net/batman-adv/distributed-arp-table.c             | 15 ++--
 net/batman-adv/gateway_client.c                    | 13 ++--
 net/batman-adv/main.c                              |  6 +-
 net/batman-adv/originator.c                        | 31 ++++----
 net/batman-adv/originator.h                        |  3 +-
 net/batman-adv/routing.c                           |  6 +-
 net/batman-adv/send.c                              |  6 +-
 net/batman-adv/translation-table.c                 | 82 +++++++++-------------
 net/batman-adv/vis.c                               | 38 ++++------
 net/bluetooth/hci_sock.c                           | 15 ++--
 net/bluetooth/rfcomm/sock.c                        | 13 ++--
 net/bluetooth/sco.c                                | 14 ++--
 net/bridge/br_fdb.c                                | 23 +++---
 net/bridge/br_mdb.c                                |  6 +-
 net/bridge/br_multicast.c                          | 25 +++----
 net/can/af_can.c                                   | 18 +++--
 net/can/gw.c                                       | 15 ++--
 net/can/proc.c                                     |  3 +-
 net/core/dev.c                                     | 12 ++--
 net/core/flow.c                                    | 11 ++-
 net/core/net-procfs.c                              |  3 +-
 net/core/rtnetlink.c                               |  3 +-
 net/decnet/af_decnet.c                             |  9 +--
 net/decnet/dn_table.c                              | 13 ++--
 net/ieee802154/dgram.c                             |  3 +-
 net/ieee802154/raw.c                               |  3 +-
 net/ipv4/devinet.c                                 | 10 +--
 net/ipv4/fib_frontend.c                            | 15 ++--
 net/ipv4/fib_semantics.c                           | 23 +++---
 net/ipv4/fib_trie.c                                | 33 ++++-----
 net/ipv4/inet_connection_sock.c                    | 10 ++-
 net/ipv4/inet_fragment.c                           | 10 ++-
 net/ipv4/inet_hashtables.c                         |  8 +--
 net/ipv4/inet_timewait_sock.c                      |  7 +-
 net/ipv4/raw.c                                     |  8 +--
 net/ipv4/tcp_ipv4.c                                |  7 +-
 net/ipv6/addrconf.c                                | 32 +++------
 net/ipv6/addrlabel.c                               | 18 +++--
 net/ipv6/inet6_connection_sock.c                   |  5 +-
 net/ipv6/ip6_fib.c                                 | 12 ++--
 net/ipv6/raw.c                                     |  3 +-
 net/ipv6/xfrm6_tunnel.c                            | 10 ++-
 net/ipx/af_ipx.c                                   | 16 ++---
 net/ipx/ipx_proc.c                                 |  5 +-
 net/iucv/af_iucv.c                                 | 21 ++----
 net/key/af_key.c                                   |  3 +-
 net/l2tp/l2tp_core.c                               | 12 ++--
 net/l2tp/l2tp_ip.c                                 |  3 +-
 net/l2tp/l2tp_ip6.c                                |  3 +-
 net/llc/llc_sap.c                                  |  3 +-
 net/mac80211/mesh_pathtbl.c                        | 45 +++++-------
 net/netfilter/ipvs/ip_vs_conn.c                    | 26 +++----
 net/netfilter/nf_conntrack_expect.c                | 17 ++---
 net/netfilter/nf_conntrack_helper.c                | 13 ++--
 net/netfilter/nf_conntrack_netlink.c               |  9 ++-
 net/netfilter/nf_conntrack_sip.c                   |  8 +--
 net/netfilter/nf_nat_core.c                        |  3 +-
 net/netfilter/nfnetlink_cthelper.c                 | 17 ++---
 net/netfilter/nfnetlink_log.c                      |  7 +-
 net/netfilter/nfnetlink_queue_core.c               | 10 ++-
 net/netfilter/xt_RATEEST.c                         |  3 +-
 net/netfilter/xt_connlimit.c                       |  8 +--
 net/netfilter/xt_hashlimit.c                       | 16 ++---
 net/netlink/af_netlink.c                           | 30 +++-----
 net/netrom/af_netrom.c                             | 12 ++--
 net/netrom/nr_route.c                              | 30 ++++----
 net/nfc/llcp/llcp.c                                | 16 ++---
 net/openvswitch/datapath.c                         | 10 ++-
 net/openvswitch/flow.c                             | 13 ++--
 net/openvswitch/vport.c                            |  3 +-
 net/packet/af_packet.c                             |  3 +-
 net/packet/diag.c                                  |  3 +-
 net/phonet/pep.c                                   |  3 +-
 net/phonet/socket.c                                |  9 +--
 net/rds/bind.c                                     |  3 +-
 net/rds/connection.c                               |  9 +--
 net/rose/af_rose.c                                 | 14 ++--
 net/sched/sch_api.c                                |  4 +-
 net/sched/sch_cbq.c                                | 18 ++---
 net/sched/sch_drr.c                                | 10 ++-
 net/sched/sch_hfsc.c                               | 15 ++--
 net/sched/sch_htb.c                                | 12 ++--
 net/sched/sch_qfq.c                                | 16 ++---
 net/sctp/endpointola.c                             |  3 +-
 net/sctp/input.c                                   |  6 +-
 net/sctp/proc.c                                    |  9 +--
 net/sctp/socket.c                                  |  9 ++-
 net/sunrpc/auth.c                                  |  5 +-
 net/sunrpc/cache.c                                 |  4 +-
 net/sunrpc/svcauth.c                               |  3 +-
 net/tipc/name_table.c                              |  8 +--
 net/tipc/node.c                                    |  3 +-
 net/unix/af_unix.c                                 |  6 +-
 net/unix/diag.c                                    |  7 +-
 net/x25/af_x25.c                                   | 12 ++--
 net/xfrm/xfrm_policy.c                             | 47 ++++++-------
 net/xfrm/xfrm_state.c                              | 42 +++++------
 security/integrity/ima/ima_queue.c                 |  3 +-
 security/selinux/avc.c                             | 19 ++---
 tools/perf/util/evlist.c                           |  3 +-
 virt/kvm/eventfd.c                                 |  3 +-
 virt/kvm/irq_comm.c                                | 18 ++---
 218 files changed, 987 insertions(+), 1494 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/kernel/kprobes.c b/arch/arm/kernel/kprobes.c
index 4dd41fc9e235..170e9f34003f 100644
--- a/arch/arm/kernel/kprobes.c
+++ b/arch/arm/kernel/kprobes.c
@@ -395,7 +395,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
 {
 	struct kretprobe_instance *ri = NULL;
 	struct hlist_head *head, empty_rp;
-	struct hlist_node *node, *tmp;
+	struct hlist_node *tmp;
 	unsigned long flags, orig_ret_address = 0;
 	unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline;
 
@@ -415,7 +415,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
 	 *       real return address, and all the rest will point to
 	 *       kretprobe_trampoline
 	 */
-	hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
+	hlist_for_each_entry_safe(ri, tmp, head, hlist) {
 		if (ri->task != current)
 			/* another task is sharing our hash bucket */
 			continue;
@@ -442,7 +442,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
 	kretprobe_assert(ri, orig_ret_address, trampoline_address);
 	kretprobe_hash_unlock(current, &flags);
 
-	hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
+	hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) {
 		hlist_del(&ri->hlist);
 		kfree(ri);
 	}
diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c
index 7026b29e277a..f8280a766a78 100644
--- a/arch/ia64/kernel/kprobes.c
+++ b/arch/ia64/kernel/kprobes.c
@@ -423,7 +423,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
 {
 	struct kretprobe_instance *ri = NULL;
 	struct hlist_head *head, empty_rp;
-	struct hlist_node *node, *tmp;
+	struct hlist_node *tmp;
 	unsigned long flags, orig_ret_address = 0;
 	unsigned long trampoline_address =
 		((struct fnptr *)kretprobe_trampoline)->ip;
@@ -444,7 +444,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
 	 *       real return address, and all the rest will point to
 	 *       kretprobe_trampoline
 	 */
-	hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
+	hlist_for_each_entry_safe(ri, tmp, head, hlist) {
 		if (ri->task != current)
 			/* another task is sharing our hash bucket */
 			continue;
@@ -461,7 +461,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
 
 	regs->cr_iip = orig_ret_address;
 
-	hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
+	hlist_for_each_entry_safe(ri, tmp, head, hlist) {
 		if (ri->task != current)
 			/* another task is sharing our hash bucket */
 			continue;
@@ -487,7 +487,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
 	kretprobe_hash_unlock(current, &flags);
 	preempt_enable_no_resched();
 
-	hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
+	hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) {
 		hlist_del(&ri->hlist);
 		kfree(ri);
 	}
diff --git a/arch/mips/kernel/kprobes.c b/arch/mips/kernel/kprobes.c
index 158467da9bc1..ce3f0807ad1e 100644
--- a/arch/mips/kernel/kprobes.c
+++ b/arch/mips/kernel/kprobes.c
@@ -598,7 +598,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p,
 {
 	struct kretprobe_instance *ri = NULL;
 	struct hlist_head *head, empty_rp;
-	struct hlist_node *node, *tmp;
+	struct hlist_node *tmp;
 	unsigned long flags, orig_ret_address = 0;
 	unsigned long trampoline_address = (unsigned long)kretprobe_trampoline;
 
@@ -618,7 +618,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p,
 	 *       real return address, and all the rest will point to
 	 *       kretprobe_trampoline
 	 */
-	hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
+	hlist_for_each_entry_safe(ri, tmp, head, hlist) {
 		if (ri->task != current)
 			/* another task is sharing our hash bucket */
 			continue;
@@ -645,7 +645,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p,
 	kretprobe_hash_unlock(current, &flags);
 	preempt_enable_no_resched();
 
-	hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
+	hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) {
 		hlist_del(&ri->hlist);
 		kfree(ri);
 	}
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index e88c64331819..11f5b03a0b06 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -310,7 +310,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p,
 {
 	struct kretprobe_instance *ri = NULL;
 	struct hlist_head *head, empty_rp;
-	struct hlist_node *node, *tmp;
+	struct hlist_node *tmp;
 	unsigned long flags, orig_ret_address = 0;
 	unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline;
 
@@ -330,7 +330,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p,
 	 *       real return address, and all the rest will point to
 	 *       kretprobe_trampoline
 	 */
-	hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
+	hlist_for_each_entry_safe(ri, tmp, head, hlist) {
 		if (ri->task != current)
 			/* another task is sharing our hash bucket */
 			continue;
@@ -357,7 +357,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p,
 	kretprobe_hash_unlock(current, &flags);
 	preempt_enable_no_resched();
 
-	hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
+	hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) {
 		hlist_del(&ri->hlist);
 		kfree(ri);
 	}
diff --git a/arch/powerpc/kvm/book3s_mmu_hpte.c b/arch/powerpc/kvm/book3s_mmu_hpte.c
index 2c86b0d63714..da8b13c4b776 100644
--- a/arch/powerpc/kvm/book3s_mmu_hpte.c
+++ b/arch/powerpc/kvm/book3s_mmu_hpte.c
@@ -124,7 +124,6 @@ static void kvmppc_mmu_pte_flush_all(struct kvm_vcpu *vcpu)
 {
 	struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
 	struct hpte_cache *pte;
-	struct hlist_node *node;
 	int i;
 
 	rcu_read_lock();
@@ -132,7 +131,7 @@ static void kvmppc_mmu_pte_flush_all(struct kvm_vcpu *vcpu)
 	for (i = 0; i < HPTEG_HASH_NUM_VPTE_LONG; i++) {
 		struct hlist_head *list = &vcpu3s->hpte_hash_vpte_long[i];
 
-		hlist_for_each_entry_rcu(pte, node, list, list_vpte_long)
+		hlist_for_each_entry_rcu(pte, list, list_vpte_long)
 			invalidate_pte(vcpu, pte);
 	}
 
@@ -143,7 +142,6 @@ static void kvmppc_mmu_pte_flush_page(struct kvm_vcpu *vcpu, ulong guest_ea)
 {
 	struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
 	struct hlist_head *list;
-	struct hlist_node *node;
 	struct hpte_cache *pte;
 
 	/* Find the list of entries in the map */
@@ -152,7 +150,7 @@ static void kvmppc_mmu_pte_flush_page(struct kvm_vcpu *vcpu, ulong guest_ea)
 	rcu_read_lock();
 
 	/* Check the list for matching entries and invalidate */
-	hlist_for_each_entry_rcu(pte, node, list, list_pte)
+	hlist_for_each_entry_rcu(pte, list, list_pte)
 		if ((pte->pte.eaddr & ~0xfffUL) == guest_ea)
 			invalidate_pte(vcpu, pte);
 
@@ -163,7 +161,6 @@ static void kvmppc_mmu_pte_flush_long(struct kvm_vcpu *vcpu, ulong guest_ea)
 {
 	struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
 	struct hlist_head *list;
-	struct hlist_node *node;
 	struct hpte_cache *pte;
 
 	/* Find the list of entries in the map */
@@ -173,7 +170,7 @@ static void kvmppc_mmu_pte_flush_long(struct kvm_vcpu *vcpu, ulong guest_ea)
 	rcu_read_lock();
 
 	/* Check the list for matching entries and invalidate */
-	hlist_for_each_entry_rcu(pte, node, list, list_pte_long)
+	hlist_for_each_entry_rcu(pte, list, list_pte_long)
 		if ((pte->pte.eaddr & 0x0ffff000UL) == guest_ea)
 			invalidate_pte(vcpu, pte);
 
@@ -207,7 +204,6 @@ static void kvmppc_mmu_pte_vflush_short(struct kvm_vcpu *vcpu, u64 guest_vp)
 {
 	struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
 	struct hlist_head *list;
-	struct hlist_node *node;
 	struct hpte_cache *pte;
 	u64 vp_mask = 0xfffffffffULL;
 
@@ -216,7 +212,7 @@ static void kvmppc_mmu_pte_vflush_short(struct kvm_vcpu *vcpu, u64 guest_vp)
 	rcu_read_lock();
 
 	/* Check the list for matching entries and invalidate */
-	hlist_for_each_entry_rcu(pte, node, list, list_vpte)
+	hlist_for_each_entry_rcu(pte, list, list_vpte)
 		if ((pte->pte.vpage & vp_mask) == guest_vp)
 			invalidate_pte(vcpu, pte);
 
@@ -228,7 +224,6 @@ static void kvmppc_mmu_pte_vflush_long(struct kvm_vcpu *vcpu, u64 guest_vp)
 {
 	struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
 	struct hlist_head *list;
-	struct hlist_node *node;
 	struct hpte_cache *pte;
 	u64 vp_mask = 0xffffff000ULL;
 
@@ -238,7 +233,7 @@ static void kvmppc_mmu_pte_vflush_long(struct kvm_vcpu *vcpu, u64 guest_vp)
 	rcu_read_lock();
 
 	/* Check the list for matching entries and invalidate */
-	hlist_for_each_entry_rcu(pte, node, list, list_vpte_long)
+	hlist_for_each_entry_rcu(pte, list, list_vpte_long)
 		if ((pte->pte.vpage & vp_mask) == guest_vp)
 			invalidate_pte(vcpu, pte);
 
@@ -266,7 +261,6 @@ void kvmppc_mmu_pte_vflush(struct kvm_vcpu *vcpu, u64 guest_vp, u64 vp_mask)
 void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end)
 {
 	struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
-	struct hlist_node *node;
 	struct hpte_cache *pte;
 	int i;
 
@@ -277,7 +271,7 @@ void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end)
 	for (i = 0; i < HPTEG_HASH_NUM_VPTE_LONG; i++) {
 		struct hlist_head *list = &vcpu3s->hpte_hash_vpte_long[i];
 
-		hlist_for_each_entry_rcu(pte, node, list, list_vpte_long)
+		hlist_for_each_entry_rcu(pte, list, list_vpte_long)
 			if ((pte->pte.raddr >= pa_start) &&
 			    (pte->pte.raddr < pa_end))
 				invalidate_pte(vcpu, pte);
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index d1c7214e157c..3388b2b2a07d 100644
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -354,7 +354,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p,
 {
 	struct kretprobe_instance *ri;
 	struct hlist_head *head, empty_rp;
-	struct hlist_node *node, *tmp;
+	struct hlist_node *tmp;
 	unsigned long flags, orig_ret_address;
 	unsigned long trampoline_address;
 	kprobe_opcode_t *correct_ret_addr;
@@ -379,7 +379,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p,
 	orig_ret_address = 0;
 	correct_ret_addr = NULL;
 	trampoline_address = (unsigned long) &kretprobe_trampoline;
-	hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
+	hlist_for_each_entry_safe(ri, tmp, head, hlist) {
 		if (ri->task != current)
 			/* another task is sharing our hash bucket */
 			continue;
@@ -398,7 +398,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p,
 	kretprobe_assert(ri, orig_ret_address, trampoline_address);
 
 	correct_ret_addr = ri->ret_addr;
-	hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
+	hlist_for_each_entry_safe(ri, tmp, head, hlist) {
 		if (ri->task != current)
 			/* another task is sharing our hash bucket */
 			continue;
@@ -427,7 +427,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p,
 	kretprobe_hash_unlock(current, &flags);
 	preempt_enable_no_resched();
 
-	hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
+	hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) {
 		hlist_del(&ri->hlist);
 		kfree(ri);
 	}
diff --git a/arch/s390/pci/pci_msi.c b/arch/s390/pci/pci_msi.c
index 90fd3482b9e2..0297931335e1 100644
--- a/arch/s390/pci/pci_msi.c
+++ b/arch/s390/pci/pci_msi.c
@@ -25,10 +25,9 @@ static DEFINE_SPINLOCK(msi_map_lock);
 
 struct msi_desc *__irq_get_msi_desc(unsigned int irq)
 {
-	struct hlist_node *entry;
 	struct msi_map *map;
 
-	hlist_for_each_entry_rcu(map, entry,
+	hlist_for_each_entry_rcu(map,
 			&msi_hash[msi_hashfn(irq)], msi_chain)
 		if (map->irq == irq)
 			return map->msi;
diff --git a/arch/sh/kernel/kprobes.c b/arch/sh/kernel/kprobes.c
index 1208b09e95c3..42b46e61a2d5 100644
--- a/arch/sh/kernel/kprobes.c
+++ b/arch/sh/kernel/kprobes.c
@@ -310,7 +310,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
 {
 	struct kretprobe_instance *ri = NULL;
 	struct hlist_head *head, empty_rp;
-	struct hlist_node *node, *tmp;
+	struct hlist_node *tmp;
 	unsigned long flags, orig_ret_address = 0;
 	unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline;
 
@@ -330,7 +330,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
 	 *       real return address, and all the rest will point to
 	 *       kretprobe_trampoline
 	 */
-	hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
+	hlist_for_each_entry_safe(ri, tmp, head, hlist) {
 		if (ri->task != current)
 			/* another task is sharing our hash bucket */
 			continue;
@@ -360,7 +360,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
 
 	preempt_enable_no_resched();
 
-	hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
+	hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) {
 		hlist_del(&ri->hlist);
 		kfree(ri);
 	}
diff --git a/arch/sparc/kernel/kprobes.c b/arch/sparc/kernel/kprobes.c
index a39d1ba5a119..e72212148d2a 100644
--- a/arch/sparc/kernel/kprobes.c
+++ b/arch/sparc/kernel/kprobes.c
@@ -511,7 +511,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
 {
 	struct kretprobe_instance *ri = NULL;
 	struct hlist_head *head, empty_rp;
-	struct hlist_node *node, *tmp;
+	struct hlist_node *tmp;
 	unsigned long flags, orig_ret_address = 0;
 	unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline;
 
@@ -531,7 +531,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
 	 *       real return address, and all the rest will point to
 	 *       kretprobe_trampoline
 	 */
-	hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
+	hlist_for_each_entry_safe(ri, tmp, head, hlist) {
 		if (ri->task != current)
 			/* another task is sharing our hash bucket */
 			continue;
@@ -559,7 +559,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
 	kretprobe_hash_unlock(current, &flags);
 	preempt_enable_no_resched();
 
-	hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
+	hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) {
 		hlist_del(&ri->hlist);
 		kfree(ri);
 	}
diff --git a/arch/sparc/kernel/ldc.c b/arch/sparc/kernel/ldc.c
index 9fcc6b4e93b3..54df554b82d9 100644
--- a/arch/sparc/kernel/ldc.c
+++ b/arch/sparc/kernel/ldc.c
@@ -953,9 +953,8 @@ static HLIST_HEAD(ldc_channel_list);
 static int __ldc_channel_exists(unsigned long id)
 {
 	struct ldc_channel *lp;
-	struct hlist_node *n;
 
-	hlist_for_each_entry(lp, n, &ldc_channel_list, list) {
+	hlist_for_each_entry(lp, &ldc_channel_list, list) {
 		if (lp->id == id)
 			return 1;
 	}
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index e124554598ee..3f06e6149981 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -652,7 +652,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
 {
 	struct kretprobe_instance *ri = NULL;
 	struct hlist_head *head, empty_rp;
-	struct hlist_node *node, *tmp;
+	struct hlist_node *tmp;
 	unsigned long flags, orig_ret_address = 0;
 	unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline;
 	kprobe_opcode_t *correct_ret_addr = NULL;
@@ -682,7 +682,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
 	 *	 will be the real return address, and all the rest will
 	 *	 point to kretprobe_trampoline.
 	 */
-	hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
+	hlist_for_each_entry_safe(ri, tmp, head, hlist) {
 		if (ri->task != current)
 			/* another task is sharing our hash bucket */
 			continue;
@@ -701,7 +701,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
 	kretprobe_assert(ri, orig_ret_address, trampoline_address);
 
 	correct_ret_addr = ri->ret_addr;
-	hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
+	hlist_for_each_entry_safe(ri, tmp, head, hlist) {
 		if (ri->task != current)
 			/* another task is sharing our hash bucket */
 			continue;
@@ -728,7 +728,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
 
 	kretprobe_hash_unlock(current, &flags);
 
-	hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
+	hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) {
 		hlist_del(&ri->hlist);
 		kfree(ri);
 	}
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 4ed3edbe06bd..956ca358108a 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1644,13 +1644,13 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp,
 static void kvm_mmu_commit_zap_page(struct kvm *kvm,
 				    struct list_head *invalid_list);
 
-#define for_each_gfn_sp(kvm, sp, gfn, pos)				\
-  hlist_for_each_entry(sp, pos,						\
+#define for_each_gfn_sp(kvm, sp, gfn)					\
+  hlist_for_each_entry(sp,						\
    &(kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)], hash_link)	\
 	if ((sp)->gfn != (gfn)) {} else
 
-#define for_each_gfn_indirect_valid_sp(kvm, sp, gfn, pos)		\
-  hlist_for_each_entry(sp, pos,						\
+#define for_each_gfn_indirect_valid_sp(kvm, sp, gfn)			\
+  hlist_for_each_entry(sp,						\
    &(kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)], hash_link)	\
 		if ((sp)->gfn != (gfn) || (sp)->role.direct ||		\
 			(sp)->role.invalid) {} else
@@ -1706,11 +1706,10 @@ static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
 static void kvm_sync_pages(struct kvm_vcpu *vcpu,  gfn_t gfn)
 {
 	struct kvm_mmu_page *s;
-	struct hlist_node *node;
 	LIST_HEAD(invalid_list);
 	bool flush = false;
 
-	for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn, node) {
+	for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn) {
 		if (!s->unsync)
 			continue;
 
@@ -1848,7 +1847,6 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 	union kvm_mmu_page_role role;
 	unsigned quadrant;
 	struct kvm_mmu_page *sp;
-	struct hlist_node *node;
 	bool need_sync = false;
 
 	role = vcpu->arch.mmu.base_role;
@@ -1863,7 +1861,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 		quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1;
 		role.quadrant = quadrant;
 	}
-	for_each_gfn_sp(vcpu->kvm, sp, gfn, node) {
+	for_each_gfn_sp(vcpu->kvm, sp, gfn) {
 		if (!need_sync && sp->unsync)
 			need_sync = true;
 
@@ -2151,14 +2149,13 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int goal_nr_mmu_pages)
 int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
 {
 	struct kvm_mmu_page *sp;
-	struct hlist_node *node;
 	LIST_HEAD(invalid_list);
 	int r;
 
 	pgprintk("%s: looking for gfn %llx\n", __func__, gfn);
 	r = 0;
 	spin_lock(&kvm->mmu_lock);
-	for_each_gfn_indirect_valid_sp(kvm, sp, gfn, node) {
+	for_each_gfn_indirect_valid_sp(kvm, sp, gfn) {
 		pgprintk("%s: gfn %llx role %x\n", __func__, gfn,
 			 sp->role.word);
 		r = 1;
@@ -2288,9 +2285,8 @@ static void __kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 static void kvm_unsync_pages(struct kvm_vcpu *vcpu,  gfn_t gfn)
 {
 	struct kvm_mmu_page *s;
-	struct hlist_node *node;
 
-	for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn, node) {
+	for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn) {
 		if (s->unsync)
 			continue;
 		WARN_ON(s->role.level != PT_PAGE_TABLE_LEVEL);
@@ -2302,10 +2298,9 @@ static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn,
 				  bool can_unsync)
 {
 	struct kvm_mmu_page *s;
-	struct hlist_node *node;
 	bool need_unsync = false;
 
-	for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn, node) {
+	for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn) {
 		if (!can_unsync)
 			return 1;
 
@@ -3933,7 +3928,6 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 	gfn_t gfn = gpa >> PAGE_SHIFT;
 	union kvm_mmu_page_role mask = { .word = 0 };
 	struct kvm_mmu_page *sp;
-	struct hlist_node *node;
 	LIST_HEAD(invalid_list);
 	u64 entry, gentry, *spte;
 	int npte;
@@ -3964,7 +3958,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 	kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE);
 
 	mask.cr0_wp = mask.cr4_pae = mask.nxe = 1;
-	for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn, node) {
+	for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn) {
 		if (detect_write_misaligned(sp, gpa, bytes) ||
 		      detect_write_flooding(sp)) {
 			zap_page |= !!kvm_mmu_prepare_zap_page(vcpu->kvm, sp,
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index b8858fb0cafa..8bdebb6781e1 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -357,7 +357,6 @@ static int blkcg_reset_stats(struct cgroup *cgroup, struct cftype *cftype,
 {
 	struct blkcg *blkcg = cgroup_to_blkcg(cgroup);
 	struct blkcg_gq *blkg;
-	struct hlist_node *n;
 	int i;
 
 	mutex_lock(&blkcg_pol_mutex);
@@ -368,7 +367,7 @@ static int blkcg_reset_stats(struct cgroup *cgroup, struct cftype *cftype,
 	 * stat updates.  This is a debug feature which shouldn't exist
 	 * anyway.  If you get hit by a race, retry.
 	 */
-	hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
+	hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) {
 		for (i = 0; i < BLKCG_MAX_POLS; i++) {
 			struct blkcg_policy *pol = blkcg_policy[i];
 
@@ -415,11 +414,10 @@ void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg,
 		       bool show_total)
 {
 	struct blkcg_gq *blkg;
-	struct hlist_node *n;
 	u64 total = 0;
 
 	spin_lock_irq(&blkcg->lock);
-	hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node)
+	hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node)
 		if (blkcg_policy_enabled(blkg->q, pol))
 			total += prfill(sf, blkg->pd[pol->plid], data);
 	spin_unlock_irq(&blkcg->lock);
diff --git a/block/blk-ioc.c b/block/blk-ioc.c
index fab4cdd3f7bb..9c4bb8266bc8 100644
--- a/block/blk-ioc.c
+++ b/block/blk-ioc.c
@@ -164,7 +164,6 @@ EXPORT_SYMBOL(put_io_context);
  */
 void put_io_context_active(struct io_context *ioc)
 {
-	struct hlist_node *n;
 	unsigned long flags;
 	struct io_cq *icq;
 
@@ -180,7 +179,7 @@ void put_io_context_active(struct io_context *ioc)
 	 */
 retry:
 	spin_lock_irqsave_nested(&ioc->lock, flags, 1);
-	hlist_for_each_entry(icq, n, &ioc->icq_list, ioc_node) {
+	hlist_for_each_entry(icq, &ioc->icq_list, ioc_node) {
 		if (icq->flags & ICQ_EXITED)
 			continue;
 		if (spin_trylock(icq->q->queue_lock)) {
diff --git a/block/bsg.c b/block/bsg.c
index 3ca92ebf6bbb..420a5a9f1b23 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -800,11 +800,10 @@ static struct bsg_device *bsg_add_device(struct inode *inode,
 static struct bsg_device *__bsg_get_device(int minor, struct request_queue *q)
 {
 	struct bsg_device *bd;
-	struct hlist_node *entry;
 
 	mutex_lock(&bsg_mutex);
 
-	hlist_for_each_entry(bd, entry, bsg_dev_idx_hash(minor), dev_list) {
+	hlist_for_each_entry(bd, bsg_dev_idx_hash(minor), dev_list) {
 		if (bd->queue == q) {
 			atomic_inc(&bd->ref_count);
 			goto found;
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index e62e9205b80a..ec52807cdd09 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -1435,7 +1435,6 @@ static int cfq_set_weight(struct cgroup *cgrp, struct cftype *cft, u64 val)
 {
 	struct blkcg *blkcg = cgroup_to_blkcg(cgrp);
 	struct blkcg_gq *blkg;
-	struct hlist_node *n;
 
 	if (val < CFQ_WEIGHT_MIN || val > CFQ_WEIGHT_MAX)
 		return -EINVAL;
@@ -1443,7 +1442,7 @@ static int cfq_set_weight(struct cgroup *cgrp, struct cftype *cft, u64 val)
 	spin_lock_irq(&blkcg->lock);
 	blkcg->cfq_weight = (unsigned int)val;
 
-	hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
+	hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) {
 		struct cfq_group *cfqg = blkg_to_cfqg(blkg);
 
 		if (cfqg && !cfqg->dev_weight)
diff --git a/block/elevator.c b/block/elevator.c
index 603b2c178740..d0acb31cc083 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -288,10 +288,10 @@ static struct request *elv_rqhash_find(struct request_queue *q, sector_t offset)
 {
 	struct elevator_queue *e = q->elevator;
 	struct hlist_head *hash_list = &e->hash[ELV_HASH_FN(offset)];
-	struct hlist_node *entry, *next;
+	struct hlist_node *next;
 	struct request *rq;
 
-	hlist_for_each_entry_safe(rq, entry, next, hash_list, hash) {
+	hlist_for_each_entry_safe(rq, next, hash_list, hash) {
 		BUG_ON(!ELV_ON_HASH(rq));
 
 		if (unlikely(!rq_mergeable(rq))) {
diff --git a/crypto/algapi.c b/crypto/algapi.c
index 08c57c8aec95..6149a6e09643 100644
--- a/crypto/algapi.c
+++ b/crypto/algapi.c
@@ -447,7 +447,7 @@ EXPORT_SYMBOL_GPL(crypto_register_template);
 void crypto_unregister_template(struct crypto_template *tmpl)
 {
 	struct crypto_instance *inst;
-	struct hlist_node *p, *n;
+	struct hlist_node *n;
 	struct hlist_head *list;
 	LIST_HEAD(users);
 
@@ -457,7 +457,7 @@ void crypto_unregister_template(struct crypto_template *tmpl)
 	list_del_init(&tmpl->list);
 
 	list = &tmpl->instances;
-	hlist_for_each_entry(inst, p, list, list) {
+	hlist_for_each_entry(inst, list, list) {
 		int err = crypto_remove_alg(&inst->alg, &users);
 		BUG_ON(err);
 	}
@@ -466,7 +466,7 @@ void crypto_unregister_template(struct crypto_template *tmpl)
 
 	up_write(&crypto_alg_sem);
 
-	hlist_for_each_entry_safe(inst, p, n, list, list) {
+	hlist_for_each_entry_safe(inst, n, list, list) {
 		BUG_ON(atomic_read(&inst->alg.cra_refcnt) != 1);
 		tmpl->free(inst);
 	}
diff --git a/drivers/atm/atmtcp.c b/drivers/atm/atmtcp.c
index b22d71cac54c..0e3f8f9dcd29 100644
--- a/drivers/atm/atmtcp.c
+++ b/drivers/atm/atmtcp.c
@@ -157,7 +157,6 @@ static int atmtcp_v_ioctl(struct atm_dev *dev,unsigned int cmd,void __user *arg)
 {
 	struct atm_cirange ci;
 	struct atm_vcc *vcc;
-	struct hlist_node *node;
 	struct sock *s;
 	int i;
 
@@ -171,7 +170,7 @@ static int atmtcp_v_ioctl(struct atm_dev *dev,unsigned int cmd,void __user *arg)
 	for(i = 0; i < VCC_HTABLE_SIZE; ++i) {
 		struct hlist_head *head = &vcc_hash[i];
 
-		sk_for_each(s, node, head) {
+		sk_for_each(s, head) {
 			vcc = atm_sk(s);
 			if (vcc->dev != dev)
 				continue;
@@ -264,12 +263,11 @@ static struct atm_vcc *find_vcc(struct atm_dev *dev, short vpi, int vci)
 {
         struct hlist_head *head;
         struct atm_vcc *vcc;
-        struct hlist_node *node;
         struct sock *s;
 
         head = &vcc_hash[vci & (VCC_HTABLE_SIZE -1)];
 
-        sk_for_each(s, node, head) {
+	sk_for_each(s, head) {
                 vcc = atm_sk(s);
                 if (vcc->dev == dev &&
                     vcc->vci == vci && vcc->vpi == vpi &&
diff --git a/drivers/atm/eni.c b/drivers/atm/eni.c
index c1eb6fa8ac35..b1955ba40d63 100644
--- a/drivers/atm/eni.c
+++ b/drivers/atm/eni.c
@@ -2093,7 +2093,6 @@ static unsigned char eni_phy_get(struct atm_dev *dev,unsigned long addr)
 
 static int eni_proc_read(struct atm_dev *dev,loff_t *pos,char *page)
 {
-	struct hlist_node *node;
 	struct sock *s;
 	static const char *signal[] = { "LOST","unknown","okay" };
 	struct eni_dev *eni_dev = ENI_DEV(dev);
@@ -2171,7 +2170,7 @@ static int eni_proc_read(struct atm_dev *dev,loff_t *pos,char *page)
 	for(i = 0; i < VCC_HTABLE_SIZE; ++i) {
 		struct hlist_head *head = &vcc_hash[i];
 
-		sk_for_each(s, node, head) {
+		sk_for_each(s, head) {
 			struct eni_vcc *eni_vcc;
 			int length;
 
diff --git a/drivers/atm/he.c b/drivers/atm/he.c
index 72b6960fa95f..d6891267f5bb 100644
--- a/drivers/atm/he.c
+++ b/drivers/atm/he.c
@@ -329,7 +329,6 @@ __find_vcc(struct he_dev *he_dev, unsigned cid)
 {
 	struct hlist_head *head;
 	struct atm_vcc *vcc;
-	struct hlist_node *node;
 	struct sock *s;
 	short vpi;
 	int vci;
@@ -338,7 +337,7 @@ __find_vcc(struct he_dev *he_dev, unsigned cid)
 	vci = cid & ((1 << he_dev->vcibits) - 1);
 	head = &vcc_hash[vci & (VCC_HTABLE_SIZE -1)];
 
-	sk_for_each(s, node, head) {
+	sk_for_each(s, head) {
 		vcc = atm_sk(s);
 		if (vcc->dev == he_dev->atm_dev &&
 		    vcc->vci == vci && vcc->vpi == vpi &&
diff --git a/drivers/atm/solos-pci.c b/drivers/atm/solos-pci.c
index 0474a89170b9..32784d18d1f7 100644
--- a/drivers/atm/solos-pci.c
+++ b/drivers/atm/solos-pci.c
@@ -896,12 +896,11 @@ static struct atm_vcc *find_vcc(struct atm_dev *dev, short vpi, int vci)
 {
 	struct hlist_head *head;
 	struct atm_vcc *vcc = NULL;
-	struct hlist_node *node;
 	struct sock *s;
 
 	read_lock(&vcc_sklist_lock);
 	head = &vcc_hash[vci & (VCC_HTABLE_SIZE -1)];
-	sk_for_each(s, node, head) {
+	sk_for_each(s, head) {
 		vcc = atm_sk(s);
 		if (vcc->dev == dev && vcc->vci == vci &&
 		    vcc->vpi == vpi && vcc->qos.rxtp.traffic_class != ATM_NONE &&
diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c
index fabbfe1a9253..ed87b2405806 100644
--- a/drivers/clk/clk.c
+++ b/drivers/clk/clk.c
@@ -52,31 +52,29 @@ static void clk_summary_show_subtree(struct seq_file *s, struct clk *c,
 				     int level)
 {
 	struct clk *child;
-	struct hlist_node *tmp;
 
 	if (!c)
 		return;
 
 	clk_summary_show_one(s, c, level);
 
-	hlist_for_each_entry(child, tmp, &c->children, child_node)
+	hlist_for_each_entry(child, &c->children, child_node)
 		clk_summary_show_subtree(s, child, level + 1);
 }
 
 static int clk_summary_show(struct seq_file *s, void *data)
 {
 	struct clk *c;
-	struct hlist_node *tmp;
 
 	seq_printf(s, "   clock                        enable_cnt  prepare_cnt  rate\n");
 	seq_printf(s, "---------------------------------------------------------------------\n");
 
 	mutex_lock(&prepare_lock);
 
-	hlist_for_each_entry(c, tmp, &clk_root_list, child_node)
+	hlist_for_each_entry(c, &clk_root_list, child_node)
 		clk_summary_show_subtree(s, c, 0);
 
-	hlist_for_each_entry(c, tmp, &clk_orphan_list, child_node)
+	hlist_for_each_entry(c, &clk_orphan_list, child_node)
 		clk_summary_show_subtree(s, c, 0);
 
 	mutex_unlock(&prepare_lock);
@@ -111,14 +109,13 @@ static void clk_dump_one(struct seq_file *s, struct clk *c, int level)
 static void clk_dump_subtree(struct seq_file *s, struct clk *c, int level)
 {
 	struct clk *child;
-	struct hlist_node *tmp;
 
 	if (!c)
 		return;
 
 	clk_dump_one(s, c, level);
 
-	hlist_for_each_entry(child, tmp, &c->children, child_node) {
+	hlist_for_each_entry(child, &c->children, child_node) {
 		seq_printf(s, ",");
 		clk_dump_subtree(s, child, level + 1);
 	}
@@ -129,21 +126,20 @@ static void clk_dump_subtree(struct seq_file *s, struct clk *c, int level)
 static int clk_dump(struct seq_file *s, void *data)
 {
 	struct clk *c;
-	struct hlist_node *tmp;
 	bool first_node = true;
 
 	seq_printf(s, "{");
 
 	mutex_lock(&prepare_lock);
 
-	hlist_for_each_entry(c, tmp, &clk_root_list, child_node) {
+	hlist_for_each_entry(c, &clk_root_list, child_node) {
 		if (!first_node)
 			seq_printf(s, ",");
 		first_node = false;
 		clk_dump_subtree(s, c, 0);
 	}
 
-	hlist_for_each_entry(c, tmp, &clk_orphan_list, child_node) {
+	hlist_for_each_entry(c, &clk_orphan_list, child_node) {
 		seq_printf(s, ",");
 		clk_dump_subtree(s, c, 0);
 	}
@@ -222,7 +218,6 @@ out:
 static int clk_debug_create_subtree(struct clk *clk, struct dentry *pdentry)
 {
 	struct clk *child;
-	struct hlist_node *tmp;
 	int ret = -EINVAL;;
 
 	if (!clk || !pdentry)
@@ -233,7 +228,7 @@ static int clk_debug_create_subtree(struct clk *clk, struct dentry *pdentry)
 	if (ret)
 		goto out;
 
-	hlist_for_each_entry(child, tmp, &clk->children, child_node)
+	hlist_for_each_entry(child, &clk->children, child_node)
 		clk_debug_create_subtree(child, clk->dentry);
 
 	ret = 0;
@@ -299,7 +294,6 @@ out:
 static int __init clk_debug_init(void)
 {
 	struct clk *clk;
-	struct hlist_node *tmp;
 	struct dentry *d;
 
 	rootdir = debugfs_create_dir("clk", NULL);
@@ -324,10 +318,10 @@ static int __init clk_debug_init(void)
 
 	mutex_lock(&prepare_lock);
 
-	hlist_for_each_entry(clk, tmp, &clk_root_list, child_node)
+	hlist_for_each_entry(clk, &clk_root_list, child_node)
 		clk_debug_create_subtree(clk, rootdir);
 
-	hlist_for_each_entry(clk, tmp, &clk_orphan_list, child_node)
+	hlist_for_each_entry(clk, &clk_orphan_list, child_node)
 		clk_debug_create_subtree(clk, orphandir);
 
 	inited = 1;
@@ -345,13 +339,12 @@ static inline int clk_debug_register(struct clk *clk) { return 0; }
 static void clk_disable_unused_subtree(struct clk *clk)
 {
 	struct clk *child;
-	struct hlist_node *tmp;
 	unsigned long flags;
 
 	if (!clk)
 		goto out;
 
-	hlist_for_each_entry(child, tmp, &clk->children, child_node)
+	hlist_for_each_entry(child, &clk->children, child_node)
 		clk_disable_unused_subtree(child);
 
 	spin_lock_irqsave(&enable_lock, flags);
@@ -384,14 +377,13 @@ out:
 static int clk_disable_unused(void)
 {
 	struct clk *clk;
-	struct hlist_node *tmp;
 
 	mutex_lock(&prepare_lock);
 
-	hlist_for_each_entry(clk, tmp, &clk_root_list, child_node)
+	hlist_for_each_entry(clk, &clk_root_list, child_node)
 		clk_disable_unused_subtree(clk);
 
-	hlist_for_each_entry(clk, tmp, &clk_orphan_list, child_node)
+	hlist_for_each_entry(clk, &clk_orphan_list, child_node)
 		clk_disable_unused_subtree(clk);
 
 	mutex_unlock(&prepare_lock);
@@ -484,12 +476,11 @@ static struct clk *__clk_lookup_subtree(const char *name, struct clk *clk)
 {
 	struct clk *child;
 	struct clk *ret;
-	struct hlist_node *tmp;
 
 	if (!strcmp(clk->name, name))
 		return clk;
 
-	hlist_for_each_entry(child, tmp, &clk->children, child_node) {
+	hlist_for_each_entry(child, &clk->children, child_node) {
 		ret = __clk_lookup_subtree(name, child);
 		if (ret)
 			return ret;
@@ -502,20 +493,19 @@ struct clk *__clk_lookup(const char *name)
 {
 	struct clk *root_clk;
 	struct clk *ret;
-	struct hlist_node *tmp;
 
 	if (!name)
 		return NULL;
 
 	/* search the 'proper' clk tree first */
-	hlist_for_each_entry(root_clk, tmp, &clk_root_list, child_node) {
+	hlist_for_each_entry(root_clk, &clk_root_list, child_node) {
 		ret = __clk_lookup_subtree(name, root_clk);
 		if (ret)
 			return ret;
 	}
 
 	/* if not found, then search the orphan tree */
-	hlist_for_each_entry(root_clk, tmp, &clk_orphan_list, child_node) {
+	hlist_for_each_entry(root_clk, &clk_orphan_list, child_node) {
 		ret = __clk_lookup_subtree(name, root_clk);
 		if (ret)
 			return ret;
@@ -812,7 +802,6 @@ static void __clk_recalc_rates(struct clk *clk, unsigned long msg)
 {
 	unsigned long old_rate;
 	unsigned long parent_rate = 0;
-	struct hlist_node *tmp;
 	struct clk *child;
 
 	old_rate = clk->rate;
@@ -832,7 +821,7 @@ static void __clk_recalc_rates(struct clk *clk, unsigned long msg)
 	if (clk->notifier_count && msg)
 		__clk_notify(clk, msg, old_rate, clk->rate);
 
-	hlist_for_each_entry(child, tmp, &clk->children, child_node)
+	hlist_for_each_entry(child, &clk->children, child_node)
 		__clk_recalc_rates(child, msg);
 }
 
@@ -878,7 +867,6 @@ EXPORT_SYMBOL_GPL(clk_get_rate);
  */
 static int __clk_speculate_rates(struct clk *clk, unsigned long parent_rate)
 {
-	struct hlist_node *tmp;
 	struct clk *child;
 	unsigned long new_rate;
 	int ret = NOTIFY_DONE;
@@ -895,7 +883,7 @@ static int __clk_speculate_rates(struct clk *clk, unsigned long parent_rate)
 	if (ret == NOTIFY_BAD)
 		goto out;
 
-	hlist_for_each_entry(child, tmp, &clk->children, child_node) {
+	hlist_for_each_entry(child, &clk->children, child_node) {
 		ret = __clk_speculate_rates(child, new_rate);
 		if (ret == NOTIFY_BAD)
 			break;
@@ -908,11 +896,10 @@ out:
 static void clk_calc_subtree(struct clk *clk, unsigned long new_rate)
 {
 	struct clk *child;
-	struct hlist_node *tmp;
 
 	clk->new_rate = new_rate;
 
-	hlist_for_each_entry(child, tmp, &clk->children, child_node) {
+	hlist_for_each_entry(child, &clk->children, child_node) {
 		if (child->ops->recalc_rate)
 			child->new_rate = child->ops->recalc_rate(child->hw, new_rate);
 		else
@@ -983,7 +970,6 @@ out:
  */
 static struct clk *clk_propagate_rate_change(struct clk *clk, unsigned long event)
 {
-	struct hlist_node *tmp;
 	struct clk *child, *fail_clk = NULL;
 	int ret = NOTIFY_DONE;
 
@@ -996,7 +982,7 @@ static struct clk *clk_propagate_rate_change(struct clk *clk, unsigned long even
 			fail_clk = clk;
 	}
 
-	hlist_for_each_entry(child, tmp, &clk->children, child_node) {
+	hlist_for_each_entry(child, &clk->children, child_node) {
 		clk = clk_propagate_rate_change(child, event);
 		if (clk)
 			fail_clk = clk;
@@ -1014,7 +1000,6 @@ static void clk_change_rate(struct clk *clk)
 	struct clk *child;
 	unsigned long old_rate;
 	unsigned long best_parent_rate = 0;
-	struct hlist_node *tmp;
 
 	old_rate = clk->rate;
 
@@ -1032,7 +1017,7 @@ static void clk_change_rate(struct clk *clk)
 	if (clk->notifier_count && old_rate != clk->rate)
 		__clk_notify(clk, POST_RATE_CHANGE, old_rate, clk->rate);
 
-	hlist_for_each_entry(child, tmp, &clk->children, child_node)
+	hlist_for_each_entry(child, &clk->children, child_node)
 		clk_change_rate(child);
 }
 
@@ -1348,7 +1333,7 @@ int __clk_init(struct device *dev, struct clk *clk)
 {
 	int i, ret = 0;
 	struct clk *orphan;
-	struct hlist_node *tmp, *tmp2;
+	struct hlist_node *tmp2;
 
 	if (!clk)
 		return -EINVAL;
@@ -1448,7 +1433,7 @@ int __clk_init(struct device *dev, struct clk *clk)
 	 * walk the list of orphan clocks and reparent any that are children of
 	 * this clock
 	 */
-	hlist_for_each_entry_safe(orphan, tmp, tmp2, &clk_orphan_list, child_node) {
+	hlist_for_each_entry_safe(orphan, tmp2, &clk_orphan_list, child_node) {
 		if (orphan->ops->get_parent) {
 			i = orphan->ops->get_parent(orphan->hw);
 			if (!strcmp(clk->name, orphan->parent_names[i]))
diff --git a/drivers/gpu/drm/drm_hashtab.c b/drivers/gpu/drm/drm_hashtab.c
index 80254547a3f8..7e4bae760e27 100644
--- a/drivers/gpu/drm/drm_hashtab.c
+++ b/drivers/gpu/drm/drm_hashtab.c
@@ -60,14 +60,13 @@ void drm_ht_verbose_list(struct drm_open_hash *ht, unsigned long key)
 {
 	struct drm_hash_item *entry;
 	struct hlist_head *h_list;
-	struct hlist_node *list;
 	unsigned int hashed_key;
 	int count = 0;
 
 	hashed_key = hash_long(key, ht->order);
 	DRM_DEBUG("Key is 0x%08lx, Hashed key is 0x%08x\n", key, hashed_key);
 	h_list = &ht->table[hashed_key];
-	hlist_for_each_entry(entry, list, h_list, head)
+	hlist_for_each_entry(entry, h_list, head)
 		DRM_DEBUG("count %d, key: 0x%08lx\n", count++, entry->key);
 }
 
@@ -76,14 +75,13 @@ static struct hlist_node *drm_ht_find_key(struct drm_open_hash *ht,
 {
 	struct drm_hash_item *entry;
 	struct hlist_head *h_list;
-	struct hlist_node *list;
 	unsigned int hashed_key;
 
 	hashed_key = hash_long(key, ht->order);
 	h_list = &ht->table[hashed_key];
-	hlist_for_each_entry(entry, list, h_list, head) {
+	hlist_for_each_entry(entry, h_list, head) {
 		if (entry->key == key)
-			return list;
+			return &entry->head;
 		if (entry->key > key)
 			break;
 	}
@@ -95,14 +93,13 @@ static struct hlist_node *drm_ht_find_key_rcu(struct drm_open_hash *ht,
 {
 	struct drm_hash_item *entry;
 	struct hlist_head *h_list;
-	struct hlist_node *list;
 	unsigned int hashed_key;
 
 	hashed_key = hash_long(key, ht->order);
 	h_list = &ht->table[hashed_key];
-	hlist_for_each_entry_rcu(entry, list, h_list, head) {
+	hlist_for_each_entry_rcu(entry, h_list, head) {
 		if (entry->key == key)
-			return list;
+			return &entry->head;
 		if (entry->key > key)
 			break;
 	}
@@ -113,19 +110,19 @@ int drm_ht_insert_item(struct drm_open_hash *ht, struct drm_hash_item *item)
 {
 	struct drm_hash_item *entry;
 	struct hlist_head *h_list;
-	struct hlist_node *list, *parent;
+	struct hlist_node *parent;
 	unsigned int hashed_key;
 	unsigned long key = item->key;
 
 	hashed_key = hash_long(key, ht->order);
 	h_list = &ht->table[hashed_key];
 	parent = NULL;
-	hlist_for_each_entry(entry, list, h_list, head) {
+	hlist_for_each_entry(entry, h_list, head) {
 		if (entry->key == key)
 			return -EINVAL;
 		if (entry->key > key)
 			break;
-		parent = list;
+		parent = &entry->head;
 	}
 	if (parent) {
 		hlist_add_after_rcu(parent, &item->head);
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index c32eeaa3f3b1..71c2c7116802 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -2204,10 +2204,9 @@ static int cma_check_port(struct rdma_bind_list *bind_list,
 {
 	struct rdma_id_private *cur_id;
 	struct sockaddr *addr, *cur_addr;
-	struct hlist_node *node;
 
 	addr = (struct sockaddr *) &id_priv->id.route.addr.src_addr;
-	hlist_for_each_entry(cur_id, node, &bind_list->owners, node) {
+	hlist_for_each_entry(cur_id, &bind_list->owners, node) {
 		if (id_priv == cur_id)
 			continue;
 
diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c
index 176c8f90f2bb..9f5ad7cc33c8 100644
--- a/drivers/infiniband/core/fmr_pool.c
+++ b/drivers/infiniband/core/fmr_pool.c
@@ -118,14 +118,13 @@ static inline struct ib_pool_fmr *ib_fmr_cache_lookup(struct ib_fmr_pool *pool,
 {
 	struct hlist_head *bucket;
 	struct ib_pool_fmr *fmr;
-	struct hlist_node *pos;
 
 	if (!pool->cache_bucket)
 		return NULL;
 
 	bucket = pool->cache_bucket + ib_fmr_hash(*page_list);
 
-	hlist_for_each_entry(fmr, pos, bucket, cache_node)
+	hlist_for_each_entry(fmr, bucket, cache_node)
 		if (io_virtual_address == fmr->io_virtual_address &&
 		    page_list_len      == fmr->page_list_len      &&
 		    !memcmp(page_list, fmr->page_list,
diff --git a/drivers/isdn/mISDN/socket.c b/drivers/isdn/mISDN/socket.c
index abe2d699b6f3..8b07f83d48ad 100644
--- a/drivers/isdn/mISDN/socket.c
+++ b/drivers/isdn/mISDN/socket.c
@@ -483,7 +483,6 @@ data_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
 {
 	struct sockaddr_mISDN *maddr = (struct sockaddr_mISDN *) addr;
 	struct sock *sk = sock->sk;
-	struct hlist_node *node;
 	struct sock *csk;
 	int err = 0;
 
@@ -508,7 +507,7 @@ data_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
 
 	if (sk->sk_protocol < ISDN_P_B_START) {
 		read_lock_bh(&data_sockets.lock);
-		sk_for_each(csk, node, &data_sockets.head) {
+		sk_for_each(csk, &data_sockets.head) {
 			if (sk == csk)
 				continue;
 			if (_pms(csk)->dev != _pms(sk)->dev)
diff --git a/drivers/isdn/mISDN/stack.c b/drivers/isdn/mISDN/stack.c
index deda591f70b9..9cb4b621fbc3 100644
--- a/drivers/isdn/mISDN/stack.c
+++ b/drivers/isdn/mISDN/stack.c
@@ -64,12 +64,11 @@ unlock:
 static void
 send_socklist(struct mISDN_sock_list *sl, struct sk_buff *skb)
 {
-	struct hlist_node	*node;
 	struct sock		*sk;
 	struct sk_buff		*cskb = NULL;
 
 	read_lock(&sl->lock);
-	sk_for_each(sk, node, &sl->head) {
+	sk_for_each(sk, &sl->head) {
 		if (sk->sk_state != MISDN_BOUND)
 			continue;
 		if (!cskb)
diff --git a/drivers/md/dm-bio-prison.c b/drivers/md/dm-bio-prison.c
index aefb78e3cbf9..d9d3f1c7b662 100644
--- a/drivers/md/dm-bio-prison.c
+++ b/drivers/md/dm-bio-prison.c
@@ -106,9 +106,8 @@ static struct dm_bio_prison_cell *__search_bucket(struct hlist_head *bucket,
 						  struct dm_cell_key *key)
 {
 	struct dm_bio_prison_cell *cell;
-	struct hlist_node *tmp;
 
-	hlist_for_each_entry(cell, tmp, bucket, list)
+	hlist_for_each_entry(cell, bucket, list)
 		if (keys_equal(&cell->key, key))
 			return cell;
 
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index 651ca79881dd..93205e32a004 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -859,9 +859,8 @@ static void __check_watermark(struct dm_bufio_client *c)
 static struct dm_buffer *__find(struct dm_bufio_client *c, sector_t block)
 {
 	struct dm_buffer *b;
-	struct hlist_node *hn;
 
-	hlist_for_each_entry(b, hn, &c->cache_hash[DM_BUFIO_HASH(block)],
+	hlist_for_each_entry(b, &c->cache_hash[DM_BUFIO_HASH(block)],
 			     hash_list) {
 		dm_bufio_cond_resched();
 		if (b->block == block)
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 59fc18ae52c2..10079e07edf4 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -227,12 +227,11 @@ static void stop_tracking_chunk(struct dm_snapshot *s, struct bio *bio)
 static int __chunk_is_tracked(struct dm_snapshot *s, chunk_t chunk)
 {
 	struct dm_snap_tracked_chunk *c;
-	struct hlist_node *hn;
 	int found = 0;
 
 	spin_lock_irq(&s->tracked_chunk_lock);
 
-	hlist_for_each_entry(c, hn,
+	hlist_for_each_entry(c,
 	    &s->tracked_chunk_hash[DM_TRACKED_CHUNK_HASH(chunk)], node) {
 		if (c->chunk == chunk) {
 			found = 1;
diff --git a/drivers/md/persistent-data/dm-transaction-manager.c b/drivers/md/persistent-data/dm-transaction-manager.c
index 7b17a1fdeaf9..81da1a26042e 100644
--- a/drivers/md/persistent-data/dm-transaction-manager.c
+++ b/drivers/md/persistent-data/dm-transaction-manager.c
@@ -46,10 +46,9 @@ static int is_shadow(struct dm_transaction_manager *tm, dm_block_t b)
 	int r = 0;
 	unsigned bucket = dm_hash_block(b, DM_HASH_MASK);
 	struct shadow_info *si;
-	struct hlist_node *n;
 
 	spin_lock(&tm->lock);
-	hlist_for_each_entry(si, n, tm->buckets + bucket, hlist)
+	hlist_for_each_entry(si, tm->buckets + bucket, hlist)
 		if (si->where == b) {
 			r = 1;
 			break;
@@ -81,14 +80,14 @@ static void insert_shadow(struct dm_transaction_manager *tm, dm_block_t b)
 static void wipe_shadow_table(struct dm_transaction_manager *tm)
 {
 	struct shadow_info *si;
-	struct hlist_node *n, *tmp;
+	struct hlist_node *tmp;
 	struct hlist_head *bucket;
 	int i;
 
 	spin_lock(&tm->lock);
 	for (i = 0; i < DM_HASH_SIZE; i++) {
 		bucket = tm->buckets + i;
-		hlist_for_each_entry_safe(si, n, tmp, bucket, hlist)
+		hlist_for_each_entry_safe(si, tmp, bucket, hlist)
 			kfree(si);
 
 		INIT_HLIST_HEAD(bucket);
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 19d77a026639..697f026cb318 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -365,10 +365,9 @@ static struct stripe_head *__find_stripe(struct r5conf *conf, sector_t sector,
 					 short generation)
 {
 	struct stripe_head *sh;
-	struct hlist_node *hn;
 
 	pr_debug("__find_stripe, sector %llu\n", (unsigned long long)sector);
-	hlist_for_each_entry(sh, hn, stripe_hash(conf, sector), hash)
+	hlist_for_each_entry(sh, stripe_hash(conf, sector), hash)
 		if (sh->sector == sector && sh->generation == generation)
 			return sh;
 	pr_debug("__stripe %llu not in cache\n", (unsigned long long)sector);
diff --git a/drivers/misc/sgi-gru/grutlbpurge.c b/drivers/misc/sgi-gru/grutlbpurge.c
index 240a6d361665..2129274ef7ab 100644
--- a/drivers/misc/sgi-gru/grutlbpurge.c
+++ b/drivers/misc/sgi-gru/grutlbpurge.c
@@ -280,11 +280,10 @@ static struct mmu_notifier *mmu_find_ops(struct mm_struct *mm,
 			const struct mmu_notifier_ops *ops)
 {
 	struct mmu_notifier *mn, *gru_mn = NULL;
-	struct hlist_node *n;
 
 	if (mm->mmu_notifier_mm) {
 		rcu_read_lock();
-		hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list,
+		hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list,
 					 hlist)
 		    if (mn->ops == ops) {
 			gru_mn = mn;
diff --git a/drivers/misc/vmw_vmci/vmci_doorbell.c b/drivers/misc/vmw_vmci/vmci_doorbell.c
index c3e8397f62ed..a8cee33ae8d2 100644
--- a/drivers/misc/vmw_vmci/vmci_doorbell.c
+++ b/drivers/misc/vmw_vmci/vmci_doorbell.c
@@ -127,9 +127,8 @@ static struct dbell_entry *dbell_index_table_find(u32 idx)
 {
 	u32 bucket = VMCI_DOORBELL_HASH(idx);
 	struct dbell_entry *dbell;
-	struct hlist_node *node;
 
-	hlist_for_each_entry(dbell, node, &vmci_doorbell_it.entries[bucket],
+	hlist_for_each_entry(dbell, &vmci_doorbell_it.entries[bucket],
 			     node) {
 		if (idx == dbell->idx)
 			return dbell;
@@ -359,12 +358,10 @@ static void dbell_fire_entries(u32 notify_idx)
 {
 	u32 bucket = VMCI_DOORBELL_HASH(notify_idx);
 	struct dbell_entry *dbell;
-	struct hlist_node *node;
 
 	spin_lock_bh(&vmci_doorbell_it.lock);
 
-	hlist_for_each_entry(dbell, node,
-			     &vmci_doorbell_it.entries[bucket], node) {
+	hlist_for_each_entry(dbell, &vmci_doorbell_it.entries[bucket], node) {
 		if (dbell->idx == notify_idx &&
 		    atomic_read(&dbell->active) == 1) {
 			if (dbell->run_delayed) {
diff --git a/drivers/misc/vmw_vmci/vmci_resource.c b/drivers/misc/vmw_vmci/vmci_resource.c
index a196f84a4fd2..9a53a30de445 100644
--- a/drivers/misc/vmw_vmci/vmci_resource.c
+++ b/drivers/misc/vmw_vmci/vmci_resource.c
@@ -46,11 +46,10 @@ static struct vmci_resource *vmci_resource_lookup(struct vmci_handle handle,
 						  enum vmci_resource_type type)
 {
 	struct vmci_resource *r, *resource = NULL;
-	struct hlist_node *node;
 	unsigned int idx = vmci_resource_hash(handle);
 
 	rcu_read_lock();
-	hlist_for_each_entry_rcu(r, node,
+	hlist_for_each_entry_rcu(r,
 				 &vmci_resource_table.entries[idx], node) {
 		u32 cid = r->handle.context;
 		u32 rid = r->handle.resource;
@@ -146,12 +145,11 @@ void vmci_resource_remove(struct vmci_resource *resource)
 	struct vmci_handle handle = resource->handle;
 	unsigned int idx = vmci_resource_hash(handle);
 	struct vmci_resource *r;
-	struct hlist_node *node;
 
 	/* Remove resource from hash table. */
 	spin_lock(&vmci_resource_table.lock);
 
-	hlist_for_each_entry(r, node, &vmci_resource_table.entries[idx], node) {
+	hlist_for_each_entry(r, &vmci_resource_table.entries[idx], node) {
 		if (vmci_handle_is_equal(r->handle, resource->handle)) {
 			hlist_del_init_rcu(&r->node);
 			break;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
index f4d2e9e3c6d5..c3f1afd86906 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
@@ -2197,13 +2197,13 @@ static int ixgbe_get_ethtool_fdir_entry(struct ixgbe_adapter *adapter,
 	union ixgbe_atr_input *mask = &adapter->fdir_mask;
 	struct ethtool_rx_flow_spec *fsp =
 		(struct ethtool_rx_flow_spec *)&cmd->fs;
-	struct hlist_node *node, *node2;
+	struct hlist_node *node2;
 	struct ixgbe_fdir_filter *rule = NULL;
 
 	/* report total rule count */
 	cmd->data = (1024 << adapter->fdir_pballoc) - 2;
 
-	hlist_for_each_entry_safe(rule, node, node2,
+	hlist_for_each_entry_safe(rule, node2,
 				  &adapter->fdir_filter_list, fdir_node) {
 		if (fsp->location <= rule->sw_idx)
 			break;
@@ -2264,14 +2264,14 @@ static int ixgbe_get_ethtool_fdir_all(struct ixgbe_adapter *adapter,
 				      struct ethtool_rxnfc *cmd,
 				      u32 *rule_locs)
 {
-	struct hlist_node *node, *node2;
+	struct hlist_node *node2;
 	struct ixgbe_fdir_filter *rule;
 	int cnt = 0;
 
 	/* report total rule count */
 	cmd->data = (1024 << adapter->fdir_pballoc) - 2;
 
-	hlist_for_each_entry_safe(rule, node, node2,
+	hlist_for_each_entry_safe(rule, node2,
 				  &adapter->fdir_filter_list, fdir_node) {
 		if (cnt == cmd->rule_cnt)
 			return -EMSGSIZE;
@@ -2358,19 +2358,19 @@ static int ixgbe_update_ethtool_fdir_entry(struct ixgbe_adapter *adapter,
 					   u16 sw_idx)
 {
 	struct ixgbe_hw *hw = &adapter->hw;
-	struct hlist_node *node, *node2, *parent;
-	struct ixgbe_fdir_filter *rule;
+	struct hlist_node *node2;
+	struct ixgbe_fdir_filter *rule, *parent;
 	int err = -EINVAL;
 
 	parent = NULL;
 	rule = NULL;
 
-	hlist_for_each_entry_safe(rule, node, node2,
+	hlist_for_each_entry_safe(rule, node2,
 				  &adapter->fdir_filter_list, fdir_node) {
 		/* hash found, or no matching entry */
 		if (rule->sw_idx >= sw_idx)
 			break;
-		parent = node;
+		parent = rule;
 	}
 
 	/* if there is an old rule occupying our place remove it */
@@ -2399,7 +2399,7 @@ static int ixgbe_update_ethtool_fdir_entry(struct ixgbe_adapter *adapter,
 
 	/* add filter to the list */
 	if (parent)
-		hlist_add_after(parent, &input->fdir_node);
+		hlist_add_after(&parent->fdir_node, &input->fdir_node);
 	else
 		hlist_add_head(&input->fdir_node,
 			       &adapter->fdir_filter_list);
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 68478d6dfa2d..db5611ae407e 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -3891,7 +3891,7 @@ static void ixgbe_configure_pb(struct ixgbe_adapter *adapter)
 static void ixgbe_fdir_filter_restore(struct ixgbe_adapter *adapter)
 {
 	struct ixgbe_hw *hw = &adapter->hw;
-	struct hlist_node *node, *node2;
+	struct hlist_node *node2;
 	struct ixgbe_fdir_filter *filter;
 
 	spin_lock(&adapter->fdir_perfect_lock);
@@ -3899,7 +3899,7 @@ static void ixgbe_fdir_filter_restore(struct ixgbe_adapter *adapter)
 	if (!hlist_empty(&adapter->fdir_filter_list))
 		ixgbe_fdir_set_input_mask_82599(hw, &adapter->fdir_mask);
 
-	hlist_for_each_entry_safe(filter, node, node2,
+	hlist_for_each_entry_safe(filter, node2,
 				  &adapter->fdir_filter_list, fdir_node) {
 		ixgbe_fdir_write_perfect_filter_82599(hw,
 				&filter->filter,
@@ -4356,12 +4356,12 @@ static void ixgbe_clean_all_tx_rings(struct ixgbe_adapter *adapter)
 
 static void ixgbe_fdir_filter_exit(struct ixgbe_adapter *adapter)
 {
-	struct hlist_node *node, *node2;
+	struct hlist_node *node2;
 	struct ixgbe_fdir_filter *filter;
 
 	spin_lock(&adapter->fdir_perfect_lock);
 
-	hlist_for_each_entry_safe(filter, node, node2,
+	hlist_for_each_entry_safe(filter, node2,
 				  &adapter->fdir_filter_list, fdir_node) {
 		hlist_del(&filter->fdir_node);
 		kfree(filter);
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 5385474bb526..bb4d8d99f36d 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -225,11 +225,10 @@ static inline struct mlx4_en_filter *
 mlx4_en_filter_find(struct mlx4_en_priv *priv, __be32 src_ip, __be32 dst_ip,
 		    __be16 src_port, __be16 dst_port)
 {
-	struct hlist_node *elem;
 	struct mlx4_en_filter *filter;
 	struct mlx4_en_filter *ret = NULL;
 
-	hlist_for_each_entry(filter, elem,
+	hlist_for_each_entry(filter,
 			     filter_hash_bucket(priv, src_ip, dst_ip,
 						src_port, dst_port),
 			     filter_chain) {
@@ -574,13 +573,13 @@ static void mlx4_en_put_qp(struct mlx4_en_priv *priv)
 
 	if (dev->caps.steering_mode != MLX4_STEERING_MODE_A0) {
 		struct mlx4_mac_entry *entry;
-		struct hlist_node *n, *tmp;
+		struct hlist_node *tmp;
 		struct hlist_head *bucket;
 		unsigned int mac_hash;
 
 		mac_hash = priv->dev->dev_addr[MLX4_EN_MAC_HASH_IDX];
 		bucket = &priv->mac_hash[mac_hash];
-		hlist_for_each_entry_safe(entry, n, tmp, bucket, hlist) {
+		hlist_for_each_entry_safe(entry, tmp, bucket, hlist) {
 			if (ether_addr_equal_64bits(entry->mac,
 						    priv->dev->dev_addr)) {
 				en_dbg(DRV, priv, "Releasing qp: port %d, MAC %pM, qpn %d\n",
@@ -609,11 +608,11 @@ static int mlx4_en_replace_mac(struct mlx4_en_priv *priv, int qpn,
 		struct hlist_head *bucket;
 		unsigned int mac_hash;
 		struct mlx4_mac_entry *entry;
-		struct hlist_node *n, *tmp;
+		struct hlist_node *tmp;
 		u64 prev_mac_u64 = mlx4_en_mac_to_u64(prev_mac);
 
 		bucket = &priv->mac_hash[prev_mac[MLX4_EN_MAC_HASH_IDX]];
-		hlist_for_each_entry_safe(entry, n, tmp, bucket, hlist) {
+		hlist_for_each_entry_safe(entry, tmp, bucket, hlist) {
 			if (ether_addr_equal_64bits(entry->mac, prev_mac)) {
 				mlx4_en_uc_steer_release(priv, entry->mac,
 							 qpn, entry->reg_id);
@@ -1019,7 +1018,7 @@ static void mlx4_en_do_uc_filter(struct mlx4_en_priv *priv,
 {
 	struct netdev_hw_addr *ha;
 	struct mlx4_mac_entry *entry;
-	struct hlist_node *n, *tmp;
+	struct hlist_node *tmp;
 	bool found;
 	u64 mac;
 	int err = 0;
@@ -1035,7 +1034,7 @@ static void mlx4_en_do_uc_filter(struct mlx4_en_priv *priv,
 	/* find what to remove */
 	for (i = 0; i < MLX4_EN_MAC_HASH_SIZE; ++i) {
 		bucket = &priv->mac_hash[i];
-		hlist_for_each_entry_safe(entry, n, tmp, bucket, hlist) {
+		hlist_for_each_entry_safe(entry, tmp, bucket, hlist) {
 			found = false;
 			netdev_for_each_uc_addr(ha, dev) {
 				if (ether_addr_equal_64bits(entry->mac,
@@ -1078,7 +1077,7 @@ static void mlx4_en_do_uc_filter(struct mlx4_en_priv *priv,
 	netdev_for_each_uc_addr(ha, dev) {
 		found = false;
 		bucket = &priv->mac_hash[ha->addr[MLX4_EN_MAC_HASH_IDX]];
-		hlist_for_each_entry(entry, n, bucket, hlist) {
+		hlist_for_each_entry(entry, bucket, hlist) {
 			if (ether_addr_equal_64bits(entry->mac, ha->addr)) {
 				found = true;
 				break;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index ce38654bbdd0..c7f856308e1a 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -35,6 +35,7 @@
 #include <linux/slab.h>
 #include <linux/mlx4/qp.h>
 #include <linux/skbuff.h>
+#include <linux/rculist.h>
 #include <linux/if_ether.h>
 #include <linux/if_vlan.h>
 #include <linux/vmalloc.h>
@@ -617,7 +618,6 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
 
 			if (is_multicast_ether_addr(ethh->h_dest)) {
 				struct mlx4_mac_entry *entry;
-				struct hlist_node *n;
 				struct hlist_head *bucket;
 				unsigned int mac_hash;
 
@@ -625,7 +625,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
 				mac_hash = ethh->h_source[MLX4_EN_MAC_HASH_IDX];
 				bucket = &priv->mac_hash[mac_hash];
 				rcu_read_lock();
-				hlist_for_each_entry_rcu(entry, n, bucket, hlist) {
+				hlist_for_each_entry_rcu(entry, bucket, hlist) {
 					if (ether_addr_equal_64bits(entry->mac,
 								    ethh->h_source)) {
 						rcu_read_unlock();
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c
index 325e11e1ce0f..f89cc7a3fe6c 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c
@@ -576,7 +576,7 @@ void qlcnic_free_mac_list(struct qlcnic_adapter *adapter)
 void qlcnic_prune_lb_filters(struct qlcnic_adapter *adapter)
 {
 	struct qlcnic_filter *tmp_fil;
-	struct hlist_node *tmp_hnode, *n;
+	struct hlist_node *n;
 	struct hlist_head *head;
 	int i;
 	unsigned long time;
@@ -584,7 +584,7 @@ void qlcnic_prune_lb_filters(struct qlcnic_adapter *adapter)
 
 	for (i = 0; i < adapter->fhash.fbucket_size; i++) {
 		head = &(adapter->fhash.fhead[i]);
-		hlist_for_each_entry_safe(tmp_fil, tmp_hnode, n, head, fnode) {
+		hlist_for_each_entry_safe(tmp_fil, n, head, fnode) {
 			cmd =  tmp_fil->vlan_id ? QLCNIC_MAC_VLAN_DEL :
 						  QLCNIC_MAC_DEL;
 			time = tmp_fil->ftime;
@@ -604,7 +604,7 @@ void qlcnic_prune_lb_filters(struct qlcnic_adapter *adapter)
 	for (i = 0; i < adapter->rx_fhash.fbucket_size; i++) {
 		head = &(adapter->rx_fhash.fhead[i]);
 
-		hlist_for_each_entry_safe(tmp_fil, tmp_hnode, n, head, fnode)
+		hlist_for_each_entry_safe(tmp_fil, n, head, fnode)
 		{
 			time = tmp_fil->ftime;
 			if (jiffies > (QLCNIC_FILTER_AGE * HZ + time)) {
@@ -621,14 +621,14 @@ void qlcnic_prune_lb_filters(struct qlcnic_adapter *adapter)
 void qlcnic_delete_lb_filters(struct qlcnic_adapter *adapter)
 {
 	struct qlcnic_filter *tmp_fil;
-	struct hlist_node *tmp_hnode, *n;
+	struct hlist_node *n;
 	struct hlist_head *head;
 	int i;
 	u8 cmd;
 
 	for (i = 0; i < adapter->fhash.fbucket_size; i++) {
 		head = &(adapter->fhash.fhead[i]);
-		hlist_for_each_entry_safe(tmp_fil, tmp_hnode, n, head, fnode) {
+		hlist_for_each_entry_safe(tmp_fil, n, head, fnode) {
 			cmd =  tmp_fil->vlan_id ? QLCNIC_MAC_VLAN_DEL :
 						  QLCNIC_MAC_DEL;
 			qlcnic_sre_macaddr_change(adapter,
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c
index 6387e0cc3ea9..0e630061bff3 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c
@@ -162,7 +162,7 @@ void qlcnic_add_lb_filter(struct qlcnic_adapter *adapter, struct sk_buff *skb,
 {
 	struct ethhdr *phdr = (struct ethhdr *)(skb->data);
 	struct qlcnic_filter *fil, *tmp_fil;
-	struct hlist_node *tmp_hnode, *n;
+	struct hlist_node *n;
 	struct hlist_head *head;
 	unsigned long time;
 	u64 src_addr = 0;
@@ -179,7 +179,7 @@ void qlcnic_add_lb_filter(struct qlcnic_adapter *adapter, struct sk_buff *skb,
 			 (adapter->fhash.fbucket_size - 1);
 		head = &(adapter->rx_fhash.fhead[hindex]);
 
-		hlist_for_each_entry_safe(tmp_fil, tmp_hnode, n, head, fnode) {
+		hlist_for_each_entry_safe(tmp_fil, n, head, fnode) {
 			if (!memcmp(tmp_fil->faddr, &src_addr, ETH_ALEN) &&
 			    tmp_fil->vlan_id == vlan_id) {
 				time = tmp_fil->ftime;
@@ -205,7 +205,7 @@ void qlcnic_add_lb_filter(struct qlcnic_adapter *adapter, struct sk_buff *skb,
 			 (adapter->fhash.fbucket_size - 1);
 		head = &(adapter->rx_fhash.fhead[hindex]);
 		spin_lock(&adapter->rx_mac_learn_lock);
-		hlist_for_each_entry_safe(tmp_fil, tmp_hnode, n, head, fnode) {
+		hlist_for_each_entry_safe(tmp_fil, n, head, fnode) {
 			if (!memcmp(tmp_fil->faddr, &src_addr, ETH_ALEN) &&
 			    tmp_fil->vlan_id == vlan_id) {
 				found = 1;
@@ -272,7 +272,7 @@ static void qlcnic_send_filter(struct qlcnic_adapter *adapter,
 			       struct sk_buff *skb)
 {
 	struct qlcnic_filter *fil, *tmp_fil;
-	struct hlist_node *tmp_hnode, *n;
+	struct hlist_node *n;
 	struct hlist_head *head;
 	struct net_device *netdev = adapter->netdev;
 	struct ethhdr *phdr = (struct ethhdr *)(skb->data);
@@ -294,7 +294,7 @@ static void qlcnic_send_filter(struct qlcnic_adapter *adapter,
 	hindex = qlcnic_mac_hash(src_addr) & (adapter->fhash.fbucket_size - 1);
 	head = &(adapter->fhash.fhead[hindex]);
 
-	hlist_for_each_entry_safe(tmp_fil, tmp_hnode, n, head, fnode) {
+	hlist_for_each_entry_safe(tmp_fil, n, head, fnode) {
 		if (!memcmp(tmp_fil->faddr, &src_addr, ETH_ALEN) &&
 		    tmp_fil->vlan_id == vlan_id) {
 			if (jiffies > (QLCNIC_READD_AGE * HZ + tmp_fil->ftime))
diff --git a/drivers/net/ethernet/sun/sunvnet.c b/drivers/net/ethernet/sun/sunvnet.c
index 289b4eefb42f..1df0ff3839e8 100644
--- a/drivers/net/ethernet/sun/sunvnet.c
+++ b/drivers/net/ethernet/sun/sunvnet.c
@@ -614,10 +614,9 @@ struct vnet_port *__tx_port_find(struct vnet *vp, struct sk_buff *skb)
 {
 	unsigned int hash = vnet_hashfn(skb->data);
 	struct hlist_head *hp = &vp->port_hash[hash];
-	struct hlist_node *n;
 	struct vnet_port *port;
 
-	hlist_for_each_entry(port, n, hp, hash) {
+	hlist_for_each_entry(port, hp, hash) {
 		if (ether_addr_equal(port->raddr, skb->data))
 			return port;
 	}
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index defcd8a85744..417b2af1aa80 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -55,9 +55,8 @@ static struct macvlan_dev *macvlan_hash_lookup(const struct macvlan_port *port,
 					       const unsigned char *addr)
 {
 	struct macvlan_dev *vlan;
-	struct hlist_node *n;
 
-	hlist_for_each_entry_rcu(vlan, n, &port->vlan_hash[addr[5]], hlist) {
+	hlist_for_each_entry_rcu(vlan, &port->vlan_hash[addr[5]], hlist) {
 		if (ether_addr_equal_64bits(vlan->dev->dev_addr, addr))
 			return vlan;
 	}
@@ -149,7 +148,6 @@ static void macvlan_broadcast(struct sk_buff *skb,
 {
 	const struct ethhdr *eth = eth_hdr(skb);
 	const struct macvlan_dev *vlan;
-	struct hlist_node *n;
 	struct sk_buff *nskb;
 	unsigned int i;
 	int err;
@@ -159,7 +157,7 @@ static void macvlan_broadcast(struct sk_buff *skb,
 		return;
 
 	for (i = 0; i < MACVLAN_HASH_SIZE; i++) {
-		hlist_for_each_entry_rcu(vlan, n, &port->vlan_hash[i], hlist) {
+		hlist_for_each_entry_rcu(vlan, &port->vlan_hash[i], hlist) {
 			if (vlan->dev == src || !(vlan->mode & mode))
 				continue;
 
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index b6f45c5d84d5..2c6a22e278ea 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -197,9 +197,8 @@ static inline u32 tun_hashfn(u32 rxhash)
 static struct tun_flow_entry *tun_flow_find(struct hlist_head *head, u32 rxhash)
 {
 	struct tun_flow_entry *e;
-	struct hlist_node *n;
 
-	hlist_for_each_entry_rcu(e, n, head, hash_link) {
+	hlist_for_each_entry_rcu(e, head, hash_link) {
 		if (e->rxhash == rxhash)
 			return e;
 	}
@@ -241,9 +240,9 @@ static void tun_flow_flush(struct tun_struct *tun)
 	spin_lock_bh(&tun->lock);
 	for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++) {
 		struct tun_flow_entry *e;
-		struct hlist_node *h, *n;
+		struct hlist_node *n;
 
-		hlist_for_each_entry_safe(e, h, n, &tun->flows[i], hash_link)
+		hlist_for_each_entry_safe(e, n, &tun->flows[i], hash_link)
 			tun_flow_delete(tun, e);
 	}
 	spin_unlock_bh(&tun->lock);
@@ -256,9 +255,9 @@ static void tun_flow_delete_by_queue(struct tun_struct *tun, u16 queue_index)
 	spin_lock_bh(&tun->lock);
 	for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++) {
 		struct tun_flow_entry *e;
-		struct hlist_node *h, *n;
+		struct hlist_node *n;
 
-		hlist_for_each_entry_safe(e, h, n, &tun->flows[i], hash_link) {
+		hlist_for_each_entry_safe(e, n, &tun->flows[i], hash_link) {
 			if (e->queue_index == queue_index)
 				tun_flow_delete(tun, e);
 		}
@@ -279,9 +278,9 @@ static void tun_flow_cleanup(unsigned long data)
 	spin_lock_bh(&tun->lock);
 	for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++) {
 		struct tun_flow_entry *e;
-		struct hlist_node *h, *n;
+		struct hlist_node *n;
 
-		hlist_for_each_entry_safe(e, h, n, &tun->flows[i], hash_link) {
+		hlist_for_each_entry_safe(e, n, &tun->flows[i], hash_link) {
 			unsigned long this_timer;
 			count++;
 			this_timer = e->updated + delay;
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index f736823f8437..f10e58ac9c1b 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -145,9 +145,8 @@ static inline struct hlist_head *vni_head(struct net *net, u32 id)
 static struct vxlan_dev *vxlan_find_vni(struct net *net, u32 id)
 {
 	struct vxlan_dev *vxlan;
-	struct hlist_node *node;
 
-	hlist_for_each_entry_rcu(vxlan, node, vni_head(net, id), hlist) {
+	hlist_for_each_entry_rcu(vxlan, vni_head(net, id), hlist) {
 		if (vxlan->vni == id)
 			return vxlan;
 	}
@@ -292,9 +291,8 @@ static struct vxlan_fdb *vxlan_find_mac(struct vxlan_dev *vxlan,
 {
 	struct hlist_head *head = vxlan_fdb_head(vxlan, mac);
 	struct vxlan_fdb *f;
-	struct hlist_node *node;
 
-	hlist_for_each_entry_rcu(f, node, head, hlist) {
+	hlist_for_each_entry_rcu(f, head, hlist) {
 		if (compare_ether_addr(mac, f->eth_addr) == 0)
 			return f;
 	}
@@ -422,10 +420,9 @@ static int vxlan_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
 
 	for (h = 0; h < FDB_HASH_SIZE; ++h) {
 		struct vxlan_fdb *f;
-		struct hlist_node *n;
 		int err;
 
-		hlist_for_each_entry_rcu(f, n, &vxlan->fdb_head[h], hlist) {
+		hlist_for_each_entry_rcu(f, &vxlan->fdb_head[h], hlist) {
 			if (idx < cb->args[0])
 				goto skip;
 
@@ -483,11 +480,10 @@ static bool vxlan_group_used(struct vxlan_net *vn,
 			     const struct vxlan_dev *this)
 {
 	const struct vxlan_dev *vxlan;
-	struct hlist_node *node;
 	unsigned h;
 
 	for (h = 0; h < VNI_HASH_SIZE; ++h)
-		hlist_for_each_entry(vxlan, node, &vn->vni_list[h], hlist) {
+		hlist_for_each_entry(vxlan, &vn->vni_list[h], hlist) {
 			if (vxlan == this)
 				continue;
 
diff --git a/drivers/net/wireless/zd1201.c b/drivers/net/wireless/zd1201.c
index 48273dd05b63..4941f201d6c8 100644
--- a/drivers/net/wireless/zd1201.c
+++ b/drivers/net/wireless/zd1201.c
@@ -309,7 +309,6 @@ static void zd1201_usbrx(struct urb *urb)
 	if (data[urb->actual_length-1] == ZD1201_PACKET_RXDATA) {
 		int datalen = urb->actual_length-1;
 		unsigned short len, fc, seq;
-		struct hlist_node *node;
 
 		len = ntohs(*(__be16 *)&data[datalen-2]);
 		if (len>datalen)
@@ -362,7 +361,7 @@ static void zd1201_usbrx(struct urb *urb)
 				hlist_add_head(&frag->fnode, &zd->fraglist);
 				goto resubmit;
 			}
-			hlist_for_each_entry(frag, node, &zd->fraglist, fnode)
+			hlist_for_each_entry(frag, &zd->fraglist, fnode)
 				if (frag->seq == (seq&IEEE80211_SCTL_SEQ))
 					break;
 			if (!frag)
@@ -1831,14 +1830,14 @@ err_zd:
 static void zd1201_disconnect(struct usb_interface *interface)
 {
 	struct zd1201 *zd = usb_get_intfdata(interface);
-	struct hlist_node *node, *node2;
+	struct hlist_node *node2;
 	struct zd1201_frag *frag;
 
 	if (!zd)
 		return;
 	usb_set_intfdata(interface, NULL);
 
-	hlist_for_each_entry_safe(frag, node, node2, &zd->fraglist, fnode) {
+	hlist_for_each_entry_safe(frag, node2, &zd->fraglist, fnode) {
 		hlist_del_init(&frag->fnode);
 		kfree_skb(frag->skb);
 		kfree(frag);
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 924e4665bd57..b099e0025d2b 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -842,9 +842,8 @@ static struct pci_cap_saved_state *pci_find_saved_cap(
 	struct pci_dev *pci_dev, char cap)
 {
 	struct pci_cap_saved_state *tmp;
-	struct hlist_node *pos;
 
-	hlist_for_each_entry(tmp, pos, &pci_dev->saved_cap_space, next) {
+	hlist_for_each_entry(tmp, &pci_dev->saved_cap_space, next) {
 		if (tmp->cap.cap_nr == cap)
 			return tmp;
 	}
@@ -1041,7 +1040,6 @@ struct pci_saved_state *pci_store_saved_state(struct pci_dev *dev)
 	struct pci_saved_state *state;
 	struct pci_cap_saved_state *tmp;
 	struct pci_cap_saved_data *cap;
-	struct hlist_node *pos;
 	size_t size;
 
 	if (!dev->state_saved)
@@ -1049,7 +1047,7 @@ struct pci_saved_state *pci_store_saved_state(struct pci_dev *dev)
 
 	size = sizeof(*state) + sizeof(struct pci_cap_saved_data);
 
-	hlist_for_each_entry(tmp, pos, &dev->saved_cap_space, next)
+	hlist_for_each_entry(tmp, &dev->saved_cap_space, next)
 		size += sizeof(struct pci_cap_saved_data) + tmp->cap.size;
 
 	state = kzalloc(size, GFP_KERNEL);
@@ -1060,7 +1058,7 @@ struct pci_saved_state *pci_store_saved_state(struct pci_dev *dev)
 	       sizeof(state->config_space));
 
 	cap = state->cap;
-	hlist_for_each_entry(tmp, pos, &dev->saved_cap_space, next) {
+	hlist_for_each_entry(tmp, &dev->saved_cap_space, next) {
 		size_t len = sizeof(struct pci_cap_saved_data) + tmp->cap.size;
 		memcpy(cap, &tmp->cap, len);
 		cap = (struct pci_cap_saved_data *)((u8 *)cap + len);
@@ -2038,9 +2036,9 @@ void pci_allocate_cap_save_buffers(struct pci_dev *dev)
 void pci_free_cap_save_buffers(struct pci_dev *dev)
 {
 	struct pci_cap_saved_state *tmp;
-	struct hlist_node *pos, *n;
+	struct hlist_node *n;
 
-	hlist_for_each_entry_safe(tmp, pos, n, &dev->saved_cap_space, next)
+	hlist_for_each_entry_safe(tmp, n, &dev->saved_cap_space, next)
 		kfree(tmp);
 }
 
diff --git a/drivers/staging/android/binder.c b/drivers/staging/android/binder.c
index 538ebe213129..24456a0de6b2 100644
--- a/drivers/staging/android/binder.c
+++ b/drivers/staging/android/binder.c
@@ -2880,7 +2880,6 @@ static int binder_release(struct inode *nodp, struct file *filp)
 
 static void binder_deferred_release(struct binder_proc *proc)
 {
-	struct hlist_node *pos;
 	struct binder_transaction *t;
 	struct rb_node *n;
 	int threads, nodes, incoming_refs, outgoing_refs, buffers, active_transactions, page_count;
@@ -2924,7 +2923,7 @@ static void binder_deferred_release(struct binder_proc *proc)
 			node->local_weak_refs = 0;
 			hlist_add_head(&node->dead_node, &binder_dead_nodes);
 
-			hlist_for_each_entry(ref, pos, &node->refs, node_entry) {
+			hlist_for_each_entry(ref, &node->refs, node_entry) {
 				incoming_refs++;
 				if (ref->death) {
 					death++;
@@ -3156,12 +3155,11 @@ static void print_binder_thread(struct seq_file *m,
 static void print_binder_node(struct seq_file *m, struct binder_node *node)
 {
 	struct binder_ref *ref;
-	struct hlist_node *pos;
 	struct binder_work *w;
 	int count;
 
 	count = 0;
-	hlist_for_each_entry(ref, pos, &node->refs, node_entry)
+	hlist_for_each_entry(ref, &node->refs, node_entry)
 		count++;
 
 	seq_printf(m, "  node %d: u%p c%p hs %d hw %d ls %d lw %d is %d iw %d",
@@ -3171,7 +3169,7 @@ static void print_binder_node(struct seq_file *m, struct binder_node *node)
 		   node->internal_strong_refs, count);
 	if (count) {
 		seq_puts(m, " proc");
-		hlist_for_each_entry(ref, pos, &node->refs, node_entry)
+		hlist_for_each_entry(ref, &node->refs, node_entry)
 			seq_printf(m, " %d", ref->proc->pid);
 	}
 	seq_puts(m, "\n");
@@ -3369,7 +3367,6 @@ static void print_binder_proc_stats(struct seq_file *m,
 static int binder_state_show(struct seq_file *m, void *unused)
 {
 	struct binder_proc *proc;
-	struct hlist_node *pos;
 	struct binder_node *node;
 	int do_lock = !binder_debug_no_lock;
 
@@ -3380,10 +3377,10 @@ static int binder_state_show(struct seq_file *m, void *unused)
 
 	if (!hlist_empty(&binder_dead_nodes))
 		seq_puts(m, "dead nodes:\n");
-	hlist_for_each_entry(node, pos, &binder_dead_nodes, dead_node)
+	hlist_for_each_entry(node, &binder_dead_nodes, dead_node)
 		print_binder_node(m, node);
 
-	hlist_for_each_entry(proc, pos, &binder_procs, proc_node)
+	hlist_for_each_entry(proc, &binder_procs, proc_node)
 		print_binder_proc(m, proc, 1);
 	if (do_lock)
 		binder_unlock(__func__);
@@ -3393,7 +3390,6 @@ static int binder_state_show(struct seq_file *m, void *unused)
 static int binder_stats_show(struct seq_file *m, void *unused)
 {
 	struct binder_proc *proc;
-	struct hlist_node *pos;
 	int do_lock = !binder_debug_no_lock;
 
 	if (do_lock)
@@ -3403,7 +3399,7 @@ static int binder_stats_show(struct seq_file *m, void *unused)
 
 	print_binder_stats(m, "", &binder_stats);
 
-	hlist_for_each_entry(proc, pos, &binder_procs, proc_node)
+	hlist_for_each_entry(proc, &binder_procs, proc_node)
 		print_binder_proc_stats(m, proc);
 	if (do_lock)
 		binder_unlock(__func__);
@@ -3413,14 +3409,13 @@ static int binder_stats_show(struct seq_file *m, void *unused)
 static int binder_transactions_show(struct seq_file *m, void *unused)
 {
 	struct binder_proc *proc;
-	struct hlist_node *pos;
 	int do_lock = !binder_debug_no_lock;
 
 	if (do_lock)
 		binder_lock(__func__);
 
 	seq_puts(m, "binder transactions:\n");
-	hlist_for_each_entry(proc, pos, &binder_procs, proc_node)
+	hlist_for_each_entry(proc, &binder_procs, proc_node)
 		print_binder_proc(m, proc, 0);
 	if (do_lock)
 		binder_unlock(__func__);
diff --git a/drivers/target/tcm_fc/tfc_sess.c b/drivers/target/tcm_fc/tfc_sess.c
index 6659dd36e806..113f33598b9f 100644
--- a/drivers/target/tcm_fc/tfc_sess.c
+++ b/drivers/target/tcm_fc/tfc_sess.c
@@ -169,7 +169,6 @@ static struct ft_sess *ft_sess_get(struct fc_lport *lport, u32 port_id)
 {
 	struct ft_tport *tport;
 	struct hlist_head *head;
-	struct hlist_node *pos;
 	struct ft_sess *sess;
 
 	rcu_read_lock();
@@ -178,7 +177,7 @@ static struct ft_sess *ft_sess_get(struct fc_lport *lport, u32 port_id)
 		goto out;
 
 	head = &tport->hash[ft_sess_hash(port_id)];
-	hlist_for_each_entry_rcu(sess, pos, head, hash) {
+	hlist_for_each_entry_rcu(sess, head, hash) {
 		if (sess->port_id == port_id) {
 			kref_get(&sess->kref);
 			rcu_read_unlock();
@@ -201,10 +200,9 @@ static struct ft_sess *ft_sess_create(struct ft_tport *tport, u32 port_id,
 {
 	struct ft_sess *sess;
 	struct hlist_head *head;
-	struct hlist_node *pos;
 
 	head = &tport->hash[ft_sess_hash(port_id)];
-	hlist_for_each_entry_rcu(sess, pos, head, hash)
+	hlist_for_each_entry_rcu(sess, head, hash)
 		if (sess->port_id == port_id)
 			return sess;
 
@@ -253,11 +251,10 @@ static void ft_sess_unhash(struct ft_sess *sess)
 static struct ft_sess *ft_sess_delete(struct ft_tport *tport, u32 port_id)
 {
 	struct hlist_head *head;
-	struct hlist_node *pos;
 	struct ft_sess *sess;
 
 	head = &tport->hash[ft_sess_hash(port_id)];
-	hlist_for_each_entry_rcu(sess, pos, head, hash) {
+	hlist_for_each_entry_rcu(sess, head, hash) {
 		if (sess->port_id == port_id) {
 			ft_sess_unhash(sess);
 			return sess;
@@ -273,12 +270,11 @@ static struct ft_sess *ft_sess_delete(struct ft_tport *tport, u32 port_id)
 static void ft_sess_delete_all(struct ft_tport *tport)
 {
 	struct hlist_head *head;
-	struct hlist_node *pos;
 	struct ft_sess *sess;
 
 	for (head = tport->hash;
 	     head < &tport->hash[FT_SESS_HASH_SIZE]; head++) {
-		hlist_for_each_entry_rcu(sess, pos, head, hash) {
+		hlist_for_each_entry_rcu(sess, head, hash) {
 			ft_sess_unhash(sess);
 			transport_deregister_session_configfs(sess->se_sess);
 			ft_sess_put(sess);	/* release from table */
diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c
index eb82ee53ee0b..d9a43674cb94 100644
--- a/fs/affs/amigaffs.c
+++ b/fs/affs/amigaffs.c
@@ -125,9 +125,8 @@ static void
 affs_fix_dcache(struct inode *inode, u32 entry_ino)
 {
 	struct dentry *dentry;
-	struct hlist_node *p;
 	spin_lock(&inode->i_lock);
-	hlist_for_each_entry(dentry, p, &inode->i_dentry, d_alias) {
+	hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) {
 		if (entry_ino == (u32)(long)dentry->d_fsdata) {
 			dentry->d_fsdata = (void *)inode->i_ino;
 			break;
diff --git a/fs/aio.c b/fs/aio.c
index 064bfbe37566..3f941f2a3059 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -591,11 +591,10 @@ static struct kioctx *lookup_ioctx(unsigned long ctx_id)
 {
 	struct mm_struct *mm = current->mm;
 	struct kioctx *ctx, *ret = NULL;
-	struct hlist_node *n;
 
 	rcu_read_lock();
 
-	hlist_for_each_entry_rcu(ctx, n, &mm->ioctx_list, list) {
+	hlist_for_each_entry_rcu(ctx, &mm->ioctx_list, list) {
 		/*
 		 * RCU protects us against accessing freed memory but
 		 * we have to be careful not to get a reference when the
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index d2a833999bcc..83f2606c76d0 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -816,10 +816,9 @@ static bool
 inode_has_hashed_dentries(struct inode *inode)
 {
 	struct dentry *dentry;
-	struct hlist_node *p;
 
 	spin_lock(&inode->i_lock);
-	hlist_for_each_entry(dentry, p, &inode->i_dentry, d_alias) {
+	hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) {
 		if (!d_unhashed(dentry) || IS_ROOT(dentry)) {
 			spin_unlock(&inode->i_lock);
 			return true;
diff --git a/fs/dcache.c b/fs/dcache.c
index 68220dd0c135..fbfae008ba44 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -675,11 +675,10 @@ EXPORT_SYMBOL(dget_parent);
 static struct dentry *__d_find_alias(struct inode *inode, int want_discon)
 {
 	struct dentry *alias, *discon_alias;
-	struct hlist_node *p;
 
 again:
 	discon_alias = NULL;
-	hlist_for_each_entry(alias, p, &inode->i_dentry, d_alias) {
+	hlist_for_each_entry(alias, &inode->i_dentry, d_alias) {
 		spin_lock(&alias->d_lock);
  		if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
 			if (IS_ROOT(alias) &&
@@ -730,10 +729,9 @@ EXPORT_SYMBOL(d_find_alias);
 void d_prune_aliases(struct inode *inode)
 {
 	struct dentry *dentry;
-	struct hlist_node *p;
 restart:
 	spin_lock(&inode->i_lock);
-	hlist_for_each_entry(dentry, p, &inode->i_dentry, d_alias) {
+	hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) {
 		spin_lock(&dentry->d_lock);
 		if (!dentry->d_count) {
 			__dget_dlock(dentry);
@@ -1443,14 +1441,13 @@ static struct dentry *__d_instantiate_unique(struct dentry *entry,
 	int len = entry->d_name.len;
 	const char *name = entry->d_name.name;
 	unsigned int hash = entry->d_name.hash;
-	struct hlist_node *p;
 
 	if (!inode) {
 		__d_instantiate(entry, NULL);
 		return NULL;
 	}
 
-	hlist_for_each_entry(alias, p, &inode->i_dentry, d_alias) {
+	hlist_for_each_entry(alias, &inode->i_dentry, d_alias) {
 		/*
 		 * Don't need alias->d_lock here, because aliases with
 		 * d_parent == entry->d_parent are not subject to name or
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index dd87a31bcc21..4f5ad246582f 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -177,12 +177,11 @@ static inline int nodeid_hash(int nodeid)
 static struct connection *__find_con(int nodeid)
 {
 	int r;
-	struct hlist_node *h;
 	struct connection *con;
 
 	r = nodeid_hash(nodeid);
 
-	hlist_for_each_entry(con, h, &connection_hash[r], list) {
+	hlist_for_each_entry(con, &connection_hash[r], list) {
 		if (con->nodeid == nodeid)
 			return con;
 	}
@@ -232,13 +231,12 @@ static struct connection *__nodeid2con(int nodeid, gfp_t alloc)
 static void foreach_conn(void (*conn_func)(struct connection *c))
 {
 	int i;
-	struct hlist_node *h, *n;
+	struct hlist_node *n;
 	struct connection *con;
 
 	for (i = 0; i < CONN_HASH_SIZE; i++) {
-		hlist_for_each_entry_safe(con, h, n, &connection_hash[i], list){
+		hlist_for_each_entry_safe(con, n, &connection_hash[i], list)
 			conn_func(con);
-		}
 	}
 }
 
@@ -257,13 +255,12 @@ static struct connection *nodeid2con(int nodeid, gfp_t allocation)
 static struct connection *assoc2con(int assoc_id)
 {
 	int i;
-	struct hlist_node *h;
 	struct connection *con;
 
 	mutex_lock(&connections_lock);
 
 	for (i = 0 ; i < CONN_HASH_SIZE; i++) {
-		hlist_for_each_entry(con, h, &connection_hash[i], list) {
+		hlist_for_each_entry(con, &connection_hash[i], list) {
 			if (con->sctp_assoc == assoc_id) {
 				mutex_unlock(&connections_lock);
 				return con;
diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c
index 5fa2471796c2..8d7a577ae497 100644
--- a/fs/ecryptfs/messaging.c
+++ b/fs/ecryptfs/messaging.c
@@ -115,10 +115,9 @@ void ecryptfs_msg_ctx_alloc_to_free(struct ecryptfs_msg_ctx *msg_ctx)
  */
 int ecryptfs_find_daemon_by_euid(struct ecryptfs_daemon **daemon)
 {
-	struct hlist_node *elem;
 	int rc;
 
-	hlist_for_each_entry(*daemon, elem,
+	hlist_for_each_entry(*daemon,
 			    &ecryptfs_daemon_hash[ecryptfs_current_euid_hash()],
 			    euid_chain) {
 		if (uid_eq((*daemon)->file->f_cred->euid, current_euid())) {
@@ -445,7 +444,6 @@ void ecryptfs_release_messaging(void)
 		mutex_unlock(&ecryptfs_msg_ctx_lists_mux);
 	}
 	if (ecryptfs_daemon_hash) {
-		struct hlist_node *elem;
 		struct ecryptfs_daemon *daemon;
 		int i;
 
@@ -453,7 +451,7 @@ void ecryptfs_release_messaging(void)
 		for (i = 0; i < (1 << ecryptfs_hash_bits); i++) {
 			int rc;
 
-			hlist_for_each_entry(daemon, elem,
+			hlist_for_each_entry(daemon,
 					     &ecryptfs_daemon_hash[i],
 					     euid_chain) {
 				rc = ecryptfs_exorcise_daemon(daemon);
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index 5df4bb4aab14..262fc9940982 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -44,14 +44,13 @@ find_acceptable_alias(struct dentry *result,
 {
 	struct dentry *dentry, *toput = NULL;
 	struct inode *inode;
-	struct hlist_node *p;
 
 	if (acceptable(context, result))
 		return result;
 
 	inode = result->d_inode;
 	spin_lock(&inode->i_lock);
-	hlist_for_each_entry(dentry, p, &inode->i_dentry, d_alias) {
+	hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) {
 		dget(dentry);
 		spin_unlock(&inode->i_lock);
 		if (toput)
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 780e20806346..acf6e479b443 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -341,12 +341,11 @@ struct inode *fat_iget(struct super_block *sb, loff_t i_pos)
 {
 	struct msdos_sb_info *sbi = MSDOS_SB(sb);
 	struct hlist_head *head = sbi->inode_hashtable + fat_hash(i_pos);
-	struct hlist_node *_p;
 	struct msdos_inode_info *i;
 	struct inode *inode = NULL;
 
 	spin_lock(&sbi->inode_hash_lock);
-	hlist_for_each_entry(i, _p, head, i_fat_hash) {
+	hlist_for_each_entry(i, head, i_fat_hash) {
 		BUG_ON(i->vfs_inode.i_sb != sb);
 		if (i->i_pos != i_pos)
 			continue;
diff --git a/fs/fat/nfs.c b/fs/fat/nfs.c
index ef4b5faba87b..499c10438ca2 100644
--- a/fs/fat/nfs.c
+++ b/fs/fat/nfs.c
@@ -21,13 +21,12 @@ static struct inode *fat_dget(struct super_block *sb, int i_logstart)
 {
 	struct msdos_sb_info *sbi = MSDOS_SB(sb);
 	struct hlist_head *head;
-	struct hlist_node *_p;
 	struct msdos_inode_info *i;
 	struct inode *inode = NULL;
 
 	head = sbi->dir_hashtable + fat_dir_hash(i_logstart);
 	spin_lock(&sbi->dir_hash_lock);
-	hlist_for_each_entry(i, _p, head, i_dir_hash) {
+	hlist_for_each_entry(i, head, i_dir_hash) {
 		BUG_ON(i->vfs_inode.i_sb != sb);
 		if (i->i_logstart != i_logstart)
 			continue;
diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c
index 8dcb114758e3..e2cba1f60c21 100644
--- a/fs/fscache/cookie.c
+++ b/fs/fscache/cookie.c
@@ -237,13 +237,12 @@ static int fscache_alloc_object(struct fscache_cache *cache,
 				struct fscache_cookie *cookie)
 {
 	struct fscache_object *object;
-	struct hlist_node *_n;
 	int ret;
 
 	_enter("%p,%p{%s}", cache, cookie, cookie->def->name);
 
 	spin_lock(&cookie->lock);
-	hlist_for_each_entry(object, _n, &cookie->backing_objects,
+	hlist_for_each_entry(object, &cookie->backing_objects,
 			     cookie_link) {
 		if (object->cache == cache)
 			goto object_already_extant;
@@ -311,7 +310,6 @@ static int fscache_attach_object(struct fscache_cookie *cookie,
 {
 	struct fscache_object *p;
 	struct fscache_cache *cache = object->cache;
-	struct hlist_node *_n;
 	int ret;
 
 	_enter("{%s},{OBJ%x}", cookie->def->name, object->debug_id);
@@ -321,7 +319,7 @@ static int fscache_attach_object(struct fscache_cookie *cookie,
 	/* there may be multiple initial creations of this object, but we only
 	 * want one */
 	ret = -EEXIST;
-	hlist_for_each_entry(p, _n, &cookie->backing_objects, cookie_link) {
+	hlist_for_each_entry(p, &cookie->backing_objects, cookie_link) {
 		if (p->cache == object->cache) {
 			if (p->state >= FSCACHE_OBJECT_DYING)
 				ret = -ENOBUFS;
@@ -331,7 +329,7 @@ static int fscache_attach_object(struct fscache_cookie *cookie,
 
 	/* pin the parent object */
 	spin_lock_nested(&cookie->parent->lock, 1);
-	hlist_for_each_entry(p, _n, &cookie->parent->backing_objects,
+	hlist_for_each_entry(p, &cookie->parent->backing_objects,
 			     cookie_link) {
 		if (p->cache == object->cache) {
 			if (p->state >= FSCACHE_OBJECT_DYING) {
@@ -435,7 +433,6 @@ EXPORT_SYMBOL(__fscache_wait_on_invalidate);
 void __fscache_update_cookie(struct fscache_cookie *cookie)
 {
 	struct fscache_object *object;
-	struct hlist_node *_p;
 
 	fscache_stat(&fscache_n_updates);
 
@@ -452,7 +449,7 @@ void __fscache_update_cookie(struct fscache_cookie *cookie)
 	spin_lock(&cookie->lock);
 
 	/* update the index entry on disk in each cache backing this cookie */
-	hlist_for_each_entry(object, _p,
+	hlist_for_each_entry(object,
 			     &cookie->backing_objects, cookie_link) {
 		fscache_raise_event(object, FSCACHE_OBJECT_EV_UPDATE);
 	}
diff --git a/fs/inode.c b/fs/inode.c
index 67880e604399..f5f7c06c36fb 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -798,11 +798,10 @@ static struct inode *find_inode(struct super_block *sb,
 				int (*test)(struct inode *, void *),
 				void *data)
 {
-	struct hlist_node *node;
 	struct inode *inode = NULL;
 
 repeat:
-	hlist_for_each_entry(inode, node, head, i_hash) {
+	hlist_for_each_entry(inode, head, i_hash) {
 		spin_lock(&inode->i_lock);
 		if (inode->i_sb != sb) {
 			spin_unlock(&inode->i_lock);
@@ -830,11 +829,10 @@ repeat:
 static struct inode *find_inode_fast(struct super_block *sb,
 				struct hlist_head *head, unsigned long ino)
 {
-	struct hlist_node *node;
 	struct inode *inode = NULL;
 
 repeat:
-	hlist_for_each_entry(inode, node, head, i_hash) {
+	hlist_for_each_entry(inode, head, i_hash) {
 		spin_lock(&inode->i_lock);
 		if (inode->i_ino != ino) {
 			spin_unlock(&inode->i_lock);
@@ -1132,11 +1130,10 @@ EXPORT_SYMBOL(iget_locked);
 static int test_inode_iunique(struct super_block *sb, unsigned long ino)
 {
 	struct hlist_head *b = inode_hashtable + hash(sb, ino);
-	struct hlist_node *node;
 	struct inode *inode;
 
 	spin_lock(&inode_hash_lock);
-	hlist_for_each_entry(inode, node, b, i_hash) {
+	hlist_for_each_entry(inode, b, i_hash) {
 		if (inode->i_ino == ino && inode->i_sb == sb) {
 			spin_unlock(&inode_hash_lock);
 			return 0;
@@ -1291,10 +1288,9 @@ int insert_inode_locked(struct inode *inode)
 	struct hlist_head *head = inode_hashtable + hash(sb, ino);
 
 	while (1) {
-		struct hlist_node *node;
 		struct inode *old = NULL;
 		spin_lock(&inode_hash_lock);
-		hlist_for_each_entry(old, node, head, i_hash) {
+		hlist_for_each_entry(old, head, i_hash) {
 			if (old->i_ino != ino)
 				continue;
 			if (old->i_sb != sb)
@@ -1306,7 +1302,7 @@ int insert_inode_locked(struct inode *inode)
 			}
 			break;
 		}
-		if (likely(!node)) {
+		if (likely(!old)) {
 			spin_lock(&inode->i_lock);
 			inode->i_state |= I_NEW;
 			hlist_add_head(&inode->i_hash, head);
@@ -1334,11 +1330,10 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval,
 	struct hlist_head *head = inode_hashtable + hash(sb, hashval);
 
 	while (1) {
-		struct hlist_node *node;
 		struct inode *old = NULL;
 
 		spin_lock(&inode_hash_lock);
-		hlist_for_each_entry(old, node, head, i_hash) {
+		hlist_for_each_entry(old, head, i_hash) {
 			if (old->i_sb != sb)
 				continue;
 			if (!test(old, data))
@@ -1350,7 +1345,7 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval,
 			}
 			break;
 		}
-		if (likely(!node)) {
+		if (likely(!old)) {
 			spin_lock(&inode->i_lock);
 			inode->i_state |= I_NEW;
 			hlist_add_head(&inode->i_hash, head);
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index 0e17090c310f..abdd75d44dd4 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -32,15 +32,15 @@
 static struct hlist_head	nlm_server_hosts[NLM_HOST_NRHASH];
 static struct hlist_head	nlm_client_hosts[NLM_HOST_NRHASH];
 
-#define for_each_host(host, pos, chain, table) \
+#define for_each_host(host, chain, table) \
 	for ((chain) = (table); \
 	     (chain) < (table) + NLM_HOST_NRHASH; ++(chain)) \
-		hlist_for_each_entry((host), (pos), (chain), h_hash)
+		hlist_for_each_entry((host), (chain), h_hash)
 
-#define for_each_host_safe(host, pos, next, chain, table) \
+#define for_each_host_safe(host, next, chain, table) \
 	for ((chain) = (table); \
 	     (chain) < (table) + NLM_HOST_NRHASH; ++(chain)) \
-		hlist_for_each_entry_safe((host), (pos), (next), \
+		hlist_for_each_entry_safe((host), (next), \
 						(chain), h_hash)
 
 static unsigned long		nrhosts;
@@ -225,7 +225,6 @@ struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap,
 		.net		= net,
 	};
 	struct hlist_head *chain;
-	struct hlist_node *pos;
 	struct nlm_host	*host;
 	struct nsm_handle *nsm = NULL;
 	struct lockd_net *ln = net_generic(net, lockd_net_id);
@@ -237,7 +236,7 @@ struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap,
 	mutex_lock(&nlm_host_mutex);
 
 	chain = &nlm_client_hosts[nlm_hash_address(sap)];
-	hlist_for_each_entry(host, pos, chain, h_hash) {
+	hlist_for_each_entry(host, chain, h_hash) {
 		if (host->net != net)
 			continue;
 		if (!rpc_cmp_addr(nlm_addr(host), sap))
@@ -322,7 +321,6 @@ struct nlm_host *nlmsvc_lookup_host(const struct svc_rqst *rqstp,
 				    const size_t hostname_len)
 {
 	struct hlist_head *chain;
-	struct hlist_node *pos;
 	struct nlm_host	*host = NULL;
 	struct nsm_handle *nsm = NULL;
 	struct sockaddr *src_sap = svc_daddr(rqstp);
@@ -350,7 +348,7 @@ struct nlm_host *nlmsvc_lookup_host(const struct svc_rqst *rqstp,
 		nlm_gc_hosts(net);
 
 	chain = &nlm_server_hosts[nlm_hash_address(ni.sap)];
-	hlist_for_each_entry(host, pos, chain, h_hash) {
+	hlist_for_each_entry(host, chain, h_hash) {
 		if (host->net != net)
 			continue;
 		if (!rpc_cmp_addr(nlm_addr(host), ni.sap))
@@ -515,10 +513,9 @@ static struct nlm_host *next_host_state(struct hlist_head *cache,
 {
 	struct nlm_host *host;
 	struct hlist_head *chain;
-	struct hlist_node *pos;
 
 	mutex_lock(&nlm_host_mutex);
-	for_each_host(host, pos, chain, cache) {
+	for_each_host(host, chain, cache) {
 		if (host->h_nsmhandle == nsm
 		    && host->h_nsmstate != info->state) {
 			host->h_nsmstate = info->state;
@@ -570,7 +567,6 @@ void nlm_host_rebooted(const struct nlm_reboot *info)
 static void nlm_complain_hosts(struct net *net)
 {
 	struct hlist_head *chain;
-	struct hlist_node *pos;
 	struct nlm_host	*host;
 
 	if (net) {
@@ -587,7 +583,7 @@ static void nlm_complain_hosts(struct net *net)
 		dprintk("lockd: %lu hosts left:\n", nrhosts);
 	}
 
-	for_each_host(host, pos, chain, nlm_server_hosts) {
+	for_each_host(host, chain, nlm_server_hosts) {
 		if (net && host->net != net)
 			continue;
 		dprintk("       %s (cnt %d use %d exp %ld net %p)\n",
@@ -600,14 +596,13 @@ void
 nlm_shutdown_hosts_net(struct net *net)
 {
 	struct hlist_head *chain;
-	struct hlist_node *pos;
 	struct nlm_host	*host;
 
 	mutex_lock(&nlm_host_mutex);
 
 	/* First, make all hosts eligible for gc */
 	dprintk("lockd: nuking all hosts in net %p...\n", net);
-	for_each_host(host, pos, chain, nlm_server_hosts) {
+	for_each_host(host, chain, nlm_server_hosts) {
 		if (net && host->net != net)
 			continue;
 		host->h_expires = jiffies - 1;
@@ -644,11 +639,11 @@ static void
 nlm_gc_hosts(struct net *net)
 {
 	struct hlist_head *chain;
-	struct hlist_node *pos, *next;
+	struct hlist_node *next;
 	struct nlm_host	*host;
 
 	dprintk("lockd: host garbage collection for net %p\n", net);
-	for_each_host(host, pos, chain, nlm_server_hosts) {
+	for_each_host(host, chain, nlm_server_hosts) {
 		if (net && host->net != net)
 			continue;
 		host->h_inuse = 0;
@@ -657,7 +652,7 @@ nlm_gc_hosts(struct net *net)
 	/* Mark all hosts that hold locks, blocks or shares */
 	nlmsvc_mark_resources(net);
 
-	for_each_host_safe(host, pos, next, chain, nlm_server_hosts) {
+	for_each_host_safe(host, next, chain, nlm_server_hosts) {
 		if (net && host->net != net)
 			continue;
 		if (atomic_read(&host->h_count) || host->h_inuse
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index b3a24b07d981..d17bb62b06d6 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -84,7 +84,6 @@ __be32
 nlm_lookup_file(struct svc_rqst *rqstp, struct nlm_file **result,
 					struct nfs_fh *f)
 {
-	struct hlist_node *pos;
 	struct nlm_file	*file;
 	unsigned int	hash;
 	__be32		nfserr;
@@ -96,7 +95,7 @@ nlm_lookup_file(struct svc_rqst *rqstp, struct nlm_file **result,
 	/* Lock file table */
 	mutex_lock(&nlm_file_mutex);
 
-	hlist_for_each_entry(file, pos, &nlm_files[hash], f_list)
+	hlist_for_each_entry(file, &nlm_files[hash], f_list)
 		if (!nfs_compare_fh(&file->f_handle, f))
 			goto found;
 
@@ -248,13 +247,13 @@ static int
 nlm_traverse_files(void *data, nlm_host_match_fn_t match,
 		int (*is_failover_file)(void *data, struct nlm_file *file))
 {
-	struct hlist_node *pos, *next;
+	struct hlist_node *next;
 	struct nlm_file	*file;
 	int i, ret = 0;
 
 	mutex_lock(&nlm_file_mutex);
 	for (i = 0; i < FILE_NRHASH; i++) {
-		hlist_for_each_entry_safe(file, pos, next, &nlm_files[i], f_list) {
+		hlist_for_each_entry_safe(file, next, &nlm_files[i], f_list) {
 			if (is_failover_file && !is_failover_file(data, file))
 				continue;
 			file->f_count++;
diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c
index d35b62e83ea6..6da209bd9408 100644
--- a/fs/nfs/pnfs_dev.c
+++ b/fs/nfs/pnfs_dev.c
@@ -77,9 +77,8 @@ _lookup_deviceid(const struct pnfs_layoutdriver_type *ld,
 		 long hash)
 {
 	struct nfs4_deviceid_node *d;
-	struct hlist_node *n;
 
-	hlist_for_each_entry_rcu(d, n, &nfs4_deviceid_cache[hash], node)
+	hlist_for_each_entry_rcu(d, &nfs4_deviceid_cache[hash], node)
 		if (d->ld == ld && d->nfs_client == clp &&
 		    !memcmp(&d->deviceid, id, sizeof(*id))) {
 			if (atomic_read(&d->ref))
@@ -248,12 +247,11 @@ static void
 _deviceid_purge_client(const struct nfs_client *clp, long hash)
 {
 	struct nfs4_deviceid_node *d;
-	struct hlist_node *n;
 	HLIST_HEAD(tmp);
 
 	spin_lock(&nfs4_deviceid_lock);
 	rcu_read_lock();
-	hlist_for_each_entry_rcu(d, n, &nfs4_deviceid_cache[hash], node)
+	hlist_for_each_entry_rcu(d, &nfs4_deviceid_cache[hash], node)
 		if (d->nfs_client == clp && atomic_read(&d->ref)) {
 			hlist_del_init_rcu(&d->node);
 			hlist_add_head(&d->tmpnode, &tmp);
@@ -291,12 +289,11 @@ void
 nfs4_deviceid_mark_client_invalid(struct nfs_client *clp)
 {
 	struct nfs4_deviceid_node *d;
-	struct hlist_node *n;
 	int i;
 
 	rcu_read_lock();
 	for (i = 0; i < NFS4_DEVICE_ID_HASH_SIZE; i ++){
-		hlist_for_each_entry_rcu(d, n, &nfs4_deviceid_cache[i], node)
+		hlist_for_each_entry_rcu(d, &nfs4_deviceid_cache[i], node)
 			if (d->nfs_client == clp)
 				set_bit(NFS_DEVICEID_INVALID, &d->flags);
 	}
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index 2cbac34a55da..da3dbd0f8979 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -120,7 +120,6 @@ hash_refile(struct svc_cacherep *rp)
 int
 nfsd_cache_lookup(struct svc_rqst *rqstp)
 {
-	struct hlist_node	*hn;
 	struct hlist_head 	*rh;
 	struct svc_cacherep	*rp;
 	__be32			xid = rqstp->rq_xid;
@@ -141,7 +140,7 @@ nfsd_cache_lookup(struct svc_rqst *rqstp)
 	rtn = RC_DOIT;
 
 	rh = &cache_hash[request_hash(xid)];
-	hlist_for_each_entry(rp, hn, rh, c_hash) {
+	hlist_for_each_entry(rp, rh, c_hash) {
 		if (rp->c_state != RC_UNUSED &&
 		    xid == rp->c_xid && proc == rp->c_proc &&
 		    proto == rp->c_prot && vers == rp->c_vers &&
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 6baadb5a8430..4bb21d67d9b1 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -52,7 +52,6 @@ void __fsnotify_vfsmount_delete(struct vfsmount *mnt)
 void __fsnotify_update_child_dentry_flags(struct inode *inode)
 {
 	struct dentry *alias;
-	struct hlist_node *p;
 	int watched;
 
 	if (!S_ISDIR(inode->i_mode))
@@ -64,7 +63,7 @@ void __fsnotify_update_child_dentry_flags(struct inode *inode)
 	spin_lock(&inode->i_lock);
 	/* run all of the dentries associated with this inode.  Since this is a
 	 * directory, there damn well better only be one item on this list */
-	hlist_for_each_entry(alias, p, &inode->i_dentry, d_alias) {
+	hlist_for_each_entry(alias, &inode->i_dentry, d_alias) {
 		struct dentry *child;
 
 		/* run all of the children of the original inode and fix their
diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c
index f31e90fc050d..74825be65b7b 100644
--- a/fs/notify/inode_mark.c
+++ b/fs/notify/inode_mark.c
@@ -36,12 +36,11 @@
 static void fsnotify_recalc_inode_mask_locked(struct inode *inode)
 {
 	struct fsnotify_mark *mark;
-	struct hlist_node *pos;
 	__u32 new_mask = 0;
 
 	assert_spin_locked(&inode->i_lock);
 
-	hlist_for_each_entry(mark, pos, &inode->i_fsnotify_marks, i.i_list)
+	hlist_for_each_entry(mark, &inode->i_fsnotify_marks, i.i_list)
 		new_mask |= mark->mask;
 	inode->i_fsnotify_mask = new_mask;
 }
@@ -87,11 +86,11 @@ void fsnotify_destroy_inode_mark(struct fsnotify_mark *mark)
 void fsnotify_clear_marks_by_inode(struct inode *inode)
 {
 	struct fsnotify_mark *mark, *lmark;
-	struct hlist_node *pos, *n;
+	struct hlist_node *n;
 	LIST_HEAD(free_list);
 
 	spin_lock(&inode->i_lock);
-	hlist_for_each_entry_safe(mark, pos, n, &inode->i_fsnotify_marks, i.i_list) {
+	hlist_for_each_entry_safe(mark, n, &inode->i_fsnotify_marks, i.i_list) {
 		list_add(&mark->i.free_i_list, &free_list);
 		hlist_del_init_rcu(&mark->i.i_list);
 		fsnotify_get_mark(mark);
@@ -129,11 +128,10 @@ static struct fsnotify_mark *fsnotify_find_inode_mark_locked(
 		struct inode *inode)
 {
 	struct fsnotify_mark *mark;
-	struct hlist_node *pos;
 
 	assert_spin_locked(&inode->i_lock);
 
-	hlist_for_each_entry(mark, pos, &inode->i_fsnotify_marks, i.i_list) {
+	hlist_for_each_entry(mark, &inode->i_fsnotify_marks, i.i_list) {
 		if (mark->group == group) {
 			fsnotify_get_mark(mark);
 			return mark;
@@ -194,8 +192,7 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
 			    struct fsnotify_group *group, struct inode *inode,
 			    int allow_dups)
 {
-	struct fsnotify_mark *lmark;
-	struct hlist_node *node, *last = NULL;
+	struct fsnotify_mark *lmark, *last = NULL;
 	int ret = 0;
 
 	mark->flags |= FSNOTIFY_MARK_FLAG_INODE;
@@ -214,8 +211,8 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
 	}
 
 	/* should mark be in the middle of the current list? */
-	hlist_for_each_entry(lmark, node, &inode->i_fsnotify_marks, i.i_list) {
-		last = node;
+	hlist_for_each_entry(lmark, &inode->i_fsnotify_marks, i.i_list) {
+		last = lmark;
 
 		if ((lmark->group == group) && !allow_dups) {
 			ret = -EEXIST;
@@ -235,7 +232,7 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
 
 	BUG_ON(last == NULL);
 	/* mark should be the last entry.  last is the current last entry */
-	hlist_add_after_rcu(last, &mark->i.i_list);
+	hlist_add_after_rcu(&last->i.i_list, &mark->i.i_list);
 out:
 	fsnotify_recalc_inode_mask_locked(inode);
 	spin_unlock(&inode->i_lock);
diff --git a/fs/notify/vfsmount_mark.c b/fs/notify/vfsmount_mark.c
index 4df58b8ea64a..68ca5a8704b5 100644
--- a/fs/notify/vfsmount_mark.c
+++ b/fs/notify/vfsmount_mark.c
@@ -33,12 +33,12 @@
 void fsnotify_clear_marks_by_mount(struct vfsmount *mnt)
 {
 	struct fsnotify_mark *mark, *lmark;
-	struct hlist_node *pos, *n;
+	struct hlist_node *n;
 	struct mount *m = real_mount(mnt);
 	LIST_HEAD(free_list);
 
 	spin_lock(&mnt->mnt_root->d_lock);
-	hlist_for_each_entry_safe(mark, pos, n, &m->mnt_fsnotify_marks, m.m_list) {
+	hlist_for_each_entry_safe(mark, n, &m->mnt_fsnotify_marks, m.m_list) {
 		list_add(&mark->m.free_m_list, &free_list);
 		hlist_del_init_rcu(&mark->m.m_list);
 		fsnotify_get_mark(mark);
@@ -71,12 +71,11 @@ static void fsnotify_recalc_vfsmount_mask_locked(struct vfsmount *mnt)
 {
 	struct mount *m = real_mount(mnt);
 	struct fsnotify_mark *mark;
-	struct hlist_node *pos;
 	__u32 new_mask = 0;
 
 	assert_spin_locked(&mnt->mnt_root->d_lock);
 
-	hlist_for_each_entry(mark, pos, &m->mnt_fsnotify_marks, m.m_list)
+	hlist_for_each_entry(mark, &m->mnt_fsnotify_marks, m.m_list)
 		new_mask |= mark->mask;
 	m->mnt_fsnotify_mask = new_mask;
 }
@@ -114,11 +113,10 @@ static struct fsnotify_mark *fsnotify_find_vfsmount_mark_locked(struct fsnotify_
 {
 	struct mount *m = real_mount(mnt);
 	struct fsnotify_mark *mark;
-	struct hlist_node *pos;
 
 	assert_spin_locked(&mnt->mnt_root->d_lock);
 
-	hlist_for_each_entry(mark, pos, &m->mnt_fsnotify_marks, m.m_list) {
+	hlist_for_each_entry(mark, &m->mnt_fsnotify_marks, m.m_list) {
 		if (mark->group == group) {
 			fsnotify_get_mark(mark);
 			return mark;
@@ -153,8 +151,7 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark,
 			       int allow_dups)
 {
 	struct mount *m = real_mount(mnt);
-	struct fsnotify_mark *lmark;
-	struct hlist_node *node, *last = NULL;
+	struct fsnotify_mark *lmark, *last = NULL;
 	int ret = 0;
 
 	mark->flags |= FSNOTIFY_MARK_FLAG_VFSMOUNT;
@@ -173,8 +170,8 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark,
 	}
 
 	/* should mark be in the middle of the current list? */
-	hlist_for_each_entry(lmark, node, &m->mnt_fsnotify_marks, m.m_list) {
-		last = node;
+	hlist_for_each_entry(lmark, &m->mnt_fsnotify_marks, m.m_list) {
+		last = lmark;
 
 		if ((lmark->group == group) && !allow_dups) {
 			ret = -EEXIST;
@@ -194,7 +191,7 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark,
 
 	BUG_ON(last == NULL);
 	/* mark should be the last entry.  last is the current last entry */
-	hlist_add_after_rcu(last, &mark->m.m_list);
+	hlist_add_after_rcu(&last->m.m_list, &mark->m.m_list);
 out:
 	fsnotify_recalc_vfsmount_mask_locked(mnt);
 	spin_unlock(&mnt->mnt_root->d_lock);
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c
index 8db4b58b2e4b..ef999729e274 100644
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -169,11 +169,10 @@ struct dentry *ocfs2_find_local_alias(struct inode *inode,
 				      u64 parent_blkno,
 				      int skip_unhashed)
 {
-	struct hlist_node *p;
 	struct dentry *dentry;
 
 	spin_lock(&inode->i_lock);
-	hlist_for_each_entry(dentry, p, &inode->i_dentry, d_alias) {
+	hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) {
 		spin_lock(&dentry->d_lock);
 		if (ocfs2_match_dentry(dentry, parent_blkno, skip_unhashed)) {
 			trace_ocfs2_find_local_alias(dentry->d_name.len,
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index 01ebfd0bdad7..eeac97bb3bfa 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -2083,7 +2083,6 @@ static void dlm_finish_local_lockres_recovery(struct dlm_ctxt *dlm,
 					      u8 dead_node, u8 new_master)
 {
 	int i;
-	struct hlist_node *hash_iter;
 	struct hlist_head *bucket;
 	struct dlm_lock_resource *res, *next;
 
@@ -2114,7 +2113,7 @@ static void dlm_finish_local_lockres_recovery(struct dlm_ctxt *dlm,
 	 * if necessary */
 	for (i = 0; i < DLM_HASH_BUCKETS; i++) {
 		bucket = dlm_lockres_hash(dlm, i);
-		hlist_for_each_entry(res, hash_iter, bucket, hash_node) {
+		hlist_for_each_entry(res, bucket, hash_node) {
 			if (!(res->state & DLM_LOCK_RES_RECOVERING))
 				continue;
 
@@ -2273,7 +2272,6 @@ static void dlm_free_dead_locks(struct dlm_ctxt *dlm,
 
 static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node)
 {
-	struct hlist_node *iter;
 	struct dlm_lock_resource *res;
 	int i;
 	struct hlist_head *bucket;
@@ -2299,7 +2297,7 @@ static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node)
 	 */
 	for (i = 0; i < DLM_HASH_BUCKETS; i++) {
 		bucket = dlm_lockres_hash(dlm, i);
-		hlist_for_each_entry(res, iter, bucket, hash_node) {
+		hlist_for_each_entry(res, bucket, hash_node) {
  			/* always prune any $RECOVERY entries for dead nodes,
  			 * otherwise hangs can occur during later recovery */
 			if (dlm_is_recovery_lock(res->lockname.name,
diff --git a/fs/super.c b/fs/super.c
index df6c2f4c6b59..7465d4364208 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -447,14 +447,13 @@ struct super_block *sget(struct file_system_type *type,
 			void *data)
 {
 	struct super_block *s = NULL;
-	struct hlist_node *node;
 	struct super_block *old;
 	int err;
 
 retry:
 	spin_lock(&sb_lock);
 	if (test) {
-		hlist_for_each_entry(old, node, &type->fs_supers, s_instances) {
+		hlist_for_each_entry(old, &type->fs_supers, s_instances) {
 			if (!test(old, data))
 				continue;
 			if (!grab_super(old))
@@ -554,10 +553,9 @@ void iterate_supers_type(struct file_system_type *type,
 	void (*f)(struct super_block *, void *), void *arg)
 {
 	struct super_block *sb, *p = NULL;
-	struct hlist_node *node;
 
 	spin_lock(&sb_lock);
-	hlist_for_each_entry(sb, node, &type->fs_supers, s_instances) {
+	hlist_for_each_entry(sb, &type->fs_supers, s_instances) {
 		sb->s_count++;
 		spin_unlock(&sb_lock);
 
diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c
index 2ce9a5db6ab5..15c68f9489ae 100644
--- a/fs/sysfs/bin.c
+++ b/fs/sysfs/bin.c
@@ -461,14 +461,13 @@ const struct file_operations bin_fops = {
 void unmap_bin_file(struct sysfs_dirent *attr_sd)
 {
 	struct bin_buffer *bb;
-	struct hlist_node *tmp;
 
 	if (sysfs_type(attr_sd) != SYSFS_KOBJ_BIN_ATTR)
 		return;
 
 	mutex_lock(&sysfs_bin_lock);
 
-	hlist_for_each_entry(bb, tmp, &attr_sd->s_bin_attr.buffers, list) {
+	hlist_for_each_entry(bb, &attr_sd->s_bin_attr.buffers, list) {
 		struct inode *inode = file_inode(bb->file);
 
 		unmap_mapping_range(inode->i_mapping, 0, 0, 1);
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 96fcbb85ff83..d1dba7ce75ae 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1442,9 +1442,8 @@ xlog_recover_find_tid(
 	xlog_tid_t		tid)
 {
 	xlog_recover_t		*trans;
-	struct hlist_node	*n;
 
-	hlist_for_each_entry(trans, n, head, r_list) {
+	hlist_for_each_entry(trans, head, r_list) {
 		if (trans->r_log_tid == tid)
 			return trans;
 	}
diff --git a/include/linux/hashtable.h b/include/linux/hashtable.h
index 227c62424f3c..a9df51f5d54c 100644
--- a/include/linux/hashtable.h
+++ b/include/linux/hashtable.h
@@ -115,51 +115,50 @@ static inline void hash_del_rcu(struct hlist_node *node)
  * hash_for_each - iterate over a hashtable
  * @name: hashtable to iterate
  * @bkt: integer to use as bucket loop cursor
- * @node: the &struct list_head to use as a loop cursor for each entry
  * @obj: the type * to use as a loop cursor for each entry
  * @member: the name of the hlist_node within the struct
  */
-#define hash_for_each(name, bkt, node, obj, member)				\
-	for ((bkt) = 0, node = NULL; node == NULL && (bkt) < HASH_SIZE(name); (bkt)++)\
-		hlist_for_each_entry(obj, node, &name[bkt], member)
+#define hash_for_each(name, bkt, obj, member)				\
+	for ((bkt) = 0, obj = NULL; obj == NULL && (bkt) < HASH_SIZE(name);\
+			(bkt)++)\
+		hlist_for_each_entry(obj, &name[bkt], member)
 
 /**
  * hash_for_each_rcu - iterate over a rcu enabled hashtable
  * @name: hashtable to iterate
  * @bkt: integer to use as bucket loop cursor
- * @node: the &struct list_head to use as a loop cursor for each entry
  * @obj: the type * to use as a loop cursor for each entry
  * @member: the name of the hlist_node within the struct
  */
-#define hash_for_each_rcu(name, bkt, node, obj, member)				\
-	for ((bkt) = 0, node = NULL; node == NULL && (bkt) < HASH_SIZE(name); (bkt)++)\
-		hlist_for_each_entry_rcu(obj, node, &name[bkt], member)
+#define hash_for_each_rcu(name, bkt, obj, member)			\
+	for ((bkt) = 0, obj = NULL; obj == NULL && (bkt) < HASH_SIZE(name);\
+			(bkt)++)\
+		hlist_for_each_entry_rcu(obj, &name[bkt], member)
 
 /**
  * hash_for_each_safe - iterate over a hashtable safe against removal of
  * hash entry
  * @name: hashtable to iterate
  * @bkt: integer to use as bucket loop cursor
- * @node: the &struct list_head to use as a loop cursor for each entry
  * @tmp: a &struct used for temporary storage
  * @obj: the type * to use as a loop cursor for each entry
  * @member: the name of the hlist_node within the struct
  */
-#define hash_for_each_safe(name, bkt, node, tmp, obj, member)			\
-	for ((bkt) = 0, node = NULL; node == NULL && (bkt) < HASH_SIZE(name); (bkt)++)\
-		hlist_for_each_entry_safe(obj, node, tmp, &name[bkt], member)
+#define hash_for_each_safe(name, bkt, tmp, obj, member)			\
+	for ((bkt) = 0, obj = NULL; obj == NULL && (bkt) < HASH_SIZE(name);\
+			(bkt)++)\
+		hlist_for_each_entry_safe(obj, tmp, &name[bkt], member)
 
 /**
  * hash_for_each_possible - iterate over all possible objects hashing to the
  * same bucket
  * @name: hashtable to iterate
  * @obj: the type * to use as a loop cursor for each entry
- * @node: the &struct list_head to use as a loop cursor for each entry
  * @member: the name of the hlist_node within the struct
  * @key: the key of the objects to iterate over
  */
-#define hash_for_each_possible(name, obj, node, member, key)			\
-	hlist_for_each_entry(obj, node,	&name[hash_min(key, HASH_BITS(name))], member)
+#define hash_for_each_possible(name, obj, member, key)			\
+	hlist_for_each_entry(obj, &name[hash_min(key, HASH_BITS(name))], member)
 
 /**
  * hash_for_each_possible_rcu - iterate over all possible objects hashing to the
@@ -167,25 +166,24 @@ static inline void hash_del_rcu(struct hlist_node *node)
  * in a rcu enabled hashtable
  * @name: hashtable to iterate
  * @obj: the type * to use as a loop cursor for each entry
- * @node: the &struct list_head to use as a loop cursor for each entry
  * @member: the name of the hlist_node within the struct
  * @key: the key of the objects to iterate over
  */
-#define hash_for_each_possible_rcu(name, obj, node, member, key)		\
-	hlist_for_each_entry_rcu(obj, node, &name[hash_min(key, HASH_BITS(name))], member)
+#define hash_for_each_possible_rcu(name, obj, member, key)		\
+	hlist_for_each_entry_rcu(obj, &name[hash_min(key, HASH_BITS(name))],\
+		member)
 
 /**
  * hash_for_each_possible_safe - iterate over all possible objects hashing to the
  * same bucket safe against removals
  * @name: hashtable to iterate
  * @obj: the type * to use as a loop cursor for each entry
- * @node: the &struct list_head to use as a loop cursor for each entry
  * @tmp: a &struct used for temporary storage
  * @member: the name of the hlist_node within the struct
  * @key: the key of the objects to iterate over
  */
-#define hash_for_each_possible_safe(name, obj, node, tmp, member, key)		\
-	hlist_for_each_entry_safe(obj, node, tmp,				\
+#define hash_for_each_possible_safe(name, obj, tmp, member, key)	\
+	hlist_for_each_entry_safe(obj, tmp,\
 		&name[hash_min(key, HASH_BITS(name))], member)
 
 
diff --git a/include/linux/if_team.h b/include/linux/if_team.h
index 4648d8021244..cfd21e3d5506 100644
--- a/include/linux/if_team.h
+++ b/include/linux/if_team.h
@@ -216,11 +216,10 @@ static inline struct hlist_head *team_port_index_hash(struct team *team,
 static inline struct team_port *team_get_port_by_index(struct team *team,
 						       int port_index)
 {
-	struct hlist_node *p;
 	struct team_port *port;
 	struct hlist_head *head = team_port_index_hash(team, port_index);
 
-	hlist_for_each_entry(port, p, head, hlist)
+	hlist_for_each_entry(port, head, hlist)
 		if (port->index == port_index)
 			return port;
 	return NULL;
@@ -228,11 +227,10 @@ static inline struct team_port *team_get_port_by_index(struct team *team,
 static inline struct team_port *team_get_port_by_index_rcu(struct team *team,
 							   int port_index)
 {
-	struct hlist_node *p;
 	struct team_port *port;
 	struct hlist_head *head = team_port_index_hash(team, port_index);
 
-	hlist_for_each_entry_rcu(port, p, head, hlist)
+	hlist_for_each_entry_rcu(port, head, hlist)
 		if (port->index == port_index)
 			return port;
 	return NULL;
diff --git a/include/linux/list.h b/include/linux/list.h
index cc6d2aa6b415..d991cc147c98 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -666,54 +666,49 @@ static inline void hlist_move_list(struct hlist_head *old,
 	for (pos = (head)->first; pos && ({ n = pos->next; 1; }); \
 	     pos = n)
 
+#define hlist_entry_safe(ptr, type, member) \
+	(ptr) ? hlist_entry(ptr, type, member) : NULL
+
 /**
  * hlist_for_each_entry	- iterate over list of given type
- * @tpos:	the type * to use as a loop cursor.
- * @pos:	the &struct hlist_node to use as a loop cursor.
+ * @pos:	the type * to use as a loop cursor.
  * @head:	the head for your list.
  * @member:	the name of the hlist_node within the struct.
  */
-#define hlist_for_each_entry(tpos, pos, head, member)			 \
-	for (pos = (head)->first;					 \
-	     pos &&							 \
-		({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
-	     pos = pos->next)
+#define hlist_for_each_entry(pos, head, member)				\
+	for (pos = hlist_entry_safe((head)->first, typeof(*(pos)), member);\
+	     pos;							\
+	     pos = hlist_entry_safe((pos)->member.next, typeof(*(pos)), member))
 
 /**
  * hlist_for_each_entry_continue - iterate over a hlist continuing after current point
- * @tpos:	the type * to use as a loop cursor.
- * @pos:	the &struct hlist_node to use as a loop cursor.
+ * @pos:	the type * to use as a loop cursor.
  * @member:	the name of the hlist_node within the struct.
  */
-#define hlist_for_each_entry_continue(tpos, pos, member)		 \
-	for (pos = (pos)->next;						 \
-	     pos &&							 \
-		({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
-	     pos = pos->next)
+#define hlist_for_each_entry_continue(pos, member)			\
+	for (pos = hlist_entry_safe((pos)->member.next, typeof(*(pos)), member);\
+	     pos;							\
+	     pos = hlist_entry_safe((pos)->member.next, typeof(*(pos)), member))
 
 /**
  * hlist_for_each_entry_from - iterate over a hlist continuing from current point
- * @tpos:	the type * to use as a loop cursor.
- * @pos:	the &struct hlist_node to use as a loop cursor.
+ * @pos:	the type * to use as a loop cursor.
  * @member:	the name of the hlist_node within the struct.
  */
-#define hlist_for_each_entry_from(tpos, pos, member)			 \
-	for (; pos &&							 \
-		({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
-	     pos = pos->next)
+#define hlist_for_each_entry_from(pos, member)				\
+	for (; pos;							\
+	     pos = hlist_entry_safe((pos)->member.next, typeof(*(pos)), member))
 
 /**
  * hlist_for_each_entry_safe - iterate over list of given type safe against removal of list entry
- * @tpos:	the type * to use as a loop cursor.
- * @pos:	the &struct hlist_node to use as a loop cursor.
+ * @pos:	the type * to use as a loop cursor.
  * @n:		another &struct hlist_node to use as temporary storage
  * @head:	the head for your list.
  * @member:	the name of the hlist_node within the struct.
  */
-#define hlist_for_each_entry_safe(tpos, pos, n, head, member) 		 \
-	for (pos = (head)->first;					 \
-	     pos && ({ n = pos->next; 1; }) && 				 \
-		({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
-	     pos = n)
+#define hlist_for_each_entry_safe(pos, n, head, member) 		\
+	for (pos = hlist_entry_safe((head)->first, typeof(*pos), member);\
+	     pos && ({ n = pos->member.next; 1; });			\
+	     pos = hlist_entry_safe(n, typeof(*pos), member))
 
 #endif
diff --git a/include/linux/pid.h b/include/linux/pid.h
index 2381c973d897..a089a3c447fc 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -176,9 +176,8 @@ pid_t pid_vnr(struct pid *pid);
 
 #define do_each_pid_task(pid, type, task)				\
 	do {								\
-		struct hlist_node *pos___;				\
 		if ((pid) != NULL)					\
-			hlist_for_each_entry_rcu((task), pos___,	\
+			hlist_for_each_entry_rcu((task),		\
 				&(pid)->tasks[type], pids[type].node) {
 
 			/*
diff --git a/include/linux/rculist.h b/include/linux/rculist.h
index c92dd28eaa6c..8089e35d47ac 100644
--- a/include/linux/rculist.h
+++ b/include/linux/rculist.h
@@ -445,8 +445,7 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev,
 
 /**
  * hlist_for_each_entry_rcu - iterate over rcu list of given type
- * @tpos:	the type * to use as a loop cursor.
- * @pos:	the &struct hlist_node to use as a loop cursor.
+ * @pos:	the type * to use as a loop cursor.
  * @head:	the head for your list.
  * @member:	the name of the hlist_node within the struct.
  *
@@ -454,16 +453,16 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev,
  * the _rcu list-mutation primitives such as hlist_add_head_rcu()
  * as long as the traversal is guarded by rcu_read_lock().
  */
-#define hlist_for_each_entry_rcu(tpos, pos, head, member)		\
-	for (pos = rcu_dereference_raw(hlist_first_rcu(head));		\
-		pos &&							 \
-		({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \
-		pos = rcu_dereference_raw(hlist_next_rcu(pos)))
+#define hlist_for_each_entry_rcu(pos, head, member)			\
+	for (pos = hlist_entry_safe (rcu_dereference_raw(hlist_first_rcu(head)),\
+			typeof(*(pos)), member);			\
+		pos;							\
+		pos = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(\
+			&(pos)->member)), typeof(*(pos)), member))
 
 /**
  * hlist_for_each_entry_rcu_bh - iterate over rcu list of given type
- * @tpos:	the type * to use as a loop cursor.
- * @pos:	the &struct hlist_node to use as a loop cursor.
+ * @pos:	the type * to use as a loop cursor.
  * @head:	the head for your list.
  * @member:	the name of the hlist_node within the struct.
  *
@@ -471,35 +470,36 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev,
  * the _rcu list-mutation primitives such as hlist_add_head_rcu()
  * as long as the traversal is guarded by rcu_read_lock().
  */
-#define hlist_for_each_entry_rcu_bh(tpos, pos, head, member)		 \
-	for (pos = rcu_dereference_bh((head)->first);			 \
-		pos &&							 \
-		({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \
-		pos = rcu_dereference_bh(pos->next))
+#define hlist_for_each_entry_rcu_bh(pos, head, member)			\
+	for (pos = hlist_entry_safe(rcu_dereference_bh(hlist_first_rcu(head)),\
+			typeof(*(pos)), member);			\
+		pos;							\
+		pos = hlist_entry_safe(rcu_dereference_bh(hlist_next_rcu(\
+			&(pos)->member)), typeof(*(pos)), member))
 
 /**
  * hlist_for_each_entry_continue_rcu - iterate over a hlist continuing after current point
- * @tpos:	the type * to use as a loop cursor.
- * @pos:	the &struct hlist_node to use as a loop cursor.
+ * @pos:	the type * to use as a loop cursor.
  * @member:	the name of the hlist_node within the struct.
  */
-#define hlist_for_each_entry_continue_rcu(tpos, pos, member)		\
-	for (pos = rcu_dereference((pos)->next);			\
-	     pos &&							\
-	     ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; });  \
-	     pos = rcu_dereference(pos->next))
+#define hlist_for_each_entry_continue_rcu(pos, member)			\
+	for (pos = hlist_entry_safe(rcu_dereference((pos)->member.next),\
+			typeof(*(pos)), member);			\
+	     pos;							\
+	     pos = hlist_entry_safe(rcu_dereference((pos)->member.next),\
+			typeof(*(pos)), member))
 
 /**
  * hlist_for_each_entry_continue_rcu_bh - iterate over a hlist continuing after current point
- * @tpos:	the type * to use as a loop cursor.
- * @pos:	the &struct hlist_node to use as a loop cursor.
+ * @pos:	the type * to use as a loop cursor.
  * @member:	the name of the hlist_node within the struct.
  */
-#define hlist_for_each_entry_continue_rcu_bh(tpos, pos, member)		\
-	for (pos = rcu_dereference_bh((pos)->next);			\
-	     pos &&							\
-	     ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; });  \
-	     pos = rcu_dereference_bh(pos->next))
+#define hlist_for_each_entry_continue_rcu_bh(pos, member)		\
+	for (pos = hlist_entry_safe(rcu_dereference_bh((pos)->member.next),\
+			typeof(*(pos)), member);			\
+	     pos;							\
+	     pos = hlist_entry_safe(rcu_dereference_bh((pos)->member.next),\
+			typeof(*(pos)), member))
 
 
 #endif	/* __KERNEL__ */
diff --git a/include/net/ax25.h b/include/net/ax25.h
index 53539acbd81a..89ed9ac5701f 100644
--- a/include/net/ax25.h
+++ b/include/net/ax25.h
@@ -161,8 +161,8 @@ typedef struct ax25_uid_assoc {
 	ax25_address		call;
 } ax25_uid_assoc;
 
-#define ax25_uid_for_each(__ax25, node, list) \
-	hlist_for_each_entry(__ax25, node, list, uid_node)
+#define ax25_uid_for_each(__ax25, list) \
+	hlist_for_each_entry(__ax25, list, uid_node)
 
 #define ax25_uid_hold(ax25) \
 	atomic_inc(&((ax25)->refcount))
@@ -247,8 +247,8 @@ typedef struct ax25_cb {
 
 #define ax25_sk(__sk) ((ax25_cb *)(__sk)->sk_protinfo)
 
-#define ax25_for_each(__ax25, node, list) \
-	hlist_for_each_entry(__ax25, node, list, ax25_node)
+#define ax25_for_each(__ax25, list) \
+	hlist_for_each_entry(__ax25, list, ax25_node)
 
 #define ax25_cb_hold(__ax25) \
 	atomic_inc(&((__ax25)->refcount))
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index 7b2ae9d37076..ef83d9e844b5 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -94,8 +94,8 @@ static inline struct net *ib_net(struct inet_bind_bucket *ib)
 	return read_pnet(&ib->ib_net);
 }
 
-#define inet_bind_bucket_for_each(tb, pos, head) \
-	hlist_for_each_entry(tb, pos, head, node)
+#define inet_bind_bucket_for_each(tb, head) \
+	hlist_for_each_entry(tb, head, node)
 
 struct inet_bind_hashbucket {
 	spinlock_t		lock;
diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h
index 7d658d577368..f908dfc06505 100644
--- a/include/net/inet_timewait_sock.h
+++ b/include/net/inet_timewait_sock.h
@@ -178,11 +178,11 @@ static inline int inet_twsk_del_dead_node(struct inet_timewait_sock *tw)
 #define inet_twsk_for_each(tw, node, head) \
 	hlist_nulls_for_each_entry(tw, node, head, tw_node)
 
-#define inet_twsk_for_each_inmate(tw, node, jail) \
-	hlist_for_each_entry(tw, node, jail, tw_death_node)
+#define inet_twsk_for_each_inmate(tw, jail) \
+	hlist_for_each_entry(tw, jail, tw_death_node)
 
-#define inet_twsk_for_each_inmate_safe(tw, node, safe, jail) \
-	hlist_for_each_entry_safe(tw, node, safe, jail, tw_death_node)
+#define inet_twsk_for_each_inmate_safe(tw, safe, jail) \
+	hlist_for_each_entry_safe(tw, safe, jail, tw_death_node)
 
 static inline struct inet_timewait_sock *inet_twsk(const struct sock *sk)
 {
diff --git a/include/net/netrom.h b/include/net/netrom.h
index f0793c1cb5f8..121dcf854db5 100644
--- a/include/net/netrom.h
+++ b/include/net/netrom.h
@@ -154,17 +154,17 @@ static __inline__ void nr_node_unlock(struct nr_node *nr_node)
 	nr_node_put(nr_node);
 }
 
-#define nr_neigh_for_each(__nr_neigh, node, list) \
-	hlist_for_each_entry(__nr_neigh, node, list, neigh_node)
+#define nr_neigh_for_each(__nr_neigh, list) \
+	hlist_for_each_entry(__nr_neigh, list, neigh_node)
 
-#define nr_neigh_for_each_safe(__nr_neigh, node, node2, list) \
-	hlist_for_each_entry_safe(__nr_neigh, node, node2, list, neigh_node)
+#define nr_neigh_for_each_safe(__nr_neigh, node2, list) \
+	hlist_for_each_entry_safe(__nr_neigh, node2, list, neigh_node)
 
-#define nr_node_for_each(__nr_node, node, list) \
-	hlist_for_each_entry(__nr_node, node, list, node_node)
+#define nr_node_for_each(__nr_node, list) \
+	hlist_for_each_entry(__nr_node, list, node_node)
 
-#define nr_node_for_each_safe(__nr_node, node, node2, list) \
-	hlist_for_each_entry_safe(__nr_node, node, node2, list, node_node)
+#define nr_node_for_each_safe(__nr_node, node2, list) \
+	hlist_for_each_entry_safe(__nr_node, node2, list, node_node)
 
 
 /*********************************************************************/
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 2761c905504e..f10818fc8804 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -339,11 +339,10 @@ static inline struct Qdisc_class_common *
 qdisc_class_find(const struct Qdisc_class_hash *hash, u32 id)
 {
 	struct Qdisc_class_common *cl;
-	struct hlist_node *n;
 	unsigned int h;
 
 	h = qdisc_class_hash(id, hash->hashmask);
-	hlist_for_each_entry(cl, n, &hash->hash[h], hnode) {
+	hlist_for_each_entry(cl, &hash->hash[h], hnode) {
 		if (cl->classid == id)
 			return cl;
 	}
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 7fdf298a47ef..df85a0c0f2d5 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -675,8 +675,8 @@ static inline int sctp_vtag_hashfn(__u16 lport, __u16 rport, __u32 vtag)
 	return h & (sctp_assoc_hashsize - 1);
 }
 
-#define sctp_for_each_hentry(epb, node, head) \
-	hlist_for_each_entry(epb, node, head, node)
+#define sctp_for_each_hentry(epb, head) \
+	hlist_for_each_entry(epb, head, node)
 
 /* Is a socket of this style? */
 #define sctp_style(sk, style) __sctp_style((sk), (SCTP_SOCKET_##style))
diff --git a/include/net/sock.h b/include/net/sock.h
index a66caa223d18..14f6e9d19dc7 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -606,24 +606,23 @@ static inline void sk_add_bind_node(struct sock *sk,
 	hlist_add_head(&sk->sk_bind_node, list);
 }
 
-#define sk_for_each(__sk, node, list) \
-	hlist_for_each_entry(__sk, node, list, sk_node)
-#define sk_for_each_rcu(__sk, node, list) \
-	hlist_for_each_entry_rcu(__sk, node, list, sk_node)
+#define sk_for_each(__sk, list) \
+	hlist_for_each_entry(__sk, list, sk_node)
+#define sk_for_each_rcu(__sk, list) \
+	hlist_for_each_entry_rcu(__sk, list, sk_node)
 #define sk_nulls_for_each(__sk, node, list) \
 	hlist_nulls_for_each_entry(__sk, node, list, sk_nulls_node)
 #define sk_nulls_for_each_rcu(__sk, node, list) \
 	hlist_nulls_for_each_entry_rcu(__sk, node, list, sk_nulls_node)
-#define sk_for_each_from(__sk, node) \
-	if (__sk && ({ node = &(__sk)->sk_node; 1; })) \
-		hlist_for_each_entry_from(__sk, node, sk_node)
+#define sk_for_each_from(__sk) \
+	hlist_for_each_entry_from(__sk, sk_node)
 #define sk_nulls_for_each_from(__sk, node) \
 	if (__sk && ({ node = &(__sk)->sk_nulls_node; 1; })) \
 		hlist_nulls_for_each_entry_from(__sk, node, sk_nulls_node)
-#define sk_for_each_safe(__sk, node, tmp, list) \
-	hlist_for_each_entry_safe(__sk, node, tmp, list, sk_node)
-#define sk_for_each_bound(__sk, node, list) \
-	hlist_for_each_entry(__sk, node, list, sk_bind_node)
+#define sk_for_each_safe(__sk, tmp, list) \
+	hlist_for_each_entry_safe(__sk, tmp, list, sk_node)
+#define sk_for_each_bound(__sk, list) \
+	hlist_for_each_entry(__sk, list, sk_bind_node)
 
 static inline struct user_namespace *sk_user_ns(struct sock *sk)
 {
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 40e0df6c2a2f..a32f9432666c 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -554,7 +554,6 @@ static struct css_set *find_existing_css_set(
 {
 	int i;
 	struct cgroupfs_root *root = cgrp->root;
-	struct hlist_node *node;
 	struct css_set *cg;
 	unsigned long key;
 
@@ -577,7 +576,7 @@ static struct css_set *find_existing_css_set(
 	}
 
 	key = css_set_hash(template);
-	hash_for_each_possible(css_set_table, cg, node, hlist, key) {
+	hash_for_each_possible(css_set_table, cg, hlist, key) {
 		if (!compare_css_sets(cg, oldcg, cgrp, template))
 			continue;
 
@@ -1611,7 +1610,6 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
 		struct cgroupfs_root *existing_root;
 		const struct cred *cred;
 		int i;
-		struct hlist_node *node;
 		struct css_set *cg;
 
 		BUG_ON(sb->s_root != NULL);
@@ -1666,7 +1664,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
 		/* Link the top cgroup in this hierarchy into all
 		 * the css_set objects */
 		write_lock(&css_set_lock);
-		hash_for_each(css_set_table, i, node, cg, hlist)
+		hash_for_each(css_set_table, i, cg, hlist)
 			link_css_set(&tmp_cg_links, cg, root_cgrp);
 		write_unlock(&css_set_lock);
 
@@ -4493,7 +4491,7 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss)
 {
 	struct cgroup_subsys_state *css;
 	int i, ret;
-	struct hlist_node *node, *tmp;
+	struct hlist_node *tmp;
 	struct css_set *cg;
 	unsigned long key;
 
@@ -4561,7 +4559,7 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss)
 	 * this is all done under the css_set_lock.
 	 */
 	write_lock(&css_set_lock);
-	hash_for_each_safe(css_set_table, i, node, tmp, cg, hlist) {
+	hash_for_each_safe(css_set_table, i, tmp, cg, hlist) {
 		/* skip entries that we already rehashed */
 		if (cg->subsys[ss->subsys_id])
 			continue;
@@ -4571,7 +4569,7 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss)
 		cg->subsys[ss->subsys_id] = css;
 		/* recompute hash and restore entry */
 		key = css_set_hash(cg->subsys);
-		hash_add(css_set_table, node, key);
+		hash_add(css_set_table, &cg->hlist, key);
 	}
 	write_unlock(&css_set_lock);
 
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 5a92cf6beff0..b0cd86501c30 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -5126,7 +5126,6 @@ static void do_perf_sw_event(enum perf_type_id type, u32 event_id,
 {
 	struct swevent_htable *swhash = &__get_cpu_var(swevent_htable);
 	struct perf_event *event;
-	struct hlist_node *node;
 	struct hlist_head *head;
 
 	rcu_read_lock();
@@ -5134,7 +5133,7 @@ static void do_perf_sw_event(enum perf_type_id type, u32 event_id,
 	if (!head)
 		goto end;
 
-	hlist_for_each_entry_rcu(event, node, head, hlist_entry) {
+	hlist_for_each_entry_rcu(event, head, hlist_entry) {
 		if (perf_swevent_match(event, type, event_id, data, regs))
 			perf_swevent_event(event, nr, data, regs);
 	}
@@ -5419,7 +5418,6 @@ void perf_tp_event(u64 addr, u64 count, void *record, int entry_size,
 {
 	struct perf_sample_data data;
 	struct perf_event *event;
-	struct hlist_node *node;
 
 	struct perf_raw_record raw = {
 		.size = entry_size,
@@ -5429,7 +5427,7 @@ void perf_tp_event(u64 addr, u64 count, void *record, int entry_size,
 	perf_sample_data_init(&data, addr, 0);
 	data.raw = &raw;
 
-	hlist_for_each_entry_rcu(event, node, head, hlist_entry) {
+	hlist_for_each_entry_rcu(event, head, hlist_entry) {
 		if (perf_tp_event_match(event, &data, regs))
 			perf_swevent_event(event, count, &data, regs);
 	}
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 550294d58a02..e35be53f6613 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -334,11 +334,10 @@ static inline void reset_kprobe_instance(void)
 struct kprobe __kprobes *get_kprobe(void *addr)
 {
 	struct hlist_head *head;
-	struct hlist_node *node;
 	struct kprobe *p;
 
 	head = &kprobe_table[hash_ptr(addr, KPROBE_HASH_BITS)];
-	hlist_for_each_entry_rcu(p, node, head, hlist) {
+	hlist_for_each_entry_rcu(p, head, hlist) {
 		if (p->addr == addr)
 			return p;
 	}
@@ -799,7 +798,6 @@ out:
 static void __kprobes optimize_all_kprobes(void)
 {
 	struct hlist_head *head;
-	struct hlist_node *node;
 	struct kprobe *p;
 	unsigned int i;
 
@@ -810,7 +808,7 @@ static void __kprobes optimize_all_kprobes(void)
 	kprobes_allow_optimization = true;
 	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
 		head = &kprobe_table[i];
-		hlist_for_each_entry_rcu(p, node, head, hlist)
+		hlist_for_each_entry_rcu(p, head, hlist)
 			if (!kprobe_disabled(p))
 				optimize_kprobe(p);
 	}
@@ -821,7 +819,6 @@ static void __kprobes optimize_all_kprobes(void)
 static void __kprobes unoptimize_all_kprobes(void)
 {
 	struct hlist_head *head;
-	struct hlist_node *node;
 	struct kprobe *p;
 	unsigned int i;
 
@@ -832,7 +829,7 @@ static void __kprobes unoptimize_all_kprobes(void)
 	kprobes_allow_optimization = false;
 	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
 		head = &kprobe_table[i];
-		hlist_for_each_entry_rcu(p, node, head, hlist) {
+		hlist_for_each_entry_rcu(p, head, hlist) {
 			if (!kprobe_disabled(p))
 				unoptimize_kprobe(p, false);
 		}
@@ -1148,7 +1145,7 @@ void __kprobes kprobe_flush_task(struct task_struct *tk)
 {
 	struct kretprobe_instance *ri;
 	struct hlist_head *head, empty_rp;
-	struct hlist_node *node, *tmp;
+	struct hlist_node *tmp;
 	unsigned long hash, flags = 0;
 
 	if (unlikely(!kprobes_initialized))
@@ -1159,12 +1156,12 @@ void __kprobes kprobe_flush_task(struct task_struct *tk)
 	hash = hash_ptr(tk, KPROBE_HASH_BITS);
 	head = &kretprobe_inst_table[hash];
 	kretprobe_table_lock(hash, &flags);
-	hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
+	hlist_for_each_entry_safe(ri, tmp, head, hlist) {
 		if (ri->task == tk)
 			recycle_rp_inst(ri, &empty_rp);
 	}
 	kretprobe_table_unlock(hash, &flags);
-	hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
+	hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) {
 		hlist_del(&ri->hlist);
 		kfree(ri);
 	}
@@ -1173,9 +1170,9 @@ void __kprobes kprobe_flush_task(struct task_struct *tk)
 static inline void free_rp_inst(struct kretprobe *rp)
 {
 	struct kretprobe_instance *ri;
-	struct hlist_node *pos, *next;
+	struct hlist_node *next;
 
-	hlist_for_each_entry_safe(ri, pos, next, &rp->free_instances, hlist) {
+	hlist_for_each_entry_safe(ri, next, &rp->free_instances, hlist) {
 		hlist_del(&ri->hlist);
 		kfree(ri);
 	}
@@ -1185,14 +1182,14 @@ static void __kprobes cleanup_rp_inst(struct kretprobe *rp)
 {
 	unsigned long flags, hash;
 	struct kretprobe_instance *ri;
-	struct hlist_node *pos, *next;
+	struct hlist_node *next;
 	struct hlist_head *head;
 
 	/* No race here */
 	for (hash = 0; hash < KPROBE_TABLE_SIZE; hash++) {
 		kretprobe_table_lock(hash, &flags);
 		head = &kretprobe_inst_table[hash];
-		hlist_for_each_entry_safe(ri, pos, next, head, hlist) {
+		hlist_for_each_entry_safe(ri, next, head, hlist) {
 			if (ri->rp == rp)
 				ri->rp = NULL;
 		}
@@ -2028,7 +2025,6 @@ static int __kprobes kprobes_module_callback(struct notifier_block *nb,
 {
 	struct module *mod = data;
 	struct hlist_head *head;
-	struct hlist_node *node;
 	struct kprobe *p;
 	unsigned int i;
 	int checkcore = (val == MODULE_STATE_GOING);
@@ -2045,7 +2041,7 @@ static int __kprobes kprobes_module_callback(struct notifier_block *nb,
 	mutex_lock(&kprobe_mutex);
 	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
 		head = &kprobe_table[i];
-		hlist_for_each_entry_rcu(p, node, head, hlist)
+		hlist_for_each_entry_rcu(p, head, hlist)
 			if (within_module_init((unsigned long)p->addr, mod) ||
 			    (checkcore &&
 			     within_module_core((unsigned long)p->addr, mod))) {
@@ -2192,7 +2188,6 @@ static void __kprobes kprobe_seq_stop(struct seq_file *f, void *v)
 static int __kprobes show_kprobe_addr(struct seq_file *pi, void *v)
 {
 	struct hlist_head *head;
-	struct hlist_node *node;
 	struct kprobe *p, *kp;
 	const char *sym = NULL;
 	unsigned int i = *(loff_t *) v;
@@ -2201,7 +2196,7 @@ static int __kprobes show_kprobe_addr(struct seq_file *pi, void *v)
 
 	head = &kprobe_table[i];
 	preempt_disable();
-	hlist_for_each_entry_rcu(p, node, head, hlist) {
+	hlist_for_each_entry_rcu(p, head, hlist) {
 		sym = kallsyms_lookup((unsigned long)p->addr, NULL,
 					&offset, &modname, namebuf);
 		if (kprobe_aggrprobe(p)) {
@@ -2236,7 +2231,6 @@ static const struct file_operations debugfs_kprobes_operations = {
 static void __kprobes arm_all_kprobes(void)
 {
 	struct hlist_head *head;
-	struct hlist_node *node;
 	struct kprobe *p;
 	unsigned int i;
 
@@ -2249,7 +2243,7 @@ static void __kprobes arm_all_kprobes(void)
 	/* Arming kprobes doesn't optimize kprobe itself */
 	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
 		head = &kprobe_table[i];
-		hlist_for_each_entry_rcu(p, node, head, hlist)
+		hlist_for_each_entry_rcu(p, head, hlist)
 			if (!kprobe_disabled(p))
 				arm_kprobe(p);
 	}
@@ -2265,7 +2259,6 @@ already_enabled:
 static void __kprobes disarm_all_kprobes(void)
 {
 	struct hlist_head *head;
-	struct hlist_node *node;
 	struct kprobe *p;
 	unsigned int i;
 
@@ -2282,7 +2275,7 @@ static void __kprobes disarm_all_kprobes(void)
 
 	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
 		head = &kprobe_table[i];
-		hlist_for_each_entry_rcu(p, node, head, hlist) {
+		hlist_for_each_entry_rcu(p, head, hlist) {
 			if (!arch_trampoline_kprobe(p) && !kprobe_disabled(p))
 				disarm_kprobe(p, false);
 		}
diff --git a/kernel/pid.c b/kernel/pid.c
index f2c6a6825098..047dc6264638 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -350,10 +350,9 @@ void disable_pid_allocation(struct pid_namespace *ns)
 
 struct pid *find_pid_ns(int nr, struct pid_namespace *ns)
 {
-	struct hlist_node *elem;
 	struct upid *pnr;
 
-	hlist_for_each_entry_rcu(pnr, elem,
+	hlist_for_each_entry_rcu(pnr,
 			&pid_hash[pid_hashfn(nr, ns)], pid_chain)
 		if (pnr->nr == nr && pnr->ns == ns)
 			return container_of(pnr, struct pid,
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 2b5243176aba..12af4270c9c1 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1752,9 +1752,8 @@ EXPORT_SYMBOL_GPL(preempt_notifier_unregister);
 static void fire_sched_in_preempt_notifiers(struct task_struct *curr)
 {
 	struct preempt_notifier *notifier;
-	struct hlist_node *node;
 
-	hlist_for_each_entry(notifier, node, &curr->preempt_notifiers, link)
+	hlist_for_each_entry(notifier, &curr->preempt_notifiers, link)
 		notifier->ops->sched_in(notifier, raw_smp_processor_id());
 }
 
@@ -1763,9 +1762,8 @@ fire_sched_out_preempt_notifiers(struct task_struct *curr,
 				 struct task_struct *next)
 {
 	struct preempt_notifier *notifier;
-	struct hlist_node *node;
 
-	hlist_for_each_entry(notifier, node, &curr->preempt_notifiers, link)
+	hlist_for_each_entry(notifier, &curr->preempt_notifiers, link)
 		notifier->ops->sched_out(notifier, next);
 }
 
diff --git a/kernel/smpboot.c b/kernel/smpboot.c
index d4abac261779..b9bde5727829 100644
--- a/kernel/smpboot.c
+++ b/kernel/smpboot.c
@@ -131,7 +131,7 @@ static int smpboot_thread_fn(void *data)
 			continue;
 		}
 
-		BUG_ON(td->cpu != smp_processor_id());
+		//BUG_ON(td->cpu != smp_processor_id());
 
 		/* Check for state change setup */
 		switch (td->status) {
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 98ca94a41819..ab25b88aae56 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -762,7 +762,6 @@ ftrace_find_profiled_func(struct ftrace_profile_stat *stat, unsigned long ip)
 {
 	struct ftrace_profile *rec;
 	struct hlist_head *hhd;
-	struct hlist_node *n;
 	unsigned long key;
 
 	key = hash_long(ip, ftrace_profile_bits);
@@ -771,7 +770,7 @@ ftrace_find_profiled_func(struct ftrace_profile_stat *stat, unsigned long ip)
 	if (hlist_empty(hhd))
 		return NULL;
 
-	hlist_for_each_entry_rcu(rec, n, hhd, node) {
+	hlist_for_each_entry_rcu(rec, hhd, node) {
 		if (rec->ip == ip)
 			return rec;
 	}
@@ -1133,7 +1132,6 @@ ftrace_lookup_ip(struct ftrace_hash *hash, unsigned long ip)
 	unsigned long key;
 	struct ftrace_func_entry *entry;
 	struct hlist_head *hhd;
-	struct hlist_node *n;
 
 	if (ftrace_hash_empty(hash))
 		return NULL;
@@ -1145,7 +1143,7 @@ ftrace_lookup_ip(struct ftrace_hash *hash, unsigned long ip)
 
 	hhd = &hash->buckets[key];
 
-	hlist_for_each_entry_rcu(entry, n, hhd, hlist) {
+	hlist_for_each_entry_rcu(entry, hhd, hlist) {
 		if (entry->ip == ip)
 			return entry;
 	}
@@ -1202,7 +1200,7 @@ remove_hash_entry(struct ftrace_hash *hash,
 static void ftrace_hash_clear(struct ftrace_hash *hash)
 {
 	struct hlist_head *hhd;
-	struct hlist_node *tp, *tn;
+	struct hlist_node *tn;
 	struct ftrace_func_entry *entry;
 	int size = 1 << hash->size_bits;
 	int i;
@@ -1212,7 +1210,7 @@ static void ftrace_hash_clear(struct ftrace_hash *hash)
 
 	for (i = 0; i < size; i++) {
 		hhd = &hash->buckets[i];
-		hlist_for_each_entry_safe(entry, tp, tn, hhd, hlist)
+		hlist_for_each_entry_safe(entry, tn, hhd, hlist)
 			free_hash_entry(hash, entry);
 	}
 	FTRACE_WARN_ON(hash->count);
@@ -1275,7 +1273,6 @@ alloc_and_copy_ftrace_hash(int size_bits, struct ftrace_hash *hash)
 {
 	struct ftrace_func_entry *entry;
 	struct ftrace_hash *new_hash;
-	struct hlist_node *tp;
 	int size;
 	int ret;
 	int i;
@@ -1290,7 +1287,7 @@ alloc_and_copy_ftrace_hash(int size_bits, struct ftrace_hash *hash)
 
 	size = 1 << hash->size_bits;
 	for (i = 0; i < size; i++) {
-		hlist_for_each_entry(entry, tp, &hash->buckets[i], hlist) {
+		hlist_for_each_entry(entry, &hash->buckets[i], hlist) {
 			ret = add_hash_entry(new_hash, entry->ip);
 			if (ret < 0)
 				goto free_hash;
@@ -1316,7 +1313,7 @@ ftrace_hash_move(struct ftrace_ops *ops, int enable,
 		 struct ftrace_hash **dst, struct ftrace_hash *src)
 {
 	struct ftrace_func_entry *entry;
-	struct hlist_node *tp, *tn;
+	struct hlist_node *tn;
 	struct hlist_head *hhd;
 	struct ftrace_hash *old_hash;
 	struct ftrace_hash *new_hash;
@@ -1362,7 +1359,7 @@ ftrace_hash_move(struct ftrace_ops *ops, int enable,
 	size = 1 << src->size_bits;
 	for (i = 0; i < size; i++) {
 		hhd = &src->buckets[i];
-		hlist_for_each_entry_safe(entry, tp, tn, hhd, hlist) {
+		hlist_for_each_entry_safe(entry, tn, hhd, hlist) {
 			if (bits > 0)
 				key = hash_long(entry->ip, bits);
 			else
@@ -2901,7 +2898,6 @@ static void function_trace_probe_call(unsigned long ip, unsigned long parent_ip,
 {
 	struct ftrace_func_probe *entry;
 	struct hlist_head *hhd;
-	struct hlist_node *n;
 	unsigned long key;
 
 	key = hash_long(ip, FTRACE_HASH_BITS);
@@ -2917,7 +2913,7 @@ static void function_trace_probe_call(unsigned long ip, unsigned long parent_ip,
 	 * on the hash. rcu_read_lock is too dangerous here.
 	 */
 	preempt_disable_notrace();
-	hlist_for_each_entry_rcu(entry, n, hhd, node) {
+	hlist_for_each_entry_rcu(entry, hhd, node) {
 		if (entry->ip == ip)
 			entry->ops->func(ip, parent_ip, &entry->data);
 	}
@@ -3068,7 +3064,7 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
 				  void *data, int flags)
 {
 	struct ftrace_func_probe *entry;
-	struct hlist_node *n, *tmp;
+	struct hlist_node *tmp;
 	char str[KSYM_SYMBOL_LEN];
 	int type = MATCH_FULL;
 	int i, len = 0;
@@ -3091,7 +3087,7 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
 	for (i = 0; i < FTRACE_FUNC_HASHSIZE; i++) {
 		struct hlist_head *hhd = &ftrace_func_hash[i];
 
-		hlist_for_each_entry_safe(entry, n, tmp, hhd, node) {
+		hlist_for_each_entry_safe(entry, tmp, hhd, node) {
 
 			/* break up if statements for readability */
 			if ((flags & PROBE_TEST_FUNC) && entry->ops != ops)
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 194d79602dc7..697e88d13907 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -739,12 +739,11 @@ static int task_state_char(unsigned long state)
 struct trace_event *ftrace_find_event(int type)
 {
 	struct trace_event *event;
-	struct hlist_node *n;
 	unsigned key;
 
 	key = type & (EVENT_HASHSIZE - 1);
 
-	hlist_for_each_entry(event, n, &event_hash[key], node) {
+	hlist_for_each_entry(event, &event_hash[key], node) {
 		if (event->type == type)
 			return event;
 	}
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index d96ba22dabfa..0c05a4592047 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -192,12 +192,11 @@ tracepoint_entry_remove_probe(struct tracepoint_entry *entry,
 static struct tracepoint_entry *get_tracepoint(const char *name)
 {
 	struct hlist_head *head;
-	struct hlist_node *node;
 	struct tracepoint_entry *e;
 	u32 hash = jhash(name, strlen(name), 0);
 
 	head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)];
-	hlist_for_each_entry(e, node, head, hlist) {
+	hlist_for_each_entry(e, head, hlist) {
 		if (!strcmp(name, e->name))
 			return e;
 	}
@@ -211,13 +210,12 @@ static struct tracepoint_entry *get_tracepoint(const char *name)
 static struct tracepoint_entry *add_tracepoint(const char *name)
 {
 	struct hlist_head *head;
-	struct hlist_node *node;
 	struct tracepoint_entry *e;
 	size_t name_len = strlen(name) + 1;
 	u32 hash = jhash(name, name_len-1, 0);
 
 	head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)];
-	hlist_for_each_entry(e, node, head, hlist) {
+	hlist_for_each_entry(e, head, hlist) {
 		if (!strcmp(name, e->name)) {
 			printk(KERN_NOTICE
 				"tracepoint %s busy\n", name);
diff --git a/kernel/user-return-notifier.c b/kernel/user-return-notifier.c
index 1744bb80f1fb..394f70b17162 100644
--- a/kernel/user-return-notifier.c
+++ b/kernel/user-return-notifier.c
@@ -34,11 +34,11 @@ EXPORT_SYMBOL_GPL(user_return_notifier_unregister);
 void fire_user_return_notifiers(void)
 {
 	struct user_return_notifier *urn;
-	struct hlist_node *tmp1, *tmp2;
+	struct hlist_node *tmp2;
 	struct hlist_head *head;
 
 	head = &get_cpu_var(return_notifier_list);
-	hlist_for_each_entry_safe(urn, tmp1, tmp2, head, link)
+	hlist_for_each_entry_safe(urn, tmp2, head, link)
 		urn->on_user_return(urn);
 	put_cpu_var(return_notifier_list);
 }
diff --git a/kernel/user.c b/kernel/user.c
index 57ebfd42023c..e81978e8c03b 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -105,9 +105,8 @@ static void uid_hash_remove(struct user_struct *up)
 static struct user_struct *uid_hash_find(kuid_t uid, struct hlist_head *hashent)
 {
 	struct user_struct *user;
-	struct hlist_node *h;
 
-	hlist_for_each_entry(user, h, hashent, uidhash_node) {
+	hlist_for_each_entry(user, hashent, uidhash_node) {
 		if (uid_eq(user->uid, uid)) {
 			atomic_inc(&user->__count);
 			return user;
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index f4feacad3812..81f2457811eb 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -251,8 +251,8 @@ EXPORT_SYMBOL_GPL(system_freezable_wq);
 	for ((pool) = &std_worker_pools(cpu)[0];			\
 	     (pool) < &std_worker_pools(cpu)[NR_STD_WORKER_POOLS]; (pool)++)
 
-#define for_each_busy_worker(worker, i, pos, pool)			\
-	hash_for_each(pool->busy_hash, i, pos, worker, hentry)
+#define for_each_busy_worker(worker, i, pool)				\
+	hash_for_each(pool->busy_hash, i, worker, hentry)
 
 static inline int __next_wq_cpu(int cpu, const struct cpumask *mask,
 				unsigned int sw)
@@ -909,9 +909,8 @@ static struct worker *find_worker_executing_work(struct worker_pool *pool,
 						 struct work_struct *work)
 {
 	struct worker *worker;
-	struct hlist_node *tmp;
 
-	hash_for_each_possible(pool->busy_hash, worker, tmp, hentry,
+	hash_for_each_possible(pool->busy_hash, worker, hentry,
 			       (unsigned long)work)
 		if (worker->current_work == work &&
 		    worker->current_func == work->func)
@@ -1626,7 +1625,6 @@ static void busy_worker_rebind_fn(struct work_struct *work)
 static void rebind_workers(struct worker_pool *pool)
 {
 	struct worker *worker, *n;
-	struct hlist_node *pos;
 	int i;
 
 	lockdep_assert_held(&pool->assoc_mutex);
@@ -1648,7 +1646,7 @@ static void rebind_workers(struct worker_pool *pool)
 	}
 
 	/* rebind busy workers */
-	for_each_busy_worker(worker, i, pos, pool) {
+	for_each_busy_worker(worker, i, pool) {
 		struct work_struct *rebind_work = &worker->rebind_work;
 		struct workqueue_struct *wq;
 
@@ -3423,7 +3421,6 @@ static void wq_unbind_fn(struct work_struct *work)
 	int cpu = smp_processor_id();
 	struct worker_pool *pool;
 	struct worker *worker;
-	struct hlist_node *pos;
 	int i;
 
 	for_each_std_worker_pool(pool, cpu) {
@@ -3442,7 +3439,7 @@ static void wq_unbind_fn(struct work_struct *work)
 		list_for_each_entry(worker, &pool->idle_list, entry)
 			worker->flags |= WORKER_UNBOUND;
 
-		for_each_busy_worker(worker, i, pos, pool)
+		for_each_busy_worker(worker, i, pool)
 			worker->flags |= WORKER_UNBOUND;
 
 		pool->flags |= POOL_DISASSOCIATED;
diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index d11808ca4bc4..37061ede8b81 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -109,11 +109,10 @@ static void fill_pool(void)
  */
 static struct debug_obj *lookup_object(void *addr, struct debug_bucket *b)
 {
-	struct hlist_node *node;
 	struct debug_obj *obj;
 	int cnt = 0;
 
-	hlist_for_each_entry(obj, node, &b->list, node) {
+	hlist_for_each_entry(obj, &b->list, node) {
 		cnt++;
 		if (obj->object == addr)
 			return obj;
@@ -213,7 +212,7 @@ static void free_object(struct debug_obj *obj)
 static void debug_objects_oom(void)
 {
 	struct debug_bucket *db = obj_hash;
-	struct hlist_node *node, *tmp;
+	struct hlist_node *tmp;
 	HLIST_HEAD(freelist);
 	struct debug_obj *obj;
 	unsigned long flags;
@@ -227,7 +226,7 @@ static void debug_objects_oom(void)
 		raw_spin_unlock_irqrestore(&db->lock, flags);
 
 		/* Now free them */
-		hlist_for_each_entry_safe(obj, node, tmp, &freelist, node) {
+		hlist_for_each_entry_safe(obj, tmp, &freelist, node) {
 			hlist_del(&obj->node);
 			free_object(obj);
 		}
@@ -658,7 +657,7 @@ debug_object_active_state(void *addr, struct debug_obj_descr *descr,
 static void __debug_check_no_obj_freed(const void *address, unsigned long size)
 {
 	unsigned long flags, oaddr, saddr, eaddr, paddr, chunks;
-	struct hlist_node *node, *tmp;
+	struct hlist_node *tmp;
 	HLIST_HEAD(freelist);
 	struct debug_obj_descr *descr;
 	enum debug_obj_state state;
@@ -678,7 +677,7 @@ static void __debug_check_no_obj_freed(const void *address, unsigned long size)
 repeat:
 		cnt = 0;
 		raw_spin_lock_irqsave(&db->lock, flags);
-		hlist_for_each_entry_safe(obj, node, tmp, &db->list, node) {
+		hlist_for_each_entry_safe(obj, tmp, &db->list, node) {
 			cnt++;
 			oaddr = (unsigned long) obj->object;
 			if (oaddr < saddr || oaddr >= eaddr)
@@ -702,7 +701,7 @@ repeat:
 		raw_spin_unlock_irqrestore(&db->lock, flags);
 
 		/* Now free them */
-		hlist_for_each_entry_safe(obj, node, tmp, &freelist, node) {
+		hlist_for_each_entry_safe(obj, tmp, &freelist, node) {
 			hlist_del(&obj->node);
 			free_object(obj);
 		}
@@ -1013,7 +1012,7 @@ void __init debug_objects_early_init(void)
 static int __init debug_objects_replace_static_objects(void)
 {
 	struct debug_bucket *db = obj_hash;
-	struct hlist_node *node, *tmp;
+	struct hlist_node *tmp;
 	struct debug_obj *obj, *new;
 	HLIST_HEAD(objects);
 	int i, cnt = 0;
@@ -1033,7 +1032,7 @@ static int __init debug_objects_replace_static_objects(void)
 	local_irq_disable();
 
 	/* Remove the statically allocated objects from the pool */
-	hlist_for_each_entry_safe(obj, node, tmp, &obj_pool, node)
+	hlist_for_each_entry_safe(obj, tmp, &obj_pool, node)
 		hlist_del(&obj->node);
 	/* Move the allocated objects to the pool */
 	hlist_move_list(&objects, &obj_pool);
@@ -1042,7 +1041,7 @@ static int __init debug_objects_replace_static_objects(void)
 	for (i = 0; i < ODEBUG_HASH_SIZE; i++, db++) {
 		hlist_move_list(&db->list, &objects);
 
-		hlist_for_each_entry(obj, node, &objects, node) {
+		hlist_for_each_entry(obj, &objects, node) {
 			new = hlist_entry(obj_pool.first, typeof(*obj), node);
 			hlist_del(&new->node);
 			/* copy object data */
@@ -1057,7 +1056,7 @@ static int __init debug_objects_replace_static_objects(void)
 	       obj_pool_used);
 	return 0;
 free:
-	hlist_for_each_entry_safe(obj, node, tmp, &objects, node) {
+	hlist_for_each_entry_safe(obj, tmp, &objects, node) {
 		hlist_del(&obj->node);
 		kmem_cache_free(obj_cache, obj);
 	}
diff --git a/lib/lru_cache.c b/lib/lru_cache.c
index d71d89498943..8335d39d2ccd 100644
--- a/lib/lru_cache.c
+++ b/lib/lru_cache.c
@@ -262,12 +262,11 @@ static struct hlist_head *lc_hash_slot(struct lru_cache *lc, unsigned int enr)
 static struct lc_element *__lc_find(struct lru_cache *lc, unsigned int enr,
 		bool include_changing)
 {
-	struct hlist_node *n;
 	struct lc_element *e;
 
 	BUG_ON(!lc);
 	BUG_ON(!lc->nr_elements);
-	hlist_for_each_entry(e, n, lc_hash_slot(lc, enr), colision) {
+	hlist_for_each_entry(e, lc_hash_slot(lc, enr), colision) {
 		/* "about to be changed" elements, pending transaction commit,
 		 * are hashed by their "new number". "Normal" elements have
 		 * lc_number == lc_new_number. */
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index bfa142e67b1c..e2f7f5aaaafb 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1906,9 +1906,8 @@ static inline void free_mm_slot(struct mm_slot *mm_slot)
 static struct mm_slot *get_mm_slot(struct mm_struct *mm)
 {
 	struct mm_slot *mm_slot;
-	struct hlist_node *node;
 
-	hash_for_each_possible(mm_slots_hash, mm_slot, node, hash, (unsigned long)mm)
+	hash_for_each_possible(mm_slots_hash, mm_slot, hash, (unsigned long)mm)
 		if (mm == mm_slot->mm)
 			return mm_slot;
 
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index 83dd5fbf5e60..c8d7f3110fd0 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -436,7 +436,7 @@ static int get_object(struct kmemleak_object *object)
  */
 static void free_object_rcu(struct rcu_head *rcu)
 {
-	struct hlist_node *elem, *tmp;
+	struct hlist_node *tmp;
 	struct kmemleak_scan_area *area;
 	struct kmemleak_object *object =
 		container_of(rcu, struct kmemleak_object, rcu);
@@ -445,8 +445,8 @@ static void free_object_rcu(struct rcu_head *rcu)
 	 * Once use_count is 0 (guaranteed by put_object), there is no other
 	 * code accessing this object, hence no need for locking.
 	 */
-	hlist_for_each_entry_safe(area, elem, tmp, &object->area_list, node) {
-		hlist_del(elem);
+	hlist_for_each_entry_safe(area, tmp, &object->area_list, node) {
+		hlist_del(&area->node);
 		kmem_cache_free(scan_area_cache, area);
 	}
 	kmem_cache_free(object_cache, object);
@@ -1177,7 +1177,6 @@ static void scan_block(void *_start, void *_end,
 static void scan_object(struct kmemleak_object *object)
 {
 	struct kmemleak_scan_area *area;
-	struct hlist_node *elem;
 	unsigned long flags;
 
 	/*
@@ -1205,7 +1204,7 @@ static void scan_object(struct kmemleak_object *object)
 			spin_lock_irqsave(&object->lock, flags);
 		}
 	} else
-		hlist_for_each_entry(area, elem, &object->area_list, node)
+		hlist_for_each_entry(area, &object->area_list, node)
 			scan_block((void *)area->start,
 				   (void *)(area->start + area->size),
 				   object, 0);
diff --git a/mm/ksm.c b/mm/ksm.c
index ab2ba9ad3c59..85bfd4c16346 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -320,10 +320,9 @@ static inline void free_mm_slot(struct mm_slot *mm_slot)
 
 static struct mm_slot *get_mm_slot(struct mm_struct *mm)
 {
-	struct hlist_node *node;
 	struct mm_slot *slot;
 
-	hash_for_each_possible(mm_slots_hash, slot, node, link, (unsigned long)mm)
+	hash_for_each_possible(mm_slots_hash, slot, link, (unsigned long)mm)
 		if (slot->mm == mm)
 			return slot;
 
@@ -496,9 +495,8 @@ static inline int get_kpfn_nid(unsigned long kpfn)
 static void remove_node_from_stable_tree(struct stable_node *stable_node)
 {
 	struct rmap_item *rmap_item;
-	struct hlist_node *hlist;
 
-	hlist_for_each_entry(rmap_item, hlist, &stable_node->hlist, hlist) {
+	hlist_for_each_entry(rmap_item, &stable_node->hlist, hlist) {
 		if (rmap_item->hlist.next)
 			ksm_pages_sharing--;
 		else
@@ -1898,7 +1896,6 @@ int page_referenced_ksm(struct page *page, struct mem_cgroup *memcg,
 {
 	struct stable_node *stable_node;
 	struct rmap_item *rmap_item;
-	struct hlist_node *hlist;
 	unsigned int mapcount = page_mapcount(page);
 	int referenced = 0;
 	int search_new_forks = 0;
@@ -1910,7 +1907,7 @@ int page_referenced_ksm(struct page *page, struct mem_cgroup *memcg,
 	if (!stable_node)
 		return 0;
 again:
-	hlist_for_each_entry(rmap_item, hlist, &stable_node->hlist, hlist) {
+	hlist_for_each_entry(rmap_item, &stable_node->hlist, hlist) {
 		struct anon_vma *anon_vma = rmap_item->anon_vma;
 		struct anon_vma_chain *vmac;
 		struct vm_area_struct *vma;
@@ -1952,7 +1949,6 @@ out:
 int try_to_unmap_ksm(struct page *page, enum ttu_flags flags)
 {
 	struct stable_node *stable_node;
-	struct hlist_node *hlist;
 	struct rmap_item *rmap_item;
 	int ret = SWAP_AGAIN;
 	int search_new_forks = 0;
@@ -1964,7 +1960,7 @@ int try_to_unmap_ksm(struct page *page, enum ttu_flags flags)
 	if (!stable_node)
 		return SWAP_FAIL;
 again:
-	hlist_for_each_entry(rmap_item, hlist, &stable_node->hlist, hlist) {
+	hlist_for_each_entry(rmap_item, &stable_node->hlist, hlist) {
 		struct anon_vma *anon_vma = rmap_item->anon_vma;
 		struct anon_vma_chain *vmac;
 		struct vm_area_struct *vma;
@@ -2005,7 +2001,6 @@ int rmap_walk_ksm(struct page *page, int (*rmap_one)(struct page *,
 		  struct vm_area_struct *, unsigned long, void *), void *arg)
 {
 	struct stable_node *stable_node;
-	struct hlist_node *hlist;
 	struct rmap_item *rmap_item;
 	int ret = SWAP_AGAIN;
 	int search_new_forks = 0;
@@ -2017,7 +2012,7 @@ int rmap_walk_ksm(struct page *page, int (*rmap_one)(struct page *,
 	if (!stable_node)
 		return ret;
 again:
-	hlist_for_each_entry(rmap_item, hlist, &stable_node->hlist, hlist) {
+	hlist_for_each_entry(rmap_item, &stable_node->hlist, hlist) {
 		struct anon_vma *anon_vma = rmap_item->anon_vma;
 		struct anon_vma_chain *vmac;
 		struct vm_area_struct *vma;
diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c
index 2175fb0d501c..be04122fb277 100644
--- a/mm/mmu_notifier.c
+++ b/mm/mmu_notifier.c
@@ -95,11 +95,10 @@ int __mmu_notifier_clear_flush_young(struct mm_struct *mm,
 					unsigned long address)
 {
 	struct mmu_notifier *mn;
-	struct hlist_node *n;
 	int young = 0, id;
 
 	id = srcu_read_lock(&srcu);
-	hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) {
+	hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist) {
 		if (mn->ops->clear_flush_young)
 			young |= mn->ops->clear_flush_young(mn, mm, address);
 	}
@@ -112,11 +111,10 @@ int __mmu_notifier_test_young(struct mm_struct *mm,
 			      unsigned long address)
 {
 	struct mmu_notifier *mn;
-	struct hlist_node *n;
 	int young = 0, id;
 
 	id = srcu_read_lock(&srcu);
-	hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) {
+	hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist) {
 		if (mn->ops->test_young) {
 			young = mn->ops->test_young(mn, mm, address);
 			if (young)
@@ -132,11 +130,10 @@ void __mmu_notifier_change_pte(struct mm_struct *mm, unsigned long address,
 			       pte_t pte)
 {
 	struct mmu_notifier *mn;
-	struct hlist_node *n;
 	int id;
 
 	id = srcu_read_lock(&srcu);
-	hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) {
+	hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist) {
 		if (mn->ops->change_pte)
 			mn->ops->change_pte(mn, mm, address, pte);
 	}
@@ -147,11 +144,10 @@ void __mmu_notifier_invalidate_page(struct mm_struct *mm,
 					  unsigned long address)
 {
 	struct mmu_notifier *mn;
-	struct hlist_node *n;
 	int id;
 
 	id = srcu_read_lock(&srcu);
-	hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) {
+	hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist) {
 		if (mn->ops->invalidate_page)
 			mn->ops->invalidate_page(mn, mm, address);
 	}
@@ -162,11 +158,10 @@ void __mmu_notifier_invalidate_range_start(struct mm_struct *mm,
 				  unsigned long start, unsigned long end)
 {
 	struct mmu_notifier *mn;
-	struct hlist_node *n;
 	int id;
 
 	id = srcu_read_lock(&srcu);
-	hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) {
+	hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist) {
 		if (mn->ops->invalidate_range_start)
 			mn->ops->invalidate_range_start(mn, mm, start, end);
 	}
@@ -178,11 +173,10 @@ void __mmu_notifier_invalidate_range_end(struct mm_struct *mm,
 				  unsigned long start, unsigned long end)
 {
 	struct mmu_notifier *mn;
-	struct hlist_node *n;
 	int id;
 
 	id = srcu_read_lock(&srcu);
-	hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) {
+	hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist) {
 		if (mn->ops->invalidate_range_end)
 			mn->ops->invalidate_range_end(mn, mm, start, end);
 	}
diff --git a/net/9p/error.c b/net/9p/error.c
index 2ab2de76010f..126fd0dceea2 100644
--- a/net/9p/error.c
+++ b/net/9p/error.c
@@ -221,15 +221,13 @@ EXPORT_SYMBOL(p9_error_init);
 int p9_errstr2errno(char *errstr, int len)
 {
 	int errno;
-	struct hlist_node *p;
 	struct errormap *c;
 	int bucket;
 
 	errno = 0;
-	p = NULL;
 	c = NULL;
 	bucket = jhash(errstr, len, 0) % ERRHASHSZ;
-	hlist_for_each_entry(c, p, &hash_errmap[bucket], list) {
+	hlist_for_each_entry(c, &hash_errmap[bucket], list) {
 		if (c->namelen == len && !memcmp(c->name, errstr, len)) {
 			errno = c->val;
 			break;
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index de2e950a0a7a..74dea377fe5b 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -655,7 +655,7 @@ static struct p9_trans_module p9_virtio_trans = {
 	.create = p9_virtio_create,
 	.close = p9_virtio_close,
 	.request = p9_virtio_request,
-	.zc_request = p9_virtio_zc_request,
+	//.zc_request = p9_virtio_zc_request,
 	.cancel = p9_virtio_cancel,
 	/*
 	 * We leave one entry for input and one entry for response
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 33475291c9c1..4a141e3cf076 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -93,10 +93,9 @@ static struct sock *atalk_search_socket(struct sockaddr_at *to,
 					struct atalk_iface *atif)
 {
 	struct sock *s;
-	struct hlist_node *node;
 
 	read_lock_bh(&atalk_sockets_lock);
-	sk_for_each(s, node, &atalk_sockets) {
+	sk_for_each(s, &atalk_sockets) {
 		struct atalk_sock *at = at_sk(s);
 
 		if (to->sat_port != at->src_port)
@@ -141,11 +140,10 @@ static struct sock *atalk_find_or_insert_socket(struct sock *sk,
 						struct sockaddr_at *sat)
 {
 	struct sock *s;
-	struct hlist_node *node;
 	struct atalk_sock *at;
 
 	write_lock_bh(&atalk_sockets_lock);
-	sk_for_each(s, node, &atalk_sockets) {
+	sk_for_each(s, &atalk_sockets) {
 		at = at_sk(s);
 
 		if (at->src_net == sat->sat_addr.s_net &&
@@ -1084,9 +1082,8 @@ static int atalk_pick_and_bind_port(struct sock *sk, struct sockaddr_at *sat)
 	     sat->sat_port < ATPORT_LAST;
 	     sat->sat_port++) {
 		struct sock *s;
-		struct hlist_node *node;
 
-		sk_for_each(s, node, &atalk_sockets) {
+		sk_for_each(s, &atalk_sockets) {
 			struct atalk_sock *at = at_sk(s);
 
 			if (at->src_net == sat->sat_addr.s_net &&
diff --git a/net/atm/common.c b/net/atm/common.c
index 806fc0a40051..7b491006eaf4 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -270,11 +270,11 @@ void atm_dev_release_vccs(struct atm_dev *dev)
 	write_lock_irq(&vcc_sklist_lock);
 	for (i = 0; i < VCC_HTABLE_SIZE; i++) {
 		struct hlist_head *head = &vcc_hash[i];
-		struct hlist_node *node, *tmp;
+		struct hlist_node *tmp;
 		struct sock *s;
 		struct atm_vcc *vcc;
 
-		sk_for_each_safe(s, node, tmp, head) {
+		sk_for_each_safe(s, tmp, head) {
 			vcc = atm_sk(s);
 			if (vcc->dev == dev) {
 				vcc_release_async(vcc, -EPIPE);
@@ -317,11 +317,10 @@ static int adjust_tp(struct atm_trafprm *tp, unsigned char aal)
 static int check_ci(const struct atm_vcc *vcc, short vpi, int vci)
 {
 	struct hlist_head *head = &vcc_hash[vci & (VCC_HTABLE_SIZE - 1)];
-	struct hlist_node *node;
 	struct sock *s;
 	struct atm_vcc *walk;
 
-	sk_for_each(s, node, head) {
+	sk_for_each(s, head) {
 		walk = atm_sk(s);
 		if (walk->dev != vcc->dev)
 			continue;
diff --git a/net/atm/lec.c b/net/atm/lec.c
index 2e3d942e77f1..f23916be18fb 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -842,7 +842,9 @@ static void *lec_tbl_walk(struct lec_state *state, struct hlist_head *tbl,
 		--*l;
 	}
 
-	hlist_for_each_entry_from(tmp, e, next) {
+	tmp = container_of(e, struct lec_arp_table, next);
+
+	hlist_for_each_entry_from(tmp, next) {
 		if (--*l < 0)
 			break;
 	}
@@ -1307,7 +1309,6 @@ lec_arp_add(struct lec_priv *priv, struct lec_arp_table *entry)
 static int
 lec_arp_remove(struct lec_priv *priv, struct lec_arp_table *to_remove)
 {
-	struct hlist_node *node;
 	struct lec_arp_table *entry;
 	int i, remove_vcc = 1;
 
@@ -1326,7 +1327,7 @@ lec_arp_remove(struct lec_priv *priv, struct lec_arp_table *to_remove)
 		 * ESI_FLUSH_PENDING, ESI_FORWARD_DIRECT
 		 */
 		for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) {
-			hlist_for_each_entry(entry, node,
+			hlist_for_each_entry(entry,
 					     &priv->lec_arp_tables[i], next) {
 				if (memcmp(to_remove->atm_addr,
 					   entry->atm_addr, ATM_ESA_LEN) == 0) {
@@ -1364,14 +1365,13 @@ static const char *get_status_string(unsigned char st)
 
 static void dump_arp_table(struct lec_priv *priv)
 {
-	struct hlist_node *node;
 	struct lec_arp_table *rulla;
 	char buf[256];
 	int i, j, offset;
 
 	pr_info("Dump %p:\n", priv);
 	for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) {
-		hlist_for_each_entry(rulla, node,
+		hlist_for_each_entry(rulla,
 				     &priv->lec_arp_tables[i], next) {
 			offset = 0;
 			offset += sprintf(buf, "%d: %p\n", i, rulla);
@@ -1403,7 +1403,7 @@ static void dump_arp_table(struct lec_priv *priv)
 
 	if (!hlist_empty(&priv->lec_no_forward))
 		pr_info("No forward\n");
-	hlist_for_each_entry(rulla, node, &priv->lec_no_forward, next) {
+	hlist_for_each_entry(rulla, &priv->lec_no_forward, next) {
 		offset = 0;
 		offset += sprintf(buf + offset, "Mac: %pM", rulla->mac_addr);
 		offset += sprintf(buf + offset, " Atm:");
@@ -1428,7 +1428,7 @@ static void dump_arp_table(struct lec_priv *priv)
 
 	if (!hlist_empty(&priv->lec_arp_empty_ones))
 		pr_info("Empty ones\n");
-	hlist_for_each_entry(rulla, node, &priv->lec_arp_empty_ones, next) {
+	hlist_for_each_entry(rulla, &priv->lec_arp_empty_ones, next) {
 		offset = 0;
 		offset += sprintf(buf + offset, "Mac: %pM", rulla->mac_addr);
 		offset += sprintf(buf + offset, " Atm:");
@@ -1453,7 +1453,7 @@ static void dump_arp_table(struct lec_priv *priv)
 
 	if (!hlist_empty(&priv->mcast_fwds))
 		pr_info("Multicast Forward VCCs\n");
-	hlist_for_each_entry(rulla, node, &priv->mcast_fwds, next) {
+	hlist_for_each_entry(rulla, &priv->mcast_fwds, next) {
 		offset = 0;
 		offset += sprintf(buf + offset, "Mac: %pM", rulla->mac_addr);
 		offset += sprintf(buf + offset, " Atm:");
@@ -1487,7 +1487,7 @@ static void dump_arp_table(struct lec_priv *priv)
 static void lec_arp_destroy(struct lec_priv *priv)
 {
 	unsigned long flags;
-	struct hlist_node *node, *next;
+	struct hlist_node *next;
 	struct lec_arp_table *entry;
 	int i;
 
@@ -1499,7 +1499,7 @@ static void lec_arp_destroy(struct lec_priv *priv)
 
 	spin_lock_irqsave(&priv->lec_arp_lock, flags);
 	for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) {
-		hlist_for_each_entry_safe(entry, node, next,
+		hlist_for_each_entry_safe(entry, next,
 					  &priv->lec_arp_tables[i], next) {
 			lec_arp_remove(priv, entry);
 			lec_arp_put(entry);
@@ -1507,7 +1507,7 @@ static void lec_arp_destroy(struct lec_priv *priv)
 		INIT_HLIST_HEAD(&priv->lec_arp_tables[i]);
 	}
 
-	hlist_for_each_entry_safe(entry, node, next,
+	hlist_for_each_entry_safe(entry, next,
 				  &priv->lec_arp_empty_ones, next) {
 		del_timer_sync(&entry->timer);
 		lec_arp_clear_vccs(entry);
@@ -1516,7 +1516,7 @@ static void lec_arp_destroy(struct lec_priv *priv)
 	}
 	INIT_HLIST_HEAD(&priv->lec_arp_empty_ones);
 
-	hlist_for_each_entry_safe(entry, node, next,
+	hlist_for_each_entry_safe(entry, next,
 				  &priv->lec_no_forward, next) {
 		del_timer_sync(&entry->timer);
 		lec_arp_clear_vccs(entry);
@@ -1525,7 +1525,7 @@ static void lec_arp_destroy(struct lec_priv *priv)
 	}
 	INIT_HLIST_HEAD(&priv->lec_no_forward);
 
-	hlist_for_each_entry_safe(entry, node, next, &priv->mcast_fwds, next) {
+	hlist_for_each_entry_safe(entry, next, &priv->mcast_fwds, next) {
 		/* No timer, LANEv2 7.1.20 and 2.3.5.3 */
 		lec_arp_clear_vccs(entry);
 		hlist_del(&entry->next);
@@ -1542,14 +1542,13 @@ static void lec_arp_destroy(struct lec_priv *priv)
 static struct lec_arp_table *lec_arp_find(struct lec_priv *priv,
 					  const unsigned char *mac_addr)
 {
-	struct hlist_node *node;
 	struct hlist_head *head;
 	struct lec_arp_table *entry;
 
 	pr_debug("%pM\n", mac_addr);
 
 	head = &priv->lec_arp_tables[HASH(mac_addr[ETH_ALEN - 1])];
-	hlist_for_each_entry(entry, node, head, next) {
+	hlist_for_each_entry(entry, head, next) {
 		if (ether_addr_equal(mac_addr, entry->mac_addr))
 			return entry;
 	}
@@ -1686,7 +1685,7 @@ static void lec_arp_check_expire(struct work_struct *work)
 	unsigned long flags;
 	struct lec_priv *priv =
 		container_of(work, struct lec_priv, lec_arp_work.work);
-	struct hlist_node *node, *next;
+	struct hlist_node *next;
 	struct lec_arp_table *entry;
 	unsigned long now;
 	int i;
@@ -1696,7 +1695,7 @@ static void lec_arp_check_expire(struct work_struct *work)
 restart:
 	spin_lock_irqsave(&priv->lec_arp_lock, flags);
 	for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) {
-		hlist_for_each_entry_safe(entry, node, next,
+		hlist_for_each_entry_safe(entry, next,
 					  &priv->lec_arp_tables[i], next) {
 			if (__lec_arp_check_expire(entry, now, priv)) {
 				struct sk_buff *skb;
@@ -1823,14 +1822,14 @@ lec_addr_delete(struct lec_priv *priv, const unsigned char *atm_addr,
 		unsigned long permanent)
 {
 	unsigned long flags;
-	struct hlist_node *node, *next;
+	struct hlist_node *next;
 	struct lec_arp_table *entry;
 	int i;
 
 	pr_debug("\n");
 	spin_lock_irqsave(&priv->lec_arp_lock, flags);
 	for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) {
-		hlist_for_each_entry_safe(entry, node, next,
+		hlist_for_each_entry_safe(entry, next,
 					  &priv->lec_arp_tables[i], next) {
 			if (!memcmp(atm_addr, entry->atm_addr, ATM_ESA_LEN) &&
 			    (permanent ||
@@ -1855,7 +1854,7 @@ lec_arp_update(struct lec_priv *priv, const unsigned char *mac_addr,
 	       unsigned int targetless_le_arp)
 {
 	unsigned long flags;
-	struct hlist_node *node, *next;
+	struct hlist_node *next;
 	struct lec_arp_table *entry, *tmp;
 	int i;
 
@@ -1870,7 +1869,7 @@ lec_arp_update(struct lec_priv *priv, const unsigned char *mac_addr,
 				 * we have no entry in the cache. 7.1.30
 				 */
 	if (!hlist_empty(&priv->lec_arp_empty_ones)) {
-		hlist_for_each_entry_safe(entry, node, next,
+		hlist_for_each_entry_safe(entry, next,
 					  &priv->lec_arp_empty_ones, next) {
 			if (memcmp(entry->atm_addr, atm_addr, ATM_ESA_LEN) == 0) {
 				hlist_del(&entry->next);
@@ -1915,7 +1914,7 @@ lec_arp_update(struct lec_priv *priv, const unsigned char *mac_addr,
 	memcpy(entry->atm_addr, atm_addr, ATM_ESA_LEN);
 	del_timer(&entry->timer);
 	for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) {
-		hlist_for_each_entry(tmp, node,
+		hlist_for_each_entry(tmp,
 				     &priv->lec_arp_tables[i], next) {
 			if (entry != tmp &&
 			    !memcmp(tmp->atm_addr, atm_addr, ATM_ESA_LEN)) {
@@ -1956,7 +1955,6 @@ lec_vcc_added(struct lec_priv *priv, const struct atmlec_ioc *ioc_data,
 	      void (*old_push) (struct atm_vcc *vcc, struct sk_buff *skb))
 {
 	unsigned long flags;
-	struct hlist_node *node;
 	struct lec_arp_table *entry;
 	int i, found_entry = 0;
 
@@ -2026,7 +2024,7 @@ lec_vcc_added(struct lec_priv *priv, const struct atmlec_ioc *ioc_data,
 		 ioc_data->atm_addr[16], ioc_data->atm_addr[17],
 		 ioc_data->atm_addr[18], ioc_data->atm_addr[19]);
 	for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) {
-		hlist_for_each_entry(entry, node,
+		hlist_for_each_entry(entry,
 				     &priv->lec_arp_tables[i], next) {
 			if (memcmp
 			    (ioc_data->atm_addr, entry->atm_addr,
@@ -2103,7 +2101,6 @@ out:
 static void lec_flush_complete(struct lec_priv *priv, unsigned long tran_id)
 {
 	unsigned long flags;
-	struct hlist_node *node;
 	struct lec_arp_table *entry;
 	int i;
 
@@ -2111,7 +2108,7 @@ static void lec_flush_complete(struct lec_priv *priv, unsigned long tran_id)
 restart:
 	spin_lock_irqsave(&priv->lec_arp_lock, flags);
 	for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) {
-		hlist_for_each_entry(entry, node,
+		hlist_for_each_entry(entry,
 				     &priv->lec_arp_tables[i], next) {
 			if (entry->flush_tran_id == tran_id &&
 			    entry->status == ESI_FLUSH_PENDING) {
@@ -2140,13 +2137,12 @@ lec_set_flush_tran_id(struct lec_priv *priv,
 		      const unsigned char *atm_addr, unsigned long tran_id)
 {
 	unsigned long flags;
-	struct hlist_node *node;
 	struct lec_arp_table *entry;
 	int i;
 
 	spin_lock_irqsave(&priv->lec_arp_lock, flags);
 	for (i = 0; i < LEC_ARP_TABLE_SIZE; i++)
-		hlist_for_each_entry(entry, node,
+		hlist_for_each_entry(entry,
 				     &priv->lec_arp_tables[i], next) {
 			if (!memcmp(atm_addr, entry->atm_addr, ATM_ESA_LEN)) {
 				entry->flush_tran_id = tran_id;
@@ -2198,7 +2194,7 @@ out:
 static void lec_vcc_close(struct lec_priv *priv, struct atm_vcc *vcc)
 {
 	unsigned long flags;
-	struct hlist_node *node, *next;
+	struct hlist_node *next;
 	struct lec_arp_table *entry;
 	int i;
 
@@ -2208,7 +2204,7 @@ static void lec_vcc_close(struct lec_priv *priv, struct atm_vcc *vcc)
 	spin_lock_irqsave(&priv->lec_arp_lock, flags);
 
 	for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) {
-		hlist_for_each_entry_safe(entry, node, next,
+		hlist_for_each_entry_safe(entry, next,
 					  &priv->lec_arp_tables[i], next) {
 			if (vcc == entry->vcc) {
 				lec_arp_remove(priv, entry);
@@ -2219,7 +2215,7 @@ static void lec_vcc_close(struct lec_priv *priv, struct atm_vcc *vcc)
 		}
 	}
 
-	hlist_for_each_entry_safe(entry, node, next,
+	hlist_for_each_entry_safe(entry, next,
 				  &priv->lec_arp_empty_ones, next) {
 		if (entry->vcc == vcc) {
 			lec_arp_clear_vccs(entry);
@@ -2229,7 +2225,7 @@ static void lec_vcc_close(struct lec_priv *priv, struct atm_vcc *vcc)
 		}
 	}
 
-	hlist_for_each_entry_safe(entry, node, next,
+	hlist_for_each_entry_safe(entry, next,
 				  &priv->lec_no_forward, next) {
 		if (entry->recv_vcc == vcc) {
 			lec_arp_clear_vccs(entry);
@@ -2239,7 +2235,7 @@ static void lec_vcc_close(struct lec_priv *priv, struct atm_vcc *vcc)
 		}
 	}
 
-	hlist_for_each_entry_safe(entry, node, next, &priv->mcast_fwds, next) {
+	hlist_for_each_entry_safe(entry, next, &priv->mcast_fwds, next) {
 		if (entry->recv_vcc == vcc) {
 			lec_arp_clear_vccs(entry);
 			/* No timer, LANEv2 7.1.20 and 2.3.5.3 */
@@ -2257,13 +2253,13 @@ lec_arp_check_empties(struct lec_priv *priv,
 		      struct atm_vcc *vcc, struct sk_buff *skb)
 {
 	unsigned long flags;
-	struct hlist_node *node, *next;
+	struct hlist_node *next;
 	struct lec_arp_table *entry, *tmp;
 	struct lecdatahdr_8023 *hdr = (struct lecdatahdr_8023 *)skb->data;
 	unsigned char *src = hdr->h_source;
 
 	spin_lock_irqsave(&priv->lec_arp_lock, flags);
-	hlist_for_each_entry_safe(entry, node, next,
+	hlist_for_each_entry_safe(entry, next,
 				  &priv->lec_arp_empty_ones, next) {
 		if (vcc == entry->vcc) {
 			del_timer(&entry->timer);
diff --git a/net/atm/signaling.c b/net/atm/signaling.c
index 86767ca908a3..4176887e72eb 100644
--- a/net/atm/signaling.c
+++ b/net/atm/signaling.c
@@ -217,7 +217,6 @@ static void purge_vcc(struct atm_vcc *vcc)
 
 static void sigd_close(struct atm_vcc *vcc)
 {
-	struct hlist_node *node;
 	struct sock *s;
 	int i;
 
@@ -231,7 +230,7 @@ static void sigd_close(struct atm_vcc *vcc)
 	for (i = 0; i < VCC_HTABLE_SIZE; ++i) {
 		struct hlist_head *head = &vcc_hash[i];
 
-		sk_for_each(s, node, head) {
+		sk_for_each(s, head) {
 			vcc = atm_sk(s);
 
 			purge_vcc(vcc);
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 69a06c47b648..7b11f8bc5071 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -81,14 +81,13 @@ static void ax25_kill_by_device(struct net_device *dev)
 {
 	ax25_dev *ax25_dev;
 	ax25_cb *s;
-	struct hlist_node *node;
 
 	if ((ax25_dev = ax25_dev_ax25dev(dev)) == NULL)
 		return;
 
 	spin_lock_bh(&ax25_list_lock);
 again:
-	ax25_for_each(s, node, &ax25_list) {
+	ax25_for_each(s, &ax25_list) {
 		if (s->ax25_dev == ax25_dev) {
 			s->ax25_dev = NULL;
 			spin_unlock_bh(&ax25_list_lock);
@@ -158,10 +157,9 @@ struct sock *ax25_find_listener(ax25_address *addr, int digi,
 	struct net_device *dev, int type)
 {
 	ax25_cb *s;
-	struct hlist_node *node;
 
 	spin_lock(&ax25_list_lock);
-	ax25_for_each(s, node, &ax25_list) {
+	ax25_for_each(s, &ax25_list) {
 		if ((s->iamdigi && !digi) || (!s->iamdigi && digi))
 			continue;
 		if (s->sk && !ax25cmp(&s->source_addr, addr) &&
@@ -187,10 +185,9 @@ struct sock *ax25_get_socket(ax25_address *my_addr, ax25_address *dest_addr,
 {
 	struct sock *sk = NULL;
 	ax25_cb *s;
-	struct hlist_node *node;
 
 	spin_lock(&ax25_list_lock);
-	ax25_for_each(s, node, &ax25_list) {
+	ax25_for_each(s, &ax25_list) {
 		if (s->sk && !ax25cmp(&s->source_addr, my_addr) &&
 		    !ax25cmp(&s->dest_addr, dest_addr) &&
 		    s->sk->sk_type == type) {
@@ -213,10 +210,9 @@ ax25_cb *ax25_find_cb(ax25_address *src_addr, ax25_address *dest_addr,
 	ax25_digi *digi, struct net_device *dev)
 {
 	ax25_cb *s;
-	struct hlist_node *node;
 
 	spin_lock_bh(&ax25_list_lock);
-	ax25_for_each(s, node, &ax25_list) {
+	ax25_for_each(s, &ax25_list) {
 		if (s->sk && s->sk->sk_type != SOCK_SEQPACKET)
 			continue;
 		if (s->ax25_dev == NULL)
@@ -248,10 +244,9 @@ void ax25_send_to_raw(ax25_address *addr, struct sk_buff *skb, int proto)
 {
 	ax25_cb *s;
 	struct sk_buff *copy;
-	struct hlist_node *node;
 
 	spin_lock(&ax25_list_lock);
-	ax25_for_each(s, node, &ax25_list) {
+	ax25_for_each(s, &ax25_list) {
 		if (s->sk != NULL && ax25cmp(&s->source_addr, addr) == 0 &&
 		    s->sk->sk_type == SOCK_RAW &&
 		    s->sk->sk_protocol == proto &&
diff --git a/net/ax25/ax25_ds_subr.c b/net/ax25/ax25_ds_subr.c
index 5ea7fd3e2af9..e05bd57b5afd 100644
--- a/net/ax25/ax25_ds_subr.c
+++ b/net/ax25/ax25_ds_subr.c
@@ -39,7 +39,6 @@ void ax25_ds_nr_error_recovery(ax25_cb *ax25)
 void ax25_ds_enquiry_response(ax25_cb *ax25)
 {
 	ax25_cb *ax25o;
-	struct hlist_node *node;
 
 	/* Please note that neither DK4EG's nor DG2FEF's
 	 * DAMA spec mention the following behaviour as seen
@@ -80,7 +79,7 @@ void ax25_ds_enquiry_response(ax25_cb *ax25)
 	ax25_ds_set_timer(ax25->ax25_dev);
 
 	spin_lock(&ax25_list_lock);
-	ax25_for_each(ax25o, node, &ax25_list) {
+	ax25_for_each(ax25o, &ax25_list) {
 		if (ax25o == ax25)
 			continue;
 
@@ -159,10 +158,9 @@ static int ax25_check_dama_slave(ax25_dev *ax25_dev)
 {
 	ax25_cb *ax25;
 	int res = 0;
-	struct hlist_node *node;
 
 	spin_lock(&ax25_list_lock);
-	ax25_for_each(ax25, node, &ax25_list)
+	ax25_for_each(ax25, &ax25_list)
 		if (ax25->ax25_dev == ax25_dev && (ax25->condition & AX25_COND_DAMA_MODE) && ax25->state > AX25_STATE_1) {
 			res = 1;
 			break;
diff --git a/net/ax25/ax25_ds_timer.c b/net/ax25/ax25_ds_timer.c
index 993c439b4f71..951cd57bb07d 100644
--- a/net/ax25/ax25_ds_timer.c
+++ b/net/ax25/ax25_ds_timer.c
@@ -70,7 +70,6 @@ static void ax25_ds_timeout(unsigned long arg)
 {
 	ax25_dev *ax25_dev = (struct ax25_dev *) arg;
 	ax25_cb *ax25;
-	struct hlist_node *node;
 
 	if (ax25_dev == NULL || !ax25_dev->dama.slave)
 		return;			/* Yikes! */
@@ -81,7 +80,7 @@ static void ax25_ds_timeout(unsigned long arg)
 	}
 
 	spin_lock(&ax25_list_lock);
-	ax25_for_each(ax25, node, &ax25_list) {
+	ax25_for_each(ax25, &ax25_list) {
 		if (ax25->ax25_dev != ax25_dev || !(ax25->condition & AX25_COND_DAMA_MODE))
 			continue;
 
diff --git a/net/ax25/ax25_iface.c b/net/ax25/ax25_iface.c
index 7d5f24b82cc8..7f16e8a931b2 100644
--- a/net/ax25/ax25_iface.c
+++ b/net/ax25/ax25_iface.c
@@ -193,10 +193,9 @@ int ax25_listen_mine(ax25_address *callsign, struct net_device *dev)
 void ax25_link_failed(ax25_cb *ax25, int reason)
 {
 	struct ax25_linkfail *lf;
-	struct hlist_node *node;
 
 	spin_lock_bh(&linkfail_lock);
-	hlist_for_each_entry(lf, node, &ax25_linkfail_list, lf_node)
+	hlist_for_each_entry(lf, &ax25_linkfail_list, lf_node)
 		lf->func(ax25, reason);
 	spin_unlock_bh(&linkfail_lock);
 }
diff --git a/net/ax25/ax25_uid.c b/net/ax25/ax25_uid.c
index 957999e43ff7..71c4badbc807 100644
--- a/net/ax25/ax25_uid.c
+++ b/net/ax25/ax25_uid.c
@@ -54,10 +54,9 @@ EXPORT_SYMBOL(ax25_uid_policy);
 ax25_uid_assoc *ax25_findbyuid(kuid_t uid)
 {
 	ax25_uid_assoc *ax25_uid, *res = NULL;
-	struct hlist_node *node;
 
 	read_lock(&ax25_uid_lock);
-	ax25_uid_for_each(ax25_uid, node, &ax25_uid_list) {
+	ax25_uid_for_each(ax25_uid, &ax25_uid_list) {
 		if (uid_eq(ax25_uid->uid, uid)) {
 			ax25_uid_hold(ax25_uid);
 			res = ax25_uid;
@@ -74,7 +73,6 @@ EXPORT_SYMBOL(ax25_findbyuid);
 int ax25_uid_ioctl(int cmd, struct sockaddr_ax25 *sax)
 {
 	ax25_uid_assoc *ax25_uid;
-	struct hlist_node *node;
 	ax25_uid_assoc *user;
 	unsigned long res;
 
@@ -82,7 +80,7 @@ int ax25_uid_ioctl(int cmd, struct sockaddr_ax25 *sax)
 	case SIOCAX25GETUID:
 		res = -ENOENT;
 		read_lock(&ax25_uid_lock);
-		ax25_uid_for_each(ax25_uid, node, &ax25_uid_list) {
+		ax25_uid_for_each(ax25_uid, &ax25_uid_list) {
 			if (ax25cmp(&sax->sax25_call, &ax25_uid->call) == 0) {
 				res = from_kuid_munged(current_user_ns(), ax25_uid->uid);
 				break;
@@ -126,7 +124,7 @@ int ax25_uid_ioctl(int cmd, struct sockaddr_ax25 *sax)
 
 		ax25_uid = NULL;
 		write_lock(&ax25_uid_lock);
-		ax25_uid_for_each(ax25_uid, node, &ax25_uid_list) {
+		ax25_uid_for_each(ax25_uid, &ax25_uid_list) {
 			if (ax25cmp(&sax->sax25_call, &ax25_uid->call) == 0)
 				break;
 		}
@@ -212,11 +210,10 @@ const struct file_operations ax25_uid_fops = {
 void __exit ax25_uid_free(void)
 {
 	ax25_uid_assoc *ax25_uid;
-	struct hlist_node *node;
 
 	write_lock(&ax25_uid_lock);
 again:
-	ax25_uid_for_each(ax25_uid, node, &ax25_uid_list) {
+	ax25_uid_for_each(ax25_uid, &ax25_uid_list) {
 		hlist_del_init(&ax25_uid->uid_node);
 		ax25_uid_put(ax25_uid);
 		goto again;
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index 72fe1bbf7721..a0b253ecadaf 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -487,7 +487,6 @@ static void batadv_iv_ogm_queue_add(struct batadv_priv *bat_priv,
 	 */
 	struct batadv_forw_packet *forw_packet_aggr = NULL;
 	struct batadv_forw_packet *forw_packet_pos = NULL;
-	struct hlist_node *tmp_node;
 	struct batadv_ogm_packet *batadv_ogm_packet;
 	bool direct_link;
 	unsigned long max_aggregation_jiffies;
@@ -500,7 +499,7 @@ static void batadv_iv_ogm_queue_add(struct batadv_priv *bat_priv,
 	spin_lock_bh(&bat_priv->forw_bat_list_lock);
 	/* own packets are not to be aggregated */
 	if ((atomic_read(&bat_priv->aggregated_ogms)) && (!own_packet)) {
-		hlist_for_each_entry(forw_packet_pos, tmp_node,
+		hlist_for_each_entry(forw_packet_pos,
 				     &bat_priv->forw_bat_list, list) {
 			if (batadv_iv_ogm_can_aggregate(batadv_ogm_packet,
 							bat_priv, packet_len,
@@ -655,7 +654,6 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv,
 	struct batadv_neigh_node *neigh_node = NULL, *tmp_neigh_node = NULL;
 	struct batadv_neigh_node *router = NULL;
 	struct batadv_orig_node *orig_node_tmp;
-	struct hlist_node *node;
 	int if_num;
 	uint8_t sum_orig, sum_neigh;
 	uint8_t *neigh_addr;
@@ -665,7 +663,7 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv,
 		   "update_originator(): Searching and updating originator entry of received packet\n");
 
 	rcu_read_lock();
-	hlist_for_each_entry_rcu(tmp_neigh_node, node,
+	hlist_for_each_entry_rcu(tmp_neigh_node,
 				 &orig_node->neigh_list, list) {
 		neigh_addr = tmp_neigh_node->addr;
 		if (batadv_compare_eth(neigh_addr, ethhdr->h_source) &&
@@ -801,7 +799,6 @@ static int batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node,
 {
 	struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
 	struct batadv_neigh_node *neigh_node = NULL, *tmp_neigh_node;
-	struct hlist_node *node;
 	uint8_t total_count;
 	uint8_t orig_eq_count, neigh_rq_count, neigh_rq_inv, tq_own;
 	unsigned int neigh_rq_inv_cube, neigh_rq_max_cube;
@@ -810,7 +807,7 @@ static int batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node,
 
 	/* find corresponding one hop neighbor */
 	rcu_read_lock();
-	hlist_for_each_entry_rcu(tmp_neigh_node, node,
+	hlist_for_each_entry_rcu(tmp_neigh_node,
 				 &orig_neigh_node->neigh_list, list) {
 		if (!batadv_compare_eth(tmp_neigh_node->addr,
 					orig_neigh_node->orig))
@@ -920,7 +917,6 @@ batadv_iv_ogm_update_seqnos(const struct ethhdr *ethhdr,
 	struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
 	struct batadv_orig_node *orig_node;
 	struct batadv_neigh_node *tmp_neigh_node;
-	struct hlist_node *node;
 	int is_duplicate = 0;
 	int32_t seq_diff;
 	int need_update = 0;
@@ -943,7 +939,7 @@ batadv_iv_ogm_update_seqnos(const struct ethhdr *ethhdr,
 		goto out;
 
 	rcu_read_lock();
-	hlist_for_each_entry_rcu(tmp_neigh_node, node,
+	hlist_for_each_entry_rcu(tmp_neigh_node,
 				 &orig_node->neigh_list, list) {
 		is_duplicate |= batadv_test_bit(tmp_neigh_node->real_bits,
 						orig_node->last_real_seqno,
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index 30f46526cbbd..6a4f728680ae 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -144,7 +144,6 @@ static struct batadv_bla_claim
 {
 	struct batadv_hashtable *hash = bat_priv->bla.claim_hash;
 	struct hlist_head *head;
-	struct hlist_node *node;
 	struct batadv_bla_claim *claim;
 	struct batadv_bla_claim *claim_tmp = NULL;
 	int index;
@@ -156,7 +155,7 @@ static struct batadv_bla_claim
 	head = &hash->table[index];
 
 	rcu_read_lock();
-	hlist_for_each_entry_rcu(claim, node, head, hash_entry) {
+	hlist_for_each_entry_rcu(claim, head, hash_entry) {
 		if (!batadv_compare_claim(&claim->hash_entry, data))
 			continue;
 
@@ -185,7 +184,6 @@ batadv_backbone_hash_find(struct batadv_priv *bat_priv,
 {
 	struct batadv_hashtable *hash = bat_priv->bla.backbone_hash;
 	struct hlist_head *head;
-	struct hlist_node *node;
 	struct batadv_bla_backbone_gw search_entry, *backbone_gw;
 	struct batadv_bla_backbone_gw *backbone_gw_tmp = NULL;
 	int index;
@@ -200,7 +198,7 @@ batadv_backbone_hash_find(struct batadv_priv *bat_priv,
 	head = &hash->table[index];
 
 	rcu_read_lock();
-	hlist_for_each_entry_rcu(backbone_gw, node, head, hash_entry) {
+	hlist_for_each_entry_rcu(backbone_gw, head, hash_entry) {
 		if (!batadv_compare_backbone_gw(&backbone_gw->hash_entry,
 						&search_entry))
 			continue;
@@ -221,7 +219,7 @@ static void
 batadv_bla_del_backbone_claims(struct batadv_bla_backbone_gw *backbone_gw)
 {
 	struct batadv_hashtable *hash;
-	struct hlist_node *node, *node_tmp;
+	struct hlist_node *node_tmp;
 	struct hlist_head *head;
 	struct batadv_bla_claim *claim;
 	int i;
@@ -236,13 +234,13 @@ batadv_bla_del_backbone_claims(struct batadv_bla_backbone_gw *backbone_gw)
 		list_lock = &hash->list_locks[i];
 
 		spin_lock_bh(list_lock);
-		hlist_for_each_entry_safe(claim, node, node_tmp,
+		hlist_for_each_entry_safe(claim, node_tmp,
 					  head, hash_entry) {
 			if (claim->backbone_gw != backbone_gw)
 				continue;
 
 			batadv_claim_free_ref(claim);
-			hlist_del_rcu(node);
+			hlist_del_rcu(&claim->hash_entry);
 		}
 		spin_unlock_bh(list_lock);
 	}
@@ -460,7 +458,6 @@ static void batadv_bla_answer_request(struct batadv_priv *bat_priv,
 				      struct batadv_hard_iface *primary_if,
 				      short vid)
 {
-	struct hlist_node *node;
 	struct hlist_head *head;
 	struct batadv_hashtable *hash;
 	struct batadv_bla_claim *claim;
@@ -481,7 +478,7 @@ static void batadv_bla_answer_request(struct batadv_priv *bat_priv,
 		head = &hash->table[i];
 
 		rcu_read_lock();
-		hlist_for_each_entry_rcu(claim, node, head, hash_entry) {
+		hlist_for_each_entry_rcu(claim, head, hash_entry) {
 			/* only own claims are interesting */
 			if (claim->backbone_gw != backbone_gw)
 				continue;
@@ -958,7 +955,7 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv,
 static void batadv_bla_purge_backbone_gw(struct batadv_priv *bat_priv, int now)
 {
 	struct batadv_bla_backbone_gw *backbone_gw;
-	struct hlist_node *node, *node_tmp;
+	struct hlist_node *node_tmp;
 	struct hlist_head *head;
 	struct batadv_hashtable *hash;
 	spinlock_t *list_lock;	/* protects write access to the hash lists */
@@ -973,7 +970,7 @@ static void batadv_bla_purge_backbone_gw(struct batadv_priv *bat_priv, int now)
 		list_lock = &hash->list_locks[i];
 
 		spin_lock_bh(list_lock);
-		hlist_for_each_entry_safe(backbone_gw, node, node_tmp,
+		hlist_for_each_entry_safe(backbone_gw, node_tmp,
 					  head, hash_entry) {
 			if (now)
 				goto purge_now;
@@ -992,7 +989,7 @@ purge_now:
 
 			batadv_bla_del_backbone_claims(backbone_gw);
 
-			hlist_del_rcu(node);
+			hlist_del_rcu(&backbone_gw->hash_entry);
 			batadv_backbone_gw_free_ref(backbone_gw);
 		}
 		spin_unlock_bh(list_lock);
@@ -1013,7 +1010,6 @@ static void batadv_bla_purge_claims(struct batadv_priv *bat_priv,
 				    int now)
 {
 	struct batadv_bla_claim *claim;
-	struct hlist_node *node;
 	struct hlist_head *head;
 	struct batadv_hashtable *hash;
 	int i;
@@ -1026,7 +1022,7 @@ static void batadv_bla_purge_claims(struct batadv_priv *bat_priv,
 		head = &hash->table[i];
 
 		rcu_read_lock();
-		hlist_for_each_entry_rcu(claim, node, head, hash_entry) {
+		hlist_for_each_entry_rcu(claim, head, hash_entry) {
 			if (now)
 				goto purge_now;
 			if (!batadv_compare_eth(claim->backbone_gw->orig,
@@ -1062,7 +1058,6 @@ void batadv_bla_update_orig_address(struct batadv_priv *bat_priv,
 				    struct batadv_hard_iface *oldif)
 {
 	struct batadv_bla_backbone_gw *backbone_gw;
-	struct hlist_node *node;
 	struct hlist_head *head;
 	struct batadv_hashtable *hash;
 	__be16 group;
@@ -1086,7 +1081,7 @@ void batadv_bla_update_orig_address(struct batadv_priv *bat_priv,
 		head = &hash->table[i];
 
 		rcu_read_lock();
-		hlist_for_each_entry_rcu(backbone_gw, node, head, hash_entry) {
+		hlist_for_each_entry_rcu(backbone_gw, head, hash_entry) {
 			/* own orig still holds the old value. */
 			if (!batadv_compare_eth(backbone_gw->orig,
 						oldif->net_dev->dev_addr))
@@ -1112,7 +1107,6 @@ static void batadv_bla_periodic_work(struct work_struct *work)
 	struct delayed_work *delayed_work;
 	struct batadv_priv *bat_priv;
 	struct batadv_priv_bla *priv_bla;
-	struct hlist_node *node;
 	struct hlist_head *head;
 	struct batadv_bla_backbone_gw *backbone_gw;
 	struct batadv_hashtable *hash;
@@ -1140,7 +1134,7 @@ static void batadv_bla_periodic_work(struct work_struct *work)
 		head = &hash->table[i];
 
 		rcu_read_lock();
-		hlist_for_each_entry_rcu(backbone_gw, node, head, hash_entry) {
+		hlist_for_each_entry_rcu(backbone_gw, head, hash_entry) {
 			if (!batadv_compare_eth(backbone_gw->orig,
 						primary_if->net_dev->dev_addr))
 				continue;
@@ -1322,7 +1316,6 @@ int batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv, uint8_t *orig)
 {
 	struct batadv_hashtable *hash = bat_priv->bla.backbone_hash;
 	struct hlist_head *head;
-	struct hlist_node *node;
 	struct batadv_bla_backbone_gw *backbone_gw;
 	int i;
 
@@ -1336,7 +1329,7 @@ int batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv, uint8_t *orig)
 		head = &hash->table[i];
 
 		rcu_read_lock();
-		hlist_for_each_entry_rcu(backbone_gw, node, head, hash_entry) {
+		hlist_for_each_entry_rcu(backbone_gw, head, hash_entry) {
 			if (batadv_compare_eth(backbone_gw->orig, orig)) {
 				rcu_read_unlock();
 				return 1;
@@ -1607,7 +1600,6 @@ int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset)
 	struct batadv_hashtable *hash = bat_priv->bla.claim_hash;
 	struct batadv_bla_claim *claim;
 	struct batadv_hard_iface *primary_if;
-	struct hlist_node *node;
 	struct hlist_head *head;
 	uint32_t i;
 	bool is_own;
@@ -1628,7 +1620,7 @@ int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset)
 		head = &hash->table[i];
 
 		rcu_read_lock();
-		hlist_for_each_entry_rcu(claim, node, head, hash_entry) {
+		hlist_for_each_entry_rcu(claim, head, hash_entry) {
 			is_own = batadv_compare_eth(claim->backbone_gw->orig,
 						    primary_addr);
 			seq_printf(seq,	" * %pM on % 5d by %pM [%c] (%#.4x)\n",
@@ -1652,7 +1644,6 @@ int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, void *offset)
 	struct batadv_hashtable *hash = bat_priv->bla.backbone_hash;
 	struct batadv_bla_backbone_gw *backbone_gw;
 	struct batadv_hard_iface *primary_if;
-	struct hlist_node *node;
 	struct hlist_head *head;
 	int secs, msecs;
 	uint32_t i;
@@ -1674,7 +1665,7 @@ int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, void *offset)
 		head = &hash->table[i];
 
 		rcu_read_lock();
-		hlist_for_each_entry_rcu(backbone_gw, node, head, hash_entry) {
+		hlist_for_each_entry_rcu(backbone_gw, head, hash_entry) {
 			msecs = jiffies_to_msecs(jiffies -
 						 backbone_gw->lasttime);
 			secs = msecs / 1000;
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index 761a59002e34..d54188a112ea 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -83,7 +83,7 @@ static void __batadv_dat_purge(struct batadv_priv *bat_priv,
 {
 	spinlock_t *list_lock; /* protects write access to the hash lists */
 	struct batadv_dat_entry *dat_entry;
-	struct hlist_node *node, *node_tmp;
+	struct hlist_node *node_tmp;
 	struct hlist_head *head;
 	uint32_t i;
 
@@ -95,7 +95,7 @@ static void __batadv_dat_purge(struct batadv_priv *bat_priv,
 		list_lock = &bat_priv->dat.hash->list_locks[i];
 
 		spin_lock_bh(list_lock);
-		hlist_for_each_entry_safe(dat_entry, node, node_tmp, head,
+		hlist_for_each_entry_safe(dat_entry, node_tmp, head,
 					  hash_entry) {
 			/* if an helper function has been passed as parameter,
 			 * ask it if the entry has to be purged or not
@@ -103,7 +103,7 @@ static void __batadv_dat_purge(struct batadv_priv *bat_priv,
 			if (to_purge && !to_purge(dat_entry))
 				continue;
 
-			hlist_del_rcu(node);
+			hlist_del_rcu(&dat_entry->hash_entry);
 			batadv_dat_entry_free_ref(dat_entry);
 		}
 		spin_unlock_bh(list_lock);
@@ -235,7 +235,6 @@ static struct batadv_dat_entry *
 batadv_dat_entry_hash_find(struct batadv_priv *bat_priv, __be32 ip)
 {
 	struct hlist_head *head;
-	struct hlist_node *node;
 	struct batadv_dat_entry *dat_entry, *dat_entry_tmp = NULL;
 	struct batadv_hashtable *hash = bat_priv->dat.hash;
 	uint32_t index;
@@ -247,7 +246,7 @@ batadv_dat_entry_hash_find(struct batadv_priv *bat_priv, __be32 ip)
 	head = &hash->table[index];
 
 	rcu_read_lock();
-	hlist_for_each_entry_rcu(dat_entry, node, head, hash_entry) {
+	hlist_for_each_entry_rcu(dat_entry, head, hash_entry) {
 		if (dat_entry->ip != ip)
 			continue;
 
@@ -465,7 +464,6 @@ static void batadv_choose_next_candidate(struct batadv_priv *bat_priv,
 	batadv_dat_addr_t max = 0, tmp_max = 0;
 	struct batadv_orig_node *orig_node, *max_orig_node = NULL;
 	struct batadv_hashtable *hash = bat_priv->orig_hash;
-	struct hlist_node *node;
 	struct hlist_head *head;
 	int i;
 
@@ -481,7 +479,7 @@ static void batadv_choose_next_candidate(struct batadv_priv *bat_priv,
 		head = &hash->table[i];
 
 		rcu_read_lock();
-		hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) {
+		hlist_for_each_entry_rcu(orig_node, head, hash_entry) {
 			/* the dht space is a ring and addresses are unsigned */
 			tmp_max = BATADV_DAT_ADDR_MAX - orig_node->dat_addr +
 				  ip_key;
@@ -686,7 +684,6 @@ int batadv_dat_cache_seq_print_text(struct seq_file *seq, void *offset)
 	struct batadv_hashtable *hash = bat_priv->dat.hash;
 	struct batadv_dat_entry *dat_entry;
 	struct batadv_hard_iface *primary_if;
-	struct hlist_node *node;
 	struct hlist_head *head;
 	unsigned long last_seen_jiffies;
 	int last_seen_msecs, last_seen_secs, last_seen_mins;
@@ -704,7 +701,7 @@ int batadv_dat_cache_seq_print_text(struct seq_file *seq, void *offset)
 		head = &hash->table[i];
 
 		rcu_read_lock();
-		hlist_for_each_entry_rcu(dat_entry, node, head, hash_entry) {
+		hlist_for_each_entry_rcu(dat_entry, head, hash_entry) {
 			last_seen_jiffies = jiffies - dat_entry->last_update;
 			last_seen_msecs = jiffies_to_msecs(last_seen_jiffies);
 			last_seen_mins = last_seen_msecs / 60000;
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index 074107f2cfaa..34f99a46ec1d 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -114,7 +114,6 @@ static struct batadv_gw_node *
 batadv_gw_get_best_gw_node(struct batadv_priv *bat_priv)
 {
 	struct batadv_neigh_node *router;
-	struct hlist_node *node;
 	struct batadv_gw_node *gw_node, *curr_gw = NULL;
 	uint32_t max_gw_factor = 0, tmp_gw_factor = 0;
 	uint32_t gw_divisor;
@@ -127,7 +126,7 @@ batadv_gw_get_best_gw_node(struct batadv_priv *bat_priv)
 	gw_divisor *= 64;
 
 	rcu_read_lock();
-	hlist_for_each_entry_rcu(gw_node, node, &bat_priv->gw.list, list) {
+	hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) {
 		if (gw_node->deleted)
 			continue;
 
@@ -344,7 +343,6 @@ void batadv_gw_node_update(struct batadv_priv *bat_priv,
 			   struct batadv_orig_node *orig_node,
 			   uint8_t new_gwflags)
 {
-	struct hlist_node *node;
 	struct batadv_gw_node *gw_node, *curr_gw;
 
 	/* Note: We don't need a NULL check here, since curr_gw never gets
@@ -355,7 +353,7 @@ void batadv_gw_node_update(struct batadv_priv *bat_priv,
 	curr_gw = batadv_gw_get_selected_gw_node(bat_priv);
 
 	rcu_read_lock();
-	hlist_for_each_entry_rcu(gw_node, node, &bat_priv->gw.list, list) {
+	hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) {
 		if (gw_node->orig_node != orig_node)
 			continue;
 
@@ -403,7 +401,7 @@ void batadv_gw_node_delete(struct batadv_priv *bat_priv,
 void batadv_gw_node_purge(struct batadv_priv *bat_priv)
 {
 	struct batadv_gw_node *gw_node, *curr_gw;
-	struct hlist_node *node, *node_tmp;
+	struct hlist_node *node_tmp;
 	unsigned long timeout = msecs_to_jiffies(2 * BATADV_PURGE_TIMEOUT);
 	int do_deselect = 0;
 
@@ -411,7 +409,7 @@ void batadv_gw_node_purge(struct batadv_priv *bat_priv)
 
 	spin_lock_bh(&bat_priv->gw.list_lock);
 
-	hlist_for_each_entry_safe(gw_node, node, node_tmp,
+	hlist_for_each_entry_safe(gw_node, node_tmp,
 				  &bat_priv->gw.list, list) {
 		if (((!gw_node->deleted) ||
 		     (time_before(jiffies, gw_node->deleted + timeout))) &&
@@ -476,7 +474,6 @@ int batadv_gw_client_seq_print_text(struct seq_file *seq, void *offset)
 	struct batadv_priv *bat_priv = netdev_priv(net_dev);
 	struct batadv_hard_iface *primary_if;
 	struct batadv_gw_node *gw_node;
-	struct hlist_node *node;
 	int gw_count = 0;
 
 	primary_if = batadv_seq_print_text_primary_if_get(seq);
@@ -490,7 +487,7 @@ int batadv_gw_client_seq_print_text(struct seq_file *seq, void *offset)
 		   primary_if->net_dev->dev_addr, net_dev->name);
 
 	rcu_read_lock();
-	hlist_for_each_entry_rcu(gw_node, node, &bat_priv->gw.list, list) {
+	hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) {
 		if (gw_node->deleted)
 			continue;
 
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index 21fe6987733b..0488d70c8c35 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -345,9 +345,8 @@ void batadv_recv_handler_unregister(uint8_t packet_type)
 static struct batadv_algo_ops *batadv_algo_get(char *name)
 {
 	struct batadv_algo_ops *bat_algo_ops = NULL, *bat_algo_ops_tmp;
-	struct hlist_node *node;
 
-	hlist_for_each_entry(bat_algo_ops_tmp, node, &batadv_algo_list, list) {
+	hlist_for_each_entry(bat_algo_ops_tmp, &batadv_algo_list, list) {
 		if (strcmp(bat_algo_ops_tmp->name, name) != 0)
 			continue;
 
@@ -411,11 +410,10 @@ out:
 int batadv_algo_seq_print_text(struct seq_file *seq, void *offset)
 {
 	struct batadv_algo_ops *bat_algo_ops;
-	struct hlist_node *node;
 
 	seq_printf(seq, "Available routing algorithms:\n");
 
-	hlist_for_each_entry(bat_algo_ops, node, &batadv_algo_list, list) {
+	hlist_for_each_entry(bat_algo_ops, &batadv_algo_list, list) {
 		seq_printf(seq, "%s\n", bat_algo_ops->name);
 	}
 
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index 457ea445217c..96fb80b724dc 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -118,7 +118,7 @@ out:
 
 static void batadv_orig_node_free_rcu(struct rcu_head *rcu)
 {
-	struct hlist_node *node, *node_tmp;
+	struct hlist_node *node_tmp;
 	struct batadv_neigh_node *neigh_node, *tmp_neigh_node;
 	struct batadv_orig_node *orig_node;
 
@@ -134,7 +134,7 @@ static void batadv_orig_node_free_rcu(struct rcu_head *rcu)
 	}
 
 	/* for all neighbors towards this originator ... */
-	hlist_for_each_entry_safe(neigh_node, node, node_tmp,
+	hlist_for_each_entry_safe(neigh_node, node_tmp,
 				  &orig_node->neigh_list, list) {
 		hlist_del_rcu(&neigh_node->list);
 		batadv_neigh_node_free_ref(neigh_node);
@@ -161,7 +161,7 @@ void batadv_orig_node_free_ref(struct batadv_orig_node *orig_node)
 void batadv_originator_free(struct batadv_priv *bat_priv)
 {
 	struct batadv_hashtable *hash = bat_priv->orig_hash;
-	struct hlist_node *node, *node_tmp;
+	struct hlist_node *node_tmp;
 	struct hlist_head *head;
 	spinlock_t *list_lock; /* spinlock to protect write access */
 	struct batadv_orig_node *orig_node;
@@ -179,9 +179,9 @@ void batadv_originator_free(struct batadv_priv *bat_priv)
 		list_lock = &hash->list_locks[i];
 
 		spin_lock_bh(list_lock);
-		hlist_for_each_entry_safe(orig_node, node, node_tmp,
+		hlist_for_each_entry_safe(orig_node, node_tmp,
 					  head, hash_entry) {
-			hlist_del_rcu(node);
+			hlist_del_rcu(&orig_node->hash_entry);
 			batadv_orig_node_free_ref(orig_node);
 		}
 		spin_unlock_bh(list_lock);
@@ -274,7 +274,7 @@ batadv_purge_orig_neighbors(struct batadv_priv *bat_priv,
 			    struct batadv_orig_node *orig_node,
 			    struct batadv_neigh_node **best_neigh_node)
 {
-	struct hlist_node *node, *node_tmp;
+	struct hlist_node *node_tmp;
 	struct batadv_neigh_node *neigh_node;
 	bool neigh_purged = false;
 	unsigned long last_seen;
@@ -285,7 +285,7 @@ batadv_purge_orig_neighbors(struct batadv_priv *bat_priv,
 	spin_lock_bh(&orig_node->neigh_list_lock);
 
 	/* for all neighbors towards this originator ... */
-	hlist_for_each_entry_safe(neigh_node, node, node_tmp,
+	hlist_for_each_entry_safe(neigh_node, node_tmp,
 				  &orig_node->neigh_list, list) {
 		last_seen = neigh_node->last_seen;
 		if_incoming = neigh_node->if_incoming;
@@ -348,7 +348,7 @@ static bool batadv_purge_orig_node(struct batadv_priv *bat_priv,
 static void _batadv_purge_orig(struct batadv_priv *bat_priv)
 {
 	struct batadv_hashtable *hash = bat_priv->orig_hash;
-	struct hlist_node *node, *node_tmp;
+	struct hlist_node *node_tmp;
 	struct hlist_head *head;
 	spinlock_t *list_lock; /* spinlock to protect write access */
 	struct batadv_orig_node *orig_node;
@@ -363,13 +363,13 @@ static void _batadv_purge_orig(struct batadv_priv *bat_priv)
 		list_lock = &hash->list_locks[i];
 
 		spin_lock_bh(list_lock);
-		hlist_for_each_entry_safe(orig_node, node, node_tmp,
+		hlist_for_each_entry_safe(orig_node, node_tmp,
 					  head, hash_entry) {
 			if (batadv_purge_orig_node(bat_priv, orig_node)) {
 				if (orig_node->gw_flags)
 					batadv_gw_node_delete(bat_priv,
 							      orig_node);
-				hlist_del_rcu(node);
+				hlist_del_rcu(&orig_node->hash_entry);
 				batadv_orig_node_free_ref(orig_node);
 				continue;
 			}
@@ -408,7 +408,6 @@ int batadv_orig_seq_print_text(struct seq_file *seq, void *offset)
 	struct net_device *net_dev = (struct net_device *)seq->private;
 	struct batadv_priv *bat_priv = netdev_priv(net_dev);
 	struct batadv_hashtable *hash = bat_priv->orig_hash;
-	struct hlist_node *node, *node_tmp;
 	struct hlist_head *head;
 	struct batadv_hard_iface *primary_if;
 	struct batadv_orig_node *orig_node;
@@ -434,7 +433,7 @@ int batadv_orig_seq_print_text(struct seq_file *seq, void *offset)
 		head = &hash->table[i];
 
 		rcu_read_lock();
-		hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) {
+		hlist_for_each_entry_rcu(orig_node, head, hash_entry) {
 			neigh_node = batadv_orig_node_get_router(orig_node);
 			if (!neigh_node)
 				continue;
@@ -453,7 +452,7 @@ int batadv_orig_seq_print_text(struct seq_file *seq, void *offset)
 				   neigh_node->addr,
 				   neigh_node->if_incoming->net_dev->name);
 
-			hlist_for_each_entry_rcu(neigh_node_tmp, node_tmp,
+			hlist_for_each_entry_rcu(neigh_node_tmp,
 						 &orig_node->neigh_list, list) {
 				seq_printf(seq, " %pM (%3i)",
 					   neigh_node_tmp->addr,
@@ -511,7 +510,6 @@ int batadv_orig_hash_add_if(struct batadv_hard_iface *hard_iface,
 {
 	struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
 	struct batadv_hashtable *hash = bat_priv->orig_hash;
-	struct hlist_node *node;
 	struct hlist_head *head;
 	struct batadv_orig_node *orig_node;
 	uint32_t i;
@@ -524,7 +522,7 @@ int batadv_orig_hash_add_if(struct batadv_hard_iface *hard_iface,
 		head = &hash->table[i];
 
 		rcu_read_lock();
-		hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) {
+		hlist_for_each_entry_rcu(orig_node, head, hash_entry) {
 			spin_lock_bh(&orig_node->ogm_cnt_lock);
 			ret = batadv_orig_node_add_if(orig_node, max_if_num);
 			spin_unlock_bh(&orig_node->ogm_cnt_lock);
@@ -595,7 +593,6 @@ int batadv_orig_hash_del_if(struct batadv_hard_iface *hard_iface,
 {
 	struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
 	struct batadv_hashtable *hash = bat_priv->orig_hash;
-	struct hlist_node *node;
 	struct hlist_head *head;
 	struct batadv_hard_iface *hard_iface_tmp;
 	struct batadv_orig_node *orig_node;
@@ -609,7 +606,7 @@ int batadv_orig_hash_del_if(struct batadv_hard_iface *hard_iface,
 		head = &hash->table[i];
 
 		rcu_read_lock();
-		hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) {
+		hlist_for_each_entry_rcu(orig_node, head, hash_entry) {
 			spin_lock_bh(&orig_node->ogm_cnt_lock);
 			ret = batadv_orig_node_del_if(orig_node, max_if_num,
 						      hard_iface->if_num);
diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h
index 286bf743e76a..7df48fa7669d 100644
--- a/net/batman-adv/originator.h
+++ b/net/batman-adv/originator.h
@@ -68,7 +68,6 @@ batadv_orig_hash_find(struct batadv_priv *bat_priv, const void *data)
 {
 	struct batadv_hashtable *hash = bat_priv->orig_hash;
 	struct hlist_head *head;
-	struct hlist_node *node;
 	struct batadv_orig_node *orig_node, *orig_node_tmp = NULL;
 	int index;
 
@@ -79,7 +78,7 @@ batadv_orig_hash_find(struct batadv_priv *bat_priv, const void *data)
 	head = &hash->table[index];
 
 	rcu_read_lock();
-	hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) {
+	hlist_for_each_entry_rcu(orig_node, head, hash_entry) {
 		if (!batadv_compare_eth(orig_node, data))
 			continue;
 
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index 60ba03fc8390..5ee21cebbbb0 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -37,7 +37,6 @@ void batadv_slide_own_bcast_window(struct batadv_hard_iface *hard_iface)
 {
 	struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
 	struct batadv_hashtable *hash = bat_priv->orig_hash;
-	struct hlist_node *node;
 	struct hlist_head *head;
 	struct batadv_orig_node *orig_node;
 	unsigned long *word;
@@ -49,7 +48,7 @@ void batadv_slide_own_bcast_window(struct batadv_hard_iface *hard_iface)
 		head = &hash->table[i];
 
 		rcu_read_lock();
-		hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) {
+		hlist_for_each_entry_rcu(orig_node, head, hash_entry) {
 			spin_lock_bh(&orig_node->ogm_cnt_lock);
 			word_index = hard_iface->if_num * BATADV_NUM_WORDS;
 			word = &(orig_node->bcast_own[word_index]);
@@ -146,7 +145,6 @@ out:
 void batadv_bonding_candidate_add(struct batadv_orig_node *orig_node,
 				  struct batadv_neigh_node *neigh_node)
 {
-	struct hlist_node *node;
 	struct batadv_neigh_node *tmp_neigh_node, *router = NULL;
 	uint8_t interference_candidate = 0;
 
@@ -169,7 +167,7 @@ void batadv_bonding_candidate_add(struct batadv_orig_node *orig_node,
 	 * interface. If we do, we won't select this candidate because of
 	 * possible interference.
 	 */
-	hlist_for_each_entry_rcu(tmp_neigh_node, node,
+	hlist_for_each_entry_rcu(tmp_neigh_node,
 				 &orig_node->neigh_list, list) {
 		if (tmp_neigh_node == neigh_node)
 			continue;
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index 80ca65fc89a1..a67cffde37ae 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -316,7 +316,7 @@ batadv_purge_outstanding_packets(struct batadv_priv *bat_priv,
 				 const struct batadv_hard_iface *hard_iface)
 {
 	struct batadv_forw_packet *forw_packet;
-	struct hlist_node *tmp_node, *safe_tmp_node;
+	struct hlist_node *safe_tmp_node;
 	bool pending;
 
 	if (hard_iface)
@@ -329,7 +329,7 @@ batadv_purge_outstanding_packets(struct batadv_priv *bat_priv,
 
 	/* free bcast list */
 	spin_lock_bh(&bat_priv->forw_bcast_list_lock);
-	hlist_for_each_entry_safe(forw_packet, tmp_node, safe_tmp_node,
+	hlist_for_each_entry_safe(forw_packet, safe_tmp_node,
 				  &bat_priv->forw_bcast_list, list) {
 		/* if purge_outstanding_packets() was called with an argument
 		 * we delete only packets belonging to the given interface
@@ -355,7 +355,7 @@ batadv_purge_outstanding_packets(struct batadv_priv *bat_priv,
 
 	/* free batman packet list */
 	spin_lock_bh(&bat_priv->forw_bat_list_lock);
-	hlist_for_each_entry_safe(forw_packet, tmp_node, safe_tmp_node,
+	hlist_for_each_entry_safe(forw_packet, safe_tmp_node,
 				  &bat_priv->forw_bat_list, list) {
 		/* if purge_outstanding_packets() was called with an argument
 		 * we delete only packets belonging to the given interface
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index d44672f4a349..98a66a021a60 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -56,7 +56,6 @@ static struct batadv_tt_common_entry *
 batadv_tt_hash_find(struct batadv_hashtable *hash, const void *data)
 {
 	struct hlist_head *head;
-	struct hlist_node *node;
 	struct batadv_tt_common_entry *tt_common_entry;
 	struct batadv_tt_common_entry *tt_common_entry_tmp = NULL;
 	uint32_t index;
@@ -68,7 +67,7 @@ batadv_tt_hash_find(struct batadv_hashtable *hash, const void *data)
 	head = &hash->table[index];
 
 	rcu_read_lock();
-	hlist_for_each_entry_rcu(tt_common_entry, node, head, hash_entry) {
+	hlist_for_each_entry_rcu(tt_common_entry, head, hash_entry) {
 		if (!batadv_compare_eth(tt_common_entry, data))
 			continue;
 
@@ -257,7 +256,6 @@ void batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr,
 	struct batadv_tt_local_entry *tt_local;
 	struct batadv_tt_global_entry *tt_global;
 	struct hlist_head *head;
-	struct hlist_node *node;
 	struct batadv_tt_orig_list_entry *orig_entry;
 	int hash_added;
 	bool roamed_back = false;
@@ -339,7 +337,7 @@ check_roaming:
 		/* These node are probably going to update their tt table */
 		head = &tt_global->orig_list;
 		rcu_read_lock();
-		hlist_for_each_entry_rcu(orig_entry, node, head, list) {
+		hlist_for_each_entry_rcu(orig_entry, head, list) {
 			batadv_send_roam_adv(bat_priv, tt_global->common.addr,
 					     orig_entry->orig_node);
 		}
@@ -470,7 +468,6 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset)
 	struct batadv_tt_common_entry *tt_common_entry;
 	struct batadv_tt_local_entry *tt_local;
 	struct batadv_hard_iface *primary_if;
-	struct hlist_node *node;
 	struct hlist_head *head;
 	uint32_t i;
 	int last_seen_secs;
@@ -494,7 +491,7 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset)
 		head = &hash->table[i];
 
 		rcu_read_lock();
-		hlist_for_each_entry_rcu(tt_common_entry, node,
+		hlist_for_each_entry_rcu(tt_common_entry,
 					 head, hash_entry) {
 			tt_local = container_of(tt_common_entry,
 						struct batadv_tt_local_entry,
@@ -605,9 +602,9 @@ static void batadv_tt_local_purge_list(struct batadv_priv *bat_priv,
 {
 	struct batadv_tt_local_entry *tt_local_entry;
 	struct batadv_tt_common_entry *tt_common_entry;
-	struct hlist_node *node, *node_tmp;
+	struct hlist_node *node_tmp;
 
-	hlist_for_each_entry_safe(tt_common_entry, node, node_tmp, head,
+	hlist_for_each_entry_safe(tt_common_entry, node_tmp, head,
 				  hash_entry) {
 		tt_local_entry = container_of(tt_common_entry,
 					      struct batadv_tt_local_entry,
@@ -651,7 +648,7 @@ static void batadv_tt_local_table_free(struct batadv_priv *bat_priv)
 	spinlock_t *list_lock; /* protects write access to the hash lists */
 	struct batadv_tt_common_entry *tt_common_entry;
 	struct batadv_tt_local_entry *tt_local;
-	struct hlist_node *node, *node_tmp;
+	struct hlist_node *node_tmp;
 	struct hlist_head *head;
 	uint32_t i;
 
@@ -665,9 +662,9 @@ static void batadv_tt_local_table_free(struct batadv_priv *bat_priv)
 		list_lock = &hash->list_locks[i];
 
 		spin_lock_bh(list_lock);
-		hlist_for_each_entry_safe(tt_common_entry, node, node_tmp,
+		hlist_for_each_entry_safe(tt_common_entry, node_tmp,
 					  head, hash_entry) {
-			hlist_del_rcu(node);
+			hlist_del_rcu(&tt_common_entry->hash_entry);
 			tt_local = container_of(tt_common_entry,
 						struct batadv_tt_local_entry,
 						common);
@@ -724,11 +721,10 @@ batadv_tt_global_orig_entry_find(const struct batadv_tt_global_entry *entry,
 {
 	struct batadv_tt_orig_list_entry *tmp_orig_entry, *orig_entry = NULL;
 	const struct hlist_head *head;
-	struct hlist_node *node;
 
 	rcu_read_lock();
 	head = &entry->orig_list;
-	hlist_for_each_entry_rcu(tmp_orig_entry, node, head, list) {
+	hlist_for_each_entry_rcu(tmp_orig_entry, head, list) {
 		if (tmp_orig_entry->orig_node != orig_node)
 			continue;
 		if (!atomic_inc_not_zero(&tmp_orig_entry->refcount))
@@ -940,12 +936,11 @@ batadv_transtable_best_orig(struct batadv_tt_global_entry *tt_global_entry)
 {
 	struct batadv_neigh_node *router = NULL;
 	struct hlist_head *head;
-	struct hlist_node *node;
 	struct batadv_tt_orig_list_entry *orig_entry, *best_entry = NULL;
 	int best_tq = 0;
 
 	head = &tt_global_entry->orig_list;
-	hlist_for_each_entry_rcu(orig_entry, node, head, list) {
+	hlist_for_each_entry_rcu(orig_entry, head, list) {
 		router = batadv_orig_node_get_router(orig_entry->orig_node);
 		if (!router)
 			continue;
@@ -973,7 +968,6 @@ batadv_tt_global_print_entry(struct batadv_tt_global_entry *tt_global_entry,
 			     struct seq_file *seq)
 {
 	struct hlist_head *head;
-	struct hlist_node *node;
 	struct batadv_tt_orig_list_entry *orig_entry, *best_entry;
 	struct batadv_tt_common_entry *tt_common_entry;
 	uint16_t flags;
@@ -997,7 +991,7 @@ batadv_tt_global_print_entry(struct batadv_tt_global_entry *tt_global_entry,
 
 	head = &tt_global_entry->orig_list;
 
-	hlist_for_each_entry_rcu(orig_entry, node, head, list) {
+	hlist_for_each_entry_rcu(orig_entry, head, list) {
 		if (best_entry == orig_entry)
 			continue;
 
@@ -1020,7 +1014,6 @@ int batadv_tt_global_seq_print_text(struct seq_file *seq, void *offset)
 	struct batadv_tt_common_entry *tt_common_entry;
 	struct batadv_tt_global_entry *tt_global;
 	struct batadv_hard_iface *primary_if;
-	struct hlist_node *node;
 	struct hlist_head *head;
 	uint32_t i;
 
@@ -1039,7 +1032,7 @@ int batadv_tt_global_seq_print_text(struct seq_file *seq, void *offset)
 		head = &hash->table[i];
 
 		rcu_read_lock();
-		hlist_for_each_entry_rcu(tt_common_entry, node,
+		hlist_for_each_entry_rcu(tt_common_entry,
 					 head, hash_entry) {
 			tt_global = container_of(tt_common_entry,
 						 struct batadv_tt_global_entry,
@@ -1059,13 +1052,13 @@ static void
 batadv_tt_global_del_orig_list(struct batadv_tt_global_entry *tt_global_entry)
 {
 	struct hlist_head *head;
-	struct hlist_node *node, *safe;
+	struct hlist_node *safe;
 	struct batadv_tt_orig_list_entry *orig_entry;
 
 	spin_lock_bh(&tt_global_entry->list_lock);
 	head = &tt_global_entry->orig_list;
-	hlist_for_each_entry_safe(orig_entry, node, safe, head, list) {
-		hlist_del_rcu(node);
+	hlist_for_each_entry_safe(orig_entry, safe, head, list) {
+		hlist_del_rcu(&orig_entry->list);
 		batadv_tt_orig_list_entry_free_ref(orig_entry);
 	}
 	spin_unlock_bh(&tt_global_entry->list_lock);
@@ -1078,18 +1071,18 @@ batadv_tt_global_del_orig_entry(struct batadv_priv *bat_priv,
 				const char *message)
 {
 	struct hlist_head *head;
-	struct hlist_node *node, *safe;
+	struct hlist_node *safe;
 	struct batadv_tt_orig_list_entry *orig_entry;
 
 	spin_lock_bh(&tt_global_entry->list_lock);
 	head = &tt_global_entry->orig_list;
-	hlist_for_each_entry_safe(orig_entry, node, safe, head, list) {
+	hlist_for_each_entry_safe(orig_entry, safe, head, list) {
 		if (orig_entry->orig_node == orig_node) {
 			batadv_dbg(BATADV_DBG_TT, bat_priv,
 				   "Deleting %pM from global tt entry %pM: %s\n",
 				   orig_node->orig,
 				   tt_global_entry->common.addr, message);
-			hlist_del_rcu(node);
+			hlist_del_rcu(&orig_entry->list);
 			batadv_tt_orig_list_entry_free_ref(orig_entry);
 		}
 	}
@@ -1108,7 +1101,6 @@ batadv_tt_global_del_roaming(struct batadv_priv *bat_priv,
 {
 	bool last_entry = true;
 	struct hlist_head *head;
-	struct hlist_node *node;
 	struct batadv_tt_orig_list_entry *orig_entry;
 
 	/* no local entry exists, case 1:
@@ -1117,7 +1109,7 @@ batadv_tt_global_del_roaming(struct batadv_priv *bat_priv,
 
 	rcu_read_lock();
 	head = &tt_global_entry->orig_list;
-	hlist_for_each_entry_rcu(orig_entry, node, head, list) {
+	hlist_for_each_entry_rcu(orig_entry, head, list) {
 		if (orig_entry->orig_node != orig_node) {
 			last_entry = false;
 			break;
@@ -1202,7 +1194,7 @@ void batadv_tt_global_del_orig(struct batadv_priv *bat_priv,
 	struct batadv_tt_common_entry *tt_common_entry;
 	uint32_t i;
 	struct batadv_hashtable *hash = bat_priv->tt.global_hash;
-	struct hlist_node *node, *safe;
+	struct hlist_node *safe;
 	struct hlist_head *head;
 	spinlock_t *list_lock; /* protects write access to the hash lists */
 
@@ -1214,7 +1206,7 @@ void batadv_tt_global_del_orig(struct batadv_priv *bat_priv,
 		list_lock = &hash->list_locks[i];
 
 		spin_lock_bh(list_lock);
-		hlist_for_each_entry_safe(tt_common_entry, node, safe,
+		hlist_for_each_entry_safe(tt_common_entry, safe,
 					  head, hash_entry) {
 			tt_global = container_of(tt_common_entry,
 						 struct batadv_tt_global_entry,
@@ -1227,7 +1219,7 @@ void batadv_tt_global_del_orig(struct batadv_priv *bat_priv,
 				batadv_dbg(BATADV_DBG_TT, bat_priv,
 					   "Deleting global tt entry %pM: %s\n",
 					   tt_global->common.addr, message);
-				hlist_del_rcu(node);
+				hlist_del_rcu(&tt_common_entry->hash_entry);
 				batadv_tt_global_entry_free_ref(tt_global);
 			}
 		}
@@ -1262,7 +1254,7 @@ static void batadv_tt_global_purge(struct batadv_priv *bat_priv)
 {
 	struct batadv_hashtable *hash = bat_priv->tt.global_hash;
 	struct hlist_head *head;
-	struct hlist_node *node, *node_tmp;
+	struct hlist_node *node_tmp;
 	spinlock_t *list_lock; /* protects write access to the hash lists */
 	uint32_t i;
 	char *msg = NULL;
@@ -1274,7 +1266,7 @@ static void batadv_tt_global_purge(struct batadv_priv *bat_priv)
 		list_lock = &hash->list_locks[i];
 
 		spin_lock_bh(list_lock);
-		hlist_for_each_entry_safe(tt_common, node, node_tmp, head,
+		hlist_for_each_entry_safe(tt_common, node_tmp, head,
 					  hash_entry) {
 			tt_global = container_of(tt_common,
 						 struct batadv_tt_global_entry,
@@ -1287,7 +1279,7 @@ static void batadv_tt_global_purge(struct batadv_priv *bat_priv)
 				   "Deleting global tt entry (%pM): %s\n",
 				   tt_global->common.addr, msg);
 
-			hlist_del_rcu(node);
+			hlist_del_rcu(&tt_common->hash_entry);
 
 			batadv_tt_global_entry_free_ref(tt_global);
 		}
@@ -1301,7 +1293,7 @@ static void batadv_tt_global_table_free(struct batadv_priv *bat_priv)
 	spinlock_t *list_lock; /* protects write access to the hash lists */
 	struct batadv_tt_common_entry *tt_common_entry;
 	struct batadv_tt_global_entry *tt_global;
-	struct hlist_node *node, *node_tmp;
+	struct hlist_node *node_tmp;
 	struct hlist_head *head;
 	uint32_t i;
 
@@ -1315,9 +1307,9 @@ static void batadv_tt_global_table_free(struct batadv_priv *bat_priv)
 		list_lock = &hash->list_locks[i];
 
 		spin_lock_bh(list_lock);
-		hlist_for_each_entry_safe(tt_common_entry, node, node_tmp,
+		hlist_for_each_entry_safe(tt_common_entry, node_tmp,
 					  head, hash_entry) {
-			hlist_del_rcu(node);
+			hlist_del_rcu(&tt_common_entry->hash_entry);
 			tt_global = container_of(tt_common_entry,
 						 struct batadv_tt_global_entry,
 						 common);
@@ -1397,7 +1389,6 @@ static uint16_t batadv_tt_global_crc(struct batadv_priv *bat_priv,
 	struct batadv_hashtable *hash = bat_priv->tt.global_hash;
 	struct batadv_tt_common_entry *tt_common;
 	struct batadv_tt_global_entry *tt_global;
-	struct hlist_node *node;
 	struct hlist_head *head;
 	uint32_t i;
 	int j;
@@ -1406,7 +1397,7 @@ static uint16_t batadv_tt_global_crc(struct batadv_priv *bat_priv,
 		head = &hash->table[i];
 
 		rcu_read_lock();
-		hlist_for_each_entry_rcu(tt_common, node, head, hash_entry) {
+		hlist_for_each_entry_rcu(tt_common, head, hash_entry) {
 			tt_global = container_of(tt_common,
 						 struct batadv_tt_global_entry,
 						 common);
@@ -1449,7 +1440,6 @@ static uint16_t batadv_tt_local_crc(struct batadv_priv *bat_priv)
 	uint16_t total = 0, total_one;
 	struct batadv_hashtable *hash = bat_priv->tt.local_hash;
 	struct batadv_tt_common_entry *tt_common;
-	struct hlist_node *node;
 	struct hlist_head *head;
 	uint32_t i;
 	int j;
@@ -1458,7 +1448,7 @@ static uint16_t batadv_tt_local_crc(struct batadv_priv *bat_priv)
 		head = &hash->table[i];
 
 		rcu_read_lock();
-		hlist_for_each_entry_rcu(tt_common, node, head, hash_entry) {
+		hlist_for_each_entry_rcu(tt_common, head, hash_entry) {
 			/* not yet committed clients have not to be taken into
 			 * account while computing the CRC
 			 */
@@ -1597,7 +1587,6 @@ batadv_tt_response_fill_table(uint16_t tt_len, uint8_t ttvn,
 	struct batadv_tt_common_entry *tt_common_entry;
 	struct batadv_tt_query_packet *tt_response;
 	struct batadv_tt_change *tt_change;
-	struct hlist_node *node;
 	struct hlist_head *head;
 	struct sk_buff *skb = NULL;
 	uint16_t tt_tot, tt_count;
@@ -1627,7 +1616,7 @@ batadv_tt_response_fill_table(uint16_t tt_len, uint8_t ttvn,
 	for (i = 0; i < hash->size; i++) {
 		head = &hash->table[i];
 
-		hlist_for_each_entry_rcu(tt_common_entry, node,
+		hlist_for_each_entry_rcu(tt_common_entry,
 					 head, hash_entry) {
 			if (tt_count == tt_tot)
 				break;
@@ -2307,7 +2296,6 @@ static uint16_t batadv_tt_set_flags(struct batadv_hashtable *hash,
 	uint32_t i;
 	uint16_t changed_num = 0;
 	struct hlist_head *head;
-	struct hlist_node *node;
 	struct batadv_tt_common_entry *tt_common_entry;
 
 	if (!hash)
@@ -2317,7 +2305,7 @@ static uint16_t batadv_tt_set_flags(struct batadv_hashtable *hash,
 		head = &hash->table[i];
 
 		rcu_read_lock();
-		hlist_for_each_entry_rcu(tt_common_entry, node,
+		hlist_for_each_entry_rcu(tt_common_entry,
 					 head, hash_entry) {
 			if (enable) {
 				if ((tt_common_entry->flags & flags) == flags)
@@ -2342,7 +2330,7 @@ static void batadv_tt_local_purge_pending_clients(struct batadv_priv *bat_priv)
 	struct batadv_hashtable *hash = bat_priv->tt.local_hash;
 	struct batadv_tt_common_entry *tt_common;
 	struct batadv_tt_local_entry *tt_local;
-	struct hlist_node *node, *node_tmp;
+	struct hlist_node *node_tmp;
 	struct hlist_head *head;
 	spinlock_t *list_lock; /* protects write access to the hash lists */
 	uint32_t i;
@@ -2355,7 +2343,7 @@ static void batadv_tt_local_purge_pending_clients(struct batadv_priv *bat_priv)
 		list_lock = &hash->list_locks[i];
 
 		spin_lock_bh(list_lock);
-		hlist_for_each_entry_safe(tt_common, node, node_tmp, head,
+		hlist_for_each_entry_safe(tt_common, node_tmp, head,
 					  hash_entry) {
 			if (!(tt_common->flags & BATADV_TT_CLIENT_PENDING))
 				continue;
@@ -2365,7 +2353,7 @@ static void batadv_tt_local_purge_pending_clients(struct batadv_priv *bat_priv)
 				   tt_common->addr);
 
 			atomic_dec(&bat_priv->tt.local_entry_num);
-			hlist_del_rcu(node);
+			hlist_del_rcu(&tt_common->hash_entry);
 			tt_local = container_of(tt_common,
 						struct batadv_tt_local_entry,
 						common);
diff --git a/net/batman-adv/vis.c b/net/batman-adv/vis.c
index 22d2785177d1..c053244b97bd 100644
--- a/net/batman-adv/vis.c
+++ b/net/batman-adv/vis.c
@@ -97,7 +97,6 @@ batadv_vis_hash_find(struct batadv_priv *bat_priv, const void *data)
 {
 	struct batadv_hashtable *hash = bat_priv->vis.hash;
 	struct hlist_head *head;
-	struct hlist_node *node;
 	struct batadv_vis_info *vis_info, *vis_info_tmp = NULL;
 	uint32_t index;
 
@@ -108,8 +107,8 @@ batadv_vis_hash_find(struct batadv_priv *bat_priv, const void *data)
 	head = &hash->table[index];
 
 	rcu_read_lock();
-	hlist_for_each_entry_rcu(vis_info, node, head, hash_entry) {
-		if (!batadv_vis_info_cmp(node, data))
+	hlist_for_each_entry_rcu(vis_info, head, hash_entry) {
+		if (!batadv_vis_info_cmp(&vis_info->hash_entry, data))
 			continue;
 
 		vis_info_tmp = vis_info;
@@ -128,9 +127,8 @@ static void batadv_vis_data_insert_interface(const uint8_t *interface,
 					     bool primary)
 {
 	struct batadv_vis_if_list_entry *entry;
-	struct hlist_node *pos;
 
-	hlist_for_each_entry(entry, pos, if_list, list) {
+	hlist_for_each_entry(entry, if_list, list) {
 		if (batadv_compare_eth(entry->addr, interface))
 			return;
 	}
@@ -148,9 +146,8 @@ static void batadv_vis_data_read_prim_sec(struct seq_file *seq,
 					  const struct hlist_head *if_list)
 {
 	struct batadv_vis_if_list_entry *entry;
-	struct hlist_node *pos;
 
-	hlist_for_each_entry(entry, pos, if_list, list) {
+	hlist_for_each_entry(entry, if_list, list) {
 		if (entry->primary)
 			seq_printf(seq, "PRIMARY, ");
 		else
@@ -198,9 +195,8 @@ static void batadv_vis_data_read_entries(struct seq_file *seq,
 {
 	int i;
 	struct batadv_vis_if_list_entry *entry;
-	struct hlist_node *pos;
 
-	hlist_for_each_entry(entry, pos, list, list) {
+	hlist_for_each_entry(entry, list, list) {
 		seq_printf(seq, "%pM,", entry->addr);
 
 		for (i = 0; i < packet->entries; i++)
@@ -218,17 +214,16 @@ static void batadv_vis_data_read_entries(struct seq_file *seq,
 static void batadv_vis_seq_print_text_bucket(struct seq_file *seq,
 					     const struct hlist_head *head)
 {
-	struct hlist_node *node;
 	struct batadv_vis_info *info;
 	struct batadv_vis_packet *packet;
 	uint8_t *entries_pos;
 	struct batadv_vis_info_entry *entries;
 	struct batadv_vis_if_list_entry *entry;
-	struct hlist_node *pos, *n;
+	struct hlist_node *n;
 
 	HLIST_HEAD(vis_if_list);
 
-	hlist_for_each_entry_rcu(info, node, head, hash_entry) {
+	hlist_for_each_entry_rcu(info, head, hash_entry) {
 		packet = (struct batadv_vis_packet *)info->skb_packet->data;
 		entries_pos = (uint8_t *)packet + sizeof(*packet);
 		entries = (struct batadv_vis_info_entry *)entries_pos;
@@ -240,7 +235,7 @@ static void batadv_vis_seq_print_text_bucket(struct seq_file *seq,
 		batadv_vis_data_read_entries(seq, &vis_if_list, packet,
 					     entries);
 
-		hlist_for_each_entry_safe(entry, pos, n, &vis_if_list, list) {
+		hlist_for_each_entry_safe(entry, n, &vis_if_list, list) {
 			hlist_del(&entry->list);
 			kfree(entry);
 		}
@@ -519,7 +514,6 @@ static int batadv_find_best_vis_server(struct batadv_priv *bat_priv,
 {
 	struct batadv_hashtable *hash = bat_priv->orig_hash;
 	struct batadv_neigh_node *router;
-	struct hlist_node *node;
 	struct hlist_head *head;
 	struct batadv_orig_node *orig_node;
 	struct batadv_vis_packet *packet;
@@ -532,7 +526,7 @@ static int batadv_find_best_vis_server(struct batadv_priv *bat_priv,
 		head = &hash->table[i];
 
 		rcu_read_lock();
-		hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) {
+		hlist_for_each_entry_rcu(orig_node, head, hash_entry) {
 			router = batadv_orig_node_get_router(orig_node);
 			if (!router)
 				continue;
@@ -571,7 +565,6 @@ static bool batadv_vis_packet_full(const struct batadv_vis_info *info)
 static int batadv_generate_vis_packet(struct batadv_priv *bat_priv)
 {
 	struct batadv_hashtable *hash = bat_priv->orig_hash;
-	struct hlist_node *node;
 	struct hlist_head *head;
 	struct batadv_orig_node *orig_node;
 	struct batadv_neigh_node *router;
@@ -605,7 +598,7 @@ static int batadv_generate_vis_packet(struct batadv_priv *bat_priv)
 		head = &hash->table[i];
 
 		rcu_read_lock();
-		hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) {
+		hlist_for_each_entry_rcu(orig_node, head, hash_entry) {
 			router = batadv_orig_node_get_router(orig_node);
 			if (!router)
 				continue;
@@ -644,7 +637,7 @@ next:
 		head = &hash->table[i];
 
 		rcu_read_lock();
-		hlist_for_each_entry_rcu(tt_common_entry, node, head,
+		hlist_for_each_entry_rcu(tt_common_entry, head,
 					 hash_entry) {
 			packet_pos = skb_put(info->skb_packet, sizeof(*entry));
 			entry = (struct batadv_vis_info_entry *)packet_pos;
@@ -673,14 +666,14 @@ static void batadv_purge_vis_packets(struct batadv_priv *bat_priv)
 {
 	uint32_t i;
 	struct batadv_hashtable *hash = bat_priv->vis.hash;
-	struct hlist_node *node, *node_tmp;
+	struct hlist_node *node_tmp;
 	struct hlist_head *head;
 	struct batadv_vis_info *info;
 
 	for (i = 0; i < hash->size; i++) {
 		head = &hash->table[i];
 
-		hlist_for_each_entry_safe(info, node, node_tmp,
+		hlist_for_each_entry_safe(info, node_tmp,
 					  head, hash_entry) {
 			/* never purge own data. */
 			if (info == bat_priv->vis.my_info)
@@ -688,7 +681,7 @@ static void batadv_purge_vis_packets(struct batadv_priv *bat_priv)
 
 			if (batadv_has_timed_out(info->first_seen,
 						 BATADV_VIS_TIMEOUT)) {
-				hlist_del(node);
+				hlist_del(&info->hash_entry);
 				batadv_send_list_del(info);
 				kref_put(&info->refcount, batadv_free_info);
 			}
@@ -700,7 +693,6 @@ static void batadv_broadcast_vis_packet(struct batadv_priv *bat_priv,
 					struct batadv_vis_info *info)
 {
 	struct batadv_hashtable *hash = bat_priv->orig_hash;
-	struct hlist_node *node;
 	struct hlist_head *head;
 	struct batadv_orig_node *orig_node;
 	struct batadv_vis_packet *packet;
@@ -715,7 +707,7 @@ static void batadv_broadcast_vis_packet(struct batadv_priv *bat_priv,
 		head = &hash->table[i];
 
 		rcu_read_lock();
-		hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) {
+		hlist_for_each_entry_rcu(orig_node, head, hash_entry) {
 			/* if it's a vis server and reachable, send it. */
 			if (!(orig_node->flags & BATADV_VIS_SERVER))
 				continue;
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 07f073935811..6a93614f2c49 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -70,14 +70,13 @@ static struct bt_sock_list hci_sk_list = {
 void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb)
 {
 	struct sock *sk;
-	struct hlist_node *node;
 	struct sk_buff *skb_copy = NULL;
 
 	BT_DBG("hdev %p len %d", hdev, skb->len);
 
 	read_lock(&hci_sk_list.lock);
 
-	sk_for_each(sk, node, &hci_sk_list.head) {
+	sk_for_each(sk, &hci_sk_list.head) {
 		struct hci_filter *flt;
 		struct sk_buff *nskb;
 
@@ -142,13 +141,12 @@ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb)
 void hci_send_to_control(struct sk_buff *skb, struct sock *skip_sk)
 {
 	struct sock *sk;
-	struct hlist_node *node;
 
 	BT_DBG("len %d", skb->len);
 
 	read_lock(&hci_sk_list.lock);
 
-	sk_for_each(sk, node, &hci_sk_list.head) {
+	sk_for_each(sk, &hci_sk_list.head) {
 		struct sk_buff *nskb;
 
 		/* Skip the original socket */
@@ -176,7 +174,6 @@ void hci_send_to_control(struct sk_buff *skb, struct sock *skip_sk)
 void hci_send_to_monitor(struct hci_dev *hdev, struct sk_buff *skb)
 {
 	struct sock *sk;
-	struct hlist_node *node;
 	struct sk_buff *skb_copy = NULL;
 	__le16 opcode;
 
@@ -210,7 +207,7 @@ void hci_send_to_monitor(struct hci_dev *hdev, struct sk_buff *skb)
 
 	read_lock(&hci_sk_list.lock);
 
-	sk_for_each(sk, node, &hci_sk_list.head) {
+	sk_for_each(sk, &hci_sk_list.head) {
 		struct sk_buff *nskb;
 
 		if (sk->sk_state != BT_BOUND)
@@ -251,13 +248,12 @@ void hci_send_to_monitor(struct hci_dev *hdev, struct sk_buff *skb)
 static void send_monitor_event(struct sk_buff *skb)
 {
 	struct sock *sk;
-	struct hlist_node *node;
 
 	BT_DBG("len %d", skb->len);
 
 	read_lock(&hci_sk_list.lock);
 
-	sk_for_each(sk, node, &hci_sk_list.head) {
+	sk_for_each(sk, &hci_sk_list.head) {
 		struct sk_buff *nskb;
 
 		if (sk->sk_state != BT_BOUND)
@@ -393,11 +389,10 @@ void hci_sock_dev_event(struct hci_dev *hdev, int event)
 
 	if (event == HCI_DEV_UNREG) {
 		struct sock *sk;
-		struct hlist_node *node;
 
 		/* Detach sockets from device */
 		read_lock(&hci_sk_list.lock);
-		sk_for_each(sk, node, &hci_sk_list.head) {
+		sk_for_each(sk, &hci_sk_list.head) {
 			bh_lock_sock_nested(sk);
 			if (hci_pi(sk)->hdev == hdev) {
 				hci_pi(sk)->hdev = NULL;
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index ce3f6658f4b2..c23bae86263b 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -107,15 +107,14 @@ static void rfcomm_sk_state_change(struct rfcomm_dlc *d, int err)
 static struct sock *__rfcomm_get_sock_by_addr(u8 channel, bdaddr_t *src)
 {
 	struct sock *sk = NULL;
-	struct hlist_node *node;
 
-	sk_for_each(sk, node, &rfcomm_sk_list.head) {
+	sk_for_each(sk, &rfcomm_sk_list.head) {
 		if (rfcomm_pi(sk)->channel == channel &&
 				!bacmp(&bt_sk(sk)->src, src))
 			break;
 	}
 
-	return node ? sk : NULL;
+	return sk ? sk : NULL;
 }
 
 /* Find socket with channel and source bdaddr.
@@ -124,11 +123,10 @@ static struct sock *__rfcomm_get_sock_by_addr(u8 channel, bdaddr_t *src)
 static struct sock *rfcomm_get_sock_by_channel(int state, u8 channel, bdaddr_t *src)
 {
 	struct sock *sk = NULL, *sk1 = NULL;
-	struct hlist_node *node;
 
 	read_lock(&rfcomm_sk_list.lock);
 
-	sk_for_each(sk, node, &rfcomm_sk_list.head) {
+	sk_for_each(sk, &rfcomm_sk_list.head) {
 		if (state && sk->sk_state != state)
 			continue;
 
@@ -145,7 +143,7 @@ static struct sock *rfcomm_get_sock_by_channel(int state, u8 channel, bdaddr_t *
 
 	read_unlock(&rfcomm_sk_list.lock);
 
-	return node ? sk : sk1;
+	return sk ? sk : sk1;
 }
 
 static void rfcomm_sock_destruct(struct sock *sk)
@@ -970,11 +968,10 @@ done:
 static int rfcomm_sock_debugfs_show(struct seq_file *f, void *p)
 {
 	struct sock *sk;
-	struct hlist_node *node;
 
 	read_lock(&rfcomm_sk_list.lock);
 
-	sk_for_each(sk, node, &rfcomm_sk_list.head) {
+	sk_for_each(sk, &rfcomm_sk_list.head) {
 		seq_printf(f, "%pMR %pMR %d %d\n",
 			   &bt_sk(sk)->src, &bt_sk(sk)->dst,
 			   sk->sk_state, rfcomm_pi(sk)->channel);
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index b5178d62064e..79d87d8d4f51 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -259,10 +259,9 @@ drop:
 /* -------- Socket interface ---------- */
 static struct sock *__sco_get_sock_listen_by_addr(bdaddr_t *ba)
 {
-	struct hlist_node *node;
 	struct sock *sk;
 
-	sk_for_each(sk, node, &sco_sk_list.head) {
+	sk_for_each(sk, &sco_sk_list.head) {
 		if (sk->sk_state != BT_LISTEN)
 			continue;
 
@@ -279,11 +278,10 @@ static struct sock *__sco_get_sock_listen_by_addr(bdaddr_t *ba)
 static struct sock *sco_get_sock_listen(bdaddr_t *src)
 {
 	struct sock *sk = NULL, *sk1 = NULL;
-	struct hlist_node *node;
 
 	read_lock(&sco_sk_list.lock);
 
-	sk_for_each(sk, node, &sco_sk_list.head) {
+	sk_for_each(sk, &sco_sk_list.head) {
 		if (sk->sk_state != BT_LISTEN)
 			continue;
 
@@ -298,7 +296,7 @@ static struct sock *sco_get_sock_listen(bdaddr_t *src)
 
 	read_unlock(&sco_sk_list.lock);
 
-	return node ? sk : sk1;
+	return sk ? sk : sk1;
 }
 
 static void sco_sock_destruct(struct sock *sk)
@@ -951,14 +949,13 @@ static void sco_conn_ready(struct sco_conn *conn)
 int sco_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags)
 {
 	struct sock *sk;
-	struct hlist_node *node;
 	int lm = 0;
 
 	BT_DBG("hdev %s, bdaddr %pMR", hdev->name, bdaddr);
 
 	/* Find listening sockets */
 	read_lock(&sco_sk_list.lock);
-	sk_for_each(sk, node, &sco_sk_list.head) {
+	sk_for_each(sk, &sco_sk_list.head) {
 		if (sk->sk_state != BT_LISTEN)
 			continue;
 
@@ -1018,11 +1015,10 @@ drop:
 static int sco_debugfs_show(struct seq_file *f, void *p)
 {
 	struct sock *sk;
-	struct hlist_node *node;
 
 	read_lock(&sco_sk_list.lock);
 
-	sk_for_each(sk, node, &sco_sk_list.head) {
+	sk_for_each(sk, &sco_sk_list.head) {
 		seq_printf(f, "%pMR %pMR %d\n", &bt_sk(sk)->src,
 			   &bt_sk(sk)->dst, sk->sk_state);
 	}
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 8117900af4de..b0812c91c0f0 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -181,9 +181,9 @@ void br_fdb_cleanup(unsigned long _data)
 	spin_lock(&br->hash_lock);
 	for (i = 0; i < BR_HASH_SIZE; i++) {
 		struct net_bridge_fdb_entry *f;
-		struct hlist_node *h, *n;
+		struct hlist_node *n;
 
-		hlist_for_each_entry_safe(f, h, n, &br->hash[i], hlist) {
+		hlist_for_each_entry_safe(f, n, &br->hash[i], hlist) {
 			unsigned long this_timer;
 			if (f->is_static)
 				continue;
@@ -207,8 +207,8 @@ void br_fdb_flush(struct net_bridge *br)
 	spin_lock_bh(&br->hash_lock);
 	for (i = 0; i < BR_HASH_SIZE; i++) {
 		struct net_bridge_fdb_entry *f;
-		struct hlist_node *h, *n;
-		hlist_for_each_entry_safe(f, h, n, &br->hash[i], hlist) {
+		struct hlist_node *n;
+		hlist_for_each_entry_safe(f, n, &br->hash[i], hlist) {
 			if (!f->is_static)
 				fdb_delete(br, f);
 		}
@@ -266,10 +266,9 @@ struct net_bridge_fdb_entry *__br_fdb_get(struct net_bridge *br,
 					  const unsigned char *addr,
 					  __u16 vid)
 {
-	struct hlist_node *h;
 	struct net_bridge_fdb_entry *fdb;
 
-	hlist_for_each_entry_rcu(fdb, h,
+	hlist_for_each_entry_rcu(fdb,
 				&br->hash[br_mac_hash(addr, vid)], hlist) {
 		if (ether_addr_equal(fdb->addr.addr, addr) &&
 		    fdb->vlan_id == vid) {
@@ -315,14 +314,13 @@ int br_fdb_fillbuf(struct net_bridge *br, void *buf,
 {
 	struct __fdb_entry *fe = buf;
 	int i, num = 0;
-	struct hlist_node *h;
 	struct net_bridge_fdb_entry *f;
 
 	memset(buf, 0, maxnum*sizeof(struct __fdb_entry));
 
 	rcu_read_lock();
 	for (i = 0; i < BR_HASH_SIZE; i++) {
-		hlist_for_each_entry_rcu(f, h, &br->hash[i], hlist) {
+		hlist_for_each_entry_rcu(f, &br->hash[i], hlist) {
 			if (num >= maxnum)
 				goto out;
 
@@ -363,10 +361,9 @@ static struct net_bridge_fdb_entry *fdb_find(struct hlist_head *head,
 					     const unsigned char *addr,
 					     __u16 vid)
 {
-	struct hlist_node *h;
 	struct net_bridge_fdb_entry *fdb;
 
-	hlist_for_each_entry(fdb, h, head, hlist) {
+	hlist_for_each_entry(fdb, head, hlist) {
 		if (ether_addr_equal(fdb->addr.addr, addr) &&
 		    fdb->vlan_id == vid)
 			return fdb;
@@ -378,10 +375,9 @@ static struct net_bridge_fdb_entry *fdb_find_rcu(struct hlist_head *head,
 						 const unsigned char *addr,
 						 __u16 vid)
 {
-	struct hlist_node *h;
 	struct net_bridge_fdb_entry *fdb;
 
-	hlist_for_each_entry_rcu(fdb, h, head, hlist) {
+	hlist_for_each_entry_rcu(fdb, head, hlist) {
 		if (ether_addr_equal(fdb->addr.addr, addr) &&
 		    fdb->vlan_id == vid)
 			return fdb;
@@ -593,10 +589,9 @@ int br_fdb_dump(struct sk_buff *skb,
 		goto out;
 
 	for (i = 0; i < BR_HASH_SIZE; i++) {
-		struct hlist_node *h;
 		struct net_bridge_fdb_entry *f;
 
-		hlist_for_each_entry_rcu(f, h, &br->hash[i], hlist) {
+		hlist_for_each_entry_rcu(f, &br->hash[i], hlist) {
 			if (idx < cb->args[0])
 				goto skip;
 
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index 38991e03646d..9f97b850fc65 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -18,7 +18,6 @@ static int br_rports_fill_info(struct sk_buff *skb, struct netlink_callback *cb,
 {
 	struct net_bridge *br = netdev_priv(dev);
 	struct net_bridge_port *p;
-	struct hlist_node *n;
 	struct nlattr *nest;
 
 	if (!br->multicast_router || hlist_empty(&br->router_list))
@@ -28,7 +27,7 @@ static int br_rports_fill_info(struct sk_buff *skb, struct netlink_callback *cb,
 	if (nest == NULL)
 		return -EMSGSIZE;
 
-	hlist_for_each_entry_rcu(p, n, &br->router_list, rlist) {
+	hlist_for_each_entry_rcu(p, &br->router_list, rlist) {
 		if (p && nla_put_u32(skb, MDBA_ROUTER_PORT, p->dev->ifindex))
 			goto fail;
 	}
@@ -61,12 +60,11 @@ static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb,
 		return -EMSGSIZE;
 
 	for (i = 0; i < mdb->max; i++) {
-		struct hlist_node *h;
 		struct net_bridge_mdb_entry *mp;
 		struct net_bridge_port_group *p, **pp;
 		struct net_bridge_port *port;
 
-		hlist_for_each_entry_rcu(mp, h, &mdb->mhash[i], hlist[mdb->ver]) {
+		hlist_for_each_entry_rcu(mp, &mdb->mhash[i], hlist[mdb->ver]) {
 			if (idx < s_idx)
 				goto skip;
 
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 7d886b0a8b7b..10e6fce1bb62 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -86,9 +86,8 @@ static struct net_bridge_mdb_entry *__br_mdb_ip_get(
 	struct net_bridge_mdb_htable *mdb, struct br_ip *dst, int hash)
 {
 	struct net_bridge_mdb_entry *mp;
-	struct hlist_node *p;
 
-	hlist_for_each_entry_rcu(mp, p, &mdb->mhash[hash], hlist[mdb->ver]) {
+	hlist_for_each_entry_rcu(mp, &mdb->mhash[hash], hlist[mdb->ver]) {
 		if (br_ip_equal(&mp->addr, dst))
 			return mp;
 	}
@@ -178,13 +177,12 @@ static int br_mdb_copy(struct net_bridge_mdb_htable *new,
 		       int elasticity)
 {
 	struct net_bridge_mdb_entry *mp;
-	struct hlist_node *p;
 	int maxlen;
 	int len;
 	int i;
 
 	for (i = 0; i < old->max; i++)
-		hlist_for_each_entry(mp, p, &old->mhash[i], hlist[old->ver])
+		hlist_for_each_entry(mp, &old->mhash[i], hlist[old->ver])
 			hlist_add_head(&mp->hlist[new->ver],
 				       &new->mhash[br_ip_hash(new, &mp->addr)]);
 
@@ -194,7 +192,7 @@ static int br_mdb_copy(struct net_bridge_mdb_htable *new,
 	maxlen = 0;
 	for (i = 0; i < new->max; i++) {
 		len = 0;
-		hlist_for_each_entry(mp, p, &new->mhash[i], hlist[new->ver])
+		hlist_for_each_entry(mp, &new->mhash[i], hlist[new->ver])
 			len++;
 		if (len > maxlen)
 			maxlen = len;
@@ -510,14 +508,13 @@ static struct net_bridge_mdb_entry *br_multicast_get_group(
 {
 	struct net_bridge_mdb_htable *mdb;
 	struct net_bridge_mdb_entry *mp;
-	struct hlist_node *p;
 	unsigned int count = 0;
 	unsigned int max;
 	int elasticity;
 	int err;
 
 	mdb = rcu_dereference_protected(br->mdb, 1);
-	hlist_for_each_entry(mp, p, &mdb->mhash[hash], hlist[mdb->ver]) {
+	hlist_for_each_entry(mp, &mdb->mhash[hash], hlist[mdb->ver]) {
 		count++;
 		if (unlikely(br_ip_equal(group, &mp->addr)))
 			return mp;
@@ -882,10 +879,10 @@ void br_multicast_disable_port(struct net_bridge_port *port)
 {
 	struct net_bridge *br = port->br;
 	struct net_bridge_port_group *pg;
-	struct hlist_node *p, *n;
+	struct hlist_node *n;
 
 	spin_lock(&br->multicast_lock);
-	hlist_for_each_entry_safe(pg, p, n, &port->mglist, mglist)
+	hlist_for_each_entry_safe(pg, n, &port->mglist, mglist)
 		br_multicast_del_pg(br, pg);
 
 	if (!hlist_unhashed(&port->rlist))
@@ -1025,12 +1022,12 @@ static void br_multicast_add_router(struct net_bridge *br,
 				    struct net_bridge_port *port)
 {
 	struct net_bridge_port *p;
-	struct hlist_node *n, *slot = NULL;
+	struct hlist_node *slot = NULL;
 
-	hlist_for_each_entry(p, n, &br->router_list, rlist) {
+	hlist_for_each_entry(p, &br->router_list, rlist) {
 		if ((unsigned long) port >= (unsigned long) p)
 			break;
-		slot = n;
+		slot = &p->rlist;
 	}
 
 	if (slot)
@@ -1653,7 +1650,7 @@ void br_multicast_stop(struct net_bridge *br)
 {
 	struct net_bridge_mdb_htable *mdb;
 	struct net_bridge_mdb_entry *mp;
-	struct hlist_node *p, *n;
+	struct hlist_node *n;
 	u32 ver;
 	int i;
 
@@ -1670,7 +1667,7 @@ void br_multicast_stop(struct net_bridge *br)
 
 	ver = mdb->ver;
 	for (i = 0; i < mdb->max; i++) {
-		hlist_for_each_entry_safe(mp, p, n, &mdb->mhash[i],
+		hlist_for_each_entry_safe(mp, n, &mdb->mhash[i],
 					  hlist[ver]) {
 			del_timer(&mp->timer);
 			call_rcu_bh(&mp->rcu, br_multicast_free_group);
diff --git a/net/can/af_can.c b/net/can/af_can.c
index ddac1ee2ed20..c48e5220bbac 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -516,7 +516,6 @@ void can_rx_unregister(struct net_device *dev, canid_t can_id, canid_t mask,
 {
 	struct receiver *r = NULL;
 	struct hlist_head *rl;
-	struct hlist_node *next;
 	struct dev_rcv_lists *d;
 
 	if (dev && dev->type != ARPHRD_CAN)
@@ -540,7 +539,7 @@ void can_rx_unregister(struct net_device *dev, canid_t can_id, canid_t mask,
 	 * been registered before.
 	 */
 
-	hlist_for_each_entry_rcu(r, next, rl, list) {
+	hlist_for_each_entry_rcu(r, rl, list) {
 		if (r->can_id == can_id && r->mask == mask &&
 		    r->func == func && r->data == data)
 			break;
@@ -552,7 +551,7 @@ void can_rx_unregister(struct net_device *dev, canid_t can_id, canid_t mask,
 	 * will be NULL, while r will point to the last item of the list.
 	 */
 
-	if (!next) {
+	if (!r) {
 		printk(KERN_ERR "BUG: receive list entry not found for "
 		       "dev %s, id %03X, mask %03X\n",
 		       DNAME(dev), can_id, mask);
@@ -590,7 +589,6 @@ static inline void deliver(struct sk_buff *skb, struct receiver *r)
 static int can_rcv_filter(struct dev_rcv_lists *d, struct sk_buff *skb)
 {
 	struct receiver *r;
-	struct hlist_node *n;
 	int matches = 0;
 	struct can_frame *cf = (struct can_frame *)skb->data;
 	canid_t can_id = cf->can_id;
@@ -600,7 +598,7 @@ static int can_rcv_filter(struct dev_rcv_lists *d, struct sk_buff *skb)
 
 	if (can_id & CAN_ERR_FLAG) {
 		/* check for error message frame entries only */
-		hlist_for_each_entry_rcu(r, n, &d->rx[RX_ERR], list) {
+		hlist_for_each_entry_rcu(r, &d->rx[RX_ERR], list) {
 			if (can_id & r->mask) {
 				deliver(skb, r);
 				matches++;
@@ -610,13 +608,13 @@ static int can_rcv_filter(struct dev_rcv_lists *d, struct sk_buff *skb)
 	}
 
 	/* check for unfiltered entries */
-	hlist_for_each_entry_rcu(r, n, &d->rx[RX_ALL], list) {
+	hlist_for_each_entry_rcu(r, &d->rx[RX_ALL], list) {
 		deliver(skb, r);
 		matches++;
 	}
 
 	/* check for can_id/mask entries */
-	hlist_for_each_entry_rcu(r, n, &d->rx[RX_FIL], list) {
+	hlist_for_each_entry_rcu(r, &d->rx[RX_FIL], list) {
 		if ((can_id & r->mask) == r->can_id) {
 			deliver(skb, r);
 			matches++;
@@ -624,7 +622,7 @@ static int can_rcv_filter(struct dev_rcv_lists *d, struct sk_buff *skb)
 	}
 
 	/* check for inverted can_id/mask entries */
-	hlist_for_each_entry_rcu(r, n, &d->rx[RX_INV], list) {
+	hlist_for_each_entry_rcu(r, &d->rx[RX_INV], list) {
 		if ((can_id & r->mask) != r->can_id) {
 			deliver(skb, r);
 			matches++;
@@ -636,7 +634,7 @@ static int can_rcv_filter(struct dev_rcv_lists *d, struct sk_buff *skb)
 		return matches;
 
 	if (can_id & CAN_EFF_FLAG) {
-		hlist_for_each_entry_rcu(r, n, &d->rx[RX_EFF], list) {
+		hlist_for_each_entry_rcu(r, &d->rx[RX_EFF], list) {
 			if (r->can_id == can_id) {
 				deliver(skb, r);
 				matches++;
@@ -644,7 +642,7 @@ static int can_rcv_filter(struct dev_rcv_lists *d, struct sk_buff *skb)
 		}
 	} else {
 		can_id &= CAN_SFF_MASK;
-		hlist_for_each_entry_rcu(r, n, &d->rx_sff[can_id], list) {
+		hlist_for_each_entry_rcu(r, &d->rx_sff[can_id], list) {
 			deliver(skb, r);
 			matches++;
 		}
diff --git a/net/can/gw.c b/net/can/gw.c
index c185fcd5e828..2d117dc5ebea 100644
--- a/net/can/gw.c
+++ b/net/can/gw.c
@@ -457,11 +457,11 @@ static int cgw_notifier(struct notifier_block *nb,
 	if (msg == NETDEV_UNREGISTER) {
 
 		struct cgw_job *gwj = NULL;
-		struct hlist_node *n, *nx;
+		struct hlist_node *nx;
 
 		ASSERT_RTNL();
 
-		hlist_for_each_entry_safe(gwj, n, nx, &cgw_list, list) {
+		hlist_for_each_entry_safe(gwj, nx, &cgw_list, list) {
 
 			if (gwj->src.dev == dev || gwj->dst.dev == dev) {
 				hlist_del(&gwj->list);
@@ -575,12 +575,11 @@ cancel:
 static int cgw_dump_jobs(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	struct cgw_job *gwj = NULL;
-	struct hlist_node *n;
 	int idx = 0;
 	int s_idx = cb->args[0];
 
 	rcu_read_lock();
-	hlist_for_each_entry_rcu(gwj, n, &cgw_list, list) {
+	hlist_for_each_entry_rcu(gwj, &cgw_list, list) {
 		if (idx < s_idx)
 			goto cont;
 
@@ -858,11 +857,11 @@ out:
 static void cgw_remove_all_jobs(void)
 {
 	struct cgw_job *gwj = NULL;
-	struct hlist_node *n, *nx;
+	struct hlist_node *nx;
 
 	ASSERT_RTNL();
 
-	hlist_for_each_entry_safe(gwj, n, nx, &cgw_list, list) {
+	hlist_for_each_entry_safe(gwj, nx, &cgw_list, list) {
 		hlist_del(&gwj->list);
 		cgw_unregister_filter(gwj);
 		kfree(gwj);
@@ -872,7 +871,7 @@ static void cgw_remove_all_jobs(void)
 static int cgw_remove_job(struct sk_buff *skb,  struct nlmsghdr *nlh, void *arg)
 {
 	struct cgw_job *gwj = NULL;
-	struct hlist_node *n, *nx;
+	struct hlist_node *nx;
 	struct rtcanmsg *r;
 	struct cf_mod mod;
 	struct can_can_gw ccgw;
@@ -907,7 +906,7 @@ static int cgw_remove_job(struct sk_buff *skb,  struct nlmsghdr *nlh, void *arg)
 	ASSERT_RTNL();
 
 	/* remove only the first matching entry */
-	hlist_for_each_entry_safe(gwj, n, nx, &cgw_list, list) {
+	hlist_for_each_entry_safe(gwj, nx, &cgw_list, list) {
 
 		if (gwj->flags != r->flags)
 			continue;
diff --git a/net/can/proc.c b/net/can/proc.c
index 497335892146..1ab8c888f102 100644
--- a/net/can/proc.c
+++ b/net/can/proc.c
@@ -195,9 +195,8 @@ static void can_print_rcvlist(struct seq_file *m, struct hlist_head *rx_list,
 			      struct net_device *dev)
 {
 	struct receiver *r;
-	struct hlist_node *n;
 
-	hlist_for_each_entry_rcu(r, n, rx_list, list) {
+	hlist_for_each_entry_rcu(r, rx_list, list) {
 		char *fmt = (r->can_id & CAN_EFF_FLAG)?
 			"   %-5s  %08x  %08x  %pK  %pK  %8ld  %s\n" :
 			"   %-5s     %03x    %08x  %pK  %pK  %8ld  %s\n";
diff --git a/net/core/dev.c b/net/core/dev.c
index 18d8b5acc343..a06a7a58dd11 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -658,11 +658,10 @@ __setup("netdev=", netdev_boot_setup);
 
 struct net_device *__dev_get_by_name(struct net *net, const char *name)
 {
-	struct hlist_node *p;
 	struct net_device *dev;
 	struct hlist_head *head = dev_name_hash(net, name);
 
-	hlist_for_each_entry(dev, p, head, name_hlist)
+	hlist_for_each_entry(dev, head, name_hlist)
 		if (!strncmp(dev->name, name, IFNAMSIZ))
 			return dev;
 
@@ -684,11 +683,10 @@ EXPORT_SYMBOL(__dev_get_by_name);
 
 struct net_device *dev_get_by_name_rcu(struct net *net, const char *name)
 {
-	struct hlist_node *p;
 	struct net_device *dev;
 	struct hlist_head *head = dev_name_hash(net, name);
 
-	hlist_for_each_entry_rcu(dev, p, head, name_hlist)
+	hlist_for_each_entry_rcu(dev, head, name_hlist)
 		if (!strncmp(dev->name, name, IFNAMSIZ))
 			return dev;
 
@@ -735,11 +733,10 @@ EXPORT_SYMBOL(dev_get_by_name);
 
 struct net_device *__dev_get_by_index(struct net *net, int ifindex)
 {
-	struct hlist_node *p;
 	struct net_device *dev;
 	struct hlist_head *head = dev_index_hash(net, ifindex);
 
-	hlist_for_each_entry(dev, p, head, index_hlist)
+	hlist_for_each_entry(dev, head, index_hlist)
 		if (dev->ifindex == ifindex)
 			return dev;
 
@@ -760,11 +757,10 @@ EXPORT_SYMBOL(__dev_get_by_index);
 
 struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex)
 {
-	struct hlist_node *p;
 	struct net_device *dev;
 	struct hlist_head *head = dev_index_hash(net, ifindex);
 
-	hlist_for_each_entry_rcu(dev, p, head, index_hlist)
+	hlist_for_each_entry_rcu(dev, head, index_hlist)
 		if (dev->ifindex == ifindex)
 			return dev;
 
diff --git a/net/core/flow.c b/net/core/flow.c
index 43f7495df27a..c56ea6f7f6c7 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -132,14 +132,14 @@ static void __flow_cache_shrink(struct flow_cache *fc,
 				int shrink_to)
 {
 	struct flow_cache_entry *fle;
-	struct hlist_node *entry, *tmp;
+	struct hlist_node *tmp;
 	LIST_HEAD(gc_list);
 	int i, deleted = 0;
 
 	for (i = 0; i < flow_cache_hash_size(fc); i++) {
 		int saved = 0;
 
-		hlist_for_each_entry_safe(fle, entry, tmp,
+		hlist_for_each_entry_safe(fle, tmp,
 					  &fcp->hash_table[i], u.hlist) {
 			if (saved < shrink_to &&
 			    flow_entry_valid(fle)) {
@@ -211,7 +211,6 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir,
 	struct flow_cache *fc = &flow_cache_global;
 	struct flow_cache_percpu *fcp;
 	struct flow_cache_entry *fle, *tfle;
-	struct hlist_node *entry;
 	struct flow_cache_object *flo;
 	size_t keysize;
 	unsigned int hash;
@@ -235,7 +234,7 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir,
 		flow_new_hash_rnd(fc, fcp);
 
 	hash = flow_hash_code(fc, fcp, key, keysize);
-	hlist_for_each_entry(tfle, entry, &fcp->hash_table[hash], u.hlist) {
+	hlist_for_each_entry(tfle, &fcp->hash_table[hash], u.hlist) {
 		if (tfle->net == net &&
 		    tfle->family == family &&
 		    tfle->dir == dir &&
@@ -301,13 +300,13 @@ static void flow_cache_flush_tasklet(unsigned long data)
 	struct flow_cache *fc = info->cache;
 	struct flow_cache_percpu *fcp;
 	struct flow_cache_entry *fle;
-	struct hlist_node *entry, *tmp;
+	struct hlist_node *tmp;
 	LIST_HEAD(gc_list);
 	int i, deleted = 0;
 
 	fcp = this_cpu_ptr(fc->percpu);
 	for (i = 0; i < flow_cache_hash_size(fc); i++) {
-		hlist_for_each_entry_safe(fle, entry, tmp,
+		hlist_for_each_entry_safe(fle, tmp,
 					  &fcp->hash_table[i], u.hlist) {
 			if (flow_entry_valid(fle))
 				continue;
diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c
index 0f6bb6f8d391..3174f1998ee6 100644
--- a/net/core/net-procfs.c
+++ b/net/core/net-procfs.c
@@ -16,12 +16,11 @@ static inline struct net_device *dev_from_same_bucket(struct seq_file *seq, loff
 {
 	struct net *net = seq_file_net(seq);
 	struct net_device *dev;
-	struct hlist_node *p;
 	struct hlist_head *h;
 	unsigned int count = 0, offset = get_offset(*pos);
 
 	h = &net->dev_name_head[get_bucket(*pos)];
-	hlist_for_each_entry_rcu(dev, p, h, name_hlist) {
+	hlist_for_each_entry_rcu(dev, h, name_hlist) {
 		if (++count == offset)
 			return dev;
 	}
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index d8aa20f6a46e..b376410ff259 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1060,7 +1060,6 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
 	int idx = 0, s_idx;
 	struct net_device *dev;
 	struct hlist_head *head;
-	struct hlist_node *node;
 	struct nlattr *tb[IFLA_MAX+1];
 	u32 ext_filter_mask = 0;
 
@@ -1080,7 +1079,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
 	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
 		idx = 0;
 		head = &net->dev_index_head[h];
-		hlist_for_each_entry_rcu(dev, node, head, index_hlist) {
+		hlist_for_each_entry_rcu(dev, head, index_hlist) {
 			if (idx < s_idx)
 				goto cont;
 			if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK,
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index c4a2def5b7bd..c21f200eed93 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -175,12 +175,11 @@ static struct hlist_head *dn_find_list(struct sock *sk)
 static int check_port(__le16 port)
 {
 	struct sock *sk;
-	struct hlist_node *node;
 
 	if (port == 0)
 		return -1;
 
-	sk_for_each(sk, node, &dn_sk_hash[le16_to_cpu(port) & DN_SK_HASH_MASK]) {
+	sk_for_each(sk, &dn_sk_hash[le16_to_cpu(port) & DN_SK_HASH_MASK]) {
 		struct dn_scp *scp = DN_SK(sk);
 		if (scp->addrloc == port)
 			return -1;
@@ -374,11 +373,10 @@ int dn_username2sockaddr(unsigned char *data, int len, struct sockaddr_dn *sdn,
 struct sock *dn_sklist_find_listener(struct sockaddr_dn *addr)
 {
 	struct hlist_head *list = listen_hash(addr);
-	struct hlist_node *node;
 	struct sock *sk;
 
 	read_lock(&dn_hash_lock);
-	sk_for_each(sk, node, list) {
+	sk_for_each(sk, list) {
 		struct dn_scp *scp = DN_SK(sk);
 		if (sk->sk_state != TCP_LISTEN)
 			continue;
@@ -414,11 +412,10 @@ struct sock *dn_find_by_skb(struct sk_buff *skb)
 {
 	struct dn_skb_cb *cb = DN_SKB_CB(skb);
 	struct sock *sk;
-	struct hlist_node *node;
 	struct dn_scp *scp;
 
 	read_lock(&dn_hash_lock);
-	sk_for_each(sk, node, &dn_sk_hash[le16_to_cpu(cb->dst_port) & DN_SK_HASH_MASK]) {
+	sk_for_each(sk, &dn_sk_hash[le16_to_cpu(cb->dst_port) & DN_SK_HASH_MASK]) {
 		scp = DN_SK(sk);
 		if (cb->src != dn_saddr2dn(&scp->peer))
 			continue;
diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c
index f968c1b58f47..6c2445bcaba1 100644
--- a/net/decnet/dn_table.c
+++ b/net/decnet/dn_table.c
@@ -483,7 +483,6 @@ int dn_fib_dump(struct sk_buff *skb, struct netlink_callback *cb)
 	unsigned int h, s_h;
 	unsigned int e = 0, s_e;
 	struct dn_fib_table *tb;
-	struct hlist_node *node;
 	int dumped = 0;
 
 	if (!net_eq(net, &init_net))
@@ -498,7 +497,7 @@ int dn_fib_dump(struct sk_buff *skb, struct netlink_callback *cb)
 
 	for (h = s_h; h < DN_FIB_TABLE_HASHSZ; h++, s_h = 0) {
 		e = 0;
-		hlist_for_each_entry(tb, node, &dn_fib_table_hash[h], hlist) {
+		hlist_for_each_entry(tb, &dn_fib_table_hash[h], hlist) {
 			if (e < s_e)
 				goto next;
 			if (dumped)
@@ -828,7 +827,6 @@ out:
 struct dn_fib_table *dn_fib_get_table(u32 n, int create)
 {
 	struct dn_fib_table *t;
-	struct hlist_node *node;
 	unsigned int h;
 
 	if (n < RT_TABLE_MIN)
@@ -839,7 +837,7 @@ struct dn_fib_table *dn_fib_get_table(u32 n, int create)
 
 	h = n & (DN_FIB_TABLE_HASHSZ - 1);
 	rcu_read_lock();
-	hlist_for_each_entry_rcu(t, node, &dn_fib_table_hash[h], hlist) {
+	hlist_for_each_entry_rcu(t, &dn_fib_table_hash[h], hlist) {
 		if (t->n == n) {
 			rcu_read_unlock();
 			return t;
@@ -885,11 +883,10 @@ void dn_fib_flush(void)
 {
 	int flushed = 0;
 	struct dn_fib_table *tb;
-	struct hlist_node *node;
 	unsigned int h;
 
 	for (h = 0; h < DN_FIB_TABLE_HASHSZ; h++) {
-		hlist_for_each_entry(tb, node, &dn_fib_table_hash[h], hlist)
+		hlist_for_each_entry(tb, &dn_fib_table_hash[h], hlist)
 			flushed += tb->flush(tb);
 	}
 
@@ -908,12 +905,12 @@ void __init dn_fib_table_init(void)
 void __exit dn_fib_table_cleanup(void)
 {
 	struct dn_fib_table *t;
-	struct hlist_node *node, *next;
+	struct hlist_node *next;
 	unsigned int h;
 
 	write_lock(&dn_fib_tables_lock);
 	for (h = 0; h < DN_FIB_TABLE_HASHSZ; h++) {
-		hlist_for_each_entry_safe(t, node, next, &dn_fib_table_hash[h],
+		hlist_for_each_entry_safe(t, next, &dn_fib_table_hash[h],
 					  hlist) {
 			hlist_del(&t->hlist);
 			kfree(t);
diff --git a/net/ieee802154/dgram.c b/net/ieee802154/dgram.c
index 16705611589a..e0da175f8e5b 100644
--- a/net/ieee802154/dgram.c
+++ b/net/ieee802154/dgram.c
@@ -350,7 +350,6 @@ static inline int ieee802154_match_sock(u8 *hw_addr, u16 pan_id,
 int ieee802154_dgram_deliver(struct net_device *dev, struct sk_buff *skb)
 {
 	struct sock *sk, *prev = NULL;
-	struct hlist_node *node;
 	int ret = NET_RX_SUCCESS;
 	u16 pan_id, short_addr;
 
@@ -361,7 +360,7 @@ int ieee802154_dgram_deliver(struct net_device *dev, struct sk_buff *skb)
 	short_addr = ieee802154_mlme_ops(dev)->get_short_addr(dev);
 
 	read_lock(&dgram_lock);
-	sk_for_each(sk, node, &dgram_head) {
+	sk_for_each(sk, &dgram_head) {
 		if (ieee802154_match_sock(dev->dev_addr, pan_id, short_addr,
 					dgram_sk(sk))) {
 			if (prev) {
diff --git a/net/ieee802154/raw.c b/net/ieee802154/raw.c
index 50e823927d49..41f538b8e59c 100644
--- a/net/ieee802154/raw.c
+++ b/net/ieee802154/raw.c
@@ -221,10 +221,9 @@ static int raw_rcv_skb(struct sock *sk, struct sk_buff *skb)
 void ieee802154_raw_deliver(struct net_device *dev, struct sk_buff *skb)
 {
 	struct sock *sk;
-	struct hlist_node *node;
 
 	read_lock(&raw_lock);
-	sk_for_each(sk, node, &raw_head) {
+	sk_for_each(sk, &raw_head) {
 		bh_lock_sock(sk);
 		if (!sk->sk_bound_dev_if ||
 		    sk->sk_bound_dev_if == dev->ifindex) {
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 5281314886c1..f678507bc829 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -139,10 +139,9 @@ struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
 	u32 hash = inet_addr_hash(net, addr);
 	struct net_device *result = NULL;
 	struct in_ifaddr *ifa;
-	struct hlist_node *node;
 
 	rcu_read_lock();
-	hlist_for_each_entry_rcu(ifa, node, &inet_addr_lst[hash], hash) {
+	hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) {
 		if (ifa->ifa_local == addr) {
 			struct net_device *dev = ifa->ifa_dev->dev;
 
@@ -588,7 +587,6 @@ static void check_lifetime(struct work_struct *work)
 {
 	unsigned long now, next, next_sec, next_sched;
 	struct in_ifaddr *ifa;
-	struct hlist_node *node;
 	int i;
 
 	now = jiffies;
@@ -596,8 +594,7 @@ static void check_lifetime(struct work_struct *work)
 
 	rcu_read_lock();
 	for (i = 0; i < IN4_ADDR_HSIZE; i++) {
-		hlist_for_each_entry_rcu(ifa, node,
-					 &inet_addr_lst[i], hash) {
+		hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
 			unsigned long age;
 
 			if (ifa->ifa_flags & IFA_F_PERMANENT)
@@ -1493,7 +1490,6 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
 	struct in_device *in_dev;
 	struct in_ifaddr *ifa;
 	struct hlist_head *head;
-	struct hlist_node *node;
 
 	s_h = cb->args[0];
 	s_idx = idx = cb->args[1];
@@ -1503,7 +1499,7 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
 		idx = 0;
 		head = &net->dev_index_head[h];
 		rcu_read_lock();
-		hlist_for_each_entry_rcu(dev, node, head, index_hlist) {
+		hlist_for_each_entry_rcu(dev, head, index_hlist) {
 			if (idx < s_idx)
 				goto cont;
 			if (h > s_h || idx > s_idx)
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 99f00d39d10b..eb4bb12b3eb4 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -112,7 +112,6 @@ struct fib_table *fib_new_table(struct net *net, u32 id)
 struct fib_table *fib_get_table(struct net *net, u32 id)
 {
 	struct fib_table *tb;
-	struct hlist_node *node;
 	struct hlist_head *head;
 	unsigned int h;
 
@@ -122,7 +121,7 @@ struct fib_table *fib_get_table(struct net *net, u32 id)
 
 	rcu_read_lock();
 	head = &net->ipv4.fib_table_hash[h];
-	hlist_for_each_entry_rcu(tb, node, head, tb_hlist) {
+	hlist_for_each_entry_rcu(tb, head, tb_hlist) {
 		if (tb->tb_id == id) {
 			rcu_read_unlock();
 			return tb;
@@ -137,13 +136,12 @@ static void fib_flush(struct net *net)
 {
 	int flushed = 0;
 	struct fib_table *tb;
-	struct hlist_node *node;
 	struct hlist_head *head;
 	unsigned int h;
 
 	for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
 		head = &net->ipv4.fib_table_hash[h];
-		hlist_for_each_entry(tb, node, head, tb_hlist)
+		hlist_for_each_entry(tb, head, tb_hlist)
 			flushed += fib_table_flush(tb);
 	}
 
@@ -656,7 +654,6 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
 	unsigned int h, s_h;
 	unsigned int e = 0, s_e;
 	struct fib_table *tb;
-	struct hlist_node *node;
 	struct hlist_head *head;
 	int dumped = 0;
 
@@ -670,7 +667,7 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
 	for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
 		e = 0;
 		head = &net->ipv4.fib_table_hash[h];
-		hlist_for_each_entry(tb, node, head, tb_hlist) {
+		hlist_for_each_entry(tb, head, tb_hlist) {
 			if (e < s_e)
 				goto next;
 			if (dumped)
@@ -1117,11 +1114,11 @@ static void ip_fib_net_exit(struct net *net)
 	for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
 		struct fib_table *tb;
 		struct hlist_head *head;
-		struct hlist_node *node, *tmp;
+		struct hlist_node *tmp;
 
 		head = &net->ipv4.fib_table_hash[i];
-		hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) {
-			hlist_del(node);
+		hlist_for_each_entry_safe(tb, tmp, head, tb_hlist) {
+			hlist_del(&tb->tb_hlist);
 			fib_table_flush(tb);
 			fib_free_table(tb);
 		}
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 4797a800faf8..8f6cb7a87cd6 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -298,14 +298,13 @@ static inline unsigned int fib_info_hashfn(const struct fib_info *fi)
 static struct fib_info *fib_find_info(const struct fib_info *nfi)
 {
 	struct hlist_head *head;
-	struct hlist_node *node;
 	struct fib_info *fi;
 	unsigned int hash;
 
 	hash = fib_info_hashfn(nfi);
 	head = &fib_info_hash[hash];
 
-	hlist_for_each_entry(fi, node, head, fib_hash) {
+	hlist_for_each_entry(fi, head, fib_hash) {
 		if (!net_eq(fi->fib_net, nfi->fib_net))
 			continue;
 		if (fi->fib_nhs != nfi->fib_nhs)
@@ -331,7 +330,6 @@ static struct fib_info *fib_find_info(const struct fib_info *nfi)
 int ip_fib_check_default(__be32 gw, struct net_device *dev)
 {
 	struct hlist_head *head;
-	struct hlist_node *node;
 	struct fib_nh *nh;
 	unsigned int hash;
 
@@ -339,7 +337,7 @@ int ip_fib_check_default(__be32 gw, struct net_device *dev)
 
 	hash = fib_devindex_hashfn(dev->ifindex);
 	head = &fib_info_devhash[hash];
-	hlist_for_each_entry(nh, node, head, nh_hash) {
+	hlist_for_each_entry(nh, head, nh_hash) {
 		if (nh->nh_dev == dev &&
 		    nh->nh_gw == gw &&
 		    !(nh->nh_flags & RTNH_F_DEAD)) {
@@ -721,10 +719,10 @@ static void fib_info_hash_move(struct hlist_head *new_info_hash,
 
 	for (i = 0; i < old_size; i++) {
 		struct hlist_head *head = &fib_info_hash[i];
-		struct hlist_node *node, *n;
+		struct hlist_node *n;
 		struct fib_info *fi;
 
-		hlist_for_each_entry_safe(fi, node, n, head, fib_hash) {
+		hlist_for_each_entry_safe(fi, n, head, fib_hash) {
 			struct hlist_head *dest;
 			unsigned int new_hash;
 
@@ -739,10 +737,10 @@ static void fib_info_hash_move(struct hlist_head *new_info_hash,
 
 	for (i = 0; i < old_size; i++) {
 		struct hlist_head *lhead = &fib_info_laddrhash[i];
-		struct hlist_node *node, *n;
+		struct hlist_node *n;
 		struct fib_info *fi;
 
-		hlist_for_each_entry_safe(fi, node, n, lhead, fib_lhash) {
+		hlist_for_each_entry_safe(fi, n, lhead, fib_lhash) {
 			struct hlist_head *ldest;
 			unsigned int new_hash;
 
@@ -1096,13 +1094,12 @@ int fib_sync_down_addr(struct net *net, __be32 local)
 	int ret = 0;
 	unsigned int hash = fib_laddr_hashfn(local);
 	struct hlist_head *head = &fib_info_laddrhash[hash];
-	struct hlist_node *node;
 	struct fib_info *fi;
 
 	if (fib_info_laddrhash == NULL || local == 0)
 		return 0;
 
-	hlist_for_each_entry(fi, node, head, fib_lhash) {
+	hlist_for_each_entry(fi, head, fib_lhash) {
 		if (!net_eq(fi->fib_net, net))
 			continue;
 		if (fi->fib_prefsrc == local) {
@@ -1120,13 +1117,12 @@ int fib_sync_down_dev(struct net_device *dev, int force)
 	struct fib_info *prev_fi = NULL;
 	unsigned int hash = fib_devindex_hashfn(dev->ifindex);
 	struct hlist_head *head = &fib_info_devhash[hash];
-	struct hlist_node *node;
 	struct fib_nh *nh;
 
 	if (force)
 		scope = -1;
 
-	hlist_for_each_entry(nh, node, head, nh_hash) {
+	hlist_for_each_entry(nh, head, nh_hash) {
 		struct fib_info *fi = nh->nh_parent;
 		int dead;
 
@@ -1232,7 +1228,6 @@ int fib_sync_up(struct net_device *dev)
 	struct fib_info *prev_fi;
 	unsigned int hash;
 	struct hlist_head *head;
-	struct hlist_node *node;
 	struct fib_nh *nh;
 	int ret;
 
@@ -1244,7 +1239,7 @@ int fib_sync_up(struct net_device *dev)
 	head = &fib_info_devhash[hash];
 	ret = 0;
 
-	hlist_for_each_entry(nh, node, head, nh_hash) {
+	hlist_for_each_entry(nh, head, nh_hash) {
 		struct fib_info *fi = nh->nh_parent;
 		int alive;
 
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 61e03da3e1f5..ff06b7543d9f 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -920,10 +920,9 @@ nomem:
 static struct leaf_info *find_leaf_info(struct leaf *l, int plen)
 {
 	struct hlist_head *head = &l->list;
-	struct hlist_node *node;
 	struct leaf_info *li;
 
-	hlist_for_each_entry_rcu(li, node, head, hlist)
+	hlist_for_each_entry_rcu(li, head, hlist)
 		if (li->plen == plen)
 			return li;
 
@@ -943,12 +942,11 @@ static inline struct list_head *get_fa_head(struct leaf *l, int plen)
 static void insert_leaf_info(struct hlist_head *head, struct leaf_info *new)
 {
 	struct leaf_info *li = NULL, *last = NULL;
-	struct hlist_node *node;
 
 	if (hlist_empty(head)) {
 		hlist_add_head_rcu(&new->hlist, head);
 	} else {
-		hlist_for_each_entry(li, node, head, hlist) {
+		hlist_for_each_entry(li, head, hlist) {
 			if (new->plen > li->plen)
 				break;
 
@@ -1354,9 +1352,8 @@ static int check_leaf(struct fib_table *tb, struct trie *t, struct leaf *l,
 {
 	struct leaf_info *li;
 	struct hlist_head *hhead = &l->list;
-	struct hlist_node *node;
 
-	hlist_for_each_entry_rcu(li, node, hhead, hlist) {
+	hlist_for_each_entry_rcu(li, hhead, hlist) {
 		struct fib_alias *fa;
 
 		if (l->key != (key & li->mask_plen))
@@ -1740,10 +1737,10 @@ static int trie_flush_leaf(struct leaf *l)
 {
 	int found = 0;
 	struct hlist_head *lih = &l->list;
-	struct hlist_node *node, *tmp;
+	struct hlist_node *tmp;
 	struct leaf_info *li = NULL;
 
-	hlist_for_each_entry_safe(li, node, tmp, lih, hlist) {
+	hlist_for_each_entry_safe(li, tmp, lih, hlist) {
 		found += trie_flush_list(&li->falh);
 
 		if (list_empty(&li->falh)) {
@@ -1895,14 +1892,13 @@ static int fn_trie_dump_leaf(struct leaf *l, struct fib_table *tb,
 			struct sk_buff *skb, struct netlink_callback *cb)
 {
 	struct leaf_info *li;
-	struct hlist_node *node;
 	int i, s_i;
 
 	s_i = cb->args[4];
 	i = 0;
 
 	/* rcu_read_lock is hold by caller */
-	hlist_for_each_entry_rcu(li, node, &l->list, hlist) {
+	hlist_for_each_entry_rcu(li, &l->list, hlist) {
 		if (i < s_i) {
 			i++;
 			continue;
@@ -2092,14 +2088,13 @@ static void trie_collect_stats(struct trie *t, struct trie_stat *s)
 		if (IS_LEAF(n)) {
 			struct leaf *l = (struct leaf *)n;
 			struct leaf_info *li;
-			struct hlist_node *tmp;
 
 			s->leaves++;
 			s->totdepth += iter.depth;
 			if (iter.depth > s->maxdepth)
 				s->maxdepth = iter.depth;
 
-			hlist_for_each_entry_rcu(li, tmp, &l->list, hlist)
+			hlist_for_each_entry_rcu(li, &l->list, hlist)
 				++s->prefixes;
 		} else {
 			const struct tnode *tn = (const struct tnode *) n;
@@ -2200,10 +2195,9 @@ static int fib_triestat_seq_show(struct seq_file *seq, void *v)
 
 	for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
 		struct hlist_head *head = &net->ipv4.fib_table_hash[h];
-		struct hlist_node *node;
 		struct fib_table *tb;
 
-		hlist_for_each_entry_rcu(tb, node, head, tb_hlist) {
+		hlist_for_each_entry_rcu(tb, head, tb_hlist) {
 			struct trie *t = (struct trie *) tb->tb_data;
 			struct trie_stat stat;
 
@@ -2245,10 +2239,9 @@ static struct rt_trie_node *fib_trie_get_idx(struct seq_file *seq, loff_t pos)
 
 	for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
 		struct hlist_head *head = &net->ipv4.fib_table_hash[h];
-		struct hlist_node *node;
 		struct fib_table *tb;
 
-		hlist_for_each_entry_rcu(tb, node, head, tb_hlist) {
+		hlist_for_each_entry_rcu(tb, head, tb_hlist) {
 			struct rt_trie_node *n;
 
 			for (n = fib_trie_get_first(iter,
@@ -2298,7 +2291,7 @@ static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 	/* new hash chain */
 	while (++h < FIB_TABLE_HASHSZ) {
 		struct hlist_head *head = &net->ipv4.fib_table_hash[h];
-		hlist_for_each_entry_rcu(tb, tb_node, head, tb_hlist) {
+		hlist_for_each_entry_rcu(tb, head, tb_hlist) {
 			n = fib_trie_get_first(iter, (struct trie *) tb->tb_data);
 			if (n)
 				goto found;
@@ -2381,13 +2374,12 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v)
 	} else {
 		struct leaf *l = (struct leaf *) n;
 		struct leaf_info *li;
-		struct hlist_node *node;
 		__be32 val = htonl(l->key);
 
 		seq_indent(seq, iter->depth);
 		seq_printf(seq, "  |-- %pI4\n", &val);
 
-		hlist_for_each_entry_rcu(li, node, &l->list, hlist) {
+		hlist_for_each_entry_rcu(li, &l->list, hlist) {
 			struct fib_alias *fa;
 
 			list_for_each_entry_rcu(fa, &li->falh, fa_list) {
@@ -2532,7 +2524,6 @@ static int fib_route_seq_show(struct seq_file *seq, void *v)
 {
 	struct leaf *l = v;
 	struct leaf_info *li;
-	struct hlist_node *node;
 
 	if (v == SEQ_START_TOKEN) {
 		seq_printf(seq, "%-127s\n", "Iface\tDestination\tGateway "
@@ -2541,7 +2532,7 @@ static int fib_route_seq_show(struct seq_file *seq, void *v)
 		return 0;
 	}
 
-	hlist_for_each_entry_rcu(li, node, &l->list, hlist) {
+	hlist_for_each_entry_rcu(li, &l->list, hlist) {
 		struct fib_alias *fa;
 		__be32 mask, prefix;
 
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 11cb4979a465..7d1874be1df3 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -57,7 +57,6 @@ int inet_csk_bind_conflict(const struct sock *sk,
 			   const struct inet_bind_bucket *tb, bool relax)
 {
 	struct sock *sk2;
-	struct hlist_node *node;
 	int reuse = sk->sk_reuse;
 	int reuseport = sk->sk_reuseport;
 	kuid_t uid = sock_i_uid((struct sock *)sk);
@@ -69,7 +68,7 @@ int inet_csk_bind_conflict(const struct sock *sk,
 	 * one this bucket belongs to.
 	 */
 
-	sk_for_each_bound(sk2, node, &tb->owners) {
+	sk_for_each_bound(sk2, &tb->owners) {
 		if (sk != sk2 &&
 		    !inet_v6_ipv6only(sk2) &&
 		    (!sk->sk_bound_dev_if ||
@@ -95,7 +94,7 @@ int inet_csk_bind_conflict(const struct sock *sk,
 			}
 		}
 	}
-	return node != NULL;
+	return sk2 != NULL;
 }
 EXPORT_SYMBOL_GPL(inet_csk_bind_conflict);
 
@@ -106,7 +105,6 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
 {
 	struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
 	struct inet_bind_hashbucket *head;
-	struct hlist_node *node;
 	struct inet_bind_bucket *tb;
 	int ret, attempts = 5;
 	struct net *net = sock_net(sk);
@@ -129,7 +127,7 @@ again:
 			head = &hashinfo->bhash[inet_bhashfn(net, rover,
 					hashinfo->bhash_size)];
 			spin_lock(&head->lock);
-			inet_bind_bucket_for_each(tb, node, &head->chain)
+			inet_bind_bucket_for_each(tb, &head->chain)
 				if (net_eq(ib_net(tb), net) && tb->port == rover) {
 					if (((tb->fastreuse > 0 &&
 					      sk->sk_reuse &&
@@ -183,7 +181,7 @@ have_snum:
 		head = &hashinfo->bhash[inet_bhashfn(net, snum,
 				hashinfo->bhash_size)];
 		spin_lock(&head->lock);
-		inet_bind_bucket_for_each(tb, node, &head->chain)
+		inet_bind_bucket_for_each(tb, &head->chain)
 			if (net_eq(ib_net(tb), net) && tb->port == snum)
 				goto tb_found;
 	}
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 2e453bde6992..245ae078a07f 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -33,9 +33,9 @@ static void inet_frag_secret_rebuild(unsigned long dummy)
 	get_random_bytes(&f->rnd, sizeof(u32));
 	for (i = 0; i < INETFRAGS_HASHSZ; i++) {
 		struct inet_frag_queue *q;
-		struct hlist_node *p, *n;
+		struct hlist_node *n;
 
-		hlist_for_each_entry_safe(q, p, n, &f->hash[i], list) {
+		hlist_for_each_entry_safe(q, n, &f->hash[i], list) {
 			unsigned int hval = f->hashfn(q);
 
 			if (hval != i) {
@@ -203,7 +203,6 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
 {
 	struct inet_frag_queue *qp;
 #ifdef CONFIG_SMP
-	struct hlist_node *n;
 #endif
 	unsigned int hash;
 
@@ -219,7 +218,7 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
 	 * such entry could be created on other cpu, while we
 	 * promoted read lock to write lock.
 	 */
-	hlist_for_each_entry(qp, n, &f->hash[hash], list) {
+	hlist_for_each_entry(qp, &f->hash[hash], list) {
 		if (qp->net == nf && f->match(qp, arg)) {
 			atomic_inc(&qp->refcnt);
 			write_unlock(&f->lock);
@@ -278,9 +277,8 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
 	__releases(&f->lock)
 {
 	struct inet_frag_queue *q;
-	struct hlist_node *n;
 
-	hlist_for_each_entry(q, n, &f->hash[hash], list) {
+	hlist_for_each_entry(q, &f->hash[hash], list) {
 		if (q->net == nf && f->match(q, key)) {
 			atomic_inc(&q->refcnt);
 			read_unlock(&f->lock);
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 0ce0595d9861..6af375afeeef 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -120,13 +120,12 @@ int __inet_inherit_port(struct sock *sk, struct sock *child)
 		 * that the listener socket's icsk_bind_hash is the same
 		 * as that of the child socket. We have to look up or
 		 * create a new bind bucket for the child here. */
-		struct hlist_node *node;
-		inet_bind_bucket_for_each(tb, node, &head->chain) {
+		inet_bind_bucket_for_each(tb, &head->chain) {
 			if (net_eq(ib_net(tb), sock_net(sk)) &&
 			    tb->port == port)
 				break;
 		}
-		if (!node) {
+		if (!tb) {
 			tb = inet_bind_bucket_create(table->bind_bucket_cachep,
 						     sock_net(sk), head, port);
 			if (!tb) {
@@ -493,7 +492,6 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
 		int i, remaining, low, high, port;
 		static u32 hint;
 		u32 offset = hint + port_offset;
-		struct hlist_node *node;
 		struct inet_timewait_sock *tw = NULL;
 
 		inet_get_local_port_range(&low, &high);
@@ -512,7 +510,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
 			 * because the established check is already
 			 * unique enough.
 			 */
-			inet_bind_bucket_for_each(tb, node, &head->chain) {
+			inet_bind_bucket_for_each(tb, &head->chain) {
 				if (net_eq(ib_net(tb), net) &&
 				    tb->port == port) {
 					if (tb->fastreuse >= 0 ||
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index 2784db3155fb..1f27c9f4afd0 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -216,7 +216,6 @@ static int inet_twdr_do_twkill_work(struct inet_timewait_death_row *twdr,
 				    const int slot)
 {
 	struct inet_timewait_sock *tw;
-	struct hlist_node *node;
 	unsigned int killed;
 	int ret;
 
@@ -229,7 +228,7 @@ static int inet_twdr_do_twkill_work(struct inet_timewait_death_row *twdr,
 	killed = 0;
 	ret = 0;
 rescan:
-	inet_twsk_for_each_inmate(tw, node, &twdr->cells[slot]) {
+	inet_twsk_for_each_inmate(tw, &twdr->cells[slot]) {
 		__inet_twsk_del_dead_node(tw);
 		spin_unlock(&twdr->death_lock);
 		__inet_twsk_kill(tw, twdr->hashinfo);
@@ -438,10 +437,10 @@ void inet_twdr_twcal_tick(unsigned long data)
 
 	for (n = 0; n < INET_TWDR_RECYCLE_SLOTS; n++) {
 		if (time_before_eq(j, now)) {
-			struct hlist_node *node, *safe;
+			struct hlist_node *safe;
 			struct inet_timewait_sock *tw;
 
-			inet_twsk_for_each_inmate_safe(tw, node, safe,
+			inet_twsk_for_each_inmate_safe(tw, safe,
 						       &twdr->twcal_row[slot]) {
 				__inet_twsk_del_dead_node(tw);
 				__inet_twsk_kill(tw, twdr->hashinfo);
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 53ddebc292b6..dd44e0ab600c 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -111,9 +111,7 @@ EXPORT_SYMBOL_GPL(raw_unhash_sk);
 static struct sock *__raw_v4_lookup(struct net *net, struct sock *sk,
 		unsigned short num, __be32 raddr, __be32 laddr, int dif)
 {
-	struct hlist_node *node;
-
-	sk_for_each_from(sk, node) {
+	sk_for_each_from(sk) {
 		struct inet_sock *inet = inet_sk(sk);
 
 		if (net_eq(sock_net(sk), net) && inet->inet_num == num	&&
@@ -914,9 +912,7 @@ static struct sock *raw_get_first(struct seq_file *seq)
 
 	for (state->bucket = 0; state->bucket < RAW_HTABLE_SIZE;
 			++state->bucket) {
-		struct hlist_node *node;
-
-		sk_for_each(sk, node, &state->h->ht[state->bucket])
+		sk_for_each(sk, &state->h->ht[state->bucket])
 			if (sock_net(sk) == seq_file_net(seq))
 				goto found;
 	}
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 145d3bf8df86..4a8ec457310f 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -954,7 +954,6 @@ struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk,
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct tcp_md5sig_key *key;
-	struct hlist_node *pos;
 	unsigned int size = sizeof(struct in_addr);
 	struct tcp_md5sig_info *md5sig;
 
@@ -968,7 +967,7 @@ struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk,
 	if (family == AF_INET6)
 		size = sizeof(struct in6_addr);
 #endif
-	hlist_for_each_entry_rcu(key, pos, &md5sig->head, node) {
+	hlist_for_each_entry_rcu(key, &md5sig->head, node) {
 		if (key->family != family)
 			continue;
 		if (!memcmp(&key->addr, addr, size))
@@ -1069,14 +1068,14 @@ static void tcp_clear_md5_list(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct tcp_md5sig_key *key;
-	struct hlist_node *pos, *n;
+	struct hlist_node *n;
 	struct tcp_md5sig_info *md5sig;
 
 	md5sig = rcu_dereference_protected(tp->md5sig_info, 1);
 
 	if (!hlist_empty(&md5sig->head))
 		tcp_free_md5sig_pool();
-	hlist_for_each_entry_safe(key, pos, n, &md5sig->head, node) {
+	hlist_for_each_entry_safe(key, n, &md5sig->head, node) {
 		hlist_del_rcu(&key->node);
 		atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
 		kfree_rcu(key, rcu);
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 4dc0d44a5d31..f2c7e615f902 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1419,11 +1419,10 @@ int ipv6_chk_addr(struct net *net, const struct in6_addr *addr,
 		  struct net_device *dev, int strict)
 {
 	struct inet6_ifaddr *ifp;
-	struct hlist_node *node;
 	unsigned int hash = inet6_addr_hash(addr);
 
 	rcu_read_lock_bh();
-	hlist_for_each_entry_rcu(ifp, node, &inet6_addr_lst[hash], addr_lst) {
+	hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) {
 		if (!net_eq(dev_net(ifp->idev->dev), net))
 			continue;
 		if (ipv6_addr_equal(&ifp->addr, addr) &&
@@ -1445,9 +1444,8 @@ static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr,
 {
 	unsigned int hash = inet6_addr_hash(addr);
 	struct inet6_ifaddr *ifp;
-	struct hlist_node *node;
 
-	hlist_for_each_entry(ifp, node, &inet6_addr_lst[hash], addr_lst) {
+	hlist_for_each_entry(ifp, &inet6_addr_lst[hash], addr_lst) {
 		if (!net_eq(dev_net(ifp->idev->dev), net))
 			continue;
 		if (ipv6_addr_equal(&ifp->addr, addr)) {
@@ -1487,10 +1485,9 @@ struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *add
 {
 	struct inet6_ifaddr *ifp, *result = NULL;
 	unsigned int hash = inet6_addr_hash(addr);
-	struct hlist_node *node;
 
 	rcu_read_lock_bh();
-	hlist_for_each_entry_rcu_bh(ifp, node, &inet6_addr_lst[hash], addr_lst) {
+	hlist_for_each_entry_rcu_bh(ifp, &inet6_addr_lst[hash], addr_lst) {
 		if (!net_eq(dev_net(ifp->idev->dev), net))
 			continue;
 		if (ipv6_addr_equal(&ifp->addr, addr)) {
@@ -2907,11 +2904,10 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 	/* Step 2: clear hash table */
 	for (i = 0; i < IN6_ADDR_HSIZE; i++) {
 		struct hlist_head *h = &inet6_addr_lst[i];
-		struct hlist_node *n;
 
 		spin_lock_bh(&addrconf_hash_lock);
 	restart:
-		hlist_for_each_entry_rcu(ifa, n, h, addr_lst) {
+		hlist_for_each_entry_rcu(ifa, h, addr_lst) {
 			if (ifa->idev == idev) {
 				hlist_del_init_rcu(&ifa->addr_lst);
 				addrconf_del_timer(ifa);
@@ -3218,8 +3214,7 @@ static struct inet6_ifaddr *if6_get_first(struct seq_file *seq, loff_t pos)
 	}
 
 	for (; state->bucket < IN6_ADDR_HSIZE; ++state->bucket) {
-		struct hlist_node *n;
-		hlist_for_each_entry_rcu_bh(ifa, n, &inet6_addr_lst[state->bucket],
+		hlist_for_each_entry_rcu_bh(ifa, &inet6_addr_lst[state->bucket],
 					 addr_lst) {
 			if (!net_eq(dev_net(ifa->idev->dev), net))
 				continue;
@@ -3244,9 +3239,8 @@ static struct inet6_ifaddr *if6_get_next(struct seq_file *seq,
 {
 	struct if6_iter_state *state = seq->private;
 	struct net *net = seq_file_net(seq);
-	struct hlist_node *n = &ifa->addr_lst;
 
-	hlist_for_each_entry_continue_rcu_bh(ifa, n, addr_lst) {
+	hlist_for_each_entry_continue_rcu_bh(ifa, addr_lst) {
 		if (!net_eq(dev_net(ifa->idev->dev), net))
 			continue;
 		state->offset++;
@@ -3255,7 +3249,7 @@ static struct inet6_ifaddr *if6_get_next(struct seq_file *seq,
 
 	while (++state->bucket < IN6_ADDR_HSIZE) {
 		state->offset = 0;
-		hlist_for_each_entry_rcu_bh(ifa, n,
+		hlist_for_each_entry_rcu_bh(ifa,
 				     &inet6_addr_lst[state->bucket], addr_lst) {
 			if (!net_eq(dev_net(ifa->idev->dev), net))
 				continue;
@@ -3357,11 +3351,10 @@ int ipv6_chk_home_addr(struct net *net, const struct in6_addr *addr)
 {
 	int ret = 0;
 	struct inet6_ifaddr *ifp = NULL;
-	struct hlist_node *n;
 	unsigned int hash = inet6_addr_hash(addr);
 
 	rcu_read_lock_bh();
-	hlist_for_each_entry_rcu_bh(ifp, n, &inet6_addr_lst[hash], addr_lst) {
+	hlist_for_each_entry_rcu_bh(ifp, &inet6_addr_lst[hash], addr_lst) {
 		if (!net_eq(dev_net(ifp->idev->dev), net))
 			continue;
 		if (ipv6_addr_equal(&ifp->addr, addr) &&
@@ -3383,7 +3376,6 @@ static void addrconf_verify(unsigned long foo)
 {
 	unsigned long now, next, next_sec, next_sched;
 	struct inet6_ifaddr *ifp;
-	struct hlist_node *node;
 	int i;
 
 	rcu_read_lock_bh();
@@ -3395,7 +3387,7 @@ static void addrconf_verify(unsigned long foo)
 
 	for (i = 0; i < IN6_ADDR_HSIZE; i++) {
 restart:
-		hlist_for_each_entry_rcu_bh(ifp, node,
+		hlist_for_each_entry_rcu_bh(ifp,
 					 &inet6_addr_lst[i], addr_lst) {
 			unsigned long age;
 
@@ -3866,7 +3858,6 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
 	struct net_device *dev;
 	struct inet6_dev *idev;
 	struct hlist_head *head;
-	struct hlist_node *node;
 
 	s_h = cb->args[0];
 	s_idx = idx = cb->args[1];
@@ -3876,7 +3867,7 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
 	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
 		idx = 0;
 		head = &net->dev_index_head[h];
-		hlist_for_each_entry_rcu(dev, node, head, index_hlist) {
+		hlist_for_each_entry_rcu(dev, head, index_hlist) {
 			if (idx < s_idx)
 				goto cont;
 			if (h > s_h || idx > s_idx)
@@ -4222,7 +4213,6 @@ static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
 	struct net_device *dev;
 	struct inet6_dev *idev;
 	struct hlist_head *head;
-	struct hlist_node *node;
 
 	s_h = cb->args[0];
 	s_idx = cb->args[1];
@@ -4231,7 +4221,7 @@ static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
 	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
 		idx = 0;
 		head = &net->dev_index_head[h];
-		hlist_for_each_entry_rcu(dev, node, head, index_hlist) {
+		hlist_for_each_entry_rcu(dev, head, index_hlist) {
 			if (idx < s_idx)
 				goto cont;
 			idev = __in6_dev_get(dev);
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index ff76eecfd622..aad64352cb60 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -173,9 +173,8 @@ static struct ip6addrlbl_entry *__ipv6_addr_label(struct net *net,
 						  const struct in6_addr *addr,
 						  int type, int ifindex)
 {
-	struct hlist_node *pos;
 	struct ip6addrlbl_entry *p;
-	hlist_for_each_entry_rcu(p, pos, &ip6addrlbl_table.head, list) {
+	hlist_for_each_entry_rcu(p, &ip6addrlbl_table.head, list) {
 		if (__ip6addrlbl_match(net, p, addr, type, ifindex))
 			return p;
 	}
@@ -261,9 +260,9 @@ static int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace)
 	if (hlist_empty(&ip6addrlbl_table.head)) {
 		hlist_add_head_rcu(&newp->list, &ip6addrlbl_table.head);
 	} else {
-		struct hlist_node *pos, *n;
+		struct hlist_node *n;
 		struct ip6addrlbl_entry *p = NULL;
-		hlist_for_each_entry_safe(p, pos, n,
+		hlist_for_each_entry_safe(p, n,
 					  &ip6addrlbl_table.head, list) {
 			if (p->prefixlen == newp->prefixlen &&
 			    net_eq(ip6addrlbl_net(p), ip6addrlbl_net(newp)) &&
@@ -319,13 +318,13 @@ static int __ip6addrlbl_del(struct net *net,
 			    int ifindex)
 {
 	struct ip6addrlbl_entry *p = NULL;
-	struct hlist_node *pos, *n;
+	struct hlist_node *n;
 	int ret = -ESRCH;
 
 	ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d)\n",
 		  __func__, prefix, prefixlen, ifindex);
 
-	hlist_for_each_entry_safe(p, pos, n, &ip6addrlbl_table.head, list) {
+	hlist_for_each_entry_safe(p, n, &ip6addrlbl_table.head, list) {
 		if (p->prefixlen == prefixlen &&
 		    net_eq(ip6addrlbl_net(p), net) &&
 		    p->ifindex == ifindex &&
@@ -380,11 +379,11 @@ static int __net_init ip6addrlbl_net_init(struct net *net)
 static void __net_exit ip6addrlbl_net_exit(struct net *net)
 {
 	struct ip6addrlbl_entry *p = NULL;
-	struct hlist_node *pos, *n;
+	struct hlist_node *n;
 
 	/* Remove all labels belonging to the exiting net */
 	spin_lock(&ip6addrlbl_table.lock);
-	hlist_for_each_entry_safe(p, pos, n, &ip6addrlbl_table.head, list) {
+	hlist_for_each_entry_safe(p, n, &ip6addrlbl_table.head, list) {
 		if (net_eq(ip6addrlbl_net(p), net)) {
 			hlist_del_rcu(&p->list);
 			ip6addrlbl_put(p);
@@ -505,12 +504,11 @@ static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	struct net *net = sock_net(skb->sk);
 	struct ip6addrlbl_entry *p;
-	struct hlist_node *pos;
 	int idx = 0, s_idx = cb->args[0];
 	int err;
 
 	rcu_read_lock();
-	hlist_for_each_entry_rcu(p, pos, &ip6addrlbl_table.head, list) {
+	hlist_for_each_entry_rcu(p, &ip6addrlbl_table.head, list) {
 		if (idx >= s_idx &&
 		    net_eq(ip6addrlbl_net(p), net)) {
 			if ((err = ip6addrlbl_fill(skb, p,
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index b386a2ce4c6f..9bfab19ff3c0 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -31,7 +31,6 @@ int inet6_csk_bind_conflict(const struct sock *sk,
 			    const struct inet_bind_bucket *tb, bool relax)
 {
 	const struct sock *sk2;
-	const struct hlist_node *node;
 	int reuse = sk->sk_reuse;
 	int reuseport = sk->sk_reuseport;
 	kuid_t uid = sock_i_uid((struct sock *)sk);
@@ -41,7 +40,7 @@ int inet6_csk_bind_conflict(const struct sock *sk,
 	 * See comment in inet_csk_bind_conflict about sock lookup
 	 * vs net namespaces issues.
 	 */
-	sk_for_each_bound(sk2, node, &tb->owners) {
+	sk_for_each_bound(sk2, &tb->owners) {
 		if (sk != sk2 &&
 		    (!sk->sk_bound_dev_if ||
 		     !sk2->sk_bound_dev_if ||
@@ -58,7 +57,7 @@ int inet6_csk_bind_conflict(const struct sock *sk,
 		}
 	}
 
-	return node != NULL;
+	return sk2 != NULL;
 }
 
 EXPORT_SYMBOL_GPL(inet6_csk_bind_conflict);
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 710cafd2e1a9..192dd1a0e188 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -224,7 +224,6 @@ struct fib6_table *fib6_get_table(struct net *net, u32 id)
 {
 	struct fib6_table *tb;
 	struct hlist_head *head;
-	struct hlist_node *node;
 	unsigned int h;
 
 	if (id == 0)
@@ -232,7 +231,7 @@ struct fib6_table *fib6_get_table(struct net *net, u32 id)
 	h = id & (FIB6_TABLE_HASHSZ - 1);
 	rcu_read_lock();
 	head = &net->ipv6.fib_table_hash[h];
-	hlist_for_each_entry_rcu(tb, node, head, tb6_hlist) {
+	hlist_for_each_entry_rcu(tb, head, tb6_hlist) {
 		if (tb->tb6_id == id) {
 			rcu_read_unlock();
 			return tb;
@@ -363,7 +362,6 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
 	struct rt6_rtnl_dump_arg arg;
 	struct fib6_walker_t *w;
 	struct fib6_table *tb;
-	struct hlist_node *node;
 	struct hlist_head *head;
 	int res = 0;
 
@@ -398,7 +396,7 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
 	for (h = s_h; h < FIB6_TABLE_HASHSZ; h++, s_e = 0) {
 		e = 0;
 		head = &net->ipv6.fib_table_hash[h];
-		hlist_for_each_entry_rcu(tb, node, head, tb6_hlist) {
+		hlist_for_each_entry_rcu(tb, head, tb6_hlist) {
 			if (e < s_e)
 				goto next;
 			res = fib6_dump_table(tb, skb, cb);
@@ -1520,14 +1518,13 @@ void fib6_clean_all_ro(struct net *net, int (*func)(struct rt6_info *, void *arg
 		    int prune, void *arg)
 {
 	struct fib6_table *table;
-	struct hlist_node *node;
 	struct hlist_head *head;
 	unsigned int h;
 
 	rcu_read_lock();
 	for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
 		head = &net->ipv6.fib_table_hash[h];
-		hlist_for_each_entry_rcu(table, node, head, tb6_hlist) {
+		hlist_for_each_entry_rcu(table, head, tb6_hlist) {
 			read_lock_bh(&table->tb6_lock);
 			fib6_clean_tree(net, &table->tb6_root,
 					func, prune, arg);
@@ -1540,14 +1537,13 @@ void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *arg),
 		    int prune, void *arg)
 {
 	struct fib6_table *table;
-	struct hlist_node *node;
 	struct hlist_head *head;
 	unsigned int h;
 
 	rcu_read_lock();
 	for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
 		head = &net->ipv6.fib_table_hash[h];
-		hlist_for_each_entry_rcu(table, node, head, tb6_hlist) {
+		hlist_for_each_entry_rcu(table, head, tb6_hlist) {
 			write_lock_bh(&table->tb6_lock);
 			fib6_clean_tree(net, &table->tb6_root,
 					func, prune, arg);
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index c65907db8c44..330b5e7b7df6 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -71,10 +71,9 @@ static struct sock *__raw_v6_lookup(struct net *net, struct sock *sk,
 		unsigned short num, const struct in6_addr *loc_addr,
 		const struct in6_addr *rmt_addr, int dif)
 {
-	struct hlist_node *node;
 	bool is_multicast = ipv6_addr_is_multicast(loc_addr);
 
-	sk_for_each_from(sk, node)
+	sk_for_each_from(sk)
 		if (inet_sk(sk)->inet_num == num) {
 			struct ipv6_pinfo *np = inet6_sk(sk);
 
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
index 6cc48012b730..de2bcfaaf759 100644
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -89,9 +89,8 @@ static struct xfrm6_tunnel_spi *__xfrm6_tunnel_spi_lookup(struct net *net, const
 {
 	struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net);
 	struct xfrm6_tunnel_spi *x6spi;
-	struct hlist_node *pos;
 
-	hlist_for_each_entry_rcu(x6spi, pos,
+	hlist_for_each_entry_rcu(x6spi,
 			     &xfrm6_tn->spi_byaddr[xfrm6_tunnel_spi_hash_byaddr(saddr)],
 			     list_byaddr) {
 		if (xfrm6_addr_equal(&x6spi->addr, saddr))
@@ -120,9 +119,8 @@ static int __xfrm6_tunnel_spi_check(struct net *net, u32 spi)
 	struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net);
 	struct xfrm6_tunnel_spi *x6spi;
 	int index = xfrm6_tunnel_spi_hash_byspi(spi);
-	struct hlist_node *pos;
 
-	hlist_for_each_entry(x6spi, pos,
+	hlist_for_each_entry(x6spi,
 			     &xfrm6_tn->spi_byspi[index],
 			     list_byspi) {
 		if (x6spi->spi == spi)
@@ -203,11 +201,11 @@ static void xfrm6_tunnel_free_spi(struct net *net, xfrm_address_t *saddr)
 {
 	struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net);
 	struct xfrm6_tunnel_spi *x6spi;
-	struct hlist_node *pos, *n;
+	struct hlist_node *n;
 
 	spin_lock_bh(&xfrm6_tunnel_spi_lock);
 
-	hlist_for_each_entry_safe(x6spi, pos, n,
+	hlist_for_each_entry_safe(x6spi, n,
 				  &xfrm6_tn->spi_byaddr[xfrm6_tunnel_spi_hash_byaddr(saddr)],
 				  list_byaddr)
 	{
diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c
index dfd6faaf0ea7..f547a47d381c 100644
--- a/net/ipx/af_ipx.c
+++ b/net/ipx/af_ipx.c
@@ -228,9 +228,8 @@ static struct sock *__ipxitf_find_socket(struct ipx_interface *intrfc,
 					 __be16 port)
 {
 	struct sock *s;
-	struct hlist_node *node;
 
-	sk_for_each(s, node, &intrfc->if_sklist)
+	sk_for_each(s, &intrfc->if_sklist)
 		if (ipx_sk(s)->port == port)
 			goto found;
 	s = NULL;
@@ -259,12 +258,11 @@ static struct sock *ipxitf_find_internal_socket(struct ipx_interface *intrfc,
 						__be16 port)
 {
 	struct sock *s;
-	struct hlist_node *node;
 
 	ipxitf_hold(intrfc);
 	spin_lock_bh(&intrfc->if_sklist_lock);
 
-	sk_for_each(s, node, &intrfc->if_sklist) {
+	sk_for_each(s, &intrfc->if_sklist) {
 		struct ipx_sock *ipxs = ipx_sk(s);
 
 		if (ipxs->port == port &&
@@ -282,14 +280,14 @@ found:
 static void __ipxitf_down(struct ipx_interface *intrfc)
 {
 	struct sock *s;
-	struct hlist_node *node, *t;
+	struct hlist_node *t;
 
 	/* Delete all routes associated with this interface */
 	ipxrtr_del_routes(intrfc);
 
 	spin_lock_bh(&intrfc->if_sklist_lock);
 	/* error sockets */
-	sk_for_each_safe(s, node, t, &intrfc->if_sklist) {
+	sk_for_each_safe(s, t, &intrfc->if_sklist) {
 		struct ipx_sock *ipxs = ipx_sk(s);
 
 		s->sk_err = ENOLINK;
@@ -385,12 +383,11 @@ static int ipxitf_demux_socket(struct ipx_interface *intrfc,
 	int is_broadcast = !memcmp(ipx->ipx_dest.node, ipx_broadcast_node,
 				   IPX_NODE_LEN);
 	struct sock *s;
-	struct hlist_node *node;
 	int rc;
 
 	spin_lock_bh(&intrfc->if_sklist_lock);
 
-	sk_for_each(s, node, &intrfc->if_sklist) {
+	sk_for_each(s, &intrfc->if_sklist) {
 		struct ipx_sock *ipxs = ipx_sk(s);
 
 		if (ipxs->port == ipx->ipx_dest.sock &&
@@ -446,12 +443,11 @@ static struct sock *ncp_connection_hack(struct ipx_interface *intrfc,
 		connection = (((int) *(ncphdr + 9)) << 8) | (int) *(ncphdr + 8);
 
 	if (connection) {
-		struct hlist_node *node;
 		/* Now we have to look for a special NCP connection handling
 		 * socket. Only these sockets have ipx_ncp_conn != 0, set by
 		 * SIOCIPXNCPCONN. */
 		spin_lock_bh(&intrfc->if_sklist_lock);
-		sk_for_each(sk, node, &intrfc->if_sklist)
+		sk_for_each(sk, &intrfc->if_sklist)
 			if (ipx_sk(sk)->ipx_ncp_conn == connection) {
 				sock_hold(sk);
 				goto found;
diff --git a/net/ipx/ipx_proc.c b/net/ipx/ipx_proc.c
index 02ff7f2f60d4..65e8833a2510 100644
--- a/net/ipx/ipx_proc.c
+++ b/net/ipx/ipx_proc.c
@@ -103,19 +103,18 @@ out:
 static __inline__ struct sock *ipx_get_socket_idx(loff_t pos)
 {
 	struct sock *s = NULL;
-	struct hlist_node *node;
 	struct ipx_interface *i;
 
 	list_for_each_entry(i, &ipx_interfaces, node) {
 		spin_lock_bh(&i->if_sklist_lock);
-		sk_for_each(s, node, &i->if_sklist) {
+		sk_for_each(s, &i->if_sklist) {
 			if (!pos)
 				break;
 			--pos;
 		}
 		spin_unlock_bh(&i->if_sklist_lock);
 		if (!pos) {
-			if (node)
+			if (s)
 				goto found;
 			break;
 		}
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index cd6f7a991d80..a7d11ffe4284 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -156,14 +156,13 @@ static int afiucv_pm_freeze(struct device *dev)
 {
 	struct iucv_sock *iucv;
 	struct sock *sk;
-	struct hlist_node *node;
 	int err = 0;
 
 #ifdef CONFIG_PM_DEBUG
 	printk(KERN_WARNING "afiucv_pm_freeze\n");
 #endif
 	read_lock(&iucv_sk_list.lock);
-	sk_for_each(sk, node, &iucv_sk_list.head) {
+	sk_for_each(sk, &iucv_sk_list.head) {
 		iucv = iucv_sk(sk);
 		switch (sk->sk_state) {
 		case IUCV_DISCONN:
@@ -194,13 +193,12 @@ static int afiucv_pm_freeze(struct device *dev)
 static int afiucv_pm_restore_thaw(struct device *dev)
 {
 	struct sock *sk;
-	struct hlist_node *node;
 
 #ifdef CONFIG_PM_DEBUG
 	printk(KERN_WARNING "afiucv_pm_restore_thaw\n");
 #endif
 	read_lock(&iucv_sk_list.lock);
-	sk_for_each(sk, node, &iucv_sk_list.head) {
+	sk_for_each(sk, &iucv_sk_list.head) {
 		switch (sk->sk_state) {
 		case IUCV_CONNECTED:
 			sk->sk_err = EPIPE;
@@ -390,9 +388,8 @@ static int afiucv_hs_send(struct iucv_message *imsg, struct sock *sock,
 static struct sock *__iucv_get_sock_by_name(char *nm)
 {
 	struct sock *sk;
-	struct hlist_node *node;
 
-	sk_for_each(sk, node, &iucv_sk_list.head)
+	sk_for_each(sk, &iucv_sk_list.head)
 		if (!memcmp(&iucv_sk(sk)->src_name, nm, 8))
 			return sk;
 
@@ -1678,7 +1675,6 @@ static int iucv_callback_connreq(struct iucv_path *path,
 	unsigned char user_data[16];
 	unsigned char nuser_data[16];
 	unsigned char src_name[8];
-	struct hlist_node *node;
 	struct sock *sk, *nsk;
 	struct iucv_sock *iucv, *niucv;
 	int err;
@@ -1689,7 +1685,7 @@ static int iucv_callback_connreq(struct iucv_path *path,
 	read_lock(&iucv_sk_list.lock);
 	iucv = NULL;
 	sk = NULL;
-	sk_for_each(sk, node, &iucv_sk_list.head)
+	sk_for_each(sk, &iucv_sk_list.head)
 		if (sk->sk_state == IUCV_LISTEN &&
 		    !memcmp(&iucv_sk(sk)->src_name, src_name, 8)) {
 			/*
@@ -2115,7 +2111,6 @@ static int afiucv_hs_callback_rx(struct sock *sk, struct sk_buff *skb)
 static int afiucv_hs_rcv(struct sk_buff *skb, struct net_device *dev,
 	struct packet_type *pt, struct net_device *orig_dev)
 {
-	struct hlist_node *node;
 	struct sock *sk;
 	struct iucv_sock *iucv;
 	struct af_iucv_trans_hdr *trans_hdr;
@@ -2132,7 +2127,7 @@ static int afiucv_hs_rcv(struct sk_buff *skb, struct net_device *dev,
 	iucv = NULL;
 	sk = NULL;
 	read_lock(&iucv_sk_list.lock);
-	sk_for_each(sk, node, &iucv_sk_list.head) {
+	sk_for_each(sk, &iucv_sk_list.head) {
 		if (trans_hdr->flags == AF_IUCV_FLAG_SYN) {
 			if ((!memcmp(&iucv_sk(sk)->src_name,
 				     trans_hdr->destAppName, 8)) &&
@@ -2225,10 +2220,9 @@ static void afiucv_hs_callback_txnotify(struct sk_buff *skb,
 	struct sk_buff *list_skb;
 	struct sk_buff *nskb;
 	unsigned long flags;
-	struct hlist_node *node;
 
 	read_lock_irqsave(&iucv_sk_list.lock, flags);
-	sk_for_each(sk, node, &iucv_sk_list.head)
+	sk_for_each(sk, &iucv_sk_list.head)
 		if (sk == isk) {
 			iucv = iucv_sk(sk);
 			break;
@@ -2299,14 +2293,13 @@ static int afiucv_netdev_event(struct notifier_block *this,
 			       unsigned long event, void *ptr)
 {
 	struct net_device *event_dev = (struct net_device *)ptr;
-	struct hlist_node *node;
 	struct sock *sk;
 	struct iucv_sock *iucv;
 
 	switch (event) {
 	case NETDEV_REBOOT:
 	case NETDEV_GOING_DOWN:
-		sk_for_each(sk, node, &iucv_sk_list.head) {
+		sk_for_each(sk, &iucv_sk_list.head) {
 			iucv = iucv_sk(sk);
 			if ((iucv->hs_dev == event_dev) &&
 			    (sk->sk_state == IUCV_CONNECTED)) {
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 9ef79851f297..556fdafdd1ea 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -225,7 +225,6 @@ static int pfkey_broadcast(struct sk_buff *skb, gfp_t allocation,
 {
 	struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id);
 	struct sock *sk;
-	struct hlist_node *node;
 	struct sk_buff *skb2 = NULL;
 	int err = -ESRCH;
 
@@ -236,7 +235,7 @@ static int pfkey_broadcast(struct sk_buff *skb, gfp_t allocation,
 		return -ENOMEM;
 
 	rcu_read_lock();
-	sk_for_each_rcu(sk, node, &net_pfkey->table) {
+	sk_for_each_rcu(sk, &net_pfkey->table) {
 		struct pfkey_sock *pfk = pfkey_sk(sk);
 		int err2;
 
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index dcfd64e83ab7..d36875f3427e 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -221,10 +221,9 @@ static struct l2tp_session *l2tp_session_find_2(struct net *net, u32 session_id)
 	struct hlist_head *session_list =
 		l2tp_session_id_hash_2(pn, session_id);
 	struct l2tp_session *session;
-	struct hlist_node *walk;
 
 	rcu_read_lock_bh();
-	hlist_for_each_entry_rcu(session, walk, session_list, global_hlist) {
+	hlist_for_each_entry_rcu(session, session_list, global_hlist) {
 		if (session->session_id == session_id) {
 			rcu_read_unlock_bh();
 			return session;
@@ -253,7 +252,6 @@ struct l2tp_session *l2tp_session_find(struct net *net, struct l2tp_tunnel *tunn
 {
 	struct hlist_head *session_list;
 	struct l2tp_session *session;
-	struct hlist_node *walk;
 
 	/* In L2TPv3, session_ids are unique over all tunnels and we
 	 * sometimes need to look them up before we know the
@@ -264,7 +262,7 @@ struct l2tp_session *l2tp_session_find(struct net *net, struct l2tp_tunnel *tunn
 
 	session_list = l2tp_session_id_hash(tunnel, session_id);
 	read_lock_bh(&tunnel->hlist_lock);
-	hlist_for_each_entry(session, walk, session_list, hlist) {
+	hlist_for_each_entry(session, session_list, hlist) {
 		if (session->session_id == session_id) {
 			read_unlock_bh(&tunnel->hlist_lock);
 			return session;
@@ -279,13 +277,12 @@ EXPORT_SYMBOL_GPL(l2tp_session_find);
 struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth)
 {
 	int hash;
-	struct hlist_node *walk;
 	struct l2tp_session *session;
 	int count = 0;
 
 	read_lock_bh(&tunnel->hlist_lock);
 	for (hash = 0; hash < L2TP_HASH_SIZE; hash++) {
-		hlist_for_each_entry(session, walk, &tunnel->session_hlist[hash], hlist) {
+		hlist_for_each_entry(session, &tunnel->session_hlist[hash], hlist) {
 			if (++count > nth) {
 				read_unlock_bh(&tunnel->hlist_lock);
 				return session;
@@ -306,12 +303,11 @@ struct l2tp_session *l2tp_session_find_by_ifname(struct net *net, char *ifname)
 {
 	struct l2tp_net *pn = l2tp_pernet(net);
 	int hash;
-	struct hlist_node *walk;
 	struct l2tp_session *session;
 
 	rcu_read_lock_bh();
 	for (hash = 0; hash < L2TP_HASH_SIZE_2; hash++) {
-		hlist_for_each_entry_rcu(session, walk, &pn->l2tp_session_hlist[hash], global_hlist) {
+		hlist_for_each_entry_rcu(session, &pn->l2tp_session_hlist[hash], global_hlist) {
 			if (!strcmp(session->ifname, ifname)) {
 				rcu_read_unlock_bh();
 				return session;
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index f7ac8f42fee2..7f41b7051269 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -49,10 +49,9 @@ static inline struct l2tp_ip_sock *l2tp_ip_sk(const struct sock *sk)
 
 static struct sock *__l2tp_ip_bind_lookup(struct net *net, __be32 laddr, int dif, u32 tunnel_id)
 {
-	struct hlist_node *node;
 	struct sock *sk;
 
-	sk_for_each_bound(sk, node, &l2tp_ip_bind_table) {
+	sk_for_each_bound(sk, &l2tp_ip_bind_table) {
 		struct inet_sock *inet = inet_sk(sk);
 		struct l2tp_ip_sock *l2tp = l2tp_ip_sk(sk);
 
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index 8ee4a86ae996..41f2f8126ebc 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -60,10 +60,9 @@ static struct sock *__l2tp_ip6_bind_lookup(struct net *net,
 					   struct in6_addr *laddr,
 					   int dif, u32 tunnel_id)
 {
-	struct hlist_node *node;
 	struct sock *sk;
 
-	sk_for_each_bound(sk, node, &l2tp_ip6_bind_table) {
+	sk_for_each_bound(sk, &l2tp_ip6_bind_table) {
 		struct in6_addr *addr = inet6_rcv_saddr(sk);
 		struct l2tp_ip6_sock *l2tp = l2tp_ip6_sk(sk);
 
diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c
index 7c5073badc73..78be45cda5c1 100644
--- a/net/llc/llc_sap.c
+++ b/net/llc/llc_sap.c
@@ -393,12 +393,11 @@ static void llc_sap_mcast(struct llc_sap *sap,
 {
 	int i = 0, count = 256 / sizeof(struct sock *);
 	struct sock *sk, *stack[count];
-	struct hlist_node *node;
 	struct llc_sock *llc;
 	struct hlist_head *dev_hb = llc_sk_dev_hash(sap, skb->dev->ifindex);
 
 	spin_lock_bh(&sap->sk_lock);
-	hlist_for_each_entry(llc, node, dev_hb, dev_hash_node) {
+	hlist_for_each_entry(llc, dev_hb, dev_hash_node) {
 
 		sk = &llc->sk;
 
diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c
index 6b3c4e119c63..dc7c8df40c2c 100644
--- a/net/mac80211/mesh_pathtbl.c
+++ b/net/mac80211/mesh_pathtbl.c
@@ -72,9 +72,9 @@ static inline struct mesh_table *resize_dereference_mpp_paths(void)
  * it's used twice. So it is illegal to do
  *	for_each_mesh_entry(rcu_dereference(...), ...)
  */
-#define for_each_mesh_entry(tbl, p, node, i) \
+#define for_each_mesh_entry(tbl, node, i) \
 	for (i = 0; i <= tbl->hash_mask; i++) \
-		hlist_for_each_entry_rcu(node, p, &tbl->hash_buckets[i], list)
+		hlist_for_each_entry_rcu(node, &tbl->hash_buckets[i], list)
 
 
 static struct mesh_table *mesh_table_alloc(int size_order)
@@ -139,7 +139,7 @@ static void mesh_table_free(struct mesh_table *tbl, bool free_leafs)
 	}
 	if (free_leafs) {
 		spin_lock_bh(&tbl->gates_lock);
-		hlist_for_each_entry_safe(gate, p, q,
+		hlist_for_each_entry_safe(gate, q,
 					 tbl->known_gates, list) {
 			hlist_del(&gate->list);
 			kfree(gate);
@@ -333,12 +333,11 @@ static struct mesh_path *mpath_lookup(struct mesh_table *tbl, const u8 *dst,
 				      struct ieee80211_sub_if_data *sdata)
 {
 	struct mesh_path *mpath;
-	struct hlist_node *n;
 	struct hlist_head *bucket;
 	struct mpath_node *node;
 
 	bucket = &tbl->hash_buckets[mesh_table_hash(dst, sdata, tbl)];
-	hlist_for_each_entry_rcu(node, n, bucket, list) {
+	hlist_for_each_entry_rcu(node, bucket, list) {
 		mpath = node->mpath;
 		if (mpath->sdata == sdata &&
 		    ether_addr_equal(dst, mpath->dst)) {
@@ -389,11 +388,10 @@ mesh_path_lookup_by_idx(struct ieee80211_sub_if_data *sdata, int idx)
 {
 	struct mesh_table *tbl = rcu_dereference(mesh_paths);
 	struct mpath_node *node;
-	struct hlist_node *p;
 	int i;
 	int j = 0;
 
-	for_each_mesh_entry(tbl, p, node, i) {
+	for_each_mesh_entry(tbl, node, i) {
 		if (sdata && node->mpath->sdata != sdata)
 			continue;
 		if (j++ == idx) {
@@ -417,13 +415,12 @@ int mesh_path_add_gate(struct mesh_path *mpath)
 {
 	struct mesh_table *tbl;
 	struct mpath_node *gate, *new_gate;
-	struct hlist_node *n;
 	int err;
 
 	rcu_read_lock();
 	tbl = rcu_dereference(mesh_paths);
 
-	hlist_for_each_entry_rcu(gate, n, tbl->known_gates, list)
+	hlist_for_each_entry_rcu(gate, tbl->known_gates, list)
 		if (gate->mpath == mpath) {
 			err = -EEXIST;
 			goto err_rcu;
@@ -460,9 +457,9 @@ err_rcu:
 static void mesh_gate_del(struct mesh_table *tbl, struct mesh_path *mpath)
 {
 	struct mpath_node *gate;
-	struct hlist_node *p, *q;
+	struct hlist_node *q;
 
-	hlist_for_each_entry_safe(gate, p, q, tbl->known_gates, list) {
+	hlist_for_each_entry_safe(gate, q, tbl->known_gates, list) {
 		if (gate->mpath != mpath)
 			continue;
 		spin_lock_bh(&tbl->gates_lock);
@@ -504,7 +501,6 @@ int mesh_path_add(struct ieee80211_sub_if_data *sdata, const u8 *dst)
 	struct mesh_path *mpath, *new_mpath;
 	struct mpath_node *node, *new_node;
 	struct hlist_head *bucket;
-	struct hlist_node *n;
 	int grow = 0;
 	int err = 0;
 	u32 hash_idx;
@@ -550,7 +546,7 @@ int mesh_path_add(struct ieee80211_sub_if_data *sdata, const u8 *dst)
 	spin_lock(&tbl->hashwlock[hash_idx]);
 
 	err = -EEXIST;
-	hlist_for_each_entry(node, n, bucket, list) {
+	hlist_for_each_entry(node, bucket, list) {
 		mpath = node->mpath;
 		if (mpath->sdata == sdata &&
 		    ether_addr_equal(dst, mpath->dst))
@@ -640,7 +636,6 @@ int mpp_path_add(struct ieee80211_sub_if_data *sdata,
 	struct mesh_path *mpath, *new_mpath;
 	struct mpath_node *node, *new_node;
 	struct hlist_head *bucket;
-	struct hlist_node *n;
 	int grow = 0;
 	int err = 0;
 	u32 hash_idx;
@@ -680,7 +675,7 @@ int mpp_path_add(struct ieee80211_sub_if_data *sdata,
 	spin_lock(&tbl->hashwlock[hash_idx]);
 
 	err = -EEXIST;
-	hlist_for_each_entry(node, n, bucket, list) {
+	hlist_for_each_entry(node, bucket, list) {
 		mpath = node->mpath;
 		if (mpath->sdata == sdata &&
 		    ether_addr_equal(dst, mpath->dst))
@@ -725,14 +720,13 @@ void mesh_plink_broken(struct sta_info *sta)
 	static const u8 bcast[ETH_ALEN] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
 	struct mesh_path *mpath;
 	struct mpath_node *node;
-	struct hlist_node *p;
 	struct ieee80211_sub_if_data *sdata = sta->sdata;
 	int i;
 	__le16 reason = cpu_to_le16(WLAN_REASON_MESH_PATH_DEST_UNREACHABLE);
 
 	rcu_read_lock();
 	tbl = rcu_dereference(mesh_paths);
-	for_each_mesh_entry(tbl, p, node, i) {
+	for_each_mesh_entry(tbl, node, i) {
 		mpath = node->mpath;
 		if (rcu_dereference(mpath->next_hop) == sta &&
 		    mpath->flags & MESH_PATH_ACTIVE &&
@@ -792,13 +786,12 @@ void mesh_path_flush_by_nexthop(struct sta_info *sta)
 	struct mesh_table *tbl;
 	struct mesh_path *mpath;
 	struct mpath_node *node;
-	struct hlist_node *p;
 	int i;
 
 	rcu_read_lock();
 	read_lock_bh(&pathtbl_resize_lock);
 	tbl = resize_dereference_mesh_paths();
-	for_each_mesh_entry(tbl, p, node, i) {
+	for_each_mesh_entry(tbl, node, i) {
 		mpath = node->mpath;
 		if (rcu_dereference(mpath->next_hop) == sta) {
 			spin_lock(&tbl->hashwlock[i]);
@@ -815,11 +808,10 @@ static void table_flush_by_iface(struct mesh_table *tbl,
 {
 	struct mesh_path *mpath;
 	struct mpath_node *node;
-	struct hlist_node *p;
 	int i;
 
 	WARN_ON(!rcu_read_lock_held());
-	for_each_mesh_entry(tbl, p, node, i) {
+	for_each_mesh_entry(tbl, node, i) {
 		mpath = node->mpath;
 		if (mpath->sdata != sdata)
 			continue;
@@ -865,7 +857,6 @@ int mesh_path_del(struct ieee80211_sub_if_data *sdata, const u8 *addr)
 	struct mesh_path *mpath;
 	struct mpath_node *node;
 	struct hlist_head *bucket;
-	struct hlist_node *n;
 	int hash_idx;
 	int err = 0;
 
@@ -875,7 +866,7 @@ int mesh_path_del(struct ieee80211_sub_if_data *sdata, const u8 *addr)
 	bucket = &tbl->hash_buckets[hash_idx];
 
 	spin_lock(&tbl->hashwlock[hash_idx]);
-	hlist_for_each_entry(node, n, bucket, list) {
+	hlist_for_each_entry(node, bucket, list) {
 		mpath = node->mpath;
 		if (mpath->sdata == sdata &&
 		    ether_addr_equal(addr, mpath->dst)) {
@@ -920,7 +911,6 @@ void mesh_path_tx_pending(struct mesh_path *mpath)
 int mesh_path_send_to_gates(struct mesh_path *mpath)
 {
 	struct ieee80211_sub_if_data *sdata = mpath->sdata;
-	struct hlist_node *n;
 	struct mesh_table *tbl;
 	struct mesh_path *from_mpath = mpath;
 	struct mpath_node *gate = NULL;
@@ -935,7 +925,7 @@ int mesh_path_send_to_gates(struct mesh_path *mpath)
 	if (!known_gates)
 		return -EHOSTUNREACH;
 
-	hlist_for_each_entry_rcu(gate, n, known_gates, list) {
+	hlist_for_each_entry_rcu(gate, known_gates, list) {
 		if (gate->mpath->sdata != sdata)
 			continue;
 
@@ -951,7 +941,7 @@ int mesh_path_send_to_gates(struct mesh_path *mpath)
 		}
 	}
 
-	hlist_for_each_entry_rcu(gate, n, known_gates, list)
+	hlist_for_each_entry_rcu(gate, known_gates, list)
 		if (gate->mpath->sdata == sdata) {
 			mpath_dbg(sdata, "Sending to %pM\n", gate->mpath->dst);
 			mesh_path_tx_pending(gate->mpath);
@@ -1096,12 +1086,11 @@ void mesh_path_expire(struct ieee80211_sub_if_data *sdata)
 	struct mesh_table *tbl;
 	struct mesh_path *mpath;
 	struct mpath_node *node;
-	struct hlist_node *p;
 	int i;
 
 	rcu_read_lock();
 	tbl = rcu_dereference(mesh_paths);
-	for_each_mesh_entry(tbl, p, node, i) {
+	for_each_mesh_entry(tbl, node, i) {
 		if (node->mpath->sdata != sdata)
 			continue;
 		mpath = node->mpath;
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 9f00db7e03f2..704e514e02ab 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -259,13 +259,12 @@ __ip_vs_conn_in_get(const struct ip_vs_conn_param *p)
 {
 	unsigned int hash;
 	struct ip_vs_conn *cp;
-	struct hlist_node *n;
 
 	hash = ip_vs_conn_hashkey_param(p, false);
 
 	ct_read_lock(hash);
 
-	hlist_for_each_entry(cp, n, &ip_vs_conn_tab[hash], c_list) {
+	hlist_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
 		if (cp->af == p->af &&
 		    p->cport == cp->cport && p->vport == cp->vport &&
 		    ip_vs_addr_equal(p->af, p->caddr, &cp->caddr) &&
@@ -344,13 +343,12 @@ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p)
 {
 	unsigned int hash;
 	struct ip_vs_conn *cp;
-	struct hlist_node *n;
 
 	hash = ip_vs_conn_hashkey_param(p, false);
 
 	ct_read_lock(hash);
 
-	hlist_for_each_entry(cp, n, &ip_vs_conn_tab[hash], c_list) {
+	hlist_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
 		if (!ip_vs_conn_net_eq(cp, p->net))
 			continue;
 		if (p->pe_data && p->pe->ct_match) {
@@ -394,7 +392,6 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p)
 {
 	unsigned int hash;
 	struct ip_vs_conn *cp, *ret=NULL;
-	struct hlist_node *n;
 
 	/*
 	 *	Check for "full" addressed entries
@@ -403,7 +400,7 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p)
 
 	ct_read_lock(hash);
 
-	hlist_for_each_entry(cp, n, &ip_vs_conn_tab[hash], c_list) {
+	hlist_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
 		if (cp->af == p->af &&
 		    p->vport == cp->cport && p->cport == cp->dport &&
 		    ip_vs_addr_equal(p->af, p->vaddr, &cp->caddr) &&
@@ -953,11 +950,10 @@ static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos)
 	int idx;
 	struct ip_vs_conn *cp;
 	struct ip_vs_iter_state *iter = seq->private;
-	struct hlist_node *n;
 
 	for (idx = 0; idx < ip_vs_conn_tab_size; idx++) {
 		ct_read_lock_bh(idx);
-		hlist_for_each_entry(cp, n, &ip_vs_conn_tab[idx], c_list) {
+		hlist_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) {
 			if (pos-- == 0) {
 				iter->l = &ip_vs_conn_tab[idx];
 				return cp;
@@ -981,7 +977,6 @@ static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
 	struct ip_vs_conn *cp = v;
 	struct ip_vs_iter_state *iter = seq->private;
-	struct hlist_node *e;
 	struct hlist_head *l = iter->l;
 	int idx;
 
@@ -990,15 +985,15 @@ static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 		return ip_vs_conn_array(seq, 0);
 
 	/* more on same hash chain? */
-	if ((e = cp->c_list.next))
-		return hlist_entry(e, struct ip_vs_conn, c_list);
+	if (cp->c_list.next)
+		return hlist_entry(cp->c_list.next, struct ip_vs_conn, c_list);
 
 	idx = l - ip_vs_conn_tab;
 	ct_read_unlock_bh(idx);
 
 	while (++idx < ip_vs_conn_tab_size) {
 		ct_read_lock_bh(idx);
-		hlist_for_each_entry(cp, e, &ip_vs_conn_tab[idx], c_list) {
+		hlist_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) {
 			iter->l = &ip_vs_conn_tab[idx];
 			return cp;
 		}
@@ -1200,14 +1195,13 @@ void ip_vs_random_dropentry(struct net *net)
 	 */
 	for (idx = 0; idx < (ip_vs_conn_tab_size>>5); idx++) {
 		unsigned int hash = net_random() & ip_vs_conn_tab_mask;
-		struct hlist_node *n;
 
 		/*
 		 *  Lock is actually needed in this loop.
 		 */
 		ct_write_lock_bh(hash);
 
-		hlist_for_each_entry(cp, n, &ip_vs_conn_tab[hash], c_list) {
+		hlist_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
 			if (cp->flags & IP_VS_CONN_F_TEMPLATE)
 				/* connection template */
 				continue;
@@ -1255,14 +1249,12 @@ static void ip_vs_conn_flush(struct net *net)
 
 flush_again:
 	for (idx = 0; idx < ip_vs_conn_tab_size; idx++) {
-		struct hlist_node *n;
-
 		/*
 		 *  Lock is actually needed in this loop.
 		 */
 		ct_write_lock_bh(idx);
 
-		hlist_for_each_entry(cp, n, &ip_vs_conn_tab[idx], c_list) {
+		hlist_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) {
 			if (!ip_vs_conn_net_eq(cp, net))
 				continue;
 			IP_VS_DBG(4, "del connection\n");
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 3921e5bc1235..8c10e3db3d9b 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -90,14 +90,13 @@ __nf_ct_expect_find(struct net *net, u16 zone,
 		    const struct nf_conntrack_tuple *tuple)
 {
 	struct nf_conntrack_expect *i;
-	struct hlist_node *n;
 	unsigned int h;
 
 	if (!net->ct.expect_count)
 		return NULL;
 
 	h = nf_ct_expect_dst_hash(tuple);
-	hlist_for_each_entry_rcu(i, n, &net->ct.expect_hash[h], hnode) {
+	hlist_for_each_entry_rcu(i, &net->ct.expect_hash[h], hnode) {
 		if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) &&
 		    nf_ct_zone(i->master) == zone)
 			return i;
@@ -130,14 +129,13 @@ nf_ct_find_expectation(struct net *net, u16 zone,
 		       const struct nf_conntrack_tuple *tuple)
 {
 	struct nf_conntrack_expect *i, *exp = NULL;
-	struct hlist_node *n;
 	unsigned int h;
 
 	if (!net->ct.expect_count)
 		return NULL;
 
 	h = nf_ct_expect_dst_hash(tuple);
-	hlist_for_each_entry(i, n, &net->ct.expect_hash[h], hnode) {
+	hlist_for_each_entry(i, &net->ct.expect_hash[h], hnode) {
 		if (!(i->flags & NF_CT_EXPECT_INACTIVE) &&
 		    nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) &&
 		    nf_ct_zone(i->master) == zone) {
@@ -172,13 +170,13 @@ void nf_ct_remove_expectations(struct nf_conn *ct)
 {
 	struct nf_conn_help *help = nfct_help(ct);
 	struct nf_conntrack_expect *exp;
-	struct hlist_node *n, *next;
+	struct hlist_node *next;
 
 	/* Optimization: most connection never expect any others. */
 	if (!help)
 		return;
 
-	hlist_for_each_entry_safe(exp, n, next, &help->expectations, lnode) {
+	hlist_for_each_entry_safe(exp, next, &help->expectations, lnode) {
 		if (del_timer(&exp->timeout)) {
 			nf_ct_unlink_expect(exp);
 			nf_ct_expect_put(exp);
@@ -348,9 +346,8 @@ static void evict_oldest_expect(struct nf_conn *master,
 {
 	struct nf_conn_help *master_help = nfct_help(master);
 	struct nf_conntrack_expect *exp, *last = NULL;
-	struct hlist_node *n;
 
-	hlist_for_each_entry(exp, n, &master_help->expectations, lnode) {
+	hlist_for_each_entry(exp, &master_help->expectations, lnode) {
 		if (exp->class == new->class)
 			last = exp;
 	}
@@ -369,7 +366,7 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)
 	struct nf_conn_help *master_help = nfct_help(master);
 	struct nf_conntrack_helper *helper;
 	struct net *net = nf_ct_exp_net(expect);
-	struct hlist_node *n, *next;
+	struct hlist_node *next;
 	unsigned int h;
 	int ret = 1;
 
@@ -378,7 +375,7 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)
 		goto out;
 	}
 	h = nf_ct_expect_dst_hash(&expect->tuple);
-	hlist_for_each_entry_safe(i, n, next, &net->ct.expect_hash[h], hnode) {
+	hlist_for_each_entry_safe(i, next, &net->ct.expect_hash[h], hnode) {
 		if (expect_matches(i, expect)) {
 			if (del_timer(&i->timeout)) {
 				nf_ct_unlink_expect(i);
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 013cdf69fe29..a9740bd6fe54 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -116,14 +116,13 @@ __nf_ct_helper_find(const struct nf_conntrack_tuple *tuple)
 {
 	struct nf_conntrack_helper *helper;
 	struct nf_conntrack_tuple_mask mask = { .src.u.all = htons(0xFFFF) };
-	struct hlist_node *n;
 	unsigned int h;
 
 	if (!nf_ct_helper_count)
 		return NULL;
 
 	h = helper_hash(tuple);
-	hlist_for_each_entry_rcu(helper, n, &nf_ct_helper_hash[h], hnode) {
+	hlist_for_each_entry_rcu(helper, &nf_ct_helper_hash[h], hnode) {
 		if (nf_ct_tuple_src_mask_cmp(tuple, &helper->tuple, &mask))
 			return helper;
 	}
@@ -134,11 +133,10 @@ struct nf_conntrack_helper *
 __nf_conntrack_helper_find(const char *name, u16 l3num, u8 protonum)
 {
 	struct nf_conntrack_helper *h;
-	struct hlist_node *n;
 	unsigned int i;
 
 	for (i = 0; i < nf_ct_helper_hsize; i++) {
-		hlist_for_each_entry_rcu(h, n, &nf_ct_helper_hash[i], hnode) {
+		hlist_for_each_entry_rcu(h, &nf_ct_helper_hash[i], hnode) {
 			if (!strcmp(h->name, name) &&
 			    h->tuple.src.l3num == l3num &&
 			    h->tuple.dst.protonum == protonum)
@@ -357,7 +355,6 @@ int nf_conntrack_helper_register(struct nf_conntrack_helper *me)
 {
 	int ret = 0;
 	struct nf_conntrack_helper *cur;
-	struct hlist_node *n;
 	unsigned int h = helper_hash(&me->tuple);
 
 	BUG_ON(me->expect_policy == NULL);
@@ -365,7 +362,7 @@ int nf_conntrack_helper_register(struct nf_conntrack_helper *me)
 	BUG_ON(strlen(me->name) > NF_CT_HELPER_NAME_LEN - 1);
 
 	mutex_lock(&nf_ct_helper_mutex);
-	hlist_for_each_entry(cur, n, &nf_ct_helper_hash[h], hnode) {
+	hlist_for_each_entry(cur, &nf_ct_helper_hash[h], hnode) {
 		if (strncmp(cur->name, me->name, NF_CT_HELPER_NAME_LEN) == 0 &&
 		    cur->tuple.src.l3num == me->tuple.src.l3num &&
 		    cur->tuple.dst.protonum == me->tuple.dst.protonum) {
@@ -386,13 +383,13 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me,
 {
 	struct nf_conntrack_tuple_hash *h;
 	struct nf_conntrack_expect *exp;
-	const struct hlist_node *n, *next;
+	const struct hlist_node *next;
 	const struct hlist_nulls_node *nn;
 	unsigned int i;
 
 	/* Get rid of expectations */
 	for (i = 0; i < nf_ct_expect_hsize; i++) {
-		hlist_for_each_entry_safe(exp, n, next,
+		hlist_for_each_entry_safe(exp, next,
 					  &net->ct.expect_hash[i], hnode) {
 			struct nf_conn_help *help = nfct_help(exp->master);
 			if ((rcu_dereference_protected(
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 5d60e04f9679..9904b15f600e 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -2370,14 +2370,13 @@ ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
 	struct net *net = sock_net(skb->sk);
 	struct nf_conntrack_expect *exp, *last;
 	struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
-	struct hlist_node *n;
 	u_int8_t l3proto = nfmsg->nfgen_family;
 
 	rcu_read_lock();
 	last = (struct nf_conntrack_expect *)cb->args[1];
 	for (; cb->args[0] < nf_ct_expect_hsize; cb->args[0]++) {
 restart:
-		hlist_for_each_entry(exp, n, &net->ct.expect_hash[cb->args[0]],
+		hlist_for_each_entry(exp, &net->ct.expect_hash[cb->args[0]],
 				     hnode) {
 			if (l3proto && exp->tuple.src.l3num != l3proto)
 				continue;
@@ -2510,7 +2509,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
 	struct nf_conntrack_expect *exp;
 	struct nf_conntrack_tuple tuple;
 	struct nfgenmsg *nfmsg = nlmsg_data(nlh);
-	struct hlist_node *n, *next;
+	struct hlist_node *next;
 	u_int8_t u3 = nfmsg->nfgen_family;
 	unsigned int i;
 	u16 zone;
@@ -2557,7 +2556,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
 		/* delete all expectations for this helper */
 		spin_lock_bh(&nf_conntrack_lock);
 		for (i = 0; i < nf_ct_expect_hsize; i++) {
-			hlist_for_each_entry_safe(exp, n, next,
+			hlist_for_each_entry_safe(exp, next,
 						  &net->ct.expect_hash[i],
 						  hnode) {
 				m_help = nfct_help(exp->master);
@@ -2575,7 +2574,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
 		/* This basically means we have to flush everything*/
 		spin_lock_bh(&nf_conntrack_lock);
 		for (i = 0; i < nf_ct_expect_hsize; i++) {
-			hlist_for_each_entry_safe(exp, n, next,
+			hlist_for_each_entry_safe(exp, next,
 						  &net->ct.expect_hash[i],
 						  hnode) {
 				if (del_timer(&exp->timeout)) {
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index 069229d919b6..0e7d423324c3 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -855,11 +855,11 @@ static int refresh_signalling_expectation(struct nf_conn *ct,
 {
 	struct nf_conn_help *help = nfct_help(ct);
 	struct nf_conntrack_expect *exp;
-	struct hlist_node *n, *next;
+	struct hlist_node *next;
 	int found = 0;
 
 	spin_lock_bh(&nf_conntrack_lock);
-	hlist_for_each_entry_safe(exp, n, next, &help->expectations, lnode) {
+	hlist_for_each_entry_safe(exp, next, &help->expectations, lnode) {
 		if (exp->class != SIP_EXPECT_SIGNALLING ||
 		    !nf_inet_addr_cmp(&exp->tuple.dst.u3, addr) ||
 		    exp->tuple.dst.protonum != proto ||
@@ -881,10 +881,10 @@ static void flush_expectations(struct nf_conn *ct, bool media)
 {
 	struct nf_conn_help *help = nfct_help(ct);
 	struct nf_conntrack_expect *exp;
-	struct hlist_node *n, *next;
+	struct hlist_node *next;
 
 	spin_lock_bh(&nf_conntrack_lock);
-	hlist_for_each_entry_safe(exp, n, next, &help->expectations, lnode) {
+	hlist_for_each_entry_safe(exp, next, &help->expectations, lnode) {
 		if ((exp->class != SIP_EXPECT_SIGNALLING) ^ media)
 			continue;
 		if (!del_timer(&exp->timeout))
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 5f2f9109f461..8d5769c6d16e 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -191,9 +191,8 @@ find_appropriate_src(struct net *net, u16 zone,
 	unsigned int h = hash_by_src(net, zone, tuple);
 	const struct nf_conn_nat *nat;
 	const struct nf_conn *ct;
-	const struct hlist_node *n;
 
-	hlist_for_each_entry_rcu(nat, n, &net->ct.nat_bysource[h], bysource) {
+	hlist_for_each_entry_rcu(nat, &net->ct.nat_bysource[h], bysource) {
 		ct = nat->ct;
 		if (same_src(ct, tuple) && nf_ct_zone(ct) == zone) {
 			/* Copy source part from reply tuple. */
diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c
index 945950a8b1f1..a191b6db657e 100644
--- a/net/netfilter/nfnetlink_cthelper.c
+++ b/net/netfilter/nfnetlink_cthelper.c
@@ -282,7 +282,6 @@ nfnl_cthelper_new(struct sock *nfnl, struct sk_buff *skb,
 	const char *helper_name;
 	struct nf_conntrack_helper *cur, *helper = NULL;
 	struct nf_conntrack_tuple tuple;
-	struct hlist_node *n;
 	int ret = 0, i;
 
 	if (!tb[NFCTH_NAME] || !tb[NFCTH_TUPLE])
@@ -296,7 +295,7 @@ nfnl_cthelper_new(struct sock *nfnl, struct sk_buff *skb,
 
 	rcu_read_lock();
 	for (i = 0; i < nf_ct_helper_hsize && !helper; i++) {
-		hlist_for_each_entry_rcu(cur, n, &nf_ct_helper_hash[i], hnode) {
+		hlist_for_each_entry_rcu(cur, &nf_ct_helper_hash[i], hnode) {
 
 			/* skip non-userspace conntrack helpers. */
 			if (!(cur->flags & NF_CT_HELPER_F_USERSPACE))
@@ -452,13 +451,12 @@ static int
 nfnl_cthelper_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	struct nf_conntrack_helper *cur, *last;
-	struct hlist_node *n;
 
 	rcu_read_lock();
 	last = (struct nf_conntrack_helper *)cb->args[1];
 	for (; cb->args[0] < nf_ct_helper_hsize; cb->args[0]++) {
 restart:
-		hlist_for_each_entry_rcu(cur, n,
+		hlist_for_each_entry_rcu(cur,
 				&nf_ct_helper_hash[cb->args[0]], hnode) {
 
 			/* skip non-userspace conntrack helpers. */
@@ -495,7 +493,6 @@ nfnl_cthelper_get(struct sock *nfnl, struct sk_buff *skb,
 {
 	int ret = -ENOENT, i;
 	struct nf_conntrack_helper *cur;
-	struct hlist_node *n;
 	struct sk_buff *skb2;
 	char *helper_name = NULL;
 	struct nf_conntrack_tuple tuple;
@@ -520,7 +517,7 @@ nfnl_cthelper_get(struct sock *nfnl, struct sk_buff *skb,
 	}
 
 	for (i = 0; i < nf_ct_helper_hsize; i++) {
-		hlist_for_each_entry_rcu(cur, n, &nf_ct_helper_hash[i], hnode) {
+		hlist_for_each_entry_rcu(cur, &nf_ct_helper_hash[i], hnode) {
 
 			/* skip non-userspace conntrack helpers. */
 			if (!(cur->flags & NF_CT_HELPER_F_USERSPACE))
@@ -568,7 +565,7 @@ nfnl_cthelper_del(struct sock *nfnl, struct sk_buff *skb,
 {
 	char *helper_name = NULL;
 	struct nf_conntrack_helper *cur;
-	struct hlist_node *n, *tmp;
+	struct hlist_node *tmp;
 	struct nf_conntrack_tuple tuple;
 	bool tuple_set = false, found = false;
 	int i, j = 0, ret;
@@ -585,7 +582,7 @@ nfnl_cthelper_del(struct sock *nfnl, struct sk_buff *skb,
 	}
 
 	for (i = 0; i < nf_ct_helper_hsize; i++) {
-		hlist_for_each_entry_safe(cur, n, tmp, &nf_ct_helper_hash[i],
+		hlist_for_each_entry_safe(cur, tmp, &nf_ct_helper_hash[i],
 								hnode) {
 			/* skip non-userspace conntrack helpers. */
 			if (!(cur->flags & NF_CT_HELPER_F_USERSPACE))
@@ -654,13 +651,13 @@ err_out:
 static void __exit nfnl_cthelper_exit(void)
 {
 	struct nf_conntrack_helper *cur;
-	struct hlist_node *n, *tmp;
+	struct hlist_node *tmp;
 	int i;
 
 	nfnetlink_subsys_unregister(&nfnl_cthelper_subsys);
 
 	for (i=0; i<nf_ct_helper_hsize; i++) {
-		hlist_for_each_entry_safe(cur, n, tmp, &nf_ct_helper_hash[i],
+		hlist_for_each_entry_safe(cur, tmp, &nf_ct_helper_hash[i],
 									hnode) {
 			/* skip non-userspace conntrack helpers. */
 			if (!(cur->flags & NF_CT_HELPER_F_USERSPACE))
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 92fd8eca0d31..f248db572972 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -87,11 +87,10 @@ static struct nfulnl_instance *
 __instance_lookup(u_int16_t group_num)
 {
 	struct hlist_head *head;
-	struct hlist_node *pos;
 	struct nfulnl_instance *inst;
 
 	head = &instance_table[instance_hashfn(group_num)];
-	hlist_for_each_entry_rcu(inst, pos, head, hlist) {
+	hlist_for_each_entry_rcu(inst, head, hlist) {
 		if (inst->group_num == group_num)
 			return inst;
 	}
@@ -717,11 +716,11 @@ nfulnl_rcv_nl_event(struct notifier_block *this,
 		/* destroy all instances for this portid */
 		spin_lock_bh(&instances_lock);
 		for  (i = 0; i < INSTANCE_BUCKETS; i++) {
-			struct hlist_node *tmp, *t2;
+			struct hlist_node *t2;
 			struct nfulnl_instance *inst;
 			struct hlist_head *head = &instance_table[i];
 
-			hlist_for_each_entry_safe(inst, tmp, t2, head, hlist) {
+			hlist_for_each_entry_safe(inst, t2, head, hlist) {
 				if ((net_eq(n->net, &init_net)) &&
 				    (n->portid == inst->peer_portid))
 					__instance_destroy(inst);
diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c
index 3158d87b56a8..858fd52c1040 100644
--- a/net/netfilter/nfnetlink_queue_core.c
+++ b/net/netfilter/nfnetlink_queue_core.c
@@ -80,11 +80,10 @@ static struct nfqnl_instance *
 instance_lookup(u_int16_t queue_num)
 {
 	struct hlist_head *head;
-	struct hlist_node *pos;
 	struct nfqnl_instance *inst;
 
 	head = &instance_table[instance_hashfn(queue_num)];
-	hlist_for_each_entry_rcu(inst, pos, head, hlist) {
+	hlist_for_each_entry_rcu(inst, head, hlist) {
 		if (inst->queue_num == queue_num)
 			return inst;
 	}
@@ -583,11 +582,10 @@ nfqnl_dev_drop(int ifindex)
 	rcu_read_lock();
 
 	for (i = 0; i < INSTANCE_BUCKETS; i++) {
-		struct hlist_node *tmp;
 		struct nfqnl_instance *inst;
 		struct hlist_head *head = &instance_table[i];
 
-		hlist_for_each_entry_rcu(inst, tmp, head, hlist)
+		hlist_for_each_entry_rcu(inst, head, hlist)
 			nfqnl_flush(inst, dev_cmp, ifindex);
 	}
 
@@ -627,11 +625,11 @@ nfqnl_rcv_nl_event(struct notifier_block *this,
 		/* destroy all instances for this portid */
 		spin_lock(&instances_lock);
 		for (i = 0; i < INSTANCE_BUCKETS; i++) {
-			struct hlist_node *tmp, *t2;
+			struct hlist_node *t2;
 			struct nfqnl_instance *inst;
 			struct hlist_head *head = &instance_table[i];
 
-			hlist_for_each_entry_safe(inst, tmp, t2, head, hlist) {
+			hlist_for_each_entry_safe(inst, t2, head, hlist) {
 				if ((n->net == &init_net) &&
 				    (n->portid == inst->peer_portid))
 					__instance_destroy(inst);
diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c
index f264032b8c56..370adf622cef 100644
--- a/net/netfilter/xt_RATEEST.c
+++ b/net/netfilter/xt_RATEEST.c
@@ -43,12 +43,11 @@ static void xt_rateest_hash_insert(struct xt_rateest *est)
 struct xt_rateest *xt_rateest_lookup(const char *name)
 {
 	struct xt_rateest *est;
-	struct hlist_node *n;
 	unsigned int h;
 
 	h = xt_rateest_hash(name);
 	mutex_lock(&xt_rateest_mutex);
-	hlist_for_each_entry(est, n, &rateest_hash[h], list) {
+	hlist_for_each_entry(est, &rateest_hash[h], list) {
 		if (strcmp(est->name, name) == 0) {
 			est->refcnt++;
 			mutex_unlock(&xt_rateest_mutex);
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index 70b5591a2586..c40b2695633b 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -101,7 +101,7 @@ static int count_them(struct net *net,
 {
 	const struct nf_conntrack_tuple_hash *found;
 	struct xt_connlimit_conn *conn;
-	struct hlist_node *pos, *n;
+	struct hlist_node *n;
 	struct nf_conn *found_ct;
 	struct hlist_head *hash;
 	bool addit = true;
@@ -115,7 +115,7 @@ static int count_them(struct net *net,
 	rcu_read_lock();
 
 	/* check the saved connections */
-	hlist_for_each_entry_safe(conn, pos, n, hash, node) {
+	hlist_for_each_entry_safe(conn, n, hash, node) {
 		found    = nf_conntrack_find_get(net, NF_CT_DEFAULT_ZONE,
 						 &conn->tuple);
 		found_ct = NULL;
@@ -258,14 +258,14 @@ static void connlimit_mt_destroy(const struct xt_mtdtor_param *par)
 {
 	const struct xt_connlimit_info *info = par->matchinfo;
 	struct xt_connlimit_conn *conn;
-	struct hlist_node *pos, *n;
+	struct hlist_node *n;
 	struct hlist_head *hash = info->data->iphash;
 	unsigned int i;
 
 	nf_ct_l3proto_module_put(par->family);
 
 	for (i = 0; i < ARRAY_SIZE(info->data->iphash); ++i) {
-		hlist_for_each_entry_safe(conn, pos, n, &hash[i], node) {
+		hlist_for_each_entry_safe(conn, n, &hash[i], node) {
 			hlist_del(&conn->node);
 			kfree(conn);
 		}
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 98218c896d2e..f330e8beaf69 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -141,11 +141,10 @@ dsthash_find(const struct xt_hashlimit_htable *ht,
 	     const struct dsthash_dst *dst)
 {
 	struct dsthash_ent *ent;
-	struct hlist_node *pos;
 	u_int32_t hash = hash_dst(ht, dst);
 
 	if (!hlist_empty(&ht->hash[hash])) {
-		hlist_for_each_entry_rcu(ent, pos, &ht->hash[hash], node)
+		hlist_for_each_entry_rcu(ent, &ht->hash[hash], node)
 			if (dst_cmp(ent, dst)) {
 				spin_lock(&ent->lock);
 				return ent;
@@ -297,8 +296,8 @@ static void htable_selective_cleanup(struct xt_hashlimit_htable *ht,
 	spin_lock_bh(&ht->lock);
 	for (i = 0; i < ht->cfg.size; i++) {
 		struct dsthash_ent *dh;
-		struct hlist_node *pos, *n;
-		hlist_for_each_entry_safe(dh, pos, n, &ht->hash[i], node) {
+		struct hlist_node *n;
+		hlist_for_each_entry_safe(dh, n, &ht->hash[i], node) {
 			if ((*select)(ht, dh))
 				dsthash_free(ht, dh);
 		}
@@ -343,9 +342,8 @@ static struct xt_hashlimit_htable *htable_find_get(struct net *net,
 {
 	struct hashlimit_net *hashlimit_net = hashlimit_pernet(net);
 	struct xt_hashlimit_htable *hinfo;
-	struct hlist_node *pos;
 
-	hlist_for_each_entry(hinfo, pos, &hashlimit_net->htables, node) {
+	hlist_for_each_entry(hinfo, &hashlimit_net->htables, node) {
 		if (!strcmp(name, hinfo->pde->name) &&
 		    hinfo->family == family) {
 			hinfo->use++;
@@ -821,10 +819,9 @@ static int dl_seq_show(struct seq_file *s, void *v)
 	struct xt_hashlimit_htable *htable = s->private;
 	unsigned int *bucket = (unsigned int *)v;
 	struct dsthash_ent *ent;
-	struct hlist_node *pos;
 
 	if (!hlist_empty(&htable->hash[*bucket])) {
-		hlist_for_each_entry(ent, pos, &htable->hash[*bucket], node)
+		hlist_for_each_entry(ent, &htable->hash[*bucket], node)
 			if (dl_seq_real_show(ent, htable->family, s))
 				return -1;
 	}
@@ -877,7 +874,6 @@ static int __net_init hashlimit_proc_net_init(struct net *net)
 static void __net_exit hashlimit_proc_net_exit(struct net *net)
 {
 	struct xt_hashlimit_htable *hinfo;
-	struct hlist_node *pos;
 	struct proc_dir_entry *pde;
 	struct hashlimit_net *hashlimit_net = hashlimit_pernet(net);
 
@@ -890,7 +886,7 @@ static void __net_exit hashlimit_proc_net_exit(struct net *net)
 	if (pde == NULL)
 		pde = hashlimit_net->ip6t_hashlimit;
 
-	hlist_for_each_entry(hinfo, pos, &hashlimit_net->htables, node)
+	hlist_for_each_entry(hinfo, &hashlimit_net->htables, node)
 		remove_proc_entry(hinfo->pde->name, pde);
 
 	hashlimit_net->ipt_hashlimit = NULL;
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 8097b4f3ead4..1e3fd5bfcd86 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -248,11 +248,10 @@ static struct sock *netlink_lookup(struct net *net, int protocol, u32 portid)
 	struct nl_portid_hash *hash = &nl_table[protocol].hash;
 	struct hlist_head *head;
 	struct sock *sk;
-	struct hlist_node *node;
 
 	read_lock(&nl_table_lock);
 	head = nl_portid_hashfn(hash, portid);
-	sk_for_each(sk, node, head) {
+	sk_for_each(sk, head) {
 		if (net_eq(sock_net(sk), net) && (nlk_sk(sk)->portid == portid)) {
 			sock_hold(sk);
 			goto found;
@@ -312,9 +311,9 @@ static int nl_portid_hash_rehash(struct nl_portid_hash *hash, int grow)
 
 	for (i = 0; i <= omask; i++) {
 		struct sock *sk;
-		struct hlist_node *node, *tmp;
+		struct hlist_node *tmp;
 
-		sk_for_each_safe(sk, node, tmp, &otable[i])
+		sk_for_each_safe(sk, tmp, &otable[i])
 			__sk_add_node(sk, nl_portid_hashfn(hash, nlk_sk(sk)->portid));
 	}
 
@@ -344,7 +343,6 @@ static void
 netlink_update_listeners(struct sock *sk)
 {
 	struct netlink_table *tbl = &nl_table[sk->sk_protocol];
-	struct hlist_node *node;
 	unsigned long mask;
 	unsigned int i;
 	struct listeners *listeners;
@@ -355,7 +353,7 @@ netlink_update_listeners(struct sock *sk)
 
 	for (i = 0; i < NLGRPLONGS(tbl->groups); i++) {
 		mask = 0;
-		sk_for_each_bound(sk, node, &tbl->mc_list) {
+		sk_for_each_bound(sk, &tbl->mc_list) {
 			if (i < NLGRPLONGS(nlk_sk(sk)->ngroups))
 				mask |= nlk_sk(sk)->groups[i];
 		}
@@ -371,18 +369,17 @@ static int netlink_insert(struct sock *sk, struct net *net, u32 portid)
 	struct hlist_head *head;
 	int err = -EADDRINUSE;
 	struct sock *osk;
-	struct hlist_node *node;
 	int len;
 
 	netlink_table_grab();
 	head = nl_portid_hashfn(hash, portid);
 	len = 0;
-	sk_for_each(osk, node, head) {
+	sk_for_each(osk, head) {
 		if (net_eq(sock_net(osk), net) && (nlk_sk(osk)->portid == portid))
 			break;
 		len++;
 	}
-	if (node)
+	if (osk)
 		goto err;
 
 	err = -EBUSY;
@@ -575,7 +572,6 @@ static int netlink_autobind(struct socket *sock)
 	struct nl_portid_hash *hash = &nl_table[sk->sk_protocol].hash;
 	struct hlist_head *head;
 	struct sock *osk;
-	struct hlist_node *node;
 	s32 portid = task_tgid_vnr(current);
 	int err;
 	static s32 rover = -4097;
@@ -584,7 +580,7 @@ retry:
 	cond_resched();
 	netlink_table_grab();
 	head = nl_portid_hashfn(hash, portid);
-	sk_for_each(osk, node, head) {
+	sk_for_each(osk, head) {
 		if (!net_eq(sock_net(osk), net))
 			continue;
 		if (nlk_sk(osk)->portid == portid) {
@@ -1101,7 +1097,6 @@ int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 portid
 {
 	struct net *net = sock_net(ssk);
 	struct netlink_broadcast_data info;
-	struct hlist_node *node;
 	struct sock *sk;
 
 	skb = netlink_trim(skb, allocation);
@@ -1124,7 +1119,7 @@ int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 portid
 
 	netlink_lock_table();
 
-	sk_for_each_bound(sk, node, &nl_table[ssk->sk_protocol].mc_list)
+	sk_for_each_bound(sk, &nl_table[ssk->sk_protocol].mc_list)
 		do_one_broadcast(sk, &info);
 
 	consume_skb(skb);
@@ -1200,7 +1195,6 @@ out:
 int netlink_set_err(struct sock *ssk, u32 portid, u32 group, int code)
 {
 	struct netlink_set_err_data info;
-	struct hlist_node *node;
 	struct sock *sk;
 	int ret = 0;
 
@@ -1212,7 +1206,7 @@ int netlink_set_err(struct sock *ssk, u32 portid, u32 group, int code)
 
 	read_lock(&nl_table_lock);
 
-	sk_for_each_bound(sk, node, &nl_table[ssk->sk_protocol].mc_list)
+	sk_for_each_bound(sk, &nl_table[ssk->sk_protocol].mc_list)
 		ret += do_one_set_err(sk, &info);
 
 	read_unlock(&nl_table_lock);
@@ -1676,10 +1670,9 @@ int netlink_change_ngroups(struct sock *sk, unsigned int groups)
 void __netlink_clear_multicast_users(struct sock *ksk, unsigned int group)
 {
 	struct sock *sk;
-	struct hlist_node *node;
 	struct netlink_table *tbl = &nl_table[ksk->sk_protocol];
 
-	sk_for_each_bound(sk, node, &tbl->mc_list)
+	sk_for_each_bound(sk, &tbl->mc_list)
 		netlink_update_socket_mc(nlk_sk(sk), group, 0);
 }
 
@@ -1974,14 +1967,13 @@ static struct sock *netlink_seq_socket_idx(struct seq_file *seq, loff_t pos)
 	struct nl_seq_iter *iter = seq->private;
 	int i, j;
 	struct sock *s;
-	struct hlist_node *node;
 	loff_t off = 0;
 
 	for (i = 0; i < MAX_LINKS; i++) {
 		struct nl_portid_hash *hash = &nl_table[i].hash;
 
 		for (j = 0; j <= hash->mask; j++) {
-			sk_for_each(s, node, &hash->table[j]) {
+			sk_for_each(s, &hash->table[j]) {
 				if (sock_net(s) != seq_file_net(seq))
 					continue;
 				if (off == pos) {
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index 297b07a029de..d1fa1d9ffd2e 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -104,10 +104,9 @@ static void nr_remove_socket(struct sock *sk)
 static void nr_kill_by_device(struct net_device *dev)
 {
 	struct sock *s;
-	struct hlist_node *node;
 
 	spin_lock_bh(&nr_list_lock);
-	sk_for_each(s, node, &nr_list)
+	sk_for_each(s, &nr_list)
 		if (nr_sk(s)->device == dev)
 			nr_disconnect(s, ENETUNREACH);
 	spin_unlock_bh(&nr_list_lock);
@@ -149,10 +148,9 @@ static void nr_insert_socket(struct sock *sk)
 static struct sock *nr_find_listener(ax25_address *addr)
 {
 	struct sock *s;
-	struct hlist_node *node;
 
 	spin_lock_bh(&nr_list_lock);
-	sk_for_each(s, node, &nr_list)
+	sk_for_each(s, &nr_list)
 		if (!ax25cmp(&nr_sk(s)->source_addr, addr) &&
 		    s->sk_state == TCP_LISTEN) {
 			bh_lock_sock(s);
@@ -170,10 +168,9 @@ found:
 static struct sock *nr_find_socket(unsigned char index, unsigned char id)
 {
 	struct sock *s;
-	struct hlist_node *node;
 
 	spin_lock_bh(&nr_list_lock);
-	sk_for_each(s, node, &nr_list) {
+	sk_for_each(s, &nr_list) {
 		struct nr_sock *nr = nr_sk(s);
 
 		if (nr->my_index == index && nr->my_id == id) {
@@ -194,10 +191,9 @@ static struct sock *nr_find_peer(unsigned char index, unsigned char id,
 	ax25_address *dest)
 {
 	struct sock *s;
-	struct hlist_node *node;
 
 	spin_lock_bh(&nr_list_lock);
-	sk_for_each(s, node, &nr_list) {
+	sk_for_each(s, &nr_list) {
 		struct nr_sock *nr = nr_sk(s);
 
 		if (nr->your_index == index && nr->your_id == id &&
diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c
index 70ffff76a967..b976d5eff2de 100644
--- a/net/netrom/nr_route.c
+++ b/net/netrom/nr_route.c
@@ -49,10 +49,9 @@ static struct nr_node *nr_node_get(ax25_address *callsign)
 {
 	struct nr_node *found = NULL;
 	struct nr_node *nr_node;
-	struct hlist_node *node;
 
 	spin_lock_bh(&nr_node_list_lock);
-	nr_node_for_each(nr_node, node, &nr_node_list)
+	nr_node_for_each(nr_node, &nr_node_list)
 		if (ax25cmp(callsign, &nr_node->callsign) == 0) {
 			nr_node_hold(nr_node);
 			found = nr_node;
@@ -67,10 +66,9 @@ static struct nr_neigh *nr_neigh_get_dev(ax25_address *callsign,
 {
 	struct nr_neigh *found = NULL;
 	struct nr_neigh *nr_neigh;
-	struct hlist_node *node;
 
 	spin_lock_bh(&nr_neigh_list_lock);
-	nr_neigh_for_each(nr_neigh, node, &nr_neigh_list)
+	nr_neigh_for_each(nr_neigh, &nr_neigh_list)
 		if (ax25cmp(callsign, &nr_neigh->callsign) == 0 &&
 		    nr_neigh->dev == dev) {
 			nr_neigh_hold(nr_neigh);
@@ -114,10 +112,9 @@ static int __must_check nr_add_node(ax25_address *nr, const char *mnemonic,
 	 */
 	if (nr_neigh != NULL && nr_neigh->failed != 0 && quality == 0) {
 		struct nr_node *nr_nodet;
-		struct hlist_node *node;
 
 		spin_lock_bh(&nr_node_list_lock);
-		nr_node_for_each(nr_nodet, node, &nr_node_list) {
+		nr_node_for_each(nr_nodet, &nr_node_list) {
 			nr_node_lock(nr_nodet);
 			for (i = 0; i < nr_nodet->count; i++)
 				if (nr_nodet->routes[i].neighbour == nr_neigh)
@@ -485,11 +482,11 @@ static int nr_dec_obs(void)
 {
 	struct nr_neigh *nr_neigh;
 	struct nr_node  *s;
-	struct hlist_node *node, *nodet;
+	struct hlist_node *nodet;
 	int i;
 
 	spin_lock_bh(&nr_node_list_lock);
-	nr_node_for_each_safe(s, node, nodet, &nr_node_list) {
+	nr_node_for_each_safe(s, nodet, &nr_node_list) {
 		nr_node_lock(s);
 		for (i = 0; i < s->count; i++) {
 			switch (s->routes[i].obs_count) {
@@ -540,15 +537,15 @@ static int nr_dec_obs(void)
 void nr_rt_device_down(struct net_device *dev)
 {
 	struct nr_neigh *s;
-	struct hlist_node *node, *nodet, *node2, *node2t;
+	struct hlist_node *nodet, *node2t;
 	struct nr_node  *t;
 	int i;
 
 	spin_lock_bh(&nr_neigh_list_lock);
-	nr_neigh_for_each_safe(s, node, nodet, &nr_neigh_list) {
+	nr_neigh_for_each_safe(s, nodet, &nr_neigh_list) {
 		if (s->dev == dev) {
 			spin_lock_bh(&nr_node_list_lock);
-			nr_node_for_each_safe(t, node2, node2t, &nr_node_list) {
+			nr_node_for_each_safe(t, node2t, &nr_node_list) {
 				nr_node_lock(t);
 				for (i = 0; i < t->count; i++) {
 					if (t->routes[i].neighbour == s) {
@@ -737,11 +734,10 @@ int nr_rt_ioctl(unsigned int cmd, void __user *arg)
 void nr_link_failed(ax25_cb *ax25, int reason)
 {
 	struct nr_neigh *s, *nr_neigh = NULL;
-	struct hlist_node *node;
 	struct nr_node  *nr_node = NULL;
 
 	spin_lock_bh(&nr_neigh_list_lock);
-	nr_neigh_for_each(s, node, &nr_neigh_list) {
+	nr_neigh_for_each(s, &nr_neigh_list) {
 		if (s->ax25 == ax25) {
 			nr_neigh_hold(s);
 			nr_neigh = s;
@@ -761,7 +757,7 @@ void nr_link_failed(ax25_cb *ax25, int reason)
 		return;
 	}
 	spin_lock_bh(&nr_node_list_lock);
-	nr_node_for_each(nr_node, node, &nr_node_list) {
+	nr_node_for_each(nr_node, &nr_node_list) {
 		nr_node_lock(nr_node);
 		if (nr_node->which < nr_node->count &&
 		    nr_node->routes[nr_node->which].neighbour == nr_neigh)
@@ -1013,16 +1009,16 @@ void __exit nr_rt_free(void)
 {
 	struct nr_neigh *s = NULL;
 	struct nr_node  *t = NULL;
-	struct hlist_node *node, *nodet;
+	struct hlist_node *nodet;
 
 	spin_lock_bh(&nr_neigh_list_lock);
 	spin_lock_bh(&nr_node_list_lock);
-	nr_node_for_each_safe(t, node, nodet, &nr_node_list) {
+	nr_node_for_each_safe(t, nodet, &nr_node_list) {
 		nr_node_lock(t);
 		nr_remove_node_locked(t);
 		nr_node_unlock(t);
 	}
-	nr_neigh_for_each_safe(s, node, nodet, &nr_neigh_list) {
+	nr_neigh_for_each_safe(s, nodet, &nr_neigh_list) {
 		while(s->count) {
 			s->count--;
 			nr_neigh_put(s);
diff --git a/net/nfc/llcp/llcp.c b/net/nfc/llcp/llcp.c
index 746f5a2f9804..7f8266dd14cb 100644
--- a/net/nfc/llcp/llcp.c
+++ b/net/nfc/llcp/llcp.c
@@ -71,14 +71,14 @@ static void nfc_llcp_socket_purge(struct nfc_llcp_sock *sock)
 static void nfc_llcp_socket_release(struct nfc_llcp_local *local, bool listen)
 {
 	struct sock *sk;
-	struct hlist_node *node, *tmp;
+	struct hlist_node *tmp;
 	struct nfc_llcp_sock *llcp_sock;
 
 	skb_queue_purge(&local->tx_queue);
 
 	write_lock(&local->sockets.lock);
 
-	sk_for_each_safe(sk, node, tmp, &local->sockets.head) {
+	sk_for_each_safe(sk, tmp, &local->sockets.head) {
 		llcp_sock = nfc_llcp_sock(sk);
 
 		bh_lock_sock(sk);
@@ -171,7 +171,6 @@ static struct nfc_llcp_sock *nfc_llcp_sock_get(struct nfc_llcp_local *local,
 					       u8 ssap, u8 dsap)
 {
 	struct sock *sk;
-	struct hlist_node *node;
 	struct nfc_llcp_sock *llcp_sock, *tmp_sock;
 
 	pr_debug("ssap dsap %d %d\n", ssap, dsap);
@@ -183,7 +182,7 @@ static struct nfc_llcp_sock *nfc_llcp_sock_get(struct nfc_llcp_local *local,
 
 	llcp_sock = NULL;
 
-	sk_for_each(sk, node, &local->sockets.head) {
+	sk_for_each(sk, &local->sockets.head) {
 		tmp_sock = nfc_llcp_sock(sk);
 
 		if (tmp_sock->ssap == ssap && tmp_sock->dsap == dsap) {
@@ -272,7 +271,6 @@ struct nfc_llcp_sock *nfc_llcp_sock_from_sn(struct nfc_llcp_local *local,
 					    u8 *sn, size_t sn_len)
 {
 	struct sock *sk;
-	struct hlist_node *node;
 	struct nfc_llcp_sock *llcp_sock, *tmp_sock;
 
 	pr_debug("sn %zd %p\n", sn_len, sn);
@@ -284,7 +282,7 @@ struct nfc_llcp_sock *nfc_llcp_sock_from_sn(struct nfc_llcp_local *local,
 
 	llcp_sock = NULL;
 
-	sk_for_each(sk, node, &local->sockets.head) {
+	sk_for_each(sk, &local->sockets.head) {
 		tmp_sock = nfc_llcp_sock(sk);
 
 		pr_debug("llcp sock %p\n", tmp_sock);
@@ -601,14 +599,13 @@ static void nfc_llcp_set_nrns(struct nfc_llcp_sock *sock, struct sk_buff *pdu)
 void nfc_llcp_send_to_raw_sock(struct nfc_llcp_local *local,
 			       struct sk_buff *skb, u8 direction)
 {
-	struct hlist_node *node;
 	struct sk_buff *skb_copy = NULL, *nskb;
 	struct sock *sk;
 	u8 *data;
 
 	read_lock(&local->raw_sockets.lock);
 
-	sk_for_each(sk, node, &local->raw_sockets.head) {
+	sk_for_each(sk, &local->raw_sockets.head) {
 		if (sk->sk_state != LLCP_BOUND)
 			continue;
 
@@ -697,11 +694,10 @@ static struct nfc_llcp_sock *nfc_llcp_connecting_sock_get(struct nfc_llcp_local
 {
 	struct sock *sk;
 	struct nfc_llcp_sock *llcp_sock;
-	struct hlist_node *node;
 
 	read_lock(&local->connecting_sockets.lock);
 
-	sk_for_each(sk, node, &local->connecting_sockets.head) {
+	sk_for_each(sk, &local->connecting_sockets.head) {
 		llcp_sock = nfc_llcp_sock(sk);
 
 		if (llcp_sock->ssap == ssap) {
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 9dc537df46c4..e87a26506dba 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -158,11 +158,10 @@ static struct hlist_head *vport_hash_bucket(const struct datapath *dp,
 struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no)
 {
 	struct vport *vport;
-	struct hlist_node *n;
 	struct hlist_head *head;
 
 	head = vport_hash_bucket(dp, port_no);
-	hlist_for_each_entry_rcu(vport, n, head, dp_hash_node) {
+	hlist_for_each_entry_rcu(vport, head, dp_hash_node) {
 		if (vport->port_no == port_no)
 			return vport;
 	}
@@ -1386,9 +1385,9 @@ static void __dp_destroy(struct datapath *dp)
 
 	for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
 		struct vport *vport;
-		struct hlist_node *node, *n;
+		struct hlist_node *n;
 
-		hlist_for_each_entry_safe(vport, node, n, &dp->ports[i], dp_hash_node)
+		hlist_for_each_entry_safe(vport, n, &dp->ports[i], dp_hash_node)
 			if (vport->port_no != OVSP_LOCAL)
 				ovs_dp_detach_port(vport);
 	}
@@ -1825,10 +1824,9 @@ static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
 	rcu_read_lock();
 	for (i = bucket; i < DP_VPORT_HASH_BUCKETS; i++) {
 		struct vport *vport;
-		struct hlist_node *n;
 
 		j = 0;
-		hlist_for_each_entry_rcu(vport, n, &dp->ports[i], dp_hash_node) {
+		hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) {
 			if (j >= skip &&
 			    ovs_vport_cmd_fill_info(vport, skb,
 						    NETLINK_CB(cb->skb).portid,
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index c3294cebc4f2..20605ecf100b 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -299,10 +299,10 @@ void ovs_flow_tbl_destroy(struct flow_table *table)
 	for (i = 0; i < table->n_buckets; i++) {
 		struct sw_flow *flow;
 		struct hlist_head *head = flex_array_get(table->buckets, i);
-		struct hlist_node *node, *n;
+		struct hlist_node *n;
 		int ver = table->node_ver;
 
-		hlist_for_each_entry_safe(flow, node, n, head, hash_node[ver]) {
+		hlist_for_each_entry_safe(flow, n, head, hash_node[ver]) {
 			hlist_del_rcu(&flow->hash_node[ver]);
 			ovs_flow_free(flow);
 		}
@@ -332,7 +332,6 @@ struct sw_flow *ovs_flow_tbl_next(struct flow_table *table, u32 *bucket, u32 *la
 {
 	struct sw_flow *flow;
 	struct hlist_head *head;
-	struct hlist_node *n;
 	int ver;
 	int i;
 
@@ -340,7 +339,7 @@ struct sw_flow *ovs_flow_tbl_next(struct flow_table *table, u32 *bucket, u32 *la
 	while (*bucket < table->n_buckets) {
 		i = 0;
 		head = flex_array_get(table->buckets, *bucket);
-		hlist_for_each_entry_rcu(flow, n, head, hash_node[ver]) {
+		hlist_for_each_entry_rcu(flow, head, hash_node[ver]) {
 			if (i < *last) {
 				i++;
 				continue;
@@ -367,11 +366,10 @@ static void flow_table_copy_flows(struct flow_table *old, struct flow_table *new
 	for (i = 0; i < old->n_buckets; i++) {
 		struct sw_flow *flow;
 		struct hlist_head *head;
-		struct hlist_node *n;
 
 		head = flex_array_get(old->buckets, i);
 
-		hlist_for_each_entry(flow, n, head, hash_node[old_ver])
+		hlist_for_each_entry(flow, head, hash_node[old_ver])
 			ovs_flow_tbl_insert(new, flow);
 	}
 	old->keep_flows = true;
@@ -766,14 +764,13 @@ struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *table,
 				struct sw_flow_key *key, int key_len)
 {
 	struct sw_flow *flow;
-	struct hlist_node *n;
 	struct hlist_head *head;
 	u32 hash;
 
 	hash = ovs_flow_hash(key, key_len);
 
 	head = find_bucket(table, hash);
-	hlist_for_each_entry_rcu(flow, n, head, hash_node[table->node_ver]) {
+	hlist_for_each_entry_rcu(flow, head, hash_node[table->node_ver]) {
 
 		if (flow->hash == hash &&
 		    !memcmp(&flow->key, key, key_len)) {
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index 70af0bedbac4..ba717cc038b3 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -86,9 +86,8 @@ struct vport *ovs_vport_locate(struct net *net, const char *name)
 {
 	struct hlist_head *bucket = hash_bucket(net, name);
 	struct vport *vport;
-	struct hlist_node *node;
 
-	hlist_for_each_entry_rcu(vport, node, bucket, hash_node)
+	hlist_for_each_entry_rcu(vport, bucket, hash_node)
 		if (!strcmp(name, vport->ops->get_name(vport)) &&
 		    net_eq(ovs_dp_get_net(vport->dp), net))
 			return vport;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index c7bfeff10767..1d6793dbfbae 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -3263,12 +3263,11 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
 static int packet_notifier(struct notifier_block *this, unsigned long msg, void *data)
 {
 	struct sock *sk;
-	struct hlist_node *node;
 	struct net_device *dev = data;
 	struct net *net = dev_net(dev);
 
 	rcu_read_lock();
-	sk_for_each_rcu(sk, node, &net->packet.sklist) {
+	sk_for_each_rcu(sk, &net->packet.sklist) {
 		struct packet_sock *po = pkt_sk(sk);
 
 		switch (msg) {
diff --git a/net/packet/diag.c b/net/packet/diag.c
index 8db6e21c46bd..d3fcd1ebef7e 100644
--- a/net/packet/diag.c
+++ b/net/packet/diag.c
@@ -172,13 +172,12 @@ static int packet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
 	struct packet_diag_req *req;
 	struct net *net;
 	struct sock *sk;
-	struct hlist_node *node;
 
 	net = sock_net(skb->sk);
 	req = nlmsg_data(cb->nlh);
 
 	mutex_lock(&net->packet.sklist_lock);
-	sk_for_each(sk, node, &net->packet.sklist) {
+	sk_for_each(sk, &net->packet.sklist) {
 		if (!net_eq(sock_net(sk), net))
 			continue;
 		if (num < s_num)
diff --git a/net/phonet/pep.c b/net/phonet/pep.c
index 576f22c9c76e..e77411735de8 100644
--- a/net/phonet/pep.c
+++ b/net/phonet/pep.c
@@ -640,11 +640,10 @@ static struct sock *pep_find_pipe(const struct hlist_head *hlist,
 					const struct sockaddr_pn *dst,
 					u8 pipe_handle)
 {
-	struct hlist_node *node;
 	struct sock *sknode;
 	u16 dobj = pn_sockaddr_get_object(dst);
 
-	sk_for_each(sknode, node, hlist) {
+	sk_for_each(sknode, hlist) {
 		struct pep_sock *pnnode = pep_sk(sknode);
 
 		/* Ports match, but addresses might not: */
diff --git a/net/phonet/socket.c b/net/phonet/socket.c
index b7e982782255..1afd1381cdc7 100644
--- a/net/phonet/socket.c
+++ b/net/phonet/socket.c
@@ -76,7 +76,6 @@ static struct hlist_head *pn_hash_list(u16 obj)
  */
 struct sock *pn_find_sock_by_sa(struct net *net, const struct sockaddr_pn *spn)
 {
-	struct hlist_node *node;
 	struct sock *sknode;
 	struct sock *rval = NULL;
 	u16 obj = pn_sockaddr_get_object(spn);
@@ -84,7 +83,7 @@ struct sock *pn_find_sock_by_sa(struct net *net, const struct sockaddr_pn *spn)
 	struct hlist_head *hlist = pn_hash_list(obj);
 
 	rcu_read_lock();
-	sk_for_each_rcu(sknode, node, hlist) {
+	sk_for_each_rcu(sknode, hlist) {
 		struct pn_sock *pn = pn_sk(sknode);
 		BUG_ON(!pn->sobject); /* unbound socket */
 
@@ -120,10 +119,9 @@ void pn_deliver_sock_broadcast(struct net *net, struct sk_buff *skb)
 
 	rcu_read_lock();
 	for (h = 0; h < PN_HASHSIZE; h++) {
-		struct hlist_node *node;
 		struct sock *sknode;
 
-		sk_for_each(sknode, node, hlist) {
+		sk_for_each(sknode, hlist) {
 			struct sk_buff *clone;
 
 			if (!net_eq(sock_net(sknode), net))
@@ -543,12 +541,11 @@ static struct sock *pn_sock_get_idx(struct seq_file *seq, loff_t pos)
 {
 	struct net *net = seq_file_net(seq);
 	struct hlist_head *hlist = pnsocks.hlist;
-	struct hlist_node *node;
 	struct sock *sknode;
 	unsigned int h;
 
 	for (h = 0; h < PN_HASHSIZE; h++) {
-		sk_for_each_rcu(sknode, node, hlist) {
+		sk_for_each_rcu(sknode, hlist) {
 			if (!net_eq(net, sock_net(sknode)))
 				continue;
 			if (!pos)
diff --git a/net/rds/bind.c b/net/rds/bind.c
index 637bde56c9db..b5ad65a0067e 100644
--- a/net/rds/bind.c
+++ b/net/rds/bind.c
@@ -52,13 +52,12 @@ static struct rds_sock *rds_bind_lookup(__be32 addr, __be16 port,
 					struct rds_sock *insert)
 {
 	struct rds_sock *rs;
-	struct hlist_node *node;
 	struct hlist_head *head = hash_to_bucket(addr, port);
 	u64 cmp;
 	u64 needle = ((u64)be32_to_cpu(addr) << 32) | be16_to_cpu(port);
 
 	rcu_read_lock();
-	hlist_for_each_entry_rcu(rs, node, head, rs_bound_node) {
+	hlist_for_each_entry_rcu(rs, head, rs_bound_node) {
 		cmp = ((u64)be32_to_cpu(rs->rs_bound_addr) << 32) |
 		      be16_to_cpu(rs->rs_bound_port);
 
diff --git a/net/rds/connection.c b/net/rds/connection.c
index 9e07c756d1f9..642ad42c416b 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -69,9 +69,8 @@ static struct rds_connection *rds_conn_lookup(struct hlist_head *head,
 					      struct rds_transport *trans)
 {
 	struct rds_connection *conn, *ret = NULL;
-	struct hlist_node *pos;
 
-	hlist_for_each_entry_rcu(conn, pos, head, c_hash_node) {
+	hlist_for_each_entry_rcu(conn, head, c_hash_node) {
 		if (conn->c_faddr == faddr && conn->c_laddr == laddr &&
 				conn->c_trans == trans) {
 			ret = conn;
@@ -376,7 +375,6 @@ static void rds_conn_message_info(struct socket *sock, unsigned int len,
 				  int want_send)
 {
 	struct hlist_head *head;
-	struct hlist_node *pos;
 	struct list_head *list;
 	struct rds_connection *conn;
 	struct rds_message *rm;
@@ -390,7 +388,7 @@ static void rds_conn_message_info(struct socket *sock, unsigned int len,
 
 	for (i = 0, head = rds_conn_hash; i < ARRAY_SIZE(rds_conn_hash);
 	     i++, head++) {
-		hlist_for_each_entry_rcu(conn, pos, head, c_hash_node) {
+		hlist_for_each_entry_rcu(conn, head, c_hash_node) {
 			if (want_send)
 				list = &conn->c_send_queue;
 			else
@@ -439,7 +437,6 @@ void rds_for_each_conn_info(struct socket *sock, unsigned int len,
 {
 	uint64_t buffer[(item_len + 7) / 8];
 	struct hlist_head *head;
-	struct hlist_node *pos;
 	struct rds_connection *conn;
 	size_t i;
 
@@ -450,7 +447,7 @@ void rds_for_each_conn_info(struct socket *sock, unsigned int len,
 
 	for (i = 0, head = rds_conn_hash; i < ARRAY_SIZE(rds_conn_hash);
 	     i++, head++) {
-		hlist_for_each_entry_rcu(conn, pos, head, c_hash_node) {
+		hlist_for_each_entry_rcu(conn, head, c_hash_node) {
 
 			/* XXX no c_lock usage.. */
 			if (!visitor(conn, buffer))
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index b768fe9d5e7a..cf68e6e4054a 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -165,10 +165,9 @@ static void rose_remove_socket(struct sock *sk)
 void rose_kill_by_neigh(struct rose_neigh *neigh)
 {
 	struct sock *s;
-	struct hlist_node *node;
 
 	spin_lock_bh(&rose_list_lock);
-	sk_for_each(s, node, &rose_list) {
+	sk_for_each(s, &rose_list) {
 		struct rose_sock *rose = rose_sk(s);
 
 		if (rose->neighbour == neigh) {
@@ -186,10 +185,9 @@ void rose_kill_by_neigh(struct rose_neigh *neigh)
 static void rose_kill_by_device(struct net_device *dev)
 {
 	struct sock *s;
-	struct hlist_node *node;
 
 	spin_lock_bh(&rose_list_lock);
-	sk_for_each(s, node, &rose_list) {
+	sk_for_each(s, &rose_list) {
 		struct rose_sock *rose = rose_sk(s);
 
 		if (rose->device == dev) {
@@ -246,10 +244,9 @@ static void rose_insert_socket(struct sock *sk)
 static struct sock *rose_find_listener(rose_address *addr, ax25_address *call)
 {
 	struct sock *s;
-	struct hlist_node *node;
 
 	spin_lock_bh(&rose_list_lock);
-	sk_for_each(s, node, &rose_list) {
+	sk_for_each(s, &rose_list) {
 		struct rose_sock *rose = rose_sk(s);
 
 		if (!rosecmp(&rose->source_addr, addr) &&
@@ -258,7 +255,7 @@ static struct sock *rose_find_listener(rose_address *addr, ax25_address *call)
 			goto found;
 	}
 
-	sk_for_each(s, node, &rose_list) {
+	sk_for_each(s, &rose_list) {
 		struct rose_sock *rose = rose_sk(s);
 
 		if (!rosecmp(&rose->source_addr, addr) &&
@@ -278,10 +275,9 @@ found:
 struct sock *rose_find_socket(unsigned int lci, struct rose_neigh *neigh)
 {
 	struct sock *s;
-	struct hlist_node *node;
 
 	spin_lock_bh(&rose_list_lock);
-	sk_for_each(s, node, &rose_list) {
+	sk_for_each(s, &rose_list) {
 		struct rose_sock *rose = rose_sk(s);
 
 		if (rose->lci == lci && rose->neighbour == neigh)
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index a181b484812a..c297e2a8e2a1 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -545,7 +545,7 @@ static void qdisc_class_hash_free(struct hlist_head *h, unsigned int n)
 void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
 {
 	struct Qdisc_class_common *cl;
-	struct hlist_node *n, *next;
+	struct hlist_node *next;
 	struct hlist_head *nhash, *ohash;
 	unsigned int nsize, nmask, osize;
 	unsigned int i, h;
@@ -564,7 +564,7 @@ void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
 
 	sch_tree_lock(sch);
 	for (i = 0; i < osize; i++) {
-		hlist_for_each_entry_safe(cl, n, next, &ohash[i], hnode) {
+		hlist_for_each_entry_safe(cl, next, &ohash[i], hnode) {
 			h = qdisc_class_hash(cl->classid, nmask);
 			hlist_add_head(&cl->hnode, &nhash[h]);
 		}
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 0e19948470b8..13aa47aa2ffb 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -1041,14 +1041,13 @@ static void cbq_adjust_levels(struct cbq_class *this)
 static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
 {
 	struct cbq_class *cl;
-	struct hlist_node *n;
 	unsigned int h;
 
 	if (q->quanta[prio] == 0)
 		return;
 
 	for (h = 0; h < q->clhash.hashsize; h++) {
-		hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
+		hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) {
 			/* BUGGGG... Beware! This expression suffer of
 			 * arithmetic overflows!
 			 */
@@ -1087,10 +1086,9 @@ static void cbq_sync_defmap(struct cbq_class *cl)
 			continue;
 
 		for (h = 0; h < q->clhash.hashsize; h++) {
-			struct hlist_node *n;
 			struct cbq_class *c;
 
-			hlist_for_each_entry(c, n, &q->clhash.hash[h],
+			hlist_for_each_entry(c, &q->clhash.hash[h],
 					     common.hnode) {
 				if (c->split == split && c->level < level &&
 				    c->defmap & (1<<i)) {
@@ -1210,7 +1208,6 @@ cbq_reset(struct Qdisc *sch)
 {
 	struct cbq_sched_data *q = qdisc_priv(sch);
 	struct cbq_class *cl;
-	struct hlist_node *n;
 	int prio;
 	unsigned int h;
 
@@ -1228,7 +1225,7 @@ cbq_reset(struct Qdisc *sch)
 		q->active[prio] = NULL;
 
 	for (h = 0; h < q->clhash.hashsize; h++) {
-		hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
+		hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) {
 			qdisc_reset(cl->q);
 
 			cl->next_alive = NULL;
@@ -1697,7 +1694,7 @@ static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
 static void cbq_destroy(struct Qdisc *sch)
 {
 	struct cbq_sched_data *q = qdisc_priv(sch);
-	struct hlist_node *n, *next;
+	struct hlist_node *next;
 	struct cbq_class *cl;
 	unsigned int h;
 
@@ -1710,11 +1707,11 @@ static void cbq_destroy(struct Qdisc *sch)
 	 * be bound to classes which have been destroyed already. --TGR '04
 	 */
 	for (h = 0; h < q->clhash.hashsize; h++) {
-		hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode)
+		hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode)
 			tcf_destroy_chain(&cl->filter_list);
 	}
 	for (h = 0; h < q->clhash.hashsize; h++) {
-		hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[h],
+		hlist_for_each_entry_safe(cl, next, &q->clhash.hash[h],
 					  common.hnode)
 			cbq_destroy_class(sch, cl);
 	}
@@ -2013,14 +2010,13 @@ static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
 {
 	struct cbq_sched_data *q = qdisc_priv(sch);
 	struct cbq_class *cl;
-	struct hlist_node *n;
 	unsigned int h;
 
 	if (arg->stop)
 		return;
 
 	for (h = 0; h < q->clhash.hashsize; h++) {
-		hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
+		hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) {
 			if (arg->count < arg->skip) {
 				arg->count++;
 				continue;
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index 71e50c80315f..759b308d1a8d 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -293,14 +293,13 @@ static void drr_walk(struct Qdisc *sch, struct qdisc_walker *arg)
 {
 	struct drr_sched *q = qdisc_priv(sch);
 	struct drr_class *cl;
-	struct hlist_node *n;
 	unsigned int i;
 
 	if (arg->stop)
 		return;
 
 	for (i = 0; i < q->clhash.hashsize; i++) {
-		hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) {
+		hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
 			if (arg->count < arg->skip) {
 				arg->count++;
 				continue;
@@ -451,11 +450,10 @@ static void drr_reset_qdisc(struct Qdisc *sch)
 {
 	struct drr_sched *q = qdisc_priv(sch);
 	struct drr_class *cl;
-	struct hlist_node *n;
 	unsigned int i;
 
 	for (i = 0; i < q->clhash.hashsize; i++) {
-		hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) {
+		hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
 			if (cl->qdisc->q.qlen)
 				list_del(&cl->alist);
 			qdisc_reset(cl->qdisc);
@@ -468,13 +466,13 @@ static void drr_destroy_qdisc(struct Qdisc *sch)
 {
 	struct drr_sched *q = qdisc_priv(sch);
 	struct drr_class *cl;
-	struct hlist_node *n, *next;
+	struct hlist_node *next;
 	unsigned int i;
 
 	tcf_destroy_chain(&q->filter_list);
 
 	for (i = 0; i < q->clhash.hashsize; i++) {
-		hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[i],
+		hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i],
 					  common.hnode)
 			drr_destroy_class(sch, cl);
 	}
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 6c2ec4510540..9facea03faeb 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1389,7 +1389,6 @@ static void
 hfsc_walk(struct Qdisc *sch, struct qdisc_walker *arg)
 {
 	struct hfsc_sched *q = qdisc_priv(sch);
-	struct hlist_node *n;
 	struct hfsc_class *cl;
 	unsigned int i;
 
@@ -1397,7 +1396,7 @@ hfsc_walk(struct Qdisc *sch, struct qdisc_walker *arg)
 		return;
 
 	for (i = 0; i < q->clhash.hashsize; i++) {
-		hlist_for_each_entry(cl, n, &q->clhash.hash[i],
+		hlist_for_each_entry(cl, &q->clhash.hash[i],
 				     cl_common.hnode) {
 			if (arg->count < arg->skip) {
 				arg->count++;
@@ -1523,11 +1522,10 @@ hfsc_reset_qdisc(struct Qdisc *sch)
 {
 	struct hfsc_sched *q = qdisc_priv(sch);
 	struct hfsc_class *cl;
-	struct hlist_node *n;
 	unsigned int i;
 
 	for (i = 0; i < q->clhash.hashsize; i++) {
-		hlist_for_each_entry(cl, n, &q->clhash.hash[i], cl_common.hnode)
+		hlist_for_each_entry(cl, &q->clhash.hash[i], cl_common.hnode)
 			hfsc_reset_class(cl);
 	}
 	q->eligible = RB_ROOT;
@@ -1540,16 +1538,16 @@ static void
 hfsc_destroy_qdisc(struct Qdisc *sch)
 {
 	struct hfsc_sched *q = qdisc_priv(sch);
-	struct hlist_node *n, *next;
+	struct hlist_node *next;
 	struct hfsc_class *cl;
 	unsigned int i;
 
 	for (i = 0; i < q->clhash.hashsize; i++) {
-		hlist_for_each_entry(cl, n, &q->clhash.hash[i], cl_common.hnode)
+		hlist_for_each_entry(cl, &q->clhash.hash[i], cl_common.hnode)
 			tcf_destroy_chain(&cl->filter_list);
 	}
 	for (i = 0; i < q->clhash.hashsize; i++) {
-		hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[i],
+		hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i],
 					  cl_common.hnode)
 			hfsc_destroy_class(sch, cl);
 	}
@@ -1564,12 +1562,11 @@ hfsc_dump_qdisc(struct Qdisc *sch, struct sk_buff *skb)
 	unsigned char *b = skb_tail_pointer(skb);
 	struct tc_hfsc_qopt qopt;
 	struct hfsc_class *cl;
-	struct hlist_node *n;
 	unsigned int i;
 
 	sch->qstats.backlog = 0;
 	for (i = 0; i < q->clhash.hashsize; i++) {
-		hlist_for_each_entry(cl, n, &q->clhash.hash[i], cl_common.hnode)
+		hlist_for_each_entry(cl, &q->clhash.hash[i], cl_common.hnode)
 			sch->qstats.backlog += cl->qdisc->qstats.backlog;
 	}
 
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 03c2692ca01e..571f1d211f4d 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -949,11 +949,10 @@ static void htb_reset(struct Qdisc *sch)
 {
 	struct htb_sched *q = qdisc_priv(sch);
 	struct htb_class *cl;
-	struct hlist_node *n;
 	unsigned int i;
 
 	for (i = 0; i < q->clhash.hashsize; i++) {
-		hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) {
+		hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
 			if (cl->level)
 				memset(&cl->un.inner, 0, sizeof(cl->un.inner));
 			else {
@@ -1218,7 +1217,7 @@ static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
 static void htb_destroy(struct Qdisc *sch)
 {
 	struct htb_sched *q = qdisc_priv(sch);
-	struct hlist_node *n, *next;
+	struct hlist_node *next;
 	struct htb_class *cl;
 	unsigned int i;
 
@@ -1232,11 +1231,11 @@ static void htb_destroy(struct Qdisc *sch)
 	tcf_destroy_chain(&q->filter_list);
 
 	for (i = 0; i < q->clhash.hashsize; i++) {
-		hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode)
+		hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode)
 			tcf_destroy_chain(&cl->filter_list);
 	}
 	for (i = 0; i < q->clhash.hashsize; i++) {
-		hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[i],
+		hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i],
 					  common.hnode)
 			htb_destroy_class(sch, cl);
 	}
@@ -1516,14 +1515,13 @@ static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
 {
 	struct htb_sched *q = qdisc_priv(sch);
 	struct htb_class *cl;
-	struct hlist_node *n;
 	unsigned int i;
 
 	if (arg->stop)
 		return;
 
 	for (i = 0; i < q->clhash.hashsize; i++) {
-		hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) {
+		hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
 			if (arg->count < arg->skip) {
 				arg->count++;
 				continue;
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index 6ed37652a4c3..e9a77f621c3d 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -276,9 +276,8 @@ static struct qfq_aggregate *qfq_find_agg(struct qfq_sched *q,
 					  u32 lmax, u32 weight)
 {
 	struct qfq_aggregate *agg;
-	struct hlist_node *n;
 
-	hlist_for_each_entry(agg, n, &q->nonfull_aggs, nonfull_next)
+	hlist_for_each_entry(agg, &q->nonfull_aggs, nonfull_next)
 		if (agg->lmax == lmax && agg->class_weight == weight)
 			return agg;
 
@@ -670,14 +669,13 @@ static void qfq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
 {
 	struct qfq_sched *q = qdisc_priv(sch);
 	struct qfq_class *cl;
-	struct hlist_node *n;
 	unsigned int i;
 
 	if (arg->stop)
 		return;
 
 	for (i = 0; i < q->clhash.hashsize; i++) {
-		hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) {
+		hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
 			if (arg->count < arg->skip) {
 				arg->count++;
 				continue;
@@ -1376,11 +1374,10 @@ static unsigned int qfq_drop_from_slot(struct qfq_sched *q,
 				       struct hlist_head *slot)
 {
 	struct qfq_aggregate *agg;
-	struct hlist_node *n;
 	struct qfq_class *cl;
 	unsigned int len;
 
-	hlist_for_each_entry(agg, n, slot, next) {
+	hlist_for_each_entry(agg, slot, next) {
 		list_for_each_entry(cl, &agg->active, alist) {
 
 			if (!cl->qdisc->ops->drop)
@@ -1459,11 +1456,10 @@ static void qfq_reset_qdisc(struct Qdisc *sch)
 {
 	struct qfq_sched *q = qdisc_priv(sch);
 	struct qfq_class *cl;
-	struct hlist_node *n;
 	unsigned int i;
 
 	for (i = 0; i < q->clhash.hashsize; i++) {
-		hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) {
+		hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
 			if (cl->qdisc->q.qlen > 0)
 				qfq_deactivate_class(q, cl);
 
@@ -1477,13 +1473,13 @@ static void qfq_destroy_qdisc(struct Qdisc *sch)
 {
 	struct qfq_sched *q = qdisc_priv(sch);
 	struct qfq_class *cl;
-	struct hlist_node *n, *next;
+	struct hlist_node *next;
 	unsigned int i;
 
 	tcf_destroy_chain(&q->filter_list);
 
 	for (i = 0; i < q->clhash.hashsize; i++) {
-		hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[i],
+		hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i],
 					  common.hnode) {
 			qfq_destroy_class(sch, cl);
 		}
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index 73aad3d16a45..2b3ef03c6098 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -332,7 +332,6 @@ static struct sctp_association *__sctp_endpoint_lookup_assoc(
 	struct sctp_transport *t = NULL;
 	struct sctp_hashbucket *head;
 	struct sctp_ep_common *epb;
-	struct hlist_node *node;
 	int hash;
 	int rport;
 
@@ -350,7 +349,7 @@ static struct sctp_association *__sctp_endpoint_lookup_assoc(
 				 rport);
 	head = &sctp_assoc_hashtable[hash];
 	read_lock(&head->lock);
-	sctp_for_each_hentry(epb, node, &head->chain) {
+	sctp_for_each_hentry(epb, &head->chain) {
 		tmp = sctp_assoc(epb);
 		if (tmp->ep != ep || rport != tmp->peer.port)
 			continue;
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 965bbbbe48d4..4b2c83146aa7 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -784,13 +784,12 @@ static struct sctp_endpoint *__sctp_rcv_lookup_endpoint(struct net *net,
 	struct sctp_hashbucket *head;
 	struct sctp_ep_common *epb;
 	struct sctp_endpoint *ep;
-	struct hlist_node *node;
 	int hash;
 
 	hash = sctp_ep_hashfn(net, ntohs(laddr->v4.sin_port));
 	head = &sctp_ep_hashtable[hash];
 	read_lock(&head->lock);
-	sctp_for_each_hentry(epb, node, &head->chain) {
+	sctp_for_each_hentry(epb, &head->chain) {
 		ep = sctp_ep(epb);
 		if (sctp_endpoint_is_match(ep, net, laddr))
 			goto hit;
@@ -876,7 +875,6 @@ static struct sctp_association *__sctp_lookup_association(
 	struct sctp_ep_common *epb;
 	struct sctp_association *asoc;
 	struct sctp_transport *transport;
-	struct hlist_node *node;
 	int hash;
 
 	/* Optimize here for direct hit, only listening connections can
@@ -886,7 +884,7 @@ static struct sctp_association *__sctp_lookup_association(
 				 ntohs(peer->v4.sin_port));
 	head = &sctp_assoc_hashtable[hash];
 	read_lock(&head->lock);
-	sctp_for_each_hentry(epb, node, &head->chain) {
+	sctp_for_each_hentry(epb, &head->chain) {
 		asoc = sctp_assoc(epb);
 		transport = sctp_assoc_is_match(asoc, net, local, peer);
 		if (transport)
diff --git a/net/sctp/proc.c b/net/sctp/proc.c
index 8c19e97262ca..ab3bba8cb0a8 100644
--- a/net/sctp/proc.c
+++ b/net/sctp/proc.c
@@ -213,7 +213,6 @@ static int sctp_eps_seq_show(struct seq_file *seq, void *v)
 	struct sctp_ep_common *epb;
 	struct sctp_endpoint *ep;
 	struct sock *sk;
-	struct hlist_node *node;
 	int    hash = *(loff_t *)v;
 
 	if (hash >= sctp_ep_hashsize)
@@ -222,7 +221,7 @@ static int sctp_eps_seq_show(struct seq_file *seq, void *v)
 	head = &sctp_ep_hashtable[hash];
 	sctp_local_bh_disable();
 	read_lock(&head->lock);
-	sctp_for_each_hentry(epb, node, &head->chain) {
+	sctp_for_each_hentry(epb, &head->chain) {
 		ep = sctp_ep(epb);
 		sk = epb->sk;
 		if (!net_eq(sock_net(sk), seq_file_net(seq)))
@@ -321,7 +320,6 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v)
 	struct sctp_ep_common *epb;
 	struct sctp_association *assoc;
 	struct sock *sk;
-	struct hlist_node *node;
 	int    hash = *(loff_t *)v;
 
 	if (hash >= sctp_assoc_hashsize)
@@ -330,7 +328,7 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v)
 	head = &sctp_assoc_hashtable[hash];
 	sctp_local_bh_disable();
 	read_lock(&head->lock);
-	sctp_for_each_hentry(epb, node, &head->chain) {
+	sctp_for_each_hentry(epb, &head->chain) {
 		assoc = sctp_assoc(epb);
 		sk = epb->sk;
 		if (!net_eq(sock_net(sk), seq_file_net(seq)))
@@ -436,7 +434,6 @@ static int sctp_remaddr_seq_show(struct seq_file *seq, void *v)
 	struct sctp_hashbucket *head;
 	struct sctp_ep_common *epb;
 	struct sctp_association *assoc;
-	struct hlist_node *node;
 	struct sctp_transport *tsp;
 	int    hash = *(loff_t *)v;
 
@@ -447,7 +444,7 @@ static int sctp_remaddr_seq_show(struct seq_file *seq, void *v)
 	sctp_local_bh_disable();
 	read_lock(&head->lock);
 	rcu_read_lock();
-	sctp_for_each_hentry(epb, node, &head->chain) {
+	sctp_for_each_hentry(epb, &head->chain) {
 		if (!net_eq(sock_net(epb->sk), seq_file_net(seq)))
 			continue;
 		assoc = sctp_assoc(epb);
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index cedd9bf67b8c..c99458df3f3f 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -5882,8 +5882,7 @@ static struct sctp_bind_bucket *sctp_bucket_create(
 static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr)
 {
 	struct sctp_bind_hashbucket *head; /* hash list */
-	struct sctp_bind_bucket *pp; /* hash list port iterator */
-	struct hlist_node *node;
+	struct sctp_bind_bucket *pp;
 	unsigned short snum;
 	int ret;
 
@@ -5910,7 +5909,7 @@ static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr)
 			index = sctp_phashfn(sock_net(sk), rover);
 			head = &sctp_port_hashtable[index];
 			sctp_spin_lock(&head->lock);
-			sctp_for_each_hentry(pp, node, &head->chain)
+			sctp_for_each_hentry(pp, &head->chain)
 				if ((pp->port == rover) &&
 				    net_eq(sock_net(sk), pp->net))
 					goto next;
@@ -5938,7 +5937,7 @@ static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr)
 		 */
 		head = &sctp_port_hashtable[sctp_phashfn(sock_net(sk), snum)];
 		sctp_spin_lock(&head->lock);
-		sctp_for_each_hentry(pp, node, &head->chain) {
+		sctp_for_each_hentry(pp, &head->chain) {
 			if ((pp->port == snum) && net_eq(pp->net, sock_net(sk)))
 				goto pp_found;
 		}
@@ -5970,7 +5969,7 @@ pp_found:
 		 * that this port/socket (sk) combination are already
 		 * in an endpoint.
 		 */
-		sk_for_each_bound(sk2, node, &pp->owner) {
+		sk_for_each_bound(sk2, &pp->owner) {
 			struct sctp_endpoint *ep2;
 			ep2 = sctp_sk(sk2)->ep;
 
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index 392adc41e2e5..f5294047df77 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -407,7 +407,6 @@ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred,
 {
 	LIST_HEAD(free);
 	struct rpc_cred_cache *cache = auth->au_credcache;
-	struct hlist_node *pos;
 	struct rpc_cred	*cred = NULL,
 			*entry, *new;
 	unsigned int nr;
@@ -415,7 +414,7 @@ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred,
 	nr = hash_long(from_kuid(&init_user_ns, acred->uid), cache->hashbits);
 
 	rcu_read_lock();
-	hlist_for_each_entry_rcu(entry, pos, &cache->hashtable[nr], cr_hash) {
+	hlist_for_each_entry_rcu(entry, &cache->hashtable[nr], cr_hash) {
 		if (!entry->cr_ops->crmatch(acred, entry, flags))
 			continue;
 		spin_lock(&cache->lock);
@@ -439,7 +438,7 @@ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred,
 	}
 
 	spin_lock(&cache->lock);
-	hlist_for_each_entry(entry, pos, &cache->hashtable[nr], cr_hash) {
+	hlist_for_each_entry(entry, &cache->hashtable[nr], cr_hash) {
 		if (!entry->cr_ops->crmatch(acred, entry, flags))
 			continue;
 		cred = get_rpccred(entry);
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index f3897d10f649..39a4112faf54 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -670,13 +670,13 @@ static void cache_revisit_request(struct cache_head *item)
 {
 	struct cache_deferred_req *dreq;
 	struct list_head pending;
-	struct hlist_node *lp, *tmp;
+	struct hlist_node *tmp;
 	int hash = DFR_HASH(item);
 
 	INIT_LIST_HEAD(&pending);
 	spin_lock(&cache_defer_lock);
 
-	hlist_for_each_entry_safe(dreq, lp, tmp, &cache_defer_hash[hash], hash)
+	hlist_for_each_entry_safe(dreq, tmp, &cache_defer_hash[hash], hash)
 		if (dreq->item == item) {
 			__unhash_deferred_req(dreq);
 			list_add(&dreq->recent, &pending);
diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c
index 7963569fc04f..2af7b0cba43a 100644
--- a/net/sunrpc/svcauth.c
+++ b/net/sunrpc/svcauth.c
@@ -138,13 +138,12 @@ auth_domain_lookup(char *name, struct auth_domain *new)
 {
 	struct auth_domain *hp;
 	struct hlist_head *head;
-	struct hlist_node *np;
 
 	head = &auth_domain_table[hash_str(name, DN_HASHBITS)];
 
 	spin_lock(&auth_domain_lock);
 
-	hlist_for_each_entry(hp, np, head, hash) {
+	hlist_for_each_entry(hp, head, hash) {
 		if (strcmp(hp->name, name)==0) {
 			kref_get(&hp->ref);
 			spin_unlock(&auth_domain_lock);
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index 46754779fd3d..24b167914311 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -473,11 +473,10 @@ static void tipc_nameseq_subscribe(struct name_seq *nseq,
 static struct name_seq *nametbl_find_seq(u32 type)
 {
 	struct hlist_head *seq_head;
-	struct hlist_node *seq_node;
 	struct name_seq *ns;
 
 	seq_head = &table.types[hash(type)];
-	hlist_for_each_entry(ns, seq_node, seq_head, ns_list) {
+	hlist_for_each_entry(ns, seq_head, ns_list) {
 		if (ns->type == type)
 			return ns;
 	}
@@ -853,7 +852,6 @@ static int nametbl_list(char *buf, int len, u32 depth_info,
 			 u32 type, u32 lowbound, u32 upbound)
 {
 	struct hlist_head *seq_head;
-	struct hlist_node *seq_node;
 	struct name_seq *seq;
 	int all_types;
 	int ret = 0;
@@ -873,7 +871,7 @@ static int nametbl_list(char *buf, int len, u32 depth_info,
 		upbound = ~0;
 		for (i = 0; i < TIPC_NAMETBL_SIZE; i++) {
 			seq_head = &table.types[i];
-			hlist_for_each_entry(seq, seq_node, seq_head, ns_list) {
+			hlist_for_each_entry(seq, seq_head, ns_list) {
 				ret += nameseq_list(seq, buf + ret, len - ret,
 						   depth, seq->type,
 						   lowbound, upbound, i);
@@ -889,7 +887,7 @@ static int nametbl_list(char *buf, int len, u32 depth_info,
 		ret += nametbl_header(buf + ret, len - ret, depth);
 		i = hash(type);
 		seq_head = &table.types[i];
-		hlist_for_each_entry(seq, seq_node, seq_head, ns_list) {
+		hlist_for_each_entry(seq, seq_head, ns_list) {
 			if (seq->type == type) {
 				ret += nameseq_list(seq, buf + ret, len - ret,
 						   depth, type,
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 48f39dd3eae8..6e6c434872e8 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -69,12 +69,11 @@ static unsigned int tipc_hashfn(u32 addr)
 struct tipc_node *tipc_node_find(u32 addr)
 {
 	struct tipc_node *node;
-	struct hlist_node *pos;
 
 	if (unlikely(!in_own_cluster_exact(addr)))
 		return NULL;
 
-	hlist_for_each_entry(node, pos, &node_htable[tipc_hashfn(addr)], hash) {
+	hlist_for_each_entry(node, &node_htable[tipc_hashfn(addr)], hash) {
 		if (node->addr == addr)
 			return node;
 	}
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 87d284289012..51be64f163ec 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -263,9 +263,8 @@ static struct sock *__unix_find_socket_byname(struct net *net,
 					      int len, int type, unsigned int hash)
 {
 	struct sock *s;
-	struct hlist_node *node;
 
-	sk_for_each(s, node, &unix_socket_table[hash ^ type]) {
+	sk_for_each(s, &unix_socket_table[hash ^ type]) {
 		struct unix_sock *u = unix_sk(s);
 
 		if (!net_eq(sock_net(s), net))
@@ -298,10 +297,9 @@ static inline struct sock *unix_find_socket_byname(struct net *net,
 static struct sock *unix_find_socket_byinode(struct inode *i)
 {
 	struct sock *s;
-	struct hlist_node *node;
 
 	spin_lock(&unix_table_lock);
-	sk_for_each(s, node,
+	sk_for_each(s,
 		    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
 		struct dentry *dentry = unix_sk(s)->path.dentry;
 
diff --git a/net/unix/diag.c b/net/unix/diag.c
index 5ac19dc1d5e4..d591091603bf 100644
--- a/net/unix/diag.c
+++ b/net/unix/diag.c
@@ -192,10 +192,9 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
 	     slot < ARRAY_SIZE(unix_socket_table);
 	     s_num = 0, slot++) {
 		struct sock *sk;
-		struct hlist_node *node;
 
 		num = 0;
-		sk_for_each(sk, node, &unix_socket_table[slot]) {
+		sk_for_each(sk, &unix_socket_table[slot]) {
 			if (!net_eq(sock_net(sk), net))
 				continue;
 			if (num < s_num)
@@ -226,9 +225,7 @@ static struct sock *unix_lookup_by_ino(int ino)
 
 	spin_lock(&unix_table_lock);
 	for (i = 0; i < ARRAY_SIZE(unix_socket_table); i++) {
-		struct hlist_node *node;
-
-		sk_for_each(sk, node, &unix_socket_table[i])
+		sk_for_each(sk, &unix_socket_table[i])
 			if (ino == sock_i_ino(sk)) {
 				sock_hold(sk);
 				spin_unlock(&unix_table_lock);
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index a306bc66000e..37ca9694aabe 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -208,11 +208,10 @@ static void x25_remove_socket(struct sock *sk)
 static void x25_kill_by_device(struct net_device *dev)
 {
 	struct sock *s;
-	struct hlist_node *node;
 
 	write_lock_bh(&x25_list_lock);
 
-	sk_for_each(s, node, &x25_list)
+	sk_for_each(s, &x25_list)
 		if (x25_sk(s)->neighbour && x25_sk(s)->neighbour->dev == dev)
 			x25_disconnect(s, ENETUNREACH, 0, 0);
 
@@ -280,12 +279,11 @@ static struct sock *x25_find_listener(struct x25_address *addr,
 {
 	struct sock *s;
 	struct sock *next_best;
-	struct hlist_node *node;
 
 	read_lock_bh(&x25_list_lock);
 	next_best = NULL;
 
-	sk_for_each(s, node, &x25_list)
+	sk_for_each(s, &x25_list)
 		if ((!strcmp(addr->x25_addr,
 			x25_sk(s)->source_addr.x25_addr) ||
 				!strcmp(addr->x25_addr,
@@ -323,9 +321,8 @@ found:
 static struct sock *__x25_find_socket(unsigned int lci, struct x25_neigh *nb)
 {
 	struct sock *s;
-	struct hlist_node *node;
 
-	sk_for_each(s, node, &x25_list)
+	sk_for_each(s, &x25_list)
 		if (x25_sk(s)->lci == lci && x25_sk(s)->neighbour == nb) {
 			sock_hold(s);
 			goto found;
@@ -1782,11 +1779,10 @@ static struct notifier_block x25_dev_notifier = {
 void x25_kill_by_neigh(struct x25_neigh *nb)
 {
 	struct sock *s;
-	struct hlist_node *node;
 
 	write_lock_bh(&x25_list_lock);
 
-	sk_for_each(s, node, &x25_list)
+	sk_for_each(s, &x25_list)
 		if (x25_sk(s)->neighbour == nb)
 			x25_disconnect(s, ENETUNREACH, 0, 0);
 
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 5b47180986f8..167c67d46c6a 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -379,27 +379,27 @@ static void xfrm_dst_hash_transfer(struct hlist_head *list,
 				   struct hlist_head *ndsttable,
 				   unsigned int nhashmask)
 {
-	struct hlist_node *entry, *tmp, *entry0 = NULL;
+	struct hlist_node *tmp, *entry0 = NULL;
 	struct xfrm_policy *pol;
 	unsigned int h0 = 0;
 
 redo:
-	hlist_for_each_entry_safe(pol, entry, tmp, list, bydst) {
+	hlist_for_each_entry_safe(pol, tmp, list, bydst) {
 		unsigned int h;
 
 		h = __addr_hash(&pol->selector.daddr, &pol->selector.saddr,
 				pol->family, nhashmask);
 		if (!entry0) {
-			hlist_del(entry);
+			hlist_del(&pol->bydst);
 			hlist_add_head(&pol->bydst, ndsttable+h);
 			h0 = h;
 		} else {
 			if (h != h0)
 				continue;
-			hlist_del(entry);
+			hlist_del(&pol->bydst);
 			hlist_add_after(entry0, &pol->bydst);
 		}
-		entry0 = entry;
+		entry0 = &pol->bydst;
 	}
 	if (!hlist_empty(list)) {
 		entry0 = NULL;
@@ -411,10 +411,10 @@ static void xfrm_idx_hash_transfer(struct hlist_head *list,
 				   struct hlist_head *nidxtable,
 				   unsigned int nhashmask)
 {
-	struct hlist_node *entry, *tmp;
+	struct hlist_node *tmp;
 	struct xfrm_policy *pol;
 
-	hlist_for_each_entry_safe(pol, entry, tmp, list, byidx) {
+	hlist_for_each_entry_safe(pol, tmp, list, byidx) {
 		unsigned int h;
 
 		h = __idx_hash(pol->index, nhashmask);
@@ -544,7 +544,6 @@ static u32 xfrm_gen_index(struct net *net, int dir)
 	static u32 idx_generator;
 
 	for (;;) {
-		struct hlist_node *entry;
 		struct hlist_head *list;
 		struct xfrm_policy *p;
 		u32 idx;
@@ -556,7 +555,7 @@ static u32 xfrm_gen_index(struct net *net, int dir)
 			idx = 8;
 		list = net->xfrm.policy_byidx + idx_hash(net, idx);
 		found = 0;
-		hlist_for_each_entry(p, entry, list, byidx) {
+		hlist_for_each_entry(p, list, byidx) {
 			if (p->index == idx) {
 				found = 1;
 				break;
@@ -628,13 +627,13 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
 	struct xfrm_policy *pol;
 	struct xfrm_policy *delpol;
 	struct hlist_head *chain;
-	struct hlist_node *entry, *newpos;
+	struct hlist_node *newpos;
 
 	write_lock_bh(&xfrm_policy_lock);
 	chain = policy_hash_bysel(net, &policy->selector, policy->family, dir);
 	delpol = NULL;
 	newpos = NULL;
-	hlist_for_each_entry(pol, entry, chain, bydst) {
+	hlist_for_each_entry(pol, chain, bydst) {
 		if (pol->type == policy->type &&
 		    !selector_cmp(&pol->selector, &policy->selector) &&
 		    xfrm_policy_mark_match(policy, pol) &&
@@ -691,13 +690,12 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type,
 {
 	struct xfrm_policy *pol, *ret;
 	struct hlist_head *chain;
-	struct hlist_node *entry;
 
 	*err = 0;
 	write_lock_bh(&xfrm_policy_lock);
 	chain = policy_hash_bysel(net, sel, sel->family, dir);
 	ret = NULL;
-	hlist_for_each_entry(pol, entry, chain, bydst) {
+	hlist_for_each_entry(pol, chain, bydst) {
 		if (pol->type == type &&
 		    (mark & pol->mark.m) == pol->mark.v &&
 		    !selector_cmp(sel, &pol->selector) &&
@@ -729,7 +727,6 @@ struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type,
 {
 	struct xfrm_policy *pol, *ret;
 	struct hlist_head *chain;
-	struct hlist_node *entry;
 
 	*err = -ENOENT;
 	if (xfrm_policy_id2dir(id) != dir)
@@ -739,7 +736,7 @@ struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type,
 	write_lock_bh(&xfrm_policy_lock);
 	chain = net->xfrm.policy_byidx + idx_hash(net, id);
 	ret = NULL;
-	hlist_for_each_entry(pol, entry, chain, byidx) {
+	hlist_for_each_entry(pol, chain, byidx) {
 		if (pol->type == type && pol->index == id &&
 		    (mark & pol->mark.m) == pol->mark.v) {
 			xfrm_pol_hold(pol);
@@ -772,10 +769,9 @@ xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audi
 
 	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
 		struct xfrm_policy *pol;
-		struct hlist_node *entry;
 		int i;
 
-		hlist_for_each_entry(pol, entry,
+		hlist_for_each_entry(pol,
 				     &net->xfrm.policy_inexact[dir], bydst) {
 			if (pol->type != type)
 				continue;
@@ -789,7 +785,7 @@ xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audi
 			}
 		}
 		for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) {
-			hlist_for_each_entry(pol, entry,
+			hlist_for_each_entry(pol,
 					     net->xfrm.policy_bydst[dir].table + i,
 					     bydst) {
 				if (pol->type != type)
@@ -828,11 +824,10 @@ int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info)
 
 	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
 		struct xfrm_policy *pol;
-		struct hlist_node *entry;
 		int i;
 
 	again1:
-		hlist_for_each_entry(pol, entry,
+		hlist_for_each_entry(pol,
 				     &net->xfrm.policy_inexact[dir], bydst) {
 			if (pol->type != type)
 				continue;
@@ -852,7 +847,7 @@ int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info)
 
 		for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) {
 	again2:
-			hlist_for_each_entry(pol, entry,
+			hlist_for_each_entry(pol,
 					     net->xfrm.policy_bydst[dir].table + i,
 					     bydst) {
 				if (pol->type != type)
@@ -980,7 +975,6 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,
 	int err;
 	struct xfrm_policy *pol, *ret;
 	const xfrm_address_t *daddr, *saddr;
-	struct hlist_node *entry;
 	struct hlist_head *chain;
 	u32 priority = ~0U;
 
@@ -992,7 +986,7 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,
 	read_lock_bh(&xfrm_policy_lock);
 	chain = policy_hash_direct(net, daddr, saddr, family, dir);
 	ret = NULL;
-	hlist_for_each_entry(pol, entry, chain, bydst) {
+	hlist_for_each_entry(pol, chain, bydst) {
 		err = xfrm_policy_match(pol, fl, type, family, dir);
 		if (err) {
 			if (err == -ESRCH)
@@ -1008,7 +1002,7 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,
 		}
 	}
 	chain = &net->xfrm.policy_inexact[dir];
-	hlist_for_each_entry(pol, entry, chain, bydst) {
+	hlist_for_each_entry(pol, chain, bydst) {
 		err = xfrm_policy_match(pol, fl, type, family, dir);
 		if (err) {
 			if (err == -ESRCH)
@@ -3041,13 +3035,12 @@ static struct xfrm_policy * xfrm_migrate_policy_find(const struct xfrm_selector
 						     u8 dir, u8 type)
 {
 	struct xfrm_policy *pol, *ret = NULL;
-	struct hlist_node *entry;
 	struct hlist_head *chain;
 	u32 priority = ~0U;
 
 	read_lock_bh(&xfrm_policy_lock);
 	chain = policy_hash_direct(&init_net, &sel->daddr, &sel->saddr, sel->family, dir);
-	hlist_for_each_entry(pol, entry, chain, bydst) {
+	hlist_for_each_entry(pol, chain, bydst) {
 		if (xfrm_migrate_selector_match(sel, &pol->selector) &&
 		    pol->type == type) {
 			ret = pol;
@@ -3056,7 +3049,7 @@ static struct xfrm_policy * xfrm_migrate_policy_find(const struct xfrm_selector
 		}
 	}
 	chain = &init_net.xfrm.policy_inexact[dir];
-	hlist_for_each_entry(pol, entry, chain, bydst) {
+	hlist_for_each_entry(pol, chain, bydst) {
 		if (xfrm_migrate_selector_match(sel, &pol->selector) &&
 		    pol->type == type &&
 		    pol->priority < priority) {
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index ae01bdbcb294..2c341bdaf47c 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -72,10 +72,10 @@ static void xfrm_hash_transfer(struct hlist_head *list,
 			       struct hlist_head *nspitable,
 			       unsigned int nhashmask)
 {
-	struct hlist_node *entry, *tmp;
+	struct hlist_node *tmp;
 	struct xfrm_state *x;
 
-	hlist_for_each_entry_safe(x, entry, tmp, list, bydst) {
+	hlist_for_each_entry_safe(x, tmp, list, bydst) {
 		unsigned int h;
 
 		h = __xfrm_dst_hash(&x->id.daddr, &x->props.saddr,
@@ -368,14 +368,14 @@ static void xfrm_state_gc_task(struct work_struct *work)
 {
 	struct net *net = container_of(work, struct net, xfrm.state_gc_work);
 	struct xfrm_state *x;
-	struct hlist_node *entry, *tmp;
+	struct hlist_node *tmp;
 	struct hlist_head gc_list;
 
 	spin_lock_bh(&xfrm_state_gc_lock);
 	hlist_move_list(&net->xfrm.state_gc_list, &gc_list);
 	spin_unlock_bh(&xfrm_state_gc_lock);
 
-	hlist_for_each_entry_safe(x, entry, tmp, &gc_list, gclist)
+	hlist_for_each_entry_safe(x, tmp, &gc_list, gclist)
 		xfrm_state_gc_destroy(x);
 
 	wake_up(&net->xfrm.km_waitq);
@@ -577,10 +577,9 @@ xfrm_state_flush_secctx_check(struct net *net, u8 proto, struct xfrm_audit *audi
 	int i, err = 0;
 
 	for (i = 0; i <= net->xfrm.state_hmask; i++) {
-		struct hlist_node *entry;
 		struct xfrm_state *x;
 
-		hlist_for_each_entry(x, entry, net->xfrm.state_bydst+i, bydst) {
+		hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) {
 			if (xfrm_id_proto_match(x->id.proto, proto) &&
 			   (err = security_xfrm_state_delete(x)) != 0) {
 				xfrm_audit_state_delete(x, 0,
@@ -613,10 +612,9 @@ int xfrm_state_flush(struct net *net, u8 proto, struct xfrm_audit *audit_info)
 
 	err = -ESRCH;
 	for (i = 0; i <= net->xfrm.state_hmask; i++) {
-		struct hlist_node *entry;
 		struct xfrm_state *x;
 restart:
-		hlist_for_each_entry(x, entry, net->xfrm.state_bydst+i, bydst) {
+		hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) {
 			if (!xfrm_state_kern(x) &&
 			    xfrm_id_proto_match(x->id.proto, proto)) {
 				xfrm_state_hold(x);
@@ -685,9 +683,8 @@ static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark,
 {
 	unsigned int h = xfrm_spi_hash(net, daddr, spi, proto, family);
 	struct xfrm_state *x;
-	struct hlist_node *entry;
 
-	hlist_for_each_entry(x, entry, net->xfrm.state_byspi+h, byspi) {
+	hlist_for_each_entry(x, net->xfrm.state_byspi+h, byspi) {
 		if (x->props.family != family ||
 		    x->id.spi       != spi ||
 		    x->id.proto     != proto ||
@@ -710,9 +707,8 @@ static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, u32 mark,
 {
 	unsigned int h = xfrm_src_hash(net, daddr, saddr, family);
 	struct xfrm_state *x;
-	struct hlist_node *entry;
 
-	hlist_for_each_entry(x, entry, net->xfrm.state_bysrc+h, bysrc) {
+	hlist_for_each_entry(x, net->xfrm.state_bysrc+h, bysrc) {
 		if (x->props.family != family ||
 		    x->id.proto     != proto ||
 		    !xfrm_addr_equal(&x->id.daddr, daddr, family) ||
@@ -798,7 +794,6 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
 	static xfrm_address_t saddr_wildcard = { };
 	struct net *net = xp_net(pol);
 	unsigned int h, h_wildcard;
-	struct hlist_node *entry;
 	struct xfrm_state *x, *x0, *to_put;
 	int acquire_in_progress = 0;
 	int error = 0;
@@ -810,7 +805,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
 
 	spin_lock_bh(&xfrm_state_lock);
 	h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family);
-	hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h, bydst) {
+	hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) {
 		if (x->props.family == encap_family &&
 		    x->props.reqid == tmpl->reqid &&
 		    (mark & x->mark.m) == x->mark.v &&
@@ -826,7 +821,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
 		goto found;
 
 	h_wildcard = xfrm_dst_hash(net, daddr, &saddr_wildcard, tmpl->reqid, encap_family);
-	hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h_wildcard, bydst) {
+	hlist_for_each_entry(x, net->xfrm.state_bydst+h_wildcard, bydst) {
 		if (x->props.family == encap_family &&
 		    x->props.reqid == tmpl->reqid &&
 		    (mark & x->mark.m) == x->mark.v &&
@@ -906,11 +901,10 @@ xfrm_stateonly_find(struct net *net, u32 mark,
 {
 	unsigned int h;
 	struct xfrm_state *rx = NULL, *x = NULL;
-	struct hlist_node *entry;
 
 	spin_lock(&xfrm_state_lock);
 	h = xfrm_dst_hash(net, daddr, saddr, reqid, family);
-	hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h, bydst) {
+	hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) {
 		if (x->props.family == family &&
 		    x->props.reqid == reqid &&
 		    (mark & x->mark.m) == x->mark.v &&
@@ -972,12 +966,11 @@ static void __xfrm_state_bump_genids(struct xfrm_state *xnew)
 	unsigned short family = xnew->props.family;
 	u32 reqid = xnew->props.reqid;
 	struct xfrm_state *x;
-	struct hlist_node *entry;
 	unsigned int h;
 	u32 mark = xnew->mark.v & xnew->mark.m;
 
 	h = xfrm_dst_hash(net, &xnew->id.daddr, &xnew->props.saddr, reqid, family);
-	hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h, bydst) {
+	hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) {
 		if (x->props.family	== family &&
 		    x->props.reqid	== reqid &&
 		    (mark & x->mark.m) == x->mark.v &&
@@ -1004,11 +997,10 @@ static struct xfrm_state *__find_acq_core(struct net *net, struct xfrm_mark *m,
 					  const xfrm_address_t *saddr, int create)
 {
 	unsigned int h = xfrm_dst_hash(net, daddr, saddr, reqid, family);
-	struct hlist_node *entry;
 	struct xfrm_state *x;
 	u32 mark = m->v & m->m;
 
-	hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h, bydst) {
+	hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) {
 		if (x->props.reqid  != reqid ||
 		    x->props.mode   != mode ||
 		    x->props.family != family ||
@@ -1215,12 +1207,11 @@ struct xfrm_state * xfrm_migrate_state_find(struct xfrm_migrate *m)
 {
 	unsigned int h;
 	struct xfrm_state *x;
-	struct hlist_node *entry;
 
 	if (m->reqid) {
 		h = xfrm_dst_hash(&init_net, &m->old_daddr, &m->old_saddr,
 				  m->reqid, m->old_family);
-		hlist_for_each_entry(x, entry, init_net.xfrm.state_bydst+h, bydst) {
+		hlist_for_each_entry(x, init_net.xfrm.state_bydst+h, bydst) {
 			if (x->props.mode != m->mode ||
 			    x->id.proto != m->proto)
 				continue;
@@ -1237,7 +1228,7 @@ struct xfrm_state * xfrm_migrate_state_find(struct xfrm_migrate *m)
 	} else {
 		h = xfrm_src_hash(&init_net, &m->old_daddr, &m->old_saddr,
 				  m->old_family);
-		hlist_for_each_entry(x, entry, init_net.xfrm.state_bysrc+h, bysrc) {
+		hlist_for_each_entry(x, init_net.xfrm.state_bysrc+h, bysrc) {
 			if (x->props.mode != m->mode ||
 			    x->id.proto != m->proto)
 				continue;
@@ -1466,10 +1457,9 @@ static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 s
 	int i;
 
 	for (i = 0; i <= net->xfrm.state_hmask; i++) {
-		struct hlist_node *entry;
 		struct xfrm_state *x;
 
-		hlist_for_each_entry(x, entry, net->xfrm.state_bydst+i, bydst) {
+		hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) {
 			if (x->km.seq == seq &&
 			    (mark & x->mark.m) == x->mark.v &&
 			    x->km.state == XFRM_STATE_ACQ) {
diff --git a/security/integrity/ima/ima_queue.c b/security/integrity/ima/ima_queue.c
index 55a6271bce7a..ff63fe00c195 100644
--- a/security/integrity/ima/ima_queue.c
+++ b/security/integrity/ima/ima_queue.c
@@ -45,12 +45,11 @@ static struct ima_queue_entry *ima_lookup_digest_entry(u8 *digest_value)
 {
 	struct ima_queue_entry *qe, *ret = NULL;
 	unsigned int key;
-	struct hlist_node *pos;
 	int rc;
 
 	key = ima_hash_key(digest_value);
 	rcu_read_lock();
-	hlist_for_each_entry_rcu(qe, pos, &ima_htable.queue[key], hnext) {
+	hlist_for_each_entry_rcu(qe, &ima_htable.queue[key], hnext) {
 		rc = memcmp(qe->entry->digest, digest_value, IMA_DIGEST_SIZE);
 		if (rc == 0) {
 			ret = qe;
diff --git a/security/selinux/avc.c b/security/selinux/avc.c
index 4d3fab47e643..dad36a6ab45f 100644
--- a/security/selinux/avc.c
+++ b/security/selinux/avc.c
@@ -188,11 +188,9 @@ int avc_get_hash_stats(char *page)
 	for (i = 0; i < AVC_CACHE_SLOTS; i++) {
 		head = &avc_cache.slots[i];
 		if (!hlist_empty(head)) {
-			struct hlist_node *next;
-
 			slots_used++;
 			chain_len = 0;
-			hlist_for_each_entry_rcu(node, next, head, list)
+			hlist_for_each_entry_rcu(node, head, list)
 				chain_len++;
 			if (chain_len > max_chain_len)
 				max_chain_len = chain_len;
@@ -241,7 +239,6 @@ static inline int avc_reclaim_node(void)
 	int hvalue, try, ecx;
 	unsigned long flags;
 	struct hlist_head *head;
-	struct hlist_node *next;
 	spinlock_t *lock;
 
 	for (try = 0, ecx = 0; try < AVC_CACHE_SLOTS; try++) {
@@ -253,7 +250,7 @@ static inline int avc_reclaim_node(void)
 			continue;
 
 		rcu_read_lock();
-		hlist_for_each_entry(node, next, head, list) {
+		hlist_for_each_entry(node, head, list) {
 			avc_node_delete(node);
 			avc_cache_stats_incr(reclaims);
 			ecx++;
@@ -301,11 +298,10 @@ static inline struct avc_node *avc_search_node(u32 ssid, u32 tsid, u16 tclass)
 	struct avc_node *node, *ret = NULL;
 	int hvalue;
 	struct hlist_head *head;
-	struct hlist_node *next;
 
 	hvalue = avc_hash(ssid, tsid, tclass);
 	head = &avc_cache.slots[hvalue];
-	hlist_for_each_entry_rcu(node, next, head, list) {
+	hlist_for_each_entry_rcu(node, head, list) {
 		if (ssid == node->ae.ssid &&
 		    tclass == node->ae.tclass &&
 		    tsid == node->ae.tsid) {
@@ -394,7 +390,6 @@ static struct avc_node *avc_insert(u32 ssid, u32 tsid, u16 tclass, struct av_dec
 	node = avc_alloc_node();
 	if (node) {
 		struct hlist_head *head;
-		struct hlist_node *next;
 		spinlock_t *lock;
 
 		hvalue = avc_hash(ssid, tsid, tclass);
@@ -404,7 +399,7 @@ static struct avc_node *avc_insert(u32 ssid, u32 tsid, u16 tclass, struct av_dec
 		lock = &avc_cache.slots_lock[hvalue];
 
 		spin_lock_irqsave(lock, flag);
-		hlist_for_each_entry(pos, next, head, list) {
+		hlist_for_each_entry(pos, head, list) {
 			if (pos->ae.ssid == ssid &&
 			    pos->ae.tsid == tsid &&
 			    pos->ae.tclass == tclass) {
@@ -541,7 +536,6 @@ static int avc_update_node(u32 event, u32 perms, u32 ssid, u32 tsid, u16 tclass,
 	unsigned long flag;
 	struct avc_node *pos, *node, *orig = NULL;
 	struct hlist_head *head;
-	struct hlist_node *next;
 	spinlock_t *lock;
 
 	node = avc_alloc_node();
@@ -558,7 +552,7 @@ static int avc_update_node(u32 event, u32 perms, u32 ssid, u32 tsid, u16 tclass,
 
 	spin_lock_irqsave(lock, flag);
 
-	hlist_for_each_entry(pos, next, head, list) {
+	hlist_for_each_entry(pos, head, list) {
 		if (ssid == pos->ae.ssid &&
 		    tsid == pos->ae.tsid &&
 		    tclass == pos->ae.tclass &&
@@ -614,7 +608,6 @@ out:
 static void avc_flush(void)
 {
 	struct hlist_head *head;
-	struct hlist_node *next;
 	struct avc_node *node;
 	spinlock_t *lock;
 	unsigned long flag;
@@ -630,7 +623,7 @@ static void avc_flush(void)
 		 * prevent RCU grace periods from ending.
 		 */
 		rcu_read_lock();
-		hlist_for_each_entry(node, next, head, list)
+		hlist_for_each_entry(node, head, list)
 			avc_node_delete(node);
 		rcu_read_unlock();
 		spin_unlock_irqrestore(lock, flag);
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index bc4ad7977438..c8be0fbc5145 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -314,7 +314,6 @@ static int perf_evlist__id_add_fd(struct perf_evlist *evlist,
 struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id)
 {
 	struct hlist_head *head;
-	struct hlist_node *pos;
 	struct perf_sample_id *sid;
 	int hash;
 
@@ -324,7 +323,7 @@ struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id)
 	hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
 	head = &evlist->heads[hash];
 
-	hlist_for_each_entry(sid, pos, head, node)
+	hlist_for_each_entry(sid, head, node)
 		if (sid->id == id)
 			return sid->evsel;
 
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index b6eea5cc7b34..adb17f266b28 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -268,14 +268,13 @@ static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd,
 			 struct kvm_irq_routing_table *irq_rt)
 {
 	struct kvm_kernel_irq_routing_entry *e;
-	struct hlist_node *n;
 
 	if (irqfd->gsi >= irq_rt->nr_rt_entries) {
 		rcu_assign_pointer(irqfd->irq_entry, NULL);
 		return;
 	}
 
-	hlist_for_each_entry(e, n, &irq_rt->map[irqfd->gsi], link) {
+	hlist_for_each_entry(e, &irq_rt->map[irqfd->gsi], link) {
 		/* Only fast-path MSI. */
 		if (e->type == KVM_IRQ_ROUTING_MSI)
 			rcu_assign_pointer(irqfd->irq_entry, e);
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index ff6d40e2c06d..e9073cf4d040 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -173,7 +173,6 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level)
 	struct kvm_kernel_irq_routing_entry *e, irq_set[KVM_NR_IRQCHIPS];
 	int ret = -1, i = 0;
 	struct kvm_irq_routing_table *irq_rt;
-	struct hlist_node *n;
 
 	trace_kvm_set_irq(irq, level, irq_source_id);
 
@@ -184,7 +183,7 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level)
 	rcu_read_lock();
 	irq_rt = rcu_dereference(kvm->irq_routing);
 	if (irq < irq_rt->nr_rt_entries)
-		hlist_for_each_entry(e, n, &irq_rt->map[irq], link)
+		hlist_for_each_entry(e, &irq_rt->map[irq], link)
 			irq_set[i++] = *e;
 	rcu_read_unlock();
 
@@ -212,7 +211,6 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level)
 	struct kvm_kernel_irq_routing_entry *e;
 	int ret = -EINVAL;
 	struct kvm_irq_routing_table *irq_rt;
-	struct hlist_node *n;
 
 	trace_kvm_set_irq(irq, level, irq_source_id);
 
@@ -227,7 +225,7 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level)
 	rcu_read_lock();
 	irq_rt = rcu_dereference(kvm->irq_routing);
 	if (irq < irq_rt->nr_rt_entries)
-		hlist_for_each_entry(e, n, &irq_rt->map[irq], link) {
+		hlist_for_each_entry(e, &irq_rt->map[irq], link) {
 			if (likely(e->type == KVM_IRQ_ROUTING_MSI))
 				ret = kvm_set_msi_inatomic(e, kvm);
 			else
@@ -241,13 +239,12 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level)
 bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin)
 {
 	struct kvm_irq_ack_notifier *kian;
-	struct hlist_node *n;
 	int gsi;
 
 	rcu_read_lock();
 	gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin];
 	if (gsi != -1)
-		hlist_for_each_entry_rcu(kian, n, &kvm->irq_ack_notifier_list,
+		hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
 					 link)
 			if (kian->gsi == gsi) {
 				rcu_read_unlock();
@@ -263,7 +260,6 @@ EXPORT_SYMBOL_GPL(kvm_irq_has_notifier);
 void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
 {
 	struct kvm_irq_ack_notifier *kian;
-	struct hlist_node *n;
 	int gsi;
 
 	trace_kvm_ack_irq(irqchip, pin);
@@ -271,7 +267,7 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
 	rcu_read_lock();
 	gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin];
 	if (gsi != -1)
-		hlist_for_each_entry_rcu(kian, n, &kvm->irq_ack_notifier_list,
+		hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
 					 link)
 			if (kian->gsi == gsi)
 				kian->irq_acked(kian);
@@ -369,13 +365,12 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin,
 			     bool mask)
 {
 	struct kvm_irq_mask_notifier *kimn;
-	struct hlist_node *n;
 	int gsi;
 
 	rcu_read_lock();
 	gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin];
 	if (gsi != -1)
-		hlist_for_each_entry_rcu(kimn, n, &kvm->mask_notifier_list, link)
+		hlist_for_each_entry_rcu(kimn, &kvm->mask_notifier_list, link)
 			if (kimn->irq == gsi)
 				kimn->func(kimn, mask);
 	rcu_read_unlock();
@@ -396,13 +391,12 @@ static int setup_routing_entry(struct kvm_irq_routing_table *rt,
 	int delta;
 	unsigned max_pin;
 	struct kvm_kernel_irq_routing_entry *ei;
-	struct hlist_node *n;
 
 	/*
 	 * Do not allow GSI to be mapped to the same irqchip more than once.
 	 * Allow only one to one mapping between GSI and MSI.
 	 */
-	hlist_for_each_entry(ei, n, &rt->map[ue->gsi], link)
+	hlist_for_each_entry(ei, &rt->map[ue->gsi], link)
 		if (ei->type == KVM_IRQ_ROUTING_MSI ||
 		    ue->type == KVM_IRQ_ROUTING_MSI ||
 		    ue->u.irqchip.irqchip == ei->irqchip.irqchip)
-- 
cgit v1.2.3


From f9c6a655a94042f94c0adb30d07d93cfd8915e95 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 27 Feb 2013 21:36:03 +0000
Subject: dmaengine: dw_dmac: move to generic DMA binding

The original device tree binding for this driver, from Viresh Kumar
unfortunately conflicted with the generic DMA binding, and did not allow
to completely seperate slave device configuration from the controller.

This is an attempt to replace it with an implementation of the generic
binding, but it is currently completely untested, because I do not have
any hardware with this particular controller.

The patch applies on top of the slave-dma tree, which contains both the base
support for the generic DMA binding, as well as the earlier attempt from
Viresh. Both of these are currently not merged upstream however.

This version incorporates feedback from Viresh Kumar, Andy Shevchenko
and Russell King.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Acked-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Vinod Koul <vinod.koul@linux.intel.com>
Cc: devicetree-discuss@lists.ozlabs.org
Cc: linux-arm-kernel@lists.infradead.org
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 Documentation/devicetree/bindings/dma/snps-dma.txt |  70 +++++-----
 drivers/dma/dw_dmac.c                              | 145 ++++++++++-----------
 drivers/dma/dw_dmac_regs.h                         |   7 +-
 include/linux/dw_dmac.h                            |   5 -
 4 files changed, 111 insertions(+), 116 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/dma/snps-dma.txt b/Documentation/devicetree/bindings/dma/snps-dma.txt
index 5bb3dfb6f1d8..d58675ea1abf 100644
--- a/Documentation/devicetree/bindings/dma/snps-dma.txt
+++ b/Documentation/devicetree/bindings/dma/snps-dma.txt
@@ -3,59 +3,61 @@
 Required properties:
 - compatible: "snps,dma-spear1340"
 - reg: Address range of the DMAC registers
-- interrupt-parent: Should be the phandle for the interrupt controller
-  that services interrupts for this device
 - interrupt: Should contain the DMAC interrupt number
-- nr_channels: Number of channels supported by hardware
-- is_private: The device channels should be marked as private and not for by the
-  general purpose DMA channel allocator. False if not passed.
+- dma-channels: Number of channels supported by hardware
+- dma-requests: Number of DMA request lines supported, up to 16
+- dma-masters: Number of AHB masters supported by the controller
+- #dma-cells: must be <3>
 - chan_allocation_order: order of allocation of channel, 0 (default): ascending,
   1: descending
 - chan_priority: priority of channels. 0 (default): increase from chan 0->n, 1:
   increase from chan n->0
 - block_size: Maximum block size supported by the controller
-- nr_masters: Number of AHB masters supported by the controller
 - data_width: Maximum data width supported by hardware per AHB master
   (0 - 8bits, 1 - 16bits, ..., 5 - 256bits)
-- slave_info:
-	- bus_id: name of this device channel, not just a device name since
-	  devices may have more than one channel e.g. "foo_tx". For using the
-	  dw_generic_filter(), slave drivers must pass exactly this string as
-	  param to filter function.
-	- cfg_hi: Platform-specific initializer for the CFG_HI register
-	- cfg_lo: Platform-specific initializer for the CFG_LO register
-	- src_master: src master for transfers on allocated channel.
-	- dst_master: dest master for transfers on allocated channel.
+
+
+Optional properties:
+- interrupt-parent: Should be the phandle for the interrupt controller
+  that services interrupts for this device
+- is_private: The device channels should be marked as private and not for by the
+  general purpose DMA channel allocator. False if not passed.
 
 Example:
 
-	dma@fc000000 {
+	dmahost: dma@fc000000 {
 		compatible = "snps,dma-spear1340";
 		reg = <0xfc000000 0x1000>;
 		interrupt-parent = <&vic1>;
 		interrupts = <12>;
 
-		nr_channels = <8>;
+		dma-channels = <8>;
+		dma-requests = <16>;
+		dma-masters = <2>;
+		#dma-cells = <3>;
 		chan_allocation_order = <1>;
 		chan_priority = <1>;
 		block_size = <0xfff>;
-		nr_masters = <2>;
 		data_width = <3 3 0 0>;
+	};
 
-		slave_info {
-			uart0-tx {
-				bus_id = "uart0-tx";
-				cfg_hi = <0x4000>;	/* 0x8 << 11 */
-				cfg_lo = <0>;
-				src_master = <0>;
-				dst_master = <1>;
-			};
-			spi0-tx {
-				bus_id = "spi0-tx";
-				cfg_hi = <0x2000>;	/* 0x4 << 11 */
-				cfg_lo = <0>;
-				src_master = <0>;
-				dst_master = <0>;
-			};
-		};
+DMA clients connected to the Designware DMA controller must use the format
+described in the dma.txt file, using a four-cell specifier for each channel.
+The four cells in order are:
+
+1. A phandle pointing to the DMA controller
+2. The DMA request line number
+3. Source master for transfers on allocated channel
+4. Destination master for transfers on allocated channel
+
+Example:
+	
+	serial@e0000000 {
+		compatible = "arm,pl011", "arm,primecell";
+		reg = <0xe0000000 0x1000>;
+		interrupts = <0 35 0x4>;
+		status = "disabled";
+		dmas = <&dmahost 12 0 1>,
+			<&dmahost 13 0 1 0>;
+		dma-names = "rx", "rx";
 	};
diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c
index 4c83f18803f1..c3159abf9ac1 100644
--- a/drivers/dma/dw_dmac.c
+++ b/drivers/dma/dw_dmac.c
@@ -19,6 +19,7 @@
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/of.h>
+#include <linux/of_dma.h>
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
@@ -170,7 +171,13 @@ static void dwc_initialize(struct dw_dma_chan *dwc)
 	if (dwc->initialized == true)
 		return;
 
-	if (dws) {
+	if (dws && dws->cfg_hi == ~0 && dws->cfg_lo == ~0) {
+		/* autoconfigure based on request line from DT */
+		if (dwc->direction == DMA_MEM_TO_DEV)
+			cfghi = DWC_CFGH_DST_PER(dwc->request_line);
+		else if (dwc->direction == DMA_DEV_TO_MEM)
+			cfghi = DWC_CFGH_SRC_PER(dwc->request_line);
+	} else if (dws) {
 		/*
 		 * We need controller-specific data to set up slave
 		 * transfers.
@@ -1225,49 +1232,64 @@ static void dwc_free_chan_resources(struct dma_chan *chan)
 	dev_vdbg(chan2dev(chan), "%s: done\n", __func__);
 }
 
-bool dw_dma_generic_filter(struct dma_chan *chan, void *param)
+struct dw_dma_filter_args {
+	struct dw_dma *dw;
+	unsigned int req;
+	unsigned int src;
+	unsigned int dst;
+};
+
+static bool dw_dma_generic_filter(struct dma_chan *chan, void *param)
 {
+	struct dw_dma_chan *dwc = to_dw_dma_chan(chan);
 	struct dw_dma *dw = to_dw_dma(chan->device);
-	static struct dw_dma *last_dw;
-	static char *last_bus_id;
-	int i = -1;
+	struct dw_dma_filter_args *fargs = param;
+	struct dw_dma_slave *dws = &dwc->slave;
 
-	/*
-	 * dmaengine framework calls this routine for all channels of all dma
-	 * controller, until true is returned. If 'param' bus_id is not
-	 * registered with a dma controller (dw), then there is no need of
-	 * running below function for all channels of dw.
-	 *
-	 * This block of code does this by saving the parameters of last
-	 * failure. If dw and param are same, i.e. trying on same dw with
-	 * different channel, return false.
-	 */
-	if ((last_dw == dw) && (last_bus_id == param))
-		return false;
-	/*
-	 * Return true:
-	 * - If dw_dma's platform data is not filled with slave info, then all
-	 *   dma controllers are fine for transfer.
-	 * - Or if param is NULL
-	 */
-	if (!dw->sd || !param)
-		return true;
+	/* ensure the device matches our channel */
+        if (chan->device != &fargs->dw->dma)
+                return false;
 
-	while (++i < dw->sd_count) {
-		if (!strcmp(dw->sd[i].bus_id, param)) {
-			chan->private = &dw->sd[i];
-			last_dw = NULL;
-			last_bus_id = NULL;
+	dws->dma_dev	= dw->dma.dev;
+	dws->cfg_hi	= ~0;
+	dws->cfg_lo	= ~0;
+	dws->src_master	= fargs->src;
+	dws->dst_master	= fargs->dst;
 
-			return true;
-		}
-	}
+	dwc->request_line = fargs->req;
 
-	last_dw = dw;
-	last_bus_id = param;
-	return false;
+	chan->private = dws;
+
+	return true;
+}
+
+static struct dma_chan *dw_dma_xlate(struct of_phandle_args *dma_spec,
+					 struct of_dma *ofdma)
+{
+	struct dw_dma *dw = ofdma->of_dma_data;
+	struct dw_dma_filter_args fargs = {
+		.dw = dw,
+	};
+	dma_cap_mask_t cap;
+
+	if (dma_spec->args_count != 3)
+		return NULL;
+
+	fargs.req = be32_to_cpup(dma_spec->args+0);
+	fargs.src = be32_to_cpup(dma_spec->args+1);
+	fargs.dst = be32_to_cpup(dma_spec->args+2);
+
+	if (WARN_ON(fargs.req >= DW_DMA_MAX_NR_REQUESTS ||
+		    fargs.src >= dw->nr_masters ||
+		    fargs.dst >= dw->nr_masters))
+		return NULL;
+
+	dma_cap_zero(cap);
+	dma_cap_set(DMA_SLAVE, cap);
+
+	/* TODO: there should be a simpler way to do this */
+	return dma_request_channel(cap, dw_dma_generic_filter, &fargs);
 }
-EXPORT_SYMBOL(dw_dma_generic_filter);
 
 /* --------------------- Cyclic DMA API extensions -------------------- */
 
@@ -1553,9 +1575,8 @@ static void dw_dma_off(struct dw_dma *dw)
 static struct dw_dma_platform_data *
 dw_dma_parse_dt(struct platform_device *pdev)
 {
-	struct device_node *sn, *cn, *np = pdev->dev.of_node;
+	struct device_node *np = pdev->dev.of_node;
 	struct dw_dma_platform_data *pdata;
-	struct dw_dma_slave *sd;
 	u32 tmp, arr[4];
 
 	if (!np) {
@@ -1567,7 +1588,7 @@ dw_dma_parse_dt(struct platform_device *pdev)
 	if (!pdata)
 		return NULL;
 
-	if (of_property_read_u32(np, "nr_channels", &pdata->nr_channels))
+	if (of_property_read_u32(np, "dma-channels", &pdata->nr_channels))
 		return NULL;
 
 	if (of_property_read_bool(np, "is_private"))
@@ -1582,7 +1603,7 @@ dw_dma_parse_dt(struct platform_device *pdev)
 	if (!of_property_read_u32(np, "block_size", &tmp))
 		pdata->block_size = tmp;
 
-	if (!of_property_read_u32(np, "nr_masters", &tmp)) {
+	if (!of_property_read_u32(np, "dma-masters", &tmp)) {
 		if (tmp > 4)
 			return NULL;
 
@@ -1594,36 +1615,6 @@ dw_dma_parse_dt(struct platform_device *pdev)
 		for (tmp = 0; tmp < pdata->nr_masters; tmp++)
 			pdata->data_width[tmp] = arr[tmp];
 
-	/* parse slave data */
-	sn = of_find_node_by_name(np, "slave_info");
-	if (!sn)
-		return pdata;
-
-	/* calculate number of slaves */
-	tmp = of_get_child_count(sn);
-	if (!tmp)
-		return NULL;
-
-	sd = devm_kzalloc(&pdev->dev, sizeof(*sd) * tmp, GFP_KERNEL);
-	if (!sd)
-		return NULL;
-
-	pdata->sd = sd;
-	pdata->sd_count = tmp;
-
-	for_each_child_of_node(sn, cn) {
-		sd->dma_dev = &pdev->dev;
-		of_property_read_string(cn, "bus_id", &sd->bus_id);
-		of_property_read_u32(cn, "cfg_hi", &sd->cfg_hi);
-		of_property_read_u32(cn, "cfg_lo", &sd->cfg_lo);
-		if (!of_property_read_u32(cn, "src_master", &tmp))
-			sd->src_master = tmp;
-
-		if (!of_property_read_u32(cn, "dst_master", &tmp))
-			sd->dst_master = tmp;
-		sd++;
-	}
-
 	return pdata;
 }
 #else
@@ -1704,8 +1695,6 @@ static int dw_probe(struct platform_device *pdev)
 	clk_prepare_enable(dw->clk);
 
 	dw->regs = regs;
-	dw->sd = pdata->sd;
-	dw->sd_count = pdata->sd_count;
 
 	/* get hardware configuration parameters */
 	if (autocfg) {
@@ -1836,6 +1825,14 @@ static int dw_probe(struct platform_device *pdev)
 
 	dma_async_device_register(&dw->dma);
 
+	if (pdev->dev.of_node) {
+		err = of_dma_controller_register(pdev->dev.of_node,
+						 dw_dma_xlate, dw);
+		if (err && err != -ENODEV)
+			dev_err(&pdev->dev,
+				"could not register of_dma_controller\n");
+	}
+
 	return 0;
 }
 
@@ -1844,6 +1841,8 @@ static int __devexit dw_remove(struct platform_device *pdev)
 	struct dw_dma		*dw = platform_get_drvdata(pdev);
 	struct dw_dma_chan	*dwc, *_dwc;
 
+	if (pdev->dev.of_node)
+		of_dma_controller_free(pdev->dev.of_node);
 	dw_dma_off(dw);
 	dma_async_device_unregister(&dw->dma);
 
diff --git a/drivers/dma/dw_dmac_regs.h b/drivers/dma/dw_dmac_regs.h
index 88dd8eb31957..cf0ce5c77d60 100644
--- a/drivers/dma/dw_dmac_regs.h
+++ b/drivers/dma/dw_dmac_regs.h
@@ -13,6 +13,7 @@
 #include <linux/dw_dmac.h>
 
 #define DW_DMA_MAX_NR_CHANNELS	8
+#define DW_DMA_MAX_NR_REQUESTS	16
 
 /* flow controller */
 enum dw_dma_fc {
@@ -211,6 +212,8 @@ struct dw_dma_chan {
 	/* hardware configuration */
 	unsigned int		block_size;
 	bool			nollp;
+	unsigned int		request_line;
+	struct dw_dma_slave	slave;
 
 	/* configuration passed via DMA_SLAVE_CONFIG */
 	struct dma_slave_config dma_sconfig;
@@ -239,10 +242,6 @@ struct dw_dma {
 	struct tasklet_struct	tasklet;
 	struct clk		*clk;
 
-	/* slave information */
-	struct dw_dma_slave	*sd;
-	unsigned int		sd_count;
-
 	u8			all_chan_mask;
 
 	/* hardware configuration */
diff --git a/include/linux/dw_dmac.h b/include/linux/dw_dmac.h
index 41766de66e33..481ab2345d6b 100644
--- a/include/linux/dw_dmac.h
+++ b/include/linux/dw_dmac.h
@@ -27,7 +27,6 @@
  */
 struct dw_dma_slave {
 	struct device		*dma_dev;
-	const char		*bus_id;
 	u32			cfg_hi;
 	u32			cfg_lo;
 	u8			src_master;
@@ -60,9 +59,6 @@ struct dw_dma_platform_data {
 	unsigned short	block_size;
 	unsigned char	nr_masters;
 	unsigned char	data_width[4];
-
-	struct dw_dma_slave *sd;
-	unsigned int sd_count;
 };
 
 /* bursts size */
@@ -114,6 +110,5 @@ void dw_dma_cyclic_stop(struct dma_chan *chan);
 dma_addr_t dw_dma_get_src_addr(struct dma_chan *chan);
 
 dma_addr_t dw_dma_get_dst_addr(struct dma_chan *chan);
-bool dw_dma_generic_filter(struct dma_chan *chan, void *param);
 
 #endif /* DW_DMAC_H */
-- 
cgit v1.2.3


From edddbb1eda61753c886a3c5e159293a7b3a9e30a Mon Sep 17 00:00:00 2001
From: Weston Andros Adamson <dros@netapp.com>
Date: Thu, 28 Feb 2013 20:30:09 -0500
Subject: SUNRPC: add call to get configured timeout

Returns the configured timeout for the xprt of the rpc client.

Signed-off-by: Weston Andros Adamson <dros@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/clnt.h |  1 +
 net/sunrpc/clnt.c           | 15 +++++++++++++++
 2 files changed, 16 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index 34206b84d8da..21d52d0dc15c 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -160,6 +160,7 @@ void		rpc_setbufsize(struct rpc_clnt *, unsigned int, unsigned int);
 int		rpc_protocol(struct rpc_clnt *);
 struct net *	rpc_net_ns(struct rpc_clnt *);
 size_t		rpc_max_payload(struct rpc_clnt *);
+unsigned long	rpc_get_timeout(struct rpc_clnt *clnt);
 void		rpc_force_rebind(struct rpc_clnt *);
 size_t		rpc_peeraddr(struct rpc_clnt *, struct sockaddr *, size_t);
 const char	*rpc_peeraddr2str(struct rpc_clnt *, enum rpc_display_format_t);
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index a9f7906c1a6a..09dc0c2b56a3 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1195,6 +1195,21 @@ size_t rpc_max_payload(struct rpc_clnt *clnt)
 }
 EXPORT_SYMBOL_GPL(rpc_max_payload);
 
+/**
+ * rpc_get_timeout - Get timeout for transport in tenths of seconds
+ * @clnt: RPC client to query
+ */
+unsigned long rpc_get_timeout(struct rpc_clnt *clnt)
+{
+	unsigned long ret;
+
+	rcu_read_lock();
+	ret = rcu_dereference(clnt->cl_xprt)->timeout->to_initval;
+	rcu_read_unlock();
+	return ret;
+}
+EXPORT_SYMBOL_GPL(rpc_get_timeout);
+
 /**
  * rpc_force_rebind - force transport to check that remote port is unchanged
  * @clnt: client to rebind
-- 
cgit v1.2.3


From 3000512137602b84d1ad5fd89d62984993a19bb6 Mon Sep 17 00:00:00 2001
From: Weston Andros Adamson <dros@netapp.com>
Date: Thu, 28 Feb 2013 20:30:10 -0500
Subject: NFSv4.1: LAYOUTGET EDELAY loops timeout to the MDS

The client will currently try LAYOUTGETs forever if a server is returning
NFS4ERR_LAYOUTTRYLATER or NFS4ERR_RECALLCONFLICT - even if the client no
longer needs the layout (ie process killed, unmounted).

This patch uses the DS timeout value (module parameter 'dataserver_timeo'
via rpc layer) to set an upper limit of how long the client tries LATOUTGETs
in this situation.  Once the timeout is reached, IO is redirected to the MDS.

This also changes how the client checks if a layout is on the clp list
to avoid a double list_add.

Signed-off-by: Weston Andros Adamson <dros@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c       | 9 ++++++++-
 fs/nfs/pnfs.c           | 7 +++----
 include/linux/nfs_xdr.h | 1 +
 3 files changed, 12 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 37eb38566fbc..b2671cb0f901 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -93,6 +93,8 @@ static int nfs4_map_errors(int err)
 		return err;
 	switch (err) {
 	case -NFS4ERR_RESOURCE:
+	case -NFS4ERR_LAYOUTTRYLATER:
+	case -NFS4ERR_RECALLCONFLICT:
 		return -EREMOTEIO;
 	case -NFS4ERR_WRONGSEC:
 		return -EPERM;
@@ -6046,6 +6048,7 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata)
 	struct nfs_server *server = NFS_SERVER(inode);
 	struct pnfs_layout_hdr *lo;
 	struct nfs4_state *state = NULL;
+	unsigned long timeo, giveup;
 
 	dprintk("--> %s\n", __func__);
 
@@ -6057,7 +6060,10 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata)
 		goto out;
 	case -NFS4ERR_LAYOUTTRYLATER:
 	case -NFS4ERR_RECALLCONFLICT:
-		task->tk_status = -NFS4ERR_DELAY;
+		timeo = rpc_get_timeout(task->tk_client);
+		giveup = lgp->args.timestamp + timeo;
+		if (time_after(giveup, jiffies))
+			task->tk_status = -NFS4ERR_DELAY;
 		break;
 	case -NFS4ERR_EXPIRED:
 	case -NFS4ERR_BAD_STATEID:
@@ -6178,6 +6184,7 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags)
 		return ERR_PTR(-ENOMEM);
 	}
 	lgp->args.layout.pglen = max_pages * PAGE_SIZE;
+	lgp->args.timestamp = jiffies;
 
 	lgp->res.layoutp = &lgp->args.layout;
 	lgp->res.seq_res.sr_slot = NULL;
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 97767c8683f9..48ac5aad6258 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1181,7 +1181,7 @@ pnfs_update_layout(struct inode *ino,
 	struct nfs_client *clp = server->nfs_client;
 	struct pnfs_layout_hdr *lo;
 	struct pnfs_layout_segment *lseg = NULL;
-	bool first = false;
+	bool first;
 
 	if (!pnfs_enabled_sb(NFS_SERVER(ino)))
 		goto out;
@@ -1215,10 +1215,9 @@ pnfs_update_layout(struct inode *ino,
 		goto out_unlock;
 	atomic_inc(&lo->plh_outstanding);
 
-	if (list_empty(&lo->plh_segs))
-		first = true;
-
+	first = list_empty(&lo->plh_layouts) ? true : false;
 	spin_unlock(&ino->i_lock);
+
 	if (first) {
 		/* The lo must be on the clp list if there is any
 		 * chance of a CB_LAYOUTRECALL(FILE) coming in.
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 29adb12c7ecf..2250cab6fc4b 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -233,6 +233,7 @@ struct nfs4_layoutget_args {
 	struct inode *inode;
 	struct nfs_open_context *ctx;
 	nfs4_stateid stateid;
+	unsigned long timestamp;
 	struct nfs4_layoutdriver_data layout;
 };
 
-- 
cgit v1.2.3


From 1a71fb84fda651105e1e194c2d3a3a13a38210a9 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <w.sang@pengutronix.de>
Date: Tue, 27 Sep 2011 22:21:37 +0200
Subject: rtc: stmp3xxx: add wdt-accessor function

This RTC also includes a watchdog timer. Provide an accessor function
for setting the watchdog timeout value which will be picked up by a
watchdog driver. Also register the platform_device for the watchdog here
to get the boot-time dependencies right.

Signed-off-by: Wolfram Sang <w.sang@pengutronix.de>
Acked-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Wim Van Sebroeck <wim@iguana.be>
---
 drivers/rtc/rtc-stmp3xxx.c       | 64 ++++++++++++++++++++++++++++++++++++++++
 include/linux/stmp3xxx_rtc_wdt.h | 15 ++++++++++
 2 files changed, 79 insertions(+)
 create mode 100644 include/linux/stmp3xxx_rtc_wdt.h

(limited to 'include/linux')

diff --git a/drivers/rtc/rtc-stmp3xxx.c b/drivers/rtc/rtc-stmp3xxx.c
index b2a8ed99b2bf..98f0d3c30738 100644
--- a/drivers/rtc/rtc-stmp3xxx.c
+++ b/drivers/rtc/rtc-stmp3xxx.c
@@ -27,6 +27,8 @@
 #include <linux/slab.h>
 #include <linux/of_device.h>
 #include <linux/of.h>
+#include <linux/stmp_device.h>
+#include <linux/stmp3xxx_rtc_wdt.h>
 
 #include <mach/common.h>
 
@@ -36,6 +38,7 @@
 #define STMP3XXX_RTC_CTRL_ALARM_IRQ_EN		0x00000001
 #define STMP3XXX_RTC_CTRL_ONEMSEC_IRQ_EN	0x00000002
 #define STMP3XXX_RTC_CTRL_ALARM_IRQ		0x00000004
+#define STMP3XXX_RTC_CTRL_WATCHDOGEN		0x00000010
 
 #define STMP3XXX_RTC_STAT			0x10
 #define STMP3XXX_RTC_STAT_STALE_SHIFT		16
@@ -45,6 +48,8 @@
 
 #define STMP3XXX_RTC_ALARM			0x40
 
+#define STMP3XXX_RTC_WATCHDOG			0x50
+
 #define STMP3XXX_RTC_PERSISTENT0		0x60
 #define STMP3XXX_RTC_PERSISTENT0_SET		0x64
 #define STMP3XXX_RTC_PERSISTENT0_CLR		0x68
@@ -52,12 +57,70 @@
 #define STMP3XXX_RTC_PERSISTENT0_ALARM_EN	0x00000004
 #define STMP3XXX_RTC_PERSISTENT0_ALARM_WAKE	0x00000080
 
+#define STMP3XXX_RTC_PERSISTENT1		0x70
+/* missing bitmask in headers */
+#define STMP3XXX_RTC_PERSISTENT1_FORCE_UPDATER	0x80000000
+
 struct stmp3xxx_rtc_data {
 	struct rtc_device *rtc;
 	void __iomem *io;
 	int irq_alarm;
 };
 
+#if IS_ENABLED(CONFIG_STMP3XXX_RTC_WATCHDOG)
+/**
+ * stmp3xxx_wdt_set_timeout - configure the watchdog inside the STMP3xxx RTC
+ * @dev: the parent device of the watchdog (= the RTC)
+ * @timeout: the desired value for the timeout register of the watchdog.
+ *           0 disables the watchdog
+ *
+ * The watchdog needs one register and two bits which are in the RTC domain.
+ * To handle the resource conflict, the RTC driver will create another
+ * platform_device for the watchdog driver as a child of the RTC device.
+ * The watchdog driver is passed the below accessor function via platform_data
+ * to configure the watchdog. Locking is not needed because accessing SET/CLR
+ * registers is atomic.
+ */
+
+static void stmp3xxx_wdt_set_timeout(struct device *dev, u32 timeout)
+{
+	struct stmp3xxx_rtc_data *rtc_data = dev_get_drvdata(dev);
+
+	if (timeout) {
+		writel(timeout, rtc_data->io + STMP3XXX_RTC_WATCHDOG);
+		writel(STMP3XXX_RTC_CTRL_WATCHDOGEN,
+		       rtc_data->io + STMP3XXX_RTC_CTRL + STMP_OFFSET_REG_SET);
+		writel(STMP3XXX_RTC_PERSISTENT1_FORCE_UPDATER,
+		       rtc_data->io + STMP3XXX_RTC_PERSISTENT1 + STMP_OFFSET_REG_SET);
+	} else {
+		writel(STMP3XXX_RTC_CTRL_WATCHDOGEN,
+		       rtc_data->io + STMP3XXX_RTC_CTRL + STMP_OFFSET_REG_CLR);
+		writel(STMP3XXX_RTC_PERSISTENT1_FORCE_UPDATER,
+		       rtc_data->io + STMP3XXX_RTC_PERSISTENT1 + STMP_OFFSET_REG_CLR);
+	}
+}
+
+static struct stmp3xxx_wdt_pdata wdt_pdata = {
+	.wdt_set_timeout = stmp3xxx_wdt_set_timeout,
+};
+
+static void stmp3xxx_wdt_register(struct platform_device *rtc_pdev)
+{
+	struct platform_device *wdt_pdev =
+		platform_device_alloc("stmp3xxx_rtc_wdt", rtc_pdev->id);
+
+	if (wdt_pdev) {
+		wdt_pdev->dev.parent = &rtc_pdev->dev;
+		wdt_pdev->dev.platform_data = &wdt_pdata;
+		platform_device_add(wdt_pdev);
+	}
+}
+#else
+static void stmp3xxx_wdt_register(struct platform_device *rtc_pdev)
+{
+}
+#endif /* CONFIG_STMP3XXX_RTC_WATCHDOG */
+
 static void stmp3xxx_wait_time(struct stmp3xxx_rtc_data *rtc_data)
 {
 	/*
@@ -233,6 +296,7 @@ static int stmp3xxx_rtc_probe(struct platform_device *pdev)
 		goto out_irq_alarm;
 	}
 
+	stmp3xxx_wdt_register(pdev);
 	return 0;
 
 out_irq_alarm:
diff --git a/include/linux/stmp3xxx_rtc_wdt.h b/include/linux/stmp3xxx_rtc_wdt.h
new file mode 100644
index 000000000000..1dd12c96231b
--- /dev/null
+++ b/include/linux/stmp3xxx_rtc_wdt.h
@@ -0,0 +1,15 @@
+/*
+ * stmp3xxx_rtc_wdt.h
+ *
+ * Copyright (C) 2011 Wolfram Sang, Pengutronix e.K.
+ *
+ * This file is released under the GPLv2.
+ */
+#ifndef __LINUX_STMP3XXX_RTC_WDT_H
+#define __LINUX_STMP3XXX_RTC_WDT_H
+
+struct stmp3xxx_wdt_pdata {
+	void (*wdt_set_timeout)(struct device *dev, u32 timeout);
+};
+
+#endif /* __LINUX_STMP3XXX_RTC_WDT_H */
-- 
cgit v1.2.3


From f82dedf812ecdf0c19c6c240e85a4a487ab62016 Mon Sep 17 00:00:00 2001
From: Hauke Mehrtens <hauke@hauke-m.de>
Date: Thu, 24 Jan 2013 18:13:34 +0100
Subject: watchdog: bcm47xx_wdt.c: use platform device

Instead of accessing the function to set the watchdog timer directly,
register a platform driver the platform could register to use this
watchdog driver.

Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
Signed-off-by: Wim Van Sebroeck <wim@iguana.be>
---
 drivers/watchdog/bcm47xx_wdt.c | 156 ++++++++++++++++++++---------------------
 include/linux/bcm47xx_wdt.h    |   9 +++
 2 files changed, 87 insertions(+), 78 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/watchdog/bcm47xx_wdt.c b/drivers/watchdog/bcm47xx_wdt.c
index 4c520d68397e..97ccfce0dabb 100644
--- a/drivers/watchdog/bcm47xx_wdt.c
+++ b/drivers/watchdog/bcm47xx_wdt.c
@@ -3,6 +3,7 @@
  *
  *  Copyright (C) 2008 Aleksandar Radovanovic <biblbroks@sezampro.rs>
  *  Copyright (C) 2009 Matthieu CASTET <castet.matthieu@free.fr>
+ *  Copyright (C) 2012-2013 Hauke Mehrtens <hauke@hauke-m.de>
  *
  *  This program is free software; you can redistribute it and/or
  *  modify it under the terms of the GNU General Public License
@@ -12,19 +13,19 @@
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
+#include <linux/bcm47xx_wdt.h>
 #include <linux/bitops.h>
 #include <linux/errno.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
+#include <linux/platform_device.h>
 #include <linux/reboot.h>
 #include <linux/types.h>
 #include <linux/watchdog.h>
 #include <linux/timer.h>
 #include <linux/jiffies.h>
-#include <linux/ssb/ssb_embedded.h>
-#include <asm/mach-bcm47xx/bcm47xx.h>
 
 #define DRV_NAME		"bcm47xx_wdt"
 
@@ -43,48 +44,19 @@ MODULE_PARM_DESC(nowayout,
 		"Watchdog cannot be stopped once started (default="
 				__MODULE_STRING(WATCHDOG_NOWAYOUT) ")");
 
-static struct timer_list wdt_timer;
-static atomic_t ticks;
-
-static inline void bcm47xx_wdt_hw_start(void)
+static inline struct bcm47xx_wdt *bcm47xx_wdt_get(struct watchdog_device *wdd)
 {
-	/* this is 2,5s on 100Mhz clock  and 2s on 133 Mhz */
-	switch (bcm47xx_bus_type) {
-#ifdef CONFIG_BCM47XX_SSB
-	case BCM47XX_BUS_TYPE_SSB:
-		ssb_watchdog_timer_set(&bcm47xx_bus.ssb, 0xfffffff);
-		break;
-#endif
-#ifdef CONFIG_BCM47XX_BCMA
-	case BCM47XX_BUS_TYPE_BCMA:
-		bcma_chipco_watchdog_timer_set(&bcm47xx_bus.bcma.bus.drv_cc,
-					       0xfffffff);
-		break;
-#endif
-	}
+	return container_of(wdd, struct bcm47xx_wdt, wdd);
 }
 
-static inline int bcm47xx_wdt_hw_stop(void)
+static void bcm47xx_timer_tick(unsigned long data)
 {
-	switch (bcm47xx_bus_type) {
-#ifdef CONFIG_BCM47XX_SSB
-	case BCM47XX_BUS_TYPE_SSB:
-		return ssb_watchdog_timer_set(&bcm47xx_bus.ssb, 0);
-#endif
-#ifdef CONFIG_BCM47XX_BCMA
-	case BCM47XX_BUS_TYPE_BCMA:
-		bcma_chipco_watchdog_timer_set(&bcm47xx_bus.bcma.bus.drv_cc, 0);
-		return 0;
-#endif
-	}
-	return -EINVAL;
-}
+	struct bcm47xx_wdt *wdt = (struct bcm47xx_wdt *)data;
+	u32 next_tick = min(wdt->wdd.timeout * 1000, wdt->max_timer_ms);
 
-static void bcm47xx_timer_tick(unsigned long unused)
-{
-	if (!atomic_dec_and_test(&ticks)) {
-		bcm47xx_wdt_hw_start();
-		mod_timer(&wdt_timer, jiffies + HZ);
+	if (!atomic_dec_and_test(&wdt->soft_ticks)) {
+		wdt->timer_set_ms(wdt, next_tick);
+		mod_timer(&wdt->soft_timer, jiffies + HZ);
 	} else {
 		pr_crit("Watchdog will fire soon!!!\n");
 	}
@@ -92,23 +64,29 @@ static void bcm47xx_timer_tick(unsigned long unused)
 
 static int bcm47xx_wdt_keepalive(struct watchdog_device *wdd)
 {
-	atomic_set(&ticks, wdt_time);
+	struct bcm47xx_wdt *wdt = bcm47xx_wdt_get(wdd);
+
+	atomic_set(&wdt->soft_ticks, wdd->timeout);
 
 	return 0;
 }
 
 static int bcm47xx_wdt_start(struct watchdog_device *wdd)
 {
-	bcm47xx_wdt_pet();
-	bcm47xx_timer_tick(0);
+	struct bcm47xx_wdt *wdt = bcm47xx_wdt_get(wdd);
+
+	bcm47xx_wdt_keepalive(wdd);
+	bcm47xx_timer_tick((unsigned long)wdt);
 
 	return 0;
 }
 
 static int bcm47xx_wdt_stop(struct watchdog_device *wdd)
 {
-	del_timer_sync(&wdt_timer);
-	bcm47xx_wdt_hw_stop();
+	struct bcm47xx_wdt *wdt = bcm47xx_wdt_get(wdd);
+
+	del_timer_sync(&wdt->soft_timer);
+	wdt->timer_set(wdt, 0);
 
 	return 0;
 }
@@ -116,10 +94,13 @@ static int bcm47xx_wdt_stop(struct watchdog_device *wdd)
 static int bcm47xx_wdt_set_timeout(struct watchdog_device *wdd,
 				   unsigned int new_time)
 {
-	if ((new_time <= 0) || (new_time > WDT_MAX_TIME))
+	if (new_time < 1 || new_time > WDT_MAX_TIME) {
+		pr_warn("timeout value must be 1<=x<=%d, using %d\n",
+			WDT_MAX_TIME, new_time);
 		return -EINVAL;
+	}
 
-	wdt_time = new_time;
+	wdd->timeout = new_time;
 	return 0;
 }
 
@@ -133,8 +114,11 @@ static const struct watchdog_info bcm47xx_wdt_info = {
 static int bcm47xx_wdt_notify_sys(struct notifier_block *this,
 				  unsigned long code, void *unused)
 {
+	struct bcm47xx_wdt *wdt;
+
+	wdt = container_of(this, struct bcm47xx_wdt, notifier);
 	if (code == SYS_DOWN || code == SYS_HALT)
-		bcm47xx_wdt_stop();
+		wdt->wdd.ops->stop(&wdt->wdd);
 	return NOTIFY_DONE;
 }
 
@@ -146,56 +130,72 @@ static struct watchdog_ops bcm47xx_wdt_ops = {
 	.set_timeout	= bcm47xx_wdt_set_timeout,
 };
 
-static struct watchdog_device bcm47xx_wdt_wdd = {
-	.info		= &bcm47xx_wdt_info,
-	.ops		= &bcm47xx_wdt_ops,
-};
-
-static struct notifier_block bcm47xx_wdt_notifier = {
-	.notifier_call = bcm47xx_wdt_notify_sys,
-};
-
-static int __init bcm47xx_wdt_init(void)
+static int bcm47xx_wdt_probe(struct platform_device *pdev)
 {
 	int ret;
+	struct bcm47xx_wdt *wdt = dev_get_platdata(&pdev->dev);
 
-	if (bcm47xx_wdt_hw_stop() < 0)
-		return -ENODEV;
+	if (!wdt)
+		return -ENXIO;
 
-	setup_timer(&wdt_timer, bcm47xx_timer_tick, 0L);
+	setup_timer(&wdt->soft_timer, bcm47xx_timer_tick,
+		    (long unsigned int)wdt);
 
-	if (bcm47xx_wdt_settimeout(wdt_time)) {
-		bcm47xx_wdt_settimeout(WDT_DEFAULT_TIME);
-		pr_info("wdt_time value must be 0 < wdt_time < %d, using %d\n",
-			(WDT_MAX_TIME + 1), wdt_time);
-	}
-	watchdog_set_nowayout(&bcm47xx_wdt_wdd, nowayout);
+	wdt->wdd.ops = &bcm47xx_wdt_ops;
+	wdt->wdd.info = &bcm47xx_wdt_info;
+	wdt->wdd.timeout = WDT_DEFAULT_TIME;
+	ret = wdt->wdd.ops->set_timeout(&wdt->wdd, timeout);
+	if (ret)
+		goto err_timer;
+	watchdog_set_nowayout(&wdt->wdd, nowayout);
+
+	wdt->notifier.notifier_call = &bcm47xx_wdt_notify_sys;
 
-	ret = register_reboot_notifier(&bcm47xx_wdt_notifier);
+	ret = register_reboot_notifier(&wdt->notifier);
 	if (ret)
-		return ret;
+		goto err_timer;
 
-	ret = watchdog_register_device(&bcm47xx_wdt_wdd);
-	if (ret) {
-		unregister_reboot_notifier(&bcm47xx_wdt_notifier);
-		return ret;
-	}
+	ret = watchdog_register_device(&wdt->wdd);
+	if (ret)
+		goto err_notifier;
 
 	pr_info("BCM47xx Watchdog Timer enabled (%d seconds%s)\n",
 		wdt_time, nowayout ? ", nowayout" : "");
 	return 0;
+
+err_notifier:
+	unregister_reboot_notifier(&wdt->notifier);
+err_timer:
+	del_timer_sync(&wdt->soft_timer);
+
+	return ret;
 }
 
-static void __exit bcm47xx_wdt_exit(void)
+static int bcm47xx_wdt_remove(struct platform_device *pdev)
 {
-	watchdog_unregister_device(&bcm47xx_wdt_wdd);
+	struct bcm47xx_wdt *wdt = dev_get_platdata(&pdev->dev);
+
+	if (!wdt)
+		return -ENXIO;
+
+	watchdog_unregister_device(&wdt->wdd);
+	unregister_reboot_notifier(&wdt->notifier);
 
-	unregister_reboot_notifier(&bcm47xx_wdt_notifier);
+	return 0;
 }
 
-module_init(bcm47xx_wdt_init);
-module_exit(bcm47xx_wdt_exit);
+static struct platform_driver bcm47xx_wdt_driver = {
+	.driver		= {
+		.owner	= THIS_MODULE,
+		.name	= "bcm47xx-wdt",
+	},
+	.probe		= bcm47xx_wdt_probe,
+	.remove		= bcm47xx_wdt_remove,
+};
+
+module_platform_driver(bcm47xx_wdt_driver);
 
 MODULE_AUTHOR("Aleksandar Radovanovic");
+MODULE_AUTHOR("Hauke Mehrtens <hauke@hauke-m.de>");
 MODULE_DESCRIPTION("Watchdog driver for Broadcom BCM47xx");
 MODULE_LICENSE("GPL");
diff --git a/include/linux/bcm47xx_wdt.h b/include/linux/bcm47xx_wdt.h
index e5dfc256485b..b708786d4cbf 100644
--- a/include/linux/bcm47xx_wdt.h
+++ b/include/linux/bcm47xx_wdt.h
@@ -1,7 +1,10 @@
 #ifndef LINUX_BCM47XX_WDT_H_
 #define LINUX_BCM47XX_WDT_H_
 
+#include <linux/notifier.h>
+#include <linux/timer.h>
 #include <linux/types.h>
+#include <linux/watchdog.h>
 
 
 struct bcm47xx_wdt {
@@ -10,6 +13,12 @@ struct bcm47xx_wdt {
 	u32 max_timer_ms;
 
 	void *driver_data;
+
+	struct watchdog_device wdd;
+	struct notifier_block notifier;
+
+	struct timer_list soft_timer;
+	atomic_t soft_ticks;
 };
 
 static inline void *bcm47xx_wdt_get_drvdata(struct bcm47xx_wdt *wdt)
-- 
cgit v1.2.3


From 3048253ed957fc6cdc34599178408559aa1e0062 Mon Sep 17 00:00:00 2001
From: Fabio Porcedda <fabio.porcedda@gmail.com>
Date: Tue, 8 Jan 2013 11:04:10 +0100
Subject: watchdog: core: dt: add support for the timeout-sec dt property

Add support for watchdog drivers to initialize/set the timeout field
of the watchdog_device structure. The timeout field is initialised
either with the module timeout parameter value (if valid) or with the
timeout-sec dt property (if valid). If both are invalid the initial
value is unchanged.

Signed-off-by: Fabio Porcedda <fabio.porcedda@gmail.com>
Acked-by: Nicolas Ferre <nicolas.ferre@atmel.com>
Signed-off-by: Wim Van Sebroeck <wim@iguana.be>
---
 Documentation/watchdog/watchdog-kernel-api.txt | 14 +++++-
 drivers/watchdog/watchdog_core.c               | 66 ++++++++++++++++++++++----
 drivers/watchdog/watchdog_dev.c                |  3 +-
 include/linux/watchdog.h                       |  9 ++++
 4 files changed, 80 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/watchdog/watchdog-kernel-api.txt b/Documentation/watchdog/watchdog-kernel-api.txt
index 086638f6c82d..a0438f3957ca 100644
--- a/Documentation/watchdog/watchdog-kernel-api.txt
+++ b/Documentation/watchdog/watchdog-kernel-api.txt
@@ -1,6 +1,6 @@
 The Linux WatchDog Timer Driver Core kernel API.
 ===============================================
-Last reviewed: 22-May-2012
+Last reviewed: 12-Feb-2013
 
 Wim Van Sebroeck <wim@iguana.be>
 
@@ -212,3 +212,15 @@ driver specific data to and a pointer to the data itself.
 The watchdog_get_drvdata function allows you to retrieve driver specific data.
 The argument of this function is the watchdog device where you want to retrieve
 data from. The function returns the pointer to the driver specific data.
+
+To initialize the timeout field, the following function can be used:
+
+extern int watchdog_init_timeout(struct watchdog_device *wdd,
+                                  unsigned int timeout_parm, struct device *dev);
+
+The watchdog_init_timeout function allows you to initialize the timeout field
+using the module timeout parameter or by retrieving the timeout-sec property from
+the device tree (if the module timeout parameter is invalid). Best practice is
+to set the default timeout value as timeout value in the watchdog_device and
+then use this function to set the user "preferred" timeout value.
+This routine returns zero on success and a negative errno code for failure.
diff --git a/drivers/watchdog/watchdog_core.c b/drivers/watchdog/watchdog_core.c
index 3796434991fa..05d18b4c661b 100644
--- a/drivers/watchdog/watchdog_core.c
+++ b/drivers/watchdog/watchdog_core.c
@@ -36,12 +36,68 @@
 #include <linux/init.h>		/* For __init/__exit/... */
 #include <linux/idr.h>		/* For ida_* macros */
 #include <linux/err.h>		/* For IS_ERR macros */
+#include <linux/of.h>		/* For of_get_timeout_sec */
 
 #include "watchdog_core.h"	/* For watchdog_dev_register/... */
 
 static DEFINE_IDA(watchdog_ida);
 static struct class *watchdog_class;
 
+static void watchdog_check_min_max_timeout(struct watchdog_device *wdd)
+{
+	/*
+	 * Check that we have valid min and max timeout values, if
+	 * not reset them both to 0 (=not used or unknown)
+	 */
+	if (wdd->min_timeout > wdd->max_timeout) {
+		pr_info("Invalid min and max timeout values, resetting to 0!\n");
+		wdd->min_timeout = 0;
+		wdd->max_timeout = 0;
+	}
+}
+
+/**
+ * watchdog_init_timeout() - initialize the timeout field
+ * @timeout_parm: timeout module parameter
+ * @dev: Device that stores the timeout-sec property
+ *
+ * Initialize the timeout field of the watchdog_device struct with either the
+ * timeout module parameter (if it is valid value) or the timeout-sec property
+ * (only if it is a valid value and the timeout_parm is out of bounds).
+ * If none of them are valid then we keep the old value (which should normally
+ * be the default timeout value.
+ *
+ * A zero is returned on success and -EINVAL for failure.
+ */
+int watchdog_init_timeout(struct watchdog_device *wdd,
+				unsigned int timeout_parm, struct device *dev)
+{
+	unsigned int t = 0;
+	int ret = 0;
+
+	watchdog_check_min_max_timeout(wdd);
+
+	/* try to get the tiemout module parameter first */
+	if (!watchdog_timeout_invalid(wdd, timeout_parm)) {
+		wdd->timeout = timeout_parm;
+		return ret;
+	}
+	if (timeout_parm)
+		ret = -EINVAL;
+
+	/* try to get the timeout_sec property */
+	if (dev == NULL || dev->of_node == NULL)
+		return ret;
+	of_property_read_u32(dev->of_node, "timeout-sec", &t);
+	if (!watchdog_timeout_invalid(wdd, t))
+		wdd->timeout = t;
+	else
+		ret = -EINVAL;
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(watchdog_init_timeout);
+
 /**
  * watchdog_register_device() - register a watchdog device
  * @wdd: watchdog device
@@ -63,15 +119,7 @@ int watchdog_register_device(struct watchdog_device *wdd)
 	if (wdd->ops->start == NULL || wdd->ops->stop == NULL)
 		return -EINVAL;
 
-	/*
-	 * Check that we have valid min and max timeout values, if
-	 * not reset them both to 0 (=not used or unknown)
-	 */
-	if (wdd->min_timeout > wdd->max_timeout) {
-		pr_info("Invalid min and max timeout values, resetting to 0!\n");
-		wdd->min_timeout = 0;
-		wdd->max_timeout = 0;
-	}
+	watchdog_check_min_max_timeout(wdd);
 
 	/*
 	 * Note: now that all watchdog_device data has been verified, we
diff --git a/drivers/watchdog/watchdog_dev.c b/drivers/watchdog/watchdog_dev.c
index ef8edecfc526..08b48bbf9f4b 100644
--- a/drivers/watchdog/watchdog_dev.c
+++ b/drivers/watchdog/watchdog_dev.c
@@ -200,8 +200,7 @@ static int watchdog_set_timeout(struct watchdog_device *wddev,
 	    !(wddev->info->options & WDIOF_SETTIMEOUT))
 		return -EOPNOTSUPP;
 
-	if ((wddev->max_timeout != 0) &&
-	    (timeout < wddev->min_timeout || timeout > wddev->max_timeout))
+	if (watchdog_timeout_invalid(wddev, timeout))
 		return -EINVAL;
 
 	mutex_lock(&wddev->lock);
diff --git a/include/linux/watchdog.h b/include/linux/watchdog.h
index 3a9df2f43be6..2a3038ee17a3 100644
--- a/include/linux/watchdog.h
+++ b/include/linux/watchdog.h
@@ -118,6 +118,13 @@ static inline void watchdog_set_nowayout(struct watchdog_device *wdd, bool noway
 		set_bit(WDOG_NO_WAY_OUT, &wdd->status);
 }
 
+/* Use the following function to check if a timeout value is invalid */
+static inline bool watchdog_timeout_invalid(struct watchdog_device *wdd, unsigned int t)
+{
+	return ((wdd->max_timeout != 0) &&
+		(t < wdd->min_timeout || t > wdd->max_timeout));
+}
+
 /* Use the following functions to manipulate watchdog driver specific data */
 static inline void watchdog_set_drvdata(struct watchdog_device *wdd, void *data)
 {
@@ -130,6 +137,8 @@ static inline void *watchdog_get_drvdata(struct watchdog_device *wdd)
 }
 
 /* drivers/watchdog/watchdog_core.c */
+extern int watchdog_init_timeout(struct watchdog_device *wdd,
+				  unsigned int timeout_parm, struct device *dev);
 extern int watchdog_register_device(struct watchdog_device *);
 extern void watchdog_unregister_device(struct watchdog_device *);
 
-- 
cgit v1.2.3


From 8eae508b7c6ff502a71d0293b69e97c5505d5840 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Fri, 1 Mar 2013 11:11:47 -0800
Subject: hsi: fix kernel-doc warnings

Fix kernel-doc warnings in hsi files:

  Warning(include/linux/hsi/hsi.h:136): Excess struct/union/enum/typedef member 'e_handler' description in 'hsi_client'
  Warning(include/linux/hsi/hsi.h:136): Excess struct/union/enum/typedef member 'pclaimed' description in 'hsi_client'
  Warning(include/linux/hsi/hsi.h:136): Excess struct/union/enum/typedef member 'nb' description in 'hsi_client'
  Warning(drivers/hsi/hsi.c:434): No description found for parameter 'handler'
  Warning(drivers/hsi/hsi.c:434): Excess function parameter 'cb' description in 'hsi_register_port_event'

Don't document "private:" fields with kernel-doc notation.
If you want to leave them fully documented, that's OK, but
then don't mark them as "private:".

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Cc: Carlos Chinea <carlos.chinea@nokia.com>
Cc: Linus Walleij <linus.walleij@linaro.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: linux-kernel@vger.kernel.org
Cc: linux-omap@vger.kernel.org
Acked-by: Nishanth Menon <nm@ti.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/hsi/hsi.c       | 2 +-
 include/linux/hsi/hsi.h | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hsi/hsi.c b/drivers/hsi/hsi.c
index 2d58f939d27f..833dd1afbf46 100644
--- a/drivers/hsi/hsi.c
+++ b/drivers/hsi/hsi.c
@@ -420,7 +420,7 @@ static int hsi_event_notifier_call(struct notifier_block *nb,
 /**
  * hsi_register_port_event - Register a client to receive port events
  * @cl: HSI client that wants to receive port events
- * @cb: Event handler callback
+ * @handler: Event handler callback
  *
  * Clients should register a callback to be able to receive
  * events from the ports. Registration should happen after
diff --git a/include/linux/hsi/hsi.h b/include/linux/hsi/hsi.h
index 56fae865e272..0dca785288cf 100644
--- a/include/linux/hsi/hsi.h
+++ b/include/linux/hsi/hsi.h
@@ -121,9 +121,9 @@ static inline int hsi_register_board_info(struct hsi_board_info const *info,
  * @device: Driver model representation of the device
  * @tx_cfg: HSI TX configuration
  * @rx_cfg: HSI RX configuration
- * @e_handler: Callback for handling port events (RX Wake High/Low)
- * @pclaimed: Keeps tracks if the clients claimed its associated HSI port
- * @nb: Notifier block for port events
+ * e_handler: Callback for handling port events (RX Wake High/Low)
+ * pclaimed: Keeps tracks if the clients claimed its associated HSI port
+ * nb: Notifier block for port events
  */
 struct hsi_client {
 	struct device		device;
-- 
cgit v1.2.3


From fd7c092e711ebab55b2688d3859d95dfd0301f73 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Fri, 1 Mar 2013 22:45:44 +0000
Subject: dm: fix truncated status strings

Avoid returning a truncated table or status string instead of setting
the DM_BUFFER_FULL_FLAG when the last target of a table fills the
buffer.

When processing a table or status request, the function retrieve_status
calls ti->type->status. If ti->type->status returns non-zero,
retrieve_status assumes that the buffer overflowed and sets
DM_BUFFER_FULL_FLAG.

However, targets don't return non-zero values from their status method
on overflow. Most targets returns always zero.

If a buffer overflow happens in a target that is not the last in the
table, it gets noticed during the next iteration of the loop in
retrieve_status; but if a buffer overflow happens in the last target, it
goes unnoticed and erroneously truncated data is returned.

In the current code, the targets behave in the following way:
* dm-crypt returns -ENOMEM if there is not enough space to store the
  key, but it returns 0 on all other overflows.
* dm-thin returns errors from the status method if a disk error happened.
  This is incorrect because retrieve_status doesn't check the error
  code, it assumes that all non-zero values mean buffer overflow.
* all the other targets always return 0.

This patch changes the ti->type->status function to return void (because
most targets don't use the return code). Overflow is detected in
retrieve_status: if the status method fills up the remaining space
completely, it is assumed that buffer overflow happened.

Cc: stable@vger.kernel.org
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm-crypt.c         | 39 +++++----------------
 drivers/md/dm-delay.c         |  8 ++---
 drivers/md/dm-flakey.c        |  7 ++--
 drivers/md/dm-ioctl.c         | 14 +++++---
 drivers/md/dm-linear.c        |  7 ++--
 drivers/md/dm-mpath.c         |  8 ++---
 drivers/md/dm-raid.c          |  8 ++---
 drivers/md/dm-raid1.c         |  8 ++---
 drivers/md/dm-snap.c          | 16 ++++-----
 drivers/md/dm-stripe.c        |  7 ++--
 drivers/md/dm-thin.c          | 80 ++++++++++++++++++++++++++-----------------
 drivers/md/dm-verity.c        |  8 ++---
 include/linux/device-mapper.h |  4 +--
 13 files changed, 99 insertions(+), 115 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index f7369f9d8595..2ae151e59c0c 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -1234,20 +1234,6 @@ static int crypt_decode_key(u8 *key, char *hex, unsigned int size)
 	return 0;
 }
 
-/*
- * Encode key into its hex representation
- */
-static void crypt_encode_key(char *hex, u8 *key, unsigned int size)
-{
-	unsigned int i;
-
-	for (i = 0; i < size; i++) {
-		sprintf(hex, "%02x", *key);
-		hex += 2;
-		key++;
-	}
-}
-
 static void crypt_free_tfms(struct crypt_config *cc)
 {
 	unsigned i;
@@ -1717,11 +1703,11 @@ static int crypt_map(struct dm_target *ti, struct bio *bio)
 	return DM_MAPIO_SUBMITTED;
 }
 
-static int crypt_status(struct dm_target *ti, status_type_t type,
-			unsigned status_flags, char *result, unsigned maxlen)
+static void crypt_status(struct dm_target *ti, status_type_t type,
+			 unsigned status_flags, char *result, unsigned maxlen)
 {
 	struct crypt_config *cc = ti->private;
-	unsigned int sz = 0;
+	unsigned i, sz = 0;
 
 	switch (type) {
 	case STATUSTYPE_INFO:
@@ -1731,17 +1717,11 @@ static int crypt_status(struct dm_target *ti, status_type_t type,
 	case STATUSTYPE_TABLE:
 		DMEMIT("%s ", cc->cipher_string);
 
-		if (cc->key_size > 0) {
-			if ((maxlen - sz) < ((cc->key_size << 1) + 1))
-				return -ENOMEM;
-
-			crypt_encode_key(result + sz, cc->key, cc->key_size);
-			sz += cc->key_size << 1;
-		} else {
-			if (sz >= maxlen)
-				return -ENOMEM;
-			result[sz++] = '-';
-		}
+		if (cc->key_size > 0)
+			for (i = 0; i < cc->key_size; i++)
+				DMEMIT("%02x", cc->key[i]);
+		else
+			DMEMIT("-");
 
 		DMEMIT(" %llu %s %llu", (unsigned long long)cc->iv_offset,
 				cc->dev->name, (unsigned long long)cc->start);
@@ -1751,7 +1731,6 @@ static int crypt_status(struct dm_target *ti, status_type_t type,
 
 		break;
 	}
-	return 0;
 }
 
 static void crypt_postsuspend(struct dm_target *ti)
@@ -1845,7 +1824,7 @@ static int crypt_iterate_devices(struct dm_target *ti,
 
 static struct target_type crypt_target = {
 	.name   = "crypt",
-	.version = {1, 12, 0},
+	.version = {1, 12, 1},
 	.module = THIS_MODULE,
 	.ctr    = crypt_ctr,
 	.dtr    = crypt_dtr,
diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c
index cc1bd048acb2..c0d03b006e40 100644
--- a/drivers/md/dm-delay.c
+++ b/drivers/md/dm-delay.c
@@ -293,8 +293,8 @@ static int delay_map(struct dm_target *ti, struct bio *bio)
 	return delay_bio(dc, dc->read_delay, bio);
 }
 
-static int delay_status(struct dm_target *ti, status_type_t type,
-			unsigned status_flags, char *result, unsigned maxlen)
+static void delay_status(struct dm_target *ti, status_type_t type,
+			 unsigned status_flags, char *result, unsigned maxlen)
 {
 	struct delay_c *dc = ti->private;
 	int sz = 0;
@@ -314,8 +314,6 @@ static int delay_status(struct dm_target *ti, status_type_t type,
 			       dc->write_delay);
 		break;
 	}
-
-	return 0;
 }
 
 static int delay_iterate_devices(struct dm_target *ti,
@@ -337,7 +335,7 @@ out:
 
 static struct target_type delay_target = {
 	.name	     = "delay",
-	.version     = {1, 2, 0},
+	.version     = {1, 2, 1},
 	.module      = THIS_MODULE,
 	.ctr	     = delay_ctr,
 	.dtr	     = delay_dtr,
diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c
index 9721f2ffb1a2..5d6c04cceee0 100644
--- a/drivers/md/dm-flakey.c
+++ b/drivers/md/dm-flakey.c
@@ -337,8 +337,8 @@ static int flakey_end_io(struct dm_target *ti, struct bio *bio, int error)
 	return error;
 }
 
-static int flakey_status(struct dm_target *ti, status_type_t type,
-			 unsigned status_flags, char *result, unsigned maxlen)
+static void flakey_status(struct dm_target *ti, status_type_t type,
+			  unsigned status_flags, char *result, unsigned maxlen)
 {
 	unsigned sz = 0;
 	struct flakey_c *fc = ti->private;
@@ -368,7 +368,6 @@ static int flakey_status(struct dm_target *ti, status_type_t type,
 
 		break;
 	}
-	return 0;
 }
 
 static int flakey_ioctl(struct dm_target *ti, unsigned int cmd, unsigned long arg)
@@ -411,7 +410,7 @@ static int flakey_iterate_devices(struct dm_target *ti, iterate_devices_callout_
 
 static struct target_type flakey_target = {
 	.name   = "flakey",
-	.version = {1, 3, 0},
+	.version = {1, 3, 1},
 	.module = THIS_MODULE,
 	.ctr    = flakey_ctr,
 	.dtr    = flakey_dtr,
diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index 0666b5d14b88..eee353da3742 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -1067,6 +1067,7 @@ static void retrieve_status(struct dm_table *table,
 	num_targets = dm_table_get_num_targets(table);
 	for (i = 0; i < num_targets; i++) {
 		struct dm_target *ti = dm_table_get_target(table, i);
+		size_t l;
 
 		remaining = len - (outptr - outbuf);
 		if (remaining <= sizeof(struct dm_target_spec)) {
@@ -1093,14 +1094,17 @@ static void retrieve_status(struct dm_table *table,
 		if (ti->type->status) {
 			if (param->flags & DM_NOFLUSH_FLAG)
 				status_flags |= DM_STATUS_NOFLUSH_FLAG;
-			if (ti->type->status(ti, type, status_flags, outptr, remaining)) {
-				param->flags |= DM_BUFFER_FULL_FLAG;
-				break;
-			}
+			ti->type->status(ti, type, status_flags, outptr, remaining);
 		} else
 			outptr[0] = '\0';
 
-		outptr += strlen(outptr) + 1;
+		l = strlen(outptr) + 1;
+		if (l == remaining) {
+			param->flags |= DM_BUFFER_FULL_FLAG;
+			break;
+		}
+
+		outptr += l;
 		used = param->data_start + (outptr - outbuf);
 
 		outptr = align_ptr(outptr);
diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c
index 328cad5617ab..5be301c1e809 100644
--- a/drivers/md/dm-linear.c
+++ b/drivers/md/dm-linear.c
@@ -95,8 +95,8 @@ static int linear_map(struct dm_target *ti, struct bio *bio)
 	return DM_MAPIO_REMAPPED;
 }
 
-static int linear_status(struct dm_target *ti, status_type_t type,
-			 unsigned status_flags, char *result, unsigned maxlen)
+static void linear_status(struct dm_target *ti, status_type_t type,
+			  unsigned status_flags, char *result, unsigned maxlen)
 {
 	struct linear_c *lc = (struct linear_c *) ti->private;
 
@@ -110,7 +110,6 @@ static int linear_status(struct dm_target *ti, status_type_t type,
 				(unsigned long long)lc->start);
 		break;
 	}
-	return 0;
 }
 
 static int linear_ioctl(struct dm_target *ti, unsigned int cmd,
@@ -155,7 +154,7 @@ static int linear_iterate_devices(struct dm_target *ti,
 
 static struct target_type linear_target = {
 	.name   = "linear",
-	.version = {1, 2, 0},
+	.version = {1, 2, 1},
 	.module = THIS_MODULE,
 	.ctr    = linear_ctr,
 	.dtr    = linear_dtr,
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index 573bd04591bf..d267bb5705e9 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -1378,8 +1378,8 @@ static void multipath_resume(struct dm_target *ti)
  *     [priority selector-name num_ps_args [ps_args]*
  *      num_paths num_selector_args [path_dev [selector_args]* ]+ ]+
  */
-static int multipath_status(struct dm_target *ti, status_type_t type,
-			    unsigned status_flags, char *result, unsigned maxlen)
+static void multipath_status(struct dm_target *ti, status_type_t type,
+			     unsigned status_flags, char *result, unsigned maxlen)
 {
 	int sz = 0;
 	unsigned long flags;
@@ -1485,8 +1485,6 @@ static int multipath_status(struct dm_target *ti, status_type_t type,
 	}
 
 	spin_unlock_irqrestore(&m->lock, flags);
-
-	return 0;
 }
 
 static int multipath_message(struct dm_target *ti, unsigned argc, char **argv)
@@ -1695,7 +1693,7 @@ out:
  *---------------------------------------------------------------*/
 static struct target_type multipath_target = {
 	.name = "multipath",
-	.version = {1, 5, 0},
+	.version = {1, 5, 1},
 	.module = THIS_MODULE,
 	.ctr = multipath_ctr,
 	.dtr = multipath_dtr,
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 9e58dbd8d8cb..5a578d89da2d 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -1201,8 +1201,8 @@ static int raid_map(struct dm_target *ti, struct bio *bio)
 	return DM_MAPIO_SUBMITTED;
 }
 
-static int raid_status(struct dm_target *ti, status_type_t type,
-		       unsigned status_flags, char *result, unsigned maxlen)
+static void raid_status(struct dm_target *ti, status_type_t type,
+			unsigned status_flags, char *result, unsigned maxlen)
 {
 	struct raid_set *rs = ti->private;
 	unsigned raid_param_cnt = 1; /* at least 1 for chunksize */
@@ -1344,8 +1344,6 @@ static int raid_status(struct dm_target *ti, status_type_t type,
 				DMEMIT(" -");
 		}
 	}
-
-	return 0;
 }
 
 static int raid_iterate_devices(struct dm_target *ti, iterate_devices_callout_fn fn, void *data)
@@ -1405,7 +1403,7 @@ static void raid_resume(struct dm_target *ti)
 
 static struct target_type raid_target = {
 	.name = "raid",
-	.version = {1, 4, 1},
+	.version = {1, 4, 2},
 	.module = THIS_MODULE,
 	.ctr = raid_ctr,
 	.dtr = raid_dtr,
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index fa519185ebba..7f2419099b50 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -1347,8 +1347,8 @@ static char device_status_char(struct mirror *m)
 }
 
 
-static int mirror_status(struct dm_target *ti, status_type_t type,
-			 unsigned status_flags, char *result, unsigned maxlen)
+static void mirror_status(struct dm_target *ti, status_type_t type,
+			  unsigned status_flags, char *result, unsigned maxlen)
 {
 	unsigned int m, sz = 0;
 	struct mirror_set *ms = (struct mirror_set *) ti->private;
@@ -1383,8 +1383,6 @@ static int mirror_status(struct dm_target *ti, status_type_t type,
 		if (ms->features & DM_RAID1_HANDLE_ERRORS)
 			DMEMIT(" 1 handle_errors");
 	}
-
-	return 0;
 }
 
 static int mirror_iterate_devices(struct dm_target *ti,
@@ -1403,7 +1401,7 @@ static int mirror_iterate_devices(struct dm_target *ti,
 
 static struct target_type mirror_target = {
 	.name	 = "mirror",
-	.version = {1, 13, 1},
+	.version = {1, 13, 2},
 	.module	 = THIS_MODULE,
 	.ctr	 = mirror_ctr,
 	.dtr	 = mirror_dtr,
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 10079e07edf4..6e45e3774eab 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -1836,8 +1836,8 @@ static void snapshot_merge_resume(struct dm_target *ti)
 	start_merge(s);
 }
 
-static int snapshot_status(struct dm_target *ti, status_type_t type,
-			   unsigned status_flags, char *result, unsigned maxlen)
+static void snapshot_status(struct dm_target *ti, status_type_t type,
+			    unsigned status_flags, char *result, unsigned maxlen)
 {
 	unsigned sz = 0;
 	struct dm_snapshot *snap = ti->private;
@@ -1883,8 +1883,6 @@ static int snapshot_status(struct dm_target *ti, status_type_t type,
 					  maxlen - sz);
 		break;
 	}
-
-	return 0;
 }
 
 static int snapshot_iterate_devices(struct dm_target *ti,
@@ -2138,8 +2136,8 @@ static void origin_resume(struct dm_target *ti)
 	ti->max_io_len = get_origin_minimum_chunksize(dev->bdev);
 }
 
-static int origin_status(struct dm_target *ti, status_type_t type,
-			 unsigned status_flags, char *result, unsigned maxlen)
+static void origin_status(struct dm_target *ti, status_type_t type,
+			  unsigned status_flags, char *result, unsigned maxlen)
 {
 	struct dm_dev *dev = ti->private;
 
@@ -2152,8 +2150,6 @@ static int origin_status(struct dm_target *ti, status_type_t type,
 		snprintf(result, maxlen, "%s", dev->name);
 		break;
 	}
-
-	return 0;
 }
 
 static int origin_merge(struct dm_target *ti, struct bvec_merge_data *bvm,
@@ -2180,7 +2176,7 @@ static int origin_iterate_devices(struct dm_target *ti,
 
 static struct target_type origin_target = {
 	.name    = "snapshot-origin",
-	.version = {1, 8, 0},
+	.version = {1, 8, 1},
 	.module  = THIS_MODULE,
 	.ctr     = origin_ctr,
 	.dtr     = origin_dtr,
@@ -2193,7 +2189,7 @@ static struct target_type origin_target = {
 
 static struct target_type snapshot_target = {
 	.name    = "snapshot",
-	.version = {1, 11, 0},
+	.version = {1, 11, 1},
 	.module  = THIS_MODULE,
 	.ctr     = snapshot_ctr,
 	.dtr     = snapshot_dtr,
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index c89cde86d400..aaecefa63935 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -312,8 +312,8 @@ static int stripe_map(struct dm_target *ti, struct bio *bio)
  *
  */
 
-static int stripe_status(struct dm_target *ti, status_type_t type,
-			 unsigned status_flags, char *result, unsigned maxlen)
+static void stripe_status(struct dm_target *ti, status_type_t type,
+			  unsigned status_flags, char *result, unsigned maxlen)
 {
 	struct stripe_c *sc = (struct stripe_c *) ti->private;
 	char buffer[sc->stripes + 1];
@@ -340,7 +340,6 @@ static int stripe_status(struct dm_target *ti, status_type_t type,
 			    (unsigned long long)sc->stripe[i].physical_start);
 		break;
 	}
-	return 0;
 }
 
 static int stripe_end_io(struct dm_target *ti, struct bio *bio, int error)
@@ -428,7 +427,7 @@ static int stripe_merge(struct dm_target *ti, struct bvec_merge_data *bvm,
 
 static struct target_type stripe_target = {
 	.name   = "striped",
-	.version = {1, 5, 0},
+	.version = {1, 5, 1},
 	.module = THIS_MODULE,
 	.ctr    = stripe_ctr,
 	.dtr    = stripe_dtr,
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 5409607d4875..7a66d73148e6 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -2299,8 +2299,8 @@ static void emit_flags(struct pool_features *pf, char *result,
  *    <transaction id> <used metadata sectors>/<total metadata sectors>
  *    <used data sectors>/<total data sectors> <held metadata root>
  */
-static int pool_status(struct dm_target *ti, status_type_t type,
-		       unsigned status_flags, char *result, unsigned maxlen)
+static void pool_status(struct dm_target *ti, status_type_t type,
+			unsigned status_flags, char *result, unsigned maxlen)
 {
 	int r;
 	unsigned sz = 0;
@@ -2326,32 +2326,41 @@ static int pool_status(struct dm_target *ti, status_type_t type,
 		if (!(status_flags & DM_STATUS_NOFLUSH_FLAG) && !dm_suspended(ti))
 			(void) commit_or_fallback(pool);
 
-		r = dm_pool_get_metadata_transaction_id(pool->pmd,
-							&transaction_id);
-		if (r)
-			return r;
+		r = dm_pool_get_metadata_transaction_id(pool->pmd, &transaction_id);
+		if (r) {
+			DMERR("dm_pool_get_metadata_transaction_id returned %d", r);
+			goto err;
+		}
 
-		r = dm_pool_get_free_metadata_block_count(pool->pmd,
-							  &nr_free_blocks_metadata);
-		if (r)
-			return r;
+		r = dm_pool_get_free_metadata_block_count(pool->pmd, &nr_free_blocks_metadata);
+		if (r) {
+			DMERR("dm_pool_get_free_metadata_block_count returned %d", r);
+			goto err;
+		}
 
 		r = dm_pool_get_metadata_dev_size(pool->pmd, &nr_blocks_metadata);
-		if (r)
-			return r;
+		if (r) {
+			DMERR("dm_pool_get_metadata_dev_size returned %d", r);
+			goto err;
+		}
 
-		r = dm_pool_get_free_block_count(pool->pmd,
-						 &nr_free_blocks_data);
-		if (r)
-			return r;
+		r = dm_pool_get_free_block_count(pool->pmd, &nr_free_blocks_data);
+		if (r) {
+			DMERR("dm_pool_get_free_block_count returned %d", r);
+			goto err;
+		}
 
 		r = dm_pool_get_data_dev_size(pool->pmd, &nr_blocks_data);
-		if (r)
-			return r;
+		if (r) {
+			DMERR("dm_pool_get_data_dev_size returned %d", r);
+			goto err;
+		}
 
 		r = dm_pool_get_metadata_snap(pool->pmd, &held_root);
-		if (r)
-			return r;
+		if (r) {
+			DMERR("dm_pool_get_metadata_snap returned %d", r);
+			goto err;
+		}
 
 		DMEMIT("%llu %llu/%llu %llu/%llu ",
 		       (unsigned long long)transaction_id,
@@ -2388,8 +2397,10 @@ static int pool_status(struct dm_target *ti, status_type_t type,
 		emit_flags(&pt->requested_pf, result, sz, maxlen);
 		break;
 	}
+	return;
 
-	return 0;
+err:
+	DMEMIT("Error");
 }
 
 static int pool_iterate_devices(struct dm_target *ti,
@@ -2468,7 +2479,7 @@ static struct target_type pool_target = {
 	.name = "thin-pool",
 	.features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE |
 		    DM_TARGET_IMMUTABLE,
-	.version = {1, 6, 0},
+	.version = {1, 6, 1},
 	.module = THIS_MODULE,
 	.ctr = pool_ctr,
 	.dtr = pool_dtr,
@@ -2676,8 +2687,8 @@ static void thin_postsuspend(struct dm_target *ti)
 /*
  * <nr mapped sectors> <highest mapped sector>
  */
-static int thin_status(struct dm_target *ti, status_type_t type,
-		       unsigned status_flags, char *result, unsigned maxlen)
+static void thin_status(struct dm_target *ti, status_type_t type,
+			unsigned status_flags, char *result, unsigned maxlen)
 {
 	int r;
 	ssize_t sz = 0;
@@ -2687,7 +2698,7 @@ static int thin_status(struct dm_target *ti, status_type_t type,
 
 	if (get_pool_mode(tc->pool) == PM_FAIL) {
 		DMEMIT("Fail");
-		return 0;
+		return;
 	}
 
 	if (!tc->td)
@@ -2696,12 +2707,16 @@ static int thin_status(struct dm_target *ti, status_type_t type,
 		switch (type) {
 		case STATUSTYPE_INFO:
 			r = dm_thin_get_mapped_count(tc->td, &mapped);
-			if (r)
-				return r;
+			if (r) {
+				DMERR("dm_thin_get_mapped_count returned %d", r);
+				goto err;
+			}
 
 			r = dm_thin_get_highest_mapped_block(tc->td, &highest);
-			if (r < 0)
-				return r;
+			if (r < 0) {
+				DMERR("dm_thin_get_highest_mapped_block returned %d", r);
+				goto err;
+			}
 
 			DMEMIT("%llu ", mapped * tc->pool->sectors_per_block);
 			if (r)
@@ -2721,7 +2736,10 @@ static int thin_status(struct dm_target *ti, status_type_t type,
 		}
 	}
 
-	return 0;
+	return;
+
+err:
+	DMEMIT("Error");
 }
 
 static int thin_iterate_devices(struct dm_target *ti,
@@ -2748,7 +2766,7 @@ static int thin_iterate_devices(struct dm_target *ti,
 
 static struct target_type thin_target = {
 	.name = "thin",
-	.version = {1, 7, 0},
+	.version = {1, 7, 1},
 	.module	= THIS_MODULE,
 	.ctr = thin_ctr,
 	.dtr = thin_dtr,
diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c
index 52cde982164a..6ad538375c3c 100644
--- a/drivers/md/dm-verity.c
+++ b/drivers/md/dm-verity.c
@@ -508,8 +508,8 @@ static int verity_map(struct dm_target *ti, struct bio *bio)
 /*
  * Status: V (valid) or C (corruption found)
  */
-static int verity_status(struct dm_target *ti, status_type_t type,
-			 unsigned status_flags, char *result, unsigned maxlen)
+static void verity_status(struct dm_target *ti, status_type_t type,
+			  unsigned status_flags, char *result, unsigned maxlen)
 {
 	struct dm_verity *v = ti->private;
 	unsigned sz = 0;
@@ -540,8 +540,6 @@ static int verity_status(struct dm_target *ti, status_type_t type,
 				DMEMIT("%02x", v->salt[x]);
 		break;
 	}
-
-	return 0;
 }
 
 static int verity_ioctl(struct dm_target *ti, unsigned cmd,
@@ -860,7 +858,7 @@ bad:
 
 static struct target_type verity_target = {
 	.name		= "verity",
-	.version	= {1, 1, 0},
+	.version	= {1, 1, 1},
 	.module		= THIS_MODULE,
 	.ctr		= verity_ctr,
 	.dtr		= verity_dtr,
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index bf6afa2fc432..a5cda3ea6b88 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -68,8 +68,8 @@ typedef void (*dm_postsuspend_fn) (struct dm_target *ti);
 typedef int (*dm_preresume_fn) (struct dm_target *ti);
 typedef void (*dm_resume_fn) (struct dm_target *ti);
 
-typedef int (*dm_status_fn) (struct dm_target *ti, status_type_t status_type,
-			     unsigned status_flags, char *result, unsigned maxlen);
+typedef void (*dm_status_fn) (struct dm_target *ti, status_type_t status_type,
+			      unsigned status_flags, char *result, unsigned maxlen);
 
 typedef int (*dm_message_fn) (struct dm_target *ti, unsigned argc, char **argv);
 
-- 
cgit v1.2.3


From 55a62eef8d1b50ceff3b7bf46851103bdcc7e5b0 Mon Sep 17 00:00:00 2001
From: Alasdair G Kergon <agk@redhat.com>
Date: Fri, 1 Mar 2013 22:45:47 +0000
Subject: dm: rename request variables to bios

Use 'bio' in the name of variables and functions that deal with
bios rather than 'request' to avoid confusion with the normal
block layer use of 'request'.

No functional changes.

Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm-crypt.c         |  6 +++---
 drivers/md/dm-delay.c         |  4 ++--
 drivers/md/dm-flakey.c        |  4 ++--
 drivers/md/dm-linear.c        |  6 +++---
 drivers/md/dm-mpath.c         |  4 ++--
 drivers/md/dm-raid.c          |  2 +-
 drivers/md/dm-raid1.c         |  4 ++--
 drivers/md/dm-snap.c          | 10 +++++-----
 drivers/md/dm-stripe.c        | 20 +++++++++----------
 drivers/md/dm-table.c         | 10 +++++-----
 drivers/md/dm-target.c        |  2 +-
 drivers/md/dm-thin.c          | 12 +++++------
 drivers/md/dm-zero.c          |  2 +-
 drivers/md/dm.c               | 46 +++++++++++++++++++++----------------------
 include/linux/device-mapper.h | 30 ++++++++++++++--------------
 15 files changed, 81 insertions(+), 81 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 2ae151e59c0c..13c15480d940 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -1637,7 +1637,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 
 		if (opt_params == 1 && opt_string &&
 		    !strcasecmp(opt_string, "allow_discards"))
-			ti->num_discard_requests = 1;
+			ti->num_discard_bios = 1;
 		else if (opt_params) {
 			ret = -EINVAL;
 			ti->error = "Invalid feature arguments";
@@ -1665,7 +1665,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 		goto bad;
 	}
 
-	ti->num_flush_requests = 1;
+	ti->num_flush_bios = 1;
 	ti->discard_zeroes_data_unsupported = true;
 
 	return 0;
@@ -1726,7 +1726,7 @@ static void crypt_status(struct dm_target *ti, status_type_t type,
 		DMEMIT(" %llu %s %llu", (unsigned long long)cc->iv_offset,
 				cc->dev->name, (unsigned long long)cc->start);
 
-		if (ti->num_discard_requests)
+		if (ti->num_discard_bios)
 			DMEMIT(" 1 allow_discards");
 
 		break;
diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c
index c0d03b006e40..496d5f3646a5 100644
--- a/drivers/md/dm-delay.c
+++ b/drivers/md/dm-delay.c
@@ -198,8 +198,8 @@ out:
 	mutex_init(&dc->timer_lock);
 	atomic_set(&dc->may_delay, 1);
 
-	ti->num_flush_requests = 1;
-	ti->num_discard_requests = 1;
+	ti->num_flush_bios = 1;
+	ti->num_discard_bios = 1;
 	ti->private = dc;
 	return 0;
 
diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c
index 5d6c04cceee0..7fcf21cb4ff8 100644
--- a/drivers/md/dm-flakey.c
+++ b/drivers/md/dm-flakey.c
@@ -216,8 +216,8 @@ static int flakey_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 		goto bad;
 	}
 
-	ti->num_flush_requests = 1;
-	ti->num_discard_requests = 1;
+	ti->num_flush_bios = 1;
+	ti->num_discard_bios = 1;
 	ti->per_bio_data_size = sizeof(struct per_bio_data);
 	ti->private = fc;
 	return 0;
diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c
index 5be301c1e809..4f99d267340c 100644
--- a/drivers/md/dm-linear.c
+++ b/drivers/md/dm-linear.c
@@ -53,9 +53,9 @@ static int linear_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 		goto bad;
 	}
 
-	ti->num_flush_requests = 1;
-	ti->num_discard_requests = 1;
-	ti->num_write_same_requests = 1;
+	ti->num_flush_bios = 1;
+	ti->num_discard_bios = 1;
+	ti->num_write_same_bios = 1;
 	ti->private = lc;
 	return 0;
 
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index d267bb5705e9..51bb81676be3 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -905,8 +905,8 @@ static int multipath_ctr(struct dm_target *ti, unsigned int argc,
 		goto bad;
 	}
 
-	ti->num_flush_requests = 1;
-	ti->num_discard_requests = 1;
+	ti->num_flush_bios = 1;
+	ti->num_discard_bios = 1;
 
 	return 0;
 
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 5a578d89da2d..9a01d1e4c783 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -1151,7 +1151,7 @@ static int raid_ctr(struct dm_target *ti, unsigned argc, char **argv)
 
 	INIT_WORK(&rs->md.event_work, do_table_event);
 	ti->private = rs;
-	ti->num_flush_requests = 1;
+	ti->num_flush_bios = 1;
 
 	mutex_lock(&rs->md.reconfig_mutex);
 	ret = md_run(&rs->md);
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index 7f2419099b50..e2ea97723e10 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -1072,8 +1072,8 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	if (r)
 		goto err_free_context;
 
-	ti->num_flush_requests = 1;
-	ti->num_discard_requests = 1;
+	ti->num_flush_bios = 1;
+	ti->num_discard_bios = 1;
 	ti->per_bio_data_size = sizeof(struct dm_raid1_bio_record);
 	ti->discard_zeroes_data_unsupported = true;
 
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 6e45e3774eab..5d78027e07ac 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -1037,7 +1037,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	int i;
 	int r = -EINVAL;
 	char *origin_path, *cow_path;
-	unsigned args_used, num_flush_requests = 1;
+	unsigned args_used, num_flush_bios = 1;
 	fmode_t origin_mode = FMODE_READ;
 
 	if (argc != 4) {
@@ -1047,7 +1047,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	}
 
 	if (dm_target_is_snapshot_merge(ti)) {
-		num_flush_requests = 2;
+		num_flush_bios = 2;
 		origin_mode = FMODE_WRITE;
 	}
 
@@ -1127,7 +1127,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	spin_lock_init(&s->tracked_chunk_lock);
 
 	ti->private = s;
-	ti->num_flush_requests = num_flush_requests;
+	ti->num_flush_bios = num_flush_bios;
 	ti->per_bio_data_size = sizeof(struct dm_snap_tracked_chunk);
 
 	/* Add snapshot to the list of snapshots for this origin */
@@ -1691,7 +1691,7 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio)
 	init_tracked_chunk(bio);
 
 	if (bio->bi_rw & REQ_FLUSH) {
-		if (!dm_bio_get_target_request_nr(bio))
+		if (!dm_bio_get_target_bio_nr(bio))
 			bio->bi_bdev = s->origin->bdev;
 		else
 			bio->bi_bdev = s->cow->bdev;
@@ -2102,7 +2102,7 @@ static int origin_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	}
 
 	ti->private = dev;
-	ti->num_flush_requests = 1;
+	ti->num_flush_bios = 1;
 
 	return 0;
 }
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index aaecefa63935..d8837d313f54 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -160,9 +160,9 @@ static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	if (r)
 		return r;
 
-	ti->num_flush_requests = stripes;
-	ti->num_discard_requests = stripes;
-	ti->num_write_same_requests = stripes;
+	ti->num_flush_bios = stripes;
+	ti->num_discard_bios = stripes;
+	ti->num_write_same_bios = stripes;
 
 	sc->chunk_size = chunk_size;
 	if (chunk_size & (chunk_size - 1))
@@ -276,19 +276,19 @@ static int stripe_map(struct dm_target *ti, struct bio *bio)
 {
 	struct stripe_c *sc = ti->private;
 	uint32_t stripe;
-	unsigned target_request_nr;
+	unsigned target_bio_nr;
 
 	if (bio->bi_rw & REQ_FLUSH) {
-		target_request_nr = dm_bio_get_target_request_nr(bio);
-		BUG_ON(target_request_nr >= sc->stripes);
-		bio->bi_bdev = sc->stripe[target_request_nr].dev->bdev;
+		target_bio_nr = dm_bio_get_target_bio_nr(bio);
+		BUG_ON(target_bio_nr >= sc->stripes);
+		bio->bi_bdev = sc->stripe[target_bio_nr].dev->bdev;
 		return DM_MAPIO_REMAPPED;
 	}
 	if (unlikely(bio->bi_rw & REQ_DISCARD) ||
 	    unlikely(bio->bi_rw & REQ_WRITE_SAME)) {
-		target_request_nr = dm_bio_get_target_request_nr(bio);
-		BUG_ON(target_request_nr >= sc->stripes);
-		return stripe_map_range(sc, bio, target_request_nr);
+		target_bio_nr = dm_bio_get_target_bio_nr(bio);
+		BUG_ON(target_bio_nr >= sc->stripes);
+		return stripe_map_range(sc, bio, target_bio_nr);
 	}
 
 	stripe_map_sector(sc, bio->bi_sector, &stripe, &bio->bi_sector);
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index c95405d04726..e50dad0c65f4 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -822,8 +822,8 @@ int dm_table_add_target(struct dm_table *t, const char *type,
 
 	t->highs[t->num_targets++] = tgt->begin + tgt->len - 1;
 
-	if (!tgt->num_discard_requests && tgt->discards_supported)
-		DMWARN("%s: %s: ignoring discards_supported because num_discard_requests is zero.",
+	if (!tgt->num_discard_bios && tgt->discards_supported)
+		DMWARN("%s: %s: ignoring discards_supported because num_discard_bios is zero.",
 		       dm_device_name(t->md), type);
 
 	return 0;
@@ -1359,7 +1359,7 @@ static bool dm_table_supports_flush(struct dm_table *t, unsigned flush)
 	while (i < dm_table_get_num_targets(t)) {
 		ti = dm_table_get_target(t, i++);
 
-		if (!ti->num_flush_requests)
+		if (!ti->num_flush_bios)
 			continue;
 
 		if (ti->flush_supported)
@@ -1438,7 +1438,7 @@ static bool dm_table_supports_write_same(struct dm_table *t)
 	while (i < dm_table_get_num_targets(t)) {
 		ti = dm_table_get_target(t, i++);
 
-		if (!ti->num_write_same_requests)
+		if (!ti->num_write_same_bios)
 			return false;
 
 		if (!ti->type->iterate_devices ||
@@ -1656,7 +1656,7 @@ bool dm_table_supports_discards(struct dm_table *t)
 	while (i < dm_table_get_num_targets(t)) {
 		ti = dm_table_get_target(t, i++);
 
-		if (!ti->num_discard_requests)
+		if (!ti->num_discard_bios)
 			continue;
 
 		if (ti->discards_supported)
diff --git a/drivers/md/dm-target.c b/drivers/md/dm-target.c
index 617d21a77256..37ba5db71cd9 100644
--- a/drivers/md/dm-target.c
+++ b/drivers/md/dm-target.c
@@ -116,7 +116,7 @@ static int io_err_ctr(struct dm_target *tt, unsigned int argc, char **args)
 	/*
 	 * Return error for discards instead of -EOPNOTSUPP
 	 */
-	tt->num_discard_requests = 1;
+	tt->num_discard_bios = 1;
 
 	return 0;
 }
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 088f6b34f599..303e11da7c2a 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -1944,7 +1944,7 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
 	pt->data_dev = data_dev;
 	pt->low_water_blocks = low_water_blocks;
 	pt->adjusted_pf = pt->requested_pf = pf;
-	ti->num_flush_requests = 1;
+	ti->num_flush_bios = 1;
 
 	/*
 	 * Only need to enable discards if the pool should pass
@@ -1952,7 +1952,7 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
 	 * processing will cause mappings to be removed from the btree.
 	 */
 	if (pf.discard_enabled && pf.discard_passdown) {
-		ti->num_discard_requests = 1;
+		ti->num_discard_bios = 1;
 
 		/*
 		 * Setting 'discards_supported' circumvents the normal
@@ -2593,17 +2593,17 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv)
 	if (r)
 		goto bad_thin_open;
 
-	ti->num_flush_requests = 1;
+	ti->num_flush_bios = 1;
 	ti->flush_supported = true;
 	ti->per_bio_data_size = sizeof(struct dm_thin_endio_hook);
 
 	/* In case the pool supports discards, pass them on. */
 	if (tc->pool->pf.discard_enabled) {
 		ti->discards_supported = true;
-		ti->num_discard_requests = 1;
+		ti->num_discard_bios = 1;
 		ti->discard_zeroes_data_unsupported = true;
-		/* Discard requests must be split on a block boundary */
-		ti->split_discard_requests = true;
+		/* Discard bios must be split on a block boundary */
+		ti->split_discard_bios = true;
 	}
 
 	dm_put(pool_md);
diff --git a/drivers/md/dm-zero.c b/drivers/md/dm-zero.c
index 69a5c3b3b340..c99003e0d47a 100644
--- a/drivers/md/dm-zero.c
+++ b/drivers/md/dm-zero.c
@@ -25,7 +25,7 @@ static int zero_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	/*
 	 * Silently drop discards, avoiding -EOPNOTSUPP.
 	 */
-	ti->num_discard_requests = 1;
+	ti->num_discard_bios = 1;
 
 	return 0;
 }
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 4521d72637c2..caef71befc43 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1103,7 +1103,7 @@ static void clone_bio(struct dm_target_io *tio, struct bio *bio,
 
 static struct dm_target_io *alloc_tio(struct clone_info *ci,
 				      struct dm_target *ti, int nr_iovecs,
-				      unsigned target_request_nr)
+				      unsigned target_bio_nr)
 {
 	struct dm_target_io *tio;
 	struct bio *clone;
@@ -1114,15 +1114,15 @@ static struct dm_target_io *alloc_tio(struct clone_info *ci,
 	tio->io = ci->io;
 	tio->ti = ti;
 	memset(&tio->info, 0, sizeof(tio->info));
-	tio->target_request_nr = target_request_nr;
+	tio->target_bio_nr = target_bio_nr;
 
 	return tio;
 }
 
 static void __issue_target_request(struct clone_info *ci, struct dm_target *ti,
-				   unsigned request_nr, sector_t len)
+				   unsigned target_bio_nr, sector_t len)
 {
-	struct dm_target_io *tio = alloc_tio(ci, ti, ci->bio->bi_max_vecs, request_nr);
+	struct dm_target_io *tio = alloc_tio(ci, ti, ci->bio->bi_max_vecs, target_bio_nr);
 	struct bio *clone = &tio->clone;
 
 	/*
@@ -1137,13 +1137,13 @@ static void __issue_target_request(struct clone_info *ci, struct dm_target *ti,
 	__map_bio(tio);
 }
 
-static void __issue_target_requests(struct clone_info *ci, struct dm_target *ti,
-				    unsigned num_requests, sector_t len)
+static void __issue_target_bios(struct clone_info *ci, struct dm_target *ti,
+				unsigned num_bios, sector_t len)
 {
-	unsigned request_nr;
+	unsigned target_bio_nr;
 
-	for (request_nr = 0; request_nr < num_requests; request_nr++)
-		__issue_target_request(ci, ti, request_nr, len);
+	for (target_bio_nr = 0; target_bio_nr < num_bios; target_bio_nr++)
+		__issue_target_request(ci, ti, target_bio_nr, len);
 }
 
 static int __clone_and_map_empty_flush(struct clone_info *ci)
@@ -1153,7 +1153,7 @@ static int __clone_and_map_empty_flush(struct clone_info *ci)
 
 	BUG_ON(bio_has_data(ci->bio));
 	while ((ti = dm_table_get_target(ci->map, target_nr++)))
-		__issue_target_requests(ci, ti, ti->num_flush_requests, 0);
+		__issue_target_bios(ci, ti, ti->num_flush_bios, 0);
 
 	return 0;
 }
@@ -1173,32 +1173,32 @@ static void __clone_and_map_simple(struct clone_info *ci, struct dm_target *ti)
 	ci->sector_count = 0;
 }
 
-typedef unsigned (*get_num_requests_fn)(struct dm_target *ti);
+typedef unsigned (*get_num_bios_fn)(struct dm_target *ti);
 
-static unsigned get_num_discard_requests(struct dm_target *ti)
+static unsigned get_num_discard_bios(struct dm_target *ti)
 {
-	return ti->num_discard_requests;
+	return ti->num_discard_bios;
 }
 
-static unsigned get_num_write_same_requests(struct dm_target *ti)
+static unsigned get_num_write_same_bios(struct dm_target *ti)
 {
-	return ti->num_write_same_requests;
+	return ti->num_write_same_bios;
 }
 
 typedef bool (*is_split_required_fn)(struct dm_target *ti);
 
 static bool is_split_required_for_discard(struct dm_target *ti)
 {
-	return ti->split_discard_requests;
+	return ti->split_discard_bios;
 }
 
 static int __clone_and_map_changing_extent_only(struct clone_info *ci,
-						get_num_requests_fn get_num_requests,
+						get_num_bios_fn get_num_bios,
 						is_split_required_fn is_split_required)
 {
 	struct dm_target *ti;
 	sector_t len;
-	unsigned num_requests;
+	unsigned num_bios;
 
 	do {
 		ti = dm_table_find_target(ci->map, ci->sector);
@@ -1211,8 +1211,8 @@ static int __clone_and_map_changing_extent_only(struct clone_info *ci,
 		 * reconfiguration might also have changed that since the
 		 * check was performed.
 		 */
-		num_requests = get_num_requests ? get_num_requests(ti) : 0;
-		if (!num_requests)
+		num_bios = get_num_bios ? get_num_bios(ti) : 0;
+		if (!num_bios)
 			return -EOPNOTSUPP;
 
 		if (is_split_required && !is_split_required(ti))
@@ -1220,7 +1220,7 @@ static int __clone_and_map_changing_extent_only(struct clone_info *ci,
 		else
 			len = min(ci->sector_count, max_io_len(ci->sector, ti));
 
-		__issue_target_requests(ci, ti, num_requests, len);
+		__issue_target_bios(ci, ti, num_bios, len);
 
 		ci->sector += len;
 	} while (ci->sector_count -= len);
@@ -1230,13 +1230,13 @@ static int __clone_and_map_changing_extent_only(struct clone_info *ci,
 
 static int __clone_and_map_discard(struct clone_info *ci)
 {
-	return __clone_and_map_changing_extent_only(ci, get_num_discard_requests,
+	return __clone_and_map_changing_extent_only(ci, get_num_discard_bios,
 						    is_split_required_for_discard);
 }
 
 static int __clone_and_map_write_same(struct clone_info *ci)
 {
-	return __clone_and_map_changing_extent_only(ci, get_num_write_same_requests, NULL);
+	return __clone_and_map_changing_extent_only(ci, get_num_write_same_bios, NULL);
 }
 
 static int __clone_and_map(struct clone_info *ci)
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index a5cda3ea6b88..d5f984b07466 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -187,26 +187,26 @@ struct dm_target {
 	uint32_t max_io_len;
 
 	/*
-	 * A number of zero-length barrier requests that will be submitted
+	 * A number of zero-length barrier bios that will be submitted
 	 * to the target for the purpose of flushing cache.
 	 *
-	 * The request number can be accessed with dm_bio_get_target_request_nr.
-	 * It is a responsibility of the target driver to remap these requests
+	 * The bio number can be accessed with dm_bio_get_target_bio_nr.
+	 * It is a responsibility of the target driver to remap these bios
 	 * to the real underlying devices.
 	 */
-	unsigned num_flush_requests;
+	unsigned num_flush_bios;
 
 	/*
-	 * The number of discard requests that will be submitted to the target.
-	 * The request number can be accessed with dm_bio_get_target_request_nr.
+	 * The number of discard bios that will be submitted to the target.
+	 * The bio number can be accessed with dm_bio_get_target_bio_nr.
 	 */
-	unsigned num_discard_requests;
+	unsigned num_discard_bios;
 
 	/*
-	 * The number of WRITE SAME requests that will be submitted to the target.
-	 * The request number can be accessed with dm_bio_get_target_request_nr.
+	 * The number of WRITE SAME bios that will be submitted to the target.
+	 * The bio number can be accessed with dm_bio_get_target_bio_nr.
 	 */
-	unsigned num_write_same_requests;
+	unsigned num_write_same_bios;
 
 	/*
 	 * The minimum number of extra bytes allocated in each bio for the
@@ -233,10 +233,10 @@ struct dm_target {
 	bool discards_supported:1;
 
 	/*
-	 * Set if the target required discard request to be split
+	 * Set if the target required discard bios to be split
 	 * on max_io_len boundary.
 	 */
-	bool split_discard_requests:1;
+	bool split_discard_bios:1;
 
 	/*
 	 * Set if this target does not return zeroes on discarded blocks.
@@ -261,7 +261,7 @@ struct dm_target_io {
 	struct dm_io *io;
 	struct dm_target *ti;
 	union map_info info;
-	unsigned target_request_nr;
+	unsigned target_bio_nr;
 	struct bio clone;
 };
 
@@ -275,9 +275,9 @@ static inline struct bio *dm_bio_from_per_bio_data(void *data, size_t data_size)
 	return (struct bio *)((char *)data + data_size + offsetof(struct dm_target_io, clone));
 }
 
-static inline unsigned dm_bio_get_target_request_nr(const struct bio *bio)
+static inline unsigned dm_bio_get_target_bio_nr(const struct bio *bio)
 {
-	return container_of(bio, struct dm_target_io, clone)->target_request_nr;
+	return container_of(bio, struct dm_target_io, clone)->target_bio_nr;
 }
 
 int dm_register_target(struct target_type *t);
-- 
cgit v1.2.3


From df5d2e9089c7d5b8c46f767e4278610ea3e815b9 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Fri, 1 Mar 2013 22:45:49 +0000
Subject: dm kcopyd: introduce configurable throttling

This patch allows the administrator to reduce the rate at which kcopyd
issues I/O.

Each module that uses kcopyd acquires a throttle parameter that can be
set in /sys/module/*/parameters.

We maintain a history of kcopyd usage by each module in the variables
io_period and total_period in struct dm_kcopyd_throttle. The actual
kcopyd activity is calculated as a percentage of time equal to
"(100 * io_period / total_period)".  This is compared with the user-defined
throttle percentage threshold and if it is exceeded, we sleep.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm-kcopyd.c    | 121 +++++++++++++++++++++++++++++++++++++++++++++-
 drivers/md/dm-raid1.c     |   5 +-
 drivers/md/dm-snap.c      |   5 +-
 drivers/md/dm-thin.c      |   5 +-
 include/linux/dm-kcopyd.h |  25 +++++++++-
 5 files changed, 156 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c
index 68c02673263b..d581fe5d2faf 100644
--- a/drivers/md/dm-kcopyd.c
+++ b/drivers/md/dm-kcopyd.c
@@ -22,6 +22,7 @@
 #include <linux/vmalloc.h>
 #include <linux/workqueue.h>
 #include <linux/mutex.h>
+#include <linux/delay.h>
 #include <linux/device-mapper.h>
 #include <linux/dm-kcopyd.h>
 
@@ -51,6 +52,8 @@ struct dm_kcopyd_client {
 	struct workqueue_struct *kcopyd_wq;
 	struct work_struct kcopyd_work;
 
+	struct dm_kcopyd_throttle *throttle;
+
 /*
  * We maintain three lists of jobs:
  *
@@ -68,6 +71,117 @@ struct dm_kcopyd_client {
 
 static struct page_list zero_page_list;
 
+static DEFINE_SPINLOCK(throttle_spinlock);
+
+/*
+ * IO/IDLE accounting slowly decays after (1 << ACCOUNT_INTERVAL_SHIFT) period.
+ * When total_period >= (1 << ACCOUNT_INTERVAL_SHIFT) the counters are divided
+ * by 2.
+ */
+#define ACCOUNT_INTERVAL_SHIFT		SHIFT_HZ
+
+/*
+ * Sleep this number of milliseconds.
+ *
+ * The value was decided experimentally.
+ * Smaller values seem to cause an increased copy rate above the limit.
+ * The reason for this is unknown but possibly due to jiffies rounding errors
+ * or read/write cache inside the disk.
+ */
+#define SLEEP_MSEC			100
+
+/*
+ * Maximum number of sleep events. There is a theoretical livelock if more
+ * kcopyd clients do work simultaneously which this limit avoids.
+ */
+#define MAX_SLEEPS			10
+
+static void io_job_start(struct dm_kcopyd_throttle *t)
+{
+	unsigned throttle, now, difference;
+	int slept = 0, skew;
+
+	if (unlikely(!t))
+		return;
+
+try_again:
+	spin_lock_irq(&throttle_spinlock);
+
+	throttle = ACCESS_ONCE(t->throttle);
+
+	if (likely(throttle >= 100))
+		goto skip_limit;
+
+	now = jiffies;
+	difference = now - t->last_jiffies;
+	t->last_jiffies = now;
+	if (t->num_io_jobs)
+		t->io_period += difference;
+	t->total_period += difference;
+
+	/*
+	 * Maintain sane values if we got a temporary overflow.
+	 */
+	if (unlikely(t->io_period > t->total_period))
+		t->io_period = t->total_period;
+
+	if (unlikely(t->total_period >= (1 << ACCOUNT_INTERVAL_SHIFT))) {
+		int shift = fls(t->total_period >> ACCOUNT_INTERVAL_SHIFT);
+		t->total_period >>= shift;
+		t->io_period >>= shift;
+	}
+
+	skew = t->io_period - throttle * t->total_period / 100;
+
+	if (unlikely(skew > 0) && slept < MAX_SLEEPS) {
+		slept++;
+		spin_unlock_irq(&throttle_spinlock);
+		msleep(SLEEP_MSEC);
+		goto try_again;
+	}
+
+skip_limit:
+	t->num_io_jobs++;
+
+	spin_unlock_irq(&throttle_spinlock);
+}
+
+static void io_job_finish(struct dm_kcopyd_throttle *t)
+{
+	unsigned long flags;
+
+	if (unlikely(!t))
+		return;
+
+	spin_lock_irqsave(&throttle_spinlock, flags);
+
+	t->num_io_jobs--;
+
+	if (likely(ACCESS_ONCE(t->throttle) >= 100))
+		goto skip_limit;
+
+	if (!t->num_io_jobs) {
+		unsigned now, difference;
+
+		now = jiffies;
+		difference = now - t->last_jiffies;
+		t->last_jiffies = now;
+
+		t->io_period += difference;
+		t->total_period += difference;
+
+		/*
+		 * Maintain sane values if we got a temporary overflow.
+		 */
+		if (unlikely(t->io_period > t->total_period))
+			t->io_period = t->total_period;
+	}
+
+skip_limit:
+	spin_unlock_irqrestore(&throttle_spinlock, flags);
+}
+
+
 static void wake(struct dm_kcopyd_client *kc)
 {
 	queue_work(kc->kcopyd_wq, &kc->kcopyd_work);
@@ -348,6 +462,8 @@ static void complete_io(unsigned long error, void *context)
 	struct kcopyd_job *job = (struct kcopyd_job *) context;
 	struct dm_kcopyd_client *kc = job->kc;
 
+	io_job_finish(kc->throttle);
+
 	if (error) {
 		if (job->rw & WRITE)
 			job->write_err |= error;
@@ -389,6 +505,8 @@ static int run_io_job(struct kcopyd_job *job)
 		.client = job->kc->io_client,
 	};
 
+	io_job_start(job->kc->throttle);
+
 	if (job->rw == READ)
 		r = dm_io(&io_req, 1, &job->source, NULL);
 	else
@@ -695,7 +813,7 @@ int kcopyd_cancel(struct kcopyd_job *job, int block)
 /*-----------------------------------------------------------------
  * Client setup
  *---------------------------------------------------------------*/
-struct dm_kcopyd_client *dm_kcopyd_client_create(void)
+struct dm_kcopyd_client *dm_kcopyd_client_create(struct dm_kcopyd_throttle *throttle)
 {
 	int r = -ENOMEM;
 	struct dm_kcopyd_client *kc;
@@ -708,6 +826,7 @@ struct dm_kcopyd_client *dm_kcopyd_client_create(void)
 	INIT_LIST_HEAD(&kc->complete_jobs);
 	INIT_LIST_HEAD(&kc->io_jobs);
 	INIT_LIST_HEAD(&kc->pages_jobs);
+	kc->throttle = throttle;
 
 	kc->job_pool = mempool_create_slab_pool(MIN_JOBS, _job_cache);
 	if (!kc->job_pool)
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index e2ea97723e10..d053098c6a91 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -82,6 +82,9 @@ struct mirror_set {
 	struct mirror mirror[0];
 };
 
+DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(raid1_resync_throttle,
+		"A percentage of time allocated for raid resynchronization");
+
 static void wakeup_mirrord(void *context)
 {
 	struct mirror_set *ms = context;
@@ -1111,7 +1114,7 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 		goto err_destroy_wq;
 	}
 
-	ms->kcopyd_client = dm_kcopyd_client_create();
+	ms->kcopyd_client = dm_kcopyd_client_create(&dm_kcopyd_throttle);
 	if (IS_ERR(ms->kcopyd_client)) {
 		r = PTR_ERR(ms->kcopyd_client);
 		goto err_destroy_wq;
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 58c2b5881377..c0e07026a8d1 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -124,6 +124,9 @@ struct dm_snapshot {
 #define RUNNING_MERGE          0
 #define SHUTDOWN_MERGE         1
 
+DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(snapshot_copy_throttle,
+		"A percentage of time allocated for copy on write");
+
 struct dm_dev *dm_snap_origin(struct dm_snapshot *s)
 {
 	return s->origin;
@@ -1108,7 +1111,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 		goto bad_hash_tables;
 	}
 
-	s->kcopyd_client = dm_kcopyd_client_create();
+	s->kcopyd_client = dm_kcopyd_client_create(&dm_kcopyd_throttle);
 	if (IS_ERR(s->kcopyd_client)) {
 		r = PTR_ERR(s->kcopyd_client);
 		ti->error = "Could not create kcopyd client";
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 303e11da7c2a..35d9d0396cc2 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -26,6 +26,9 @@
 #define PRISON_CELLS 1024
 #define COMMIT_PERIOD HZ
 
+DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(snapshot_copy_throttle,
+		"A percentage of time allocated for copy on write");
+
 /*
  * The block size of the device holding pool data must be
  * between 64KB and 1GB.
@@ -1642,7 +1645,7 @@ static struct pool *pool_create(struct mapped_device *pool_md,
 		goto bad_prison;
 	}
 
-	pool->copier = dm_kcopyd_client_create();
+	pool->copier = dm_kcopyd_client_create(&dm_kcopyd_throttle);
 	if (IS_ERR(pool->copier)) {
 		r = PTR_ERR(pool->copier);
 		*error = "Error creating pool's kcopyd client";
diff --git a/include/linux/dm-kcopyd.h b/include/linux/dm-kcopyd.h
index 47d9d376e4e7..f486d636b82e 100644
--- a/include/linux/dm-kcopyd.h
+++ b/include/linux/dm-kcopyd.h
@@ -21,11 +21,34 @@
 
 #define DM_KCOPYD_IGNORE_ERROR 1
 
+struct dm_kcopyd_throttle {
+	unsigned throttle;
+	unsigned num_io_jobs;
+	unsigned io_period;
+	unsigned total_period;
+	unsigned last_jiffies;
+};
+
+/*
+ * kcopyd clients that want to support throttling must pass an initialised
+ * dm_kcopyd_throttle struct into dm_kcopyd_client_create().
+ * Two or more clients may share the same instance of this struct between
+ * them if they wish to be throttled as a group.
+ *
+ * This macro also creates a corresponding module parameter to configure
+ * the amount of throttling.
+ */
+#define DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(name, description)	\
+static struct dm_kcopyd_throttle dm_kcopyd_throttle = { 100, 0, 0, 0, 0 }; \
+module_param_named(name, dm_kcopyd_throttle.throttle, uint, 0644); \
+MODULE_PARM_DESC(name, description)
+
 /*
  * To use kcopyd you must first create a dm_kcopyd_client object.
+ * throttle can be NULL if you don't want any throttling.
  */
 struct dm_kcopyd_client;
-struct dm_kcopyd_client *dm_kcopyd_client_create(void);
+struct dm_kcopyd_client *dm_kcopyd_client_create(struct dm_kcopyd_throttle *throttle);
 void dm_kcopyd_client_destroy(struct dm_kcopyd_client *kc);
 
 /*
-- 
cgit v1.2.3


From b0d8ed4d96a26ef3ac54a4aa8911c9413070662e Mon Sep 17 00:00:00 2001
From: Alasdair G Kergon <agk@redhat.com>
Date: Fri, 1 Mar 2013 22:45:49 +0000
Subject: dm: add target num_write_bios fn

Add a num_write_bios function to struct target.

If an instance of a target sets this, it will be queried before the
target's mapping function is called on a write bio, and the response
controls the number of copies of the write bio that the target will
receive.

This provides a convenient way for a target to send the same data to
more than one device.  The new cache target uses this in writethrough
mode, to send the data both to the cache and the backing device.

Signed-off-by: Alasdair G Kergon <agk@redhat.com>
---
 drivers/md/dm.c               | 22 +++++++++++++++-------
 include/linux/device-mapper.h | 15 +++++++++++++++
 2 files changed, 30 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index e417cf0a69ef..7e469260fe5e 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1152,15 +1152,23 @@ static void __clone_and_map_data_bio(struct clone_info *ci, struct dm_target *ti
 {
 	struct bio *bio = ci->bio;
 	struct dm_target_io *tio;
+	unsigned target_bio_nr;
+	unsigned num_target_bios = 1;
 
-	tio = alloc_tio(ci, ti, nr_iovecs, 0);
-
-	if (split_bvec)
-		clone_split_bio(tio, bio, sector, idx, offset, len);
-	else
-		clone_bio(tio, bio, sector, idx, bv_count, len);
+	/*
+	 * Does the target want to receive duplicate copies of the bio?
+	 */
+	if (bio_data_dir(bio) == WRITE && ti->num_write_bios)
+		num_target_bios = ti->num_write_bios(ti, bio);
 
-	__map_bio(tio);
+	for (target_bio_nr = 0; target_bio_nr < num_target_bios; target_bio_nr++) {
+		tio = alloc_tio(ci, ti, nr_iovecs, target_bio_nr);
+		if (split_bvec)
+			clone_split_bio(tio, bio, sector, idx, offset, len);
+		else
+			clone_bio(tio, bio, sector, idx, bv_count, len);
+		__map_bio(tio);
+	}
 }
 
 typedef unsigned (*get_num_bios_fn)(struct dm_target *ti);
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index d5f984b07466..1e483fa7afb4 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -175,6 +175,14 @@ struct target_type {
 #define DM_TARGET_IMMUTABLE		0x00000004
 #define dm_target_is_immutable(type)	((type)->features & DM_TARGET_IMMUTABLE)
 
+/*
+ * Some targets need to be sent the same WRITE bio severals times so
+ * that they can send copies of it to different devices.  This function
+ * examines any supplied bio and returns the number of copies of it the
+ * target requires.
+ */
+typedef unsigned (*dm_num_write_bios_fn) (struct dm_target *ti, struct bio *bio);
+
 struct dm_target {
 	struct dm_table *table;
 	struct target_type *type;
@@ -214,6 +222,13 @@ struct dm_target {
 	 */
 	unsigned per_bio_data_size;
 
+	/*
+	 * If defined, this function is called to find out how many
+	 * duplicate bios should be sent to the target when writing
+	 * data.
+	 */
+	dm_num_write_bios_fn num_write_bios;
+
 	/* target specific data */
 	void *private;
 
-- 
cgit v1.2.3


From dd37978c50bc8b354e5c4633f69387f16572fdac Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 1 Mar 2013 19:48:30 -0500
Subject: cache the value of file_inode() in struct file

Note that this thing does *not* contribute to inode refcount;
it's pinned down by dentry.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/file_table.c    | 2 ++
 fs/open.c          | 3 ++-
 include/linux/fs.h | 3 ++-
 3 files changed, 6 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/fs/file_table.c b/fs/file_table.c
index aa07d3684a2e..cd4d87a82951 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -176,6 +176,7 @@ struct file *alloc_file(struct path *path, fmode_t mode,
 		return file;
 
 	file->f_path = *path;
+	file->f_inode = path->dentry->d_inode;
 	file->f_mapping = path->dentry->d_inode->i_mapping;
 	file->f_mode = mode;
 	file->f_op = fop;
@@ -258,6 +259,7 @@ static void __fput(struct file *file)
 		drop_file_write_access(file);
 	file->f_path.dentry = NULL;
 	file->f_path.mnt = NULL;
+	file->f_inode = NULL;
 	file_free(file);
 	dput(dentry);
 	mntput(mnt);
diff --git a/fs/open.c b/fs/open.c
index 62f907e3bc36..806d4589559f 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -689,7 +689,7 @@ static int do_dentry_open(struct file *f,
 		f->f_mode = FMODE_PATH;
 
 	path_get(&f->f_path);
-	inode = file_inode(f);
+	inode = f->f_inode = f->f_path.dentry->d_inode;
 	if (f->f_mode & FMODE_WRITE) {
 		error = __get_file_write_access(inode, f->f_path.mnt);
 		if (error)
@@ -752,6 +752,7 @@ cleanup_file:
 	path_put(&f->f_path);
 	f->f_path.mnt = NULL;
 	f->f_path.dentry = NULL;
+	f->f_inode = NULL;
 	return error;
 }
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 4e686a099465..74a907b8b950 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -769,6 +769,7 @@ struct file {
 	} f_u;
 	struct path		f_path;
 #define f_dentry	f_path.dentry
+	struct inode		*f_inode;	/* cached value */
 	const struct file_operations	*f_op;
 
 	/*
@@ -2217,7 +2218,7 @@ static inline bool execute_ok(struct inode *inode)
 
 static inline struct inode *file_inode(struct file *f)
 {
-	return f->f_path.dentry->d_inode;
+	return f->f_inode;
 }
 
 /*
-- 
cgit v1.2.3


From dcf787f39162ce32ca325b3e784aba2d2444619a Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 1 Mar 2013 23:51:07 -0500
Subject: constify path_get/path_put and fs_struct.c stuff

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/fs_struct.c            | 6 +++---
 fs/internal.h             | 2 +-
 fs/namei.c                | 4 ++--
 include/linux/fs_struct.h | 4 ++--
 include/linux/path.h      | 4 ++--
 5 files changed, 10 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/fs/fs_struct.c b/fs/fs_struct.c
index fe6ca583bbc0..d8ac61d0c932 100644
--- a/fs/fs_struct.c
+++ b/fs/fs_struct.c
@@ -10,7 +10,7 @@
  * Replace the fs->{rootmnt,root} with {mnt,dentry}. Put the old values.
  * It can block.
  */
-void set_fs_root(struct fs_struct *fs, struct path *path)
+void set_fs_root(struct fs_struct *fs, const struct path *path)
 {
 	struct path old_root;
 
@@ -29,7 +29,7 @@ void set_fs_root(struct fs_struct *fs, struct path *path)
  * Replace the fs->{pwdmnt,pwd} with {mnt,dentry}. Put the old values.
  * It can block.
  */
-void set_fs_pwd(struct fs_struct *fs, struct path *path)
+void set_fs_pwd(struct fs_struct *fs, const struct path *path)
 {
 	struct path old_pwd;
 
@@ -53,7 +53,7 @@ static inline int replace_path(struct path *p, const struct path *old, const str
 	return 1;
 }
 
-void chroot_fs_refs(struct path *old_root, struct path *new_root)
+void chroot_fs_refs(const struct path *old_root, const struct path *new_root)
 {
 	struct task_struct *g, *p;
 	struct fs_struct *fs;
diff --git a/fs/internal.h b/fs/internal.h
index 2f6af7f645eb..507141fceb99 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -69,7 +69,7 @@ extern void __mnt_drop_write_file(struct file *);
 /*
  * fs_struct.c
  */
-extern void chroot_fs_refs(struct path *, struct path *);
+extern void chroot_fs_refs(const struct path *, const struct path *);
 
 /*
  * file_table.c
diff --git a/fs/namei.c b/fs/namei.c
index dc984fee5532..961bc1268366 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -451,7 +451,7 @@ int inode_permission(struct inode *inode, int mask)
  *
  * Given a path increment the reference count to the dentry and the vfsmount.
  */
-void path_get(struct path *path)
+void path_get(const struct path *path)
 {
 	mntget(path->mnt);
 	dget(path->dentry);
@@ -464,7 +464,7 @@ EXPORT_SYMBOL(path_get);
  *
  * Given a path decrement the reference count to the dentry and the vfsmount.
  */
-void path_put(struct path *path)
+void path_put(const struct path *path)
 {
 	dput(path->dentry);
 	mntput(path->mnt);
diff --git a/include/linux/fs_struct.h b/include/linux/fs_struct.h
index d0ae3a84bcfb..729eded4b24f 100644
--- a/include/linux/fs_struct.h
+++ b/include/linux/fs_struct.h
@@ -17,8 +17,8 @@ struct fs_struct {
 extern struct kmem_cache *fs_cachep;
 
 extern void exit_fs(struct task_struct *);
-extern void set_fs_root(struct fs_struct *, struct path *);
-extern void set_fs_pwd(struct fs_struct *, struct path *);
+extern void set_fs_root(struct fs_struct *, const struct path *);
+extern void set_fs_pwd(struct fs_struct *, const struct path *);
 extern struct fs_struct *copy_fs_struct(struct fs_struct *);
 extern void free_fs_struct(struct fs_struct *);
 extern int unshare_fs_struct(void);
diff --git a/include/linux/path.h b/include/linux/path.h
index edc98dec6266..d1372186f431 100644
--- a/include/linux/path.h
+++ b/include/linux/path.h
@@ -9,8 +9,8 @@ struct path {
 	struct dentry *dentry;
 };
 
-extern void path_get(struct path *);
-extern void path_put(struct path *);
+extern void path_get(const struct path *);
+extern void path_put(const struct path *);
 
 static inline int path_equal(const struct path *path1, const struct path *path2)
 {
-- 
cgit v1.2.3


From 20e6926dcbafa1b361f1c29d967688be14b6ca4b Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Fri, 1 Mar 2013 14:51:27 -0800
Subject: x86, ACPI, mm: Revert movablemem_map support

Tim found:

  WARNING: at arch/x86/kernel/smpboot.c:324 topology_sane.isra.2+0x6f/0x80()
  Hardware name: S2600CP
  sched: CPU #1's llc-sibling CPU #0 is not on the same node! [node: 1 != 0]. Ignoring dependency.
  smpboot: Booting Node   1, Processors  #1
  Modules linked in:
  Pid: 0, comm: swapper/1 Not tainted 3.9.0-0-generic #1
  Call Trace:
    set_cpu_sibling_map+0x279/0x449
    start_secondary+0x11d/0x1e5

Don Morris reproduced on a HP z620 workstation, and bisected it to
commit e8d195525809 ("acpi, memory-hotplug: parse SRAT before memblock
is ready")

It turns out movable_map has some problems, and it breaks several things

1. numa_init is called several times, NOT just for srat. so those
	nodes_clear(numa_nodes_parsed)
	memset(&numa_meminfo, 0, sizeof(numa_meminfo))
   can not be just removed.  Need to consider sequence is: numaq, srat, amd, dummy.
   and make fall back path working.

2. simply split acpi_numa_init to early_parse_srat.
   a. that early_parse_srat is NOT called for ia64, so you break ia64.
   b.  for (i = 0; i < MAX_LOCAL_APIC; i++)
	     set_apicid_to_node(i, NUMA_NO_NODE)
     still left in numa_init. So it will just clear result from early_parse_srat.
     it should be moved before that....
   c.  it breaks ACPI_TABLE_OVERIDE...as the acpi table scan is moved
       early before override from INITRD is settled.

3. that patch TITLE is total misleading, there is NO x86 in the title,
   but it changes critical x86 code. It caused x86 guys did not
   pay attention to find the problem early. Those patches really should
   be routed via tip/x86/mm.

4. after that commit, following range can not use movable ram:
  a. real_mode code.... well..funny, legacy Node0 [0,1M) could be hot-removed?
  b. initrd... it will be freed after booting, so it could be on movable...
  c. crashkernel for kdump...: looks like we can not put kdump kernel above 4G
	anymore.
  d. init_mem_mapping: can not put page table high anymore.
  e. initmem_init: vmemmap can not be high local node anymore. That is
     not good.

If node is hotplugable, the mem related range like page table and
vmemmap could be on the that node without problem and should be on that
node.

We have workaround patch that could fix some problems, but some can not
be fixed.

So just remove that offending commit and related ones including:

 f7210e6c4ac7 ("mm/memblock.c: use CONFIG_HAVE_MEMBLOCK_NODE_MAP to
    protect movablecore_map in memblock_overlaps_region().")

 01a178a94e8e ("acpi, memory-hotplug: support getting hotplug info from
    SRAT")

 27168d38fa20 ("acpi, memory-hotplug: extend movablemem_map ranges to
    the end of node")

 e8d195525809 ("acpi, memory-hotplug: parse SRAT before memblock is
    ready")

 fb06bc8e5f42 ("page_alloc: bootmem limit with movablecore_map")

 42f47e27e761 ("page_alloc: make movablemem_map have higher priority")

 6981ec31146c ("page_alloc: introduce zone_movable_limit[] to keep
    movable limit for nodes")

 34b71f1e04fc ("page_alloc: add movable_memmap kernel parameter")

 4d59a75125d5 ("x86: get pg_data_t's memory from other node")

Later we should have patches that will make sure kernel put page table
and vmemmap on local node ram instead of push them down to node0.  Also
need to find way to put other kernel used ram to local node ram.

Reported-by: Tim Gardner <tim.gardner@canonical.com>
Reported-by: Don Morris <don.morris@hp.com>
Bisected-by: Don Morris <don.morris@hp.com>
Tested-by: Don Morris <don.morris@hp.com>
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Thomas Renninger <trenn@suse.de>
Cc: Tejun Heo <tj@kernel.org>
Cc: Tang Chen <tangchen@cn.fujitsu.com>
Cc: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/kernel-parameters.txt |  36 -----
 arch/x86/kernel/setup.c             |  13 +-
 arch/x86/mm/numa.c                  |  11 +-
 arch/x86/mm/srat.c                  | 125 +---------------
 drivers/acpi/numa.c                 |  23 ++-
 include/linux/acpi.h                |   8 -
 include/linux/memblock.h            |   2 -
 include/linux/mm.h                  |  18 ---
 mm/memblock.c                       |  50 -------
 mm/page_alloc.c                     | 285 +-----------------------------------
 10 files changed, 27 insertions(+), 544 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index e567af39ee34..3a54fca730c0 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1645,42 +1645,6 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			that the amount of memory usable for all allocations
 			is not too small.
 
-	movablemem_map=acpi
-			[KNL,X86,IA-64,PPC] This parameter is similar to
-			memmap except it specifies the memory map of
-			ZONE_MOVABLE.
-			This option inform the kernel to use Hot Pluggable bit
-			in flags from SRAT from ACPI BIOS to determine which
-			memory devices could be hotplugged. The corresponding
-			memory ranges will be set as ZONE_MOVABLE.
-			NOTE: Whatever node the kernel resides in will always
-			      be un-hotpluggable.
-
-	movablemem_map=nn[KMG]@ss[KMG]
-			[KNL,X86,IA-64,PPC] This parameter is similar to
-			memmap except it specifies the memory map of
-			ZONE_MOVABLE.
-			If user specifies memory ranges, the info in SRAT will
-			be ingored. And it works like the following:
-			- If more ranges are all within one node, then from
-			  lowest ss to the end of the node will be ZONE_MOVABLE.
-			- If a range is within a node, then from ss to the end
-			  of the node will be ZONE_MOVABLE.
-			- If a range covers two or more nodes, then from ss to
-			  the end of the 1st node will be ZONE_MOVABLE, and all
-			  the rest nodes will only have ZONE_MOVABLE.
-			If memmap is specified at the same time, the
-			movablemem_map will be limited within the memmap
-			areas. If kernelcore or movablecore is also specified,
-			movablemem_map will have higher priority to be
-			satisfied. So the administrator should be careful that
-			the amount of movablemem_map areas are not too large.
-			Otherwise kernel won't have enough memory to start.
-			NOTE: We don't stop users specifying the node the
-			      kernel resides in as hotpluggable so that this
-			      option can be used as a workaround of firmware
-                              bugs.
-
 	MTD_Partition=	[MTD]
 			Format: <name>,<region-number>,<size>,<offset>
 
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index e89acdf6b77b..84d32855f65c 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1056,15 +1056,6 @@ void __init setup_arch(char **cmdline_p)
 	setup_bios_corruption_check();
 #endif
 
-	/*
-	 * In the memory hotplug case, the kernel needs info from SRAT to
-	 * determine which memory is hotpluggable before allocating memory
-	 * using memblock.
-	 */
-	acpi_boot_table_init();
-	early_acpi_boot_init();
-	early_parse_srat();
-
 #ifdef CONFIG_X86_32
 	printk(KERN_DEBUG "initial memory mapped: [mem 0x00000000-%#010lx]\n",
 			(max_pfn_mapped<<PAGE_SHIFT) - 1);
@@ -1110,6 +1101,10 @@ void __init setup_arch(char **cmdline_p)
 	/*
 	 * Parse the ACPI tables for possible boot-time SMP configuration.
 	 */
+	acpi_boot_table_init();
+
+	early_acpi_boot_init();
+
 	initmem_init();
 	memblock_find_dma_reserve();
 
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index ff3633c794c6..72fe01e9e414 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -212,9 +212,10 @@ static void __init setup_node_data(int nid, u64 start, u64 end)
 	 * Allocate node data.  Try node-local memory and then any node.
 	 * Never allocate in DMA zone.
 	 */
-	nd_pa = memblock_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid);
+	nd_pa = memblock_alloc_nid(nd_size, SMP_CACHE_BYTES, nid);
 	if (!nd_pa) {
-		pr_err("Cannot find %zu bytes in any node\n", nd_size);
+		pr_err("Cannot find %zu bytes in node %d\n",
+		       nd_size, nid);
 		return;
 	}
 	nd = __va(nd_pa);
@@ -559,12 +560,10 @@ static int __init numa_init(int (*init_func)(void))
 	for (i = 0; i < MAX_LOCAL_APIC; i++)
 		set_apicid_to_node(i, NUMA_NO_NODE);
 
-	/*
-	 * Do not clear numa_nodes_parsed or zero numa_meminfo here, because
-	 * SRAT was parsed earlier in early_parse_srat().
-	 */
+	nodes_clear(numa_nodes_parsed);
 	nodes_clear(node_possible_map);
 	nodes_clear(node_online_map);
+	memset(&numa_meminfo, 0, sizeof(numa_meminfo));
 	WARN_ON(memblock_set_node(0, ULLONG_MAX, MAX_NUMNODES));
 	numa_reset_distance();
 
diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c
index 79836d01f789..cdd0da9dd530 100644
--- a/arch/x86/mm/srat.c
+++ b/arch/x86/mm/srat.c
@@ -141,126 +141,11 @@ static inline int save_add_info(void) {return 1;}
 static inline int save_add_info(void) {return 0;}
 #endif
 
-#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
-static void __init
-handle_movablemem(int node, u64 start, u64 end, u32 hotpluggable)
-{
-	int overlap, i;
-	unsigned long start_pfn, end_pfn;
-
-	start_pfn = PFN_DOWN(start);
-	end_pfn = PFN_UP(end);
-
-	/*
-	 * For movablemem_map=acpi:
-	 *
-	 * SRAT:		|_____| |_____| |_________| |_________| ......
-	 * node id:                0       1         1           2
-	 * hotpluggable:	   n       y         y           n
-	 * movablemem_map:	        |_____| |_________|
-	 *
-	 * Using movablemem_map, we can prevent memblock from allocating memory
-	 * on ZONE_MOVABLE at boot time.
-	 *
-	 * Before parsing SRAT, memblock has already reserve some memory ranges
-	 * for other purposes, such as for kernel image. We cannot prevent
-	 * kernel from using these memory, so we need to exclude these memory
-	 * even if it is hotpluggable.
-	 * Furthermore, to ensure the kernel has enough memory to boot, we make
-	 * all the memory on the node which the kernel resides in
-	 * un-hotpluggable.
-	 */
-	if (hotpluggable && movablemem_map.acpi) {
-		/* Exclude ranges reserved by memblock. */
-		struct memblock_type *rgn = &memblock.reserved;
-
-		for (i = 0; i < rgn->cnt; i++) {
-			if (end <= rgn->regions[i].base ||
-			    start >= rgn->regions[i].base +
-			    rgn->regions[i].size)
-				continue;
-
-			/*
-			 * If the memory range overlaps the memory reserved by
-			 * memblock, then the kernel resides in this node.
-			 */
-			node_set(node, movablemem_map.numa_nodes_kernel);
-
-			goto out;
-		}
-
-		/*
-		 * If the kernel resides in this node, then the whole node
-		 * should not be hotpluggable.
-		 */
-		if (node_isset(node, movablemem_map.numa_nodes_kernel))
-			goto out;
-
-		insert_movablemem_map(start_pfn, end_pfn);
-
-		/*
-		 * numa_nodes_hotplug nodemask represents which nodes are put
-		 * into movablemem_map.map[].
-		 */
-		node_set(node, movablemem_map.numa_nodes_hotplug);
-		goto out;
-	}
-
-	/*
-	 * For movablemem_map=nn[KMG]@ss[KMG]:
-	 *
-	 * SRAT:		|_____| |_____| |_________| |_________| ......
-	 * node id:		   0       1         1           2
-	 * user specified:	          |__|                 |___|
-	 * movablemem_map:		  |___| |_________|    |______| ......
-	 *
-	 * Using movablemem_map, we can prevent memblock from allocating memory
-	 * on ZONE_MOVABLE at boot time.
-	 *
-	 * NOTE: In this case, SRAT info will be ingored.
-	 */
-	overlap = movablemem_map_overlap(start_pfn, end_pfn);
-	if (overlap >= 0) {
-		/*
-		 * If part of this range is in movablemem_map, we need to
-		 * add the range after it to extend the range to the end
-		 * of the node, because from the min address specified to
-		 * the end of the node will be ZONE_MOVABLE.
-		 */
-		start_pfn = max(start_pfn,
-			    movablemem_map.map[overlap].start_pfn);
-		insert_movablemem_map(start_pfn, end_pfn);
-
-		/*
-		 * Set the nodemask, so that if the address range on one node
-		 * is not continuse, we can add the subsequent ranges on the
-		 * same node into movablemem_map.
-		 */
-		node_set(node, movablemem_map.numa_nodes_hotplug);
-	} else {
-		if (node_isset(node, movablemem_map.numa_nodes_hotplug))
-			/*
-			 * Insert the range if we already have movable ranges
-			 * on the same node.
-			 */
-			insert_movablemem_map(start_pfn, end_pfn);
-	}
-out:
-	return;
-}
-#else		/* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
-static inline void
-handle_movablemem(int node, u64 start, u64 end, u32 hotpluggable)
-{
-}
-#endif		/* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
-
 /* Callback for parsing of the Proximity Domain <-> Memory Area mappings */
 int __init
 acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
 {
 	u64 start, end;
-	u32 hotpluggable;
 	int node, pxm;
 
 	if (srat_disabled())
@@ -269,8 +154,7 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
 		goto out_err_bad_srat;
 	if ((ma->flags & ACPI_SRAT_MEM_ENABLED) == 0)
 		goto out_err;
-	hotpluggable = ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE;
-	if (hotpluggable && !save_add_info())
+	if ((ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) && !save_add_info())
 		goto out_err;
 
 	start = ma->base_address;
@@ -290,12 +174,9 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
 
 	node_set(node, numa_nodes_parsed);
 
-	printk(KERN_INFO "SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx] %s\n",
+	printk(KERN_INFO "SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx]\n",
 	       node, pxm,
-	       (unsigned long long) start, (unsigned long long) end - 1,
-	       hotpluggable ? "Hot Pluggable": "");
-
-	handle_movablemem(node, start, end, hotpluggable);
+	       (unsigned long long) start, (unsigned long long) end - 1);
 
 	return 0;
 out_err_bad_srat:
diff --git a/drivers/acpi/numa.c b/drivers/acpi/numa.c
index 59844ee149be..33e609f63585 100644
--- a/drivers/acpi/numa.c
+++ b/drivers/acpi/numa.c
@@ -282,10 +282,10 @@ acpi_table_parse_srat(enum acpi_srat_type id,
 					    handler, max_entries);
 }
 
-static int srat_mem_cnt;
-
-void __init early_parse_srat(void)
+int __init acpi_numa_init(void)
 {
+	int cnt = 0;
+
 	/*
 	 * Should not limit number with cpu num that is from NR_CPUS or nr_cpus=
 	 * SRAT cpu entries could have different order with that in MADT.
@@ -295,24 +295,21 @@ void __init early_parse_srat(void)
 	/* SRAT: Static Resource Affinity Table */
 	if (!acpi_table_parse(ACPI_SIG_SRAT, acpi_parse_srat)) {
 		acpi_table_parse_srat(ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY,
-				      acpi_parse_x2apic_affinity, 0);
+				     acpi_parse_x2apic_affinity, 0);
 		acpi_table_parse_srat(ACPI_SRAT_TYPE_CPU_AFFINITY,
-				      acpi_parse_processor_affinity, 0);
-		srat_mem_cnt = acpi_table_parse_srat(ACPI_SRAT_TYPE_MEMORY_AFFINITY,
-						     acpi_parse_memory_affinity,
-						     NR_NODE_MEMBLKS);
+				     acpi_parse_processor_affinity, 0);
+		cnt = acpi_table_parse_srat(ACPI_SRAT_TYPE_MEMORY_AFFINITY,
+					    acpi_parse_memory_affinity,
+					    NR_NODE_MEMBLKS);
 	}
-}
 
-int __init acpi_numa_init(void)
-{
 	/* SLIT: System Locality Information Table */
 	acpi_table_parse(ACPI_SIG_SLIT, acpi_parse_slit);
 
 	acpi_numa_arch_fixup();
 
-	if (srat_mem_cnt < 0)
-		return srat_mem_cnt;
+	if (cnt < 0)
+		return cnt;
 	else if (!parsed_numa_memblks)
 		return -ENOENT;
 	return 0;
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index f46cfd73a553..bcbdd7484e58 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -485,14 +485,6 @@ static inline bool acpi_driver_match_device(struct device *dev,
 
 #endif	/* !CONFIG_ACPI */
 
-#ifdef CONFIG_ACPI_NUMA
-void __init early_parse_srat(void);
-#else
-static inline void early_parse_srat(void)
-{
-}
-#endif
-
 #ifdef CONFIG_ACPI
 void acpi_os_set_prepare_sleep(int (*func)(u8 sleep_state,
 			       u32 pm1a_ctrl,  u32 pm1b_ctrl));
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 3e5ecb2d790e..f388203db7e8 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -42,7 +42,6 @@ struct memblock {
 
 extern struct memblock memblock;
 extern int memblock_debug;
-extern struct movablemem_map movablemem_map;
 
 #define memblock_dbg(fmt, ...) \
 	if (memblock_debug) printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__)
@@ -61,7 +60,6 @@ int memblock_reserve(phys_addr_t base, phys_addr_t size);
 void memblock_trim_memory(phys_addr_t align);
 
 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
-
 void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn,
 			  unsigned long *out_end_pfn, int *out_nid);
 
diff --git a/include/linux/mm.h b/include/linux/mm.h
index e7c3f9a0111a..1ede55f292c2 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1333,24 +1333,6 @@ extern void free_bootmem_with_active_regions(int nid,
 						unsigned long max_low_pfn);
 extern void sparse_memory_present_with_active_regions(int nid);
 
-#define MOVABLEMEM_MAP_MAX MAX_NUMNODES
-struct movablemem_entry {
-	unsigned long start_pfn;    /* start pfn of memory segment */
-	unsigned long end_pfn;      /* end pfn of memory segment (exclusive) */
-};
-
-struct movablemem_map {
-	bool acpi;	/* true if using SRAT info */
-	int nr_map;
-	struct movablemem_entry map[MOVABLEMEM_MAP_MAX];
-	nodemask_t numa_nodes_hotplug;	/* on which nodes we specify memory */
-	nodemask_t numa_nodes_kernel;	/* on which nodes kernel resides in */
-};
-
-extern void __init insert_movablemem_map(unsigned long start_pfn,
-					 unsigned long end_pfn);
-extern int __init movablemem_map_overlap(unsigned long start_pfn,
-					 unsigned long end_pfn);
 #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
 
 #if !defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP) && \
diff --git a/mm/memblock.c b/mm/memblock.c
index 1bcd9b970564..b8d9147e5c08 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -92,58 +92,9 @@ static long __init_memblock memblock_overlaps_region(struct memblock_type *type,
  *
  * Find @size free area aligned to @align in the specified range and node.
  *
- * If we have CONFIG_HAVE_MEMBLOCK_NODE_MAP defined, we need to check if the
- * memory we found if not in hotpluggable ranges.
- *
  * RETURNS:
  * Found address on success, %0 on failure.
  */
-#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
-phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t start,
-					phys_addr_t end, phys_addr_t size,
-					phys_addr_t align, int nid)
-{
-	phys_addr_t this_start, this_end, cand;
-	u64 i;
-	int curr = movablemem_map.nr_map - 1;
-
-	/* pump up @end */
-	if (end == MEMBLOCK_ALLOC_ACCESSIBLE)
-		end = memblock.current_limit;
-
-	/* avoid allocating the first page */
-	start = max_t(phys_addr_t, start, PAGE_SIZE);
-	end = max(start, end);
-
-	for_each_free_mem_range_reverse(i, nid, &this_start, &this_end, NULL) {
-		this_start = clamp(this_start, start, end);
-		this_end = clamp(this_end, start, end);
-
-restart:
-		if (this_end <= this_start || this_end < size)
-			continue;
-
-		for (; curr >= 0; curr--) {
-			if ((movablemem_map.map[curr].start_pfn << PAGE_SHIFT)
-			    < this_end)
-				break;
-		}
-
-		cand = round_down(this_end - size, align);
-		if (curr >= 0 &&
-		    cand < movablemem_map.map[curr].end_pfn << PAGE_SHIFT) {
-			this_end = movablemem_map.map[curr].start_pfn
-				   << PAGE_SHIFT;
-			goto restart;
-		}
-
-		if (cand >= this_start)
-			return cand;
-	}
-
-	return 0;
-}
-#else /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
 phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t start,
 					phys_addr_t end, phys_addr_t size,
 					phys_addr_t align, int nid)
@@ -172,7 +123,6 @@ phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t start,
 	}
 	return 0;
 }
-#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
 
 /**
  * memblock_find_in_range - find free area in given range
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 0dade3f18f7d..8fcced7823fa 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -202,18 +202,11 @@ static unsigned long __meminitdata nr_all_pages;
 static unsigned long __meminitdata dma_reserve;
 
 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
-/* Movable memory ranges, will also be used by memblock subsystem. */
-struct movablemem_map movablemem_map = {
-	.acpi = false,
-	.nr_map = 0,
-};
-
 static unsigned long __meminitdata arch_zone_lowest_possible_pfn[MAX_NR_ZONES];
 static unsigned long __meminitdata arch_zone_highest_possible_pfn[MAX_NR_ZONES];
 static unsigned long __initdata required_kernelcore;
 static unsigned long __initdata required_movablecore;
 static unsigned long __meminitdata zone_movable_pfn[MAX_NUMNODES];
-static unsigned long __meminitdata zone_movable_limit[MAX_NUMNODES];
 
 /* movable_zone is the "real" zone pages in ZONE_MOVABLE are taken from */
 int movable_zone;
@@ -4412,77 +4405,6 @@ static unsigned long __meminit zone_absent_pages_in_node(int nid,
 	return __absent_pages_in_range(nid, zone_start_pfn, zone_end_pfn);
 }
 
-/**
- * sanitize_zone_movable_limit - Sanitize the zone_movable_limit array.
- *
- * zone_movable_limit is initialized as 0. This function will try to get
- * the first ZONE_MOVABLE pfn of each node from movablemem_map, and
- * assigne them to zone_movable_limit.
- * zone_movable_limit[nid] == 0 means no limit for the node.
- *
- * Note: Each range is represented as [start_pfn, end_pfn)
- */
-static void __meminit sanitize_zone_movable_limit(void)
-{
-	int map_pos = 0, i, nid;
-	unsigned long start_pfn, end_pfn;
-
-	if (!movablemem_map.nr_map)
-		return;
-
-	/* Iterate all ranges from minimum to maximum */
-	for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) {
-		/*
-		 * If we have found lowest pfn of ZONE_MOVABLE of the node
-		 * specified by user, just go on to check next range.
-		 */
-		if (zone_movable_limit[nid])
-			continue;
-
-#ifdef CONFIG_ZONE_DMA
-		/* Skip DMA memory. */
-		if (start_pfn < arch_zone_highest_possible_pfn[ZONE_DMA])
-			start_pfn = arch_zone_highest_possible_pfn[ZONE_DMA];
-#endif
-
-#ifdef CONFIG_ZONE_DMA32
-		/* Skip DMA32 memory. */
-		if (start_pfn < arch_zone_highest_possible_pfn[ZONE_DMA32])
-			start_pfn = arch_zone_highest_possible_pfn[ZONE_DMA32];
-#endif
-
-#ifdef CONFIG_HIGHMEM
-		/* Skip lowmem if ZONE_MOVABLE is highmem. */
-		if (zone_movable_is_highmem() &&
-		    start_pfn < arch_zone_lowest_possible_pfn[ZONE_HIGHMEM])
-			start_pfn = arch_zone_lowest_possible_pfn[ZONE_HIGHMEM];
-#endif
-
-		if (start_pfn >= end_pfn)
-			continue;
-
-		while (map_pos < movablemem_map.nr_map) {
-			if (end_pfn <= movablemem_map.map[map_pos].start_pfn)
-				break;
-
-			if (start_pfn >= movablemem_map.map[map_pos].end_pfn) {
-				map_pos++;
-				continue;
-			}
-
-			/*
-			 * The start_pfn of ZONE_MOVABLE is either the minimum
-			 * pfn specified by movablemem_map, or 0, which means
-			 * the node has no ZONE_MOVABLE.
-			 */
-			zone_movable_limit[nid] = max(start_pfn,
-					movablemem_map.map[map_pos].start_pfn);
-
-			break;
-		}
-	}
-}
-
 #else /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
 static inline unsigned long __meminit zone_spanned_pages_in_node(int nid,
 					unsigned long zone_type,
@@ -4500,6 +4422,7 @@ static inline unsigned long __meminit zone_absent_pages_in_node(int nid,
 
 	return zholes_size[zone_type];
 }
+
 #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
 
 static void __meminit calculate_node_totalpages(struct pglist_data *pgdat,
@@ -4941,19 +4864,12 @@ static void __init find_zone_movable_pfns_for_nodes(void)
 		required_kernelcore = max(required_kernelcore, corepages);
 	}
 
-	/*
-	 * If neither kernelcore/movablecore nor movablemem_map is specified,
-	 * there is no ZONE_MOVABLE. But if movablemem_map is specified, the
-	 * start pfn of ZONE_MOVABLE has been stored in zone_movable_limit[].
-	 */
-	if (!required_kernelcore) {
-		if (movablemem_map.nr_map)
-			memcpy(zone_movable_pfn, zone_movable_limit,
-				sizeof(zone_movable_pfn));
+	/* If kernelcore was not specified, there is no ZONE_MOVABLE */
+	if (!required_kernelcore)
 		goto out;
-	}
 
 	/* usable_startpfn is the lowest possible pfn ZONE_MOVABLE can be at */
+	find_usable_zone_for_movable();
 	usable_startpfn = arch_zone_lowest_possible_pfn[movable_zone];
 
 restart:
@@ -4981,24 +4897,10 @@ restart:
 		for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
 			unsigned long size_pages;
 
-			/*
-			 * Find more memory for kernelcore in
-			 * [zone_movable_pfn[nid], zone_movable_limit[nid]).
-			 */
 			start_pfn = max(start_pfn, zone_movable_pfn[nid]);
 			if (start_pfn >= end_pfn)
 				continue;
 
-			if (zone_movable_limit[nid]) {
-				end_pfn = min(end_pfn, zone_movable_limit[nid]);
-				/* No range left for kernelcore in this node */
-				if (start_pfn >= end_pfn) {
-					zone_movable_pfn[nid] =
-							zone_movable_limit[nid];
-					break;
-				}
-			}
-
 			/* Account for what is only usable for kernelcore */
 			if (start_pfn < usable_startpfn) {
 				unsigned long kernel_pages;
@@ -5058,12 +4960,12 @@ restart:
 	if (usable_nodes && required_kernelcore > usable_nodes)
 		goto restart;
 
-out:
 	/* Align start of ZONE_MOVABLE on all nids to MAX_ORDER_NR_PAGES */
 	for (nid = 0; nid < MAX_NUMNODES; nid++)
 		zone_movable_pfn[nid] =
 			roundup(zone_movable_pfn[nid], MAX_ORDER_NR_PAGES);
 
+out:
 	/* restore the node_state */
 	node_states[N_MEMORY] = saved_node_state;
 }
@@ -5126,8 +5028,6 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
 
 	/* Find the PFNs that ZONE_MOVABLE begins at in each node */
 	memset(zone_movable_pfn, 0, sizeof(zone_movable_pfn));
-	find_usable_zone_for_movable();
-	sanitize_zone_movable_limit();
 	find_zone_movable_pfns_for_nodes();
 
 	/* Print out the zone ranges */
@@ -5211,181 +5111,6 @@ static int __init cmdline_parse_movablecore(char *p)
 early_param("kernelcore", cmdline_parse_kernelcore);
 early_param("movablecore", cmdline_parse_movablecore);
 
-/**
- * movablemem_map_overlap() - Check if a range overlaps movablemem_map.map[].
- * @start_pfn:	start pfn of the range to be checked
- * @end_pfn: 	end pfn of the range to be checked (exclusive)
- *
- * This function checks if a given memory range [start_pfn, end_pfn) overlaps
- * the movablemem_map.map[] array.
- *
- * Return: index of the first overlapped element in movablemem_map.map[]
- *         or -1 if they don't overlap each other.
- */
-int __init movablemem_map_overlap(unsigned long start_pfn,
-				   unsigned long end_pfn)
-{
-	int overlap;
-
-	if (!movablemem_map.nr_map)
-		return -1;
-
-	for (overlap = 0; overlap < movablemem_map.nr_map; overlap++)
-		if (start_pfn < movablemem_map.map[overlap].end_pfn)
-			break;
-
-	if (overlap == movablemem_map.nr_map ||
-	    end_pfn <= movablemem_map.map[overlap].start_pfn)
-		return -1;
-
-	return overlap;
-}
-
-/**
- * insert_movablemem_map - Insert a memory range in to movablemem_map.map.
- * @start_pfn:	start pfn of the range
- * @end_pfn:	end pfn of the range
- *
- * This function will also merge the overlapped ranges, and sort the array
- * by start_pfn in monotonic increasing order.
- */
-void __init insert_movablemem_map(unsigned long start_pfn,
-				  unsigned long end_pfn)
-{
-	int pos, overlap;
-
-	/*
-	 * pos will be at the 1st overlapped range, or the position
-	 * where the element should be inserted.
-	 */
-	for (pos = 0; pos < movablemem_map.nr_map; pos++)
-		if (start_pfn <= movablemem_map.map[pos].end_pfn)
-			break;
-
-	/* If there is no overlapped range, just insert the element. */
-	if (pos == movablemem_map.nr_map ||
-	    end_pfn < movablemem_map.map[pos].start_pfn) {
-		/*
-		 * If pos is not the end of array, we need to move all
-		 * the rest elements backward.
-		 */
-		if (pos < movablemem_map.nr_map)
-			memmove(&movablemem_map.map[pos+1],
-				&movablemem_map.map[pos],
-				sizeof(struct movablemem_entry) *
-				(movablemem_map.nr_map - pos));
-		movablemem_map.map[pos].start_pfn = start_pfn;
-		movablemem_map.map[pos].end_pfn = end_pfn;
-		movablemem_map.nr_map++;
-		return;
-	}
-
-	/* overlap will be at the last overlapped range */
-	for (overlap = pos + 1; overlap < movablemem_map.nr_map; overlap++)
-		if (end_pfn < movablemem_map.map[overlap].start_pfn)
-			break;
-
-	/*
-	 * If there are more ranges overlapped, we need to merge them,
-	 * and move the rest elements forward.
-	 */
-	overlap--;
-	movablemem_map.map[pos].start_pfn = min(start_pfn,
-					movablemem_map.map[pos].start_pfn);
-	movablemem_map.map[pos].end_pfn = max(end_pfn,
-					movablemem_map.map[overlap].end_pfn);
-
-	if (pos != overlap && overlap + 1 != movablemem_map.nr_map)
-		memmove(&movablemem_map.map[pos+1],
-			&movablemem_map.map[overlap+1],
-			sizeof(struct movablemem_entry) *
-			(movablemem_map.nr_map - overlap - 1));
-
-	movablemem_map.nr_map -= overlap - pos;
-}
-
-/**
- * movablemem_map_add_region - Add a memory range into movablemem_map.
- * @start:	physical start address of range
- * @end:	physical end address of range
- *
- * This function transform the physical address into pfn, and then add the
- * range into movablemem_map by calling insert_movablemem_map().
- */
-static void __init movablemem_map_add_region(u64 start, u64 size)
-{
-	unsigned long start_pfn, end_pfn;
-
-	/* In case size == 0 or start + size overflows */
-	if (start + size <= start)
-		return;
-
-	if (movablemem_map.nr_map >= ARRAY_SIZE(movablemem_map.map)) {
-		pr_err("movablemem_map: too many entries;"
-			" ignoring [mem %#010llx-%#010llx]\n",
-			(unsigned long long) start,
-			(unsigned long long) (start + size - 1));
-		return;
-	}
-
-	start_pfn = PFN_DOWN(start);
-	end_pfn = PFN_UP(start + size);
-	insert_movablemem_map(start_pfn, end_pfn);
-}
-
-/*
- * cmdline_parse_movablemem_map - Parse boot option movablemem_map.
- * @p:	The boot option of the following format:
- *	movablemem_map=nn[KMG]@ss[KMG]
- *
- * This option sets the memory range [ss, ss+nn) to be used as movable memory.
- *
- * Return: 0 on success or -EINVAL on failure.
- */
-static int __init cmdline_parse_movablemem_map(char *p)
-{
-	char *oldp;
-	u64 start_at, mem_size;
-
-	if (!p)
-		goto err;
-
-	if (!strcmp(p, "acpi"))
-		movablemem_map.acpi = true;
-
-	/*
-	 * If user decide to use info from BIOS, all the other user specified
-	 * ranges will be ingored.
-	 */
-	if (movablemem_map.acpi) {
-		if (movablemem_map.nr_map) {
-			memset(movablemem_map.map, 0,
-				sizeof(struct movablemem_entry)
-				* movablemem_map.nr_map);
-			movablemem_map.nr_map = 0;
-		}
-		return 0;
-	}
-
-	oldp = p;
-	mem_size = memparse(p, &p);
-	if (p == oldp)
-		goto err;
-
-	if (*p == '@') {
-		oldp = ++p;
-		start_at = memparse(p, &p);
-		if (p == oldp || *p != '\0')
-			goto err;
-
-		movablemem_map_add_region(start_at, mem_size);
-		return 0;
-	}
-err:
-	return -EINVAL;
-}
-early_param("movablemem_map", cmdline_parse_movablemem_map);
-
 #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
 
 /**
-- 
cgit v1.2.3


From 5698c50d9da4ab2f57d98c64ea97675dcaf2a608 Mon Sep 17 00:00:00 2001
From: James Hogan <james.hogan@imgtec.com>
Date: Tue, 9 Oct 2012 10:54:47 +0100
Subject: metag: Internal and external irqchips

Meta core internal interrupts (from HWSTATMETA and friends) are vectored
onto the TR1 core trigger for the current thread. This is demultiplexed
in irq-metag.c to individual Linux IRQs for each internal interrupt.

External SoC interrupts (from HWSTATEXT and friends) are vectored onto
the TR2 core trigger for the current thread. This is demultiplexed in
irq-metag-ext.c to individual Linux IRQs for each external SoC interrupt.
The external irqchip has devicetree bindings for configuring the number
of irq banks and the type of masking available.

Signed-off-by: James Hogan <james.hogan@imgtec.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Grant Likely <grant.likely@secretlab.ca>
Cc: Rob Herring <rob.herring@calxeda.com>
Cc: Rob Landley <rob@landley.net>
Cc: Dom Cobley <popcornmix@gmail.com>
Cc: Simon Arlott <simon@fire.lp0.eu>
Cc: Viresh Kumar <viresh.kumar@linaro.org>
Cc: Maxime Ripard <maxime.ripard@free-electrons.com>
Cc: devicetree-discuss@lists.ozlabs.org
Cc: linux-doc@vger.kernel.org
---
 .../devicetree/bindings/metag/meta-intc.txt        |  82 ++
 MAINTAINERS                                        |   2 +
 arch/metag/kernel/irq.c                            |   5 +
 drivers/irqchip/Makefile                           |   2 +
 drivers/irqchip/irq-metag-ext.c                    | 868 +++++++++++++++++++++
 drivers/irqchip/irq-metag.c                        | 343 ++++++++
 include/linux/irqchip/metag-ext.h                  |  33 +
 include/linux/irqchip/metag.h                      |  24 +
 8 files changed, 1359 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/metag/meta-intc.txt
 create mode 100644 drivers/irqchip/irq-metag-ext.c
 create mode 100644 drivers/irqchip/irq-metag.c
 create mode 100644 include/linux/irqchip/metag-ext.h
 create mode 100644 include/linux/irqchip/metag.h

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/metag/meta-intc.txt b/Documentation/devicetree/bindings/metag/meta-intc.txt
new file mode 100644
index 000000000000..8c47dcbfabc6
--- /dev/null
+++ b/Documentation/devicetree/bindings/metag/meta-intc.txt
@@ -0,0 +1,82 @@
+* Meta External Trigger Controller Binding
+
+This binding specifies what properties must be available in the device tree
+representation of a Meta external trigger controller.
+
+Required properties:
+
+    - compatible: Specifies the compatibility list for the interrupt controller.
+      The type shall be <string> and the value shall include "img,meta-intc".
+
+    - num-banks: Specifies the number of interrupt banks (each of which can
+      handle 32 interrupt sources).
+
+    - interrupt-controller: The presence of this property identifies the node
+      as an interupt controller. No property value shall be defined.
+
+    - #interrupt-cells: Specifies the number of cells needed to encode an
+      interrupt source. The type shall be a <u32> and the value shall be 2.
+
+    - #address-cells: Specifies the number of cells needed to encode an
+      address. The type shall be <u32> and the value shall be 0. As such,
+      'interrupt-map' nodes do not have to specify a parent unit address.
+
+Optional properties:
+
+    - no-mask: The controller doesn't have any mask registers.
+
+* Interrupt Specifier Definition
+
+  Interrupt specifiers consists of 2 cells encoded as follows:
+
+    - <1st-cell>: The interrupt-number that identifies the interrupt source.
+
+    - <2nd-cell>: The Linux interrupt flags containing level-sense information,
+                  encoded as follows:
+                    1 = edge triggered
+                    4 = level-sensitive
+
+* Examples
+
+Example 1:
+
+	/*
+	 * Meta external trigger block
+	 */
+	intc: intc {
+		// This is an interrupt controller node.
+		interrupt-controller;
+
+		// No address cells so that 'interrupt-map' nodes which
+		// reference this interrupt controller node do not need a parent
+		// address specifier.
+		#address-cells = <0>;
+
+		// Two cells to encode interrupt sources.
+		#interrupt-cells = <2>;
+
+		// Number of interrupt banks
+		num-banks = <2>;
+
+		// No HWMASKEXT is available (specify on Chorus2 and Comet ES1)
+		no-mask;
+
+		// Compatible with Meta hardware trigger block.
+		compatible = "img,meta-intc";
+	};
+
+Example 2:
+
+	/*
+	 * An interrupt generating device that is wired to a Meta external
+	 * trigger block.
+	 */
+	uart1: uart@0x02004c00 {
+		// Interrupt source '5' that is level-sensitive.
+		// Note that there are only two cells as specified in the
+		// interrupt parent's '#interrupt-cells' property.
+		interrupts = <5 4 /* level */>;
+
+		// The interrupt controller that this device is wired to.
+		interrupt-parent = <&intc>;
+	};
diff --git a/MAINTAINERS b/MAINTAINERS
index 749d76699ead..9b2b7699da4d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5040,6 +5040,8 @@ F:	arch/metag/
 F:	Documentation/metag/
 F:	Documentation/devicetree/bindings/metag/
 F:	drivers/clocksource/metag_generic.c
+F:	drivers/irqchip/irq-metag.c
+F:	drivers/irqchip/irq-metag-ext.c
 
 MICROBLAZE ARCHITECTURE
 M:	Michal Simek <monstr@monstr.eu>
diff --git a/arch/metag/kernel/irq.c b/arch/metag/kernel/irq.c
index 7c043491e1e3..87707efeb0a3 100644
--- a/arch/metag/kernel/irq.c
+++ b/arch/metag/kernel/irq.c
@@ -6,6 +6,8 @@
 #include <linux/kernel.h>
 #include <linux/interrupt.h>
 #include <linux/init.h>
+#include <linux/irqchip/metag-ext.h>
+#include <linux/irqchip/metag.h>
 #include <linux/irqdomain.h>
 #include <linux/ratelimit.h>
 
@@ -258,6 +260,9 @@ void __init init_IRQ(void)
 
 	irq_ctx_init(smp_processor_id());
 
+	init_internal_IRQ();
+	init_external_IRQ();
+
 	if (machine_desc->init_irq)
 		machine_desc->init_irq();
 }
diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile
index bf4609a5bd9d..ff02f6b98863 100644
--- a/drivers/irqchip/Makefile
+++ b/drivers/irqchip/Makefile
@@ -1,4 +1,6 @@
 obj-$(CONFIG_ARCH_BCM2835)		+= irq-bcm2835.o
+obj-$(CONFIG_METAG)			+= irq-metag-ext.o
+obj-$(CONFIG_METAG_PERFCOUNTER_IRQS)	+= irq-metag.o
 obj-$(CONFIG_ARCH_SUNXI)		+= irq-sunxi.o
 obj-$(CONFIG_VERSATILE_FPGA_IRQ)	+= irq-versatile-fpga.o
 obj-$(CONFIG_ARCH_SPEAR3XX)		+= spear-shirq.o
diff --git a/drivers/irqchip/irq-metag-ext.c b/drivers/irqchip/irq-metag-ext.c
new file mode 100644
index 000000000000..92c41ab4dbfd
--- /dev/null
+++ b/drivers/irqchip/irq-metag-ext.c
@@ -0,0 +1,868 @@
+/*
+ * Meta External interrupt code.
+ *
+ * Copyright (C) 2005-2012 Imagination Technologies Ltd.
+ *
+ * External interrupts on Meta are configured at two-levels, in the CPU core and
+ * in the external trigger block. Interrupts from SoC peripherals are
+ * multiplexed onto a single Meta CPU "trigger" - traditionally it has always
+ * been trigger 2 (TR2). For info on how de-multiplexing happens check out
+ * meta_intc_irq_demux().
+ */
+
+#include <linux/interrupt.h>
+#include <linux/irqchip/metag-ext.h>
+#include <linux/irqdomain.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/slab.h>
+#include <linux/syscore_ops.h>
+
+#include <asm/irq.h>
+#include <asm/hwthread.h>
+
+#define HWSTAT_STRIDE 8
+#define HWVEC_BLK_STRIDE 0x1000
+
+/**
+ * struct meta_intc_priv - private meta external interrupt data
+ * @nr_banks:		Number of interrupt banks
+ * @domain:		IRQ domain for all banks of external IRQs
+ * @unmasked:		Record of unmasked IRQs
+ * @levels_altered:	Record of altered level bits
+ */
+struct meta_intc_priv {
+	unsigned int		nr_banks;
+	struct irq_domain	*domain;
+
+	unsigned long		unmasked[4];
+
+#ifdef CONFIG_METAG_SUSPEND_MEM
+	unsigned long		levels_altered[4];
+#endif
+};
+
+/* Private data for the one and only external interrupt controller */
+static struct meta_intc_priv meta_intc_priv;
+
+/**
+ * meta_intc_offset() - Get the offset into the bank of a hardware IRQ number
+ * @hw:		Hardware IRQ number (within external trigger block)
+ *
+ * Returns:	Bit offset into the IRQ's bank registers
+ */
+static unsigned int meta_intc_offset(irq_hw_number_t hw)
+{
+	return hw & 0x1f;
+}
+
+/**
+ * meta_intc_bank() - Get the bank number of a hardware IRQ number
+ * @hw:		Hardware IRQ number (within external trigger block)
+ *
+ * Returns:	Bank number indicating which register the IRQ's bits are
+ */
+static unsigned int meta_intc_bank(irq_hw_number_t hw)
+{
+	return hw >> 5;
+}
+
+/**
+ * meta_intc_stat_addr() - Get the address of a HWSTATEXT register
+ * @hw:		Hardware IRQ number (within external trigger block)
+ *
+ * Returns:	Address of a HWSTATEXT register containing the status bit for
+ *		the specified hardware IRQ number
+ */
+static void __iomem *meta_intc_stat_addr(irq_hw_number_t hw)
+{
+	return (void __iomem *)(HWSTATEXT +
+				HWSTAT_STRIDE * meta_intc_bank(hw));
+}
+
+/**
+ * meta_intc_level_addr() - Get the address of a HWLEVELEXT register
+ * @hw:		Hardware IRQ number (within external trigger block)
+ *
+ * Returns:	Address of a HWLEVELEXT register containing the sense bit for
+ *		the specified hardware IRQ number
+ */
+static void __iomem *meta_intc_level_addr(irq_hw_number_t hw)
+{
+	return (void __iomem *)(HWLEVELEXT +
+				HWSTAT_STRIDE * meta_intc_bank(hw));
+}
+
+/**
+ * meta_intc_mask_addr() - Get the address of a HWMASKEXT register
+ * @hw:		Hardware IRQ number (within external trigger block)
+ *
+ * Returns:	Address of a HWMASKEXT register containing the mask bit for the
+ *		specified hardware IRQ number
+ */
+static void __iomem *meta_intc_mask_addr(irq_hw_number_t hw)
+{
+	return (void __iomem *)(HWMASKEXT +
+				HWSTAT_STRIDE * meta_intc_bank(hw));
+}
+
+/**
+ * meta_intc_vec_addr() - Get the vector address of a hardware interrupt
+ * @hw:		Hardware IRQ number (within external trigger block)
+ *
+ * Returns:	Address of a HWVECEXT register controlling the core trigger to
+ *		vector the IRQ onto
+ */
+static inline void __iomem *meta_intc_vec_addr(irq_hw_number_t hw)
+{
+	return (void __iomem *)(HWVEC0EXT +
+				HWVEC_BLK_STRIDE * meta_intc_bank(hw) +
+				HWVECnEXT_STRIDE * meta_intc_offset(hw));
+}
+
+/**
+ * meta_intc_startup_irq() - set up an external irq
+ * @data:	data for the external irq to start up
+ *
+ * Multiplex interrupts for irq onto TR2. Clear any pending interrupts and
+ * unmask irq, both using the appropriate callbacks.
+ */
+static unsigned int meta_intc_startup_irq(struct irq_data *data)
+{
+	irq_hw_number_t hw = data->hwirq;
+	void __iomem *vec_addr = meta_intc_vec_addr(hw);
+	int thread = hard_processor_id();
+
+	/* Perform any necessary acking. */
+	if (data->chip->irq_ack)
+		data->chip->irq_ack(data);
+
+	/* Wire up this interrupt to the core with HWVECxEXT. */
+	metag_out32(TBI_TRIG_VEC(TBID_SIGNUM_TR2(thread)), vec_addr);
+
+	/* Perform any necessary unmasking. */
+	data->chip->irq_unmask(data);
+
+	return 0;
+}
+
+/**
+ * meta_intc_shutdown_irq() - turn off an external irq
+ * @data:	data for the external irq to turn off
+ *
+ * Mask irq using the appropriate callback and stop muxing it onto TR2.
+ */
+static void meta_intc_shutdown_irq(struct irq_data *data)
+{
+	irq_hw_number_t hw = data->hwirq;
+	void __iomem *vec_addr = meta_intc_vec_addr(hw);
+
+	/* Mask the IRQ */
+	data->chip->irq_mask(data);
+
+	/*
+	 * Disable the IRQ at the core by removing the interrupt from
+	 * the HW vector mapping.
+	 */
+	metag_out32(0, vec_addr);
+}
+
+/**
+ * meta_intc_ack_irq() - acknowledge an external irq
+ * @data:	data for the external irq to ack
+ *
+ * Clear down an edge interrupt in the status register.
+ */
+static void meta_intc_ack_irq(struct irq_data *data)
+{
+	irq_hw_number_t hw = data->hwirq;
+	unsigned int bit = 1 << meta_intc_offset(hw);
+	void __iomem *stat_addr = meta_intc_stat_addr(hw);
+
+	/* Ack the int, if it is still 'on'.
+	 * NOTE - this only works for edge triggered interrupts.
+	 */
+	if (metag_in32(stat_addr) & bit)
+		metag_out32(bit, stat_addr);
+}
+
+/**
+ * record_irq_is_masked() - record the IRQ masked so it doesn't get handled
+ * @data:	data for the external irq to record
+ *
+ * This should get called whenever an external IRQ is masked (by whichever
+ * callback is used). It records the IRQ masked so that it doesn't get handled
+ * if it still shows up in the status register.
+ */
+static void record_irq_is_masked(struct irq_data *data)
+{
+	struct meta_intc_priv *priv = &meta_intc_priv;
+	irq_hw_number_t hw = data->hwirq;
+
+	clear_bit(meta_intc_offset(hw), &priv->unmasked[meta_intc_bank(hw)]);
+}
+
+/**
+ * record_irq_is_unmasked() - record the IRQ unmasked so it can be handled
+ * @data:	data for the external irq to record
+ *
+ * This should get called whenever an external IRQ is unmasked (by whichever
+ * callback is used). It records the IRQ unmasked so that it gets handled if it
+ * shows up in the status register.
+ */
+static void record_irq_is_unmasked(struct irq_data *data)
+{
+	struct meta_intc_priv *priv = &meta_intc_priv;
+	irq_hw_number_t hw = data->hwirq;
+
+	set_bit(meta_intc_offset(hw), &priv->unmasked[meta_intc_bank(hw)]);
+}
+
+/*
+ * For use by wrapper IRQ drivers
+ */
+
+/**
+ * meta_intc_mask_irq_simple() - minimal mask used by wrapper IRQ drivers
+ * @data:	data for the external irq being masked
+ *
+ * This should be called by any wrapper IRQ driver mask functions. it doesn't do
+ * any masking but records the IRQ as masked so that the core code knows the
+ * mask has taken place. It is the callers responsibility to ensure that the IRQ
+ * won't trigger an interrupt to the core.
+ */
+void meta_intc_mask_irq_simple(struct irq_data *data)
+{
+	record_irq_is_masked(data);
+}
+
+/**
+ * meta_intc_unmask_irq_simple() - minimal unmask used by wrapper IRQ drivers
+ * @data:	data for the external irq being unmasked
+ *
+ * This should be called by any wrapper IRQ driver unmask functions. it doesn't
+ * do any unmasking but records the IRQ as unmasked so that the core code knows
+ * the unmask has taken place. It is the callers responsibility to ensure that
+ * the IRQ can now trigger an interrupt to the core.
+ */
+void meta_intc_unmask_irq_simple(struct irq_data *data)
+{
+	record_irq_is_unmasked(data);
+}
+
+
+/**
+ * meta_intc_mask_irq() - mask an external irq using HWMASKEXT
+ * @data:	data for the external irq to mask
+ *
+ * This is a default implementation of a mask function which makes use of the
+ * HWMASKEXT registers available in newer versions.
+ *
+ * Earlier versions without these registers should use SoC level IRQ masking
+ * which call the meta_intc_*_simple() functions above, or if that isn't
+ * available should use the fallback meta_intc_*_nomask() functions below.
+ */
+static void meta_intc_mask_irq(struct irq_data *data)
+{
+	irq_hw_number_t hw = data->hwirq;
+	unsigned int bit = 1 << meta_intc_offset(hw);
+	void __iomem *mask_addr = meta_intc_mask_addr(hw);
+	unsigned long flags;
+
+	record_irq_is_masked(data);
+
+	/* update the interrupt mask */
+	__global_lock2(flags);
+	metag_out32(metag_in32(mask_addr) & ~bit, mask_addr);
+	__global_unlock2(flags);
+}
+
+/**
+ * meta_intc_unmask_irq() - unmask an external irq using HWMASKEXT
+ * @data:	data for the external irq to unmask
+ *
+ * This is a default implementation of an unmask function which makes use of the
+ * HWMASKEXT registers available on new versions. It should be paired with
+ * meta_intc_mask_irq() above.
+ */
+static void meta_intc_unmask_irq(struct irq_data *data)
+{
+	irq_hw_number_t hw = data->hwirq;
+	unsigned int bit = 1 << meta_intc_offset(hw);
+	void __iomem *mask_addr = meta_intc_mask_addr(hw);
+	unsigned long flags;
+
+	record_irq_is_unmasked(data);
+
+	/* update the interrupt mask */
+	__global_lock2(flags);
+	metag_out32(metag_in32(mask_addr) | bit, mask_addr);
+	__global_unlock2(flags);
+}
+
+/**
+ * meta_intc_mask_irq_nomask() - mask an external irq by unvectoring
+ * @data:	data for the external irq to mask
+ *
+ * This is the version of the mask function for older versions which don't have
+ * HWMASKEXT registers, or a SoC level means of masking IRQs. Instead the IRQ is
+ * unvectored from the core and retriggered if necessary later.
+ */
+static void meta_intc_mask_irq_nomask(struct irq_data *data)
+{
+	irq_hw_number_t hw = data->hwirq;
+	void __iomem *vec_addr = meta_intc_vec_addr(hw);
+
+	record_irq_is_masked(data);
+
+	/* there is no interrupt mask, so unvector the interrupt */
+	metag_out32(0, vec_addr);
+}
+
+/**
+ * meta_intc_unmask_edge_irq_nomask() - unmask an edge irq by revectoring
+ * @data:	data for the external irq to unmask
+ *
+ * This is the version of the unmask function for older versions which don't
+ * have HWMASKEXT registers, or a SoC level means of masking IRQs. Instead the
+ * IRQ is revectored back to the core and retriggered if necessary.
+ *
+ * The retriggering done by this function is specific to edge interrupts.
+ */
+static void meta_intc_unmask_edge_irq_nomask(struct irq_data *data)
+{
+	irq_hw_number_t hw = data->hwirq;
+	unsigned int bit = 1 << meta_intc_offset(hw);
+	void __iomem *stat_addr = meta_intc_stat_addr(hw);
+	void __iomem *vec_addr = meta_intc_vec_addr(hw);
+	unsigned int thread = hard_processor_id();
+
+	record_irq_is_unmasked(data);
+
+	/* there is no interrupt mask, so revector the interrupt */
+	metag_out32(TBI_TRIG_VEC(TBID_SIGNUM_TR2(thread)), vec_addr);
+
+	/*
+	 * Re-trigger interrupt
+	 *
+	 * Writing a 1 toggles, and a 0->1 transition triggers. We only
+	 * retrigger if the status bit is already set, which means we
+	 * need to clear it first. Retriggering is fundamentally racy
+	 * because if the interrupt fires again after we clear it we
+	 * could end up clearing it again and the interrupt handler
+	 * thinking it hasn't fired. Therefore we need to keep trying to
+	 * retrigger until the bit is set.
+	 */
+	if (metag_in32(stat_addr) & bit) {
+		metag_out32(bit, stat_addr);
+		while (!(metag_in32(stat_addr) & bit))
+			metag_out32(bit, stat_addr);
+	}
+}
+
+/**
+ * meta_intc_unmask_level_irq_nomask() - unmask a level irq by revectoring
+ * @data:	data for the external irq to unmask
+ *
+ * This is the version of the unmask function for older versions which don't
+ * have HWMASKEXT registers, or a SoC level means of masking IRQs. Instead the
+ * IRQ is revectored back to the core and retriggered if necessary.
+ *
+ * The retriggering done by this function is specific to level interrupts.
+ */
+static void meta_intc_unmask_level_irq_nomask(struct irq_data *data)
+{
+	irq_hw_number_t hw = data->hwirq;
+	unsigned int bit = 1 << meta_intc_offset(hw);
+	void __iomem *stat_addr = meta_intc_stat_addr(hw);
+	void __iomem *vec_addr = meta_intc_vec_addr(hw);
+	unsigned int thread = hard_processor_id();
+
+	record_irq_is_unmasked(data);
+
+	/* there is no interrupt mask, so revector the interrupt */
+	metag_out32(TBI_TRIG_VEC(TBID_SIGNUM_TR2(thread)), vec_addr);
+
+	/* Re-trigger interrupt */
+	/* Writing a 1 triggers interrupt */
+	if (metag_in32(stat_addr) & bit)
+		metag_out32(bit, stat_addr);
+}
+
+/**
+ * meta_intc_irq_set_type() - set the type of an external irq
+ * @data:	data for the external irq to set the type of
+ * @flow_type:	new irq flow type
+ *
+ * Set the flow type of an external interrupt. This updates the irq chip and irq
+ * handler depending on whether the irq is edge or level sensitive (the polarity
+ * is ignored), and also sets up the bit in HWLEVELEXT so the hardware knows
+ * when to trigger.
+ */
+static int meta_intc_irq_set_type(struct irq_data *data, unsigned int flow_type)
+{
+#ifdef CONFIG_METAG_SUSPEND_MEM
+	struct meta_intc_priv *priv = &meta_intc_priv;
+#endif
+	unsigned int irq = data->irq;
+	irq_hw_number_t hw = data->hwirq;
+	unsigned int bit = 1 << meta_intc_offset(hw);
+	void __iomem *level_addr = meta_intc_level_addr(hw);
+	unsigned long flags;
+	unsigned int level;
+
+	/* update the chip/handler */
+	if (flow_type & IRQ_TYPE_LEVEL_MASK)
+		__irq_set_chip_handler_name_locked(irq, &meta_intc_level_chip,
+						   handle_level_irq, NULL);
+	else
+		__irq_set_chip_handler_name_locked(irq, &meta_intc_edge_chip,
+						   handle_edge_irq, NULL);
+
+	/* and clear/set the bit in HWLEVELEXT */
+	__global_lock2(flags);
+	level = metag_in32(level_addr);
+	if (flow_type & IRQ_TYPE_LEVEL_MASK)
+		level |= bit;
+	else
+		level &= ~bit;
+	metag_out32(level, level_addr);
+#ifdef CONFIG_METAG_SUSPEND_MEM
+	priv->levels_altered[meta_intc_bank(hw)] |= bit;
+#endif
+	__global_unlock2(flags);
+
+	return 0;
+}
+
+/**
+ * meta_intc_irq_demux() - external irq de-multiplexer
+ * @irq:	the virtual interrupt number
+ * @desc:	the interrupt description structure for this irq
+ *
+ * The cpu receives an interrupt on TR2 when a SoC interrupt has occurred. It is
+ * this function's job to demux this irq and figure out exactly which external
+ * irq needs servicing.
+ *
+ * Whilst using TR2 to detect external interrupts is a software convention it is
+ * (hopefully) unlikely to change.
+ */
+static void meta_intc_irq_demux(unsigned int irq, struct irq_desc *desc)
+{
+	struct meta_intc_priv *priv = &meta_intc_priv;
+	irq_hw_number_t hw;
+	unsigned int bank, irq_no, status;
+	void __iomem *stat_addr = meta_intc_stat_addr(0);
+
+	/*
+	 * Locate which interrupt has caused our handler to run.
+	 */
+	for (bank = 0; bank < priv->nr_banks; ++bank) {
+		/* Which interrupts are currently pending in this bank? */
+recalculate:
+		status = metag_in32(stat_addr) & priv->unmasked[bank];
+
+		for (hw = bank*32; status; status >>= 1, ++hw) {
+			if (status & 0x1) {
+				/*
+				 * Map the hardware IRQ number to a virtual
+				 * Linux IRQ number.
+				 */
+				irq_no = irq_linear_revmap(priv->domain, hw);
+
+				/*
+				 * Only fire off external interrupts that are
+				 * registered to be handled by the kernel.
+				 * Other external interrupts are probably being
+				 * handled by other Meta hardware threads.
+				 */
+				generic_handle_irq(irq_no);
+
+				/*
+				 * The handler may have re-enabled interrupts
+				 * which could have caused a nested invocation
+				 * of this code and make the copy of the
+				 * status register we are using invalid.
+				 */
+				goto recalculate;
+			}
+		}
+		stat_addr += HWSTAT_STRIDE;
+	}
+}
+
+#ifdef CONFIG_SMP
+/**
+ * meta_intc_set_affinity() - set the affinity for an interrupt
+ * @data:	data for the external irq to set the affinity of
+ * @cpumask:	cpu mask representing cpus which can handle the interrupt
+ * @force:	whether to force (ignored)
+ *
+ * Revector the specified external irq onto a specific cpu's TR2 trigger, so
+ * that that cpu tends to be the one who handles it.
+ */
+static int meta_intc_set_affinity(struct irq_data *data,
+				  const struct cpumask *cpumask, bool force)
+{
+	irq_hw_number_t hw = data->hwirq;
+	void __iomem *vec_addr = meta_intc_vec_addr(hw);
+	unsigned int cpu, thread;
+
+	/*
+	 * Wire up this interrupt from HWVECxEXT to the Meta core.
+	 *
+	 * Note that we can't wire up HWVECxEXT to interrupt more than
+	 * one cpu (the interrupt code doesn't support it), so we just
+	 * pick the first cpu we find in 'cpumask'.
+	 */
+	cpu = cpumask_any(cpumask);
+	thread = cpu_2_hwthread_id[cpu];
+
+	metag_out32(TBI_TRIG_VEC(TBID_SIGNUM_TR2(thread)), vec_addr);
+
+	return 0;
+}
+#else
+#define meta_intc_set_affinity	NULL
+#endif
+
+#ifdef CONFIG_PM_SLEEP
+#define META_INTC_CHIP_FLAGS	(IRQCHIP_MASK_ON_SUSPEND \
+				| IRQCHIP_SKIP_SET_WAKE)
+#else
+#define META_INTC_CHIP_FLAGS	0
+#endif
+
+/* public edge/level irq chips which SoCs can override */
+
+struct irq_chip meta_intc_edge_chip = {
+	.irq_startup		= meta_intc_startup_irq,
+	.irq_shutdown		= meta_intc_shutdown_irq,
+	.irq_ack		= meta_intc_ack_irq,
+	.irq_mask		= meta_intc_mask_irq,
+	.irq_unmask		= meta_intc_unmask_irq,
+	.irq_set_type		= meta_intc_irq_set_type,
+	.irq_set_affinity	= meta_intc_set_affinity,
+	.flags			= META_INTC_CHIP_FLAGS,
+};
+
+struct irq_chip meta_intc_level_chip = {
+	.irq_startup		= meta_intc_startup_irq,
+	.irq_shutdown		= meta_intc_shutdown_irq,
+	.irq_set_type		= meta_intc_irq_set_type,
+	.irq_mask		= meta_intc_mask_irq,
+	.irq_unmask		= meta_intc_unmask_irq,
+	.irq_set_affinity	= meta_intc_set_affinity,
+	.flags			= META_INTC_CHIP_FLAGS,
+};
+
+/**
+ * meta_intc_map() - map an external irq
+ * @d:		irq domain of external trigger block
+ * @irq:	virtual irq number
+ * @hw:		hardware irq number within external trigger block
+ *
+ * This sets up a virtual irq for a specified hardware interrupt. The irq chip
+ * and handler is configured, using the HWLEVELEXT registers to determine
+ * edge/level flow type. These registers will have been set when the irq type is
+ * set (or set to a default at init time).
+ */
+static int meta_intc_map(struct irq_domain *d, unsigned int irq,
+			 irq_hw_number_t hw)
+{
+	unsigned int bit = 1 << meta_intc_offset(hw);
+	void __iomem *level_addr = meta_intc_level_addr(hw);
+
+	/* Go by the current sense in the HWLEVELEXT register */
+	if (metag_in32(level_addr) & bit)
+		irq_set_chip_and_handler(irq, &meta_intc_level_chip,
+					 handle_level_irq);
+	else
+		irq_set_chip_and_handler(irq, &meta_intc_edge_chip,
+					 handle_edge_irq);
+	return 0;
+}
+
+static const struct irq_domain_ops meta_intc_domain_ops = {
+	.map = meta_intc_map,
+	.xlate = irq_domain_xlate_twocell,
+};
+
+#ifdef CONFIG_METAG_SUSPEND_MEM
+
+/**
+ * struct meta_intc_context - suspend context
+ * @levels:	State of HWLEVELEXT registers
+ * @masks:	State of HWMASKEXT registers
+ * @vectors:	State of HWVECEXT registers
+ * @txvecint:	State of TxVECINT registers
+ *
+ * This structure stores the IRQ state across suspend.
+ */
+struct meta_intc_context {
+	u32 levels[4];
+	u32 masks[4];
+	u8 vectors[4*32];
+
+	u8 txvecint[4][4];
+};
+
+/* suspend context */
+static struct meta_intc_context *meta_intc_context;
+
+/**
+ * meta_intc_suspend() - store irq state
+ *
+ * To avoid interfering with other threads we only save the IRQ state of IRQs in
+ * use by Linux.
+ */
+static int meta_intc_suspend(void)
+{
+	struct meta_intc_priv *priv = &meta_intc_priv;
+	int i, j;
+	irq_hw_number_t hw;
+	unsigned int bank;
+	unsigned long flags;
+	struct meta_intc_context *context;
+	void __iomem *level_addr, *mask_addr, *vec_addr;
+	u32 mask, bit;
+
+	context = kzalloc(sizeof(*context), GFP_ATOMIC);
+	if (!context)
+		return -ENOMEM;
+
+	hw = 0;
+	level_addr = meta_intc_level_addr(0);
+	mask_addr = meta_intc_mask_addr(0);
+	for (bank = 0; bank < priv->nr_banks; ++bank) {
+		vec_addr = meta_intc_vec_addr(hw);
+
+		/* create mask of interrupts in use */
+		mask = 0;
+		for (bit = 1; bit; bit <<= 1) {
+			i = irq_linear_revmap(priv->domain, hw);
+			/* save mapped irqs which are enabled or have actions */
+			if (i && (!irqd_irq_disabled(irq_get_irq_data(i)) ||
+				  irq_has_action(i))) {
+				mask |= bit;
+
+				/* save trigger vector */
+				context->vectors[hw] = metag_in32(vec_addr);
+			}
+
+			++hw;
+			vec_addr += HWVECnEXT_STRIDE;
+		}
+
+		/* save level state if any IRQ levels altered */
+		if (priv->levels_altered[bank])
+			context->levels[bank] = metag_in32(level_addr);
+		/* save mask state if any IRQs in use */
+		if (mask)
+			context->masks[bank] = metag_in32(mask_addr);
+
+		level_addr += HWSTAT_STRIDE;
+		mask_addr += HWSTAT_STRIDE;
+	}
+
+	/* save trigger matrixing */
+	__global_lock2(flags);
+	for (i = 0; i < 4; ++i)
+		for (j = 0; j < 4; ++j)
+			context->txvecint[i][j] = metag_in32(T0VECINT_BHALT +
+							     TnVECINT_STRIDE*i +
+							     8*j);
+	__global_unlock2(flags);
+
+	meta_intc_context = context;
+	return 0;
+}
+
+/**
+ * meta_intc_resume() - restore saved irq state
+ *
+ * Restore the saved IRQ state and drop it.
+ */
+static void meta_intc_resume(void)
+{
+	struct meta_intc_priv *priv = &meta_intc_priv;
+	int i, j;
+	irq_hw_number_t hw;
+	unsigned int bank;
+	unsigned long flags;
+	struct meta_intc_context *context = meta_intc_context;
+	void __iomem *level_addr, *mask_addr, *vec_addr;
+	u32 mask, bit, tmp;
+
+	meta_intc_context = NULL;
+
+	hw = 0;
+	level_addr = meta_intc_level_addr(0);
+	mask_addr = meta_intc_mask_addr(0);
+	for (bank = 0; bank < priv->nr_banks; ++bank) {
+		vec_addr = meta_intc_vec_addr(hw);
+
+		/* create mask of interrupts in use */
+		mask = 0;
+		for (bit = 1; bit; bit <<= 1) {
+			i = irq_linear_revmap(priv->domain, hw);
+			/* restore mapped irqs, enabled or with actions */
+			if (i && (!irqd_irq_disabled(irq_get_irq_data(i)) ||
+				  irq_has_action(i))) {
+				mask |= bit;
+
+				/* restore trigger vector */
+				metag_out32(context->vectors[hw], vec_addr);
+			}
+
+			++hw;
+			vec_addr += HWVECnEXT_STRIDE;
+		}
+
+		if (mask) {
+			/* restore mask state */
+			__global_lock2(flags);
+			tmp = metag_in32(mask_addr);
+			tmp = (tmp & ~mask) | (context->masks[bank] & mask);
+			metag_out32(tmp, mask_addr);
+			__global_unlock2(flags);
+		}
+
+		mask = priv->levels_altered[bank];
+		if (mask) {
+			/* restore level state */
+			__global_lock2(flags);
+			tmp = metag_in32(level_addr);
+			tmp = (tmp & ~mask) | (context->levels[bank] & mask);
+			metag_out32(tmp, level_addr);
+			__global_unlock2(flags);
+		}
+
+		level_addr += HWSTAT_STRIDE;
+		mask_addr += HWSTAT_STRIDE;
+	}
+
+	/* restore trigger matrixing */
+	__global_lock2(flags);
+	for (i = 0; i < 4; ++i) {
+		for (j = 0; j < 4; ++j) {
+			metag_out32(context->txvecint[i][j],
+				    T0VECINT_BHALT +
+				    TnVECINT_STRIDE*i +
+				    8*j);
+		}
+	}
+	__global_unlock2(flags);
+
+	kfree(context);
+}
+
+static struct syscore_ops meta_intc_syscore_ops = {
+	.suspend = meta_intc_suspend,
+	.resume = meta_intc_resume,
+};
+
+static void __init meta_intc_init_syscore_ops(struct meta_intc_priv *priv)
+{
+	register_syscore_ops(&meta_intc_syscore_ops);
+}
+#else
+#define meta_intc_init_syscore_ops(priv) do {} while (0)
+#endif
+
+/**
+ * meta_intc_init_cpu() - register with a Meta cpu
+ * @priv:	private interrupt controller data
+ * @cpu:	the CPU to register on
+ *
+ * Configure @cpu's TR2 irq so that we can demux external irqs.
+ */
+static void __init meta_intc_init_cpu(struct meta_intc_priv *priv, int cpu)
+{
+	unsigned int thread = cpu_2_hwthread_id[cpu];
+	unsigned int signum = TBID_SIGNUM_TR2(thread);
+	int irq = tbisig_map(signum);
+
+	/* Register the multiplexed IRQ handler */
+	irq_set_chained_handler(irq, meta_intc_irq_demux);
+	irq_set_irq_type(irq, IRQ_TYPE_LEVEL_LOW);
+}
+
+/**
+ * meta_intc_no_mask() - indicate lack of HWMASKEXT registers
+ *
+ * Called from SoC code (or init code below) to dynamically indicate the lack of
+ * HWMASKEXT registers (for example depending on some SoC revision register).
+ * This alters the irq mask and unmask callbacks to use the fallback
+ * unvectoring/retriggering technique instead of using HWMASKEXT registers.
+ */
+void __init meta_intc_no_mask(void)
+{
+	meta_intc_edge_chip.irq_mask	= meta_intc_mask_irq_nomask;
+	meta_intc_edge_chip.irq_unmask	= meta_intc_unmask_edge_irq_nomask;
+	meta_intc_level_chip.irq_mask	= meta_intc_mask_irq_nomask;
+	meta_intc_level_chip.irq_unmask	= meta_intc_unmask_level_irq_nomask;
+}
+
+/**
+ * init_external_IRQ() - initialise the external irq controller
+ *
+ * Set up the external irq controller using device tree properties. This is
+ * called from init_IRQ().
+ */
+int __init init_external_IRQ(void)
+{
+	struct meta_intc_priv *priv = &meta_intc_priv;
+	struct device_node *node;
+	int ret, cpu;
+	u32 val;
+	bool no_masks = false;
+
+	node = of_find_compatible_node(NULL, NULL, "img,meta-intc");
+	if (!node)
+		return -ENOENT;
+
+	/* Get number of banks */
+	ret = of_property_read_u32(node, "num-banks", &val);
+	if (ret) {
+		pr_err("meta-intc: No num-banks property found\n");
+		return ret;
+	}
+	if (val < 1 || val > 4) {
+		pr_err("meta-intc: num-banks (%u) out of range\n", val);
+		return -EINVAL;
+	}
+	priv->nr_banks = val;
+
+	/* Are any mask registers present? */
+	if (of_get_property(node, "no-mask", NULL))
+		no_masks = true;
+
+	/* No HWMASKEXT registers present? */
+	if (no_masks)
+		meta_intc_no_mask();
+
+	/* Set up an IRQ domain */
+	/*
+	 * This is a legacy IRQ domain for now until all the platform setup code
+	 * has been converted to devicetree.
+	 */
+	priv->domain = irq_domain_add_linear(node, priv->nr_banks*32,
+					     &meta_intc_domain_ops, priv);
+	if (unlikely(!priv->domain)) {
+		pr_err("meta-intc: cannot add IRQ domain\n");
+		return -ENOMEM;
+	}
+
+	/* Setup TR2 for all cpus. */
+	for_each_possible_cpu(cpu)
+		meta_intc_init_cpu(priv, cpu);
+
+	/* Set up system suspend/resume callbacks */
+	meta_intc_init_syscore_ops(priv);
+
+	pr_info("meta-intc: External IRQ controller initialised (%u IRQs)\n",
+		priv->nr_banks*32);
+
+	return 0;
+}
diff --git a/drivers/irqchip/irq-metag.c b/drivers/irqchip/irq-metag.c
new file mode 100644
index 000000000000..8e94d7a3b20d
--- /dev/null
+++ b/drivers/irqchip/irq-metag.c
@@ -0,0 +1,343 @@
+/*
+ * Meta internal (HWSTATMETA) interrupt code.
+ *
+ * Copyright (C) 2011-2012 Imagination Technologies Ltd.
+ *
+ * This code is based on the code in SoC/common/irq.c and SoC/comet/irq.c
+ * The code base could be generalised/merged as a lot of the functionality is
+ * similar. Until this is done, we try to keep the code simple here.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/irqdomain.h>
+
+#include <asm/irq.h>
+#include <asm/hwthread.h>
+
+#define PERF0VECINT		0x04820580
+#define PERF1VECINT		0x04820588
+#define PERF0TRIG_OFFSET	16
+#define PERF1TRIG_OFFSET	17
+
+/**
+ * struct metag_internal_irq_priv - private meta internal interrupt data
+ * @domain:		IRQ domain for all internal Meta IRQs (HWSTATMETA)
+ * @unmasked:		Record of unmasked IRQs
+ */
+struct metag_internal_irq_priv {
+	struct irq_domain	*domain;
+
+	unsigned long		unmasked;
+};
+
+/* Private data for the one and only internal interrupt controller */
+static struct metag_internal_irq_priv metag_internal_irq_priv;
+
+static unsigned int metag_internal_irq_startup(struct irq_data *data);
+static void metag_internal_irq_shutdown(struct irq_data *data);
+static void metag_internal_irq_ack(struct irq_data *data);
+static void metag_internal_irq_mask(struct irq_data *data);
+static void metag_internal_irq_unmask(struct irq_data *data);
+#ifdef CONFIG_SMP
+static int metag_internal_irq_set_affinity(struct irq_data *data,
+			const struct cpumask *cpumask, bool force);
+#endif
+
+static struct irq_chip internal_irq_edge_chip = {
+	.name = "HWSTATMETA-IRQ",
+	.irq_startup = metag_internal_irq_startup,
+	.irq_shutdown = metag_internal_irq_shutdown,
+	.irq_ack = metag_internal_irq_ack,
+	.irq_mask = metag_internal_irq_mask,
+	.irq_unmask = metag_internal_irq_unmask,
+#ifdef CONFIG_SMP
+	.irq_set_affinity = metag_internal_irq_set_affinity,
+#endif
+};
+
+/*
+ *	metag_hwvec_addr - get the address of *VECINT regs of irq
+ *
+ *	This function is a table of supported triggers on HWSTATMETA
+ *	Could do with a structure, but better keep it simple. Changes
+ *	in this code should be rare.
+ */
+static inline void __iomem *metag_hwvec_addr(irq_hw_number_t hw)
+{
+	void __iomem *addr;
+
+	switch (hw) {
+	case PERF0TRIG_OFFSET:
+		addr = (void __iomem *)PERF0VECINT;
+		break;
+	case PERF1TRIG_OFFSET:
+		addr = (void __iomem *)PERF1VECINT;
+		break;
+	default:
+		addr = NULL;
+		break;
+	}
+	return addr;
+}
+
+/*
+ *	metag_internal_startup - setup an internal irq
+ *	@irq:	the irq to startup
+ *
+ *	Multiplex interrupts for @irq onto TR1. Clear any pending
+ *	interrupts.
+ */
+static unsigned int metag_internal_irq_startup(struct irq_data *data)
+{
+	/* Clear (toggle) the bit in HWSTATMETA for our interrupt. */
+	metag_internal_irq_ack(data);
+
+	/* Enable the interrupt by unmasking it */
+	metag_internal_irq_unmask(data);
+
+	return 0;
+}
+
+/*
+ *	metag_internal_irq_shutdown - turn off the irq
+ *	@irq:	the irq number to turn off
+ *
+ *	Mask @irq and clear any pending interrupts.
+ *	Stop muxing @irq onto TR1.
+ */
+static void metag_internal_irq_shutdown(struct irq_data *data)
+{
+	/* Disable the IRQ at the core by masking it. */
+	metag_internal_irq_mask(data);
+
+	/* Clear (toggle) the bit in HWSTATMETA for our interrupt. */
+	metag_internal_irq_ack(data);
+}
+
+/*
+ *	metag_internal_irq_ack - acknowledge irq
+ *	@irq:	the irq to ack
+ */
+static void metag_internal_irq_ack(struct irq_data *data)
+{
+	irq_hw_number_t hw = data->hwirq;
+	unsigned int bit = 1 << hw;
+
+	if (metag_in32(HWSTATMETA) & bit)
+		metag_out32(bit, HWSTATMETA);
+}
+
+/**
+ * metag_internal_irq_mask() - mask an internal irq by unvectoring
+ * @data:	data for the internal irq to mask
+ *
+ * HWSTATMETA has no mask register. Instead the IRQ is unvectored from the core
+ * and retriggered if necessary later.
+ */
+static void metag_internal_irq_mask(struct irq_data *data)
+{
+	struct metag_internal_irq_priv *priv = &metag_internal_irq_priv;
+	irq_hw_number_t hw = data->hwirq;
+	void __iomem *vec_addr = metag_hwvec_addr(hw);
+
+	clear_bit(hw, &priv->unmasked);
+
+	/* there is no interrupt mask, so unvector the interrupt */
+	metag_out32(0, vec_addr);
+}
+
+/**
+ * meta_intc_unmask_edge_irq_nomask() - unmask an edge irq by revectoring
+ * @data:	data for the internal irq to unmask
+ *
+ * HWSTATMETA has no mask register. Instead the IRQ is revectored back to the
+ * core and retriggered if necessary.
+ */
+static void metag_internal_irq_unmask(struct irq_data *data)
+{
+	struct metag_internal_irq_priv *priv = &metag_internal_irq_priv;
+	irq_hw_number_t hw = data->hwirq;
+	unsigned int bit = 1 << hw;
+	void __iomem *vec_addr = metag_hwvec_addr(hw);
+	unsigned int thread = hard_processor_id();
+
+	set_bit(hw, &priv->unmasked);
+
+	/* there is no interrupt mask, so revector the interrupt */
+	metag_out32(TBI_TRIG_VEC(TBID_SIGNUM_TR1(thread)), vec_addr);
+
+	/*
+	 * Re-trigger interrupt
+	 *
+	 * Writing a 1 toggles, and a 0->1 transition triggers. We only
+	 * retrigger if the status bit is already set, which means we
+	 * need to clear it first. Retriggering is fundamentally racy
+	 * because if the interrupt fires again after we clear it we
+	 * could end up clearing it again and the interrupt handler
+	 * thinking it hasn't fired. Therefore we need to keep trying to
+	 * retrigger until the bit is set.
+	 */
+	if (metag_in32(HWSTATMETA) & bit) {
+		metag_out32(bit, HWSTATMETA);
+		while (!(metag_in32(HWSTATMETA) & bit))
+			metag_out32(bit, HWSTATMETA);
+	}
+}
+
+#ifdef CONFIG_SMP
+/*
+ *	metag_internal_irq_set_affinity - set the affinity for an interrupt
+ */
+static int metag_internal_irq_set_affinity(struct irq_data *data,
+			const struct cpumask *cpumask, bool force)
+{
+	unsigned int cpu, thread;
+	irq_hw_number_t hw = data->hwirq;
+	/*
+	 * Wire up this interrupt from *VECINT to the Meta core.
+	 *
+	 * Note that we can't wire up *VECINT to interrupt more than
+	 * one cpu (the interrupt code doesn't support it), so we just
+	 * pick the first cpu we find in 'cpumask'.
+	 */
+	cpu = cpumask_any(cpumask);
+	thread = cpu_2_hwthread_id[cpu];
+
+	metag_out32(TBI_TRIG_VEC(TBID_SIGNUM_TR1(thread)),
+		    metag_hwvec_addr(hw));
+
+	return 0;
+}
+#endif
+
+/*
+ *	metag_internal_irq_demux - irq de-multiplexer
+ *	@irq:	the interrupt number
+ *	@desc:	the interrupt description structure for this irq
+ *
+ *	The cpu receives an interrupt on TR1 when an interrupt has
+ *	occurred. It is this function's job to demux this irq and
+ *	figure out exactly which trigger needs servicing.
+ */
+static void metag_internal_irq_demux(unsigned int irq, struct irq_desc *desc)
+{
+	struct metag_internal_irq_priv *priv = irq_desc_get_handler_data(desc);
+	irq_hw_number_t hw;
+	unsigned int irq_no;
+	u32 status;
+
+recalculate:
+	status = metag_in32(HWSTATMETA) & priv->unmasked;
+
+	for (hw = 0; status != 0; status >>= 1, ++hw) {
+		if (status & 0x1) {
+			/*
+			 * Map the hardware IRQ number to a virtual Linux IRQ
+			 * number.
+			 */
+			irq_no = irq_linear_revmap(priv->domain, hw);
+
+			/*
+			 * Only fire off interrupts that are
+			 * registered to be handled by the kernel.
+			 * Other interrupts are probably being
+			 * handled by other Meta hardware threads.
+			 */
+			generic_handle_irq(irq_no);
+
+			/*
+			 * The handler may have re-enabled interrupts
+			 * which could have caused a nested invocation
+			 * of this code and make the copy of the
+			 * status register we are using invalid.
+			 */
+			goto recalculate;
+		}
+	}
+}
+
+/**
+ * internal_irq_map() - Map an internal meta IRQ to a virtual IRQ number.
+ * @hw:		Number of the internal IRQ. Must be in range.
+ *
+ * Returns:	The virtual IRQ number of the Meta internal IRQ specified by
+ *		@hw.
+ */
+int internal_irq_map(unsigned int hw)
+{
+	struct metag_internal_irq_priv *priv = &metag_internal_irq_priv;
+	if (!priv->domain)
+		return -ENODEV;
+	return irq_create_mapping(priv->domain, hw);
+}
+
+/**
+ *	metag_internal_irq_init_cpu - regsister with the Meta cpu
+ *	@cpu:	the CPU to register on
+ *
+ *	Configure @cpu's TR1 irq so that we can demux irqs.
+ */
+static void metag_internal_irq_init_cpu(struct metag_internal_irq_priv *priv,
+					int cpu)
+{
+	unsigned int thread = cpu_2_hwthread_id[cpu];
+	unsigned int signum = TBID_SIGNUM_TR1(thread);
+	int irq = tbisig_map(signum);
+
+	/* Register the multiplexed IRQ handler */
+	irq_set_handler_data(irq, priv);
+	irq_set_chained_handler(irq, metag_internal_irq_demux);
+	irq_set_irq_type(irq, IRQ_TYPE_LEVEL_LOW);
+}
+
+/**
+ * metag_internal_intc_map() - map an internal irq
+ * @d:		irq domain of internal trigger block
+ * @irq:	virtual irq number
+ * @hw:		hardware irq number within internal trigger block
+ *
+ * This sets up a virtual irq for a specified hardware interrupt. The irq chip
+ * and handler is configured.
+ */
+static int metag_internal_intc_map(struct irq_domain *d, unsigned int irq,
+				   irq_hw_number_t hw)
+{
+	/* only register interrupt if it is mapped */
+	if (!metag_hwvec_addr(hw))
+		return -EINVAL;
+
+	irq_set_chip_and_handler(irq, &internal_irq_edge_chip,
+				 handle_edge_irq);
+	return 0;
+}
+
+static const struct irq_domain_ops metag_internal_intc_domain_ops = {
+	.map	= metag_internal_intc_map,
+};
+
+/**
+ *	metag_internal_irq_register - register internal IRQs
+ *
+ *	Register the irq chip and handler function for all internal IRQs
+ */
+int __init init_internal_IRQ(void)
+{
+	struct metag_internal_irq_priv *priv = &metag_internal_irq_priv;
+	unsigned int cpu;
+
+	/* Set up an IRQ domain */
+	priv->domain = irq_domain_add_linear(NULL, 32,
+					     &metag_internal_intc_domain_ops,
+					     priv);
+	if (unlikely(!priv->domain)) {
+		pr_err("meta-internal-intc: cannot add IRQ domain\n");
+		return -ENOMEM;
+	}
+
+	/* Setup TR1 for all cpus. */
+	for_each_possible_cpu(cpu)
+		metag_internal_irq_init_cpu(priv, cpu);
+
+	return 0;
+};
diff --git a/include/linux/irqchip/metag-ext.h b/include/linux/irqchip/metag-ext.h
new file mode 100644
index 000000000000..697af0fe7c5a
--- /dev/null
+++ b/include/linux/irqchip/metag-ext.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (C) 2012 Imagination Technologies
+ */
+
+#ifndef _LINUX_IRQCHIP_METAG_EXT_H_
+#define _LINUX_IRQCHIP_METAG_EXT_H_
+
+struct irq_data;
+struct platform_device;
+
+/* called from core irq code at init */
+int init_external_IRQ(void);
+
+/*
+ * called from SoC init_irq() callback to dynamically indicate the lack of
+ * HWMASKEXT registers.
+ */
+void meta_intc_no_mask(void);
+
+/*
+ * These allow SoCs to specialise the interrupt controller from their init_irq
+ * callbacks.
+ */
+
+extern struct irq_chip meta_intc_edge_chip;
+extern struct irq_chip meta_intc_level_chip;
+
+/* this should be called in the mask callback */
+void meta_intc_mask_irq_simple(struct irq_data *data);
+/* this should be called in the unmask callback */
+void meta_intc_unmask_irq_simple(struct irq_data *data);
+
+#endif /* _LINUX_IRQCHIP_METAG_EXT_H_ */
diff --git a/include/linux/irqchip/metag.h b/include/linux/irqchip/metag.h
new file mode 100644
index 000000000000..4ebdfb3101ab
--- /dev/null
+++ b/include/linux/irqchip/metag.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright (C) 2011 Imagination Technologies
+ */
+
+#ifndef _LINUX_IRQCHIP_METAG_H_
+#define _LINUX_IRQCHIP_METAG_H_
+
+#include <linux/errno.h>
+
+#ifdef CONFIG_METAG_PERFCOUNTER_IRQS
+extern int init_internal_IRQ(void);
+extern int internal_irq_map(unsigned int hw);
+#else
+static inline int init_internal_IRQ(void)
+{
+	return 0;
+}
+static inline int internal_irq_map(unsigned int hw)
+{
+	return -EINVAL;
+}
+#endif
+
+#endif /* _LINUX_IRQCHIP_METAG_H_ */
-- 
cgit v1.2.3


From 9ca52ed979b6b45ae480a5fc56d593efb3bf16e8 Mon Sep 17 00:00:00 2001
From: James Hogan <james.hogan@imgtec.com>
Date: Tue, 16 Oct 2012 10:16:14 +0100
Subject: mm: define VM_GROWSUP for CONFIG_METAG

Commit cc2383ec06be093789469852e1fe96e1148e9a2c ("mm: introduce
arch-specific vma flag VM_ARCH_1") merged in v3.7-rc1.

The above commit combined several arch-specific vma flags into one, and
in the process it changed the VM_GROWSUP definition to depend on
specific architectures rather than CONFIG_STACK_GROWSUP. Therefore add
an ifdef for CONFIG_METAG to also set VM_GROWSUP.

Signed-off-by: James Hogan <james.hogan@imgtec.com>
Cc: Konstantin Khlebnikov <khlebnikov@openvz.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Michel Lespinasse <walken@google.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: linux-mm@kvack.org
---
 include/linux/mm.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 66e2f7c61e5c..44ac3dc363e7 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -114,6 +114,8 @@ extern unsigned int kobjsize(const void *objp);
 # define VM_SAO		VM_ARCH_1	/* Strong Access Ordering (powerpc) */
 #elif defined(CONFIG_PARISC)
 # define VM_GROWSUP	VM_ARCH_1
+#elif defined(CONFIG_METAG)
+# define VM_GROWSUP	VM_ARCH_1
 #elif defined(CONFIG_IA64)
 # define VM_GROWSUP	VM_ARCH_1
 #elif !defined(CONFIG_MMU)
-- 
cgit v1.2.3